Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next

Pull networking updates from David Miller:

 1) Add Maglev hashing scheduler to IPVS, from Inju Song.

 2) Lots of new TC subsystem tests from Roman Mashak.

 3) Add TCP zero copy receive and fix delayed acks and autotuning with
    SO_RCVLOWAT, from Eric Dumazet.
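
    For reference, SO_RCVLOWAT itself is the long-standing socket option
    these fixes apply to; a minimal sketch of arming it so that poll()
    and recvmsg() only wake the reader once enough bytes are queued:

        #include <sys/socket.h>

        /* Wake the application only once at least 64KB is readable. */
        static int set_rcvlowat(int fd)
        {
                int lowat = 64 * 1024;

                return setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT,
                                  &lowat, sizeof(lowat));
        }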

 4) Add XDP_REDIRECT support to mlx5 driver, from Jesper Dangaard
    Brouer.

 5) Add ttl inherit support to vxlan, from Hangbin Liu.

 6) Properly separate ipv6 routes into their logically independent
    components: fib6_info for the routing table, and fib6_nh for sets of
    nexthops, which can thus be shared. From David Ahern.

 7) Add bpf_xdp_adjust_tail helper, which can be used to generate ICMP
    messages from XDP programs. From Nikita V. Shirokov.
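
    A minimal sketch of the helper in use (assuming libbpf's
    <bpf/bpf_helpers.h>; at this point the helper can only shrink the
    frame, i.e. a negative delta):

        #include <linux/bpf.h>
        #include <bpf/bpf_helpers.h>

        SEC("xdp")
        int xdp_trim(struct xdp_md *ctx)
        {
                int len = ctx->data_end - ctx->data;

                /* Cap every frame at 576 bytes, e.g. before turning it
                 * into an ICMP error reply. */
                if (len > 576)
                        bpf_xdp_adjust_tail(ctx, 576 - len);
                return XDP_PASS;
        }

        char _license[] SEC("license") = "GPL";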

 8) Lots of long overdue cleanups to the r8169 driver, from Heiner
    Kallweit.

 9) Add BTF ("BPF Type Format"), from Martin KaFai Lau.

10) Add traffic condition monitoring to iwlwifi, from Luca Coelho.

11) Plumb extack down into fib_rules, from Roopa Prabhu.

12) Add Flower classifier offload support to igb, from Vinicius Costa
    Gomes.

13) Add UDP GSO support, from Willem de Bruijn.
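
    A minimal sketch of the userspace side (the UDP_SEGMENT fallback
    define below is an assumption for older headers):

        #include <netinet/in.h>
        #include <sys/socket.h>

        #ifndef UDP_SEGMENT
        #define UDP_SEGMENT 103
        #endif

        /* Split each large send into gso_size-byte datagrams. */
        static int enable_udp_gso(int fd, int gso_size)
        {
                return setsockopt(fd, IPPROTO_UDP, UDP_SEGMENT,
                                  &gso_size, sizeof(gso_size));
        }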

14) Add documentation for eBPF helpers, from Quentin Monnet.

15) Add TLS tx offload to mlx5, from Ilya Lesokhin.

16) Allow applications to be given the number of bytes available to read
    on a socket via a control message returned from recvmsg(), from
    Soheil Hassas Yeganeh.
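
    A hedged sketch of consuming it (TCP_INQ/TCP_CM_INQ come from
    <linux/tcp.h> on kernels with this merge; enable the option first
    with setsockopt(fd, IPPROTO_TCP, TCP_INQ, &one, sizeof(one))):

        #include <string.h>
        #include <sys/socket.h>
        #include <netinet/in.h>
        #include <linux/tcp.h>          /* TCP_INQ, TCP_CM_INQ */

        static ssize_t recv_with_inq(int fd, void *buf, size_t len, int *inq)
        {
                char cbuf[CMSG_SPACE(sizeof(int))];
                struct iovec iov = { .iov_base = buf, .iov_len = len };
                struct msghdr msg = {
                        .msg_iov = &iov, .msg_iovlen = 1,
                        .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
                };
                ssize_t ret = recvmsg(fd, &msg, 0);
                struct cmsghdr *cm;

                /* The in-queue byte count rides along as ancillary data. */
                for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
                        if (cm->cmsg_level == IPPROTO_TCP &&
                            cm->cmsg_type == TCP_CM_INQ)
                                memcpy(inq, CMSG_DATA(cm), sizeof(*inq));
                return ret;
        }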

17) Add x86_32 eBPF JIT compiler, from Wang YanQing.

18) Add AF_XDP sockets, with zerocopy support infrastructure as well.
    From Björn Töpel.
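
    The bare minimum of the new family looks like this (a real user must
    also register a UMEM and map the rings before bind(); that setup is
    omitted here):

        #include <sys/socket.h>

        #ifndef AF_XDP
        #define AF_XDP 44       /* address family added by this merge */
        #endif

        static int open_xdp_socket(void)
        {
                return socket(AF_XDP, SOCK_RAW, 0);
        }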

19) Remove indirect load support from all of the BPF JITs and handle
    these operations in the verifier by translating them into native BPF
    instead. From Daniel Borkmann.

20) Add GRO support to ipv6 gre tunnels, from Eran Ben Elisha.

21) Allow XDP programs to do lookups in the main kernel routing tables
    for forwarding. From David Ahern.
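
    A hedged sketch of the idea (header parsing elided; the helper's
    return convention was still settling during this cycle, so treat
    the ifindex check as an assumption):

        #include <linux/bpf.h>
        #include <bpf/bpf_helpers.h>

        SEC("xdp")
        int xdp_fwd(struct xdp_md *ctx)
        {
                struct bpf_fib_lookup fib = {};
                int rc;

                /* ... parse L2/L3 headers, then fill fib.family,
                 * fib.ipv4_src/fib.ipv4_dst, fib.tot_len and
                 * fib.ifindex = ctx->ingress_ifindex ... */

                rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);
                if (rc > 0)     /* early convention: egress ifindex */
                        return bpf_redirect(rc, 0);
                return XDP_PASS;
        }

        char _license[] SEC("license") = "GPL";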

22) Allow drivers to store hardware state into an ELF section of kernel
    dump vmcore files, and use it in cxgb4. From Rahul Lakkireddy.

23) Various RACK and loss detection improvements in TCP, from Yuchung
    Cheng.

24) Add TCP SACK compression, from Eric Dumazet.

25) Add User Mode Helper support and basic bpfilter infrastructure, from
    Alexei Starovoitov.

26) Support ports and protocol values in RTM_GETROUTE, from Roopa
    Prabhu.

27) Support bulking in ->ndo_xdp_xmit() API, from Jesper Dangaard
    Brouer.

28) Add lots of forwarding selftests, from Petr Machata.

29) Add generic network device failover driver, from Sridhar Samudrala.

* ra.kernel.org:/pub/scm/linux/kernel/git/davem/net-next: (1959 commits)
  strparser: Add __strp_unpause and use it in ktls.
  rxrpc: Fix terminal retransmission connection ID to include the channel
  net: hns3: Optimize PF CMDQ interrupt switching process
  net: hns3: Fix for VF mailbox receiving unknown message
  net: hns3: Fix for VF mailbox cannot receiving PF response
  bnx2x: use the right constant
  Revert "net: sched: cls: Fix offloading when ingress dev is vxlan"
  net: dsa: b53: Fix for brcm tag issue in Cygnus SoC
  enic: fix UDP rss bits
  netdev-FAQ: clarify DaveM's position for stable backports
  rtnetlink: validate attributes in do_setlink()
  mlxsw: Add extack messages for port_{un, }split failures
  netdevsim: Add extack error message for devlink reload
  devlink: Add extack to reload and port_{un, }split operations
  net: metrics: add proper netlink validation
  ipmr: fix error path when ipmr_new_table fails
  ip6mr: only set ip6mr_table from setsockopt when ip6mr_new_table succeeds
  net: hns3: remove unused hclgevf_cfg_func_mta_filter
  netfilter: provide udp*_lib_lookup for nf_tproxy
  qed*: Utilize FW 8.37.2.0
  ...
diff --git a/.mailmap b/.mailmap
index 7fa9d41..29ddeb1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -186,6 +186,9 @@
 Uwe Kleine-König <ukl@pengutronix.de>
 Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
 Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
+Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
+Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
+Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
 Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.kumar2@arm.com>
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index 708dc4c..2754fe8 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -64,8 +64,6 @@
 	- misc. LCD driver documentation (cfag12864b, ks0108).
 backlight/
 	- directory with info on controlling backlights in flat panel displays
-bcache.txt
-	- Block-layer cache on fast SSDs to improve slow (raid) I/O performance.
 block/
 	- info on the Block I/O (BIO) layer.
 blockdev/
@@ -78,18 +76,10 @@
 	- directory with info on TI GPMC (General Purpose Memory Controller)
 bus-virt-phys-mapping.txt
 	- how to access I/O mapped memory from within device drivers.
-cachetlb.txt
-	- describes the cache/TLB flushing interfaces Linux uses.
 cdrom/
 	- directory with information on the CD-ROM drivers that Linux has.
 cgroup-v1/
 	- cgroups v1 features, including cpusets and memory controller.
-cgroup-v2.txt
-	- cgroups v2 features, including cpusets and memory controller.
-circular-buffers.txt
-	- how to make use of the existing circular buffer infrastructure
-clk.txt
-	- info on the common clock framework
 cma/
 	- Continuous Memory Area (CMA) debugfs interface.
 conf.py
diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus
index 0c9d9dc..3eaffbb 100644
--- a/Documentation/ABI/stable/sysfs-bus-vmbus
+++ b/Documentation/ABI/stable/sysfs-bus-vmbus
@@ -1,25 +1,25 @@
-What:		/sys/bus/vmbus/devices/vmbus_*/id
+What:		/sys/bus/vmbus/devices/<UUID>/id
 Date:		Jul 2009
 KernelVersion:	2.6.31
 Contact:	K. Y. Srinivasan <kys@microsoft.com>
 Description:	The VMBus child_relid of the device's primary channel
 Users:		tools/hv/lsvmbus
 
-What:		/sys/bus/vmbus/devices/vmbus_*/class_id
+What:		/sys/bus/vmbus/devices/<UUID>/class_id
 Date:		Jul 2009
 KernelVersion:	2.6.31
 Contact:	K. Y. Srinivasan <kys@microsoft.com>
 Description:	The VMBus interface type GUID of the device
 Users:		tools/hv/lsvmbus
 
-What:		/sys/bus/vmbus/devices/vmbus_*/device_id
+What:		/sys/bus/vmbus/devices/<UUID>/device_id
 Date:		Jul 2009
 KernelVersion:	2.6.31
 Contact:	K. Y. Srinivasan <kys@microsoft.com>
 Description:	The VMBus interface instance GUID of the device
 Users:		tools/hv/lsvmbus
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channel_vp_mapping
+What:		/sys/bus/vmbus/devices/<UUID>/channel_vp_mapping
 Date:		Jul 2015
 KernelVersion:	4.2.0
 Contact:	K. Y. Srinivasan <kys@microsoft.com>
@@ -28,112 +28,112 @@
 		Format: <channel's child_relid:the bound cpu's number>
 Users:		tools/hv/lsvmbus
 
-What:		/sys/bus/vmbus/devices/vmbus_*/device
+What:		/sys/bus/vmbus/devices/<UUID>/device
 Date:		Dec. 2015
 KernelVersion:	4.5
 Contact:	K. Y. Srinivasan <kys@microsoft.com>
 Description:	The 16 bit device ID of the device
 Users:		tools/hv/lsvmbus and user level RDMA libraries
 
-What:		/sys/bus/vmbus/devices/vmbus_*/vendor
+What:		/sys/bus/vmbus/devices/<UUID>/vendor
 Date:		Dec. 2015
 KernelVersion:	4.5
 Contact:	K. Y. Srinivasan <kys@microsoft.com>
 Description:	The 16 bit vendor ID of the device
 Users:		tools/hv/lsvmbus and user level RDMA libraries
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Directory for per-channel information
 		NN is the VMBUS relid associated with the channel.
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/cpu
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/cpu
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	VCPU (sub)channel is affinitized to
 Users:		tools/hv/lsvmbus and other debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/in_mask
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/in_mask
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Host to guest channel interrupt mask
 Users:		Debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/latency
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/latency
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Channel signaling latency
 Users:		Debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/out_mask
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/out_mask
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Guest to host channel interrupt mask
 Users:		Debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/pending
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/pending
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Channel interrupt pending state
 Users:		Debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/read_avail
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/read_avail
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Bytes available to read
 Users:		Debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/write_avail
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/write_avail
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Bytes available to write
 Users:		Debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/events
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/events
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Number of times we have signaled the host
 Users:		Debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/interrupts
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/interrupts
 Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Number of times we have taken an interrupt (incoming)
 Users:		Debugging tools
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/subchannel_id
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/subchannel_id
 Date:		January. 2018
 KernelVersion:	4.16
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Subchannel ID associated with VMBUS channel
 Users:		Debugging tools and userspace drivers
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/monitor_id
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/monitor_id
 Date:		January. 2018
 KernelVersion:	4.16
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Monitor bit associated with channel
 Users:		Debugging tools and userspace drivers
 
-What:		/sys/bus/vmbus/devices/vmbus_*/channels/NN/ring
+What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/ring
 Date:		January. 2018
 KernelVersion:	4.16
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 5b2d0f0..3e90e1f 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -90,4 +90,4 @@
 Contact:	Lee Schermerhorn <lee.schermerhorn@hp.com>
 Description:
 		The node's huge page size control/query attributes.
-		See Documentation/vm/hugetlbpage.txt
\ No newline at end of file
+		See Documentation/admin-guide/mm/hugetlbpage.rst
\ No newline at end of file
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index c702c78..08d456e 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -189,6 +189,28 @@
 		The file will read "hotplug", "wired" and "not used" if the
 		information is available, and "unknown" otherwise.
 
+What:		/sys/bus/usb/devices/.../(hub interface)/portX/quirks
+Date:		May 2018
+Contact:	Nicolas Boichat <drinkcat@chromium.org>
+Description:
+		In some cases, we care about time-to-active for devices
+		connected on a specific port (e.g. non-standard USB port like
+		pogo pins), where the device to be connected is known in
+		advance, and behaves well according to the specification.
+		This attribute is a bit-field that controls the behavior of
+		a specific port:
+		 - Bit 0 of this field selects the "old" enumeration scheme,
+		   as it is considerably faster (it only causes one USB reset
+		   instead of 2).
+		   The old enumeration scheme can also be selected globally
+		   using /sys/module/usbcore/parameters/old_scheme_first, but
+		   it is often not desirable as the new scheme was introduced to
+		   increase compatibility with more devices.
+		 - Bit 1 reduces TRSTRCY to the 10 ms that are required by the
+		   USB 2.0 specification, instead of the 50 ms that are normally
+		   used to help make enumeration work better on some high speed
+		   devices.
+
 What:		/sys/bus/usb/devices/.../(hub interface)/portX/over_current_count
 Date:		February 2018
 Contact:	Richard Leitner <richard.leitner@skidata.com>
@@ -236,3 +258,21 @@
 		Supported values are 0 - 15.
 		More information on how besl values map to microseconds can be found in
 		USB 2.0 ECN Errata for Link Power Management, section 4.10)
+
+What:		/sys/bus/usb/devices/.../rx_lanes
+Date:		March 2018
+Contact:	Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+		Number of rx lanes the device is using.
+		USB 3.2 adds Dual-lane support, 2 rx and 2 tx lanes over Type-C.
+		Inter-Chip SSIC devices support asymmetric lanes up to 4 lanes per
+		direction. Devices before USB 3.2 are single lane (rx_lanes = 1).
+
+What:		/sys/bus/usb/devices/.../tx_lanes
+Date:		March 2018
+Contact:	Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+		Number of tx lanes the device is using.
+		USB 3.2 adds Dual-lane support, 2 rx and 2 tx lanes over Type-C.
+		Inter-Chip SSIC devices support asymmetric lanes up to 4 lanes per
+		direction. Devices before USB 3.2 are single lane (tx_lanes = 1).
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
index f85ce9e..5e23e22 100644
--- a/Documentation/ABI/testing/sysfs-class-power
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -1,3 +1,458 @@
+===== General Properties =====
+
+What:		/sys/class/power_supply/<supply_name>/manufacturer
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the name of the device manufacturer.
+
+		Access: Read
+		Valid values: Represented as string
+
+What:		/sys/class/power_supply/<supply_name>/model_name
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the name of the device model.
+
+		Access: Read
+		Valid values: Represented as string
+
+What:		/sys/class/power_supply/<supply_name>/serial_number
+Date:		January 2008
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the serial number of the device.
+
+		Access: Read
+		Valid values: Represented as string
+
+What:		/sys/class/power_supply/<supply_name>/type
+Date:		May 2010
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Describes the main type of the supply.
+
+		Access: Read
+		Valid values: "Battery", "UPS", "Mains", "USB"
+
+===== Battery Properties =====
+
+What:		/sys/class/power_supply/<supply_name>/capacity
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Fine grain representation of battery capacity.
+
+		Access: Read
+		Valid values: 0 - 100 (percent)
+
+What:		/sys/class/power_supply/<supply_name>/capacity_alert_max
+Date:		July 2012
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Maximum battery capacity trip-wire value where the supply will
+		notify user-space of the event. This is normally used for the
+		battery discharging scenario where user-space needs to know the
+		battery has dropped to an upper level so it can take
+		appropriate action (e.g. warning user that battery level is
+		low).
+
+		Access: Read, Write
+		Valid values: 0 - 100 (percent)
+
+What:		/sys/class/power_supply/<supply_name>/capacity_alert_min
+Date:		July 2012
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Minimum battery capacity trip-wire value where the supply will
+		notify user-space of the event. This is normally used for the
+		battery discharging scenario where user-space needs to know the
+		battery has dropped to a lower level so it can take
+		appropriate action (e.g. warning user that battery level is
+		critically low).
+
+		Access: Read, Write
+		Valid values: 0 - 100 (percent)
+
+What:		/sys/class/power_supply/<supply_name>/capacity_level
+Date:		June 2009
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Coarse representation of battery capacity.
+
+		Access: Read
+		Valid values: "Unknown", "Critical", "Low", "Normal", "High",
+			      "Full"
+
+What:		/sys/class/power_supply/<supply_name>/current_avg
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports an average IBAT current reading for the battery, over a
+		fixed period. Normally devices will provide a fixed interval in
+		which they average readings to smooth out the reported value.
+
+		Access: Read
+		Valid values: Represented in microamps
+
+What:		/sys/class/power_supply/<supply_name>/current_max
+Date:		October 2010
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the maximum IBAT current allowed into the battery.
+
+		Access: Read
+		Valid values: Represented in microamps
+
+What:		/sys/class/power_supply/<supply_name>/current_now
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports an instant, single IBAT current reading for the battery.
+		This value is not averaged/smoothed.
+
+		Access: Read
+		Valid values: Represented in microamps
+
+What:		/sys/class/power_supply/<supply_name>/charge_type
+Date:		July 2009
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Represents the type of charging currently being applied to the
+		battery.
+
+		Access: Read
+		Valid values: "Unknown", "N/A", "Trickle", "Fast"
+
+What:		/sys/class/power_supply/<supply_name>/charge_term_current
+Date:		July 2014
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the charging current value which is used to determine
+		when the battery is considered full and charging should end.
+
+		Access: Read
+		Valid values: Represented in microamps
+
+What:		/sys/class/power_supply/<supply_name>/health
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the health of the battery or battery side of charger
+		functionality.
+
+		Access: Read
+		Valid values: "Unknown", "Good", "Overheat", "Dead",
+			      "Over voltage", "Unspecified failure", "Cold",
+			      "Watchdog timer expire", "Safety timer expire"
+
+What:		/sys/class/power_supply/<supply_name>/precharge_current
+Date:		June 2017
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the charging current applied during pre-charging phase
+		for a battery charge cycle.
+
+		Access: Read
+		Valid values: Represented in microamps
+
+What:		/sys/class/power_supply/<supply_name>/present
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports whether a battery is present or not in the system.
+
+		Access: Read
+		Valid values:
+			0: Absent
+			1: Present
+
+What:		/sys/class/power_supply/<supply_name>/status
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Represents the charging status of the battery. Normally this
+		is read-only reporting although for some supplies this can be
+		used to enable/disable charging to the battery.
+
+		Access: Read, Write
+		Valid values: "Unknown", "Charging", "Discharging",
+			      "Not charging", "Full"
+
+What:		/sys/class/power_supply/<supply_name>/technology
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Describes the battery technology supported by the supply.
+
+		Access: Read
+		Valid values: "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe",
+			      "NiCd", "LiMn"
+
+What:		/sys/class/power_supply/<supply_name>/temp
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the current TBAT battery temperature reading.
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/temp_alert_max
+Date:		July 2012
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Maximum TBAT temperature trip-wire value where the supply will
+		notify user-space of the event. This is normally used for the
+		battery charging scenario where user-space needs to know the
+		battery temperature has crossed an upper threshold so it can
+		take appropriate action (e.g. warning user that battery level is
+		critically high, and charging has stopped).
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/temp_alert_min
+Date:		July 2012
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Minimum TBAT temperature trip-wire value where the supply will
+		notify user-space of the event. This is normally used for the
+		battery charging scenario where user-space needs to know the
+		battery temperature has crossed a lower threshold so it can take
+		appropriate action (e.g. warning user that battery level is
+		high, and charging current has been reduced accordingly to
+		remedy the situation).
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/temp_max
+Date:		July 2014
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the maximum allowed TBAT battery temperature for
+		charging.
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/temp_min
+Date:		July 2014
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the minimum allowed TBAT battery temperature for
+		charging.
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/voltage_avg
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports an average VBAT voltage reading for the battery, over a
+		fixed period. Normally devices will provide a fixed interval in
+		which they average readings to smooth out the reported value.
+
+		Access: Read
+		Valid values: Represented in microvolts
+
+What:		/sys/class/power_supply/<supply_name>/voltage_max
+Date:		January 2008
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the maximum safe VBAT voltage permitted for the battery,
+		during charging.
+
+		Access: Read
+		Valid values: Represented in microvolts
+
+What:		/sys/class/power_supply/<supply_name>/voltage_min
+Date:		January 2008
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the minimum safe VBAT voltage permitted for the battery,
+		during discharging.
+
+		Access: Read
+		Valid values: Represented in microvolts
+
+What:		/sys/class/power_supply/<supply_name>/voltage_now
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports an instant, single VBAT voltage reading for the battery.
+		This value is not averaged/smoothed.
+
+		Access: Read
+		Valid values: Represented in microvolts
+
+===== USB Properties =====
+
+What: 		/sys/class/power_supply/<supply_name>/current_avg
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports an average IBUS current reading over a fixed period.
+		Normally devices will provide a fixed interval in which they
+		average readings to smooth out the reported value.
+
+		Access: Read
+		Valid values: Represented in microamps
+
+
+What: 		/sys/class/power_supply/<supply_name>/current_max
+Date:		October 2010
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the maximum IBUS current the supply can support.
+
+		Access: Read
+		Valid values: Represented in microamps
+
+What: 		/sys/class/power_supply/<supply_name>/current_now
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the IBUS current supplied now. This value is generally
+		read-only reporting, unless the 'online' state of the supply
+		is set to be programmable, in which case this value can be set
+		within the reported min/max range.
+
+		Access: Read, Write
+		Valid values: Represented in microamps
+
+What:		/sys/class/power_supply/<supply_name>/input_current_limit
+Date:		July 2014
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Details the incoming IBUS current limit currently set in the
+		supply. Normally this is configured based on the type of
+		connection made (e.g. a configured SDP should output a maximum
+		of 500mA so the input current limit is set to the same value).
+
+		Access: Read, Write
+		Valid values: Represented in microamps
+
+What:		/sys/class/power_supply/<supply_name>/online
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Indicates if VBUS is present for the supply. When the supply is
+		online, and the supply allows it, then it's possible to switch
+		between online states (e.g. Fixed -> Programmable for a PD_PPS
+		USB supply so voltage and current can be controlled).
+
+		Access: Read, Write
+		Valid values:
+			0: Offline
+			1: Online Fixed - Fixed Voltage Supply
+			2: Online Programmable - Programmable Voltage Supply
+
+What:		/sys/class/power_supply/<supply_name>/temp
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the current supply temperature reading. This would
+		normally be the internal temperature of the device itself (e.g.
+		the TJUNC temperature of an IC).
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/temp_alert_max
+Date:		July 2012
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Maximum supply temperature trip-wire value where the supply will
+		notify user-space of the event. This is normally used for the
+		charging scenario where user-space needs to know the supply
+		temperature has crossed an upper threshold so it can take
+		appropriate action (e.g. warning user that the supply
+		temperature is critically high, and charging has stopped to
+		remedy the situation).
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/temp_alert_min
+Date:		July 2012
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Minimum supply temperature trip-wire value where the supply will
+		notify user-space of the event. This is normally used for the
+		charging scenario where user-space needs to know the supply
+		temperature has crossed a lower threshold so it can take
+		appropriate action (e.g. warning user that the supply
+		temperature is high, and charging current has been reduced
+		accordingly to remedy the situation).
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/temp_max
+Date:		July 2014
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the maximum allowed supply temperature for operation.
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What:		/sys/class/power_supply/<supply_name>/temp_min
+Date:		July 2014
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the minimum allowed supply temperature for operation.
+
+		Access: Read
+		Valid values: Represented in 1/10 Degrees Celsius
+
+What: 		/sys/class/power_supply/<supply_name>/usb_type
+Date:		March 2018
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports what type of USB connection is currently active for
+		the supply, for example it can show if a USB-PD capable source
+		is attached.
+
+		Access: Read-Only
+		Valid values: "Unknown", "SDP", "DCP", "CDP", "ACA", "C", "PD",
+			      "PD_DRP", "PD_PPS", "BrickID"
+
+What: 		/sys/class/power_supply/<supply_name>/voltage_max
+Date:		January 2008
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the maximum VBUS voltage the supply can support.
+
+		Access: Read
+		Valid values: Represented in microvolts
+
+What: 		/sys/class/power_supply/<supply_name>/voltage_min
+Date:		January 2008
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the minimum VBUS voltage the supply can support.
+
+		Access: Read
+		Valid values: Represented in microvolts
+
+What: 		/sys/class/power_supply/<supply_name>/voltage_now
+Date:		May 2007
+Contact:	linux-pm@vger.kernel.org
+Description:
+		Reports the VBUS voltage supplied now. This value is generally
+		read-only reporting, unless the 'online' state of the supply
+		is set to be programmable, in which case this value can be set
+		within the reported min/max range.
+
+		Access: Read, Write
+		Valid values: Represented in microvolts
+
+===== Device Specific Properties =====
+
 What:		/sys/class/power/ds2760-battery.*/charge_now
 Date:		May 2010
 KernelVersion:	2.6.35
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
index e21c005..fdaa216 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -12,4 +12,4 @@
 			free_hugepages
 			surplus_hugepages
 			resv_hugepages
-		See Documentation/vm/hugetlbpage.txt for details.
+		See Documentation/admin-guide/mm/hugetlbpage.rst for details.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-ksm b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
index 73e653e..dfc1324 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-ksm
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
@@ -40,7 +40,7 @@
 		sleep_millisecs: how many milliseconds ksm should sleep between
 		scans.
 
-		See Documentation/vm/ksm.txt for more information.
+		See Documentation/vm/ksm.rst for more information.
 
 What:		/sys/kernel/mm/ksm/merge_across_nodes
 Date:		January 2013
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 2cc0a72..29601d9 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -37,7 +37,7 @@
 		The alloc_calls file is read-only and lists the kernel code
 		locations from which allocations for this cache were performed.
 		The alloc_calls file only contains information if debugging is
-		enabled for that cache (see Documentation/vm/slub.txt).
+		enabled for that cache (see Documentation/vm/slub.rst).
 
 What:		/sys/kernel/slab/cache/alloc_fastpath
 Date:		February 2008
@@ -219,7 +219,7 @@
 Description:
 		The free_calls file is read-only and lists the locations of
 		object frees if slab debugging is enabled (see
-		Documentation/vm/slub.txt).
+		Documentation/vm/slub.rst).
 
 What:		/sys/kernel/slab/cache/free_fastpath
 Date:		February 2008
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index a27fbfb..65eb856 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -1,3 +1,5 @@
+What is RCU?  --  "Read, Copy, Update"
+
 Please note that the "What is RCU?" LWN series is an excellent place
 to start learning about RCU:
 
diff --git a/Documentation/acpi/cppc_sysfs.txt b/Documentation/acpi/cppc_sysfs.txt
new file mode 100644
index 0000000..f20fb44
--- /dev/null
+++ b/Documentation/acpi/cppc_sysfs.txt
@@ -0,0 +1,69 @@
+
+	Collaborative Processor Performance Control (CPPC)
+
+CPPC, defined in the ACPI spec, describes a mechanism for the OS to manage the
+performance of a logical processor on a continuous and abstract performance
+scale. CPPC exposes a set of registers to describe the abstract performance
+scale, to request performance levels and to measure per-cpu delivered performance.
+
+For more details on CPPC please refer to the ACPI specification at:
+
+http://uefi.org/specifications
+
+Some of the CPPC registers are exposed via sysfs under:
+
+/sys/devices/system/cpu/cpuX/acpi_cppc/
+
+for each cpu X
+
+--------------------------------------------------------------------------------
+
+$ ls -lR  /sys/devices/system/cpu/cpu0/acpi_cppc/
+/sys/devices/system/cpu/cpu0/acpi_cppc/:
+total 0
+-r--r--r-- 1 root root 65536 Mar  5 19:38 feedback_ctrs
+-r--r--r-- 1 root root 65536 Mar  5 19:38 highest_perf
+-r--r--r-- 1 root root 65536 Mar  5 19:38 lowest_freq
+-r--r--r-- 1 root root 65536 Mar  5 19:38 lowest_nonlinear_perf
+-r--r--r-- 1 root root 65536 Mar  5 19:38 lowest_perf
+-r--r--r-- 1 root root 65536 Mar  5 19:38 nominal_freq
+-r--r--r-- 1 root root 65536 Mar  5 19:38 nominal_perf
+-r--r--r-- 1 root root 65536 Mar  5 19:38 reference_perf
+-r--r--r-- 1 root root 65536 Mar  5 19:38 wraparound_time
+
+--------------------------------------------------------------------------------
+
+* highest_perf : Highest performance of this processor (abstract scale).
+* nominal_perf : Highest sustained performance of this processor (abstract scale).
+* lowest_nonlinear_perf : Lowest performance of this processor with nonlinear
+  power savings (abstract scale).
+* lowest_perf : Lowest performance of this processor (abstract scale).
+
+* lowest_freq : CPU frequency corresponding to lowest_perf (in MHz).
+* nominal_freq : CPU frequency corresponding to nominal_perf (in MHz).
+  The above frequencies should only be used to report processor performance
+  in frequency instead of on the abstract scale. These values should not be used for any
+  functional decisions.
+
+* feedback_ctrs : Includes both the reference and delivered performance counters.
+  The reference counter ticks up proportionally to the processor's reference
+  performance. The delivered counter ticks up proportionally to the processor's
+  delivered performance.
+* wraparound_time: Minimum time for the feedback counters to wrap around (seconds).
+* reference_perf : Performance level at which reference performance counter
+  accumulates (abstract scale).
+
+--------------------------------------------------------------------------------
+
+		Computing Average Delivered Performance
+
+The steps below compute the average delivered performance from two
+snapshots of the feedback counters taken at times T1 and T2.
+
+T1: Read feedback_ctrs as fbc_t1
+    Wait or run some workload
+T2: Read feedback_ctrs as fbc_t2
+
+delivered_counter_delta = fbc_t2[del] - fbc_t1[del]
+reference_counter_delta = fbc_t2[ref] - fbc_t1[ref]
+
+delivered_perf = (reference_perf x delivered_counter_delta) / reference_counter_delta
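+
+--------------------------------------------------------------------------------
+
+A minimal userspace sketch of the procedure above (illustrative C, not
+part of the kernel; the "ref:<val> del:<val>" parsing below assumes the
+current output format of feedback_ctrs):
+
+#include <stdio.h>
+#include <unistd.h>
+
+static void snap(unsigned long long *ref, unsigned long long *del)
+{
+	FILE *f = fopen("/sys/devices/system/cpu/cpu0/acpi_cppc/feedback_ctrs", "r");
+
+	if (!f || fscanf(f, "ref:%llu del:%llu", ref, del) != 2)
+		*ref = *del = 0;
+	if (f)
+		fclose(f);
+}
+
+int main(void)
+{
+	unsigned long long r1, d1, r2, d2;
+
+	snap(&r1, &d1);		/* T1 */
+	sleep(1);		/* wait or run some workload */
+	snap(&r2, &d2);		/* T2 */
+
+	/* reference_perf is read once from the reference_perf file (elided) */
+	printf("delivered_perf = reference_perf * %llu / %llu\n",
+	       d2 - d1, r2 - r1);
+	return 0;
+}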
diff --git a/Documentation/bcache.txt b/Documentation/admin-guide/bcache.rst
similarity index 100%
rename from Documentation/bcache.txt
rename to Documentation/admin-guide/bcache.rst
diff --git a/Documentation/cgroup-v2.txt b/Documentation/admin-guide/cgroup-v2.rst
similarity index 100%
rename from Documentation/cgroup-v2.txt
rename to Documentation/admin-guide/cgroup-v2.rst
diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst
index 5bb9161..48d70af 100644
--- a/Documentation/admin-guide/index.rst
+++ b/Documentation/admin-guide/index.rst
@@ -48,6 +48,7 @@
    :maxdepth: 1
 
    initrd
+   cgroup-v2
    serial-console
    braille-console
    parport
@@ -60,9 +61,11 @@
    mono
    java
    ras
+   bcache
    pm/index
    thunderbolt
    LSM/index
+   mm/index
 
 .. only::  subproject and html
 
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f2040d4..1beb30d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -106,11 +106,11 @@
 			use by PCI
 			Format: <irq>,<irq>...
 
-	acpi_mask_gpe=  [HW,ACPI]
+	acpi_mask_gpe=	[HW,ACPI]
 			Due to the existence of _Lxx/_Exx, some GPEs triggered
 			by unsupported hardware/firmware features can result in
-                        GPE floodings that cannot be automatically disabled by
-                        the GPE dispatcher.
+			GPE floodings that cannot be automatically disabled by
+			the GPE dispatcher.
 			This facility can be used to prevent such uncontrolled
 			GPE floodings.
 			Format: <int>
@@ -472,10 +472,10 @@
 			for platform specific values (SB1, Loongson3 and
 			others).
 
-	ccw_timeout_log [S390]
+	ccw_timeout_log	[S390]
 			See Documentation/s390/CommonIO for details.
 
-	cgroup_disable= [KNL] Disable a particular controller
+	cgroup_disable=	[KNL] Disable a particular controller
 			Format: {name of the controller(s) to disable}
 			The effects of cgroup_disable=foo are:
 			- foo isn't auto-mounted if you mount all cgroups in
@@ -518,7 +518,7 @@
 			those clocks in any way. This parameter is useful for
 			debug and development, but should not be needed on a
 			platform with proper driver support.  For more
-			information, see Documentation/clk.txt.
+			information, see Documentation/driver-api/clk.rst.
 
 	clock=		[BUGS=X86-32, HW] gettimeofday clocksource override.
 			[Deprecated]
@@ -587,11 +587,6 @@
 			Sets the size of memory pool for coherent, atomic dma
 			allocations, by default set to 256K.
 
-	code_bytes	[X86] How many bytes of object code to print
-			in an oops report.
-			Range: 0 - 8192
-			Default: 64
-
 	com20020=	[HW,NET] ARCnet - COM20020 chipset
 			Format:
 			<io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
@@ -641,8 +636,8 @@
 		hvc<n>	Use the hypervisor console device <n>. This is for
 			both Xen and PowerPC hypervisors.
 
-                If the device connected to the port is not a TTY but a braille
-                device, prepend "brl," before the device type, for instance
+		If the device connected to the port is not a TTY but a braille
+		device, prepend "brl," before the device type, for instance
 			console=brl,ttyS0
 		For now, only VisioBraille is supported.
 
@@ -662,7 +657,7 @@
 
 	consoleblank=	[KNL] The console blank (screen saver) timeout in
 			seconds. A value of 0 disables the blank timer.
-                       Defaults to 0.
+			Defaults to 0.
 
 	coredump_filter=
 			[KNL] Change the default value for
@@ -730,7 +725,7 @@
 			or memory reserved is below 4G.
 
 	cryptomgr.notests
-                        [KNL] Disable crypto self-tests
+			[KNL] Disable crypto self-tests
 
 	cs89x0_dma=	[HW,NET]
 			Format: <dma>
@@ -746,7 +741,7 @@
 			Format: <port#>,<type>
 			See also Documentation/input/devices/joystick-parport.rst
 
-	ddebug_query=   [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
+	ddebug_query=	[KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
 			time. See
 			Documentation/admin-guide/dynamic-debug-howto.rst for
 			details.  Deprecated, see dyndbg.
@@ -833,7 +828,7 @@
 			causing system reset or hang due to sending
 			INIT from AP to BSP.
 
-	disable_ddw     [PPC/PSERIES]
+	disable_ddw	[PPC/PSERIES]
 			Disable Dynamic DMA Window support. Use this
 			to work around buggy firmware.
 
@@ -1025,6 +1020,12 @@
 			address. The serial port must already be setup
 			and configured. Options are not yet supported.
 
+		qcom_geni,<addr>
+			Start an early, polled-mode console on a Qualcomm
+			Generic Interface (GENI) based serial port at the
+			specified address. The serial port must already be
+			setup and configured. Options are not yet supported.
+
 	earlyprintk=	[X86,SH,ARM,M68k,S390]
 			earlyprintk=vga
 			earlyprintk=efi
@@ -1188,7 +1189,7 @@
 			parameter will force ia64_sal_cache_flush to call
 			ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
 
-	forcepae [X86-32]
+	forcepae	[X86-32]
 			Forcefully enable Physical Address Extension (PAE).
 			Many Pentium M systems disable PAE but may have a
 			functionally usable PAE implementation.
@@ -1247,7 +1248,7 @@
 
 	gamma=		[HW,DRM]
 
-	gart_fix_e820=  [X86_64] disable the fix e820 for K8 GART
+	gart_fix_e820=	[X86_64] disable the fix e820 for K8 GART
 			Format: off | on
 			default: on
 
@@ -1341,23 +1342,32 @@
 			x86-64 are 2M (when the CPU supports "pse") and 1G
 			(when the CPU supports the "pdpe1gb" cpuinfo flag).
 
-	hvc_iucv=	[S390] Number of z/VM IUCV hypervisor console (HVC)
-			       terminal devices. Valid values: 0..8
-	hvc_iucv_allow=	[S390] Comma-separated list of z/VM user IDs.
-			       If specified, z/VM IUCV HVC accepts connections
-			       from listed z/VM user IDs only.
+	hung_task_panic=
+			[KNL] Should the hung task detector generate panics.
+			Format: <integer>
 
+			A nonzero value instructs the kernel to panic when a
+			hung task is detected. The default value is controlled
+			by the CONFIG_BOOTPARAM_HUNG_TASK_PANIC build-time
+			option. The value selected by this boot parameter can
+			be changed later by the kernel.hung_task_panic sysctl.
+
+	hvc_iucv=	[S390]	Number of z/VM IUCV hypervisor console (HVC)
+				terminal devices. Valid values: 0..8
+	hvc_iucv_allow=	[S390]	Comma-separated list of z/VM user IDs.
+				If specified, z/VM IUCV HVC accepts connections
+				from listed z/VM user IDs only.
 	keep_bootcon	[KNL]
 			Do not unregister boot console at start. This is only
 			useful for debugging when something happens in the window
 			between unregistering the boot console and initializing
 			the real console.
 
-	i2c_bus=	[HW] Override the default board specific I2C bus speed
-			     or register an additional I2C bus that is not
-			     registered from board initialization code.
-			     Format:
-			     <bus_id>,<clkrate>
+	i2c_bus=	[HW]	Override the default board specific I2C bus speed
+				or register an additional I2C bus that is not
+				registered from board initialization code.
+				Format:
+				<bus_id>,<clkrate>
 
 	i8042.debug	[HW] Toggle i8042 debug mode
 	i8042.unmask_kbd_data
@@ -1386,7 +1396,7 @@
 			Default: only on s2r transitions on x86; most other
 			architectures force reset to be always executed
 	i8042.unlock	[HW] Unlock (ignore) the keylock
-	i8042.kbdreset  [HW] Reset device connected to KBD port
+	i8042.kbdreset	[HW] Reset device connected to KBD port
 
 	i810=		[HW,DRM]
 
@@ -1548,13 +1558,13 @@
 			programs exec'd, files mmap'd for exec, and all files
 			opened for read by uid=0.
 
-	ima_template=   [IMA]
+	ima_template=	[IMA]
 			Select one of defined IMA measurements template formats.
 			Formats: { "ima" | "ima-ng" | "ima-sig" }
 			Default: "ima-ng"
 
 	ima_template_fmt=
-	                [IMA] Define a custom template format.
+			[IMA] Define a custom template format.
 			Format: { "field1|...|fieldN" }
 
 	ima.ahash_minsize= [IMA] Minimum file size for asynchronous hash usage
@@ -1597,7 +1607,7 @@
 	inport.irq=	[HW] Inport (ATI XL and Microsoft) busmouse driver
 			Format: <irq>
 
-	int_pln_enable  [x86] Enable power limit notification interrupt
+	int_pln_enable	[x86] Enable power limit notification interrupt
 
 	integrity_audit=[IMA]
 			Format: { "0" | "1" }
@@ -1650,39 +1660,39 @@
 			0	disables intel_idle and fall back on acpi_idle.
 			1 to 9	specify maximum depth of C-state.
 
-	intel_pstate=  [X86]
-		       disable
-		         Do not enable intel_pstate as the default
-		         scaling driver for the supported processors
-		       passive
-			 Use intel_pstate as a scaling driver, but configure it
-			 to work with generic cpufreq governors (instead of
-			 enabling its internal governor).  This mode cannot be
-			 used along with the hardware-managed P-states (HWP)
-			 feature.
-		       force
-			 Enable intel_pstate on systems that prohibit it by default
-			 in favor of acpi-cpufreq. Forcing the intel_pstate driver
-			 instead of acpi-cpufreq may disable platform features, such
-			 as thermal controls and power capping, that rely on ACPI
-			 P-States information being indicated to OSPM and therefore
-			 should be used with caution. This option does not work with
-			 processors that aren't supported by the intel_pstate driver
-			 or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
-		       no_hwp
-		         Do not enable hardware P state control (HWP)
-			 if available.
-		hwp_only
-			Only load intel_pstate on systems which support
-			hardware P state control (HWP) if available.
-		support_acpi_ppc
-			Enforce ACPI _PPC performance limits. If the Fixed ACPI
-			Description Table, specifies preferred power management
-			profile as "Enterprise Server" or "Performance Server",
-			then this feature is turned on by default.
-		per_cpu_perf_limits
-			Allow per-logical-CPU P-State performance control limits using
-			cpufreq sysfs interface
+	intel_pstate=	[X86]
+			disable
+			  Do not enable intel_pstate as the default
+			  scaling driver for the supported processors
+			passive
+			  Use intel_pstate as a scaling driver, but configure it
+			  to work with generic cpufreq governors (instead of
+			  enabling its internal governor).  This mode cannot be
+			  used along with the hardware-managed P-states (HWP)
+			  feature.
+			force
+			  Enable intel_pstate on systems that prohibit it by default
+			  in favor of acpi-cpufreq. Forcing the intel_pstate driver
+			  instead of acpi-cpufreq may disable platform features, such
+			  as thermal controls and power capping, that rely on ACPI
+			  P-States information being indicated to OSPM and therefore
+			  should be used with caution. This option does not work with
+			  processors that aren't supported by the intel_pstate driver
+			  or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
+			no_hwp
+			  Do not enable hardware P state control (HWP)
+			  if available.
+			hwp_only
+			  Only load intel_pstate on systems which support
+			  hardware P state control (HWP) if available.
+			support_acpi_ppc
+			  Enforce ACPI _PPC performance limits. If the Fixed ACPI
+			  Description Table, specifies preferred power management
+			  profile as "Enterprise Server" or "Performance Server",
+			  then this feature is turned on by default.
+			per_cpu_perf_limits
+			  Allow per-logical-CPU P-State performance control limits using
+			  cpufreq sysfs interface
 
 	intremap=	[X86-64, Intel-IOMMU]
 			on	enable Interrupt Remapping (default)
@@ -1705,7 +1715,6 @@
 		nopanic
 		merge
 		nomerge
-		forcesac
 		soft
 		pt		[x86, IA-64]
 		nobypass	[PPC/POWERNV]
@@ -2027,7 +2036,7 @@
 			* [no]ncqtrim: Turn off queued DSM TRIM.
 
 			* nohrst, nosrst, norst: suppress hard, soft
-                          and both resets.
+			  and both resets.
 
 			* rstonce: only attempt one reset during
 			  hot-unplug link recovery
@@ -2215,7 +2224,7 @@
 			[KNL,SH] Allow user to override the default size for
 			per-device physically contiguous DMA buffers.
 
-        memhp_default_state=online/offline
+	memhp_default_state=online/offline
 			[KNL] Set the initial state for the memory hotplug
 			onlining policy. If not specified, the default value is
 			set according to the
@@ -2600,6 +2609,9 @@
 			emulation library even if a 387 maths coprocessor
 			is present.
 
+	no5lvl		[X86-64] Disable 5-level paging mode. Forces
+			the kernel to use 4-level paging instead.
+
 	no_console_suspend
 			[HW] Never suspend the console
 			Disable suspending of consoles during suspend and
@@ -2765,7 +2777,7 @@
 			[X86,PV_OPS] Disable paravirtualized VMware scheduler
 			clock and use the default one.
 
-	no-steal-acc    [X86,KVM] Disable paravirtualized steal time accounting.
+	no-steal-acc	[X86,KVM] Disable paravirtualized steal time accounting.
 			steal time is computed, but won't influence scheduler
 			behaviour
 
@@ -2826,7 +2838,7 @@
 	notsc		[BUGS=X86-32] Disable Time Stamp Counter
 
 	nowatchdog	[KNL] Disable both lockup detectors, i.e.
-                        soft-lockup and NMI watchdog (hard-lockup).
+			soft-lockup and NMI watchdog (hard-lockup).
 
 	nowb		[ARM]
 
@@ -2846,7 +2858,7 @@
 			If the dependencies are under your control, you can
 			turn on cpu0_hotplug.
 
-	nps_mtm_hs_ctr= [KNL,ARC]
+	nps_mtm_hs_ctr=	[KNL,ARC]
 			This parameter sets the maximum duration, in
 			cycles, each HW thread of the CTOP can run
 			without interruptions, before HW switches it.
@@ -2987,7 +2999,7 @@
 
 	pci=option[,option...]	[PCI] various PCI subsystem options:
 		earlydump	[X86] dump PCI config space before the kernel
-			        changes anything
+				changes anything
 		off		[X86] don't probe for the PCI bus
 		bios		[X86-32] force use of PCI BIOS, don't access
 				the hardware directly. Use this if your machine
@@ -3075,7 +3087,7 @@
 				is enabled by default.  If you need to use this,
 				please report a bug.
 		nocrs		[X86] Ignore PCI host bridge windows from ACPI.
-			        If you need to use this, please report a bug.
+				If you need to use this, please report a bug.
 		routeirq	Do IRQ routing for all PCI devices.
 				This is normally done in pci_enable_device(),
 				so this option is a temporary workaround
@@ -3918,7 +3930,7 @@
 			cache (risks via metadata attacks are mostly
 			unchanged). Debug options disable merging on their
 			own.
-			For more information see Documentation/vm/slub.txt.
+			For more information see Documentation/vm/slub.rst.
 
 	slab_max_order=	[MM, SLAB]
 			Determines the maximum allowed order for slabs.
@@ -3932,7 +3944,7 @@
 			slub_debug can create guard zones around objects and
 			may poison objects when not in use. Also tracks the
 			last alloc / free. For more information see
-			Documentation/vm/slub.txt.
+			Documentation/vm/slub.rst.
 
 	slub_memcg_sysfs=	[MM, SLUB]
 			Determines whether to enable sysfs directories for
@@ -3946,7 +3958,7 @@
 			Determines the maximum allowed order for slabs.
 			A high setting may cause OOMs due to memory
 			fragmentation. For more information see
-			Documentation/vm/slub.txt.
+			Documentation/vm/slub.rst.
 
 	slub_min_objects=	[MM, SLUB]
 			The minimum number of objects per slab. SLUB will
@@ -3955,12 +3967,12 @@
 			the number of objects indicated. The higher the number
 			of objects the smaller the overhead of tracking slabs
 			and the less frequently locks need to be acquired.
-			For more information see Documentation/vm/slub.txt.
+			For more information see Documentation/vm/slub.rst.
 
 	slub_min_order=	[MM, SLUB]
 			Determines the minimum page order for slabs. Must be
 			lower than slub_max_order.
-			For more information see Documentation/vm/slub.txt.
+			For more information see Documentation/vm/slub.rst.
 
 	slub_nomerge	[MM, SLUB]
 			Same with slab_nomerge. This is supported for legacy.
@@ -4358,7 +4370,8 @@
 			Format: [always|madvise|never]
 			Can be used to control the default behavior of the system
 			with respect to transparent hugepages.
-			See Documentation/vm/transhuge.txt for more details.
+			See Documentation/admin-guide/mm/transhuge.rst
+			for more details.
 
 	tsc=		Disable clocksource stability checks for TSC.
 			Format: <string>
@@ -4436,7 +4449,7 @@
 
 	usbcore.initial_descriptor_timeout=
 			[USB] Specifies timeout for the initial 64-byte
-                        USB_REQ_GET_DESCRIPTOR request in milliseconds
+			USB_REQ_GET_DESCRIPTOR request in milliseconds
 			(default 5000 = 5.0 seconds).
 
 	usbcore.nousb	[USB] Disable the USB subsystem
diff --git a/Documentation/admin-guide/mm/concepts.rst b/Documentation/admin-guide/mm/concepts.rst
new file mode 100644
index 0000000..291699c
--- /dev/null
+++ b/Documentation/admin-guide/mm/concepts.rst
@@ -0,0 +1,222 @@
+.. _mm_concepts:
+
+=================
+Concepts overview
+=================
+
+The memory management in Linux is a complex system that evolved over the
+years and has included more and more functionality to support a variety
+of systems from MMU-less microcontrollers to supercomputers. The memory
+management for systems without an MMU is called ``nommu`` and it
+definitely deserves a dedicated document, which hopefully will
+eventually be written. Yet, although some of the concepts are the same,
+here we assume that an MMU is available and that the CPU can translate
+a virtual address to a physical address.
+
+.. contents:: :local:
+
+Virtual Memory Primer
+=====================
+
+The physical memory in a computer system is a limited resource and
+even for systems that support memory hotplug there is a hard limit on
+the amount of memory that can be installed. The physical memory is not
+necessarily contiguous; it might be accessible as a set of distinct
+address ranges. Besides, different CPU architectures, and even
+different implementations of the same architecture, have different
+views of how these address ranges are defined.
+
+All this makes dealing directly with physical memory quite complex, and
+to avoid this complexity the concept of virtual memory was developed.
+
+The virtual memory abstracts the details of physical memory from the
+application software, allows keeping only the needed information in
+the physical memory (demand paging) and provides a mechanism for the
+protection and controlled sharing of data between processes.
+
+With virtual memory, each and every memory access uses a virtual
+address. When the CPU decodes an instruction that reads (or
+writes) from (or to) the system memory, it translates the `virtual`
+address encoded in that instruction to a `physical` address that the
+memory controller can understand.
+
+The physical system memory is divided into page frames, or pages. The
+size of each page is architecture specific. Some architectures allow
+selection of the page size from several supported values; this
+selection is performed at the kernel build time by setting an
+appropriate kernel configuration option.
+
+Each physical memory page can be mapped as one or more virtual
+pages. These mappings are described by page tables that allow
+translation from the virtual address used by programs to the real
+address in the physical memory. The page tables are organized
+hierarchically.
+
+The tables at the lowest level of the hierarchy contain physical
+addresses of actual pages used by the software. The tables at higher
+levels contain physical addresses of the pages belonging to the lower
+levels. The pointer to the top level page table resides in a
+register. When the CPU performs the address translation, it uses this
+register to access the top level page table. The high bits of the
+virtual address are used to index an entry in the top level page
+table. That entry is then used to access the next level in the
+hierarchy with the next bits of the virtual address as the index to
+that level page table. The lowest bits in the virtual address define
+the offset inside the actual page.
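+
+For example, with 4KiB pages and 9 index bits per level (the common
+x86-64 four-level layout), the indices for a virtual address ``va``
+can be extracted like this (illustrative C, not kernel code)::
+
+    unsigned long offset = va & 0xfff;          /* bits 0..11: page offset   */
+    unsigned long pte    = (va >> 12) & 0x1ff;  /* bits 12..20: lowest level */
+    unsigned long pmd    = (va >> 21) & 0x1ff;  /* bits 21..29 */
+    unsigned long pud    = (va >> 30) & 0x1ff;  /* bits 30..38 */
+    unsigned long pgd    = (va >> 39) & 0x1ff;  /* bits 39..47: top level    */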
+
+Huge Pages
+==========
+
+The address translation requires several memory accesses, and memory
+accesses are slow relative to CPU speed. To avoid spending precious
+processor cycles on the address translation, CPUs maintain a cache of
+such translations called the Translation Lookaside Buffer (or
+TLB). Usually the TLB is a pretty scarce resource and applications
+with a large memory working set will experience a performance hit
+because of TLB misses.
+
+Many modern CPU architectures allow mapping of the memory pages
+directly by the higher levels in the page table. For instance, on x86,
+it is possible to map 2M and even 1G pages using entries in the second
+and the third level page tables. In Linux such pages are called
+`huge`. Usage of huge pages significantly reduces pressure on TLB,
+improves TLB hit-rate and thus improves overall system performance.
+
+There are two mechanisms in Linux that enable mapping of the physical
+memory with huge pages. The first one is the `HugeTLB filesystem`, or
+hugetlbfs. It is a pseudo filesystem that uses RAM as its backing
+store. For the files created in this filesystem the data resides in
+memory and is mapped using huge pages. The hugetlbfs is described at
+:ref:`Documentation/admin-guide/mm/hugetlbpage.rst <hugetlbpage>`.
+
+Another, more recent, mechanism that enables use of the huge pages is
+called `Transparent HugePages`, or THP. Unlike the hugetlbfs that
+requires users and/or system administrators to configure what parts of
+the system memory should and can be mapped by the huge pages, THP
+manages such mappings transparently to the user and hence the
+name. See
+:ref:`Documentation/admin-guide/mm/transhuge.rst <admin_guide_transhuge>`
+for more details about THP.
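+
+For example, an application can ask for a range of anonymous memory to
+be backed by huge pages with madvise(2) (illustrative C)::
+
+    #include <sys/mman.h>
+
+    /* Hint that [addr, addr + len) benefits from THP.  This only
+     * applies when the kernel was built with THP support. */
+    madvise(addr, len, MADV_HUGEPAGE);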
+
+Zones
+=====
+
+Often hardware poses restrictions on how different physical memory
+ranges can be accessed. In some cases, devices cannot perform DMA to
+all the addressable memory. In other cases, the size of the physical
+memory exceeds the maximal addressable size of virtual memory and
+special actions are required to access portions of the memory. Linux
+groups memory pages into `zones` according to their possible
+usage. For example, ZONE_DMA will contain memory that can be used by
+devices for DMA, ZONE_HIGHMEM will contain memory that is not
+permanently mapped into the kernel's address space and ZONE_NORMAL will
+contain normally addressed pages.
+
+The actual layout of the memory zones is hardware dependent as not all
+architectures define all zones, and requirements for DMA are different
+for different platforms.
+
+Nodes
+=====
+
+Many multi-processor machines are NUMA - Non-Uniform Memory Access -
+systems. In such systems the memory is arranged into banks that have
+different access latency depending on the "distance" from the
+processor. Each bank is referred to as a `node` and for each node Linux
+constructs an independent memory management subsystem. A node has its
+own set of zones, lists of free and used pages and various statistics
+counters. You can find more details about NUMA in
+:ref:`Documentation/vm/numa.rst <numa>` and in
+:ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`.
+
+Page cache
+==========
+
+The physical memory is volatile and the common case for getting data
+into the memory is to read it from files. Whenever a file is read, the
+data is put into the `page cache` to avoid expensive disk access on
+the subsequent reads. Similarly, when one writes to a file, the data
+is placed in the page cache and eventually gets into the backing
+storage device. The written pages are marked as `dirty` and when Linux
+decides to reuse them for other purposes, it makes sure to synchronize
+the file contents on the device with the updated data.
+
+Anonymous Memory
+================
+
+The `anonymous memory` or `anonymous mappings` represent memory that
+is not backed by a filesystem. Such mappings are implicitly created
+for the program's stack and heap, or by explicit calls to the mmap(2)
+system call. Usually, anonymous mappings only define virtual memory
+areas that the program is allowed to access. Read accesses result in
+the creation of a page table entry that references a special physical
+page filled with zeroes. When the program performs a write, a regular
+physical page will be allocated to hold the written data. The page
+will be marked dirty and, if the kernel decides to repurpose it, the
+dirty page will be swapped out.
+
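+A minimal user-space sketch of this behaviour, using mmap(2) as
+described above (the length and fill pattern are arbitrary), might
+look like::
+
+	#include <stdio.h>
+	#include <string.h>
+	#include <sys/mman.h>
+
+	int main(void)
+	{
+		size_t len = 4096;
+
+		/* Create an anonymous mapping; no file backs this memory. */
+		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (p == MAP_FAILED)
+			return 1;
+
+		/* A read touches the shared zero page... */
+		printf("first byte: %d\n", p[0]);
+
+		/* ...while the first write allocates a real physical page. */
+		memset(p, 0xaa, len);
+
+		munmap(p, len);
+		return 0;
+	}
+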
+Reclaim
+=======
+
+Throughout the system lifetime, a physical page can be used for storing
+different types of data. It can be kernel internal data structures,
+DMA'able buffers for device driver use, data read from a filesystem,
+memory allocated by user space processes, etc.
+
+Depending on the page usage it is treated differently by the Linux
+memory management. The pages that can be freed at any time, either
+because they cache the data available elsewhere, for instance, on a
+hard disk, or because they can be swapped out, again, to the hard
+disk, are called `reclaimable`. The most notable categories of the
+reclaimable pages are page cache and anonymous memory.
+
+In most cases, the pages holding internal kernel data and used as DMA
+buffers cannot be repurposed, and they remain pinned until freed by
+their user. Such pages are called `unreclaimable`. However, in certain
+circumstances, even pages occupied with kernel data structures can be
+reclaimed. For instance, in-memory caches of filesystem metadata can
+be re-read from the storage device and therefore it is possible to
+discard them from the main memory when system is under memory
+pressure.
+
+The process of freeing the reclaimable physical memory pages and
+repurposing them is called (surprise!) `reclaim`. Linux can reclaim
+pages either asynchronously or synchronously, depending on the state
+of the system. When the system is not loaded, most of the memory is
+free and allocation requests will be satisfied immediately from the
+free pages supply. As the load increases, the amount of free pages
+goes down and when it falls below a certain threshold (low watermark),
+an allocation request will awaken the ``kswapd`` daemon. It will
+asynchronously scan memory pages and either just free them if the data
+they contain is available elsewhere, or evict them to the backing
+storage device (remember those dirty pages?). As memory usage
+increases even more and free pages fall below another threshold (min
+watermark), an allocation will trigger `direct reclaim`. In this case
+allocation is stalled until enough memory pages are reclaimed to
+satisfy the request.
+
+Compaction
+==========
+
+As the system runs, tasks allocate and free the memory and it becomes
+fragmented. Although with virtual memory it is possible to present
+scattered physical pages as virtually contiguous range, sometimes it is
+necessary to allocate large physically contiguous memory areas. Such
+a need may arise, for instance, when a device driver requires a large
+buffer for DMA, or when THP allocates a huge page. Memory `compaction`
+addresses the fragmentation issue. This mechanism moves occupied pages
+from the lower part of a memory zone to free pages in the upper part
+of the zone. When a compaction scan is finished, free pages are grouped
+together at the beginning of the zone and allocations of large
+physically contiguous areas become possible.
+
+Like reclaim, compaction may happen asynchronously in the ``kcompactd``
+daemon or synchronously as a result of a memory allocation request.
+
+OOM killer
+==========
+
+It may happen that on a loaded machine memory will be exhausted. When
+the kernel detects that the system is running out of memory (OOM) it
+invokes the `OOM killer`. Its mission is simple: all it has to do is
+select a task to sacrifice for the sake of the overall system health.
+The selected task is killed in the hope that after it exits enough
+memory will be freed to continue normal operation.
diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst
new file mode 100644
index 0000000..1cc0bc7
--- /dev/null
+++ b/Documentation/admin-guide/mm/hugetlbpage.rst
@@ -0,0 +1,382 @@
+.. _hugetlbpage:
+
+=============
+HugeTLB Pages
+=============
+
+Overview
+========
+
+The intent of this file is to give a brief summary of hugetlbpage support in
+the Linux kernel.  This support is built on top of multiple page size support
+that is provided by most modern architectures.  For example, x86 CPUs normally
+support 4K and 2M (1G if architecturally supported) page sizes, ia64
+architecture supports multiple page sizes 4K, 8K, 64K, 256K, 1M, 4M, 16M,
+256M and ppc64 supports 4K and 16M.  A TLB is a cache of virtual-to-physical
+translations.  Typically this is a very scarce resource on a processor.
+Operating systems try to make the best use of the limited number of TLB
+resources.
+This optimization is more critical now as bigger and bigger physical memories
+(several GBs) are more readily available.
+
+Users can use the huge page support in the Linux kernel by either using the mmap
+system call or standard SYSV shared memory system calls (shmget, shmat).
+
+First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
+(present under "File systems") and CONFIG_HUGETLB_PAGE (selected
+automatically when CONFIG_HUGETLBFS is selected) configuration
+options.
+
+The ``/proc/meminfo`` file provides information about the total number of
+persistent hugetlb pages in the kernel's huge page pool.  It also displays
+default huge page size and information about the number of free, reserved
+and surplus huge pages in the pool of huge pages of default size.
+The huge page size is needed for generating the proper alignment and
+size of the arguments to system calls that map huge page regions.
+
+The output of ``cat /proc/meminfo`` will include lines like::
+
+	HugePages_Total: uuu
+	HugePages_Free:  vvv
+	HugePages_Rsvd:  www
+	HugePages_Surp:  xxx
+	Hugepagesize:    yyy kB
+	Hugetlb:         zzz kB
+
+where:
+
+HugePages_Total
+	is the size of the pool of huge pages.
+HugePages_Free
+	is the number of huge pages in the pool that are not yet
+        allocated.
+HugePages_Rsvd
+	is short for "reserved," and is the number of huge pages for
+        which a commitment to allocate from the pool has been made,
+        but no allocation has yet been made.  Reserved huge pages
+        guarantee that an application will be able to allocate a
+        huge page from the pool of huge pages at fault time.
+HugePages_Surp
+	is short for "surplus," and is the number of huge pages in
+        the pool above the value in ``/proc/sys/vm/nr_hugepages``. The
+        maximum number of surplus huge pages is controlled by
+        ``/proc/sys/vm/nr_overcommit_hugepages``.
+Hugepagesize
+	is the default hugepage size (in kB).
+Hugetlb
+        is the total amount of memory (in kB), consumed by huge
+        pages of all sizes.
+        If huge pages of different sizes are in use, this number
+        will exceed HugePages_Total \* Hugepagesize. To get more
+        detailed information, please, refer to
+        ``/sys/kernel/mm/hugepages`` (described below).
+
+
+``/proc/filesystems`` should also show a filesystem of type "hugetlbfs"
+configured in the kernel.
+
+``/proc/sys/vm/nr_hugepages`` indicates the current number of "persistent" huge
+pages in the kernel's huge page pool.  "Persistent" huge pages will be
+returned to the huge page pool when freed by a task.  A user with root
+privileges can dynamically allocate more or free some persistent huge pages
+by increasing or decreasing the value of ``nr_hugepages``.
+
+Pages that are used as huge pages are reserved inside the kernel and cannot
+be used for other purposes.  Huge pages cannot be swapped out under
+memory pressure.
+
+Once a number of huge pages have been pre-allocated to the kernel huge page
+pool, a user with appropriate privilege can use either the mmap system call
+or shared memory system calls to use the huge pages.  See the discussion of
+:ref:`Using Huge Pages <using_huge_pages>`, below.
+
+The administrator can allocate persistent huge pages on the kernel boot
+command line by specifying the "hugepages=N" parameter, where 'N' = the
+number of huge pages requested.  This is the most reliable method of
+allocating huge pages as memory has not yet become fragmented.
+
+Some platforms support multiple huge page sizes.  To allocate huge pages
+of a specific size, one must precede the huge pages boot command parameters
+with a huge page size selection parameter "hugepagesz=<size>".  <size> must
+be specified in bytes with optional scale suffix [kKmMgG].  The default huge
+page size may be selected with the "default_hugepagesz=<size>" boot parameter.
+
+When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
+indicates the current number of pre-allocated huge pages of the default size.
+Thus, one can use the following command to dynamically allocate/deallocate
+default sized persistent huge pages::
+
+	echo 20 > /proc/sys/vm/nr_hugepages
+
+This command will try to adjust the number of default sized huge pages in the
+huge page pool to 20, allocating or freeing huge pages, as required.
+
+On a NUMA platform, the kernel will attempt to distribute the huge page pool
+over the set of allowed nodes specified by the NUMA memory policy of the
+task that modifies ``nr_hugepages``. The default for the allowed nodes--when the
+task has default memory policy--is all on-line nodes with memory.  Allowed
+nodes with insufficient available, contiguous memory for a huge page will be
+silently skipped when allocating persistent huge pages.  See the
+:ref:`discussion below <mem_policy_and_hp_alloc>`
+of the interaction of task memory policy, cpusets and per node attributes
+with the allocation and freeing of persistent huge pages.
+
+The success or failure of huge page allocation depends on the amount of
+physically contiguous memory that is present in the system at the time of the
+allocation attempt.  If the kernel is unable to allocate huge pages from
+some nodes in a NUMA system, it will attempt to make up the difference by
+allocating extra pages on other nodes with sufficient available contiguous
+memory, if any.
+
+System administrators may want to put this command in one of the local rc
+init files.  This will enable the kernel to allocate huge pages early in
+the boot process when the possibility of getting physically contiguous pages
+is still very high.  Administrators can verify the number of huge pages
+actually allocated by checking the sysctl or meminfo.  To check the per node
+distribution of huge pages in a NUMA system, use::
+
+	cat /sys/devices/system/node/node*/meminfo | fgrep Huge
+
+``/proc/sys/vm/nr_overcommit_hugepages`` specifies how large the pool of
+huge pages can grow, if more huge pages than ``/proc/sys/vm/nr_hugepages`` are
+requested by applications.  Writing any non-zero value into this file
+indicates that the hugetlb subsystem is allowed to try to obtain that
+number of "surplus" huge pages from the kernel's normal page pool, when the
+persistent huge page pool is exhausted. As these surplus huge pages become
+unused, they are freed back to the kernel's normal page pool.
+
+When increasing the huge page pool size via ``nr_hugepages``, any existing
+surplus pages will first be promoted to persistent huge pages.  Then, additional
+huge pages will be allocated, if necessary and if possible, to fulfill
+the new persistent huge page pool size.
+
+The administrator may shrink the pool of persistent huge pages for
+the default huge page size by setting the ``nr_hugepages`` sysctl to a
+smaller value.  The kernel will attempt to balance the freeing of huge pages
+across all nodes in the memory policy of the task modifying ``nr_hugepages``.
+Any free huge pages on the selected nodes will be freed back to the kernel's
+normal page pool.
+
+Caveat: Shrinking the persistent huge page pool via ``nr_hugepages`` such that
+it becomes less than the number of huge pages in use will convert the balance
+of the in-use huge pages to surplus huge pages.  This will occur even if
+the number of surplus pages would exceed the overcommit value.  As long as
+this condition holds--that is, until ``nr_hugepages+nr_overcommit_hugepages`` is
+increased sufficiently, or the surplus huge pages go out of use and are freed--
+no more surplus huge pages will be allowed to be allocated.
+
+With support for multiple huge page pools at run-time available, much of
+the huge page userspace interface in ``/proc/sys/vm`` has been duplicated in
+sysfs.
+The ``/proc`` interfaces discussed above have been retained for backwards
+compatibility. The root huge page control directory in sysfs is::
+
+	/sys/kernel/mm/hugepages
+
+For each huge page size supported by the running kernel, a subdirectory
+will exist, of the form::
+
+	hugepages-${size}kB
+
+Inside each of these directories, the same set of files will exist::
+
+	nr_hugepages
+	nr_hugepages_mempolicy
+	nr_overcommit_hugepages
+	free_hugepages
+	resv_hugepages
+	surplus_hugepages
+
+which function as described above for the default huge page-sized case.
+
+.. _mem_policy_and_hp_alloc:
+
+Interaction of Task Memory Policy with Huge Page Allocation/Freeing
+===================================================================
+
+Whether huge pages are allocated and freed via the ``/proc`` interface or
+the ``/sysfs`` interface using the ``nr_hugepages_mempolicy`` attribute, the
+NUMA nodes from which huge pages are allocated or freed are controlled by the
+NUMA memory policy of the task that modifies the ``nr_hugepages_mempolicy``
+sysctl or attribute.  When the ``nr_hugepages`` attribute is used, mempolicy
+is ignored.
+
+The recommended method to allocate or free huge pages to/from the kernel
+huge page pool, using the ``nr_hugepages`` example above, is::
+
+    numactl --interleave <node-list> echo 20 \
+				>/proc/sys/vm/nr_hugepages_mempolicy
+
+or, more succinctly::
+
+    numactl -m <node-list> echo 20 >/proc/sys/vm/nr_hugepages_mempolicy
+
+This will allocate or free ``abs(20 - nr_hugepages)`` huge pages to or from the
+nodes specified in <node-list>, depending on whether the number of persistent
+huge pages is initially less than or greater than 20, respectively.  No huge
+pages will be allocated nor freed on any node not included in the specified
+<node-list>.
+
+When adjusting the persistent hugepage count via ``nr_hugepages_mempolicy``, any
+memory policy mode--bind, preferred, local or interleave--may be used.  The
+resulting effect on persistent huge page allocation is as follows:
+
+#. Regardless of mempolicy mode [see
+   :ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`],
+   persistent huge pages will be distributed across the node or nodes
+   specified in the mempolicy as if "interleave" had been specified.
+   However, if a node in the policy does not contain sufficient contiguous
+   memory for a huge page, the allocation will not "fallback" to the nearest
+   neighbor node with sufficient contiguous memory.  To do this would cause
+   undesirable imbalance in the distribution of the huge page pool, or
+   possibly, allocation of persistent huge pages on nodes not allowed by
+   the task's memory policy.
+
+#. One or more nodes may be specified with the bind or interleave policy.
+   If more than one node is specified with the preferred policy, only the
+   lowest numeric id will be used.  Local policy will select the node where
+   the task is running at the time the nodes_allowed mask is constructed.
+   For local policy to be deterministic, the task must be bound to a cpu or
+   cpus in a single node.  Otherwise, the task could be migrated to some
+   other node at any time after launch and the resulting node will be
+   indeterminate.  Thus, local policy is not very useful for this purpose.
+   Any of the other mempolicy modes may be used to specify a single node.
+
+#. The nodes allowed mask will be derived from any non-default task mempolicy,
+   whether this policy was set explicitly by the task itself or one of its
+   ancestors, such as numactl.  This means that if the task is invoked from a
+   shell with non-default policy, that policy will be used.  One can specify a
+   node list of "all" with numactl --interleave or --membind [-m] to achieve
+   interleaving over all nodes in the system or cpuset.
+
+#. Any task mempolicy specified--e.g., using numactl--will be constrained by
+   the resource limits of any cpuset in which the task runs.  Thus, there will
+   be no way for a task with non-default policy running in a cpuset with a
+   subset of the system nodes to allocate huge pages outside the cpuset
+   without first moving to a cpuset that contains all of the desired nodes.
+
+#. Boot-time huge page allocation attempts to distribute the requested number
+   of huge pages over all on-line nodes with memory.
+
+Per Node Hugepages Attributes
+=============================
+
+A subset of the contents of the root huge page control directory in sysfs,
+described above, will be replicated under the system device of each
+NUMA node with memory in::
+
+	/sys/devices/system/node/node[0-9]*/hugepages/
+
+Under this directory, the subdirectory for each supported huge page size
+contains the following attribute files::
+
+	nr_hugepages
+	free_hugepages
+	surplus_hugepages
+
+The ``free_`` and ``surplus_`` attribute files are read-only.  They return the number
+of free and surplus [overcommitted] huge pages, respectively, on the parent
+node.
+
+The ``nr_hugepages`` attribute returns the total number of huge pages on the
+specified node.  When this attribute is written, the number of persistent huge
+pages on the parent node will be adjusted to the specified value, if sufficient
+resources exist, regardless of the task's mempolicy or cpuset constraints.
+
+Note that the number of overcommit and reserve pages remain global quantities,
+as we don't know until fault time, when the faulting task's mempolicy is
+applied, from which node the huge page allocation will be attempted.
+
+.. _using_huge_pages:
+
+Using Huge Pages
+================
+
+If the user applications are going to request huge pages using the mmap system
+call, then it is required that the system administrator mount a file system of
+type hugetlbfs::
+
+  mount -t hugetlbfs \
+	-o uid=<value>,gid=<value>,mode=<value>,pagesize=<value>,size=<value>,\
+	min_size=<value>,nr_inodes=<value> none /mnt/huge
+
+This command mounts a (pseudo) filesystem of type hugetlbfs on the directory
+``/mnt/huge``.  Any file created on ``/mnt/huge`` uses huge pages.
+
+The ``uid`` and ``gid`` options set the owner and group of the root of the
+file system.  By default the ``uid`` and ``gid`` of the current process
+are taken.
+
+The ``mode`` option sets the mode of the root of the file system to value &
+01777. This value is given in octal. By default the value 0755 is picked.
+
+If the platform supports multiple huge page sizes, the ``pagesize`` option can
+be used to specify the huge page size and associated pool. ``pagesize``
+is specified in bytes. If ``pagesize`` is not specified the platform's
+default huge page size and associated pool will be used.
+
+The ``size`` option sets the maximum value of memory (huge pages) allowed
+for that filesystem (``/mnt/huge``). The ``size`` option can be specified
+in bytes, or as a percentage of the specified huge page pool (``nr_hugepages``).
+The size is rounded down to HPAGE_SIZE boundary.
+
+The ``min_size`` option sets the minimum value of memory (huge pages) allowed
+for the filesystem. ``min_size`` can be specified in the same way as ``size``,
+either bytes or a percentage of the huge page pool.
+At mount time, the number of huge pages specified by ``min_size`` are reserved
+for use by the filesystem.
+If there are not enough free huge pages available, the mount will fail.
+As huge pages are allocated to the filesystem and freed, the reserve count
+is adjusted so that the sum of allocated and reserved huge pages is always
+at least ``min_size``.
+
+The option ``nr_inodes`` sets the maximum number of inodes that ``/mnt/huge``
+can use.
+
+If the ``size``, ``min_size`` or ``nr_inodes`` option is not provided on
+command line then no limits are set.
+
+For ``pagesize``, ``size``, ``min_size`` and ``nr_inodes`` options, you can
+use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo.
+For example, size=2K has the same meaning as size=2048.
+
+While read system calls are supported on files that reside on hugetlb
+file systems, write system calls are not.
+
+Regular chown, chgrp, and chmod commands (with the right permissions) can be
+used to change the file attributes on hugetlbfs.
+
+Also, it is important to note that no such mount command is required if
+applications are going to use only shmat/shmget system calls or mmap with
+MAP_HUGETLB.  For an example of how to use mmap with MAP_HUGETLB see
+:ref:`map_hugetlb <map_hugetlb>` below.
+
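+As a minimal sketch (the selftest referenced above remains the
+canonical example), and assuming the platform's default huge page size
+is 2M and free huge pages are available in the pool, such a mapping
+could be created like this::
+
+	#include <stdio.h>
+	#include <sys/mman.h>
+
+	int main(void)
+	{
+		size_t len = 2 * 1024 * 1024; /* assumes 2M default huge pages */
+
+		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+		if (p == MAP_FAILED) {
+			perror("mmap"); /* fails if no huge pages are available */
+			return 1;
+		}
+
+		p[0] = 1; /* touch the huge page */
+
+		/* The munmap length must cover whole huge pages. */
+		munmap(p, len);
+		return 0;
+	}
+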
+Users who wish to use hugetlb memory via shared memory segments should be
+members of a supplementary group, and the system admin needs to configure that
+gid into ``/proc/sys/vm/hugetlb_shm_group``.  It is possible for the same or
+different applications to use any combination of mmaps and shm* calls, though
+the mount of the filesystem will be required for using mmap calls without
+MAP_HUGETLB.
+
+Syscalls that operate on memory backed by hugetlb pages only have their lengths
+aligned to the native page size of the processor; they will normally fail with
+errno set to EINVAL or exclude hugetlb pages that extend beyond the length if
+not hugepage aligned.  For example, munmap(2) will fail if memory is backed by
+a hugetlb page and the length is smaller than the hugepage size.
+
+
+Examples
+========
+
+.. _map_hugetlb:
+
+``map_hugetlb``
+	see tools/testing/selftests/vm/map_hugetlb.c
+
+``hugepage-shm``
+	see tools/testing/selftests/vm/hugepage-shm.c
+
+``hugepage-mmap``
+	see tools/testing/selftests/vm/hugepage-mmap.c
+
+The `libhugetlbfs`_  library provides a wide range of userspace tools
+to help with huge page usability, environment setup, and control.
+
+.. _libhugetlbfs: https://github.com/libhugetlbfs/libhugetlbfs
diff --git a/Documentation/admin-guide/mm/idle_page_tracking.rst b/Documentation/admin-guide/mm/idle_page_tracking.rst
new file mode 100644
index 0000000..6f7b7ca
--- /dev/null
+++ b/Documentation/admin-guide/mm/idle_page_tracking.rst
@@ -0,0 +1,116 @@
+.. _idle_page_tracking:
+
+==================
+Idle Page Tracking
+==================
+
+Motivation
+==========
+
+The idle page tracking feature allows tracking which memory pages are being
+accessed by a workload and which are idle. This information can be useful for
+estimating the workload's working set size, which, in turn, can be taken into
+account when configuring the workload parameters, setting memory cgroup limits,
+or deciding where to place the workload within a compute cluster.
+
+It is enabled by CONFIG_IDLE_PAGE_TRACKING=y.
+
+.. _user_api:
+
+User API
+========
+
+The idle page tracking API is located at ``/sys/kernel/mm/page_idle``.
+Currently, it consists of a single read-write file,
+``/sys/kernel/mm/page_idle/bitmap``.
+
+The file implements a bitmap where each bit corresponds to a memory page. The
+bitmap is represented by an array of 8-byte integers, and the page at PFN #i is
+mapped to bit #i%64 of array element #i/64; the byte order is native. When a bit is
+set, the corresponding page is idle.
+
+A page is considered idle if it has not been accessed since it was marked idle
+(for more details on what "accessed" actually means see the :ref:`Implementation
+Details <impl_details>` section).
+To mark a page idle one has to set the bit corresponding to
+the page by writing to the file. A value written to the file is OR-ed with the
+current bitmap value.
+
+Only accesses to user memory pages are tracked. These are pages mapped to a
+process address space, page cache and buffer pages, and swap cache pages. For other
+page types (e.g. SLAB pages) an attempt to mark a page idle is silently ignored,
+and hence such pages are never reported idle.
+
+For huge pages the idle flag is set only on the head page, so one has to read
+``/proc/kpageflags`` in order to correctly count idle huge pages.
+
+Reading from or writing to ``/sys/kernel/mm/page_idle/bitmap`` will return
+-EINVAL if you are not starting the read/write on an 8-byte boundary, or
+if the size of the read/write is not a multiple of 8 bytes. Writing to
+this file beyond max PFN will return -ENXIO.
+
+That said, in order to estimate the number of pages that are not used by a
+workload one should:
+
+ 1. Mark all the workload's pages as idle by setting corresponding bits in
+    ``/sys/kernel/mm/page_idle/bitmap``. The pages can be found by reading
+    ``/proc/pid/pagemap`` if the workload is represented by a process, or by
+    filtering out alien pages using ``/proc/kpagecgroup`` in case the workload
+    is placed in a memory cgroup.
+
+ 2. Wait until the workload accesses its working set.
+
+ 3. Read ``/sys/kernel/mm/page_idle/bitmap`` and count the number of bits set.
+    If one wants to ignore certain types of pages, e.g. mlocked pages since they
+    are not reclaimable, he or she can filter them out using
+    ``/proc/kpageflags``.
+
+See :ref:`Documentation/admin-guide/mm/pagemap.rst <pagemap>` for more
+information about ``/proc/pid/pagemap``, ``/proc/kpageflags``, and
+``/proc/kpagecgroup``.
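+
+As a sketch of steps 1 and 3 above for a single page (the PFN value
+here is a hypothetical placeholder; real PFNs would come from
+``/proc/pid/pagemap``, and root privileges are assumed), one could
+mark a page idle and later re-check it like this::
+
+	#include <fcntl.h>
+	#include <stdint.h>
+	#include <stdio.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		unsigned long pfn = 0x10000;	/* hypothetical page frame number */
+		uint64_t word = 1ULL << (pfn % 64);
+		off_t off = (pfn / 64) * 8;	/* 8-byte aligned, native byte order */
+
+		int fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+		if (fd < 0)
+			return 1;	/* typically requires root */
+
+		/* Set the bit: the written value is OR-ed into the bitmap. */
+		pwrite(fd, &word, sizeof(word), off);
+
+		/* ...let the workload run, then re-read the same word... */
+		pread(fd, &word, sizeof(word), off);
+		printf("page %s idle\n",
+		       (word & (1ULL << (pfn % 64))) ? "still" : "no longer");
+
+		close(fd);
+		return 0;
+	}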
+
+.. _impl_details:
+
+Implementation Details
+======================
+
+The kernel internally keeps track of accesses to user memory pages in order to
+reclaim unreferenced pages first on memory shortage conditions. A page is
+considered referenced if it has been recently accessed via a process address
+space, in which case one or more PTEs it is mapped to will have the Accessed bit
+set, or marked accessed explicitly by the kernel (see mark_page_accessed()). The
+latter happens when:
+
+ - a userspace process reads or writes a page using a system call (e.g. read(2)
+   or write(2))
+
+ - a page that is used for storing filesystem buffers is read or written,
+   because a process needs filesystem metadata stored in it (e.g. lists a
+   directory tree)
+
+ - a page is accessed by a device driver using get_user_pages()
+
+When a dirty page is written to swap or disk as a result of memory reclaim or
+exceeding the dirty memory limit, it is not marked referenced.
+
+The idle memory tracking feature adds a new page flag, the Idle flag. This flag
+is set manually, by writing to ``/sys/kernel/mm/page_idle/bitmap`` (see the
+:ref:`User API <user_api>`
+section), and cleared automatically whenever a page is referenced as defined
+above.
+
+When a page is marked idle, the Accessed bit must be cleared in all PTEs it is
+mapped to, otherwise we will not be able to detect accesses to the page coming
+from a process address space. To avoid interference with the reclaimer, which,
+as noted above, uses the Accessed bit to promote actively referenced pages, one
+more page flag is introduced, the Young flag. When the PTE Accessed bit is
+cleared as a result of setting or updating a page's Idle flag, the Young flag
+is set on the page. The reclaimer treats the Young flag as an extra PTE
+Accessed bit and therefore will consider such a page as referenced.
+
+Since the idle memory tracking feature is based on the memory reclaimer logic,
+it only works with pages that are on an LRU list, other pages are silently
+ignored. That means it will ignore a user memory page if it is isolated, but
+since there are usually not many of them, it should not affect the overall
+result noticeably. In order not to stall scanning of the idle page bitmap,
+locked pages may be skipped too.
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst
new file mode 100644
index 0000000..ceead68
--- /dev/null
+++ b/Documentation/admin-guide/mm/index.rst
@@ -0,0 +1,36 @@
+=================
+Memory Management
+=================
+
+The Linux memory management subsystem is responsible, as the name
+implies, for managing the memory in the system. This includes the
+implementation of virtual memory and demand paging, memory allocation
+both for kernel internal structures and user space programs, mapping
+of files into processes' address space and many other cool things.
+
+Linux memory management is a complex system with many configurable
+settings. Most of these settings are available via the ``/proc``
+filesystem and can be queried and adjusted using ``sysctl``. These APIs
+are described in Documentation/sysctl/vm.txt and in `man 5 proc`_.
+
+.. _man 5 proc: http://man7.org/linux/man-pages/man5/proc.5.html
+
+Linux memory management has its own jargon and if you are not yet
+familiar with it, consider reading
+:ref:`Documentation/admin-guide/mm/concepts.rst <mm_concepts>`.
+
+Here we document in detail how to interact with various mechanisms in
+the Linux memory management.
+
+.. toctree::
+   :maxdepth: 1
+
+   concepts
+   hugetlbpage
+   idle_page_tracking
+   ksm
+   numa_memory_policy
+   pagemap
+   soft-dirty
+   transhuge
+   userfaultfd
diff --git a/Documentation/admin-guide/mm/ksm.rst b/Documentation/admin-guide/mm/ksm.rst
new file mode 100644
index 0000000..9303786
--- /dev/null
+++ b/Documentation/admin-guide/mm/ksm.rst
@@ -0,0 +1,189 @@
+.. _admin_guide_ksm:
+
+=======================
+Kernel Samepage Merging
+=======================
+
+Overview
+========
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
+added to the Linux kernel in 2.6.32.  See ``mm/ksm.c`` for its implementation,
+and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
+
+KSM was originally developed for use with KVM (where it was known as
+Kernel Shared Memory), to fit more virtual machines into physical memory,
+by sharing the data common between them.  But it can be useful to any
+application which generates many instances of the same data.
+
+The KSM daemon ksmd periodically scans those areas of user memory
+which have been registered with it, looking for pages of identical
+content which can be replaced by a single write-protected page (which
+is automatically copied if a process later wants to update its
+content). The number of pages that the KSM daemon scans in a single
+pass and the time between the passes are configured using the
+:ref:`sysfs interface <ksm_sysfs>`.
+
+KSM only merges anonymous (private) pages, never pagecache (file) pages.
+KSM's merged pages were originally locked into kernel memory, but can now
+be swapped out just like other user pages (but sharing is broken when they
+are swapped back in: ksmd must rediscover their identity and merge again).
+
+Controlling KSM with madvise
+============================
+
+KSM only operates on those areas of address space which an application
+has advised to be likely candidates for merging, by using the madvise(2)
+system call::
+
+	int madvise(addr, length, MADV_MERGEABLE)
+
+The app may call
+
+::
+
+	int madvise(addr, length, MADV_UNMERGEABLE)
+
+to cancel that advice and restore unshared pages: whereupon KSM
+unmerges whatever it merged in that range.  Note: this unmerging call
+may suddenly require more memory than is available - possibly failing
+with EAGAIN, but more probably arousing the Out-Of-Memory killer.
+
+If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
+and MADV_UNMERGEABLE simply fail with EINVAL.  If the running kernel was
+built with CONFIG_KSM=y, those calls will normally succeed: even if the
+KSM daemon is not currently running, MADV_MERGEABLE still registers
+the range for whenever the KSM daemon is started; even if the range
+cannot contain any pages which KSM could actually merge; even if
+MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
+
+If a region of memory must be split into at least one new MADV_MERGEABLE
+or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
+will exceed ``vm.max_map_count`` (see Documentation/sysctl/vm.txt).
+
+Like other madvise calls, they are intended for use on mapped areas of
+the user address space: they will report ENOMEM if the specified range
+includes unmapped gaps (though working on the intervening mapped areas),
+and might fail with EAGAIN if there is not enough memory for internal structures.
+
+Applications should be considerate in their use of MADV_MERGEABLE,
+restricting its use to areas likely to benefit.  KSM's scans may use a lot
+of processing power: some installations will disable KSM for that reason.
+
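+For illustration, a minimal user-space sketch of the calls described
+above (the buffer size and fill pattern are arbitrary; as noted,
+MADV_MERGEABLE requires a kernel built with CONFIG_KSM=y) might be::
+
+	#include <stdio.h>
+	#include <string.h>
+	#include <sys/mman.h>
+
+	int main(void)
+	{
+		size_t len = 16 * 4096;
+
+		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (p == MAP_FAILED)
+			return 1;
+
+		/* Fill the area with identical pages so ksmd can merge them. */
+		memset(p, 0x5a, len);
+
+		/* Register the range as a merge candidate... */
+		if (madvise(p, len, MADV_MERGEABLE))
+			perror("madvise");
+
+		/* ...and later withdraw the advice, unmerging the pages. */
+		madvise(p, len, MADV_UNMERGEABLE);
+
+		munmap(p, len);
+		return 0;
+	}
+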
+.. _ksm_sysfs:
+
+KSM daemon sysfs interface
+==========================
+
+The KSM daemon is controlled by sysfs files in ``/sys/kernel/mm/ksm/``,
+readable by all but writable only by root:
+
+pages_to_scan
+        how many pages to scan before ksmd goes to sleep
+        e.g. ``echo 100 > /sys/kernel/mm/ksm/pages_to_scan``.
+
+        Default: 100 (chosen for demonstration purposes)
+
+sleep_millisecs
+        how many milliseconds ksmd should sleep before next scan
+        e.g. ``echo 20 > /sys/kernel/mm/ksm/sleep_millisecs``
+
+        Default: 20 (chosen for demonstration purposes)
+
+merge_across_nodes
+        specifies if pages from different NUMA nodes can be merged.
+        When set to 0, ksm merges only pages which physically reside
+        in the memory area of same NUMA node. That brings lower
+        latency to access of shared pages. Systems with more nodes, at
+        significant NUMA distances, are likely to benefit from the
+        lower latency of setting 0. Smaller systems, which need to
+        minimize memory usage, are likely to benefit from the greater
+        sharing of setting 1 (default). You may wish to compare how
+        your system performs under each setting, before deciding on
+        which to use. The ``merge_across_nodes`` setting can be changed
+        only when there are no ksm shared pages in the system: set ``run``
+        to 2 to unmerge pages first, then to 1 after changing
+        ``merge_across_nodes``, to remerge according to the new setting.
+
+        Default: 1 (merging across nodes as in earlier releases)
+
+run
+        * set to 0 to stop ksmd from running but keep merged pages,
+        * set to 1 to run ksmd e.g. ``echo 1 > /sys/kernel/mm/ksm/run``,
+        * set to 2 to stop ksmd and unmerge all pages currently merged, but
+	  leave mergeable areas registered for next run.
+
+        Default: 0 (must be changed to 1 to activate KSM, except if
+        CONFIG_SYSFS is disabled)
+
+use_zero_pages
+        specifies whether empty pages (i.e. allocated pages that only
+        contain zeroes) should be treated specially.  When set to 1,
+        empty pages are merged with the kernel zero page(s) instead of
+        with each other as it would happen normally. This can improve
+        the performance on architectures with coloured zero pages,
+        depending on the workload. Care should be taken when enabling
+        this setting, as it can potentially degrade the performance of
+        KSM for some workloads, for example if the checksums of pages
+        that are candidates for merging match the checksum of an empty
+        page. This setting can be changed at any time, it is only
+        effective for pages merged after the change.
+
+        Default: 0 (normal KSM behaviour as in earlier releases)
+
+max_page_sharing
+        Maximum sharing allowed for each KSM page. This enforces a
+        deduplication limit to avoid high latency for virtual memory
+        operations that involve traversal of the virtual mappings that
+        share the KSM page. The minimum value is 2 as a newly created
+        KSM page will have at least two sharers. The higher this value
+        the faster KSM will merge the memory and the higher the
+        deduplication factor will be, but the slower the worst case
+        virtual mappings traversal could be for any given KSM
+        page. Slowing down this traversal means there will be higher
+        latency for certain virtual memory operations happening during
+        swapping, compaction, NUMA balancing and page migration, in
+        turn decreasing responsiveness for the caller of those virtual
+        memory operations. The scheduler latency of other tasks not
+        involved with the VM operations doing the virtual mappings
+        traversal is not affected by this parameter as these
+        traversals are always schedule friendly themselves.
+
+stable_node_chains_prune_millisecs
+        specifies how frequently KSM checks the metadata of the pages
+        that hit the deduplication limit for stale information.
+        Smaller millisecs values will free up the KSM metadata with
+        lower latency, but they will make ksmd use more CPU during the
+        scan. It's a noop if no KSM page has hit the
+        ``max_page_sharing`` limit yet.
+
+The effectiveness of KSM and MADV_MERGEABLE is shown in ``/sys/kernel/mm/ksm/``:
+
+pages_shared
+        how many shared pages are being used
+pages_sharing
+        how many more sites are sharing them i.e. how much saved
+pages_unshared
+        how many pages are unique but repeatedly checked for merging
+pages_volatile
+        how many pages are changing too fast to be placed in a tree
+full_scans
+        how many times all mergeable areas have been scanned
+stable_node_chains
+        the number of KSM pages that hit the ``max_page_sharing`` limit
+stable_node_dups
+        number of duplicated KSM pages
+
+A high ratio of ``pages_sharing`` to ``pages_shared`` indicates good
+sharing, but a high ratio of ``pages_unshared`` to ``pages_sharing``
+indicates wasted effort.  ``pages_volatile`` embraces several
+different kinds of activity, but a high proportion there would also
+indicate poor use of madvise MADV_MERGEABLE.
+
+The maximum possible ``pages_sharing/pages_shared`` ratio is limited by the
+``max_page_sharing`` tunable. To increase the ratio ``max_page_sharing`` must
+be increased accordingly.
+
+--
+Izik Eidus,
+Hugh Dickins, 17 Nov 2009
diff --git a/Documentation/admin-guide/mm/numa_memory_policy.rst b/Documentation/admin-guide/mm/numa_memory_policy.rst
new file mode 100644
index 0000000..d78c5b3
--- /dev/null
+++ b/Documentation/admin-guide/mm/numa_memory_policy.rst
@@ -0,0 +1,495 @@
+.. _numa_memory_policy:
+
+==================
+NUMA Memory Policy
+==================
+
+What is NUMA Memory Policy?
+============================
+
+In the Linux kernel, "memory policy" determines from which node the kernel will
+allocate memory in a NUMA system or in an emulated NUMA system.  Linux has
+supported platforms with Non-Uniform Memory Access architectures since 2.4.?.
+The current memory policy support was added to Linux 2.6 around May 2004.  This
+document attempts to describe the concepts and APIs of the 2.6 memory policy
+support.
+
+Memory policies should not be confused with cpusets
+(``Documentation/cgroup-v1/cpusets.txt``)
+which is an administrative mechanism for restricting the nodes from which
+memory may be allocated by a set of processes. Memory policies are a
+programming interface that a NUMA-aware application can take advantage of.  When
+both cpusets and policies are applied to a task, the restrictions of the cpuset
+takes priority.  See :ref:`Memory Policies and cpusets <mem_pol_and_cpusets>`
+below for more details.
+
+Memory Policy Concepts
+======================
+
+Scope of Memory Policies
+------------------------
+
+The Linux kernel supports *scopes* of memory policy, described here from
+most general to most specific:
+
+System Default Policy
+	this policy is "hard coded" into the kernel.  It is the policy
+	that governs all page allocations that aren't controlled by
+	one of the more specific policy scopes discussed below.  When
+	the system is "up and running", the system default policy will
+	use "local allocation" described below.  However, during boot
+	up, the system default policy will be set to interleave
+	allocations across all nodes with "sufficient" memory, so as
+	not to overload the initial boot node with boot-time
+	allocations.
+
+Task/Process Policy
+	this is an optional, per-task policy.  When defined for a
+	specific task, this policy controls all page allocations made
+	by or on behalf of the task that aren't controlled by a more
+	specific scope. If a task does not define a task policy, then
+	all page allocations that would have been controlled by the
+	task policy "fall back" to the System Default Policy.
+
+	The task policy applies to the entire address space of a task. Thus,
+	it is inheritable, and indeed is inherited, across both fork()
+	[clone() w/o the CLONE_VM flag] and exec*().  This allows a parent task
+	to establish the task policy for a child task exec()'d from an
+	executable image that has no awareness of memory policy.  See the
+	:ref:`Memory Policy APIs <memory_policy_apis>` section,
+	below, for an overview of the system call
+	that a task may use to set/change its task/process policy.
+
+	In a multi-threaded task, task policies apply only to the thread
+	[Linux kernel task] that installs the policy and any threads
+	subsequently created by that thread.  Any sibling threads existing
+	at the time a new task policy is installed retain their current
+	policy.
+
+	A task policy applies only to pages allocated after the policy is
+	installed.  Any pages already faulted in by the task when the task
+	changes its task policy remain where they were allocated based on
+	the policy at the time they were allocated.
+
+.. _vma_policy:
+
+VMA Policy
+	A "VMA" or "Virtual Memory Area" refers to a range of a task's
+	virtual address space.  A task may define a specific policy for a range
+	of its virtual address space.   See the
+	:ref:`Memory Policy APIs <memory_policy_apis>` section,
+	below, for an overview of the mbind() system call used to set a VMA
+	policy.
+
+	A VMA policy will govern the allocation of pages that back
+	this region of the address space.  Any regions of the task's
+	address space that don't have an explicit VMA policy will fall
+	back to the task policy, which may itself fall back to the
+	System Default Policy.
+
+	VMA policies have a few complicating details:
+
+	* VMA policy applies ONLY to anonymous pages.  These include
+	  pages allocated for anonymous segments, such as the task
+	  stack and heap, and any regions of the address space
+	  mmap()ed with the MAP_ANONYMOUS flag.  If a VMA policy is
+	  applied to a file mapping, it will be ignored if the mapping
+	  used the MAP_SHARED flag.  If the file mapping used the
+	  MAP_PRIVATE flag, the VMA policy will only be applied when
+	  an anonymous page is allocated on an attempt to write to the
+	  mapping-- i.e., at Copy-On-Write.
+
+	* VMA policies are shared between all tasks that share a
+	  virtual address space--a.k.a. threads--independent of when
+	  the policy is installed; and they are inherited across
+	  fork().  However, because VMA policies refer to a specific
+	  region of a task's address space, and because the address
+	  space is discarded and recreated on exec*(), VMA policies
+	  are NOT inheritable across exec().  Thus, only NUMA-aware
+	  applications may use VMA policies.
+
+	* A task may install a new VMA policy on a sub-range of a
+	  previously mmap()ed region.  When this happens, Linux splits
+	  the existing virtual memory area into 2 or 3 VMAs, each with
+	  its own policy.
+
+	* By default, VMA policy applies only to pages allocated after
+	  the policy is installed.  Any pages already faulted into the
+	  VMA range remain where they were allocated based on the
+	  policy at the time they were allocated.  However, since
+	  2.6.16, Linux supports page migration via the mbind() system
+	  call, so that page contents can be moved to match a newly
+	  installed policy.
+
+Shared Policy
+	Conceptually, shared policies apply to "memory objects" mapped
+	shared into one or more tasks' distinct address spaces.  An
+	application installs shared policies the same way as VMA
+	policies--using the mbind() system call specifying a range of
+	virtual addresses that map the shared object.  However, unlike
+	VMA policies, which can be considered to be an attribute of a
+	range of a task's address space, shared policies apply
+	directly to the shared object.  Thus, all tasks that attach to
+	the object share the policy, and all pages allocated for the
+	shared object, by any task, will obey the shared policy.
+
+	As of 2.6.22, only shared memory segments, created by shmget() or
+	mmap(MAP_ANONYMOUS|MAP_SHARED), support shared policy.  When shared
+	policy support was added to Linux, the associated data structures were
+	added to hugetlbfs shmem segments.  At the time, hugetlbfs did not
+	support allocation at fault time--a.k.a lazy allocation--so hugetlbfs
+	shmem segments were never "hooked up" to the shared policy support.
+	Although hugetlbfs segments now support lazy allocation, their support
+	for shared policy has not been completed.
+
+	As mentioned above in :ref:`VMA policies <vma_policy>` section,
+	allocations of page cache pages for regular files mmap()ed
+	with MAP_SHARED ignore any VMA policy installed on the virtual
+	address range backed by the shared file mapping.  Rather,
+	shared page cache pages, including pages backing private
+	mappings that have not yet been written by the task, follow
+	task policy, if any, else System Default Policy.
+
+	The shared policy infrastructure supports different policies on subset
+	ranges of the shared object.  However, Linux still splits the VMA of
+	the task that installs the policy for each range of distinct policy.
+	Thus, different tasks that attach to a shared memory segment can have
+	different VMA configurations mapping that one shared object.  This
+	can be seen by examining the /proc/<pid>/numa_maps of tasks sharing
+	a shared memory region, when one task has installed shared policy on
+	one or more ranges of the region.
+
+Components of Memory Policies
+-----------------------------
+
+A NUMA memory policy consists of a "mode", optional mode flags, and
+an optional set of nodes.  The mode determines the behavior of the
+policy, the optional mode flags determine the behavior of the mode,
+and the optional set of nodes can be viewed as the arguments to the
+policy behavior.
+
+Internally, memory policies are implemented by a reference counted
+structure, struct mempolicy.  Details of this structure will be
+discussed in context, below, as required to explain the behavior.
+
+NUMA memory policy supports the following 4 behavioral modes:
+
+Default Mode--MPOL_DEFAULT
+	This mode is only used in the memory policy APIs.  Internally,
+	MPOL_DEFAULT is converted to the NULL memory policy in all
+	policy scopes.  Any existing non-default policy will simply be
+	removed when MPOL_DEFAULT is specified.  As a result,
+	MPOL_DEFAULT means "fall back to the next most specific policy
+	scope."
+
+	For example, a NULL or default task policy will fall back to the
+	system default policy.  A NULL or default vma policy will fall
+	back to the task policy.
+
+	When specified in one of the memory policy APIs, the Default mode
+	does not use the optional set of nodes.
+
+	It is an error for the set of nodes specified for this policy to
+	be non-empty.
+
+MPOL_BIND
+	This mode specifies that memory must come from the set of
+	nodes specified by the policy.  Memory will be allocated from
+	the node in the set with sufficient free memory that is
+	closest to the node where the allocation takes place.
+
+MPOL_PREFERRED
+	This mode specifies that the allocation should be attempted
+	from the single node specified in the policy.  If that
+	allocation fails, the kernel will search other nodes, in order
+	of increasing distance from the preferred node based on
+	information provided by the platform firmware.
+
+	Internally, the Preferred policy uses a single node--the
+	preferred_node member of struct mempolicy.  When the internal
+	mode flag MPOL_F_LOCAL is set, the preferred_node is ignored
+	and the policy is interpreted as local allocation.  "Local"
+	allocation policy can be viewed as a Preferred policy that
+	starts at the node containing the cpu where the allocation
+	takes place.
+
+	It is possible for the user to specify that local allocation
+	is always preferred by passing an empty nodemask with this
+	mode.  If an empty nodemask is passed, the policy cannot use
+	the MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags
+	described below.
+
+MPOL_INTERLEAVED
+	This mode specifies that page allocations be interleaved, on a
+	page granularity, across the nodes specified in the policy.
+	This mode also behaves slightly differently, based on the
+	context where it is used:
+
+	For allocation of anonymous pages and shared memory pages,
+	Interleave mode indexes the set of nodes specified by the
+	policy using the page offset of the faulting address into the
+	segment [VMA] containing the address modulo the number of
+	nodes specified by the policy.  It then attempts to allocate a
+	page, starting at the selected node, as if the node had been
+	specified by a Preferred policy or had been selected by a
+	local allocation.  That is, allocation will follow the per
+	node zonelist.
+
+	For allocation of page cache pages, Interleave mode indexes
+	the set of nodes specified by the policy using a node counter
+	maintained per task.  This counter wraps around to the lowest
+	specified node after it reaches the highest specified node.
+	This will tend to spread the pages out over the nodes
+	specified by the policy based on the order in which they are
+	allocated, rather than based on any page offset into an
+	address range or file.  During system boot up, the temporary
+	interleaved system default policy works in this mode.
+
+NUMA memory policy supports the following optional mode flags:
+
+MPOL_F_STATIC_NODES
+	This flag specifies that the nodemask passed by
+	the user should not be remapped if the task or VMA's set of allowed
+	nodes changes after the memory policy has been defined.
+
+	Without this flag, any time a mempolicy is rebound because of a
+	change in the set of allowed nodes, the node (Preferred) or
+	nodemask (Bind, Interleave) is remapped to the new set of
+	allowed nodes.  This may result in nodes being used that were
+	previously undesired.
+
+	With this flag, if the user-specified nodes overlap with the
+	nodes allowed by the task's cpuset, then the memory policy is
+	applied to their intersection.  If the two sets of nodes do not
+	overlap, the Default policy is used.
+
+	For example, consider a task that is attached to a cpuset with
+	mems 1-3 that sets an Interleave policy over the same set.  If
+	the cpuset's mems change to 3-5, the Interleave will now occur
+	over nodes 3, 4, and 5.  With this flag, however, since only node
+	3 is allowed from the user's nodemask, the "interleave" only
+	occurs over that node.  If no nodes from the user's nodemask are
+	now allowed, the Default behavior is used.
+
+	MPOL_F_STATIC_NODES cannot be combined with the
+	MPOL_F_RELATIVE_NODES flag.  It also cannot be used for
+	MPOL_PREFERRED policies that were created with an empty nodemask
+	(local allocation).
+
+MPOL_F_RELATIVE_NODES
+	This flag specifies that the nodemask passed
+	by the user will be mapped relative to the set of the task or VMA's
+	set of allowed nodes.  The kernel stores the user-passed nodemask,
+	and if the allowed nodes changes, then that original nodemask will
+	be remapped relative to the new set of allowed nodes.
+
+	Without this flag (and without MPOL_F_STATIC_NODES), anytime a
+	mempolicy is rebound because of a change in the set of allowed
+	nodes, the node (Preferred) or nodemask (Bind, Interleave) is
+	remapped to the new set of allowed nodes.  That remap may not
+	preserve the relative nature of the user's passed nodemask to its
+	set of allowed nodes upon successive rebinds: a nodemask of
+	1,3,5 may be remapped to 7-9 and then to 1-3 if the set of
+	allowed nodes is restored to its original state.
+
+	With this flag, the remap is done so that the node numbers from
+	the user's passed nodemask are relative to the set of allowed
+	nodes.  In other words, if nodes 0, 2, and 4 are set in the user's
+	nodemask, the policy will be effected over the first (and in the
+	Bind or Interleave case, the third and fifth) nodes in the set of
+	allowed nodes.  The nodemask passed by the user represents nodes
+	relative to task or VMA's set of allowed nodes.
+
+	If the user's nodemask includes nodes that are outside the range
+	of the new set of allowed nodes (for example, node 5 is set in
+	the user's nodemask when the set of allowed nodes is only 0-3),
+	then the remap wraps around to the beginning of the nodemask and,
+	if not already set, sets the node in the mempolicy nodemask.
+
+	For example, consider a task that is attached to a cpuset with
+	mems 2-5 that sets an Interleave policy over the same set with
+	MPOL_F_RELATIVE_NODES.  If the cpuset's mems change to 3-7, the
+	interleave now occurs over nodes 3,5-7.  If the cpuset's mems
+	then change to 0,2-3,5, then the interleave occurs over nodes
+	0,2-3,5.
+
+	Thanks to the consistent remapping, applications preparing
+	nodemasks to specify memory policies using this flag should
+	disregard their current, actual cpuset imposed memory placement
+	and prepare the nodemask as if they were always located on
+	memory nodes 0 to N-1, where N is the number of memory nodes the
+	policy is intended to manage.  Let the kernel then remap to the
+	set of memory nodes allowed by the task's cpuset, as that may
+	change over time.
+
+	MPOL_F_RELATIVE_NODES cannot be combined with the
+	MPOL_F_STATIC_NODES flag.  It also cannot be used for
+	MPOL_PREFERRED policies that were created with an empty nodemask
+	(local allocation).
+
+Memory Policy Reference Counting
+================================
+
+To resolve use/free races, struct mempolicy contains an atomic reference
+count field.  Internal interfaces, mpol_get()/mpol_put() increment and
+decrement this reference count, respectively.  mpol_put() will only free
+the structure back to the mempolicy kmem cache when the reference count
+goes to zero.
+
+When a new memory policy is allocated, its reference count is initialized
+to '1', representing the reference held by the task that is installing the
+new policy.  When a pointer to a memory policy structure is stored in another
+structure, another reference is added, as the task's reference will be dropped
+on completion of the policy installation.
+
+During run-time "usage" of the policy, we attempt to minimize atomic operations
+on the reference count, as this can lead to cache lines bouncing between cpus
+and NUMA nodes.  "Usage" here means one of the following:
+
+1) querying of the policy, either by the task itself [using the get_mempolicy()
+   API discussed below] or by another task using the /proc/<pid>/numa_maps
+   interface.
+
+2) examination of the policy to determine the policy mode and associated node
+   or node lists, if any, for page allocation.  This is considered a "hot
+   path".  Note that for MPOL_BIND, the "usage" extends across the entire
+   allocation process, which may sleep during page reclamation, because the
+   BIND policy nodemask is used, by reference, to filter ineligible nodes.
+
+We can avoid taking an extra reference during the usages listed above as
+follows:
+
+1) we never need to get/free the system default policy as this is never
+   changed nor freed, once the system is up and running.
+
+2) for querying the policy, we do not need to take an extra reference on the
+   target task's task policy nor vma policies because we always acquire the
+   task's mm's mmap_sem for read during the query.  The set_mempolicy() and
+   mbind() APIs [see below] always acquire the mmap_sem for write when
+   installing or replacing task or vma policies.  Thus, there is no possibility
+   of a task or thread freeing a policy while another task or thread is
+   querying it.
+
+3) Page allocation usage of task or vma policy occurs in the fault path where
+   we hold the mmap_sem for read.  Again, because replacing the task or vma
+   policy requires that the mmap_sem be held for write, the policy can't be
+   freed out from under us while we're using it for page allocation.
+
+4) Shared policies require special consideration.  One task can replace a
+   shared memory policy while another task, with a distinct mmap_sem, is
+   querying or allocating a page based on the policy.  To resolve this
+   potential race, the shared policy infrastructure adds an extra reference
+   to the shared policy during lookup while holding a spin lock on the shared
+   policy management structure.  This requires that we drop this extra
+   reference when we're finished "using" the policy.  We must drop the
+   extra reference on shared policies in the same query/allocation paths
+   used for non-shared policies.  For this reason, shared policies are marked
+   as such, and the extra reference is dropped "conditionally"--i.e., only
+   for shared policies.
+
+   Because of this extra reference counting, and because we must lookup
+   shared policies in a tree structure under spinlock, shared policies are
+   more expensive to use in the page allocation path.  This is especially
+   true for shared policies on shared memory regions shared by tasks running
+   on different NUMA nodes.  This extra overhead can be avoided by always
+   falling back to task or system default policy for shared memory regions,
+   or by prefaulting the entire shared memory region into memory and locking
+   it down.  However, this might not be appropriate for all applications.
+
+.. _memory_policy_apis:
+
+Memory Policy APIs
+==================
+
+Linux supports 3 system calls for controlling memory policy.  These APIs
+always affect only the calling task, the calling task's address space, or
+some shared object mapped into the calling task's address space.
+
+.. note::
+   the headers that define these APIs and the parameter data types for
+   user space applications reside in a package that is not part of the
+   Linux kernel.  The kernel system call interfaces, with the 'sys\_'
+   prefix, are defined in <linux/syscalls.h>; the mode and flag
+   definitions are defined in <linux/mempolicy.h>.
+
+Set [Task] Memory Policy::
+
+	long set_mempolicy(int mode, const unsigned long *nmask,
+					unsigned long maxnode);
+
+Sets the calling task's "task/process memory policy" to the mode
+specified by the 'mode' argument and the set of nodes defined by
+'nmask'.  'nmask' points to a bit mask of node ids containing at least
+'maxnode' ids.  Optional mode flags may be passed by combining the
+'mode' argument with the flag (for example: MPOL_INTERLEAVE |
+MPOL_F_STATIC_NODES).
+
+See the set_mempolicy(2) man page for more details.
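+
+For illustration, a minimal sketch (error handling abbreviated) that
+interleaves the calling task's future allocations across nodes 0-3; the
+MPOL_* constants come from the numaif.h header shipped in the userspace
+package mentioned in the note above::
+
+	#include <numaif.h>
+
+	unsigned long nodemask = 0xf;	/* nodes 0-3 */
+
+	if (set_mempolicy(MPOL_INTERLEAVE, &nodemask,
+			  8 * sizeof(nodemask)) != 0)
+		perror("set_mempolicy");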
+
+
+Get [Task] Memory Policy or Related Information::
+
+	long get_mempolicy(int *mode,
+			   const unsigned long *nmask, unsigned long maxnode,
+			   void *addr, int flags);
+
+Queries the "task/process memory policy" of the calling task, or the
+policy or location of a specified virtual address, depending on the
+'flags' argument.
+
+See the get_mempolicy(2) man page for more details.
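+
+For illustration, a minimal sketch (error handling abbreviated) that
+queries the policy mode in effect at one virtual address of the calling
+task; 'addr' is a hypothetical pointer into the task's address space::
+
+	#include <numaif.h>
+
+	int mode;
+
+	if (get_mempolicy(&mode, NULL, 0, addr, MPOL_F_ADDR) != 0)
+		perror("get_mempolicy");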
+
+
+Install VMA/Shared Policy for a Range of Task's Address Space::
+
+	long mbind(void *start, unsigned long len, int mode,
+		   const unsigned long *nmask, unsigned long maxnode,
+		   unsigned flags);
+
+mbind() installs the policy specified by (mode, nmask, maxnode) as a
+VMA policy for the range of the calling task's address space specified
+by the 'start' and 'len' arguments.  Additional actions may be
+requested via the 'flags' argument.
+
+See the mbind(2) man page for more details.
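+
+For illustration, a minimal sketch (error handling abbreviated) that
+binds a freshly mmap()ed anonymous region to node 0; the region size and
+the single-node nodemask are illustrative only::
+
+	#include <sys/mman.h>
+	#include <numaif.h>
+
+	size_t len = 16 * 4096;
+	void *start = mmap(NULL, len, PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	unsigned long nodemask = 0x1;	/* node 0 only */
+
+	if (mbind(start, len, MPOL_BIND, &nodemask,
+		  8 * sizeof(nodemask), 0) != 0)
+		perror("mbind");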
+
+Memory Policy Command Line Interface
+====================================
+
+Although not strictly part of the Linux implementation of memory policy,
+a command line tool, numactl(8), exists that allows one to:
+
++ set the task policy for a specified program via set_mempolicy(2), fork(2) and
+  exec(2)
+
++ set the shared policy for a shared memory segment via mbind(2)
+
+The numactl(8) tool is packaged with the run-time version of the library
+containing the memory policy system call wrappers.  Some distributions
+package the headers and compile-time libraries in a separate development
+package.
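+
+For example, a task policy that interleaves a program's memory over all
+allowed nodes can be requested with an invocation along these lines (the
+program name is, of course, a placeholder)::
+
+	numactl --interleave=all ./my_program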
+
+.. _mem_pol_and_cpusets:
+
+Memory Policies and cpusets
+===========================
+
+Memory policies work within cpusets as described above.  For memory policies
+that require a node or set of nodes, the nodes are restricted to the set of
+nodes whose memories are allowed by the cpuset constraints.  If the nodemask
+specified for the policy contains nodes that are not allowed by the cpuset and
+MPOL_F_RELATIVE_NODES is not used, the intersection of the set of nodes
+specified for the policy and the set of nodes with memory is used.  If the
+result is the empty set, the policy is considered invalid and cannot be
+installed.  If MPOL_F_RELATIVE_NODES is used, the policy's nodes are mapped
+onto and folded into the task's set of allowed nodes as previously described.
+
+The interaction of memory policies and cpusets can be problematic when tasks
+in two cpusets share access to a memory region, such as shared memory segments
+created by shmget() or mmap() with the MAP_ANONYMOUS and MAP_SHARED flags.  If
+any of the tasks install shared policy on the region, only nodes whose
+memories are allowed in both cpusets may be used in the policies.  Obtaining
+this information requires "stepping outside" the memory policy APIs to use the
+cpuset information and requires that one know in what cpusets other tasks might
+be attaching to the shared region.  Furthermore, if the cpusets' allowed
+memory sets are disjoint, "local" allocation is the only valid policy.
diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
new file mode 100644
index 0000000..577af85
--- /dev/null
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -0,0 +1,201 @@
+.. _pagemap:
+
+=============================
+Examining Process Page Tables
+=============================
+
+pagemap is a new (as of 2.6.25) set of interfaces in the kernel that allow
+userspace programs to examine the page tables and related information by
+reading files in ``/proc``.
+
+There are four components to pagemap:
+
+ * ``/proc/pid/pagemap``.  This file lets a userspace process find out which
+   physical frame each virtual page is mapped to.  It contains one 64-bit
+   value for each virtual page, containing the following data (from
+   ``fs/proc/task_mmu.c``, above pagemap_read):
+
+    * Bits 0-54  page frame number (PFN) if present
+    * Bits 0-4   swap type if swapped
+    * Bits 5-54  swap offset if swapped
+    * Bit  55    pte is soft-dirty (see
+      :ref:`Documentation/admin-guide/mm/soft-dirty.rst <soft_dirty>`)
+    * Bit  56    page exclusively mapped (since 4.2)
+    * Bits 57-60 zero
+    * Bit  61    page is file-page or shared-anon (since 3.5)
+    * Bit  62    page swapped
+    * Bit  63    page present
+
+   Since Linux 4.0 only users with the CAP_SYS_ADMIN capability can get PFNs.
+   In 4.0 and 4.1 opens by unprivileged users fail with -EPERM.  Starting from
+   4.2 the PFN field is zeroed if the user does not have CAP_SYS_ADMIN.
+   Reason: information about PFNs helps in exploiting the Rowhammer vulnerability.
+
+   If the page is not present but in swap, then the PFN contains an
+   encoding of the swap file number and the page's offset into the
+   swap. Unmapped pages return a null PFN. This allows determining
+   precisely which pages are mapped (or in swap) and comparing mapped
+   pages between processes.
+
+   Efficient users of this interface will use ``/proc/pid/maps`` to
+   determine which areas of memory are actually mapped and llseek to
+   skip over unmapped regions.
+
+ * ``/proc/kpagecount``.  This file contains a 64-bit count of the number of
+   times each page is mapped, indexed by PFN.
+
+ * ``/proc/kpageflags``.  This file contains a 64-bit set of flags for each
+   page, indexed by PFN.
+
+   The flags are (from ``fs/proc/page.c``, above kpageflags_read):
+
+    0. LOCKED
+    1. ERROR
+    2. REFERENCED
+    3. UPTODATE
+    4. DIRTY
+    5. LRU
+    6. ACTIVE
+    7. SLAB
+    8. WRITEBACK
+    9. RECLAIM
+    10. BUDDY
+    11. MMAP
+    12. ANON
+    13. SWAPCACHE
+    14. SWAPBACKED
+    15. COMPOUND_HEAD
+    16. COMPOUND_TAIL
+    17. HUGE
+    18. UNEVICTABLE
+    19. HWPOISON
+    20. NOPAGE
+    21. KSM
+    22. THP
+    23. BALLOON
+    24. ZERO_PAGE
+    25. IDLE
+
+ * ``/proc/kpagecgroup``.  This file contains a 64-bit inode number of the
+   memory cgroup each page is charged to, indexed by PFN. Only available when
+   CONFIG_MEMCG is set.
+
+Short descriptions to the page flags
+====================================
+
+0 - LOCKED
+   page is being locked for exclusive access, e.g. by undergoing read/write IO
+7 - SLAB
+   page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator.
+   When a compound page is used, SLUB/SLQB will only set this flag on the head
+   page; SLOB will not flag it at all.
+10 - BUDDY
+    a free memory block managed by the buddy system allocator
+    The buddy system organizes free memory in blocks of various orders.
+    An order N block has 2^N physically contiguous pages, with the BUDDY flag
+    set for and _only_ for the first page.
+15 - COMPOUND_HEAD
+    A compound page with order N consists of 2^N physically contiguous pages.
+    A compound page with order 2 takes the form of "HTTT", where H denotes its
+    head page and T denotes its tail page(s).  The major consumers of compound
+    pages are hugeTLB pages
+    (:ref:`Documentation/admin-guide/mm/hugetlbpage.rst <hugetlbpage>`),
+    the SLUB etc.  memory allocators and various device drivers.
+    However in this interface, only huge/giga pages are made visible
+    to end users.
+16 - COMPOUND_TAIL
+    A compound page tail (see description above).
+17 - HUGE
+    this is an integral part of a HugeTLB page
+19 - HWPOISON
+    hardware detected memory corruption on this page: don't touch the data!
+20 - NOPAGE
+    no page frame exists at the requested address
+21 - KSM
+    identical memory pages dynamically shared between one or more processes
+22 - THP
+    contiguous pages which construct transparent hugepages
+23 - BALLOON
+    balloon compaction page
+24 - ZERO_PAGE
+    zero page for pfn_zero or huge_zero page
+25 - IDLE
+    page has not been accessed since it was marked idle (see
+    :ref:`Documentation/admin-guide/mm/idle_page_tracking.rst <idle_page_tracking>`).
+    Note that this flag may be stale in case the page was accessed via
+    a PTE. To make sure the flag is up-to-date one has to read
+    ``/sys/kernel/mm/page_idle/bitmap`` first.
+
+IO related page flags
+---------------------
+
+1 - ERROR
+   IO error occurred
+3 - UPTODATE
+   page has up-to-date data
+   i.e. for file backed page: (in-memory data revision >= on-disk one)
+4 - DIRTY
+   page has been written to, hence contains new data
+   i.e. for file backed page: (in-memory data revision >  on-disk one)
+8 - WRITEBACK
+   page is being synced to disk
+
+LRU related page flags
+----------------------
+
+5 - LRU
+   page is in one of the LRU lists
+6 - ACTIVE
+   page is in the active LRU list
+18 - UNEVICTABLE
+   page is in the unevictable (non-)LRU list.  It is somehow pinned and
+   not a candidate for LRU page reclaims, e.g. ramfs pages,
+   shmctl(SHM_LOCK) and mlock() memory segments
+2 - REFERENCED
+   page has been referenced since last LRU list enqueue/requeue
+9 - RECLAIM
+   page will be reclaimed soon after its pageout IO completed
+11 - MMAP
+   a memory mapped page
+12 - ANON
+   a memory mapped page that is not part of a file
+13 - SWAPCACHE
+   page is mapped to swap space, i.e. has an associated swap entry
+14 - SWAPBACKED
+   page is backed by swap/RAM
+
+The page-types tool in the tools/vm directory can be used to query the
+above flags.
+
+Using pagemap to do something useful
+====================================
+
+The general procedure for using pagemap to find out about a process' memory
+usage goes like this:
+
+ 1. Read ``/proc/pid/maps`` to determine which parts of the memory space are
+    mapped to what.
+ 2. Select the maps you are interested in -- all of them, or a particular
+    library, or the stack or the heap, etc.
+ 3. Open ``/proc/pid/pagemap`` and seek to the pages you would like to examine.
+ 4. Read a u64 for each page from pagemap.
+ 5. Open ``/proc/kpagecount`` and/or ``/proc/kpageflags``.  For each PFN you
+    just read, seek to that entry in the file, and read the data you want.
+
+For example, to find the "unique set size" (USS), which is the amount of
+memory that a process is using that is not shared with any other process,
+you can go through every map in the process, find the PFNs, look those up
+in kpagecount, and tally up the number of pages that are only referenced
+once.
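+
+As a minimal sketch of steps 3 and 4 above (error handling abbreviated),
+the following looks up the pagemap entry for one virtual address of the
+current process; the bit positions follow the table at the top of this
+document::
+
+	#include <stdio.h>
+	#include <stdint.h>
+	#include <fcntl.h>
+	#include <unistd.h>
+
+	void show_pagemap_entry(uintptr_t vaddr)
+	{
+		int fd = open("/proc/self/pagemap", O_RDONLY);
+		long psize = sysconf(_SC_PAGESIZE);
+		uint64_t ent;
+
+		/* one 64-bit entry per virtual page, so seek to
+		 * (vaddr / page size) * 8 and read 8 bytes */
+		if (fd < 0 || pread(fd, &ent, sizeof(ent),
+				    (vaddr / psize) * sizeof(ent)) != sizeof(ent))
+			return;
+
+		if (ent & (1ULL << 63))		/* bit 63: page present */
+			printf("pfn 0x%llx\n",
+			       (unsigned long long)(ent & ((1ULL << 55) - 1)));
+		else if (ent & (1ULL << 62))	/* bit 62: page swapped */
+			printf("swap type %llu, offset 0x%llx\n",
+			       (unsigned long long)(ent & 0x1f),
+			       (unsigned long long)((ent >> 5) & ((1ULL << 50) - 1)));
+		close(fd);
+	}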
+
+Other notes
+===========
+
+Reading from any of the files will return -EINVAL if you are not starting
+the read on an 8-byte boundary (e.g., if you sought an odd number of bytes
+into the file), or if the size of the read is not a multiple of 8 bytes.
+
+Before Linux 3.11 pagemap bits 55-60 were used for "page-shift" (which is
+always 12 on most architectures). Since Linux 3.11 their meaning changes
+after first clear of soft-dirty bits. Since Linux 4.2 they are used for
+flags unconditionally.
diff --git a/Documentation/admin-guide/mm/soft-dirty.rst b/Documentation/admin-guide/mm/soft-dirty.rst
new file mode 100644
index 0000000..cb0cfd6
--- /dev/null
+++ b/Documentation/admin-guide/mm/soft-dirty.rst
@@ -0,0 +1,47 @@
+.. _soft_dirty:
+
+===============
+Soft-Dirty PTEs
+===============
+
+Soft-dirty is a bit on a PTE which helps to track which pages a task
+writes to. In order to do this tracking one should
+
+  1. Clear soft-dirty bits from the task's PTEs.
+
+     This is done by writing "4" into the ``/proc/PID/clear_refs`` file of the
+     task in question.
+
+  2. Wait some time.
+
+  3. Read soft-dirty bits from the PTEs.
+
+     This is done by reading from ``/proc/PID/pagemap``. Bit 55 of the
+     64-bit qword is the soft-dirty one. If set, the respective PTE was
+     written to since step 1.
+
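+Putting the steps together, a minimal sketch (error handling omitted;
+the PID is an example, and pagemap_entry stands for a 64-bit entry read
+from ``/proc/PID/pagemap`` as described above)::
+
+	/* step 1: clear soft-dirty bits for the whole task */
+	int fd = open("/proc/1234/clear_refs", O_WRONLY);
+	write(fd, "4", 1);
+	close(fd);
+
+	/* step 3: later, test bit 55 of the 64-bit pagemap entry */
+	int was_written = (pagemap_entry >> 55) & 1;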
+
+Internally, to do this tracking, the writable bit is cleared from PTEs
+when the soft-dirty bit is cleared. So, after this, when the task tries to
+modify a page at some virtual address the #PF occurs and the kernel sets
+the soft-dirty bit on the respective PTE.
+
+Note that although the whole task's address space is marked as r/o after the
+soft-dirty bits are cleared, the #PF-s that occur after that are processed
+fast.  This is so since the pages are still mapped to physical memory, and
+thus all the kernel does is find this fact out and put both the writable and
+soft-dirty bits on the PTE.
+
+While in most cases tracking memory changes by #PF-s is more than enough
+there is still a scenario when we can lose soft dirty bits -- a task
+unmaps a previously mapped memory region and then maps a new one at exactly
+the same place. When unmap is called, the kernel internally clears PTE values
+including soft dirty bits. To notify user space application about such
+memory region renewal the kernel always marks new memory regions (and
+expanded regions) as soft dirty.
+
+This feature is actively used by the checkpoint-restore project. You
+can find more details about it on http://criu.org
+
+
+-- Pavel Emelyanov, Apr 9, 2013
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
new file mode 100644
index 0000000..7ab93a8
--- /dev/null
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -0,0 +1,418 @@
+.. _admin_guide_transhuge:
+
+============================
+Transparent Hugepage Support
+============================
+
+Objective
+=========
+
+Performance critical computing applications dealing with large memory
+working sets are already running on top of libhugetlbfs and in turn
+hugetlbfs. Transparent HugePage Support (THP) is an alternative means of
+using huge pages for the backing of virtual memory that supports the
+automatic promotion and demotion of page sizes, without the
+shortcomings of hugetlbfs.
+
+Currently THP only works for anonymous memory mappings and tmpfs/shmem.
+But in the future it can expand to other filesystems.
+
+.. note::
+   in the examples below we presume that the basic page size is 4K and
+   the huge page size is 2M, although the actual numbers may vary
+   depending on the CPU architecture.
+
+The reason applications run faster is because of two factors. The
+first factor is almost completely irrelevant and not of significant
+interest because it also has the downside of requiring larger
+clear-page and copy-page operations in page faults, which is a
+potentially negative effect. The first factor consists in taking a
+single page fault for each 2M virtual region touched by userland (thus
+reducing the enter/exit kernel frequency by a factor of 512). This
+only matters the first time the memory is accessed for the lifetime of
+a memory mapping. The second, long lasting and much more important
+factor will affect all subsequent accesses to the memory for the whole
+runtime of the application. The second factor consists of two
+components:
+
+1) the TLB miss will run faster (especially with virtualization using
+   nested pagetables but almost always also on bare metal without
+   virtualization)
+
+2) a single TLB entry will be mapping a much larger amount of virtual
+   memory in turn reducing the number of TLB misses. With
+   virtualization and nested pagetables the TLB can be mapped of
+   larger size only if both KVM and the Linux guest are using
+   hugepages but a significant speedup already happens if only one of
+   the two is using hugepages just because of the fact the TLB miss is
+   going to run faster.
+
+THP can be enabled system wide or restricted to certain tasks or even
+memory ranges inside task's address space. Unless THP is completely
+disabled, there is ``khugepaged`` daemon that scans memory and
+collapses sequences of basic pages into huge pages.
+
+The THP behaviour is controlled via :ref:`sysfs <thp_sysfs>`
+interface and using the madvise(2) and prctl(2) system calls.
+
+Transparent Hugepage Support maximizes the usefulness of free memory
+if compared to the reservation approach of hugetlbfs by allowing all
+unused memory to be used as cache or other movable (or even unmovable)
+entities. It doesn't require reservation to prevent hugepage
+allocation failures from being noticeable from userland. It allows paging
+and all other advanced VM features to be available on the
+hugepages. It requires no modifications for applications to take
+advantage of it.
+
+Applications however can be further optimized to take advantage of
+this feature, like for example they've been optimized before to avoid
+a flood of mmap system calls for every malloc(4k). Optimizing userland
+is by far not mandatory and khugepaged already can take care of long
+lived page allocations even for hugepage unaware applications that
+deal with large amounts of memory.
+
+In certain cases when hugepages are enabled system wide, applications
+may end up allocating more memory resources. An application may mmap a
+large region but only touch 1 byte of it; in that case a 2M page might
+be allocated instead of a 4k page for no good reason. This is why it's
+possible to disable hugepages system-wide and to only have them inside
+MADV_HUGEPAGE madvise regions.
+
+Embedded systems should enable hugepages only inside madvise regions
+to eliminate any risk of wasting any precious byte of memory and to
+only run faster.
+
+Applications that get a lot of benefit from hugepages and that don't
+risk losing memory by using hugepages should use
+madvise(MADV_HUGEPAGE) on their critical mmapped regions.
+
+.. _thp_sysfs:
+
+sysfs
+=====
+
+Global THP controls
+-------------------
+
+Transparent Hugepage Support for anonymous memory can be entirely disabled
+(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE
+regions (to avoid the risk of consuming more memory resources) or enabled
+system wide. This can be achieved with one of::
+
+	echo always >/sys/kernel/mm/transparent_hugepage/enabled
+	echo madvise >/sys/kernel/mm/transparent_hugepage/enabled
+	echo never >/sys/kernel/mm/transparent_hugepage/enabled
+
+It's also possible to limit defrag efforts in the VM to generate
+anonymous hugepages in case they're not immediately free to madvise
+regions or to never try to defrag memory and simply fall back to regular
+pages unless hugepages are immediately available. Clearly if we spend CPU
+time to defrag memory, we would expect to gain even more by the fact we
+use hugepages later instead of regular pages. This isn't always
+guaranteed, but it may be more likely in case the allocation is for a
+MADV_HUGEPAGE region.
+
+::
+
+	echo always >/sys/kernel/mm/transparent_hugepage/defrag
+	echo defer >/sys/kernel/mm/transparent_hugepage/defrag
+	echo defer+madvise >/sys/kernel/mm/transparent_hugepage/defrag
+	echo madvise >/sys/kernel/mm/transparent_hugepage/defrag
+	echo never >/sys/kernel/mm/transparent_hugepage/defrag
+
+always
+	means that an application requesting THP will stall on
+	allocation failure and directly reclaim pages and compact
+	memory in an effort to allocate a THP immediately. This may be
+	desirable for virtual machines that benefit heavily from THP
+	use and are willing to delay the VM start to utilise them.
+
+defer
+	means that an application will wake kswapd in the background
+	to reclaim pages and wake kcompactd to compact memory so that
+	THP is available in the near future. It's the responsibility
+	of khugepaged to then install the THP pages later.
+
+defer+madvise
+	will enter direct reclaim and compaction like ``always``, but
+	only for regions that have used madvise(MADV_HUGEPAGE); all
+	other regions will wake kswapd in the background to reclaim
+	pages and wake kcompactd to compact memory so that THP is
+	available in the near future.
+
+madvise
+	will enter direct reclaim like ``always`` but only for regions
+	that have used madvise(MADV_HUGEPAGE). This is the default
+	behaviour.
+
+never
+	should be self-explanatory.
+
+By default the kernel tries to use the huge zero page on read page faults
+to anonymous mappings. It's possible to disable the huge zero page by
+writing 0 or enable it back by writing 1::
+
+	echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
+	echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page
+
+Some userspace (such as a test program, or an optimized memory allocation
+library) may want to know the size (in bytes) of a transparent hugepage::
+
+	cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
+
+khugepaged will be automatically started when
+transparent_hugepage/enabled is set to "always" or "madvise", and it'll
+be automatically shut down if it's set to "never".
+
+Khugepaged controls
+-------------------
+
+khugepaged usually runs at low frequency, so while one may not want to
+invoke defrag algorithms synchronously during the page faults, it
+should be worth invoking defrag at least in khugepaged. However it's
+also possible to disable defrag in khugepaged by writing 0 or enable
+defrag in khugepaged by writing 1::
+
+	echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
+	echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
+
+You can also control how many pages khugepaged should scan at each
+pass::
+
+	/sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan
+
+and how many milliseconds to wait in khugepaged between each pass (you
+can set this to 0 to run khugepaged at 100% utilization of one core)::
+
+	/sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs
+
+and how many milliseconds to wait in khugepaged if there's a hugepage
+allocation failure to throttle the next allocation attempt::
+
+	/sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs
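+
+For example, to make khugepaged scan more aggressively (the values are
+illustrative only)::
+
+	echo 4096 > /sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan
+	echo 100 > /sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs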
+
+The khugepaged progress can be seen in the number of pages collapsed::
+
+	/sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed
+
+and in the number of full scans performed::
+
+	/sys/kernel/mm/transparent_hugepage/khugepaged/full_scans
+
+``max_ptes_none`` specifies how many extra small pages (that are
+not already mapped) can be allocated when collapsing a group
+of small pages into one large page::
+
+	/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none
+
+A higher value causes programs to use additional memory.
+A lower value reduces the THP performance gain. The CPU-time
+cost of max_ptes_none itself is negligible and can be
+ignored.
+
+``max_ptes_swap`` specifies how many pages can be brought in from
+swap when collapsing a group of pages into a transparent huge page::
+
+	/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_swap
+
+A higher value can cause excessive swap IO and waste
+memory. A lower value can prevent THPs from being
+collapsed, resulting in fewer pages being collapsed into
+THPs and lower memory access performance.
+
+Boot parameter
+==============
+
+You can change the sysfs boot time defaults of Transparent Hugepage
+Support by passing the parameter ``transparent_hugepage=always`` or
+``transparent_hugepage=madvise`` or ``transparent_hugepage=never``
+to the kernel command line.
+
+Hugepages in tmpfs/shmem
+========================
+
+You can control hugepage allocation policy in tmpfs with mount option
+``huge=``. It can have following values:
+
+always
+    Attempt to allocate huge pages every time we need a new page;
+
+never
+    Do not allocate huge pages;
+
+within_size
+    Only allocate a huge page if it will be fully within i_size.
+    Also respect fadvise()/madvise() hints;
+
+advise
+    Only allocate huge pages if requested with fadvise()/madvise();
+
+The default policy is ``never``.
+
+``mount -o remount,huge= /mountpoint`` works fine after mount: remounting
+``huge=never`` will not attempt to break up huge pages at all, just stop more
+from being allocated.
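+
+For example, a tmpfs instance that allocates huge pages only while they
+fit within i_size could be mounted like this (the mount point is a
+placeholder)::
+
+	mount -t tmpfs -o huge=within_size tmpfs /mnt/huge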
+
+There's also a sysfs knob to control hugepage allocation policy for internal
+shmem mount: /sys/kernel/mm/transparent_hugepage/shmem_enabled. The mount
+is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or
+MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
+
+In addition to policies listed above, shmem_enabled allows two further
+values:
+
+deny
+    For use in emergencies, to force the huge option off from
+    all mounts;
+force
+    Force the huge option on for all - very useful for testing;
+
+Need of application restart
+===========================
+
+The transparent_hugepage/enabled values and tmpfs mount option only affect
+future behavior. So to make them effective you need to restart any
+application that could have been using hugepages. This also applies to the
+regions registered in khugepaged.
+
+Monitoring usage
+================
+
+The number of anonymous transparent huge pages currently used by the
+system is available by reading the AnonHugePages field in ``/proc/meminfo``.
+To identify what applications are using anonymous transparent huge pages,
+it is necessary to read ``/proc/PID/smaps`` and count the AnonHugePages fields
+for each mapping.
+
+The number of file transparent huge pages mapped to userspace is available
+by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
+To identify what applications are mapping file transparent huge pages, it
+is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
+for each mapping.
+
+Note that reading the smaps file is expensive and reading it
+frequently will incur overhead.
+
+There are a number of counters in ``/proc/vmstat`` that may be used to
+monitor how successfully the system is providing huge pages for use.
+
+thp_fault_alloc
+	is incremented every time a huge page is successfully
+	allocated to handle a page fault. This applies to both the
+	first time a page is faulted and for COW faults.
+
+thp_collapse_alloc
+	is incremented by khugepaged when it has found
+	a range of pages to collapse into one huge page and has
+	successfully allocated a new huge page to store the data.
+
+thp_fault_fallback
+	is incremented if a page fault fails to allocate
+	a huge page and instead falls back to using small pages.
+
+thp_collapse_alloc_failed
+	is incremented if khugepaged found a range
+	of pages that should be collapsed into one huge page but failed
+	the allocation.
+
+thp_file_alloc
+	is incremented every time a file huge page is successfully
+	allocated.
+
+thp_file_mapped
+	is incremented every time a file huge page is mapped into
+	user address space.
+
+thp_split_page
+	is incremented every time a huge page is split into base
+	pages. This can happen for a variety of reasons but a common
+	reason is that a huge page is old and is being reclaimed.
+	This action implies splitting all PMDs the page is mapped with.
+
+thp_split_page_failed
+	is incremented if kernel fails to split huge
+	page. This can happen if the page was pinned by somebody.
+
+thp_deferred_split_page
+	is incremented when a huge page is put onto split
+	queue. This happens when a huge page is partially unmapped and
+	splitting it would free up some memory. Pages on split queue are
+	going to be split under memory pressure.
+
+thp_split_pmd
+	is incremented every time a PMD is split into a table of PTEs.
+	This can happen, for instance, when an application calls mprotect() or
+	munmap() on part of a huge page. It doesn't split the huge page, only
+	the page table entry.
+
+thp_zero_page_alloc
+	is incremented every time a huge zero page is
+	successfully allocated. It includes allocations which were
+	dropped due to a race with another allocation. Note that it doesn't
+	count every map of the huge zero page, only its allocation.
+
+thp_zero_page_alloc_failed
+	is incremented if kernel fails to allocate
+	huge zero page and falls back to using small pages.
+
+thp_swpout
+	is incremented every time a huge page is swapped out in one
+	piece without splitting.
+
+thp_swpout_fallback
+	is incremented if a huge page has to be split before swapout.
+	This is usually because the kernel failed to allocate contiguous
+	swap space for the huge page.
+
+As the system ages, allocating huge pages may be expensive as the
+system uses memory compaction to copy data around memory to free a
+huge page for use. There are some counters in ``/proc/vmstat`` to help
+monitor this overhead.
+
+compact_stall
+	is incremented every time a process stalls to run
+	memory compaction so that a huge page is free for use.
+
+compact_success
+	is incremented if the system compacted memory and
+	freed a huge page for use.
+
+compact_fail
+	is incremented if the system tries to compact memory
+	but fails.
+
+compact_pages_moved
+	is incremented each time a page is moved. If
+	this value is increasing rapidly, it implies that the system
+	is copying a lot of data to satisfy the huge page allocation.
+	It is possible that the cost of copying exceeds any savings
+	from reduced TLB misses.
+
+compact_pagemigrate_failed
+	is incremented when the underlying mechanism
+	for moving a page failed.
+
+compact_blocks_moved
+	is incremented each time memory compaction examines
+	a huge page aligned range of pages.
+
+It is possible to establish how long the stalls were using the function
+tracer to record how much time was spent in __alloc_pages_nodemask and
+using the mm_page_alloc tracepoint to identify which allocations were
+for huge pages.
+
+Optimizing the applications
+===========================
+
+To guarantee that the kernel will map a 2M page immediately in any
+memory region, the mmap region has to be naturally hugepage
+aligned. posix_memalign() can provide that guarantee.
+
+Hugetlbfs
+=========
+
+You can use hugetlbfs on a kernel that has transparent hugepage
+support enabled just fine as always. No difference can be noted in
+hugetlbfs other than there will be less overall fragmentation. All
+usual features belonging to hugetlbfs are preserved and
+unaffected. libhugetlbfs will also work fine as usual.
diff --git a/Documentation/admin-guide/mm/userfaultfd.rst b/Documentation/admin-guide/mm/userfaultfd.rst
new file mode 100644
index 0000000..5048cf6
--- /dev/null
+++ b/Documentation/admin-guide/mm/userfaultfd.rst
@@ -0,0 +1,241 @@
+.. _userfaultfd:
+
+===========
+Userfaultfd
+===========
+
+Objective
+=========
+
+Userfaults allow the implementation of on-demand paging from userland
+and more generally they allow userland to take control of various
+memory page faults, something otherwise only the kernel code could do.
+
+For example userfaults allow a proper and more optimal implementation
+of the PROT_NONE+SIGSEGV trick.
+
+Design
+======
+
+Userfaults are delivered and resolved through the userfaultfd syscall.
+
+The userfaultfd (aside from registering and unregistering virtual
+memory ranges) provides two primary functionalities:
+
+1) read/POLLIN protocol to notify a userland thread of the faults
+   happening
+
+2) various UFFDIO_* ioctls that can manage the virtual memory regions
+   registered in the userfaultfd that allows userland to efficiently
+   resolve the userfaults it receives via 1) or to manage the virtual
+   memory in the background
+
+The real advantage of userfaults compared to regular virtual memory
+management of mremap/mprotect is that the userfaults in all their
+operations never involve heavyweight structures like vmas (in fact the
+userfaultfd runtime load never takes the mmap_sem for writing).
+
+Vmas are not suitable for page- (or hugepage) granular fault tracking
+when dealing with virtual address spaces that could span
+Terabytes. Too many vmas would be needed for that.
+
+The userfaultfd, once opened by invoking the syscall, can also be
+passed using unix domain sockets to a manager process, so the same
+manager process could handle the userfaults of a multitude of
+different processes without them being aware of what is going on
+(well of course unless they later try to use the userfaultfd
+themselves on the same region the manager is already tracking, which
+is a corner case that would currently return -EBUSY).
+
+API
+===
+
+When first opened the userfaultfd must be enabled by invoking the
+UFFDIO_API ioctl specifying a uffdio_api.api value set to UFFD_API (or
+a later API version) which will specify the read/POLLIN protocol
+userland intends to speak on the UFFD and the uffdio_api.features
+userland requires. The UFFDIO_API ioctl if successful (i.e. if the
+requested uffdio_api.api is spoken also by the running kernel and the
+requested features are going to be enabled) will return into
+uffdio_api.features and uffdio_api.ioctls two 64bit bitmasks of
+respectively all the available features of the read(2) protocol and
+the generic ioctls available.
+
+The uffdio_api.features bitmask returned by the UFFDIO_API ioctl
+defines what memory types are supported by the userfaultfd and what
+events, except page fault notifications, may be generated.
+
+If the kernel supports registering userfaultfd ranges on hugetlbfs
+virtual memory areas, UFFD_FEATURE_MISSING_HUGETLBFS will be set in
+uffdio_api.features. Similarly, UFFD_FEATURE_MISSING_SHMEM will be
+set if the kernel supports registering userfaultfd ranges on shared
+memory (covering all shmem APIs, i.e. tmpfs, IPCSHM, /dev/zero
+MAP_SHARED, memfd_create, etc).
+
+The userland application that wants to use userfaultfd with hugetlbfs
+or shared memory needs to set the corresponding flag in
+uffdio_api.features to enable those features.
+
+If the userland desires to receive notifications for events other than
+page faults, it has to verify that uffdio_api.features has appropriate
+UFFD_FEATURE_EVENT_* bits set. These events are described in more
+detail below in "Non-cooperative userfaultfd" section.
+
+Once the userfaultfd has been enabled the UFFDIO_REGISTER ioctl should
+be invoked (if present in the returned uffdio_api.ioctls bitmask) to
+register a memory range in the userfaultfd by setting the
+uffdio_register structure accordingly. The uffdio_register.mode
+bitmask will specify to the kernel which kind of faults to track for
+the range (UFFDIO_REGISTER_MODE_MISSING would track missing
+pages). The UFFDIO_REGISTER ioctl will return the
+uffdio_register.ioctls bitmask of ioctls that are suitable to resolve
+userfaults on the range registered. Not all ioctls will necessarily be
+supported for all memory types depending on the underlying virtual
+memory backend (anonymous memory vs tmpfs vs real filebacked
+mappings).
+
+Userland can use the uffdio_register.ioctls to manage the virtual
+address space in the background (to add or potentially also remove
+memory from the userfaultfd registered range). This means a userfault
+could be triggered just before userland maps the user-faulted page in
+the background.
+
+The primary ioctl to resolve userfaults is UFFDIO_COPY. That
+atomically copies a page into the userfault registered range and wakes
+up the blocked userfaults (unless uffdio_copy.mode &
+UFFDIO_COPY_MODE_DONTWAKE is set). Other ioctls work similarly to
+UFFDIO_COPY. They're atomic in the sense of guaranteeing that nothing
+can see a half copied page, since it'll keep userfaulting until the
+copy has finished.
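+
+As a minimal sketch of the flow above (error handling and the event
+read() loop are abbreviated; addr, len, page_size, page_buf and msg are
+placeholders; see the userfaultfd(2) man page for a complete example)::
+
+	#include <linux/userfaultfd.h>
+	#include <sys/syscall.h>
+	#include <sys/ioctl.h>
+	#include <fcntl.h>
+	#include <unistd.h>
+
+	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+	/* API handshake */
+	struct uffdio_api api = { .api = UFFD_API, .features = 0 };
+	ioctl(uffd, UFFDIO_API, &api);
+
+	/* track missing pages in [addr, addr + len) */
+	struct uffdio_register reg = {
+		.range = { .start = (unsigned long)addr, .len = len },
+		.mode  = UFFDIO_REGISTER_MODE_MISSING,
+	};
+	ioctl(uffd, UFFDIO_REGISTER, &reg);
+
+	/* later, after read() returned a page fault uffd_msg:
+	 * resolve it by atomically copying in a prepared page */
+	struct uffdio_copy copy = {
+		.dst = msg.arg.pagefault.address & ~(page_size - 1),
+		.src = (unsigned long)page_buf,
+		.len = page_size,
+	};
+	ioctl(uffd, UFFDIO_COPY, &copy);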
+
+QEMU/KVM
+========
+
+QEMU/KVM is using the userfaultfd syscall to implement postcopy live
+migration. Postcopy live migration is one form of memory
+externalization consisting of a virtual machine running with part or
+all of its memory residing on a different node in the cloud. The
+userfaultfd abstraction is generic enough that not a single line of
+KVM kernel code had to be modified in order to add postcopy live
+migration to QEMU.
+
+Guest async page faults, FOLL_NOWAIT and all other GUP features work
+just fine in combination with userfaults. Userfaults trigger async
+page faults in the guest scheduler so those guest processes that
+aren't waiting for userfaults (i.e. network bound) can keep running in
+the guest vcpus.
+
+It is generally beneficial to run one pass of precopy live migration
+just before starting postcopy live migration, in order to avoid
+generating userfaults for readonly guest regions.
+
+The implementation of postcopy live migration currently uses one
+single bidirectional socket but in the future two different sockets
+will be used (to reduce the latency of the userfaults to the minimum
+possible without having to decrease /proc/sys/net/ipv4/tcp_wmem).
+
+The QEMU in the source node writes all pages that it knows are missing
+in the destination node, into the socket, and the migration thread of
+the QEMU running in the destination node runs UFFDIO_COPY|ZEROPAGE
+ioctls on the userfaultfd in order to map the received pages into the
+guest (UFFDIO_ZEROPAGE is used if the source page was a zero page).
+
+A different postcopy thread in the destination node listens with
+poll() to the userfaultfd in parallel. When a POLLIN event is
+generated after a userfault triggers, the postcopy thread does a
+read() from the userfaultfd and receives the fault address (or -EAGAIN
+in case the userfault was already resolved and woken by a
+UFFDIO_COPY|ZEROPAGE run by the parallel QEMU migration thread).
+
+After the QEMU postcopy thread (running in the destination node) gets
+the userfault address it writes the information about the missing page
+into the socket. The QEMU source node receives the information and
+roughly "seeks" to that page address and continues sending all
+remaining missing pages from that new page offset. Soon after that
+(just the time to flush the tcp_wmem queue through the network) the
+migration thread in the QEMU running in the destination node will
+receive the page that triggered the userfault and it'll map it as
+usual with the UFFDIO_COPY|ZEROPAGE (without actually knowing if it
+was spontaneously sent by the source or if it was an urgent page
+requested through a userfault).
+
+By the time the userfaults start, the QEMU in the destination node
+doesn't need to keep any per-page state bitmap relative to the live
+migration around and a single per-page bitmap has to be maintained in
+the QEMU running in the source node to know which pages are still
+missing in the destination node. The bitmap in the source node is
+checked to find which missing pages to send in round robin and we seek
+over it when receiving incoming userfaults. After sending each page of
+course the bitmap is updated accordingly. It's also useful to avoid
+sending the same page twice (in case the userfault is read by the
+postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration
+thread).
+
+Non-cooperative userfaultfd
+===========================
+
+When the userfaultfd is monitored by an external manager, the manager
+must be able to track changes in the process virtual memory
+layout. Userfaultfd can notify the manager about such changes using
+the same read(2) protocol as for the page fault notifications. The
+manager has to explicitly enable these events by setting appropriate
+bits in uffdio_api.features passed to UFFDIO_API ioctl:
+
+UFFD_FEATURE_EVENT_FORK
+	enable userfaultfd hooks for fork(). When this feature is
+	enabled, the userfaultfd context of the parent process is
+	duplicated into the newly created process. The manager
+	receives UFFD_EVENT_FORK with file descriptor of the new
+	userfaultfd context in the uffd_msg.fork.
+
+UFFD_FEATURE_EVENT_REMAP
+	enable notifications about mremap() calls. When the
+	non-cooperative process moves a virtual memory area to a
+	different location, the manager will receive
+	UFFD_EVENT_REMAP. The uffd_msg.remap will contain the old and
+	new addresses of the area and its original length.
+
+UFFD_FEATURE_EVENT_REMOVE
+	enable notifications about madvise(MADV_REMOVE) and
+	madvise(MADV_DONTNEED) calls. The event UFFD_EVENT_REMOVE will
+	be generated upon these calls to madvise. The uffd_msg.remove
+	will contain start and end addresses of the removed area.
+
+UFFD_FEATURE_EVENT_UNMAP
+	enable notifications about memory unmapping. The manager will
+	get UFFD_EVENT_UNMAP with uffd_msg.remove containing start and
+	end addresses of the unmapped area.
+
+Although the UFFD_FEATURE_EVENT_REMOVE and UFFD_FEATURE_EVENT_UNMAP
+are pretty similar, they quite differ in the action expected from the
+userfaultfd manager. In the former case, the virtual memory is
+removed, but the area is not, the area remains monitored by the
+userfaultfd, and if a page fault occurs in that area it will be
+delivered to the manager. The proper resolution for such page fault is
+to zeromap the faulting address. However, in the latter case, when an
+area is unmapped, either explicitly (with munmap() system call), or
+implicitly (e.g. during mremap()), the area is removed and in turn the
+userfaultfd context for such area disappears too and the manager will
+not get further userland page faults from the removed area. Still, the
+notification is required in order to prevent the manager from using
+UFFDIO_COPY on the unmapped area.
+
+Unlike userland page faults which have to be synchronous and require
+explicit or implicit wakeup, all the events are delivered
+asynchronously and the non-cooperative process resumes execution as
+soon as the manager executes read(). The userfaultfd manager should
+carefully synchronize calls to UFFDIO_COPY with the events
+processing. To aid the synchronization, the UFFDIO_COPY ioctl will
+return -ENOSPC when the monitored process exits at the time of
+UFFDIO_COPY, and -ENOENT, when the non-cooperative process has changed
+its virtual memory layout simultaneously with outstanding UFFDIO_COPY
+operation.
+
+The current asynchronous model of the event delivery is optimal for
+single threaded non-cooperative userfaultfd manager implementations. A
+synchronous event delivery model can be added later as a new
+userfaultfd feature to facilitate multithreading enhancements of the
+non-cooperative manager, for example to allow UFFDIO_COPY ioctls to
+run in parallel to the event reception. Single threaded
+implementations should continue to use the current async event
+delivery model instead.
diff --git a/Documentation/admin-guide/ramoops.rst b/Documentation/admin-guide/ramoops.rst
index 4efd7ce..6dbcc54 100644
--- a/Documentation/admin-guide/ramoops.rst
+++ b/Documentation/admin-guide/ramoops.rst
@@ -61,7 +61,7 @@
 	mem=128M ramoops.mem_address=0x8000000 ramoops.ecc=1
 
  B. Use Device Tree bindings, as described in
- ``Documentation/device-tree/bindings/reserved-memory/admin-guide/ramoops.rst``.
+ ``Documentation/devicetree/bindings/reserved-memory/ramoops.txt``.
  For example::
 
 	reserved-memory {
diff --git a/Documentation/arm/Marvell/README b/Documentation/arm/Marvell/README
index b5bb7f5..56ada27c5 100644
--- a/Documentation/arm/Marvell/README
+++ b/Documentation/arm/Marvell/README
@@ -302,19 +302,15 @@
 	88DE3010, Armada 1000 (no Linux support)
 		Core:		Marvell PJ1 (ARMv5TE), Dual-core
 		Product Brief:	http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
-	88DE3005, Armada 1500-mini
 	88DE3005, Armada 1500 Mini
 		Design name:	BG2CD
 		Core:		ARM Cortex-A9, PL310 L2CC
-		Homepage:	http://www.marvell.com/multimedia-solutions/armada-1500-mini/
-        88DE3006, Armada 1500 Mini Plus
-                Design name:    BG2CDP
-                Core:           Dual Core ARM Cortex-A7
-                Homepage:       http://www.marvell.com/multimedia-solutions/armada-1500-mini-plus/
+	88DE3006, Armada 1500 Mini Plus
+		Design name:	BG2CDP
+		Core:		Dual Core ARM Cortex-A7
 	88DE3100, Armada 1500
 		Design name:	BG2
 		Core:		Marvell PJ4B-MP (ARMv7), Tauros3 L2CC
-		Product Brief:	http://www.marvell.com/digital-entertainment/armada-1500/assets/Marvell-ARMADA-1500-Product-Brief.pdf
 	88DE3114, Armada 1500 Pro
 		Design name:	BG2Q
 		Core:		Quad Core ARM Cortex-A9, PL310 L2CC
@@ -324,13 +320,16 @@
 	88DE3218, ARMADA 1500 Ultra
 		Core:		ARM Cortex-A53
 
-  Homepage: http://www.marvell.com/multimedia-solutions/
+  Homepage: https://www.synaptics.com/products/multimedia-solutions
   Directory: arch/arm/mach-berlin
 
   Comments:
+
    * This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
      with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
 
+   * The Berlin family was acquired by Synaptics from Marvell in 2017.
+
 CPU Cores
 ---------
 
diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.txt
index 525b9f6..760a3f7 100644
--- a/Documentation/block/cmdline-partition.txt
+++ b/Documentation/block/cmdline-partition.txt
@@ -1,7 +1,9 @@
 Embedded device command line partition parsing
 =====================================================================
 
-Support for reading the block device partition table from the command line.
+The "blkdevparts" command line option adds support for reading the
+block device partition table from the kernel command line.
+
 It is typically used for fixed block (eMMC) embedded devices.
 It has no MBR, so saves storage space. Bootloader can be easily accessed
 by absolute address of data on the block device.
@@ -14,22 +16,27 @@
     <partdef> := <size>[@<offset>](part-name)
 
 <blkdev-id>
-    block device disk name, embedded device used fixed block device,
-    it's disk name also fixed. such as: mmcblk0, mmcblk1, mmcblk0boot0.
+    block device disk name. Embedded device uses fixed block device.
+    Its disk name is also fixed, such as: mmcblk0, mmcblk1, mmcblk0boot0.
 
 <size>
     partition size, in bytes, such as: 512, 1m, 1G.
+    size may contain an optional suffix of (upper or lower case):
+      K, M, G, T, P, E.
+    "-" is used to denote all remaining space.
 
 <offset>
     partition start address, in bytes.
+    offset may contain an optional suffix of (upper or lower case):
+      K, M, G, T, P, E.
 
 (part-name)
-    partition name, kernel send uevent with "PARTNAME". application can create
-    a link to block device partition with the name "PARTNAME".
-    user space application can access partition by partition name.
+    partition name. Kernel sends uevent with "PARTNAME". Application can
+    create a link to block device partition with the name "PARTNAME".
+    User space application can access partition by partition name.
 
 Example:
-    eMMC disk name is "mmcblk0" and "mmcblk0boot0"
+    eMMC disk names are "mmcblk0" and "mmcblk0boot0".
 
   bootargs:
     'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
index 733927a..07f1473 100644
--- a/Documentation/block/null_blk.txt
+++ b/Documentation/block/null_blk.txt
@@ -71,13 +71,16 @@
   1: The multi-queue block layer is instantiated with a hardware dispatch
      queue for each CPU node in the system.
 
-use_lightnvm=[0/1]: Default: 0
-  Register device with LightNVM. Requires blk-mq and CONFIG_NVM to be enabled.
-
 no_sched=[0/1]: Default: 0
   0: nullb* use default blk-mq io scheduler.
   1: nullb* doesn't use io scheduler.
 
+blocking=[0/1]: Default: 0
+  0: Register as a non-blocking blk-mq driver device.
+  1: Register as a blocking blk-mq driver device, null_blk will set
+     the BLK_MQ_F_BLOCKING flag, indicating that it sometimes/always
+     needs to block in its ->queue_rq() function.
+
 shared_tags=[0/1]: Default: 0
   0: Tag set is not shared.
   1: Tag set shared between devices for blk-mq. Only makes sense with
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst
index fce9291..2e7165f 100644
--- a/Documentation/core-api/atomic_ops.rst
+++ b/Documentation/core-api/atomic_ops.rst
@@ -111,7 +111,6 @@
 variable a, then the compiler is within its rights transforming this to
 the following::
 
-	tmp = a;
 	if (a > 0)
 		for (;;)
 			do_something();
@@ -119,7 +118,7 @@
 If you don't want the compiler to do this (and you probably don't), then
 you should use something like the following::
 
-	while (READ_ONCE(a) < 0)
+	while (READ_ONCE(a) > 0)
 		do_something();
 
 Alternatively, you could place a barrier() call in the loop.
@@ -467,10 +466,12 @@
 indicates whether the changed bit was set _BEFORE_ the atomic bit
 operation.
 
-WARNING! It is incredibly important that the value be a boolean,
-ie. "0" or "1".  Do not try to be fancy and save a few instructions by
-declaring the above to return "long" and just returning something like
-"old_val & mask" because that will not work.
+
+.. warning::
+        It is incredibly important that the value be a boolean, ie. "0" or "1".
+        Do not try to be fancy and save a few instructions by declaring the
+        above to return "long" and just returning something like "old_val &
+        mask" because that will not work.
 
 For one thing, this return value gets truncated to int in many code
 paths using these interfaces, so on 64-bit if the bit is set in the
diff --git a/Documentation/cachetlb.txt b/Documentation/core-api/cachetlb.rst
similarity index 100%
rename from Documentation/cachetlb.txt
rename to Documentation/core-api/cachetlb.rst
diff --git a/Documentation/circular-buffers.txt b/Documentation/core-api/circular-buffers.rst
similarity index 100%
rename from Documentation/circular-buffers.txt
rename to Documentation/core-api/circular-buffers.rst
diff --git a/Documentation/core-api/gfp_mask-from-fs-io.rst b/Documentation/core-api/gfp_mask-from-fs-io.rst
new file mode 100644
index 0000000..e0df8f4
--- /dev/null
+++ b/Documentation/core-api/gfp_mask-from-fs-io.rst
@@ -0,0 +1,66 @@
+=================================
+GFP masks used from FS/IO context
+=================================
+
+:Date: May, 2018
+:Author: Michal Hocko <mhocko@kernel.org>
+
+Introduction
+============
+
+Code paths in the filesystem and IO stacks must be careful when
+allocating memory to prevent recursion deadlocks caused by direct
+memory reclaim calling back into the FS or IO paths and blocking on
+already held resources (e.g. locks - most commonly those used for the
+transaction context).
+
+The traditional way to avoid this deadlock problem is to clear __GFP_FS
+or __GFP_IO (note the latter implies clearing the former as well) in
+the gfp mask when calling an allocator. GFP_NOFS and GFP_NOIO can be
+used as shortcuts. It turned out though that the above approach has led to
+abuses when the restricted gfp mask is used "just in case" without a
+deeper consideration, which leads to problems because an excessive use
+of GFP_NOFS/GFP_NOIO can lead to memory over-reclaim or other memory
+reclaim issues.
+
+New API
+========
+
+Since 4.12 we do have a generic scope API for both NOFS and NOIO contexts:
+``memalloc_nofs_save``/``memalloc_nofs_restore`` and
+``memalloc_noio_save``/``memalloc_noio_restore``, which allow marking a
+scope as a critical section from a filesystem or I/O point of view. Any
+allocation from that scope will inherently drop __GFP_FS or __GFP_IO,
+respectively, from the given mask so no memory allocation can recurse
+back into the FS/IO.
+
+.. kernel-doc:: include/linux/sched/mm.h
+   :functions: memalloc_nofs_save memalloc_nofs_restore
+.. kernel-doc:: include/linux/sched/mm.h
+   :functions: memalloc_noio_save memalloc_noio_restore
+
+FS/IO code then simply calls the appropriate save function before
+any critical section with respect to the reclaim is started - e.g.
+before taking a lock shared with the reclaim context or when a
+transaction context nesting would be possible via reclaim. The restore
+function should be called when the critical section ends. Ideally this
+is all accompanied by an explanation of what the reclaim context is,
+for easier maintenance.
+
+Please note that the proper pairing of save/restore functions
+allows nesting so it is safe to call ``memalloc_noio_save`` or
+``memalloc_noio_restore`` respectively from an existing NOIO or NOFS
+scope.
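+
+A minimal sketch of the pattern (the lock and the allocation are
+illustrative)::
+
+	unsigned int nofs_flags;
+
+	/* fs_mutex is also taken from the reclaim path, so any
+	 * allocation under it must not recurse into the FS */
+	nofs_flags = memalloc_nofs_save();
+	mutex_lock(&fs_mutex);
+	obj = kmalloc(size, GFP_KERNEL);	/* behaves as GFP_NOFS here */
+	mutex_unlock(&fs_mutex);
+	memalloc_nofs_restore(nofs_flags);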
+
+What about __vmalloc(GFP_NOFS)
+==============================
+
+vmalloc doesn't support GFP_NOFS semantics because there are hardcoded
+GFP_KERNEL allocations deep inside the allocator which are quite non-trivial
+to fix up. That means that calling ``vmalloc`` with GFP_NOFS/GFP_NOIO is
+almost always a bug. The good news is that the NOFS/NOIO semantic can be
+achieved by the scope API.
+
+In the ideal world, upper layers should already mark dangerous contexts
+and so no special care is required and vmalloc should be called without
+any problems. Sometimes if the context is not really clear or there are
+layering violations then the recommended way around that is to wrap ``vmalloc``
+by the scope API with a comment explaining the problem.
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index c670a80..f5a66b7 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -14,6 +14,7 @@
    kernel-api
    assoc_array
    atomic_ops
+   cachetlb
    refcount-vs-atomic
    cpu_hotplug
    idr
@@ -25,6 +26,8 @@
    genalloc
    errseq
    printk-formats
+   circular-buffers
+   gfp_mask-from-fs-io
 
 Interfaces for kernel debugging
 ===============================
diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 92f3000..8e44aea 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -39,17 +39,17 @@
 .. kernel-doc:: lib/string.c
    :export:
 
+Basic Kernel Library Functions
+==============================
+
+The Linux kernel provides more basic utility functions.
+
 Bit Operations
 --------------
 
 .. kernel-doc:: arch/x86/include/asm/bitops.h
    :internal:
 
-Basic Kernel Library Functions
-==============================
-
-The Linux kernel provides more basic utility functions.
-
 Bitmap Operations
 -----------------
 
@@ -80,6 +80,31 @@
 .. kernel-doc:: lib/cmdline.c
    :export:
 
+Sorting
+-------
+
+.. kernel-doc:: lib/sort.c
+   :export:
+
+.. kernel-doc:: lib/list_sort.c
+   :export:
+
+Text Searching
+--------------
+
+.. kernel-doc:: lib/textsearch.c
+   :doc: ts_intro
+
+.. kernel-doc:: lib/textsearch.c
+   :export:
+
+.. kernel-doc:: include/linux/textsearch.h
+   :functions: textsearch_find textsearch_next \
+               textsearch_get_pattern textsearch_get_pattern_len
+
+CRC and Math Functions in Linux
+===============================
+
 CRC Functions
 -------------
 
@@ -103,9 +128,6 @@
 .. kernel-doc:: lib/crc-itu-t.c
    :export:
 
-Math Functions in Linux
-=======================
-
 Base 2 log and power Functions
 ------------------------------
 
@@ -127,28 +149,6 @@
 .. kernel-doc:: lib/gcd.c
    :export:
 
-Sorting
--------
-
-.. kernel-doc:: lib/sort.c
-   :export:
-
-.. kernel-doc:: lib/list_sort.c
-   :export:
-
-Text Searching
---------------
-
-.. kernel-doc:: lib/textsearch.c
-   :doc: ts_intro
-
-.. kernel-doc:: lib/textsearch.c
-   :export:
-
-.. kernel-doc:: include/linux/textsearch.h
-   :functions: textsearch_find textsearch_next \
-               textsearch_get_pattern textsearch_get_pattern_len
-
 UUID/GUID
 ---------
 
diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index eb30efd..25dc591 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -419,11 +419,10 @@
 
 	%pC	pll1
 	%pCn	pll1
-	%pCr	1560000000
 
 For printing struct clk structures. %pC and %pCn print the name
 (Common Clock Framework) or address (legacy clock framework) of the
-structure; %pCr prints the current clock rate.
+structure.
 
 Passed by reference.
 
diff --git a/Documentation/core-api/refcount-vs-atomic.rst b/Documentation/core-api/refcount-vs-atomic.rst
index 83351c2..322851b 100644
--- a/Documentation/core-api/refcount-vs-atomic.rst
+++ b/Documentation/core-api/refcount-vs-atomic.rst
@@ -17,7 +17,7 @@
 these memory ordering guarantees.
 
 The terms used through this document try to follow the formal LKMM defined in
-github.com/aparri/memory-model/blob/master/Documentation/explanation.txt
+tools/memory-model/Documentation/explanation.txt.
 
 memory-barriers.txt and atomic_t.txt provide more background to the
 memory ordering in general and for atomic operations specifically.
diff --git a/Documentation/crypto/index.rst b/Documentation/crypto/index.rst
index 94c4786..c4ff5d7 100644
--- a/Documentation/crypto/index.rst
+++ b/Documentation/crypto/index.rst
@@ -20,5 +20,6 @@
    architecture
    devel-algos
    userspace-if
+   crypto_engine
    api
    api-samples
diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
index 0fdb6aa..5d1ce7b 100644
--- a/Documentation/dell_rbu.txt
+++ b/Documentation/dell_rbu.txt
@@ -121,10 +121,7 @@
 
 .. note::
 
-   This driver requires a patch for firmware_class.c which has the modified
-   request_firmware_nowait function.
-
-   Also after updating the BIOS image a user mode application needs to execute
+   After updating the BIOS image, a user mode application needs to execute
    code which sends the BIOS update request to the BIOS. So on the next reboot
    the BIOS knows about the new image downloaded and it updates itself.
    Also don't unload the rbu driver if the image has to be updated.
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index f7a18f2..aabc873 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -120,7 +120,7 @@
 
 The header of the report describes what kind of bug happened and what kind of
 access caused it. It's followed by the description of the accessed slub object
-(see 'SLUB Debug output' section in Documentation/vm/slub.txt for details) and
+(see 'SLUB Debug output' section in Documentation/vm/slub.rst for details) and
 the description of the accessed memory page.
 
 In the last section the report shows memory state around the accessed address.
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index e80850e..3bf371a 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -151,6 +151,11 @@
    TEST_FILES, TEST_GEN_FILES mean it is the file which is used by
    test.
 
+ * First use the headers inside the kernel source and/or git repo, and then
+   the system headers. Headers for the kernel release, as opposed to headers
+   installed by the distro on the system, should be the primary focus, to be
+   able to find regressions.
+
 Test Harness
 ============
 
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
deleted file mode 100644
index 4a1714f..0000000
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
+++ /dev/null
@@ -1,233 +0,0 @@
-Altera SoCFPGA ECC Manager
-This driver uses the EDAC framework to implement the SOCFPGA ECC Manager.
-The ECC Manager counts and corrects single bit errors and counts/handles
-double bit errors which are uncorrectable.
-
-Cyclone5 and Arria5 ECC Manager
-Required Properties:
-- compatible : Should be "altr,socfpga-ecc-manager"
-- #address-cells: must be 1
-- #size-cells: must be 1
-- ranges : standard definition, should translate from local addresses
-
-Subcomponents:
-
-L2 Cache ECC
-Required Properties:
-- compatible : Should be "altr,socfpga-l2-ecc"
-- reg : Address and size for ECC error interrupt clear registers.
-- interrupts : Should be single bit error interrupt, then double bit error
-	interrupt. Note the rising edge type.
-
-On Chip RAM ECC
-Required Properties:
-- compatible : Should be "altr,socfpga-ocram-ecc"
-- reg : Address and size for ECC error interrupt clear registers.
-- iram : phandle to On-Chip RAM definition.
-- interrupts : Should be single bit error interrupt, then double bit error
-	interrupt. Note the rising edge type.
-
-Example:
-
-	eccmgr: eccmgr@ffd08140 {
-		compatible = "altr,socfpga-ecc-manager";
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
-
-		l2-ecc@ffd08140 {
-			compatible = "altr,socfpga-l2-ecc";
-			reg = <0xffd08140 0x4>;
-			interrupts = <0 36 1>, <0 37 1>;
-		};
-
-		ocram-ecc@ffd08144 {
-			compatible = "altr,socfpga-ocram-ecc";
-			reg = <0xffd08144 0x4>;
-			iram = <&ocram>;
-			interrupts = <0 178 1>, <0 179 1>;
-		};
-	};
-
-Arria10 SoCFPGA ECC Manager
-The Arria10 SoC ECC Manager handles the IRQs for each peripheral
-in a shared register instead of individual IRQs like the Cyclone5
-and Arria5. Therefore the device tree is different as well.
-
-Required Properties:
-- compatible : Should be "altr,socfpga-a10-ecc-manager"
-- altr,sysgr-syscon : phandle to Arria10 System Manager Block
-	containing the ECC manager registers.
-- #address-cells: must be 1
-- #size-cells: must be 1
-- interrupts : Should be single bit error interrupt, then double bit error
-	interrupt.
-- interrupt-controller : boolean indicator that ECC Manager is an interrupt controller
-- #interrupt-cells : must be set to 2.
-- ranges : standard definition, should translate from local addresses
-
-Subcomponents:
-
-L2 Cache ECC
-Required Properties:
-- compatible : Should be "altr,socfpga-a10-l2-ecc"
-- reg : Address and size for ECC error interrupt clear registers.
-- interrupts : Should be single bit error interrupt, then double bit error
-	interrupt, in this order.
-
-On-Chip RAM ECC
-Required Properties:
-- compatible : Should be "altr,socfpga-a10-ocram-ecc"
-- reg        : Address and size for ECC block registers.
-- interrupts : Should be single bit error interrupt, then double bit error
-	interrupt, in this order.
-
-Ethernet FIFO ECC
-Required Properties:
-- compatible      : Should be "altr,socfpga-eth-mac-ecc"
-- reg             : Address and size for ECC block registers.
-- altr,ecc-parent : phandle to parent Ethernet node.
-- interrupts      : Should be single bit error interrupt, then double bit error
-	interrupt, in this order.
-
-NAND FIFO ECC
-Required Properties:
-- compatible      : Should be "altr,socfpga-nand-ecc"
-- reg             : Address and size for ECC block registers.
-- altr,ecc-parent : phandle to parent NAND node.
-- interrupts      : Should be single bit error interrupt, then double bit error
-	interrupt, in this order.
-
-DMA FIFO ECC
-Required Properties:
-- compatible      : Should be "altr,socfpga-dma-ecc"
-- reg             : Address and size for ECC block registers.
-- altr,ecc-parent : phandle to parent DMA node.
-- interrupts      : Should be single bit error interrupt, then double bit error
-	interrupt, in this order.
-
-USB FIFO ECC
-Required Properties:
-- compatible      : Should be "altr,socfpga-usb-ecc"
-- reg             : Address and size for ECC block registers.
-- altr,ecc-parent : phandle to parent USB node.
-- interrupts      : Should be single bit error interrupt, then double bit error
-	interrupt, in this order.
-
-QSPI FIFO ECC
-Required Properties:
-- compatible      : Should be "altr,socfpga-qspi-ecc"
-- reg             : Address and size for ECC block registers.
-- altr,ecc-parent : phandle to parent QSPI node.
-- interrupts      : Should be single bit error interrupt, then double bit error
-	interrupt, in this order.
-
-SDMMC FIFO ECC
-Required Properties:
-- compatible      : Should be "altr,socfpga-sdmmc-ecc"
-- reg             : Address and size for ECC block registers.
-- altr,ecc-parent : phandle to parent SD/MMC node.
-- interrupts      : Should be single bit error interrupt, then double bit error
-	interrupt, in this order for port A, and then single bit error interrupt,
-	then double bit error interrupt in this order for port B.
-
-Example:
-
-	eccmgr: eccmgr@ffd06000 {
-		compatible = "altr,socfpga-a10-ecc-manager";
-		altr,sysmgr-syscon = <&sysmgr>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>,
-			     <0 0 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-controller;
-		#interrupt-cells = <2>;
-		ranges;
-
-		l2-ecc@ffd06010 {
-			compatible = "altr,socfpga-a10-l2-ecc";
-			reg = <0xffd06010 0x4>;
-			interrupts = <0 IRQ_TYPE_LEVEL_HIGH>,
-				     <32 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		ocram-ecc@ff8c3000 {
-			compatible = "altr,socfpga-a10-ocram-ecc";
-			reg = <0xff8c3000 0x90>;
-			interrupts = <1 IRQ_TYPE_LEVEL_HIGH>,
-				     <33 IRQ_TYPE_LEVEL_HIGH> ;
-		};
-
-		emac0-rx-ecc@ff8c0800 {
-			compatible = "altr,socfpga-eth-mac-ecc";
-			reg = <0xff8c0800 0x400>;
-			altr,ecc-parent = <&gmac0>;
-			interrupts = <4 IRQ_TYPE_LEVEL_HIGH>,
-				     <36 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		emac0-tx-ecc@ff8c0c00 {
-			compatible = "altr,socfpga-eth-mac-ecc";
-			reg = <0xff8c0c00 0x400>;
-			altr,ecc-parent = <&gmac0>;
-			interrupts = <5 IRQ_TYPE_LEVEL_HIGH>,
-				     <37 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		nand-buf-ecc@ff8c2000 {
-			compatible = "altr,socfpga-nand-ecc";
-			reg = <0xff8c2000 0x400>;
-			altr,ecc-parent = <&nand>;
-			interrupts = <11 IRQ_TYPE_LEVEL_HIGH>,
-				     <43 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		nand-rd-ecc@ff8c2400 {
-			compatible = "altr,socfpga-nand-ecc";
-			reg = <0xff8c2400 0x400>;
-			altr,ecc-parent = <&nand>;
-			interrupts = <13 IRQ_TYPE_LEVEL_HIGH>,
-				     <45 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		nand-wr-ecc@ff8c2800 {
-			compatible = "altr,socfpga-nand-ecc";
-			reg = <0xff8c2800 0x400>;
-			altr,ecc-parent = <&nand>;
-			interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
-				     <44 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		dma-ecc@ff8c8000 {
-			compatible = "altr,socfpga-dma-ecc";
-			reg = <0xff8c8000 0x400>;
-			altr,ecc-parent = <&pdma>;
-			interrupts = <10 IRQ_TYPE_LEVEL_HIGH>,
-				     <42 IRQ_TYPE_LEVEL_HIGH>;
-
-		usb0-ecc@ff8c8800 {
-			compatible = "altr,socfpga-usb-ecc";
-			reg = <0xff8c8800 0x400>;
-			altr,ecc-parent = <&usb0>;
-			interrupts = <2 IRQ_TYPE_LEVEL_HIGH>,
-				     <34 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		qspi-ecc@ff8c8400 {
-			compatible = "altr,socfpga-qspi-ecc";
-			reg = <0xff8c8400 0x400>;
-			altr,ecc-parent = <&qspi>;
-			interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
-				     <46 IRQ_TYPE_LEVEL_HIGH>;
-		};
-
-		sdmmc-ecc@ff8c2c00 {
-			compatible = "altr,socfpga-sdmmc-ecc";
-			reg = <0xff8c2c00 0x400>;
-			altr,ecc-parent = <&mmc>;
-			interrupts = <15 IRQ_TYPE_LEVEL_HIGH>,
-				     <47 IRQ_TYPE_LEVEL_HIGH>,
-				     <16 IRQ_TYPE_LEVEL_HIGH>,
-				     <48 IRQ_TYPE_LEVEL_HIGH>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt
index 0047b13..2c88753 100644
--- a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt
+++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt
@@ -14,7 +14,13 @@
 		"adi,adv7513"
 		"adi,adv7533"
 
-- reg: I2C slave address
+- reg: I2C slave addresses
+  The ADV7511 internal registers are split into four pages exposed through
+  different I2C addresses, creating four register maps. Each map has its own
+  I2C address and acts as a standard slave device on the I2C bus. The main
+  address is mandatory; the others are optional and revert to defaults if not
+  specified.
+
 
 The ADV7511 supports a large number of input data formats that differ by their
 color depth, color format, clock mode, bit justification and random
@@ -70,6 +76,9 @@
   rather than generate its own timings for HDMI output.
 - clocks: from common clock binding: reference to the CEC clock.
 - clock-names: from common clock binding: must be "cec".
+- reg-names : Names of maps with programmable addresses.
+	It can contain any map needing a non-default address.
+	Possible map names are: "main", "edid", "cec", "packet"
 
 Required nodes:
 
@@ -88,7 +97,12 @@
 
 	adv7511w: hdmi@39 {
 		compatible = "adi,adv7511w";
-		reg = <39>;
+		/*
+		 * The EDID page will be accessible at address 0x66 on the I2C
+		 * bus. All other maps continue to use their default addresses.
+		 */
+		reg = <0x39>, <0x66>;
+		reg-names = "main", "edid";
 		interrupt-parent = <&gpio3>;
 		interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
 		clocks = <&cec_clock>;
diff --git a/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt b/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt
new file mode 100644
index 0000000..f5725bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/cdns,dsi.txt
@@ -0,0 +1,133 @@
+Cadence DSI bridge
+==================
+
+The Cadence DSI bridge is a DPI to DSI bridge supporting up to 4 DSI lanes.
+
+Required properties:
+- compatible: should be set to "cdns,dsi".
+- reg: physical base address and length of the controller's registers.
+- interrupts: interrupt line connected to the DSI bridge.
+- clocks: DSI bridge clocks.
+- clock-names: must contain "dsi_p_clk" and "dsi_sys_clk".
+- phys: phandle link to the MIPI D-PHY controller.
+- phy-names: must contain "dphy".
+- #address-cells: must be set to 1.
+- #size-cells: must be set to 0.
+
+Optional properties:
+- resets: DSI reset lines.
+- reset-names: can contain "dsi_p_rst".
+
+Required subnodes:
+- ports: Ports as described in Documentation/devicetree/bindings/graph.txt.
+  2 ports are available:
+  * port 0: this port is only needed if some of your DSI devices are
+	    controlled through an external bus like I2C or SPI. It can have
+	    at most 4 endpoints. The endpoint number directly encodes the
+	    DSI virtual channel used by this device.
+  * port 1: represents the DPI input.
+  Other ports will be added later to support new kinds of inputs.
+
+- one subnode per DSI device connected on the DSI bus. Each DSI device should
+  contain a reg property encoding its virtual channel.
+
+Cadence DPHY
+============
+
+Cadence DPHY block.
+
+Required properties:
+- compatible: should be set to "cdns,dphy".
+- reg: physical base address and length of the DPHY registers.
+- clocks: DPHY reference clocks.
+- clock-names: must contain "psm" and "pll_ref".
+- #phy-cells: must be set to 0.
+
+
+Example:
+	dphy0: dphy@fd0e0000{
+		compatible = "cdns,dphy";
+		reg = <0x0 0xfd0e0000 0x0 0x1000>;
+		clocks = <&psm_clk>, <&pll_ref_clk>;
+		clock-names = "psm", "pll_ref";
+		#phy-cells = <0>;
+	};
+
+	dsi0: dsi@fd0c0000 {
+		compatible = "cdns,dsi";
+		reg = <0x0 0xfd0c0000 0x0 0x1000>;
+		clocks = <&pclk>, <&sysclk>;
+		clock-names = "dsi_p_clk", "dsi_sys_clk";
+		interrupts = <1>;
+		phys = <&dphy0>;
+		phy-names = "dphy";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@1 {
+				reg = <1>;
+				dsi0_dpi_input: endpoint {
+					remote-endpoint = <&xxx_dpi_output>;
+				};
+			};
+		};
+
+		panel: dsi-dev@0 {
+			compatible = "<vendor,panel>";
+			reg = <0>;
+		};
+	};
+
+or
+
+	dsi0: dsi@fd0c0000 {
+		compatible = "cdns,dsi";
+		reg = <0x0 0xfd0c0000 0x0 0x1000>;
+		clocks = <&pclk>, <&sysclk>;
+		clock-names = "dsi_p_clk", "dsi_sys_clk";
+		interrupts = <1>;
+		phys = <&dphy1>;
+		phy-names = "dphy";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				dsi0_output: endpoint@0 {
+					reg = <0>;
+					remote-endpoint = <&dsi_panel_input>;
+				};
+			};
+
+			port@1 {
+				reg = <1>;
+				dsi0_dpi_input: endpoint {
+					remote-endpoint = <&xxx_dpi_output>;
+				};
+			};
+		};
+	};
+
+	i2c@xxx {
+		panel: panel@59 {
+			compatible = "<vendor,panel>";
+			reg = <0x59>;
+
+			port {
+				dsi_panel_input: endpoint {
+					remote-endpoint = <&dsi0_output>;
+				};
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
index 3a72a10..a41d280 100644
--- a/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
+++ b/Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt
@@ -14,6 +14,7 @@
 - compatible : Shall contain one or more of
   - "renesas,r8a7795-hdmi" for R8A7795 (R-Car H3) compatible HDMI TX
   - "renesas,r8a7796-hdmi" for R8A7796 (R-Car M3-W) compatible HDMI TX
+  - "renesas,r8a77965-hdmi" for R8A77965 (R-Car M3-N) compatible HDMI TX
   - "renesas,rcar-gen3-hdmi" for the generic R-Car Gen3 compatible HDMI TX
 
     When compatible with generic versions, nodes must list the SoC-specific
diff --git a/Documentation/devicetree/bindings/display/bridge/tda998x.txt b/Documentation/devicetree/bindings/display/bridge/tda998x.txt
index 24cc246..1a4eaca 100644
--- a/Documentation/devicetree/bindings/display/bridge/tda998x.txt
+++ b/Documentation/devicetree/bindings/display/bridge/tda998x.txt
@@ -27,6 +27,9 @@
 	in question is used. The implementation allows one or two DAIs. If two
 	DAIs are defined, they must be of different type.
 
+  - nxp,calib-gpios: calibration GPIO, which must correspond with the
+	gpio used for the TDA998x interrupt pin.
+
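+  For example (a sketch; the GPIO controller and pin number are illustrative
+  and must match the board's interrupt wiring):
+
+	interrupt-parent = <&gpio0>;
+	interrupts = <27 IRQ_TYPE_LEVEL_LOW>;
+	nxp,calib-gpios = <&gpio0 27 GPIO_ACTIVE_HIGH>;
+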
 [1] Documentation/sound/alsa/soc/DAI.txt
 [2] include/dt-bindings/display/tda998x.h
 
diff --git a/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
new file mode 100644
index 0000000..37f0c04
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/thine,thc63lvd1024.txt
@@ -0,0 +1,60 @@
+Thine Electronics THC63LVD1024 LVDS decoder
+-------------------------------------------
+
+The THC63LVD1024 is a dual link LVDS receiver designed to convert LVDS streams
+to parallel data outputs. The chip supports single/dual input/output modes,
+handling up to two LVDS input streams and up to two digital CMOS/TTL outputs.
+
+Single or dual operation mode, output data mapping and DDR output modes are
+configured through input signals and the chip does not expose any control bus.
+
+Required properties:
+- compatible: Shall be "thine,thc63lvd1024"
+- vcc-supply: Power supply for TTL output, TTL CLOCKOUT signal, LVDS input,
+  PLL and digital circuitry
+
+Optional properties:
+- powerdown-gpios: Power down GPIO signal, pin name "/PDWN". Active low
+- oe-gpios: Output enable GPIO signal, pin name "OE". Active high
+
+The THC63LVD1024 video port connections are modeled according
+to OF graph bindings specified by Documentation/devicetree/bindings/graph.txt
+
+Required video port nodes:
+- port@0: First LVDS input port
+- port@2: First digital CMOS/TTL parallel output
+
+Optional video port nodes:
+- port@1: Second LVDS input port
+- port@3: Second digital CMOS/TTL parallel output
+
+Example:
+--------
+
+	thc63lvd1024: lvds-decoder {
+		compatible = "thine,thc63lvd1024";
+
+		vcc-supply = <&reg_lvds_vcc>;
+		powerdown-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+
+				lvds_dec_in_0: endpoint {
+					remote-endpoint = <&lvds_out>;
+				};
+			};
+
+			port@2{
+				reg = <2>;
+
+				lvds_dec_out_2: endpoint {
+					remote-endpoint = <&adv7511_in>;
+				};
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt b/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt
index fc25882..775193e 100644
--- a/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt
+++ b/Documentation/devicetree/bindings/display/exynos/exynos5433-decon.txt
@@ -19,7 +19,8 @@
 	  clock-names property.
 - clock-names: list of clock names sorted in the same order as the clocks
 	       property. Must contain "pclk", "aclk_decon", "aclk_smmu_decon0x",
-	       "aclk_xiu_decon0x", "pclk_smmu_decon0x", clk_decon_vclk",
+	       "aclk_xiu_decon0x", "pclk_smmu_decon0x", "aclk_smmu_decon1x",
+	       "aclk_xiu_decon1x", "pclk_smmu_decon1x", "sclk_decon_vclk",
 	       "sclk_decon_eclk"
 - ports: contains a port which is connected to mic node. address-cells and
 	 size-cells must be 1 and 0, respectively.
@@ -34,10 +35,14 @@
 	clocks = <&cmu_disp CLK_ACLK_DECON>, <&cmu_disp CLK_ACLK_SMMU_DECON0X>,
 		<&cmu_disp CLK_ACLK_XIU_DECON0X>,
 		<&cmu_disp CLK_PCLK_SMMU_DECON0X>,
+		<&cmu_disp CLK_ACLK_SMMU_DECON1X>,
+		<&cmu_disp CLK_ACLK_XIU_DECON1X>,
+		<&cmu_disp CLK_PCLK_SMMU_DECON1X>,
 		<&cmu_disp CLK_SCLK_DECON_VCLK>,
 		<&cmu_disp CLK_SCLK_DECON_ECLK>;
 	clock-names = "aclk_decon", "aclk_smmu_decon0x", "aclk_xiu_decon0x",
-		"pclk_smmu_decon0x", "sclk_decon_vclk", "sclk_decon_eclk";
+		"pclk_smmu_decon0x", "aclk_smmu_decon1x", "aclk_xiu_decon1x",
+		"pclk_smmu_decon1x", "sclk_decon_vclk", "sclk_decon_eclk";
 	interrupt-names = "vsync", "lcd_sys";
 	interrupts = <0 202 0>, <0 203 0>;
 
diff --git a/Documentation/devicetree/bindings/display/renesas,du.txt b/Documentation/devicetree/bindings/display/renesas,du.txt
index c9cd17f..7c6854b 100644
--- a/Documentation/devicetree/bindings/display/renesas,du.txt
+++ b/Documentation/devicetree/bindings/display/renesas,du.txt
@@ -13,6 +13,7 @@
     - "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU
     - "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU
     - "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU
+    - "renesas,du-r8a77965" for R8A77965 (R-Car M3-N) compatible DU
     - "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU
     - "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU
 
@@ -47,20 +48,21 @@
 The following table lists for each supported model the port number
 corresponding to each DU output.
 
-                      Port0          Port1          Port2          Port3
+                        Port0          Port1          Port2          Port3
 -----------------------------------------------------------------------------
- R8A7743 (RZ/G1M)     DPAD 0         LVDS 0         -              -
- R8A7745 (RZ/G1E)     DPAD 0         DPAD 1         -              -
- R8A7779 (R-Car H1)   DPAD 0         DPAD 1         -              -
- R8A7790 (R-Car H2)   DPAD 0         LVDS 0         LVDS 1         -
- R8A7791 (R-Car M2-W) DPAD 0         LVDS 0         -              -
- R8A7792 (R-Car V2H)  DPAD 0         DPAD 1         -              -
- R8A7793 (R-Car M2-N) DPAD 0         LVDS 0         -              -
- R8A7794 (R-Car E2)   DPAD 0         DPAD 1         -              -
- R8A7795 (R-Car H3)   DPAD 0         HDMI 0         HDMI 1         LVDS 0
- R8A7796 (R-Car M3-W) DPAD 0         HDMI 0         LVDS 0         -
- R8A77970 (R-Car V3M) DPAD 0         LVDS 0         -              -
- R8A77995 (R-Car D3)  DPAD 0         LVDS 0         LVDS 1         -
+ R8A7743 (RZ/G1M)       DPAD 0         LVDS 0         -              -
+ R8A7745 (RZ/G1E)       DPAD 0         DPAD 1         -              -
+ R8A7779 (R-Car H1)     DPAD 0         DPAD 1         -              -
+ R8A7790 (R-Car H2)     DPAD 0         LVDS 0         LVDS 1         -
+ R8A7791 (R-Car M2-W)   DPAD 0         LVDS 0         -              -
+ R8A7792 (R-Car V2H)    DPAD 0         DPAD 1         -              -
+ R8A7793 (R-Car M2-N)   DPAD 0         LVDS 0         -              -
+ R8A7794 (R-Car E2)     DPAD 0         DPAD 1         -              -
+ R8A7795 (R-Car H3)     DPAD 0         HDMI 0         HDMI 1         LVDS 0
+ R8A7796 (R-Car M3-W)   DPAD 0         HDMI 0         LVDS 0         -
+ R8A77965 (R-Car M3-N)  DPAD 0         HDMI 0         LVDS 0         -
+ R8A77970 (R-Car V3M)   DPAD 0         LVDS 0         -              -
+ R8A77995 (R-Car D3)    DPAD 0         LVDS 0         LVDS 1         -
 
 
 Example: R8A7795 (R-Car H3) ES2.0 DU
diff --git a/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt b/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt
new file mode 100644
index 0000000..6a6cf5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/sunxi/sun6i-dsi.txt
@@ -0,0 +1,93 @@
+Allwinner A31 DSI Encoder
+=========================
+
+The DSI pipeline consists of two separate blocks: the DSI controller
+itself, and its associated D-PHY.
+
+DSI Encoder
+-----------
+
+The DSI Encoder generates the DSI signal from the TCON's output.
+
+Required properties:
+  - compatible: value must be one of:
+    * allwinner,sun6i-a31-mipi-dsi
+  - reg: base address and size of memory-mapped region
+  - interrupts: interrupt associated to this IP
+  - clocks: phandles to the clocks feeding the DSI encoder
+    * bus: the DSI interface clock
+    * mod: the DSI module clock
+  - clock-names: the clock names mentioned above
+  - phys: phandle to the D-PHY
+  - phy-names: must be "dphy"
+  - resets: phandle to the reset controller driving the encoder
+
+  - ports: A ports node with endpoint definitions as defined in
+    Documentation/devicetree/bindings/media/video-interfaces.txt. The
+    first port should be the input endpoint, usually coming from the
+    associated TCON.
+
+Any MIPI-DSI device attached to this should be described according to
+the bindings defined in ../mipi-dsi-bus.txt
+
+D-PHY
+-----
+
+Required properties:
+  - compatible: value must be one of:
+    * allwinner,sun6i-a31-mipi-dphy
+  - reg: base address and size of memory-mapped region
+  - clocks: phandles to the clocks feeding the D-PHY
+    * bus: the DSI interface clock
+    * mod: the DSI module clock
+  - clock-names: the clock names mentioned above
+  - resets: phandle to the reset controller driving the D-PHY
+
+Example:
+
+dsi0: dsi@1ca0000 {
+	compatible = "allwinner,sun6i-a31-mipi-dsi";
+	reg = <0x01ca0000 0x1000>;
+	interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&ccu CLK_BUS_MIPI_DSI>,
+		 <&ccu CLK_DSI_SCLK>;
+	clock-names = "bus", "mod";
+	resets = <&ccu RST_BUS_MIPI_DSI>;
+	phys = <&dphy0>;
+	phy-names = "dphy";
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	panel@0 {
+		compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
+		reg = <0>;
+		power-gpios = <&pio 1 7 GPIO_ACTIVE_HIGH>; /* PB07 */
+		reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
+		backlight = <&pwm_bl>;
+	};
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0>;
+
+			dsi0_in_tcon0: endpoint {
+				remote-endpoint = <&tcon0_out_dsi0>;
+			};
+		};
+	};
+};
+
+dphy0: d-phy@1ca1000 {
+	compatible = "allwinner,sun6i-a31-mipi-dphy";
+	reg = <0x01ca1000 0x1000>;
+	clocks = <&ccu CLK_BUS_MIPI_DSI>,
+		 <&ccu CLK_DSI_DPHY>;
+	clock-names = "bus", "mod";
+	resets = <&ccu RST_BUS_MIPI_DSI>;
+	#phy-cells = <0>;
+};
diff --git a/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt
new file mode 100644
index 0000000..5626560
--- /dev/null
+++ b/Documentation/devicetree/bindings/edac/socfpga-eccmgr.txt
@@ -0,0 +1,268 @@
+Altera SoCFPGA ECC Manager
+This driver uses the EDAC framework to implement the SOCFPGA ECC Manager.
+The ECC Manager counts and corrects single bit errors and counts/handles
+double bit errors which are uncorrectable.
+
+Cyclone5 and Arria5 ECC Manager
+Required Properties:
+- compatible : Should be "altr,socfpga-ecc-manager"
+- #address-cells: must be 1
+- #size-cells: must be 1
+- ranges : standard definition, should translate from local addresses
+
+Subcomponents:
+
+L2 Cache ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-l2-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+- interrupts : Should be single bit error interrupt, then double bit error
+	interrupt. Note the rising edge type.
+
+On Chip RAM ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-ocram-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+- iram : phandle to On-Chip RAM definition.
+- interrupts : Should be single bit error interrupt, then double bit error
+	interrupt. Note the rising edge type.
+
+Example:
+
+	eccmgr: eccmgr@ffd08140 {
+		compatible = "altr,socfpga-ecc-manager";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		l2-ecc@ffd08140 {
+			compatible = "altr,socfpga-l2-ecc";
+			reg = <0xffd08140 0x4>;
+			interrupts = <0 36 1>, <0 37 1>;
+		};
+
+		ocram-ecc@ffd08144 {
+			compatible = "altr,socfpga-ocram-ecc";
+			reg = <0xffd08144 0x4>;
+			iram = <&ocram>;
+			interrupts = <0 178 1>, <0 179 1>;
+		};
+	};
+
+Arria10 SoCFPGA ECC Manager
+The Arria10 SoC ECC Manager handles the IRQs for each peripheral
+in a shared register instead of individual IRQs like the Cyclone5
+and Arria5. Therefore the device tree is different as well.
+
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ecc-manager"
+- altr,sysmgr-syscon : phandle to Arria10 System Manager Block
+	containing the ECC manager registers.
+- #address-cells: must be 1
+- #size-cells: must be 1
+- interrupts : Should be single bit error interrupt, then double bit error
+	interrupt.
+- interrupt-controller : boolean indicator that ECC Manager is an interrupt controller
+- #interrupt-cells : must be set to 2.
+- ranges : standard definition, should translate from local addresses
+
+Subcomponents:
+
+L2 Cache ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-l2-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+- interrupts : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+On-Chip RAM ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ocram-ecc"
+- reg        : Address and size for ECC block registers.
+- interrupts : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+Ethernet FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-eth-mac-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent Ethernet node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+NAND FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-nand-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent NAND node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+DMA FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-dma-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent DMA node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+USB FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-usb-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent USB node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+QSPI FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-qspi-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent QSPI node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+SDMMC FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-sdmmc-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent SD/MMC node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order for port A, and then single bit error interrupt,
+	then double bit error interrupt in this order for port B.
+
+Example:
+
+	eccmgr: eccmgr@ffd06000 {
+		compatible = "altr,socfpga-a10-ecc-manager";
+		altr,sysmgr-syscon = <&sysmgr>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>,
+			     <0 0 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+		ranges;
+
+		l2-ecc@ffd06010 {
+			compatible = "altr,socfpga-a10-l2-ecc";
+			reg = <0xffd06010 0x4>;
+			interrupts = <0 IRQ_TYPE_LEVEL_HIGH>,
+				     <32 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		ocram-ecc@ff8c3000 {
+			compatible = "altr,socfpga-a10-ocram-ecc";
+			reg = <0xff8c3000 0x90>;
+			interrupts = <1 IRQ_TYPE_LEVEL_HIGH>,
+				     <33 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		emac0-rx-ecc@ff8c0800 {
+			compatible = "altr,socfpga-eth-mac-ecc";
+			reg = <0xff8c0800 0x400>;
+			altr,ecc-parent = <&gmac0>;
+			interrupts = <4 IRQ_TYPE_LEVEL_HIGH>,
+				     <36 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		emac0-tx-ecc@ff8c0c00 {
+			compatible = "altr,socfpga-eth-mac-ecc";
+			reg = <0xff8c0c00 0x400>;
+			altr,ecc-parent = <&gmac0>;
+			interrupts = <5 IRQ_TYPE_LEVEL_HIGH>,
+				     <37 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		nand-buf-ecc@ff8c2000 {
+			compatible = "altr,socfpga-nand-ecc";
+			reg = <0xff8c2000 0x400>;
+			altr,ecc-parent = <&nand>;
+			interrupts = <11 IRQ_TYPE_LEVEL_HIGH>,
+				     <43 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		nand-rd-ecc@ff8c2400 {
+			compatible = "altr,socfpga-nand-ecc";
+			reg = <0xff8c2400 0x400>;
+			altr,ecc-parent = <&nand>;
+			interrupts = <13 IRQ_TYPE_LEVEL_HIGH>,
+				     <45 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		nand-wr-ecc@ff8c2800 {
+			compatible = "altr,socfpga-nand-ecc";
+			reg = <0xff8c2800 0x400>;
+			altr,ecc-parent = <&nand>;
+			interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+				     <44 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		dma-ecc@ff8c8000 {
+			compatible = "altr,socfpga-dma-ecc";
+			reg = <0xff8c8000 0x400>;
+			altr,ecc-parent = <&pdma>;
+			interrupts = <10 IRQ_TYPE_LEVEL_HIGH>,
+			     <42 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		usb0-ecc@ff8c8800 {
+			compatible = "altr,socfpga-usb-ecc";
+			reg = <0xff8c8800 0x400>;
+			altr,ecc-parent = <&usb0>;
+			interrupts = <2 IRQ_TYPE_LEVEL_HIGH>,
+				     <34 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		qspi-ecc@ff8c8400 {
+			compatible = "altr,socfpga-qspi-ecc";
+			reg = <0xff8c8400 0x400>;
+			altr,ecc-parent = <&qspi>;
+			interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+				     <46 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		sdmmc-ecc@ff8c2c00 {
+			compatible = "altr,socfpga-sdmmc-ecc";
+			reg = <0xff8c2c00 0x400>;
+			altr,ecc-parent = <&mmc>;
+			interrupts = <15 IRQ_TYPE_LEVEL_HIGH>,
+				     <47 IRQ_TYPE_LEVEL_HIGH>,
+				     <16 IRQ_TYPE_LEVEL_HIGH>,
+				     <48 IRQ_TYPE_LEVEL_HIGH>;
+		};
+	};
+
+Stratix10 SoCFPGA ECC Manager
+The Stratix10 SoC ECC Manager handles the IRQs for each peripheral
+in a shared register similar to the Arria10. However, ECC requires
+access to registers that can only be read from Secure Monitor with
+SMC calls. Therefore the device tree is slightly different.
+
+Required Properties:
+- compatible : Should be "altr,socfpga-s10-ecc-manager"
+- interrupts : Should be single bit error interrupt, then double bit error
+	interrupt.
+- interrupt-controller : boolean indicator that ECC Manager is an interrupt controller
+- #interrupt-cells : must be set to 2.
+
+Subcomponents:
+
+SDRAM ECC
+Required Properties:
+- compatible : Should be "altr,sdram-edac-s10"
+- interrupts : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+Example:
+
+	eccmgr {
+		compatible = "altr,socfpga-s10-ecc-manager";
+		interrupts = <0 15 4>, <0 95 4>;
+		interrupt-controller;
+		#interrupt-cells = <2>;
+
+		sdramedac {
+			compatible = "altr,sdram-edac-s10";
+			interrupts = <16 4>, <48 4>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/fpga/lattice-machxo2-spi.txt b/Documentation/devicetree/bindings/fpga/lattice-machxo2-spi.txt
new file mode 100644
index 0000000..a8c362e
--- /dev/null
+++ b/Documentation/devicetree/bindings/fpga/lattice-machxo2-spi.txt
@@ -0,0 +1,29 @@
+Lattice MachXO2 Slave SPI FPGA Manager
+
+Lattice MachXO2 FPGAs support a method of loading the bitstream over
+'slave SPI' interface.
+
+See 'MachXO2ProgrammingandConfigurationUsageGuide.pdf' on www.latticesemi.com
+
+Required properties:
+- compatible: should contain "lattice,machxo2-slave-spi"
+- reg: spi chip select of the FPGA
+
+Example for full FPGA configuration:
+
+	fpga-region0 {
+		compatible = "fpga-region";
+		fpga-mgr = <&fpga_mgr_spi>;
+		#address-cells = <0x1>;
+		#size-cells = <0x1>;
+	};
+
+	spi1: spi@2000 {
+        ...
+
+		fpga_mgr_spi: fpga-mgr@0 {
+			compatible = "lattice,machxo2-slave-spi";
+			spi-max-frequency = <8000000>;
+			reg = <0>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt
new file mode 100644
index 0000000..c907aa8
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.txt
@@ -0,0 +1,28 @@
+Broadcom V3D GPU
+
+Only the Broadcom V3D 3.x and newer GPUs are covered by this binding.
+For V3D 2.x, see brcm,bcm-vc4.txt.
+
+Required properties:
+- compatible:	Should be "brcm,7268-v3d" or "brcm,7278-v3d"
+- reg:		Physical base addresses and lengths of the register areas
+- reg-names:	Names for the register areas.  The "hub", "bridge", and "core0"
+		  register areas are always required.  The "gca" register area
+		  is required if the GCA cache controller is present.
+- interrupts:	The interrupt numbers.  The first interrupt is for the hub,
+		  while the following interrupts are for the cores.
+		  See bindings/interrupt-controller/interrupts.txt
+
+Optional properties:
+- clocks:	The core clock the unit runs on
+
+Example:
+
+v3d {
+	compatible = "brcm,7268-v3d";
+	reg = <0xf1204000 0x100>,
+	      <0xf1200000 0x4000>,
+	      <0xf1208000 0x4000>,
+	      <0xf1204100 0x100>;
+	reg-names = "bridge", "hub", "core0", "gca";
+	interrupts = <0 78 4>,
+		     <0 77 4>;
+};
diff --git a/Documentation/devicetree/bindings/gpu/samsung-scaler.txt b/Documentation/devicetree/bindings/gpu/samsung-scaler.txt
new file mode 100644
index 0000000..9c3d981
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/samsung-scaler.txt
@@ -0,0 +1,27 @@
+* Samsung Exynos Image Scaler
+
+Required properties:
+  - compatible : value should be one of the following:
+	(a) "samsung,exynos5420-scaler" for Scaler IP in Exynos5420
+	(b) "samsung,exynos5433-scaler" for Scaler IP in Exynos5433
+
+  - reg : Physical base address of the IP registers and length of memory
+	  mapped region.
+
+  - interrupts : Interrupt specifier for scaler interrupt, according to format
+		 specific to interrupt parent.
+
+  - clocks : Clock specifier for scaler clock, according to generic clock
+	     bindings. (See Documentation/devicetree/bindings/clock/exynos*.txt)
+
+  - clock-names : Names of clocks. For exynos scaler, it should be "mscl"
+		  on 5420 and "pclk", "aclk" and "aclk_xiu" on 5433.
+
+Example:
+	scaler@12800000 {
+		compatible = "samsung,exynos5420-scaler";
+		reg = <0x12800000 0x1294>;
+		interrupts = <0 220 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&clock CLK_MSCL0>;
+		clock-names = "mscl";
+	};
diff --git a/Documentation/devicetree/bindings/hwmon/gpio-fan.txt b/Documentation/devicetree/bindings/hwmon/gpio-fan.txt
index 439a743..2becdcf 100644
--- a/Documentation/devicetree/bindings/hwmon/gpio-fan.txt
+++ b/Documentation/devicetree/bindings/hwmon/gpio-fan.txt
@@ -11,7 +11,7 @@
   must have the RPM values in ascending order.
 - alarm-gpios: This pin going active indicates something is wrong with
   the fan, and a udev event will be fired.
-- cooling-cells: If used as a cooling device, must be <2>
+- #cooling-cells: If used as a cooling device, must be <2>
   Also see: Documentation/devicetree/bindings/thermal/thermal.txt
   min and max states are derived from the speed-map of the fan.
 
diff --git a/Documentation/devicetree/bindings/hwmon/ltc2990.txt b/Documentation/devicetree/bindings/hwmon/ltc2990.txt
new file mode 100644
index 0000000..f92f540
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ltc2990.txt
@@ -0,0 +1,36 @@
+ltc2990: Linear Technology LTC2990 power monitor
+
+Required properties:
+- compatible: Must be "lltc,ltc2990"
+- reg: I2C slave address
+- lltc,meas-mode:
+	An array of two integers for configuring the chip measurement mode.
+
+	The first integer defines the bits 2..0 in the control register. In all
+	cases the internal temperature and supply voltage are measured. In
+	addition the following input measurements are enabled per mode:
+
+		0: V1, V2, TR2
+		1: V1-V2, TR2
+		2: V1-V2, V3, V4
+		3: TR1, V3, V4
+		4: TR1, V3-V4
+		5: TR1, TR2
+		6: V1-V2, V3-V4
+		7: V1, V2, V3, V4
+
+	The second integer defines the bits 4..3 in the control register. This
+	allows a subset of the measurements to be enabled:
+
+		0: Internal temperature and supply voltage only
+		1: TR1, V1 or V1-V2 only per mode
+		2: TR2, V3 or V3-V4 only per mode
+		3: All measurements per mode
+
+Example:
+
+ltc2990@4c {
+	compatible = "lltc,ltc2990";
+	reg = <0x4c>;
+	lltc,meas-mode = <7 3>;	/* V1, V2, V3, V4 */
+};
diff --git a/Documentation/devicetree/bindings/input/elan_i2c.txt b/Documentation/devicetree/bindings/input/elan_i2c.txt
index ee3242c..d80a835 100644
--- a/Documentation/devicetree/bindings/input/elan_i2c.txt
+++ b/Documentation/devicetree/bindings/input/elan_i2c.txt
@@ -14,6 +14,7 @@
 - pinctrl-0: a phandle pointing to the pin settings for the device (see
   pinctrl binding [1]).
 - vcc-supply: a phandle for the regulator supplying 3.3V power.
+- elan,trackpoint: touchpad can support a trackpoint (boolean)
 
 [0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
 [1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
diff --git a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
index a83f9a5..89674ad 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.txt
@@ -9,11 +9,12 @@
 
 Required properties:
 
-- compatible : must have "amlogic,meson8-gpio-intc” and either
-   “amlogic,meson8-gpio-intc” for meson8 SoCs (S802) or
-   “amlogic,meson8b-gpio-intc” for meson8b SoCs (S805) or
-   “amlogic,meson-gxbb-gpio-intc” for GXBB SoCs (S905) or
-   “amlogic,meson-gxl-gpio-intc” for GXL SoCs (S905X, S912)
+- compatible : must have "amlogic,meson8-gpio-intc" and either
+    "amlogic,meson8-gpio-intc" for meson8 SoCs (S802) or
+    "amlogic,meson8b-gpio-intc" for meson8b SoCs (S805) or
+    "amlogic,meson-gxbb-gpio-intc" for GXBB SoCs (S905) or
+    "amlogic,meson-gxl-gpio-intc" for GXL SoCs (S905X, S912) or
+    "amlogic,meson-axg-gpio-intc" for AXG SoCs (A113D, A113X)
 - interrupt-parent : a phandle to the GIC the interrupts are routed to.
    Usually this is provided at the root level of the device tree as it is
    common to most of the SoC.
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
index 0a57f2f..3ea78c4 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
@@ -57,6 +57,20 @@
   occupied by the redistributors. Required if more than one such
   region is present.
 
+- msi-controller: Boolean property. Identifies the node as an MSI
+  controller. Only present if the Message Based Interrupt
+  functionality is being exposed by the HW, and the mbi-ranges
+  property is present.
+
+- mbi-ranges: A list of pairs <intid span>, where "intid" is the first
+  SPI of a range that can be used as an MBI, and "span" is the size of
+  that range. Multiple ranges can be provided. Requires "msi-controller"
+  to be set.
+
+- mbi-alias: Address property. Base address of an alias of the GICD
+  region containing only the {SET,CLR}SPI registers to be used if
+  isolation is required, and if supported by the HW.
+
 Sub-nodes:
 
 PPI affinity can be expressed as a single "ppi-partitions" node,
@@ -99,6 +113,9 @@
 		      <0x0 0x2c020000 0 0x2000>;	// GICV
 		interrupts = <1 9 4>;
 
+		msi-controller;
+		mbi-ranges = <256 128>;
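+		/*
+		 * Sketch only: mbi-alias points at a hypothetical base
+		 * address of a GICD alias; use this optional property
+		 * only when the HW provides such an alias for isolation.
+		 */
+		mbi-alias = <0x0 0x2c1a0000>;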
+
 		gic-its@2c200000 {
 			compatible = "arm,gic-v3-its";
 			msi-controller;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
index edf03f0..136bd61 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
@@ -5,11 +5,14 @@
 - compatible: Should be:
     "st,stm32-exti"
     "st,stm32h7-exti"
+    "st,stm32mp1-exti"
 - reg: Specifies base physical address and size of the registers
 - interrupt-controller: Identifies the node as an interrupt controller
 - #interrupt-cells: Specifies the number of cells to encode an interrupt
   specifier, shall be 2
 - interrupts: interrupts references to primary interrupt controller
+  (only needed for exti controllers with multiple exti lines under the
+  same parent interrupt: "st,stm32-exti" and "st,stm32h7-exti")
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt b/Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt
new file mode 100644
index 0000000..3538a21
--- /dev/null
+++ b/Documentation/devicetree/bindings/ipmi/npcm7xx-kcs-bmc.txt
@@ -0,0 +1,39 @@
+* Nuvoton NPCM7xx KCS (Keyboard Controller Style) IPMI interface
+
+The Nuvoton SOCs (NPCM7xx) are commonly used as BMCs
+(Baseboard Management Controllers) and the KCS interface can be
+used to perform in-band IPMI communication with their host.
+
+Required properties:
+- compatible : should be one of
+    "nuvoton,npcm750-kcs-bmc"
+- interrupts : interrupt generated by the controller
+- kcs_chan : The KCS channel number in the controller
+
+Example:
+
+    lpc_kcs: lpc_kcs@f0007000 {
+        compatible = "nuvoton,npcm750-lpc-kcs", "simple-mfd", "syscon";
+        reg = <0xf0007000 0x40>;
+        reg-io-width = <1>;
+
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges = <0x0 0xf0007000 0x40>;
+
+        kcs1: kcs1@0 {
+            compatible = "nuvoton,npcm750-kcs-bmc";
+            reg = <0x0 0x40>;
+            interrupts = <0 9 4>;
+            kcs_chan = <1>;
+            status = "disabled";
+        };
+
+        kcs2: kcs2@0 {
+            compatible = "nuvoton,npcm750-kcs-bmc";
+            reg = <0x0 0x40>;
+            interrupts = <0 9 4>;
+            kcs_chan = <2>;
+            status = "disabled";
+        };
+    };
\ No newline at end of file
diff --git a/Documentation/devicetree/bindings/leds/leds-cr0014114.txt b/Documentation/devicetree/bindings/leds/leds-cr0014114.txt
new file mode 100644
index 0000000..4255b19
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-cr0014114.txt
@@ -0,0 +1,54 @@
+Crane Merchandising System - cr0014114 LED driver
+-------------------------------------------------
+
+This LED Board is widely used in vending machines produced
+by Crane Merchandising Systems.
+
+Required properties:
+- compatible: "crane,cr0014114"
+
+Property rules described in Documentation/devicetree/bindings/spi/spi-bus.txt
+apply. In particular, "reg" and "spi-max-frequency" properties must be given.
+
+LED sub-node properties:
+- label :
+	see Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger : (optional)
+	see Documentation/devicetree/bindings/leds/common.txt
+
+Example
+-------
+
+led-controller@0 {
+	compatible = "crane,cr0014114";
+	reg = <0>;
+	spi-max-frequency = <50000>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	led@0 {
+		reg = <0>;
+		label = "red:coin";
+	};
+	led@1 {
+		reg = <1>;
+		label = "green:coin";
+	};
+	led@2 {
+		reg = <2>;
+		label = "blue:coin";
+	};
+	led@3 {
+		reg = <3>;
+		label = "red:bill";
+	};
+	led@4 {
+		reg = <4>;
+		label = "green:bill";
+	};
+	led@5 {
+		reg = <5>;
+		label = "blue:bill";
+	};
+	...
+};
diff --git a/Documentation/devicetree/bindings/leds/leds-lm3601x.txt b/Documentation/devicetree/bindings/leds/leds-lm3601x.txt
new file mode 100644
index 0000000..a88b2c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lm3601x.txt
@@ -0,0 +1,45 @@
+* Texas Instruments - lm3601x Single-LED Flash Driver
+
+The LM3601X are ultra-small LED flash drivers that
+provide a high level of adjustability.
+
+Required properties:
+	- compatible : Can be one of the following
+		"ti,lm36010"
+		"ti,lm36011"
+	- reg : I2C slave address
+	- #address-cells : 1
+	- #size-cells : 0
+
+Required child properties:
+	- reg : 0 - Indicates an IR mode
+		1 - Indicates a Torch (white LED) mode
+
+Required properties for flash LED child nodes:
+	See Documentation/devicetree/bindings/leds/common.txt
+	- flash-max-microamp : Range from 11mA - 1.5A
+	- flash-max-timeout-us : Range from 40ms - 1600ms
+	- led-max-microamp : Range from 2.4mA - 376mA
+
+Optional child properties:
+	- label : see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+led-controller@64 {
+	compatible = "ti,lm36010";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	reg = <0x64>;
+
+	led@0 {
+		reg = <1>;
+		label = "white:torch";
+		led-max-microamp = <376000>;
+		flash-max-microamp = <1500000>;
+		flash-max-timeout-us = <1600000>;
+	};
+}
+
+For more product information please see the links below:
+http://www.ti.com/product/LM36010
+http://www.ti.com/product/LM36011
diff --git a/Documentation/devicetree/bindings/leds/leds-sc27xx-bltc.txt b/Documentation/devicetree/bindings/leds/leds-sc27xx-bltc.txt
new file mode 100644
index 0000000..dddf84f
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-sc27xx-bltc.txt
@@ -0,0 +1,41 @@
+LEDs connected to Spreadtrum SC27XX PMIC breathing light controller
+
+The SC27xx breathing light controller supports up to 3 outputs:
+red LED, green LED and blue LED. Each LED can work in normal
+PWM mode or breathing light mode.
+
+Required properties:
+- compatible: Should be "sprd,sc2731-bltc".
+- #address-cells: Must be 1.
+- #size-cells: Must be 0.
+- reg: Specify the controller address.
+
+Required child properties:
+- reg: Port this LED is connected to.
+
+Optional child properties:
+- label: See Documentation/devicetree/bindings/leds/common.txt.
+
+Examples:
+
+led-controller@200 {
+	compatible = "sprd,sc2731-bltc";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	reg = <0x200>;
+
+	led@0 {
+		label = "red";
+		reg = <0x0>;
+	};
+
+	led@1 {
+		label = "green";
+		reg = <0x1>;
+	};
+
+	led@2 {
+		label = "blue";
+		reg = <0x2>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt b/Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt
index 50bf611..13e7040 100644
--- a/Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt
+++ b/Documentation/devicetree/bindings/mmc/amlogic,meson-gx.txt
@@ -12,6 +12,7 @@
   - "amlogic,meson-gxbb-mmc"
   - "amlogic,meson-gxl-mmc"
   - "amlogic,meson-gxm-mmc"
+  - "amlogic,meson-axg-mmc"
 - clocks     : A list of phandle + clock-specifier pairs for the clocks listed in clock-names.
 - clock-names: Should contain the following:
 	"core" - Main peripheral bus clock
@@ -19,6 +20,7 @@
 	"clkin1" - Other parent clock of internal mux
   The driver has an internal mux clock which switches between clkin0 and clkin1 depending on the
   clock rate requested by the MMC core.
+- resets     : phandle of the internal reset line
 
 Example:
 
@@ -29,4 +31,5 @@
 		clocks = <&clkc CLKID_SD_EMMC_A>, <&xtal>, <&clkc CLKID_FCLK_DIV2>;
 		clock-names = "core", "clkin0", "clkin1";
 		pinctrl-0 = <&emmc_pins>;
+		resets = <&reset RESET_SD_EMMC_A>;
 	};
diff --git a/Documentation/devicetree/bindings/mmc/bluefield-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/bluefield-dw-mshc.txt
new file mode 100644
index 0000000..b0f0999
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/bluefield-dw-mshc.txt
@@ -0,0 +1,29 @@
+* Mellanox Bluefield SoC specific extensions to the Synopsys Designware
+  Mobile Storage Host Controller
+
+Read synopsys-dw-mshc.txt for more details
+
+The Synopsys designware mobile storage host controller is used to interface
+a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
+differences between the core Synopsys dw mshc controller properties described
+by synopsys-dw-mshc.txt and the properties used by the Mellanox Bluefield SoC
+specific extensions to the Synopsys Designware Mobile Storage Host Controller.
+
+Required Properties:
+
+* compatible: should be one of the following.
+  - "mellanox,bluefield-dw-mshc": for controllers with Mellanox Bluefield SoC
+    specific extensions.
+
+Example:
+
+	/* Mellanox Bluefield SoC MMC */
+	mmc@6008000 {
+		compatible = "mellanox,bluefield-dw-mshc";
+		reg = <0x6008000 0x400>;
+		interrupts = <32>;
+		fifo-depth = <0x100>;
+		clock-frequency = <24000000>;
+		bus-width = <8>;
+		cap-mmc-highspeed;
+	};
diff --git a/Documentation/devicetree/bindings/mmc/jz4740.txt b/Documentation/devicetree/bindings/mmc/jz4740.txt
new file mode 100644
index 0000000..7cd8c43
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/jz4740.txt
@@ -0,0 +1,38 @@
+* Ingenic JZ47xx MMC controllers
+
+This file documents the device tree properties used for the MMC controller in
+Ingenic JZ4740/JZ4780 SoCs. These are in addition to the core MMC properties
+described in mmc.txt.
+
+Required properties:
+- compatible: Should be one of the following:
+  - "ingenic,jz4740-mmc" for the JZ4740
+  - "ingenic,jz4780-mmc" for the JZ4780
+- reg: Should contain the MMC controller registers location and length.
+- interrupts: Should contain the interrupt specifier of the MMC controller.
+- clocks: Clock for the MMC controller.
+
+Optional properties:
+- dmas: List of DMA specifiers with the controller specific format
+        as described in the generic DMA client binding. A tx and an rx
+        specifier are required.
+- dma-names: RX and TX DMA request names.
+        Should be "rx" and "tx", in that order.
+
+For additional details on DMA client bindings see ../dma/dma.txt.
+
+Example:
+
+mmc0: mmc@13450000 {
+	compatible = "ingenic,jz4780-mmc";
+	reg = <0x13450000 0x1000>;
+
+	interrupt-parent = <&intc>;
+	interrupts = <37>;
+
+	clocks = <&cgu JZ4780_CLK_MSC0>;
+	clock-names = "mmc";
+
+	dmas = <&dma JZ4780_DMA_MSC0_RX 0xffffffff>, <&dma JZ4780_DMA_MSC0_TX 0xffffffff>;
+	dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt
index 467cd7b..f5a0923 100644
--- a/Documentation/devicetree/bindings/mmc/mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc.txt
@@ -19,6 +19,8 @@
 - wp-gpios: Specify GPIOs for write protection, see gpio binding
 - cd-inverted: when present, polarity on the CD line is inverted. See the note
   below for the case, when a GPIO is used for the CD line
+- cd-debounce-delay-ms: Set the delay time before detecting the card after
+  the card-insert interrupt. Only valid when cd-gpios is present.
 - wp-inverted: when present, polarity on the WP line is inverted. See the note
   below for the case, when a GPIO is used for the WP line
 - disable-wp: When set no physical WP line is present. This property should
@@ -56,6 +58,10 @@
 - fixed-emmc-driver-type: for non-removable eMMC, enforce this driver type.
   The value <n> is the driver type as specified in the eMMC specification
   (table 206 in spec version 5.1).
+- post-power-on-delay-ms : It was introduced for MMC pwrseq-simple, which is
+  described in mmc-pwrseq-simple.txt. It is now reused as a tunable delay
+  waiting for the I/O signalling and card power supply to be stable, regardless
+  of whether pwrseq-simple is used. Defaults to 10ms if not specified.
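+
+For example, a slot using a GPIO card-detect line together with these delay
+properties might look like this (values are illustrative):
+
+	mmc@ab000000 {
+		cd-gpios = <&gpio 6 GPIO_ACTIVE_LOW>;
+		cd-debounce-delay-ms = <200>;
+		post-power-on-delay-ms = <20>;
+	};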
 
 *NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line
 polarity properties, we have to fix the meaning of the "normal" and "inverted"
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
index 51775a3..393848c 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-omap.txt
@@ -4,7 +4,14 @@
 
 Required properties:
 - compatible: Should be "ti,dra7-sdhci" for DRA7 and DRA72 controllers
+	      Should be "ti,k2g-sdhci" for K2G
 - ti,hwmods: Must be "mmc<n>", <n> is controller instance starting 1
+	     (Not required for K2G).
+- pinctrl-names: Should be a subset of "default", "hs", "sdr12", "sdr25",
+		 "sdr50", "ddr50-rev11", "sdr104-rev11", "ddr50", "sdr104",
+		 "ddr_1_8v-rev11", "ddr_1_8v", "ddr_3_3v", "hs200_1_8v-rev11"
+		 or "hs200_1_8v".
+- pinctrl-<n> : Pinctrl states as described in bindings/pinctrl/pinctrl-bindings.txt
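+
+For instance, a K2G instance (which omits ti,hwmods) might look like this
+(the register address and interrupt are illustrative):
+
+	mmc0: mmc@4f80000 {
+		compatible = "ti,k2g-sdhci";
+		reg = <0x4f80000 0x400>;
+		interrupts = <GIC_SPI 96 IRQ_TYPE_EDGE_RISING>;
+	};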
 
 Example:
 	mmc1: mmc@4809c000 {
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
index 2d5287e..ee978c9 100644
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -26,6 +26,8 @@
 		"renesas,sdhi-r8a7794" - SDHI IP on R8A7794 SoC
 		"renesas,sdhi-r8a7795" - SDHI IP on R8A7795 SoC
 		"renesas,sdhi-r8a7796" - SDHI IP on R8A7796 SoC
+		"renesas,sdhi-r8a77965" - SDHI IP on R8A77965 SoC
+		"renesas,sdhi-r8a77980" - SDHI IP on R8A77980 SoC
 		"renesas,sdhi-r8a77995" - SDHI IP on R8A77995 SoC
 		"renesas,sdhi-shmobile" - a generic sh-mobile SDHI controller
 		"renesas,rcar-gen1-sdhi" - a generic R-Car Gen1 SDHI controller
diff --git a/Documentation/devicetree/bindings/nvmem/zii,rave-sp-eeprom.txt b/Documentation/devicetree/bindings/nvmem/zii,rave-sp-eeprom.txt
new file mode 100644
index 0000000..d5e22fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/zii,rave-sp-eeprom.txt
@@ -0,0 +1,40 @@
+Zodiac Inflight Innovations RAVE EEPROM Bindings
+
+RAVE SP EEPROM device is an "MFD cell" device exposing the physical EEPROM
+attached to the RAVE Supervisory Processor. It is expected that its Device
+Tree node is specified as a child of the node corresponding to the
+parent RAVE SP device (as documented in
+Documentation/devicetree/bindings/mfd/zii,rave-sp.txt).
+
+Required properties:
+
+- compatible: Should be "zii,rave-sp-eeprom"
+
+Optional properties:
+
+- zii,eeprom-name: Unique EEPROM identifier describing its function in the
+  system. Will be used as the created NVMEM device's name.
+
+Data cells:
+
+Data cells are child nodes of the eeprom node, bindings for which are
+documented in Documentation/devicetree/bindings/nvmem/nvmem.txt
+
+Example:
+
+	rave-sp {
+		compatible = "zii,rave-sp-rdu1";
+		current-speed = <38400>;
+
+		eeprom@a4 {
+			compatible = "zii,rave-sp-eeprom";
+			reg = <0xa4 0x4000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+			zii,eeprom-name = "main-eeprom";
+
+			wdt_timeout: wdt-timeout@81 {
+				reg = <0x81 2>;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/opp/kryo-cpufreq.txt b/Documentation/devicetree/bindings/opp/kryo-cpufreq.txt
new file mode 100644
index 0000000..c2127b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/opp/kryo-cpufreq.txt
@@ -0,0 +1,680 @@
+Qualcomm Technologies, Inc. KRYO CPUFreq and OPP bindings
+=========================================================
+
+In certain Qualcomm Technologies, Inc. SoCs like apq8096 and msm8996
+that have KRYO processors, the CPU frequency subset and the voltage value
+of each OPP vary based on the silicon variant in use.
+Qualcomm Technologies, Inc. Process Voltage Scaling Tables
+define the voltage and frequency values based on the combination of the
+msm-id in SMEM and the speedbin blown into the efuse.
+The qcom-cpufreq-kryo driver reads the msm-id and efuse value from the SoC
+to provide the OPP framework with the required information (existing HW bitmap).
+This is used to determine the voltage and frequency value for each OPP of
+the operating-points-v2 table when it is parsed by the OPP framework.
+
+Required properties:
+--------------------
+In 'cpus' nodes:
+- operating-points-v2: Phandle to the operating-points-v2 table to use.
+
+In 'operating-points-v2' table:
+- compatible: Should be
+	- 'operating-points-v2-kryo-cpu' for apq8096 and msm8996.
+- nvmem-cells: A phandle pointing to a nvmem-cells node representing the
+		efuse registers that have information about the
+		speedbin that is used to select the right frequency/voltage
+		value pair.
+		Please refer to Documentation/devicetree/bindings/nvmem/nvmem.txt
+		for the nvmem-cells bindings and also the examples below.
+
+In every OPP node:
+- opp-supported-hw: A single 32 bit bitmap value, representing compatible HW.
+		    Bitmap:
+			0:	MSM8996 V3, speedbin 0
+			1:	MSM8996 V3, speedbin 1
+			2:	MSM8996 V3, speedbin 2
+			3:	unused
+			4:	MSM8996 SG, speedbin 0
+			5:	MSM8996 SG, speedbin 1
+			6:	MSM8996 SG, speedbin 2
+			7-31:	unused
+
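+For example, an OPP available on all MSM8996 V3 and SG speedbins sets
+bits 0-2 and 4-6, i.e. opp-supported-hw = <0x77>, as in the tables below.
+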
+Example 1:
+---------
+
+	cpus {
+		#address-cells = <2>;
+		#size-cells = <0>;
+
+		CPU0: cpu@0 {
+			device_type = "cpu";
+			compatible = "qcom,kryo";
+			reg = <0x0 0x0>;
+			enable-method = "psci";
+			clocks = <&kryocc 0>;
+			cpu-supply = <&pm8994_s11_saw>;
+			operating-points-v2 = <&cluster0_opp>;
+			#cooling-cells = <2>;
+			next-level-cache = <&L2_0>;
+			L2_0: l2-cache {
+			      compatible = "cache";
+			      cache-level = <2>;
+			};
+		};
+
+		CPU1: cpu@1 {
+			device_type = "cpu";
+			compatible = "qcom,kryo";
+			reg = <0x0 0x1>;
+			enable-method = "psci";
+			clocks = <&kryocc 0>;
+			cpu-supply = <&pm8994_s11_saw>;
+			operating-points-v2 = <&cluster0_opp>;
+			#cooling-cells = <2>;
+			next-level-cache = <&L2_0>;
+		};
+
+		CPU2: cpu@100 {
+			device_type = "cpu";
+			compatible = "qcom,kryo";
+			reg = <0x0 0x100>;
+			enable-method = "psci";
+			clocks = <&kryocc 1>;
+			cpu-supply = <&pm8994_s11_saw>;
+			operating-points-v2 = <&cluster1_opp>;
+			#cooling-cells = <2>;
+			next-level-cache = <&L2_1>;
+			L2_1: l2-cache {
+			      compatible = "cache";
+			      cache-level = <2>;
+			};
+		};
+
+		CPU3: cpu@101 {
+			device_type = "cpu";
+			compatible = "qcom,kryo";
+			reg = <0x0 0x101>;
+			enable-method = "psci";
+			clocks = <&kryocc 1>;
+			cpu-supply = <&pm8994_s11_saw>;
+			operating-points-v2 = <&cluster1_opp>;
+			#cooling-cells = <2>;
+			next-level-cache = <&L2_1>;
+		};
+
+		cpu-map {
+			cluster0 {
+				core0 {
+					cpu = <&CPU0>;
+				};
+
+				core1 {
+					cpu = <&CPU1>;
+				};
+			};
+
+			cluster1 {
+				core0 {
+					cpu = <&CPU2>;
+				};
+
+				core1 {
+					cpu = <&CPU3>;
+				};
+			};
+		};
+	};
+
+	cluster0_opp: opp_table0 {
+		compatible = "operating-points-v2-kryo-cpu";
+		nvmem-cells = <&speedbin_efuse>;
+		opp-shared;
+
+		opp-307200000 {
+			opp-hz = /bits/ 64 <307200000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x77>;
+			clock-latency-ns = <200000>;
+		};
+		opp-384000000 {
+			opp-hz = /bits/ 64 <384000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-422400000 {
+			opp-hz = /bits/ 64 <422400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-460800000 {
+			opp-hz = /bits/ 64 <460800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-480000000 {
+			opp-hz = /bits/ 64 <480000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-537600000 {
+			opp-hz = /bits/ 64 <537600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-556800000 {
+			opp-hz = /bits/ 64 <556800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-614400000 {
+			opp-hz = /bits/ 64 <614400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-652800000 {
+			opp-hz = /bits/ 64 <652800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-691200000 {
+			opp-hz = /bits/ 64 <691200000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-729600000 {
+			opp-hz = /bits/ 64 <729600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-768000000 {
+			opp-hz = /bits/ 64 <768000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-844800000 {
+			opp-hz = /bits/ 64 <844800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x77>;
+			clock-latency-ns = <200000>;
+		};
+		opp-902400000 {
+			opp-hz = /bits/ 64 <902400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-960000000 {
+			opp-hz = /bits/ 64 <960000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-979200000 {
+			opp-hz = /bits/ 64 <979200000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1036800000 {
+			opp-hz = /bits/ 64 <1036800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1056000000 {
+			opp-hz = /bits/ 64 <1056000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1113600000 {
+			opp-hz = /bits/ 64 <1113600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1132800000 {
+			opp-hz = /bits/ 64 <1132800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1190400000 {
+			opp-hz = /bits/ 64 <1190400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1209600000 {
+			opp-hz = /bits/ 64 <1209600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1228800000 {
+			opp-hz = /bits/ 64 <1228800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1286400000 {
+			opp-hz = /bits/ 64 <1286400000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1324800000 {
+			opp-hz = /bits/ 64 <1324800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x5>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1363200000 {
+			opp-hz = /bits/ 64 <1363200000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x72>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1401600000 {
+			opp-hz = /bits/ 64 <1401600000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x5>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1440000000 {
+			opp-hz = /bits/ 64 <1440000000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1478400000 {
+			opp-hz = /bits/ 64 <1478400000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x1>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1497600000 {
+			opp-hz = /bits/ 64 <1497600000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x4>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1516800000 {
+			opp-hz = /bits/ 64 <1516800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1593600000 {
+			opp-hz = /bits/ 64 <1593600000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x71>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1996800000 {
+			opp-hz = /bits/ 64 <1996800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x20>;
+			clock-latency-ns = <200000>;
+		};
+		opp-2188800000 {
+			opp-hz = /bits/ 64 <2188800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x10>;
+			clock-latency-ns = <200000>;
+		};
+	};
+
+	cluster1_opp: opp_table1 {
+		compatible = "operating-points-v2-kryo-cpu";
+		nvmem-cells = <&speedbin_efuse>;
+		opp-shared;
+
+		opp-307200000 {
+			opp-hz = /bits/ 64 <307200000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x77>;
+			clock-latency-ns = <200000>;
+		};
+		opp-384000000 {
+			opp-hz = /bits/ 64 <384000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-403200000 {
+			opp-hz = /bits/ 64 <403200000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-460800000 {
+			opp-hz = /bits/ 64 <460800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-480000000 {
+			opp-hz = /bits/ 64 <480000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-537600000 {
+			opp-hz = /bits/ 64 <537600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-556800000 {
+			opp-hz = /bits/ 64 <556800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-614400000 {
+			opp-hz = /bits/ 64 <614400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-652800000 {
+			opp-hz = /bits/ 64 <652800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-691200000 {
+			opp-hz = /bits/ 64 <691200000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-729600000 {
+			opp-hz = /bits/ 64 <729600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-748800000 {
+			opp-hz = /bits/ 64 <748800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-806400000 {
+			opp-hz = /bits/ 64 <806400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-825600000 {
+			opp-hz = /bits/ 64 <825600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-883200000 {
+			opp-hz = /bits/ 64 <883200000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-902400000 {
+			opp-hz = /bits/ 64 <902400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-940800000 {
+			opp-hz = /bits/ 64 <940800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-979200000 {
+			opp-hz = /bits/ 64 <979200000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1036800000 {
+			opp-hz = /bits/ 64 <1036800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1056000000 {
+			opp-hz = /bits/ 64 <1056000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1113600000 {
+			opp-hz = /bits/ 64 <1113600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1132800000 {
+			opp-hz = /bits/ 64 <1132800000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1190400000 {
+			opp-hz = /bits/ 64 <1190400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1209600000 {
+			opp-hz = /bits/ 64 <1209600000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1248000000 {
+			opp-hz = /bits/ 64 <1248000000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1286400000 {
+			opp-hz = /bits/ 64 <1286400000>;
+			opp-microvolt = <905000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1324800000 {
+			opp-hz = /bits/ 64 <1324800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1363200000 {
+			opp-hz = /bits/ 64 <1363200000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1401600000 {
+			opp-hz = /bits/ 64 <1401600000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1440000000 {
+			opp-hz = /bits/ 64 <1440000000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1478400000 {
+			opp-hz = /bits/ 64 <1478400000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1516800000 {
+			opp-hz = /bits/ 64 <1516800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1555200000 {
+			opp-hz = /bits/ 64 <1555200000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1593600000 {
+			opp-hz = /bits/ 64 <1593600000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1632000000 {
+			opp-hz = /bits/ 64 <1632000000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1670400000 {
+			opp-hz = /bits/ 64 <1670400000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1708800000 {
+			opp-hz = /bits/ 64 <1708800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1747200000 {
+			opp-hz = /bits/ 64 <1747200000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x70>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1785600000 {
+			opp-hz = /bits/ 64 <1785600000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x7>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1804800000 {
+			opp-hz = /bits/ 64 <1804800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x6>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1824000000 {
+			opp-hz = /bits/ 64 <1824000000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x71>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1900800000 {
+			opp-hz = /bits/ 64 <1900800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x74>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1920000000 {
+			opp-hz = /bits/ 64 <1920000000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x1>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1977600000 {
+			opp-hz = /bits/ 64 <1977600000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x30>;
+			clock-latency-ns = <200000>;
+		};
+		opp-1996800000 {
+			opp-hz = /bits/ 64 <1996800000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x1>;
+			clock-latency-ns = <200000>;
+		};
+		opp-2054400000 {
+			opp-hz = /bits/ 64 <2054400000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x30>;
+			clock-latency-ns = <200000>;
+		};
+		opp-2073600000 {
+			opp-hz = /bits/ 64 <2073600000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x1>;
+			clock-latency-ns = <200000>;
+		};
+		opp-2150400000 {
+			opp-hz = /bits/ 64 <2150400000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x31>;
+			clock-latency-ns = <200000>;
+		};
+		opp-2246400000 {
+			opp-hz = /bits/ 64 <2246400000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x10>;
+			clock-latency-ns = <200000>;
+		};
+		opp-2342400000 {
+			opp-hz = /bits/ 64 <2342400000>;
+			opp-microvolt = <1140000 905000 1140000>;
+			opp-supported-hw = <0x10>;
+			clock-latency-ns = <200000>;
+		};
+	};
+
+....
+
+reserved-memory {
+	#address-cells = <2>;
+	#size-cells = <2>;
+	ranges;
+....
+	smem_mem: smem-mem@86000000 {
+		reg = <0x0 0x86000000 0x0 0x200000>;
+		no-map;
+	};
+....
+};
+
+smem {
+	compatible = "qcom,smem";
+	memory-region = <&smem_mem>;
+	hwlocks = <&tcsr_mutex 3>;
+};
+
+soc {
+....
+	qfprom: qfprom@74000 {
+		compatible = "qcom,qfprom";
+		reg = <0x00074000 0x8ff>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+		....
+		speedbin_efuse: speedbin@133 {
+			reg = <0x133 0x1>;
+			bits = <5 3>;
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt
index 4e4f302..c396c4c 100644
--- a/Documentation/devicetree/bindings/opp/opp.txt
+++ b/Documentation/devicetree/bindings/opp/opp.txt
@@ -82,7 +82,10 @@
 properties.
 
 Required properties:
-- opp-hz: Frequency in Hz, expressed as a 64-bit big-endian integer.
+- opp-hz: Frequency in Hz, expressed as a 64-bit big-endian integer. This is a
+  required property for all device nodes, except for devices like power domains.
+  The power domain nodes must have another (implementation dependent) property
+  which uniquely identifies the OPP nodes.
 
 Optional properties:
 - opp-microvolt: voltage in micro Volts.
@@ -159,7 +162,7 @@
 
 - status: Marks the node enabled/disabled.
 
-- required-opp: This contains phandle to an OPP node in another device's OPP
+- required-opps: This contains a phandle to an OPP node in another device's OPP
   table. It may contain an array of phandles, where each phandle points to an
   OPP of a different device. It should not contain multiple phandles to the OPP
   nodes in the same OPP table. This specifies the minimum required OPP of the
diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-xsphy.txt b/Documentation/devicetree/bindings/phy/phy-mtk-xsphy.txt
new file mode 100644
index 0000000..e7caefa
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-mtk-xsphy.txt
@@ -0,0 +1,109 @@
+MediaTek XS-PHY binding
+--------------------------
+
+The XS-PHY controller supports physical layer functionality for USB3.1
+GEN2 controller on MediaTek SoCs.
+
+Required properties (controller (parent) node):
+ - compatible	: should be "mediatek,<soc-model>-xsphy", "mediatek,xsphy",
+		  where soc-model is the name of the SoC, such as mt3611;
+		  when using the "mediatek,xsphy" compatible string, you need an
+		  SoC specific one in addition, one of:
+		  - "mediatek,mt3611-xsphy"
+
+ - #address-cells, #size-cells : should use the same values as the root node
+ - ranges: must be present
+
+Optional properties (controller (parent) node):
+ - reg		: offset and length of the registers shared by multiple U3
+		  ports, excluding each port's private registers; if only U2
+		  ports are provided, this property should not be used.
+ - mediatek,src-ref-clk-mhz	: u32, frequency of the reference clock for
+		  slew rate calibration
+ - mediatek,src-coef	: u32, coefficient for slew rate calibration, depending
+		  on the SoC process
+
+Required nodes	: a sub-node is required for each port the controller
+		  provides. Address range information including the usual
+		  'reg' property is used inside these nodes to describe
+		  the controller's topology.
+
+Required properties (port (child) node):
+- reg		: address and length of the register set for the port.
+- clocks	: a list of phandle + clock-specifier pairs, one for each
+		  entry in clock-names
+- clock-names	: must contain
+		  "ref": 48M reference clock for HighSpeed analog phy; and 26M
+			reference clock for SuperSpeedPlus analog phy, sometimes is
+			24M, 25M or 27M, depended on platform.
+- #phy-cells	: should be 1
+		  the cell after the port phandle is the phy type, one of:
+			- PHY_TYPE_USB2
+			- PHY_TYPE_USB3
+
+The following optional properties are only for debugging or HQA tests.
+Optional properties (PHY_TYPE_USB2 port (child) node):
+- mediatek,eye-src	: u32, the slew rate calibration value
+- mediatek,eye-vrt	: u32, the selection of VRT reference voltage
+- mediatek,eye-term	: u32, the selection of HS_TX TERM reference voltage
+- mediatek,efuse-intr	: u32, the selection of Internal Resistor
+
+Optional properties (PHY_TYPE_USB3 port (child) node):
+- mediatek,efuse-intr	: u32, the selection of Internal Resistor
+- mediatek,efuse-tx-imp	: u32, the selection of TX Impedance
+- mediatek,efuse-rx-imp	: u32, the selection of RX Impedance
+
+Banks layout of xsphy
+-------------------------------------------------------------
+port        offset    bank
+u2 port0    0x0000    MISC
+            0x0100    FMREG
+            0x0300    U2PHY_COM
+u2 port1    0x1000    MISC
+            0x1100    FMREG
+            0x1300    U2PHY_COM
+u2 port2    0x2000    MISC
+            ...
+u31 common  0x3000    DIG_GLB
+            0x3100    PHYA_GLB
+u31 port0   0x3400    DIG_LN_TOP
+            0x3500    DIG_LN_TX0
+            0x3600    DIG_LN_RX0
+            0x3700    DIG_LN_DAIF
+            0x3800    PHYA_LN
+u31 port1   0x3a00    DIG_LN_TOP
+            0x3b00    DIG_LN_TX0
+            0x3c00    DIG_LN_RX0
+            0x3d00    DIG_LN_DAIF
+            0x3e00    PHYA_LN
+            ...
+
+DIG_GLB & PHYA_GLB are shared by U31 ports.
+
+Example:
+
+u3phy: usb-phy@11c40000 {
+	compatible = "mediatek,mt3611-xsphy", "mediatek,xsphy";
+	reg = <0 0x11c43000 0 0x0200>;
+	mediatek,src-ref-clk-mhz = <26>;
+	mediatek,src-coef = <17>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+	ranges;
+
+	u2port0: usb-phy@11c40000 {
+		reg = <0 0x11c40000 0 0x0400>;
+		clocks = <&clk48m>;
+		clock-names = "ref";
+		mediatek,eye-src = <4>;
+		#phy-cells = <1>;
+	};
+
+	u3port0: usb-phy@11c43000 {
+		reg = <0 0x11c43400 0 0x0500>;
+		clocks = <&clk26m>;
+		clock-names = "ref";
+		mediatek,efuse-intr = <28>;
+		#phy-cells = <1>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
index dcf1b8f..266a1bb 100644
--- a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
+++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
@@ -9,7 +9,8 @@
 	       "qcom,ipq8074-qmp-pcie-phy" for PCIe phy on IPQ8074
 	       "qcom,msm8996-qmp-pcie-phy" for 14nm PCIe phy on msm8996,
 	       "qcom,msm8996-qmp-usb3-phy" for 14nm USB3 phy on msm8996,
-	       "qcom,qmp-v3-usb3-phy" for USB3 QMP V3 phy.
+	       "qcom,sdm845-qmp-usb3-phy" for USB3 QMP V3 phy on sdm845,
+	       "qcom,sdm845-qmp-usb3-uni-phy" for USB3 QMP V3 UNI phy on sdm845.
 
  - reg: offset and length of register set for PHY's common serdes block.
 
diff --git a/Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt
index 42c9742..03025d9 100644
--- a/Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt
+++ b/Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt
@@ -6,7 +6,7 @@
 Required properties:
  - compatible: compatible list, contains
 	       "qcom,msm8996-qusb2-phy" for 14nm PHY on msm8996,
-	       "qcom,qusb2-v2-phy" for QUSB2 V2 PHY.
+	       "qcom,sdm845-qusb2-phy" for 10nm PHY on sdm845.
 
  - reg: offset and length of the PHY register set.
  - #phy-cells: must be 0.
@@ -27,6 +27,27 @@
 		tuning parameter value for qusb2 phy.
 
  - qcom,tcsr-syscon: Phandle to TCSR syscon register region.
+ - qcom,imp-res-offset-value: It is a 6 bit value that specifies the offset to
+		be added to the PHY refgen RESCODE via the IMP_CTRL1 register.
+		It is a PHY tuning parameter that may vary for different boards
+		of the same SoC.
+		This property is applicable only to the QUSB2 v2 PHY (sdm845).
+ - qcom,hstx-trim-value: It is a 4 bit value that specifies the tuning for the
+		HSTX output current.
+		Possible range is 15mA to 24mA (step size of 600 uA).
+		See dt-bindings/phy/phy-qcom-qusb2.h for applicable values.
+		This property is applicable only to the QUSB2 v2 PHY (sdm845).
+		Default value is 22.2mA for sdm845.
+ - qcom,preemphasis-level: It is a 2 bit value that specifies the pre-emphasis
+		level.
+		Possible range is 0 to 15% (step size of 5%).
+		See dt-bindings/phy/phy-qcom-qusb2.h for applicable values.
+		This property is applicable only to the QUSB2 v2 PHY (sdm845).
+		Default value is 10% for sdm845.
+ - qcom,preemphasis-width: It is a 1 bit value that specifies how long the HSTX
+		pre-emphasis (specified using qcom,preemphasis-level) must be in
+		effect. The duration can be half-bit or full-bit.
+		See dt-bindings/phy/phy-qcom-qusb2.h for applicable values.
+		This property is applicable only to the QUSB2 v2 PHY (sdm845).
+		Default value is full-bit width for sdm845.
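+
+For instance, a QUSB2 v2 PHY node on sdm845 using these tuning properties
+might look like this (the address and tuning values are illustrative):
+
+	usb_phy: phy@88e2000 {
+		compatible = "qcom,sdm845-qusb2-phy";
+		reg = <0x88e2000 0x400>;
+		#phy-cells = <0>;
+		qcom,imp-res-offset-value = <8>;
+		qcom,hstx-trim-value = <4>;
+		qcom,preemphasis-level = <2>;
+		qcom,preemphasis-width = <0>;
+	};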
 
 Example:
 	hsusb_phy: phy@7411000 {
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index f335531..4733f76 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -127,7 +127,7 @@
 with the label "power".
 
 Optional properties:
-- required-opp: This contains phandle to an OPP node in another device's OPP
+- required-opps: This contains a phandle to an OPP node in another device's OPP
   table. It may contain an array of phandles, where each phandle points to an
   OPP of a different device. It should not contain multiple phandles to the OPP
   nodes in the same OPP table. This specifies the minimum required OPP of the
@@ -175,14 +175,14 @@
 		compatible = "foo,i-leak-current";
 		reg = <0x12350000 0x1000>;
 		power-domains = <&power 0>;
-		required-opp = <&domain0_opp_0>;
+		required-opps = <&domain0_opp_0>;
 	};
 
 	leaky-device1@12350000 {
 		compatible = "foo,i-leak-current";
 		reg = <0x12350000 0x1000>;
 		power-domains = <&power 1>;
-		required-opp = <&domain1_opp_1>;
+		required-opps = <&domain1_opp_1>;
 	};
 
 [1]. Documentation/devicetree/bindings/power/domain-idle-state.txt
diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
index 4a4766e..e66fd4e 100644
--- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
@@ -31,6 +31,8 @@
 
 Required properties:
 - compatible: should be one of:
+  - "rockchip,px30-io-voltage-domain" for px30
+  - "rockchip,px30-pmu-io-voltage-domain" for px30 pmu-domains
   - "rockchip,rk3188-io-voltage-domain" for rk3188
   - "rockchip,rk3228-io-voltage-domain" for rk3228
   - "rockchip,rk3288-io-voltage-domain" for rk3288
@@ -51,6 +53,19 @@
 to report their voltage.  The IO Voltage Domain for any non-specified
 supplies will be not be touched.
 
+Possible supplies for PX30:
+- vccio6-supply: The supply connected to VCCIO6.
+- vccio1-supply: The supply connected to VCCIO1.
+- vccio2-supply: The supply connected to VCCIO2.
+- vccio3-supply: The supply connected to VCCIO3.
+- vccio4-supply: The supply connected to VCCIO4.
+- vccio5-supply: The supply connected to VCCIO5.
+- vccio-oscgpi-supply: The supply connected to VCCIO_OSCGPI.
+
+Possible supplies for PX30 pmu-domains:
+- pmuio1-supply: The supply connected to PMUIO1.
+- pmuio2-supply: The supply connected to PMUIO2.
+
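+For example, a PX30 IO voltage domain node using some of these supplies
+might look like this (the regulator phandles are illustrative):
+
+	io_domains: io-domains {
+		compatible = "rockchip,px30-io-voltage-domain";
+		vccio1-supply = <&vcc_3v0>;
+		vccio5-supply = <&vcc_1v8>;
+	};
+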
 Possible supplies for rk3188:
 - ap0-supply:    The supply connected to AP0_VCC.
 - ap1-supply:    The supply connected to AP1_VCC.
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt
new file mode 100644
index 0000000..bcc612c
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,apr.txt
@@ -0,0 +1,84 @@
+Qualcomm APR (Asynchronous Packet Router) binding
+
+This binding describes the Qualcomm APR. APR is an IPC protocol for
+communication between the application processor and the QDSP. APR is mainly
+used for audio/voice services on the QDSP.
+
+- compatible:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "qcom,apr-v<VERSION-NUMBER>", example "qcom,apr-v2"
+
+- reg
+	Usage: required
+	Value type: <u32>
+	Definition: Destination processor ID.
+	Possible values are :
+			1 - APR simulator
+			2 - PC
+			3 - MODEM
+			4 - ADSP
+			5 - APPS
+			6 - MODEM2
+			7 - APPS2
+
+= APR SERVICES
+Each subnode of the APR node represents a service tied to this apr. The names
+of the nodes are not important. The properties of these nodes are defined
+by the individual bindings for the specific service.
+All APR services MUST contain the following property:
+
+- reg
+	Usage: required
+	Value type: <u32>
+	Definition: APR Service ID
+	Possible values are :
+			3 - DSP Core Service
+			4 - Audio Front End Service.
+			5 - Voice Stream Manager Service.
+			6 - Voice processing manager.
+			7 - Audio Stream Manager Service.
+			8 - Audio Device Manager Service.
+			9 - Multimode voice manager.
+			10 - Core voice stream.
+			11 - Core voice processor.
+			12 - Ultrasound stream manager.
+			13 - Listen stream manager.
+
+= EXAMPLE
+The following example represents a QDSP based sound card on an MSM8996 device
+which uses apr for communication between the Apps processor and the QDSP.
+
+	apr@4 {
+		compatible = "qcom,apr-v2";
+		reg = <APR_DOMAIN_ADSP>;
+
+		q6core@3 {
+			compatible = "qcom,q6core";
+			reg = <APR_SVC_ADSP_CORE>;
+		};
+
+		q6afe@4 {
+			compatible = "qcom,q6afe";
+			reg = <APR_SVC_AFE>;
+
+			dais {
+				#sound-dai-cells = <1>;
+				hdmi@1 {
+					reg = <1>;
+				};
+			};
+		};
+
+		q6asm@7 {
+			compatible = "qcom,q6asm";
+			reg = <APR_SVC_ASM>;
+			...
+		};
+
+		q6adm@8 {
+			compatible = "qcom,q6adm";
+			reg = <APR_SVC_ADM>;
+			...
+		};
+	};
diff --git a/Documentation/devicetree/bindings/sound/adi,ssm2305.txt b/Documentation/devicetree/bindings/sound/adi,ssm2305.txt
new file mode 100644
index 0000000..a9c9d83
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/adi,ssm2305.txt
@@ -0,0 +1,14 @@
+Analog Devices SSM2305 Speaker Amplifier
+========================================
+
+Required properties:
+  - compatible : "adi,ssm2305"
+  - shutdown-gpios : The gpio connected to the shutdown pin.
+                     The gpio signal is ACTIVE_LOW.
+
+Example:
+
+ssm2305: analog-amplifier {
+	compatible = "adi,ssm2305";
+	shutdown-gpios = <&gpio3 20 GPIO_ACTIVE_LOW>;
+};
diff --git a/Documentation/devicetree/bindings/sound/atmel-i2s.txt b/Documentation/devicetree/bindings/sound/atmel-i2s.txt
new file mode 100644
index 0000000..735368b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/atmel-i2s.txt
@@ -0,0 +1,47 @@
+* Atmel I2S controller
+
+Required properties:
+- compatible:     Should be "atmel,sama5d2-i2s".
+- reg:            Should be the physical base address of the controller and the
+                  length of memory mapped region.
+- interrupts:     Should contain the interrupt for the controller.
+- dmas:           Should be one per channel name listed in the dma-names property,
+                  as described in atmel-dma.txt and dma.txt files.
+- dma-names:      Two dmas have to be defined, "tx" and "rx".
+                  This IP also supports one shared channel for both rx and tx;
+                  if this mode is used, one "rx-tx" name must be used.
+- clocks:         Must contain an entry for each entry in clock-names.
+                  Please refer to clock-bindings.txt.
+- clock-names:    Should be one of each entry matching the clocks phandles list:
+                  - "pclk" (peripheral clock) Required.
+                  - "gclk" (generated clock) Optional (1).
+                  - "aclk" (Audio PLL clock) Optional (1).
+                  - "muxclk" (I2S mux clock) Optional (1).
+
+Optional properties:
+- pinctrl-0:      Should specify pin control groups used for this controller.
+- pinctrl-names:  Should contain only one value - "default".
+
+
+(1) : Only the peripheral clock is required. The generated clock, the Audio
+      PLL clock and the I2S mux clock are optional and should only be set
+      together, when master mode is required.
+
+Example:
+
+	i2s@f8050000 {
+		compatible = "atmel,sama5d2-i2s";
+		reg = <0xf8050000 0x300>;
+		interrupts = <54 IRQ_TYPE_LEVEL_HIGH 7>;
+		dmas = <&dma0
+			(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) |
+			 AT91_XDMAC_DT_PERID(31))>,
+		       <&dma0
+			(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) |
+			 AT91_XDMAC_DT_PERID(32))>;
+		dma-names = "tx", "rx";
+		clocks = <&i2s0_clk>, <&i2s0_gclk>, <&audio_pll_pmc>, <&i2s0muxck>;
+		clock-names = "pclk", "gclk", "aclk", "muxclk";
+		pinctrl-names = "default";
+		pinctrl-0 = <&pinctrl_i2s0_default>;
+	};
diff --git a/Documentation/devicetree/bindings/sound/cs42xx8.txt b/Documentation/devicetree/bindings/sound/cs42xx8.txt
index f631fbc..8619a15 100644
--- a/Documentation/devicetree/bindings/sound/cs42xx8.txt
+++ b/Documentation/devicetree/bindings/sound/cs42xx8.txt
@@ -16,7 +16,7 @@
 
 Example:
 
-codec: cs42888@48 {
+cs42888: codec@48 {
 	compatible = "cirrus,cs42888";
 	reg = <0x48>;
 	clocks = <&codec_mclk 0>;
diff --git a/Documentation/devicetree/bindings/sound/fsl,asrc.txt b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
index f5a1411..1d4d9f9 100644
--- a/Documentation/devicetree/bindings/sound/fsl,asrc.txt
+++ b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
@@ -31,14 +31,16 @@
 			  it. This property is optional depending on the SoC
 			  design.
 
-   - big-endian		: If this property is absent, the little endian mode
-			  will be in use as default. Otherwise, the big endian
-			  mode will be in use for all the device registers.
-
    - fsl,asrc-rate	: Defines a mutual sample rate used by DPCM Back Ends.
 
    - fsl,asrc-width	: Defines a mutual sample width used by DPCM Back Ends.
 
+Optional properties:
+
+   - big-endian		: If this property is absent, the little endian mode
+			  will be in use as default. Otherwise, the big endian
+			  mode will be in use for all the device registers.
+
 Example:
 
 asrc: asrc@2034000 {
diff --git a/Documentation/devicetree/bindings/sound/fsl,esai.txt b/Documentation/devicetree/bindings/sound/fsl,esai.txt
index cacd18b..5b99143 100644
--- a/Documentation/devicetree/bindings/sound/fsl,esai.txt
+++ b/Documentation/devicetree/bindings/sound/fsl,esai.txt
@@ -42,6 +42,8 @@
 			  means all the settings for Receiving would be
 			  duplicated from Transmition related registers.
 
+Optional properties:
+
   - big-endian		: If this property is absent, the native endian mode
 			  will be in use as default, or the big endian mode
 			  will be in use for all the device registers.
diff --git a/Documentation/devicetree/bindings/sound/fsl,spdif.txt b/Documentation/devicetree/bindings/sound/fsl,spdif.txt
index 38cfa75..8b324f8 100644
--- a/Documentation/devicetree/bindings/sound/fsl,spdif.txt
+++ b/Documentation/devicetree/bindings/sound/fsl,spdif.txt
@@ -33,6 +33,8 @@
 			  it. This property is optional depending on the SoC
 			  design.
 
+Optional properties:
+
    - big-endian		: If this property is absent, the native endian mode
 			  will be in use as default, or the big endian mode
 			  will be in use for all the device registers.
diff --git a/Documentation/devicetree/bindings/sound/fsl-sai.txt b/Documentation/devicetree/bindings/sound/fsl-sai.txt
index 740b467..dd9e597 100644
--- a/Documentation/devicetree/bindings/sound/fsl-sai.txt
+++ b/Documentation/devicetree/bindings/sound/fsl-sai.txt
@@ -28,9 +28,6 @@
 			  pinctrl-names. See ../pinctrl/pinctrl-bindings.txt
 			  for details of the property values.
 
-  - big-endian		: Boolean property, required if all the FTM_PWM
-			  registers are big-endian rather than little-endian.
-
   - lsb-first		: Configures whether the LSB or the MSB is transmitted
 			  first for the fifo data. If this property is absent,
 			  the MSB is transmitted first as default, or the LSB
@@ -48,6 +45,11 @@
 			  receive data by following their own bit clocks and
 			  frame sync clocks separately.
 
+Optional properties:
+
+  - big-endian		: Boolean property, required if all the SAI
+			  registers are big-endian rather than little-endian.
+
 Optional properties (for mx6ul):
 
   - fsl,sai-mclk-direction-output: This is a boolean property. If present,
diff --git a/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt b/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt
index e2f7f49..560762e 100644
--- a/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt
+++ b/Documentation/devicetree/bindings/sound/mt2701-afe-pcm.txt
@@ -1,7 +1,9 @@
 Mediatek AFE PCM controller for mt2701
 
 Required properties:
-- compatible = "mediatek,mt2701-audio";
+- compatible: should be one of the following:
+	      - "mediatek,mt2701-audio"
+	      - "mediatek,mt7622-audio"
 - interrupts: should contain AFE and ASYS interrupts
 - interrupt-names: should be "afe" and "asys"
 - power-domains: should define the power domain
diff --git a/Documentation/devicetree/bindings/sound/mt6351.txt b/Documentation/devicetree/bindings/sound/mt6351.txt
new file mode 100644
index 0000000..7fb2cb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mt6351.txt
@@ -0,0 +1,16 @@
+Mediatek MT6351 Audio Codec
+
+The communication between the MT6351 and the SoC goes through the Mediatek
+PMIC wrapper. For more details, please refer to the Mediatek PMIC wrapper
+documentation.
+
+Must be a child node of PMIC wrapper.
+
+Required properties:
+
+- compatible : "mediatek,mt6351-sound".
+
+Example:
+
+mt6351_snd {
+	compatible = "mediatek,mt6351-sound";
+};
diff --git a/Documentation/devicetree/bindings/sound/mt6797-afe-pcm.txt b/Documentation/devicetree/bindings/sound/mt6797-afe-pcm.txt
new file mode 100644
index 0000000..0ae29de
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mt6797-afe-pcm.txt
@@ -0,0 +1,42 @@
+Mediatek AFE PCM controller for mt6797
+
+Required properties:
+- compatible = "mediatek,mt6797-audio";
+- reg: register location and size
+- interrupts: should contain AFE interrupt
+- power-domains: should define the power domain
+- clocks: Must contain an entry for each entry in clock-names
+- clock-names: should have these clock names:
+		"infra_sys_audio_clk",
+		"infra_sys_audio_26m",
+		"mtkaif_26m_clk",
+		"top_mux_audio",
+		"top_mux_aud_intbus",
+		"top_sys_pll3_d4",
+		"top_sys_pll1_d4",
+		"top_clk26m_clk";
+
+Example:
+
+	afe: mt6797-afe-pcm@11220000 {
+		compatible = "mediatek,mt6797-audio";
+		reg = <0 0x11220000 0 0x1000>;
+		interrupts = <GIC_SPI 151 IRQ_TYPE_LEVEL_LOW>;
+		power-domains = <&scpsys MT6797_POWER_DOMAIN_AUDIO>;
+		clocks = <&infrasys CLK_INFRA_AUDIO>,
+			 <&infrasys CLK_INFRA_AUDIO_26M>,
+			 <&infrasys CLK_INFRA_AUDIO_26M_PAD_TOP>,
+			 <&topckgen CLK_TOP_MUX_AUDIO>,
+			 <&topckgen CLK_TOP_MUX_AUD_INTBUS>,
+			 <&topckgen CLK_TOP_SYSPLL3_D4>,
+			 <&topckgen CLK_TOP_SYSPLL1_D4>,
+			 <&clk26m>;
+		clock-names = "infra_sys_audio_clk",
+			      "infra_sys_audio_26m",
+			      "mtkaif_26m_clk",
+			      "top_mux_audio",
+			      "top_mux_aud_intbus",
+			      "top_sys_pll3_d4",
+			      "top_sys_pll1_d4",
+			      "top_clk26m_clk";
+	};
diff --git a/Documentation/devicetree/bindings/sound/mt6797-mt6351.txt b/Documentation/devicetree/bindings/sound/mt6797-mt6351.txt
new file mode 100644
index 0000000..1d95a88
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mt6797-mt6351.txt
@@ -0,0 +1,14 @@
+MT6797 with MT6351 CODEC
+
+Required properties:
+- compatible: "mediatek,mt6797-mt6351-sound"
+- mediatek,platform: the phandle of MT6797 ASoC platform
+- mediatek,audio-codec: the phandle of the MT6351 codec
+
+Example:
+
+	sound {
+		compatible = "mediatek,mt6797-mt6351-sound";
+		mediatek,audio-codec = <&mt6351_snd>;
+		mediatek,platform = <&afe>;
+	};
diff --git a/Documentation/devicetree/bindings/sound/qcom,apq8096.txt b/Documentation/devicetree/bindings/sound/qcom,apq8096.txt
new file mode 100644
index 0000000..aa54e49
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/qcom,apq8096.txt
@@ -0,0 +1,109 @@
+* Qualcomm Technologies APQ8096 ASoC sound card driver
+
+This binding describes the APQ8096 sound card, which uses qdsp for audio.
+
+- compatible:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "qcom,apq8096-sndcard"
+
+- qcom,audio-routing:
+	Usage: Optional
+	Value type: <stringlist>
+	Definition: A list of the connections between audio components.
+		  Each entry is a pair of strings, the first being the
+		  connection's sink, the second being the connection's
+		  source. Valid names could be power supplies, MicBias
+		  of the codec and the jacks on the board:
+
+		Board Connectors:
+			"Headphone Left"
+			"Headphone Right"
+			"Earphone"
+			"Line Out1"
+			"Line Out2"
+			"Line Out3"
+			"Line Out4"
+			"Analog Mic1"
+			"Analog Mic2"
+			"Analog Mic3"
+			"Analog Mic4"
+			"Analog Mic5"
+			"Analog Mic6"
+			"Digital Mic2"
+			"Digital Mic3"
+
+		Audio pins and MicBias on WCD9335 Codec:
+			"MIC_BIAS1
+			"MIC_BIAS2"
+			"MIC_BIAS3"
+			"MIC_BIAS4"
+			"AMIC1"
+			"AMIC2"
+			"AMIC3"
+			"AMIC4"
+			"AMIC5"
+			"AMIC6"
+			"AMIC6"
+			"DMIC1"
+			"DMIC2"
+			"DMIC3"
+
+= dailinks
+Each subnode of the sndcard represents a dailink, and the subnodes of each
+dailink are the cpu/codec/platform dais.
+
+- link-name:
+	Usage: required
+	Value type: <string>
+	Definition: User friendly name for dai link
+
+= CPU, PLATFORM, CODEC dais subnodes
+- cpu:
+	Usage: required
+	Value type: <subnode>
+	Definition: cpu dai sub-node
+
+- codec:
+	Usage: Optional
+	Value type: <subnode>
+	Definition: codec dai sub-node
+
+- platform:
+	Usage: Optional
+	Value type: <subnode>
+	Definition: platform dai sub-node
+
+- sound-dai:
+	Usage: required
+	Value type: <phandle with arguments>
+	Definition: dai phandle(s) and port of the CPU/CODEC/PLATFORM node.
+
+Example:
+
+audio {
+	compatible = "qcom,apq8096-sndcard";
+	qcom,model = "DB820c";
+
+	mm1-dai-link {
+		link-name = "MultiMedia1";
+		cpu {
+			sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA1>;
+		};
+	};
+
+	hdmi-dai-link {
+		link-name = "HDMI Playback";
+		cpu {
+			sound-dai = <&q6afe HDMI_RX>;
+		};
+
+		platform {
+			sound-dai = <&q6adm>;
+		};
+
+		codec {
+			sound-dai = <&hdmi 0>;
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/sound/qcom,q6adm.txt b/Documentation/devicetree/bindings/sound/qcom,q6adm.txt
new file mode 100644
index 0000000..cb709e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/qcom,q6adm.txt
@@ -0,0 +1,33 @@
+Qualcomm Audio Device Manager (Q6ADM) binding
+
+Q6ADM is one of the APR audio services on Q6DSP.
+Please refer to qcom,apr.txt for details of the common apr service bindings
+used by the apr service device.
+
+It must contain the following property:
+
+- compatible:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "qcom,q6adm-v<MAJOR-NUMBER>.<MINOR-NUMBER>".
+		   Or "qcom,q6adm" where the version number can be queried
+		   from DSP.
+		   example "qcom,q6adm-v2.0"
+
+
+= ADM routing
+"routing" subnode of the ADM node represents adm routing specific configuration
+
+- #sound-dai-cells
+	Usage: required
+	Value type: <u32>
+	Definition: Must be 0
+
+= EXAMPLE
+q6adm@8 {
+	compatible = "qcom,q6adm";
+	reg = <APR_SVC_ADM>;
+	q6routing: routing {
+		#sound-dai-cells = <0>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/sound/qcom,q6afe.txt b/Documentation/devicetree/bindings/sound/qcom,q6afe.txt
new file mode 100644
index 0000000..bdbf87d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/qcom,q6afe.txt
@@ -0,0 +1,172 @@
+Qualcomm Audio Front End (Q6AFE) binding
+
+AFE is one of the APR audio services on Q6DSP.
+Please refer to qcom,apr.txt for details of the common apr service bindings
+used by all apr services. It must also contain the following properties.
+
+- compatible:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "qcom,q6afe-v<MAJOR-NUMBER>.<MINOR-NUMBER>"
+		  Or "qcom,q6afe" where the version number can be queried
+		  from DSP.
+		  example "qcom,q6afe"
+
+= AFE DAIs (Digital Audio Interface)
+"dais" subnode of the AFE node. It represents the afe dais; each afe dai is a
+subnode of "dais" representing a board specific dai setup.
+The "dais" node should have the following properties, followed by the dai children.
+
+- #sound-dai-cells
+	Usage: required
+	Value type: <u32>
+	Definition: Must be 1
+
+- #address-cells
+	Usage: required
+	Value type: <u32>
+	Definition: Must be 1
+
+- #size-cells
+	Usage: required
+	Value type: <u32>
+	Definition: Must be 0
+
+== Each AFE DAI is a subnode of "dais" and represents a dai. It includes the
+board specific configuration of that dai. Must contain the following properties.
+
+- reg
+	Usage: required
+	Value type: <u32>
+	Definition: Must be dai id
+
+- qcom,sd-lines
+	Usage: required for mi2s interface
+	Value type: <prop-encoded-array>
+	Definition: Must be a list of the serial data lines used by this dai;
+	should be one or more of the sd lines 1-4.
+
+ - qcom,tdm-sync-mode:
+	Usage: required for tdm interface
+	Value type: <prop-encoded-array>
+	Definition: Synchronization mode.
+		0 - Short sync bit mode
+		1 - Long sync mode
+		2 - Short sync slot mode
+
+ - qcom,tdm-sync-src:
+	Usage: required for tdm interface
+	Value type: <prop-encoded-array>
+	Definition: Synchronization source.
+		0 - External source
+		1 - Internal source
+
+ - qcom,tdm-data-out:
+	Usage: required for tdm interface
+	Value type: <prop-encoded-array>
+	Definition: Data out signal to drive with other masters.
+		0 - Disable
+		1 - Enable
+
+ - qcom,tdm-invert-sync:
+	Usage: required for tdm interface
+	Value type: <prop-encoded-array>
+	Definition: Invert the sync.
+		0 - Normal
+		1 - Invert
+
+ - qcom,tdm-data-delay:
+	Usage: required for tdm interface
+	Value type: <prop-encoded-array>
+	Definition: Number of bit clocks to delay data
+		with respect to the sync edge.
+		0 - 0 bit clock cycle
+		1 - 1 bit clock cycle
+		2 - 2 bit clock cycle
+
+ - qcom,tdm-data-align:
+	Usage: required for tdm interface
+	Value type: <prop-encoded-array>
+	Definition: Indicates how data is packed within the slot. For example,
+		a 32-bit slot width in case the sample bit width is 24.
+		0 - MSB
+		1 - LSB
+
+= EXAMPLE
+
+q6afe@4 {
+	compatible = "qcom,q6afe";
+	reg = <APR_SVC_AFE>;
+
+	dais {
+		#sound-dai-cells = <1>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		hdmi@1 {
+			reg = <1>;
+		};
+
+		tdm@24 {
+			reg = <24>;
+			qcom,tdm-sync-mode = <1>;
+			qcom,tdm-sync-src = <1>;
+			qcom,tdm-data-out = <0>;
+			qcom,tdm-invert-sync = <1>;
+			qcom,tdm-data-delay = <1>;
+			qcom,tdm-data-align = <0>;
+		};
+
+		tdm@25 {
+			reg = <25>;
+			qcom,tdm-sync-mode = <1>;
+			qcom,tdm-sync-src = <1>;
+			qcom,tdm-data-out = <0>;
+			qcom,tdm-invert-sync = <1>;
+			qcom,tdm-data-delay = <1>;
+			qcom,tdm-data-align = <0>;
+		};
+
+		prim-mi2s-rx@16 {
+			reg = <16>;
+			qcom,sd-lines = <1 3>;
+		};
+
+		prim-mi2s-tx@17 {
+			reg = <17>;
+			qcom,sd-lines = <2>;
+		};
+
+		sec-mi2s-rx@18 {
+			reg = <18>;
+			qcom,sd-lines = <1 4>;
+		};
+
+		sec-mi2s-tx@19 {
+			reg = <19>;
+			qcom,sd-lines = <2>;
+		};
+
+		tert-mi2s-rx@20 {
+			reg = <20>;
+			qcom,sd-lines = <2 4>;
+		};
+
+		tert-mi2s-tx@21 {
+			reg = <21>;
+			qcom,sd-lines = <1>;
+		};
+
+		quat-mi2s-rx@22 {
+			reg = <22>;
+			qcom,sd-lines = <1>;
+		};
+
+		quat-mi2s-tx@23 {
+			reg = <23>;
+			qcom,sd-lines = <2>;
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/sound/qcom,q6asm.txt b/Documentation/devicetree/bindings/sound/qcom,q6asm.txt
new file mode 100644
index 0000000..2178eb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/qcom,q6asm.txt
@@ -0,0 +1,33 @@
+Qualcomm Audio Stream Manager (Q6ASM) binding
+
+Q6ASM is one of the APR audio services on Q6DSP.
+Please refer to qcom,apr.txt for details of the common apr service bindings
+used by the apr service device.
+
+It must contain the following property:
+
+- compatible:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "qcom,q6asm-v<MAJOR-NUMBER>.<MINOR-NUMBER>".
+		    Or "qcom,q6asm" where the version number can be queried
+		    from DSP.
+		    example "qcom,q6asm-v2.0"
+
+= ASM DAIs (Digital Audio Interface)
+"dais" subnode of the ASM node represents dai specific configuration
+
+- #sound-dai-cells
+	Usage: required
+	Value type: <u32>
+	Definition: Must be 1
+
+= EXAMPLE
+
+q6asm@7 {
+	compatible = "qcom,q6asm";
+	reg = <APR_SVC_ASM>;
+	q6asmdai: dais {
+		#sound-dai-cells = <1>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/sound/qcom,q6core.txt b/Documentation/devicetree/bindings/sound/qcom,q6core.txt
new file mode 100644
index 0000000..7f36ff8
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/qcom,q6core.txt
@@ -0,0 +1,21 @@
+Qualcomm ADSP Core service binding
+
+Q6CORE is one of the APR audio services on Q6DSP.
+Please refer to qcom,apr.txt for details of the common apr service bindings
+used by the apr service device.
+
+It must contain the following property:
+
+- compatible:
+	Usage: required
+	Value type: <stringlist>
+	Definition: must be "qcom,q6core-v<MAJOR-NUMBER>.<MINOR-NUMBER>".
+		   Or "qcom,q6core" where the version number can be queried
+		   from DSP.
+		   example "qcom,q6core-v2.0"
+
+= EXAMPLE
+q6core@3 {
+	compatible = "qcom,q6core";
+	reg = <APR_SVC_ADSP_CORE>;
+};
diff --git a/Documentation/devicetree/bindings/sound/rt274.txt b/Documentation/devicetree/bindings/sound/rt274.txt
index e9a6178..791a1bd 100644
--- a/Documentation/devicetree/bindings/sound/rt274.txt
+++ b/Documentation/devicetree/bindings/sound/rt274.txt
@@ -26,7 +26,7 @@
 
 Example:
 
-codec: rt274@1c {
+rt274: codec@1c {
 	compatible = "realtek,rt274";
 	reg = <0x1c>;
 	interrupts = <7 IRQ_TYPE_EDGE_FALLING>;
diff --git a/Documentation/devicetree/bindings/sound/rt5514.txt b/Documentation/devicetree/bindings/sound/rt5514.txt
index 4f33b0d..b25ed08 100644
--- a/Documentation/devicetree/bindings/sound/rt5514.txt
+++ b/Documentation/devicetree/bindings/sound/rt5514.txt
@@ -32,7 +32,7 @@
 
 Example:
 
-codec: rt5514@57 {
+rt5514: codec@57 {
 	compatible = "realtek,rt5514";
 	reg = <0x57>;
 };
diff --git a/Documentation/devicetree/bindings/sound/rt5616.txt b/Documentation/devicetree/bindings/sound/rt5616.txt
index e410858..540a4bf 100644
--- a/Documentation/devicetree/bindings/sound/rt5616.txt
+++ b/Documentation/devicetree/bindings/sound/rt5616.txt
@@ -26,7 +26,7 @@
 
 Example:
 
-codec: rt5616@1b {
+rt5616: codec@1b {
 	compatible = "realtek,rt5616";
 	reg = <0x1b>;
 };
diff --git a/Documentation/devicetree/bindings/sound/rt5640.txt b/Documentation/devicetree/bindings/sound/rt5640.txt
index 57fe646..e40e489 100644
--- a/Documentation/devicetree/bindings/sound/rt5640.txt
+++ b/Documentation/devicetree/bindings/sound/rt5640.txt
@@ -22,6 +22,41 @@
 
 - realtek,ldo1-en-gpios : The GPIO that controls the CODEC's LDO1_EN pin.
 
+- realtek,dmic1-data-pin
+  0: dmic1 is not used
+  1: using IN1P pin as dmic1 data pin
+  2: using GPIO3 pin as dmic1 data pin
+
+- realtek,dmic2-data-pin
+  0: dmic2 is not used
+  1: using IN1N pin as dmic2 data pin
+  2: using GPIO4 pin as dmic2 data pin
+
+- realtek,jack-detect-source
+  u32. Valid values:
+  0: jack-detect is not used
+  1: Use GPIO1 for jack-detect
+  2: Use JD1_IN4P for jack-detect
+  3: Use JD2_IN4N for jack-detect
+  4: Use GPIO2 for jack-detect
+  5: Use GPIO3 for jack-detect
+  6: Use GPIO4 for jack-detect
+
+- realtek,jack-detect-not-inverted
+  bool. Normal jack-detect switches give an inverted signal; set this bool
+  in the rare case you have a jack-detect switch which is not inverted.
+
+- realtek,over-current-threshold-microamp
+  u32, micbias over-current detection threshold in µA, valid values are
+  600, 1500 and 2000µA.
+
+- realtek,over-current-scale-factor
+  u32, micbias over-current detection scale-factor, valid values are:
+  0: Scale current by 0.5
+  1: Scale current by 0.75
+  2: Scale current by 1.0
+  3: Scale current by 1.5
+
 Pins on the device (for linking into audio routes) for RT5639/RT5640:
 
   * DMIC1
diff --git a/Documentation/devicetree/bindings/sound/rt5645.txt b/Documentation/devicetree/bindings/sound/rt5645.txt
index 7cee1f51..a03f9a8 100644
--- a/Documentation/devicetree/bindings/sound/rt5645.txt
+++ b/Documentation/devicetree/bindings/sound/rt5645.txt
@@ -69,4 +69,4 @@
 	realtek,dmic-en = "true";
 	realtek,en-jd-func = "true";
 	realtek,jd-mode = <3>;
-};
\ No newline at end of file
+};
diff --git a/Documentation/devicetree/bindings/sound/rt5651.txt b/Documentation/devicetree/bindings/sound/rt5651.txt
index b852218..a41199a 100644
--- a/Documentation/devicetree/bindings/sound/rt5651.txt
+++ b/Documentation/devicetree/bindings/sound/rt5651.txt
@@ -50,7 +50,7 @@
 
 Example:
 
-codec: rt5651@1a {
+rt5651: codec@1a {
 	compatible = "realtek,rt5651";
 	reg = <0x1a>;
 	realtek,dmic-en = "true";
diff --git a/Documentation/devicetree/bindings/sound/rt5663.txt b/Documentation/devicetree/bindings/sound/rt5663.txt
index 497bcfc..2338644 100644
--- a/Documentation/devicetree/bindings/sound/rt5663.txt
+++ b/Documentation/devicetree/bindings/sound/rt5663.txt
@@ -47,7 +47,7 @@
 
 Example:
 
-codec: rt5663@12 {
+rt5663: codec@12 {
 	compatible = "realtek,rt5663";
 	reg = <0x12>;
 	interrupts = <7 IRQ_TYPE_EDGE_FALLING>;
diff --git a/Documentation/devicetree/bindings/sound/rt5668.txt b/Documentation/devicetree/bindings/sound/rt5668.txt
new file mode 100644
index 0000000..c88b96e
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rt5668.txt
@@ -0,0 +1,50 @@
+RT5668B audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : "realtek,rt5668b"
+
+- reg : The I2C address of the device.
+
+Optional properties:
+
+- interrupts : The CODEC's interrupt output.
+
+- realtek,dmic1-data-pin
+  0: dmic1 is not used
+  1: using GPIO2 pin as dmic1 data pin
+  2: using GPIO5 pin as dmic1 data pin
+
+- realtek,dmic1-clk-pin
+  0: using GPIO1 pin as dmic1 clock pin
+  1: using GPIO3 pin as dmic1 clock pin
+
+- realtek,jd-src
+  0: No JD is used
+  1: using JD1 as JD source
+
+- realtek,ldo1-en-gpios : The GPIO that controls the CODEC's LDO1_EN pin.
+
+Pins on the device (for linking into audio routes) for RT5668B:
+
+  * DMIC L1
+  * DMIC R1
+  * IN1P
+  * HPOL
+  * HPOR
+
+Example:
+
+rt5668 {
+	compatible = "realtek,rt5668b";
+	reg = <0x1a>;
+	interrupt-parent = <&gpio>;
+	interrupts = <TEGRA_GPIO(U, 6) GPIO_ACTIVE_HIGH>;
+	realtek,ldo1-en-gpios =
+		<&gpio TEGRA_GPIO(R, 2) GPIO_ACTIVE_HIGH>;
+	realtek,dmic1-data-pin = <1>;
+	realtek,dmic1-clk-pin = <1>;
+	realtek,jd-src = <1>;
+};
diff --git a/Documentation/devicetree/bindings/sound/sgtl5000.txt b/Documentation/devicetree/bindings/sound/sgtl5000.txt
index 9a36c7e..0f21445 100644
--- a/Documentation/devicetree/bindings/sound/sgtl5000.txt
+++ b/Documentation/devicetree/bindings/sound/sgtl5000.txt
@@ -39,7 +39,7 @@
 
 Example:
 
-codec: sgtl5000@a {
+sgtl5000: codec@a {
 	compatible = "fsl,sgtl5000";
 	reg = <0x0a>;
 	#sound-dai-cells = <0>;
diff --git a/Documentation/devicetree/bindings/sound/simple-card.txt b/Documentation/devicetree/bindings/sound/simple-card.txt
index 17c13e7..a4c72d0 100644
--- a/Documentation/devicetree/bindings/sound/simple-card.txt
+++ b/Documentation/devicetree/bindings/sound/simple-card.txt
@@ -86,6 +86,11 @@
 					  in dai startup() and disabled with
 					  clk_disable_unprepare() in dai
 					  shutdown().
+					  If a clock is specified and a
+					  multiplication factor is given with
+					  mclk-fs, the clock will be set to the
+					  calculated mclk frequency when the
+					  stream starts.
 - system-clock-direction-out		: specifies clock direction as 'out' on
 					  initialization. It is useful for some aCPUs with
 					  fixed clocks.
diff --git a/Documentation/devicetree/bindings/sound/ti,tas6424.txt b/Documentation/devicetree/bindings/sound/ti,tas6424.txt
index 1c4ada0..eacb54f 100644
--- a/Documentation/devicetree/bindings/sound/ti,tas6424.txt
+++ b/Documentation/devicetree/bindings/sound/ti,tas6424.txt
@@ -6,6 +6,8 @@
 	- compatible: "ti,tas6424" - TAS6424
 	- reg: I2C slave address
 	- sound-dai-cells: must be equal to 0
+	- standby-gpios: GPIO used to shut the TAS6424 down.
+	- mute-gpios: GPIO used to mute all the outputs.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/sound/tscs42xx.txt b/Documentation/devicetree/bindings/sound/tscs42xx.txt
index 2ac2f09..7eea32e 100644
--- a/Documentation/devicetree/bindings/sound/tscs42xx.txt
+++ b/Documentation/devicetree/bindings/sound/tscs42xx.txt
@@ -8,9 +8,15 @@
 	- reg : 	<0x71> for analog mic
 			<0x69> for digital mic
 
+	- clock-names:	Must be one of the following: "mclk1", "xtal", "mclk2"
+
+	- clocks:	phandle of the clock that provides the codec sysclk
+
 Example:
 
 wookie: codec@69 {
 	compatible = "tempo,tscs42A2";
 	reg = <0x69>;
+	clock-names = "xtal";
+	clocks = <&audio_xtal>;
 };
diff --git a/Documentation/devicetree/bindings/sound/tscs454.txt b/Documentation/devicetree/bindings/sound/tscs454.txt
new file mode 100644
index 0000000..3ba3e2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tscs454.txt
@@ -0,0 +1,23 @@
+TSCS454 Audio CODEC
+
+Required Properties:
+
+	- compatible :	"tempo,tscs454"
+
+	- reg : 	<0x69>
+
+	- clock-names:	Must be one of the following: "xtal", "mclk1", "mclk2"
+
+	- clocks:	phandle of the clock that provides the codec sysclk
+
+	Note: If no clock is provided then the bit clock is assumed
+
+Example:
+
+redwood: codec@69 {
+	#sound-dai-cells = <1>;
+	compatible = "tempo,tscs454";
+	reg = <0x69>;
+	clock-names = "mclk1";
+	clocks = <&audio_mclk>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8510.txt b/Documentation/devicetree/bindings/sound/wm8510.txt
index fa1a32b..e6b6cc0 100644
--- a/Documentation/devicetree/bindings/sound/wm8510.txt
+++ b/Documentation/devicetree/bindings/sound/wm8510.txt
@@ -12,7 +12,7 @@
 
 Example:
 
-codec: wm8510@1a {
+wm8510: codec@1a {
 	compatible = "wlf,wm8510";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8523.txt b/Documentation/devicetree/bindings/sound/wm8523.txt
index 0474618..f3a6485 100644
--- a/Documentation/devicetree/bindings/sound/wm8523.txt
+++ b/Documentation/devicetree/bindings/sound/wm8523.txt
@@ -10,7 +10,7 @@
 
 Example:
 
-codec: wm8523@1a {
+wm8523: codec@1a {
 	compatible = "wlf,wm8523";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8524.txt b/Documentation/devicetree/bindings/sound/wm8524.txt
index 0f05535..f6c0c26 100644
--- a/Documentation/devicetree/bindings/sound/wm8524.txt
+++ b/Documentation/devicetree/bindings/sound/wm8524.txt
@@ -10,7 +10,7 @@
 
 Example:
 
-codec: wm8524 {
+wm8524: codec {
 	compatible = "wlf,wm8524";
 	wlf,mute-gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8580.txt b/Documentation/devicetree/bindings/sound/wm8580.txt
index 78fce9b..ff3f9f5 100644
--- a/Documentation/devicetree/bindings/sound/wm8580.txt
+++ b/Documentation/devicetree/bindings/sound/wm8580.txt
@@ -10,7 +10,7 @@
 
 Example:
 
-codec: wm8580@1a {
+wm8580: codec@1a {
 	compatible = "wlf,wm8580";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8711.txt b/Documentation/devicetree/bindings/sound/wm8711.txt
index 8ed9998..c30a138 100644
--- a/Documentation/devicetree/bindings/sound/wm8711.txt
+++ b/Documentation/devicetree/bindings/sound/wm8711.txt
@@ -12,7 +12,7 @@
 
 Example:
 
-codec: wm8711@1a {
+wm8711: codec@1a {
 	compatible = "wlf,wm8711";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8728.txt b/Documentation/devicetree/bindings/sound/wm8728.txt
index a8b5c36..a3608b4 100644
--- a/Documentation/devicetree/bindings/sound/wm8728.txt
+++ b/Documentation/devicetree/bindings/sound/wm8728.txt
@@ -12,7 +12,7 @@
 
 Example:
 
-codec: wm8728@1a {
+wm8728: codec@1a {
 	compatible = "wlf,wm8728";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8731.txt b/Documentation/devicetree/bindings/sound/wm8731.txt
index 236690e..f660d9b 100644
--- a/Documentation/devicetree/bindings/sound/wm8731.txt
+++ b/Documentation/devicetree/bindings/sound/wm8731.txt
@@ -12,7 +12,7 @@
 
 Example:
 
-codec: wm8731@1a {
+wm8731: codec@1a {
 	compatible = "wlf,wm8731";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8737.txt b/Documentation/devicetree/bindings/sound/wm8737.txt
index 4bc2cea..eda1ec6 100644
--- a/Documentation/devicetree/bindings/sound/wm8737.txt
+++ b/Documentation/devicetree/bindings/sound/wm8737.txt
@@ -12,7 +12,7 @@
 
 Example:
 
-codec: wm8737@1a {
+wm8737: codec@1a {
 	compatible = "wlf,wm8737";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8741.txt b/Documentation/devicetree/bindings/sound/wm8741.txt
index a133154..b69e196 100644
--- a/Documentation/devicetree/bindings/sound/wm8741.txt
+++ b/Documentation/devicetree/bindings/sound/wm8741.txt
@@ -21,7 +21,7 @@
 
 Example:
 
-codec: wm8741@1a {
+wm8741: codec@1a {
 	compatible = "wlf,wm8741";
 	reg = <0x1a>;
 
diff --git a/Documentation/devicetree/bindings/sound/wm8750.txt b/Documentation/devicetree/bindings/sound/wm8750.txt
index 8db239f..682f221 100644
--- a/Documentation/devicetree/bindings/sound/wm8750.txt
+++ b/Documentation/devicetree/bindings/sound/wm8750.txt
@@ -12,7 +12,7 @@
 
 Example:
 
-codec: wm8750@1a {
+wm8750: codec@1a {
 	compatible = "wlf,wm8750";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8753.txt b/Documentation/devicetree/bindings/sound/wm8753.txt
index 8eee612..eca9e5a 100644
--- a/Documentation/devicetree/bindings/sound/wm8753.txt
+++ b/Documentation/devicetree/bindings/sound/wm8753.txt
@@ -34,7 +34,7 @@
 
 Example:
 
-codec: wm8753@1a {
+wm8753: codec@1a {
 	compatible = "wlf,wm8753";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8770.txt b/Documentation/devicetree/bindings/sound/wm8770.txt
index 866e00c..cac762a 100644
--- a/Documentation/devicetree/bindings/sound/wm8770.txt
+++ b/Documentation/devicetree/bindings/sound/wm8770.txt
@@ -10,7 +10,7 @@
 
 Example:
 
-codec: wm8770@1 {
+wm8770: codec@1 {
 	compatible = "wlf,wm8770";
 	reg = <1>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8776.txt b/Documentation/devicetree/bindings/sound/wm8776.txt
index 3b9ca49..0117336 100644
--- a/Documentation/devicetree/bindings/sound/wm8776.txt
+++ b/Documentation/devicetree/bindings/sound/wm8776.txt
@@ -12,7 +12,7 @@
 
 Example:
 
-codec: wm8776@1a {
+wm8776: codec@1a {
 	compatible = "wlf,wm8776";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8804.txt b/Documentation/devicetree/bindings/sound/wm8804.txt
index 6fd124b..2c1641c 100644
--- a/Documentation/devicetree/bindings/sound/wm8804.txt
+++ b/Documentation/devicetree/bindings/sound/wm8804.txt
@@ -19,7 +19,7 @@
 
 Example:
 
-codec: wm8804@1a {
+wm8804: codec@1a {
 	compatible = "wlf,wm8804";
 	reg = <0x1a>;
 };
diff --git a/Documentation/devicetree/bindings/sound/wm8903.txt b/Documentation/devicetree/bindings/sound/wm8903.txt
index afc51ca..6371c24 100644
--- a/Documentation/devicetree/bindings/sound/wm8903.txt
+++ b/Documentation/devicetree/bindings/sound/wm8903.txt
@@ -57,7 +57,7 @@
 
 Example:
 
-codec: wm8903@1a {
+wm8903: codec@1a {
 	compatible = "wlf,wm8903";
 	reg = <0x1a>;
 	interrupts = < 347 >;
diff --git a/Documentation/devicetree/bindings/sound/wm8960.txt b/Documentation/devicetree/bindings/sound/wm8960.txt
index 2deb8a3..6d29ac3 100644
--- a/Documentation/devicetree/bindings/sound/wm8960.txt
+++ b/Documentation/devicetree/bindings/sound/wm8960.txt
@@ -23,7 +23,7 @@
 
 Example:
 
-codec: wm8960@1a {
+wm8960: codec@1a {
 	compatible = "wlf,wm8960";
 	reg = <0x1a>;
 
diff --git a/Documentation/devicetree/bindings/sound/wm8962.txt b/Documentation/devicetree/bindings/sound/wm8962.txt
index 7f82b59..dcfa9a33 100644
--- a/Documentation/devicetree/bindings/sound/wm8962.txt
+++ b/Documentation/devicetree/bindings/sound/wm8962.txt
@@ -24,7 +24,7 @@
 
 Example:
 
-codec: wm8962@1a {
+wm8962: codec@1a {
 	compatible = "wlf,wm8962";
 	reg = <0x1a>;
 
diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt
index 68c4e8d..4a9dead 100644
--- a/Documentation/devicetree/bindings/sound/wm8994.txt
+++ b/Documentation/devicetree/bindings/sound/wm8994.txt
@@ -59,7 +59,7 @@
 
 Example:
 
-codec: wm8994@1a {
+wm8994: codec@1a {
 	compatible = "wlf,wm8994";
 	reg = <0x1a>;
 
diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
index 0e03344..2e93181 100644
--- a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
+++ b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
@@ -76,6 +76,10 @@
   needs to make sure it does not send more than 90%
   maximum_periodic_data_per_frame. The use case is multiple transactions, but
   less frame rate.
+- mux-controls: The mux control for toggling host/device output of this
+  controller. It's expected that a mux state of 0 indicates device mode and a
+  mux state of 1 indicates host mode.
+- mux-control-names: Shall be "usb_switch" if mux-controls is specified.
 
 i.mx specific properties
 - fsl,usbmisc: phandler of non-core register device, with one
@@ -102,4 +106,6 @@
 		rx-burst-size-dword = <0x10>;
 		extcon = <0>, <&usb_id>;
 		phy-clkgate-delay-us = <400>;
+		mux-controls = <&usb_switch>;
+		mux-control-names = "usb_switch";
 	};
diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt
index 0dbd308..7f13ebe 100644
--- a/Documentation/devicetree/bindings/usb/dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -7,6 +7,26 @@
  - compatible: must be "snps,dwc3"
  - reg : Address and length of the register set for the device
  - interrupts: Interrupts used by the dwc3 controller.
+ - clock-names: should contain "ref", "bus_early", "suspend"
+ - clocks: list of phandle and clock specifier pairs corresponding to
+           entries in the clock-names property.
+
+Exception for clocks:
+  clocks are optional if the parent node (i.e. glue-layer) is compatible with
+  one of the following:
+    "amlogic,meson-axg-dwc3"
+    "amlogic,meson-gxl-dwc3"
+    "cavium,octeon-7130-usb-uctl"
+    "qcom,dwc3"
+    "samsung,exynos5250-dwusb3"
+    "samsung,exynos7-dwusb3"
+    "sprd,sc9860-dwc3"
+    "st,stih407-dwc3"
+    "ti,am437x-dwc3"
+    "ti,dwc3"
+    "ti,keystone-dwc3"
+    "rockchip,rk3399-dwc3"
+    "xlnx,zynqmp-dwc3"
 
 Optional properties:
  - usb-phy : array of phandle for the PHY device.  The first element
@@ -15,6 +35,7 @@
  - phys: from the *Generic PHY* bindings
  - phy-names: from the *Generic PHY* bindings; supported names are "usb2-phy"
 	or "usb3-phy".
+ - resets: a single pair of phandle and reset specifier
  - snps,usb3_lpm_capable: determines if platform is USB3 LPM capable
  - snps,disable_scramble_quirk: true when SW should disable data scrambling.
 	Only really useful for FPGA builds.
diff --git a/Documentation/devicetree/bindings/usb/fcs,fusb302.txt b/Documentation/devicetree/bindings/usb/fcs,fusb302.txt
index 472facf..6087dc7 100644
--- a/Documentation/devicetree/bindings/usb/fcs,fusb302.txt
+++ b/Documentation/devicetree/bindings/usb/fcs,fusb302.txt
@@ -6,12 +6,6 @@
 - interrupts             : Interrupt specifier
 
 Optional properties :
-- fcs,max-sink-microvolt : Maximum voltage to negotiate when configured as sink
-- fcs,max-sink-microamp  : Maximum current to negotiate when configured as sink
-- fcs,max-sink-microwatt : Maximum power to negotiate when configured as sink
-			   If this is less then max-sink-microvolt *
-			   max-sink-microamp then the configured current will
-			   be clamped.
 - fcs,operating-sink-microwatt :
 			   Minimum amount of power accepted from a sink
 			   when negotiating
diff --git a/Documentation/devicetree/bindings/usb/hisilicon,histb-xhci.txt b/Documentation/devicetree/bindings/usb/hisilicon,histb-xhci.txt
new file mode 100644
index 0000000..f463349
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/hisilicon,histb-xhci.txt
@@ -0,0 +1,45 @@
+HiSilicon STB xHCI
+
+The device node for HiSilicon STB xHCI host controller
+
+Required properties:
+ - compatible: should be "hisilicon,hi3798cv200-xhci"
+ - reg: specifies physical base address and size of the registers
+ - interrupts : interrupt used by the controller
+ - clocks: a list of phandle + clock-specifier pairs, one for each
+	entry in clock-names
+ - clock-names: must contain
+	"bus": for bus clock
+	"utmi": for utmi clock
+	"pipe": for pipe clock
+	"suspend": for suspend clock
+ - resets: a list of phandle and reset specifier pairs as listed in
+	reset-names property.
+ - reset-names: must contain
+	"soft": for soft reset
+ - phys: a list of phandle + phy specifier pairs
+ - phy-names: must contain at least one of the following:
+	"inno": for inno phy
+	"combo": for combo phy
+
+Optional properties:
+  - usb2-lpm-disable: indicate if we don't want to enable USB2 HW LPM
+  - usb3-lpm-capable: determines if platform is USB3 LPM capable
+  - imod-interval-ns: default interrupt moderation interval is 40000ns
+
+Example:
+
+xhci0: xhci@f98a0000 {
+	compatible = "hisilicon,hi3798cv200-xhci";
+	reg = <0xf98a0000 0x10000>;
+	interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&crg HISTB_USB3_BUS_CLK>,
+		 <&crg HISTB_USB3_UTMI_CLK>,
+		 <&crg HISTB_USB3_PIPE_CLK>,
+		 <&crg HISTB_USB3_SUSPEND_CLK>;
+	clock-names = "bus", "utmi", "pipe", "suspend";
+	resets = <&crg 0xb0 12>;
+	reset-names = "soft";
+	phys = <&usb2_phy1_port1 0>, <&combphy0 PHY_TYPE_USB3>;
+	phy-names = "inno", "combo";
+};
diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.txt b/Documentation/devicetree/bindings/usb/qcom,dwc3.txt
index bc8a2fa..95afdcf 100644
--- a/Documentation/devicetree/bindings/usb/qcom,dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.txt
@@ -1,54 +1,95 @@
 Qualcomm SuperSpeed DWC3 USB SoC controller
 
 Required properties:
-- compatible:	should contain "qcom,dwc3"
+- compatible:		Compatible list, contains
+			"qcom,dwc3"
+			"qcom,msm8996-dwc3" for msm8996 SOC.
+			"qcom,sdm845-dwc3" for sdm845 SOC.
+- reg:			Offset and length of register set for QSCRATCH wrapper
+- power-domains:	specifies a phandle to PM domain provider node
 - clocks:		A list of phandle + clock-specifier pairs for the
 				clocks listed in clock-names
-- clock-names:	Should contain the following:
+- clock-names:		Should contain the following:
   "core"		Master/Core clock, have to be >= 125 MHz for SS
 				operation and >= 60MHz for HS operation
+  "mock_utmi"		Mock utmi clock needed for ITP/SOF generation in
+				host mode. Its frequency should be 19.2MHz.
+  "sleep"		Sleep clock, used for wakeup when USB3 core goes
+				into low power mode (U3).
 
 Optional clocks:
-  "iface"		System bus AXI clock.  Not present on all platforms
-  "sleep"		Sleep clock, used when USB3 core goes into low
-				power mode (U3).
+  "iface"		System bus AXI clock.
+			Not present on "qcom,msm8996-dwc3" compatible.
+  "cfg_noc"		System Config NOC clock.
+			Not present on "qcom,msm8996-dwc3" compatible.
+- assigned-clocks:	Should be:
+				MOCK_UTMI_CLK
+				MASTER_CLK
+- assigned-clock-rates: Should be:
+                                19.2MHz (19200000) for MOCK_UTMI_CLK
+                                >=125MHz (125000000) for MASTER_CLK in SS mode
+                                >=60MHz (60000000) for MASTER_CLK in HS mode
+
+Optional properties:
+- resets:		Phandle to reset control that resets core and wrapper.
+- interrupts:		specifies interrupts from the controller wrapper used
+			to wake up from the low power/suspend state. Must contain
+			one or more entries matching the interrupt-names property
+- interrupt-names:	Must include the following entries:
+			- "hs_phy_irq": The interrupt that is asserted when a
+			   wakeup event is received on USB2 bus
+			- "ss_phy_irq": The interrupt that is asserted when a
+			   wakeup event is received on USB3 bus
+			- "dm_hs_phy_irq" and "dp_hs_phy_irq": Separate
+			   interrupts for any wakeup event on DM and DP lines
+- qcom,select-utmi-as-pipe-clk: if present, disable USB3 pipe_clk requirement.
+				Used when dwc3 operates without SSPHY and only
+				HS/FS/LS modes are supported.
 
 Required child node:
 A child node must exist to represent the core DWC3 IP block. The name of
 the node is not important. The content of the node is defined in dwc3.txt.
 
 Phy documentation is provided in the following places:
-Documentation/devicetree/bindings/phy/qcom-dwc3-usb-phy.txt
+Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt   - USB3 QMP PHY
+Documentation/devicetree/bindings/phy/qcom-qusb2-phy.txt - USB2 QUSB2 PHY
 
 Example device nodes:
 
 		hs_phy: phy@100f8800 {
-			compatible = "qcom,dwc3-hs-usb-phy";
-			reg = <0x100f8800 0x30>;
-			clocks = <&gcc USB30_0_UTMI_CLK>;
-			clock-names = "ref";
-			#phy-cells = <0>;
-
+			compatible = "qcom,qusb2-v2-phy";
+			...
 		};
 
 		ss_phy: phy@100f8830 {
-			compatible = "qcom,dwc3-ss-usb-phy";
-			reg = <0x100f8830 0x30>;
-			clocks = <&gcc USB30_0_MASTER_CLK>;
-			clock-names = "ref";
-			#phy-cells = <0>;
-
+			compatible = "qcom,qmp-v3-usb3-phy";
+			...
 		};
 
-		usb3_0: usb30@0 {
+		usb3_0: usb30@a6f8800 {
 			compatible = "qcom,dwc3";
+			reg = <0xa6f8800 0x400>;
 			#address-cells = <1>;
 			#size-cells = <1>;
-			clocks = <&gcc USB30_0_MASTER_CLK>;
-			clock-names = "core";
-
 			ranges;
 
+			interrupts = <0 131 0>, <0 486 0>, <0 488 0>, <0 489 0>;
+			interrupt-names = "hs_phy_irq", "ss_phy_irq",
+				  "dm_hs_phy_irq", "dp_hs_phy_irq";
+
+			clocks = <&gcc GCC_USB30_PRIM_MASTER_CLK>,
+				<&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>,
+				<&gcc GCC_USB30_PRIM_SLEEP_CLK>;
+			clock-names = "core", "mock_utmi", "sleep";
+
+			assigned-clocks = <&gcc GCC_USB30_PRIM_MOCK_UTMI_CLK>,
+					  <&gcc GCC_USB30_PRIM_MASTER_CLK>;
+			assigned-clock-rates = <19200000>, <133000000>;
+
+			resets = <&gcc GCC_USB30_PRIM_BCR>;
+			reset-names = "core_reset";
+			power-domains = <&gcc USB30_PRIM_GDSC>;
+			qcom,select-utmi-as-pipe-clk;
 
 			dwc3@10000000 {
 				compatible = "snps,dwc3";
diff --git a/Documentation/devicetree/bindings/usb/richtek,rt1711h.txt b/Documentation/devicetree/bindings/usb/richtek,rt1711h.txt
new file mode 100644
index 0000000..09e847e
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/richtek,rt1711h.txt
@@ -0,0 +1,17 @@
+Richtek RT1711H TypeC PD Controller.
+
+Required properties:
+ - compatible : Must be "richtek,rt1711h".
+ - reg : Must be 0x4e, the slave address of the RT1711H.
+ - interrupt-parent : the phandle for the interrupt controller that
+   provides interrupts for this device.
+ - interrupts : <a b> where a is the interrupt number and b represents an
+   encoding of the sense and level information for the interrupt.
+
+Example :
+rt1711h@4e {
+	compatible = "richtek,rt1711h";
+	reg = <0x4e>;
+	interrupt-parent = <&gpio26>;
+	interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+};
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index a38d8bf..3600383 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -75,6 +75,7 @@
 compulab	CompuLab Ltd.
 cortina	Cortina Systems, Inc.
 cosmic	Cosmic Circuits
+crane	Crane Connectivity Solutions
 creative	Creative Technology Ltd
 crystalfontz	Crystalfontz America, Inc.
 cubietech	Cubietech, Ltd.
diff --git a/Documentation/clk.txt b/Documentation/driver-api/clk.rst
similarity index 100%
rename from Documentation/clk.txt
rename to Documentation/driver-api/clk.rst
diff --git a/Documentation/driver-api/device_connection.rst b/Documentation/driver-api/device_connection.rst
index affbc556..ba36422 100644
--- a/Documentation/driver-api/device_connection.rst
+++ b/Documentation/driver-api/device_connection.rst
@@ -40,4 +40,4 @@
 ---
 
 .. kernel-doc:: drivers/base/devcon.c
-   : functions: device_connection_find_match device_connection_find device_connection_add device_connection_remove
+   :functions: device_connection_find_match device_connection_find device_connection_add device_connection_remove
diff --git a/Documentation/driver-api/firmware/fallback-mechanisms.rst b/Documentation/driver-api/firmware/fallback-mechanisms.rst
index f353783..d35fed6 100644
--- a/Documentation/driver-api/firmware/fallback-mechanisms.rst
+++ b/Documentation/driver-api/firmware/fallback-mechanisms.rst
@@ -72,9 +72,12 @@
 associating the device used to make the request as the device's parent.
 The sysfs directory's file attributes are defined and controlled through
 the new device's class (firmware_class) and group (fw_dev_attr_groups).
-This is actually where the original firmware_class.c file name comes from,
-as originally the only firmware loading mechanism available was the
-mechanism we now use as a fallback mechanism.
+This is actually where the original firmware_class module name came from,
+given that originally the only firmware loading mechanism available was the
+mechanism we now use as a fallback mechanism, which registers a struct class
+firmware_class. Because the attributes exposed to userspace are tied to this
+name, the firmware_class module cannot be renamed in the future, to ensure
+backward compatibility with old userspace.
 
 To load firmware using the sysfs interface we expose a loading indicator,
 and a file upload firmware into:
@@ -83,7 +86,7 @@
   * /sys/$DEVPATH/data
 
 To upload firmware you will echo 1 onto the loading file to indicate
-you are loading firmware. You then cat the firmware into the data file,
+you are loading firmware. You then write the firmware into the data file,
 and you notify the kernel the firmware is ready by echo'ing 0 onto
 the loading file.
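+
+For illustration, here is a minimal user-space sketch of that protocol in C
+(a hypothetical helper; the $DEVPATH placeholder and error handling are
+elided, and the firmware image is assumed to already be in a buffer)::
+
+	#include <fcntl.h>
+	#include <unistd.h>
+
+	static void upload_firmware(const char *buf, size_t len)
+	{
+		int loading = open("/sys/$DEVPATH/loading", O_WRONLY);
+		int data = open("/sys/$DEVPATH/data", O_WRONLY);
+
+		write(loading, "1", 1);	/* tell the kernel loading starts */
+		write(data, buf, len);	/* stream the firmware image */
+		write(loading, "0", 1);	/* tell the kernel it is ready */
+
+		close(data);
+		close(loading);
+	}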
 
@@ -136,7 +139,8 @@
 distributions today disable CONFIG_FW_LOADER_USER_HELPER_FALLBACK.
 
 Refer to do_firmware_uevent() for details of the kobject event variables
-setup. Variables passwdd with a kobject add event:
+setup. The variables currently passed to userspace with a "kobject add"
+event are:
 
 * FIRMWARE=firmware name
 * TIMEOUT=timeout value
diff --git a/Documentation/driver-api/firmware/firmware_cache.rst b/Documentation/driver-api/firmware/firmware_cache.rst
index 2210e5b..c2e69d9 100644
--- a/Documentation/driver-api/firmware/firmware_cache.rst
+++ b/Documentation/driver-api/firmware/firmware_cache.rst
@@ -29,8 +29,8 @@
 * If an asynchronous call is used the firmware cache is only set up for a
  device if the second argument (uevent) to request_firmware_nowait() is
   true. When uevent is true it requests that a kobject uevent be sent to
-  userspace for the firmware request. For details refer to the Fackback
-  mechanism documented below.
+  userspace for the firmware request through the sysfs fallback mechanism
+  if the firmware file is not found.
 
 * If the firmware cache is determined to be needed as per the above two
   criteria the firmware cache is setup by adding a devres entry for the
diff --git a/Documentation/driver-api/firmware/request_firmware.rst b/Documentation/driver-api/firmware/request_firmware.rst
index d5ec95a..f62bdcb 100644
--- a/Documentation/driver-api/firmware/request_firmware.rst
+++ b/Documentation/driver-api/firmware/request_firmware.rst
@@ -20,6 +20,11 @@
 .. kernel-doc:: drivers/base/firmware_loader/main.c
    :functions: request_firmware
 
+firmware_request_nowarn
+-----------------------
+.. kernel-doc:: drivers/base/firmware_loader/main.c
+   :functions: firmware_request_nowarn
+
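+A typical use is optional firmware, where a missing file is expected and not
+worth a warning.  A short sketch (the firmware name here is purely
+illustrative)::
+
+	const struct firmware *fw;
+	int ret;
+
+	ret = firmware_request_nowarn(&fw, "optional-settings.bin", dev);
+	if (ret)
+		return 0;	/* the firmware is optional, carry on */
+
+	/* ... consume fw->data, fw->size ... */
+	release_firmware(fw);
+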
 request_firmware_direct
 -----------------------
 .. kernel-doc:: drivers/base/firmware_loader/main.c
diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst
new file mode 100644
index 0000000..2c2aaca
--- /dev/null
+++ b/Documentation/driver-api/fpga/fpga-bridge.rst
@@ -0,0 +1,49 @@
+FPGA Bridge
+===========
+
+API to implement a new FPGA bridge
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: include/linux/fpga/fpga-bridge.h
+   :functions: fpga_bridge
+
+.. kernel-doc:: include/linux/fpga/fpga-bridge.h
+   :functions: fpga_bridge_ops
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_create
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_free
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_register
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_unregister
+
+API to control an FPGA bridge
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You probably won't need these directly.  FPGA regions should handle this.
+
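+If you do end up controlling a bridge by hand, a rough sketch (assuming ``np``
+is the bridge's device tree node; error handling abbreviated)::
+
+	struct fpga_bridge *bridge;
+
+	/* no image is being programmed here, so info is NULL */
+	bridge = of_fpga_bridge_get(np, NULL);
+	if (IS_ERR(bridge))
+		return PTR_ERR(bridge);
+
+	fpga_bridge_disable(bridge);	/* isolate the region */
+	/* ... reprogram the FPGA ... */
+	fpga_bridge_enable(bridge);	/* let signals through again */
+
+	fpga_bridge_put(bridge);	/* release the reference */
+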
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: of_fpga_bridge_get
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_get
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_put
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_get_to_list
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: of_fpga_bridge_get_to_list
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_enable
+
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: fpga_bridge_disable
diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst
new file mode 100644
index 0000000..bcf2dd2
--- /dev/null
+++ b/Documentation/driver-api/fpga/fpga-mgr.rst
@@ -0,0 +1,220 @@
+FPGA Manager
+============
+
+Overview
+--------
+
+The FPGA manager core exports a set of functions for programming an FPGA with
+an image.  The API is manufacturer agnostic.  All manufacturer specifics are
+hidden away in a low level driver which registers a set of ops with the core.
+The FPGA image data itself is very manufacturer specific, but for our purposes
+it's just binary data.  The FPGA manager core won't parse it.
+
+The FPGA image to be programmed can be in a scatter gather list, a single
+contiguous buffer, or a firmware file.  Because allocating contiguous kernel
+memory for the buffer should be avoided, users are encouraged to use a scatter
+gather list instead if possible.
+
+The particulars for programming the image are presented in a structure (struct
+fpga_image_info).  This struct contains parameters such as pointers to the
+FPGA image as well as image-specific particulars such as whether the image was
+built for full or partial reconfiguration.
+
+How to support a new FPGA device
+--------------------------------
+
+To add another FPGA manager, write a driver that implements a set of ops.  The
+probe function calls fpga_mgr_register(), such as::
+
+	static const struct fpga_manager_ops socfpga_fpga_ops = {
+		.write_init = socfpga_fpga_ops_configure_init,
+		.write = socfpga_fpga_ops_configure_write,
+		.write_complete = socfpga_fpga_ops_configure_complete,
+		.state = socfpga_fpga_ops_state,
+	};
+
+	static int socfpga_fpga_probe(struct platform_device *pdev)
+	{
+		struct device *dev = &pdev->dev;
+		struct socfpga_fpga_priv *priv;
+		struct fpga_manager *mgr;
+		int ret;
+
+		priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+		if (!priv)
+			return -ENOMEM;
+
+		/*
+		 * do ioremaps, get interrupts, etc. and save
+		 * them in priv
+		 */
+
+		mgr = fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
+				      &socfpga_fpga_ops, priv);
+		if (!mgr)
+			return -ENOMEM;
+
+		platform_set_drvdata(pdev, mgr);
+
+		ret = fpga_mgr_register(mgr);
+		if (ret)
+			fpga_mgr_free(mgr);
+
+		return ret;
+	}
+
+	static int socfpga_fpga_remove(struct platform_device *pdev)
+	{
+		struct fpga_manager *mgr = platform_get_drvdata(pdev);
+
+		fpga_mgr_unregister(mgr);
+
+		return 0;
+	}
+
+
+The ops will implement whatever device specific register writes are needed to
+do the programming sequence for this particular FPGA.  These ops return 0 for
+success or negative error codes otherwise.
+
+The programming sequence is::
+
+ 1. .write_init
+ 2. .write or .write_sg (may be called once or multiple times)
+ 3. .write_complete
+
+The .write_init function will prepare the FPGA to receive the image data.  The
+buffer passed into .write_init will be at most .initial_header_size bytes long;
+if the whole bitstream is not immediately available then the core code will
+buffer up at least this much before starting.
+
+The .write function writes a buffer to the FPGA. The buffer may contain the
+whole FPGA image or a smaller chunk of an FPGA image.  In the latter
+case, this function is called multiple times for successive chunks. This
+interface is suitable for drivers which use PIO.
+
+The .write_sg version behaves the same as .write except the input is a sg_table
+scatter list. This interface is suitable for drivers which use DMA.
+
+The .write_complete function is called after all the image has been written
+to put the FPGA into operating mode.
+
+The ops include a .state function which will read the hardware FPGA manager and
+return a code of type enum fpga_mgr_states.  It doesn't result in a change in
+hardware state.
+
+How to write an image buffer to a supported FPGA
+------------------------------------------------
+
+Some sample code::
+
+	#include <linux/fpga/fpga-mgr.h>
+
+	struct fpga_manager *mgr;
+	struct fpga_image_info *info;
+	int ret;
+
+	/*
+	 * Get a reference to FPGA manager.  The manager is not locked, so you can
+	 * hold onto this reference without it preventing programming.
+	 *
+	 * This example uses the device node of the manager.  Alternatively, use
+	 * fpga_mgr_get(dev) instead if you have the device.
+	 */
+	mgr = of_fpga_mgr_get(mgr_node);
+
+	/* struct with information about the FPGA image to program. */
+	info = fpga_image_info_alloc(dev);
+
+	/* flags indicates whether to do full or partial reconfiguration */
+	info->flags = FPGA_MGR_PARTIAL_RECONFIG;
+
+	/*
+	 * At this point, indicate where the image is. This is pseudo-code; you're
+	 * going to use one of these three.
+	 */
+	if (image is in a scatter gather table) {
+
+		info->sgt = [your scatter gather table]
+
+	} else if (image is in a buffer) {
+
+		info->buf = [your image buffer]
+		info->count = [image buffer size]
+
+	} else if (image is in a firmware file) {
+
+		info->firmware_name = devm_kstrdup(dev, firmware_name, GFP_KERNEL);
+
+	}
+
+	/* Get exclusive control of FPGA manager */
+	ret = fpga_mgr_lock(mgr);
+
+	/* Load the image described in info to the FPGA */
+	ret = fpga_mgr_load(mgr, info);
+
+	/* Release the FPGA manager */
+	fpga_mgr_unlock(mgr);
+	fpga_mgr_put(mgr);
+
+	/* Deallocate the image info if you're done with it */
+	fpga_image_info_free(info);
+
+API for implementing a new FPGA Manager driver
+----------------------------------------------
+
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+   :functions: fpga_manager
+
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+   :functions: fpga_manager_ops
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_create
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_free
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_register
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_unregister
+
+API for programming an FPGA
+---------------------------
+
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+   :functions: fpga_image_info
+
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+   :functions: fpga_mgr_states
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_image_info_alloc
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_image_info_free
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: of_fpga_mgr_get
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_get
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_put
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_lock
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_unlock
+
+
+Note - use :c:func:`fpga_region_program_fpga()` instead of :c:func:`fpga_mgr_load()`
+
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: fpga_mgr_load
diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst
new file mode 100644
index 0000000..f89e4a3
--- /dev/null
+++ b/Documentation/driver-api/fpga/fpga-region.rst
@@ -0,0 +1,102 @@
+FPGA Region
+===========
+
+Overview
+--------
+
+This document is meant to be a brief overview of the FPGA region API usage.  A
+more conceptual look at regions can be found in the Device Tree binding
+document [#f1]_.
+
+For the purposes of this API document, let's just say that a region associates
+an FPGA Manager and a bridge (or bridges) with a reprogrammable region of an
+FPGA or the whole FPGA.  The API provides a way to register a region and to
+program a region.
+
+Currently the only layer above fpga-region.c in the kernel is the Device Tree
+support (of-fpga-region.c) described in [#f1]_.  The DT support layer uses regions
+to program the FPGA and then DT to handle enumeration.  The common region code
+is intended to be used by other schemes that have other ways of accomplishing
+enumeration after programming.
+
+An fpga-region can be set up to know the following things:
+
+ * which FPGA manager to use to do the programming
+
+ * which bridges to disable before programming and enable afterwards.
+
+Additional info needed to program the FPGA image is passed in the struct
+fpga_image_info including:
+
+ * pointers to the image as either a scatter-gather buffer, a contiguous
+   buffer, or the name of firmware file
+
+ * flags indicating specifics such as whether the image is for partial
+   reconfiguration.
+
+How to program an FPGA using a region
+-------------------------------------
+
+First, allocate the info struct::
+
+	info = fpga_image_info_alloc(dev);
+	if (!info)
+		return -ENOMEM;
+
+Set flags as needed, i.e.::
+
+	info->flags |= FPGA_MGR_PARTIAL_RECONFIG;
+
+Point to your FPGA image, such as::
+
+	info->sgt = &sgt;
+
+Add info to region and do the programming::
+
+	region->info = info;
+	ret = fpga_region_program_fpga(region);
+
+:c:func:`fpga_region_program_fpga()` operates on info passed in the
+fpga_image_info (region->info).  This function will attempt to:
+
+ * lock the region's mutex
+ * lock the region's FPGA manager
+ * build a list of FPGA bridges if a method has been specified to do so
+ * disable the bridges
+ * program the FPGA
+ * re-enable the bridges
+ * release the locks
+
+Then you will want to enumerate whatever hardware has appeared in the FPGA.
+
+How to add a new FPGA region
+----------------------------
+
+An example of usage can be seen in the probe function of [#f2]_.
+
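+For orientation, a minimal sketch of such a probe function is shown below.  It
+assumes a driver-specific ``get_bridges`` callback and a way to find the
+manager's device tree node (``mgr_node``); both names are illustrative::
+
+	static int my_region_probe(struct platform_device *pdev)
+	{
+		struct device *dev = &pdev->dev;
+		struct fpga_manager *mgr;
+		struct fpga_region *region;
+		int ret;
+
+		/* get the FPGA manager that will program this region */
+		mgr = of_fpga_mgr_get(mgr_node);
+		if (IS_ERR(mgr))
+			return PTR_ERR(mgr);
+
+		region = fpga_region_create(dev, mgr, get_bridges);
+		if (!region) {
+			fpga_mgr_put(mgr);
+			return -ENOMEM;
+		}
+
+		platform_set_drvdata(pdev, region);
+
+		ret = fpga_region_register(region);
+		if (ret) {
+			fpga_region_free(region);
+			fpga_mgr_put(mgr);
+		}
+
+		return ret;
+	}
+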
+.. [#f1] ../devicetree/bindings/fpga/fpga-region.txt
+.. [#f2] ../../drivers/fpga/of-fpga-region.c
+
+API to program an FPGA
+----------------------
+
+.. kernel-doc:: drivers/fpga/fpga-region.c
+   :functions: fpga_region_program_fpga
+
+API to add a new FPGA region
+----------------------------
+
+.. kernel-doc:: include/linux/fpga/fpga-region.h
+   :functions: fpga_region
+
+.. kernel-doc:: drivers/fpga/fpga-region.c
+   :functions: fpga_region_create
+
+.. kernel-doc:: drivers/fpga/fpga-region.c
+   :functions: fpga_region_free
+
+.. kernel-doc:: drivers/fpga/fpga-region.c
+   :functions: fpga_region_register
+
+.. kernel-doc:: drivers/fpga/fpga-region.c
+   :functions: fpga_region_unregister
diff --git a/Documentation/driver-api/fpga/index.rst b/Documentation/driver-api/fpga/index.rst
new file mode 100644
index 0000000..c51e5eb
--- /dev/null
+++ b/Documentation/driver-api/fpga/index.rst
@@ -0,0 +1,13 @@
+==============
+FPGA Subsystem
+==============
+
+:Author: Alan Tull
+
+.. toctree::
+   :maxdepth: 2
+
+   intro
+   fpga-mgr
+   fpga-bridge
+   fpga-region
diff --git a/Documentation/driver-api/fpga/intro.rst b/Documentation/driver-api/fpga/intro.rst
new file mode 100644
index 0000000..51cd81d
--- /dev/null
+++ b/Documentation/driver-api/fpga/intro.rst
@@ -0,0 +1,54 @@
+Introduction
+============
+
+The FPGA subsystem supports reprogramming FPGAs dynamically under
+Linux.  Some of the core intentions of the FPGA subsystem are:
+
+* The FPGA subsystem is vendor agnostic.
+
+* The FPGA subsystem separates upper layers (userspace interfaces and
+  enumeration) from lower layers that know how to program a specific
+  FPGA.
+
+* Code should not be shared between upper and lower layers.  This
+  should go without saying.  If that seems necessary, there's probably
+  framework functionality that can be added that will benefit
+  other users.  Write to the linux-fpga mailing list and maintainers and
+  seek out a solution that expands the framework for broad reuse.
+
+* Generally, when adding code, think of the future.  Plan for re-use.
+
+The framework in the kernel is divided into:
+
+FPGA Manager
+------------
+
+If you are adding a new FPGA or a new method of programming an FPGA,
+this is the subsystem for you.  Low level FPGA manager drivers contain
+the knowledge of how to program a specific device.  This subsystem
+includes the framework in fpga-mgr.c and the low level drivers that
+are registered with it.
+
+FPGA Bridge
+-----------
+
+FPGA Bridges prevent spurious signals from going out of an FPGA or a
+region of an FPGA during programming.  They are disabled before
+programming begins and re-enabled afterwards.  An FPGA bridge may be
+actual hardware that gates a bus to a CPU or a soft ("freeze")
+bridge in FPGA fabric that surrounds a partial reconfiguration region
+of an FPGA.  This subsystem includes fpga-bridge.c and the low level
+drivers that are registered with it.
+
+FPGA Region
+-----------
+
+If you are adding a new interface to the FPGA framework, add it on top
+of an FPGA region to allow the most reuse of your interface.
+
+The FPGA Region framework (fpga-region.c) associates managers and
+bridges as reconfigurable regions.  A region may refer to the whole
+FPGA in full reconfiguration or to a partial reconfiguration region.
+
+The Device Tree FPGA Region support (of-fpga-region.c) handles
+reprogramming FPGAs when device tree overlays are applied.
diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 505ee90..cbe0242 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -44,7 +44,7 @@
 
  - methods to establish GPIO line direction
  - methods used to access GPIO line values
- - method to set electrical configuration to a a given GPIO line
+ - method to set electrical configuration for a given GPIO line
  - method to return the IRQ number associated to a given GPIO line
  - flag saying whether calls to its methods may sleep
  - optional line names array to identify lines
@@ -143,7 +143,7 @@
 the rail actively pulls it down.
 
 The level on the line will go as high as the VDD on the pull-up resistor, which
-may be higher than the level supported by the transistor, achieveing a
+may be higher than the level supported by the transistor, achieving a
 level-shift to the higher VDD.
 
 Integrated electronics often have an output driver stage in the form of a CMOS
@@ -382,7 +382,7 @@
 
 Any provider of irqchips needs to be carefully tailored to support Real Time
 preemption. It is desirable that all irqchips in the GPIO subsystem keep this
-in mind and does the proper testing to assure they are real time-enabled.
+in mind and do the proper testing to assure they are real time-enabled.
 So, pay attention to the above "RT_FULL:" notes, please.
 The following is a checklist to follow when preparing a driver for real
 time-compliance:
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 6d8352c..f4180e7 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -17,7 +17,9 @@
    basics
    infrastructure
    pm/index
+   clk
    device-io
+   device_connection
    dma-buf
    device_link
    message-based
@@ -49,6 +51,7 @@
    dmaengine/index
    slimbus
    soundwire/index
+   fpga/index
 
 .. only::  subproject and html
 
diff --git a/Documentation/driver-api/soundwire/error_handling.rst b/Documentation/driver-api/soundwire/error_handling.rst
new file mode 100644
index 0000000..aa3a0a23
--- /dev/null
+++ b/Documentation/driver-api/soundwire/error_handling.rst
@@ -0,0 +1,65 @@
+========================
+SoundWire Error Handling
+========================
+
+The SoundWire PHY was designed with care, so errors on the bus are going to
+be very unlikely, and if they happen they should be limited to single-bit
+errors. Examples of this design can be found in the synchronization
+mechanism (sync loss after two errors) and the short CRCs used for the Bulk
+Register Access.
+
+The errors can be detected with multiple mechanisms:
+
+1. Bus clash or parity errors: This mechanism relies on low-level detectors
+   that are independent of the payload and usages, and they cover both control
+   and audio data. The current implementation only logs such errors.
+   Improvements could be invalidating an entire programming sequence and
+   restarting from a known position. In the case of such errors outside of a
+   control/command sequence, there is no concealment or recovery for audio
+   data enabled by the SoundWire protocol, the location of the error will also
+   impact its audibility (most-significant bits will be more impacted in PCM),
+   and after a number of such errors are detected the bus might be reset. Note
+   that bus clashes due to programming errors (two streams using the same bit
+   slots) or electrical issues during the transmit/receive transition cannot
+   be distinguished, although a recurring bus clash when audio is enabled is an
+   indication of a bus allocation issue. The interrupt mechanism can also help
+   identify Slaves which detected a Bus Clash or a Parity Error, but they may
+   not be responsible for the errors so resetting them individually is not a
+   viable recovery strategy.
+
+2. Command status: Each command is associated with a status, which only
+   covers transmission of the data between devices. The ACK status indicates
+   that the command was received and will be executed by the end of the
+   current frame. A NAK indicates that the command was in error and will not
+   be applied. In case of bad programming (a command sent to a non-existent
+   Slave or to a non-implemented register) or an electrical issue, the absence
+   of a response signals that the command was ignored. Some Master
+   implementations allow for a command to be retransmitted several times (see
+   the sketch after this list).  If the retransmission fails, backtracking and
+   restarting the entire programming sequence might be a solution.
+   Alternatively some implementations might directly issue a bus reset and
+   re-enumerate all devices.
+
+3. Timeouts: In a number of cases such as ChannelPrepare or
+   ClockStopPrepare, the bus driver is supposed to poll a register field until
+   it transitions to a NotFinished value of zero. The MIPI SoundWire spec 1.1
+   does not define timeouts but the MIPI SoundWire DisCo document adds
+   recommendations on timeouts. If such configurations do not complete, the
+   driver will return -ETIMEDOUT. Such timeouts are symptoms of a faulty
+   Slave device and are likely impossible to recover from.
+
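+As a purely illustrative sketch of the retry policy mentioned in (2) above
+(the wrapper name and retry count are hypothetical, not part of the bus
+API)::
+
+	#define MY_SDW_CMD_RETRIES	3
+
+	static int my_sdw_xfer_retry(struct sdw_bus *bus, struct sdw_msg *msg)
+	{
+		int i, ret = -EIO;
+
+		for (i = 0; i <= MY_SDW_CMD_RETRIES; i++) {
+			ret = bus->ops->xfer_msg(bus, msg);
+			if (ret != -EAGAIN)	/* only retry ignored commands */
+				break;
+		}
+
+		return ret;
+	}
+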
+Errors during global reconfiguration sequences are extremely difficult to
+handle:
+
+1. BankSwitch: An error during the last command issuing a BankSwitch is
+   difficult to backtrack from. Retransmitting the Bank Switch command may be
+   possible in a single segment setup, but this can lead to synchronization
+   problems when enabling multiple bus segments (a command with side effects
+   such as frame reconfiguration would be handled at different times). A global
+   hard-reset might be the best solution.
+
+Note that SoundWire does not provide a mechanism to detect illegal values
+written in valid registers. In a number of cases the standard even mentions
+that the Slave might behave in implementation-defined ways. The bus
+implementation does not provide a recovery mechanism for such errors; Slave
+or Master driver implementers are responsible for writing valid values in
+valid registers and implementing additional range checking if needed.
diff --git a/Documentation/driver-api/soundwire/index.rst b/Documentation/driver-api/soundwire/index.rst
index 647e946..6db0260 100644
--- a/Documentation/driver-api/soundwire/index.rst
+++ b/Documentation/driver-api/soundwire/index.rst
@@ -6,6 +6,9 @@
    :maxdepth: 1
 
    summary
+   stream
+   error_handling
+   locking
 
 .. only::  subproject
 
diff --git a/Documentation/driver-api/soundwire/locking.rst b/Documentation/driver-api/soundwire/locking.rst
new file mode 100644
index 0000000..253f7355
--- /dev/null
+++ b/Documentation/driver-api/soundwire/locking.rst
@@ -0,0 +1,106 @@
+=================
+SoundWire Locking
+=================
+
+This document explains the locking mechanism of the SoundWire Bus. The Bus
+uses the following locks in order to avoid race conditions in Bus operations
+on shared resources.
+
+  - Bus lock
+
+  - Message lock
+
+Bus lock
+========
+
+The SoundWire Bus lock is a mutex and is part of the Bus data structure
+(sdw_bus), which is used for every Bus instance. This lock is used to
+serialize each of the following operation(s) within a SoundWire Bus instance.
+
+  - Addition and removal of Slave(s), changing Slave status.
+
+  - Prepare, Enable, Disable and De-prepare stream operations.
+
+  - Access of Stream data structure.
+
+Message lock
+============
+
+The SoundWire message transfer lock. This mutex is part of the
+Bus data structure (sdw_bus). This lock is used to serialize the message
+transfers (read/write) within a SoundWire Bus instance.
+
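+In code, this serialization amounts to a classic mutex pattern.  A simplified
+sketch (the helper name is illustrative, not the actual bus entry point)::
+
+	static int my_sdw_transfer(struct sdw_bus *bus, struct sdw_msg *msg)
+	{
+		int ret;
+
+		mutex_lock(&bus->msg_lock);	/* Message lock */
+		ret = bus->ops->xfer_msg(bus, msg);
+		mutex_unlock(&bus->msg_lock);
+
+		return ret;
+	}
+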
+The examples below show how the locks are acquired.
+
+Example 1
+---------
+
+Message transfer.
+
+  1. For every message transfer
+
+     a. Acquire Message lock.
+
+     b. Transfer message (Read/Write) to Slave1 or broadcast message on
+        Bus in case of bank switch.
+
+     c. Release Message lock ::
+
+	+----------+                    +---------+
+	|          |                    |         |
+	|   Bus    |                    | Master  |
+	|          |                    | Driver  |
+	|          |                    |         |
+	+----+-----+                    +----+----+
+	     |                               |
+	     |     bus->ops->xfer_msg()      |
+	     <-------------------------------+   a. Acquire Message lock
+	     |                               |   b. Transfer message
+	     |                               |
+	     +------------------------------->   c. Release Message lock
+	     |    return success/error       |   d. Return success/error
+	     |                               |
+	     +                               +
+
+Example 2
+---------
+
+Prepare operation.
+
+  1. Acquire lock for Bus instance associated with Master 1.
+
+  2. For every message transfer in Prepare operation
+
+     a. Acquire Message lock.
+
+     b. Transfer message (Read/Write) to Slave1 or broadcast message on
+        Bus in case of bank switch.
+
+     c. Release Message lock.
+
+  3. Release lock for Bus instance associated with Master 1 ::
+
+	+----------+                    +---------+
+	|          |                    |         |
+	|   Bus    |                    | Master  |
+	|          |                    | Driver  |
+	|          |                    |         |
+	+----+-----+                    +----+----+
+	     |                               |
+	     |    sdw_prepare_stream()       |
+	     <-------------------------------+   1. Acquire bus lock
+	     |                               |   2. Perform stream prepare
+	     |                               |
+	     |                               |
+	     |     bus->ops->xfer_msg()      |
+	     <-------------------------------+   a. Acquire Message lock
+	     |                               |   b. Transfer message
+	     |                               |
+	     +------------------------------->   c. Release Message lock
+	     |    return success/error       |   d. Return success/error
+	     |                               |
+	     |                               |
+	     |    return success/error       |   3. Release bus lock
+	     +------------------------------->   4. Return success/error
+	     |                               |
+	     +                               +
diff --git a/Documentation/driver-api/soundwire/stream.rst b/Documentation/driver-api/soundwire/stream.rst
new file mode 100644
index 0000000..29121aa
--- /dev/null
+++ b/Documentation/driver-api/soundwire/stream.rst
@@ -0,0 +1,372 @@
+=========================
+Audio Stream in SoundWire
+=========================
+
+An audio stream is a logical or virtual connection created between
+
+  (1) System memory buffer(s) and Codec(s)
+
+  (2) DSP memory buffer(s) and Codec(s)
+
+  (3) FIFO(s) and Codec(s)
+
+  (4) Codec(s) and Codec(s)
+
+which is typically driven by DMA channel(s) through the data link. An
+audio stream contains one or more channels of data. All channels within a
+stream must have the same sample rate and the same sample size.
+
+Assume a stream with two channels (Left & Right) is opened using the
+SoundWire interface. Below are some ways a stream can be represented in
+SoundWire.
+
+Stream Sample in memory (System memory, DSP memory or FIFOs) ::
+
+	-------------------------
+	| L | R | L | R | L | R |
+	-------------------------
+
+Example 1: A Stereo Stream with L and R channels is rendered from Master to
+Slave. Both Master and Slave are using a single port. ::
+
+	+---------------+                    Clock Signal  +---------------+
+	|    Master     +----------------------------------+     Slave     |
+	|   Interface   |                                  |   Interface   |
+	|               |                                  |       1       |
+	|               |                     Data Signal  |               |
+	|    L  +  R    +----------------------------------+    L  +  R    |
+	|     (Data)    |     Data Direction               |     (Data)    |
+	+---------------+  +----------------------->       +---------------+
+
+
+Example 2: A Stereo Stream with L and R channels is captured from Slave to
+Master. Both Master and Slave are using a single port. ::
+
+
+	+---------------+                    Clock Signal  +---------------+
+	|    Master     +----------------------------------+     Slave     |
+	|   Interface   |                                  |   Interface   |
+	|               |                                  |       1       |
+	|               |                     Data Signal  |               |
+	|    L  +  R    +----------------------------------+    L  +  R    |
+	|     (Data)    |     Data Direction               |     (Data)    |
+	+---------------+  <-----------------------+       +---------------+
+
+
+Example 3: A Stereo Stream with L and R channels is rendered by the Master.
+The L and R channels are received by two different Slaves. The Master and
+both Slaves are using a single port. ::
+
+	+---------------+                    Clock Signal  +---------------+
+	|    Master     +---------+------------------------+     Slave     |
+	|   Interface   |         |                        |   Interface   |
+	|               |         |                        |       1       |
+	|               |         |           Data Signal  |               |
+	|    L  +  R    +---+------------------------------+       L       |
+	|     (Data)    |   |     |    Data Direction      |     (Data)    |
+	+---------------+   |     |   +------------->      +---------------+
+	                    |     |
+	                    |     |
+	                    |     |                        +---------------+
+	                    |     +----------------------> |     Slave     |
+	                    |                              |   Interface   |
+	                    |                              |       2       |
+	                    |                              |               |
+	                    +----------------------------> |       R       |
+	                                                   |     (Data)    |
+	                                                   +---------------+
+
+
+Example 4: A stereo stream with L and R channels is rendered by two different
+ports of the Master and is received by a single port of the Slave
+interface. ::
+
+	+--------------------+
+	|                    |
+	|     +--------------+                             +----------------+
+	|     |             ||                             |                |
+	|     |  Data Port  ||  L Channel                  |                |
+	|     |      1      |------------+                 |                |
+	|     |  L Channel  ||           |                 +-----+----+     |
+	|     |   (Data)    ||           |   L + R Channel ||    Data |     |
+	| Master  +----------+           | +---+---------> ||    Port |     |
+	| Interface          |           |                 ||     1   |     |
+	|     +--------------+           |                 ||         |     |
+	|     |             ||           |                 +----------+     |
+	|     |  Data Port  |------------+                 |                |
+	|     |      2      ||  R Channel                  |     Slave      |
+	|     |  R Channel  ||                             |   Interface    |
+	|     |   (Data)    ||                             |       1        |
+	|     +--------------+         Clock Signal        |     L  +  R    |
+	|                    +---------------------------> |      (Data)    |
+	+--------------------+                             |                |
+							   +----------------+
+
+SoundWire Stream Management flow
+================================
+
+Stream definitions
+------------------
+
+  (1) Current stream: the stream on which an operation such as prepare,
+      enable, disable or de-prepare is being performed.
+
+  (2) Active stream: a stream, other than the current stream, which is
+      already active on the Bus. There can be multiple active streams on
+      the Bus.
+
+The SoundWire Bus manages stream operations for each stream rendered or
+captured on the SoundWire Bus. This section explains the Bus operations
+performed for each stream allocated/released on the Bus. The following are
+the stream states maintained by the Bus for each audio stream.
+
+
+SoundWire stream states
+-----------------------
+
+The SoundWire stream states and their transition diagram are shown below. ::
+
+	+-----------+     +------------+     +----------+     +----------+
+	| ALLOCATED +---->| CONFIGURED +---->| PREPARED +---->| ENABLED  |
+	|   STATE   |     |    STATE   |     |  STATE   |     |  STATE   |
+	+-----------+     +------------+     +----------+     +----+-----+
+	                                                           ^
+	                                                           |
+	                                                           |
+	                                                           v
+	         +----------+           +------------+        +----+-----+
+	         | RELEASED |<----------+ DEPREPARED |<-------+ DISABLED |
+	         |  STATE   |           |   STATE    |        |  STATE   |
+	         +----------+           +------------+        +----------+
+
+NOTE: The state transition between prepare and deprepare is supported in
+the Spec but not in the software (subsystem).
+
+NOTE2: Stream state transition checks need to be handled by the caller
+framework, for example ALSA/ASoC. No checks for stream transitions exist
+in the SoundWire subsystem.
+
+Stream State Operations
+-----------------------
+
+The sections below explain the operations performed by the Bus on the
+Master(s) and Slave(s) as part of stream state transitions.
+
+SDW_STREAM_ALLOCATED
+~~~~~~~~~~~~~~~~~~~~
+
+Allocation state for the stream. This is the entry state
+of the stream. Operations performed before entering this state:
+
+  (1) A stream runtime is allocated for the stream. This stream
+      runtime is used as a reference for all the operations performed
+      on the stream.
+
+  (2) The resources required for holding stream runtime information are
+      allocated and initialized. This holds all stream related information
+      such as stream type (PCM/PDM) and parameters, Master and Slave
+      interface associated with the stream, stream state etc.
+
+After all the above operations are successful, the stream state is set to
+``SDW_STREAM_ALLOCATED``.
+
+The Bus implements the below API to allocate a stream; it needs to be called
+once per stream. In the ASoC DPCM framework, this stream state may be linked
+to the .startup() operation.
+
+  .. code-block:: c
+
+     int sdw_alloc_stream(char * stream_name);
+
+
+SDW_STREAM_CONFIGURED
+~~~~~~~~~~~~~~~~~~~~~
+
+Configuration state of the stream. Operations performed before entering
+this state:
+
+  (1) The resources allocated for stream information in the
+      SDW_STREAM_ALLOCATED state are updated here. This includes the stream
+      parameters and the Master(s) and Slave(s) runtime information
+      associated with the current stream.
+
+  (2) All the Master(s) and Slave(s) associated with the current stream
+      provide their port information to the Bus. This includes the port
+      numbers allocated by the Master(s) and Slave(s) for the current
+      stream and their channel masks.
+
+After all the above operations are successful, the stream state is set to
+``SDW_STREAM_CONFIGURED``.
+
+The Bus implements the below APIs for the CONFIG state, which need to be
+called by the respective Master(s) and Slave(s) associated with the stream.
+These APIs can only be invoked once by the respective Master(s) and Slave(s).
+In the ASoC DPCM framework, this stream state is linked to the .hw_params()
+operation.
+
+  .. code-block:: c
+
+     int sdw_stream_add_master(struct sdw_bus * bus,
+		struct sdw_stream_config * stream_config,
+		struct sdw_ports_config * ports_config,
+		struct sdw_stream_runtime * stream);
+
+     int sdw_stream_add_slave(struct sdw_slave * slave,
+		struct sdw_stream_config * stream_config,
+		struct sdw_ports_config * ports_config,
+		struct sdw_stream_runtime * stream);
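+
+For illustration, a Slave might describe a stereo render stream as shown
+below. This is only a sketch: the structure field and enum names used here
+are assumptions for the example and are not defined by this document.
+
+  .. code-block:: c
+
+     struct sdw_stream_config stream_config = {
+             .frame_rate = 48000,            /* sample rate in Hz */
+             .ch_count = 2,                  /* stereo: L + R */
+             .bps = 16,                      /* bits per sample */
+             .direction = SDW_DATA_DIR_RX,   /* hypothetical enum value */
+             .type = SDW_STREAM_PCM,         /* hypothetical enum value */
+     };
+
+     ret = sdw_stream_add_slave(slave, &stream_config, &ports_config, stream);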
+
+
+SDW_STREAM_PREPARED
+~~~~~~~~~~~~~~~~~~~
+
+Prepare state of the stream. Operations performed before entering this state:
+
+  (1) Bus parameters such as bandwidth, frame shape and clock frequency
+      are computed based on the current stream as well as the already active
+      stream(s) on the Bus. Re-computation is required to accommodate the
+      current stream on the Bus.
+
+  (2) Transport and port parameters of all Master(s) and Slave(s) port(s)
+      are computed for the current as well as the already active stream(s),
+      based on the frame shape and clock frequency computed in step 1.
+
+  (3) The computed Bus and transport parameters are programmed into the
+      Master(s) and Slave(s) registers. The banked registers are programmed
+      on the alternate bank (the bank currently unused), and port(s) are
+      enabled for the already active stream(s) on that bank. This is done
+      so as not to disrupt the already active stream(s).
+
+  (4) Once all the values are programmed, the Bus initiates a switch to the
+      alternate bank, where all the newly programmed values take effect.
+
+  (5) The port(s) of the Master(s) and Slave(s) for the current stream are
+      prepared by programming the PrepareCtrl register.
+
+After all the above operations are successful, the stream state is set to
+``SDW_STREAM_PREPARED``.
+
+The Bus implements the below API for the PREPARE state, which needs to be
+called once per stream. In the ASoC DPCM framework, this stream state is
+linked to the .prepare() operation.
+
+  .. code-block:: c
+
+     int sdw_prepare_stream(struct sdw_stream_runtime * stream);
+
+
+SDW_STREAM_ENABLED
+~~~~~~~~~~~~~~~~~~
+
+Enable state of the stream. The data port(s) are enabled upon entering this
+state. Operations performed before entering this state:
+
+  (1) All the values computed in the SDW_STREAM_PREPARED state are programmed
+      into the alternate bank (the bank currently unused). This includes
+      programming for the already active stream(s) as well.
+
+  (2) All the Master(s) and Slave(s) port(s) for the current stream are
+      enabled on the alternate bank (the bank currently unused) by
+      programming the ChannelEn register.
+
+  (3) Once all the values are programmed, the Bus initiates a switch to the
+      alternate bank, where all the newly programmed values take effect and
+      the port(s) associated with the current stream are enabled.
+
+After all the above operations are successful, the stream state is set to
+``SDW_STREAM_ENABLED``.
+
+The Bus implements the below API for the ENABLE state, which needs to be
+called once per stream. In the ASoC DPCM framework, this stream state is
+linked to the .trigger() start operation.
+
+  .. code-block:: c
+
+     int sdw_enable_stream(struct sdw_stream_runtime * stream);
+
+SDW_STREAM_DISABLED
+~~~~~~~~~~~~~~~~~~~
+
+Disable state of the stream. The data port(s) are disabled upon entering this
+state. Operations performed before entering this state:
+
+  (1) All the Master(s) and Slave(s) port(s) for the current stream are
+      disabled on the alternate bank (the bank currently unused) by
+      programming the ChannelEn register.
+
+  (2) The current configuration of the Bus and the active stream(s) is
+      programmed into the alternate bank (the bank currently unused).
+
+  (3) Once all the values are programmed, the Bus initiates a switch to the
+      alternate bank, where all the newly programmed values take effect and
+      the port(s) associated with the current stream are disabled.
+
+After all the above operations are successful, the stream state is set to
+``SDW_STREAM_DISABLED``.
+
+The Bus implements the below API for the DISABLED state, which needs to be
+called once per stream. In the ASoC DPCM framework, this stream state is
+linked to the .trigger() stop operation.
+
+  .. code-block:: c
+
+     int sdw_disable_stream(struct sdw_stream_runtime * stream);
+
+
+SDW_STREAM_DEPREPARED
+~~~~~~~~~~~~~~~~~~~~~
+
+De-prepare state of the stream. Operations performed before entering this
+state:
+
+  (1) All the port(s) of the Master(s) and Slave(s) for the current stream
+      are de-prepared by programming the PrepareCtrl register.
+
+  (2) The payload bandwidth of the current stream is subtracted from the
+      total bandwidth requirement of the bus, and the new parameters are
+      calculated and applied by performing a bank switch, etc.
+
+After all the above operations are successful, the stream state is set to
+``SDW_STREAM_DEPREPARED``.
+
+The Bus implements the below API for the DEPREPARED state, which needs to be
+called once per stream. In the ASoC DPCM framework, this stream state is
+linked to the .trigger() stop operation.
+
+  .. code-block:: c
+
+     int sdw_deprepare_stream(struct sdw_stream_runtime * stream);
+
+
+SDW_STREAM_RELEASED
+~~~~~~~~~~~~~~~~~~~
+
+Release state of the stream. Operations performed before entering this state:
+
+  (1) Release the port resources for all Master(s) and Slave(s) port(s)
+      associated with the current stream.
+
+  (2) Release the Master(s) and Slave(s) runtime resources associated with
+      the current stream.
+
+  (3) Release the stream runtime resources associated with the current
+      stream.
+
+After all the above operations are successful, the stream state is set to
+``SDW_STREAM_RELEASED``.
+
+The Bus implements the below APIs for the RELEASE state, which need to be
+called by all the Master(s) and Slave(s) associated with the stream. In the
+ASoC DPCM framework, this stream state is linked to the .hw_free() operation.
+
+  .. code-block:: c
+
+     int sdw_stream_remove_master(struct sdw_bus * bus,
+		struct sdw_stream_runtime * stream);
+
+     int sdw_stream_remove_slave(struct sdw_slave * slave,
+		struct sdw_stream_runtime * stream);
+
+
+The .shutdown() ASoC DPCM operation calls the below Bus API to release the
+stream allocated as part of the ALLOCATED state.
+
+In .shutdown(), the data structures maintaining the stream state are freed.
+
+  .. code-block:: c
+
+     void sdw_release_stream(struct sdw_stream_runtime * stream);
+
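+Putting the above APIs together, below is a minimal sketch of a complete
+render flow as a caller framework might drive it. This is illustrative
+only: error handling is omitted, and ``bus``, ``slave``, the configuration
+structures and the ``stream`` runtime pointer are assumed to have been set
+up elsewhere.
+
+  .. code-block:: c
+
+     int ret;
+
+     /* ALLOCATED: create the stream runtime, once per stream */
+     ret = sdw_alloc_stream("playback");
+
+     /* CONFIGURED: each Master and Slave on the stream adds its ports */
+     ret = sdw_stream_add_master(bus, &stream_config, &ports_config, stream);
+     ret = sdw_stream_add_slave(slave, &stream_config, &ports_config, stream);
+
+     /* PREPARED, then ENABLED: compute bus parameters, enable ports */
+     ret = sdw_prepare_stream(stream);
+     ret = sdw_enable_stream(stream);
+
+     /* ... stream is running ... */
+
+     /* DISABLED, then DEPREPARED: disable ports, release bandwidth */
+     ret = sdw_disable_stream(stream);
+     ret = sdw_deprepare_stream(stream);
+
+     /* RELEASED: remove Master/Slave runtimes, then free the stream */
+     ret = sdw_stream_remove_master(bus, stream);
+     ret = sdw_stream_remove_slave(slave, stream);
+     sdw_release_stream(stream);
+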
+Not Supported
+=============
+
+1. A single port with multiple channels cannot be shared between two
+streams or across streams. For example, a port with 4 channels cannot be
+used to handle 2 independent stereo streams, even though this is possible
+in theory in SoundWire.
diff --git a/Documentation/driver-api/uio-howto.rst b/Documentation/driver-api/uio-howto.rst
index 92056c2..fb2eb73 100644
--- a/Documentation/driver-api/uio-howto.rst
+++ b/Documentation/driver-api/uio-howto.rst
@@ -711,7 +711,8 @@
 
 If a subchannel is created by a request to host, then the uio_hv_generic
 device driver will create a sysfs binary file for the per-channel ring buffer.
-For example:
+For example::
+
 	/sys/bus/vmbus/devices/3811fe4d-0fa0-4b62-981a-74fc1084c757/channels/21/ring
 
 Further information
diff --git a/Documentation/driver-api/usb/dwc3.rst b/Documentation/driver-api/usb/dwc3.rst
index c3dc84a50..8b36ff1 100644
--- a/Documentation/driver-api/usb/dwc3.rst
+++ b/Documentation/driver-api/usb/dwc3.rst
@@ -674,9 +674,8 @@
   	__entry->flags & DWC3_EP_ENABLED ? 'E' : 'e',
   	__entry->flags & DWC3_EP_STALL ? 'S' : 's',
   	__entry->flags & DWC3_EP_WEDGE ? 'W' : 'w',
-  	__entry->flags & DWC3_EP_BUSY ? 'B' : 'b',
+  	__entry->flags & DWC3_EP_TRANSFER_STARTED ? 'B' : 'b',
   	__entry->flags & DWC3_EP_PENDING_REQUEST ? 'P' : 'p',
-  	__entry->flags & DWC3_EP_MISSED_ISOC ? 'M' : 'm',
   	__entry->flags & DWC3_EP_END_TRANSFER_PENDING ? 'E' : 'e',
   	__entry->direction ? '<' : '>'
   )
diff --git a/Documentation/features/core/BPF-JIT/arch-support.txt b/Documentation/features/core/BPF-JIT/arch-support.txt
deleted file mode 100644
index 0b96b4e..0000000
--- a/Documentation/features/core/BPF-JIT/arch-support.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# Feature name:          BPF-JIT
-#         Kconfig:       HAVE_BPF_JIT
-#         description:   arch supports BPF JIT optimizations
-#
-    -----------------------
-    |         arch |status|
-    -----------------------
-    |       alpha: | TODO |
-    |         arc: | TODO |
-    |         arm: |  ok  |
-    |       arm64: |  ok  |
-    |         c6x: | TODO |
-    |       h8300: | TODO |
-    |     hexagon: | TODO |
-    |        ia64: | TODO |
-    |        m68k: | TODO |
-    |  microblaze: | TODO |
-    |        mips: |  ok  |
-    |       nios2: | TODO |
-    |    openrisc: | TODO |
-    |      parisc: | TODO |
-    |     powerpc: |  ok  |
-    |        s390: |  ok  |
-    |          sh: | TODO |
-    |       sparc: |  ok  |
-    |          um: | TODO |
-    |   unicore32: | TODO |
-    |         x86: |  ok  |
-    |      xtensa: | TODO |
-    -----------------------
diff --git a/Documentation/features/core/cBPF-JIT/arch-support.txt b/Documentation/features/core/cBPF-JIT/arch-support.txt
new file mode 100644
index 0000000..90459cd
--- /dev/null
+++ b/Documentation/features/core/cBPF-JIT/arch-support.txt
@@ -0,0 +1,33 @@
+#
+# Feature name:          cBPF-JIT
+#         Kconfig:       HAVE_CBPF_JIT
+#         description:   arch supports cBPF JIT optimizations
+#
+    -----------------------
+    |         arch |status|
+    -----------------------
+    |       alpha: | TODO |
+    |         arc: | TODO |
+    |         arm: | TODO |
+    |       arm64: | TODO |
+    |         c6x: | TODO |
+    |       h8300: | TODO |
+    |     hexagon: | TODO |
+    |        ia64: | TODO |
+    |        m68k: | TODO |
+    |  microblaze: | TODO |
+    |        mips: |  ok  |
+    |       nds32: | TODO |
+    |       nios2: | TODO |
+    |    openrisc: | TODO |
+    |      parisc: | TODO |
+    |     powerpc: |  ok  |
+    |       riscv: | TODO |
+    |        s390: | TODO |
+    |          sh: | TODO |
+    |       sparc: |  ok  |
+    |          um: | TODO |
+    |   unicore32: | TODO |
+    |         x86: | TODO |
+    |      xtensa: | TODO |
+    -----------------------
diff --git a/Documentation/features/core/eBPF-JIT/arch-support.txt b/Documentation/features/core/eBPF-JIT/arch-support.txt
new file mode 100644
index 0000000..c90a038
--- /dev/null
+++ b/Documentation/features/core/eBPF-JIT/arch-support.txt
@@ -0,0 +1,33 @@
+#
+# Feature name:          eBPF-JIT
+#         Kconfig:       HAVE_EBPF_JIT
+#         description:   arch supports eBPF JIT optimizations
+#
+    -----------------------
+    |         arch |status|
+    -----------------------
+    |       alpha: | TODO |
+    |         arc: | TODO |
+    |         arm: |  ok  |
+    |       arm64: |  ok  |
+    |         c6x: | TODO |
+    |       h8300: | TODO |
+    |     hexagon: | TODO |
+    |        ia64: | TODO |
+    |        m68k: | TODO |
+    |  microblaze: | TODO |
+    |        mips: |  ok  |
+    |       nds32: | TODO |
+    |       nios2: | TODO |
+    |    openrisc: | TODO |
+    |      parisc: | TODO |
+    |     powerpc: |  ok  |
+    |       riscv: | TODO |
+    |        s390: |  ok  |
+    |          sh: | TODO |
+    |       sparc: |  ok  |
+    |          um: | TODO |
+    |   unicore32: | TODO |
+    |         x86: |  ok  |
+    |      xtensa: | TODO |
+    -----------------------
diff --git a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt
index 372a2b1..0ef6acd 100644
--- a/Documentation/features/core/generic-idle-thread/arch-support.txt
+++ b/Documentation/features/core/generic-idle-thread/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
-    |    openrisc: | TODO |
+    |    openrisc: |  ok  |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt
index ad97217..27cbd63a 100644
--- a/Documentation/features/core/jump-labels/arch-support.txt
+++ b/Documentation/features/core/jump-labels/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: |  ok  |
diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt
index 36ee7be..f44c274 100644
--- a/Documentation/features/core/tracehook/arch-support.txt
+++ b/Documentation/features/core/tracehook/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: |  ok  |
     |       nios2: |  ok  |
     |    openrisc: |  ok  |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt
index f5c99fa..282ecc8 100644
--- a/Documentation/features/debug/KASAN/arch-support.txt
+++ b/Documentation/features/debug/KASAN/arch-support.txt
@@ -17,15 +17,17 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
     |   unicore32: | TODO |
-    |         x86: |  ok  | 64-bit only
+    |         x86: |  ok  |
     |      xtensa: |  ok  |
     -----------------------
diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index 5170a993..01b2b30 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: | TODO |
diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt
index 13b6e99..3b4dff2 100644
--- a/Documentation/features/debug/kgdb/arch-support.txt
+++ b/Documentation/features/debug/kgdb/arch-support.txt
@@ -11,16 +11,18 @@
     |         arm: |  ok  |
     |       arm64: |  ok  |
     |         c6x: | TODO |
-    |       h8300: | TODO |
+    |       h8300: |  ok  |
     |     hexagon: |  ok  |
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: |  ok  |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
index 419bb38..7e963d0 100644
--- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
+++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt
index 52b3ace..4ada027 100644
--- a/Documentation/features/debug/kprobes/arch-support.txt
+++ b/Documentation/features/debug/kprobes/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: |  ok  |
     |         arm: |  ok  |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |         c6x: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt
index 180d244..044e13f 100644
--- a/Documentation/features/debug/kretprobes/arch-support.txt
+++ b/Documentation/features/debug/kretprobes/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: |  ok  |
     |         arm: |  ok  |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |         c6x: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt
index 0a1241f..dce7669 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
-    |     powerpc: | TODO |
+    |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt
index 5700195..74b89a9 100644
--- a/Documentation/features/debug/stackprotector/arch-support.txt
+++ b/Documentation/features/debug/stackprotector/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: |  ok  |
     |       sparc: | TODO |
diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt
index 0b8d922..1a3f9d3 100644
--- a/Documentation/features/debug/uprobes/arch-support.txt
+++ b/Documentation/features/debug/uprobes/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: |  ok  |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |         c6x: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -17,13 +17,15 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: | TODO |
-    |       sparc: | TODO |
+    |       sparc: |  ok  |
     |          um: | TODO |
     |   unicore32: | TODO |
     |         x86: |  ok  |
diff --git a/Documentation/features/debug/user-ret-profiler/arch-support.txt b/Documentation/features/debug/user-ret-profiler/arch-support.txt
index 13852ae..1d78d10 100644
--- a/Documentation/features/debug/user-ret-profiler/arch-support.txt
+++ b/Documentation/features/debug/user-ret-profiler/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/io/dma-api-debug/arch-support.txt b/Documentation/features/io/dma-api-debug/arch-support.txt
deleted file mode 100644
index e438ed6..0000000
--- a/Documentation/features/io/dma-api-debug/arch-support.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# Feature name:          dma-api-debug
-#         Kconfig:       HAVE_DMA_API_DEBUG
-#         description:   arch supports DMA debug facilities
-#
-    -----------------------
-    |         arch |status|
-    -----------------------
-    |       alpha: | TODO |
-    |         arc: | TODO |
-    |         arm: |  ok  |
-    |       arm64: |  ok  |
-    |         c6x: |  ok  |
-    |       h8300: | TODO |
-    |     hexagon: | TODO |
-    |        ia64: |  ok  |
-    |        m68k: | TODO |
-    |  microblaze: |  ok  |
-    |        mips: |  ok  |
-    |       nios2: | TODO |
-    |    openrisc: | TODO |
-    |      parisc: | TODO |
-    |     powerpc: |  ok  |
-    |        s390: |  ok  |
-    |          sh: |  ok  |
-    |       sparc: |  ok  |
-    |          um: | TODO |
-    |   unicore32: | TODO |
-    |         x86: |  ok  |
-    |      xtensa: |  ok  |
-    -----------------------
diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt
index 47f64a4..30c072d 100644
--- a/Documentation/features/io/dma-contiguous/arch-support.txt
+++ b/Documentation/features/io/dma-contiguous/arch-support.txt
@@ -17,11 +17,13 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
-    |        s390: | TODO |
+    |       riscv: |  ok  |
+    |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
diff --git a/Documentation/features/io/sg-chain/arch-support.txt b/Documentation/features/io/sg-chain/arch-support.txt
index 07f357f..6554f03 100644
--- a/Documentation/features/io/sg-chain/arch-support.txt
+++ b/Documentation/features/io/sg-chain/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: |  ok  |
diff --git a/Documentation/features/lib/strncasecmp/arch-support.txt b/Documentation/features/lib/strncasecmp/arch-support.txt
deleted file mode 100644
index 4f3a6a0..0000000
--- a/Documentation/features/lib/strncasecmp/arch-support.txt
+++ /dev/null
@@ -1,31 +0,0 @@
-#
-# Feature name:          strncasecmp
-#         Kconfig:       __HAVE_ARCH_STRNCASECMP
-#         description:   arch provides an optimized strncasecmp() function
-#
-    -----------------------
-    |         arch |status|
-    -----------------------
-    |       alpha: | TODO |
-    |         arc: | TODO |
-    |         arm: | TODO |
-    |       arm64: | TODO |
-    |         c6x: | TODO |
-    |       h8300: | TODO |
-    |     hexagon: | TODO |
-    |        ia64: | TODO |
-    |        m68k: | TODO |
-    |  microblaze: | TODO |
-    |        mips: | TODO |
-    |       nios2: | TODO |
-    |    openrisc: | TODO |
-    |      parisc: | TODO |
-    |     powerpc: | TODO |
-    |        s390: | TODO |
-    |          sh: | TODO |
-    |       sparc: | TODO |
-    |          um: | TODO |
-    |   unicore32: | TODO |
-    |         x86: | TODO |
-    |      xtensa: | TODO |
-    -----------------------
diff --git a/Documentation/features/locking/cmpxchg-local/arch-support.txt b/Documentation/features/locking/cmpxchg-local/arch-support.txt
index 482a0b0..51704a2 100644
--- a/Documentation/features/locking/cmpxchg-local/arch-support.txt
+++ b/Documentation/features/locking/cmpxchg-local/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |         c6x: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt
index bb35c5b..bd39c5e 100644
--- a/Documentation/features/locking/lockdep/arch-support.txt
+++ b/Documentation/features/locking/lockdep/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
+    |       nds32: |  ok  |
     |       nios2: | TODO |
-    |    openrisc: | TODO |
+    |    openrisc: |  ok  |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/locking/queued-rwlocks/arch-support.txt b/Documentation/features/locking/queued-rwlocks/arch-support.txt
index 627e9a6..da7aff3 100644
--- a/Documentation/features/locking/queued-rwlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-rwlocks/arch-support.txt
@@ -9,21 +9,23 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |         c6x: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: | TODO |
-    |        mips: | TODO |
+    |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
-    |    openrisc: | TODO |
+    |    openrisc: |  ok  |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
-    |       sparc: | TODO |
+    |       sparc: |  ok  |
     |          um: | TODO |
     |   unicore32: | TODO |
     |         x86: |  ok  |
diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
index 9edda21..478e910 100644
--- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
@@ -16,14 +16,16 @@
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: | TODO |
-    |        mips: | TODO |
+    |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
-    |    openrisc: | TODO |
+    |    openrisc: |  ok  |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
-    |       sparc: | TODO |
+    |       sparc: |  ok  |
     |          um: | TODO |
     |   unicore32: | TODO |
     |         x86: |  ok  |
diff --git a/Documentation/features/locking/rwsem-optimized/arch-support.txt b/Documentation/features/locking/rwsem-optimized/arch-support.txt
index 8d9afb1..e54b1f1 100644
--- a/Documentation/features/locking/rwsem-optimized/arch-support.txt
+++ b/Documentation/features/locking/rwsem-optimized/arch-support.txt
@@ -1,6 +1,6 @@
 #
 # Feature name:          rwsem-optimized
-#         Kconfig:       Optimized asm/rwsem.h
+#         Kconfig:       !RWSEM_GENERIC_SPINLOCK
 #         description:   arch provides optimized rwsem APIs
 #
     -----------------------
@@ -8,8 +8,8 @@
     -----------------------
     |       alpha: |  ok  |
     |         arc: | TODO |
-    |         arm: | TODO |
-    |       arm64: | TODO |
+    |         arm: |  ok  |
+    |       arm64: |  ok  |
     |         c6x: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -17,14 +17,16 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
-    |          um: | TODO |
+    |          um: |  ok  |
     |   unicore32: | TODO |
     |         x86: |  ok  |
     |      xtensa: |  ok  |
diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt
index d01239e..7331402 100644
--- a/Documentation/features/perf/kprobes-event/arch-support.txt
+++ b/Documentation/features/perf/kprobes-event/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: |  ok  |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |         c6x: | TODO |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
@@ -17,13 +17,15 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: |  ok  |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: |  ok  |
-    |       sparc: | TODO |
+    |       sparc: |  ok  |
     |          um: | TODO |
     |   unicore32: | TODO |
     |         x86: |  ok  |
diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt
index 458faba..53feeee 100644
--- a/Documentation/features/perf/perf-regs/arch-support.txt
+++ b/Documentation/features/perf/perf-regs/arch-support.txt
@@ -17,11 +17,13 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |        s390: | TODO |
+    |       riscv: | TODO |
+    |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt
index 545d01c..1616434 100644
--- a/Documentation/features/perf/perf-stackdump/arch-support.txt
+++ b/Documentation/features/perf/perf-stackdump/arch-support.txt
@@ -17,11 +17,13 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |        s390: | TODO |
+    |       riscv: | TODO |
+    |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
     |          um: | TODO |
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
index 85a6c9d..dbdf629 100644
--- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -40,10 +40,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/sched/numa-balancing/arch-support.txt b/Documentation/features/sched/numa-balancing/arch-support.txt
index 3475088..c68bb2c 100644
--- a/Documentation/features/sched/numa-balancing/arch-support.txt
+++ b/Documentation/features/sched/numa-balancing/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: |  ..  |
     |         arm: |  ..  |
-    |       arm64: |  ..  |
+    |       arm64: |  ok  |
     |         c6x: |  ..  |
     |       h8300: |  ..  |
     |     hexagon: |  ..  |
@@ -17,11 +17,13 @@
     |        m68k: |  ..  |
     |  microblaze: |  ..  |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: |  ..  |
     |    openrisc: |  ..  |
     |      parisc: |  ..  |
     |     powerpc: |  ok  |
-    |        s390: |  ..  |
+    |       riscv: | TODO |
+    |        s390: |  ok  |
     |          sh: |  ..  |
     |       sparc: | TODO |
     |          um: |  ..  |
diff --git a/Documentation/features/scripts/features-refresh.sh b/Documentation/features/scripts/features-refresh.sh
new file mode 100755
index 0000000..9e72d38
--- /dev/null
+++ b/Documentation/features/scripts/features-refresh.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+#
+# Small script that refreshes the kernel feature support status in place.
+#
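+# Usage: run from the top of the kernel source tree (the script uses
+# paths relative to it), e.g.:
+#
+#	Documentation/features/scripts/features-refresh.sh
+#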
+
+for F_FILE in Documentation/features/*/*/arch-support.txt; do
+	F=$(grep "^#         Kconfig:" "$F_FILE" | cut -c26-)
+
+	#
+	# Each feature F is identified by a pair (O, K), where 'O' can
+	# be either the empty string (for 'nop') or "not" (the logical
+	# negation operator '!'); other operators are not supported.
+	#
+	O=""
+	K=$F
+	if [[ "$F" == !* ]]; then
+		O="not"
+		K=$(echo $F | sed -e 's/^!//g')
+	fi
+
+	#
+	# F := (O, K) is 'valid' iff there is a Kconfig file (for some
+	# arch) which contains K.
+	#
+	# Notice that this definition entails an 'asymmetry' between
+	# the case 'O = ""' and the case 'O = "not"'. E.g., F may be
+	# _invalid_ if:
+	#
+	# [case 'O = ""']
+	#   1) no arch provides support for F,
+	#   2) K does not exist (e.g., it was renamed/mis-typed);
+	#
+	# [case 'O = "not"']
+	#   3) all archs provide support for F,
+	#   4) as in (2).
+	#
+	# The rationale for adopting this definition (and, thus, for
+	# keeping the asymmetry) is:
+	#
+	#       We want to be able to 'detect' (2) (or (4)).
+	#
+	# (1) and (3) may further warn the developers about the fact
+	# that K can be removed.
+	#
+	F_VALID="false"
+	for ARCH_DIR in arch/*/; do
+		K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+		K_GREP=$(grep "$K" $K_FILES)
+		if [ ! -z "$K_GREP" ]; then
+			F_VALID="true"
+			break
+		fi
+	done
+	if [ "$F_VALID" = "false" ]; then
+		printf "WARNING: '%s' is not a valid Kconfig\n" "$F"
+	fi
+
+	T_FILE="$F_FILE.tmp"
+	grep "^#" $F_FILE > $T_FILE
+	echo "    -----------------------" >> $T_FILE
+	echo "    |         arch |status|" >> $T_FILE
+	echo "    -----------------------" >> $T_FILE
+	for ARCH_DIR in arch/*/; do
+		ARCH=$(echo $ARCH_DIR | sed -e 's/arch//g' | sed -e 's/\///g')
+		K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+		K_GREP=$(grep "$K" $K_FILES)
+		#
+		# Arch support status values for (O, K) are updated according
+		# to the following rules.
+		#
+		#   - ("", K) is 'supported by a given arch', if there is a
+		#     Kconfig file for that arch which contains K;
+		#
+		#   - ("not", K) is 'supported by a given arch', if there is
+		#     no Kconfig file for that arch which contains K;
+		#
+		#   - otherwise: preserve the previous status value (if any),
+		#                default to 'not yet supported'.
+		#
+		# Notice that, according these rules, invalid features may be
+		# updated/modified.
+		#
+		if [ "$O" = "" ] && [ ! -z "$K_GREP" ]; then
+			printf "    |%12s: |  ok  |\n" "$ARCH" >> $T_FILE
+		elif [ "$O" = "not" ] && [ -z "$K_GREP" ]; then
+			printf "    |%12s: |  ok  |\n" "$ARCH" >> $T_FILE
+		else
+			S=$(grep -v "^#" "$F_FILE" | grep " $ARCH:")
+			if [ ! -z "$S" ]; then
+				echo "$S" >> $T_FILE
+			else
+				printf "    |%12s: | TODO |\n" "$ARCH" \
+					>> $T_FILE
+			fi
+		fi
+	done
+	echo "    -----------------------" >> $T_FILE
+	mv $T_FILE $F_FILE
+done
diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
index e4fad58..d4271b4 100644
--- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt
+++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
-    |      parisc: | TODO |
-    |     powerpc: | TODO |
+    |      parisc: |  ok  |
+    |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
index 8052904..83d9e68 100644
--- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt
+++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
@@ -17,12 +17,14 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: | TODO |
-    |          sh: | TODO |
+    |          sh: |  ok  |
     |       sparc: | TODO |
     |          um: | TODO |
     |   unicore32: | TODO |
diff --git a/Documentation/features/time/clockevents/arch-support.txt b/Documentation/features/time/clockevents/arch-support.txt
index 7c76b94..3d4908f 100644
--- a/Documentation/features/time/clockevents/arch-support.txt
+++ b/Documentation/features/time/clockevents/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: |  ok  |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
+    |       nds32: |  ok  |
     |       nios2: |  ok  |
     |    openrisc: |  ok  |
-    |      parisc: | TODO |
+    |      parisc: |  ok  |
     |     powerpc: |  ok  |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt
index 9433b3e..c29974a 100644
--- a/Documentation/features/time/context-tracking/arch-support.txt
+++ b/Documentation/features/time/context-tracking/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: |  ok  |
diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt
index 212dde0..8d73c46 100644
--- a/Documentation/features/time/irq-time-acct/arch-support.txt
+++ b/Documentation/features/time/irq-time-acct/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ..  |
-    |     powerpc: |  ..  |
+    |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ..  |
     |          sh: | TODO |
     |       sparc: |  ..  |
diff --git a/Documentation/features/time/modern-timekeeping/arch-support.txt b/Documentation/features/time/modern-timekeeping/arch-support.txt
index 4074028..e7c6ea6 100644
--- a/Documentation/features/time/modern-timekeeping/arch-support.txt
+++ b/Documentation/features/time/modern-timekeeping/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
+    |       nds32: |  ok  |
     |       nios2: |  ok  |
     |    openrisc: |  ok  |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt
index a394d88..4646457 100644
--- a/Documentation/features/time/virt-cpuacct/arch-support.txt
+++ b/Documentation/features/time/virt-cpuacct/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: |  ok  |
diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt
index 082f93d..1f71d09 100644
--- a/Documentation/features/vm/ELF-ASLR/arch-support.txt
+++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
-    |      parisc: | TODO |
+    |      parisc: |  ok  |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/vm/PG_uncached/arch-support.txt b/Documentation/features/vm/PG_uncached/arch-support.txt
index 605e0ab..fbd5aa4 100644
--- a/Documentation/features/vm/PG_uncached/arch-support.txt
+++ b/Documentation/features/vm/PG_uncached/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt
index 7a8eb0b..5d7ecc3 100644
--- a/Documentation/features/vm/THP/arch-support.txt
+++ b/Documentation/features/vm/THP/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: |  ..  |
     |  microblaze: |  ..  |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: |  ..  |
     |    openrisc: |  ..  |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: |  ..  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt
index 35fb99b..f7af967 100644
--- a/Documentation/features/vm/TLB/arch-support.txt
+++ b/Documentation/features/vm/TLB/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: |  ..  |
     |  microblaze: |  ..  |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: |  ..  |
     |    openrisc: |  ..  |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index ed8b943..d0713cc 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: | TODO |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt
index 589947b..8527601 100644
--- a/Documentation/features/vm/ioremap_prot/arch-support.txt
+++ b/Documentation/features/vm/ioremap_prot/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: |  ok  |
     |       sparc: | TODO |
diff --git a/Documentation/features/vm/numa-memblock/arch-support.txt b/Documentation/features/vm/numa-memblock/arch-support.txt
index 8b8bea0..1a98805 100644
--- a/Documentation/features/vm/numa-memblock/arch-support.txt
+++ b/Documentation/features/vm/numa-memblock/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: |  ..  |
     |         arm: |  ..  |
-    |       arm64: |  ..  |
+    |       arm64: |  ok  |
     |         c6x: |  ..  |
     |       h8300: |  ..  |
     |     hexagon: |  ..  |
@@ -17,10 +17,12 @@
     |        m68k: |  ..  |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
+    |       nds32: | TODO |
     |       nios2: |  ..  |
     |    openrisc: |  ..  |
     |      parisc: |  ..  |
     |     powerpc: |  ok  |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 055004f..6a608a6 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -17,10 +17,12 @@
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
+    |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
+    |       riscv: | TODO |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 75d2d57..2c39133 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -69,31 +69,31 @@
 
 locking rules:
 	all may block
-		i_mutex(inode)
-lookup:		yes
-create:		yes
-link:		yes (both)
-mknod:		yes
-symlink:	yes
-mkdir:		yes
-unlink:		yes (both)
-rmdir:		yes (both)	(see below)
-rename:	yes (all)	(see below)
+		i_rwsem(inode)
+lookup:		shared
+create:		exclusive
+link:		exclusive (both)
+mknod:		exclusive
+symlink:	exclusive
+mkdir:		exclusive
+unlink:		exclusive (both)
+rmdir:		exclusive (both)(see below)
+rename:		exclusive (all)	(see below)
 readlink:	no
 get_link:	no
-setattr:	yes
+setattr:	exclusive
 permission:	no (may not block if called in rcu-walk mode)
 get_acl:	no
 getattr:	no
 listxattr:	no
 fiemap:		no
 update_time:	no
-atomic_open:	yes
+atomic_open:	exclusive
 tmpfile:	no
 
 
-	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
-victim.
+	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
+	exclusive on victim.
 	cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
 
 See Documentation/filesystems/directory-locking for more detailed discussion
@@ -111,10 +111,10 @@
 
 locking rules:
 	all may block
-		i_mutex(inode)
+		i_rwsem(inode)
 list:		no
 get:		no
-set:		yes
+set:		exclusive
 
 --------------------------- super_operations ---------------------------
 prototypes:
@@ -217,14 +217,14 @@
 locking rules:
 	All except set_page_dirty and freepage may block
 
-			PageLocked(page)	i_mutex
+			PageLocked(page)	i_rwsem
 writepage:		yes, unlocks (see below)
 readpage:		yes, unlocks
 writepages:
 set_page_dirty		no
 readpages:
-write_begin:		locks the page		yes
-write_end:		yes, unlocks		yes
+write_begin:		locks the page		exclusive
+write_end:		yes, unlocks		exclusive
 bmap:
 invalidatepage:		yes
 releasepage:		yes
@@ -439,7 +439,10 @@
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
-	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	int (*iterate_shared) (struct file *, struct dir_context *);
+	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+	__poll_t (*poll_mask) (struct file *, __poll_t);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
@@ -470,7 +473,7 @@
 };
 
 locking rules:
-	All may block.
+	All except for ->poll_mask may block.
 
 ->llseek() locking has moved from llseek to the individual llseek
 implementations.  If your fs is not using generic_file_llseek, you
@@ -480,6 +483,10 @@
 Note: this does not protect the file->f_pos against concurrent modifications
 since this is something the userspace has to take care about.
 
+->iterate() is called with i_rwsem exclusive.
+
+->iterate_shared() is called with i_rwsem at least shared.
+
 ->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
 Most instances call fasync_helper(), which does that maintenance, so it's
 not normally something one needs to worry about.  Return values > 0 will be
@@ -498,6 +505,9 @@
 the lease within the individual filesystem to record the result of the
 operation
 
+->poll_mask can be called with or without the waitqueue lock for the waitqueue
+returned from ->get_poll_head.
+
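+For illustration only, a minimal pair of implementations satisfying these
+rules might look as below (a hypothetical "mydev" driver; all names here
+are illustrative, not an in-tree example):
+
+	static struct wait_queue_head *mydev_get_poll_head(struct file *file,
+							   __poll_t events)
+	{
+		struct mydev *dev = file->private_data;
+
+		/* always return the same head for this device */
+		return &dev->waitq;
+	}
+
+	static __poll_t mydev_poll_mask(struct file *file, __poll_t events)
+	{
+		struct mydev *dev = file->private_data;
+
+		/* must not block; waitqueue lock may or may not be held */
+		return mydev_data_ready(dev) ? EPOLLIN | EPOLLRDNORM : 0;
+	}
+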
 --------------------------- dquot_operations -------------------------------
 prototypes:
 	int (*write_dquot) (struct dquot *);
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index cfbc18f..48b424d 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -191,11 +191,21 @@
 
 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
 - AES-128-CBC for contents and AES-128-CTS-CBC for filenames
+- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames
 
 It is strongly recommended to use AES-256-XTS for contents encryption.
 AES-128-CBC was added only for low-powered embedded devices with
 crypto accelerators such as CAAM or CESA that do not support XTS.
 
+Similarly, Speck128/256 support was only added for older or low-end
+CPUs which cannot do AES fast enough -- especially ARM CPUs which have
+NEON instructions but not the Cryptography Extensions -- and for which
+it would not otherwise be feasible to use encryption at all.  It is
+not recommended to use Speck on CPUs that have AES instructions.
+Speck support is only available if it has been enabled in the crypto
+API via CONFIG_CRYPTO_SPECK.  Also, on ARM platforms, to get
+acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled.
+
 New encryption modes can be added relatively easily, without changes
 to individual filesystems.  However, authenticated encryption (AE)
 modes are not currently supported because of the difficulty of dealing
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 2a84bb3..520f6a8 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -515,7 +515,8 @@
 
 The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
 bits on both physical and virtual pages associated with a process, and the
-soft-dirty bit on pte (see Documentation/vm/soft-dirty.txt for details).
+soft-dirty bit on pte (see Documentation/admin-guide/mm/soft-dirty.rst
+for details).
 To clear the bits for all the pages associated with the process
     > echo 1 > /proc/PID/clear_refs
 
@@ -536,7 +537,8 @@
 
 The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
 using /proc/kpageflags and number of times a page is mapped using
-/proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt.
+/proc/kpagecount. For detailed explanation, see
+Documentation/admin-guide/mm/pagemap.rst.
 
 The /proc/pid/numa_maps is an extension based on maps, showing the memory
 locality and binding policy, as well as the memory usage (in pages) of
@@ -564,7 +566,7 @@
 
 Where:
 "address" is the starting address for the mapping;
-"policy" reports the NUMA memory policy set for the mapping (see vm/numa_memory_policy.txt);
+"policy" reports the NUMA memory policy set for the mapping (see Documentation/admin-guide/mm/numa_memory_policy.rst);
 "mapping details" summarizes mapping data such as mapping type, page usage counters,
 node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page
 size, in KB, that is backing the mapping up.
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index a85355c..d06e9a5 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -105,8 +105,9 @@
 NUMA memory allocation policies have optional flags that can be used in
 conjunction with their modes.  These optional flags can be specified
 when tmpfs is mounted by appending them to the mode before the NodeList.
-See Documentation/vm/numa_memory_policy.txt for a list of all available
-memory allocation policy mode flags and their effect on memory policy.
+See Documentation/admin-guide/mm/numa_memory_policy.rst for a list of
+all available memory allocation policy mode flags and their effect on
+memory policy.
 
 	=static		is equivalent to	MPOL_F_STATIC_NODES
 	=relative	is equivalent to	MPOL_F_RELATIVE_NODES
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 5fd325d..829a7b7 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -856,7 +856,9 @@
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
-	unsigned int (*poll) (struct file *, struct poll_table_struct *);
+	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+	__poll_t (*poll_mask) (struct file *, __poll_t);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
@@ -901,6 +903,17 @@
 	activity on this file and (optionally) go to sleep until there
 	is activity. Called by the select(2) and poll(2) system calls
 
+  get_poll_head: Returns the struct wait_queue_head that callers can
+  wait on.  Callers need to check the returned events using ->poll_mask
+  once woken.  Can return NULL to indicate polling is not supported,
+  or any error code using the ERR_PTR convention to indicate that a
+  grave error occurred and ->poll_mask shall not be called.
+
+  poll_mask: return the mask of EPOLL* values describing the file descriptor
+  state.  Called either before going to sleep on the waitqueue returned by
+  get_poll_head, or after it has been woken.  If ->get_poll_head and
+  ->poll_mask are implemented, ->poll does not need to be implemented.
+
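+  A minimal sketch, assuming a hypothetical character driver that keeps a
+  wait queue and a receive kfifo in its private data:
+
+	static struct wait_queue_head *foo_get_poll_head(struct file *file,
+							 __poll_t events)
+	{
+		struct foo_dev *foo = file->private_data;
+
+		return &foo->waitq;
+	}
+
+	static __poll_t foo_poll_mask(struct file *file, __poll_t events)
+	{
+		struct foo_dev *foo = file->private_data;
+		__poll_t mask = 0;
+
+		/* must not block; just report the current state */
+		if (!kfifo_is_empty(&foo->rx_fifo))
+			mask |= EPOLLIN | EPOLLRDNORM;
+		return mask;
+	}
+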
   unlocked_ioctl: called by the ioctl(2) system call.
 
   compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
diff --git a/Documentation/fpga/fpga-mgr.txt b/Documentation/fpga/fpga-mgr.txt
deleted file mode 100644
index cc6413e..0000000
--- a/Documentation/fpga/fpga-mgr.txt
+++ /dev/null
@@ -1,199 +0,0 @@
-FPGA Manager Core
-
-Alan Tull 2015
-
-Overview
-========
-
-The FPGA manager core exports a set of functions for programming an FPGA with
-an image.  The API is manufacturer agnostic.  All manufacturer specifics are
-hidden away in a low level driver which registers a set of ops with the core.
-The FPGA image data itself is very manufacturer specific, but for our purposes
-it's just binary data.  The FPGA manager core won't parse it.
-
-The FPGA image to be programmed can be in a scatter gather list, a single
-contiguous buffer, or a firmware file.  Because allocating contiguous kernel
-memory for the buffer should be avoided, users are encouraged to use a scatter
-gather list instead if possible.
-
-The particulars for programming the image are presented in a structure (struct
-fpga_image_info).  This struct contains parameters such as pointers to the
-FPGA image as well as image-specific particulars such as whether the image was
-built for full or partial reconfiguration.
-
-API Functions:
-==============
-
-To program the FPGA:
---------------------
-
-	int fpga_mgr_load(struct fpga_manager *mgr,
-			  struct fpga_image_info *info);
-
-Load the FPGA from an image which is indicated in the info.  If successful,
-the FPGA ends up in operating mode.  Return 0 on success or a negative error
-code.
-
-To allocate or free a struct fpga_image_info:
----------------------------------------------
-
-	struct fpga_image_info *fpga_image_info_alloc(struct device *dev);
-
-	void fpga_image_info_free(struct fpga_image_info *info);
-
-To get/put a reference to a FPGA manager:
------------------------------------------
-
-	struct fpga_manager *of_fpga_mgr_get(struct device_node *node);
-	struct fpga_manager *fpga_mgr_get(struct device *dev);
-	void fpga_mgr_put(struct fpga_manager *mgr);
-
-Given a DT node or device, get a reference to a FPGA manager.  This pointer
-can be saved until you are ready to program the FPGA.  fpga_mgr_put releases
-the reference.
-
-
-To get exclusive control of a FPGA manager:
--------------------------------------------
-
-	int fpga_mgr_lock(struct fpga_manager *mgr);
-	void fpga_mgr_unlock(struct fpga_manager *mgr);
-
-The user should call fpga_mgr_lock and verify that it returns 0 before
-attempting to program the FPGA.  Likewise, the user should call
-fpga_mgr_unlock when done programming the FPGA.
-
-
-To register or unregister the low level FPGA-specific driver:
--------------------------------------------------------------
-
-	int fpga_mgr_register(struct device *dev, const char *name,
-			      const struct fpga_manager_ops *mops,
-			      void *priv);
-
-	void fpga_mgr_unregister(struct device *dev);
-
-Use of these two functions is described below in "How To Support a new FPGA
-device."
-
-
-How to write an image buffer to a supported FPGA
-================================================
-#include <linux/fpga/fpga-mgr.h>
-
-struct fpga_manager *mgr;
-struct fpga_image_info *info;
-int ret;
-
-/*
- * Get a reference to FPGA manager.  The manager is not locked, so you can
- * hold onto this reference without it preventing programming.
- *
- * This example uses the device node of the manager.  Alternatively, use
- * fpga_mgr_get(dev) instead if you have the device.
- */
-mgr = of_fpga_mgr_get(mgr_node);
-
-/* struct with information about the FPGA image to program. */
-info = fpga_image_info_alloc(dev);
-
-/* flags indicates whether to do full or partial reconfiguration */
-info->flags = FPGA_MGR_PARTIAL_RECONFIG;
-
-/*
- * At this point, indicate where the image is. This is pseudo-code; you're
- * going to use one of these three.
- */
-if (image is in a scatter gather table) {
-
-	info->sgt = [your scatter gather table]
-
-} else if (image is in a buffer) {
-
-	info->buf = [your image buffer]
-	info->count = [image buffer size]
-
-} else if (image is in a firmware file) {
-
-	info->firmware_name = devm_kstrdup(dev, firmware_name, GFP_KERNEL);
-
-}
-
-/* Get exclusive control of FPGA manager */
-ret = fpga_mgr_lock(mgr);
-
-/* Load the buffer to the FPGA */
-ret = fpga_mgr_buf_load(mgr, &info, buf, count);
-
-/* Release the FPGA manager */
-fpga_mgr_unlock(mgr);
-fpga_mgr_put(mgr);
-
-/* Deallocate the image info if you're done with it */
-fpga_image_info_free(info);
-
-How to support a new FPGA device
-================================
-To add another FPGA manager, write a driver that implements a set of ops.  The
-probe function calls fpga_mgr_register(), such as:
-
-static const struct fpga_manager_ops socfpga_fpga_ops = {
-       .write_init = socfpga_fpga_ops_configure_init,
-       .write = socfpga_fpga_ops_configure_write,
-       .write_complete = socfpga_fpga_ops_configure_complete,
-       .state = socfpga_fpga_ops_state,
-};
-
-static int socfpga_fpga_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct socfpga_fpga_priv *priv;
-	int ret;
-
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	/* ... do ioremaps, get interrupts, etc. and save
-	   them in priv... */
-
-	return fpga_mgr_register(dev, "Altera SOCFPGA FPGA Manager",
-				 &socfpga_fpga_ops, priv);
-}
-
-static int socfpga_fpga_remove(struct platform_device *pdev)
-{
-	fpga_mgr_unregister(&pdev->dev);
-
-	return 0;
-}
-
-
-The ops will implement whatever device specific register writes are needed to
-do the programming sequence for this particular FPGA.  These ops return 0 for
-success or negative error codes otherwise.
-
-The programming sequence is:
- 1. .write_init
- 2. .write or .write_sg (may be called once or multiple times)
- 3. .write_complete
-
-The .write_init function will prepare the FPGA to receive the image data.  The
-buffer passed into .write_init will be atmost .initial_header_size bytes long,
-if the whole bitstream is not immediately available then the core code will
-buffer up at least this much before starting.
-
-The .write function writes a buffer to the FPGA. The buffer may be contain the
-whole FPGA image or may be a smaller chunk of an FPGA image.  In the latter
-case, this function is called multiple times for successive chunks. This interface
-is suitable for drivers which use PIO.
-
-The .write_sg version behaves the same as .write except the input is a sg_table
-scatter list. This interface is suitable for drivers which use DMA.
-
-The .write_complete function is called after all the image has been written
-to put the FPGA into operating mode.
-
-The ops include a .state function which will read the hardware FPGA manager and
-return a code of type enum fpga_mgr_states.  It doesn't result in a change in
-hardware state.
diff --git a/Documentation/fpga/fpga-region.txt b/Documentation/fpga/fpga-region.txt
deleted file mode 100644
index 139a02b..0000000
--- a/Documentation/fpga/fpga-region.txt
+++ /dev/null
@@ -1,95 +0,0 @@
-FPGA Regions
-
-Alan Tull 2017
-
-CONTENTS
- - Introduction
- - The FPGA region API
- - Usage example
-
-Introduction
-============
-
-This document is meant to be an brief overview of the FPGA region API usage.  A
-more conceptual look at regions can be found in [1].
-
-For the purposes of this API document, let's just say that a region associates
-an FPGA Manager and a bridge (or bridges) with a reprogrammable region of an
-FPGA or the whole FPGA.  The API provides a way to register a region and to
-program a region.
-
-Currently the only layer above fpga-region.c in the kernel is the Device Tree
-support (of-fpga-region.c) described in [1].  The DT support layer uses regions
-to program the FPGA and then DT to handle enumeration.  The common region code
-is intended to be used by other schemes that have other ways of accomplishing
-enumeration after programming.
-
-An fpga-region can be set up to know the following things:
-* which FPGA manager to use to do the programming
-* which bridges to disable before programming and enable afterwards.
-
-Additional info needed to program the FPGA image is passed in the struct
-fpga_image_info [2] including:
-* pointers to the image as either a scatter-gather buffer, a contiguous
-  buffer, or the name of firmware file
-* flags indicating specifics such as whether the image if for partial
-  reconfiguration.
-
-===================
-The FPGA region API
-===================
-
-To register or unregister a region:
------------------------------------
-
-	int fpga_region_register(struct device *dev,
-				 struct fpga_region *region);
-	int fpga_region_unregister(struct fpga_region *region);
-
-An example of usage can be seen in the probe function of [3]
-
-To program an FPGA:
--------------------
-	int fpga_region_program_fpga(struct fpga_region *region);
-
-This function operates on info passed in the fpga_image_info
-(region->info).
-
-This function will attempt to:
- * lock the region's mutex
- * lock the region's FPGA manager
- * build a list of FPGA bridges if a method has been specified to do so
- * disable the bridges
- * program the FPGA
- * re-enable the bridges
- * release the locks
-
-=============
-Usage example
-=============
-
-First, allocate the info struct:
-
-	info = fpga_image_info_alloc(dev);
-	if (!info)
-		return -ENOMEM;
-
-Set flags as needed, i.e.
-
-	info->flags |= FPGA_MGR_PARTIAL_RECONFIG;
-
-Point to your FPGA image, such as:
-
-	info->sgt = &sgt;
-
-Add info to region and do the programming:
-
-	region->info = info;
-	ret = fpga_region_program_fpga(region);
-
-Then enumerate whatever hardware has appeared in the FPGA.
-
---
-[1] ../devicetree/bindings/fpga/fpga-region.txt
-[2] ./fpga-mgr.txt
-[3] ../../drivers/fpga/of-fpga-region.c
diff --git a/Documentation/fpga/overview.txt b/Documentation/fpga/overview.txt
deleted file mode 100644
index 0f1236e..0000000
--- a/Documentation/fpga/overview.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Linux kernel FPGA support
-
-Alan Tull 2017
-
-The main point of this project has been to separate the out the upper layers
-that know when to reprogram a FPGA from the lower layers that know how to
-reprogram a specific FPGA device.  The intention is to make this manufacturer
-agnostic, understanding that of course the FPGA images are very device specific
-themselves.
-
-The framework in the kernel includes:
-* low level FPGA manager drivers that know how to program a specific device
-* the fpga-mgr framework they are registered with
-* low level FPGA bridge drivers for hard/soft bridges which are intended to
-  be disable during FPGA programming
-* the fpga-bridge framework they are registered with
-* the fpga-region framework which associates and controls managers and bridges
-  as reconfigurable regions
-* the of-fpga-region support for reprogramming FPGAs when device tree overlays
-  are applied.
-
-I would encourage you the user to add code that creates FPGA regions rather
-that trying to control managers and bridges separately.
diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst
index e8c8441..f982558 100644
--- a/Documentation/gpu/drivers.rst
+++ b/Documentation/gpu/drivers.rst
@@ -10,8 +10,10 @@
    tegra
    tinydrm
    tve200
+   v3d
    vc4
    bridge/dw-hdmi
+   xen-front
 
 .. only::  subproject and html
 
diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index 41dc881..055df45 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -58,6 +58,12 @@
 .. kernel-doc:: drivers/gpu/drm/i915/intel_gvt.c
    :internal:
 
+Workarounds
+-----------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c
+   :doc: Hardware workarounds
+
 Display Hardware Handling
 =========================
 
@@ -249,6 +255,103 @@
 This sections covers all things related to the GEM implementation in the
 i915 driver.
 
+Intel GPU Basics
+----------------
+
+An Intel GPU has multiple engines. There are several engine types:
+
+- The RCS engine is for rendering 3D and performing compute; it is named
+  `I915_EXEC_RENDER` in user space.
+- BCS is a blitting (copy) engine; it is named `I915_EXEC_BLT` in user
+  space.
+- VCS is a video encode and decode engine; it is named `I915_EXEC_BSD`
+  in user space.
+- VECS is a video enhancement engine; it is named `I915_EXEC_VEBOX` in
+  user space.
+- The enumeration `I915_EXEC_DEFAULT` does not refer to a specific engine;
+  instead it is used by user space to specify a default rendering engine
+  (for 3D) that may or may not be the same as RCS.
+
+The Intel GPU family is a family of integrated GPUs using Unified
+Memory Access. To have the GPU "do work", user space feeds the
+GPU batch buffers via one of the ioctls `DRM_IOCTL_I915_GEM_EXECBUFFER2`
+or `DRM_IOCTL_I915_GEM_EXECBUFFER2_WR`. Most such batchbuffers will
+instruct the GPU to perform work (for example rendering) and that work
+needs memory from which to read and memory to which to write. All memory
+is encapsulated within GEM buffer objects (usually created with the ioctl
+`DRM_IOCTL_I915_GEM_CREATE`). An ioctl providing a batchbuffer for the GPU
+to execute will also list all GEM buffer objects that the batchbuffer reads
+and/or writes. For implementation details of memory management see
+`GEM BO Management Implementation Details`_.
+
+The i915 driver allows user space to create a context via the ioctl
+`DRM_IOCTL_I915_GEM_CONTEXT_CREATE` which is identified by a 32-bit
+integer. Such a context should be viewed by user-space as -loosely-
+analogous to the idea of a CPU process of an operating system. The i915
+driver guarantees that commands issued to a fixed context are to be
+executed so that writes of a previously issued command are seen by
+reads of following commands. Actions issued between different contexts
+(even if from the same file descriptor) are NOT given that guarantee
+and the only way to synchronize across contexts (even from the same
+file descriptor) is through the use of fences. At least as far back as
+Gen4, a context also carries with it a GPU HW context; the HW context is
+essentially (most of, at least) the state of the GPU. In addition to the
+ordering guarantees, the kernel will restore GPU state via the HW context
+when commands are issued to a context; this saves user space the need to
+restore (most of, at least) the GPU state at the start of each
+batchbuffer. The non-deprecated ioctls to submit batchbuffer
+work can pass that ID (in the lower bits of drm_i915_gem_execbuffer2::rsvd1)
+to identify what context to use with the command.
+
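+As a hedged illustration only (not a complete program; ``fd`` is assumed to
+be an open DRM file descriptor and the GEM buffer object list to be already
+set up)::
+
+    struct drm_i915_gem_context_create create = {};
+    struct drm_i915_gem_execbuffer2 execbuf = {};
+
+    /* create a context; the kernel fills in its 32-bit id */
+    ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
+
+    execbuf.buffers_ptr = (uintptr_t)exec_objects;  /* GEM BOs used */
+    execbuf.buffer_count = nr_objects;
+    execbuf.batch_len = batch_len;
+    execbuf.flags = I915_EXEC_RENDER;               /* target engine */
+    execbuf.rsvd1 = create.ctx_id;                  /* context to use */
+
+    ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
+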
+The GPU has its own memory management and address space. The kernel
+driver maintains the memory translation table for the GPU. For older
+GPUs (i.e. those before Gen8), there is a single global such translation
+table, a global Graphics Translation Table (GTT). For newer generation
+GPUs each context has its own translation table, called Per-Process
+Graphics Translation Table (PPGTT). Of important note is that although
+PPGTT is named per-process, it is actually per-context. When user space
+submits a batchbuffer, the kernel walks the list of GEM buffer objects
+used by the batchbuffer and guarantees that not only is the memory of
+each such GEM buffer object resident but it is also present in the
+(PP)GTT. If the GEM buffer object is not yet placed in the (PP)GTT,
+then it is given an address. Two consequences of this are: the kernel
+needs to edit the submitted batchbuffer to write the correct GPU address
+once a GEM BO is assigned one, and the kernel might evict a different
+GEM BO from the (PP)GTT to make room for another GEM BO. Consequently,
+the ioctls submitting a batchbuffer
+for execution also include a list of all locations within buffers that
+refer to GPU-addresses so that the kernel can edit the buffer correctly.
+This process is dubbed relocation.
+
+GEM BO Management Implementation Details
+----------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_vma.h
+   :doc: Virtual Memory Address
+
+Buffer Object Eviction
+----------------------
+
+This section documents the interface functions for evicting buffer
+objects to make space available in the virtual gpu address spaces. Note
+that this is mostly orthogonal to shrinking buffer objects caches, which
+has the goal to make main memory (shared with the gpu through the
+unified memory architecture) available.
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c
+   :internal:
+
+Buffer Object Memory Shrinking
+------------------------------
+
+This section documents the interface function for shrinking memory usage
+of buffer object caches. Shrinking is used to make main memory
+available. Note that this is mostly orthogonal to evicting buffer
+objects, which has the goal to make space in gpu virtual address spaces.
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
+   :internal:
+
 Batchbuffer Parsing
 -------------------
 
@@ -267,6 +370,12 @@
 .. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c
    :internal:
 
+User Batchbuffer Execution
+--------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_execbuffer.c
+   :doc: User command execution
+
 Logical Rings, Logical Ring Contexts and Execlists
 --------------------------------------------------
 
@@ -312,28 +421,14 @@
 .. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
    :doc: buffer object tiling
 
-Buffer Object Eviction
-----------------------
+WOPCM
+=====
 
-This section documents the interface functions for evicting buffer
-objects to make space available in the virtual gpu address spaces. Note
-that this is mostly orthogonal to shrinking buffer objects caches, which
-has the goal to make main memory (shared with the gpu through the
-unified memory architecture) available.
+WOPCM Layout
+------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c
-   :internal:
-
-Buffer Object Memory Shrinking
-------------------------------
-
-This section documents the interface function for shrinking memory usage
-of buffer object caches. Shrinking is used to make main memory
-available. Note that this is mostly orthogonal to evicting buffer
-objects, which has the goal to make space in gpu virtual address spaces.
-
-.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
-   :internal:
+.. kernel-doc:: drivers/gpu/drm/i915/intel_wopcm.c
+   :doc: WOPCM Layout
 
 GuC
 ===
@@ -359,6 +454,12 @@
 .. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fwif.h
    :doc: GuC Firmware Layout
 
+GuC Address Space
+-----------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_guc.c
+   :doc: GuC Address Space
+
 Tracing
 =======
 
diff --git a/Documentation/gpu/kms-properties.csv b/Documentation/gpu/kms-properties.csv
index 6b28b01..07ed22e 100644
--- a/Documentation/gpu/kms-properties.csv
+++ b/Documentation/gpu/kms-properties.csv
@@ -98,5 +98,4 @@
 ,,"""underscan vborder""",RANGE,"Min=0, Max=128",Connector,TBD
 ,Audio,“audio”,ENUM,"{ ""off"", ""on"", ""auto"" }",Connector,TBD
 ,FMT Dithering,“dither”,ENUM,"{ ""off"", ""on"" }",Connector,TBD
-rcar-du,Generic,"""alpha""",RANGE,"Min=0, Max=255",Plane,TBD
 ,,"""colorkey""",RANGE,"Min=0, Max=0x01ffffff",Plane,TBD
diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst
index f4d0b34..a7c150d 100644
--- a/Documentation/gpu/todo.rst
+++ b/Documentation/gpu/todo.rst
@@ -212,6 +212,24 @@
 
 Contact: Maintainer of the driver you plan to convert
 
+Clean up mmap forwarding
+------------------------
+
+A lot of drivers forward gem mmap calls to dma-buf mmap for imported buffers.
+Likewise, a lot of them forward dma-buf mmap to their gem mmap
+implementations. It would be great to refactor this all into a set of small
+common helpers.
+
+Contact: Daniel Vetter
+
+Put a reservation_object into drm_gem_object
+--------------------------------------------
+
+This would remove the need for the ->gem_prime_res_obj callback. It would also
+allow us to implement generic helpers for waiting for a bo, allowing for quite a
+bit of refactoring in the various wait ioctl implementations.
+
+Contact: Daniel Vetter
+
 idr_init_base()
 ---------------
 
diff --git a/Documentation/gpu/xen-front.rst b/Documentation/gpu/xen-front.rst
new file mode 100644
index 0000000..d988da7
--- /dev/null
+++ b/Documentation/gpu/xen-front.rst
@@ -0,0 +1,31 @@
+====================================================
+ drm/xen-front Xen para-virtualized frontend driver
+====================================================
+
+This frontend driver implements Xen para-virtualized display
+according to the display protocol described at
+include/xen/interface/io/displif.h
+
+Driver modes of operation in terms of display buffers used
+==========================================================
+
+.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
+   :doc: Driver modes of operation in terms of display buffers used
+
+Buffers allocated by the frontend driver
+----------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
+   :doc: Buffers allocated by the frontend driver
+
+Buffers allocated by the backend
+--------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
+   :doc: Buffers allocated by the backend
+
+Driver limitations
+==================
+
+.. kernel-doc:: drivers/gpu/drm/xen/xen_drm_front.h
+   :doc: Driver limitations
diff --git a/Documentation/hwmon/hwmon-kernel-api.txt b/Documentation/hwmon/hwmon-kernel-api.txt
index 53a8066..eb7a78a 100644
--- a/Documentation/hwmon/hwmon-kernel-api.txt
+++ b/Documentation/hwmon/hwmon-kernel-api.txt
@@ -71,7 +71,8 @@
 to register a hardware monitoring device. It creates the standard sysfs
 attributes in the hardware monitoring core, letting the driver focus on reading
 from and writing to the chip instead of having to bother with sysfs attributes.
-Its parameters are described in more detail below.
+The parent device parameter must not be NULL if the chip info parameter is
+non-NULL. The function's parameters are described in more detail below.
 
 devm_hwmon_device_register_with_info is similar to
 hwmon_device_register_with_info. However, it is device managed, meaning the
diff --git a/Documentation/hwmon/ltc2990 b/Documentation/hwmon/ltc2990
index c25211e..3ed68f6 100644
--- a/Documentation/hwmon/ltc2990
+++ b/Documentation/hwmon/ltc2990
@@ -8,6 +8,7 @@
     Datasheet: http://www.linear.com/product/ltc2990
 
 Author: Mike Looijmans <mike.looijmans@topic.nl>
+        Tom Levens <tom.levens@cern.ch>
 
 
 Description
@@ -16,10 +17,8 @@
 LTC2990 is a Quad I2C Voltage, Current and Temperature Monitor.
 The chip's inputs can measure 4 voltages, or two inputs together (1+2 and 3+4)
 can be combined to measure a differential voltage, which is typically used to
-measure current through a series resistor, or a temperature.
-
-This driver currently uses the 2x differential mode only. In order to support
-other modes, the driver will need to be expanded.
+measure current through a series resistor, or a temperature with an external
+diode.
 
 
 Usage Notes
@@ -32,12 +31,19 @@
 Sysfs attributes
 ----------------
 
+in0_input     Voltage at Vcc pin in millivolt (range 2.5V to 5V)
+temp1_input   Internal chip temperature in millidegrees Celsius
+
+A subset of the following attributes is visible, depending on the measurement
+mode of the chip.
+
+in[1-4]_input Voltage at V[1-4] pin in millivolt
+temp2_input   External temperature sensor TR1 in millidegrees Celsius
+temp3_input   External temperature sensor TR2 in millidegrees Celsius
+curr1_input   Current in mA across V1-V2 assuming a 1mOhm sense resistor
+curr2_input   Current in mA across V3-V4 assuming a 1mOhm sense resistor
+
 The "curr*_input" measurements actually report the voltage drop across the
 input pins in microvolts. This is equivalent to the current through a 1mOhm
 sense resistor. Divide the reported value by the actual sense resistor value
 in mOhm to get the actual value.
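+
+For example, with a 5 mOhm sense resistor, a curr1_input reading of 5000
+corresponds to 5000 / 5 = 1000 mA.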
-
-in0_input     Voltage at Vcc pin in millivolt (range 2.5V to 5V)
-temp1_input   Internal chip temperature in millidegrees Celcius
-curr1_input   Current in mA across v1-v2 assuming a 1mOhm sense resistor.
-curr2_input   Current in mA across v3-v4 assuming a 1mOhm sense resistor.
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 3b99ab9..fdc5857 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -45,7 +45,7 @@
 .. toctree::
    :maxdepth: 2
 
-   userspace-api/index	      
+   userspace-api/index
 
 
 Introduction to kernel development
@@ -89,6 +89,7 @@
    sound/index
    crypto/index
    filesystems/index
+   vm/index
 
 Architecture-specific documentation
 -----------------------------------
diff --git a/Documentation/ioctl/botching-up-ioctls.txt b/Documentation/ioctl/botching-up-ioctls.txt
index d02cfb4..883fb03 100644
--- a/Documentation/ioctl/botching-up-ioctls.txt
+++ b/Documentation/ioctl/botching-up-ioctls.txt
@@ -73,7 +73,9 @@
    future extensions is going right down the gutters since someone will submit
    an ioctl struct with random stack garbage in the yet unused parts. Which
    then bakes in the ABI that those fields can never be used for anything else
-   but garbage.
+   but garbage. This is also the reason why you must explicitly pad all
+   structures, even if you never use them in an array - the padding the compiler
+   might insert could contain garbage. A hedged illustration, using a
+   hypothetical struct, follows.
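+
+      struct foo_args {
+             __u64 handle;
+             __u32 flags;
+             __u32 pad;    /* explicit padding, must be zeroed */
+      };
+
+   Without the explicit "pad" member the compiler would add invisible tail
+   padding after "flags" to align the struct size, and that padding could
+   leak stack garbage into the ABI.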
 
  * Have simple testcases for all of the above.
 
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 7f7413e..a12488d 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -296,7 +296,8 @@
 0x90	00	drivers/cdrom/sbpcd.h
 0x92	00-0F	drivers/usb/mon/mon_bin.c
 0x93	60-7F	linux/auto_fs.h
-0x94	all	fs/btrfs/ioctl.h
+0x94	all	fs/btrfs/ioctl.h	Btrfs filesystem
+		and linux/fs.h		some lifted to vfs/generic
 0x97	00-7F	fs/ceph/ioctl.h		Ceph file system
 0x99	00-0F				537-Addinboard driver
 					<mailto:buk@buks.ipn.de>
@@ -327,6 +328,7 @@
 0xCA	80-BF	uapi/scsi/cxlflash_ioctl.h
 0xCB	00-1F	CBM serial IEC bus	in development:
 					<mailto:michael.klein@puffin.lb.shuttle.de>
+0xCC	00-0F	drivers/misc/ibmvmc.h	pseries VMC driver
 0xCD	01	linux/reiserfs_fs.h
 0xCF	02	fs/cifs/ioctl.c
 0xDB	00-0F	drivers/char/mwave/mwavepub.h
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index f5b9493..0e966e8 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -198,14 +198,6 @@
     enables the third modular state for all config symbols.
     At most one symbol may have the "modules" option set.
 
-  - "env"=<value>
-    This imports the environment variable into Kconfig. It behaves like
-    a default, except that the value comes from the environment, this
-    also means that the behaviour when mixing it with normal defaults is
-    undefined at this point. The symbol is currently not exported back
-    to the build environment (if this is desired, it can be done via
-    another symbol).
-
   - "allnoconfig_y"
     This declares the symbol as one that should have the value y when
     using "allnoconfig". Used for symbols that hide other symbols.
diff --git a/Documentation/kbuild/kconfig-macro-language.txt b/Documentation/kbuild/kconfig-macro-language.txt
new file mode 100644
index 0000000..07da2ea
--- /dev/null
+++ b/Documentation/kbuild/kconfig-macro-language.txt
@@ -0,0 +1,242 @@
+Concept
+-------
+
+The basic idea was inspired by Make. When we look at Make, we notice sort of
+two languages in one. One language describes dependency graphs consisting of
+targets and prerequisites. The other is a macro language for performing textual
+substitution.
+
+There is a clear distinction between the two language stages. For example,
+you can write a makefile as follows:
+
+    APP := foo
+    SRC := foo.c
+    CC := gcc
+
+    $(APP): $(SRC)
+            $(CC) -o $(APP) $(SRC)
+
+The macro language replaces the variable references with their expanded form,
+and handles the result as if the source file were written as follows:
+
+    foo: foo.c
+            gcc -o foo foo.c
+
+Then, Make analyzes the dependency graph and determines the targets to be
+updated.
+
+The idea is quite similar in Kconfig - it is possible to describe a Kconfig
+file like this:
+
+    CC := gcc
+
+    config CC_HAS_FOO
+            def_bool $(shell, $(srctree)/scripts/gcc-check-foo.sh $(CC))
+
+The macro language in Kconfig processes the source file into the following
+intermediate:
+
+    config CC_HAS_FOO
+            def_bool y
+
+Then, Kconfig moves onto the evaluation stage to resolve inter-symbol
+dependency as explained in kconfig-language.txt.
+
+
+Variables
+---------
+
+Like in Make, a variable in Kconfig works as a macro variable.  A macro
+variable is expanded "in place" to yield a text string that may then be
+expanded further. To get the value of a variable, enclose the variable name in
+$( ). The parentheses are required even for single-letter variable names; $X is
+a syntax error. The curly brace form as in ${CC} is not supported either.
+
+There are two types of variables: simply expanded variables and recursively
+expanded variables.
+
+A simply expanded variable is defined using the := assignment operator. Its
+righthand side is expanded immediately upon reading the line from the Kconfig
+file.
+
+A recursively expanded variable is defined using the = assignment operator.
+Its righthand side is simply stored as the value of the variable without
+expanding it in any way. Instead, the expansion is performed when the variable
+is used.
+
+There is another type of assignment operator; += is used to append text to a
+variable. The righthand side of += is expanded immediately if the lefthand
+side was originally defined as a simple variable. Otherwise, its evaluation is
+deferred.
+
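+For example (hypothetical variables):
+
+    X := hello
+    Y = y-$(X)
+    X += world
+
+Since X is simple, "world" is expanded and appended immediately, making X
+"hello world". Y stores "y-$(X)" literally, so $(Y) yields "y-hello world"
+at the point of use.
+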
+The variable reference can take parameters, in the following form:
+
+  $(name,arg1,arg2,arg3)
+
+You can consider the parameterized reference as a function (more precisely,
+a "user-defined function", in contrast to the "built-in functions" listed
+below).
+
+To be useful, a function must be expanded when it is used, since the same
+function expands differently when different parameters are passed. Hence, a
+user-defined
+function is defined using the = assignment operator. The parameters are
+referenced within the body definition with $(1), $(2), etc.
+
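+A hypothetical example:
+
+    greeting = $(1), my name is $(2)
+    $(info,$(greeting,Hello,Kconfig))
+
+This prints "Hello, my name is Kconfig" to stdout.
+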
+In fact, recursively expanded variables and user-defined functions are the same
+internally. (In other words, "variable" is "function with zero argument".)
+When we say "variable" in a broad sense, it includes "user-defined function".
+
+
+Built-in functions
+------------------
+
+Like Make, Kconfig provides several built-in functions. Every function takes a
+particular number of arguments.
+
+In Make, every built-in function takes at least one argument. Kconfig allows
+zero arguments for built-in functions, such as $(filename) and $(lineno). You
+could consider those as "built-in variables", but it is just a matter of
+naming. We say "built-in function" here to refer to natively supported
+functionality.
+
+Kconfig currently supports the following built-in functions.
+
+ - $(shell,command)
+
+  The "shell" function accepts a single argument that is expanded and passed
+  to a subshell for execution. The standard output of the command is then read
+  and returned as the value of the function. Every newline in the output is
+  replaced with a space. Any trailing newlines are deleted. The standard error
+  is not returned, nor is any program exit status.
+
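+  For example (a hypothetical use), the following stores the output of
+  uname in a variable:
+
+    version := $(shell, uname -r)
+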
+ - $(info,text)
+
+  The "info" function takes a single argument and prints it to stdout.
+  It evaluates to an empty string.
+
+ - $(warning-if,condition,text)
+
+  The "warning-if" function takes two arguments. If the condition part is "y",
+  the text part is sent to stderr. The text is prefixed with the name of the
+  current Kconfig file and the current line number.
+
+ - $(error-if,condition,text)
+
+  The "error-if" function is similar to "warning-if", but it terminates the
+  parsing immediately if the condition part is "y".
+
+ - $(filename)
+
+  The 'filename' takes no argument, and $(filename) is expanded to the file
+  name being parsed.
+
+ - $(lineno)
+
+  The 'lineno' takes no argument, and $(lineno) is expanded to the line number
+  being parsed.
+
+
+Make vs Kconfig
+---------------
+
+Kconfig adopts Make-like macro language, but the function call syntax is
+slightly different.
+
+A function call in Make looks like this:
+
+  $(func-name arg1,arg2,arg3)
+
+The function name and the first argument are separated by at least one
+whitespace. Then, leading whitespaces are trimmed from the first argument,
+while whitespaces in the other arguments are kept. You need to use a kind of
+trick to start the first parameter with spaces. For example, if you want
+to make "info" function print "  hello", you can write like follows:
+
+  empty :=
+  space := $(empty) $(empty)
+  $(info $(space)$(space)hello)
+
+Kconfig uses only commas for delimiters, and keeps all whitespaces in the
+function call. Some people prefer putting a space after each comma delimiter:
+
+  $(func-name, arg1, arg2, arg3)
+
+In this case, "func-name" will receive " arg1", " arg2", " arg3". The presence
+of leading spaces may matter depending on the function. The same applies to
+Make - for example, $(subst .c, .o, $(sources)) is a typical mistake; it
+replaces ".c" with " .o".
+
+In Make, a user-defined function is referenced by using a built-in function,
+'call', like this:
+
+    $(call my-func,arg1,arg2,arg3)
+
+Kconfig invokes user-defined functions and built-in functions in the same way.
+The omission of 'call' makes the syntax shorter.
+
+In Make, some functions treat commas verbatim instead of argument separators.
+For example, $(shell echo hello, world) runs the command "echo hello, world".
+Likewise, $(info hello, world) prints "hello, world" to stdout. You could say
+this is a _useful_ inconsistency.
+
+In Kconfig, for simpler implementation and grammatical consistency, commas that
+appear in the $( ) context are always delimiters. It means
+
+  $(shell, echo hello, world)
+
+is an error because it is passing two parameters where the 'shell' function
+accepts only one. To pass commas in arguments, you can use the following trick:
+
+  comma := ,
+  $(shell, echo hello$(comma) world)
+
+
+Caveats
+-------
+
+A variable (or function) cannot be expanded across tokens. So, you cannot use
+a variable as a shorthand for an expression that consists of multiple tokens.
+The following works:
+
+    RANGE_MIN := 1
+    RANGE_MAX := 3
+
+    config FOO
+            int "foo"
+            range $(RANGE_MIN) $(RANGE_MAX)
+
+But, the following does not work:
+
+    RANGES := 1 3
+
+    config FOO
+            int "foo"
+            range $(RANGES)
+
+A variable cannot be expanded to any keyword in Kconfig.  The following does
+not work:
+
+    MY_TYPE := tristate
+
+    config FOO
+            $(MY_TYPE) "foo"
+            default y
+
+Obviously from the design, $(shell, command) is expanded in the textual
+substitution phase. You cannot pass symbols to the 'shell' function.
+The following does not work as expected:
+
+    config ENDIAN_FLAG
+            string
+            default "-mbig-endian" if CPU_BIG_ENDIAN
+            default "-mlittle-endian" if CPU_LITTLE_ENDIAN
+
+    config CC_HAS_ENDIAN_FLAG
+            def_bool $(shell, $(srctree)/scripts/gcc-check-flag ENDIAN_FLAG)
+
+Instead, you can write it as follows so that every function call is
+statically expanded:
+
+    config CC_HAS_ENDIAN_FLAG
+            bool
+            default $(shell, $(srctree)/scripts/gcc-check-flag -mbig-endian) if CPU_BIG_ENDIAN
+            default $(shell, $(srctree)/scripts/gcc-check-flag -mlittle-endian) if CPU_LITTLE_ENDIAN
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 6dafc80..a02d6bb 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1920,9 +1920,6 @@
 		/* assign ownership */
 		desc->status = DEVICE_OWN;
 
-		/* force memory to sync before notifying device via MMIO */
-		wmb();
-
 		/* notify device of new descriptors */
 		writel(DESC_NOTIFY, doorbell);
 	}
@@ -1930,11 +1927,15 @@
      The dma_rmb() allows us guarantee the device has released ownership
      before we read the data from the descriptor, and the dma_wmb() allows
      us to guarantee the data is written to the descriptor before the device
-     can see it now has ownership.  The wmb() is needed to guarantee that the
-     cache coherent memory writes have completed before attempting a write to
-     the cache incoherent MMIO region.
+     can see it now has ownership.  Note that, when using writel(), a prior
+     wmb() is not needed to guarantee that the cache coherent memory writes
+     have completed before writing to the MMIO region.  The cheaper
+     writel_relaxed() does not provide this guarantee and must not be used
+     here.
 
-     See Documentation/DMA-API.txt for more information on consistent memory.
+     See the subsection "Kernel I/O barrier effects" for more information on
+     relaxed I/O accessors and the Documentation/DMA-API.txt file for more
+     information on consistent memory.
 
 
 MMIO WRITE BARRIER
@@ -2903,7 +2904,7 @@
 appropriate part of the kernel must invalidate the overlapping bits of the
 cache on each CPU.
 
-See Documentation/cachetlb.txt for more information on cache management.
+See Documentation/core-api/cachetlb.rst for more information on cache management.
 
 
 CACHE COHERENCY VS MMIO
@@ -3083,7 +3084,7 @@
 Memory barriers can be used to implement circular buffering without the need
 of a lock to serialise the producer with the consumer.  See:
 
-	Documentation/circular-buffers.txt
+	Documentation/core-api/circular-buffers.rst
 
 for details.
 
diff --git a/Documentation/misc-devices/ibmvmc.rst b/Documentation/misc-devices/ibmvmc.rst
new file mode 100644
index 0000000..46ded79
--- /dev/null
+++ b/Documentation/misc-devices/ibmvmc.rst
@@ -0,0 +1,226 @@
+.. SPDX-License-Identifier: GPL-2.0+
+======================================================
+IBM Virtual Management Channel Kernel Driver (IBMVMC)
+======================================================
+
+:Authors:
+	Dave Engebretsen <engebret@us.ibm.com>,
+	Adam Reznechek <adreznec@linux.vnet.ibm.com>,
+	Steven Royer <seroyer@linux.vnet.ibm.com>,
+	Bryant G. Ly <bryantly@linux.vnet.ibm.com>,
+
+Introduction
+============
+
+Note: Knowledge of virtualization technology is required to understand
+this document.
+
+A good reference document would be:
+
+https://openpowerfoundation.org/wp-content/uploads/2016/05/LoPAPR_DRAFT_v11_24March2016_cmt1.pdf
+
+The Virtual Management Channel (VMC) is a logical device which provides an
+interface between the hypervisor and a management partition. This interface
+is like a message passing interface. The management partition is intended
+to provide an alternative to Hardware Management Console (HMC)-based
+system management.
+
+The primary hardware management solution that is developed by IBM relies
+on an appliance server named the Hardware Management Console (HMC),
+packaged as an external tower or rack-mounted personal computer. In a
+Power Systems environment, a single HMC can manage multiple POWER
+processor-based systems.
+
+Management Application
+----------------------
+
+In the management partition, a management application exists which enables
+a system administrator to configure the system’s partitioning
+characteristics via a command line interface (CLI) or Representational
+State Transfer (REST) APIs.
+
+The management application runs on a Linux logical partition on a
+POWER8 or newer processor-based server that is virtualized by PowerVM.
+System configuration, maintenance, and control functions which
+traditionally require an HMC can be implemented in the management
+application using a combination of HMC to hypervisor interfaces and
+existing operating system methods. This tool provides a subset of the
+functions implemented by the HMC and enables basic partition configuration.
+The set of HMC to hypervisor messages supported by the management
+application component are passed to the hypervisor over a VMC interface,
+which is defined below.
+
+The VMC enables the management partition to provide basic partitioning
+functions:
+
+- Logical Partitioning Configuration
+- Start and stop actions for individual partitions
+- Display of partition status
+- Management of virtual Ethernet
+- Management of virtual Storage
+- Basic system management
+
+Virtual Management Channel (VMC)
+--------------------------------
+
+A logical device, called the Virtual Management Channel (VMC), is defined
+for communicating between the management application and the hypervisor. It
+basically creates the pipes that enable virtualization management
+software. This device is presented to a designated management partition as
+a virtual device.
+
+This communication device uses Command/Response Queue (CRQ) and the
+Remote Direct Memory Access (RDMA) interfaces. A three-way handshake is
+defined that must take place to establish that both the hypervisor and
+management partition sides of the channel are running prior to
+sending/receiving any of the protocol messages.
+
+This driver also utilizes Transport Event CRQs. CRQ messages are sent
+when the hypervisor detects one of the peer partitions has abnormally
+terminated, or one side has called H_FREE_CRQ to close their CRQ.
+Two new classes of CRQ messages are introduced for the VMC device. VMC
+Administrative messages are used by each partition using the VMC to
+communicate capabilities to its partner. HMC Interface messages are used
+for the actual flow of HMC messages between the management partition and
+the hypervisor. As most HMC messages far exceed the size of a CRQ buffer,
+a virtual DMA (RDMA) of the HMC message data is done prior to each HMC
+Interface CRQ message. Only the management partition drives RDMA
+operations; hypervisors never directly cause the movement of message data.
+
+
+Terminology
+-----------
+RDMA
+        Remote Direct Memory Access is DMA transfer from the server to its
+        client or from the server to its partner partition. DMA refers
+        to both physical I/O to and from memory operations and to memory
+        to memory move operations.
+CRQ
+        Command/Response Queue, a facility which is used to communicate
+        between partner partitions. Transport events which are signaled
+        from the hypervisor to the partition are also reported in this queue.
+
+Example Management Partition VMC Driver Interface
+=================================================
+
+This section provides an example for the management application
+implementation where a device driver is used to interface to the VMC
+device. This driver consists of a new device, for example /dev/ibmvmc,
+which provides interfaces to open, close, read, write, and perform
+ioctls against the VMC device.
+
+VMC Interface Initialization
+----------------------------
+
+The device driver is responsible for initializing the VMC when the driver
+is loaded. It first creates and initializes the CRQ. Next, an exchange of
+VMC capabilities is performed to indicate the code version and number of
+resources available in both the management partition and the hypervisor.
+Finally, the hypervisor requests that the management partition create an
+initial pool of VMC buffers, one buffer for each possible HMC connection,
+which will be used for management application session initialization.
+Prior to completion of this initialization sequence, the device returns
+EBUSY to open() calls. EIO is returned for all open() failures.
+
+::
+
+        Management Partition		Hypervisor
+                        CRQ INIT
+        ---------------------------------------->
+        	   CRQ INIT COMPLETE
+        <----------------------------------------
+        	      CAPABILITIES
+        ---------------------------------------->
+        	 CAPABILITIES RESPONSE
+        <----------------------------------------
+              ADD BUFFER (HMC IDX=0,1,..)         _
+        <----------------------------------------  |
+        	  ADD BUFFER RESPONSE              | - Perform # HMCs Iterations
+        ----------------------------------------> -
+
+VMC Interface Open
+------------------
+
+After the basic VMC channel has been initialized, an HMC session level
+connection can be established. The application layer performs an open() to
+the VMC device and executes an ioctl() against it, indicating the HMC ID
+(32 bytes of data) for this session. If the VMC device is in an invalid
+state, EIO will be returned for the ioctl(). The device driver creates a
+new HMC session value (ranging from 1 to 255) and HMC index value (starting
+at index 0 and ranging to 254) for this HMC ID. The driver then does an
+RDMA of the HMC ID to the hypervisor, and then sends an Interface Open
+message to the hypervisor to establish the session over the VMC. After the
+hypervisor receives this information, it sends Add Buffer messages to the
+management partition to seed an initial pool of buffers for the new HMC
+connection. Finally, the hypervisor sends an Interface Open Response
+message, to indicate that it is ready for normal runtime messaging. The
+following illustrates this VMC flow:
+
+::
+
+        Management Partition             Hypervisor
+        	      RDMA HMC ID
+        ---------------------------------------->
+        	    Interface Open
+        ---------------------------------------->
+        	      Add Buffer                  _
+        <----------------------------------------  |
+        	  Add Buffer Response              | - Perform N Iterations
+        ----------------------------------------> -
+        	Interface Open Response
+        <----------------------------------------
+
+VMC Interface Runtime
+---------------------
+
+During normal runtime, the management application and the hypervisor
+exchange HMC messages via the Signal VMC message and RDMA operations. When
+sending data to the hypervisor, the management application performs a
+write() to the VMC device, and the driver RDMA’s the data to the hypervisor
+and then sends a Signal Message. If a write() is attempted before VMC
+device buffers have been made available by the hypervisor, or no buffers
+are currently available, EBUSY is returned in response to the write(). A
+write() will return EIO for all other errors, such as an invalid device
+state. When the hypervisor sends a message to the management partition,
+the data is put into a VMC buffer and a Signal Message is sent to the VMC
+driver in
+the management partition. The driver RDMA’s the buffer into the partition
+and passes the data up to the appropriate management application via a
+read() to the VMC device. The read() request blocks if there is no buffer
+available to read. The management application may use select() to wait for
+the VMC device to become ready with data to read.
+
+::
+
+        Management Partition             Hypervisor
+        		MSG RDMA
+        ---------------------------------------->
+        		SIGNAL MSG
+        ---------------------------------------->
+        		SIGNAL MSG
+        <----------------------------------------
+        		MSG RDMA
+        <----------------------------------------
+
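+As a hedged user-space sketch of the runtime flow above (the ioctl request
+code name is illustrative, not necessarily the driver's actual definition):
+
+::
+
+        int fd = open("/dev/ibmvmc", O_RDWR);
+        char hmc_id[32] = { /* 32 bytes of HMC ID */ };
+
+        /* hypothetical request code establishing the HMC session */
+        ioctl(fd, VMC_IOCTL_SETHMCID, hmc_id);
+
+        write(fd, msg, msg_len);        /* driver RDMAs + Signal Message */
+
+        /* block, or use select(), until the hypervisor sends data */
+        ssize_t n = read(fd, buf, sizeof(buf));
+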
+VMC Interface Close
+-------------------
+
+HMC session level connections are closed by the management partition when
+the application layer performs a close() against the device. This action
+results in an Interface Close message flowing to the hypervisor, which
+causes the session to be terminated. The device driver must free any
+storage allocated for buffers for this HMC connection.
+
+::
+
+        Management Partition             Hypervisor
+        	     INTERFACE CLOSE
+        ---------------------------------------->
+                INTERFACE CLOSE RESPONSE
+        <----------------------------------------
+
+Additional Information
+======================
+
+For more information on the documentation for CRQ Messages, VMC Messages,
+HMC interface Buffers, and signal messages, please refer to the Linux on
+Power Architecture Platform Reference, Section F.
diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst
index ce5561b..a9c46dd 100644
--- a/Documentation/process/2.Process.rst
+++ b/Documentation/process/2.Process.rst
@@ -18,17 +18,17 @@
 release history looks like this:
 
 	======  =================
-	2.6.38	March 14, 2011
-	2.6.37	January 4, 2011
-	2.6.36	October 20, 2010
-	2.6.35	August 1, 2010
-	2.6.34	May 15, 2010
-	2.6.33	February 24, 2010
+	4.11	April 30, 2017
+	4.12	July 2, 2017
+	4.13	September 3, 2017
+	4.14	November 12, 2017
+	4.15	January 28, 2018
+	4.16	April 1, 2018
 	======  =================
 
-Every 2.6.x release is a major kernel release with new features, internal
-API changes, and more.  A typical 2.6 release can contain nearly 10,000
-changesets with changes to several hundred thousand lines of code.  2.6 is
+Every 4.x release is a major kernel release with new features, internal
+API changes, and more.  A typical 4.x release contains about 13,000
+changesets with changes to several hundred thousand lines of code.  4.x is
 thus the leading edge of Linux kernel development; the kernel uses a
 rolling development model which is continually integrating major changes.
 
@@ -70,20 +70,19 @@
 considered to be sufficiently stable and the final 2.6.x release is made.
 At that point the whole process starts over again.
 
-As an example, here is how the 2.6.38 development cycle went (all dates in
-2011):
+As an example, here is how the 4.16 development cycle went (all dates in
+2018):
 
 	==============  ===============================
-	January 4	2.6.37 stable release
-	January 18	2.6.38-rc1, merge window closes
-	January 21	2.6.38-rc2
-	February 1	2.6.38-rc3
-	February 7	2.6.38-rc4
-	February 15	2.6.38-rc5
-	February 21	2.6.38-rc6
-	March 1		2.6.38-rc7
-	March 7		2.6.38-rc8
-	March 14	2.6.38 stable release
+	January 28	4.15 stable release
+	February 11	4.16-rc1, merge window closes
+	February 18	4.16-rc2
+	February 25	4.16-rc3
+	March 4		4.16-rc4
+	March 11	4.16-rc5
+	March 18	4.16-rc6
+	March 25	4.16-rc7
+	April 1		4.16 stable release
 	==============  ===============================
 
 How do the developers decide when to close the development cycle and create
@@ -99,37 +98,42 @@
 achieve; there are just too many variables in a project of this size.
 There comes a point where delaying the final release just makes the problem
 worse; the pile of changes waiting for the next merge window will grow
-larger, creating even more regressions the next time around.  So most 2.6.x
+larger, creating even more regressions the next time around.  So most 4.x
 kernels go out with a handful of known regressions though, hopefully, none
 of them are serious.
 
 Once a stable release is made, its ongoing maintenance is passed off to the
 "stable team," currently consisting of Greg Kroah-Hartman.  The stable team
-will release occasional updates to the stable release using the 2.6.x.y
+will release occasional updates to the stable release using the 4.x.y
 numbering scheme.  To be considered for an update release, a patch must (1)
 fix a significant bug, and (2) already be merged into the mainline for the
 next development kernel.  Kernels will typically receive stable updates for
 a little more than one development cycle past their initial release.  So,
-for example, the 2.6.36 kernel's history looked like:
+for example, the 4.13 kernel's history looked like:
 
 	==============  ===============================
-	October 10	2.6.36 stable release
-	November 22	2.6.36.1
-	December 9	2.6.36.2
-	January 7	2.6.36.3
-	February 17	2.6.36.4
+	September 3	4.13 stable release
+	September 13	4.13.1
+	September 20	4.13.2
+	September 27	4.13.3
+	October 5	4.13.4
+	October 12	4.13.5
+	...		...
+	November 24	4.13.16
 	==============  ===============================
 
-2.6.36.4 was the final stable update for the 2.6.36 release.
+4.13.16 was the final stable update of the 4.13 release.
 
 Some kernels are designated "long term" kernels; they will receive support
 for a longer period.  As of this writing, the current long term kernels
 and their maintainers are:
 
-	======  ======================  ===========================
-	2.6.27	Willy Tarreau		(Deep-frozen stable kernel)
-	2.6.32	Greg Kroah-Hartman
-	2.6.35	Andi Kleen		(Embedded flag kernel)
+	======  ======================  ==============================
+	3.16	Ben Hutchings		(very long-term stable kernel)
+	4.1	Sasha Levin
+	4.4	Greg Kroah-Hartman	(very long-term stable kernel)
+	4.9	Greg Kroah-Hartman
+	4.14	Greg Kroah-Hartman
-	======  ======================  ===========================
+	======  ======================  ==============================
 
 The selection of a kernel for long-term support is purely a matter of a
diff --git a/Documentation/process/5.Posting.rst b/Documentation/process/5.Posting.rst
index c209d70..c418c5d 100644
--- a/Documentation/process/5.Posting.rst
+++ b/Documentation/process/5.Posting.rst
@@ -10,8 +10,8 @@
 following them will make life much easier for everybody involved.  This
 document will attempt to cover these expectations in reasonable detail;
 more information can also be found in the files process/submitting-patches.rst,
-process/submitting-drivers.rst, and process/submit-checklist.rst in the kernel documentation
-directory.
+process/submitting-drivers.rst, and process/submit-checklist.rst in the kernel
+documentation directory.
 
 
 When to post
@@ -198,8 +198,8 @@
 
 The tags mentioned above are used to describe how various developers have
 been associated with the development of this patch.  They are described in
-detail in the process/submitting-patches.rst document; what follows here is a brief
-summary.  Each of these lines has the format:
+detail in the process/submitting-patches.rst document; what follows here is a
+brief summary.  Each of these lines has the format:
 
 ::
 
@@ -210,8 +210,8 @@
  - Signed-off-by: this is a developer's certification that he or she has
    the right to submit the patch for inclusion into the kernel.  It is an
    agreement to the Developer's Certificate of Origin, the full text of
-   which can be found in Documentation/process/submitting-patches.rst.  Code without a
-   proper signoff cannot be merged into the mainline.
+   which can be found in Documentation/process/submitting-patches.rst.  Code
+   without a proper signoff cannot be merged into the mainline.
 
  - Co-developed-by: states that the patch was also created by another developer
    along with the original author.  This is useful at times when multiple
@@ -226,8 +226,8 @@
    it to work.
 
  - Reviewed-by: the named developer has reviewed the patch for correctness;
-   see the reviewer's statement in Documentation/process/submitting-patches.rst for more
-   detail.
+   see the reviewer's statement in Documentation/process/submitting-patches.rst
+   for more detail.
 
  - Reported-by: names a user who reported a problem which is fixed by this
    patch; this tag is used to give credit to the (often underappreciated)
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 1c9fe65..37bd062 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -52,6 +52,7 @@
    adding-syscalls
    magic-number
    volatile-considered-harmful
+   clang-format
 
 .. only::  subproject and html
 
diff --git a/Documentation/process/maintainer-pgp-guide.rst b/Documentation/process/maintainer-pgp-guide.rst
index b453561..aff9b1a 100644
--- a/Documentation/process/maintainer-pgp-guide.rst
+++ b/Documentation/process/maintainer-pgp-guide.rst
@@ -219,7 +219,7 @@
 if you only have a combined **[SC]** key, then you should create a separate
 signing subkey::
 
-    $ gpg --quick-add-key [fpr] ed25519 sign
+    $ gpg --quick-addkey [fpr] ed25519 sign
 
 Remember to tell the keyservers about this change, so others can pull down
 your new subkey::
@@ -450,11 +450,18 @@
 others. If you want to use ECC keys, your best bet among commercially
 available devices is the Nitrokey Start.
 
+.. note::
+
+    If you are listed in MAINTAINERS or have an account at kernel.org,
+    you `qualify for a free Nitrokey Start`_ courtesy of The Linux
+    Foundation.
+
 .. _`Nitrokey Start`: https://shop.nitrokey.com/shop/product/nitrokey-start-6
 .. _`Nitrokey Pro`: https://shop.nitrokey.com/shop/product/nitrokey-pro-3
 .. _`Yubikey 4`: https://www.yubico.com/product/yubikey-4-series/
 .. _Gnuk: http://www.fsij.org/doc-gnuk/
 .. _`LWN has a good review`: https://lwn.net/Articles/736231/
+.. _`qualify for a free Nitrokey Start`: https://www.kernel.org/nitrokey-digital-tokens-for-kernel-developers.html
 
 Configure your smartcard device
 -------------------------------
@@ -482,7 +489,7 @@
 You should set the user PIN (1), Admin PIN (3), and the Reset Code (4).
 Please make sure to record and store these in a safe place -- especially
 the Admin PIN and the Reset Code (which allows you to completely wipe
-the smartcard).  You so rarely need to use the Admin PIN, that you will
+the smartcard). You so rarely need to use the Admin PIN, that you will
 inevitably forget what it is if you do not record it.
 
 Getting back to the main card menu, you can also set other values (such
@@ -494,6 +501,12 @@
     Despite having the name "PIN", neither the user PIN nor the admin
     PIN on the card need to be numbers.
 
+.. warning::
+
+    Some devices may require that you move the subkeys onto the device
+    before you can change the passphrase. Please check the documentation
+    provided by the device manufacturer.
+
 Move the subkeys to your smartcard
 ----------------------------------
 
@@ -655,6 +668,20 @@
     $ gpg --export | gpg --homedir ~/.gnupg --import
     $ unset GNUPGHOME
 
+Using gpg-agent over ssh
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can forward your gpg-agent over ssh if you need to sign tags or
+commits on a remote system. Please refer to the instructions provided
+on the GnuPG wiki:
+
+- `Agent Forwarding over SSH`_
+
+It works more smoothly if you can modify the sshd server settings on the
+remote end.
+
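+As a minimal sketch of the client-side configuration (socket locations
+vary; check ``gpgconf --list-dirs agent-socket`` and
+``gpgconf --list-dirs agent-extra-socket`` on the respective systems)::
+
+    # in ~/.ssh/config on your local workstation
+    Host remotebox
+        RemoteForward /run/user/1000/gnupg/S.gpg-agent /run/user/1000/gnupg/S.gpg-agent.extra
+
+The sshd server setting referenced above is ``StreamLocalBindUnlink yes``.
+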
+.. _`Agent Forwarding over SSH`: https://wiki.gnupg.org/AgentForwarding
+
 
 Using PGP with Git
 ==================
@@ -692,6 +719,7 @@
 tell git to always use it instead of the legacy ``gpg`` from version 1::
 
     $ git config --global gpg.program gpg2
+    $ git config --global gpgv.program gpgv2
 
 How to work with signed tags
 ----------------------------
@@ -731,6 +759,13 @@
 import their PGP key. Please refer to the
 ":ref:`verify_identities`" section below.
 
+.. note::
+
+    If you get "``gpg: Can't check signature: unknown pubkey
+    algorithm``" error, you need to tell git to use gpgv2 for
+    verification, so it properly processes signatures made by ECC keys.
+    See instructions at the start of this section.
+
 Configure git to always sign annotated tags
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index f7152ed..908bb55 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -761,7 +761,7 @@
 the pull request as the cover letter for a normal posting of the patch
 series, giving the maintainer the option of using either.
 
-A pull request should have [GIT] or [PULL] in the subject line.  The
+A pull request should have [GIT PULL] in the subject line.  The
 request itself should include the repository name and the branch of
 interest on a single line; it should look something like::
 
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index 8ce78f8..b14e03f 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -49,7 +49,7 @@
 2.1 Main algorithm
 ------------------
 
- SCHED_DEADLINE uses three parameters, named "runtime", "period", and
+ SCHED_DEADLINE [18] uses three parameters, named "runtime", "period", and
  "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
  "runtime" microseconds of execution time every "period" microseconds, and
  these "runtime" microseconds are available within "deadline" microseconds
@@ -117,6 +117,10 @@
          scheduling deadline = scheduling deadline + period
          remaining runtime = remaining runtime + runtime
 
+ The SCHED_FLAG_DL_OVERRUN flag in sched_attr's sched_flags field allows a
+ task to be notified of runtime overruns through the delivery of SIGXCPU
+ signals.
+
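+ As a minimal sketch (sched_attr setup elided; a complete sched_setattr()
+ example appears later in this document):
+
+	signal(SIGXCPU, overrun_handler);	/* user-defined handler */
+
+	attr.sched_policy  = SCHED_DEADLINE;
+	attr.sched_flags  |= SCHED_FLAG_DL_OVERRUN;
+	syscall(__NR_sched_setattr, 0, &attr, 0);
+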
 
 2.2 Bandwidth reclaiming
 ------------------------
@@ -279,6 +283,19 @@
     running_bw is incremented.
 
 
+2.3 Energy-aware scheduling
+---------------------------
+
+ When cpufreq's schedutil governor is selected, SCHED_DEADLINE implements the
+ GRUB-PA [19] algorithm, reducing the CPU operating frequency to the minimum
+ value that still allows the deadlines to be met. This behavior is currently
+ implemented only for ARM architectures.
+
+ Particular care must be taken if the time needed to change the CPU frequency
+ is of the same order of magnitude as the reservation period. In such cases,
+ setting a fixed CPU frequency results in fewer deadline misses.
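+
+ As a minimal sketch (sysfs paths may vary across systems), schedutil can
+ be selected for a cpufreq policy with:
+
+	# echo schedutil > /sys/devices/system/cpu/cpufreq/policy0/scaling_governor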
+
+
 3. Scheduling Real-Time Tasks
 =============================
 
@@ -505,6 +522,12 @@
   17 - L. Abeni, G. Lipari, A. Parri, Y. Sun, Multicore CPU reclaiming: parallel
        or sequential?. In Proceedings of the 31st Annual ACM Symposium on Applied
        Computing, 2016.
+  18 - J. Lelli, C. Scordino, L. Abeni, D. Faggioli, Deadline scheduling in the
+       Linux kernel, Software: Practice and Experience, 46(6): 821-839, June
+       2016.
+  19 - C. Scordino, L. Abeni, J. Lelli, Energy-Aware Real-Time Scheduling in
+       the Linux Kernel, 33rd ACM/SIGAPP Symposium On Applied Computing (SAC
+       2018), Pau, France, April 2018.
 
 
 4. Bandwidth management
diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
index 11e447b..1b74369 100644
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -82,24 +82,13 @@
  1. invokes optional hostt->eh_timed_out() callback.  Return value can
     be one of
 
-    - BLK_EH_HANDLED
-	This indicates that eh_timed_out() dealt with the timeout.
-	The command is passed back to the block layer and completed
-	via __blk_complete_requests().
-
-	*NOTE* After returning BLK_EH_HANDLED the SCSI layer is
-	assumed to be finished with the command, and no other
-	functions from the SCSI layer will be called. So this
-	should typically only be returned if the eh_timed_out()
-	handler raced with normal completion.
-
     - BLK_EH_RESET_TIMER
 	This indicates that more time is required to finish the
 	command.  Timer is restarted.  This action is counted as a
 	retry and only allowed scmd->allowed + 1(!) times.  Once the
-	limit is reached, action for BLK_EH_NOT_HANDLED is taken instead.
+	limit is reached, action for BLK_EH_DONE is taken instead.
 
-    - BLK_EH_NOT_HANDLED
+    - BLK_EH_DONE
         eh_timed_out() callback did not handle the command.
 	Step #2 is taken.
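+
+	As a minimal sketch (hypothetical helper, not from a real
+	driver), an eh_timed_out() implementation using these return
+	values might look like:
+
+	    static enum blk_eh_timer_return my_eh_timed_out(struct scsi_cmnd *scmd)
+	    {
+		    /* hypothetical: ask hardware if the command is still in flight */
+		    if (my_hw_command_active(scmd))
+			    return BLK_EH_RESET_TIMER;	/* allow more time */
+
+		    /* not handled here; SCSI EH continues with step #2 */
+		    return BLK_EH_DONE;
+	    }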
 
diff --git a/Documentation/security/index.rst b/Documentation/security/index.rst
index 298a94a..85492bf 100644
--- a/Documentation/security/index.rst
+++ b/Documentation/security/index.rst
@@ -9,5 +9,7 @@
    IMA-templates
    keys/index
    LSM
+   LSM-sctp
+   SELinux-sctp
    self-protection
    tpm/index
diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst
index aed6b4f..4d83c1c 100644
--- a/Documentation/sound/alsa-configuration.rst
+++ b/Documentation/sound/alsa-configuration.rst
@@ -1062,7 +1062,7 @@
 ML (see the section `Links and Addresses`_).
 
 ``power_save`` and ``power_save_controller`` options are for power-saving
-mode.  See powersave.txt for details.
+mode.  See powersave.rst for details.
 
 Note 2: If you get click noises on output, try the module option
 ``position_fix=1`` or ``2``.  ``position_fix=1`` will use the SD_LPIB
@@ -1133,7 +1133,7 @@
 enable_monitor
     Enable Analog Out on Channel 63/64 by default.
 
-See hdspm.txt for details.
+See hdspm.rst for details.
 
 Module snd-ice1712
 ------------------
@@ -2224,6 +2224,13 @@
     Quirk alias list, pass strings like ``0123abcd:5678beef``, which
     applies the existing quirk for the device 5678:beef to a new
     device 0123:abcd.
+use_vmalloc
+    Use vmalloc() for allocations of the PCM buffers (default: yes).
+    For architectures with non-coherent memory like ARM or MIPS, the
+    mmap access may give inconsistent results with vmalloc'ed
+    buffers.  If mmap is used on such architectures, turn off this
+    option, so that the DMA-coherent buffers are allocated and used
+    instead.
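+
+    A minimal sketch, assuming this option belongs to the
+    snd-usb-audio module documented in this section, of disabling it
+    via modprobe configuration::
+
+        options snd-usb-audio use_vmalloc=0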
 
 This module supports multiple devices, autoprobe and hotplugging.
 
diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst
index 1fee5a4..7c2d375 100644
--- a/Documentation/sound/hd-audio/models.rst
+++ b/Documentation/sound/hd-audio/models.rst
@@ -263,6 +263,8 @@
     HP dock support
 mute-led-gpio
     Mute LED control via GPIO
+hp-mic-fix
+    Fix for headset mic pin on HP boxes
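+
+As a minimal sketch (assuming the standard HD-audio ``model`` module
+option), this fixup could be selected via modprobe configuration::
+
+    options snd-hda-intel model=hp-mic-fix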
 
 STAC9200
 ========
diff --git a/Documentation/sound/soc/codec.rst b/Documentation/sound/soc/codec.rst
index f87612b..8a9737e 100644
--- a/Documentation/sound/soc/codec.rst
+++ b/Documentation/sound/soc/codec.rst
@@ -139,7 +139,7 @@
 ----------------
 The Dynamic Audio Power Management description describes the codec power
 components and their relationships and registers to the ASoC core.
-Please read dapm.txt for details of building the description.
+Please read dapm.rst for details of building the description.
 
 Please also see the examples in other codec drivers.
 
@@ -179,12 +179,12 @@
 
   static int wm8974_mute(struct snd_soc_dai *dai, int mute)
   {
-	struct snd_soc_codec *codec = dai->codec;
-	u16 mute_reg = snd_soc_read(codec, WM8974_DAC) & 0xffbf;
+	struct snd_soc_component *component = dai->component;
+	u16 mute_reg = snd_soc_component_read32(component, WM8974_DAC) & 0xffbf;
 
 	if (mute)
-		snd_soc_write(codec, WM8974_DAC, mute_reg | 0x40);
+		snd_soc_component_write(component, WM8974_DAC, mute_reg | 0x40);
 	else
-		snd_soc_write(codec, WM8974_DAC, mute_reg);
+		snd_soc_component_write(component, WM8974_DAC, mute_reg);
 	return 0;
   }
diff --git a/Documentation/sound/soc/platform.rst b/Documentation/sound/soc/platform.rst
index d557490..c1badea 100644
--- a/Documentation/sound/soc/platform.rst
+++ b/Documentation/sound/soc/platform.rst
@@ -23,30 +23,26 @@
   };
 
 The platform driver exports its DMA functionality via struct
-snd_soc_platform_driver:-
+snd_soc_component_driver:-
 ::
 
-  struct snd_soc_platform_driver {
-	char *name;
+  struct snd_soc_component_driver {
+	const char *name;
 
-	int (*probe)(struct platform_device *pdev);
-	int (*remove)(struct platform_device *pdev);
-	int (*suspend)(struct platform_device *pdev, struct snd_soc_cpu_dai *cpu_dai);
-	int (*resume)(struct platform_device *pdev, struct snd_soc_cpu_dai *cpu_dai);
+	...
+	int (*probe)(struct snd_soc_component *);
+	void (*remove)(struct snd_soc_component *);
+	int (*suspend)(struct snd_soc_component *);
+	int (*resume)(struct snd_soc_component *);
 
 	/* pcm creation and destruction */
-	int (*pcm_new)(struct snd_card *, struct snd_soc_codec_dai *, struct snd_pcm *);
+	int (*pcm_new)(struct snd_soc_pcm_runtime *);
 	void (*pcm_free)(struct snd_pcm *);
 
-	/*
-	 * For platform caused delay reporting.
-	 * Optional.
-	 */
-	snd_pcm_sframes_t (*delay)(struct snd_pcm_substream *,
-		struct snd_soc_dai *);
-
-	/* platform stream ops */
-	struct snd_pcm_ops *pcm_ops;
+	...
+	const struct snd_pcm_ops *ops;
+	const struct snd_compr_ops *compr_ops;
+	...
   };
 
 Please refer to the ALSA driver documentation for details of audio DMA.
@@ -66,7 +62,7 @@
 4. SYSCLK configuration
 5. Suspend and resume (optional)
 
-Please see codec.txt for a description of items 1 - 4.
+Please see codec.rst for a description of items 1 - 4.
 
 
 SoC DSP Drivers
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 17256f2..697ef8c 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -515,7 +515,7 @@
 
 Change the minimum size of the hugepage pool.
 
-See Documentation/vm/hugetlbpage.txt
+See Documentation/admin-guide/mm/hugetlbpage.rst
 
 ==============================================================
 
@@ -524,7 +524,7 @@
 Change the maximum size of the hugepage pool. The maximum is
 nr_hugepages + nr_overcommit_hugepages.
 
-See Documentation/vm/hugetlbpage.txt
+See Documentation/admin-guide/mm/hugetlbpage.rst
 
 ==============================================================
 
@@ -667,7 +667,7 @@
 
 The default value is 0.
 
-See Documentation/vm/overcommit-accounting and
+See Documentation/vm/overcommit-accounting.rst and
 mm/mmap.c::__vm_enough_memory() for more information.
 
 ==============================================================
diff --git a/Documentation/trace/coresight-cpu-debug.txt b/Documentation/trace/coresight-cpu-debug.txt
index 2b9b51c..89ab09e 100644
--- a/Documentation/trace/coresight-cpu-debug.txt
+++ b/Documentation/trace/coresight-cpu-debug.txt
@@ -177,11 +177,11 @@
 ARM external debug module:
 coresight-cpu-debug 850000.debug: CPU[0]:
 coresight-cpu-debug 850000.debug:  EDPRSR:  00000001 (Power:On DLK:Unlock)
-coresight-cpu-debug 850000.debug:  EDPCSR:  [<ffff00000808e9bc>] handle_IPI+0x174/0x1d8
+coresight-cpu-debug 850000.debug:  EDPCSR:  handle_IPI+0x174/0x1d8
 coresight-cpu-debug 850000.debug:  EDCIDSR: 00000000
 coresight-cpu-debug 850000.debug:  EDVIDSR: 90000000 (State:Non-secure Mode:EL1/0 Width:64bits VMID:0)
 coresight-cpu-debug 852000.debug: CPU[1]:
 coresight-cpu-debug 852000.debug:  EDPRSR:  00000001 (Power:On DLK:Unlock)
-coresight-cpu-debug 852000.debug:  EDPCSR:  [<ffff0000087fab34>] debug_notifier_call+0x23c/0x358
+coresight-cpu-debug 852000.debug:  EDPCSR:  debug_notifier_call+0x23c/0x358
 coresight-cpu-debug 852000.debug:  EDCIDSR: 00000000
 coresight-cpu-debug 852000.debug:  EDVIDSR: 90000000 (State:Non-secure Mode:EL1/0 Width:64bits VMID:0)
diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt
index 6f0120c..1d74ad0 100644
--- a/Documentation/trace/coresight.txt
+++ b/Documentation/trace/coresight.txt
@@ -187,13 +187,19 @@
 specific to that component only.  "Implementation defined" customisations are
 expected to be accessed and controlled using those entries.
 
-Last but not least, "struct module *owner" is expected to be set to reflect
-the information carried in "THIS_MODULE".
 
 How to use the tracer modules
 -----------------------------
 
-Before trace collection can start, a coresight sink needs to be identify.
+There are two ways to use the Coresight framework: 1) using the perf command
+line tools and 2) interacting directly with the Coresight devices using the
+sysFS interface.  Preference is given to the former as using the sysFS
+interface requires a deep understanding of the Coresight HW.  The following
+sections provide details on using both methods.
+
+1) Using the sysFS interface:
+
+Before trace collection can start, a coresight sink needs to be identified.
 There is no limit on the amount of sinks (nor sources) that can be enabled at
 any given moment.  As a generic operation, all device pertaining to the sink
 class will have an "active" entry in sysfs:
@@ -298,42 +304,48 @@
 Instruction     0       0x8026B588      E8BD8000        true    LDM      sp!,{pc}
 Timestamp                                       Timestamp: 17107041535
 
-How to use the STM module
--------------------------
+2) Using the perf framework:
 
-Using the System Trace Macrocell module is the same as the tracers - the only
-difference is that clients are driving the trace capture rather
-than the program flow through the code.
+Coresight tracers are represented using the Perf framework's Performance
+Monitoring Unit (PMU) abstraction.  As such the perf framework takes charge of
+controlling when tracing gets enabled based on when the process of interest is
+scheduled.  When configured in a system, Coresight PMUs will be listed when
+queried by the perf command line tool:
 
-As with any other CoreSight component, specifics about the STM tracer can be
-found in sysfs with more information on each entry being found in [1]:
+	linaro@linaro-nano:~$ ./perf list pmu
 
-root@genericarmv8:~# ls /sys/bus/coresight/devices/20100000.stm
-enable_source   hwevent_select  port_enable     subsystem       uevent
-hwevent_enable  mgmt            port_select     traceid
-root@genericarmv8:~#
+		List of pre-defined events (to be used in -e):
 
-Like any other source a sink needs to be identified and the STM enabled before
-being used:
+		cs_etm//                                    [Kernel PMU event]
 
-root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20010000.etf/enable_sink
-root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20100000.stm/enable_source
+	linaro@linaro-nano:~$
 
-From there user space applications can request and use channels using the devfs
-interface provided for that purpose by the generic STM API:
+Regardless of the number of tracers available in a system (usually equal to
+the number of processor cores), the "cs_etm" PMU will be listed only once.
 
-root@genericarmv8:~# ls -l /dev/20100000.stm
-crw-------    1 root     root       10,  61 Jan  3 18:11 /dev/20100000.stm
-root@genericarmv8:~#
+A Coresight PMU works the same way as any other PMU, i.e. the name of the PMU
+is listed along with configuration options within forward slashes '/'.  Since
+a Coresight system will typically have more than one sink, the name of the
+sink to work with needs to be specified as an event option.  The names of the
+sinks to choose from are listed in sysFS under ($SYSFS)/bus/coresight/devices:
 
-Details on how to use the generic STM API can be found here [2].
+	root@linaro-nano:~# ls /sys/bus/coresight/devices/
+		20010000.etf   20040000.funnel  20100000.stm  22040000.etm
+		22140000.etm  230c0000.funnel  23240000.etm 20030000.tpiu
+		20070000.etr     20120000.replicator  220c0000.funnel
+		23040000.etm  23140000.etm     23340000.etm
 
-[1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
-[2]. Documentation/trace/stm.txt
+	root@linaro-nano:~# perf record -e cs_etm/@20070000.etr/u --per-thread program
 
+The syntax within the forward slashes '/' is important.  The '@' character
+tells the parser that a sink is about to be specified and that this is the sink
+to use for the trace session.
 
-Using perf tools
-----------------
+More information on the above, and other examples of how to use Coresight
+with the perf tools, can be found in the "HOWTO.md" file of the openCSD
+GitHub repository [3].
+
+2.1) AutoFDO analysis using the perf tools:
 
 perf can be used to record and analyze trace of programs.
 
@@ -381,3 +393,38 @@
 	$ taskset -c 2 ./sort_autofdo
 	Bubble sorting array of 30000 elements
 	5806 ms
+
+
+How to use the STM module
+-------------------------
+
+Using the System Trace Macrocell module is the same as the tracers - the only
+difference is that clients are driving the trace capture rather
+than the program flow through the code.
+
+As with any other CoreSight component, specifics about the STM tracer can be
+found in sysfs with more information on each entry being found in [1]:
+
+root@genericarmv8:~# ls /sys/bus/coresight/devices/20100000.stm
+enable_source   hwevent_select  port_enable     subsystem       uevent
+hwevent_enable  mgmt            port_select     traceid
+root@genericarmv8:~#
+
+Like any other source a sink needs to be identified and the STM enabled before
+being used:
+
+root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20010000.etf/enable_sink
+root@genericarmv8:~# echo 1 > /sys/bus/coresight/devices/20100000.stm/enable_source
+
+From there user space applications can request and use channels using the devfs
+interface provided for that purpose by the generic STM API:
+
+root@genericarmv8:~# ls -l /dev/20100000.stm
+crw-------    1 root     root       10,  61 Jan  3 18:11 /dev/20100000.stm
+root@genericarmv8:~#
+
+Details on how to use the generic STM API can be found here [2].
+
+[1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
+[2]. Documentation/trace/stm.txt
+[3]. https://github.com/Linaro/perf-opencsd
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index a5ea2cb..1afae55 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -338,10 +338,14 @@
 
 The syntax for event triggers is roughly based on the syntax for
 set_ftrace_filter 'ftrace filter commands' (see the 'Filter commands'
-section of Documentation/trace/ftrace.txt), but there are major
+section of Documentation/trace/ftrace.rst), but there are major
 differences and the implementation isn't currently tied to it in any
 way, so beware about making generalizations between the two.
 
+Note: Writing into trace_marker (See Documentation/trace/ftrace.rst)
+     can also enable triggers that are written into
+     /sys/kernel/tracing/events/ftrace/print/trigger
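+
+     As a minimal sketch (the print event's "buf" field holds the
+     written string)::
+
+       # echo 'traceoff if buf == "stop"' > /sys/kernel/tracing/events/ftrace/print/trigger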
+
 6.1 Expression syntax
 ---------------------
 
diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst
index 998a60a..00283b6 100644
--- a/Documentation/trace/ftrace-uses.rst
+++ b/Documentation/trace/ftrace-uses.rst
@@ -12,7 +12,7 @@
 Introduction
 ============
 
-The ftrace infrastructure was originially created to attach callbacks to the
+The ftrace infrastructure was originally created to attach callbacks to the
 beginning of functions in order to record and trace the flow of the kernel.
 But callbacks to the start of a function can have other use cases. Either
 for live kernel patching, or for security monitoring. This document describes
@@ -30,7 +30,7 @@
   This requires extra care to what can be done inside a callback. A callback
   can be called outside the protective scope of RCU.
 
-The ftrace infrastructure has some protections agains recursions and RCU
+The ftrace infrastructure has some protections against recursions and RCU
 but one must still be very careful how they use the callbacks.
 
 
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 67d9c38..a20d349 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -224,6 +224,8 @@
 	has a side effect of enabling or disabling specific functions
 	to be traced. Echoing names of functions into this file
 	will limit the trace to only those functions.
+	This influences the tracers "function" and "function_graph"
+	and thus also function profiling (see "function_profile_enabled").
 
 	The functions listed in "available_filter_functions" are what
 	can be written into this file.
@@ -265,6 +267,8 @@
 	Functions listed in this file will cause the function graph
 	tracer to only trace these functions and the functions that
 	they call. (See the section "dynamic ftrace" for more details).
+	Note, set_ftrace_filter and set_ftrace_notrace still affect
+	what functions are being traced.
 
   set_graph_notrace:
 
@@ -277,7 +281,8 @@
 
 	This lists the functions that ftrace has processed and can trace.
 	These are the function names that you can pass to
-	"set_ftrace_filter" or "set_ftrace_notrace".
+	"set_ftrace_filter", "set_ftrace_notrace",
+	"set_graph_function", or "set_graph_notrace".
 	(See the section "dynamic ftrace" below for more details.)
 
   dyn_ftrace_total_info:
@@ -507,6 +512,11 @@
 
 		trace_fd = open("trace_marker", WR_ONLY);
 
+	Note: Writing into the trace_marker file can also initiate triggers
+	      that are written into /sys/kernel/tracing/events/ftrace/print/trigger.
+	      See "Event triggers" in Documentation/trace/events.rst and an
+	      example in Documentation/trace/histogram.rst (Section 3).
+
   trace_marker_raw:
 
 	This is similar to trace_marker above, but is meant for binary data
diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt
index 6e05510..b13771c 100644
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -1604,7 +1604,6 @@
         Entries: 7
         Dropped: 0
 
-
 2.2 Inter-event hist triggers
 -----------------------------
 
@@ -1993,3 +1992,547 @@
           Hits: 12970
           Entries: 2
           Dropped: 0
+
+3. User space creating a trigger
+--------------------------------
+
+Writing into /sys/kernel/tracing/trace_marker writes into the ftrace
+ring buffer. This can also act like an event, by writing into the trigger
+file located in /sys/kernel/tracing/events/ftrace/print/
+
+Modify cyclictest to write into the trace_marker file before it sleeps
+and after it wakes up, with something like this:
+
+static void traceputs(char *str)
+{
+	/* tracemark_fd is the trace_marker file descriptor */
+	if (tracemark_fd < 0)
+		return;
+	/* write the tracemark message */
+	write(tracemark_fd, str, strlen(str));
+}
+
+And later add something like:
+
+	traceputs("start");
+	clock_nanosleep(...);
+	traceputs("end");
+
+We can make a histogram from this:
+
+ # cd /sys/kernel/tracing
+ # echo 'latency u64 lat' > synthetic_events
+ # echo 'hist:keys=common_pid:ts0=common_timestamp.usecs if buf == "start"' > events/ftrace/print/trigger
+ # echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(ftrace.print).latency($lat) if buf == "end"' >> events/ftrace/print/trigger
+ # echo 'hist:keys=lat,common_pid:sort=lat' > events/synthetic/latency/trigger
+
+The above creates a synthetic event called "latency" and two histograms
+against the trace_marker: one is triggered when "start" is written into the
+trace_marker file and the other when "end" is written. If the pids match,
+the "latency" synthetic event is called with the calculated latency as its
+parameter. Finally, a histogram is added to the latency synthetic event to
+record the calculated latency along with the pid.
+
+Now running cyclictest with:
+
+ # ./cyclictest -p80 -d0 -i250 -n -a -t --tracemark -b 1000
+
+ -p80  : run threads at priority 80
+ -d0   : have all threads run at the same interval
+ -i250 : start the interval at 250 microseconds (all threads will do this)
+ -n    : sleep with nanosleep
+ -a    : affine all threads to a separate CPU
+ -t    : one thread per available CPU
+ --tracemark : enable trace mark writing
+ -b 1000 : stop if any latency is greater than 1000 microseconds
+
+Note, the -b 1000 is used just to make --tracemark available.
+
+Then we can see the histogram created by this with:
+
+ # cat events/synthetic/latency/hist
+# event histogram
+#
+# trigger info: hist:keys=lat,common_pid:vals=hitcount:sort=lat:size=2048 [active]
+#
+
+{ lat:        107, common_pid:       2039 } hitcount:          1
+{ lat:        122, common_pid:       2041 } hitcount:          1
+{ lat:        166, common_pid:       2039 } hitcount:          1
+{ lat:        174, common_pid:       2039 } hitcount:          1
+{ lat:        194, common_pid:       2041 } hitcount:          1
+{ lat:        196, common_pid:       2036 } hitcount:          1
+{ lat:        197, common_pid:       2038 } hitcount:          1
+{ lat:        198, common_pid:       2039 } hitcount:          1
+{ lat:        199, common_pid:       2039 } hitcount:          1
+{ lat:        200, common_pid:       2041 } hitcount:          1
+{ lat:        201, common_pid:       2039 } hitcount:          2
+{ lat:        202, common_pid:       2038 } hitcount:          1
+{ lat:        202, common_pid:       2043 } hitcount:          1
+{ lat:        203, common_pid:       2039 } hitcount:          1
+{ lat:        203, common_pid:       2036 } hitcount:          1
+{ lat:        203, common_pid:       2041 } hitcount:          1
+{ lat:        206, common_pid:       2038 } hitcount:          2
+{ lat:        207, common_pid:       2039 } hitcount:          1
+{ lat:        207, common_pid:       2036 } hitcount:          1
+{ lat:        208, common_pid:       2040 } hitcount:          1
+{ lat:        209, common_pid:       2043 } hitcount:          1
+{ lat:        210, common_pid:       2039 } hitcount:          1
+{ lat:        211, common_pid:       2039 } hitcount:          4
+{ lat:        212, common_pid:       2043 } hitcount:          1
+{ lat:        212, common_pid:       2039 } hitcount:          2
+{ lat:        213, common_pid:       2039 } hitcount:          1
+{ lat:        214, common_pid:       2038 } hitcount:          1
+{ lat:        214, common_pid:       2039 } hitcount:          2
+{ lat:        214, common_pid:       2042 } hitcount:          1
+{ lat:        215, common_pid:       2039 } hitcount:          1
+{ lat:        217, common_pid:       2036 } hitcount:          1
+{ lat:        217, common_pid:       2040 } hitcount:          1
+{ lat:        217, common_pid:       2039 } hitcount:          1
+{ lat:        218, common_pid:       2039 } hitcount:          6
+{ lat:        219, common_pid:       2039 } hitcount:          9
+{ lat:        220, common_pid:       2039 } hitcount:         11
+{ lat:        221, common_pid:       2039 } hitcount:          5
+{ lat:        221, common_pid:       2042 } hitcount:          1
+{ lat:        222, common_pid:       2039 } hitcount:          7
+{ lat:        223, common_pid:       2036 } hitcount:          1
+{ lat:        223, common_pid:       2039 } hitcount:          3
+{ lat:        224, common_pid:       2039 } hitcount:          4
+{ lat:        224, common_pid:       2037 } hitcount:          1
+{ lat:        224, common_pid:       2036 } hitcount:          2
+{ lat:        225, common_pid:       2039 } hitcount:          5
+{ lat:        225, common_pid:       2042 } hitcount:          1
+{ lat:        226, common_pid:       2039 } hitcount:          7
+{ lat:        226, common_pid:       2036 } hitcount:          4
+{ lat:        227, common_pid:       2039 } hitcount:          6
+{ lat:        227, common_pid:       2036 } hitcount:         12
+{ lat:        227, common_pid:       2043 } hitcount:          1
+{ lat:        228, common_pid:       2039 } hitcount:          7
+{ lat:        228, common_pid:       2036 } hitcount:         14
+{ lat:        229, common_pid:       2039 } hitcount:          9
+{ lat:        229, common_pid:       2036 } hitcount:          8
+{ lat:        229, common_pid:       2038 } hitcount:          1
+{ lat:        230, common_pid:       2039 } hitcount:         11
+{ lat:        230, common_pid:       2036 } hitcount:          6
+{ lat:        230, common_pid:       2043 } hitcount:          1
+{ lat:        230, common_pid:       2042 } hitcount:          2
+{ lat:        231, common_pid:       2041 } hitcount:          1
+{ lat:        231, common_pid:       2036 } hitcount:          6
+{ lat:        231, common_pid:       2043 } hitcount:          1
+{ lat:        231, common_pid:       2039 } hitcount:          8
+{ lat:        232, common_pid:       2037 } hitcount:          1
+{ lat:        232, common_pid:       2039 } hitcount:          6
+{ lat:        232, common_pid:       2040 } hitcount:          2
+{ lat:        232, common_pid:       2036 } hitcount:          5
+{ lat:        232, common_pid:       2043 } hitcount:          1
+{ lat:        233, common_pid:       2036 } hitcount:          5
+{ lat:        233, common_pid:       2039 } hitcount:         11
+{ lat:        234, common_pid:       2039 } hitcount:          4
+{ lat:        234, common_pid:       2038 } hitcount:          2
+{ lat:        234, common_pid:       2043 } hitcount:          2
+{ lat:        234, common_pid:       2036 } hitcount:         11
+{ lat:        234, common_pid:       2040 } hitcount:          1
+{ lat:        235, common_pid:       2037 } hitcount:          2
+{ lat:        235, common_pid:       2036 } hitcount:          8
+{ lat:        235, common_pid:       2043 } hitcount:          2
+{ lat:        235, common_pid:       2039 } hitcount:          5
+{ lat:        235, common_pid:       2042 } hitcount:          2
+{ lat:        235, common_pid:       2040 } hitcount:          4
+{ lat:        235, common_pid:       2041 } hitcount:          1
+{ lat:        236, common_pid:       2036 } hitcount:          7
+{ lat:        236, common_pid:       2037 } hitcount:          1
+{ lat:        236, common_pid:       2041 } hitcount:          5
+{ lat:        236, common_pid:       2039 } hitcount:          3
+{ lat:        236, common_pid:       2043 } hitcount:          9
+{ lat:        236, common_pid:       2040 } hitcount:          7
+{ lat:        237, common_pid:       2037 } hitcount:          1
+{ lat:        237, common_pid:       2040 } hitcount:          1
+{ lat:        237, common_pid:       2036 } hitcount:          9
+{ lat:        237, common_pid:       2039 } hitcount:          3
+{ lat:        237, common_pid:       2043 } hitcount:          8
+{ lat:        237, common_pid:       2042 } hitcount:          2
+{ lat:        237, common_pid:       2041 } hitcount:          2
+{ lat:        238, common_pid:       2043 } hitcount:         10
+{ lat:        238, common_pid:       2040 } hitcount:          1
+{ lat:        238, common_pid:       2037 } hitcount:          9
+{ lat:        238, common_pid:       2038 } hitcount:          1
+{ lat:        238, common_pid:       2039 } hitcount:          1
+{ lat:        238, common_pid:       2042 } hitcount:          3
+{ lat:        238, common_pid:       2036 } hitcount:          7
+{ lat:        239, common_pid:       2041 } hitcount:          1
+{ lat:        239, common_pid:       2043 } hitcount:         11
+{ lat:        239, common_pid:       2037 } hitcount:         11
+{ lat:        239, common_pid:       2038 } hitcount:          6
+{ lat:        239, common_pid:       2036 } hitcount:          7
+{ lat:        239, common_pid:       2040 } hitcount:          1
+{ lat:        239, common_pid:       2042 } hitcount:          9
+{ lat:        240, common_pid:       2037 } hitcount:         29
+{ lat:        240, common_pid:       2043 } hitcount:         15
+{ lat:        240, common_pid:       2040 } hitcount:         44
+{ lat:        240, common_pid:       2039 } hitcount:          1
+{ lat:        240, common_pid:       2041 } hitcount:          2
+{ lat:        240, common_pid:       2038 } hitcount:          1
+{ lat:        240, common_pid:       2036 } hitcount:         10
+{ lat:        240, common_pid:       2042 } hitcount:         13
+{ lat:        241, common_pid:       2036 } hitcount:         21
+{ lat:        241, common_pid:       2041 } hitcount:         36
+{ lat:        241, common_pid:       2037 } hitcount:         34
+{ lat:        241, common_pid:       2042 } hitcount:         14
+{ lat:        241, common_pid:       2040 } hitcount:         94
+{ lat:        241, common_pid:       2039 } hitcount:         12
+{ lat:        241, common_pid:       2038 } hitcount:          2
+{ lat:        241, common_pid:       2043 } hitcount:         28
+{ lat:        242, common_pid:       2040 } hitcount:        109
+{ lat:        242, common_pid:       2041 } hitcount:        506
+{ lat:        242, common_pid:       2039 } hitcount:        155
+{ lat:        242, common_pid:       2042 } hitcount:         21
+{ lat:        242, common_pid:       2037 } hitcount:         52
+{ lat:        242, common_pid:       2043 } hitcount:         21
+{ lat:        242, common_pid:       2036 } hitcount:         16
+{ lat:        242, common_pid:       2038 } hitcount:        156
+{ lat:        243, common_pid:       2037 } hitcount:         46
+{ lat:        243, common_pid:       2039 } hitcount:         40
+{ lat:        243, common_pid:       2042 } hitcount:        119
+{ lat:        243, common_pid:       2041 } hitcount:        611
+{ lat:        243, common_pid:       2036 } hitcount:         69
+{ lat:        243, common_pid:       2038 } hitcount:        784
+{ lat:        243, common_pid:       2040 } hitcount:        323
+{ lat:        243, common_pid:       2043 } hitcount:         14
+{ lat:        244, common_pid:       2043 } hitcount:         35
+{ lat:        244, common_pid:       2042 } hitcount:        305
+{ lat:        244, common_pid:       2039 } hitcount:          8
+{ lat:        244, common_pid:       2040 } hitcount:       4515
+{ lat:        244, common_pid:       2038 } hitcount:        371
+{ lat:        244, common_pid:       2037 } hitcount:         31
+{ lat:        244, common_pid:       2036 } hitcount:        114
+{ lat:        244, common_pid:       2041 } hitcount:       3396
+{ lat:        245, common_pid:       2036 } hitcount:        700
+{ lat:        245, common_pid:       2041 } hitcount:       2772
+{ lat:        245, common_pid:       2037 } hitcount:        268
+{ lat:        245, common_pid:       2039 } hitcount:        472
+{ lat:        245, common_pid:       2038 } hitcount:       2758
+{ lat:        245, common_pid:       2042 } hitcount:       3833
+{ lat:        245, common_pid:       2040 } hitcount:       3105
+{ lat:        245, common_pid:       2043 } hitcount:        645
+{ lat:        246, common_pid:       2038 } hitcount:       3451
+{ lat:        246, common_pid:       2041 } hitcount:        142
+{ lat:        246, common_pid:       2037 } hitcount:       5101
+{ lat:        246, common_pid:       2040 } hitcount:         68
+{ lat:        246, common_pid:       2043 } hitcount:       5099
+{ lat:        246, common_pid:       2039 } hitcount:       5608
+{ lat:        246, common_pid:       2042 } hitcount:       3723
+{ lat:        246, common_pid:       2036 } hitcount:       4738
+{ lat:        247, common_pid:       2042 } hitcount:        312
+{ lat:        247, common_pid:       2043 } hitcount:       2385
+{ lat:        247, common_pid:       2041 } hitcount:        452
+{ lat:        247, common_pid:       2038 } hitcount:        792
+{ lat:        247, common_pid:       2040 } hitcount:         78
+{ lat:        247, common_pid:       2036 } hitcount:       2375
+{ lat:        247, common_pid:       2039 } hitcount:       1834
+{ lat:        247, common_pid:       2037 } hitcount:       2655
+{ lat:        248, common_pid:       2037 } hitcount:         36
+{ lat:        248, common_pid:       2042 } hitcount:         11
+{ lat:        248, common_pid:       2038 } hitcount:        122
+{ lat:        248, common_pid:       2036 } hitcount:        135
+{ lat:        248, common_pid:       2039 } hitcount:         26
+{ lat:        248, common_pid:       2041 } hitcount:        503
+{ lat:        248, common_pid:       2043 } hitcount:         66
+{ lat:        248, common_pid:       2040 } hitcount:         46
+{ lat:        249, common_pid:       2037 } hitcount:         29
+{ lat:        249, common_pid:       2038 } hitcount:          1
+{ lat:        249, common_pid:       2043 } hitcount:         29
+{ lat:        249, common_pid:       2039 } hitcount:          8
+{ lat:        249, common_pid:       2042 } hitcount:         56
+{ lat:        249, common_pid:       2040 } hitcount:         27
+{ lat:        249, common_pid:       2041 } hitcount:         11
+{ lat:        249, common_pid:       2036 } hitcount:         27
+{ lat:        250, common_pid:       2038 } hitcount:          1
+{ lat:        250, common_pid:       2036 } hitcount:         30
+{ lat:        250, common_pid:       2040 } hitcount:         19
+{ lat:        250, common_pid:       2043 } hitcount:         22
+{ lat:        250, common_pid:       2042 } hitcount:         20
+{ lat:        250, common_pid:       2041 } hitcount:          1
+{ lat:        250, common_pid:       2039 } hitcount:          6
+{ lat:        250, common_pid:       2037 } hitcount:         48
+{ lat:        251, common_pid:       2037 } hitcount:         43
+{ lat:        251, common_pid:       2039 } hitcount:          1
+{ lat:        251, common_pid:       2036 } hitcount:         12
+{ lat:        251, common_pid:       2042 } hitcount:          2
+{ lat:        251, common_pid:       2041 } hitcount:          1
+{ lat:        251, common_pid:       2043 } hitcount:         15
+{ lat:        251, common_pid:       2040 } hitcount:          3
+{ lat:        252, common_pid:       2040 } hitcount:          1
+{ lat:        252, common_pid:       2036 } hitcount:         12
+{ lat:        252, common_pid:       2037 } hitcount:         21
+{ lat:        252, common_pid:       2043 } hitcount:         14
+{ lat:        253, common_pid:       2037 } hitcount:         21
+{ lat:        253, common_pid:       2039 } hitcount:          2
+{ lat:        253, common_pid:       2036 } hitcount:          9
+{ lat:        253, common_pid:       2043 } hitcount:          6
+{ lat:        253, common_pid:       2040 } hitcount:          1
+{ lat:        254, common_pid:       2036 } hitcount:          8
+{ lat:        254, common_pid:       2043 } hitcount:          3
+{ lat:        254, common_pid:       2041 } hitcount:          1
+{ lat:        254, common_pid:       2042 } hitcount:          1
+{ lat:        254, common_pid:       2039 } hitcount:          1
+{ lat:        254, common_pid:       2037 } hitcount:         12
+{ lat:        255, common_pid:       2043 } hitcount:          1
+{ lat:        255, common_pid:       2037 } hitcount:          2
+{ lat:        255, common_pid:       2036 } hitcount:          2
+{ lat:        255, common_pid:       2039 } hitcount:          8
+{ lat:        256, common_pid:       2043 } hitcount:          1
+{ lat:        256, common_pid:       2036 } hitcount:          4
+{ lat:        256, common_pid:       2039 } hitcount:          6
+{ lat:        257, common_pid:       2039 } hitcount:          5
+{ lat:        257, common_pid:       2036 } hitcount:          4
+{ lat:        258, common_pid:       2039 } hitcount:          5
+{ lat:        258, common_pid:       2036 } hitcount:          2
+{ lat:        259, common_pid:       2036 } hitcount:          7
+{ lat:        259, common_pid:       2039 } hitcount:          7
+{ lat:        260, common_pid:       2036 } hitcount:          8
+{ lat:        260, common_pid:       2039 } hitcount:          6
+{ lat:        261, common_pid:       2036 } hitcount:          5
+{ lat:        261, common_pid:       2039 } hitcount:          7
+{ lat:        262, common_pid:       2039 } hitcount:          5
+{ lat:        262, common_pid:       2036 } hitcount:          5
+{ lat:        263, common_pid:       2039 } hitcount:          7
+{ lat:        263, common_pid:       2036 } hitcount:          7
+{ lat:        264, common_pid:       2039 } hitcount:          9
+{ lat:        264, common_pid:       2036 } hitcount:          9
+{ lat:        265, common_pid:       2036 } hitcount:          5
+{ lat:        265, common_pid:       2039 } hitcount:          1
+{ lat:        266, common_pid:       2036 } hitcount:          1
+{ lat:        266, common_pid:       2039 } hitcount:          3
+{ lat:        267, common_pid:       2036 } hitcount:          1
+{ lat:        267, common_pid:       2039 } hitcount:          3
+{ lat:        268, common_pid:       2036 } hitcount:          1
+{ lat:        268, common_pid:       2039 } hitcount:          6
+{ lat:        269, common_pid:       2036 } hitcount:          1
+{ lat:        269, common_pid:       2043 } hitcount:          1
+{ lat:        269, common_pid:       2039 } hitcount:          2
+{ lat:        270, common_pid:       2040 } hitcount:          1
+{ lat:        270, common_pid:       2039 } hitcount:          6
+{ lat:        271, common_pid:       2041 } hitcount:          1
+{ lat:        271, common_pid:       2039 } hitcount:          5
+{ lat:        272, common_pid:       2039 } hitcount:         10
+{ lat:        273, common_pid:       2039 } hitcount:          8
+{ lat:        274, common_pid:       2039 } hitcount:          2
+{ lat:        275, common_pid:       2039 } hitcount:          1
+{ lat:        276, common_pid:       2039 } hitcount:          2
+{ lat:        276, common_pid:       2037 } hitcount:          1
+{ lat:        276, common_pid:       2038 } hitcount:          1
+{ lat:        277, common_pid:       2039 } hitcount:          1
+{ lat:        277, common_pid:       2042 } hitcount:          1
+{ lat:        278, common_pid:       2039 } hitcount:          1
+{ lat:        279, common_pid:       2039 } hitcount:          4
+{ lat:        279, common_pid:       2043 } hitcount:          1
+{ lat:        280, common_pid:       2039 } hitcount:          3
+{ lat:        283, common_pid:       2036 } hitcount:          2
+{ lat:        284, common_pid:       2039 } hitcount:          1
+{ lat:        284, common_pid:       2043 } hitcount:          1
+{ lat:        288, common_pid:       2039 } hitcount:          1
+{ lat:        289, common_pid:       2039 } hitcount:          1
+{ lat:        300, common_pid:       2039 } hitcount:          1
+{ lat:        384, common_pid:       2039 } hitcount:          1
+
+Totals:
+    Hits: 67625
+    Entries: 278
+    Dropped: 0
+
+Note, the writes are around the sleep, so ideally they will all be of 250
+microseconds. If you are wondering how several of them are under 250
+microseconds, that is because of the way cyclictest works: if one iteration
+comes in late, the next one will set the timer to wake up less than 250
+microseconds later. That is, if an iteration came in 50 microseconds late,
+the next wake up will be at 200 microseconds.
+
+But this could easily be done in userspace. To make this even more
+interesting, we can mix events that happened in the kernel with
+trace_marker writes in the same histogram.
+
+ # cd /sys/kernel/tracing
+ # echo 'latency u64 lat' > synthetic_events
+ # echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger
+ # echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).latency($lat) if buf == "end"' > events/ftrace/print/trigger
+ # echo 'hist:keys=lat,common_pid:sort=lat' > events/synthetic/latency/trigger
+
+The difference this time is that instead of using the trace_marker to start
+the latency, the sched_waking event is used, matching the common_pid for the
+trace_marker write with the pid that is being woken by sched_waking.
+
+After running cyclictest again with the same parameters, we now have:
+
+ # cat events/synthetic/latency/hist
+# event histogram
+#
+# trigger info: hist:keys=lat,common_pid:vals=hitcount:sort=lat:size=2048 [active]
+#
+
+{ lat:          7, common_pid:       2302 } hitcount:        640
+{ lat:          7, common_pid:       2299 } hitcount:         42
+{ lat:          7, common_pid:       2303 } hitcount:         18
+{ lat:          7, common_pid:       2305 } hitcount:        166
+{ lat:          7, common_pid:       2306 } hitcount:          1
+{ lat:          7, common_pid:       2301 } hitcount:         91
+{ lat:          7, common_pid:       2300 } hitcount:         17
+{ lat:          8, common_pid:       2303 } hitcount:       8296
+{ lat:          8, common_pid:       2304 } hitcount:       6864
+{ lat:          8, common_pid:       2305 } hitcount:       9464
+{ lat:          8, common_pid:       2301 } hitcount:       9213
+{ lat:          8, common_pid:       2306 } hitcount:       6246
+{ lat:          8, common_pid:       2302 } hitcount:       8797
+{ lat:          8, common_pid:       2299 } hitcount:       8771
+{ lat:          8, common_pid:       2300 } hitcount:       8119
+{ lat:          9, common_pid:       2305 } hitcount:       1519
+{ lat:          9, common_pid:       2299 } hitcount:       2346
+{ lat:          9, common_pid:       2303 } hitcount:       2841
+{ lat:          9, common_pid:       2301 } hitcount:       1846
+{ lat:          9, common_pid:       2304 } hitcount:       3861
+{ lat:          9, common_pid:       2302 } hitcount:       1210
+{ lat:          9, common_pid:       2300 } hitcount:       2762
+{ lat:          9, common_pid:       2306 } hitcount:       4247
+{ lat:         10, common_pid:       2299 } hitcount:         16
+{ lat:         10, common_pid:       2306 } hitcount:        333
+{ lat:         10, common_pid:       2303 } hitcount:         16
+{ lat:         10, common_pid:       2304 } hitcount:        168
+{ lat:         10, common_pid:       2302 } hitcount:        240
+{ lat:         10, common_pid:       2301 } hitcount:         28
+{ lat:         10, common_pid:       2300 } hitcount:         95
+{ lat:         10, common_pid:       2305 } hitcount:         18
+{ lat:         11, common_pid:       2303 } hitcount:          5
+{ lat:         11, common_pid:       2305 } hitcount:          8
+{ lat:         11, common_pid:       2306 } hitcount:        221
+{ lat:         11, common_pid:       2302 } hitcount:         76
+{ lat:         11, common_pid:       2304 } hitcount:         26
+{ lat:         11, common_pid:       2300 } hitcount:        125
+{ lat:         11, common_pid:       2299 } hitcount:          2
+{ lat:         12, common_pid:       2305 } hitcount:          3
+{ lat:         12, common_pid:       2300 } hitcount:          6
+{ lat:         12, common_pid:       2306 } hitcount:         90
+{ lat:         12, common_pid:       2302 } hitcount:          4
+{ lat:         12, common_pid:       2303 } hitcount:          1
+{ lat:         12, common_pid:       2304 } hitcount:        122
+{ lat:         13, common_pid:       2300 } hitcount:         12
+{ lat:         13, common_pid:       2301 } hitcount:          1
+{ lat:         13, common_pid:       2306 } hitcount:         32
+{ lat:         13, common_pid:       2302 } hitcount:          5
+{ lat:         13, common_pid:       2305 } hitcount:          1
+{ lat:         13, common_pid:       2303 } hitcount:          1
+{ lat:         13, common_pid:       2304 } hitcount:         61
+{ lat:         14, common_pid:       2303 } hitcount:          4
+{ lat:         14, common_pid:       2306 } hitcount:          5
+{ lat:         14, common_pid:       2305 } hitcount:          4
+{ lat:         14, common_pid:       2304 } hitcount:         62
+{ lat:         14, common_pid:       2302 } hitcount:         19
+{ lat:         14, common_pid:       2300 } hitcount:         33
+{ lat:         14, common_pid:       2299 } hitcount:          1
+{ lat:         14, common_pid:       2301 } hitcount:          4
+{ lat:         15, common_pid:       2305 } hitcount:          1
+{ lat:         15, common_pid:       2302 } hitcount:         25
+{ lat:         15, common_pid:       2300 } hitcount:         11
+{ lat:         15, common_pid:       2299 } hitcount:          5
+{ lat:         15, common_pid:       2301 } hitcount:          1
+{ lat:         15, common_pid:       2304 } hitcount:          8
+{ lat:         15, common_pid:       2303 } hitcount:          1
+{ lat:         15, common_pid:       2306 } hitcount:          6
+{ lat:         16, common_pid:       2302 } hitcount:         31
+{ lat:         16, common_pid:       2306 } hitcount:          3
+{ lat:         16, common_pid:       2300 } hitcount:          5
+{ lat:         17, common_pid:       2302 } hitcount:          6
+{ lat:         17, common_pid:       2303 } hitcount:          1
+{ lat:         18, common_pid:       2304 } hitcount:          1
+{ lat:         18, common_pid:       2302 } hitcount:          8
+{ lat:         18, common_pid:       2299 } hitcount:          1
+{ lat:         18, common_pid:       2301 } hitcount:          1
+{ lat:         19, common_pid:       2303 } hitcount:          4
+{ lat:         19, common_pid:       2304 } hitcount:          5
+{ lat:         19, common_pid:       2302 } hitcount:          4
+{ lat:         19, common_pid:       2299 } hitcount:          3
+{ lat:         19, common_pid:       2306 } hitcount:          1
+{ lat:         19, common_pid:       2300 } hitcount:          4
+{ lat:         19, common_pid:       2305 } hitcount:          5
+{ lat:         20, common_pid:       2299 } hitcount:          2
+{ lat:         20, common_pid:       2302 } hitcount:          3
+{ lat:         20, common_pid:       2305 } hitcount:          1
+{ lat:         20, common_pid:       2300 } hitcount:          2
+{ lat:         20, common_pid:       2301 } hitcount:          2
+{ lat:         20, common_pid:       2303 } hitcount:          3
+{ lat:         21, common_pid:       2305 } hitcount:          1
+{ lat:         21, common_pid:       2299 } hitcount:          5
+{ lat:         21, common_pid:       2303 } hitcount:          4
+{ lat:         21, common_pid:       2302 } hitcount:          7
+{ lat:         21, common_pid:       2300 } hitcount:          1
+{ lat:         21, common_pid:       2301 } hitcount:          5
+{ lat:         21, common_pid:       2304 } hitcount:          2
+{ lat:         22, common_pid:       2302 } hitcount:          5
+{ lat:         22, common_pid:       2303 } hitcount:          1
+{ lat:         22, common_pid:       2306 } hitcount:          3
+{ lat:         22, common_pid:       2301 } hitcount:          2
+{ lat:         22, common_pid:       2300 } hitcount:          1
+{ lat:         22, common_pid:       2299 } hitcount:          1
+{ lat:         22, common_pid:       2305 } hitcount:          1
+{ lat:         22, common_pid:       2304 } hitcount:          1
+{ lat:         23, common_pid:       2299 } hitcount:          1
+{ lat:         23, common_pid:       2306 } hitcount:          2
+{ lat:         23, common_pid:       2302 } hitcount:          6
+{ lat:         24, common_pid:       2302 } hitcount:          3
+{ lat:         24, common_pid:       2300 } hitcount:          1
+{ lat:         24, common_pid:       2306 } hitcount:          2
+{ lat:         24, common_pid:       2305 } hitcount:          1
+{ lat:         24, common_pid:       2299 } hitcount:          1
+{ lat:         25, common_pid:       2300 } hitcount:          1
+{ lat:         25, common_pid:       2302 } hitcount:          4
+{ lat:         26, common_pid:       2302 } hitcount:          2
+{ lat:         27, common_pid:       2305 } hitcount:          1
+{ lat:         27, common_pid:       2300 } hitcount:          1
+{ lat:         27, common_pid:       2302 } hitcount:          3
+{ lat:         28, common_pid:       2306 } hitcount:          1
+{ lat:         28, common_pid:       2302 } hitcount:          4
+{ lat:         29, common_pid:       2302 } hitcount:          1
+{ lat:         29, common_pid:       2300 } hitcount:          2
+{ lat:         29, common_pid:       2306 } hitcount:          1
+{ lat:         29, common_pid:       2304 } hitcount:          1
+{ lat:         30, common_pid:       2302 } hitcount:          4
+{ lat:         31, common_pid:       2302 } hitcount:          6
+{ lat:         32, common_pid:       2302 } hitcount:          1
+{ lat:         33, common_pid:       2299 } hitcount:          1
+{ lat:         33, common_pid:       2302 } hitcount:          3
+{ lat:         34, common_pid:       2302 } hitcount:          2
+{ lat:         35, common_pid:       2302 } hitcount:          1
+{ lat:         35, common_pid:       2304 } hitcount:          1
+{ lat:         36, common_pid:       2302 } hitcount:          4
+{ lat:         37, common_pid:       2302 } hitcount:          6
+{ lat:         38, common_pid:       2302 } hitcount:          2
+{ lat:         39, common_pid:       2302 } hitcount:          2
+{ lat:         39, common_pid:       2304 } hitcount:          1
+{ lat:         40, common_pid:       2304 } hitcount:          2
+{ lat:         40, common_pid:       2302 } hitcount:          5
+{ lat:         41, common_pid:       2304 } hitcount:          1
+{ lat:         41, common_pid:       2302 } hitcount:          8
+{ lat:         42, common_pid:       2302 } hitcount:          6
+{ lat:         42, common_pid:       2304 } hitcount:          1
+{ lat:         43, common_pid:       2302 } hitcount:          3
+{ lat:         43, common_pid:       2304 } hitcount:          4
+{ lat:         44, common_pid:       2302 } hitcount:          6
+{ lat:         45, common_pid:       2302 } hitcount:          5
+{ lat:         46, common_pid:       2302 } hitcount:          5
+{ lat:         47, common_pid:       2302 } hitcount:          7
+{ lat:         48, common_pid:       2301 } hitcount:          1
+{ lat:         48, common_pid:       2302 } hitcount:          9
+{ lat:         49, common_pid:       2302 } hitcount:          3
+{ lat:         50, common_pid:       2302 } hitcount:          1
+{ lat:         50, common_pid:       2301 } hitcount:          1
+{ lat:         51, common_pid:       2302 } hitcount:          2
+{ lat:         51, common_pid:       2301 } hitcount:          1
+{ lat:         61, common_pid:       2302 } hitcount:          1
+{ lat:        110, common_pid:       2302 } hitcount:          1
+
+Totals:
+    Hits: 89565
+    Entries: 158
+    Dropped: 0
+
+This doesn't tell us anything about how late cyclictest may have woken up,
+but it does give us a nice histogram of how long it took from the time that
+cyclictest was woken to the time it made it into user space.
diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt
index 0a0930a..921739d 100644
--- a/Documentation/translations/ko_KR/memory-barriers.txt
+++ b/Documentation/translations/ko_KR/memory-barriers.txt
@@ -36,6 +36,9 @@
 in places, and, being written by humans, it has unintentionally incomplete
 parts as well.  This document is meant as a guide to using the various
 memory barriers provided by Linux, but if anything seems odd (and there
 will be lots) please ask.
+Some of those oddities may be resolved by referring to the formal memory
+consistency model and the related documentation in tools/memory-model/.  Even
+this memory model, however, should be viewed as the collective opinion of its
+maintainers rather than as an infallible oracle.

 To repeat, this document is not a specification of what Linux expects from
 hardware.
@@ -77,7 +80,7 @@
 
      - Varieties of memory barrier.
      - What may not be assumed about memory barriers?
-     - Data dependency barriers.
+     - Data dependency barriers (historical).
      - Control dependencies.
      - SMP barrier pairing.
      - Examples of memory barrier sequences.
@@ -255,17 +258,20 @@
 (*) On any given CPU, dependent memory accesses will be issued in order,
     with respect to itself.  This means that for:

-	Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
+	Q = READ_ONCE(P); D = READ_ONCE(*Q);

     the CPU will issue the following sequence of memory operations:

	Q = LOAD P, D = LOAD *Q

-     and always in that order.  On most systems smp_read_barrier_depends()
-     does nothing, but it must be used explicitly on DEC Alpha.  Note that
-     normally you should use things like rcu_dereference() instead of
-     open-coding smp_read_barrier_depends().
+     and always in that order.  However, on DEC Alpha, READ_ONCE() also
+     emits a memory-barrier instruction, so that a DEC Alpha CPU will
+     instead issue the following memory operations:
+
+	Q = LOAD P, MEMORY_BARRIER, D = LOAD *Q, MEMORY_BARRIER
+
+     Whether on DEC Alpha or not, READ_ONCE() also removes any harmful
+     effects the compiler might otherwise have.
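+
+     To make the above concrete, here is a minimal C sketch of the
+     dependent-load pattern being described (gp, foo and the function
+     names are invented for illustration; since v4.15 the READ_ONCE() on
+     the pointer suffices on its own, even on DEC Alpha):
+
+	struct foo { int a; };
+	static struct foo foo1;
+	static struct foo *gp;
+
+	void writer(void)
+	{
+		foo1.a = 42;
+		/* publish: order the store to foo1.a before the pointer store */
+		smp_store_release(&gp, &foo1);
+	}
+
+	int reader(void)
+	{
+		/* the address dependency orders the two loads */
+		struct foo *q = READ_ONCE(gp);
+
+		return q ? READ_ONCE(q->a) : -1;
+	}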
 
 (*) Overlapping loads and stores within a particular CPU will appear to be
     ordered within that CPU.  This means that for:
@@ -421,8 +427,8 @@
      A data dependency barrier is a weaker form of read barrier.  In the case
      where two loads are performed such that the second depends on the result
      of the first (eg: the first load retrieves the address to which the
-     second load will be directed), a data dependency barrier may be required
-     to make sure that the data to be read by the second load is updated
-     before the address is obtained by the first load.
+     second load will be directed), a data dependency barrier may be required
+     to make sure that the data to be read by the second load is updated
+     after the address is obtained by the first load.

      A data dependency barrier is a partial ordering on interdependent load
      operations; it is not required to have any effect on store operations,
      independent loads, or overlapping
@@ -570,8 +576,14 @@
 	    Documentation/DMA-API.txt
 
 
-DATA DEPENDENCY BARRIERS
-------------------------
+DATA DEPENDENCY BARRIERS (HISTORICAL)
+-------------------------------------
+
+As of v4.15 of the Linux kernel, an smp_read_barrier_depends() was added to
+READ_ONCE(), which means that about the only people who need to pay attention
+to this section are those working on DEC Alpha architecture-specific code and
+those working on READ_ONCE() itself.  For those who need it, and for those
+who are interested in the history, here is the story of data-dependency
+barriers.
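+
+For those readers, a simplified sketch of what that v4.15 change amounts to
+(the real READ_ONCE() in include/linux/compiler.h is considerably more
+elaborate than this):
+
+	#define READ_ONCE(x)						\
+	({								\
+		typeof(x) ___x = *(volatile typeof(x) *)&(x);		\
+		smp_read_barrier_depends();  /* no-op except on Alpha */ \
+		___x;							\
+	})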
 
 The usage requirements of data dependency barriers are a little subtle, and
 it's not always obvious that they're needed.  To illustrate,
@@ -1787,7 +1799,7 @@
 	GENERAL		mb()			smp_mb()
 	WRITE		wmb()			smp_wmb()
 	READ		rmb()			smp_rmb()
-	DATA DEPENDENCY	read_barrier_depends()	smp_read_barrier_depends()
+	DATA DEPENDENCY				READ_ONCE()


 All memory barriers except the data dependency barriers imply a compiler
@@ -2796,8 +2808,9 @@
 
 
 To intervene, we need to interpolate a data dependency barrier or a read
-barrier between the loads.  This will force the cache to commit its
-coherency queue before processing any further requests.
+barrier between the loads (which as of v4.15 is supplied unconditionally
+by the READ_ONCE() macro).  This will force the cache to commit its
+coherency queue before processing any further requests.
 
 	CPU 1		CPU 2		COMMENT
 	===============	===============	=======================================
@@ -2826,7 +2839,10 @@
 Other CPUs may also have split caches, but those CPUs must coordinate
 between the split caches even for normal memory accesses.  Alpha, by
 choosing the weakest memory-ordering semantics, removed the need for such
-coordination when memory barriers are not explicitly used.
+coordination when memory barriers are not explicitly used, which permitted
+Alpha to sport higher CPU clock rates back in the day.  However, please
+note that (again, as of v4.15) smp_read_barrier_depends() should not be
+used except within Alpha architecture-specific code and within the
+READ_ONCE() macro.
 
 
 CACHE COHERENCY VS DMA
@@ -2846,7 +2862,7 @@
 To resolve the problem, the relevant bits of the cache on each CPU must be
 invalidated at the appropriate points in the kernel.

-Please see Documentation/cachetlb.txt for more information on cache
+Please see Documentation/core-api/cachetlb.rst for more information on cache
 management.
 
 
@@ -2988,7 +3004,9 @@
 because, together with the memory coherence system, it synchronizes the two
 caches and makes the pointer change and the discovery of the new data happen
 in the correct order.

-The Linux kernel's memory barrier model was defined based on Alpha.
+The Linux kernel's memory barrier model was defined based on Alpha, but as
+of v4.15 the addition of smp_read_barrier_depends() to READ_ONCE() has
+greatly reduced Alpha's impact on the memory model.

 See the subsection on "Cache Coherency" above.
 
@@ -3023,7 +3041,7 @@
 can be used to implement synchronization without the use of locks.  For more
 details, please refer to:
 
-	Documentation/circular-buffers.txt
+	Documentation/core-api/circular-buffers.rst
 
 
 =========
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 099c412..82a468b 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -207,13 +207,6 @@
 	to the file do not need to be in ordered form but reads from the file
 	will be ordered in the same way as the actions_avail sysctl.
 
-	It is important to note that the value of ``actions_logged`` does not
-	prevent certain actions from being logged when the audit subsystem is
-	configured to audit a task. If the action is not found in
-	``actions_logged`` list, the final decision on whether to audit the
-	action for that task is ultimately left up to the audit subsystem to
-	decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``.
-
 	The ``allow`` string is not accepted in the ``actions_logged`` sysctl
 	as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting
 	to write ``allow`` to the sysctl will result in an EINVAL being
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index ef6a511..f1a4d3c 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -252,15 +252,14 @@
 the driver should call vfio_add_group_dev() and vfio_del_group_dev()
 respectively::
 
-	extern int vfio_add_group_dev(struct iommu_group *iommu_group,
-	                              struct device *dev,
+	extern int vfio_add_group_dev(struct device *dev,
 				      const struct vfio_device_ops *ops,
 				      void *device_data);
 
 	extern void *vfio_del_group_dev(struct device *dev);
 
 vfio_add_group_dev() indicates to the core to begin tracking the
-specified iommu_group and register the specified dev as owned by
+iommu_group of the specified dev and register the dev as owned by
 a VFIO bus driver.  The driver provides an ops structure for callbacks
 similar to a file operations structure::
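+
+As a hedged illustration (the driver name, ops table and private data type
+are hypothetical; only the vfio_add_group_dev() signature comes from the
+text above), a bus driver's probe routine might now look like::
+
+	static int my_vfio_probe(struct device *dev)
+	{
+		struct my_device *mdev;
+
+		mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+		if (!mdev)
+			return -ENOMEM;
+
+		/* the iommu_group is derived from dev by the VFIO core */
+		return vfio_add_group_dev(dev, &my_vfio_ops, mdev);
+	}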
 
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
index 0278f2c..f4a4f3e 100644
--- a/Documentation/vm/00-INDEX
+++ b/Documentation/vm/00-INDEX
@@ -1,62 +1,50 @@
 00-INDEX
 	- this file.
-active_mm.txt
+active_mm.rst
 	- An explanation from Linus about tsk->active_mm vs tsk->mm.
-balance
+balance.rst
 	- various information on memory balancing.
-cleancache.txt
+cleancache.rst
 	- Intro to cleancache and page-granularity victim cache.
-frontswap.txt
+frontswap.rst
 	- Outline frontswap, part of the transcendent memory frontend.
-highmem.txt
+highmem.rst
 	- Outline of highmem and common issues.
-hmm.txt
+hmm.rst
 	- Documentation of heterogeneous memory management
-hugetlbpage.txt
-	- a brief summary of hugetlbpage support in the Linux kernel.
-hugetlbfs_reserv.txt
+hugetlbfs_reserv.rst
 	- A brief overview of hugetlbfs reservation design/implementation.
-hwpoison.txt
+hwpoison.rst
 	- explains what hwpoison is
-idle_page_tracking.txt
-	- description of the idle page tracking feature.
-ksm.txt
+ksm.rst
 	- how to use the Kernel Samepage Merging feature.
-mmu_notifier.txt
+mmu_notifier.rst
 	- a note about clearing pte/pmd and mmu notifications
-numa
+numa.rst
 	- information about NUMA specific code in the Linux vm.
-numa_memory_policy.txt
-	- documentation of concepts and APIs of the 2.6 memory policy support.
-overcommit-accounting
+overcommit-accounting.rst
 	- description of the Linux kernels overcommit handling modes.
-page_frags
+page_frags.rst
 	- description of page fragments allocator
-page_migration
+page_migration.rst
 	- description of page migration in NUMA systems.
-pagemap.txt
-	- pagemap, from the userspace perspective
-page_owner.txt
+page_owner.rst
 	- tracking about who allocated each page
-remap_file_pages.txt
+remap_file_pages.rst
 	- a note about remap_file_pages() system call
-slub.txt
+slub.rst
 	- a short users guide for SLUB.
-soft-dirty.txt
-	- short explanation for soft-dirty PTEs
-split_page_table_lock
+split_page_table_lock.rst
 	- Separate per-table lock to improve scalability of the old page_table_lock.
-swap_numa.txt
+swap_numa.rst
 	- automatic binding of swap device to numa node
-transhuge.txt
+transhuge.rst
 	- Transparent Hugepage Support, alternative way of using hugepages.
-unevictable-lru.txt
+unevictable-lru.rst
 	- Unevictable LRU infrastructure
-userfaultfd.txt
-	- description of userfaultfd system call
 z3fold.txt
 	- outline of z3fold allocator for storing compressed pages
-zsmalloc.txt
+zsmalloc.rst
 	- outline of zsmalloc allocator for storing compressed pages
-zswap.txt
+zswap.rst
 	- Intro to compressed cache for swap pages
diff --git a/Documentation/vm/active_mm.rst b/Documentation/vm/active_mm.rst
new file mode 100644
index 0000000..c84471b
--- /dev/null
+++ b/Documentation/vm/active_mm.rst
@@ -0,0 +1,91 @@
+.. _active_mm:
+
+=========
+Active MM
+=========
+
+::
+
+ List:       linux-kernel
+ Subject:    Re: active_mm
+ From:       Linus Torvalds <torvalds () transmeta ! com>
+ Date:       1999-07-30 21:36:24
+
+ Cc'd to linux-kernel, because I don't write explanations all that often,
+ and when I do I feel better about more people reading them.
+
+ On Fri, 30 Jul 1999, David Mosberger wrote:
+ >
+ > Is there a brief description someplace on how "mm" vs. "active_mm" in
+ > the task_struct are supposed to be used?  (My apologies if this was
+ > discussed on the mailing lists---I just returned from vacation and
+ > wasn't able to follow linux-kernel for a while).
+
+ Basically, the new setup is:
+
+  - we have "real address spaces" and "anonymous address spaces". The
+    difference is that an anonymous address space doesn't care about the
+    user-level page tables at all, so when we do a context switch into an
+    anonymous address space we just leave the previous address space
+    active.
+
+    The obvious use for a "anonymous address space" is any thread that
+    doesn't need any user mappings - all kernel threads basically fall into
+    this category, but even "real" threads can temporarily say that for
+    some amount of time they are not going to be interested in user space,
+    and that the scheduler might as well try to avoid wasting time on
+    switching the VM state around. Currently only the old-style bdflush
+    sync does that.
+
+  - "tsk->mm" points to the "real address space". For an anonymous process,
+    tsk->mm will be NULL, for the logical reason that an anonymous process
+    really doesn't _have_ a real address space at all.
+
+  - however, we obviously need to keep track of which address space we
+    "stole" for such an anonymous user. For that, we have "tsk->active_mm",
+    which shows what the currently active address space is.
+
+    The rule is that for a process with a real address space (ie tsk->mm is
+    non-NULL) the active_mm obviously always has to be the same as the real
+    one.
+
+    For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
+    "borrowed" mm while the anonymous process is running. When the
+    anonymous process gets scheduled away, the borrowed address space is
+    returned and cleared.
+
+ To support all that, the "struct mm_struct" now has two counters: a
+ "mm_users" counter that is how many "real address space users" there are,
+ and a "mm_count" counter that is the number of "lazy" users (ie anonymous
+ users) plus one if there are any real users.
+
+ Usually there is at least one real user, but it could be that the real
+ user exited on another CPU while a lazy user was still active, so you do
+ actually get cases where you have a address space that is _only_ used by
+ lazy users. That is often a short-lived state, because once that thread
+ gets scheduled away in favour of a real thread, the "zombie" mm gets
+ released because "mm_users" becomes zero.
+
+ Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
+ more. "init_mm" should be considered just a "lazy context when no other
+ context is available", and in fact it is mainly used just at bootup when
+ no real VM has yet been created. So code that used to check
+
+ 	if (current->mm == &init_mm)
+
+ should generally just do
+
+ 	if (!current->mm)
+
+ instead (which makes more sense anyway - the test is basically one of "do
+ we have a user context", and is generally done by the page fault handler
+ and things like that).
+
+ Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago,
+ because it slightly changes the interfaces to accommodate the alpha (who
+ would have thought it, but the alpha actually ends up having one of the
+ ugliest context switch codes - unlike the other architectures where the MM
+ and register state is separate, the alpha PALcode joins the two, and you
+ need to switch both together).
+
+ (From http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
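+
+To restate the rules above as a short, hypothetical C sketch (the helper
+names are invented; the fields and the NULL test are as described in the
+mail)::
+
+	void helper(struct task_struct *tsk)
+	{
+		if (!tsk->mm) {
+			/* anonymous context (e.g. a kernel thread): no user
+			 * address space of its own; tsk->active_mm is the mm
+			 * borrowed from the last real user process */
+			use_borrowed_mm(tsk->active_mm);  /* hypothetical */
+		} else {
+			/* a real address space: the two must always agree */
+			BUG_ON(tsk->active_mm != tsk->mm);
+		}
+	}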
diff --git a/Documentation/vm/active_mm.txt b/Documentation/vm/active_mm.txt
deleted file mode 100644
index dbf4581..0000000
--- a/Documentation/vm/active_mm.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-List:       linux-kernel
-Subject:    Re: active_mm
-From:       Linus Torvalds <torvalds () transmeta ! com>
-Date:       1999-07-30 21:36:24
-
-Cc'd to linux-kernel, because I don't write explanations all that often,
-and when I do I feel better about more people reading them.
-
-On Fri, 30 Jul 1999, David Mosberger wrote:
->
-> Is there a brief description someplace on how "mm" vs. "active_mm" in
-> the task_struct are supposed to be used?  (My apologies if this was
-> discussed on the mailing lists---I just returned from vacation and
-> wasn't able to follow linux-kernel for a while).
-
-Basically, the new setup is:
-
- - we have "real address spaces" and "anonymous address spaces". The
-   difference is that an anonymous address space doesn't care about the
-   user-level page tables at all, so when we do a context switch into an
-   anonymous address space we just leave the previous address space
-   active.
-
-   The obvious use for a "anonymous address space" is any thread that
-   doesn't need any user mappings - all kernel threads basically fall into
-   this category, but even "real" threads can temporarily say that for
-   some amount of time they are not going to be interested in user space,
-   and that the scheduler might as well try to avoid wasting time on
-   switching the VM state around. Currently only the old-style bdflush
-   sync does that.
-
- - "tsk->mm" points to the "real address space". For an anonymous process,
-   tsk->mm will be NULL, for the logical reason that an anonymous process
-   really doesn't _have_ a real address space at all.
-
- - however, we obviously need to keep track of which address space we
-   "stole" for such an anonymous user. For that, we have "tsk->active_mm",
-   which shows what the currently active address space is.
-
-   The rule is that for a process with a real address space (ie tsk->mm is
-   non-NULL) the active_mm obviously always has to be the same as the real
-   one.
-
-   For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
-   "borrowed" mm while the anonymous process is running. When the
-   anonymous process gets scheduled away, the borrowed address space is
-   returned and cleared.
-
-To support all that, the "struct mm_struct" now has two counters: a
-"mm_users" counter that is how many "real address space users" there are,
-and a "mm_count" counter that is the number of "lazy" users (ie anonymous
-users) plus one if there are any real users.
-
-Usually there is at least one real user, but it could be that the real
-user exited on another CPU while a lazy user was still active, so you do
-actually get cases where you have a address space that is _only_ used by
-lazy users. That is often a short-lived state, because once that thread
-gets scheduled away in favour of a real thread, the "zombie" mm gets
-released because "mm_users" becomes zero.
-
-Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
-more. "init_mm" should be considered just a "lazy context when no other
-context is available", and in fact it is mainly used just at bootup when
-no real VM has yet been created. So code that used to check
-
-	if (current->mm == &init_mm)
-
-should generally just do
-
-	if (!current->mm)
-
-instead (which makes more sense anyway - the test is basically one of "do
-we have a user context", and is generally done by the page fault handler
-and things like that).
-
-Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago,
-because it slightly changes the interfaces to accommodate the alpha (who
-would have thought it, but the alpha actually ends up having one of the
-ugliest context switch codes - unlike the other architectures where the MM
-and register state is separate, the alpha PALcode joins the two, and you
-need to switch both together).
-
-(From http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
diff --git a/Documentation/vm/balance b/Documentation/vm/balance
deleted file mode 100644
index 9645954..0000000
--- a/Documentation/vm/balance
+++ /dev/null
@@ -1,95 +0,0 @@
-Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
-
-Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as
-well as for non __GFP_IO allocations.
-
-The first reason why a caller may avoid reclaim is that the caller can not
-sleep due to holding a spinlock or is in interrupt context. The second may
-be that the caller is willing to fail the allocation without incurring the
-overhead of page reclaim. This may happen for opportunistic high-order
-allocation requests that have order-0 fallback options. In such cases,
-the caller may also wish to avoid waking kswapd.
-
-__GFP_IO allocation requests are made to prevent file system deadlocks.
-
-In the absence of non sleepable allocation requests, it seems detrimental
-to be doing balancing. Page reclamation can be kicked off lazily, that
-is, only when needed (aka zone free memory is 0), instead of making it
-a proactive process.
-
-That being said, the kernel should try to fulfill requests for direct
-mapped pages from the direct mapped pool, instead of falling back on
-the dma pool, so as to keep the dma pool filled for dma requests (atomic
-or not). A similar argument applies to highmem and direct mapped pages.
-OTOH, if there is a lot of free dma pages, it is preferable to satisfy
-regular memory requests by allocating one from the dma pool, instead
-of incurring the overhead of regular zone balancing.
-
-In 2.2, memory balancing/page reclamation would kick off only when the
-_total_ number of free pages fell below 1/64 th of total memory. With the
-right ratio of dma and regular memory, it is quite possible that balancing
-would not be done even when the dma zone was completely empty. 2.2 has
-been running production machines of varying memory sizes, and seems to be
-doing fine even with the presence of this problem. In 2.3, due to
-HIGHMEM, this problem is aggravated.
-
-In 2.3, zone balancing can be done in one of two ways: depending on the
-zone size (and possibly of the size of lower class zones), we can decide
-at init time how many free pages we should aim for while balancing any
-zone. The good part is, while balancing, we do not need to look at sizes
-of lower class zones, the bad part is, we might do too frequent balancing
-due to ignoring possibly lower usage in the lower class zones. Also,
-with a slight change in the allocation routine, it is possible to reduce
-the memclass() macro to be a simple equality.
-
-Another possible solution is that we balance only when the free memory
-of a zone _and_ all its lower class zones falls below 1/64th of the
-total memory in the zone and its lower class zones. This fixes the 2.2
-balancing problem, and stays as close to 2.2 behavior as possible. Also,
-the balancing algorithm works the same way on the various architectures,
-which have different numbers and types of zones. If we wanted to get
-fancy, we could assign different weights to free pages in different
-zones in the future.
-
-Note that if the size of the regular zone is huge compared to dma zone,
-it becomes less significant to consider the free dma pages while
-deciding whether to balance the regular zone. The first solution
-becomes more attractive then.
-
-The appended patch implements the second solution. It also "fixes" two
-problems: first, kswapd is woken up as in 2.2 on low memory conditions
-for non-sleepable allocations. Second, the HIGHMEM zone is also balanced,
-so as to give a fighting chance for replace_with_highmem() to get a
-HIGHMEM page, as well as to ensure that HIGHMEM allocations do not
-fall back into regular zone. This also makes sure that HIGHMEM pages
-are not leaked (for example, in situations where a HIGHMEM page is in 
-the swapcache but is not being used by anyone)
-
-kswapd also needs to know about the zones it should balance. kswapd is
-primarily needed in a situation where balancing can not be done, 
-probably because all allocation requests are coming from intr context
-and all process contexts are sleeping. For 2.3, kswapd does not really
-need to balance the highmem zone, since intr context does not request
-highmem pages. kswapd looks at the zone_wake_kswapd field in the zone
-structure to decide whether a zone needs balancing.
-
-Page stealing from process memory and shm is done if stealing the page would
-alleviate memory pressure on any zone in the page's node that has fallen below
-its watermark.
-
-watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These
-are per-zone fields, used to determine when a zone needs to be balanced. When
-the number of pages falls below watermark[WMARK_MIN], the hysteric field
-low_on_memory gets set. This stays set till the number of free pages becomes
-watermark[WMARK_HIGH]. When low_on_memory is set, page allocation requests will
-try to free some pages in the zone (providing GFP_WAIT is set in the request).
-Orthogonal to this, is the decision to poke kswapd to free some zone pages.
-That decision is not hysteresis based, and is done when the number of free
-pages is below watermark[WMARK_LOW]; in which case zone_wake_kswapd is also set.
-
-
-(Good) Ideas that I have heard:
-1. Dynamic experience should influence balancing: number of failed requests
-for a zone can be tracked and fed into the balancing scheme (jalvo@mbay.net)
-2. Implement a replace_with_highmem()-like replace_with_regular() to preserve
-dma pages. (lkd@tantalophile.demon.co.uk)
diff --git a/Documentation/vm/balance.rst b/Documentation/vm/balance.rst
new file mode 100644
index 0000000..6a1fadf
--- /dev/null
+++ b/Documentation/vm/balance.rst
@@ -0,0 +1,102 @@
+.. _balance:
+
+================
+Memory Balancing
+================
+
+Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
+
+Memory balancing is needed for !__GFP_ATOMIC and !__GFP_KSWAPD_RECLAIM as
+well as for non __GFP_IO allocations.
+
+The first reason why a caller may avoid reclaim is that the caller can not
+sleep due to holding a spinlock or is in interrupt context. The second may
+be that the caller is willing to fail the allocation without incurring the
+overhead of page reclaim. This may happen for opportunistic high-order
+allocation requests that have order-0 fallback options. In such cases,
+the caller may also wish to avoid waking kswapd.
+
+__GFP_IO allocation requests are made to prevent file system deadlocks.
+
+In the absence of non sleepable allocation requests, it seems detrimental
+to be doing balancing. Page reclamation can be kicked off lazily, that
+is, only when needed (aka zone free memory is 0), instead of making it
+a proactive process.
+
+That being said, the kernel should try to fulfill requests for direct
+mapped pages from the direct mapped pool, instead of falling back on
+the dma pool, so as to keep the dma pool filled for dma requests (atomic
+or not). A similar argument applies to highmem and direct mapped pages.
+OTOH, if there is a lot of free dma pages, it is preferable to satisfy
+regular memory requests by allocating one from the dma pool, instead
+of incurring the overhead of regular zone balancing.
+
+In 2.2, memory balancing/page reclamation would kick off only when the
+_total_ number of free pages fell below 1/64 th of total memory. With the
+right ratio of dma and regular memory, it is quite possible that balancing
+would not be done even when the dma zone was completely empty. 2.2 has
+been running production machines of varying memory sizes, and seems to be
+doing fine even with the presence of this problem. In 2.3, due to
+HIGHMEM, this problem is aggravated.
+
+In 2.3, zone balancing can be done in one of two ways: depending on the
+zone size (and possibly of the size of lower class zones), we can decide
+at init time how many free pages we should aim for while balancing any
+zone. The good part is, while balancing, we do not need to look at sizes
+of lower class zones, the bad part is, we might do too frequent balancing
+due to ignoring possibly lower usage in the lower class zones. Also,
+with a slight change in the allocation routine, it is possible to reduce
+the memclass() macro to be a simple equality.
+
+Another possible solution is that we balance only when the free memory
+of a zone _and_ all its lower class zones falls below 1/64th of the
+total memory in the zone and its lower class zones. This fixes the 2.2
+balancing problem, and stays as close to 2.2 behavior as possible. Also,
+the balancing algorithm works the same way on the various architectures,
+which have different numbers and types of zones. If we wanted to get
+fancy, we could assign different weights to free pages in different
+zones in the future.
+
+Note that if the size of the regular zone is huge compared to dma zone,
+it becomes less significant to consider the free dma pages while
+deciding whether to balance the regular zone. The first solution
+becomes more attractive then.
+
+The appended patch implements the second solution. It also "fixes" two
+problems: first, kswapd is woken up as in 2.2 on low memory conditions
+for non-sleepable allocations. Second, the HIGHMEM zone is also balanced,
+so as to give a fighting chance for replace_with_highmem() to get a
+HIGHMEM page, as well as to ensure that HIGHMEM allocations do not
+fall back into regular zone. This also makes sure that HIGHMEM pages
+are not leaked (for example, in situations where a HIGHMEM page is in
+the swapcache but is not being used by anyone)
+
+kswapd also needs to know about the zones it should balance. kswapd is
+primarily needed in a situation where balancing can not be done,
+probably because all allocation requests are coming from intr context
+and all process contexts are sleeping. For 2.3, kswapd does not really
+need to balance the highmem zone, since intr context does not request
+highmem pages. kswapd looks at the zone_wake_kswapd field in the zone
+structure to decide whether a zone needs balancing.
+
+Page stealing from process memory and shm is done if stealing the page would
+alleviate memory pressure on any zone in the page's node that has fallen below
+its watermark.
+
+watermark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These
+are per-zone fields, used to determine when a zone needs to be balanced. When
+the number of pages falls below watermark[WMARK_MIN], the hysteric field
+low_on_memory gets set. This stays set till the number of free pages becomes
+watermark[WMARK_HIGH]. When low_on_memory is set, page allocation requests will
+try to free some pages in the zone (providing GFP_WAIT is set in the request).
+Orthogonal to this, is the decision to poke kswapd to free some zone pages.
+That decision is not hysteresis based, and is done when the number of free
+pages is below watermark[WMARK_LOW]; in which case zone_wake_kswapd is also set.
+
+
+(Good) Ideas that I have heard:
+
+1. Dynamic experience should influence balancing: number of failed requests
+   for a zone can be tracked and fed into the balancing scheme (jalvo@mbay.net)
+2. Implement a replace_with_highmem()-like replace_with_regular() to preserve
+   dma pages. (lkd@tantalophile.demon.co.uk)
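+
+As a rough, hypothetical C rendering of the watermark logic described above
+(the field names follow the text; the helper itself is invented)::
+
+	static void check_zone_balance(struct zone *z)
+	{
+		if (z->free_pages < z->watermark[WMARK_MIN])
+			z->low_on_memory = 1;	/* hysteresis: set at MIN */
+		else if (z->free_pages >= z->watermark[WMARK_HIGH])
+			z->low_on_memory = 0;	/* ...cleared at HIGH */
+
+		/* poking kswapd is not hysteresis based */
+		z->zone_wake_kswapd = z->free_pages < z->watermark[WMARK_LOW];
+	}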
diff --git a/Documentation/vm/cleancache.rst b/Documentation/vm/cleancache.rst
new file mode 100644
index 0000000..68cba91
--- /dev/null
+++ b/Documentation/vm/cleancache.rst
@@ -0,0 +1,296 @@
+.. _cleancache:
+
+==========
+Cleancache
+==========
+
+Motivation
+==========
+
+Cleancache is a new optional feature provided by the VFS layer that
+potentially dramatically increases page cache effectiveness for
+many workloads in many environments at a negligible cost.
+
+Cleancache can be thought of as a page-granularity victim cache for clean
+pages that the kernel's pageframe replacement algorithm (PFRA) would like
+to keep around, but can't since there isn't enough memory.  So when the
+PFRA "evicts" a page, it first attempts to use cleancache code to
+put the data contained in that page into "transcendent memory", memory
+that is not directly accessible or addressable by the kernel and is
+of unknown and possibly time-varying size.
+
+Later, when a cleancache-enabled filesystem wishes to access a page
+in a file on disk, it first checks cleancache to see if it already
+contains it; if it does, the page of data is copied into the kernel
+and a disk access is avoided.
+
+Transcendent memory "drivers" for cleancache are currently implemented
+in Xen (using hypervisor memory) and zcache (using in-kernel compressed
+memory) and other implementations are in development.
+
+:ref:`FAQs <faq>` are included below.
+
+Implementation Overview
+=======================
+
+A cleancache "backend" that provides transcendent memory registers itself
+to the kernel's cleancache "frontend" by calling cleancache_register_ops,
+passing a pointer to a cleancache_ops structure with funcs set appropriately.
+The functions provided must conform to certain semantics as follows:
+
+Most important, cleancache is "ephemeral".  Pages which are copied into
+cleancache have an indefinite lifetime which is completely unknowable
+by the kernel and so may or may not still be in cleancache at any later time.
+Thus, as its name implies, cleancache is not suitable for dirty pages.
+Cleancache has complete discretion over what pages to preserve and what
+pages to discard and when.
+
+Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a
+pool id which, if positive, must be saved in the filesystem's superblock;
+a negative return value indicates failure.  A "put_page" will copy a
+(presumably about-to-be-evicted) page into cleancache and associate it with
+the pool id, a file key, and a page index into the file.  (The combination
+of a pool id, a file key, and an index is sometimes called a "handle".)
+A "get_page" will copy the page, if found, from cleancache into kernel memory.
+An "invalidate_page" will ensure the page no longer is present in cleancache;
+an "invalidate_inode" will invalidate all pages associated with the specified
+file; and, when a filesystem is unmounted, an "invalidate_fs" will invalidate
+all pages in all files specified by the given pool id and also surrender
+the pool id.
+
+An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache
+to treat the pool as shared using a 128-bit UUID as a key.  On systems
+that may run multiple kernels (such as hard partitioned or virtualized
+systems) that may share a clustered filesystem, and where cleancache
+may be shared among those kernels, calls to init_shared_fs that specify the
+same UUID will receive the same pool id, thus allowing the pages to
+be shared.  Note that any security requirements must be imposed outside
+of the kernel (e.g. by "tools" that control cleancache).  Or a
+cleancache implementation can simply disable shared_init by always
+returning a negative value.
+
+If a get_page is successful on a non-shared pool, the page is invalidated
+(thus making cleancache an "exclusive" cache).  On a shared pool, the page
+is NOT invalidated on a successful get_page so that it remains accessible to
+other sharers.  The kernel is responsible for ensuring coherency between
+cleancache (shared or not), the page cache, and the filesystem, using
+cleancache invalidate operations as required.
+
+Note that cleancache must enforce put-put-get coherency and get-get
+coherency.  For the former, if two puts are made to the same handle but
+with different data, say AAA by the first put and BBB by the second, a
+subsequent get can never return the stale data (AAA).  For get-get coherency,
+if a get for a given handle fails, subsequent gets for that handle will
+never succeed unless preceded by a successful put with that handle.
+
+Last, cleancache provides no SMP serialization guarantees; if two
+different Linux threads are simultaneously putting and invalidating a page
+with the same handle, the results are indeterminate.  Callers must
+lock the page to ensure serial behavior.
+
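+As a hedged sketch of the registration described above (the my_* callbacks
+are hypothetical and their bodies elided; the operation names follow the
+text)::
+
+	static const struct cleancache_ops my_ops = {
+		.init_fs		= my_init_fs,
+		.init_shared_fs		= my_init_shared_fs,
+		.get_page		= my_get_page,
+		.put_page		= my_put_page,
+		.invalidate_page	= my_invalidate_page,
+		.invalidate_inode	= my_invalidate_inode,
+		.invalidate_fs		= my_invalidate_fs,
+	};
+
+	static int __init my_backend_init(void)
+	{
+		cleancache_register_ops(&my_ops);
+		return 0;
+	}
+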
+Cleancache Performance Metrics
+==============================
+
+If properly configured, monitoring of cleancache is done via debugfs in
+the `/sys/kernel/debug/cleancache` directory.  The effectiveness of cleancache
+can be measured (across all filesystems) with:
+
+``succ_gets``
+	number of gets that were successful
+
+``failed_gets``
+	number of gets that failed
+
+``puts``
+	number of puts attempted (all "succeed")
+
+``invalidates``
+	number of invalidates attempted
+
+A backend implementation may provide additional metrics.
+
+.. _faq:
+
+FAQ
+===
+
+* Where's the value? (Andrew Morton)
+
+Cleancache provides a significant performance benefit to many workloads
+in many environments with negligible overhead by improving the
+effectiveness of the pagecache.  Clean pagecache pages are
+saved in transcendent memory (RAM that is otherwise not directly
+addressable to the kernel); fetching those pages later avoids "refaults"
+and thus disk reads.
+
+Cleancache (and its sister code "frontswap") provide interfaces for
+this transcendent memory (aka "tmem"), which conceptually lies between
+fast kernel-directly-addressable RAM and slower DMA/asynchronous devices.
+Disallowing direct kernel or userland reads/writes to tmem
+is ideal when data is transformed to a different form and size (such
+as with compression) or secretly moved (as might be useful for write-
+balancing for some RAM-like devices).  Evicted page-cache pages (and
+swap pages) are a great use for this kind of slower-than-RAM-but-much-
+faster-than-disk transcendent memory, and the cleancache (and frontswap)
+"page-object-oriented" specification provides a nice way to read and
+write -- and indirectly "name" -- the pages.
+
+In the virtual case, the whole point of virtualization is to statistically
+multiplex physical resources across the varying demands of multiple
+virtual machines.  This is really hard to do with RAM and efforts to
+do it well with no kernel change have essentially failed (except in some
+well-publicized special-case workloads).  Cleancache -- and frontswap --
+with a fairly small impact on the kernel, provide a huge amount
+of flexibility for more dynamic, flexible RAM multiplexing.
+Specifically, the Xen Transcendent Memory backend allows otherwise
+"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
+virtual machines, but the pages can be compressed and deduplicated to
+optimize RAM utilization.  And when guest OS's are induced to surrender
+underutilized RAM (e.g. with "self-ballooning"), page cache pages
+are the first to go, and cleancache allows those pages to be
+saved and reclaimed if overall host system memory conditions allow.
+
+And the identical interface used for cleancache can be used in
+physical systems as well.  The zcache driver acts as a memory-hungry
+device that stores pages of data in a compressed state.  And
+the proposed "RAMster" driver shares RAM across multiple physical
+systems.
+
+* Why does cleancache have its sticky fingers so deep inside the
+  filesystems and VFS? (Andrew Morton and Christoph Hellwig)
+
+The core hooks for cleancache in VFS are in most cases a single line
+and the minimum set are placed precisely where needed to maintain
+coherency (via cleancache_invalidate operations) between cleancache,
+the page cache, and disk.  All hooks compile into nothingness if
+cleancache is config'ed off and turn into a function-pointer-
+compare-to-NULL if config'ed on but no backend claims the ops
+functions, or to a compare-struct-element-to-negative if a
+backend claims the ops functions but a filesystem doesn't enable
+cleancache.
+
+Some filesystems are built entirely on top of VFS and the hooks
+in VFS are sufficient, so don't require an "init_fs" hook; the
+initial implementation of cleancache didn't provide this hook.
+But for some filesystems (such as btrfs), the VFS hooks are
+incomplete and one or more hooks in fs-specific code are required.
+And for some other filesystems, such as tmpfs, cleancache may
+be counterproductive.  So it seemed prudent to require a filesystem
+to "opt in" to use cleancache, which requires adding a hook in
+each filesystem.  Not all filesystems are supported by cleancache
+only because they haven't been tested.  The existing set should
+be sufficient to validate the concept, the opt-in approach means
+that untested filesystems are not affected, and the hooks in the
+existing filesystems should make it very easy to add more
+filesystems in the future.
+
+The total impact of the hooks to existing fs and mm files is only
+about 40 lines added (not counting comments and blank lines).
+
+* Why not make cleancache asynchronous and batched so it can more
+  easily interface with real devices with DMA instead of copying each
+  individual page? (Minchan Kim)
+
+The one-page-at-a-time copy semantics simplifies the implementation
+on both the frontend and backend and also allows the backend to
+do fancy things on-the-fly like page compression and
+page deduplication.  And since the data is "gone" (copied into/out
+of the pageframe) before the cleancache get/put call returns,
+a great deal of race conditions and potential coherency issues
+are avoided.  While the interface seems odd for a "real device"
+or for real kernel-addressable RAM, it makes perfect sense for
+transcendent memory.
+
+* Why is non-shared cleancache "exclusive"?  And where is the
+  page "invalidated" after a "get"? (Minchan Kim)
+
+The main reason is to free up space in transcendent memory and
+to avoid unnecessary cleancache_invalidate calls.  If you want inclusive,
+the page can be "put" immediately following the "get".  If
+put-after-get for inclusive becomes common, the interface could
+be easily extended to add a "get_no_invalidate" call.
+
+The invalidate is done by the cleancache backend implementation.
+
+* What's the performance impact?
+
+Performance analysis has been presented at OLS'09 and LCA'10.
+Briefly, performance gains can be significant on most workloads,
+especially when memory pressure is high (e.g. when RAM is
+overcommitted in a virtual workload); and because the hooks are
+invoked primarily in place of or in addition to a disk read/write,
+overhead is negligible even in worst case workloads.  Basically
+cleancache replaces I/O with memory-copy-CPU-overhead; on older
+single-core systems with slow memory-copy speeds, cleancache
+has little value, but in newer multicore machines, especially
+consolidated/virtualized machines, it has great value.
+
+* How do I add cleancache support for filesystem X? (Boaz Harrash)
+
+Filesystems that are well-behaved and conform to certain
+restrictions can utilize cleancache simply by making a call to
+cleancache_init_fs at mount time.  Unusual, misbehaving, or
+poorly layered filesystems must either add additional hooks
+and/or undergo extensive additional testing... or should just
+not enable the optional cleancache.
+
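+As a hedged sketch (the filesystem and its fill_super routine are
+hypothetical; only the cleancache_init_fs call comes from the text above)::
+
+	static int myfs_fill_super(struct super_block *sb, void *data,
+				   int silent)
+	{
+		/* ... normal superblock setup ... */
+		cleancache_init_fs(sb);		/* opt in to cleancache */
+		return 0;
+	}
+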
+Some points for a filesystem to consider:
+
+  - The FS should be block-device-based (e.g. a ram-based FS such
+    as tmpfs should not enable cleancache)
+  - To ensure coherency/correctness, the FS must ensure that all
+    file removal or truncation operations either go through VFS or
+    add hooks to do the equivalent cleancache "invalidate" operations
+  - To ensure coherency/correctness, either inode numbers must
+    be unique across the lifetime of the on-disk file OR the
+    FS must provide an "encode_fh" function.
+  - The FS must call the VFS superblock alloc and deactivate routines
+    or add hooks to do the equivalent cleancache calls done there.
+  - To maximize performance, all pages fetched from the FS should
+    go through the do_mpage_readpage routine or the FS should add
+    hooks to do the equivalent (cf. btrfs)
+  - Currently, the FS blocksize must be the same as PAGESIZE.  This
+    is not an architectural restriction, but no backends currently
+    support anything different.
+  - A clustered FS should invoke the "shared_init_fs" cleancache
+    hook to get best performance for some backends.
+
+* Why not use the KVA of the inode as the key? (Christoph Hellwig)
+
+If cleancache would use the inode virtual address instead of
+inode/filehandle, the pool id could be eliminated.  But, this
+won't work because cleancache retains pagecache data pages
+persistently even when the inode has been pruned from the
+inode unused list, and only invalidates the data page if the file
+gets removed/truncated.  So if cleancache used the inode kva,
+there would be potential coherency issues if/when the inode
+kva is reused for a different file.  Alternately, if cleancache
+invalidated the pages when the inode kva was freed, much of the value
+of cleancache would be lost because the cache of pages in cleancache
+is potentially much larger than the kernel pagecache and is most
+useful if the pages survive inode cache removal.
+
+* Why is a global variable required?
+
+The cleancache_enabled flag is checked in all of the frequently-used
+cleancache hooks.  The alternative is a function call to check a static
+variable. Since cleancache is enabled dynamically at runtime, systems
+that don't enable cleancache would suffer thousands (possibly
+tens-of-thousands) of unnecessary function calls per second.  So the
+global variable allows cleancache to be enabled by default at compile
+time, but have insignificant performance impact when cleancache remains
+disabled at runtime.
+
+* Does cleancache work with KVM?
+
+The memory model of KVM is sufficiently different that a cleancache
+backend may have less value for KVM.  This remains to be tested,
+especially in an overcommitted system.
+
+* Does cleancache work in userspace?  It sounds useful for
+  memory hungry caches like web browsers.  (Jamie Lokier)
+
+No plans yet, though we agree it sounds useful, at least for
+apps that bypass the page cache (e.g. O_DIRECT).
+
+Last updated: Dan Magenheimer, April 13 2011
diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt
deleted file mode 100644
index e4b49df..0000000
--- a/Documentation/vm/cleancache.txt
+++ /dev/null
@@ -1,277 +0,0 @@
-MOTIVATION
-
-Cleancache is a new optional feature provided by the VFS layer that
-potentially dramatically increases page cache effectiveness for
-many workloads in many environments at a negligible cost.
-
-Cleancache can be thought of as a page-granularity victim cache for clean
-pages that the kernel's pageframe replacement algorithm (PFRA) would like
-to keep around, but can't since there isn't enough memory.  So when the
-PFRA "evicts" a page, it first attempts to use cleancache code to
-put the data contained in that page into "transcendent memory", memory
-that is not directly accessible or addressable by the kernel and is
-of unknown and possibly time-varying size.
-
-Later, when a cleancache-enabled filesystem wishes to access a page
-in a file on disk, it first checks cleancache to see if it already
-contains it; if it does, the page of data is copied into the kernel
-and a disk access is avoided.
-
-Transcendent memory "drivers" for cleancache are currently implemented
-in Xen (using hypervisor memory) and zcache (using in-kernel compressed
-memory) and other implementations are in development.
-
-FAQs are included below.
-
-IMPLEMENTATION OVERVIEW
-
-A cleancache "backend" that provides transcendent memory registers itself
-to the kernel's cleancache "frontend" by calling cleancache_register_ops,
-passing a pointer to a cleancache_ops structure with funcs set appropriately.
-The functions provided must conform to certain semantics as follows:
-
-Most important, cleancache is "ephemeral".  Pages which are copied into
-cleancache have an indefinite lifetime which is completely unknowable
-by the kernel and so may or may not still be in cleancache at any later time.
-Thus, as its name implies, cleancache is not suitable for dirty pages.
-Cleancache has complete discretion over what pages to preserve and what
-pages to discard and when.
-
-Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a
-pool id which, if positive, must be saved in the filesystem's superblock;
-a negative return value indicates failure.  A "put_page" will copy a
-(presumably about-to-be-evicted) page into cleancache and associate it with
-the pool id, a file key, and a page index into the file.  (The combination
-of a pool id, a file key, and an index is sometimes called a "handle".)
-A "get_page" will copy the page, if found, from cleancache into kernel memory.
-An "invalidate_page" will ensure the page no longer is present in cleancache;
-an "invalidate_inode" will invalidate all pages associated with the specified
-file; and, when a filesystem is unmounted, an "invalidate_fs" will invalidate
-all pages in all files specified by the given pool id and also surrender
-the pool id.
-
-An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache
-to treat the pool as shared using a 128-bit UUID as a key.  On systems
-that may run multiple kernels (such as hard partitioned or virtualized
-systems) that may share a clustered filesystem, and where cleancache
-may be shared among those kernels, calls to init_shared_fs that specify the
-same UUID will receive the same pool id, thus allowing the pages to
-be shared.  Note that any security requirements must be imposed outside
-of the kernel (e.g. by "tools" that control cleancache).  Or a
-cleancache implementation can simply disable shared_init by always
-returning a negative value.
-
-If a get_page is successful on a non-shared pool, the page is invalidated
-(thus making cleancache an "exclusive" cache).  On a shared pool, the page
-is NOT invalidated on a successful get_page so that it remains accessible to
-other sharers.  The kernel is responsible for ensuring coherency between
-cleancache (shared or not), the page cache, and the filesystem, using
-cleancache invalidate operations as required.
-
-Note that cleancache must enforce put-put-get coherency and get-get
-coherency.  For the former, if two puts are made to the same handle but
-with different data, say AAA by the first put and BBB by the second, a
-subsequent get can never return the stale data (AAA).  For get-get coherency,
-if a get for a given handle fails, subsequent gets for that handle will
-never succeed unless preceded by a successful put with that handle.
-
-Last, cleancache provides no SMP serialization guarantees; if two
-different Linux threads are simultaneously putting and invalidating a page
-with the same handle, the results are indeterminate.  Callers must
-lock the page to ensure serial behavior.
-
-CLEANCACHE PERFORMANCE METRICS
-
-If properly configured, monitoring of cleancache is done via debugfs in
-the /sys/kernel/debug/cleancache directory.  The effectiveness of cleancache
-can be measured (across all filesystems) with:
-
-succ_gets	- number of gets that were successful
-failed_gets	- number of gets that failed
-puts		- number of puts attempted (all "succeed")
-invalidates	- number of invalidates attempted
-
-A backend implementation may provide additional metrics.
-
-FAQ
-
-1) Where's the value? (Andrew Morton)
-
-Cleancache provides a significant performance benefit to many workloads
-in many environments with negligible overhead by improving the
-effectiveness of the pagecache.  Clean pagecache pages are
-saved in transcendent memory (RAM that is otherwise not directly
-addressable to the kernel); fetching those pages later avoids "refaults"
-and thus disk reads.
-
-Cleancache (and its sister code "frontswap") provide interfaces for
-this transcendent memory (aka "tmem"), which conceptually lies between
-fast kernel-directly-addressable RAM and slower DMA/asynchronous devices.
-Disallowing direct kernel or userland reads/writes to tmem
-is ideal when data is transformed to a different form and size (such
-as with compression) or secretly moved (as might be useful for write-
-balancing for some RAM-like devices).  Evicted page-cache pages (and
-swap pages) are a great use for this kind of slower-than-RAM-but-much-
-faster-than-disk transcendent memory, and the cleancache (and frontswap)
-"page-object-oriented" specification provides a nice way to read and
-write -- and indirectly "name" -- the pages.
-
-In the virtual case, the whole point of virtualization is to statistically
-multiplex physical resources across the varying demands of multiple
-virtual machines.  This is really hard to do with RAM and efforts to
-do it well with no kernel change have essentially failed (except in some
-well-publicized special-case workloads).  Cleancache -- and frontswap --
-with a fairly small impact on the kernel, provide a huge amount
-of flexibility for more dynamic, flexible RAM multiplexing.
-Specifically, the Xen Transcendent Memory backend allows otherwise
-"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
-virtual machines, but the pages can be compressed and deduplicated to
-optimize RAM utilization.  And when guest OS's are induced to surrender
-underutilized RAM (e.g. with "self-ballooning"), page cache pages
-are the first to go, and cleancache allows those pages to be
-saved and reclaimed if overall host system memory conditions allow.
-
-And the identical interface used for cleancache can be used in
-physical systems as well.  The zcache driver acts as a memory-hungry
-device that stores pages of data in a compressed state.  And
-the proposed "RAMster" driver shares RAM across multiple physical
-systems.
-
-2) Why does cleancache have its sticky fingers so deep inside the
-   filesystems and VFS? (Andrew Morton and Christoph Hellwig)
-
-The core hooks for cleancache in VFS are in most cases a single line
-and the minimum set are placed precisely where needed to maintain
-coherency (via cleancache_invalidate operations) between cleancache,
-the page cache, and disk.  All hooks compile into nothingness if
-cleancache is config'ed off and turn into a function-pointer-
-compare-to-NULL if config'ed on but no backend claims the ops
-functions, or to a compare-struct-element-to-negative if a
-backend claims the ops functions but a filesystem doesn't enable
-cleancache.
-
-Some filesystems are built entirely on top of VFS and the hooks
-in VFS are sufficient, so don't require an "init_fs" hook; the
-initial implementation of cleancache didn't provide this hook.
-But for some filesystems (such as btrfs), the VFS hooks are
-incomplete and one or more hooks in fs-specific code are required.
-And for some other filesystems, such as tmpfs, cleancache may
-be counterproductive.  So it seemed prudent to require a filesystem
-to "opt in" to use cleancache, which requires adding a hook in
-each filesystem.  Not all filesystems are supported by cleancache
-only because they haven't been tested.  The existing set should
-be sufficient to validate the concept, the opt-in approach means
-that untested filesystems are not affected, and the hooks in the
-existing filesystems should make it very easy to add more
-filesystems in the future.
-
-The total impact of the hooks to existing fs and mm files is only
-about 40 lines added (not counting comments and blank lines).
-
-3) Why not make cleancache asynchronous and batched so it can
-   more easily interface with real devices with DMA instead
-   of copying each individual page? (Minchan Kim)
-
-The one-page-at-a-time copy semantics simplifies the implementation
-on both the frontend and backend and also allows the backend to
-do fancy things on-the-fly like page compression and
-page deduplication.  And since the data is "gone" (copied into/out
-of the pageframe) before the cleancache get/put call returns,
-a great deal of race conditions and potential coherency issues
-are avoided.  While the interface seems odd for a "real device"
-or for real kernel-addressable RAM, it makes perfect sense for
-transcendent memory.
-
-4) Why is non-shared cleancache "exclusive"?  And where is the
-   page "invalidated" after a "get"? (Minchan Kim)
-
-The main reason is to free up space in transcendent memory and
-to avoid unnecessary cleancache_invalidate calls.  If you want inclusive,
-the page can be "put" immediately following the "get".  If
-put-after-get for inclusive becomes common, the interface could
-be easily extended to add a "get_no_invalidate" call.
-
-The invalidate is done by the cleancache backend implementation.
-
-5) What's the performance impact?
-
-Performance analysis has been presented at OLS'09 and LCA'10.
-Briefly, performance gains can be significant on most workloads,
-especially when memory pressure is high (e.g. when RAM is
-overcommitted in a virtual workload); and because the hooks are
-invoked primarily in place of or in addition to a disk read/write,
-overhead is negligible even in worst case workloads.  Basically
-cleancache replaces I/O with memory-copy-CPU-overhead; on older
-single-core systems with slow memory-copy speeds, cleancache
-has little value, but in newer multicore machines, especially
-consolidated/virtualized machines, it has great value.
-
-6) How do I add cleancache support for filesystem X? (Boaz Harrash)
-
-Filesystems that are well-behaved and conform to certain
-restrictions can utilize cleancache simply by making a call to
-cleancache_init_fs at mount time.  Unusual, misbehaving, or
-poorly layered filesystems must either add additional hooks
-and/or undergo extensive additional testing... or should just
-not enable the optional cleancache.
-
-Some points for a filesystem to consider:
-
-- The FS should be block-device-based (e.g. a ram-based FS such
-  as tmpfs should not enable cleancache)
-- To ensure coherency/correctness, the FS must ensure that all
-  file removal or truncation operations either go through VFS or
-  add hooks to do the equivalent cleancache "invalidate" operations
-- To ensure coherency/correctness, either inode numbers must
-  be unique across the lifetime of the on-disk file OR the
-  FS must provide an "encode_fh" function.
-- The FS must call the VFS superblock alloc and deactivate routines
-  or add hooks to do the equivalent cleancache calls done there.
-- To maximize performance, all pages fetched from the FS should
-  go through the do_mpage_readpage routine or the FS should add
-  hooks to do the equivalent (cf. btrfs)
-- Currently, the FS blocksize must be the same as PAGESIZE.  This
-  is not an architectural restriction, but no backends currently
-  support anything different.
-- A clustered FS should invoke the "shared_init_fs" cleancache
-  hook to get best performance for some backends.
-
-7) Why not use the KVA of the inode as the key? (Christoph Hellwig)
-
-If cleancache would use the inode virtual address instead of
-inode/filehandle, the pool id could be eliminated.  But, this
-won't work because cleancache retains pagecache data pages
-persistently even when the inode has been pruned from the
-inode unused list, and only invalidates the data page if the file
-gets removed/truncated.  So if cleancache used the inode kva,
-there would be potential coherency issues if/when the inode
-kva is reused for a different file.  Alternately, if cleancache
-invalidated the pages when the inode kva was freed, much of the value
-of cleancache would be lost because the cache of pages in cleancache
-is potentially much larger than the kernel pagecache and is most
-useful if the pages survive inode cache removal.
-
-8) Why is a global variable required?
-
-The cleancache_enabled flag is checked in all of the frequently-used
-cleancache hooks.  The alternative is a function call to check a static
-variable. Since cleancache is enabled dynamically at runtime, systems
-that don't enable cleancache would suffer thousands (possibly
-tens-of-thousands) of unnecessary function calls per second.  So the
-global variable allows cleancache to be enabled by default at compile
-time, but have insignificant performance impact when cleancache remains
-disabled at runtime.
-
-9) Does cleancache work with KVM?
-
-The memory model of KVM is sufficiently different that a cleancache
-backend may have less value for KVM.  This remains to be tested,
-especially in an overcommitted system.
-
-10) Does cleancache work in userspace?  It sounds useful for
-   memory hungry caches like web browsers.  (Jamie Lokier)
-
-No plans yet, though we agree it sounds useful, at least for
-apps that bypass the page cache (e.g. O_DIRECT).
-
-Last updated: Dan Magenheimer, April 13 2011
diff --git a/Documentation/vm/conf.py b/Documentation/vm/conf.py
new file mode 100644
index 0000000..3b0b601
--- /dev/null
+++ b/Documentation/vm/conf.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = "Linux Memory Management Documentation"
+
+tags.add("subproject")
+
+latex_documents = [
+    ('index', 'memory-management.tex', project,
+     'The kernel development community', 'manual'),
+]
diff --git a/Documentation/vm/frontswap.rst b/Documentation/vm/frontswap.rst
new file mode 100644
index 0000000..1979f43
--- /dev/null
+++ b/Documentation/vm/frontswap.rst
@@ -0,0 +1,293 @@
+.. _frontswap:
+
+=========
+Frontswap
+=========
+
+Frontswap provides a "transcendent memory" interface for swap pages.
+In some environments, dramatic performance savings may be obtained because
+swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
+
+(Note, frontswap -- and :ref:`cleancache` (merged at 3.0) -- are the "frontends"
+and the only necessary changes to the core kernel for transcendent memory;
+all other supporting code -- the "backends" -- is implemented as drivers.
+See the LWN.net article `Transcendent memory in a nutshell`_
+for a detailed overview of frontswap and related kernel parts)
+
+.. _Transcendent memory in a nutshell: https://lwn.net/Articles/454795/
+
+Frontswap is so named because it can be thought of as the opposite of
+a "backing" store for a swap device.  The storage is assumed to be
+a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming
+to the requirements of transcendent memory (such as Xen's "tmem", or
+in-kernel compressed memory, aka "zcache", or future RAM-like devices);
+this pseudo-RAM device is not directly accessible or addressable by the
+kernel and is of unknown and possibly time-varying size.  The driver
+links itself to frontswap by calling frontswap_register_ops to set the
+frontswap_ops funcs appropriately; the functions it provides must
+conform to the following policies:
+
+An "init" prepares the device to receive frontswap pages associated
+with the specified swap device number (aka "type").  A "store" will
+copy the page to transcendent memory and associate it with the type and
+offset associated with the page. A "load" will copy the page, if found,
+from transcendent memory into kernel memory, but will NOT remove the page
+from transcendent memory.  An "invalidate_page" will remove the page
+from transcendent memory and an "invalidate_area" will remove ALL pages
+associated with the swap type (e.g., like swapoff) and notify the "device"
+to refuse further stores with that swap type.
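+
+As a rough sketch of the shape these policies take, a backend might register
+itself along the following lines (the ``my_*`` callbacks are hypothetical
+placeholders; only ``struct frontswap_ops`` and frontswap_register_ops come
+from the frontswap API)::
+
+	static int my_store(unsigned type, pgoff_t offset, struct page *page)
+	{
+		/* Copy the page into the backend.  Return 0 on success, or
+		 * -1 to reject; on rejection the kernel writes the page to
+		 * the real swap device instead.
+		 */
+		return -1;
+	}
+
+	static int my_load(unsigned type, pgoff_t offset, struct page *page)
+	{
+		/* Fill the page from the backend copy, leaving that copy
+		 * in place; return 0 if the page was found.
+		 */
+		return -1;
+	}
+
+	static void my_init(unsigned type) { /* prepare for this swap type */ }
+	static void my_invalidate_page(unsigned type, pgoff_t offset) { }
+	static void my_invalidate_area(unsigned type) { }
+
+	static struct frontswap_ops my_ops = {
+		.init			= my_init,
+		.store			= my_store,
+		.load			= my_load,
+		.invalidate_page	= my_invalidate_page,
+		.invalidate_area	= my_invalidate_area,
+	};
+
+	frontswap_register_ops(&my_ops);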
+
+Once a page is successfully stored, a matching load on the page will normally
+succeed.  So when the kernel finds itself in a situation where it needs
+to swap out a page, it first attempts to use frontswap.  If the store returns
+success, the data has been successfully saved to transcendent memory and
+a disk write and, if the data is later read back, a disk read are avoided.
+If a store returns failure, transcendent memory has rejected the data, and the
+page can be written to swap as usual.
+
+If a backend chooses, frontswap can be configured as a "writethrough
+cache" by calling frontswap_writethrough().  In this mode, the reduction
+in swap device writes is lost (and also a non-trivial performance advantage)
+in order to allow the backend to arbitrarily "reclaim" space used to
+store frontswap pages to more completely manage its memory usage.
+
+Note that if a page is stored and the page already exists in transcendent memory
+(a "duplicate" store), either the store succeeds and the data is overwritten,
+or the store fails AND the page is invalidated.  This ensures stale data may
+never be obtained from frontswap.
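+
+In backend terms the duplicate-store rule looks roughly like this (a sketch of
+the invariant only; backend_overwrite() and backend_invalidate() are
+hypothetical helpers, not part of the frontswap API)::
+
+	/* A store to a (type, offset) already present in the backend. */
+	if (backend_overwrite(type, offset, page) != 0) {
+		/* Never leave stale data reachable after a failed store. */
+		backend_invalidate(type, offset);
+		return -1;
+	}
+	return 0;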
+
+If properly configured, monitoring of frontswap is done via debugfs in
+the `/sys/kernel/debug/frontswap` directory.  The effectiveness of
+frontswap can be measured (across all swap devices) with:
+
+``failed_stores``
+	how many store attempts have failed
+
+``loads``
+	how many loads were attempted (all should succeed)
+
+``succ_stores``
+	how many store attempts have succeeded
+
+``invalidates``
+	how many invalidates were attempted
+
+A backend implementation may provide additional metrics.
+
+FAQ
+===
+
+* Where's the value?
+
+When a workload starts swapping, performance falls through the floor.
+Frontswap significantly increases performance in many such workloads by
+providing a clean, dynamic interface to read and write swap pages to
+"transcendent memory" that is otherwise not directly addressable to the kernel.
+This interface is ideal when data is transformed to a different form
+and size (such as with compression) or secretly moved (as might be
+useful for write-balancing for some RAM-like devices).  Swap pages (and
+evicted page-cache pages) are a great use for this kind of slower-than-RAM-
+but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
+cleancache) interface to transcendent memory provides a nice way to read
+and write -- and indirectly "name" -- the pages.
+
+Frontswap -- and cleancache -- with a fairly small impact on the kernel,
+provides a huge amount of flexibility for more dynamic, flexible RAM
+utilization in various system configurations:
+
+In the single kernel case, aka "zcache", pages are compressed and
+stored in local memory, thus increasing the total anonymous pages
+that can be safely kept in RAM.  Zcache essentially trades off CPU
+cycles used in compression/decompression for better memory utilization.
+Benchmarks have shown little or no impact when memory pressure is
+low while providing a significant performance improvement (25%+)
+on some workloads under high memory pressure.
+
+"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory
+support for clustered systems.  Frontswap pages are locally compressed
+as in zcache, but then "remotified" to another system's RAM.  This
+allows RAM to be dynamically load-balanced back-and-forth as needed,
+i.e. when system A is overcommitted, it can swap to system B, and
+vice versa.  RAMster can also be configured as a memory server so
+many servers in a cluster can swap, dynamically as needed, to a single
+server configured with a large amount of RAM... without pre-configuring
+how much of the RAM is available for each of the clients!
+
+In the virtual case, the whole point of virtualization is to statistically
+multiplex physical resources across the varying demands of multiple
+virtual machines.  This is really hard to do with RAM and efforts to do
+it well with no kernel changes have essentially failed (except in some
+well-publicized special-case workloads).
+Specifically, the Xen Transcendent Memory backend allows otherwise
+"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
+virtual machines, but the pages can be compressed and deduplicated to
+optimize RAM utilization.  And when guest OS's are induced to surrender
+underutilized RAM (e.g. with "selfballooning"), sudden unexpected
+memory pressure may result in swapping; frontswap allows those pages
+to be swapped to and from hypervisor RAM (if overall host system memory
+conditions allow), thus mitigating the potentially awful performance impact
+of unplanned swapping.
+
+A KVM implementation is underway and has been RFC'ed to lkml.  And,
+using frontswap, investigation is also underway on the use of NVM as
+a memory extension technology.
+
+* Sure there may be performance advantages in some situations, but
+  what's the space/time overhead of frontswap?
+
+If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into
+nothingness and the only overhead is a few extra bytes per swapon'ed
+swap device.  If CONFIG_FRONTSWAP is enabled but no frontswap "backend"
+registers, the only added cost is one extra compare of a global variable
+against zero for every swap page read or written.  If CONFIG_FRONTSWAP is enabled
+AND a frontswap backend registers AND the backend fails every "store"
+request (i.e. provides no memory despite claiming it might),
+CPU overhead is still negligible -- and since every frontswap fail
+precedes a swap page write-to-disk, the system is highly likely
+to be I/O bound and using a small fraction of a percent of a CPU
+will be irrelevant anyway.
+
+As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend
+registers, one bit is allocated for every swap page for every swap
+device that is swapon'd.  This is added to the EIGHT bits (which
+were sixteen until about 2.6.34) that the kernel already allocates
+for every swap page for every swap device that is swapon'd.  (Hugh
+Dickins has observed that frontswap could probably steal one of
+the existing eight bits, but let's worry about that minor optimization
+later.)  For very large swap disks (which are rare) on a standard
+4K pagesize, this is 1MB per 32GB of swap (32GB / 4K = 8M pages,
+i.e. 8M bits = 1MB).
+
+When swap pages are stored in transcendent memory instead of written
+out to disk, there is a side effect that this may create more memory
+pressure that can potentially outweigh the other advantages.  A
+backend, such as zcache, must implement policies to carefully (but
+dynamically) manage memory limits to ensure this doesn't happen.
+
+* OK, how about a quick overview of what this frontswap patch does
+  in terms that a kernel hacker can grok?
+
+Let's assume that a frontswap "backend" has registered during
+kernel initialization; this registration indicates that this
+frontswap backend has access to some "memory" that is not directly
+accessible by the kernel.  Exactly how much memory it provides is
+entirely dynamic and random.
+
+Whenever a swap-device is swapon'd frontswap_init() is called,
+passing the swap device number (aka "type") as a parameter.
+This notifies frontswap to expect attempts to "store" swap pages
+associated with that number.
+
+Whenever the swap subsystem is readying a page to write to a swap
+device (cf. swap_writepage()), frontswap_store is called.  Frontswap
+consults with the frontswap backend and if the backend says it does NOT
+have room, frontswap_store returns -1 and the kernel swaps the page
+to the swap device as normal.  Note that the response from the frontswap
+backend is unpredictable to the kernel; it may choose to never accept a
+page, it could accept every ninth page, or it might accept every
+page.  But if the backend does accept a page, the data from the page
+has already been copied and associated with the type and offset,
+and the backend guarantees the persistence of the data.  In this case,
+frontswap sets a bit in the "frontswap_map" for the swap device
+corresponding to the page offset on the swap device to which it would
+otherwise have written the data.
+
+When the swap subsystem needs to swap-in a page (swap_readpage()),
+it first calls frontswap_load() which checks the frontswap_map to
+see if the page was earlier accepted by the frontswap backend.  If
+it was, the page of data is filled from the frontswap backend and
+the swap-in is complete.  If not, the normal swap-in code is
+executed to obtain the page of data from the real swap device.
+
+So every time the frontswap backend accepts a page, a swap device read
+and (potentially) a swap device write are replaced by a "frontswap backend
+store" and (possibly) a "frontswap backend loads", which are presumably much
+faster.
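+
+Schematically, and glossing over details, the two hook sites behave roughly
+as follows (a sketch, not the actual mm/page_io.c code)::
+
+	/* Swap-out path, cf. swap_writepage(). */
+	if (frontswap_store(page) == 0)
+		return;		/* accepted; bit set in frontswap_map */
+	/* Rejected: fall through to the normal swap device write. */
+
+	/* Swap-in path, cf. swap_readpage(). */
+	if (frontswap_load(page) == 0)
+		return;		/* bit was set; page filled by the backend */
+	/* Otherwise read the page from the real swap device. */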
+
+* Can't frontswap be configured as a "special" swap device that is
+  just higher priority than any real swap device (e.g. like zswap,
+  or maybe swap-over-nbd/NFS)?
+
+No.  First, the existing swap subsystem doesn't allow for any kind of
+swap hierarchy.  Perhaps it could be rewritten to accommodate a hierarchy,
+but this would require fairly drastic changes.  Even if it were
+rewritten, the existing swap subsystem uses the block I/O layer which
+assumes a swap device is fixed size and any page in it is linearly
+addressable.  Frontswap barely touches the existing swap subsystem,
+and works around the constraints of the block I/O subsystem to provide
+a great deal of flexibility and dynamicity.
+
+For example, the acceptance of any swap page by the frontswap backend is
+entirely unpredictable. This is critical to the definition of frontswap
+backends because it grants completely dynamic discretion to the
+backend.  In zcache, one cannot know a priori how compressible a page is.
+"Poorly" compressible pages can be rejected, and "poorly" can itself be
+defined dynamically depending on current memory constraints.
+
+Further, frontswap is entirely synchronous whereas a real swap
+device is, by definition, asynchronous and uses block I/O.  The
+block I/O layer is not only unnecessary, but may perform "optimizations"
+that are inappropriate for a RAM-oriented device including delaying
+the write of some pages for a significant amount of time.  Synchrony is
+required to ensure the dynamicity of the backend and to avoid thorny race
+conditions that would unnecessarily and greatly complicate frontswap
+and/or the block I/O subsystem.  That said, only the initial "store"
+and "load" operations need be synchronous.  A separate asynchronous thread
+is free to manipulate the pages stored by frontswap.  For example,
+the "remotification" thread in RAMster uses standard asynchronous
+kernel sockets to move compressed frontswap pages to a remote machine.
+Similarly, a KVM guest-side implementation could do in-guest compression
+and use "batched" hypercalls.
+
+In a virtualized environment, the dynamicity allows the hypervisor
+(or host OS) to do "intelligent overcommit".  For example, it can
+choose to accept pages only until host-swapping might be imminent,
+then force guests to do their own swapping.
+
+There is a downside to the transcendent memory specifications for
+frontswap:  Since any "store" might fail, there must always be a real
+slot on a real swap device to swap the page.  Thus frontswap must be
+implemented as a "shadow" to every swapon'd device with the potential
+capability of holding every page that the swap device might have held
+and the possibility that it might hold no pages at all.  This means
+that frontswap cannot contain more pages than the total of swapon'd
+swap devices.  For example, if NO swap device is configured on some
+installation, frontswap is useless.  Swapless portable devices
+can still use frontswap but a backend for such devices must configure
+some kind of "ghost" swap device and ensure that it is never used.
+
+* Why this weird definition about "duplicate stores"?  If a page
+  has been previously successfully stored, can't it always be
+  successfully overwritten?
+
+Nearly always it can, but no, sometimes it cannot.  Consider an example
+where data is compressed and the original 4K page has been compressed
+to 1K.  Now an attempt is made to overwrite the page with data that
+is non-compressible and so would take the entire 4K.  But the backend
+has no more space.  In this case, the store must be rejected.  Whenever
+frontswap rejects a store that would overwrite, it also must invalidate
+the old data and ensure that it is no longer accessible.  Since the
+swap subsystem then writes the new data to the real swap device,
+this is the correct course of action to ensure coherency.
+
+* What is frontswap_shrink for?
+
+When the (non-frontswap) swap subsystem swaps out a page to a real
+swap device, that page is only taking up low-value pre-allocated disk
+space.  But if frontswap has placed a page in transcendent memory, that
+page may be taking up valuable real estate.  The frontswap_shrink
+routine allows code outside of the swap subsystem to force pages out
+of the memory managed by frontswap and back into kernel-addressable memory.
+For example, in RAMster, a "suction driver" thread will attempt
+to "repatriate" pages sent to a remote machine back to the local machine;
+this is driven using the frontswap_shrink mechanism when memory pressure
+subsides.
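+
+A minimal usage sketch (the target of 1024 pages is arbitrary)::
+
+	/* Force frontswap down to at most 1024 stored pages, pulling the
+	 * rest back into kernel-addressable memory (or out to swap).
+	 */
+	frontswap_shrink(1024);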
+
+* Why does the frontswap patch create the new include file swapfile.h?
+
+The frontswap code depends on some swap-subsystem-internal data
+structures that have, over the years, moved back and forth between
+static and global.  This seemed a reasonable compromise:  Define
+them as global but declare them in a new include file that isn't
+included by the large number of source files that include swap.h.
+
+Dan Magenheimer, last updated April 9, 2012
diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt
deleted file mode 100644
index c71a019..0000000
--- a/Documentation/vm/frontswap.txt
+++ /dev/null
@@ -1,278 +0,0 @@
-Frontswap provides a "transcendent memory" interface for swap pages.
-In some environments, dramatic performance savings may be obtained because
-swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
-
-(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends"
-and the only necessary changes to the core kernel for transcendent memory;
-all other supporting code -- the "backends" -- is implemented as drivers.
-See the LWN.net article "Transcendent memory in a nutshell" for a detailed
-overview of frontswap and related kernel parts:
-https://lwn.net/Articles/454795/ )
-
-Frontswap is so named because it can be thought of as the opposite of
-a "backing" store for a swap device.  The storage is assumed to be
-a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming
-to the requirements of transcendent memory (such as Xen's "tmem", or
-in-kernel compressed memory, aka "zcache", or future RAM-like devices);
-this pseudo-RAM device is not directly accessible or addressable by the
-kernel and is of unknown and possibly time-varying size.  The driver
-links itself to frontswap by calling frontswap_register_ops to set the
-frontswap_ops funcs appropriately and the functions it provides must
-conform to certain policies as follows:
-
-An "init" prepares the device to receive frontswap pages associated
-with the specified swap device number (aka "type").  A "store" will
-copy the page to transcendent memory and associate it with the type and
-offset associated with the page. A "load" will copy the page, if found,
-from transcendent memory into kernel memory, but will NOT remove the page
-from transcendent memory.  An "invalidate_page" will remove the page
-from transcendent memory and an "invalidate_area" will remove ALL pages
-associated with the swap type (e.g., like swapoff) and notify the "device"
-to refuse further stores with that swap type.
-
-Once a page is successfully stored, a matching load on the page will normally
-succeed.  So when the kernel finds itself in a situation where it needs
-to swap out a page, it first attempts to use frontswap.  If the store returns
-success, the data has been successfully saved to transcendent memory and
-a disk write and, if the data is later read back, a disk read are avoided.
-If a store returns failure, transcendent memory has rejected the data, and the
-page can be written to swap as usual.
-
-If a backend chooses, frontswap can be configured as a "writethrough
-cache" by calling frontswap_writethrough().  In this mode, the reduction
-in swap device writes is lost (and also a non-trivial performance advantage)
-in order to allow the backend to arbitrarily "reclaim" space used to
-store frontswap pages to more completely manage its memory usage.
-
-Note that if a page is stored and the page already exists in transcendent memory
-(a "duplicate" store), either the store succeeds and the data is overwritten,
-or the store fails AND the page is invalidated.  This ensures stale data may
-never be obtained from frontswap.
-
-If properly configured, monitoring of frontswap is done via debugfs in
-the /sys/kernel/debug/frontswap directory.  The effectiveness of
-frontswap can be measured (across all swap devices) with:
-
-failed_stores	- how many store attempts have failed
-loads		- how many loads were attempted (all should succeed)
-succ_stores	- how many store attempts have succeeded
-invalidates	- how many invalidates were attempted
-
-A backend implementation may provide additional metrics.
-
-FAQ
-
-1) Where's the value?
-
-When a workload starts swapping, performance falls through the floor.
-Frontswap significantly increases performance in many such workloads by
-providing a clean, dynamic interface to read and write swap pages to
-"transcendent memory" that is otherwise not directly addressable to the kernel.
-This interface is ideal when data is transformed to a different form
-and size (such as with compression) or secretly moved (as might be
-useful for write-balancing for some RAM-like devices).  Swap pages (and
-evicted page-cache pages) are a great use for this kind of slower-than-RAM-
-but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
-cleancache) interface to transcendent memory provides a nice way to read
-and write -- and indirectly "name" -- the pages.
-
-Frontswap -- and cleancache -- with a fairly small impact on the kernel,
-provides a huge amount of flexibility for more dynamic, flexible RAM
-utilization in various system configurations:
-
-In the single kernel case, aka "zcache", pages are compressed and
-stored in local memory, thus increasing the total anonymous pages
-that can be safely kept in RAM.  Zcache essentially trades off CPU
-cycles used in compression/decompression for better memory utilization.
-Benchmarks have shown little or no impact when memory pressure is
-low while providing a significant performance improvement (25%+)
-on some workloads under high memory pressure.
-
-"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory
-support for clustered systems.  Frontswap pages are locally compressed
-as in zcache, but then "remotified" to another system's RAM.  This
-allows RAM to be dynamically load-balanced back-and-forth as needed,
-i.e. when system A is overcommitted, it can swap to system B, and
-vice versa.  RAMster can also be configured as a memory server so
-many servers in a cluster can swap, dynamically as needed, to a single
-server configured with a large amount of RAM... without pre-configuring
-how much of the RAM is available for each of the clients!
-
-In the virtual case, the whole point of virtualization is to statistically
-multiplex physical resources across the varying demands of multiple
-virtual machines.  This is really hard to do with RAM and efforts to do
-it well with no kernel changes have essentially failed (except in some
-well-publicized special-case workloads).
-Specifically, the Xen Transcendent Memory backend allows otherwise
-"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
-virtual machines, but the pages can be compressed and deduplicated to
-optimize RAM utilization.  And when guest OS's are induced to surrender
-underutilized RAM (e.g. with "selfballooning"), sudden unexpected
-memory pressure may result in swapping; frontswap allows those pages
-to be swapped to and from hypervisor RAM (if overall host system memory
-conditions allow), thus mitigating the potentially awful performance impact
-of unplanned swapping.
-
-A KVM implementation is underway and has been RFC'ed to lkml.  And,
-using frontswap, investigation is also underway on the use of NVM as
-a memory extension technology.
-
-2) Sure there may be performance advantages in some situations, but
-   what's the space/time overhead of frontswap?
-
-If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into
-nothingness and the only overhead is a few extra bytes per swapon'ed
-swap device.  If CONFIG_FRONTSWAP is enabled but no frontswap "backend"
-registers, there is one extra global variable compared to zero for
-every swap page read or written.  If CONFIG_FRONTSWAP is enabled
-AND a frontswap backend registers AND the backend fails every "store"
-request (i.e. provides no memory despite claiming it might),
-CPU overhead is still negligible -- and since every frontswap fail
-precedes a swap page write-to-disk, the system is highly likely
-to be I/O bound and using a small fraction of a percent of a CPU
-will be irrelevant anyway.
-
-As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend
-registers, one bit is allocated for every swap page for every swap
-device that is swapon'd.  This is added to the EIGHT bits (which
-was sixteen until about 2.6.34) that the kernel already allocates
-for every swap page for every swap device that is swapon'd.  (Hugh
-Dickins has observed that frontswap could probably steal one of
-the existing eight bits, but let's worry about that minor optimization
-later.)  For very large swap disks (which are rare) on a standard
-4K pagesize, this is 1MB per 32GB swap.
-
-When swap pages are stored in transcendent memory instead of written
-out to disk, there is a side effect that this may create more memory
-pressure that can potentially outweigh the other advantages.  A
-backend, such as zcache, must implement policies to carefully (but
-dynamically) manage memory limits to ensure this doesn't happen.
-
-3) OK, how about a quick overview of what this frontswap patch does
-   in terms that a kernel hacker can grok?
-
-Let's assume that a frontswap "backend" has registered during
-kernel initialization; this registration indicates that this
-frontswap backend has access to some "memory" that is not directly
-accessible by the kernel.  Exactly how much memory it provides is
-entirely dynamic and random.
-
-Whenever a swap-device is swapon'd frontswap_init() is called,
-passing the swap device number (aka "type") as a parameter.
-This notifies frontswap to expect attempts to "store" swap pages
-associated with that number.
-
-Whenever the swap subsystem is readying a page to write to a swap
-device (cf. swap_writepage()), frontswap_store is called.  Frontswap
-consults with the frontswap backend and if the backend says it does NOT
-have room, frontswap_store returns -1 and the kernel swaps the page
-to the swap device as normal.  Note that the response from the frontswap
-backend is unpredictable to the kernel; it may choose to never accept a
-page, it could accept every ninth page, or it might accept every
-page.  But if the backend does accept a page, the data from the page
-has already been copied and associated with the type and offset,
-and the backend guarantees the persistence of the data.  In this case,
-frontswap sets a bit in the "frontswap_map" for the swap device
-corresponding to the page offset on the swap device to which it would
-otherwise have written the data.
-
-When the swap subsystem needs to swap-in a page (swap_readpage()),
-it first calls frontswap_load() which checks the frontswap_map to
-see if the page was earlier accepted by the frontswap backend.  If
-it was, the page of data is filled from the frontswap backend and
-the swap-in is complete.  If not, the normal swap-in code is
-executed to obtain the page of data from the real swap device.
-
-So every time the frontswap backend accepts a page, a swap device read
-and (potentially) a swap device write are replaced by a "frontswap backend
-store" and (possibly) a "frontswap backend loads", which are presumably much
-faster.
-
-4) Can't frontswap be configured as a "special" swap device that is
-   just higher priority than any real swap device (e.g. like zswap,
-   or maybe swap-over-nbd/NFS)?
-
-No.  First, the existing swap subsystem doesn't allow for any kind of
-swap hierarchy.  Perhaps it could be rewritten to accommodate a hierarchy,
-but this would require fairly drastic changes.  Even if it were
-rewritten, the existing swap subsystem uses the block I/O layer which
-assumes a swap device is fixed size and any page in it is linearly
-addressable.  Frontswap barely touches the existing swap subsystem,
-and works around the constraints of the block I/O subsystem to provide
-a great deal of flexibility and dynamicity.
-
-For example, the acceptance of any swap page by the frontswap backend is
-entirely unpredictable. This is critical to the definition of frontswap
-backends because it grants completely dynamic discretion to the
-backend.  In zcache, one cannot know a priori how compressible a page is.
-"Poorly" compressible pages can be rejected, and "poorly" can itself be
-defined dynamically depending on current memory constraints.
-
-Further, frontswap is entirely synchronous whereas a real swap
-device is, by definition, asynchronous and uses block I/O.  The
-block I/O layer is not only unnecessary, but may perform "optimizations"
-that are inappropriate for a RAM-oriented device including delaying
-the write of some pages for a significant amount of time.  Synchrony is
-required to ensure the dynamicity of the backend and to avoid thorny race
-conditions that would unnecessarily and greatly complicate frontswap
-and/or the block I/O subsystem.  That said, only the initial "store"
-and "load" operations need be synchronous.  A separate asynchronous thread
-is free to manipulate the pages stored by frontswap.  For example,
-the "remotification" thread in RAMster uses standard asynchronous
-kernel sockets to move compressed frontswap pages to a remote machine.
-Similarly, a KVM guest-side implementation could do in-guest compression
-and use "batched" hypercalls.
-
-In a virtualized environment, the dynamicity allows the hypervisor
-(or host OS) to do "intelligent overcommit".  For example, it can
-choose to accept pages only until host-swapping might be imminent,
-then force guests to do their own swapping.
-
-There is a downside to the transcendent memory specifications for
-frontswap:  Since any "store" might fail, there must always be a real
-slot on a real swap device to swap the page.  Thus frontswap must be
-implemented as a "shadow" to every swapon'd device with the potential
-capability of holding every page that the swap device might have held
-and the possibility that it might hold no pages at all.  This means
-that frontswap cannot contain more pages than the total of swapon'd
-swap devices.  For example, if NO swap device is configured on some
-installation, frontswap is useless.  Swapless portable devices
-can still use frontswap but a backend for such devices must configure
-some kind of "ghost" swap device and ensure that it is never used.
-
-5) Why this weird definition about "duplicate stores"?  If a page
-   has been previously successfully stored, can't it always be
-   successfully overwritten?
-
-Nearly always it can, but no, sometimes it cannot.  Consider an example
-where data is compressed and the original 4K page has been compressed
-to 1K.  Now an attempt is made to overwrite the page with data that
-is non-compressible and so would take the entire 4K.  But the backend
-has no more space.  In this case, the store must be rejected.  Whenever
-frontswap rejects a store that would overwrite, it also must invalidate
-the old data and ensure that it is no longer accessible.  Since the
-swap subsystem then writes the new data to the real swap device,
-this is the correct course of action to ensure coherency.
-
-6) What is frontswap_shrink for?
-
-When the (non-frontswap) swap subsystem swaps out a page to a real
-swap device, that page is only taking up low-value pre-allocated disk
-space.  But if frontswap has placed a page in transcendent memory, that
-page may be taking up valuable real estate.  The frontswap_shrink
-routine allows code outside of the swap subsystem to force pages out
-of the memory managed by frontswap and back into kernel-addressable memory.
-For example, in RAMster, a "suction driver" thread will attempt
-to "repatriate" pages sent to a remote machine back to the local machine;
-this is driven using the frontswap_shrink mechanism when memory pressure
-subsides.
-
-7) Why does the frontswap patch create the new include file swapfile.h?
-
-The frontswap code depends on some swap-subsystem-internal data
-structures that have, over the years, moved back and forth between
-static and global.  This seemed a reasonable compromise:  Define
-them as global but declare them in a new include file that isn't
-included by the large number of source files that include swap.h.
-
-Dan Magenheimer, last updated April 9, 2012
diff --git a/Documentation/vm/highmem.rst b/Documentation/vm/highmem.rst
new file mode 100644
index 0000000..0f69a9f
--- /dev/null
+++ b/Documentation/vm/highmem.rst
@@ -0,0 +1,147 @@
+.. _highmem:
+
+====================
+High Memory Handling
+====================
+
+By: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+.. contents:: :local:
+
+What Is High Memory?
+====================
+
+High memory (highmem) is used when the size of physical memory approaches or
+exceeds the maximum size of virtual memory.  At that point it becomes
+impossible for the kernel to keep all of the available physical memory mapped
+at all times.  This means the kernel needs to start using temporary mappings of
+the pieces of physical memory that it wants to access.
+
+The part of (physical) memory not covered by a permanent mapping is what we
+refer to as 'highmem'.  There are various architecture dependent constraints on
+where exactly that border lies.
+
+In the i386 arch, for example, we choose to map the kernel into every process's
+VM space so that we don't have to pay the full TLB invalidation costs for
+kernel entry/exit.  This means the available virtual memory space (4GiB on
+i386) has to be divided between user and kernel space.
+
+The traditional split for architectures using this approach is 3:1, 3GiB for
+userspace and the top 1GiB for kernel space::
+
+		+--------+ 0xffffffff
+		| Kernel |
+		+--------+ 0xc0000000
+		|        |
+		| User   |
+		|        |
+		+--------+ 0x00000000
+
+This means that the kernel can at most map 1GiB of physical memory at any one
+time, but because we need virtual address space for other things - including
+temporary maps to access the rest of the physical memory - the actual direct
+map will typically be less (usually around ~896MiB).
+
+Other architectures that have mm context tagged TLBs can have separate kernel
+and user maps.  Some hardware (like some ARMs), however, has limited virtual
+space when it uses mm context tags.
+
+
+Temporary Virtual Mappings
+==========================
+
+The kernel contains several ways of creating temporary mappings:
+
+* vmap().  This can be used to make a long duration mapping of multiple
+  physical pages into a contiguous virtual space.  It needs global
+  synchronization to unmap.
+
+* kmap().  This permits a short duration mapping of a single page.  It needs
+  global synchronization, but is amortized somewhat.  It is also prone to
+  deadlocks when used in a nested fashion, and so it is not recommended for
+  new code (a usage sketch follows this list).
+
+* kmap_atomic().  This permits a very short duration mapping of a single
+  page.  Since the mapping is restricted to the CPU that issued it, it
+  performs well, but the issuing task is therefore required to stay on that
+  CPU until it has finished, lest some other task displace its mappings.
+
+  kmap_atomic() may also be used by interrupt contexts, since it does not
+  sleep and the caller may not sleep until after kunmap_atomic() is called.
+
+  It may be assumed that k[un]map_atomic() won't fail.
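+
+For instance, a sleepable context might use kmap() roughly as follows (a
+minimal sketch)::
+
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	void *vaddr = kmap(page);	/* may sleep; not for atomic context */
+
+	memset(vaddr, 0, PAGE_SIZE);
+	kunmap(page);			/* takes the page, unlike kunmap_atomic() */
+	__free_page(page);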
+
+
+Using kmap_atomic
+=================
+
+When and where to use kmap_atomic() is straightforward.  It is used when code
+wants to access the contents of a page that might be allocated from high memory
+(see __GFP_HIGHMEM), for example a page in the pagecache.  The API has two
+functions, and they can be used in a manner similar to the following::
+
+	/* Find the page of interest. */
+	struct page *page = find_get_page(mapping, offset);
+
+	/* Gain access to the contents of that page. */
+	void *vaddr = kmap_atomic(page);
+
+	/* Do something to the contents of that page. */
+	memset(vaddr, 0, PAGE_SIZE);
+
+	/* Unmap that page. */
+	kunmap_atomic(vaddr);
+
+Note that the kunmap_atomic() call takes the result of the kmap_atomic() call
+not the argument.
+
+If you need to map two pages because you want to copy from one page to
+another you need to keep the kmap_atomic calls strictly nested, like::
+
+	vaddr1 = kmap_atomic(page1);
+	vaddr2 = kmap_atomic(page2);
+
+	memcpy(vaddr1, vaddr2, PAGE_SIZE);
+
+	kunmap_atomic(vaddr2);
+	kunmap_atomic(vaddr1);
+
+
+Cost of Temporary Mappings
+==========================
+
+The cost of creating temporary mappings can be quite high.  The arch has to
+manipulate the kernel's page tables, the data TLB and/or the MMU's registers.
+
+If CONFIG_HIGHMEM is not set, then the kernel will try and create a mapping
+simply with a bit of arithmetic that will convert the page struct address into
+a pointer to the page contents rather than juggling mappings about.  In such a
+case, the unmap operation may be a null operation.
+
+If CONFIG_MMU is not set, then there can be no temporary mappings and no
+highmem.  In such a case, the arithmetic approach will also be used.
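+
+In other words, without CONFIG_HIGHMEM the generic fallback degenerates into
+roughly the following (a simplified sketch of include/linux/highmem.h)::
+
+	static inline void *kmap(struct page *page)
+	{
+		might_sleep();
+		return page_address(page);	/* plain arithmetic, no PTE juggling */
+	}
+
+	static inline void kunmap(struct page *page)
+	{
+		/* Nothing to undo. */
+	}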
+
+
+i386 PAE
+========
+
+The i386 arch, under some circumstances, will permit you to stick up to 64GiB
+of RAM into your 32-bit machine.  This has a number of consequences:
+
+* Linux needs a page-frame structure for each page in the system and the
+  pageframes need to live in the permanent mapping, which means:
+
+* you can have 896M/sizeof(struct page) page-frames at most; with struct
+  page being 32-bytes that would end up being something in the order of 112G
+  worth of pages; the kernel, however, needs to store more than just
+  page-frames in that memory...
+
+* PAE makes your page tables larger - which slows the system down as more
+  data has to be accessed when traversing page tables in TLB fills and the
+  like.  One
+  advantage is that PAE has more PTE bits and can provide advanced features
+  like NX and PAT.
+
+The general recommendation is that you don't use more than 8GiB on a 32-bit
+machine - although more might work for you and your workload, you're pretty
+much on your own - don't expect kernel developers to really care much if things
+come apart.
diff --git a/Documentation/vm/highmem.txt b/Documentation/vm/highmem.txt
deleted file mode 100644
index 4324d24..0000000
--- a/Documentation/vm/highmem.txt
+++ /dev/null
@@ -1,162 +0,0 @@
-
-			     ====================
-			     HIGH MEMORY HANDLING
-			     ====================
-
-By: Peter Zijlstra <a.p.zijlstra@chello.nl>
-
-Contents:
-
- (*) What is high memory?
-
- (*) Temporary virtual mappings.
-
- (*) Using kmap_atomic.
-
- (*) Cost of temporary mappings.
-
- (*) i386 PAE.
-
-
-====================
-WHAT IS HIGH MEMORY?
-====================
-
-High memory (highmem) is used when the size of physical memory approaches or
-exceeds the maximum size of virtual memory.  At that point it becomes
-impossible for the kernel to keep all of the available physical memory mapped
-at all times.  This means the kernel needs to start using temporary mappings of
-the pieces of physical memory that it wants to access.
-
-The part of (physical) memory not covered by a permanent mapping is what we
-refer to as 'highmem'.  There are various architecture dependent constraints on
-where exactly that border lies.
-
-In the i386 arch, for example, we choose to map the kernel into every process's
-VM space so that we don't have to pay the full TLB invalidation costs for
-kernel entry/exit.  This means the available virtual memory space (4GiB on
-i386) has to be divided between user and kernel space.
-
-The traditional split for architectures using this approach is 3:1, 3GiB for
-userspace and the top 1GiB for kernel space:
-
-		+--------+ 0xffffffff
-		| Kernel |
-		+--------+ 0xc0000000
-		|        |
-		| User   |
-		|        |
-		+--------+ 0x00000000
-
-This means that the kernel can at most map 1GiB of physical memory at any one
-time, but because we need virtual address space for other things - including
-temporary maps to access the rest of the physical memory - the actual direct
-map will typically be less (usually around ~896MiB).
-
-Other architectures that have mm context tagged TLBs can have separate kernel
-and user maps.  Some hardware (like some ARMs), however, have limited virtual
-space when they use mm context tags.
-
-
-==========================
-TEMPORARY VIRTUAL MAPPINGS
-==========================
-
-The kernel contains several ways of creating temporary mappings:
-
- (*) vmap().  This can be used to make a long duration mapping of multiple
-     physical pages into a contiguous virtual space.  It needs global
-     synchronization to unmap.
-
- (*) kmap().  This permits a short duration mapping of a single page.  It needs
-     global synchronization, but is amortized somewhat.  It is also prone to
-     deadlocks when used in a nested fashion, and so it is not recommended for
-     new code.
-
- (*) kmap_atomic().  This permits a very short duration mapping of a single
-     page.  Since the mapping is restricted to the CPU that issued it, it
-     performs well, but the issuing task is therefore required to stay on that
-     CPU until it has finished, lest some other task displace its mappings.
-
-     kmap_atomic() may also be used by interrupt contexts, since it does not
-     sleep and the caller may not sleep until after kunmap_atomic() is called.
-
-     It may be assumed that k[un]map_atomic() won't fail.
-
-
-=================
-USING KMAP_ATOMIC
-=================
-
-When and where to use kmap_atomic() is straightforward.  It is used when code
-wants to access the contents of a page that might be allocated from high memory
-(see __GFP_HIGHMEM), for example a page in the pagecache.  The API has two
-functions, and they can be used in a manner similar to the following:
-
-	/* Find the page of interest. */
-	struct page *page = find_get_page(mapping, offset);
-
-	/* Gain access to the contents of that page. */
-	void *vaddr = kmap_atomic(page);
-
-	/* Do something to the contents of that page. */
-	memset(vaddr, 0, PAGE_SIZE);
-
-	/* Unmap that page. */
-	kunmap_atomic(vaddr);
-
-Note that the kunmap_atomic() call takes the result of the kmap_atomic() call
-not the argument.
-
-If you need to map two pages because you want to copy from one page to
-another you need to keep the kmap_atomic calls strictly nested, like:
-
-	vaddr1 = kmap_atomic(page1);
-	vaddr2 = kmap_atomic(page2);
-
-	memcpy(vaddr1, vaddr2, PAGE_SIZE);
-
-	kunmap_atomic(vaddr2);
-	kunmap_atomic(vaddr1);
-
-
-==========================
-COST OF TEMPORARY MAPPINGS
-==========================
-
-The cost of creating temporary mappings can be quite high.  The arch has to
-manipulate the kernel's page tables, the data TLB and/or the MMU's registers.
-
-If CONFIG_HIGHMEM is not set, then the kernel will try and create a mapping
-simply with a bit of arithmetic that will convert the page struct address into
-a pointer to the page contents rather than juggling mappings about.  In such a
-case, the unmap operation may be a null operation.
-
-If CONFIG_MMU is not set, then there can be no temporary mappings and no
-highmem.  In such a case, the arithmetic approach will also be used.
-
-
-========
-i386 PAE
-========
-
-The i386 arch, under some circumstances, will permit you to stick up to 64GiB
-of RAM into your 32-bit machine.  This has a number of consequences:
-
- (*) Linux needs a page-frame structure for each page in the system and the
-     pageframes need to live in the permanent mapping, which means:
-
- (*) you can have 896M/sizeof(struct page) page-frames at most; with struct
-     page being 32-bytes that would end up being something in the order of 112G
-     worth of pages; the kernel, however, needs to store more than just
-     page-frames in that memory...
-
- (*) PAE makes your page tables larger - which slows the system down as more
-     data has to be accessed to traverse in TLB fills and the like.  One
-     advantage is that PAE has more PTE bits and can provide advanced features
-     like NX and PAT.
-
-The general recommendation is that you don't use more than 8GiB on a 32-bit
-machine - although more might work for you and your workload, you're pretty
-much on your own - don't expect kernel developers to really care much if things
-come apart.
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
new file mode 100644
index 0000000..cdf3911
--- /dev/null
+++ b/Documentation/vm/hmm.rst
@@ -0,0 +1,386 @@
+.. _hmm:
+
+=====================================
+Heterogeneous Memory Management (HMM)
+=====================================
+
+Provide infrastructure and helpers to integrate non-conventional memory (device
+memory like GPU on-board memory) into the regular kernel path, with the
+cornerstone being a specialized struct page for such memory (see sections 5 to
+7 of this document).
+
+HMM also provides optional helpers for SVM (Shared Virtual Memory), i.e.,
+allowing a device to transparently access a program's address space coherently
+with the CPU, meaning that any valid pointer on the CPU is also a valid pointer
+for the device. This is becoming mandatory to simplify the use of advanced
+heterogeneous computing where GPU, DSP, or FPGA are used to perform various
+computations on behalf of a process.
+
+This document is divided as follows: in the first section I describe the
+problems related to using device specific memory allocators. In the second
+section, I describe the hardware limitations that are inherent to many
+platforms. The third section gives an overview of the HMM design. The fourth
+section explains how CPU page-table mirroring works and the purpose of HMM in
+this context. The fifth section deals with how device memory is represented
+inside the kernel. Finally, the last section presents a new migration helper
+that allows leveraging the device DMA engine.
+
+.. contents:: :local:
+
+Problems of using a device specific memory allocator
+====================================================
+
+Devices with a large amount of on-board memory (several gigabytes) like GPUs
+have historically managed their memory through dedicated driver specific APIs.
+This creates a disconnect between memory allocated and managed by a device
+driver and regular application memory (private anonymous, shared memory, or
+regular file backed memory). From here on I will refer to this aspect as split
+address space. I use shared address space to refer to the opposite situation:
+i.e., one in which any application memory region can be used by a device
+transparently.
+
+Split address space happens because a device can only access memory allocated
+through a device specific API. This implies that all memory objects in a
+program are not equal from the device's point of view, which complicates large
+programs that rely on a wide set of libraries.
+
+Concretely this means that code that wants to leverage devices like GPUs needs
+to copy objects between generically allocated memory (malloc, mmap private,
+mmap share) and memory allocated through the device driver API (this still
+ends up with an mmap, but of the device file).
+
+For flat data sets (array, grid, image, ...) this isn't too hard to achieve but
+complex data sets (list, tree, ...) are hard to get right. Duplicating a
+complex data set requires re-mapping all the pointer relations between each of
+its elements. This is error prone, and the program gets harder to debug because
+of the duplicated data set and addresses.
+
+Split address space also means that libraries cannot transparently use data
+they are getting from the core program or another library and thus each library
+might have to duplicate its input data set using the device specific memory
+allocator. Large projects suffer from this and waste resources because of the
+various memory copies.
+
+Duplicating each library API to accept as input or output memory allocated by
+each device specific allocator is not a viable option. It would lead to a
+combinatorial explosion in the library entry points.
+
+Finally, with the advance of high level language constructs (in C++ but in
+other languages too) it is now possible for the compiler to leverage GPUs and
+other devices without programmer knowledge. Some compiler identified patterns
+are only do-able with a shared address space. It is also more reasonable to use
+a shared address space for all other patterns.
+
+
+I/O bus, device memory characteristics
+======================================
+
+I/O buses cripple shared address spaces due to a few limitations. Most I/O
+buses only allow basic memory access from the device to main memory; even
+cache coherency is often optional. Access to device memory from the CPU is
+even more limited; more often than not, it is not cache coherent.
+
+If we only consider the PCIE bus, then a device can access main memory (often
+through an IOMMU) and be cache coherent with the CPUs. However, it only allows
+a limited set of atomic operations from the device on main memory. This is worse
+in the other direction: the CPU can only access a limited range of the device
+memory and cannot perform atomic operations on it. Thus device memory cannot
+be considered the same as regular memory from the kernel point of view.
+
+Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0
+and 16 lanes). This is roughly 30 times less than the fastest GPU memory
+(1 TBytes/s).
+The final limitation is latency. Access to main memory from the device has an
+order of magnitude higher latency than when the device accesses its own memory.
+
+Some platforms are developing new I/O buses or additions/modifications to PCIE
+to address some of these limitations (OpenCAPI, CCIX). They mainly allow two-
+way cache coherency between CPU and device and allow all atomic operations the
+architecture supports. Sadly, not all platforms are following this trend and
+some major architectures are left without hardware solutions to these problems.
+
+So for shared address space to make sense, not only must we allow devices to
+access any memory but we must also permit any memory to be migrated to device
+memory while the device is using it (blocking CPU access while it happens).
+
+
+Shared address space and migration
+==================================
+
+HMM intends to provide two main features. The first one is to share the address
+space by duplicating the CPU page table in the device page table so the same
+address points to the same physical memory for any valid main memory address in
+the process address space.
+
+To achieve this, HMM offers a set of helpers to populate the device page table
+while keeping track of CPU page table updates. Device page table updates are
+not as easy as CPU page table updates. To update the device page table, you must
+allocate a buffer (or use a pool of pre-allocated buffers) and write GPU
+specific commands in it to perform the update (unmap, cache invalidations, and
+flush, ...). This cannot be done through common code for all devices. Hence,
+HMM provides helpers to factor out everything that can be shared across
+devices, while leaving the hardware specific details to the device driver.
+
+The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
+allows allocating a struct page for each page of the device memory. Those pages
+are special because the CPU cannot map them. However, they allow migrating
+main memory to device memory using existing migration mechanisms and everything
+looks like a page is swapped out to disk from the CPU point of view. Using a
+struct page gives the easiest and cleanest integration with existing mm mech-
+anisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE
+memory for the device memory and second to perform migration. Policy decisions
+of what and when to migrate things are left to the device driver.
+
+Note that any CPU access to a device page triggers a page fault and a migration
+back to main memory. For example, when a page backing a given CPU address A is
+migrated from a main memory page to a device page, then any CPU access to
+address A triggers a page fault and initiates a migration back to main memory.
+
+With these two features, HMM not only allows a device to mirror a process
+address space, keeping both CPU and device page tables synchronized, but also
+leverages device memory by migrating the part of the data set that is
+actively being used by the device.
+
+
+Address space mirroring implementation and API
+==============================================
+
+Address space mirroring's main objective is to allow duplication of a range
+of the CPU page table into a device page table; HMM helps keep both
+synchronized. A device driver that wants to mirror a process address space
+must start with the registration of an hmm_mirror struct::
+
+ int hmm_mirror_register(struct hmm_mirror *mirror,
+                         struct mm_struct *mm);
+ int hmm_mirror_register_locked(struct hmm_mirror *mirror,
+                                struct mm_struct *mm);
+
+
+The locked variant is to be used when the driver is already holding mmap_sem
+of the mm in write mode. The mirror struct has a set of callbacks that are used
+to propagate CPU page tables::
+
+ struct hmm_mirror_ops {
+     /* update() - synchronize device page tables
+      *
+      * @mirror: pointer to struct hmm_mirror
+      * @action: the update action that occurred to the CPU page table
+      * @start: virtual start address of the range to update
+      * @end: virtual end address of the range to update
+      *
+      * This callback ultimately originates from mmu_notifiers when the CPU
+      * page table is updated. The device driver must update its page table
+      * in response to this callback. The action argument tells which
+      * action to perform.
+      *
+      * The device driver must not return from this callback until the device
+      * page tables are completely updated (TLBs flushed, etc); this is a
+      * synchronous call.
+      */
+      void (*update)(struct hmm_mirror *mirror,
+                     enum hmm_update action,
+                     unsigned long start,
+                     unsigned long end);
+ };
+
+The device driver must perform the update action to the range (mark range
+read only, or fully unmap, ...). The device must be done with the update before
+the driver callback returns.
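+
+As an illustration (a minimal sketch, not a prescription), a driver might
+wire this up as follows, assuming the hmm_mirror is embedded in a
+hypothetical driver structure and that driver_update() is the callback
+sketched later in this section::
+
+ static const struct hmm_mirror_ops driver_mirror_ops = {
+     .update = driver_update,
+ };
+
+ int driver_mirror_mm(struct driver *drv, struct mm_struct *mm)
+ {
+     drv->mirror.ops = &driver_mirror_ops;
+     /* Register the mirror and start tracking CPU page table updates. */
+     return hmm_mirror_register(&drv->mirror, mm);
+ }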
+
+When the device driver wants to populate a range of virtual addresses, it can
+use either::
+
+ int hmm_vma_get_pfns(struct vm_area_struct *vma,
+                      struct hmm_range *range,
+                      unsigned long start,
+                      unsigned long end,
+                      hmm_pfn_t *pfns);
+ int hmm_vma_fault(struct vm_area_struct *vma,
+                   struct hmm_range *range,
+                   unsigned long start,
+                   unsigned long end,
+                   hmm_pfn_t *pfns,
+                   bool write,
+                   bool block);
+
+The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
+entries and will not trigger a page fault on missing or non-present entries.
+The second one does trigger a page fault on missing or read-only entries if the
+write parameter is true. Page faults use the generic mm page fault code path
+just like a CPU page fault.
+
+Both functions copy CPU page table entries into their pfns array argument. Each
+entry in that array corresponds to an address in the virtual range. HMM
+provides a set of flags to help the driver identify special CPU page table
+entries.
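+
+For instance, a driver could walk the returned array along these lines (the
+HMM_PFN_* names below are illustrative stand-ins for the real flag set in
+include/linux/hmm.h, and driver_map_page() is a hypothetical helper)::
+
+ unsigned long i, npages = (end - start) >> PAGE_SHIFT;
+
+ for (i = 0; i < npages; i++) {
+     unsigned long addr = start + (i << PAGE_SHIFT);
+
+     if (!(pfns[i] & HMM_PFN_VALID))
+         continue;   /* hole or non-present entry: nothing to mirror */
+     /* Map the page into the device page table, honoring write access. */
+     driver_map_page(drv, addr, pfns[i], pfns[i] & HMM_PFN_WRITE);
+ }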
+
+Locking with the update() callback is the most important aspect the driver must
+respect in order to keep things properly synchronized. The usage pattern is::
+
+ int driver_populate_range(...)
+ {
+      struct hmm_range range;
+      ...
+ again:
+      ret = hmm_vma_get_pfns(vma, &range, start, end, pfns);
+      if (ret)
+          return ret;
+      take_lock(driver->update);
+      if (!hmm_vma_range_done(vma, &range)) {
+          release_lock(driver->update);
+          goto again;
+      }
+
+      // Use pfns array content to update device page table
+
+      release_lock(driver->update);
+      return 0;
+ }
+
+The driver->update lock is the same lock that the driver takes inside its
+update() callback. That lock must be held before hmm_vma_range_done() to avoid
+any race with a concurrent CPU page table update.
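+
+The device-side half of this pattern might look like the following sketch
+(driver_invalidate_range() and driver_wait_commands() are hypothetical
+driver helpers; take_lock()/release_lock() stand for the same driver->update
+lock used above)::
+
+ static void driver_update(struct hmm_mirror *mirror,
+                           enum hmm_update action,
+                           unsigned long start,
+                           unsigned long end)
+ {
+     struct driver *drv = container_of(mirror, struct driver, mirror);
+
+     take_lock(drv->update);
+     /* Build and schedule the command buffer that unmaps or
+      * write-protects [start, end) in the device page table. */
+     driver_invalidate_range(drv, action, start, end);
+     /* Synchronous contract: do not return before the device is done. */
+     driver_wait_commands(drv);
+     release_lock(drv->update);
+ }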
+
+HMM implements all this on top of the mmu_notifier API because we wanted a
+simpler API and also to be able to perform optimizations later on, like doing
+concurrent device updates in multi-device scenarios.
+
+HMM also bridges the impedance mismatch between how CPU page table updates
+are done (by CPU write to the page table and TLB flushes) and how devices
+update their own page table. Device updates are a multi-step process. First,
+appropriate commands are written to a buffer, then this buffer is scheduled for
+execution on the device. It is only once the device has executed commands in
+the buffer that the update is done. Creating and scheduling the update command
+buffer can happen concurrently for multiple devices. Waiting for each device to
+report commands as executed is serialized (there is no point in doing this
+concurrently).
+
+
+Represent and manage device memory from core kernel point of view
+=================================================================
+
+Several different designs were tried to support device memory. The first one
+used a device specific data structure to keep information about migrated
+memory, and HMM hooked itself into various places of mm code to handle any
+access to addresses that were backed by device memory. It turns out that this
+ended up replicating most of the fields of struct page and also required many
+kernel code paths to be updated to understand this new kind of memory.
+
+Most kernel code paths never try to access the memory behind a page
+but only care about struct page contents. Because of this, HMM switched to
+directly using struct page for device memory which left most kernel code paths
+unaware of the difference. We only need to make sure that no one ever tries to
+map those pages from the CPU side.
+
+HMM provides a set of helpers to register and hotplug device memory as a new
+region needing a struct page. This is offered through a very simple API::
+
+ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
+                                   struct device *device,
+                                   unsigned long size);
+ void hmm_devmem_remove(struct hmm_devmem *devmem);
+
+The hmm_devmem_ops structure is where most of the important things are::
+
+ struct hmm_devmem_ops {
+     void (*free)(struct hmm_devmem *devmem, struct page *page);
+     int (*fault)(struct hmm_devmem *devmem,
+                  struct vm_area_struct *vma,
+                  unsigned long addr,
+                  struct page *page,
+                  unsigned flags,
+                  pmd_t *pmdp);
+ };
+
+The first callback (free()) happens when the last reference on a device page is
+dropped. This means the device page is now free and no longer used by anyone.
+The second callback happens whenever the CPU tries to access a device page,
+which it cannot do. This second callback must trigger a migration back to
+system memory.
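+
+A registration sketch, assuming the driver supplies the two callbacks
+described above (DEVICE_MEMORY_SIZE and the driver_devmem_* helpers are
+hypothetical)::
+
+ static const struct hmm_devmem_ops driver_devmem_ops = {
+     .free  = driver_devmem_free,
+     .fault = driver_devmem_fault,  /* must migrate back to main memory */
+ };
+
+ devmem = hmm_devmem_add(&driver_devmem_ops, device, DEVICE_MEMORY_SIZE);
+ if (IS_ERR(devmem))
+     return PTR_ERR(devmem);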
+
+
+Migration to and from device memory
+===================================
+
+Because the CPU cannot access device memory, migration must use the device DMA
+engine to perform copies to and from device memory. For this we need a new
+migration helper::
+
+ int migrate_vma(const struct migrate_vma_ops *ops,
+                 struct vm_area_struct *vma,
+                 unsigned long mentries,
+                 unsigned long start,
+                 unsigned long end,
+                 unsigned long *src,
+                 unsigned long *dst,
+                 void *private);
+
+Unlike other migration functions, it works on a range of virtual addresses;
+there are two reasons for that. First, device DMA copy has a high setup
+overhead cost, and thus batching multiple pages is needed, as otherwise the
+migration overhead makes the whole exercise pointless. The second reason is
+that the migration might be for a range of addresses the device is actively
+accessing.
+
+The migrate_vma_ops struct defines two callbacks. The first one
+(alloc_and_copy()) controls destination memory allocation and the copy
+operation. The second one allows the device driver to perform cleanup
+operations after migration::
+
+ struct migrate_vma_ops {
+     void (*alloc_and_copy)(struct vm_area_struct *vma,
+                            const unsigned long *src,
+                            unsigned long *dst,
+                            unsigned long start,
+                            unsigned long end,
+                            void *private);
+     void (*finalize_and_map)(struct vm_area_struct *vma,
+                              const unsigned long *src,
+                              const unsigned long *dst,
+                              unsigned long start,
+                              unsigned long end,
+                              void *private);
+ };
+
+It is important to stress that these migration helpers allow for holes in the
+virtual address range. Some pages in the range might not be migrated for all
+the usual reasons (page is pinned, page is locked, ...). This helper does not
+fail but just skips over those pages.
+
+The alloc_and_copy() callback might decide not to migrate all pages in the
+range (for reasons under the callback's control). For those, the callback just
+has to leave the corresponding dst entry empty.
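+
+A skeleton of such a callback might look like this (the MIGRATE_PFN_* flags
+and migrate_pfn() helpers belong to the migrate_vma() API; the driver_*
+functions are hypothetical)::
+
+ static void driver_alloc_and_copy(struct vm_area_struct *vma,
+                                   const unsigned long *src,
+                                   unsigned long *dst,
+                                   unsigned long start,
+                                   unsigned long end,
+                                   void *private)
+ {
+     unsigned long addr, i;
+
+     for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
+         struct page *dpage;
+
+         /* Skip holes and pages the core code chose not to migrate. */
+         if (!(src[i] & MIGRATE_PFN_MIGRATE))
+             continue;
+
+         dpage = driver_alloc_device_page(private);
+         if (!dpage) {
+             dst[i] = 0;  /* leave empty: this page stays where it is */
+             continue;
+         }
+
+         /* Queue one DMA copy; batching amortizes the setup cost. */
+         driver_dma_copy(private, migrate_pfn_to_page(src[i]), dpage);
+         dst[i] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+     }
+     driver_dma_wait(private);  /* wait once for the whole batch */
+ }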
+
+Finally, the migration of the struct page might fail (for file backed pages)
+for various reasons (failure to freeze the reference, or to update the page
+cache, ...). If that happens, then finalize_and_map() can catch any pages
+that were not migrated. Note that those pages were still copied to a new
+page, so bandwidth was wasted, but this is considered a rare event and a
+price we are willing to pay to keep the code simpler.
+
+
+Memory cgroup (memcg) and rss accounting
+========================================
+
+For now, device memory is accounted as any regular page in rss counters
+(anonymous if the device page is used for anonymous memory, file if the
+device page is used for a file backed page, or shmem if the device page is
+used for shared memory). This is a deliberate choice to keep existing
+applications, which might start using device memory without knowing about it,
+running unimpacted.
+
+A drawback is that the OOM killer might kill an application using a lot of
+device memory but not a lot of regular system memory, and thus not free much
+system memory. We want to gather more real world experience on how applications
+and system react under memory pressure in the presence of device memory before
+deciding to account device memory differently.
+
+
+The same decision was made for the memory cgroup. Device memory pages are
+accounted against the same memory cgroup a regular page would be accounted
+to. This does simplify migration to and from device memory. It also means
+that migration back from device memory to regular memory cannot fail because
+it would go above the memory cgroup limit. We might revisit this choice later
+on once we get more experience in how device memory is used and its impact on
+memory resource control.
+
+
+Note that device memory can never be pinned by a device driver nor through
+GUP, and thus such memory is always freed upon process exit, or when the last
+reference is dropped in the case of shared memory or file backed memory.
diff --git a/Documentation/vm/hmm.txt b/Documentation/vm/hmm.txt
deleted file mode 100644
index 2d1d6f6..0000000
--- a/Documentation/vm/hmm.txt
+++ /dev/null
@@ -1,396 +0,0 @@
-Heterogeneous Memory Management (HMM)
-
-Provide infrastructure and helpers to integrate non-conventional memory (device
-memory like GPU on board memory) into regular kernel path, with the cornerstone
-of this being specialized struct page for such memory (see sections 5 to 7 of
-this document).
-
-HMM also provides optional helpers for SVM (Share Virtual Memory), i.e.,
-allowing a device to transparently access program address coherently with the
-CPU meaning that any valid pointer on the CPU is also a valid pointer for the
-device. This is becoming mandatory to simplify the use of advanced hetero-
-geneous computing where GPU, DSP, or FPGA are used to perform various
-computations on behalf of a process.
-
-This document is divided as follows: in the first section I expose the problems
-related to using device specific memory allocators. In the second section, I
-expose the hardware limitations that are inherent to many platforms. The third
-section gives an overview of the HMM design. The fourth section explains how
-CPU page-table mirroring works and the purpose of HMM in this context. The
-fifth section deals with how device memory is represented inside the kernel.
-Finally, the last section presents a new migration helper that allows lever-
-aging the device DMA engine.
-
-
-1) Problems of using a device specific memory allocator:
-2) I/O bus, device memory characteristics
-3) Shared address space and migration
-4) Address space mirroring implementation and API
-5) Represent and manage device memory from core kernel point of view
-6) Migration to and from device memory
-7) Memory cgroup (memcg) and rss accounting
-
-
--------------------------------------------------------------------------------
-
-1) Problems of using a device specific memory allocator:
-
-Devices with a large amount of on board memory (several gigabytes) like GPUs
-have historically managed their memory through dedicated driver specific APIs.
-This creates a disconnect between memory allocated and managed by a device
-driver and regular application memory (private anonymous, shared memory, or
-regular file backed memory). From here on I will refer to this aspect as split
-address space. I use shared address space to refer to the opposite situation:
-i.e., one in which any application memory region can be used by a device
-transparently.
-
-Split address space happens because device can only access memory allocated
-through device specific API. This implies that all memory objects in a program
-are not equal from the device point of view which complicates large programs
-that rely on a wide set of libraries.
-
-Concretely this means that code that wants to leverage devices like GPUs needs
-to copy object between generically allocated memory (malloc, mmap private, mmap
-share) and memory allocated through the device driver API (this still ends up
-with an mmap but of the device file).
-
-For flat data sets (array, grid, image, ...) this isn't too hard to achieve but
-complex data sets (list, tree, ...) are hard to get right. Duplicating a
-complex data set needs to re-map all the pointer relations between each of its
-elements. This is error prone and program gets harder to debug because of the
-duplicate data set and addresses.
-
-Split address space also means that libraries cannot transparently use data
-they are getting from the core program or another library and thus each library
-might have to duplicate its input data set using the device specific memory
-allocator. Large projects suffer from this and waste resources because of the
-various memory copies.
-
-Duplicating each library API to accept as input or output memory allocated by
-each device specific allocator is not a viable option. It would lead to a
-combinatorial explosion in the library entry points.
-
-Finally, with the advance of high level language constructs (in C++ but in
-other languages too) it is now possible for the compiler to leverage GPUs and
-other devices without programmer knowledge. Some compiler identified patterns
-are only do-able with a shared address space. It is also more reasonable to use
-a shared address space for all other patterns.
-
-
--------------------------------------------------------------------------------
-
-2) I/O bus, device memory characteristics
-
-I/O buses cripple shared address spaces due to a few limitations. Most I/O
-buses only allow basic memory access from device to main memory; even cache
-coherency is often optional. Access to device memory from CPU is even more
-limited. More often than not, it is not cache coherent.
-
-If we only consider the PCIE bus, then a device can access main memory (often
-through an IOMMU) and be cache coherent with the CPUs. However, it only allows
-a limited set of atomic operations from device on main memory. This is worse
-in the other direction: the CPU can only access a limited range of the device
-memory and cannot perform atomic operations on it. Thus device memory cannot
-be considered the same as regular memory from the kernel point of view.
-
-Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0
-and 16 lanes). This is 33 times less than the fastest GPU memory (1 TBytes/s).
-The final limitation is latency. Access to main memory from the device has an
-order of magnitude higher latency than when the device accesses its own memory.
-
-Some platforms are developing new I/O buses or additions/modifications to PCIE
-to address some of these limitations (OpenCAPI, CCIX). They mainly allow two-
-way cache coherency between CPU and device and allow all atomic operations the
-architecture supports. Sadly, not all platforms are following this trend and
-some major architectures are left without hardware solutions to these problems.
-
-So for shared address space to make sense, not only must we allow devices to
-access any memory but we must also permit any memory to be migrated to device
-memory while device is using it (blocking CPU access while it happens).
-
-
--------------------------------------------------------------------------------
-
-3) Shared address space and migration
-
-HMM intends to provide two main features. First one is to share the address
-space by duplicating the CPU page table in the device page table so the same
-address points to the same physical memory for any valid main memory address in
-the process address space.
-
-To achieve this, HMM offers a set of helpers to populate the device page table
-while keeping track of CPU page table updates. Device page table updates are
-not as easy as CPU page table updates. To update the device page table, you must
-allocate a buffer (or use a pool of pre-allocated buffers) and write GPU
-specific commands in it to perform the update (unmap, cache invalidations, and
-flush, ...). This cannot be done through common code for all devices. Hence
-why HMM provides helpers to factor out everything that can be while leaving the
-hardware specific details to the device driver.
-
-The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
-allows allocating a struct page for each page of the device memory. Those pages
-are special because the CPU cannot map them. However, they allow migrating
-main memory to device memory using existing migration mechanisms and everything
-looks like a page is swapped out to disk from the CPU point of view. Using a
-struct page gives the easiest and cleanest integration with existing mm mech-
-anisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE
-memory for the device memory and second to perform migration. Policy decisions
-of what and when to migrate things is left to the device driver.
-
-Note that any CPU access to a device page triggers a page fault and a migration
-back to main memory. For example, when a page backing a given CPU address A is
-migrated from a main memory page to a device page, then any CPU access to
-address A triggers a page fault and initiates a migration back to main memory.
-
-With these two features, HMM not only allows a device to mirror process address
-space and keeping both CPU and device page table synchronized, but also lever-
-ages device memory by migrating the part of the data set that is actively being
-used by the device.
-
-
--------------------------------------------------------------------------------
-
-4) Address space mirroring implementation and API
-
-Address space mirroring's main objective is to allow duplication of a range of
-CPU page table into a device page table; HMM helps keep both synchronized. A
-device driver that wants to mirror a process address space must start with the
-registration of an hmm_mirror struct:
-
- int hmm_mirror_register(struct hmm_mirror *mirror,
-                         struct mm_struct *mm);
- int hmm_mirror_register_locked(struct hmm_mirror *mirror,
-                                struct mm_struct *mm);
-
-The locked variant is to be used when the driver is already holding mmap_sem
-of the mm in write mode. The mirror struct has a set of callbacks that are used
-to propagate CPU page tables:
-
- struct hmm_mirror_ops {
-     /* sync_cpu_device_pagetables() - synchronize page tables
-      *
-      * @mirror: pointer to struct hmm_mirror
-      * @update_type: type of update that occurred to the CPU page table
-      * @start: virtual start address of the range to update
-      * @end: virtual end address of the range to update
-      *
-      * This callback ultimately originates from mmu_notifiers when the CPU
-      * page table is updated. The device driver must update its page table
-      * in response to this callback. The update argument tells what action
-      * to perform.
-      *
-      * The device driver must not return from this callback until the device
-      * page tables are completely updated (TLBs flushed, etc); this is a
-      * synchronous call.
-      */
-      void (*update)(struct hmm_mirror *mirror,
-                     enum hmm_update action,
-                     unsigned long start,
-                     unsigned long end);
- };
-
-The device driver must perform the update action to the range (mark range
-read only, or fully unmap, ...). The device must be done with the update before
-the driver callback returns.
-
-
-When the device driver wants to populate a range of virtual addresses, it can
-use either:
- int hmm_vma_get_pfns(struct vm_area_struct *vma,
-                      struct hmm_range *range,
-                      unsigned long start,
-                      unsigned long end,
-                      hmm_pfn_t *pfns);
- int hmm_vma_fault(struct vm_area_struct *vma,
-                   struct hmm_range *range,
-                   unsigned long start,
-                   unsigned long end,
-                   hmm_pfn_t *pfns,
-                   bool write,
-                   bool block);
-
-The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
-entries and will not trigger a page fault on missing or non-present entries.
-The second one does trigger a page fault on missing or read-only entry if the
-write parameter is true. Page faults use the generic mm page fault code path
-just like a CPU page fault.
-
-Both functions copy CPU page table entries into their pfns array argument. Each
-entry in that array corresponds to an address in the virtual range. HMM
-provides a set of flags to help the driver identify special CPU page table
-entries.
-
-Locking with the update() callback is the most important aspect the driver must
-respect in order to keep things properly synchronized. The usage pattern is:
-
- int driver_populate_range(...)
- {
-      struct hmm_range range;
-      ...
- again:
-      ret = hmm_vma_get_pfns(vma, &range, start, end, pfns);
-      if (ret)
-          return ret;
-      take_lock(driver->update);
-      if (!hmm_vma_range_done(vma, &range)) {
-          release_lock(driver->update);
-          goto again;
-      }
-
-      // Use pfns array content to update device page table
-
-      release_lock(driver->update);
-      return 0;
- }
-
-The driver->update lock is the same lock that the driver takes inside its
-update() callback. That lock must be held before hmm_vma_range_done() to avoid
-any race with a concurrent CPU page table update.
-
-HMM implements all this on top of the mmu_notifier API because we wanted a
-simpler API and also to be able to perform optimizations latter on like doing
-concurrent device updates in multi-devices scenario.
-
-HMM also serves as an impedance mismatch between how CPU page table updates
-are done (by CPU write to the page table and TLB flushes) and how devices
-update their own page table. Device updates are a multi-step process. First,
-appropriate commands are written to a buffer, then this buffer is scheduled for
-execution on the device. It is only once the device has executed commands in
-the buffer that the update is done. Creating and scheduling the update command
-buffer can happen concurrently for multiple devices. Waiting for each device to
-report commands as executed is serialized (there is no point in doing this
-concurrently).
-
-
--------------------------------------------------------------------------------
-
-5) Represent and manage device memory from core kernel point of view
-
-Several different designs were tried to support device memory. First one used
-a device specific data structure to keep information about migrated memory and
-HMM hooked itself in various places of mm code to handle any access to
-addresses that were backed by device memory. It turns out that this ended up
-replicating most of the fields of struct page and also needed many kernel code
-paths to be updated to understand this new kind of memory.
-
-Most kernel code paths never try to access the memory behind a page
-but only care about struct page contents. Because of this, HMM switched to
-directly using struct page for device memory which left most kernel code paths
-unaware of the difference. We only need to make sure that no one ever tries to
-map those pages from the CPU side.
-
-HMM provides a set of helpers to register and hotplug device memory as a new
-region needing a struct page. This is offered through a very simple API:
-
- struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
-                                   struct device *device,
-                                   unsigned long size);
- void hmm_devmem_remove(struct hmm_devmem *devmem);
-
-The hmm_devmem_ops is where most of the important things are:
-
- struct hmm_devmem_ops {
-     void (*free)(struct hmm_devmem *devmem, struct page *page);
-     int (*fault)(struct hmm_devmem *devmem,
-                  struct vm_area_struct *vma,
-                  unsigned long addr,
-                  struct page *page,
-                  unsigned flags,
-                  pmd_t *pmdp);
- };
-
-The first callback (free()) happens when the last reference on a device page is
-dropped. This means the device page is now free and no longer used by anyone.
-The second callback happens whenever the CPU tries to access a device page
-which it cannot do. This second callback must trigger a migration back to
-system memory.
-
-
--------------------------------------------------------------------------------
-
-6) Migration to and from device memory
-
-Because the CPU cannot access device memory, migration must use the device DMA
-engine to perform copy from and to device memory. For this we need a new
-migration helper:
-
- int migrate_vma(const struct migrate_vma_ops *ops,
-                 struct vm_area_struct *vma,
-                 unsigned long mentries,
-                 unsigned long start,
-                 unsigned long end,
-                 unsigned long *src,
-                 unsigned long *dst,
-                 void *private);
-
-Unlike other migration functions it works on a range of virtual address, there
-are two reasons for that. First, device DMA copy has a high setup overhead cost
-and thus batching multiple pages is needed as otherwise the migration overhead
-makes the whole exercise pointless. The second reason is because the
-migration might be for a range of addresses the device is actively accessing.
-
-The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
-controls destination memory allocation and copy operation. Second one is there
-to allow the device driver to perform cleanup operations after migration.
-
- struct migrate_vma_ops {
-     void (*alloc_and_copy)(struct vm_area_struct *vma,
-                            const unsigned long *src,
-                            unsigned long *dst,
-                            unsigned long start,
-                            unsigned long end,
-                            void *private);
-     void (*finalize_and_map)(struct vm_area_struct *vma,
-                              const unsigned long *src,
-                              const unsigned long *dst,
-                              unsigned long start,
-                              unsigned long end,
-                              void *private);
- };
-
-It is important to stress that these migration helpers allow for holes in the
-virtual address range. Some pages in the range might not be migrated for all
-the usual reasons (page is pinned, page is locked, ...). This helper does not
-fail but just skips over those pages.
-
-The alloc_and_copy() might decide to not migrate all pages in the
-range (for reasons under the callback control). For those, the callback just
-has to leave the corresponding dst entry empty.
-
-Finally, the migration of the struct page might fail (for file backed page) for
-various reasons (failure to freeze reference, or update page cache, ...). If
-that happens, then the finalize_and_map() can catch any pages that were not
-migrated. Note those pages were still copied to a new page and thus we wasted
-bandwidth but this is considered as a rare event and a price that we are
-willing to pay to keep all the code simpler.
-
-
--------------------------------------------------------------------------------
-
-7) Memory cgroup (memcg) and rss accounting
-
-For now device memory is accounted as any regular page in rss counters (either
-anonymous if device page is used for anonymous, file if device page is used for
-file backed page or shmem if device page is used for shared memory). This is a
-deliberate choice to keep existing applications, that might start using device
-memory without knowing about it, running unimpacted.
-
-A drawback is that the OOM killer might kill an application using a lot of
-device memory and not a lot of regular system memory and thus not freeing much
-system memory. We want to gather more real world experience on how applications
-and system react under memory pressure in the presence of device memory before
-deciding to account device memory differently.
-
-
-Same decision was made for memory cgroup. Device memory pages are accounted
-against same memory cgroup a regular page would be accounted to. This does
-simplify migration to and from device memory. This also means that migration
-back from device memory to regular memory cannot fail because it would
-go above memory cgroup limit. We might revisit this choice latter on once we
-get more experience in how device memory is used and its impact on memory
-resource control.
-
-
-Note that device memory can never be pinned by device driver nor through GUP
-and thus such memory is always free upon process exit. Or when last reference
-is dropped in case of shared memory or file backed memory.
diff --git a/Documentation/vm/hugetlbfs_reserv.rst b/Documentation/vm/hugetlbfs_reserv.rst
new file mode 100644
index 0000000..9d20076
--- /dev/null
+++ b/Documentation/vm/hugetlbfs_reserv.rst
@@ -0,0 +1,595 @@
+.. _hugetlbfs_reserve:
+
+=====================
+Hugetlbfs Reservation
+=====================
+
+Overview
+========
+
+Huge pages as described at :ref:`hugetlbpage` are typically
+preallocated for application use.  These huge pages are instantiated in a
+task's address space at page fault time if the VMA indicates huge pages are
+to be used.  If no huge page exists at page fault time, the task is sent
+a SIGBUS and often dies an unhappy death.  Shortly after huge page support
+was added, it was determined that it would be better to detect a shortage
+of huge pages at mmap() time.  The idea is that if there were not enough
+huge pages to cover the mapping, the mmap() would fail.  This was first
+done with a simple check in the code at mmap() time to determine if there
+were enough free huge pages to cover the mapping.  Like most things in the
+kernel, the code has evolved over time.  However, the basic idea was to
+'reserve' huge pages at mmap() time to ensure that huge pages would be
+available for page faults in that mapping.  The description below attempts to
+describe how huge page reserve processing is done in the v4.10 kernel.
+
+
+Audience
+========
+This description is primarily targeted at kernel developers who are modifying
+hugetlbfs code.
+
+
+The Data Structures
+===================
+
+resv_huge_pages
+	This is a global (per-hstate) count of reserved huge pages.  Reserved
+	huge pages are only available to the task which reserved them.
+	Therefore, the number of huge pages generally available is computed
+	as (``free_huge_pages - resv_huge_pages``).
+Reserve Map
+	A reserve map is described by the structure::
+
+		struct resv_map {
+			struct kref refs;
+			spinlock_t lock;
+			struct list_head regions;
+			long adds_in_progress;
+			struct list_head region_cache;
+			long region_cache_count;
+		};
+
+	There is one reserve map for each huge page mapping in the system.
+	The regions list within the resv_map describes the regions within
+	the mapping.  A region is described as::
+
+		struct file_region {
+			struct list_head link;
+			long from;
+			long to;
+		};
+
+	The 'from' and 'to' fields of the file region structure are huge page
+	indices into the mapping.  Depending on the type of mapping, a
+	region in the resv_map may indicate reservations exist for the
+	range, or reservations do not exist.
+Flags for MAP_PRIVATE Reservations
+	These are stored in the bottom bits of the reservation map pointer.
+
+	``#define HPAGE_RESV_OWNER    (1UL << 0)``
+		Indicates this task is the owner of the reservations
+		associated with the mapping.
+	``#define HPAGE_RESV_UNMAPPED (1UL << 1)``
+		Indicates task originally mapping this range (and creating
+		reserves) has unmapped a page from this task (the child)
+		due to a failed COW.
+Page Flags
+	The PagePrivate page flag is used to indicate that a huge page
+	reservation must be restored when the huge page is freed.  More
+	details will be discussed in the "Freeing Huge Pages" section.
+
+
+Reservation Map Location (Private or Shared)
+============================================
+
+A huge page mapping or segment is either private or shared.  If private,
+it is typically only available to a single address space (task).  If shared,
+it can be mapped into multiple address spaces (tasks).  The location and
+semantics of the reservation map are significantly different for the two
+types of mappings.  Location differences are:
+
+- For private mappings, the reservation map hangs off the VMA structure.
+  Specifically, vma->vm_private_data.  This reserve map is created at the
+  time the mapping (mmap(MAP_PRIVATE)) is created.
+- For shared mappings, the reservation map hangs off the inode.  Specifically,
+  inode->i_mapping->private_data.  Since shared mappings are always backed
+  by files in the hugetlbfs filesystem, the hugetlbfs code ensures each inode
+  contains a reservation map.  As a result, the reservation map is allocated
+  when the inode is created.
+
+
+Creating Reservations
+=====================
+Reservations are created when a huge page backed shared memory segment is
+created (shmget(SHM_HUGETLB)) or a mapping is created via mmap(MAP_HUGETLB).
+These operations result in a call to the routine hugetlb_reserve_pages()::
+
+	int hugetlb_reserve_pages(struct inode *inode,
+				  long from, long to,
+				  struct vm_area_struct *vma,
+				  vm_flags_t vm_flags)
+
+The first thing hugetlb_reserve_pages() does is check whether the NORESERVE
+flag was specified in either the shmget() or mmap() call.  If NORESERVE
+was specified, then this routine returns immediately as no reservations
+are desired.
+
+The arguments 'from' and 'to' are huge page indices into the mapping or
+underlying file.  For shmget(), 'from' is always 0 and 'to' corresponds to
+the length of the segment/mapping.  For mmap(), the offset argument could
+be used to specify the offset into the underlying file.  In such a case
+the 'from' and 'to' arguments have been adjusted by this offset.
+
+One of the big differences between PRIVATE and SHARED mappings is the way
+in which reservations are represented in the reservation map.
+
+- For shared mappings, an entry in the reservation map indicates a reservation
+  exists or did exist for the corresponding page.  As reservations are
+  consumed, the reservation map is not modified.
+- For private mappings, the lack of an entry in the reservation map indicates
+  a reservation exists for the corresponding page.  As reservations are
+  consumed, entries are added to the reservation map.  Therefore, the
+  reservation map can also be used to determine which reservations have
+  been consumed.
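+
+A concrete example may help (illustrative numbers): consider a mapping of
+four huge pages in which pages 0 and 1 have already been faulted in::
+
+	Shared mapping:  regions = [0, 4)  (an entry means reservations
+			 existed; the map is unchanged as pages are faulted)
+	Private mapping: regions = [0, 2)  (entries appear as the
+			 reservations for pages 0 and 1 are consumed)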
+
+For private mappings, hugetlb_reserve_pages() creates the reservation map and
+hangs it off the VMA structure.  In addition, the HPAGE_RESV_OWNER flag is set
+to indicate this VMA owns the reservations.
+
+The reservation map is consulted to determine how many huge page reservations
+are needed for the current mapping/segment.  For private mappings, this is
+always the value (to - from).  However, for shared mappings it is possible
+that some reservations may already exist within the range (to - from).  See
+the
+section :ref:`Reservation Map Modifications <resv_map_modifications>`
+for details on how this is accomplished.
+
+The mapping may be associated with a subpool.  If so, the subpool is consulted
+to ensure there is sufficient space for the mapping.  It is possible that the
+subpool has set aside reservations that can be used for the mapping.  See the
+section :ref:`Subpool Reservations <sub_pool_resv>` for more details.
+
+After consulting the reservation map and subpool, the number of needed new
+reservations is known.  The routine hugetlb_acct_memory() is called to check
+for and take the requested number of reservations.  hugetlb_acct_memory()
+calls into routines that potentially allocate and adjust surplus page counts.
+However, within those routines the code is simply checking to ensure there
+are enough free huge pages to accommodate the reservation.  If there are,
+the global reservation count resv_huge_pages is adjusted something like the
+following::
+
+	if (resv_needed <= (free_huge_pages - resv_huge_pages))
+		resv_huge_pages += resv_needed;
+
+Note that the global lock hugetlb_lock is held when checking and adjusting
+these counters.
+
+If there were enough free huge pages and the global count resv_huge_pages
+was adjusted, then the reservation map associated with the mapping is
+modified to reflect the reservations.  In the case of a shared mapping, a
+file_region will exist that includes the range 'from' - 'to'.  For private
+mappings, no modifications are made to the reservation map as lack of an
+entry indicates a reservation exists.
+
+If hugetlb_reserve_pages() was successful, the global reservation count and
+reservation map associated with the mapping will be modified as required to
+ensure reservations exist for the range 'from' - 'to'.
+
+.. _consume_resv:
+
+Consuming Reservations/Allocating a Huge Page
+=============================================
+
+Reservations are consumed when huge pages associated with the reservations
+are allocated and instantiated in the corresponding mapping.  The allocation
+is performed within the routine alloc_huge_page()::
+
+	struct page *alloc_huge_page(struct vm_area_struct *vma,
+				     unsigned long addr, int avoid_reserve)
+
+alloc_huge_page is passed a VMA pointer and a virtual address, so it can
+consult the reservation map to determine if a reservation exists.  In addition,
+alloc_huge_page takes the argument avoid_reserve which indicates reserves
+should not be used even if it appears they have been set aside for the
+specified address.  The avoid_reserve argument is most often used in the case
+of Copy on Write and Page Migration where additional copies of an existing
+page are being allocated.
+
+The helper routine vma_needs_reservation() is called to determine if a
+reservation exists for the address within the mapping (vma).  See the section
+:ref:`Reservation Map Helper Routines <resv_map_helpers>` for detailed
+information on what this routine does.
+The value returned from vma_needs_reservation() is generally 0 or 1:
+0 if a reservation exists for the address, 1 if no reservation exists.
+If a reservation does not exist, and there is a subpool associated with the
+mapping, the subpool is consulted to determine if it contains reservations.
+If the subpool contains reservations, one can be used for this allocation.
+However, in every case the avoid_reserve argument overrides the use of
+a reservation for the allocation.  After determining whether a reservation
+exists and can be used for the allocation, the routine dequeue_huge_page_vma()
+is called.  This routine takes two arguments related to reservations:
+
+- avoid_reserve, this is the same value/argument passed to alloc_huge_page()
+- chg, even though this argument is of type long, only the values 0 or 1 are
+  passed to dequeue_huge_page_vma.  If the value is 0, it indicates a
+  reservation exists (see the section "Memory Policy and Reservations" for
+  possible issues).  If the value is 1, it indicates a reservation does not
+  exist and the page must be taken from the global free pool if possible.
+
+The free lists associated with the memory policy of the VMA are searched for
+a free page.  If a page is found, the value free_huge_pages is decremented
+when the page is removed from the free list.  If there was a reservation
+associated with the page, the following adjustments are made::
+
+	SetPagePrivate(page);	/* Indicates allocating this page consumed
+				 * a reservation, and if an error is
+				 * encountered such that the page must be
+				 * freed, the reservation will be restored. */
+	resv_huge_pages--;	/* Decrement the global reservation count */
+
+Note, if no huge page can be found that satisfies the VMA's memory policy
+an attempt will be made to allocate one using the buddy allocator.  This
+brings up the issue of surplus huge pages and overcommit, which is beyond
+the scope of reservations.  Even if a surplus page is allocated, the same
+reservation based adjustments as above will be made: SetPagePrivate(page) and
+resv_huge_pages--.
+
+After obtaining a new huge page, (page)->private is set to the value of
+the subpool associated with the page if it exists.  This will be used for
+subpool accounting when the page is freed.
+
+The routine vma_commit_reservation() is then called to adjust the reserve
+map based on the consumption of the reservation.  In general, this involves
+ensuring the page is represented within a file_region structure of the region
+map.  For shared mappings where the reservation was present, an entry
+in the reserve map already existed, so no change is made.  However, if there
+was no reservation in a shared mapping, or this was a private mapping, a new
+entry must be created.
+
+It is possible that the reserve map could have been changed between the call
+to vma_needs_reservation() at the beginning of alloc_huge_page() and the
+call to vma_commit_reservation() after the page was allocated.  This would
+be possible if hugetlb_reserve_pages was called for the same page in a shared
+mapping.  In such cases, the reservation count and subpool free page count
+will be off by one.  This rare condition can be identified by comparing the
+return value from vma_needs_reservation and vma_commit_reservation.  If such
+a race is detected, the subpool and global reserve counts are adjusted to
+compensate.  See the section
+:ref:`Reservation Map Helper Routines <resv_map_helpers>` for more
+information on these routines.
+
+
+Instantiate Huge Pages
+======================
+
+After huge page allocation, the page is typically added to the page tables
+of the allocating task.  Before this, pages in a shared mapping are added
+to the page cache and pages in private mappings are added to an anonymous
+reverse mapping.  In both cases, the PagePrivate flag is cleared.  Therefore,
+when a huge page that has been instantiated is freed no adjustment is made
+to the global reservation count (resv_huge_pages).
+
+
+Freeing Huge Pages
+==================
+
+Huge page freeing is performed by the routine free_huge_page().  This routine
+is the destructor for hugetlbfs compound pages.  As a result, it is only
+passed a pointer to the page struct.  When a huge page is freed, reservation
+accounting may need to be performed.  This would be the case if the page was
+associated with a subpool that contained reserves, or if the page is being freed
+on an error path where a global reserve count must be restored.
+
+The page->private field points to any subpool associated with the page.
+If the PagePrivate flag is set, it indicates the global reserve count should
+be adjusted (see the section
+:ref:`Consuming Reservations/Allocating a Huge Page <consume_resv>`
+for information on how these are set).
+
+The routine first calls hugepage_subpool_put_pages() for the page.  If this
+routine returns a value of 0 (which does not equal the value passed, 1), it
+indicates reserves are associated with the subpool, and this newly freed page
+must be used to keep the number of subpool reserves above the minimum size.
+Therefore, the global resv_huge_pages counter is incremented in this case.
+
+If the PagePrivate flag was set in the page, the global resv_huge_pages counter
+will always be incremented.
+
+.. _sub_pool_resv:
+
+Subpool Reservations
+====================
+
+There is a struct hstate associated with each huge page size.  The hstate
+tracks all huge pages of the specified size.  A subpool represents a subset
+of pages within a hstate that is associated with a mounted hugetlbfs
+filesystem.
+
+When a hugetlbfs filesystem is mounted, a min_size option can be specified
+which indicates the minimum number of huge pages required by the filesystem.
+If this option is specified, the number of huge pages corresponding to
+min_size are reserved for use by the filesystem.  This number is tracked in
+the min_hpages field of a struct hugepage_subpool.  At mount time,
+hugetlb_acct_memory(min_hpages) is called to reserve the specified number of
+huge pages.  If they cannot be reserved, the mount fails.
+
+The routines hugepage_subpool_get/put_pages() are called when pages are
+obtained from or released back to a subpool.  They perform all subpool
+accounting, and track any reservations associated with the subpool.
+hugepage_subpool_get/put_pages are passed the number of huge pages by which
+to adjust the subpool 'used page' count (down for get, up for put).  Normally,
+they return the same value that was passed or an error if not enough pages
+exist in the subpool.
+
+However, if reserves are associated with the subpool a return value less
+than the passed value may be returned.  This return value indicates the
+number of additional global pool adjustments which must be made.  For example,
+suppose a subpool contains 3 reserved huge pages and someone asks for 5.
+The 3 reserved pages associated with the subpool can be used to satisfy part
+of the request.  But, 2 pages must be obtained from the global pools.  To
+relay this information to the caller, the value 2 is returned.  The caller
+is then responsible for attempting to obtain the additional two pages from
+the global pools.
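+
+That arithmetic can be sketched as follows (a deliberately simplified model;
+the real hugepage_subpool_get_pages() also enforces the max_hpages limit and
+takes the subpool lock)::
+
+	/* Returns how many pages must still come from the global pools. */
+	static long subpool_get_pages_sketch(long *subpool_reserves, long delta)
+	{
+		long needed = 0;
+
+		if (*subpool_reserves >= delta) {
+			*subpool_reserves -= delta;	/* all from reserves */
+		} else {
+			needed = delta - *subpool_reserves; /* e.g. 5 - 3 = 2 */
+			*subpool_reserves = 0;
+		}
+		return needed;
+	}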
+
+
+COW and Reservations
+====================
+
+Since shared mappings all point to and use the same underlying pages, the
+biggest reservation concern for COW is private mappings.  In this case,
+two tasks can be pointing at the same previously allocated page.  One task
+attempts to write to the page, so a new page must be allocated so that each
+task points to its own page.
+
+When the page was originally allocated, the reservation for that page was
+consumed.  When an attempt to allocate a new page is made as a result of
+COW, it is possible that no huge pages are free and the allocation
+will fail.
+
+When the private mapping was originally created, the owner of the mapping
+was noted by setting the HPAGE_RESV_OWNER bit in the pointer to the reservation
+map of the owner.  Since the owner created the mapping, the owner owns all
+the reservations associated with the mapping.  Therefore, when a write fault
+occurs and there is no page available, different action is taken for the owner
+and non-owner of the reservation.
+
+In the case where the faulting task is not the owner, the fault will fail and
+the task will typically receive a SIGBUS.
+
+If the owner is the faulting task, we want it to succeed since it owned the
+original reservation.  To accomplish this, the page is unmapped from the
+non-owning task.  In this way, the only reference is from the owning task.
+In addition, the HPAGE_RESV_UNMAPPED bit is set in the reservation map pointer
+of the non-owning task.  The non-owning task may receive a SIGBUS if it later
+faults on a non-present page.  But, the original owner of the
+mapping/reservation will behave as expected.
+
+
+.. _resv_map_modifications:
+
+Reservation Map Modifications
+=============================
+
+The following low level routines are used to make modifications to a
+reservation map.  Typically, these routines are not called directly.  Rather,
+a reservation map helper routine is called which calls one of these low level
+routines.  These low level routines are fairly well documented in the source
+code (mm/hugetlb.c).  These routines are::
+
+	long region_chg(struct resv_map *resv, long f, long t);
+	long region_add(struct resv_map *resv, long f, long t);
+	void region_abort(struct resv_map *resv, long f, long t);
+	long region_count(struct resv_map *resv, long f, long t);
+
+Operations on the reservation map typically involve two operations:
+
+1) region_chg() is called to examine the reserve map and determine how
+   many pages in the specified range [f, t) are NOT currently represented.
+
+   The calling code performs global checks and allocations to determine if
+   there are enough huge pages for the operation to succeed.
+
+2)
+  a) If the operation can succeed, region_add() is called to actually modify
+     the reservation map for the same range [f, t) previously passed to
+     region_chg().
+  b) If the operation can not succeed, region_abort is called for the same
+     range [f, t) to abort the operation.
+
+Note that this is a two step process where region_add() and region_abort()
+are guaranteed to succeed after a prior call to region_chg() for the same
+range.  region_chg() is responsible for pre-allocating any data structures
+necessary to ensure the subsequent operations (specifically region_add())
+will succeed.
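+
+In code form, the two step pattern might look like this sketch (the global
+availability check stands in for the hugetlb_acct_memory() style checks the
+real callers perform)::
+
+	chg = region_chg(resv, f, t);	/* pages not yet represented */
+	if (chg < 0)
+		return chg;
+
+	if (enough_free_huge_pages(chg)) {	/* hypothetical check */
+		add = region_add(resv, f, t);	/* succeeds after region_chg() */
+	} else {
+		region_abort(resv, f, t);	/* undo region_chg() bookkeeping */
+		return -ENOMEM;
+	}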
+
+As mentioned above, region_chg() determines the number of pages in the range
+which are NOT currently represented in the map.  This number is returned to
+the caller.  region_add() returns the number of pages in the range added to
+the map.  In most cases, the return value of region_add() is the same as the
+return value of region_chg().  However, in the case of shared mappings it is
+possible for changes to the reservation map to be made between the calls to
+region_chg() and region_add().  In this case, the return value of region_add()
+will not match the return value of region_chg().  It is likely that in such
+cases global counts and subpool accounting will be incorrect and in need of
+adjustment.  It is the responsibility of the caller to check for this condition
+and make the appropriate adjustments.
+
+The routine region_del() is called to remove regions from a reservation map.
+It is typically called in the following situations:
+
+- When a file in the hugetlbfs filesystem is being removed, the inode will
+  be released and the reservation map freed.  Before freeing the reservation
+  map, all the individual file_region structures must be freed.  In this case
+  region_del is passed the range [0, LONG_MAX).
+- When a hugetlbfs file is being truncated.  In this case, all allocated pages
+  after the new file size must be freed.  In addition, any file_region entries
+  in the reservation map past the new end of file must be deleted.  In this
+  case, region_del is passed the range [new_end_of_file, LONG_MAX).
+- When a hole is being punched in a hugetlbfs file.  In this case, huge pages
+  are removed from the middle of the file one at a time.  As the pages are
+  removed, region_del() is called to remove the corresponding entry from the
+  reservation map.  In this case, region_del is passed the range
+  [page_idx, page_idx + 1).
+
+In every case, region_del() will return the number of pages removed from the
+reservation map.  In VERY rare cases, region_del() can fail.  This can only
+happen in the hole punch case where it has to split an existing file_region
+entry and can not allocate a new structure.  In this error case, region_del()
+will return -ENOMEM.  The problem here is that the reservation map will
+indicate that there is a reservation for the page.  However, the subpool and
+global reservation counts will not reflect the reservation.  To handle this
+situation, the routine hugetlb_fix_reserve_counts() is called to adjust the
+counters so that they correspond with the reservation map entry that could
+not be deleted.
+
+region_count() is called when unmapping a private huge page mapping.  In
+private mappings, the lack of an entry in the reservation map indicates that
+a reservation exists.  Therefore, by counting the number of entries in the
+reservation map we know how many reservations were consumed and how many are
+outstanding (outstanding = (end - start) - region_count(resv, start, end)).
+Since the mapping is going away, the subpool and global reservation counts
+are decremented by the number of outstanding reservations.
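+
+For example (illustrative numbers), unmapping a 10 page private mapping in
+which 4 reservations were consumed (so 4 entries exist in the map) gives::
+
+	outstanding = (end - start) - region_count(resv, start, end)
+		    = 10 - 4
+		    = 6 reservations to subtract from the counts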
+
+.. _resv_map_helpers:
+
+Reservation Map Helper Routines
+===============================
+
+Several helper routines exist to query and modify the reservation maps.
+These routines are only interested in reservations for a specific huge
+page, so they just pass in an address instead of a range.  In addition,
+they pass in the associated VMA.  From the VMA, the type of mapping (private
+or shared) and the location of the reservation map (inode or VMA) can be
+determined.  These routines simply call the underlying routines described
+in the section "Reservation Map Modifications".  However, they do take into
+account the 'opposite' meaning of reservation map entries for private and
+shared mappings and hide this detail from the caller::
+
+	long vma_needs_reservation(struct hstate *h,
+				   struct vm_area_struct *vma,
+				   unsigned long addr)
+
+This routine calls region_chg() for the specified page.  If no reservation
+exists, 1 is returned.  If a reservation exists, 0 is returned::
+
+	long vma_commit_reservation(struct hstate *h,
+				    struct vm_area_struct *vma,
+				    unsigned long addr)
+
+This calls region_add() for the specified page.  As in the case of region_chg
+and region_add, this routine is to be called after a previous call to
+vma_needs_reservation.  It will add a reservation entry for the page.  It
+returns 1 if the reservation was added and 0 if not.  The return value should
+be compared with the return value of the previous call to
+vma_needs_reservation.  An unexpected difference indicates the reservation
+map was modified between calls::
+
+	void vma_end_reservation(struct hstate *h,
+				 struct vm_area_struct *vma,
+				 unsigned long addr)
+
+This calls region_abort() for the specified page.  As in the case of region_chg
+and region_abort, this routine is to be called after a previous call to
+vma_needs_reservation.  It will abort/end the in progress reservation add
+operation::
+
+	long vma_add_reservation(struct hstate *h,
+				 struct vm_area_struct *vma,
+				 unsigned long addr)
+
+This is a special wrapper routine to facilitate reservation cleanup
+on error paths.  It is only called from the routine restore_reserve_on_error().
+This routine is used in conjunction with vma_needs_reservation in an attempt
+to add a reservation to the reservation map.  It takes into account the
+different reservation map semantics for private and shared mappings.  Hence,
+region_add is called for shared mappings (as an entry present in the map
+indicates a reservation), and region_del is called for private mappings (as
+the absence of an entry in the map indicates a reservation).  See the section
+"Reservation cleanup in error paths" for more information on what needs to
+be done on error paths.
+
+
+Reservation Cleanup in Error Paths
+==================================
+
+As mentioned in the section
+:ref:`Reservation Map Helper Routines <resv_map_helpers>`, reservation
+map modifications are performed in two steps.  First vma_needs_reservation
+is called before a page is allocated.  If the allocation is successful,
+then vma_commit_reservation is called.  If not, vma_end_reservation is called.
+Global and subpool reservation counts are adjusted based on success or failure
+of the operation and all is well.
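+
+Putting the two steps together, the allocation path has this general
+shape (a simplified sketch, not the literal alloc_huge_page code;
+dequeue_or_alloc() is a hypothetical stand-in for the page allocation)::
+
+	struct page *page;
+	long needed, committed;
+
+	needed = vma_needs_reservation(h, vma, addr);
+	if (needed < 0)
+		return ERR_PTR(-ENOMEM);
+
+	page = dequeue_or_alloc(h, vma, addr);
+	if (!page) {
+		vma_end_reservation(h, vma, addr);
+		return ERR_PTR(-ENOSPC);
+	}
+
+	committed = vma_commit_reservation(h, vma, addr);
+	if (committed != needed) {
+		/* map changed between the calls; fix up the counts */
+	}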
+
+Additionally, after a huge page is instantiated the PagePrivate flag is
+cleared so that accounting when the page is ultimately freed is correct.
+
+However, there are several instances where errors are encountered after a huge
+page is allocated but before it is instantiated.  In this case, the page
+allocation has consumed the reservation and made the appropriate subpool,
+reservation map and global count adjustments.  If the page is freed at this
+time (before instantiation and clearing of PagePrivate), then free_huge_page
+will increment the global reservation count.  However, the reservation map
+indicates the reservation was consumed.  This resulting inconsistent state
+will cause the 'leak' of a reserved huge page.  The global reserve count will
+be higher than it should be and will prevent allocation of a pre-allocated
+page.
+
+The routine restore_reserve_on_error() attempts to handle this situation.  It
+is fairly well documented.  The intention of this routine is to restore
+the reservation map to the way it was before the page allocation.  In this
+way, the state of the reservation map will correspond to the global reservation
+count after the page is freed.
+
+The routine restore_reserve_on_error itself may encounter errors while
+attempting to restore the reservation map entry.  In this case, it will
+simply clear the PagePrivate flag of the page.  In this way, the global
+reserve count will not be incremented when the page is freed.  However, the
+reservation map will continue to look as though the reservation was consumed.
+A page can still be allocated for the address, but it will not use a reserved
+page as originally intended.
+
+There is some code (most notably userfaultfd) which can not call
+restore_reserve_on_error.  In this case, it simply modifies the PagePrivate
+flag so that a reservation will not be leaked when the huge page is freed.
+
+
+Reservations and Memory Policy
+==============================
+Per-node huge page lists existed in struct hstate when git was first used
+to manage Linux code.  The concept of reservations was added some time later.
+When reservations were added, no attempt was made to take memory policy
+into account.  While cpusets are not exactly the same as memory policy, this
+comment in hugetlb_acct_memory sums up the interaction between reservations
+and cpusets/memory policy::
+
+	/*
+	 * When cpuset is configured, it breaks the strict hugetlb page
+	 * reservation as the accounting is done on a global variable. Such
+	 * reservation is completely rubbish in the presence of cpuset because
+	 * the reservation is not checked against page availability for the
+	 * current cpuset. Application can still potentially OOM'ed by kernel
+	 * with lack of free htlb page in cpuset that the task is in.
+	 * Attempt to enforce strict accounting with cpuset is almost
+	 * impossible (or too ugly) because cpuset is too fluid that
+	 * task or memory node can be dynamically moved between cpusets.
+	 *
+	 * The change of semantics for shared hugetlb mapping with cpuset is
+	 * undesirable. However, in order to preserve some of the semantics,
+	 * we fall back to check against current free page availability as
+	 * a best attempt and hopefully to minimize the impact of changing
+	 * semantics that cpuset has.
+	 */
+
+Huge page reservations were added to prevent unexpected page allocation
+failures (OOM) at page fault time.  However, if an application makes use
+of cpusets or memory policy there is no guarantee that huge pages will be
+available on the required nodes.  This is true even if there are a sufficient
+number of global reservations.
+
+Hugetlbfs regression testing
+============================
+
+The most complete set of hugetlb tests is in the libhugetlbfs repository.
+If you modify any hugetlb related code, use the libhugetlbfs test suite
+to check for regressions.  In addition, if you add any new hugetlb
+functionality, please add appropriate tests to libhugetlbfs.
+
+--
+Mike Kravetz, 7 April 2017
diff --git a/Documentation/vm/hugetlbfs_reserv.txt b/Documentation/vm/hugetlbfs_reserv.txt
deleted file mode 100644
index 9aca09a..0000000
--- a/Documentation/vm/hugetlbfs_reserv.txt
+++ /dev/null
@@ -1,529 +0,0 @@
-Hugetlbfs Reservation Overview
-------------------------------
-Huge pages as described at 'Documentation/vm/hugetlbpage.txt' are typically
-preallocated for application use.  These huge pages are instantiated in a
-task's address space at page fault time if the VMA indicates huge pages are
-to be used.  If no huge page exists at page fault time, the task is sent
-a SIGBUS and often dies an unhappy death.  Shortly after huge page support
-was added, it was determined that it would be better to detect a shortage
-of huge pages at mmap() time.  The idea is that if there were not enough
-huge pages to cover the mapping, the mmap() would fail.  This was first
-done with a simple check in the code at mmap() time to determine if there
-were enough free huge pages to cover the mapping.  Like most things in the
-kernel, the code has evolved over time.  However, the basic idea was to
-'reserve' huge pages at mmap() time to ensure that huge pages would be
-available for page faults in that mapping.  The description below attempts to
-describe how huge page reserve processing is done in the v4.10 kernel.
-
-
-Audience
---------
-This description is primarily targeted at kernel developers who are modifying
-hugetlbfs code.
-
-
-The Data Structures
--------------------
-resv_huge_pages
-	This is a global (per-hstate) count of reserved huge pages.  Reserved
-	huge pages are only available to the task which reserved them.
-	Therefore, the number of huge pages generally available is computed
-	as (free_huge_pages - resv_huge_pages).
-Reserve Map
-	A reserve map is described by the structure:
-	struct resv_map {
-		struct kref refs;
-		spinlock_t lock;
-		struct list_head regions;
-		long adds_in_progress;
-		struct list_head region_cache;
-		long region_cache_count;
-	};
-	There is one reserve map for each huge page mapping in the system.
-	The regions list within the resv_map describes the regions within
-	the mapping.  A region is described as:
-	struct file_region {
-		struct list_head link;
-		long from;
-		long to;
-	};
-	The 'from' and 'to' fields of the file region structure are huge page
-	indices into the mapping.  Depending on the type of mapping, a
-	region in the reserv_map may indicate reservations exist for the
-	range, or reservations do not exist.
-Flags for MAP_PRIVATE Reservations
-	These are stored in the bottom bits of the reservation map pointer.
-	#define HPAGE_RESV_OWNER    (1UL << 0) Indicates this task is the
-		owner of the reservations associated with the mapping.
-	#define HPAGE_RESV_UNMAPPED (1UL << 1) Indicates task originally
-		mapping this range (and creating reserves) has unmapped a
-		page from this task (the child) due to a failed COW.
-Page Flags
-	The PagePrivate page flag is used to indicate that a huge page
-	reservation must be restored when the huge page is freed.  More
-	details will be discussed in the "Freeing huge pages" section.
-
-
-Reservation Map Location (Private or Shared)
---------------------------------------------
-A huge page mapping or segment is either private or shared.  If private,
-it is typically only available to a single address space (task).  If shared,
-it can be mapped into multiple address spaces (tasks).  The location and
-semantics of the reservation map is significantly different for two types
-of mappings.  Location differences are:
-- For private mappings, the reservation map hangs off the the VMA structure.
-  Specifically, vma->vm_private_data.  This reserve map is created at the
-  time the mapping (mmap(MAP_PRIVATE)) is created.
-- For shared mappings, the reservation map hangs off the inode.  Specifically,
-  inode->i_mapping->private_data.  Since shared mappings are always backed
-  by files in the hugetlbfs filesystem, the hugetlbfs code ensures each inode
-  contains a reservation map.  As a result, the reservation map is allocated
-  when the inode is created.
-
-
-Creating Reservations
----------------------
-Reservations are created when a huge page backed shared memory segment is
-created (shmget(SHM_HUGETLB)) or a mapping is created via mmap(MAP_HUGETLB).
-These operations result in a call to the routine hugetlb_reserve_pages()
-
-int hugetlb_reserve_pages(struct inode *inode,
-					long from, long to,
-					struct vm_area_struct *vma,
-					vm_flags_t vm_flags)
-
-The first thing hugetlb_reserve_pages() does is check for the NORESERVE
-flag was specified in either the shmget() or mmap() call.  If NORESERVE
-was specified, then this routine returns immediately as no reservation
-are desired.
-
-The arguments 'from' and 'to' are huge page indices into the mapping or
-underlying file.  For shmget(), 'from' is always 0 and 'to' corresponds to
-the length of the segment/mapping.  For mmap(), the offset argument could
-be used to specify the offset into the underlying file.  In such a case
-the 'from' and 'to' arguments have been adjusted by this offset.
-
-One of the big differences between PRIVATE and SHARED mappings is the way
-in which reservations are represented in the reservation map.
-- For shared mappings, an entry in the reservation map indicates a reservation
-  exists or did exist for the corresponding page.  As reservations are
-  consumed, the reservation map is not modified.
-- For private mappings, the lack of an entry in the reservation map indicates
-  a reservation exists for the corresponding page.  As reservations are
-  consumed, entries are added to the reservation map.  Therefore, the
-  reservation map can also be used to determine which reservations have
-  been consumed.
-
-For private mappings, hugetlb_reserve_pages() creates the reservation map and
-hangs it off the VMA structure.  In addition, the HPAGE_RESV_OWNER flag is set
-to indicate this VMA owns the reservations.
-
-The reservation map is consulted to determine how many huge page reservations
-are needed for the current mapping/segment.  For private mappings, this is
-always the value (to - from).  However, for shared mappings it is possible that some reservations may already exist within the range (to - from).  See the
-section "Reservation Map Modifications" for details on how this is accomplished.
-
-The mapping may be associated with a subpool.  If so, the subpool is consulted
-to ensure there is sufficient space for the mapping.  It is possible that the
-subpool has set aside reservations that can be used for the mapping.  See the
-section "Subpool Reservations" for more details.
-
-After consulting the reservation map and subpool, the number of needed new
-reservations is known.  The routine hugetlb_acct_memory() is called to check
-for and take the requested number of reservations.  hugetlb_acct_memory()
-calls into routines that potentially allocate and adjust surplus page counts.
-However, within those routines the code is simply checking to ensure there
-are enough free huge pages to accommodate the reservation.  If there are,
-the global reservation count resv_huge_pages is adjusted something like the
-following.
-	if (resv_needed <= (resv_huge_pages - free_huge_pages))
-		resv_huge_pages += resv_needed;
-Note that the global lock hugetlb_lock is held when checking and adjusting
-these counters.
-
-If there were enough free huge pages and the global count resv_huge_pages
-was adjusted, then the reservation map associated with the mapping is
-modified to reflect the reservations.  In the case of a shared mapping, a
-file_region will exist that includes the range 'from' 'to'.  For private
-mappings, no modifications are made to the reservation map as lack of an
-entry indicates a reservation exists.
-
-If hugetlb_reserve_pages() was successful, the global reservation count and
-reservation map associated with the mapping will be modified as required to
-ensure reservations exist for the range 'from' - 'to'.
-
-
-Consuming Reservations/Allocating a Huge Page
----------------------------------------------
-Reservations are consumed when huge pages associated with the reservations
-are allocated and instantiated in the corresponding mapping.  The allocation
-is performed within the routine alloc_huge_page().
-struct page *alloc_huge_page(struct vm_area_struct *vma,
-                                    unsigned long addr, int avoid_reserve)
-alloc_huge_page is passed a VMA pointer and a virtual address, so it can
-consult the reservation map to determine if a reservation exists.  In addition,
-alloc_huge_page takes the argument avoid_reserve which indicates reserves
-should not be used even if it appears they have been set aside for the
-specified address.  The avoid_reserve argument is most often used in the case
-of Copy on Write and Page Migration where additional copies of an existing
-page are being allocated.
-
-The helper routine vma_needs_reservation() is called to determine if a
-reservation exists for the address within the mapping(vma).  See the section
-"Reservation Map Helper Routines" for detailed information on what this
-routine does.  The value returned from vma_needs_reservation() is generally
-0 or 1.  0 if a reservation exists for the address, 1 if no reservation exists.
-If a reservation does not exist, and there is a subpool associated with the
-mapping the subpool is consulted to determine if it contains reservations.
-If the subpool contains reservations, one can be used for this allocation.
-However, in every case the avoid_reserve argument overrides the use of
-a reservation for the allocation.  After determining whether a reservation
-exists and can be used for the allocation, the routine dequeue_huge_page_vma()
-is called.  This routine takes two arguments related to reservations:
-- avoid_reserve, this is the same value/argument passed to alloc_huge_page()
-- chg, even though this argument is of type long only the values 0 or 1 are
-  passed to dequeue_huge_page_vma.  If the value is 0, it indicates a
-  reservation exists (see the section "Memory Policy and Reservations" for
-  possible issues).  If the value is 1, it indicates a reservation does not
-  exist and the page must be taken from the global free pool if possible.
-The free lists associated with the memory policy of the VMA are searched for
-a free page.  If a page is found, the value free_huge_pages is decremented
-when the page is removed from the free list.  If there was a reservation
-associated with the page, the following adjustments are made:
-	SetPagePrivate(page);	/* Indicates allocating this page consumed
-				 * a reservation, and if an error is
-				 * encountered such that the page must be
-				 * freed, the reservation will be restored. */
-	resv_huge_pages--;	/* Decrement the global reservation count */
-Note, if no huge page can be found that satisfies the VMA's memory policy
-an attempt will be made to allocate one using the buddy allocator.  This
-brings up the issue of surplus huge pages and overcommit which is beyond
-the scope reservations.  Even if a surplus page is allocated, the same
-reservation based adjustments as above will be made: SetPagePrivate(page) and
-resv_huge_pages--.
-
-After obtaining a new huge page, (page)->private is set to the value of
-the subpool associated with the page if it exists.  This will be used for
-subpool accounting when the page is freed.
-
-The routine vma_commit_reservation() is then called to adjust the reserve
-map based on the consumption of the reservation.  In general, this involves
-ensuring the page is represented within a file_region structure of the region
-map.  For shared mappings where the the reservation was present, an entry
-in the reserve map already existed so no change is made.  However, if there
-was no reservation in a shared mapping or this was a private mapping a new
-entry must be created.
-
-It is possible that the reserve map could have been changed between the call
-to vma_needs_reservation() at the beginning of alloc_huge_page() and the
-call to vma_commit_reservation() after the page was allocated.  This would
-be possible if hugetlb_reserve_pages was called for the same page in a shared
-mapping.  In such cases, the reservation count and subpool free page count
-will be off by one.  This rare condition can be identified by comparing the
-return value from vma_needs_reservation and vma_commit_reservation.  If such
-a race is detected, the subpool and global reserve counts are adjusted to
-compensate.  See the section "Reservation Map Helper Routines" for more
-information on these routines.
-
-
-Instantiate Huge Pages
-----------------------
-After huge page allocation, the page is typically added to the page tables
-of the allocating task.  Before this, pages in a shared mapping are added
-to the page cache and pages in private mappings are added to an anonymous
-reverse mapping.  In both cases, the PagePrivate flag is cleared.  Therefore,
-when a huge page that has been instantiated is freed no adjustment is made
-to the global reservation count (resv_huge_pages).
-
-
-Freeing Huge Pages
-------------------
-Huge page freeing is performed by the routine free_huge_page().  This routine
-is the destructor for hugetlbfs compound pages.  As a result, it is only
-passed a pointer to the page struct.  When a huge page is freed, reservation
-accounting may need to be performed.  This would be the case if the page was
-associated with a subpool that contained reserves, or the page is being freed
-on an error path where a global reserve count must be restored.
-
-The page->private field points to any subpool associated with the page.
-If the PagePrivate flag is set, it indicates the global reserve count should
-be adjusted (see the section "Consuming Reservations/Allocating a Huge Page"
-for information on how these are set).
-
-The routine first calls hugepage_subpool_put_pages() for the page.  If this
-routine returns a value of 0 (which does not equal the value passed 1) it
-indicates reserves are associated with the subpool, and this newly free page
-must be used to keep the number of subpool reserves above the minimum size.
-Therefore, the global resv_huge_pages counter is incremented in this case.
-
-If the PagePrivate flag was set in the page, the global resv_huge_pages counter
-will always be incremented.
-
-
-Subpool Reservations
---------------------
-There is a struct hstate associated with each huge page size.  The hstate
-tracks all huge pages of the specified size.  A subpool represents a subset
-of pages within a hstate that is associated with a mounted hugetlbfs
-filesystem.
-
-When a hugetlbfs filesystem is mounted a min_size option can be specified
-which indicates the minimum number of huge pages required by the filesystem.
-If this option is specified, the number of huge pages corresponding to
-min_size are reserved for use by the filesystem.  This number is tracked in
-the min_hpages field of a struct hugepage_subpool.  At mount time,
-hugetlb_acct_memory(min_hpages) is called to reserve the specified number of
-huge pages.  If they can not be reserved, the mount fails.
-
-The routines hugepage_subpool_get/put_pages() are called when pages are
-obtained from or released back to a subpool.  They perform all subpool
-accounting, and track any reservations associated with the subpool.
-hugepage_subpool_get/put_pages are passed the number of huge pages by which
-to adjust the subpool 'used page' count (down for get, up for put).  Normally,
-they return the same value that was passed or an error if not enough pages
-exist in the subpool.
-
-However, if reserves are associated with the subpool a return value less
-than the passed value may be returned.  This return value indicates the
-number of additional global pool adjustments which must be made.  For example,
-suppose a subpool contains 3 reserved huge pages and someone asks for 5.
-The 3 reserved pages associated with the subpool can be used to satisfy part
-of the request.  But, 2 pages must be obtained from the global pools.  To
-relay this information to the caller, the value 2 is returned.  The caller
-is then responsible for attempting to obtain the additional two pages from
-the global pools.
-
-
-COW and Reservations
---------------------
-Since shared mappings all point to and use the same underlying pages, the
-biggest reservation concern for COW is private mappings.  In this case,
-two tasks can be pointing at the same previously allocated page.  One task
-attempts to write to the page, so a new page must be allocated so that each
-task points to its own page.
-
-When the page was originally allocated, the reservation for that page was
-consumed.  When an attempt to allocate a new page is made as a result of
-COW, it is possible that no free huge pages are free and the allocation
-will fail.
-
-When the private mapping was originally created, the owner of the mapping
-was noted by setting the HPAGE_RESV_OWNER bit in the pointer to the reservation
-map of the owner.  Since the owner created the mapping, the owner owns all
-the reservations associated with the mapping.  Therefore, when a write fault
-occurs and there is no page available, different action is taken for the owner
-and non-owner of the reservation.
-
-In the case where the faulting task is not the owner, the fault will fail and
-the task will typically receive a SIGBUS.
-
-If the owner is the faulting task, we want it to succeed since it owned the
-original reservation.  To accomplish this, the page is unmapped from the
-non-owning task.  In this way, the only reference is from the owning task.
-In addition, the HPAGE_RESV_UNMAPPED bit is set in the reservation map pointer
-of the non-owning task.  The non-owning task may receive a SIGBUS if it later
-faults on a non-present page.  But, the original owner of the
-mapping/reservation will behave as expected.
-
-
-Reservation Map Modifications
------------------------------
-The following low level routines are used to make modifications to a
-reservation map.  Typically, these routines are not called directly.  Rather,
-a reservation map helper routine is called which calls one of these low level
-routines.  These low level routines are fairly well documented in the source
-code (mm/hugetlb.c).  These routines are:
-long region_chg(struct resv_map *resv, long f, long t);
-long region_add(struct resv_map *resv, long f, long t);
-void region_abort(struct resv_map *resv, long f, long t);
-long region_count(struct resv_map *resv, long f, long t);
-
-Operations on the reservation map typically involve two operations:
-1) region_chg() is called to examine the reserve map and determine how
-   many pages in the specified range [f, t) are NOT currently represented.
-
-   The calling code performs global checks and allocations to determine if
-   there are enough huge pages for the operation to succeed.
-
-2a) If the operation can succeed, region_add() is called to actually modify
-    the reservation map for the same range [f, t) previously passed to
-    region_chg().
-2b) If the operation can not succeed, region_abort is called for the same range
-    [f, t) to abort the operation.
-
-Note that this is a two step process where region_add() and region_abort()
-are guaranteed to succeed after a prior call to region_chg() for the same
-range.  region_chg() is responsible for pre-allocating any data structures
-necessary to ensure the subsequent operations (specifically region_add()))
-will succeed.
-
-As mentioned above, region_chg() determines the number of pages in the range
-which are NOT currently represented in the map.  This number is returned to
-the caller.  region_add() returns the number of pages in the range added to
-the map.  In most cases, the return value of region_add() is the same as the
-return value of region_chg().  However, in the case of shared mappings it is
-possible for changes to the reservation map to be made between the calls to
-region_chg() and region_add().  In this case, the return value of region_add()
-will not match the return value of region_chg().  It is likely that in such
-cases global counts and subpool accounting will be incorrect and in need of
-adjustment.  It is the responsibility of the caller to check for this condition
-and make the appropriate adjustments.
-
-The routine region_del() is called to remove regions from a reservation map.
-It is typically called in the following situations:
-- When a file in the hugetlbfs filesystem is being removed, the inode will
-  be released and the reservation map freed.  Before freeing the reservation
-  map, all the individual file_region structures must be freed.  In this case
-  region_del is passed the range [0, LONG_MAX).
-- When a hugetlbfs file is being truncated.  In this case, all allocated pages
-  after the new file size must be freed.  In addition, any file_region entries
-  in the reservation map past the new end of file must be deleted.  In this
-  case, region_del is passed the range [new_end_of_file, LONG_MAX).
-- When a hole is being punched in a hugetlbfs file.  In this case, huge pages
-  are removed from the middle of the file one at a time.  As the pages are
-  removed, region_del() is called to remove the corresponding entry from the
-  reservation map.  In this case, region_del is passed the range
-  [page_idx, page_idx + 1).
-In every case, region_del() will return the number of pages removed from the
-reservation map.  In VERY rare cases, region_del() can fail.  This can only
-happen in the hole punch case where it has to split an existing file_region
-entry and can not allocate a new structure.  In this error case, region_del()
-will return -ENOMEM.  The problem here is that the reservation map will
-indicate that there is a reservation for the page.  However, the subpool and
-global reservation counts will not reflect the reservation.  To handle this
-situation, the routine hugetlb_fix_reserve_counts() is called to adjust the
-counters so that they correspond with the reservation map entry that could
-not be deleted.
-
-region_count() is called when unmapping a private huge page mapping.  In
-private mappings, the lack of a entry in the reservation map indicates that
-a reservation exists.  Therefore, by counting the number of entries in the
-reservation map we know how many reservations were consumed and how many are
-outstanding (outstanding = (end - start) - region_count(resv, start, end)).
-Since the mapping is going away, the subpool and global reservation counts
-are decremented by the number of outstanding reservations.
-
-
-Reservation Map Helper Routines
--------------------------------
-Several helper routines exist to query and modify the reservation maps.
-These routines are only interested with reservations for a specific huge
-page, so they just pass in an address instead of a range.  In addition,
-they pass in the associated VMA.  From the VMA, the type of mapping (private
-or shared) and the location of the reservation map (inode or VMA) can be
-determined.  These routines simply call the underlying routines described
-in the section "Reservation Map Modifications".  However, they do take into
-account the 'opposite' meaning of reservation map entries for private and
-shared mappings and hide this detail from the caller.
-
-long vma_needs_reservation(struct hstate *h,
-				struct vm_area_struct *vma, unsigned long addr)
-This routine calls region_chg() for the specified page.  If no reservation
-exists, 1 is returned.  If a reservation exists, 0 is returned.
-
-long vma_commit_reservation(struct hstate *h,
-				struct vm_area_struct *vma, unsigned long addr)
-This calls region_add() for the specified page.  As in the case of region_chg
-and region_add, this routine is to be called after a previous call to
-vma_needs_reservation.  It will add a reservation entry for the page.  It
-returns 1 if the reservation was added and 0 if not.  The return value should
-be compared with the return value of the previous call to
-vma_needs_reservation.  An unexpected difference indicates the reservation
-map was modified between calls.
-
-void vma_end_reservation(struct hstate *h,
-				struct vm_area_struct *vma, unsigned long addr)
-This calls region_abort() for the specified page.  As in the case of region_chg
-and region_abort, this routine is to be called after a previous call to
-vma_needs_reservation.  It will abort/end the in progress reservation add
-operation.
-
-long vma_add_reservation(struct hstate *h,
-				struct vm_area_struct *vma, unsigned long addr)
-This is a special wrapper routine to help facilitate reservation cleanup
-on error paths.  It is only called from the routine restore_reserve_on_error().
-This routine is used in conjunction with vma_needs_reservation in an attempt
-to add a reservation to the reservation map.  It takes into account the
-different reservation map semantics for private and shared mappings.  Hence,
-region_add is called for shared mappings (as an entry present in the map
-indicates a reservation), and region_del is called for private mappings (as
-the absence of an entry in the map indicates a reservation).  See the section
-"Reservation cleanup in error paths" for more information on what needs to
-be done on error paths.
-
-
-Reservation Cleanup in Error Paths
-----------------------------------
-As mentioned in the section "Reservation Map Helper Routines", reservation
-map modifications are performed in two steps.  First vma_needs_reservation
-is called before a page is allocated.  If the allocation is successful,
-then vma_commit_reservation is called.  If not, vma_end_reservation is called.
-Global and subpool reservation counts are adjusted based on success or failure
-of the operation and all is well.
-
-Additionally, after a huge page is instantiated the PagePrivate flag is
-cleared so that accounting when the page is ultimately freed is correct.
-
-However, there are several instances where errors are encountered after a huge
-page is allocated but before it is instantiated.  In this case, the page
-allocation has consumed the reservation and made the appropriate subpool,
-reservation map and global count adjustments.  If the page is freed at this
-time (before instantiation and clearing of PagePrivate), then free_huge_page
-will increment the global reservation count.  However, the reservation map
-indicates the reservation was consumed.  This resulting inconsistent state
-will cause the 'leak' of a reserved huge page.  The global reserve count will
-be  higher than it should and prevent allocation of a pre-allocated page.
-
-The routine restore_reserve_on_error() attempts to handle this situation.  It
-is fairly well documented.  The intention of this routine is to restore
-the reservation map to the way it was before the page allocation.   In this
-way, the state of the reservation map will correspond to the global reservation
-count after the page is freed.
-
-The routine restore_reserve_on_error itself may encounter errors while
-attempting to restore the reservation map entry.  In this case, it will
-simply clear the PagePrivate flag of the page.  In this way, the global
-reserve count will not be incremented when the page is freed.  However, the
-reservation map will continue to look as though the reservation was consumed.
-A page can still be allocated for the address, but it will not use a reserved
-page as originally intended.
-
-There is some code (most notably userfaultfd) which can not call
-restore_reserve_on_error.  In this case, it simply modifies the PagePrivate
-so that a reservation will not be leaked when the huge page is freed.
-
-
-Reservations and Memory Policy
-------------------------------
-Per-node huge page lists existed in struct hstate when git was first used
-to manage Linux code.  The concept of reservations was added some time later.
-When reservations were added, no attempt was made to take memory policy
-into account.  While cpusets are not exactly the same as memory policy, this
-comment in hugetlb_acct_memory sums up the interaction between reservations
-and cpusets/memory policy.
-	/*
-	 * When cpuset is configured, it breaks the strict hugetlb page
-	 * reservation as the accounting is done on a global variable. Such
-	 * reservation is completely rubbish in the presence of cpuset because
-	 * the reservation is not checked against page availability for the
-	 * current cpuset. Application can still potentially OOM'ed by kernel
-	 * with lack of free htlb page in cpuset that the task is in.
-	 * Attempt to enforce strict accounting with cpuset is almost
-	 * impossible (or too ugly) because cpuset is too fluid that
-	 * task or memory node can be dynamically moved between cpusets.
-	 *
-	 * The change of semantics for shared hugetlb mapping with cpuset is
-	 * undesirable. However, in order to preserve some of the semantics,
-	 * we fall back to check against current free page availability as
-	 * a best attempt and hopefully to minimize the impact of changing
-	 * semantics that cpuset has.
-	 */
-
-Huge page reservations were added to prevent unexpected page allocation
-failures (OOM) at page fault time.  However, if an application makes use
-of cpusets or memory policy there is no guarantee that huge pages will be
-available on the required nodes.  This is true even if there are a sufficient
-number of global reservations.
-
-
-Mike Kravetz, 7 April 2017
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
deleted file mode 100644
index faf077d..0000000
--- a/Documentation/vm/hugetlbpage.txt
+++ /dev/null
@@ -1,351 +0,0 @@
-
-The intent of this file is to give a brief summary of hugetlbpage support in
-the Linux kernel.  This support is built on top of multiple page size support
-that is provided by most modern architectures.  For example, x86 CPUs normally
-support 4K and 2M (1G if architecturally supported) page sizes, ia64
-architecture supports multiple page sizes 4K, 8K, 64K, 256K, 1M, 4M, 16M,
-256M and ppc64 supports 4K and 16M.  A TLB is a cache of virtual-to-physical
-translations.  Typically this is a very scarce resource on processor.
-Operating systems try to make best use of limited number of TLB resources.
-This optimization is more critical now as bigger and bigger physical memories
-(several GBs) are more readily available.
-
-Users can use the huge page support in Linux kernel by either using the mmap
-system call or standard SYSV shared memory system calls (shmget, shmat).
-
-First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
-(present under "File systems") and CONFIG_HUGETLB_PAGE (selected
-automatically when CONFIG_HUGETLBFS is selected) configuration
-options.
-
-The /proc/meminfo file provides information about the total number of
-persistent hugetlb pages in the kernel's huge page pool.  It also displays
-default huge page size and information about the number of free, reserved
-and surplus huge pages in the pool of huge pages of default size.
-The huge page size is needed for generating the proper alignment and
-size of the arguments to system calls that map huge page regions.
-
-The output of "cat /proc/meminfo" will include lines like:
-
-.....
-HugePages_Total: uuu
-HugePages_Free:  vvv
-HugePages_Rsvd:  www
-HugePages_Surp:  xxx
-Hugepagesize:    yyy kB
-Hugetlb:         zzz kB
-
-where:
-HugePages_Total is the size of the pool of huge pages.
-HugePages_Free  is the number of huge pages in the pool that are not yet
-                allocated.
-HugePages_Rsvd  is short for "reserved," and is the number of huge pages for
-                which a commitment to allocate from the pool has been made,
-                but no allocation has yet been made.  Reserved huge pages
-                guarantee that an application will be able to allocate a
-                huge page from the pool of huge pages at fault time.
-HugePages_Surp  is short for "surplus," and is the number of huge pages in
-                the pool above the value in /proc/sys/vm/nr_hugepages. The
-                maximum number of surplus huge pages is controlled by
-                /proc/sys/vm/nr_overcommit_hugepages.
-Hugepagesize    is the default hugepage size (in Kb).
-Hugetlb         is the total amount of memory (in kB), consumed by huge
-                pages of all sizes.
-                If huge pages of different sizes are in use, this number
-                will exceed HugePages_Total * Hugepagesize. To get more
-                detailed information, please, refer to
-                /sys/kernel/mm/hugepages (described below).
-
-
-/proc/filesystems should also show a filesystem of type "hugetlbfs" configured
-in the kernel.
-
-/proc/sys/vm/nr_hugepages indicates the current number of "persistent" huge
-pages in the kernel's huge page pool.  "Persistent" huge pages will be
-returned to the huge page pool when freed by a task.  A user with root
-privileges can dynamically allocate more or free some persistent huge pages
-by increasing or decreasing the value of 'nr_hugepages'.
-
-Pages that are used as huge pages are reserved inside the kernel and cannot
-be used for other purposes.  Huge pages cannot be swapped out under
-memory pressure.
-
-Once a number of huge pages have been pre-allocated to the kernel huge page
-pool, a user with appropriate privilege can use either the mmap system call
-or shared memory system calls to use the huge pages.  See the discussion of
-Using Huge Pages, below.
-
-The administrator can allocate persistent huge pages on the kernel boot
-command line by specifying the "hugepages=N" parameter, where 'N' = the
-number of huge pages requested.  This is the most reliable method of
-allocating huge pages as memory has not yet become fragmented.
-
-Some platforms support multiple huge page sizes.  To allocate huge pages
-of a specific size, one must precede the huge pages boot command parameters
-with a huge page size selection parameter "hugepagesz=<size>".  <size> must
-be specified in bytes with optional scale suffix [kKmMgG].  The default huge
-page size may be selected with the "default_hugepagesz=<size>" boot parameter.
-
-When multiple huge page sizes are supported, /proc/sys/vm/nr_hugepages
-indicates the current number of pre-allocated huge pages of the default size.
-Thus, one can use the following command to dynamically allocate/deallocate
-default sized persistent huge pages:
-
-	echo 20 > /proc/sys/vm/nr_hugepages
-
-This command will try to adjust the number of default sized huge pages in the
-huge page pool to 20, allocating or freeing huge pages, as required.
-
-On a NUMA platform, the kernel will attempt to distribute the huge page pool
-over all the set of allowed nodes specified by the NUMA memory policy of the
-task that modifies nr_hugepages.  The default for the allowed nodes--when the
-task has default memory policy--is all on-line nodes with memory.  Allowed
-nodes with insufficient available, contiguous memory for a huge page will be
-silently skipped when allocating persistent huge pages.  See the discussion
-below of the interaction of task memory policy, cpusets and per node attributes
-with the allocation and freeing of persistent huge pages.
-
-The success or failure of huge page allocation depends on the amount of
-physically contiguous memory that is present in system at the time of the
-allocation attempt.  If the kernel is unable to allocate huge pages from
-some nodes in a NUMA system, it will attempt to make up the difference by
-allocating extra pages on other nodes with sufficient available contiguous
-memory, if any.
-
-System administrators may want to put this command in one of the local rc
-init files.  This will enable the kernel to allocate huge pages early in
-the boot process when the possibility of getting physical contiguous pages
-is still very high.  Administrators can verify the number of huge pages
-actually allocated by checking the sysctl or meminfo.  To check the per node
-distribution of huge pages in a NUMA system, use:
-
-	cat /sys/devices/system/node/node*/meminfo | fgrep Huge
-
-/proc/sys/vm/nr_overcommit_hugepages specifies how large the pool of
-huge pages can grow, if more huge pages than /proc/sys/vm/nr_hugepages are
-requested by applications.  Writing any non-zero value into this file
-indicates that the hugetlb subsystem is allowed to try to obtain that
-number of "surplus" huge pages from the kernel's normal page pool, when the
-persistent huge page pool is exhausted. As these surplus huge pages become
-unused, they are freed back to the kernel's normal page pool.
-
-When increasing the huge page pool size via nr_hugepages, any existing surplus
-pages will first be promoted to persistent huge pages.  Then, additional
-huge pages will be allocated, if necessary and if possible, to fulfill
-the new persistent huge page pool size.
-
-The administrator may shrink the pool of persistent huge pages for
-the default huge page size by setting the nr_hugepages sysctl to a
-smaller value.  The kernel will attempt to balance the freeing of huge pages
-across all nodes in the memory policy of the task modifying nr_hugepages.
-Any free huge pages on the selected nodes will be freed back to the kernel's
-normal page pool.
-
-Caveat: Shrinking the persistent huge page pool via nr_hugepages such that
-it becomes less than the number of huge pages in use will convert the balance
-of the in-use huge pages to surplus huge pages.  This will occur even if
-the number of surplus pages it would exceed the overcommit value.  As long as
-this condition holds--that is, until nr_hugepages+nr_overcommit_hugepages is
-increased sufficiently, or the surplus huge pages go out of use and are freed--
-no more surplus huge pages will be allowed to be allocated.
-
-With support for multiple huge page pools at run-time available, much of
-the huge page userspace interface in /proc/sys/vm has been duplicated in sysfs.
-The /proc interfaces discussed above have been retained for backwards
-compatibility. The root huge page control directory in sysfs is:
-
-	/sys/kernel/mm/hugepages
-
-For each huge page size supported by the running kernel, a subdirectory
-will exist, of the form:
-
-	hugepages-${size}kB
-
-Inside each of these directories, the same set of files will exist:
-
-	nr_hugepages
-	nr_hugepages_mempolicy
-	nr_overcommit_hugepages
-	free_hugepages
-	resv_hugepages
-	surplus_hugepages
-
-which function as described above for the default huge page-sized case.
-
-
-Interaction of Task Memory Policy with Huge Page Allocation/Freeing
-===================================================================
-
-Whether huge pages are allocated and freed via the /proc interface or
-the /sysfs interface using the nr_hugepages_mempolicy attribute, the NUMA
-nodes from which huge pages are allocated or freed are controlled by the
-NUMA memory policy of the task that modifies the nr_hugepages_mempolicy
-sysctl or attribute.  When the nr_hugepages attribute is used, mempolicy
-is ignored.
-
-The recommended method to allocate or free huge pages to/from the kernel
-huge page pool, using the nr_hugepages example above, is:
-
-    numactl --interleave <node-list> echo 20 \
-				>/proc/sys/vm/nr_hugepages_mempolicy
-
-or, more succinctly:
-
-    numactl -m <node-list> echo 20 >/proc/sys/vm/nr_hugepages_mempolicy
-
-This will allocate or free abs(20 - nr_hugepages) to or from the nodes
-specified in <node-list>, depending on whether number of persistent huge pages
-is initially less than or greater than 20, respectively.  No huge pages will be
-allocated nor freed on any node not included in the specified <node-list>.
-
-When adjusting the persistent hugepage count via nr_hugepages_mempolicy, any
-memory policy mode--bind, preferred, local or interleave--may be used.  The
-resulting effect on persistent huge page allocation is as follows:
-
-1) Regardless of mempolicy mode [see Documentation/vm/numa_memory_policy.txt],
-   persistent huge pages will be distributed across the node or nodes
-   specified in the mempolicy as if "interleave" had been specified.
-   However, if a node in the policy does not contain sufficient contiguous
-   memory for a huge page, the allocation will not "fallback" to the nearest
-   neighbor node with sufficient contiguous memory.  To do this would cause
-   undesirable imbalance in the distribution of the huge page pool, or
-   possibly, allocation of persistent huge pages on nodes not allowed by
-   the task's memory policy.
-
-2) One or more nodes may be specified with the bind or interleave policy.
-   If more than one node is specified with the preferred policy, only the
-   lowest numeric id will be used.  Local policy will select the node where
-   the task is running at the time the nodes_allowed mask is constructed.
-   For local policy to be deterministic, the task must be bound to a cpu or
-   cpus in a single node.  Otherwise, the task could be migrated to some
-   other node at any time after launch and the resulting node will be
-   indeterminate.  Thus, local policy is not very useful for this purpose.
-   Any of the other mempolicy modes may be used to specify a single node.
-
-3) The nodes allowed mask will be derived from any non-default task mempolicy,
-   whether this policy was set explicitly by the task itself or one of its
-   ancestors, such as numactl.  This means that if the task is invoked from a
-   shell with non-default policy, that policy will be used.  One can specify a
-   node list of "all" with numactl --interleave or --membind [-m] to achieve
-   interleaving over all nodes in the system or cpuset.
-
-4) Any task mempolicy specified--e.g., using numactl--will be constrained by
-   the resource limits of any cpuset in which the task runs.  Thus, there will
-   be no way for a task with non-default policy running in a cpuset with a
-   subset of the system nodes to allocate huge pages outside the cpuset
-   without first moving to a cpuset that contains all of the desired nodes.
-
-5) Boot-time huge page allocation attempts to distribute the requested number
-   of huge pages over all on-lines nodes with memory.
-
-Per Node Hugepages Attributes
-=============================
-
-A subset of the contents of the root huge page control directory in sysfs,
-described above, will be replicated under each the system device of each
-NUMA node with memory in:
-
-	/sys/devices/system/node/node[0-9]*/hugepages/
-
-Under this directory, the subdirectory for each supported huge page size
-contains the following attribute files:
-
-	nr_hugepages
-	free_hugepages
-	surplus_hugepages
-
-The free_' and surplus_' attribute files are read-only.  They return the number
-of free and surplus [overcommitted] huge pages, respectively, on the parent
-node.
-
-The nr_hugepages attribute returns the total number of huge pages on the
-specified node.  When this attribute is written, the number of persistent huge
-pages on the parent node will be adjusted to the specified value, if sufficient
-resources exist, regardless of the task's mempolicy or cpuset constraints.
-
-Note that the number of overcommit and reserve pages remain global quantities,
-as we don't know until fault time, when the faulting task's mempolicy is
-applied, from which node the huge page allocation will be attempted.
-
-
-Using Huge Pages
-================
-
-If the user applications are going to request huge pages using mmap system
-call, then it is required that system administrator mount a file system of
-type hugetlbfs:
-
-  mount -t hugetlbfs \
-	-o uid=<value>,gid=<value>,mode=<value>,pagesize=<value>,size=<value>,\
-	min_size=<value>,nr_inodes=<value> none /mnt/huge
-
-This command mounts a (pseudo) filesystem of type hugetlbfs on the directory
-/mnt/huge.  Any files created on /mnt/huge uses huge pages.  The uid and gid
-options sets the owner and group of the root of the file system.  By default
-the uid and gid of the current process are taken.  The mode option sets the
-mode of root of file system to value & 01777.  This value is given in octal.
-By default the value 0755 is picked. If the platform supports multiple huge
-page sizes, the pagesize option can be used to specify the huge page size and
-associated pool.  pagesize is specified in bytes.  If pagesize is not specified
-the platform's default huge page size and associated pool will be used. The
-size option sets the maximum value of memory (huge pages) allowed for that
-filesystem (/mnt/huge).  The size option can be specified in bytes, or as a
-percentage of the specified huge page pool (nr_hugepages).  The size is
-rounded down to HPAGE_SIZE boundary.  The min_size option sets the minimum
-value of memory (huge pages) allowed for the filesystem.  min_size can be
-specified in the same way as size, either bytes or a percentage of the
-huge page pool.  At mount time, the number of huge pages specified by
-min_size are reserved for use by the filesystem.  If there are not enough
-free huge pages available, the mount will fail.  As huge pages are allocated
-to the filesystem and freed, the reserve count is adjusted so that the sum
-of allocated and reserved huge pages is always at least min_size.  The option
-nr_inodes sets the maximum number of inodes that /mnt/huge can use.  If the
-size, min_size or nr_inodes option is not provided on command line then
-no limits are set.  For pagesize, size, min_size and nr_inodes options, you
-can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For example, size=2K
-has the same meaning as size=2048.
-
-While read system calls are supported on files that reside on hugetlb
-file systems, write system calls are not.
-
-Regular chown, chgrp, and chmod commands (with right permissions) could be
-used to change the file attributes on hugetlbfs.
-
-Also, it is important to note that no such mount command is required if
-applications are going to use only shmat/shmget system calls or mmap with
-MAP_HUGETLB.  For an example of how to use mmap with MAP_HUGETLB see map_hugetlb
-below.
-
-Users who wish to use hugetlb memory via shared memory segment should be a
-member of a supplementary group and system admin needs to configure that gid
-into /proc/sys/vm/hugetlb_shm_group.  It is possible for same or different
-applications to use any combination of mmaps and shm* calls, though the mount of
-filesystem will be required for using mmap calls without MAP_HUGETLB.
-
-Syscalls that operate on memory backed by hugetlb pages only have their lengths
-aligned to the native page size of the processor; they will normally fail with
-errno set to EINVAL or exclude hugetlb pages that extend beyond the length if
-not hugepage aligned.  For example, munmap(2) will fail if memory is backed by
-a hugetlb page and the length is smaller than the hugepage size.
-
-
-Examples
-========
-
-1) map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c
-
-2) hugepage-shm:  see tools/testing/selftests/vm/hugepage-shm.c
-
-3) hugepage-mmap:  see tools/testing/selftests/vm/hugepage-mmap.c
-
-4) The libhugetlbfs (https://github.com/libhugetlbfs/libhugetlbfs) library
-   provides a wide range of userspace tools to help with huge page usability,
-   environment setup, and control.
-
-Kernel development regression testing
-=====================================
-
-The most complete set of hugetlb tests are in the libhugetlbfs repository.
-If you modify any hugetlb related code, use the libhugetlbfs test suite
-to check for regressions.  In addition, if you add any new hugetlb
-functionality, please add appropriate tests to libhugetlbfs.
diff --git a/Documentation/vm/hwpoison.rst b/Documentation/vm/hwpoison.rst
new file mode 100644
index 0000000..09bd24a
--- /dev/null
+++ b/Documentation/vm/hwpoison.rst
@@ -0,0 +1,186 @@
+.. _hwpoison:
+
+========
+hwpoison
+========
+
+What is hwpoison?
+=================
+
+Upcoming Intel CPUs have support for recovering from some memory errors
+(``MCA recovery``). This requires the OS to declare a page "poisoned",
+kill the processes associated with it and avoid using it in the future.
+
+This patchkit implements the necessary infrastructure in the VM.
+
+To quote the overview comment:
+
+ * High level machine check handler. Handles pages reported by the
+ * hardware as being corrupted usually due to a 2bit ECC memory or cache
+ * failure.
+ *
+ * This focusses on pages detected as corrupted in the background.
+ * When the current CPU tries to consume corruption the currently
+ * running process can just be killed directly instead. This implies
+ * that if the error cannot be handled for some reason it's safe to
+ * just ignore it because no corruption has been consumed yet. Instead
+ * when that happens another machine check will happen.
+ *
+ * Handles page cache pages in various states. The tricky part
+ * here is that we can access any page asynchronous to other VM
+ * users, because memory failures could happen anytime and anywhere,
+ * possibly violating some of their assumptions. This is why this code
+ * has to be extremely careful. Generally it tries to use normal locking
+ * rules, as in get the standard locks, even if that means the
+ * error handling takes potentially a long time.
+ *
+ * Some of the operations here are somewhat inefficient and have non
+ * linear algorithmic complexity, because the data structures have not
+ * been optimized for this case. This is in particular the case
+ * for the mapping from a vma to a process. Since this case is expected
+ * to be rare we hope we can get away with this.
+
+The code consists of the high level handler in mm/memory-failure.c,
+a new page poison bit and various checks in the VM to handle poisoned
+pages.
+
+The main target right now is KVM guests, but it works for all kinds
+of applications. KVM support requires a recent qemu-kvm release.
+
+For KVM use there was a need for a new signal type so that KVM can
+inject the machine check into the guest with the proper address. This
+in theory allows other applications to handle memory failures too. The
+expectation is that nearly all applications won't do that, but some
+very specialized ones might.
+
+Failure recovery modes
+======================
+
+There are two (actually three) modes that memory failure recovery can be in:
+
+vm.memory_failure_recovery sysctl set to zero:
+	All memory failures cause a panic. Do not attempt recovery.
+	(on x86 this can be also affected by the tolerant level of the
+	MCE subsystem)
+
+early kill
+	(can be controlled globally and per process)
+	Send SIGBUS to the application as soon as the error is detected.
+	This allows applications that can process memory errors in a gentle
+	way (e.g. drop the affected object) to react before the corruption
+	is consumed (see the sketch after this list).
+	This is the mode used by KVM qemu.
+
+late kill
+	Send SIGBUS when the application runs into the corrupted page.
+	This is best for memory error unaware applications and is the
+	default. Note that some pages are always handled as late kill.
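+
+For illustration, a minimal userspace sketch (not part of this patch) of an
+application handling the early kill SIGBUS; the handler name is made up::
+
+	#define _GNU_SOURCE		/* for BUS_MCEERR_* on some libcs */
+	#include <signal.h>
+	#include <stdlib.h>
+	#include <unistd.h>
+
+	/* Hypothetical handler: BUS_MCEERR_AO means the error was found
+	 * in the background (early kill); BUS_MCEERR_AR means the
+	 * poisoned page was actually consumed. */
+	static void hwpoison_handler(int sig, siginfo_t *si, void *ctx)
+	{
+		if (si->si_code == BUS_MCEERR_AO) {
+			/* Drop or rebuild the object backed by the page
+			 * at si->si_addr, then continue running. */
+			static const char msg[] = "memory error (AO)\n";
+			write(STDERR_FILENO, msg, sizeof(msg) - 1);
+		} else {
+			_exit(EXIT_FAILURE);	/* error already consumed */
+		}
+	}
+
+	int main(void)
+	{
+		struct sigaction sa = {
+			.sa_sigaction	= hwpoison_handler,
+			.sa_flags	= SA_SIGINFO,
+		};
+
+		sigaction(SIGBUS, &sa, NULL);
+		/* ... application work ... */
+		return 0;
+	}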
+
+User control
+============
+
+vm.memory_failure_recovery
+	See sysctl.txt
+
+vm.memory_failure_early_kill
+	Enable early kill mode globally
+
+PR_MCE_KILL
+	Set early/late kill mode/revert to system default
+
+	arg1: PR_MCE_KILL_CLEAR:
+		Revert to system default
+	arg1: PR_MCE_KILL_SET:
+		arg2 defines thread specific mode
+
+		PR_MCE_KILL_EARLY:
+			Early kill
+		PR_MCE_KILL_LATE:
+			Late kill
+		PR_MCE_KILL_DEFAULT:
+			Use system global default
+
+	Note that if you want to have a dedicated thread which handles
+	the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should
+	call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise,
+	the SIGBUS is sent to the main thread.
+
+PR_MCE_KILL_GET
+	return current mode
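+
+For illustration, a minimal sketch (not part of this patch) of selecting
+early kill mode for the calling thread and reading the mode back::
+
+	#include <stdio.h>
+	#include <sys/prctl.h>
+
+	int main(void)
+	{
+		/* Opt this thread into early kill: SIGBUS(BUS_MCEERR_AO)
+		 * is sent as soon as an error is detected. */
+		if (prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0))
+			perror("PR_MCE_KILL");
+
+		/* PR_MCE_KILL_GET returns the current per-thread mode. */
+		printf("kill mode: %d\n", prctl(PR_MCE_KILL_GET, 0, 0, 0, 0));
+		return 0;
+	}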
+
+Testing
+=======
+
+* madvise(MADV_HWPOISON, ....) (as root) - Poison a page in the
+  process for testing (see the sketch after this list)
+
+* hwpoison-inject module through debugfs ``/sys/kernel/debug/hwpoison/``
+
+  corrupt-pfn
+	Inject a hwpoison fault at the PFN echoed into this file. This does
+	some early filtering to avoid corrupting unintended pages in test suites.
+
+  unpoison-pfn
+	Software-unpoison the page at the PFN echoed into this file, so
+	that the page can be reused again.  This only works for Linux
+	injected failures, not for real memory failures.
+
+  Note these injection interfaces are not stable and might change between
+  kernel versions.
+
+  corrupt-filter-dev-major, corrupt-filter-dev-minor
+	Only handle memory failures to pages associated with the file
+	system defined by block device major/minor.  -1U is the
+	wildcard value.  This should only be used for testing with
+	artificial injection.
+
+  corrupt-filter-memcg
+	Limit injection to pages owned by a memory cgroup, specified by
+	the inode number of the memcg.
+
+	Example::
+
+		mkdir /sys/fs/cgroup/mem/hwpoison
+
+		usemem -m 100 -s 1000 &
+		echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
+
+		memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
+		echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
+
+		page-types -p `pidof init`   --hwpoison  # shall do nothing
+		page-types -p `pidof usemem` --hwpoison  # poison its pages
+
+  corrupt-filter-flags-mask, corrupt-filter-flags-value
+	When specified, only poison pages if ((page_flags & mask) ==
+	value).  This allows stress testing of many kinds of
+	pages. The page_flags are the same as in /proc/kpageflags. The
+	flag bits are defined in include/linux/kernel-page-flags.h and
+	documented in Documentation/admin-guide/mm/pagemap.rst
+
+* Architecture specific MCE injector
+
+  x86 has mce-inject, mce-test
+
+  Some portable hwpoison test programs in mce-test, see below.
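+
+As an illustration of the madvise() based injection above, a minimal sketch
+(not part of this patch); it must run as root and assumes a kernel built
+with CONFIG_MEMORY_FAILURE::
+
+	#include <stdio.h>
+	#include <string.h>
+	#include <sys/mman.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		long pagesize = sysconf(_SC_PAGESIZE);
+		char *p;
+
+		/* Back the mapping with a real page before poisoning it. */
+		p = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (p == MAP_FAILED)
+			return 1;
+		memset(p, 0, pagesize);
+
+		/* Handle this page as if hardware reported it corrupted. */
+		if (madvise(p, pagesize, MADV_HWPOISON))
+			perror("MADV_HWPOISON");
+
+		/* Touching the page now is expected to raise SIGBUS. */
+		return 0;
+	}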
+
+References
+==========
+
+http://halobates.de/mce-lc09-2.pdf
+	Overview presentation from LinuxCon 09
+
+git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git
+	Test suite (hwpoison specific portable tests in tsrc)
+
+git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
+	x86 specific injector
+
+
+Limitations
+===========
+
+- Not all page types are supported (and never will be). Most kernel
+  internal objects cannot be recovered; only LRU pages for now.
+- Right now hugepage support is missing.
+
+---
+Andi Kleen, Oct 2009
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt
deleted file mode 100644
index e912d7e..0000000
--- a/Documentation/vm/hwpoison.txt
+++ /dev/null
@@ -1,187 +0,0 @@
-What is hwpoison?
-
-Upcoming Intel CPUs have support for recovering from some memory errors
-(``MCA recovery''). This requires the OS to declare a page "poisoned",
-kill the processes associated with it and avoid using it in the future.
-
-This patchkit implements the necessary infrastructure in the VM.
-
-To quote the overview comment:
-
- * High level machine check handler. Handles pages reported by the
- * hardware as being corrupted usually due to a 2bit ECC memory or cache
- * failure.
- *
- * This focusses on pages detected as corrupted in the background.
- * When the current CPU tries to consume corruption the currently
- * running process can just be killed directly instead. This implies
- * that if the error cannot be handled for some reason it's safe to
- * just ignore it because no corruption has been consumed yet. Instead
- * when that happens another machine check will happen.
- *
- * Handles page cache pages in various states. The tricky part
- * here is that we can access any page asynchronous to other VM
- * users, because memory failures could happen anytime and anywhere,
- * possibly violating some of their assumptions. This is why this code
- * has to be extremely careful. Generally it tries to use normal locking
- * rules, as in get the standard locks, even if that means the
- * error handling takes potentially a long time.
- *
- * Some of the operations here are somewhat inefficient and have non
- * linear algorithmic complexity, because the data structures have not
- * been optimized for this case. This is in particular the case
- * for the mapping from a vma to a process. Since this case is expected
- * to be rare we hope we can get away with this.
-
-The code consists of a the high level handler in mm/memory-failure.c,
-a new page poison bit and various checks in the VM to handle poisoned
-pages.
-
-The main target right now is KVM guests, but it works for all kinds
-of applications. KVM support requires a recent qemu-kvm release.
-
-For the KVM use there was need for a new signal type so that
-KVM can inject the machine check into the guest with the proper
-address. This in theory allows other applications to handle
-memory failures too. The expection is that near all applications
-won't do that, but some very specialized ones might.
-
----
-
-There are two (actually three) modi memory failure recovery can be in:
-
-vm.memory_failure_recovery sysctl set to zero:
-	All memory failures cause a panic. Do not attempt recovery.
-	(on x86 this can be also affected by the tolerant level of the
-	MCE subsystem)
-
-early kill
-	(can be controlled globally and per process)
-	Send SIGBUS to the application as soon as the error is detected
-	This allows applications who can process memory errors in a gentle
-	way (e.g. drop affected object)
-	This is the mode used by KVM qemu.
-
-late kill
-	Send SIGBUS when the application runs into the corrupted page.
-	This is best for memory error unaware applications and default
-	Note some pages are always handled as late kill.
-
----
-
-User control:
-
-vm.memory_failure_recovery
-	See sysctl.txt
-
-vm.memory_failure_early_kill
-	Enable early kill mode globally
-
-PR_MCE_KILL
-	Set early/late kill mode/revert to system default
-	arg1: PR_MCE_KILL_CLEAR: Revert to system default
-	arg1: PR_MCE_KILL_SET: arg2 defines thread specific mode
-		PR_MCE_KILL_EARLY: Early kill
-		PR_MCE_KILL_LATE:  Late kill
-		PR_MCE_KILL_DEFAULT: Use system global default
-	Note that if you want to have a dedicated thread which handles
-	the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should
-	call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise,
-	the SIGBUS is sent to the main thread.
-
-PR_MCE_KILL_GET
-	return current mode
-
-
----
-
-Testing:
-
-madvise(MADV_HWPOISON, ....)
-	(as root)
-	Poison a page in the process for testing
-
-
-hwpoison-inject module through debugfs
-
-/sys/kernel/debug/hwpoison/
-
-corrupt-pfn
-
-Inject hwpoison fault at PFN echoed into this file. This does
-some early filtering to avoid corrupted unintended pages in test suites.
-
-unpoison-pfn
-
-Software-unpoison page at PFN echoed into this file. This
-way a page can be reused again.
-This only works for Linux injected failures, not for real
-memory failures.
-
-Note these injection interfaces are not stable and might change between
-kernel versions
-
-corrupt-filter-dev-major
-corrupt-filter-dev-minor
-
-Only handle memory failures to pages associated with the file system defined
-by block device major/minor.  -1U is the wildcard value.
-This should be only used for testing with artificial injection.
-
-corrupt-filter-memcg
-
-Limit injection to pages owned by memgroup. Specified by inode number
-of the memcg.
-
-Example:
-        mkdir /sys/fs/cgroup/mem/hwpoison
-
-        usemem -m 100 -s 1000 &
-        echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
-
-        memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
-        echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
-
-        page-types -p `pidof init`   --hwpoison  # shall do nothing
-        page-types -p `pidof usemem` --hwpoison  # poison its pages
-
-corrupt-filter-flags-mask
-corrupt-filter-flags-value
-
-When specified, only poison pages if ((page_flags & mask) == value).
-This allows stress testing of many kinds of pages. The page_flags
-are the same as in /proc/kpageflags. The flag bits are defined in
-include/linux/kernel-page-flags.h and documented in
-Documentation/vm/pagemap.txt
-
-Architecture specific MCE injector
-
-x86 has mce-inject, mce-test
-
-Some portable hwpoison test programs in mce-test, see blow.
-
----
-
-References:
-
-http://halobates.de/mce-lc09-2.pdf
-	Overview presentation from LinuxCon 09
-
-git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git
-	Test suite (hwpoison specific portable tests in tsrc)
-
-git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
-	x86 specific injector
-
-
----
-
-Limitations:
-
-- Not all page types are supported and never will. Most kernel internal
-objects cannot be recovered, only LRU pages for now.
-- Right now hugepage support is missing.
-
----
-Andi Kleen, Oct 2009
-
diff --git a/Documentation/vm/idle_page_tracking.txt b/Documentation/vm/idle_page_tracking.txt
deleted file mode 100644
index 85dcc3b..0000000
--- a/Documentation/vm/idle_page_tracking.txt
+++ /dev/null
@@ -1,98 +0,0 @@
-MOTIVATION
-
-The idle page tracking feature allows to track which memory pages are being
-accessed by a workload and which are idle. This information can be useful for
-estimating the workload's working set size, which, in turn, can be taken into
-account when configuring the workload parameters, setting memory cgroup limits,
-or deciding where to place the workload within a compute cluster.
-
-It is enabled by CONFIG_IDLE_PAGE_TRACKING=y.
-
-USER API
-
-The idle page tracking API is located at /sys/kernel/mm/page_idle. Currently,
-it consists of the only read-write file, /sys/kernel/mm/page_idle/bitmap.
-
-The file implements a bitmap where each bit corresponds to a memory page. The
-bitmap is represented by an array of 8-byte integers, and the page at PFN #i is
-mapped to bit #i%64 of array element #i/64, byte order is native. When a bit is
-set, the corresponding page is idle.
-
-A page is considered idle if it has not been accessed since it was marked idle
-(for more details on what "accessed" actually means see the IMPLEMENTATION
-DETAILS section). To mark a page idle one has to set the bit corresponding to
-the page by writing to the file. A value written to the file is OR-ed with the
-current bitmap value.
-
-Only accesses to user memory pages are tracked. These are pages mapped to a
-process address space, page cache and buffer pages, swap cache pages. For other
-page types (e.g. SLAB pages) an attempt to mark a page idle is silently ignored,
-and hence such pages are never reported idle.
-
-For huge pages the idle flag is set only on the head page, so one has to read
-/proc/kpageflags in order to correctly count idle huge pages.
-
-Reading from or writing to /sys/kernel/mm/page_idle/bitmap will return
--EINVAL if you are not starting the read/write on an 8-byte boundary, or
-if the size of the read/write is not a multiple of 8 bytes. Writing to
-this file beyond max PFN will return -ENXIO.
-
-That said, in order to estimate the amount of pages that are not used by a
-workload one should:
-
- 1. Mark all the workload's pages as idle by setting corresponding bits in
-    /sys/kernel/mm/page_idle/bitmap. The pages can be found by reading
-    /proc/pid/pagemap if the workload is represented by a process, or by
-    filtering out alien pages using /proc/kpagecgroup in case the workload is
-    placed in a memory cgroup.
-
- 2. Wait until the workload accesses its working set.
-
- 3. Read /sys/kernel/mm/page_idle/bitmap and count the number of bits set. If
-    one wants to ignore certain types of pages, e.g. mlocked pages since they
-    are not reclaimable, he or she can filter them out using /proc/kpageflags.
-
-See Documentation/vm/pagemap.txt for more information about /proc/pid/pagemap,
-/proc/kpageflags, and /proc/kpagecgroup.
-
-IMPLEMENTATION DETAILS
-
-The kernel internally keeps track of accesses to user memory pages in order to
-reclaim unreferenced pages first on memory shortage conditions. A page is
-considered referenced if it has been recently accessed via a process address
-space, in which case one or more PTEs it is mapped to will have the Accessed bit
-set, or marked accessed explicitly by the kernel (see mark_page_accessed()). The
-latter happens when:
-
- - a userspace process reads or writes a page using a system call (e.g. read(2)
-   or write(2))
-
- - a page that is used for storing filesystem buffers is read or written,
-   because a process needs filesystem metadata stored in it (e.g. lists a
-   directory tree)
-
- - a page is accessed by a device driver using get_user_pages()
-
-When a dirty page is written to swap or disk as a result of memory reclaim or
-exceeding the dirty memory limit, it is not marked referenced.
-
-The idle memory tracking feature adds a new page flag, the Idle flag. This flag
-is set manually, by writing to /sys/kernel/mm/page_idle/bitmap (see the USER API
-section), and cleared automatically whenever a page is referenced as defined
-above.
-
-When a page is marked idle, the Accessed bit must be cleared in all PTEs it is
-mapped to, otherwise we will not be able to detect accesses to the page coming
-from a process address space. To avoid interference with the reclaimer, which,
-as noted above, uses the Accessed bit to promote actively referenced pages, one
-more page flag is introduced, the Young flag. When the PTE Accessed bit is
-cleared as a result of setting or updating a page's Idle flag, the Young flag
-is set on the page. The reclaimer treats the Young flag as an extra PTE
-Accessed bit and therefore will consider such a page as referenced.
-
-Since the idle memory tracking feature is based on the memory reclaimer logic,
-it only works with pages that are on an LRU list, other pages are silently
-ignored. That means it will ignore a user memory page if it is isolated, but
-since there are usually not many of them, it should not affect the overall
-result noticeably. In order not to stall scanning of the idle page bitmap,
-locked pages may be skipped too.
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
new file mode 100644
index 0000000..c4ded22
--- /dev/null
+++ b/Documentation/vm/index.rst
@@ -0,0 +1,50 @@
+=====================================
+Linux Memory Management Documentation
+=====================================
+
+This is a collection of documents about the Linux memory management (mm) subsystem.
+
+User guides for MM features
+===========================
+
+The following documents provide guides for controlling and tuning
+various features of the Linux memory management subsystem.
+
+.. toctree::
+   :maxdepth: 1
+
+   swap_numa
+   zswap
+
+Kernel developers MM documentation
+==================================
+
+The documents below describe MM internals at different levels of
+detail, ranging from notes and mailing list responses to elaborate
+descriptions of data structures and algorithms.
+
+.. toctree::
+   :maxdepth: 1
+
+   active_mm
+   balance
+   cleancache
+   frontswap
+   highmem
+   hmm
+   hwpoison
+   hugetlbfs_reserv
+   ksm
+   mmu_notifier
+   numa
+   overcommit-accounting
+   page_migration
+   page_frags
+   page_owner
+   remap_file_pages
+   slub
+   split_page_table_lock
+   transhuge
+   unevictable-lru
+   z3fold
+   zsmalloc
diff --git a/Documentation/vm/ksm.rst b/Documentation/vm/ksm.rst
new file mode 100644
index 0000000..d32016d
--- /dev/null
+++ b/Documentation/vm/ksm.rst
@@ -0,0 +1,87 @@
+.. _ksm:
+
+=======================
+Kernel Samepage Merging
+=======================
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
+added to the Linux kernel in 2.6.32.  See ``mm/ksm.c`` for its implementation,
+and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
+
+The userspace interface of KSM is described in :ref:`Documentation/admin-guide/mm/ksm.rst <admin_guide_ksm>`
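+
+For background, a minimal userspace sketch (not part of this document) of
+registering an area with KSM; ksmd only considers areas advised with
+MADV_MERGEABLE::
+
+	#include <stdio.h>
+	#include <sys/mman.h>
+
+	#define LEN (16 * 1024 * 1024)
+
+	int main(void)
+	{
+		/* KSM only merges anonymous (private) pages, and only in
+		 * areas the application has advised as mergeable. */
+		void *buf = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
+				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+		/* Fails with EINVAL if the kernel lacks CONFIG_KSM. */
+		if (madvise(buf, LEN, MADV_MERGEABLE))
+			perror("MADV_MERGEABLE");
+
+		/* ... fill buf with duplicate-rich data; ksmd, if running,
+		 * may merge identical pages in this range ... */
+		return 0;
+	}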
+
+Design
+======
+
+Overview
+--------
+
+.. kernel-doc:: mm/ksm.c
+   :doc: Overview
+
+Reverse mapping
+---------------
+KSM maintains reverse mapping information for KSM pages in the stable
+tree.
+
+If a KSM page is shared between fewer than ``max_page_sharing`` VMAs,
+the node of the stable tree that represents such KSM page points to a
+list of :c:type:`struct rmap_item` and the ``page->mapping`` of the
+KSM page points to the stable tree node.
+
+When the sharing passes this threshold, KSM adds a second dimension to
+the stable tree. The tree node becomes a "chain" that links one or
+more "dups". Each "dup" keeps reverse mapping information for a KSM
+page with ``page->mapping`` pointing to that "dup".
+
+Every "chain" and all "dups" linked into a "chain" enforce the
+invariant that they represent the same write protected memory content,
+even if each "dup" is pointed to by a different KSM page copy of
+that content.
+
+This way the stable tree lookup computational complexity is unaffected
+compared to an unlimited list of reverse mappings. It is still
+enforced that there cannot be KSM page content duplicates in the
+stable tree itself.
+
+The deduplication limit enforced by ``max_page_sharing`` is required
+to prevent the virtual memory rmap lists from growing too large. The rmap
+walk has O(N) complexity where N is the number of rmap_items
+(i.e. virtual mappings) that are sharing the page, which is in turn
+capped by ``max_page_sharing``. So this effectively spreads the linear
+O(N) computational complexity from rmap walk context over different
+KSM pages. The ksmd walk over the stable_node "chains" is also O(N),
+but N is the number of stable_node "dups", not the number of
+rmap_items, so it does not have a significant impact on ksmd performance. In
+practice the best stable_node "dup" candidate will be kept and found
+at the head of the "dups" list.
+
+High values of ``max_page_sharing`` result in faster memory merging
+(because there will be fewer stable_node dups queued into the
+stable_node chain->hlist to check for pruning) and higher
+deduplication factor, at the expense of slower worst case rmap walks
+for any given KSM page, which can happen during swapping, compaction,
+NUMA balancing and page migration.
+
+The ``stable_node_dups/stable_node_chains`` ratio is also affected by the
+``max_page_sharing`` tunable, and a high ratio may indicate fragmentation
+in the stable_node dups. This could be solved by introducing
+fragmentation algorithms in ksmd which would refile rmap_items from
+one stable_node dup to another stable_node dup, in order to free up
+stable_node "dups" with few rmap_items in them, but that may increase
+ksmd CPU usage and possibly slow down the read-only computations on
+the KSM pages of the applications.
+
+The whole list of stable_node "dups" linked in the stable_node
+"chains" is scanned periodically in order to prune stale stable_nodes.
+The frequency of such scans is defined by
+``stable_node_chains_prune_millisecs`` sysfs tunable.
+
+Reference
+---------
+.. kernel-doc:: mm/ksm.c
+   :functions: mm_slot ksm_scan stable_node rmap_item
+
+--
+Izik Eidus,
+Hugh Dickins, 17 Nov 2009
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
deleted file mode 100644
index 6686bd2..0000000
--- a/Documentation/vm/ksm.txt
+++ /dev/null
@@ -1,178 +0,0 @@
-How to use the Kernel Samepage Merging feature
-----------------------------------------------
-
-KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
-added to the Linux kernel in 2.6.32.  See mm/ksm.c for its implementation,
-and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
-
-The KSM daemon ksmd periodically scans those areas of user memory which
-have been registered with it, looking for pages of identical content which
-can be replaced by a single write-protected page (which is automatically
-copied if a process later wants to update its content).
-
-KSM was originally developed for use with KVM (where it was known as
-Kernel Shared Memory), to fit more virtual machines into physical memory,
-by sharing the data common between them.  But it can be useful to any
-application which generates many instances of the same data.
-
-KSM only merges anonymous (private) pages, never pagecache (file) pages.
-KSM's merged pages were originally locked into kernel memory, but can now
-be swapped out just like other user pages (but sharing is broken when they
-are swapped back in: ksmd must rediscover their identity and merge again).
-
-KSM only operates on those areas of address space which an application
-has advised to be likely candidates for merging, by using the madvise(2)
-system call: int madvise(addr, length, MADV_MERGEABLE).
-
-The app may call int madvise(addr, length, MADV_UNMERGEABLE) to cancel
-that advice and restore unshared pages: whereupon KSM unmerges whatever
-it merged in that range.  Note: this unmerging call may suddenly require
-more memory than is available - possibly failing with EAGAIN, but more
-probably arousing the Out-Of-Memory killer.
-
-If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
-and MADV_UNMERGEABLE simply fail with EINVAL.  If the running kernel was
-built with CONFIG_KSM=y, those calls will normally succeed: even if the
-the KSM daemon is not currently running, MADV_MERGEABLE still registers
-the range for whenever the KSM daemon is started; even if the range
-cannot contain any pages which KSM could actually merge; even if
-MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
-
-If a region of memory must be split into at least one new MADV_MERGEABLE
-or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
-will exceed vm.max_map_count (see Documentation/sysctl/vm.txt).
-
-Like other madvise calls, they are intended for use on mapped areas of
-the user address space: they will report ENOMEM if the specified range
-includes unmapped gaps (though working on the intervening mapped areas),
-and might fail with EAGAIN if not enough memory for internal structures.
-
-Applications should be considerate in their use of MADV_MERGEABLE,
-restricting its use to areas likely to benefit.  KSM's scans may use a lot
-of processing power: some installations will disable KSM for that reason.
-
-The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
-readable by all but writable only by root:
-
-pages_to_scan    - how many present pages to scan before ksmd goes to sleep
-                   e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan"
-                   Default: 100 (chosen for demonstration purposes)
-
-sleep_millisecs  - how many milliseconds ksmd should sleep before next scan
-                   e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
-                   Default: 20 (chosen for demonstration purposes)
-
-merge_across_nodes - specifies if pages from different numa nodes can be merged.
-                   When set to 0, ksm merges only pages which physically
-                   reside in the memory area of same NUMA node. That brings
-                   lower latency to access of shared pages. Systems with more
-                   nodes, at significant NUMA distances, are likely to benefit
-                   from the lower latency of setting 0. Smaller systems, which
-                   need to minimize memory usage, are likely to benefit from
-                   the greater sharing of setting 1 (default). You may wish to
-                   compare how your system performs under each setting, before
-                   deciding on which to use. merge_across_nodes setting can be
-                   changed only when there are no ksm shared pages in system:
-                   set run 2 to unmerge pages first, then to 1 after changing
-                   merge_across_nodes, to remerge according to the new setting.
-                   Default: 1 (merging across nodes as in earlier releases)
-
-run              - set 0 to stop ksmd from running but keep merged pages,
-                   set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
-                   set 2 to stop ksmd and unmerge all pages currently merged,
-                         but leave mergeable areas registered for next run
-                   Default: 0 (must be changed to 1 to activate KSM,
-                               except if CONFIG_SYSFS is disabled)
-
-use_zero_pages   - specifies whether empty pages (i.e. allocated pages
-                   that only contain zeroes) should be treated specially.
-                   When set to 1, empty pages are merged with the kernel
-                   zero page(s) instead of with each other as it would
-                   happen normally. This can improve the performance on
-                   architectures with coloured zero pages, depending on
-                   the workload. Care should be taken when enabling this
-                   setting, as it can potentially degrade the performance
-                   of KSM for some workloads, for example if the checksums
-                   of pages candidate for merging match the checksum of
-                   an empty page. This setting can be changed at any time,
-                   it is only effective for pages merged after the change.
-                   Default: 0 (normal KSM behaviour as in earlier releases)
-
-max_page_sharing - Maximum sharing allowed for each KSM page. This
-                   enforces a deduplication limit to avoid the virtual
-                   memory rmap lists to grow too large. The minimum
-                   value is 2 as a newly created KSM page will have at
-                   least two sharers. The rmap walk has O(N)
-                   complexity where N is the number of rmap_items
-                   (i.e. virtual mappings) that are sharing the page,
-                   which is in turn capped by max_page_sharing. So
-                   this effectively spread the the linear O(N)
-                   computational complexity from rmap walk context
-                   over different KSM pages. The ksmd walk over the
-                   stable_node "chains" is also O(N), but N is the
-                   number of stable_node "dups", not the number of
-                   rmap_items, so it has not a significant impact on
-                   ksmd performance. In practice the best stable_node
-                   "dup" candidate will be kept and found at the head
-                   of the "dups" list. The higher this value the
-                   faster KSM will merge the memory (because there
-                   will be fewer stable_node dups queued into the
-                   stable_node chain->hlist to check for pruning) and
-                   the higher the deduplication factor will be, but
-                   the slowest the worst case rmap walk could be for
-                   any given KSM page. Slowing down the rmap_walk
-                   means there will be higher latency for certain
-                   virtual memory operations happening during
-                   swapping, compaction, NUMA balancing and page
-                   migration, in turn decreasing responsiveness for
-                   the caller of those virtual memory operations. The
-                   scheduler latency of other tasks not involved with
-                   the VM operations doing the rmap walk is not
-                   affected by this parameter as the rmap walks are
-                   always schedule friendly themselves.
-
-stable_node_chains_prune_millisecs - How frequently to walk the whole
-                   list of stable_node "dups" linked in the
-                   stable_node "chains" in order to prune stale
-                   stable_nodes. Smaller milllisecs values will free
-                   up the KSM metadata with lower latency, but they
-                   will make ksmd use more CPU during the scan. This
-                   only applies to the stable_node chains so it's a
-                   noop if not a single KSM page hit the
-                   max_page_sharing yet (there would be no stable_node
-                   chains in such case).
-
-The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
-
-pages_shared     - how many shared pages are being used
-pages_sharing    - how many more sites are sharing them i.e. how much saved
-pages_unshared   - how many pages unique but repeatedly checked for merging
-pages_volatile   - how many pages changing too fast to be placed in a tree
-full_scans       - how many times all mergeable areas have been scanned
-
-stable_node_chains - number of stable node chains allocated, this is
-		     effectively the number of KSM pages that hit the
-		     max_page_sharing limit
-stable_node_dups   - number of stable node dups queued into the
-		     stable_node chains
-
-A high ratio of pages_sharing to pages_shared indicates good sharing, but
-a high ratio of pages_unshared to pages_sharing indicates wasted effort.
-pages_volatile embraces several different kinds of activity, but a high
-proportion there would also indicate poor use of madvise MADV_MERGEABLE.
-
-The maximum possible page_sharing/page_shared ratio is limited by the
-max_page_sharing tunable. To increase the ratio max_page_sharing must
-be increased accordingly.
-
-The stable_node_dups/stable_node_chains ratio is also affected by the
-max_page_sharing tunable, and an high ratio may indicate fragmentation
-in the stable_node dups, which could be solved by introducing
-fragmentation algorithms in ksmd which would refile rmap_items from
-one stable_node dup to another stable_node dup, in order to freeup
-stable_node "dups" with few rmap_items in them, but that may increase
-the ksmd CPU usage and possibly slowdown the readonly computations on
-the KSM pages of the applications.
-
-Izik Eidus,
-Hugh Dickins, 17 Nov 2009
diff --git a/Documentation/vm/mmu_notifier.rst b/Documentation/vm/mmu_notifier.rst
new file mode 100644
index 0000000..47baa1c
--- /dev/null
+++ b/Documentation/vm/mmu_notifier.rst
@@ -0,0 +1,99 @@
+.. _mmu_notifier:
+
+When do you need to notify inside page table lock?
+===================================================
+
+When clearing a pte/pmd we are given a choice to notify the event under the
+page table lock (the notify versions of \*_clear_flush call
+mmu_notifier_invalidate_range()). But that notification is not necessary in
+all cases.
+
+For secondary TLBs (non-CPU TLBs) like the IOMMU TLB or a device TLB (when a
+device uses something like ATS/PASID to get the IOMMU to walk the CPU page
+table to access a process virtual address space), there are only 2 cases
+when you need to notify those secondary TLBs while holding the page table
+lock when clearing a pte/pmd:
+
+  A) the page backing the address is freed before mmu_notifier_invalidate_range_end()
+  B) a page table entry is updated to point to a new page (COW, write fault
+     on zero page, __replace_page(), ...)
+
+Case A is obvious: you do not want to take the risk of the device writing to
+a page that might now be used by some completely different task.
+
+Case B is more subtle. For correctness it requires the following sequence to
+happen:
+
+  - take page table lock
+  - clear page table entry and notify ([pmd/pte]p_huge_clear_flush_notify())
+  - set page table entry to point to new page
+
+If clearing the page table entry is not followed by a notify before setting
+the new pte/pmd value, then you can break the memory model of languages
+like C11 or C++11 for the device.
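+
+As an illustrative sketch only (heavily simplified, with declarations and
+the real COW bookkeeping from mm/memory.c elided), the sequence above maps
+to something like::
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+
+	/* Clears the entry, flushes the CPU TLB and, being the _notify
+	 * variant, calls mmu_notifier_invalidate_range() while the page
+	 * table lock is held. */
+	entry = ptep_clear_flush_notify(vma, address, ptep);
+
+	/* Only after the secondary TLBs were invalidated may the entry
+	 * point at the new page. */
+	set_pte_at(mm, address, ptep, mk_pte(new_page, vma->vm_page_prot));
+
+	pte_unmap_unlock(ptep, ptl);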
+
+Consider the following scenario (the device uses a feature similar to
+ATS/PASID):
+
+Two addresses, addrA and addrB, such that \|addrA - addrB\| >= PAGE_SIZE; we
+assume they are write protected for COW (the other cases of B apply too).
+
+::
+
+ [Time N] --------------------------------------------------------------------
+ CPU-thread-0  {try to write to addrA}
+ CPU-thread-1  {try to write to addrB}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {read addrA and populate device TLB}
+ DEV-thread-2  {read addrB and populate device TLB}
+ [Time N+1] ------------------------------------------------------------------
+ CPU-thread-0  {COW_step0: {mmu_notifier_invalidate_range_start(addrA)}}
+ CPU-thread-1  {COW_step0: {mmu_notifier_invalidate_range_start(addrB)}}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+2] ------------------------------------------------------------------
+ CPU-thread-0  {COW_step1: {update page table to point to new page for addrA}}
+ CPU-thread-1  {COW_step1: {update page table to point to new page for addrB}}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+3] ------------------------------------------------------------------
+ CPU-thread-0  {preempted}
+ CPU-thread-1  {preempted}
+ CPU-thread-2  {write to addrA which is a write to new page}
+ CPU-thread-3  {}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+4] ------------------------------------------------------------------
+ CPU-thread-0  {preempted}
+ CPU-thread-1  {preempted}
+ CPU-thread-2  {}
+ CPU-thread-3  {write to addrB which is a write to new page}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+5] ------------------------------------------------------------------
+ CPU-thread-0  {preempted}
+ CPU-thread-1  {COW_step3: {mmu_notifier_invalidate_range_end(addrB)}}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {}
+ DEV-thread-2  {}
+ [Time N+6] ------------------------------------------------------------------
+ CPU-thread-0  {preempted}
+ CPU-thread-1  {}
+ CPU-thread-2  {}
+ CPU-thread-3  {}
+ DEV-thread-0  {read addrA from old page}
+ DEV-thread-2  {read addrB from new page}
+
+So here, because at time N+2 the clearing of the page table entry was not
+paired with a notification to invalidate the secondary TLB, the device sees
+the new value for addrB before seeing the new value for addrA. This breaks
+total memory ordering for the device.
+
+When changing a pte to write protect, or to point to a new write protected
+page with the same content (KSM), it is fine to delay the
+mmu_notifier_invalidate_range call to mmu_notifier_invalidate_range_end()
+outside the page table lock. This is true even if the thread doing the page
+table update is preempted right after releasing the page table lock but
+before calling mmu_notifier_invalidate_range_end().
diff --git a/Documentation/vm/mmu_notifier.txt b/Documentation/vm/mmu_notifier.txt
deleted file mode 100644
index 23b4625..0000000
--- a/Documentation/vm/mmu_notifier.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-When do you need to notify inside page table lock ?
-
-When clearing a pte/pmd we are given a choice to notify the event through
-(notify version of *_clear_flush call mmu_notifier_invalidate_range) under
-the page table lock. But that notification is not necessary in all cases.
-
-For secondary TLB (non CPU TLB) like IOMMU TLB or device TLB (when device use
-thing like ATS/PASID to get the IOMMU to walk the CPU page table to access a
-process virtual address space). There is only 2 cases when you need to notify
-those secondary TLB while holding page table lock when clearing a pte/pmd:
-
-  A) page backing address is free before mmu_notifier_invalidate_range_end()
-  B) a page table entry is updated to point to a new page (COW, write fault
-     on zero page, __replace_page(), ...)
-
-Case A is obvious you do not want to take the risk for the device to write to
-a page that might now be used by some completely different task.
-
-Case B is more subtle. For correctness it requires the following sequence to
-happen:
-  - take page table lock
-  - clear page table entry and notify ([pmd/pte]p_huge_clear_flush_notify())
-  - set page table entry to point to new page
-
-If clearing the page table entry is not followed by a notify before setting
-the new pte/pmd value then you can break memory model like C11 or C++11 for
-the device.
-
-Consider the following scenario (device use a feature similar to ATS/PASID):
-
-Two address addrA and addrB such that |addrA - addrB| >= PAGE_SIZE we assume
-they are write protected for COW (other case of B apply too).
-
-[Time N] --------------------------------------------------------------------
-CPU-thread-0  {try to write to addrA}
-CPU-thread-1  {try to write to addrB}
-CPU-thread-2  {}
-CPU-thread-3  {}
-DEV-thread-0  {read addrA and populate device TLB}
-DEV-thread-2  {read addrB and populate device TLB}
-[Time N+1] ------------------------------------------------------------------
-CPU-thread-0  {COW_step0: {mmu_notifier_invalidate_range_start(addrA)}}
-CPU-thread-1  {COW_step0: {mmu_notifier_invalidate_range_start(addrB)}}
-CPU-thread-2  {}
-CPU-thread-3  {}
-DEV-thread-0  {}
-DEV-thread-2  {}
-[Time N+2] ------------------------------------------------------------------
-CPU-thread-0  {COW_step1: {update page table to point to new page for addrA}}
-CPU-thread-1  {COW_step1: {update page table to point to new page for addrB}}
-CPU-thread-2  {}
-CPU-thread-3  {}
-DEV-thread-0  {}
-DEV-thread-2  {}
-[Time N+3] ------------------------------------------------------------------
-CPU-thread-0  {preempted}
-CPU-thread-1  {preempted}
-CPU-thread-2  {write to addrA which is a write to new page}
-CPU-thread-3  {}
-DEV-thread-0  {}
-DEV-thread-2  {}
-[Time N+3] ------------------------------------------------------------------
-CPU-thread-0  {preempted}
-CPU-thread-1  {preempted}
-CPU-thread-2  {}
-CPU-thread-3  {write to addrB which is a write to new page}
-DEV-thread-0  {}
-DEV-thread-2  {}
-[Time N+4] ------------------------------------------------------------------
-CPU-thread-0  {preempted}
-CPU-thread-1  {COW_step3: {mmu_notifier_invalidate_range_end(addrB)}}
-CPU-thread-2  {}
-CPU-thread-3  {}
-DEV-thread-0  {}
-DEV-thread-2  {}
-[Time N+5] ------------------------------------------------------------------
-CPU-thread-0  {preempted}
-CPU-thread-1  {}
-CPU-thread-2  {}
-CPU-thread-3  {}
-DEV-thread-0  {read addrA from old page}
-DEV-thread-2  {read addrB from new page}
-
-So here because at time N+2 the clear page table entry was not pair with a
-notification to invalidate the secondary TLB, the device see the new value for
-addrB before seing the new value for addrA. This break total memory ordering
-for the device.
-
-When changing a pte to write protect or to point to a new write protected page
-with same content (KSM) it is fine to delay the mmu_notifier_invalidate_range
-call to mmu_notifier_invalidate_range_end() outside the page table lock. This
-is true even if the thread doing the page table update is preempted right after
-releasing page table lock but before call mmu_notifier_invalidate_range_end().
diff --git a/Documentation/vm/numa b/Documentation/vm/numa
deleted file mode 100644
index a31b85b..0000000
--- a/Documentation/vm/numa
+++ /dev/null
@@ -1,146 +0,0 @@
-Started Nov 1999 by Kanoj Sarcar <kanoj@sgi.com>
-
-What is NUMA?
-
-This question can be answered from a couple of perspectives:  the
-hardware view and the Linux software view.
-
-From the hardware perspective, a NUMA system is a computer platform that
-comprises multiple components or assemblies each of which may contain 0
-or more CPUs, local memory, and/or IO buses.  For brevity and to
-disambiguate the hardware view of these physical components/assemblies
-from the software abstraction thereof, we'll call the components/assemblies
-'cells' in this document.
-
-Each of the 'cells' may be viewed as an SMP [symmetric multi-processor] subset
-of the system--although some components necessary for a stand-alone SMP system
-may not be populated on any given cell.   The cells of the NUMA system are
-connected together with some sort of system interconnect--e.g., a crossbar or
-point-to-point link are common types of NUMA system interconnects.  Both of
-these types of interconnects can be aggregated to create NUMA platforms with
-cells at multiple distances from other cells.
-
-For Linux, the NUMA platforms of interest are primarily what is known as Cache
-Coherent NUMA or ccNUMA systems.   With ccNUMA systems, all memory is visible
-to and accessible from any CPU attached to any cell and cache coherency
-is handled in hardware by the processor caches and/or the system interconnect.
-
-Memory access time and effective memory bandwidth varies depending on how far
-away the cell containing the CPU or IO bus making the memory access is from the
-cell containing the target memory.  For example, access to memory by CPUs
-attached to the same cell will experience faster access times and higher
-bandwidths than accesses to memory on other, remote cells.  NUMA platforms
-can have cells at multiple remote distances from any given cell.
-
-Platform vendors don't build NUMA systems just to make software developers'
-lives interesting.  Rather, this architecture is a means to provide scalable
-memory bandwidth.  However, to achieve scalable memory bandwidth, system and
-application software must arrange for a large majority of the memory references
-[cache misses] to be to "local" memory--memory on the same cell, if any--or
-to the closest cell with memory.
-
-This leads to the Linux software view of a NUMA system:
-
-Linux divides the system's hardware resources into multiple software
-abstractions called "nodes".  Linux maps the nodes onto the physical cells
-of the hardware platform, abstracting away some of the details for some
-architectures.  As with physical cells, software nodes may contain 0 or more
-CPUs, memory and/or IO buses.  And, again, memory accesses to memory on
-"closer" nodes--nodes that map to closer cells--will generally experience
-faster access times and higher effective bandwidth than accesses to more
-remote cells.
-
-For some architectures, such as x86, Linux will "hide" any node representing a
-physical cell that has no memory attached, and reassign any CPUs attached to
-that cell to a node representing a cell that does have memory.  Thus, on
-these architectures, one cannot assume that all CPUs that Linux associates with
-a given node will see the same local memory access times and bandwidth.
-
-In addition, for some architectures, again x86 is an example, Linux supports
-the emulation of additional nodes.  For NUMA emulation, linux will carve up
-the existing nodes--or the system memory for non-NUMA platforms--into multiple
-nodes.  Each emulated node will manage a fraction of the underlying cells'
-physical memory.  NUMA emluation is useful for testing NUMA kernel and
-application features on non-NUMA platforms, and as a sort of memory resource
-management mechanism when used together with cpusets.
-[see Documentation/cgroup-v1/cpusets.txt]
-
-For each node with memory, Linux constructs an independent memory management
-subsystem, complete with its own free page lists, in-use page lists, usage
-statistics and locks to mediate access.  In addition, Linux constructs for
-each memory zone [one or more of DMA, DMA32, NORMAL, HIGH_MEMORY, MOVABLE],
-an ordered "zonelist".  A zonelist specifies the zones/nodes to visit when a
-selected zone/node cannot satisfy the allocation request.  This situation,
-when a zone has no available memory to satisfy a request, is called
-"overflow" or "fallback".
-
-Because some nodes contain multiple zones containing different types of
-memory, Linux must decide whether to order the zonelists such that allocations
-fall back to the same zone type on a different node, or to a different zone
-type on the same node.  This is an important consideration because some zones,
-such as DMA or DMA32, represent relatively scarce resources.  Linux chooses
-a default Node ordered zonelist. This means it tries to fallback to other zones
-from the same node before using remote nodes which are ordered by NUMA distance.
-
-By default, Linux will attempt to satisfy memory allocation requests from the
-node to which the CPU that executes the request is assigned.  Specifically,
-Linux will attempt to allocate from the first node in the appropriate zonelist
-for the node where the request originates.  This is called "local allocation."
-If the "local" node cannot satisfy the request, the kernel will examine other
-nodes' zones in the selected zonelist looking for the first zone in the list
-that can satisfy the request.
-
-Local allocation will tend to keep subsequent access to the allocated memory
-"local" to the underlying physical resources and off the system interconnect--
-as long as the task on whose behalf the kernel allocated some memory does not
-later migrate away from that memory.  The Linux scheduler is aware of the
-NUMA topology of the platform--embodied in the "scheduling domains" data
-structures [see Documentation/scheduler/sched-domains.txt]--and the scheduler
-attempts to minimize task migration to distant scheduling domains.  However,
-the scheduler does not take a task's NUMA footprint into account directly.
-Thus, under sufficient imbalance, tasks can migrate between nodes, remote
-from their initial node and kernel data structures.
-
-System administrators and application designers can restrict a task's migration
-to improve NUMA locality using various CPU affinity command line interfaces,
-such as taskset(1) and numactl(1), and program interfaces such as
-sched_setaffinity(2).  Further, one can modify the kernel's default local
-allocation behavior using Linux NUMA memory policy.
-[see Documentation/vm/numa_memory_policy.txt.]
-
-System administrators can restrict the CPUs and nodes' memories that a non-
-privileged user can specify in the scheduling or NUMA commands and functions
-using control groups and CPUsets.  [see Documentation/cgroup-v1/cpusets.txt]
-
-On architectures that do not hide memoryless nodes, Linux will include only
-zones [nodes] with memory in the zonelists.  This means that for a memoryless
-node the "local memory node"--the node of the first zone in CPU's node's
-zonelist--will not be the node itself.  Rather, it will be the node that the
-kernel selected as the nearest node with memory when it built the zonelists.
-So, default, local allocations will succeed with the kernel supplying the
-closest available memory.  This is a consequence of the same mechanism that
-allows such allocations to fallback to other nearby nodes when a node that
-does contain memory overflows.
-
-Some kernel allocations do not want or cannot tolerate this allocation fallback
-behavior.  Rather they want to be sure they get memory from the specified node
-or get notified that the node has no free memory.  This is usually the case when
-a subsystem allocates per CPU memory resources, for example.
-
-A typical model for making such an allocation is to obtain the node id of the
-node to which the "current CPU" is attached using one of the kernel's
-numa_node_id() or CPU_to_node() functions and then request memory from only
-the node id returned.  When such an allocation fails, the requesting subsystem
-may revert to its own fallback path.  The slab kernel memory allocator is an
-example of this.  Or, the subsystem may choose to disable or not to enable
-itself on allocation failure.  The kernel profiling subsystem is an example of
-this.
-
-If the architecture supports--does not hide--memoryless nodes, then CPUs
-attached to memoryless nodes would always incur the fallback path overhead
-or some subsystems would fail to initialize if they attempted to allocated
-memory exclusively from a node without memory.  To support such
-architectures transparently, kernel subsystems can use the numa_mem_id()
-or cpu_to_mem() function to locate the "local memory node" for the calling or
-specified CPU.  Again, this is the same node from which default, local page
-allocations will be attempted.
diff --git a/Documentation/vm/numa.rst b/Documentation/vm/numa.rst
new file mode 100644
index 0000000..185d8a5
--- /dev/null
+++ b/Documentation/vm/numa.rst
@@ -0,0 +1,150 @@
+.. _numa:
+
+Started Nov 1999 by Kanoj Sarcar <kanoj@sgi.com>
+
+=============
+What is NUMA?
+=============
+
+This question can be answered from a couple of perspectives:  the
+hardware view and the Linux software view.
+
+From the hardware perspective, a NUMA system is a computer platform that
+comprises multiple components or assemblies each of which may contain 0
+or more CPUs, local memory, and/or IO buses.  For brevity and to
+disambiguate the hardware view of these physical components/assemblies
+from the software abstraction thereof, we'll call the components/assemblies
+'cells' in this document.
+
+Each of the 'cells' may be viewed as an SMP [symmetric multi-processor] subset
+of the system--although some components necessary for a stand-alone SMP system
+may not be populated on any given cell.   The cells of the NUMA system are
+connected together with some sort of system interconnect--e.g., a crossbar or
+point-to-point link are common types of NUMA system interconnects.  Both of
+these types of interconnects can be aggregated to create NUMA platforms with
+cells at multiple distances from other cells.
+
+For Linux, the NUMA platforms of interest are primarily what is known as Cache
+Coherent NUMA or ccNUMA systems.   With ccNUMA systems, all memory is visible
+to and accessible from any CPU attached to any cell and cache coherency
+is handled in hardware by the processor caches and/or the system interconnect.
+
+Memory access time and effective memory bandwidth varies depending on how far
+away the cell containing the CPU or IO bus making the memory access is from the
+cell containing the target memory.  For example, access to memory by CPUs
+attached to the same cell will experience faster access times and higher
+bandwidths than accesses to memory on other, remote cells.  NUMA platforms
+can have cells at multiple remote distances from any given cell.
+
+Platform vendors don't build NUMA systems just to make software developers'
+lives interesting.  Rather, this architecture is a means to provide scalable
+memory bandwidth.  However, to achieve scalable memory bandwidth, system and
+application software must arrange for a large majority of the memory references
+[cache misses] to be to "local" memory--memory on the same cell, if any--or
+to the closest cell with memory.
+
+This leads to the Linux software view of a NUMA system:
+
+Linux divides the system's hardware resources into multiple software
+abstractions called "nodes".  Linux maps the nodes onto the physical cells
+of the hardware platform, abstracting away some of the details for some
+architectures.  As with physical cells, software nodes may contain 0 or more
+CPUs, memory and/or IO buses.  And, again, memory accesses to memory on
+"closer" nodes--nodes that map to closer cells--will generally experience
+faster access times and higher effective bandwidth than accesses to more
+remote cells.
+
+For some architectures, such as x86, Linux will "hide" any node representing a
+physical cell that has no memory attached, and reassign any CPUs attached to
+that cell to a node representing a cell that does have memory.  Thus, on
+these architectures, one cannot assume that all CPUs that Linux associates with
+a given node will see the same local memory access times and bandwidth.
+
+In addition, for some architectures, again x86 is an example, Linux supports
+the emulation of additional nodes.  For NUMA emulation, Linux will carve up
+the existing nodes--or the system memory for non-NUMA platforms--into multiple
+nodes.  Each emulated node will manage a fraction of the underlying cells'
+physical memory.  NUMA emulation is useful for testing NUMA kernel and
+application features on non-NUMA platforms, and as a sort of memory resource
+management mechanism when used together with cpusets.
+[see Documentation/cgroup-v1/cpusets.txt]
+
+For each node with memory, Linux constructs an independent memory management
+subsystem, complete with its own free page lists, in-use page lists, usage
+statistics and locks to mediate access.  In addition, Linux constructs for
+each memory zone [one or more of DMA, DMA32, NORMAL, HIGH_MEMORY, MOVABLE],
+an ordered "zonelist".  A zonelist specifies the zones/nodes to visit when a
+selected zone/node cannot satisfy the allocation request.  This situation,
+when a zone has no available memory to satisfy a request, is called
+"overflow" or "fallback".
+
+Because some nodes contain multiple zones containing different types of
+memory, Linux must decide whether to order the zonelists such that allocations
+fall back to the same zone type on a different node, or to a different zone
+type on the same node.  This is an important consideration because some zones,
+such as DMA or DMA32, represent relatively scarce resources.  Linux chooses
+a default Node ordered zonelist. This means it tries to fall back to other
+zones from the same node before using remote nodes, which are ordered by
+NUMA distance.
+
+By default, Linux will attempt to satisfy memory allocation requests from the
+node to which the CPU that executes the request is assigned.  Specifically,
+Linux will attempt to allocate from the first node in the appropriate zonelist
+for the node where the request originates.  This is called "local allocation."
+If the "local" node cannot satisfy the request, the kernel will examine other
+nodes' zones in the selected zonelist looking for the first zone in the list
+that can satisfy the request.
+
+Local allocation will tend to keep subsequent access to the allocated memory
+"local" to the underlying physical resources and off the system interconnect--
+as long as the task on whose behalf the kernel allocated some memory does not
+later migrate away from that memory.  The Linux scheduler is aware of the
+NUMA topology of the platform--embodied in the "scheduling domains" data
+structures [see Documentation/scheduler/sched-domains.txt]--and the scheduler
+attempts to minimize task migration to distant scheduling domains.  However,
+the scheduler does not take a task's NUMA footprint into account directly.
+Thus, under sufficient imbalance, tasks can migrate between nodes, remote
+from their initial node and kernel data structures.
+
+System administrators and application designers can restrict a task's migration
+to improve NUMA locality using various CPU affinity command line interfaces,
+such as taskset(1) and numactl(1), and program interfaces such as
+sched_setaffinity(2).  Further, one can modify the kernel's default local
+allocation behavior using Linux NUMA memory policy.
+[see Documentation/admin-guide/mm/numa_memory_policy.rst.]
+
+System administrators can restrict the CPUs and nodes' memories that a
+non-privileged user can specify in the scheduling or NUMA commands and
+functions using control groups and CPUsets.
+[see Documentation/cgroup-v1/cpusets.txt]
+
+On architectures that do not hide memoryless nodes, Linux will include only
+zones [nodes] with memory in the zonelists.  This means that for a memoryless
+node the "local memory node"--the node of the first zone in CPU's node's
+zonelist--will not be the node itself.  Rather, it will be the node that the
+kernel selected as the nearest node with memory when it built the zonelists.
+So, default, local allocations will succeed with the kernel supplying the
+closest available memory.  This is a consequence of the same mechanism that
+allows such allocations to fallback to other nearby nodes when a node that
+does contain memory overflows.
+
+Some kernel allocations do not want or cannot tolerate this allocation fallback
+behavior.  Rather they want to be sure they get memory from the specified node
+or get notified that the node has no free memory.  This is usually the case when
+a subsystem allocates per CPU memory resources, for example.
+
+A typical model for making such an allocation is to obtain the node id of the
+node to which the "current CPU" is attached using one of the kernel's
+numa_node_id() or cpu_to_node() functions and then request memory from only
+the node id returned.  When such an allocation fails, the requesting subsystem
+may revert to its own fallback path.  The slab kernel memory allocator is an
+example of this.  Or, the subsystem may choose to disable or not to enable
+itself on allocation failure.  The kernel profiling subsystem is an example of
+this.
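+
+A hedged sketch of this model (hypothetical subsystem code; foo_buf and the
+fallback policy are illustrative, not taken from any real subsystem)::
+
+	/* Allocate strictly from the current CPU's node, with a private fallback. */
+	struct foo_buf *buf;
+	int nid = numa_node_id();	/* node of the current CPU */
+
+	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL | __GFP_THISNODE, nid);
+	if (!buf)
+		buf = kzalloc(sizeof(*buf), GFP_KERNEL);  /* subsystem fallback */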
+
+If the architecture supports--does not hide--memoryless nodes, then CPUs
+attached to memoryless nodes would always incur the fallback path overhead
+or some subsystems would fail to initialize if they attempted to allocate
+memory exclusively from a node without memory.  To support such
+architectures transparently, kernel subsystems can use the numa_mem_id()
+or cpu_to_mem() function to locate the "local memory node" for the calling or
+specified CPU.  Again, this is the same node from which default, local page
+allocations will be attempted.
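+
+Under that scheme, the sketch above would simply swap in the "local memory
+node" helper (again a hedged illustration, assuming an architecture with
+CONFIG_HAVE_MEMORYLESS_NODES style support)::
+
+	/* Nearest node that actually has memory, rather than the CPU's node. */
+	int nid = numa_mem_id();
+
+	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL | __GFP_THISNODE, nid);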
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
deleted file mode 100644
index 622b927..0000000
--- a/Documentation/vm/numa_memory_policy.txt
+++ /dev/null
@@ -1,452 +0,0 @@
-
-What is Linux Memory Policy?
-
-In the Linux kernel, "memory policy" determines from which node the kernel will
-allocate memory in a NUMA system or in an emulated NUMA system.  Linux has
-supported platforms with Non-Uniform Memory Access architectures since 2.4.?.
-The current memory policy support was added to Linux 2.6 around May 2004.  This
-document attempts to describe the concepts and APIs of the 2.6 memory policy
-support.
-
-Memory policies should not be confused with cpusets
-(Documentation/cgroup-v1/cpusets.txt)
-which is an administrative mechanism for restricting the nodes from which
-memory may be allocated by a set of processes. Memory policies are a
-programming interface that a NUMA-aware application can take advantage of.  When
-both cpusets and policies are applied to a task, the restrictions of the cpuset
-takes priority.  See "MEMORY POLICIES AND CPUSETS" below for more details.
-
-MEMORY POLICY CONCEPTS
-
-Scope of Memory Policies
-
-The Linux kernel supports _scopes_ of memory policy, described here from
-most general to most specific:
-
-    System Default Policy:  this policy is "hard coded" into the kernel.  It
-    is the policy that governs all page allocations that aren't controlled
-    by one of the more specific policy scopes discussed below.  When the
-    system is "up and running", the system default policy will use "local
-    allocation" described below.  However, during boot up, the system
-    default policy will be set to interleave allocations across all nodes
-    with "sufficient" memory, so as not to overload the initial boot node
-    with boot-time allocations.
-
-    Task/Process Policy:  this is an optional, per-task policy.  When defined
-    for a specific task, this policy controls all page allocations made by or
-    on behalf of the task that aren't controlled by a more specific scope.
-    If a task does not define a task policy, then all page allocations that
-    would have been controlled by the task policy "fall back" to the System
-    Default Policy.
-
-	The task policy applies to the entire address space of a task. Thus,
-	it is inheritable, and indeed is inherited, across both fork()
-	[clone() w/o the CLONE_VM flag] and exec*().  This allows a parent task
-	to establish the task policy for a child task exec()'d from an
-	executable image that has no awareness of memory policy.  See the
-	MEMORY POLICY APIS section, below, for an overview of the system call
-	that a task may use to set/change its task/process policy.
-
-	In a multi-threaded task, task policies apply only to the thread
-	[Linux kernel task] that installs the policy and any threads
-	subsequently created by that thread.  Any sibling threads existing
-	at the time a new task policy is installed retain their current
-	policy.
-
-	A task policy applies only to pages allocated after the policy is
-	installed.  Any pages already faulted in by the task when the task
-	changes its task policy remain where they were allocated based on
-	the policy at the time they were allocated.
-
-    VMA Policy:  A "VMA" or "Virtual Memory Area" refers to a range of a task's
-    virtual address space.  A task may define a specific policy for a range
-    of its virtual address space.   See the MEMORY POLICIES APIS section,
-    below, for an overview of the mbind() system call used to set a VMA
-    policy.
-
-    A VMA policy will govern the allocation of pages that back this region of
-    the address space.  Any regions of the task's address space that don't
-    have an explicit VMA policy will fall back to the task policy, which may
-    itself fall back to the System Default Policy.
-
-    VMA policies have a few complicating details:
-
-	VMA policy applies ONLY to anonymous pages.  These include pages
-	allocated for anonymous segments, such as the task stack and heap, and
-	any regions of the address space mmap()ed with the MAP_ANONYMOUS flag.
-	If a VMA policy is applied to a file mapping, it will be ignored if
-	the mapping used the MAP_SHARED flag.  If the file mapping used the
-	MAP_PRIVATE flag, the VMA policy will only be applied when an
-	anonymous page is allocated on an attempt to write to the mapping--
-	i.e., at Copy-On-Write.
-
-	VMA policies are shared between all tasks that share a virtual address
-	space--a.k.a. threads--independent of when the policy is installed; and
-	they are inherited across fork().  However, because VMA policies refer
-	to a specific region of a task's address space, and because the address
-	space is discarded and recreated on exec*(), VMA policies are NOT
-	inheritable across exec().  Thus, only NUMA-aware applications may
-	use VMA policies.
-
-	A task may install a new VMA policy on a sub-range of a previously
-	mmap()ed region.  When this happens, Linux splits the existing virtual
-	memory area into 2 or 3 VMAs, each with it's own policy.
-
-	By default, VMA policy applies only to pages allocated after the policy
-	is installed.  Any pages already faulted into the VMA range remain
-	where they were allocated based on the policy at the time they were
-	allocated.  However, since 2.6.16, Linux supports page migration via
-	the mbind() system call, so that page contents can be moved to match
-	a newly installed policy.
-
-    Shared Policy:  Conceptually, shared policies apply to "memory objects"
-    mapped shared into one or more tasks' distinct address spaces.  An
-    application installs a shared policies the same way as VMA policies--using
-    the mbind() system call specifying a range of virtual addresses that map
-    the shared object.  However, unlike VMA policies, which can be considered
-    to be an attribute of a range of a task's address space, shared policies
-    apply directly to the shared object.  Thus, all tasks that attach to the
-    object share the policy, and all pages allocated for the shared object,
-    by any task, will obey the shared policy.
-
-	As of 2.6.22, only shared memory segments, created by shmget() or
-	mmap(MAP_ANONYMOUS|MAP_SHARED), support shared policy.  When shared
-	policy support was added to Linux, the associated data structures were
-	added to hugetlbfs shmem segments.  At the time, hugetlbfs did not
-	support allocation at fault time--a.k.a lazy allocation--so hugetlbfs
-	shmem segments were never "hooked up" to the shared policy support.
-	Although hugetlbfs segments now support lazy allocation, their support
-	for shared policy has not been completed.
-
-	As mentioned above [re: VMA policies], allocations of page cache
-	pages for regular files mmap()ed with MAP_SHARED ignore any VMA
-	policy installed on the virtual address range backed by the shared
-	file mapping.  Rather, shared page cache pages, including pages backing
-	private mappings that have not yet been written by the task, follow
-	task policy, if any, else System Default Policy.
-
-	The shared policy infrastructure supports different policies on subset
-	ranges of the shared object.  However, Linux still splits the VMA of
-	the task that installs the policy for each range of distinct policy.
-	Thus, different tasks that attach to a shared memory segment can have
-	different VMA configurations mapping that one shared object.  This
-	can be seen by examining the /proc/<pid>/numa_maps of tasks sharing
-	a shared memory region, when one task has installed shared policy on
-	one or more ranges of the region.
-
-Components of Memory Policies
-
-    A Linux memory policy consists of a "mode", optional mode flags, and an
-    optional set of nodes.  The mode determines the behavior of the policy,
-    the optional mode flags determine the behavior of the mode, and the
-    optional set of nodes can be viewed as the arguments to the policy
-    behavior.
-
-   Internally, memory policies are implemented by a reference counted
-   structure, struct mempolicy.  Details of this structure will be discussed
-   in context, below, as required to explain the behavior.
-
-   Linux memory policy supports the following 4 behavioral modes:
-
-	Default Mode--MPOL_DEFAULT:  This mode is only used in the memory
-	policy APIs.  Internally, MPOL_DEFAULT is converted to the NULL
-	memory policy in all policy scopes.  Any existing non-default policy
-	will simply be removed when MPOL_DEFAULT is specified.  As a result,
-	MPOL_DEFAULT means "fall back to the next most specific policy scope."
-
-	    For example, a NULL or default task policy will fall back to the
-	    system default policy.  A NULL or default vma policy will fall
-	    back to the task policy.
-
-	    When specified in one of the memory policy APIs, the Default mode
-	    does not use the optional set of nodes.
-
-	    It is an error for the set of nodes specified for this policy to
-	    be non-empty.
-
-	MPOL_BIND:  This mode specifies that memory must come from the
-	set of nodes specified by the policy.  Memory will be allocated from
-	the node in the set with sufficient free memory that is closest to
-	the node where the allocation takes place.
-
-	MPOL_PREFERRED:  This mode specifies that the allocation should be
-	attempted from the single node specified in the policy.  If that
-	allocation fails, the kernel will search other nodes, in order of
-	increasing distance from the preferred node based on information
-	provided by the platform firmware.
-
-	    Internally, the Preferred policy uses a single node--the
-	    preferred_node member of struct mempolicy.  When the internal
-	    mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
-	    the policy is interpreted as local allocation.  "Local" allocation
-	    policy can be viewed as a Preferred policy that starts at the node
-	    containing the cpu where the allocation takes place.
-
-	    It is possible for the user to specify that local allocation is
-	    always preferred by passing an empty nodemask with this mode.
-	    If an empty nodemask is passed, the policy cannot use the
-	    MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described
-	    below.
-
-	MPOL_INTERLEAVED:  This mode specifies that page allocations be
-	interleaved, on a page granularity, across the nodes specified in
-	the policy.  This mode also behaves slightly differently, based on
-	the context where it is used:
-
-	    For allocation of anonymous pages and shared memory pages,
-	    Interleave mode indexes the set of nodes specified by the policy
-	    using the page offset of the faulting address into the segment
-	    [VMA] containing the address modulo the number of nodes specified
-	    by the policy.  It then attempts to allocate a page, starting at
-	    the selected node, as if the node had been specified by a Preferred
-	    policy or had been selected by a local allocation.  That is,
-	    allocation will follow the per node zonelist.
-
-	    For allocation of page cache pages, Interleave mode indexes the set
-	    of nodes specified by the policy using a node counter maintained
-	    per task.  This counter wraps around to the lowest specified node
-	    after it reaches the highest specified node.  This will tend to
-	    spread the pages out over the nodes specified by the policy based
-	    on the order in which they are allocated, rather than based on any
-	    page offset into an address range or file.  During system boot up,
-	    the temporary interleaved system default policy works in this
-	    mode.
-
-   Linux memory policy supports the following optional mode flags:
-
-	MPOL_F_STATIC_NODES:  This flag specifies that the nodemask passed by
-	the user should not be remapped if the task or VMA's set of allowed
-	nodes changes after the memory policy has been defined.
-
-	    Without this flag, anytime a mempolicy is rebound because of a
-	    change in the set of allowed nodes, the node (Preferred) or
-	    nodemask (Bind, Interleave) is remapped to the new set of
-	    allowed nodes.  This may result in nodes being used that were
-	    previously undesired.
-
-	    With this flag, if the user-specified nodes overlap with the
-	    nodes allowed by the task's cpuset, then the memory policy is
-	    applied to their intersection.  If the two sets of nodes do not
-	    overlap, the Default policy is used.
-
-	    For example, consider a task that is attached to a cpuset with
-	    mems 1-3 that sets an Interleave policy over the same set.  If
-	    the cpuset's mems change to 3-5, the Interleave will now occur
-	    over nodes 3, 4, and 5.  With this flag, however, since only node
-	    3 is allowed from the user's nodemask, the "interleave" only
-	    occurs over that node.  If no nodes from the user's nodemask are
-	    now allowed, the Default behavior is used.
-
-	    MPOL_F_STATIC_NODES cannot be combined with the
-	    MPOL_F_RELATIVE_NODES flag.  It also cannot be used for
-	    MPOL_PREFERRED policies that were created with an empty nodemask
-	    (local allocation).
-
-	MPOL_F_RELATIVE_NODES:  This flag specifies that the nodemask passed
-	by the user will be mapped relative to the set of the task or VMA's
-	set of allowed nodes.  The kernel stores the user-passed nodemask,
-	and if the allowed nodes changes, then that original nodemask will
-	be remapped relative to the new set of allowed nodes.
-
-	    Without this flag (and without MPOL_F_STATIC_NODES), anytime a
-	    mempolicy is rebound because of a change in the set of allowed
-	    nodes, the node (Preferred) or nodemask (Bind, Interleave) is
-	    remapped to the new set of allowed nodes.  That remap may not
-	    preserve the relative nature of the user's passed nodemask to its
-	    set of allowed nodes upon successive rebinds: a nodemask of
-	    1,3,5 may be remapped to 7-9 and then to 1-3 if the set of
-	    allowed nodes is restored to its original state.
-
-	    With this flag, the remap is done so that the node numbers from
-	    the user's passed nodemask are relative to the set of allowed
-	    nodes.  In other words, if nodes 0, 2, and 4 are set in the user's
-	    nodemask, the policy will be effected over the first (and in the
-	    Bind or Interleave case, the third and fifth) nodes in the set of
-	    allowed nodes.  The nodemask passed by the user represents nodes
-	    relative to task or VMA's set of allowed nodes.
-
-	    If the user's nodemask includes nodes that are outside the range
-	    of the new set of allowed nodes (for example, node 5 is set in
-	    the user's nodemask when the set of allowed nodes is only 0-3),
-	    then the remap wraps around to the beginning of the nodemask and,
-	    if not already set, sets the node in the mempolicy nodemask.
-
-	    For example, consider a task that is attached to a cpuset with
-	    mems 2-5 that sets an Interleave policy over the same set with
-	    MPOL_F_RELATIVE_NODES.  If the cpuset's mems change to 3-7, the
-	    interleave now occurs over nodes 3,5-7.  If the cpuset's mems
-	    then change to 0,2-3,5, then the interleave occurs over nodes
-	    0,2-3,5.
-
-	    Thanks to the consistent remapping, applications preparing
-	    nodemasks to specify memory policies using this flag should
-	    disregard their current, actual cpuset imposed memory placement
-	    and prepare the nodemask as if they were always located on
-	    memory nodes 0 to N-1, where N is the number of memory nodes the
-	    policy is intended to manage.  Let the kernel then remap to the
-	    set of memory nodes allowed by the task's cpuset, as that may
-	    change over time.
-
-	    MPOL_F_RELATIVE_NODES cannot be combined with the
-	    MPOL_F_STATIC_NODES flag.  It also cannot be used for
-	    MPOL_PREFERRED policies that were created with an empty nodemask
-	    (local allocation).
-
-MEMORY POLICY REFERENCE COUNTING
-
-To resolve use/free races, struct mempolicy contains an atomic reference
-count field.  Internal interfaces, mpol_get()/mpol_put() increment and
-decrement this reference count, respectively.  mpol_put() will only free
-the structure back to the mempolicy kmem cache when the reference count
-goes to zero.
-
-When a new memory policy is allocated, its reference count is initialized
-to '1', representing the reference held by the task that is installing the
-new policy.  When a pointer to a memory policy structure is stored in another
-structure, another reference is added, as the task's reference will be dropped
-on completion of the policy installation.
-
-During run-time "usage" of the policy, we attempt to minimize atomic operations
-on the reference count, as this can lead to cache lines bouncing between cpus
-and NUMA nodes.  "Usage" here means one of the following:
-
-1) querying of the policy, either by the task itself [using the get_mempolicy()
-   API discussed below] or by another task using the /proc/<pid>/numa_maps
-   interface.
-
-2) examination of the policy to determine the policy mode and associated node
-   or node lists, if any, for page allocation.  This is considered a "hot
-   path".  Note that for MPOL_BIND, the "usage" extends across the entire
-   allocation process, which may sleep during page reclaimation, because the
-   BIND policy nodemask is used, by reference, to filter ineligible nodes.
-
-We can avoid taking an extra reference during the usages listed above as
-follows:
-
-1) we never need to get/free the system default policy as this is never
-   changed nor freed, once the system is up and running.
-
-2) for querying the policy, we do not need to take an extra reference on the
-   target task's task policy nor vma policies because we always acquire the
-   task's mm's mmap_sem for read during the query.  The set_mempolicy() and
-   mbind() APIs [see below] always acquire the mmap_sem for write when
-   installing or replacing task or vma policies.  Thus, there is no possibility
-   of a task or thread freeing a policy while another task or thread is
-   querying it.
-
-3) Page allocation usage of task or vma policy occurs in the fault path where
-   we hold them mmap_sem for read.  Again, because replacing the task or vma
-   policy requires that the mmap_sem be held for write, the policy can't be
-   freed out from under us while we're using it for page allocation.
-
-4) Shared policies require special consideration.  One task can replace a
-   shared memory policy while another task, with a distinct mmap_sem, is
-   querying or allocating a page based on the policy.  To resolve this
-   potential race, the shared policy infrastructure adds an extra reference
-   to the shared policy during lookup while holding a spin lock on the shared
-   policy management structure.  This requires that we drop this extra
-   reference when we're finished "using" the policy.  We must drop the
-   extra reference on shared policies in the same query/allocation paths
-   used for non-shared policies.  For this reason, shared policies are marked
-   as such, and the extra reference is dropped "conditionally"--i.e., only
-   for shared policies.
-
-   Because of this extra reference counting, and because we must lookup
-   shared policies in a tree structure under spinlock, shared policies are
-   more expensive to use in the page allocation path.  This is especially
-   true for shared policies on shared memory regions shared by tasks running
-   on different NUMA nodes.  This extra overhead can be avoided by always
-   falling back to task or system default policy for shared memory regions,
-   or by prefaulting the entire shared memory region into memory and locking
-   it down.  However, this might not be appropriate for all applications.
-
-MEMORY POLICY APIs
-
-Linux supports 3 system calls for controlling memory policy.  These APIS
-always affect only the calling task, the calling task's address space, or
-some shared object mapped into the calling task's address space.
-
-	Note:  the headers that define these APIs and the parameter data types
-	for user space applications reside in a package that is not part of
-	the Linux kernel.  The kernel system call interfaces, with the 'sys_'
-	prefix, are defined in <linux/syscalls.h>; the mode and flag
-	definitions are defined in <linux/mempolicy.h>.
-
-Set [Task] Memory Policy:
-
-	long set_mempolicy(int mode, const unsigned long *nmask,
-					unsigned long maxnode);
-
-	Set's the calling task's "task/process memory policy" to mode
-	specified by the 'mode' argument and the set of nodes defined
-	by 'nmask'.  'nmask' points to a bit mask of node ids containing
-	at least 'maxnode' ids.  Optional mode flags may be passed by
-	combining the 'mode' argument with the flag (for example:
-	MPOL_INTERLEAVE | MPOL_F_STATIC_NODES).
-
-	See the set_mempolicy(2) man page for more details
-
-
-Get [Task] Memory Policy or Related Information
-
-	long get_mempolicy(int *mode,
-			   const unsigned long *nmask, unsigned long maxnode,
-			   void *addr, int flags);
-
-	Queries the "task/process memory policy" of the calling task, or
-	the policy or location of a specified virtual address, depending
-	on the 'flags' argument.
-
-	See the get_mempolicy(2) man page for more details
-
-
-Install VMA/Shared Policy for a Range of Task's Address Space
-
-	long mbind(void *start, unsigned long len, int mode,
-		   const unsigned long *nmask, unsigned long maxnode,
-		   unsigned flags);
-
-	mbind() installs the policy specified by (mode, nmask, maxnodes) as
-	a VMA policy for the range of the calling task's address space
-	specified by the 'start' and 'len' arguments.  Additional actions
-	may be requested via the 'flags' argument.
-
-	See the mbind(2) man page for more details.
-
-MEMORY POLICY COMMAND LINE INTERFACE
-
-Although not strictly part of the Linux implementation of memory policy,
-a command line tool, numactl(8), exists that allows one to:
-
-+ set the task policy for a specified program via set_mempolicy(2), fork(2) and
-  exec(2)
-
-+ set the shared policy for a shared memory segment via mbind(2)
-
-The numactl(8) tool is packaged with the run-time version of the library
-containing the memory policy system call wrappers.  Some distributions
-package the headers and compile-time libraries in a separate development
-package.
-
-
-MEMORY POLICIES AND CPUSETS
-
-Memory policies work within cpusets as described above.  For memory policies
-that require a node or set of nodes, the nodes are restricted to the set of
-nodes whose memories are allowed by the cpuset constraints.  If the nodemask
-specified for the policy contains nodes that are not allowed by the cpuset and
-MPOL_F_RELATIVE_NODES is not used, the intersection of the set of nodes
-specified for the policy and the set of nodes with memory is used.  If the
-result is the empty set, the policy is considered invalid and cannot be
-installed.  If MPOL_F_RELATIVE_NODES is used, the policy's nodes are mapped
-onto and folded into the task's set of allowed nodes as previously described.
-
-The interaction of memory policies and cpusets can be problematic when tasks
-in two cpusets share access to a memory region, such as shared memory segments
-created by shmget() of mmap() with the MAP_ANONYMOUS and MAP_SHARED flags, and
-any of the tasks install shared policy on the region, only nodes whose
-memories are allowed in both cpusets may be used in the policies.  Obtaining
-this information requires "stepping outside" the memory policy APIs to use the
-cpuset information and requires that one know in what cpusets other task might
-be attaching to the shared region.  Furthermore, if the cpusets' allowed
-memory sets are disjoint, "local" allocation is the only valid policy.
diff --git a/Documentation/vm/overcommit-accounting b/Documentation/vm/overcommit-accounting
deleted file mode 100644
index cbfaaa6..0000000
--- a/Documentation/vm/overcommit-accounting
+++ /dev/null
@@ -1,80 +0,0 @@
-The Linux kernel supports the following overcommit handling modes
-
-0	-	Heuristic overcommit handling. Obvious overcommits of
-		address space are refused. Used for a typical system. It
-		ensures a seriously wild allocation fails while allowing
-		overcommit to reduce swap usage.  root is allowed to 
-		allocate slightly more memory in this mode. This is the 
-		default.
-
-1	-	Always overcommit. Appropriate for some scientific
-		applications. Classic example is code using sparse arrays
-		and just relying on the virtual memory consisting almost
-		entirely of zero pages.
-
-2	-	Don't overcommit. The total address space commit
-		for the system is not permitted to exceed swap + a
-		configurable amount (default is 50%) of physical RAM.
-		Depending on the amount you use, in most situations
-		this means a process will not be killed while accessing
-		pages but will receive errors on memory allocation as
-		appropriate.
-
-		Useful for applications that want to guarantee their
-		memory allocations will be available in the future
-		without having to initialize every page.
-
-The overcommit policy is set via the sysctl `vm.overcommit_memory'.
-
-The overcommit amount can be set via `vm.overcommit_ratio' (percentage)
-or `vm.overcommit_kbytes' (absolute value).
-
-The current overcommit limit and amount committed are viewable in
-/proc/meminfo as CommitLimit and Committed_AS respectively.
-
-Gotchas
--------
-
-The C language stack growth does an implicit mremap. If you want absolute
-guarantees and run close to the edge you MUST mmap your stack for the 
-largest size you think you will need. For typical stack usage this does
-not matter much but it's a corner case if you really really care
-
-In mode 2 the MAP_NORESERVE flag is ignored. 
-
-
-How It Works
-------------
-
-The overcommit is based on the following rules
-
-For a file backed map
-	SHARED or READ-only	-	0 cost (the file is the map not swap)
-	PRIVATE WRITABLE	-	size of mapping per instance
-
-For an anonymous or /dev/zero map
-	SHARED			-	size of mapping
-	PRIVATE READ-only	-	0 cost (but of little use)
-	PRIVATE WRITABLE	-	size of mapping per instance
-
-Additional accounting
-	Pages made writable copies by mmap
-	shmfs memory drawn from the same pool
-
-Status
-------
-
-o	We account mmap memory mappings
-o	We account mprotect changes in commit
-o	We account mremap changes in size
-o	We account brk
-o	We account munmap
-o	We report the commit status in /proc
-o	Account and check on fork
-o	Review stack handling/building on exec
-o	SHMfs accounting
-o	Implement actual limit enforcement
-
-To Do
------
-o	Account ptrace pages (this is hard)
diff --git a/Documentation/vm/overcommit-accounting.rst b/Documentation/vm/overcommit-accounting.rst
new file mode 100644
index 0000000..0dd54bb
--- /dev/null
+++ b/Documentation/vm/overcommit-accounting.rst
@@ -0,0 +1,87 @@
+.. _overcommit_accounting:
+
+=====================
+Overcommit Accounting
+=====================
+
+The Linux kernel supports the following overcommit handling modes:
+
+0
+	Heuristic overcommit handling. Obvious overcommits of address
+	space are refused. Used for a typical system. It ensures a
+	seriously wild allocation fails while allowing overcommit to
+	reduce swap usage.  root is allowed to allocate slightly more
+	memory in this mode. This is the default.
+
+1
+	Always overcommit. Appropriate for some scientific
+	applications. Classic example is code using sparse arrays and
+	just relying on the virtual memory consisting almost entirely
+	of zero pages.
+
+2
+	Don't overcommit. The total address space commit for the
+	system is not permitted to exceed swap + a configurable amount
+	(default is 50%) of physical RAM.  Depending on the amount you
+	use, in most situations this means a process will not be
+	killed while accessing pages but will receive errors on memory
+	allocation as appropriate.
+
+	Useful for applications that want to guarantee their memory
+	allocations will be available in the future without having to
+	initialize every page.
+
+The overcommit policy is set via the sysctl ``vm.overcommit_memory``.
+
+The overcommit amount can be set via ``vm.overcommit_ratio`` (percentage)
+or ``vm.overcommit_kbytes`` (absolute value).
+
+The current overcommit limit and amount committed are viewable in
+``/proc/meminfo`` as CommitLimit and Committed_AS respectively.
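+
+In mode 2 the limit is, roughly::
+
+	CommitLimit = swap + overcommit_ratio/100 * RAM
+
+(or ``swap + overcommit_kbytes`` when the absolute value is used).  A worked
+example, with illustrative numbers not taken from this document::
+
+	RAM = 8 GiB, swap = 2 GiB, vm.overcommit_ratio = 50
+	CommitLimit = 2 GiB + 50% of 8 GiB = 6 GiB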
+
+Gotchas
+=======
+
+The C language stack growth does an implicit mremap. If you want absolute
+guarantees and run close to the edge you MUST mmap your stack for the
+largest size you think you will need. For typical stack usage this does
+not matter much, but it's a corner case if you really, really care.
+
+In mode 2 the MAP_NORESERVE flag is ignored.
+
+
+How It Works
+============
+
+The overcommit is based on the following rules:
+
+For a file backed map
+	| SHARED or READ-only	-	0 cost (the file is the map not swap)
+	| PRIVATE WRITABLE	-	size of mapping per instance
+
+For an anonymous or ``/dev/zero`` map
+	| SHARED			-	size of mapping
+	| PRIVATE READ-only	-	0 cost (but of little use)
+	| PRIVATE WRITABLE	-	size of mapping per instance
+
+Additional accounting
+	| Pages made writable copies by mmap
+	| shmfs memory drawn from the same pool
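+
+As a rough userspace illustration of these rules (a hedged sketch, not part
+of the original document), the commit charge per mapping looks like::
+
+	#include <sys/mman.h>
+
+	void *a, *b, *c;	/* fd and len as set up by the caller */
+
+	/* file backed, SHARED or READ-only: no commit charge */
+	a = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+
+	/* file backed, PRIVATE WRITABLE: charged len per instance (COW) */
+	b = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+
+	/* anonymous SHARED: charged len once */
+	c = mmap(NULL, len, PROT_READ | PROT_WRITE,
+		 MAP_SHARED | MAP_ANONYMOUS, -1, 0);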
+
+Status
+======
+
+*	We account mmap memory mappings
+*	We account mprotect changes in commit
+*	We account mremap changes in size
+*	We account brk
+*	We account munmap
+*	We report the commit status in /proc
+*	Account and check on fork
+*	Review stack handling/building on exec
+*	SHMfs accounting
+*	Implement actual limit enforcement
+
+To Do
+=====
+
+*	Account ptrace pages (this is hard)
diff --git a/Documentation/vm/page_frags b/Documentation/vm/page_frags
deleted file mode 100644
index a671456..0000000
--- a/Documentation/vm/page_frags
+++ /dev/null
@@ -1,42 +0,0 @@
-Page fragments
---------------
-
-A page fragment is an arbitrary-length arbitrary-offset area of memory
-which resides within a 0 or higher order compound page.  Multiple
-fragments within that page are individually refcounted, in the page's
-reference counter.
-
-The page_frag functions, page_frag_alloc and page_frag_free, provide a
-simple allocation framework for page fragments.  This is used by the
-network stack and network device drivers to provide a backing region of
-memory for use as either an sk_buff->head, or to be used in the "frags"
-portion of skb_shared_info.
-
-In order to make use of the page fragment APIs a backing page fragment
-cache is needed.  This provides a central point for the fragment allocation
-and tracks allows multiple calls to make use of a cached page.  The
-advantage to doing this is that multiple calls to get_page can be avoided
-which can be expensive at allocation time.  However due to the nature of
-this caching it is required that any calls to the cache be protected by
-either a per-cpu limitation, or a per-cpu limitation and forcing interrupts
-to be disabled when executing the fragment allocation.
-
-The network stack uses two separate caches per CPU to handle fragment
-allocation.  The netdev_alloc_cache is used by callers making use of the
-__netdev_alloc_frag and __netdev_alloc_skb calls.  The napi_alloc_cache is
-used by callers of the __napi_alloc_frag and __napi_alloc_skb calls.  The
-main difference between these two calls is the context in which they may be
-called.  The "netdev" prefixed functions are usable in any context as these
-functions will disable interrupts, while the "napi" prefixed functions are
-only usable within the softirq context.
-
-Many network device drivers use a similar methodology for allocating page
-fragments, but the page fragments are cached at the ring or descriptor
-level.  In order to enable these cases it is necessary to provide a generic
-way of tearing down a page cache.  For this reason __page_frag_cache_drain
-was implemented.  It allows for freeing multiple references from a single
-page via a single call.  The advantage to doing this is that it allows for
-cleaning up the multiple references that were added to a page in order to
-avoid calling get_page per allocation.
-
-Alexander Duyck, Nov 29, 2016.
diff --git a/Documentation/vm/page_frags.rst b/Documentation/vm/page_frags.rst
new file mode 100644
index 0000000..637cc49
--- /dev/null
+++ b/Documentation/vm/page_frags.rst
@@ -0,0 +1,45 @@
+.. _page_frags:
+
+==============
+Page fragments
+==============
+
+A page fragment is an arbitrary-length arbitrary-offset area of memory
+which resides within a 0 or higher order compound page.  Multiple
+fragments within that page are individually refcounted, in the page's
+reference counter.
+
+The page_frag functions, page_frag_alloc and page_frag_free, provide a
+simple allocation framework for page fragments.  This is used by the
+network stack and network device drivers to provide a backing region of
+memory for use as either an sk_buff->head, or to be used in the "frags"
+portion of skb_shared_info.
+
+In order to make use of the page fragment APIs a backing page fragment
+cache is needed.  This provides a central point for fragment allocation and
+tracking, and allows multiple calls to make use of a cached page.  The
+advantage to doing this is that multiple calls to get_page can be avoided
+which can be expensive at allocation time.  However, due to the nature of
+this caching, any calls to the cache must be protected by either a per-CPU
+limitation, or a per-CPU limitation combined with interrupts being disabled
+while executing the fragment allocation.
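+
+A minimal sketch of the pattern (hypothetical driver code, assuming the
+page_frag_alloc()/page_frag_free() interfaces described above)::
+
+	static DEFINE_PER_CPU(struct page_frag_cache, foo_frag_cache);
+
+	/* Caller must hold off CPU migration, e.g. run in softirq context. */
+	static void *foo_alloc_frag(unsigned int fragsz)
+	{
+		struct page_frag_cache *nc = this_cpu_ptr(&foo_frag_cache);
+
+		return page_frag_alloc(nc, fragsz, GFP_ATOMIC);
+	}
+
+	/* Later, when the fragment is no longer in use: */
+	page_frag_free(frag);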
+
+The network stack uses two separate caches per CPU to handle fragment
+allocation.  The netdev_alloc_cache is used by callers making use of the
+__netdev_alloc_frag and __netdev_alloc_skb calls.  The napi_alloc_cache is
+used by callers of the __napi_alloc_frag and __napi_alloc_skb calls.  The
+main difference between these two calls is the context in which they may be
+called.  The "netdev" prefixed functions are usable in any context as these
+functions will disable interrupts, while the "napi" prefixed functions are
+only usable within the softirq context.
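+
+For instance (a hedged sketch; fragsz is whatever the caller requires)::
+
+	void *a, *b;
+
+	/* usable from any context: disables interrupts internally */
+	a = __netdev_alloc_frag(fragsz, GFP_ATOMIC);
+
+	/* usable only from softirq (NAPI poll) context */
+	b = __napi_alloc_frag(fragsz, GFP_ATOMIC);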
+
+Many network device drivers use a similar methodology for allocating page
+fragments, but the page fragments are cached at the ring or descriptor
+level.  In order to enable these cases it is necessary to provide a generic
+way of tearing down a page fragment cache.  For this reason __page_frag_cache_drain
+was implemented.  It allows for freeing multiple references from a single
+page via a single call.  The advantage to doing this is that it allows for
+cleaning up the multiple references that were added to a page in order to
+avoid calling get_page per allocation.
+
+Alexander Duyck, Nov 29, 2016.
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration
deleted file mode 100644
index 4968680..0000000
--- a/Documentation/vm/page_migration
+++ /dev/null
@@ -1,252 +0,0 @@
-Page migration
---------------
-
-Page migration allows the moving of the physical location of pages between
-nodes in a numa system while the process is running. This means that the
-virtual addresses that the process sees do not change. However, the
-system rearranges the physical location of those pages.
-
-The main intend of page migration is to reduce the latency of memory access
-by moving pages near to the processor where the process accessing that memory
-is running.
-
-Page migration allows a process to manually relocate the node on which its
-pages are located through the MF_MOVE and MF_MOVE_ALL options while setting
-a new memory policy via mbind(). The pages of process can also be relocated
-from another process using the sys_migrate_pages() function call. The
-migrate_pages function call takes two sets of nodes and moves pages of a
-process that are located on the from nodes to the destination nodes.
-Page migration functions are provided by the numactl package by Andi Kleen
-(a version later than 0.9.3 is required. Get it from
-ftp://oss.sgi.com/www/projects/libnuma/download/). numactl provides libnuma
-which provides an interface similar to other numa functionality for page
-migration.  cat /proc/<pid>/numa_maps allows an easy review of where the
-pages of a process are located. See also the numa_maps documentation in the
-proc(5) man page.
-
-Manual migration is useful if for example the scheduler has relocated
-a process to a processor on a distant node. A batch scheduler or an
-administrator may detect the situation and move the pages of the process
-nearer to the new processor. The kernel itself does only provide
-manual page migration support. Automatic page migration may be implemented
-through user space processes that move pages. A special function call
-"move_pages" allows the moving of individual pages within a process.
-A NUMA profiler may f.e. obtain a log showing frequent off node
-accesses and may use the result to move pages to more advantageous
-locations.
-
-Larger installations usually partition the system using cpusets into
-sections of nodes. Paul Jackson has equipped cpusets with the ability to
-move pages when a task is moved to another cpuset (See
-Documentation/cgroup-v1/cpusets.txt).
-Cpusets allows the automation of process locality. If a task is moved to
-a new cpuset then also all its pages are moved with it so that the
-performance of the process does not sink dramatically. Also the pages
-of processes in a cpuset are moved if the allowed memory nodes of a
-cpuset are changed.
-
-Page migration allows the preservation of the relative location of pages
-within a group of nodes for all migration techniques which will preserve a
-particular memory allocation pattern generated even after migrating a
-process. This is necessary in order to preserve the memory latencies.
-Processes will run with similar performance after migration.
-
-Page migration occurs in several steps. First a high level
-description for those trying to use migrate_pages() from the kernel
-(for userspace usage see the Andi Kleen's numactl package mentioned above)
-and then a low level description of how the low level details work.
-
-A. In kernel use of migrate_pages()
------------------------------------
-
-1. Remove pages from the LRU.
-
-   Lists of pages to be migrated are generated by scanning over
-   pages and moving them into lists. This is done by
-   calling isolate_lru_page().
-   Calling isolate_lru_page increases the references to the page
-   so that it cannot vanish while the page migration occurs.
-   It also prevents the swapper or other scans to encounter
-   the page.
-
-2. We need to have a function of type new_page_t that can be
-   passed to migrate_pages(). This function should figure out
-   how to allocate the correct new page given the old page.
-
-3. The migrate_pages() function is called which attempts
-   to do the migration. It will call the function to allocate
-   the new page for each page that is considered for
-   moving.
-
-B. How migrate_pages() works
-----------------------------
-
-migrate_pages() does several passes over its list of pages. A page is moved
-if all references to a page are removable at the time. The page has
-already been removed from the LRU via isolate_lru_page() and the refcount
-is increased so that the page cannot be freed while page migration occurs.
-
-Steps:
-
-1. Lock the page to be migrated
-
-2. Ensure that writeback is complete.
-
-3. Lock the new page that we want to move to. It is locked so that accesses to
-   this (not yet uptodate) page immediately lock while the move is in progress.
-
-4. All the page table references to the page are converted to migration
-   entries. This decreases the mapcount of a page. If the resulting
-   mapcount is not zero then we do not migrate the page. All user space
-   processes that attempt to access the page will now wait on the page lock.
-
-5. The i_pages lock is taken. This will cause all processes trying
-   to access the page via the mapping to block on the spinlock.
-
-6. The refcount of the page is examined and we back out if references remain
-   otherwise we know that we are the only one referencing this page.
-
-7. The radix tree is checked and if it does not contain the pointer to this
-   page then we back out because someone else modified the radix tree.
-
-8. The new page is prepped with some settings from the old page so that
-   accesses to the new page will discover a page with the correct settings.
-
-9. The radix tree is changed to point to the new page.
-
-10. The reference count of the old page is dropped because the address space
-    reference is gone. A reference to the new page is established because
-    the new page is referenced by the address space.
-
-11. The i_pages lock is dropped. With that lookups in the mapping
-    become possible again. Processes will move from spinning on the lock
-    to sleeping on the locked new page.
-
-12. The page contents are copied to the new page.
-
-13. The remaining page flags are copied to the new page.
-
-14. The old page flags are cleared to indicate that the page does
-    not provide any information anymore.
-
-15. Queued up writeback on the new page is triggered.
-
-16. If migration entries were page then replace them with real ptes. Doing
-    so will enable access for user space processes not already waiting for
-    the page lock.
-
-19. The page locks are dropped from the old and new page.
-    Processes waiting on the page lock will redo their page faults
-    and will reach the new page.
-
-20. The new page is moved to the LRU and can be scanned by the swapper
-    etc again.
-
-C. Non-LRU page migration
--------------------------
-
-Although original migration aimed for reducing the latency of memory access
-for NUMA, compaction who want to create high-order page is also main customer.
-
-Current problem of the implementation is that it is designed to migrate only
-*LRU* pages. However, there are potential non-lru pages which can be migrated
-in drivers, for example, zsmalloc, virtio-balloon pages.
-
-For virtio-balloon pages, some parts of migration code path have been hooked
-up and added virtio-balloon specific functions to intercept migration logics.
-It's too specific to a driver so other drivers who want to make their pages
-movable would have to add own specific hooks in migration path.
-
-To overclome the problem, VM supports non-LRU page migration which provides
-generic functions for non-LRU movable pages without driver specific hooks
-migration path.
-
-If a driver want to make own pages movable, it should define three functions
-which are function pointers of struct address_space_operations.
-
-1. bool (*isolate_page) (struct page *page, isolate_mode_t mode);
-
-What VM expects on isolate_page function of driver is to return *true*
-if driver isolates page successfully. On returing true, VM marks the page
-as PG_isolated so concurrent isolation in several CPUs skip the page
-for isolation. If a driver cannot isolate the page, it should return *false*.
-
-Once page is successfully isolated, VM uses page.lru fields so driver
-shouldn't expect to preserve values in that fields.
-
-2. int (*migratepage) (struct address_space *mapping,
-		struct page *newpage, struct page *oldpage, enum migrate_mode);
-
-After isolation, VM calls migratepage of driver with isolated page.
-The function of migratepage is to move content of the old page to new page
-and set up fields of struct page newpage. Keep in mind that you should
-indicate to the VM the oldpage is no longer movable via __ClearPageMovable()
-under page_lock if you migrated the oldpage successfully and returns
-MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver
-can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time
-because VM interprets -EAGAIN as "temporal migration failure". On returning
-any error except -EAGAIN, VM will give up the page migration without retrying
-in this time.
-
-Driver shouldn't touch page.lru field VM using in the functions.
-
-3. void (*putback_page)(struct page *);
-
-If migration fails on isolated page, VM should return the isolated page
-to the driver so VM calls driver's putback_page with migration failed page.
-In this function, driver should put the isolated page back to the own data
-structure.
-
-4. non-lru movable page flags
-
-There are two page flags for supporting non-lru movable page.
-
-* PG_movable
-
-Driver should use the below function to make page movable under page_lock.
-
-	void __SetPageMovable(struct page *page, struct address_space *mapping)
-
-It needs argument of address_space for registering migration family functions
-which will be called by VM. Exactly speaking, PG_movable is not a real flag of
-struct page. Rather than, VM reuses page->mapping's lower bits to represent it.
-
-	#define PAGE_MAPPING_MOVABLE 0x2
-	page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
-
-so driver shouldn't access page->mapping directly. Instead, driver should
-use page_mapping which mask off the low two bits of page->mapping under
-page lock so it can get right struct address_space.
-
-For testing of non-lru movable page, VM supports __PageMovable function.
-However, it doesn't guarantee to identify non-lru movable page because
-page->mapping field is unified with other variables in struct page.
-As well, if driver releases the page after isolation by VM, page->mapping
-doesn't have stable value although it has PAGE_MAPPING_MOVABLE
-(Look at __ClearPageMovable). But __PageMovable is cheap to catch whether
-page is LRU or non-lru movable once the page has been isolated. Because
-LRU pages never can have PAGE_MAPPING_MOVABLE in page->mapping. It is also
-good for just peeking to test non-lru movable pages before more expensive
-checking with lock_page in pfn scanning to select victim.
-
-For guaranteeing non-lru movable page, VM provides PageMovable function.
-Unlike __PageMovable, PageMovable functions validates page->mapping and
-mapping->a_ops->isolate_page under lock_page. The lock_page prevents sudden
-destroying of page->mapping.
-
-Driver using __SetPageMovable should clear the flag via __ClearMovablePage
-under page_lock before the releasing the page.
-
-* PG_isolated
-
-To prevent concurrent isolation among several CPUs, VM marks isolated page
-as PG_isolated under lock_page. So if a CPU encounters PG_isolated non-lru
-movable page, it can skip it. Driver doesn't need to manipulate the flag
-because VM will set/clear it automatically. Keep in mind that if driver
-sees PG_isolated page, it means the page have been isolated by VM so it
-shouldn't touch page.lru field.
-PG_isolated is alias with PG_reclaim flag so driver shouldn't use the flag
-for own purpose.
-
-Christoph Lameter, May 8, 2006.
-Minchan Kim, Mar 28, 2016.
diff --git a/Documentation/vm/page_migration.rst b/Documentation/vm/page_migration.rst
new file mode 100644
index 0000000..f68d613
--- /dev/null
+++ b/Documentation/vm/page_migration.rst
@@ -0,0 +1,257 @@
+.. _page_migration:
+
+==============
+Page migration
+==============
+
+Page migration allows the moving of the physical location of pages between
+nodes in a NUMA system while the process is running. This means that the
+virtual addresses that the process sees do not change. However, the
+system rearranges the physical location of those pages.
+
+The main intent of page migration is to reduce the latency of memory access
+by moving pages close to the processor where the process accessing that
+memory is running.
+
+Page migration allows a process to manually relocate the node on which its
+pages are located through the MPOL_MF_MOVE and MPOL_MF_MOVE_ALL options while
+setting a new memory policy via mbind(). The pages of a process can also be
+relocated from another process using the sys_migrate_pages() function call.
+The migrate_pages() function call takes two sets of nodes and moves the pages
+of a process that are located on the source nodes to the destination nodes.
+Page migration functions are provided by the numactl package by Andi Kleen
+(a version later than 0.9.3 is required. Get it from
+ftp://oss.sgi.com/www/projects/libnuma/download/). numactl provides libnuma
+which provides an interface similar to other numa functionality for page
+migration.  ``cat /proc/<pid>/numa_maps`` allows an easy review of where the
+pages of a process are located. See also the numa_maps documentation in the
+proc(5) man page.
+
+Manual migration is useful if, for example, the scheduler has relocated
+a process to a processor on a distant node. A batch scheduler or an
+administrator may detect the situation and move the pages of the process
+nearer to the new processor. The kernel itself only provides manual page
+migration support. Automatic page migration may be implemented through
+user space processes that move pages. A special function call "move_pages"
+allows the moving of individual pages within a process. A NUMA profiler
+may, for example, obtain a log showing frequent off-node accesses and
+may use the result to move pages to more advantageous locations.
+
+Larger installations usually partition the system using cpusets into
+sections of nodes. Paul Jackson has equipped cpusets with the ability to
+move pages when a task is moved to another cpuset (See
+Documentation/cgroup-v1/cpusets.txt).
+Cpusets allow the automation of process locality. If a task is moved to
+a new cpuset then all its pages are moved with it so that the performance
+of the process does not degrade dramatically. The pages of processes in a
+cpuset are also moved if the allowed memory nodes of the cpuset are changed.
+
+All migration techniques preserve the relative location of pages within a
+group of nodes, so a particular memory allocation pattern is maintained even
+after a process has been migrated. This is necessary in order to preserve
+the memory latencies: processes will run with similar performance after
+migration.
+
+Page migration occurs in several steps. First comes a high level description
+for those trying to use migrate_pages() from the kernel (for userspace usage
+see Andi Kleen's numactl package mentioned above), and then a low level
+description of how the details work.
+
+In kernel use of migrate_pages()
+================================
+
+1. Remove pages from the LRU.
+
+   Lists of pages to be migrated are generated by scanning over
+   pages and moving them into lists. This is done by
+   calling isolate_lru_page().
+   Calling isolate_lru_page() increases the references to the page
+   so that it cannot vanish while the page migration occurs.
+   It also prevents the swapper or other scans from encountering
+   the page.
+
+2. We need to have a function of type new_page_t that can be
+   passed to migrate_pages(). This function should figure out
+   how to allocate the correct new page given the old page.
+
+3. The migrate_pages() function is called, which attempts
+   to do the migration. It will call the function to allocate
+   the new page for each page that is considered for
+   moving (see the sketch below).
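+
+Putting the three steps together (a hedged sketch against the signatures in
+this kernel; new_foo_page() and target_nid are hypothetical)::
+
+	static struct page *new_foo_page(struct page *page, unsigned long private)
+	{
+		/* step 2: allocate a replacement page on the target node */
+		return alloc_pages_node((int)private, GFP_HIGHUSER_MOVABLE, 0);
+	}
+
+	LIST_HEAD(pagelist);
+
+	if (!isolate_lru_page(page))	/* step 1: returns 0 on success */
+		list_add_tail(&page->lru, &pagelist);
+
+	/* step 3: attempt the migration */
+	if (migrate_pages(&pagelist, new_foo_page, NULL, target_nid,
+			  MIGRATE_SYNC, MR_SYSCALL))
+		putback_movable_pages(&pagelist);	/* put back what failed */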
+
+How migrate_pages() works
+=========================
+
+migrate_pages() does several passes over its list of pages. A page is moved
+if all references to a page are removable at the time. The page has
+already been removed from the LRU via isolate_lru_page() and the refcount
+is increased so that the page cannot be freed while page migration occurs.
+
+Steps:
+
+1. Lock the page to be migrated
+
+2. Ensure that writeback is complete.
+
+3. Lock the new page that we want to move to. It is locked so that accesses to
+   this (not yet uptodate) page immediately block while the move is in progress.
+
+4. All the page table references to the page are converted to migration
+   entries. This decreases the mapcount of a page. If the resulting
+   mapcount is not zero then we do not migrate the page. All user space
+   processes that attempt to access the page will now wait on the page lock.
+
+5. The i_pages lock is taken. This will cause all processes trying
+   to access the page via the mapping to block on the spinlock.
+
+6. The refcount of the page is examined and we back out if references remain;
+   otherwise we know that we are the only one referencing this page.
+
+7. The radix tree is checked and if it does not contain the pointer to this
+   page then we back out because someone else modified the radix tree.
+
+8. The new page is prepped with some settings from the old page so that
+   accesses to the new page will discover a page with the correct settings.
+
+9. The radix tree is changed to point to the new page.
+
+10. The reference count of the old page is dropped because the address space
+    reference is gone. A reference to the new page is established because
+    the new page is referenced by the address space.
+
+11. The i_pages lock is dropped. With that lookups in the mapping
+    become possible again. Processes will move from spinning on the lock
+    to sleeping on the locked new page.
+
+12. The page contents are copied to the new page.
+
+13. The remaining page flags are copied to the new page.
+
+14. The old page flags are cleared to indicate that the page does
+    not provide any information anymore.
+
+15. Queued up writeback on the new page is triggered.
+
+16. If migration entries were inserted, they are replaced with real ptes. Doing
+    so will enable access for user space processes not already waiting for
+    the page lock.
+
+17. The page locks are dropped from the old and new page.
+    Processes waiting on the page lock will redo their page faults
+    and will reach the new page.
+
+18. The new page is moved to the LRU and can be scanned by the swapper
+    etc. again.
+
+Non-LRU page migration
+======================
+
+Although page migration originally aimed at reducing the latency of memory
+access on NUMA systems, compaction, which wants to create high-order pages,
+is another main customer.
+
+The current problem with the implementation is that it is designed to migrate
+only *LRU* pages. However, there are potential non-LRU pages which drivers
+could migrate, for example zsmalloc and virtio-balloon pages.
+
+For virtio-balloon pages, some parts of the migration code path were hooked
+up with virtio-balloon specific functions to intercept the migration logic.
+This is too specific to one driver, so other drivers that want to make their
+pages movable would have to add their own hooks to the migration path.
+
+To overcome the problem, the VM supports non-LRU page migration, which
+provides generic functions for non-LRU movable pages without driver-specific
+hooks in the migration path.
+
+If a driver wants to make its own pages movable, it should define three
+functions, which are function pointers in struct address_space_operations
+(a combined driver-side sketch appears after the list below).
+
+1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);``
+
+   What the VM expects from a driver's isolate_page function is that it
+   return *true* if the driver isolates the page successfully. On returning
+   true, the VM marks the page as PG_isolated, so concurrent isolation
+   attempts on several CPUs skip the page. If a driver cannot isolate the
+   page, it should return *false*.
+
+   Once the page is successfully isolated, the VM uses the page.lru fields,
+   so the driver should not expect the values in those fields to be preserved.
+
+2. ``int (*migratepage) (struct address_space *mapping, struct page *newpage,
+   struct page *oldpage, enum migrate_mode);``
+
+   After isolation, the VM calls the driver's migratepage with the isolated
+   page. The job of migratepage is to move the contents of the old page to
+   the new page and to set up the fields of struct page newpage. Keep in
+   mind that you should tell the VM that the oldpage is no longer movable
+   via __ClearPageMovable() under the page lock if you migrated the oldpage
+   successfully and return MIGRATEPAGE_SUCCESS. If the driver cannot
+   migrate the page at the moment, it can return -EAGAIN. On -EAGAIN, the
+   VM will retry page migration a short time later because it interprets
+   -EAGAIN as a "temporary migration failure". On returning any error other
+   than -EAGAIN, the VM will give up on migrating the page without
+   retrying.
+
+   In these functions, the driver shouldn't touch the page.lru field,
+   which the VM is using.
+
+3. ``void (*putback_page)(struct page *);``
+
+   If migration fails on an isolated page, the VM should return the
+   isolated page to the driver, so the VM calls the driver's putback_page
+   with the page whose migration failed. In this function, the driver
+   should put the isolated page back into its own data structure.
+
+4. non-lru movable page flags
+
+   There are two page flags for supporting non-lru movable page.
+
+   * PG_movable
+
+     Driver should use the below function to make page movable under page_lock::
+
+	void __SetPageMovable(struct page *page, struct address_space *mapping)
+
+     It needs the address_space argument to register the migration family
+     of functions, which will be called by the VM. Strictly speaking,
+     PG_movable is not a real flag of struct page. Rather, the VM reuses
+     the lower bits of page->mapping to represent it::
+
+	#define PAGE_MAPPING_MOVABLE 0x2
+	page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
+
+     so the driver shouldn't access page->mapping directly. Instead, the
+     driver should use page_mapping(), which masks off the low two bits of
+     page->mapping under the page lock, so that it gets the right struct
+     address_space.
+
+     To test for a non-LRU movable page, the VM provides the
+     __PageMovable() function. However, it doesn't guarantee identification
+     of a non-LRU movable page because the page->mapping field is unified
+     with other variables in struct page. Also, if the driver releases the
+     page after isolation by the VM, page->mapping doesn't have a stable
+     value even though it has PAGE_MAPPING_MOVABLE set (look at
+     __ClearPageMovable). But __PageMovable() is a cheap way to tell
+     whether a page is LRU or non-LRU movable once the page has been
+     isolated, because LRU pages can never have PAGE_MAPPING_MOVABLE in
+     page->mapping. It is also good for just peeking, to test for non-LRU
+     movable pages before the more expensive check with lock_page() during
+     pfn scanning to select a victim.
+
+     To positively identify a non-LRU movable page, the VM provides the
+     PageMovable() function. Unlike __PageMovable(), PageMovable()
+     validates page->mapping and mapping->a_ops->isolate_page under
+     lock_page(), which prevents page->mapping from being destroyed
+     underneath it.
+
+     A driver using __SetPageMovable() should clear the flag via
+     __ClearPageMovable() under the page lock before releasing the page.
+
+   * PG_isolated
+
+     To prevent concurrent isolation among several CPUs, the VM marks an
+     isolated page as PG_isolated under lock_page(). So if a CPU encounters
+     a PG_isolated non-LRU movable page, it can skip it. The driver doesn't
+     need to manipulate the flag because the VM will set and clear it
+     automatically. Keep in mind that if the driver sees a PG_isolated
+     page, it means the page has been isolated by the VM, so it shouldn't
+     touch the page.lru field. PG_isolated is an alias for the PG_reclaim
+     flag, so the driver shouldn't use that flag for its own purposes.
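+
+As referenced above, here is a minimal sketch of how a driver might wire
+the three callbacks together. All mydrv_* names and the mydrv_mapping
+pointer are hypothetical, and locking and error handling are omitted::
+
+	static bool mydrv_isolate_page(struct page *page, isolate_mode_t mode)
+	{
+		/* detach the page from the driver's own lists */
+		return true;	/* the VM then marks the page PG_isolated */
+	}
+
+	static int mydrv_migratepage(struct address_space *mapping,
+				     struct page *newpage, struct page *oldpage,
+				     enum migrate_mode mode)
+	{
+		/* copy contents and private state from oldpage to newpage */
+		__ClearPageMovable(oldpage);	/* done under the page lock */
+		return MIGRATEPAGE_SUCCESS;
+	}
+
+	static void mydrv_putback_page(struct page *page)
+	{
+		/* reinsert the page into the driver's data structures */
+	}
+
+	static const struct address_space_operations mydrv_aops = {
+		.isolate_page	= mydrv_isolate_page,
+		.migratepage	= mydrv_migratepage,
+		.putback_page	= mydrv_putback_page,
+	};
+
+	/* when a page becomes movable, under the page lock: */
+	__SetPageMovable(page, mydrv_mapping);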
+
+Christoph Lameter, May 8, 2006.
+Minchan Kim, Mar 28, 2016.
diff --git a/Documentation/vm/page_owner.rst b/Documentation/vm/page_owner.rst
new file mode 100644
index 0000000..0ed5ab8
--- /dev/null
+++ b/Documentation/vm/page_owner.rst
@@ -0,0 +1,90 @@
+.. _page_owner:
+
+==================================================
+page owner: Tracking who allocated each page
+==================================================
+
+Introduction
+============
+
+page owner is used to track who allocated each page. It can be used to
+debug memory leaks or to find memory hogs. When an allocation happens,
+information about the allocation, such as the call stack and the order of
+the pages, is stored for each page. When we need to know the status of all
+pages, we can get and analyze this information.
+
+Although we already have tracepoints for tracing page allocation and
+freeing, using them to analyze who allocated each page is rather complex.
+We would need to enlarge the trace buffer to prevent it from being
+overwritten before a userspace program is launched, and the launched
+program would have to continually dump the trace buffer for later
+analysis. That changes the system's behaviour more than just keeping the
+information in memory does, so it is bad for debugging.
+
+page owner can also be used for various other purposes. For example,
+accurate fragmentation statistics can be obtained through the gfp flag
+information of each page. This is already implemented and activated if
+page owner is enabled. Other usages are more than welcome.
+
+page owner is disabled by default. So, if you'd like to use it, you need
+to add "page_owner=on" to your boot cmdline. If the kernel is built with
+page owner but page owner is disabled at runtime because the boot option
+is absent, the runtime overhead is marginal. When disabled at runtime, it
+doesn't require memory to store owner information, so there is no runtime
+memory overhead. Page owner inserts just two unlikely branches into the
+page allocator hot path; if it is not enabled, allocation proceeds as in a
+kernel without page owner. These two unlikely branches should not affect
+allocation performance, especially if the static-keys jump-label patching
+functionality is available. The following is the kernel's code size change
+due to this facility.
+
+- Without page owner::
+
+   text    data     bss     dec     hex filename
+   40662   1493     644   42799    a72f mm/page_alloc.o
+
+- With page owner::
+
+   text    data     bss     dec     hex filename
+   40892   1493     644   43029    a815 mm/page_alloc.o
+   1427      24       8    1459     5b3 mm/page_ext.o
+   2722      50       0    2772     ad4 mm/page_owner.o
+
+Although roughly 4 KB of code is added in total, page_alloc.o grows by
+only 230 bytes, and only half of that is in the hot path. Building the
+kernel with page owner and turning it on when needed is a great option for
+debugging kernel memory problems.
+
+There is one caveat caused by an implementation detail. page owner stores
+information in memory obtained from the struct page extension. On sparse
+memory systems, this memory is initialized some time after the page
+allocator starts, so until then many pages can be allocated without owner
+information. To fix this up, those early allocated pages are investigated
+and marked as allocated during the initialization phase. Although this
+doesn't mean that they have the right owner information, at least we can
+tell more accurately whether a page is allocated or not. On a 2 GB x86-64
+VM, 13343 early allocated pages were caught and marked, although most of
+them were allocated by the struct page extension feature itself. Anyway,
+after that, no page is left in an untracked state.
+
+Usage
+=====
+
+1) Build user-space helper::
+
+	cd tools/vm
+	make page_owner_sort
+
+2) Enable page owner: add "page_owner=on" to boot cmdline.
+
+3) Do the job you want to debug.
+
+4) Analyze information from page owner::
+
+	cat /sys/kernel/debug/page_owner > page_owner_full.txt
+	grep -v ^PFN page_owner_full.txt > page_owner.txt
+	./page_owner_sort page_owner.txt sorted_page_owner.txt
+
+   See the results on who allocated each page
+   in ``sorted_page_owner.txt``.
diff --git a/Documentation/vm/page_owner.txt b/Documentation/vm/page_owner.txt
deleted file mode 100644
index ffff143..0000000
--- a/Documentation/vm/page_owner.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-page owner: Tracking about who allocated each page
------------------------------------------------------------
-
-* Introduction
-
-page owner is for the tracking about who allocated each page.
-It can be used to debug memory leak or to find a memory hogger.
-When allocation happens, information about allocation such as call stack
-and order of pages is stored into certain storage for each page.
-When we need to know about status of all pages, we can get and analyze
-this information.
-
-Although we already have tracepoint for tracing page allocation/free,
-using it for analyzing who allocate each page is rather complex. We need
-to enlarge the trace buffer for preventing overlapping until userspace
-program launched. And, launched program continually dump out the trace
-buffer for later analysis and it would change system behviour with more
-possibility rather than just keeping it in memory, so bad for debugging.
-
-page owner can also be used for various purposes. For example, accurate
-fragmentation statistics can be obtained through gfp flag information of
-each page. It is already implemented and activated if page owner is
-enabled. Other usages are more than welcome.
-
-page owner is disabled in default. So, if you'd like to use it, you need
-to add "page_owner=on" into your boot cmdline. If the kernel is built
-with page owner and page owner is disabled in runtime due to no enabling
-boot option, runtime overhead is marginal. If disabled in runtime, it
-doesn't require memory to store owner information, so there is no runtime
-memory overhead. And, page owner inserts just two unlikely branches into
-the page allocator hotpath and if not enabled, then allocation is done
-like as the kernel without page owner. These two unlikely branches should
-not affect to allocation performance, especially if the static keys jump
-label patching functionality is available. Following is the kernel's code
-size change due to this facility.
-
-- Without page owner
-   text    data     bss     dec     hex filename
-  40662    1493     644   42799    a72f mm/page_alloc.o
-
-- With page owner
-   text    data     bss     dec     hex filename
-  40892    1493     644   43029    a815 mm/page_alloc.o
-   1427      24       8    1459     5b3 mm/page_ext.o
-   2722      50       0    2772     ad4 mm/page_owner.o
-
-Although, roughly, 4 KB code is added in total, page_alloc.o increase by
-230 bytes and only half of it is in hotpath. Building the kernel with
-page owner and turning it on if needed would be great option to debug
-kernel memory problem.
-
-There is one notice that is caused by implementation detail. page owner
-stores information into the memory from struct page extension. This memory
-is initialized some time later than that page allocator starts in sparse
-memory system, so, until initialization, many pages can be allocated and
-they would have no owner information. To fix it up, these early allocated
-pages are investigated and marked as allocated in initialization phase.
-Although it doesn't mean that they have the right owner information,
-at least, we can tell whether the page is allocated or not,
-more accurately. On 2GB memory x86-64 VM box, 13343 early allocated pages
-are catched and marked, although they are mostly allocated from struct
-page extension feature. Anyway, after that, no page is left in
-un-tracking state.
-
-* Usage
-
-1) Build user-space helper
-	cd tools/vm
-	make page_owner_sort
-
-2) Enable page owner
-	Add "page_owner=on" to boot cmdline.
-
-3) Do the job what you want to debug
-
-4) Analyze information from page owner
-	cat /sys/kernel/debug/page_owner > page_owner_full.txt
-	grep -v ^PFN page_owner_full.txt > page_owner.txt
-	./page_owner_sort page_owner.txt sorted_page_owner.txt
-
-	See the result about who allocated each page
-	in the sorted_page_owner.txt.
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
deleted file mode 100644
index eafcefa..0000000
--- a/Documentation/vm/pagemap.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-pagemap, from the userspace perspective
----------------------------------------
-
-pagemap is a new (as of 2.6.25) set of interfaces in the kernel that allow
-userspace programs to examine the page tables and related information by
-reading files in /proc.
-
-There are four components to pagemap:
-
- * /proc/pid/pagemap.  This file lets a userspace process find out which
-   physical frame each virtual page is mapped to.  It contains one 64-bit
-   value for each virtual page, containing the following data (from
-   fs/proc/task_mmu.c, above pagemap_read):
-
-    * Bits 0-54  page frame number (PFN) if present
-    * Bits 0-4   swap type if swapped
-    * Bits 5-54  swap offset if swapped
-    * Bit  55    pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
-    * Bit  56    page exclusively mapped (since 4.2)
-    * Bits 57-60 zero
-    * Bit  61    page is file-page or shared-anon (since 3.5)
-    * Bit  62    page swapped
-    * Bit  63    page present
-
-   Since Linux 4.0 only users with the CAP_SYS_ADMIN capability can get PFNs.
-   In 4.0 and 4.1 opens by unprivileged fail with -EPERM.  Starting from
-   4.2 the PFN field is zeroed if the user does not have CAP_SYS_ADMIN.
-   Reason: information about PFNs helps in exploiting Rowhammer vulnerability.
-
-   If the page is not present but in swap, then the PFN contains an
-   encoding of the swap file number and the page's offset into the
-   swap. Unmapped pages return a null PFN. This allows determining
-   precisely which pages are mapped (or in swap) and comparing mapped
-   pages between processes.
-
-   Efficient users of this interface will use /proc/pid/maps to
-   determine which areas of memory are actually mapped and llseek to
-   skip over unmapped regions.
-
- * /proc/kpagecount.  This file contains a 64-bit count of the number of
-   times each page is mapped, indexed by PFN.
-
- * /proc/kpageflags.  This file contains a 64-bit set of flags for each
-   page, indexed by PFN.
-
-   The flags are (from fs/proc/page.c, above kpageflags_read):
-
-     0. LOCKED
-     1. ERROR
-     2. REFERENCED
-     3. UPTODATE
-     4. DIRTY
-     5. LRU
-     6. ACTIVE
-     7. SLAB
-     8. WRITEBACK
-     9. RECLAIM
-    10. BUDDY
-    11. MMAP
-    12. ANON
-    13. SWAPCACHE
-    14. SWAPBACKED
-    15. COMPOUND_HEAD
-    16. COMPOUND_TAIL
-    17. HUGE
-    18. UNEVICTABLE
-    19. HWPOISON
-    20. NOPAGE
-    21. KSM
-    22. THP
-    23. BALLOON
-    24. ZERO_PAGE
-    25. IDLE
-
- * /proc/kpagecgroup.  This file contains a 64-bit inode number of the
-   memory cgroup each page is charged to, indexed by PFN. Only available when
-   CONFIG_MEMCG is set.
-
-Short descriptions to the page flags:
-
- 0. LOCKED
-    page is being locked for exclusive access, eg. by undergoing read/write IO
-
- 7. SLAB
-    page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator
-    When compound page is used, SLUB/SLQB will only set this flag on the head
-    page; SLOB will not flag it at all.
-
-10. BUDDY
-    a free memory block managed by the buddy system allocator
-    The buddy system organizes free memory in blocks of various orders.
-    An order N block has 2^N physically contiguous pages, with the BUDDY flag
-    set for and _only_ for the first page.
-
-15. COMPOUND_HEAD
-16. COMPOUND_TAIL
-    A compound page with order N consists of 2^N physically contiguous pages.
-    A compound page with order 2 takes the form of "HTTT", where H donates its
-    head page and T donates its tail page(s).  The major consumers of compound
-    pages are hugeTLB pages (Documentation/vm/hugetlbpage.txt), the SLUB etc.
-    memory allocators and various device drivers. However in this interface,
-    only huge/giga pages are made visible to end users.
-17. HUGE
-    this is an integral part of a HugeTLB page
-
-19. HWPOISON
-    hardware detected memory corruption on this page: don't touch the data!
-
-20. NOPAGE
-    no page frame exists at the requested address
-
-21. KSM
-    identical memory pages dynamically shared between one or more processes
-
-22. THP
-    contiguous pages which construct transparent hugepages
-
-23. BALLOON
-    balloon compaction page
-
-24. ZERO_PAGE
-    zero page for pfn_zero or huge_zero page
-
-25. IDLE
-    page has not been accessed since it was marked idle (see
-    Documentation/vm/idle_page_tracking.txt). Note that this flag may be
-    stale in case the page was accessed via a PTE. To make sure the flag
-    is up-to-date one has to read /sys/kernel/mm/page_idle/bitmap first.
-
-    [IO related page flags]
- 1. ERROR     IO error occurred
- 3. UPTODATE  page has up-to-date data
-              ie. for file backed page: (in-memory data revision >= on-disk one)
- 4. DIRTY     page has been written to, hence contains new data
-              ie. for file backed page: (in-memory data revision >  on-disk one)
- 8. WRITEBACK page is being synced to disk
-
-    [LRU related page flags]
- 5. LRU         page is in one of the LRU lists
- 6. ACTIVE      page is in the active LRU list
-18. UNEVICTABLE page is in the unevictable (non-)LRU list
-                It is somehow pinned and not a candidate for LRU page reclaims,
-		eg. ramfs pages, shmctl(SHM_LOCK) and mlock() memory segments
- 2. REFERENCED  page has been referenced since last LRU list enqueue/requeue
- 9. RECLAIM     page will be reclaimed soon after its pageout IO completed
-11. MMAP        a memory mapped page
-12. ANON        a memory mapped page that is not part of a file
-13. SWAPCACHE   page is mapped to swap space, ie. has an associated swap entry
-14. SWAPBACKED  page is backed by swap/RAM
-
-The page-types tool in the tools/vm directory can be used to query the
-above flags.
-
-Using pagemap to do something useful:
-
-The general procedure for using pagemap to find out about a process' memory
-usage goes like this:
-
- 1. Read /proc/pid/maps to determine which parts of the memory space are
-    mapped to what.
- 2. Select the maps you are interested in -- all of them, or a particular
-    library, or the stack or the heap, etc.
- 3. Open /proc/pid/pagemap and seek to the pages you would like to examine.
- 4. Read a u64 for each page from pagemap.
- 5. Open /proc/kpagecount and/or /proc/kpageflags.  For each PFN you just
-    read, seek to that entry in the file, and read the data you want.
-
-For example, to find the "unique set size" (USS), which is the amount of
-memory that a process is using that is not shared with any other process,
-you can go through every map in the process, find the PFNs, look those up
-in kpagecount, and tally up the number of pages that are only referenced
-once.
-
-Other notes:
-
-Reading from any of the files will return -EINVAL if you are not starting
-the read on an 8-byte boundary (e.g., if you sought an odd number of bytes
-into the file), or if the size of the read is not a multiple of 8 bytes.
-
-Before Linux 3.11 pagemap bits 55-60 were used for "page-shift" (which is
-always 12 at most architectures). Since Linux 3.11 their meaning changes
-after first clear of soft-dirty bits. Since Linux 4.2 they are used for
-flags unconditionally.
diff --git a/Documentation/vm/remap_file_pages.rst b/Documentation/vm/remap_file_pages.rst
new file mode 100644
index 0000000..7bef671
--- /dev/null
+++ b/Documentation/vm/remap_file_pages.rst
@@ -0,0 +1,33 @@
+.. _remap_file_pages:
+
+==============================
+remap_file_pages() system call
+==============================
+
+The remap_file_pages() system call is used to create a nonlinear mapping,
+that is, a mapping in which the pages of the file are mapped into a
+nonsequential order in memory. The advantage of using remap_file_pages()
+over using repeated calls to mmap(2) is that the former approach does not
+require the kernel to create additional VMA (Virtual Memory Area) data
+structures.
+
+Supporting nonlinear mappings requires a significant amount of non-trivial
+code in the kernel virtual memory subsystem, including hot paths. Also, to
+make nonlinear mappings work, the kernel needs a way to distinguish normal
+page table entries from entries with a file offset (pte_file). The kernel
+reserves a flag in the PTE for this purpose. PTE flags are a scarce
+resource, especially on some CPU architectures. It would be nice to free
+up the flag for other uses.
+
+Fortunately, there are not many users of remap_file_pages() in the wild.
+The only known user is one enterprise RDBMS implementation that uses the
+syscall on 32-bit systems to map files bigger than can linearly fit into
+the 32-bit virtual address space. This use case is no longer critical
+since 64-bit systems are widely available.
+
+The syscall is deprecated and has now been replaced with an emulation. The
+emulation creates new VMAs instead of nonlinear mappings. It will be
+slower for the rare users of remap_file_pages(), but the ABI is preserved.
+
+One side effect of the emulation (apart from performance) is that users
+can hit the vm.max_map_count limit more easily due to the additional VMAs.
+See the comment for DEFAULT_MAX_MAP_COUNT for more details on the limit.
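+
+For reference, a call to the legacy interface looked like the sketch below
+(illustrative only: the file name and page offsets are made up, and error
+checking is omitted)::
+
+	#define _GNU_SOURCE
+	#include <fcntl.h>
+	#include <sys/mman.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		long psz = sysconf(_SC_PAGESIZE);
+		int fd = open("data.bin", O_RDONLY);
+
+		/* linear window over the first four pages of the file */
+		char *win = mmap(NULL, 4 * psz, PROT_READ, MAP_SHARED, fd, 0);
+
+		/* rebind the first window page to file page 3 (nonlinear) */
+		remap_file_pages(win, psz, 0, 3, 0);
+
+		munmap(win, 4 * psz);
+		close(fd);
+		return 0;
+	}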
diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt
deleted file mode 100644
index f609142..0000000
--- a/Documentation/vm/remap_file_pages.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-The remap_file_pages() system call is used to create a nonlinear mapping,
-that is, a mapping in which the pages of the file are mapped into a
-nonsequential order in memory. The advantage of using remap_file_pages()
-over using repeated calls to mmap(2) is that the former approach does not
-require the kernel to create additional VMA (Virtual Memory Area) data
-structures.
-
-Supporting of nonlinear mapping requires significant amount of non-trivial
-code in kernel virtual memory subsystem including hot paths. Also to get
-nonlinear mapping work kernel need a way to distinguish normal page table
-entries from entries with file offset (pte_file). Kernel reserves flag in
-PTE for this purpose. PTE flags are scarce resource especially on some CPU
-architectures. It would be nice to free up the flag for other usage.
-
-Fortunately, there are not many users of remap_file_pages() in the wild.
-It's only known that one enterprise RDBMS implementation uses the syscall
-on 32-bit systems to map files bigger than can linearly fit into 32-bit
-virtual address space. This use-case is not critical anymore since 64-bit
-systems are widely available.
-
-The syscall is deprecated and replaced it with an emulation now. The
-emulation creates new VMAs instead of nonlinear mappings. It's going to
-work slower for rare users of remap_file_pages() but ABI is preserved.
-
-One side effect of emulation (apart from performance) is that user can hit
-vm.max_map_count limit more easily due to additional VMAs. See comment for
-DEFAULT_MAX_MAP_COUNT for more details on the limit.
diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst
new file mode 100644
index 0000000..3a775fd
--- /dev/null
+++ b/Documentation/vm/slub.rst
@@ -0,0 +1,361 @@
+.. _slub:
+
+==========================
+Short users guide for SLUB
+==========================
+
+The basic philosophy of SLUB is very different from SLAB. SLAB
+requires rebuilding the kernel to activate debug options for all
+slab caches. SLUB always includes full debugging but it is off by default.
+SLUB can enable debugging only for selected slabs in order to avoid
+an impact on overall system performance which may make a bug more
+difficult to find.
+
+In order to switch debugging on one can add an option ``slub_debug``
+to the kernel command line. That will enable full debugging for
+all slabs.
+
+Typically one would then use the ``slabinfo`` command to get statistical
+data and perform operations on the slabs. By default ``slabinfo`` only lists
+slabs that have data in them. See "slabinfo -h" for more options when
+running the command. ``slabinfo`` can be compiled with
+::
+
+	gcc -o slabinfo tools/vm/slabinfo.c
+
+Some of the modes of operation of ``slabinfo`` require that slub debugging
+be enabled on the command line. For example, no tracking information will
+be available without debugging on, and validation can only be partially
+performed if debugging was not switched on.
+
+Some more sophisticated uses of slub_debug:
+-------------------------------------------
+
+Parameters may be given to ``slub_debug``. If none is specified then full
+debugging is enabled. Format:
+
+slub_debug=<Debug-Options>
+	Enable options for all slabs
+slub_debug=<Debug-Options>,<slab name>
+	Enable options only for select slabs
+
+
+Possible debug options are::
+
+	F		Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS;
+			sorry, SLAB legacy issues)
+	Z		Red zoning
+	P		Poisoning (object and padding)
+	U		User tracking (free and alloc)
+	T		Trace (please only use on single slabs)
+	A		Toggle failslab filter mark for the cache
+	O		Switch debugging off for caches that would have
+			caused higher minimum slab orders
+	-		Switch all debugging off (useful if the kernel is
+			configured with CONFIG_SLUB_DEBUG_ON)
+
+For example, in order to boot with just sanity checks and red zoning, one
+would specify::
+
+	slub_debug=FZ
+
+Trying to find an issue in the dentry cache? Try::
+
+	slub_debug=,dentry
+
+to only enable debugging on the dentry cache.
+
+Red zoning and tracking may realign the slab.  We can just apply sanity checks
+to the dentry cache with::
+
+	slub_debug=F,dentry
+
+Debugging options may require the minimum possible slab order to increase
+as a result of storing the metadata (for example, caches with PAGE_SIZE
+object sizes).  This has a higher likelihood of resulting in slab
+allocation errors in low memory situations or if there's high
+fragmentation of memory.  To switch off debugging for such caches by
+default, use::
+
+	slub_debug=O
+
+In case you forgot to enable debugging on the kernel command line: It is
+possible to enable debugging manually when the kernel is up. Look at the
+contents of::
+
+	/sys/kernel/slab/<slab name>/
+
+Look at the writable files. Writing 1 to them will enable the
+corresponding debug option. All options can be set on a slab that does
+not contain objects. If the slab already contains objects, then only
+sanity checks and tracing may be enabled. The other options may cause the
+realignment of objects.
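+
+For example, assuming the cache exposes a writable ``sanity_checks``
+attribute on your kernel, sanity checking on the dentry cache could be
+switched on at runtime with::
+
+	echo 1 > /sys/kernel/slab/dentry/sanity_checks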
+
+Careful with tracing: It may spew out lots of information and never stop if
+used on the wrong slab.
+
+Slab merging
+============
+
+If no debug options are specified then SLUB may merge similar slabs together
+in order to reduce overhead and increase cache hotness of objects.
+``slabinfo -a`` displays which slabs were merged together.
+
+Slab validation
+===============
+
+SLUB can validate all objects if the kernel was booted with slub_debug. In
+order to do so you must have the ``slabinfo`` tool. Then you can do
+::
+
+	slabinfo -v
+
+which will test all objects. Output will be generated to the syslog.
+
+This also works in a more limited way if boot was without slab debug.
+In that case ``slabinfo -v`` simply tests all reachable objects. Usually
+these are in the cpu slabs and the partial slabs. Full slabs are not
+tracked by SLUB in a non debug situation.
+
+Getting more performance
+========================
+
+To some degree SLUB's performance is limited by the need to take the
+list_lock once in a while to deal with partial slabs. That overhead is
+governed by the order of the allocation for each slab. The allocations
+can be influenced by kernel parameters::
+
+	slub_min_objects=x		(default 4)
+	slub_min_order=x		(default 0)
+	slub_max_order=x		(default 3 (PAGE_ALLOC_COSTLY_ORDER))
+
+``slub_min_objects``
+	allows specifying how many objects must at least fit into one
+	slab in order for the allocation order to be acceptable.  In
+	general slub will be able to perform this number of
+	allocations on a slab without consulting centralized resources
+	(list_lock) where contention may occur.
+
+``slub_min_order``
+	specifies a minimum order of slabs. It has a similar effect to
+	``slub_min_objects``.
+
+``slub_max_order``
+	specifies the order at which ``slub_min_objects`` should no
+	longer be checked. This is useful to keep SLUB from trying to
+	generate very large order pages to fit ``slub_min_objects``
+	objects of a slab cache with large object sizes into one high
+	order page. Setting the command line parameter
+	``debug_guardpage_minorder=N`` (N > 0) forces ``slub_max_order``
+	to 0, which causes the minimum possible order to be used for
+	slab allocations.
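+
+For example, booting with the following (illustrative values only) asks
+SLUB to size each slab so that at least 16 objects fit into it, while
+never going above order-4 pages::
+
+	slub_min_objects=16 slub_max_order=4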
+
+SLUB Debug output
+=================
+
+Here is a sample of slub debug output::
+
+ ====================================================================
+ BUG kmalloc-8: Redzone overwritten
+ --------------------------------------------------------------------
+
+ INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
+ INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58
+ INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
+ INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
+
+ Bytes b4 0xc90f6d10:  00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+   Object 0xc90f6d20:  31 30 31 39 2e 30 30 35                         1019.005
+  Redzone 0xc90f6d28:  00 cc cc cc                                     .
+  Padding 0xc90f6d50:  5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
+
+   [<c010523d>] dump_trace+0x63/0x1eb
+   [<c01053df>] show_trace_log_lvl+0x1a/0x2f
+   [<c010601d>] show_trace+0x12/0x14
+   [<c0106035>] dump_stack+0x16/0x18
+   [<c017e0fa>] object_err+0x143/0x14b
+   [<c017e2cc>] check_object+0x66/0x234
+   [<c017eb43>] __slab_free+0x239/0x384
+   [<c017f446>] kfree+0xa6/0xc6
+   [<c02e2335>] get_modalias+0xb9/0xf5
+   [<c02e23b7>] dmi_dev_uevent+0x27/0x3c
+   [<c027866a>] dev_uevent+0x1ad/0x1da
+   [<c0205024>] kobject_uevent_env+0x20a/0x45b
+   [<c020527f>] kobject_uevent+0xa/0xf
+   [<c02779f1>] store_uevent+0x4f/0x58
+   [<c027758e>] dev_attr_store+0x29/0x2f
+   [<c01bec4f>] sysfs_write_file+0x16e/0x19c
+   [<c0183ba7>] vfs_write+0xd1/0x15a
+   [<c01841d7>] sys_write+0x3d/0x72
+   [<c0104112>] sysenter_past_esp+0x5f/0x99
+   [<b7f7b410>] 0xb7f7b410
+   =======================
+
+ FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
+
+If SLUB encounters a corrupted object (full detection requires the kernel
+to be booted with slub_debug) then the following output will be dumped
+into the syslog:
+
+1. Description of the problem encountered
+
+   This will be a message in the system log starting with::
+
+     ===============================================
+     BUG <slab cache affected>: <What went wrong>
+     -----------------------------------------------
+
+     INFO: <corruption start>-<corruption_end> <more info>
+     INFO: Slab <address> <slab information>
+     INFO: Object <address> <object information>
+     INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
+	cpu> pid=<pid of the process>
+     INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
+	pid=<pid of the process>
+
+   (Object allocation / free information is only available if SLAB_STORE_USER is
+   set for the slab. slub_debug sets that option)
+
+2. The object contents if an object was involved.
+
+   Various types of lines can follow the BUG SLUB line:
+
+   Bytes b4 <address> : <bytes>
+	Shows a few bytes before the object where the problem was detected.
+	Can be useful if the corruption does not stop with the start of the
+	object.
+
+   Object <address> : <bytes>
+	The bytes of the object. If the object is inactive then the bytes
+	typically contain poison values. Any non-poison value shows a
+	corruption by a write after free.
+
+   Redzone <address> : <bytes>
+	The Redzone following the object. The Redzone is used to detect
+	writes after the object. All bytes should always have the same
+	value. If there is any deviation then it is due to a write after
+	the object boundary.
+
+	(Redzone information is only available if SLAB_RED_ZONE is set.
+	slub_debug sets that option)
+
+   Padding <address> : <bytes>
+	Unused data to fill up the space in order to get the next object
+	properly aligned. In the debug case we make sure that there are
+	at least 4 bytes of padding. This allows the detection of writes
+	before the object.
+
+3. A stackdump
+
+   The stackdump describes the location where the error was detected. The
+   cause of the corruption is more likely to be found by looking at the
+   function that allocated or freed the object.
+
+4. Report on how the problem was dealt with in order to ensure the continued
+   operation of the system.
+
+   These are messages in the system log beginning with::
+
+	FIX <slab cache affected>: <corrective action taken>
+
+   In the above sample SLUB found that the Redzone of an active object has
+   been overwritten. Here a string of 8 characters was written into a slab
+   that has a length of 8 characters. However, an 8 character string needs
+   a terminating 0. That zero has overwritten the first byte of the
+   Redzone field. After reporting the details of the issue encountered,
+   the FIX SLUB message tells us that SLUB has restored the Redzone to its
+   proper value, and system operations continue.
+
+Emergency operations
+====================
+
+Minimal debugging (sanity checks alone) can be enabled by booting with::
+
+	slub_debug=F
+
+This will generally be enough to enable the resiliency features of slub,
+which will keep the system running even if a bad kernel component keeps
+corrupting objects. This may be important for production systems.
+Performance will be impacted by the sanity checks and there will be a
+continual stream of error messages to the syslog, but no additional memory
+will be used (unlike full debugging).
+
+No guarantees. The kernel component still needs to be fixed. Performance
+may be optimized further by locating the slab that experiences corruption
+and enabling debugging only for that cache, e.g.::
+
+	slub_debug=F,dentry
+
+If the corruption occurs by writing after the end of the object then it
+may be advisable to enable a Redzone to avoid corrupting the beginning
+of other objects::
+
+	slub_debug=FZ,dentry
+
+Extended slabinfo mode and plotting
+===================================
+
+The ``slabinfo`` tool has a special 'extended' ('-X') mode that includes:
+ - Slabcache Totals
+ - Slabs sorted by size (up to -N <num> slabs, default 1)
+ - Slabs sorted by loss (up to -N <num> slabs, default 1)
+
+Additionally, in this mode ``slabinfo`` does not dynamically scale
+sizes (G/M/K) and reports everything in bytes (this functionality is
+also available to other slabinfo modes via the '-B' option), which makes
+reporting more precise and accurate. Moreover, in some sense the '-X'
+mode also simplifies the analysis of slab behaviour, because its
+output can be plotted using the ``slabinfo-gnuplot.sh`` script. So it
+moves the analysis from looking through numbers (tons of numbers)
+to something easier -- visual analysis.
+
+To generate plots:
+
+a) collect slabinfo extended records, for example::
+
+	while [ 1 ]; do slabinfo -X >> FOO_STATS; sleep 1; done
+
+b) pass stats file(-s) to ``slabinfo-gnuplot.sh`` script::
+
+	slabinfo-gnuplot.sh FOO_STATS [FOO_STATS2 .. FOO_STATSN]
+
+   The ``slabinfo-gnuplot.sh`` script will pre-process the collected
+   records and generate 3 png files (and 3 pre-processing cache files)
+   per STATS file:
+   - Slabcache Totals: FOO_STATS-totals.png
+   - Slabs sorted by size: FOO_STATS-slabs-by-size.png
+   - Slabs sorted by loss: FOO_STATS-slabs-by-loss.png
+
+Another use case where ``slabinfo-gnuplot.sh`` can be useful is when you
+need to compare slab behaviour "prior to" and "after" some code
+modification.  To help you out there, the ``slabinfo-gnuplot.sh`` script
+can 'merge' the `Slabcache Totals` sections from different
+measurements. To visually compare N plots:
+
+a) Collect as many STATS1, STATS2, .. STATSN files as you need::
+
+	while [ 1 ]; do slabinfo -X >> STATS<X>; sleep 1; done
+
+b) Pre-process those STATS files::
+
+	slabinfo-gnuplot.sh STATS1 STATS2 .. STATSN
+
+c) Execute ``slabinfo-gnuplot.sh`` in '-t' mode, passing all of the
+   generated pre-processed \*-totals::
+
+	slabinfo-gnuplot.sh -t STATS1-totals STATS2-totals .. STATSN-totals
+
+   This will produce a single plot (png file).
+
+   Plots, expectedly, can be large, so some fluctuations or small spikes
+   can go unnoticed. To deal with that, ``slabinfo-gnuplot.sh`` has two
+   options to 'zoom-in'/'zoom-out':
+
+   a) ``-s %d,%d`` -- overrides the default image width and height
+   b) ``-r %d,%d`` -- specifies a range of samples to use (for example,
+      in ``slabinfo -X >> FOO_STATS; sleep 1;`` case, using a ``-r
+      40,60`` range will plot only samples collected between 40th and
+      60th seconds).
+
+Christoph Lameter, May 30, 2007
+Sergey Senozhatsky, October 23, 2015
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
deleted file mode 100644
index 8465241..0000000
--- a/Documentation/vm/slub.txt
+++ /dev/null
@@ -1,342 +0,0 @@
-Short users guide for SLUB
---------------------------
-
-The basic philosophy of SLUB is very different from SLAB. SLAB
-requires rebuilding the kernel to activate debug options for all
-slab caches. SLUB always includes full debugging but it is off by default.
-SLUB can enable debugging only for selected slabs in order to avoid
-an impact on overall system performance which may make a bug more
-difficult to find.
-
-In order to switch debugging on one can add an option "slub_debug"
-to the kernel command line. That will enable full debugging for
-all slabs.
-
-Typically one would then use the "slabinfo" command to get statistical
-data and perform operation on the slabs. By default slabinfo only lists
-slabs that have data in them. See "slabinfo -h" for more options when
-running the command. slabinfo can be compiled with
-
-gcc -o slabinfo tools/vm/slabinfo.c
-
-Some of the modes of operation of slabinfo require that slub debugging
-be enabled on the command line. F.e. no tracking information will be
-available without debugging on and validation can only partially
-be performed if debugging was not switched on.
-
-Some more sophisticated uses of slub_debug:
--------------------------------------------
-
-Parameters may be given to slub_debug. If none is specified then full
-debugging is enabled. Format:
-
-slub_debug=<Debug-Options>       Enable options for all slabs
-slub_debug=<Debug-Options>,<slab name>
-				Enable options only for select slabs
-
-Possible debug options are
-	F		Sanity checks on (enables SLAB_DEBUG_CONSISTENCY_CHECKS
-			Sorry SLAB legacy issues)
-	Z		Red zoning
-	P		Poisoning (object and padding)
-	U		User tracking (free and alloc)
-	T		Trace (please only use on single slabs)
-	A		Toggle failslab filter mark for the cache
-	O		Switch debugging off for caches that would have
-			caused higher minimum slab orders
-	-		Switch all debugging off (useful if the kernel is
-			configured with CONFIG_SLUB_DEBUG_ON)
-
-F.e. in order to boot just with sanity checks and red zoning one would specify:
-
-	slub_debug=FZ
-
-Trying to find an issue in the dentry cache? Try
-
-	slub_debug=,dentry
-
-to only enable debugging on the dentry cache.
-
-Red zoning and tracking may realign the slab.  We can just apply sanity checks
-to the dentry cache with
-
-	slub_debug=F,dentry
-
-Debugging options may require the minimum possible slab order to increase as
-a result of storing the metadata (for example, caches with PAGE_SIZE object
-sizes).  This has a higher liklihood of resulting in slab allocation errors
-in low memory situations or if there's high fragmentation of memory.  To
-switch off debugging for such caches by default, use
-
-	slub_debug=O
-
-In case you forgot to enable debugging on the kernel command line: It is
-possible to enable debugging manually when the kernel is up. Look at the
-contents of:
-
-/sys/kernel/slab/<slab name>/
-
-Look at the writable files. Writing 1 to them will enable the
-corresponding debug option. All options can be set on a slab that does
-not contain objects. If the slab already contains objects then sanity checks
-and tracing may only be enabled. The other options may cause the realignment
-of objects.
-
-Careful with tracing: It may spew out lots of information and never stop if
-used on the wrong slab.
-
-Slab merging
-------------
-
-If no debug options are specified then SLUB may merge similar slabs together
-in order to reduce overhead and increase cache hotness of objects.
-slabinfo -a displays which slabs were merged together.
-
-Slab validation
----------------
-
-SLUB can validate all object if the kernel was booted with slub_debug. In
-order to do so you must have the slabinfo tool. Then you can do
-
-slabinfo -v
-
-which will test all objects. Output will be generated to the syslog.
-
-This also works in a more limited way if boot was without slab debug.
-In that case slabinfo -v simply tests all reachable objects. Usually
-these are in the cpu slabs and the partial slabs. Full slabs are not
-tracked by SLUB in a non debug situation.
-
-Getting more performance
-------------------------
-
-To some degree SLUB's performance is limited by the need to take the
-list_lock once in a while to deal with partial slabs. That overhead is
-governed by the order of the allocation for each slab. The allocations
-can be influenced by kernel parameters:
-
-slub_min_objects=x		(default 4)
-slub_min_order=x		(default 0)
-slub_max_order=x		(default 3 (PAGE_ALLOC_COSTLY_ORDER))
-
-slub_min_objects allows to specify how many objects must at least fit
-into one slab in order for the allocation order to be acceptable.
-In general slub will be able to perform this number of allocations
-on a slab without consulting centralized resources (list_lock) where
-contention may occur.
-
-slub_min_order specifies a minim order of slabs. A similar effect like
-slub_min_objects.
-
-slub_max_order specified the order at which slub_min_objects should no
-longer be checked. This is useful to avoid SLUB trying to generate
-super large order pages to fit slub_min_objects of a slab cache with
-large object sizes into one high order page. Setting command line
-parameter debug_guardpage_minorder=N (N > 0), forces setting
-slub_max_order to 0, what cause minimum possible order of slabs
-allocation.
-
-SLUB Debug output
------------------
-
-Here is a sample of slub debug output:
-
-====================================================================
-BUG kmalloc-8: Redzone overwritten
---------------------------------------------------------------------
-
-INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
-INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58
-INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
-INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
-
-Bytes b4 0xc90f6d10:  00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
-  Object 0xc90f6d20:  31 30 31 39 2e 30 30 35                         1019.005
- Redzone 0xc90f6d28:  00 cc cc cc                                     .
- Padding 0xc90f6d50:  5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
-
-  [<c010523d>] dump_trace+0x63/0x1eb
-  [<c01053df>] show_trace_log_lvl+0x1a/0x2f
-  [<c010601d>] show_trace+0x12/0x14
-  [<c0106035>] dump_stack+0x16/0x18
-  [<c017e0fa>] object_err+0x143/0x14b
-  [<c017e2cc>] check_object+0x66/0x234
-  [<c017eb43>] __slab_free+0x239/0x384
-  [<c017f446>] kfree+0xa6/0xc6
-  [<c02e2335>] get_modalias+0xb9/0xf5
-  [<c02e23b7>] dmi_dev_uevent+0x27/0x3c
-  [<c027866a>] dev_uevent+0x1ad/0x1da
-  [<c0205024>] kobject_uevent_env+0x20a/0x45b
-  [<c020527f>] kobject_uevent+0xa/0xf
-  [<c02779f1>] store_uevent+0x4f/0x58
-  [<c027758e>] dev_attr_store+0x29/0x2f
-  [<c01bec4f>] sysfs_write_file+0x16e/0x19c
-  [<c0183ba7>] vfs_write+0xd1/0x15a
-  [<c01841d7>] sys_write+0x3d/0x72
-  [<c0104112>] sysenter_past_esp+0x5f/0x99
-  [<b7f7b410>] 0xb7f7b410
-  =======================
-
-FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
-
-If SLUB encounters a corrupted object (full detection requires the kernel
-to be booted with slub_debug) then the following output will be dumped
-into the syslog:
-
-1. Description of the problem encountered
-
-This will be a message in the system log starting with
-
-===============================================
-BUG <slab cache affected>: <What went wrong>
------------------------------------------------
-
-INFO: <corruption start>-<corruption_end> <more info>
-INFO: Slab <address> <slab information>
-INFO: Object <address> <object information>
-INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
-	cpu> pid=<pid of the process>
-INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
-	 pid=<pid of the process>
-
-(Object allocation / free information is only available if SLAB_STORE_USER is
-set for the slab. slub_debug sets that option)
-
-2. The object contents if an object was involved.
-
-Various types of lines can follow the BUG SLUB line:
-
-Bytes b4 <address> : <bytes>
-	Shows a few bytes before the object where the problem was detected.
-	Can be useful if the corruption does not stop with the start of the
-	object.
-
-Object <address> : <bytes>
-	The bytes of the object. If the object is inactive then the bytes
-	typically contain poison values. Any non-poison value shows a
-	corruption by a write after free.
-
-Redzone <address> : <bytes>
-	The Redzone following the object. The Redzone is used to detect
-	writes after the object. All bytes should always have the same
-	value. If there is any deviation then it is due to a write after
-	the object boundary.
-
-	(Redzone information is only available if SLAB_RED_ZONE is set.
-	slub_debug sets that option)
-
-Padding <address> : <bytes>
-	Unused data to fill up the space in order to get the next object
-	properly aligned. In the debug case we make sure that there are
-	at least 4 bytes of padding. This allows the detection of writes
-	before the object.
-
-3. A stackdump
-
-The stackdump describes the location where the error was detected. The cause
-of the corruption is may be more likely found by looking at the function that
-allocated or freed the object.
-
-4. Report on how the problem was dealt with in order to ensure the continued
-operation of the system.
-
-These are messages in the system log beginning with
-
-FIX <slab cache affected>: <corrective action taken>
-
-In the above sample SLUB found that the Redzone of an active object has
-been overwritten. Here a string of 8 characters was written into a slab that
-has the length of 8 characters. However, a 8 character string needs a
-terminating 0. That zero has overwritten the first byte of the Redzone field.
-After reporting the details of the issue encountered the FIX SLUB message
-tells us that SLUB has restored the Redzone to its proper value and then
-system operations continue.
-
-Emergency operations:
----------------------
-
-Minimal debugging (sanity checks alone) can be enabled by booting with
-
-	slub_debug=F
-
-This will be generally be enough to enable the resiliency features of slub
-which will keep the system running even if a bad kernel component will
-keep corrupting objects. This may be important for production systems.
-Performance will be impacted by the sanity checks and there will be a
-continual stream of error messages to the syslog but no additional memory
-will be used (unlike full debugging).
-
-No guarantees. The kernel component still needs to be fixed. Performance
-may be optimized further by locating the slab that experiences corruption
-and enabling debugging only for that cache
-
-I.e.
-
-	slub_debug=F,dentry
-
-If the corruption occurs by writing after the end of the object then it
-may be advisable to enable a Redzone to avoid corrupting the beginning
-of other objects.
-
-	slub_debug=FZ,dentry
-
-Extended slabinfo mode and plotting
------------------------------------
-
-The slabinfo tool has a special 'extended' ('-X') mode that includes:
- - Slabcache Totals
- - Slabs sorted by size (up to -N <num> slabs, default 1)
- - Slabs sorted by loss (up to -N <num> slabs, default 1)
-
-Additionally, in this mode slabinfo does not dynamically scale sizes (G/M/K)
-and reports everything in bytes (this functionality is also available to
-other slabinfo modes via '-B' option) which makes reporting more precise and
-accurate. Moreover, in some sense the `-X' mode also simplifies the analysis
-of slabs' behaviour, because its output can be plotted using the
-slabinfo-gnuplot.sh script. So it pushes the analysis from looking through
-the numbers (tons of numbers) to something easier -- visual analysis.
-
-To generate plots:
-a) collect slabinfo extended records, for example:
-
-  while [ 1 ]; do slabinfo -X >> FOO_STATS; sleep 1; done
-
-b) pass stats file(-s) to slabinfo-gnuplot.sh script:
-  slabinfo-gnuplot.sh FOO_STATS [FOO_STATS2 .. FOO_STATSN]
-
-The slabinfo-gnuplot.sh script will pre-processes the collected records
-and generates 3 png files (and 3 pre-processing cache files) per STATS
-file:
- - Slabcache Totals: FOO_STATS-totals.png
- - Slabs sorted by size: FOO_STATS-slabs-by-size.png
- - Slabs sorted by loss: FOO_STATS-slabs-by-loss.png
-
-Another use case, when slabinfo-gnuplot can be useful, is when you need
-to compare slabs' behaviour "prior to" and "after" some code modification.
-To help you out there, slabinfo-gnuplot.sh script can 'merge' the
-`Slabcache Totals` sections from different measurements. To visually
-compare N plots:
-
-a) Collect as many STATS1, STATS2, .. STATSN files as you need
-  while [ 1 ]; do slabinfo -X >> STATS<X>; sleep 1; done
-
-b) Pre-process those STATS files
-  slabinfo-gnuplot.sh STATS1 STATS2 .. STATSN
-
-c) Execute slabinfo-gnuplot.sh in '-t' mode, passing all of the
-generated pre-processed *-totals
-  slabinfo-gnuplot.sh -t STATS1-totals STATS2-totals .. STATSN-totals
-
-This will produce a single plot (png file).
-
-Plots, expectedly, can be large so some fluctuations or small spikes
-can go unnoticed. To deal with that, `slabinfo-gnuplot.sh' has two
-options to 'zoom-in'/'zoom-out':
- a) -s %d,%d  overwrites the default image width and heigh
- b) -r %d,%d  specifies a range of samples to use (for example,
-              in `slabinfo -X >> FOO_STATS; sleep 1;' case, using
-              a "-r 40,60" range will plot only samples collected
-              between 40th and 60th seconds).
-
-Christoph Lameter, May 30, 2007
-Sergey Senozhatsky, October 23, 2015
diff --git a/Documentation/vm/soft-dirty.txt b/Documentation/vm/soft-dirty.txt
deleted file mode 100644
index 55684d1..0000000
--- a/Documentation/vm/soft-dirty.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-                            SOFT-DIRTY PTEs
-
-  The soft-dirty is a bit on a PTE which helps to track which pages a task
-writes to. In order to do this tracking one should
-
-  1. Clear soft-dirty bits from the task's PTEs.
-
-     This is done by writing "4" into the /proc/PID/clear_refs file of the
-     task in question.
-
-  2. Wait some time.
-
-  3. Read soft-dirty bits from the PTEs.
-
-     This is done by reading from the /proc/PID/pagemap. The bit 55 of the
-     64-bit qword is the soft-dirty one. If set, the respective PTE was
-     written to since step 1.
-
-
-  Internally, to do this tracking, the writable bit is cleared from PTEs
-when the soft-dirty bit is cleared. So, after this, when the task tries to
-modify a page at some virtual address the #PF occurs and the kernel sets
-the soft-dirty bit on the respective PTE.
-
-  Note, that although all the task's address space is marked as r/o after the
-soft-dirty bits clear, the #PF-s that occur after that are processed fast.
-This is so, since the pages are still mapped to physical memory, and thus all
-the kernel does is finds this fact out and puts both writable and soft-dirty
-bits on the PTE.
-
-  While in most cases tracking memory changes by #PF-s is more than enough
-there is still a scenario when we can lose soft dirty bits -- a task
-unmaps a previously mapped memory region and then maps a new one at exactly
-the same place. When unmap is called, the kernel internally clears PTE values
-including soft dirty bits. To notify user space application about such
-memory region renewal the kernel always marks new memory regions (and
-expanded regions) as soft dirty.
-
-  This feature is actively used by the checkpoint-restore project. You
-can find more details about it on http://criu.org
-
-
--- Pavel Emelyanov, Apr 9, 2013
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock
deleted file mode 100644
index 62842a8..0000000
--- a/Documentation/vm/split_page_table_lock
+++ /dev/null
@@ -1,94 +0,0 @@
-Split page table lock
-=====================
-
-Originally, mm->page_table_lock spinlock protected all page tables of the
-mm_struct. But this approach leads to poor page fault scalability of
-multi-threaded applications due high contention on the lock. To improve
-scalability, split page table lock was introduced.
-
-With split page table lock we have separate per-table lock to serialize
-access to the table. At the moment we use split lock for PTE and PMD
-tables. Access to higher level tables protected by mm->page_table_lock.
-
-There are helpers to lock/unlock a table and other accessor functions:
- - pte_offset_map_lock()
-	maps pte and takes PTE table lock, returns pointer to the taken
-	lock;
- - pte_unmap_unlock()
-	unlocks and unmaps PTE table;
- - pte_alloc_map_lock()
-	allocates PTE table if needed and take the lock, returns pointer
-	to taken lock or NULL if allocation failed;
- - pte_lockptr()
-	returns pointer to PTE table lock;
- - pmd_lock()
-	takes PMD table lock, returns pointer to taken lock;
- - pmd_lockptr()
-	returns pointer to PMD table lock;
-
-Split page table lock for PTE tables is enabled compile-time if
-CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less or equal to NR_CPUS.
-If split lock is disabled, all tables guaded by mm->page_table_lock.
-
-Split page table lock for PMD tables is enabled, if it's enabled for PTE
-tables and the architecture supports it (see below).
-
-Hugetlb and split page table lock
----------------------------------
-
-Hugetlb can support several page sizes. We use split lock only for PMD
-level, but not for PUD.
-
-Hugetlb-specific helpers:
- - huge_pte_lock()
-	takes pmd split lock for PMD_SIZE page, mm->page_table_lock
-	otherwise;
- - huge_pte_lockptr()
-	returns pointer to table lock;
-
-Support of split page table lock by an architecture
----------------------------------------------------
-
-There's no need in special enabling of PTE split page table lock:
-everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
-which must be called on PTE table allocation / freeing.
-
-Make sure the architecture doesn't use slab allocator for page table
-allocation: slab uses page->slab_cache for its pages.
-This field shares storage with page->ptl.
-
-PMD split lock only makes sense if you have more than two page table
-levels.
-
-PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table
-allocation and pgtable_pmd_page_dtor() on freeing.
-
-Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and
-pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing
-paths: i.e X86_PAE preallocate few PMDs on pgd_alloc().
-
-With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
-
-NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
-be handled properly.
-
-page->ptl
----------
-
-page->ptl is used to access split page table lock, where 'page' is struct
-page of page containing the table. It shares storage with page->private
-(and few other fields in union).
-
-To avoid increasing size of struct page and have best performance, we use a
-trick:
- - if spinlock_t fits into long, we use page->ptr as spinlock, so we
-   can avoid indirect access and save a cache line.
- - if size of spinlock_t is bigger then size of long, we use page->ptl as
-   pointer to spinlock_t and allocate it dynamically. This allows to use
-   split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs
-   one more cache line for indirect access;
-
-The spinlock_t allocated in pgtable_page_ctor() for PTE table and in
-pgtable_pmd_page_ctor() for PMD table.
-
-Please, never access page->ptl directly -- use appropriate helper.
diff --git a/Documentation/vm/split_page_table_lock.rst b/Documentation/vm/split_page_table_lock.rst
new file mode 100644
index 0000000..889b00b
--- /dev/null
+++ b/Documentation/vm/split_page_table_lock.rst
@@ -0,0 +1,100 @@
+.. _split_page_table_lock:
+
+=====================
+Split page table lock
+=====================
+
+Originally, the mm->page_table_lock spinlock protected all page tables of the
+mm_struct. But this approach leads to poor page fault scalability in
+multi-threaded applications due to high contention on the lock. To improve
+scalability, the split page table lock was introduced.
+
+With the split page table lock we have a separate per-table lock to serialize
+access to the table. At the moment we use the split lock for PTE and PMD
+tables. Access to higher level tables is protected by mm->page_table_lock.
+
+There are helpers to lock/unlock a table and other accessor functions (a
+usage sketch follows the list):
+
+ - pte_offset_map_lock()
+	maps the PTE and takes the PTE table lock; returns the PTE pointer
+	and, via its last argument, a pointer to the taken lock;
+ - pte_unmap_unlock()
+	unlocks and unmaps the PTE table;
+ - pte_alloc_map_lock()
+	allocates the PTE table if needed and takes the lock; returns the
+	PTE pointer, or NULL if allocation failed;
+ - pte_lockptr()
+	returns pointer to PTE table lock;
+ - pmd_lock()
+	takes PMD table lock, returns pointer to taken lock;
+ - pmd_lockptr()
+	returns pointer to PMD table lock;
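+
+As a rough usage sketch (mm, pmd and addr are assumed to come from the
+surrounding page table walk)::
+
+	spinlock_t *ptl;
+	pte_t *pte;
+
+	/* map the PTE table and take its split lock */
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	/* ... inspect or modify the entry under the lock ... */
+	/* unlock and unmap the PTE table */
+	pte_unmap_unlock(pte, ptl);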
+
+Split page table lock for PTE tables is enabled at compile time if
+CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less than or equal to NR_CPUS.
+If the split lock is disabled, all tables are guarded by mm->page_table_lock.
+
+Split page table lock for PMD tables is enabled if it's enabled for PTE
+tables and the architecture supports it (see below).
+
+Hugetlb and split page table lock
+=================================
+
+Hugetlb can support several page sizes. We use the split lock only at the
+PMD level, but not for PUD.
+
+Hugetlb-specific helpers (a brief usage sketch follows the list):
+
+ - huge_pte_lock()
+	takes the PMD split lock for a PMD_SIZE page, mm->page_table_lock
+	otherwise;
+ - huge_pte_lockptr()
+	returns pointer to table lock;
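+
+A brief usage sketch (h, mm and ptep are assumed to come from the
+surrounding hugetlb walk)::
+
+	spinlock_t *ptl;
+
+	/* PMD split lock for a PMD_SIZE hstate,
+	 * mm->page_table_lock otherwise */
+	ptl = huge_pte_lock(h, mm, ptep);
+	/* ... examine or update the hugetlb entry ... */
+	spin_unlock(ptl);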
+
+Support of split page table lock by an architecture
+===================================================
+
+There's no need to specially enable the PTE split page table lock:
+everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
+which must be called on PTE table allocation / freeing.
+
+Make sure the architecture doesn't use the slab allocator for page table
+allocation: slab uses page->slab_cache for its pages.
+This field shares storage with page->ptl.
+
+PMD split lock only makes sense if you have more than two page table
+levels.
+
+Enabling the PMD split lock requires a pgtable_pmd_page_ctor() call on PMD
+table allocation and pgtable_pmd_page_dtor() on freeing.
+
+Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and
+pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing
+paths: e.g., X86_PAE preallocates a few PMDs in pgd_alloc().
+
+With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
+
+NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- the
+failure must be handled properly, as shown in the sketch below.
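+
+As a sketch of the required handling (example_pte_alloc_one() is a
+hypothetical architecture hook, not real code from any port)::
+
+	static struct page *example_pte_alloc_one(struct mm_struct *mm)
+	{
+		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+
+		if (!page)
+			return NULL;
+		/* the constructor can fail, e.g. when the split lock must
+		 * be allocated dynamically */
+		if (!pgtable_page_ctor(page)) {
+			__free_page(page);
+			return NULL;
+		}
+		return page;	/* call pgtable_page_dtor() before freeing */
+	}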
+
+page->ptl
+=========
+
+page->ptl is used to access the split page table lock, where 'page' is the
+struct page of the page containing the table. It shares storage with
+page->private (and a few other fields in the union).
+
+To avoid increasing the size of struct page and to get the best performance,
+we use a trick (sketched schematically after the list):
+
+ - if spinlock_t fits into a long, we use page->ptl as the spinlock, so we
+   can avoid indirect access and save a cache line;
+ - if the size of spinlock_t is bigger than the size of a long, we use
+   page->ptl as a pointer to the spinlock_t and allocate it dynamically.
+   This allows using the split lock with DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC
+   enabled, but costs one more cache line for indirect access;
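+
+Schematically (a simplified sketch of the relevant part of struct page's
+union, not the verbatim definition from include/linux/mm_types.h)::
+
+	union {
+		unsigned long private;
+	#if ALLOC_SPLIT_PTLOCKS		/* spinlock_t wider than a long */
+		spinlock_t *ptl;	/* allocated by the ctor */
+	#else
+		spinlock_t ptl;		/* embedded, no indirection */
+	#endif
+	};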
+
+The spinlock_t is allocated in pgtable_page_ctor() for PTE tables and in
+pgtable_pmd_page_ctor() for PMD tables.
+
+Please, never access page->ptl directly -- use the appropriate helper.
diff --git a/Documentation/vm/swap_numa.rst b/Documentation/vm/swap_numa.rst
new file mode 100644
index 0000000..e0466f2
--- /dev/null
+++ b/Documentation/vm/swap_numa.rst
@@ -0,0 +1,80 @@
+.. _swap_numa:
+
+===========================================
+Automatically bind swap device to numa node
+===========================================
+
+If the system has more than one swap device and the swap devices have node
+information, we can make use of this information to decide which swap
+device to use in get_swap_pages() to get better performance.
+
+
+How to use this feature
+=======================
+
+Each swap device has a priority, which decides the order in which the device
+is used. To make use of automatic binding, there is no need to manipulate
+priority settings for swap devices. e.g. on a 2 node machine, assume 2 swap
+devices swapA and swapB, with swapA attached to node 0 and swapB attached to
+node 1, are going to be swapped on. Simply swap them on::
+
+	# swapon /dev/swapA
+	# swapon /dev/swapB
+
+Then node 0 will use the two swap devices in the order of swapA then swapB and
+node 1 will use the two swap devices in the order of swapB then swapA. Note
+that the order of them being swapped on doesn't matter.
+
+A more complex example on a 4 node machine. Assume 6 swap devices are going to
+be swapped on: swapA and swapB are attached to node 0, swapC is attached to
+node 1, swapD and swapE are attached to node 2 and swapF is attached to node3.
+The way to swap them on is the same as above::
+
+	# swapon /dev/swapA
+	# swapon /dev/swapB
+	# swapon /dev/swapC
+	# swapon /dev/swapD
+	# swapon /dev/swapE
+	# swapon /dev/swapF
+
+Then node 0 will use them in the order of::
+
+	swapA/swapB -> swapC -> swapD -> swapE -> swapF
+
+swapA and swapB will be used in a round robin mode before any other swap device.
+
+node 1 will use them in the order of::
+
+	swapC -> swapA -> swapB -> swapD -> swapE -> swapF
+
+node 2 will use them in the order of::
+
+	swapD/swapE -> swapA -> swapB -> swapC -> swapF
+
+Similarly, swapD and swapE will be used in a round robin mode before any
+other swap devices.
+
+node 3 will use them in the order of::
+
+	swapF -> swapA -> swapB -> swapC -> swapD -> swapE
+
+
+Implementation details
+======================
+
+The current code uses a priority-based list, swap_avail_list, to decide
+which swap device to use; if multiple swap devices share the same
+priority, they are used round robin. This change replaces the single
+global swap_avail_list with a per-numa-node list, i.e. each numa node
+sees its own priority-based list of available swap devices. A swap
+device's priority can be promoted on its matching node's swap_avail_list.
+
+A swap device's priority is set as follows: the user can set a value >= 0,
+or the system will pick one starting from -1 and then going downwards. The
+priority value in the swap_avail_list is the negated value of the swap
+device's priority, due to plist being sorted from low to high. The new
+policy doesn't change the semantics for the priority >= 0 cases; what
+previously started from -1 and went downwards now starts from -2 and goes
+downwards, and -1 is reserved as the promoted value. So if multiple swap
+devices are attached to the same node, they will all be promoted to
+priority -1 on that node's plist and will be used round robin before any
+other swap devices.
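+
+To make the negation concrete (an illustrative example, not tool output):
+a device the user set to priority 10 is stored with value -10 on every
+node's swap_avail_list; automatically picked priorities -2, -3, ... are
+stored as 2, 3, ...; and a device promoted on its matching node is stored
+there as 1 (the negation of the reserved -1). Since the plist is sorted
+from low to high, user-prioritized devices come first, then the node's
+own promoted devices, then all the others.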
diff --git a/Documentation/vm/swap_numa.txt b/Documentation/vm/swap_numa.txt
deleted file mode 100644
index d5960c9..0000000
--- a/Documentation/vm/swap_numa.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-Automatically bind swap device to numa node
--------------------------------------------
-
-If the system has more than one swap device and swap device has the node
-information, we can make use of this information to decide which swap
-device to use in get_swap_pages() to get better performance.
-
-
-How to use this feature
------------------------
-
-Swap device has priority and that decides the order of it to be used. To make
-use of automatically binding, there is no need to manipulate priority settings
-for swap devices. e.g. on a 2 node machine, assume 2 swap devices swapA and
-swapB, with swapA attached to node 0 and swapB attached to node 1, are going
-to be swapped on. Simply swapping them on by doing:
-# swapon /dev/swapA
-# swapon /dev/swapB
-
-Then node 0 will use the two swap devices in the order of swapA then swapB and
-node 1 will use the two swap devices in the order of swapB then swapA. Note
-that the order of them being swapped on doesn't matter.
-
-A more complex example on a 4 node machine. Assume 6 swap devices are going to
-be swapped on: swapA and swapB are attached to node 0, swapC is attached to
-node 1, swapD and swapE are attached to node 2 and swapF is attached to node3.
-The way to swap them on is the same as above:
-# swapon /dev/swapA
-# swapon /dev/swapB
-# swapon /dev/swapC
-# swapon /dev/swapD
-# swapon /dev/swapE
-# swapon /dev/swapF
-
-Then node 0 will use them in the order of:
-swapA/swapB -> swapC -> swapD -> swapE -> swapF
-swapA and swapB will be used in a round robin mode before any other swap device.
-
-node 1 will use them in the order of:
-swapC -> swapA -> swapB -> swapD -> swapE -> swapF
-
-node 2 will use them in the order of:
-swapD/swapE -> swapA -> swapB -> swapC -> swapF
-Similaly, swapD and swapE will be used in a round robin mode before any
-other swap devices.
-
-node 3 will use them in the order of:
-swapF -> swapA -> swapB -> swapC -> swapD -> swapE
-
-
-Implementation details
-----------------------
-
-The current code uses a priority based list, swap_avail_list, to decide
-which swap device to use and if multiple swap devices share the same
-priority, they are used round robin. This change here replaces the single
-global swap_avail_list with a per-numa-node list, i.e. for each numa node,
-it sees its own priority based list of available swap devices. Swap
-device's priority can be promoted on its matching node's swap_avail_list.
-
-The current swap device's priority is set as: user can set a >=0 value,
-or the system will pick one starting from -1 then downwards. The priority
-value in the swap_avail_list is the negated value of the swap device's
-due to plist being sorted from low to high. The new policy doesn't change
-the semantics for priority >=0 cases, the previous starting from -1 then
-downwards now becomes starting from -2 then downwards and -1 is reserved
-as the promoted value. So if multiple swap devices are attached to the same
-node, they will all be promoted to priority -1 on that node's plist and will
-be used round robin before any other swap devices.
diff --git a/Documentation/vm/transhuge.rst b/Documentation/vm/transhuge.rst
new file mode 100644
index 0000000..a8cf680
--- /dev/null
+++ b/Documentation/vm/transhuge.rst
@@ -0,0 +1,197 @@
+.. _transhuge:
+
+============================
+Transparent Hugepage Support
+============================
+
+This document describes the design principles of Transparent Hugepage (THP)
+support and its interaction with other parts of the memory management
+subsystem.
+
+Design principles
+=================
+
+- "graceful fallback": mm components which don't have transparent hugepage
+  knowledge fall back to breaking huge pmd mapping into table of ptes and,
+  if necessary, split a transparent hugepage. Therefore these components
+  can continue working on the regular pages or regular pte mappings.
+
+- if a hugepage allocation fails because of memory fragmentation,
+  regular pages should be gracefully allocated instead and mixed in
+  the same vma without any failure or significant delay and without
+  userland noticing
+
+- if some task quits and more hugepages become available (either
+  immediately in the buddy or through the VM), guest physical memory
+  backed by regular pages should be relocated on hugepages
+  automatically (with khugepaged)
+
+- it doesn't require memory reservation and in turn it uses hugepages
+  whenever possible (the only possible reservation here is kernelcore=
+  to prevent unmovable pages from fragmenting all the memory, but such a
+  tweak is not specific to transparent hugepage support and it's a generic
+  feature that applies to all dynamic high order allocations in the
+  kernel)
+
+get_user_pages and follow_page
+==============================
+
+get_user_pages and follow_page, if run on a hugepage, will return the
+head or tail pages as usual (exactly as they would do on
+hugetlbfs). Most gup users will only care about the actual physical
+address of the page and its temporary pinning to release after the I/O
+is complete, so they won't ever notice the fact that the page is huge. But
+if any driver is going to mangle the page structure of the tail
+page (like checking page->mapping or other bits that are relevant
+for the head page and not the tail page), it should be updated to
+check the head page instead. Taking a reference on any head/tail page
+prevents the page from being split by anyone.
+
+.. note::
+   these aren't new constraints to the GUP API, and they match the
+   same constraints that apply to hugetlbfs too, so any driver capable
+   of handling GUP on hugetlbfs will also work fine on transparent
+   hugepage backed mappings.
+
+If you can't handle compound pages when they're returned by
+follow_page, the FOLL_SPLIT bit can be specified as a parameter to
+follow_page, so that it will split the hugepages before returning
+them. Migration, for example, passes FOLL_SPLIT as a parameter to
+follow_page because it's not hugepage aware and in fact it can't work
+at all on hugetlbfs (but it instead works fine on transparent
+hugepages thanks to FOLL_SPLIT). Migration simply can't deal with
+hugepages being returned (as it's not only checking the pfn of the
+page and pinning it during the copy but it pretends to migrate the
+memory in regular page sizes and with regular pte/pmd mappings).
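+
+As a sketch (a hypothetical caller that cannot deal with compound pages;
+vma and addr are assumed)::
+
+	struct page *page;
+
+	/* ask follow_page() to split any THP before returning it */
+	page = follow_page(vma, addr, FOLL_GET | FOLL_SPLIT);
+	if (!page)
+		return -EFAULT;
+	/* page is now a base page, never a compound page */
+	/* ... use the page ... */
+	put_page(page);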
+
+Graceful fallback
+=================
+
+Code walking pagetables but unaware about huge pmds can simply call
+split_huge_pmd(vma, pmd, addr) where the pmd is the one returned by
+pmd_offset. It's trivial to make the code transparent hugepage aware
+by just grepping for "pmd_offset" and adding split_huge_pmd where
+missing after pmd_offset returns the pmd. Thanks to the graceful
+fallback design, with a one-liner change, you can avoid writing
+hundreds if not thousands of lines of complex code to make your code
+hugepage aware.
+
+If you're not walking pagetables but you run into a physical hugepage
+that you can't handle natively in your code, you can split it by
+calling split_huge_page(page). This is what the Linux VM does before
+it tries to swap out the hugepage, for example. split_huge_page() can fail
+if the page is pinned, and you must handle this correctly.
+
+Example to make mremap.c transparent hugepage aware with a one-liner
+change::
+
+	diff --git a/mm/mremap.c b/mm/mremap.c
+	--- a/mm/mremap.c
+	+++ b/mm/mremap.c
+	@@ -41,6 +41,7 @@ static pmd_t *get_old_pmd(struct mm_stru
+			return NULL;
+
+		pmd = pmd_offset(pud, addr);
+	+	split_huge_pmd(vma, pmd, addr);
+		if (pmd_none_or_clear_bad(pmd))
+			return NULL;
+
+Locking in hugepage aware code
+==============================
+
+We want as much code as possible to be hugepage aware, as calling
+split_huge_page() or split_huge_pmd() has a cost.
+
+To make pagetable walks huge pmd aware, all you need to do is to call
+pmd_trans_huge() on the pmd returned by pmd_offset. You must hold the
+mmap_sem in read (or write) mode to be sure a huge pmd cannot be
+created from under you by khugepaged (khugepaged collapse_huge_page
+takes the mmap_sem in write mode in addition to the anon_vma lock). If
+pmd_trans_huge returns false, you just fall back to the old code
+paths. If instead pmd_trans_huge returns true, you have to take the
+page table lock (pmd_lock()) and re-run pmd_trans_huge. Taking the
+page table lock will prevent the huge pmd from being converted into a
+regular pmd from under you (split_huge_pmd can run in parallel to the
+pagetable walk). If the second pmd_trans_huge returns false, you
+should just drop the page table lock and fall back to the old code as
+before. Otherwise you can proceed to process the huge pmd and the
+hugepage natively. Once finished you can drop the page table lock.
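+
+A sketch of that pattern (mm, pud and addr come from the surrounding
+walk)::
+
+	pmd_t *pmd = pmd_offset(pud, addr);
+	spinlock_t *ptl;
+
+	/* caller holds mmap_sem in read or write mode */
+	if (pmd_trans_huge(*pmd)) {
+		ptl = pmd_lock(mm, pmd);
+		if (pmd_trans_huge(*pmd)) {
+			/* stable huge pmd: process it natively */
+			spin_unlock(ptl);
+			return;
+		}
+		/* split under us: fall through to the pte path */
+		spin_unlock(ptl);
+	}
+	/* old, regular pte code paths */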
+
+Refcounts and transparent huge pages
+====================================
+
+Refcounting on THP is mostly consistent with refcounting on other compound
+pages:
+
+  - get_page()/put_page() and GUP operate on the head page's ->_refcount.
+
+  - ->_refcount in tail pages is always zero: get_page_unless_zero() never
+    succeeds on tail pages.
+
+  - map/unmap of a page with a PTE entry increments/decrements ->_mapcount
+    on the relevant sub-page of the compound page.
+
+  - map/unmap of the whole compound page is accounted in compound_mapcount
+    (stored in the first tail page). For file huge pages, we also increment
+    ->_mapcount of all sub-pages in order to have race-free detection of
+    the last unmap of subpages.
+
+PageDoubleMap() indicates that the page is *possibly* mapped with PTEs.
+
+For anonymous pages, PageDoubleMap() also indicates that ->_mapcount in all
+subpages is offset up by one. This additional reference is required to
+get race-free detection of unmap of subpages when we have them mapped with
+both PMDs and PTEs.
+
+This optimization is required to lower the overhead of per-subpage mapcount
+tracking. The alternative is to alter ->_mapcount in all subpages on each
+map/unmap of the whole compound page.
+
+For anonymous pages, we set PG_double_map when a PMD of the page gets split
+for the first time, but the page still has a PMD mapping. The additional
+references go away with the last compound_mapcount.
+
+File pages get PG_double_map set on the first map of the page with a PTE;
+it goes away when the page gets evicted from the page cache.
+
+split_huge_page internally has to distribute the refcounts in the head
+page to the tail pages before clearing all PG_head/tail bits from the page
+structures. It can be done easily for refcounts taken by page table
+entries. But we don't have enough information on how to distribute any
+additional pins (i.e. from get_user_pages). split_huge_page() fails any
+request to split a pinned huge page: it expects the page count to be equal
+to the sum of the mapcounts of all sub-pages plus one (the split_huge_page
+caller must have a reference for the head page).
+
+split_huge_page uses migration entries to stabilize page->_refcount and
+page->_mapcount of anonymous pages. File pages just get unmapped.
+
+We are safe against physical memory scanners too: the only legitimate way
+a scanner can get a reference to a page is get_page_unless_zero().
+
+All tail pages have zero ->_refcount until atomic_add(). This prevents the
+scanner from getting a reference to the tail page up to that point. After the
+atomic_add() we don't care about the ->_refcount value. We already know how
+many references should be uncharged from the head page.
+
+For the head page get_page_unless_zero() will succeed and we don't mind. It's
+clear where the reference should go after split: it will stay on the head page.
+
+Note that split_huge_pmd() doesn't have any limitations on refcounting:
+a pmd can be split at any point and the split never fails.
+
+Partial unmap and deferred_split_huge_page()
+============================================
+
+Unmapping part of a THP (with munmap() or another way) is not going to free
+the memory immediately. Instead, we detect that a subpage of the THP is not
+in use in page_remove_rmap() and queue the THP for splitting if memory
+pressure comes. Splitting will free up the unused subpages.
+
+Splitting the page right away is not an option due to the locking context in
+the place where we can detect partial unmap. It also might be
+counterproductive, since in many cases partial unmap happens during exit(2)
+if a THP crosses a VMA boundary.
+
+The function deferred_split_huge_page() is used to queue a page for
+splitting. The splitting itself will happen when we get memory pressure
+via the shrinker interface.
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
deleted file mode 100644
index 4dde03b..0000000
--- a/Documentation/vm/transhuge.txt
+++ /dev/null
@@ -1,527 +0,0 @@
-= Transparent Hugepage Support =
-
-== Objective ==
-
-Performance critical computing applications dealing with large memory
-working sets are already running on top of libhugetlbfs and in turn
-hugetlbfs. Transparent Hugepage Support is an alternative means of
-using huge pages for the backing of virtual memory with huge pages
-that supports the automatic promotion and demotion of page sizes and
-without the shortcomings of hugetlbfs.
-
-Currently it only works for anonymous memory mappings and tmpfs/shmem.
-But in the future it can expand to other filesystems.
-
-The reason applications are running faster is because of two
-factors. The first factor is almost completely irrelevant and it's not
-of significant interest because it'll also have the downside of
-requiring larger clear-page copy-page in page faults which is a
-potentially negative effect. The first factor consists in taking a
-single page fault for each 2M virtual region touched by userland (so
-reducing the enter/exit kernel frequency by a 512 times factor). This
-only matters the first time the memory is accessed for the lifetime of
-a memory mapping. The second long lasting and much more important
-factor will affect all subsequent accesses to the memory for the whole
-runtime of the application. The second factor consist of two
-components: 1) the TLB miss will run faster (especially with
-virtualization using nested pagetables but almost always also on bare
-metal without virtualization) and 2) a single TLB entry will be
-mapping a much larger amount of virtual memory in turn reducing the
-number of TLB misses. With virtualization and nested pagetables the
-TLB can be mapped of larger size only if both KVM and the Linux guest
-are using hugepages but a significant speedup already happens if only
-one of the two is using hugepages just because of the fact the TLB
-miss is going to run faster.
-
-== Design ==
-
-- "graceful fallback": mm components which don't have transparent hugepage
-  knowledge fall back to breaking huge pmd mapping into table of ptes and,
-  if necessary, split a transparent hugepage. Therefore these components
-  can continue working on the regular pages or regular pte mappings.
-
-- if a hugepage allocation fails because of memory fragmentation,
-  regular pages should be gracefully allocated instead and mixed in
-  the same vma without any failure or significant delay and without
-  userland noticing
-
-- if some task quits and more hugepages become available (either
-  immediately in the buddy or through the VM), guest physical memory
-  backed by regular pages should be relocated on hugepages
-  automatically (with khugepaged)
-
-- it doesn't require memory reservation and in turn it uses hugepages
-  whenever possible (the only possible reservation here is kernelcore=
-  to avoid unmovable pages to fragment all the memory but such a tweak
-  is not specific to transparent hugepage support and it's a generic
-  feature that applies to all dynamic high order allocations in the
-  kernel)
-
-Transparent Hugepage Support maximizes the usefulness of free memory
-if compared to the reservation approach of hugetlbfs by allowing all
-unused memory to be used as cache or other movable (or even unmovable
-entities). It doesn't require reservation to prevent hugepage
-allocation failures to be noticeable from userland. It allows paging
-and all other advanced VM features to be available on the
-hugepages. It requires no modifications for applications to take
-advantage of it.
-
-Applications however can be further optimized to take advantage of
-this feature, like for example they've been optimized before to avoid
-a flood of mmap system calls for every malloc(4k). Optimizing userland
-is by far not mandatory and khugepaged already can take care of long
-lived page allocations even for hugepage unaware applications that
-deals with large amounts of memory.
-
-In certain cases when hugepages are enabled system wide, application
-may end up allocating more memory resources. An application may mmap a
-large region but only touch 1 byte of it, in that case a 2M page might
-be allocated instead of a 4k page for no good. This is why it's
-possible to disable hugepages system-wide and to only have them inside
-MADV_HUGEPAGE madvise regions.
-
-Embedded systems should enable hugepages only inside madvise regions
-to eliminate any risk of wasting any precious byte of memory and to
-only run faster.
-
-Applications that gets a lot of benefit from hugepages and that don't
-risk to lose memory by using hugepages, should use
-madvise(MADV_HUGEPAGE) on their critical mmapped regions.
-
-== sysfs ==
-
-Transparent Hugepage Support for anonymous memory can be entirely disabled
-(mostly for debugging purposes) or only enabled inside MADV_HUGEPAGE
-regions (to avoid the risk of consuming more memory resources) or enabled
-system wide. This can be achieved with one of:
-
-echo always >/sys/kernel/mm/transparent_hugepage/enabled
-echo madvise >/sys/kernel/mm/transparent_hugepage/enabled
-echo never >/sys/kernel/mm/transparent_hugepage/enabled
-
-It's also possible to limit defrag efforts in the VM to generate
-anonymous hugepages in case they're not immediately free to madvise
-regions or to never try to defrag memory and simply fallback to regular
-pages unless hugepages are immediately available. Clearly if we spend CPU
-time to defrag memory, we would expect to gain even more by the fact we
-use hugepages later instead of regular pages. This isn't always
-guaranteed, but it may be more likely in case the allocation is for a
-MADV_HUGEPAGE region.
-
-echo always >/sys/kernel/mm/transparent_hugepage/defrag
-echo defer >/sys/kernel/mm/transparent_hugepage/defrag
-echo defer+madvise >/sys/kernel/mm/transparent_hugepage/defrag
-echo madvise >/sys/kernel/mm/transparent_hugepage/defrag
-echo never >/sys/kernel/mm/transparent_hugepage/defrag
-
-"always" means that an application requesting THP will stall on allocation
-failure and directly reclaim pages and compact memory in an effort to
-allocate a THP immediately. This may be desirable for virtual machines
-that benefit heavily from THP use and are willing to delay the VM start
-to utilise them.
-
-"defer" means that an application will wake kswapd in the background
-to reclaim pages and wake kcompactd to compact memory so that THP is
-available in the near future. It's the responsibility of khugepaged
-to then install the THP pages later.
-
-"defer+madvise" will enter direct reclaim and compaction like "always", but
-only for regions that have used madvise(MADV_HUGEPAGE); all other regions
-will wake kswapd in the background to reclaim pages and wake kcompactd to
-compact memory so that THP is available in the near future.
-
-"madvise" will enter direct reclaim like "always" but only for regions
-that are have used madvise(MADV_HUGEPAGE). This is the default behaviour.
-
-"never" should be self-explanatory.
-
-By default kernel tries to use huge zero page on read page fault to
-anonymous mapping. It's possible to disable huge zero page by writing 0
-or enable it back by writing 1:
-
-echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
-echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page
-
-Some userspace (such as a test program, or an optimized memory allocation
-library) may want to know the size (in bytes) of a transparent hugepage:
-
-cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
-
-khugepaged will be automatically started when
-transparent_hugepage/enabled is set to "always" or "madvise, and it'll
-be automatically shutdown if it's set to "never".
-
-khugepaged runs usually at low frequency so while one may not want to
-invoke defrag algorithms synchronously during the page faults, it
-should be worth invoking defrag at least in khugepaged. However it's
-also possible to disable defrag in khugepaged by writing 0 or enable
-defrag in khugepaged by writing 1:
-
-echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
-echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
-
-You can also control how many pages khugepaged should scan at each
-pass:
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan
-
-and how many milliseconds to wait in khugepaged between each pass (you
-can set this to 0 to run khugepaged at 100% utilization of one core):
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs
-
-and how many milliseconds to wait in khugepaged if there's an hugepage
-allocation failure to throttle the next allocation attempt.
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs
-
-The khugepaged progress can be seen in the number of pages collapsed:
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed
-
-for each pass:
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/full_scans
-
-max_ptes_none specifies how many extra small pages (that are
-not already mapped) can be allocated when collapsing a group
-of small pages into one large page.
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none
-
-A higher value leads to use additional memory for programs.
-A lower value leads to gain less thp performance. Value of
-max_ptes_none can waste cpu time very little, you can
-ignore it.
-
-max_ptes_swap specifies how many pages can be brought in from
-swap when collapsing a group of pages into a transparent huge page.
-
-/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_swap
-
-A higher value can cause excessive swap IO and waste
-memory. A lower value can prevent THPs from being
-collapsed, resulting fewer pages being collapsed into
-THPs, and lower memory access performance.
-
-== Boot parameter ==
-
-You can change the sysfs boot time defaults of Transparent Hugepage
-Support by passing the parameter "transparent_hugepage=always" or
-"transparent_hugepage=madvise" or "transparent_hugepage=never"
-(without "") to the kernel command line.
-
-== Hugepages in tmpfs/shmem ==
-
-You can control hugepage allocation policy in tmpfs with mount option
-"huge=". It can have following values:
-
-  - "always":
-    Attempt to allocate huge pages every time we need a new page;
-
-  - "never":
-    Do not allocate huge pages;
-
-  - "within_size":
-    Only allocate huge page if it will be fully within i_size.
-    Also respect fadvise()/madvise() hints;
-
-  - "advise:
-    Only allocate huge pages if requested with fadvise()/madvise();
-
-The default policy is "never".
-
-"mount -o remount,huge= /mountpoint" works fine after mount: remounting
-huge=never will not attempt to break up huge pages at all, just stop more
-from being allocated.
-
-There's also sysfs knob to control hugepage allocation policy for internal
-shmem mount: /sys/kernel/mm/transparent_hugepage/shmem_enabled. The mount
-is used for SysV SHM, memfds, shared anonymous mmaps (of /dev/zero or
-MAP_ANONYMOUS), GPU drivers' DRM objects, Ashmem.
-
-In addition to policies listed above, shmem_enabled allows two further
-values:
-
-  - "deny":
-    For use in emergencies, to force the huge option off from
-    all mounts;
-  - "force":
-    Force the huge option on for all - very useful for testing;
-
-== Need of application restart ==
-
-The transparent_hugepage/enabled values and tmpfs mount option only affect
-future behavior. So to make them effective you need to restart any
-application that could have been using hugepages. This also applies to the
-regions registered in khugepaged.
-
-== Monitoring usage ==
-
-The number of anonymous transparent huge pages currently used by the
-system is available by reading the AnonHugePages field in /proc/meminfo.
-To identify what applications are using anonymous transparent huge pages,
-it is necessary to read /proc/PID/smaps and count the AnonHugePages fields
-for each mapping.
-
-The number of file transparent huge pages mapped to userspace is available
-by reading ShmemPmdMapped and ShmemHugePages fields in /proc/meminfo.
-To identify what applications are mapping file transparent huge pages, it
-is necessary to read /proc/PID/smaps and count the FileHugeMapped fields
-for each mapping.
-
-Note that reading the smaps file is expensive and reading it
-frequently will incur overhead.
-
-There are a number of counters in /proc/vmstat that may be used to
-monitor how successfully the system is providing huge pages for use.
-
-thp_fault_alloc is incremented every time a huge page is successfully
-	allocated to handle a page fault. This applies to both the
-	first time a page is faulted and for COW faults.
-
-thp_collapse_alloc is incremented by khugepaged when it has found
-	a range of pages to collapse into one huge page and has
-	successfully allocated a new huge page to store the data.
-
-thp_fault_fallback is incremented if a page fault fails to allocate
-	a huge page and instead falls back to using small pages.
-
-thp_collapse_alloc_failed is incremented if khugepaged found a range
-	of pages that should be collapsed into one huge page but failed
-	the allocation.
-
-thp_file_alloc is incremented every time a file huge page is successfully
-	allocated.
-
-thp_file_mapped is incremented every time a file huge page is mapped into
-	user address space.
-
-thp_split_page is incremented every time a huge page is split into base
-	pages. This can happen for a variety of reasons but a common
-	reason is that a huge page is old and is being reclaimed.
-	This action implies splitting all PMD the page mapped with.
-
-thp_split_page_failed is incremented if kernel fails to split huge
-	page. This can happen if the page was pinned by somebody.
-
-thp_deferred_split_page is incremented when a huge page is put onto split
-	queue. This happens when a huge page is partially unmapped and
-	splitting it would free up some memory. Pages on split queue are
-	going to be split under memory pressure.
-
-thp_split_pmd is incremented every time a PMD split into table of PTEs.
-	This can happen, for instance, when application calls mprotect() or
-	munmap() on part of huge page. It doesn't split huge page, only
-	page table entry.
-
-thp_zero_page_alloc is incremented every time a huge zero page is
-	successfully allocated. It includes allocations which where
-	dropped due race with other allocation. Note, it doesn't count
-	every map of the huge zero page, only its allocation.
-
-thp_zero_page_alloc_failed is incremented if kernel fails to allocate
-	huge zero page and falls back to using small pages.
-
-As the system ages, allocating huge pages may be expensive as the
-system uses memory compaction to copy data around memory to free a
-huge page for use. There are some counters in /proc/vmstat to help
-monitor this overhead.
-
-compact_stall is incremented every time a process stalls to run
-	memory compaction so that a huge page is free for use.
-
-compact_success is incremented if the system compacted memory and
-	freed a huge page for use.
-
-compact_fail is incremented if the system tries to compact memory
-	but failed.
-
-compact_pages_moved is incremented each time a page is moved. If
-	this value is increasing rapidly, it implies that the system
-	is copying a lot of data to satisfy the huge page allocation.
-	It is possible that the cost of copying exceeds any savings
-	from reduced TLB misses.
-
-compact_pagemigrate_failed is incremented when the underlying mechanism
-	for moving a page failed.
-
-compact_blocks_moved is incremented each time memory compaction examines
-	a huge page aligned range of pages.
-
-It is possible to establish how long the stalls were using the function
-tracer to record how long was spent in __alloc_pages_nodemask and
-using the mm_page_alloc tracepoint to identify which allocations were
-for huge pages.
-
-== get_user_pages and follow_page ==
-
-get_user_pages and follow_page if run on a hugepage, will return the
-head or tail pages as usual (exactly as they would do on
-hugetlbfs). Most gup users will only care about the actual physical
-address of the page and its temporary pinning to release after the I/O
-is complete, so they won't ever notice the fact the page is huge. But
-if any driver is going to mangle over the page structure of the tail
-page (like for checking page->mapping or other bits that are relevant
-for the head page and not the tail page), it should be updated to jump
-to check head page instead. Taking reference on any head/tail page would
-prevent page from being split by anyone.
-
-NOTE: these aren't new constraints to the GUP API, and they match the
-same constrains that applies to hugetlbfs too, so any driver capable
-of handling GUP on hugetlbfs will also work fine on transparent
-hugepage backed mappings.
-
-In case you can't handle compound pages if they're returned by
-follow_page, the FOLL_SPLIT bit can be specified as parameter to
-follow_page, so that it will split the hugepages before returning
-them. Migration for example passes FOLL_SPLIT as parameter to
-follow_page because it's not hugepage aware and in fact it can't work
-at all on hugetlbfs (but it instead works fine on transparent
-hugepages thanks to FOLL_SPLIT). migration simply can't deal with
-hugepages being returned (as it's not only checking the pfn of the
-page and pinning it during the copy but it pretends to migrate the
-memory in regular page sizes and with regular pte/pmd mappings).
-
-== Optimizing the applications ==
-
-To be guaranteed that the kernel will map a 2M page immediately in any
-memory region, the mmap region has to be hugepage naturally
-aligned. posix_memalign() can provide that guarantee.
-
-== Hugetlbfs ==
-
-You can use hugetlbfs on a kernel that has transparent hugepage
-support enabled just fine as always. No difference can be noted in
-hugetlbfs other than there will be less overall fragmentation. All
-usual features belonging to hugetlbfs are preserved and
-unaffected. libhugetlbfs will also work fine as usual.
-
-== Graceful fallback ==
-
-Code walking pagetables but unaware about huge pmds can simply call
-split_huge_pmd(vma, pmd, addr) where the pmd is the one returned by
-pmd_offset. It's trivial to make the code transparent hugepage aware
-by just grepping for "pmd_offset" and adding split_huge_pmd where
-missing after pmd_offset returns the pmd. Thanks to the graceful
-fallback design, with a one liner change, you can avoid to write
-hundred if not thousand of lines of complex code to make your code
-hugepage aware.
-
-If you're not walking pagetables but you run into a physical hugepage
-but you can't handle it natively in your code, you can split it by
-calling split_huge_page(page). This is what the Linux VM does before
-it tries to swapout the hugepage for example. split_huge_page() can fail
-if the page is pinned and you must handle this correctly.
-
-Example to make mremap.c transparent hugepage aware with a one liner
-change:
-
-diff --git a/mm/mremap.c b/mm/mremap.c
---- a/mm/mremap.c
-+++ b/mm/mremap.c
-@@ -41,6 +41,7 @@ static pmd_t *get_old_pmd(struct mm_stru
-		return NULL;
-
-	pmd = pmd_offset(pud, addr);
-+	split_huge_pmd(vma, pmd, addr);
-	if (pmd_none_or_clear_bad(pmd))
-		return NULL;
-
-== Locking in hugepage aware code ==
-
-We want as much code as possible hugepage aware, as calling
-split_huge_page() or split_huge_pmd() has a cost.
-
-To make pagetable walks huge pmd aware, all you need to do is to call
-pmd_trans_huge() on the pmd returned by pmd_offset. You must hold the
-mmap_sem in read (or write) mode to be sure an huge pmd cannot be
-created from under you by khugepaged (khugepaged collapse_huge_page
-takes the mmap_sem in write mode in addition to the anon_vma lock). If
-pmd_trans_huge returns false, you just fallback in the old code
-paths. If instead pmd_trans_huge returns true, you have to take the
-page table lock (pmd_lock()) and re-run pmd_trans_huge. Taking the
-page table lock will prevent the huge pmd to be converted into a
-regular pmd from under you (split_huge_pmd can run in parallel to the
-pagetable walk). If the second pmd_trans_huge returns false, you
-should just drop the page table lock and fallback to the old code as
-before. Otherwise you can proceed to process the huge pmd and the
-hugepage natively. Once finished you can drop the page table lock.
-
-== Refcounts and transparent huge pages ==
-
-Refcounting on THP is mostly consistent with refcounting on other compound
-pages:
-
-  - get_page()/put_page() and GUP operate in head page's ->_refcount.
-
-  - ->_refcount in tail pages is always zero: get_page_unless_zero() never
-    succeed on tail pages.
-
-  - map/unmap of the pages with PTE entry increment/decrement ->_mapcount
-    on relevant sub-page of the compound page.
-
-  - map/unmap of the whole compound page accounted in compound_mapcount
-    (stored in first tail page). For file huge pages, we also increment
-    ->_mapcount of all sub-pages in order to have race-free detection of
-    last unmap of subpages.
-
-PageDoubleMap() indicates that the page is *possibly* mapped with PTEs.
-
-For anonymous pages PageDoubleMap() also indicates ->_mapcount in all
-subpages is offset up by one. This additional reference is required to
-get race-free detection of unmap of subpages when we have them mapped with
-both PMDs and PTEs.
-
-This is optimization required to lower overhead of per-subpage mapcount
-tracking. The alternative is alter ->_mapcount in all subpages on each
-map/unmap of the whole compound page.
-
-For anonymous pages, we set PG_double_map when a PMD of the page got split
-for the first time, but still have PMD mapping. The additional references
-go away with last compound_mapcount.
-
-File pages get PG_double_map set on first map of the page with PTE and
-goes away when the page gets evicted from page cache.
-
-split_huge_page internally has to distribute the refcounts in the head
-page to the tail pages before clearing all PG_head/tail bits from the page
-structures. It can be done easily for refcounts taken by page table
-entries. But we don't have enough information on how to distribute any
-additional pins (i.e. from get_user_pages). split_huge_page() fails any
-requests to split pinned huge page: it expects page count to be equal to
-sum of mapcount of all sub-pages plus one (split_huge_page caller must
-have reference for head page).
-
-split_huge_page uses migration entries to stabilize page->_refcount and
-page->_mapcount of anonymous pages. File pages just got unmapped.
-
-We safe against physical memory scanners too: the only legitimate way
-scanner can get reference to a page is get_page_unless_zero().
-
-All tail pages have zero ->_refcount until atomic_add(). This prevents the
-scanner from getting a reference to the tail page up to that point. After the
-atomic_add() we don't care about the ->_refcount value. We already known how
-many references should be uncharged from the head page.
-
-For head page get_page_unless_zero() will succeed and we don't mind. It's
-clear where reference should go after split: it will stay on head page.
-
-Note that split_huge_pmd() doesn't have any limitation on refcounting:
-pmd can be split at any point and never fails.
-
-== Partial unmap and deferred_split_huge_page() ==
-
-Unmapping part of THP (with munmap() or other way) is not going to free
-memory immediately. Instead, we detect that a subpage of THP is not in use
-in page_remove_rmap() and queue the THP for splitting if memory pressure
-comes. Splitting will free up unused subpages.
-
-Splitting the page right away is not an option due to locking context in
-the place where we can detect partial unmap. It's also might be
-counterproductive since in many cases partial unmap happens during exit(2) if
-a THP crosses a VMA boundary.
-
-Function deferred_split_huge_page() is used to queue page for splitting.
-The splitting itself will happen when we get memory pressure via shrinker
-interface.
diff --git a/Documentation/vm/unevictable-lru.rst b/Documentation/vm/unevictable-lru.rst
new file mode 100644
index 0000000..fdd84cb
--- /dev/null
+++ b/Documentation/vm/unevictable-lru.rst
@@ -0,0 +1,614 @@
+.. _unevictable_lru:
+
+==============================
+Unevictable LRU Infrastructure
+==============================
+
+.. contents:: :local:
+
+
+Introduction
+============
+
+This document describes the Linux memory manager's "Unevictable LRU"
+infrastructure and the use of this to manage several types of "unevictable"
+pages.
+
+The document attempts to provide the overall rationale behind this mechanism
+and the rationale for some of the design decisions that drove the
+implementation.  The latter design rationale is discussed in the context of an
+implementation description.  Admittedly, one can obtain the implementation
+details - the "what does it do?" - by reading the code.  One hopes that the
+descriptions below add value by providing the answer to "why does it do that?".
+
+
+
+The Unevictable LRU
+===================
+
+The Unevictable LRU facility adds an additional LRU list to track unevictable
+pages and to hide these pages from vmscan.  This mechanism is based on a patch
+by Larry Woodman of Red Hat to address several scalability problems with page
+reclaim in Linux.  The problems have been observed at customer sites on large
+memory x86_64 systems.
+
+To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of
+main memory will have over 32 million 4k pages in a single zone.  When a large
+fraction of these pages are not evictable for any reason [see below], vmscan
+will spend a lot of time scanning the LRU lists looking for the small fraction
+of pages that are evictable.  This can result in a situation where all CPUs are
+spending 100% of their time in vmscan for hours or days on end, with the system
+completely unresponsive.
+
+The unevictable list addresses the following classes of unevictable pages:
+
+ * Those owned by ramfs.
+
+ * Those mapped into SHM_LOCK'd shared memory regions.
+
+ * Those mapped into VM_LOCKED [mlock()ed] VMAs.
+
+The infrastructure may also be able to handle other conditions that make pages
+unevictable, either by definition or by circumstance, in the future.
+
+
+The Unevictable Page List
+-------------------------
+
+The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
+called the "unevictable" list and an associated page flag, PG_unevictable, to
+indicate that the page is being managed on the unevictable list.
+
+The PG_unevictable flag is analogous to, and mutually exclusive with, the
+PG_active flag in that it indicates on which LRU list a page resides when
+PG_lru is set.
+
+The Unevictable LRU infrastructure maintains unevictable pages on an additional
+LRU list for a few reasons:
+
+ (1) We get to "treat unevictable pages just like we treat other pages in the
+     system - which means we get to use the same code to manipulate them, the
+     same code to isolate them (for migrate, etc.), the same code to keep track
+     of the statistics, etc..." [Rik van Riel]
+
+ (2) We want to be able to migrate unevictable pages between nodes for memory
+     defragmentation, workload management and memory hotplug.  The Linux kernel
+     can only migrate pages that it can successfully isolate from the LRU
+     lists.  If we were to maintain pages elsewhere than on an LRU-like list,
+     where they can be found by isolate_lru_page(), we would prevent their
+     migration, unless we reworked migration code to find the unevictable pages
+     itself.
+
+
+The unevictable list does not differentiate between file-backed and anonymous,
+swap-backed pages.  This differentiation is only important while the pages are,
+in fact, evictable.
+
+The unevictable list benefits from the "arrayification" of the per-zone LRU
+lists and statistics originally proposed and posted by Christoph Lameter.
+
+The unevictable list does not use the LRU pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable list
+under the zone lru_lock.  This allows us to prevent the stranding of pages on
+the unevictable list when one task has the page isolated from the LRU and other
+tasks are changing the "evictability" state of the page.
+
+
+Memory Control Group Interaction
+--------------------------------
+
+The unevictable LRU facility interacts with the memory control group [aka
+memory controller; see Documentation/cgroup-v1/memory.txt] by extending the
+lru_list enum.
+
+The memory controller data structure automatically gets a per-zone unevictable
+list as a result of the "arrayification" of the per-zone LRU lists (one per
+lru_list enum element).  The memory controller tracks the movement of pages to
+and from the unevictable list.
+
+When a memory control group comes under memory pressure, the controller will
+not attempt to reclaim pages on the unevictable list.  This has a couple of
+effects:
+
+ (1) Because the pages are "hidden" from reclaim on the unevictable list, the
+     reclaim process can be more efficient, dealing only with pages that have a
+     chance of being reclaimed.
+
+ (2) On the other hand, if too many of the pages charged to the control group
+     are unevictable, the evictable portion of the working set of the tasks in
+     the control group may not fit into the available memory.  This can cause
+     the control group to thrash or to OOM-kill tasks.
+
+
+.. _mark_addr_space_unevict:
+
+Marking Address Spaces Unevictable
+----------------------------------
+
+For facilities such as ramfs none of the pages attached to the address space
+may be evicted.  To prevent eviction of any such pages, the AS_UNEVICTABLE
+address space flag is provided, and this can be manipulated by a filesystem
+using a number of wrapper functions (a usage sketch follows the list):
+
+ * ``void mapping_set_unevictable(struct address_space *mapping);``
+
+	Mark the address space as being completely unevictable.
+
+ * ``void mapping_clear_unevictable(struct address_space *mapping);``
+
+	Mark the address space as being evictable.
+
+ * ``int mapping_unevictable(struct address_space *mapping);``
+
+	Query the address space, and return true if it is completely
+	unevictable.
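+
+As a rough usage sketch (modelled on what ramfs does at inode creation;
+sb is the filesystem's superblock)::
+
+	struct inode *inode = new_inode(sb);
+
+	if (inode) {
+		/* pages of this mapping may never be reclaimed */
+		mapping_set_unevictable(inode->i_mapping);
+		/* ... the rest of the normal inode setup ... */
+	}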
+
+These wrappers are currently used in two places in the kernel:
+
+ (1) By ramfs to mark the address spaces of its inodes when they are created,
+     and this mark remains for the life of the inode.
+
+ (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called.
+
+     Note that SHM_LOCK is not required to page in the locked pages if they're
+     swapped out; the application must touch the pages manually if it wants to
+     ensure they're in memory.
+
+
+Detecting Unevictable Pages
+---------------------------
+
+The function page_evictable() in vmscan.c determines whether a page is
+evictable or not using the query function outlined above [see section
+:ref:`Marking address spaces unevictable <mark_addr_space_unevict>`]
+to check the AS_UNEVICTABLE flag.
+
+For address spaces that are so marked after being populated (as SHM regions
+might be), the lock action (e.g. SHM_LOCK) can be lazy, and need not populate
+the page tables for the region as does, for example, mlock(), nor need it make
+any special effort to push any pages in the SHM_LOCK'd area to the unevictable
+list.  Instead, vmscan will do this if and when it encounters the pages during
+a reclamation scan.
+
+On an unlock action (such as SHM_UNLOCK), the unlocker (e.g. shmctl()) must scan
+the pages in the region and "rescue" them from the unevictable list if no other
+condition is keeping them unevictable.  If an unevictable region is destroyed,
+the pages are also "rescued" from the unevictable list in the process of
+freeing them.
+
+page_evictable() also checks for mlocked pages by testing an additional page
+flag, PG_mlocked (as wrapped by PageMlocked()), which is set when a page is
+faulted into a VM_LOCKED vma, or found in a vma being VM_LOCKED.
+
+
+Vmscan's Handling of Unevictable Pages
+--------------------------------------
+
+If unevictable pages are culled in the fault path, or moved to the unevictable
+list at mlock() or mmap() time, vmscan will not encounter the pages until they
+have become evictable again (via munlock() for example) and have been "rescued"
+from the unevictable list.  However, there may be situations where we decide,
+for the sake of expediency, to leave an unevictable page on one of the regular
+active/inactive LRU lists for vmscan to deal with.  vmscan checks for such
+pages in all of the shrink_{active|inactive|page}_list() functions and will
+"cull" such pages that it encounters: that is, it diverts those pages to the
+unevictable list for the zone being scanned.
+
+There may be situations where a page is mapped into a VM_LOCKED VMA, but the
+page is not marked as PG_mlocked.  Such pages will make it all the way to
+shrink_page_list() where they will be detected when vmscan walks the reverse
+map in try_to_unmap().  If try_to_unmap() returns SWAP_MLOCK,
+shrink_page_list() will cull the page at that point.
+
+To "cull" an unevictable page, vmscan simply puts the page back on the LRU list
+using putback_lru_page() - the inverse operation to isolate_lru_page() - after
+dropping the page lock.  Because the condition which makes the page unevictable
+may change once the page is unlocked, putback_lru_page() will recheck the
+unevictable state of a page that it places on the unevictable list.  If the
+page has become evictable, putback_lru_page() removes it from the list and
+retries, including the page_unevictable() test.  Because such a race is a rare
+event and movement of pages onto the unevictable list should be rare, these
+extra evictability checks should not occur in the majority of calls to
+putback_lru_page().
+
+
+MLOCKED Pages
+=============
+
+The unevictable page list is also useful for mlock(), in addition to ramfs and
+SYSV SHM.  Note that mlock() is only available in CONFIG_MMU=y situations; in
+NOMMU situations, all mappings are effectively mlocked.
+
+
+History
+-------
+
+The "Unevictable mlocked Pages" infrastructure is based on work originally
+posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
+Nick posted his patch as an alternative to a patch posted by Christoph Lameter
+to achieve the same objective: hiding mlocked pages from vmscan.
+
+In Nick's patch, he used one of the struct page LRU list link fields as a count
+of VM_LOCKED VMAs that map the page.  This use of the link field for a count
+prevented the management of the pages on an LRU list, and thus mlocked pages
+were not migratable as isolate_lru_page() could not find them, and the LRU list
+link field was not available to the migration subsystem.
+
+Nick resolved this by putting mlocked pages back on the lru list before
+attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs.  When
+Nick's patch was integrated with the Unevictable LRU work, the count was
+replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
+mapped the page.  More on this below.
+
+
+Basic Management
+----------------
+
+mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable
+pages.  When such a page has been "noticed" by the memory management subsystem,
+the page is marked with the PG_mlocked flag.  This can be manipulated using the
+PageMlocked() functions.
+
+A PG_mlocked page will be placed on the unevictable list when it is added to
+the LRU.  Such pages can be "noticed" by memory management in several places:
+
+ (1) in the mlock()/mlockall() system call handlers;
+
+ (2) in the mmap() system call handler when mmapping a region with the
+     MAP_LOCKED flag;
+
+ (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE
+     flag;
+
+ (4) in the fault path, if mlocked pages are "culled" in the fault path,
+     and when a VM_LOCKED stack segment is expanded; or
+
+ (5) as mentioned above, in vmscan:shrink_page_list() when attempting to
+     reclaim a page in a VM_LOCKED VMA via try_to_unmap()
+
+all of which result in the VM_LOCKED flag being set for the VMA if it doesn't
+already have it set.
+
+mlocked pages become unlocked and rescued from the unevictable list when:
+
+ (1) mapped in a range unlocked via the munlock()/munlockall() system calls;
+
+ (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including
+     unmapping at task exit;
+
+ (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file;
+     or
+
+ (4) before a page is COW'd in a VM_LOCKED VMA.
+
+
+mlock()/mlockall() System Call Handling
+---------------------------------------
+
+Both [do\_]mlock() and [do\_]mlockall() system call handlers call mlock_fixup()
+for each VMA in the range specified by the call.  In the case of mlockall(),
+this is the entire active address space of the task.  Note that mlock_fixup()
+is used for both mlocking and munlocking a range of memory.  A call to mlock()
+an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is
+treated as a no-op, and mlock_fixup() simply returns.
+
+If the VMA passes some filtering as described in "Filtering Special Vmas"
+below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
+off a subset of the VMA if the range does not cover the entire VMA.  Once the
+VMA has been merged or split or neither, mlock_fixup() will call
+populate_vma_page_range() to fault in the pages via get_user_pages() and to
+mark the pages as mlocked via mlock_vma_page().
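+
+A heavily simplified sketch of that flow - the real mlock_fixup() in
+mm/mlock.c takes more parameters and handles the merge/split error paths -
+might read::
+
+  /* sketch only; merge_or_split_to_cover() is a hypothetical helper */
+  vma = merge_or_split_to_cover(vma, start, end);
+  if (newflags & VM_LOCKED)
+          populate_vma_page_range(vma, start, end, NULL);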
+
+Note that the VMA being mlocked might be mapped with PROT_NONE.  In this case,
+get_user_pages() will be unable to fault in the pages.  That's okay.  If pages
+do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the
+fault path or in vmscan.
+
+Also note that a page returned by get_user_pages() could be truncated or
+migrated out from under us, while we're trying to mlock it.  To detect this,
+populate_vma_page_range() checks page_mapping() after acquiring the page lock.
+If the page is still associated with its mapping, we'll go ahead and call
+mlock_vma_page().  If the mapping is gone, we just unlock the page and move on.
+In the worst case, this will result in a page mapped in a VM_LOCKED VMA
+remaining on a normal LRU list without being PageMlocked().  Again, vmscan will
+detect and cull such pages.
+
+mlock_vma_page() will call TestSetPageMlocked() for each page returned by
+get_user_pages().  We use TestSetPageMlocked() because the page might already
+be mlocked by another task/VMA and we don't want to do extra work.  We
+especially do not want to count an mlocked page more than once in the
+statistics.  If the page was already mlocked, mlock_vma_page() need do nothing
+more.
+
+If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
+page from the LRU, as it is likely on the appropriate active or inactive list
+at that time.  If isolate_lru_page() succeeds, mlock_vma_page() will put
+back the page - by calling putback_lru_page() - which will notice that the page
+is now mlocked and divert the page to the zone's unevictable list.  If
+mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
+it later if and when it attempts to reclaim the page.
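+
+In code, that amounts to something like the following sketch (the real
+mlock_vma_page() in mm/mlock.c also updates the mlock statistics)::
+
+  if (!TestSetPageMlocked(page)) {
+          /* first locker: try to move the page to the unevictable list */
+          if (!isolate_lru_page(page))
+                  putback_lru_page(page);  /* sees PG_mlocked and diverts */
+          /* on isolation failure, vmscan deals with the page later */
+  }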
+
+
+Filtering Special VMAs
+----------------------
+
+mlock_fixup() filters several classes of "special" VMAs:
+
+1) VMAs with VM_IO or VM_PFNMAP set are skipped entirely.  The pages behind
+   these mappings are inherently pinned, so we don't need to mark them as
+   mlocked.  In any case, most of these pages have no struct page in which to
+   mark them.  Because of this, get_user_pages() will fail for these VMAs,
+   so there is no sense in attempting to visit them.
+
+2) VMAs mapping hugetlbfs pages are already effectively pinned into memory.  We
+   neither need nor want to mlock() these pages.  However, to preserve the
+   prior behavior of mlock() - before the unevictable/mlock changes -
+   mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
+   allocate the huge pages and populate the ptes.
+
+3) VMAs with VM_DONTEXPAND are generally userspace mappings of kernel pages,
+   such as the VDSO page, relay channel pages, etc. These pages
+   are inherently unevictable and are not managed on the LRU lists.
+   mlock_fixup() treats these VMAs the same as hugetlbfs VMAs.  It calls
+   make_pages_present() to populate the ptes.
+
+Note that for all of these special VMAs, mlock_fixup() does not set the
+VM_LOCKED flag.  Therefore, we won't have to deal with them later during
+munlock(), munmap() or task exit.  Neither does mlock_fixup() account these
+VMAs against the task's "locked_vm".
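+
+Condensed into a sketch (the labels are illustrative, not the actual code
+structure of mm/mlock.c)::
+
+  if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+          goto out;            /* skipped entirely; VM_LOCKED not set */
+  if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_DONTEXPAND))
+          goto populate_only;  /* populate the ptes; VM_LOCKED not set */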
+
+.. _munlock_munlockall_handling:
+
+munlock()/munlockall() System Call Handling
+-------------------------------------------
+
+The munlock() and munlockall() system calls are handled by the same functions -
+do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs
+lock operation indicated by an argument.  So, these system calls are also
+handled by mlock_fixup().  Again, if called for an already munlocked VMA,
+mlock_fixup() simply returns.  Because of the VMA filtering discussed above,
+VM_LOCKED will not be set in any "special" VMAs.  So, these VMAs will be
+ignored for munlock.
+
+If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
+specified range.  The range is then munlocked via the function
+populate_vma_page_range() - the same function used to mlock a VMA range -
+passing a flag to indicate that munlock() is being performed.
+
+Because the VMA access protections could have been changed to PROT_NONE after
+faulting in and mlocking pages, get_user_pages() was unreliable for visiting
+these pages for munlocking.  Because we don't want to leave pages mlocked,
+get_user_pages() was enhanced to accept a flag to ignore the permissions when
+fetching the pages - all of which should be resident as a result of previous
+mlocking.
+
+For munlock(), populate_vma_page_range() unlocks individual pages by calling
+munlock_vma_page().  munlock_vma_page() unconditionally clears the PG_mlocked
+flag using TestClearPageMlocked().  As with mlock_vma_page(),
+munlock_vma_page() uses the Test*PageMlocked() function to handle the case
+where the page might have already been unlocked by another task.  If the page
+was mlocked, munlock_vma_page() updates the zone statistics for the number of
+mlocked pages.  Note, however, that at this point we haven't checked whether
+the page is mapped by other VM_LOCKED VMAs.
+
+We can't call try_to_munlock(), the function that walks the reverse map to
+check for other VM_LOCKED VMAs, without first isolating the page from the LRU.
+try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
+not be on an LRU list [more on these below].  However, the call to
+isolate_lru_page() could fail, in which case we couldn't try_to_munlock().  So,
+we go ahead and clear PG_mlocked up front, as this might be the only chance we
+have.  If we can successfully isolate the page, we go ahead and
+try_to_munlock(), which will restore the PG_mlocked flag and update the zone
+page statistics if it finds another VMA holding the page mlocked.  If we fail
+to isolate the page, we'll have left a potentially mlocked page on the LRU.
+This is fine, because we'll catch it later if and when vmscan tries to reclaim
+the page.  This should be relatively rare.
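+
+A condensed sketch of this "clear first, re-check via the reverse map later"
+scheme (statistics handling simplified)::
+
+  if (TestClearPageMlocked(page)) {
+          dec_zone_page_state(page, NR_MLOCK);
+          if (!isolate_lru_page(page))
+                  try_to_munlock(page);  /* may re-set PG_mlocked */
+          /* on isolation failure, vmscan will re-cull the page */
+  }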
+
+
+Migrating MLOCKED Pages
+-----------------------
+
+A page that is being migrated has been isolated from the LRU lists and is held
+locked across unmapping of the page, updating the page's address space entry
+and copying the contents and state, until the page table entry has been
+replaced with an entry that refers to the new page.  Linux supports migration
+of mlocked pages and other unevictable pages.  This involves simply moving the
+PG_mlocked and PG_unevictable states from the old page to the new page.
+
+Note that page migration can race with mlocking or munlocking of the same page.
+This has been discussed from the mlock/munlock perspective in the respective
+sections above.  Both processes (migration and m[un]locking) hold the page
+locked.  This provides the first level of synchronization.  Page migration
+zeros out the page_mapping of the old page before unlocking it, so m[un]lock
+can skip these pages by testing the page mapping under page lock.
+
+To complete page migration, we place the new and old pages back onto the LRU
+after dropping the page lock.  The "unneeded" page - old page on success, new
+page on failure - will be freed when the reference count held by the migration
+process is released.  To ensure that we don't strand pages on the unevictable
+list because of a race between munlock and migration, page migration uses the
+putback_lru_page() function to add migrated pages back to the LRU.
+
+
+Compacting MLOCKED Pages
+------------------------
+
+The unevictable LRU can be scanned for compactable regions and the default
+behavior is to do so.  /proc/sys/vm/compact_unevictable_allowed controls
+this behavior (see Documentation/sysctl/vm.txt).  Once scanning of the
+unevictable LRU is enabled, the work of compaction is mostly handled by
+the page migration code and the same work flow as described in MIGRATING
+MLOCKED PAGES will apply.
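+
+For example, scanning of the unevictable LRU by compaction can be disabled at
+run time by writing 0 to the sysctl; a minimal C equivalent of the usual shell
+echo (root required) is::
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+          FILE *f = fopen("/proc/sys/vm/compact_unevictable_allowed", "w");
+
+          if (!f)
+                  return 1;
+          fputs("0\n", f);        /* writing 1 (the default) re-enables it */
+          return fclose(f) ? 1 : 0;
+  }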
+
+MLOCKING Transparent Huge Pages
+-------------------------------
+
+A transparent huge page is represented by a single entry on an LRU list.
+Therefore, we can only make unevictable an entire compound page, not
+individual subpages.
+
+If a user tries to mlock() part of a huge page, we want the rest of the
+page to be reclaimable.
+
+We cannot just split the page on partial mlock(), as split_huge_page() can
+fail, and a new intermittent failure mode for the syscall is undesirable.
+
+We handle this by keeping PTE-mapped huge pages on normal LRU lists: the
+PMD on the border of a VM_LOCKED VMA will be split into a PTE table.
+
+This way the huge page is accessible to vmscan.  Under memory pressure the
+page will be split, subpages which belong to VM_LOCKED VMAs will be moved
+to the unevictable LRU and the rest can be reclaimed.
+
+See also comment in follow_trans_huge_pmd().
+
+mmap(MAP_LOCKED) System Call Handling
+-------------------------------------
+
+In addition to the mlock()/mlockall() system calls, an application can request
+that a region of memory be mlocked by supplying the MAP_LOCKED flag to the
+mmap() call.  There is one important and subtle difference here, though.
+mmap() + mlock() will fail with ENOMEM if the range cannot be faulted in
+(e.g. because mm_populate fails), while mmap(MAP_LOCKED) will not fail.  The
+mmapped area will still have properties of the locked area - i.e. pages will
+not get swapped out - but major page faults to fault memory in might still
+happen.
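+
+The difference is visible in a short example: only the first variant reports a
+population failure to the caller::
+
+  #include <sys/mman.h>
+
+  void *lock_strict(size_t len)   /* mmap() + mlock() */
+  {
+          void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+          if (p == MAP_FAILED)
+                  return NULL;
+          if (mlock(p, len)) {    /* ENOMEM if pages can't be faulted in */
+                  munmap(p, len);
+                  return NULL;
+          }
+          return p;
+  }
+
+  void *lock_lazy(size_t len)     /* mmap(MAP_LOCKED) */
+  {
+          void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, -1, 0);
+
+          /* succeeds even if population failed; faults may come later */
+          return p == MAP_FAILED ? NULL : p;
+  }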
+
+Furthermore, any mmap() call or brk() call that expands the heap by a
+task that has previously called mlockall() with the MCL_FUTURE flag will result
+in the newly mapped memory being mlocked.  Before the unevictable/mlock
+changes, the kernel simply called make_pages_present() to allocate pages and
+populate the page table.
+
+To mlock a range of memory under the unevictable/mlock infrastructure, the
+mmap() handler and task address space expansion functions call
+populate_vma_page_range() specifying the vma and the address range to mlock.
+
+The callers of populate_vma_page_range() will have already added the memory range
+to be mlocked to the task's "locked_vm".  To account for filtered VMAs,
+populate_vma_page_range() returns the number of pages NOT mlocked.  All of the
+callers then subtract a non-negative return value from the task's locked_vm.  A
+negative return value represents an error - for example, from get_user_pages()
+attempting to fault in a VMA with PROT_NONE access.  In this case, we leave the
+memory range accounted as locked_vm, as the protections could be changed later
+and pages allocated into that region.
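+
+Schematically, the accounting performed by those callers looks like this
+(sketch; locking and error details omitted)::
+
+  mm->locked_vm += (end - start) >> PAGE_SHIFT;  /* added up front */
+  ret = populate_vma_page_range(vma, start, end, NULL);
+  if (ret >= 0)
+          mm->locked_vm -= ret;  /* subtract the pages NOT mlocked */
+  /* ret < 0: leave the range accounted as locked_vm, see above */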
+
+
+munmap()/exit()/exec() System Call Handling
+-------------------------------------------
+
+When unmapping an mlocked region of memory, whether by an explicit call to
+munmap() or via an internal unmap from exit() or exec() processing, we must
+munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages.
+Before the unevictable/mlock changes, mlocking did not mark the pages in any
+way, so unmapping them required no processing.
+
+To munlock a range of memory under the unevictable/mlock infrastructure, the
+munmap() handler and the task address space tear-down code call the function
+munlock_vma_pages_all().  The name reflects the observation that one always
+specifies the entire VMA range when munlock()ing during unmap of a region.
+Because of the VMA filtering when mlock()ing regions, only "normal" VMAs that
+actually contain mlocked pages will be passed to munlock_vma_pages_all().
+
+munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup()
+for the munlock case, calls __munlock_vma_pages_range() to walk the page table
+for the VMA's memory range and munlock_vma_page() each resident page mapped by
+the VMA.  This effectively munlocks the page, but only if this is the last
+VM_LOCKED VMA that maps the page.
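+
+In outline (a sketch; see mm/mlock.c for the locking details)::
+
+  vma->vm_flags &= ~VM_LOCKED;
+  __munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);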
+
+
+try_to_unmap()
+--------------
+
+Pages can, of course, be mapped into multiple VMAs.  Some of these VMAs may
+have VM_LOCKED flag set.  It is possible for a page mapped into one or more
+VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one
+of the active or inactive LRU lists.  This could happen if, for example, a task
+in the process of munlocking the page could not isolate the page from the LRU.
+As a result, vmscan/shrink_page_list() might encounter such a page as described
+in section "vmscan's handling of unevictable pages".  To handle this situation,
+try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse
+map.
+
+try_to_unmap() is always called, either by vmscan for reclaim or for page
+migration, with the subject page locked and isolated from the LRU.  Separate
+functions handle anonymous and mapped file and KSM pages, as these types of
+pages have different reverse map lookup mechanisms, with different locking.
+In each case, whether rmap_walk_anon() or rmap_walk_file() or rmap_walk_ksm(),
+it will call try_to_unmap_one() for every VMA which might contain the page.
+
+When trying to reclaim, if try_to_unmap_one() finds the page in a VM_LOCKED
+VMA, it will then mlock the page via mlock_vma_page() instead of unmapping it,
+and return SWAP_MLOCK to indicate that the page is unevictable: and the scan
+stops there.
+
+mlock_vma_page() is called while holding the page table's lock (in addition
+to the page lock, and the rmap lock): to serialize against concurrent mlock or
+munlock or munmap system calls, mm teardown (munlock_vma_pages_all), reclaim,
+holepunching, and truncation of file pages and their anonymous COWed pages.
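+
+The check itself is small; inside try_to_unmap_one() it amounts to (sketch)::
+
+  if (vma->vm_flags & VM_LOCKED) {
+          mlock_vma_page(page);  /* pte lock + page lock held, as above */
+          return SWAP_MLOCK;     /* terminates the reverse map scan */
+  }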
+
+
+try_to_munlock() Reverse Map Scan
+---------------------------------
+
+.. warning::
+   [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the
+   page_referenced() reverse map walker.
+
+When munlock_vma_page() [see section :ref:`munlock()/munlockall() System Call
+Handling <munlock_munlockall_handling>` above] tries to munlock a
+page, it needs to determine whether or not the page is mapped by any
+VM_LOCKED VMA without actually attempting to unmap all PTEs from the
+page.  For this purpose, the unevictable/mlock infrastructure
+introduced a variant of try_to_unmap() called try_to_munlock().
+
+try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
+mapped file and KSM pages with a flag argument specifying unlock versus unmap
+processing.  Again, these functions walk the respective reverse maps looking
+for VM_LOCKED VMAs.  When such a VMA is found, as in the try_to_unmap() case,
+the functions mlock the page via mlock_vma_page() and return SWAP_MLOCK.  This
+undoes the pre-clearing of the page's PG_mlocked done by munlock_vma_page().
+
+Note that try_to_munlock()'s reverse map walk must visit every VMA in a page's
+reverse map to determine that a page is NOT mapped into any VM_LOCKED VMA.
+However, the scan can terminate when it encounters a VM_LOCKED VMA.
+Although try_to_munlock() might be called a great many times when munlocking a
+large region or tearing down a large address space that has been mlocked via
+mlockall(), overall this is a fairly rare event.
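+
+Conceptually the variant is the same reverse map walk invoked in an "unlock,
+don't unmap" mode; as a sketch (walk_rmap_for_munlock() is a hypothetical
+wrapper, the real code passes a TTU_* style flag)::
+
+  ret = walk_rmap_for_munlock(page);  /* hypothetical wrapper */
+  /* returns SWAP_MLOCK as soon as one VM_LOCKED VMA is found */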
+
+
+Page Reclaim in shrink_*_list()
+-------------------------------
+
+shrink_active_list() culls any obviously unevictable pages - i.e.
+!page_evictable(page) - diverting these to the unevictable list.
+However, shrink_active_list() only sees unevictable pages that made it onto the
+active/inactive lru lists.  Note that these pages do not have PageUnevictable
+set - otherwise they would be on the unevictable list and shrink_active_list
+would never see them.
+
+Some examples of these unevictable pages on the LRU lists are:
+
+ (1) ramfs pages that have been placed on the LRU lists when first allocated.
+
+ (2) SHM_LOCK'd shared memory pages.  shmctl(SHM_LOCK) does not attempt to
+     allocate or fault in the pages in the shared memory region.  This happens
+     when an application accesses the page the first time after SHM_LOCK'ing
+     the segment.
+
+ (3) mlocked pages that could not be isolated from the LRU and moved to the
+     unevictable list in mlock_vma_page().
+
+shrink_inactive_list() also diverts any unevictable pages that it finds on the
+inactive lists to the appropriate zone's unevictable list.
+
+shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd
+after shrink_active_list() had moved them to the inactive list, or pages mapped
+into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to
+recheck via try_to_munlock().  shrink_inactive_list() won't notice the latter,
+but will pass them on to shrink_page_list().
+
+shrink_page_list() again culls obviously unevictable pages that it could
+encounter for similar reasons to shrink_inactive_list().  Pages mapped into
+VM_LOCKED VMAs but without PG_mlocked set will make it all the way to
+try_to_unmap().  shrink_page_list() will divert them to the unevictable list
+when try_to_unmap() returns SWAP_MLOCK, as discussed above.
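+
+Within shrink_page_list(), that final cull is effectively (sketch; other
+switch cases omitted)::
+
+  switch (try_to_unmap(page, ttu_flags)) {
+  case SWAP_MLOCK:
+          goto cull_mlocked;  /* divert to the unevictable list */
+  /* ... SWAP_FAIL, SWAP_AGAIN, SWAP_SUCCESS cases omitted ... */
+  }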
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
deleted file mode 100644
index e147185..0000000
--- a/Documentation/vm/unevictable-lru.txt
+++ /dev/null
@@ -1,633 +0,0 @@
-			==============================
-			UNEVICTABLE LRU INFRASTRUCTURE
-			==============================
-
-========
-CONTENTS
-========
-
- (*) The Unevictable LRU
-
-     - The unevictable page list.
-     - Memory control group interaction.
-     - Marking address spaces unevictable.
-     - Detecting Unevictable Pages.
-     - vmscan's handling of unevictable pages.
-
- (*) mlock()'d pages.
-
-     - History.
-     - Basic management.
-     - mlock()/mlockall() system call handling.
-     - Filtering special vmas.
-     - munlock()/munlockall() system call handling.
-     - Migrating mlocked pages.
-     - Compacting mlocked pages.
-     - mmap(MAP_LOCKED) system call handling.
-     - munmap()/exit()/exec() system call handling.
-     - try_to_unmap().
-     - try_to_munlock() reverse map scan.
-     - Page reclaim in shrink_*_list().
-
-
-============
-INTRODUCTION
-============
-
-This document describes the Linux memory manager's "Unevictable LRU"
-infrastructure and the use of this to manage several types of "unevictable"
-pages.
-
-The document attempts to provide the overall rationale behind this mechanism
-and the rationale for some of the design decisions that drove the
-implementation.  The latter design rationale is discussed in the context of an
-implementation description.  Admittedly, one can obtain the implementation
-details - the "what does it do?" - by reading the code.  One hopes that the
-descriptions below add value by provide the answer to "why does it do that?".
-
-
-===================
-THE UNEVICTABLE LRU
-===================
-
-The Unevictable LRU facility adds an additional LRU list to track unevictable
-pages and to hide these pages from vmscan.  This mechanism is based on a patch
-by Larry Woodman of Red Hat to address several scalability problems with page
-reclaim in Linux.  The problems have been observed at customer sites on large
-memory x86_64 systems.
-
-To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of
-main memory will have over 32 million 4k pages in a single zone.  When a large
-fraction of these pages are not evictable for any reason [see below], vmscan
-will spend a lot of time scanning the LRU lists looking for the small fraction
-of pages that are evictable.  This can result in a situation where all CPUs are
-spending 100% of their time in vmscan for hours or days on end, with the system
-completely unresponsive.
-
-The unevictable list addresses the following classes of unevictable pages:
-
- (*) Those owned by ramfs.
-
- (*) Those mapped into SHM_LOCK'd shared memory regions.
-
- (*) Those mapped into VM_LOCKED [mlock()ed] VMAs.
-
-The infrastructure may also be able to handle other conditions that make pages
-unevictable, either by definition or by circumstance, in the future.
-
-
-THE UNEVICTABLE PAGE LIST
--------------------------
-
-The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
-called the "unevictable" list and an associated page flag, PG_unevictable, to
-indicate that the page is being managed on the unevictable list.
-
-The PG_unevictable flag is analogous to, and mutually exclusive with, the
-PG_active flag in that it indicates on which LRU list a page resides when
-PG_lru is set.
-
-The Unevictable LRU infrastructure maintains unevictable pages on an additional
-LRU list for a few reasons:
-
- (1) We get to "treat unevictable pages just like we treat other pages in the
-     system - which means we get to use the same code to manipulate them, the
-     same code to isolate them (for migrate, etc.), the same code to keep track
-     of the statistics, etc..." [Rik van Riel]
-
- (2) We want to be able to migrate unevictable pages between nodes for memory
-     defragmentation, workload management and memory hotplug.  The linux kernel
-     can only migrate pages that it can successfully isolate from the LRU
-     lists.  If we were to maintain pages elsewhere than on an LRU-like list,
-     where they can be found by isolate_lru_page(), we would prevent their
-     migration, unless we reworked migration code to find the unevictable pages
-     itself.
-
-
-The unevictable list does not differentiate between file-backed and anonymous,
-swap-backed pages.  This differentiation is only important while the pages are,
-in fact, evictable.
-
-The unevictable list benefits from the "arrayification" of the per-zone LRU
-lists and statistics originally proposed and posted by Christoph Lameter.
-
-The unevictable list does not use the LRU pagevec mechanism. Rather,
-unevictable pages are placed directly on the page's zone's unevictable list
-under the zone lru_lock.  This allows us to prevent the stranding of pages on
-the unevictable list when one task has the page isolated from the LRU and other
-tasks are changing the "evictability" state of the page.
-
-
-MEMORY CONTROL GROUP INTERACTION
---------------------------------
-
-The unevictable LRU facility interacts with the memory control group [aka
-memory controller; see Documentation/cgroup-v1/memory.txt] by extending the
-lru_list enum.
-
-The memory controller data structure automatically gets a per-zone unevictable
-list as a result of the "arrayification" of the per-zone LRU lists (one per
-lru_list enum element).  The memory controller tracks the movement of pages to
-and from the unevictable list.
-
-When a memory control group comes under memory pressure, the controller will
-not attempt to reclaim pages on the unevictable list.  This has a couple of
-effects:
-
- (1) Because the pages are "hidden" from reclaim on the unevictable list, the
-     reclaim process can be more efficient, dealing only with pages that have a
-     chance of being reclaimed.
-
- (2) On the other hand, if too many of the pages charged to the control group
-     are unevictable, the evictable portion of the working set of the tasks in
-     the control group may not fit into the available memory.  This can cause
-     the control group to thrash or to OOM-kill tasks.
-
-
-MARKING ADDRESS SPACES UNEVICTABLE
-----------------------------------
-
-For facilities such as ramfs none of the pages attached to the address space
-may be evicted.  To prevent eviction of any such pages, the AS_UNEVICTABLE
-address space flag is provided, and this can be manipulated by a filesystem
-using a number of wrapper functions:
-
- (*) void mapping_set_unevictable(struct address_space *mapping);
-
-	Mark the address space as being completely unevictable.
-
- (*) void mapping_clear_unevictable(struct address_space *mapping);
-
-	Mark the address space as being evictable.
-
- (*) int mapping_unevictable(struct address_space *mapping);
-
-	Query the address space, and return true if it is completely
-	unevictable.
-
-These are currently used in two places in the kernel:
-
- (1) By ramfs to mark the address spaces of its inodes when they are created,
-     and this mark remains for the life of the inode.
-
- (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called.
-
-     Note that SHM_LOCK is not required to page in the locked pages if they're
-     swapped out; the application must touch the pages manually if it wants to
-     ensure they're in memory.
-
-
-DETECTING UNEVICTABLE PAGES
----------------------------
-
-The function page_evictable() in vmscan.c determines whether a page is
-evictable or not using the query function outlined above [see section "Marking
-address spaces unevictable"] to check the AS_UNEVICTABLE flag.
-
-For address spaces that are so marked after being populated (as SHM regions
-might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate
-the page tables for the region as does, for example, mlock(), nor need it make
-any special effort to push any pages in the SHM_LOCK'd area to the unevictable
-list.  Instead, vmscan will do this if and when it encounters the pages during
-a reclamation scan.
-
-On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan
-the pages in the region and "rescue" them from the unevictable list if no other
-condition is keeping them unevictable.  If an unevictable region is destroyed,
-the pages are also "rescued" from the unevictable list in the process of
-freeing them.
-
-page_evictable() also checks for mlocked pages by testing an additional page
-flag, PG_mlocked (as wrapped by PageMlocked()), which is set when a page is
-faulted into a VM_LOCKED vma, or found in a vma being VM_LOCKED.
-
-
-VMSCAN'S HANDLING OF UNEVICTABLE PAGES
---------------------------------------
-
-If unevictable pages are culled in the fault path, or moved to the unevictable
-list at mlock() or mmap() time, vmscan will not encounter the pages until they
-have become evictable again (via munlock() for example) and have been "rescued"
-from the unevictable list.  However, there may be situations where we decide,
-for the sake of expediency, to leave a unevictable page on one of the regular
-active/inactive LRU lists for vmscan to deal with.  vmscan checks for such
-pages in all of the shrink_{active|inactive|page}_list() functions and will
-"cull" such pages that it encounters: that is, it diverts those pages to the
-unevictable list for the zone being scanned.
-
-There may be situations where a page is mapped into a VM_LOCKED VMA, but the
-page is not marked as PG_mlocked.  Such pages will make it all the way to
-shrink_page_list() where they will be detected when vmscan walks the reverse
-map in try_to_unmap().  If try_to_unmap() returns SWAP_MLOCK,
-shrink_page_list() will cull the page at that point.
-
-To "cull" an unevictable page, vmscan simply puts the page back on the LRU list
-using putback_lru_page() - the inverse operation to isolate_lru_page() - after
-dropping the page lock.  Because the condition which makes the page unevictable
-may change once the page is unlocked, putback_lru_page() will recheck the
-unevictable state of a page that it places on the unevictable list.  If the
-page has become unevictable, putback_lru_page() removes it from the list and
-retries, including the page_unevictable() test.  Because such a race is a rare
-event and movement of pages onto the unevictable list should be rare, these
-extra evictabilty checks should not occur in the majority of calls to
-putback_lru_page().
-
-
-=============
-MLOCKED PAGES
-=============
-
-The unevictable page list is also useful for mlock(), in addition to ramfs and
-SYSV SHM.  Note that mlock() is only available in CONFIG_MMU=y situations; in
-NOMMU situations, all mappings are effectively mlocked.
-
-
-HISTORY
--------
-
-The "Unevictable mlocked Pages" infrastructure is based on work originally
-posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
-Nick posted his patch as an alternative to a patch posted by Christoph Lameter
-to achieve the same objective: hiding mlocked pages from vmscan.
-
-In Nick's patch, he used one of the struct page LRU list link fields as a count
-of VM_LOCKED VMAs that map the page.  This use of the link field for a count
-prevented the management of the pages on an LRU list, and thus mlocked pages
-were not migratable as isolate_lru_page() could not find them, and the LRU list
-link field was not available to the migration subsystem.
-
-Nick resolved this by putting mlocked pages back on the lru list before
-attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs.  When
-Nick's patch was integrated with the Unevictable LRU work, the count was
-replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
-mapped the page.  More on this below.
-
-
-BASIC MANAGEMENT
-----------------
-
-mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable
-pages.  When such a page has been "noticed" by the memory management subsystem,
-the page is marked with the PG_mlocked flag.  This can be manipulated using the
-PageMlocked() functions.
-
-A PG_mlocked page will be placed on the unevictable list when it is added to
-the LRU.  Such pages can be "noticed" by memory management in several places:
-
- (1) in the mlock()/mlockall() system call handlers;
-
- (2) in the mmap() system call handler when mmapping a region with the
-     MAP_LOCKED flag;
-
- (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE
-     flag
-
- (4) in the fault path, if mlocked pages are "culled" in the fault path,
-     and when a VM_LOCKED stack segment is expanded; or
-
- (5) as mentioned above, in vmscan:shrink_page_list() when attempting to
-     reclaim a page in a VM_LOCKED VMA via try_to_unmap()
-
-all of which result in the VM_LOCKED flag being set for the VMA if it doesn't
-already have it set.
-
-mlocked pages become unlocked and rescued from the unevictable list when:
-
- (1) mapped in a range unlocked via the munlock()/munlockall() system calls;
-
- (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including
-     unmapping at task exit;
-
- (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file;
-     or
-
- (4) before a page is COW'd in a VM_LOCKED VMA.
-
-
-mlock()/mlockall() SYSTEM CALL HANDLING
----------------------------------------
-
-Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
-for each VMA in the range specified by the call.  In the case of mlockall(),
-this is the entire active address space of the task.  Note that mlock_fixup()
-is used for both mlocking and munlocking a range of memory.  A call to mlock()
-an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is
-treated as a no-op, and mlock_fixup() simply returns.
-
-If the VMA passes some filtering as described in "Filtering Special Vmas"
-below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
-off a subset of the VMA if the range does not cover the entire VMA.  Once the
-VMA has been merged or split or neither, mlock_fixup() will call
-populate_vma_page_range() to fault in the pages via get_user_pages() and to
-mark the pages as mlocked via mlock_vma_page().
-
-Note that the VMA being mlocked might be mapped with PROT_NONE.  In this case,
-get_user_pages() will be unable to fault in the pages.  That's okay.  If pages
-do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the
-fault path or in vmscan.
-
-Also note that a page returned by get_user_pages() could be truncated or
-migrated out from under us, while we're trying to mlock it.  To detect this,
-populate_vma_page_range() checks page_mapping() after acquiring the page lock.
-If the page is still associated with its mapping, we'll go ahead and call
-mlock_vma_page().  If the mapping is gone, we just unlock the page and move on.
-In the worst case, this will result in a page mapped in a VM_LOCKED VMA
-remaining on a normal LRU list without being PageMlocked().  Again, vmscan will
-detect and cull such pages.
-
-mlock_vma_page() will call TestSetPageMlocked() for each page returned by
-get_user_pages().  We use TestSetPageMlocked() because the page might already
-be mlocked by another task/VMA and we don't want to do extra work.  We
-especially do not want to count an mlocked page more than once in the
-statistics.  If the page was already mlocked, mlock_vma_page() need do nothing
-more.
-
-If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
-page from the LRU, as it is likely on the appropriate active or inactive list
-at that time.  If the isolate_lru_page() succeeds, mlock_vma_page() will put
-back the page - by calling putback_lru_page() - which will notice that the page
-is now mlocked and divert the page to the zone's unevictable list.  If
-mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
-it later if and when it attempts to reclaim the page.
-
-
-FILTERING SPECIAL VMAS
-----------------------
-
-mlock_fixup() filters several classes of "special" VMAs:
-
-1) VMAs with VM_IO or VM_PFNMAP set are skipped entirely.  The pages behind
-   these mappings are inherently pinned, so we don't need to mark them as
-   mlocked.  In any case, most of the pages have no struct page in which to so
-   mark the page.  Because of this, get_user_pages() will fail for these VMAs,
-   so there is no sense in attempting to visit them.
-
-2) VMAs mapping hugetlbfs page are already effectively pinned into memory.  We
-   neither need nor want to mlock() these pages.  However, to preserve the
-   prior behavior of mlock() - before the unevictable/mlock changes -
-   mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
-   allocate the huge pages and populate the ptes.
-
-3) VMAs with VM_DONTEXPAND are generally userspace mappings of kernel pages,
-   such as the VDSO page, relay channel pages, etc. These pages
-   are inherently unevictable and are not managed on the LRU lists.
-   mlock_fixup() treats these VMAs the same as hugetlbfs VMAs.  It calls
-   make_pages_present() to populate the ptes.
-
-Note that for all of these special VMAs, mlock_fixup() does not set the
-VM_LOCKED flag.  Therefore, we won't have to deal with them later during
-munlock(), munmap() or task exit.  Neither does mlock_fixup() account these
-VMAs against the task's "locked_vm".
-
-
-munlock()/munlockall() SYSTEM CALL HANDLING
--------------------------------------------
-
-The munlock() and munlockall() system calls are handled by the same functions -
-do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs
-lock operation indicated by an argument.  So, these system calls are also
-handled by mlock_fixup().  Again, if called for an already munlocked VMA,
-mlock_fixup() simply returns.  Because of the VMA filtering discussed above,
-VM_LOCKED will not be set in any "special" VMAs.  So, these VMAs will be
-ignored for munlock.
-
-If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
-specified range.  The range is then munlocked via the function
-populate_vma_page_range() - the same function used to mlock a VMA range -
-passing a flag to indicate that munlock() is being performed.
-
-Because the VMA access protections could have been changed to PROT_NONE after
-faulting in and mlocking pages, get_user_pages() was unreliable for visiting
-these pages for munlocking.  Because we don't want to leave pages mlocked,
-get_user_pages() was enhanced to accept a flag to ignore the permissions when
-fetching the pages - all of which should be resident as a result of previous
-mlocking.
-
-For munlock(), populate_vma_page_range() unlocks individual pages by calling
-munlock_vma_page().  munlock_vma_page() unconditionally clears the PG_mlocked
-flag using TestClearPageMlocked().  As with mlock_vma_page(),
-munlock_vma_page() use the Test*PageMlocked() function to handle the case where
-the page might have already been unlocked by another task.  If the page was
-mlocked, munlock_vma_page() updates that zone statistics for the number of
-mlocked pages.  Note, however, that at this point we haven't checked whether
-the page is mapped by other VM_LOCKED VMAs.
-
-We can't call try_to_munlock(), the function that walks the reverse map to
-check for other VM_LOCKED VMAs, without first isolating the page from the LRU.
-try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
-not be on an LRU list [more on these below].  However, the call to
-isolate_lru_page() could fail, in which case we couldn't try_to_munlock().  So,
-we go ahead and clear PG_mlocked up front, as this might be the only chance we
-have.  If we can successfully isolate the page, we go ahead and
-try_to_munlock(), which will restore the PG_mlocked flag and update the zone
-page statistics if it finds another VMA holding the page mlocked.  If we fail
-to isolate the page, we'll have left a potentially mlocked page on the LRU.
-This is fine, because we'll catch it later if and if vmscan tries to reclaim
-the page.  This should be relatively rare.
-
-
-MIGRATING MLOCKED PAGES
------------------------
-
-A page that is being migrated has been isolated from the LRU lists and is held
-locked across unmapping of the page, updating the page's address space entry
-and copying the contents and state, until the page table entry has been
-replaced with an entry that refers to the new page.  Linux supports migration
-of mlocked pages and other unevictable pages.  This involves simply moving the
-PG_mlocked and PG_unevictable states from the old page to the new page.
-
-Note that page migration can race with mlocking or munlocking of the same page.
-This has been discussed from the mlock/munlock perspective in the respective
-sections above.  Both processes (migration and m[un]locking) hold the page
-locked.  This provides the first level of synchronization.  Page migration
-zeros out the page_mapping of the old page before unlocking it, so m[un]lock
-can skip these pages by testing the page mapping under page lock.
-
-To complete page migration, we place the new and old pages back onto the LRU
-after dropping the page lock.  The "unneeded" page - old page on success, new
-page on failure - will be freed when the reference count held by the migration
-process is released.  To ensure that we don't strand pages on the unevictable
-list because of a race between munlock and migration, page migration uses the
-putback_lru_page() function to add migrated pages back to the LRU.
-
-
-COMPACTING MLOCKED PAGES
-------------------------
-
-The unevictable LRU can be scanned for compactable regions and the default
-behavior is to do so.  /proc/sys/vm/compact_unevictable_allowed controls
-this behavior (see Documentation/sysctl/vm.txt).  Once scanning of the
-unevictable LRU is enabled, the work of compaction is mostly handled by
-the page migration code and the same work flow as described in MIGRATING
-MLOCKED PAGES will apply.
-
-MLOCKING TRANSPARENT HUGE PAGES
--------------------------------
-
-A transparent huge page is represented by a single entry on an LRU list.
-Therefore, we can only make unevictable an entire compound page, not
-individual subpages.
-
-If a user tries to mlock() part of a huge page, we want the rest of the
-page to be reclaimable.
-
-We cannot just split the page on partial mlock() as split_huge_page() can
-fail and new intermittent failure mode for the syscall is undesirable.
-
-We handle this by keeping PTE-mapped huge pages on normal LRU lists: the
-PMD on border of VM_LOCKED VMA will be split into PTE table.
-
-This way the huge page is accessible for vmscan. Under memory pressure the
-page will be split, subpages which belong to VM_LOCKED VMAs will be moved
-to unevictable LRU and the rest can be reclaimed.
-
-See also comment in follow_trans_huge_pmd().
-
-mmap(MAP_LOCKED) SYSTEM CALL HANDLING
--------------------------------------
-
-In addition the mlock()/mlockall() system calls, an application can request
-that a region of memory be mlocked supplying the MAP_LOCKED flag to the mmap()
-call. There is one important and subtle difference here, though. mmap() + mlock()
-will fail if the range cannot be faulted in (e.g. because mm_populate fails)
-and returns with ENOMEM while mmap(MAP_LOCKED) will not fail. The mmaped
-area will still have properties of the locked area - aka. pages will not get
-swapped out - but major page faults to fault memory in might still happen.
-
-Furthermore, any mmap() call or brk() call that expands the heap by a
-task that has previously called mlockall() with the MCL_FUTURE flag will result
-in the newly mapped memory being mlocked.  Before the unevictable/mlock
-changes, the kernel simply called make_pages_present() to allocate pages and
-populate the page table.
-
-To mlock a range of memory under the unevictable/mlock infrastructure, the
-mmap() handler and task address space expansion functions call
-populate_vma_page_range() specifying the vma and the address range to mlock.
-
-The callers of populate_vma_page_range() will have already added the memory range
-to be mlocked to the task's "locked_vm".  To account for filtered VMAs,
-populate_vma_page_range() returns the number of pages NOT mlocked.  All of the
-callers then subtract a non-negative return value from the task's locked_vm.  A
-negative return value represent an error - for example, from get_user_pages()
-attempting to fault in a VMA with PROT_NONE access.  In this case, we leave the
-memory range accounted as locked_vm, as the protections could be changed later
-and pages allocated into that region.
-
-
-munmap()/exit()/exec() SYSTEM CALL HANDLING
--------------------------------------------
-
-When unmapping an mlocked region of memory, whether by an explicit call to
-munmap() or via an internal unmap from exit() or exec() processing, we must
-munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages.
-Before the unevictable/mlock changes, mlocking did not mark the pages in any
-way, so unmapping them required no processing.
-
-To munlock a range of memory under the unevictable/mlock infrastructure, the
-munmap() handler and task address space call tear down function
-munlock_vma_pages_all().  The name reflects the observation that one always
-specifies the entire VMA range when munlock()ing during unmap of a region.
-Because of the VMA filtering when mlocking() regions, only "normal" VMAs that
-actually contain mlocked pages will be passed to munlock_vma_pages_all().
-
-munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup()
-for the munlock case, calls __munlock_vma_pages_range() to walk the page table
-for the VMA's memory range and munlock_vma_page() each resident page mapped by
-the VMA.  This effectively munlocks the page, only if this is the last
-VM_LOCKED VMA that maps the page.
-
-
-try_to_unmap()
---------------
-
-Pages can, of course, be mapped into multiple VMAs.  Some of these VMAs may
-have VM_LOCKED flag set.  It is possible for a page mapped into one or more
-VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one
-of the active or inactive LRU lists.  This could happen if, for example, a task
-in the process of munlocking the page could not isolate the page from the LRU.
-As a result, vmscan/shrink_page_list() might encounter such a page as described
-in section "vmscan's handling of unevictable pages".  To handle this situation,
-try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse
-map.
-
-try_to_unmap() is always called, by either vmscan for reclaim or for page
-migration, with the argument page locked and isolated from the LRU.  Separate
-functions handle anonymous and mapped file and KSM pages, as these types of
-pages have different reverse map lookup mechanisms, with different locking.
-In each case, whether rmap_walk_anon() or rmap_walk_file() or rmap_walk_ksm(),
-it will call try_to_unmap_one() for every VMA which might contain the page.
-
-When trying to reclaim, if try_to_unmap_one() finds the page in a VM_LOCKED
-VMA, it will then mlock the page via mlock_vma_page() instead of unmapping it,
-and return SWAP_MLOCK to indicate that the page is unevictable: and the scan
-stops there.
-
-mlock_vma_page() is called while holding the page table's lock (in addition
-to the page lock, and the rmap lock): to serialize against concurrent mlock or
-munlock or munmap system calls, mm teardown (munlock_vma_pages_all), reclaim,
-holepunching, and truncation of file pages and their anonymous COWed pages.
-
-
-try_to_munlock() REVERSE MAP SCAN
----------------------------------
-
- [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the
-     page_referenced() reverse map walker.
-
-When munlock_vma_page() [see section "munlock()/munlockall() System Call
-Handling" above] tries to munlock a page, it needs to determine whether or not
-the page is mapped by any VM_LOCKED VMA without actually attempting to unmap
-all PTEs from the page.  For this purpose, the unevictable/mlock infrastructure
-introduced a variant of try_to_unmap() called try_to_munlock().
-
-try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
-mapped file and KSM pages with a flag argument specifying unlock versus unmap
-processing.  Again, these functions walk the respective reverse maps looking
-for VM_LOCKED VMAs.  When such a VMA is found, as in the try_to_unmap() case,
-the functions mlock the page via mlock_vma_page() and return SWAP_MLOCK.  This
-undoes the pre-clearing of the page's PG_mlocked done by munlock_vma_page.
-
-Note that try_to_munlock()'s reverse map walk must visit every VMA in a page's
-reverse map to determine that a page is NOT mapped into any VM_LOCKED VMA.
-However, the scan can terminate when it encounters a VM_LOCKED VMA.
-Although try_to_munlock() might be called a great many times when munlocking a
-large region or tearing down a large address space that has been mlocked via
-mlockall(), overall this is a fairly rare event.
-
-
-PAGE RECLAIM IN shrink_*_list()
--------------------------------
-
-shrink_active_list() culls any obviously unevictable pages - i.e.
-!page_evictable(page) - diverting these to the unevictable list.
-However, shrink_active_list() only sees unevictable pages that made it onto the
-active/inactive lru lists.  Note that these pages do not have PageUnevictable
-set - otherwise they would be on the unevictable list and shrink_active_list
-would never see them.
-
-Some examples of these unevictable pages on the LRU lists are:
-
- (1) ramfs pages that have been placed on the LRU lists when first allocated.
-
- (2) SHM_LOCK'd shared memory pages.  shmctl(SHM_LOCK) does not attempt to
-     allocate or fault in the pages in the shared memory region.  This happens
-     when an application accesses the page the first time after SHM_LOCK'ing
-     the segment.
-
- (3) mlocked pages that could not be isolated from the LRU and moved to the
-     unevictable list in mlock_vma_page().
-
-shrink_inactive_list() also diverts any unevictable pages that it finds on the
-inactive lists to the appropriate zone's unevictable list.
-
-shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd
-after shrink_active_list() had moved them to the inactive list, or pages mapped
-into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to
-recheck via try_to_munlock().  shrink_inactive_list() won't notice the latter,
-but will pass on to shrink_page_list().
-
-shrink_page_list() again culls obviously unevictable pages that it could
-encounter for similar reason to shrink_inactive_list().  Pages mapped into
-VM_LOCKED VMAs but without PG_mlocked set will make it all the way to
-try_to_unmap().  shrink_page_list() will divert them to the unevictable list
-when try_to_unmap() returns SWAP_MLOCK, as discussed above.
diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/vm/userfaultfd.txt
deleted file mode 100644
index bb2f945..0000000
--- a/Documentation/vm/userfaultfd.txt
+++ /dev/null
@@ -1,229 +0,0 @@
-= Userfaultfd =
-
-== Objective ==
-
-Userfaults allow the implementation of on-demand paging from userland
-and more generally they allow userland to take control of various
-memory page faults, something otherwise only the kernel code could do.
-
-For example userfaults allows a proper and more optimal implementation
-of the PROT_NONE+SIGSEGV trick.
-
-== Design ==
-
-Userfaults are delivered and resolved through the userfaultfd syscall.
-
-The userfaultfd (aside from registering and unregistering virtual
-memory ranges) provides two primary functionalities:
-
-1) read/POLLIN protocol to notify a userland thread of the faults
-   happening
-
-2) various UFFDIO_* ioctls that can manage the virtual memory regions
-   registered in the userfaultfd that allows userland to efficiently
-   resolve the userfaults it receives via 1) or to manage the virtual
-   memory in the background
-
-The real advantage of userfaults if compared to regular virtual memory
-management of mremap/mprotect is that the userfaults in all their
-operations never involve heavyweight structures like vmas (in fact the
-userfaultfd runtime load never takes the mmap_sem for writing).
-
-Vmas are not suitable for page- (or hugepage) granular fault tracking
-when dealing with virtual address spaces that could span
-Terabytes. Too many vmas would be needed for that.
-
-The userfaultfd once opened by invoking the syscall, can also be
-passed using unix domain sockets to a manager process, so the same
-manager process could handle the userfaults of a multitude of
-different processes without them being aware about what is going on
-(well of course unless they later try to use the userfaultfd
-themselves on the same region the manager is already tracking, which
-is a corner case that would currently return -EBUSY).
-
-== API ==
-
-When first opened the userfaultfd must be enabled invoking the
-UFFDIO_API ioctl specifying a uffdio_api.api value set to UFFD_API (or
-a later API version) which will specify the read/POLLIN protocol
-userland intends to speak on the UFFD and the uffdio_api.features
-userland requires. The UFFDIO_API ioctl if successful (i.e. if the
-requested uffdio_api.api is spoken also by the running kernel and the
-requested features are going to be enabled) will return into
-uffdio_api.features and uffdio_api.ioctls two 64bit bitmasks of
-respectively all the available features of the read(2) protocol and
-the generic ioctl available.
-
-The uffdio_api.features bitmask returned by the UFFDIO_API ioctl
-defines what memory types are supported by the userfaultfd and what
-events, except page fault notifications, may be generated.
-
-If the kernel supports registering userfaultfd ranges on hugetlbfs
-virtual memory areas, UFFD_FEATURE_MISSING_HUGETLBFS will be set in
-uffdio_api.features. Similarly, UFFD_FEATURE_MISSING_SHMEM will be
-set if the kernel supports registering userfaultfd ranges on shared
-memory (covering all shmem APIs, i.e. tmpfs, IPCSHM, /dev/zero
-MAP_SHARED, memfd_create, etc).
-
-The userland application that wants to use userfaultfd with hugetlbfs
-or shared memory need to set the corresponding flag in
-uffdio_api.features to enable those features.
-
-If the userland desires to receive notifications for events other than
-page faults, it has to verify that uffdio_api.features has appropriate
-UFFD_FEATURE_EVENT_* bits set. These events are described in more
-detail below in "Non-cooperative userfaultfd" section.
-
-Once the userfaultfd has been enabled the UFFDIO_REGISTER ioctl should
-be invoked (if present in the returned uffdio_api.ioctls bitmask) to
-register a memory range in the userfaultfd by setting the
-uffdio_register structure accordingly. The uffdio_register.mode
-bitmask will specify to the kernel which kind of faults to track for
-the range (UFFDIO_REGISTER_MODE_MISSING would track missing
-pages). The UFFDIO_REGISTER ioctl will return the
-uffdio_register.ioctls bitmask of ioctls that are suitable to resolve
-userfaults on the range registered. Not all ioctls will necessarily be
-supported for all memory types depending on the underlying virtual
-memory backend (anonymous memory vs tmpfs vs real filebacked
-mappings).
-
-Userland can use the uffdio_register.ioctls to manage the virtual
-address space in the background (to add or potentially also remove
-memory from the userfaultfd registered range). This means a userfault
-could be triggering just before userland maps in the background the
-user-faulted page.
-
-The primary ioctl to resolve userfaults is UFFDIO_COPY. That
-atomically copies a page into the userfault registered range and wakes
-up the blocked userfaults (unless uffdio_copy.mode &
-UFFDIO_COPY_MODE_DONTWAKE is set). Other ioctl works similarly to
-UFFDIO_COPY. They're atomic as in guaranteeing that nothing can see an
-half copied page since it'll keep userfaulting until the copy has
-finished.
-
-== QEMU/KVM ==
-
-QEMU/KVM is using the userfaultfd syscall to implement postcopy live
-migration. Postcopy live migration is one form of memory
-externalization consisting of a virtual machine running with part or
-all of its memory residing on a different node in the cloud. The
-userfaultfd abstraction is generic enough that not a single line of
-KVM kernel code had to be modified in order to add postcopy live
-migration to QEMU.
-
-Guest async page faults, FOLL_NOWAIT and all other GUP features work
-just fine in combination with userfaults. Userfaults trigger async
-page faults in the guest scheduler so those guest processes that
-aren't waiting for userfaults (i.e. network bound) can keep running in
-the guest vcpus.
-
-It is generally beneficial to run one pass of precopy live migration
-just before starting postcopy live migration, in order to avoid
-generating userfaults for readonly guest regions.
-
-The implementation of postcopy live migration currently uses one
-single bidirectional socket but in the future two different sockets
-will be used (to reduce the latency of the userfaults to the minimum
-possible without having to decrease /proc/sys/net/ipv4/tcp_wmem).
-
-The QEMU in the source node writes all pages that it knows are missing
-in the destination node, into the socket, and the migration thread of
-the QEMU running in the destination node runs UFFDIO_COPY|ZEROPAGE
-ioctls on the userfaultfd in order to map the received pages into the
-guest (UFFDIO_ZEROCOPY is used if the source page was a zero page).
-
-A different postcopy thread in the destination node listens with
-poll() to the userfaultfd in parallel. When a POLLIN event is
-generated after a userfault triggers, the postcopy thread read() from
-the userfaultfd and receives the fault address (or -EAGAIN in case the
-userfault was already resolved and waken by a UFFDIO_COPY|ZEROPAGE run
-by the parallel QEMU migration thread).
-
-After the QEMU postcopy thread (running in the destination node) gets
-the userfault address it writes the information about the missing page
-into the socket. The QEMU source node receives the information and
-roughly "seeks" to that page address and continues sending all
-remaining missing pages from that new page offset. Soon after that
-(just the time to flush the tcp_wmem queue through the network) the
-migration thread in the QEMU running in the destination node will
-receive the page that triggered the userfault and it'll map it as
-usual with the UFFDIO_COPY|ZEROPAGE (without actually knowing if it
-was spontaneously sent by the source or if it was an urgent page
-requested through a userfault).
-
-By the time the userfaults start, the QEMU in the destination node
-doesn't need to keep any per-page state bitmap relative to the live
-migration around and a single per-page bitmap has to be maintained in
-the QEMU running in the source node to know which pages are still
-missing in the destination node. The bitmap in the source node is
-checked to find which missing pages to send in round robin and we seek
-over it when receiving incoming userfaults. After sending each page of
-course the bitmap is updated accordingly. It's also useful to avoid
-sending the same page twice (in case the userfault is read by the
-postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration
-thread).
-
-== Non-cooperative userfaultfd ==
-
-When the userfaultfd is monitored by an external manager, the manager
-must be able to track changes in the process virtual memory
-layout. Userfaultfd can notify the manager about such changes using
-the same read(2) protocol as for the page fault notifications. The
-manager has to explicitly enable these events by setting appropriate
-bits in uffdio_api.features passed to UFFDIO_API ioctl:
-
-UFFD_FEATURE_EVENT_FORK - enable userfaultfd hooks for fork(). When
-this feature is enabled, the userfaultfd context of the parent process
-is duplicated into the newly created process. The manager receives
-UFFD_EVENT_FORK with file descriptor of the new userfaultfd context in
-the uffd_msg.fork.
-
-UFFD_FEATURE_EVENT_REMAP - enable notifications about mremap()
-calls. When the non-cooperative process moves a virtual memory area to
-a different location, the manager will receive UFFD_EVENT_REMAP. The
-uffd_msg.remap will contain the old and new addresses of the area and
-its original length.
-
-UFFD_FEATURE_EVENT_REMOVE - enable notifications about
-madvise(MADV_REMOVE) and madvise(MADV_DONTNEED) calls. The event
-UFFD_EVENT_REMOVE will be generated upon these calls to madvise. The
-uffd_msg.remove will contain start and end addresses of the removed
-area.
-
-UFFD_FEATURE_EVENT_UNMAP - enable notifications about memory
-unmapping. The manager will get UFFD_EVENT_UNMAP with uffd_msg.remove
-containing start and end addresses of the unmapped area.
-
-Although the UFFD_FEATURE_EVENT_REMOVE and UFFD_FEATURE_EVENT_UNMAP
-are pretty similar, they quite differ in the action expected from the
-userfaultfd manager. In the former case, the virtual memory is
-removed, but the area is not, the area remains monitored by the
-userfaultfd, and if a page fault occurs in that area it will be
-delivered to the manager. The proper resolution for such page fault is
-to zeromap the faulting address. However, in the latter case, when an
-area is unmapped, either explicitly (with munmap() system call), or
-implicitly (e.g. during mremap()), the area is removed and in turn the
-userfaultfd context for such area disappears too and the manager will
-not get further userland page faults from the removed area. Still, the
-notification is required in order to prevent manager from using
-UFFDIO_COPY on the unmapped area.
-
-Unlike userland page faults which have to be synchronous and require
-explicit or implicit wakeup, all the events are delivered
-asynchronously and the non-cooperative process resumes execution as
-soon as manager executes read(). The userfaultfd manager should
-carefully synchronize calls to UFFDIO_COPY with the events
-processing. To aid the synchronization, the UFFDIO_COPY ioctl will
-return -ENOSPC when the monitored process exits at the time of
-UFFDIO_COPY, and -ENOENT, when the non-cooperative process has changed
-its virtual memory layout simultaneously with outstanding UFFDIO_COPY
-operation.
-
-The current asynchronous model of the event delivery is optimal for
-single threaded non-cooperative userfaultfd manager implementations. A
-synchronous event delivery model can be added later as a new
-userfaultfd feature to facilitate multithreading enhancements of the
-non cooperative manager, for example to allow UFFDIO_COPY ioctls to
-run in parallel to the event reception. Single threaded
-implementations should continue to use the current async event
-delivery model instead.
diff --git a/Documentation/vm/z3fold.rst b/Documentation/vm/z3fold.rst
new file mode 100644
index 0000000..224e3c6
--- /dev/null
+++ b/Documentation/vm/z3fold.rst
@@ -0,0 +1,30 @@
+.. _z3fold:
+
+======
+z3fold
+======
+
+z3fold is a special purpose allocator for storing compressed pages.
+It is designed to store up to three compressed pages per physical page.
+It is a zbud derivative which allows for a higher compression ratio
+while keeping the simplicity and determinism of its predecessor.
+
+The main differences between z3fold and zbud are:
+
+* unlike zbud, z3fold allows for up to PAGE_SIZE allocations
+* z3fold can hold up to 3 compressed pages in its page
+* z3fold doesn't export any API itself and is thus intended to be used
+  via the zpool API.
+
+To keep the determinism and simplicity, z3fold, just like zbud, always
+stores an integral number of compressed pages per page, but it can store
+up to 3 pages, unlike zbud which can store at most 2. Therefore the
+compression ratio goes up to around 2.7x while zbud's is around 1.7x.
+
+Unlike zbud (but like zsmalloc for that matter) z3fold_alloc() does not
+return a dereferenceable pointer. Instead, it returns an unsigned long
+handle which encodes actual location of the allocated object.
+
+While keeping the effective compression ratio close to zsmalloc's, z3fold
+doesn't depend on the MMU being enabled and provides more predictable
+reclaim behavior, which makes it a better fit for small and
+response-critical systems.
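+
+As a rough illustration of that handle-based discipline, the sketch below
+shows a hypothetical zpool client allocating from z3fold and mapping the
+returned handle before touching the memory. The zpool_* calls are the
+real zpool interface from include/linux/zpool.h; the wrapper function and
+its error handling are invented for this example::
+
+	#include <linux/gfp.h>
+	#include <linux/string.h>
+	#include <linux/zpool.h>
+
+	static int store_compressed(struct zpool *pool, const void *src,
+				    size_t len, unsigned long *handle)
+	{
+		void *dst;
+		int ret;
+
+		/* z3fold limits allocations to PAGE_SIZE at most. */
+		ret = zpool_malloc(pool, len, GFP_KERNEL, handle);
+		if (ret)
+			return ret;
+
+		/* The handle is opaque; map it to obtain a usable pointer. */
+		dst = zpool_map_handle(pool, *handle, ZPOOL_MM_WO);
+		memcpy(dst, src, len);
+		zpool_unmap_handle(pool, *handle);
+
+		return 0;
+	}
+
+The pool itself would come from zpool_create_pool("z3fold", ...), which is
+how a zpool user such as zswap selects its backing allocator.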
diff --git a/Documentation/vm/z3fold.txt b/Documentation/vm/z3fold.txt
deleted file mode 100644
index 38e4dac..0000000
--- a/Documentation/vm/z3fold.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-z3fold
-------
-
-z3fold is a special purpose allocator for storing compressed pages.
-It is designed to store up to three compressed pages per physical page.
-It is a zbud derivative which allows for higher compression
-ratio keeping the simplicity and determinism of its predecessor.
-
-The main differences between z3fold and zbud are:
-* unlike zbud, z3fold allows for up to PAGE_SIZE allocations
-* z3fold can hold up to 3 compressed pages in its page
-* z3fold doesn't export any API itself and is thus intended to be used
-  via the zpool API.
-
-To keep the determinism and simplicity, z3fold, just like zbud, always
-stores an integral number of compressed pages per page, but it can store
-up to 3 pages unlike zbud which can store at most 2. Therefore the
-compression ratio goes to around 2.7x while zbud's one is around 1.7x.
-
-Unlike zbud (but like zsmalloc for that matter) z3fold_alloc() does not
-return a dereferenceable pointer. Instead, it returns an unsigned long
-handle which encodes actual location of the allocated object.
-
-Keeping effective compression ratio close to zsmalloc's, z3fold doesn't
-depend on MMU enabled and provides more predictable reclaim behavior
-which makes it a better fit for small and response-critical systems.
diff --git a/Documentation/vm/zsmalloc.rst b/Documentation/vm/zsmalloc.rst
new file mode 100644
index 0000000..6e79893
--- /dev/null
+++ b/Documentation/vm/zsmalloc.rst
@@ -0,0 +1,82 @@
+.. _zsmalloc:
+
+========
+zsmalloc
+========
+
+This allocator is designed for use with zram. Thus, the allocator is
+supposed to work well under low memory conditions. In particular, it
+never attempts higher order page allocation which is very likely to
+fail under memory pressure. On the other hand, if we just use single
+(0-order) pages, it would suffer from very high fragmentation --
+any object of size PAGE_SIZE/2 or larger would occupy an entire page.
+This was one of the major issues with its predecessor (xvmalloc).
+
+To overcome these issues, zsmalloc allocates a bunch of 0-order pages
+and links them together using various 'struct page' fields. These linked
+pages act as a single higher-order page i.e. an object can span 0-order
+page boundaries. The code refers to these linked pages as a single entity
+called zspage.
+
+For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
+since this satisfies the requirements of all its current users (in the
+worst case, the page is incompressible and is thus stored "as-is", i.e. in
+uncompressed form). For allocation requests larger than this size, failure
+is returned (see zs_malloc).
+
+Additionally, zs_malloc() does not return a dereferenceable pointer.
+Instead, it returns an opaque handle (unsigned long) which encodes actual
+location of the allocated object. The reason for this indirection is that
+zsmalloc does not keep zspages permanently mapped since that would cause
+issues on 32-bit systems where the VA region for kernel space mappings
+is very small. So, before using the allocated memory, the object has to
+be mapped using zs_map_object() to get a usable pointer and subsequently
+unmapped using zs_unmap_object().
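+
+A minimal sketch of that map/unmap cycle, using the zsmalloc API from
+include/linux/zsmalloc.h (the surrounding function and the 128-byte size
+are arbitrary, chosen only for illustration)::
+
+	#include <linux/gfp.h>
+	#include <linux/string.h>
+	#include <linux/zsmalloc.h>
+
+	static int zs_example(void)
+	{
+		struct zs_pool *pool;
+		unsigned long handle;
+		void *obj;
+
+		pool = zs_create_pool("example");
+		if (!pool)
+			return -ENOMEM;
+
+		handle = zs_malloc(pool, 128, GFP_KERNEL);
+		if (!handle) {
+			zs_destroy_pool(pool);
+			return -ENOMEM;
+		}
+
+		/* Map the opaque handle to get a dereferenceable pointer. */
+		obj = zs_map_object(pool, handle, ZS_MM_RW);
+		memset(obj, 0, 128);
+		zs_unmap_object(pool, handle);
+
+		zs_free(pool, handle);
+		zs_destroy_pool(pool);
+		return 0;
+	}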
+
+stat
+====
+
+With CONFIG_ZSMALLOC_STAT, we can see zsmalloc internal information via
+``/sys/kernel/debug/zsmalloc/<user name>``. Here is a sample of stat output::
+
+ # cat /sys/kernel/debug/zsmalloc/zram0/classes
+
+ class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage
+    ...
+    ...
+     9   176           0            1           186        129          8                4
+    10   192           1            0          2880       2872        135                3
+    11   208           0            1           819        795         42                2
+    12   224           0            1           219        159         12                4
+    ...
+    ...
+
+
+class
+	index
+size
+	object size zspage stores
+almost_empty
+	the number of ZS_ALMOST_EMPTY zspages (see below)
+almost_full
+	the number of ZS_ALMOST_FULL zspages (see below)
+obj_allocated
+	the number of objects allocated
+obj_used
+	the number of objects allocated to the user
+pages_used
+	the number of pages allocated for the class
+pages_per_zspage
+	the number of 0-order pages to make a zspage
+
+We assign a zspage to the ZS_ALMOST_EMPTY fullness group when n <= N / f, where
+
+* n = number of allocated objects
+* N = total number of objects the zspage can store
+* f = fullness_threshold_frac (i.e. 4 at the moment)
+
+Similarly, we assign a zspage to:
+
+* ZS_ALMOST_FULL  when n > N / f
+* ZS_EMPTY        when n == 0
+* ZS_FULL         when n == N
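+
+The classification above can be read as the following illustrative helper;
+the enum values mirror the groups listed, but the function itself is a
+sketch, not the exact kernel code::
+
+	#define FULLNESS_THRESHOLD_FRAC	4	/* f */
+
+	enum fullness_group { ZS_EMPTY, ZS_ALMOST_EMPTY, ZS_ALMOST_FULL, ZS_FULL };
+
+	static enum fullness_group classify(unsigned int n, unsigned int N)
+	{
+		if (n == 0)
+			return ZS_EMPTY;
+		if (n == N)
+			return ZS_FULL;
+		if (n <= N / FULLNESS_THRESHOLD_FRAC)
+			return ZS_ALMOST_EMPTY;
+		return ZS_ALMOST_FULL;
+	}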
diff --git a/Documentation/vm/zsmalloc.txt b/Documentation/vm/zsmalloc.txt
deleted file mode 100644
index 64ed63c..0000000
--- a/Documentation/vm/zsmalloc.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-zsmalloc
---------
-
-This allocator is designed for use with zram. Thus, the allocator is
-supposed to work well under low memory conditions. In particular, it
-never attempts higher order page allocation which is very likely to
-fail under memory pressure. On the other hand, if we just use single
-(0-order) pages, it would suffer from very high fragmentation --
-any object of size PAGE_SIZE/2 or larger would occupy an entire page.
-This was one of the major issues with its predecessor (xvmalloc).
-
-To overcome these issues, zsmalloc allocates a bunch of 0-order pages
-and links them together using various 'struct page' fields. These linked
-pages act as a single higher-order page i.e. an object can span 0-order
-page boundaries. The code refers to these linked pages as a single entity
-called zspage.
-
-For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
-since this satisfies the requirements of all its current users (in the
-worst case, page is incompressible and is thus stored "as-is" i.e. in
-uncompressed form). For allocation requests larger than this size, failure
-is returned (see zs_malloc).
-
-Additionally, zs_malloc() does not return a dereferenceable pointer.
-Instead, it returns an opaque handle (unsigned long) which encodes actual
-location of the allocated object. The reason for this indirection is that
-zsmalloc does not keep zspages permanently mapped since that would cause
-issues on 32-bit systems where the VA region for kernel space mappings
-is very small. So, before using the allocating memory, the object has to
-be mapped using zs_map_object() to get a usable pointer and subsequently
-unmapped using zs_unmap_object().
-
-stat
-----
-
-With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via
-/sys/kernel/debug/zsmalloc/<user name>. Here is a sample of stat output:
-
-# cat /sys/kernel/debug/zsmalloc/zram0/classes
-
- class  size almost_full almost_empty obj_allocated   obj_used pages_used pages_per_zspage
-    ..
-    ..
-     9   176           0            1           186        129          8                4
-    10   192           1            0          2880       2872        135                3
-    11   208           0            1           819        795         42                2
-    12   224           0            1           219        159         12                4
-    ..
-    ..
-
-
-class: index
-size: object size zspage stores
-almost_empty: the number of ZS_ALMOST_EMPTY zspages(see below)
-almost_full: the number of ZS_ALMOST_FULL zspages(see below)
-obj_allocated: the number of objects allocated
-obj_used: the number of objects allocated to the user
-pages_used: the number of pages allocated for the class
-pages_per_zspage: the number of 0-order pages to make a zspage
-
-We assign a zspage to ZS_ALMOST_EMPTY fullness group when:
-      n <= N / f, where
-n = number of allocated objects
-N = total number of objects zspage can store
-f = fullness_threshold_frac(ie, 4 at the moment)
-
-Similarly, we assign zspage to:
-      ZS_ALMOST_FULL  when n > N / f
-      ZS_EMPTY        when n == 0
-      ZS_FULL         when n == N
diff --git a/Documentation/vm/zswap.rst b/Documentation/vm/zswap.rst
new file mode 100644
index 0000000..1444ecd
--- /dev/null
+++ b/Documentation/vm/zswap.rst
@@ -0,0 +1,135 @@
+.. _zswap:
+
+=====
+zswap
+=====
+
+Overview
+========
+
+Zswap is a lightweight compressed cache for swap pages. It takes pages that are
+in the process of being swapped out and attempts to compress them into a
+dynamically allocated RAM-based memory pool.  zswap basically trades CPU cycles
+for potentially reduced swap I/O.  This trade-off can also result in a
+significant performance improvement if reads from the compressed cache are
+faster than reads from a swap device.
+
+.. note::
+   Zswap is a new feature as of v3.11 and interacts heavily with memory
+   reclaim.  This interaction has not been fully explored on the large set of
+   potential configurations and workloads that exist.  For this reason, zswap
+   is a work in progress and should be considered experimental.
+
+Some potential benefits:
+
+* Desktop/laptop users with limited RAM capacities can mitigate the
+  performance impact of swapping.
+* Overcommitted guests that share a common I/O resource can
+  dramatically reduce their swap I/O pressure, avoiding heavy handed I/O
+  throttling by the hypervisor. This allows more work to get done with less
+  impact to the guest workload and guests sharing the I/O subsystem
+* Users with SSDs as swap devices can extend the life of the device by
+  drastically reducing life-shortening writes.
+
+Zswap evicts pages from compressed cache on an LRU basis to the backing swap
+device when the compressed pool reaches its size limit.  This requirement had
+been identified in prior community discussions.
+
+Zswap is disabled by default but can be enabled at boot time by setting
+the ``enabled`` attribute to 1, e.g. ``zswap.enabled=1``.  Zswap
+can also be enabled and disabled at runtime using the sysfs interface.
+An example command to enable zswap at runtime, assuming sysfs is mounted
+at ``/sys``, is::
+
+	echo 1 > /sys/module/zswap/parameters/enabled
+
+When zswap is disabled at runtime it will stop storing pages that are
+being swapped out.  However, it will *not* immediately write out or fault
+back into memory all of the pages stored in the compressed pool.  The
+pages stored in zswap will remain in the compressed pool until they are
+either invalidated or faulted back into memory.  In order to force all
+pages out of the compressed pool, a swapoff on the swap device(s) will
+fault back into memory all swapped out pages, including those in the
+compressed pool.
+
+Design
+======
+
+Zswap receives pages for compression through the Frontswap API and is able to
+evict pages from its own compressed pool on an LRU basis and write them back to
+the backing swap device in the case that the compressed pool is full.
+
+Zswap makes use of zpool for managing the compressed memory pool.  Each
+allocation in zpool is not directly accessible by address.  Rather, a handle is
+returned by the allocation routine and that handle must be mapped before being
+accessed.  The compressed memory pool grows on demand and shrinks as compressed
+pages are freed.  The pool is not preallocated.  By default, a zpool
+of type zbud is created, but it can be selected at boot time by
+setting the ``zpool`` attribute, e.g. ``zswap.zpool=zbud``. It can
+also be changed at runtime using the sysfs ``zpool`` attribute, e.g.::
+
+	echo zbud > /sys/module/zswap/parameters/zpool
+
+The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which
+means the compression ratio will always be 2:1 or worse (because of half-full
+zbud pages).  The zsmalloc type zpool has a more complex compressed page
+storage method, and it can achieve greater storage densities.  However,
+zsmalloc does not implement compressed page eviction, so once zswap fills it
+cannot evict the oldest page, it can only reject new pages.
+
+When a swap page is passed from frontswap to zswap, zswap maintains a mapping
+of the swap entry, a combination of the swap type and swap offset, to the zpool
+handle that references that compressed swap page.  This mapping is achieved
+with a red-black tree per swap type.  The swap offset is the search key for the
+tree nodes.
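+
+A sketch of such a per-swap-type tree lookup, keyed by swap offset. The
+struct here is a simplified stand-in for zswap's real entry type; the
+rbtree helpers are the standard ones from include/linux/rbtree.h::
+
+	#include <linux/rbtree.h>
+	#include <linux/types.h>
+
+	struct example_entry {
+		struct rb_node rbnode;
+		pgoff_t offset;		/* search key: swap offset */
+		unsigned long handle;	/* zpool handle of the compressed page */
+	};
+
+	static struct example_entry *entry_find(struct rb_root *root,
+						pgoff_t offset)
+	{
+		struct rb_node *node = root->rb_node;
+		struct example_entry *entry;
+
+		while (node) {
+			entry = rb_entry(node, struct example_entry, rbnode);
+			if (entry->offset > offset)
+				node = node->rb_left;
+			else if (entry->offset < offset)
+				node = node->rb_right;
+			else
+				return entry;
+		}
+		return NULL;
+	}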
+
+During a page fault on a PTE that is a swap entry, frontswap calls the zswap
+load function to decompress the page into the page allocated by the page fault
+handler.
+
+Once there are no PTEs referencing a swap page stored in zswap (i.e. the count
+in the swap_map goes to 0) the swap code calls the zswap invalidate function,
+via frontswap, to free the compressed entry.
+
+Zswap seeks to be simple in its policies.  Sysfs attributes allow for one
+user-controlled policy:
+
+* max_pool_percent - The maximum percentage of memory that the compressed
+  pool can occupy (see the sketch below).
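+
+For illustration, the limit check amounts to something like the helper
+below; the function and its parameters are hypothetical, but
+totalram_pages and PAGE_SIZE are the usual kernel globals::
+
+	static bool pool_over_limit(u64 pool_bytes, unsigned int max_pool_percent)
+	{
+		/* Cast first so the product cannot overflow on 32-bit. */
+		u64 limit = (u64)totalram_pages * PAGE_SIZE * max_pool_percent / 100;
+
+		return pool_bytes > limit;
+	}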
+
+The default compressor is lzo, but it can be selected at boot time by
+setting the ``compressor`` attribute, e.g. ``zswap.compressor=lzo``.
+It can also be changed at runtime using the sysfs ``compressor``
+attribute, e.g.::
+
+	echo lzo > /sys/module/zswap/parameters/compressor
+
+When the zpool and/or compressor parameter is changed at runtime, any existing
+compressed pages are not modified; they are left in their own zpool.  When a
+request is made for a page in an old zpool, it is uncompressed using its
+original compressor.  Once all pages are removed from an old zpool, the zpool
+and its compressor are freed.
+
+Some of the pages in zswap are same-value filled pages (i.e. the contents
+of the page have the same value or a repetitive pattern). These pages
+include zero-filled pages and they are handled differently. During a store
+operation, a page is checked to see whether it is a same-value filled page
+before it is compressed. If true, the compressed length of the page is set
+to zero and the pattern or same-filled value is stored.
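+
+The store-time check described above can be pictured as follows; this is
+an illustrative version only, treating the page as an array of words and
+testing whether every word equals the first::
+
+	static bool page_same_filled(void *ptr, unsigned long *value)
+	{
+		unsigned long *page = ptr;
+		unsigned int pos;
+
+		for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
+			if (page[pos] != page[0])
+				return false;
+		}
+		/* Remember the pattern so the page can be rebuilt on load. */
+		*value = page[0];
+		return true;
+	}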
+
+The same-value filled page identification feature is enabled by default and
+can be disabled at boot time by setting the ``same_filled_pages_enabled`` attribute
+to 0, e.g. ``zswap.same_filled_pages_enabled=0``. It can also be enabled and
+disabled at runtime using the sysfs ``same_filled_pages_enabled``
+attribute, e.g.::
+
+	echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
+
+When zswap same-filled page identification is disabled at runtime, it will
+stop checking for same-value filled pages during store operations. However, the
+existing pages which are marked as same-value filled pages remain stored
+unchanged in zswap until they are either loaded or invalidated.
+
+A debugfs interface is provided for various statistics about pool size, number
+of pages stored, same-value filled pages and various counters for the reasons
+pages are rejected.
diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt
deleted file mode 100644
index 0b3a114..0000000
--- a/Documentation/vm/zswap.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-Overview:
-
-Zswap is a lightweight compressed cache for swap pages. It takes pages that are
-in the process of being swapped out and attempts to compress them into a
-dynamically allocated RAM-based memory pool.  zswap basically trades CPU cycles
-for potentially reduced swap I/O.  This trade-off can also result in a
-significant performance improvement if reads from the compressed cache are
-faster than reads from a swap device.
-
-NOTE: Zswap is a new feature as of v3.11 and interacts heavily with memory
-reclaim.  This interaction has not been fully explored on the large set of
-potential configurations and workloads that exist.  For this reason, zswap
-is a work in progress and should be considered experimental.
-
-Some potential benefits:
-* Desktop/laptop users with limited RAM capacities can mitigate the
-    performance impact of swapping.
-* Overcommitted guests that share a common I/O resource can
-    dramatically reduce their swap I/O pressure, avoiding heavy handed I/O
-    throttling by the hypervisor. This allows more work to get done with less
-    impact to the guest workload and guests sharing the I/O subsystem
-* Users with SSDs as swap devices can extend the life of the device by
-    drastically reducing life-shortening writes.
-
-Zswap evicts pages from compressed cache on an LRU basis to the backing swap
-device when the compressed pool reaches its size limit.  This requirement had
-been identified in prior community discussions.
-
-Zswap is disabled by default but can be enabled at boot time by setting
-the "enabled" attribute to 1 at boot time. ie: zswap.enabled=1.  Zswap
-can also be enabled and disabled at runtime using the sysfs interface.
-An example command to enable zswap at runtime, assuming sysfs is mounted
-at /sys, is:
-
-echo 1 > /sys/module/zswap/parameters/enabled
-
-When zswap is disabled at runtime it will stop storing pages that are
-being swapped out.  However, it will _not_ immediately write out or fault
-back into memory all of the pages stored in the compressed pool.  The
-pages stored in zswap will remain in the compressed pool until they are
-either invalidated or faulted back into memory.  In order to force all
-pages out of the compressed pool, a swapoff on the swap device(s) will
-fault back into memory all swapped out pages, including those in the
-compressed pool.
-
-Design:
-
-Zswap receives pages for compression through the Frontswap API and is able to
-evict pages from its own compressed pool on an LRU basis and write them back to
-the backing swap device in the case that the compressed pool is full.
-
-Zswap makes use of zpool for the managing the compressed memory pool.  Each
-allocation in zpool is not directly accessible by address.  Rather, a handle is
-returned by the allocation routine and that handle must be mapped before being
-accessed.  The compressed memory pool grows on demand and shrinks as compressed
-pages are freed.  The pool is not preallocated.  By default, a zpool of type
-zbud is created, but it can be selected at boot time by setting the "zpool"
-attribute, e.g. zswap.zpool=zbud.  It can also be changed at runtime using the
-sysfs "zpool" attribute, e.g.
-
-echo zbud > /sys/module/zswap/parameters/zpool
-
-The zbud type zpool allocates exactly 1 page to store 2 compressed pages, which
-means the compression ratio will always be 2:1 or worse (because of half-full
-zbud pages).  The zsmalloc type zpool has a more complex compressed page
-storage method, and it can achieve greater storage densities.  However,
-zsmalloc does not implement compressed page eviction, so once zswap fills it
-cannot evict the oldest page, it can only reject new pages.
-
-When a swap page is passed from frontswap to zswap, zswap maintains a mapping
-of the swap entry, a combination of the swap type and swap offset, to the zpool
-handle that references that compressed swap page.  This mapping is achieved
-with a red-black tree per swap type.  The swap offset is the search key for the
-tree nodes.
-
-During a page fault on a PTE that is a swap entry, frontswap calls the zswap
-load function to decompress the page into the page allocated by the page fault
-handler.
-
-Once there are no PTEs referencing a swap page stored in zswap (i.e. the count
-in the swap_map goes to 0) the swap code calls the zswap invalidate function,
-via frontswap, to free the compressed entry.
-
-Zswap seeks to be simple in its policies.  Sysfs attributes allow for one user
-controlled policy:
-* max_pool_percent - The maximum percentage of memory that the compressed
-    pool can occupy.
-
-The default compressor is lzo, but it can be selected at boot time by setting
-the “compressor” attribute, e.g. zswap.compressor=lzo.  It can also be changed
-at runtime using the sysfs "compressor" attribute, e.g.
-
-echo lzo > /sys/module/zswap/parameters/compressor
-
-When the zpool and/or compressor parameter is changed at runtime, any existing
-compressed pages are not modified; they are left in their own zpool.  When a
-request is made for a page in an old zpool, it is uncompressed using its
-original compressor.  Once all pages are removed from an old zpool, the zpool
-and its compressor are freed.
-
-Some of the pages in zswap are same-value filled pages (i.e. contents of the
-page have same value or repetitive pattern). These pages include zero-filled
-pages and they are handled differently. During store operation, a page is
-checked if it is a same-value filled page before compressing it. If true, the
-compressed length of the page is set to zero and the pattern or same-filled
-value is stored.
-
-Same-value filled pages identification feature is enabled by default and can be
-disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0,
-e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at
-runtime using the sysfs "same_filled_pages_enabled" attribute, e.g.
-
-echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
-
-When zswap same-filled page identification is disabled at runtime, it will stop
-checking for the same-value filled pages during store operation. However, the
-existing pages which are marked as same-value filled pages remain stored
-unchanged in zswap until they are either loaded or invalidated.
-
-A debugfs interface is provided for various statistic about pool size, number
-of pages stored, same-value filled pages and various counters for the reasons
-pages are rejected.
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 71c3098..a16aa21 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -17,12 +17,14 @@
 
 To use the feature mount the file system:
 
- # mount -t resctrl resctrl [-o cdp[,cdpl2]] /sys/fs/resctrl
+ # mount -t resctrl resctrl [-o cdp[,cdpl2][,mba_MBps]] /sys/fs/resctrl
 
 mount options are:
 
 "cdp": Enable code/data prioritization in L3 cache allocations.
 "cdpl2": Enable code/data prioritization in L2 cache allocations.
+"mba_MBps": Enable the MBA Software Controller(mba_sc) to specify MBA
+ bandwidth in MBps
 
 L2 and L3 CDP are controlled separately.
 
@@ -270,10 +272,11 @@
 of the capacity of the cache. You could partition the cache into four
 equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
 
-Memory bandwidth(b/w) percentage
---------------------------------
-For Memory b/w resource, user controls the resource by indicating the
-percentage of total memory b/w.
+Memory bandwidth Allocation and monitoring
+------------------------------------------
+
+For Memory bandwidth resource, by default the user controls the resource
+by indicating the percentage of total memory bandwidth.
 
 The minimum bandwidth percentage value for each cpu model is predefined
 and can be looked up through "info/MB/min_bandwidth". The bandwidth
@@ -285,7 +288,47 @@
 The bandwidth throttling is a core specific mechanism on some of Intel
 SKUs. Using a high bandwidth and a low bandwidth setting on two threads
 sharing a core will result in both threads being throttled to use the
-low bandwidth.
+low bandwidth. The fact that Memory bandwidth allocation (MBA) is a
+core-specific mechanism whereas memory bandwidth monitoring (MBM) is done
+at the package level may lead to confusion when users try to apply control
+via the MBA and then monitor the bandwidth to see if the controls are
+effective. Below are two such scenarios:
+
+1. User may *not* see increase in actual bandwidth when percentage
+   values are increased:
+
+This can occur when the aggregate L2 external bandwidth is more than the
+L3 external bandwidth. Consider an SKL SKU with 24 cores on a package and
+where the L2 external bandwidth is 10GBps (hence aggregate L2 external
+bandwidth is 240GBps) and the L3 external bandwidth is 100GBps. Now a
+workload with '20 threads, having 50% bandwidth, each consuming 5GBps'
+consumes the max L3 bandwidth of 100GBps although the percentage value
+specified is only 50% (well below 100%). Hence increasing the bandwidth
+percentage will not yield any more bandwidth. This is because although the
+L2 external bandwidth still has capacity, the L3 external bandwidth is
+fully used. Also note that this would depend on the number of cores the
+benchmark is run on.
+
+2. Same bandwidth percentage may mean different actual bandwidth
+   depending on # of threads:
+
+For the same SKU as in #1, a 'single thread, with 10% bandwidth' and '4
+threads, with 10% bandwidth' can consume up to 10GBps and 40GBps although
+they have the same percentage bandwidth of 10%. This is simply because as
+threads start using more cores in an rdtgroup, the actual bandwidth may
+increase or vary although the user-specified bandwidth percentage is the same.
+
+In order to mitigate this and make the interface more user friendly,
+resctrl added support for specifying the bandwidth in MBps as well.  The
+kernel underneath would use a software feedback mechanism or a "Software
+Controller (mba_sc)" which reads the actual bandwidth using MBM counters
+and adjusts the memory bandwidth percentages to ensure
+
+	"actual bandwidth < user specified bandwidth".
+
+By default, the schemata would take the bandwidth percentage values,
+whereas the user can switch to the "MBA software controller" mode using
+the mount option 'mba_MBps'. The schemata format is specified in the
+sections below.
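+
+Conceptually, each feedback step compares the bandwidth measured by the
+MBM counters against the user-specified MBps value and nudges the internal
+percentage accordingly. The helper below is a deliberately simplified
+sketch (the names, step size and bounds are invented), not the in-kernel
+mba_sc algorithm:
+
+	static u32 mba_sc_step(u32 cur_bw_mbps, u32 user_bw_mbps, u32 cur_pct)
+	{
+		if (cur_bw_mbps > user_bw_mbps && cur_pct > 10)
+			cur_pct -= 5;	/* throttle harder */
+		else if (cur_bw_mbps < user_bw_mbps && cur_pct < 100)
+			cur_pct += 5;	/* hand some bandwidth back */
+		return cur_pct;
+	}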
 
 L3 schemata file details (code and data prioritization disabled)
 ----------------------------------------------------------------
@@ -308,13 +351,20 @@
 
 	L2:<cache_id0>=<cbm>;<cache_id1>=<cbm>;...
 
-Memory b/w Allocation details
------------------------------
+Memory bandwidth Allocation (default mode)
+------------------------------------------
 
 Memory b/w domain is L3 cache.
 
 	MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;...
 
+Memory bandwidth Allocation specified in MBps
+---------------------------------------------
+
+Memory bandwidth domain is L3 cache.
+
+	MB:<cache_id0>=bw_MBps0;<cache_id1>=bw_MBps1;...
+
 Reading/writing the schemata file
 ---------------------------------
 Reading the schemata file will show the state of all resources
@@ -358,6 +408,15 @@
 b/w that the group may be able to use and the system admin can configure
 the b/w accordingly.
 
+If the MBA is specified in MB (megabytes) then the user can enter the max
+b/w in MB rather than as percentage values.
+
+# echo "L3:0=3;1=c\nMB:0=1024;1=500" > /sys/fs/resctrl/p0/schemata
+# echo "L3:0=3;1=3\nMB:0=1024;1=500" > /sys/fs/resctrl/p1/schemata
+
+In the above example the tasks in "p1" and "p0" on socket 0 would use a max b/w
+of 1024MB whereas on socket 1 they would use 500MB.
+
 Example 2
 ---------
 Again two sockets, but this time with a more realistic 20-bit mask.
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index b297c48..8d109ef 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -187,9 +187,9 @@
 
 IOMMU (input/output memory management unit)
 
- Currently four x86-64 PCI-DMA mapping implementations exist:
+ Multiple x86-64 PCI-DMA mapping implementations exist, for example:
 
-   1. <arch/x86_64/kernel/pci-nommu.c>: use no hardware/software IOMMU at all
+   1. <lib/dma-direct.c>: use no hardware/software IOMMU at all
       (e.g. because you have < 3 GB memory).
       Kernel boot message: "PCI-DMA: Disabling IOMMU"
 
@@ -208,7 +208,7 @@
       Kernel boot message: "PCI-DMA: Using Calgary IOMMU"
 
  iommu=[<size>][,noagp][,off][,force][,noforce][,leak[=<nr_of_leak_pages>]
-	[,memaper[=<order>]][,merge][,forcesac][,fullflush][,nomerge]
+	[,memaper[=<order>]][,merge][,fullflush][,nomerge]
 	[,noaperture][,calgary]
 
   General iommu options:
@@ -235,14 +235,7 @@
                        (experimental).
     nomerge            Don't do scatter-gather (SG) merging.
     noaperture         Ask the IOMMU not to touch the aperture for AGP.
-    forcesac           Force single-address cycle (SAC) mode for masks <40bits
-                       (experimental).
     noagp              Don't initialize the AGP driver and use full aperture.
-    allowdac           Allow double-address cycle (DAC) mode, i.e. DMA >4GB.
-                       DAC is used with 32-bit PCI to push a 64-bit address in
-                       two cycles. When off all DMA over >4GB is forced through
-                       an IOMMU or software bounce buffering.
-    nodac              Forbid DAC mode, i.e. DMA >4GB.
     panic              Always panic when IOMMU overflows.
     calgary            Use the Calgary IOMMU if it is available
 
diff --git a/Kconfig b/Kconfig
index 8c4c1cb..a90d9f9 100644
--- a/Kconfig
+++ b/Kconfig
@@ -3,10 +3,10 @@
 # For a description of the syntax of this configuration file,
 # see Documentation/kbuild/kconfig-language.txt.
 #
-mainmenu "Linux/$ARCH $KERNELVERSION Kernel Configuration"
+mainmenu "Linux/$(ARCH) $(KERNELVERSION) Kernel Configuration"
 
-config SRCARCH
-	string
-	option env="SRCARCH"
+comment "Compiler: $(CC_VERSION_TEXT)"
 
-source "arch/$SRCARCH/Kconfig"
+source "scripts/Kconfig.include"
+
+source "arch/$(SRCARCH)/Kconfig"
diff --git a/LICENSES/exceptions/Linux-syscall-note b/LICENSES/exceptions/Linux-syscall-note
index 6b60b61..9abdad7 100644
--- a/LICENSES/exceptions/Linux-syscall-note
+++ b/LICENSES/exceptions/Linux-syscall-note
@@ -1,6 +1,6 @@
 SPDX-Exception-Identifier: Linux-syscall-note
 SPDX-URL: https://spdx.org/licenses/Linux-syscall-note.html
-SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, LGPL-2.1+
+SPDX-Licenses: GPL-2.0, GPL-2.0+, GPL-1.0+, LGPL-2.0, LGPL-2.0+, LGPL-2.1, LGPL-2.1+, GPL-2.0-only, GPL-2.0-or-later
 Usage-Guide:
   This exception is used together with one of the above SPDX-Licenses
   to mark user space API (uapi) header files so they can be included
diff --git a/LICENSES/other/Apache-2.0 b/LICENSES/other/Apache-2.0
new file mode 100644
index 0000000..7cd903f
--- /dev/null
+++ b/LICENSES/other/Apache-2.0
@@ -0,0 +1,183 @@
+Valid-License-Identifier: Apache-2.0
+SPDX-URL: https://spdx.org/licenses/Apache-2.0.html
+Usage-Guide:
+  To use the Apache License version 2.0 put the following SPDX tag/value
+  pair into a comment according to the placement guidelines in the
+  licensing rules documentation:
+    SPDX-License-Identifier: Apache-2.0
+License-Text:
+
+Apache License
+
+Version 2.0, January 2004
+
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the
+copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other
+entities that control, are controlled by, or are under common control with
+that entity. For the purposes of this definition, "control" means (i) the
+power, direct or indirect, to cause the direction or management of such
+entity, whether by contract or otherwise, or (ii) ownership of fifty
+percent (50%) or more of the outstanding shares, or (iii) beneficial
+ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation source,
+and configuration files.
+
+"Object" form shall mean any form resulting from mechanical transformation
+or translation of a Source form, including but not limited to compiled
+object code, generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form,
+made available under the License, as indicated by a copyright notice that
+is included in or attached to the work (an example is provided in the
+Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form,
+that is based on (or derived from) the Work and for which the editorial
+revisions, annotations, elaborations, or other modifications represent, as
+a whole, an original work of authorship. For the purposes of this License,
+Derivative Works shall not include works that remain separable from, or
+merely link (or bind by name) to the interfaces of, the Work and Derivative
+Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original
+version of the Work and any modifications or additions to that Work or
+Derivative Works thereof, that is intentionally submitted to Licensor for
+inclusion in the Work by the copyright owner or by an individual or Legal
+Entity authorized to submit on behalf of the copyright owner. For the
+purposes of this definition, "submitted" means any form of electronic,
+verbal, or written communication sent to the Licensor or its
+representatives, including but not limited to communication on electronic
+mailing lists, source code control systems, and issue tracking systems that
+are managed by, or on behalf of, the Licensor for the purpose of discussing
+and improving the Work, but excluding communication that is conspicuously
+marked or otherwise designated in writing by the copyright owner as "Not a
+Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on
+behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of this
+   License, each Contributor hereby grants to You a perpetual, worldwide,
+   non-exclusive, no-charge, royalty-free, irrevocable copyright license to
+   reproduce, prepare Derivative Works of, publicly display, publicly
+   perform, sublicense, and distribute the Work and such Derivative Works
+   in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of this
+   License, each Contributor hereby grants to You a perpetual, worldwide,
+   non-exclusive, no-charge, royalty-free, irrevocable (except as stated in
+   this section) patent license to make, have made, use, offer to sell,
+   sell, import, and otherwise transfer the Work, where such license
+   applies only to those patent claims licensable by such Contributor that
+   are necessarily infringed by their Contribution(s) alone or by
+   combination of their Contribution(s) with the Work to which such
+   Contribution(s) was submitted. If You institute patent litigation
+   against any entity (including a cross-claim or counterclaim in a
+   lawsuit) alleging that the Work or a Contribution incorporated within
+   the Work constitutes direct or contributory patent infringement, then
+   any patent licenses granted to You under this License for that Work
+   shall terminate as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or
+   Derivative Works thereof in any medium, with or without modifications,
+   and in Source or Object form, provided that You meet the following
+   conditions:
+
+   a. You must give any other recipients of the Work or Derivative Works a
+      copy of this License; and
+
+   b. You must cause any modified files to carry prominent notices stating
+      that You changed the files; and
+
+   c. You must retain, in the Source form of any Derivative Works that You
+      distribute, all copyright, patent, trademark, and attribution notices
+      from the Source form of the Work, excluding those notices that do not
+      pertain to any part of the Derivative Works; and
+
+   d. If the Work includes a "NOTICE" text file as part of its
+      distribution, then any Derivative Works that You distribute must
+      include a readable copy of the attribution notices contained within
+      such NOTICE file, excluding those notices that do not pertain to any
+      part of the Derivative Works, in at least one of the following
+      places: within a NOTICE text file distributed as part of the
+      Derivative Works; within the Source form or documentation, if
+      provided along with the Derivative Works; or, within a display
+      generated by the Derivative Works, if and wherever such third-party
+      notices normally appear. The contents of the NOTICE file are for
+      informational purposes only and do not modify the License. You may
+      add Your own attribution notices within Derivative Works that You
+      distribute, alongside or as an addendum to the NOTICE text from the
+      Work, provided that such additional attribution notices cannot be
+      construed as modifying the License.
+
+    You may add Your own copyright statement to Your modifications and may
+    provide additional or different license terms and conditions for use,
+    reproduction, or distribution of Your modifications, or for any such
+    Derivative Works as a whole, provided Your use, reproduction, and
+    distribution of the Work otherwise complies with the conditions stated
+    in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise, any
+   Contribution intentionally submitted for inclusion in the Work by You to
+   the Licensor shall be under the terms and conditions of this License,
+   without any additional terms or conditions. Notwithstanding the above,
+   nothing herein shall supersede or modify the terms of any separate
+   license agreement you may have executed with Licensor regarding such
+   Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to
+   in writing, Licensor provides the Work (and each Contributor provides
+   its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+   OF ANY KIND, either express or implied, including, without limitation,
+   any warranties or conditions of TITLE, NON-INFRINGEMENT,
+   MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely
+   responsible for determining the appropriateness of using or
+   redistributing the Work and assume any risks associated with Your
+   exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory, whether
+   in tort (including negligence), contract, or otherwise, unless required
+   by applicable law (such as deliberate and grossly negligent acts) or
+   agreed to in writing, shall any Contributor be liable to You for
+   damages, including any direct, indirect, special, incidental, or
+   consequential damages of any character arising as a result of this
+   License or out of the use or inability to use the Work (including but
+   not limited to damages for loss of goodwill, work stoppage, computer
+   failure or malfunction, or any and all other commercial damages or
+   losses), even if such Contributor has been advised of the possibility of
+   such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the
+   Work or Derivative Works thereof, You may choose to offer, and charge a
+   fee for, acceptance of support, warranty, indemnity, or other liability
+   obligations and/or rights consistent with this License. However, in
+   accepting such obligations, You may act only on Your own behalf and on
+   Your sole responsibility, not on behalf of any other Contributor, and
+   only if You agree to indemnify, defend, and hold each Contributor
+   harmless for any liability incurred by, or claims asserted against, such
+   Contributor by reason of your accepting any such warranty or additional
+   liability.
+
+END OF TERMS AND CONDITIONS
diff --git a/LICENSES/other/CC-BY-SA-4.0 b/LICENSES/other/CC-BY-SA-4.0
new file mode 100644
index 0000000..f9158e8
--- /dev/null
+++ b/LICENSES/other/CC-BY-SA-4.0
@@ -0,0 +1,397 @@
+Valid-License-Identifier: CC-BY-SA-4.0
+SPDX-URL: https://spdx.org/licenses/CC-BY-SA-4.0
+Usage-Guide:
+  To use the Creative Commons Attribution Share Alike 4.0 International
+  license put the following SPDX tag/value pair into a comment according to
+  the placement guidelines in the licensing rules documentation:
+    SPDX-License-Identifier: CC-BY-SA-4.0
+License-Text:
+
+Creative Commons Attribution-ShareAlike 4.0 International
+
+Creative Commons Corporation ("Creative Commons") is not a law firm and
+does not provide legal services or legal advice. Distribution of Creative
+Commons public licenses does not create a lawyer-client or other
+relationship. Creative Commons makes its licenses and related information
+available on an "as-is" basis. Creative Commons gives no warranties
+regarding its licenses, any material licensed under their terms and
+conditions, or any related information. Creative Commons disclaims all
+liability for damages resulting from their use to the fullest extent
+possible.
+
+Using Creative Commons Public Licenses
+
+Creative Commons public licenses provide a standard set of terms and
+conditions that creators and other rights holders may use to share original
+works of authorship and other material subject to copyright and certain
+other rights specified in the public license below. The following
+considerations are for informational purposes only, are not exhaustive, and
+do not form part of our licenses.
+
+Considerations for licensors: Our public licenses are intended for use by
+those authorized to give the public permission to use material in ways
+otherwise restricted by copyright and certain other rights. Our licenses
+are irrevocable. Licensors should read and understand the terms and
+conditions of the license they choose before applying it. Licensors should
+also secure all rights necessary before applying our licenses so that the
+public can reuse the material as expected. Licensors should clearly mark
+any material not subject to the license. This includes other CC-licensed
+material, or material used under an exception or limitation to
+copyright. More considerations for licensors :
+wiki.creativecommons.org/Considerations_for_licensors
+
+Considerations for the public: By using one of our public licenses, a
+licensor grants the public permission to use the licensed material under
+specified terms and conditions. If the licensor's permission is not
+necessary for any reason - for example, because of any applicable exception
+or limitation to copyright - then that use is not regulated by the
+license. Our licenses grant only permissions under copyright and certain
+other rights that a licensor has authority to grant. Use of the licensed
+material may still be restricted for other reasons, including because
+others have copyright or other rights in the material. A licensor may make
+special requests, such as asking that all changes be marked or described.
+
+Although not required by our licenses, you are encouraged to respect those
+requests where reasonable. More considerations for the public :
+wiki.creativecommons.org/Considerations_for_licensees
+
+Creative Commons Attribution-ShareAlike 4.0 International Public License
+
+By exercising the Licensed Rights (defined below), You accept and agree to
+be bound by the terms and conditions of this Creative Commons
+Attribution-ShareAlike 4.0 International Public License ("Public
+License"). To the extent this Public License may be interpreted as a
+contract, You are granted the Licensed Rights in consideration of Your
+acceptance of these terms and conditions, and the Licensor grants You such
+rights in consideration of benefits the Licensor receives from making the
+Licensed Material available under these terms and conditions.
+
+Section 1 - Definitions.
+
+    a. Adapted Material means material subject to Copyright and Similar
+       Rights that is derived from or based upon the Licensed Material and
+       in which the Licensed Material is translated, altered, arranged,
+       transformed, or otherwise modified in a manner requiring permission
+       under the Copyright and Similar Rights held by the Licensor. For
+       purposes of this Public License, where the Licensed Material is a
+       musical work, performance, or sound recording, Adapted Material is
+       always produced where the Licensed Material is synched in timed
+       relation with a moving image.
+
+    b. Adapter's License means the license You apply to Your Copyright and
+       Similar Rights in Your contributions to Adapted Material in
+       accordance with the terms and conditions of this Public License.
+
+    c. BY-SA Compatible License means a license listed at
+       creativecommons.org/compatiblelicenses, approved by Creative Commons
+       as essentially the equivalent of this Public License.
+
+    d. Copyright and Similar Rights means copyright and/or similar rights
+       closely related to copyright including, without limitation,
+       performance, broadcast, sound recording, and Sui Generis Database
+       Rights, without regard to how the rights are labeled or
+       categorized. For purposes of this Public License, the rights
+       specified in Section 2(b)(1)-(2) are not Copyright and Similar
+       Rights.
+
+    e. Effective Technological Measures means those measures that, in the
+       absence of proper authority, may not be circumvented under laws
+       fulfilling obligations under Article 11 of the WIPO Copyright Treaty
+       adopted on December 20, 1996, and/or similar international
+       agreements.
+
+    f. Exceptions and Limitations means fair use, fair dealing, and/or any
+       other exception or limitation to Copyright and Similar Rights that
+       applies to Your use of the Licensed Material.
+
+    g. License Elements means the license attributes listed in the name of
+       a Creative Commons Public License. The License Elements of this
+       Public License are Attribution and ShareAlike.
+
+    h. Licensed Material means the artistic or literary work, database, or
+       other material to which the Licensor applied this Public License.
+
+    i. Licensed Rights means the rights granted to You subject to the terms
+       and conditions of this Public License, which are limited to all
+       Copyright and Similar Rights that apply to Your use of the Licensed
+       Material and that the Licensor has authority to license.
+
+    j. Licensor means the individual(s) or entity(ies) granting rights
+       under this Public License.
+
+    k. Share means to provide material to the public by any means or
+       process that requires permission under the Licensed Rights, such as
+       reproduction, public display, public performance, distribution,
+       dissemination, communication, or importation, and to make material
+       available to the public including in ways that members of the public
+       may access the material from a place and at a time individually
+       chosen by them.
+
+    l. Sui Generis Database Rights means rights other than copyright
+       resulting from Directive 96/9/EC of the European Parliament and of
+       the Council of 11 March 1996 on the legal protection of databases,
+       as amended and/or succeeded, as well as other essentially equivalent
+       rights anywhere in the world.  m. You means the individual or entity
+       exercising the Licensed Rights under this Public License. Your has a
+       corresponding meaning.
+
+Section 2 - Scope.
+
+    a. License grant.
+
+        1. Subject to the terms and conditions of this Public License, the
+           Licensor hereby grants You a worldwide, royalty-free,
+           non-sublicensable, non-exclusive, irrevocable license to
+           exercise the Licensed Rights in the Licensed Material to:
+
+            A. reproduce and Share the Licensed Material, in whole or in part; and
+
+            B. produce, reproduce, and Share Adapted Material.
+
+        2. Exceptions and Limitations. For the avoidance of doubt, where
+           Exceptions and Limitations apply to Your use, this Public
+           License does not apply, and You do not need to comply with its
+           terms and conditions.
+
+        3. Term. The term of this Public License is specified in Section 6(a).
+
+        4. Media and formats; technical modifications allowed. The Licensor
+           authorizes You to exercise the Licensed Rights in all media and
+           formats whether now known or hereafter created, and to make
+           technical modifications necessary to do so. The Licensor waives
+           and/or agrees not to assert any right or authority to forbid You
+           from making technical modifications necessary to exercise the
+           Licensed Rights, including technical modifications necessary to
+           circumvent Effective Technological Measures. For purposes of
+           this Public License, simply making modifications authorized by
+           this Section 2(a)(4) never produces Adapted Material.
+
+        5. Downstream recipients.
+
+            A. Offer from the Licensor - Licensed Material. Every recipient
+               of the Licensed Material automatically receives an offer
+               from the Licensor to exercise the Licensed Rights under the
+               terms and conditions of this Public License.
+
+            B. Additional offer from the Licensor - Adapted Material. Every
+               recipient of Adapted Material from You automatically
+               receives an offer from the Licensor to exercise the Licensed
+               Rights in the Adapted Material under the conditions of the
+               Adapter's License You apply.
+
+            C. No downstream restrictions. You may not offer or impose any
+               additional or different terms or conditions on, or apply any
+               Effective Technological Measures to, the Licensed Material
+               if doing so restricts exercise of the Licensed Rights by any
+               recipient of the Licensed Material.
+
+        6. No endorsement. Nothing in this Public License constitutes or
+           may be construed as permission to assert or imply that You are,
+           or that Your use of the Licensed Material is, connected with, or
+           sponsored, endorsed, or granted official status by, the Licensor
+           or others designated to receive attribution as provided in
+           Section 3(a)(1)(A)(i).
+
+    b. Other rights.
+
+        1. Moral rights, such as the right of integrity, are not licensed
+           under this Public License, nor are publicity, privacy, and/or
+           other similar personality rights; however, to the extent
+           possible, the Licensor waives and/or agrees not to assert any
+           such rights held by the Licensor to the limited extent necessary
+           to allow You to exercise the Licensed Rights, but not otherwise.
+
+        2. Patent and trademark rights are not licensed under this Public
+           License.
+
+        3. To the extent possible, the Licensor waives any right to collect
+           royalties from You for the exercise of the Licensed Rights,
+           whether directly or through a collecting society under any
+           voluntary or waivable statutory or compulsory licensing
+           scheme. In all other cases the Licensor expressly reserves any
+           right to collect such royalties.
+
+Section 3 - License Conditions.
+
+Your exercise of the Licensed Rights is expressly made subject to the
+following conditions.
+
+    a. Attribution.
+
+        1. If You Share the Licensed Material (including in modified form),
+           You must:
+
+            A. retain the following if it is supplied by the Licensor with
+               the Licensed Material:
+
+                i. identification of the creator(s) of the Licensed
+                   Material and any others designated to receive
+                   attribution, in any reasonable manner requested by the
+                   Licensor (including by pseudonym if designated);
+
+                ii. a copyright notice;
+
+                iii. a notice that refers to this Public License;
+
+                iv. a notice that refers to the disclaimer of warranties;
+
+                v. a URI or hyperlink to the Licensed Material to the
+                   extent reasonably practicable;
+
+            B. indicate if You modified the Licensed Material and retain an
+               indication of any previous modifications; and
+
+            C. indicate the Licensed Material is licensed under this
+               Public License, and include the text of, or the URI or
+               hyperlink to, this Public License.
+
+        2. You may satisfy the conditions in Section 3(a)(1) in any
+           reasonable manner based on the medium, means, and context in
+           which You Share the Licensed Material. For example, it may be
+           reasonable to satisfy the conditions by providing a URI or
+           hyperlink to a resource that includes the required information.
+
+        3. If requested by the Licensor, You must remove any of the
+           information required by Section 3(a)(1)(A) to the extent
+           reasonably practicable.
+
+    b. ShareAlike.
+
+       In addition to the conditions in Section 3(a), if You Share
+       Adapted Material You produce, the following conditions also
+       apply.
+
+        1. The Adapter's License You apply must be a Creative Commons
+           license with the same License Elements, this version or
+           later, or a BY-SA Compatible License.
+
+        2. You must include the text of, or the URI or hyperlink to, the
+           Adapter's License You apply. You may satisfy this condition
+           in any reasonable manner based on the medium, means, and
+           context in which You Share Adapted Material.
+
+        3. You may not offer or impose any additional or different terms
+           or conditions on, or apply any Effective Technological
+           Measures to, Adapted Material that restrict exercise of the
+           rights granted under the Adapter's License You apply.
+
+Section 4 - Sui Generis Database Rights.
+
+Where the Licensed Rights include Sui Generis Database Rights that apply to
+Your use of the Licensed Material:
+
+    a. for the avoidance of doubt, Section 2(a)(1) grants You the right to
+       extract, reuse, reproduce, and Share all or a substantial portion of
+       the contents of the database;
+
+    b. if You include all or a substantial portion of the database contents
+       in a database in which You have Sui Generis Database Rights, then
+       the database in which You have Sui Generis Database Rights (but not
+       its individual contents) is Adapted Material, including for purposes
+       of Section 3(b); and
+
+    c. You must comply with the conditions in Section 3(a) if You Share all
+       or a substantial portion of the contents of the database.
+
+    For the avoidance of doubt, this Section 4 supplements and does not
+    replace Your obligations under this Public License where the Licensed
+    Rights include other Copyright and Similar Rights.
+
+Section 5 - Disclaimer of Warranties and Limitation of Liability.
+
+    a. Unless otherwise separately undertaken by the Licensor, to the
+       extent possible, the Licensor offers the Licensed Material as-is and
+       as-available, and makes no representations or warranties of any kind
+       concerning the Licensed Material, whether express, implied,
+       statutory, or other. This includes, without limitation, warranties
+       of title, merchantability, fitness for a particular purpose,
+       non-infringement, absence of latent or other defects, accuracy, or
+       the presence or absence of errors, whether or not known or
+       discoverable. Where disclaimers of warranties are not allowed in
+       full or in part, this disclaimer may not apply to You.
+
+    b. To the extent possible, in no event will the Licensor be liable to
+       You on any legal theory (including, without limitation, negligence)
+       or otherwise for any direct, special, indirect, incidental,
+       consequential, punitive, exemplary, or other losses, costs,
+       expenses, or damages arising out of this Public License or use of
+       the Licensed Material, even if the Licensor has been advised of the
+       possibility of such losses, costs, expenses, or damages. Where a
+       limitation of liability is not allowed in full or in part, this
+       limitation may not apply to You.
+
+    c. The disclaimer of warranties and limitation of liability provided
+       above shall be interpreted in a manner that, to the extent possible,
+       most closely approximates an absolute disclaimer and waiver of all
+       liability.
+
+Section 6 - Term and Termination.
+
+    a. This Public License applies for the term of the Copyright and
+       Similar Rights licensed here. However, if You fail to comply with
+       this Public License, then Your rights under this Public License
+       terminate automatically.
+
+    b. Where Your right to use the Licensed Material has terminated under
+       Section 6(a), it reinstates:
+
+        1. automatically as of the date the violation is cured, provided it
+           is cured within 30 days of Your discovery of the violation; or
+
+        2. upon express reinstatement by the Licensor.
+
+    c. For the avoidance of doubt, this Section 6(b) does not affect any
+       right the Licensor may have to seek remedies for Your violations of
+       this Public License.
+
+    d. For the avoidance of doubt, the Licensor may also offer the Licensed
+       Material under separate terms or conditions or stop distributing the
+       Licensed Material at any time; however, doing so will not terminate
+       this Public License.
+
+    e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
+
+Section 7 - Other Terms and Conditions.
+
+    a. The Licensor shall not be bound by any additional or different terms
+       or conditions communicated by You unless expressly agreed.
+
+    b. Any arrangements, understandings, or agreements regarding the
+       Licensed Material not stated herein are separate from and
+       independent of the terms and conditions of this Public License.
+
+Section 8 - Interpretation.
+
+    a. For the avoidance of doubt, this Public License does not, and shall
+       not be interpreted to, reduce, limit, restrict, or impose conditions
+       on any use of the Licensed Material that could lawfully be made
+       without permission under this Public License.
+
+    b. To the extent possible, if any provision of this Public License is
+       deemed unenforceable, it shall be automatically reformed to the
+       minimum extent necessary to make it enforceable. If the provision
+       cannot be reformed, it shall be severed from this Public License
+       without affecting the enforceability of the remaining terms and
+       conditions.
+
+    c. No term or condition of this Public License will be waived and no
+       failure to comply consented to unless expressly agreed to by the
+       Licensor.
+
+    d. Nothing in this Public License constitutes or may be interpreted as
+       a limitation upon, or waiver of, any privileges and immunities that
+       apply to the Licensor or You, including from the legal processes of
+       any jurisdiction or authority.
+
+Creative Commons is not a party to its public licenses. Notwithstanding,
+Creative Commons may elect to apply one of its public licenses to material
+it publishes and in those instances will be considered the "Licensor." The
+text of the Creative Commons public licenses is dedicated to the public
+domain under the CC0 Public Domain Dedication. Except for the limited
+purpose of indicating that material is shared under a Creative Commons
+public license or as otherwise permitted by the Creative Commons policies
+published at creativecommons.org/policies, Creative Commons does not
+authorize the use of the trademark "Creative Commons" or any other
+trademark or logo of Creative Commons without its prior written consent
+including, without limitation, in connection with any unauthorized
+modifications to any of its public licenses or any other arrangements,
+understandings, or agreements concerning use of licensed material. For the
+avoidance of doubt, this paragraph does not form part of the public
+licenses.
+
+Creative Commons may be contacted at creativecommons.org.
diff --git a/LICENSES/other/CDDL-1.0 b/LICENSES/other/CDDL-1.0
new file mode 100644
index 0000000..195a168
--- /dev/null
+++ b/LICENSES/other/CDDL-1.0
@@ -0,0 +1,364 @@
+Valid-License-Identifier: CDDL-1.0
+SPDX-URL: https://spdx.org/licenses/CDDL-1.0.html
+Usage-Guide:
+  To use the Common Development and Distribution License 1.0 put the
+  following SPDX tag/value pair into a comment according to the placement
+  guidelines in the licensing rules documentation:
+    SPDX-License-Identifier: CDDL-1.0
+
+License-Text:
+
+COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)
+Version 1.0
+
+    1. Definitions.
+
+        1.1. "Contributor" means each individual or entity that creates or
+             contributes to the creation of Modifications.
+
+        1.2. "Contributor Version" means the combination of the Original
+             Software, prior Modifications used by a Contributor (if any),
+             and the Modifications made by that particular Contributor.
+
+        1.3. "Covered Software" means (a) the Original Software, or (b)
+             Modifications, or (c) the combination of files containing
+             Original Software with files containing Modifications, in each
+             case including portions thereof.
+
+        1.4. "Executable" means the Covered Software in any form other than
+             Source Code.
+
+        1.5. "Initial Developer" means the individual or entity that first
+             makes Original Software available under this License.
+
+        1.6. "Larger Work" means a work which combines Covered Software or
+             portions thereof with code not governed by the terms of this
+             License.
+
+        1.7. "License" means this document.
+
+        1.8. "Licensable" means having the right to grant, to the maximum
+             extent possible, whether at the time of the initial grant or
+             subsequently acquired, any and all of the rights conveyed herein.
+
+        1.9. "Modifications" means the Source Code and Executable form of
+             any of the following:
+
+            A. Any file that results from an addition to, deletion from or
+               modification of the contents of a file containing Original
+               Software or previous Modifications;
+
+            B. Any new file that contains any part of the Original Software
+               or previous Modification; or
+
+            C. Any new file that is contributed or otherwise made available
+               under the terms of this License.
+
+        1.10. "Original Software" means the Source Code and Executable form
+              of computer software code that is originally released under
+              this License.
+
+        1.11. "Patent Claims" means any patent claim(s), now owned or
+              hereafter acquired, including without limitation, method,
+              process, and apparatus claims, in any patent Licensable by
+              grantor.
+
+        1.12. "Source Code" means (a) the common form of computer software
+              code in which modifications are made and (b) associated
+              documentation included in or with such code.
+
+        1.13. "You" (or "Your") means an individual or a legal entity
+              exercising rights under, and complying with all of the terms
+              of, this License. For legal entities, "You" includes any
+              entity which controls, is controlled by, or is under common
+              control with You. For purposes of this definition, "control"
+              means (a) the power, direct or indirect, to cause the
+              direction or management of such entity, whether by contract
+              or otherwise, or (b) ownership of more than fifty percent
+              (50%) of the outstanding shares or beneficial ownership of
+              such entity.
+
+    2. License Grants.
+        2.1. The Initial Developer Grant.
+
+        Conditioned upon Your compliance with Section 3.1 below and subject
+        to third party intellectual property claims, the Initial Developer
+        hereby grants You a world-wide, royalty-free, non-exclusive
+        license:
+
+            (a) under intellectual property rights (other than patent or
+                trademark) Licensable by Initial Developer, to use,
+                reproduce, modify, display, perform, sublicense and
+                distribute the Original Software (or portions thereof),
+                with or without Modifications, and/or as part of a Larger
+                Work; and
+
+            (b) under Patent Claims infringed by the making, using or
+                selling of Original Software, to make, have made, use,
+                practice, sell, and offer for sale, and/or otherwise
+                dispose of the Original Software (or portions thereof).
+
+            (c) The licenses granted in Sections 2.1(a) and (b) are
+                effective on the date Initial Developer first distributes
+                or otherwise makes the Original Software available to a
+                third party under the terms of this License.
+
+            (d) Notwithstanding Section 2.1(b) above, no patent license is
+                granted: (1) for code that You delete from the Original
+                Software, or (2) for infringements caused by: (i) the
+                modification of the Original Software, or (ii) the
+                combination of the Original Software with other software or
+                devices.
+
+        2.2. Contributor Grant.
+
+        Conditioned upon Your compliance with Section 3.1 below and subject
+        to third party intellectual property claims, each Contributor
+        hereby grants You a world-wide, royalty-free, non-exclusive
+        license:
+
+            (a) under intellectual property rights (other than patent or
+                trademark) Licensable by Contributor to use, reproduce,
+                modify, display, perform, sublicense and distribute the
+                Modifications created by such Contributor (or portions
+                thereof), either on an unmodified basis, with other
+                Modifications, as Covered Software and/or as part of a
+                Larger Work; and
+
+            (b) under Patent Claims infringed by the making, using, or
+                selling of Modifications made by that Contributor either
+                alone and/or in combination with its Contributor Version
+                (or portions of such combination), to make, use, sell,
+                offer for sale, have made, and/or otherwise dispose of: (1)
+                Modifications made by that Contributor (or portions
+                thereof); and (2) the combination of Modifications made by
+                that Contributor with its Contributor Version (or portions
+                of such combination).
+
+            (c) The licenses granted in Sections 2.2(a) and 2.2(b) are
+                effective on the date Contributor first distributes or
+                otherwise makes the Modifications available to a third
+                party.
+
+            (d) Notwithstanding Section 2.2(b) above, no patent license is
+                granted: (1) for any code that Contributor has deleted from
+                the Contributor Version; (2) for infringements caused by:
+                (i) third party modifications of Contributor Version, or
+                (ii) the combination of Modifications made by that
+                Contributor with other software (except as part of the
+                Contributor Version) or other devices; or (3) under Patent
+                Claims infringed by Covered Software in the absence of
+                Modifications made by that Contributor.
+
+    3. Distribution Obligations.
+        3.1. Availability of Source Code.
+
+        Any Covered Software that You distribute or otherwise make
+        available in Executable form must also be made available in Source
+        Code form and that Source Code form must be distributed only under
+        the terms of this License. You must include a copy of this License
+        with every copy of the Source Code form of the Covered Software You
+        distribute or otherwise make available. You must inform recipients
+        of any such Covered Software in Executable form as to how they can
+        obtain such Covered Software in Source Code form in a reasonable
+        manner on or through a medium customarily used for software
+        exchange.
+
+        3.2. Modifications.
+
+        The Modifications that You create or to which You contribute are
+        governed by the terms of this License. You represent that You
+        believe Your Modifications are Your original creation(s) and/or You
+        have sufficient rights to grant the rights conveyed by this
+        License.
+
+        3.3. Required Notices.
+
+        You must include a notice in each of Your Modifications that
+        identifies You as the Contributor of the Modification. You may not
+        remove or alter any copyright, patent or trademark notices
+        contained within the Covered Software, or any notices of licensing
+        or any descriptive text giving attribution to any Contributor or
+        the Initial Developer.
+
+        3.4. Application of Additional Terms.
+
+        You may not offer or impose any terms on any Covered Software in
+        Source Code form that alters or restricts the applicable version of
+        this License or the recipients' rights hereunder. You may choose to
+        offer, and to charge a fee for, warranty, support, indemnity or
+        liability obligations to one or more recipients of Covered
+        Software. However, You may do so only on Your own behalf, and not
+        on behalf of the Initial Developer or any Contributor. You must
+        make it absolutely clear that any such warranty, support, indemnity
+        or liability obligation is offered by You alone, and You hereby
+        agree to indemnify the Initial Developer and every Contributor for
+        any liability incurred by the Initial Developer or such Contributor
+        as a result of warranty, support, indemnity or liability terms You
+        offer.
+
+        3.5. Distribution of Executable Versions.
+
+        You may distribute the Executable form of the Covered Software
+        under the terms of this License or under the terms of a license of
+        Your choice, which may contain terms different from this License,
+        provided that You are in compliance with the terms of this License
+        and that the license for the Executable form does not attempt to
+        limit or alter the recipient's rights in the Source Code form from
+        the rights set forth in this License. If You distribute the Covered
+        Software in Executable form under a different license, You must
+        make it absolutely clear that any terms which differ from this
+        License are offered by You alone, not by the Initial Developer or
+        Contributor. You hereby agree to indemnify the Initial Developer
+        and every Contributor for any liability incurred by the Initial
+        Developer or such Contributor as a result of any such terms You
+        offer.
+
+        3.6. Larger Works.
+
+        You may create a Larger Work by combining Covered Software with
+        other code not governed by the terms of this License and distribute
+        the Larger Work as a single product. In such a case, You must make
+        sure the requirements of this License are fulfilled for the Covered
+        Software.
+
+    4. Versions of the License.
+        4.1. New Versions.
+
+        Sun Microsystems, Inc. is the initial license steward and may
+        publish revised and/or new versions of this License from time to
+        time. Each version will be given a distinguishing version
+        number. Except as provided in Section 4.3, no one other than the
+        license steward has the right to modify this License.
+
+        4.2. Effect of New Versions.
+
+        You may always continue to use, distribute or otherwise make the
+        Covered Software available under the terms of the version of the
+        License under which You originally received the Covered
+        Software. If the Initial Developer includes a notice in the
+        Original Software prohibiting it from being distributed or
+        otherwise made available under any subsequent version of the
+        License, You must distribute and make the Covered Software
+        available under the terms of the version of the License under which
+        You originally received the Covered Software. Otherwise, You may
+        also choose to use, distribute or otherwise make the Covered
+        Software available under the terms of any subsequent version of the
+        License published by the license steward.
+
+        4.3. Modified Versions.
+
+        When You are an Initial Developer and You want to create a new
+        license for Your Original Software, You may create and use a
+        modified version of this License if You: (a) rename the license and
+        remove any references to the name of the license steward (except to
+        note that the license differs from this License); and (b) otherwise
+        make it clear that the license contains terms which differ from
+        this License.
+
+    5. DISCLAIMER OF WARRANTY.
+
+    COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
+    WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+    WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF
+    DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR
+    NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF
+    THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE
+    DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER
+    CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR
+    CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART
+    OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER
+    EXCEPT UNDER THIS DISCLAIMER.
+
+    6. TERMINATION.
+
+        6.1. This License and the rights granted hereunder will terminate
+        automatically if You fail to comply with terms herein and fail to
+        cure such breach within 30 days of becoming aware of the
+        breach. Provisions which, by their nature, must remain in effect
+        beyond the termination of this License shall survive.
+
+        6.2. If You assert a patent infringement claim (excluding
+        declaratory judgment actions) against Initial Developer or a
+        Contributor (the Initial Developer or Contributor against whom You
+        assert such claim is referred to as "Participant") alleging that
+        the Participant Software (meaning the Contributor Version where the
+        Participant is a Contributor or the Original Software where the
+        Participant is the Initial Developer) directly or indirectly
+        infringes any patent, then any and all rights granted directly or
+        indirectly to You by such Participant, the Initial Developer (if
+        the Initial Developer is not the Participant) and all Contributors
+        under Sections 2.1 and/or 2.2 of this License shall, upon 60 days
+        notice from Participant terminate prospectively and automatically
+        at the expiration of such 60 day notice period, unless if within
+        such 60 day period You withdraw Your claim with respect to the
+        Participant Software against such Participant either unilaterally
+        or pursuant to a written agreement with Participant.
+
+        6.3. In the event of termination under Sections 6.1 or 6.2 above,
+        all end user licenses that have been validly granted by You or any
+        distributor hereunder prior to termination (excluding licenses
+        granted to You by any distributor) shall survive termination.
+
+    7. LIMITATION OF LIABILITY.
+
+    UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT
+    (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL
+    DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED
+    SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY
+    PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+    OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOST
+    PROFITS, LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR
+    MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF
+    SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH
+    DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR
+    DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE
+    EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO
+    NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL
+    DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU.
+
+    8. U.S. GOVERNMENT END USERS.
+
+    The Covered Software is a "commercial item," as that term is defined in
+    48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer
+    software" (as that term is defined at 48 C.F.R. $ 252.227-7014(a)(1))
+    and "commercial computer software documentation" as such terms are used
+    in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and
+    48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all
+    U.S. Government End Users acquire Covered Software with only those
+    rights set forth herein. This U.S. Government Rights clause is in lieu
+    of, and supersedes, any other FAR, DFAR, or other clause or provision
+    that addresses Government rights in computer software under this
+    License.
+
+    9. MISCELLANEOUS.
+
+    This License represents the complete agreement concerning subject
+    matter hereof. If any provision of this License is held to be
+    unenforceable, such provision shall be reformed only to the extent
+    necessary to make it enforceable. This License shall be governed by the
+    law of the jurisdiction specified in a notice contained within the
+    Original Software (except to the extent applicable law, if any,
+    provides otherwise), excluding such jurisdiction's conflict-of-law
+    provisions. Any litigation relating to this License shall be subject to
+    the jurisdiction of the courts located in the jurisdiction and venue
+    specified in a notice contained within the Original Software, with the
+    losing party responsible for costs, including, without limitation,
+    court costs and reasonable attorneys' fees and expenses. The
+    application of the United Nations Convention on Contracts for the
+    International Sale of Goods is expressly excluded. Any law or
+    regulation which provides that the language of a contract shall be
+    construed against the drafter shall not apply to this License. You
+    agree that You alone are responsible for compliance with the United
+    States export administration regulations (and the export control laws
+    and regulation of any other countries) when You use, distribute or
+    otherwise make available any Covered Software.
+
+    10. RESPONSIBILITY FOR CLAIMS.
+
+    As between Initial Developer and the Contributors, each party is
+    responsible for claims and damages arising, directly or indirectly, out
+    of its utilization of rights under this License and You agree to work
+    with Initial Developer and Contributors to distribute such
+    responsibility on an equitable basis. Nothing herein is intended or
+    shall be deemed to constitute any admission of liability.
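
A minimal sketch of the placement the Usage-Guide above describes,
assuming a .c file (in-tree CDDL code is normally dual-licensed, so the
bare tag is shown only to illustrate placement):

	// SPDX-License-Identifier: CDDL-1.0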
diff --git a/LICENSES/other/Linux-OpenIB b/LICENSES/other/Linux-OpenIB
new file mode 100644
index 0000000..1ad85f6
--- /dev/null
+++ b/LICENSES/other/Linux-OpenIB
@@ -0,0 +1,26 @@
+Valid-License-Identifier: Linux-OpenIB
+SPDX-URL: https://spdx.org/licenses/Linux-OpenIB.html
+Usage-Guide:
+  To use the Linux Kernel Variant of the OpenIB.org license, put the
+  following SPDX tag/value pair into a comment according to the placement
+  guidelines in the licensing rules documentation:
+    SPDX-License-Identifier: Linux-OpenIB
+License-Text:
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
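
Placement sketch for the guide above; RDMA sources typically carry this
identifier as half of a dual-license expression (the expression is an
example, not something this patch mandates):

	// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB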
diff --git a/LICENSES/other/X11 b/LICENSES/other/X11
new file mode 100644
index 0000000..fe4353f
--- /dev/null
+++ b/LICENSES/other/X11
@@ -0,0 +1,37 @@
+Valid-License-Identifier: X11
+SPDX-URL: https://spdx.org/licenses/X11.html
+Usage-Guide:
+  To use the X11 License, put the following SPDX tag/value pair into a
+  comment according to the placement guidelines in the licensing rules
+  documentation:
+    SPDX-License-Identifier: X11
+License-Text:
+
+X11 License
+
+Copyright (C) 1996 X Consortium
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Except as contained in this notice, the name of the X Consortium shall not
+be used in advertising or otherwise to promote the sale, use or other
+dealings in this Software without prior written authorization from the X
+Consortium.
+
+X Window System is a trademark of X Consortium, Inc.
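
Placement sketch for the guide above, this time in a header, where the
licensing rules use C89-style comments (illustrative, not part of the
patch):

	/* SPDX-License-Identifier: X11 */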
diff --git a/LICENSES/preferred/GPL-2.0 b/LICENSES/preferred/GPL-2.0
index b8db91d..ff0812f 100644
--- a/LICENSES/preferred/GPL-2.0
+++ b/LICENSES/preferred/GPL-2.0
@@ -1,5 +1,7 @@
 Valid-License-Identifier: GPL-2.0
+Valid-License-Identifier: GPL-2.0-only
 Valid-License-Identifier: GPL-2.0+
+Valid-License-Identifier: GPL-2.0-or-later
 SPDX-URL: https://spdx.org/licenses/GPL-2.0.html
 Usage-Guide:
   To use this license in source code, put one of the following SPDX
@@ -7,8 +9,12 @@
   guidelines in the licensing rules documentation.
   For 'GNU General Public License (GPL) version 2 only' use:
     SPDX-License-Identifier: GPL-2.0
+  or
+    SPDX-License-Identifier: GPL-2.0-only
   For 'GNU General Public License (GPL) version 2 or any later version' use:
     SPDX-License-Identifier: GPL-2.0+
+  or
+    SPDX-License-Identifier: GPL-2.0-or-later
 License-Text:
 
 		    GNU GENERAL PUBLIC LICENSE
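
With this change either spelling of each identifier is valid, so the
first line of a new .c file may use whichever member of these
equivalent pairs (a placement sketch, not part of the diff):

	// SPDX-License-Identifier: GPL-2.0
	// SPDX-License-Identifier: GPL-2.0-only

	// SPDX-License-Identifier: GPL-2.0+
	// SPDX-License-Identifier: GPL-2.0-or-later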
diff --git a/MAINTAINERS b/MAINTAINERS
index 884657f..448957c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -767,12 +767,14 @@
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
 F:	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
 F:	drivers/gpu/drm/amd/amdkfd/
 F:	drivers/gpu/drm/amd/include/cik_structs.h
 F:	drivers/gpu/drm/amd/include/kgd_kfd_interface.h
 F:	drivers/gpu/drm/amd/include/vi_structs.h
+F:	drivers/gpu/drm/amd/include/v9_structs.h
 F:	include/uapi/linux/kfd_ioctl.h
 
 AMD SEATTLE DEVICE TREE SUPPORT
@@ -2331,6 +2333,14 @@
 F:	drivers/gpio/gpio-ath79.c
 F:	Documentation/devicetree/bindings/gpio/gpio-ath79.txt
 
+ATHEROS 71XX/9XXX USB PHY DRIVER
+M:	Alban Bedel <albeu@free.fr>
+W:	https://github.com/AlbanBedel/linux
+T:	git git://github.com/AlbanBedel/linux
+S:	Maintained
+F:	drivers/phy/qualcomm/phy-ath79-usb.c
+F:	Documentation/devicetree/bindings/phy/phy-ath79-usb.txt
+
 ATHEROS ATH GENERIC UTILITIES
 M:	Kalle Valo <kvalo@codeaurora.org>
 L:	linux-wireless@vger.kernel.org
@@ -3432,6 +3442,12 @@
 F:	Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt
 F:	drivers/input/touchscreen/chipone_icn8318.c
 
+CHIPONE ICN8505 I2C TOUCHSCREEN DRIVER
+M:	Hans de Goede <hdegoede@redhat.com>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/input/touchscreen/chipone_icn8505.c
+
 CHROME HARDWARE PLATFORM SUPPORT
 M:	Benson Leung <bleung@chromium.org>
 M:	Olof Johansson <olof@lixom.net>
@@ -4331,12 +4347,14 @@
 S:	Supported
 F:	lib/dma-debug.c
 F:	lib/dma-direct.c
+F:	lib/dma-noncoherent.c
 F:	lib/dma-virt.c
 F:	drivers/base/dma-mapping.c
 F:	drivers/base/dma-coherent.c
 F:	include/asm-generic/dma-mapping.h
 F:	include/linux/dma-direct.h
 F:	include/linux/dma-mapping.h
+F:	include/linux/dma-noncoherent.h
 
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>
@@ -4670,7 +4688,7 @@
 
 DRM DRIVERS FOR FREESCALE DCU
 M:	Stefan Agner <stefan@agner.ch>
-M:	Alison Wang <alison.wang@freescale.com>
+M:	Alison Wang <alison.wang@nxp.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Supported
 F:	drivers/gpu/drm/fsl-dcu/
@@ -4781,6 +4799,14 @@
 F:	drivers/gpu/drm/omapdrm/
 F:	Documentation/devicetree/bindings/display/ti/
 
+DRM DRIVERS FOR V3D
+M:	Eric Anholt <eric@anholt.net>
+S:	Supported
+F:	drivers/gpu/drm/v3d/
+F:	include/uapi/drm/v3d_drm.h
+F:	Documentation/devicetree/bindings/display/brcm,bcm-v3d.txt
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+
 DRM DRIVERS FOR VC4
 M:	Eric Anholt <eric@anholt.net>
 T:	git git://github.com/anholt/linux
@@ -4827,6 +4853,15 @@
 F:	drivers/gpu/drm/tinydrm/
 F:	include/drm/tinydrm/
 
+DRM DRIVERS FOR XEN
+M:	Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+L:	dri-devel@lists.freedesktop.org
+L:	xen-devel@lists.xen.org
+S:	Supported
+F:	drivers/gpu/drm/xen/
+F:	Documentation/gpu/xen-front.rst
+
 DRM TTM SUBSYSTEM
 M:	Christian Koenig <christian.koenig@amd.com>
 M:	Roger He <Hongbo.He@amd.com>
@@ -5413,6 +5448,11 @@
 F:	Documentation/hwmon/f71805f
 F:	drivers/hwmon/f71805f.c
 
+FADDR2LINE
+M:	Josh Poimboeuf <jpoimboe@redhat.com>
+S:	Maintained
+F:	scripts/faddr2line
+
 FAILOVER MODULE
 M:	Sridhar Samudrala <sridhar.samudrala@intel.com>
 L:	netdev@vger.kernel.org
@@ -5573,6 +5613,7 @@
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git
 Q:	http://patchwork.kernel.org/project/linux-fpga/list/
 F:	Documentation/fpga/
+F:	Documentation/driver-api/fpga/
 F:	Documentation/devicetree/bindings/fpga/
 F:	drivers/fpga/
 F:	include/linux/fpga/
@@ -5963,8 +6004,8 @@
 F:	scripts/get_maintainer.pl
 
 GFS2 FILE SYSTEM
-M:	Steven Whitehouse <swhiteho@redhat.com>
 M:	Bob Peterson <rpeterso@redhat.com>
+M:	Andreas Gruenbacher <agruenba@redhat.com>
 L:	cluster-devel@redhat.com
 W:	http://sources.redhat.com/cluster/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
@@ -6232,6 +6273,7 @@
 W:	http://hwmon.wiki.kernel.org/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
 S:	Maintained
+F:	Documentation/devicetree/bindings/hwmon/
 F:	Documentation/hwmon/
 F:	drivers/hwmon/
 F:	include/linux/hwmon*.h
@@ -6766,6 +6808,12 @@
 S:	Supported
 F:	drivers/scsi/ibmvscsi/ibmvfc*
 
+IBM Power Virtual Management Channel Driver
+M:	Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+M:	Steven Royer <seroyer@linux.vnet.ibm.com>
+S:	Supported
+F:	drivers/misc/ibmvmc.*
+
 IBM Power Virtual SCSI Device Drivers
 M:	Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
 L:	linux-scsi@vger.kernel.org
@@ -7049,14 +7097,13 @@
 S:	Maintained
 F:	drivers/video/fbdev/i810/
 
-INTEL ASoC BDW/HSW DRIVERS
+INTEL ASoC DRIVERS
+M:	Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+M:	Liam Girdwood <liam.r.girdwood@linux.intel.com>
 M:	Jie Yang <yang.jie@linux.intel.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:	Supported
-F:	sound/soc/intel/common/sst-dsp*
-F:	sound/soc/intel/common/sst-firmware.c
-F:	sound/soc/intel/boards/broadwell.c
-F:	sound/soc/intel/haswell/
+F:	sound/soc/intel/
 
 INTEL C600 SERIES SAS CONTROLLER DRIVER
 M:	Intel SCU Linux support <intel-linux-scu@intel.com>
@@ -7652,8 +7699,9 @@
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git kconfig
 L:	linux-kbuild@vger.kernel.org
 S:	Maintained
-F:	Documentation/kbuild/kconfig-language.txt
+F:	Documentation/kbuild/kconfig*
 F:	scripts/kconfig/
+F:	scripts/Kconfig.include
 
 KDUMP
 M:	Dave Young <dyoung@redhat.com>
@@ -8224,7 +8272,7 @@
 
 LINUX KERNEL MEMORY CONSISTENCY MODEL (LKMM)
 M:	Alan Stern <stern@rowland.harvard.edu>
-M:	Andrea Parri <parri.andrea@gmail.com>
+M:	Andrea Parri <andrea.parri@amarulasolutions.com>
 M:	Will Deacon <will.deacon@arm.com>
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Boqun Feng <boqun.feng@gmail.com>
@@ -8331,6 +8379,7 @@
 LOCKING PRIMITIVES
 M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>
+M:	Will Deacon <will.deacon@arm.com>
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
 S:	Maintained
@@ -9732,7 +9781,7 @@
 F:	drivers/net/ethernet/netronome/
 
 NETWORK BLOCK DEVICE (NBD)
-M:	Josef Bacik <jbacik@fb.com>
+M:	Josef Bacik <josef@toxicpanda.com>
 S:	Maintained
 L:	linux-block@vger.kernel.org
 L:	nbd@other.debian.org
@@ -11298,6 +11347,7 @@
 L:	linux-pm@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
 S:	Maintained
+F:	Documentation/ABI/testing/sysfs-class-power
 F:	Documentation/devicetree/bindings/power/supply/
 F:	include/linux/power_supply.h
 F:	drivers/power/supply/
@@ -11699,6 +11749,13 @@
 F:	Documentation/media/v4l-drivers/qcom_camss.rst
 F:	drivers/media/platform/qcom/camss-8x16/
 
+QUALCOMM CPUFREQ DRIVER MSM8996/APQ8096
+M:	Ilia Lin <ilia.lin@gmail.com>
+L:	linux-pm@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/opp/kryo-cpufreq.txt
+F:	drivers/cpufreq/qcom-cpufreq-kryo.c
+
 QUALCOMM EMAC GIGABIT ETHERNET DRIVER
 M:	Timur Tabi <timur@codeaurora.org>
 L:	netdev@vger.kernel.org
@@ -11923,7 +11980,7 @@
 F:	include/uapi/linux/rtc.h
 F:	include/linux/rtc/
 F:	include/linux/platform_data/rtc-*
-F:	tools/testing/selftests/timers/rtctest.c
+F:	tools/testing/selftests/rtc/
 
 REALTEK AUDIO CODECS
 M:	Bard Liao <bardliao@realtek.com>
@@ -13150,7 +13207,7 @@
 F:	sound/
 
 SOUND - COMPRESSED AUDIO
-M:	Vinod Koul <vinod.koul@intel.com>
+M:	Vinod Koul <vkoul@kernel.org>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
 S:	Supported
@@ -14417,6 +14474,15 @@
 F:	drivers/tc/
 F:	include/linux/tc.h
 
+TURBOSTAT UTILITY
+M:	"Len Brown" <lenb@kernel.org>
+L:	linux-pm@vger.kernel.org
+B:	https://bugzilla.kernel.org
+Q:	https://patchwork.kernel.org/project/linux-pm/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat
+S:	Supported
+F:	tools/power/x86/turbostat/
+
 TW5864 VIDEO4LINUX DRIVER
 M:	Bluecherry Maintainers <maintainers@bluecherrydvr.com>
 M:	Anton Sviridenko <anton@corp.bluecherry.net>
@@ -14709,6 +14775,7 @@
 F:	Documentation/usb/usbip_protocol.txt
 F:	drivers/usb/usbip/
 F:	tools/usb/usbip/
+F:	tools/testing/selftests/drivers/usb/usbip/
 
 USB PEGASUS DRIVER
 M:	Petko Manolov <petkan@nucleusys.com>
@@ -15531,6 +15598,13 @@
 F:	arch/x86/xen/*swiotlb*
 F:	drivers/xen/*swiotlb*
 
+XEN SOUND FRONTEND DRIVER
+M:	Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
+L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
+S:	Supported
+F:	sound/xen/*
+
 XFS FILESYSTEM
 M:	Darrick J. Wong <darrick.wong@oracle.com>
 M:	linux-xfs@vger.kernel.org
@@ -15695,7 +15769,7 @@
 S:	Maintained
 F:	mm/zsmalloc.c
 F:	include/linux/zsmalloc.h
-F:	Documentation/vm/zsmalloc.txt
+F:	Documentation/vm/zsmalloc.rst
 
 ZSWAP COMPRESSED SWAP CACHING
 M:	Seth Jennings <sjenning@redhat.com>
diff --git a/Makefile b/Makefile
index 62c1100..019a5a0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 17
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Merciless Moray
 
 # *DOCUMENTATION*
@@ -316,12 +316,9 @@
 # CROSS_COMPILE can be set on the command line
 # make CROSS_COMPILE=ia64-linux-
 # Alternatively CROSS_COMPILE can be set in the environment.
-# A third alternative is to store a setting in .config so that plain
-# "make" in the configured kernel build directory always uses that.
 # Default value for CROSS_COMPILE is not to prefix executables
 # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile
 ARCH		?= $(SUBARCH)
-CROSS_COMPILE	?= $(CONFIG_CROSS_COMPILE:"%"=%)
 
 # Architecture as present in compile.h
 UTS_MACHINE 	:= $(ARCH)
@@ -445,6 +442,8 @@
 export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL
 export KBUILD_ARFLAGS
 
+export CC_VERSION_TEXT := $(shell $(CC) --version | head -n 1)
+
 # When compiling out-of-tree modules, put MODVERDIR in the module
 # tree rather than in the kernel tree. The kernel tree might
 # even be read-only.
@@ -504,13 +503,13 @@
 KBUILD_AFLAGS	+= $(call cc-option,-fno-PIE)
 
 # check for 'asm goto'
-ifeq ($(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
   CC_HAVE_ASM_GOTO := 1
   KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
   KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
-ifeq ($(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/cc-can-link.sh $(CC)), y)
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/cc-can-link.sh $(CC)), y)
   CC_CAN_LINK := y
   export CC_CAN_LINK
 endif
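
What the gcc-goto.sh probe above tests for, as a hedged C sketch: a
compiler with 'asm goto' support lets inline assembly branch to a C
label, and code can key off the CC_HAVE_ASM_GOTO define exported here
(the function and label names are illustrative, assuming the usual
kernel headers):

	static inline bool asm_goto_works(void)
	{
	#ifdef CC_HAVE_ASM_GOTO
		/* empty asm body that declares it may branch to 'yes' */
		asm goto("" : : : : yes);
		return false;
	yes:
		return true;
	#else
		return false;
	#endif
	}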
@@ -807,13 +806,12 @@
 endif
 
 ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
-KBUILD_CFLAGS	+= $(call cc-option,-ffunction-sections,)
-KBUILD_CFLAGS	+= $(call cc-option,-fdata-sections,)
+KBUILD_CFLAGS_KERNEL	+= $(call cc-option,-ffunction-sections,)
+KBUILD_CFLAGS_KERNEL	+= $(call cc-option,-fdata-sections,)
 endif
 
 # arch Makefile may override CC so keep this after arch Makefile is included
-NOSTDINC_FLAGS += -nostdinc -isystem $(call shell-cached,$(CC) -print-file-name=include)
-CHECKFLAGS     += $(NOSTDINC_FLAGS)
+NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
 
 # warn about C99 declaration after statement
 KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,)
@@ -883,6 +881,12 @@
 LDFLAGS_vmlinux	+= $(call ld-option, -X,)
 endif
 
+# ensure the checker runs with the right endianness
+CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian)
+
+# the checker needs the correct machine size
+CHECKFLAGS += $(if $(CONFIG_64BIT),-m64,-m32)
+
 # Default kernel image to build when no specific target is given.
 # KBUILD_IMAGE may be overruled on the command line or
 # set in the environment
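
An illustration of why the checker needs these flags (the function is
hypothetical): sparse must agree with the target on byte order and on
sizeof(long), or valid constructs such as this pointer round-trip are
misreported as lossy:

	static unsigned long ptr_to_token(const void *p)
	{
		/* clean only if the checker knows sizeof(long) == sizeof(void *) */
		return (unsigned long)p;
	}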
@@ -1630,7 +1634,6 @@
 		-o -name '*.asn1.[ch]' \
 		-o -name '*.symtypes' -o -name 'modules.order' \
 		-o -name modules.builtin -o -name '.tmp_*.o.*' \
-		-o -name .cache.mk \
 		-o -name '*.c.[012]*.*' \
 		-o -name '*.ll' \
 		-o -name '*.gcno' \) -type f -print | xargs rm -f
@@ -1768,7 +1771,7 @@
 # Run depmod only if we have System.map and depmod is executable
 quiet_cmd_depmod = DEPMOD  $(KERNELRELEASE)
       cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
-                   $(KERNELRELEASE) "$(patsubst y,_,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))"
+                   $(KERNELRELEASE)
 
 # Create temporary dir for module support files
 # clean it up only when building all modules
diff --git a/arch/Kconfig b/arch/Kconfig
index 75dd23a..8a7f7e1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -278,9 +278,6 @@
 	  The <linux/clk.h> calls support software clock gating and
 	  thus are a key power management tool on many systems.
 
-config HAVE_DMA_API_DEBUG
-	bool
-
 config HAVE_HW_BREAKPOINT
 	bool
 	depends on PERF_EVENTS
@@ -600,21 +597,6 @@
 
 endchoice
 
-config LD_DEAD_CODE_DATA_ELIMINATION
-	bool
-	help
-	  Select this if the architecture wants to do dead code and
-	  data elimination with the linker by compiling with
-	  -ffunction-sections -fdata-sections and linking with
-	  --gc-sections.
-
-	  This requires that the arch annotates or otherwise protects
-	  its external entry points from being discarded. Linker scripts
-	  must also merge .text.*, .data.*, and .bss.* correctly into
-	  output sections. Care must be taken not to pull in unrelated
-	  sections (e.g., '.text.init'). Typically '.' in section names
-	  is used to distinguish them from label names / C identifiers.
-
 config HAVE_ARCH_WITHIN_STACK_FRAMES
 	bool
 	help
@@ -690,12 +672,6 @@
 	  Modules only use ELF REL relocations.  Modules with ELF RELA
 	  relocations will give an error.
 
-config HAVE_UNDERSCORE_SYMBOL_PREFIX
-	bool
-	help
-	  Some architectures generate an _ in front of C symbols; things like
-	  module loading and assembly files need to know about this.
-
 config HAVE_IRQ_EXIT_ON_IRQ_STACK
 	bool
 	help
@@ -874,6 +850,21 @@
 config COMPAT_OLD_SIGACTION
 	bool
 
+config 64BIT_TIME
+	def_bool ARCH_HAS_64BIT_TIME
+	help
+	  This should be selected by all architectures that need to support
+	  new system calls with a 64-bit time_t. This is relevant on all
+	  32-bit architectures, and on 64-bit architectures as part of
+	  compat syscall handling.
+
+config COMPAT_32BIT_TIME
+	def_bool (!64BIT && 64BIT_TIME) || COMPAT
+	help
+	  This enables 32-bit time_t support in addition to 64-bit time_t
+	  support. It is relevant on all 32-bit architectures, and on 64-bit
+	  architectures as part of compat syscall handling.
+
 config ARCH_NO_COHERENT_DMA_MMAP
 	bool
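
A sketch of the gating the new time_t options provide (the helper below
is hypothetical, written only to show CONFIG_COMPAT_32BIT_TIME in use):

	#include <linux/errno.h>
	#include <linux/types.h>
	#include <linux/uaccess.h>

	/* Expose a 64-bit kernel timestamp through the legacy 32-bit
	 * time_t ABI, which exists only on COMPAT_32BIT_TIME kernels. */
	static long put_old_time32(s64 t64, s32 __user *up)
	{
	#ifdef CONFIG_COMPAT_32BIT_TIME
		return put_user((s32)t64, up);	/* old ABI truncates in 2038 */
	#else
		return -ENOSYS;
	#endif
	}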
 
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index f19dc31..0c4805a 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -10,6 +10,8 @@
 	select HAVE_OPROFILE
 	select HAVE_PCSPKR_PLATFORM
 	select HAVE_PERF_EVENTS
+	select NEED_DMA_MAP_STATE
+	select NEED_SG_DMA_LENGTH
 	select VIRT_TO_BUS
 	select GENERIC_IRQ_PROBE
 	select AUTO_IRQ_AFFINITY if SMP
@@ -64,15 +66,6 @@
 	bool
 	default y
 
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool y
-
-config NEED_DMA_MAP_STATE
-       def_bool y
-
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config GENERIC_ISA_DMA
 	bool
 	default y
@@ -346,9 +339,6 @@
 config PCI_SYSCALL
 	def_bool PCI
 
-config IOMMU_HELPER
-	def_bool PCI
-
 config ALPHA_NONAME
 	bool
 	depends on ALPHA_BOOK1 || ALPHA_NONAME_CH
@@ -586,7 +576,7 @@
 	  Say Y to support efficient handling of discontiguous physical memory,
 	  for architectures which are either NUMA (Non-Uniform Memory Access)
 	  or have huge holes in the physical address space for other reasons.
-	  See <file:Documentation/vm/numa> for more.
+	  See <file:Documentation/vm/numa.rst> for more.
 
 source "mm/Kconfig"
 
diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile
index 2cc3cc5..c5ec8c0 100644
--- a/arch/alpha/Makefile
+++ b/arch/alpha/Makefile
@@ -11,7 +11,7 @@
 NM := $(NM) -B
 
 LDFLAGS_vmlinux	:= -static -N #-relax
-CHECKFLAGS	+= -D__alpha__ -m64
+CHECKFLAGS	+= -D__alpha__
 cflags-y	:= -pipe -mno-fp-regs -ffixed-8
 cflags-y	+= $(call cc-option, -fno-jump-tables)
 
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 9b68790..0580cb8 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 
+generic-y += compat.h
 generic-y += exec.h
 generic-y += export.h
 generic-y += fb.h
diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index b9ec553..cf6bc1e 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -56,11 +56,6 @@
 
 /* IOMMU controls.  */
 
-/* The PCI address space does not equal the physical memory address space.
-   The networking and block device layers use this boolean for bounce buffer
-   decisions.  */
-#define PCI_DMA_BUS_IS_PHYS  0
-
 /* TODO: integrate with include/asm-generic/pci.h ? */
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index 9afaba5..1a5b753 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -2,4 +2,8 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += bpf_perf_event.h
+generic-y += ipcbuf.h
+generic-y += msgbuf.h
 generic-y += poll.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
diff --git a/arch/alpha/include/uapi/asm/ipcbuf.h b/arch/alpha/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 90d6445..0000000
--- a/arch/alpha/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/alpha/include/uapi/asm/msgbuf.h b/arch/alpha/include/uapi/asm/msgbuf.h
deleted file mode 100644
index 8c5d4d8..0000000
--- a/arch/alpha/include/uapi/asm/msgbuf.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ALPHA_MSGBUF_H
-#define _ALPHA_MSGBUF_H
-
-/* 
- * The msqid64_ds structure for alpha architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct msqid64_ds {
-	struct ipc64_perm msg_perm;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	__kernel_time_t msg_ctime;	/* last change time */
-	unsigned long  msg_cbytes;	/* current number of bytes on queue */
-	unsigned long  msg_qnum;	/* number of messages in queue */
-	unsigned long  msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	unsigned long  __unused1;
-	unsigned long  __unused2;
-};
-
-#endif /* _ALPHA_MSGBUF_H */
diff --git a/arch/alpha/include/uapi/asm/sembuf.h b/arch/alpha/include/uapi/asm/sembuf.h
deleted file mode 100644
index f28ffa6..0000000
--- a/arch/alpha/include/uapi/asm/sembuf.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ALPHA_SEMBUF_H
-#define _ALPHA_SEMBUF_H
-
-/* 
- * The semid64_ds structure for alpha architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct semid64_ds {
-	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	__kernel_time_t	sem_otime;		/* last semop time */
-	__kernel_time_t	sem_ctime;		/* last change time */
-	unsigned long	sem_nsems;		/* no. of semaphores in array */
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-};
-
-#endif /* _ALPHA_SEMBUF_H */
diff --git a/arch/alpha/include/uapi/asm/shmbuf.h b/arch/alpha/include/uapi/asm/shmbuf.h
deleted file mode 100644
index 7e041ca..0000000
--- a/arch/alpha/include/uapi/asm/shmbuf.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ALPHA_SHMBUF_H
-#define _ALPHA_SHMBUF_H
-
-/* 
- * The shmid64_ds structure for alpha architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	__kernel_time_t		shm_ctime;	/* last change time */
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned long		shm_nattch;	/* no. of current attaches */
-	unsigned long		__unused1;
-	unsigned long		__unused2;
-};
-
-struct shminfo64 {
-	unsigned long	shmmax;
-	unsigned long	shmmin;
-	unsigned long	shmmni;
-	unsigned long	shmseg;
-	unsigned long	shmall;
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* _ALPHA_SHMBUF_H */
diff --git a/arch/alpha/include/uapi/asm/siginfo.h b/arch/alpha/include/uapi/asm/siginfo.h
index 0cf3b52..db3f013 100644
--- a/arch/alpha/include/uapi/asm/siginfo.h
+++ b/arch/alpha/include/uapi/asm/siginfo.h
@@ -7,18 +7,4 @@
 
 #include <asm-generic/siginfo.h>
 
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
- * SIGTRAP si_codes
- */
-#ifdef __KERNEL__
-#define TRAP_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
 #endif
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 89faa6f..6e92175 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -871,8 +871,7 @@
 		   send a signal.  Old exceptions are not signaled.  */
 		fex = (exc >> IEEE_STATUS_TO_EXCSUM_SHIFT) & swcr;
  		if (fex) {
-			siginfo_t info;
-			int si_code = 0;
+			int si_code = FPE_FLTUNK;
 
 			if (fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND;
 			if (fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES;
@@ -881,11 +880,9 @@
 			if (fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV;
 			if (fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV;
 
-			info.si_signo = SIGFPE;
-			info.si_errno = 0;
-			info.si_code = si_code;
-			info.si_addr = NULL;  /* FIXME */
- 			send_sig_info(SIGFPE, &info, current);
+			send_sig_fault(SIGFPE, si_code,
+				       (void __user *)NULL,  /* FIXME */
+				       0, current);
  		}
 		return 0;
 	}
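
The pattern this and the following alpha hunks apply, collected in one
sketch: rather than filling a stack siginfo_t field by field (and
risking uninitialized padding reaching userspace), callers hand the
fault parameters straight to the helper. Alpha defines
__ARCH_SI_TRAPNO, so its variant of send_sig_fault() takes a trapno
argument between the fault address and the task (the wrapper name is
illustrative):

	static void report_breakpoint(struct pt_regs *regs)
	{
		send_sig_fault(SIGTRAP, TRAP_BRKPT,
			       (void __user *)regs->pc,
			       0,		/* trapno */
			       current);
	}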
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 9ebb3bc..8c0c4ee 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -219,14 +219,8 @@
 
 	/* Send SIGTRAP if we're single-stepping: */
 	if (ptrace_cancel_bpt (current)) {
-		siginfo_t info;
-
-		info.si_signo = SIGTRAP;
-		info.si_errno = 0;
-		info.si_code = TRAP_BRKPT;
-		info.si_addr = (void __user *) regs->pc;
-		info.si_trapno = 0;
-		send_sig_info(SIGTRAP, &info, current);
+		send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, 0,
+			       current);
 	}
 	return;
 
@@ -253,14 +247,8 @@
 
 	/* Send SIGTRAP if we're single-stepping: */
 	if (ptrace_cancel_bpt (current)) {
-		siginfo_t info;
-
-		info.si_signo = SIGTRAP;
-		info.si_errno = 0;
-		info.si_code = TRAP_BRKPT;
-		info.si_addr = (void __user *) regs->pc;
-		info.si_trapno = 0;
-		send_sig_info(SIGTRAP, &info, current);
+		send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, 0,
+			       current);
 	}
 	return;
 
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index f43bd05..bc96276 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -213,7 +213,6 @@
 	    struct pt_regs *regs)
 {
 	long si_code = FPE_FLTINV;
-	siginfo_t info;
 
 	if (summary & 1) {
 		/* Software-completion summary bit is set, so try to
@@ -228,17 +227,12 @@
 	}
 	die_if_kernel("Arithmetic fault", regs, 0, NULL);
 
-	info.si_signo = SIGFPE;
-	info.si_errno = 0;
-	info.si_code = si_code;
-	info.si_addr = (void __user *) regs->pc;
-	send_sig_info(SIGFPE, &info, current);
+	send_sig_fault(SIGFPE, si_code, (void __user *) regs->pc, 0, current);
 }
 
 asmlinkage void
 do_entIF(unsigned long type, struct pt_regs *regs)
 {
-	siginfo_t info;
 	int signo, code;
 
 	if ((regs->ps & ~IPL_MAX) == 0) {
@@ -270,31 +264,20 @@
 
 	switch (type) {
 	      case 0: /* breakpoint */
-		info.si_signo = SIGTRAP;
-		info.si_errno = 0;
-		info.si_code = TRAP_BRKPT;
-		info.si_trapno = 0;
-		info.si_addr = (void __user *) regs->pc;
-
 		if (ptrace_cancel_bpt(current)) {
 			regs->pc -= 4;	/* make pc point to former bpt */
 		}
 
-		send_sig_info(SIGTRAP, &info, current);
+		send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0,
+			       current);
 		return;
 
 	      case 1: /* bugcheck */
-		info.si_signo = SIGTRAP;
-		info.si_errno = 0;
-		info.si_code = TRAP_FIXME;
-		info.si_addr = (void __user *) regs->pc;
-		info.si_trapno = 0;
-		send_sig_info(SIGTRAP, &info, current);
+		send_sig_fault(SIGTRAP, TRAP_UNK, (void __user *) regs->pc, 0,
+			       current);
 		return;
 		
 	      case 2: /* gentrap */
-		info.si_addr = (void __user *) regs->pc;
-		info.si_trapno = regs->r16;
 		switch ((long) regs->r16) {
 		case GEN_INTOVF:
 			signo = SIGFPE;
@@ -326,7 +309,7 @@
 			break;
 		case GEN_ROPRAND:
 			signo = SIGFPE;
-			code = FPE_FIXME;
+			code = FPE_FLTUNK;
 			break;
 
 		case GEN_DECOVF:
@@ -348,15 +331,12 @@
 		case GEN_SUBRNG7:
 		default:
 			signo = SIGTRAP;
-			code = TRAP_FIXME;
+			code = TRAP_UNK;
 			break;
 		}
 
-		info.si_signo = signo;
-		info.si_errno = 0;
-		info.si_code = code;
-		info.si_addr = (void __user *) regs->pc;
-		send_sig_info(signo, &info, current);
+		send_sig_fault(signo, code, (void __user *) regs->pc, regs->r16,
+			       current);
 		return;
 
 	      case 4: /* opDEC */
@@ -380,11 +360,9 @@
 			if (si_code == 0)
 				return;
 			if (si_code > 0) {
-				info.si_signo = SIGFPE;
-				info.si_errno = 0;
-				info.si_code = si_code;
-				info.si_addr = (void __user *) regs->pc;
-				send_sig_info(SIGFPE, &info, current);
+				send_sig_fault(SIGFPE, si_code,
+					       (void __user *) regs->pc, 0,
+					       current);
 				return;
 			}
 		}
@@ -409,11 +387,7 @@
 		      ;
 	}
 
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_ILLOPC;
-	info.si_addr = (void __user *) regs->pc;
-	send_sig_info(SIGILL, &info, current);
+	send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current);
 }
 
 /* There is an ifdef in the PALcode in MILO that enables a 
@@ -426,15 +400,9 @@
 asmlinkage void
 do_entDbg(struct pt_regs *regs)
 {
-	siginfo_t info;
-
 	die_if_kernel("Instruction fault", regs, 0, NULL);
 
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_ILLOPC;
-	info.si_addr = (void __user *) regs->pc;
-	force_sig_info(SIGILL, &info, current);
+	force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current);
 }
 
 
@@ -758,7 +726,7 @@
 
 	unsigned long tmp1, tmp2, tmp3, tmp4;
 	unsigned long fake_reg, *reg_addr = &fake_reg;
-	siginfo_t info;
+	int si_code;
 	long error;
 
 	/* Check the UAC bits to decide what the user wants us to do
@@ -981,34 +949,27 @@
 
 give_sigsegv:
 	regs->pc -= 4;  /* make pc point to faulting insn */
-	info.si_signo = SIGSEGV;
-	info.si_errno = 0;
 
 	/* We need to replicate some of the logic in mm/fault.c,
 	   since we don't have access to the fault code in the
 	   exception handling return path.  */
 	if ((unsigned long)va >= TASK_SIZE)
-		info.si_code = SEGV_ACCERR;
+		si_code = SEGV_ACCERR;
 	else {
 		struct mm_struct *mm = current->mm;
 		down_read(&mm->mmap_sem);
 		if (find_vma(mm, (unsigned long)va))
-			info.si_code = SEGV_ACCERR;
+			si_code = SEGV_ACCERR;
 		else
-			info.si_code = SEGV_MAPERR;
+			si_code = SEGV_MAPERR;
 		up_read(&mm->mmap_sem);
 	}
-	info.si_addr = va;
-	send_sig_info(SIGSEGV, &info, current);
+	send_sig_fault(SIGSEGV, si_code, va, 0, current);
 	return;
 
 give_sigbus:
 	regs->pc -= 4;
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = va;
-	send_sig_info(SIGBUS, &info, current);
+	send_sig_fault(SIGBUS, BUS_ADRALN, va, 0, current);
 	return;
 }
 
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index cd3c572..de2bd21 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -88,7 +88,6 @@
 	struct mm_struct *mm = current->mm;
 	const struct exception_table_entry *fixup;
 	int fault, si_code = SEGV_MAPERR;
-	siginfo_t info;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
@@ -221,21 +220,13 @@
 	up_read(&mm->mmap_sem);
 	/* Send a sigbus, regardless of whether we were in kernel
 	   or user mode.  */
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRERR;
-	info.si_addr = (void __user *) address;
-	force_sig_info(SIGBUS, &info, current);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0, current);
 	if (!user_mode(regs))
 		goto no_context;
 	return;
 
  do_sigsegv:
-	info.si_signo = SIGSEGV;
-	info.si_errno = 0;
-	info.si_code = si_code;
-	info.si_addr = (void __user *) address;
-	force_sig_info(SIGSEGV, &info, current);
+	force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0, current);
 	return;
 
 #ifdef CONFIG_ALPHA_LARGE_VMALLOC
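
The conversions above all funnel into the generic send_sig_fault()/force_sig_fault()
helpers, which build a fully initialized siginfo internally instead of leaving each
caller to fill in (and occasionally forget) fields like si_errno. A sketch of the
assumed calling convention; on mainline the trapno parameter is compiled in only on
architectures (alpha among them) that carry a trap number in siginfo, so treat these
prototypes as approximate rather than exact:

	int send_sig_fault(int sig, int code, void __user *addr,
			   int trapno, struct task_struct *t);
	int force_sig_fault(int sig, int code, void __user *addr,
			    int trapno, struct task_struct *t);
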
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index d76bf4a..89d47ea 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -9,11 +9,15 @@
 config ARC
 	def_bool y
 	select ARC_TIMERS
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
+	select DMA_NONCOHERENT_OPS
+	select DMA_NONCOHERENT_MMAP
 	select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_FIND_FIRST_BIT
@@ -453,16 +457,11 @@
 	default n
 	depends on ISA_ARCV2
 	select HIGHMEM
+	select PHYS_ADDR_T_64BIT
 	help
 	  Enable access to physical memory beyond 4G, only supported on
 	  ARC cores with 40 bit Physical Addressing support
 
-config ARCH_PHYS_ADDR_T_64BIT
-	def_bool ARC_HAS_PAE40
-
-config ARCH_DMA_ADDR_T_64BIT
-	bool
-
 config ARC_KVADDR_SIZE
 	int "Kernel Virtual Address Space size (MB)"
 	range 0 512
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 4bd5d43..feed50c 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += bugs.h
+generic-y += compat.h
 generic-y += device.h
 generic-y += div64.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += extable.h
 generic-y += fb.h
diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h
deleted file mode 100644
index 7a16824..0000000
--- a/arch/arc/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * DMA Mapping glue for ARC
- *
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef ASM_ARC_DMA_MAPPING_H
-#define ASM_ARC_DMA_MAPPING_H
-
-extern const struct dma_map_ops arc_dma_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	return &arc_dma_ops;
-}
-
-#endif
diff --git a/arch/arc/include/asm/pci.h b/arch/arc/include/asm/pci.h
index ba56c23..4ff53c0 100644
--- a/arch/arc/include/asm/pci.h
+++ b/arch/arc/include/asm/pci.h
@@ -16,12 +16,6 @@
 #define PCIBIOS_MIN_MEM 0x100000
 
 #define pcibios_assign_all_busses()	1
-/*
- * The PCI address space does equal the physical memory address space.
- * The networking and block device layers use this boolean for bounce
- * buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	1
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 1dcc404..8c10718 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -16,13 +16,12 @@
 * The default DMA address == physical address, which is 0x8000_0000 based.
  */
 
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
 #include <asm/cache.h>
 #include <asm/cacheflush.h>
 
-
-static void *arc_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
 {
 	unsigned long order = get_order(size);
 	struct page *page;
@@ -89,7 +88,7 @@
 	return kvaddr;
 }
 
-static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_handle, unsigned long attrs)
 {
 	phys_addr_t paddr = dma_handle;
@@ -105,9 +104,9 @@
 	__free_pages(page, get_order(size));
 }
 
-static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-			void *cpu_addr, dma_addr_t dma_addr, size_t size,
-			unsigned long attrs)
+int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		unsigned long attrs)
 {
 	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -130,149 +129,14 @@
 	return ret;
 }
 
-/*
- * streaming DMA Mapping API...
- * CPU accesses page via normal paddr, thus needs to explicitly made
- * consistent before each use
- */
-static void _dma_cache_sync(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	switch (dir) {
-	case DMA_FROM_DEVICE:
-		dma_cache_inv(paddr, size);
-		break;
-	case DMA_TO_DEVICE:
-		dma_cache_wback(paddr, size);
-		break;
-	case DMA_BIDIRECTIONAL:
-		dma_cache_wback_inv(paddr, size);
-		break;
-	default:
-		pr_err("Invalid DMA dir [%d] for OP @ %pa[p]\n", dir, &paddr);
-	}
+	dma_cache_wback(paddr, size);
 }
 
-/*
- * arc_dma_map_page - map a portion of a page for streaming DMA
- *
- * Ensure that any data held in the cache is appropriately discarded
- * or written back.
- *
- * The device owns this memory once this call has completed.  The CPU
- * can regain ownership by calling dma_unmap_page().
- *
- * Note: while it takes struct page as arg, caller can "abuse" it to pass
- * a region larger than PAGE_SIZE, provided it is physically contiguous
- * and this still works correctly
- */
-static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	phys_addr_t paddr = page_to_phys(page) + offset;
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		_dma_cache_sync(paddr, size, dir);
-
-	return paddr;
+	dma_cache_inv(paddr, size);
 }
-
-/*
- * arc_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
- *
- * After this call, reads by the CPU to the buffer are guaranteed to see
- * whatever the device wrote there.
- *
- * Note: historically this routine was not implemented for ARC
- */
-static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle,
-			       size_t size, enum dma_data_direction dir,
-			       unsigned long attrs)
-{
-	phys_addr_t paddr = handle;
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		_dma_cache_sync(paddr, size, dir);
-}
-
-static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg,
-	   int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sg, s, nents, i)
-		s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
-					       s->length, dir);
-
-	return nents;
-}
-
-static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-			     int nents, enum dma_data_direction dir,
-			     unsigned long attrs)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sg, s, nents, i)
-		arc_dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir,
-				   attrs);
-}
-
-static void arc_dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
-	_dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE);
-}
-
-static void arc_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
-{
-	_dma_cache_sync(dma_handle, size, DMA_TO_DEVICE);
-}
-
-static void arc_dma_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sglist, int nelems,
-		enum dma_data_direction dir)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sglist, sg, nelems, i)
-		_dma_cache_sync(sg_phys(sg), sg->length, dir);
-}
-
-static void arc_dma_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sglist, int nelems,
-		enum dma_data_direction dir)
-{
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(sglist, sg, nelems, i)
-		_dma_cache_sync(sg_phys(sg), sg->length, dir);
-}
-
-static int arc_dma_supported(struct device *dev, u64 dma_mask)
-{
-	/* Support 32 bit DMA mask exclusively */
-	return dma_mask == DMA_BIT_MASK(32);
-}
-
-const struct dma_map_ops arc_dma_ops = {
-	.alloc			= arc_dma_alloc,
-	.free			= arc_dma_free,
-	.mmap			= arc_dma_mmap,
-	.map_page		= arc_dma_map_page,
-	.unmap_page		= arc_dma_unmap_page,
-	.map_sg			= arc_dma_map_sg,
-	.unmap_sg		= arc_dma_unmap_sg,
-	.sync_single_for_device	= arc_dma_sync_single_for_device,
-	.sync_single_for_cpu	= arc_dma_sync_single_for_cpu,
-	.sync_sg_for_cpu	= arc_dma_sync_sg_for_cpu,
-	.sync_sg_for_device	= arc_dma_sync_sg_for_device,
-	.dma_supported		= arc_dma_supported,
-};
-EXPORT_SYMBOL(arc_dma_ops);
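
With DMA_NONCOHERENT_OPS selected, all of the per-operation boilerplate deleted
above moves into common code; the architecture now supplies only the two
cache-maintenance hooks. Roughly, the common map path reduces to the following
(a simplified sketch of the generic behaviour, not the exact common-code source):

	static dma_addr_t noncoherent_map_page(struct device *dev,
			struct page *page, unsigned long offset, size_t size,
			enum dma_data_direction dir, unsigned long attrs)
	{
		/* DMA address == physical address on ARC */
		phys_addr_t paddr = page_to_phys(page) + offset;

		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
			arch_sync_dma_for_device(dev, paddr, size, dir);
		return paddr;
	}
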
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index a0b7bd6..b884bbd 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -70,6 +70,8 @@
 	int write = regs->ecr_cause & ECR_C_PROTV_STORE;  /* ST/EX */
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
+	clear_siginfo(&info);
+
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
 	 * 'reference' page table is init_mm.pgd.
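
clear_siginfo() zeroes the whole structure, union padding included, so that
stack garbage cannot leak to user space through unused siginfo fields. It is
essentially a memset wrapper:

	static inline void clear_siginfo(struct siginfo *info)
	{
		memset(info, 0, sizeof(*info));
	}
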
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a7f8e7f..8f460bd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -60,7 +60,6 @@
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS if MMU
 	select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
@@ -96,6 +95,7 @@
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_REL
+	select NEED_DMA_MAP_STATE
 	select NO_BOOTMEM
 	select OF_EARLY_FLATTREE if OF
 	select OF_RESERVED_MEM if OF
@@ -119,9 +119,6 @@
 	select ARCH_HAS_SG_CHAIN
 	bool
 
-config NEED_SG_DMA_LENGTH
-	bool
-
 config ARM_DMA_USE_IOMMU
 	bool
 	select ARM_HAS_SG_CHAIN
@@ -224,9 +221,6 @@
 config ZONE_DMA
 	bool
 
-config NEED_DMA_MAP_STATE
-       def_bool y
-
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
 
@@ -1704,6 +1698,7 @@
 config ARM_MODULE_PLTS
 	bool "Use PLTs to allow module memory to spill over into vmalloc area"
 	depends on MODULES
+	default y
 	help
 	  Allocate PLTs when loading modules so that jumps and calls whose
 	  targets are too far away for their relative offsets to be encoded
@@ -1714,7 +1709,8 @@
 	  rounding up to page size, the actual memory footprint is usually
 	  the same.
 
-	  Say y if you are getting out of memory errors while loading modules
+	  Disabling this is usually safe for small single-platform
+	  configurations. If unsure, say y.
 
 source "mm/Kconfig"
 
@@ -1778,12 +1774,6 @@
 	  and the task is only allowed to execute a few safe syscalls
 	  defined by each seccomp mode.
 
-config SWIOTLB
-	def_bool y
-
-config IOMMU_HELPER
-	def_bool SWIOTLB
-
 config PARAVIRT
 	bool "Enable paravirtualization code"
 	help
@@ -1815,6 +1805,7 @@
 	depends on MMU
 	select ARCH_DMA_ADDR_T_64BIT
 	select ARM_PSCI
+	select SWIOTLB
 	select SWIOTLB_XEN
 	select PARAVIRT
 	help
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index e4e537f..1dc4045 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -106,7 +106,7 @@
 tune-y := $(tune-y)
 
 ifeq ($(CONFIG_AEABI),y)
-CFLAGS_ABI	:=-mabi=aapcs-linux -mno-thumb-interwork -mfpu=vfp
+CFLAGS_ABI	:=-mabi=aapcs-linux -mfpu=vfp
 else
 CFLAGS_ABI	:=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,)
 endif
@@ -135,7 +135,7 @@
 KBUILD_CFLAGS	+=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
 KBUILD_AFLAGS	+=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float
 
-CHECKFLAGS	+= -D__arm__ -m32
+CHECKFLAGS	+= -D__arm__
 
 #Default value
 head-y		:= arch/arm/kernel/head$(MMUEXT).o
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 6a4e734..a3c5fbc 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -113,7 +113,7 @@
 CFLAGS_fdt_rw.o := $(nossp_flags)
 CFLAGS_fdt_wip.o := $(nossp_flags)
 
-ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
+ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj)
 asflags-y := -DZIMAGE
 
 # Supply kernel BSS size to the decompressor via a linker symbol.
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index 4cc09e4..6916d75 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -163,7 +163,7 @@
 			};
 
 			uart0: serial@12000 {
-				compatible = "snps,dw-apb-uart";
+				compatible = "marvell,armada-38x-uart";
 				reg = <0x12000 0x100>;
 				reg-shift = <2>;
 				interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>;
@@ -173,7 +173,7 @@
 			};
 
 			uart1: serial@12100 {
-				compatible = "snps,dw-apb-uart";
+				compatible = "marvell,armada-38x-uart";
 				reg = <0x12100 0x100>;
 				reg-shift = <2>;
 				interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
index 2459d13..f50b194 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
@@ -161,7 +161,7 @@
 	};
 
 	at24@50 {
-		compatible = "at24,24c01";
+		compatible = "atmel,24c01";
 		pagesize = <8>;
 		reg = <0x50>;
 	};
@@ -213,7 +213,7 @@
 			#size-cells = <0>;
 			reg = <6>;
 			eeprom@51 {
-				compatible = "at,24c01";
+				compatible = "atmel,24c01";
 				pagesize = <8>;
 				reg = <0x51>;
 			};
@@ -224,7 +224,7 @@
 			#size-cells = <0>;
 			reg = <7>;
 			eeprom@51 {
-				compatible = "at,24c01";
+				compatible = "atmel,24c01";
 				pagesize = <8>;
 				reg = <0x51>;
 			};
diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
index c0743305..eb96ac3 100644
--- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
+++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
@@ -12,6 +12,8 @@
 			#size-cells = <1>;
 			compatible = "st,stm32mp157-pinctrl";
 			ranges = <0 0x50002000 0xa400>;
+			interrupt-parent = <&exti>;
+			st,syscfg = <&exti 0x60 0xff>;
 			pins-are-numbered;
 
 			gpioa: gpio@50002000 {
@@ -166,6 +168,8 @@
 			compatible = "st,stm32mp157-z-pinctrl";
 			ranges = <0 0x54004000 0x400>;
 			pins-are-numbered;
+			interrupt-parent = <&exti>;
+			st,syscfg = <&exti 0x60 0xff>;
 			status = "disabled";
 
 			gpioz: gpio@54004000 {
diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index 9e17e42..4fa0df8 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -183,6 +183,13 @@
 			status = "disabled";
 		};
 
+		exti: interrupt-controller@5000d000 {
+			compatible = "st,stm32mp1-exti", "syscon";
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0x5000d000 0x400>;
+		};
+
 		usart1: serial@5c000000 {
 			compatible = "st,stm32h7-uart";
 			reg = <0x5c000000 0x400>;
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
index 99207c4..f82cd8c 100644
--- a/arch/arm/crypto/sha1-armv4-large.S
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -1,4 +1,14 @@
 #define __ARM_ARCH__ __LINUX_ARM_ARCH__
+@ SPDX-License-Identifier: GPL-2.0
+
+@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
+@ has relicensed it under the GPLv2. Therefore this program is free software;
+@ you can redistribute it and/or modify it under the terms of the GNU General
+@ Public License version 2 as published by the Free Software Foundation.
+@
+@ The original headers, including the original license headers, are
+@ included below for completeness.
+
 @ ====================================================================
 @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
index fac0533..b9ec440 100644
--- a/arch/arm/crypto/sha256-armv4.pl
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -1,12 +1,19 @@
 #!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
 
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
-#
-# Permission to use under GPL terms is granted.
 # ====================================================================
 
 # SHA256 block procedure for ARMv4. May 2007.
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
index 555a1a8..3b58300 100644
--- a/arch/arm/crypto/sha256-core.S_shipped
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -1,11 +1,18 @@
+@ SPDX-License-Identifier: GPL-2.0
+
+@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
+@ has relicensed it under the GPLv2. Therefore this program is free software;
+@ you can redistribute it and/or modify it under the terms of the GNU General
+@ Public License version 2 as published by the Free Software Foundation.
+@
+@ The original headers, including the original license headers, are
+@ included below for completeness.
 
 @ ====================================================================
 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
 @ CRYPTOGAMS licenses depending on where you obtain it. For further
 @ details see http://www.openssl.org/~appro/cryptogams/.
-@
-@ Permission to use under GPL terms is granted.
 @ ====================================================================
 
 @ SHA256 block procedure for ARMv4. May 2007.
diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl
index a2b11a8..fb5d150 100644
--- a/arch/arm/crypto/sha512-armv4.pl
+++ b/arch/arm/crypto/sha512-armv4.pl
@@ -1,12 +1,19 @@
 #!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
 
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
-#
-# Permission to use under GPL terms is granted.
 # ====================================================================
 
 # SHA512 block procedure for ARMv4. September 2007.
diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped
index 3694c4d..b1c334a 100644
--- a/arch/arm/crypto/sha512-core.S_shipped
+++ b/arch/arm/crypto/sha512-core.S_shipped
@@ -1,11 +1,18 @@
+@ SPDX-License-Identifier: GPL-2.0
+
+@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
+@ has relicensed it under the GPLv2. Therefore this program is free software;
+@ you can redistribute it and/or modify it under the terms of the GNU General
+@ Public License version 2 as published by the Free Software Foundation.
+@
+@ The original headers, including the original license headers, are
+@ included below for completeness.
 
 @ ====================================================================
 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
 @ CRYPTOGAMS licenses depending on where you obtain it. For further
 @ details see http://www.openssl.org/~appro/cryptogams/.
-@
-@ Permission to use under GPL terms is granted.
 @ ====================================================================
 
 @ SHA512 block procedure for ARMv4. September 2007.
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 873e3c1..1d66db9 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -1,3 +1,4 @@
+generic-y += compat.h
 generic-y += current.h
 generic-y += early_ioremap.h
 generic-y += emergency-restart.h
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 9342904..0cd4dcc 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -447,6 +447,14 @@
 	.size \name , . - \name
 	.endm
 
+	.macro	csdb
+#ifdef CONFIG_THUMB2_KERNEL
+	.inst.w	0xf3af8014
+#else
+	.inst	0xe320f014
+#endif
+	.endm
+
 	.macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req
 #ifndef CONFIG_CPU_USE_DOMAINS
 	adds	\tmp, \addr, #\size - 1
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 40f5c41..69772e7 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -17,6 +17,12 @@
 #define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory")
 #define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory")
 #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
+#ifdef CONFIG_THUMB2_KERNEL
+#define CSDB	".inst.w 0xf3af8014"
+#else
+#define CSDB	".inst	0xe320f014"
+#endif
+#define csdb() __asm__ __volatile__(CSDB : : : "memory")
 #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6
 #define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
 				    : : "r" (0) : "memory")
@@ -37,6 +43,13 @@
 #define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #endif
 
+#ifndef CSDB
+#define CSDB
+#endif
+#ifndef csdb
+#define csdb()
+#endif
+
 #ifdef CONFIG_ARM_HEAVY_MB
 extern void (*soc_mb)(void);
 extern void arm_heavy_mb(void);
@@ -63,6 +76,25 @@
 #define __smp_rmb()	__smp_mb()
 #define __smp_wmb()	dmb(ishst)
 
+#ifdef CONFIG_CPU_SPECTRE
+static inline unsigned long array_index_mask_nospec(unsigned long idx,
+						    unsigned long sz)
+{
+	unsigned long mask;
+
+	asm volatile(
+		"cmp	%1, %2\n"
+	"	sbc	%0, %1, %1\n"
+	CSDB
+	: "=r" (mask)
+	: "r" (idx), "Ir" (sz)
+	: "cc");
+
+	return mask;
+}
+#define array_index_mask_nospec array_index_mask_nospec
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* !__ASSEMBLY__ */
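
The cmp/sbc pair above computes an all-ones mask when idx < sz and zero
otherwise (subtracting a register from itself with borrow yields -1 or 0
depending on the carry flag left by the compare), and the CSDB barrier stops
speculation from running ahead of that comparison. Callers normally reach it
through the generic wrapper, along these lines (illustrative; table and
nr_entries are stand-ins):

	if (idx >= nr_entries)
		return -EINVAL;
	/* clamps idx to 0 on the mispredicted path; from <linux/nospec.h> */
	idx = array_index_nospec(idx, nr_entries);
	val = table[idx];
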
diff --git a/arch/arm/include/asm/bugs.h b/arch/arm/include/asm/bugs.h
index a97f1ea..73a99c7 100644
--- a/arch/arm/include/asm/bugs.h
+++ b/arch/arm/include/asm/bugs.h
@@ -10,12 +10,14 @@
 #ifndef __ASM_BUGS_H
 #define __ASM_BUGS_H
 
-#ifdef CONFIG_MMU
 extern void check_writebuffer_bugs(void);
 
-#define check_bugs() check_writebuffer_bugs()
+#ifdef CONFIG_MMU
+extern void check_bugs(void);
+extern void check_other_bugs(void);
 #else
 #define check_bugs() do { } while (0)
+#define check_other_bugs() do { } while (0)
 #endif
 
 #endif
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 4c9fa72..07e27f2 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -65,6 +65,9 @@
 #define __write_sysreg(v, r, w, c, t)	asm volatile(w " " c : : "r" ((t)(v)))
 #define write_sysreg(v, ...)		__write_sysreg(v, __VA_ARGS__)
 
+#define BPIALL				__ACCESS_CP15(c7, 0, c5, 6)
+#define ICIALLU				__ACCESS_CP15(c7, 0, c5, 0)
+
 extern unsigned long cr_alignment;	/* defined in entry-armv.S */
 
 static inline unsigned long get_cr(void)
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index cb54642..2602198 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -77,8 +77,16 @@
 #define ARM_CPU_PART_CORTEX_A12		0x4100c0d0
 #define ARM_CPU_PART_CORTEX_A17		0x4100c0e0
 #define ARM_CPU_PART_CORTEX_A15		0x4100c0f0
+#define ARM_CPU_PART_CORTEX_A53		0x4100d030
+#define ARM_CPU_PART_CORTEX_A57		0x4100d070
+#define ARM_CPU_PART_CORTEX_A72		0x4100d080
+#define ARM_CPU_PART_CORTEX_A73		0x4100d090
+#define ARM_CPU_PART_CORTEX_A75		0x4100d0a0
 #define ARM_CPU_PART_MASK		0xff00fff0
 
+/* Broadcom cores */
+#define ARM_CPU_PART_BRAHMA_B15		0x420000f0
+
 /* DEC implemented cores */
 #define ARM_CPU_PART_SA1100		0x4400a110
 
diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h
index 3b73fdc..8de1100 100644
--- a/arch/arm/include/asm/kgdb.h
+++ b/arch/arm/include/asm/kgdb.h
@@ -77,7 +77,7 @@
 
 #define KGDB_MAX_NO_CPUS	1
 #define BUFMAX			400
-#define NUMREGBYTES		(DBG_MAX_REG_NUM << 2)
+#define NUMREGBYTES		(GDB_MAX_REGS << 2)
 #define NUMCRITREGBYTES		(32 << 2)
 
 #define _R0			0
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 5a953ec..231e87a 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -61,8 +61,6 @@
 extern char __kvm_hyp_init[];
 extern char __kvm_hyp_init_end[];
 
-extern char __kvm_hyp_vector[];
-
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index c7c28c8..343fc9e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -21,6 +21,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_types.h>
+#include <asm/cputype.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -311,8 +312,17 @@
 
 static inline bool kvm_arm_harden_branch_predictor(void)
 {
-	/* No way to detect it yet, pretend it is not there. */
-	return false;
+	switch (read_cpuid_part()) {
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	case ARM_CPU_PART_BRAHMA_B15:
+	case ARM_CPU_PART_CORTEX_A12:
+	case ARM_CPU_PART_CORTEX_A15:
+	case ARM_CPU_PART_CORTEX_A17:
+		return true;
+#endif
+	default:
+		return false;
+	}
 }
 
 static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index f675162..c94d291 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -327,7 +327,28 @@
 
 static inline void *kvm_get_hyp_vector(void)
 {
-	return kvm_ksym_ref(__kvm_hyp_vector);
+	switch (read_cpuid_part()) {
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	case ARM_CPU_PART_CORTEX_A12:
+	case ARM_CPU_PART_CORTEX_A17:
+	{
+		extern char __kvm_hyp_vector_bp_inv[];
+		return kvm_ksym_ref(__kvm_hyp_vector_bp_inv);
+	}
+
+	case ARM_CPU_PART_BRAHMA_B15:
+	case ARM_CPU_PART_CORTEX_A15:
+	{
+		extern char __kvm_hyp_vector_ic_inv[];
+		return kvm_ksym_ref(__kvm_hyp_vector_ic_inv);
+	}
+#endif
+	default:
+	{
+		extern char __kvm_hyp_vector[];
+		return kvm_ksym_ref(__kvm_hyp_vector);
+	}
+	}
 }
 
 static inline int kvm_map_vectors(void)
diff --git a/arch/arm/include/asm/mpu.h b/arch/arm/include/asm/mpu.h
index 6d1491c..5e088c8 100644
--- a/arch/arm/include/asm/mpu.h
+++ b/arch/arm/include/asm/mpu.h
@@ -12,60 +12,101 @@
 /* ID_MMFR0 data relevant to MPU */
 #define MMFR0_PMSA		(0xF << 4)
 #define MMFR0_PMSAv7		(3 << 4)
+#define MMFR0_PMSAv8		(4 << 4)
 
 /* MPU D/I Size Register fields */
-#define MPU_RSR_SZ		1
-#define MPU_RSR_EN		0
-#define MPU_RSR_SD		8
+#define PMSAv7_RSR_SZ		1
+#define PMSAv7_RSR_EN		0
+#define PMSAv7_RSR_SD		8
 
 /* Number of subregions (SD) */
-#define MPU_NR_SUBREGS		8
-#define MPU_MIN_SUBREG_SIZE	256
+#define PMSAv7_NR_SUBREGS	8
+#define PMSAv7_MIN_SUBREG_SIZE	256
 
 /* The D/I RSR value for an enabled region spanning the whole of memory */
-#define MPU_RSR_ALL_MEM		63
+#define PMSAv7_RSR_ALL_MEM	63
 
 /* Individual bits in the DR/IR ACR */
-#define MPU_ACR_XN		(1 << 12)
-#define MPU_ACR_SHARED		(1 << 2)
+#define PMSAv7_ACR_XN		(1 << 12)
+#define PMSAv7_ACR_SHARED	(1 << 2)
 
 /* C, B and TEX[2:0] bits only have semantic meanings when grouped */
-#define MPU_RGN_CACHEABLE	0xB
-#define MPU_RGN_SHARED_CACHEABLE (MPU_RGN_CACHEABLE | MPU_ACR_SHARED)
-#define MPU_RGN_STRONGLY_ORDERED 0
+#define PMSAv7_RGN_CACHEABLE		0xB
+#define PMSAv7_RGN_SHARED_CACHEABLE	(PMSAv7_RGN_CACHEABLE | PMSAv7_ACR_SHARED)
+#define PMSAv7_RGN_STRONGLY_ORDERED	0
 
 /* Main region should only be shared for SMP */
 #ifdef CONFIG_SMP
-#define MPU_RGN_NORMAL		(MPU_RGN_CACHEABLE | MPU_ACR_SHARED)
+#define PMSAv7_RGN_NORMAL	(PMSAv7_RGN_CACHEABLE | PMSAv7_ACR_SHARED)
 #else
-#define MPU_RGN_NORMAL		MPU_RGN_CACHEABLE
+#define PMSAv7_RGN_NORMAL	PMSAv7_RGN_CACHEABLE
 #endif
 
 /* Access permission bits of ACR (only define those that we use)*/
-#define MPU_AP_PL1RO_PL0NA	(0x5 << 8)
-#define MPU_AP_PL1RW_PL0RW	(0x3 << 8)
-#define MPU_AP_PL1RW_PL0R0	(0x2 << 8)
-#define MPU_AP_PL1RW_PL0NA	(0x1 << 8)
+#define PMSAv7_AP_PL1RO_PL0NA	(0x5 << 8)
+#define PMSAv7_AP_PL1RW_PL0RW	(0x3 << 8)
+#define PMSAv7_AP_PL1RW_PL0R0	(0x2 << 8)
+#define PMSAv7_AP_PL1RW_PL0NA	(0x1 << 8)
+
+#define PMSAv8_BAR_XN		1
+
+#define PMSAv8_LAR_EN		1
+#define PMSAv8_LAR_IDX(n)	(((n) & 0x7) << 1)
+
+
+#define PMSAv8_AP_PL1RW_PL0NA	(0 << 1)
+#define PMSAv8_AP_PL1RW_PL0RW	(1 << 1)
+#define PMSAv8_AP_PL1RO_PL0RO	(3 << 1)
+
+#ifdef CONFIG_SMP
+#define PMSAv8_RGN_SHARED	(3 << 3) // inner shareable
+#else
+#define PMSAv8_RGN_SHARED	(0 << 3)
+#endif
+
+#define PMSAv8_RGN_DEVICE_nGnRnE	0
+#define PMSAv8_RGN_NORMAL		1
+
+#define PMSAv8_MAIR(attr, mt)	((attr) << ((mt) * 8))
+
+#ifdef CONFIG_CPU_V7M
+#define PMSAv8_MINALIGN		32
+#else
+#define PMSAv8_MINALIGN		64
+#endif
 
 /* For minimal static MPU region configurations */
-#define MPU_PROBE_REGION	0
-#define MPU_BG_REGION		1
-#define MPU_RAM_REGION		2
-#define MPU_ROM_REGION		3
+#define PMSAv7_PROBE_REGION	0
+#define PMSAv7_BG_REGION	1
+#define PMSAv7_RAM_REGION	2
+#define PMSAv7_ROM_REGION	3
+
+/* Fixed for PMSAv8 only */
+#define PMSAv8_XIP_REGION	0
+#define PMSAv8_KERNEL_REGION	1
 
 /* Maximum number of regions Linux is interested in */
-#define MPU_MAX_REGIONS		16
+#define MPU_MAX_REGIONS	16
 
-#define MPU_DATA_SIDE		0
-#define MPU_INSTR_SIDE		1
+#define PMSAv7_DATA_SIDE	0
+#define PMSAv7_INSTR_SIDE	1
 
 #ifndef __ASSEMBLY__
 
 struct mpu_rgn {
 	/* Assume same attributes for d/i-side  */
-	u32 drbar;
-	u32 drsr;
-	u32 dracr;
+	union {
+		u32 drbar;   /* PMSAv7 */
+		u32 prbar;   /* PMSAv8 */
+	};
+	union {
+		u32 drsr;   /* PMSAv7 */
+		u32 prlar;  /* PMSAv8 */
+	};
+	union {
+		u32 dracr;  /* PMSAv7 */
+		u32 unused; /* not used in PMSAv8 */
+	};
 };
 
 struct mpu_rgn_info {
@@ -75,16 +116,17 @@
 extern struct mpu_rgn_info mpu_rgn_info;
 
 #ifdef CONFIG_ARM_MPU
+extern void __init pmsav7_adjust_lowmem_bounds(void);
+extern void __init pmsav8_adjust_lowmem_bounds(void);
 
-extern void __init adjust_lowmem_bounds_mpu(void);
-extern void __init mpu_setup(void);
-
+extern void __init pmsav7_setup(void);
+extern void __init pmsav8_setup(void);
 #else
-
-static inline void adjust_lowmem_bounds_mpu(void) {}
-static inline void mpu_setup(void) {}
-
-#endif /* !CONFIG_ARM_MPU */
+static inline void pmsav7_adjust_lowmem_bounds(void) {};
+static inline void pmsav8_adjust_lowmem_bounds(void) {};
+static inline void pmsav7_setup(void) {};
+static inline void pmsav8_setup(void) {};
+#endif
 
 #endif /* __ASSEMBLY__ */
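
The PMSAv8 macros encode a region as a base/limit pair: PRBAR carries the base
address together with the access-permission and shareability bits, while PRLAR
carries the limit address, the MAIR attribute index and the enable bit. A
normal-memory kernel region would be encoded along these lines (illustrative,
mirroring what the head-nommu.S setup code further below does in assembly):

	u32 prbar = KERNEL_START | PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED;
	u32 prlar = ((KERNEL_END - 1) & ~(PMSAv8_MINALIGN - 1)) |
		    PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
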
 
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index 1f0de80..0abd389 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -19,13 +19,6 @@
 }
 #endif /* CONFIG_PCI_DOMAINS */
 
-/*
- * The PCI address space does equal the physical memory address space.
- * The networking and block device layers use this boolean for bounce
- * buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS     (1)
-
 #define HAVE_PCI_MMAP
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE
 
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index f2e1af4..e25f439 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -37,6 +37,10 @@
 	 */
 	void (*_proc_init)(void);
 	/*
+	 * Check for processor bugs
+	 */
+	void (*check_bugs)(void);
+	/*
 	 * Disable any processor specifics
 	 */
 	void (*_proc_fin)(void);
diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h
index 78f6db1..8e76db8 100644
--- a/arch/arm/include/asm/system_misc.h
+++ b/arch/arm/include/asm/system_misc.h
@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include <linux/irqflags.h>
 #include <linux/reboot.h>
+#include <linux/percpu.h>
 
 extern void cpu_init(void);
 
@@ -15,6 +16,20 @@
 extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 extern void (*arm_pm_idle)(void);
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+typedef void (*harden_branch_predictor_fn_t)(void);
+DECLARE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn);
+static inline void harden_branch_predictor(void)
+{
+	harden_branch_predictor_fn_t fn = per_cpu(harden_branch_predictor_fn,
+						  smp_processor_id());
+	if (fn)
+		fn();
+}
+#else
+#define harden_branch_predictor() do { } while (0)
+#endif
+
 #define UDBG_UNDEFINED	(1 << 0)
 #define UDBG_SYSCALL	(1 << 1)
 #define UDBG_BADABORT	(1 << 2)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 0bf2347..3d614e9 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -152,7 +152,7 @@
 #define __get_user_check(x, p)						\
 	({								\
 		unsigned long __limit = current_thread_info()->addr_limit - 1; \
-		register const typeof(*(p)) __user *__p asm("r0") = (p);\
+		register typeof(*(p)) __user *__p asm("r0") = (p);	\
 		register typeof(x) __r2 asm("r2");			\
 		register unsigned long __l asm("r1") = __limit;		\
 		register int __e asm("r0");				\
diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h
index 634e771..187ccf6 100644
--- a/arch/arm/include/asm/v7m.h
+++ b/arch/arm/include/asm/v7m.h
@@ -64,9 +64,17 @@
 #define MPU_CTRL_ENABLE		1
 #define MPU_CTRL_PRIVDEFENA	(1 << 2)
 
-#define MPU_RNR			0x98
-#define MPU_RBAR		0x9c
-#define MPU_RASR		0xa0
+#define PMSAv7_RNR		0x98
+#define PMSAv7_RBAR		0x9c
+#define PMSAv7_RASR		0xa0
+
+#define PMSAv8_RNR		0x98
+#define PMSAv8_RBAR		0x9c
+#define PMSAv8_RLAR		0xa0
+#define PMSAv8_RBAR_A(n)	(PMSAv8_RBAR + 8*(n))
+#define PMSAv8_RLAR_A(n)	(PMSAv8_RLAR + 8*(n))
+#define PMSAv8_MAIR0		0xc0
+#define PMSAv8_MAIR1		0xc4
 
 /* Cache operations */
 #define	V7M_SCB_ICIALLU		0x250	/* I-cache invalidate all to PoU */
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index b59ac4b..8cad594 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -31,6 +31,7 @@
 obj-y		+= entry-armv.o
 endif
 
+obj-$(CONFIG_MMU)		+= bugs.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 obj-$(CONFIG_ISA_DMA_API)	+= dma.o
 obj-$(CONFIG_FIQ)		+= fiq.o fiqasm.o
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index f369ece..27c5381 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -194,9 +194,11 @@
   DEFINE(MPU_RNG_INFO_USED,	offsetof(struct mpu_rgn_info, used));
 
   DEFINE(MPU_RNG_SIZE,		sizeof(struct mpu_rgn));
-  DEFINE(MPU_RGN_DRBAR,		offsetof(struct mpu_rgn, drbar));
-  DEFINE(MPU_RGN_DRSR,		offsetof(struct mpu_rgn, drsr));
-  DEFINE(MPU_RGN_DRACR,		offsetof(struct mpu_rgn, dracr));
+  DEFINE(MPU_RGN_DRBAR,	offsetof(struct mpu_rgn, drbar));
+  DEFINE(MPU_RGN_DRSR,	offsetof(struct mpu_rgn, drsr));
+  DEFINE(MPU_RGN_DRACR,	offsetof(struct mpu_rgn, dracr));
+  DEFINE(MPU_RGN_PRBAR,	offsetof(struct mpu_rgn, prbar));
+  DEFINE(MPU_RGN_PRLAR,	offsetof(struct mpu_rgn, prlar));
 #endif
   return 0; 
 }
diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c
new file mode 100644
index 0000000..7be5113
--- /dev/null
+++ b/arch/arm/kernel/bugs.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <asm/bugs.h>
+#include <asm/proc-fns.h>
+
+void check_other_bugs(void)
+{
+#ifdef MULTI_CPU
+	if (processor.check_bugs)
+		processor.check_bugs();
+#endif
+}
+
+void __init check_bugs(void)
+{
+	check_writebuffer_bugs();
+	check_other_bugs();
+}
diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c
index e651c4d..6739d37 100644
--- a/arch/arm/kernel/dma.c
+++ b/arch/arm/kernel/dma.c
@@ -276,21 +276,9 @@
 	return 0;
 }
 
-static int proc_dma_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_dma_show, NULL);
-}
-
-static const struct file_operations proc_dma_operations = {
-	.open		= proc_dma_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_dma_init(void)
 {
-	proc_create("dma", 0, NULL, &proc_dma_operations);
+	proc_create_single("dma", 0, NULL, proc_dma_show);
 	return 0;
 }
 
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 3c4f887..20df608 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -242,9 +242,7 @@
 	tst	r10, #_TIF_SYSCALL_WORK		@ are we tracing syscalls?
 	bne	__sys_trace
 
-	cmp	scno, #NR_syscalls		@ check upper syscall limit
-	badr	lr, ret_fast_syscall		@ return address
-	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
+	invoke_syscall tbl, scno, r10, ret_fast_syscall
 
 	add	r1, sp, #S_OFF
 2:	cmp	scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
@@ -278,14 +276,8 @@
 	mov	r1, scno
 	add	r0, sp, #S_OFF
 	bl	syscall_trace_enter
-
-	badr	lr, __sys_trace_return		@ return address
-	mov	scno, r0			@ syscall number (possibly new)
-	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
-	cmp	scno, #NR_syscalls		@ check upper syscall limit
-	ldmccia	r1, {r0 - r6}			@ have to reload r0 - r6
-	stmccia	sp, {r4, r5}			@ and update the stack args
-	ldrcc	pc, [tbl, scno, lsl #2]		@ call sys_* routine
+	mov	scno, r0
+	invoke_syscall tbl, scno, r10, __sys_trace_return, reload=1
 	cmp	scno, #-1			@ skip the syscall?
 	bne	2b
 	add	sp, sp, #S_OFF			@ restore stack
@@ -363,6 +355,10 @@
 		bic	scno, r0, #__NR_OABI_SYSCALL_BASE
 		cmp	scno, #__NR_syscall - __NR_SYSCALL_BASE
 		cmpne	scno, #NR_syscalls	@ check range
+#ifdef CONFIG_CPU_SPECTRE
+		movhs	scno, #0
+		csdb
+#endif
 		stmloia	sp, {r5, r6}		@ shuffle args
 		movlo	r0, r1
 		movlo	r1, r2
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 0f07579..7734248 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -378,6 +378,31 @@
 #endif
 	.endm
 
+	.macro	invoke_syscall, table, nr, tmp, ret, reload=0
+#ifdef CONFIG_CPU_SPECTRE
+	mov	\tmp, \nr
+	cmp	\tmp, #NR_syscalls		@ check upper syscall limit
+	movcs	\tmp, #0
+	csdb
+	badr	lr, \ret			@ return address
+	.if	\reload
+	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
+	ldmccia	r1, {r0 - r6}			@ reload r0-r6
+	stmccia	sp, {r4, r5}			@ update stack arguments
+	.endif
+	ldrcc	pc, [\table, \tmp, lsl #2]	@ call sys_* routine
+#else
+	cmp	\nr, #NR_syscalls		@ check upper syscall limit
+	badr	lr, \ret			@ return address
+	.if	\reload
+	add	r1, sp, #S_R0 + S_OFF		@ pointer to regs
+	ldmccia	r1, {r0 - r6}			@ reload r0-r6
+	stmccia	sp, {r4, r5}			@ update stack arguments
+	.endif
+	ldrcc	pc, [\table, \nr, lsl #2]	@ call sys_* routine
+#endif
+	.endm
+
 /*
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - r0 to r6.
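
In C terms, the CONFIG_CPU_SPECTRE variant of invoke_syscall clamps the
syscall number before it can index the table; architecturally an out-of-range
number never reaches the table at all (the load is conditional), so the clamp
plus the CSDB barrier only constrain the speculative path (pseudo-C sketch of
the assembly above):

	unsigned int nr = scno;
	if (nr >= NR_syscalls)
		nr = 0;			/* seen only under misspeculation */
	csdb();				/* speculation barrier */
	if (scno < NR_syscalls)
		return sys_call_table[nr](...);
	/* out-of-range numbers fall through to the ENOSYS path */
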
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index 2e38f85..dd546d6 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -68,14 +68,6 @@
 	beq	__error_p				@ yes, error 'p'
 
 #ifdef CONFIG_ARM_MPU
-	/* Calculate the size of a region covering just the kernel */
-	ldr	r5, =PLAT_PHYS_OFFSET		@ Region start: PHYS_OFFSET
-	ldr     r6, =(_end)			@ Cover whole kernel
-	sub	r6, r6, r5			@ Minimum size of region to map
-	clz	r6, r6				@ Region size must be 2^N...
-	rsb	r6, r6, #31			@ ...so round up region size
-	lsl	r6, r6, #MPU_RSR_SZ		@ Put size in right field
-	orr	r6, r6, #(1 << MPU_RSR_EN)	@ Set region enabled bit
 	bl	__setup_mpu
 #endif
 
@@ -83,8 +75,8 @@
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
 	ret	r12
-1:	bl	__after_proc_init
-	b	__mmap_switched
+1:	ldr	lr, =__mmap_switched
+	b	__after_proc_init
 ENDPROC(stext)
 
 #ifdef CONFIG_SMP
@@ -110,8 +102,6 @@
 	ldr	r7, __secondary_data
 
 #ifdef CONFIG_ARM_MPU
-	/* Use MPU region info supplied by __cpu_up */
-	ldr	r6, [r7]			@ get secondary_data.mpu_rgn_info
 	bl      __secondary_setup_mpu		@ Initialize the MPU
 #endif
 
@@ -133,12 +123,45 @@
 /*
  * Set the Control Register and Read the process ID.
  */
+	.text
 __after_proc_init:
+#ifdef CONFIG_ARM_MPU
+M_CLASS(movw	r12, #:lower16:BASEADDR_V7M_SCB)
+M_CLASS(movt	r12, #:upper16:BASEADDR_V7M_SCB)
+M_CLASS(ldr	r3, [r12, 0x50])
+AR_CLASS(mrc	p15, 0, r3, c0, c1, 4)          @ Read ID_MMFR0
+	and	r3, r3, #(MMFR0_PMSA)           @ PMSA field
+	teq	r3, #(MMFR0_PMSAv7)             @ PMSA v7
+	beq	1f
+	teq	r3, #(MMFR0_PMSAv8)		@ PMSA v8
+	/*
+	 * Memory region attributes for PMSAv8:
+	 *
+	 *   n = AttrIndx[2:0]
+	 *                      n       MAIR
+	 *   DEVICE_nGnRnE      000     00000000
+	 *   NORMAL             001     11111111
+	 */
+	ldreq	r3, =PMSAv8_MAIR(0x00, PMSAv8_RGN_DEVICE_nGnRnE) | \
+		     PMSAv8_MAIR(0xff, PMSAv8_RGN_NORMAL)
+AR_CLASS(mcreq	p15, 0, r3, c10, c2, 0)		@ MAIR 0
+M_CLASS(streq	r3, [r12, #PMSAv8_MAIR0])
+	moveq	r3, #0
+AR_CLASS(mcreq	p15, 0, r3, c10, c2, 1)		@ MAIR 1
+M_CLASS(streq	r3, [r12, #PMSAv8_MAIR1])
+
+1:
+#endif
 #ifdef CONFIG_CPU_CP15
 	/*
 	 * CP15 system control register value returned in r0 from
 	 * the CPU init function.
 	 */
+
+#ifdef CONFIG_ARM_MPU
+	biceq	r0, r0, #CR_BR			@ Disable the 'default mem-map'
+	orreq	r0, r0, #CR_M			@ Set SCTLR.M (MPU on)
+#endif
 #if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
 	orr	r0, r0, #CR_A
 #else
@@ -154,7 +177,15 @@
 	bic	r0, r0, #CR_I
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
+	isb
 #elif defined (CONFIG_CPU_V7M)
+#ifdef CONFIG_ARM_MPU
+	ldreq	r3, [r12, MPU_CTRL]
+	biceq	r3, #MPU_CTRL_PRIVDEFENA
+	orreq	r3, #MPU_CTRL_ENABLE
+	streq	r3, [r12, MPU_CTRL]
+	isb
+#endif
 	/* For V7M systems we want to modify the CCR similarly to the SCTLR */
 #ifdef CONFIG_CPU_DCACHE_DISABLE
 	bic	r0, r0, #V7M_SCB_CCR_DC
@@ -165,9 +196,7 @@
 #ifdef CONFIG_CPU_ICACHE_DISABLE
 	bic	r0, r0, #V7M_SCB_CCR_IC
 #endif
-	movw	r3, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
-	movt	r3, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
-	str	r0, [r3]
+	str	r0, [r12, V7M_SCB_CCR]
 #endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */
 	ret	lr
 ENDPROC(__after_proc_init)
@@ -184,7 +213,7 @@
 .endm
 
 /* Setup a single MPU region, either D or I side (D-side for unified) */
-.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE, unused
+.macro setup_region bar, acr, sr, side = PMSAv7_DATA_SIDE, unused
 	mcr	p15, 0, \bar, c6, c1, (0 + \side)	@ I/DRBAR
 	mcr	p15, 0, \acr, c6, c1, (4 + \side)	@ I/DRACR
 	mcr	p15, 0, \sr, c6, c1, (2 + \side)		@ I/DRSR
@@ -192,14 +221,14 @@
 #else
 .macro set_region_nr tmp, rgnr, base
 	mov	\tmp, \rgnr
-	str     \tmp, [\base, #MPU_RNR]
+	str     \tmp, [\base, #PMSAv7_RNR]
 .endm
 
 .macro setup_region bar, acr, sr, unused, base
 	lsl     \acr, \acr, #16
 	orr     \acr, \acr, \sr
-	str     \bar, [\base, #MPU_RBAR]
-	str     \acr, [\base, #MPU_RASR]
+	str     \bar, [\base, #PMSAv7_RBAR]
+	str     \acr, [\base, #PMSAv7_RASR]
 .endm
 
 #endif
@@ -210,8 +239,9 @@
  * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6
  * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page
  *
- * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION
+ * r6: Value to be written to DRSR (and IRSR if required) for PMSAv7_RAM_REGION
 */
+	__HEAD
 
 ENTRY(__setup_mpu)
 
@@ -223,7 +253,22 @@
 M_CLASS(ldr	r0, [r12, 0x50])
 	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
 	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
-	bxne	lr
+	beq	__setup_pmsa_v7
+	teq	r0, #(MMFR0_PMSAv8)		@ PMSA v8
+	beq	__setup_pmsa_v8
+
+	ret	lr
+ENDPROC(__setup_mpu)
+
+ENTRY(__setup_pmsa_v7)
+	/* Calculate the size of a region covering just the kernel */
+	ldr	r5, =PLAT_PHYS_OFFSET		@ Region start: PHYS_OFFSET
+	ldr     r6, =(_end)			@ Cover whole kernel
+	sub	r6, r6, r5			@ Minimum size of region to map
+	clz	r6, r6				@ Region size must be 2^N...
+	rsb	r6, r6, #31			@ ...so round up region size
+	lsl	r6, r6, #PMSAv7_RSR_SZ		@ Put size in right field
+	orr	r6, r6, #(1 << PMSAv7_RSR_EN)	@ Set region enabled bit
 
 	/* Determine whether the D/I-side memory map is unified. We set the
 	 * flags here and continue to use them for the rest of this function */
@@ -234,77 +279,189 @@
 	tst	r0, #MPUIR_nU			@ MPUIR_nU = 0 for unified
 
 	/* Setup second region first to free up r6 */
-	set_region_nr r0, #MPU_RAM_REGION, r12
+	set_region_nr r0, #PMSAv7_RAM_REGION, r12
 	isb
 	/* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */
 	ldr	r0, =PLAT_PHYS_OFFSET		@ RAM starts at PHYS_OFFSET
-	ldr	r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL)
+	ldr	r5,=(PMSAv7_AP_PL1RW_PL0RW | PMSAv7_RGN_NORMAL)
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ PHYS_OFFSET, shared, enabled
+	setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12	@ PHYS_OFFSET, shared, enabled
 	beq	1f					@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE, r12	@ PHYS_OFFSET, shared, enabled
+	setup_region r0, r5, r6, PMSAv7_INSTR_SIDE, r12	@ PHYS_OFFSET, shared, enabled
 1:	isb
 
 	/* First/background region */
-	set_region_nr r0, #MPU_BG_REGION, r12
+	set_region_nr r0, #PMSAv7_BG_REGION, r12
 	isb
 	/* Execute Never,  strongly ordered, inaccessible to PL0, rw PL1  */
 	mov	r0, #0				@ BG region starts at 0x0
-	ldr	r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA)
-	mov	r6, #MPU_RSR_ALL_MEM		@ 4GB region, enabled
+	ldr	r5,=(PMSAv7_ACR_XN | PMSAv7_RGN_STRONGLY_ORDERED | PMSAv7_AP_PL1RW_PL0NA)
+	mov	r6, #PMSAv7_RSR_ALL_MEM		@ 4GB region, enabled
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ 0x0, BG region, enabled
+	setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12	@ 0x0, BG region, enabled
 	beq	2f					@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE r12	@ 0x0, BG region, enabled
+	setup_region r0, r5, r6, PMSAv7_INSTR_SIDE r12	@ 0x0, BG region, enabled
 2:	isb
 
 #ifdef CONFIG_XIP_KERNEL
-	set_region_nr r0, #MPU_ROM_REGION, r12
+	set_region_nr r0, #PMSAv7_ROM_REGION, r12
 	isb
 
-	ldr	r5,=(MPU_AP_PL1RO_PL0NA | MPU_RGN_NORMAL)
+	ldr	r5,=(PMSAv7_AP_PL1RO_PL0NA | PMSAv7_RGN_NORMAL)
 
 	ldr	r0, =CONFIG_XIP_PHYS_ADDR		@ ROM start
 	ldr     r6, =(_exiprom)				@ ROM end
 	sub	r6, r6, r0				@ Minimum size of region to map
 	clz	r6, r6					@ Region size must be 2^N...
 	rsb	r6, r6, #31				@ ...so round up region size
-	lsl	r6, r6, #MPU_RSR_SZ			@ Put size in right field
-	orr	r6, r6, #(1 << MPU_RSR_EN)		@ Set region enabled bit
+	lsl	r6, r6, #PMSAv7_RSR_SZ			@ Put size in right field
+	orr	r6, r6, #(1 << PMSAv7_RSR_EN)		@ Set region enabled bit
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
+	setup_region r0, r5, r6, PMSAv7_DATA_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
 	beq	3f					@ Memory-map not unified
-	setup_region r0, r5, r6, MPU_INSTR_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
+	setup_region r0, r5, r6, PMSAv7_INSTR_SIDE, r12	@ XIP_PHYS_ADDR, shared, enabled
 3:	isb
 #endif
+	ret	lr
+ENDPROC(__setup_pmsa_v7)
 
-	/* Enable the MPU */
-AR_CLASS(mrc	p15, 0, r0, c1, c0, 0)		@ Read SCTLR
-AR_CLASS(bic	r0, r0, #CR_BR)			@ Disable the 'default mem-map'
-AR_CLASS(orr	r0, r0, #CR_M)			@ Set SCTRL.M (MPU on)
-AR_CLASS(mcr	p15, 0, r0, c1, c0, 0)		@ Enable MPU
-
-M_CLASS(ldr	r0, [r12, #MPU_CTRL])
-M_CLASS(bic	r0, #MPU_CTRL_PRIVDEFENA)
-M_CLASS(orr	r0, #MPU_CTRL_ENABLE)
-M_CLASS(str	r0, [r12, #MPU_CTRL])
+ENTRY(__setup_pmsa_v8)
+	mov	r0, #0
+AR_CLASS(mcr	p15, 0, r0, c6, c2, 1)		@ PRSEL
+M_CLASS(str	r0, [r12, #PMSAv8_RNR])
 	isb
 
+#ifdef CONFIG_XIP_KERNEL
+	ldr	r5, =CONFIG_XIP_PHYS_ADDR		@ ROM start
+	ldr     r6, =(_exiprom)				@ ROM end
+	sub	r6, r6, #1
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN)
+
+AR_CLASS(mcr	p15, 0, r5, c6, c8, 0)			@ PRBAR0
+AR_CLASS(mcr	p15, 0, r6, c6, c8, 1)			@ PRLAR0
+M_CLASS(str	r5, [r12, #PMSAv8_RBAR_A(0)])
+M_CLASS(str	r6, [r12, #PMSAv8_RLAR_A(0)])
+#endif
+
+	ldr	r5, =KERNEL_START
+	ldr	r6, =KERNEL_END
+	sub	r6, r6, #1
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN)
+
+AR_CLASS(mcr	p15, 0, r5, c6, c8, 4)			@ PRBAR1
+AR_CLASS(mcr	p15, 0, r6, c6, c8, 5)			@ PRLAR1
+M_CLASS(str	r5, [r12, #PMSAv8_RBAR_A(1)])
+M_CLASS(str	r6, [r12, #PMSAv8_RLAR_A(1)])
+
+	/* Setup Background: 0x0 - min(KERNEL_START, XIP_PHYS_ADDR) */
+#ifdef CONFIG_XIP_KERNEL
+	ldr	r6, =KERNEL_START
+	ldr	r5, =CONFIG_XIP_PHYS_ADDR
+	cmp	r6, r5
+	movcs	r6, r5
+#else
+	ldr	r6, =KERNEL_START
+#endif
+	cmp	r6, #0
+	beq	1f
+
+	mov	r5, #0
+	sub	r6, r6, #1
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN)
+
+AR_CLASS(mcr	p15, 0, r5, c6, c9, 0)			@ PRBAR2
+AR_CLASS(mcr	p15, 0, r6, c6, c9, 1)			@ PRLAR2
+M_CLASS(str	r5, [r12, #PMSAv8_RBAR_A(2)])
+M_CLASS(str	r6, [r12, #PMSAv8_RLAR_A(2)])
+
+1:
+	/* Setup Background: max(KERNEL_END, _exiprom) - 0xffffffff */
+#ifdef CONFIG_XIP_KERNEL
+	ldr	r5, =KERNEL_END
+	ldr	r6, =(_exiprom)
+	cmp	r5, r6
+	movcc	r5, r6
+#else
+	ldr	r5, =KERNEL_END
+#endif
+	mov	r6, #0xffffffff
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN)
+
+AR_CLASS(mcr	p15, 0, r5, c6, c9, 4)			@ PRBAR3
+AR_CLASS(mcr	p15, 0, r6, c6, c9, 5)			@ PRLAR3
+M_CLASS(str	r5, [r12, #PMSAv8_RBAR_A(3)])
+M_CLASS(str	r6, [r12, #PMSAv8_RLAR_A(3)])
+
+#ifdef CONFIG_XIP_KERNEL
+	/* Setup Background: min(_exiprom, KERNEL_END) - max(KERNEL_START, XIP_PHYS_ADDR) */
+	ldr	r5, =(_exiprom)
+	ldr	r6, =KERNEL_END
+	cmp	r5, r6
+	movcs	r5, r6
+
+	ldr	r6, =KERNEL_START
+	ldr	r0, =CONFIG_XIP_PHYS_ADDR
+	cmp	r6, r0
+	movcc	r6, r0
+
+	sub	r6, r6, #1
+	bic	r6, r6, #(PMSAv8_MINALIGN - 1)
+
+	orr	r5, r5, #(PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN)
+	orr	r6, r6, #(PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN)
+
+#ifdef CONFIG_CPU_V7M
+	/* There is no alias for n == 4 */
+	mov	r0, #4
+	str	r0, [r12, #PMSAv8_RNR]			@ PRSEL
+	isb
+
+	str	r5, [r12, #PMSAv8_RBAR_A(0)]
+	str	r6, [r12, #PMSAv8_RLAR_A(0)]
+#else
+	mcr	p15, 0, r5, c6, c10, 1			@ PRBAR4
+	mcr	p15, 0, r6, c6, c10, 2			@ PRLAR4
+#endif
+#endif
 	ret	lr
-ENDPROC(__setup_mpu)
+ENDPROC(__setup_pmsa_v8)
 
 #ifdef CONFIG_SMP
 /*
  * r6: pointer at mpu_rgn_info
  */
 
+	.text
 ENTRY(__secondary_setup_mpu)
+	/* Use MPU region info supplied by __cpu_up */
+	ldr	r6, [r7]			@ get secondary_data.mpu_rgn_info
+
 	/* Probe for v7 PMSA compliance */
 	mrc	p15, 0, r0, c0, c1, 4		@ Read ID_MMFR0
 	and	r0, r0, #(MMFR0_PMSA)		@ PMSA field
 	teq	r0, #(MMFR0_PMSAv7)		@ PMSA v7
-	bne	__error_p
+	beq	__secondary_setup_pmsa_v7
+	teq	r0, #(MMFR0_PMSAv8)		@ PMSA v8
+	beq	__secondary_setup_pmsa_v8
+	b	__error_p
+ENDPROC(__secondary_setup_mpu)
 
+/*
+ * r6: pointer at mpu_rgn_info
+ */
+ENTRY(__secondary_setup_pmsa_v7)
 	/* Determine whether the D/I-side memory map is unified. We set the
 	 * flags here and continue to use them for the rest of this function */
 	mrc	p15, 0, r0, c0, c0, 4		@ MPUIR
@@ -328,25 +485,45 @@
 	ldr	r6, [r3, #MPU_RGN_DRSR]
 	ldr	r5, [r3, #MPU_RGN_DRACR]
 
-	setup_region r0, r5, r6, MPU_DATA_SIDE
+	setup_region r0, r5, r6, PMSAv7_DATA_SIDE
 	beq	2f
-	setup_region r0, r5, r6, MPU_INSTR_SIDE
+	setup_region r0, r5, r6, PMSAv7_INSTR_SIDE
 2:	isb
 
 	mrc	p15, 0, r0, c0, c0, 4		@ Reevaluate the MPUIR
 	cmp	r4, #0
 	bgt	1b
 
-	/* Enable the MPU */
-	mrc	p15, 0, r0, c1, c0, 0		@ Read SCTLR
-	bic	r0, r0, #CR_BR			@ Disable the 'default mem-map'
-	orr	r0, r0, #CR_M			@ Set SCTRL.M (MPU on)
-	mcr	p15, 0, r0, c1, c0, 0		@ Enable MPU
+	ret	lr
+ENDPROC(__secondary_setup_pmsa_v7)
+
+ENTRY(__secondary_setup_pmsa_v8)
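+	/* Replay the regions recorded in mpu_rgn_info on this CPU */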
+	ldr	r4, [r6, #MPU_RNG_INFO_USED]
+#ifndef CONFIG_XIP_KERNEL
+	add	r4, r4, #1
+#endif
+	mov	r5, #MPU_RNG_SIZE
+	add	r3, r6, #MPU_RNG_INFO_RNGS
+	mla	r3, r4, r5, r3
+
+1:
+	sub	r3, r3, #MPU_RNG_SIZE
+	sub	r4, r4, #1
+
+	mcr	p15, 0, r4, c6, c2, 1		@ PRSEL
 	isb
 
-	ret	lr
-ENDPROC(__secondary_setup_mpu)
+	ldr	r5, [r3, #MPU_RGN_PRBAR]
+	ldr	r6, [r3, #MPU_RGN_PRLAR]
 
+	mcr	p15, 0, r5, c6, c3, 0		@ PRBAR
+	mcr	p15, 0, r6, c6, c3, 1           @ PRLAR
+
+	cmp	r4, #0
+	bgt	1b
+
+	ret	lr
+ENDPROC(__secondary_setup_pmsa_v8)
 #endif /* CONFIG_SMP */
 #endif /* CONFIG_ARM_MPU */
 #include "head-common.S"
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 7724b0f..36718a4 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -205,6 +205,7 @@
 {
 	siginfo_t info;
 
+	clear_siginfo(&info);
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
 	info.si_code  = TRAP_BRKPT;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index fc40a2b..35ca494 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -754,7 +754,7 @@
 	else
 		size -= aligned_start - start;
 
-#ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT
+#ifndef CONFIG_PHYS_ADDR_T_64BIT
 	if (aligned_start > ULONG_MAX) {
 		pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n",
 			(long long)start);
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 2da0879..0978282 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -31,6 +31,7 @@
 #include <linux/irq_work.h>
 
 #include <linux/atomic.h>
+#include <asm/bugs.h>
 #include <asm/smp.h>
 #include <asm/cacheflush.h>
 #include <asm/cpu.h>
@@ -236,8 +237,6 @@
 	flush_cache_louis();
 	local_flush_tlb_all();
 
-	clear_tasks_mm_cpumask(cpu);
-
 	return 0;
 }
 
@@ -255,6 +254,7 @@
 	}
 	pr_debug("CPU%u: shutdown\n", cpu);
 
+	clear_tasks_mm_cpumask(cpu);
 	/*
 	 * platform_cpu_kill() is generally expected to do the powering off
 	 * and/or cutting of clocks to the dying CPU.  Optionally, this may
@@ -405,6 +405,9 @@
 	 * before we continue - which happens after __cpu_up returns.
 	 */
 	set_cpu_online(cpu, true);
+
+	check_other_bugs();
+
 	complete(&cpu_running);
 
 	local_irq_enable();
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index a40ebb7..d080992 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 #include <linux/mm_types.h>
 
+#include <asm/bugs.h>
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/pgalloc.h>
@@ -36,6 +37,7 @@
 		cpu_switch_mm(mm->pgd, mm);
 		local_flush_bp_all();
 		local_flush_tlb_all();
+		check_other_bugs();
 	}
 
 	return ret;
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 3bda08b..80517f2 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -91,18 +91,6 @@
 		seq_printf(m, "Last process:\t\t%d\n", previous_pid);
 	return 0;
 }
-
-static int proc_status_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_status_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_status_fops = {
-	.open		= proc_status_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 /*
@@ -112,6 +100,7 @@
 {
 	siginfo_t info;
 
+	clear_siginfo(&info);
 	down_read(&current->mm->mmap_sem);
 	if (find_vma(current->mm, addr) == NULL)
 		info.si_code = SEGV_MAPERR;
@@ -260,7 +249,8 @@
 		return 0;
 
 #ifdef CONFIG_PROC_FS
-	if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops))
+	if (!proc_create_single("cpu/swp_emulation", S_IRUGO, NULL,
+			proc_status_show))
 		return -ENOMEM;
 #endif /* CONFIG_PROC_FS */
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 2fe8710..badf02c 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -441,6 +441,7 @@
 	siginfo_t info;
 	void __user *pc;
 
+	clear_siginfo(&info);
 	pc = (void __user *)instruction_pointer(regs);
 
 	if (processor_mode(regs) == SVC_MODE) {
@@ -540,6 +541,7 @@
 {
 	siginfo_t info;
 
+	clear_siginfo(&info);
 	if ((current->personality & PER_MASK) != PER_LINUX) {
 		send_sig(SIGSEGV, current, 1);
 		return regs->ARM_r0;
@@ -607,6 +609,7 @@
 {
 	siginfo_t info;
 
+	clear_siginfo(&info);
 	if ((no >> 16) != (__ARM_NR_BASE>> 16))
 		return bad_syscall(no, regs);
 
@@ -743,6 +746,8 @@
 	unsigned long addr = instruction_pointer(regs);
 	siginfo_t info;
 
+	clear_siginfo(&info);
+
 #ifdef CONFIG_DEBUG_USER
 	if (user_debug & UDBG_BADABORT) {
 		pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n",
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index d32f5d3..3593d5c 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -13,6 +13,7 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
+#include <asm/mpu.h>
 #include <asm/page.h>
 
 #include "vmlinux.lds.h"
@@ -148,6 +149,9 @@
 	__init_end = .;
 
 	BSS_SECTION(0, 0, 8)
+#ifdef CONFIG_ARM_MPU
+	. = ALIGN(PMSAv8_MINALIGN);
+#endif
 	_end = .;
 
 	STABS_DEBUG
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index b77dc67..23150c0 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -12,6 +12,7 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
+#include <asm/mpu.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -54,6 +55,9 @@
 	. = ALIGN(1<<SECTION_SHIFT);
 #endif
 
+#ifdef CONFIG_ARM_MPU
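+	/* PMSAv8 region bounds must be PMSAv8_MINALIGN aligned */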
+	. = ALIGN(PMSAv8_MINALIGN);
+#endif
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 		ARM_TEXT
@@ -143,6 +147,9 @@
 	_edata = .;
 
 	BSS_SECTION(0, 0, 0)
+#ifdef CONFIG_ARM_MPU
+	. = ALIGN(PMSAv8_MINALIGN);
+#endif
 	_end = .;
 
 	STABS_DEBUG
diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h
index 71281e0..ae5fdff 100644
--- a/arch/arm/kernel/vmlinux.lds.h
+++ b/arch/arm/kernel/vmlinux.lds.h
@@ -27,24 +27,24 @@
 
 #define PROC_INFO							\
 		. = ALIGN(4);						\
-		VMLINUX_SYMBOL(__proc_info_begin) = .;			\
+		__proc_info_begin = .;					\
 		*(.proc.info.init)					\
-		VMLINUX_SYMBOL(__proc_info_end) = .;
+		__proc_info_end = .;
 
 #define HYPERVISOR_TEXT							\
-		VMLINUX_SYMBOL(__hyp_text_start) = .;			\
+		__hyp_text_start = .;					\
 		*(.hyp.text)						\
-		VMLINUX_SYMBOL(__hyp_text_end) = .;
+		__hyp_text_end = .;
 
 #define IDMAP_TEXT							\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__idmap_text_start) = .;			\
+		__idmap_text_start = .;					\
 		*(.idmap.text)						\
-		VMLINUX_SYMBOL(__idmap_text_end) = .;			\
+		__idmap_text_end = .;					\
 		. = ALIGN(PAGE_SIZE);					\
-		VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;		\
+		__hyp_idmap_text_start = .;				\
 		*(.hyp.idmap.text)					\
-		VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
+		__hyp_idmap_text_end = .;
 
 #define ARM_DISCARD							\
 		*(.ARM.exidx.exit.text)					\
diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S
index 95a2fae..aa3f9a9 100644
--- a/arch/arm/kvm/hyp/hyp-entry.S
+++ b/arch/arm/kvm/hyp/hyp-entry.S
@@ -16,6 +16,7 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
+#include <linux/arm-smccc.h>
 #include <linux/linkage.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
@@ -71,6 +72,90 @@
 	W(b)	hyp_irq
 	W(b)	hyp_fiq
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	.align 5
+__kvm_hyp_vector_ic_inv:
+	.global __kvm_hyp_vector_ic_inv
+
+	/*
+	 * We encode the exception entry in the bottom 3 bits of
+	 * SP, and we have to guarantee to be 8 bytes aligned.
+	 */
+	W(add)	sp, sp, #1	/* Reset 	  7 */
+	W(add)	sp, sp, #1	/* Undef	  6 */
+	W(add)	sp, sp, #1	/* Syscall	  5 */
+	W(add)	sp, sp, #1	/* Prefetch abort 4 */
+	W(add)	sp, sp, #1	/* Data abort	  3 */
+	W(add)	sp, sp, #1	/* HVC		  2 */
+	W(add)	sp, sp, #1	/* IRQ		  1 */
+	W(nop)			/* FIQ		  0 */
+
+	mcr	p15, 0, r0, c7, c5, 0	/* ICIALLU */
+	isb
+
+	b	decode_vectors
+
+	.align 5
+__kvm_hyp_vector_bp_inv:
+	.global __kvm_hyp_vector_bp_inv
+
+	/*
+	 * We encode the exception entry in the bottom 3 bits of
+	 * SP, and we have to guarantee to be 8 bytes aligned.
+	 */
+	W(add)	sp, sp, #1	/* Reset 	  7 */
+	W(add)	sp, sp, #1	/* Undef	  6 */
+	W(add)	sp, sp, #1	/* Syscall	  5 */
+	W(add)	sp, sp, #1	/* Prefetch abort 4 */
+	W(add)	sp, sp, #1	/* Data abort	  3 */
+	W(add)	sp, sp, #1	/* HVC		  2 */
+	W(add)	sp, sp, #1	/* IRQ		  1 */
+	W(nop)			/* FIQ		  0 */
+
+	mcr	p15, 0, r0, c7, c5, 6	/* BPIALL */
+	isb
+
+decode_vectors:
+
+#ifdef CONFIG_THUMB2_KERNEL
+	/*
+	 * Yet another silly hack: Use VPIDR as a temp register.
+	 * Thumb2 is really a pain, as SP cannot be used with most
+	 * of the bitwise instructions. The vect_br macro ensures
+	 * things gets cleaned-up.
+	 */
+	mcr	p15, 4, r0, c0, c0, 0	/* VPIDR */
+	mov	r0, sp
+	and	r0, r0, #7
+	sub	sp, sp, r0
+	push	{r1, r2}
+	mov	r1, r0
+	mrc	p15, 4, r0, c0, c0, 0	/* VPIDR */
+	mrc	p15, 0, r2, c0, c0, 0	/* MIDR  */
+	mcr	p15, 4, r2, c0, c0, 0	/* VPIDR */
+#endif
+
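+/*
+ * Compare the vector number held in the low bits of SP (ARM) or in r1
+ * (Thumb2) against \val; on a match, restore SP or pop the scratch
+ * registers and branch to \targ.
+ */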
+.macro vect_br val, targ
+ARM(	eor	sp, sp, #\val	)
+ARM(	tst	sp, #7		)
+ARM(	eorne	sp, sp, #\val	)
+
+THUMB(	cmp	r1, #\val	)
+THUMB(	popeq	{r1, r2}	)
+
+	beq	\targ
+.endm
+
+	vect_br	0, hyp_fiq
+	vect_br	1, hyp_irq
+	vect_br	2, hyp_hvc
+	vect_br	3, hyp_dabt
+	vect_br	4, hyp_pabt
+	vect_br	5, hyp_svc
+	vect_br	6, hyp_undef
+	vect_br	7, hyp_reset
+#endif
+
 .macro invalid_vector label, cause
 	.align
 \label:	mov	r0, #\cause
@@ -118,7 +203,7 @@
 	lsr     r2, r2, #16
 	and     r2, r2, #0xff
 	cmp     r2, #0
-	bne	guest_trap		@ Guest called HVC
+	bne	guest_hvc_trap		@ Guest called HVC
 
 	/*
 	 * Getting here means host called HVC, we shift parameters and branch
@@ -149,7 +234,14 @@
 	bx	ip
 
 1:
-	push	{lr}
+	/*
+	 * Pushing r2 here is just a way of keeping the stack aligned to
+	 * 8 bytes on any path that can trigger a HYP exception. Here,
+	 * we may well be about to jump into the guest, and the guest
+	 * exit would otherwise be badly decoded by our fancy
+	 * "decode-exception-without-a-branch" code...
+	 */
+	push	{r2, lr}
 
 	mov	lr, r0
 	mov	r0, r1
@@ -159,7 +251,21 @@
 THUMB(	orr	lr, #1)
 	blx	lr			@ Call the HYP function
 
-	pop	{lr}
+	pop	{r2, lr}
+	eret
+
+guest_hvc_trap:
+	movw	r2, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
+	movt	r2, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
+	ldr	r0, [sp]		@ Guest's r0
+	teq	r0, r2
+	bne	guest_trap
+	add	sp, sp, #12
+	@ Returns:
+	@ r0 = 0
+	@ r1 = HSR value (perfectly predictable)
+	@ r2 = ARM_SMCCC_ARCH_WORKAROUND_1
+	mov	r0, #0
 	eret
 
 guest_trap:
diff --git a/arch/arm/mach-axxia/Kconfig b/arch/arm/mach-axxia/Kconfig
index bb2ce1c..d3eae60 100644
--- a/arch/arm/mach-axxia/Kconfig
+++ b/arch/arm/mach-axxia/Kconfig
@@ -2,7 +2,6 @@
 config ARCH_AXXIA
 	bool "LSI Axxia platforms"
 	depends on ARCH_MULTI_V7 && ARM_LPAE
-	select ARCH_DMA_ADDR_T_64BIT
 	select ARM_AMBA
 	select ARM_GIC
 	select ARM_TIMER_SP804
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index c2f3b0d..c46a728 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -211,7 +211,6 @@
 	select BRCMSTB_L2_IRQ
 	select BCM7120_L2_IRQ
 	select ARCH_HAS_HOLES_MEMORYMODEL
-	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	select ZONE_DMA if ARM_LPAE
 	select SOC_BRCMSTB
 	select SOC_BUS
diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c
index 0581ffb..faf48a3 100644
--- a/arch/arm/mach-ep93xx/core.c
+++ b/arch/arm/mach-ep93xx/core.c
@@ -635,6 +635,7 @@
  *************************************************************************/
 static struct resource ep93xx_i2s_resource[] = {
 	DEFINE_RES_MEM(EP93XX_I2S_PHYS_BASE, 0x100),
+	DEFINE_RES_IRQ(IRQ_EP93XX_SAI),
 };
 
 static struct platform_device ep93xx_i2s_device = {
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 647c319..2ca4058 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -112,7 +112,6 @@
 	bool "SAMSUNG EXYNOS5440"
 	default y
 	depends on ARCH_EXYNOS5
-	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	select HAVE_ARM_ARCH_TIMER
 	select AUTO_ZRELADDR
 	select PINCTRL_EXYNOS5440
diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig
index 81110ec..5552968 100644
--- a/arch/arm/mach-highbank/Kconfig
+++ b/arch/arm/mach-highbank/Kconfig
@@ -1,7 +1,6 @@
 config ARCH_HIGHBANK
 	bool "Calxeda ECX-1000/2000 (Highbank/Midway)"
 	depends on ARCH_MULTI_V7
-	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	select ARCH_HAS_HOLES_MEMORYMODEL
 	select ARCH_SUPPORTS_BIG_ENDIAN
 	select ARM_AMBA
diff --git a/arch/arm/mach-rockchip/Kconfig b/arch/arm/mach-rockchip/Kconfig
index a406596..fafd3d7 100644
--- a/arch/arm/mach-rockchip/Kconfig
+++ b/arch/arm/mach-rockchip/Kconfig
@@ -3,7 +3,6 @@
 	depends on ARCH_MULTI_V7
 	select PINCTRL
 	select PINCTRL_ROCKCHIP
-	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	select ARCH_HAS_RESET_CONTROLLER
 	select ARM_AMBA
 	select ARM_GIC
diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c
index bdb5ec1..39aef48 100644
--- a/arch/arm/mach-rpc/ecard.c
+++ b/arch/arm/mach-rpc/ecard.c
@@ -657,25 +657,13 @@
 	return 0;
 }
 
-static int ecard_devices_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ecard_devices_proc_show, NULL);
-}
-
-static const struct file_operations bus_ecard_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ecard_devices_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static struct proc_dir_entry *proc_bus_ecard_dir = NULL;
 
 static void ecard_proc_init(void)
 {
 	proc_bus_ecard_dir = proc_mkdir("bus/ecard", NULL);
-	proc_create("devices", 0, proc_bus_ecard_dir, &bus_ecard_proc_fops);
+	proc_create_single("devices", 0, proc_bus_ecard_dir,
+			ecard_devices_proc_show);
 }
 
 #define ec_set_resource(ec,nr,st,sz)				\
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index 280e731..fe60cd09 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -29,7 +29,6 @@
 menuconfig ARCH_RENESAS
 	bool "Renesas ARM SoCs"
 	depends on ARCH_MULTI_V7 && MMU
-	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	select ARCH_SHMOBILE
 	select ARM_GIC
 	select GPIOLIB
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 1e0aeb4..7f3b83e 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -15,6 +15,5 @@
 	select RESET_CONTROLLER
 	select SOC_BUS
 	select ZONE_DMA if ARM_LPAE
-	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	help
 	  This enables support for NVIDIA Tegra based systems.
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 7f14acf..96a7b6c 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -415,6 +415,7 @@
 	select CPU_CP15_MPU if !MMU
 	select CPU_HAS_ASID if MMU
 	select CPU_PABRT_V7
+	select CPU_SPECTRE if MMU
 	select CPU_THUMB_CAPABLE
 	select CPU_TLB_V7 if MMU
 
@@ -661,6 +662,7 @@
 	bool "Support for the Large Physical Address Extension"
 	depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \
 		!CPU_32v4 && !CPU_32v3
+	select PHYS_ADDR_T_64BIT
 	help
 	  Say Y if you have an ARMv7 processor supporting the LPAE page
 	  table format and you would like to access memory beyond the
@@ -673,12 +675,6 @@
 	def_bool y
 	depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE
 
-config ARCH_PHYS_ADDR_T_64BIT
-	def_bool ARM_LPAE
-
-config ARCH_DMA_ADDR_T_64BIT
-	bool
-
 config ARM_THUMB
 	bool "Support Thumb user binaries" if !CPU_THUMBONLY && EXPERT
 	depends on CPU_THUMB_CAPABLE
@@ -826,6 +822,28 @@
 	help
 	  Say Y here to disable branch prediction.  If unsure, say N.
 
+config CPU_SPECTRE
+	bool
+
+config HARDEN_BRANCH_PREDICTOR
+	bool "Harden the branch predictor against aliasing attacks" if EXPERT
+	depends on CPU_SPECTRE
+	default y
+	help
+	   Speculation attacks against some high-performance processors rely
+	   on being able to manipulate the branch predictor for a victim
+	   context by executing aliasing branches in the attacker context.
+	   Such attacks can be partially mitigated by clearing
+	   internal branch predictor state and limiting the prediction
+	   logic in some situations.
+
+	   This config option will take CPU-specific actions to harden
+	   the branch predictor against aliasing attacks and may rely on
+	   specific instruction sequences or control bits being set by
+	   the system firmware.
+
+	   If unsure, say Y.
+
 config TLS_REG_EMUL
 	bool
 	select NEED_KUSER_HELPERS
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 9dbb849..7cb1699 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -10,7 +10,7 @@
 
 ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
-obj-$(CONFIG_ARM_MPU)		+= pmsa-v7.o
+obj-$(CONFIG_ARM_MPU)		+= pmsa-v7.o pmsa-v8.o
 endif
 
 obj-$(CONFIG_ARM_PTDUMP_CORE)	+= dump.o
@@ -97,7 +97,7 @@
 obj-$(CONFIG_CPU_FEROCEON)	+= proc-feroceon.o
 obj-$(CONFIG_CPU_V6)		+= proc-v6.o
 obj-$(CONFIG_CPU_V6K)		+= proc-v6.o
-obj-$(CONFIG_CPU_V7)		+= proc-v7.o
+obj-$(CONFIG_CPU_V7)		+= proc-v7.o proc-v7-bugs.o
 obj-$(CONFIG_CPU_V7M)		+= proc-v7m.o
 
 AFLAGS_proc-v6.o	:=-Wa,-march=armv6
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 2c96190..bd2c739 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -950,6 +950,7 @@
 	if (ai_usermode & UM_SIGNAL) {
 		siginfo_t si;
 
+		clear_siginfo(&si);
 		si.si_signo = SIGBUS;
 		si.si_errno = 0;
 		si.si_code = BUS_ADRALN;
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 619f24a..f448a06 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -241,12 +241,3 @@
 void arch_teardown_dma_ops(struct device *dev)
 {
 }
-
-#define PREALLOC_DMA_DEBUG_ENTRIES	4096
-
-static int __init dma_debug_do_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-	return 0;
-}
-core_initcall(dma_debug_do_init);
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ada8eb2..af27f1c 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -831,7 +831,7 @@
 		 unsigned long attrs)
 {
 	int ret;
-	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long nr_vma_pages = vma_pages(vma);
 	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
 	unsigned long off = vma->vm_pgoff;
@@ -1151,15 +1151,6 @@
 	return __dma_supported(dev, mask, false);
 }
 
-#define PREALLOC_DMA_DEBUG_ENTRIES	4096
-
-static int __init dma_debug_do_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-	return 0;
-}
-core_initcall(dma_debug_do_init);
-
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
 
 static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index b75eada..84becc9 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -163,6 +163,11 @@
 {
 	struct siginfo si;
 
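+	/*
+	 * A user fault on a kernel address may be an attempt to train
+	 * the branch predictor; give the per-CPU workaround a chance to
+	 * invalidate it.
+	 */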
+	if (addr > TASK_SIZE)
+		harden_branch_predictor();
+
+	clear_siginfo(&si);
+
 #ifdef CONFIG_DEBUG_USER
 	if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
 	    ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
@@ -557,6 +562,7 @@
 		inf->name, fsr, addr);
 	show_pte(current->mm, addr);
 
+	clear_siginfo(&info);
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code  = inf->code;
@@ -589,6 +595,7 @@
 	pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 		inf->name, ifsr, addr);
 
+	clear_siginfo(&info);
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code  = inf->code;
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 7c08796..5dd6c58 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -99,6 +99,38 @@
 	memblock_reserve(0, 1);
 }
 
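+/* Dispatch to the PMSAv7 or PMSAv8 implementation based on the MMFR0 PMSA field */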
+static void __init adjust_lowmem_bounds_mpu(void)
+{
+	unsigned long pmsa = read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA;
+
+	switch (pmsa) {
+	case MMFR0_PMSAv7:
+		pmsav7_adjust_lowmem_bounds();
+		break;
+	case MMFR0_PMSAv8:
+		pmsav8_adjust_lowmem_bounds();
+		break;
+	default:
+		break;
+	}
+}
+
+static void __init mpu_setup(void)
+{
+	unsigned long pmsa = read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA;
+
+	switch (pmsa) {
+	case MMFR0_PMSAv7:
+		pmsav7_setup();
+		break;
+	case MMFR0_PMSAv8:
+		pmsav8_setup();
+		break;
+	default:
+		break;
+	}
+}
+
 void __init adjust_lowmem_bounds(void)
 {
 	phys_addr_t end;
diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c
index e2853bf..699fa2e 100644
--- a/arch/arm/mm/pmsa-v7.c
+++ b/arch/arm/mm/pmsa-v7.c
@@ -102,7 +102,7 @@
 
 static inline void rgnr_write(u32 v)
 {
-	writel_relaxed(v, BASEADDR_V7M_SCB + MPU_RNR);
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv7_RNR);
 }
 
 /* Data-side / unified region attributes */
@@ -110,28 +110,28 @@
 /* Region access control register */
 static inline void dracr_write(u32 v)
 {
-	u32 rsr = readl_relaxed(BASEADDR_V7M_SCB + MPU_RASR) & GENMASK(15, 0);
+	u32 rsr = readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RASR) & GENMASK(15, 0);
 
-	writel_relaxed((v << 16) | rsr, BASEADDR_V7M_SCB + MPU_RASR);
+	writel_relaxed((v << 16) | rsr, BASEADDR_V7M_SCB + PMSAv7_RASR);
 }
 
 /* Region size register */
 static inline void drsr_write(u32 v)
 {
-	u32 racr = readl_relaxed(BASEADDR_V7M_SCB + MPU_RASR) & GENMASK(31, 16);
+	u32 racr = readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RASR) & GENMASK(31, 16);
 
-	writel_relaxed(v | racr, BASEADDR_V7M_SCB + MPU_RASR);
+	writel_relaxed(v | racr, BASEADDR_V7M_SCB + PMSAv7_RASR);
 }
 
 /* Region base address register */
 static inline void drbar_write(u32 v)
 {
-	writel_relaxed(v, BASEADDR_V7M_SCB + MPU_RBAR);
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv7_RBAR);
 }
 
 static inline u32 drbar_read(void)
 {
-	return readl_relaxed(BASEADDR_V7M_SCB + MPU_RBAR);
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RBAR);
 }
 
 /* ARMv7-M only supports a unified MPU, so I-side operations are nop */
@@ -143,11 +143,6 @@
 
 #endif
 
-static int __init mpu_present(void)
-{
-	return ((read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA) == MMFR0_PMSAv7);
-}
-
 static bool __init try_split_region(phys_addr_t base, phys_addr_t size, struct region *region)
 {
 	unsigned long  subreg, bslots, sslots;
@@ -161,7 +156,7 @@
 
 	bdiff = base - abase;
 	sdiff = p2size - asize;
-	subreg = p2size / MPU_NR_SUBREGS;
+	subreg = p2size / PMSAv7_NR_SUBREGS;
 
 	if ((bdiff % subreg) || (sdiff % subreg))
 		return false;
@@ -172,17 +167,17 @@
 	if (bslots || sslots) {
 		int i;
 
-		if (subreg < MPU_MIN_SUBREG_SIZE)
+		if (subreg < PMSAv7_MIN_SUBREG_SIZE)
 			return false;
 
-		if (bslots + sslots > MPU_NR_SUBREGS)
+		if (bslots + sslots > PMSAv7_NR_SUBREGS)
 			return false;
 
 		for (i = 0; i < bslots; i++)
 			_set_bit(i, &region->subreg);
 
 		for (i = 1; i <= sslots; i++)
-			_set_bit(MPU_NR_SUBREGS - i, &region->subreg);
+			_set_bit(PMSAv7_NR_SUBREGS - i, &region->subreg);
 	}
 
 	region->base = abase;
@@ -233,7 +228,7 @@
 }
 
 /* MPU initialisation functions */
-void __init adjust_lowmem_bounds_mpu(void)
+void __init pmsav7_adjust_lowmem_bounds(void)
 {
 	phys_addr_t  specified_mem_size = 0, total_mem_size = 0;
 	struct memblock_region *reg;
@@ -243,10 +238,7 @@
 	unsigned int mem_max_regions;
 	int num, i;
 
-	if (!mpu_present())
-		return;
-
-	/* Free-up MPU_PROBE_REGION */
+	/* Free-up PMSAv7_PROBE_REGION */
 	mpu_min_region_order = __mpu_min_region_order();
 
 	/* How many regions are supported */
@@ -301,12 +293,12 @@
 	num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem);
 
 	for (i = 0; i < num; i++) {
-		unsigned long  subreg = mem[i].size / MPU_NR_SUBREGS;
+		unsigned long  subreg = mem[i].size / PMSAv7_NR_SUBREGS;
 
 		total_mem_size += mem[i].size - subreg * hweight_long(mem[i].subreg);
 
 		pr_debug("MPU: base %pa size %pa disable subregions: %*pbl\n",
-			 &mem[i].base, &mem[i].size, MPU_NR_SUBREGS, &mem[i].subreg);
+			 &mem[i].base, &mem[i].size, PMSAv7_NR_SUBREGS, &mem[i].subreg);
 	}
 
 	if (total_mem_size != specified_mem_size) {
@@ -349,7 +341,7 @@
 	u32 drbar_result, irbar_result;
 
 	/* We've kept a region free for this probing */
-	rgnr_write(MPU_PROBE_REGION);
+	rgnr_write(PMSAv7_PROBE_REGION);
 	isb();
 	/*
 	 * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum
@@ -388,8 +380,8 @@
 		return -ENOMEM;
 
 	/* Writing N to bits 5:1 (RSR_SZ)  specifies region size 2^N+1 */
-	size_data = ((size_order - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN;
-	size_data |= subregions << MPU_RSR_SD;
+	size_data = ((size_order - 1) << PMSAv7_RSR_SZ) | 1 << PMSAv7_RSR_EN;
+	size_data |= subregions << PMSAv7_RSR_SD;
 
 	if (need_flush)
 		flush_cache_all();
@@ -424,18 +416,15 @@
 /*
 * Set up default MPU regions, doing nothing if there is no MPU
 */
-void __init mpu_setup(void)
+void __init pmsav7_setup(void)
 {
 	int i, region = 0, err = 0;
 
-	if (!mpu_present())
-		return;
-
 	/* Setup MPU (order is important) */
 
 	/* Background */
 	err |= mpu_setup_region(region++, 0, 32,
-				MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0RW,
+				PMSAv7_ACR_XN | PMSAv7_RGN_STRONGLY_ORDERED | PMSAv7_AP_PL1RW_PL0RW,
 				0, false);
 
 #ifdef CONFIG_XIP_KERNEL
@@ -448,13 +437,13 @@
                  * with BG region (which is uncachable), thus we need
                  * to clean and invalidate cache.
 		 */
-		bool need_flush = region == MPU_RAM_REGION;
+		bool need_flush = region == PMSAv7_RAM_REGION;
 
 		if (!xip[i].size)
 			continue;
 
 		err |= mpu_setup_region(region++, xip[i].base, ilog2(xip[i].size),
-					MPU_AP_PL1RO_PL0NA | MPU_RGN_NORMAL,
+					PMSAv7_AP_PL1RO_PL0NA | PMSAv7_RGN_NORMAL,
 					xip[i].subreg, need_flush);
 	}
 #endif
@@ -465,14 +454,14 @@
 			continue;
 
 		err |= mpu_setup_region(region++, mem[i].base, ilog2(mem[i].size),
-					MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL,
+					PMSAv7_AP_PL1RW_PL0RW | PMSAv7_RGN_NORMAL,
 					mem[i].subreg, false);
 	}
 
 	/* Vectors */
 #ifndef CONFIG_CPU_V7M
 	err |= mpu_setup_region(region++, vectors_base, ilog2(2 * PAGE_SIZE),
-				MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL,
+				PMSAv7_AP_PL1RW_PL0NA | PMSAv7_RGN_NORMAL,
 				0, false);
 #endif
 	if (err) {
diff --git a/arch/arm/mm/pmsa-v8.c b/arch/arm/mm/pmsa-v8.c
new file mode 100644
index 0000000..617a83d
--- /dev/null
+++ b/arch/arm/mm/pmsa-v8.c
@@ -0,0 +1,307 @@
+/*
+ * Based on linux/arch/arm/pmsa-v7.c
+ *
+ * ARM PMSAv8 supporting functions.
+ */
+
+#include <linux/memblock.h>
+#include <linux/range.h>
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/mpu.h>
+
+#include <asm/memory.h>
+#include <asm/sections.h>
+
+#include "mm.h"
+
+#ifndef CONFIG_CPU_V7M
+
+#define PRSEL	__ACCESS_CP15(c6, 0, c2, 1)
+#define PRBAR	__ACCESS_CP15(c6, 0, c3, 0)
+#define PRLAR	__ACCESS_CP15(c6, 0, c3, 1)
+
+static inline u32 prlar_read(void)
+{
+	return read_sysreg(PRLAR);
+}
+
+static inline u32 prbar_read(void)
+{
+	return read_sysreg(PRBAR);
+}
+
+static inline void prsel_write(u32 v)
+{
+	write_sysreg(v, PRSEL);
+}
+
+static inline void prbar_write(u32 v)
+{
+	write_sysreg(v, PRBAR);
+}
+
+static inline void prlar_write(u32 v)
+{
+	write_sysreg(v, PRLAR);
+}
+#else
+
+static inline u32 prlar_read(void)
+{
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv8_RLAR);
+}
+
+static inline u32 prbar_read(void)
+{
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv8_RBAR);
+}
+
+static inline void prsel_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RNR);
+}
+
+static inline void prbar_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RBAR);
+}
+
+static inline void prlar_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RLAR);
+}
+
+#endif
+
+static struct range __initdata io[MPU_MAX_REGIONS];
+static struct range __initdata mem[MPU_MAX_REGIONS];
+
+static unsigned int __initdata mpu_max_regions;
+
+static __init bool is_region_fixed(int number)
+{
+	switch (number) {
+	case PMSAv8_XIP_REGION:
+	case PMSAv8_KERNEL_REGION:
+		return true;
+	default:
+		return false;
+	}
+}
+
+void __init pmsav8_adjust_lowmem_bounds(void)
+{
+	phys_addr_t mem_end;
+	struct memblock_region *reg;
+	bool first = true;
+
+	for_each_memblock(memory, reg) {
+		if (first) {
+			phys_addr_t phys_offset = PHYS_OFFSET;
+
+			/*
+			 * Initially only use memory contiguous from
+			 * PHYS_OFFSET.
+			 */
+			if (reg->base != phys_offset)
+				panic("First memory bank must be contiguous from PHYS_OFFSET");
+			mem_end = reg->base + reg->size;
+			first = false;
+		} else {
+			/*
+			 * memblock auto merges contiguous blocks, remove
+			 * all blocks afterwards in one go (we can't remove
+			 * blocks separately while iterating)
+			 */
+			pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
+				  &mem_end, &reg->base);
+			memblock_remove(reg->base, 0 - reg->base);
+			break;
+		}
+	}
+}
+
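+/* Number of supported MPU regions, read once from the MPUIR DREGION field */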
+static int __init __mpu_max_regions(void)
+{
+	static int max_regions;
+	u32 mpuir;
+
+	if (max_regions)
+		return max_regions;
+
+	mpuir = read_cpuid_mputype();
+
+	max_regions  = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
+
+	return max_regions;
+}
+
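+/* Program one MPU region and record it for secondary CPU bring-up */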
+static int __init __pmsav8_setup_region(unsigned int number, u32 bar, u32 lar)
+{
+	if (number > mpu_max_regions ||
+	    number >= MPU_MAX_REGIONS)
+		return -ENOENT;
+
+	dsb();
+	prsel_write(number);
+	isb();
+	prbar_write(bar);
+	prlar_write(lar);
+
+	mpu_rgn_info.rgns[number].prbar = bar;
+	mpu_rgn_info.rgns[number].prlar = lar;
+
+	mpu_rgn_info.used++;
+
+	return 0;
+}
+
+static int __init pmsav8_setup_ram(unsigned int number, phys_addr_t start, phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+
+static int __init pmsav8_setup_io(unsigned int number, phys_addr_t start, phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+
+static int __init pmsav8_setup_fixed(unsigned int number, phys_addr_t start, phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (!is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	prsel_write(number);
+	isb();
+
+	if (prbar_read() != bar || prlar_read() != lar)
+		return -EINVAL;
+
+	/* Reserved region was set up early; we just need a record for secondaries */
+	mpu_rgn_info.rgns[number].prbar = bar;
+	mpu_rgn_info.rgns[number].prlar = lar;
+
+	mpu_rgn_info.used++;
+
+	return 0;
+}
+
+#ifndef CONFIG_CPU_V7M
+static int __init pmsav8_setup_vector(unsigned int number, phys_addr_t start, phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (number == PMSAv8_KERNEL_REGION)
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+#endif
+
+void __init pmsav8_setup(void)
+{
+	int i, err = 0;
+	int region = PMSAv8_KERNEL_REGION;
+
+	/* How many regions are supported? */
+	mpu_max_regions = __mpu_max_regions();
+
+	/* RAM: single chunk of memory */
+	add_range(mem, ARRAY_SIZE(mem), 0, memblock.memory.regions[0].base,
+		  memblock.memory.regions[0].base + memblock.memory.regions[0].size);
+
+	/* IO: cover full 4G range */
+	add_range(io, ARRAY_SIZE(io), 0, 0, 0xffffffff);
+
+	/* RAM and IO: exclude kernel */
+	subtract_range(mem, ARRAY_SIZE(mem), __pa(KERNEL_START), __pa(KERNEL_END));
+	subtract_range(io, ARRAY_SIZE(io), __pa(KERNEL_START), __pa(KERNEL_END));
+
+#ifdef CONFIG_XIP_KERNEL
+	/* RAM and IO: exclude xip */
+	subtract_range(mem, ARRAY_SIZE(mem), CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+	subtract_range(io, ARRAY_SIZE(io), CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+#endif
+
+#ifndef CONFIG_CPU_V7M
+	/* RAM and IO: exclude vectors */
+	subtract_range(mem, ARRAY_SIZE(mem), vectors_base, vectors_base + 2 * PAGE_SIZE);
+	subtract_range(io, ARRAY_SIZE(io), vectors_base, vectors_base + 2 * PAGE_SIZE);
+#endif
+	/* IO: exclude RAM */
+	for (i = 0; i < ARRAY_SIZE(mem); i++)
+		subtract_range(io, ARRAY_SIZE(io), mem[i].start, mem[i].end);
+
+	/* Now program MPU */
+
+#ifdef CONFIG_XIP_KERNEL
+	/* ROM */
+	err |= pmsav8_setup_fixed(PMSAv8_XIP_REGION, CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+#endif
+	/* Kernel */
+	err |= pmsav8_setup_fixed(region++, __pa(KERNEL_START), __pa(KERNEL_END));
+
+	/* IO */
+	for (i = 0; i < ARRAY_SIZE(io); i++) {
+		if (!io[i].end)
+			continue;
+
+		err |= pmsav8_setup_io(region++, io[i].start, io[i].end);
+	}
+
+	/* RAM */
+	for (i = 0; i < ARRAY_SIZE(mem); i++) {
+		if (!mem[i].end)
+			continue;
+
+		err |= pmsav8_setup_ram(region++, mem[i].start, mem[i].end);
+	}
+
+	/* Vectors */
+#ifndef CONFIG_CPU_V7M
+	err |= pmsav8_setup_vector(region++, vectors_base, vectors_base + 2 * PAGE_SIZE);
+#endif
+	if (err)
+		pr_warn("MPU region initialization failure! %d\n", err);
+	else
+		pr_info("Using ARM PMSAv8 Compliant MPU. Used %d of %d regions\n",
+			mpu_rgn_info.used, mpu_max_regions);
+}
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index f10e31d..81d0efb 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -273,13 +273,14 @@
 	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
 	.endm
 
-.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0
+.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
 	.type	\name\()_processor_functions, #object
 	.align 2
 ENTRY(\name\()_processor_functions)
 	.word	\dabort
 	.word	\pabort
 	.word	cpu_\name\()_proc_init
+	.word	\bugs
 	.word	cpu_\name\()_proc_fin
 	.word	cpu_\name\()_reset
 	.word	cpu_\name\()_do_idle
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index c6141a5..f8d45ad 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -41,11 +41,6 @@
  *	even on Cortex-A8 revisions not affected by 430973.
  *	If IBE is not set, the flush BTAC/BTB won't do anything.
  */
-ENTRY(cpu_ca8_switch_mm)
-#ifdef CONFIG_MMU
-	mov	r2, #0
-	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
-#endif
 ENTRY(cpu_v7_switch_mm)
 #ifdef CONFIG_MMU
 	mmid	r1, r1				@ get mm->context.id
@@ -66,7 +61,6 @@
 #endif
 	bx	lr
 ENDPROC(cpu_v7_switch_mm)
-ENDPROC(cpu_ca8_switch_mm)
 
 /*
  *	cpu_v7_set_pte_ext(ptep, pte)
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
new file mode 100644
index 0000000..5544b82
--- /dev/null
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/arm-smccc.h>
+#include <linux/kernel.h>
+#include <linux/psci.h>
+#include <linux/smp.h>
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/proc-fns.h>
+#include <asm/system_misc.h>
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
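+/* Per-CPU hook used to invalidate branch predictor state on user aborts */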
+DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn);
+
+extern void cpu_v7_iciallu_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_bpiall_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_smc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_hvc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+
+static void harden_branch_predictor_bpiall(void)
+{
+	write_sysreg(0, BPIALL);
+}
+
+static void harden_branch_predictor_iciallu(void)
+{
+	write_sysreg(0, ICIALLU);
+}
+
+static void __maybe_unused call_smc_arch_workaround_1(void)
+{
+	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void __maybe_unused call_hvc_arch_workaround_1(void)
+{
+	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
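+/*
+ * Pick the Spectre v2 mitigation for this CPU: BPIALL or ICIALLU on
+ * context switch for the affected Cortex-A parts listed below, or the
+ * SMCCC 1.1 ARCH_WORKAROUND_1 firmware call where it is advertised.
+ */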
+static void cpu_v7_spectre_init(void)
+{
+	const char *spectre_v2_method = NULL;
+	int cpu = smp_processor_id();
+
+	if (per_cpu(harden_branch_predictor_fn, cpu))
+		return;
+
+	switch (read_cpuid_part()) {
+	case ARM_CPU_PART_CORTEX_A8:
+	case ARM_CPU_PART_CORTEX_A9:
+	case ARM_CPU_PART_CORTEX_A12:
+	case ARM_CPU_PART_CORTEX_A17:
+	case ARM_CPU_PART_CORTEX_A73:
+	case ARM_CPU_PART_CORTEX_A75:
+		if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
+			goto bl_error;
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			harden_branch_predictor_bpiall;
+		spectre_v2_method = "BPIALL";
+		break;
+
+	case ARM_CPU_PART_CORTEX_A15:
+	case ARM_CPU_PART_BRAHMA_B15:
+		if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
+			goto bl_error;
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			harden_branch_predictor_iciallu;
+		spectre_v2_method = "ICIALLU";
+		break;
+
+#ifdef CONFIG_ARM_PSCI
+	default:
+		/* Other ARM CPUs require no workaround */
+		if (read_cpuid_implementor() == ARM_CPU_IMP_ARM)
+			break;
+		/* fallthrough */
+		/* Cortex A57/A72 require firmware workaround */
+	case ARM_CPU_PART_CORTEX_A57:
+	case ARM_CPU_PART_CORTEX_A72: {
+		struct arm_smccc_res res;
+
+		if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+			break;
+
+		switch (psci_ops.conduit) {
+		case PSCI_CONDUIT_HVC:
+			arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+			if ((int)res.a0 != 0)
+				break;
+			if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
+				goto bl_error;
+			per_cpu(harden_branch_predictor_fn, cpu) =
+				call_hvc_arch_workaround_1;
+			processor.switch_mm = cpu_v7_hvc_switch_mm;
+			spectre_v2_method = "hypervisor";
+			break;
+
+		case PSCI_CONDUIT_SMC:
+			arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+			if ((int)res.a0 != 0)
+				break;
+			if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
+				goto bl_error;
+			per_cpu(harden_branch_predictor_fn, cpu) =
+				call_smc_arch_workaround_1;
+			processor.switch_mm = cpu_v7_smc_switch_mm;
+			spectre_v2_method = "firmware";
+			break;
+
+		default:
+			break;
+		}
+	}
+#endif
+	}
+
+	if (spectre_v2_method)
+		pr_info("CPU%u: Spectre v2: using %s workaround\n",
+			smp_processor_id(), spectre_v2_method);
+	return;
+
+bl_error:
+	pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
+		cpu);
+}
+#else
+static void cpu_v7_spectre_init(void)
+{
+}
+#endif
+
+static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned,
+						  u32 mask, const char *msg)
+{
+	u32 aux_cr;
+
+	asm("mrc p15, 0, %0, c1, c0, 1" : "=r" (aux_cr));
+
+	if ((aux_cr & mask) != mask) {
+		if (!*warned)
+			pr_err("CPU%u: %s", smp_processor_id(), msg);
+		*warned = true;
+		return false;
+	}
+	return true;
+}
+
+static DEFINE_PER_CPU(bool, spectre_warned);
+
+static bool check_spectre_auxcr(bool *warned, u32 bit)
+{
+	return IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR) &&
+		cpu_v7_check_auxcr_set(warned, bit,
+				       "Spectre v2: firmware did not set auxiliary control register IBE bit, system vulnerable\n");
+}
+
+void cpu_v7_ca8_ibe(void)
+{
+	if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6)))
+		cpu_v7_spectre_init();
+}
+
+void cpu_v7_ca15_ibe(void)
+{
+	if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
+		cpu_v7_spectre_init();
+}
+
+void cpu_v7_bugs_init(void)
+{
+	cpu_v7_spectre_init();
+}
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b528a15..6fe52819 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -9,6 +9,7 @@
  *
  *  This is the "shell" of the ARMv7 processor support.
  */
+#include <linux/arm-smccc.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/assembler.h>
@@ -93,6 +94,37 @@
 	ret	lr
 ENDPROC(cpu_v7_dcache_clean_area)
 
+#ifdef CONFIG_ARM_PSCI
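+	/*
+	 * switch_mm variants that first invalidate the branch predictor
+	 * via the SMCCC ARCH_WORKAROUND_1 firmware call (SMC or HVC).
+	 */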
+	.arch_extension sec
+ENTRY(cpu_v7_smc_switch_mm)
+	stmfd	sp!, {r0 - r3}
+	movw	r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
+	movt	r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
+	smc	#0
+	ldmfd	sp!, {r0 - r3}
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_smc_switch_mm)
+	.arch_extension virt
+ENTRY(cpu_v7_hvc_switch_mm)
+	stmfd	sp!, {r0 - r3}
+	movw	r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
+	movt	r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
+	hvc	#0
+	ldmfd	sp!, {r0 - r3}
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_hvc_switch_mm)
+#endif
+ENTRY(cpu_v7_iciallu_switch_mm)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c5, 0		@ ICIALLU
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_iciallu_switch_mm)
+ENTRY(cpu_v7_bpiall_switch_mm)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c5, 6		@ flush BTAC/BTB
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_bpiall_switch_mm)
+
 	string	cpu_v7_name, "ARMv7 Processor"
 	.align
 
@@ -158,31 +190,6 @@
 ENDPROC(cpu_v7_do_resume)
 #endif
 
-/*
- * Cortex-A8
- */
-	globl_equ	cpu_ca8_proc_init,	cpu_v7_proc_init
-	globl_equ	cpu_ca8_proc_fin,	cpu_v7_proc_fin
-	globl_equ	cpu_ca8_reset,		cpu_v7_reset
-	globl_equ	cpu_ca8_do_idle,	cpu_v7_do_idle
-	globl_equ	cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area
-	globl_equ	cpu_ca8_set_pte_ext,	cpu_v7_set_pte_ext
-	globl_equ	cpu_ca8_suspend_size,	cpu_v7_suspend_size
-#ifdef CONFIG_ARM_CPU_SUSPEND
-	globl_equ	cpu_ca8_do_suspend,	cpu_v7_do_suspend
-	globl_equ	cpu_ca8_do_resume,	cpu_v7_do_resume
-#endif
-
-/*
- * Cortex-A9 processor functions
- */
-	globl_equ	cpu_ca9mp_proc_init,	cpu_v7_proc_init
-	globl_equ	cpu_ca9mp_proc_fin,	cpu_v7_proc_fin
-	globl_equ	cpu_ca9mp_reset,	cpu_v7_reset
-	globl_equ	cpu_ca9mp_do_idle,	cpu_v7_do_idle
-	globl_equ	cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area
-	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_switch_mm
-	globl_equ	cpu_ca9mp_set_pte_ext,	cpu_v7_set_pte_ext
 .globl	cpu_ca9mp_suspend_size
 .equ	cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2
 #ifdef CONFIG_ARM_CPU_SUSPEND
@@ -547,12 +554,79 @@
 
 	__INITDATA
 
+	.weak cpu_v7_bugs_init
+
 	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
-	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
-#ifndef CONFIG_ARM_LPAE
-	define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
-	define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	@ generic v7 bpiall on context switch
+	globl_equ	cpu_v7_bpiall_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_v7_bpiall_proc_fin,		cpu_v7_proc_fin
+	globl_equ	cpu_v7_bpiall_reset,		cpu_v7_reset
+	globl_equ	cpu_v7_bpiall_do_idle,		cpu_v7_do_idle
+	globl_equ	cpu_v7_bpiall_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_v7_bpiall_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_v7_bpiall_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	globl_equ	cpu_v7_bpiall_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_v7_bpiall_do_resume,	cpu_v7_do_resume
 #endif
+	define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+
+#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_bpiall_processor_functions
+#else
+#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_processor_functions
+#endif
+
+#ifndef CONFIG_ARM_LPAE
+	@ Cortex-A8 - always needs bpiall switch_mm implementation
+	globl_equ	cpu_ca8_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca8_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca8_reset,		cpu_v7_reset
+	globl_equ	cpu_ca8_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_ca8_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_ca8_switch_mm,	cpu_v7_bpiall_switch_mm
+	globl_equ	cpu_ca8_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	globl_equ	cpu_ca8_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_ca8_do_resume,	cpu_v7_do_resume
+#endif
+	define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca8_ibe
+
+	@ Cortex-A9 - needs more registers preserved across suspend/resume
+	@ and bpiall switch_mm for hardening
+	globl_equ	cpu_ca9mp_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca9mp_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca9mp_reset,	cpu_v7_reset
+	globl_equ	cpu_ca9mp_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_bpiall_switch_mm
+#else
+	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_switch_mm
+#endif
+	globl_equ	cpu_ca9mp_set_pte_ext,	cpu_v7_set_pte_ext
+	define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+#endif
+
+	@ Cortex-A15 - needs iciallu switch_mm for hardening
+	globl_equ	cpu_ca15_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca15_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca15_reset,		cpu_v7_reset
+	globl_equ	cpu_ca15_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca15_dcache_clean_area, cpu_v7_dcache_clean_area
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	globl_equ	cpu_ca15_switch_mm,	cpu_v7_iciallu_switch_mm
+#else
+	globl_equ	cpu_ca15_switch_mm,	cpu_v7_switch_mm
+#endif
+	globl_equ	cpu_ca15_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_ca15_suspend_size,	cpu_v7_suspend_size
+	globl_equ	cpu_ca15_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_ca15_do_resume,	cpu_v7_do_resume
+	define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca15_ibe
 #ifdef CONFIG_CPU_PJ4B
 	define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
 #endif
@@ -669,7 +743,7 @@
 __v7_ca12mp_proc_info:
 	.long	0x410fc0d0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup
+	__v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
 	.size	__v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info
 
 	/*
@@ -679,7 +753,7 @@
 __v7_ca15mp_proc_info:
 	.long	0x410fc0f0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup
+	__v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup, proc_fns = ca15_processor_functions
 	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
 
 	/*
@@ -689,7 +763,7 @@
 __v7_b15mp_proc_info:
 	.long	0x420f00f0
 	.long	0xff0ffff0
-	__v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, cache_fns = b15_cache_fns
+	__v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, proc_fns = ca15_processor_functions, cache_fns = b15_cache_fns
 	.size	__v7_b15mp_proc_info, . - __v7_b15mp_proc_info
 
 	/*
@@ -699,9 +773,25 @@
 __v7_ca17mp_proc_info:
 	.long	0x410fc0e0
 	.long	0xff0ffff0
-	__v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup
+	__v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
 	.size	__v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info
 
+	/* ARM Ltd. Cortex A73 processor */
+	.type	__v7_ca73_proc_info, #object
+__v7_ca73_proc_info:
+	.long	0x410fd090
+	.long	0xff0ffff0
+	__v7_proc __v7_ca73_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
+	.size	__v7_ca73_proc_info, . - __v7_ca73_proc_info
+
+	/* ARM Ltd. Cortex A75 processor */
+	.type	__v7_ca75_proc_info, #object
+__v7_ca75_proc_info:
+	.long	0x410fd0a0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca75_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
+	.size	__v7_ca75_proc_info, . - __v7_ca75_proc_info
+
 	/*
 	 * Qualcomm Inc. Krait processors.
 	 */
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index af4ee2c..35d0f82 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -218,8 +218,7 @@
 {
 	siginfo_t info;
 
-	memset(&info, 0, sizeof(info));
-
+	clear_siginfo(&info);
 	info.si_signo = SIGFPE;
 	info.si_code = sicode;
 	info.si_addr = (void __user *)(instruction_pointer(regs) - 4);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index eb2cf49..b25ed78 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,7 +105,6 @@
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -133,6 +132,8 @@
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA
 	select MULTI_IRQ_HANDLER
+	select NEED_DMA_MAP_STATE
+	select NEED_SG_DMA_LENGTH
 	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
@@ -142,6 +143,7 @@
 	select POWER_SUPPLY
 	select REFCOUNT_FULL
 	select SPARSE_IRQ
+	select SWIOTLB
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	help
@@ -150,9 +152,6 @@
 config 64BIT
 	def_bool y
 
-config ARCH_PHYS_ADDR_T_64BIT
-	def_bool y
-
 config MMU
 	def_bool y
 
@@ -237,24 +236,9 @@
 config HAVE_GENERIC_GUP
 	def_bool y
 
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool y
-
-config NEED_DMA_MAP_STATE
-	def_bool y
-
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config SMP
 	def_bool y
 
-config SWIOTLB
-	def_bool y
-
-config IOMMU_HELPER
-	def_bool SWIOTLB
-
 config KERNEL_MODE_NEON
 	def_bool y
 
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 87f7d2f..3c353b4 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -78,7 +78,7 @@
 UTS_MACHINE	:= aarch64
 endif
 
-CHECKFLAGS	+= -D__aarch64__ -m64
+CHECKFLAGS	+= -D__aarch64__
 
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index c89d0c30..e6b0593 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -17,6 +17,7 @@
 /dts-v1/;
 #include <dt-bindings/reset/altr,rst-mgr-s10.h>
 #include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/clock/stratix10-clock.h>
 
 / {
 	compatible = "altr,socfpga-stratix10";
@@ -92,9 +93,32 @@
 		interrupt-parent = <&intc>;
 		ranges = <0 0 0 0xffffffff>;
 
-		clkmgr@ffd1000 {
-			compatible = "altr,clk-mgr";
+		clkmgr: clock-controller@ffd10000 {
+			compatible = "intel,stratix10-clkmgr";
 			reg = <0xffd10000 0x1000>;
+			#clock-cells = <1>;
+		};
+
+		clocks {
+			cb_intosc_hs_div2_clk: cb-intosc-hs-div2-clk {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+			};
+
+			cb_intosc_ls_clk: cb-intosc-ls-clk {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+			};
+
+			f2s_free_clk: f2s-free-clk {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+			};
+
+			osc1: osc1 {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+			};
 		};
 
 		gmac0: ethernet@ff800000 {
@@ -105,6 +129,8 @@
 			mac-address = [00 00 00 00 00 00];
 			resets = <&rst EMAC0_RESET>;
 			reset-names = "stmmaceth";
+			clocks = <&clkmgr STRATIX10_EMAC0_CLK>;
+			clock-names = "stmmaceth";
 			status = "disabled";
 		};
 
@@ -116,6 +142,8 @@
 			mac-address = [00 00 00 00 00 00];
 			resets = <&rst EMAC1_RESET>;
 			reset-names = "stmmaceth";
+			clocks = <&clkmgr STRATIX10_EMAC1_CLK>;
+			clock-names = "stmmaceth";
 			status = "disabled";
 		};
 
@@ -127,6 +155,8 @@
 			mac-address = [00 00 00 00 00 00];
 			resets = <&rst EMAC2_RESET>;
 			reset-names = "stmmaceth";
+			clocks = <&clkmgr STRATIX10_EMAC2_CLK>;
+			clock-names = "stmmaceth";
 			status = "disabled";
 		};
 
@@ -177,6 +207,7 @@
 			reg = <0xffc02800 0x100>;
 			interrupts = <0 103 4>;
 			resets = <&rst I2C0_RESET>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
 			status = "disabled";
 		};
 
@@ -187,6 +218,7 @@
 			reg = <0xffc02900 0x100>;
 			interrupts = <0 104 4>;
 			resets = <&rst I2C1_RESET>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
 			status = "disabled";
 		};
 
@@ -197,6 +229,7 @@
 			reg = <0xffc02a00 0x100>;
 			interrupts = <0 105 4>;
 			resets = <&rst I2C2_RESET>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
 			status = "disabled";
 		};
 
@@ -207,6 +240,7 @@
 			reg = <0xffc02b00 0x100>;
 			interrupts = <0 106 4>;
 			resets = <&rst I2C3_RESET>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
 			status = "disabled";
 		};
 
@@ -217,6 +251,7 @@
 			reg = <0xffc02c00 0x100>;
 			interrupts = <0 107 4>;
 			resets = <&rst I2C4_RESET>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
 			status = "disabled";
 		};
 
@@ -229,6 +264,9 @@
 			fifo-depth = <0x400>;
 			resets = <&rst SDMMC_RESET>;
 			reset-names = "reset";
+			clocks = <&clkmgr STRATIX10_L4_MP_CLK>,
+				 <&clkmgr STRATIX10_SDMMC_CLK>;
+			clock-names = "biu", "ciu";
 			status = "disabled";
 		};
 
@@ -237,6 +275,25 @@
 			reg = <0xffe00000 0x100000>;
 		};
 
+		pdma: pdma@ffda0000 {
+			compatible = "arm,pl330", "arm,primecell";
+			reg = <0xffda0000 0x1000>;
+			interrupts = <0 81 4>,
+				     <0 82 4>,
+				     <0 83 4>,
+				     <0 84 4>,
+				     <0 85 4>,
+				     <0 86 4>,
+				     <0 87 4>,
+				     <0 88 4>,
+				     <0 89 4>;
+			#dma-cells = <1>;
+			#dma-channels = <8>;
+			#dma-requests = <32>;
+			clocks = <&clkmgr STRATIX10_L4_MAIN_CLK>;
+			clock-names = "apb_pclk";
+		};
+
 		rst: rstmgr@ffd11000 {
 			#reset-cells = <1>;
 			compatible = "altr,rst-mgr";
@@ -288,24 +345,32 @@
 			compatible = "snps,dw-apb-timer";
 			interrupts = <0 113 4>;
 			reg = <0xffc03000 0x100>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
+			clock-names = "timer";
 		};
 
 		timer1: timer1@ffc03100 {
 			compatible = "snps,dw-apb-timer";
 			interrupts = <0 114 4>;
 			reg = <0xffc03100 0x100>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
+			clock-names = "timer";
 		};
 
 		timer2: timer2@ffd00000 {
 			compatible = "snps,dw-apb-timer";
 			interrupts = <0 115 4>;
 			reg = <0xffd00000 0x100>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
+			clock-names = "timer";
 		};
 
 		timer3: timer3@ffd00100 {
 			compatible = "snps,dw-apb-timer";
 			interrupts = <0 116 4>;
 			reg = <0xffd00100 0x100>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
+			clock-names = "timer";
 		};
 
 		uart0: serial0@ffc02000 {
@@ -315,6 +380,7 @@
 			reg-shift = <2>;
 			reg-io-width = <4>;
 			resets = <&rst UART0_RESET>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
 			status = "disabled";
 		};
 
@@ -325,6 +391,7 @@
 			reg-shift = <2>;
 			reg-io-width = <4>;
 			resets = <&rst UART1_RESET>;
+			clocks = <&clkmgr STRATIX10_L4_SP_CLK>;
 			status = "disabled";
 		};
 
@@ -387,5 +454,17 @@
 			resets = <&rst WATCHDOG3_RESET>;
 			status = "disabled";
 		};
+
+		eccmgr {
+			compatible = "altr,socfpga-s10-ecc-manager";
+			interrupts = <0 15 4>, <0 95 4>;
+			interrupt-controller;
+			#interrupt-cells = <2>;
+
+			sdramedac {
+				compatible = "altr,sdram-edac-s10";
+				interrupts = <16 4>, <48 4>;
+			};
+		};
 	};
 };
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index eaf13fe..f9b1ef1 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -50,6 +50,21 @@
 		/* We expect the bootloader to fill in the reg */
 		reg = <0 0 0 0>;
 	};
+
+	ref_033v: 033-v-ref {
+		compatible = "regulator-fixed";
+		regulator-name = "0.33V";
+		regulator-min-microvolt = <330000>;
+		regulator-max-microvolt = <330000>;
+	};
+
+	soc {
+		clocks {
+			osc1 {
+				clock-frequency = <25000000>;
+			};
+		};
+	};
 };
 
 &gpio1 {
@@ -79,7 +94,7 @@
 			rxd2-skew-ps = <420>; /* 0ps */
 			rxd3-skew-ps = <420>; /* 0ps */
 			txen-skew-ps = <0>; /* -420ps */
-			txc-skew-ps = <1860>; /* 960ps */
+			txc-skew-ps = <900>; /* 0ps */
 			rxdv-skew-ps = <420>; /* 0ps */
 			rxc-skew-ps = <1680>; /* 780ps */
 		};
@@ -105,3 +120,30 @@
 &watchdog0 {
 	status = "okay";
 };
+
+&i2c1 {
+	status = "okay";
+	clock-frequency = <100000>;
+
+	adc@14 {
+		compatible = "lltc,ltc2497";
+		reg = <0x14>;
+		vref-supply = <&ref_033v>;
+	};
+
+	temp@4c {
+		compatible = "maxim,max1619";
+		reg = <0x4c>;
+	};
+
+	eeprom@51 {
+		compatible = "atmel,24c32";
+		reg = <0x51>;
+		pagesize = <32>;
+	};
+
+	rtc@68 {
+		compatible = "dallas,ds1339";
+		reg = <0x68>;
+	};
+};
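
The board changes above wire a fixed 0.33 V supply to the LTC2497 ADC
as its voltage reference and populate i2c1 with the ADC, a MAX1619
temperature sensor, an AT24C32 EEPROM and a DS1339 RTC. As a rough
sketch (not part of the patch), the ADC driver resolves the
vref-supply phandle through the regulator framework like this:

    #include <linux/device.h>
    #include <linux/err.h>
    #include <linux/regulator/consumer.h>

    /* illustrative only: picking up the 0.33 V reference from DT */
    static int read_vref_microvolts(struct device *dev)
    {
            struct regulator *vref = devm_regulator_get(dev, "vref");

            if (IS_ERR(vref))
                    return PTR_ERR(vref);
            return regulator_get_voltage(vref);  /* 330000 on this board */
    }
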
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index cb5a243..e3fdb0f 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -47,6 +47,12 @@
 	select CRYPTO_HASH
 	select CRYPTO_SM3
 
+config CRYPTO_SM4_ARM64_CE
+	tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_SM4
+
 config CRYPTO_GHASH_ARM64_CE
 	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index f35ac68..bcafd01 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -23,6 +23,9 @@
 obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
 sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
 
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
+sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
+
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index e3a375c..88f5aef 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -19,24 +19,33 @@
 	 *			     u32 *macp, u8 const rk[], u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_auth_data)
-	ldr	w8, [x3]			/* leftover from prev round? */
+	frame_push	7
+
+	mov	x19, x0
+	mov	x20, x1
+	mov	x21, x2
+	mov	x22, x3
+	mov	x23, x4
+	mov	x24, x5
+
+	ldr	w25, [x22]			/* leftover from prev round? */
 	ld1	{v0.16b}, [x0]			/* load mac */
-	cbz	w8, 1f
-	sub	w8, w8, #16
+	cbz	w25, 1f
+	sub	w25, w25, #16
 	eor	v1.16b, v1.16b, v1.16b
-0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
-	subs	w2, w2, #1
-	add	w8, w8, #1
+0:	ldrb	w7, [x20], #1			/* get 1 byte of input */
+	subs	w21, w21, #1
+	add	w25, w25, #1
 	ins	v1.b[0], w7
 	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
 	beq	8f				/* out of input? */
-	cbnz	w8, 0b
+	cbnz	w25, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.4s}, [x4]			/* load first round key */
-	prfm	pldl1strm, [x1]
-	cmp	w5, #12				/* which key size? */
-	add	x6, x4, #16
-	sub	w7, w5, #2			/* modified # of rounds */
+1:	ld1	{v3.4s}, [x23]			/* load first round key */
+	prfm	pldl1strm, [x20]
+	cmp	w24, #12			/* which key size? */
+	add	x6, x23, #16
+	sub	w7, w24, #2			/* modified # of rounds */
 	bmi	2f
 	bne	5f
 	mov	v5.16b, v3.16b
@@ -55,33 +64,43 @@
 	ld1	{v5.4s}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
-	subs	w2, w2, #16			/* last data? */
+	subs	w21, w21, #16			/* last data? */
 	eor	v0.16b, v0.16b, v5.16b		/* final round */
 	bmi	6f
-	ld1	{v1.16b}, [x1], #16		/* load next input block */
+	ld1	{v1.16b}, [x20], #16		/* load next input block */
 	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
-	bne	1b
-6:	st1	{v0.16b}, [x0]			/* store mac */
+	beq	6f
+
+	if_will_cond_yield_neon
+	st1	{v0.16b}, [x19]			/* store mac */
+	do_cond_yield_neon
+	ld1	{v0.16b}, [x19]			/* reload mac */
+	endif_yield_neon
+
+	b	1b
+6:	st1	{v0.16b}, [x19]			/* store mac */
 	beq	10f
-	adds	w2, w2, #16
+	adds	w21, w21, #16
 	beq	10f
-	mov	w8, w2
-7:	ldrb	w7, [x1], #1
+	mov	w25, w21
+7:	ldrb	w7, [x20], #1
 	umov	w6, v0.b[0]
 	eor	w6, w6, w7
-	strb	w6, [x0], #1
-	subs	w2, w2, #1
+	strb	w6, [x19], #1
+	subs	w21, w21, #1
 	beq	10f
 	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
 	b	7b
-8:	mov	w7, w8
-	add	w8, w8, #16
+8:	mov	w7, w25
+	add	w25, w25, #16
 9:	ext	v1.16b, v1.16b, v1.16b, #1
 	adds	w7, w7, #1
 	bne	9b
 	eor	v0.16b, v0.16b, v1.16b
-	st1	{v0.16b}, [x0]
-10:	str	w8, [x3]
+	st1	{v0.16b}, [x19]
+10:	str	w25, [x22]
+
+	frame_pop
 	ret
 ENDPROC(ce_aes_ccm_auth_data)
 
@@ -126,19 +145,29 @@
 ENDPROC(ce_aes_ccm_final)
 
 	.macro	aes_ccm_do_crypt,enc
-	ldr	x8, [x6, #8]			/* load lower ctr */
-	ld1	{v0.16b}, [x5]			/* load mac */
-CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
+	frame_push	8
+
+	mov	x19, x0
+	mov	x20, x1
+	mov	x21, x2
+	mov	x22, x3
+	mov	x23, x4
+	mov	x24, x5
+	mov	x25, x6
+
+	ldr	x26, [x25, #8]			/* load lower ctr */
+	ld1	{v0.16b}, [x24]			/* load mac */
+CPU_LE(	rev	x26, x26		)	/* keep swabbed ctr in reg */
 0:	/* outer loop */
-	ld1	{v1.8b}, [x6]			/* load upper ctr */
-	prfm	pldl1strm, [x1]
-	add	x8, x8, #1
-	rev	x9, x8
-	cmp	w4, #12				/* which key size? */
-	sub	w7, w4, #2			/* get modified # of rounds */
+	ld1	{v1.8b}, [x25]			/* load upper ctr */
+	prfm	pldl1strm, [x20]
+	add	x26, x26, #1
+	rev	x9, x26
+	cmp	w23, #12			/* which key size? */
+	sub	w7, w23, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.4s}, [x3]			/* load first round key */
-	add	x10, x3, #16
+	ld1	{v3.4s}, [x22]			/* load first round key */
+	add	x10, x22, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
@@ -165,9 +194,9 @@
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
-	subs	w2, w2, #16
-	bmi	6f				/* partial block? */
-	ld1	{v2.16b}, [x1], #16		/* load next input block */
+	subs	w21, w21, #16
+	bmi	7f				/* partial block? */
+	ld1	{v2.16b}, [x20], #16		/* load next input block */
 	.if	\enc == 1
 	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
 	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
@@ -176,18 +205,29 @@
 	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
 	.endif
 	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
-	st1	{v1.16b}, [x0], #16		/* write output block */
-	bne	0b
-CPU_LE(	rev	x8, x8			)
-	st1	{v0.16b}, [x5]			/* store mac */
-	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
-5:	ret
+	st1	{v1.16b}, [x19], #16		/* write output block */
+	beq	5f
 
-6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
+	if_will_cond_yield_neon
+	st1	{v0.16b}, [x24]			/* store mac */
+	do_cond_yield_neon
+	ld1	{v0.16b}, [x24]			/* reload mac */
+	endif_yield_neon
+
+	b	0b
+5:
+CPU_LE(	rev	x26, x26			)
+	st1	{v0.16b}, [x24]			/* store mac */
+	str	x26, [x25, #8]			/* store lsb end of ctr (BE) */
+
+6:	frame_pop
+	ret
+
+7:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
-	st1	{v0.16b}, [x5]			/* store mac */
-	add	w2, w2, #16			/* process partial tail block */
-7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
+	st1	{v0.16b}, [x24]			/* store mac */
+	add	w21, w21, #16			/* process partial tail block */
+8:	ldrb	w9, [x20], #1			/* get 1 byte of input */
 	umov	w6, v1.b[0]			/* get top crypted ctr byte */
 	umov	w7, v0.b[0]			/* get top mac byte */
 	.if	\enc == 1
@@ -197,13 +237,13 @@
 	eor	w9, w9, w6
 	eor	w7, w7, w9
 	.endif
-	strb	w9, [x0], #1			/* store out byte */
-	strb	w7, [x5], #1			/* store mac byte */
-	subs	w2, w2, #1
-	beq	5b
+	strb	w9, [x19], #1			/* store out byte */
+	strb	w7, [x24], #1			/* store mac byte */
+	subs	w21, w21, #1
+	beq	6b
 	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
 	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
-	b	7b
+	b	8b
 	.endm
 
 	/*
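
This file is the first of many in this merge converted to the NEON
yield pattern: incoming arguments are moved out of x0-x7 into the
callee-saved registers set up by frame_push (so they survive the call
out to the scheduler), partial state such as the running MAC is stored
back to memory, and the if_will_cond_yield_neon / do_cond_yield_neon /
endif_yield_neon macros test for a pending reschedule between blocks
so that long inputs no longer keep preemption disabled. A C analogue
of the transformed loop, illustrative only (should_yield(),
store_state(), load_state() and process_one_block() are stand-ins,
not real kernel API):

    #include <asm/neon.h>
    #include <linux/types.h>

    /* sketch of the asm yield pattern; hypothetical helpers */
    static void ccm_auth_blocks(u8 mac[16], const u8 *in, int blocks)
    {
            while (blocks--) {
                    process_one_block(mac, in);     /* NEON work */
                    in += 16;
                    if (blocks && should_yield()) {
                            store_state(mac);       /* st1 {v0.16b}, [x19] */
                            kernel_neon_end();      /* preemption point */
                            kernel_neon_begin();
                            load_state(mac);        /* ld1 {v0.16b}, [x19] */
                    }
            }
    }
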
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 50330f5..623e74e 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -30,18 +30,21 @@
 	.endm
 
 	/* prepare for encryption with key in rk[] */
-	.macro		enc_prepare, rounds, rk, ignore
-	load_round_keys	\rounds, \rk
+	.macro		enc_prepare, rounds, rk, temp
+	mov		\temp, \rk
+	load_round_keys	\rounds, \temp
 	.endm
 
 	/* prepare for encryption (again) but with new key in rk[] */
-	.macro		enc_switch_key, rounds, rk, ignore
-	load_round_keys	\rounds, \rk
+	.macro		enc_switch_key, rounds, rk, temp
+	mov		\temp, \rk
+	load_round_keys	\rounds, \temp
 	.endm
 
 	/* prepare for decryption with key in rk[] */
-	.macro		dec_prepare, rounds, rk, ignore
-	load_round_keys	\rounds, \rk
+	.macro		dec_prepare, rounds, rk, temp
+	mov		\temp, \rk
+	load_round_keys	\rounds, \temp
 	.endm
 
 	.macro		do_enc_Nx, de, mc, k, i0, i1, i2, i3
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index a68412e..483a713 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -14,12 +14,12 @@
 	.align		4
 
 aes_encrypt_block4x:
-	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
+	encrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
 	ret
 ENDPROC(aes_encrypt_block4x)
 
 aes_decrypt_block4x:
-	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
+	decrypt_block4x	v0, v1, v2, v3, w22, x21, x8, w7
 	ret
 ENDPROC(aes_decrypt_block4x)
 
@@ -31,57 +31,71 @@
 	 */
 
 AES_ENTRY(aes_ecb_encrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	5
 
-	enc_prepare	w3, x2, x5
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+
+.Lecbencrestart:
+	enc_prepare	w22, x21, x5
 
 .LecbencloopNx:
-	subs		w4, w4, #4
+	subs		w23, w23, #4
 	bmi		.Lecbenc1x
-	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
 	bl		aes_encrypt_block4x
-	st1		{v0.16b-v3.16b}, [x0], #64
+	st1		{v0.16b-v3.16b}, [x19], #64
+	cond_yield_neon	.Lecbencrestart
 	b		.LecbencloopNx
 .Lecbenc1x:
-	adds		w4, w4, #4
+	adds		w23, w23, #4
 	beq		.Lecbencout
 .Lecbencloop:
-	ld1		{v0.16b}, [x1], #16		/* get next pt block */
-	encrypt_block	v0, w3, x2, x5, w6
-	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
+	ld1		{v0.16b}, [x20], #16		/* get next pt block */
+	encrypt_block	v0, w22, x21, x5, w6
+	st1		{v0.16b}, [x19], #16
+	subs		w23, w23, #1
 	bne		.Lecbencloop
 .Lecbencout:
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 AES_ENDPROC(aes_ecb_encrypt)
 
 
 AES_ENTRY(aes_ecb_decrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	5
 
-	dec_prepare	w3, x2, x5
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+
+.Lecbdecrestart:
+	dec_prepare	w22, x21, x5
 
 .LecbdecloopNx:
-	subs		w4, w4, #4
+	subs		w23, w23, #4
 	bmi		.Lecbdec1x
-	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
 	bl		aes_decrypt_block4x
-	st1		{v0.16b-v3.16b}, [x0], #64
+	st1		{v0.16b-v3.16b}, [x19], #64
+	cond_yield_neon	.Lecbdecrestart
 	b		.LecbdecloopNx
 .Lecbdec1x:
-	adds		w4, w4, #4
+	adds		w23, w23, #4
 	beq		.Lecbdecout
 .Lecbdecloop:
-	ld1		{v0.16b}, [x1], #16		/* get next ct block */
-	decrypt_block	v0, w3, x2, x5, w6
-	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
+	ld1		{v0.16b}, [x20], #16		/* get next ct block */
+	decrypt_block	v0, w22, x21, x5, w6
+	st1		{v0.16b}, [x19], #16
+	subs		w23, w23, #1
 	bne		.Lecbdecloop
 .Lecbdecout:
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 AES_ENDPROC(aes_ecb_decrypt)
 
@@ -94,78 +108,100 @@
 	 */
 
 AES_ENTRY(aes_cbc_encrypt)
-	ld1		{v4.16b}, [x5]			/* get iv */
-	enc_prepare	w3, x2, x6
+	frame_push	6
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+
+.Lcbcencrestart:
+	ld1		{v4.16b}, [x24]			/* get iv */
+	enc_prepare	w22, x21, x6
 
 .Lcbcencloop4x:
-	subs		w4, w4, #4
+	subs		w23, w23, #4
 	bmi		.Lcbcenc1x
-	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
 	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
-	encrypt_block	v0, w3, x2, x6, w7
+	encrypt_block	v0, w22, x21, x6, w7
 	eor		v1.16b, v1.16b, v0.16b
-	encrypt_block	v1, w3, x2, x6, w7
+	encrypt_block	v1, w22, x21, x6, w7
 	eor		v2.16b, v2.16b, v1.16b
-	encrypt_block	v2, w3, x2, x6, w7
+	encrypt_block	v2, w22, x21, x6, w7
 	eor		v3.16b, v3.16b, v2.16b
-	encrypt_block	v3, w3, x2, x6, w7
-	st1		{v0.16b-v3.16b}, [x0], #64
+	encrypt_block	v3, w22, x21, x6, w7
+	st1		{v0.16b-v3.16b}, [x19], #64
 	mov		v4.16b, v3.16b
+	st1		{v4.16b}, [x24]			/* return iv */
+	cond_yield_neon	.Lcbcencrestart
 	b		.Lcbcencloop4x
 .Lcbcenc1x:
-	adds		w4, w4, #4
+	adds		w23, w23, #4
 	beq		.Lcbcencout
 .Lcbcencloop:
-	ld1		{v0.16b}, [x1], #16		/* get next pt block */
+	ld1		{v0.16b}, [x20], #16		/* get next pt block */
 	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
-	encrypt_block	v4, w3, x2, x6, w7
-	st1		{v4.16b}, [x0], #16
-	subs		w4, w4, #1
+	encrypt_block	v4, w22, x21, x6, w7
+	st1		{v4.16b}, [x19], #16
+	subs		w23, w23, #1
 	bne		.Lcbcencloop
 .Lcbcencout:
-	st1		{v4.16b}, [x5]			/* return iv */
+	st1		{v4.16b}, [x24]			/* return iv */
+	frame_pop
 	ret
 AES_ENDPROC(aes_cbc_encrypt)
 
 
 AES_ENTRY(aes_cbc_decrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	6
 
-	ld1		{v7.16b}, [x5]			/* get iv */
-	dec_prepare	w3, x2, x6
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+
+.Lcbcdecrestart:
+	ld1		{v7.16b}, [x24]			/* get iv */
+	dec_prepare	w22, x21, x6
 
 .LcbcdecloopNx:
-	subs		w4, w4, #4
+	subs		w23, w23, #4
 	bmi		.Lcbcdec1x
-	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
 	mov		v4.16b, v0.16b
 	mov		v5.16b, v1.16b
 	mov		v6.16b, v2.16b
 	bl		aes_decrypt_block4x
-	sub		x1, x1, #16
+	sub		x20, x20, #16
 	eor		v0.16b, v0.16b, v7.16b
 	eor		v1.16b, v1.16b, v4.16b
-	ld1		{v7.16b}, [x1], #16		/* reload 1 ct block */
+	ld1		{v7.16b}, [x20], #16		/* reload 1 ct block */
 	eor		v2.16b, v2.16b, v5.16b
 	eor		v3.16b, v3.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x0], #64
+	st1		{v0.16b-v3.16b}, [x19], #64
+	st1		{v7.16b}, [x24]			/* return iv */
+	cond_yield_neon	.Lcbcdecrestart
 	b		.LcbcdecloopNx
 .Lcbcdec1x:
-	adds		w4, w4, #4
+	adds		w23, w23, #4
 	beq		.Lcbcdecout
 .Lcbcdecloop:
-	ld1		{v1.16b}, [x1], #16		/* get next ct block */
+	ld1		{v1.16b}, [x20], #16		/* get next ct block */
 	mov		v0.16b, v1.16b			/* ...and copy to v0 */
-	decrypt_block	v0, w3, x2, x6, w7
+	decrypt_block	v0, w22, x21, x6, w7
 	eor		v0.16b, v0.16b, v7.16b		/* xor with iv => pt */
 	mov		v7.16b, v1.16b			/* ct is next iv */
-	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
+	st1		{v0.16b}, [x19], #16
+	subs		w23, w23, #1
 	bne		.Lcbcdecloop
 .Lcbcdecout:
-	st1		{v7.16b}, [x5]			/* return iv */
-	ldp		x29, x30, [sp], #16
+	st1		{v7.16b}, [x24]			/* return iv */
+	frame_pop
 	ret
 AES_ENDPROC(aes_cbc_decrypt)
 
@@ -176,19 +212,26 @@
 	 */
 
 AES_ENTRY(aes_ctr_encrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	6
 
-	enc_prepare	w3, x2, x6
-	ld1		{v4.16b}, [x5]
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+
+.Lctrrestart:
+	enc_prepare	w22, x21, x6
+	ld1		{v4.16b}, [x24]
 
 	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
 	rev		x6, x6
-	cmn		w6, w4			/* 32 bit overflow? */
-	bcs		.Lctrloop
 .LctrloopNx:
-	subs		w4, w4, #4
+	subs		w23, w23, #4
 	bmi		.Lctr1x
+	cmn		w6, #4			/* 32 bit overflow? */
+	bcs		.Lctr1x
 	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
 	dup		v7.4s, w6
 	mov		v0.16b, v4.16b
@@ -200,25 +243,27 @@
 	mov		v1.s[3], v8.s[0]
 	mov		v2.s[3], v8.s[1]
 	mov		v3.s[3], v8.s[2]
-	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
+	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
 	bl		aes_encrypt_block4x
 	eor		v0.16b, v5.16b, v0.16b
-	ld1		{v5.16b}, [x1], #16		/* get 1 input block  */
+	ld1		{v5.16b}, [x20], #16		/* get 1 input block  */
 	eor		v1.16b, v6.16b, v1.16b
 	eor		v2.16b, v7.16b, v2.16b
 	eor		v3.16b, v5.16b, v3.16b
-	st1		{v0.16b-v3.16b}, [x0], #64
+	st1		{v0.16b-v3.16b}, [x19], #64
 	add		x6, x6, #4
 	rev		x7, x6
 	ins		v4.d[1], x7
-	cbz		w4, .Lctrout
+	cbz		w23, .Lctrout
+	st1		{v4.16b}, [x24]		/* return next CTR value */
+	cond_yield_neon	.Lctrrestart
 	b		.LctrloopNx
 .Lctr1x:
-	adds		w4, w4, #4
+	adds		w23, w23, #4
 	beq		.Lctrout
 .Lctrloop:
 	mov		v0.16b, v4.16b
-	encrypt_block	v0, w3, x2, x8, w7
+	encrypt_block	v0, w22, x21, x8, w7
 
 	adds		x6, x6, #1		/* increment BE ctr */
 	rev		x7, x6
@@ -226,22 +271,22 @@
 	bcs		.Lctrcarry		/* overflow? */
 
 .Lctrcarrydone:
-	subs		w4, w4, #1
+	subs		w23, w23, #1
 	bmi		.Lctrtailblock		/* blocks <0 means tail block */
-	ld1		{v3.16b}, [x1], #16
+	ld1		{v3.16b}, [x20], #16
 	eor		v3.16b, v0.16b, v3.16b
-	st1		{v3.16b}, [x0], #16
+	st1		{v3.16b}, [x19], #16
 	bne		.Lctrloop
 
 .Lctrout:
-	st1		{v4.16b}, [x5]		/* return next CTR value */
-	ldp		x29, x30, [sp], #16
+	st1		{v4.16b}, [x24]		/* return next CTR value */
+.Lctrret:
+	frame_pop
 	ret
 
 .Lctrtailblock:
-	st1		{v0.16b}, [x0]
-	ldp		x29, x30, [sp], #16
-	ret
+	st1		{v0.16b}, [x19]
+	b		.Lctrret
 
 .Lctrcarry:
 	umov		x7, v4.d[0]		/* load upper word of ctr  */
@@ -274,10 +319,16 @@
 CPU_BE(	.quad		0x87, 1		)
 
 AES_ENTRY(aes_xts_encrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	6
 
-	ld1		{v4.16b}, [x6]
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x6
+
+	ld1		{v4.16b}, [x24]
 	cbz		w7, .Lxtsencnotfirst
 
 	enc_prepare	w3, x5, x8
@@ -286,15 +337,17 @@
 	ldr		q7, .Lxts_mul_x
 	b		.LxtsencNx
 
+.Lxtsencrestart:
+	ld1		{v4.16b}, [x24]
 .Lxtsencnotfirst:
-	enc_prepare	w3, x2, x8
+	enc_prepare	w22, x21, x8
 .LxtsencloopNx:
 	ldr		q7, .Lxts_mul_x
 	next_tweak	v4, v4, v7, v8
 .LxtsencNx:
-	subs		w4, w4, #4
+	subs		w23, w23, #4
 	bmi		.Lxtsenc1x
-	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
+	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 pt blocks */
 	next_tweak	v5, v4, v7, v8
 	eor		v0.16b, v0.16b, v4.16b
 	next_tweak	v6, v5, v7, v8
@@ -307,35 +360,43 @@
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x0], #64
+	st1		{v0.16b-v3.16b}, [x19], #64
 	mov		v4.16b, v7.16b
-	cbz		w4, .Lxtsencout
+	cbz		w23, .Lxtsencout
+	st1		{v4.16b}, [x24]
+	cond_yield_neon	.Lxtsencrestart
 	b		.LxtsencloopNx
 .Lxtsenc1x:
-	adds		w4, w4, #4
+	adds		w23, w23, #4
 	beq		.Lxtsencout
 .Lxtsencloop:
-	ld1		{v1.16b}, [x1], #16
+	ld1		{v1.16b}, [x20], #16
 	eor		v0.16b, v1.16b, v4.16b
-	encrypt_block	v0, w3, x2, x8, w7
+	encrypt_block	v0, w22, x21, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
+	st1		{v0.16b}, [x19], #16
+	subs		w23, w23, #1
 	beq		.Lxtsencout
 	next_tweak	v4, v4, v7, v8
 	b		.Lxtsencloop
 .Lxtsencout:
-	st1		{v4.16b}, [x6]
-	ldp		x29, x30, [sp], #16
+	st1		{v4.16b}, [x24]
+	frame_pop
 	ret
 AES_ENDPROC(aes_xts_encrypt)
 
 
 AES_ENTRY(aes_xts_decrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	6
 
-	ld1		{v4.16b}, [x6]
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x6
+
+	ld1		{v4.16b}, [x24]
 	cbz		w7, .Lxtsdecnotfirst
 
 	enc_prepare	w3, x5, x8
@@ -344,15 +405,17 @@
 	ldr		q7, .Lxts_mul_x
 	b		.LxtsdecNx
 
+.Lxtsdecrestart:
+	ld1		{v4.16b}, [x24]
 .Lxtsdecnotfirst:
-	dec_prepare	w3, x2, x8
+	dec_prepare	w22, x21, x8
 .LxtsdecloopNx:
 	ldr		q7, .Lxts_mul_x
 	next_tweak	v4, v4, v7, v8
 .LxtsdecNx:
-	subs		w4, w4, #4
+	subs		w23, w23, #4
 	bmi		.Lxtsdec1x
-	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
+	ld1		{v0.16b-v3.16b}, [x20], #64	/* get 4 ct blocks */
 	next_tweak	v5, v4, v7, v8
 	eor		v0.16b, v0.16b, v4.16b
 	next_tweak	v6, v5, v7, v8
@@ -365,26 +428,28 @@
 	eor		v0.16b, v0.16b, v4.16b
 	eor		v1.16b, v1.16b, v5.16b
 	eor		v2.16b, v2.16b, v6.16b
-	st1		{v0.16b-v3.16b}, [x0], #64
+	st1		{v0.16b-v3.16b}, [x19], #64
 	mov		v4.16b, v7.16b
-	cbz		w4, .Lxtsdecout
+	cbz		w23, .Lxtsdecout
+	st1		{v4.16b}, [x24]
+	cond_yield_neon	.Lxtsdecrestart
 	b		.LxtsdecloopNx
 .Lxtsdec1x:
-	adds		w4, w4, #4
+	adds		w23, w23, #4
 	beq		.Lxtsdecout
 .Lxtsdecloop:
-	ld1		{v1.16b}, [x1], #16
+	ld1		{v1.16b}, [x20], #16
 	eor		v0.16b, v1.16b, v4.16b
-	decrypt_block	v0, w3, x2, x8, w7
+	decrypt_block	v0, w22, x21, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
+	st1		{v0.16b}, [x19], #16
+	subs		w23, w23, #1
 	beq		.Lxtsdecout
 	next_tweak	v4, v4, v7, v8
 	b		.Lxtsdecloop
 .Lxtsdecout:
-	st1		{v4.16b}, [x6]
-	ldp		x29, x30, [sp], #16
+	st1		{v4.16b}, [x24]
+	frame_pop
 	ret
 AES_ENDPROC(aes_xts_decrypt)
 
@@ -393,43 +458,61 @@
 	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
 	 */
 AES_ENTRY(aes_mac_update)
-	ld1		{v0.16b}, [x4]			/* get dg */
+	frame_push	6
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x6
+
+	ld1		{v0.16b}, [x23]			/* get dg */
 	enc_prepare	w2, x1, x7
 	cbz		w5, .Lmacloop4x
 
 	encrypt_block	v0, w2, x1, x7, w8
 
 .Lmacloop4x:
-	subs		w3, w3, #4
+	subs		w22, w22, #4
 	bmi		.Lmac1x
-	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt block */
+	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next pt block */
 	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
-	encrypt_block	v0, w2, x1, x7, w8
+	encrypt_block	v0, w21, x20, x7, w8
 	eor		v0.16b, v0.16b, v2.16b
-	encrypt_block	v0, w2, x1, x7, w8
+	encrypt_block	v0, w21, x20, x7, w8
 	eor		v0.16b, v0.16b, v3.16b
-	encrypt_block	v0, w2, x1, x7, w8
+	encrypt_block	v0, w21, x20, x7, w8
 	eor		v0.16b, v0.16b, v4.16b
-	cmp		w3, wzr
-	csinv		x5, x6, xzr, eq
+	cmp		w22, wzr
+	csinv		x5, x24, xzr, eq
 	cbz		w5, .Lmacout
-	encrypt_block	v0, w2, x1, x7, w8
+	encrypt_block	v0, w21, x20, x7, w8
+	st1		{v0.16b}, [x23]			/* return dg */
+	cond_yield_neon	.Lmacrestart
 	b		.Lmacloop4x
 .Lmac1x:
-	add		w3, w3, #4
+	add		w22, w22, #4
 .Lmacloop:
-	cbz		w3, .Lmacout
-	ld1		{v1.16b}, [x0], #16		/* get next pt block */
+	cbz		w22, .Lmacout
+	ld1		{v1.16b}, [x19], #16		/* get next pt block */
 	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
 
-	subs		w3, w3, #1
-	csinv		x5, x6, xzr, eq
+	subs		w22, w22, #1
+	csinv		x5, x24, xzr, eq
 	cbz		w5, .Lmacout
 
-	encrypt_block	v0, w2, x1, x7, w8
+.Lmacenc:
+	encrypt_block	v0, w21, x20, x7, w8
 	b		.Lmacloop
 
 .Lmacout:
-	st1		{v0.16b}, [x4]			/* return dg */
+	st1		{v0.16b}, [x23]			/* return dg */
+	frame_pop
 	ret
+
+.Lmacrestart:
+	ld1		{v0.16b}, [x23]			/* get dg */
+	enc_prepare	w21, x20, x0
+	b		.Lmacloop4x
 AES_ENDPROC(aes_mac_update)
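
The block modes above add a second refinement: state that lives only
in NEON registers and is derived rather than merely loaded (the round
keys expanded by enc_prepare/dec_prepare, the IV, the XTS tweak)
cannot be spilled and reloaded cheaply, so cond_yield_neon is given a
restart label (.Lecbencrestart and friends) and execution resumes
there after a yield, re-deriving that state from memory. Roughly, in
the same illustrative C terms as before (hypothetical helpers again):

    /* sketch of the restart-label variant used by the block modes */
    static void ecb_encrypt_blocks(u8 *out, const u8 *in, const u32 *rk,
                                   int rounds, int blocks)
    {
    restart:
            expand_round_keys(rk, rounds);          /* enc_prepare */
            while (blocks--) {
                    encrypt_one_block(out, in, rk);
                    out += 16;
                    in += 16;
                    if (blocks && should_yield()) {
                            kernel_neon_end();
                            kernel_neon_begin();
                            goto restart;   /* cond_yield_neon .Lecbencrestart */
                    }
            }
    }
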
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index ca04725..e613a87 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -565,54 +565,61 @@
 	 *		     int blocks)
 	 */
 	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	5
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
 
 99:	mov		x5, #1
-	lsl		x5, x5, x4
-	subs		w4, w4, #8
-	csel		x4, x4, xzr, pl
+	lsl		x5, x5, x23
+	subs		w23, w23, #8
+	csel		x23, x23, xzr, pl
 	csel		x5, x5, xzr, mi
 
-	ld1		{v0.16b}, [x1], #16
+	ld1		{v0.16b}, [x20], #16
 	tbnz		x5, #1, 0f
-	ld1		{v1.16b}, [x1], #16
+	ld1		{v1.16b}, [x20], #16
 	tbnz		x5, #2, 0f
-	ld1		{v2.16b}, [x1], #16
+	ld1		{v2.16b}, [x20], #16
 	tbnz		x5, #3, 0f
-	ld1		{v3.16b}, [x1], #16
+	ld1		{v3.16b}, [x20], #16
 	tbnz		x5, #4, 0f
-	ld1		{v4.16b}, [x1], #16
+	ld1		{v4.16b}, [x20], #16
 	tbnz		x5, #5, 0f
-	ld1		{v5.16b}, [x1], #16
+	ld1		{v5.16b}, [x20], #16
 	tbnz		x5, #6, 0f
-	ld1		{v6.16b}, [x1], #16
+	ld1		{v6.16b}, [x20], #16
 	tbnz		x5, #7, 0f
-	ld1		{v7.16b}, [x1], #16
+	ld1		{v7.16b}, [x20], #16
 
-0:	mov		bskey, x2
-	mov		rounds, x3
+0:	mov		bskey, x21
+	mov		rounds, x22
 	bl		\do8
 
-	st1		{\o0\().16b}, [x0], #16
+	st1		{\o0\().16b}, [x19], #16
 	tbnz		x5, #1, 1f
-	st1		{\o1\().16b}, [x0], #16
+	st1		{\o1\().16b}, [x19], #16
 	tbnz		x5, #2, 1f
-	st1		{\o2\().16b}, [x0], #16
+	st1		{\o2\().16b}, [x19], #16
 	tbnz		x5, #3, 1f
-	st1		{\o3\().16b}, [x0], #16
+	st1		{\o3\().16b}, [x19], #16
 	tbnz		x5, #4, 1f
-	st1		{\o4\().16b}, [x0], #16
+	st1		{\o4\().16b}, [x19], #16
 	tbnz		x5, #5, 1f
-	st1		{\o5\().16b}, [x0], #16
+	st1		{\o5\().16b}, [x19], #16
 	tbnz		x5, #6, 1f
-	st1		{\o6\().16b}, [x0], #16
+	st1		{\o6\().16b}, [x19], #16
 	tbnz		x5, #7, 1f
-	st1		{\o7\().16b}, [x0], #16
+	st1		{\o7\().16b}, [x19], #16
 
-	cbnz		x4, 99b
+	cbz		x23, 1f
+	cond_yield_neon
+	b		99b
 
-1:	ldp		x29, x30, [sp], #16
+1:	frame_pop
 	ret
 	.endm
 
@@ -632,43 +639,49 @@
 	 */
 	.align		4
 ENTRY(aesbs_cbc_decrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	6
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
 
 99:	mov		x6, #1
-	lsl		x6, x6, x4
-	subs		w4, w4, #8
-	csel		x4, x4, xzr, pl
+	lsl		x6, x6, x23
+	subs		w23, w23, #8
+	csel		x23, x23, xzr, pl
 	csel		x6, x6, xzr, mi
 
-	ld1		{v0.16b}, [x1], #16
+	ld1		{v0.16b}, [x20], #16
 	mov		v25.16b, v0.16b
 	tbnz		x6, #1, 0f
-	ld1		{v1.16b}, [x1], #16
+	ld1		{v1.16b}, [x20], #16
 	mov		v26.16b, v1.16b
 	tbnz		x6, #2, 0f
-	ld1		{v2.16b}, [x1], #16
+	ld1		{v2.16b}, [x20], #16
 	mov		v27.16b, v2.16b
 	tbnz		x6, #3, 0f
-	ld1		{v3.16b}, [x1], #16
+	ld1		{v3.16b}, [x20], #16
 	mov		v28.16b, v3.16b
 	tbnz		x6, #4, 0f
-	ld1		{v4.16b}, [x1], #16
+	ld1		{v4.16b}, [x20], #16
 	mov		v29.16b, v4.16b
 	tbnz		x6, #5, 0f
-	ld1		{v5.16b}, [x1], #16
+	ld1		{v5.16b}, [x20], #16
 	mov		v30.16b, v5.16b
 	tbnz		x6, #6, 0f
-	ld1		{v6.16b}, [x1], #16
+	ld1		{v6.16b}, [x20], #16
 	mov		v31.16b, v6.16b
 	tbnz		x6, #7, 0f
-	ld1		{v7.16b}, [x1]
+	ld1		{v7.16b}, [x20]
 
-0:	mov		bskey, x2
-	mov		rounds, x3
+0:	mov		bskey, x21
+	mov		rounds, x22
 	bl		aesbs_decrypt8
 
-	ld1		{v24.16b}, [x5]			// load IV
+	ld1		{v24.16b}, [x24]		// load IV
 
 	eor		v1.16b, v1.16b, v25.16b
 	eor		v6.16b, v6.16b, v26.16b
@@ -679,34 +692,36 @@
 	eor		v3.16b, v3.16b, v30.16b
 	eor		v5.16b, v5.16b, v31.16b
 
-	st1		{v0.16b}, [x0], #16
+	st1		{v0.16b}, [x19], #16
 	mov		v24.16b, v25.16b
 	tbnz		x6, #1, 1f
-	st1		{v1.16b}, [x0], #16
+	st1		{v1.16b}, [x19], #16
 	mov		v24.16b, v26.16b
 	tbnz		x6, #2, 1f
-	st1		{v6.16b}, [x0], #16
+	st1		{v6.16b}, [x19], #16
 	mov		v24.16b, v27.16b
 	tbnz		x6, #3, 1f
-	st1		{v4.16b}, [x0], #16
+	st1		{v4.16b}, [x19], #16
 	mov		v24.16b, v28.16b
 	tbnz		x6, #4, 1f
-	st1		{v2.16b}, [x0], #16
+	st1		{v2.16b}, [x19], #16
 	mov		v24.16b, v29.16b
 	tbnz		x6, #5, 1f
-	st1		{v7.16b}, [x0], #16
+	st1		{v7.16b}, [x19], #16
 	mov		v24.16b, v30.16b
 	tbnz		x6, #6, 1f
-	st1		{v3.16b}, [x0], #16
+	st1		{v3.16b}, [x19], #16
 	mov		v24.16b, v31.16b
 	tbnz		x6, #7, 1f
-	ld1		{v24.16b}, [x1], #16
-	st1		{v5.16b}, [x0], #16
-1:	st1		{v24.16b}, [x5]			// store IV
+	ld1		{v24.16b}, [x20], #16
+	st1		{v5.16b}, [x19], #16
+1:	st1		{v24.16b}, [x24]		// store IV
 
-	cbnz		x4, 99b
+	cbz		x23, 2f
+	cond_yield_neon
+	b		99b
 
-	ldp		x29, x30, [sp], #16
+2:	frame_pop
 	ret
 ENDPROC(aesbs_cbc_decrypt)
 
@@ -731,87 +746,93 @@
 	 */
 __xts_crypt8:
 	mov		x6, #1
-	lsl		x6, x6, x4
-	subs		w4, w4, #8
-	csel		x4, x4, xzr, pl
+	lsl		x6, x6, x23
+	subs		w23, w23, #8
+	csel		x23, x23, xzr, pl
 	csel		x6, x6, xzr, mi
 
-	ld1		{v0.16b}, [x1], #16
+	ld1		{v0.16b}, [x20], #16
 	next_tweak	v26, v25, v30, v31
 	eor		v0.16b, v0.16b, v25.16b
 	tbnz		x6, #1, 0f
 
-	ld1		{v1.16b}, [x1], #16
+	ld1		{v1.16b}, [x20], #16
 	next_tweak	v27, v26, v30, v31
 	eor		v1.16b, v1.16b, v26.16b
 	tbnz		x6, #2, 0f
 
-	ld1		{v2.16b}, [x1], #16
+	ld1		{v2.16b}, [x20], #16
 	next_tweak	v28, v27, v30, v31
 	eor		v2.16b, v2.16b, v27.16b
 	tbnz		x6, #3, 0f
 
-	ld1		{v3.16b}, [x1], #16
+	ld1		{v3.16b}, [x20], #16
 	next_tweak	v29, v28, v30, v31
 	eor		v3.16b, v3.16b, v28.16b
 	tbnz		x6, #4, 0f
 
-	ld1		{v4.16b}, [x1], #16
-	str		q29, [sp, #16]
+	ld1		{v4.16b}, [x20], #16
+	str		q29, [sp, #.Lframe_local_offset]
 	eor		v4.16b, v4.16b, v29.16b
 	next_tweak	v29, v29, v30, v31
 	tbnz		x6, #5, 0f
 
-	ld1		{v5.16b}, [x1], #16
-	str		q29, [sp, #32]
+	ld1		{v5.16b}, [x20], #16
+	str		q29, [sp, #.Lframe_local_offset + 16]
 	eor		v5.16b, v5.16b, v29.16b
 	next_tweak	v29, v29, v30, v31
 	tbnz		x6, #6, 0f
 
-	ld1		{v6.16b}, [x1], #16
-	str		q29, [sp, #48]
+	ld1		{v6.16b}, [x20], #16
+	str		q29, [sp, #.Lframe_local_offset + 32]
 	eor		v6.16b, v6.16b, v29.16b
 	next_tweak	v29, v29, v30, v31
 	tbnz		x6, #7, 0f
 
-	ld1		{v7.16b}, [x1], #16
-	str		q29, [sp, #64]
+	ld1		{v7.16b}, [x20], #16
+	str		q29, [sp, #.Lframe_local_offset + 48]
 	eor		v7.16b, v7.16b, v29.16b
 	next_tweak	v29, v29, v30, v31
 
-0:	mov		bskey, x2
-	mov		rounds, x3
+0:	mov		bskey, x21
+	mov		rounds, x22
 	br		x7
 ENDPROC(__xts_crypt8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
-	stp		x29, x30, [sp, #-80]!
-	mov		x29, sp
+	frame_push	6, 64
 
-	ldr		q30, .Lxts_mul_x
-	ld1		{v25.16b}, [x5]
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+
+0:	ldr		q30, .Lxts_mul_x
+	ld1		{v25.16b}, [x24]
 
 99:	adr		x7, \do8
 	bl		__xts_crypt8
 
-	ldp		q16, q17, [sp, #16]
-	ldp		q18, q19, [sp, #48]
+	ldp		q16, q17, [sp, #.Lframe_local_offset]
+	ldp		q18, q19, [sp, #.Lframe_local_offset + 32]
 
 	eor		\o0\().16b, \o0\().16b, v25.16b
 	eor		\o1\().16b, \o1\().16b, v26.16b
 	eor		\o2\().16b, \o2\().16b, v27.16b
 	eor		\o3\().16b, \o3\().16b, v28.16b
 
-	st1		{\o0\().16b}, [x0], #16
+	st1		{\o0\().16b}, [x19], #16
 	mov		v25.16b, v26.16b
 	tbnz		x6, #1, 1f
-	st1		{\o1\().16b}, [x0], #16
+	st1		{\o1\().16b}, [x19], #16
 	mov		v25.16b, v27.16b
 	tbnz		x6, #2, 1f
-	st1		{\o2\().16b}, [x0], #16
+	st1		{\o2\().16b}, [x19], #16
 	mov		v25.16b, v28.16b
 	tbnz		x6, #3, 1f
-	st1		{\o3\().16b}, [x0], #16
+	st1		{\o3\().16b}, [x19], #16
 	mov		v25.16b, v29.16b
 	tbnz		x6, #4, 1f
 
@@ -820,18 +841,22 @@
 	eor		\o6\().16b, \o6\().16b, v18.16b
 	eor		\o7\().16b, \o7\().16b, v19.16b
 
-	st1		{\o4\().16b}, [x0], #16
+	st1		{\o4\().16b}, [x19], #16
 	tbnz		x6, #5, 1f
-	st1		{\o5\().16b}, [x0], #16
+	st1		{\o5\().16b}, [x19], #16
 	tbnz		x6, #6, 1f
-	st1		{\o6\().16b}, [x0], #16
+	st1		{\o6\().16b}, [x19], #16
 	tbnz		x6, #7, 1f
-	st1		{\o7\().16b}, [x0], #16
+	st1		{\o7\().16b}, [x19], #16
 
-	cbnz		x4, 99b
+	cbz		x23, 1f
+	st1		{v25.16b}, [x24]
 
-1:	st1		{v25.16b}, [x5]
-	ldp		x29, x30, [sp], #80
+	cond_yield_neon	0b
+	b		99b
+
+1:	st1		{v25.16b}, [x24]
+	frame_pop
 	ret
 	.endm
 
@@ -856,24 +881,31 @@
 	 *		     int rounds, int blocks, u8 iv[], u8 final[])
 	 */
 ENTRY(aesbs_ctr_encrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	8
 
-	cmp		x6, #0
-	cset		x10, ne
-	add		x4, x4, x10		// do one extra block if final
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+	mov		x25, x6
 
-	ldp		x7, x8, [x5]
-	ld1		{v0.16b}, [x5]
+	cmp		x25, #0
+	cset		x26, ne
+	add		x23, x23, x26		// do one extra block if final
+
+98:	ldp		x7, x8, [x24]
+	ld1		{v0.16b}, [x24]
 CPU_LE(	rev		x7, x7		)
 CPU_LE(	rev		x8, x8		)
 	adds		x8, x8, #1
 	adc		x7, x7, xzr
 
 99:	mov		x9, #1
-	lsl		x9, x9, x4
-	subs		w4, w4, #8
-	csel		x4, x4, xzr, pl
+	lsl		x9, x9, x23
+	subs		w23, w23, #8
+	csel		x23, x23, xzr, pl
 	csel		x9, x9, xzr, le
 
 	tbnz		x9, #1, 0f
@@ -891,82 +923,85 @@
 	tbnz		x9, #7, 0f
 	next_ctr	v7
 
-0:	mov		bskey, x2
-	mov		rounds, x3
+0:	mov		bskey, x21
+	mov		rounds, x22
 	bl		aesbs_encrypt8
 
-	lsr		x9, x9, x10		// disregard the extra block
+	lsr		x9, x9, x26		// disregard the extra block
 	tbnz		x9, #0, 0f
 
-	ld1		{v8.16b}, [x1], #16
+	ld1		{v8.16b}, [x20], #16
 	eor		v0.16b, v0.16b, v8.16b
-	st1		{v0.16b}, [x0], #16
+	st1		{v0.16b}, [x19], #16
 	tbnz		x9, #1, 1f
 
-	ld1		{v9.16b}, [x1], #16
+	ld1		{v9.16b}, [x20], #16
 	eor		v1.16b, v1.16b, v9.16b
-	st1		{v1.16b}, [x0], #16
+	st1		{v1.16b}, [x19], #16
 	tbnz		x9, #2, 2f
 
-	ld1		{v10.16b}, [x1], #16
+	ld1		{v10.16b}, [x20], #16
 	eor		v4.16b, v4.16b, v10.16b
-	st1		{v4.16b}, [x0], #16
+	st1		{v4.16b}, [x19], #16
 	tbnz		x9, #3, 3f
 
-	ld1		{v11.16b}, [x1], #16
+	ld1		{v11.16b}, [x20], #16
 	eor		v6.16b, v6.16b, v11.16b
-	st1		{v6.16b}, [x0], #16
+	st1		{v6.16b}, [x19], #16
 	tbnz		x9, #4, 4f
 
-	ld1		{v12.16b}, [x1], #16
+	ld1		{v12.16b}, [x20], #16
 	eor		v3.16b, v3.16b, v12.16b
-	st1		{v3.16b}, [x0], #16
+	st1		{v3.16b}, [x19], #16
 	tbnz		x9, #5, 5f
 
-	ld1		{v13.16b}, [x1], #16
+	ld1		{v13.16b}, [x20], #16
 	eor		v7.16b, v7.16b, v13.16b
-	st1		{v7.16b}, [x0], #16
+	st1		{v7.16b}, [x19], #16
 	tbnz		x9, #6, 6f
 
-	ld1		{v14.16b}, [x1], #16
+	ld1		{v14.16b}, [x20], #16
 	eor		v2.16b, v2.16b, v14.16b
-	st1		{v2.16b}, [x0], #16
+	st1		{v2.16b}, [x19], #16
 	tbnz		x9, #7, 7f
 
-	ld1		{v15.16b}, [x1], #16
+	ld1		{v15.16b}, [x20], #16
 	eor		v5.16b, v5.16b, v15.16b
-	st1		{v5.16b}, [x0], #16
+	st1		{v5.16b}, [x19], #16
 
 8:	next_ctr	v0
-	cbnz		x4, 99b
+	st1		{v0.16b}, [x24]
+	cbz		x23, 0f
 
-0:	st1		{v0.16b}, [x5]
-	ldp		x29, x30, [sp], #16
+	cond_yield_neon	98b
+	b		99b
+
+0:	frame_pop
 	ret
 
 	/*
 	 * If we are handling the tail of the input (x6 != NULL), return the
 	 * final keystream block back to the caller.
 	 */
-1:	cbz		x6, 8b
-	st1		{v1.16b}, [x6]
+1:	cbz		x25, 8b
+	st1		{v1.16b}, [x25]
 	b		8b
-2:	cbz		x6, 8b
-	st1		{v4.16b}, [x6]
+2:	cbz		x25, 8b
+	st1		{v4.16b}, [x25]
 	b		8b
-3:	cbz		x6, 8b
-	st1		{v6.16b}, [x6]
+3:	cbz		x25, 8b
+	st1		{v6.16b}, [x25]
 	b		8b
-4:	cbz		x6, 8b
-	st1		{v3.16b}, [x6]
+4:	cbz		x25, 8b
+	st1		{v3.16b}, [x25]
 	b		8b
-5:	cbz		x6, 8b
-	st1		{v7.16b}, [x6]
+5:	cbz		x25, 8b
+	st1		{v7.16b}, [x25]
 	b		8b
-6:	cbz		x6, 8b
-	st1		{v2.16b}, [x6]
+6:	cbz		x25, 8b
+	st1		{v2.16b}, [x25]
 	b		8b
-7:	cbz		x6, 8b
-	st1		{v5.16b}, [x6]
+7:	cbz		x25, 8b
+	st1		{v5.16b}, [x25]
 	b		8b
 ENDPROC(aesbs_ctr_encrypt)
diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S
index 16ed3c7..8061bf0 100644
--- a/arch/arm64/crypto/crc32-ce-core.S
+++ b/arch/arm64/crypto/crc32-ce-core.S
@@ -100,9 +100,10 @@
 	dCONSTANT	.req	d0
 	qCONSTANT	.req	q0
 
-	BUF		.req	x0
-	LEN		.req	x1
-	CRC		.req	x2
+	BUF		.req	x19
+	LEN		.req	x20
+	CRC		.req	x21
+	CONST		.req	x22
 
 	vzr		.req	v9
 
@@ -123,7 +124,14 @@
 ENTRY(crc32c_pmull_le)
 	adr_l		x3, .Lcrc32c_constants
 
-0:	bic		LEN, LEN, #15
+0:	frame_push	4, 64
+
+	mov		BUF, x0
+	mov		LEN, x1
+	mov		CRC, x2
+	mov		CONST, x3
+
+	bic		LEN, LEN, #15
 	ld1		{v1.16b-v4.16b}, [BUF], #0x40
 	movi		vzr.16b, #0
 	fmov		dCONSTANT, CRC
@@ -132,7 +140,7 @@
 	cmp		LEN, #0x40
 	b.lt		less_64
 
-	ldr		qCONSTANT, [x3]
+	ldr		qCONSTANT, [CONST]
 
 loop_64:		/* 64 bytes Full cache line folding */
 	sub		LEN, LEN, #0x40
@@ -162,10 +170,21 @@
 	eor		v4.16b, v4.16b, v8.16b
 
 	cmp		LEN, #0x40
-	b.ge		loop_64
+	b.lt		less_64
+
+	if_will_cond_yield_neon
+	stp		q1, q2, [sp, #.Lframe_local_offset]
+	stp		q3, q4, [sp, #.Lframe_local_offset + 32]
+	do_cond_yield_neon
+	ldp		q1, q2, [sp, #.Lframe_local_offset]
+	ldp		q3, q4, [sp, #.Lframe_local_offset + 32]
+	ldr		qCONSTANT, [CONST]
+	movi		vzr.16b, #0
+	endif_yield_neon
+	b		loop_64
 
 less_64:		/* Folding cache line into 128bit */
-	ldr		qCONSTANT, [x3, #16]
+	ldr		qCONSTANT, [CONST, #16]
 
 	pmull2		v5.1q, v1.2d, vCONSTANT.2d
 	pmull		v1.1q, v1.1d, vCONSTANT.1d
@@ -204,8 +223,8 @@
 	eor		v1.16b, v1.16b, v2.16b
 
 	/* final 32-bit fold */
-	ldr		dCONSTANT, [x3, #32]
-	ldr		d3, [x3, #40]
+	ldr		dCONSTANT, [CONST, #32]
+	ldr		d3, [CONST, #40]
 
 	ext		v2.16b, v1.16b, vzr.16b, #4
 	and		v1.16b, v1.16b, v3.16b
@@ -213,7 +232,7 @@
 	eor		v1.16b, v1.16b, v2.16b
 
 	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
-	ldr		qCONSTANT, [x3, #48]
+	ldr		qCONSTANT, [CONST, #48]
 
 	and		v2.16b, v1.16b, v3.16b
 	ext		v2.16b, vzr.16b, v2.16b, #8
@@ -223,6 +242,7 @@
 	eor		v1.16b, v1.16b, v2.16b
 	mov		w0, v1.s[1]
 
+	frame_pop
 	ret
 ENDPROC(crc32_pmull_le)
 ENDPROC(crc32c_pmull_le)
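
The CRC conversions take yet another shape: the fold loop keeps a
large amount of live data in q-registers, so frame_push is given a
second argument (64 bytes here, 128 in the crct10dif conversion that
follows) to reserve a frame-local spill area, the live registers are
stashed there with stp/ldp around do_cond_yield_neon, and
loop-invariant constants are simply reloaded afterwards. Compressed
into the same illustrative C (hypothetical helpers):

    /* sketch: spill live vector state across the yield */
    if (should_yield()) {
            save_fold_state(spill);         /* stp q1, q2, [sp, #...] */
            kernel_neon_end();
            kernel_neon_begin();
            restore_fold_state(spill);      /* ldp q1, q2, [sp, #...] */
            reload_constants();             /* ldr qCONSTANT; movi vzr */
    }
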
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index f179c01..663ea71 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -74,13 +74,19 @@
 	.text
 	.cpu		generic+crypto
 
-	arg1_low32	.req	w0
-	arg2		.req	x1
-	arg3		.req	x2
+	arg1_low32	.req	w19
+	arg2		.req	x20
+	arg3		.req	x21
 
 	vzr		.req	v13
 
 ENTRY(crc_t10dif_pmull)
+	frame_push	3, 128
+
+	mov		arg1_low32, w0
+	mov		arg2, x1
+	mov		arg3, x2
+
 	movi		vzr.16b, #0		// init zero register
 
 	// adjust the 16-bit initial_crc value, scale it to 32 bits
@@ -175,8 +181,25 @@
 	subs		arg3, arg3, #128
 
 	// check if there is another 64B in the buffer to be able to fold
-	b.ge		_fold_64_B_loop
+	b.lt		_fold_64_B_end
 
+	if_will_cond_yield_neon
+	stp		q0, q1, [sp, #.Lframe_local_offset]
+	stp		q2, q3, [sp, #.Lframe_local_offset + 32]
+	stp		q4, q5, [sp, #.Lframe_local_offset + 64]
+	stp		q6, q7, [sp, #.Lframe_local_offset + 96]
+	do_cond_yield_neon
+	ldp		q0, q1, [sp, #.Lframe_local_offset]
+	ldp		q2, q3, [sp, #.Lframe_local_offset + 32]
+	ldp		q4, q5, [sp, #.Lframe_local_offset + 64]
+	ldp		q6, q7, [sp, #.Lframe_local_offset + 96]
+	ldr_l		q10, rk3, x8
+	movi		vzr.16b, #0		// init zero register
+	endif_yield_neon
+
+	b		_fold_64_B_loop
+
+_fold_64_B_end:
 	// at this point, the buffer pointer is pointing at the last y Bytes
 	// of the buffer the 64B of folded data is in 4 of the vector
 	// registers: v0, v1, v2, v3
@@ -304,6 +327,7 @@
 _cleanup:
 	// scale the result back to 16 bits
 	lsr		x0, x0, #16
+	frame_pop
 	ret
 
 _less_than_128:
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 11ebf1a..dcffb9e 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -213,22 +213,31 @@
 	.endm
 
 	.macro		__pmull_ghash, pn
-	ld1		{SHASH.2d}, [x3]
-	ld1		{XL.2d}, [x1]
+	frame_push	5
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+
+0:	ld1		{SHASH.2d}, [x22]
+	ld1		{XL.2d}, [x20]
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
 	__pmull_pre_\pn
 
 	/* do the head block first, if supplied */
-	cbz		x4, 0f
-	ld1		{T1.2d}, [x4]
-	b		1f
+	cbz		x23, 1f
+	ld1		{T1.2d}, [x23]
+	mov		x23, xzr
+	b		2f
 
-0:	ld1		{T1.2d}, [x2], #16
-	sub		w0, w0, #1
+1:	ld1		{T1.2d}, [x21], #16
+	sub		w19, w19, #1
 
-1:	/* multiply XL by SHASH in GF(2^128) */
+2:	/* multiply XL by SHASH in GF(2^128) */
 CPU_LE(	rev64		T1.16b, T1.16b	)
 
 	ext		T2.16b, XL.16b, XL.16b, #8
@@ -250,9 +259,18 @@
 	eor		T2.16b, T2.16b, XH.16b
 	eor		XL.16b, XL.16b, T2.16b
 
-	cbnz		w0, 0b
+	cbz		w19, 3f
 
-	st1		{XL.2d}, [x1]
+	if_will_cond_yield_neon
+	st1		{XL.2d}, [x20]
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b		1b
+
+3:	st1		{XL.2d}, [x20]
+	frame_pop
 	ret
 	.endm
 
@@ -304,38 +322,55 @@
 	.endm
 
 	.macro		pmull_gcm_do_crypt, enc
-	ld1		{SHASH.2d}, [x4]
-	ld1		{XL.2d}, [x1]
-	ldr		x8, [x5, #8]			// load lower counter
+	frame_push	10
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+	mov		x25, x6
+	mov		x26, x7
+	.if		\enc == 1
+	ldr		x27, [sp, #96]			// first stacked arg
+	.endif
+
+	ldr		x28, [x24, #8]			// load lower counter
+CPU_LE(	rev		x28, x28	)
+
+0:	mov		x0, x25
+	load_round_keys	w26, x0
+	ld1		{SHASH.2d}, [x23]
+	ld1		{XL.2d}, [x20]
 
 	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
-CPU_LE(	rev		x8, x8		)
 	shl		MASK.2d, MASK.2d, #57
 	eor		SHASH2.16b, SHASH2.16b, SHASH.16b
 
 	.if		\enc == 1
-	ld1		{KS.16b}, [x7]
+	ld1		{KS.16b}, [x27]
 	.endif
 
-0:	ld1		{CTR.8b}, [x5]			// load upper counter
-	ld1		{INP.16b}, [x3], #16
-	rev		x9, x8
-	add		x8, x8, #1
-	sub		w0, w0, #1
+1:	ld1		{CTR.8b}, [x24]			// load upper counter
+	ld1		{INP.16b}, [x22], #16
+	rev		x9, x28
+	add		x28, x28, #1
+	sub		w19, w19, #1
 	ins		CTR.d[1], x9			// set lower counter
 
 	.if		\enc == 1
 	eor		INP.16b, INP.16b, KS.16b	// encrypt input
-	st1		{INP.16b}, [x2], #16
+	st1		{INP.16b}, [x21], #16
 	.endif
 
 	rev64		T1.16b, INP.16b
 
-	cmp		w6, #12
-	b.ge		2f				// AES-192/256?
+	cmp		w26, #12
+	b.ge		4f				// AES-192/256?
 
-1:	enc_round	CTR, v21
+2:	enc_round	CTR, v21
 
 	ext		T2.16b, XL.16b, XL.16b, #8
 	ext		IN1.16b, T1.16b, T1.16b, #8
@@ -390,27 +425,39 @@
 
 	.if		\enc == 0
 	eor		INP.16b, INP.16b, KS.16b
-	st1		{INP.16b}, [x2], #16
+	st1		{INP.16b}, [x21], #16
 	.endif
 
-	cbnz		w0, 0b
+	cbz		w19, 3f
 
-CPU_LE(	rev		x8, x8		)
-	st1		{XL.2d}, [x1]
-	str		x8, [x5, #8]			// store lower counter
-
+	if_will_cond_yield_neon
+	st1		{XL.2d}, [x20]
 	.if		\enc == 1
-	st1		{KS.16b}, [x7]
+	st1		{KS.16b}, [x27]
+	.endif
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b		1b
+
+3:	st1		{XL.2d}, [x20]
+	.if		\enc == 1
+	st1		{KS.16b}, [x27]
 	.endif
 
+CPU_LE(	rev		x28, x28	)
+	str		x28, [x24, #8]			// store lower counter
+
+	frame_pop
 	ret
 
-2:	b.eq		3f				// AES-192?
+4:	b.eq		5f				// AES-192?
 	enc_round	CTR, v17
 	enc_round	CTR, v18
-3:	enc_round	CTR, v19
+5:	enc_round	CTR, v19
 	enc_round	CTR, v20
-	b		1b
+	b		2b
 	.endm
 
 	/*
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index cfc9c92..7cf0b1a 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -63,11 +63,12 @@
 
 asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
-				  u8 ctr[], int rounds, u8 ks[]);
+				  u8 ctr[], u32 const rk[], int rounds,
+				  u8 ks[]);
 
 asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
 				  const u8 src[], struct ghash_key const *k,
-				  u8 ctr[], int rounds);
+				  u8 ctr[], u32 const rk[], int rounds);
 
 asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
 					u32 const rk[], int rounds);
@@ -368,26 +369,29 @@
 		pmull_gcm_encrypt_block(ks, iv, NULL,
 					num_rounds(&ctx->aes_key));
 		put_unaligned_be32(3, iv + GCM_IV_SIZE);
+		kernel_neon_end();
 
-		err = skcipher_walk_aead_encrypt(&walk, req, true);
+		err = skcipher_walk_aead_encrypt(&walk, req, false);
 
 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
+			kernel_neon_begin();
 			pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
 					  walk.src.virt.addr, &ctx->ghash_key,
-					  iv, num_rounds(&ctx->aes_key), ks);
+					  iv, ctx->aes_key.key_enc,
+					  num_rounds(&ctx->aes_key), ks);
+			kernel_neon_end();
 
 			err = skcipher_walk_done(&walk,
 						 walk.nbytes % AES_BLOCK_SIZE);
 		}
-		kernel_neon_end();
 	} else {
 		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
 				    num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
-		err = skcipher_walk_aead_encrypt(&walk, req, true);
+		err = skcipher_walk_aead_encrypt(&walk, req, false);
 
 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
@@ -467,15 +471,19 @@
 		pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
 					num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
+		kernel_neon_end();
 
-		err = skcipher_walk_aead_decrypt(&walk, req, true);
+		err = skcipher_walk_aead_decrypt(&walk, req, false);
 
 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
+			kernel_neon_begin();
 			pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
 					  walk.src.virt.addr, &ctx->ghash_key,
-					  iv, num_rounds(&ctx->aes_key));
+					  iv, ctx->aes_key.key_enc,
+					  num_rounds(&ctx->aes_key));
+			kernel_neon_end();
 
 			err = skcipher_walk_done(&walk,
 						 walk.nbytes % AES_BLOCK_SIZE);
@@ -483,14 +491,12 @@
 		if (walk.nbytes)
 			pmull_gcm_encrypt_block(iv, iv, NULL,
 						num_rounds(&ctx->aes_key));
-
-		kernel_neon_end();
 	} else {
 		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
 				    num_rounds(&ctx->aes_key));
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
-		err = skcipher_walk_aead_decrypt(&walk, req, true);
+		err = skcipher_walk_aead_decrypt(&walk, req, false);
 
 		while (walk.nbytes >= AES_BLOCK_SIZE) {
 			int blocks = walk.nbytes / AES_BLOCK_SIZE;
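
On the glue side, kernel_neon_begin() used to be held across the
whole scatterwalk, which forced the walk into atomic mode (passing
true as the atomic argument) and kept preemption off for the entire
request. With the yield support in place, the begin/end pair now
brackets each chunk and the walk is allowed to sleep (false).
Condensed from the hunk above, the encrypt path now has this shape:

    err = skcipher_walk_aead_encrypt(&walk, req, false);  /* may sleep */
    while (walk.nbytes >= AES_BLOCK_SIZE) {
            int blocks = walk.nbytes / AES_BLOCK_SIZE;

            kernel_neon_begin();            /* NEON held per chunk only */
            pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
                              walk.src.virt.addr, &ctx->ghash_key,
                              iv, ctx->aes_key.key_enc,
                              num_rounds(&ctx->aes_key), ks);
            kernel_neon_end();

            err = skcipher_walk_done(&walk,
                                     walk.nbytes % AES_BLOCK_SIZE);
    }
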
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 4604985..78eb35f 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -69,30 +69,36 @@
 	 *			  int blocks)
 	 */
 ENTRY(sha1_ce_transform)
+	frame_push	3
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+
 	/* load round constants */
-	loadrc		k0.4s, 0x5a827999, w6
+0:	loadrc		k0.4s, 0x5a827999, w6
 	loadrc		k1.4s, 0x6ed9eba1, w6
 	loadrc		k2.4s, 0x8f1bbcdc, w6
 	loadrc		k3.4s, 0xca62c1d6, w6
 
 	/* load state */
-	ld1		{dgav.4s}, [x0]
-	ldr		dgb, [x0, #16]
+	ld1		{dgav.4s}, [x19]
+	ldr		dgb, [x19, #16]
 
 	/* load sha1_ce_state::finalize */
 	ldr_l		w4, sha1_ce_offsetof_finalize, x4
-	ldr		w4, [x0, x4]
+	ldr		w4, [x19, x4]
 
 	/* load input */
-0:	ld1		{v8.4s-v11.4s}, [x1], #64
-	sub		w2, w2, #1
+1:	ld1		{v8.4s-v11.4s}, [x20], #64
+	sub		w21, w21, #1
 
 CPU_LE(	rev32		v8.16b, v8.16b		)
 CPU_LE(	rev32		v9.16b, v9.16b		)
 CPU_LE(	rev32		v10.16b, v10.16b	)
 CPU_LE(	rev32		v11.16b, v11.16b	)
 
-1:	add		t0.4s, v8.4s, k0.4s
+2:	add		t0.4s, v8.4s, k0.4s
 	mov		dg0v.16b, dgav.16b
 
 	add_update	c, ev, k0,  8,  9, 10, 11, dgb
@@ -123,16 +129,25 @@
 	add		dgbv.2s, dgbv.2s, dg1v.2s
 	add		dgav.4s, dgav.4s, dg0v.4s
 
-	cbnz		w2, 0b
+	cbz		w21, 3f
+
+	if_will_cond_yield_neon
+	st1		{dgav.4s}, [x19]
+	str		dgb, [x19, #16]
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b		1b
 
 	/*
 	 * Final block: add padding and total bit count.
 	 * Skip if the input size was not a round multiple of the block size,
 	 * the padding is handled by the C code in that case.
 	 */
-	cbz		x4, 3f
+3:	cbz		x4, 4f
 	ldr_l		w4, sha1_ce_offsetof_count, x4
-	ldr		x4, [x0, x4]
+	ldr		x4, [x19, x4]
 	movi		v9.2d, #0
 	mov		x8, #0x80000000
 	movi		v10.2d, #0
@@ -141,10 +156,11 @@
 	mov		x4, #0
 	mov		v11.d[0], xzr
 	mov		v11.d[1], x7
-	b		1b
+	b		2b
 
 	/* store new state */
-3:	st1		{dgav.4s}, [x0]
-	str		dgb, [x0, #16]
+4:	st1		{dgav.4s}, [x19]
+	str		dgb, [x19, #16]
+	frame_pop
 	ret
 ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 4c3c89b..cd8b364 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -79,30 +79,36 @@
 	 */
 	.text
 ENTRY(sha2_ce_transform)
+	frame_push	3
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+
 	/* load round constants */
-	adr_l		x8, .Lsha2_rcon
+0:	adr_l		x8, .Lsha2_rcon
 	ld1		{ v0.4s- v3.4s}, [x8], #64
 	ld1		{ v4.4s- v7.4s}, [x8], #64
 	ld1		{ v8.4s-v11.4s}, [x8], #64
 	ld1		{v12.4s-v15.4s}, [x8]
 
 	/* load state */
-	ld1		{dgav.4s, dgbv.4s}, [x0]
+	ld1		{dgav.4s, dgbv.4s}, [x19]
 
 	/* load sha256_ce_state::finalize */
 	ldr_l		w4, sha256_ce_offsetof_finalize, x4
-	ldr		w4, [x0, x4]
+	ldr		w4, [x19, x4]
 
 	/* load input */
-0:	ld1		{v16.4s-v19.4s}, [x1], #64
-	sub		w2, w2, #1
+1:	ld1		{v16.4s-v19.4s}, [x20], #64
+	sub		w21, w21, #1
 
 CPU_LE(	rev32		v16.16b, v16.16b	)
 CPU_LE(	rev32		v17.16b, v17.16b	)
 CPU_LE(	rev32		v18.16b, v18.16b	)
 CPU_LE(	rev32		v19.16b, v19.16b	)
 
-1:	add		t0.4s, v16.4s, v0.4s
+2:	add		t0.4s, v16.4s, v0.4s
 	mov		dg0v.16b, dgav.16b
 	mov		dg1v.16b, dgbv.16b
 
@@ -131,16 +137,24 @@
 	add		dgbv.4s, dgbv.4s, dg1v.4s
 
 	/* handled all input blocks? */
-	cbnz		w2, 0b
+	cbz		w21, 3f
+
+	if_will_cond_yield_neon
+	st1		{dgav.4s, dgbv.4s}, [x19]
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b		1b
 
 	/*
 	 * Final block: add padding and total bit count.
 	 * Skip if the input size was not a round multiple of the block size,
 	 * the padding is handled by the C code in that case.
 	 */
-	cbz		x4, 3f
+3:	cbz		x4, 4f
 	ldr_l		w4, sha256_ce_offsetof_count, x4
-	ldr		x4, [x0, x4]
+	ldr		x4, [x19, x4]
 	movi		v17.2d, #0
 	mov		x8, #0x80000000
 	movi		v18.2d, #0
@@ -149,9 +163,10 @@
 	mov		x4, #0
 	mov		v19.d[0], xzr
 	mov		v19.d[1], x7
-	b		1b
+	b		2b
 
 	/* store new state */
-3:	st1		{dgav.4s, dgbv.4s}, [x0]
+4:	st1		{dgav.4s, dgbv.4s}, [x19]
+	frame_pop
 	ret
 ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped
index 3ce82cc..7c7ce2e 100644
--- a/arch/arm64/crypto/sha256-core.S_shipped
+++ b/arch/arm64/crypto/sha256-core.S_shipped
@@ -1,3 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// This code is taken from the OpenSSL project but the author (Andy Polyakov)
+// has relicensed it under the GPLv2. Therefore this program is free software;
+// you can redistribute it and/or modify it under the terms of the GNU General
+// Public License version 2 as published by the Free Software Foundation.
+//
+// The original headers, including the original license headers, are
+// included below for completeness.
+
 // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
 //
 // Licensed under the OpenSSL license (the "License").  You may not use
@@ -10,8 +20,6 @@
 // project. The module is, however, dual licensed under OpenSSL and
 // CRYPTOGAMS licenses depending on where you obtain it. For further
 // details see http://www.openssl.org/~appro/cryptogams/.
-//
-// Permission to use under GPLv2 terms is granted.
 // ====================================================================
 //
 // SHA256/512 for ARMv8.
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index 332ad75..a7d587f 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -41,9 +41,16 @@
 	 */
 	.text
 ENTRY(sha3_ce_transform)
-	/* load state */
-	add	x8, x0, #32
-	ld1	{ v0.1d- v3.1d}, [x0]
+	frame_push	4
+
+	mov	x19, x0
+	mov	x20, x1
+	mov	x21, x2
+	mov	x22, x3
+
+0:	/* load state */
+	add	x8, x19, #32
+	ld1	{ v0.1d- v3.1d}, [x19]
 	ld1	{ v4.1d- v7.1d}, [x8], #32
 	ld1	{ v8.1d-v11.1d}, [x8], #32
 	ld1	{v12.1d-v15.1d}, [x8], #32
@@ -51,13 +58,13 @@
 	ld1	{v20.1d-v23.1d}, [x8], #32
 	ld1	{v24.1d}, [x8]
 
-0:	sub	w2, w2, #1
+1:	sub	w21, w21, #1
 	mov	w8, #24
 	adr_l	x9, .Lsha3_rcon
 
 	/* load input */
-	ld1	{v25.8b-v28.8b}, [x1], #32
-	ld1	{v29.8b-v31.8b}, [x1], #24
+	ld1	{v25.8b-v28.8b}, [x20], #32
+	ld1	{v29.8b-v31.8b}, [x20], #24
 	eor	v0.8b, v0.8b, v25.8b
 	eor	v1.8b, v1.8b, v26.8b
 	eor	v2.8b, v2.8b, v27.8b
@@ -66,10 +73,10 @@
 	eor	v5.8b, v5.8b, v30.8b
 	eor	v6.8b, v6.8b, v31.8b
 
-	tbnz	x3, #6, 2f		// SHA3-512
+	tbnz	x22, #6, 3f		// SHA3-512
 
-	ld1	{v25.8b-v28.8b}, [x1], #32
-	ld1	{v29.8b-v30.8b}, [x1], #16
+	ld1	{v25.8b-v28.8b}, [x20], #32
+	ld1	{v29.8b-v30.8b}, [x20], #16
 	eor	 v7.8b,  v7.8b, v25.8b
 	eor	 v8.8b,  v8.8b, v26.8b
 	eor	 v9.8b,  v9.8b, v27.8b
@@ -77,34 +84,34 @@
 	eor	v11.8b, v11.8b, v29.8b
 	eor	v12.8b, v12.8b, v30.8b
 
-	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224
+	tbnz	x22, #4, 2f		// SHA3-384 or SHA3-224
 
 	// SHA3-256
-	ld1	{v25.8b-v28.8b}, [x1], #32
+	ld1	{v25.8b-v28.8b}, [x20], #32
 	eor	v13.8b, v13.8b, v25.8b
 	eor	v14.8b, v14.8b, v26.8b
 	eor	v15.8b, v15.8b, v27.8b
 	eor	v16.8b, v16.8b, v28.8b
-	b	3f
+	b	4f
 
-1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384
+2:	tbz	x22, #2, 4f		// bit 2 cleared? SHA-384
 
 	// SHA3-224
-	ld1	{v25.8b-v28.8b}, [x1], #32
-	ld1	{v29.8b}, [x1], #8
+	ld1	{v25.8b-v28.8b}, [x20], #32
+	ld1	{v29.8b}, [x20], #8
 	eor	v13.8b, v13.8b, v25.8b
 	eor	v14.8b, v14.8b, v26.8b
 	eor	v15.8b, v15.8b, v27.8b
 	eor	v16.8b, v16.8b, v28.8b
 	eor	v17.8b, v17.8b, v29.8b
-	b	3f
+	b	4f
 
 	// SHA3-512
-2:	ld1	{v25.8b-v26.8b}, [x1], #16
+3:	ld1	{v25.8b-v26.8b}, [x20], #16
 	eor	 v7.8b,  v7.8b, v25.8b
 	eor	 v8.8b,  v8.8b, v26.8b
 
-3:	sub	w8, w8, #1
+4:	sub	w8, w8, #1
 
 	eor3	v29.16b,  v4.16b,  v9.16b, v14.16b
 	eor3	v26.16b,  v1.16b,  v6.16b, v11.16b
@@ -183,17 +190,33 @@
 
 	eor	 v0.16b,  v0.16b, v31.16b
 
-	cbnz	w8, 3b
-	cbnz	w2, 0b
+	cbnz	w8, 4b
+	cbz	w21, 5f
+
+	if_will_cond_yield_neon
+	add	x8, x19, #32
+	st1	{ v0.1d- v3.1d}, [x19]
+	st1	{ v4.1d- v7.1d}, [x8], #32
+	st1	{ v8.1d-v11.1d}, [x8], #32
+	st1	{v12.1d-v15.1d}, [x8], #32
+	st1	{v16.1d-v19.1d}, [x8], #32
+	st1	{v20.1d-v23.1d}, [x8], #32
+	st1	{v24.1d}, [x8]
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b	1b
 
 	/* save state */
-	st1	{ v0.1d- v3.1d}, [x0], #32
-	st1	{ v4.1d- v7.1d}, [x0], #32
-	st1	{ v8.1d-v11.1d}, [x0], #32
-	st1	{v12.1d-v15.1d}, [x0], #32
-	st1	{v16.1d-v19.1d}, [x0], #32
-	st1	{v20.1d-v23.1d}, [x0], #32
-	st1	{v24.1d}, [x0]
+5:	st1	{ v0.1d- v3.1d}, [x19], #32
+	st1	{ v4.1d- v7.1d}, [x19], #32
+	st1	{ v8.1d-v11.1d}, [x19], #32
+	st1	{v12.1d-v15.1d}, [x19], #32
+	st1	{v16.1d-v19.1d}, [x19], #32
+	st1	{v20.1d-v23.1d}, [x19], #32
+	st1	{v24.1d}, [x19]
+	frame_pop
 	ret
 ENDPROC(sha3_ce_transform)
 
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl
index c55efb3..2d8655d 100644
--- a/arch/arm64/crypto/sha512-armv8.pl
+++ b/arch/arm64/crypto/sha512-armv8.pl
@@ -1,4 +1,14 @@
 #! /usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
+
 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
 #
 # Licensed under the OpenSSL license (the "License").  You may not use
@@ -11,8 +21,6 @@
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
-#
-# Permission to use under GPLv2 terms is granted.
 # ====================================================================
 #
 # SHA256/512 for ARMv8.
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index 7f3bca5..ce65e3a 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -107,17 +107,23 @@
 	 */
 	.text
 ENTRY(sha512_ce_transform)
+	frame_push	3
+
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+
 	/* load state */
-	ld1		{v8.2d-v11.2d}, [x0]
+0:	ld1		{v8.2d-v11.2d}, [x19]
 
 	/* load first 4 round constants */
 	adr_l		x3, .Lsha512_rcon
 	ld1		{v20.2d-v23.2d}, [x3], #64
 
 	/* load input */
-0:	ld1		{v12.2d-v15.2d}, [x1], #64
-	ld1		{v16.2d-v19.2d}, [x1], #64
-	sub		w2, w2, #1
+1:	ld1		{v12.2d-v15.2d}, [x20], #64
+	ld1		{v16.2d-v19.2d}, [x20], #64
+	sub		w21, w21, #1
 
 CPU_LE(	rev64		v12.16b, v12.16b	)
 CPU_LE(	rev64		v13.16b, v13.16b	)
@@ -196,9 +202,18 @@
 	add		v11.2d, v11.2d, v3.2d
 
 	/* handled all input blocks? */
-	cbnz		w2, 0b
+	cbz		w21, 3f
+
+	if_will_cond_yield_neon
+	st1		{v8.2d-v11.2d}, [x19]
+	do_cond_yield_neon
+	b		0b
+	endif_yield_neon
+
+	b		1b
 
 	/* store new state */
-3:	st1		{v8.2d-v11.2d}, [x0]
+3:	st1		{v8.2d-v11.2d}, [x19]
+	frame_pop
 	ret
 ENDPROC(sha512_ce_transform)
diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped
index bd0f59f..e063a61 100644
--- a/arch/arm64/crypto/sha512-core.S_shipped
+++ b/arch/arm64/crypto/sha512-core.S_shipped
@@ -1,3 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// This code is taken from the OpenSSL project but the author (Andy Polyakov)
+// has relicensed it under the GPLv2. Therefore this program is free software;
+// you can redistribute it and/or modify it under the terms of the GNU General
+// Public License version 2 as published by the Free Software Foundation.
+//
+// The original headers, including the original license headers, are
+// included below for completeness.
+
 // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
 //
 // Licensed under the OpenSSL license (the "License").  You may not use
@@ -10,8 +20,6 @@
 // project. The module is, however, dual licensed under OpenSSL and
 // CRYPTOGAMS licenses depending on where you obtain it. For further
 // details see http://www.openssl.org/~appro/cryptogams/.
-//
-// Permission to use under GPLv2 terms is granted.
 // ====================================================================
 //
 // SHA256/512 for ARMv8.
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
new file mode 100644
index 0000000..af3bfbc
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8
+	.set		.Lv\b\().4s, \b
+	.endr
+
+	.macro		sm4e, rd, rn
+	.inst		0xcec08400 | .L\rd | (.L\rn << 5)
+	.endm
+
+	/*
+	 * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
+	 */
+	.text
+ENTRY(sm4_ce_do_crypt)
+	ld1		{v8.4s}, [x2]
+	ld1		{v0.4s-v3.4s}, [x0], #64
+CPU_LE(	rev32		v8.16b, v8.16b		)
+	ld1		{v4.4s-v7.4s}, [x0]
+	sm4e		v8.4s, v0.4s
+	sm4e		v8.4s, v1.4s
+	sm4e		v8.4s, v2.4s
+	sm4e		v8.4s, v3.4s
+	sm4e		v8.4s, v4.4s
+	sm4e		v8.4s, v5.4s
+	sm4e		v8.4s, v6.4s
+	sm4e		v8.4s, v7.4s
+	rev64		v8.4s, v8.4s
+	ext		v8.16b, v8.16b, v8.16b, #8
+CPU_LE(	rev32		v8.16b, v8.16b		)
+	st1		{v8.4s}, [x1]
+	ret
+ENDPROC(sm4_ce_do_crypt)
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
new file mode 100644
index 0000000..b7fb527
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/sm4.h>
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/types.h>
+
+MODULE_ALIAS_CRYPTO("sm4");
+MODULE_ALIAS_CRYPTO("sm4-ce");
+MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);
+
+static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (!may_use_simd()) {
+		crypto_sm4_encrypt(tfm, out, in);
+	} else {
+		kernel_neon_begin();
+		sm4_ce_do_crypt(ctx->rkey_enc, out, in);
+		kernel_neon_end();
+	}
+}
+
+static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (!may_use_simd()) {
+		crypto_sm4_decrypt(tfm, out, in);
+	} else {
+		kernel_neon_begin();
+		sm4_ce_do_crypt(ctx->rkey_dec, out, in);
+		kernel_neon_end();
+	}
+}
+
+static struct crypto_alg sm4_ce_alg = {
+	.cra_name			= "sm4",
+	.cra_driver_name		= "sm4-ce",
+	.cra_priority			= 200,
+	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize			= SM4_BLOCK_SIZE,
+	.cra_ctxsize			= sizeof(struct crypto_sm4_ctx),
+	.cra_module			= THIS_MODULE,
+	.cra_u.cipher = {
+		.cia_min_keysize	= SM4_KEY_SIZE,
+		.cia_max_keysize	= SM4_KEY_SIZE,
+		.cia_setkey		= crypto_sm4_set_key,
+		.cia_encrypt		= sm4_ce_encrypt,
+		.cia_decrypt		= sm4_ce_decrypt
+	}
+};
+
+static int __init sm4_ce_mod_init(void)
+{
+	return crypto_register_alg(&sm4_ce_alg);
+}
+
+static void __exit sm4_ce_mod_fini(void)
+{
+	crypto_unregister_alg(&sm4_ce_alg);
+}
+
+module_cpu_feature_match(SM4, sm4_ce_mod_init);
+module_exit(sm4_ce_mod_fini);
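
The encrypt/decrypt wrappers above follow the usual arm64 crypto glue
discipline: the NEON unit may be unusable in the current context (hard IRQ,
for instance), so a scalar fallback is kept and kernel_neon_begin()/
kernel_neon_end() bracket the accelerated path. A hedged userspace sketch of
that shape, with all names as stand-ins:

#include <stdbool.h>
#include <stdio.h>

static bool may_use_simd_like(void) { return true; }	/* e.g. !in_irq() */

static void crypt_generic(void) { puts("generic C path"); }
static void crypt_ce(void)      { puts("crypto-extension path"); }

static void crypt_one_block(void)
{
	if (!may_use_simd_like()) {
		crypt_generic();
	} else {
		/* kernel_neon_begin(): claims the FP/SIMD registers */
		crypt_ce();
		/* kernel_neon_end() */
	}
}

int main(void)
{
	crypt_one_block();
	return 0;
}
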
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index c00c62e..1a037b9 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -34,7 +34,6 @@
 
 typedef u32		compat_size_t;
 typedef s32		compat_ssize_t;
-typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
@@ -66,16 +65,6 @@
 typedef u64		compat_u64;
 typedef u32		compat_uptr_t;
 
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
 struct compat_stat {
 #ifdef __AARCH64EB__
 	short		st_dev;
@@ -192,10 +181,10 @@
 
 struct compat_semid64_ds {
 	struct compat_ipc64_perm sem_perm;
-	compat_time_t  sem_otime;
-	compat_ulong_t __unused1;
-	compat_time_t  sem_ctime;
-	compat_ulong_t __unused2;
+	compat_ulong_t sem_otime;
+	compat_ulong_t sem_otime_high;
+	compat_ulong_t sem_ctime;
+	compat_ulong_t sem_ctime_high;
 	compat_ulong_t sem_nsems;
 	compat_ulong_t __unused3;
 	compat_ulong_t __unused4;
@@ -203,12 +192,12 @@
 
 struct compat_msqid64_ds {
 	struct compat_ipc64_perm msg_perm;
-	compat_time_t  msg_stime;
-	compat_ulong_t __unused1;
-	compat_time_t  msg_rtime;
-	compat_ulong_t __unused2;
-	compat_time_t  msg_ctime;
-	compat_ulong_t __unused3;
+	compat_ulong_t msg_stime;
+	compat_ulong_t msg_stime_high;
+	compat_ulong_t msg_rtime;
+	compat_ulong_t msg_rtime_high;
+	compat_ulong_t msg_ctime;
+	compat_ulong_t msg_ctime_high;
 	compat_ulong_t msg_cbytes;
 	compat_ulong_t msg_qnum;
 	compat_ulong_t msg_qbytes;
@@ -221,12 +210,12 @@
 struct compat_shmid64_ds {
 	struct compat_ipc64_perm shm_perm;
 	compat_size_t  shm_segsz;
-	compat_time_t  shm_atime;
-	compat_ulong_t __unused1;
-	compat_time_t  shm_dtime;
-	compat_ulong_t __unused2;
-	compat_time_t  shm_ctime;
-	compat_ulong_t __unused3;
+	compat_ulong_t shm_atime;
+	compat_ulong_t shm_atime_high;
+	compat_ulong_t shm_dtime;
+	compat_ulong_t shm_dtime_high;
+	compat_ulong_t shm_ctime;
+	compat_ulong_t shm_ctime_high;
 	compat_pid_t   shm_cpid;
 	compat_pid_t   shm_lpid;
 	compat_ulong_t shm_nattch;
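
The pattern in these three structures is the y2038 one: each 32-bit
compat_time_t plus its adjacent padding word becomes an explicit low/high
pair, so a full 64-bit timestamp can round-trip to 32-bit userspace. A
minimal sketch of the split and reassembly (field names approximate those
above):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t sem_otime = (int64_t)1 << 32 | 1234;	/* past y2038 */

	/* the kernel fills the compat structure */
	uint32_t sem_otime_low  = (uint32_t)sem_otime;
	uint32_t sem_otime_high = (uint32_t)(sem_otime >> 32);

	/* userspace (or compat glue) reassembles */
	int64_t t = (int64_t)sem_otime_low |
		    ((int64_t)sem_otime_high << 32);

	printf("%lld == %lld\n", (long long)sem_otime, (long long)t);
	return 0;
}
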
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index 8747f7c..9e69068 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -18,11 +18,6 @@
 #define pcibios_assign_all_busses() \
 	(pci_has_flag(PCI_REASSIGN_ALL_BUS))
 
-/*
- * PCI address space differs from physical memory address space
- */
-#define PCI_DMA_BUS_IS_PHYS	(0)
-
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE	1
 
 extern int isa_dma_bridge_buggy;
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index ebdae15..26c5bd7 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -122,11 +122,6 @@
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	/*
-	 * Ensure prior spin_lock operations to other locks have completed
-	 * on this CPU before we test whether "lock" is locked.
-	 */
-	smp_mb(); /* ^^^ */
 	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h
index 15e3559..eab7380 100644
--- a/arch/arm64/include/asm/stat.h
+++ b/arch/arm64/include/asm/stat.h
@@ -20,6 +20,7 @@
 
 #ifdef CONFIG_COMPAT
 
+#include <linux/compat_time.h>
 #include <asm/compat.h>
 
 /*
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 87a3536..4bcdd03 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -882,7 +882,7 @@
 			si_code = FPE_FLTRES;
 	}
 
-	memset(&info, 0, sizeof(info));
+	clear_siginfo(&info);
 	info.si_signo = SIGFPE;
 	info.si_code = si_code;
 	info.si_addr = (void __user *)instruction_pointer(regs);
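
These clear_siginfo() conversions look mechanical but are load-bearing:
siginfo_t is a union with padding, and any byte not explicitly zeroed would
leak kernel stack contents when the structure is copied to userspace. At this
point in the tree the helper is simply a memset wrapper; a stand-alone
rendering with a stand-in struct:

#include <string.h>

struct siginfo_like {
	int si_signo;
	int si_errno;
	int si_code;
	void *si_addr;
	/* plus union members and implicit padding in the real thing */
};

/* same guarantee as the kernel helper: zero everything, padding included */
static inline void clear_siginfo_like(struct siginfo_like *info)
{
	memset(info, 0, sizeof(*info));
}

int main(void)
{
	struct siginfo_like info;

	clear_siginfo_like(&info);
	return info.si_signo;	/* 0: everything zeroed */
}
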
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 74bb56f..413dbe5 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -30,7 +30,6 @@
 #include <linux/smp.h>
 #include <linux/uaccess.h>
 
-#include <asm/compat.h>
 #include <asm/current.h>
 #include <asm/debug-monitors.h>
 #include <asm/hw_breakpoint.h>
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 1d091d0..0bbac61 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/compat.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/perf_event.h>
 #include <linux/bug.h>
 #include <linux/sched/task_stack.h>
 
-#include <asm/compat.h>
 #include <asm/perf_regs.h>
 #include <asm/ptrace.h>
 
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 93ab57d..a610982 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -112,6 +112,7 @@
 		break;
 	}
 
+	clear_siginfo(&info);
 	info.si_signo = SIGILL;
 	info.si_errno = 0;
 	info.si_code  = ILL_ILLTRP;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8bbdc17..d399d45 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -635,6 +635,7 @@
 	siginfo_t info;
 	void __user *pc = (void __user *)instruction_pointer(regs);
 
+	clear_siginfo(&info);
 	info.si_signo = SIGILL;
 	info.si_errno = 0;
 	info.si_code  = ILL_ILLOPC;
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a96ec01..db01f27 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -508,16 +508,6 @@
 }
 arch_initcall(arm64_dma_init);
 
-#define PREALLOC_DMA_DEBUG_ENTRIES	4096
-
-static int __init dma_debug_do_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-	return 0;
-}
-fs_initcall(dma_debug_do_init);
-
-
 #ifdef CONFIG_IOMMU_DMA
 #include <linux/dma-iommu.h>
 #include <linux/platform_device.h>
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2af3dd8..576f151 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -356,11 +356,12 @@
 	 */
 	if (user_mode(regs)) {
 		const struct fault_info *inf = esr_to_fault_info(esr);
-		struct siginfo si = {
-			.si_signo	= inf->sig,
-			.si_code	= inf->code,
-			.si_addr	= (void __user *)addr,
-		};
+		struct siginfo si;
+
+		clear_siginfo(&si);
+		si.si_signo	= inf->sig;
+		si.si_code	= inf->code;
+		si.si_addr	= (void __user *)addr;
 
 		__do_user_fault(&si, esr);
 	} else {
@@ -634,6 +635,7 @@
 			nmi_exit();
 	}
 
+	clear_siginfo(&info);
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code  = inf->code;
@@ -738,6 +740,7 @@
 		show_pte(addr);
 	}
 
+	clear_siginfo(&info);
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code  = inf->code;
@@ -780,6 +783,7 @@
 		local_irq_enable();
 	}
 
+	clear_siginfo(&info);
 	info.si_signo = SIGBUS;
 	info.si_errno = 0;
 	info.si_code  = BUS_ADRALN;
@@ -823,7 +827,6 @@
 					      struct pt_regs *regs)
 {
 	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
-	struct siginfo info;
 	int rv;
 
 	/*
@@ -839,6 +842,9 @@
 	if (!inf->fn(addr, esr, regs)) {
 		rv = 1;
 	} else {
+		struct siginfo info;
+
+		clear_siginfo(&info);
 		info.si_signo = inf->sig;
 		info.si_errno = 0;
 		info.si_code  = inf->code;
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index c6b4dd1..bf598556 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -6,11 +6,13 @@
 
 config C6X
 	def_bool y
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select CLKDEV_LOOKUP
+	select DMA_NONCOHERENT_OPS
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_DMA_API_DEBUG
 	select HAVE_MEMBLOCK
 	select SPARSE_IRQ
 	select IRQ_DOMAIN
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index fd4c840..33a2c94 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -1,10 +1,12 @@
 generic-y += atomic.h
 generic-y += barrier.h
 generic-y += bugs.h
+generic-y += compat.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += dma.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h
deleted file mode 100644
index 05daf10..0000000
--- a/arch/c6x/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- *  Port on Texas Instruments TMS320C6x architecture
- *
- *  Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated
- *  Author: Aurelien Jacquiot <aurelien.jacquiot@ti.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- */
-#ifndef _ASM_C6X_DMA_MAPPING_H
-#define _ASM_C6X_DMA_MAPPING_H
-
-extern const struct dma_map_ops c6x_dma_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	return &c6x_dma_ops;
-}
-
-extern void coherent_mem_init(u32 start, u32 size);
-void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-		gfp_t gfp, unsigned long attrs);
-void c6x_dma_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_handle, unsigned long attrs);
-
-#endif	/* _ASM_C6X_DMA_MAPPING_H */
diff --git a/arch/c6x/include/asm/setup.h b/arch/c6x/include/asm/setup.h
index 852afb2..350f34d 100644
--- a/arch/c6x/include/asm/setup.h
+++ b/arch/c6x/include/asm/setup.h
@@ -28,5 +28,7 @@
 extern void machine_init(unsigned long dt_ptr);
 extern void time_init(void);
 
+extern void coherent_mem_init(u32 start, u32 size);
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_C6X_SETUP_H */
diff --git a/arch/c6x/kernel/Makefile b/arch/c6x/kernel/Makefile
index 02f340d..fbe7417 100644
--- a/arch/c6x/kernel/Makefile
+++ b/arch/c6x/kernel/Makefile
@@ -8,6 +8,6 @@
 obj-y := process.o traps.o irq.o signal.o ptrace.o
 obj-y += setup.o sys_c6x.o time.o devicetree.o
 obj-y += switch_to.o entry.o vectors.o c6x_ksyms.o
-obj-y += soc.o dma.o
+obj-y += soc.o
 
 obj-$(CONFIG_MODULES)           += module.o
diff --git a/arch/c6x/kernel/dma.c b/arch/c6x/kernel/dma.c
deleted file mode 100644
index 9fff8be..0000000
--- a/arch/c6x/kernel/dma.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright (C) 2011 Texas Instruments Incorporated
- *  Author: Mark Salter <msalter@redhat.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/dma-mapping.h>
-#include <linux/mm.h>
-#include <linux/mm_types.h>
-#include <linux/scatterlist.h>
-
-#include <asm/cacheflush.h>
-
-static void c6x_dma_sync(dma_addr_t handle, size_t size,
-			 enum dma_data_direction dir)
-{
-	unsigned long paddr = handle;
-
-	BUG_ON(!valid_dma_direction(dir));
-
-	switch (dir) {
-	case DMA_FROM_DEVICE:
-		L2_cache_block_invalidate(paddr, paddr + size);
-		break;
-	case DMA_TO_DEVICE:
-		L2_cache_block_writeback(paddr, paddr + size);
-		break;
-	case DMA_BIDIRECTIONAL:
-		L2_cache_block_writeback_invalidate(paddr, paddr + size);
-		break;
-	default:
-		break;
-	}
-}
-
-static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	dma_addr_t handle = virt_to_phys(page_address(page) + offset);
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		c6x_dma_sync(handle, size, dir);
-
-	return handle;
-}
-
-static void c6x_dma_unmap_page(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		c6x_dma_sync(handle, size, dir);
-}
-
-static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sglist, sg, nents, i) {
-		sg->dma_address = sg_phys(sg);
-		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-			c6x_dma_sync(sg->dma_address, sg->length, dir);
-	}
-
-	return nents;
-}
-
-static void c6x_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
-		  int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	struct scatterlist *sg;
-	int i;
-
-	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-		return;
-
-	for_each_sg(sglist, sg, nents, i)
-		c6x_dma_sync(sg_dma_address(sg), sg->length, dir);
-}
-
-static void c6x_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir)
-{
-	c6x_dma_sync(handle, size, dir);
-
-}
-
-static void c6x_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	c6x_dma_sync(handle, size, dir);
-
-}
-
-static void c6x_dma_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sglist, int nents,
-		enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sglist, sg, nents, i)
-		c6x_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
-					sg->length, dir);
-
-}
-
-static void c6x_dma_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sglist, int nents,
-		enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sglist, sg, nents, i)
-		c6x_dma_sync_single_for_device(dev, sg_dma_address(sg),
-					   sg->length, dir);
-
-}
-
-const struct dma_map_ops c6x_dma_ops = {
-	.alloc			= c6x_dma_alloc,
-	.free			= c6x_dma_free,
-	.map_page		= c6x_dma_map_page,
-	.unmap_page		= c6x_dma_unmap_page,
-	.map_sg			= c6x_dma_map_sg,
-	.unmap_sg		= c6x_dma_unmap_sg,
-	.sync_single_for_device	= c6x_dma_sync_single_for_device,
-	.sync_single_for_cpu	= c6x_dma_sync_single_for_cpu,
-	.sync_sg_for_device	= c6x_dma_sync_sg_for_device,
-	.sync_sg_for_cpu	= c6x_dma_sync_sg_for_cpu,
-};
-EXPORT_SYMBOL(c6x_dma_ops);
-
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
-	return 0;
-}
-fs_initcall(dma_init);
diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c
index 4c1d4b8..5c60aea 100644
--- a/arch/c6x/kernel/traps.c
+++ b/arch/c6x/kernel/traps.c
@@ -244,7 +244,6 @@
 static void do_trap(struct exception_info *except_info, struct pt_regs *regs)
 {
 	unsigned long addr = instruction_pointer(regs);
-	siginfo_t info;
 
 	if (except_info->code != TRAP_BRKPT)
 		pr_err("TRAP: %s PC[0x%lx] signo[%d] code[%d]\n",
@@ -253,12 +252,8 @@
 
 	die_if_kernel(except_info->kernel_str, regs, addr);
 
-	info.si_signo = except_info->signo;
-	info.si_errno = 0;
-	info.si_code  = except_info->code;
-	info.si_addr  = (void __user *)addr;
-
-	force_sig_info(except_info->signo, &info, current);
+	force_sig_fault(except_info->signo, except_info->code,
+			(void __user *)addr, current);
 }
 
 /*
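
force_sig_fault() is the other half of the siginfo cleanup: instead of every
architecture hand-rolling the signo/errno/code/addr dance (and occasionally
forgetting to zero the rest), one helper builds the structure consistently.
Roughly, with the same stand-in types as before and the delivery step elided:

#include <string.h>

struct siginfo_like {
	int si_signo, si_errno, si_code;
	void *si_addr;
};

/* approximation of force_sig_fault(sig, code, addr, task) */
static void force_sig_fault_like(int sig, int code, void *addr)
{
	struct siginfo_like info;

	memset(&info, 0, sizeof(info));		/* clear_siginfo() */
	info.si_signo = sig;
	info.si_errno = 0;
	info.si_code  = code;
	info.si_addr  = addr;
	/* ... force_sig_info(sig, &info, task) in the real kernel */
	(void)info;
}

int main(void)
{
	int x;

	force_sig_fault_like(11 /* SIGSEGV */, 1 /* SEGV_MAPERR */, &x);
	return 0;
}
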
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
index 95e38ad..d0a8e0c 100644
--- a/arch/c6x/mm/dma-coherent.c
+++ b/arch/c6x/mm/dma-coherent.c
@@ -19,10 +19,12 @@
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/memblock.h>
 
+#include <asm/cacheflush.h>
 #include <asm/page.h>
+#include <asm/setup.h>
 
 /*
  * DMA coherent memory management, can be redefined using the memdma=
@@ -73,7 +75,7 @@
  * Allocate DMA coherent memory space and return both the kernel
  * virtual and DMA address for that space.
  */
-void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		gfp_t gfp, unsigned long attrs)
 {
 	u32 paddr;
@@ -98,7 +100,7 @@
 /*
  * Free DMA coherent memory as defined by the above mapping.
  */
-void c6x_dma_free(struct device *dev, size_t size, void *vaddr,
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_handle, unsigned long attrs)
 {
 	int order;
@@ -139,3 +141,35 @@
 	dma_bitmap = phys_to_virt(bitmap_phys);
 	memset(dma_bitmap, 0, dma_pages * PAGE_SIZE);
 }
+
+static void c6x_dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
+		enum dma_data_direction dir)
+{
+	BUG_ON(!valid_dma_direction(dir));
+
+	switch (dir) {
+	case DMA_FROM_DEVICE:
+		L2_cache_block_invalidate(paddr, paddr + size);
+		break;
+	case DMA_TO_DEVICE:
+		L2_cache_block_writeback(paddr, paddr + size);
+		break;
+	case DMA_BIDIRECTIONAL:
+		L2_cache_block_writeback_invalidate(paddr, paddr + size);
+		break;
+	default:
+		break;
+	}
+}
+
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	return c6x_dma_sync(dev, paddr, size, dir);
+}
+
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	return c6x_dma_sync(dev, paddr, size, dir);
+}
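
With DMA_NONCOHERENT_OPS selected in the Kconfig hunk earlier, c6x no longer
supplies a dma_map_ops table at all: the generic code implements the
map/unmap/sync entry points and calls back into the two arch_sync_dma_*
hooks above. A simplified sketch of that division of labour; the types and
the 1:1 physical mapping are assumptions matching this platform, not the
generic code itself:

#include <stddef.h>
#include <stdint.h>

typedef uintptr_t phys_addr_t;
typedef uintptr_t dma_addr_t;
enum dma_dir { DMA_TO_DEV, DMA_FROM_DEV, DMA_BIDIR };

/* the only per-arch piece left: cache maintenance (stubbed here) */
static void arch_sync_dma_for_device_like(phys_addr_t paddr, size_t size,
					  enum dma_dir dir)
{
	(void)paddr; (void)size; (void)dir;
	/* L2_cache_block_writeback()/invalidate() on real c6x */
}

/* roughly what the generic streaming-map path does with that hook */
static dma_addr_t dma_map_single_sketch(void *vaddr, size_t size,
					enum dma_dir dir)
{
	phys_addr_t paddr = (phys_addr_t)vaddr;	/* 1:1 on this platform */

	arch_sync_dma_for_device_like(paddr, size, dir);
	return (dma_addr_t)paddr;
}

int main(void)
{
	char buf[64];

	(void)dma_map_single_sketch(buf, sizeof(buf), DMA_TO_DEV);
	return 0;
}
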
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 14bac06..a5d0b29 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -3,6 +3,7 @@
 generic-y += bugs.h
 generic-y += cacheflush.h
 generic-y += checksum.h
+generic-y += compat.h
 generic-y += current.h
 generic-y += delay.h
 generic-y += device.h
diff --git a/arch/h8300/include/asm/pci.h b/arch/h8300/include/asm/pci.h
index 7c9e55d..d4d345a 100644
--- a/arch/h8300/include/asm/pci.h
+++ b/arch/h8300/include/asm/pci.h
@@ -15,6 +15,4 @@
 	/* We don't do dynamic PCI IRQ allocation */
 }
 
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
 #endif /* _ASM_H8300_PCI_H */
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 76d2f20..37adb20 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -19,6 +19,7 @@
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
+	select NEED_SG_DMA_LENGTH
 	select NO_IOPORT_MAP
 	select GENERIC_IOMAP
 	select GENERIC_SMP_IDLE_THREAD
@@ -63,9 +64,6 @@
 config GENERIC_IRQ_PROBE
 	def_bool y
 
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config RWSEM_GENERIC_SPINLOCK
 	def_bool n
 
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index e9743f6..dd2fd9c 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -2,6 +2,7 @@
 generic-y += barrier.h
 generic-y += bug.h
 generic-y += bugs.h
+generic-y += compat.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index ad8347c..77459df 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -208,7 +208,6 @@
 	.sync_single_for_cpu = hexagon_sync_single_for_cpu,
 	.sync_single_for_device = hexagon_sync_single_for_device,
 	.mapping_error	= hexagon_mapping_error,
-	.is_phys	= 1,
 };
 
 void __init hexagon_dma_init(void)
diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 2942a92..91ee048 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c
@@ -412,10 +412,6 @@
 	case TRAP_DEBUG:
 		/* Trap0 0xdb is debug breakpoint */
 		if (user_mode(regs)) {
-			struct siginfo info;
-
-			info.si_signo = SIGTRAP;
-			info.si_errno = 0;
 			/*
 			 * Some architectures add some per-thread state
 			 * to distinguish between breakpoint traps and
@@ -423,9 +419,8 @@
 			 * set the si_code value appropriately, or we
 			 * may want to use a different trap0 flavor.
 			 */
-			info.si_code = TRAP_BRKPT;
-			info.si_addr = (void __user *) pt_elr(regs);
-			force_sig_info(SIGTRAP, &info, current);
+			force_sig_fault(SIGTRAP, TRAP_BRKPT,
+					(void __user *) pt_elr(regs), current);
 		} else {
 #ifdef CONFIG_KGDB
 			kgdb_handle_exception(pt_cause(regs), SIGTRAP,
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 3eec33c..933bbce 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -50,7 +50,7 @@
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
-	siginfo_t info;
+	int si_signo;
 	int si_code = SEGV_MAPERR;
 	int fault;
 	const struct exception_table_entry *fixup;
@@ -140,28 +140,22 @@
 	 * unable to fix up the page fault.
 	 */
 	if (fault & VM_FAULT_SIGBUS) {
-		info.si_signo = SIGBUS;
-		info.si_code = BUS_ADRERR;
+		si_signo = SIGBUS;
+		si_code = BUS_ADRERR;
 	}
 	/* Address is not in the memory map */
 	else {
-		info.si_signo = SIGSEGV;
-		info.si_code = SEGV_ACCERR;
+		si_signo = SIGSEGV;
+		si_code  = SEGV_ACCERR;
 	}
-	info.si_errno = 0;
-	info.si_addr = (void __user *)address;
-	force_sig_info(info.si_signo, &info, current);
+	force_sig_fault(si_signo, si_code, (void __user *)address, current);
 	return;
 
 bad_area:
 	up_read(&mm->mmap_sem);
 
 	if (user_mode(regs)) {
-		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		info.si_code = si_code;
-		info.si_addr = (void *)address;
-		force_sig_info(info.si_signo, &info, current);
+		force_sig_fault(SIGSEGV, si_code, (void __user *)address, current);
 		return;
 	}
 	/* Kernel-mode fault falls through */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index bbe12a0..792437d 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -29,7 +29,6 @@
 	select HAVE_FUNCTION_TRACER
 	select TTY
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_DMA_API_DEBUG
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_VIRT_CPU_ACCOUNTING
@@ -54,6 +53,8 @@
 	select MODULES_USE_ELF_RELA
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select HAVE_ARCH_AUDITSYSCALL
+	select NEED_DMA_MAP_STATE
+	select NEED_SG_DMA_LENGTH
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -78,18 +79,6 @@
 	bool
 	default y
 
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool y
-
-config NEED_DMA_MAP_STATE
-	def_bool y
-
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
-config SWIOTLB
-       bool
-
 config STACKTRACE_SUPPORT
 	def_bool y
 
@@ -146,7 +135,6 @@
 	bool "generic"
 	select NUMA
 	select ACPI_NUMA
-	select DMA_DIRECT_OPS
 	select SWIOTLB
 	select PCI_MSI
 	help
@@ -167,7 +155,6 @@
 
 config IA64_DIG
 	bool "DIG-compliant"
-	select DMA_DIRECT_OPS
 	select SWIOTLB
 
 config IA64_DIG_VTD
@@ -183,7 +170,6 @@
 
 config IA64_HP_ZX1_SWIOTLB
 	bool "HP-zx1/sx1000 with software I/O TLB"
-	select DMA_DIRECT_OPS
 	select SWIOTLB
 	help
 	  Build a kernel that runs on HP zx1 and sx1000 systems even when they
@@ -207,7 +193,6 @@
 	bool "SGI-UV"
 	select NUMA
 	select ACPI_NUMA
-	select DMA_DIRECT_OPS
 	select SWIOTLB
 	help
 	  Selecting this option will optimize the kernel for use on UV based
@@ -218,7 +203,6 @@
 
 config IA64_HP_SIM
 	bool "Ski-simulator"
-	select DMA_DIRECT_OPS
 	select SWIOTLB
 	depends on !PM
 
@@ -397,7 +381,7 @@
 	  Say Y to support efficient handling of discontiguous physical memory,
 	  for architectures which are either NUMA (Non-Uniform Memory Access)
 	  or have huge holes in the physical address space for other reasons.
- 	  See <file:Documentation/vm/numa> for more.
+	  See <file:Documentation/vm/numa.rst> for more.
 
 config ARCH_FLATMEM_ENABLE
 	def_bool y
@@ -613,6 +597,3 @@
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
-
-config IOMMU_HELPER
-	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 2dd7f51..45f5980 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -18,7 +18,7 @@
 
 export AWK
 
-CHECKFLAGS	+= -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
+CHECKFLAGS	+= -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
 
 OBJCOPYFLAGS	:= --strip-all
 LDFLAGS_vmlinux	:= -static
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index aec4a33..ee5b652 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1845,9 +1845,6 @@
 	ioc_resource_init(ioc);
 	ioc_sac_init(ioc);
 
-	if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
-		ia64_max_iommu_merge_mask = ~iovp_mask;
-
 	printk(KERN_INFO PFX
 		"%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
 		ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
@@ -1942,19 +1939,6 @@
 	.show  = ioc_show
 };
 
-static int
-ioc_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ioc_seq_ops);
-}
-
-static const struct file_operations ioc_fops = {
-	.open    = ioc_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release
-};
-
 static void __init
 ioc_proc_init(void)
 {
@@ -1964,7 +1948,7 @@
 	if (!dir)
 		return;
 
-	proc_create(ioc_list->name, 0, dir, &ioc_fops);
+	proc_create_seq(ioc_list->name, 0, dir, &ioc_seq_ops);
 }
 #endif
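
This hunk is one instance of a tree-wide conversion: proc_create_seq() and
the proc_create_single*() variants absorb the open() wrapper plus
file_operations block that every seq_file user used to duplicate. A
deliberately toy-sized model of the before/after shape; nothing below is the
real proc API:

#include <stdio.h>

typedef int (*show_fn)(void);

/* old pattern: a wrapper open() and an ops struct per proc file */
struct fops_like { int (*open)(void); };

static int ioc_show_like(void) { return puts("ioc state"); }
static int ioc_open_like(void) { return ioc_show_like(); }
static const struct fops_like ioc_fops_like = { .open = ioc_open_like };

/* new pattern: register the show function directly */
static show_fn registered_show;
static void proc_create_single_like(show_fn f) { registered_show = f; }

int main(void)
{
	ioc_fops_like.open();			/* what the "-" lines did */
	proc_create_single_like(ioc_show_like);	/* what the "+" lines do */
	registered_show();
	return 0;
}
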
 
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index a419ccf..663388a 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -435,19 +435,6 @@
 	return 0;
 }
 
-static int rs_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, rs_proc_show, NULL);
-}
-
-static const struct file_operations rs_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= rs_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static const struct tty_operations hp_ops = {
 	.open = rs_open,
 	.close = rs_close,
@@ -462,7 +449,7 @@
 	.unthrottle = rs_unthrottle,
 	.send_xchar = rs_send_xchar,
 	.hangup = rs_hangup,
-	.proc_fops = &rs_proc_fops,
+	.proc_show = rs_proc_show,
 };
 
 static const struct tty_port_operations hp_port_ops = {
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 6dd8678..557bbc8 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -1,3 +1,4 @@
+generic-y += compat.h
 generic-y += exec.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h
index bdc4669..ccde7c2 100644
--- a/arch/ia64/include/asm/hardirq.h
+++ b/arch/ia64/include/asm/hardirq.h
@@ -13,7 +13,7 @@
 
 #define __ARCH_IRQ_STAT	1
 
-#define local_softirq_pending()		(local_cpu_data->softirq_pending)
+#define local_softirq_pending_ref	ia64_cpu_info.softirq_pending
 
 #include <linux/threads.h>
 #include <linux/irq.h>
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index b1d04e8..780e874 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -30,23 +30,6 @@
 #define PCIBIOS_MIN_IO		0x1000
 #define PCIBIOS_MIN_MEM		0x10000000
 
-/*
- * PCI_DMA_BUS_IS_PHYS should be set to 1 if there is _necessarily_ a direct
- * correspondence between device bus addresses and CPU physical addresses.
- * Platforms with a hardware I/O MMU _must_ turn this off to suppress the
- * bounce buffer handling code in the block and network device layers.
- * Platforms with separate bus address spaces _must_ turn this off and provide
- * a device DMA mapping implementation that takes care of the necessary
- * address translation.
- *
- * For now, the ia64 platforms which may have separate/multiple bus address
- * spaces all have I/O MMUs which support the merging of physically
- * discontiguous buffers, so we can use that as the sole factor to determine
- * the setting of PCI_DMA_BUS_IS_PHYS.
- */
-extern unsigned long ia64_max_iommu_merge_mask;
-#define PCI_DMA_BUS_IS_PHYS	(ia64_max_iommu_merge_mask == ~0UL)
-
 #define HAVE_PCI_MMAP
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE
 #define arch_can_pci_mmap_wc()	1
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index c0527cf..3982e67 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -2,5 +2,9 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += bpf_perf_event.h
+generic-y += ipcbuf.h
 generic-y += kvm_para.h
+generic-y += msgbuf.h
 generic-y += poll.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
diff --git a/arch/ia64/include/uapi/asm/ipcbuf.h b/arch/ia64/include/uapi/asm/ipcbuf.h
deleted file mode 100644
index 90d6445..0000000
--- a/arch/ia64/include/uapi/asm/ipcbuf.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/ipcbuf.h>
diff --git a/arch/ia64/include/uapi/asm/msgbuf.h b/arch/ia64/include/uapi/asm/msgbuf.h
deleted file mode 100644
index aa25df9..0000000
--- a/arch/ia64/include/uapi/asm/msgbuf.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_MSGBUF_H
-#define _ASM_IA64_MSGBUF_H
-
-/*
- * The msqid64_ds structure for IA-64 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct msqid64_ds {
-	struct ipc64_perm msg_perm;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	__kernel_time_t msg_ctime;	/* last change time */
-	unsigned long  msg_cbytes;	/* current number of bytes on queue */
-	unsigned long  msg_qnum;	/* number of messages in queue */
-	unsigned long  msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	unsigned long  __unused1;
-	unsigned long  __unused2;
-};
-
-#endif /* _ASM_IA64_MSGBUF_H */
diff --git a/arch/ia64/include/uapi/asm/sembuf.h b/arch/ia64/include/uapi/asm/sembuf.h
deleted file mode 100644
index 6ed0587..0000000
--- a/arch/ia64/include/uapi/asm/sembuf.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_SEMBUF_H
-#define _ASM_IA64_SEMBUF_H
-
-/*
- * The semid64_ds structure for IA-64 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct semid64_ds {
-	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	__kernel_time_t	sem_otime;		/* last semop time */
-	__kernel_time_t	sem_ctime;		/* last change time */
-	unsigned long	sem_nsems;		/* no. of semaphores in array */
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-};
-
-#endif /* _ASM_IA64_SEMBUF_H */
diff --git a/arch/ia64/include/uapi/asm/shmbuf.h b/arch/ia64/include/uapi/asm/shmbuf.h
deleted file mode 100644
index 6ef57cb..0000000
--- a/arch/ia64/include/uapi/asm/shmbuf.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_SHMBUF_H
-#define _ASM_IA64_SHMBUF_H
-
-/*
- * The shmid64_ds structure for IA-64 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
- */
-
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	__kernel_time_t		shm_ctime;	/* last change time */
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned long		shm_nattch;	/* no. of current attaches */
-	unsigned long		__unused1;
-	unsigned long		__unused2;
-};
-
-struct shminfo64 {
-	unsigned long	shmmax;
-	unsigned long	shmmin;
-	unsigned long	shmmni;
-	unsigned long	shmseg;
-	unsigned long	shmall;
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* _ASM_IA64_SHMBUF_H */
diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h
index 5aa454e..52b5af4 100644
--- a/arch/ia64/include/uapi/asm/siginfo.h
+++ b/arch/ia64/include/uapi/asm/siginfo.h
@@ -27,11 +27,4 @@
 #define __ISR_VALID_BIT	0
 #define __ISR_VALID	(1 << __ISR_VALID_BIT)
 
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
 #endif /* _UAPI_ASM_IA64_SIGINFO_H */
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c
index 9bcc908..a61f6c6 100644
--- a/arch/ia64/kernel/brl_emu.c
+++ b/arch/ia64/kernel/brl_emu.c
@@ -62,6 +62,7 @@
 	struct illegal_op_return rv;
 	long tmp_taken, unimplemented_address;
 
+	clear_siginfo(&siginfo);
 	rv.fkt = (unsigned long) -1;
 
 	/*
diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c
index f2d57e6..7a471d8 100644
--- a/arch/ia64/kernel/dma-mapping.c
+++ b/arch/ia64/kernel/dma-mapping.c
@@ -9,16 +9,6 @@
 const struct dma_map_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
-	return 0;
-}
-fs_initcall(dma_init);
-
 const struct dma_map_ops *dma_get_ops(struct device *dev)
 {
 	return dma_ops;
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index b6e5978..f4a9424 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -920,18 +920,6 @@
 	return 0;
 }
 
-static int proc_palinfo_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_palinfo_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_palinfo_fops = {
-	.open		= proc_palinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int palinfo_add_proc(unsigned int cpu)
 {
 	pal_func_cpu_u_t f;
@@ -948,8 +936,8 @@
 
 	for (j=0; j < NR_PALINFO_ENTRIES; j++) {
 		f.func_id = j;
-		proc_create_data(palinfo_entries[j].name, 0, cpu_dir,
-				 &proc_palinfo_fops, (void *)f.value);
+		proc_create_single_data(palinfo_entries[j].name, 0, cpu_dir,
+				proc_palinfo_show, (void *)f.value);
 	}
 	return 0;
 }
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 8fb280e..3b38c71 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -5708,13 +5708,6 @@
  	.show =		pfm_proc_show
 };
 
-static int
-pfm_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &pfm_seq_ops);
-}
-
-
 /*
  * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
  * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
@@ -6537,13 +6530,6 @@
 	return 0;
 }
 
-static const struct file_operations pfm_proc_fops = {
-	.open		= pfm_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 int __init
 pfm_init(void)
 {
@@ -6615,7 +6601,7 @@
 	/*
 	 * create /proc/perfmon (mostly for debugging purposes)
 	 */
-	perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
+	perfmon_dir = proc_create_seq("perfmon", S_IRUGO, NULL, &pfm_seq_ops);
 	if (perfmon_dir == NULL) {
 		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
 		pmu_conf = NULL;
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index 52c404b..aba1f46 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -54,8 +54,6 @@
 MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
 MODULE_LICENSE("GPL");
 
-static const struct file_operations proc_salinfo_fops;
-
 typedef struct {
 	const char		*name;		/* name of the proc entry */
 	unsigned long           feature;        /* feature bit */
@@ -578,6 +576,17 @@
 	return 0;
 }
 
+/*
+ * 'data' contains an integer that corresponds to the feature we're
+ * testing
+ */
+static int proc_salinfo_show(struct seq_file *m, void *v)
+{
+	unsigned long data = (unsigned long)v;
+	seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
+	return 0;
+}
+
 static int __init
 salinfo_init(void)
 {
@@ -593,9 +602,9 @@
 
 	for (i=0; i < NR_SALINFO_ENTRIES; i++) {
 		/* pass the feature bit in question as misc data */
-		*sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir,
-					   &proc_salinfo_fops,
-					   (void *)salinfo_entries[i].feature);
+		*sdir++ = proc_create_single_data(salinfo_entries[i].name, 0,
+				salinfo_dir, proc_salinfo_show,
+				(void *)salinfo_entries[i].feature);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
@@ -633,27 +642,4 @@
 	return 0;
 }
 
-/*
- * 'data' contains an integer that corresponds to the feature we're
- * testing
- */
-static int proc_salinfo_show(struct seq_file *m, void *v)
-{
-	unsigned long data = (unsigned long)v;
-	seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n");
-	return 0;
-}
-
-static int proc_salinfo_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_salinfo_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_salinfo_fops = {
-	.open		= proc_salinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 module_init(salinfo_init);
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index dee56bc..ad43cbf 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -124,18 +124,6 @@
 unsigned long ia64_cache_stride_shift = ~0;
 
 /*
- * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
- * mask specifies a mask of address bits that must be 0 in order for two buffers to be
- * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
- * address of the second buffer must be aligned to (merge_mask+1) in order to be
- * mergeable).  By default, we assume there is no I/O MMU which can merge physically
- * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu
- * page-size of 2^64.
- */
-unsigned long ia64_max_iommu_merge_mask = ~0UL;
-EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
-
-/*
  * We use a special marker for the end of memory and it uses the extra (+1) slot
  */
 struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata;
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 54547c7..d1234a5 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -153,6 +153,7 @@
 	return retval;
 
   give_sigsegv:
+	clear_siginfo(&si);
 	si.si_signo = SIGSEGV;
 	si.si_errno = 0;
 	si.si_code = SI_KERNEL;
@@ -236,6 +237,7 @@
 	unsigned long flags;
 	struct siginfo si;
 
+	clear_siginfo(&si);
 	if (sig == SIGSEGV) {
 		/*
 		 * Acquiring siglock around the sa_handler-update is almost
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index 6d4e76a..c6f4932 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -104,6 +104,7 @@
 	int sig, code;
 
 	/* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */
+	clear_siginfo(&siginfo);
 	siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
 	siginfo.si_imm = break_num;
 	siginfo.si_flags = 0;		/* clear __ISR_VALID */
@@ -293,7 +294,6 @@
 {
 	long exception, bundle[2];
 	unsigned long fault_ip;
-	struct siginfo siginfo;
 
 	fault_ip = regs->cr_iip;
 	if (!fp_fault && (ia64_psr(regs)->ri == 0))
@@ -344,13 +344,16 @@
 			printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
 			return -1;
 		} else {
+			struct siginfo siginfo;
+
 			/* is next instruction a trap? */
 			if (exception & 2) {
 				ia64_increment_ip(regs);
 			}
+			clear_siginfo(&siginfo);
 			siginfo.si_signo = SIGFPE;
 			siginfo.si_errno = 0;
-			siginfo.si_code = FPE_FIXME;	/* default code */
+			siginfo.si_code = FPE_FLTUNK;	/* default code */
 			siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
 			if (isr & 0x11) {
 				siginfo.si_code = FPE_FLTINV;
@@ -372,9 +375,12 @@
 			return -1;
 		} else if (exception != 0) {
 			/* raise exception */
+			struct siginfo siginfo;
+
+			clear_siginfo(&siginfo);
 			siginfo.si_signo = SIGFPE;
 			siginfo.si_errno = 0;
-			siginfo.si_code = FPE_FIXME;	/* default code */
+			siginfo.si_code = FPE_FLTUNK;	/* default code */
 			siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
 			if (isr & 0x880) {
 				siginfo.si_code = FPE_FLTOVF;
@@ -420,7 +426,7 @@
 	if (die_if_kernel(buf, &regs, 0))
 		return rv;
 
-	memset(&si, 0, sizeof(si));
+	clear_siginfo(&si);
 	si.si_signo = SIGILL;
 	si.si_code = ILL_ILLOPC;
 	si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
@@ -434,7 +440,6 @@
 	    long arg7, struct pt_regs regs)
 {
 	unsigned long code, error = isr, iip;
-	struct siginfo siginfo;
 	char buf[128];
 	int result, sig;
 	static const char *reason[] = {
@@ -485,6 +490,7 @@
 
 	      case 26: /* NaT Consumption */
 		if (user_mode(&regs)) {
+			struct siginfo siginfo;
 			void __user *addr;
 
 			if (((isr >> 4) & 0xf) == 2) {
@@ -499,6 +505,7 @@
 				addr = (void __user *) (regs.cr_iip
 							+ ia64_psr(&regs)->ri);
 			}
+			clear_siginfo(&siginfo);
 			siginfo.si_signo = sig;
 			siginfo.si_code = code;
 			siginfo.si_errno = 0;
@@ -515,6 +522,9 @@
 
 	      case 31: /* Unsupported Data Reference */
 		if (user_mode(&regs)) {
+			struct siginfo siginfo;
+
+			clear_siginfo(&siginfo);
 			siginfo.si_signo = SIGILL;
 			siginfo.si_code = ILL_ILLOPN;
 			siginfo.si_errno = 0;
@@ -531,6 +541,10 @@
 	      case 29: /* Debug */
 	      case 35: /* Taken Branch Trap */
 	      case 36: /* Single Step Trap */
+	      {
+		struct siginfo siginfo;
+
+		clear_siginfo(&siginfo);
 		if (fsys_mode(current, &regs)) {
 			extern char __kernel_syscall_via_break[];
 			/*
@@ -578,11 +592,15 @@
 		siginfo.si_isr   = isr;
 		force_sig_info(SIGTRAP, &siginfo, current);
 		return;
+	      }
 
 	      case 32: /* fp fault */
 	      case 33: /* fp trap */
 		result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
 		if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
+			struct siginfo siginfo;
+
+			clear_siginfo(&siginfo);
 			siginfo.si_signo = SIGFPE;
 			siginfo.si_errno = 0;
 			siginfo.si_code = FPE_FLTINV;
@@ -616,6 +634,9 @@
 		} else {
 			/* Unimplemented Instr. Address Trap */
 			if (user_mode(&regs)) {
+				struct siginfo siginfo;
+
+				clear_siginfo(&siginfo);
 				siginfo.si_signo = SIGILL;
 				siginfo.si_code = ILL_BADIADDR;
 				siginfo.si_errno = 0;
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 72e9b42..e309f98 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -1537,6 +1537,7 @@
 		/* NOT_REACHED */
 	}
   force_sigbus:
+	clear_siginfo(&si);
 	si.si_signo = SIGBUS;
 	si.si_errno = 0;
 	si.si_code = BUS_ADRALN;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index dfdc152..817fa12 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -85,7 +85,6 @@
 	int signal = SIGSEGV, code = SEGV_MAPERR;
 	struct vm_area_struct *vma, *prev_vma;
 	struct mm_struct *mm = current->mm;
-	struct siginfo si;
 	unsigned long mask;
 	int fault;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -249,6 +248,9 @@
 		return;
 	}
 	if (user_mode(regs)) {
+		struct siginfo si;
+
+		clear_siginfo(&si);
 		si.si_signo = signal;
 		si.si_errno = 0;
 		si.si_code = code;
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 11f2275..8479e9a 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -480,11 +480,6 @@
 	tioca_init_provider();
 	tioce_init_provider();
 
-	/*
-	 * This is needed to avoid bounce limit checks in the blk layer
-	 */
-	ia64_max_iommu_merge_mask = ~PAGE_MASK;
-
 	sn_irq_lh_init();
 	INIT_LIST_HEAD(&sn_sysdata_list);
 	sn_init_cpei_timer();
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
index ec4de2b..e15457b 100644
--- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -140,18 +140,6 @@
 	return 0;
 }
 
-static int proc_fit_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_fit_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_fit_fops = {
-	.open		= proc_fit_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int proc_version_show(struct seq_file *m, void *v)
 {
 	unsigned long nasid = (unsigned long)m->private;
@@ -174,18 +162,6 @@
 	return 0;
 }
 
-static int proc_version_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_version_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_version_fops = {
-	.open		= proc_version_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* module entry points */
 int __init prominfo_init(void);
 void __exit prominfo_exit(void);
@@ -217,10 +193,10 @@
 		if (!dir)
 			continue;
 		nasid = cnodeid_to_nasid(cnodeid);
-		proc_create_data("fit", 0, dir,
-				 &proc_fit_fops, (void *)nasid);
-		proc_create_data("version", 0, dir,
-				 &proc_version_fops, (void *)nasid);
+		proc_create_single_data("fit", 0, dir, proc_fit_show, 
+				(void *)nasid);
+		proc_create_single_data("version", 0, dir, proc_version_show,
+				(void *)nasid);
 	}
 	return 0;
 }
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 29cf8f8..c2a4d84 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -18,33 +18,18 @@
 	return 0;
 }
 
-static int partition_id_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, partition_id_show, NULL);
-}
-
 static int system_serial_number_show(struct seq_file *s, void *p)
 {
 	seq_printf(s, "%s\n", sn_system_serial_number());
 	return 0;
 }
 
-static int system_serial_number_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, system_serial_number_show, NULL);
-}
-
 static int licenseID_show(struct seq_file *s, void *p)
 {
 	seq_printf(s, "0x%llx\n", sn_partition_serial_number_val());
 	return 0;
 }
 
-static int licenseID_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, licenseID_show, NULL);
-}
-
 static int coherence_id_show(struct seq_file *s, void *p)
 {
 	seq_printf(s, "%d\n", partition_coherence_id());
@@ -52,43 +37,10 @@
 	return 0;
 }
 
-static int coherence_id_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, coherence_id_show, NULL);
-}
-
 /* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */
 extern int sn_topology_open(struct inode *, struct file *);
 extern int sn_topology_release(struct inode *, struct file *);
 
-static const struct file_operations proc_partition_id_fops = {
-	.open		= partition_id_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static const struct file_operations proc_system_sn_fops = {
-	.open		= system_serial_number_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static const struct file_operations proc_license_id_fops = {
-	.open		= licenseID_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static const struct file_operations proc_coherence_id_fops = {
-	.open		= coherence_id_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static const struct file_operations proc_sn_topo_fops = {
 	.open		= sn_topology_open,
 	.read		= seq_read,
@@ -104,13 +56,13 @@
 	if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
 		return;
 
-	proc_create("partition_id", 0444, sgi_proc_dir,
-		    &proc_partition_id_fops);
-	proc_create("system_serial_number", 0444, sgi_proc_dir,
-		    &proc_system_sn_fops);
-	proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops);
-	proc_create("coherence_id", 0444, sgi_proc_dir,
-		    &proc_coherence_id_fops);
+	proc_create_single("partition_id", 0444, sgi_proc_dir,
+			partition_id_show);
+	proc_create_single("system_serial_number", 0444, sgi_proc_dir,
+			system_serial_number_show);
+	proc_create_single("licenseID", 0444, sgi_proc_dir, licenseID_show);
+	proc_create_single("coherence_id", 0444, sgi_proc_dir,
+			coherence_id_show);
 	proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops);
 }
 
diff --git a/arch/m68k/68000/timers.c b/arch/m68k/68000/timers.c
index 252455b..71ddb4c 100644
--- a/arch/m68k/68000/timers.c
+++ b/arch/m68k/68000/timers.c
@@ -125,7 +125,9 @@
 {
 	if (!set) {
 		long now = RTCTIME;
-		t->tm_year = t->tm_mon = t->tm_mday = 1;
+		t->tm_year = 1;
+		t->tm_mon = 0;
+		t->tm_mday = 1;
 		t->tm_hour = (now >> 24) % 24;
 		t->tm_min = (now >> 16) % 60;
 		t->tm_sec = now % 60;
diff --git a/arch/m68k/Kconfig.bus b/arch/m68k/Kconfig.bus
index d5e66ec..aef698f 100644
--- a/arch/m68k/Kconfig.bus
+++ b/arch/m68k/Kconfig.bus
@@ -59,6 +59,10 @@
 config GENERIC_ISA_DMA
 	def_bool ISA
 
+source "drivers/zorro/Kconfig"
+
+endif
+
 config PCI
 	bool "PCI support"
 	depends on M54xx
@@ -66,10 +70,8 @@
 	  Enable the PCI bus. Support for the PCI bus hardware built into the
 	  ColdFire 547x and 548x processors.
 
+if PCI
 source "drivers/pci/Kconfig"
-
-source "drivers/zorro/Kconfig"
-
 endif
 
 if !MMU
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index 0d27706..b2a6bc6 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -221,8 +221,10 @@
     t->tm_hour=rtc->hours;
     t->tm_mday=rtc->day_of_month;
     t->tm_wday=rtc->day_of_week;
-    t->tm_mon=rtc->month;
+    t->tm_mon = rtc->month - 1;
     t->tm_year=rtc->year;
+    if (t->tm_year < 70)
+	t->tm_year += 100;
   } else {
     rtc->second=t->tm_sec;
     rtc->minute=t->tm_min;
@@ -230,8 +232,8 @@
     rtc->day_of_month=t->tm_mday;
     if(t->tm_wday!=-1)
       rtc->day_of_week=t->tm_wday;
-    rtc->month=t->tm_mon;
-    rtc->year=t->tm_year;
+    rtc->month = t->tm_mon + 1;
+    rtc->year = t->tm_year % 100;
   }
 
   return 0;
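
Both this hunk and the 68000/timers.c one above fix the same off-by-one
class: struct tm months run 0-11 and tm_year counts from 1900, while the
Apollo RTC keeps a 1-based month and a two-digit year. A small check of the
conversion, with made-up RTC values:

#include <stdio.h>

int main(void)
{
	int rtc_month = 6, rtc_year = 20;	/* hypothetical readout */

	int tm_mon  = rtc_month - 1;		/* 0-based: June -> 5 */
	int tm_year = rtc_year;
	if (tm_year < 70)			/* 00-69 means 2000-2069 */
		tm_year += 100;

	printf("tm_mon=%d tm_year=%d -> %d\n",
	       tm_mon, tm_year, 1900 + tm_year);	/* prints 2020 */
	return 0;
}
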
diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c
index 3097fa2..62b0eb6 100644
--- a/arch/m68k/coldfire/pci.c
+++ b/arch/m68k/coldfire/pci.c
@@ -23,20 +23,10 @@
 
 /*
  * Memory and IO mappings. We use a 1:1 mapping for local host memory to
- * PCI bus memory (no reason not to really). IO space doesn't matter, we
- * always use access functions for that. The device configuration space is
- * mapped over the IO map space when we enable it in the PCICAR register.
+ * PCI bus memory (no reason not to really). IO space is mapped in its own
+ * separate address region. The device configuration space is mapped over
+ * the IO map space when we enable it in the PCICAR register.
  */
-#define	PCI_MEM_PA	0xf0000000		/* Host physical address */
-#define	PCI_MEM_BA	0xf0000000		/* Bus physical address */
-#define	PCI_MEM_SIZE	0x08000000		/* 128 MB */
-#define	PCI_MEM_MASK	(PCI_MEM_SIZE - 1)
-
-#define	PCI_IO_PA	0xf8000000		/* Host physical address */
-#define	PCI_IO_BA	0x00000000		/* Bus physical address */
-#define	PCI_IO_SIZE	0x00010000		/* 64k */
-#define	PCI_IO_MASK	(PCI_IO_SIZE - 1)
-
 static struct pci_bus *rootbus;
 static unsigned long iospace;
 
@@ -56,13 +46,6 @@
 	0, 69, 69, 71, 71,
 };
 
-
-static inline void syncio(void)
-{
-	/* The ColdFire "nop" instruction waits for all bus IO to complete */
-	__asm__ __volatile__ ("nop");
-}
-
 /*
  * Configuration space access functions. Configuration space access is
  * through the IO mapping window, enabling it via the PCICAR register.
@@ -84,9 +67,9 @@
 			return PCIBIOS_SUCCESSFUL;
 	}
 
-	syncio();
 	addr = mcf_mk_pcicar(bus->number, devfn, where);
 	__raw_writel(PCICAR_E | addr, PCICAR);
+	__raw_readl(PCICAR);
 	addr = iospace + (where & 0x3);
 
 	switch (size) {
@@ -101,8 +84,8 @@
 		break;
 	}
 
-	syncio();
 	__raw_writel(0, PCICAR);
+	__raw_readl(PCICAR);
 	return PCIBIOS_SUCCESSFUL;
 }
 
@@ -116,9 +99,9 @@
 			return PCIBIOS_SUCCESSFUL;
 	}
 
-	syncio();
 	addr = mcf_mk_pcicar(bus->number, devfn, where);
 	__raw_writel(PCICAR_E | addr, PCICAR);
+	__raw_readl(PCICAR);
 	addr = iospace + (where & 0x3);
 
 	switch (size) {
@@ -133,8 +116,8 @@
 		break;
 	}
 
-	syncio();
 	__raw_writel(0, PCICAR);
+	__raw_readl(PCICAR);
 	return PCIBIOS_SUCCESSFUL;
 }
 
@@ -144,89 +127,6 @@
 };
 
 /*
- *	IO address space access functions. Pretty strait forward, these are
- *	directly mapped in to the IO mapping window. And that is mapped into
- *	virtual address space.
- */
-u8 mcf_pci_inb(u32 addr)
-{
-	return __raw_readb(iospace + (addr & PCI_IO_MASK));
-}
-EXPORT_SYMBOL(mcf_pci_inb);
-
-u16 mcf_pci_inw(u32 addr)
-{
-	return le16_to_cpu(__raw_readw(iospace + (addr & PCI_IO_MASK)));
-}
-EXPORT_SYMBOL(mcf_pci_inw);
-
-u32 mcf_pci_inl(u32 addr)
-{
-	return le32_to_cpu(__raw_readl(iospace + (addr & PCI_IO_MASK)));
-}
-EXPORT_SYMBOL(mcf_pci_inl);
-
-void mcf_pci_insb(u32 addr, u8 *buf, u32 len)
-{
-	for (; len; len--)
-		*buf++ = mcf_pci_inb(addr);
-}
-EXPORT_SYMBOL(mcf_pci_insb);
-
-void mcf_pci_insw(u32 addr, u16 *buf, u32 len)
-{
-	for (; len; len--)
-		*buf++ = mcf_pci_inw(addr);
-}
-EXPORT_SYMBOL(mcf_pci_insw);
-
-void mcf_pci_insl(u32 addr, u32 *buf, u32 len)
-{
-	for (; len; len--)
-		*buf++ = mcf_pci_inl(addr);
-}
-EXPORT_SYMBOL(mcf_pci_insl);
-
-void mcf_pci_outb(u8 v, u32 addr)
-{
-	__raw_writeb(v, iospace + (addr & PCI_IO_MASK));
-}
-EXPORT_SYMBOL(mcf_pci_outb);
-
-void mcf_pci_outw(u16 v, u32 addr)
-{
-	__raw_writew(cpu_to_le16(v), iospace + (addr & PCI_IO_MASK));
-}
-EXPORT_SYMBOL(mcf_pci_outw);
-
-void mcf_pci_outl(u32 v, u32 addr)
-{
-	__raw_writel(cpu_to_le32(v), iospace + (addr & PCI_IO_MASK));
-}
-EXPORT_SYMBOL(mcf_pci_outl);
-
-void mcf_pci_outsb(u32 addr, const u8 *buf, u32 len)
-{
-	for (; len; len--)
-		mcf_pci_outb(*buf++, addr);
-}
-EXPORT_SYMBOL(mcf_pci_outsb);
-
-void mcf_pci_outsw(u32 addr, const u16 *buf, u32 len)
-{
-	for (; len; len--)
-		mcf_pci_outw(*buf++, addr);
-}
-EXPORT_SYMBOL(mcf_pci_outsw);
-
-void mcf_pci_outsl(u32 addr, const u32 *buf, u32 len)
-{
-	for (; len; len--)
-		mcf_pci_outl(*buf++, addr);
-}
-EXPORT_SYMBOL(mcf_pci_outsl);
-
-/*
  * Initialize the PCI bus registers, and scan the bus.
  */
 static struct resource mcf_pci_mem = {
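
The syncio() removal above swaps a CPU-side "nop" barrier for a read-back of
PCICAR: reading the register pushes the preceding posted write out to the
device before any config-space cycle can begin. The idiom in miniature, with
a volatile variable standing in for the mapped register and the __raw_*
accessors:

#include <stdint.h>

static volatile uint32_t pcicar;	/* stand-in for the mapped register */

static void pcicar_write_flushed(uint32_t val)
{
	pcicar = val;		/* __raw_writel(val, PCICAR) */
	(void)pcicar;		/* __raw_readl(PCICAR): flush posted write */
}

int main(void)
{
	pcicar_write_flushed(1);
	return 0;
}
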
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 37a8e5a..a874e54 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -98,8 +98,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -204,7 +204,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -233,12 +233,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -259,7 +259,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -310,7 +310,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -414,6 +413,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 CONFIG_HYDRA=y
 CONFIG_APNE=y
 CONFIG_ZORRO8390=y
@@ -485,6 +485,7 @@
 CONFIG_RTC_DRV_RP5C01=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_HEARTBEAT=y
 CONFIG_PROC_HARDWARE=y
 CONFIG_AMIGA_BUILTIN_SERIAL=y
@@ -621,6 +622,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -647,6 +649,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 6a46626..8ce39e2 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -96,8 +96,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -202,7 +202,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -231,12 +231,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -257,7 +257,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -308,7 +308,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -392,6 +391,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
@@ -446,6 +446,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_HEARTBEAT=y
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
@@ -580,6 +581,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -606,6 +608,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index b0691a7..346c4e7 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -96,8 +96,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -202,7 +202,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -231,12 +231,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -257,7 +257,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -308,7 +308,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -401,6 +400,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
@@ -461,6 +461,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_HEARTBEAT=y
 CONFIG_PROC_HARDWARE=y
 CONFIG_NATFEAT=y
@@ -602,6 +603,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -628,6 +630,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 6f6470f..fca9c7a 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -94,8 +94,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -200,7 +200,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -229,12 +229,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -255,7 +255,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -306,7 +306,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -391,6 +390,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
@@ -439,6 +439,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
@@ -572,6 +573,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -598,6 +600,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 31a1a2b5..f9eab17 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -96,8 +96,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -202,7 +202,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -231,12 +231,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -257,7 +257,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -308,7 +308,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -393,6 +392,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
@@ -449,6 +449,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
@@ -582,6 +583,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -608,6 +610,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 390d4a8..b52e597 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -95,8 +95,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -201,7 +201,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -230,12 +230,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -256,7 +256,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -310,7 +310,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -410,6 +409,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MACSONIC=y
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 CONFIG_MAC8390=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
@@ -471,6 +471,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
@@ -604,6 +605,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -630,6 +632,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 77be97d..2a84eee 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -105,8 +105,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -211,7 +211,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -240,12 +240,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -266,7 +266,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -320,7 +320,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -452,6 +451,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 CONFIG_MACSONIC=y
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 CONFIG_HYDRA=y
 CONFIG_MAC8390=y
 CONFIG_NE2000=y
@@ -541,6 +541,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_HEARTBEAT=y
 CONFIG_PROC_HARDWARE=y
 CONFIG_NATFEAT=y
@@ -684,6 +685,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -710,6 +712,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 2ca1407..476e699 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -93,8 +93,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -199,7 +199,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -228,12 +228,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -254,7 +254,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -305,7 +305,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -391,6 +390,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
@@ -439,6 +439,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
@@ -572,6 +573,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -598,6 +600,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 6a3b4dc..1477cda 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -94,8 +94,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -200,7 +200,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -229,12 +229,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -255,7 +255,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -306,7 +306,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -391,6 +390,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
@@ -439,6 +439,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
@@ -572,6 +573,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -598,6 +600,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 2a3e29c..b3a543d 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -94,8 +94,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -200,7 +200,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -229,12 +229,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -255,7 +255,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -306,7 +306,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -400,6 +399,7 @@
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
@@ -461,6 +461,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_HEARTBEAT=y
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
@@ -595,6 +596,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -621,6 +623,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index cba2494..d543ed5 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -91,8 +91,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -197,7 +197,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -226,12 +226,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -252,7 +252,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -303,7 +303,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -388,6 +387,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
@@ -441,6 +441,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
@@ -573,6 +574,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -599,6 +601,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index d911561..a67e542 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -91,8 +91,8 @@
 CONFIG_NF_CONNTRACK_SIP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_INET=m
-CONFIG_NF_TABLES_NETDEV=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
 CONFIG_NFT_EXTHDR=m
 CONFIG_NFT_META=m
 CONFIG_NFT_RT=m
@@ -197,7 +197,7 @@
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
-CONFIG_NF_TABLES_ARP=m
+CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
@@ -226,12 +226,12 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_SOCKET_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_DUP_IPV6=m
-CONFIG_NFT_FIB_IPV6=m
-CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_MASQ_IPV6=m
 CONFIG_NFT_REDIR_IPV6=m
+CONFIG_NFT_DUP_IPV6=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NF_FLOW_TABLE_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -252,7 +252,7 @@
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_BRIDGE=y
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
 CONFIG_NF_LOG_BRIDGE=m
@@ -303,7 +303,6 @@
 CONFIG_MPLS_ROUTING=m
 CONFIG_MPLS_IPTUNNEL=m
 CONFIG_NET_NSH=m
-CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_AF_KCM=m
 # CONFIG_WIRELESS is not set
 CONFIG_PSAMPLE=m
@@ -389,6 +388,7 @@
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_NATSEMI is not set
 # CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RENESAS is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
@@ -441,6 +441,7 @@
 CONFIG_RTC_DRV_GENERIC=m
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
+CONFIG_DAX=m
 CONFIG_PROC_HARDWARE=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=m
@@ -574,6 +575,7 @@
 CONFIG_CRYPTO_MCRYPTD=m
 CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_KEYWRAP=m
@@ -600,6 +602,8 @@
 CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_SM4=m
+CONFIG_CRYPTO_SPECK=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRYPTO_LZO=m
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 88a9d27..4d8d68c 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -1,4 +1,5 @@
 generic-y += barrier.h
+generic-y += compat.h
 generic-y += device.h
 generic-y += emergency-restart.h
 generic-y += exec.h
diff --git a/arch/m68k/include/asm/atarihw.h b/arch/m68k/include/asm/atarihw.h
index 972c8f3..9000b24 100644
--- a/arch/m68k/include/asm/atarihw.h
+++ b/arch/m68k/include/asm/atarihw.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <asm/bootinfo-atari.h>
 #include <asm/raw_io.h>
+#include <asm/kmap.h>
 
 extern u_long atari_mch_cookie;
 extern u_long atari_mch_type;
diff --git a/arch/m68k/include/asm/delay.h b/arch/m68k/include/asm/delay.h
index 7f47412..751712f 100644
--- a/arch/m68k/include/asm/delay.h
+++ b/arch/m68k/include/asm/delay.h
@@ -49,8 +49,6 @@
  * The simpler m68k and ColdFire processors do not have a 32*32->64
  * multiply instruction. So we need to handle them a little differently.
  * We use a bit of shifting and a single 32*32->32 multiply to get close.
- * This is a macro so that the const version can factor out the first
- * multiply and shift.
  */
 #define	HZSCALE		(268435456 / (1000000 / HZ))
 
@@ -115,6 +113,13 @@
  */
 #define	HZSCALE		(268435456 / (1000000 / HZ))
 
-#define ndelay(n) __delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000))
+static inline void ndelay(unsigned long nsec)
+{
+	__delay(DIV_ROUND_UP(nsec *
+			     ((((HZSCALE) >> 11) *
+			       (loops_per_jiffy >> 11)) >> 6),
+			     1000));
+}
+#define ndelay(n) ndelay(n)
 
 #endif /* defined(_M68K_DELAY_H) */
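
Two things are going on in the ndelay() change above: making it a real inline forces the argument through an unsigned long conversion, so a 64-bit nanosecond count no longer overflows the macro arithmetic, and the trailing "#define ndelay(n) ndelay(n)" claims the name so the generic fallback stays out of the way. A sketch of the generic side it overrides, as best recalled from include/linux/delay.h:

    /* generic fallback: only installed when the arch left ndelay unclaimed */
    #ifndef ndelay
    static inline void ndelay(unsigned long x)
    {
        udelay(DIV_ROUND_UP(x, 1000));  /* round ns up to whole us */
    }
    #define ndelay(x) ndelay(x)
    #endif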
diff --git a/arch/m68k/include/asm/io.h b/arch/m68k/include/asm/io.h
index 756089c..ca2849a 100644
--- a/arch/m68k/include/asm/io.h
+++ b/arch/m68k/include/asm/io.h
@@ -1,14 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifdef __uClinux__
+#if defined(__uClinux__) || defined(CONFIG_COLDFIRE)
 #include <asm/io_no.h>
 #else
 #include <asm/io_mm.h>
 #endif
-
-#define readb_relaxed(addr)	readb(addr)
-#define readw_relaxed(addr)	readw(addr)
-#define readl_relaxed(addr)	readl(addr)
-
-#define writeb_relaxed(b, addr)	writeb(b, addr)
-#define writew_relaxed(b, addr)	writew(b, addr)
-#define writel_relaxed(b, addr)	writel(b, addr)
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index ed5333e..fe485f4 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -26,6 +26,7 @@
 #include <linux/compiler.h>
 #include <asm/raw_io.h>
 #include <asm/virtconvert.h>
+#include <asm/kmap.h>
 
 #include <asm-generic/iomap.h>
 
@@ -85,53 +86,7 @@
 #endif /* ATARI_ROM_ISA */
 
 
-#if defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE)
-
-#define HAVE_ARCH_PIO_SIZE
-#define PIO_OFFSET	0
-#define PIO_MASK	0xffff
-#define PIO_RESERVED	0x10000
-
-u8 mcf_pci_inb(u32 addr);
-u16 mcf_pci_inw(u32 addr);
-u32 mcf_pci_inl(u32 addr);
-void mcf_pci_insb(u32 addr, u8 *buf, u32 len);
-void mcf_pci_insw(u32 addr, u16 *buf, u32 len);
-void mcf_pci_insl(u32 addr, u32 *buf, u32 len);
-
-void mcf_pci_outb(u8 v, u32 addr);
-void mcf_pci_outw(u16 v, u32 addr);
-void mcf_pci_outl(u32 v, u32 addr);
-void mcf_pci_outsb(u32 addr, const u8 *buf, u32 len);
-void mcf_pci_outsw(u32 addr, const u16 *buf, u32 len);
-void mcf_pci_outsl(u32 addr, const u32 *buf, u32 len);
-
-#define	inb	mcf_pci_inb
-#define	inb_p	mcf_pci_inb
-#define	inw	mcf_pci_inw
-#define	inw_p	mcf_pci_inw
-#define	inl	mcf_pci_inl
-#define	inl_p	mcf_pci_inl
-#define	insb	mcf_pci_insb
-#define	insw	mcf_pci_insw
-#define	insl	mcf_pci_insl
-
-#define	outb	mcf_pci_outb
-#define	outb_p	mcf_pci_outb
-#define	outw	mcf_pci_outw
-#define	outw_p	mcf_pci_outw
-#define	outl	mcf_pci_outl
-#define	outl_p	mcf_pci_outl
-#define	outsb	mcf_pci_outsb
-#define	outsw	mcf_pci_outsw
-#define	outsl	mcf_pci_outsl
-
-#define readb(addr)	in_8(addr)
-#define writeb(v, addr)	out_8((addr), (v))
-#define readw(addr)	in_le16(addr)
-#define writew(v, addr)	out_le16((addr), (v))
-
-#elif defined(CONFIG_ISA) || defined(CONFIG_ATARI_ROM_ISA)
+#if defined(CONFIG_ISA) || defined(CONFIG_ATARI_ROM_ISA)
 
 #if MULTI_ISA == 0
 #undef MULTI_ISA
@@ -414,8 +369,7 @@
 #define writew(val, addr)	out_le16((addr), (val))
 #endif /* CONFIG_ATARI_ROM_ISA */
 
-#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) && \
-    !(defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE))
+#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA)
 /*
  * We need to define dummy functions for GENERIC_IOMAP support.
  */
@@ -461,39 +415,6 @@
 
 #define mmiowb()
 
-static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
-}
-static inline void __iomem *ioremap_nocache(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
-}
-#define ioremap_uc ioremap_nocache
-static inline void __iomem *ioremap_wt(unsigned long physaddr,
-					 unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
-}
-static inline void __iomem *ioremap_fullcache(unsigned long physaddr,
-				      unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_FULL_CACHING);
-}
-
-static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
-{
-	__builtin_memset((void __force *) addr, val, count);
-}
-static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
-{
-	__builtin_memcpy(dst, (void __force *) src, count);
-}
-static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
-{
-	__builtin_memcpy((void __force *) dst, src, count);
-}
-
 #ifndef CONFIG_SUN3
 #define IO_SPACE_LIMIT 0xffff
 #else
@@ -515,13 +436,12 @@
  */
 #define xlate_dev_kmem_ptr(p)	p
 
-static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
-{
-	return (void __iomem *) port;
-}
+#define readb_relaxed(addr)	readb(addr)
+#define readw_relaxed(addr)	readw(addr)
+#define readl_relaxed(addr)	readl(addr)
 
-static inline void ioport_unmap(void __iomem *p)
-{
-}
+#define writeb_relaxed(b, addr)	writeb(b, addr)
+#define writew_relaxed(b, addr)	writew(b, addr)
+#define writel_relaxed(b, addr)	writel(b, addr)
 
 #endif /* _IO_H */
diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h
index 86f45b4..83a0a6d 100644
--- a/arch/m68k/include/asm/io_no.h
+++ b/arch/m68k/include/asm/io_no.h
@@ -2,191 +2,148 @@
 #ifndef _M68KNOMMU_IO_H
 #define _M68KNOMMU_IO_H
 
-#ifdef __KERNEL__
-
-#define ARCH_HAS_IOREMAP_WT
-
-#include <asm/virtconvert.h>
-#include <asm-generic/iomap.h>
+/*
+ * Convert a physical memory address into an IO memory address.
+ * For us this is trivially a type cast.
+ */
+#define iomem(a)	((void __iomem *) (a))
 
 /*
- * These are for ISA/PCI shared memory _only_ and should never be used
- * on any other type of memory, including Zorro memory. They are meant to
- * access the bus in the bus byte order which is little-endian!.
- *
- * readX/writeX() are used to access memory mapped devices. On some
- * architectures the memory mapped IO stuff needs to be accessed
- * differently. On the m68k architecture, we just read/write the
- * memory location directly.
+ * The non-MMU m68k and ColdFire IO and memory mapped hardware access
+ * functions have always worked in CPU native endian. We need to define
+ * that behavior here first before we include asm-generic/io.h.
  */
-/* ++roman: The assignments to temp. vars avoid that gcc sometimes generates
- * two accesses to memory, which may be undesirable for some devices.
- */
-
-/*
- * swap functions are sometimes needed to interface little-endian hardware
- */
-static inline unsigned short _swapw(volatile unsigned short v)
-{
-    return ((v << 8) | (v >> 8));
-}
-
-static inline unsigned int _swapl(volatile unsigned long v)
-{
-    return ((v << 24) | ((v & 0xff00) << 8) | ((v & 0xff0000) >> 8) | (v >> 24));
-}
-
-#define readb(addr) \
+#define __raw_readb(addr) \
     ({ unsigned char __v = (*(volatile unsigned char *) (addr)); __v; })
-#define readw(addr) \
+#define __raw_readw(addr) \
     ({ unsigned short __v = (*(volatile unsigned short *) (addr)); __v; })
-#define readl(addr) \
+#define __raw_readl(addr) \
     ({ unsigned int __v = (*(volatile unsigned int *) (addr)); __v; })
 
-#define writeb(b,addr) (void)((*(volatile unsigned char *) (addr)) = (b))
-#define writew(b,addr) (void)((*(volatile unsigned short *) (addr)) = (b))
-#define writel(b,addr) (void)((*(volatile unsigned int *) (addr)) = (b))
+#define __raw_writeb(b, addr) (void)((*(volatile unsigned char *) (addr)) = (b))
+#define __raw_writew(b, addr) (void)((*(volatile unsigned short *) (addr)) = (b))
+#define __raw_writel(b, addr) (void)((*(volatile unsigned int *) (addr)) = (b))
 
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
+#if defined(CONFIG_COLDFIRE)
+/*
+ * For ColdFire platforms we may need to do some extra checks for what
+ * type of address range we are accessing. Include the ColdFire platform
+ * definitions so we can figure out if we need to do something special.
+ */
+#include <asm/byteorder.h>
+#include <asm/coldfire.h>
+#include <asm/mcfsim.h>
+#endif /* CONFIG_COLDFIRE */
 
-static inline void io_outsb(unsigned int addr, const void *buf, int len)
+#if defined(IOMEMBASE)
+/*
+ * The ColdFire SoC internal peripherals are mapped into virtual address
+ * space using the ACR registers of the cache control unit. This means we
+ * are using a 1:1 physical:virtual mapping for them. We can quickly
+ * determine if we are accessing an internal peripheral device given the
+ * physical or virtual address using the same range check. This check logic
+ * applies just the same if there is no MMU but something like a PCI bus
+ * is present.
+ */
+static int __cf_internalio(unsigned long addr)
 {
-	volatile unsigned char *ap = (volatile unsigned char *) addr;
-	unsigned char *bp = (unsigned char *) buf;
-	while (len--)
-		*ap = *bp++;
+	return (addr >= IOMEMBASE) && (addr <= IOMEMBASE + IOMEMSIZE - 1);
 }
 
-static inline void io_outsw(unsigned int addr, const void *buf, int len)
+static int cf_internalio(const volatile void __iomem *addr)
 {
-	volatile unsigned short *ap = (volatile unsigned short *) addr;
-	unsigned short *bp = (unsigned short *) buf;
-	while (len--)
-		*ap = _swapw(*bp++);
+	return __cf_internalio((unsigned long) addr);
 }
 
-static inline void io_outsl(unsigned int addr, const void *buf, int len)
-{
-	volatile unsigned int *ap = (volatile unsigned int *) addr;
-	unsigned int *bp = (unsigned int *) buf;
-	while (len--)
-		*ap = _swapl(*bp++);
-}
-
-static inline void io_insb(unsigned int addr, void *buf, int len)
-{
-	volatile unsigned char *ap = (volatile unsigned char *) addr;
-	unsigned char *bp = (unsigned char *) buf;
-	while (len--)
-		*bp++ = *ap;
-}
-
-static inline void io_insw(unsigned int addr, void *buf, int len)
-{
-	volatile unsigned short *ap = (volatile unsigned short *) addr;
-	unsigned short *bp = (unsigned short *) buf;
-	while (len--)
-		*bp++ = _swapw(*ap);
-}
-
-static inline void io_insl(unsigned int addr, void *buf, int len)
-{
-	volatile unsigned int *ap = (volatile unsigned int *) addr;
-	unsigned int *bp = (unsigned int *) buf;
-	while (len--)
-		*bp++ = _swapl(*ap);
-}
-
-#define mmiowb()
-
 /*
- *	make the short names macros so specific devices
- *	can override them as required
+ * We need to treat built-in peripherals and bus based address ranges
+ * differently. Local built-in peripherals (and the ColdFire SoC parts
+ * have quite a lot of them) are always native endian - which is big
+ * endian on m68k/ColdFire. Bus based address ranges, like the PCI bus,
+ * are accessed little endian - so we need to byte swap those.
  */
-
-#define memset_io(a,b,c)	memset((void *)(a),(b),(c))
-#define memcpy_fromio(a,b,c)	memcpy((a),(void *)(b),(c))
-#define memcpy_toio(a,b,c)	memcpy((void *)(a),(b),(c))
-
-#define inb(addr)    readb(addr)
-#define inw(addr)    readw(addr)
-#define inl(addr)    readl(addr)
-#define outb(x,addr) ((void) writeb(x,addr))
-#define outw(x,addr) ((void) writew(x,addr))
-#define outl(x,addr) ((void) writel(x,addr))
-
-#define inb_p(addr)    inb(addr)
-#define inw_p(addr)    inw(addr)
-#define inl_p(addr)    inl(addr)
-#define outb_p(x,addr) outb(x,addr)
-#define outw_p(x,addr) outw(x,addr)
-#define outl_p(x,addr) outl(x,addr)
-
-#define outsb(a,b,l) io_outsb(a,b,l)
-#define outsw(a,b,l) io_outsw(a,b,l)
-#define outsl(a,b,l) io_outsl(a,b,l)
-
-#define insb(a,b,l) io_insb(a,b,l)
-#define insw(a,b,l) io_insw(a,b,l)
-#define insl(a,b,l) io_insl(a,b,l)
-
-#define IO_SPACE_LIMIT 0xffffffff
-
-
-/* Values for nocacheflag and cmode */
-#define IOMAP_FULL_CACHING		0
-#define IOMAP_NOCACHE_SER		1
-#define IOMAP_NOCACHE_NONSER		2
-#define IOMAP_WRITETHROUGH		3
-
-static inline void *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag)
+#define readw readw
+static inline u16 readw(const volatile void __iomem *addr)
 {
-	return (void *) physaddr;
-}
-static inline void *ioremap(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
-}
-static inline void *ioremap_nocache(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
-}
-static inline void *ioremap_wt(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
-}
-static inline void *ioremap_fullcache(unsigned long physaddr, unsigned long size)
-{
-	return __ioremap(physaddr, size, IOMAP_FULL_CACHING);
+	if (cf_internalio(addr))
+		return __raw_readw(addr);
+	return __le16_to_cpu(__raw_readw(addr));
 }
 
-#define	iounmap(addr)	do { } while(0)
+#define readl readl
+static inline u32 readl(const volatile void __iomem *addr)
+{
+	if (cf_internalio(addr))
+		return __raw_readl(addr);
+	return __le32_to_cpu(__raw_readl(addr));
+}
+
+#define writew writew
+static inline void writew(u16 value, volatile void __iomem *addr)
+{
+	if (cf_internalio(addr))
+		__raw_writew(value, addr);
+	else
+		__raw_writew(__cpu_to_le16(value), addr);
+}
+
+#define writel writel
+static inline void writel(u32 value, volatile void __iomem *addr)
+{
+	if (cf_internalio(addr))
+		__raw_writel(value, addr);
+	else
+		__raw_writel(__cpu_to_le32(value), addr);
+}
+
+#else
+
+#define readb __raw_readb
+#define readw __raw_readw
+#define readl __raw_readl
+#define writeb __raw_writeb
+#define writew __raw_writew
+#define writel __raw_writel
+
+#endif /* IOMEMBASE */
+
+#if defined(CONFIG_PCI)
+/*
+ * Support for PCI bus access uses the asm-generic access functions.
+ * We need to supply the base address and masks for the normal memory
+ * and IO address space mappings.
+ */
+#define PCI_MEM_PA	0xf0000000		/* Host physical address */
+#define PCI_MEM_BA	0xf0000000		/* Bus physical address */
+#define PCI_MEM_SIZE	0x08000000		/* 128 MB */
+#define PCI_MEM_MASK	(PCI_MEM_SIZE - 1)
+
+#define PCI_IO_PA	0xf8000000		/* Host physical address */
+#define PCI_IO_BA	0x00000000		/* Bus physical address */
+#define PCI_IO_SIZE	0x00010000		/* 64k */
+#define PCI_IO_MASK	(PCI_IO_SIZE - 1)
+
+#define HAVE_ARCH_PIO_SIZE
+#define PIO_OFFSET	0
+#define PIO_MASK	0xffff
+#define PIO_RESERVED	0x10000
+#define PCI_IOBASE	((void __iomem *) PCI_IO_PA)
+#define PCI_SPACE_LIMIT	PCI_IO_MASK
+#endif /* CONFIG_PCI */
 
 /*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
+ * These are defined in kmap.h as static inline functions. To maintain
+ * previous behavior we put these define guards here so asm-generic/io.h
+ * skips its own versions of them.
  */
-#define xlate_dev_mem_ptr(p)	__va(p)
+#ifdef CONFIG_MMU
+#define memset_io memset_io
+#define memcpy_fromio memcpy_fromio
+#define memcpy_toio memcpy_toio
+#endif
 
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p)	p
-
-static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
-{
-	return (void __iomem *) port;
-}
-
-static inline void ioport_unmap(void __iomem *p)
-{
-}
-
-#endif /* __KERNEL__ */
+#include <asm/kmap.h>
+#include <asm/virtconvert.h>
+#include <asm-generic/io.h>
 
 #endif /* _M68KNOMMU_IO_H */
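
The new io_no.h leans on two conventions worth spelling out: the "#define readw readw" guards tell asm-generic/io.h that the arch has already claimed an accessor, and the address range check decides the byte order per access. A self-contained sketch of the dispatch, with the window bounds made up for illustration:

    #include <linux/io.h>
    #include <linux/types.h>
    #include <asm/byteorder.h>

    #define DEMO_IOMEMBASE  0xe0000000UL    /* stand-in for IOMEMBASE */
    #define DEMO_IOMEMSIZE  0x10000000UL    /* stand-in for IOMEMSIZE */

    static inline int demo_internalio(const volatile void __iomem *addr)
    {
        unsigned long a = (unsigned long) addr;

        return a >= DEMO_IOMEMBASE && a < DEMO_IOMEMBASE + DEMO_IOMEMSIZE;
    }

    static inline u16 demo_readw(const volatile void __iomem *addr)
    {
        /* on-chip ColdFire peripherals are native (big) endian,
         * bus ranges such as PCI are little endian */
        if (demo_internalio(addr))
            return __raw_readw(addr);
        return __le16_to_cpu(__raw_readw(addr));
    }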
diff --git a/arch/m68k/include/asm/kmap.h b/arch/m68k/include/asm/kmap.h
new file mode 100644
index 0000000..84b8333
--- /dev/null
+++ b/arch/m68k/include/asm/kmap.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KMAP_H
+#define _KMAP_H
+
+#ifdef CONFIG_MMU
+
+/* Values for nocacheflag and cmode */
+#define IOMAP_FULL_CACHING		0
+#define IOMAP_NOCACHE_SER		1
+#define IOMAP_NOCACHE_NONSER		2
+#define IOMAP_WRITETHROUGH		3
+
+/*
+ * These functions exported by arch/m68k/mm/kmap.c.
+ * Only needed on MMU enabled systems.
+ */
+extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size,
+			       int cacheflag);
+extern void iounmap(void __iomem *addr);
+extern void __iounmap(void *addr, unsigned long size);
+
+#define ioremap ioremap
+static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size)
+{
+	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
+}
+
+#define ioremap_nocache ioremap_nocache
+static inline void __iomem *ioremap_nocache(unsigned long physaddr,
+					    unsigned long size)
+{
+	return __ioremap(physaddr, size, IOMAP_NOCACHE_SER);
+}
+
+#define ioremap_uc ioremap_nocache
+static inline void __iomem *ioremap_wt(unsigned long physaddr,
+				       unsigned long size)
+{
+	return __ioremap(physaddr, size, IOMAP_WRITETHROUGH);
+}
+
+#define ioremap_fullcache ioremap_fullcache
+static inline void __iomem *ioremap_fullcache(unsigned long physaddr,
+					      unsigned long size)
+{
+	return __ioremap(physaddr, size, IOMAP_FULL_CACHING);
+}
+
+static inline void memset_io(volatile void __iomem *addr, unsigned char val,
+			     int count)
+{
+	__builtin_memset((void __force *) addr, val, count);
+}
+
+static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
+				 int count)
+{
+	__builtin_memcpy(dst, (void __force *) src, count);
+}
+
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
+			       int count)
+{
+	__builtin_memcpy((void __force *) dst, src, count);
+}
+
+#endif /* CONFIG_MMU */
+
+#define ioport_map ioport_map
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return (void __iomem *) port;
+}
+
+#define ioport_unmap ioport_unmap
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+
+#endif /* _KMAP_H */
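
From a driver's point of view nothing changes with the kmap.h split: ioremap() still defaults to the serialized no-cache mapping. A hedged usage sketch, with the physical window purely illustrative:

    #include <linux/errno.h>
    #include <linux/io.h>

    #define DEMO_PHYS  0xff000000UL    /* made-up register window */
    #define DEMO_SIZE  0x1000UL

    static int demo_probe(void)
    {
        void __iomem *regs;

        regs = ioremap(DEMO_PHYS, DEMO_SIZE);  /* IOMAP_NOCACHE_SER */
        if (!regs)
            return -ENOMEM;

        writeb(0x01, regs);    /* poke the first register */
        iounmap(regs);
        return 0;
    }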
diff --git a/arch/m68k/include/asm/nubus.h b/arch/m68k/include/asm/nubus.h
index d0d2039..c2281da 100644
--- a/arch/m68k/include/asm/nubus.h
+++ b/arch/m68k/include/asm/nubus.h
@@ -3,6 +3,7 @@
 #define _ASM_M68K_NUBUS_H
 
 #include <asm/raw_io.h>
+#include <asm/kmap.h>
 
 #define nubus_readb raw_inb
 #define nubus_readw raw_inw
diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h
index ef26fae..5a4bc22 100644
--- a/arch/m68k/include/asm/pci.h
+++ b/arch/m68k/include/asm/pci.h
@@ -4,12 +4,6 @@
 
 #include <asm-generic/pci.h>
 
-/* The PCI address space does equal the physical memory
- * address space.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
 #define	pcibios_assign_all_busses()	1
 
 #define	PCIBIOS_MIN_IO		0x00000100
diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h
index 3a89c08..9b00fb8 100644
--- a/arch/m68k/include/asm/q40_master.h
+++ b/arch/m68k/include/asm/q40_master.h
@@ -8,7 +8,7 @@
 #define _Q40_MASTER_H
 
 #include <asm/raw_io.h>
-
+#include <asm/kmap.h>
 
 #define q40_master_addr 0xff000000
 
diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h
index 05e940c..8576125 100644
--- a/arch/m68k/include/asm/raw_io.h
+++ b/arch/m68k/include/asm/raw_io.h
@@ -13,20 +13,6 @@
 
 #include <asm/byteorder.h>
 
-
-/* Values for nocacheflag and cmode */
-#define IOMAP_FULL_CACHING		0
-#define IOMAP_NOCACHE_SER		1
-#define IOMAP_NOCACHE_NONSER		2
-#define IOMAP_WRITETHROUGH		3
-
-extern void iounmap(void __iomem *addr);
-
-extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size,
-		       int cacheflag);
-extern void __iounmap(void *addr, unsigned long size);
-
-
 /* ++roman: The assignments to temp. vars avoid that gcc sometimes generates
  * two accesses to memory, which may be undesirable for some devices.
  */
diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h
index 75c172e..c4cb889 100644
--- a/arch/m68k/include/asm/uaccess_mm.h
+++ b/arch/m68k/include/asm/uaccess_mm.h
@@ -141,10 +141,12 @@
 	case 4:								\
 		__get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT);	\
 		break;							\
-/*	case 8:	disabled because gcc-4.1 has a broken typeof		\
- 	    {								\
- 		const void *__gu_ptr = (ptr);				\
- 		u64 __gu_val;						\
+	case 8: {							\
+		const void *__gu_ptr = (ptr);				\
+		union {							\
+			u64 l;						\
+			__typeof__(*(ptr)) t;				\
+		} __gu_val;						\
 		asm volatile ("\n"					\
 			"1:	"MOVES".l	(%2)+,%1\n"		\
 			"2:	"MOVES".l	(%2),%R1\n"		\
@@ -162,13 +164,13 @@
 			"	.long	1b,10b\n"			\
 			"	.long	2b,10b\n"			\
 			"	.previous"				\
-			: "+d" (__gu_err), "=&r" (__gu_val),		\
+			: "+d" (__gu_err), "=&r" (__gu_val.l),		\
 			  "+a" (__gu_ptr)				\
 			: "i" (-EFAULT)					\
 			: "memory");					\
-		(x) = (__force typeof(*(ptr)))__gu_val;			\
+		(x) = __gu_val.t;					\
 		break;							\
-	    }	*/							\
+	}								\
 	default:							\
 		__gu_err = __get_user_bad();				\
 		break;							\
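
The re-enabled 8-byte case above sidesteps the old gcc-4.1 typeof breakage by reading through a union: the asm writes a plain u64 into one member, while the other member carries the pointer's own type, so the final assignment to x keeps type checking. A stand-alone sketch of just the type mechanics (no MOVES asm), using GNU C typeof:

    #include <stdint.h>
    #include <string.h>

    /* copy an 8-byte object through a u64 without losing its C type;
     * mirrors the union used in __get_user() above */
    #define demo_get_user8(x, ptr)                        \
    do {                                                  \
        union {                                           \
            uint64_t l;                                   \
            __typeof__(*(ptr)) t;                         \
        } __val;                                          \
        memcpy(&__val.l, (ptr), sizeof(__val.l));         \
        (x) = __val.t;                                    \
    } while (0)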
diff --git a/arch/m68k/include/asm/vga.h b/arch/m68k/include/asm/vga.h
index 010a624..4742e6b 100644
--- a/arch/m68k/include/asm/vga.h
+++ b/arch/m68k/include/asm/vga.h
@@ -2,7 +2,15 @@
 #ifndef _ASM_M68K_VGA_H
 #define _ASM_M68K_VGA_H
 
+/*
+ * Some ColdFire platforms do in fact have a PCI bus. For those we want to
+ * use the real IO access functions, so don't fake them out or redirect
+ * them in that case.
+ */
+#ifndef CONFIG_PCI
+
 #include <asm/raw_io.h>
+#include <asm/kmap.h>
 
 /*
  * FIXME
@@ -25,4 +33,5 @@
 #define writeb			raw_outb
 #define writew			raw_outw
 
+#endif /* CONFIG_PCI */
 #endif /* _ASM_M68K_VGA_H */
diff --git a/arch/m68k/include/asm/virtconvert.h b/arch/m68k/include/asm/virtconvert.h
index 4aea6be..dfe4308 100644
--- a/arch/m68k/include/asm/virtconvert.h
+++ b/arch/m68k/include/asm/virtconvert.h
@@ -16,11 +16,13 @@
 /*
  * Change virtual addresses to physical addresses and vv.
  */
+#define virt_to_phys virt_to_phys
 static inline unsigned long virt_to_phys(void *address)
 {
 	return __pa(address);
 }
 
+#define phys_to_virt phys_to_virt
 static inline void *phys_to_virt(unsigned long address)
 {
 	return __va(address);
diff --git a/arch/m68k/include/asm/zorro.h b/arch/m68k/include/asm/zorro.h
index 96f64bf..60fc4b6 100644
--- a/arch/m68k/include/asm/zorro.h
+++ b/arch/m68k/include/asm/zorro.h
@@ -3,6 +3,7 @@
 #define _ASM_M68K_ZORRO_H
 
 #include <asm/raw_io.h>
+#include <asm/kmap.h>
 
 #define z_readb raw_inb
 #define z_readw raw_inw
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index c01b9b8..463572c 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -9,6 +9,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
+#include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -165,3 +166,12 @@
 	.sync_sg_for_device	= m68k_dma_sync_sg_for_device,
 };
 EXPORT_SYMBOL(m68k_dma_ops);
+
+void arch_setup_pdev_archdata(struct platform_device *pdev)
+{
+	if (pdev->dev.coherent_dma_mask == DMA_MASK_NONE &&
+	    pdev->dev.dma_mask == NULL) {
+		pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
+	}
+}
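
The new arch_setup_pdev_archdata() hook (a weak function the platform bus core calls at registration time) means bare platform devices pick up a usable 32-bit DMA mask by default. A hedged sketch of the kind of board code that no longer has to set the masks by hand; the device name is illustrative:

    #include <linux/init.h>
    #include <linux/platform_device.h>

    static struct platform_device demo_fec_device = {
        .name = "fec",
        .id   = 0,
        /* no .dev.dma_mask / .dev.coherent_dma_mask needed anymore */
    };

    static int __init demo_board_init(void)
    {
        return platform_device_register(&demo_fec_device);
    }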
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index dd25bfc..f35e3eb 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -527,21 +527,9 @@
 	return 0;
 }
 
-static int hardware_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, hardware_proc_show, NULL);
-}
-
-static const struct file_operations hardware_proc_fops = {
-	.open		= hardware_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_hardware_init(void)
 {
-	proc_create("hardware", 0, NULL, &hardware_proc_fops);
+	proc_create_single("hardware", 0, NULL, hardware_proc_show);
 	return 0;
 }
 module_init(proc_hardware_init);
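
proc_create_single() folds the whole single_open()/file_operations boilerplate into one call; the show callback is all a caller keeps. A minimal sketch of the same conversion for a made-up entry:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *m, void *v)
    {
        seq_puts(m, "demo\n");
        return 0;
    }

    static int __init demo_proc_init(void)
    {
        /* replaces proc_create() plus ~10 lines of open/fops glue */
        if (!proc_create_single("demo", 0, NULL, demo_show))
            return -ENOMEM;
        return 0;
    }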
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index f7cd5ec..72850b8 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -576,41 +576,42 @@
 
 static inline void siginfo_build_tests(void)
 {
-	/* This needs to be tested on m68k as it has a lesser
-	 * alignment requirment than x86 and that can cause surprises.
+	/*
+	 * This needs to be tested on m68k as it has a lesser
+	 * alignment requirement than x86 and that can cause surprises.
 	 */
 
 	/* This is part of the ABI and can never change in size: */
 	BUILD_BUG_ON(sizeof(siginfo_t) != 128);
 
-	/* Ensure the know fields never change in location */
+	/* Ensure the known fields never change in location */
 	BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_code)  != 8);
 
 	/* _kill */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0C);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0c);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x10);
 
 	/* _timer */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_tid)     != 0x0C);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_tid)     != 0x0c);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x10);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_value)   != 0x14);
 
 	/* _rt */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_pid)   != 0x0C);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_pid)   != 0x0c);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_uid)   != 0x10);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x14);
 
 	/* _sigchld */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_pid)    != 0x0C);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_pid)    != 0x0c);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_uid)    != 0x10);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x14);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_utime)  != 0x18);
-	BUILD_BUG_ON(offsetof(siginfo_t, si_stime)  != 0x1C);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_stime)  != 0x1c);
 
 	/* _sigfault */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x0C);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x0c);
 
 	/* _sigfault._mcerr */
 	BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x10);
@@ -623,11 +624,11 @@
 	BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12);
 
 	/* _sigpoll */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_band)   != 0x0C);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_band)   != 0x0c);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_fd)     != 0x10);
 
 	/* _sigsys */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x0C);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x0c);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_syscall)   != 0x10);
 	BUILD_BUG_ON(offsetof(siginfo_t, si_arch)      != 0x14);
 
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 97dd4e2..3a8b47f 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -71,23 +71,26 @@
 	return IRQ_HANDLED;
 }
 
-void read_persistent_clock(struct timespec *ts)
+#ifdef CONFIG_M68KCLASSIC
+#if !IS_BUILTIN(CONFIG_RTC_DRV_GENERIC)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	struct rtc_time time;
+
 	ts->tv_sec = 0;
 	ts->tv_nsec = 0;
 
-	if (mach_hwclk) {
-		mach_hwclk(0, &time);
+	if (!mach_hwclk)
+		return;
 
-		if ((time.tm_year += 1900) < 1970)
-			time.tm_year += 100;
-		ts->tv_sec = mktime(time.tm_year, time.tm_mon, time.tm_mday,
-				      time.tm_hour, time.tm_min, time.tm_sec);
-	}
+	mach_hwclk(0, &time);
+
+	ts->tv_sec = mktime64(time.tm_year + 1900, time.tm_mon + 1, time.tm_mday,
+			      time.tm_hour, time.tm_min, time.tm_sec);
 }
+#endif
 
-#if defined(CONFIG_ARCH_USES_GETTIMEOFFSET) && IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
 static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
 {
 	mach_hwclk(0, tm);
@@ -145,8 +148,8 @@
 }
 
 module_init(rtc_init);
-
-#endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */
+#endif /* CONFIG_RTC_DRV_GENERIC */
+#endif /* CONFIG_M68KCLASSIC */
 
 void __init time_init(void)
 {
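
The read_persistent_clock64()/mktime64() switch above is part of the y2038 conversion: time64_t seconds do not wrap in 2038, and the calendar fields are normalized on the way in because struct rtc_time counts years from 1900 and months from 0. A hedged sketch of that conversion step in isolation:

    #include <linux/rtc.h>
    #include <linux/time64.h>

    static time64_t demo_rtc_to_secs(const struct rtc_time *t)
    {
        /* +1900 / +1 undo the rtc_time field conventions */
        return mktime64(t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
                        t->tm_hour, t->tm_min, t->tm_sec);
    }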
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index c1cc4e9..b2fd000 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1007,9 +1007,9 @@
 
 asmlinkage void trap_c(struct frame *fp)
 {
-	int sig;
+	int sig, si_code;
+	void __user *addr;
 	int vector = (fp->ptregs.vector >> 2) & 0xff;
-	siginfo_t info;
 
 	if (fp->ptregs.sr & PS_S) {
 		if (vector == VEC_TRACE) {
@@ -1029,21 +1029,21 @@
 	/* send the appropriate signal to the user program */
 	switch (vector) {
 	    case VEC_ADDRERR:
-		info.si_code = BUS_ADRALN;
+		si_code = BUS_ADRALN;
 		sig = SIGBUS;
 		break;
 	    case VEC_ILLEGAL:
 	    case VEC_LINE10:
 	    case VEC_LINE11:
-		info.si_code = ILL_ILLOPC;
+		si_code = ILL_ILLOPC;
 		sig = SIGILL;
 		break;
 	    case VEC_PRIV:
-		info.si_code = ILL_PRVOPC;
+		si_code = ILL_PRVOPC;
 		sig = SIGILL;
 		break;
 	    case VEC_COPROC:
-		info.si_code = ILL_COPROC;
+		si_code = ILL_COPROC;
 		sig = SIGILL;
 		break;
 	    case VEC_TRAP1:
@@ -1060,76 +1060,74 @@
 	    case VEC_TRAP12:
 	    case VEC_TRAP13:
 	    case VEC_TRAP14:
-		info.si_code = ILL_ILLTRP;
+		si_code = ILL_ILLTRP;
 		sig = SIGILL;
 		break;
 	    case VEC_FPBRUC:
 	    case VEC_FPOE:
 	    case VEC_FPNAN:
-		info.si_code = FPE_FLTINV;
+		si_code = FPE_FLTINV;
 		sig = SIGFPE;
 		break;
 	    case VEC_FPIR:
-		info.si_code = FPE_FLTRES;
+		si_code = FPE_FLTRES;
 		sig = SIGFPE;
 		break;
 	    case VEC_FPDIVZ:
-		info.si_code = FPE_FLTDIV;
+		si_code = FPE_FLTDIV;
 		sig = SIGFPE;
 		break;
 	    case VEC_FPUNDER:
-		info.si_code = FPE_FLTUND;
+		si_code = FPE_FLTUND;
 		sig = SIGFPE;
 		break;
 	    case VEC_FPOVER:
-		info.si_code = FPE_FLTOVF;
+		si_code = FPE_FLTOVF;
 		sig = SIGFPE;
 		break;
 	    case VEC_ZERODIV:
-		info.si_code = FPE_INTDIV;
+		si_code = FPE_INTDIV;
 		sig = SIGFPE;
 		break;
 	    case VEC_CHK:
 	    case VEC_TRAP:
-		info.si_code = FPE_INTOVF;
+		si_code = FPE_INTOVF;
 		sig = SIGFPE;
 		break;
 	    case VEC_TRACE:		/* ptrace single step */
-		info.si_code = TRAP_TRACE;
+		si_code = TRAP_TRACE;
 		sig = SIGTRAP;
 		break;
 	    case VEC_TRAP15:		/* breakpoint */
-		info.si_code = TRAP_BRKPT;
+		si_code = TRAP_BRKPT;
 		sig = SIGTRAP;
 		break;
 	    default:
-		info.si_code = ILL_ILLOPC;
+		si_code = ILL_ILLOPC;
 		sig = SIGILL;
 		break;
 	}
-	info.si_signo = sig;
-	info.si_errno = 0;
 	switch (fp->ptregs.format) {
 	    default:
-		info.si_addr = (void *) fp->ptregs.pc;
+		addr = (void __user *) fp->ptregs.pc;
 		break;
 	    case 2:
-		info.si_addr = (void *) fp->un.fmt2.iaddr;
+		addr = (void __user *) fp->un.fmt2.iaddr;
 		break;
 	    case 7:
-		info.si_addr = (void *) fp->un.fmt7.effaddr;
+		addr = (void __user *) fp->un.fmt7.effaddr;
 		break;
 	    case 9:
-		info.si_addr = (void *) fp->un.fmt9.iaddr;
+		addr = (void __user *) fp->un.fmt9.iaddr;
 		break;
 	    case 10:
-		info.si_addr = (void *) fp->un.fmta.daddr;
+		addr = (void __user *) fp->un.fmta.daddr;
 		break;
 	    case 11:
-		info.si_addr = (void *) fp->un.fmtb.daddr;
+		addr = (void __user *) fp->un.fmtb.daddr;
 		break;
 	}
-	force_sig_info (sig, &info, current);
+	force_sig_fault(sig, si_code, addr, current);
 }
 
 void die_if_kernel (char *str, struct pt_regs *fp, int nr)
@@ -1161,12 +1159,6 @@
 #ifdef CONFIG_M68KFPU_EMU
 asmlinkage void fpemu_signal(int signal, int code, void *addr)
 {
-	siginfo_t info;
-
-	info.si_signo = signal;
-	info.si_errno = 0;
-	info.si_code = code;
-	info.si_addr = addr;
-	force_sig_info(signal, &info, current);
+	force_sig_fault(signal, code, addr, current);
 }
 #endif
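[Editorial note: most of the churn in traps.c here, and in the other
architectures below, is the same mechanical conversion: open-coded siginfo_t
setup followed by force_sig_info() becomes a single force_sig_fault() call.
As a rough sketch of what the helper centralizes (the real kernel/signal.c
version also handles arch-specific fields such as si_trapno, omitted here):

	int force_sig_fault(int sig, int code, void __user *addr,
			    struct task_struct *t)
	{
		struct siginfo info;

		clear_siginfo(&info);	/* no uninitialized padding to userspace */
		info.si_signo = sig;
		info.si_errno = 0;
		info.si_code  = code;
		info.si_addr  = addr;
		return force_sig_info(info.si_signo, &info, t);
	}

Besides being shorter, this guarantees every caller clears the siginfo_t
padding, which some of the replaced call sites did not do.]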
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 0c3275a..e522307 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -1005,7 +1005,7 @@
 		struct resource swim_rsrc = {
 			.flags = IORESOURCE_MEM,
 			.start = (resource_size_t)swim_base,
-			.end   = (resource_size_t)swim_base + 0x2000,
+			.end   = (resource_size_t)swim_base + 0x1FFF,
 		};
 
 		platform_device_register_simple("swim", -1, &swim_rsrc, 1);
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 03253c4..f2ff377 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -21,35 +21,32 @@
 
 int send_fault_sig(struct pt_regs *regs)
 {
-	siginfo_t siginfo;
+	int signo, si_code;
+	void __user *addr;
 
-	clear_siginfo(&siginfo);
-	siginfo.si_signo = current->thread.signo;
-	siginfo.si_code = current->thread.code;
-	siginfo.si_addr = (void *)current->thread.faddr;
-	pr_debug("send_fault_sig: %p,%d,%d\n", siginfo.si_addr,
-		 siginfo.si_signo, siginfo.si_code);
+	signo = current->thread.signo;
+	si_code = current->thread.code;
+	addr = (void __user *)current->thread.faddr;
+	pr_debug("send_fault_sig: %p,%d,%d\n", addr, signo, si_code);
 
 	if (user_mode(regs)) {
-		force_sig_info(siginfo.si_signo,
-			       &siginfo, current);
+		force_sig_fault(signo, si_code, addr, current);
 	} else {
 		if (fixup_exception(regs))
 			return -1;
 
-		//if (siginfo.si_signo == SIGBUS)
-		//	force_sig_info(siginfo.si_signo,
-		//		       &siginfo, current);
+		//if (signo == SIGBUS)
+		//	force_sig_fault(signo, si_code, addr, current);
 
 		/*
 		 * Oops. The kernel tried to access some bad page. We'll have to
 		 * terminate things with extreme prejudice.
 		 */
-		if ((unsigned long)siginfo.si_addr < PAGE_SIZE)
+		if ((unsigned long)addr < PAGE_SIZE)
 			pr_alert("Unable to handle kernel NULL pointer dereference");
 		else
 			pr_alert("Unable to handle kernel access");
-		pr_cont(" at virtual address %p\n", siginfo.si_addr);
+		pr_cont(" at virtual address %p\n", addr);
 		die_if_kernel("Oops", regs, 0 /*error_code*/);
 		do_exit(SIGKILL);
 	}
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index c2a3832..40a3b32 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -89,7 +89,8 @@
 	for (p = &iolist ; (tmp = *p) ; p = &tmp->next) {
 		if (tmp->addr == addr) {
 			*p = tmp->next;
-			__iounmap(tmp->addr, tmp->size);
+			/* remove gap added in get_io_area() */
+			__iounmap(tmp->addr, tmp->size - IO_SIZE);
 			kfree(tmp);
 			return;
 		}
@@ -125,6 +126,10 @@
 			return (void __iomem *)physaddr;
 	}
 #endif
+#ifdef CONFIG_COLDFIRE
+	if (__cf_internalio(physaddr))
+		return (void __iomem *) physaddr;
+#endif
 
 #ifdef DEBUG
 	printk("ioremap: 0x%lx,0x%lx(%d) - ", physaddr, size, cacheflag);
@@ -235,6 +240,10 @@
 	     ((unsigned long)addr > 0x60000000)))
 			free_io_area((__force void *)addr);
 #else
+#ifdef CONFIG_COLDFIRE
+	if (cf_internalio(addr))
+		return;
+#endif
 	free_io_area((__force void *)addr);
 #endif
 }
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 8778612..f8a710f 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -153,12 +153,14 @@
 	if (!op) {
 		m147_rtc->ctrl = RTC_READ;
 		t->tm_year = bcd2int (m147_rtc->bcd_year);
-		t->tm_mon  = bcd2int (m147_rtc->bcd_mth);
+		t->tm_mon  = bcd2int(m147_rtc->bcd_mth) - 1;
 		t->tm_mday = bcd2int (m147_rtc->bcd_dom);
 		t->tm_hour = bcd2int (m147_rtc->bcd_hr);
 		t->tm_min  = bcd2int (m147_rtc->bcd_min);
 		t->tm_sec  = bcd2int (m147_rtc->bcd_sec);
 		m147_rtc->ctrl = 0;
+		if (t->tm_year < 70)
+			t->tm_year += 100;
 	}
 	return 0;
 }
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 6fa06d4..4ffd9ef 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -400,12 +400,14 @@
 	if (!op) {
 		rtc->ctrl = RTC_READ;
 		t->tm_year = bcd2int (rtc->bcd_year);
-		t->tm_mon  = bcd2int (rtc->bcd_mth);
+		t->tm_mon  = bcd2int(rtc->bcd_mth) - 1;
 		t->tm_mday = bcd2int (rtc->bcd_dom);
 		t->tm_hour = bcd2int (rtc->bcd_hr);
 		t->tm_min  = bcd2int (rtc->bcd_min);
 		t->tm_sec  = bcd2int (rtc->bcd_sec);
 		rtc->ctrl = 0;
+		if (t->tm_year < 70)
+			t->tm_year += 100;
 	}
 	return 0;
 }
diff --git a/arch/m68k/sun3/intersil.c b/arch/m68k/sun3/intersil.c
index 2cd0bcb..d911070 100644
--- a/arch/m68k/sun3/intersil.c
+++ b/arch/m68k/sun3/intersil.c
@@ -48,9 +48,9 @@
 		todintersil->hour = t->tm_hour;
 		todintersil->minute = t->tm_min;
 		todintersil->second = t->tm_sec;
-		todintersil->month = t->tm_mon;
+		todintersil->month = t->tm_mon + 1;
 		todintersil->day = t->tm_mday;
-		todintersil->year = t->tm_year - 68;
+		todintersil->year = (t->tm_year - 68) % 100;
 		todintersil->weekday = t->tm_wday;
 	} else {
 		/* read clock */
@@ -58,10 +58,12 @@
 		t->tm_hour = todintersil->hour;
 		t->tm_min = todintersil->minute;
 		t->tm_sec = todintersil->second;
-		t->tm_mon = todintersil->month;
+		t->tm_mon = todintersil->month - 1;
 		t->tm_mday = todintersil->day;
 		t->tm_year = todintersil->year + 68;
 		t->tm_wday = todintersil->weekday;
+		if (t->tm_year < 70)
+			t->tm_year += 100;
 	}
 
 	intersil_clock->cmd_reg = START_VAL;
diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c
index 7a2c53d..047e2bc 100644
--- a/arch/m68k/sun3x/time.c
+++ b/arch/m68k/sun3x/time.c
@@ -52,8 +52,8 @@
 		h->hour = bin2bcd(t->tm_hour);
 		h->wday = bin2bcd(t->tm_wday);
 		h->mday = bin2bcd(t->tm_mday);
-		h->month = bin2bcd(t->tm_mon);
-		h->year = bin2bcd(t->tm_year);
+		h->month = bin2bcd(t->tm_mon + 1);
+		h->year = bin2bcd(t->tm_year % 100);
 		h->csr &= ~C_WRITE;
 	} else {
 		h->csr |= C_READ;
@@ -62,9 +62,11 @@
 		t->tm_hour = bcd2bin(h->hour);
 		t->tm_wday = bcd2bin(h->wday);
 		t->tm_mday = bcd2bin(h->mday);
-		t->tm_mon = bcd2bin(h->month);
+		t->tm_mon = bcd2bin(h->month) - 1;
 		t->tm_year = bcd2bin(h->year);
 		h->csr &= ~C_READ;
+		if (t->tm_year < 70)
+			t->tm_year += 100;
 	}
 
 	local_irq_restore(flags);
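[Editorial note: the four RTC reads above all apply the same two-digit-year
windowing: clocks that store only years 0-99 are interpreted so that 70-99
map to 1970-1999 and 0-69 map to 2000-2069. In struct rtc_time terms (years
since 1900), that is the recurring fix-up:

	/* e.g. raw year 99 -> tm_year  99 -> 1999,
	 *      raw year 18 -> tm_year 118 -> 2018 */
	if (t->tm_year < 70)
		t->tm_year += 100;

The write paths are adjusted to match, truncating back to two digits
with "% 100".]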
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 3817a3e..d147821 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -19,7 +19,6 @@
 	select HAVE_ARCH_HASH
 	select HAVE_ARCH_KGDB
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 3c80a5a..fe6a6c6 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -2,6 +2,7 @@
 generic-y += bitops.h
 generic-y += bug.h
 generic-y += bugs.h
+generic-y += compat.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += emergency-restart.h
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index 5de871e..0033786 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -62,12 +62,6 @@
 
 #define HAVE_PCI_LEGACY	1
 
-/* The PCI address space does equal the physical memory
- * address space (no IOMMU).  The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS     (1)
-
 extern void pcibios_claim_one_bus(struct pci_bus *b);
 
 extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index c91e8ce..3145e7d 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -184,14 +184,3 @@
 	.sync_sg_for_device	= dma_nommu_sync_sg_for_device,
 };
 EXPORT_SYMBOL(dma_nommu_ops);
-
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
-	return 0;
-}
-fs_initcall(dma_init);
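[Editorial note: the dma_debug_init() boilerplate removed here, and from the
mips and openrisc hunks below, is not lost: in this cycle the DMA-debug core
took over its own initialization, so architectures no longer need an initcall
just to preallocate tracking entries. Approximately, in common code (a sketch,
not the verbatim source):

	/* lib/dma-debug.c: the core now preallocates its own entries,
	 * with the same 2^16 default the arches used to pass in */
	static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;

	static int __init dma_debug_init(void)
	{
		/* ... allocate nr_prealloc_entries tracking entries ... */
		return 0;
	}
	core_initcall(dma_debug_init);
]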
diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c
index e6f338d..eafff21 100644
--- a/arch/microblaze/kernel/exceptions.c
+++ b/arch/microblaze/kernel/exceptions.c
@@ -60,16 +60,10 @@
 
 void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
 {
-	siginfo_t info;
-
 	if (kernel_mode(regs))
 		die("Exception in kernel mode", regs, signr);
 
-	info.si_signo = signr;
-	info.si_errno = 0;
-	info.si_code = code;
-	info.si_addr = (void __user *) addr;
-	force_sig_info(signr, &info, current);
+	force_sig_fault(signr, code, (void __user *)addr, current);
 }
 
 asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index f91b30f..af60744 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -88,7 +88,6 @@
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
-	siginfo_t info;
 	int code = SEGV_MAPERR;
 	int is_write = error_code & ESR_S;
 	int fault;
@@ -269,11 +268,6 @@
 	/* User mode accesses cause a SIGSEGV */
 	if (user_mode(regs)) {
 		_exception(SIGSEGV, regs, code, address);
-/*		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		info.si_code = code;
-		info.si_addr = (void *) address;
-		force_sig_info(SIGSEGV, &info, current);*/
 		return;
 	}
 
@@ -295,11 +289,7 @@
 do_sigbus:
 	up_read(&mm->mmap_sem);
 	if (user_mode(regs)) {
-		info.si_signo = SIGBUS;
-		info.si_errno = 0;
-		info.si_code = BUS_ADRERR;
-		info.si_addr = (void __user *)address;
-		force_sig_info(SIGBUS, &info, current);
+		force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
 		return;
 	}
 	bad_page_fault(regs, address, SIGBUS);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 225c95d..7074b22 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -42,7 +42,6 @@
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_EXIT_THREAD
@@ -132,7 +131,7 @@
 
 config MIPS_ALCHEMY
 	bool "Alchemy processor based machines"
-	select ARCH_PHYS_ADDR_T_64BIT
+	select PHYS_ADDR_T_64BIT
 	select CEVT_R4K
 	select CSRC_R4K
 	select IRQ_MIPS_CPU
@@ -890,7 +889,7 @@
 	bool "Cavium Networks Octeon SoC based boards"
 	select CEVT_R4K
 	select ARCH_HAS_PHYS_TO_DMA
-	select ARCH_PHYS_ADDR_T_64BIT
+	select PHYS_ADDR_T_64BIT
 	select DMA_COHERENT
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
@@ -912,6 +911,7 @@
 	select MIPS_NR_CPU_NR_MAP_1024
 	select BUILTIN_DTB
 	select MTD_COMPLEX_MAPPINGS
+	select SWIOTLB
 	select SYS_SUPPORTS_RELOCATABLE
 	help
 	  This option supports all of the Octeon reference boards from Cavium
@@ -936,7 +936,7 @@
 	select SWAP_IO_SPACE
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_64BIT_KERNEL
-	select ARCH_PHYS_ADDR_T_64BIT
+	select PHYS_ADDR_T_64BIT
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_HIGHMEM
 	select DMA_COHERENT
@@ -962,7 +962,7 @@
 	select HW_HAS_PCI
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_64BIT_KERNEL
-	select ARCH_PHYS_ADDR_T_64BIT
+	select PHYS_ADDR_T_64BIT
 	select GPIOLIB
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -1101,9 +1101,6 @@
 config FW_CFE
 	bool
 
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool (HIGHMEM && ARCH_PHYS_ADDR_T_64BIT) || 64BIT
-
 config ARCH_SUPPORTS_UPROBES
 	bool
 
@@ -1122,9 +1119,6 @@
 	bool
 	select NEED_DMA_MAP_STATE
 
-config NEED_DMA_MAP_STATE
-	bool
-
 config SYS_HAS_EARLY_PRINTK
 	bool
 
@@ -1373,6 +1367,7 @@
 	select MIPS_PGD_C0_CONTEXT
 	select MIPS_L1_CACHE_SHIFT_6
 	select GPIOLIB
+	select SWIOTLB
 	help
 		The Loongson 3 processor implements the MIPS64R2 instruction
 		set with many extensions.
@@ -1770,7 +1765,7 @@
 	depends on SYS_SUPPORTS_HIGHMEM
 	select XPA
 	select HIGHMEM
-	select ARCH_PHYS_ADDR_T_64BIT
+	select PHYS_ADDR_T_64BIT
 	default n
 	help
 	  Choose this option if you want to enable the Extended Physical
@@ -2402,9 +2397,6 @@
 	default y
 
 
-config ARCH_PHYS_ADDR_T_64BIT
-       bool
-
 choice
 	prompt "SmartMIPS or microMIPS ASE support"
 
@@ -2556,7 +2548,7 @@
 	  Say Y to support efficient handling of discontiguous physical memory,
 	  for architectures which are either NUMA (Non-Uniform Memory Access)
 	  or have huge holes in the physical address space for other reasons.
-	  See <file:Documentation/vm/numa> for more.
+	  See <file:Documentation/vm/numa.rst> for more.
 
 config ARCH_SPARSEMEM_ENABLE
 	bool
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 5e9fce0..e2122cc 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -309,9 +309,6 @@
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
 	egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
 	sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
-ifdef CONFIG_64BIT
-CHECKFLAGS		+= -m64
-endif
 endif
 
 OBJCOPYFLAGS		+= --remove-section=.reginfo
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 3807859..50cff3c 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -36,6 +36,28 @@
 	clock-frequency = <48000000>;
 };
 
+&mmc0 {
+	status = "okay";
+
+	bus-width = <4>;
+	max-frequency = <50000000>;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pins_mmc0>;
+
+	cd-gpios = <&gpf 20 GPIO_ACTIVE_LOW>;
+};
+
+&mmc1 {
+	status = "okay";
+
+	bus-width = <4>;
+	max-frequency = <50000000>;
+
+	pinctrl-names = "default";
+	pinctrl-0 = <&pins_mmc1>;
+};
+
 &uart0 {
 	status = "okay";
 
@@ -203,4 +225,16 @@
 		groups = "nemc-cs6";
 		bias-disable;
 	};
+
+	pins_mmc0: mmc0 {
+		function = "mmc0";
+		groups = "mmc0-1bit-e", "mmc0-4bit-e";
+		bias-disable;
+	};
+
+	pins_mmc1: mmc1 {
+		function = "mmc1";
+		groups = "mmc1-1bit-d", "mmc1-4bit-d";
+		bias-disable;
+	};
 };
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index 9b57946..b72e53b 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <dt-bindings/clock/jz4780-cgu.h>
+#include <dt-bindings/dma/jz4780-dma.h>
 
 / {
 	#address-cells = <1>;
@@ -241,6 +242,57 @@
 		status = "disabled";
 	};
 
+	dma: dma@13420000 {
+		compatible = "ingenic,jz4780-dma";
+		reg = <0x13420000 0x10000>;
+		#dma-cells = <2>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <10>;
+
+		clocks = <&cgu JZ4780_CLK_PDMA>;
+	};
+
+	mmc0: mmc@13450000 {
+		compatible = "ingenic,jz4780-mmc";
+		reg = <0x13450000 0x1000>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <37>;
+
+		clocks = <&cgu JZ4780_CLK_MSC0>;
+		clock-names = "mmc";
+
+		cap-sd-highspeed;
+		cap-mmc-highspeed;
+		cap-sdio-irq;
+		dmas = <&dma JZ4780_DMA_MSC0_RX 0xffffffff>,
+		       <&dma JZ4780_DMA_MSC0_TX 0xffffffff>;
+		dma-names = "rx", "tx";
+
+		status = "disabled";
+	};
+
+	mmc1: mmc@13460000 {
+		compatible = "ingenic,jz4780-mmc";
+		reg = <0x13460000 0x1000>;
+
+		interrupt-parent = <&intc>;
+		interrupts = <36>;
+
+		clocks = <&cgu JZ4780_CLK_MSC1>;
+		clock-names = "mmc";
+
+		cap-sd-highspeed;
+		cap-mmc-highspeed;
+		cap-sdio-irq;
+		dmas = <&dma JZ4780_DMA_MSC1_RX 0xffffffff>,
+		       <&dma JZ4780_DMA_MSC1_TX 0xffffffff>;
+		dma-names = "rx", "tx";
+
+		status = "disabled";
+	};
+
 	bch: bch@134d0000 {
 		compatible = "ingenic,jz4780-bch";
 		reg = <0x134d0000 0x10000>;
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
index b5eee1a..4984e46 100644
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -67,18 +67,6 @@
 	help
 	  Lock the kernel's implementation of memcpy() into L2.
 
-config IOMMU_HELPER
-	bool
-
-config NEED_SG_DMA_LENGTH
-	bool
-
-config SWIOTLB
-	def_bool y
-	select DMA_DIRECT_OPS
-	select IOMMU_HELPER
-	select NEED_SG_DMA_LENGTH
-
 config OCTEON_ILM
 	tristate "Module to measure interrupt latency using Octeon CIU Timer"
 	help
diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig
index b5f4ad8..be23fd2 100644
--- a/arch/mips/configs/ci20_defconfig
+++ b/arch/mips/configs/ci20_defconfig
@@ -104,10 +104,14 @@
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_MMC=y
+CONFIG_MMC_JZ4740=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_JZ4740=y
+CONFIG_DMADEVICES=y
+CONFIG_DMA_JZ4780=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_MEMORY=y
+CONFIG_EXT4_FS=y
 # CONFIG_DNOTIFY is not set
 CONFIG_PROC_KCORE=y
 # CONFIG_PROC_PAGE_MONITOR is not set
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 9a0fa66..78675f1 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -14,7 +14,6 @@
 
 typedef u32		compat_size_t;
 typedef s32		compat_ssize_t;
-typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_suseconds_t;
 
@@ -38,24 +37,16 @@
 typedef s32		compat_timer_t;
 typedef s32		compat_key_t;
 
+typedef s16		compat_short_t;
 typedef s32		compat_int_t;
 typedef s32		compat_long_t;
 typedef s64		compat_s64;
+typedef u16		compat_ushort_t;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
 typedef u32		compat_uptr_t;
 
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
 struct compat_stat {
 	compat_dev_t	st_dev;
 	s32		st_pad1[3];
@@ -168,35 +159,35 @@
 
 struct compat_semid64_ds {
 	struct compat_ipc64_perm sem_perm;
-	compat_time_t	sem_otime;
-	compat_time_t	sem_ctime;
+	compat_ulong_t	sem_otime;
+	compat_ulong_t	sem_ctime;
 	compat_ulong_t	sem_nsems;
-	compat_ulong_t	__unused1;
-	compat_ulong_t	__unused2;
+	compat_ulong_t	sem_otime_high;
+	compat_ulong_t	sem_ctime_high;
 };
 
 struct compat_msqid64_ds {
 	struct compat_ipc64_perm msg_perm;
 #ifndef CONFIG_CPU_LITTLE_ENDIAN
-	compat_ulong_t	__unused1;
+	compat_ulong_t	msg_stime_high;
 #endif
-	compat_time_t	msg_stime;
+	compat_ulong_t	msg_stime;
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-	compat_ulong_t	__unused1;
+	compat_ulong_t	msg_stime_high;
 #endif
 #ifndef CONFIG_CPU_LITTLE_ENDIAN
-	compat_ulong_t	__unused2;
+	compat_ulong_t	msg_rtime_high;
 #endif
-	compat_time_t	msg_rtime;
+	compat_ulong_t	msg_rtime;
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-	compat_ulong_t	__unused2;
+	compat_ulong_t	msg_rtime_high;
 #endif
 #ifndef CONFIG_CPU_LITTLE_ENDIAN
-	compat_ulong_t	__unused3;
+	compat_ulong_t	msg_ctime_high;
 #endif
-	compat_time_t	msg_ctime;
+	compat_ulong_t	msg_ctime;
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-	compat_ulong_t	__unused3;
+	compat_ulong_t	msg_ctime_high;
 #endif
 	compat_ulong_t	msg_cbytes;
 	compat_ulong_t	msg_qnum;
@@ -210,14 +201,16 @@
 struct compat_shmid64_ds {
 	struct compat_ipc64_perm shm_perm;
 	compat_size_t	shm_segsz;
-	compat_time_t	shm_atime;
-	compat_time_t	shm_dtime;
-	compat_time_t	shm_ctime;
+	compat_ulong_t	shm_atime;
+	compat_ulong_t	shm_dtime;
+	compat_ulong_t	shm_ctime;
 	compat_pid_t	shm_cpid;
 	compat_pid_t	shm_lpid;
 	compat_ulong_t	shm_nattch;
-	compat_ulong_t	__unused1;
-	compat_ulong_t	__unused2;
+	compat_ushort_t	shm_atime_high;
+	compat_ushort_t	shm_dtime_high;
+	compat_ushort_t	shm_ctime_high;
+	compat_ushort_t	__unused2;
 };
 
 /* MIPS has unusual order of fields in stack_t */
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 2339f42..4360998 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -121,13 +121,6 @@
 #include <linux/string.h>
 #include <asm/io.h>
 
-/*
- * The PCI address space does equal the physical memory address space.
- * The networking and block device layers use this boolean for bounce
- * buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS     (1)
-
 #ifdef CONFIG_PCI_DOMAINS_GENERIC
 static inline int pci_proc_domain(struct pci_bus *bus)
 {
diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h
index eb4d0f9..46aa15b 100644
--- a/arch/mips/include/uapi/asm/msgbuf.h
+++ b/arch/mips/include/uapi/asm/msgbuf.h
@@ -9,33 +9,15 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - extension of time_t to 64-bit on 32-bitsystem to solve the y2038 problem
  * - 2 miscellaneous unsigned long values
  */
 
+#if defined(__mips64)
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
-#if !defined(__mips64) && defined(__MIPSEB__)
-	unsigned long	__unused1;
-#endif
 	__kernel_time_t msg_stime;	/* last msgsnd time */
-#if !defined(__mips64) && defined(__MIPSEL__)
-	unsigned long	__unused1;
-#endif
-#if !defined(__mips64) && defined(__MIPSEB__)
-	unsigned long	__unused2;
-#endif
 	__kernel_time_t msg_rtime;	/* last msgrcv time */
-#if !defined(__mips64) && defined(__MIPSEL__)
-	unsigned long	__unused2;
-#endif
-#if !defined(__mips64) && defined(__MIPSEB__)
-	unsigned long	__unused3;
-#endif
 	__kernel_time_t msg_ctime;	/* last change time */
-#if !defined(__mips64) && defined(__MIPSEL__)
-	unsigned long	__unused3;
-#endif
 	unsigned long  msg_cbytes;	/* current number of bytes on queue */
 	unsigned long  msg_qnum;	/* number of messages in queue */
 	unsigned long  msg_qbytes;	/* max number of bytes on queue */
@@ -44,5 +26,42 @@
 	unsigned long  __unused4;
 	unsigned long  __unused5;
 };
+#elif defined (__MIPSEB__)
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	unsigned long  msg_stime_high;
+	unsigned long  msg_stime;	/* last msgsnd time */
+	unsigned long  msg_rtime_high;
+	unsigned long  msg_rtime;	/* last msgrcv time */
+	unsigned long  msg_ctime_high;
+	unsigned long  msg_ctime;	/* last change time */
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;
+	unsigned long  __unused5;
+};
+#elif defined (__MIPSEL__)
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+	unsigned long  msg_stime;	/* last msgsnd time */
+	unsigned long  msg_stime_high;
+	unsigned long  msg_rtime;	/* last msgrcv time */
+	unsigned long  msg_rtime_high;
+	unsigned long  msg_ctime;	/* last change time */
+	unsigned long  msg_ctime_high;
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused4;
+	unsigned long  __unused5;
+};
+#else
+#warning no endianness set
+#endif
 
 #endif /* _ASM_MSGBUF_H */
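[Editorial note: the *_high fields introduced here, and in sembuf.h/shmbuf.h
below, let 32-bit MIPS userspace express timestamps beyond 2038 without
growing the structures: the kernel stores the upper bits of a 64-bit time in
what used to be padding. A libc that understands the split reassembles the
value roughly like this (sketch only; field names per the structures above):

	/* 32-bit MIPS: each half is an unsigned long (32 bits wide) */
	long long stime = ((long long)ds->msg_stime_high << 32) | ds->msg_stime;

For shmid64_ds the leftover padding is only three 16-bit slots, so the high
parts are unsigned short and the result covers 48 bits of seconds, still good
for several million years.]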
diff --git a/arch/mips/include/uapi/asm/sembuf.h b/arch/mips/include/uapi/asm/sembuf.h
index 2c0f507..60c89e6 100644
--- a/arch/mips/include/uapi/asm/sembuf.h
+++ b/arch/mips/include/uapi/asm/sembuf.h
@@ -7,10 +7,11 @@
  * Note extra padding because this structure is passed back and forth
  * between kernel and user space.
  *
- * Pad space is left for:
- * - 2 miscellaneous 64-bit values
+ * Pad space is left for 2 miscellaneous 64-bit values on mips64,
+ * but used for the upper 32 bits of the time values on mips32.
  */
 
+#ifdef __mips64
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
 	__kernel_time_t sem_otime;		/* last semop time */
@@ -19,5 +20,15 @@
 	unsigned long	__unused1;
 	unsigned long	__unused2;
 };
+#else
+struct semid64_ds {
+	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+	unsigned long   sem_otime;		/* last semop time */
+	unsigned long   sem_ctime;		/* last change time */
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
+	unsigned long	sem_otime_high;
+	unsigned long	sem_ctime_high;
+};
+#endif
 
 #endif /* _ASM_SEMBUF_H */
diff --git a/arch/mips/include/uapi/asm/shmbuf.h b/arch/mips/include/uapi/asm/shmbuf.h
index 379e6bc..9b9bba3 100644
--- a/arch/mips/include/uapi/asm/shmbuf.h
+++ b/arch/mips/include/uapi/asm/shmbuf.h
@@ -7,10 +7,13 @@
  * Note extra padding because this structure is passed back and forth
  * between kernel and user space.
  *
- * Pad space is left for:
- * - 2 miscellaneous 32-bit rsp. 64-bit values
+ * As MIPS was lacking proper padding after shm_?time, we use 48 bits
+ * of the padding at the end to store a few additional bits of the time.
+ * libc implementations need to take care to convert this into a proper
+ * data structure when moving to 64-bit time_t.
  */
 
+#ifdef __mips64
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
 	size_t			shm_segsz;	/* size of segment (bytes) */
@@ -23,6 +26,22 @@
 	unsigned long		__unused1;
 	unsigned long		__unused2;
 };
+#else
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_ctime;	/* last change time */
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned short		shm_atime_high;
+	unsigned short		shm_dtime_high;
+	unsigned short		shm_ctime_high;
+	unsigned short		__unused1;
+};
+#endif
 
 struct shminfo64 {
 	unsigned long	shmmax;
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index c4db910..b5d9e17 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -8,13 +8,13 @@
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  * Copyright (C) 2016, Imagination Technologies Ltd.
  */
+#include <linux/compat.h>
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/syscalls.h>
 
-#include <asm/compat.h>
 #include <asm/compat-signal.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 967e9e4..d67fa74 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -699,17 +699,11 @@
 asmlinkage void do_ov(struct pt_regs *regs)
 {
 	enum ctx_state prev_state;
-	siginfo_t info;
-
-	clear_siginfo(&info);
-	info.si_signo = SIGFPE;
-	info.si_code = FPE_INTOVF;
-	info.si_addr = (void __user *)regs->cp0_epc;
 
 	prev_state = exception_enter();
 	die_if_kernel("Integer overflow", regs);
 
-	force_sig_info(SIGFPE, &info, current);
+	force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc, current);
 	exception_exit(prev_state);
 }
 
@@ -722,32 +716,27 @@
 void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
 		     struct task_struct *tsk)
 {
-	struct siginfo si;
-
-	clear_siginfo(&si);
-	si.si_addr = fault_addr;
-	si.si_signo = SIGFPE;
+	int si_code = FPE_FLTUNK;
 
 	if (fcr31 & FPU_CSR_INV_X)
-		si.si_code = FPE_FLTINV;
+		si_code = FPE_FLTINV;
 	else if (fcr31 & FPU_CSR_DIV_X)
-		si.si_code = FPE_FLTDIV;
+		si_code = FPE_FLTDIV;
 	else if (fcr31 & FPU_CSR_OVF_X)
-		si.si_code = FPE_FLTOVF;
+		si_code = FPE_FLTOVF;
 	else if (fcr31 & FPU_CSR_UDF_X)
-		si.si_code = FPE_FLTUND;
+		si_code = FPE_FLTUND;
 	else if (fcr31 & FPU_CSR_INE_X)
-		si.si_code = FPE_FLTRES;
+		si_code = FPE_FLTRES;
 
-	force_sig_info(SIGFPE, &si, tsk);
+	force_sig_fault(SIGFPE, si_code, fault_addr, tsk);
 }
 
 int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 {
-	struct siginfo si;
+	int si_code;
 	struct vm_area_struct *vma;
 
-	clear_siginfo(&si);
 	switch (sig) {
 	case 0:
 		return 0;
@@ -757,23 +746,18 @@
 		return 1;
 
 	case SIGBUS:
-		si.si_addr = fault_addr;
-		si.si_signo = sig;
-		si.si_code = BUS_ADRERR;
-		force_sig_info(sig, &si, current);
+		force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr, current);
 		return 1;
 
 	case SIGSEGV:
-		si.si_addr = fault_addr;
-		si.si_signo = sig;
 		down_read(&current->mm->mmap_sem);
 		vma = find_vma(current->mm, (unsigned long)fault_addr);
 		if (vma && (vma->vm_start <= (unsigned long)fault_addr))
-			si.si_code = SEGV_ACCERR;
+			si_code = SEGV_ACCERR;
 		else
-			si.si_code = SEGV_MAPERR;
+			si_code = SEGV_MAPERR;
 		up_read(&current->mm->mmap_sem);
-		force_sig_info(sig, &si, current);
+		force_sig_fault(SIGSEGV, si_code, fault_addr, current);
 		return 1;
 
 	default:
@@ -896,10 +880,8 @@
 void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code,
 	const char *str)
 {
-	siginfo_t info;
 	char b[40];
 
-	clear_siginfo(&info);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_TRAP, str, regs, code, current->thread.trap_nr,
 			 SIGTRAP) == NOTIFY_STOP)
@@ -921,13 +903,9 @@
 	case BRK_DIVZERO:
 		scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
 		die_if_kernel(b, regs);
-		if (code == BRK_DIVZERO)
-			info.si_code = FPE_INTDIV;
-		else
-			info.si_code = FPE_INTOVF;
-		info.si_signo = SIGFPE;
-		info.si_addr = (void __user *) regs->cp0_epc;
-		force_sig_info(SIGFPE, &info, current);
+		force_sig_fault(SIGFPE,
+				code == BRK_DIVZERO ? FPE_INTDIV : FPE_INTOVF,
+				(void __user *) regs->cp0_epc, current);
 		break;
 	case BRK_BUG:
 		die_if_kernel("Kernel bug detected", regs);
@@ -952,9 +930,7 @@
 		scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
 		die_if_kernel(b, regs);
 		if (si_code) {
-			info.si_signo = SIGTRAP;
-			info.si_code = si_code;
-			force_sig_info(SIGTRAP, &info, current);
+			force_sig_fault(SIGTRAP, si_code, NULL, current);
 		} else {
 			force_sig(SIGTRAP, current);
 		}
@@ -1506,13 +1482,8 @@
  */
 asmlinkage void do_watch(struct pt_regs *regs)
 {
-	siginfo_t info;
 	enum ctx_state prev_state;
 
-	clear_siginfo(&info);
-	info.si_signo = SIGTRAP;
-	info.si_code = TRAP_HWBKPT;
-
 	prev_state = exception_enter();
 	/*
 	 * Clear WP (bit 22) bit of cause register so we don't loop
@@ -1528,7 +1499,7 @@
 	if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) {
 		mips_read_watch_registers();
 		local_irq_enable();
-		force_sig_info(SIGTRAP, &info, current);
+		force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL, current);
 	} else {
 		mips_clear_watch_registers();
 		local_irq_enable();
diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig
index 72af0c1..c79e6a5 100644
--- a/arch/mips/loongson64/Kconfig
+++ b/arch/mips/loongson64/Kconfig
@@ -130,21 +130,6 @@
 	default y
 	depends on EARLY_PRINTK || SERIAL_8250
 
-config IOMMU_HELPER
-	bool
-
-config NEED_SG_DMA_LENGTH
-	bool
-
-config SWIOTLB
-	bool "Soft IOMMU Support for All-Memory DMA"
-	default y
-	depends on CPU_LOONGSON3
-	select DMA_DIRECT_OPS
-	select IOMMU_HELPER
-	select NEED_SG_DMA_LENGTH
-	select NEED_DMA_MAP_STATE
-
 config PHYS48_TO_HT40
 	bool
 	default y if CPU_LOONGSON3
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index dcafa43..f9fef00 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -402,13 +402,3 @@
 
 const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
 EXPORT_SYMBOL(mips_dma_map_ops);
-
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init mips_dma_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
-	return 0;
-}
-fs_initcall(mips_dma_init);
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 4f8f5bf..5f71f2b 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -42,7 +42,7 @@
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 	const int field = sizeof(unsigned long) * 2;
-	siginfo_t info;
+	int si_code;
 	int fault;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
@@ -63,7 +63,7 @@
 		return;
 #endif
 
-	info.si_code = SEGV_MAPERR;
+	si_code = SEGV_MAPERR;
 
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
@@ -112,7 +112,7 @@
  * we can handle it..
  */
 good_area:
-	info.si_code = SEGV_ACCERR;
+	si_code = SEGV_ACCERR;
 
 	if (write) {
 		if (!(vma->vm_flags & VM_WRITE))
@@ -223,11 +223,7 @@
 			pr_cont("\n");
 		}
 		current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
-		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		/* info.si_code has been set above */
-		info.si_addr = (void __user *) address;
-		force_sig_info(SIGSEGV, &info, tsk);
+		force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
 		return;
 	}
 
@@ -283,11 +279,7 @@
 #endif
 	current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
 	tsk->thread.cp0_badvaddr = address;
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRERR;
-	info.si_addr = (void __user *) address;
-	force_sig_info(SIGBUS, &info, tsk);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
 
 	return;
 #ifndef CONFIG_64BIT
diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig
index 7fcfc7f..412351c 100644
--- a/arch/mips/netlogic/Kconfig
+++ b/arch/mips/netlogic/Kconfig
@@ -83,10 +83,4 @@
 config NLM_COMMON
 	bool
 
-config IOMMU_HELPER
-	bool
-
-config NEED_SG_DMA_LENGTH
-	bool
-
 endif
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index dd2d9f7..7649372 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c
@@ -83,18 +83,6 @@
 	return 0;
 }
 
-static int msp_pci_rd_cnt_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_msp_pci_counts, NULL);
-}
-
-static const struct file_operations msp_pci_rd_cnt_fops = {
-	.open		= msp_pci_rd_cnt_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /*****************************************************************************
  *
  *  FUNCTION: gen_pci_cfg_wr_show
@@ -160,18 +148,6 @@
 	return 0;
 }
 
-static int gen_pci_cfg_wr_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, gen_pci_cfg_wr_show, NULL);
-}
-
-static const struct file_operations gen_pci_cfg_wr_fops = {
-	.open		= gen_pci_cfg_wr_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /*****************************************************************************
  *
  *  FUNCTION: pci_proc_init
@@ -188,8 +164,8 @@
  ****************************************************************************/
 static void pci_proc_init(void)
 {
-	proc_create("pmc_msp_pci_rd_cnt", 0, NULL, &msp_pci_rd_cnt_fops);
-	proc_create("pmc_msp_pci_cfg_wr", 0, NULL, &gen_pci_cfg_wr_fops);
+	proc_create_single("pmc_msp_pci_rd_cnt", 0, NULL, show_msp_pci_counts);
+	proc_create_single("pmc_msp_pci_cfg_wr", 0, NULL, gen_pci_cfg_wr_show);
 }
 #endif /* CONFIG_PROC_FS && PCI_COUNTERS */
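[Editorial note: proc_create_single() is the helper behind this conversion and
the bus_watcher one below: it takes the seq_file show callback directly and
generates the single_open()/seq_read boilerplate that every caller used to
spell out by hand. Its prototype, with a _data variant that passes a cookie
through to the callback, is:

	struct proc_dir_entry *proc_create_single(const char *name, umode_t mode,
			struct proc_dir_entry *parent,
			int (*show)(struct seq_file *, void *));

so the dozen-line open function plus file_operations above collapse to the
single proc_create_single() call in pci_proc_init().]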
 
diff --git a/arch/mips/sibyte/common/bus_watcher.c b/arch/mips/sibyte/common/bus_watcher.c
index a4e55999..4bb85de 100644
--- a/arch/mips/sibyte/common/bus_watcher.c
+++ b/arch/mips/sibyte/common/bus_watcher.c
@@ -142,24 +142,12 @@
 	return 0;
 }
 
-static int bw_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, bw_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations bw_proc_fops = {
-	.open		= bw_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void create_proc_decoder(struct bw_stats_struct *stats)
 {
 	struct proc_dir_entry *ent;
 
-	ent = proc_create_data("bus_watcher", S_IWUSR | S_IRUGO, NULL,
-			       &bw_proc_fops, stats);
+	ent = proc_create_single_data("bus_watcher", S_IWUSR | S_IRUGO, NULL,
+			bw_proc_show, stats);
 	if (!ent) {
 		printk(KERN_INFO "Unable to initialize bus_watcher /proc entry\n");
 		return;
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index b7404f2..6aed9742 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -5,10 +5,13 @@
 
 config NDS32
         def_bool y
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_WANT_FRAME_POINTERS if FTRACE
 	select CLKSRC_MMIO
 	select CLONE_BACKWARDS
 	select COMMON_CLK
+	select DMA_NONCOHERENT_OPS
 	select GENERIC_ASHLDI3
 	select GENERIC_ASHRDI3
 	select GENERIC_LSHRDI3
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index 142e612..dbc4e54 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -9,10 +9,12 @@
 generic-y += clkdev.h
 generic-y += cmpxchg.h
 generic-y += cmpxchg-local.h
+generic-y += compat.h
 generic-y += cputime.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += dma.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += errno.h
 generic-y += exec.h
diff --git a/arch/nds32/include/asm/dma-mapping.h b/arch/nds32/include/asm/dma-mapping.h
deleted file mode 100644
index 2dd47d24..0000000
--- a/arch/nds32/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2005-2017 Andes Technology Corporation
-
-#ifndef ASMNDS32_DMA_MAPPING_H
-#define ASMNDS32_DMA_MAPPING_H
-
-extern struct dma_map_ops nds32_dma_ops;
-
-static inline struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	return &nds32_dma_ops;
-}
-
-#endif
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index d291800..d0dbd4f 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -3,17 +3,14 @@
 
 #include <linux/types.h>
 #include <linux/mm.h>
-#include <linux/export.h>
 #include <linux/string.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/io.h>
 #include <linux/cache.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/dma-mapping.h>
 #include <asm/proc-fns.h>
 
 /*
@@ -22,11 +19,6 @@
 static pte_t *consistent_pte;
 static DEFINE_RAW_SPINLOCK(consistent_lock);
 
-enum master_type {
-	FOR_CPU = 0,
-	FOR_DEVICE = 1,
-};
-
 /*
  * VM region handling support.
  *
@@ -124,10 +116,8 @@
 	return c;
 }
 
-/* FIXME: attrs is not used. */
-static void *nds32_dma_alloc_coherent(struct device *dev, size_t size,
-				      dma_addr_t * handle, gfp_t gfp,
-				      unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		gfp_t gfp, unsigned long attrs)
 {
 	struct page *page;
 	struct arch_vm_region *c;
@@ -232,8 +222,8 @@
 	return NULL;
 }
 
-static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr,
-			   dma_addr_t handle, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t handle, unsigned long attrs)
 {
 	struct arch_vm_region *c;
 	unsigned long flags, addr;
@@ -333,145 +323,69 @@
 }
 
 core_initcall(consistent_init);
-static void consistent_sync(void *vaddr, size_t size, int direction, int master_type);
-static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page,
-				     unsigned long offset, size_t size,
-				     enum dma_data_direction dir,
-				     unsigned long attrs)
+
+static inline void cache_op(phys_addr_t paddr, size_t size,
+		void (*fn)(unsigned long start, unsigned long end))
 {
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		consistent_sync((void *)(page_address(page) + offset), size, dir, FOR_DEVICE);
-	return page_to_phys(page) + offset;
-}
+	struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+	unsigned offset = paddr & ~PAGE_MASK;
+	size_t left = size;
+	unsigned long start;
 
-static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle,
-				 size_t size, enum dma_data_direction dir,
-				 unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		consistent_sync(phys_to_virt(handle), size, dir, FOR_CPU);
-}
+	do {
+		size_t len = left;
 
-/*
- * Make an area consistent for devices.
- */
-static void consistent_sync(void *vaddr, size_t size, int direction, int master_type)
-{
-	unsigned long start = (unsigned long)vaddr;
-	unsigned long end = start + size;
-
-	if (master_type == FOR_CPU) {
-		switch (direction) {
-		case DMA_TO_DEVICE:
-			break;
-		case DMA_FROM_DEVICE:
-		case DMA_BIDIRECTIONAL:
-			cpu_dma_inval_range(start, end);
-			break;
-		default:
-			BUG();
-		}
-	} else {
-		/* FOR_DEVICE */
-		switch (direction) {
-		case DMA_FROM_DEVICE:
-			break;
-		case DMA_TO_DEVICE:
-		case DMA_BIDIRECTIONAL:
-			cpu_dma_wb_range(start, end);
-			break;
-		default:
-			BUG();
-		}
-	}
-}
-
-static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg,
-			    int nents, enum dma_data_direction dir,
-			    unsigned long attrs)
-{
-	int i;
-
-	for (i = 0; i < nents; i++, sg++) {
-		void *virt;
-		unsigned long pfn;
-		struct page *page = sg_page(sg);
-
-		sg->dma_address = sg_phys(sg);
-		pfn = page_to_pfn(page) + sg->offset / PAGE_SIZE;
-		page = pfn_to_page(pfn);
 		if (PageHighMem(page)) {
-			virt = kmap_atomic(page);
-			consistent_sync(virt, sg->length, dir, FOR_CPU);
-			kunmap_atomic(virt);
+			void *addr;
+
+			if (offset + len > PAGE_SIZE) {
+				if (offset >= PAGE_SIZE) {
+					page += offset >> PAGE_SHIFT;
+					offset &= ~PAGE_MASK;
+				}
+				len = PAGE_SIZE - offset;
+			}
+
+			addr = kmap_atomic(page);
+			start = (unsigned long)(addr + offset);
+			fn(start, start + len);
+			kunmap_atomic(addr);
 		} else {
-			if (sg->offset > PAGE_SIZE)
-				panic("sg->offset:%08x > PAGE_SIZE\n",
-				      sg->offset);
-			virt = page_address(page) + sg->offset;
-			consistent_sync(virt, sg->length, dir, FOR_CPU);
+			start = (unsigned long)phys_to_virt(paddr);
+			fn(start, start + size);
 		}
-	}
-	return nents;
+		offset = 0;
+		page++;
+		left -= len;
+	} while (left);
 }
 
-static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-			       int nhwentries, enum dma_data_direction dir,
-			       unsigned long attrs)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-}
-
-static void
-nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
-			      size_t size, enum dma_data_direction dir)
-{
-	consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_CPU);
-}
-
-static void
-nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
-				 size_t size, enum dma_data_direction dir)
-{
-	consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_DEVICE);
-}
-
-static void
-nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
-			  enum dma_data_direction dir)
-{
-	int i;
-
-	for (i = 0; i < nents; i++, sg++) {
-		char *virt =
-		    page_address((struct page *)sg->page_link) + sg->offset;
-		consistent_sync(virt, sg->length, dir, FOR_CPU);
+	switch (dir) {
+	case DMA_FROM_DEVICE:
+		break;
+	case DMA_TO_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		cache_op(paddr, size, cpu_dma_wb_range);
+		break;
+	default:
+		BUG();
 	}
 }
 
-static void
-nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-			     int nents, enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	int i;
-
-	for (i = 0; i < nents; i++, sg++) {
-		char *virt =
-		    page_address((struct page *)sg->page_link) + sg->offset;
-		consistent_sync(virt, sg->length, dir, FOR_DEVICE);
+	switch (dir) {
+	case DMA_TO_DEVICE:
+		break;
+	case DMA_FROM_DEVICE:
+	case DMA_BIDIRECTIONAL:
+		cache_op(paddr, size, cpu_dma_inval_range);
+		break;
+	default:
+		BUG();
 	}
 }
-
-struct dma_map_ops nds32_dma_ops = {
-	.alloc = nds32_dma_alloc_coherent,
-	.free = nds32_dma_free,
-	.map_page = nds32_dma_map_page,
-	.unmap_page = nds32_dma_unmap_page,
-	.map_sg = nds32_dma_map_sg,
-	.unmap_sg = nds32_dma_unmap_sg,
-	.sync_single_for_device = nds32_dma_sync_single_for_device,
-	.sync_single_for_cpu = nds32_dma_sync_single_for_cpu,
-	.sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu,
-	.sync_sg_for_device = nds32_dma_sync_sg_for_device,
-};
-
-EXPORT_SYMBOL(nds32_dma_ops);
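[Editorial note: with DMA_NONCOHERENT_OPS selected, nds32 stops providing its
own struct dma_map_ops. The generic code implements the map/unmap and
scatterlist handling, and the architecture only supplies the allocator and the
cache-maintenance hooks defined above. The contract, per
include/linux/dma-noncoherent.h in this release, is approximately:

	void *arch_dma_alloc(struct device *dev, size_t size,
			dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
	void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
			dma_addr_t dma_handle, unsigned long attrs);
	void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
			size_t size, enum dma_data_direction dir);
	void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
			size_t size, enum dma_data_direction dir);

A side benefit visible in the diff: the sync hooks now take a phys_addr_t, so
the new cache_op() helper can walk highmem pages with kmap_atomic() instead of
assuming a linear mapping.]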
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 6e34eb9..a6205fd 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -222,19 +222,13 @@
 
 int bad_syscall(int n, struct pt_regs *regs)
 {
-	siginfo_t info;
-
 	if (current->personality != PER_LINUX) {
 		send_sig(SIGSEGV, current, 1);
 		return regs->uregs[0];
 	}
 
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_ILLTRP;
-	info.si_addr = (void __user *)instruction_pointer(regs) - 4;
-
-	force_sig_info(SIGILL, &info, current);
+	force_sig_fault(SIGILL, ILL_ILLTRP,
+			(void __user *)instruction_pointer(regs) - 4, current);
 	die_if_kernel("Oops - bad syscall", regs, n);
 	return regs->uregs[0];
 }
@@ -287,16 +281,11 @@
 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
 		  int error_code, int si_code)
 {
-	struct siginfo info;
-
 	tsk->thread.trap_no = ENTRY_DEBUG_RELATED;
 	tsk->thread.error_code = error_code;
 
-	memset(&info, 0, sizeof(info));
-	info.si_signo = SIGTRAP;
-	info.si_code = si_code;
-	info.si_addr = (void __user *)instruction_pointer(regs);
-	force_sig_info(SIGTRAP, &info, tsk);
+	force_sig_fault(SIGTRAP, si_code,
+			(void __user *)instruction_pointer(regs), tsk);
 }
 
 void do_debug_trap(unsigned long entry, unsigned long addr,
@@ -318,29 +307,22 @@
 
 void unhandled_interruption(struct pt_regs *regs)
 {
-	siginfo_t si;
 	pr_emerg("unhandled_interruption\n");
 	show_regs(regs);
 	if (!user_mode(regs))
 		do_exit(SIGKILL);
-	si.si_signo = SIGKILL;
-	si.si_errno = 0;
-	force_sig_info(SIGKILL, &si, current);
+	force_sig(SIGKILL, current);
 }
 
 void unhandled_exceptions(unsigned long entry, unsigned long addr,
 			  unsigned long type, struct pt_regs *regs)
 {
-	siginfo_t si;
 	pr_emerg("Unhandled Exception: entry: %lx addr:%lx itype:%lx\n", entry,
 		 addr, type);
 	show_regs(regs);
 	if (!user_mode(regs))
 		do_exit(SIGKILL);
-	si.si_signo = SIGKILL;
-	si.si_errno = 0;
-	si.si_addr = (void *)addr;
-	force_sig_info(SIGKILL, &si, current);
+	force_sig(SIGKILL, current);
 }
 
 extern int do_page_fault(unsigned long entry, unsigned long addr,
@@ -363,14 +345,11 @@
 
 void do_revinsn(struct pt_regs *regs)
 {
-	siginfo_t si;
 	pr_emerg("Reserved Instruction\n");
 	show_regs(regs);
 	if (!user_mode(regs))
 		do_exit(SIGILL);
-	si.si_signo = SIGILL;
-	si.si_errno = 0;
-	force_sig_info(SIGILL, &si, current);
+	force_sig(SIGILL, current);
 }
 
 #ifdef CONFIG_ALIGNMENT_TRAP
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index 3a246fb..9bdb7c3 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -72,7 +72,7 @@
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	siginfo_t info;
+	int si_code;
 	int fault;
 	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -80,7 +80,7 @@
 	error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
 	tsk = current;
 	mm = tsk->mm;
-	info.si_code = SEGV_MAPERR;
+	si_code = SEGV_MAPERR;
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
 	 * 'reference' page table is init_mm.pgd.
@@ -161,7 +161,7 @@
 	 */
 
 good_area:
-	info.si_code = SEGV_ACCERR;
+	si_code = SEGV_ACCERR;
 
 	/* first do some preliminary protection checks */
 	if (entry == ENTRY_PTE_NOT_PRESENT) {
@@ -266,11 +266,7 @@
 		tsk->thread.address = addr;
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_no = entry;
-		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		/* info.si_code has been set above */
-		info.si_addr = (void *)addr;
-		force_sig_info(SIGSEGV, &info, tsk);
+		force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk);
 		return;
 	}
 
@@ -339,11 +335,7 @@
 	tsk->thread.address = addr;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = entry;
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRERR;
-	info.si_addr = (void *)addr;
-	force_sig_info(SIGBUS, &info, tsk);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk);
 
 	return;
 
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index d232da2..64ed3d6 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -4,6 +4,7 @@
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += cmpxchg.h
+generic-y += compat.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c
index 8184e7d..3bc3cd2 100644
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -26,13 +26,7 @@
 
 static void _send_sig(int signo, int code, unsigned long addr)
 {
-	siginfo_t info;
-
-	info.si_signo = signo;
-	info.si_errno = 0;
-	info.si_code = code;
-	info.si_addr = (void __user *) addr;
-	force_sig_info(signo, &info, current);
+	force_sig_fault(signo, code, (void __user *) addr, current);
 }
 
 void die(const char *str, struct pt_regs *regs, long err)
diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile
index cf88029..89076a6 100644
--- a/arch/openrisc/Makefile
+++ b/arch/openrisc/Makefile
@@ -25,7 +25,6 @@
 LIBGCC 		:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
 KBUILD_CFLAGS	+= -pipe -ffixed-r10 -D__linux__
-CHECKFLAGS	+= -mbig-endian
 
 ifeq ($(CONFIG_OPENRISC_HAVE_INST_MUL),y)
 	KBUILD_CFLAGS += $(call cc-option,-mhard-mul)
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index f05c722..65964d3 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -2,6 +2,7 @@
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += checksum.h
+generic-y += compat.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index a945f00..ec7fd45 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -247,14 +247,3 @@
 	.sync_single_for_device = or1k_sync_single_for_device,
 };
 EXPORT_SYMBOL(or1k_dma_map_ops);
-
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init dma_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-
-	return 0;
-}
-fs_initcall(dma_init);
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 113c175fe..fac246e 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -250,27 +250,16 @@
 
 asmlinkage void do_trap(struct pt_regs *regs, unsigned long address)
 {
-	siginfo_t info;
-	memset(&info, 0, sizeof(info));
-	info.si_signo = SIGTRAP;
-	info.si_code = TRAP_TRACE;
-	info.si_addr = (void *)address;
-	force_sig_info(SIGTRAP, &info, current);
+	force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address, current);
 
 	regs->pc += 4;
 }
 
 asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address)
 {
-	siginfo_t info;
-
 	if (user_mode(regs)) {
 		/* Send a SIGBUS */
-		info.si_signo = SIGBUS;
-		info.si_errno = 0;
-		info.si_code = BUS_ADRALN;
-		info.si_addr = (void __user *)address;
-		force_sig_info(SIGBUS, &info, current);
+		force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address, current);
 	} else {
 		printk("KERNEL: Unaligned Access 0x%.8lx\n", address);
 		show_registers(regs);
@@ -281,15 +270,9 @@
 
 asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address)
 {
-	siginfo_t info;
-
 	if (user_mode(regs)) {
 		/* Send a SIGBUS */
-		info.si_signo = SIGBUS;
-		info.si_errno = 0;
-		info.si_code = BUS_ADRERR;
-		info.si_addr = (void *)address;
-		force_sig_info(SIGBUS, &info, current);
+		force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current);
 	} else {		/* Kernel mode */
 		printk("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address);
 		show_registers(regs);
@@ -464,7 +447,6 @@
 asmlinkage void do_illegal_instruction(struct pt_regs *regs,
 				       unsigned long address)
 {
-	siginfo_t info;
 	unsigned int op;
 	unsigned int insn = *((unsigned int *)address);
 
@@ -485,11 +467,7 @@
 
 	if (user_mode(regs)) {
 		/* Send a SIGILL */
-		info.si_signo = SIGILL;
-		info.si_errno = 0;
-		info.si_code = ILL_ILLOPC;
-		info.si_addr = (void *)address;
-		force_sig_info(SIGBUS, &info, current);
+		force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address, current);
 	} else {		/* Kernel mode */
 		printk("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n",
 		       address);
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index d0021df..9f011d1 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -52,7 +52,7 @@
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	siginfo_t info;
+	int si_code;
 	int fault;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
@@ -97,7 +97,7 @@
 	}
 
 	mm = tsk->mm;
-	info.si_code = SEGV_MAPERR;
+	si_code = SEGV_MAPERR;
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -139,7 +139,7 @@
 	 */
 
 good_area:
-	info.si_code = SEGV_ACCERR;
+	si_code = SEGV_ACCERR;
 
 	/* first do some preliminary protection checks */
 
@@ -213,11 +213,7 @@
 	/* User mode accesses just cause a SIGSEGV */
 
 	if (user_mode(regs)) {
-		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		/* info.si_code has been set above */
-		info.si_addr = (void *)address;
-		force_sig_info(SIGSEGV, &info, tsk);
+		force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk);
 		return;
 	}
 
@@ -282,11 +278,7 @@
 	 * Send a sigbus, regardless of whether we were in kernel
 	 * or user mode.
 	 */
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRERR;
-	info.si_addr = (void *)address;
-	force_sig_info(SIGBUS, &info, tsk);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk);
 
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs))
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index fc5a574..4d8f64d 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -51,6 +51,8 @@
 	select GENERIC_CLOCKEVENTS
 	select ARCH_NO_COHERENT_DMA_MMAP
 	select CPU_NO_EFFICIENT_FFS
+	select NEED_DMA_MAP_STATE
+	select NEED_SG_DMA_LENGTH
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
@@ -111,12 +113,6 @@
 config STACKTRACE_SUPPORT
 	def_bool y
 
-config NEED_DMA_MAP_STATE
-	def_bool y
-
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config ISA_DMA_API
 	bool
 
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 34ac503..714284e 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -22,13 +22,13 @@
 KBUILD_DEFCONFIG := default_defconfig
 
 NM		= sh $(srctree)/arch/parisc/nm
-CHECKFLAGS	+= -D__hppa__=1 -mbig-endian
+CHECKFLAGS	+= -D__hppa__=1
 LIBGCC		= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 export LIBGCC
 
 ifdef CONFIG_64BIT
 UTS_MACHINE	:= parisc64
-CHECKFLAGS	+= -D__LP64__=1 -m64
+CHECKFLAGS	+= -D__LP64__=1
 CC_ARCHES	= hppa64
 LD_BFD		:= elf64-hppa-linux
 else # 32-bit
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index 57b8b2a..ab8a547 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -13,7 +13,6 @@
 
 typedef u32	compat_size_t;
 typedef s32	compat_ssize_t;
-typedef s32	compat_time_t;
 typedef s32	compat_clock_t;
 typedef s32	compat_pid_t;
 typedef u32	__compat_uid_t;
@@ -40,16 +39,6 @@
 typedef u64	compat_u64;
 typedef u32	compat_uptr_t;
 
-struct compat_timespec {
-	compat_time_t		tv_sec;
-	s32			tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t		tv_sec;
-	s32			tv_usec;
-};
-
 struct compat_stat {
 	compat_dev_t		st_dev;	/* dev_t is 32 bits on parisc */
 	compat_ino_t		st_ino;	/* 32 bits */
@@ -149,10 +138,10 @@
 
 struct compat_semid64_ds {
 	struct compat_ipc64_perm sem_perm;
-	unsigned int __unused1;
-	compat_time_t sem_otime;
-	unsigned int __unused2;
-	compat_time_t sem_ctime;
+	unsigned int sem_otime_high;
+	unsigned int sem_otime;
+	unsigned int sem_ctime_high;
+	unsigned int sem_ctime;
 	compat_ulong_t sem_nsems;
 	compat_ulong_t __unused3;
 	compat_ulong_t __unused4;
@@ -160,12 +149,12 @@
 
 struct compat_msqid64_ds {
 	struct compat_ipc64_perm msg_perm;
-	unsigned int __unused1;
-	compat_time_t msg_stime;
-	unsigned int __unused2;
-	compat_time_t msg_rtime;
-	unsigned int __unused3;
-	compat_time_t msg_ctime;
+	unsigned int msg_stime_high;
+	unsigned int msg_stime;
+	unsigned int msg_rtime_high;
+	unsigned int msg_rtime;
+	unsigned int msg_ctime_high;
+	unsigned int msg_ctime;
 	compat_ulong_t msg_cbytes;
 	compat_ulong_t msg_qnum;
 	compat_ulong_t msg_qbytes;
@@ -177,12 +166,12 @@
 
 struct compat_shmid64_ds {
 	struct compat_ipc64_perm shm_perm;
-	unsigned int __unused1;
-	compat_time_t shm_atime;
-	unsigned int __unused2;
-	compat_time_t shm_dtime;
-	unsigned int __unused3;
-	compat_time_t shm_ctime;
+	unsigned int shm_atime_high;
+	unsigned int shm_atime;
+	unsigned int shm_dtime_high;
+	unsigned int shm_dtime;
+	unsigned int shm_ctime_high;
+	unsigned int shm_ctime;
 	unsigned int __unused4;
 	compat_size_t shm_segsz;
 	compat_pid_t shm_cpid;
diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h
index 0778151..1a1235a 100644
--- a/arch/parisc/include/asm/hardirq.h
+++ b/arch/parisc/include/asm/hardirq.h
@@ -34,14 +34,6 @@
 #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
 #define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
 #define __inc_irq_stat(member)	__this_cpu_inc(irq_stat.member)
-#define local_softirq_pending()	this_cpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
-
-#define set_softirq_pending(x)	\
-		this_cpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x)	this_cpu_or(irq_stat.__softirq_pending, (x))
-
 #define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector %02x\n", irq)
 
 #endif /* _PARISC_HARDIRQ_H */
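Dropping these macros is safe because the softirq-pending accessors were consolidated into generic code this cycle; an architecture that stores the pending mask in irq_stat no longer needs its own copies. The generic fallback has roughly this shape (quoted from memory, so treat as approximate):

	/* include/linux/interrupt.h, 4.17-era consolidation */
	#ifndef local_softirq_pending
	#ifndef local_softirq_pending_ref
	#define local_softirq_pending_ref irq_stat.__softirq_pending
	#endif
	#define local_softirq_pending()	(__this_cpu_read(local_softirq_pending_ref))
	#define set_softirq_pending(x)	(__this_cpu_write(local_softirq_pending_ref, (x)))
	#define or_softirq_pending(x)	(__this_cpu_or(local_softirq_pending_ref, (x)))
	#endif

s390 is the outlier: it keeps the pending mask in lowcore rather than irq_stat, which is why its hardirq.h later in this series adds set/or macros instead of deleting them.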
diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h
index 96b7dee..3328fd1 100644
--- a/arch/parisc/include/asm/pci.h
+++ b/arch/parisc/include/asm/pci.h
@@ -88,29 +88,6 @@
 #endif /* !CONFIG_64BIT */
 
 /*
- * If the PCI device's view of memory is the same as the CPU's view of memory,
- * PCI_DMA_BUS_IS_PHYS is true.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#ifdef CONFIG_PA20
-/* All PA-2.0 machines have an IOMMU. */
-#define PCI_DMA_BUS_IS_PHYS	0
-#define parisc_has_iommu()	do { } while (0)
-#else
-
-#if defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA)
-extern int parisc_bus_is_phys; 	/* in arch/parisc/kernel/setup.c */
-#define PCI_DMA_BUS_IS_PHYS	parisc_bus_is_phys
-#define parisc_has_iommu()	do { parisc_bus_is_phys = 0; } while (0)
-#else
-#define PCI_DMA_BUS_IS_PHYS	1
-#define parisc_has_iommu()	do { } while (0)
-#endif
-
-#endif	/* !CONFIG_PA20 */
-
-
-/*
 ** Most PCI devices (eg Tulip, NCR720) also export the same registers
 ** to both MMIO and I/O port space.  Due to poor performance of I/O Port
 ** access under HP PCI bus adapters, strongly recommend the use of MMIO
diff --git a/arch/parisc/include/uapi/asm/msgbuf.h b/arch/parisc/include/uapi/asm/msgbuf.h
index b48b810..6a2e9ab 100644
--- a/arch/parisc/include/uapi/asm/msgbuf.h
+++ b/arch/parisc/include/uapi/asm/msgbuf.h
@@ -10,31 +10,30 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
 
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
-#if __BITS_PER_LONG != 64
-	unsigned int   __pad1;
-#endif
+#if __BITS_PER_LONG == 64
 	__kernel_time_t msg_stime;	/* last msgsnd time */
-#if __BITS_PER_LONG != 64
-	unsigned int   __pad2;
-#endif
 	__kernel_time_t msg_rtime;	/* last msgrcv time */
-#if __BITS_PER_LONG != 64
-	unsigned int   __pad3;
-#endif
 	__kernel_time_t msg_ctime;	/* last change time */
-	unsigned long msg_cbytes;	/* current number of bytes on queue */
-	unsigned long msg_qnum;		/* number of messages in queue */
-	unsigned long msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	unsigned long __unused1;
-	unsigned long __unused2;
+#else
+	unsigned long	msg_stime_high;
+	unsigned long	msg_stime;	/* last msgsnd time */
+	unsigned long	msg_rtime_high;
+	unsigned long	msg_rtime;	/* last msgrcv time */
+	unsigned long	msg_ctime_high;
+	unsigned long	msg_ctime;	/* last change time */
+#endif
+	unsigned long	msg_cbytes;	/* current number of bytes on queue */
+	unsigned long	msg_qnum;	/* number of messages in queue */
+	unsigned long	msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t	msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t	msg_lrpid;	/* last receive pid */
+	unsigned long	__unused1;
+	unsigned long	__unused2;
 };
 
 #endif /* _PARISC_MSGBUF_H */
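This msqid64_ds rework is the 32-bit y2038 pattern applied throughout the series: the padding word that used to sit next to each 32-bit time field becomes an explicit *_high word carrying the upper 32 bits, so a full 64-bit time can travel without changing the structure's size or layout. A sketch of how the halves are recombined (the helper name is illustrative, not from this patch):

	#include <linux/time64.h>

	/* rebuild a 64-bit time from the split high/low words */
	static inline time64_t ipc64_combine_time(unsigned long high,
						  unsigned long low)
	{
		return ((time64_t)high << 32) | low;
	}

	/* e.g.: t = ipc64_combine_time(ds->msg_stime_high, ds->msg_stime); */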
diff --git a/arch/parisc/include/uapi/asm/sembuf.h b/arch/parisc/include/uapi/asm/sembuf.h
index 746c5d8..3c31163 100644
--- a/arch/parisc/include/uapi/asm/sembuf.h
+++ b/arch/parisc/include/uapi/asm/sembuf.h
@@ -10,21 +10,21 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
 
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-#if __BITS_PER_LONG != 64
-	unsigned int	__pad1;
-#endif
+#if __BITS_PER_LONG == 64
 	__kernel_time_t	sem_otime;		/* last semop time */
-#if __BITS_PER_LONG != 64
-	unsigned int	__pad2;
-#endif
 	__kernel_time_t	sem_ctime;		/* last change time */
-	unsigned long 	sem_nsems;		/* no. of semaphores in array */
+#else
+	unsigned long	sem_otime_high;
+	unsigned long	sem_otime;		/* last semop time */
+	unsigned long	sem_ctime_high;
+	unsigned long	sem_ctime;		/* last change time */
+#endif
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
 	unsigned long	__unused1;
 	unsigned long	__unused2;
 };
diff --git a/arch/parisc/include/uapi/asm/shmbuf.h b/arch/parisc/include/uapi/asm/shmbuf.h
index cd4dbce..c89b3dd 100644
--- a/arch/parisc/include/uapi/asm/shmbuf.h
+++ b/arch/parisc/include/uapi/asm/shmbuf.h
@@ -10,25 +10,22 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
 
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
-#if __BITS_PER_LONG != 64
-	unsigned int		__pad1;
-#endif
+#if __BITS_PER_LONG == 64
 	__kernel_time_t		shm_atime;	/* last attach time */
-#if __BITS_PER_LONG != 64
-	unsigned int		__pad2;
-#endif
 	__kernel_time_t		shm_dtime;	/* last detach time */
-#if __BITS_PER_LONG != 64
-	unsigned int		__pad3;
-#endif
 	__kernel_time_t		shm_ctime;	/* last change time */
-#if __BITS_PER_LONG != 64
+#else
+	unsigned long		shm_atime_high;
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_dtime_high;
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_ctime_high;
+	unsigned long		shm_ctime;	/* last change time */
 	unsigned int		__pad4;
 #endif
 	__kernel_size_t		shm_segsz;	/* size of segment (bytes) */
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 91bc0ca..6df07ce 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -367,19 +367,6 @@
 	return 0;
 }
 
-static int proc_pcxl_dma_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_pcxl_dma_show, NULL);
-}
-
-static const struct file_operations proc_pcxl_dma_ops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_pcxl_dma_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init
 pcxl_dma_init(void)
 {
@@ -397,8 +384,8 @@
 			"pcxl_dma_init: Unable to create gsc /proc dir entry\n");
 	else {
 		struct proc_dir_entry* ent;
-		ent = proc_create("pcxl_dma", 0, proc_gsc_root,
-				  &proc_pcxl_dma_ops);
+		ent = proc_create_single("pcxl_dma", 0, proc_gsc_root,
+				proc_pcxl_dma_show);
 		if (!ent)
 			printk(KERN_WARNING
 				"pci-dma.c: Unable to create pcxl_dma /proc entry.\n");
diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c
index 3e04242..28e0748 100644
--- a/arch/parisc/kernel/pdc_chassis.c
+++ b/arch/parisc/kernel/pdc_chassis.c
@@ -266,18 +266,6 @@
 	return 0;
 }
 
-static int pdc_chassis_warn_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pdc_chassis_warn_show, NULL);
-}
-
-static const struct file_operations pdc_chassis_warn_fops = {
-	.open		= pdc_chassis_warn_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init pdc_chassis_create_procfs(void)
 {
 	unsigned long test;
@@ -292,7 +280,7 @@
 
 	printk(KERN_INFO "Enabling PDC chassis warnings support v%s\n",
 			PDC_CHASSIS_VER);
-	proc_create("chassis", 0400, NULL, &pdc_chassis_warn_fops);
+	proc_create_single("chassis", 0400, NULL, pdc_chassis_warn_show);
 	return 0;
 }
 
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 1a2be6e..7aa1d4d 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -76,8 +76,6 @@
 	set_tsk_thread_flag(task, TIF_SINGLESTEP);
 
 	if (pa_psw(task)->n) {
-		struct siginfo si;
-
 		/* Nullified, just crank over the queue. */
 		task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1];
 		task_regs(task)->iasq[0] = task_regs(task)->iasq[1];
@@ -90,11 +88,9 @@
 		ptrace_disable(task);
 		/* Don't wake up the task, but let the
 		   parent know something happened. */
-		si.si_code = TRAP_TRACE;
-		si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3);
-		si.si_signo = SIGTRAP;
-		si.si_errno = 0;
-		force_sig_info(SIGTRAP, &si, task);
+		force_sig_fault(SIGTRAP, TRAP_TRACE,
+				(void __user *) (task_regs(task)->iaoq[0] & ~3),
+				task);
 		/* notify_parent(task, SIGCHLD); */
 		return;
 	}
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 0e9675f..8d3a7b8 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -58,11 +58,6 @@
 struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
 struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL;
 
-#if !defined(CONFIG_PA20) && (defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA))
-int parisc_bus_is_phys __read_mostly = 1;	/* Assume no IOMMU is present */
-EXPORT_SYMBOL(parisc_bus_is_phys);
-#endif
-
 void __init setup_cmdline(char **cmdline_p)
 {
 	extern unsigned int boot_args[];
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 71d3127..4309ad3 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -297,13 +297,8 @@
 #define GDB_BREAK_INSN 0x10004
 static void handle_gdb_break(struct pt_regs *regs, int wot)
 {
-	struct siginfo si;
-
-	si.si_signo = SIGTRAP;
-	si.si_errno = 0;
-	si.si_code = wot;
-	si.si_addr = (void __user *) (regs->iaoq[0] & ~3);
-	force_sig_info(SIGTRAP, &si, current);
+	force_sig_fault(SIGTRAP, wot,
+			(void __user *) (regs->iaoq[0] & ~3), current);
 }
 
 static void handle_break(struct pt_regs *regs)
@@ -487,7 +482,7 @@
 {
 	unsigned long fault_address = 0;
 	unsigned long fault_space = 0;
-	struct siginfo si;
+	int si_code;
 
 	if (code == 1)
 	    pdc_console_restart();  /* switch back to pdc if HPMC */
@@ -571,7 +566,7 @@
 	case  8:
 		/* Illegal instruction trap */
 		die_if_kernel("Illegal instruction", regs, code);
-		si.si_code = ILL_ILLOPC;
+		si_code = ILL_ILLOPC;
 		goto give_sigill;
 
 	case  9:
@@ -582,7 +577,7 @@
 	case 10:
 		/* Privileged operation trap */
 		die_if_kernel("Privileged operation", regs, code);
-		si.si_code = ILL_PRVOPC;
+		si_code = ILL_PRVOPC;
 		goto give_sigill;
 
 	case 11:
@@ -605,20 +600,16 @@
 		}
 
 		die_if_kernel("Privileged register usage", regs, code);
-		si.si_code = ILL_PRVREG;
+		si_code = ILL_PRVREG;
 	give_sigill:
-		si.si_signo = SIGILL;
-		si.si_errno = 0;
-		si.si_addr = (void __user *) regs->iaoq[0];
-		force_sig_info(SIGILL, &si, current);
+		force_sig_fault(SIGILL, si_code,
+				(void __user *) regs->iaoq[0], current);
 		return;
 
 	case 12:
 		/* Overflow Trap, let the userland signal handler do the cleanup */
-		si.si_signo = SIGFPE;
-		si.si_code = FPE_INTOVF;
-		si.si_addr = (void __user *) regs->iaoq[0];
-		force_sig_info(SIGFPE, &si, current);
+		force_sig_fault(SIGFPE, FPE_INTOVF,
+				(void __user *) regs->iaoq[0], current);
 		return;
 		
 	case 13:
@@ -626,13 +617,11 @@
 		   The condition succeeds in an instruction which traps
 		   on condition  */
 		if(user_mode(regs)){
-			si.si_signo = SIGFPE;
 			/* Let userspace app figure it out from the insn pointed
 			 * to by si_addr.
 			 */
-			si.si_code = FPE_CONDTRAP;
-			si.si_addr = (void __user *) regs->iaoq[0];
-			force_sig_info(SIGFPE, &si, current);
+			force_sig_fault(SIGFPE, FPE_CONDTRAP,
+					(void __user *) regs->iaoq[0], current);
 			return;
 		} 
 		/* The kernel doesn't want to handle condition codes */
@@ -741,14 +730,10 @@
 			return;
 
 		die_if_kernel("Protection id trap", regs, code);
-		si.si_code = SEGV_MAPERR;
-		si.si_signo = SIGSEGV;
-		si.si_errno = 0;
-		if (code == 7)
-		    si.si_addr = (void __user *) regs->iaoq[0];
-		else
-		    si.si_addr = (void __user *) regs->ior;
-		force_sig_info(SIGSEGV, &si, current);
+		force_sig_fault(SIGSEGV, SEGV_MAPERR,
+				(code == 7)?
+				((void __user *) regs->iaoq[0]) :
+				((void __user *) regs->ior), current);
 		return;
 
 	case 28: 
@@ -762,11 +747,8 @@
 				"handle_interruption() pid=%d command='%s'\n",
 				task_pid_nr(current), current->comm);
 			/* SIGBUS, for lack of a better one. */
-			si.si_signo = SIGBUS;
-			si.si_code = BUS_OBJERR;
-			si.si_errno = 0;
-			si.si_addr = (void __user *) regs->ior;
-			force_sig_info(SIGBUS, &si, current);
+			force_sig_fault(SIGBUS, BUS_OBJERR,
+					(void __user *)regs->ior, current);
 			return;
 		}
 		pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
@@ -781,11 +763,8 @@
 				"User fault %d on space 0x%08lx, pid=%d command='%s'\n",
 				code, fault_space,
 				task_pid_nr(current), current->comm);
-		si.si_signo = SIGSEGV;
-		si.si_errno = 0;
-		si.si_code = SEGV_MAPERR;
-		si.si_addr = (void __user *) regs->ior;
-		force_sig_info(SIGSEGV, &si, current);
+		force_sig_fault(SIGSEGV, SEGV_MAPERR,
+				(void __user *)regs->ior, current);
 		return;
 	    }
 	}
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index e36f7b7..932bfc0 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -452,7 +452,6 @@
 	unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0;
 	int modify = 0;
 	int ret = ERR_NOTHANDLED;
-	struct siginfo si;
 	register int flop=0;	/* true if this is a flop */
 
 	__inc_irq_stat(irq_unaligned_count);
@@ -690,21 +689,15 @@
 
 		if (ret == ERR_PAGEFAULT)
 		{
-			si.si_signo = SIGSEGV;
-			si.si_errno = 0;
-			si.si_code = SEGV_MAPERR;
-			si.si_addr = (void __user *)regs->ior;
-			force_sig_info(SIGSEGV, &si, current);
+			force_sig_fault(SIGSEGV, SEGV_MAPERR,
+					(void __user *)regs->ior, current);
 		}
 		else
 		{
 force_sigbus:
 			/* couldn't handle it ... */
-			si.si_signo = SIGBUS;
-			si.si_errno = 0;
-			si.si_code = BUS_ADRALN;
-			si.si_addr = (void __user *)regs->ior;
-			force_sig_info(SIGBUS, &si, current);
+			force_sig_fault(SIGBUS, BUS_ADRALN,
+					(void __user *)regs->ior, current);
 		}
 		
 		return;
diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c
index 2fb59d2..0590e05 100644
--- a/arch/parisc/math-emu/driver.c
+++ b/arch/parisc/math-emu/driver.c
@@ -81,7 +81,6 @@
 handle_fpe(struct pt_regs *regs)
 {
 	extern void printbinary(unsigned long x, int nbits);
-	struct siginfo si;
 	unsigned int orig_sw, sw;
 	int signalcode;
 	/* need an intermediate copy of float regs because FPU emulation
@@ -117,11 +116,8 @@
 
 	memcpy(regs->fr, frcopy, sizeof regs->fr);
 	if (signalcode != 0) {
-	    si.si_signo = signalcode >> 24;
-	    si.si_errno = 0;
-	    si.si_code = signalcode & 0xffffff;
-	    si.si_addr = (void __user *) regs->iaoq[0];
-	    force_sig_info(si.si_signo, &si, current);
+	    force_sig_fault(signalcode >> 24, signalcode & 0xffffff,
+			    (void __user *) regs->iaoq[0], current);
 	    return -1;
 	}
 
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index e247edb..a801179 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -353,23 +353,22 @@
 	up_read(&mm->mmap_sem);
 
 	if (user_mode(regs)) {
-		struct siginfo si;
-		unsigned int lsb = 0;
+		int signo, si_code;
 
 		switch (code) {
 		case 15:	/* Data TLB miss fault/Data page fault */
 			/* send SIGSEGV when outside of vma */
 			if (!vma ||
 			    address < vma->vm_start || address >= vma->vm_end) {
-				si.si_signo = SIGSEGV;
-				si.si_code = SEGV_MAPERR;
+				signo = SIGSEGV;
+				si_code = SEGV_MAPERR;
 				break;
 			}
 
 			/* send SIGSEGV for wrong permissions */
 			if ((vma->vm_flags & acc_type) != acc_type) {
-				si.si_signo = SIGSEGV;
-				si.si_code = SEGV_ACCERR;
+				signo = SIGSEGV;
+				si_code = SEGV_ACCERR;
 				break;
 			}
 
@@ -377,43 +376,40 @@
 			/* fall through */
 		case 17:	/* NA data TLB miss / page fault */
 		case 18:	/* Unaligned access - PCXS only */
-			si.si_signo = SIGBUS;
-			si.si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR;
+			signo = SIGBUS;
+			si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR;
 			break;
 		case 16:	/* Non-access instruction TLB miss fault */
 		case 26:	/* PCXL: Data memory access rights trap */
 		default:
-			si.si_signo = SIGSEGV;
-			si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
+			signo = SIGSEGV;
+			si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR;
 			break;
 		}
-
 #ifdef CONFIG_MEMORY_FAILURE
 		if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+			unsigned int lsb = 0;
 			printk(KERN_ERR
 	"MCE: Killing %s:%d due to hardware memory corruption fault at %08lx\n",
 			tsk->comm, tsk->pid, address);
-			si.si_signo = SIGBUS;
-			si.si_code = BUS_MCEERR_AR;
+			/*
+			 * Either small page or large page may be poisoned.
+			 * In other words, VM_FAULT_HWPOISON_LARGE and
+			 * VM_FAULT_HWPOISON are mutually exclusive.
+			 */
+			if (fault & VM_FAULT_HWPOISON_LARGE)
+				lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+			else if (fault & VM_FAULT_HWPOISON)
+				lsb = PAGE_SHIFT;
+
+			force_sig_mceerr(BUS_MCEERR_AR, (void __user *) address,
+					 lsb, current);
+			return;
 		}
 #endif
+		show_signal_msg(regs, code, address, tsk, vma);
 
-		/*
-		 * Either small page or large page may be poisoned.
-		 * In other words, VM_FAULT_HWPOISON_LARGE and
-		 * VM_FAULT_HWPOISON are mutually exclusive.
-		 */
-		if (fault & VM_FAULT_HWPOISON_LARGE)
-			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
-		else if (fault & VM_FAULT_HWPOISON)
-			lsb = PAGE_SHIFT;
-		else
-			show_signal_msg(regs, code, address, tsk, vma);
-		si.si_addr_lsb = lsb;
-
-		si.si_errno = 0;
-		si.si_addr = (void __user *) address;
-		force_sig_info(si.si_signo, &si, current);
+		force_sig_fault(signo, si_code, (void __user *) address, current);
 		return;
 	}
 
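The hwpoison branch now returns early through force_sig_mceerr(), the sibling helper for the BUS_MCEERR_* cases, since those need the extra si_addr_lsb field that force_sig_fault() does not carry. Its 4.17-era declaration, for reference:

	/* int force_sig_mceerr(int code, void __user *addr, short lsb,
	 *			struct task_struct *t);
	 * code is BUS_MCEERR_AR or BUS_MCEERR_AO; lsb is the log2 size of
	 * the poisoned granule (PAGE_SHIFT for a small page, the hugepage
	 * shift for a poisoned huge page), landing in si_addr_lsb.
	 */

With the early return, the poison case no longer falls through to the generic force_sig_fault() path below it.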
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c32a181..9c0ee18 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -13,12 +13,6 @@
 	bool
 	default y if PPC64
 
-config ARCH_PHYS_ADDR_T_64BIT
-       def_bool PPC64 || PHYS_64BIT
-
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool ARCH_PHYS_ADDR_T_64BIT
-
 config MMU
 	bool
 	default y
@@ -187,7 +181,6 @@
 	select HAVE_CONTEXT_TRACKING		if PPC64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if MPROFILE_KERNEL
 	select HAVE_EBPF_JIT			if PPC64
@@ -205,6 +198,7 @@
 	select HAVE_KPROBES
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_KRETPROBES
+	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
 	select HAVE_LIVEPATCH			if HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
@@ -223,9 +217,11 @@
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select HAVE_IRQ_TIME_ACCOUNTING
+	select IOMMU_HELPER			if PPC64
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA
+	select NEED_SG_DMA_LENGTH
 	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
@@ -478,19 +474,6 @@
 	depends on PPC64 && CPU_LITTLE_ENDIAN
 	def_bool !DISABLE_MPROFILE_KERNEL
 
-config IOMMU_HELPER
-	def_bool PPC64
-
-config SWIOTLB
-	bool "SWIOTLB support"
-	default n
-	select IOMMU_HELPER
-	---help---
-	  Support for IO bounce buffering for systems without an IOMMU.
-	  This allows us to DMA to the full physical address space on
-	  platforms where the size of a physical address is larger
-	  than the bus address.  Not all platforms support this.
-
 config HOTPLUG_CPU
 	bool "Support for enabling/disabling CPUs"
 	depends on SMP && (PPC_PSERIES || \
@@ -883,7 +866,7 @@
 	  page-based protections, but without requiring modification of the
 	  page tables when an application changes protection domains.
 
-	  For details, see Documentation/vm/protection-keys.txt
+	  For details, see Documentation/vm/protection-keys.rst
 
 	  If unsure, say y.
 
@@ -913,9 +896,6 @@
 config NEED_DMA_MAP_STATE
 	def_bool (PPC64 || NOT_COHERENT_CACHE)
 
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config GENERIC_ISA_DMA
 	bool
 	depends on ISA_DMA_API
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 62168e1..85c8af2 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -17,7 +17,6 @@
 
 typedef u32		compat_size_t;
 typedef s32		compat_ssize_t;
-typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
 typedef u32		__compat_uid_t;
@@ -45,16 +44,6 @@
 typedef u64		compat_u64;
 typedef u32		compat_uptr_t;
 
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
 struct compat_stat {
 	compat_dev_t	st_dev;
 	compat_ino_t	st_ino;
@@ -173,10 +162,10 @@
 
 struct compat_semid64_ds {
 	struct compat_ipc64_perm sem_perm;
-	unsigned int __unused1;
-	compat_time_t sem_otime;
-	unsigned int __unused2;
-	compat_time_t sem_ctime;
+	unsigned int sem_otime_high;
+	unsigned int sem_otime;
+	unsigned int sem_ctime_high;
+	unsigned int sem_ctime;
 	compat_ulong_t sem_nsems;
 	compat_ulong_t __unused3;
 	compat_ulong_t __unused4;
@@ -184,12 +173,12 @@
 
 struct compat_msqid64_ds {
 	struct compat_ipc64_perm msg_perm;
-	unsigned int __unused1;
-	compat_time_t msg_stime;
-	unsigned int __unused2;
-	compat_time_t msg_rtime;
-	unsigned int __unused3;
-	compat_time_t msg_ctime;
+	unsigned int msg_stime_high;
+	unsigned int msg_stime;
+	unsigned int msg_rtime_high;
+	unsigned int msg_rtime;
+	unsigned int msg_ctime_high;
+	unsigned int msg_ctime;
 	compat_ulong_t msg_cbytes;
 	compat_ulong_t msg_qnum;
 	compat_ulong_t msg_qbytes;
@@ -201,12 +190,12 @@
 
 struct compat_shmid64_ds {
 	struct compat_ipc64_perm shm_perm;
-	unsigned int __unused1;
-	compat_time_t shm_atime;
-	unsigned int __unused2;
-	compat_time_t shm_dtime;
-	unsigned int __unused3;
-	compat_time_t shm_ctime;
+	unsigned int shm_atime_high;
+	unsigned int shm_atime;
+	unsigned int shm_dtime_high;
+	unsigned int shm_dtime;
+	unsigned int shm_ctime_high;
+	unsigned int shm_ctime;
 	unsigned int __unused4;
 	compat_size_t shm_segsz;
 	compat_pid_t shm_cpid;
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 5986d47..383f628 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -25,15 +25,8 @@
 DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 
 #define __ARCH_IRQ_STAT
-
-#define local_softirq_pending()	__this_cpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED
 
-#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x))
-
 static inline void ack_bad_irq(unsigned int irq)
 {
 	printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 2e2ddda..662c834 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -279,6 +279,7 @@
 #define H_GET_MPP_X		0x314
 #define H_SET_MODE		0x31C
 #define H_CLEAR_HPT		0x358
+#define H_REQUEST_VMC		0x360
 #define H_RESIZE_HPT_PREPARE	0x36C
 #define H_RESIZE_HPT_COMMIT	0x370
 #define H_REGISTER_PROC_TBL	0x37C
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 401c62a..2af9ded 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -92,24 +92,6 @@
 
 #define HAVE_PCI_LEGACY	1
 
-#ifdef CONFIG_PPC64
-
-/* The PCI address space does not equal the physical memory address
- * space (we have an IOMMU).  The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(0)
-
-#else /* 32-bit */
-
-/* The PCI address space does equal the physical memory
- * address space (no IOMMU).  The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS     (1)
-
-#endif /* CONFIG_PPC64 */
-
 extern void pcibios_claim_one_bus(struct pci_bus *b);
 
 extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h
index 65beb09..2b1b377 100644
--- a/arch/powerpc/include/uapi/asm/msgbuf.h
+++ b/arch/powerpc/include/uapi/asm/msgbuf.h
@@ -10,18 +10,18 @@
 
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
-#ifndef __powerpc64__
-	unsigned int	__unused1;
-#endif
+#ifdef __powerpc64__
 	__kernel_time_t msg_stime;	/* last msgsnd time */
-#ifndef __powerpc64__
-	unsigned int	__unused2;
-#endif
 	__kernel_time_t msg_rtime;	/* last msgrcv time */
-#ifndef __powerpc64__
-	unsigned int	__unused3;
-#endif
 	__kernel_time_t msg_ctime;	/* last change time */
+#else
+	unsigned long  msg_stime_high;
+	unsigned long  msg_stime;	/* last msgsnd time */
+	unsigned long  msg_rtime_high;
+	unsigned long  msg_rtime;	/* last msgrcv time */
+	unsigned long  msg_ctime_high;
+	unsigned long  msg_ctime;	/* last change time */
+#endif
 	unsigned long  msg_cbytes;	/* current number of bytes on queue */
 	unsigned long  msg_qnum;	/* number of messages in queue */
 	unsigned long  msg_qbytes;	/* max number of bytes on queue */
diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h
index 8f393d6..3f60946 100644
--- a/arch/powerpc/include/uapi/asm/sembuf.h
+++ b/arch/powerpc/include/uapi/asm/sembuf.h
@@ -15,20 +15,20 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
+ * - 2 miscellaneous 32/64-bit values
  */
 
 struct semid64_ds {
 	struct ipc64_perm sem_perm;	/* permissions .. see ipc.h */
 #ifndef __powerpc64__
-	unsigned long	__unused1;
-#endif
+	unsigned long	sem_otime_high;
+	unsigned long	sem_otime;	/* last semop time */
+	unsigned long	sem_ctime_high;
+	unsigned long	sem_ctime;	/* last change time */
+#else
 	__kernel_time_t	sem_otime;	/* last semop time */
-#ifndef __powerpc64__
-	unsigned long	__unused2;
-#endif
 	__kernel_time_t	sem_ctime;	/* last change time */
+#endif
 	unsigned long	sem_nsems;	/* no. of semaphores in array */
 	unsigned long	__unused3;
 	unsigned long	__unused4;
diff --git a/arch/powerpc/include/uapi/asm/shmbuf.h b/arch/powerpc/include/uapi/asm/shmbuf.h
index deb1c3e..b591c4d 100644
--- a/arch/powerpc/include/uapi/asm/shmbuf.h
+++ b/arch/powerpc/include/uapi/asm/shmbuf.h
@@ -16,25 +16,22 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
 
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
-#ifndef __powerpc64__
-	unsigned long		__unused1;
-#endif
+#ifdef __powerpc64__
 	__kernel_time_t		shm_atime;	/* last attach time */
-#ifndef __powerpc64__
-	unsigned long		__unused2;
-#endif
 	__kernel_time_t		shm_dtime;	/* last detach time */
-#ifndef __powerpc64__
-	unsigned long		__unused3;
-#endif
 	__kernel_time_t		shm_ctime;	/* last change time */
-#ifndef __powerpc64__
+#else
+	unsigned long		shm_atime_high;
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_dtime_high;
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_ctime_high;
+	unsigned long		shm_ctime;	/* last change time */
 	unsigned long		__unused4;
 #endif
 	size_t			shm_segsz;	/* size of segment (bytes) */
diff --git a/arch/powerpc/include/uapi/asm/siginfo.h b/arch/powerpc/include/uapi/asm/siginfo.h
index 9f14245..1d51d9b 100644
--- a/arch/powerpc/include/uapi/asm/siginfo.h
+++ b/arch/powerpc/include/uapi/asm/siginfo.h
@@ -15,19 +15,4 @@
 
 #include <asm-generic/siginfo.h>
 
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
- * SIGTRAP si_codes
- */
-#ifdef __KERNEL__
-#define TRAP_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-
 #endif	/* _ASM_POWERPC_SIGINFO_H */
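These FIXME codes (both 0, i.e. broken duplicates of SI_USER that confused userspace decoding of si_code) can go because asm-generic/siginfo.h gained proper catch-all codes this cycle, which the powerpc/kernel/traps.c hunks below switch to. The values as I recall them, worth double-checking against the header:

	#define TRAP_UNK	5	/* undiagnosed trap */
	#define FPE_FLTUNK	14	/* undiagnosed floating-point exception */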
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 373dc1d..8817c5a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -13,6 +13,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compat.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -42,7 +43,6 @@
 #include <asm/paca.h>
 #include <asm/lppaca.h>
 #include <asm/cache.h>
-#include <asm/compat.h>
 #include <asm/mmu.h>
 #include <asm/hvcall.h>
 #include <asm/xics.h>
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index da20569..138157de 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -309,8 +309,6 @@
 }
 EXPORT_SYMBOL(dma_set_coherent_mask);
 
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
 int dma_set_mask(struct device *dev, u64 dma_mask)
 {
 	if (ppc_md.dma_set_mask)
@@ -361,7 +359,6 @@
 
 static int __init dma_init(void)
 {
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
 #ifdef CONFIG_PCI
 	dma_debug_add_bus(&pci_bus_type);
 #endif
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index bc640e4..90bb39b 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1775,18 +1775,6 @@
 	return 0;
 }
 
-static int proc_eeh_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_eeh_show, NULL);
-}
-
-static const struct file_operations proc_eeh_operations = {
-	.open      = proc_eeh_open,
-	.read      = seq_read,
-	.llseek    = seq_lseek,
-	.release   = single_release,
-};
-
 #ifdef CONFIG_DEBUG_FS
 static int eeh_enable_dbgfs_set(void *data, u64 val)
 {
@@ -1828,7 +1816,7 @@
 static int __init eeh_init_proc(void)
 {
 	if (machine_is(pseries) || machine_is(powernv)) {
-		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+		proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
 #ifdef CONFIG_DEBUG_FS
 		debugfs_create_file("eeh_enable", 0600,
                                     powerpc_debugfs_root, NULL,
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1237f13..26ea979 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -632,6 +632,7 @@
 	hw_breakpoint_disable();
 
 	/* Deliver the signal to userspace */
+	clear_siginfo(&info);
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
 	info.si_code = TRAP_HWBKPT;
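Where a siginfo_t still has to be assembled field by field, clear_siginfo() replaces the open-coded memset so the whole union, padding included, is provably zeroed before use. Minimal sketch of the pattern this hunk establishes:

	#include <linux/signal.h>

	siginfo_t info;

	clear_siginfo(&info);	/* same effect as memset(&info, 0, sizeof(info)) */
	info.si_signo = SIGTRAP;
	info.si_errno = 0;
	info.si_code  = TRAP_HWBKPT;
	info.si_addr  = (void __user *)addr;	/* addr: illustrative */
	force_sig_info(SIGTRAP, &info, current);

Using the helper instead of a raw memset also makes every initialization site greppable for the ongoing siginfo cleanup.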
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index fb070d8..d49063d 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -154,18 +154,6 @@
 static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v);
 static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v);
 
-static int sensors_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ppc_rtas_sensors_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_sensors_operations = {
-	.open		= sensors_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int poweron_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, ppc_rtas_poweron_show, NULL);
@@ -231,18 +219,6 @@
 	.release	= single_release,
 };
 
-static int rmo_buf_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ppc_rtas_rmo_buf_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_rmo_buf_ops = {
-	.open		= rmo_buf_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int ppc_rtas_find_all_sensors(void);
 static void ppc_rtas_process_sensor(struct seq_file *m,
 	struct individual_sensor *s, int state, int error, const char *loc);
@@ -267,14 +243,14 @@
 		    &ppc_rtas_clock_operations);
 	proc_create("powerpc/rtas/poweron", 0644, NULL,
 		    &ppc_rtas_poweron_operations);
-	proc_create("powerpc/rtas/sensors", 0444, NULL,
-		    &ppc_rtas_sensors_operations);
+	proc_create_single("powerpc/rtas/sensors", 0444, NULL,
+			ppc_rtas_sensors_show);
 	proc_create("powerpc/rtas/frequency", 0644, NULL,
 		    &ppc_rtas_tone_freq_operations);
 	proc_create("powerpc/rtas/volume", 0644, NULL,
 		    &ppc_rtas_tone_volume_operations);
-	proc_create("powerpc/rtas/rmo_buffer", 0400, NULL,
-		    &ppc_rtas_rmo_buf_ops);
+	proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
+			ppc_rtas_rmo_buf_show);
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 0904492..0e17dcb 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -296,7 +296,6 @@
 void user_single_step_siginfo(struct task_struct *tsk,
 				struct pt_regs *regs, siginfo_t *info)
 {
-	memset(info, 0, sizeof(*info));
 	info->si_signo = SIGTRAP;
 	info->si_code = TRAP_TRACE;
 	info->si_addr = (void __user *)regs->nip;
@@ -334,7 +333,7 @@
 	 */
 	thread_pkey_regs_save(&current->thread);
 
-	memset(&info, 0, sizeof(info));
+	clear_siginfo(&info);
 	info.si_signo = signr;
 	info.si_code = code;
 	info.si_addr = (void __user *) addr;
@@ -970,7 +969,7 @@
 	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
 	       regs->nip, regs->msr, regs->trap);
 
-	_exception(SIGTRAP, regs, TRAP_FIXME, 0);
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
 
 	exception_exit(prev_state);
 }
@@ -992,7 +991,7 @@
 
 void RunModeException(struct pt_regs *regs)
 {
-	_exception(SIGTRAP, regs, TRAP_FIXME, 0);
+	_exception(SIGTRAP, regs, TRAP_UNK, 0);
 }
 
 void single_step_exception(struct pt_regs *regs)
@@ -1032,7 +1031,7 @@
 
 static inline int __parse_fpscr(unsigned long fpscr)
 {
-	int ret = FPE_FIXME;
+	int ret = FPE_FLTUNK;
 
 	/* Invalid operation */
 	if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
@@ -1973,7 +1972,7 @@
 	extern int do_spe_mathemu(struct pt_regs *regs);
 	unsigned long spefscr;
 	int fpexc_mode;
-	int code = FPE_FIXME;
+	int code = FPE_FLTUNK;
 	int err;
 
 	flush_spe_to_thread(current);
@@ -2042,7 +2041,7 @@
 		printk(KERN_ERR "unrecognized spe instruction "
 		       "in %s at %lx\n", current->comm, regs->nip);
 	} else {
-		_exception(SIGFPE, regs, FPE_FIXME, regs->nip);
+		_exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
 		return;
 	}
 }
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index b8d8267..dd10e6f 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -89,7 +89,7 @@
 	 */
 	.text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
 #ifdef CONFIG_LD_HEAD_STUB_CATCH
-		*(.linker_stub_catch);
+		KEEP(*(.linker_stub_catch));
 		. = . ;
 #endif
 
@@ -98,7 +98,7 @@
 		ALIGN_FUNCTION();
 #endif
 		/* careful! __ftr_alt_* sections need to be close to .text */
-		*(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+		*(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
 		SCHED_TEXT
 		CPUIDLE_TEXT
 		LOCK_TEXT
@@ -184,10 +184,10 @@
 	.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
 		INIT_DATA
 		__vtop_table_begin = .;
-		*(.vtop_fixup);
+		KEEP(*(.vtop_fixup));
 		__vtop_table_end = .;
 		__ptov_table_begin = .;
-		*(.ptov_fixup);
+		KEEP(*(.ptov_fixup));
 		__ptov_table_end = .;
 	}
 
@@ -208,26 +208,26 @@
 	. = ALIGN(8);
 	__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
 		__start___ftr_fixup = .;
-		*(__ftr_fixup)
+		KEEP(*(__ftr_fixup))
 		__stop___ftr_fixup = .;
 	}
 	. = ALIGN(8);
 	__mmu_ftr_fixup : AT(ADDR(__mmu_ftr_fixup) - LOAD_OFFSET) {
 		__start___mmu_ftr_fixup = .;
-		*(__mmu_ftr_fixup)
+		KEEP(*(__mmu_ftr_fixup))
 		__stop___mmu_ftr_fixup = .;
 	}
 	. = ALIGN(8);
 	__lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) {
 		__start___lwsync_fixup = .;
-		*(__lwsync_fixup)
+		KEEP(*(__lwsync_fixup))
 		__stop___lwsync_fixup = .;
 	}
 #ifdef CONFIG_PPC64
 	. = ALIGN(8);
 	__fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) {
 		__start___fw_ftr_fixup = .;
-		*(__fw_ftr_fixup)
+		KEEP(*(__fw_ftr_fixup))
 		__stop___fw_ftr_fixup = .;
 	}
 #endif
@@ -240,7 +240,7 @@
 	. = ALIGN(8);
 	.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
 		__machine_desc_start = . ;
-		*(.machine.desc)
+		KEEP(*(.machine.desc))
 		__machine_desc_end = . ;
 	}
 #ifdef CONFIG_RELOCATABLE
@@ -288,7 +288,7 @@
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
 		DATA_DATA
 		*(.data.rel*)
-		*(.sdata)
+		*(SDATA_MAIN)
 		*(.sdata2)
 		*(.got.plt) *(.got)
 		*(.plt)
@@ -303,7 +303,7 @@
 
 	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
 		__start_opd = .;
-		*(.opd)
+		KEEP(*(.opd))
 		__end_opd = .;
 	}
 
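All the KEEP() wrappers serve HAVE_LD_DEAD_CODE_DATA_ELIMINATION, selected in the Kconfig hunk above: the kernel is then linked with --gc-sections, and ld discards any input section with no incoming references. Tables that are reached only by walking a __start/__stop symbol pair at runtime look unreferenced to the linker and must be pinned explicitly. The generic idiom:

	.my_table : {
		__start_my_table = .;
		KEEP(*(.my_table))	/* only reached via the symbol pair */
		__stop_my_table = .;
	}

Without KEEP(), such a table links "successfully" as an empty range and the feature it backs silently does nothing.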
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c01d627..ef268d5 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -168,6 +168,7 @@
 		return SIGBUS;
 
 	current->thread.trap_nr = BUS_ADRERR;
+	clear_siginfo(&info);
 	info.si_signo = SIGBUS;
 	info.si_errno = 0;
 	info.si_code = BUS_ADRERR;
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index ecc66d5..ad054dd 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -7,6 +7,7 @@
  * 2 of the License, or (at your option) any later version.
 **/
 
+#include <linux/compat_time.h>
 #include <linux/oprofile.h>
 #include <linux/sched.h>
 #include <asm/processor.h>
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 67d3125..84b58ab 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -222,6 +222,7 @@
 config PHYS_64BIT
 	bool 'Large physical address support' if E500 || PPC_86xx
 	depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
+	select PHYS_ADDR_T_64BIT
 	---help---
 	  This option enables kernel support for larger than 32-bit physical
 	  addresses.  This feature may not be available on all cores.
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index 870c0a8..1e002e9 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -44,7 +44,7 @@
 		return;
 	}
 
-	memset(&info, 0, sizeof(info));
+	clear_siginfo(&info);
 
 	switch (type) {
 	case SPE_EVENT_INVALID_DMA:
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index ccc4215..c9ef3c5 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1095,18 +1095,6 @@
 		atomic_read(&nr_spu_contexts),
 		idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
 	return 0;
-}
-
-static int spu_loadavg_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_spu_loadavg, NULL);
-}
-
-static const struct file_operations spu_loadavg_fops = {
-	.open		= spu_loadavg_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
 };
 
 int __init spu_sched_init(void)
@@ -1135,7 +1123,7 @@
 
 	mod_timer(&spuloadavg_timer, 0);
 
-	entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops);
+	entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg);
 	if (!entry)
 		goto out_stop_kthread;
 
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cd4fd85..274bc06 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -3,8 +3,16 @@
 # see Documentation/kbuild/kconfig-language.txt.
 #
 
+config 64BIT
+	bool
+
+config 32BIT
+	bool
+
 config RISCV
 	def_bool y
+	# even on 32-bit, physical (and DMA) addresses are > 32-bits
+	select PHYS_ADDR_T_64BIT
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_IRQ
@@ -22,7 +30,6 @@
 	select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_GENERIC_DMA_COHERENT
 	select IRQ_DOMAIN
@@ -39,16 +46,9 @@
 config MMU
 	def_bool y
 
-# even on 32-bit, physical (and DMA) addresses are > 32-bits
-config ARCH_PHYS_ADDR_T_64BIT
-	def_bool y
-
 config ZONE_DMA32
 	bool
-	default y
-
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool y
+	default y if 64BIT
 
 config PAGE_OFFSET
 	hex
@@ -101,7 +101,6 @@
 
 config ARCH_RV32I
 	bool "RV32I"
-	select CPU_SUPPORTS_32BIT_KERNEL
 	select 32BIT
 	select GENERIC_ASHLDI3
 	select GENERIC_ASHRDI3
@@ -109,13 +108,13 @@
 
 config ARCH_RV64I
 	bool "RV64I"
-	select CPU_SUPPORTS_64BIT_KERNEL
 	select 64BIT
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select SWIOTLB
 
 endchoice
 
@@ -171,11 +170,6 @@
 	depends on SMP
 	default "8"
 
-config CPU_SUPPORTS_32BIT_KERNEL
-	bool
-config CPU_SUPPORTS_64BIT_KERNEL
-	bool
-
 choice
 	prompt "CPU Tuning"
 	default TUNE_GENERIC
@@ -202,24 +196,6 @@
 
 menu "Kernel type"
 
-choice
-	prompt "Kernel code model"
-	default 64BIT
-
-config 32BIT
-	bool "32-bit kernel"
-	depends on CPU_SUPPORTS_32BIT_KERNEL
-	help
-	  Select this option to build a 32-bit kernel.
-
-config 64BIT
-	bool "64-bit kernel"
-	depends on CPU_SUPPORTS_64BIT_KERNEL
-	help
-	  Select this option to build a 64-bit kernel.
-
-endchoice
-
 source "mm/Kconfig"
 
 source "kernel/Kconfig.preempt"
diff --git a/arch/riscv/include/asm/dma-mapping.h b/arch/riscv/include/asm/dma-mapping.h
new file mode 100644
index 0000000..8facc1c
--- /dev/null
+++ b/arch/riscv/include/asm/dma-mapping.h
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _RISCV_ASM_DMA_MAPPING_H
+#define _RISCV_ASM_DMA_MAPPING_H 1
+
+#ifdef CONFIG_SWIOTLB
+#include <linux/swiotlb.h>
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
+{
+	return &swiotlb_dma_ops;
+}
+#else
+#include <asm-generic/dma-mapping.h>
+#endif /* CONFIG_SWIOTLB */
+
+#endif /* _RISCV_ASM_DMA_MAPPING_H */
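With this header in place, RV64's generic DMA mapping calls dispatch into swiotlb, which transparently bounce-buffers any mapping the device cannot address directly. Driver code is unchanged; a hedged sketch of the ordinary API that now works (dev, buf and len are illustrative):

	#include <linux/dma-mapping.h>

	dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -ENOMEM;
	/* ... device performs the transfer ... */
	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);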
diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h
index 0f2fc9e..b3638c5 100644
--- a/arch/riscv/include/asm/pci.h
+++ b/arch/riscv/include/asm/pci.h
@@ -26,9 +26,6 @@
 /* RISC-V shim does not initialize PCI bus */
 #define pcibios_assign_all_busses() 1
 
-/* We do not have an IOMMU */
-#define PCI_DMA_BUS_IS_PHYS 1
-
 extern int isa_dma_bridge_buggy;
 
 #ifdef CONFIG_PCI
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index c11f40c..ee44a48 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -29,6 +29,7 @@
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 #include <linux/sched/task.h>
+#include <linux/swiotlb.h>
 
 #include <asm/setup.h>
 #include <asm/sections.h>
@@ -206,6 +207,7 @@
 	setup_bootmem();
 	paging_init();
 	unflatten_device_tree();
+	swiotlb_init(1);
 
 #ifdef CONFIG_SMP
 	setup_smp();
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 93132cb..b99d9dd 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -63,18 +63,6 @@
 		do_exit(SIGSEGV);
 }
 
-static inline void do_trap_siginfo(int signo, int code,
-	unsigned long addr, struct task_struct *tsk)
-{
-	siginfo_t info;
-
-	info.si_signo = signo;
-	info.si_errno = 0;
-	info.si_code = code;
-	info.si_addr = (void __user *)addr;
-	force_sig_info(signo, &info, tsk);
-}
-
 void do_trap(struct pt_regs *regs, int signo, int code,
 	unsigned long addr, struct task_struct *tsk)
 {
@@ -87,7 +75,7 @@
 		show_regs(regs);
 	}
 
-	do_trap_siginfo(signo, code, addr, tsk);
+	force_sig_fault(signo, code, (void __user *)addr, tsk);
 }
 
 static void do_trap_error(struct pt_regs *regs, int signo, int code,
@@ -149,7 +137,7 @@
 	}
 #endif /* CONFIG_GENERIC_BUG */
 
-	do_trap_siginfo(SIGTRAP, TRAP_BRKPT, regs->sepc, current);
+	force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc), current);
 	regs->sepc += 0x4;
 }
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 199ac3e..b7deee7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -35,9 +35,6 @@
 config GENERIC_BUG_RELATIVE_POINTERS
 	def_bool y
 
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool y
-
 config GENERIC_LOCKBREAK
 	def_bool y if SMP && PREEMPT
 
@@ -133,7 +130,6 @@
 	select HAVE_CMPXCHG_LOCAL
 	select HAVE_COPY_THREAD_TLS
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
 	select DMA_DIRECT_OPS
 	select HAVE_DYNAMIC_FTRACE
@@ -709,7 +705,11 @@
 menuconfig PCI
 	bool "PCI support"
 	select PCI_MSI
+	select IOMMU_HELPER
 	select IOMMU_SUPPORT
+	select NEED_DMA_MAP_STATE
+	select NEED_SG_DMA_LENGTH
+
 	help
 	  Enable PCI support.
 
@@ -733,15 +733,6 @@
 config HAS_IOMEM
 	def_bool PCI
 
-config IOMMU_HELPER
-	def_bool PCI
-
-config NEED_SG_DMA_LENGTH
-	def_bool PCI
-
-config NEED_DMA_MAP_STATE
-	def_bool PCI
-
 config CHSC_SCH
 	def_tristate m
 	prompt "Support for CHSC subchannels"
@@ -847,6 +838,10 @@
 
 source "drivers/Kconfig"
 
+config HAVE_PNETID
+	tristate
+	default (SMC || CCWGROUP)
+
 source "fs/Kconfig"
 
 source "arch/s390/Kconfig.debug"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index c79936d..68a6904 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -18,7 +18,7 @@
 KBUILD_AFLAGS	+= -m64
 UTS_MACHINE	:= s390x
 STACK_SIZE	:= 16384
-CHECKFLAGS	+= -D__s390__ -D__s390x__ -mbig-endian
+CHECKFLAGS	+= -D__s390__ -D__s390x__
 
 export LD_BFD
 
diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c
index 8720e92..dd95cdbd 100644
--- a/arch/s390/crypto/arch_random.c
+++ b/arch/s390/crypto/arch_random.c
@@ -2,14 +2,37 @@
 /*
  * s390 arch random implementation.
  *
- * Copyright IBM Corp. 2017
- * Author(s): Harald Freudenberger <freude@de.ibm.com>
+ * Copyright IBM Corp. 2017, 2018
+ * Author(s): Harald Freudenberger
+ *
+ * The s390_arch_random_generate() function may be called from random.c
+ * in interrupt context. So this implementation does its best to be very
+ * fast. There is a buffer of random data which is asynchronously checked
+ * and filled by a workqueue thread.
+ * If there are enough bytes in the buffer, s390_arch_random_generate()
+ * just delivers these bytes. Otherwise false is returned until the
+ * worker thread refills the buffer.
+ * The worker fills the rng buffer by pulling fresh entropy from the
+ * high quality (but slow) true hardware random generator. This entropy
+ * is then spread over the buffer with a pseudo random number generator (PRNG).
+ * As arch_get_random_seed_long() fetches 8 bytes and the calling
+ * function add_interrupt_randomness() counts this as 1 bit of entropy,
+ * the distribution needs to make sure there is in fact 1 bit of entropy
+ * contained in 8 bytes of the buffer. The current values pull 32 bytes of
+ * entropy and scatter this over a 2048 byte buffer, so 8 bytes in the buffer
+ * will contain 1 bit of entropy.
+ * The worker thread is rescheduled based on the fill level of the buffer,
+ * but with at least a 500 ms delay to avoid too much CPU consumption.
+ * So the maximum amount of rng data delivered via arch_get_random_seed
+ * is limited to 4k bytes per second.
  */
 
 #include <linux/kernel.h>
 #include <linux/atomic.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 #include <linux/static_key.h>
+#include <linux/workqueue.h>
 #include <asm/cpacf.h>
 
 DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
@@ -17,11 +40,83 @@
 atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
 EXPORT_SYMBOL(s390_arch_random_counter);
 
+#define ARCH_REFILL_TICKS (HZ/2)
+#define ARCH_PRNG_SEED_SIZE 32
+#define ARCH_RNG_BUF_SIZE 2048
+
+static DEFINE_SPINLOCK(arch_rng_lock);
+static u8 *arch_rng_buf;
+static unsigned int arch_rng_buf_idx;
+
+static void arch_rng_refill_buffer(struct work_struct *);
+static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer);
+
+bool s390_arch_random_generate(u8 *buf, unsigned int nbytes)
+{
+	/* lock rng buffer */
+	if (!spin_trylock(&arch_rng_lock))
+		return false;
+
+	/* try to resolve the requested amount of bytes from the buffer */
+	arch_rng_buf_idx -= nbytes;
+	if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) {
+		memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes);
+		atomic64_add(nbytes, &s390_arch_random_counter);
+		spin_unlock(&arch_rng_lock);
+		return true;
+	}
+
+	/* not enough bytes in rng buffer, refill is done asynchronously */
+	spin_unlock(&arch_rng_lock);
+
+	return false;
+}
+EXPORT_SYMBOL(s390_arch_random_generate);
+
+static void arch_rng_refill_buffer(struct work_struct *unused)
+{
+	unsigned int delay = ARCH_REFILL_TICKS;
+
+	spin_lock(&arch_rng_lock);
+	if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) {
+		/* buffer is exhausted and needs refill */
+		u8 seed[ARCH_PRNG_SEED_SIZE];
+		u8 prng_wa[240];
+		/* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */
+		cpacf_trng(NULL, 0, seed, sizeof(seed));
+		/* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */
+		memset(prng_wa, 0, sizeof(prng_wa));
+		cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
+			   &prng_wa, NULL, 0, seed, sizeof(seed));
+		cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
+			   &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0);
+		arch_rng_buf_idx = ARCH_RNG_BUF_SIZE;
+	}
+	delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE;
+	spin_unlock(&arch_rng_lock);
+
+	/* kick next check */
+	queue_delayed_work(system_long_wq, &arch_rng_work, delay);
+}
+
 static int __init s390_arch_random_init(void)
 {
-	/* check if subfunction CPACF_PRNO_TRNG is available */
-	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+	/* all the needed PRNO subfunctions available ? */
+	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) &&
+	    cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
+
+		/* alloc arch random working buffer */
+		arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL);
+		if (!arch_rng_buf)
+			return -ENOMEM;
+
+		/* kick worker queue job to fill the random buffer */
+		queue_delayed_work(system_long_wq,
+				   &arch_rng_work, ARCH_REFILL_TICKS);
+
+		/* enable arch random to the outside world */
 		static_branch_enable(&s390_arch_random_available);
+	}
 
 	return 0;
 }
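The sizing constants make the entropy accounting come out even: the refill pulls ARCH_PRNG_SEED_SIZE = 32 bytes (256 bits) of true entropy from the TRNG, and draining the ARCH_RNG_BUF_SIZE = 2048-byte buffer in 8-byte arch_get_random_seed_long() reads is credited at 1 bit each:

	seed in:    32 bytes * 8 bits            = 256 bits of true entropy
	credit out: 2048 bytes / 8 bytes-per-bit = 256 bits credited

So random.c never credits more entropy than the hardware actually delivered.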
diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c
index ae0ed8dd..5d85a03 100644
--- a/arch/s390/hypfs/hypfs_sprp.c
+++ b/arch/s390/hypfs/hypfs_sprp.c
@@ -13,7 +13,6 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/uaccess.h>
-#include <asm/compat.h>
 #include <asm/diag.h>
 #include <asm/sclp.h>
 #include "hypfs.h"
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
index 09aed10..c67b82d 100644
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -15,16 +15,11 @@
 
 #include <linux/static_key.h>
 #include <linux/atomic.h>
-#include <asm/cpacf.h>
 
 DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
 extern atomic64_t s390_arch_random_counter;
 
-static void s390_arch_random_generate(u8 *buf, unsigned int nbytes)
-{
-	cpacf_trng(NULL, 0, buf, nbytes);
-	atomic64_add(nbytes, &s390_arch_random_counter);
-}
+bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
 
 static inline bool arch_has_random(void)
 {
@@ -51,8 +46,7 @@
 static inline bool arch_get_random_seed_long(unsigned long *v)
 {
 	if (static_branch_likely(&s390_arch_random_available)) {
-		s390_arch_random_generate((u8 *)v, sizeof(*v));
-		return true;
+		return s390_arch_random_generate((u8 *)v, sizeof(*v));
 	}
 	return false;
 }
@@ -60,8 +54,7 @@
 static inline bool arch_get_random_seed_int(unsigned int *v)
 {
 	if (static_branch_likely(&s390_arch_random_available)) {
-		s390_arch_random_generate((u8 *)v, sizeof(*v));
-		return true;
+		return s390_arch_random_generate((u8 *)v, sizeof(*v));
 	}
 	return false;
 }
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 20bce13..a29dd43 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -231,4 +231,5 @@
 extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
 
 struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
+u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx);
 #endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 99aa817..860cab7 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -73,4 +73,14 @@
 
 #define to_ccwgroupdev(x) container_of((x), struct ccwgroup_device, dev)
 #define to_ccwgroupdrv(x) container_of((x), struct ccwgroup_driver, driver)
+
+#if IS_ENABLED(CONFIG_CCWGROUP)
+bool dev_is_ccwgroup(struct device *dev);
+#else /* CONFIG_CCWGROUP */
+static inline bool dev_is_ccwgroup(struct device *dev)
+{
+	return false;
+}
+#endif /* CONFIG_CCWGROUP */
+
 #endif
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 9830fb6..97db2fb 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -53,7 +53,6 @@
 
 typedef u32		compat_size_t;
 typedef s32		compat_ssize_t;
-typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
@@ -97,16 +96,6 @@
 	u32 gprs_high[NUM_GPRS];
 } s390_compat_regs_high;
 
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
 struct compat_stat {
 	compat_dev_t	st_dev;
 	u16		__pad1;
@@ -243,10 +232,10 @@
 
 struct compat_semid64_ds {
 	struct compat_ipc64_perm sem_perm;
-	compat_time_t  sem_otime;
-	compat_ulong_t __pad1;
-	compat_time_t  sem_ctime;
-	compat_ulong_t __pad2;
+	compat_ulong_t sem_otime;
+	compat_ulong_t sem_otime_high;
+	compat_ulong_t sem_ctime;
+	compat_ulong_t sem_ctime_high;
 	compat_ulong_t sem_nsems;
 	compat_ulong_t __unused1;
 	compat_ulong_t __unused2;
@@ -254,12 +243,12 @@
 
 struct compat_msqid64_ds {
 	struct compat_ipc64_perm msg_perm;
-	compat_time_t   msg_stime;
-	compat_ulong_t __pad1;
-	compat_time_t   msg_rtime;
-	compat_ulong_t __pad2;
-	compat_time_t   msg_ctime;
-	compat_ulong_t __pad3;
+	compat_ulong_t msg_stime;
+	compat_ulong_t msg_stime_high;
+	compat_ulong_t msg_rtime;
+	compat_ulong_t msg_rtime_high;
+	compat_ulong_t msg_ctime;
+	compat_ulong_t msg_ctime_high;
 	compat_ulong_t msg_cbytes;
 	compat_ulong_t msg_qnum;
 	compat_ulong_t msg_qbytes;
@@ -272,12 +261,12 @@
 struct compat_shmid64_ds {
 	struct compat_ipc64_perm shm_perm;
 	compat_size_t  shm_segsz;
-	compat_time_t  shm_atime;
-	compat_ulong_t __pad1;
-	compat_time_t  shm_dtime;
-	compat_ulong_t __pad2;
-	compat_time_t  shm_ctime;
-	compat_ulong_t __pad3;
+	compat_ulong_t shm_atime;
+	compat_ulong_t shm_atime_high;
+	compat_ulong_t shm_dtime;
+	compat_ulong_t shm_dtime_high;
+	compat_ulong_t shm_ctime;
+	compat_ulong_t shm_ctime_high;
 	compat_pid_t   shm_cpid;
 	compat_pid_t   shm_lpid;
 	compat_ulong_t shm_nattch;
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index f58d17e..de023a9 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -113,7 +113,7 @@
 
 struct hws_diag_entry {
 	unsigned int def:16;	    /* 0-15  Data Entry Format		 */
-	unsigned int R:14;	    /* 16-19 and 20-30 reserved		 */
+	unsigned int R:15;	    /* 16-19 and 20-30 reserved		 */
 	unsigned int I:1;	    /* 31 entry valid or invalid	 */
 	u8	     data[];	    /* Machine-dependent sample data	 */
 } __packed;
@@ -129,7 +129,9 @@
 			unsigned int f:1;	/* 0 - Block Full Indicator   */
 			unsigned int a:1;	/* 1 - Alert request control  */
 			unsigned int t:1;	/* 2 - Timestamp format	      */
-			unsigned long long:61;	/* 3 - 63: Reserved	      */
+			unsigned int :29;	/* 3 - 31: Reserved	      */
+			unsigned int bsdes:16;	/* 32-47: size of basic SDE   */
+			unsigned int dsdes:16;	/* 48-63: size of diagnostic SDE */
 		};
 		unsigned long long flags;	/* 0 - 63: All indicators     */
 	};
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 1a61b1b..7d22a47 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -125,8 +125,9 @@
  * ELF register definitions..
  */
 
+#include <linux/compat.h>
+
 #include <asm/ptrace.h>
-#include <asm/compat.h>
 #include <asm/syscall.h>
 #include <asm/user.h>
 
@@ -136,7 +137,6 @@
 typedef s390_fp_regs compat_elf_fpregset_t;
 typedef s390_compat_regs compat_elf_gregset_t;
 
-#include <linux/compat.h>
 #include <linux/sched/mm.h>	/* for task_struct */
 #include <asm/mmu_context.h>
 
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index a296c6a..dfbc3c6c0 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -14,6 +14,8 @@
 #include <asm/lowcore.h>
 
 #define local_softirq_pending() (S390_lowcore.softirq_pending)
+#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x))
+#define or_softirq_pending(x)  (S390_lowcore.softirq_pending |= (x))
 
 #define __ARCH_IRQ_STAT
 #define __ARCH_HAS_DO_SOFTIRQ
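
set_softirq_pending() and or_softirq_pending() let generic code manipulate the pending mask through arch accessors instead of assuming a per-cpu irq_stat layout; on s390 the mask lives in the lowcore. A sketch of the kind of generic caller this enables (illustrative, not the verbatim kernel/softirq.c body):

	/* raise softirq nr via the arch accessor (sketch) */
	static inline void raise_pending(unsigned int nr)
	{
		or_softirq_pending(1UL << nr);
	}
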
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 12fe359..94f8db4 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -2,8 +2,6 @@
 #ifndef __ASM_S390_PCI_H
 #define __ASM_S390_PCI_H
 
-/* must be set before including asm-generic/pci.h */
-#define PCI_DMA_BUS_IS_PHYS (0)
 /* must be set before including pci_clp.h */
 #define PCI_BAR_COUNT	6
 
diff --git a/arch/s390/include/asm/pnet.h b/arch/s390/include/asm/pnet.h
new file mode 100644
index 0000000..6e27858
--- /dev/null
+++ b/arch/s390/include/asm/pnet.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  IBM System z PNET ID Support
+ *
+ *    Copyright IBM Corp. 2018
+ */
+
+#ifndef _ASM_S390_PNET_H
+#define _ASM_S390_PNET_H
+
+#include <linux/device.h>
+#include <linux/types.h>
+
+#define PNETIDS_LEN		64	/* Total utility string length in bytes
+					 * to cover up to 4 PNETIDs of 16 bytes
+					 * for up to 4 device ports
+					 */
+#define MAX_PNETID_LEN		16	/* Max.length of a single port PNETID */
+#define MAX_PNETID_PORTS	(PNETIDS_LEN / MAX_PNETID_LEN)
+					/* Max. # of ports with a PNETID */
+
+int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid);
+#endif /* _ASM_S390_PNET_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index faef3f7..e364873 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -9,9 +9,12 @@
 generic-y += fcntl.h
 generic-y += ioctl.h
 generic-y += mman.h
+generic-y += msgbuf.h
 generic-y += param.h
 generic-y += poll.h
 generic-y += resource.h
+generic-y += sembuf.h
+generic-y += shmbuf.h
 generic-y += sockios.h
 generic-y += swab.h
 generic-y += termbits.h
diff --git a/arch/s390/include/uapi/asm/msgbuf.h b/arch/s390/include/uapi/asm/msgbuf.h
deleted file mode 100644
index 604f847..0000000
--- a/arch/s390/include/uapi/asm/msgbuf.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _S390_MSGBUF_H
-#define _S390_MSGBUF_H
-
-/* 
- * The msqid64_ds structure for S/390 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
- */
-
-struct msqid64_ds {
-	struct ipc64_perm msg_perm;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-#ifndef __s390x__
-	unsigned long	__unused1;
-#endif /* ! __s390x__ */
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-#ifndef __s390x__
-	unsigned long	__unused2;
-#endif /* ! __s390x__ */
-	__kernel_time_t msg_ctime;	/* last change time */
-#ifndef __s390x__
-	unsigned long	__unused3;
-#endif /* ! __s390x__ */
-	unsigned long  msg_cbytes;	/* current number of bytes on queue */
-	unsigned long  msg_qnum;	/* number of messages in queue */
-	unsigned long  msg_qbytes;	/* max number of bytes on queue */
-	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
-	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	unsigned long  __unused4;
-	unsigned long  __unused5;
-};
-
-#endif /* _S390_MSGBUF_H */
diff --git a/arch/s390/include/uapi/asm/sembuf.h b/arch/s390/include/uapi/asm/sembuf.h
deleted file mode 100644
index 3e91769..0000000
--- a/arch/s390/include/uapi/asm/sembuf.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _S390_SEMBUF_H
-#define _S390_SEMBUF_H
-
-/* 
- * The semid64_ds structure for S/390 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem (for !__s390x__)
- * - 2 miscellaneous 32-bit values
- */
-
-struct semid64_ds {
-	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	__kernel_time_t	sem_otime;		/* last semop time */
-#ifndef __s390x__
-	unsigned long	__unused1;
-#endif /* ! __s390x__ */
-	__kernel_time_t	sem_ctime;		/* last change time */
-#ifndef __s390x__
-	unsigned long	__unused2;
-#endif /* ! __s390x__ */
-	unsigned long	sem_nsems;		/* no. of semaphores in array */
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* _S390_SEMBUF_H */
diff --git a/arch/s390/include/uapi/asm/shmbuf.h b/arch/s390/include/uapi/asm/shmbuf.h
deleted file mode 100644
index 9cdce8d..0000000
--- a/arch/s390/include/uapi/asm/shmbuf.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _S390_SHMBUF_H
-#define _S390_SHMBUF_H
-
-/* 
- * The shmid64_ds structure for S/390 architecture.
- * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
- *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem (for !__s390x__)
- * - 2 miscellaneous 32-bit values
- */
-
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-#ifndef __s390x__
-	unsigned long		__unused1;
-#endif /* ! __s390x__ */
-	__kernel_time_t		shm_dtime;	/* last detach time */
-#ifndef __s390x__
-	unsigned long		__unused2;
-#endif /* ! __s390x__ */
-	__kernel_time_t		shm_ctime;	/* last change time */
-#ifndef __s390x__
-	unsigned long		__unused3;
-#endif /* ! __s390x__ */
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned long		shm_nattch;	/* no. of current attaches */
-	unsigned long		__unused4;
-	unsigned long		__unused5;
-};
-
-struct shminfo64 {
-	unsigned long	shmmax;
-	unsigned long	shmmin;
-	unsigned long	shmmni;
-	unsigned long	shmseg;
-	unsigned long	shmall;
-	unsigned long	__unused1;
-	unsigned long	__unused2;
-	unsigned long	__unused3;
-	unsigned long	__unused4;
-};
-
-#endif /* _S390_SHMBUF_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index f92dd8e..2fed39b 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -6,22 +6,26 @@
 ifdef CONFIG_FUNCTION_TRACER
 
 # Do not trace tracer code
-CFLAGS_REMOVE_ftrace.o	= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace.o		= $(CC_FLAGS_FTRACE)
 
 # Do not trace early setup code
-CFLAGS_REMOVE_als.o	= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_early.o	= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_als.o		= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_early.o		= $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_early_nobss.o	= $(CC_FLAGS_FTRACE)
 
 endif
 
-GCOV_PROFILE_als.o	:= n
-GCOV_PROFILE_early.o	:= n
+GCOV_PROFILE_als.o		:= n
+GCOV_PROFILE_early.o		:= n
+GCOV_PROFILE_early_nobss.o	:= n
 
-KCOV_INSTRUMENT_als.o	:= n
-KCOV_INSTRUMENT_early.o	:= n
+KCOV_INSTRUMENT_als.o		:= n
+KCOV_INSTRUMENT_early.o		:= n
+KCOV_INSTRUMENT_early_nobss.o	:= n
 
-UBSAN_SANITIZE_als.o	:= n
-UBSAN_SANITIZE_early.o	:= n
+UBSAN_SANITIZE_als.o		:= n
+UBSAN_SANITIZE_early.o		:= n
+UBSAN_SANITIZE_early_nobss.o	:= n
 
 #
 # Use -march=z900 for als.c to be able to print an error
@@ -57,7 +61,7 @@
 
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
+obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o als.o early_nobss.o
 obj-y	+= sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
 obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
 obj-y	+= entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
@@ -94,3 +98,6 @@
 # vdso
 obj-y				+= vdso64/
 obj-$(CONFIG_COMPAT)		+= vdso32/
+
+chkbss := head.o head64.o als.o early_nobss.o
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 32daa0f..827699e 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -34,32 +34,6 @@
 static void __init setup_boot_command_line(void);
 
 /*
- * Get the TOD clock running.
- */
-static void __init reset_tod_clock(void)
-{
-	u64 time;
-
-	if (store_tod_clock(&time) == 0)
-		return;
-	/* TOD clock not running. Set the clock to Unix Epoch. */
-	if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
-		disabled_wait(0);
-
-	memset(tod_clock_base, 0, 16);
-	*(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
-	S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
-}
-
-/*
- * Clear bss memory
- */
-static noinline __init void clear_bss_section(void)
-{
-	memset(__bss_start, 0, __bss_stop - __bss_start);
-}
-
-/*
  * Initialize storage key for kernel pages
  */
 static noinline __init void init_kernel_storage_key(void)
@@ -310,57 +284,6 @@
 }
 early_param("cad", cad_setup);
 
-static __init void memmove_early(void *dst, const void *src, size_t n)
-{
-	unsigned long addr;
-	long incr;
-	psw_t old;
-
-	if (!n)
-		return;
-	incr = 1;
-	if (dst > src) {
-		incr = -incr;
-		dst += n - 1;
-		src += n - 1;
-	}
-	old = S390_lowcore.program_new_psw;
-	S390_lowcore.program_new_psw.mask = __extract_psw();
-	asm volatile(
-		"	larl	%[addr],1f\n"
-		"	stg	%[addr],%[psw_pgm_addr]\n"
-		"0:     mvc	0(1,%[dst]),0(%[src])\n"
-		"	agr	%[dst],%[incr]\n"
-		"	agr	%[src],%[incr]\n"
-		"	brctg	%[n],0b\n"
-		"1:\n"
-		: [addr] "=&d" (addr),
-		  [psw_pgm_addr] "=Q" (S390_lowcore.program_new_psw.addr),
-		  [dst] "+&a" (dst), [src] "+&a" (src),  [n] "+d" (n)
-		: [incr] "d" (incr)
-		: "cc", "memory");
-	S390_lowcore.program_new_psw = old;
-}
-
-static __init noinline void rescue_initrd(void)
-{
-#ifdef CONFIG_BLK_DEV_INITRD
-	unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
-	/*
-	 * Just like in case of IPL from VM reader we make sure there is a
-	 * gap of 4MB between end of kernel and start of initrd.
-	 * That way we can also be sure that saving an NSS will succeed,
-	 * which however only requires different segments.
-	 */
-	if (!INITRD_START || !INITRD_SIZE)
-		return;
-	if (INITRD_START >= min_initrd_addr)
-		return;
-	memmove_early((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
-	INITRD_START = min_initrd_addr;
-#endif
-}
-
 /* Set up boot command line */
 static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t))
 {
@@ -410,9 +333,6 @@
 
 void __init startup_init(void)
 {
-	reset_tod_clock();
-	rescue_initrd();
-	clear_bss_section();
 	time_early_init();
 	init_kernel_storage_key();
 	lockdep_off();
diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c
new file mode 100644
index 0000000..2d84fc4
--- /dev/null
+++ b/arch/s390/kernel/early_nobss.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Copyright IBM Corp. 2007, 2018
+ */
+
+/*
+ * Early setup functions which may not rely on an initialized bss
+ * section. The last thing that is supposed to happen here is
+ * initialization of the bss section.
+ */
+
+#include <linux/processor.h>
+#include <linux/string.h>
+#include <asm/sections.h>
+#include <asm/lowcore.h>
+#include <asm/setup.h>
+#include <asm/timex.h>
+#include "entry.h"
+
+static void __init reset_tod_clock(void)
+{
+	u64 time;
+
+	if (store_tod_clock(&time) == 0)
+		return;
+	/* TOD clock not running. Set the clock to Unix Epoch. */
+	if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
+		disabled_wait(0);
+
+	memset(tod_clock_base, 0, 16);
+	*(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
+	S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
+}
+
+static void __init rescue_initrd(void)
+{
+	unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
+
+	/*
+	 * Just like in case of IPL from VM reader we make sure there is a
+	 * gap of 4MB between end of kernel and start of initrd.
+	 * That way we can also be sure that saving an NSS will succeed,
+	 * which however only requires different segments.
+	 */
+	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
+		return;
+	if (!INITRD_START || !INITRD_SIZE)
+		return;
+	if (INITRD_START >= min_initrd_addr)
+		return;
+	memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+	INITRD_START = min_initrd_addr;
+}
+
+static void __init clear_bss_section(void)
+{
+	memset(__bss_start, 0, __bss_stop - __bss_start);
+}
+
+void __init startup_init_nobss(void)
+{
+	reset_tod_clock();
+	rescue_initrd();
+	clear_bss_section();
+}
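
The constraint behind the new file is ordering: code in early_nobss.c runs before clear_bss_section(), so it must not rely on any zero-initialized static. An illustration of the rule with hypothetical variables (not from the patch):

	static int probed;	/* zero-initialized: placed in .bss, unusable here */
	static int mode = 1;	/* explicitly initialized: placed in .data, safe */

The chkbss Makefile rule added later in this series enforces this at build time by rejecting objects with a non-empty .bss section.
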
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index e87758f..961abfa 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -58,6 +58,7 @@
 void __init init_IRQ(void);
 void do_IRQ(struct pt_regs *regs, int irq);
 void do_restart(void);
+void __init startup_init_nobss(void);
 void __init startup_init(void);
 void die(struct pt_regs *regs, const char *str);
 int setup_profiling_timer(unsigned int multiplier);
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 38a973c..791cb90 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -40,8 +40,12 @@
 	stg	%r15,__LC_KERNEL_STACK	# set end of kernel stack
 	aghi	%r15,-160
 #
-# Save ipl parameters, clear bss memory, initialize storage key for kernel pages,
-# and create a kernel NSS if the SAVESYS= parm is defined
+# Early setup functions that may not rely on an initialized bss section,
+# like moving the initrd. Returns with an initialized bss section.
+#
+	brasl	%r14,startup_init_nobss
+#
+# Early machine initialization and detection functions.
 #
 	brasl	%r14,startup_init
 	lpswe	.Lentry-.LPG1(13)	# jump to _stext in primary-space,
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 8ad6a71..18ae7b9 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -36,9 +36,9 @@
 static int __init nospec_report(void)
 {
 	if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
-		pr_info("Spectre V2 mitigation: execute trampolines.\n");
+		pr_info("Spectre V2 mitigation: execute trampolines\n");
 	if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
-		pr_info("Spectre V2 mitigation: limited branch prediction.\n");
+		pr_info("Spectre V2 mitigation: limited branch prediction\n");
 	return 0;
 }
 arch_initcall(nospec_report);
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index fc7e04c..54f5496 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -294,21 +294,9 @@
 	return 0;
 }
 
-static int sysinfo_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sysinfo_show, NULL);
-}
-
-static const struct file_operations sysinfo_fops = {
-	.open		= sysinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init sysinfo_create_proc(void)
 {
-	proc_create("sysinfo", 0444, NULL, &sysinfo_fops);
+	proc_create_single("sysinfo", 0444, NULL, sysinfo_show);
 	return 0;
 }
 device_initcall(sysinfo_create_proc);
@@ -386,18 +374,6 @@
 	.show		= service_level_show
 };
 
-static int service_level_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &service_level_seq_ops);
-}
-
-static const struct file_operations service_level_ops = {
-	.open		= service_level_open,
-	.read		= seq_read,
-	.llseek 	= seq_lseek,
-	.release	= seq_release
-};
-
 static void service_level_vm_print(struct seq_file *m,
 				   struct service_level *slr)
 {
@@ -420,7 +396,7 @@
 
 static __init int create_proc_service_level(void)
 {
-	proc_create("service_levels", 0, NULL, &service_level_ops);
+	proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops);
 	if (MACHINE_IS_VM)
 		register_service_level(&service_level_vm);
 	return 0;
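
proc_create_single() folds the single_open()/seq_read/seq_lseek/single_release boilerplate into procfs itself, so a read-only file needs nothing but its show routine. A minimal sketch of the pattern (file name and show function are hypothetical):

	static int foo_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello\n");		/* emit the file contents */
		return 0;
	}

	/* in an initcall: */
	proc_create_single("foo", 0444, NULL, foo_show);
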
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index a5297a2..8003b38 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -44,14 +44,8 @@
 
 void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
 {
-	siginfo_t info;
-
 	if (user_mode(regs)) {
-		info.si_signo = si_signo;
-		info.si_errno = 0;
-		info.si_code = si_code;
-		info.si_addr = get_trap_ip(regs);
-		force_sig_info(si_signo, &info, current);
+		force_sig_fault(si_signo, si_code, get_trap_ip(regs), current);
 		report_user_fault(regs, si_signo, 0);
         } else {
                 const struct exception_table_entry *fixup;
@@ -80,18 +74,12 @@
 
 void do_per_trap(struct pt_regs *regs)
 {
-	siginfo_t info;
-
 	if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
 		return;
 	if (!current->ptrace)
 		return;
-	info.si_signo = SIGTRAP;
-	info.si_errno = 0;
-	info.si_code = TRAP_HWBKPT;
-	info.si_addr =
-		(void __force __user *) current->thread.per_event.address;
-	force_sig_info(SIGTRAP, &info, current);
+	force_sig_fault(SIGTRAP, TRAP_HWBKPT,
+		(void __force __user *) current->thread.per_event.address, current);
 }
 NOKPROBE_SYMBOL(do_per_trap);
 
@@ -165,7 +153,6 @@
 
 void illegal_op(struct pt_regs *regs)
 {
-	siginfo_t info;
         __u8 opcode[6];
 	__u16 __user *location;
 	int is_uprobe_insn = 0;
@@ -177,13 +164,9 @@
 		if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
 			return;
 		if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
-			if (current->ptrace) {
-				info.si_signo = SIGTRAP;
-				info.si_errno = 0;
-				info.si_code = TRAP_BRKPT;
-				info.si_addr = location;
-				force_sig_info(SIGTRAP, &info, current);
-			} else
+			if (current->ptrace)
+				force_sig_fault(SIGTRAP, TRAP_BRKPT, location, current);
+			else
 				signal = SIGILL;
 #ifdef CONFIG_UPROBES
 		} else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
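
force_sig_fault() replaces the open-coded siginfo_t setup throughout these conversions: it initializes the siginfo internally (avoiding uninitialized-padding leaks to userspace) and takes the signal, si_code and fault address directly; architectures that define __ARCH_SI_TRAPNO, such as sparc in the hunks further down, pass an extra trapno argument. The conversion pattern, with a hypothetical addr:

	force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, current);
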
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 08d12cf..f0414f5 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -82,10 +82,10 @@
 
 	. = ALIGN(PAGE_SIZE);
 	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-		VMLINUX_SYMBOL(_sinittext) = . ;
+		_sinittext = .;
 		INIT_TEXT
 		. = ALIGN(PAGE_SIZE);
-		VMLINUX_SYMBOL(_einittext) = . ;
+		_einittext = .;
 	}
 
 	/*
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index ebfa044..a3bce0e 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -26,7 +26,6 @@
 #include <asm/gmap.h>
 #include <asm/io.h>
 #include <asm/ptrace.h>
-#include <asm/compat.h>
 #include <asm/sclp.h>
 #include "gaccess.h"
 #include "kvm-s390.h"
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 9bfe080..57ab401 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -8,3 +8,6 @@
 lib-$(CONFIG_SMP) += spinlock.o
 lib-$(CONFIG_KPROBES) += probes.o
 lib-$(CONFIG_UPROBES) += probes.o
+
+chkbss := mem.o
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 93faeca..e074480 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -265,14 +265,10 @@
  */
 static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
 {
-	struct siginfo si;
-
 	report_user_fault(regs, SIGSEGV, 1);
-	si.si_signo = SIGSEGV;
-	si.si_errno = 0;
-	si.si_code = si_code;
-	si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
-	force_sig_info(SIGSEGV, &si, current);
+	force_sig_fault(SIGSEGV, si_code,
+			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
+			current);
 }
 
 static noinline void do_no_context(struct pt_regs *regs)
@@ -316,18 +312,13 @@
 
 static noinline void do_sigbus(struct pt_regs *regs)
 {
-	struct task_struct *tsk = current;
-	struct siginfo si;
-
 	/*
 	 * Send a sigbus, regardless of whether we were in kernel
 	 * or user mode.
 	 */
-	si.si_signo = SIGBUS;
-	si.si_errno = 0;
-	si.si_code = BUS_ADRERR;
-	si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
-	force_sig_info(SIGBUS, &si, tsk);
+	force_sig_fault(SIGBUS, BUS_ADRERR,
+			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
+			current);
 }
 
 static noinline int signal_return(struct pt_regs *regs)
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index d4663b4..8cab6de 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -3,3 +3,4 @@
 # Arch-specific network modules
 #
 obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+obj-$(CONFIG_HAVE_PNETID) += pnet.o
diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c
new file mode 100644
index 0000000..e22f1b1
--- /dev/null
+++ b/arch/s390/net/pnet.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  IBM System z PNET ID Support
+ *
+ *    Copyright IBM Corp. 2018
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <asm/ccwgroup.h>
+#include <asm/ccwdev.h>
+#include <asm/pnet.h>
+
+/*
+ * Get the PNETIDs from a device.
+ * s390 hardware supports the definition of a so-called Physical Network
+ * Identifier (short PNETID) per network device port. These PNETIDs can be
+ * used to identify network devices that are attached to the same physical
+ * network (broadcast domain).
+ *
+ * The device can be
+ * - a ccwgroup device with all bundled subchannels having the same PNETID
+ * - a PCI attached network device
+ *
+ * Returns:
+ * 0:		PNETIDs extracted from device.
+ * -ENOMEM:	No memory to extract utility string.
+ * -EOPNOTSUPP: Device type without utility string support
+ */
+static int pnet_ids_by_device(struct device *dev, u8 *pnetids)
+{
+	memset(pnetids, 0, PNETIDS_LEN);
+	if (dev_is_ccwgroup(dev)) {
+		struct ccwgroup_device *gdev = to_ccwgroupdev(dev);
+		u8 *util_str;
+
+		util_str = ccw_device_get_util_str(gdev->cdev[0], 0);
+		if (!util_str)
+			return -ENOMEM;
+		memcpy(pnetids, util_str, PNETIDS_LEN);
+		kfree(util_str);
+		return 0;
+	}
+	if (dev_is_pci(dev)) {
+		struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+
+		memcpy(pnetids, zdev->util_str, sizeof(zdev->util_str));
+		return 0;
+	}
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Extract the pnetid for a device port.
+ *
+ * Return 0 if a pnetid is found and -ENOENT otherwise.
+ */
+int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid)
+{
+	u8 pnetids[MAX_PNETID_PORTS][MAX_PNETID_LEN];
+	static const u8 zero[MAX_PNETID_LEN] = { 0 };
+	int rc = 0;
+
+	if (!dev || port >= MAX_PNETID_PORTS)
+		return -ENOENT;
+
+	if (!pnet_ids_by_device(dev, (u8 *)pnetids) &&
+	    memcmp(pnetids[port], zero, MAX_PNETID_LEN))
+		memcpy(pnetid, pnetids[port], MAX_PNETID_LEN);
+	else
+		rc = -ENOENT;
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnet_id_by_dev_port);
+
+MODULE_DESCRIPTION("pnetid determination from utility strings");
+MODULE_LICENSE("GPL");
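
A hedged usage sketch for the new export — a hypothetical PCI network driver asking for the PNETID of its first port (pdev and the surrounding probe logic are illustrative, not from this series):

	u8 pnetid[MAX_PNETID_LEN];
	int rc;

	rc = pnet_id_by_dev_port(&pdev->dev, 0, pnetid);
	if (rc)
		return rc;	/* -ENOENT: no PNETID configured for port 0 */
	/* pnetid now holds the raw 16-byte ID for port 0 */
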
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 93cd0f1..19b2d2a 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -19,7 +19,6 @@
 #include <linux/uaccess.h>
 #include <asm/pci_debug.h>
 #include <asm/pci_clp.h>
-#include <asm/compat.h>
 #include <asm/clp.h>
 #include <uapi/asm/clp.h>
 
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 2d15d84..d387a0f 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -668,15 +668,6 @@
 	kmem_cache_destroy(dma_region_table_cache);
 }
 
-#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)
-
-static int __init dma_debug_do_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-	return 0;
-}
-fs_initcall(dma_debug_do_init);
-
 const struct dma_map_ops s390_pci_dma_ops = {
 	.alloc		= s390_dma_alloc,
 	.free		= s390_dma_free,
@@ -685,8 +676,6 @@
 	.map_page	= s390_dma_map_pages,
 	.unmap_page	= s390_dma_unmap_pages,
 	.mapping_error	= s390_mapping_error,
-	/* if we support direct DMA this must be conditional */
-	.is_phys	= 0,
 	/* dma_supported is unconditionally true without a callback */
 };
 EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss
new file mode 100644
index 0000000..d92f2d9
--- /dev/null
+++ b/arch/s390/scripts/Makefile.chkbss
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+quiet_cmd_chkbss = CHKBSS  $<
+define cmd_chkbss
+	if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \
+		echo "error: $< .bss section is not empty" >&2; exit 1; \
+	fi; \
+	touch $@;
+endef
+
+$(obj)/built-in.a: $(patsubst %, $(obj)/%.chkbss, $(chkbss))
+
+%.o.chkbss: %.o
+	$(call cmd,chkbss)
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 1851eae..ae619d5 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -14,7 +14,6 @@
 	select HAVE_OPROFILE
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_DMA_API_DEBUG
 	select HAVE_PERF_EVENTS
 	select HAVE_DEBUG_BUGVERBOSE
 	select ARCH_HAVE_CUSTOM_GPIO_H
@@ -51,6 +50,9 @@
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_NMI
+	select NEED_DMA_MAP_STATE
+	select NEED_SG_DMA_LENGTH
+
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
@@ -58,7 +60,7 @@
 	  <http://www.linux-sh.org/>.
 
 config SUPERH32
-	def_bool ARCH = "sh"
+	def_bool "$(ARCH)" = "sh"
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_IOREMAP_PROT if MMU && !X2TLB
@@ -77,7 +79,7 @@
 	select HAVE_CC_STACKPROTECTOR
 
 config SUPERH64
-	def_bool ARCH = "sh64"
+	def_bool "$(ARCH)" = "sh64"
 	select HAVE_EXIT_THREAD
 	select KALLSYMS
 
@@ -161,12 +163,6 @@
 config DMA_NONCOHERENT
 	def_bool !DMA_COHERENT
 
-config NEED_DMA_MAP_STATE
-	def_bool DMA_NONCOHERENT
-
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config PGTABLE_LEVELS
 	default 3 if X2TLB
 	default 2
diff --git a/arch/sh/drivers/dma/dma-api.c b/arch/sh/drivers/dma/dma-api.c
index c0eec08..b05be59 100644
--- a/arch/sh/drivers/dma/dma-api.c
+++ b/arch/sh/drivers/dma/dma-api.c
@@ -339,18 +339,6 @@
 	return 0;
 }
 
-static int dma_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, dma_proc_show, NULL);
-}
-
-static const struct file_operations dma_proc_fops = {
-	.open		= dma_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 int register_dmac(struct dma_info *info)
 {
 	unsigned int total_channels, i;
@@ -423,7 +411,7 @@
 static int __init dma_api_init(void)
 {
 	printk(KERN_NOTICE "DMA: Registering DMA API.\n");
-	return proc_create("dma", 0, NULL, &dma_proc_fops) ? 0 : -ENOMEM;
+	return proc_create_single("dma", 0, NULL, dma_proc_show) ? 0 : -ENOMEM;
 }
 subsys_initcall(dma_api_init);
 
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 1efcce7..46dd82a 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -1,3 +1,4 @@
+generic-y += compat.h
 generic-y += current.h
 generic-y += delay.h
 generic-y += div64.h
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index 0033f0d..10a36b1 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -71,12 +71,6 @@
  * SuperH has everything mapped statically like x86.
  */
 
-/* The PCI address space does equal the physical memory
- * address space.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(dma_ops->is_phys)
-
 #ifdef CONFIG_PCI
 /*
  * None of the SH PCI controllers support MWI, it is always treated as a
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
index 178457d..3e3a32f 100644
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
@@ -78,7 +78,6 @@
 	.sync_single_for_device	= nommu_sync_single_for_device,
 	.sync_sg_for_device	= nommu_sync_sg_for_device,
 #endif
-	.is_phys		= 1,
 };
 
 void __init no_iommu_init(void)
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index afe9657..8648ed0 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -347,13 +347,8 @@
 
 		/* Deliver the signal to userspace */
 		if (!arch_check_bp_in_kernelspace(bp)) {
-			siginfo_t info;
-
-			info.si_signo = args->signr;
-			info.si_errno = notifier_to_errno(rc);
-			info.si_code = TRAP_HWBKPT;
-
-			force_sig_info(args->signr, &info, current);
+			force_sig_fault(SIGTRAP, TRAP_HWBKPT,
+					(void __user *)NULL, current);
 		}
 
 		rcu_read_unlock();
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 245dbeb..5717c7c 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -44,7 +44,7 @@
 
 	seq_printf(p, "%*s: ", prec, "NMI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stat[j].__nmi_count);
+		seq_printf(p, "%10u ", nmi_count(j));
 	seq_printf(p, "  Non-maskable interrupts\n");
 
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index b3770bb..60709ad 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -477,7 +477,6 @@
 {
 	unsigned long error_code = 0;
 	mm_segment_t oldfs;
-	siginfo_t info;
 	insn_size_t instruction;
 	int tmp;
 
@@ -537,11 +536,7 @@
 		       "access (PC %lx PR %lx)\n", current->comm, regs->pc,
 		       regs->pr);
 
-		info.si_signo = SIGBUS;
-		info.si_errno = 0;
-		info.si_code = si_code;
-		info.si_addr = (void __user *)address;
-		force_sig_info(SIGBUS, &info, current);
+		force_sig_fault(SIGBUS, si_code, (void __user *)address, current);
 	} else {
 		inc_unaligned_kernel_access();
 
@@ -598,19 +593,20 @@
 #ifdef CONFIG_CPU_SH2A
 asmlinkage void do_divide_error(unsigned long r4)
 {
-	siginfo_t info;
+	int code;
 
 	switch (r4) {
 	case TRAP_DIVZERO_ERROR:
-		info.si_code = FPE_INTDIV;
+		code = FPE_INTDIV;
 		break;
 	case TRAP_DIVOVF_ERROR:
-		info.si_code = FPE_INTOVF;
+		code = FPE_INTOVF;
 		break;
+	default:
+		/* Let gcc know unhandled cases don't make it past here */
+		return;
 	}
-
-	info.si_signo = SIGFPE;
-	force_sig_info(info.si_signo, &info, current);
+	force_sig_fault(SIGFPE, code, NULL, current);
 }
 #endif
 
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index c86f436..a0fa8fc 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -507,7 +507,6 @@
 	unsigned short insn = *(unsigned short *)regs->pc;
 	unsigned short finsn;
 	unsigned long nextpc;
-	siginfo_t info;
 	int nib[4] = {
 		(insn >> 12) & 0xf,
 		(insn >> 8) & 0xf,
@@ -560,11 +559,8 @@
 				~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
 			task_thread_info(tsk)->status |= TS_USEDFPU;
 		} else {
-			info.si_signo = SIGFPE;
-			info.si_errno = 0;
-			info.si_code = FPE_FLTINV;
-			info.si_addr = (void __user *)regs->pc;
-			force_sig_info(SIGFPE, &info, tsk);
+			force_sig_fault(SIGFPE, FPE_FLTINV,
+					(void __user *)regs->pc, tsk);
 		}
 
 		regs->pc = nextpc;
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index f1b4469..fceb2ad 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -20,18 +20,9 @@
 #include <asm/cacheflush.h>
 #include <asm/addrspace.h>
 
-#define PREALLOC_DMA_DEBUG_ENTRIES	4096
-
 const struct dma_map_ops *dma_ops;
 EXPORT_SYMBOL(dma_ops);
 
-static int __init dma_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-	return 0;
-}
-fs_initcall(dma_init);
-
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 				 dma_addr_t *dma_handle, gfp_t gfp,
 				 unsigned long attrs)
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 6fd1bf7..b8e7bb8 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -42,14 +42,7 @@
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
 		     struct task_struct *tsk)
 {
-	siginfo_t info;
-
-	info.si_signo	= si_signo;
-	info.si_errno	= 0;
-	info.si_code	= si_code;
-	info.si_addr	= (void __user *)address;
-
-	force_sig_info(si_signo, &info, tsk);
+	force_sig_fault(si_signo, si_code, (void __user *)address, tsk);
 }
 
 /*
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 8767e45..b42ba88 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -1,6 +1,6 @@
 config 64BIT
-	bool "64-bit kernel" if ARCH = "sparc"
-	default ARCH = "sparc64"
+	bool "64-bit kernel" if "$(ARCH)" = "sparc"
+	default "$(ARCH)" = "sparc64"
 	help
 	  SPARC is a family of RISC microprocessors designed and marketed by
 	  Sun Microsystems, incorporated.  They are very widely found in Sun
@@ -25,7 +25,6 @@
 	select RTC_CLASS
 	select RTC_DRV_M48T59
 	select RTC_SYSTOHC
-	select HAVE_DMA_API_DEBUG
 	select HAVE_ARCH_JUMP_LABEL if SPARC64
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_IPC_PARSE_VERSION
@@ -44,6 +43,8 @@
 	select ARCH_HAS_SG_CHAIN
 	select CPU_NO_EFFICIENT_FFS
 	select LOCKDEP_SMALL if LOCKDEP
+	select NEED_DMA_MAP_STATE
+	select NEED_SG_DMA_LENGTH
 
 config SPARC32
 	def_bool !64BIT
@@ -67,6 +68,7 @@
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_DEBUG_KMEMLEAK
+	select IOMMU_HELPER
 	select SPARSE_IRQ
 	select RTC_DRV_CMOS
 	select RTC_DRV_BQ4802
@@ -102,14 +104,6 @@
 	bool
 	default y if SPARC64
 
-config ARCH_DMA_ADDR_T_64BIT
-	bool
-	default y if ARCH_ATU
-
-config IOMMU_HELPER
-	bool
-	default y if SPARC64
-
 config STACKTRACE_SUPPORT
 	bool
 	default y if SPARC64
@@ -146,12 +140,6 @@
 	bool
 	default y if SPARC32
 
-config NEED_DMA_MAP_STATE
-	def_bool y
-
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config GENERIC_ISA_DMA
 	bool
 	default y if SPARC32
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index edac927..966a13d 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -39,7 +39,7 @@
 # sparc64
 #
 
-CHECKFLAGS    += -D__sparc__ -D__sparc_v9__ -D__arch64__ -m64
+CHECKFLAGS    += -D__sparc__ -D__sparc_v9__ -D__arch64__
 LDFLAGS       := -m elf64_sparc
 export BITS   := 64
 UTS_MACHINE   := sparc64
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 615283e..4eb51d2 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -11,7 +11,6 @@
 
 typedef u32		compat_size_t;
 typedef s32		compat_ssize_t;
-typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
@@ -39,16 +38,6 @@
 typedef u64		compat_u64;
 typedef u32		compat_uptr_t;
 
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
 struct compat_stat {
 	compat_dev_t	st_dev;
 	compat_ino_t	st_ino;
@@ -168,6 +157,7 @@
 	return (u32)(unsigned long)uptr;
 }
 
+#ifdef CONFIG_COMPAT
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	struct pt_regs *regs = current_thread_info()->kregs;
@@ -184,6 +174,7 @@
 
 	return (void __user *) usp;
 }
+#endif
 
 struct compat_ipc64_perm {
 	compat_key_t key;
@@ -201,10 +192,10 @@
 
 struct compat_semid64_ds {
 	struct compat_ipc64_perm sem_perm;
-	unsigned int	__pad1;
-	compat_time_t	sem_otime;
-	unsigned int	__pad2;
-	compat_time_t	sem_ctime;
+	unsigned int	sem_otime_high;
+	unsigned int	sem_otime;
+	unsigned int	sem_ctime_high;
+	unsigned int	sem_ctime;
 	u32		sem_nsems;
 	u32		__unused1;
 	u32		__unused2;
@@ -212,12 +203,12 @@
 
 struct compat_msqid64_ds {
 	struct compat_ipc64_perm msg_perm;
-	unsigned int	__pad1;
-	compat_time_t	msg_stime;
-	unsigned int	__pad2;
-	compat_time_t	msg_rtime;
-	unsigned int	__pad3;
-	compat_time_t	msg_ctime;
+	unsigned int	msg_stime_high;
+	unsigned int	msg_stime;
+	unsigned int	msg_rtime_high;
+	unsigned int	msg_rtime;
+	unsigned int	msg_ctime_high;
+	unsigned int	msg_ctime;
 	unsigned int	msg_cbytes;
 	unsigned int	msg_qnum;
 	unsigned int	msg_qbytes;
@@ -229,12 +220,12 @@
 
 struct compat_shmid64_ds {
 	struct compat_ipc64_perm shm_perm;
-	unsigned int	__pad1;
-	compat_time_t	shm_atime;
-	unsigned int	__pad2;
-	compat_time_t	shm_dtime;
-	unsigned int	__pad3;
-	compat_time_t	shm_ctime;
+	unsigned int	shm_atime_high;
+	unsigned int	shm_atime;
+	unsigned int	shm_dtime_high;
+	unsigned int	shm_dtime;
+	unsigned int	shm_ctime_high;
+	unsigned int	shm_ctime;
 	compat_size_t	shm_segsz;
 	compat_pid_t	shm_cpid;
 	compat_pid_t	shm_lpid;
@@ -243,6 +234,7 @@
 	unsigned int	__unused2;
 };
 
+#ifdef CONFIG_COMPAT
 static inline int is_compat_task(void)
 {
 	return test_thread_flag(TIF_32BIT);
@@ -254,5 +246,6 @@
 	return pt_regs_trap_type(current_pt_regs()) == 0x110;
 }
 #define in_compat_syscall in_compat_syscall
+#endif
 
 #endif /* _ASM_SPARC64_COMPAT_H */
diff --git a/arch/sparc/include/asm/hardirq_64.h b/arch/sparc/include/asm/hardirq_64.h
index f565402..75b92bf 100644
--- a/arch/sparc/include/asm/hardirq_64.h
+++ b/arch/sparc/include/asm/hardirq_64.h
@@ -10,8 +10,9 @@
 #include <asm/cpudata.h>
 
 #define __ARCH_IRQ_STAT
-#define local_softirq_pending() \
-	(local_cpu_data().__softirq_pending)
+
+#define local_softirq_pending_ref \
+	__cpu_data.__softirq_pending
 
 void ack_bad_irq(unsigned int irq);
 
diff --git a/include/linux/iommu-common.h b/arch/sparc/include/asm/iommu-common.h
similarity index 100%
rename from include/linux/iommu-common.h
rename to arch/sparc/include/asm/iommu-common.h
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
index 9ed6b54..0ef6ded 100644
--- a/arch/sparc/include/asm/iommu_64.h
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -17,7 +17,7 @@
 #define IOPTE_WRITE   0x0000000000000002UL
 
 #define IOMMU_NUM_CTXS	4096
-#include <linux/iommu-common.h>
+#include <asm/iommu-common.h>
 
 struct iommu_arena {
 	unsigned long	*map;
diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h
index 98917e4..cfc0ee9 100644
--- a/arch/sparc/include/asm/pci_32.h
+++ b/arch/sparc/include/asm/pci_32.h
@@ -17,10 +17,6 @@
 
 #define PCI_IRQ_NONE		0xffffffff
 
-/* Dynamic DMA mapping stuff.
- */
-#define PCI_DMA_BUS_IS_PHYS	(0)
-
 #endif /* __KERNEL__ */
 
 #ifndef CONFIG_LEON_PCI
diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h
index 671274e..fac7781 100644
--- a/arch/sparc/include/asm/pci_64.h
+++ b/arch/sparc/include/asm/pci_64.h
@@ -17,12 +17,6 @@
 
 #define PCI_IRQ_NONE		0xffffffff
 
-/* The PCI address space does not equal the physical memory
- * address space.  The networking and block device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS	(0)
-
 /* PCI IOMMU mapping bypass support. */
 
 /* PCI 64-bit addressing works for all slots on all controller
diff --git a/arch/sparc/include/uapi/asm/jsflash.h b/arch/sparc/include/uapi/asm/jsflash.h
deleted file mode 100644
index 68c98a5..0000000
--- a/arch/sparc/include/uapi/asm/jsflash.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * jsflash.h: OS Flash SIMM support for JavaStations.
- *
- * Copyright (C) 1999  Pete Zaitcev
- */
-
-#ifndef _SPARC_JSFLASH_H
-#define _SPARC_JSFLASH_H
-
-#ifndef _SPARC_TYPES_H
-#include <linux/types.h>
-#endif
-
-/*
- * Semantics of the offset is a full address.
- * Hardcode it or get it from probe ioctl.
- *
- * We use full bus address, so that we would be
- * automatically compatible with possible future systems.
- */
-
-#define JSFLASH_IDENT   (('F'<<8)|54)
-struct jsflash_ident_arg {
-	__u64 off;                /* 0x20000000 is included */
-	__u32 size;
-	char name[32];		/* With trailing zero */
-};
-
-#define JSFLASH_ERASE   (('F'<<8)|55)
-/* Put 0 as argument, may be flags or sector number... */
-
-#define JSFLASH_PROGRAM (('F'<<8)|56)
-struct jsflash_program_arg {
-	__u64 data;		/* char* for sparc and sparc64 */
-	__u64 off;
-	__u32 size;
-};
-
-#endif /* _SPARC_JSFLASH_H */
diff --git a/arch/sparc/include/uapi/asm/msgbuf.h b/arch/sparc/include/uapi/asm/msgbuf.h
index b601c4f..ffc46c2 100644
--- a/arch/sparc/include/uapi/asm/msgbuf.h
+++ b/arch/sparc/include/uapi/asm/msgbuf.h
@@ -8,25 +8,22 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
-
-#if defined(__sparc__) && defined(__arch64__)
-# define PADDING(x)
-#else
-# define PADDING(x) unsigned int x;
-#endif
-
-
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
-	PADDING(__pad1)
+#if defined(__sparc__) && defined(__arch64__)
 	__kernel_time_t msg_stime;	/* last msgsnd time */
-	PADDING(__pad2)
 	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	PADDING(__pad3)
 	__kernel_time_t msg_ctime;	/* last change time */
+#else
+	unsigned long msg_stime_high;
+	unsigned long msg_stime;	/* last msgsnd time */
+	unsigned long msg_rtime_high;
+	unsigned long msg_rtime;	/* last msgrcv time */
+	unsigned long msg_ctime_high;
+	unsigned long msg_ctime;	/* last change time */
+#endif
 	unsigned long  msg_cbytes;	/* current number of bytes on queue */
 	unsigned long  msg_qnum;	/* number of messages in queue */
 	unsigned long  msg_qbytes;	/* max number of bytes on queue */
@@ -35,5 +32,4 @@
 	unsigned long  __unused1;
 	unsigned long  __unused2;
 };
-#undef PADDING
 #endif /* _SPARC_MSGBUF_H */
diff --git a/arch/sparc/include/uapi/asm/sembuf.h b/arch/sparc/include/uapi/asm/sembuf.h
index f49b0ff..f3d309c 100644
--- a/arch/sparc/include/uapi/asm/sembuf.h
+++ b/arch/sparc/include/uapi/asm/sembuf.h
@@ -8,25 +8,23 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
-#if defined(__sparc__) && defined(__arch64__)
-# define PADDING(x)
-#else
-# define PADDING(x) unsigned int x;
-#endif
 
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
-	PADDING(__pad1)
+#if defined(__sparc__) && defined(__arch64__)
 	__kernel_time_t	sem_otime;		/* last semop time */
-	PADDING(__pad2)
 	__kernel_time_t	sem_ctime;		/* last change time */
+#else
+	unsigned long	sem_otime_high;
+	unsigned long	sem_otime;		/* last semop time */
+	unsigned long	sem_ctime_high;
+	unsigned long	sem_ctime;		/* last change time */
+#endif
 	unsigned long	sem_nsems;		/* no. of semaphores in array */
 	unsigned long	__unused1;
 	unsigned long	__unused2;
 };
-#undef PADDING
 
 #endif /* _SPARC64_SEMBUF_H */
diff --git a/arch/sparc/include/uapi/asm/shmbuf.h b/arch/sparc/include/uapi/asm/shmbuf.h
index 286631d..06618b8 100644
--- a/arch/sparc/include/uapi/asm/shmbuf.h
+++ b/arch/sparc/include/uapi/asm/shmbuf.h
@@ -8,24 +8,23 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
 
-#if defined(__sparc__) && defined(__arch64__)
-# define PADDING(x)
-#else
-# define PADDING(x) unsigned int x;
-#endif
-
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
-	PADDING(__pad1)
+#if defined(__sparc__) && defined(__arch64__)
 	__kernel_time_t		shm_atime;	/* last attach time */
-	PADDING(__pad2)
 	__kernel_time_t		shm_dtime;	/* last detach time */
-	PADDING(__pad3)
 	__kernel_time_t		shm_ctime;	/* last change time */
+#else
+	unsigned long		shm_atime_high;
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_dtime_high;
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_ctime_high;
+	unsigned long		shm_ctime;	/* last change time */
+#endif
 	size_t			shm_segsz;	/* size of segment (bytes) */
 	__kernel_pid_t		shm_cpid;	/* pid of creator */
 	__kernel_pid_t		shm_lpid;	/* pid of last operator */
@@ -46,6 +45,4 @@
 	unsigned long	__unused4;
 };
 
-#undef PADDING
-
 #endif /* _SPARC_SHMBUF_H */
diff --git a/arch/sparc/include/uapi/asm/siginfo.h b/arch/sparc/include/uapi/asm/siginfo.h
index 896ce44..e704955 100644
--- a/arch/sparc/include/uapi/asm/siginfo.h
+++ b/arch/sparc/include/uapi/asm/siginfo.h
@@ -18,13 +18,6 @@
 #define SI_NOINFO	32767		/* no information in siginfo_t */
 
 /*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
-/*
  * SIGEMT si_codes
  */
 #define EMT_TAGOVF	1	/* tag overflow */
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 76cb577..cf86408 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -59,7 +59,7 @@
 
 obj-$(CONFIG_SPARC64)   += reboot.o
 obj-$(CONFIG_SPARC64)   += sysfs.o
-obj-$(CONFIG_SPARC64)   += iommu.o
+obj-$(CONFIG_SPARC64)   += iommu.o iommu-common.o
 obj-$(CONFIG_SPARC64)   += central.o
 obj-$(CONFIG_SPARC64)   += starfire.o
 obj-$(CONFIG_SPARC64)   += power.o
@@ -74,8 +74,6 @@
 obj-$(CONFIG_SPARC64)	+= nmi.o
 obj-$(CONFIG_SPARC64_SMP) += cpumap.o
 
-obj-y                     += dma.o
-
 obj-$(CONFIG_PCIC_PCI)    += pcic.o
 obj-$(CONFIG_LEON_PCI)    += leon_pci.o
 obj-$(CONFIG_SPARC_GRPCI2)+= leon_pci_grpci2.o
diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c
deleted file mode 100644
index f73e759..0000000
--- a/arch/sparc/kernel/dma.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include <linux/dma-debug.h>
-
-#define PREALLOC_DMA_DEBUG_ENTRIES       (1 << 15)
-
-static int __init dma_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-	return 0;
-}
-fs_initcall(dma_init);
diff --git a/arch/sparc/kernel/iommu-common.c b/arch/sparc/kernel/iommu-common.c
new file mode 100644
index 0000000..59cb166
--- /dev/null
+++ b/arch/sparc/kernel/iommu-common.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IOMMU mmap management and range allocation functions.
+ * Based almost entirely upon the powerpc iommu allocator.
+ */
+
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/iommu-helper.h>
+#include <linux/dma-mapping.h>
+#include <linux/hash.h>
+#include <asm/iommu-common.h>
+
+static unsigned long iommu_large_alloc = 15;
+
+static	DEFINE_PER_CPU(unsigned int, iommu_hash_common);
+
+static inline bool need_flush(struct iommu_map_table *iommu)
+{
+	return ((iommu->flags & IOMMU_NEED_FLUSH) != 0);
+}
+
+static inline void set_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags |= IOMMU_NEED_FLUSH;
+}
+
+static inline void clear_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags &= ~IOMMU_NEED_FLUSH;
+}
+
+static void setup_iommu_pool_hash(void)
+{
+	unsigned int i;
+	static bool do_once;
+
+	if (do_once)
+		return;
+	do_once = true;
+	for_each_possible_cpu(i)
+		per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+}
+
+/*
+ * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
+ * is the number of table entries. If `large_pool' is set to true,
+ * the top 1/4 of the table will be set aside for pool allocations
+ * of more than iommu_large_alloc pages.
+ */
+void iommu_tbl_pool_init(struct iommu_map_table *iommu,
+			 unsigned long num_entries,
+			 u32 table_shift,
+			 void (*lazy_flush)(struct iommu_map_table *),
+			 bool large_pool, u32 npools,
+			 bool skip_span_boundary_check)
+{
+	unsigned int start, i;
+	struct iommu_pool *p = &(iommu->large_pool);
+
+	setup_iommu_pool_hash();
+	if (npools == 0)
+		iommu->nr_pools = IOMMU_NR_POOLS;
+	else
+		iommu->nr_pools = npools;
+	BUG_ON(npools > IOMMU_NR_POOLS);
+
+	iommu->table_shift = table_shift;
+	iommu->lazy_flush = lazy_flush;
+	start = 0;
+	if (skip_span_boundary_check)
+		iommu->flags |= IOMMU_NO_SPAN_BOUND;
+	if (large_pool)
+		iommu->flags |= IOMMU_HAS_LARGE_POOL;
+
+	if (!large_pool)
+		iommu->poolsize = num_entries/iommu->nr_pools;
+	else
+		iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
+	for (i = 0; i < iommu->nr_pools; i++) {
+		spin_lock_init(&(iommu->pools[i].lock));
+		iommu->pools[i].start = start;
+		iommu->pools[i].hint = start;
+		start += iommu->poolsize; /* start for next pool */
+		iommu->pools[i].end = start - 1;
+	}
+	if (!large_pool)
+		return;
+	/* initialize large_pool */
+	spin_lock_init(&(p->lock));
+	p->start = start;
+	p->hint = p->start;
+	p->end = num_entries;
+}
+
+unsigned long iommu_tbl_range_alloc(struct device *dev,
+				struct iommu_map_table *iommu,
+				unsigned long npages,
+				unsigned long *handle,
+				unsigned long mask,
+				unsigned int align_order)
+{
+	unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
+	unsigned long n, end, start, limit, boundary_size;
+	struct iommu_pool *pool;
+	int pass = 0;
+	unsigned int pool_nr;
+	unsigned int npools = iommu->nr_pools;
+	unsigned long flags;
+	bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
+	bool largealloc = (large_pool && npages > iommu_large_alloc);
+	unsigned long shift;
+	unsigned long align_mask = 0;
+
+	if (align_order > 0)
+		align_mask = ~0ul >> (BITS_PER_LONG - align_order);
+
+	/* Sanity check */
+	if (unlikely(npages == 0)) {
+		WARN_ON_ONCE(1);
+		return IOMMU_ERROR_CODE;
+	}
+
+	if (largealloc) {
+		pool = &(iommu->large_pool);
+		pool_nr = 0; /* to keep compiler happy */
+	} else {
+		/* pick out pool_nr */
+		pool_nr =  pool_hash & (npools - 1);
+		pool = &(iommu->pools[pool_nr]);
+	}
+	spin_lock_irqsave(&pool->lock, flags);
+
+ again:
+	if (pass == 0 && handle && *handle &&
+	    (*handle >= pool->start) && (*handle < pool->end))
+		start = *handle;
+	else
+		start = pool->hint;
+
+	limit = pool->end;
+
+	/* The case below can happen if we have a small segment appended
+	 * to a large, or when the previous alloc was at the very end of
+	 * the available space. If so, go back to the beginning. If a
+	 * flush is needed, it will get done based on the return value
+	 * from iommu_area_alloc() below.
+	 */
+	if (start >= limit)
+		start = pool->start;
+	shift = iommu->table_map_base >> iommu->table_shift;
+	if (limit + shift > mask) {
+		limit = mask - shift + 1;
+		/* If we're constrained on address range, first try
+		 * at the masked hint to avoid O(n) search complexity,
+		 * but on second pass, start at 0 in pool 0.
+		 */
+		if ((start & mask) >= limit || pass > 0) {
+			spin_unlock(&(pool->lock));
+			pool = &(iommu->pools[0]);
+			spin_lock(&(pool->lock));
+			start = pool->start;
+		} else {
+			start &= mask;
+		}
+	}
+
+	if (dev)
+		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				      1 << iommu->table_shift);
+	else
+		boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
+
+	boundary_size = boundary_size >> iommu->table_shift;
+	/*
+	 * if the skip_span_boundary_check had been set during init, we set
+	 * things up so that iommu_is_span_boundary() merely checks if the
+	 * (index + npages) < num_tsb_entries
+	 */
+	if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
+		shift = 0;
+		boundary_size = iommu->poolsize * iommu->nr_pools;
+	}
+	n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
+			     boundary_size, align_mask);
+	if (n == -1) {
+		if (likely(pass == 0)) {
+			/* First failure, rescan from the beginning.  */
+			pool->hint = pool->start;
+			set_flush(iommu);
+			pass++;
+			goto again;
+		} else if (!largealloc && pass <= iommu->nr_pools) {
+			spin_unlock(&(pool->lock));
+			pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
+			pool = &(iommu->pools[pool_nr]);
+			spin_lock(&(pool->lock));
+			pool->hint = pool->start;
+			set_flush(iommu);
+			pass++;
+			goto again;
+		} else {
+			/* give up */
+			n = IOMMU_ERROR_CODE;
+			goto bail;
+		}
+	}
+	if (iommu->lazy_flush &&
+	    (n < pool->hint || need_flush(iommu))) {
+		clear_flush(iommu);
+		iommu->lazy_flush(iommu);
+	}
+
+	end = n + npages;
+	pool->hint = end;
+
+	/* Update handle for SG allocations */
+	if (handle)
+		*handle = end;
+bail:
+	spin_unlock_irqrestore(&(pool->lock), flags);
+
+	return n;
+}
+
+static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
+				   unsigned long entry)
+{
+	struct iommu_pool *p;
+	unsigned long largepool_start = tbl->large_pool.start;
+	bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
+
+	/* The large pool is the last pool at the top of the table */
+	if (large_pool && entry >= largepool_start) {
+		p = &tbl->large_pool;
+	} else {
+		unsigned int pool_nr = entry / tbl->poolsize;
+
+		BUG_ON(pool_nr >= tbl->nr_pools);
+		p = &tbl->pools[pool_nr];
+	}
+	return p;
+}
+
+/* Caller supplies the index of the entry into the iommu map table
+ * itself when the mapping from dma_addr to the entry is not the
+ * default addr->entry mapping below.
+ */
+void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
+			  unsigned long npages, unsigned long entry)
+{
+	struct iommu_pool *pool;
+	unsigned long flags;
+	unsigned long shift = iommu->table_shift;
+
+	if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */
+		entry = (dma_addr - iommu->table_map_base) >> shift;
+	pool = get_pool(iommu, entry);
+
+	spin_lock_irqsave(&(pool->lock), flags);
+	bitmap_clear(iommu->map, entry, npages);
+	spin_unlock_irqrestore(&(pool->lock), flags);
+}
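
For reference, a fragment showing how a caller wires up the now arch-private allocator, assuming dev, npages and dma_addr come from the surrounding mapping code (the real callers are iommu.c, ldc.c and pci_sun4v.c, adjusted below):

	struct iommu_map_table tbl;	/* .map and .table_map_base set up elsewhere */
	unsigned long n;

	/* 1 << 16 entries, 8K pages (table_shift 13), no lazy flush, default pools */
	iommu_tbl_pool_init(&tbl, 1UL << 16, 13, NULL, false, 0, false);

	n = iommu_tbl_range_alloc(dev, &tbl, npages, NULL, ~0UL, 0);
	if (n != IOMMU_ERROR_CODE) {
		/* ... program the TSB entries starting at n ... */
		iommu_tbl_range_free(&tbl, dma_addr, npages, n);
	}
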
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index b08dc34..40d008b 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -14,7 +14,7 @@
 #include <linux/errno.h>
 #include <linux/iommu-helper.h>
 #include <linux/bitmap.h>
-#include <linux/iommu-common.h>
+#include <asm/iommu-common.h>
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 3bcef9c..cca9134 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -678,25 +678,14 @@
 
 	return 0;
 }
-
-static int sparc_io_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sparc_io_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations sparc_io_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= sparc_io_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* CONFIG_PROC_FS */
 
 static void register_proc_sparc_ioport(void)
 {
 #ifdef CONFIG_PROC_FS
-	proc_create_data("io_map", 0, NULL, &sparc_io_proc_fops, &sparc_iomap);
-	proc_create_data("dvma_map", 0, NULL, &sparc_io_proc_fops, &_sparc_dvma);
+	proc_create_single_data("io_map", 0, NULL, sparc_io_proc_show,
+			&sparc_iomap);
+	proc_create_single_data("dvma_map", 0, NULL, sparc_io_proc_show,
+			&_sparc_dvma);
 #endif
 }
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 86b625f..c0fa3ef 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -16,7 +16,7 @@
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/bitmap.h>
-#include <linux/iommu-common.h>
+#include <asm/iommu-common.h>
 
 #include <asm/hypervisor.h>
 #include <asm/iommu.h>
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
index 2493672..565d9ac 100644
--- a/arch/sparc/kernel/pci_sun4v.c
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -16,7 +16,7 @@
 #include <linux/export.h>
 #include <linux/log2.h>
 #include <linux/of_device.h>
-#include <linux/iommu-common.h>
+#include <asm/iommu-common.h>
 
 #include <asm/iommu.h>
 #include <asm/irq.h>
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 454a8af..6c08608 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -518,14 +518,7 @@
 
 static void stack_unaligned(unsigned long sp)
 {
-	siginfo_t info;
-
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = (void __user *) sp;
-	info.si_trapno = 0;
-	force_sig_info(SIGBUS, &info, current);
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current);
 }
 
 void fault_in_user_windows(void)
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index e8c3cb6..7f3d9c5 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -147,17 +147,11 @@
 asmlinkage void
 sparc_breakpoint (struct pt_regs *regs)
 {
-	siginfo_t info;
 
 #ifdef DEBUG_SPARC_BREAKPOINT
         printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc);
 #endif
-	info.si_signo = SIGTRAP;
-	info.si_errno = 0;
-	info.si_code = TRAP_BRKPT;
-	info.si_addr = (void __user *)regs->pc;
-	info.si_trapno = 0;
-	force_sig_info(SIGTRAP, &info, current);
+	force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, current);
 
 #ifdef DEBUG_SPARC_BREAKPOINT
 	printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc);
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 9ef8de6..7e49bbc9 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -502,7 +502,6 @@
 asmlinkage void sparc_breakpoint(struct pt_regs *regs)
 {
 	enum ctx_state prev_state = exception_enter();
-	siginfo_t info;
 
 	if (test_thread_flag(TIF_32BIT)) {
 		regs->tpc &= 0xffffffff;
@@ -511,12 +510,7 @@
 #ifdef DEBUG_SPARC_BREAKPOINT
         printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc);
 #endif
-	info.si_signo = SIGTRAP;
-	info.si_errno = 0;
-	info.si_code = TRAP_BRKPT;
-	info.si_addr = (void __user *)regs->tpc;
-	info.si_trapno = 0;
-	force_sig_info(SIGTRAP, &info, current);
+	force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0, current);
 #ifdef DEBUG_SPARC_BREAKPOINT
 	printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc);
 #endif
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
index b1ed763..bcdfc61 100644
--- a/arch/sparc/kernel/traps_32.c
+++ b/arch/sparc/kernel/traps_32.c
@@ -93,8 +93,6 @@
 
 void do_hw_interrupt(struct pt_regs *regs, unsigned long type)
 {
-	siginfo_t info;
-
 	if(type < 0x80) {
 		/* Sun OS's puke from bad traps, Linux survives! */
 		printk("Unimplemented Sparc TRAP, type = %02lx\n", type);
@@ -104,19 +102,13 @@
 	if(regs->psr & PSR_PS)
 		die_if_kernel("Kernel bad trap", regs);
 
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_ILLTRP;
-	info.si_addr = (void __user *)regs->pc;
-	info.si_trapno = type - 0x80;
-	force_sig_info(SIGILL, &info, current);
+	force_sig_fault(SIGILL, ILL_ILLTRP,
+			(void __user *)regs->pc, type - 0x80, current);
 }
 
 void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc,
 			    unsigned long psr)
 {
-	siginfo_t info;
-
 	if(psr & PSR_PS)
 		die_if_kernel("Kernel illegal instruction", regs);
 #ifdef TRAP_DEBUG
@@ -124,27 +116,15 @@
 	       regs->pc, *(unsigned long *)regs->pc);
 #endif
 
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_ILLOPC;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	send_sig_info(SIGILL, &info, current);
+	send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current);
 }
 
 void do_priv_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc,
 			 unsigned long psr)
 {
-	siginfo_t info;
-
 	if(psr & PSR_PS)
 		die_if_kernel("Penguin instruction from Penguin mode??!?!", regs);
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_PRVOPC;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	send_sig_info(SIGILL, &info, current);
+	send_sig_fault(SIGILL, ILL_PRVOPC, (void __user *)pc, 0, current);
 }
 
 /* XXX User may want to be allowed to do this. XXX */
@@ -152,8 +132,6 @@
 void do_memaccess_unaligned(struct pt_regs *regs, unsigned long pc, unsigned long npc,
 			    unsigned long psr)
 {
-	siginfo_t info;
-
 	if(regs->psr & PSR_PS) {
 		printk("KERNEL MNA at pc %08lx npc %08lx called by %08lx\n", pc, npc,
 		       regs->u_regs[UREG_RETPC]);
@@ -165,12 +143,9 @@
 	instruction_dump ((unsigned long *) regs->pc);
 	printk ("do_MNA!\n");
 #endif
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = /* FIXME: Should dig out mna address */ (void *)0;
-	info.si_trapno = 0;
-	send_sig_info(SIGBUS, &info, current);
+	send_sig_fault(SIGBUS, BUS_ADRALN,
+		       /* FIXME: Should dig out mna address */ (void *)0,
+		       0, current);
 }
 
 static unsigned long init_fsr = 0x0UL;
@@ -226,9 +201,9 @@
 		 unsigned long psr)
 {
 	static int calls;
-	siginfo_t info;
 	unsigned long fsr;
 	int ret = 0;
+	int code;
 #ifndef CONFIG_SMP
 	struct task_struct *fpt = last_task_used_math;
 #else
@@ -303,24 +278,20 @@
 	}
 
 	fsr = fpt->thread.fsr;
-	info.si_signo = SIGFPE;
-	info.si_errno = 0;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	info.si_code = FPE_FIXME;
+	code = FPE_FLTUNK;
 	if ((fsr & 0x1c000) == (1 << 14)) {
 		if (fsr & 0x10)
-			info.si_code = FPE_FLTINV;
+			code = FPE_FLTINV;
 		else if (fsr & 0x08)
-			info.si_code = FPE_FLTOVF;
+			code = FPE_FLTOVF;
 		else if (fsr & 0x04)
-			info.si_code = FPE_FLTUND;
+			code = FPE_FLTUND;
 		else if (fsr & 0x02)
-			info.si_code = FPE_FLTDIV;
+			code = FPE_FLTDIV;
 		else if (fsr & 0x01)
-			info.si_code = FPE_FLTRES;
+			code = FPE_FLTRES;
 	}
-	send_sig_info(SIGFPE, &info, fpt);
+	send_sig_fault(SIGFPE, code, (void __user *)pc, 0, fpt);
 #ifndef CONFIG_SMP
 	last_task_used_math = NULL;
 #endif
@@ -332,16 +303,9 @@
 void handle_tag_overflow(struct pt_regs *regs, unsigned long pc, unsigned long npc,
 			 unsigned long psr)
 {
-	siginfo_t info;
-
 	if(psr & PSR_PS)
 		die_if_kernel("Penguin overflow trap from kernel mode", regs);
-	info.si_signo = SIGEMT;
-	info.si_errno = 0;
-	info.si_code = EMT_TAGOVF;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	send_sig_info(SIGEMT, &info, current);
+	send_sig_fault(SIGEMT, EMT_TAGOVF, (void __user *)pc, 0, current);
 }
 
 void handle_watchpoint(struct pt_regs *regs, unsigned long pc, unsigned long npc,
@@ -359,61 +323,33 @@
 void handle_reg_access(struct pt_regs *regs, unsigned long pc, unsigned long npc,
 		       unsigned long psr)
 {
-	siginfo_t info;
-
 #ifdef TRAP_DEBUG
 	printk("Register Access Exception at PC %08lx NPC %08lx PSR %08lx\n",
 	       pc, npc, psr);
 #endif
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_OBJERR;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	force_sig_info(SIGBUS, &info, current);
+	force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0, current);
 }
 
 void handle_cp_disabled(struct pt_regs *regs, unsigned long pc, unsigned long npc,
 			unsigned long psr)
 {
-	siginfo_t info;
-
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_COPROC;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	send_sig_info(SIGILL, &info, current);
+	send_sig_fault(SIGILL, ILL_COPROC, (void __user *)pc, 0, current);
 }
 
 void handle_cp_exception(struct pt_regs *regs, unsigned long pc, unsigned long npc,
 			 unsigned long psr)
 {
-	siginfo_t info;
-
 #ifdef TRAP_DEBUG
 	printk("Co-Processor Exception at PC %08lx NPC %08lx PSR %08lx\n",
 	       pc, npc, psr);
 #endif
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_COPROC;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	send_sig_info(SIGILL, &info, current);
+	send_sig_fault(SIGILL, ILL_COPROC, (void __user *)pc, 0, current);
 }
 
 void handle_hw_divzero(struct pt_regs *regs, unsigned long pc, unsigned long npc,
 		       unsigned long psr)
 {
-	siginfo_t info;
-
-	info.si_signo = SIGFPE;
-	info.si_errno = 0;
-	info.si_code = FPE_INTDIV;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	send_sig_info(SIGFPE, &info, current);
+	send_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)pc, 0, current);
 }
 
 #ifdef CONFIG_DEBUG_BUGVERBOSE
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 462a21a..aa624ed 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -87,7 +87,6 @@
 void bad_trap(struct pt_regs *regs, long lvl)
 {
 	char buffer[36];
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "bad trap", regs,
 		       0, lvl, SIGTRAP) == NOTIFY_STOP)
@@ -107,12 +106,8 @@
 		regs->tpc &= 0xffffffff;
 		regs->tnpc &= 0xffffffff;
 	}
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_ILLTRP;
-	info.si_addr = (void __user *)regs->tpc;
-	info.si_trapno = lvl;
-	force_sig_info(SIGILL, &info, current);
+	force_sig_fault(SIGILL, ILL_ILLTRP,
+			(void __user *)regs->tpc, lvl, current);
 }
 
 void bad_trap_tl1(struct pt_regs *regs, long lvl)
@@ -191,7 +186,6 @@
 void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
 {
 	enum ctx_state prev_state = exception_enter();
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "instruction access exception", regs,
 		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
@@ -206,12 +200,8 @@
 		regs->tpc &= 0xffffffff;
 		regs->tnpc &= 0xffffffff;
 	}
-	info.si_signo = SIGSEGV;
-	info.si_errno = 0;
-	info.si_code = SEGV_MAPERR;
-	info.si_addr = (void __user *)regs->tpc;
-	info.si_trapno = 0;
-	force_sig_info(SIGSEGV, &info, current);
+	force_sig_fault(SIGSEGV, SEGV_MAPERR,
+			(void __user *)regs->tpc, 0, current);
 out:
 	exception_exit(prev_state);
 }
@@ -230,7 +220,6 @@
 {
 	unsigned short type = (type_ctx >> 16);
 	unsigned short ctx  = (type_ctx & 0xffff);
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "instruction access exception", regs,
 		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
@@ -247,12 +236,7 @@
 		regs->tpc &= 0xffffffff;
 		regs->tnpc &= 0xffffffff;
 	}
-	info.si_signo = SIGSEGV;
-	info.si_errno = 0;
-	info.si_code = SEGV_MAPERR;
-	info.si_addr = (void __user *) addr;
-	info.si_trapno = 0;
-	force_sig_info(SIGSEGV, &info, current);
+	force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0, current);
 }
 
 void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
@@ -307,7 +291,6 @@
 void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
 {
 	enum ctx_state prev_state = exception_enter();
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "data access exception", regs,
 		       0, 0x30, SIGTRAP) == NOTIFY_STOP)
@@ -338,12 +321,7 @@
 	if (is_no_fault_exception(regs))
 		return;
 
-	info.si_signo = SIGSEGV;
-	info.si_errno = 0;
-	info.si_code = SEGV_MAPERR;
-	info.si_addr = (void __user *)sfar;
-	info.si_trapno = 0;
-	force_sig_info(SIGSEGV, &info, current);
+	force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0, current);
 out:
 	exception_exit(prev_state);
 }
@@ -559,8 +537,6 @@
 
 static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs)
 {
-	siginfo_t info;
-
 	printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] "
 	       "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n",
 	       smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1);
@@ -595,12 +571,7 @@
 		regs->tpc &= 0xffffffff;
 		regs->tnpc &= 0xffffffff;
 	}
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_OBJERR;
-	info.si_addr = (void *)0;
-	info.si_trapno = 0;
-	force_sig_info(SIGBUS, &info, current);
+	force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0, current);
 }
 
 void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
@@ -2190,7 +2161,6 @@
 
 	if (attrs & SUN4V_ERR_ATTRS_MEMORY) {
 		unsigned long addr = ent->err_raddr;
-		siginfo_t info;
 
 		if (addr == ~(u64)0) {
 			/* This seems highly unlikely to ever occur */
@@ -2211,21 +2181,13 @@
 				addr += PAGE_SIZE;
 			}
 		}
-		info.si_signo = SIGKILL;
-		info.si_errno = 0;
-		info.si_trapno = 0;
-		force_sig_info(info.si_signo, &info, current);
+		force_sig(SIGKILL, current);
 
 		return true;
 	}
 	if (attrs & SUN4V_ERR_ATTRS_PIO) {
-		siginfo_t info;
-
-		info.si_signo = SIGBUS;
-		info.si_code = BUS_ADRERR;
-		info.si_addr = (void __user *)sun4v_get_vaddr(regs);
-		force_sig_info(info.si_signo, &info, current);
-
+		force_sig_fault(SIGBUS, BUS_ADRERR,
+				(void __user *)sun4v_get_vaddr(regs), 0, current);
 		return true;
 	}
 
@@ -2362,30 +2324,27 @@
 		regs->tnpc += 4;
 	} else {
 		unsigned long fsr = current_thread_info()->xfsr[0];
-		siginfo_t info;
+		int code;
 
 		if (test_thread_flag(TIF_32BIT)) {
 			regs->tpc &= 0xffffffff;
 			regs->tnpc &= 0xffffffff;
 		}
-		info.si_signo = SIGFPE;
-		info.si_errno = 0;
-		info.si_addr = (void __user *)regs->tpc;
-		info.si_trapno = 0;
-		info.si_code = FPE_FIXME;
+		code = FPE_FLTUNK;
 		if ((fsr & 0x1c000) == (1 << 14)) {
 			if (fsr & 0x10)
-				info.si_code = FPE_FLTINV;
+				code = FPE_FLTINV;
 			else if (fsr & 0x08)
-				info.si_code = FPE_FLTOVF;
+				code = FPE_FLTOVF;
 			else if (fsr & 0x04)
-				info.si_code = FPE_FLTUND;
+				code = FPE_FLTUND;
 			else if (fsr & 0x02)
-				info.si_code = FPE_FLTDIV;
+				code = FPE_FLTDIV;
 			else if (fsr & 0x01)
-				info.si_code = FPE_FLTRES;
+				code = FPE_FLTRES;
 		}
-		force_sig_info(SIGFPE, &info, current);
+		force_sig_fault(SIGFPE, code,
+				(void __user *)regs->tpc, 0, current);
 	}
 }
 
@@ -2428,7 +2387,6 @@
 void do_tof(struct pt_regs *regs)
 {
 	enum ctx_state prev_state = exception_enter();
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "tagged arithmetic overflow", regs,
 		       0, 0x26, SIGEMT) == NOTIFY_STOP)
@@ -2440,12 +2398,8 @@
 		regs->tpc &= 0xffffffff;
 		regs->tnpc &= 0xffffffff;
 	}
-	info.si_signo = SIGEMT;
-	info.si_errno = 0;
-	info.si_code = EMT_TAGOVF;
-	info.si_addr = (void __user *)regs->tpc;
-	info.si_trapno = 0;
-	force_sig_info(SIGEMT, &info, current);
+	force_sig_fault(SIGEMT, EMT_TAGOVF,
+			(void __user *)regs->tpc, 0, current);
 out:
 	exception_exit(prev_state);
 }
@@ -2453,7 +2407,6 @@
 void do_div0(struct pt_regs *regs)
 {
 	enum ctx_state prev_state = exception_enter();
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "integer division by zero", regs,
 		       0, 0x28, SIGFPE) == NOTIFY_STOP)
@@ -2465,12 +2418,8 @@
 		regs->tpc &= 0xffffffff;
 		regs->tnpc &= 0xffffffff;
 	}
-	info.si_signo = SIGFPE;
-	info.si_errno = 0;
-	info.si_code = FPE_INTDIV;
-	info.si_addr = (void __user *)regs->tpc;
-	info.si_trapno = 0;
-	force_sig_info(SIGFPE, &info, current);
+	force_sig_fault(SIGFPE, FPE_INTDIV,
+			(void __user *)regs->tpc, 0, current);
 out:
 	exception_exit(prev_state);
 }
@@ -2632,7 +2581,6 @@
 	unsigned long pc = regs->tpc;
 	unsigned long tstate = regs->tstate;
 	u32 insn;
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "illegal instruction", regs,
 		       0, 0x10, SIGILL) == NOTIFY_STOP)
@@ -2666,12 +2614,7 @@
 			}
 		}
 	}
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_ILLOPC;
-	info.si_addr = (void __user *)pc;
-	info.si_trapno = 0;
-	force_sig_info(SIGILL, &info, current);
+	force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current);
 out:
 	exception_exit(prev_state);
 }
@@ -2679,7 +2622,6 @@
 void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
 {
 	enum ctx_state prev_state = exception_enter();
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "memory address unaligned", regs,
 		       0, 0x34, SIGSEGV) == NOTIFY_STOP)
@@ -2692,20 +2634,13 @@
 	if (is_no_fault_exception(regs))
 		return;
 
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = (void __user *)sfar;
-	info.si_trapno = 0;
-	force_sig_info(SIGBUS, &info, current);
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0, current);
 out:
 	exception_exit(prev_state);
 }
 
 void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
 {
-	siginfo_t info;
-
 	if (notify_die(DIE_TRAP, "memory address unaligned", regs,
 		       0, 0x34, SIGSEGV) == NOTIFY_STOP)
 		return;
@@ -2717,12 +2652,7 @@
 	if (is_no_fault_exception(regs))
 		return;
 
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = (void __user *) addr;
-	info.si_trapno = 0;
-	force_sig_info(SIGBUS, &info, current);
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0, current);
 }
 
 /* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI
@@ -2775,7 +2705,6 @@
 void do_privop(struct pt_regs *regs)
 {
 	enum ctx_state prev_state = exception_enter();
-	siginfo_t info;
 
 	if (notify_die(DIE_TRAP, "privileged operation", regs,
 		       0, 0x11, SIGILL) == NOTIFY_STOP)
@@ -2785,12 +2714,8 @@
 		regs->tpc &= 0xffffffff;
 		regs->tnpc &= 0xffffffff;
 	}
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_PRVOPC;
-	info.si_addr = (void __user *)regs->tpc;
-	info.si_trapno = 0;
-	force_sig_info(SIGILL, &info, current);
+	force_sig_fault(SIGILL, ILL_PRVOPC,
+			(void __user *)regs->tpc, 0, current);
 out:
 	exception_exit(prev_state);
 }
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 7642d7e..64ac8c0 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -311,14 +311,9 @@
 
 static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
 {
-	siginfo_t info;
-
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = (void __user *)safe_compute_effective_address(regs, insn);
-	info.si_trapno = 0;
-	send_sig_info(SIGBUS, &info, current);
+	send_sig_fault(SIGBUS, BUS_ADRALN,
+		       (void __user *)safe_compute_effective_address(regs, insn),
+		       0, current);
 }
 
 asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index a8103a8..9f75b64 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -127,19 +127,11 @@
 static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs,
 			       unsigned long addr)
 {
-	siginfo_t info;
-
-	info.si_signo = sig;
-	info.si_code = code;
-	info.si_errno = 0;
-	info.si_addr = (void __user *) addr;
-	info.si_trapno = 0;
-
 	if (unlikely(show_unhandled_signals))
-		show_signal_msg(regs, sig, info.si_code,
+		show_signal_msg(regs, sig, code,
 				addr, current);
 
-	force_sig_info (sig, &info, current);
+	force_sig_fault(sig, code, (void __user *) addr, 0, current);
 }
 
 static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault)
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 41363f4..63166fc 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -170,11 +170,7 @@
 			     int fault_code)
 {
 	unsigned long addr;
-	siginfo_t info;
 
-	info.si_code = code;
-	info.si_signo = sig;
-	info.si_errno = 0;
 	if (fault_code & FAULT_CODE_ITLB) {
 		addr = regs->tpc;
 	} else {
@@ -187,13 +183,11 @@
 		else
 			addr = fault_addr;
 	}
-	info.si_addr = (void __user *) addr;
-	info.si_trapno = 0;
 
 	if (unlikely(show_unhandled_signals))
 		show_signal_msg(regs, sig, code, addr, current);
 
-	force_sig_info(sig, &info, current);
+	force_sig_fault(sig, code, (void __user *) addr, 0, current);
 }
 
 static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn)
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index c68add8..07f84c8 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -54,10 +54,6 @@
 	int
 	default 100
 
-config SUBARCH
-	string
-	option env="SUBARCH"
-
 config NR_CPUS
 	int
 	range 1 1
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index d4e8c49..dcf5ea2 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -208,19 +208,6 @@
 	return 0;
 }
 
-static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, fake_ide_media_proc_show, NULL);
-}
-
-static const struct file_operations fake_ide_media_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= fake_ide_media_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void make_ide_entries(const char *dev_name)
 {
 	struct proc_dir_entry *dir, *ent;
@@ -231,7 +218,8 @@
 	dir = proc_mkdir(dev_name, proc_ide);
 	if(!dir) return;
 
-	ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
+	ent = proc_create_single("media", S_IRUGO, dir,
+			fake_ide_media_proc_show);
 	if(!ent) return;
 	snprintf(name, sizeof(name), "ide0/%s", dev_name);
 	proc_symlink(dev_name, proc_ide_root, name);
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index bb5a196..b10dde6 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,6 +1,7 @@
 generic-y += barrier.h
 generic-y += bpf_perf_event.h
 generic-y += bug.h
+generic-y += compat.h
 generic-y += current.h
 generic-y += delay.h
 generic-y += device.h
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index bc2a516..1a1d88a 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -115,17 +115,10 @@
 static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
 		  int error_code)
 {
-	struct siginfo info;
-
-	memset(&info, 0, sizeof(info));
-	info.si_signo = SIGTRAP;
-	info.si_code = TRAP_BRKPT;
-
-	/* User-mode eip? */
-	info.si_addr = UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL;
-
 	/* Send us the fake SIGTRAP */
-	force_sig_info(SIGTRAP, &info, tsk);
+	force_sig_fault(SIGTRAP, TRAP_BRKPT,
+			/* User-mode eip? */
+			UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL, tsk);
 }
 
 /*
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index b2b02df..ec9a42c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -162,13 +162,9 @@
 
 static void bad_segv(struct faultinfo fi, unsigned long ip)
 {
-	struct siginfo si;
-
-	si.si_signo = SIGSEGV;
-	si.si_code = SEGV_ACCERR;
-	si.si_addr = (void __user *) FAULT_ADDRESS(fi);
 	current->thread.arch.faultinfo = fi;
-	force_sig_info(SIGSEGV, &si, current);
+	force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi),
+			current);
 }
 
 void fatal_sigsegv(void)
@@ -214,8 +210,8 @@
 unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 		   struct uml_pt_regs *regs)
 {
-	struct siginfo si;
 	jmp_buf *catcher;
+	int si_code;
 	int err;
 	int is_write = FAULT_WRITE(fi);
 	unsigned long address = FAULT_ADDRESS(fi);
@@ -239,7 +235,7 @@
 
 	if (SEGV_IS_FIXABLE(&fi))
 		err = handle_page_fault(address, ip, is_write, is_user,
-					&si.si_code);
+					&si_code);
 	else {
 		err = -EFAULT;
 		/*
@@ -271,18 +267,14 @@
 	show_segv_info(regs);
 
 	if (err == -EACCES) {
-		si.si_signo = SIGBUS;
-		si.si_errno = 0;
-		si.si_code = BUS_ADRERR;
-		si.si_addr = (void __user *)address;
 		current->thread.arch.faultinfo = fi;
-		force_sig_info(SIGBUS, &si, current);
+		force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address,
+				current);
 	} else {
 		BUG_ON(err != -EFAULT);
-		si.si_signo = SIGSEGV;
-		si.si_addr = (void __user *) address;
 		current->thread.arch.faultinfo = fi;
-		force_sig_info(SIGSEGV, &si, current);
+		force_sig_fault(SIGSEGV, si_code, (void __user *) address,
+				current);
 	}
 
 out:
@@ -294,9 +286,7 @@
 
 void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
 {
-	struct faultinfo *fi;
-	struct siginfo clean_si;
-
+	int code, err;
 	if (!UPT_IS_USER(regs)) {
 		if (sig == SIGBUS)
 			printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
@@ -306,29 +296,21 @@
 
 	arch_examine_signal(sig, regs);
 
-	clear_siginfo(&clean_si);
-	clean_si.si_signo = si->si_signo;
-	clean_si.si_errno = si->si_errno;
-	clean_si.si_code = si->si_code;
-	switch (sig) {
-	case SIGILL:
-	case SIGFPE:
-	case SIGSEGV:
-	case SIGBUS:
-	case SIGTRAP:
-		fi = UPT_FAULTINFO(regs);
-		clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi);
+	/* Is the siginfo layout for this signal known?
+	 * The signal data must be scrubbed to prevent information leaks.
+	 */
+	code = si->si_code;
+	err = si->si_errno;
+	if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) {
+		struct faultinfo *fi = UPT_FAULTINFO(regs);
 		current->thread.arch.faultinfo = *fi;
-#ifdef __ARCH_SI_TRAPNO
-		clean_si.si_trapno = si->si_trapno;
-#endif
-		break;
-	default:
-		printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n",
-			sig, si->si_code);
+		force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi),
+				current);
+	} else {
+		printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n",
+		       sig, code, err);
+		force_sig(sig, current);
 	}
-
-	force_sig_info(sig, &clean_si, current);
 }
 
 void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
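
The relay_signal() rewrite above replaces a hand-maintained switch over
fault signals with siginfo_layout(), which classifies a (signal,
si_code) pair; only SIL_FAULT layouts carry a fault address through.
The deleted switch is a fair first-order model of what that covers here
(the kernel's table is finer-grained, since si_code matters too):

	#include <signal.h>
	#include <stdbool.h>

	/* user-space model of the gate, not the kernel function */
	static bool fault_layout(int sig)
	{
		switch (sig) {
		case SIGILL:
		case SIGFPE:
		case SIGSEGV:
		case SIGBUS:
		case SIGTRAP:
			return true;
		default:
			return false;
		}
	}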
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 462e59a..03f991e 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -19,6 +19,8 @@
 	select ARCH_WANT_FRAME_POINTERS
 	select GENERIC_IOMAP
 	select MODULES_USE_ELF_REL
+	select NEED_DMA_MAP_STATE
+	select SWIOTLB
 	help
 	  UniCore-32 is 32-bit Instruction Set Architecture,
 	  including a series of low-power-consumption RISC chip
@@ -61,9 +63,6 @@
 config ZONE_DMA
 	def_bool y
 
-config NEED_DMA_MAP_STATE
-       def_bool y
-
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 6f70c76..bfc7abe 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -1,5 +1,6 @@
 generic-y += atomic.h
 generic-y += bugs.h
+generic-y += compat.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
diff --git a/arch/unicore32/kernel/fpu-ucf64.c b/arch/unicore32/kernel/fpu-ucf64.c
index 12c8c95..8594b16 100644
--- a/arch/unicore32/kernel/fpu-ucf64.c
+++ b/arch/unicore32/kernel/fpu-ucf64.c
@@ -52,14 +52,14 @@
  * Raise a SIGFPE for the current process.
  * sicode describes the signal being raised.
  */
-void ucf64_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
+void ucf64_raise_sigfpe(struct pt_regs *regs)
 {
 	siginfo_t info;
 
-	memset(&info, 0, sizeof(info));
+	clear_siginfo(&info);
 
 	info.si_signo = SIGFPE;
-	info.si_code = sicode;
+	info.si_code = FPE_FLTUNK;
 	info.si_addr = (void __user *)(instruction_pointer(regs) - 4);
 
 	/*
@@ -94,7 +94,7 @@
 		pr_debug("UniCore-F64 FPSCR 0x%08x INST 0x%08x\n",
 				cff(FPSCR), inst);
 
-		ucf64_raise_sigfpe(0, regs);
+		ucf64_raise_sigfpe(regs);
 		return;
 	}
 
diff --git a/arch/unicore32/mm/Kconfig b/arch/unicore32/mm/Kconfig
index e9154a5..82759b6 100644
--- a/arch/unicore32/mm/Kconfig
+++ b/arch/unicore32/mm/Kconfig
@@ -39,14 +39,3 @@
 	default y
 	help
 	  Say Y here to disable the TLB single entry operations.
-
-config SWIOTLB
-	def_bool y
-	select DMA_DIRECT_OPS
-
-config IOMMU_HELPER
-	def_bool SWIOTLB
-
-config NEED_SG_DMA_LENGTH
-	def_bool SWIOTLB
-
diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c
index bbefcc4..3814734 100644
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -125,6 +125,7 @@
 	tsk->thread.address = addr;
 	tsk->thread.error_code = fsr;
 	tsk->thread.trap_no = 14;
+	clear_siginfo(&si);
 	si.si_signo = sig;
 	si.si_errno = 0;
 	si.si_code = code;
@@ -472,6 +473,7 @@
 	printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 	       inf->name, fsr, addr);
 
+	clear_siginfo(&info);
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code = inf->code;
@@ -491,6 +493,7 @@
 	printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 	       inf->name, ifsr, addr);
 
+	clear_siginfo(&info);
 	info.si_signo = inf->sig;
 	info.si_errno = 0;
 	info.si_code = inf->code;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d51a71d..cb6e3a2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Select 32 or 64 bit
 config 64BIT
-	bool "64-bit kernel" if ARCH = "x86"
-	default ARCH != "i386"
+	bool "64-bit kernel" if "$(ARCH)" = "x86"
+	default "$(ARCH)" != "i386"
 	---help---
 	  Say yes to build a 64-bit kernel - formerly known as x86_64
 	  Say no to build a 32-bit kernel - formerly known as i386
@@ -28,6 +28,8 @@
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select HAVE_ARCH_SOFT_DIRTY
 	select MODULES_USE_ELF_RELA
+	select NEED_DMA_MAP_STATE
+	select SWIOTLB
 	select X86_DEV_DMA_OPS
 	select ARCH_HAS_SYSCALL_WRAPPER
 
@@ -60,6 +62,7 @@
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
+	select ARCH_HAS_UACCESS_MCSAFE		if X86_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
@@ -134,7 +137,6 @@
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DEBUG_STACKOVERFLOW
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
@@ -184,6 +186,7 @@
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_USER_RETURN_NOTIFIER
 	select IRQ_FORCED_THREADING
+	select NEED_SG_DMA_LENGTH
 	select PCI_LOCKLESS_CONFIG
 	select PERF_EVENTS
 	select RTC_LIB
@@ -236,13 +239,6 @@
 config SBUS
 	bool
 
-config NEED_DMA_MAP_STATE
-	def_bool y
-	depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB
-
-config NEED_SG_DMA_LENGTH
-	def_bool y
-
 config GENERIC_ISA_DMA
 	def_bool y
 	depends on ISA_DMA_API
@@ -875,6 +871,7 @@
 
 config GART_IOMMU
 	bool "Old AMD GART IOMMU support"
+	select IOMMU_HELPER
 	select SWIOTLB
 	depends on X86_64 && PCI && AMD_NB
 	---help---
@@ -896,6 +893,7 @@
 
 config CALGARY_IOMMU
 	bool "IBM Calgary IOMMU support"
+	select IOMMU_HELPER
 	select SWIOTLB
 	depends on X86_64 && PCI
 	---help---
@@ -923,20 +921,6 @@
 	  Calgary anyway, pass 'iommu=calgary' on the kernel command line.
 	  If unsure, say Y.
 
-# need this always selected by IOMMU for the VIA workaround
-config SWIOTLB
-	def_bool y if X86_64
-	---help---
-	  Support for software bounce buffers used on x86-64 systems
-	  which don't have a hardware IOMMU. Using this PCI devices
-	  which can only access 32-bits of memory can be used on systems
-	  with more than 3 GB of memory.
-	  If unsure, say Y.
-
-config IOMMU_HELPER
-	def_bool y
-	depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
-
 config MAXSMP
 	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP && DEBUG_KERNEL
@@ -1458,6 +1442,7 @@
 config X86_PAE
 	bool "PAE (Physical Address Extension) Support"
 	depends on X86_32 && !HIGHMEM4G
+	select PHYS_ADDR_T_64BIT
 	select SWIOTLB
 	---help---
 	  PAE is required for NX support, and furthermore enables
@@ -1485,14 +1470,6 @@
 
 	  Say N if unsure.
 
-config ARCH_PHYS_ADDR_T_64BIT
-	def_bool y
-	depends on X86_64 || X86_PAE
-
-config ARCH_DMA_ADDR_T_64BIT
-	def_bool y
-	depends on X86_64 || HIGHMEM64G
-
 config X86_DIRECT_GBPAGES
 	def_bool y
 	depends on X86_64 && !DEBUG_PAGEALLOC
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 60135cb..f0a6ea2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -94,7 +94,7 @@
 else
         BITS := 64
         UTS_MACHINE := x86_64
-        CHECKFLAGS += -D__x86_64__ -m64
+        CHECKFLAGS += -D__x86_64__
 
         biarch := -m64
         KBUILD_AFLAGS += -m64
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index 0cb3257..af6cda0 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "misc.h"
 
-#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL
 
 static unsigned long fs;
 static inline void set_fs(unsigned long seg)
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 09f36c0..a8a8642 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -109,23 +109,34 @@
 }
 
 static efi_status_t
-__setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
+__setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 {
 	struct pci_setup_rom *rom = NULL;
 	efi_status_t status;
 	unsigned long size;
-	uint64_t attributes;
+	uint64_t attributes, romsize;
+	void *romimage;
 
-	status = efi_early->call(pci->attributes, pci,
-				 EfiPciIoAttributeOperationGet, 0, 0,
-				 &attributes);
+	status = efi_call_proto(efi_pci_io_protocol, attributes, pci,
+				EfiPciIoAttributeOperationGet, 0, 0,
+				&attributes);
 	if (status != EFI_SUCCESS)
 		return status;
 
-	if (!pci->romimage || !pci->romsize)
+	/*
+	 * Some firmware images contain EFI function pointers at the place where the
+	 * romimage and romsize fields are supposed to be. Typically the EFI
+	 * code is mapped at high addresses, translating to an unrealistically
+	 * large romsize. The UEFI spec limits the size of option ROMs to 16
+	 * MiB so we reject any ROMs over 16 MiB in size to catch this.
+	 */
+	romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol,
+							 romimage, pci);
+	romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci);
+	if (!romimage || !romsize || romsize > SZ_16M)
 		return EFI_INVALID_PARAMETER;
 
-	size = pci->romsize + sizeof(*rom);
+	size = romsize + sizeof(*rom);
 
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
 	if (status != EFI_SUCCESS) {
@@ -141,30 +152,32 @@
 	rom->pcilen = pci->romsize;
 	*__rom = rom;
 
-	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
-				 PCI_VENDOR_ID, 1, &(rom->vendor));
+	status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
+				EfiPciIoWidthUint16, PCI_VENDOR_ID, 1,
+				&rom->vendor);
 
 	if (status != EFI_SUCCESS) {
 		efi_printk(sys_table, "Failed to read rom->vendor\n");
 		goto free_struct;
 	}
 
-	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
-				 PCI_DEVICE_ID, 1, &(rom->devid));
+	status = efi_call_proto(efi_pci_io_protocol, pci.read, pci,
+				EfiPciIoWidthUint16, PCI_DEVICE_ID, 1,
+				&rom->devid);
 
 	if (status != EFI_SUCCESS) {
 		efi_printk(sys_table, "Failed to read rom->devid\n");
 		goto free_struct;
 	}
 
-	status = efi_early->call(pci->get_location, pci, &(rom->segment),
-				 &(rom->bus), &(rom->device), &(rom->function));
+	status = efi_call_proto(efi_pci_io_protocol, get_location, pci,
+				&rom->segment, &rom->bus, &rom->device,
+				&rom->function);
 
 	if (status != EFI_SUCCESS)
 		goto free_struct;
 
-	memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
-	       pci->romsize);
+	memcpy(rom->romdata, romimage, romsize);
 	return status;
 
 free_struct:
@@ -176,7 +189,7 @@
 setup_efi_pci32(struct boot_params *params, void **pci_handle,
 		unsigned long size)
 {
-	efi_pci_io_protocol_32 *pci = NULL;
+	efi_pci_io_protocol_t *pci = NULL;
 	efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
 	u32 *handles = (u32 *)(unsigned long)pci_handle;
 	efi_status_t status;
@@ -203,7 +216,7 @@
 		if (!pci)
 			continue;
 
-		status = __setup_efi_pci32(pci, &rom);
+		status = __setup_efi_pci(pci, &rom);
 		if (status != EFI_SUCCESS)
 			continue;
 
@@ -217,74 +230,11 @@
 	}
 }
 
-static efi_status_t
-__setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
-{
-	struct pci_setup_rom *rom;
-	efi_status_t status;
-	unsigned long size;
-	uint64_t attributes;
-
-	status = efi_early->call(pci->attributes, pci,
-				 EfiPciIoAttributeOperationGet, 0,
-				 &attributes);
-	if (status != EFI_SUCCESS)
-		return status;
-
-	if (!pci->romimage || !pci->romsize)
-		return EFI_INVALID_PARAMETER;
-
-	size = pci->romsize + sizeof(*rom);
-
-	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom);
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to alloc mem for rom\n");
-		return status;
-	}
-
-	rom->data.type = SETUP_PCI;
-	rom->data.len = size - sizeof(struct setup_data);
-	rom->data.next = 0;
-	rom->pcilen = pci->romsize;
-	*__rom = rom;
-
-	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
-				 PCI_VENDOR_ID, 1, &(rom->vendor));
-
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to read rom->vendor\n");
-		goto free_struct;
-	}
-
-	status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16,
-				 PCI_DEVICE_ID, 1, &(rom->devid));
-
-	if (status != EFI_SUCCESS) {
-		efi_printk(sys_table, "Failed to read rom->devid\n");
-		goto free_struct;
-	}
-
-	status = efi_early->call(pci->get_location, pci, &(rom->segment),
-				 &(rom->bus), &(rom->device), &(rom->function));
-
-	if (status != EFI_SUCCESS)
-		goto free_struct;
-
-	memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
-	       pci->romsize);
-	return status;
-
-free_struct:
-	efi_call_early(free_pool, rom);
-	return status;
-
-}
-
 static void
 setup_efi_pci64(struct boot_params *params, void **pci_handle,
 		unsigned long size)
 {
-	efi_pci_io_protocol_64 *pci = NULL;
+	efi_pci_io_protocol_t *pci = NULL;
 	efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
 	u64 *handles = (u64 *)(unsigned long)pci_handle;
 	efi_status_t status;
@@ -311,7 +261,7 @@
 		if (!pci)
 			continue;
 
-		status = __setup_efi_pci64(pci, &rom);
+		status = __setup_efi_pci(pci, &rom);
 		if (status != EFI_SUCCESS)
 			continue;
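
The two near-identical __setup_efi_pci32()/__setup_efi_pci64() bodies
could be folded into one __setup_efi_pci() because the EFI stub's
efi_call_proto() and efi_table_attr() macros select the 32- or 64-bit
protocol layout at run time; width-sensitive fields such as romimage
and romsize must therefore be read through the macro before use instead
of being dereferenced natively. A reduced model of the idea (struct and
function names illustrative, not the stub API):

	#include <stdbool.h>
	#include <stdint.h>

	struct pci_proto32 { uint32_t romsize, romimage; };
	struct pci_proto64 { uint64_t romsize, romimage; };

	/* one accessor that widens the field from whichever layout is
	 * active -- the moral equivalent of efi_table_attr() */
	static uint64_t read_romsize(bool is64, const void *proto)
	{
		return is64 ? ((const struct pci_proto64 *)proto)->romsize
			    : ((const struct pci_proto32 *)proto)->romsize;
	}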
 
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 8169e8b..6403789 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -365,6 +365,7 @@
 	 * this function call.
 	 */
 	pushq	%rsi
+	movq	%rsi, %rdi		/* real mode address */
 	call	paging_prepare
 	popq	%rsi
 
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a0a50b9..b87a758 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -47,7 +47,7 @@
 #include <linux/decompress/mm.h>
 
 #ifdef CONFIG_X86_5LEVEL
-unsigned int pgtable_l5_enabled __ro_after_init;
+unsigned int __pgtable_l5_enabled;
 unsigned int pgdir_shift __ro_after_init = 39;
 unsigned int ptrs_per_p4d __ro_after_init = 1;
 #endif
@@ -734,7 +734,7 @@
 
 #ifdef CONFIG_X86_5LEVEL
 	if (__read_cr4() & X86_CR4_LA57) {
-		pgtable_l5_enabled = 1;
+		__pgtable_l5_enabled = 1;
 		pgdir_shift = 48;
 		ptrs_per_p4d = 512;
 	}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 9e11be4..a423bdb 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -12,10 +12,8 @@
 #undef CONFIG_PARAVIRT_SPINLOCKS
 #undef CONFIG_KASAN
 
-#ifdef CONFIG_X86_5LEVEL
-/* cpu_feature_enabled() cannot be used that early */
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
 
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
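
USE_EARLY_PGTABLE_L5 is the opt-in for code that runs before
cpu_feature_enabled() works: such files read the raw
__pgtable_l5_enabled variable instead of the CPU feature bit. A sketch
of the accessor pattern the macro selects (the exact definition lives
in the x86 pgtable headers; shown here under that assumption):

	#ifdef USE_EARLY_PGTABLE_L5
	extern unsigned int __pgtable_l5_enabled;
	static inline bool pgtable_l5_enabled(void)
	{
		return __pgtable_l5_enabled;
	}
	#else
	#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
	#endif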
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index a362fa0..8c51075 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -31,16 +31,23 @@
  */
 unsigned long *trampoline_32bit __section(.data);
 
-struct paging_config paging_prepare(void)
+extern struct boot_params *boot_params;
+int cmdline_find_option_bool(const char *option);
+
+struct paging_config paging_prepare(void *rmode)
 {
 	struct paging_config paging_config = {};
 	unsigned long bios_start, ebda_start;
 
+	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
+	boot_params = rmode;
+
 	/*
 	 * Check if LA57 is desired and supported.
 	 *
-	 * There are two parts to the check:
+	 * There are several parts to the check:
 	 *   - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
+	 *   - if user asked to disable 5-level paging: no5lvl in cmdline
 	 *   - if the machine supports 5-level paging:
 	 *     + CPUID leaf 7 is supported
 	 *     + the leaf has the feature bit set
@@ -48,6 +55,7 @@
 	 * That's substitute for boot_cpu_has() in early boot code.
 	 */
 	if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
+			!cmdline_find_option_bool("no5lvl") &&
 			native_cpuid_eax(0) >= 7 &&
 			(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
 		paging_config.l5_required = 1;
@@ -130,7 +138,7 @@
 {
 	void *trampoline_pgtable;
 
-	trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
+	trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
 
 	/*
 	 * Move the top level page table out of trampoline memory,
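
The pgtable_64.c one-liner fixes a classic C scaling bug:
trampoline_32bit is an unsigned long *, so adding the byte constant
TRAMPOLINE_32BIT_PGTABLE_OFFSET advanced the pointer by that many
*elements* (8 bytes each), not bytes; dividing the offset by
sizeof(unsigned long) first yields the intended address. A standalone
demonstration of the difference:

	#include <stdio.h>

	int main(void)
	{
		unsigned long buf[64];
		unsigned long *base = buf;
		unsigned long byte_off = 32;	/* stand-in offset */

		/* wrong: advances 32 elements == 256 bytes */
		unsigned long *bad  = base + byte_off;
		/* right: scale the byte offset down to elements */
		unsigned long *good = base + byte_off / sizeof(unsigned long);

		printf("bad: %td bytes past base, good: %td bytes\n",
		       (char *)bad - (char *)buf, (char *)good - (char *)buf);
		return 0;
	}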
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5f07333..a450ad5 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -15,7 +15,6 @@
 
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
-obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
@@ -24,7 +23,6 @@
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@@ -38,6 +36,16 @@
 obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
 obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
 
+obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
+obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
+obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o
+
+obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o
+obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o
+
+obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o
+obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o
+
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
 	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
@@ -55,11 +63,12 @@
 	obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
 	obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
 	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
+
+	obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
 endif
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
-salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
@@ -68,10 +77,16 @@
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 
+aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
+aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o
+aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
+
+morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o
+morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o
+
 ifeq ($(avx_supported),yes)
 	camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
 					camellia_aesni_avx_glue.o
@@ -87,6 +102,8 @@
 	camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
 	chacha20-x86_64-y += chacha20-avx2-x86_64.o
 	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
+
+	morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
 endif
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
new file mode 100644
index 0000000..9254e0b
--- /dev/null
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -0,0 +1,749 @@
+/*
+ * AES-NI + SSE2 implementation of AEGIS-128
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define STATE0	%xmm0
+#define STATE1	%xmm1
+#define STATE2	%xmm2
+#define STATE3	%xmm3
+#define STATE4	%xmm4
+#define KEY	%xmm5
+#define MSG	%xmm5
+#define T0	%xmm6
+#define T1	%xmm7
+
+#define STATEP	%rdi
+#define LEN	%rsi
+#define SRC	%rdx
+#define DST	%rcx
+
+.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
+.align 16
+.Laegis128_const_0:
+	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Laegis128_const_1:
+	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
+.align 16
+.Laegis128_counter:
+	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+
+.text
+
+/*
+ * aegis128_update
+ * input:
+ *   STATE[0-4] - input state
+ * output:
+ *   STATE[0-4] - output state (shifted positions)
+ * changed:
+ *   T0
+ */
+.macro aegis128_update
+	movdqa STATE4, T0
+	aesenc STATE0, STATE4
+	aesenc STATE1, STATE0
+	aesenc STATE2, STATE1
+	aesenc STATE3, STATE2
+	aesenc T0,     STATE3
+.endm
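+
+/*
+ * Note on operand order: in AT&T syntax "aesenc %src, %dst" computes
+ * dst = AESRound(dst) xor src, i.e. the destination holds the AES
+ * state and the source supplies the round key. The position shift is
+ * implicit: callers rotate which STATE register plays which role
+ * between invocations instead of moving data.
+ */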
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ *   LEN - bytes
+ *   SRC - src
+ * output:
+ *   MSG  - message block
+ * changed:
+ *   T0
+ *   %r8
+ *   %r9
+ */
+__load_partial:
+	xor %r9, %r9
+	pxor MSG, MSG
+
+	mov LEN, %r8
+	and $0x1, %r8
+	jz .Lld_partial_1
+
+	mov LEN, %r8
+	and $0x1E, %r8
+	add SRC, %r8
+	mov (%r8), %r9b
+
+.Lld_partial_1:
+	mov LEN, %r8
+	and $0x2, %r8
+	jz .Lld_partial_2
+
+	mov LEN, %r8
+	and $0x1C, %r8
+	add SRC, %r8
+	shl $0x10, %r9
+	mov (%r8), %r9w
+
+.Lld_partial_2:
+	mov LEN, %r8
+	and $0x4, %r8
+	jz .Lld_partial_4
+
+	mov LEN, %r8
+	and $0x18, %r8
+	add SRC, %r8
+	shl $32, %r9
+	mov (%r8), %r8d
+	xor %r8, %r9
+
+.Lld_partial_4:
+	movq %r9, MSG
+
+	mov LEN, %r8
+	and $0x8, %r8
+	jz .Lld_partial_8
+
+	mov LEN, %r8
+	and $0x10, %r8
+	add SRC, %r8
+	pslldq $8, MSG
+	movq (%r8), T0
+	pxor T0, MSG
+
+.Lld_partial_8:
+	ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ *   LEN - bytes
+ *   DST - dst
+ * output:
+ *   T0   - message block
+ * changed:
+ *   %r8
+ *   %r9
+ *   %r10
+ */
+__store_partial:
+	mov LEN, %r8
+	mov DST, %r9
+
+	movq T0, %r10
+
+	cmp $8, %r8
+	jl .Lst_partial_8
+
+	mov %r10, (%r9)
+	psrldq $8, T0
+	movq T0, %r10
+
+	sub $8, %r8
+	add $8, %r9
+
+.Lst_partial_8:
+	cmp $4, %r8
+	jl .Lst_partial_4
+
+	mov %r10d, (%r9)
+	shr $32, %r10
+
+	sub $4, %r8
+	add $4, %r9
+
+.Lst_partial_4:
+	cmp $2, %r8
+	jl .Lst_partial_2
+
+	mov %r10w, (%r9)
+	shr $0x10, %r10
+
+	sub $2, %r8
+	add $2, %r9
+
+.Lst_partial_2:
+	cmp $1, %r8
+	jl .Lst_partial_1
+
+	mov %r10b, (%r9)
+
+.Lst_partial_1:
+	ret
+ENDPROC(__store_partial)
+
+/*
+ * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
+ */
+ENTRY(crypto_aegis128_aesni_init)
+	FRAME_BEGIN
+
+	/* load IV: */
+	movdqu (%rdx), T1
+
+	/* load key: */
+	movdqa (%rsi), KEY
+	pxor KEY, T1
+	movdqa T1, STATE0
+	movdqa KEY, STATE3
+	movdqa KEY, STATE4
+
+	/* load the constants: */
+	movdqa .Laegis128_const_0, STATE2
+	movdqa .Laegis128_const_1, STATE1
+	pxor STATE2, STATE3
+	pxor STATE1, STATE4
+
+	/* update 10 times with KEY / KEY xor IV: */
+	aegis128_update; pxor KEY, STATE4
+	aegis128_update; pxor T1,  STATE3
+	aegis128_update; pxor KEY, STATE2
+	aegis128_update; pxor T1,  STATE1
+	aegis128_update; pxor KEY, STATE0
+	aegis128_update; pxor T1,  STATE4
+	aegis128_update; pxor KEY, STATE3
+	aegis128_update; pxor T1,  STATE2
+	aegis128_update; pxor KEY, STATE1
+	aegis128_update; pxor T1,  STATE0
+
+	/* store the state: */
+	movdqu STATE0, 0x00(STATEP)
+	movdqu STATE1, 0x10(STATEP)
+	movdqu STATE2, 0x20(STATEP)
+	movdqu STATE3, 0x30(STATEP)
+	movdqu STATE4, 0x40(STATEP)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128_aesni_init)
+
+/*
+ * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
+ *                               const void *data);
+ */
+ENTRY(crypto_aegis128_aesni_ad)
+	FRAME_BEGIN
+
+	cmp $0x10, LEN
+	jb .Lad_out
+
+	/* load the state: */
+	movdqu 0x00(STATEP), STATE0
+	movdqu 0x10(STATEP), STATE1
+	movdqu 0x20(STATEP), STATE2
+	movdqu 0x30(STATEP), STATE3
+	movdqu 0x40(STATEP), STATE4
+
+	mov SRC, %r8
+	and $0xF, %r8
+	jnz .Lad_u_loop
+
+.align 8
+.Lad_a_loop:
+	movdqa 0x00(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE4
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_1
+
+	movdqa 0x10(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE3
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_2
+
+	movdqa 0x20(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE2
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_3
+
+	movdqa 0x30(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE1
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_4
+
+	movdqa 0x40(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE0
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_0
+
+	add $0x50, SRC
+	jmp .Lad_a_loop
+
+.align 8
+.Lad_u_loop:
+	movdqu 0x00(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE4
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_1
+
+	movdqu 0x10(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE3
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_2
+
+	movdqu 0x20(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE2
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_3
+
+	movdqu 0x30(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE1
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_4
+
+	movdqu 0x40(SRC), MSG
+	aegis128_update
+	pxor MSG, STATE0
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_0
+
+	add $0x50, SRC
+	jmp .Lad_u_loop
+
+	/* store the state: */
+.Lad_out_0:
+	movdqu STATE0, 0x00(STATEP)
+	movdqu STATE1, 0x10(STATEP)
+	movdqu STATE2, 0x20(STATEP)
+	movdqu STATE3, 0x30(STATEP)
+	movdqu STATE4, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lad_out_1:
+	movdqu STATE4, 0x00(STATEP)
+	movdqu STATE0, 0x10(STATEP)
+	movdqu STATE1, 0x20(STATEP)
+	movdqu STATE2, 0x30(STATEP)
+	movdqu STATE3, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lad_out_2:
+	movdqu STATE3, 0x00(STATEP)
+	movdqu STATE4, 0x10(STATEP)
+	movdqu STATE0, 0x20(STATEP)
+	movdqu STATE1, 0x30(STATEP)
+	movdqu STATE2, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lad_out_3:
+	movdqu STATE2, 0x00(STATEP)
+	movdqu STATE3, 0x10(STATEP)
+	movdqu STATE4, 0x20(STATEP)
+	movdqu STATE0, 0x30(STATEP)
+	movdqu STATE1, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lad_out_4:
+	movdqu STATE1, 0x00(STATEP)
+	movdqu STATE2, 0x10(STATEP)
+	movdqu STATE3, 0x20(STATEP)
+	movdqu STATE4, 0x30(STATEP)
+	movdqu STATE0, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lad_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128_aesni_ad)
+
+.macro encrypt_block a s0 s1 s2 s3 s4 i
+	movdq\a (\i * 0x10)(SRC), MSG
+	movdqa MSG, T0
+	pxor \s1, T0
+	pxor \s4, T0
+	movdqa \s2, T1
+	pand \s3, T1
+	pxor T1, T0
+	movdq\a T0, (\i * 0x10)(DST)
+
+	aegis128_update
+	pxor MSG, \s4
+
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lenc_out_\i
+.endm
+
+/*
+ * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
+ *                                const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128_aesni_enc)
+	FRAME_BEGIN
+
+	cmp $0x10, LEN
+	jb .Lenc_out
+
+	/* load the state: */
+	movdqu 0x00(STATEP), STATE0
+	movdqu 0x10(STATEP), STATE1
+	movdqu 0x20(STATEP), STATE2
+	movdqu 0x30(STATEP), STATE3
+	movdqu 0x40(STATEP), STATE4
+
+	mov  SRC,  %r8
+	or   DST,  %r8
+	and $0xF, %r8
+	jnz .Lenc_u_loop
+
+.align 8
+.Lenc_a_loop:
+	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
+	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
+	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
+	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
+	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
+
+	add $0x50, SRC
+	add $0x50, DST
+	jmp .Lenc_a_loop
+
+.align 8
+.Lenc_u_loop:
+	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
+	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
+	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
+	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
+	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
+
+	add $0x50, SRC
+	add $0x50, DST
+	jmp .Lenc_u_loop
+
+	/* store the state: */
+.Lenc_out_0:
+	movdqu STATE4, 0x00(STATEP)
+	movdqu STATE0, 0x10(STATEP)
+	movdqu STATE1, 0x20(STATEP)
+	movdqu STATE2, 0x30(STATEP)
+	movdqu STATE3, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lenc_out_1:
+	movdqu STATE3, 0x00(STATEP)
+	movdqu STATE4, 0x10(STATEP)
+	movdqu STATE0, 0x20(STATEP)
+	movdqu STATE1, 0x30(STATEP)
+	movdqu STATE2, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lenc_out_2:
+	movdqu STATE2, 0x00(STATEP)
+	movdqu STATE3, 0x10(STATEP)
+	movdqu STATE4, 0x20(STATEP)
+	movdqu STATE0, 0x30(STATEP)
+	movdqu STATE1, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lenc_out_3:
+	movdqu STATE1, 0x00(STATEP)
+	movdqu STATE2, 0x10(STATEP)
+	movdqu STATE3, 0x20(STATEP)
+	movdqu STATE4, 0x30(STATEP)
+	movdqu STATE0, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lenc_out_4:
+	movdqu STATE0, 0x00(STATEP)
+	movdqu STATE1, 0x10(STATEP)
+	movdqu STATE2, 0x20(STATEP)
+	movdqu STATE3, 0x30(STATEP)
+	movdqu STATE4, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Lenc_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128_aesni_enc)
+
+/*
+ * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
+ *                                     const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128_aesni_enc_tail)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu 0x00(STATEP), STATE0
+	movdqu 0x10(STATEP), STATE1
+	movdqu 0x20(STATEP), STATE2
+	movdqu 0x30(STATEP), STATE3
+	movdqu 0x40(STATEP), STATE4
+
+	/* encrypt message: */
+	call __load_partial
+
+	movdqa MSG, T0
+	pxor STATE1, T0
+	pxor STATE4, T0
+	movdqa STATE2, T1
+	pand STATE3, T1
+	pxor T1, T0
+
+	call __store_partial
+
+	aegis128_update
+	pxor MSG, STATE4
+
+	/* store the state: */
+	movdqu STATE4, 0x00(STATEP)
+	movdqu STATE0, 0x10(STATEP)
+	movdqu STATE1, 0x20(STATEP)
+	movdqu STATE2, 0x30(STATEP)
+	movdqu STATE3, 0x40(STATEP)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128_aesni_enc_tail)
+
+.macro decrypt_block a s0 s1 s2 s3 s4 i
+	movdq\a (\i * 0x10)(SRC), MSG
+	pxor \s1, MSG
+	pxor \s4, MSG
+	movdqa \s2, T1
+	pand \s3, T1
+	pxor T1, MSG
+	movdq\a MSG, (\i * 0x10)(DST)
+
+	aegis128_update
+	pxor MSG, \s4
+
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Ldec_out_\i
+.endm
+
+/*
+ * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
+ *                                const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128_aesni_dec)
+	FRAME_BEGIN
+
+	cmp $0x10, LEN
+	jb .Ldec_out
+
+	/* load the state: */
+	movdqu 0x00(STATEP), STATE0
+	movdqu 0x10(STATEP), STATE1
+	movdqu 0x20(STATEP), STATE2
+	movdqu 0x30(STATEP), STATE3
+	movdqu 0x40(STATEP), STATE4
+
+	mov  SRC, %r8
+	or   DST, %r8
+	and $0xF, %r8
+	jnz .Ldec_u_loop
+
+.align 8
+.Ldec_a_loop:
+	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
+	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
+	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
+	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
+	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
+
+	add $0x50, SRC
+	add $0x50, DST
+	jmp .Ldec_a_loop
+
+.align 8
+.Ldec_u_loop:
+	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
+	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
+	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
+	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
+	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
+
+	add $0x50, SRC
+	add $0x50, DST
+	jmp .Ldec_u_loop
+
+	/* store the state: */
+.Ldec_out_0:
+	movdqu STATE4, 0x00(STATEP)
+	movdqu STATE0, 0x10(STATEP)
+	movdqu STATE1, 0x20(STATEP)
+	movdqu STATE2, 0x30(STATEP)
+	movdqu STATE3, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Ldec_out_1:
+	movdqu STATE3, 0x00(STATEP)
+	movdqu STATE4, 0x10(STATEP)
+	movdqu STATE0, 0x20(STATEP)
+	movdqu STATE1, 0x30(STATEP)
+	movdqu STATE2, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Ldec_out_2:
+	movdqu STATE2, 0x00(STATEP)
+	movdqu STATE3, 0x10(STATEP)
+	movdqu STATE4, 0x20(STATEP)
+	movdqu STATE0, 0x30(STATEP)
+	movdqu STATE1, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Ldec_out_3:
+	movdqu STATE1, 0x00(STATEP)
+	movdqu STATE2, 0x10(STATEP)
+	movdqu STATE3, 0x20(STATEP)
+	movdqu STATE4, 0x30(STATEP)
+	movdqu STATE0, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Ldec_out_4:
+	movdqu STATE0, 0x00(STATEP)
+	movdqu STATE1, 0x10(STATEP)
+	movdqu STATE2, 0x20(STATEP)
+	movdqu STATE3, 0x30(STATEP)
+	movdqu STATE4, 0x40(STATEP)
+	FRAME_END
+	ret
+
+.Ldec_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128_aesni_dec)
+
+/*
+ * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
+ *                                     const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128_aesni_dec_tail)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu 0x00(STATEP), STATE0
+	movdqu 0x10(STATEP), STATE1
+	movdqu 0x20(STATEP), STATE2
+	movdqu 0x30(STATEP), STATE3
+	movdqu 0x40(STATEP), STATE4
+
+	/* decrypt message: */
+	call __load_partial
+
+	pxor STATE1, MSG
+	pxor STATE4, MSG
+	movdqa STATE2, T1
+	pand STATE3, T1
+	pxor T1, MSG
+
+	movdqa MSG, T0
+	call __store_partial
+
+	/*
+	 * Mask with the byte count: only the first LEN bytes of MSG are
+	 * valid plaintext, so broadcast LEN into every byte of T0 and
+	 * compare against the 0..15 counter constant; pcmpgtb leaves
+	 * 0xff where LEN > index, clearing the stale bytes before the
+	 * block is absorbed into the state.
+	 */
+	movq LEN, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	movdqa .Laegis128_counter, T1
+	pcmpgtb T1, T0
+	pand T0, MSG
+
+	aegis128_update
+	pxor MSG, STATE4
+
+	/* store the state: */
+	movdqu STATE4, 0x00(STATEP)
+	movdqu STATE0, 0x10(STATEP)
+	movdqu STATE1, 0x20(STATEP)
+	movdqu STATE2, 0x30(STATEP)
+	movdqu STATE3, 0x40(STATEP)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128_aesni_dec_tail)
+
+/*
+ * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
+ *                                  u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_aegis128_aesni_final)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu 0x00(STATEP), STATE0
+	movdqu 0x10(STATEP), STATE1
+	movdqu 0x20(STATEP), STATE2
+	movdqu 0x30(STATEP), STATE3
+	movdqu 0x40(STATEP), STATE4
+
+	/* prepare length block: */
+	movq %rdx, MSG
+	movq %rcx, T0
+	pslldq $8, T0
+	pxor T0, MSG
+	psllq $3, MSG /* multiply by 8 (to get bit count) */
+
+	pxor STATE3, MSG
+
+	/* update state: */
+	aegis128_update; pxor MSG, STATE4
+	aegis128_update; pxor MSG, STATE3
+	aegis128_update; pxor MSG, STATE2
+	aegis128_update; pxor MSG, STATE1
+	aegis128_update; pxor MSG, STATE0
+	aegis128_update; pxor MSG, STATE4
+	aegis128_update; pxor MSG, STATE3
+
+	/* xor tag: */
+	movdqu (%rsi), MSG
+
+	pxor STATE0, MSG
+	pxor STATE1, MSG
+	pxor STATE2, MSG
+	pxor STATE3, MSG
+	pxor STATE4, MSG
+
+	movdqu MSG, (%rsi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128_aesni_final)
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
new file mode 100644
index 0000000..5de7c0d
--- /dev/null
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -0,0 +1,407 @@
+/*
+ * The AEGIS-128 Authenticated-Encryption Algorithm
+ *   Glue for AES-NI + SSE2 implementation
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+#define AEGIS128_BLOCK_ALIGN 16
+#define AEGIS128_BLOCK_SIZE 16
+#define AEGIS128_NONCE_SIZE 16
+#define AEGIS128_STATE_BLOCKS 5
+#define AEGIS128_KEY_SIZE 16
+#define AEGIS128_MIN_AUTH_SIZE 8
+#define AEGIS128_MAX_AUTH_SIZE 16
+
+asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
+
+asmlinkage void crypto_aegis128_aesni_ad(
+		void *state, unsigned int length, const void *data);
+
+asmlinkage void crypto_aegis128_aesni_enc(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128_aesni_dec(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128_aesni_enc_tail(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128_aesni_dec_tail(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128_aesni_final(
+		void *state, void *tag_xor, unsigned int assoclen,
+		unsigned int cryptlen);
+
+struct aegis_block {
+	u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN);
+};
+
+struct aegis_state {
+	struct aegis_block blocks[AEGIS128_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+	struct aegis_block key;
+};
+
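+/*
+ * Encryption and decryption share the same driver path; this little
+ * vtable selects the walk direction and the matching assembly entry
+ * points for whole blocks and for the final partial block.
+ */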
+struct aegis_crypt_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_blocks)(void *state, unsigned int length, const void *src,
+			     void *dst);
+	void (*crypt_tail)(void *state, unsigned int length, const void *src,
+			   void *dst);
+};
+
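+/*
+ * Feed the associated data to the assembly in whole 16-byte blocks,
+ * buffering any partial block that straddles a scatterlist entry and
+ * zero-padding the final short block.
+ */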
+static void crypto_aegis128_aesni_process_ad(
+		struct aegis_state *state, struct scatterlist *sg_src,
+		unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	struct aegis_block buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= AEGIS128_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+				crypto_aegis128_aesni_ad(state,
+							 AEGIS128_BLOCK_SIZE,
+							 buf.bytes);
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			crypto_aegis128_aesni_ad(state, left, src);
+
+			src += left & ~(AEGIS128_BLOCK_SIZE - 1);
+			left &= AEGIS128_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+		pos += left;
+		assoclen -= size;
+
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
+		crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
+	}
+}
+
+static void crypto_aegis128_aesni_process_crypt(
+		struct aegis_state *state, struct aead_request *req,
+		const struct aegis_crypt_ops *ops)
+{
+	struct skcipher_walk walk;
+	u8 *src, *dst;
+	unsigned int chunksize, base;
+
+	ops->skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		src = walk.src.virt.addr;
+		dst = walk.dst.virt.addr;
+		chunksize = walk.nbytes;
+
+		ops->crypt_blocks(state, chunksize, src, dst);
+
+		base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1);
+		src += base;
+		dst += base;
+		chunksize &= AEGIS128_BLOCK_SIZE - 1;
+
+		if (chunksize > 0)
+			ops->crypt_tail(state, chunksize, src, dst);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
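+/*
+ * The crypto API only guarantees pointer alignment for the tfm
+ * context, so cra_ctxsize below reserves __alignof__(struct aegis_ctx)
+ * extra bytes and the context pointer is aligned up here by hand.
+ */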
+static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead)
+{
+	u8 *ctx = crypto_aead_ctx(aead);
+	ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
+	return (void *)ctx;
+}
+
+static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key,
+					unsigned int keylen)
+{
+	struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead);
+
+	if (keylen != AEGIS128_KEY_SIZE) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
+
+	return 0;
+}
+
+static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm,
+						unsigned int authsize)
+{
+	if (authsize > AEGIS128_MAX_AUTH_SIZE)
+		return -EINVAL;
+	if (authsize < AEGIS128_MIN_AUTH_SIZE)
+		return -EINVAL;
+	return 0;
+}
+
+static void crypto_aegis128_aesni_crypt(struct aead_request *req,
+					struct aegis_block *tag_xor,
+					unsigned int cryptlen,
+					const struct aegis_crypt_ops *ops)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
+	struct aegis_state state;
+
+	kernel_fpu_begin();
+
+	crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
+	crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
+	crypto_aegis128_aesni_process_crypt(&state, req, ops);
+	crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
+
+	kernel_fpu_end();
+}
+
+static int crypto_aegis128_aesni_encrypt(struct aead_request *req)
+{
+	static const struct aegis_crypt_ops OPS = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_blocks = crypto_aegis128_aesni_enc,
+		.crypt_tail = crypto_aegis128_aesni_enc_tail,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+	scatterwalk_map_and_copy(tag.bytes, req->dst,
+				 req->assoclen + cryptlen, authsize, 1);
+	return 0;
+}
+
+static int crypto_aegis128_aesni_decrypt(struct aead_request *req)
+{
+	static const struct aegis_block zeros = {};
+
+	static const struct aegis_crypt_ops OPS = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_blocks = crypto_aegis128_aesni_dec,
+		.crypt_tail = crypto_aegis128_aesni_dec_tail,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag.bytes, req->src,
+				 req->assoclen + cryptlen, authsize, 0);
+
+	crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+	return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead)
+{
+	return 0;
+}
+
+static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
+{
+}
+
+static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead,
+					const u8 *key, unsigned int keylen)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+
+static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead,
+					     unsigned int authsize)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+
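+/*
+ * Run the internal synchronous algorithm directly whenever the FPU is
+ * usable in the current context; otherwise go through cryptd.  Once
+ * requests are queued on cryptd, keep queueing so ordering is kept.
+ */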
+static int cryptd_aegis128_aesni_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_encrypt(req);
+}
+
+static int cryptd_aegis128_aesni_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_decrypt(req);
+}
+
+static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead *cryptd_tfm;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+	return 0;
+}
+
+static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
+}
+
+static struct aead_alg crypto_aegis128_aesni_alg[] = {
+	{
+		.setkey = crypto_aegis128_aesni_setkey,
+		.setauthsize = crypto_aegis128_aesni_setauthsize,
+		.encrypt = crypto_aegis128_aesni_encrypt,
+		.decrypt = crypto_aegis128_aesni_decrypt,
+		.init = crypto_aegis128_aesni_init_tfm,
+		.exit = crypto_aegis128_aesni_exit_tfm,
+
+		.ivsize = AEGIS128_NONCE_SIZE,
+		.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
+		.chunksize = AEGIS128_BLOCK_SIZE,
+
+		.base = {
+			.cra_flags = CRYPTO_ALG_INTERNAL,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct aegis_ctx) +
+				__alignof__(struct aegis_ctx),
+			.cra_alignmask = 0,
+
+			.cra_name = "__aegis128",
+			.cra_driver_name = "__aegis128-aesni",
+
+			.cra_module = THIS_MODULE,
+		}
+	}, {
+		.setkey = cryptd_aegis128_aesni_setkey,
+		.setauthsize = cryptd_aegis128_aesni_setauthsize,
+		.encrypt = cryptd_aegis128_aesni_encrypt,
+		.decrypt = cryptd_aegis128_aesni_decrypt,
+		.init = cryptd_aegis128_aesni_init_tfm,
+		.exit = cryptd_aegis128_aesni_exit_tfm,
+
+		.ivsize = AEGIS128_NONCE_SIZE,
+		.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
+		.chunksize = AEGIS128_BLOCK_SIZE,
+
+		.base = {
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct cryptd_aead *),
+			.cra_alignmask = 0,
+
+			.cra_priority = 400,
+
+			.cra_name = "aegis128",
+			.cra_driver_name = "aegis128-aesni",
+
+			.cra_module = THIS_MODULE,
+		}
+	}
+};
+
+static const struct x86_cpu_id aesni_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_AES),
+	X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
+static int __init crypto_aegis128_aesni_module_init(void)
+{
+	if (!x86_match_cpu(aesni_cpu_id))
+		return -ENODEV;
+
+	return crypto_register_aeads(crypto_aegis128_aesni_alg,
+				     ARRAY_SIZE(crypto_aegis128_aesni_alg));
+}
+
+static void __exit crypto_aegis128_aesni_module_exit(void)
+{
+	crypto_unregister_aeads(crypto_aegis128_aesni_alg,
+				ARRAY_SIZE(crypto_aegis128_aesni_alg));
+}
+
+module_init(crypto_aegis128_aesni_module_init);
+module_exit(crypto_aegis128_aesni_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation");
+MODULE_ALIAS_CRYPTO("aegis128");
+MODULE_ALIAS_CRYPTO("aegis128-aesni");
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
new file mode 100644
index 0000000..9263c34
--- /dev/null
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -0,0 +1,825 @@
+/*
+ * AES-NI + SSE2 implementation of AEGIS-128L
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define STATE0	%xmm0
+#define STATE1	%xmm1
+#define STATE2	%xmm2
+#define STATE3	%xmm3
+#define STATE4	%xmm4
+#define STATE5	%xmm5
+#define STATE6	%xmm6
+#define STATE7	%xmm7
+#define MSG0	%xmm8
+#define MSG1	%xmm9
+#define T0	%xmm10
+#define T1	%xmm11
+#define T2	%xmm12
+#define T3	%xmm13
+
+#define STATEP	%rdi
+#define LEN	%rsi
+#define SRC	%rdx
+#define DST	%rcx
+
+.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
+.align 16
+.Laegis128l_const_0:
+	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Laegis128l_const_1:
+	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
+.align 16
+.Laegis128l_counter0:
+	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+.Laegis128l_counter1:
+	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+
+.text
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ *   LEN - bytes
+ *   SRC - src
+ * output:
+ *   MSG0 - first message block
+ *   MSG1 - second message block
+ * changed:
+ *   T0
+ *   %r8
+ *   %r9
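+ *
+ * Loads 0 < LEN < 32 bytes from SRC without ever reading past the end
+ * of the buffer: each set bit of LEN selects one naturally sized
+ * chunk, loaded back to front.  Roughly, in C:
+ *
+ *	if (len & 1) r9 = src[len & 0x1e];
+ *	if (len & 2) r9 = (r9 << 16) | *(u16 *)(src + (len & 0x1c));
+ *	if (len & 4) r9 = (r9 << 32) | *(u32 *)(src + (len & 0x18));
+ *
+ * and likewise for the 8- and 16-byte chunks below.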
+ */
+__load_partial:
+	xor %r9, %r9
+	pxor MSG0, MSG0
+	pxor MSG1, MSG1
+
+	mov LEN, %r8
+	and $0x1, %r8
+	jz .Lld_partial_1
+
+	mov LEN, %r8
+	and $0x1E, %r8
+	add SRC, %r8
+	mov (%r8), %r9b
+
+.Lld_partial_1:
+	mov LEN, %r8
+	and $0x2, %r8
+	jz .Lld_partial_2
+
+	mov LEN, %r8
+	and $0x1C, %r8
+	add SRC, %r8
+	shl $0x10, %r9
+	mov (%r8), %r9w
+
+.Lld_partial_2:
+	mov LEN, %r8
+	and $0x4, %r8
+	jz .Lld_partial_4
+
+	mov LEN, %r8
+	and $0x18, %r8
+	add SRC, %r8
+	shl $32, %r9
+	mov (%r8), %r8d
+	xor %r8, %r9
+
+.Lld_partial_4:
+	movq %r9, MSG0
+
+	mov LEN, %r8
+	and $0x8, %r8
+	jz .Lld_partial_8
+
+	mov LEN, %r8
+	and $0x10, %r8
+	add SRC, %r8
+	pslldq $8, MSG0
+	movq (%r8), T0
+	pxor T0, MSG0
+
+.Lld_partial_8:
+	mov LEN, %r8
+	and $0x10, %r8
+	jz .Lld_partial_16
+
+	movdqa MSG0, MSG1
+	movdqu (SRC), MSG0
+
+.Lld_partial_16:
+	ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ *   LEN - bytes
+ *   DST - dst
+ * output:
+ *   T0   - first message block
+ *   T1   - second message block
+ * changed:
+ *   %r8
+ *   %r9
+ *   %r10
+ */
+__store_partial:
+	mov LEN, %r8
+	mov DST, %r9
+
+	cmp $16, %r8
+	jl .Lst_partial_16
+
+	movdqu T0, (%r9)
+	movdqa T1, T0
+
+	sub $16, %r8
+	add $16, %r9
+
+.Lst_partial_16:
+	movq T0, %r10
+
+	cmp $8, %r8
+	jl .Lst_partial_8
+
+	mov %r10, (%r9)
+	psrldq $8, T0
+	movq T0, %r10
+
+	sub $8, %r8
+	add $8, %r9
+
+.Lst_partial_8:
+	cmp $4, %r8
+	jl .Lst_partial_4
+
+	mov %r10d, (%r9)
+	shr $32, %r10
+
+	sub $4, %r8
+	add $4, %r9
+
+.Lst_partial_4:
+	cmp $2, %r8
+	jl .Lst_partial_2
+
+	mov %r10w, (%r9)
+	shr $0x10, %r10
+
+	sub $2, %r8
+	add $2, %r9
+
+.Lst_partial_2:
+	cmp $1, %r8
+	jl .Lst_partial_1
+
+	mov %r10b, (%r9)
+
+.Lst_partial_1:
+	ret
+ENDPROC(__store_partial)
+
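+/*
+ * One step of the AEGIS-128L state update: each aesenc computes
+ * S'[j+1] = AESRound(S[j], S[j+1]) in place, so register STATEj ends
+ * up holding the new S[j+1] and the register/state assignment rotates
+ * by one.  The numbered update0..update7 and state_store0..state_store7
+ * variants below compensate for that rotation.
+ */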
+.macro update
+	movdqa STATE7, T0
+	aesenc STATE0, STATE7
+	aesenc STATE1, STATE0
+	aesenc STATE2, STATE1
+	aesenc STATE3, STATE2
+	aesenc STATE4, STATE3
+	aesenc STATE5, STATE4
+	aesenc STATE6, STATE5
+	aesenc T0,     STATE6
+.endm
+
+.macro update0
+	update
+	pxor MSG0, STATE7
+	pxor MSG1, STATE3
+.endm
+
+.macro update1
+	update
+	pxor MSG0, STATE6
+	pxor MSG1, STATE2
+.endm
+
+.macro update2
+	update
+	pxor MSG0, STATE5
+	pxor MSG1, STATE1
+.endm
+
+.macro update3
+	update
+	pxor MSG0, STATE4
+	pxor MSG1, STATE0
+.endm
+
+.macro update4
+	update
+	pxor MSG0, STATE3
+	pxor MSG1, STATE7
+.endm
+
+.macro update5
+	update
+	pxor MSG0, STATE2
+	pxor MSG1, STATE6
+.endm
+
+.macro update6
+	update
+	pxor MSG0, STATE1
+	pxor MSG1, STATE5
+.endm
+
+.macro update7
+	update
+	pxor MSG0, STATE0
+	pxor MSG1, STATE4
+.endm
+
+.macro state_load
+	movdqu 0x00(STATEP), STATE0
+	movdqu 0x10(STATEP), STATE1
+	movdqu 0x20(STATEP), STATE2
+	movdqu 0x30(STATEP), STATE3
+	movdqu 0x40(STATEP), STATE4
+	movdqu 0x50(STATEP), STATE5
+	movdqu 0x60(STATEP), STATE6
+	movdqu 0x70(STATEP), STATE7
+.endm
+
+.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
+	movdqu \s7, 0x00(STATEP)
+	movdqu \s0, 0x10(STATEP)
+	movdqu \s1, 0x20(STATEP)
+	movdqu \s2, 0x30(STATEP)
+	movdqu \s3, 0x40(STATEP)
+	movdqu \s4, 0x50(STATEP)
+	movdqu \s5, 0x60(STATEP)
+	movdqu \s6, 0x70(STATEP)
+.endm
+
+.macro state_store0
+	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
+.endm
+
+.macro state_store1
+	state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
+.endm
+
+.macro state_store2
+	state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
+.endm
+
+.macro state_store3
+	state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
+.endm
+
+.macro state_store4
+	state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
+.endm
+
+.macro state_store5
+	state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
+.endm
+
+.macro state_store6
+	state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
+.endm
+
+.macro state_store7
+	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
+.endm
+
+/*
+ * void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv);
+ */
+ENTRY(crypto_aegis128l_aesni_init)
+	FRAME_BEGIN
+
+	/* load key: */
+	movdqa (%rsi), MSG1
+	movdqa MSG1, STATE0
+	movdqa MSG1, STATE4
+	movdqa MSG1, STATE5
+	movdqa MSG1, STATE6
+	movdqa MSG1, STATE7
+
+	/* load IV: */
+	movdqu (%rdx), MSG0
+	pxor MSG0, STATE0
+	pxor MSG0, STATE4
+
+	/* load the constants: */
+	movdqa .Laegis128l_const_0, STATE2
+	movdqa .Laegis128l_const_1, STATE1
+	movdqa STATE1, STATE3
+	pxor STATE2, STATE5
+	pxor STATE1, STATE6
+	pxor STATE2, STATE7
+
+	/* update 10 times with IV and KEY: */
+	update0
+	update1
+	update2
+	update3
+	update4
+	update5
+	update6
+	update7
+	update0
+	update1
+
+	state_store1
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128l_aesni_init)
+
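+/*
+ * Absorb one 32-byte chunk of associated data; \a selects aligned or
+ * unaligned loads and \i the update variant matching the current state
+ * rotation.  Once fewer than 32 bytes remain, exit to the store label
+ * for that rotation.
+ */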
+.macro ad_block a i
+	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
+	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
+	update\i
+	sub $0x20, LEN
+	cmp $0x20, LEN
+	jl .Lad_out_\i
+.endm
+
+/*
+ * void crypto_aegis128l_aesni_ad(void *state, unsigned int length,
+ *                                const void *data);
+ */
+ENTRY(crypto_aegis128l_aesni_ad)
+	FRAME_BEGIN
+
+	cmp $0x20, LEN
+	jb .Lad_out
+
+	state_load
+
+	mov  SRC, %r8
+	and $0xf, %r8
+	jnz .Lad_u_loop
+
+.align 8
+.Lad_a_loop:
+	ad_block a 0
+	ad_block a 1
+	ad_block a 2
+	ad_block a 3
+	ad_block a 4
+	ad_block a 5
+	ad_block a 6
+	ad_block a 7
+
+	add $0x100, SRC
+	jmp .Lad_a_loop
+
+.align 8
+.Lad_u_loop:
+	ad_block u 0
+	ad_block u 1
+	ad_block u 2
+	ad_block u 3
+	ad_block u 4
+	ad_block u 5
+	ad_block u 6
+	ad_block u 7
+
+	add $0x100, SRC
+	jmp .Lad_u_loop
+
+.Lad_out_0:
+	state_store0
+	FRAME_END
+	ret
+
+.Lad_out_1:
+	state_store1
+	FRAME_END
+	ret
+
+.Lad_out_2:
+	state_store2
+	FRAME_END
+	ret
+
+.Lad_out_3:
+	state_store3
+	FRAME_END
+	ret
+
+.Lad_out_4:
+	state_store4
+	FRAME_END
+	ret
+
+.Lad_out_5:
+	state_store5
+	FRAME_END
+	ret
+
+.Lad_out_6:
+	state_store6
+	FRAME_END
+	ret
+
+.Lad_out_7:
+	state_store7
+	FRAME_END
+	ret
+
+.Lad_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128l_aesni_ad)
+
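+/*
+ * XOR the two message blocks with the AEGIS-128L keystream,
+ * z0 = S1 ^ S6 ^ (S2 & S3) and z1 = S2 ^ S5 ^ (S6 & S7); the
+ * crypt0..crypt7 variants pick the register names matching the
+ * current state rotation.
+ */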
+.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
+	pxor \s1, \m0
+	pxor \s6, \m0
+	movdqa \s2, T3
+	pand \s3, T3
+	pxor T3, \m0
+
+	pxor \s2, \m1
+	pxor \s5, \m1
+	movdqa \s6, T3
+	pand \s7, T3
+	pxor T3, \m1
+.endm
+
+.macro crypt0 m0 m1
+	crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
+.endm
+
+.macro crypt1 m0 m1
+	crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
+.endm
+
+.macro crypt2 m0 m1
+	crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
+.endm
+
+.macro crypt3 m0 m1
+	crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
+.endm
+
+.macro crypt4 m0 m1
+	crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
+.endm
+
+.macro crypt5 m0 m1
+	crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
+.endm
+
+.macro crypt6 m0 m1
+	crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
+.endm
+
+.macro crypt7 m0 m1
+	crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
+.endm
+
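+/*
+ * Encrypt one 32-byte chunk: T0/T1 take the ciphertext while MSG0/MSG1
+ * keep the plaintext, which is what the state update absorbs.
+ */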
+.macro encrypt_block a i
+	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
+	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
+	movdqa MSG0, T0
+	movdqa MSG1, T1
+	crypt\i T0, T1
+	movdq\a T0, (\i * 0x20 + 0x00)(DST)
+	movdq\a T1, (\i * 0x20 + 0x10)(DST)
+
+	update\i
+
+	sub $0x20, LEN
+	cmp $0x20, LEN
+	jl .Lenc_out_\i
+.endm
+
+.macro decrypt_block a i
+	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
+	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
+	crypt\i MSG0, MSG1
+	movdq\a MSG0, (\i * 0x20 + 0x00)(DST)
+	movdq\a MSG1, (\i * 0x20 + 0x10)(DST)
+
+	update\i
+
+	sub $0x20, LEN
+	cmp $0x20, LEN
+	jl .Ldec_out_\i
+.endm
+
+/*
+ * void crypto_aegis128l_aesni_enc(void *state, unsigned int length,
+ *                                 const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128l_aesni_enc)
+	FRAME_BEGIN
+
+	cmp $0x20, LEN
+	jb .Lenc_out
+
+	state_load
+
+	mov  SRC, %r8
+	or   DST, %r8
+	and $0xf, %r8
+	jnz .Lenc_u_loop
+
+.align 8
+.Lenc_a_loop:
+	encrypt_block a 0
+	encrypt_block a 1
+	encrypt_block a 2
+	encrypt_block a 3
+	encrypt_block a 4
+	encrypt_block a 5
+	encrypt_block a 6
+	encrypt_block a 7
+
+	add $0x100, SRC
+	add $0x100, DST
+	jmp .Lenc_a_loop
+
+.align 8
+.Lenc_u_loop:
+	encrypt_block u 0
+	encrypt_block u 1
+	encrypt_block u 2
+	encrypt_block u 3
+	encrypt_block u 4
+	encrypt_block u 5
+	encrypt_block u 6
+	encrypt_block u 7
+
+	add $0x100, SRC
+	add $0x100, DST
+	jmp .Lenc_u_loop
+
+.Lenc_out_0:
+	state_store0
+	FRAME_END
+	ret
+
+.Lenc_out_1:
+	state_store1
+	FRAME_END
+	ret
+
+.Lenc_out_2:
+	state_store2
+	FRAME_END
+	ret
+
+.Lenc_out_3:
+	state_store3
+	FRAME_END
+	ret
+
+.Lenc_out_4:
+	state_store4
+	FRAME_END
+	ret
+
+.Lenc_out_5:
+	state_store5
+	FRAME_END
+	ret
+
+.Lenc_out_6:
+	state_store6
+	FRAME_END
+	ret
+
+.Lenc_out_7:
+	state_store7
+	FRAME_END
+	ret
+
+.Lenc_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128l_aesni_enc)
+
+/*
+ * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length,
+ *                                      const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128l_aesni_enc_tail)
+	FRAME_BEGIN
+
+	state_load
+
+	/* encrypt message: */
+	call __load_partial
+
+	movdqa MSG0, T0
+	movdqa MSG1, T1
+	crypt0 T0, T1
+
+	call __store_partial
+
+	update0
+
+	state_store0
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128l_aesni_enc_tail)
+
+/*
+ * void crypto_aegis128l_aesni_dec(void *state, unsigned int length,
+ *                                 const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128l_aesni_dec)
+	FRAME_BEGIN
+
+	cmp $0x20, LEN
+	jb .Ldec_out
+
+	state_load
+
+	mov  SRC, %r8
+	or   DST, %r8
+	and $0xF, %r8
+	jnz .Ldec_u_loop
+
+.align 8
+.Ldec_a_loop:
+	decrypt_block a 0
+	decrypt_block a 1
+	decrypt_block a 2
+	decrypt_block a 3
+	decrypt_block a 4
+	decrypt_block a 5
+	decrypt_block a 6
+	decrypt_block a 7
+
+	add $0x100, SRC
+	add $0x100, DST
+	jmp .Ldec_a_loop
+
+.align 8
+.Ldec_u_loop:
+	decrypt_block u 0
+	decrypt_block u 1
+	decrypt_block u 2
+	decrypt_block u 3
+	decrypt_block u 4
+	decrypt_block u 5
+	decrypt_block u 6
+	decrypt_block u 7
+
+	add $0x100, SRC
+	add $0x100, DST
+	jmp .Ldec_u_loop
+
+.Ldec_out_0:
+	state_store0
+	FRAME_END
+	ret
+
+.Ldec_out_1:
+	state_store1
+	FRAME_END
+	ret
+
+.Ldec_out_2:
+	state_store2
+	FRAME_END
+	ret
+
+.Ldec_out_3:
+	state_store3
+	FRAME_END
+	ret
+
+.Ldec_out_4:
+	state_store4
+	FRAME_END
+	ret
+
+.Ldec_out_5:
+	state_store5
+	FRAME_END
+	ret
+
+.Ldec_out_6:
+	state_store6
+	FRAME_END
+	ret
+
+.Ldec_out_7:
+	state_store7
+	FRAME_END
+	ret
+
+.Ldec_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128l_aesni_dec)
+
+/*
+ * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length,
+ *                                      const void *src, void *dst);
+ */
+ENTRY(crypto_aegis128l_aesni_dec_tail)
+	FRAME_BEGIN
+
+	state_load
+
+	/* decrypt message: */
+	call __load_partial
+
+	crypt0 MSG0, MSG1
+
+	movdqa MSG0, T0
+	movdqa MSG1, T1
+	call __store_partial
+
+	/*
+	 * Mask with the byte count: build a 32-byte mask from LEN, with
+	 * the 0..15 counter covering the MSG0 bytes and the 16..31
+	 * counter covering the MSG1 bytes, so that bytes at offsets
+	 * >= LEN are cleared before the state update.
+	 */
+	movq LEN, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	movdqa T0, T1
+	movdqa .Laegis128l_counter0, T2
+	movdqa .Laegis128l_counter1, T3
+	pcmpgtb T2, T0
+	pcmpgtb T3, T1
+	pand T0, MSG0
+	pand T1, MSG1
+
+	update0
+
+	state_store0
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128l_aesni_dec_tail)
+
+/*
+ * void crypto_aegis128l_aesni_final(void *state, void *tag_xor,
+ *                                   u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_aegis128l_aesni_final)
+	FRAME_BEGIN
+
+	state_load
+
+	/* prepare length block: */
+	movq %rdx, MSG0
+	movq %rcx, T0
+	pslldq $8, T0
+	pxor T0, MSG0
+	psllq $3, MSG0 /* multiply by 8 (to get bit count) */
+
+	pxor STATE2, MSG0
+	movdqa MSG0, MSG1
+
+	/* update state: */
+	update0
+	update1
+	update2
+	update3
+	update4
+	update5
+	update6
+
+	/* xor tag: */
+	movdqu (%rsi), T0
+
+	pxor STATE1, T0
+	pxor STATE2, T0
+	pxor STATE3, T0
+	pxor STATE4, T0
+	pxor STATE5, T0
+	pxor STATE6, T0
+	pxor STATE7, T0
+
+	movdqu T0, (%rsi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis128l_aesni_final)
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
new file mode 100644
index 0000000..876e486
--- /dev/null
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -0,0 +1,407 @@
+/*
+ * The AEGIS-128L Authenticated-Encryption Algorithm
+ *   Glue for AES-NI + SSE2 implementation
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+#define AEGIS128L_BLOCK_ALIGN 16
+#define AEGIS128L_BLOCK_SIZE 32
+#define AEGIS128L_NONCE_SIZE 16
+#define AEGIS128L_STATE_BLOCKS 8
+#define AEGIS128L_KEY_SIZE 16
+#define AEGIS128L_MIN_AUTH_SIZE 8
+#define AEGIS128L_MAX_AUTH_SIZE 16
+
+asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv);
+
+asmlinkage void crypto_aegis128l_aesni_ad(
+		void *state, unsigned int length, const void *data);
+
+asmlinkage void crypto_aegis128l_aesni_enc(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128l_aesni_dec(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128l_aesni_enc_tail(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128l_aesni_dec_tail(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis128l_aesni_final(
+		void *state, void *tag_xor, unsigned int assoclen,
+		unsigned int cryptlen);
+
+struct aegis_block {
+	u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN);
+};
+
+struct aegis_state {
+	struct aegis_block blocks[AEGIS128L_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+	struct aegis_block key;
+};
+
+struct aegis_crypt_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_blocks)(void *state, unsigned int length, const void *src,
+			     void *dst);
+	void (*crypt_tail)(void *state, unsigned int length, const void *src,
+			   void *dst);
+};
+
+static void crypto_aegis128l_aesni_process_ad(
+		struct aegis_state *state, struct scatterlist *sg_src,
+		unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	struct aegis_block buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= AEGIS128L_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = AEGIS128L_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+				crypto_aegis128l_aesni_ad(state,
+							  AEGIS128L_BLOCK_SIZE,
+							  buf.bytes);
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			crypto_aegis128l_aesni_ad(state, left, src);
+
+			src += left & ~(AEGIS128L_BLOCK_SIZE - 1);
+			left &= AEGIS128L_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+		pos += left;
+		assoclen -= size;
+
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos);
+		crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes);
+	}
+}
+
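+/*
+ * Process each skcipher_walk chunk: whole 32-byte blocks go to the
+ * vector block routine and any trailing partial block to the tail
+ * routine, which handles the loads, stores and padding piecewise.
+ */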
+static void crypto_aegis128l_aesni_process_crypt(
+		struct aegis_state *state, struct aead_request *req,
+		const struct aegis_crypt_ops *ops)
+{
+	struct skcipher_walk walk;
+	u8 *src, *dst;
+	unsigned int chunksize, base;
+
+	ops->skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		src = walk.src.virt.addr;
+		dst = walk.dst.virt.addr;
+		chunksize = walk.nbytes;
+
+		ops->crypt_blocks(state, chunksize, src, dst);
+
+		base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1);
+		src += base;
+		dst += base;
+		chunksize &= AEGIS128L_BLOCK_SIZE - 1;
+
+		if (chunksize > 0)
+			ops->crypt_tail(state, chunksize, src, dst);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
+static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead)
+{
+	u8 *ctx = crypto_aead_ctx(aead);
+	ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
+	return (void *)ctx;
+}
+
+static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead,
+					 const u8 *key, unsigned int keylen)
+{
+	struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead);
+
+	if (keylen != AEGIS128L_KEY_SIZE) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
+
+	return 0;
+}
+
+static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm,
+					      unsigned int authsize)
+{
+	if (authsize > AEGIS128L_MAX_AUTH_SIZE)
+		return -EINVAL;
+	if (authsize < AEGIS128L_MIN_AUTH_SIZE)
+		return -EINVAL;
+	return 0;
+}
+
+static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
+					 struct aegis_block *tag_xor,
+					 unsigned int cryptlen,
+					 const struct aegis_crypt_ops *ops)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
+	struct aegis_state state;
+
+	kernel_fpu_begin();
+
+	crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
+	crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
+	crypto_aegis128l_aesni_process_crypt(&state, req, ops);
+	crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
+
+	kernel_fpu_end();
+}
+
+static int crypto_aegis128l_aesni_encrypt(struct aead_request *req)
+{
+	static const struct aegis_crypt_ops OPS = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_blocks = crypto_aegis128l_aesni_enc,
+		.crypt_tail = crypto_aegis128l_aesni_enc_tail,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+	scatterwalk_map_and_copy(tag.bytes, req->dst,
+				 req->assoclen + cryptlen, authsize, 1);
+	return 0;
+}
+
+static int crypto_aegis128l_aesni_decrypt(struct aead_request *req)
+{
+	static const struct aegis_block zeros = {};
+
+	static const struct aegis_crypt_ops OPS = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_blocks = crypto_aegis128l_aesni_dec,
+		.crypt_tail = crypto_aegis128l_aesni_dec_tail,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag.bytes, req->src,
+				 req->assoclen + cryptlen, authsize, 0);
+
+	crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+	return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
+{
+	return 0;
+}
+
+static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
+{
+}
+
+static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead,
+					 const u8 *key, unsigned int keylen)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+
+static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead,
+					      unsigned int authsize)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+
+static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_encrypt(req);
+}
+
+static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_decrypt(req);
+}
+
+static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead *cryptd_tfm;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+	return 0;
+}
+
+static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
+}
+
+static struct aead_alg crypto_aegis128l_aesni_alg[] = {
+	{
+		.setkey = crypto_aegis128l_aesni_setkey,
+		.setauthsize = crypto_aegis128l_aesni_setauthsize,
+		.encrypt = crypto_aegis128l_aesni_encrypt,
+		.decrypt = crypto_aegis128l_aesni_decrypt,
+		.init = crypto_aegis128l_aesni_init_tfm,
+		.exit = crypto_aegis128l_aesni_exit_tfm,
+
+		.ivsize = AEGIS128L_NONCE_SIZE,
+		.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
+		.chunksize = AEGIS128L_BLOCK_SIZE,
+
+		.base = {
+			.cra_flags = CRYPTO_ALG_INTERNAL,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct aegis_ctx) +
+				__alignof__(struct aegis_ctx),
+			.cra_alignmask = 0,
+
+			.cra_name = "__aegis128l",
+			.cra_driver_name = "__aegis128l-aesni",
+
+			.cra_module = THIS_MODULE,
+		}
+	}, {
+		.setkey = cryptd_aegis128l_aesni_setkey,
+		.setauthsize = cryptd_aegis128l_aesni_setauthsize,
+		.encrypt = cryptd_aegis128l_aesni_encrypt,
+		.decrypt = cryptd_aegis128l_aesni_decrypt,
+		.init = cryptd_aegis128l_aesni_init_tfm,
+		.exit = cryptd_aegis128l_aesni_exit_tfm,
+
+		.ivsize = AEGIS128L_NONCE_SIZE,
+		.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
+		.chunksize = AEGIS128L_BLOCK_SIZE,
+
+		.base = {
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct cryptd_aead *),
+			.cra_alignmask = 0,
+
+			.cra_priority = 400,
+
+			.cra_name = "aegis128l",
+			.cra_driver_name = "aegis128l-aesni",
+
+			.cra_module = THIS_MODULE,
+		}
+	}
+};
+
+static const struct x86_cpu_id aesni_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_AES),
+	X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
+static int __init crypto_aegis128l_aesni_module_init(void)
+{
+	if (!x86_match_cpu(aesni_cpu_id))
+		return -ENODEV;
+
+	return crypto_register_aeads(crypto_aegis128l_aesni_alg,
+				     ARRAY_SIZE(crypto_aegis128l_aesni_alg));
+}
+
+static void __exit crypto_aegis128l_aesni_module_exit(void)
+{
+	crypto_unregister_aeads(crypto_aegis128l_aesni_alg,
+				ARRAY_SIZE(crypto_aegis128l_aesni_alg));
+}
+
+module_init(crypto_aegis128l_aesni_module_init);
+module_exit(crypto_aegis128l_aesni_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation");
+MODULE_ALIAS_CRYPTO("aegis128l");
+MODULE_ALIAS_CRYPTO("aegis128l-aesni");
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
new file mode 100644
index 0000000..1d977d5
--- /dev/null
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -0,0 +1,702 @@
+/*
+ * AES-NI + SSE2 implementation of AEGIS-256
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define STATE0	%xmm0
+#define STATE1	%xmm1
+#define STATE2	%xmm2
+#define STATE3	%xmm3
+#define STATE4	%xmm4
+#define STATE5	%xmm5
+#define MSG	%xmm6
+#define T0	%xmm7
+#define T1	%xmm8
+#define T2	%xmm9
+#define T3	%xmm10
+
+#define STATEP	%rdi
+#define LEN	%rsi
+#define SRC	%rdx
+#define DST	%rcx
+
+.section .rodata.cst16.aegis256_const, "aM", @progbits, 32
+.align 16
+.Laegis256_const_0:
+	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Laegis256_const_1:
+	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16
+.align 16
+.Laegis256_counter:
+	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+
+.text
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ *   LEN - bytes
+ *   SRC - src
+ * output:
+ *   MSG  - message block
+ * changed:
+ *   T0
+ *   %r8
+ *   %r9
+ */
+__load_partial:
+	xor %r9, %r9
+	pxor MSG, MSG
+
+	mov LEN, %r8
+	and $0x1, %r8
+	jz .Lld_partial_1
+
+	mov LEN, %r8
+	and $0x1E, %r8
+	add SRC, %r8
+	mov (%r8), %r9b
+
+.Lld_partial_1:
+	mov LEN, %r8
+	and $0x2, %r8
+	jz .Lld_partial_2
+
+	mov LEN, %r8
+	and $0x1C, %r8
+	add SRC, %r8
+	shl $0x10, %r9
+	mov (%r8), %r9w
+
+.Lld_partial_2:
+	mov LEN, %r8
+	and $0x4, %r8
+	jz .Lld_partial_4
+
+	mov LEN, %r8
+	and $0x18, %r8
+	add SRC, %r8
+	shl $32, %r9
+	mov (%r8), %r8d
+	xor %r8, %r9
+
+.Lld_partial_4:
+	movq %r9, MSG
+
+	mov LEN, %r8
+	and $0x8, %r8
+	jz .Lld_partial_8
+
+	mov LEN, %r8
+	and $0x10, %r8
+	add SRC, %r8
+	pslldq $8, MSG
+	movq (%r8), T0
+	pxor T0, MSG
+
+.Lld_partial_8:
+	ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ *   LEN - bytes
+ *   DST - dst
+ * output:
+ *   T0   - message block
+ * changed:
+ *   %r8
+ *   %r9
+ *   %r10
+ */
+__store_partial:
+	mov LEN, %r8
+	mov DST, %r9
+
+	movq T0, %r10
+
+	cmp $8, %r8
+	jl .Lst_partial_8
+
+	mov %r10, (%r9)
+	psrldq $8, T0
+	movq T0, %r10
+
+	sub $8, %r8
+	add $8, %r9
+
+.Lst_partial_8:
+	cmp $4, %r8
+	jl .Lst_partial_4
+
+	mov %r10d, (%r9)
+	shr $32, %r10
+
+	sub $4, %r8
+	add $4, %r9
+
+.Lst_partial_4:
+	cmp $2, %r8
+	jl .Lst_partial_2
+
+	mov %r10w, (%r9)
+	shr $0x10, %r10
+
+	sub $2, %r8
+	add $2, %r9
+
+.Lst_partial_2:
+	cmp $1, %r8
+	jl .Lst_partial_1
+
+	mov %r10b, (%r9)
+
+.Lst_partial_1:
+	ret
+ENDPROC(__store_partial)
+
+.macro update
+	movdqa STATE5, T0
+	aesenc STATE0, STATE5
+	aesenc STATE1, STATE0
+	aesenc STATE2, STATE1
+	aesenc STATE3, STATE2
+	aesenc STATE4, STATE3
+	aesenc T0,     STATE4
+.endm
+
+.macro update0 m
+	update
+	pxor \m, STATE5
+.endm
+
+.macro update1 m
+	update
+	pxor \m, STATE4
+.endm
+
+.macro update2 m
+	update
+	pxor \m, STATE3
+.endm
+
+.macro update3 m
+	update
+	pxor \m, STATE2
+.endm
+
+.macro update4 m
+	update
+	pxor \m, STATE1
+.endm
+
+.macro update5 m
+	update
+	pxor \m, STATE0
+.endm
+
+.macro state_load
+	movdqu 0x00(STATEP), STATE0
+	movdqu 0x10(STATEP), STATE1
+	movdqu 0x20(STATEP), STATE2
+	movdqu 0x30(STATEP), STATE3
+	movdqu 0x40(STATEP), STATE4
+	movdqu 0x50(STATEP), STATE5
+.endm
+
+.macro state_store s0 s1 s2 s3 s4 s5
+	movdqu \s5, 0x00(STATEP)
+	movdqu \s0, 0x10(STATEP)
+	movdqu \s1, 0x20(STATEP)
+	movdqu \s2, 0x30(STATEP)
+	movdqu \s3, 0x40(STATEP)
+	movdqu \s4, 0x50(STATEP)
+.endm
+
+.macro state_store0
+	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
+.endm
+
+.macro state_store1
+	state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
+.endm
+
+.macro state_store2
+	state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
+.endm
+
+.macro state_store3
+	state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
+.endm
+
+.macro state_store4
+	state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
+.endm
+
+.macro state_store5
+	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
+.endm
+
+/*
+ * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
+ */
+ENTRY(crypto_aegis256_aesni_init)
+	FRAME_BEGIN
+
+	/* load key: */
+	movdqa 0x00(%rsi), MSG
+	movdqa 0x10(%rsi), T1
+	movdqa MSG, STATE4
+	movdqa T1, STATE5
+
+	/* load IV: */
+	movdqu 0x00(%rdx), T2
+	movdqu 0x10(%rdx), T3
+	pxor MSG, T2
+	pxor T1, T3
+	movdqa T2, STATE0
+	movdqa T3, STATE1
+
+	/* load the constants: */
+	movdqa .Laegis256_const_0, STATE3
+	movdqa .Laegis256_const_1, STATE2
+	pxor STATE3, STATE4
+	pxor STATE2, STATE5
+
+	/* update 16 times with KEY and KEY xor IV: */
+	update0 MSG
+	update1 T1
+	update2 T2
+	update3 T3
+	update4 MSG
+	update5 T1
+	update0 T2
+	update1 T3
+	update2 MSG
+	update3 T1
+	update4 T2
+	update5 T3
+	update0 MSG
+	update1 T1
+	update2 T2
+	update3 T3
+
+	state_store3
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis256_aesni_init)
+
+.macro ad_block a i
+	movdq\a (\i * 0x10)(SRC), MSG
+	update\i MSG
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lad_out_\i
+.endm
+
+/*
+ * void crypto_aegis256_aesni_ad(void *state, unsigned int length,
+ *                               const void *data);
+ */
+ENTRY(crypto_aegis256_aesni_ad)
+	FRAME_BEGIN
+
+	cmp $0x10, LEN
+	jb .Lad_out
+
+	state_load
+
+	mov  SRC, %r8
+	and $0xf, %r8
+	jnz .Lad_u_loop
+
+.align 8
+.Lad_a_loop:
+	ad_block a 0
+	ad_block a 1
+	ad_block a 2
+	ad_block a 3
+	ad_block a 4
+	ad_block a 5
+
+	add $0x60, SRC
+	jmp .Lad_a_loop
+
+.align 8
+.Lad_u_loop:
+	ad_block u 0
+	ad_block u 1
+	ad_block u 2
+	ad_block u 3
+	ad_block u 4
+	ad_block u 5
+
+	add $0x60, SRC
+	jmp .Lad_u_loop
+
+.Lad_out_0:
+	state_store0
+	FRAME_END
+	ret
+
+.Lad_out_1:
+	state_store1
+	FRAME_END
+	ret
+
+.Lad_out_2:
+	state_store2
+	FRAME_END
+	ret
+
+.Lad_out_3:
+	state_store3
+	FRAME_END
+	ret
+
+.Lad_out_4:
+	state_store4
+	FRAME_END
+	ret
+
+.Lad_out_5:
+	state_store5
+	FRAME_END
+	ret
+
+.Lad_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis256_aesni_ad)
+
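+/*
+ * XOR the message block with the AEGIS-256 keystream
+ * z = S1 ^ S4 ^ S5 ^ (S2 & S3); the crypt0..crypt5 variants pick the
+ * register names matching the current state rotation.
+ */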
+.macro crypt m s0 s1 s2 s3 s4 s5
+	pxor \s1, \m
+	pxor \s4, \m
+	pxor \s5, \m
+	movdqa \s2, T3
+	pand \s3, T3
+	pxor T3, \m
+.endm
+
+.macro crypt0 m
+	crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
+.endm
+
+.macro crypt1 m
+	crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
+.endm
+
+.macro crypt2 m
+	crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
+.endm
+
+.macro crypt3 m
+	crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
+.endm
+
+.macro crypt4 m
+	crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
+.endm
+
+.macro crypt5 m
+	crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
+.endm
+
+.macro encrypt_block a i
+	movdq\a (\i * 0x10)(SRC), MSG
+	movdqa MSG, T0
+	crypt\i T0
+	movdq\a T0, (\i * 0x10)(DST)
+
+	update\i MSG
+
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Lenc_out_\i
+.endm
+
+.macro decrypt_block a i
+	movdq\a (\i * 0x10)(SRC), MSG
+	crypt\i MSG
+	movdq\a MSG, (\i * 0x10)(DST)
+
+	update\i MSG
+
+	sub $0x10, LEN
+	cmp $0x10, LEN
+	jl .Ldec_out_\i
+.endm
+
+/*
+ * void crypto_aegis256_aesni_enc(void *state, unsigned int length,
+ *                                const void *src, void *dst);
+ */
+ENTRY(crypto_aegis256_aesni_enc)
+	FRAME_BEGIN
+
+	cmp $0x10, LEN
+	jb .Lenc_out
+
+	state_load
+
+	mov  SRC, %r8
+	or   DST, %r8
+	and $0xf, %r8
+	jnz .Lenc_u_loop
+
+.align 8
+.Lenc_a_loop:
+	encrypt_block a 0
+	encrypt_block a 1
+	encrypt_block a 2
+	encrypt_block a 3
+	encrypt_block a 4
+	encrypt_block a 5
+
+	add $0x60, SRC
+	add $0x60, DST
+	jmp .Lenc_a_loop
+
+.align 8
+.Lenc_u_loop:
+	encrypt_block u 0
+	encrypt_block u 1
+	encrypt_block u 2
+	encrypt_block u 3
+	encrypt_block u 4
+	encrypt_block u 5
+
+	add $0x60, SRC
+	add $0x60, DST
+	jmp .Lenc_u_loop
+
+.Lenc_out_0:
+	state_store0
+	FRAME_END
+	ret
+
+.Lenc_out_1:
+	state_store1
+	FRAME_END
+	ret
+
+.Lenc_out_2:
+	state_store2
+	FRAME_END
+	ret
+
+.Lenc_out_3:
+	state_store3
+	FRAME_END
+	ret
+
+.Lenc_out_4:
+	state_store4
+	FRAME_END
+	ret
+
+.Lenc_out_5:
+	state_store5
+	FRAME_END
+	ret
+
+.Lenc_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis256_aesni_enc)
+
+/*
+ * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
+ *                                     const void *src, void *dst);
+ */
+ENTRY(crypto_aegis256_aesni_enc_tail)
+	FRAME_BEGIN
+
+	state_load
+
+	/* encrypt message: */
+	call __load_partial
+
+	movdqa MSG, T0
+	crypt0 T0
+
+	call __store_partial
+
+	update0 MSG
+
+	state_store0
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis256_aesni_enc_tail)
+
+/*
+ * void crypto_aegis256_aesni_dec(void *state, unsigned int length,
+ *                                const void *src, void *dst);
+ */
+ENTRY(crypto_aegis256_aesni_dec)
+	FRAME_BEGIN
+
+	cmp $0x10, LEN
+	jb .Ldec_out
+
+	state_load
+
+	mov  SRC, %r8
+	or   DST, %r8
+	and $0xF, %r8
+	jnz .Ldec_u_loop
+
+.align 8
+.Ldec_a_loop:
+	decrypt_block a 0
+	decrypt_block a 1
+	decrypt_block a 2
+	decrypt_block a 3
+	decrypt_block a 4
+	decrypt_block a 5
+
+	add $0x60, SRC
+	add $0x60, DST
+	jmp .Ldec_a_loop
+
+.align 8
+.Ldec_u_loop:
+	decrypt_block u 0
+	decrypt_block u 1
+	decrypt_block u 2
+	decrypt_block u 3
+	decrypt_block u 4
+	decrypt_block u 5
+
+	add $0x60, SRC
+	add $0x60, DST
+	jmp .Ldec_u_loop
+
+.Ldec_out_0:
+	state_store0
+	FRAME_END
+	ret
+
+.Ldec_out_1:
+	state_store1
+	FRAME_END
+	ret
+
+.Ldec_out_2:
+	state_store2
+	FRAME_END
+	ret
+
+.Ldec_out_3:
+	state_store3
+	FRAME_END
+	ret
+
+.Ldec_out_4:
+	state_store4
+	FRAME_END
+	ret
+
+.Ldec_out_5:
+	state_store5
+	FRAME_END
+	ret
+
+.Ldec_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis256_aesni_dec)
+
+/*
+ * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
+ *                                     const void *src, void *dst);
+ */
+ENTRY(crypto_aegis256_aesni_dec_tail)
+	FRAME_BEGIN
+
+	state_load
+
+	/* decrypt message: */
+	call __load_partial
+
+	crypt0 MSG
+
+	movdqa MSG, T0
+	call __store_partial
+
+	/*
+	 * Mask with the byte count: as in aegis128, broadcast LEN to all
+	 * 16 bytes of T0 and compare against the 0..15 counter so that
+	 * bytes at offsets >= LEN are cleared before the state update.
+	 */
+	movq LEN, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	movdqa .Laegis256_counter, T1
+	pcmpgtb T1, T0
+	pand T0, MSG
+
+	update0 MSG
+
+	state_store0
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis256_aesni_dec_tail)
+
+/*
+ * void crypto_aegis256_aesni_final(void *state, void *tag_xor,
+ *                                  u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_aegis256_aesni_final)
+	FRAME_BEGIN
+
+	state_load
+
+	/* prepare length block: */
+	movq %rdx, MSG
+	movq %rcx, T0
+	pslldq $8, T0
+	pxor T0, MSG
+	psllq $3, MSG /* multiply by 8 (to get bit count) */
+
+	pxor STATE3, MSG
+
+	/* update state: */
+	update0 MSG
+	update1 MSG
+	update2 MSG
+	update3 MSG
+	update4 MSG
+	update5 MSG
+	update0 MSG
+
+	/* xor tag: */
+	movdqu (%rsi), MSG
+
+	pxor STATE0, MSG
+	pxor STATE1, MSG
+	pxor STATE2, MSG
+	pxor STATE3, MSG
+	pxor STATE4, MSG
+	pxor STATE5, MSG
+
+	movdqu MSG, (%rsi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_aegis256_aesni_final)
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
new file mode 100644
index 0000000..2b5dd3a
--- /dev/null
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -0,0 +1,407 @@
+/*
+ * The AEGIS-256 Authenticated-Encryption Algorithm
+ *   Glue for AES-NI + SSE2 implementation
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+#define AEGIS256_BLOCK_ALIGN 16
+#define AEGIS256_BLOCK_SIZE 16
+#define AEGIS256_NONCE_SIZE 32
+#define AEGIS256_STATE_BLOCKS 6
+#define AEGIS256_KEY_SIZE 32
+#define AEGIS256_MIN_AUTH_SIZE 8
+#define AEGIS256_MAX_AUTH_SIZE 16
+
+asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv);
+
+asmlinkage void crypto_aegis256_aesni_ad(
+		void *state, unsigned int length, const void *data);
+
+asmlinkage void crypto_aegis256_aesni_enc(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis256_aesni_dec(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis256_aesni_enc_tail(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis256_aesni_dec_tail(
+		void *state, unsigned int length, const void *src, void *dst);
+
+asmlinkage void crypto_aegis256_aesni_final(
+		void *state, void *tag_xor, unsigned int assoclen,
+		unsigned int cryptlen);
+
+struct aegis_block {
+	u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN);
+};
+
+struct aegis_state {
+	struct aegis_block blocks[AEGIS256_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+	struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE];
+};
+
+struct aegis_crypt_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_blocks)(void *state, unsigned int length, const void *src,
+			     void *dst);
+	void (*crypt_tail)(void *state, unsigned int length, const void *src,
+			   void *dst);
+};
+
+static void crypto_aegis256_aesni_process_ad(
+		struct aegis_state *state, struct scatterlist *sg_src,
+		unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	struct aegis_block buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= AEGIS256_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = AEGIS256_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+				crypto_aegis256_aesni_ad(state,
+							 AEGIS256_BLOCK_SIZE,
+							 buf.bytes);
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			crypto_aegis256_aesni_ad(state, left, src);
+
+			src += left & ~(AEGIS256_BLOCK_SIZE - 1);
+			left &= AEGIS256_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+		pos += left;
+		assoclen -= size;
+
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos);
+		crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes);
+	}
+}
+
+static void crypto_aegis256_aesni_process_crypt(
+		struct aegis_state *state, struct aead_request *req,
+		const struct aegis_crypt_ops *ops)
+{
+	struct skcipher_walk walk;
+	u8 *src, *dst;
+	unsigned int chunksize, base;
+
+	ops->skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		src = walk.src.virt.addr;
+		dst = walk.dst.virt.addr;
+		chunksize = walk.nbytes;
+
+		ops->crypt_blocks(state, chunksize, src, dst);
+
+		base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1);
+		src += base;
+		dst += base;
+		chunksize &= AEGIS256_BLOCK_SIZE - 1;
+
+		if (chunksize > 0)
+			ops->crypt_tail(state, chunksize, src, dst);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
+static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead)
+{
+	u8 *ctx = crypto_aead_ctx(aead);
+	ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
+	return (void *)ctx;
+}
+
+static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key,
+					unsigned int keylen)
+{
+	struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead);
+
+	if (keylen != AEGIS256_KEY_SIZE) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key, key, AEGIS256_KEY_SIZE);
+
+	return 0;
+}
+
+static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm,
+						unsigned int authsize)
+{
+	if (authsize > AEGIS256_MAX_AUTH_SIZE)
+		return -EINVAL;
+	if (authsize < AEGIS256_MIN_AUTH_SIZE)
+		return -EINVAL;
+	return 0;
+}
+
+static void crypto_aegis256_aesni_crypt(struct aead_request *req,
+					struct aegis_block *tag_xor,
+					unsigned int cryptlen,
+					const struct aegis_crypt_ops *ops)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
+	struct aegis_state state;
+
+	kernel_fpu_begin();
+
+	crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
+	crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
+	crypto_aegis256_aesni_process_crypt(&state, req, ops);
+	crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
+
+	kernel_fpu_end();
+}
+
+static int crypto_aegis256_aesni_encrypt(struct aead_request *req)
+{
+	static const struct aegis_crypt_ops OPS = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_blocks = crypto_aegis256_aesni_enc,
+		.crypt_tail = crypto_aegis256_aesni_enc_tail,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
+
+	scatterwalk_map_and_copy(tag.bytes, req->dst,
+				 req->assoclen + cryptlen, authsize, 1);
+	return 0;
+}
+
+static int crypto_aegis256_aesni_decrypt(struct aead_request *req)
+{
+	static const struct aegis_block zeros = {};
+
+	static const struct aegis_crypt_ops OPS = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_blocks = crypto_aegis256_aesni_dec,
+		.crypt_tail = crypto_aegis256_aesni_dec_tail,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag.bytes, req->src,
+				 req->assoclen + cryptlen, authsize, 0);
+
+	crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
+
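+	/* the computed tag was XORed into 'tag', so a match is all zeros */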
+	return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead)
+{
+	return 0;
+}
+
+static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
+{
+}
+
+static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead,
+					const u8 *key, unsigned int keylen)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+
+static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead,
+					     unsigned int authsize)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+
+static int cryptd_aegis256_aesni_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
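+	/*
+	 * Use the internal implementation directly whenever the FPU is
+	 * usable, unless we are in atomic context with requests already
+	 * queued on cryptd (those must complete first to keep ordering).
+	 */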
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_encrypt(req);
+}
+
+static int cryptd_aegis256_aesni_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_decrypt(req);
+}
+
+static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead *cryptd_tfm;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+	return 0;
+}
+
+static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
+}
+
+static struct aead_alg crypto_aegis256_aesni_alg[] = {
+	{
+		.setkey = crypto_aegis256_aesni_setkey,
+		.setauthsize = crypto_aegis256_aesni_setauthsize,
+		.encrypt = crypto_aegis256_aesni_encrypt,
+		.decrypt = crypto_aegis256_aesni_decrypt,
+		.init = crypto_aegis256_aesni_init_tfm,
+		.exit = crypto_aegis256_aesni_exit_tfm,
+
+		.ivsize = AEGIS256_NONCE_SIZE,
+		.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
+		.chunksize = AEGIS256_BLOCK_SIZE,
+
+		.base = {
+			.cra_flags = CRYPTO_ALG_INTERNAL,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct aegis_ctx) +
+				__alignof__(struct aegis_ctx),
+			.cra_alignmask = 0,
+
+			.cra_name = "__aegis256",
+			.cra_driver_name = "__aegis256-aesni",
+
+			.cra_module = THIS_MODULE,
+		}
+	}, {
+		.setkey = cryptd_aegis256_aesni_setkey,
+		.setauthsize = cryptd_aegis256_aesni_setauthsize,
+		.encrypt = cryptd_aegis256_aesni_encrypt,
+		.decrypt = cryptd_aegis256_aesni_decrypt,
+		.init = cryptd_aegis256_aesni_init_tfm,
+		.exit = cryptd_aegis256_aesni_exit_tfm,
+
+		.ivsize = AEGIS256_NONCE_SIZE,
+		.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
+		.chunksize = AEGIS256_BLOCK_SIZE,
+
+		.base = {
+			.cra_flags = CRYPTO_ALG_ASYNC,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct cryptd_aead *),
+			.cra_alignmask = 0,
+
+			.cra_priority = 400,
+
+			.cra_name = "aegis256",
+			.cra_driver_name = "aegis256-aesni",
+
+			.cra_module = THIS_MODULE,
+		}
+	}
+};
+
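+/* kept for module autoloading; the init check below requires both features */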
+static const struct x86_cpu_id aesni_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_AES),
+	X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
+static int __init crypto_aegis256_aesni_module_init(void)
+{
+	/* x86_match_cpu() would accept either feature; we need both: */
+	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
+	    !boot_cpu_has(X86_FEATURE_AES))
+		return -ENODEV;
+
+	return crypto_register_aeads(crypto_aegis256_aesni_alg,
+				    ARRAY_SIZE(crypto_aegis256_aesni_alg));
+}
+
+static void __exit crypto_aegis256_aesni_module_exit(void)
+{
+	crypto_unregister_aeads(crypto_aegis256_aesni_alg,
+				ARRAY_SIZE(crypto_aegis256_aesni_alg));
+}
+
+module_init(crypto_aegis256_aesni_module_init);
+module_exit(crypto_aegis256_aesni_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation");
+MODULE_ALIAS_CRYPTO("aegis256");
+MODULE_ALIAS_CRYPTO("aegis256-aesni");
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 0420bab..2ddbe3a 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -364,5 +364,5 @@
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
-		   "acclerated by PCLMULQDQ-NI");
+		   "accelerated by PCLMULQDQ-NI");
 MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
new file mode 100644
index 0000000..37d422e
--- /dev/null
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -0,0 +1,621 @@
+/*
+ * AVX2 implementation of MORUS-1280
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define SHUFFLE_MASK(i0, i1, i2, i3) \
+	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
+
+#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
+#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
+#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
+
+#define STATE0		%ymm0
+#define STATE0_LOW	%xmm0
+#define STATE1		%ymm1
+#define STATE2		%ymm2
+#define STATE3		%ymm3
+#define STATE4		%ymm4
+#define KEY		%ymm5
+#define MSG		%ymm5
+#define MSG_LOW		%xmm5
+#define T0		%ymm6
+#define T0_LOW		%xmm6
+#define T1		%ymm7
+
+.section .rodata.cst32.morus1280_const, "aM", @progbits, 32
+.align 32
+.Lmorus1280_const:
+	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
+.align 32
+.Lmorus1280_counter:
+	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+
+.text
+
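+/*
+ * Single MORUS-1280 round: s0 ^= (s1 & s2) ^ s3, each 64-bit lane of s0
+ * is rotated left by \b, and the qwords of s3 are permuted by \w.
+ */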
+.macro morus1280_round s0, s1, s2, s3, s4, b, w
+	vpand \s1, \s2, T0
+	vpxor T0, \s0, \s0
+	vpxor \s3, \s0, \s0
+	vpsllq $\b, \s0, T0
+	vpsrlq $(64 - \b), \s0, \s0
+	vpxor T0, \s0, \s0
+	vpermq $\w, \s3, \s3
+.endm
+
+/*
+ * __morus1280_update: internal ABI
+ * input:
+ *   STATE[0-4] - input state
+ *   MSG        - message block
+ * output:
+ *   STATE[0-4] - output state
+ * changed:
+ *   T0
+ */
+__morus1280_update:
+	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
+	vpxor MSG, STATE1, STATE1
+	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
+	vpxor MSG, STATE2, STATE2
+	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
+	vpxor MSG, STATE3, STATE3
+	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2,  7, MASK2
+	vpxor MSG, STATE4, STATE4
+	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3,  4, MASK1
+	ret
+ENDPROC(__morus1280_update)
+
+/*
+ * __morus1280_update_zero: internal ABI
+ * input:
+ *   STATE[0-4] - input state
+ * output:
+ *   STATE[0-4] - output state
+ * changed:
+ *   T0
+ */
+__morus1280_update_zero:
+	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
+	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
+	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
+	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2,  7, MASK2
+	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3,  4, MASK1
+	ret
+ENDPROC(__morus1280_update_zero)
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ *   %rsi - src
+ *   %rcx - bytes
+ * output:
+ *   MSG  - message block
+ * changed:
+ *   %r8
+ *   %r9
+ */
+__load_partial:
+	xor %r9, %r9
+	vpxor MSG, MSG, MSG
+
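+	/* gather the trailing 1, 2 and 4 byte pieces into %r9 so that no
+	 * load reads past the end of the source buffer: */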
+	mov %rcx, %r8
+	and $0x1, %r8
+	jz .Lld_partial_1
+
+	mov %rcx, %r8
+	and $0x1E, %r8
+	add %rsi, %r8
+	mov (%r8), %r9b
+
+.Lld_partial_1:
+	mov %rcx, %r8
+	and $0x2, %r8
+	jz .Lld_partial_2
+
+	mov %rcx, %r8
+	and $0x1C, %r8
+	add %rsi, %r8
+	shl $16, %r9
+	mov (%r8), %r9w
+
+.Lld_partial_2:
+	mov %rcx, %r8
+	and $0x4, %r8
+	jz .Lld_partial_4
+
+	mov %rcx, %r8
+	and $0x18, %r8
+	add %rsi, %r8
+	shl $32, %r9
+	mov (%r8), %r8d
+	xor %r8, %r9
+
+.Lld_partial_4:
+	movq %r9, MSG_LOW
+
+	mov %rcx, %r8
+	and $0x8, %r8
+	jz .Lld_partial_8
+
+	mov %rcx, %r8
+	and $0x10, %r8
+	add %rsi, %r8
+	pshufd $MASK2, MSG_LOW, MSG_LOW
+	pinsrq $0, (%r8), MSG_LOW
+
+.Lld_partial_8:
+	mov %rcx, %r8
+	and $0x10, %r8
+	jz .Lld_partial_16
+
+	vpermq $MASK2, MSG, MSG
+	movdqu (%rsi), MSG_LOW
+
+.Lld_partial_16:
+	ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ *   %rdx - dst
+ *   %rcx - bytes
+ * output:
+ *   T0   - message block
+ * changed:
+ *   %r8
+ *   %r9
+ *   %r10
+ */
+__store_partial:
+	mov %rcx, %r8
+	mov %rdx, %r9
+
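+	/* write the block out in descending power-of-two pieces so that
+	 * nothing is stored past dst + bytes: */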
+	cmp $16, %r8
+	jl .Lst_partial_16
+
+	movdqu T0_LOW, (%r9)
+	vpermq $MASK2, T0, T0
+
+	sub $16, %r8
+	add $16, %r9
+
+.Lst_partial_16:
+	movq T0_LOW, %r10
+
+	cmp $8, %r8
+	jl .Lst_partial_8
+
+	mov %r10, (%r9)
+	pextrq $1, T0_LOW, %r10
+
+	sub $8, %r8
+	add $8, %r9
+
+.Lst_partial_8:
+	cmp $4, %r8
+	jl .Lst_partial_4
+
+	mov %r10d, (%r9)
+	shr $32, %r10
+
+	sub $4, %r8
+	add $4, %r9
+
+.Lst_partial_4:
+	cmp $2, %r8
+	jl .Lst_partial_2
+
+	mov %r10w, (%r9)
+	shr $16, %r10
+
+	sub $2, %r8
+	add $2, %r9
+
+.Lst_partial_2:
+	cmp $1, %r8
+	jl .Lst_partial_1
+
+	mov %r10b, (%r9)
+
+.Lst_partial_1:
+	ret
+ENDPROC(__store_partial)
+
+/*
+ * void crypto_morus1280_avx2_init(void *state, const void *key,
+ *                                 const void *iv);
+ */
+ENTRY(crypto_morus1280_avx2_init)
+	FRAME_BEGIN
+
+	/* load IV: */
+	vpxor STATE0, STATE0, STATE0
+	movdqu (%rdx), STATE0_LOW
+	/* load key: */
+	vmovdqu (%rsi), KEY
+	vmovdqa KEY, STATE1
+	/* load all ones: */
+	vpcmpeqd STATE2, STATE2, STATE2
+	/* load all zeros: */
+	vpxor STATE3, STATE3, STATE3
+	/* load the constant: */
+	vmovdqa .Lmorus1280_const, STATE4
+
+	/* update 16 times with zero: */
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+
+	/* xor-in the key again after updates: */
+	vpxor KEY, STATE1, STATE1
+
+	/* store the state: */
+	vmovdqu STATE0, (0 * 32)(%rdi)
+	vmovdqu STATE1, (1 * 32)(%rdi)
+	vmovdqu STATE2, (2 * 32)(%rdi)
+	vmovdqu STATE3, (3 * 32)(%rdi)
+	vmovdqu STATE4, (4 * 32)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_avx2_init)
+
+/*
+ * void crypto_morus1280_avx2_ad(void *state, const void *data,
+ *                               unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_ad)
+	FRAME_BEGIN
+
+	cmp $32, %rdx
+	jb .Lad_out
+
+	/* load the state: */
+	vmovdqu (0 * 32)(%rdi), STATE0
+	vmovdqu (1 * 32)(%rdi), STATE1
+	vmovdqu (2 * 32)(%rdi), STATE2
+	vmovdqu (3 * 32)(%rdi), STATE3
+	vmovdqu (4 * 32)(%rdi), STATE4
+
+	mov %rsi,  %r8
+	and $0x1F, %r8
+	jnz .Lad_u_loop
+
+.align 4
+.Lad_a_loop:
+	vmovdqa (%rsi), MSG
+	call __morus1280_update
+	sub $32, %rdx
+	add $32, %rsi
+	cmp $32, %rdx
+	jge .Lad_a_loop
+
+	jmp .Lad_cont
+.align 4
+.Lad_u_loop:
+	vmovdqu (%rsi), MSG
+	call __morus1280_update
+	sub $32, %rdx
+	add $32, %rsi
+	cmp $32, %rdx
+	jge .Lad_u_loop
+
+.Lad_cont:
+	/* store the state: */
+	vmovdqu STATE0, (0 * 32)(%rdi)
+	vmovdqu STATE1, (1 * 32)(%rdi)
+	vmovdqu STATE2, (2 * 32)(%rdi)
+	vmovdqu STATE3, (3 * 32)(%rdi)
+	vmovdqu STATE4, (4 * 32)(%rdi)
+
+.Lad_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_avx2_ad)
+
+/*
+ * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
+ *                                unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_enc)
+	FRAME_BEGIN
+
+	cmp $32, %rcx
+	jb .Lenc_out
+
+	/* load the state: */
+	vmovdqu (0 * 32)(%rdi), STATE0
+	vmovdqu (1 * 32)(%rdi), STATE1
+	vmovdqu (2 * 32)(%rdi), STATE2
+	vmovdqu (3 * 32)(%rdi), STATE3
+	vmovdqu (4 * 32)(%rdi), STATE4
+
+	mov %rsi,  %r8
+	or  %rdx,  %r8
+	and $0x1F, %r8
+	jnz .Lenc_u_loop
+
+.align 4
+.Lenc_a_loop:
+	vmovdqa (%rsi), MSG
+	vmovdqa MSG, T0
+	vpxor STATE0, T0, T0
+	vpermq $MASK3, STATE1, T1
+	vpxor T1, T0, T0
+	vpand STATE2, STATE3, T1
+	vpxor T1, T0, T0
+	vmovdqa T0, (%rdx)
+
+	call __morus1280_update
+	sub $32, %rcx
+	add $32, %rsi
+	add $32, %rdx
+	cmp $32, %rcx
+	jge .Lenc_a_loop
+
+	jmp .Lenc_cont
+.align 4
+.Lenc_u_loop:
+	vmovdqu (%rsi), MSG
+	vmovdqa MSG, T0
+	vpxor STATE0, T0, T0
+	vpermq $MASK3, STATE1, T1
+	vpxor T1, T0, T0
+	vpand STATE2, STATE3, T1
+	vpxor T1, T0, T0
+	vmovdqu T0, (%rdx)
+
+	call __morus1280_update
+	sub $32, %rcx
+	add $32, %rsi
+	add $32, %rdx
+	cmp $32, %rcx
+	jge .Lenc_u_loop
+
+.Lenc_cont:
+	/* store the state: */
+	vmovdqu STATE0, (0 * 32)(%rdi)
+	vmovdqu STATE1, (1 * 32)(%rdi)
+	vmovdqu STATE2, (2 * 32)(%rdi)
+	vmovdqu STATE3, (3 * 32)(%rdi)
+	vmovdqu STATE4, (4 * 32)(%rdi)
+
+.Lenc_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_avx2_enc)
+
+/*
+ * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
+ *                                     unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_enc_tail)
+	FRAME_BEGIN
+
+	/* load the state: */
+	vmovdqu (0 * 32)(%rdi), STATE0
+	vmovdqu (1 * 32)(%rdi), STATE1
+	vmovdqu (2 * 32)(%rdi), STATE2
+	vmovdqu (3 * 32)(%rdi), STATE3
+	vmovdqu (4 * 32)(%rdi), STATE4
+
+	/* encrypt message: */
+	call __load_partial
+
+	vmovdqa MSG, T0
+	vpxor STATE0, T0, T0
+	vpermq $MASK3, STATE1, T1
+	vpxor T1, T0, T0
+	vpand STATE2, STATE3, T1
+	vpxor T1, T0, T0
+
+	call __store_partial
+
+	call __morus1280_update
+
+	/* store the state: */
+	vmovdqu STATE0, (0 * 32)(%rdi)
+	vmovdqu STATE1, (1 * 32)(%rdi)
+	vmovdqu STATE2, (2 * 32)(%rdi)
+	vmovdqu STATE3, (3 * 32)(%rdi)
+	vmovdqu STATE4, (4 * 32)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_avx2_enc_tail)
+
+/*
+ * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
+ *                                unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_dec)
+	FRAME_BEGIN
+
+	cmp $32, %rcx
+	jb .Ldec_out
+
+	/* load the state: */
+	vmovdqu (0 * 32)(%rdi), STATE0
+	vmovdqu (1 * 32)(%rdi), STATE1
+	vmovdqu (2 * 32)(%rdi), STATE2
+	vmovdqu (3 * 32)(%rdi), STATE3
+	vmovdqu (4 * 32)(%rdi), STATE4
+
+	mov %rsi,  %r8
+	or  %rdx,  %r8
+	and $0x1F, %r8
+	jnz .Ldec_u_loop
+
+.align 4
+.Ldec_a_loop:
+	vmovdqa (%rsi), MSG
+	vpxor STATE0, MSG, MSG
+	vpermq $MASK3, STATE1, T0
+	vpxor T0, MSG, MSG
+	vpand STATE2, STATE3, T0
+	vpxor T0, MSG, MSG
+	vmovdqa MSG, (%rdx)
+
+	call __morus1280_update
+	sub $32, %rcx
+	add $32, %rsi
+	add $32, %rdx
+	cmp $32, %rcx
+	jge .Ldec_a_loop
+
+	jmp .Ldec_cont
+.align 4
+.Ldec_u_loop:
+	vmovdqu (%rsi), MSG
+	vpxor STATE0, MSG, MSG
+	vpermq $MASK3, STATE1, T0
+	vpxor T0, MSG, MSG
+	vpand STATE2, STATE3, T0
+	vpxor T0, MSG, MSG
+	vmovdqu MSG, (%rdx)
+
+	call __morus1280_update
+	sub $32, %rcx
+	add $32, %rsi
+	add $32, %rdx
+	cmp $32, %rcx
+	jge .Ldec_u_loop
+
+.Ldec_cont:
+	/* store the state: */
+	vmovdqu STATE0, (0 * 32)(%rdi)
+	vmovdqu STATE1, (1 * 32)(%rdi)
+	vmovdqu STATE2, (2 * 32)(%rdi)
+	vmovdqu STATE3, (3 * 32)(%rdi)
+	vmovdqu STATE4, (4 * 32)(%rdi)
+
+.Ldec_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_avx2_dec)
+
+/*
+ * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
+ *                                     unsigned int length);
+ */
+ENTRY(crypto_morus1280_avx2_dec_tail)
+	FRAME_BEGIN
+
+	/* load the state: */
+	vmovdqu (0 * 32)(%rdi), STATE0
+	vmovdqu (1 * 32)(%rdi), STATE1
+	vmovdqu (2 * 32)(%rdi), STATE2
+	vmovdqu (3 * 32)(%rdi), STATE3
+	vmovdqu (4 * 32)(%rdi), STATE4
+
+	/* decrypt message: */
+	call __load_partial
+
+	vpxor STATE0, MSG, MSG
+	vpermq $MASK3, STATE1, T0
+	vpxor T0, MSG, MSG
+	vpand STATE2, STATE3, T0
+	vpxor T0, MSG, MSG
+	vmovdqa MSG, T0
+
+	call __store_partial
+
+	/* mask with byte count: */
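+	/* (zero bytes past the message end before the state update) */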
+	movq %rcx, T0_LOW
+	vpbroadcastb T0_LOW, T0
+	vmovdqa .Lmorus1280_counter, T1
+	vpcmpgtb T1, T0, T0
+	vpand T0, MSG, MSG
+
+	call __morus1280_update
+
+	/* store the state: */
+	vmovdqu STATE0, (0 * 32)(%rdi)
+	vmovdqu STATE1, (1 * 32)(%rdi)
+	vmovdqu STATE2, (2 * 32)(%rdi)
+	vmovdqu STATE3, (3 * 32)(%rdi)
+	vmovdqu STATE4, (4 * 32)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_avx2_dec_tail)
+
+/*
+ * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
+ *                                  u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_morus1280_avx2_final)
+	FRAME_BEGIN
+
+	/* load the state: */
+	vmovdqu (0 * 32)(%rdi), STATE0
+	vmovdqu (1 * 32)(%rdi), STATE1
+	vmovdqu (2 * 32)(%rdi), STATE2
+	vmovdqu (3 * 32)(%rdi), STATE3
+	vmovdqu (4 * 32)(%rdi), STATE4
+
+	/* xor state[0] into state[4]: */
+	vpxor STATE0, STATE4, STATE4
+
+	/* prepare length block: */
+	vpxor MSG, MSG, MSG
+	vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
+	vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
+	vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */
+
+	/* update state: */
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+
+	/* xor tag: */
+	vmovdqu (%rsi), MSG
+
+	vpxor STATE0, MSG, MSG
+	vpermq $MASK3, STATE1, T0
+	vpxor T0, MSG, MSG
+	vpand STATE2, STATE3, T0
+	vpxor T0, MSG, MSG
+	vmovdqu MSG, (%rsi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_avx2_final)
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
new file mode 100644
index 0000000..f111f36d
--- /dev/null
+++ b/arch/x86/crypto/morus1280-avx2-glue.c
@@ -0,0 +1,68 @@
+/*
+ * The MORUS-1280 Authenticated-Encryption Algorithm
+ *   Glue for AVX2 implementation
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/internal/aead.h>
+#include <crypto/morus1280_glue.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key,
+					   const void *iv);
+asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data,
+					 unsigned int length);
+
+asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src,
+					  void *dst, unsigned int length);
+asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src,
+					  void *dst, unsigned int length);
+
+asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src,
+					       void *dst, unsigned int length);
+asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
+					       void *dst, unsigned int length);
+
+asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
+					    u64 assoclen, u64 cryptlen);
+
+MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
+
+static const struct x86_cpu_id avx2_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_AVX2),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id);
+
+static int __init crypto_morus1280_avx2_module_init(void)
+{
+	if (!x86_match_cpu(avx2_cpu_id))
+		return -ENODEV;
+
+	return crypto_register_aeads(crypto_morus1280_avx2_algs,
+				     ARRAY_SIZE(crypto_morus1280_avx2_algs));
+}
+
+static void __exit crypto_morus1280_avx2_module_exit(void)
+{
+	crypto_unregister_aeads(crypto_morus1280_avx2_algs,
+				ARRAY_SIZE(crypto_morus1280_avx2_algs));
+}
+
+module_init(crypto_morus1280_avx2_module_init);
+module_exit(crypto_morus1280_avx2_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation");
+MODULE_ALIAS_CRYPTO("morus1280");
+MODULE_ALIAS_CRYPTO("morus1280-avx2");
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
new file mode 100644
index 0000000..1fe637c
--- /dev/null
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -0,0 +1,895 @@
+/*
+ * SSE2 implementation of MORUS-1280
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define SHUFFLE_MASK(i0, i1, i2, i3) \
+	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
+
+#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
+
+#define STATE0_LO	%xmm0
+#define STATE0_HI	%xmm1
+#define STATE1_LO	%xmm2
+#define STATE1_HI	%xmm3
+#define STATE2_LO	%xmm4
+#define STATE2_HI	%xmm5
+#define STATE3_LO	%xmm6
+#define STATE3_HI	%xmm7
+#define STATE4_LO	%xmm8
+#define STATE4_HI	%xmm9
+#define KEY_LO		%xmm10
+#define KEY_HI		%xmm11
+#define MSG_LO		%xmm10
+#define MSG_HI		%xmm11
+#define T0_LO		%xmm12
+#define T0_HI		%xmm13
+#define T1_LO		%xmm14
+#define T1_HI		%xmm15
+
+.section .rodata.cst16.morus1280_const, "aM", @progbits, 16
+.align 16
+.Lmorus1280_const_0:
+	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Lmorus1280_const_1:
+	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.morus1280_counter, "aM", @progbits, 16
+.align 16
+.Lmorus1280_counter_0:
+	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+.Lmorus1280_counter_1:
+	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
+	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+
+.text
+
+.macro rol1 hi, lo
+	/*
+	 * HI_1 | HI_0 || LO_1 | LO_0
+	 *  ==>
+	 * HI_0 | HI_1 || LO_1 | LO_0
+	 *  ==>
+	 * HI_0 | LO_1 || LO_0 | HI_1
+	 */
+	pshufd $MASK2, \hi, \hi
+	movdqa \hi, T0_LO
+	punpcklqdq \lo, T0_LO
+	punpckhqdq \hi, \lo
+	movdqa \lo, \hi
+	movdqa T0_LO, \lo
+.endm
+
+.macro rol2 hi, lo
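+	/*
+	 * HI_1 | HI_0 || LO_1 | LO_0
+	 *  ==>
+	 * LO_1 | LO_0 || HI_1 | HI_0
+	 */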
+	movdqa \lo, T0_LO
+	movdqa \hi, \lo
+	movdqa T0_LO, \hi
+.endm
+
+.macro rol3 hi, lo
+	/*
+	 * HI_1 | HI_0 || LO_1 | LO_0
+	 *  ==>
+	 * HI_0 | HI_1 || LO_1 | LO_0
+	 *  ==>
+	 * LO_0 | HI_1 || HI_0 | LO_1
+	 */
+	pshufd $MASK2, \hi, \hi
+	movdqa \lo, T0_LO
+	punpckhqdq \hi, T0_LO
+	punpcklqdq \lo, \hi
+	movdqa T0_LO, \lo
+.endm
+
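+/*
+ * Single MORUS-1280 round on a 256-bit state held in lo/hi xmm pairs:
+ * s0 ^= (s1 & s2) ^ s3, each 64-bit lane of s0 is rotated left by \b,
+ * and the qwords of s3 are rotated with the \w macro (rol1/rol2/rol3).
+ */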
+.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w
+	movdqa \s1_l, T0_LO
+	pand \s2_l, T0_LO
+	pxor T0_LO, \s0_l
+
+	movdqa \s1_h, T0_LO
+	pand \s2_h, T0_LO
+	pxor T0_LO, \s0_h
+
+	pxor \s3_l, \s0_l
+	pxor \s3_h, \s0_h
+
+	movdqa \s0_l, T0_LO
+	psllq $\b, T0_LO
+	psrlq $(64 - \b), \s0_l
+	pxor T0_LO, \s0_l
+
+	movdqa \s0_h, T0_LO
+	psllq $\b, T0_LO
+	psrlq $(64 - \b), \s0_h
+	pxor T0_LO, \s0_h
+
+	\w \s3_h, \s3_l
+.endm
+
+/*
+ * __morus1280_update: internal ABI
+ * input:
+ *   STATE[0-4] - input state
+ *   MSG        - message block
+ * output:
+ *   STATE[0-4] - output state
+ * changed:
+ *   T0
+ */
+__morus1280_update:
+	morus1280_round \
+		STATE0_LO, STATE0_HI, \
+		STATE1_LO, STATE1_HI, \
+		STATE2_LO, STATE2_HI, \
+		STATE3_LO, STATE3_HI, \
+		STATE4_LO, STATE4_HI, \
+		13, rol1
+	pxor MSG_LO, STATE1_LO
+	pxor MSG_HI, STATE1_HI
+	morus1280_round \
+		STATE1_LO, STATE1_HI, \
+		STATE2_LO, STATE2_HI, \
+		STATE3_LO, STATE3_HI, \
+		STATE4_LO, STATE4_HI, \
+		STATE0_LO, STATE0_HI, \
+		46, rol2
+	pxor MSG_LO, STATE2_LO
+	pxor MSG_HI, STATE2_HI
+	morus1280_round \
+		STATE2_LO, STATE2_HI, \
+		STATE3_LO, STATE3_HI, \
+		STATE4_LO, STATE4_HI, \
+		STATE0_LO, STATE0_HI, \
+		STATE1_LO, STATE1_HI, \
+		38, rol3
+	pxor MSG_LO, STATE3_LO
+	pxor MSG_HI, STATE3_HI
+	morus1280_round \
+		STATE3_LO, STATE3_HI, \
+		STATE4_LO, STATE4_HI, \
+		STATE0_LO, STATE0_HI, \
+		STATE1_LO, STATE1_HI, \
+		STATE2_LO, STATE2_HI, \
+		7, rol2
+	pxor MSG_LO, STATE4_LO
+	pxor MSG_HI, STATE4_HI
+	morus1280_round \
+		STATE4_LO, STATE4_HI, \
+		STATE0_LO, STATE0_HI, \
+		STATE1_LO, STATE1_HI, \
+		STATE2_LO, STATE2_HI, \
+		STATE3_LO, STATE3_HI, \
+		4, rol1
+	ret
+ENDPROC(__morus1280_update)
+
+/*
+ * __morus1280_update_zero: internal ABI
+ * input:
+ *   STATE[0-4] - input state
+ * output:
+ *   STATE[0-4] - output state
+ * changed:
+ *   T0
+ */
+__morus1280_update_zero:
+	morus1280_round \
+		STATE0_LO, STATE0_HI, \
+		STATE1_LO, STATE1_HI, \
+		STATE2_LO, STATE2_HI, \
+		STATE3_LO, STATE3_HI, \
+		STATE4_LO, STATE4_HI, \
+		13, rol1
+	morus1280_round \
+		STATE1_LO, STATE1_HI, \
+		STATE2_LO, STATE2_HI, \
+		STATE3_LO, STATE3_HI, \
+		STATE4_LO, STATE4_HI, \
+		STATE0_LO, STATE0_HI, \
+		46, rol2
+	morus1280_round \
+		STATE2_LO, STATE2_HI, \
+		STATE3_LO, STATE3_HI, \
+		STATE4_LO, STATE4_HI, \
+		STATE0_LO, STATE0_HI, \
+		STATE1_LO, STATE1_HI, \
+		38, rol3
+	morus1280_round \
+		STATE3_LO, STATE3_HI, \
+		STATE4_LO, STATE4_HI, \
+		STATE0_LO, STATE0_HI, \
+		STATE1_LO, STATE1_HI, \
+		STATE2_LO, STATE2_HI, \
+		7, rol2
+	morus1280_round \
+		STATE4_LO, STATE4_HI, \
+		STATE0_LO, STATE0_HI, \
+		STATE1_LO, STATE1_HI, \
+		STATE2_LO, STATE2_HI, \
+		STATE3_LO, STATE3_HI, \
+		4, rol1
+	ret
+ENDPROC(__morus1280_update_zero)
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ *   %rsi - src
+ *   %rcx - bytes
+ * output:
+ *   MSG  - message block
+ * changed:
+ *   %r8
+ *   %r9
+ */
+__load_partial:
+	xor %r9, %r9
+	pxor MSG_LO, MSG_LO
+	pxor MSG_HI, MSG_HI
+
+	mov %rcx, %r8
+	and $0x1, %r8
+	jz .Lld_partial_1
+
+	mov %rcx, %r8
+	and $0x1E, %r8
+	add %rsi, %r8
+	mov (%r8), %r9b
+
+.Lld_partial_1:
+	mov %rcx, %r8
+	and $0x2, %r8
+	jz .Lld_partial_2
+
+	mov %rcx, %r8
+	and $0x1C, %r8
+	add %rsi, %r8
+	shl $16, %r9
+	mov (%r8), %r9w
+
+.Lld_partial_2:
+	mov %rcx, %r8
+	and $0x4, %r8
+	jz .Lld_partial_4
+
+	mov %rcx, %r8
+	and $0x18, %r8
+	add %rsi, %r8
+	shl $32, %r9
+	mov (%r8), %r8d
+	xor %r8, %r9
+
+.Lld_partial_4:
+	movq %r9, MSG_LO
+
+	mov %rcx, %r8
+	and $0x8, %r8
+	jz .Lld_partial_8
+
+	mov %rcx, %r8
+	and $0x10, %r8
+	add %rsi, %r8
+	pslldq $8, MSG_LO
+	movq (%r8), T0_LO
+	pxor T0_LO, MSG_LO
+
+.Lld_partial_8:
+	mov %rcx, %r8
+	and $0x10, %r8
+	jz .Lld_partial_16
+
+	movdqa MSG_LO, MSG_HI
+	movdqu (%rsi), MSG_LO
+
+.Lld_partial_16:
+	ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ *   %rdx - dst
+ *   %rcx - bytes
+ * output:
+ *   T0   - message block
+ * changed:
+ *   %r8
+ *   %r9
+ *   %r10
+ */
+__store_partial:
+	mov %rcx, %r8
+	mov %rdx, %r9
+
+	cmp $16, %r8
+	jl .Lst_partial_16
+
+	movdqu T0_LO, (%r9)
+	movdqa T0_HI, T0_LO
+
+	sub $16, %r8
+	add $16, %r9
+
+.Lst_partial_16:
+	movq T0_LO, %r10
+
+	cmp $8, %r8
+	jl .Lst_partial_8
+
+	mov %r10, (%r9)
+	psrldq $8, T0_LO
+	movq T0_LO, %r10
+
+	sub $8, %r8
+	add $8, %r9
+
+.Lst_partial_8:
+	cmp $4, %r8
+	jl .Lst_partial_4
+
+	mov %r10d, (%r9)
+	shr $32, %r10
+
+	sub $4, %r8
+	add $4, %r9
+
+.Lst_partial_4:
+	cmp $2, %r8
+	jl .Lst_partial_2
+
+	mov %r10w, (%r9)
+	shr $16, %r10
+
+	sub $2, %r8
+	add $2, %r9
+
+.Lst_partial_2:
+	cmp $1, %r8
+	jl .Lst_partial_1
+
+	mov %r10b, (%r9)
+
+.Lst_partial_1:
+	ret
+ENDPROC(__store_partial)
+
+/*
+ * void crypto_morus1280_sse2_init(void *state, const void *key,
+ *                                 const void *iv);
+ */
+ENTRY(crypto_morus1280_sse2_init)
+	FRAME_BEGIN
+
+	/* load IV: */
+	pxor STATE0_HI, STATE0_HI
+	movdqu (%rdx), STATE0_LO
+	/* load key: */
+	movdqu  0(%rsi), KEY_LO
+	movdqu 16(%rsi), KEY_HI
+	movdqa KEY_LO, STATE1_LO
+	movdqa KEY_HI, STATE1_HI
+	/* load all ones: */
+	pcmpeqd STATE2_LO, STATE2_LO
+	pcmpeqd STATE2_HI, STATE2_HI
+	/* load all zeros: */
+	pxor STATE3_LO, STATE3_LO
+	pxor STATE3_HI, STATE3_HI
+	/* load the constant: */
+	movdqa .Lmorus1280_const_0, STATE4_LO
+	movdqa .Lmorus1280_const_1, STATE4_HI
+
+	/* update 16 times with zero: */
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+	call __morus1280_update_zero
+
+	/* xor-in the key again after updates: */
+	pxor KEY_LO, STATE1_LO
+	pxor KEY_HI, STATE1_HI
+
+	/* store the state: */
+	movdqu STATE0_LO, (0 * 16)(%rdi)
+	movdqu STATE0_HI, (1 * 16)(%rdi)
+	movdqu STATE1_LO, (2 * 16)(%rdi)
+	movdqu STATE1_HI, (3 * 16)(%rdi)
+	movdqu STATE2_LO, (4 * 16)(%rdi)
+	movdqu STATE2_HI, (5 * 16)(%rdi)
+	movdqu STATE3_LO, (6 * 16)(%rdi)
+	movdqu STATE3_HI, (7 * 16)(%rdi)
+	movdqu STATE4_LO, (8 * 16)(%rdi)
+	movdqu STATE4_HI, (9 * 16)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_sse2_init)
+
+/*
+ * void crypto_morus1280_sse2_ad(void *state, const void *data,
+ *                               unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_ad)
+	FRAME_BEGIN
+
+	cmp $32, %rdx
+	jb .Lad_out
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0_LO
+	movdqu (1 * 16)(%rdi), STATE0_HI
+	movdqu (2 * 16)(%rdi), STATE1_LO
+	movdqu (3 * 16)(%rdi), STATE1_HI
+	movdqu (4 * 16)(%rdi), STATE2_LO
+	movdqu (5 * 16)(%rdi), STATE2_HI
+	movdqu (6 * 16)(%rdi), STATE3_LO
+	movdqu (7 * 16)(%rdi), STATE3_HI
+	movdqu (8 * 16)(%rdi), STATE4_LO
+	movdqu (9 * 16)(%rdi), STATE4_HI
+
+	mov %rsi, %r8
+	and $0xF, %r8
+	jnz .Lad_u_loop
+
+.align 4
+.Lad_a_loop:
+	movdqa  0(%rsi), MSG_LO
+	movdqa 16(%rsi), MSG_HI
+	call __morus1280_update
+	sub $32, %rdx
+	add $32, %rsi
+	cmp $32, %rdx
+	jge .Lad_a_loop
+
+	jmp .Lad_cont
+.align 4
+.Lad_u_loop:
+	movdqu  0(%rsi), MSG_LO
+	movdqu 16(%rsi), MSG_HI
+	call __morus1280_update
+	sub $32, %rdx
+	add $32, %rsi
+	cmp $32, %rdx
+	jge .Lad_u_loop
+
+.Lad_cont:
+	/* store the state: */
+	movdqu STATE0_LO, (0 * 16)(%rdi)
+	movdqu STATE0_HI, (1 * 16)(%rdi)
+	movdqu STATE1_LO, (2 * 16)(%rdi)
+	movdqu STATE1_HI, (3 * 16)(%rdi)
+	movdqu STATE2_LO, (4 * 16)(%rdi)
+	movdqu STATE2_HI, (5 * 16)(%rdi)
+	movdqu STATE3_LO, (6 * 16)(%rdi)
+	movdqu STATE3_HI, (7 * 16)(%rdi)
+	movdqu STATE4_LO, (8 * 16)(%rdi)
+	movdqu STATE4_HI, (9 * 16)(%rdi)
+
+.Lad_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_sse2_ad)
+
+/*
+ * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst,
+ *                                unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_enc)
+	FRAME_BEGIN
+
+	cmp $32, %rcx
+	jb .Lenc_out
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0_LO
+	movdqu (1 * 16)(%rdi), STATE0_HI
+	movdqu (2 * 16)(%rdi), STATE1_LO
+	movdqu (3 * 16)(%rdi), STATE1_HI
+	movdqu (4 * 16)(%rdi), STATE2_LO
+	movdqu (5 * 16)(%rdi), STATE2_HI
+	movdqu (6 * 16)(%rdi), STATE3_LO
+	movdqu (7 * 16)(%rdi), STATE3_HI
+	movdqu (8 * 16)(%rdi), STATE4_LO
+	movdqu (9 * 16)(%rdi), STATE4_HI
+
+	mov %rsi, %r8
+	or  %rdx, %r8
+	and $0xF, %r8
+	jnz .Lenc_u_loop
+
+.align 4
+.Lenc_a_loop:
+	movdqa  0(%rsi), MSG_LO
+	movdqa 16(%rsi), MSG_HI
+	movdqa STATE1_LO, T1_LO
+	movdqa STATE1_HI, T1_HI
+	rol3 T1_HI, T1_LO
+	movdqa MSG_LO, T0_LO
+	movdqa MSG_HI, T0_HI
+	pxor T1_LO, T0_LO
+	pxor T1_HI, T0_HI
+	pxor STATE0_LO, T0_LO
+	pxor STATE0_HI, T0_HI
+	movdqa STATE2_LO, T1_LO
+	movdqa STATE2_HI, T1_HI
+	pand STATE3_LO, T1_LO
+	pand STATE3_HI, T1_HI
+	pxor T1_LO, T0_LO
+	pxor T1_HI, T0_HI
+	movdqa T0_LO,  0(%rdx)
+	movdqa T0_HI, 16(%rdx)
+
+	call __morus1280_update
+	sub $32, %rcx
+	add $32, %rsi
+	add $32, %rdx
+	cmp $32, %rcx
+	jge .Lenc_a_loop
+
+	jmp .Lenc_cont
+.align 4
+.Lenc_u_loop:
+	movdqu  0(%rsi), MSG_LO
+	movdqu 16(%rsi), MSG_HI
+	movdqa STATE1_LO, T1_LO
+	movdqa STATE1_HI, T1_HI
+	rol3 T1_HI, T1_LO
+	movdqa MSG_LO, T0_LO
+	movdqa MSG_HI, T0_HI
+	pxor T1_LO, T0_LO
+	pxor T1_HI, T0_HI
+	pxor STATE0_LO, T0_LO
+	pxor STATE0_HI, T0_HI
+	movdqa STATE2_LO, T1_LO
+	movdqa STATE2_HI, T1_HI
+	pand STATE3_LO, T1_LO
+	pand STATE3_HI, T1_HI
+	pxor T1_LO, T0_LO
+	pxor T1_HI, T0_HI
+	movdqu T0_LO,  0(%rdx)
+	movdqu T0_HI, 16(%rdx)
+
+	call __morus1280_update
+	sub $32, %rcx
+	add $32, %rsi
+	add $32, %rdx
+	cmp $32, %rcx
+	jge .Lenc_u_loop
+
+.Lenc_cont:
+	/* store the state: */
+	movdqu STATE0_LO, (0 * 16)(%rdi)
+	movdqu STATE0_HI, (1 * 16)(%rdi)
+	movdqu STATE1_LO, (2 * 16)(%rdi)
+	movdqu STATE1_HI, (3 * 16)(%rdi)
+	movdqu STATE2_LO, (4 * 16)(%rdi)
+	movdqu STATE2_HI, (5 * 16)(%rdi)
+	movdqu STATE3_LO, (6 * 16)(%rdi)
+	movdqu STATE3_HI, (7 * 16)(%rdi)
+	movdqu STATE4_LO, (8 * 16)(%rdi)
+	movdqu STATE4_HI, (9 * 16)(%rdi)
+
+.Lenc_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_sse2_enc)
+
+/*
+ * void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst,
+ *                                     unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_enc_tail)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0_LO
+	movdqu (1 * 16)(%rdi), STATE0_HI
+	movdqu (2 * 16)(%rdi), STATE1_LO
+	movdqu (3 * 16)(%rdi), STATE1_HI
+	movdqu (4 * 16)(%rdi), STATE2_LO
+	movdqu (5 * 16)(%rdi), STATE2_HI
+	movdqu (6 * 16)(%rdi), STATE3_LO
+	movdqu (7 * 16)(%rdi), STATE3_HI
+	movdqu (8 * 16)(%rdi), STATE4_LO
+	movdqu (9 * 16)(%rdi), STATE4_HI
+
+	/* encrypt message: */
+	call __load_partial
+
+	movdqa STATE1_LO, T1_LO
+	movdqa STATE1_HI, T1_HI
+	rol3 T1_HI, T1_LO
+	movdqa MSG_LO, T0_LO
+	movdqa MSG_HI, T0_HI
+	pxor T1_LO, T0_LO
+	pxor T1_HI, T0_HI
+	pxor STATE0_LO, T0_LO
+	pxor STATE0_HI, T0_HI
+	movdqa STATE2_LO, T1_LO
+	movdqa STATE2_HI, T1_HI
+	pand STATE3_LO, T1_LO
+	pand STATE3_HI, T1_HI
+	pxor T1_LO, T0_LO
+	pxor T1_HI, T0_HI
+
+	call __store_partial
+
+	call __morus1280_update
+
+	/* store the state: */
+	movdqu STATE0_LO, (0 * 16)(%rdi)
+	movdqu STATE0_HI, (1 * 16)(%rdi)
+	movdqu STATE1_LO, (2 * 16)(%rdi)
+	movdqu STATE1_HI, (3 * 16)(%rdi)
+	movdqu STATE2_LO, (4 * 16)(%rdi)
+	movdqu STATE2_HI, (5 * 16)(%rdi)
+	movdqu STATE3_LO, (6 * 16)(%rdi)
+	movdqu STATE3_HI, (7 * 16)(%rdi)
+	movdqu STATE4_LO, (8 * 16)(%rdi)
+	movdqu STATE4_HI, (9 * 16)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_sse2_enc_tail)
+
+/*
+ * void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst,
+ *                                unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_dec)
+	FRAME_BEGIN
+
+	cmp $32, %rcx
+	jb .Ldec_out
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0_LO
+	movdqu (1 * 16)(%rdi), STATE0_HI
+	movdqu (2 * 16)(%rdi), STATE1_LO
+	movdqu (3 * 16)(%rdi), STATE1_HI
+	movdqu (4 * 16)(%rdi), STATE2_LO
+	movdqu (5 * 16)(%rdi), STATE2_HI
+	movdqu (6 * 16)(%rdi), STATE3_LO
+	movdqu (7 * 16)(%rdi), STATE3_HI
+	movdqu (8 * 16)(%rdi), STATE4_LO
+	movdqu (9 * 16)(%rdi), STATE4_HI
+
+	mov %rsi, %r8
+	or  %rdx, %r8
+	and $0xF, %r8
+	jnz .Ldec_u_loop
+
+.align 4
+.Ldec_a_loop:
+	movdqa  0(%rsi), MSG_LO
+	movdqa 16(%rsi), MSG_HI
+	pxor STATE0_LO, MSG_LO
+	pxor STATE0_HI, MSG_HI
+	movdqa STATE1_LO, T1_LO
+	movdqa STATE1_HI, T1_HI
+	rol3 T1_HI, T1_LO
+	pxor T1_LO, MSG_LO
+	pxor T1_HI, MSG_HI
+	movdqa STATE2_LO, T1_LO
+	movdqa STATE2_HI, T1_HI
+	pand STATE3_LO, T1_LO
+	pand STATE3_HI, T1_HI
+	pxor T1_LO, MSG_LO
+	pxor T1_HI, MSG_HI
+	movdqa MSG_LO,  0(%rdx)
+	movdqa MSG_HI, 16(%rdx)
+
+	call __morus1280_update
+	sub $32, %rcx
+	add $32, %rsi
+	add $32, %rdx
+	cmp $32, %rcx
+	jge .Ldec_a_loop
+
+	jmp .Ldec_cont
+.align 4
+.Ldec_u_loop:
+	movdqu  0(%rsi), MSG_LO
+	movdqu 16(%rsi), MSG_HI
+	pxor STATE0_LO, MSG_LO
+	pxor STATE0_HI, MSG_HI
+	movdqa STATE1_LO, T1_LO
+	movdqa STATE1_HI, T1_HI
+	rol3 T1_HI, T1_LO
+	pxor T1_LO, MSG_LO
+	pxor T1_HI, MSG_HI
+	movdqa STATE2_LO, T1_LO
+	movdqa STATE2_HI, T1_HI
+	pand STATE3_LO, T1_LO
+	pand STATE3_HI, T1_HI
+	pxor T1_LO, MSG_LO
+	pxor T1_HI, MSG_HI
+	movdqu MSG_LO,  0(%rdx)
+	movdqu MSG_HI, 16(%rdx)
+
+	call __morus1280_update
+	sub $32, %rcx
+	add $32, %rsi
+	add $32, %rdx
+	cmp $32, %rcx
+	jge .Ldec_u_loop
+
+.Ldec_cont:
+	/* store the state: */
+	movdqu STATE0_LO, (0 * 16)(%rdi)
+	movdqu STATE0_HI, (1 * 16)(%rdi)
+	movdqu STATE1_LO, (2 * 16)(%rdi)
+	movdqu STATE1_HI, (3 * 16)(%rdi)
+	movdqu STATE2_LO, (4 * 16)(%rdi)
+	movdqu STATE2_HI, (5 * 16)(%rdi)
+	movdqu STATE3_LO, (6 * 16)(%rdi)
+	movdqu STATE3_HI, (7 * 16)(%rdi)
+	movdqu STATE4_LO, (8 * 16)(%rdi)
+	movdqu STATE4_HI, (9 * 16)(%rdi)
+
+.Ldec_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_sse2_dec)
+
+/*
+ * void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst,
+ *                                     unsigned int length);
+ */
+ENTRY(crypto_morus1280_sse2_dec_tail)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0_LO
+	movdqu (1 * 16)(%rdi), STATE0_HI
+	movdqu (2 * 16)(%rdi), STATE1_LO
+	movdqu (3 * 16)(%rdi), STATE1_HI
+	movdqu (4 * 16)(%rdi), STATE2_LO
+	movdqu (5 * 16)(%rdi), STATE2_HI
+	movdqu (6 * 16)(%rdi), STATE3_LO
+	movdqu (7 * 16)(%rdi), STATE3_HI
+	movdqu (8 * 16)(%rdi), STATE4_LO
+	movdqu (9 * 16)(%rdi), STATE4_HI
+
+	/* decrypt message: */
+	call __load_partial
+
+	pxor STATE0_LO, MSG_LO
+	pxor STATE0_HI, MSG_HI
+	movdqa STATE1_LO, T1_LO
+	movdqa STATE1_HI, T1_HI
+	rol3 T1_HI, T1_LO
+	pxor T1_LO, MSG_LO
+	pxor T1_HI, MSG_HI
+	movdqa STATE2_LO, T1_LO
+	movdqa STATE2_HI, T1_HI
+	pand STATE3_LO, T1_LO
+	pand STATE3_HI, T1_HI
+	pxor T1_LO, MSG_LO
+	pxor T1_HI, MSG_HI
+	movdqa MSG_LO, T0_LO
+	movdqa MSG_HI, T0_HI
+
+	call __store_partial
+
+	/* mask with byte count: */
+	movq %rcx, T0_LO
+	punpcklbw T0_LO, T0_LO
+	punpcklbw T0_LO, T0_LO
+	punpcklbw T0_LO, T0_LO
+	punpcklbw T0_LO, T0_LO
+	movdqa T0_LO, T0_HI
+	movdqa .Lmorus1280_counter_0, T1_LO
+	movdqa .Lmorus1280_counter_1, T1_HI
+	pcmpgtb T1_LO, T0_LO
+	pcmpgtb T1_HI, T0_HI
+	pand T0_LO, MSG_LO
+	pand T0_HI, MSG_HI
+
+	call __morus1280_update
+
+	/* store the state: */
+	movdqu STATE0_LO, (0 * 16)(%rdi)
+	movdqu STATE0_HI, (1 * 16)(%rdi)
+	movdqu STATE1_LO, (2 * 16)(%rdi)
+	movdqu STATE1_HI, (3 * 16)(%rdi)
+	movdqu STATE2_LO, (4 * 16)(%rdi)
+	movdqu STATE2_HI, (5 * 16)(%rdi)
+	movdqu STATE3_LO, (6 * 16)(%rdi)
+	movdqu STATE3_HI, (7 * 16)(%rdi)
+	movdqu STATE4_LO, (8 * 16)(%rdi)
+	movdqu STATE4_HI, (9 * 16)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_sse2_dec_tail)
+
+/*
+ * void crypto_morus1280_sse2_final(void *state, void *tag_xor,
+ *                                  u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_morus1280_sse2_final)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0_LO
+	movdqu (1 * 16)(%rdi), STATE0_HI
+	movdqu (2 * 16)(%rdi), STATE1_LO
+	movdqu (3 * 16)(%rdi), STATE1_HI
+	movdqu (4 * 16)(%rdi), STATE2_LO
+	movdqu (5 * 16)(%rdi), STATE2_HI
+	movdqu (6 * 16)(%rdi), STATE3_LO
+	movdqu (7 * 16)(%rdi), STATE3_HI
+	movdqu (8 * 16)(%rdi), STATE4_LO
+	movdqu (9 * 16)(%rdi), STATE4_HI
+
+	/* xor state[0] into state[4]: */
+	pxor STATE0_LO, STATE4_LO
+	pxor STATE0_HI, STATE4_HI
+
+	/* prepare length block: */
+	movq %rdx, MSG_LO
+	movq %rcx, T0_LO
+	pslldq $8, T0_LO
+	pxor T0_LO, MSG_LO
+	psllq $3, MSG_LO /* multiply by 8 (to get bit count) */
+	pxor MSG_HI, MSG_HI
+
+	/* update state: */
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+	call __morus1280_update
+
+	/* xor tag: */
+	movdqu  0(%rsi), MSG_LO
+	movdqu 16(%rsi), MSG_HI
+
+	pxor STATE0_LO, MSG_LO
+	pxor STATE0_HI, MSG_HI
+	movdqa STATE1_LO, T0_LO
+	movdqa STATE1_HI, T0_HI
+	rol3 T0_HI, T0_LO
+	pxor T0_LO, MSG_LO
+	pxor T0_HI, MSG_HI
+	movdqa STATE2_LO, T0_LO
+	movdqa STATE2_HI, T0_HI
+	pand STATE3_LO, T0_LO
+	pand STATE3_HI, T0_HI
+	pxor T0_LO, MSG_LO
+	pxor T0_HI, MSG_HI
+
+	movdqu MSG_LO,  0(%rsi)
+	movdqu MSG_HI, 16(%rsi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus1280_sse2_final)
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
new file mode 100644
index 0000000..839270a
--- /dev/null
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -0,0 +1,68 @@
+/*
+ * The MORUS-1280 Authenticated-Encryption Algorithm
+ *   Glue for SSE2 implementation
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/internal/aead.h>
+#include <crypto/morus1280_glue.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key,
+					   const void *iv);
+asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data,
+					 unsigned int length);
+
+asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src,
+					  void *dst, unsigned int length);
+asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src,
+					  void *dst, unsigned int length);
+
+asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src,
+					       void *dst, unsigned int length);
+asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
+					       void *dst, unsigned int length);
+
+asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
+					    u64 assoclen, u64 cryptlen);
+
+MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
+
+static const struct x86_cpu_id sse2_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
+
+static int __init crypto_morus1280_sse2_module_init(void)
+{
+	if (!x86_match_cpu(sse2_cpu_id))
+		return -ENODEV;
+
+	return crypto_register_aeads(crypto_morus1280_sse2_algs,
+				     ARRAY_SIZE(crypto_morus1280_sse2_algs));
+}
+
+static void __exit crypto_morus1280_sse2_module_exit(void)
+{
+	crypto_unregister_aeads(crypto_morus1280_sse2_algs,
+				ARRAY_SIZE(crypto_morus1280_sse2_algs));
+}
+
+module_init(crypto_morus1280_sse2_module_init);
+module_exit(crypto_morus1280_sse2_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation");
+MODULE_ALIAS_CRYPTO("morus1280");
+MODULE_ALIAS_CRYPTO("morus1280-sse2");
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
new file mode 100644
index 0000000..0dccdda
--- /dev/null
+++ b/arch/x86/crypto/morus1280_glue.c
@@ -0,0 +1,302 @@
+/*
+ * The MORUS-1280 Authenticated-Encryption Algorithm
+ *   Common x86 SIMD glue skeleton
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/morus1280_glue.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <asm/fpu/api.h>
+
+struct morus1280_state {
+	struct morus1280_block s[MORUS_STATE_BLOCKS];
+};
+
+struct morus1280_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_blocks)(void *state, const void *src, void *dst,
+			     unsigned int length);
+	void (*crypt_tail)(void *state, const void *src, void *dst,
+			   unsigned int length);
+};
+
+static void crypto_morus1280_glue_process_ad(
+		struct morus1280_state *state,
+		const struct morus1280_glue_ops *ops,
+		struct scatterlist *sg_src, unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	struct morus1280_block buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= MORUS1280_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+				ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			ops->ad(state, src, left);
+			src += left & ~(MORUS1280_BLOCK_SIZE - 1);
+			left &= MORUS1280_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+
+		pos += left;
+		assoclen -= size;
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
+		ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
+	}
+}
+
+static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
+						struct morus1280_ops ops,
+						struct aead_request *req)
+{
+	struct skcipher_walk walk;
+	u8 *cursor_src, *cursor_dst;
+	unsigned int chunksize, base;
+
+	ops.skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		cursor_src = walk.src.virt.addr;
+		cursor_dst = walk.dst.virt.addr;
+		chunksize = walk.nbytes;
+
+		ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
+
+		base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1);
+		cursor_src += base;
+		cursor_dst += base;
+		chunksize &= MORUS1280_BLOCK_SIZE - 1;
+
+		if (chunksize > 0)
+			ops.crypt_tail(state, cursor_src, cursor_dst,
+				       chunksize);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
+int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
+				 unsigned int keylen)
+{
+	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
+
+	if (keylen == MORUS1280_BLOCK_SIZE) {
+		memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE);
+	} else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
+		memcpy(ctx->key.bytes, key, keylen);
+		memcpy(ctx->key.bytes + keylen, key, keylen);
+	} else {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey);
+
+int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
+				      unsigned int authsize)
+{
+	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize);
+
+static void crypto_morus1280_glue_crypt(struct aead_request *req,
+					struct morus1280_ops ops,
+					unsigned int cryptlen,
+					struct morus1280_block *tag_xor)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
+	struct morus1280_state state;
+
+	kernel_fpu_begin();
+
+	ctx->ops->init(&state, &ctx->key, req->iv);
+	crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src,
+					 req->assoclen);
+	crypto_morus1280_glue_process_crypt(&state, ops, req);
+	ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
+
+	kernel_fpu_end();
+}
+
+int crypto_morus1280_glue_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
+	struct morus1280_ops OPS = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_blocks = ctx->ops->enc,
+		.crypt_tail = ctx->ops->enc_tail,
+	};
+
+	struct morus1280_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
+
+	scatterwalk_map_and_copy(tag.bytes, req->dst,
+				 req->assoclen + cryptlen, authsize, 1);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt);
+
+int crypto_morus1280_glue_decrypt(struct aead_request *req)
+{
+	static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
+	struct morus1280_ops OPS = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_blocks = ctx->ops->dec,
+		.crypt_tail = ctx->ops->dec_tail,
+	};
+
+	struct morus1280_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag.bytes, req->src,
+				 req->assoclen + cryptlen, authsize, 0);
+
+	crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
+
+	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt);
+
+void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
+				    const struct morus1280_glue_ops *ops)
+{
+	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
+	ctx->ops = ops;
+}
+EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
+
+int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
+				 unsigned int keylen)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey);
+
+int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead,
+				      unsigned int authsize)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize);
+
+int cryptd_morus1280_glue_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_encrypt(req);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt);
+
+int cryptd_morus1280_glue_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_decrypt(req);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt);
+
+int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead *cryptd_tfm;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
+	char internal_name[CRYPTO_MAX_ALG_NAME];
+
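+	/* cryptd wraps the internal "__"-prefixed variant of this driver */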
+	if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
+			>= CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
+	cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm);
+
+void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
new file mode 100644
index 0000000..71c72a0
--- /dev/null
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -0,0 +1,614 @@
+/*
+ * SSE2 implementation of MORUS-640
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+#define SHUFFLE_MASK(i0, i1, i2, i3) \
+	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
+
+#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
+#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
+#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
+
+#define STATE0	%xmm0
+#define STATE1	%xmm1
+#define STATE2	%xmm2
+#define STATE3	%xmm3
+#define STATE4	%xmm4
+#define KEY	%xmm5
+#define MSG	%xmm5
+#define T0	%xmm6
+#define T1	%xmm7
+
+.section .rodata.cst16.morus640_const, "aM", @progbits, 16
+.align 16
+.Lmorus640_const_0:
+	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
+	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
+.Lmorus640_const_1:
+	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
+	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
+
+.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
+.align 16
+.Lmorus640_counter:
+	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+
+.text
+
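+/*
+ * Single MORUS-640 round: s0 ^= (s1 & s2) ^ s3, each 32-bit lane of s0
+ * is rotated left by \b, and the words of s3 are shuffled by \w.
+ */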
+.macro morus640_round s0, s1, s2, s3, s4, b, w
+	movdqa \s1, T0
+	pand \s2, T0
+	pxor T0, \s0
+	pxor \s3, \s0
+	movdqa \s0, T0
+	pslld $\b, T0
+	psrld $(32 - \b), \s0
+	pxor T0, \s0
+	pshufd $\w, \s3, \s3
+.endm
+
+/*
+ * __morus640_update: internal ABI
+ * input:
+ *   STATE[0-4] - input state
+ *   MSG        - message block
+ * output:
+ *   STATE[0-4] - output state
+ * changed:
+ *   T0
+ */
+__morus640_update:
+	morus640_round STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
+	pxor MSG, STATE1
+	morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
+	pxor MSG, STATE2
+	morus640_round STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
+	pxor MSG, STATE3
+	morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
+	pxor MSG, STATE4
+	morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
+	ret
+ENDPROC(__morus640_update)
+
+
+/*
+ * __morus640_update_zero: internal ABI
+ * input:
+ *   STATE[0-4] - input state
+ * output:
+ *   STATE[0-4] - output state
+ * changed:
+ *   T0
+ */
+__morus640_update_zero:
+	morus640_round STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
+	morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
+	morus640_round STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
+	morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
+	morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
+	ret
+ENDPROC(__morus640_update_zero)
+
+/*
+ * __load_partial: internal ABI
+ * input:
+ *   %rsi - src
+ *   %rcx - bytes
+ * output:
+ *   MSG  - message block
+ * changed:
+ *   T0
+ *   %r8
+ *   %r9
+ */
+__load_partial:
+	xor %r9, %r9
+	pxor MSG, MSG
+
+	mov %rcx, %r8
+	and $0x1, %r8
+	jz .Lld_partial_1
+
+	mov %rcx, %r8
+	and $0x1E, %r8
+	add %rsi, %r8
+	mov (%r8), %r9b
+
+.Lld_partial_1:
+	mov %rcx, %r8
+	and $0x2, %r8
+	jz .Lld_partial_2
+
+	mov %rcx, %r8
+	and $0x1C, %r8
+	add %rsi, %r8
+	shl $16, %r9
+	mov (%r8), %r9w
+
+.Lld_partial_2:
+	mov %rcx, %r8
+	and $0x4, %r8
+	jz .Lld_partial_4
+
+	mov %rcx, %r8
+	and $0x18, %r8
+	add %rsi, %r8
+	shl $32, %r9
+	mov (%r8), %r8d
+	xor %r8, %r9
+
+.Lld_partial_4:
+	movq %r9, MSG
+
+	mov %rcx, %r8
+	and $0x8, %r8
+	jz .Lld_partial_8
+
+	mov %rcx, %r8
+	and $0x10, %r8
+	add %rsi, %r8
+	pslldq $8, MSG
+	movq (%r8), T0
+	pxor T0, MSG
+
+.Lld_partial_8:
+	ret
+ENDPROC(__load_partial)
+
+/*
+ * __store_partial: internal ABI
+ * input:
+ *   %rdx - dst
+ *   %rcx - bytes
+ *   T0   - message block
+ * changed:
+ *   %r8
+ *   %r9
+ *   %r10
+ */
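+/*
+ * The low 'bytes' bytes of T0 are written out in descending 8-, 4-,
+ * 2- and 1-byte chunks, so no byte past dst + bytes is written.
+ */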
+__store_partial:
+	mov %rcx, %r8
+	mov %rdx, %r9
+
+	movq T0, %r10
+
+	cmp $8, %r8
+	jl .Lst_partial_8
+
+	mov %r10, (%r9)
+	psrldq $8, T0
+	movq T0, %r10
+
+	sub $8, %r8
+	add $8, %r9
+
+.Lst_partial_8:
+	cmp $4, %r8
+	jl .Lst_partial_4
+
+	mov %r10d, (%r9)
+	shr $32, %r10
+
+	sub $4, %r8
+	add $4, %r9
+
+.Lst_partial_4:
+	cmp $2, %r8
+	jl .Lst_partial_2
+
+	mov %r10w, (%r9)
+	shr $16, %r10
+
+	sub $2, %r8
+	add $2, %r9
+
+.Lst_partial_2:
+	cmp $1, %r8
+	jl .Lst_partial_1
+
+	mov %r10b, (%r9)
+
+.Lst_partial_1:
+	ret
+ENDPROC(__store_partial)
+
+/*
+ * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
+ */
+ENTRY(crypto_morus640_sse2_init)
+	FRAME_BEGIN
+
+	/* load IV: */
+	movdqu (%rdx), STATE0
+	/* load key: */
+	movdqu (%rsi), KEY
+	movdqa KEY, STATE1
+	/* load all ones: */
+	pcmpeqd STATE2, STATE2
+	/* load the constants: */
+	movdqa .Lmorus640_const_0, STATE3
+	movdqa .Lmorus640_const_1, STATE4
+
+	/* update 16 times with zero: */
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+	call __morus640_update_zero
+
+	/* xor-in the key again after updates: */
+	pxor KEY, STATE1
+
+	/* store the state: */
+	movdqu STATE0, (0 * 16)(%rdi)
+	movdqu STATE1, (1 * 16)(%rdi)
+	movdqu STATE2, (2 * 16)(%rdi)
+	movdqu STATE3, (3 * 16)(%rdi)
+	movdqu STATE4, (4 * 16)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus640_sse2_init)
+
+/*
+ * void crypto_morus640_sse2_ad(void *state, const void *data,
+ *                              unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_ad)
+	FRAME_BEGIN
+
+	cmp $16, %rdx
+	jb .Lad_out
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0
+	movdqu (1 * 16)(%rdi), STATE1
+	movdqu (2 * 16)(%rdi), STATE2
+	movdqu (3 * 16)(%rdi), STATE3
+	movdqu (4 * 16)(%rdi), STATE4
+
+	mov %rsi, %r8
+	and $0xF, %r8
+	jnz .Lad_u_loop
+
+.align 4
+.Lad_a_loop:
+	movdqa (%rsi), MSG
+	call __morus640_update
+	sub $16, %rdx
+	add $16, %rsi
+	cmp $16, %rdx
+	jge .Lad_a_loop
+
+	jmp .Lad_cont
+.align 4
+.Lad_u_loop:
+	movdqu (%rsi), MSG
+	call __morus640_update
+	sub $16, %rdx
+	add $16, %rsi
+	cmp $16, %rdx
+	jge .Lad_u_loop
+
+.Lad_cont:
+	/* store the state: */
+	movdqu STATE0, (0 * 16)(%rdi)
+	movdqu STATE1, (1 * 16)(%rdi)
+	movdqu STATE2, (2 * 16)(%rdi)
+	movdqu STATE3, (3 * 16)(%rdi)
+	movdqu STATE4, (4 * 16)(%rdi)
+
+.Lad_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus640_sse2_ad)
+
+/*
+ * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
+ *                               unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_enc)
+	FRAME_BEGIN
+
+	cmp $16, %rcx
+	jb .Lenc_out
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0
+	movdqu (1 * 16)(%rdi), STATE1
+	movdqu (2 * 16)(%rdi), STATE2
+	movdqu (3 * 16)(%rdi), STATE3
+	movdqu (4 * 16)(%rdi), STATE4
+
+	mov %rsi, %r8
+	or  %rdx, %r8
+	and $0xF, %r8
+	jnz .Lenc_u_loop
+
+.align 4
+.Lenc_a_loop:
+	movdqa (%rsi), MSG
+	movdqa MSG, T0
+	pxor STATE0, T0
+	pshufd $MASK3, STATE1, T1
+	pxor T1, T0
+	movdqa STATE2, T1
+	pand STATE3, T1
+	pxor T1, T0
+	movdqa T0, (%rdx)
+
+	call __morus640_update
+	sub $16, %rcx
+	add $16, %rsi
+	add $16, %rdx
+	cmp $16, %rcx
+	jge .Lenc_a_loop
+
+	jmp .Lenc_cont
+.align 4
+.Lenc_u_loop:
+	movdqu (%rsi), MSG
+	movdqa MSG, T0
+	pxor STATE0, T0
+	pshufd $MASK3, STATE1, T1
+	pxor T1, T0
+	movdqa STATE2, T1
+	pand STATE3, T1
+	pxor T1, T0
+	movdqu T0, (%rdx)
+
+	call __morus640_update
+	sub $16, %rcx
+	add $16, %rsi
+	add $16, %rdx
+	cmp $16, %rcx
+	jge .Lenc_u_loop
+
+.Lenc_cont:
+	/* store the state: */
+	movdqu STATE0, (0 * 16)(%rdi)
+	movdqu STATE1, (1 * 16)(%rdi)
+	movdqu STATE2, (2 * 16)(%rdi)
+	movdqu STATE3, (3 * 16)(%rdi)
+	movdqu STATE4, (4 * 16)(%rdi)
+
+.Lenc_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus640_sse2_enc)
+
+/*
+ * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
+ *                                    unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_enc_tail)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0
+	movdqu (1 * 16)(%rdi), STATE1
+	movdqu (2 * 16)(%rdi), STATE2
+	movdqu (3 * 16)(%rdi), STATE3
+	movdqu (4 * 16)(%rdi), STATE4
+
+	/* encrypt message: */
+	call __load_partial
+
+	movdqa MSG, T0
+	pxor STATE0, T0
+	pshufd $MASK3, STATE1, T1
+	pxor T1, T0
+	movdqa STATE2, T1
+	pand STATE3, T1
+	pxor T1, T0
+
+	call __store_partial
+
+	call __morus640_update
+
+	/* store the state: */
+	movdqu STATE0, (0 * 16)(%rdi)
+	movdqu STATE1, (1 * 16)(%rdi)
+	movdqu STATE2, (2 * 16)(%rdi)
+	movdqu STATE3, (3 * 16)(%rdi)
+	movdqu STATE4, (4 * 16)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus640_sse2_enc_tail)
+
+/*
+ * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
+ *                               unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_dec)
+	FRAME_BEGIN
+
+	cmp $16, %rcx
+	jb .Ldec_out
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0
+	movdqu (1 * 16)(%rdi), STATE1
+	movdqu (2 * 16)(%rdi), STATE2
+	movdqu (3 * 16)(%rdi), STATE3
+	movdqu (4 * 16)(%rdi), STATE4
+
+	mov %rsi, %r8
+	or  %rdx, %r8
+	and $0xF, %r8
+	jnz .Ldec_u_loop
+
+.align 4
+.Ldec_a_loop:
+	movdqa (%rsi), MSG
+	pxor STATE0, MSG
+	pshufd $MASK3, STATE1, T0
+	pxor T0, MSG
+	movdqa STATE2, T0
+	pand STATE3, T0
+	pxor T0, MSG
+	movdqa MSG, (%rdx)
+
+	call __morus640_update
+	sub $16, %rcx
+	add $16, %rsi
+	add $16, %rdx
+	cmp $16, %rcx
+	jge .Ldec_a_loop
+
+	jmp .Ldec_cont
+.align 4
+.Ldec_u_loop:
+	movdqu (%rsi), MSG
+	pxor STATE0, MSG
+	pshufd $MASK3, STATE1, T0
+	pxor T0, MSG
+	movdqa STATE2, T0
+	pand STATE3, T0
+	pxor T0, MSG
+	movdqu MSG, (%rdx)
+
+	call __morus640_update
+	sub $16, %rcx
+	add $16, %rsi
+	add $16, %rdx
+	cmp $16, %rcx
+	jge .Ldec_u_loop
+
+.Ldec_cont:
+	/* store the state: */
+	movdqu STATE0, (0 * 16)(%rdi)
+	movdqu STATE1, (1 * 16)(%rdi)
+	movdqu STATE2, (2 * 16)(%rdi)
+	movdqu STATE3, (3 * 16)(%rdi)
+	movdqu STATE4, (4 * 16)(%rdi)
+
+.Ldec_out:
+	FRAME_END
+	ret
+ENDPROC(crypto_morus640_sse2_dec)
+
+/*
+ * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
+ *                                    unsigned int length);
+ */
+ENTRY(crypto_morus640_sse2_dec_tail)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0
+	movdqu (1 * 16)(%rdi), STATE1
+	movdqu (2 * 16)(%rdi), STATE2
+	movdqu (3 * 16)(%rdi), STATE3
+	movdqu (4 * 16)(%rdi), STATE4
+
+	/* decrypt message: */
+	call __load_partial
+
+	pxor STATE0, MSG
+	pshufd $MASK3, STATE1, T0
+	pxor T0, MSG
+	movdqa STATE2, T0
+	pand STATE3, T0
+	pxor T0, MSG
+	movdqa MSG, T0
+
+	call __store_partial
+
+	/* mask with byte count: */
+	movq %rcx, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	punpcklbw T0, T0
+	movdqa .Lmorus640_counter, T1
+	pcmpgtb T1, T0
+	pand T0, MSG
+
+	call __morus640_update
+
+	/* store the state: */
+	movdqu STATE0, (0 * 16)(%rdi)
+	movdqu STATE1, (1 * 16)(%rdi)
+	movdqu STATE2, (2 * 16)(%rdi)
+	movdqu STATE3, (3 * 16)(%rdi)
+	movdqu STATE4, (4 * 16)(%rdi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus640_sse2_dec_tail)
+
+/*
+ * void crypto_morus640_sse2_final(void *state, void *tag_xor,
+ *	                           u64 assoclen, u64 cryptlen);
+ */
+ENTRY(crypto_morus640_sse2_final)
+	FRAME_BEGIN
+
+	/* load the state: */
+	movdqu (0 * 16)(%rdi), STATE0
+	movdqu (1 * 16)(%rdi), STATE1
+	movdqu (2 * 16)(%rdi), STATE2
+	movdqu (3 * 16)(%rdi), STATE3
+	movdqu (4 * 16)(%rdi), STATE4
+
+	/* xor state[0] into state[4]: */
+	pxor STATE0, STATE4
+
+	/* prepare length block: */
+	movq %rdx, MSG
+	movq %rcx, T0
+	pslldq $8, T0
+	pxor T0, MSG
+	psllq $3, MSG /* multiply by 8 (to get bit count) */
+
+	/* update state: */
+	call __morus640_update
+	call __morus640_update
+	call __morus640_update
+	call __morus640_update
+	call __morus640_update
+	call __morus640_update
+	call __morus640_update
+	call __morus640_update
+	call __morus640_update
+	call __morus640_update
+
+	/* xor tag: */
+	movdqu (%rsi), MSG
+
+	pxor STATE0, MSG
+	pshufd $MASK3, STATE1, T0
+	pxor T0, MSG
+	movdqa STATE2, T0
+	pand STATE3, T0
+	pxor T0, MSG
+
+	movdqu MSG, (%rsi)
+
+	FRAME_END
+	ret
+ENDPROC(crypto_morus640_sse2_final)
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
new file mode 100644
index 0000000..26b47e2
--- /dev/null
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -0,0 +1,68 @@
+/*
+ * The MORUS-640 Authenticated-Encryption Algorithm
+ *   Glue for SSE2 implementation
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/internal/aead.h>
+#include <crypto/morus640_glue.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/cpu_device_id.h>
+
+asmlinkage void crypto_morus640_sse2_init(void *state, const void *key,
+					  const void *iv);
+asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data,
+					unsigned int length);
+
+asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src,
+					 void *dst, unsigned int length);
+asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src,
+					 void *dst, unsigned int length);
+
+asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src,
+					      void *dst, unsigned int length);
+asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
+					      void *dst, unsigned int length);
+
+asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
+					   u64 assoclen, u64 cryptlen);
+
+MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
+
+static const struct x86_cpu_id sse2_cpu_id[] = {
+	X86_FEATURE_MATCH(X86_FEATURE_XMM2),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id);
+
+static int __init crypto_morus640_sse2_module_init(void)
+{
+	if (!x86_match_cpu(sse2_cpu_id))
+		return -ENODEV;
+
+	return crypto_register_aeads(crypto_morus640_sse2_algs,
+				     ARRAY_SIZE(crypto_morus640_sse2_algs));
+}
+
+static void __exit crypto_morus640_sse2_module_exit(void)
+{
+	crypto_unregister_aeads(crypto_morus640_sse2_algs,
+				ARRAY_SIZE(crypto_morus640_sse2_algs));
+}
+
+module_init(crypto_morus640_sse2_module_init);
+module_exit(crypto_morus640_sse2_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation");
+MODULE_ALIAS_CRYPTO("morus640");
+MODULE_ALIAS_CRYPTO("morus640-sse2");
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c
new file mode 100644
index 0000000..7b58fe4
--- /dev/null
+++ b/arch/x86/crypto/morus640_glue.c
@@ -0,0 +1,298 @@
+/*
+ * The MORUS-640 Authenticated-Encryption Algorithm
+ *   Common x86 SIMD glue skeleton
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/morus640_glue.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <asm/fpu/api.h>
+
+struct morus640_state {
+	struct morus640_block s[MORUS_STATE_BLOCKS];
+};
+
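+/*
+ * Per-request entry points: the same walk loop serves both directions,
+ * with encryption or decryption selected by the SIMD routines plugged
+ * into this small vtable.
+ */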
+struct morus640_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_blocks)(void *state, const void *src, void *dst,
+			     unsigned int length);
+	void (*crypt_tail)(void *state, const void *src, void *dst,
+			   unsigned int length);
+};
+
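+/*
+ * Feed the associated data to the SIMD ad() hook. ad() consumes only
+ * whole 16-byte blocks, so bytes straddling scatterlist elements are
+ * buffered here and the final partial block is zero-padded.
+ */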
+static void crypto_morus640_glue_process_ad(
+		struct morus640_state *state,
+		const struct morus640_glue_ops *ops,
+		struct scatterlist *sg_src, unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	struct morus640_block buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= MORUS640_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = MORUS640_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+				ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			ops->ad(state, src, left);
+			src += left & ~(MORUS640_BLOCK_SIZE - 1);
+			left &= MORUS640_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+
+		pos += left;
+		assoclen -= size;
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
+		ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
+	}
+}
+
+static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
+					       struct morus640_ops ops,
+					       struct aead_request *req)
+{
+	struct skcipher_walk walk;
+	u8 *cursor_src, *cursor_dst;
+	unsigned int chunksize, base;
+
+	ops.skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		cursor_src = walk.src.virt.addr;
+		cursor_dst = walk.dst.virt.addr;
+		chunksize = walk.nbytes;
+
+		ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize);
+
+		base = chunksize & ~(MORUS640_BLOCK_SIZE - 1);
+		cursor_src += base;
+		cursor_dst += base;
+		chunksize &= MORUS640_BLOCK_SIZE - 1;
+
+		if (chunksize > 0)
+			ops.crypt_tail(state, cursor_src, cursor_dst,
+				       chunksize);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
+int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
+				unsigned int keylen)
+{
+	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
+
+	if (keylen != MORUS640_BLOCK_SIZE) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey);
+
+int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
+				     unsigned int authsize)
+{
+	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize);
+
+static void crypto_morus640_glue_crypt(struct aead_request *req,
+				       struct morus640_ops ops,
+				       unsigned int cryptlen,
+				       struct morus640_block *tag_xor)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
+	struct morus640_state state;
+
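+	/*
+	 * Run the whole init/ad/crypt/final sequence under a single
+	 * kernel_fpu_begin()/end() pair so the xmm state used by the
+	 * SIMD helpers is preserved throughout.
+	 */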
+	kernel_fpu_begin();
+
+	ctx->ops->init(&state, &ctx->key, req->iv);
+	crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
+	crypto_morus640_glue_process_crypt(&state, ops, req);
+	ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
+
+	kernel_fpu_end();
+}
+
+int crypto_morus640_glue_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
+	struct morus640_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_blocks = ctx->ops->enc,
+		.crypt_tail = ctx->ops->enc_tail,
+	};
+
+	struct morus640_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_morus640_glue_crypt(req, ops, cryptlen, &tag);
+
+	scatterwalk_map_and_copy(tag.bytes, req->dst,
+				 req->assoclen + cryptlen, authsize, 1);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt);
+
+int crypto_morus640_glue_decrypt(struct aead_request *req)
+{
+	static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
+	struct morus640_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_blocks = ctx->ops->dec,
+		.crypt_tail = ctx->ops->dec_tail,
+	};
+
+	struct morus640_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag.bytes, req->src,
+				 req->assoclen + cryptlen, authsize, 0);
+
+	crypto_morus640_glue_crypt(req, ops, cryptlen, &tag);
+
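+	/* final() xor-ed the computed tag into 'tag', so a match leaves zeroes */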
+	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt);
+
+void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
+				   const struct morus640_glue_ops *ops)
+{
+	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
+
+	ctx->ops = ops;
+}
+EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
+
+int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
+				unsigned int keylen)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey);
+
+int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead,
+				     unsigned int authsize)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize);
+
+int cryptd_morus640_glue_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
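+	/*
+	 * If the FPU is usable in this context and cryptd has no queued
+	 * requests, bypass the workqueue and call the internal SIMD
+	 * algorithm directly.
+	 */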
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_encrypt(req);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt);
+
+int cryptd_morus640_glue_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	struct cryptd_aead *cryptd_tfm = *ctx;
+
+	aead = &cryptd_tfm->base;
+	if (irq_fpu_usable() && (!in_atomic() ||
+				 !cryptd_aead_queued(cryptd_tfm)))
+		aead = cryptd_aead_child(cryptd_tfm);
+
+	aead_request_set_tfm(req, aead);
+
+	return crypto_aead_decrypt(req);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt);
+
+int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead *cryptd_tfm;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+	const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
+	char internal_name[CRYPTO_MAX_ALG_NAME];
+
+	if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
+			>= CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
+	cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm);
+
+void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
+}
+EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
deleted file mode 100644
index 6014b7b..0000000
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ /dev/null
@@ -1,938 +0,0 @@
-# Derived from:
-#	salsa20_pm.s version 20051229
-#	D. J. Bernstein
-#	Public domain.
-
-#include <linux/linkage.h>
-
-.text
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
-	mov	%esp,%eax
-	and	$31,%eax
-	add	$256,%eax
-	sub	%eax,%esp
-	# eax_stack = eax
-	movl	%eax,80(%esp)
-	# ebx_stack = ebx
-	movl	%ebx,84(%esp)
-	# esi_stack = esi
-	movl	%esi,88(%esp)
-	# edi_stack = edi
-	movl	%edi,92(%esp)
-	# ebp_stack = ebp
-	movl	%ebp,96(%esp)
-	# x = arg1
-	movl	4(%esp,%eax),%edx
-	# m = arg2
-	movl	8(%esp,%eax),%esi
-	# out = arg3
-	movl	12(%esp,%eax),%edi
-	# bytes = arg4
-	movl	16(%esp,%eax),%ebx
-	# bytes -= 0
-	sub	$0,%ebx
-	# goto done if unsigned<=
-	jbe	._done
-._start:
-	# in0 = *(uint32 *) (x + 0)
-	movl	0(%edx),%eax
-	# in1 = *(uint32 *) (x + 4)
-	movl	4(%edx),%ecx
-	# in2 = *(uint32 *) (x + 8)
-	movl	8(%edx),%ebp
-	# j0 = in0
-	movl	%eax,164(%esp)
-	# in3 = *(uint32 *) (x + 12)
-	movl	12(%edx),%eax
-	# j1 = in1
-	movl	%ecx,168(%esp)
-	# in4 = *(uint32 *) (x + 16)
-	movl	16(%edx),%ecx
-	# j2 = in2
-	movl	%ebp,172(%esp)
-	# in5 = *(uint32 *) (x + 20)
-	movl	20(%edx),%ebp
-	# j3 = in3
-	movl	%eax,176(%esp)
-	# in6 = *(uint32 *) (x + 24)
-	movl	24(%edx),%eax
-	# j4 = in4
-	movl	%ecx,180(%esp)
-	# in7 = *(uint32 *) (x + 28)
-	movl	28(%edx),%ecx
-	# j5 = in5
-	movl	%ebp,184(%esp)
-	# in8 = *(uint32 *) (x + 32)
-	movl	32(%edx),%ebp
-	# j6 = in6
-	movl	%eax,188(%esp)
-	# in9 = *(uint32 *) (x + 36)
-	movl	36(%edx),%eax
-	# j7 = in7
-	movl	%ecx,192(%esp)
-	# in10 = *(uint32 *) (x + 40)
-	movl	40(%edx),%ecx
-	# j8 = in8
-	movl	%ebp,196(%esp)
-	# in11 = *(uint32 *) (x + 44)
-	movl	44(%edx),%ebp
-	# j9 = in9
-	movl	%eax,200(%esp)
-	# in12 = *(uint32 *) (x + 48)
-	movl	48(%edx),%eax
-	# j10 = in10
-	movl	%ecx,204(%esp)
-	# in13 = *(uint32 *) (x + 52)
-	movl	52(%edx),%ecx
-	# j11 = in11
-	movl	%ebp,208(%esp)
-	# in14 = *(uint32 *) (x + 56)
-	movl	56(%edx),%ebp
-	# j12 = in12
-	movl	%eax,212(%esp)
-	# in15 = *(uint32 *) (x + 60)
-	movl	60(%edx),%eax
-	# j13 = in13
-	movl	%ecx,216(%esp)
-	# j14 = in14
-	movl	%ebp,220(%esp)
-	# j15 = in15
-	movl	%eax,224(%esp)
-	# x_backup = x
-	movl	%edx,64(%esp)
-._bytesatleast1:
-	#   bytes - 64
-	cmp	$64,%ebx
-	#   goto nocopy if unsigned>=
-	jae	._nocopy
-	#     ctarget = out
-	movl	%edi,228(%esp)
-	#     out = &tmp
-	leal	0(%esp),%edi
-	#     i = bytes
-	mov	%ebx,%ecx
-	#     while (i) { *out++ = *m++; --i }
-	rep	movsb
-	#     out = &tmp
-	leal	0(%esp),%edi
-	#     m = &tmp
-	leal	0(%esp),%esi
-._nocopy:
-	#   out_backup = out
-	movl	%edi,72(%esp)
-	#   m_backup = m
-	movl	%esi,68(%esp)
-	#   bytes_backup = bytes
-	movl	%ebx,76(%esp)
-	#   in0 = j0
-	movl	164(%esp),%eax
-	#   in1 = j1
-	movl	168(%esp),%ecx
-	#   in2 = j2
-	movl	172(%esp),%edx
-	#   in3 = j3
-	movl	176(%esp),%ebx
-	#   x0 = in0
-	movl	%eax,100(%esp)
-	#   x1 = in1
-	movl	%ecx,104(%esp)
-	#   x2 = in2
-	movl	%edx,108(%esp)
-	#   x3 = in3
-	movl	%ebx,112(%esp)
-	#   in4 = j4
-	movl	180(%esp),%eax
-	#   in5 = j5
-	movl	184(%esp),%ecx
-	#   in6 = j6
-	movl	188(%esp),%edx
-	#   in7 = j7
-	movl	192(%esp),%ebx
-	#   x4 = in4
-	movl	%eax,116(%esp)
-	#   x5 = in5
-	movl	%ecx,120(%esp)
-	#   x6 = in6
-	movl	%edx,124(%esp)
-	#   x7 = in7
-	movl	%ebx,128(%esp)
-	#   in8 = j8
-	movl	196(%esp),%eax
-	#   in9 = j9
-	movl	200(%esp),%ecx
-	#   in10 = j10
-	movl	204(%esp),%edx
-	#   in11 = j11
-	movl	208(%esp),%ebx
-	#   x8 = in8
-	movl	%eax,132(%esp)
-	#   x9 = in9
-	movl	%ecx,136(%esp)
-	#   x10 = in10
-	movl	%edx,140(%esp)
-	#   x11 = in11
-	movl	%ebx,144(%esp)
-	#   in12 = j12
-	movl	212(%esp),%eax
-	#   in13 = j13
-	movl	216(%esp),%ecx
-	#   in14 = j14
-	movl	220(%esp),%edx
-	#   in15 = j15
-	movl	224(%esp),%ebx
-	#   x12 = in12
-	movl	%eax,148(%esp)
-	#   x13 = in13
-	movl	%ecx,152(%esp)
-	#   x14 = in14
-	movl	%edx,156(%esp)
-	#   x15 = in15
-	movl	%ebx,160(%esp)
-	#   i = 20
-	mov	$20,%ebp
-	# p = x0
-	movl	100(%esp),%eax
-	# s = x5
-	movl	120(%esp),%ecx
-	# t = x10
-	movl	140(%esp),%edx
-	# w = x15
-	movl	160(%esp),%ebx
-._mainloop:
-	# x0 = p
-	movl	%eax,100(%esp)
-	# 				x10 = t
-	movl	%edx,140(%esp)
-	# p += x12
-	addl	148(%esp),%eax
-	# 		x5 = s
-	movl	%ecx,120(%esp)
-	# 				t += x6
-	addl	124(%esp),%edx
-	# 						x15 = w
-	movl	%ebx,160(%esp)
-	# 		r = x1
-	movl	104(%esp),%esi
-	# 		r += s
-	add	%ecx,%esi
-	# 						v = x11
-	movl	144(%esp),%edi
-	# 						v += w
-	add	%ebx,%edi
-	# p <<<= 7
-	rol	$7,%eax
-	# p ^= x4
-	xorl	116(%esp),%eax
-	# 				t <<<= 7
-	rol	$7,%edx
-	# 				t ^= x14
-	xorl	156(%esp),%edx
-	# 		r <<<= 7
-	rol	$7,%esi
-	# 		r ^= x9
-	xorl	136(%esp),%esi
-	# 						v <<<= 7
-	rol	$7,%edi
-	# 						v ^= x3
-	xorl	112(%esp),%edi
-	# x4 = p
-	movl	%eax,116(%esp)
-	# 				x14 = t
-	movl	%edx,156(%esp)
-	# p += x0
-	addl	100(%esp),%eax
-	# 		x9 = r
-	movl	%esi,136(%esp)
-	# 				t += x10
-	addl	140(%esp),%edx
-	# 						x3 = v
-	movl	%edi,112(%esp)
-	# p <<<= 9
-	rol	$9,%eax
-	# p ^= x8
-	xorl	132(%esp),%eax
-	# 				t <<<= 9
-	rol	$9,%edx
-	# 				t ^= x2
-	xorl	108(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 9
-	rol	$9,%ecx
-	# 		s ^= x13
-	xorl	152(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 9
-	rol	$9,%ebx
-	# 						w ^= x7
-	xorl	128(%esp),%ebx
-	# x8 = p
-	movl	%eax,132(%esp)
-	# 				x2 = t
-	movl	%edx,108(%esp)
-	# p += x4
-	addl	116(%esp),%eax
-	# 		x13 = s
-	movl	%ecx,152(%esp)
-	# 				t += x14
-	addl	156(%esp),%edx
-	# 						x7 = w
-	movl	%ebx,128(%esp)
-	# p <<<= 13
-	rol	$13,%eax
-	# p ^= x12
-	xorl	148(%esp),%eax
-	# 				t <<<= 13
-	rol	$13,%edx
-	# 				t ^= x6
-	xorl	124(%esp),%edx
-	# 		r += s
-	add	%ecx,%esi
-	# 		r <<<= 13
-	rol	$13,%esi
-	# 		r ^= x1
-	xorl	104(%esp),%esi
-	# 						v += w
-	add	%ebx,%edi
-	# 						v <<<= 13
-	rol	$13,%edi
-	# 						v ^= x11
-	xorl	144(%esp),%edi
-	# x12 = p
-	movl	%eax,148(%esp)
-	# 				x6 = t
-	movl	%edx,124(%esp)
-	# p += x8
-	addl	132(%esp),%eax
-	# 		x1 = r
-	movl	%esi,104(%esp)
-	# 				t += x2
-	addl	108(%esp),%edx
-	# 						x11 = v
-	movl	%edi,144(%esp)
-	# p <<<= 18
-	rol	$18,%eax
-	# p ^= x0
-	xorl	100(%esp),%eax
-	# 				t <<<= 18
-	rol	$18,%edx
-	# 				t ^= x10
-	xorl	140(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 18
-	rol	$18,%ecx
-	# 		s ^= x5
-	xorl	120(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 18
-	rol	$18,%ebx
-	# 						w ^= x15
-	xorl	160(%esp),%ebx
-	# x0 = p
-	movl	%eax,100(%esp)
-	# 				x10 = t
-	movl	%edx,140(%esp)
-	# p += x3
-	addl	112(%esp),%eax
-	# p <<<= 7
-	rol	$7,%eax
-	# 		x5 = s
-	movl	%ecx,120(%esp)
-	# 				t += x9
-	addl	136(%esp),%edx
-	# 						x15 = w
-	movl	%ebx,160(%esp)
-	# 		r = x4
-	movl	116(%esp),%esi
-	# 		r += s
-	add	%ecx,%esi
-	# 						v = x14
-	movl	156(%esp),%edi
-	# 						v += w
-	add	%ebx,%edi
-	# p ^= x1
-	xorl	104(%esp),%eax
-	# 				t <<<= 7
-	rol	$7,%edx
-	# 				t ^= x11
-	xorl	144(%esp),%edx
-	# 		r <<<= 7
-	rol	$7,%esi
-	# 		r ^= x6
-	xorl	124(%esp),%esi
-	# 						v <<<= 7
-	rol	$7,%edi
-	# 						v ^= x12
-	xorl	148(%esp),%edi
-	# x1 = p
-	movl	%eax,104(%esp)
-	# 				x11 = t
-	movl	%edx,144(%esp)
-	# p += x0
-	addl	100(%esp),%eax
-	# 		x6 = r
-	movl	%esi,124(%esp)
-	# 				t += x10
-	addl	140(%esp),%edx
-	# 						x12 = v
-	movl	%edi,148(%esp)
-	# p <<<= 9
-	rol	$9,%eax
-	# p ^= x2
-	xorl	108(%esp),%eax
-	# 				t <<<= 9
-	rol	$9,%edx
-	# 				t ^= x8
-	xorl	132(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 9
-	rol	$9,%ecx
-	# 		s ^= x7
-	xorl	128(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 9
-	rol	$9,%ebx
-	# 						w ^= x13
-	xorl	152(%esp),%ebx
-	# x2 = p
-	movl	%eax,108(%esp)
-	# 				x8 = t
-	movl	%edx,132(%esp)
-	# p += x1
-	addl	104(%esp),%eax
-	# 		x7 = s
-	movl	%ecx,128(%esp)
-	# 				t += x11
-	addl	144(%esp),%edx
-	# 						x13 = w
-	movl	%ebx,152(%esp)
-	# p <<<= 13
-	rol	$13,%eax
-	# p ^= x3
-	xorl	112(%esp),%eax
-	# 				t <<<= 13
-	rol	$13,%edx
-	# 				t ^= x9
-	xorl	136(%esp),%edx
-	# 		r += s
-	add	%ecx,%esi
-	# 		r <<<= 13
-	rol	$13,%esi
-	# 		r ^= x4
-	xorl	116(%esp),%esi
-	# 						v += w
-	add	%ebx,%edi
-	# 						v <<<= 13
-	rol	$13,%edi
-	# 						v ^= x14
-	xorl	156(%esp),%edi
-	# x3 = p
-	movl	%eax,112(%esp)
-	# 				x9 = t
-	movl	%edx,136(%esp)
-	# p += x2
-	addl	108(%esp),%eax
-	# 		x4 = r
-	movl	%esi,116(%esp)
-	# 				t += x8
-	addl	132(%esp),%edx
-	# 						x14 = v
-	movl	%edi,156(%esp)
-	# p <<<= 18
-	rol	$18,%eax
-	# p ^= x0
-	xorl	100(%esp),%eax
-	# 				t <<<= 18
-	rol	$18,%edx
-	# 				t ^= x10
-	xorl	140(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 18
-	rol	$18,%ecx
-	# 		s ^= x5
-	xorl	120(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 18
-	rol	$18,%ebx
-	# 						w ^= x15
-	xorl	160(%esp),%ebx
-	# x0 = p
-	movl	%eax,100(%esp)
-	# 				x10 = t
-	movl	%edx,140(%esp)
-	# p += x12
-	addl	148(%esp),%eax
-	# 		x5 = s
-	movl	%ecx,120(%esp)
-	# 				t += x6
-	addl	124(%esp),%edx
-	# 						x15 = w
-	movl	%ebx,160(%esp)
-	# 		r = x1
-	movl	104(%esp),%esi
-	# 		r += s
-	add	%ecx,%esi
-	# 						v = x11
-	movl	144(%esp),%edi
-	# 						v += w
-	add	%ebx,%edi
-	# p <<<= 7
-	rol	$7,%eax
-	# p ^= x4
-	xorl	116(%esp),%eax
-	# 				t <<<= 7
-	rol	$7,%edx
-	# 				t ^= x14
-	xorl	156(%esp),%edx
-	# 		r <<<= 7
-	rol	$7,%esi
-	# 		r ^= x9
-	xorl	136(%esp),%esi
-	# 						v <<<= 7
-	rol	$7,%edi
-	# 						v ^= x3
-	xorl	112(%esp),%edi
-	# x4 = p
-	movl	%eax,116(%esp)
-	# 				x14 = t
-	movl	%edx,156(%esp)
-	# p += x0
-	addl	100(%esp),%eax
-	# 		x9 = r
-	movl	%esi,136(%esp)
-	# 				t += x10
-	addl	140(%esp),%edx
-	# 						x3 = v
-	movl	%edi,112(%esp)
-	# p <<<= 9
-	rol	$9,%eax
-	# p ^= x8
-	xorl	132(%esp),%eax
-	# 				t <<<= 9
-	rol	$9,%edx
-	# 				t ^= x2
-	xorl	108(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 9
-	rol	$9,%ecx
-	# 		s ^= x13
-	xorl	152(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 9
-	rol	$9,%ebx
-	# 						w ^= x7
-	xorl	128(%esp),%ebx
-	# x8 = p
-	movl	%eax,132(%esp)
-	# 				x2 = t
-	movl	%edx,108(%esp)
-	# p += x4
-	addl	116(%esp),%eax
-	# 		x13 = s
-	movl	%ecx,152(%esp)
-	# 				t += x14
-	addl	156(%esp),%edx
-	# 						x7 = w
-	movl	%ebx,128(%esp)
-	# p <<<= 13
-	rol	$13,%eax
-	# p ^= x12
-	xorl	148(%esp),%eax
-	# 				t <<<= 13
-	rol	$13,%edx
-	# 				t ^= x6
-	xorl	124(%esp),%edx
-	# 		r += s
-	add	%ecx,%esi
-	# 		r <<<= 13
-	rol	$13,%esi
-	# 		r ^= x1
-	xorl	104(%esp),%esi
-	# 						v += w
-	add	%ebx,%edi
-	# 						v <<<= 13
-	rol	$13,%edi
-	# 						v ^= x11
-	xorl	144(%esp),%edi
-	# x12 = p
-	movl	%eax,148(%esp)
-	# 				x6 = t
-	movl	%edx,124(%esp)
-	# p += x8
-	addl	132(%esp),%eax
-	# 		x1 = r
-	movl	%esi,104(%esp)
-	# 				t += x2
-	addl	108(%esp),%edx
-	# 						x11 = v
-	movl	%edi,144(%esp)
-	# p <<<= 18
-	rol	$18,%eax
-	# p ^= x0
-	xorl	100(%esp),%eax
-	# 				t <<<= 18
-	rol	$18,%edx
-	# 				t ^= x10
-	xorl	140(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 18
-	rol	$18,%ecx
-	# 		s ^= x5
-	xorl	120(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 18
-	rol	$18,%ebx
-	# 						w ^= x15
-	xorl	160(%esp),%ebx
-	# x0 = p
-	movl	%eax,100(%esp)
-	# 				x10 = t
-	movl	%edx,140(%esp)
-	# p += x3
-	addl	112(%esp),%eax
-	# p <<<= 7
-	rol	$7,%eax
-	# 		x5 = s
-	movl	%ecx,120(%esp)
-	# 				t += x9
-	addl	136(%esp),%edx
-	# 						x15 = w
-	movl	%ebx,160(%esp)
-	# 		r = x4
-	movl	116(%esp),%esi
-	# 		r += s
-	add	%ecx,%esi
-	# 						v = x14
-	movl	156(%esp),%edi
-	# 						v += w
-	add	%ebx,%edi
-	# p ^= x1
-	xorl	104(%esp),%eax
-	# 				t <<<= 7
-	rol	$7,%edx
-	# 				t ^= x11
-	xorl	144(%esp),%edx
-	# 		r <<<= 7
-	rol	$7,%esi
-	# 		r ^= x6
-	xorl	124(%esp),%esi
-	# 						v <<<= 7
-	rol	$7,%edi
-	# 						v ^= x12
-	xorl	148(%esp),%edi
-	# x1 = p
-	movl	%eax,104(%esp)
-	# 				x11 = t
-	movl	%edx,144(%esp)
-	# p += x0
-	addl	100(%esp),%eax
-	# 		x6 = r
-	movl	%esi,124(%esp)
-	# 				t += x10
-	addl	140(%esp),%edx
-	# 						x12 = v
-	movl	%edi,148(%esp)
-	# p <<<= 9
-	rol	$9,%eax
-	# p ^= x2
-	xorl	108(%esp),%eax
-	# 				t <<<= 9
-	rol	$9,%edx
-	# 				t ^= x8
-	xorl	132(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 9
-	rol	$9,%ecx
-	# 		s ^= x7
-	xorl	128(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 9
-	rol	$9,%ebx
-	# 						w ^= x13
-	xorl	152(%esp),%ebx
-	# x2 = p
-	movl	%eax,108(%esp)
-	# 				x8 = t
-	movl	%edx,132(%esp)
-	# p += x1
-	addl	104(%esp),%eax
-	# 		x7 = s
-	movl	%ecx,128(%esp)
-	# 				t += x11
-	addl	144(%esp),%edx
-	# 						x13 = w
-	movl	%ebx,152(%esp)
-	# p <<<= 13
-	rol	$13,%eax
-	# p ^= x3
-	xorl	112(%esp),%eax
-	# 				t <<<= 13
-	rol	$13,%edx
-	# 				t ^= x9
-	xorl	136(%esp),%edx
-	# 		r += s
-	add	%ecx,%esi
-	# 		r <<<= 13
-	rol	$13,%esi
-	# 		r ^= x4
-	xorl	116(%esp),%esi
-	# 						v += w
-	add	%ebx,%edi
-	# 						v <<<= 13
-	rol	$13,%edi
-	# 						v ^= x14
-	xorl	156(%esp),%edi
-	# x3 = p
-	movl	%eax,112(%esp)
-	# 				x9 = t
-	movl	%edx,136(%esp)
-	# p += x2
-	addl	108(%esp),%eax
-	# 		x4 = r
-	movl	%esi,116(%esp)
-	# 				t += x8
-	addl	132(%esp),%edx
-	# 						x14 = v
-	movl	%edi,156(%esp)
-	# p <<<= 18
-	rol	$18,%eax
-	# p ^= x0
-	xorl	100(%esp),%eax
-	# 				t <<<= 18
-	rol	$18,%edx
-	# 				t ^= x10
-	xorl	140(%esp),%edx
-	# 		s += r
-	add	%esi,%ecx
-	# 		s <<<= 18
-	rol	$18,%ecx
-	# 		s ^= x5
-	xorl	120(%esp),%ecx
-	# 						w += v
-	add	%edi,%ebx
-	# 						w <<<= 18
-	rol	$18,%ebx
-	# 						w ^= x15
-	xorl	160(%esp),%ebx
-	# i -= 4
-	sub	$4,%ebp
-	# goto mainloop if unsigned >
-	ja	._mainloop
-	# x0 = p
-	movl	%eax,100(%esp)
-	# x5 = s
-	movl	%ecx,120(%esp)
-	# x10 = t
-	movl	%edx,140(%esp)
-	# x15 = w
-	movl	%ebx,160(%esp)
-	#   out = out_backup
-	movl	72(%esp),%edi
-	#   m = m_backup
-	movl	68(%esp),%esi
-	#   in0 = x0
-	movl	100(%esp),%eax
-	#   in1 = x1
-	movl	104(%esp),%ecx
-	#   in0 += j0
-	addl	164(%esp),%eax
-	#   in1 += j1
-	addl	168(%esp),%ecx
-	#   in0 ^= *(uint32 *) (m + 0)
-	xorl	0(%esi),%eax
-	#   in1 ^= *(uint32 *) (m + 4)
-	xorl	4(%esi),%ecx
-	#   *(uint32 *) (out + 0) = in0
-	movl	%eax,0(%edi)
-	#   *(uint32 *) (out + 4) = in1
-	movl	%ecx,4(%edi)
-	#   in2 = x2
-	movl	108(%esp),%eax
-	#   in3 = x3
-	movl	112(%esp),%ecx
-	#   in2 += j2
-	addl	172(%esp),%eax
-	#   in3 += j3
-	addl	176(%esp),%ecx
-	#   in2 ^= *(uint32 *) (m + 8)
-	xorl	8(%esi),%eax
-	#   in3 ^= *(uint32 *) (m + 12)
-	xorl	12(%esi),%ecx
-	#   *(uint32 *) (out + 8) = in2
-	movl	%eax,8(%edi)
-	#   *(uint32 *) (out + 12) = in3
-	movl	%ecx,12(%edi)
-	#   in4 = x4
-	movl	116(%esp),%eax
-	#   in5 = x5
-	movl	120(%esp),%ecx
-	#   in4 += j4
-	addl	180(%esp),%eax
-	#   in5 += j5
-	addl	184(%esp),%ecx
-	#   in4 ^= *(uint32 *) (m + 16)
-	xorl	16(%esi),%eax
-	#   in5 ^= *(uint32 *) (m + 20)
-	xorl	20(%esi),%ecx
-	#   *(uint32 *) (out + 16) = in4
-	movl	%eax,16(%edi)
-	#   *(uint32 *) (out + 20) = in5
-	movl	%ecx,20(%edi)
-	#   in6 = x6
-	movl	124(%esp),%eax
-	#   in7 = x7
-	movl	128(%esp),%ecx
-	#   in6 += j6
-	addl	188(%esp),%eax
-	#   in7 += j7
-	addl	192(%esp),%ecx
-	#   in6 ^= *(uint32 *) (m + 24)
-	xorl	24(%esi),%eax
-	#   in7 ^= *(uint32 *) (m + 28)
-	xorl	28(%esi),%ecx
-	#   *(uint32 *) (out + 24) = in6
-	movl	%eax,24(%edi)
-	#   *(uint32 *) (out + 28) = in7
-	movl	%ecx,28(%edi)
-	#   in8 = x8
-	movl	132(%esp),%eax
-	#   in9 = x9
-	movl	136(%esp),%ecx
-	#   in8 += j8
-	addl	196(%esp),%eax
-	#   in9 += j9
-	addl	200(%esp),%ecx
-	#   in8 ^= *(uint32 *) (m + 32)
-	xorl	32(%esi),%eax
-	#   in9 ^= *(uint32 *) (m + 36)
-	xorl	36(%esi),%ecx
-	#   *(uint32 *) (out + 32) = in8
-	movl	%eax,32(%edi)
-	#   *(uint32 *) (out + 36) = in9
-	movl	%ecx,36(%edi)
-	#   in10 = x10
-	movl	140(%esp),%eax
-	#   in11 = x11
-	movl	144(%esp),%ecx
-	#   in10 += j10
-	addl	204(%esp),%eax
-	#   in11 += j11
-	addl	208(%esp),%ecx
-	#   in10 ^= *(uint32 *) (m + 40)
-	xorl	40(%esi),%eax
-	#   in11 ^= *(uint32 *) (m + 44)
-	xorl	44(%esi),%ecx
-	#   *(uint32 *) (out + 40) = in10
-	movl	%eax,40(%edi)
-	#   *(uint32 *) (out + 44) = in11
-	movl	%ecx,44(%edi)
-	#   in12 = x12
-	movl	148(%esp),%eax
-	#   in13 = x13
-	movl	152(%esp),%ecx
-	#   in12 += j12
-	addl	212(%esp),%eax
-	#   in13 += j13
-	addl	216(%esp),%ecx
-	#   in12 ^= *(uint32 *) (m + 48)
-	xorl	48(%esi),%eax
-	#   in13 ^= *(uint32 *) (m + 52)
-	xorl	52(%esi),%ecx
-	#   *(uint32 *) (out + 48) = in12
-	movl	%eax,48(%edi)
-	#   *(uint32 *) (out + 52) = in13
-	movl	%ecx,52(%edi)
-	#   in14 = x14
-	movl	156(%esp),%eax
-	#   in15 = x15
-	movl	160(%esp),%ecx
-	#   in14 += j14
-	addl	220(%esp),%eax
-	#   in15 += j15
-	addl	224(%esp),%ecx
-	#   in14 ^= *(uint32 *) (m + 56)
-	xorl	56(%esi),%eax
-	#   in15 ^= *(uint32 *) (m + 60)
-	xorl	60(%esi),%ecx
-	#   *(uint32 *) (out + 56) = in14
-	movl	%eax,56(%edi)
-	#   *(uint32 *) (out + 60) = in15
-	movl	%ecx,60(%edi)
-	#   bytes = bytes_backup
-	movl	76(%esp),%ebx
-	#   in8 = j8
-	movl	196(%esp),%eax
-	#   in9 = j9
-	movl	200(%esp),%ecx
-	#   in8 += 1
-	add	$1,%eax
-	#   in9 += 0 + carry
-	adc	$0,%ecx
-	#   j8 = in8
-	movl	%eax,196(%esp)
-	#   j9 = in9
-	movl	%ecx,200(%esp)
-	#   bytes - 64
-	cmp	$64,%ebx
-	#   goto bytesatleast65 if unsigned>
-	ja	._bytesatleast65
-	#     goto bytesatleast64 if unsigned>=
-	jae	._bytesatleast64
-	#       m = out
-	mov	%edi,%esi
-	#       out = ctarget
-	movl	228(%esp),%edi
-	#       i = bytes
-	mov	%ebx,%ecx
-	#       while (i) { *out++ = *m++; --i }
-	rep	movsb
-._bytesatleast64:
-	#     x = x_backup
-	movl	64(%esp),%eax
-	#     in8 = j8
-	movl	196(%esp),%ecx
-	#     in9 = j9
-	movl	200(%esp),%edx
-	#     *(uint32 *) (x + 32) = in8
-	movl	%ecx,32(%eax)
-	#     *(uint32 *) (x + 36) = in9
-	movl	%edx,36(%eax)
-._done:
-	#     eax = eax_stack
-	movl	80(%esp),%eax
-	#     ebx = ebx_stack
-	movl	84(%esp),%ebx
-	#     esi = esi_stack
-	movl	88(%esp),%esi
-	#     edi = edi_stack
-	movl	92(%esp),%edi
-	#     ebp = ebp_stack
-	movl	96(%esp),%ebp
-	#     leave
-	add	%eax,%esp
-	ret
-._bytesatleast65:
-	#   bytes -= 64
-	sub	$64,%ebx
-	#   out += 64
-	add	$64,%edi
-	#   m += 64
-	add	$64,%esi
-	# goto bytesatleast1
-	jmp	._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
deleted file mode 100644
index 03a4918..0000000
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ /dev/null
@@ -1,805 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/linkage.h>
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
-	mov	%rsp,%r11
-	and	$31,%r11
-	add	$256,%r11
-	sub	%r11,%rsp
-	# x = arg1
-	mov	%rdi,%r8
-	# m = arg2
-	mov	%rsi,%rsi
-	# out = arg3
-	mov	%rdx,%rdi
-	# bytes = arg4
-	mov	%rcx,%rdx
-	#               unsigned>? bytes - 0
-	cmp	$0,%rdx
-	# comment:fp stack unchanged by jump
-	# goto done if !unsigned>
-	jbe	._done
-	# comment:fp stack unchanged by fallthrough
-# start:
-._start:
-	# r11_stack = r11
-	movq	%r11,0(%rsp)
-	# r12_stack = r12
-	movq	%r12,8(%rsp)
-	# r13_stack = r13
-	movq	%r13,16(%rsp)
-	# r14_stack = r14
-	movq	%r14,24(%rsp)
-	# r15_stack = r15
-	movq	%r15,32(%rsp)
-	# rbx_stack = rbx
-	movq	%rbx,40(%rsp)
-	# rbp_stack = rbp
-	movq	%rbp,48(%rsp)
-	# in0 = *(uint64 *) (x + 0)
-	movq	0(%r8),%rcx
-	# in2 = *(uint64 *) (x + 8)
-	movq	8(%r8),%r9
-	# in4 = *(uint64 *) (x + 16)
-	movq	16(%r8),%rax
-	# in6 = *(uint64 *) (x + 24)
-	movq	24(%r8),%r10
-	# in8 = *(uint64 *) (x + 32)
-	movq	32(%r8),%r11
-	# in10 = *(uint64 *) (x + 40)
-	movq	40(%r8),%r12
-	# in12 = *(uint64 *) (x + 48)
-	movq	48(%r8),%r13
-	# in14 = *(uint64 *) (x + 56)
-	movq	56(%r8),%r14
-	# j0 = in0
-	movq	%rcx,56(%rsp)
-	# j2 = in2
-	movq	%r9,64(%rsp)
-	# j4 = in4
-	movq	%rax,72(%rsp)
-	# j6 = in6
-	movq	%r10,80(%rsp)
-	# j8 = in8
-	movq	%r11,88(%rsp)
-	# j10 = in10
-	movq	%r12,96(%rsp)
-	# j12 = in12
-	movq	%r13,104(%rsp)
-	# j14 = in14
-	movq	%r14,112(%rsp)
-	# x_backup = x
-	movq	%r8,120(%rsp)
-# bytesatleast1:
-._bytesatleast1:
-	#                   unsigned<? bytes - 64
-	cmp	$64,%rdx
-	# comment:fp stack unchanged by jump
-	#   goto nocopy if !unsigned<
-	jae	._nocopy
-	#     ctarget = out
-	movq	%rdi,128(%rsp)
-	#     out = &tmp
-	leaq	192(%rsp),%rdi
-	#     i = bytes
-	mov	%rdx,%rcx
-	#     while (i) { *out++ = *m++; --i }
-	rep	movsb
-	#     out = &tmp
-	leaq	192(%rsp),%rdi
-	#     m = &tmp
-	leaq	192(%rsp),%rsi
-	# comment:fp stack unchanged by fallthrough
-#   nocopy:
-._nocopy:
-	#   out_backup = out
-	movq	%rdi,136(%rsp)
-	#   m_backup = m
-	movq	%rsi,144(%rsp)
-	#   bytes_backup = bytes
-	movq	%rdx,152(%rsp)
-	#   x1 = j0
-	movq	56(%rsp),%rdi
-	#   x0 = x1
-	mov	%rdi,%rdx
-	#   (uint64) x1 >>= 32
-	shr	$32,%rdi
-	#   		x3 = j2
-	movq	64(%rsp),%rsi
-	#   		x2 = x3
-	mov	%rsi,%rcx
-	#   		(uint64) x3 >>= 32
-	shr	$32,%rsi
-	#   x5 = j4
-	movq	72(%rsp),%r8
-	#   x4 = x5
-	mov	%r8,%r9
-	#   (uint64) x5 >>= 32
-	shr	$32,%r8
-	#   x5_stack = x5
-	movq	%r8,160(%rsp)
-	#   		x7 = j6
-	movq	80(%rsp),%r8
-	#   		x6 = x7
-	mov	%r8,%rax
-	#   		(uint64) x7 >>= 32
-	shr	$32,%r8
-	#   x9 = j8
-	movq	88(%rsp),%r10
-	#   x8 = x9
-	mov	%r10,%r11
-	#   (uint64) x9 >>= 32
-	shr	$32,%r10
-	#   		x11 = j10
-	movq	96(%rsp),%r12
-	#   		x10 = x11
-	mov	%r12,%r13
-	#   		x10_stack = x10
-	movq	%r13,168(%rsp)
-	#   		(uint64) x11 >>= 32
-	shr	$32,%r12
-	#   x13 = j12
-	movq	104(%rsp),%r13
-	#   x12 = x13
-	mov	%r13,%r14
-	#   (uint64) x13 >>= 32
-	shr	$32,%r13
-	#   		x15 = j14
-	movq	112(%rsp),%r15
-	#   		x14 = x15
-	mov	%r15,%rbx
-	#   		(uint64) x15 >>= 32
-	shr	$32,%r15
-	#   		x15_stack = x15
-	movq	%r15,176(%rsp)
-	#   i = 20
-	mov	$20,%r15
-#   mainloop:
-._mainloop:
-	#   i_backup = i
-	movq	%r15,184(%rsp)
-	# 		x5 = x5_stack
-	movq	160(%rsp),%r15
-	# a = x12 + x0
-	lea	(%r14,%rdx),%rbp
-	# (uint32) a <<<= 7
-	rol	$7,%ebp
-	# x4 ^= a
-	xor	%rbp,%r9
-	# 		b = x1 + x5
-	lea	(%rdi,%r15),%rbp
-	# 		(uint32) b <<<= 7
-	rol	$7,%ebp
-	# 		x9 ^= b
-	xor	%rbp,%r10
-	# a = x0 + x4
-	lea	(%rdx,%r9),%rbp
-	# (uint32) a <<<= 9
-	rol	$9,%ebp
-	# x8 ^= a
-	xor	%rbp,%r11
-	# 		b = x5 + x9
-	lea	(%r15,%r10),%rbp
-	# 		(uint32) b <<<= 9
-	rol	$9,%ebp
-	# 		x13 ^= b
-	xor	%rbp,%r13
-	# a = x4 + x8
-	lea	(%r9,%r11),%rbp
-	# (uint32) a <<<= 13
-	rol	$13,%ebp
-	# x12 ^= a
-	xor	%rbp,%r14
-	# 		b = x9 + x13
-	lea	(%r10,%r13),%rbp
-	# 		(uint32) b <<<= 13
-	rol	$13,%ebp
-	# 		x1 ^= b
-	xor	%rbp,%rdi
-	# a = x8 + x12
-	lea	(%r11,%r14),%rbp
-	# (uint32) a <<<= 18
-	rol	$18,%ebp
-	# x0 ^= a
-	xor	%rbp,%rdx
-	# 		b = x13 + x1
-	lea	(%r13,%rdi),%rbp
-	# 		(uint32) b <<<= 18
-	rol	$18,%ebp
-	# 		x5 ^= b
-	xor	%rbp,%r15
-	# 				x10 = x10_stack
-	movq	168(%rsp),%rbp
-	# 		x5_stack = x5
-	movq	%r15,160(%rsp)
-	# 				c = x6 + x10
-	lea	(%rax,%rbp),%r15
-	# 				(uint32) c <<<= 7
-	rol	$7,%r15d
-	# 				x14 ^= c
-	xor	%r15,%rbx
-	# 				c = x10 + x14
-	lea	(%rbp,%rbx),%r15
-	# 				(uint32) c <<<= 9
-	rol	$9,%r15d
-	# 				x2 ^= c
-	xor	%r15,%rcx
-	# 				c = x14 + x2
-	lea	(%rbx,%rcx),%r15
-	# 				(uint32) c <<<= 13
-	rol	$13,%r15d
-	# 				x6 ^= c
-	xor	%r15,%rax
-	# 				c = x2 + x6
-	lea	(%rcx,%rax),%r15
-	# 				(uint32) c <<<= 18
-	rol	$18,%r15d
-	# 				x10 ^= c
-	xor	%r15,%rbp
-	# 						x15 = x15_stack
-	movq	176(%rsp),%r15
-	# 				x10_stack = x10
-	movq	%rbp,168(%rsp)
-	# 						d = x11 + x15
-	lea	(%r12,%r15),%rbp
-	# 						(uint32) d <<<= 7
-	rol	$7,%ebp
-	# 						x3 ^= d
-	xor	%rbp,%rsi
-	# 						d = x15 + x3
-	lea	(%r15,%rsi),%rbp
-	# 						(uint32) d <<<= 9
-	rol	$9,%ebp
-	# 						x7 ^= d
-	xor	%rbp,%r8
-	# 						d = x3 + x7
-	lea	(%rsi,%r8),%rbp
-	# 						(uint32) d <<<= 13
-	rol	$13,%ebp
-	# 						x11 ^= d
-	xor	%rbp,%r12
-	# 						d = x7 + x11
-	lea	(%r8,%r12),%rbp
-	# 						(uint32) d <<<= 18
-	rol	$18,%ebp
-	# 						x15 ^= d
-	xor	%rbp,%r15
-	# 						x15_stack = x15
-	movq	%r15,176(%rsp)
-	# 		x5 = x5_stack
-	movq	160(%rsp),%r15
-	# a = x3 + x0
-	lea	(%rsi,%rdx),%rbp
-	# (uint32) a <<<= 7
-	rol	$7,%ebp
-	# x1 ^= a
-	xor	%rbp,%rdi
-	# 		b = x4 + x5
-	lea	(%r9,%r15),%rbp
-	# 		(uint32) b <<<= 7
-	rol	$7,%ebp
-	# 		x6 ^= b
-	xor	%rbp,%rax
-	# a = x0 + x1
-	lea	(%rdx,%rdi),%rbp
-	# (uint32) a <<<= 9
-	rol	$9,%ebp
-	# x2 ^= a
-	xor	%rbp,%rcx
-	# 		b = x5 + x6
-	lea	(%r15,%rax),%rbp
-	# 		(uint32) b <<<= 9
-	rol	$9,%ebp
-	# 		x7 ^= b
-	xor	%rbp,%r8
-	# a = x1 + x2
-	lea	(%rdi,%rcx),%rbp
-	# (uint32) a <<<= 13
-	rol	$13,%ebp
-	# x3 ^= a
-	xor	%rbp,%rsi
-	# 		b = x6 + x7
-	lea	(%rax,%r8),%rbp
-	# 		(uint32) b <<<= 13
-	rol	$13,%ebp
-	# 		x4 ^= b
-	xor	%rbp,%r9
-	# a = x2 + x3
-	lea	(%rcx,%rsi),%rbp
-	# (uint32) a <<<= 18
-	rol	$18,%ebp
-	# x0 ^= a
-	xor	%rbp,%rdx
-	# 		b = x7 + x4
-	lea	(%r8,%r9),%rbp
-	# 		(uint32) b <<<= 18
-	rol	$18,%ebp
-	# 		x5 ^= b
-	xor	%rbp,%r15
-	# 				x10 = x10_stack
-	movq	168(%rsp),%rbp
-	# 		x5_stack = x5
-	movq	%r15,160(%rsp)
-	# 				c = x9 + x10
-	lea	(%r10,%rbp),%r15
-	# 				(uint32) c <<<= 7
-	rol	$7,%r15d
-	# 				x11 ^= c
-	xor	%r15,%r12
-	# 				c = x10 + x11
-	lea	(%rbp,%r12),%r15
-	# 				(uint32) c <<<= 9
-	rol	$9,%r15d
-	# 				x8 ^= c
-	xor	%r15,%r11
-	# 				c = x11 + x8
-	lea	(%r12,%r11),%r15
-	# 				(uint32) c <<<= 13
-	rol	$13,%r15d
-	# 				x9 ^= c
-	xor	%r15,%r10
-	# 				c = x8 + x9
-	lea	(%r11,%r10),%r15
-	# 				(uint32) c <<<= 18
-	rol	$18,%r15d
-	# 				x10 ^= c
-	xor	%r15,%rbp
-	# 						x15 = x15_stack
-	movq	176(%rsp),%r15
-	# 				x10_stack = x10
-	movq	%rbp,168(%rsp)
-	# 						d = x14 + x15
-	lea	(%rbx,%r15),%rbp
-	# 						(uint32) d <<<= 7
-	rol	$7,%ebp
-	# 						x12 ^= d
-	xor	%rbp,%r14
-	# 						d = x15 + x12
-	lea	(%r15,%r14),%rbp
-	# 						(uint32) d <<<= 9
-	rol	$9,%ebp
-	# 						x13 ^= d
-	xor	%rbp,%r13
-	# 						d = x12 + x13
-	lea	(%r14,%r13),%rbp
-	# 						(uint32) d <<<= 13
-	rol	$13,%ebp
-	# 						x14 ^= d
-	xor	%rbp,%rbx
-	# 						d = x13 + x14
-	lea	(%r13,%rbx),%rbp
-	# 						(uint32) d <<<= 18
-	rol	$18,%ebp
-	# 						x15 ^= d
-	xor	%rbp,%r15
-	# 						x15_stack = x15
-	movq	%r15,176(%rsp)
-	# 		x5 = x5_stack
-	movq	160(%rsp),%r15
-	# a = x12 + x0
-	lea	(%r14,%rdx),%rbp
-	# (uint32) a <<<= 7
-	rol	$7,%ebp
-	# x4 ^= a
-	xor	%rbp,%r9
-	# 		b = x1 + x5
-	lea	(%rdi,%r15),%rbp
-	# 		(uint32) b <<<= 7
-	rol	$7,%ebp
-	# 		x9 ^= b
-	xor	%rbp,%r10
-	# a = x0 + x4
-	lea	(%rdx,%r9),%rbp
-	# (uint32) a <<<= 9
-	rol	$9,%ebp
-	# x8 ^= a
-	xor	%rbp,%r11
-	# 		b = x5 + x9
-	lea	(%r15,%r10),%rbp
-	# 		(uint32) b <<<= 9
-	rol	$9,%ebp
-	# 		x13 ^= b
-	xor	%rbp,%r13
-	# a = x4 + x8
-	lea	(%r9,%r11),%rbp
-	# (uint32) a <<<= 13
-	rol	$13,%ebp
-	# x12 ^= a
-	xor	%rbp,%r14
-	# 		b = x9 + x13
-	lea	(%r10,%r13),%rbp
-	# 		(uint32) b <<<= 13
-	rol	$13,%ebp
-	# 		x1 ^= b
-	xor	%rbp,%rdi
-	# a = x8 + x12
-	lea	(%r11,%r14),%rbp
-	# (uint32) a <<<= 18
-	rol	$18,%ebp
-	# x0 ^= a
-	xor	%rbp,%rdx
-	# 		b = x13 + x1
-	lea	(%r13,%rdi),%rbp
-	# 		(uint32) b <<<= 18
-	rol	$18,%ebp
-	# 		x5 ^= b
-	xor	%rbp,%r15
-	# 				x10 = x10_stack
-	movq	168(%rsp),%rbp
-	# 		x5_stack = x5
-	movq	%r15,160(%rsp)
-	# 				c = x6 + x10
-	lea	(%rax,%rbp),%r15
-	# 				(uint32) c <<<= 7
-	rol	$7,%r15d
-	# 				x14 ^= c
-	xor	%r15,%rbx
-	# 				c = x10 + x14
-	lea	(%rbp,%rbx),%r15
-	# 				(uint32) c <<<= 9
-	rol	$9,%r15d
-	# 				x2 ^= c
-	xor	%r15,%rcx
-	# 				c = x14 + x2
-	lea	(%rbx,%rcx),%r15
-	# 				(uint32) c <<<= 13
-	rol	$13,%r15d
-	# 				x6 ^= c
-	xor	%r15,%rax
-	# 				c = x2 + x6
-	lea	(%rcx,%rax),%r15
-	# 				(uint32) c <<<= 18
-	rol	$18,%r15d
-	# 				x10 ^= c
-	xor	%r15,%rbp
-	# 						x15 = x15_stack
-	movq	176(%rsp),%r15
-	# 				x10_stack = x10
-	movq	%rbp,168(%rsp)
-	# 						d = x11 + x15
-	lea	(%r12,%r15),%rbp
-	# 						(uint32) d <<<= 7
-	rol	$7,%ebp
-	# 						x3 ^= d
-	xor	%rbp,%rsi
-	# 						d = x15 + x3
-	lea	(%r15,%rsi),%rbp
-	# 						(uint32) d <<<= 9
-	rol	$9,%ebp
-	# 						x7 ^= d
-	xor	%rbp,%r8
-	# 						d = x3 + x7
-	lea	(%rsi,%r8),%rbp
-	# 						(uint32) d <<<= 13
-	rol	$13,%ebp
-	# 						x11 ^= d
-	xor	%rbp,%r12
-	# 						d = x7 + x11
-	lea	(%r8,%r12),%rbp
-	# 						(uint32) d <<<= 18
-	rol	$18,%ebp
-	# 						x15 ^= d
-	xor	%rbp,%r15
-	# 						x15_stack = x15
-	movq	%r15,176(%rsp)
-	# 		x5 = x5_stack
-	movq	160(%rsp),%r15
-	# a = x3 + x0
-	lea	(%rsi,%rdx),%rbp
-	# (uint32) a <<<= 7
-	rol	$7,%ebp
-	# x1 ^= a
-	xor	%rbp,%rdi
-	# 		b = x4 + x5
-	lea	(%r9,%r15),%rbp
-	# 		(uint32) b <<<= 7
-	rol	$7,%ebp
-	# 		x6 ^= b
-	xor	%rbp,%rax
-	# a = x0 + x1
-	lea	(%rdx,%rdi),%rbp
-	# (uint32) a <<<= 9
-	rol	$9,%ebp
-	# x2 ^= a
-	xor	%rbp,%rcx
-	# 		b = x5 + x6
-	lea	(%r15,%rax),%rbp
-	# 		(uint32) b <<<= 9
-	rol	$9,%ebp
-	# 		x7 ^= b
-	xor	%rbp,%r8
-	# a = x1 + x2
-	lea	(%rdi,%rcx),%rbp
-	# (uint32) a <<<= 13
-	rol	$13,%ebp
-	# x3 ^= a
-	xor	%rbp,%rsi
-	# 		b = x6 + x7
-	lea	(%rax,%r8),%rbp
-	# 		(uint32) b <<<= 13
-	rol	$13,%ebp
-	# 		x4 ^= b
-	xor	%rbp,%r9
-	# a = x2 + x3
-	lea	(%rcx,%rsi),%rbp
-	# (uint32) a <<<= 18
-	rol	$18,%ebp
-	# x0 ^= a
-	xor	%rbp,%rdx
-	# 		b = x7 + x4
-	lea	(%r8,%r9),%rbp
-	# 		(uint32) b <<<= 18
-	rol	$18,%ebp
-	# 		x5 ^= b
-	xor	%rbp,%r15
-	# 				x10 = x10_stack
-	movq	168(%rsp),%rbp
-	# 		x5_stack = x5
-	movq	%r15,160(%rsp)
-	# 				c = x9 + x10
-	lea	(%r10,%rbp),%r15
-	# 				(uint32) c <<<= 7
-	rol	$7,%r15d
-	# 				x11 ^= c
-	xor	%r15,%r12
-	# 				c = x10 + x11
-	lea	(%rbp,%r12),%r15
-	# 				(uint32) c <<<= 9
-	rol	$9,%r15d
-	# 				x8 ^= c
-	xor	%r15,%r11
-	# 				c = x11 + x8
-	lea	(%r12,%r11),%r15
-	# 				(uint32) c <<<= 13
-	rol	$13,%r15d
-	# 				x9 ^= c
-	xor	%r15,%r10
-	# 				c = x8 + x9
-	lea	(%r11,%r10),%r15
-	# 				(uint32) c <<<= 18
-	rol	$18,%r15d
-	# 				x10 ^= c
-	xor	%r15,%rbp
-	# 						x15 = x15_stack
-	movq	176(%rsp),%r15
-	# 				x10_stack = x10
-	movq	%rbp,168(%rsp)
-	# 						d = x14 + x15
-	lea	(%rbx,%r15),%rbp
-	# 						(uint32) d <<<= 7
-	rol	$7,%ebp
-	# 						x12 ^= d
-	xor	%rbp,%r14
-	# 						d = x15 + x12
-	lea	(%r15,%r14),%rbp
-	# 						(uint32) d <<<= 9
-	rol	$9,%ebp
-	# 						x13 ^= d
-	xor	%rbp,%r13
-	# 						d = x12 + x13
-	lea	(%r14,%r13),%rbp
-	# 						(uint32) d <<<= 13
-	rol	$13,%ebp
-	# 						x14 ^= d
-	xor	%rbp,%rbx
-	# 						d = x13 + x14
-	lea	(%r13,%rbx),%rbp
-	# 						(uint32) d <<<= 18
-	rol	$18,%ebp
-	# 						x15 ^= d
-	xor	%rbp,%r15
-	# 						x15_stack = x15
-	movq	%r15,176(%rsp)
-	#   i = i_backup
-	movq	184(%rsp),%r15
-	#                  unsigned>? i -= 4
-	sub	$4,%r15
-	# comment:fp stack unchanged by jump
-	# goto mainloop if unsigned>
-	ja	._mainloop
-	#   (uint32) x2 += j2
-	addl	64(%rsp),%ecx
-	#   x3 <<= 32
-	shl	$32,%rsi
-	#   x3 += j2
-	addq	64(%rsp),%rsi
-	#   (uint64) x3 >>= 32
-	shr	$32,%rsi
-	#   x3 <<= 32
-	shl	$32,%rsi
-	#   x2 += x3
-	add	%rsi,%rcx
-	#   (uint32) x6 += j6
-	addl	80(%rsp),%eax
-	#   x7 <<= 32
-	shl	$32,%r8
-	#   x7 += j6
-	addq	80(%rsp),%r8
-	#   (uint64) x7 >>= 32
-	shr	$32,%r8
-	#   x7 <<= 32
-	shl	$32,%r8
-	#   x6 += x7
-	add	%r8,%rax
-	#   (uint32) x8 += j8
-	addl	88(%rsp),%r11d
-	#   x9 <<= 32
-	shl	$32,%r10
-	#   x9 += j8
-	addq	88(%rsp),%r10
-	#   (uint64) x9 >>= 32
-	shr	$32,%r10
-	#   x9 <<= 32
-	shl	$32,%r10
-	#   x8 += x9
-	add	%r10,%r11
-	#   (uint32) x12 += j12
-	addl	104(%rsp),%r14d
-	#   x13 <<= 32
-	shl	$32,%r13
-	#   x13 += j12
-	addq	104(%rsp),%r13
-	#   (uint64) x13 >>= 32
-	shr	$32,%r13
-	#   x13 <<= 32
-	shl	$32,%r13
-	#   x12 += x13
-	add	%r13,%r14
-	#   (uint32) x0 += j0
-	addl	56(%rsp),%edx
-	#   x1 <<= 32
-	shl	$32,%rdi
-	#   x1 += j0
-	addq	56(%rsp),%rdi
-	#   (uint64) x1 >>= 32
-	shr	$32,%rdi
-	#   x1 <<= 32
-	shl	$32,%rdi
-	#   x0 += x1
-	add	%rdi,%rdx
-	#   x5 = x5_stack
-	movq	160(%rsp),%rdi
-	#   (uint32) x4 += j4
-	addl	72(%rsp),%r9d
-	#   x5 <<= 32
-	shl	$32,%rdi
-	#   x5 += j4
-	addq	72(%rsp),%rdi
-	#   (uint64) x5 >>= 32
-	shr	$32,%rdi
-	#   x5 <<= 32
-	shl	$32,%rdi
-	#   x4 += x5
-	add	%rdi,%r9
-	#   x10 = x10_stack
-	movq	168(%rsp),%r8
-	#   (uint32) x10 += j10
-	addl	96(%rsp),%r8d
-	#   x11 <<= 32
-	shl	$32,%r12
-	#   x11 += j10
-	addq	96(%rsp),%r12
-	#   (uint64) x11 >>= 32
-	shr	$32,%r12
-	#   x11 <<= 32
-	shl	$32,%r12
-	#   x10 += x11
-	add	%r12,%r8
-	#   x15 = x15_stack
-	movq	176(%rsp),%rdi
-	#   (uint32) x14 += j14
-	addl	112(%rsp),%ebx
-	#   x15 <<= 32
-	shl	$32,%rdi
-	#   x15 += j14
-	addq	112(%rsp),%rdi
-	#   (uint64) x15 >>= 32
-	shr	$32,%rdi
-	#   x15 <<= 32
-	shl	$32,%rdi
-	#   x14 += x15
-	add	%rdi,%rbx
-	#   out = out_backup
-	movq	136(%rsp),%rdi
-	#   m = m_backup
-	movq	144(%rsp),%rsi
-	#   x0 ^= *(uint64 *) (m + 0)
-	xorq	0(%rsi),%rdx
-	#   *(uint64 *) (out + 0) = x0
-	movq	%rdx,0(%rdi)
-	#   x2 ^= *(uint64 *) (m + 8)
-	xorq	8(%rsi),%rcx
-	#   *(uint64 *) (out + 8) = x2
-	movq	%rcx,8(%rdi)
-	#   x4 ^= *(uint64 *) (m + 16)
-	xorq	16(%rsi),%r9
-	#   *(uint64 *) (out + 16) = x4
-	movq	%r9,16(%rdi)
-	#   x6 ^= *(uint64 *) (m + 24)
-	xorq	24(%rsi),%rax
-	#   *(uint64 *) (out + 24) = x6
-	movq	%rax,24(%rdi)
-	#   x8 ^= *(uint64 *) (m + 32)
-	xorq	32(%rsi),%r11
-	#   *(uint64 *) (out + 32) = x8
-	movq	%r11,32(%rdi)
-	#   x10 ^= *(uint64 *) (m + 40)
-	xorq	40(%rsi),%r8
-	#   *(uint64 *) (out + 40) = x10
-	movq	%r8,40(%rdi)
-	#   x12 ^= *(uint64 *) (m + 48)
-	xorq	48(%rsi),%r14
-	#   *(uint64 *) (out + 48) = x12
-	movq	%r14,48(%rdi)
-	#   x14 ^= *(uint64 *) (m + 56)
-	xorq	56(%rsi),%rbx
-	#   *(uint64 *) (out + 56) = x14
-	movq	%rbx,56(%rdi)
-	#   bytes = bytes_backup
-	movq	152(%rsp),%rdx
-	#   in8 = j8
-	movq	88(%rsp),%rcx
-	#   in8 += 1
-	add	$1,%rcx
-	#   j8 = in8
-	movq	%rcx,88(%rsp)
-	#                          unsigned>? unsigned<? bytes - 64
-	cmp	$64,%rdx
-	# comment:fp stack unchanged by jump
-	#   goto bytesatleast65 if unsigned>
-	ja	._bytesatleast65
-	# comment:fp stack unchanged by jump
-	#     goto bytesatleast64 if !unsigned<
-	jae	._bytesatleast64
-	#       m = out
-	mov	%rdi,%rsi
-	#       out = ctarget
-	movq	128(%rsp),%rdi
-	#       i = bytes
-	mov	%rdx,%rcx
-	#       while (i) { *out++ = *m++; --i }
-	rep	movsb
-	# comment:fp stack unchanged by fallthrough
-#     bytesatleast64:
-._bytesatleast64:
-	#     x = x_backup
-	movq	120(%rsp),%rdi
-	#     in8 = j8
-	movq	88(%rsp),%rsi
-	#     *(uint64 *) (x + 32) = in8
-	movq	%rsi,32(%rdi)
-	#     r11 = r11_stack
-	movq	0(%rsp),%r11
-	#     r12 = r12_stack
-	movq	8(%rsp),%r12
-	#     r13 = r13_stack
-	movq	16(%rsp),%r13
-	#     r14 = r14_stack
-	movq	24(%rsp),%r14
-	#     r15 = r15_stack
-	movq	32(%rsp),%r15
-	#     rbx = rbx_stack
-	movq	40(%rsp),%rbx
-	#     rbp = rbp_stack
-	movq	48(%rsp),%rbp
-	# comment:fp stack unchanged by fallthrough
-#     done:
-._done:
-	#     leave
-	add	%r11,%rsp
-	mov	%rdi,%rax
-	mov	%rsi,%rdx
-	ret
-#   bytesatleast65:
-._bytesatleast65:
-	#   bytes -= 64
-	sub	$64,%rdx
-	#   out += 64
-	add	$64,%rdi
-	#   m += 64
-	add	$64,%rsi
-	# comment:fp stack unchanged by jump
-	# goto bytesatleast1
-	jmp	._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
deleted file mode 100644
index b07d7d9..0000000
--- a/arch/x86/crypto/salsa20_glue.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Glue code for optimized assembly version of  Salsa20.
- *
- * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
- *
- * The assembly codes are public domain assembly codes written by Daniel. J.
- * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
- * and to remove extraneous comments and functions that are not needed.
- * - i586 version, renamed as salsa20-i586-asm_32.S
- *   available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
- * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
- *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
- *
- * Also modified to set up the initial state using the generic C code rather
- * than in assembly.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <asm/unaligned.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/salsa20.h>
-#include <linux/module.h>
-
-asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst,
-				      u32 bytes);
-
-static int salsa20_asm_crypt(struct skcipher_request *req)
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	u32 state[16];
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	crypto_salsa20_init(state, ctx, walk.iv);
-
-	while (walk.nbytes > 0) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		salsa20_encrypt_bytes(state, walk.src.virt.addr,
-				      walk.dst.virt.addr, nbytes);
-		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-
-	return err;
-}
-
-static struct skcipher_alg alg = {
-	.base.cra_name		= "salsa20",
-	.base.cra_driver_name	= "salsa20-asm",
-	.base.cra_priority	= 200,
-	.base.cra_blocksize	= 1,
-	.base.cra_ctxsize	= sizeof(struct salsa20_ctx),
-	.base.cra_module	= THIS_MODULE,
-
-	.min_keysize		= SALSA20_MIN_KEY_SIZE,
-	.max_keysize		= SALSA20_MAX_KEY_SIZE,
-	.ivsize			= SALSA20_IV_SIZE,
-	.chunksize		= SALSA20_BLOCK_SIZE,
-	.setkey			= crypto_salsa20_setkey,
-	.encrypt		= salsa20_asm_crypt,
-	.decrypt		= salsa20_asm_crypt,
-};
-
-static int __init init(void)
-{
-	return crypto_register_skcipher(&alg);
-}
-
-static void __exit fini(void)
-{
-	crypto_unregister_skcipher(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
-MODULE_ALIAS_CRYPTO("salsa20");
-MODULE_ALIAS_CRYPTO("salsa20-asm");
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d6b27da..14a2f99 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -396,3 +396,4 @@
 382	i386	pkey_free		sys_pkey_free			__ia32_sys_pkey_free
 383	i386	statx			sys_statx			__ia32_sys_statx
 384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
+385	i386	io_pgetevents		sys_io_pgetevents		__ia32_compat_sys_io_pgetevents
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 4dfe426..cd36232 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -341,6 +341,7 @@
 330	common	pkey_alloc		__x64_sys_pkey_alloc
 331	common	pkey_free		__x64_sys_pkey_free
 332	common	statx			__x64_sys_statx
+333	common	io_pgetevents		__x64_sys_io_pgetevents
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index d998a48..261802b 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -44,14 +44,14 @@
 targets += $(vdso_img_cfiles)
 targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
 
-export CPPFLAGS_vdso.lds += -P -C
+CPPFLAGS_vdso.lds += -P -C
 
 VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 			-Wl,--no-undefined \
 			-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
 			$(DISABLE_LTO)
 
-$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
 
 HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
@@ -100,11 +100,8 @@
 			   -Wl,-z,max-page-size=4096 \
 			   -Wl,-z,common-page-size=4096
 
-# 64-bit objects to re-brand as x32
-vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))
-
 # x32-rebranded versions
-vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)
+vobjx32s-y := $(vobjs-y:.o=-x32.o)
 
 # same thing, but in the output directory
 vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
@@ -122,7 +119,7 @@
 $(obj)/%.so: $(obj)/%.so.dbg
 	$(call if_changed,objcopy)
 
-$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
+$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
 	$(call if_changed,vdso)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 70b7845..7782cdb 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -107,7 +107,7 @@
 		thread->cr2		= ptr;
 		thread->trap_nr		= X86_TRAP_PF;
 
-		memset(&info, 0, sizeof(info));
+		clear_siginfo(&info);
 		info.si_signo		= SIGSEGV;
 		info.si_errno		= 0;
 		info.si_code		= SEGV_MAPERR;
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 786fd87..4b98101 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -889,7 +889,7 @@
 	if (!ibs_eilvt_valid())
 		goto out;
 
-	pr_info("IBS: LVT offset %d assigned\n", offset);
+	pr_info("LVT offset %d assigned\n", offset);
 
 	return;
 out:
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index f5cbbba..981ba5e 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -19,6 +19,7 @@
 #include <asm/cpufeature.h>
 #include <asm/perf_event.h>
 #include <asm/msr.h>
+#include <asm/smp.h>
 
 #define NUM_COUNTERS_NB		4
 #define NUM_COUNTERS_L2		4
@@ -399,26 +400,8 @@
 	}
 
 	if (amd_uncore_llc) {
-		unsigned int apicid = cpu_data(cpu).apicid;
-		unsigned int nshared, subleaf, prev_eax = 0;
-
 		uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
-		/*
-		 * Iterate over Cache Topology Definition leaves until no
-		 * more cache descriptions are available.
-		 */
-		for (subleaf = 0; subleaf < 5; subleaf++) {
-			cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx);
-
-			/* EAX[0:4] gives type of cache */
-			if (!(eax & 0x1f))
-				break;
-
-			prev_eax = eax;
-		}
-		nshared = ((prev_eax >> 14) & 0xfff) + 1;
-
-		uncore->id = apicid - (apicid % nshared);
+		uncore->id = per_cpu(cpu_llc_id, cpu);
 
 		uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
 		*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
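
The removed loop walked CPUID leaf 0x8000001d to find how many threads share the last-level cache and derived the LLC id from the apicid; per_cpu(cpu_llc_id, cpu) is that same value, already computed at boot. The old arithmetic, spelled out with illustrative numbers:

	/* apicid = 13, nshared = 8 threads sharing the LLC:
	 *   id = apicid - (apicid % nshared) = 13 - (13 % 8) = 8,
	 * the value now read directly from per_cpu(cpu_llc_id, cpu).
	 */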
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 45b2b1c..6e461fb 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2397,7 +2397,7 @@
 
 #ifdef CONFIG_IA32_EMULATION
 
-#include <asm/compat.h>
+#include <linux/compat.h>
 
 static inline int
 perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 3b99394..8d016ce 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1194,7 +1194,7 @@
 		    filter->action == PERF_ADDR_FILTER_ACTION_START)
 			return -EOPNOTSUPP;
 
-		if (!filter->inode) {
+		if (!filter->path.dentry) {
 			if (!valid_kernel_ip(filter->offset))
 				return -EINVAL;
 
@@ -1221,7 +1221,7 @@
 		return;
 
 	list_for_each_entry(filter, &head->list, entry) {
-		if (filter->inode && !offs[range]) {
+		if (filter->path.dentry && !offs[range]) {
 			msr_a = msr_b = 0;
 		} else {
 			/* apply the offset */
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index a7956fc..15b0737 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -203,7 +203,7 @@
 	hwc->idx = idx;
 	hwc->last_tag = ++box->tags[idx];
 
-	if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
+	if (uncore_pmc_fixed(hwc->idx)) {
 		hwc->event_base = uncore_fixed_ctr(box);
 		hwc->config_base = uncore_fixed_ctl(box);
 		return;
@@ -218,7 +218,9 @@
 	u64 prev_count, new_count, delta;
 	int shift;
 
-	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
+	if (uncore_pmc_freerunning(event->hw.idx))
+		shift = 64 - uncore_freerunning_bits(box, event);
+	else if (uncore_pmc_fixed(event->hw.idx))
 		shift = 64 - uncore_fixed_ctr_bits(box);
 	else
 		shift = 64 - uncore_perf_ctr_bits(box);
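
The shift computed above normalizes raw reads to the counter's hardware width when deltas are accumulated. A minimal sketch of that idiom, for illustration only; the in-tree update also retries if prev_count changed concurrently:

	/* Shifting both samples up by (64 - width) and the delta back
	 * down discards stale high bits, so wraparound within the
	 * hardware counter width still yields the correct delta.
	 */
	u64 prev = local64_read(&event->hw.prev_count);
	u64 curr = uncore_read_counter(box, event);
	s64 delta = (curr << shift) - (prev << shift);

	delta >>= shift;
	local64_add(delta, &event->count);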
@@ -449,15 +451,30 @@
 	return ret ? -EINVAL : 0;
 }
 
-static void uncore_pmu_event_start(struct perf_event *event, int flags)
+void uncore_pmu_event_start(struct perf_event *event, int flags)
 {
 	struct intel_uncore_box *box = uncore_event_to_box(event);
 	int idx = event->hw.idx;
 
-	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
 		return;
 
-	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
+	/*
+	 * A free running counter is read-only and always active.
+	 * Use the current counter value as the start point.
+	 * There is no overflow interrupt for a free running counter,
+	 * so poll it periodically with an hrtimer to catch overflow.
+	 */
+	if (uncore_pmc_freerunning(event->hw.idx)) {
+		list_add_tail(&event->active_entry, &box->active_list);
+		local64_set(&event->hw.prev_count,
+			    uncore_read_counter(box, event));
+		if (box->n_active++ == 0)
+			uncore_pmu_start_hrtimer(box);
+		return;
+	}
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 		return;
 
 	event->hw.state = 0;
@@ -474,11 +491,20 @@
 	}
 }
 
-static void uncore_pmu_event_stop(struct perf_event *event, int flags)
+void uncore_pmu_event_stop(struct perf_event *event, int flags)
 {
 	struct intel_uncore_box *box = uncore_event_to_box(event);
 	struct hw_perf_event *hwc = &event->hw;
 
+	/* Cannot disable a free running counter, which is read-only */
+	if (uncore_pmc_freerunning(hwc->idx)) {
+		list_del(&event->active_entry);
+		if (--box->n_active == 0)
+			uncore_pmu_cancel_hrtimer(box);
+		uncore_perf_event_update(box, event);
+		return;
+	}
+
 	if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
 		uncore_disable_event(box, event);
 		box->n_active--;
@@ -502,7 +528,7 @@
 	}
 }
 
-static int uncore_pmu_event_add(struct perf_event *event, int flags)
+int uncore_pmu_event_add(struct perf_event *event, int flags)
 {
 	struct intel_uncore_box *box = uncore_event_to_box(event);
 	struct hw_perf_event *hwc = &event->hw;
@@ -512,6 +538,17 @@
 	if (!box)
 		return -ENODEV;
 
+	/*
+	 * The free running counter is assigned in event_init().
+	 * The free running counter event and the free running counter
+	 * are 1:1 mapped; the event doesn't need to be tracked in event_list.
+	 */
+	if (uncore_pmc_freerunning(hwc->idx)) {
+		if (flags & PERF_EF_START)
+			uncore_pmu_event_start(event, 0);
+		return 0;
+	}
+
 	ret = n = uncore_collect_events(box, event, false);
 	if (ret < 0)
 		return ret;
@@ -563,13 +600,21 @@
 	return 0;
 }
 
-static void uncore_pmu_event_del(struct perf_event *event, int flags)
+void uncore_pmu_event_del(struct perf_event *event, int flags)
 {
 	struct intel_uncore_box *box = uncore_event_to_box(event);
 	int i;
 
 	uncore_pmu_event_stop(event, PERF_EF_UPDATE);
 
+	/*
+	 * The event for a free running counter is not tracked in event_list.
+	 * There is no need to force event->hw.idx = -1 to reassign the counter,
+	 * because the event and the free running counter are 1:1 mapped.
+	 */
+	if (uncore_pmc_freerunning(event->hw.idx))
+		return;
+
 	for (i = 0; i < box->n_events; i++) {
 		if (event == box->event_list[i]) {
 			uncore_put_event_constraint(box, event);
@@ -603,6 +648,10 @@
 	struct intel_uncore_box *fake_box;
 	int ret = -EINVAL, n;
 
+	/* The free running counter is always active. */
+	if (uncore_pmc_freerunning(event->hw.idx))
+		return 0;
+
 	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
 	if (!fake_box)
 		return -ENOMEM;
@@ -690,6 +739,17 @@
 
 		/* fixed counters have event field hardcoded to zero */
 		hwc->config = 0ULL;
+	} else if (is_freerunning_event(event)) {
+		if (!check_valid_freerunning_event(box, event))
+			return -EINVAL;
+		event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
+		/*
+		 * The free running counter event and free running counter
+		 * are always 1:1 mapped.
+		 * The free running counter is always active.
+		 * Assign the free running counter here.
+		 */
+		event->hw.event_base = uncore_freerunning_counter(box, event);
 	} else {
 		hwc->config = event->attr.config &
 			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 414dc7e..c9e1e0b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -12,8 +12,13 @@
 
 #define UNCORE_FIXED_EVENT		0xff
 #define UNCORE_PMC_IDX_MAX_GENERIC	8
+#define UNCORE_PMC_IDX_MAX_FIXED	1
+#define UNCORE_PMC_IDX_MAX_FREERUNNING	1
 #define UNCORE_PMC_IDX_FIXED		UNCORE_PMC_IDX_MAX_GENERIC
-#define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PMC_IDX_FREERUNNING	(UNCORE_PMC_IDX_FIXED + \
+					UNCORE_PMC_IDX_MAX_FIXED)
+#define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FREERUNNING + \
+					UNCORE_PMC_IDX_MAX_FREERUNNING)
 
 #define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx)	\
 		((dev << 24) | (func << 16) | (type << 8) | idx)
@@ -35,6 +40,7 @@
 struct intel_uncore_pmu;
 struct intel_uncore_box;
 struct uncore_event_desc;
+struct freerunning_counters;
 
 struct intel_uncore_type {
 	const char *name;
@@ -42,6 +48,7 @@
 	int num_boxes;
 	int perf_ctr_bits;
 	int fixed_ctr_bits;
+	int num_freerunning_types;
 	unsigned perf_ctr;
 	unsigned event_ctl;
 	unsigned event_mask;
@@ -59,6 +66,7 @@
 	struct intel_uncore_pmu *pmus;
 	struct intel_uncore_ops *ops;
 	struct uncore_event_desc *event_descs;
+	struct freerunning_counters *freerunning;
 	const struct attribute_group *attr_groups[4];
 	struct pmu *pmu; /* for custom pmu ops */
 };
@@ -129,6 +137,14 @@
 	const char *config;
 };
 
+struct freerunning_counters {
+	unsigned int counter_base;
+	unsigned int counter_offset;
+	unsigned int box_offset;
+	unsigned int num_counters;
+	unsigned int bits;
+};
+
 struct pci2phy_map {
 	struct list_head list;
 	int segment;
@@ -157,6 +173,16 @@
 static struct kobj_attribute format_attr_##_var =			\
 	__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
 
+static inline bool uncore_pmc_fixed(int idx)
+{
+	return idx == UNCORE_PMC_IDX_FIXED;
+}
+
+static inline bool uncore_pmc_freerunning(int idx)
+{
+	return idx == UNCORE_PMC_IDX_FREERUNNING;
+}
+
 static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box)
 {
 	return box->pmu->type->box_ctl;
@@ -214,6 +240,60 @@
 	return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box);
 }
 
+
+/*
+ * The uncore documentation assigns no event-code to free running counters,
+ * so events need to be defined to identify them. The events are encoded as
+ * event-code + umask-code.
+ *
+ * The event-code for all free running counters is 0xff, which is the same as
+ * the fixed counters.
+ *
+ * The umask-code is used to distinguish a fixed counter from a free running
+ * counter, and to distinguish the different types of free running counters.
+ * - For fixed counters, the umask-code is 0x0X.
+ *   X indicates the index of the fixed counter, which starts from 0.
+ * - For free running counters, the umask-code uses the rest of the space.
+ *   It has the format 0xXY.
+ *   X stands for the type of free running counter, which starts from 1.
+ *   Y stands for the index of free running counters of the same type, which
+ *   starts from 0.
+ *
+ * For example, there are three types of IIO free running counters on Skylake
+ * server, IO CLOCKS counters, BANDWIDTH counters and UTILIZATION counters.
+ * The event-code for all the free running counters is 0xff.
+ * 'ioclk' is the first counter of IO CLOCKS. IO CLOCKS is the first type,
+ * whose umask-codes start at 0x10.
+ * So 'ioclk' is encoded as event=0xff,umask=0x10.
+ * 'bw_in_port2' is the third of the BANDWIDTH counters. BANDWIDTH is
+ * the second type, whose umask-codes start at 0x20.
+ * So 'bw_in_port2' is encoded as event=0xff,umask=0x22.
+ */
+static inline unsigned int uncore_freerunning_idx(u64 config)
+{
+	return ((config >> 8) & 0xf);
+}
+
+#define UNCORE_FREERUNNING_UMASK_START		0x10
+
+static inline unsigned int uncore_freerunning_type(u64 config)
+{
+	return ((((config >> 8) - UNCORE_FREERUNNING_UMASK_START) >> 4) & 0xf);
+}
+
+static inline
+unsigned int uncore_freerunning_counter(struct intel_uncore_box *box,
+					struct perf_event *event)
+{
+	unsigned int type = uncore_freerunning_type(event->attr.config);
+	unsigned int idx = uncore_freerunning_idx(event->attr.config);
+	struct intel_uncore_pmu *pmu = box->pmu;
+
+	return pmu->type->freerunning[type].counter_base +
+	       pmu->type->freerunning[type].counter_offset * idx +
+	       pmu->type->freerunning[type].box_offset * pmu->pmu_idx;
+}
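
As a worked example of the two decoders above, take the 'bw_in_port2' encoding from the comment; the arithmetic is spelled out for illustration:

	u64 cfg = 0xff | (0x22 << 8);		/* event=0xff, umask=0x22 */

	/* type = (((cfg >> 8) - 0x10) >> 4) & 0xf = 0x12 >> 4 = 1 (BANDWIDTH) */
	unsigned int type = uncore_freerunning_type(cfg);

	/* idx = (cfg >> 8) & 0xf = 2, i.e. the third counter of that type */
	unsigned int idx = uncore_freerunning_idx(cfg);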
+
 static inline
 unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
 {
@@ -276,11 +356,52 @@
 	return box->pmu->type->fixed_ctr_bits;
 }
 
+static inline
+unsigned int uncore_freerunning_bits(struct intel_uncore_box *box,
+				     struct perf_event *event)
+{
+	unsigned int type = uncore_freerunning_type(event->attr.config);
+
+	return box->pmu->type->freerunning[type].bits;
+}
+
+static inline int uncore_num_freerunning(struct intel_uncore_box *box,
+					 struct perf_event *event)
+{
+	unsigned int type = uncore_freerunning_type(event->attr.config);
+
+	return box->pmu->type->freerunning[type].num_counters;
+}
+
+static inline int uncore_num_freerunning_types(struct intel_uncore_box *box,
+					       struct perf_event *event)
+{
+	return box->pmu->type->num_freerunning_types;
+}
+
+static inline bool check_valid_freerunning_event(struct intel_uncore_box *box,
+						 struct perf_event *event)
+{
+	unsigned int type = uncore_freerunning_type(event->attr.config);
+	unsigned int idx = uncore_freerunning_idx(event->attr.config);
+
+	return (type < uncore_num_freerunning_types(box, event)) &&
+	       (idx < uncore_num_freerunning(box, event));
+}
+
 static inline int uncore_num_counters(struct intel_uncore_box *box)
 {
 	return box->pmu->type->num_counters;
 }
 
+static inline bool is_freerunning_event(struct perf_event *event)
+{
+	u64 cfg = event->attr.config;
+
+	return ((cfg & UNCORE_FIXED_EVENT) == UNCORE_FIXED_EVENT) &&
+	       (((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START);
+}
+
 static inline void uncore_disable_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->type->ops->disable_box)
@@ -346,6 +467,10 @@
 u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event);
 void uncore_pmu_start_hrtimer(struct intel_uncore_box *box);
 void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box);
+void uncore_pmu_event_start(struct perf_event *event, int flags);
+void uncore_pmu_event_stop(struct perf_event *event, int flags);
+int uncore_pmu_event_add(struct perf_event *event, int flags);
+void uncore_pmu_event_del(struct perf_event *event, int flags);
 void uncore_pmu_event_read(struct perf_event *event);
 void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event);
 struct event_constraint *
diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c
index 93e7a83..173e267 100644
--- a/arch/x86/events/intel/uncore_nhmex.c
+++ b/arch/x86/events/intel/uncore_nhmex.c
@@ -246,7 +246,7 @@
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (hwc->idx >= UNCORE_PMC_IDX_FIXED)
+	if (hwc->idx == UNCORE_PMC_IDX_FIXED)
 		wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0);
 	else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0)
 		wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index aee5e84..8527c3e 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -285,6 +285,15 @@
 #define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE	0x5054
 #define SNB_UNCORE_PCI_IMC_CTR_BASE		SNB_UNCORE_PCI_IMC_DATA_READS_BASE
 
+enum perf_snb_uncore_imc_freerunning_types {
+	SNB_PCI_UNCORE_IMC_DATA		= 0,
+	SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+};
+
+static struct freerunning_counters snb_uncore_imc_freerunning[] = {
+	[SNB_PCI_UNCORE_IMC_DATA]     = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 },
+};
+
 static struct attribute *snb_uncore_imc_formats_attr[] = {
 	&format_attr_event.attr,
 	NULL,
@@ -341,9 +350,8 @@
 }
 
 /*
- * custom event_init() function because we define our own fixed, free
- * running counters, so we do not want to conflict with generic uncore
- * logic. Also simplifies processing
+ * Keep the custom event_init() function compatible with the old event
+ * encoding for free running counters.
  */
 static int snb_uncore_imc_event_init(struct perf_event *event)
 {
@@ -405,11 +413,11 @@
 	switch (cfg) {
 	case SNB_UNCORE_PCI_IMC_DATA_READS:
 		base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE;
-		idx = UNCORE_PMC_IDX_FIXED;
+		idx = UNCORE_PMC_IDX_FREERUNNING;
 		break;
 	case SNB_UNCORE_PCI_IMC_DATA_WRITES:
 		base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE;
-		idx = UNCORE_PMC_IDX_FIXED + 1;
+		idx = UNCORE_PMC_IDX_FREERUNNING;
 		break;
 	default:
 		return -EINVAL;
@@ -430,75 +438,6 @@
 	return 0;
 }
 
-static void snb_uncore_imc_event_start(struct perf_event *event, int flags)
-{
-	struct intel_uncore_box *box = uncore_event_to_box(event);
-	u64 count;
-
-	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-		return;
-
-	event->hw.state = 0;
-	box->n_active++;
-
-	list_add_tail(&event->active_entry, &box->active_list);
-
-	count = snb_uncore_imc_read_counter(box, event);
-	local64_set(&event->hw.prev_count, count);
-
-	if (box->n_active == 1)
-		uncore_pmu_start_hrtimer(box);
-}
-
-static void snb_uncore_imc_event_stop(struct perf_event *event, int flags)
-{
-	struct intel_uncore_box *box = uncore_event_to_box(event);
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (!(hwc->state & PERF_HES_STOPPED)) {
-		box->n_active--;
-
-		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-		hwc->state |= PERF_HES_STOPPED;
-
-		list_del(&event->active_entry);
-
-		if (box->n_active == 0)
-			uncore_pmu_cancel_hrtimer(box);
-	}
-
-	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
-		/*
-		 * Drain the remaining delta count out of a event
-		 * that we are disabling:
-		 */
-		uncore_perf_event_update(box, event);
-		hwc->state |= PERF_HES_UPTODATE;
-	}
-}
-
-static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
-{
-	struct intel_uncore_box *box = uncore_event_to_box(event);
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (!box)
-		return -ENODEV;
-
-	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-	if (!(flags & PERF_EF_START))
-		hwc->state |= PERF_HES_ARCH;
-
-	snb_uncore_imc_event_start(event, 0);
-
-	return 0;
-}
-
-static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
-{
-	snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-}
-
 int snb_pci2phy_map_init(int devid)
 {
 	struct pci_dev *dev = NULL;
@@ -530,10 +469,10 @@
 static struct pmu snb_uncore_imc_pmu = {
 	.task_ctx_nr	= perf_invalid_context,
 	.event_init	= snb_uncore_imc_event_init,
-	.add		= snb_uncore_imc_event_add,
-	.del		= snb_uncore_imc_event_del,
-	.start		= snb_uncore_imc_event_start,
-	.stop		= snb_uncore_imc_event_stop,
+	.add		= uncore_pmu_event_add,
+	.del		= uncore_pmu_event_del,
+	.start		= uncore_pmu_event_start,
+	.stop		= uncore_pmu_event_stop,
 	.read		= uncore_pmu_event_read,
 };
 
@@ -552,12 +491,10 @@
 	.name		= "imc",
 	.num_counters   = 2,
 	.num_boxes	= 1,
-	.fixed_ctr_bits	= 32,
-	.fixed_ctr	= SNB_UNCORE_PCI_IMC_CTR_BASE,
+	.num_freerunning_types	= SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+	.freerunning	= snb_uncore_imc_freerunning,
 	.event_descs	= snb_uncore_imc_events,
 	.format_group	= &snb_uncore_imc_format_group,
-	.perf_ctr	= SNB_UNCORE_PCI_IMC_DATA_READS_BASE,
-	.event_mask	= SNB_UNCORE_PCI_IMC_EVENT_MASK,
 	.ops		= &snb_uncore_imc_ops,
 	.pmu		= &snb_uncore_imc_pmu,
 };
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 77076a1..87dc026 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3522,6 +3522,87 @@
 	.format_group		= &skx_uncore_iio_format_group,
 };
 
+enum perf_uncore_iio_freerunning_type_id {
+	SKX_IIO_MSR_IOCLK			= 0,
+	SKX_IIO_MSR_BW				= 1,
+	SKX_IIO_MSR_UTIL			= 2,
+
+	SKX_IIO_FREERUNNING_TYPE_MAX,
+};
+
+
+static struct freerunning_counters skx_iio_freerunning[] = {
+	[SKX_IIO_MSR_IOCLK]	= { 0xa45, 0x1, 0x20, 1, 36 },
+	[SKX_IIO_MSR_BW]	= { 0xb00, 0x1, 0x10, 8, 36 },
+	[SKX_IIO_MSR_UTIL]	= { 0xb08, 0x1, 0x10, 8, 36 },
+};
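
Plugging these values into uncore_freerunning_counter() in uncore.h gives the concrete MSR address; a worked example for illustration:

	/* bw_in_port2 (type SKX_IIO_MSR_BW, idx = 2) on IIO pmu_idx 1:
	 *   counter_base + counter_offset * idx + box_offset * pmu_idx
	 * = 0xb00        + 0x1 * 2             + 0x10 * 1
	 * = 0xb12
	 */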
+
+static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = {
+	/* Free-Running IO CLOCKS Counter */
+	INTEL_UNCORE_EVENT_DESC(ioclk,			"event=0xff,umask=0x10"),
+	/* Free-Running IIO BANDWIDTH Counters */
+	INTEL_UNCORE_EVENT_DESC(bw_in_port0,		"event=0xff,umask=0x20"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale,	"3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit,	"MiB"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port1,		"event=0xff,umask=0x21"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale,	"3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit,	"MiB"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port2,		"event=0xff,umask=0x22"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale,	"3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit,	"MiB"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port3,		"event=0xff,umask=0x23"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale,	"3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit,	"MiB"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port0,		"event=0xff,umask=0x24"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale,	"3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit,	"MiB"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port1,		"event=0xff,umask=0x25"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale,	"3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit,	"MiB"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port2,		"event=0xff,umask=0x26"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale,	"3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit,	"MiB"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port3,		"event=0xff,umask=0x27"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale,	"3.814697266e-6"),
+	INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit,	"MiB"),
+	/* Free-running IIO UTILIZATION Counters */
+	INTEL_UNCORE_EVENT_DESC(util_in_port0,		"event=0xff,umask=0x30"),
+	INTEL_UNCORE_EVENT_DESC(util_out_port0,		"event=0xff,umask=0x31"),
+	INTEL_UNCORE_EVENT_DESC(util_in_port1,		"event=0xff,umask=0x32"),
+	INTEL_UNCORE_EVENT_DESC(util_out_port1,		"event=0xff,umask=0x33"),
+	INTEL_UNCORE_EVENT_DESC(util_in_port2,		"event=0xff,umask=0x34"),
+	INTEL_UNCORE_EVENT_DESC(util_out_port2,		"event=0xff,umask=0x35"),
+	INTEL_UNCORE_EVENT_DESC(util_in_port3,		"event=0xff,umask=0x36"),
+	INTEL_UNCORE_EVENT_DESC(util_out_port3,		"event=0xff,umask=0x37"),
+	{ /* end: all zeroes */ },
+};
+
+static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = {
+	.read_counter		= uncore_msr_read_counter,
+};
+
+static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	NULL,
+};
+
+static const struct attribute_group skx_uncore_iio_freerunning_format_group = {
+	.name = "format",
+	.attrs = skx_uncore_iio_freerunning_formats_attr,
+};
+
+static struct intel_uncore_type skx_uncore_iio_free_running = {
+	.name			= "iio_free_running",
+	.num_counters		= 17,
+	.num_boxes		= 6,
+	.num_freerunning_types	= SKX_IIO_FREERUNNING_TYPE_MAX,
+	.freerunning		= skx_iio_freerunning,
+	.ops			= &skx_uncore_iio_freerunning_ops,
+	.event_descs		= skx_uncore_iio_freerunning_events,
+	.format_group		= &skx_uncore_iio_freerunning_format_group,
+};
+
 static struct attribute *skx_uncore_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -3595,6 +3676,7 @@
 	&skx_uncore_ubox,
 	&skx_uncore_chabox,
 	&skx_uncore_iio,
+	&skx_uncore_iio_free_running,
 	&skx_uncore_irp,
 	&skx_uncore_pcu,
 	NULL,
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 367a820..b173d40 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1 +1,2 @@
-obj-y		:= hv_init.o mmu.o
+obj-y			:= hv_init.o mmu.o
+obj-$(CONFIG_X86_64)	+= hv_apic.o
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
new file mode 100644
index 0000000..f688554
--- /dev/null
+++ b/arch/x86/hyperv/hv_apic.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V specific APIC code.
+ *
+ * Copyright (C) 2018, Microsoft, Inc.
+ *
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/clockchips.h>
+#include <linux/hyperv.h>
+#include <linux/slab.h>
+#include <linux/cpuhotplug.h>
+#include <asm/hypervisor.h>
+#include <asm/mshyperv.h>
+#include <asm/apic.h>
+
+static struct apic orig_apic;
+
+static u64 hv_apic_icr_read(void)
+{
+	u64 reg_val;
+
+	rdmsrl(HV_X64_MSR_ICR, reg_val);
+	return reg_val;
+}
+
+static void hv_apic_icr_write(u32 low, u32 id)
+{
+	u64 reg_val;
+
+	reg_val = SET_APIC_DEST_FIELD(id);
+	reg_val = reg_val << 32;
+	reg_val |= low;
+
+	wrmsrl(HV_X64_MSR_ICR, reg_val);
+}
+
+static u32 hv_apic_read(u32 reg)
+{
+	u32 reg_val, hi;
+
+	switch (reg) {
+	case APIC_EOI:
+		rdmsr(HV_X64_MSR_EOI, reg_val, hi);
+		return reg_val;
+	case APIC_TASKPRI:
+		rdmsr(HV_X64_MSR_TPR, reg_val, hi);
+		return reg_val;
+
+	default:
+		return native_apic_mem_read(reg);
+	}
+}
+
+static void hv_apic_write(u32 reg, u32 val)
+{
+	switch (reg) {
+	case APIC_EOI:
+		wrmsr(HV_X64_MSR_EOI, val, 0);
+		break;
+	case APIC_TASKPRI:
+		wrmsr(HV_X64_MSR_TPR, val, 0);
+		break;
+	default:
+		native_apic_mem_write(reg, val);
+	}
+}
+
+static void hv_apic_eoi_write(u32 reg, u32 val)
+{
+	wrmsr(HV_X64_MSR_EOI, val, 0);
+}
+
+/*
+ * IPI implementation on Hyper-V.
+ */
+static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
+{
+	struct ipi_arg_ex **arg;
+	struct ipi_arg_ex *ipi_arg;
+	unsigned long flags;
+	int nr_bank = 0;
+	int ret = 1;
+
+	local_irq_save(flags);
+	arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+
+	ipi_arg = *arg;
+	if (unlikely(!ipi_arg))
+		goto ipi_mask_ex_done;
+
+	ipi_arg->vector = vector;
+	ipi_arg->reserved = 0;
+	ipi_arg->vp_set.valid_bank_mask = 0;
+
+	if (!cpumask_equal(mask, cpu_present_mask)) {
+		ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
+	}
+	if (!nr_bank)
+		ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
+
+	ret = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
+			      ipi_arg, NULL);
+
+ipi_mask_ex_done:
+	local_irq_restore(flags);
+	return ((ret == 0) ? true : false);
+}
+
+static bool __send_ipi_mask(const struct cpumask *mask, int vector)
+{
+	int cur_cpu, vcpu;
+	struct ipi_arg_non_ex **arg;
+	struct ipi_arg_non_ex *ipi_arg;
+	int ret = 1;
+	unsigned long flags;
+
+	if (cpumask_empty(mask))
+		return true;
+
+	if (!hv_hypercall_pg)
+		return false;
+
+	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+		return false;
+
+	if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+		return __send_ipi_mask_ex(mask, vector);
+
+	local_irq_save(flags);
+	arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+
+	ipi_arg = *arg;
+	if (unlikely(!ipi_arg))
+		goto ipi_mask_done;
+
+	ipi_arg->vector = vector;
+	ipi_arg->reserved = 0;
+	ipi_arg->cpu_mask = 0;
+
+	for_each_cpu(cur_cpu, mask) {
+		vcpu = hv_cpu_number_to_vp_number(cur_cpu);
+		/*
+		 * This particular version of the IPI hypercall can
+		 * only target up to 64 CPUs.
+		 */
+		if (vcpu >= 64)
+			goto ipi_mask_done;
+
+		__set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask);
+	}
+
+	ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL);
+
+ipi_mask_done:
+	local_irq_restore(flags);
+	return ((ret == 0) ? true : false);
+}
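
The non-ex variant packs the target VPs into one 64-bit mask, one bit per VP, which is why __send_ipi_mask() bails out (and the caller falls back to the original APIC path) for VP numbers of 64 and above. An illustrative packing with hypothetical VP numbers:

	/* VPs 0, 5 and 63 fit in the non-ex argument; VP 64 would not. */
	ipi_arg->cpu_mask = BIT_ULL(0) | BIT_ULL(5) | BIT_ULL(63);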
+
+static bool __send_ipi_one(int cpu, int vector)
+{
+	struct cpumask mask = CPU_MASK_NONE;
+
+	cpumask_set_cpu(cpu, &mask);
+	return __send_ipi_mask(&mask, vector);
+}
+
+static void hv_send_ipi(int cpu, int vector)
+{
+	if (!__send_ipi_one(cpu, vector))
+		orig_apic.send_IPI(cpu, vector);
+}
+
+static void hv_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+	if (!__send_ipi_mask(mask, vector))
+		orig_apic.send_IPI_mask(mask, vector);
+}
+
+static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	struct cpumask new_mask;
+	const struct cpumask *local_mask;
+
+	cpumask_copy(&new_mask, mask);
+	cpumask_clear_cpu(this_cpu, &new_mask);
+	local_mask = &new_mask;
+	if (!__send_ipi_mask(local_mask, vector))
+		orig_apic.send_IPI_mask_allbutself(mask, vector);
+}
+
+static void hv_send_ipi_allbutself(int vector)
+{
+	hv_send_ipi_mask_allbutself(cpu_online_mask, vector);
+}
+
+static void hv_send_ipi_all(int vector)
+{
+	if (!__send_ipi_mask(cpu_online_mask, vector))
+		orig_apic.send_IPI_all(vector);
+}
+
+static void hv_send_ipi_self(int vector)
+{
+	if (!__send_ipi_one(smp_processor_id(), vector))
+		orig_apic.send_IPI_self(vector);
+}
+
+void __init hv_apic_init(void)
+{
+	if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) {
+		if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+			pr_info("Hyper-V: Using ext hypercalls for IPI\n");
+		else
+			pr_info("Hyper-V: Using IPI hypercalls\n");
+		/*
+		 * Set the IPI entry points.
+		 */
+		orig_apic = *apic;
+
+		apic->send_IPI = hv_send_ipi;
+		apic->send_IPI_mask = hv_send_ipi_mask;
+		apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself;
+		apic->send_IPI_allbutself = hv_send_ipi_allbutself;
+		apic->send_IPI_all = hv_send_ipi_all;
+		apic->send_IPI_self = hv_send_ipi_self;
+	}
+
+	if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
+		pr_info("Hyper-V: Using MSR based APIC access\n");
+		apic_set_eoi_write(hv_apic_eoi_write);
+		apic->read      = hv_apic_read;
+		apic->write     = hv_apic_write;
+		apic->icr_write = hv_apic_icr_write;
+		apic->icr_read  = hv_apic_icr_read;
+	}
+}
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index cfecc22..4c431e1 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -91,12 +91,19 @@
 struct hv_vp_assist_page **hv_vp_assist_page;
 EXPORT_SYMBOL_GPL(hv_vp_assist_page);
 
+void __percpu **hyperv_pcpu_input_arg;
+EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
+
 u32 hv_max_vp_index;
 
 static int hv_cpu_init(unsigned int cpu)
 {
 	u64 msr_vp_index;
 	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
+	void **input_arg;
+
+	input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
+	*input_arg = page_address(alloc_page(GFP_KERNEL));
 
 	hv_get_vp_index(msr_vp_index);
 
@@ -217,6 +224,16 @@
 {
 	struct hv_reenlightenment_control re_ctrl;
 	unsigned int new_cpu;
+	unsigned long flags;
+	void **input_arg;
+	void *input_pg = NULL;
+
+	local_irq_save(flags);
+	input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
+	input_pg = *input_arg;
+	*input_arg = NULL;
+	local_irq_restore(flags);
+	free_page((unsigned long)input_pg);
 
 	if (hv_vp_assist_page && hv_vp_assist_page[cpu])
 		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
@@ -242,8 +259,9 @@
  *
  * 1. Setup the hypercall page.
  * 2. Register Hyper-V specific clocksource.
+ * 3. Setup Hyper-V specific APIC entry points.
  */
-void hyperv_init(void)
+void __init hyperv_init(void)
 {
 	u64 guest_id, required_msrs;
 	union hv_x64_msr_hypercall_contents hypercall_msr;
@@ -259,6 +277,16 @@
 	if ((ms_hyperv.features & required_msrs) != required_msrs)
 		return;
 
+	/*
+	 * Allocate the per-CPU state for the hypercall input arg.
+	 * If this allocation fails, we will not be able to set up the
+	 * (per-CPU) hypercall input page, and thus this failure is
+	 * fatal on Hyper-V.
+	 */
+	hyperv_pcpu_input_arg = alloc_percpu(void *);
+
+	BUG_ON(hyperv_pcpu_input_arg == NULL);
+
 	/* Allocate percpu VP index */
 	hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
 				    GFP_KERNEL);
@@ -296,7 +324,7 @@
 	hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
 	wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 
-	hyper_alloc_mmu();
+	hv_apic_init();
 
 	/*
 	 * Register Hyper-V specific clocksource.
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 56c9eba..5f053d7 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -25,20 +25,13 @@
 struct hv_flush_pcpu_ex {
 	u64 address_space;
 	u64 flags;
-	struct {
-		u64 format;
-		u64 valid_bank_mask;
-		u64 bank_contents[];
-	} hv_vp_set;
+	struct hv_vpset hv_vp_set;
 	u64 gva_list[];
 };
 
 /* Each gva in gva_list encodes up to 4096 pages to flush */
 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
 
-static struct hv_flush_pcpu __percpu **pcpu_flush;
-
-static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;
 
 /*
  * Fills in gva_list starting from offset. Returns the number of items added.
@@ -70,41 +63,6 @@
 	return gva_n - offset;
 }
 
-/* Return the number of banks in the resulting vp_set */
-static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
-				    const struct cpumask *cpus)
-{
-	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
-
-	/* valid_bank_mask can represent up to 64 banks */
-	if (hv_max_vp_index / 64 >= 64)
-		return 0;
-
-	/*
-	 * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
-	 * structs are not cleared between calls, we risk flushing unneeded
-	 * vCPUs otherwise.
-	 */
-	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
-		flush->hv_vp_set.bank_contents[vcpu_bank] = 0;
-
-	/*
-	 * Some banks may end up being empty but this is acceptable.
-	 */
-	for_each_cpu(cpu, cpus) {
-		vcpu = hv_cpu_number_to_vp_number(cpu);
-		vcpu_bank = vcpu / 64;
-		vcpu_offset = vcpu % 64;
-		__set_bit(vcpu_offset, (unsigned long *)
-			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
-		if (vcpu_bank >= nr_bank)
-			nr_bank = vcpu_bank + 1;
-	}
-	flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
-
-	return nr_bank;
-}
-
 static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 				    const struct flush_tlb_info *info)
 {
@@ -116,7 +74,7 @@
 
 	trace_hyperv_mmu_flush_tlb_others(cpus, info);
 
-	if (!pcpu_flush || !hv_hypercall_pg)
+	if (!hv_hypercall_pg)
 		goto do_native;
 
 	if (cpumask_empty(cpus))
@@ -124,10 +82,8 @@
 
 	local_irq_save(flags);
 
-	flush_pcpu = this_cpu_ptr(pcpu_flush);
-
-	if (unlikely(!*flush_pcpu))
-		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+	flush_pcpu = (struct hv_flush_pcpu **)
+		     this_cpu_ptr(hyperv_pcpu_input_arg);
 
 	flush = *flush_pcpu;
 
@@ -203,7 +159,7 @@
 
 	trace_hyperv_mmu_flush_tlb_others(cpus, info);
 
-	if (!pcpu_flush_ex || !hv_hypercall_pg)
+	if (!hv_hypercall_pg)
 		goto do_native;
 
 	if (cpumask_empty(cpus))
@@ -211,10 +167,8 @@
 
 	local_irq_save(flags);
 
-	flush_pcpu = this_cpu_ptr(pcpu_flush_ex);
-
-	if (unlikely(!*flush_pcpu))
-		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+	flush_pcpu = (struct hv_flush_pcpu_ex **)
+		     this_cpu_ptr(hyperv_pcpu_input_arg);
 
 	flush = *flush_pcpu;
 
@@ -239,8 +193,8 @@
 	flush->hv_vp_set.valid_bank_mask = 0;
 
 	if (!cpumask_equal(cpus, cpu_present_mask)) {
-		flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
-		nr_bank = cpumask_to_vp_set(flush, cpus);
+		flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
 	}
 
 	if (!nr_bank) {
@@ -296,14 +250,3 @@
 		pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
 	}
 }
-
-void hyper_alloc_mmu(void)
-{
-	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
-		return;
-
-	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
-		pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
-	else
-		pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
-}
diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h
new file mode 100644
index 0000000..e958e28
--- /dev/null
+++ b/arch/x86/include/asm/cacheinfo.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CACHEINFO_H
+#define _ASM_X86_CACHEINFO_H
+
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id);
+
+#endif /* _ASM_X86_CACHEINFO_H */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index e1c8dab..fb97cf7 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -17,7 +17,6 @@
 
 typedef u32		compat_size_t;
 typedef s32		compat_ssize_t;
-typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
@@ -46,16 +45,6 @@
 typedef u64 __attribute__((aligned(4))) compat_u64;
 typedef u32		compat_uptr_t;
 
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
 struct compat_stat {
 	compat_dev_t	st_dev;
 	u16		__pad1;
@@ -145,10 +134,10 @@
 
 struct compat_semid64_ds {
 	struct compat_ipc64_perm sem_perm;
-	compat_time_t  sem_otime;
-	compat_ulong_t __unused1;
-	compat_time_t  sem_ctime;
-	compat_ulong_t __unused2;
+	compat_ulong_t sem_otime;
+	compat_ulong_t sem_otime_high;
+	compat_ulong_t sem_ctime;
+	compat_ulong_t sem_ctime_high;
 	compat_ulong_t sem_nsems;
 	compat_ulong_t __unused3;
 	compat_ulong_t __unused4;
@@ -156,12 +145,12 @@
 
 struct compat_msqid64_ds {
 	struct compat_ipc64_perm msg_perm;
-	compat_time_t  msg_stime;
-	compat_ulong_t __unused1;
-	compat_time_t  msg_rtime;
-	compat_ulong_t __unused2;
-	compat_time_t  msg_ctime;
-	compat_ulong_t __unused3;
+	compat_ulong_t msg_stime;
+	compat_ulong_t msg_stime_high;
+	compat_ulong_t msg_rtime;
+	compat_ulong_t msg_rtime_high;
+	compat_ulong_t msg_ctime;
+	compat_ulong_t msg_ctime_high;
 	compat_ulong_t msg_cbytes;
 	compat_ulong_t msg_qnum;
 	compat_ulong_t msg_qbytes;
@@ -174,12 +163,12 @@
 struct compat_shmid64_ds {
 	struct compat_ipc64_perm shm_perm;
 	compat_size_t  shm_segsz;
-	compat_time_t  shm_atime;
-	compat_ulong_t __unused1;
-	compat_time_t  shm_dtime;
-	compat_ulong_t __unused2;
-	compat_time_t  shm_ctime;
-	compat_ulong_t __unused3;
+	compat_ulong_t shm_atime;
+	compat_ulong_t shm_atime_high;
+	compat_ulong_t shm_dtime;
+	compat_ulong_t shm_dtime_high;
+	compat_ulong_t shm_ctime;
+	compat_ulong_t shm_ctime_high;
 	compat_pid_t   shm_cpid;
 	compat_pid_t   shm_lpid;
 	compat_ulong_t shm_nattch;
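
The new *_high members carry the upper 32 bits of 64-bit timestamps that no longer fit in the removed compat_time_t. A sketch of how a consumer recombines the halves; the helper name is hypothetical:

	/* Hypothetical helper: rebuild a 64-bit time from the split halves. */
	static inline u64 compat_ipc_time64(compat_ulong_t low, compat_ulong_t high)
	{
		return ((u64)high << 32) | low;
	}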
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 89ce4bf..ce4d176 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -30,10 +30,7 @@
 	return dma_ops;
 }
 
-int arch_dma_supported(struct device *dev, u64 mask);
-#define arch_dma_supported arch_dma_supported
-
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
+bool arch_dma_alloc_attrs(struct device **dev);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
 #endif
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index cc8f8fc..c18ed65 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -63,7 +63,7 @@
 #ifndef COMPILE_OFFSETS
 
 #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION)
-#include <asm/compat.h>
+#include <linux/compat.h>
 
 /*
  * Because ia32 syscalls do not map to x86_64 syscall numbers
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 5ea2afd..740a428a 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -50,14 +50,6 @@
 
 #define inc_irq_stat(member)	this_cpu_inc(irq_stat.member)
 
-#define local_softirq_pending()	this_cpu_read(irq_stat.__softirq_pending)
-
-#define __ARCH_SET_SOFTIRQ_PENDING
-
-#define set_softirq_pending(x)	\
-		this_cpu_write(irq_stat.__softirq_pending, (x))
-#define or_softirq_pending(x)	this_cpu_or(irq_stat.__softirq_pending, (x))
-
 extern void ack_bad_irq(unsigned int irq);
 
 extern u64 arch_irq_stat_cpu(unsigned int cpu);
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 416cb0e..3bfa92c 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -164,6 +164,11 @@
  */
 #define HV_X64_DEPRECATING_AEOI_RECOMMENDED	(1 << 9)
 
+/*
+ * Recommend using cluster IPI hypercalls.
+ */
+#define HV_X64_CLUSTER_IPI_RECOMMENDED         (1 << 10)
+
 /* Recommend using the newer ExProcessorMasks interface */
 #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED	(1 << 11)
 
@@ -329,12 +334,17 @@
 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK	\
 		(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
 
+#define HV_IPI_LOW_VECTOR	0x10
+#define HV_IPI_HIGH_VECTOR	0xff
+
 /* Declare the various hypercall operations. */
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE	0x0002
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST	0x0003
 #define HVCALL_NOTIFY_LONG_SPIN_WAIT		0x0008
+#define HVCALL_SEND_IPI				0x000b
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX  0x0013
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
+#define HVCALL_SEND_IPI_EX			0x0015
 #define HVCALL_POST_MESSAGE			0x005c
 #define HVCALL_SIGNAL_EVENT			0x005d
 
@@ -360,7 +370,7 @@
 #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT	BIT(3)
 
 enum HV_GENERIC_SET_FORMAT {
-	HV_GENERIC_SET_SPARCE_4K,
+	HV_GENERIC_SET_SPARSE_4K,
 	HV_GENERIC_SET_ALL,
 };
 
@@ -706,4 +716,22 @@
 #define HV_STIMER_AUTOENABLE		(1ULL << 3)
 #define HV_STIMER_SINT(config)		(__u8)(((config) >> 16) & 0x0F)
 
+struct ipi_arg_non_ex {
+	u32 vector;
+	u32 reserved;
+	u64 cpu_mask;
+};
+
+struct hv_vpset {
+	u64 format;
+	u64 valid_bank_mask;
+	u64 bank_contents[];
+};
+
+struct ipi_arg_ex {
+	u32 vector;
+	u32 reserved;
+	struct hv_vpset vp_set;
+};
+
 #endif
diff --git a/arch/x86/include/asm/intel_mid_vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h
index 3555501..0b44b1a 100644
--- a/arch/x86/include/asm/intel_mid_vrtc.h
+++ b/arch/x86/include/asm/intel_mid_vrtc.h
@@ -4,7 +4,7 @@
 
 extern unsigned char vrtc_cmos_read(unsigned char reg);
 extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
-extern void vrtc_get_time(struct timespec *now);
-extern int vrtc_set_mmss(const struct timespec *now);
+extern void vrtc_get_time(struct timespec64 *now);
+extern int vrtc_set_mmss(const struct timespec64 *now);
 
 #endif
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index f6e5b93..6de6484 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -94,10 +94,10 @@
 
 #ifdef CONFIG_X86_64
 
-build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
-build_mmio_read(__readq, "q", unsigned long, "=r", )
-build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
-build_mmio_write(__writeq, "q", unsigned long, "r", )
+build_mmio_read(readq, "q", u64, "=r", :"memory")
+build_mmio_read(__readq, "q", u64, "=r", )
+build_mmio_write(writeq, "q", u64, "r", :"memory")
+build_mmio_write(__writeq, "q", u64, "r", )
 
 #define readq_relaxed(a)	__readq(a)
 #define writeq_relaxed(v, a)	__writeq(v, a)
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 1775a32..9719800 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -95,8 +95,8 @@
 unsigned char rtc_cmos_read(unsigned char addr);
 void rtc_cmos_write(unsigned char val, unsigned char addr);
 
-extern int mach_set_rtc_mmss(const struct timespec *now);
-extern void mach_get_cmos_time(struct timespec *now);
+extern int mach_set_rtc_mmss(const struct timespec64 *now);
+extern void mach_get_cmos_time(struct timespec64 *now);
 
 #define RTC_IRQ 8
 
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index b90e796..9971921 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -122,6 +122,7 @@
 #if IS_ENABLED(CONFIG_HYPERV)
 extern struct clocksource *hyperv_cs;
 extern void *hv_hypercall_pg;
+extern void __percpu **hyperv_pcpu_input_arg;
 
 static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 {
@@ -258,9 +259,41 @@
 	return hv_vp_index[cpu_number];
 }
 
-void hyperv_init(void);
+static inline int cpumask_to_vpset(struct hv_vpset *vpset,
+				    const struct cpumask *cpus)
+{
+	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+
+	/* valid_bank_mask can represent up to 64 banks */
+	if (hv_max_vp_index / 64 >= 64)
+		return 0;
+
+	/*
+	 * Clear all banks up to the maximum possible bank: hv_flush_pcpu_ex
+	 * structs are not cleared between calls, so we would otherwise risk
+	 * flushing unneeded vCPUs.
+	 */
+	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+		vpset->bank_contents[vcpu_bank] = 0;
+
+	/*
+	 * Some banks may end up being empty but this is acceptable.
+	 */
+	for_each_cpu(cpu, cpus) {
+		vcpu = hv_cpu_number_to_vp_number(cpu);
+		vcpu_bank = vcpu / 64;
+		vcpu_offset = vcpu % 64;
+		__set_bit(vcpu_offset, (unsigned long *)
+			  &vpset->bank_contents[vcpu_bank]);
+		if (vcpu_bank >= nr_bank)
+			nr_bank = vcpu_bank + 1;
+	}
+	vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);
+	return nr_bank;
+}
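
A worked example of the bank arithmetic above, with an illustrative VP number:

	/* VP 70: vcpu_bank = 70 / 64 = 1, vcpu_offset = 70 % 64 = 6.
	 * With nr_bank == 2, valid_bank_mask = GENMASK_ULL(1, 0) = 0x3.
	 */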
+
+void __init hyperv_init(void);
 void hyperv_setup_mmu_ops(void);
-void hyper_alloc_mmu(void);
 void hyperv_report_panic(struct pt_regs *regs, long err);
 bool hv_is_hyperv_initialized(void);
 void hyperv_cleanup(void);
@@ -269,6 +302,13 @@
 void set_hv_tscchange_cb(void (*cb)(void));
 void clear_hv_tscchange_cb(void);
 void hyperv_stop_tsc_emulation(void);
+
+#ifdef CONFIG_X86_64
+void hv_apic_init(void);
+#else
+static inline void hv_apic_init(void) {}
+#endif
+
 #else /* CONFIG_HYPERV */
 static inline void hyperv_init(void) {}
 static inline bool hv_is_hyperv_initialized(void) { return false; }
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index fda2114..68b2c31 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -62,8 +62,8 @@
 #define NHM_C3_AUTO_DEMOTE		(1UL << 25)
 #define NHM_C1_AUTO_DEMOTE		(1UL << 26)
 #define ATM_LNC_C6_AUTO_DEMOTE		(1UL << 25)
-#define SNB_C1_AUTO_UNDEMOTE		(1UL << 27)
-#define SNB_C3_AUTO_UNDEMOTE		(1UL << 28)
+#define SNB_C3_AUTO_UNDEMOTE		(1UL << 27)
+#define SNB_C1_AUTO_UNDEMOTE		(1UL << 28)
 
 #define MSR_MTRRcap			0x000000fe
 
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 2c5a966..6afac38 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -53,7 +53,7 @@
 #define __PHYSICAL_MASK_SHIFT	52
 
 #ifdef CONFIG_X86_5LEVEL
-#define __VIRTUAL_MASK_SHIFT	(pgtable_l5_enabled ? 56 : 47)
+#define __VIRTUAL_MASK_SHIFT	(pgtable_l5_enabled() ? 56 : 47)
 #else
 #define __VIRTUAL_MASK_SHIFT	47
 #endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9be2bf1..d49bbf4 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -574,14 +574,14 @@
 }
 
 #define set_pgd(pgdp, pgdval) do {					\
-	if (pgtable_l5_enabled)						\
+	if (pgtable_l5_enabled())						\
 		__set_pgd(pgdp, pgdval);				\
 	else								\
 		set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd });	\
 } while (0)
 
 #define pgd_clear(pgdp) do {						\
-	if (pgtable_l5_enabled)						\
+	if (pgtable_l5_enabled())						\
 		set_pgd(pgdp, __pgd(0));				\
 } while (0)
 
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index d32175e..6629636 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -117,9 +117,6 @@
 #define native_setup_msi_irqs		NULL
 #define native_teardown_msi_irq		NULL
 #endif
-
-#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-
 #endif  /* __KERNEL__ */
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 263c142..ada6410 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -167,7 +167,7 @@
 #if CONFIG_PGTABLE_LEVELS > 4
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
 {
-	if (!pgtable_l5_enabled)
+	if (!pgtable_l5_enabled())
 		return;
 	paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
 	set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
@@ -193,7 +193,7 @@
 static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 				  unsigned long address)
 {
-	if (pgtable_l5_enabled)
+	if (pgtable_l5_enabled())
 		___p4d_free_tlb(tlb, p4d);
 }
 
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index f1633de..99ecde2 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -65,7 +65,7 @@
 
 #ifndef __PAGETABLE_P4D_FOLDED
 #define set_pgd(pgdp, pgd)		native_set_pgd(pgdp, pgd)
-#define pgd_clear(pgd)			(pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
+#define pgd_clear(pgd)			(pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0)
 #endif
 
 #ifndef set_p4d
@@ -881,7 +881,7 @@
 #if CONFIG_PGTABLE_LEVELS > 4
 static inline int pgd_present(pgd_t pgd)
 {
-	if (!pgtable_l5_enabled)
+	if (!pgtable_l5_enabled())
 		return 1;
 	return pgd_flags(pgd) & _PAGE_PRESENT;
 }
@@ -898,9 +898,9 @@
 #define pgd_page(pgd)	pfn_to_page(pgd_pfn(pgd))
 
 /* to find an entry in a page-table-directory. */
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 {
-	if (!pgtable_l5_enabled)
+	if (!pgtable_l5_enabled())
 		return (p4d_t *)pgd;
 	return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
 }
@@ -909,7 +909,7 @@
 {
 	unsigned long ignore_flags = _PAGE_USER;
 
-	if (!pgtable_l5_enabled)
+	if (!pgtable_l5_enabled())
 		return 0;
 
 	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
@@ -920,7 +920,7 @@
 
 static inline int pgd_none(pgd_t pgd)
 {
-	if (!pgtable_l5_enabled)
+	if (!pgtable_l5_enabled())
 		return 0;
 	/*
 	 * There is no need to do a workaround for the KNL stray
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index e3225e8..d9a001a 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -15,7 +15,7 @@
 # include <asm/pgtable-2level_types.h>
 #endif
 
-#define pgtable_l5_enabled 0
+#define pgtable_l5_enabled() 0
 
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 877bc27..3c5385f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -220,7 +220,7 @@
 {
 	pgd_t pgd;
 
-	if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+	if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
 		*p4dp = p4d;
 		return;
 	}
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index adb4755..054765a 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -22,12 +22,23 @@
 
 #ifdef CONFIG_X86_5LEVEL
 extern unsigned int __pgtable_l5_enabled;
-#ifndef pgtable_l5_enabled
-#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
-#endif
+
+#ifdef USE_EARLY_PGTABLE_L5
+/*
+ * cpu_feature_enabled() is not available in early boot code.
+ * Use the variable instead.
+ */
+static inline bool pgtable_l5_enabled(void)
+{
+	return __pgtable_l5_enabled;
+}
 #else
-#define pgtable_l5_enabled 0
-#endif
+#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
+#endif /* USE_EARLY_PGTABLE_L5 */
+
+#else
+#define pgtable_l5_enabled() 0
+#endif /* CONFIG_X86_5LEVEL */
 
 extern unsigned int pgdir_shift;
 extern unsigned int ptrs_per_p4d;
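
Early-boot code opts into the variable-backed check by defining the macro before any includes; a minimal sketch, not an actual call site from this diff:

	/* Must be defined before pgtable_64_types.h is pulled in. */
	#define USE_EARLY_PGTABLE_L5
	#include <asm/pgtable_64_types.h>

	/* Here pgtable_l5_enabled() reads __pgtable_l5_enabled directly
	 * instead of using cpu_feature_enabled(X86_FEATURE_LA57).
	 */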
@@ -102,7 +113,7 @@
 
 #define LDT_PGD_ENTRY_L4	-3UL
 #define LDT_PGD_ENTRY_L5	-112UL
-#define LDT_PGD_ENTRY		(pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_PGD_ENTRY		(pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
 #define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 
 #define __VMALLOC_BASE_L4	0xffffc90000000000UL
@@ -116,7 +127,7 @@
 
 #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
 # define VMALLOC_START		vmalloc_base
-# define VMALLOC_SIZE_TB	(pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
+# define VMALLOC_SIZE_TB	(pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
 # define VMEMMAP_START		vmemmap_base
 #else
 # define VMALLOC_START		__VMALLOC_BASE_L4
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 21a1149..e28add6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -186,15 +186,6 @@
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 void print_cpu_msr(struct cpuinfo_x86 *);
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern u32 get_scattered_cpuid_leaf(unsigned int level,
-				    unsigned int sub_leaf,
-				    enum cpuid_regs_idx reg);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
-
-extern void detect_extended_topology(struct cpuinfo_x86 *c);
-extern void detect_ht(struct cpuinfo_x86 *c);
 
 #ifdef CONFIG_X86_32
 extern int have_cpuid_p(void);
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index a7471dc..b603368 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -12,7 +12,7 @@
 unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
 void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
 			    struct pvclock_vcpu_time_info *vcpu,
-			    struct timespec *ts);
+			    struct timespec64 *ts);
 void pvclock_resume(void);
 
 void pvclock_touch_watchdogs(void);
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 5e16b5d..3e70bed 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -7,6 +7,14 @@
 #include <asm-generic/qspinlock_types.h>
 #include <asm/paravirt.h>
 
+#define _Q_PENDING_LOOPS	(1 << 9)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
 #define	queued_spin_unlock queued_spin_unlock
 /**
  * queued_spin_unlock - release a queued spinlock
@@ -16,15 +24,9 @@
  */
 static inline void native_queued_spin_unlock(struct qspinlock *lock)
 {
-	smp_store_release((u8 *)lock, 0);
+	smp_store_release(&lock->locked, 0);
 }
 
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_init_lock_hash(void);
-extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
-
 static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
 	pv_queued_spin_lock_slowpath(lock, val);
@@ -40,11 +42,6 @@
 {
 	return pv_vcpu_is_preempted(cpu);
 }
-#else
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
-	native_queued_spin_unlock(lock);
-}
 #endif
 
 #ifdef CONFIG_PARAVIRT
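
With the qspinlock changes above, the lock's low byte is addressed directly
as lock->locked instead of casting through a shadow struct, and
_Q_PENDING_LOOPS bounds the pending-bit spin. A minimal sketch of the new
unlock shape, assuming a simplified stand-in type (the real struct qspinlock
layout lives in the generic qspinlock_types.h):

    /* Stand-in for illustration: on little-endian x86 the low byte of
     * the lock word is the 'locked' byte, so unlock is one release
     * store to that byte -- no (u8 *) cast required. */
    struct example_qspinlock {
    	union {
    		unsigned int val;
    		unsigned char locked;
    	};
    };

    static inline void example_unlock(struct example_qspinlock *lock)
    {
    	__atomic_store_n(&lock->locked, 0, __ATOMIC_RELEASE);
    }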
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 923307e..9ef5ee0 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -22,8 +22,7 @@
  *
  * void __pv_queued_spin_unlock(struct qspinlock *lock)
  * {
- *	struct __qspinlock *l = (void *)lock;
- *	u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ *	u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
  *
  *	if (likely(lockval == _Q_LOCKED_VAL))
  *		return;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index f75bff8..547c4fe 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -171,7 +171,6 @@
 	wbinvd();
 	return 0;
 }
-#define smp_num_siblings	1
 #endif /* CONFIG_SMP */
 
 extern unsigned disabled_cpus;
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4617a2b..1992187 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,8 +27,8 @@
 # endif
 #else /* CONFIG_X86_32 */
 # define SECTION_SIZE_BITS	27 /* matt - 128 is convenient right now */
-# define MAX_PHYSADDR_BITS	(pgtable_l5_enabled ? 52 : 44)
-# define MAX_PHYSMEM_BITS	(pgtable_l5_enabled ? 52 : 46)
+# define MAX_PHYSADDR_BITS	(pgtable_l5_enabled() ? 52 : 44)
+# define MAX_PHYSMEM_BITS	(pgtable_l5_enabled() ? 52 : 46)
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 133d942..b6dc698 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -111,4 +111,6 @@
 	return (unsigned long)frame;
 }
 
+void show_opcodes(u8 *rip, const char *loglvl);
+void show_ip(struct pt_regs *regs, const char *loglvl);
 #endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 533f74c..d33f92b 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -116,7 +116,8 @@
 #endif
 
 #define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
+		size_t cnt);
 DECLARE_STATIC_KEY_FALSE(mcsafe_key);
 
 /**
@@ -131,14 +132,15 @@
  * actually do machine check recovery. Everyone else can just
  * use memcpy().
  *
- * Return 0 for success, -EFAULT for fail
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
  */
-static __always_inline __must_check int
+static __always_inline __must_check unsigned long
 memcpy_mcsafe(void *dst, const void *src, size_t cnt)
 {
 #ifdef CONFIG_X86_MCE
 	if (static_branch_unlikely(&mcsafe_key))
-		return memcpy_mcsafe_unrolled(dst, src, cnt);
+		return __memcpy_mcsafe(dst, src, cnt);
 	else
 #endif
 		memcpy(dst, src, cnt);
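
The return-convention change above means callers can now tell how far the
copy got before hitting poison. A hedged caller sketch (kernel context
assumed; the function and its naming are made up for illustration):

    /* Sketch: memcpy_mcsafe() now reports bytes NOT copied, so a
     * pmem read path can log partial progress instead of a bare
     * -EFAULT. */
    static int example_pmem_read(void *dst, const void *src, size_t len)
    {
    	unsigned long rem = memcpy_mcsafe(dst, src, len);

    	if (rem) {
    		pr_err("recovered after %lu bytes\n",
    		       (unsigned long)(len - rem));
    		return -EIO;
    	}
    	return 0;
    }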
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 62546b3..62acb61 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -47,6 +47,17 @@
 }
 
 static __always_inline __must_check unsigned long
+copy_to_user_mcsafe(void *to, const void *from, unsigned len)
+{
+	unsigned long ret;
+
+	__uaccess_begin();
+	ret = memcpy_mcsafe(to, from, len);
+	__uaccess_end();
+	return ret;
+}
+
+static __always_inline __must_check unsigned long
 raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
 {
 	int ret = 0;
@@ -194,4 +205,7 @@
 unsigned long
 copy_user_handle_tail(char *to, char *from, unsigned len);
 
+unsigned long
+mcsafe_handle_tail(char *to, char *from, unsigned len);
+
 #endif /* _ASM_X86_UACCESS_64_H */
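
copy_to_user_mcsafe() pairs the machine-check-safe copy with the
__uaccess_begin()/__uaccess_end() bracket, so a driver can move data from
MCE-protected memory to user space in one call. Sketch only (the read
helper and its arguments are hypothetical; note the helper takes plain
void * for the destination):

    static ssize_t example_read(void *ubuf, void *dev_buf, size_t len)
    {
    	unsigned long rem = copy_to_user_mcsafe(ubuf, dev_buf, len);

    	return len - rem;	/* bytes actually delivered */
    }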
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index ce8b4da..2d27236 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -170,7 +170,7 @@
 	void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
 };
 
-struct timespec;
+struct timespec64;
 
 /**
  * struct x86_legacy_devices - legacy x86 devices
@@ -264,8 +264,8 @@
 struct x86_platform_ops {
 	unsigned long (*calibrate_cpu)(void);
 	unsigned long (*calibrate_tsc)(void);
-	void (*get_wallclock)(struct timespec *ts);
-	int (*set_wallclock)(const struct timespec *ts);
+	void (*get_wallclock)(struct timespec64 *ts);
+	int (*set_wallclock)(const struct timespec64 *ts);
 	void (*iommu_shutdown)(void);
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
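
The wallclock hooks are widened to struct timespec64 so platform code is
y2038-clean end to end. A sketch of a platform implementation under the new
signature (the RTC read helper is hypothetical):

    static void example_get_wallclock(struct timespec64 *ts)
    {
    	ts->tv_sec  = example_read_rtc_seconds();	/* time64_t-sized */
    	ts->tv_nsec = 0;
    }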
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index cabd747..89de6cd 100644
--- a/arch/x86/include/uapi/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
@@ -8,15 +8,24 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
+ *
+ * x86_64 and x32 incorrectly added padding here, so the structures
+ * are still incompatible with the padding on x86.
  */
 struct semid64_ds {
 	struct ipc64_perm sem_perm;	/* permissions .. see ipc.h */
+#ifdef __i386__
+	unsigned long	sem_otime;	/* last semop time */
+	unsigned long	sem_otime_high;
+	unsigned long	sem_ctime;	/* last change time */
+	unsigned long	sem_ctime_high;
+#else
 	__kernel_time_t	sem_otime;	/* last semop time */
 	__kernel_ulong_t __unused1;
 	__kernel_time_t	sem_ctime;	/* last change time */
 	__kernel_ulong_t __unused2;
+#endif
 	__kernel_ulong_t sem_nsems;	/* no. of semaphores in array */
 	__kernel_ulong_t __unused3;
 	__kernel_ulong_t __unused4;
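
On i386 the old padding words become explicit _high halves, which is how a
64-bit semop/change time survives past y2038 without growing the structure.
A hedged sketch of reassembling the value (only valid for the __i386__
layout above; the helper name is made up):

    static long long example_sem_otime64(const struct semid64_ds *ds)
    {
    	/* low 32 bits in sem_otime, upper 32 bits in sem_otime_high */
    	return ((long long)ds->sem_otime_high << 32) | ds->sem_otime;
    }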
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index dfcbe69..5d0de79 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1715,19 +1715,6 @@
 	return 0;
 }
 
-static int proc_apm_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_apm_show, NULL);
-}
-
-static const struct file_operations apm_file_ops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_apm_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int apm(void *unused)
 {
 	unsigned short	bx;
@@ -2360,7 +2347,7 @@
 	set_desc_base(&gdt[APM_DS >> 3],
 		 (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4));
 
-	proc_create("apm", 0, NULL, &apm_file_ops);
+	proc_create_single("apm", 0, NULL, proc_apm_show);
 
 	kapmd_task = kthread_create(apm, NULL, "kapmd");
 	if (IS_ERR(kapmd_task)) {
@@ -2446,7 +2433,7 @@
 	"System idle percentage above which to make APM BIOS idle calls");
 module_param(idle_period, int, 0444);
 MODULE_PARM_DESC(idle_period,
-	"Period (in sec/100) over which to caculate the idle percentage");
+	"Period (in sec/100) over which to calculate the idle percentage");
 module_param(smp, bool, 0444);
 MODULE_PARM_DESC(smp,
 	"Set this to enable APM use on an SMP platform. Use with caution on older systems");
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a66229f..7a40196 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -17,7 +17,7 @@
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_common.o		:= $(nostackp)
 
-obj-y			:= intel_cacheinfo.o scattered.o topology.o
+obj-y			:= cacheinfo.o scattered.o topology.o
 obj-y			+= common.o
 obj-y			+= rdrand.o
 obj-y			+= match.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1b18be3..082d787 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,6 +9,7 @@
 #include <linux/random.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
+#include <asm/cacheinfo.h>
 #include <asm/cpu.h>
 #include <asm/spec-ctrl.h>
 #include <asm/smp.h>
@@ -298,7 +299,6 @@
 }
 #endif
 
-#ifdef CONFIG_SMP
 /*
  * Fix up cpu_core_id for pre-F17h systems to be in the
  * [0 .. cores_per_node - 1] range. Not really needed but
@@ -328,6 +328,7 @@
 
 	/* get information required for multi-node processors */
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		int err;
 		u32 eax, ebx, ecx, edx;
 
 		cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -346,21 +347,15 @@
 		}
 
 		/*
-		 * We may have multiple LLCs if L3 caches exist, so check if we
-		 * have an L3 cache by looking at the L3 cache CPUID leaf.
+		 * In case leaf B is available, use it to derive
+		 * topology information.
 		 */
-		if (cpuid_edx(0x80000006)) {
-			if (c->x86 == 0x17) {
-				/*
-				 * LLC is at the core complex level.
-				 * Core complex id is ApicId[3].
-				 */
-				per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
-			} else {
-				/* LLC is at the node level. */
-				per_cpu(cpu_llc_id, cpu) = node_id;
-			}
-		}
+		err = detect_extended_topology(c);
+		if (!err)
+			c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+
+		cacheinfo_amd_init_llc_id(c, cpu, node_id);
+
 	} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
 		u64 value;
 
@@ -376,7 +371,6 @@
 		legacy_fixup_core_id(c);
 	}
 }
-#endif
 
 /*
  * On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
@@ -384,7 +378,6 @@
  */
 static void amd_detect_cmp(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
 	unsigned bits;
 	int cpu = smp_processor_id();
 
@@ -395,17 +388,11 @@
 	c->phys_proc_id = c->initial_apicid >> bits;
 	/* use socket ID also for last level cache */
 	per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
-	amd_get_topology(c);
-#endif
 }
 
 u16 amd_get_nb_id(int cpu)
 {
-	u16 id = 0;
-#ifdef CONFIG_SMP
-	id = per_cpu(cpu_llc_id, cpu);
-#endif
-	return id;
+	return per_cpu(cpu_llc_id, cpu);
 }
 EXPORT_SYMBOL_GPL(amd_get_nb_id);
 
@@ -864,6 +851,7 @@
 	/* Multi core CPU? */
 	if (c->extended_cpuid_level >= 0x80000008) {
 		amd_detect_cmp(c);
+		amd_get_topology(c);
 		srat_detect_node(c);
 	}
 
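
amd_get_topology() now derives the core-id width from the extended topology
leaf instead of open-coding LLC placement, and the LLC id derivation moves
to cacheinfo_amd_init_llc_id(). A worked sketch of the width arithmetic
(kernel context assumed; the wrapper function is illustrative only):

    static unsigned int example_socket_id(struct cpuinfo_x86 *c)
    {
    	/* e.g. 12 cores -> get_count_order(12) == 4, so core ids
    	 * occupy the low 4 APIC-id bits and the socket id is the
    	 * remainder above them. */
    	return c->initial_apicid >> get_count_order(c->x86_max_cores);
    }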
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
new file mode 100644
index 0000000..38354c6
--- /dev/null
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -0,0 +1,1010 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *	Routines to identify caches on Intel CPU.
+ *
+ *	Changes:
+ *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
+ *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
+ */
+
+#include <linux/slab.h>
+#include <linux/cacheinfo.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
+#include <linux/sysfs.h>
+#include <linux/pci.h>
+
+#include <asm/cpufeature.h>
+#include <asm/amd_nb.h>
+#include <asm/smp.h>
+
+#include "cpu.h"
+
+#define LVL_1_INST	1
+#define LVL_1_DATA	2
+#define LVL_2		3
+#define LVL_3		4
+#define LVL_TRACE	5
+
+struct _cache_table {
+	unsigned char descriptor;
+	char cache_type;
+	short size;
+};
+
+#define MB(x)	((x) * 1024)
+
+/* All the cache descriptor types we care about (no TLB or
+   trace cache entries) */
+
+static const struct _cache_table cache_table[] =
+{
+	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
+	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
+	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
+	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
+	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
+	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
+	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
+	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
+	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
+	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
+	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
+	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
+	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
+	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
+	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
+	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
+	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
+	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
+	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
+	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
+	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
+	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
+	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
+	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
+	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
+	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
+	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
+	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
+	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
+	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
+	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
+	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
+	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
+	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
+	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
+	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
+	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
+	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
+	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
+	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
+	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
+	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
+	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
+	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
+	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
+	{ 0x00, 0, 0}
+};
+
+
+enum _cache_type {
+	CTYPE_NULL = 0,
+	CTYPE_DATA = 1,
+	CTYPE_INST = 2,
+	CTYPE_UNIFIED = 3
+};
+
+union _cpuid4_leaf_eax {
+	struct {
+		enum _cache_type	type:5;
+		unsigned int		level:3;
+		unsigned int		is_self_initializing:1;
+		unsigned int		is_fully_associative:1;
+		unsigned int		reserved:4;
+		unsigned int		num_threads_sharing:12;
+		unsigned int		num_cores_on_die:6;
+	} split;
+	u32 full;
+};
+
+union _cpuid4_leaf_ebx {
+	struct {
+		unsigned int		coherency_line_size:12;
+		unsigned int		physical_line_partition:10;
+		unsigned int		ways_of_associativity:10;
+	} split;
+	u32 full;
+};
+
+union _cpuid4_leaf_ecx {
+	struct {
+		unsigned int		number_of_sets:32;
+	} split;
+	u32 full;
+};
+
+struct _cpuid4_info_regs {
+	union _cpuid4_leaf_eax eax;
+	union _cpuid4_leaf_ebx ebx;
+	union _cpuid4_leaf_ecx ecx;
+	unsigned int id;
+	unsigned long size;
+	struct amd_northbridge *nb;
+};
+
+static unsigned short num_cache_leaves;
+
+/* AMD doesn't have CPUID4. Emulate it here to report the same
+   information to the user.  This makes some assumptions about the machine:
+   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+
+   In theory the TLBs could be reported as fake type (they are in "dummy").
+   Maybe later */
+union l1_cache {
+	struct {
+		unsigned line_size:8;
+		unsigned lines_per_tag:8;
+		unsigned assoc:8;
+		unsigned size_in_kb:8;
+	};
+	unsigned val;
+};
+
+union l2_cache {
+	struct {
+		unsigned line_size:8;
+		unsigned lines_per_tag:4;
+		unsigned assoc:4;
+		unsigned size_in_kb:16;
+	};
+	unsigned val;
+};
+
+union l3_cache {
+	struct {
+		unsigned line_size:8;
+		unsigned lines_per_tag:4;
+		unsigned assoc:4;
+		unsigned res:2;
+		unsigned size_encoded:14;
+	};
+	unsigned val;
+};
+
+static const unsigned short assocs[] = {
+	[1] = 1,
+	[2] = 2,
+	[4] = 4,
+	[6] = 8,
+	[8] = 16,
+	[0xa] = 32,
+	[0xb] = 48,
+	[0xc] = 64,
+	[0xd] = 96,
+	[0xe] = 128,
+	[0xf] = 0xffff /* fully associative - no way to show this currently */
+};
+
+static const unsigned char levels[] = { 1, 1, 2, 3 };
+static const unsigned char types[] = { 1, 2, 3, 3 };
+
+static const enum cache_type cache_type_map[] = {
+	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
+	[CTYPE_DATA] = CACHE_TYPE_DATA,
+	[CTYPE_INST] = CACHE_TYPE_INST,
+	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
+static void
+amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+		     union _cpuid4_leaf_ebx *ebx,
+		     union _cpuid4_leaf_ecx *ecx)
+{
+	unsigned dummy;
+	unsigned line_size, lines_per_tag, assoc, size_in_kb;
+	union l1_cache l1i, l1d;
+	union l2_cache l2;
+	union l3_cache l3;
+	union l1_cache *l1 = &l1d;
+
+	eax->full = 0;
+	ebx->full = 0;
+	ecx->full = 0;
+
+	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
+	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
+
+	switch (leaf) {
+	case 1:
+		l1 = &l1i;
+	case 0:
+		if (!l1->val)
+			return;
+		assoc = assocs[l1->assoc];
+		line_size = l1->line_size;
+		lines_per_tag = l1->lines_per_tag;
+		size_in_kb = l1->size_in_kb;
+		break;
+	case 2:
+		if (!l2.val)
+			return;
+		assoc = assocs[l2.assoc];
+		line_size = l2.line_size;
+		lines_per_tag = l2.lines_per_tag;
+		/* cpu_data has errata corrections for K7 applied */
+		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
+		break;
+	case 3:
+		if (!l3.val)
+			return;
+		assoc = assocs[l3.assoc];
+		line_size = l3.line_size;
+		lines_per_tag = l3.lines_per_tag;
+		size_in_kb = l3.size_encoded * 512;
+		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
+			size_in_kb = size_in_kb >> 1;
+			assoc = assoc >> 1;
+		}
+		break;
+	default:
+		return;
+	}
+
+	eax->split.is_self_initializing = 1;
+	eax->split.type = types[leaf];
+	eax->split.level = levels[leaf];
+	eax->split.num_threads_sharing = 0;
+	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
+
+	if (assoc == 0xffff)
+		eax->split.is_fully_associative = 1;
+	ebx->split.coherency_line_size = line_size - 1;
+	ebx->split.ways_of_associativity = assoc - 1;
+	ebx->split.physical_line_partition = lines_per_tag - 1;
+	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+		(ebx->split.ways_of_associativity + 1) - 1;
+}
+
+#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
+
+/*
+ * L3 cache descriptors
+ */
+static void amd_calc_l3_indices(struct amd_northbridge *nb)
+{
+	struct amd_l3_cache *l3 = &nb->l3_cache;
+	unsigned int sc0, sc1, sc2, sc3;
+	u32 val = 0;
+
+	pci_read_config_dword(nb->misc, 0x1C4, &val);
+
+	/* calculate subcache sizes */
+	l3->subcaches[0] = sc0 = !(val & BIT(0));
+	l3->subcaches[1] = sc1 = !(val & BIT(4));
+
+	if (boot_cpu_data.x86 == 0x15) {
+		l3->subcaches[0] = sc0 += !(val & BIT(1));
+		l3->subcaches[1] = sc1 += !(val & BIT(5));
+	}
+
+	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
+	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
+
+	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
+}
+
+/*
+ * check whether a slot used for disabling an L3 index is occupied.
+ * @l3: L3 cache descriptor
+ * @slot: slot number (0..1)
+ *
+ * @returns: the disabled index if used or negative value if slot free.
+ */
+static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
+{
+	unsigned int reg = 0;
+
+	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
+
+	/* check whether this slot is activated already */
+	if (reg & (3UL << 30))
+		return reg & 0xfff;
+
+	return -1;
+}
+
+static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
+				  unsigned int slot)
+{
+	int index;
+	struct amd_northbridge *nb = this_leaf->priv;
+
+	index = amd_get_l3_disable_slot(nb, slot);
+	if (index >= 0)
+		return sprintf(buf, "%d\n", index);
+
+	return sprintf(buf, "FREE\n");
+}
+
+#define SHOW_CACHE_DISABLE(slot)					\
+static ssize_t								\
+cache_disable_##slot##_show(struct device *dev,				\
+			    struct device_attribute *attr, char *buf)	\
+{									\
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
+	return show_cache_disable(this_leaf, buf, slot);		\
+}
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+
+static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
+				 unsigned slot, unsigned long idx)
+{
+	int i;
+
+	idx |= BIT(30);
+
+	/*
+	 *  disable index in all 4 subcaches
+	 */
+	for (i = 0; i < 4; i++) {
+		u32 reg = idx | (i << 20);
+
+		if (!nb->l3_cache.subcaches[i])
+			continue;
+
+		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
+
+		/*
+		 * We need to WBINVD on a core on the node containing the L3
+		 * cache whose indices we disable; therefore a simple wbinvd()
+		 * is not sufficient.
+		 */
+		wbinvd_on_cpu(cpu);
+
+		reg |= BIT(31);
+		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
+	}
+}
+
+/*
+ * disable a L3 cache index by using a disable-slot
+ *
+ * @l3:    L3 cache descriptor
+ * @cpu:   A CPU on the node containing the L3 cache
+ * @slot:  slot number (0..1)
+ * @index: index to disable
+ *
+ * @return: 0 on success, error status on failure
+ */
+static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
+			    unsigned slot, unsigned long index)
+{
+	int ret = 0;
+
+	/*  check if @slot is already used or the index is already disabled */
+	ret = amd_get_l3_disable_slot(nb, slot);
+	if (ret >= 0)
+		return -EEXIST;
+
+	if (index > nb->l3_cache.indices)
+		return -EINVAL;
+
+	/* check whether the other slot has disabled the same index already */
+	if (index == amd_get_l3_disable_slot(nb, !slot))
+		return -EEXIST;
+
+	amd_l3_disable_index(nb, cpu, slot, index);
+
+	return 0;
+}
+
+static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
+				   const char *buf, size_t count,
+				   unsigned int slot)
+{
+	unsigned long val = 0;
+	int cpu, err = 0;
+	struct amd_northbridge *nb = this_leaf->priv;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	cpu = cpumask_first(&this_leaf->shared_cpu_map);
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
+	if (err) {
+		if (err == -EEXIST)
+			pr_warn("L3 slot %d in use/index already disabled!\n",
+				   slot);
+		return err;
+	}
+	return count;
+}
+
+#define STORE_CACHE_DISABLE(slot)					\
+static ssize_t								\
+cache_disable_##slot##_store(struct device *dev,			\
+			     struct device_attribute *attr,		\
+			     const char *buf, size_t count)		\
+{									\
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
+	return store_cache_disable(this_leaf, buf, count, slot);	\
+}
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
+
+static ssize_t subcaches_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
+
+	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
+}
+
+static ssize_t subcaches_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
+	unsigned long val;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (kstrtoul(buf, 16, &val) < 0)
+		return -EINVAL;
+
+	if (amd_set_subcaches(cpu, val))
+		return -EINVAL;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(cache_disable_0);
+static DEVICE_ATTR_RW(cache_disable_1);
+static DEVICE_ATTR_RW(subcaches);
+
+static umode_t
+cache_private_attrs_is_visible(struct kobject *kobj,
+			       struct attribute *attr, int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+	umode_t mode = attr->mode;
+
+	if (!this_leaf->priv)
+		return 0;
+
+	if ((attr == &dev_attr_subcaches.attr) &&
+	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		return mode;
+
+	if ((attr == &dev_attr_cache_disable_0.attr ||
+	     attr == &dev_attr_cache_disable_1.attr) &&
+	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+		return mode;
+
+	return 0;
+}
+
+static struct attribute_group cache_private_group = {
+	.is_visible = cache_private_attrs_is_visible,
+};
+
+static void init_amd_l3_attrs(void)
+{
+	int n = 1;
+	static struct attribute **amd_l3_attrs;
+
+	if (amd_l3_attrs) /* already initialized */
+		return;
+
+	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+		n += 2;
+	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		n += 1;
+
+	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
+	if (!amd_l3_attrs)
+		return;
+
+	n = 0;
+	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
+		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
+	}
+	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
+
+	cache_private_group.attrs = amd_l3_attrs;
+}
+
+const struct attribute_group *
+cache_get_priv_group(struct cacheinfo *this_leaf)
+{
+	struct amd_northbridge *nb = this_leaf->priv;
+
+	if (this_leaf->level < 3 || !nb)
+		return NULL;
+
+	if (nb && nb->l3_cache.indices)
+		init_amd_l3_attrs();
+
+	return &cache_private_group;
+}
+
+static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
+{
+	int node;
+
+	/* only for L3, and not in virtualized environments */
+	if (index < 3)
+		return;
+
+	node = amd_get_nb_id(smp_processor_id());
+	this_leaf->nb = node_to_amd_nb(node);
+	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
+		amd_calc_l3_indices(this_leaf->nb);
+}
+#else
+#define amd_init_l3_cache(x, y)
+#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
+
+static int
+cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
+{
+	union _cpuid4_leaf_eax	eax;
+	union _cpuid4_leaf_ebx	ebx;
+	union _cpuid4_leaf_ecx	ecx;
+	unsigned		edx;
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
+			cpuid_count(0x8000001d, index, &eax.full,
+				    &ebx.full, &ecx.full, &edx);
+		else
+			amd_cpuid4(index, &eax, &ebx, &ecx);
+		amd_init_l3_cache(this_leaf, index);
+	} else {
+		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+	}
+
+	if (eax.split.type == CTYPE_NULL)
+		return -EIO; /* better error ? */
+
+	this_leaf->eax = eax;
+	this_leaf->ebx = ebx;
+	this_leaf->ecx = ecx;
+	this_leaf->size = (ecx.split.number_of_sets          + 1) *
+			  (ebx.split.coherency_line_size     + 1) *
+			  (ebx.split.physical_line_partition + 1) *
+			  (ebx.split.ways_of_associativity   + 1);
+	return 0;
+}
+
+static int find_num_cache_leaves(struct cpuinfo_x86 *c)
+{
+	unsigned int		eax, ebx, ecx, edx, op;
+	union _cpuid4_leaf_eax	cache_eax;
+	int			i = -1;
+
+	if (c->x86_vendor == X86_VENDOR_AMD)
+		op = 0x8000001d;
+	else
+		op = 4;
+
+	do {
+		++i;
+		/* Do cpuid(op) loop to find out num_cache_leaves */
+		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
+		cache_eax.full = eax;
+	} while (cache_eax.split.type != CTYPE_NULL);
+	return i;
+}
+
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
+{
+	/*
+	 * We may have multiple LLCs if L3 caches exist, so check if we
+	 * have an L3 cache by looking at the L3 cache CPUID leaf.
+	 */
+	if (!cpuid_edx(0x80000006))
+		return;
+
+	if (c->x86 < 0x17) {
+		/* LLC is at the node level. */
+		per_cpu(cpu_llc_id, cpu) = node_id;
+	} else if (c->x86 == 0x17 &&
+		   c->x86_model >= 0 && c->x86_model <= 0x1F) {
+		/*
+		 * LLC is at the core complex level.
+		 * Core complex ID is ApicId[3] for these processors.
+		 */
+		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+	} else {
+		/*
+		 * LLC ID is calculated from the number of threads sharing the
+		 * cache.
+		 */
+		u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
+		u32 llc_index = find_num_cache_leaves(c) - 1;
+
+		cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
+		if (eax)
+			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
+
+		if (num_sharing_cache) {
+			int bits = get_count_order(num_sharing_cache) - 1;
+
+			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
+		}
+	}
+}
+
+void init_amd_cacheinfo(struct cpuinfo_x86 *c)
+{
+	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		num_cache_leaves = find_num_cache_leaves(c);
+	} else if (c->extended_cpuid_level >= 0x80000006) {
+		if (cpuid_edx(0x80000006) & 0xf000)
+			num_cache_leaves = 4;
+		else
+			num_cache_leaves = 3;
+	}
+}
+
+void init_intel_cacheinfo(struct cpuinfo_x86 *c)
+{
+	/* Cache sizes */
+	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
+	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
+	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+#ifdef CONFIG_SMP
+	unsigned int cpu = c->cpu_index;
+#endif
+
+	if (c->cpuid_level > 3) {
+		static int is_initialized;
+
+		if (is_initialized == 0) {
+			/* Init num_cache_leaves from boot CPU */
+			num_cache_leaves = find_num_cache_leaves(c);
+			is_initialized++;
+		}
+
+		/*
+		 * Whenever possible use cpuid(4), the deterministic cache
+		 * parameters leaf, to find the cache details.
+		 */
+		for (i = 0; i < num_cache_leaves; i++) {
+			struct _cpuid4_info_regs this_leaf = {};
+			int retval;
+
+			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
+			if (retval < 0)
+				continue;
+
+			switch (this_leaf.eax.split.level) {
+			case 1:
+				if (this_leaf.eax.split.type == CTYPE_DATA)
+					new_l1d = this_leaf.size/1024;
+				else if (this_leaf.eax.split.type == CTYPE_INST)
+					new_l1i = this_leaf.size/1024;
+				break;
+			case 2:
+				new_l2 = this_leaf.size/1024;
+				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+				index_msb = get_count_order(num_threads_sharing);
+				l2_id = c->apicid & ~((1 << index_msb) - 1);
+				break;
+			case 3:
+				new_l3 = this_leaf.size/1024;
+				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+				index_msb = get_count_order(num_threads_sharing);
+				l3_id = c->apicid & ~((1 << index_msb) - 1);
+				break;
+			default:
+				break;
+			}
+		}
+	}
+	/*
+	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+	 * trace cache
+	 */
+	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+		/* supports eax=2  call */
+		int j, n;
+		unsigned int regs[4];
+		unsigned char *dp = (unsigned char *)regs;
+		int only_trace = 0;
+
+		if (num_cache_leaves != 0 && c->x86 == 15)
+			only_trace = 1;
+
+		/* Number of times to iterate */
+		n = cpuid_eax(2) & 0xFF;
+
+		for (i = 0 ; i < n ; i++) {
+			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+
+			/* If bit 31 is set, this is an unknown format */
+			for (j = 0 ; j < 3 ; j++)
+				if (regs[j] & (1 << 31))
+					regs[j] = 0;
+
+			/* Byte 0 is level count, not a descriptor */
+			for (j = 1 ; j < 16 ; j++) {
+				unsigned char des = dp[j];
+				unsigned char k = 0;
+
+				/* look up this descriptor in the table */
+				while (cache_table[k].descriptor != 0) {
+					if (cache_table[k].descriptor == des) {
+						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
+							break;
+						switch (cache_table[k].cache_type) {
+						case LVL_1_INST:
+							l1i += cache_table[k].size;
+							break;
+						case LVL_1_DATA:
+							l1d += cache_table[k].size;
+							break;
+						case LVL_2:
+							l2 += cache_table[k].size;
+							break;
+						case LVL_3:
+							l3 += cache_table[k].size;
+							break;
+						case LVL_TRACE:
+							trace += cache_table[k].size;
+							break;
+						}
+
+						break;
+					}
+
+					k++;
+				}
+			}
+		}
+	}
+
+	if (new_l1d)
+		l1d = new_l1d;
+
+	if (new_l1i)
+		l1i = new_l1i;
+
+	if (new_l2) {
+		l2 = new_l2;
+#ifdef CONFIG_SMP
+		per_cpu(cpu_llc_id, cpu) = l2_id;
+#endif
+	}
+
+	if (new_l3) {
+		l3 = new_l3;
+#ifdef CONFIG_SMP
+		per_cpu(cpu_llc_id, cpu) = l3_id;
+#endif
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
+	 * turn means that the only possibility is SMT (as indicated in
+	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
+	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
+	 * c->phys_proc_id.
+	 */
+	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
+		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
+#endif
+
+	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
+
+	if (!l2)
+		cpu_detect_cache_sizes(c);
+}
+
+static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
+				    struct _cpuid4_info_regs *base)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf;
+	int i, sibling;
+
+	/*
+	 * For L3, always use the pre-calculated cpu_llc_shared_mask
+	 * to derive shared_cpu_map.
+	 */
+	if (index == 3) {
+		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
+			this_cpu_ci = get_cpu_cacheinfo(i);
+			if (!this_cpu_ci->info_list)
+				continue;
+			this_leaf = this_cpu_ci->info_list + index;
+			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
+				if (!cpu_online(sibling))
+					continue;
+				cpumask_set_cpu(sibling,
+						&this_leaf->shared_cpu_map);
+			}
+		}
+	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+		unsigned int apicid, nshared, first, last;
+
+		nshared = base->eax.split.num_threads_sharing + 1;
+		apicid = cpu_data(cpu).apicid;
+		first = apicid - (apicid % nshared);
+		last = first + nshared - 1;
+
+		for_each_online_cpu(i) {
+			this_cpu_ci = get_cpu_cacheinfo(i);
+			if (!this_cpu_ci->info_list)
+				continue;
+
+			apicid = cpu_data(i).apicid;
+			if ((apicid < first) || (apicid > last))
+				continue;
+
+			this_leaf = this_cpu_ci->info_list + index;
+
+			for_each_online_cpu(sibling) {
+				apicid = cpu_data(sibling).apicid;
+				if ((apicid < first) || (apicid > last))
+					continue;
+				cpumask_set_cpu(sibling,
+						&this_leaf->shared_cpu_map);
+			}
+		}
+	} else
+		return 0;
+
+	return 1;
+}
+
+static void __cache_cpumap_setup(unsigned int cpu, int index,
+				 struct _cpuid4_info_regs *base)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf, *sibling_leaf;
+	unsigned long num_threads_sharing;
+	int index_msb, i;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		if (__cache_amd_cpumap_setup(cpu, index, base))
+			return;
+	}
+
+	this_leaf = this_cpu_ci->info_list + index;
+	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
+
+	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
+	if (num_threads_sharing == 1)
+		return;
+
+	index_msb = get_count_order(num_threads_sharing);
+
+	for_each_online_cpu(i)
+		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
+			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
+
+			if (i == cpu || !sib_cpu_ci->info_list)
+				continue;/* skip if itself or no cacheinfo */
+			sibling_leaf = sib_cpu_ci->info_list + index;
+			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
+			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
+		}
+}
+
+static void ci_leaf_init(struct cacheinfo *this_leaf,
+			 struct _cpuid4_info_regs *base)
+{
+	this_leaf->id = base->id;
+	this_leaf->attributes = CACHE_ID;
+	this_leaf->level = base->eax.split.level;
+	this_leaf->type = cache_type_map[base->eax.split.type];
+	this_leaf->coherency_line_size =
+				base->ebx.split.coherency_line_size + 1;
+	this_leaf->ways_of_associativity =
+				base->ebx.split.ways_of_associativity + 1;
+	this_leaf->size = base->size;
+	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
+	this_leaf->physical_line_partition =
+				base->ebx.split.physical_line_partition + 1;
+	this_leaf->priv = base->nb;
+}
+
+static int __init_cache_level(unsigned int cpu)
+{
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+
+	if (!num_cache_leaves)
+		return -ENOENT;
+	if (!this_cpu_ci)
+		return -EINVAL;
+	this_cpu_ci->num_levels = 3;
+	this_cpu_ci->num_leaves = num_cache_leaves;
+	return 0;
+}
+
+/*
+ * The maximum number of shared threads comes from CPUID.4:EAX[25:14], with
+ * ECX as the cache index input. The cache id for this node is then the
+ * apicid right-shifted by the order of that number.
+ */
+static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	unsigned long num_threads_sharing;
+	int index_msb;
+
+	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
+	index_msb = get_count_order(num_threads_sharing);
+	id4_regs->id = c->apicid >> index_msb;
+}
+
+static int __populate_cache_leaves(unsigned int cpu)
+{
+	unsigned int idx, ret;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+	struct _cpuid4_info_regs id4_regs = {};
+
+	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
+		if (ret)
+			return ret;
+		get_cache_id(cpu, &id4_regs);
+		ci_leaf_init(this_leaf++, &id4_regs);
+		__cache_cpumap_setup(cpu, idx, &id4_regs);
+	}
+	this_cpu_ci->cpu_map_populated = true;
+
+	return 0;
+}
+
+DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
+DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
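
For families newer than the hard-coded cases, cacheinfo_amd_init_llc_id()
derives the LLC id from the thread-sharing count of the last cache leaf. A
self-contained userspace sketch of that derivation (GCC's cpuid helper
assumed; the apicid is supplied by the caller, and the shift is taken as
the order of the sharing count -- a sketch of the idea rather than a
bit-exact copy of the kernel code above):

    #include <cpuid.h>

    static unsigned int example_llc_id(unsigned int llc_index,
    				       unsigned int apicid)
    {
    	unsigned int eax, ebx, ecx, edx, nshared, bits;

    	if (!__get_cpuid_count(0x8000001d, llc_index,
    			       &eax, &ebx, &ecx, &edx) || !eax)
    		return apicid;			/* leaf unsupported */

    	nshared = ((eax >> 14) & 0xfff) + 1;	/* EAX[25:14] + 1 */
    	bits = nshared > 1 ? 32 - __builtin_clz(nshared - 1) : 0;
    	return apicid >> bits;
    }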
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e5ec0f1..14433ff 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -18,6 +18,13 @@
 #define RNG_ENABLED	(1 << 3)
 #define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */
 
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW	0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI		0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS	0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC	0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT		0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID		0x00000020
+
 static void init_c3(struct cpuinfo_x86 *c)
 {
 	u32  lo, hi;
@@ -112,6 +119,31 @@
 	}
 }
 
+static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c)
+{
+	u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
+
+	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+	msr_ctl = vmx_msr_high | vmx_msr_low;
+
+	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
+		set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+		set_cpu_cap(c, X86_FEATURE_VNMI);
+	if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
+		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+		      vmx_msr_low, vmx_msr_high);
+		msr_ctl2 = vmx_msr_high | vmx_msr_low;
+		if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
+		    (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
+			set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+		if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+			set_cpu_cap(c, X86_FEATURE_EPT);
+		if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+			set_cpu_cap(c, X86_FEATURE_VPID);
+	}
+}
+
 static void init_centaur(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_32
@@ -128,6 +160,24 @@
 	clear_cpu_cap(c, 0*32+31);
 #endif
 	early_init_centaur(c);
+	init_intel_cacheinfo(c);
+	detect_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+	detect_ht(c);
+#endif
+
+	if (c->cpuid_level > 9) {
+		unsigned int eax = cpuid_eax(10);
+
+		/*
+		 * Check the version and the number of counters:
+		 * the version (eax[7:0]) must be non-zero and the
+		 * number of counters (eax[15:8]) must be greater than 1.
+		 */
+		if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
+			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+	}
+
 	switch (c->x86) {
 #ifdef CONFIG_X86_32
 	case 5:
@@ -199,6 +249,9 @@
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 #endif
+
+	if (cpu_has(c, X86_FEATURE_VMX))
+		centaur_detect_vmx_virtcap(c);
 }
 
 #ifdef CONFIG_X86_32
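
centaur_detect_vmx_virtcap() mirrors the Intel version: each VMX capability
MSR reports allowed control bits in both 32-bit halves, so OR-ing the halves
and testing a mask answers "can this control ever be set?". The test in
isolation, with hypothetical MSR values instead of a live rdmsr:

    /* X86_VMX_FEATURE_PROC_CTLS2_EPT is 0x00000002, as defined above */
    static int example_has_ept(unsigned int msr_lo, unsigned int msr_hi)
    {
    	unsigned int ctl2 = msr_hi | msr_lo;

    	return !!(ctl2 & 0x00000002);
    }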
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 38276f5..95c8e50 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -66,6 +66,13 @@
 /* representing cpus for which sibling maps can be computed */
 cpumask_var_t cpu_sibling_setup_mask;
 
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+EXPORT_SYMBOL(smp_num_siblings);
+
+/* Last level cache ID of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+
 /* correctly size the local cpu masks */
 void __init setup_cpu_local_masks(void)
 {
@@ -577,6 +584,19 @@
 	*(s + 1) = '\0';
 }
 
+void detect_num_cpu_cores(struct cpuinfo_x86 *c)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	c->x86_max_cores = 1;
+	if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
+		return;
+
+	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
+	if (eax & 0x1f)
+		c->x86_max_cores = (eax >> 26) + 1;
+}
+
 void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
 {
 	unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -1044,6 +1064,21 @@
 	 */
 	setup_clear_cpu_cap(X86_FEATURE_PCID);
 #endif
+
+	/*
+	 * Later in the boot process pgtable_l5_enabled() relies on
+	 * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not
+	 * enabled by this point we need to clear the feature bit to avoid
+	 * false-positives at the later stage.
+	 *
+	 * pgtable_l5_enabled() can be false here for several reasons:
+	 *  - 5-level paging is disabled compile-time;
+	 *  - it's 32-bit kernel;
+	 *  - machine doesn't support 5-level paging;
+	 *  - user specified 'no5lvl' in kernel command line.
+	 */
+	if (!pgtable_l5_enabled())
+		setup_clear_cpu_cap(X86_FEATURE_LA57);
 }
 
 void __init early_cpu_init(void)
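
detect_num_cpu_cores() centralizes the leaf-4 core count that intel.c (and
now centaur.c) used to open-code. A self-contained userspace sketch of the
same computation (GCC's cpuid helper assumed):

    #include <cpuid.h>

    static unsigned int example_num_cores(void)
    {
    	unsigned int eax, ebx, ecx, edx;

    	/* EAX[31:26] of CPUID.4 (ECX=0) is "cores per package - 1"
    	 * when the leaf's cache-type field (EAX[4:0]) is populated. */
    	if (!__get_cpuid_count(4, 0, &eax, &ebx, &ecx, &edx) ||
    	    !(eax & 0x1f))
    		return 1;
    	return (eax >> 26) + 1;
    }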
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 37672d2..38216f6 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -47,6 +47,16 @@
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+				    unsigned int sub_leaf,
+				    enum cpuid_regs_idx reg);
+extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
+
+extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
+extern int detect_extended_topology(struct cpuinfo_x86 *c);
+extern void detect_ht(struct cpuinfo_x86 *c);
 
 unsigned int aperfmperf_get_khz(int cpu);
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 577e7f7..eb75564 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -456,24 +456,6 @@
 #endif
 }
 
-/*
- * find out the number of processor cores on the die
- */
-static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
-{
-	unsigned int eax, ebx, ecx, edx;
-
-	if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
-		return 1;
-
-	/* Intel has a non-standard dependency on %ecx for this CPUID level. */
-	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
-	if (eax & 0x1f)
-		return (eax >> 26) + 1;
-	else
-		return 1;
-}
-
 static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
 {
 	/* Intel VMX MSR indicated features */
@@ -656,8 +638,6 @@
 
 static void init_intel(struct cpuinfo_x86 *c)
 {
-	unsigned int l2 = 0;
-
 	early_init_intel(c);
 
 	intel_workarounds(c);
@@ -674,19 +654,13 @@
 		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
 		 * detection.
 		 */
-		c->x86_max_cores = intel_num_cpu_cores(c);
+		detect_num_cpu_cores(c);
 #ifdef CONFIG_X86_32
 		detect_ht(c);
 #endif
 	}
 
-	l2 = init_intel_cacheinfo(c);
-
-	/* Detect legacy cache sizes if init_intel_cacheinfo did not */
-	if (l2 == 0) {
-		cpu_detect_cache_sizes(c);
-		l2 = c->x86_cache_size;
-	}
+	init_intel_cacheinfo(c);
 
 	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
@@ -699,7 +673,8 @@
 		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 
 	if (boot_cpu_has(X86_FEATURE_DS)) {
-		unsigned int l1;
+		unsigned int l1, l2;
+
 		rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
 		if (!(l1 & (1<<11)))
 			set_cpu_cap(c, X86_FEATURE_BTS);
@@ -727,6 +702,7 @@
 	 * Dixon is NOT a Celeron.
 	 */
 	if (c->x86 == 6) {
+		unsigned int l2 = c->x86_cache_size;
 		char *p = NULL;
 
 		switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
deleted file mode 100644
index 54d04d5..0000000
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ /dev/null
@@ -1,968 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *	Routines to identify caches on Intel CPU.
- *
- *	Changes:
- *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
- *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
- *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
- */
-
-#include <linux/slab.h>
-#include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/capability.h>
-#include <linux/sysfs.h>
-#include <linux/pci.h>
-
-#include <asm/cpufeature.h>
-#include <asm/amd_nb.h>
-#include <asm/smp.h>
-
-#define LVL_1_INST	1
-#define LVL_1_DATA	2
-#define LVL_2		3
-#define LVL_3		4
-#define LVL_TRACE	5
-
-struct _cache_table {
-	unsigned char descriptor;
-	char cache_type;
-	short size;
-};
-
-#define MB(x)	((x) * 1024)
-
-/* All the cache descriptor types we care about (no TLB or
-   trace cache entries) */
-
-static const struct _cache_table cache_table[] =
-{
-	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
-	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
-	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
-	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way set assoc, 64 byte line size */
-	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
-	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
-	{ 0x23, LVL_3,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x25, LVL_3,      MB(2) },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x29, LVL_3,      MB(4) },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
-	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
-	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
-	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
-	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
-	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
-	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
-	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
-	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
-	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
-	{ 0x44, LVL_2,      MB(1) },	/* 4-way set assoc, 32 byte line size */
-	{ 0x45, LVL_2,      MB(2) },	/* 4-way set assoc, 32 byte line size */
-	{ 0x46, LVL_3,      MB(4) },	/* 4-way set assoc, 64 byte line size */
-	{ 0x47, LVL_3,      MB(8) },	/* 8-way set assoc, 64 byte line size */
-	{ 0x48, LVL_2,      MB(3) },	/* 12-way set assoc, 64 byte line size */
-	{ 0x49, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
-	{ 0x4a, LVL_3,      MB(6) },	/* 12-way set assoc, 64 byte line size */
-	{ 0x4b, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
-	{ 0x4c, LVL_3,      MB(12) },	/* 12-way set assoc, 64 byte line size */
-	{ 0x4d, LVL_3,      MB(16) },	/* 16-way set assoc, 64 byte line size */
-	{ 0x4e, LVL_2,      MB(6) },	/* 24-way set assoc, 64 byte line size */
-	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
-	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
-	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
-	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
-	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
-	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
-	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
-	{ 0x78, LVL_2,      MB(1) },	/* 4-way set assoc, 64 byte line size */
-	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x7c, LVL_2,      MB(1) },	/* 8-way set assoc, sectored cache, 64 byte line size */
-	{ 0x7d, LVL_2,      MB(2) },	/* 8-way set assoc, 64 byte line size */
-	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
-	{ 0x80, LVL_2,      512 },	/* 8-way set assoc, 64 byte line size */
-	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
-	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
-	{ 0x84, LVL_2,      MB(1) },	/* 8-way set assoc, 32 byte line size */
-	{ 0x85, LVL_2,      MB(2) },	/* 8-way set assoc, 32 byte line size */
-	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
-	{ 0x87, LVL_2,      MB(1) },	/* 8-way set assoc, 64 byte line size */
-	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
-	{ 0xd1, LVL_3,      MB(1) },	/* 4-way set assoc, 64 byte line size */
-	{ 0xd2, LVL_3,      MB(2) },	/* 4-way set assoc, 64 byte line size */
-	{ 0xd6, LVL_3,      MB(1) },	/* 8-way set assoc, 64 byte line size */
-	{ 0xd7, LVL_3,      MB(2) },	/* 8-way set assoc, 64 byte line size */
-	{ 0xd8, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
-	{ 0xdc, LVL_3,      MB(2) },	/* 12-way set assoc, 64 byte line size */
-	{ 0xdd, LVL_3,      MB(4) },	/* 12-way set assoc, 64 byte line size */
-	{ 0xde, LVL_3,      MB(8) },	/* 12-way set assoc, 64 byte line size */
-	{ 0xe2, LVL_3,      MB(2) },	/* 16-way set assoc, 64 byte line size */
-	{ 0xe3, LVL_3,      MB(4) },	/* 16-way set assoc, 64 byte line size */
-	{ 0xe4, LVL_3,      MB(8) },	/* 16-way set assoc, 64 byte line size */
-	{ 0xea, LVL_3,      MB(12) },	/* 24-way set assoc, 64 byte line size */
-	{ 0xeb, LVL_3,      MB(18) },	/* 24-way set assoc, 64 byte line size */
-	{ 0xec, LVL_3,      MB(24) },	/* 24-way set assoc, 64 byte line size */
-	{ 0x00, 0, 0}
-};
-
-
-enum _cache_type {
-	CTYPE_NULL = 0,
-	CTYPE_DATA = 1,
-	CTYPE_INST = 2,
-	CTYPE_UNIFIED = 3
-};
-
-union _cpuid4_leaf_eax {
-	struct {
-		enum _cache_type	type:5;
-		unsigned int		level:3;
-		unsigned int		is_self_initializing:1;
-		unsigned int		is_fully_associative:1;
-		unsigned int		reserved:4;
-		unsigned int		num_threads_sharing:12;
-		unsigned int		num_cores_on_die:6;
-	} split;
-	u32 full;
-};
-
-union _cpuid4_leaf_ebx {
-	struct {
-		unsigned int		coherency_line_size:12;
-		unsigned int		physical_line_partition:10;
-		unsigned int		ways_of_associativity:10;
-	} split;
-	u32 full;
-};
-
-union _cpuid4_leaf_ecx {
-	struct {
-		unsigned int		number_of_sets:32;
-	} split;
-	u32 full;
-};
-
-struct _cpuid4_info_regs {
-	union _cpuid4_leaf_eax eax;
-	union _cpuid4_leaf_ebx ebx;
-	union _cpuid4_leaf_ecx ecx;
-	unsigned int id;
-	unsigned long size;
-	struct amd_northbridge *nb;
-};
-
-static unsigned short num_cache_leaves;
-
-/* AMD doesn't have CPUID4. Emulate it here to report the same
-   information to the user.  This makes some assumptions about the machine:
-   L2 not shared, no SMT etc. that is currently true on AMD CPUs.
-
-   In theory the TLBs could be reported as fake type (they are in "dummy").
-   Maybe later */
-union l1_cache {
-	struct {
-		unsigned line_size:8;
-		unsigned lines_per_tag:8;
-		unsigned assoc:8;
-		unsigned size_in_kb:8;
-	};
-	unsigned val;
-};
-
-union l2_cache {
-	struct {
-		unsigned line_size:8;
-		unsigned lines_per_tag:4;
-		unsigned assoc:4;
-		unsigned size_in_kb:16;
-	};
-	unsigned val;
-};
-
-union l3_cache {
-	struct {
-		unsigned line_size:8;
-		unsigned lines_per_tag:4;
-		unsigned assoc:4;
-		unsigned res:2;
-		unsigned size_encoded:14;
-	};
-	unsigned val;
-};
-
-static const unsigned short assocs[] = {
-	[1] = 1,
-	[2] = 2,
-	[4] = 4,
-	[6] = 8,
-	[8] = 16,
-	[0xa] = 32,
-	[0xb] = 48,
-	[0xc] = 64,
-	[0xd] = 96,
-	[0xe] = 128,
-	[0xf] = 0xffff /* fully associative - no way to show this currently */
-};
-
-static const unsigned char levels[] = { 1, 1, 2, 3 };
-static const unsigned char types[] = { 1, 2, 3, 3 };
-
-static const enum cache_type cache_type_map[] = {
-	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
-	[CTYPE_DATA] = CACHE_TYPE_DATA,
-	[CTYPE_INST] = CACHE_TYPE_INST,
-	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
-};
-
-static void
-amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
-		     union _cpuid4_leaf_ebx *ebx,
-		     union _cpuid4_leaf_ecx *ecx)
-{
-	unsigned dummy;
-	unsigned line_size, lines_per_tag, assoc, size_in_kb;
-	union l1_cache l1i, l1d;
-	union l2_cache l2;
-	union l3_cache l3;
-	union l1_cache *l1 = &l1d;
-
-	eax->full = 0;
-	ebx->full = 0;
-	ecx->full = 0;
-
-	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
-	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
-
-	switch (leaf) {
-	case 1:
-		l1 = &l1i;
-	case 0:
-		if (!l1->val)
-			return;
-		assoc = assocs[l1->assoc];
-		line_size = l1->line_size;
-		lines_per_tag = l1->lines_per_tag;
-		size_in_kb = l1->size_in_kb;
-		break;
-	case 2:
-		if (!l2.val)
-			return;
-		assoc = assocs[l2.assoc];
-		line_size = l2.line_size;
-		lines_per_tag = l2.lines_per_tag;
-		/* cpu_data has errata corrections for K7 applied */
-		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
-		break;
-	case 3:
-		if (!l3.val)
-			return;
-		assoc = assocs[l3.assoc];
-		line_size = l3.line_size;
-		lines_per_tag = l3.lines_per_tag;
-		size_in_kb = l3.size_encoded * 512;
-		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
-			size_in_kb = size_in_kb >> 1;
-			assoc = assoc >> 1;
-		}
-		break;
-	default:
-		return;
-	}
-
-	eax->split.is_self_initializing = 1;
-	eax->split.type = types[leaf];
-	eax->split.level = levels[leaf];
-	eax->split.num_threads_sharing = 0;
-	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
-
-
-	if (assoc == 0xffff)
-		eax->split.is_fully_associative = 1;
-	ebx->split.coherency_line_size = line_size - 1;
-	ebx->split.ways_of_associativity = assoc - 1;
-	ebx->split.physical_line_partition = lines_per_tag - 1;
-	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
-		(ebx->split.ways_of_associativity + 1) - 1;
-}
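
amd_cpuid4() above encodes every field in the leaf-4 "minus one" convention: ways, line size, and set count are all stored as their value minus one. A minimal user-space sketch of that encoding, using assumed example values (a hypothetical 512 KB, 16-way cache with 64-byte lines), not kernel code:

#include <stdio.h>

int main(void)
{
	unsigned int assoc = 16, line_size = 64, size_in_kb = 512;
	unsigned int ways = assoc - 1;		/* ebx.split.ways_of_associativity */
	unsigned int cls = line_size - 1;	/* ebx.split.coherency_line_size */
	unsigned int sets = (size_in_kb * 1024) / line_size / (ways + 1) - 1;

	/* 512 KB / 64-byte lines / 16 ways = 512 sets, encoded as 511 */
	printf("ways=%u cls=%u sets=%u\n", ways, cls, sets);
	return 0;
}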
-
-#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
-
-/*
- * L3 cache descriptors
- */
-static void amd_calc_l3_indices(struct amd_northbridge *nb)
-{
-	struct amd_l3_cache *l3 = &nb->l3_cache;
-	unsigned int sc0, sc1, sc2, sc3;
-	u32 val = 0;
-
-	pci_read_config_dword(nb->misc, 0x1C4, &val);
-
-	/* calculate subcache sizes */
-	l3->subcaches[0] = sc0 = !(val & BIT(0));
-	l3->subcaches[1] = sc1 = !(val & BIT(4));
-
-	if (boot_cpu_data.x86 == 0x15) {
-		l3->subcaches[0] = sc0 += !(val & BIT(1));
-		l3->subcaches[1] = sc1 += !(val & BIT(5));
-	}
-
-	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
-	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
-
-	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
-}
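
The index count computed above is driven by the largest per-subcache count: (max << 10) - 1. A hedged, self-contained sketch of the same computation with illustrative subcache counts:

#include <stdio.h>

/* mirrors (max(max3(sc0, sc1, sc2), sc3) << 10) - 1 from amd_calc_l3_indices() */
static unsigned int l3_indices(unsigned int sc0, unsigned int sc1,
			       unsigned int sc2, unsigned int sc3)
{
	unsigned int m = sc0;

	if (sc1 > m) m = sc1;
	if (sc2 > m) m = sc2;
	if (sc3 > m) m = sc3;
	return (m << 10) - 1;
}

int main(void)
{
	/* all four subcaches present with count 1 -> 1023 usable indices */
	printf("%u\n", l3_indices(1, 1, 1, 1));
	return 0;
}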
-
-/*
- * check whether a slot used for disabling an L3 index is occupied.
- * @l3: L3 cache descriptor
- * @slot: slot number (0..1)
- *
- * @returns: the disabled index if used, or a negative value if the slot is free.
- */
-static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
-{
-	unsigned int reg = 0;
-
-	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, &reg);
-
-	/* check whether this slot is activated already */
-	if (reg & (3UL << 30))
-		return reg & 0xfff;
-
-	return -1;
-}
-
-static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
-				  unsigned int slot)
-{
-	int index;
-	struct amd_northbridge *nb = this_leaf->priv;
-
-	index = amd_get_l3_disable_slot(nb, slot);
-	if (index >= 0)
-		return sprintf(buf, "%d\n", index);
-
-	return sprintf(buf, "FREE\n");
-}
-
-#define SHOW_CACHE_DISABLE(slot)					\
-static ssize_t								\
-cache_disable_##slot##_show(struct device *dev,				\
-			    struct device_attribute *attr, char *buf)	\
-{									\
-	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
-	return show_cache_disable(this_leaf, buf, slot);		\
-}
-SHOW_CACHE_DISABLE(0)
-SHOW_CACHE_DISABLE(1)
-
-static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
-				 unsigned slot, unsigned long idx)
-{
-	int i;
-
-	idx |= BIT(30);
-
-	/*
-	 *  disable index in all 4 subcaches
-	 */
-	for (i = 0; i < 4; i++) {
-		u32 reg = idx | (i << 20);
-
-		if (!nb->l3_cache.subcaches[i])
-			continue;
-
-		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
-
-		/*
-		 * We need to WBINVD on a core on the node containing the L3
-		 * cache which indices we disable therefore a simple wbinvd()
-		 * is not sufficient.
-		 */
-		wbinvd_on_cpu(cpu);
-
-		reg |= BIT(31);
-		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
-	}
-}
-
-/*
- * disable an L3 cache index by using a disable-slot
- *
- * @l3:    L3 cache descriptor
- * @cpu:   A CPU on the node containing the L3 cache
- * @slot:  slot number (0..1)
- * @index: index to disable
- *
- * @return: 0 on success, error status on failure
- */
-static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
-			    unsigned slot, unsigned long index)
-{
-	int ret = 0;
-
-	/*  check if @slot is already used or the index is already disabled */
-	ret = amd_get_l3_disable_slot(nb, slot);
-	if (ret >= 0)
-		return -EEXIST;
-
-	if (index > nb->l3_cache.indices)
-		return -EINVAL;
-
-	/* check whether the other slot has disabled the same index already */
-	if (index == amd_get_l3_disable_slot(nb, !slot))
-		return -EEXIST;
-
-	amd_l3_disable_index(nb, cpu, slot, index);
-
-	return 0;
-}
-
-static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
-				   const char *buf, size_t count,
-				   unsigned int slot)
-{
-	unsigned long val = 0;
-	int cpu, err = 0;
-	struct amd_northbridge *nb = this_leaf->priv;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	cpu = cpumask_first(&this_leaf->shared_cpu_map);
-
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
-	if (err) {
-		if (err == -EEXIST)
-			pr_warn("L3 slot %d in use/index already disabled!\n",
-				   slot);
-		return err;
-	}
-	return count;
-}
-
-#define STORE_CACHE_DISABLE(slot)					\
-static ssize_t								\
-cache_disable_##slot##_store(struct device *dev,			\
-			     struct device_attribute *attr,		\
-			     const char *buf, size_t count)		\
-{									\
-	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
-	return store_cache_disable(this_leaf, buf, count, slot);	\
-}
-STORE_CACHE_DISABLE(0)
-STORE_CACHE_DISABLE(1)
-
-static ssize_t subcaches_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
-	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
-
-	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
-}
-
-static ssize_t subcaches_store(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t count)
-{
-	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
-	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
-	unsigned long val;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (kstrtoul(buf, 16, &val) < 0)
-		return -EINVAL;
-
-	if (amd_set_subcaches(cpu, val))
-		return -EINVAL;
-
-	return count;
-}
-
-static DEVICE_ATTR_RW(cache_disable_0);
-static DEVICE_ATTR_RW(cache_disable_1);
-static DEVICE_ATTR_RW(subcaches);
-
-static umode_t
-cache_private_attrs_is_visible(struct kobject *kobj,
-			       struct attribute *attr, int unused)
-{
-	struct device *dev = kobj_to_dev(kobj);
-	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
-	umode_t mode = attr->mode;
-
-	if (!this_leaf->priv)
-		return 0;
-
-	if ((attr == &dev_attr_subcaches.attr) &&
-	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		return mode;
-
-	if ((attr == &dev_attr_cache_disable_0.attr ||
-	     attr == &dev_attr_cache_disable_1.attr) &&
-	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		return mode;
-
-	return 0;
-}
-
-static struct attribute_group cache_private_group = {
-	.is_visible = cache_private_attrs_is_visible,
-};
-
-static void init_amd_l3_attrs(void)
-{
-	int n = 1;
-	static struct attribute **amd_l3_attrs;
-
-	if (amd_l3_attrs) /* already initialized */
-		return;
-
-	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-		n += 2;
-	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		n += 1;
-
-	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
-	if (!amd_l3_attrs)
-		return;
-
-	n = 0;
-	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
-		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
-		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
-	}
-	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
-
-	cache_private_group.attrs = amd_l3_attrs;
-}
-
-const struct attribute_group *
-cache_get_priv_group(struct cacheinfo *this_leaf)
-{
-	struct amd_northbridge *nb = this_leaf->priv;
-
-	if (this_leaf->level < 3 || !nb)
-		return NULL;
-
-	if (nb && nb->l3_cache.indices)
-		init_amd_l3_attrs();
-
-	return &cache_private_group;
-}
-
-static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
-{
-	int node;
-
-	/* only for L3, and not in virtualized environments */
-	if (index < 3)
-		return;
-
-	node = amd_get_nb_id(smp_processor_id());
-	this_leaf->nb = node_to_amd_nb(node);
-	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
-		amd_calc_l3_indices(this_leaf->nb);
-}
-#else
-#define amd_init_l3_cache(x, y)
-#endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
-
-static int
-cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
-{
-	union _cpuid4_leaf_eax	eax;
-	union _cpuid4_leaf_ebx	ebx;
-	union _cpuid4_leaf_ecx	ecx;
-	unsigned		edx;
-
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
-			cpuid_count(0x8000001d, index, &eax.full,
-				    &ebx.full, &ecx.full, &edx);
-		else
-			amd_cpuid4(index, &eax, &ebx, &ecx);
-		amd_init_l3_cache(this_leaf, index);
-	} else {
-		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
-	}
-
-	if (eax.split.type == CTYPE_NULL)
-		return -EIO; /* better error ? */
-
-	this_leaf->eax = eax;
-	this_leaf->ebx = ebx;
-	this_leaf->ecx = ecx;
-	this_leaf->size = (ecx.split.number_of_sets          + 1) *
-			  (ebx.split.coherency_line_size     + 1) *
-			  (ebx.split.physical_line_partition + 1) *
-			  (ebx.split.ways_of_associativity   + 1);
-	return 0;
-}
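
The size computed above is the product of the four leaf fields, each stored as its value minus one. A small stand-alone sketch with assumed example leaf values:

#include <stdio.h>

int main(void)
{
	/* example encodings: 512 sets, 64-byte lines, 1 partition, 16 ways */
	unsigned int sets = 511, cls = 63, part = 0, ways = 15;
	unsigned long size = (sets + 1UL) * (cls + 1) * (part + 1) * (ways + 1);

	printf("%lu bytes (%lu KB)\n", size, size / 1024);	/* 524288 (512 KB) */
	return 0;
}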
-
-static int find_num_cache_leaves(struct cpuinfo_x86 *c)
-{
-	unsigned int		eax, ebx, ecx, edx, op;
-	union _cpuid4_leaf_eax	cache_eax;
-	int 			i = -1;
-
-	if (c->x86_vendor == X86_VENDOR_AMD)
-		op = 0x8000001d;
-	else
-		op = 4;
-
-	do {
-		++i;
-		/* Do cpuid(op) loop to find out num_cache_leaves */
-		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
-		cache_eax.full = eax;
-	} while (cache_eax.split.type != CTYPE_NULL);
-	return i;
-}
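
The same enumerate-until-CTYPE_NULL loop can be reproduced from user space; a hedged sketch assuming a compiler that ships <cpuid.h> with __get_cpuid_count() (GCC and clang do) and an Intel CPU, so leaf 4 is used:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, i = 0;

	/* leaf 4 on Intel; AMD uses 0x8000001d when TOPOEXT is present */
	while (__get_cpuid_count(4, i, &eax, &ebx, &ecx, &edx) &&
	       (eax & 0x1f) != 0)	/* EAX[4:0] == 0: no more caches */
		i++;

	printf("%u cache leaves\n", i);
	return 0;
}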
-
-void init_amd_cacheinfo(struct cpuinfo_x86 *c)
-{
-
-	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		num_cache_leaves = find_num_cache_leaves(c);
-	} else if (c->extended_cpuid_level >= 0x80000006) {
-		if (cpuid_edx(0x80000006) & 0xf000)
-			num_cache_leaves = 4;
-		else
-			num_cache_leaves = 3;
-	}
-}
-
-unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
-{
-	/* Cache sizes */
-	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
-	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
-	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
-	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_SMP
-	unsigned int cpu = c->cpu_index;
-#endif
-
-	if (c->cpuid_level > 3) {
-		static int is_initialized;
-
-		if (is_initialized == 0) {
-			/* Init num_cache_leaves from boot CPU */
-			num_cache_leaves = find_num_cache_leaves(c);
-			is_initialized++;
-		}
-
-		/*
-		 * Whenever possible use cpuid(4), deterministic cache
-		 * parameters cpuid leaf to find the cache details
-		 */
-		for (i = 0; i < num_cache_leaves; i++) {
-			struct _cpuid4_info_regs this_leaf = {};
-			int retval;
-
-			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
-			if (retval < 0)
-				continue;
-
-			switch (this_leaf.eax.split.level) {
-			case 1:
-				if (this_leaf.eax.split.type == CTYPE_DATA)
-					new_l1d = this_leaf.size/1024;
-				else if (this_leaf.eax.split.type == CTYPE_INST)
-					new_l1i = this_leaf.size/1024;
-				break;
-			case 2:
-				new_l2 = this_leaf.size/1024;
-				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
-				index_msb = get_count_order(num_threads_sharing);
-				l2_id = c->apicid & ~((1 << index_msb) - 1);
-				break;
-			case 3:
-				new_l3 = this_leaf.size/1024;
-				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
-				index_msb = get_count_order(num_threads_sharing);
-				l3_id = c->apicid & ~((1 << index_msb) - 1);
-				break;
-			default:
-				break;
-			}
-		}
-	}
-	/*
-	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
-	 * trace cache
-	 */
-	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
-		/* supports eax=2  call */
-		int j, n;
-		unsigned int regs[4];
-		unsigned char *dp = (unsigned char *)regs;
-		int only_trace = 0;
-
-		if (num_cache_leaves != 0 && c->x86 == 15)
-			only_trace = 1;
-
-		/* Number of times to iterate */
-		n = cpuid_eax(2) & 0xFF;
-
-		for (i = 0 ; i < n ; i++) {
-			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
-
-			/* If bit 31 is set, this is an unknown format */
-			for (j = 0 ; j < 3 ; j++)
-				if (regs[j] & (1 << 31))
-					regs[j] = 0;
-
-			/* Byte 0 is level count, not a descriptor */
-			for (j = 1 ; j < 16 ; j++) {
-				unsigned char des = dp[j];
-				unsigned char k = 0;
-
-				/* look up this descriptor in the table */
-				while (cache_table[k].descriptor != 0) {
-					if (cache_table[k].descriptor == des) {
-						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
-							break;
-						switch (cache_table[k].cache_type) {
-						case LVL_1_INST:
-							l1i += cache_table[k].size;
-							break;
-						case LVL_1_DATA:
-							l1d += cache_table[k].size;
-							break;
-						case LVL_2:
-							l2 += cache_table[k].size;
-							break;
-						case LVL_3:
-							l3 += cache_table[k].size;
-							break;
-						case LVL_TRACE:
-							trace += cache_table[k].size;
-							break;
-						}
-
-						break;
-					}
-
-					k++;
-				}
-			}
-		}
-	}
-
-	if (new_l1d)
-		l1d = new_l1d;
-
-	if (new_l1i)
-		l1i = new_l1i;
-
-	if (new_l2) {
-		l2 = new_l2;
-#ifdef CONFIG_SMP
-		per_cpu(cpu_llc_id, cpu) = l2_id;
-#endif
-	}
-
-	if (new_l3) {
-		l3 = new_l3;
-#ifdef CONFIG_SMP
-		per_cpu(cpu_llc_id, cpu) = l3_id;
-#endif
-	}
-
-#ifdef CONFIG_SMP
-	/*
-	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
-	 * turn means that the only possibility is SMT (as indicated in
-	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
-	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
-	 * c->phys_proc_id.
-	 */
-	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
-		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
-#endif
-
-	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
-
-	return l2;
-}
-
-static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
-				    struct _cpuid4_info_regs *base)
-{
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	struct cacheinfo *this_leaf;
-	int i, sibling;
-
-	/*
-	 * For L3, always use the pre-calculated cpu_llc_shared_mask
-	 * to derive shared_cpu_map.
-	 */
-	if (index == 3) {
-		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
-			this_cpu_ci = get_cpu_cacheinfo(i);
-			if (!this_cpu_ci->info_list)
-				continue;
-			this_leaf = this_cpu_ci->info_list + index;
-			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
-				if (!cpu_online(sibling))
-					continue;
-				cpumask_set_cpu(sibling,
-						&this_leaf->shared_cpu_map);
-			}
-		}
-	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		unsigned int apicid, nshared, first, last;
-
-		nshared = base->eax.split.num_threads_sharing + 1;
-		apicid = cpu_data(cpu).apicid;
-		first = apicid - (apicid % nshared);
-		last = first + nshared - 1;
-
-		for_each_online_cpu(i) {
-			this_cpu_ci = get_cpu_cacheinfo(i);
-			if (!this_cpu_ci->info_list)
-				continue;
-
-			apicid = cpu_data(i).apicid;
-			if ((apicid < first) || (apicid > last))
-				continue;
-
-			this_leaf = this_cpu_ci->info_list + index;
-
-			for_each_online_cpu(sibling) {
-				apicid = cpu_data(sibling).apicid;
-				if ((apicid < first) || (apicid > last))
-					continue;
-				cpumask_set_cpu(sibling,
-						&this_leaf->shared_cpu_map);
-			}
-		}
-	} else
-		return 0;
-
-	return 1;
-}
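
In the TOPOEXT branch above, the APIC IDs sharing a cache form a contiguous window aligned to nshared. A tiny sketch of that window arithmetic with illustrative values:

#include <stdio.h>

int main(void)
{
	unsigned int apicid = 13, nshared = 8;	/* assumed example values */
	unsigned int first = apicid - (apicid % nshared);
	unsigned int last = first + nshared - 1;

	/* APIC IDs 8..15 share this cache instance */
	printf("[%u, %u]\n", first, last);
	return 0;
}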
-
-static void __cache_cpumap_setup(unsigned int cpu, int index,
-				 struct _cpuid4_info_regs *base)
-{
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	struct cacheinfo *this_leaf, *sibling_leaf;
-	unsigned long num_threads_sharing;
-	int index_msb, i;
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-
-	if (c->x86_vendor == X86_VENDOR_AMD) {
-		if (__cache_amd_cpumap_setup(cpu, index, base))
-			return;
-	}
-
-	this_leaf = this_cpu_ci->info_list + index;
-	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
-
-	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
-	if (num_threads_sharing == 1)
-		return;
-
-	index_msb = get_count_order(num_threads_sharing);
-
-	for_each_online_cpu(i)
-		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
-			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
-
-			if (i == cpu || !sib_cpu_ci->info_list)
-				continue;/* skip if itself or no cacheinfo */
-			sibling_leaf = sib_cpu_ci->info_list + index;
-			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
-			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
-		}
-}
-
-static void ci_leaf_init(struct cacheinfo *this_leaf,
-			 struct _cpuid4_info_regs *base)
-{
-	this_leaf->id = base->id;
-	this_leaf->attributes = CACHE_ID;
-	this_leaf->level = base->eax.split.level;
-	this_leaf->type = cache_type_map[base->eax.split.type];
-	this_leaf->coherency_line_size =
-				base->ebx.split.coherency_line_size + 1;
-	this_leaf->ways_of_associativity =
-				base->ebx.split.ways_of_associativity + 1;
-	this_leaf->size = base->size;
-	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
-	this_leaf->physical_line_partition =
-				base->ebx.split.physical_line_partition + 1;
-	this_leaf->priv = base->nb;
-}
-
-static int __init_cache_level(unsigned int cpu)
-{
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-
-	if (!num_cache_leaves)
-		return -ENOENT;
-	if (!this_cpu_ci)
-		return -EINVAL;
-	this_cpu_ci->num_levels = 3;
-	this_cpu_ci->num_leaves = num_cache_leaves;
-	return 0;
-}
-
-/*
- * The max shared threads number comes from CPUID.4:EAX[25-14] with input
- * ECX as cache index. Then right shift apicid by the number's order to get
- * cache id for this cache node.
- */
-static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
-{
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	unsigned long num_threads_sharing;
-	int index_msb;
-
-	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
-	index_msb = get_count_order(num_threads_sharing);
-	id4_regs->id = c->apicid >> index_msb;
-}
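
For example, twelve sharing threads give an order of 4, so the cache id is apicid >> 4. A hedged user-space sketch with a stand-in for the kernel's get_count_order():

#include <stdio.h>

/* user-space stand-in for get_count_order(): ceil(log2(n)) */
static int count_order(unsigned long n)
{
	int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

int main(void)
{
	unsigned long threads = 12;	/* EAX[25:14] + 1 */
	unsigned int apicid = 0x23;

	printf("cache id %u\n", apicid >> count_order(threads));	/* 0x23 >> 4 = 2 */
	return 0;
}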
-
-static int __populate_cache_leaves(unsigned int cpu)
-{
-	unsigned int idx, ret;
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
-	struct _cpuid4_info_regs id4_regs = {};
-
-	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
-		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
-		if (ret)
-			return ret;
-		get_cache_id(cpu, &id4_regs);
-		ci_leaf_init(this_leaf++, &id4_regs);
-		__cache_cpumap_setup(cpu, idx, &id4_regs);
-	}
-	this_cpu_ci->cpu_map_populated = true;
-
-	return 0;
-}
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 589b948..24bfa63 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -33,8 +33,8 @@
 #include <asm/intel_rdt_sched.h>
 #include "intel_rdt.h"
 
-#define MAX_MBA_BW	100u
 #define MBA_IS_LINEAR	0x4
+#define MBA_MAX_MBPS	U32_MAX
 
 /* Mutex to protect rdtgroup access. */
 DEFINE_MUTEX(rdtgroup_mutex);
@@ -178,7 +178,7 @@
 		.msr_update		= mba_wrmsr,
 		.cache_level		= 3,
 		.parse_ctrlval		= parse_bw,
-		.format_str		= "%d=%*d",
+		.format_str		= "%d=%*u",
 		.fflags			= RFTYPE_RES_MB,
 	},
 };
@@ -230,6 +230,14 @@
 	rdt_alloc_capable = true;
 }
 
+bool is_mba_sc(struct rdt_resource *r)
+{
+	if (!r)
+		return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
+
+	return r->membw.mba_sc;
+}
+
 /*
  * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
  * exposed to user interface and the h/w understandable delay values.
@@ -341,7 +349,7 @@
  * that can be written to QOS_MSRs.
  * There are currently no SKUs which support non linear delay values.
  */
-static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
 {
 	if (r->membw.delay_linear)
 		return MAX_MBA_BW - bw;
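
For linear-scale parts the delay value written to the MSR is simply the complement of the requested percentage; a trivial sketch, assuming the MAX_MBA_BW of 100 defined in intel_rdt.h below:

#include <stdio.h>

#define MAX_MBA_BW 100u

int main(void)
{
	unsigned long bw = 70;	/* requested bandwidth percentage */

	printf("delay value %lu\n", MAX_MBA_BW - bw);	/* prints 30 */
	return 0;
}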
@@ -431,25 +439,40 @@
 	return NULL;
 }
 
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
+{
+	int i;
+
+	/*
+	 * Initialize the Control MSRs to having no control.
+	 * For Cache Allocation: Set all bits in cbm
+	 * For Memory Allocation: Set b/w requested to 100%
+	 * and the bandwidth in MBps to U32_MAX
+	 */
+	for (i = 0; i < r->num_closid; i++, dc++, dm++) {
+		*dc = r->default_ctrl;
+		*dm = MBA_MAX_MBPS;
+	}
+}
+
 static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
 {
 	struct msr_param m;
-	u32 *dc;
-	int i;
+	u32 *dc, *dm;
 
 	dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
 	if (!dc)
 		return -ENOMEM;
 
-	d->ctrl_val = dc;
+	dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
+	if (!dm) {
+		kfree(dc);
+		return -ENOMEM;
+	}
 
-	/*
-	 * Initialize the Control MSRs to having no control.
-	 * For Cache Allocation: Set all bits in cbm
-	 * For Memory Allocation: Set b/w requested to 100
-	 */
-	for (i = 0; i < r->num_closid; i++, dc++)
-		*dc = r->default_ctrl;
+	d->ctrl_val = dc;
+	d->mbps_val = dm;
+	setup_default_ctrlval(r, dc, dm);
 
 	m.low = 0;
 	m.high = r->num_closid;
@@ -588,6 +611,7 @@
 		}
 
 		kfree(d->ctrl_val);
+		kfree(d->mbps_val);
 		kfree(d->rmid_busy_llc);
 		kfree(d->mbm_total);
 		kfree(d->mbm_local);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3fd7a70..3975282 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -28,6 +28,7 @@
 
 #define MBM_CNTR_WIDTH			24
 #define MBM_OVERFLOW_INTERVAL		1000
+#define MAX_MBA_BW			100u
 
 #define RMID_VAL_ERROR			BIT_ULL(63)
 #define RMID_VAL_UNAVAIL		BIT_ULL(62)
@@ -180,10 +181,20 @@
  * struct mbm_state - status for each MBM counter in each domain
  * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
  * @prev_msr	Value of IA32_QM_CTR for this RMID last time we read it
+ * @chunks_bw:	Total local data moved. Used for bandwidth calculation
+ * @prev_bw_msr: Value of previous IA32_QM_CTR for bandwidth counting
+ * @prev_bw:	The most recent bandwidth in MBps
+ * @delta_bw:	Difference between the current and previous bandwidth
+ * @delta_comp:	Indicates whether to compute the delta_bw
  */
 struct mbm_state {
 	u64	chunks;
 	u64	prev_msr;
+	u64	chunks_bw;
+	u64	prev_bw_msr;
+	u32	prev_bw;
+	u32	delta_bw;
+	bool	delta_comp;
 };
 
 /**
@@ -202,6 +213,7 @@
  * @cqm_work_cpu:
  *		worker cpu for CQM h/w counters
  * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
+ * @mbps_val:	When mba_sc is enabled, this holds the bandwidth in MBps
  * @new_ctrl:	new ctrl value to be loaded
  * @have_new_ctrl: did user provide new_ctrl for this domain
  */
@@ -217,6 +229,7 @@
 	int			mbm_work_cpu;
 	int			cqm_work_cpu;
 	u32			*ctrl_val;
+	u32			*mbps_val;
 	u32			new_ctrl;
 	bool			have_new_ctrl;
 };
@@ -259,6 +272,7 @@
  * @min_bw:		Minimum memory bandwidth percentage user can request
  * @bw_gran:		Granularity at which the memory bandwidth is allocated
  * @delay_linear:	True if memory B/W delay is in linear scale
+ * @mba_sc:		True if the MBA software controller (mba_sc) is enabled
  * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
  */
 struct rdt_membw {
@@ -266,6 +280,7 @@
 	u32		min_bw;
 	u32		bw_gran;
 	u32		delay_linear;
+	bool		mba_sc;
 	u32		*mb_map;
 };
 
@@ -445,6 +460,9 @@
 void mbm_setup_overflow_handler(struct rdt_domain *dom,
 				unsigned long delay_ms);
 void mbm_handle_overflow(struct work_struct *work);
+bool is_mba_sc(struct rdt_resource *r);
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
 void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 23e1d5c..116d57b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -53,7 +53,8 @@
 		return false;
 	}
 
-	if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+	if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
+	    !is_mba_sc(r)) {
 		rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
 				    r->membw.min_bw, r->default_ctrl);
 		return false;
@@ -179,6 +180,8 @@
 	struct msr_param msr_param;
 	cpumask_var_t cpu_mask;
 	struct rdt_domain *d;
+	bool mba_sc;
+	u32 *dc;
 	int cpu;
 
 	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
@@ -188,13 +191,20 @@
 	msr_param.high = msr_param.low + 1;
 	msr_param.res = r;
 
+	mba_sc = is_mba_sc(r);
 	list_for_each_entry(d, &r->domains, list) {
-		if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
+		dc = !mba_sc ? d->ctrl_val : d->mbps_val;
+		if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
 			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
-			d->ctrl_val[closid] = d->new_ctrl;
+			dc[closid] = d->new_ctrl;
 		}
 	}
-	if (cpumask_empty(cpu_mask))
+
+	/*
+	 * Avoid writing the control MSR with control values when
+	 * the MBA software controller is enabled.
+	 */
+	if (cpumask_empty(cpu_mask) || mba_sc)
 		goto done;
 	cpu = get_cpu();
 	/* Update CBM on this cpu if it's in cpu_mask. */
@@ -282,13 +292,17 @@
 {
 	struct rdt_domain *dom;
 	bool sep = false;
+	u32 ctrl_val;
 
 	seq_printf(s, "%*s:", max_name_width, r->name);
 	list_for_each_entry(dom, &r->domains, list) {
 		if (sep)
 			seq_puts(s, ";");
+
+		ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
+			    dom->mbps_val[closid]);
 		seq_printf(s, r->format_str, dom->id, max_data_width,
-			   dom->ctrl_val[closid]);
+			   ctrl_val);
 		sep = true;
 	}
 	seq_puts(s, "\n");
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 681450e..b0f3aed 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -225,10 +225,18 @@
 		list_add_tail(&entry->list, &rmid_free_lru);
 }
 
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
+{
+	u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+
+	chunks = (cur_msr << shift) - (prev_msr << shift);
+	return chunks >>= shift;
+}
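+
The new mbm_overflow_count() shifts both samples into the top bits of a u64 so the subtraction wraps modulo 2^MBM_CNTR_WIDTH, which handles the 24-bit hardware counter rolling over between reads. A self-contained sketch of the same trick (the sample values are illustrative):

#include <stdio.h>
#include <stdint.h>

#define CNTR_WIDTH 24	/* matches MBM_CNTR_WIDTH */

static uint64_t overflow_count(uint64_t prev, uint64_t cur)
{
	uint64_t shift = 64 - CNTR_WIDTH;

	/* shifting into the top bits makes the subtraction wrap mod 2^24 */
	return ((cur << shift) - (prev << shift)) >> shift;
}

int main(void)
{
	/* counter wrapped from 0xfffffe to 0x000005: 7 chunks moved */
	printf("%llu\n", (unsigned long long)overflow_count(0xfffffe, 0x5));
	return 0;
}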
+
 static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 {
-	u64 chunks, shift, tval;
 	struct mbm_state *m;
+	u64 chunks, tval;
 
 	tval = __rmid_read(rmid, rr->evtid);
 	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
@@ -254,14 +262,12 @@
 	}
 
 	if (rr->first) {
-		m->prev_msr = tval;
-		m->chunks = 0;
+		memset(m, 0, sizeof(struct mbm_state));
+		m->prev_bw_msr = m->prev_msr = tval;
 		return 0;
 	}
 
-	shift = 64 - MBM_CNTR_WIDTH;
-	chunks = (tval << shift) - (m->prev_msr << shift);
-	chunks >>= shift;
+	chunks = mbm_overflow_count(m->prev_msr, tval);
 	m->chunks += chunks;
 	m->prev_msr = tval;
 
@@ -270,6 +276,32 @@
 }
 
 /*
+ * Supporting function to calculate the memory bandwidth
+ * and delta bandwidth in MBps.
+ */
+static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+	struct mbm_state *m = &rr->d->mbm_local[rmid];
+	u64 tval, cur_bw, chunks;
+
+	tval = __rmid_read(rmid, rr->evtid);
+	if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+		return;
+
+	chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+	m->chunks_bw += chunks;
+	m->chunks = m->chunks_bw;
+	cur_bw = (chunks * r->mon_scale) >> 20;
+
+	if (m->delta_comp)
+		m->delta_bw = abs(cur_bw - m->prev_bw);
+	m->delta_comp = false;
+	m->prev_bw = cur_bw;
+	m->prev_bw_msr = tval;
+}
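+
With the 1 s sampling interval described in the feedback-loop comment below, chunks * mon_scale is bytes moved per second, and >> 20 converts that to MBps (MiB/s). A hedged sketch; the mon_scale here is an assumed, illustrative per-chunk byte factor:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t chunks = 32768;	/* chunks seen in the 1 s window */
	uint64_t mon_scale = 65536;	/* assumed bytes-per-chunk factor */

	/* bytes per second >> 20 = MBps; prints 2048 */
	printf("%llu MBps\n", (unsigned long long)((chunks * mon_scale) >> 20));
	return 0;
}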
+
+/*
  * This is called via IPI to read the CQM/MBM counters
  * on a domain.
  */
@@ -297,6 +329,118 @@
 	}
 }
 
+/*
+ * Feedback loop for MBA software controller (mba_sc)
+ *
+ * mba_sc is a feedback loop where we periodically read MBM counters and
+ * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
+ * that:
+ *
+ *   current bandwidth (cur_bw) < user-specified bandwidth (user_bw)
+ *
+ * This uses the MBM counters to measure the bandwidth and MBA throttle
+ * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
+ * fact that resctrl rdtgroups have both monitoring and control.
+ *
+ * The checks run at a 1 s interval, piggybacking on the MBM overflow
+ * timer; the fixed 1 s interval keeps the bandwidth calculation simple.
+ *
+ * Although MBA's goal is to restrict the bandwidth to a maximum, there may
+ * be a need to increase the bandwidth to avoid unnecessarily restricting
+ * the L2 <-> L3 traffic.
+ *
+ * Since MBA controls the L2 external bandwidth whereas MBM measures the
+ * L3 external bandwidth, the following sequence could lead to such a
+ * situation.
+ *
+ * Consider an rdtgroup which had high L3 <-> memory traffic in its initial
+ * phases -> mba_sc kicks in and reduces the bandwidth percentage values ->
+ * but after some time the rdtgroup has mostly L2 <-> L3 traffic.
+ *
+ * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
+ * throttle MSRs already have low percentage values.  To avoid
+ * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
+ */
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+{
+	u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
+	struct mbm_state *pmbm_data, *cmbm_data;
+	u32 cur_bw, delta_bw, user_bw;
+	struct rdt_resource *r_mba;
+	struct rdt_domain *dom_mba;
+	struct list_head *head;
+	struct rdtgroup *entry;
+
+	r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
+	closid = rgrp->closid;
+	rmid = rgrp->mon.rmid;
+	pmbm_data = &dom_mbm->mbm_local[rmid];
+
+	dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
+	if (!dom_mba) {
+		pr_warn_once("Failure to get domain for MBA update\n");
+		return;
+	}
+
+	cur_bw = pmbm_data->prev_bw;
+	user_bw = dom_mba->mbps_val[closid];
+	delta_bw = pmbm_data->delta_bw;
+	cur_msr_val = dom_mba->ctrl_val[closid];
+
+	/*
+	 * For Ctrl groups read data from child monitor groups.
+	 */
+	head = &rgrp->mon.crdtgrp_list;
+	list_for_each_entry(entry, head, mon.crdtgrp_list) {
+		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+		cur_bw += cmbm_data->prev_bw;
+		delta_bw += cmbm_data->delta_bw;
+	}
+
+	/*
+	 * Scale up/down the bandwidth linearly for the ctrl group.  The
+	 * bandwidth step is the bandwidth granularity specified by the
+	 * hardware.
+	 *
+	 * The delta_bw is used when increasing the bandwidth so that we
+	 * don't alternately increase and decrease the control values
+	 * continuously.
+	 *
+	 * For example, consider cur_bw = 90 MBps, user_bw = 100 MBps; if the
+	 * bandwidth step is 20 MBps (> user_bw - cur_bw), we would keep
+	 * switching between 90 and 110 continuously if we only check
+	 * cur_bw < user_bw.
+	 */
+	if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
+		new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
+	} else if (cur_msr_val < MAX_MBA_BW &&
+		   (user_bw > (cur_bw + delta_bw))) {
+		new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
+	} else {
+		return;
+	}
+
+	cur_msr = r_mba->msr_base + closid;
+	wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
+	dom_mba->ctrl_val[closid] = new_msr_val;
+
+	/*
+	 * Delta values are updated dynamically, package-wise, for each
+	 * rdtgrp every time the throttle MSR changes value.
+	 *
+	 * This is because (1) the increase in bandwidth is not perfectly
+	 * linear, only "approximately" linear, even when the hardware
+	 * says it is linear, and (2) since MBA is a core-specific
+	 * mechanism, the delta values vary based on the number of cores used
+	 * by the rdtgrp.
+	 */
+	pmbm_data->delta_comp = true;
+	list_for_each_entry(entry, head, mon.crdtgrp_list) {
+		cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+		cmbm_data->delta_comp = true;
+	}
+}
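+
The heart of update_mba_bw() is the hysteresis step above: throttle down when over the target, step back up only when the headroom exceeds the last observed per-step gain, otherwise hold. A self-contained sketch of just that rule, with illustrative constants, reproducing the 90/100/20 example from the comment:

#include <stdio.h>

#define MAX_MBA_BW 100u

static unsigned int step(unsigned int cur_msr_val, unsigned int cur_bw,
			 unsigned int user_bw, unsigned int delta_bw,
			 unsigned int min_bw, unsigned int bw_gran)
{
	if (cur_msr_val > min_bw && user_bw < cur_bw)
		return cur_msr_val - bw_gran;	/* throttle down */
	if (cur_msr_val < MAX_MBA_BW && user_bw > cur_bw + delta_bw)
		return cur_msr_val + bw_gran;	/* throttle up */
	return cur_msr_val;			/* hold: inside hysteresis band */
}

int main(void)
{
	/* 90 MBps measured, 100 MBps target, 20 MBps per-step gain:
	 * hold at 50% instead of bouncing between 50% and 60% */
	printf("%u%%\n", step(50, 90, 100, 20, 10, 10));
	return 0;
}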
+
 static void mbm_update(struct rdt_domain *d, int rmid)
 {
 	struct rmid_read rr;
@@ -314,7 +458,16 @@
 	}
 	if (is_mbm_local_enabled()) {
 		rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
-		__mon_event_count(rmid, &rr);
+
+		/*
+		 * Call the MBA software controller only for the
+		 * control groups and when the user has enabled
+		 * the software controller explicitly.
+		 */
+		if (!is_mba_sc(NULL))
+			__mon_event_count(rmid, &rr);
+		else
+			mbm_bw_count(rmid, &rr);
 	}
 }
 
@@ -385,6 +538,9 @@
 		head = &prgrp->mon.crdtgrp_list;
 		list_for_each_entry(crgrp, head, mon.crdtgrp_list)
 			mbm_update(d, crgrp->mon.rmid);
+
+		if (is_mba_sc(NULL))
+			update_mba_bw(prgrp, d);
 	}
 
 	schedule_delayed_work_on(cpu, &d->mbm_over, delay);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index fca759d..749856a 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1005,6 +1005,11 @@
 	wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
 }
 
+static inline bool is_mba_linear(void)
+{
+	return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
+}
+
 static int set_cache_qos_cfg(int level, bool enable)
 {
 	void (*update)(void *arg);
@@ -1041,6 +1046,28 @@
 	return 0;
 }
 
+/*
+ * Enable or disable the MBA software controller
+ * which lets the user specify bandwidth in MBps.
+ * The MBA software controller is supported only if
+ * MBM is supported and MBA is in linear scale.
+ */
+static int set_mba_sc(bool mba_sc)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
+	struct rdt_domain *d;
+
+	if (!is_mbm_enabled() || !is_mba_linear() ||
+	    mba_sc == is_mba_sc(r))
+		return -EINVAL;
+
+	r->membw.mba_sc = mba_sc;
+	list_for_each_entry(d, &r->domains, list)
+		setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
+
+	return 0;
+}
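+
As the mount-option parsing further down shows, the controller is switched on by mounting resctrl with the "mba_MBps" option (e.g. "mount -t resctrl -o mba_MBps resctrl /sys/fs/resctrl"), and switched back off via the set_mba_sc(false) call in the umount/reset path.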
+
 static int cdp_enable(int level, int data_type, int code_type)
 {
 	struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
@@ -1123,6 +1150,10 @@
 			ret = cdpl2_enable();
 			if (ret)
 				goto out;
+		} else if (!strcmp(token, "mba_MBps")) {
+			ret = set_mba_sc(true);
+			if (ret)
+				goto out;
 		} else {
 			ret = -EINVAL;
 			goto out;
@@ -1445,6 +1476,8 @@
 	cpus_read_lock();
 	mutex_lock(&rdtgroup_mutex);
 
+	set_mba_sc(false);
+
 	/*Put everything back to default values. */
 	for_each_alloc_enabled_rdt_resource(r)
 		reset_all_ctrls(r);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 475cb4f..c805a06 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -48,7 +48,7 @@
 
 static u8 n_banks;
 
-#define MAX_FLAG_OPT_SIZE	3
+#define MAX_FLAG_OPT_SIZE	4
 #define NBCFG			0x44
 
 enum injection_type {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 42cf288..cd76380 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1727,6 +1727,21 @@
 	}
 }
 
+static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
+{
+	struct mca_config *cfg = &mca_cfg;
+
+	/*
+	 * All newer Centaur CPUs support MCE broadcasting. Enable
+	 * synchronization with a one second timeout.
+	 */
+	if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
+	     c->x86 > 6) {
+		if (cfg->monarch_timeout < 0)
+			cfg->monarch_timeout = USEC_PER_SEC;
+	}
+}
+
 static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 {
 	switch (c->x86_vendor) {
@@ -1739,6 +1754,9 @@
 		mce_amd_feature_init(c);
 		break;
 		}
+	case X86_VENDOR_CENTAUR:
+		mce_centaur_feature_init(c);
+		break;
 
 	default:
 		break;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index c8e0388..f591b01 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,8 +436,7 @@
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
-static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
-				  unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block)
 {
 	u32 low, high;
 	u32 addr = 0;
@@ -456,13 +455,13 @@
 	 * For SMCA enabled processors, BLKPTR field of the first MISC register
 	 * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
 	 */
-	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+	if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
 		goto out;
 
 	if (!(low & MCI_CONFIG_MCAX))
 		goto out;
 
-	if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+	if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
 	    (low & MASK_BLKPTR_LO))
 		addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
 
@@ -471,7 +470,7 @@
 	return addr;
 }
 
-static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
 			     unsigned int bank, unsigned int block)
 {
 	u32 addr = 0, offset = 0;
@@ -480,7 +479,7 @@
 		return addr;
 
 	if (mce_flags.smca)
-		return smca_get_block_address(cpu, bank, block);
+		return smca_get_block_address(bank, block);
 
 	/* Fall back to method we used for older processors: */
 	switch (block) {
@@ -558,7 +557,7 @@
 			smca_configure(bank, cpu);
 
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			address = get_block_address(cpu, address, low, high, bank, block);
+			address = get_block_address(address, low, high, bank, block);
 			if (!address)
 				break;
 
@@ -1175,7 +1174,7 @@
 	if (err)
 		goto out_free;
 recurse:
-	address = get_block_address(cpu, address, low, high, bank, ++block);
+	address = get_block_address(address, low, high, bank, ++block);
 	if (!address)
 		return 0;
 
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index ad9e5ed..2ad9107 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
-obj-y		:= main.o if.o generic.o cleanup.o
+obj-y		:= mtrr.o if.o generic.o cleanup.o
 obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
deleted file mode 100644
index 7468de4..0000000
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ /dev/null
@@ -1,887 +0,0 @@
-/*  Generic MTRR (Memory Type Range Register) driver.
-
-    Copyright (C) 1997-2000  Richard Gooch
-    Copyright (c) 2002	     Patrick Mochel
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Library General Public
-    License as published by the Free Software Foundation; either
-    version 2 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Library General Public License for more details.
-
-    You should have received a copy of the GNU Library General Public
-    License along with this library; if not, write to the Free
-    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
-    The postal address is:
-      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
-
-    Source: "Pentium Pro Family Developer's Manual, Volume 3:
-    Operating System Writer's Guide" (Intel document number 242692),
-    section 11.11.7
-
-    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
-    on 6-7 March 2002.
-    Source: Intel Architecture Software Developers Manual, Volume 3:
-    System Programming Guide; Section 9.11. (1997 edition - PPro).
-*/
-
-#define DEBUG
-
-#include <linux/types.h> /* FIXME: kvm_para.h needs this */
-
-#include <linux/stop_machine.h>
-#include <linux/kvm_para.h>
-#include <linux/uaccess.h>
-#include <linux/export.h>
-#include <linux/mutex.h>
-#include <linux/init.h>
-#include <linux/sort.h>
-#include <linux/cpu.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/syscore_ops.h>
-
-#include <asm/cpufeature.h>
-#include <asm/e820/api.h>
-#include <asm/mtrr.h>
-#include <asm/msr.h>
-#include <asm/pat.h>
-
-#include "mtrr.h"
-
-/* arch_phys_wc_add returns an MTRR register index plus this offset. */
-#define MTRR_TO_PHYS_WC_OFFSET 1000
-
-u32 num_var_ranges;
-static bool __mtrr_enabled;
-
-static bool mtrr_enabled(void)
-{
-	return __mtrr_enabled;
-}
-
-unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
-static DEFINE_MUTEX(mtrr_mutex);
-
-u64 size_or_mask, size_and_mask;
-static bool mtrr_aps_delayed_init;
-
-static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
-
-const struct mtrr_ops *mtrr_if;
-
-static void set_mtrr(unsigned int reg, unsigned long base,
-		     unsigned long size, mtrr_type type);
-
-void __init set_mtrr_ops(const struct mtrr_ops *ops)
-{
-	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
-		mtrr_ops[ops->vendor] = ops;
-}
-
-/*  Returns non-zero if we have the write-combining memory type  */
-static int have_wrcomb(void)
-{
-	struct pci_dev *dev;
-
-	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
-	if (dev != NULL) {
-		/*
-		 * ServerWorks LE chipsets < rev 6 have problems with
-		 * write-combining. Don't allow it and leave room for other
-		 * chipsets to be tagged
-		 */
-		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
-		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
-		    dev->revision <= 5) {
-			pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
-			pci_dev_put(dev);
-			return 0;
-		}
-		/*
-		 * Intel 450NX errata #23: non-ascending cacheline evictions to
-		 * write-combining memory may result in data corruption.
-		 */
-		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
-		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
-			pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
-			pci_dev_put(dev);
-			return 0;
-		}
-		pci_dev_put(dev);
-	}
-	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
-}
-
-/*  This function determines the number of variable MTRRs  */
-static void __init set_num_var_ranges(void)
-{
-	unsigned long config = 0, dummy;
-
-	if (use_intel())
-		rdmsr(MSR_MTRRcap, config, dummy);
-	else if (is_cpu(AMD))
-		config = 2;
-	else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
-		config = 8;
-
-	num_var_ranges = config & 0xff;
-}
-
-static void __init init_table(void)
-{
-	int i, max;
-
-	max = num_var_ranges;
-	for (i = 0; i < max; i++)
-		mtrr_usage_table[i] = 1;
-}
-
-struct set_mtrr_data {
-	unsigned long	smp_base;
-	unsigned long	smp_size;
-	unsigned int	smp_reg;
-	mtrr_type	smp_type;
-};
-
-/**
- * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
- * by all the CPUs.
- * @info: pointer to mtrr configuration data
- *
- * Returns nothing.
- */
-static int mtrr_rendezvous_handler(void *info)
-{
-	struct set_mtrr_data *data = info;
-
-	/*
-	 * We use this same function to initialize the mtrrs during boot,
-	 * resume, runtime cpu online and on an explicit request to set a
-	 * specific MTRR.
-	 *
-	 * During boot or suspend, the state of the boot cpu's mtrrs has been
-	 * saved, and we want to replicate that across all the cpus that come
-	 * online (either at the end of boot or resume or during a runtime cpu
-	 * online). If we're doing that, @reg is set to something special and on
-	 * all the cpu's we do mtrr_if->set_all() (On the logical cpu that
-	 * started the boot/resume sequence, this might be a duplicate
-	 * set_all()).
-	 */
-	if (data->smp_reg != ~0U) {
-		mtrr_if->set(data->smp_reg, data->smp_base,
-			     data->smp_size, data->smp_type);
-	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
-		mtrr_if->set_all();
-	}
-	return 0;
-}
-
-static inline int types_compatible(mtrr_type type1, mtrr_type type2)
-{
-	return type1 == MTRR_TYPE_UNCACHABLE ||
-	       type2 == MTRR_TYPE_UNCACHABLE ||
-	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
-	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
-}
-
-/**
- * set_mtrr - update mtrrs on all processors
- * @reg:	mtrr in question
- * @base:	mtrr base
- * @size:	mtrr size
- * @type:	mtrr type
- *
- * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
- *
- * 1. Queue work to do the following on all processors:
- * 2. Disable Interrupts
- * 3. Wait for all procs to do so
- * 4. Enter no-fill cache mode
- * 5. Flush caches
- * 6. Clear PGE bit
- * 7. Flush all TLBs
- * 8. Disable all range registers
- * 9. Update the MTRRs
- * 10. Enable all range registers
- * 11. Flush all TLBs and caches again
- * 12. Enter normal cache mode and reenable caching
- * 13. Set PGE
- * 14. Wait for buddies to catch up
- * 15. Enable interrupts.
- *
- * What does that mean for us? Well, stop_machine() will ensure that
- * the rendezvous handler is started on each CPU. And in lockstep they
- * do the state transition of disabling interrupts, updating MTRR's
- * (the CPU vendors may each do it differently, so we call mtrr_if->set()
- * callback and let them take care of it.) and enabling interrupts.
- *
- * Note that the mechanism is the same for UP systems, too; all the SMP stuff
- * becomes nops.
- */
-static void
-set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
-{
-	struct set_mtrr_data data = { .smp_reg = reg,
-				      .smp_base = base,
-				      .smp_size = size,
-				      .smp_type = type
-				    };
-
-	stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
-}
-
-static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
-				unsigned long size, mtrr_type type)
-{
-	struct set_mtrr_data data = { .smp_reg = reg,
-				      .smp_base = base,
-				      .smp_size = size,
-				      .smp_type = type
-				    };
-
-	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
-}
-
-static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
-				      unsigned long size, mtrr_type type)
-{
-	struct set_mtrr_data data = { .smp_reg = reg,
-				      .smp_base = base,
-				      .smp_size = size,
-				      .smp_type = type
-				    };
-
-	stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
-				       cpu_callout_mask);
-}
-
-/**
- * mtrr_add_page - Add a memory type region
- * @base: Physical base address of region in pages (in units of 4 kB!)
- * @size: Physical size of region in pages (4 kB)
- * @type: Type of MTRR desired
- * @increment: If this is true do usage counting on the region
- *
- * Memory type region registers control the caching on newer Intel and
- * non Intel processors. This function allows drivers to request an
- * MTRR is added. The details and hardware specifics of each processor's
- * implementation are hidden from the caller, but nevertheless the
- * caller should expect to need to provide a power of two size on an
- * equivalent power of two boundary.
- *
- * If the region cannot be added either because all regions are in use
- * or the CPU cannot support it a negative value is returned. On success
- * the register number for this entry is returned, but should be treated
- * as a cookie only.
- *
- * On a multiprocessor machine the changes are made to all processors.
- * This is required on x86 by the Intel processors.
- *
- * The available types are
- *
- * %MTRR_TYPE_UNCACHABLE - No caching
- *
- * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
- *
- * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
- *
- * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
- *
- * BUGS: Needs a quiet flag for the cases where drivers do not mind
- * failures and do not wish system log messages to be sent.
- */
-int mtrr_add_page(unsigned long base, unsigned long size,
-		  unsigned int type, bool increment)
-{
-	unsigned long lbase, lsize;
-	int i, replace, error;
-	mtrr_type ltype;
-
-	if (!mtrr_enabled())
-		return -ENXIO;
-
-	error = mtrr_if->validate_add_page(base, size, type);
-	if (error)
-		return error;
-
-	if (type >= MTRR_NUM_TYPES) {
-		pr_warn("mtrr: type: %u invalid\n", type);
-		return -EINVAL;
-	}
-
-	/* If the type is WC, check that this processor supports it */
-	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
-		pr_warn("mtrr: your processor doesn't support write-combining\n");
-		return -ENOSYS;
-	}
-
-	if (!size) {
-		pr_warn("mtrr: zero sized request\n");
-		return -EINVAL;
-	}
-
-	if ((base | (base + size - 1)) >>
-	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
-		pr_warn("mtrr: base or size exceeds the MTRR width\n");
-		return -EINVAL;
-	}
-
-	error = -EINVAL;
-	replace = -1;
-
-	/* No CPU hotplug when we change MTRR entries */
-	get_online_cpus();
-
-	/* Search for existing MTRR  */
-	mutex_lock(&mtrr_mutex);
-	for (i = 0; i < num_var_ranges; ++i) {
-		mtrr_if->get(i, &lbase, &lsize, &ltype);
-		if (!lsize || base > lbase + lsize - 1 ||
-		    base + size - 1 < lbase)
-			continue;
-		/*
-		 * At this point we know there is some kind of
-		 * overlap/enclosure
-		 */
-		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
-			if (base <= lbase &&
-			    base + size - 1 >= lbase + lsize - 1) {
-				/*  New region encloses an existing region  */
-				if (type == ltype) {
-					replace = replace == -1 ? i : -2;
-					continue;
-				} else if (types_compatible(type, ltype))
-					continue;
-			}
-			pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
-				" 0x%lx000,0x%lx000\n", base, size, lbase,
-				lsize);
-			goto out;
-		}
-		/* New region is enclosed by an existing region */
-		if (ltype != type) {
-			if (types_compatible(type, ltype))
-				continue;
-			pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
-				base, size, mtrr_attrib_to_str(ltype),
-				mtrr_attrib_to_str(type));
-			goto out;
-		}
-		if (increment)
-			++mtrr_usage_table[i];
-		error = i;
-		goto out;
-	}
-	/* Search for an empty MTRR */
-	i = mtrr_if->get_free_region(base, size, replace);
-	if (i >= 0) {
-		set_mtrr_cpuslocked(i, base, size, type);
-		if (likely(replace < 0)) {
-			mtrr_usage_table[i] = 1;
-		} else {
-			mtrr_usage_table[i] = mtrr_usage_table[replace];
-			if (increment)
-				mtrr_usage_table[i]++;
-			if (unlikely(replace != i)) {
-				set_mtrr_cpuslocked(replace, 0, 0, 0);
-				mtrr_usage_table[replace] = 0;
-			}
-		}
-	} else {
-		pr_info("mtrr: no more MTRRs available\n");
-	}
-	error = i;
- out:
-	mutex_unlock(&mtrr_mutex);
-	put_online_cpus();
-	return error;
-}
-
-static int mtrr_check(unsigned long base, unsigned long size)
-{
-	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
-		pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
-		pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
-		dump_stack();
-		return -1;
-	}
-	return 0;
-}
-
-/**
- * mtrr_add - Add a memory type region
- * @base: Physical base address of region
- * @size: Physical size of region
- * @type: Type of MTRR desired
- * @increment: If this is true do usage counting on the region
- *
- * Memory type region registers control the caching on newer Intel and
- * non Intel processors. This function allows drivers to request an
- * MTRR is added. The details and hardware specifics of each processor's
- * implementation are hidden from the caller, but nevertheless the
- * caller should expect to need to provide a power of two size on an
- * equivalent power of two boundary.
- *
- * If the region cannot be added either because all regions are in use
- * or the CPU cannot support it a negative value is returned. On success
- * the register number for this entry is returned, but should be treated
- * as a cookie only.
- *
- * On a multiprocessor machine the changes are made to all processors.
- * This is required on x86 by the Intel processors.
- *
- * The available types are
- *
- * %MTRR_TYPE_UNCACHABLE - No caching
- *
- * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
- *
- * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
- *
- * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
- *
- * BUGS: Needs a quiet flag for the cases where drivers do not mind
- * failures and do not wish system log messages to be sent.
- */
-int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
-	     bool increment)
-{
-	if (!mtrr_enabled())
-		return -ENODEV;
-	if (mtrr_check(base, size))
-		return -EINVAL;
-	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
-			     increment);
-}
-
-/**
- * mtrr_del_page - delete a memory type region
- * @reg: Register returned by mtrr_add
- * @base: Physical base address
- * @size: Size of region
- *
- * If register is supplied then base and size are ignored. This is
- * how drivers should call it.
- *
- * Releases an MTRR region. If the usage count drops to zero the
- * register is freed and the region returns to default state.
- * On success the register is returned, on failure a negative error
- * code.
- */
-int mtrr_del_page(int reg, unsigned long base, unsigned long size)
-{
-	int i, max;
-	mtrr_type ltype;
-	unsigned long lbase, lsize;
-	int error = -EINVAL;
-
-	if (!mtrr_enabled())
-		return -ENODEV;
-
-	max = num_var_ranges;
-	/* No CPU hotplug when we change MTRR entries */
-	get_online_cpus();
-	mutex_lock(&mtrr_mutex);
-	if (reg < 0) {
-		/*  Search for existing MTRR  */
-		for (i = 0; i < max; ++i) {
-			mtrr_if->get(i, &lbase, &lsize, &ltype);
-			if (lbase == base && lsize == size) {
-				reg = i;
-				break;
-			}
-		}
-		if (reg < 0) {
-			pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
-				 base, size);
-			goto out;
-		}
-	}
-	if (reg >= max) {
-		pr_warn("mtrr: register: %d too big\n", reg);
-		goto out;
-	}
-	mtrr_if->get(reg, &lbase, &lsize, &ltype);
-	if (lsize < 1) {
-		pr_warn("mtrr: MTRR %d not used\n", reg);
-		goto out;
-	}
-	if (mtrr_usage_table[reg] < 1) {
-		pr_warn("mtrr: reg: %d has count=0\n", reg);
-		goto out;
-	}
-	if (--mtrr_usage_table[reg] < 1)
-		set_mtrr_cpuslocked(reg, 0, 0, 0);
-	error = reg;
- out:
-	mutex_unlock(&mtrr_mutex);
-	put_online_cpus();
-	return error;
-}
-
-/**
- * mtrr_del - delete a memory type region
- * @reg: Register returned by mtrr_add
- * @base: Physical base address
- * @size: Size of region
- *
- * If register is supplied then base and size are ignored. This is
- * how drivers should call it.
- *
- * Releases an MTRR region. If the usage count drops to zero the
- * register is freed and the region returns to default state.
- * On success the register is returned, on failure a negative error
- * code.
- */
-int mtrr_del(int reg, unsigned long base, unsigned long size)
-{
-	if (!mtrr_enabled())
-		return -ENODEV;
-	if (mtrr_check(base, size))
-		return -EINVAL;
-	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
-}
-
-/**
- * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
- * @base: Physical base address
- * @size: Size of region
- *
- * If PAT is available, this does nothing.  If PAT is unavailable, it
- * attempts to add a WC MTRR covering size bytes starting at base and
- * logs an error if this fails.
- *
- * The caller should provide a power of two size on an equivalent
- * power of two boundary.
- *
- * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
- * but drivers should not try to interpret that return value.
- */
-int arch_phys_wc_add(unsigned long base, unsigned long size)
-{
-	int ret;
-
-	if (pat_enabled() || !mtrr_enabled())
-		return 0;  /* Success!  (We don't need to do anything.) */
-
-	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
-	if (ret < 0) {
-		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
-			(void *)base, (void *)(base + size - 1));
-		return ret;
-	}
-	return ret + MTRR_TO_PHYS_WC_OFFSET;
-}
-EXPORT_SYMBOL(arch_phys_wc_add);
-
-/*
- * arch_phys_wc_del - undoes arch_phys_wc_add
- * @handle: Return value from arch_phys_wc_add
- *
- * This cleans up after mtrr_add_wc_if_needed.
- *
- * The API guarantees that mtrr_del_wc_if_needed(error code) and
- * mtrr_del_wc_if_needed(0) do nothing.
- */
-void arch_phys_wc_del(int handle)
-{
-	if (handle >= 1) {
-		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
-		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
-	}
-}
-EXPORT_SYMBOL(arch_phys_wc_del);
-
-/*
- * arch_phys_wc_index - translates arch_phys_wc_add's return value
- * @handle: Return value from arch_phys_wc_add
- *
- * This will turn the return value from arch_phys_wc_add into an mtrr
- * index suitable for debugging.
- *
- * Note: There is no legitimate use for this function, except possibly
- * in a printk line.  Alas, there is an illegitimate use in some ancient
- * drm ioctls.
- */
-int arch_phys_wc_index(int handle)
-{
-	if (handle < MTRR_TO_PHYS_WC_OFFSET)
-		return -1;
-	else
-		return handle - MTRR_TO_PHYS_WC_OFFSET;
-}
-EXPORT_SYMBOL_GPL(arch_phys_wc_index);
-
-/*
- * HACK ALERT!
- * These should be called implicitly, but we can't do that until all the
- * initcall stuff is done...
- */
-static void __init init_ifs(void)
-{
-#ifndef CONFIG_X86_64
-	amd_init_mtrr();
-	cyrix_init_mtrr();
-	centaur_init_mtrr();
-#endif
-}
-
-/* The suspend/resume methods are only for CPUs without MTRRs. CPUs using the
- * generic MTRR driver don't require this.
- */
-struct mtrr_value {
-	mtrr_type	ltype;
-	unsigned long	lbase;
-	unsigned long	lsize;
-};
-
-static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
-
-static int mtrr_save(void)
-{
-	int i;
-
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &mtrr_value[i].lbase,
-				&mtrr_value[i].lsize,
-				&mtrr_value[i].ltype);
-	}
-	return 0;
-}
-
-static void mtrr_restore(void)
-{
-	int i;
-
-	for (i = 0; i < num_var_ranges; i++) {
-		if (mtrr_value[i].lsize) {
-			set_mtrr(i, mtrr_value[i].lbase,
-				    mtrr_value[i].lsize,
-				    mtrr_value[i].ltype);
-		}
-	}
-}
-
-
-
-static struct syscore_ops mtrr_syscore_ops = {
-	.suspend	= mtrr_save,
-	.resume		= mtrr_restore,
-};
-
-int __initdata changed_by_mtrr_cleanup;
-
-#define SIZE_OR_MASK_BITS(n)  (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
-/**
- * mtrr_bp_init - initialize mtrrs on the boot CPU
- *
- * This needs to be called early, before any of the other CPUs are
- * initialized (i.e. before smp_init()).
- *
- */
-void __init mtrr_bp_init(void)
-{
-	u32 phys_addr;
-
-	init_ifs();
-
-	phys_addr = 32;
-
-	if (boot_cpu_has(X86_FEATURE_MTRR)) {
-		mtrr_if = &generic_mtrr_ops;
-		size_or_mask = SIZE_OR_MASK_BITS(36);
-		size_and_mask = 0x00f00000;
-		phys_addr = 36;
-
-		/*
-		 * This is an AMD-specific MSR, but we assume (hope?) that
-		 * Intel will implement it too when they extend the address
-		 * bus of the Xeon.
-		 */
-		if (cpuid_eax(0x80000000) >= 0x80000008) {
-			phys_addr = cpuid_eax(0x80000008) & 0xff;
-			/* CPUID workaround for Intel 0F33/0F34 CPU */
-			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
-			    boot_cpu_data.x86 == 0xF &&
-			    boot_cpu_data.x86_model == 0x3 &&
-			    (boot_cpu_data.x86_stepping == 0x3 ||
-			     boot_cpu_data.x86_stepping == 0x4))
-				phys_addr = 36;
-
-			size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
-			size_and_mask = ~size_or_mask & 0xfffff00000ULL;
-		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
-			   boot_cpu_data.x86 == 6) {
-			/*
-			 * The VIA C* family has Intel-style MTRRs,
-			 * but doesn't support PAE
-			 */
-			size_or_mask = SIZE_OR_MASK_BITS(32);
-			size_and_mask = 0;
-			phys_addr = 32;
-		}
-	} else {
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_AMD:
-			if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) {
-				/* Pre-Athlon (K6) AMD CPU MTRRs */
-				mtrr_if = mtrr_ops[X86_VENDOR_AMD];
-				size_or_mask = SIZE_OR_MASK_BITS(32);
-				size_and_mask = 0;
-			}
-			break;
-		case X86_VENDOR_CENTAUR:
-			if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) {
-				mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
-				size_or_mask = SIZE_OR_MASK_BITS(32);
-				size_and_mask = 0;
-			}
-			break;
-		case X86_VENDOR_CYRIX:
-			if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) {
-				mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
-				size_or_mask = SIZE_OR_MASK_BITS(32);
-				size_and_mask = 0;
-			}
-			break;
-		default:
-			break;
-		}
-	}
-
-	if (mtrr_if) {
-		__mtrr_enabled = true;
-		set_num_var_ranges();
-		init_table();
-		if (use_intel()) {
-			/* BIOS may override */
-			__mtrr_enabled = get_mtrr_state();
-
-			if (mtrr_enabled())
-				mtrr_bp_pat_init();
-
-			if (mtrr_cleanup(phys_addr)) {
-				changed_by_mtrr_cleanup = 1;
-				mtrr_if->set_all();
-			}
-		}
-	}
-
-	if (!mtrr_enabled()) {
-		pr_info("MTRR: Disabled\n");
-
-		/*
-		 * PAT initialization relies on MTRR's rendezvous handler.
-		 * Skip PAT init until the handler can initialize both
-		 * features independently.
-		 */
-		pat_disable("MTRRs disabled, skipping PAT initialization too.");
-	}
-}
-
-void mtrr_ap_init(void)
-{
-	if (!mtrr_enabled())
-		return;
-
-	if (!use_intel() || mtrr_aps_delayed_init)
-		return;
-	/*
-	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
-	 * being changed, but this routine is called at CPU boot time and
-	 * holding the lock would break that.
-	 *
-	 * This routine is called in two cases:
-	 *
-	 *   1. very early during software resume, when there absolutely
-	 *      are no MTRR entry changes;
-	 *
-	 *   2. at CPU hot-add time. We let mtrr_add/del_page hold the CPU
-	 *      hotplug lock to prevent MTRR entry changes.
-	 */
-	set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
-}
-
-/**
- * Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
- */
-void mtrr_save_state(void)
-{
-	int first_cpu;
-
-	if (!mtrr_enabled())
-		return;
-
-	first_cpu = cpumask_first(cpu_online_mask);
-	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
-}
-
-void set_mtrr_aps_delayed_init(void)
-{
-	if (!mtrr_enabled())
-		return;
-	if (!use_intel())
-		return;
-
-	mtrr_aps_delayed_init = true;
-}
-
-/*
- * Delayed MTRR initialization for all APs
- */
-void mtrr_aps_init(void)
-{
-	if (!use_intel() || !mtrr_enabled())
-		return;
-
-	/*
-	 * Check if someone has requested the delay of AP MTRR initialization,
-	 * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
-	 * then we are done.
-	 */
-	if (!mtrr_aps_delayed_init)
-		return;
-
-	set_mtrr(~0U, 0, 0, 0);
-	mtrr_aps_delayed_init = false;
-}
-
-void mtrr_bp_restore(void)
-{
-	if (!use_intel() || !mtrr_enabled())
-		return;
-
-	mtrr_if->set_all();
-}
-
-static int __init mtrr_init_finialize(void)
-{
-	if (!mtrr_enabled())
-		return 0;
-
-	if (use_intel()) {
-		if (!changed_by_mtrr_cleanup)
-			mtrr_state_warn();
-		return 0;
-	}
-
-	/*
-	 * The CPU has no MTRRs and seems not to support SMP. Such CPUs
-	 * have vendor-specific drivers, so we use a tricky method to
-	 * support suspend/resume for them.
-	 *
-	 * TBD: is there any system with such a CPU that supports
-	 * suspend/resume? If not, we should remove this code.
-	 */
-	register_syscore_ops(&mtrr_syscore_ops);
-
-	return 0;
-}
-subsys_initcall(mtrr_init_finialize);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
new file mode 100644
index 0000000..9a19c80
--- /dev/null
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -0,0 +1,890 @@
+/*  Generic MTRR (Memory Type Range Register) driver.
+
+    Copyright (C) 1997-2000  Richard Gooch
+    Copyright (c) 2002	     Patrick Mochel
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
+    The postal address is:
+      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+
+    Source: "Pentium Pro Family Developer's Manual, Volume 3:
+    Operating System Writer's Guide" (Intel document number 242692),
+    section 11.11.7
+
+    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
+    on 6-7 March 2002.
+    Source: Intel Architecture Software Developers Manual, Volume 3:
+    System Programming Guide; Section 9.11. (1997 edition - PPro).
+*/
+
+#define DEBUG
+
+#include <linux/types.h> /* FIXME: kvm_para.h needs this */
+
+#include <linux/stop_machine.h>
+#include <linux/kvm_para.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/mutex.h>
+#include <linux/init.h>
+#include <linux/sort.h>
+#include <linux/cpu.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/syscore_ops.h>
+#include <linux/rcupdate.h>
+
+#include <asm/cpufeature.h>
+#include <asm/e820/api.h>
+#include <asm/mtrr.h>
+#include <asm/msr.h>
+#include <asm/pat.h>
+
+#include "mtrr.h"
+
+/* arch_phys_wc_add returns an MTRR register index plus this offset. */
+#define MTRR_TO_PHYS_WC_OFFSET 1000
+
+u32 num_var_ranges;
+static bool __mtrr_enabled;
+
+static bool mtrr_enabled(void)
+{
+	return __mtrr_enabled;
+}
+
+unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
+static DEFINE_MUTEX(mtrr_mutex);
+
+u64 size_or_mask, size_and_mask;
+static bool mtrr_aps_delayed_init;
+
+static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
+
+const struct mtrr_ops *mtrr_if;
+
+static void set_mtrr(unsigned int reg, unsigned long base,
+		     unsigned long size, mtrr_type type);
+
+void __init set_mtrr_ops(const struct mtrr_ops *ops)
+{
+	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
+		mtrr_ops[ops->vendor] = ops;
+}
+
+/*  Returns non-zero if we have the write-combining memory type  */
+static int have_wrcomb(void)
+{
+	struct pci_dev *dev;
+
+	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
+	if (dev != NULL) {
+		/*
+		 * ServerWorks LE chipsets < rev 6 have problems with
+		 * write-combining. Don't allow it and leave room for other
+		 * chipsets to be tagged
+		 */
+		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
+		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
+		    dev->revision <= 5) {
+			pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+			pci_dev_put(dev);
+			return 0;
+		}
+		/*
+		 * Intel 450NX erratum #23: non-ascending cacheline evictions to
+		 * write-combining memory may result in data corruption.
+		 */
+		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
+		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
+			pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
+			pci_dev_put(dev);
+			return 0;
+		}
+		pci_dev_put(dev);
+	}
+	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
+}
+
+/*  This function sets num_var_ranges to the number of variable MTRRs  */
+static void __init set_num_var_ranges(void)
+{
+	unsigned long config = 0, dummy;
+
+	if (use_intel())
+		rdmsr(MSR_MTRRcap, config, dummy);
+	else if (is_cpu(AMD))
+		config = 2;
+	else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
+		config = 8;
+
+	num_var_ranges = config & 0xff;
+}
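+
+/*
+ * Illustrative note (not part of this patch): per the SDM, the low byte
+ * of MSR_MTRRcap is VCNT, the variable-range count.  A CPU reporting 10
+ * variable ranges thus reads back 0x0a in that byte; the higher feature
+ * bits (fixed-range and WC support) are not consumed here.
+ */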
+
+static void __init init_table(void)
+{
+	int i, max;
+
+	max = num_var_ranges;
+	for (i = 0; i < max; i++)
+		mtrr_usage_table[i] = 1;
+}
+
+struct set_mtrr_data {
+	unsigned long	smp_base;
+	unsigned long	smp_size;
+	unsigned int	smp_reg;
+	mtrr_type	smp_type;
+};
+
+/**
+ * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
+ * by all the CPUs.
+ * @info: pointer to mtrr configuration data
+ *
+ * Returns zero.
+ */
+static int mtrr_rendezvous_handler(void *info)
+{
+	struct set_mtrr_data *data = info;
+
+	/*
+	 * We use this same function to initialize the mtrrs during boot,
+	 * resume, runtime cpu online and on an explicit request to set a
+	 * specific MTRR.
+	 *
+	 * During boot or suspend, the state of the boot cpu's mtrrs has been
+	 * saved, and we want to replicate that across all the cpus that come
+	 * online (either at the end of boot or resume or during a runtime cpu
+	 * online). If we're doing that, @reg is set to something special and on
+	 * all the cpus we do mtrr_if->set_all() (on the logical cpu that
+	 * started the boot/resume sequence, this might be a duplicate
+	 * set_all()).
+	 */
+	if (data->smp_reg != ~0U) {
+		mtrr_if->set(data->smp_reg, data->smp_base,
+			     data->smp_size, data->smp_type);
+	} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
+		mtrr_if->set_all();
+	}
+	return 0;
+}
+
+static inline int types_compatible(mtrr_type type1, mtrr_type type2)
+{
+	return type1 == MTRR_TYPE_UNCACHABLE ||
+	       type2 == MTRR_TYPE_UNCACHABLE ||
+	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
+	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
+}
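+
+/*
+ * Illustrative examples (not part of this patch): the net effect of the
+ * checks above is that UC may overlap any type, and WT may overlap WB
+ * (the effective type of such an overlap is well defined: WT wins).
+ * Any other combination of distinct types conflicts:
+ *
+ *	types_compatible(MTRR_TYPE_WRTHROUGH, MTRR_TYPE_WRBACK)  returns 1
+ *	types_compatible(MTRR_TYPE_WRCOMB, MTRR_TYPE_WRBACK)     returns 0
+ */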
+
+/**
+ * set_mtrr - update mtrrs on all processors
+ * @reg:	mtrr in question
+ * @base:	mtrr base
+ * @size:	mtrr size
+ * @type:	mtrr type
+ *
+ * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
+ *
+ * 1. Queue work to do the following on all processors:
+ * 2. Disable Interrupts
+ * 3. Wait for all procs to do so
+ * 4. Enter no-fill cache mode
+ * 5. Flush caches
+ * 6. Clear PGE bit
+ * 7. Flush all TLBs
+ * 8. Disable all range registers
+ * 9. Update the MTRRs
+ * 10. Enable all range registers
+ * 11. Flush all TLBs and caches again
+ * 12. Enter normal cache mode and reenable caching
+ * 13. Set PGE
+ * 14. Wait for buddies to catch up
+ * 15. Enable interrupts.
+ *
+ * What does that mean for us? Well, stop_machine() will ensure that
+ * the rendezvous handler is started on each CPU. And in lockstep they
+ * do the state transition of disabling interrupts, updating MTRR's
+ * (the CPU vendors may each do it differently, so we call mtrr_if->set()
+ * callback and let them take care of it.) and enabling interrupts.
+ *
+ * Note that the mechanism is the same for UP systems, too; all the SMP stuff
+ * becomes nops.
+ */
+static void
+set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
+
+	stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask);
+}
+
+static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base,
+				unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
+
+	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
+}
+
+static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base,
+				      unsigned long size, mtrr_type type)
+{
+	struct set_mtrr_data data = { .smp_reg = reg,
+				      .smp_base = base,
+				      .smp_size = size,
+				      .smp_type = type
+				    };
+
+	stop_machine_from_inactive_cpu(mtrr_rendezvous_handler, &data,
+				       cpu_callout_mask);
+}
+
+/**
+ * mtrr_add_page - Add a memory type region
+ * @base: Physical base address of region in pages (in units of 4 kB!)
+ * @size: Physical size of region in pages (4 kB)
+ * @type: Type of MTRR desired
+ * @increment: If this is true do usage counting on the region
+ *
+ * Memory type region registers control the caching on newer Intel and
+ * non Intel processors. This function allows drivers to request an
+ * MTRR is added. The details and hardware specifics of each processor's
+ * implementation are hidden from the caller, but nevertheless the
+ * caller should expect to need to provide a power of two size on an
+ * equivalent power of two boundary.
+ *
+ * If the region cannot be added either because all regions are in use
+ * or the CPU cannot support it a negative value is returned. On success
+ * the register number for this entry is returned, but should be treated
+ * as a cookie only.
+ *
+ * On a multiprocessor machine the changes are made to all processors.
+ * This is required on x86 by the Intel processors.
+ *
+ * The available types are
+ *
+ * %MTRR_TYPE_UNCACHABLE - No caching
+ *
+ * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
+ *
+ * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
+ *
+ * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
+ *
+ * BUGS: Needs a quiet flag for the cases where drivers do not mind
+ * failures and do not wish system log messages to be sent.
+ */
+int mtrr_add_page(unsigned long base, unsigned long size,
+		  unsigned int type, bool increment)
+{
+	unsigned long lbase, lsize;
+	int i, replace, error;
+	mtrr_type ltype;
+
+	if (!mtrr_enabled())
+		return -ENXIO;
+
+	error = mtrr_if->validate_add_page(base, size, type);
+	if (error)
+		return error;
+
+	if (type >= MTRR_NUM_TYPES) {
+		pr_warn("type: %u invalid\n", type);
+		return -EINVAL;
+	}
+
+	/* If the type is WC, check that this processor supports it */
+	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
+		pr_warn("your processor doesn't support write-combining\n");
+		return -ENOSYS;
+	}
+
+	if (!size) {
+		pr_warn("zero sized request\n");
+		return -EINVAL;
+	}
+
+	if ((base | (base + size - 1)) >>
+	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
+		pr_warn("base or size exceeds the MTRR width\n");
+		return -EINVAL;
+	}
+
+	error = -EINVAL;
+	replace = -1;
+
+	/* No CPU hotplug when we change MTRR entries */
+	get_online_cpus();
+
+	/* Search for existing MTRR  */
+	mutex_lock(&mtrr_mutex);
+	for (i = 0; i < num_var_ranges; ++i) {
+		mtrr_if->get(i, &lbase, &lsize, &ltype);
+		if (!lsize || base > lbase + lsize - 1 ||
+		    base + size - 1 < lbase)
+			continue;
+		/*
+		 * At this point we know there is some kind of
+		 * overlap/enclosure
+		 */
+		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
+			if (base <= lbase &&
+			    base + size - 1 >= lbase + lsize - 1) {
+				/*  New region encloses an existing region  */
+				if (type == ltype) {
+					replace = replace == -1 ? i : -2;
+					continue;
+				} else if (types_compatible(type, ltype))
+					continue;
+			}
+			pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
+				lsize);
+			goto out;
+		}
+		/* New region is enclosed by an existing region */
+		if (ltype != type) {
+			if (types_compatible(type, ltype))
+				continue;
+			pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
+				base, size, mtrr_attrib_to_str(ltype),
+				mtrr_attrib_to_str(type));
+			goto out;
+		}
+		if (increment)
+			++mtrr_usage_table[i];
+		error = i;
+		goto out;
+	}
+	/* Search for an empty MTRR */
+	i = mtrr_if->get_free_region(base, size, replace);
+	if (i >= 0) {
+		set_mtrr_cpuslocked(i, base, size, type);
+		if (likely(replace < 0)) {
+			mtrr_usage_table[i] = 1;
+		} else {
+			mtrr_usage_table[i] = mtrr_usage_table[replace];
+			if (increment)
+				mtrr_usage_table[i]++;
+			if (unlikely(replace != i)) {
+				set_mtrr_cpuslocked(replace, 0, 0, 0);
+				mtrr_usage_table[replace] = 0;
+			}
+		}
+	} else {
+		pr_info("no more MTRRs available\n");
+	}
+	error = i;
+ out:
+	mutex_unlock(&mtrr_mutex);
+	put_online_cpus();
+	return error;
+}
+
+static int mtrr_check(unsigned long base, unsigned long size)
+{
+	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
+		pr_warn("size and base must be multiples of 4 kiB\n");
+		pr_debug("size: 0x%lx  base: 0x%lx\n", size, base);
+		dump_stack();
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * mtrr_add - Add a memory type region
+ * @base: Physical base address of region
+ * @size: Physical size of region
+ * @type: Type of MTRR desired
+ * @increment: If this is true do usage counting on the region
+ *
+ * Memory type region registers control the caching on newer Intel and
+ * non Intel processors. This function allows drivers to request an
+ * MTRR is added. The details and hardware specifics of each processor's
+ * implementation are hidden from the caller, but nevertheless the
+ * caller should expect to need to provide a power of two size on an
+ * equivalent power of two boundary.
+ *
+ * If the region cannot be added either because all regions are in use
+ * or the CPU cannot support it a negative value is returned. On success
+ * the register number for this entry is returned, but should be treated
+ * as a cookie only.
+ *
+ * On a multiprocessor machine the changes are made to all processors.
+ * This is required on x86 by the Intel processors.
+ *
+ * The available types are
+ *
+ * %MTRR_TYPE_UNCACHABLE - No caching
+ *
+ * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
+ *
+ * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
+ *
+ * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
+ *
+ * BUGS: Needs a quiet flag for the cases where drivers do not mind
+ * failures and do not wish system log messages to be sent.
+ */
+int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+	     bool increment)
+{
+	if (!mtrr_enabled())
+		return -ENODEV;
+	if (mtrr_check(base, size))
+		return -EINVAL;
+	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
+			     increment);
+}
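+
+/*
+ * Usage sketch (illustrative only, not part of this patch; the base and
+ * size values are hypothetical): a driver maps 16 MB of MMIO write-
+ * combined and releases the region on teardown.  The return value is an
+ * opaque cookie; when it is passed back to mtrr_del(), base and size
+ * are ignored:
+ *
+ *	int reg = mtrr_add(0xd0000000UL, 0x1000000UL, MTRR_TYPE_WRCOMB, true);
+ *
+ *	if (reg < 0)
+ *		return reg;
+ *	...
+ *	mtrr_del(reg, 0, 0);
+ */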
+
+/**
+ * mtrr_del_page - delete a memory type region
+ * @reg: Register returned by mtrr_add
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If register is supplied then base and size are ignored. This is
+ * how drivers should call it.
+ *
+ * Releases an MTRR region. If the usage count drops to zero the
+ * register is freed and the region returns to default state.
+ * On success the register is returned, on failure a negative error
+ * code.
+ */
+int mtrr_del_page(int reg, unsigned long base, unsigned long size)
+{
+	int i, max;
+	mtrr_type ltype;
+	unsigned long lbase, lsize;
+	int error = -EINVAL;
+
+	if (!mtrr_enabled())
+		return -ENODEV;
+
+	max = num_var_ranges;
+	/* No CPU hotplug when we change MTRR entries */
+	get_online_cpus();
+	mutex_lock(&mtrr_mutex);
+	if (reg < 0) {
+		/*  Search for existing MTRR  */
+		for (i = 0; i < max; ++i) {
+			mtrr_if->get(i, &lbase, &lsize, &ltype);
+			if (lbase == base && lsize == size) {
+				reg = i;
+				break;
+			}
+		}
+		if (reg < 0) {
+			pr_debug("no MTRR for %lx000,%lx000 found\n",
+				 base, size);
+			goto out;
+		}
+	}
+	if (reg >= max) {
+		pr_warn("register: %d too big\n", reg);
+		goto out;
+	}
+	mtrr_if->get(reg, &lbase, &lsize, &ltype);
+	if (lsize < 1) {
+		pr_warn("MTRR %d not used\n", reg);
+		goto out;
+	}
+	if (mtrr_usage_table[reg] < 1) {
+		pr_warn("reg: %d has count=0\n", reg);
+		goto out;
+	}
+	if (--mtrr_usage_table[reg] < 1)
+		set_mtrr_cpuslocked(reg, 0, 0, 0);
+	error = reg;
+ out:
+	mutex_unlock(&mtrr_mutex);
+	put_online_cpus();
+	return error;
+}
+
+/**
+ * mtrr_del - delete a memory type region
+ * @reg: Register returned by mtrr_add
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If register is supplied then base and size are ignored. This is
+ * how drivers should call it.
+ *
+ * Releases an MTRR region. If the usage count drops to zero the
+ * register is freed and the region returns to default state.
+ * On success the register is returned, on failure a negative error
+ * code.
+ */
+int mtrr_del(int reg, unsigned long base, unsigned long size)
+{
+	if (!mtrr_enabled())
+		return -ENODEV;
+	if (mtrr_check(base, size))
+		return -EINVAL;
+	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+/**
+ * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If PAT is available, this does nothing.  If PAT is unavailable, it
+ * attempts to add a WC MTRR covering size bytes starting at base and
+ * logs an error if this fails.
+ *
+ * The caller should provide a power of two size on an equivalent
+ * power of two boundary.
+ *
+ * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
+ * but drivers should not try to interpret that return value.
+ */
+int arch_phys_wc_add(unsigned long base, unsigned long size)
+{
+	int ret;
+
+	if (pat_enabled() || !mtrr_enabled())
+		return 0;  /* Success!  (We don't need to do anything.) */
+
+	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
+	if (ret < 0) {
+		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
+			(void *)base, (void *)(base + size - 1));
+		return ret;
+	}
+	return ret + MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL(arch_phys_wc_add);
+
+/*
+ * arch_phys_wc_del - undoes arch_phys_wc_add
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This cleans up after arch_phys_wc_add.
+ *
+ * The API guarantees that arch_phys_wc_del(error code) and
+ * arch_phys_wc_del(0) do nothing.
+ */
+void arch_phys_wc_del(int handle)
+{
+	if (handle >= 1) {
+		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
+		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
+	}
+}
+EXPORT_SYMBOL(arch_phys_wc_del);
+
+/*
+ * arch_phys_wc_index - translates arch_phys_wc_add's return value
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This will turn the return value from arch_phys_wc_add into an mtrr
+ * index suitable for debugging.
+ *
+ * Note: There is no legitimate use for this function, except possibly
+ * in a printk line.  Alas, there is an illegitimate use in some ancient
+ * drm ioctls.
+ */
+int arch_phys_wc_index(int handle)
+{
+	if (handle < MTRR_TO_PHYS_WC_OFFSET)
+		return -1;
+	else
+		return handle - MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL_GPL(arch_phys_wc_index);
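+
+/*
+ * Usage sketch for the three helpers above (illustrative only, not part
+ * of this patch; "par" and the fbdev fields are hypothetical).  A driver
+ * stores the opaque handle and hands it back unmodified; on PAT systems
+ * the add is a no-op returning 0, which arch_phys_wc_del() also accepts:
+ *
+ *	par->wc_cookie = arch_phys_wc_add(info->fix.smem_start,
+ *					  info->fix.smem_len);
+ *	pr_debug("WC handle maps to MTRR %d\n",
+ *		 arch_phys_wc_index(par->wc_cookie));
+ *	...
+ *	arch_phys_wc_del(par->wc_cookie);
+ */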
+
+/*
+ * HACK ALERT!
+ * These should be called implicitly, but we can't do that until all the
+ * initcall stuff is done...
+ */
+static void __init init_ifs(void)
+{
+#ifndef CONFIG_X86_64
+	amd_init_mtrr();
+	cyrix_init_mtrr();
+	centaur_init_mtrr();
+#endif
+}
+
+/* The suspend/resume methods are only for CPUs without MTRRs. CPUs using the
+ * generic MTRR driver don't require this.
+ */
+struct mtrr_value {
+	mtrr_type	ltype;
+	unsigned long	lbase;
+	unsigned long	lsize;
+};
+
+static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
+
+static int mtrr_save(void)
+{
+	int i;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &mtrr_value[i].lbase,
+				&mtrr_value[i].lsize,
+				&mtrr_value[i].ltype);
+	}
+	return 0;
+}
+
+static void mtrr_restore(void)
+{
+	int i;
+
+	for (i = 0; i < num_var_ranges; i++) {
+		if (mtrr_value[i].lsize) {
+			set_mtrr(i, mtrr_value[i].lbase,
+				    mtrr_value[i].lsize,
+				    mtrr_value[i].ltype);
+		}
+	}
+}
+
+
+
+static struct syscore_ops mtrr_syscore_ops = {
+	.suspend	= mtrr_save,
+	.resume		= mtrr_restore,
+};
+
+int __initdata changed_by_mtrr_cleanup;
+
+#define SIZE_OR_MASK_BITS(n)  (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
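+
+/*
+ * Worked example (illustrative): with PAGE_SHIFT == 12,
+ * SIZE_OR_MASK_BITS(36) is ~((1ULL << 24) - 1), i.e. all bits from
+ * bit 24 upwards set -- the complement of a 36-bit physical address
+ * expressed in 4K pages.
+ */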
+/**
+ * mtrr_bp_init - initialize mtrrs on the boot CPU
+ *
+ * This needs to be called early, before any of the other CPUs are
+ * initialized (i.e. before smp_init()).
+ *
+ */
+void __init mtrr_bp_init(void)
+{
+	u32 phys_addr;
+
+	init_ifs();
+
+	phys_addr = 32;
+
+	if (boot_cpu_has(X86_FEATURE_MTRR)) {
+		mtrr_if = &generic_mtrr_ops;
+		size_or_mask = SIZE_OR_MASK_BITS(36);
+		size_and_mask = 0x00f00000;
+		phys_addr = 36;
+
+		/*
+		 * This is an AMD-specific MSR, but we assume (hope?) that
+		 * Intel will implement it too when they extend the address
+		 * bus of the Xeon.
+		 */
+		if (cpuid_eax(0x80000000) >= 0x80000008) {
+			phys_addr = cpuid_eax(0x80000008) & 0xff;
+			/* CPUID workaround for Intel 0F33/0F34 CPU */
+			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+			    boot_cpu_data.x86 == 0xF &&
+			    boot_cpu_data.x86_model == 0x3 &&
+			    (boot_cpu_data.x86_stepping == 0x3 ||
+			     boot_cpu_data.x86_stepping == 0x4))
+				phys_addr = 36;
+
+			size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
+			size_and_mask = ~size_or_mask & 0xfffff00000ULL;
+		} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
+			   boot_cpu_data.x86 == 6) {
+			/*
+			 * The VIA C* family has Intel-style MTRRs,
+			 * but doesn't support PAE
+			 */
+			size_or_mask = SIZE_OR_MASK_BITS(32);
+			size_and_mask = 0;
+			phys_addr = 32;
+		}
+	} else {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			if (cpu_feature_enabled(X86_FEATURE_K6_MTRR)) {
+				/* Pre-Athlon (K6) AMD CPU MTRRs */
+				mtrr_if = mtrr_ops[X86_VENDOR_AMD];
+				size_or_mask = SIZE_OR_MASK_BITS(32);
+				size_and_mask = 0;
+			}
+			break;
+		case X86_VENDOR_CENTAUR:
+			if (cpu_feature_enabled(X86_FEATURE_CENTAUR_MCR)) {
+				mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
+				size_or_mask = SIZE_OR_MASK_BITS(32);
+				size_and_mask = 0;
+			}
+			break;
+		case X86_VENDOR_CYRIX:
+			if (cpu_feature_enabled(X86_FEATURE_CYRIX_ARR)) {
+				mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
+				size_or_mask = SIZE_OR_MASK_BITS(32);
+				size_and_mask = 0;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (mtrr_if) {
+		__mtrr_enabled = true;
+		set_num_var_ranges();
+		init_table();
+		if (use_intel()) {
+			/* BIOS may override */
+			__mtrr_enabled = get_mtrr_state();
+
+			if (mtrr_enabled())
+				mtrr_bp_pat_init();
+
+			if (mtrr_cleanup(phys_addr)) {
+				changed_by_mtrr_cleanup = 1;
+				mtrr_if->set_all();
+			}
+		}
+	}
+
+	if (!mtrr_enabled()) {
+		pr_info("Disabled\n");
+
+		/*
+		 * PAT initialization relies on MTRR's rendezvous handler.
+		 * Skip PAT init until the handler can initialize both
+		 * features independently.
+		 */
+		pat_disable("MTRRs disabled, skipping PAT initialization too.");
+	}
+}
+
+void mtrr_ap_init(void)
+{
+	if (!mtrr_enabled())
+		return;
+
+	if (!use_intel() || mtrr_aps_delayed_init)
+		return;
+
+	rcu_cpu_starting(smp_processor_id());
+
+	/*
+	 * Ideally we should hold mtrr_mutex here to avoid MTRR entries
+	 * being changed, but this routine is called at CPU boot time and
+	 * holding the lock would break that.
+	 *
+	 * This routine is called in two cases:
+	 *
+	 *   1. very early during software resume, when there absolutely
+	 *      are no MTRR entry changes;
+	 *
+	 *   2. at CPU hot-add time. We let mtrr_add/del_page hold the CPU
+	 *      hotplug lock to prevent MTRR entry changes.
+	 */
+	set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
+}
+
+/**
+ * Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
+ */
+void mtrr_save_state(void)
+{
+	int first_cpu;
+
+	if (!mtrr_enabled())
+		return;
+
+	first_cpu = cpumask_first(cpu_online_mask);
+	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
+}
+
+void set_mtrr_aps_delayed_init(void)
+{
+	if (!mtrr_enabled())
+		return;
+	if (!use_intel())
+		return;
+
+	mtrr_aps_delayed_init = true;
+}
+
+/*
+ * Delayed MTRR initialization for all APs
+ */
+void mtrr_aps_init(void)
+{
+	if (!use_intel() || !mtrr_enabled())
+		return;
+
+	/*
+	 * Check if someone has requested the delay of AP MTRR initialization,
+	 * by doing set_mtrr_aps_delayed_init(), prior to this point. If not,
+	 * then we are done.
+	 */
+	if (!mtrr_aps_delayed_init)
+		return;
+
+	set_mtrr(~0U, 0, 0, 0);
+	mtrr_aps_delayed_init = false;
+}
+
+void mtrr_bp_restore(void)
+{
+	if (!use_intel() || !mtrr_enabled())
+		return;
+
+	mtrr_if->set_all();
+}
+
+static int __init mtrr_init_finialize(void)
+{
+	if (!mtrr_enabled())
+		return 0;
+
+	if (use_intel()) {
+		if (!changed_by_mtrr_cleanup)
+			mtrr_state_warn();
+		return 0;
+	}
+
+	/*
+	 * The CPU has no MTRRs and seems not to support SMP. Such CPUs
+	 * have vendor-specific drivers, so we use a tricky method to
+	 * support suspend/resume for them.
+	 *
+	 * TBD: is there any system with such a CPU that supports
+	 * suspend/resume? If not, we should remove this code.
+	 */
+	register_syscore_ops(&mtrr_syscore_ops);
+
+	return 0;
+}
+subsys_initcall(mtrr_init_finialize);
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index b099024..81c0afb 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,7 @@
  * exists, use it for populating initial_apicid and cpu topology
  * detection.
  */
-void detect_extended_topology(struct cpuinfo_x86 *c)
+int detect_extended_topology(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
 	unsigned int eax, ebx, ecx, edx, sub_index;
@@ -36,7 +36,7 @@
 	static bool printed;
 
 	if (c->cpuid_level < 0xb)
-		return;
+		return -1;
 
 	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
 
@@ -44,7 +44,7 @@
 	 * check if the cpuid leaf 0xb is actually implemented.
 	 */
 	if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
-		return;
+		return -1;
 
 	set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
 
@@ -95,6 +95,6 @@
 			       c->cpu_core_id);
 		printed = 1;
 	}
-	return;
 #endif
+	return 0;
 }
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 18fa9d7..666a284 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,11 +22,14 @@
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
 
+#define OPCODE_BUFSIZE 64
+
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
-static unsigned int code_bytes = 64;
 static int die_counter;
 
+static struct pt_regs exec_summary_regs;
+
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
 		   struct stack_info *info)
 {
@@ -69,9 +72,62 @@
 	printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
+/*
+ * There are a couple of reasons for the 2/3rds prologue, courtesy of Linus:
+ *
+ * In the case where we don't have the exact kernel image (which, if we did, we
+ * could simply disassemble and navigate to the RIP), the purpose of the bigger
+ * prologue is to have more context and to be able to correlate the code from
+ * the different toolchains better.
+ *
+ * In addition, it helps in recreating the register allocation of the failing
+ * kernel and thus in making sense of the register dump.
+ *
+ * What is more, the additional complication of a variable-length insn arch like
+ * x86 warrants having a longer byte sequence before rIP so that the disassembler
+ * can "sync" up properly and find instruction boundaries when decoding the
+ * opcode bytes.
+ *
+ * Thus, the 2/3rds prologue and the 64-byte OPCODE_BUFSIZE are just a random
+ * guesstimate in an attempt to achieve all of the above.
+ */
+void show_opcodes(u8 *rip, const char *loglvl)
+{
+	unsigned int code_prologue = OPCODE_BUFSIZE * 2 / 3;
+	u8 opcodes[OPCODE_BUFSIZE];
+	u8 *ip;
+	int i;
+
+	printk("%sCode: ", loglvl);
+
+	ip = (u8 *)rip - code_prologue;
+	if (probe_kernel_read(opcodes, ip, OPCODE_BUFSIZE)) {
+		pr_cont("Bad RIP value.\n");
+		return;
+	}
+
+	for (i = 0; i < OPCODE_BUFSIZE; i++, ip++) {
+		if (ip == rip)
+			pr_cont("<%02x> ", opcodes[i]);
+		else
+			pr_cont("%02x ", opcodes[i]);
+	}
+	pr_cont("\n");
+}
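+
+/*
+ * Example output (illustrative): with OPCODE_BUFSIZE == 64 the dump
+ * covers 42 bytes before the faulting instruction and 21 bytes after
+ * it, with the byte at rIP in angle brackets:
+ *
+ *	Code: ... 48 89 e5 <0f> 0b 5d c3 ...
+ */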
+
+void show_ip(struct pt_regs *regs, const char *loglvl)
+{
+#ifdef CONFIG_X86_32
+	printk("%sEIP: %pS\n", loglvl, (void *)regs->ip);
+#else
+	printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip);
+#endif
+	show_opcodes((u8 *)regs->ip, loglvl);
+}
+
 void show_iret_regs(struct pt_regs *regs)
 {
-	printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+	show_ip(regs, KERN_DEFAULT);
 	printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
 		regs->sp, regs->flags);
 }
@@ -267,7 +323,6 @@
 	bust_spinlocks(1);
 	return flags;
 }
-EXPORT_SYMBOL_GPL(oops_begin);
 NOKPROBE_SYMBOL(oops_begin);
 
 void __noreturn rewind_stack_do_exit(int signr);
@@ -287,6 +342,9 @@
 	raw_local_irq_restore(flags);
 	oops_exit();
 
+	/* Executive summary in case the oops scrolled away */
+	__show_regs(&exec_summary_regs, true);
+
 	if (!signr)
 		return;
 	if (in_interrupt())
@@ -305,10 +363,10 @@
 
 int __die(const char *str, struct pt_regs *regs, long err)
 {
-#ifdef CONFIG_X86_32
-	unsigned short ss;
-	unsigned long sp;
-#endif
+	/* Save the regs of the first oops for the executive summary later. */
+	if (!die_counter)
+		exec_summary_regs = *regs;
+
 	printk(KERN_DEFAULT
 	       "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
 	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
@@ -318,26 +376,13 @@
 	       IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
 	       (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
 
+	show_regs(regs);
+	print_modules();
+
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
 		return 1;
 
-	print_modules();
-	show_regs(regs);
-#ifdef CONFIG_X86_32
-	if (user_mode(regs)) {
-		sp = regs->sp;
-		ss = regs->ss;
-	} else {
-		sp = kernel_stack_pointer(regs);
-		savesegment(ss, ss);
-	}
-	printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
-	       (void *)regs->ip, ss, sp);
-#else
-	/* Executive summary in case the oops scrolled away */
-	printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
-#endif
 	return 0;
 }
 NOKPROBE_SYMBOL(__die);
@@ -356,30 +401,9 @@
 	oops_end(flags, regs, sig);
 }
 
-static int __init code_bytes_setup(char *s)
-{
-	ssize_t ret;
-	unsigned long val;
-
-	if (!s)
-		return -EINVAL;
-
-	ret = kstrtoul(s, 0, &val);
-	if (ret)
-		return ret;
-
-	code_bytes = val;
-	if (code_bytes > 8192)
-		code_bytes = 8192;
-
-	return 1;
-}
-__setup("code_bytes=", code_bytes_setup);
-
 void show_regs(struct pt_regs *regs)
 {
 	bool all = true;
-	int i;
 
 	show_regs_print_info(KERN_DEFAULT);
 
@@ -389,36 +413,8 @@
 	__show_regs(regs, all);
 
 	/*
-	 * When in-kernel, we also print out the stack and code at the
-	 * time of the fault..
+	 * When in-kernel, we also print out the stack at the time of the fault.
 	 */
-	if (!user_mode(regs)) {
-		unsigned int code_prologue = code_bytes * 43 / 64;
-		unsigned int code_len = code_bytes;
-		unsigned char c;
-		u8 *ip;
-
+	if (!user_mode(regs))
 		show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
-
-		printk(KERN_DEFAULT "Code: ");
-
-		ip = (u8 *)regs->ip - code_prologue;
-		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
-			/* try starting at IP */
-			ip = (u8 *)regs->ip;
-			code_len = code_len - code_prologue + 1;
-		}
-		for (i = 0; i < code_len; i++, ip++) {
-			if (ip < (u8 *)PAGE_OFFSET ||
-					probe_kernel_address(ip, c)) {
-				pr_cont(" Bad RIP value.");
-				break;
-			}
-			if (ip == (u8 *)regs->ip)
-				pr_cont("<%02x> ", c);
-			else
-				pr_cont("%02x ", c);
-		}
-	}
-	pr_cont("\n");
 }
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 6a2cb14..d1f25c8 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -155,7 +155,8 @@
 	int x = table->nr_entries;
 
 	if (x >= ARRAY_SIZE(table->entries)) {
-		pr_err("e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", start, start + size - 1);
+		pr_err("too many entries; ignoring [mem %#010llx-%#010llx]\n",
+		       start, start + size - 1);
 		return;
 	}
 
@@ -190,9 +191,10 @@
 	int i;
 
 	for (i = 0; i < e820_table->nr_entries; i++) {
-		pr_info("%s: [mem %#018Lx-%#018Lx] ", who,
-		       e820_table->entries[i].addr,
-		       e820_table->entries[i].addr + e820_table->entries[i].size - 1);
+		pr_info("%s: [mem %#018Lx-%#018Lx] ",
+			who,
+			e820_table->entries[i].addr,
+			e820_table->entries[i].addr + e820_table->entries[i].size - 1);
 
 		e820_print_type(e820_table->entries[i].type);
 		pr_cont("\n");
@@ -574,7 +576,7 @@
 	if (e820__update_table(e820_table))
 		return;
 
-	pr_info("e820: modified physical RAM map:\n");
+	pr_info("modified physical RAM map:\n");
 	e820__print_table("modified");
 }
 
@@ -636,9 +638,8 @@
 	if (!found) {
 #ifdef CONFIG_X86_64
 		gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
-		pr_err(
-			"e820: Cannot find an available gap in the 32-bit address range\n"
-			"e820: PCI devices with unassigned 32-bit BARs may not work!\n");
+		pr_err("Cannot find an available gap in the 32-bit address range\n");
+		pr_err("PCI devices with unassigned 32-bit BARs may not work!\n");
 #else
 		gapstart = 0x10000000;
 #endif
@@ -649,7 +650,8 @@
 	 */
 	pci_mem_start = gapstart;
 
-	pr_info("e820: [mem %#010lx-%#010lx] available for PCI devices\n", gapstart, gapstart + gapsize - 1);
+	pr_info("[mem %#010lx-%#010lx] available for PCI devices\n",
+		gapstart, gapstart + gapsize - 1);
 }
 
 /*
@@ -711,7 +713,7 @@
 	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
 
 	early_memunmap(sdata, data_len);
-	pr_info("e820: extended physical RAM map:\n");
+	pr_info("extended physical RAM map:\n");
 	e820__print_table("extended");
 }
 
@@ -780,7 +782,7 @@
 	addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 	if (addr) {
 		e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
-		pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n");
+		pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
 		e820__update_table_kexec();
 	}
 
@@ -830,8 +832,8 @@
 	if (last_pfn > max_arch_pfn)
 		last_pfn = max_arch_pfn;
 
-	pr_info("e820: last_pfn = %#lx max_arch_pfn = %#lx\n",
-			 last_pfn, max_arch_pfn);
+	pr_info("last_pfn = %#lx max_arch_pfn = %#lx\n",
+		last_pfn, max_arch_pfn);
 	return last_pfn;
 }
 
@@ -1005,7 +1007,7 @@
 		if (e820__update_table(e820_table) < 0)
 			early_panic("Invalid user supplied memory map");
 
-		pr_info("e820: user-defined physical RAM map:\n");
+		pr_info("user-defined physical RAM map:\n");
 		e820__print_table("user");
 	}
 }
@@ -1238,7 +1240,7 @@
 	memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec));
 	memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware));
 
-	pr_info("e820: BIOS-provided physical RAM map:\n");
+	pr_info("BIOS-provided physical RAM map:\n");
 	e820__print_table(who);
 }
 
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index bae0d32..da5d8ac 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -28,8 +28,6 @@
 #include <asm/irq_remapping.h>
 #include <asm/early_ioremap.h>
 
-#define dev_err(msg)  pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg)
-
 static void __init fix_hypertransport_config(int num, int slot, int func)
 {
 	u32 htcfg;
@@ -617,7 +615,8 @@
 
 		pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL);
 		if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) {
-			dev_err("Cannot power up Apple AirPort card\n");
+			pr_err("pci 0000:%02x:%02x.%d: Cannot power up Apple AirPort card\n",
+			       bus, slot, func);
 			return;
 		}
 	}
@@ -628,7 +627,8 @@
 
 	mmio = early_ioremap(addr, BCM4331_MMIO_SIZE);
 	if (!mmio) {
-		dev_err("Cannot iomap Apple AirPort card\n");
+		pr_err("pci 0000:%02x:%02x.%d: Cannot iomap Apple AirPort card\n",
+		       bus, slot, func);
 		return;
 	}
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2d29e47..a21d6ac 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -6,6 +6,10 @@
  */
 
 #define DISABLE_BRANCH_PROFILING
+
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
+
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/types.h>
@@ -32,11 +36,6 @@
 #include <asm/microcode.h>
 #include <asm/kasan.h>
 
-#ifdef CONFIG_X86_5LEVEL
-#undef pgtable_l5_enabled
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
-
 /*
  * Manage page tables very early on.
  */
@@ -45,8 +44,7 @@
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
 
 #ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled __ro_after_init;
-EXPORT_SYMBOL(__pgtable_l5_enabled);
+unsigned int __pgtable_l5_enabled __initdata;
 unsigned int pgdir_shift __ro_after_init = 39;
 EXPORT_SYMBOL(pgdir_shift);
 unsigned int ptrs_per_p4d __ro_after_init = 1;
@@ -82,13 +80,14 @@
 
 static bool __head check_la57_support(unsigned long physaddr)
 {
-	if (native_cpuid_eax(0) < 7)
+	/*
+	 * 5-level paging is detected and enabled at the kernel decompression
+	 * stage. Only check if it has been enabled there.
+	 */
+	if (!(native_read_cr4() & X86_CR4_LA57))
 		return false;
 
-	if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
-		return false;
-
-	*fixup_int(&pgtable_l5_enabled, physaddr) = 1;
+	*fixup_int(&__pgtable_l5_enabled, physaddr) = 1;
 	*fixup_int(&pgdir_shift, physaddr) = 48;
 	*fixup_int(&ptrs_per_p4d, physaddr) = 512;
 	*fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
@@ -281,7 +280,7 @@
 	 * critical -- __PAGE_OFFSET would point us back into the dynamic
 	 * range and we might end up looping forever...
 	 */
-	if (!pgtable_l5_enabled)
+	if (!pgtable_l5_enabled())
 		p4d_p = pgd_p;
 	else if (pgd)
 		p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 8ce4212..b6be34e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -975,8 +975,7 @@
 	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
 	hpet_writel(cfg, HPET_CFG);
 	if (cfg)
-		pr_warn("HPET: Unrecognized bits %#x set in global cfg\n",
-			cfg);
+		pr_warn("Unrecognized bits %#x set in global cfg\n", cfg);
 
 	for (i = 0; i <= last; ++i) {
 		cfg = hpet_readl(HPET_Tn_CFG(i));
@@ -988,7 +987,7 @@
 			 | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE
 			 | HPET_TN_FSB | HPET_TN_FSB_CAP);
 		if (cfg)
-			pr_warn("HPET: Unrecognized bits %#x set in cfg#%u\n",
+			pr_warn("Unrecognized bits %#x set in cfg#%u\n",
 				cfg, i);
 	}
 	hpet_print_config();
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index a15fe0e..108c48d 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -37,7 +37,7 @@
 	return jailhouse_cpuid_base();
 }
 
-static void jailhouse_get_wallclock(struct timespec *now)
+static void jailhouse_get_wallclock(struct timespec64 *now)
 {
 	memset(now, 0, sizeof(*now));
 }
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 8b26c9e..bf8d1eb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -53,7 +53,7 @@
  * have elapsed since the hypervisor wrote the data. So we try to account for
  * that with system time
  */
-static void kvm_get_wallclock(struct timespec *now)
+static void kvm_get_wallclock(struct timespec64 *now)
 {
 	struct pvclock_vcpu_time_info *vcpu_time;
 	int low, high;
@@ -72,7 +72,7 @@
 	put_cpu();
 }
 
-static int kvm_set_wallclock(const struct timespec *now)
+static int kvm_set_wallclock(const struct timespec64 *now)
 {
 	return -ENODEV;
 }
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6010449..4c8acdf 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -354,7 +354,8 @@
 {
 	VMCOREINFO_NUMBER(phys_base);
 	VMCOREINFO_SYMBOL(init_top_pgt);
-	VMCOREINFO_NUMBER(pgtable_l5_enabled);
+	vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
+			pgtable_l5_enabled());
 
 #ifdef CONFIG_NUMA
 	VMCOREINFO_SYMBOL(node_data);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 77625b6..ab5d9dd 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -15,13 +15,11 @@
 #include <asm/x86_init.h>
 #include <asm/iommu_table.h>
 
-static int forbid_dac __read_mostly;
+static bool disable_dac_quirk __read_mostly;
 
 const struct dma_map_ops *dma_ops = &dma_direct_ops;
 EXPORT_SYMBOL(dma_ops);
 
-static int iommu_sac_force __read_mostly;
-
 #ifdef CONFIG_IOMMU_DEBUG
 int panic_on_overflow __read_mostly = 1;
 int force_iommu __read_mostly = 1;
@@ -55,9 +53,6 @@
 };
 EXPORT_SYMBOL(x86_dma_fallback_dev);
 
-/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES       65536
-
 void __init pci_iommu_alloc(void)
 {
 	struct iommu_table_entry *p;
@@ -76,7 +71,7 @@
 	}
 }
 
-bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
+bool arch_dma_alloc_attrs(struct device **dev)
 {
 	if (!*dev)
 		*dev = &x86_dma_fallback_dev;
@@ -125,13 +120,13 @@
 		if (!strncmp(p, "nomerge", 7))
 			iommu_merge = 0;
 		if (!strncmp(p, "forcesac", 8))
-			iommu_sac_force = 1;
+			pr_warn("forcesac option ignored.\n");
 		if (!strncmp(p, "allowdac", 8))
-			forbid_dac = 0;
+			pr_warn("allowdac option ignored.\n");
 		if (!strncmp(p, "nodac", 5))
-			forbid_dac = 1;
+			pr_warn("nodac option ignored.\n");
 		if (!strncmp(p, "usedac", 6)) {
-			forbid_dac = -1;
+			disable_dac_quirk = true;
 			return 1;
 		}
 #ifdef CONFIG_SWIOTLB
@@ -156,40 +151,9 @@
 }
 early_param("iommu", iommu_setup);
 
-int arch_dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PCI
-	if (mask > 0xffffffff && forbid_dac > 0) {
-		dev_info(dev, "PCI: Disallowing DAC for device\n");
-		return 0;
-	}
-#endif
-
-	/* Tell the device to use SAC when IOMMU force is on.  This
-	   allows the driver to use cheaper accesses in some cases.
-
-	   Problem with this is that if we overflow the IOMMU area and
-	   return DAC as fallback address the device may not handle it
-	   correctly.
-
-	   As a special case some controllers have a 39bit address
-	   mode that is as efficient as 32bit (aic79xx). Don't force
-	   SAC for these.  Assume all masks <= 40 bits are of this
-	   type. Normally this doesn't make any difference, but gives
-	   more gentle handling of IOMMU overflow. */
-	if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
-		dev_info(dev, "Force SAC with mask %Lx\n", mask);
-		return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(arch_dma_supported);
-
 static int __init pci_iommu_init(void)
 {
 	struct iommu_table_entry *p;
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
 
 #ifdef CONFIG_PCI
 	dma_debug_add_bus(&pci_bus_type);
@@ -209,11 +173,17 @@
 #ifdef CONFIG_PCI
 /* Many VIA bridges seem to corrupt data for DAC. Disable it here */
 
+static int via_no_dac_cb(struct pci_dev *pdev, void *data)
+{
+	pdev->dev.dma_32bit_limit = true;
+	return 0;
+}
+
 static void via_no_dac(struct pci_dev *dev)
 {
-	if (forbid_dac == 0) {
+	if (!disable_dac_quirk) {
 		dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
-		forbid_dac = 1;
+		pci_walk_bus(dev->subordinate, via_no_dac_cb, NULL);
 	}
 }
 DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index e47b2db..c06c4c1 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -151,17 +151,19 @@
 	regs_user_copy->sp = user_regs->sp;
 	regs_user_copy->cs = user_regs->cs;
 	regs_user_copy->ss = user_regs->ss;
-
 	/*
-	 * Most system calls don't save these registers, don't report them.
+	 * Store user space frame-pointer value on sample
+	 * to facilitate stack unwinding for cases when
+	 * user space executable code has such support
+	 * enabled at compile time:
 	 */
+	regs_user_copy->bp = user_regs->bp;
+
 	regs_user_copy->bx = -1;
-	regs_user_copy->bp = -1;
 	regs_user_copy->r12 = -1;
 	regs_user_copy->r13 = -1;
 	regs_user_copy->r14 = -1;
 	regs_user_copy->r15 = -1;
-
 	/*
 	 * For this to be at all useful, we need a reasonable guess for
 	 * the ABI.  Be careful: we're in NMI context, and we're
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5224c60..0ae659d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -76,16 +76,14 @@
 		savesegment(gs, gs);
 	}
 
-	printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
-	printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
-		raw_smp_processor_id());
+	show_ip(regs, KERN_DEFAULT);
 
 	printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
 		regs->ax, regs->bx, regs->cx, regs->dx);
 	printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
 		regs->si, regs->di, regs->bp, sp);
-	printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
-	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
+	printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
+	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
 
 	if (!all)
 		return;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index ed5c4cd..e2ee403 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1377,7 +1377,6 @@
 	tsk->thread.trap_nr = X86_TRAP_DB;
 	tsk->thread.error_code = error_code;
 
-	memset(info, 0, sizeof(*info));
 	info->si_signo = SIGTRAP;
 	info->si_code = si_code;
 	info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
@@ -1395,6 +1394,7 @@
 {
 	struct siginfo info;
 
+	clear_siginfo(&info);
 	fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
 	/* Send us the fake SIGTRAP */
 	force_sig_info(SIGTRAP, &info, tsk);
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 761f6af..637982e 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -123,28 +123,35 @@
 
 void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 			    struct pvclock_vcpu_time_info *vcpu_time,
-			    struct timespec *ts)
+			    struct timespec64 *ts)
 {
 	u32 version;
 	u64 delta;
-	struct timespec now;
+	struct timespec64 now;
 
 	/* get wallclock at system boot */
 	do {
 		version = wall_clock->version;
 		rmb();		/* fetch version before time */
+		/*
+		 * Note: wall_clock->sec is a u32 value, so it can
+		 * only store dates between 1970 and 2106. To allow
+		 * times beyond that, we need to create a new hypercall
+		 * interface with an extended pvclock_wall_clock structure
+		 * like ARM has.
+		 */
 		now.tv_sec  = wall_clock->sec;
 		now.tv_nsec = wall_clock->nsec;
 		rmb();		/* fetch time before checking version */
 	} while ((wall_clock->version & 1) || (version != wall_clock->version));
 
 	delta = pvclock_clocksource_read(vcpu_time);	/* time since system boot */
-	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+	delta += now.tv_sec * NSEC_PER_SEC + now.tv_nsec;
 
 	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
 	now.tv_sec = delta;
 
-	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+	set_normalized_timespec64(ts, now.tv_sec, now.tv_nsec);
 }
 
 void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti)
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index f7b82ed..586f718 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -39,7 +39,7 @@
  * jump to the next second precisely 500 ms later. Check the Motorola
  * MC146818A or Dallas DS12887 data sheet for details.
  */
-int mach_set_rtc_mmss(const struct timespec *now)
+int mach_set_rtc_mmss(const struct timespec64 *now)
 {
 	unsigned long long nowtime = now->tv_sec;
 	struct rtc_time tm;
@@ -60,7 +60,7 @@
 	return retval;
 }
 
-void mach_get_cmos_time(struct timespec *now)
+void mach_get_cmos_time(struct timespec64 *now)
 {
 	unsigned int status, year, mon, day, hour, min, sec, century = 0;
 	unsigned long flags;
@@ -118,7 +118,7 @@
 	} else
 		year += CMOS_YEARS_OFFS;
 
-	now->tv_sec = mktime(year, mon, day, hour, min, sec);
+	now->tv_sec = mktime64(year, mon, day, hour, min, sec);
 	now->tv_nsec = 0;
 }
 
@@ -145,13 +145,13 @@
 }
 EXPORT_SYMBOL(rtc_cmos_write);
 
-int update_persistent_clock(struct timespec now)
+int update_persistent_clock64(struct timespec64 now)
 {
 	return x86_platform.set_wallclock(&now);
 }
 
 /* not static: needed by APM */
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	x86_platform.get_wallclock(ts);
 }
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 14c057f..9ccbf05 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -29,7 +29,7 @@
 	BUILD_BUG_ON(NSIGFPE  != 15);
 	BUILD_BUG_ON(NSIGSEGV != 7);
 	BUILD_BUG_ON(NSIGBUS  != 5);
-	BUILD_BUG_ON(NSIGTRAP != 4);
+	BUILD_BUG_ON(NSIGTRAP != 5);
 	BUILD_BUG_ON(NSIGCHLD != 6);
 	BUILD_BUG_ON(NSIGSYS  != 1);
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9dd324a..c2f7d1d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -81,13 +81,6 @@
 #include <asm/cpu_device_id.h>
 #include <asm/spec-ctrl.h>
 
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
-
-/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
-
 /* representing HT siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index a3f15ed..6a78d4b 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/compat.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
@@ -19,7 +20,6 @@
 #include <linux/elf.h>
 
 #include <asm/elf.h>
-#include <asm/compat.h>
 #include <asm/ia32.h>
 #include <asm/syscalls.h>
 #include <asm/mpx.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 03f3d76..a535dd6 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -299,6 +299,7 @@
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
 			NOTIFY_STOP) {
 		cond_local_irq_enable(regs);
+		clear_siginfo(&info);
 		do_trap(trapnr, signr, str, regs, error_code,
 			fill_trap_info(regs, signr, trapnr, &info));
 	}
@@ -854,6 +855,7 @@
 
 	task->thread.trap_nr	= trapnr;
 	task->thread.error_code = error_code;
+	clear_siginfo(&info);
 	info.si_signo		= SIGFPE;
 	info.si_errno		= 0;
 	info.si_addr		= (void __user *)uprobe_get_trap_addr(regs);
@@ -929,6 +931,7 @@
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 	local_irq_enable();
 
+	clear_siginfo(&info);
 	info.si_signo = SIGILL;
 	info.si_errno = 0;
 	info.si_code = ILL_BADSTK;
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index f44ce0f..ff20b35 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -278,6 +278,7 @@
 	tsk->thread.error_code	= X86_PF_USER | X86_PF_WRITE;
 	tsk->thread.trap_nr	= X86_TRAP_PF;
 
+	clear_siginfo(&info);
 	info.si_signo	= SIGSEGV;
 	info.si_errno	= 0;
 	info.si_code	= SEGV_MAPERR;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index c84bb53..58d8d80 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -1083,8 +1083,8 @@
 		return orig_ret_vaddr;
 
 	if (nleft != rasize) {
-		pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, "
-			"%%ip=%#lx\n", current->pid, regs->sp, regs->ip);
+		pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n",
+		       current->pid, regs->sp, regs->ip);
 
 		force_sig_info(SIGSEGV, SEND_SIG_FORCED, current);
 	}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 795f3a8..5e1458f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -117,11 +117,11 @@
 
 #ifdef CONFIG_X86_64
 		. = ALIGN(PAGE_SIZE);
-		VMLINUX_SYMBOL(__entry_trampoline_start) = .;
+		__entry_trampoline_start = .;
 		_entry_trampoline = .;
 		*(.entry_trampoline)
 		. = ALIGN(PAGE_SIZE);
-		VMLINUX_SYMBOL(__entry_trampoline_end) = .;
+		__entry_trampoline_end = .;
 		ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
 #endif
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8494dba..d634f033 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3007,6 +3007,7 @@
 {
 	siginfo_t info;
 
+	clear_siginfo(&info);
 	info.si_signo	= SIGBUS;
 	info.si_errno	= 0;
 	info.si_code	= BUS_MCEERR_AR;
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 9a53a06..c3b527a 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -184,11 +184,11 @@
 
 #ifndef CONFIG_UML
 /*
- * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+ * __memcpy_mcsafe - memory copy with machine check exception handling
  * Note that we only catch machine checks when reading the source addresses.
  * Writes to target are posted and don't generate machine checks.
  */
-ENTRY(memcpy_mcsafe_unrolled)
+ENTRY(__memcpy_mcsafe)
 	cmpl $8, %edx
 	/* Less than 8 bytes? Go to byte copy loop */
 	jb .L_no_whole_words
@@ -204,58 +204,29 @@
 	subl $8, %ecx
 	negl %ecx
 	subl %ecx, %edx
-.L_copy_leading_bytes:
+.L_read_leading_bytes:
 	movb (%rsi), %al
+.L_write_leading_bytes:
 	movb %al, (%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz .L_copy_leading_bytes
+	jnz .L_read_leading_bytes
 
 .L_8byte_aligned:
-	/* Figure out how many whole cache lines (64-bytes) to copy */
-	movl %edx, %ecx
-	andl $63, %edx
-	shrl $6, %ecx
-	jz .L_no_whole_cache_lines
-
-	/* Loop copying whole cache lines */
-.L_cache_w0: movq (%rsi), %r8
-.L_cache_w1: movq 1*8(%rsi), %r9
-.L_cache_w2: movq 2*8(%rsi), %r10
-.L_cache_w3: movq 3*8(%rsi), %r11
-	movq %r8, (%rdi)
-	movq %r9, 1*8(%rdi)
-	movq %r10, 2*8(%rdi)
-	movq %r11, 3*8(%rdi)
-.L_cache_w4: movq 4*8(%rsi), %r8
-.L_cache_w5: movq 5*8(%rsi), %r9
-.L_cache_w6: movq 6*8(%rsi), %r10
-.L_cache_w7: movq 7*8(%rsi), %r11
-	movq %r8, 4*8(%rdi)
-	movq %r9, 5*8(%rdi)
-	movq %r10, 6*8(%rdi)
-	movq %r11, 7*8(%rdi)
-	leaq 64(%rsi), %rsi
-	leaq 64(%rdi), %rdi
-	decl %ecx
-	jnz .L_cache_w0
-
-	/* Are there any trailing 8-byte words? */
-.L_no_whole_cache_lines:
 	movl %edx, %ecx
 	andl $7, %edx
 	shrl $3, %ecx
 	jz .L_no_whole_words
 
-	/* Copy trailing words */
-.L_copy_trailing_words:
+.L_read_words:
 	movq (%rsi), %r8
-	mov %r8, (%rdi)
-	leaq 8(%rsi), %rsi
-	leaq 8(%rdi), %rdi
+.L_write_words:
+	movq %r8, (%rdi)
+	addq $8, %rsi
+	addq $8, %rdi
 	decl %ecx
-	jnz .L_copy_trailing_words
+	jnz .L_read_words
 
 	/* Any trailing bytes? */
 .L_no_whole_words:
@@ -264,38 +235,53 @@
 
 	/* Copy trailing bytes */
 	movl %edx, %ecx
-.L_copy_trailing_bytes:
+.L_read_trailing_bytes:
 	movb (%rsi), %al
+.L_write_trailing_bytes:
 	movb %al, (%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz .L_copy_trailing_bytes
+	jnz .L_read_trailing_bytes
 
 	/* Copy successful. Return zero */
 .L_done_memcpy_trap:
 	xorq %rax, %rax
 	ret
-ENDPROC(memcpy_mcsafe_unrolled)
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
 
 	.section .fixup, "ax"
-	/* Return -EFAULT for any failure */
-.L_memcpy_mcsafe_fail:
-	mov	$-EFAULT, %rax
+	/*
+	 * Return number of bytes not copied for any failure. Note that
+	 * there is no "tail" handling since the source buffer is 8-byte
+	 * aligned and poison is cacheline aligned.
+	 */
+.E_read_words:
+	shll	$3, %ecx
+.E_leading_bytes:
+	addl	%edx, %ecx
+.E_trailing_bytes:
+	mov	%ecx, %eax
 	ret
 
+	/*
+	 * For write fault handling, given the destination is unaligned,
+	 * we handle faults on multi-byte writes with a byte-by-byte
+	 * copy up to the write-protected page.
+	 */
+.E_write_words:
+	shll	$3, %ecx
+	addl	%edx, %ecx
+	movl	%ecx, %edx
+	jmp mcsafe_handle_tail
+
 	.previous
 
-	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
-	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE(.L_write_words, .E_write_words)
+	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
 #endif
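
After this rework, __memcpy_mcsafe() returns the number of bytes not copied
instead of a flat -EFAULT, and a fault on a multi-byte write falls through to
mcsafe_handle_tail (added in the usercopy_64.c hunk below) for a byte-by-byte
retry up to the faulting boundary. A sketch of how a caller can consume the
bytes-remaining convention; the wrapper function is hypothetical:

#include <stddef.h>

/* Contract per the fixup comments above: 0 on success, else bytes left. */
unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t len);

/* Hypothetical caller: report how much of the buffer was transferred. */
static size_t copy_from_pmem_sketch(void *dst, const void *src, size_t len)
{
	unsigned long rem = __memcpy_mcsafe(dst, src, len);

	/* rem == 0 means a full copy; otherwise the final rem bytes of
	 * the requested length were not transferred. */
	return len - rem;
}
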
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 75d3776..9c5606d 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -23,13 +23,13 @@
 	asm volatile(
 		"	testq  %[size8],%[size8]\n"
 		"	jz     4f\n"
-		"0:	movq %[zero],(%[dst])\n"
-		"	addq   %[eight],%[dst]\n"
+		"0:	movq $0,(%[dst])\n"
+		"	addq   $8,%[dst]\n"
 		"	decl %%ecx ; jnz   0b\n"
 		"4:	movq  %[size1],%%rcx\n"
 		"	testl %%ecx,%%ecx\n"
 		"	jz     2f\n"
-		"1:	movb   %b[zero],(%[dst])\n"
+		"1:	movb   $0,(%[dst])\n"
 		"	incq   %[dst]\n"
 		"	decl %%ecx ; jnz  1b\n"
 		"2:\n"
@@ -40,8 +40,7 @@
 		_ASM_EXTABLE(0b,3b)
 		_ASM_EXTABLE(1b,2b)
 		: [size8] "=&c"(size), [dst] "=&D" (__d0)
-		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
-		  [zero] "r" (0UL), [eight] "r" (8UL));
+		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
 	clac();
 	return size;
 }
@@ -75,6 +74,27 @@
 	return len;
 }
 
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point,
+ * but reuse __memcpy_mcsafe in case a new read error is encountered.
+ * clac() is handled in _copy_to_iter_mcsafe().
+ */
+__visible unsigned long
+mcsafe_handle_tail(char *to, char *from, unsigned len)
+{
+	for (; len; --len, to++, from++) {
+		/*
+		 * Call the assembly routine back directly since
+		 * memcpy_mcsafe() may silently fallback to memcpy.
+		 */
+		unsigned long rem = __memcpy_mcsafe(to, from, 1);
+
+		if (rem)
+			break;
+	}
+	return len;
+}
+
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /**
  * clean_cache_range - write back a cache range with CLWB
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index cc7ff59..2f3c919 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -360,7 +360,7 @@
 				void *pt)
 {
 	if (__pa(pt) == __pa(kasan_zero_pmd) ||
-	    (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
+	    (pgtable_l5_enabled() && __pa(pt) == __pa(kasan_zero_p4d)) ||
 	    __pa(pt) == __pa(kasan_zero_pud)) {
 		pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
 		note_page(m, st, __pgprot(prot), 0, 5);
@@ -476,8 +476,8 @@
 	}
 }
 
-#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
-#define pgd_none(a)  (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
+#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a)  (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
 
 static inline bool is_hypervisor_range(int idx)
 {
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 73bd8c9..9a84a0d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -209,6 +209,7 @@
 	unsigned lsb = 0;
 	siginfo_t info;
 
+	clear_siginfo(&info);
 	info.si_signo	= si_signo;
 	info.si_errno	= 0;
 	info.si_code	= si_code;
@@ -439,7 +440,7 @@
 	if (pgd_none(*pgd_k))
 		return -1;
 
-	if (pgtable_l5_enabled) {
+	if (pgtable_l5_enabled()) {
 		if (pgd_none(*pgd)) {
 			set_pgd(pgd, *pgd_k);
 			arch_flush_lazy_mmu_mode();
@@ -454,7 +455,7 @@
 	if (p4d_none(*p4d_k))
 		return -1;
 
-	if (p4d_none(*p4d) && !pgtable_l5_enabled) {
+	if (p4d_none(*p4d) && !pgtable_l5_enabled()) {
 		set_p4d(p4d, *p4d_k);
 		arch_flush_lazy_mmu_mode();
 	} else {
@@ -828,6 +829,8 @@
 show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 		unsigned long address, struct task_struct *tsk)
 {
+	const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG;
+
 	if (!unhandled_signal(tsk, SIGSEGV))
 		return;
 
@@ -835,13 +838,14 @@
 		return;
 
 	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
-		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
-		tsk->comm, task_pid_nr(tsk), address,
+		loglvl, tsk->comm, task_pid_nr(tsk), address,
 		(void *)regs->ip, (void *)regs->sp, error_code);
 
 	print_vma_addr(KERN_CONT " in ", regs->ip);
 
 	printk(KERN_CONT "\n");
+
+	show_opcodes((u8 *)regs->ip, loglvl);
 }
 
 static void
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index a2f0c7e..fe7a125 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -123,7 +123,7 @@
 		result = ident_p4d_init(info, p4d, addr, next);
 		if (result)
 			return result;
-		if (pgtable_l5_enabled) {
+		if (pgtable_l5_enabled()) {
 			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
 		} else {
 			/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0a40060..17383f9 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -180,7 +180,7 @@
  */
 void sync_global_pgds(unsigned long start, unsigned long end)
 {
-	if (pgtable_l5_enabled)
+	if (pgtable_l5_enabled())
 		sync_global_pgds_l5(start, end);
 	else
 		sync_global_pgds_l4(start, end);
@@ -643,7 +643,7 @@
 	unsigned long vaddr = (unsigned long)__va(paddr);
 	int i = p4d_index(vaddr);
 
-	if (!pgtable_l5_enabled)
+	if (!pgtable_l5_enabled())
 		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
 
 	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
@@ -723,7 +723,7 @@
 					   page_size_mask);
 
 		spin_lock(&init_mm.page_table_lock);
-		if (pgtable_l5_enabled)
+		if (pgtable_l5_enabled())
 			pgd_populate(&init_mm, pgd, p4d);
 		else
 			p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
@@ -1100,7 +1100,7 @@
 		 * 5-level case we should free them. This code will have to change
 		 * to adapt for boot-time switching between 4 and 5 level page tables.
 		 */
-		if (pgtable_l5_enabled)
+		if (pgtable_l5_enabled())
 			free_pud_table(pud_base, p4d);
 	}
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 980dbeb..e3e7752 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -2,10 +2,8 @@
 #define DISABLE_BRANCH_PROFILING
 #define pr_fmt(fmt) "kasan: " fmt
 
-#ifdef CONFIG_X86_5LEVEL
-/* Too early to use cpu_feature_enabled() */
-#define pgtable_l5_enabled __pgtable_l5_enabled
-#endif
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
 
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
@@ -182,7 +180,7 @@
 		 * With folded p4d, pgd_clear() is nop, use p4d_clear()
 		 * instead.
 		 */
-		if (pgtable_l5_enabled)
+		if (pgtable_l5_enabled())
 			pgd_clear(pgd);
 		else
 			p4d_clear(p4d_offset(pgd, start));
@@ -197,7 +195,7 @@
 {
 	unsigned long p4d;
 
-	if (!pgtable_l5_enabled)
+	if (!pgtable_l5_enabled())
 		return (p4d_t *)pgd;
 
 	p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
@@ -284,7 +282,7 @@
 	for (i = 0; i < PTRS_PER_PUD; i++)
 		kasan_zero_pud[i] = __pud(pud_val);
 
-	for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
+	for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
 		kasan_zero_p4d[i] = __p4d(p4d_val);
 
 	kasan_map_early_shadow(early_top_pgt);
@@ -315,7 +313,7 @@
 	 * bunch of things like kernel code, modules, EFI mapping, etc.
 	 * We need to take extra steps to not overwrite them.
 	 */
-	if (pgtable_l5_enabled) {
+	if (pgtable_l5_enabled()) {
 		void *ptr;
 
 		ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
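
The pgtable_l5_enabled → pgtable_l5_enabled() conversions repeated across
arch/x86 turn the 5-level-paging check into a function: ordinary code can sit
on the cpufeature machinery, while very early boot code (as in kasan_init_64.c
above) opts into a raw variable read by defining USE_EARLY_PGTABLE_L5 before
the headers. A simplified sketch of that pattern; the feature-check call is a
stand-in for the real cpufeature test:

/* Set very early at boot, before the cpufeature machinery is usable. */
extern unsigned int __pgtable_l5_enabled;

/* Stand-in for the patched-at-boot cpufeature check. */
int cpu_has_la57_sketch(void);

#ifdef USE_EARLY_PGTABLE_L5
/* Early boot: cpu_feature_enabled() cannot be used yet, read the variable. */
static inline int pgtable_l5_enabled(void)
{
	return __pgtable_l5_enabled;
}
#else
static inline int pgtable_l5_enabled(void)
{
	return cpu_has_la57_sketch();
}
#endif
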
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 615cc03..61db77b 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -78,7 +78,7 @@
 	struct rnd_state rand_state;
 	unsigned long remain_entropy;
 
-	vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
+	vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
 	vaddr = vaddr_start;
 
 	/*
@@ -124,7 +124,7 @@
 		 */
 		entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
 		prandom_bytes_state(&rand_state, &rand, sizeof(rand));
-		if (pgtable_l5_enabled)
+		if (pgtable_l5_enabled())
 			entropy = (rand % (entropy + 1)) & P4D_MASK;
 		else
 			entropy = (rand % (entropy + 1)) & PUD_MASK;
@@ -136,7 +136,7 @@
 		 * randomization alignment.
 		 */
 		vaddr += get_padding(&kaslr_regions[i]);
-		if (pgtable_l5_enabled)
+		if (pgtable_l5_enabled())
 			vaddr = round_up(vaddr + 1, P4D_SIZE);
 		else
 			vaddr = round_up(vaddr + 1, PUD_SIZE);
@@ -212,7 +212,7 @@
 		return;
 	}
 
-	if (pgtable_l5_enabled)
+	if (pgtable_l5_enabled())
 		init_trampoline_p4d();
 	else
 		init_trampoline_pud();
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 25504d5..fa15085 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -136,13 +136,13 @@
 
 	/* whine about and ignore invalid blks */
 	if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
-		pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
-			   nid, start, end - 1);
+		pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+			nid, start, end - 1);
 		return 0;
 	}
 
 	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
-		pr_err("NUMA: too many memblk ranges\n");
+		pr_err("too many memblk ranges\n");
 		return -EINVAL;
 	}
 
@@ -267,14 +267,14 @@
 			 */
 			if (bi->end > bj->start && bi->start < bj->end) {
 				if (bi->nid != bj->nid) {
-					pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
+					pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
 					       bi->nid, bi->start, bi->end - 1,
 					       bj->nid, bj->start, bj->end - 1);
 					return -EINVAL;
 				}
-				pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
-					   bi->nid, bi->start, bi->end - 1,
-					   bj->start, bj->end - 1);
+				pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
+					bi->nid, bi->start, bi->end - 1,
+					bj->start, bj->end - 1);
 			}
 
 			/*
@@ -364,7 +364,7 @@
 	phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
 				      size, PAGE_SIZE);
 	if (!phys) {
-		pr_warning("NUMA: Warning: can't allocate distance table!\n");
+		pr_warn("Warning: can't allocate distance table!\n");
 		/* don't retry until explicitly reset */
 		numa_distance = (void *)1LU;
 		return -ENOMEM;
@@ -410,14 +410,14 @@
 
 	if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
 			from < 0 || to < 0) {
-		pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
-			    from, to, distance);
+		pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
+			     from, to, distance);
 		return;
 	}
 
 	if ((u8)distance != distance ||
 	    (from == to && distance != LOCAL_DISTANCE)) {
-		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+		pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
 			     from, to, distance);
 		return;
 	}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e055d1a..6eb1f34 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -157,7 +157,7 @@
 	unsigned long sp = current_stack_pointer;
 	pgd_t *pgd = pgd_offset(mm, sp);
 
-	if (pgtable_l5_enabled) {
+	if (pgtable_l5_enabled()) {
 		if (unlikely(pgd_none(*pgd))) {
 			pgd_t *pgd_ref = pgd_offset_k(sp);
 
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index bed7e7f..e01f7ce 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -225,7 +225,7 @@
 
 	pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
 	if (!pud) {
-		if (pgtable_l5_enabled)
+		if (pgtable_l5_enabled())
 			free_page((unsigned long) pgd_page_vaddr(*pgd));
 		free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
 		return -ENOMEM;
diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
index 5802486..a52914a 100644
--- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c
+++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
@@ -57,7 +57,7 @@
 }
 EXPORT_SYMBOL_GPL(vrtc_cmos_write);
 
-void vrtc_get_time(struct timespec *now)
+void vrtc_get_time(struct timespec64 *now)
 {
 	u8 sec, min, hour, mday, mon;
 	unsigned long flags;
@@ -83,18 +83,18 @@
 	pr_info("vRTC: sec: %d min: %d hour: %d day: %d "
 		"mon: %d year: %d\n", sec, min, hour, mday, mon, year);
 
-	now->tv_sec = mktime(year, mon, mday, hour, min, sec);
+	now->tv_sec = mktime64(year, mon, mday, hour, min, sec);
 	now->tv_nsec = 0;
 }
 
-int vrtc_set_mmss(const struct timespec *now)
+int vrtc_set_mmss(const struct timespec64 *now)
 {
 	unsigned long flags;
 	struct rtc_time tm;
 	int year;
 	int retval = 0;
 
-	rtc_time_to_tm(now->tv_sec, &tm);
+	rtc_time64_to_tm(now->tv_sec, &tm);
 	if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
 		/*
 		 * tm.year is the number of years since 1900, and the
@@ -110,8 +110,8 @@
 		vrtc_cmos_write(tm.tm_sec, RTC_SECONDS);
 		spin_unlock_irqrestore(&rtc_lock, flags);
 	} else {
-		pr_err("%s: Invalid vRTC value: write of %lx to vRTC failed\n",
-			__func__, now->tv_sec);
+		pr_err("%s: Invalid vRTC value: write of %llx to vRTC failed\n",
+			__func__, (s64)now->tv_sec);
 		retval = -EINVAL;
 	}
 	return retval;
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index ccf4a49..67ccf64 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -72,7 +72,7 @@
 	 * tables used by the image kernel.
 	 */
 
-	if (pgtable_l5_enabled) {
+	if (pgtable_l5_enabled()) {
 		p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
 		if (!p4d)
 			return -ENOMEM;
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 13ed827..9d529f2 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -1,5 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
-mainmenu "User Mode Linux/$SUBARCH $KERNELVERSION Kernel Configuration"
+mainmenu "User Mode Linux/$(SUBARCH) $(KERNELVERSION) Kernel Configuration"
+
+comment "Compiler: $(CC_VERSION_TEXT)"
+
+source "scripts/Kconfig.include"
 
 source "arch/um/Kconfig.common"
 
@@ -16,8 +20,8 @@
 	select GENERIC_FIND_FIRST_BIT
 
 config 64BIT
-	bool "64-bit kernel" if SUBARCH = "x86"
-	default SUBARCH != "i386"
+	bool "64-bit kernel" if "$(SUBARCH)" = "x86"
+	default "$(SUBARCH)" != "i386"
 
 config X86_32
 	def_bool !64BIT
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index 1000335..b2d6967 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -23,14 +23,14 @@
 
 targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
 
-export CPPFLAGS_vdso.lds += -P -C
+CPPFLAGS_vdso.lds += -P -C
 
 VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
        -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 
 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
 
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
 
 $(obj)/%.so: OBJCOPYFLAGS := -S
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index a18703b..1804b27 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -115,6 +115,61 @@
 	return &efi_systab_xen;
 }
 
+/*
+ * Determine whether we're in secure boot mode.
+ *
+ * Please keep the logic in sync with
+ * drivers/firmware/efi/libstub/secureboot.c:efi_get_secureboot().
+ */
+static enum efi_secureboot_mode xen_efi_get_secureboot(void)
+{
+	static efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
+	static efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID;
+	efi_status_t status;
+	u8 moksbstate, secboot, setupmode;
+	unsigned long size;
+
+	size = sizeof(secboot);
+	status = efi.get_variable(L"SecureBoot", &efi_variable_guid,
+				  NULL, &size, &secboot);
+
+	if (status == EFI_NOT_FOUND)
+		return efi_secureboot_mode_disabled;
+
+	if (status != EFI_SUCCESS)
+		goto out_efi_err;
+
+	size = sizeof(setupmode);
+	status = efi.get_variable(L"SetupMode", &efi_variable_guid,
+				  NULL, &size, &setupmode);
+
+	if (status != EFI_SUCCESS)
+		goto out_efi_err;
+
+	if (secboot == 0 || setupmode == 1)
+		return efi_secureboot_mode_disabled;
+
+	/* See if a user has put the shim into insecure mode. */
+	size = sizeof(moksbstate);
+	status = efi.get_variable(L"MokSBStateRT", &shim_guid,
+				  NULL, &size, &moksbstate);
+
+	/* If it fails, we don't care why. Default to secure. */
+	if (status != EFI_SUCCESS)
+		goto secure_boot_enabled;
+
+	if (moksbstate == 1)
+		return efi_secureboot_mode_disabled;
+
+ secure_boot_enabled:
+	pr_info("UEFI Secure Boot is enabled.\n");
+	return efi_secureboot_mode_enabled;
+
+ out_efi_err:
+	pr_err("Could not determine UEFI Secure Boot status.\n");
+	return efi_secureboot_mode_unknown;
+}
+
 void __init xen_efi_init(void)
 {
 	efi_system_table_t *efi_systab_xen;
@@ -129,6 +184,8 @@
 	boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen);
 	boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32);
 
+	boot_params.secure_boot = xen_efi_get_secureboot();
+
 	set_bit(EFI_BOOT, &efi.flags);
 	set_bit(EFI_PARAVIRT, &efi.flags);
 	set_bit(EFI_64BIT, &efi.flags);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 29163c4..e0f1bcf 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -57,7 +57,7 @@
 	return xen_clocksource_read();
 }
 
-static void xen_read_wallclock(struct timespec *ts)
+static void xen_read_wallclock(struct timespec64 *ts)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	struct pvclock_wall_clock *wall_clock = &(s->wc);
@@ -68,12 +68,12 @@
 	put_cpu_var(xen_vcpu);
 }
 
-static void xen_get_wallclock(struct timespec *now)
+static void xen_get_wallclock(struct timespec64 *now)
 {
 	xen_read_wallclock(now);
 }
 
-static int xen_set_wallclock(const struct timespec *now)
+static int xen_set_wallclock(const struct timespec64 *now)
 {
 	return -ENODEV;
 }
@@ -461,7 +461,7 @@
 {
 	struct pvclock_vcpu_time_info *pvti;
 	int cpu = smp_processor_id();
-	struct timespec tp;
+	struct timespec64 tp;
 
 	/* As Dom0 is never moved, no penalty on using TSC there */
 	if (xen_initial_domain())
@@ -479,7 +479,7 @@
 
 	/* Set initial system time with full resolution */
 	xen_read_wallclock(&tp);
-	do_settimeofday(&tp);
+	do_settimeofday64(&tp);
 
 	setup_force_cpu_cap(X86_FEATURE_TSC);
 
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index c921e8b..17df3322 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -19,7 +19,6 @@
 	select HAVE_ARCH_KASAN if MMU
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_EXIT_THREAD
 	select HAVE_FUNCTION_TRACER
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 436b203..e5e1e61 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1,4 +1,5 @@
 generic-y += bug.h
+generic-y += compat.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += dma-contiguous.h
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index d5a8215..6ddf0a3 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -42,8 +42,6 @@
  * decisions.
  */
 
-#define PCI_DMA_BUS_IS_PHYS	(1)
-
 /* Tell PCI code what kind of PCI resource mappings we support */
 #define HAVE_PCI_MMAP			1
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE	1
diff --git a/arch/xtensa/include/uapi/asm/msgbuf.h b/arch/xtensa/include/uapi/asm/msgbuf.h
index 36e2e10..d6915e9 100644
--- a/arch/xtensa/include/uapi/asm/msgbuf.h
+++ b/arch/xtensa/include/uapi/asm/msgbuf.h
@@ -7,7 +7,6 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  *
  * This file is subject to the terms and conditions of the GNU General
@@ -21,19 +20,19 @@
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
 #ifdef __XTENSA_EB__
-	unsigned int	__unused1;
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	unsigned int	__unused2;
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	unsigned int	__unused3;
-	__kernel_time_t msg_ctime;	/* last change time */
+	unsigned long  msg_stime_high;
+	unsigned long  msg_stime;	/* last msgsnd time */
+	unsigned long  msg_rtime_high;
+	unsigned long  msg_rtime;	/* last msgrcv time */
+	unsigned long  msg_ctime_high;
+	unsigned long  msg_ctime;	/* last change time */
 #elif defined(__XTENSA_EL__)
-	__kernel_time_t msg_stime;	/* last msgsnd time */
-	unsigned int	__unused1;
-	__kernel_time_t msg_rtime;	/* last msgrcv time */
-	unsigned int	__unused2;
-	__kernel_time_t msg_ctime;	/* last change time */
-	unsigned int	__unused3;
+	unsigned long  msg_stime;	/* last msgsnd time */
+	unsigned long  msg_stime_high;
+	unsigned long  msg_rtime;	/* last msgrcv time */
+	unsigned long  msg_rtime_high;
+	unsigned long  msg_ctime;	/* last change time */
+	unsigned long  msg_ctime_high;
 #else
 # error processor byte order undefined!
 #endif
diff --git a/arch/xtensa/include/uapi/asm/sembuf.h b/arch/xtensa/include/uapi/asm/sembuf.h
index f61b633..09f348d 100644
--- a/arch/xtensa/include/uapi/asm/sembuf.h
+++ b/arch/xtensa/include/uapi/asm/sembuf.h
@@ -14,7 +14,6 @@
  * between kernel and user space.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  *
  */
@@ -27,15 +26,15 @@
 struct semid64_ds {
 	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
 #ifdef __XTENSA_EL__
-	__kernel_time_t	sem_otime;		/* last semop time */
-	unsigned long	__unused1;
-	__kernel_time_t	sem_ctime;		/* last change time */
-	unsigned long	__unused2;
+	unsigned long	sem_otime;		/* last semop time */
+	unsigned long	sem_otime_high;
+	unsigned long	sem_ctime;		/* last change time */
+	unsigned long	sem_ctime_high;
 #else
-	unsigned long	__unused1;
-	__kernel_time_t	sem_otime;		/* last semop time */
-	unsigned long	__unused2;
-	__kernel_time_t	sem_ctime;		/* last change time */
+	unsigned long	sem_otime_high;
+	unsigned long	sem_otime;		/* last semop time */
+	unsigned long	sem_ctime_high;
+	unsigned long	sem_ctime;		/* last change time */
 #endif
 	unsigned long	sem_nsems;		/* no. of semaphores in array */
 	unsigned long	__unused3;
diff --git a/arch/xtensa/include/uapi/asm/shmbuf.h b/arch/xtensa/include/uapi/asm/shmbuf.h
index 26550bd..554a57a 100644
--- a/arch/xtensa/include/uapi/asm/shmbuf.h
+++ b/arch/xtensa/include/uapi/asm/shmbuf.h
@@ -4,10 +4,10 @@
  *
  * The shmid64_ds structure for Xtensa architecture.
  * Note extra padding because this structure is passed back and forth
- * between kernel and user space.
+ * between kernel and user space, but the padding is on the wrong
+ * side for big-endian xtensa, for historic reasons.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -20,42 +20,21 @@
 #ifndef _XTENSA_SHMBUF_H
 #define _XTENSA_SHMBUF_H
 
-#if defined (__XTENSA_EL__)
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
 	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	unsigned long		__unused1;
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	unsigned long		__unused2;
-	__kernel_time_t		shm_ctime;	/* last change time */
-	unsigned long		__unused3;
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_atime_high;
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_dtime_high;
+	unsigned long		shm_ctime;	/* last change time */
+	unsigned long		shm_ctime_high;
 	__kernel_pid_t		shm_cpid;	/* pid of creator */
 	__kernel_pid_t		shm_lpid;	/* pid of last operator */
 	unsigned long		shm_nattch;	/* no. of current attaches */
 	unsigned long		__unused4;
 	unsigned long		__unused5;
 };
-#elif defined (__XTENSA_EB__)
-struct shmid64_ds {
-	struct ipc64_perm	shm_perm;	/* operation perms */
-	size_t			shm_segsz;	/* size of segment (bytes) */
-	__kernel_time_t		shm_atime;	/* last attach time */
-	unsigned long		__unused1;
-	__kernel_time_t		shm_dtime;	/* last detach time */
-	unsigned long		__unused2;
-	__kernel_time_t		shm_ctime;	/* last change time */
-	unsigned long		__unused3;
-	__kernel_pid_t		shm_cpid;	/* pid of creator */
-	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	unsigned long		shm_nattch;	/* no. of current attaches */
-	unsigned long		__unused4;
-	unsigned long		__unused5;
-};
-#else
-# error endian order not defined
-#endif
-
 
 struct shminfo64 {
 	unsigned long	shmmax;
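
These xtensa msgbuf/sembuf/shmbuf changes follow the generic y2038 IPC rework:
each __kernel_time_t plus adjacent padding word becomes a low/high pair of
unsigned longs, and a full 64-bit timestamp is reassembled from the pair. A
sketch of the reassembly, assuming the _high field carries bits 32-63:

#include <stdint.h>

struct shmid64_ds_sketch {
	unsigned long shm_atime;	/* low 32 bits of the timestamp */
	unsigned long shm_atime_high;	/* bits 32-63, zero until 2106  */
};

/* Recombine the split fields into one 64-bit time value. */
static int64_t shm_atime64(const struct shmid64_ds_sketch *ds)
{
	return ((int64_t)ds->shm_atime_high << 32) | (uint32_t)ds->shm_atime;
}
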
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 732631c..392b4a8 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -261,12 +261,3 @@
 	.mapping_error = xtensa_dma_mapping_error,
 };
 EXPORT_SYMBOL(xtensa_dma_map_ops);
-
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-static int __init xtensa_dma_init(void)
-{
-	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-	return 0;
-}
-fs_initcall(xtensa_dma_init);
diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index 32c5207..86507fa 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -323,8 +323,6 @@
 void
 do_unaligned_user (struct pt_regs *regs)
 {
-	siginfo_t info;
-
 	__die_if_kernel("Unhandled unaligned exception in kernel",
 			regs, SIGKILL);
 
@@ -334,12 +332,7 @@
 			    "(pid = %d, pc = %#010lx)\n",
 			    regs->excvaddr, current->comm,
 			    task_pid_nr(current), regs->pc);
-	info.si_signo = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRALN;
-	info.si_addr = (void *) regs->excvaddr;
-	force_sig_info(SIGSEGV, &info, current);
-
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr, current);
 }
 #endif
 
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 8b9b6f4..c111a83 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -39,13 +39,13 @@
 	struct mm_struct *mm = current->mm;
 	unsigned int exccause = regs->exccause;
 	unsigned int address = regs->excvaddr;
-	siginfo_t info;
+	int code;
 
 	int is_write, is_exec;
 	int fault;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
-	info.si_code = SEGV_MAPERR;
+	code = SEGV_MAPERR;
 
 	/* We fault-in kernel-space virtual memory on-demand. The
 	 * 'reference' page table is init_mm.pgd.
@@ -91,7 +91,7 @@
 	 */
 
 good_area:
-	info.si_code = SEGV_ACCERR;
+	code = SEGV_ACCERR;
 
 	if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
@@ -157,11 +157,7 @@
 	if (user_mode(regs)) {
 		current->thread.bad_vaddr = address;
 		current->thread.error_code = is_write;
-		info.si_signo = SIGSEGV;
-		info.si_errno = 0;
-		/* info.si_code has been set above */
-		info.si_addr = (void *) address;
-		force_sig_info(SIGSEGV, &info, current);
+		force_sig_fault(SIGSEGV, code, (void *) address, current);
 		return;
 	}
 	bad_page_fault(regs, address, SIGSEGV);
@@ -186,11 +182,7 @@
 	 * or user mode.
 	 */
 	current->thread.bad_vaddr = address;
-	info.si_code = SIGBUS;
-	info.si_errno = 0;
-	info.si_code = BUS_ADRERR;
-	info.si_addr = (void *) address;
-	force_sig_info(SIGBUS, &info, current);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address, current);
 
 	/* Kernel mode? Handle exceptions or die */
 	if (!user_mode(regs))
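
The xtensa traps.c and fault.c hunks replace open-coded siginfo setup with
force_sig_fault(), which also removes a latent bug visible in the deleted
lines (si_code was first assigned SIGBUS, then overwritten with BUS_ADRERR).
A sketch of the resulting call shape, with the signature inferred from the
calls in this diff:

#include <signal.h>

struct task_struct;			/* kernel type, opaque here */

/* Signature as used by the converted call sites above. */
void force_sig_fault(int sig, int code, void *addr, struct task_struct *t);

/* Four field assignments plus force_sig_info() collapse into one call: */
static void report_unaligned_sketch(struct task_struct *task, void *addr)
{
	force_sig_fault(SIGBUS, BUS_ADRALN, addr, task);
}
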
diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c
index 92f567f..af81a62 100644
--- a/arch/xtensa/platforms/iss/console.c
+++ b/arch/xtensa/platforms/iss/console.c
@@ -153,19 +153,6 @@
 	return 0;
 }
 
-static int rs_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, rs_proc_show, NULL);
-}
-
-static const struct file_operations rs_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= rs_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static const struct tty_operations serial_ops = {
 	.open = rs_open,
 	.close = rs_close,
@@ -176,7 +163,7 @@
 	.chars_in_buffer = rs_chars_in_buffer,
 	.hangup = rs_hangup,
 	.wait_until_sent = rs_wait_until_sent,
-	.proc_fops = &rs_proc_fops,
+	.proc_show = rs_proc_show,
 };
 
 int __init rs_init(void)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d819dc7..a9e8633 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -55,13 +55,13 @@
 /* This should be called with the scheduler lock held. */
 static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;
 
 	if (!bfqg_stats_waiting(stats))
 		return;
 
-	now = sched_clock();
-	if (time_after64(now, stats->start_group_wait_time))
+	now = ktime_get_ns();
+	if (now > stats->start_group_wait_time)
 		blkg_stat_add(&stats->group_wait_time,
 			      now - stats->start_group_wait_time);
 	bfqg_stats_clear_waiting(stats);
@@ -77,20 +77,20 @@
 		return;
 	if (bfqg == curr_bfqg)
 		return;
-	stats->start_group_wait_time = sched_clock();
+	stats->start_group_wait_time = ktime_get_ns();
 	bfqg_stats_mark_waiting(stats);
 }
 
 /* This should be called with the scheduler lock held. */
 static void bfqg_stats_end_empty_time(struct bfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;
 
 	if (!bfqg_stats_empty(stats))
 		return;
 
-	now = sched_clock();
-	if (time_after64(now, stats->start_empty_time))
+	now = ktime_get_ns();
+	if (now > stats->start_empty_time)
 		blkg_stat_add(&stats->empty_time,
 			      now - stats->start_empty_time);
 	bfqg_stats_clear_empty(stats);
@@ -116,7 +116,7 @@
 	if (bfqg_stats_empty(stats))
 		return;
 
-	stats->start_empty_time = sched_clock();
+	stats->start_empty_time = ktime_get_ns();
 	bfqg_stats_mark_empty(stats);
 }
 
@@ -125,9 +125,9 @@
 	struct bfqg_stats *stats = &bfqg->stats;
 
 	if (bfqg_stats_idling(stats)) {
-		unsigned long long now = sched_clock();
+		u64 now = ktime_get_ns();
 
-		if (time_after64(now, stats->start_idle_time))
+		if (now > stats->start_idle_time)
 			blkg_stat_add(&stats->idle_time,
 				      now - stats->start_idle_time);
 		bfqg_stats_clear_idling(stats);
@@ -138,7 +138,7 @@
 {
 	struct bfqg_stats *stats = &bfqg->stats;
 
-	stats->start_idle_time = sched_clock();
+	stats->start_idle_time = ktime_get_ns();
 	bfqg_stats_mark_idling(stats);
 }
 
@@ -171,18 +171,18 @@
 	blkg_rwstat_add(&bfqg->stats.merged, op, 1);
 }
 
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
-				  uint64_t io_start_time, unsigned int op)
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+				  u64 io_start_time_ns, unsigned int op)
 {
 	struct bfqg_stats *stats = &bfqg->stats;
-	unsigned long long now = sched_clock();
+	u64 now = ktime_get_ns();
 
-	if (time_after64(now, io_start_time))
+	if (now > io_start_time_ns)
 		blkg_rwstat_add(&stats->service_time, op,
-				now - io_start_time);
-	if (time_after64(io_start_time, start_time))
+				now - io_start_time_ns);
+	if (io_start_time_ns > start_time_ns)
 		blkg_rwstat_add(&stats->wait_time, op,
-				io_start_time - start_time);
+				io_start_time_ns - start_time_ns);
 }
 
 #else /* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
@@ -191,8 +191,8 @@
 			      unsigned int op) { }
 void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { }
 void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { }
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
-				  uint64_t io_start_time, unsigned int op) { }
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+				  u64 io_start_time_ns, unsigned int op) { }
 void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { }
 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { }
 void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { }
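
The bfq-cgroup.c hunks above switch the stats timestamps from sched_clock()
guarded by time_after64() to ktime_get_ns(); the latter returns a monotonic
u64 nanosecond count, so a plain > comparison suffices and the wrap-aware
macro can go. A minimal sketch of the accounting pattern; the clock function
is a stand-in:

#include <stdint.h>

uint64_t ktime_get_ns_sketch(void);	/* stand-in for ktime_get_ns() */

struct idle_stats_sketch {
	uint64_t start_idle_time;
	uint64_t idle_time_total;
};

static void end_idling(struct idle_stats_sketch *s)
{
	uint64_t now = ktime_get_ns_sketch();

	if (now > s->start_idle_time)	/* monotonic clock: no wrap check */
		s->idle_time_total += now - s->start_idle_time;
}
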
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 771ae97..495b9dd 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -49,9 +49,39 @@
  *
  * In particular, to provide these low-latency guarantees, BFQ
  * explicitly privileges the I/O of two classes of time-sensitive
- * applications: interactive and soft real-time. This feature enables
- * BFQ to provide applications in these classes with a very low
- * latency. Finally, BFQ also features additional heuristics for
+ * applications: interactive and soft real-time. In more detail, BFQ
+ * behaves this way if the low_latency parameter is set (default
+ * configuration). This feature enables BFQ to provide applications in
+ * these classes with a very low latency.
+ *
+ * To implement this feature, BFQ constantly tries to detect whether
+ * the I/O requests in a bfq_queue come from an interactive or a soft
+ * real-time application. For brevity, in these cases, the queue is
+ * said to be interactive or soft real-time. In both cases, BFQ
+ * privileges the service of the queue, over that of non-interactive
+ * and non-soft-real-time queues. This privileging is performed,
+ * mainly, by raising the weight of the queue. So, for brevity, we
+ * call just weight-raising periods the time periods during which a
+ * queue is privileged, because deemed interactive or soft real-time.
+ *
+ * The detection of soft real-time queues/applications is described in
+ * detail in the comments on the function
+ * bfq_bfqq_softrt_next_start. On the other hand, the detection of an
+ * interactive queue works as follows: a queue is deemed interactive
+ * if it is constantly non-empty only for a limited time interval,
+ * after which it becomes empty. The queue may be deemed
+ * interactive again (for a limited time), if it restarts being
+ * constantly non-empty, provided that this happens only after the
+ * queue has remained empty for a given minimum idle time.
+ *
+ * By default, BFQ computes automatically the above maximum time
+ * interval, i.e., the time interval after which a constantly
+ * non-empty queue stops being deemed interactive. Since a queue is
+ * weight-raised while it is deemed interactive, this maximum time
+ * interval happens to coincide with the (maximum) duration of the
+ * weight-raising for interactive queues.
+ *
+ * Finally, BFQ also features additional heuristics for
  * preserving both a low latency and a high throughput on NCQ-capable,
  * rotational or flash-based devices, and to get the job done quickly
  * for applications consisting in many I/O-bound processes.
@@ -61,14 +91,14 @@
  * all low-latency heuristics for that device, by setting low_latency
  * to 0.
  *
- * BFQ is described in [1], where also a reference to the initial, more
- * theoretical paper on BFQ can be found. The interested reader can find
- * in the latter paper full details on the main algorithm, as well as
- * formulas of the guarantees and formal proofs of all the properties.
- * With respect to the version of BFQ presented in these papers, this
- * implementation adds a few more heuristics, such as the one that
- * guarantees a low latency to soft real-time applications, and a
- * hierarchical extension based on H-WF2Q+.
+ * BFQ is described in [1], where also a reference to the initial,
+ * more theoretical paper on BFQ can be found. The interested reader
+ * can find in the latter paper full details on the main algorithm, as
+ * well as formulas of the guarantees and formal proofs of all the
+ * properties.  With respect to the version of BFQ presented in these
+ * papers, this implementation adds a few more heuristics, such as the
+ * ones that guarantee a low latency to interactive and soft real-time
+ * applications, and a hierarchical extension based on H-WF2Q+.
  *
  * B-WF2Q+ is based on WF2Q+, which is described in [2], together with
  * H-WF2Q+, while the augmented tree used here to implement B-WF2Q+
@@ -218,56 +248,46 @@
 #define BFQ_RATE_SHIFT		16
 
 /*
- * By default, BFQ computes the duration of the weight raising for
- * interactive applications automatically, using the following formula:
- * duration = (R / r) * T, where r is the peak rate of the device, and
- * R and T are two reference parameters.
- * In particular, R is the peak rate of the reference device (see
- * below), and T is a reference time: given the systems that are
- * likely to be installed on the reference device according to its
- * speed class, T is about the maximum time needed, under BFQ and
- * while reading two files in parallel, to load typical large
- * applications on these systems (see the comments on
- * max_service_from_wr below, for more details on how T is obtained).
- * In practice, the slower/faster the device at hand is, the more/less
- * it takes to load applications with respect to the reference device.
- * Accordingly, the longer/shorter BFQ grants weight raising to
- * interactive applications.
+ * When configured for computing the duration of the weight-raising
+ * for interactive queues automatically (see the comments at the
+ * beginning of this file), BFQ does it using the following formula:
+ * duration = (ref_rate / r) * ref_wr_duration,
+ * where r is the peak rate of the device, and ref_rate and
+ * ref_wr_duration are two reference parameters.  In particular,
+ * ref_rate is the peak rate of the reference storage device (see
+ * below), and ref_wr_duration is about the maximum time needed, with
+ * BFQ and while reading two files in parallel, to load typical large
+ * applications on the reference device (see the comments on
+ * max_service_from_wr below, for more details on how ref_wr_duration
+ * is obtained).  In practice, the slower/faster the device at hand
+ * is, the more/less it takes to load applications with respect to the
+ * reference device.  Accordingly, the longer/shorter BFQ grants
+ * weight raising to interactive applications.
  *
- * BFQ uses four different reference pairs (R, T), depending on:
- * . whether the device is rotational or non-rotational;
- * . whether the device is slow, such as old or portable HDDs, as well as
- *   SD cards, or fast, such as newer HDDs and SSDs.
+ * BFQ uses two different reference pairs (ref_rate, ref_wr_duration),
+ * depending on whether the device is rotational or non-rotational.
  *
- * The device's speed class is dynamically (re)detected in
- * bfq_update_peak_rate() every time the estimated peak rate is updated.
+ * In the following definitions, ref_rate[0] and ref_wr_duration[0]
+ * are the reference values for a rotational device, whereas
+ * ref_rate[1] and ref_wr_duration[1] are the reference values for a
+ * non-rotational device. The reference rates are not the actual peak
+ * rates of the devices used as a reference, but slightly lower
+ * values. The reason for using slightly lower values is that the
+ * peak-rate estimator tends to yield slightly lower values than the
+ * actual peak rate (it can yield the actual peak rate only if there
+ * is only one process doing I/O, and the process does sequential
+ * I/O).
  *
- * In the following definitions, R_slow[0]/R_fast[0] and
- * T_slow[0]/T_fast[0] are the reference values for a slow/fast
- * rotational device, whereas R_slow[1]/R_fast[1] and
- * T_slow[1]/T_fast[1] are the reference values for a slow/fast
- * non-rotational device. Finally, device_speed_thresh are the
- * thresholds used to switch between speed classes. The reference
- * rates are not the actual peak rates of the devices used as a
- * reference, but slightly lower values. The reason for using these
- * slightly lower values is that the peak-rate estimator tends to
- * yield slightly lower values than the actual peak rate (it can yield
- * the actual peak rate only if there is only one process doing I/O,
- * and the process does sequential I/O).
- *
- * Both the reference peak rates and the thresholds are measured in
- * sectors/usec, left-shifted by BFQ_RATE_SHIFT.
+ * The reference peak rates are measured in sectors/usec, left-shifted
+ * by BFQ_RATE_SHIFT.
  */
-static int R_slow[2] = {1000, 10700};
-static int R_fast[2] = {14000, 33000};
+static int ref_rate[2] = {14000, 33000};
 /*
- * To improve readability, a conversion function is used to initialize the
- * following arrays, which entails that they can be initialized only in a
- * function.
+ * To improve readability, a conversion function is used to initialize
+ * the following array, which entails that the array can be
+ * initialized only in a function.
  */
-static int T_slow[2];
-static int T_fast[2];
-static int device_speed_thresh[2];
+static int ref_wr_duration[2];
 
 /*
  * BFQ uses the above-detailed, time-based weight-raising mechanism to
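
To make the duration formula above concrete: with the non-rotational reference
pair, a device measured at half of ref_rate is granted twice ref_wr_duration
of interactive weight-raising. The numbers below are illustrative only; as in
bfq_wr_duration() further down, the product ref_rate * ref_wr_duration is
precomputed as rate_dur_prod and divided by the measured peak rate:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative values, not the kernel's runtime numbers. */
	uint64_t ref_rate        = 33000;	/* reference peak rate         */
	uint64_t ref_wr_duration = 2500;	/* reference duration, jiffies */
	uint64_t peak_rate       = 16500;	/* device at half the ref rate */

	/* duration = (ref_rate / r) * ref_wr_duration, evaluated as
	 * rate_dur_prod / peak_rate to keep integer precision. */
	uint64_t rate_dur_prod = ref_rate * ref_wr_duration;
	uint64_t dur = rate_dur_prod / peak_rate;

	printf("wr duration = %llu jiffies\n", (unsigned long long)dur);
	return 0;	/* prints 5000 */
}
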
@@ -487,46 +507,6 @@
 }
 
 /*
- * See the comments on bfq_limit_depth for the purpose of
- * the depths set in the function.
- */
-static void bfq_update_depths(struct bfq_data *bfqd, struct sbitmap_queue *bt)
-{
-	bfqd->sb_shift = bt->sb.shift;
-
-	/*
-	 * In-word depths if no bfq_queue is being weight-raised:
-	 * leaving 25% of tags only for sync reads.
-	 *
-	 * In next formulas, right-shift the value
-	 * (1U<<bfqd->sb_shift), instead of computing directly
-	 * (1U<<(bfqd->sb_shift - something)), to be robust against
-	 * any possible value of bfqd->sb_shift, without having to
-	 * limit 'something'.
-	 */
-	/* no more than 50% of tags for async I/O */
-	bfqd->word_depths[0][0] = max((1U<<bfqd->sb_shift)>>1, 1U);
-	/*
-	 * no more than 75% of tags for sync writes (25% extra tags
-	 * w.r.t. async I/O, to prevent async I/O from starving sync
-	 * writes)
-	 */
-	bfqd->word_depths[0][1] = max(((1U<<bfqd->sb_shift) * 3)>>2, 1U);
-
-	/*
-	 * In-word depths in case some bfq_queue is being weight-
-	 * raised: leaving ~63% of tags for sync reads. This is the
-	 * highest percentage for which, in our tests, application
-	 * start-up times didn't suffer from any regression due to tag
-	 * shortage.
-	 */
-	/* no more than ~18% of tags for async I/O */
-	bfqd->word_depths[1][0] = max(((1U<<bfqd->sb_shift) * 3)>>4, 1U);
-	/* no more than ~37% of tags for sync writes (~20% extra tags) */
-	bfqd->word_depths[1][1] = max(((1U<<bfqd->sb_shift) * 6)>>4, 1U);
-}
-
-/*
  * Async I/O can easily starve sync I/O (both sync reads and sync
  * writes), by consuming all tags. Similarly, storms of sync writes,
  * such as those that sync(2) may trigger, can starve sync reads.
@@ -535,25 +515,11 @@
  */
 static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 {
-	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
 	struct bfq_data *bfqd = data->q->elevator->elevator_data;
-	struct sbitmap_queue *bt;
 
 	if (op_is_sync(op) && !op_is_write(op))
 		return;
 
-	if (data->flags & BLK_MQ_REQ_RESERVED) {
-		if (unlikely(!tags->nr_reserved_tags)) {
-			WARN_ON_ONCE(1);
-			return;
-		}
-		bt = &tags->breserved_tags;
-	} else
-		bt = &tags->bitmap_tags;
-
-	if (unlikely(bfqd->sb_shift != bt->sb.shift))
-		bfq_update_depths(bfqd, bt);
-
 	data->shallow_depth =
 		bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
 
@@ -906,26 +872,30 @@
 	if (bfqd->bfq_wr_max_time > 0)
 		return bfqd->bfq_wr_max_time;
 
-	dur = bfqd->RT_prod;
+	dur = bfqd->rate_dur_prod;
 	do_div(dur, bfqd->peak_rate);
 
 	/*
-	 * Limit duration between 3 and 13 seconds. Tests show that
-	 * higher values than 13 seconds often yield the opposite of
-	 * the desired result, i.e., worsen responsiveness by letting
-	 * non-interactive and non-soft-real-time applications
-	 * preserve weight raising for a too long time interval.
+	 * Limit duration between 3 and 25 seconds. The upper limit
+	 * has been conservatively set after the following worst case:
+	 * on a QEMU/KVM virtual machine
+	 * - running on a slow PC
+	 * - with a virtual disk stacked on a slow low-end 5400rpm HDD
+	 * - serving a heavy I/O workload, such as the sequential reading
+	 *   of several files
+	 * mplayer took 23 seconds to start, if constantly weight-raised.
+	 *
+	 * As for values higher than that needed to accommodate the above
+	 * bad scenario, tests show that such values would often yield
+	 * the opposite of the desired result, i.e., would worsen
+	 * responsiveness by allowing non-interactive applications to
+	 * preserve weight raising for too long.
 	 *
 	 * On the other end, lower values than 3 seconds make it
 	 * difficult for most interactive tasks to complete their jobs
 	 * before weight-raising finishes.
 	 */
-	if (dur > msecs_to_jiffies(13000))
-		dur = msecs_to_jiffies(13000);
-	else if (dur < msecs_to_jiffies(3000))
-		dur = msecs_to_jiffies(3000);
-
-	return dur;
+	return clamp_val(dur, msecs_to_jiffies(3000), msecs_to_jiffies(25000));
 }
 
 /* switch back from soft real-time to interactive weight raising */
@@ -1393,15 +1363,6 @@
 }
 
 /*
- * Return the farthest future time instant according to jiffies
- * macros.
- */
-static unsigned long bfq_greatest_from_now(void)
-{
-	return jiffies + MAX_JIFFY_OFFSET;
-}
-
-/*
  * Return the farthest past time instant according to jiffies
  * macros.
  */
@@ -1545,7 +1506,8 @@
 	in_burst = bfq_bfqq_in_large_burst(bfqq);
 	soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 &&
 		!in_burst &&
-		time_is_before_jiffies(bfqq->soft_rt_next_start);
+		time_is_before_jiffies(bfqq->soft_rt_next_start) &&
+		bfqq->dispatched == 0;
 	*interactive = !in_burst && idle_for_long_time;
 	wr_or_deserves_wr = bfqd->low_latency &&
 		(bfqq->wr_coeff > 1 ||
@@ -1858,6 +1820,8 @@
 	return ELEVATOR_NO_MERGE;
 }
 
+static struct bfq_queue *bfq_init_rq(struct request *rq);
+
 static void bfq_request_merged(struct request_queue *q, struct request *req,
 			       enum elv_merge type)
 {
@@ -1866,7 +1830,7 @@
 	    blk_rq_pos(req) <
 	    blk_rq_pos(container_of(rb_prev(&req->rb_node),
 				    struct request, rb_node))) {
-		struct bfq_queue *bfqq = RQ_BFQQ(req);
+		struct bfq_queue *bfqq = bfq_init_rq(req);
 		struct bfq_data *bfqd = bfqq->bfqd;
 		struct request *prev, *next_rq;
 
@@ -1891,14 +1855,25 @@
 	}
 }
 
+/*
+ * This function is called to notify the scheduler that the requests
+ * rq and 'next' have been merged, with 'next' going away.  BFQ
+ * exploits this hook to address the following issue: if 'next' has a
+ * fifo_time lower than that of rq, then the fifo_time of rq must be
+ * set to the value of 'next', to not forget the greater age of 'next'.
+ *
+ * NOTE: in this function we assume that rq is in a bfq_queue, based
+ * on the fact that rq is picked from the hash table q->elevator->hash,
+ * which, in turn, is filled only with I/O requests present in
+ * bfq_queues, while BFQ is in use for the request queue q. In fact,
+ * the function that fills this hash table (elv_rqhash_add) is called
+ * only by bfq_insert_request.
+ */
 static void bfq_requests_merged(struct request_queue *q, struct request *rq,
 				struct request *next)
 {
-	struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
-
-	if (!RB_EMPTY_NODE(&rq->rb_node))
-		goto end;
-	spin_lock_irq(&bfqq->bfqd->lock);
+	struct bfq_queue *bfqq = bfq_init_rq(rq),
+		*next_bfqq = bfq_init_rq(next);
 
 	/*
 	 * If next and rq belong to the same bfq_queue and next is older
@@ -1920,11 +1895,6 @@
 	if (bfqq->next_rq == next)
 		bfqq->next_rq = rq;
 
-	bfq_remove_request(q, next);
-	bfqg_stats_update_io_remove(bfqq_group(bfqq), next->cmd_flags);
-
-	spin_unlock_irq(&bfqq->bfqd->lock);
-end:
 	bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
 }
 
@@ -2506,37 +2476,15 @@
 /*
  * Update parameters related to throughput and responsiveness, as a
  * function of the estimated peak rate. See comments on
- * bfq_calc_max_budget(), and on T_slow and T_fast arrays.
+ * bfq_calc_max_budget(), and on the ref_wr_duration array.
  */
 static void update_thr_responsiveness_params(struct bfq_data *bfqd)
 {
-	int dev_type = blk_queue_nonrot(bfqd->queue);
-
-	if (bfqd->bfq_user_max_budget == 0)
+	if (bfqd->bfq_user_max_budget == 0) {
 		bfqd->bfq_max_budget =
 			bfq_calc_max_budget(bfqd);
-
-	if (bfqd->device_speed == BFQ_BFQD_FAST &&
-	    bfqd->peak_rate < device_speed_thresh[dev_type]) {
-		bfqd->device_speed = BFQ_BFQD_SLOW;
-		bfqd->RT_prod = R_slow[dev_type] *
-			T_slow[dev_type];
-	} else if (bfqd->device_speed == BFQ_BFQD_SLOW &&
-		   bfqd->peak_rate > device_speed_thresh[dev_type]) {
-		bfqd->device_speed = BFQ_BFQD_FAST;
-		bfqd->RT_prod = R_fast[dev_type] *
-			T_fast[dev_type];
+		bfq_log(bfqd, "new max_budget = %d", bfqd->bfq_max_budget);
 	}
-
-	bfq_log(bfqd,
-"dev_type %s dev_speed_class = %s (%llu sects/sec), thresh %llu setcs/sec",
-		dev_type == 0 ? "ROT" : "NONROT",
-		bfqd->device_speed == BFQ_BFQD_FAST ? "FAST" : "SLOW",
-		bfqd->device_speed == BFQ_BFQD_FAST ?
-		(USEC_PER_SEC*(u64)R_fast[dev_type])>>BFQ_RATE_SHIFT :
-		(USEC_PER_SEC*(u64)R_slow[dev_type])>>BFQ_RATE_SHIFT,
-		(USEC_PER_SEC*(u64)device_speed_thresh[dev_type])>>
-		BFQ_RATE_SHIFT);
 }
 
 static void bfq_reset_rate_computation(struct bfq_data *bfqd,
@@ -3266,23 +3214,6 @@
 				bfq_bfqq_softrt_next_start(bfqd, bfqq);
 		else {
 			/*
-			 * The application is still waiting for the
-			 * completion of one or more requests:
-			 * prevent it from possibly being incorrectly
-			 * deemed as soft real-time by setting its
-			 * soft_rt_next_start to infinity. In fact,
-			 * without this assignment, the application
-			 * would be incorrectly deemed as soft
-			 * real-time if:
-			 * 1) it issued a new request before the
-			 *    completion of all its in-flight
-			 *    requests, and
-			 * 2) at that time, its soft_rt_next_start
-			 *    happened to be in the past.
-			 */
-			bfqq->soft_rt_next_start =
-				bfq_greatest_from_now();
-			/*
 			 * Schedule an update of soft_rt_next_start to when
 			 * the task may be discovered to be isochronous.
 			 */
@@ -4540,14 +4471,12 @@
 					   unsigned int cmd_flags) {}
 #endif
 
-static void bfq_prepare_request(struct request *rq, struct bio *bio);
-
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			       bool at_head)
 {
 	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
-	struct bfq_queue *bfqq = RQ_BFQQ(rq);
+	struct bfq_queue *bfqq;
 	bool idle_timer_disabled = false;
 	unsigned int cmd_flags;
 
@@ -4562,24 +4491,13 @@
 	blk_mq_sched_request_inserted(rq);
 
 	spin_lock_irq(&bfqd->lock);
+	bfqq = bfq_init_rq(rq);
 	if (at_head || blk_rq_is_passthrough(rq)) {
 		if (at_head)
 			list_add(&rq->queuelist, &bfqd->dispatch);
 		else
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
-	} else {
-		if (WARN_ON_ONCE(!bfqq)) {
-			/*
-			 * This should never happen. Most likely rq is
-			 * a requeued regular request, being
-			 * re-inserted without being first
-			 * re-prepared. Do a prepare, to avoid
-			 * failure.
-			 */
-			bfq_prepare_request(rq, rq->bio);
-			bfqq = RQ_BFQQ(rq);
-		}
-
+	} else { /* bfqq is assumed to be non-NULL here */
 		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
 		 * Update bfqq, because, if a queue merge has occurred
@@ -4778,8 +4696,8 @@
 
 	if (rq->rq_flags & RQF_STARTED)
 		bfqg_stats_update_completion(bfqq_group(bfqq),
-					     rq_start_time_ns(rq),
-					     rq_io_start_time_ns(rq),
+					     rq->start_time_ns,
+					     rq->io_start_time_ns,
 					     rq->cmd_flags);
 
 	if (likely(rq->rq_flags & RQF_STARTED)) {
@@ -4922,11 +4840,48 @@
 }
 
 /*
- * Allocate bfq data structures associated with this request.
+ * Only reset private fields. The actual request preparation will be
+ * performed by bfq_init_rq, when rq is either inserted or merged. See
+ * comments on bfq_init_rq for the reason behind this delayed
+ * preparation.
  */
 static void bfq_prepare_request(struct request *rq, struct bio *bio)
 {
+	/*
+	 * Regardless of whether we have an icq attached, we have to
+	 * clear the scheduler pointers, as they might point to
+	 * previously allocated bic/bfqq structs.
+	 */
+	rq->elv.priv[0] = rq->elv.priv[1] = NULL;
+}
+
+/*
+ * If needed, init rq, allocate bfq data structures associated with
+ * rq, and increment reference counters in the destination bfq_queue
+ * for rq. Return the destination bfq_queue for rq, or NULL if rq is
+ * not associated with any bfq_queue.
+ *
+ * This function is invoked by the functions that perform rq insertion
+ * or merging. One might have expected the above preparation operations
+ * to be performed in bfq_prepare_request, rather than delayed until rq
+ * is inserted or merged. The rationale behind this delayed
+ * preparation is that, after the prepare_request hook is invoked for
+ * rq, rq may still be transformed into a request with no icq, i.e., a
+ * request not associated with any queue. No bfq hook is invoked to
+ * signal this transformation. As a consequence, should these
+ * preparation operations be performed when the prepare_request hook
+ * is invoked, and should rq be transformed one moment later, bfq
+ * would end up in an inconsistent state, because it would have
+ * incremented some queue counters for an rq destined to
+ * transformation, without any chance to correctly lower these
+ * counters back. In contrast, no transformation can occur any longer
+ * once rq has been inserted or merged. So, it is safe to execute
+ * these preparation operations when rq is finally inserted or merged.
+ */
+static struct bfq_queue *bfq_init_rq(struct request *rq)
+{
 	struct request_queue *q = rq->q;
+	struct bio *bio = rq->bio;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct bfq_io_cq *bic;
 	const int is_sync = rq_is_sync(rq);
@@ -4934,20 +4889,21 @@
 	bool new_queue = false;
 	bool bfqq_already_existing = false, split = false;
 
+	if (unlikely(!rq->elv.icq))
+		return NULL;
+
 	/*
-	 * Even if we don't have an icq attached, we should still clear
-	 * the scheduler pointers, as they might point to previously
-	 * allocated bic/bfqq structs.
+	 * We assume that elv.priv[1] is set only if everything is set
+	 * for this rq. This holds, because this function is
+	 * invoked only for insertion or merging, and, after such
+	 * events, a request cannot be manipulated any longer before
+	 * being removed from bfq.
 	 */
-	if (!rq->elv.icq) {
-		rq->elv.priv[0] = rq->elv.priv[1] = NULL;
-		return;
-	}
+	if (rq->elv.priv[1])
+		return rq->elv.priv[1];
 
 	bic = icq_to_bic(rq->elv.icq);
 
-	spin_lock_irq(&bfqd->lock);
-
 	bfq_check_ioprio_change(bic, bio);
 
 	bfq_bic_update_cgroup(bic, bio);
@@ -5006,7 +4962,7 @@
 	if (unlikely(bfq_bfqq_just_created(bfqq)))
 		bfq_handle_burst(bfqd, bfqq);
 
-	spin_unlock_irq(&bfqd->lock);
+	return bfqq;
 }
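
The comment block above bfq_init_rq spells out the two-phase scheme: the prepare hook only resets, and the real, idempotent setup runs at insertion or merge time, once rq can no longer lose its icq. A compact userspace sketch of that pattern; every name here (struct req, alloc_ctx_for) is hypothetical:

    #include <stddef.h>

    struct ctx;
    struct req {
            struct ctx *icq;   /* may be dropped after prepare */
            void *priv;        /* set only once fully prepared */
    };

    struct ctx *alloc_ctx_for(struct req *rq);  /* assumed allocator */

    /* Phase 1 (prepare hook): only reset, never allocate. */
    static void prepare(struct req *rq)
    {
            rq->priv = NULL;
    }

    /* Phase 2 (insert/merge time): idempotent real setup. */
    static void *init_on_insert(struct req *rq)
    {
            if (!rq->icq)      /* rq lost its icq: not managed by us */
                    return NULL;
            if (rq->priv)      /* already fully set up */
                    return rq->priv;
            rq->priv = alloc_ctx_for(rq);
            return rq->priv;
    }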
 
 static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)
@@ -5105,6 +5061,64 @@
 	__bfq_put_async_bfqq(bfqd, &bfqg->async_idle_bfqq);
 }
 
+/*
+ * See the comments on bfq_limit_depth for the purpose of
+ * the depths set in the function. Return the minimum shallow depth we'll use.
+ */
+static unsigned int bfq_update_depths(struct bfq_data *bfqd,
+				      struct sbitmap_queue *bt)
+{
+	unsigned int i, j, min_shallow = UINT_MAX;
+
+	/*
+	 * In-word depths if no bfq_queue is being weight-raised:
+	 * leaving 25% of tags only for sync reads.
+	 *
+	 * In next formulas, right-shift the value
+	 * (1U<<bt->sb.shift), instead of computing directly
+	 * (1U<<(bt->sb.shift - something)), to be robust against
+	 * any possible value of bt->sb.shift, without having to
+	 * limit 'something'.
+	 */
+	/* no more than 50% of tags for async I/O */
+	bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U);
+	/*
+	 * no more than 75% of tags for sync writes (25% extra tags
+	 * w.r.t. async I/O, to prevent async I/O from starving sync
+	 * writes)
+	 */
+	bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U);
+
+	/*
+	 * In-word depths in case some bfq_queue is being weight-
+	 * raised: leaving ~63% of tags for sync reads. This is the
+	 * highest percentage for which, in our tests, application
+	 * start-up times didn't suffer from any regression due to tag
+	 * shortage.
+	 */
+	/* no more than ~18% of tags for async I/O */
+	bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U);
+	/* no more than ~37% of tags for sync writes (~20% extra tags) */
+	bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U);
+
+	for (i = 0; i < 2; i++)
+		for (j = 0; j < 2; j++)
+			min_shallow = min(min_shallow, bfqd->word_depths[i][j]);
+
+	return min_shallow;
+}
+
+static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+{
+	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
+	struct blk_mq_tags *tags = hctx->sched_tags;
+	unsigned int min_shallow;
+
+	min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
+	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
+	return 0;
+}
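
bfq_update_depths turns the sbitmap word shift into four limits: async and sync-write depths, with and without weight-raising. The arithmetic is easy to verify standalone; for shift = 6 (64 tags per word) the formulas above yield 32, 48, 12 and 24:

    #include <stdio.h>

    /* Reproduces the depth formulas from bfq_update_depths. */
    int main(void)
    {
            unsigned int shift = 6, word = 1U << shift;
            unsigned int d[2][2];

            d[0][0] = (word >> 1) ? word >> 1 : 1;              /* 50% async */
            d[0][1] = ((word * 3) >> 2) ? (word * 3) >> 2 : 1;  /* 75% sync writes */
            d[1][0] = ((word * 3) >> 4) ? (word * 3) >> 4 : 1;  /* ~18% async, wr */
            d[1][1] = ((word * 6) >> 4) ? (word * 6) >> 4 : 1;  /* ~37% sync writes, wr */

            printf("%u %u %u %u\n", d[0][0], d[0][1], d[1][0], d[1][1]);
            /* prints: 32 48 12 24 */
            return 0;
    }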
+
 static void bfq_exit_queue(struct elevator_queue *e)
 {
 	struct bfq_data *bfqd = e->elevator_data;
@@ -5242,14 +5256,12 @@
 	bfqd->wr_busy_queues = 0;
 
 	/*
-	 * Begin by assuming, optimistically, that the device is a
-	 * high-speed one, and that its peak rate is equal to 2/3 of
-	 * the highest reference rate.
+	 * Begin by assuming, optimistically, that the device peak
+	 * rate is equal to 2/3 of the highest reference rate.
 	 */
-	bfqd->RT_prod = R_fast[blk_queue_nonrot(bfqd->queue)] *
-			T_fast[blk_queue_nonrot(bfqd->queue)];
-	bfqd->peak_rate = R_fast[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
-	bfqd->device_speed = BFQ_BFQD_FAST;
+	bfqd->rate_dur_prod = ref_rate[blk_queue_nonrot(bfqd->queue)] *
+		ref_wr_duration[blk_queue_nonrot(bfqd->queue)];
+	bfqd->peak_rate = ref_rate[blk_queue_nonrot(bfqd->queue)] * 2 / 3;
 
 	spin_lock_init(&bfqd->lock);
 
@@ -5526,6 +5538,7 @@
 		.requests_merged	= bfq_requests_merged,
 		.request_merged		= bfq_request_merged,
 		.has_work		= bfq_has_work,
+		.init_hctx		= bfq_init_hctx,
 		.init_sched		= bfq_init_queue,
 		.exit_sched		= bfq_exit_queue,
 	},
@@ -5556,8 +5569,8 @@
 	/*
 	 * Times to load large popular applications for the typical
 	 * systems installed on the reference devices (see the
-	 * comments before the definitions of the next two
-	 * arrays). Actually, we use slightly slower values, as the
+	 * comments before the definition of the next
+	 * array). Actually, we use slightly lower values, as the
 	 * estimated peak rate tends to be smaller than the actual
 	 * peak rate.  The reason for this last fact is that estimates
 	 * are computed over much shorter time intervals than the long
@@ -5566,25 +5579,8 @@
 	 * scheduler cannot rely on a peak-rate-evaluation workload to
 	 * be run for a long time.
 	 */
-	T_slow[0] = msecs_to_jiffies(3500); /* actually 4 sec */
-	T_slow[1] = msecs_to_jiffies(6000); /* actually 6.5 sec */
-	T_fast[0] = msecs_to_jiffies(7000); /* actually 8 sec */
-	T_fast[1] = msecs_to_jiffies(2500); /* actually 3 sec */
-
-	/*
-	 * Thresholds that determine the switch between speed classes
-	 * (see the comments before the definition of the array
-	 * device_speed_thresh). These thresholds are biased towards
-	 * transitions to the fast class. This is safer than the
-	 * opposite bias. In fact, a wrong transition to the slow
-	 * class results in short weight-raising periods, because the
-	 * speed of the device then tends to be higher that the
-	 * reference peak rate. On the opposite end, a wrong
-	 * transition to the fast class tends to increase
-	 * weight-raising periods, because of the opposite reason.
-	 */
-	device_speed_thresh[0] = (4 * R_slow[0]) / 3;
-	device_speed_thresh[1] = (4 * R_slow[1]) / 3;
+	ref_wr_duration[0] = msecs_to_jiffies(7000); /* actually 8 sec */
+	ref_wr_duration[1] = msecs_to_jiffies(2500); /* actually 3 sec */
 
 	ret = elv_register(&iosched_bfq_mq);
 	if (ret)
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index ae2f3da..0f712e0 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -399,11 +399,6 @@
 	struct bfq_ttime saved_ttime;
 };
 
-enum bfq_device_speed {
-	BFQ_BFQD_FAST,
-	BFQ_BFQD_SLOW,
-};
-
 /**
  * struct bfq_data - per-device data structure.
  *
@@ -611,12 +606,11 @@
 	/* Max service-rate for a soft real-time queue, in sectors/sec */
 	unsigned int bfq_wr_max_softrt_rate;
 	/*
-	 * Cached value of the product R*T, used for computing the
-	 * maximum duration of weight raising automatically.
+	 * Cached value of the product ref_rate*ref_wr_duration, used
+	 * for computing the maximum duration of weight raising
+	 * automatically.
 	 */
-	u64 RT_prod;
-	/* device-speed class for the low-latency heuristic */
-	enum bfq_device_speed device_speed;
+	u64 rate_dur_prod;
 
 	/* fallback dummy bfqq for extreme OOM conditions */
 	struct bfq_queue oom_bfqq;
@@ -636,12 +630,6 @@
 	struct bfq_queue *bio_bfqq;
 
 	/*
-	 * Cached sbitmap shift, used to compute depth limits in
-	 * bfq_update_depths.
-	 */
-	unsigned int sb_shift;
-
-	/*
 	 * Depth limits used in bfq_limit_depth (see comments on the
 	 * function)
 	 */
@@ -732,9 +720,9 @@
 	/* total time with empty current active q with other requests queued */
 	struct blkg_stat		empty_time;
 	/* fields after this shouldn't be cleared on stat reset */
-	uint64_t			start_group_wait_time;
-	uint64_t			start_idle_time;
-	uint64_t			start_empty_time;
+	u64				start_group_wait_time;
+	u64				start_idle_time;
+	u64				start_empty_time;
 	uint16_t			flags;
 #endif	/* CONFIG_BFQ_GROUP_IOSCHED && CONFIG_DEBUG_BLK_CGROUP */
 };
@@ -856,8 +844,8 @@
 			      unsigned int op);
 void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
 void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op);
-void bfqg_stats_update_completion(struct bfq_group *bfqg, uint64_t start_time,
-				  uint64_t io_start_time, unsigned int op);
+void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns,
+				  u64 io_start_time_ns, unsigned int op);
 void bfqg_stats_update_dequeue(struct bfq_group *bfqg);
 void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg);
 void bfqg_stats_update_idle_time(struct bfq_group *bfqg);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 9cfdd6c..add7c7c 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -56,12 +56,12 @@
 	struct bio_set *bs = bio->bi_pool;
 	unsigned inline_vecs;
 
-	if (!bs || !bs->bio_integrity_pool) {
+	if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
 		bip = kmalloc(sizeof(struct bio_integrity_payload) +
 			      sizeof(struct bio_vec) * nr_vecs, gfp_mask);
 		inline_vecs = nr_vecs;
 	} else {
-		bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
+		bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
 		inline_vecs = BIP_INLINE_VECS;
 	}
 
@@ -74,7 +74,7 @@
 		unsigned long idx = 0;
 
 		bip->bip_vec = bvec_alloc(gfp_mask, nr_vecs, &idx,
-					  bs->bvec_integrity_pool);
+					  &bs->bvec_integrity_pool);
 		if (!bip->bip_vec)
 			goto err;
 		bip->bip_max_vcnt = bvec_nr_vecs(idx);
@@ -90,7 +90,7 @@
 
 	return bip;
 err:
-	mempool_free(bip, bs->bio_integrity_pool);
+	mempool_free(bip, &bs->bio_integrity_pool);
 	return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
@@ -111,10 +111,10 @@
 		kfree(page_address(bip->bip_vec->bv_page) +
 		      bip->bip_vec->bv_offset);
 
-	if (bs && bs->bio_integrity_pool) {
-		bvec_free(bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
+	if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
+		bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
 
-		mempool_free(bip, bs->bio_integrity_pool);
+		mempool_free(bip, &bs->bio_integrity_pool);
 	} else {
 		kfree(bip);
 	}
@@ -465,16 +465,15 @@
 
 int bioset_integrity_create(struct bio_set *bs, int pool_size)
 {
-	if (bs->bio_integrity_pool)
+	if (mempool_initialized(&bs->bio_integrity_pool))
 		return 0;
 
-	bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
-	if (!bs->bio_integrity_pool)
+	if (mempool_init_slab_pool(&bs->bio_integrity_pool,
+				   pool_size, bip_slab))
 		return -1;
 
-	bs->bvec_integrity_pool = biovec_create_pool(pool_size);
-	if (!bs->bvec_integrity_pool) {
-		mempool_destroy(bs->bio_integrity_pool);
+	if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
+		mempool_exit(&bs->bio_integrity_pool);
 		return -1;
 	}
 
@@ -484,8 +483,8 @@
 
 void bioset_integrity_free(struct bio_set *bs)
 {
-	mempool_destroy(bs->bio_integrity_pool);
-	mempool_destroy(bs->bvec_integrity_pool);
+	mempool_exit(&bs->bio_integrity_pool);
+	mempool_exit(&bs->bvec_integrity_pool);
 }
 EXPORT_SYMBOL(bioset_integrity_free);
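
The integrity pools follow the same conversion as the rest of the series: mempool_t pointers become embedded mempool_t members, create/destroy becomes init/exit, and mempool_initialized() replaces NULL checks. A sketch of the caller-side pattern (my_dev and my_slab are made-up names; the mempool_* calls are the ones used above):

    /* Hedged sketch of the embedded-mempool lifecycle. */
    struct my_dev {
            mempool_t pool;          /* embedded, no separate allocation */
    };

    static int my_dev_setup(struct my_dev *dev, struct kmem_cache *my_slab)
    {
            return mempool_init_slab_pool(&dev->pool, 16, my_slab);
    }

    static void my_dev_teardown(struct my_dev *dev)
    {
            if (mempool_initialized(&dev->pool))
                    mempool_exit(&dev->pool);
    }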
 
diff --git a/block/bio.c b/block/bio.c
index 53e0f0a..5f75635 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -53,7 +53,7 @@
  * fs_bio_set is the bio_set containing bio and iovec memory pools used by
  * IO code that does not need private memory pools.
  */
-struct bio_set *fs_bio_set;
+struct bio_set fs_bio_set;
 EXPORT_SYMBOL(fs_bio_set);
 
 /*
@@ -254,7 +254,7 @@
 	bio_uninit(bio);
 
 	if (bs) {
-		bvec_free(bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
+		bvec_free(&bs->bvec_pool, bio->bi_io_vec, BVEC_POOL_IDX(bio));
 
 		/*
 		 * If we have front padding, adjust the bio pointer before freeing
@@ -262,7 +262,7 @@
 		p = bio;
 		p -= bs->front_pad;
 
-		mempool_free(p, bs->bio_pool);
+		mempool_free(p, &bs->bio_pool);
 	} else {
 		/* Bio was allocated by bio_kmalloc() */
 		kfree(bio);
@@ -454,7 +454,8 @@
 		inline_vecs = nr_iovecs;
 	} else {
 		/* should not use nobvec bioset for nr_iovecs > 0 */
-		if (WARN_ON_ONCE(!bs->bvec_pool && nr_iovecs > 0))
+		if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) &&
+				 nr_iovecs > 0))
 			return NULL;
 		/*
 		 * generic_make_request() converts recursion to iteration; this
@@ -483,11 +484,11 @@
 		    bs->rescue_workqueue)
 			gfp_mask &= ~__GFP_DIRECT_RECLAIM;
 
-		p = mempool_alloc(bs->bio_pool, gfp_mask);
+		p = mempool_alloc(&bs->bio_pool, gfp_mask);
 		if (!p && gfp_mask != saved_gfp) {
 			punt_bios_to_rescuer(bs);
 			gfp_mask = saved_gfp;
-			p = mempool_alloc(bs->bio_pool, gfp_mask);
+			p = mempool_alloc(&bs->bio_pool, gfp_mask);
 		}
 
 		front_pad = bs->front_pad;
@@ -503,11 +504,11 @@
 	if (nr_iovecs > inline_vecs) {
 		unsigned long idx = 0;
 
-		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+		bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
 		if (!bvl && gfp_mask != saved_gfp) {
 			punt_bios_to_rescuer(bs);
 			gfp_mask = saved_gfp;
-			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
+			bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, &bs->bvec_pool);
 		}
 
 		if (unlikely(!bvl))
@@ -524,25 +525,25 @@
 	return bio;
 
 err_free:
-	mempool_free(p, bs->bio_pool);
+	mempool_free(p, &bs->bio_pool);
 	return NULL;
 }
 EXPORT_SYMBOL(bio_alloc_bioset);
 
-void zero_fill_bio(struct bio *bio)
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
 {
 	unsigned long flags;
 	struct bio_vec bv;
 	struct bvec_iter iter;
 
-	bio_for_each_segment(bv, bio, iter) {
+	__bio_for_each_segment(bv, bio, iter, start) {
 		char *data = bvec_kmap_irq(&bv, &flags);
 		memset(data, 0, bv.bv_len);
 		flush_dcache_page(bv.bv_page);
 		bvec_kunmap_irq(data, &flags);
 	}
 }
-EXPORT_SYMBOL(zero_fill_bio);
+EXPORT_SYMBOL(zero_fill_bio_iter);
 
 /**
  * bio_put - release a reference to a bio
@@ -773,7 +774,7 @@
 			return 0;
 	}
 
-	if (bio->bi_vcnt >= bio->bi_max_vecs)
+	if (bio_full(bio))
 		return 0;
 
 	/*
@@ -821,6 +822,65 @@
 EXPORT_SYMBOL(bio_add_pc_page);
 
 /**
+ * __bio_try_merge_page - try appending data to an existing bvec.
+ * @bio: destination bio
+ * @page: page to add
+ * @len: length of the data to add
+ * @off: offset of the data in @page
+ *
+ * Try to add the data at @page + @off to the last bvec of @bio.  This is
+ * a useful optimisation for file systems with a block size smaller than the
+ * page size.
+ *
+ * Return %true on success or %false on failure.
+ */
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int off)
+{
+	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+		return false;
+
+	if (bio->bi_vcnt > 0) {
+		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+		if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
+			bv->bv_len += len;
+			bio->bi_iter.bi_size += len;
+			return true;
+		}
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(__bio_try_merge_page);
+
+/**
+ * __bio_add_page - add page to a bio in a new segment
+ * @bio: destination bio
+ * @page: page to add
+ * @len: length of the data to add
+ * @off: offset of the data in @page
+ *
+ * Add the data at @page + @off to @bio as a new bvec.  The caller must ensure
+ * that @bio has space for another bvec.
+ */
+void __bio_add_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int off)
+{
+	struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
+
+	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+	WARN_ON_ONCE(bio_full(bio));
+
+	bv->bv_page = page;
+	bv->bv_offset = off;
+	bv->bv_len = len;
+
+	bio->bi_iter.bi_size += len;
+	bio->bi_vcnt++;
+}
+EXPORT_SYMBOL_GPL(__bio_add_page);
+
+/**
  *	bio_add_page	-	attempt to add page to bio
  *	@bio: destination bio
  *	@page: page to add
@@ -833,40 +893,11 @@
 int bio_add_page(struct bio *bio, struct page *page,
 		 unsigned int len, unsigned int offset)
 {
-	struct bio_vec *bv;
-
-	/*
-	 * cloned bio must not modify vec list
-	 */
-	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
-		return 0;
-
-	/*
-	 * For filesystems with a blocksize smaller than the pagesize
-	 * we will often be called with the same page as last time and
-	 * a consecutive offset.  Optimize this special case.
-	 */
-	if (bio->bi_vcnt > 0) {
-		bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-		if (page == bv->bv_page &&
-		    offset == bv->bv_offset + bv->bv_len) {
-			bv->bv_len += len;
-			goto done;
-		}
+	if (!__bio_try_merge_page(bio, page, len, offset)) {
+		if (bio_full(bio))
+			return 0;
+		__bio_add_page(bio, page, len, offset);
 	}
-
-	if (bio->bi_vcnt >= bio->bi_max_vecs)
-		return 0;
-
-	bv		= &bio->bi_io_vec[bio->bi_vcnt];
-	bv->bv_page	= page;
-	bv->bv_len	= len;
-	bv->bv_offset	= offset;
-
-	bio->bi_vcnt++;
-done:
-	bio->bi_iter.bi_size += len;
 	return len;
 }
 EXPORT_SYMBOL(bio_add_page);
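
bio_add_page is now a thin wrapper: try to merge into the last bvec, otherwise append a fresh one if the bio isn't full. The merge condition, same page and an offset that continues exactly where the previous fragment ends, is the part worth staring at; a userspace analogue:

    #include <stdbool.h>
    #include <stddef.h>

    /* Userspace analogue of __bio_try_merge_page()'s contiguity test:
     * extend the last segment only when the new data starts exactly
     * where the previous segment ends, on the same "page". */
    struct seg { const char *page; size_t off, len; };

    static bool try_merge(struct seg *last, const char *page,
                          size_t off, size_t len)
    {
            if (last && page == last->page && off == last->off + last->len) {
                    last->len += len;
                    return true;
            }
            return false;   /* caller appends a new segment instead */
    }
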
@@ -970,27 +1001,68 @@
 }
 EXPORT_SYMBOL(bio_advance);
 
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+			struct bio *src, struct bvec_iter *src_iter)
+{
+	struct bio_vec src_bv, dst_bv;
+	void *src_p, *dst_p;
+	unsigned bytes;
+
+	while (src_iter->bi_size && dst_iter->bi_size) {
+		src_bv = bio_iter_iovec(src, *src_iter);
+		dst_bv = bio_iter_iovec(dst, *dst_iter);
+
+		bytes = min(src_bv.bv_len, dst_bv.bv_len);
+
+		src_p = kmap_atomic(src_bv.bv_page);
+		dst_p = kmap_atomic(dst_bv.bv_page);
+
+		memcpy(dst_p + dst_bv.bv_offset,
+		       src_p + src_bv.bv_offset,
+		       bytes);
+
+		kunmap_atomic(dst_p);
+		kunmap_atomic(src_p);
+
+		flush_dcache_page(dst_bv.bv_page);
+
+		bio_advance_iter(src, src_iter, bytes);
+		bio_advance_iter(dst, dst_iter, bytes);
+	}
+}
+EXPORT_SYMBOL(bio_copy_data_iter);
+
 /**
- * bio_copy_data - copy contents of data buffers from one chain of bios to
- * another
- * @src: source bio list
- * @dst: destination bio list
- *
- * If @src and @dst are single bios, bi_next must be NULL - otherwise, treats
- * @src and @dst as linked lists of bios.
+ * bio_copy_data - copy contents of data buffers from one bio to another
+ * @src: source bio
+ * @dst: destination bio
  *
  * Stops when it reaches the end of either @src or @dst - that is, copies
  * min(src->bi_size, dst->bi_size) bytes.
  */
 void bio_copy_data(struct bio *dst, struct bio *src)
 {
-	struct bvec_iter src_iter, dst_iter;
-	struct bio_vec src_bv, dst_bv;
-	void *src_p, *dst_p;
-	unsigned bytes;
+	struct bvec_iter src_iter = src->bi_iter;
+	struct bvec_iter dst_iter = dst->bi_iter;
 
-	src_iter = src->bi_iter;
-	dst_iter = dst->bi_iter;
+	bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+}
+EXPORT_SYMBOL(bio_copy_data);
+
+/**
+ * bio_list_copy_data - copy contents of data buffers from one chain of bios to
+ * another
+ * @src: source bio list
+ * @dst: destination bio list
+ *
+ * Stops when it reaches the end of either the @src list or @dst list - that is,
+ * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
+ * bios).
+ */
+void bio_list_copy_data(struct bio *dst, struct bio *src)
+{
+	struct bvec_iter src_iter = src->bi_iter;
+	struct bvec_iter dst_iter = dst->bi_iter;
 
 	while (1) {
 		if (!src_iter.bi_size) {
@@ -1009,26 +1081,10 @@
 			dst_iter = dst->bi_iter;
 		}
 
-		src_bv = bio_iter_iovec(src, src_iter);
-		dst_bv = bio_iter_iovec(dst, dst_iter);
-
-		bytes = min(src_bv.bv_len, dst_bv.bv_len);
-
-		src_p = kmap_atomic(src_bv.bv_page);
-		dst_p = kmap_atomic(dst_bv.bv_page);
-
-		memcpy(dst_p + dst_bv.bv_offset,
-		       src_p + src_bv.bv_offset,
-		       bytes);
-
-		kunmap_atomic(dst_p);
-		kunmap_atomic(src_p);
-
-		bio_advance_iter(src, &src_iter, bytes);
-		bio_advance_iter(dst, &dst_iter, bytes);
+		bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
 	}
 }
-EXPORT_SYMBOL(bio_copy_data);
+EXPORT_SYMBOL(bio_list_copy_data);
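
Factoring the copy loop into bio_copy_data_iter leaves the iterators in the callers' hands, so the single-bio and bio-list variants share one engine and decide separately when to move to the next bio. The same shape in plain userspace C, with caller-owned cursors (all types here are illustrative):

    #include <string.h>
    #include <stddef.h>

    struct buf { char *data; size_t size; };

    /* Copy engine in the style of bio_copy_data_iter(): advances the
     * caller-owned cursors, stops when either side is exhausted. */
    static void copy_iter(struct buf *dst, size_t *dpos,
                          const struct buf *src, size_t *spos)
    {
            while (*spos < src->size && *dpos < dst->size) {
                    size_t n = src->size - *spos;

                    if (n > dst->size - *dpos)
                            n = dst->size - *dpos;
                    memcpy(dst->data + *dpos, src->data + *spos, n);
                    *spos += n;
                    *dpos += n;
            }
    }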
 
 struct bio_map_data {
 	int is_our_pages;
@@ -1584,6 +1640,7 @@
 			set_page_dirty_lock(page);
 	}
 }
+EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
 
 static void bio_release_pages(struct bio *bio)
 {
@@ -1667,6 +1724,7 @@
 		bio_put(bio);
 	}
 }
+EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
 
 void generic_start_io_acct(struct request_queue *q, int rw,
 			   unsigned long sectors, struct hd_struct *part)
@@ -1749,6 +1807,9 @@
 	if (!bio_integrity_endio(bio))
 		return;
 
+	if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
+		bio->bi_next = NULL;
+
 	/*
 	 * Need to have a real endio function for chained bios, otherwise
 	 * various corner cases will break (like stacking block devices that
@@ -1848,30 +1909,38 @@
  * create memory pools for biovec's in a bio_set.
  * use the global biovec slabs created for general use.
  */
-mempool_t *biovec_create_pool(int pool_entries)
+int biovec_init_pool(mempool_t *pool, int pool_entries)
 {
 	struct biovec_slab *bp = bvec_slabs + BVEC_POOL_MAX;
 
-	return mempool_create_slab_pool(pool_entries, bp->slab);
+	return mempool_init_slab_pool(pool, pool_entries, bp->slab);
 }
 
-void bioset_free(struct bio_set *bs)
+/*
+ * bioset_exit - exit a bioset initialized with bioset_init()
+ *
+ * May be called on a zeroed but uninitialized bioset (i.e. allocated with
+ * kzalloc()).
+ */
+void bioset_exit(struct bio_set *bs)
 {
 	if (bs->rescue_workqueue)
 		destroy_workqueue(bs->rescue_workqueue);
+	bs->rescue_workqueue = NULL;
 
-	mempool_destroy(bs->bio_pool);
-	mempool_destroy(bs->bvec_pool);
+	mempool_exit(&bs->bio_pool);
+	mempool_exit(&bs->bvec_pool);
 
 	bioset_integrity_free(bs);
-	bio_put_slab(bs);
-
-	kfree(bs);
+	if (bs->bio_slab)
+		bio_put_slab(bs);
+	bs->bio_slab = NULL;
 }
-EXPORT_SYMBOL(bioset_free);
+EXPORT_SYMBOL(bioset_exit);
 
 /**
- * bioset_create  - Create a bio_set
+ * bioset_init - Initialize a bio_set
+ * @bs:		pool to initialize
  * @pool_size:	Number of bio and bio_vecs to cache in the mempool
  * @front_pad:	Number of bytes to allocate in front of the returned bio
  * @flags:	Flags to modify behavior, currently %BIOSET_NEED_BVECS
@@ -1890,16 +1959,12 @@
  *    dispatch queued requests when the mempool runs out of space.
  *
  */
-struct bio_set *bioset_create(unsigned int pool_size,
-			      unsigned int front_pad,
-			      int flags)
+int bioset_init(struct bio_set *bs,
+		unsigned int pool_size,
+		unsigned int front_pad,
+		int flags)
 {
 	unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
-	struct bio_set *bs;
-
-	bs = kzalloc(sizeof(*bs), GFP_KERNEL);
-	if (!bs)
-		return NULL;
 
 	bs->front_pad = front_pad;
 
@@ -1908,34 +1973,29 @@
 	INIT_WORK(&bs->rescue_work, bio_alloc_rescue);
 
 	bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
-	if (!bs->bio_slab) {
-		kfree(bs);
-		return NULL;
-	}
+	if (!bs->bio_slab)
+		return -ENOMEM;
 
-	bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
-	if (!bs->bio_pool)
+	if (mempool_init_slab_pool(&bs->bio_pool, pool_size, bs->bio_slab))
 		goto bad;
 
-	if (flags & BIOSET_NEED_BVECS) {
-		bs->bvec_pool = biovec_create_pool(pool_size);
-		if (!bs->bvec_pool)
-			goto bad;
-	}
+	if ((flags & BIOSET_NEED_BVECS) &&
+	    biovec_init_pool(&bs->bvec_pool, pool_size))
+		goto bad;
 
 	if (!(flags & BIOSET_NEED_RESCUER))
-		return bs;
+		return 0;
 
 	bs->rescue_workqueue = alloc_workqueue("bioset", WQ_MEM_RECLAIM, 0);
 	if (!bs->rescue_workqueue)
 		goto bad;
 
-	return bs;
+	return 0;
 bad:
-	bioset_free(bs);
-	return NULL;
+	bioset_exit(bs);
+	return -ENOMEM;
 }
-EXPORT_SYMBOL(bioset_create);
+EXPORT_SYMBOL(bioset_init);
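
Callers convert by embedding the bio_set and testing an error code instead of a NULL pointer, as fs_bio_set does in the hunk below. For a hypothetical owner structure the shape is roughly:

    /* Hedged conversion sketch; my_queue is a made-up structure. The
     * bioset_* calls and flags are the ones introduced above. */
    struct my_queue {
            struct bio_set bs;       /* embedded, was struct bio_set * */
    };

    static int my_queue_init(struct my_queue *q)
    {
            return bioset_init(&q->bs, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
    }

    static void my_queue_exit(struct my_queue *q)
    {
            bioset_exit(&q->bs);     /* pairs with a successful init */
    }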
 
 #ifdef CONFIG_BLK_CGROUP
 
@@ -2020,11 +2080,10 @@
 	bio_integrity_init();
 	biovec_init_slabs();
 
-	fs_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	if (!fs_bio_set)
+	if (bioset_init(&fs_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS))
 		panic("bio: can't allocate bios\n");
 
-	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
+	if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
 		panic("bio: can't create integrity pool\n");
 
 	return 0;
diff --git a/block/blk-core.c b/block/blk-core.c
index 85909b4..3f56be1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -196,15 +196,8 @@
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->tag = -1;
 	rq->internal_tag = -1;
-	rq->start_time = jiffies;
-	set_start_time_ns(rq);
+	rq->start_time_ns = ktime_get_ns();
 	rq->part = NULL;
-	seqcount_init(&rq->gstate_seq);
-	u64_stats_init(&rq->aborted_gstate_sync);
-	/*
-	 * See comment of blk_mq_init_request
-	 */
-	WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
 }
 EXPORT_SYMBOL(blk_rq_init);
 
@@ -280,6 +273,10 @@
 	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
+	/*
+	 * XXX this code looks suspicious - it's not consistent with advancing
+	 * req->bio in caller
+	 */
 	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
 		bio_endio(bio);
 }
@@ -360,7 +357,6 @@
 void blk_start_queue(struct request_queue *q)
 {
 	lockdep_assert_held(q->queue_lock);
-	WARN_ON(!in_interrupt() && !irqs_disabled());
 	WARN_ON_ONCE(q->mq_ops);
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
@@ -996,18 +992,24 @@
 					   spinlock_t *lock)
 {
 	struct request_queue *q;
+	int ret;
 
 	q = kmem_cache_alloc_node(blk_requestq_cachep,
 				gfp_mask | __GFP_ZERO, node_id);
 	if (!q)
 		return NULL;
 
+	INIT_LIST_HEAD(&q->queue_head);
+	q->last_merge = NULL;
+	q->end_sector = 0;
+	q->boundary_rq = NULL;
+
 	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
 	if (q->id < 0)
 		goto fail_q;
 
-	q->bio_split = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	if (!q->bio_split)
+	ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	if (ret)
 		goto fail_id;
 
 	q->backing_dev_info = bdi_alloc_node(gfp_mask, node_id);
@@ -1079,7 +1081,7 @@
 fail_stats:
 	bdi_put(q->backing_dev_info);
 fail_split:
-	bioset_free(q->bio_split);
+	bioset_exit(&q->bio_split);
 fail_id:
 	ida_simple_remove(&blk_queue_ida, q->id);
 fail_q:
@@ -1173,16 +1175,8 @@
 
 	q->sg_reserved_size = INT_MAX;
 
-	/* Protect q->elevator from elevator_change */
-	mutex_lock(&q->sysfs_lock);
-
-	/* init elevator */
-	if (elevator_init(q, NULL)) {
-		mutex_unlock(&q->sysfs_lock);
+	if (elevator_init(q))
 		goto out_exit_flush_rq;
-	}
-
-	mutex_unlock(&q->sysfs_lock);
 	return 0;
 
 out_exit_flush_rq:
@@ -1334,6 +1328,7 @@
  * @op: operation and flags
  * @bio: bio to allocate request for (can be %NULL)
  * @flags: BLQ_MQ_REQ_* flags
+ * @gfp_mask: allocator flags
  *
  * Get a free request from @q.  This function may fail under memory
  * pressure or if @q is dead.
@@ -1343,7 +1338,7 @@
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
 static struct request *__get_request(struct request_list *rl, unsigned int op,
-				     struct bio *bio, blk_mq_req_flags_t flags)
+		struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp_mask)
 {
 	struct request_queue *q = rl->q;
 	struct request *rq;
@@ -1352,8 +1347,6 @@
 	struct io_cq *icq = NULL;
 	const bool is_sync = op_is_sync(op);
 	int may_queue;
-	gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
-			 __GFP_DIRECT_RECLAIM;
 	req_flags_t rq_flags = RQF_ALLOCED;
 
 	lockdep_assert_held(q->queue_lock);
@@ -1517,8 +1510,9 @@
  * @op: operation and flags
  * @bio: bio to allocate request for (can be %NULL)
  * @flags: BLK_MQ_REQ_* flags.
+ * @gfp: allocator flags
  *
- * Get a free request from @q.  If %__GFP_DIRECT_RECLAIM is set in @gfp_mask,
+ * Get a free request from @q.  Unless %BLK_MQ_REQ_NOWAIT is set in @flags,
  * this function keeps retrying under memory pressure and fails iff @q is dead.
  *
  * Must be called with @q->queue_lock held and,
@@ -1526,7 +1520,7 @@
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
 static struct request *get_request(struct request_queue *q, unsigned int op,
-				   struct bio *bio, blk_mq_req_flags_t flags)
+		struct bio *bio, blk_mq_req_flags_t flags, gfp_t gfp)
 {
 	const bool is_sync = op_is_sync(op);
 	DEFINE_WAIT(wait);
@@ -1538,7 +1532,7 @@
 
 	rl = blk_get_rl(q, bio);	/* transferred to @rq on success */
 retry:
-	rq = __get_request(rl, op, bio, flags);
+	rq = __get_request(rl, op, bio, flags, gfp);
 	if (!IS_ERR(rq))
 		return rq;
 
@@ -1579,8 +1573,7 @@
 				unsigned int op, blk_mq_req_flags_t flags)
 {
 	struct request *rq;
-	gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC :
-			 __GFP_DIRECT_RECLAIM;
+	gfp_t gfp_mask = flags & BLK_MQ_REQ_NOWAIT ? GFP_ATOMIC : GFP_NOIO;
 	int ret = 0;
 
 	WARN_ON_ONCE(q->mq_ops);
@@ -1592,7 +1585,7 @@
 	if (ret)
 		return ERR_PTR(ret);
 	spin_lock_irq(q->queue_lock);
-	rq = get_request(q, op, NULL, flags);
+	rq = get_request(q, op, NULL, flags, gfp_mask);
 	if (IS_ERR(rq)) {
 		spin_unlock_irq(q->queue_lock);
 		blk_queue_exit(q);
@@ -1607,13 +1600,13 @@
 }
 
 /**
- * blk_get_request_flags - allocate a request
+ * blk_get_request - allocate a request
  * @q: request queue to allocate a request for
  * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
  * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
  */
-struct request *blk_get_request_flags(struct request_queue *q, unsigned int op,
-				      blk_mq_req_flags_t flags)
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+				blk_mq_req_flags_t flags)
 {
 	struct request *req;
 
@@ -1632,14 +1625,6 @@
 
 	return req;
 }
-EXPORT_SYMBOL(blk_get_request_flags);
-
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
-				gfp_t gfp_mask)
-{
-	return blk_get_request_flags(q, op, gfp_mask & __GFP_DIRECT_RECLAIM ?
-				     0 : BLK_MQ_REQ_NOWAIT);
-}
 EXPORT_SYMBOL(blk_get_request);
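
With blk_get_request_flags folded into blk_get_request, callers pass BLK_MQ_REQ_* flags rather than a gfp mask, and blocking is the default unless BLK_MQ_REQ_NOWAIT is given. A hedged caller-side sketch (REQ_OP_DRV_IN is just an example opcode):

    /* Conversion sketch for callers of the new signature. */
    static struct request *get_rq(struct request_queue *q, bool can_sleep)
    {
            /* old API: blk_get_request(q, op, GFP_KERNEL / GFP_ATOMIC);
             * new API: BLK_MQ_REQ_* flags, blocking by default. */
            return blk_get_request(q, REQ_OP_DRV_IN,
                                   can_sleep ? 0 : BLK_MQ_REQ_NOWAIT);
    }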
 
 /**
@@ -1660,7 +1645,7 @@
 	blk_delete_timer(rq);
 	blk_clear_rq_complete(rq);
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, &rq->issue_stat);
+	wbt_requeue(q->rq_wb, rq);
 
 	if (rq->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, rq);
@@ -1767,7 +1752,7 @@
 	/* this is a bio leak */
 	WARN_ON(req->bio != NULL);
 
-	wbt_done(q->rq_wb, &req->issue_stat);
+	wbt_done(q->rq_wb, req);
 
 	/*
 	 * Request may not have originated from ll_rw_blk. if not,
@@ -2066,7 +2051,7 @@
 	 * Returns with the queue unlocked.
 	 */
 	blk_queue_enter_live(q);
-	req = get_request(q, bio->bi_opf, bio, 0);
+	req = get_request(q, bio->bi_opf, bio, 0, GFP_NOIO);
 	if (IS_ERR(req)) {
 		blk_queue_exit(q);
 		__wbt_done(q->rq_wb, wb_acct);
@@ -2078,7 +2063,7 @@
 		goto out_unlock;
 	}
 
-	wbt_track(&req->issue_stat, wb_acct);
+	wbt_track(req, wb_acct);
 
 	/*
 	 * After dropping the lock and possibly sleeping here, our request
@@ -2392,7 +2377,9 @@
 
 	if (bio->bi_opf & REQ_NOWAIT)
 		flags = BLK_MQ_REQ_NOWAIT;
-	if (blk_queue_enter(q, flags) < 0) {
+	if (bio_flagged(bio, BIO_QUEUE_ENTERED))
+		blk_queue_enter_live(q);
+	else if (blk_queue_enter(q, flags) < 0) {
 		if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
 			bio_wouldblock_error(bio);
 		else
@@ -2727,7 +2714,7 @@
 	}
 }
 
-void blk_account_io_done(struct request *req)
+void blk_account_io_done(struct request *req, u64 now)
 {
 	/*
 	 * Account IO completion.  flush_rq isn't accounted as a
@@ -2735,11 +2722,12 @@
 	 * containing request is enough.
 	 */
 	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
-		unsigned long duration = jiffies - req->start_time;
+		unsigned long duration;
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
 		int cpu;
 
+		duration = nsecs_to_jiffies(now - req->start_time_ns);
 		cpu = part_stat_lock();
 		part = req->part;
 
@@ -2970,10 +2958,8 @@
 	 * and to it is freed is accounted as io that is in progress at
 	 * the driver side.
 	 */
-	if (blk_account_rq(rq)) {
+	if (blk_account_rq(rq))
 		q->in_flight[rq_is_sync(rq)]++;
-		set_io_start_time_ns(rq);
-	}
 }
 
 /**
@@ -2992,9 +2978,12 @@
 	blk_dequeue_request(req);
 
 	if (test_bit(QUEUE_FLAG_STATS, &req->q->queue_flags)) {
-		blk_stat_set_issue(&req->issue_stat, blk_rq_sectors(req));
+		req->io_start_time_ns = ktime_get_ns();
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+		req->throtl_size = blk_rq_sectors(req);
+#endif
 		req->rq_flags |= RQF_STATS;
-		wbt_issue(req->q->rq_wb, &req->issue_stat);
+		wbt_issue(req->q->rq_wb, req);
 	}
 
 	BUG_ON(blk_rq_is_complete(req));
@@ -3092,8 +3081,10 @@
 		struct bio *bio = req->bio;
 		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-		if (bio_bytes == bio->bi_iter.bi_size)
+		if (bio_bytes == bio->bi_iter.bi_size) {
 			req->bio = bio->bi_next;
+			bio->bi_next = NULL;
+		}
 
 		/* Completion has already been traced */
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
@@ -3190,12 +3181,13 @@
 void blk_finish_request(struct request *req, blk_status_t error)
 {
 	struct request_queue *q = req->q;
+	u64 now = ktime_get_ns();
 
 	lockdep_assert_held(req->q->queue_lock);
 	WARN_ON_ONCE(q->mq_ops);
 
 	if (req->rq_flags & RQF_STATS)
-		blk_stat_add(req);
+		blk_stat_add(req, now);
 
 	if (req->rq_flags & RQF_QUEUED)
 		blk_queue_end_tag(q, req);
@@ -3210,10 +3202,10 @@
 	if (req->rq_flags & RQF_DONTPREP)
 		blk_unprep_request(req);
 
-	blk_account_io_done(req);
+	blk_account_io_done(req, now);
 
 	if (req->end_io) {
-		wbt_done(req->q->rq_wb, &req->issue_stat);
+		wbt_done(req->q->rq_wb, req);
 		req->end_io(req, error);
 	} else {
 		if (blk_bidi_rq(req))
@@ -3519,7 +3511,7 @@
 	struct bio *bio, *bio_src;
 
 	if (!bs)
-		bs = fs_bio_set;
+		bs = &fs_bio_set;
 
 	__rq_for_each_bio(bio_src, rq_src) {
 		bio = bio_clone_fast(bio_src, gfp_mask, bs);
@@ -3630,7 +3622,7 @@
 		blk_run_queue_async(q);
 	else
 		__blk_run_queue(q);
-	spin_unlock(q->queue_lock);
+	spin_unlock_irq(q->queue_lock);
 }
 
 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -3678,7 +3670,6 @@
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct request_queue *q;
-	unsigned long flags;
 	struct request *rq;
 	LIST_HEAD(list);
 	unsigned int depth;
@@ -3698,11 +3689,6 @@
 	q = NULL;
 	depth = 0;
 
-	/*
-	 * Save and disable interrupts here, to avoid doing it for every
-	 * queue lock we have to take.
-	 */
-	local_irq_save(flags);
 	while (!list_empty(&list)) {
 		rq = list_entry_rq(list.next);
 		list_del_init(&rq->queuelist);
@@ -3715,7 +3701,7 @@
 				queue_unplugged(q, depth, from_schedule);
 			q = rq->q;
 			depth = 0;
-			spin_lock(q->queue_lock);
+			spin_lock_irq(q->queue_lock);
 		}
 
 		/*
@@ -3742,8 +3728,6 @@
 	 */
 	if (q)
 		queue_unplugged(q, depth, from_schedule);
-
-	local_irq_restore(flags);
 }
 
 void blk_finish_plug(struct blk_plug *plug)
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index feb3057..6121611 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -333,34 +333,34 @@
 }
 
 static struct integrity_sysfs_entry integrity_format_entry = {
-	.attr = { .name = "format", .mode = S_IRUGO },
+	.attr = { .name = "format", .mode = 0444 },
 	.show = integrity_format_show,
 };
 
 static struct integrity_sysfs_entry integrity_tag_size_entry = {
-	.attr = { .name = "tag_size", .mode = S_IRUGO },
+	.attr = { .name = "tag_size", .mode = 0444 },
 	.show = integrity_tag_size_show,
 };
 
 static struct integrity_sysfs_entry integrity_interval_entry = {
-	.attr = { .name = "protection_interval_bytes", .mode = S_IRUGO },
+	.attr = { .name = "protection_interval_bytes", .mode = 0444 },
 	.show = integrity_interval_show,
 };
 
 static struct integrity_sysfs_entry integrity_verify_entry = {
-	.attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
+	.attr = { .name = "read_verify", .mode = 0644 },
 	.show = integrity_verify_show,
 	.store = integrity_verify_store,
 };
 
 static struct integrity_sysfs_entry integrity_generate_entry = {
-	.attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
+	.attr = { .name = "write_generate", .mode = 0644 },
 	.show = integrity_generate_show,
 	.store = integrity_generate_store,
 };
 
 static struct integrity_sysfs_entry integrity_device_entry = {
-	.attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
+	.attr = { .name = "device_is_integrity_capable", .mode = 0444 },
 	.show = integrity_device_show,
 };
 
diff --git a/block/blk-lib.c b/block/blk-lib.c
index a676084..8faa70f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -62,10 +62,16 @@
 		unsigned int req_sects;
 		sector_t end_sect, tmp;
 
-		/* Make sure bi_size doesn't overflow */
-		req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
+		/*
+		 * Issue in chunks of the user defined max discard setting,
+		 * ensuring that bi_size doesn't overflow
+		 */
+		req_sects = min_t(sector_t, nr_sects,
+					q->limits.max_discard_sectors);
+		if (req_sects > UINT_MAX >> 9)
+			req_sects = UINT_MAX >> 9;
 
-		/**
+		/*
 		 * If splitting a request, and the next starting sector would be
 		 * misaligned, stop the discard at the previous aligned sector.
 		 */
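
Each discard chunk is now clamped twice: to the queue's max_discard_sectors and to the bi_size ceiling of UINT_MAX bytes expressed as 512-byte sectors. A standalone version of that clamp:

    #include <stdint.h>

    /* Clamp one discard chunk as in the hunk above: user-visible max
     * discard size first, then the bio bi_size ceiling. */
    static uint64_t discard_chunk(uint64_t nr_sects,
                                  uint64_t max_discard_sectors)
    {
            uint64_t req_sects = nr_sects;

            if (req_sects > max_discard_sectors)
                    req_sects = max_discard_sectors;
            if (req_sects > (UINT32_MAX >> 9))
                    req_sects = UINT32_MAX >> 9;
            return req_sects;
    }
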
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 782940c..aaec38c 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -188,16 +188,16 @@
 	switch (bio_op(*bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-		split = blk_bio_discard_split(q, *bio, q->bio_split, &nsegs);
+		split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_ZEROES:
-		split = blk_bio_write_zeroes_split(q, *bio, q->bio_split, &nsegs);
+		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs);
 		break;
 	case REQ_OP_WRITE_SAME:
-		split = blk_bio_write_same_split(q, *bio, q->bio_split, &nsegs);
+		split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs);
 		break;
 	default:
-		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+		split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs);
 		break;
 	}
 
@@ -210,6 +210,16 @@
 		/* there is no chance to merge the split bio */
 		split->bi_opf |= REQ_NOMERGE;
 
+		/*
+		 * Since we're recursing into make_request here, ensure
+		 * that we mark this bio as already having entered the queue.
+		 * If not, and the queue is going away, we can get stuck
+		 * forever on waiting for the queue reference to drop. But
+		 * that will never happen, as we're already holding a
+		 * reference to it.
+		 */
+		bio_set_flag(*bio, BIO_QUEUE_ENTERED);
+
 		bio_chain(split, *bio);
 		trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
 		generic_make_request(*bio);
@@ -724,13 +734,12 @@
 	}
 
 	/*
-	 * At this point we have either done a back merge
-	 * or front merge. We need the smaller start_time of
-	 * the merged requests to be the current request
-	 * for accounting purposes.
+	 * At this point we have either done a back merge or front merge. We
+	 * need the smaller start_time_ns of the merged requests to be the
+	 * current request for accounting purposes.
 	 */
-	if (time_after(req->start_time, next->start_time))
-		req->start_time = next->start_time;
+	if (next->start_time_ns < req->start_time_ns)
+		req->start_time_ns = next->start_time_ns;
 
 	req->biotail->bi_next = next->bio;
 	req->biotail = next->biotail;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3080e18..ffa6223 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -344,7 +344,6 @@
 	RQF_NAME(STATS),
 	RQF_NAME(SPECIAL_PAYLOAD),
 	RQF_NAME(ZONE_WRITE_LOCKED),
-	RQF_NAME(MQ_TIMEOUT_EXPIRED),
 	RQF_NAME(MQ_POLL_SLEPT),
 };
 #undef RQF_NAME
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 25c14c5..56c493c 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -268,19 +268,16 @@
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
 
 /*
- * Reverse check our software queue for entries that we could potentially
- * merge with. Currently includes a hand-wavy stop count of 8, to not spend
- * too much time checking for merges.
+ * Iterate list of requests and see if we can merge this bio with any
+ * of them.
  */
-static bool blk_mq_attempt_merge(struct request_queue *q,
-				 struct blk_mq_ctx *ctx, struct bio *bio)
+bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
+			   struct bio *bio)
 {
 	struct request *rq;
 	int checked = 8;
 
-	lockdep_assert_held(&ctx->lock);
-
-	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
+	list_for_each_entry_reverse(rq, list, queuelist) {
 		bool merged = false;
 
 		if (!checked--)
@@ -305,13 +302,30 @@
 			continue;
 		}
 
-		if (merged)
-			ctx->rq_merged++;
 		return merged;
 	}
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
+
+/*
+ * Reverse check our software queue for entries that we could potentially
+ * merge with. Currently includes a hand-wavy stop count of 8, to not spend
+ * too much time checking for merges.
+ */
+static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_ctx *ctx, struct bio *bio)
+{
+	lockdep_assert_held(&ctx->lock);
+
+	if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+		ctx->rq_merged++;
+		return true;
+	}
+
+	return false;
+}
 
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
@@ -571,6 +585,7 @@
 
 	if (!e) {
 		q->elevator = NULL;
+		q->nr_requests = q->tag_set->queue_depth;
 		return 0;
 	}
 
@@ -633,14 +648,3 @@
 	blk_mq_sched_tags_teardown(q);
 	q->elevator = NULL;
 }
-
-int blk_mq_sched_init(struct request_queue *q)
-{
-	int ret;
-
-	mutex_lock(&q->sysfs_lock);
-	ret = elevator_init(q, NULL);
-	mutex_unlock(&q->sysfs_lock);
-
-	return ret;
-}
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 1e9c901..0cb8f93 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -33,8 +33,6 @@
 void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 			    unsigned int hctx_idx);
 
-int blk_mq_sched_init(struct request_queue *q);
-
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index a54b4b0..aafb442 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -166,15 +166,15 @@
 };
 
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
-	.attr = {.name = "nr_tags", .mode = S_IRUGO },
+	.attr = {.name = "nr_tags", .mode = 0444 },
 	.show = blk_mq_hw_sysfs_nr_tags_show,
 };
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
-	.attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
+	.attr = {.name = "nr_reserved_tags", .mode = 0444 },
 	.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
 };
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
-	.attr = {.name = "cpu_list", .mode = S_IRUGO },
+	.attr = {.name = "cpu_list", .mode = 0444 },
 	.show = blk_mq_hw_sysfs_cpus_show,
 };
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 336dde0..70356a2a 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -134,6 +134,8 @@
 	ws = bt_wait_ptr(bt, data->hctx);
 	drop_ctx = data->ctx == NULL;
 	do {
+		struct sbitmap_queue *bt_prev;
+
 		/*
 		 * We're out of tags on this hardware queue, kick any
 		 * pending IO submits before going to sleep waiting for
@@ -159,6 +161,7 @@
 		if (data->ctx)
 			blk_mq_put_ctx(data->ctx);
 
+		bt_prev = bt;
 		io_schedule();
 
 		data->ctx = blk_mq_get_ctx(data->q);
@@ -170,6 +173,15 @@
 			bt = &tags->bitmap_tags;
 
 		finish_wait(&ws->wait, &wait);
+
+		/*
+		 * If destination hw queue is changed, fake wake up on
+		 * previous queue for compensating the wake up miss, so
+		 * other allocations on previous queue won't be starved.
+		 */
+		if (bt != bt_prev)
+			sbitmap_queue_wake_up(bt_prev);
+
 		ws = bt_wait_ptr(bt, data->hctx);
 	} while (1);
 
@@ -259,7 +271,7 @@
 	 * test and set the bit before assigning ->rqs[].
 	 */
 	rq = tags->rqs[bitnr];
-	if (rq)
+	if (rq && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT)
 		iter_data->fn(rq, iter_data->data, reserved);
 
 	return true;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9ce9cac..d2de0a7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -309,7 +309,8 @@
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->rq_disk = NULL;
 	rq->part = NULL;
-	rq->start_time = jiffies;
+	rq->start_time_ns = ktime_get_ns();
+	rq->io_start_time_ns = 0;
 	rq->nr_phys_segments = 0;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	rq->nr_integrity_segments = 0;
@@ -328,11 +329,10 @@
 
 #ifdef CONFIG_BLK_CGROUP
 	rq->rl = NULL;
-	set_start_time_ns(rq);
-	rq->io_start_time_ns = 0;
 #endif
 
 	data->ctx->rq_dispatched[op_is_sync(op)]++;
+	refcount_set(&rq->ref, 1);
 	return rq;
 }
 
@@ -361,9 +361,11 @@
 
 		/*
 		 * Flush requests are special and go directly to the
-		 * dispatch list.
+		 * dispatch list. Don't include reserved tags in the
+		 * limiting, as it isn't useful.
 		 */
-		if (!op_is_flush(op) && e->type->ops.mq.limit_depth)
+		if (!op_is_flush(op) && e->type->ops.mq.limit_depth &&
+		    !(data->flags & BLK_MQ_REQ_RESERVED))
 			e->type->ops.mq.limit_depth(op, data);
 	}
 
@@ -464,13 +466,27 @@
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
+static void __blk_mq_free_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	const int sched_tag = rq->internal_tag;
+
+	if (rq->tag != -1)
+		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
+	if (sched_tag != -1)
+		blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
+	blk_mq_sched_restart(hctx);
+	blk_queue_exit(q);
+}
+
 void blk_mq_free_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	struct elevator_queue *e = q->elevator;
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-	const int sched_tag = rq->internal_tag;
 
 	if (rq->rq_flags & RQF_ELVPRIV) {
 		if (e && e->type->ops.mq.finish_request)
@@ -488,27 +504,30 @@
 	if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
 		laptop_io_completion(q->backing_dev_info);
 
-	wbt_done(q->rq_wb, &rq->issue_stat);
+	wbt_done(q->rq_wb, rq);
 
 	if (blk_rq_rl(rq))
 		blk_put_rl(blk_rq_rl(rq));
 
-	blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
-	if (rq->tag != -1)
-		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
-	if (sched_tag != -1)
-		blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
-	blk_mq_sched_restart(hctx);
-	blk_queue_exit(q);
+	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
+	if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
 inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
-	blk_account_io_done(rq);
+	u64 now = ktime_get_ns();
+
+	if (rq->rq_flags & RQF_STATS) {
+		blk_mq_poll_stats_start(rq->q);
+		blk_stat_add(rq, now);
+	}
+
+	blk_account_io_done(rq, now);
 
 	if (rq->end_io) {
-		wbt_done(rq->q->rq_wb, &rq->issue_stat);
+		wbt_done(rq->q->rq_wb, rq);
 		rq->end_io(rq, error);
 	} else {
 		if (unlikely(blk_bidi_rq(rq)))
@@ -539,15 +558,12 @@
 	bool shared = false;
 	int cpu;
 
-	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT);
-	blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+	if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
+			MQ_RQ_IN_FLIGHT)
+		return;
 
 	if (rq->internal_tag != -1)
 		blk_mq_sched_completed_request(rq);
-	if (rq->rq_flags & RQF_STATS) {
-		blk_mq_poll_stats_start(rq->q);
-		blk_stat_add(rq);
-	}
 
 	if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
 		rq->q->softirq_done_fn(rq);
@@ -589,36 +605,6 @@
 		*srcu_idx = srcu_read_lock(hctx->srcu);
 }
 
-static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
-{
-	unsigned long flags;
-
-	/*
-	 * blk_mq_rq_aborted_gstate() is used from the completion path and
-	 * can thus be called from irq context.  u64_stats_fetch in the
-	 * middle of update on the same CPU leads to lockup.  Disable irq
-	 * while updating.
-	 */
-	local_irq_save(flags);
-	u64_stats_update_begin(&rq->aborted_gstate_sync);
-	rq->aborted_gstate = gstate;
-	u64_stats_update_end(&rq->aborted_gstate_sync);
-	local_irq_restore(flags);
-}
-
-static u64 blk_mq_rq_aborted_gstate(struct request *rq)
-{
-	unsigned int start;
-	u64 aborted_gstate;
-
-	do {
-		start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
-		aborted_gstate = rq->aborted_gstate;
-	} while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
-
-	return aborted_gstate;
-}
-
 /**
  * blk_mq_complete_request - end I/O on a request
  * @rq:		the request being processed
@@ -629,28 +615,9 @@
  **/
 void blk_mq_complete_request(struct request *rq)
 {
-	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
-	int srcu_idx;
-
-	if (unlikely(blk_should_fake_timeout(q)))
+	if (unlikely(blk_should_fake_timeout(rq->q)))
 		return;
-
-	/*
-	 * If @rq->aborted_gstate equals the current instance, timeout is
-	 * claiming @rq and we lost.  This is synchronized through
-	 * hctx_lock().  See blk_mq_timeout_work() for details.
-	 *
-	 * Completion path never blocks and we can directly use RCU here
-	 * instead of hctx_lock() which can be either RCU or SRCU.
-	 * However, that would complicate paths which want to synchronize
-	 * against us.  Let stay in sync with the issue path so that
-	 * hctx_lock() covers both issue and completion paths.
-	 */
-	hctx_lock(hctx, &srcu_idx);
-	if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
-		__blk_mq_complete_request(rq);
-	hctx_unlock(hctx, srcu_idx);
+	__blk_mq_complete_request(rq);
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
@@ -669,32 +636,18 @@
 	trace_block_rq_issue(q, rq);
 
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
-		blk_stat_set_issue(&rq->issue_stat, blk_rq_sectors(rq));
+		rq->io_start_time_ns = ktime_get_ns();
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+		rq->throtl_size = blk_rq_sectors(rq);
+#endif
 		rq->rq_flags |= RQF_STATS;
-		wbt_issue(q->rq_wb, &rq->issue_stat);
+		wbt_issue(q->rq_wb, rq);
 	}
 
 	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
 
-	/*
-	 * Mark @rq in-flight which also advances the generation number,
-	 * and register for timeout.  Protect with a seqcount to allow the
-	 * timeout path to read both @rq->gstate and @rq->deadline
-	 * coherently.
-	 *
-	 * This is the only place where a request is marked in-flight.  If
-	 * the timeout path reads an in-flight @rq->gstate, the
-	 * @rq->deadline it reads together under @rq->gstate_seq is
-	 * guaranteed to be the matching one.
-	 */
-	preempt_disable();
-	write_seqcount_begin(&rq->gstate_seq);
-
-	blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
 	blk_add_timer(rq);
-
-	write_seqcount_end(&rq->gstate_seq);
-	preempt_enable();
+	WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
 
 	if (q->dma_drain_size && blk_rq_bytes(rq)) {
 		/*
@@ -707,11 +660,6 @@
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
-/*
- * When we reach here because queue is busy, it's safe to change the state
- * to IDLE without checking @rq->aborted_gstate because we should still be
- * holding the RCU read lock and thus protected against timeout.
- */
 static void __blk_mq_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -719,10 +667,10 @@
 	blk_mq_put_driver_tag(rq);
 
 	trace_block_rq_requeue(q, rq);
-	wbt_requeue(q->rq_wb, &rq->issue_stat);
+	wbt_requeue(q->rq_wb, rq);
 
-	if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
-		blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
+	if (blk_mq_request_started(rq)) {
+		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
 		if (q->dma_drain_size && blk_rq_bytes(rq))
 			rq->nr_phys_segments--;
 	}
@@ -820,101 +768,79 @@
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
-struct blk_mq_timeout_data {
-	unsigned long next;
-	unsigned int next_set;
-	unsigned int nr_expired;
-};
-
 static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
-	const struct blk_mq_ops *ops = req->q->mq_ops;
-	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
+	if (req->q->mq_ops->timeout) {
+		enum blk_eh_timer_return ret;
 
-	req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED;
-
-	if (ops->timeout)
-		ret = ops->timeout(req, reserved);
-
-	switch (ret) {
-	case BLK_EH_HANDLED:
-		__blk_mq_complete_request(req);
-		break;
-	case BLK_EH_RESET_TIMER:
-		/*
-		 * As nothing prevents from completion happening while
-		 * ->aborted_gstate is set, this may lead to ignored
-		 * completions and further spurious timeouts.
-		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
-		blk_add_timer(req);
-		break;
-	case BLK_EH_NOT_HANDLED:
-		break;
-	default:
-		printk(KERN_ERR "block: bad eh return: %d\n", ret);
-		break;
+		ret = req->q->mq_ops->timeout(req, reserved);
+		if (ret == BLK_EH_DONE)
+			return;
+		WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
 	}
+
+	blk_add_timer(req);
+}
+
+static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
+{
+	unsigned long deadline;
+
+	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
+		return false;
+
+	deadline = blk_rq_deadline(rq);
+	if (time_after_eq(jiffies, deadline))
+		return true;
+
+	if (*next == 0)
+		*next = deadline;
+	else if (time_after(*next, deadline))
+		*next = deadline;
+	return false;
 }
 
 static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
-	struct blk_mq_timeout_data *data = priv;
-	unsigned long gstate, deadline;
-	int start;
+	unsigned long *next = priv;
 
-	might_sleep();
-
-	if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED)
+	/*
+	 * Just do a quick check if it is expired before locking the request
+	 * in, so we're not unnecessarily synchronizing across CPUs.
+	 */
+	if (!blk_mq_req_expired(rq, next))
 		return;
 
-	/* read coherent snapshots of @rq->state_gen and @rq->deadline */
-	while (true) {
-		start = read_seqcount_begin(&rq->gstate_seq);
-		gstate = READ_ONCE(rq->gstate);
-		deadline = blk_rq_deadline(rq);
-		if (!read_seqcount_retry(&rq->gstate_seq, start))
-			break;
-		cond_resched();
-	}
-
-	/* if in-flight && overdue, mark for abortion */
-	if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT &&
-	    time_after_eq(jiffies, deadline)) {
-		blk_mq_rq_update_aborted_gstate(rq, gstate);
-		data->nr_expired++;
-		hctx->nr_expired++;
-	} else if (!data->next_set || time_after(data->next, deadline)) {
-		data->next = deadline;
-		data->next_set = 1;
-	}
-}
-
-static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
-		struct request *rq, void *priv, bool reserved)
-{
 	/*
-	 * We marked @rq->aborted_gstate and waited for RCU.  If there were
-	 * completions that we lost to, they would have finished and
-	 * updated @rq->gstate by now; otherwise, the completion path is
-	 * now guaranteed to see @rq->aborted_gstate and yield.  If
-	 * @rq->aborted_gstate still matches @rq->gstate, @rq is ours.
+	 * We have reason to believe the request may be expired. Take a
+	 * reference on the request to lock this request's lifetime into its
+	 * currently allocated context to prevent it from being reallocated in
+	 * the event the completion by-passes this timeout handler.
+	 *
+	 * If the reference was already released, then the driver beat the
+	 * timeout handler to posting a natural completion.
 	 */
-	if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
-	    READ_ONCE(rq->gstate) == rq->aborted_gstate)
+	if (!refcount_inc_not_zero(&rq->ref))
+		return;
+
+	/*
+	 * The request is now locked and cannot be reallocated underneath the
+	 * timeout handler's processing. Re-verify this exact request is truly
+	 * expired; if it is not expired, then the request was completed and
+	 * reallocated as a new request.
+	 */
+	if (blk_mq_req_expired(rq, next))
 		blk_mq_rq_timed_out(rq, reserved);
+	if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
 }
 
 static void blk_mq_timeout_work(struct work_struct *work)
 {
 	struct request_queue *q =
 		container_of(work, struct request_queue, timeout_work);
-	struct blk_mq_timeout_data data = {
-		.next		= 0,
-		.next_set	= 0,
-		.nr_expired	= 0,
-	};
+	unsigned long next = 0;
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
@@ -934,39 +860,10 @@
 	if (!percpu_ref_tryget(&q->q_usage_counter))
 		return;
 
-	/* scan for the expired ones and set their ->aborted_gstate */
-	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
+	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
 
-	if (data.nr_expired) {
-		bool has_rcu = false;
-
-		/*
-		 * Wait till everyone sees ->aborted_gstate.  The
-		 * sequential waits for SRCUs aren't ideal.  If this ever
-		 * becomes a problem, we can add per-hw_ctx rcu_head and
-		 * wait in parallel.
-		 */
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (!hctx->nr_expired)
-				continue;
-
-			if (!(hctx->flags & BLK_MQ_F_BLOCKING))
-				has_rcu = true;
-			else
-				synchronize_srcu(hctx->srcu);
-
-			hctx->nr_expired = 0;
-		}
-		if (has_rcu)
-			synchronize_rcu();
-
-		/* terminate the ones we won */
-		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
-	}
-
-	if (data.next_set) {
-		data.next = blk_rq_timeout(round_jiffies_up(data.next));
-		mod_timer(&q->timeout, data.next);
+	if (next != 0) {
+		mod_timer(&q->timeout, next);
 	} else {
 		/*
 		 * Request timeouts are handled as a forward rolling timer. If
@@ -1029,7 +926,7 @@
 	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
 
 	spin_lock(&ctx->lock);
-	if (unlikely(!list_empty(&ctx->rq_list))) {
+	if (!list_empty(&ctx->rq_list)) {
 		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
 		list_del_init(&dispatch_data->rq->queuelist);
 		if (list_empty(&ctx->rq_list))
@@ -1578,7 +1475,7 @@
 	 * If we are stopped, don't run the queue.
 	 */
 	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state))
-		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+		return;
 
 	__blk_mq_run_hw_queue(hctx);
 }
@@ -1716,15 +1613,6 @@
 	blk_account_io_start(rq, true);
 }
 
-static inline void blk_mq_queue_io(struct blk_mq_hw_ctx *hctx,
-				   struct blk_mq_ctx *ctx,
-				   struct request *rq)
-{
-	spin_lock(&ctx->lock);
-	__blk_mq_insert_request(hctx, rq, false);
-	spin_unlock(&ctx->lock);
-}
-
 static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
 	if (rq->tag != -1)
@@ -1882,7 +1770,7 @@
 		return BLK_QC_T_NONE;
 	}
 
-	wbt_track(&rq->issue_stat, wb_acct);
+	wbt_track(rq, wb_acct);
 
 	cookie = request_to_qc_t(data.hctx, rq);
 
@@ -1949,15 +1837,10 @@
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
 		blk_mq_try_issue_directly(data.hctx, rq, &cookie);
-	} else if (q->elevator) {
-		blk_mq_put_ctx(data.ctx);
-		blk_mq_bio_to_request(rq, bio);
-		blk_mq_sched_insert_request(rq, false, true, true);
 	} else {
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
-		blk_mq_queue_io(data.hctx, data.ctx, rq);
-		blk_mq_run_hw_queue(data.hctx, true);
+		blk_mq_sched_insert_request(rq, false, true, true);
 	}
 
 	return cookie;
@@ -2056,15 +1939,7 @@
 			return ret;
 	}
 
-	seqcount_init(&rq->gstate_seq);
-	u64_stats_init(&rq->aborted_gstate_sync);
-	/*
-	 * start gstate with gen 1 instead of 0, otherwise it will be equal
-	 * to aborted_gstate, and be identified timed out by
-	 * blk_mq_terminate_expired.
-	 */
-	WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
-
+	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
 	return 0;
 }
 
@@ -2365,6 +2240,7 @@
 	queue_for_each_hw_ctx(q, hctx, i) {
 		cpumask_clear(hctx->cpumask);
 		hctx->nr_ctx = 0;
+		hctx->dispatch_from = NULL;
 	}
 
 	/*
@@ -2697,7 +2573,7 @@
 	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
 		int ret;
 
-		ret = blk_mq_sched_init(q);
+		ret = elevator_init_mq(q);
 		if (ret)
 			return ERR_PTR(ret);
 	}
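
The blk-mq.c hunks above replace the old generation-number/seqcount timeout machinery with a plain reference count: the timeout worker takes a reference before it inspects a request, and whichever of the completion path or the timeout worker drops the last reference frees it. A minimal userspace sketch of that handoff, with hypothetical names; get_ref_unless_zero()/put_ref() stand in for refcount_inc_not_zero()/refcount_dec_and_test():

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdlib.h>

	struct req {
		atomic_int ref;		/* last put frees the request */
		bool expired;		/* stands in for blk_mq_req_expired() */
	};

	/* mirrors refcount_inc_not_zero(): never revive a freed request */
	static bool get_ref_unless_zero(struct req *rq)
	{
		int old = atomic_load(&rq->ref);

		while (old != 0)
			if (atomic_compare_exchange_weak(&rq->ref, &old, old + 1))
				return true;
		return false;
	}

	/* mirrors refcount_dec_and_test() + __blk_mq_free_request() */
	static void put_ref(struct req *rq)
	{
		if (atomic_fetch_sub(&rq->ref, 1) == 1)
			free(rq);
	}

	static void timeout_check(struct req *rq)
	{
		if (!get_ref_unless_zero(rq))
			return;		/* completion already won the race */
		if (rq->expired) {
			/* the driver ->timeout() callback would run here */
		}
		put_ref(rq);
	}

	int main(void)
	{
		struct req *rq = calloc(1, sizeof(*rq));

		atomic_store(&rq->ref, 1);	/* allocation holds one ref */
		timeout_check(rq);		/* safe against completion */
		put_ref(rq);			/* completion drops last ref */
		return 0;
	}

The key property is that the timeout side can never resurrect a request whose count already hit zero, which is exactly the race the removed aborted_gstate dance tried to close.
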
diff --git a/block/blk-mq.h b/block/blk-mq.h
index e1bb420..89231e4 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -30,20 +30,6 @@
 	struct kobject		kobj;
 } ____cacheline_aligned_in_smp;
 
-/*
- * Bits for request->gstate.  The lower two bits carry MQ_RQ_* state value
- * and the upper bits the generation number.
- */
-enum mq_rq_state {
-	MQ_RQ_IDLE		= 0,
-	MQ_RQ_IN_FLIGHT		= 1,
-	MQ_RQ_COMPLETE		= 2,
-
-	MQ_RQ_STATE_BITS	= 2,
-	MQ_RQ_STATE_MASK	= (1 << MQ_RQ_STATE_BITS) - 1,
-	MQ_RQ_GEN_INC		= 1 << MQ_RQ_STATE_BITS,
-};
-
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
@@ -107,33 +93,9 @@
  * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
  * @rq: target request.
  */
-static inline int blk_mq_rq_state(struct request *rq)
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
 {
-	return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
-}
-
-/**
- * blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request
- * @rq: target request.
- * @state: new state to set.
- *
- * Set @rq's state to @state.  The caller is responsible for ensuring that
- * there are no other updaters.  A request can transition into IN_FLIGHT
- * only from IDLE and doing so increments the generation number.
- */
-static inline void blk_mq_rq_update_state(struct request *rq,
-					  enum mq_rq_state state)
-{
-	u64 old_val = READ_ONCE(rq->gstate);
-	u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
-
-	if (state == MQ_RQ_IN_FLIGHT) {
-		WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
-		new_val += MQ_RQ_GEN_INC;
-	}
-
-	/* avoid exposing interim values */
-	WRITE_ONCE(rq->gstate, new_val);
+	return READ_ONCE(rq->state);
 }
 
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
diff --git a/block/blk-stat.c b/block/blk-stat.c
index bd365a9..175c143 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -47,19 +47,15 @@
 	stat->nr_samples++;
 }
 
-void blk_stat_add(struct request *rq)
+void blk_stat_add(struct request *rq, u64 now)
 {
 	struct request_queue *q = rq->q;
 	struct blk_stat_callback *cb;
 	struct blk_rq_stat *stat;
 	int bucket;
-	u64 now, value;
+	u64 value;
 
-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq->issue_stat))
-		return;
-
-	value = now - blk_stat_time(&rq->issue_stat);
+	value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
 
 	blk_throtl_stat_add(rq, value);
 
diff --git a/block/blk-stat.h b/block/blk-stat.h
index 2dd3634..78399cd 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -8,21 +8,6 @@
 #include <linux/rcupdate.h>
 #include <linux/timer.h>
 
-/*
- * from upper:
- * 3 bits: reserved for other usage
- * 12 bits: size
- * 49 bits: time
- */
-#define BLK_STAT_RES_BITS	3
-#define BLK_STAT_SIZE_BITS	12
-#define BLK_STAT_RES_SHIFT	(64 - BLK_STAT_RES_BITS)
-#define BLK_STAT_SIZE_SHIFT	(BLK_STAT_RES_SHIFT - BLK_STAT_SIZE_BITS)
-#define BLK_STAT_TIME_MASK	((1ULL << BLK_STAT_SIZE_SHIFT) - 1)
-#define BLK_STAT_SIZE_MASK	\
-	(((1ULL << BLK_STAT_SIZE_BITS) - 1) << BLK_STAT_SIZE_SHIFT)
-#define BLK_STAT_RES_MASK	(~((1ULL << BLK_STAT_RES_SHIFT) - 1))
-
 /**
  * struct blk_stat_callback - Block statistics callback.
  *
@@ -80,35 +65,7 @@
 struct blk_queue_stats *blk_alloc_queue_stats(void);
 void blk_free_queue_stats(struct blk_queue_stats *);
 
-void blk_stat_add(struct request *);
-
-static inline u64 __blk_stat_time(u64 time)
-{
-	return time & BLK_STAT_TIME_MASK;
-}
-
-static inline u64 blk_stat_time(struct blk_issue_stat *stat)
-{
-	return __blk_stat_time(stat->stat);
-}
-
-static inline sector_t blk_capped_size(sector_t size)
-{
-	return size & ((1ULL << BLK_STAT_SIZE_BITS) - 1);
-}
-
-static inline sector_t blk_stat_size(struct blk_issue_stat *stat)
-{
-	return (stat->stat & BLK_STAT_SIZE_MASK) >> BLK_STAT_SIZE_SHIFT;
-}
-
-static inline void blk_stat_set_issue(struct blk_issue_stat *stat,
-	sector_t size)
-{
-	stat->stat = (stat->stat & BLK_STAT_RES_MASK) |
-		(ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK) |
-		(((u64)blk_capped_size(size)) << BLK_STAT_SIZE_SHIFT);
-}
+void blk_stat_add(struct request *rq, u64 now);
 
 /* record time/size info in request but not add a callback */
 void blk_stat_enable_accounting(struct request_queue *q);
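
The macros deleted from blk-stat.h packed three reserved bits, a 12-bit capped size, and a 49-bit timestamp into a single u64; requests now carry io_start_time_ns as a plain field instead. For reference, the retired packing reduces to the following (constants taken from the removed definitions; the reserved bits, which wbt used, are dropped here for brevity):

	#include <stdint.h>

	#define RES_BITS	3
	#define SIZE_BITS	12
	#define RES_SHIFT	(64 - RES_BITS)
	#define SIZE_SHIFT	(RES_SHIFT - SIZE_BITS)
	#define TIME_MASK	((1ULL << SIZE_SHIFT) - 1)

	/* issue time and a capped request size share one 64-bit word */
	static uint64_t pack_issue(uint64_t time_ns, uint64_t sectors)
	{
		uint64_t capped = sectors & ((1ULL << SIZE_BITS) - 1);

		return (time_ns & TIME_MASK) | (capped << SIZE_SHIFT);
	}

	static uint64_t issue_time(uint64_t stat)
	{
		return stat & TIME_MASK;   /* truncated to 49 bits going in */
	}

	static uint64_t issue_size(uint64_t stat)
	{
		return (stat >> SIZE_SHIFT) & ((1ULL << SIZE_BITS) - 1);
	}

	int main(void)
	{
		uint64_t stat = pack_issue(123456789ULL, 2048);

		return !(issue_time(stat) == 123456789ULL &&
			 issue_size(stat) == 2048);
	}

Dropping the packing costs a few bytes per request but gives full-precision timestamps and removes a mask-and-shift from every accounting path.
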
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d00d1b0..94987b1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -491,188 +491,198 @@
 	return count;
 }
 
+static ssize_t queue_fua_show(struct request_queue *q, char *page)
+{
+	return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags));
+}
+
 static ssize_t queue_dax_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(blk_queue_dax(q), page);
 }
 
 static struct queue_sysfs_entry queue_requests_entry = {
-	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "nr_requests", .mode = 0644 },
 	.show = queue_requests_show,
 	.store = queue_requests_store,
 };
 
 static struct queue_sysfs_entry queue_ra_entry = {
-	.attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "read_ahead_kb", .mode = 0644 },
 	.show = queue_ra_show,
 	.store = queue_ra_store,
 };
 
 static struct queue_sysfs_entry queue_max_sectors_entry = {
-	.attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "max_sectors_kb", .mode = 0644 },
 	.show = queue_max_sectors_show,
 	.store = queue_max_sectors_store,
 };
 
 static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
-	.attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
+	.attr = {.name = "max_hw_sectors_kb", .mode = 0444 },
 	.show = queue_max_hw_sectors_show,
 };
 
 static struct queue_sysfs_entry queue_max_segments_entry = {
-	.attr = {.name = "max_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_segments", .mode = 0444 },
 	.show = queue_max_segments_show,
 };
 
 static struct queue_sysfs_entry queue_max_discard_segments_entry = {
-	.attr = {.name = "max_discard_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_discard_segments", .mode = 0444 },
 	.show = queue_max_discard_segments_show,
 };
 
 static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
-	.attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
+	.attr = {.name = "max_integrity_segments", .mode = 0444 },
 	.show = queue_max_integrity_segments_show,
 };
 
 static struct queue_sysfs_entry queue_max_segment_size_entry = {
-	.attr = {.name = "max_segment_size", .mode = S_IRUGO },
+	.attr = {.name = "max_segment_size", .mode = 0444 },
 	.show = queue_max_segment_size_show,
 };
 
 static struct queue_sysfs_entry queue_iosched_entry = {
-	.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "scheduler", .mode = 0644 },
 	.show = elv_iosched_show,
 	.store = elv_iosched_store,
 };
 
 static struct queue_sysfs_entry queue_hw_sector_size_entry = {
-	.attr = {.name = "hw_sector_size", .mode = S_IRUGO },
+	.attr = {.name = "hw_sector_size", .mode = 0444 },
 	.show = queue_logical_block_size_show,
 };
 
 static struct queue_sysfs_entry queue_logical_block_size_entry = {
-	.attr = {.name = "logical_block_size", .mode = S_IRUGO },
+	.attr = {.name = "logical_block_size", .mode = 0444 },
 	.show = queue_logical_block_size_show,
 };
 
 static struct queue_sysfs_entry queue_physical_block_size_entry = {
-	.attr = {.name = "physical_block_size", .mode = S_IRUGO },
+	.attr = {.name = "physical_block_size", .mode = 0444 },
 	.show = queue_physical_block_size_show,
 };
 
 static struct queue_sysfs_entry queue_chunk_sectors_entry = {
-	.attr = {.name = "chunk_sectors", .mode = S_IRUGO },
+	.attr = {.name = "chunk_sectors", .mode = 0444 },
 	.show = queue_chunk_sectors_show,
 };
 
 static struct queue_sysfs_entry queue_io_min_entry = {
-	.attr = {.name = "minimum_io_size", .mode = S_IRUGO },
+	.attr = {.name = "minimum_io_size", .mode = 0444 },
 	.show = queue_io_min_show,
 };
 
 static struct queue_sysfs_entry queue_io_opt_entry = {
-	.attr = {.name = "optimal_io_size", .mode = S_IRUGO },
+	.attr = {.name = "optimal_io_size", .mode = 0444 },
 	.show = queue_io_opt_show,
 };
 
 static struct queue_sysfs_entry queue_discard_granularity_entry = {
-	.attr = {.name = "discard_granularity", .mode = S_IRUGO },
+	.attr = {.name = "discard_granularity", .mode = 0444 },
 	.show = queue_discard_granularity_show,
 };
 
 static struct queue_sysfs_entry queue_discard_max_hw_entry = {
-	.attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO },
+	.attr = {.name = "discard_max_hw_bytes", .mode = 0444 },
 	.show = queue_discard_max_hw_show,
 };
 
 static struct queue_sysfs_entry queue_discard_max_entry = {
-	.attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "discard_max_bytes", .mode = 0644 },
 	.show = queue_discard_max_show,
 	.store = queue_discard_max_store,
 };
 
 static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
-	.attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
+	.attr = {.name = "discard_zeroes_data", .mode = 0444 },
 	.show = queue_discard_zeroes_data_show,
 };
 
 static struct queue_sysfs_entry queue_write_same_max_entry = {
-	.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+	.attr = {.name = "write_same_max_bytes", .mode = 0444 },
 	.show = queue_write_same_max_show,
 };
 
 static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
-	.attr = {.name = "write_zeroes_max_bytes", .mode = S_IRUGO },
+	.attr = {.name = "write_zeroes_max_bytes", .mode = 0444 },
 	.show = queue_write_zeroes_max_show,
 };
 
 static struct queue_sysfs_entry queue_nonrot_entry = {
-	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "rotational", .mode = 0644 },
 	.show = queue_show_nonrot,
 	.store = queue_store_nonrot,
 };
 
 static struct queue_sysfs_entry queue_zoned_entry = {
-	.attr = {.name = "zoned", .mode = S_IRUGO },
+	.attr = {.name = "zoned", .mode = 0444 },
 	.show = queue_zoned_show,
 };
 
 static struct queue_sysfs_entry queue_nomerges_entry = {
-	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "nomerges", .mode = 0644 },
 	.show = queue_nomerges_show,
 	.store = queue_nomerges_store,
 };
 
 static struct queue_sysfs_entry queue_rq_affinity_entry = {
-	.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "rq_affinity", .mode = 0644 },
 	.show = queue_rq_affinity_show,
 	.store = queue_rq_affinity_store,
 };
 
 static struct queue_sysfs_entry queue_iostats_entry = {
-	.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "iostats", .mode = 0644 },
 	.show = queue_show_iostats,
 	.store = queue_store_iostats,
 };
 
 static struct queue_sysfs_entry queue_random_entry = {
-	.attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "add_random", .mode = 0644 },
 	.show = queue_show_random,
 	.store = queue_store_random,
 };
 
 static struct queue_sysfs_entry queue_poll_entry = {
-	.attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "io_poll", .mode = 0644 },
 	.show = queue_poll_show,
 	.store = queue_poll_store,
 };
 
 static struct queue_sysfs_entry queue_poll_delay_entry = {
-	.attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "io_poll_delay", .mode = 0644 },
 	.show = queue_poll_delay_show,
 	.store = queue_poll_delay_store,
 };
 
 static struct queue_sysfs_entry queue_wc_entry = {
-	.attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "write_cache", .mode = 0644 },
 	.show = queue_wc_show,
 	.store = queue_wc_store,
 };
 
+static struct queue_sysfs_entry queue_fua_entry = {
+	.attr = {.name = "fua", .mode = 0444 },
+	.show = queue_fua_show,
+};
+
 static struct queue_sysfs_entry queue_dax_entry = {
-	.attr = {.name = "dax", .mode = S_IRUGO },
+	.attr = {.name = "dax", .mode = 0444 },
 	.show = queue_dax_show,
 };
 
 static struct queue_sysfs_entry queue_wb_lat_entry = {
-	.attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "wbt_lat_usec", .mode = 0644 },
 	.show = queue_wb_lat_show,
 	.store = queue_wb_lat_store,
 };
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 static struct queue_sysfs_entry throtl_sample_time_entry = {
-	.attr = {.name = "throttle_sample_time", .mode = S_IRUGO | S_IWUSR },
+	.attr = {.name = "throttle_sample_time", .mode = 0644 },
 	.show = blk_throtl_sample_time_show,
 	.store = blk_throtl_sample_time_store,
 };
@@ -708,6 +718,7 @@
 	&queue_random_entry.attr,
 	&queue_poll_entry.attr,
 	&queue_wc_entry.attr,
+	&queue_fua_entry.attr,
 	&queue_dax_entry.attr,
 	&queue_wb_lat_entry.attr,
 	&queue_poll_delay_entry.attr,
@@ -813,8 +824,7 @@
 	if (q->mq_ops)
 		blk_mq_debugfs_unregister(q);
 
-	if (q->bio_split)
-		bioset_free(q->bio_split);
+	bioset_exit(&q->bio_split);
 
 	ida_simple_remove(&blk_queue_ida, q->id);
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
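
The blanket S_IRUGO/S_IWUSR-to-octal conversion in blk-sysfs.c is purely mechanical; the symbolic macros expand to exactly the octal values now spelled out, as this small userspace check illustrates:

	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		/* S_IRUGO is kernel shorthand for S_IRUSR|S_IRGRP|S_IROTH */
		unsigned int irugo = S_IRUSR | S_IRGRP | S_IROTH;

		printf("%o\n", irugo);			/* 444 */
		printf("%o\n", irugo | S_IWUSR);	/* 644 */
		return 0;
	}
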
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c5a1316..82282e6 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -36,8 +36,6 @@
  */
 #define LATENCY_FILTERED_HD (1000L) /* 1ms */
 
-#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
-
 static struct blkcg_policy blkcg_policy_throtl;
 
 /* A workqueue to queue throttle related work */
@@ -821,7 +819,7 @@
 	if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
 		return false;
 
-	return 1;
+	return true;
 }
 
 /* Trim the used slices and adjust slice start accordingly */
@@ -931,7 +929,7 @@
 
 	if (wait)
 		*wait = jiffy_wait;
-	return 0;
+	return false;
 }
 
 static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
@@ -974,7 +972,7 @@
 	jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed);
 	if (wait)
 		*wait = jiffy_wait;
-	return 0;
+	return false;
 }
 
 /*
@@ -1024,7 +1022,7 @@
 	    tg_with_in_iops_limit(tg, bio, &iops_wait)) {
 		if (wait)
 			*wait = 0;
-		return 1;
+		return true;
 	}
 
 	max_wait = max(bps_wait, iops_wait);
@@ -1035,7 +1033,7 @@
 	if (time_before(tg->slice_end[rw], jiffies + max_wait))
 		throtl_extend_slice(tg, rw, jiffies + max_wait);
 
-	return 0;
+	return false;
 }
 
 static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
@@ -1209,7 +1207,7 @@
 
 	while (1) {
 		struct throtl_grp *tg = throtl_rb_first(parent_sq);
-		struct throtl_service_queue *sq = &tg->service_queue;
+		struct throtl_service_queue *sq;
 
 		if (!tg)
 			break;
@@ -1221,6 +1219,7 @@
 
 		nr_disp += throtl_dispatch_tg(tg);
 
+		sq = &tg->service_queue;
 		if (sq->nr_queued[0] || sq->nr_queued[1])
 			tg_update_disptime(tg);
 
@@ -2139,7 +2138,7 @@
 		bio->bi_cg_private = tg;
 		blkg_get(tg_to_blkg(tg));
 	}
-	blk_stat_set_issue(&bio->bi_issue_stat, bio_sectors(bio));
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 #endif
 }
 
@@ -2251,7 +2250,7 @@
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	if (throttled || !td->track_bio_latency)
-		bio->bi_issue_stat.stat |= SKIP_LATENCY;
+		bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
 #endif
 	return throttled;
 }
@@ -2281,8 +2280,7 @@
 	struct request_queue *q = rq->q;
 	struct throtl_data *td = q->td;
 
-	throtl_track_latency(td, blk_stat_size(&rq->issue_stat),
-		req_op(rq), time_ns >> 10);
+	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
 }
 
 void blk_throtl_bio_endio(struct bio *bio)
@@ -2302,8 +2300,8 @@
 	finish_time_ns = ktime_get_ns();
 	tg->last_finish_time = finish_time_ns >> 10;
 
-	start_time = blk_stat_time(&bio->bi_issue_stat) >> 10;
-	finish_time = __blk_stat_time(finish_time_ns) >> 10;
+	start_time = bio_issue_time(&bio->bi_issue) >> 10;
+	finish_time = __bio_issue_time(finish_time_ns) >> 10;
 	if (!start_time || finish_time <= start_time) {
 		blkg_put(tg_to_blkg(tg));
 		return;
@@ -2311,16 +2309,15 @@
 
 	lat = finish_time - start_time;
 	/* this is only for bio based driver */
-	if (!(bio->bi_issue_stat.stat & SKIP_LATENCY))
-		throtl_track_latency(tg->td, blk_stat_size(&bio->bi_issue_stat),
-			bio_op(bio), lat);
+	if (!(bio->bi_issue.value & BIO_ISSUE_THROTL_SKIP_LATENCY))
+		throtl_track_latency(tg->td, bio_issue_size(&bio->bi_issue),
+				     bio_op(bio), lat);
 
 	if (tg->latency_target && lat >= tg->td->filtered_latency) {
 		int bucket;
 		unsigned int threshold;
 
-		bucket = request_bucket_index(
-			blk_stat_size(&bio->bi_issue_stat));
+		bucket = request_bucket_index(bio_issue_size(&bio->bi_issue));
 		threshold = tg->td->avg_buckets[rw][bucket].latency +
 			tg->latency_target;
 		if (lat > threshold)
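
Note the recurring ">> 10" in the blk-throttle hunks: latencies are tracked in "microseconds" obtained by shifting nanoseconds right by 10 (divide by 1024), a cheap approximation of dividing by 1000 that comes out about 2.3% low but avoids a 64-bit division on every completion. For instance:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t ns = 1500000;			/* 1.5 ms */

		printf("%llu vs %llu\n",
		       (unsigned long long)(ns >> 10),	/* 1464 "us" (ns/1024) */
		       (unsigned long long)(ns / 1000));/* 1500 us exactly */
		return 0;
	}
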
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 652d4d4..4b8a48d 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -86,14 +86,11 @@
 	if (q->rq_timed_out_fn)
 		ret = q->rq_timed_out_fn(req);
 	switch (ret) {
-	case BLK_EH_HANDLED:
-		__blk_complete_request(req);
-		break;
 	case BLK_EH_RESET_TIMER:
 		blk_add_timer(req);
 		blk_clear_rq_complete(req);
 		break;
-	case BLK_EH_NOT_HANDLED:
+	case BLK_EH_DONE:
 		/*
 		 * LLD handles this for now but in the future
 		 * we can send a request msg to abort the command
@@ -214,7 +211,6 @@
 		req->timeout = q->rq_timeout;
 
 	blk_rq_set_deadline(req, jiffies + req->timeout);
-	req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
 
 	/*
 	 * Only the non-mq case needs to add the request to a protected list.
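
With BLK_EH_HANDLED gone, a driver ->timeout() callback has exactly two choices: complete the request itself and return BLK_EH_DONE, or return BLK_EH_RESET_TIMER to re-arm the timer (anything else now trips the WARN_ON_ONCE added in blk_mq_rq_timed_out() above). A sketch of a handler under the new contract; mydrv_abort() and the pdu type are hypothetical:

	/* Hypothetical driver timeout handler; only two return values remain. */
	static enum blk_eh_timer_return mydrv_timeout(struct request *rq,
						      bool reserved)
	{
		struct mydrv_cmd *cmd = blk_mq_rq_to_pdu(rq);	/* driver pdu */

		if (mydrv_abort(cmd)) {			/* hypothetical abort */
			blk_mq_complete_request(rq);	/* we finish it */
			return BLK_EH_DONE;		/* core does no more */
		}

		return BLK_EH_RESET_TIMER;	/* core calls blk_add_timer() */
	}
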
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index f92fc84..4f89b28 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -29,6 +29,26 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/wbt.h>
 
+static inline void wbt_clear_state(struct request *rq)
+{
+	rq->wbt_flags = 0;
+}
+
+static inline enum wbt_flags wbt_flags(struct request *rq)
+{
+	return rq->wbt_flags;
+}
+
+static inline bool wbt_is_tracked(struct request *rq)
+{
+	return rq->wbt_flags & WBT_TRACKED;
+}
+
+static inline bool wbt_is_read(struct request *rq)
+{
+	return rq->wbt_flags & WBT_READ;
+}
+
 enum {
 	/*
 	 * Default setting, we'll scale up (to 75% of QD max) or down (min 1)
@@ -101,9 +121,15 @@
 	return time_before(jiffies, wb->dirty_sleep + HZ);
 }
 
-static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, bool is_kswapd)
+static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
+					  enum wbt_flags wb_acct)
 {
-	return &rwb->rq_wait[is_kswapd];
+	if (wb_acct & WBT_KSWAPD)
+		return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+	else if (wb_acct & WBT_DISCARD)
+		return &rwb->rq_wait[WBT_RWQ_DISCARD];
+
+	return &rwb->rq_wait[WBT_RWQ_BG];
 }
 
 static void rwb_wake_all(struct rq_wb *rwb)
@@ -126,7 +152,7 @@
 	if (!(wb_acct & WBT_TRACKED))
 		return;
 
-	rqw = get_rq_wait(rwb, wb_acct & WBT_KSWAPD);
+	rqw = get_rq_wait(rwb, wb_acct);
 	inflight = atomic_dec_return(&rqw->inflight);
 
 	/*
@@ -139,10 +165,13 @@
 	}
 
 	/*
-	 * If the device does write back caching, drop further down
-	 * before we wake people up.
+	 * For discards, our limit is always the background. For writes, if
+	 * the device does write back caching, drop further down before we
+	 * wake people up.
 	 */
-	if (rwb->wc && !wb_recent_wait(rwb))
+	if (wb_acct & WBT_DISCARD)
+		limit = rwb->wb_background;
+	else if (rwb->wc && !wb_recent_wait(rwb))
 		limit = 0;
 	else
 		limit = rwb->wb_normal;
@@ -165,24 +194,24 @@
  * Called on completion of a request. Note that it's also called when
  * a request is merged, when the request gets freed.
  */
-void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_done(struct rq_wb *rwb, struct request *rq)
 {
 	if (!rwb)
 		return;
 
-	if (!wbt_is_tracked(stat)) {
-		if (rwb->sync_cookie == stat) {
+	if (!wbt_is_tracked(rq)) {
+		if (rwb->sync_cookie == rq) {
 			rwb->sync_issue = 0;
 			rwb->sync_cookie = NULL;
 		}
 
-		if (wbt_is_read(stat))
+		if (wbt_is_read(rq))
 			wb_timestamp(rwb, &rwb->last_comp);
 	} else {
-		WARN_ON_ONCE(stat == rwb->sync_cookie);
-		__wbt_done(rwb, wbt_stat_to_mask(stat));
+		WARN_ON_ONCE(rq == rwb->sync_cookie);
+		__wbt_done(rwb, wbt_flags(rq));
 	}
-	wbt_clear_state(stat);
+	wbt_clear_state(rq);
 }
 
 /*
@@ -479,6 +508,9 @@
 {
 	unsigned int limit;
 
+	if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD)
+		return rwb->wb_background;
+
 	/*
 	 * At this point we know it's a buffered write. If this is
 	 * kswapd trying to free memory, or REQ_SYNC is set, then
@@ -529,11 +561,12 @@
  * Block if we will exceed our limit, or if we are currently waiting for
  * the timer to kick off queuing again.
  */
-static void __wbt_wait(struct rq_wb *rwb, unsigned long rw, spinlock_t *lock)
+static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
+		       unsigned long rw, spinlock_t *lock)
 	__releases(lock)
 	__acquires(lock)
 {
-	struct rq_wait *rqw = get_rq_wait(rwb, current_is_kswapd());
+	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
 	DEFINE_WAIT(wait);
 
 	if (may_queue(rwb, rqw, &wait, rw))
@@ -559,21 +592,20 @@
 
 static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
 {
-	const int op = bio_op(bio);
-
-	/*
-	 * If not a WRITE, do nothing
-	 */
-	if (op != REQ_OP_WRITE)
+	switch (bio_op(bio)) {
+	case REQ_OP_WRITE:
+		/*
+		 * Don't throttle WRITE_ODIRECT
+		 */
+		if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) ==
+		    (REQ_SYNC | REQ_IDLE))
+			return false;
+		/* fallthrough */
+	case REQ_OP_DISCARD:
+		return true;
+	default:
 		return false;
-
-	/*
-	 * Don't throttle WRITE_ODIRECT
-	 */
-	if ((bio->bi_opf & (REQ_SYNC | REQ_IDLE)) == (REQ_SYNC | REQ_IDLE))
-		return false;
-
-	return true;
+	}
 }
 
 /*
@@ -584,7 +616,7 @@
  */
 enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
 {
-	unsigned int ret = 0;
+	enum wbt_flags ret = 0;
 
 	if (!rwb_enabled(rwb))
 		return 0;
@@ -598,41 +630,42 @@
 		return ret;
 	}
 
-	__wbt_wait(rwb, bio->bi_opf, lock);
+	if (current_is_kswapd())
+		ret |= WBT_KSWAPD;
+	if (bio_op(bio) == REQ_OP_DISCARD)
+		ret |= WBT_DISCARD;
+
+	__wbt_wait(rwb, ret, bio->bi_opf, lock);
 
 	if (!blk_stat_is_active(rwb->cb))
 		rwb_arm_timer(rwb);
 
-	if (current_is_kswapd())
-		ret |= WBT_KSWAPD;
-
 	return ret | WBT_TRACKED;
 }
 
-void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_issue(struct rq_wb *rwb, struct request *rq)
 {
 	if (!rwb_enabled(rwb))
 		return;
 
 	/*
-	 * Track sync issue, in case it takes a long time to complete. Allows
-	 * us to react quicker, if a sync IO takes a long time to complete.
-	 * Note that this is just a hint. 'stat' can go away when the
-	 * request completes, so it's important we never dereference it. We
-	 * only use the address to compare with, which is why we store the
-	 * sync_issue time locally.
+	 * Track the sync issue so we can react more quickly if a sync IO
+	 * takes a long time to complete. Note that this is just a hint. The
+	 * request can go away when it completes, so it's important we never
+	 * dereference it. We only use the address to compare with, which is
+	 * why we store the sync_issue time locally.
 	 */
-	if (wbt_is_read(stat) && !rwb->sync_issue) {
-		rwb->sync_cookie = stat;
-		rwb->sync_issue = blk_stat_time(stat);
+	if (wbt_is_read(rq) && !rwb->sync_issue) {
+		rwb->sync_cookie = rq;
+		rwb->sync_issue = rq->io_start_time_ns;
 	}
 }
 
-void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 {
 	if (!rwb_enabled(rwb))
 		return;
-	if (stat == rwb->sync_cookie) {
+	if (rq == rwb->sync_cookie) {
 		rwb->sync_issue = 0;
 		rwb->sync_cookie = NULL;
 	}
@@ -701,7 +734,7 @@
 
 	if (op == REQ_OP_READ)
 		return READ;
-	else if (op == REQ_OP_WRITE || op == REQ_OP_FLUSH)
+	else if (op_is_write(op))
 		return WRITE;
 
 	/* don't account */
@@ -713,8 +746,6 @@
 	struct rq_wb *rwb;
 	int i;
 
-	BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS);
-
 	rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
 	if (!rwb)
 		return -ENOMEM;
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index a232c98..300df53 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -14,12 +14,16 @@
 	WBT_TRACKED		= 1,	/* write, tracked for throttling */
 	WBT_READ		= 2,	/* read */
 	WBT_KSWAPD		= 4,	/* write, from kswapd */
+	WBT_DISCARD		= 8,	/* discard */
 
-	WBT_NR_BITS		= 3,	/* number of bits */
+	WBT_NR_BITS		= 4,	/* number of bits */
 };
 
 enum {
-	WBT_NUM_RWQ		= 2,
+	WBT_RWQ_BG		= 0,
+	WBT_RWQ_KSWAPD,
+	WBT_RWQ_DISCARD,
+	WBT_NUM_RWQ,
 };
 
 /*
@@ -31,31 +35,6 @@
 	WBT_STATE_ON_MANUAL	= 2,
 };
 
-static inline void wbt_clear_state(struct blk_issue_stat *stat)
-{
-	stat->stat &= ~BLK_STAT_RES_MASK;
-}
-
-static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
-{
-	return (stat->stat & BLK_STAT_RES_MASK) >> BLK_STAT_RES_SHIFT;
-}
-
-static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct)
-{
-	stat->stat |= ((u64) wb_acct) << BLK_STAT_RES_SHIFT;
-}
-
-static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
-{
-	return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_TRACKED;
-}
-
-static inline bool wbt_is_read(struct blk_issue_stat *stat)
-{
-	return (stat->stat >> BLK_STAT_RES_SHIFT) & WBT_READ;
-}
-
 struct rq_wait {
 	wait_queue_head_t wait;
 	atomic_t inflight;
@@ -84,7 +63,7 @@
 
 	struct blk_stat_callback *cb;
 
-	s64 sync_issue;
+	u64 sync_issue;
 	void *sync_cookie;
 
 	unsigned int wc;
@@ -109,14 +88,19 @@
 
 #ifdef CONFIG_BLK_WBT
 
+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
+{
+	rq->wbt_flags |= flags;
+}
+
 void __wbt_done(struct rq_wb *, enum wbt_flags);
-void wbt_done(struct rq_wb *, struct blk_issue_stat *);
+void wbt_done(struct rq_wb *, struct request *);
 enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
 int wbt_init(struct request_queue *);
 void wbt_exit(struct request_queue *);
 void wbt_update_limits(struct rq_wb *);
-void wbt_requeue(struct rq_wb *, struct blk_issue_stat *);
-void wbt_issue(struct rq_wb *, struct blk_issue_stat *);
+void wbt_requeue(struct rq_wb *, struct request *);
+void wbt_issue(struct rq_wb *, struct request *);
 void wbt_disable_default(struct request_queue *);
 void wbt_enable_default(struct request_queue *);
 
@@ -127,10 +111,13 @@
 
 #else
 
+static inline void wbt_track(struct request *rq, enum wbt_flags flags)
+{
+}
 static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
 {
 }
-static inline void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_done(struct rq_wb *rwb, struct request *rq)
 {
 }
 static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,
@@ -148,10 +135,10 @@
 static inline void wbt_update_limits(struct rq_wb *rwb)
 {
 }
-static inline void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_requeue(struct rq_wb *rwb, struct request *rq)
 {
 }
-static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+static inline void wbt_issue(struct rq_wb *rwb, struct request *rq)
 {
 }
 static inline void wbt_disable_default(struct request_queue *q)
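
wbt now carries its flags directly on the request (rq->wbt_flags) and gives discards their own inflight queue alongside background writes and kswapd writeback; on completion, WBT_DISCARD waiters are woken against the background limit (see __wbt_done above). The flag-to-queue mapping is small enough to demo standalone, with constants and logic copied from the get_rq_wait() hunk:

	#include <stdio.h>

	enum wbt_flags {
		WBT_TRACKED = 1, WBT_READ = 2, WBT_KSWAPD = 4, WBT_DISCARD = 8,
	};
	enum { WBT_RWQ_BG, WBT_RWQ_KSWAPD, WBT_RWQ_DISCARD, WBT_NUM_RWQ };

	/* mirrors get_rq_wait(): kswapd and discard get dedicated queues */
	static int rwq_index(enum wbt_flags acct)
	{
		if (acct & WBT_KSWAPD)
			return WBT_RWQ_KSWAPD;
		if (acct & WBT_DISCARD)
			return WBT_RWQ_DISCARD;
		return WBT_RWQ_BG;
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       rwq_index(WBT_TRACKED),			/* 0 */
		       rwq_index(WBT_TRACKED | WBT_KSWAPD),	/* 1 */
		       rwq_index(WBT_TRACKED | WBT_DISCARD));	/* 2 */
		return 0;
	}
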
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 08e84ef..3d08dc84 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -328,7 +328,11 @@
 	if (!rep.nr_zones)
 		return -EINVAL;
 
-	zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
+	if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
+		return -ERANGE;
+
+	zones = kvmalloc(rep.nr_zones * sizeof(struct blk_zone),
+			GFP_KERNEL | __GFP_ZERO);
 	if (!zones)
 		return -ENOMEM;
 
@@ -350,7 +354,7 @@
 	}
 
  out:
-	kfree(zones);
+	kvfree(zones);
 
 	return ret;
 }
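
The zone-report ioctl path gains two defenses above: nr_zones is bounds-checked so the size computation cannot overflow, and the buffer comes from kvmalloc() so large reports can fall back to vmalloc() instead of failing a high-order kmalloc. The guard generalizes to any user-controlled count; a userspace rendering of the same pattern:

	#include <limits.h>
	#include <stdlib.h>

	/* Refuse n when n * size cannot be represented, before allocating;
	 * mirrors the rep.nr_zones check (calloc also checks internally,
	 * but the kernel code multiplies by hand). */
	static void *alloc_array_checked(size_t n, size_t size)
	{
		if (size && n > INT_MAX / size)
			return NULL;
		return calloc(n, size);		/* zeroed, like __GFP_ZERO */
	}
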
diff --git a/block/blk.h b/block/blk.h
index b034fd2..8d23aea9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -186,7 +186,7 @@
 
 void blk_account_io_start(struct request *req, bool new_io);
 void blk_account_io_completion(struct request *req, unsigned int bytes);
-void blk_account_io_done(struct request *req);
+void blk_account_io_done(struct request *req, u64 now);
 
 /*
  * EH timer and IO completion will both attempt to 'grab' the request, make
@@ -231,6 +231,9 @@
 		e->type->ops.sq.elevator_deactivate_req_fn(q, rq);
 }
 
+int elevator_init(struct request_queue *);
+int elevator_init_mq(struct request_queue *q);
+void elevator_exit(struct request_queue *, struct elevator_queue *);
 int elv_register_queue(struct request_queue *q);
 void elv_unregister_queue(struct request_queue *q);
 
diff --git a/block/bounce.c b/block/bounce.c
index dd0b93f..fd31347 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -28,28 +28,29 @@
 #define POOL_SIZE	64
 #define ISA_POOL_SIZE	16
 
-static struct bio_set *bounce_bio_set, *bounce_bio_split;
-static mempool_t *page_pool, *isa_page_pool;
+static struct bio_set bounce_bio_set, bounce_bio_split;
+static mempool_t page_pool, isa_page_pool;
 
 #if defined(CONFIG_HIGHMEM)
 static __init int init_emergency_pool(void)
 {
+	int ret;
 #if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
 	if (max_pfn <= max_low_pfn)
 		return 0;
 #endif
 
-	page_pool = mempool_create_page_pool(POOL_SIZE, 0);
-	BUG_ON(!page_pool);
+	ret = mempool_init_page_pool(&page_pool, POOL_SIZE, 0);
+	BUG_ON(ret);
 	pr_info("pool size: %d pages\n", POOL_SIZE);
 
-	bounce_bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	BUG_ON(!bounce_bio_set);
-	if (bioset_integrity_create(bounce_bio_set, BIO_POOL_SIZE))
+	ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	BUG_ON(ret);
+	if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
 		BUG_ON(1);
 
-	bounce_bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
-	BUG_ON(!bounce_bio_split);
+	ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
+	BUG_ON(ret);
 
 	return 0;
 }
@@ -63,14 +64,11 @@
  */
 static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
 {
-	unsigned long flags;
 	unsigned char *vto;
 
-	local_irq_save(flags);
 	vto = kmap_atomic(to->bv_page);
 	memcpy(vto + to->bv_offset, vfrom, to->bv_len);
 	kunmap_atomic(vto);
-	local_irq_restore(flags);
 }
 
 #else /* CONFIG_HIGHMEM */
@@ -94,12 +92,14 @@
  */
 int init_emergency_isa_pool(void)
 {
-	if (isa_page_pool)
+	int ret;
+
+	if (mempool_initialized(&isa_page_pool))
 		return 0;
 
-	isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
-				       mempool_free_pages, (void *) 0);
-	BUG_ON(!isa_page_pool);
+	ret = mempool_init(&isa_page_pool, ISA_POOL_SIZE, mempool_alloc_pages_isa,
+			   mempool_free_pages, (void *) 0);
+	BUG_ON(ret);
 
 	pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE);
 	return 0;
@@ -166,13 +166,13 @@
 
 static void bounce_end_io_write(struct bio *bio)
 {
-	bounce_end_io(bio, page_pool);
+	bounce_end_io(bio, &page_pool);
 }
 
 static void bounce_end_io_write_isa(struct bio *bio)
 {
 
-	bounce_end_io(bio, isa_page_pool);
+	bounce_end_io(bio, &isa_page_pool);
 }
 
 static void __bounce_end_io_read(struct bio *bio, mempool_t *pool)
@@ -187,12 +187,12 @@
 
 static void bounce_end_io_read(struct bio *bio)
 {
-	__bounce_end_io_read(bio, page_pool);
+	__bounce_end_io_read(bio, &page_pool);
 }
 
 static void bounce_end_io_read_isa(struct bio *bio)
 {
-	__bounce_end_io_read(bio, isa_page_pool);
+	__bounce_end_io_read(bio, &isa_page_pool);
 }
 
 static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
@@ -217,13 +217,13 @@
 		return;
 
 	if (!passthrough && sectors < bio_sectors(*bio_orig)) {
-		bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
+		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
 		bio_chain(bio, *bio_orig);
 		generic_make_request(*bio_orig);
 		*bio_orig = bio;
 	}
 	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
-			bounce_bio_set);
+			&bounce_bio_set);
 
 	bio_for_each_segment_all(to, bio, i) {
 		struct page *page = to->bv_page;
@@ -250,7 +250,7 @@
 
 	bio->bi_flags |= (1 << BIO_BOUNCED);
 
-	if (pool == page_pool) {
+	if (pool == &page_pool) {
 		bio->bi_end_io = bounce_end_io_write;
 		if (rw == READ)
 			bio->bi_end_io = bounce_end_io_read;
@@ -282,10 +282,10 @@
 	if (!(q->bounce_gfp & GFP_DMA)) {
 		if (q->limits.bounce_pfn >= blk_max_pfn)
 			return;
-		pool = page_pool;
+		pool = &page_pool;
 	} else {
-		BUG_ON(!isa_page_pool);
-		pool = isa_page_pool;
+		BUG_ON(!mempool_initialized(&isa_page_pool));
+		pool = &isa_page_pool;
 	}
 
 	/*
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index fc2e5ff..9419def 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -303,11 +303,9 @@
  * @name: device to give bsg device
  * @job_fn: bsg job handler
  * @dd_job_size: size of LLD data needed for each job
- * @release: @dev release function
  */
 struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
-		bsg_job_fn *job_fn, int dd_job_size,
-		void (*release)(struct device *))
+		bsg_job_fn *job_fn, int dd_job_size)
 {
 	struct request_queue *q;
 	int ret;
@@ -331,7 +329,7 @@
 	blk_queue_softirq_done(q, bsg_softirq_done);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
-	ret = bsg_register_queue(q, dev, name, &bsg_transport_ops, release);
+	ret = bsg_register_queue(q, dev, name, &bsg_transport_ops);
 	if (ret) {
 		printk(KERN_ERR "%s: bsg interface failed to "
 		       "initialize - register queue\n", dev->kobj.name);
diff --git a/block/bsg.c b/block/bsg.c
index defa06c..132e657 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -226,8 +226,7 @@
 		return ERR_PTR(ret);
 
 	rq = blk_get_request(q, hdr->dout_xfer_len ?
-			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			GFP_KERNEL);
+			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq))
 		return rq;
 
@@ -249,7 +248,7 @@
 			goto out;
 		}
 
-		next_rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+		next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
 		if (IS_ERR(next_rq)) {
 			ret = PTR_ERR(next_rq);
 			goto out;
@@ -650,18 +649,6 @@
 	return bd;
 }
 
-static void bsg_kref_release_function(struct kref *kref)
-{
-	struct bsg_class_device *bcd =
-		container_of(kref, struct bsg_class_device, ref);
-	struct device *parent = bcd->parent;
-
-	if (bcd->release)
-		bcd->release(bcd->parent);
-
-	put_device(parent);
-}
-
 static int bsg_put_device(struct bsg_device *bd)
 {
 	int ret = 0, do_free;
@@ -694,7 +681,6 @@
 
 	kfree(bd);
 out:
-	kref_put(&q->bsg_dev.ref, bsg_kref_release_function);
 	if (do_free)
 		blk_put_queue(q);
 	return ret;
@@ -760,8 +746,6 @@
 	 */
 	mutex_lock(&bsg_mutex);
 	bcd = idr_find(&bsg_minor_idr, iminor(inode));
-	if (bcd)
-		kref_get(&bcd->ref);
 	mutex_unlock(&bsg_mutex);
 
 	if (!bcd)
@@ -772,8 +756,6 @@
 		return bd;
 
 	bd = bsg_add_device(inode, bcd->queue, file);
-	if (IS_ERR(bd))
-		kref_put(&bcd->ref, bsg_kref_release_function);
 
 	return bd;
 }
@@ -913,25 +895,17 @@
 		sysfs_remove_link(&q->kobj, "bsg");
 	device_unregister(bcd->class_dev);
 	bcd->class_dev = NULL;
-	kref_put(&bcd->ref, bsg_kref_release_function);
 	mutex_unlock(&bsg_mutex);
 }
 EXPORT_SYMBOL_GPL(bsg_unregister_queue);
 
 int bsg_register_queue(struct request_queue *q, struct device *parent,
-		const char *name, const struct bsg_ops *ops,
-		void (*release)(struct device *))
+		const char *name, const struct bsg_ops *ops)
 {
 	struct bsg_class_device *bcd;
 	dev_t dev;
 	int ret;
 	struct device *class_dev = NULL;
-	const char *devname;
-
-	if (name)
-		devname = name;
-	else
-		devname = dev_name(parent);
 
 	/*
 	 * we need a proper transport to send commands, not a stacked device
@@ -955,15 +929,12 @@
 
 	bcd->minor = ret;
 	bcd->queue = q;
-	bcd->parent = get_device(parent);
-	bcd->release = release;
 	bcd->ops = ops;
-	kref_init(&bcd->ref);
 	dev = MKDEV(bsg_major, bcd->minor);
-	class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
+	class_dev = device_create(bsg_class, parent, dev, NULL, "%s", name);
 	if (IS_ERR(class_dev)) {
 		ret = PTR_ERR(class_dev);
-		goto put_dev;
+		goto idr_remove;
 	}
 	bcd->class_dev = class_dev;
 
@@ -978,8 +949,7 @@
 
 unregister_class_dev:
 	device_unregister(class_dev);
-put_dev:
-	put_device(parent);
+idr_remove:
 	idr_remove(&bsg_minor_idr, bcd->minor);
 unlock:
 	mutex_unlock(&bsg_mutex);
@@ -993,7 +963,7 @@
 		return -EINVAL;
 	}
 
-	return bsg_register_queue(q, parent, NULL, &bsg_scsi_ops, NULL);
+	return bsg_register_queue(q, parent, dev_name(parent), &bsg_scsi_ops);
 }
 EXPORT_SYMBOL_GPL(bsg_scsi_register_queue);
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 9f342ef..82b6c27 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -210,9 +210,9 @@
 	/* total time with empty current active q with other requests queued */
 	struct blkg_stat		empty_time;
 	/* fields after this shouldn't be cleared on stat reset */
-	uint64_t			start_group_wait_time;
-	uint64_t			start_idle_time;
-	uint64_t			start_empty_time;
+	u64				start_group_wait_time;
+	u64				start_idle_time;
+	u64				start_empty_time;
 	uint16_t			flags;
 #endif	/* CONFIG_DEBUG_BLK_CGROUP */
 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */
@@ -491,13 +491,13 @@
 /* This should be called with the queue_lock held. */
 static void cfqg_stats_update_group_wait_time(struct cfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;
 
 	if (!cfqg_stats_waiting(stats))
 		return;
 
-	now = sched_clock();
-	if (time_after64(now, stats->start_group_wait_time))
+	now = ktime_get_ns();
+	if (now > stats->start_group_wait_time)
 		blkg_stat_add(&stats->group_wait_time,
 			      now - stats->start_group_wait_time);
 	cfqg_stats_clear_waiting(stats);
@@ -513,20 +513,20 @@
 		return;
 	if (cfqg == curr_cfqg)
 		return;
-	stats->start_group_wait_time = sched_clock();
+	stats->start_group_wait_time = ktime_get_ns();
 	cfqg_stats_mark_waiting(stats);
 }
 
 /* This should be called with the queue_lock held. */
 static void cfqg_stats_end_empty_time(struct cfqg_stats *stats)
 {
-	unsigned long long now;
+	u64 now;
 
 	if (!cfqg_stats_empty(stats))
 		return;
 
-	now = sched_clock();
-	if (time_after64(now, stats->start_empty_time))
+	now = ktime_get_ns();
+	if (now > stats->start_empty_time)
 		blkg_stat_add(&stats->empty_time,
 			      now - stats->start_empty_time);
 	cfqg_stats_clear_empty(stats);
@@ -552,7 +552,7 @@
 	if (cfqg_stats_empty(stats))
 		return;
 
-	stats->start_empty_time = sched_clock();
+	stats->start_empty_time = ktime_get_ns();
 	cfqg_stats_mark_empty(stats);
 }
 
@@ -561,9 +561,9 @@
 	struct cfqg_stats *stats = &cfqg->stats;
 
 	if (cfqg_stats_idling(stats)) {
-		unsigned long long now = sched_clock();
+		u64 now = ktime_get_ns();
 
-		if (time_after64(now, stats->start_idle_time))
+		if (now > stats->start_idle_time)
 			blkg_stat_add(&stats->idle_time,
 				      now - stats->start_idle_time);
 		cfqg_stats_clear_idling(stats);
@@ -576,7 +576,7 @@
 
 	BUG_ON(cfqg_stats_idling(stats));
 
-	stats->start_idle_time = sched_clock();
+	stats->start_idle_time = ktime_get_ns();
 	cfqg_stats_mark_idling(stats);
 }
 
@@ -701,17 +701,19 @@
 }
 
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-			uint64_t start_time, uint64_t io_start_time,
-			unsigned int op)
+						u64 start_time_ns,
+						u64 io_start_time_ns,
+						unsigned int op)
 {
 	struct cfqg_stats *stats = &cfqg->stats;
-	unsigned long long now = sched_clock();
+	u64 now = ktime_get_ns();
 
-	if (time_after64(now, io_start_time))
-		blkg_rwstat_add(&stats->service_time, op, now - io_start_time);
-	if (time_after64(io_start_time, start_time))
+	if (now > io_start_time_ns)
+		blkg_rwstat_add(&stats->service_time, op,
+				now - io_start_time_ns);
+	if (io_start_time_ns > start_time_ns)
 		blkg_rwstat_add(&stats->wait_time, op,
-				io_start_time - start_time);
+				io_start_time_ns - start_time_ns);
 }
 
 /* @stats = 0 */
@@ -797,8 +799,9 @@
 static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg,
 			unsigned int op) { }
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-			uint64_t start_time, uint64_t io_start_time,
-			unsigned int op) { }
+						u64 start_time_ns,
+						u64 io_start_time_ns,
+						unsigned int op) { }
 
 #endif	/* CONFIG_CFQ_GROUP_IOSCHED */
 
@@ -4225,8 +4228,8 @@
 	cfqd->rq_in_driver--;
 	cfqq->dispatched--;
 	(RQ_CFQG(rq))->dispatched--;
-	cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
-				     rq_io_start_time_ns(rq), rq->cmd_flags);
+	cfqg_stats_update_completion(cfqq->cfqg, rq->start_time_ns,
+				     rq->io_start_time_ns, rq->cmd_flags);
 
 	cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
@@ -4242,16 +4245,7 @@
 					cfqq_type(cfqq));
 
 		st->ttime.last_end_request = now;
-		/*
-		 * We have to do this check in jiffies since start_time is in
-		 * jiffies and it is not trivial to convert to ns. If
-		 * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
-		 * will become problematic but so far we are fine (the default
-		 * is 128 ms).
-		 */
-		if (!time_after(rq->start_time +
-				  nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
-				jiffies))
+		if (rq->start_time_ns + cfqd->cfq_fifo_expire[1] <= now)
 			cfqd->last_delayed_sync = now;
 	}
 
@@ -4792,7 +4786,7 @@
 #undef USEC_STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
-	__ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
+	__ATTR(name, 0644, cfq_##name##_show, cfq_##name##_store)
 
 static struct elv_fs_entry cfq_attrs[] = {
 	CFQ_ATTR(quantum),
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 9de9f15..ef2f1f0 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -512,8 +512,7 @@
 #undef STORE_FUNCTION
 
 #define DD_ATTR(name) \
-	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
-				      deadline_##name##_store)
+	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
 
 static struct elv_fs_entry deadline_attrs[] = {
 	DD_ATTR(read_expire),
diff --git a/block/elevator.c b/block/elevator.c
index e87e9b4..fa828b5 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -199,76 +199,46 @@
 	kfree(e);
 }
 
-int elevator_init(struct request_queue *q, char *name)
+/*
+ * Use the default elevator specified by config boot param for non-mq devices,
+ * or by config option.  Don't try to load modules as we could be running off
+ * async and request_module() isn't allowed from async.
+ */
+int elevator_init(struct request_queue *q)
 {
 	struct elevator_type *e = NULL;
-	int err;
+	int err = 0;
 
 	/*
 	 * q->sysfs_lock must be held to provide mutual exclusion between
 	 * elevator_switch() and here.
 	 */
-	lockdep_assert_held(&q->sysfs_lock);
-
+	mutex_lock(&q->sysfs_lock);
 	if (unlikely(q->elevator))
-		return 0;
+		goto out_unlock;
 
-	INIT_LIST_HEAD(&q->queue_head);
-	q->last_merge = NULL;
-	q->end_sector = 0;
-	q->boundary_rq = NULL;
-
-	if (name) {
-		e = elevator_get(q, name, true);
-		if (!e)
-			return -EINVAL;
-	}
-
-	/*
-	 * Use the default elevator specified by config boot param for
-	 * non-mq devices, or by config option. Don't try to load modules
-	 * as we could be running off async and request_module() isn't
-	 * allowed from async.
-	 */
-	if (!e && !q->mq_ops && *chosen_elevator) {
+	if (*chosen_elevator) {
 		e = elevator_get(q, chosen_elevator, false);
 		if (!e)
 			printk(KERN_ERR "I/O scheduler %s not found\n",
 							chosen_elevator);
 	}
 
+	if (!e)
+		e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
 	if (!e) {
-		/*
-		 * For blk-mq devices, we default to using mq-deadline,
-		 * if available, for single queue devices. If deadline
-		 * isn't available OR we have multiple queues, default
-		 * to "none".
-		 */
-		if (q->mq_ops) {
-			if (q->nr_hw_queues == 1)
-				e = elevator_get(q, "mq-deadline", false);
-			if (!e)
-				return 0;
-		} else
-			e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
-
-		if (!e) {
-			printk(KERN_ERR
-				"Default I/O scheduler not found. " \
-				"Using noop.\n");
-			e = elevator_get(q, "noop", false);
-		}
+		printk(KERN_ERR
+			"Default I/O scheduler not found. Using noop.\n");
+		e = elevator_get(q, "noop", false);
 	}
 
-	if (e->uses_mq)
-		err = blk_mq_init_sched(q, e);
-	else
-		err = e->ops.sq.elevator_init_fn(q, e);
+	err = e->ops.sq.elevator_init_fn(q, e);
 	if (err)
 		elevator_put(e);
+out_unlock:
+	mutex_unlock(&q->sysfs_lock);
 	return err;
 }
-EXPORT_SYMBOL(elevator_init);
 
 void elevator_exit(struct request_queue *q, struct elevator_queue *e)
 {
@@ -281,7 +251,6 @@
 
 	kobject_put(&e->kobj);
 }
-EXPORT_SYMBOL(elevator_exit);
 
 static inline void __elv_rqhash_del(struct request *rq)
 {
@@ -1005,6 +974,40 @@
 }
 
 /*
+ * For blk-mq devices, we default to using mq-deadline, if available, for single
+ * queue devices.  If deadline isn't available OR we have multiple queues,
+ * default to "none".
+ */
+int elevator_init_mq(struct request_queue *q)
+{
+	struct elevator_type *e;
+	int err = 0;
+
+	if (q->nr_hw_queues != 1)
+		return 0;
+
+	/*
+	 * q->sysfs_lock must be held to provide mutual exclusion between
+	 * elevator_switch() and here.
+	 */
+	mutex_lock(&q->sysfs_lock);
+	if (unlikely(q->elevator))
+		goto out_unlock;
+
+	e = elevator_get(q, "mq-deadline", false);
+	if (!e)
+		goto out_unlock;
+
+	err = blk_mq_init_sched(q, e);
+	if (err)
+		elevator_put(e);
+out_unlock:
+	mutex_unlock(&q->sysfs_lock);
+	return err;
+}
+
+/*
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
  * we don't free the old io scheduler, before we have allocated what we
  * need for the new one. this way we have a chance of going back to the old
diff --git a/block/genhd.c b/block/genhd.c
index c4513fe..f1543a4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1027,18 +1027,6 @@
 	.stop	= disk_seqf_stop,
 	.show	= show_partition
 };
-
-static int partitions_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &partitions_op);
-}
-
-static const struct file_operations proc_partitions_operations = {
-	.open		= partitions_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
 #endif
 
 
@@ -1139,28 +1127,25 @@
 	return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
 }
 
-static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
-static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
-static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
-static DEVICE_ATTR(hidden, S_IRUGO, disk_hidden_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
-		   NULL);
-static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
-static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
-		disk_badblocks_store);
+static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
+static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
+static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
+static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
+static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
+static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
+static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
-	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
 #endif
 #ifdef CONFIG_FAIL_IO_TIMEOUT
 static struct device_attribute dev_attr_fail_timeout =
-	__ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
-		part_timeout_store);
+	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
 #endif
 
 static struct attribute *disk_attrs[] = {
@@ -1377,22 +1362,10 @@
 	.show	= diskstats_show
 };
 
-static int diskstats_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &diskstats_op);
-}
-
-static const struct file_operations proc_diskstats_operations = {
-	.open		= diskstats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static int __init proc_genhd_init(void)
 {
-	proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
-	proc_create("partitions", 0, NULL, &proc_partitions_operations);
+	proc_create_seq("diskstats", 0, NULL, &diskstats_op);
+	proc_create_seq("partitions", 0, NULL, &partitions_op);
 	return 0;
 }
 module_init(proc_genhd_init);
@@ -1924,9 +1897,9 @@
 	return count;
 }
 
-static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
-static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
-static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
+static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
+static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
+static const DEVICE_ATTR(events_poll_msecs, 0644,
 			 disk_events_poll_msecs_show,
 			 disk_events_poll_msecs_store);
 
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 0d6d25e3..a1660ba 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -72,6 +72,19 @@
 	[KYBER_OTHER] = 8,
 };
 
+/*
+ * The mapping between kcq and khd mirrors the one between ctx and hctx;
+ * we use request->mq_ctx->index_hw to index the kcq within khd.
+ */
+struct kyber_ctx_queue {
+	/*
+	 * Used to ensure that operations on rq_list and kcq_map are atomic.
+	 * Also protects the requests on rq_list during merges.
+	 */
+	spinlock_t lock;
+	struct list_head rq_list[KYBER_NUM_DOMAINS];
+} ____cacheline_aligned_in_smp;
+
 struct kyber_queue_data {
 	struct request_queue *q;
 
@@ -99,6 +112,8 @@
 	struct list_head rqs[KYBER_NUM_DOMAINS];
 	unsigned int cur_domain;
 	unsigned int batching;
+	struct kyber_ctx_queue *kcqs;
+	struct sbitmap kcq_map[KYBER_NUM_DOMAINS];
 	wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
 	struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
 	atomic_t wait_index[KYBER_NUM_DOMAINS];
@@ -107,10 +122,8 @@
 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
 			     void *key);
 
-static int rq_sched_domain(const struct request *rq)
+static unsigned int kyber_sched_domain(unsigned int op)
 {
-	unsigned int op = rq->cmd_flags;
-
 	if ((op & REQ_OP_MASK) == REQ_OP_READ)
 		return KYBER_READ;
 	else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
@@ -284,6 +297,11 @@
 	return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
 }
 
+static int kyber_bucket_fn(const struct request *rq)
+{
+	return kyber_sched_domain(rq->cmd_flags);
+}
+
 static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 {
 	struct kyber_queue_data *kqd;
@@ -297,7 +315,7 @@
 		goto err;
 	kqd->q = q;
 
-	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, rq_sched_domain,
+	kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn,
 					  KYBER_NUM_DOMAINS, kqd);
 	if (!kqd->cb)
 		goto err_kqd;
@@ -376,8 +394,18 @@
 	kfree(kqd);
 }
 
+static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
+{
+	unsigned int i;
+
+	spin_lock_init(&kcq->lock);
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+		INIT_LIST_HEAD(&kcq->rq_list[i]);
+}
+
 static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
 	struct kyber_hctx_data *khd;
 	int i;
 
@@ -385,6 +413,24 @@
 	if (!khd)
 		return -ENOMEM;
 
+	khd->kcqs = kmalloc_array_node(hctx->nr_ctx,
+				       sizeof(struct kyber_ctx_queue),
+				       GFP_KERNEL, hctx->numa_node);
+	if (!khd->kcqs)
+		goto err_khd;
+
+	for (i = 0; i < hctx->nr_ctx; i++)
+		kyber_ctx_queue_init(&khd->kcqs[i]);
+
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
+		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
+				      ilog2(8), GFP_KERNEL, hctx->numa_node)) {
+			while (--i >= 0)
+				sbitmap_free(&khd->kcq_map[i]);
+			goto err_kcqs;
+		}
+	}
+
 	spin_lock_init(&khd->lock);
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
@@ -400,12 +446,26 @@
 	khd->batching = 0;
 
 	hctx->sched_data = khd;
+	sbitmap_queue_min_shallow_depth(&hctx->sched_tags->bitmap_tags,
+					kqd->async_depth);
 
 	return 0;
+
+err_kcqs:
+	kfree(khd->kcqs);
+err_khd:
+	kfree(khd);
+	return -ENOMEM;
 }
 
 static void kyber_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	int i;
+
+	for (i = 0; i < KYBER_NUM_DOMAINS; i++)
+		sbitmap_free(&khd->kcq_map[i]);
+	kfree(khd->kcqs);
 	kfree(hctx->sched_data);
 }
 
@@ -427,7 +487,7 @@
 
 	nr = rq_get_domain_token(rq);
 	if (nr != -1) {
-		sched_domain = rq_sched_domain(rq);
+		sched_domain = kyber_sched_domain(rq->cmd_flags);
 		sbitmap_queue_clear(&kqd->domain_tokens[sched_domain], nr,
 				    rq->mq_ctx->cpu);
 	}
@@ -446,11 +506,51 @@
 	}
 }
 
+static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
+{
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
+	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw];
+	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
+	struct list_head *rq_list = &kcq->rq_list[sched_domain];
+	bool merged;
+
+	spin_lock(&kcq->lock);
+	merged = blk_mq_bio_list_merge(hctx->queue, rq_list, bio);
+	spin_unlock(&kcq->lock);
+	blk_mq_put_ctx(ctx);
+
+	return merged;
+}
+
 static void kyber_prepare_request(struct request *rq, struct bio *bio)
 {
 	rq_set_domain_token(rq, -1);
 }
 
+static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
+				  struct list_head *rq_list, bool at_head)
+{
+	struct kyber_hctx_data *khd = hctx->sched_data;
+	struct request *rq, *next;
+
+	list_for_each_entry_safe(rq, next, rq_list, queuelist) {
+		unsigned int sched_domain = kyber_sched_domain(rq->cmd_flags);
+		struct kyber_ctx_queue *kcq = &khd->kcqs[rq->mq_ctx->index_hw];
+		struct list_head *head = &kcq->rq_list[sched_domain];
+
+		spin_lock(&kcq->lock);
+		if (at_head)
+			list_move(&rq->queuelist, head);
+		else
+			list_move_tail(&rq->queuelist, head);
+		sbitmap_set_bit(&khd->kcq_map[sched_domain],
+				rq->mq_ctx->index_hw);
+		blk_mq_sched_request_inserted(rq);
+		spin_unlock(&kcq->lock);
+	}
+}
+
 static void kyber_finish_request(struct request *rq)
 {
 	struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
@@ -469,7 +569,7 @@
 	 * Check if this request met our latency goal. If not, quickly gather
 	 * some statistics and start throttling.
 	 */
-	sched_domain = rq_sched_domain(rq);
+	sched_domain = kyber_sched_domain(rq->cmd_flags);
 	switch (sched_domain) {
 	case KYBER_READ:
 		target = kqd->read_lat_nsec;
@@ -485,29 +585,48 @@
 	if (blk_stat_is_active(kqd->cb))
 		return;
 
-	now = __blk_stat_time(ktime_to_ns(ktime_get()));
-	if (now < blk_stat_time(&rq->issue_stat))
+	now = ktime_get_ns();
+	if (now < rq->io_start_time_ns)
 		return;
 
-	latency = now - blk_stat_time(&rq->issue_stat);
+	latency = now - rq->io_start_time_ns;
 
 	if (latency > target)
 		blk_stat_activate_msecs(kqd->cb, 10);
 }
 
-static void kyber_flush_busy_ctxs(struct kyber_hctx_data *khd,
-				  struct blk_mq_hw_ctx *hctx)
+struct flush_kcq_data {
+	struct kyber_hctx_data *khd;
+	unsigned int sched_domain;
+	struct list_head *list;
+};
+
+static bool flush_busy_kcq(struct sbitmap *sb, unsigned int bitnr, void *data)
 {
-	LIST_HEAD(rq_list);
-	struct request *rq, *next;
+	struct flush_kcq_data *flush_data = data;
+	struct kyber_ctx_queue *kcq = &flush_data->khd->kcqs[bitnr];
 
-	blk_mq_flush_busy_ctxs(hctx, &rq_list);
-	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
-		unsigned int sched_domain;
+	spin_lock(&kcq->lock);
+	list_splice_tail_init(&kcq->rq_list[flush_data->sched_domain],
+			      flush_data->list);
+	sbitmap_clear_bit(sb, bitnr);
+	spin_unlock(&kcq->lock);
 
-		sched_domain = rq_sched_domain(rq);
-		list_move_tail(&rq->queuelist, &khd->rqs[sched_domain]);
-	}
+	return true;
+}
+
+static void kyber_flush_busy_kcqs(struct kyber_hctx_data *khd,
+				  unsigned int sched_domain,
+				  struct list_head *list)
+{
+	struct flush_kcq_data data = {
+		.khd = khd,
+		.sched_domain = sched_domain,
+		.list = list,
+	};
+
+	sbitmap_for_each_set(&khd->kcq_map[sched_domain],
+			     flush_busy_kcq, &data);
 }
 
 static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
@@ -570,26 +689,23 @@
 static struct request *
 kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
 			  struct kyber_hctx_data *khd,
-			  struct blk_mq_hw_ctx *hctx,
-			  bool *flushed)
+			  struct blk_mq_hw_ctx *hctx)
 {
 	struct list_head *rqs;
 	struct request *rq;
 	int nr;
 
 	rqs = &khd->rqs[khd->cur_domain];
-	rq = list_first_entry_or_null(rqs, struct request, queuelist);
 
 	/*
-	 * If there wasn't already a pending request and we haven't flushed the
-	 * software queues yet, flush the software queues and check again.
+	 * If we already have a flushed request, then we just need to get a
+	 * token for it. Otherwise, if there are pending requests in the kcqs,
+	 * flush the kcqs, but only if we can get a token. If not, we should
+	 * leave the requests in the kcqs so that they can be merged. Note that
+	 * khd->lock serializes the flushes, so if we observed any bit set in
+	 * the kcq_map, we will always get a request.
 	 */
-	if (!rq && !*flushed) {
-		kyber_flush_busy_ctxs(khd, hctx);
-		*flushed = true;
-		rq = list_first_entry_or_null(rqs, struct request, queuelist);
-	}
-
+	rq = list_first_entry_or_null(rqs, struct request, queuelist);
 	if (rq) {
 		nr = kyber_get_domain_token(kqd, khd, hctx);
 		if (nr >= 0) {
@@ -598,6 +714,16 @@
 			list_del_init(&rq->queuelist);
 			return rq;
 		}
+	} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
+		nr = kyber_get_domain_token(kqd, khd, hctx);
+		if (nr >= 0) {
+			kyber_flush_busy_kcqs(khd, khd->cur_domain, rqs);
+			rq = list_first_entry(rqs, struct request, queuelist);
+			khd->batching++;
+			rq_set_domain_token(rq, nr);
+			list_del_init(&rq->queuelist);
+			return rq;
+		}
 	}
 
 	/* There were either no pending requests or no tokens. */
@@ -608,7 +734,6 @@
 {
 	struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
 	struct kyber_hctx_data *khd = hctx->sched_data;
-	bool flushed = false;
 	struct request *rq;
 	int i;
 
@@ -619,7 +744,7 @@
 	 * from the batch.
 	 */
 	if (khd->batching < kyber_batch_size[khd->cur_domain]) {
-		rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
 		if (rq)
 			goto out;
 	}
@@ -640,7 +765,7 @@
 		else
 			khd->cur_domain++;
 
-		rq = kyber_dispatch_cur_domain(kqd, khd, hctx, &flushed);
+		rq = kyber_dispatch_cur_domain(kqd, khd, hctx);
 		if (rq)
 			goto out;
 	}
@@ -657,10 +782,12 @@
 	int i;
 
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
-		if (!list_empty_careful(&khd->rqs[i]))
+		if (!list_empty_careful(&khd->rqs[i]) ||
+		    sbitmap_any_bit_set(&khd->kcq_map[i]))
 			return true;
 	}
-	return sbitmap_any_bit_set(&hctx->ctx_map);
+
+	return false;
 }
 
 #define KYBER_LAT_SHOW_STORE(op)					\
@@ -831,7 +958,9 @@
 		.init_hctx = kyber_init_hctx,
 		.exit_hctx = kyber_exit_hctx,
 		.limit_depth = kyber_limit_depth,
+		.bio_merge = kyber_bio_merge,
 		.prepare_request = kyber_prepare_request,
+		.insert_requests = kyber_insert_requests,
 		.finish_request = kyber_finish_request,
 		.requeue_request = kyber_finish_request,
 		.completed_request = kyber_completed_request,
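
The kcq_map/kcqs machinery above is the heart of this change: instead of
scanning every software queue via blk_mq_flush_busy_ctxs(), kyber now keeps a
per-domain bitmap of which contexts hold requests and walks only the set bits.
A rough userspace model of that bookkeeping (plain C, no kernel APIs;
ctx_queue and flush_domain are illustrative names, not kernel symbols, and
__builtin_ctzll is the GCC/Clang bit-scan builtin):

    #include <stdio.h>
    #include <stdint.h>

    #define NR_CTX 64                /* software queues, one per CPU */
    #define NR_DOM 4                 /* scheduling domains */

    struct ctx_queue {
            int nr[NR_DOM];          /* stand-in for the per-domain rq lists */
    };

    static struct ctx_queue kcqs[NR_CTX];
    static uint64_t kcq_map[NR_DOM]; /* bit i set => kcqs[i] holds requests */

    static void insert(unsigned int ctx, unsigned int dom)
    {
            kcqs[ctx].nr[dom]++;
            kcq_map[dom] |= 1ULL << ctx;    /* mark this context busy */
    }

    /* Visit only contexts whose bit is set, clearing bits as we drain. */
    static int flush_domain(unsigned int dom)
    {
            int moved = 0;

            while (kcq_map[dom]) {
                    unsigned int ctx = __builtin_ctzll(kcq_map[dom]);

                    moved += kcqs[ctx].nr[dom];
                    kcqs[ctx].nr[dom] = 0;
                    kcq_map[dom] &= ~(1ULL << ctx);
            }
            return moved;
    }

    int main(void)
    {
            insert(3, 1);
            insert(3, 1);
            insert(17, 1);
            printf("flushed %d requests from domain 1\n", flush_domain(1));
            return 0;
    }

In the real patch the bitmap is an sbitmap and each drain takes kcq->lock, so
insertion and flushing stay atomic with respect to bio merging.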
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 8ec0ba9..099a9e05 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -630,8 +630,7 @@
 #undef STORE_FUNCTION
 
 #define DD_ATTR(name) \
-	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
-				      deadline_##name##_store)
+	__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
 
 static struct elv_fs_entry deadline_attrs[] = {
 	DD_ATTR(read_expire),
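
The S_IRUGO/S_IWUSR conversions in this and the surrounding files are purely
mechanical: the symbolic macros expand to exactly the octal values that
checkpatch now prefers. A quick userspace sanity check (the S_* constants in
<sys/stat.h> carry the same values as the kernel's):

    #include <assert.h>
    #include <sys/stat.h>

    int main(void)
    {
            /* S_IRUGO: read for user, group and other */
            assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444);
            /* S_IRUGO | S_IWUSR: world-readable, owner-writable */
            assert((S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR) == 0644);
            return 0;
    }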
diff --git a/block/partition-generic.c b/block/partition-generic.c
index db57cce..3dcfd4e 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -179,18 +179,17 @@
 }
 #endif
 
-static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
-static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
-		   NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
+static DEVICE_ATTR(start, 0444, part_start_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(ro, 0444, part_ro_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
-	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
 #endif
 
 static struct attribute *part_attrs[] = {
@@ -291,8 +290,7 @@
 {
 	return 0;
 }
-static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
-		   whole_disk_show, NULL);
+static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
 
 /*
  * Must be called either with bd_mutex held, before a disk can be opened or
@@ -518,7 +516,7 @@
 
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
-	check_disk_size_change(disk, bdev);
+	check_disk_size_change(disk, bdev, true);
 	bdev->bd_invalidated = 0;
 	if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 		return 0;
@@ -643,7 +641,7 @@
 		return res;
 
 	set_capacity(disk, 0);
-	check_disk_size_change(disk, bdev);
+	check_disk_size_change(disk, bdev, false);
 	bdev->bd_invalidated = 0;
 	/* tell userspace that the media / partition table may have changed */
 	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 60b471f..533f4ae 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -321,8 +321,7 @@
 		at_head = 1;
 
 	ret = -ENOMEM;
-	rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			GFP_KERNEL);
+	rq = blk_get_request(q, writing ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	req = scsi_req(rq);
@@ -449,8 +448,7 @@
 
 	}
 
-	rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			__GFP_RECLAIM);
+	rq = blk_get_request(q, in_len ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq)) {
 		err = PTR_ERR(rq);
 		goto error_free_buffer;
@@ -501,7 +499,7 @@
 		break;
 	}
 
-	if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_RECLAIM)) {
+	if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO)) {
 		err = DRIVER_ERROR << 24;
 		goto error;
 	}
@@ -538,7 +536,7 @@
 	struct request *rq;
 	int err;
 
-	rq = blk_get_request(q, REQ_OP_SCSI_OUT, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_SCSI_OUT, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
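
The blk_get_request() changes here track an API change in this cycle: the last
argument is now a set of BLK_MQ_REQ_* flags rather than a gfp mask, so the old
GFP_KERNEL/__GFP_RECLAIM arguments become 0, meaning "default, may sleep". A
sketch of the new calling convention (kernel C fragment; q, buffer and bytes
are assumed to come from the surrounding function, and error handling is
trimmed):

    struct request *rq;
    int err;

    /* 0 = default flags: the allocation may sleep for a free tag */
    rq = blk_get_request(q, REQ_OP_SCSI_OUT, 0);
    if (IS_ERR(rq))
            return PTR_ERR(rq);

    /*
     * Mapping a kernel buffer still takes a real gfp mask; GFP_NOIO
     * keeps reclaim from recursing back into the block layer.
     */
    err = blk_rq_map_kern(q, rq, buffer, bytes, GFP_NOIO);
    if (err) {
            blk_put_request(rq);
            return err;
    }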
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 76e8c88..f3e40ac 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -289,6 +289,107 @@
 	  with the Poly1305 authenticator. It is defined in RFC7539 for use in
 	  IETF protocols.
 
+config CRYPTO_AEGIS128
+	tristate "AEGIS-128 AEAD algorithm"
+	select CRYPTO_AEAD
+	select CRYPTO_AES  # for AES S-box tables
+	help
+	 Support for the AEGIS-128 dedicated AEAD algorithm.
+
+config CRYPTO_AEGIS128L
+	tristate "AEGIS-128L AEAD algorithm"
+	select CRYPTO_AEAD
+	select CRYPTO_AES  # for AES S-box tables
+	help
+	 Support for the AEGIS-128L dedicated AEAD algorithm.
+
+config CRYPTO_AEGIS256
+	tristate "AEGIS-256 AEAD algorithm"
+	select CRYPTO_AEAD
+	select CRYPTO_AES  # for AES S-box tables
+	help
+	 Support for the AEGIS-256 dedicated AEAD algorithm.
+
+config CRYPTO_AEGIS128_AESNI_SSE2
+	tristate "AEGIS-128 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
+	depends on X86 && 64BIT
+	select CRYPTO_AEAD
+	select CRYPTO_CRYPTD
+	help
+	 AESNI+SSE2 implementation of the AEGIS-128 dedicated AEAD algorithm.
+
+config CRYPTO_AEGIS128L_AESNI_SSE2
+	tristate "AEGIS-128L AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
+	depends on X86 && 64BIT
+	select CRYPTO_AEAD
+	select CRYPTO_CRYPTD
+	help
+	 AESNI+SSE2 implementation of the AEGIS-128L dedicated AEAD algorithm.
+
+config CRYPTO_AEGIS256_AESNI_SSE2
+	tristate "AEGIS-256 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
+	depends on X86 && 64BIT
+	select CRYPTO_AEAD
+	select CRYPTO_CRYPTD
+	help
+	 AESNI+SSE2 implementation of the AEGIS-256 dedicated AEAD algorithm.
+
+config CRYPTO_MORUS640
+	tristate "MORUS-640 AEAD algorithm"
+	select CRYPTO_AEAD
+	help
+	  Support for the MORUS-640 dedicated AEAD algorithm.
+
+config CRYPTO_MORUS640_GLUE
+	tristate
+	depends on X86
+	select CRYPTO_AEAD
+	select CRYPTO_CRYPTD
+	help
+	  Common glue for SIMD optimizations of the MORUS-640 dedicated AEAD
+	  algorithm.
+
+config CRYPTO_MORUS640_SSE2
+	tristate "MORUS-640 AEAD algorithm (x86_64 SSE2 implementation)"
+	depends on X86 && 64BIT
+	select CRYPTO_AEAD
+	select CRYPTO_MORUS640_GLUE
+	help
+	  SSE2 implementation of the MORUS-640 dedicated AEAD algorithm.
+
+config CRYPTO_MORUS1280
+	tristate "MORUS-1280 AEAD algorithm"
+	select CRYPTO_AEAD
+	help
+	  Support for the MORUS-1280 dedicated AEAD algorithm.
+
+config CRYPTO_MORUS1280_GLUE
+	tristate
+	depends on X86
+	select CRYPTO_AEAD
+	select CRYPTO_CRYPTD
+	help
+	  Common glue for SIMD optimizations of the MORUS-1280 dedicated AEAD
+	  algorithm.
+
+config CRYPTO_MORUS1280_SSE2
+	tristate "MORUS-1280 AEAD algorithm (x86_64 SSE2 implementation)"
+	depends on X86 && 64BIT
+	select CRYPTO_AEAD
+	select CRYPTO_MORUS1280_GLUE
+	help
+	  SSE2 optimized implementation of the MORUS-1280 dedicated AEAD
+	  algorithm.
+
+config CRYPTO_MORUS1280_AVX2
+	tristate "MORUS-1280 AEAD algorithm (x86_64 AVX2 implementation)"
+	depends on X86 && 64BIT
+	select CRYPTO_AEAD
+	select CRYPTO_MORUS1280_GLUE
+	help
+	  AVX2 optimized implementation of the MORUS-1280 dedicated AEAD
+	  algorithm.
+
 config CRYPTO_SEQIV
 	tristate "Sequence Number IV Generator"
 	select CRYPTO_AEAD
@@ -1335,34 +1436,6 @@
 	  The Salsa20 stream cipher algorithm is designed by Daniel J.
 	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
 
-config CRYPTO_SALSA20_586
-	tristate "Salsa20 stream cipher algorithm (i586)"
-	depends on (X86 || UML_X86) && !64BIT
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_SALSA20
-	help
-	  Salsa20 stream cipher algorithm.
-
-	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
-	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
-
-	  The Salsa20 stream cipher algorithm is designed by Daniel J.
-	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
-
-config CRYPTO_SALSA20_X86_64
-	tristate "Salsa20 stream cipher algorithm (x86_64)"
-	depends on (X86 || UML_X86) && 64BIT
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_SALSA20
-	help
-	  Salsa20 stream cipher algorithm.
-
-	  Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
-	  Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
-
-	  The Salsa20 stream cipher algorithm is designed by Daniel J.
-	  Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
-
 config CRYPTO_CHACHA20
 	tristate "ChaCha20 cipher algorithm"
 	select CRYPTO_BLKCIPHER
@@ -1695,6 +1768,15 @@
 	help
 	  This is the LZ4 high compression mode algorithm.
 
+config CRYPTO_ZSTD
+	tristate "Zstd compression algorithm"
+	select CRYPTO_ALGAPI
+	select CRYPTO_ACOMP2
+	select ZSTD_COMPRESS
+	select ZSTD_DECOMPRESS
+	help
+	  This is the zstd (Zstandard) compression algorithm.
+
 comment "Random Number Generation"
 
 config CRYPTO_ANSI_CPRNG
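
For anyone who wants to try the new algorithms, the generic implementations
added above can be built as modules with a config fragment along these lines
(the x86_64 variants additionally pull in CONFIG_CRYPTO_CRYPTD via select):

    CONFIG_CRYPTO_AEGIS128=m
    CONFIG_CRYPTO_AEGIS128L=m
    CONFIG_CRYPTO_AEGIS256=m
    CONFIG_CRYPTO_MORUS640=m
    CONFIG_CRYPTO_MORUS1280=m
    CONFIG_CRYPTO_ZSTD=m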
diff --git a/crypto/Makefile b/crypto/Makefile
index 3a5f016..6d1d40e 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -86,6 +86,11 @@
 obj-$(CONFIG_CRYPTO_GCM) += gcm.o
 obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
+obj-$(CONFIG_CRYPTO_AEGIS128) += aegis128.o
+obj-$(CONFIG_CRYPTO_AEGIS128L) += aegis128l.o
+obj-$(CONFIG_CRYPTO_AEGIS256) += aegis256.o
+obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
+obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
@@ -137,6 +142,7 @@
 obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
 obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
 obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
+obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
 
 ecdh_generic-y := ecc.o
 ecdh_generic-y += ecdh.o
diff --git a/crypto/aegis.h b/crypto/aegis.h
new file mode 100644
index 0000000..f1c6900
--- /dev/null
+++ b/crypto/aegis.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AEGIS common definitions
+ *
+ * Copyright (c) 2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (c) 2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _CRYPTO_AEGIS_H
+#define _CRYPTO_AEGIS_H
+
+#include <crypto/aes.h>
+#include <linux/types.h>
+
+#define AEGIS_BLOCK_SIZE 16
+
+union aegis_block {
+	__le64 words64[AEGIS_BLOCK_SIZE / sizeof(__le64)];
+	u32 words32[AEGIS_BLOCK_SIZE / sizeof(u32)];
+	u8 bytes[AEGIS_BLOCK_SIZE];
+};
+
+#define AEGIS_BLOCK_ALIGN (__alignof__(union aegis_block))
+#define AEGIS_ALIGNED(p) IS_ALIGNED((uintptr_t)p, AEGIS_BLOCK_ALIGN)
+
+static const union aegis_block crypto_aegis_const[2] = {
+	{ .words64 = {
+		cpu_to_le64(U64_C(0x0d08050302010100)),
+		cpu_to_le64(U64_C(0x6279e99059372215)),
+	} },
+	{ .words64 = {
+		cpu_to_le64(U64_C(0xf12fc26d55183ddb)),
+		cpu_to_le64(U64_C(0xdd28b57342311120)),
+	} },
+};
+
+static void crypto_aegis_block_xor(union aegis_block *dst,
+				   const union aegis_block *src)
+{
+	dst->words64[0] ^= src->words64[0];
+	dst->words64[1] ^= src->words64[1];
+}
+
+static void crypto_aegis_block_and(union aegis_block *dst,
+				   const union aegis_block *src)
+{
+	dst->words64[0] &= src->words64[0];
+	dst->words64[1] &= src->words64[1];
+}
+
+static void crypto_aegis_aesenc(union aegis_block *dst,
+				const union aegis_block *src,
+				const union aegis_block *key)
+{
+	u32 *d = dst->words32;
+	const u8  *s  = src->bytes;
+	const u32 *k  = key->words32;
+	const u32 *t0 = crypto_ft_tab[0];
+	const u32 *t1 = crypto_ft_tab[1];
+	const u32 *t2 = crypto_ft_tab[2];
+	const u32 *t3 = crypto_ft_tab[3];
+	u32 d0, d1, d2, d3;
+
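+	/*
+	 * This is a single AES encryption round: each crypto_ft_tab lookup
+	 * combines SubBytes, ShiftRows and MixColumns for one output column,
+	 * and the final XOR with k[] is AddRoundKey, with the key state
+	 * block acting as the round key.
+	 */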
+	d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]] ^ k[0];
+	d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]] ^ k[1];
+	d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]] ^ k[2];
+	d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]] ^ k[3];
+
+	d[0] = d0;
+	d[1] = d1;
+	d[2] = d2;
+	d[3] = d3;
+}
+
+#endif /* _CRYPTO_AEGIS_H */
diff --git a/crypto/aegis128.c b/crypto/aegis128.c
new file mode 100644
index 0000000..3827130
--- /dev/null
+++ b/crypto/aegis128.c
@@ -0,0 +1,463 @@
+/*
+ * The AEGIS-128 Authenticated-Encryption Algorithm
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+
+#include "aegis.h"
+
+#define AEGIS128_NONCE_SIZE 16
+#define AEGIS128_STATE_BLOCKS 5
+#define AEGIS128_KEY_SIZE 16
+#define AEGIS128_MIN_AUTH_SIZE 8
+#define AEGIS128_MAX_AUTH_SIZE 16
+
+struct aegis_state {
+	union aegis_block blocks[AEGIS128_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+	union aegis_block key;
+};
+
+struct aegis128_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
+			    const u8 *src, unsigned int size);
+};
+
+static void crypto_aegis128_update(struct aegis_state *state)
+{
+	union aegis_block tmp;
+	unsigned int i;
+
+	tmp = state->blocks[AEGIS128_STATE_BLOCKS - 1];
+	for (i = AEGIS128_STATE_BLOCKS - 1; i > 0; i--)
+		crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
+				    &state->blocks[i]);
+	crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
+}
+
+static void crypto_aegis128_update_a(struct aegis_state *state,
+				     const union aegis_block *msg)
+{
+	crypto_aegis128_update(state);
+	crypto_aegis_block_xor(&state->blocks[0], msg);
+}
+
+static void crypto_aegis128_update_u(struct aegis_state *state, const void *msg)
+{
+	crypto_aegis128_update(state);
+	crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE);
+}
+
+static void crypto_aegis128_init(struct aegis_state *state,
+				 const union aegis_block *key,
+				 const u8 *iv)
+{
+	union aegis_block key_iv;
+	unsigned int i;
+
+	key_iv = *key;
+	crypto_xor(key_iv.bytes, iv, AEGIS_BLOCK_SIZE);
+
+	state->blocks[0] = key_iv;
+	state->blocks[1] = crypto_aegis_const[1];
+	state->blocks[2] = crypto_aegis_const[0];
+	state->blocks[3] = *key;
+	state->blocks[4] = *key;
+
+	crypto_aegis_block_xor(&state->blocks[3], &crypto_aegis_const[0]);
+	crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[1]);
+
+	for (i = 0; i < 5; i++) {
+		crypto_aegis128_update_a(state, key);
+		crypto_aegis128_update_a(state, &key_iv);
+	}
+}
+
+static void crypto_aegis128_ad(struct aegis_state *state,
+			       const u8 *src, unsigned int size)
+{
+	if (AEGIS_ALIGNED(src)) {
+		const union aegis_block *src_blk =
+				(const union aegis_block *)src;
+
+		while (size >= AEGIS_BLOCK_SIZE) {
+			crypto_aegis128_update_a(state, src_blk);
+
+			size -= AEGIS_BLOCK_SIZE;
+			src_blk++;
+		}
+	} else {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			crypto_aegis128_update_u(state, src);
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+		}
+	}
+}
+
+static void crypto_aegis128_encrypt_chunk(struct aegis_state *state, u8 *dst,
+					  const u8 *src, unsigned int size)
+{
+	union aegis_block tmp;
+
+	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			union aegis_block *dst_blk =
+					(union aegis_block *)dst;
+			const union aegis_block *src_blk =
+					(const union aegis_block *)src;
+
+			tmp = state->blocks[2];
+			crypto_aegis_block_and(&tmp, &state->blocks[3]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+			crypto_aegis_block_xor(&tmp, src_blk);
+
+			crypto_aegis128_update_a(state, src_blk);
+
+			*dst_blk = tmp;
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+			dst += AEGIS_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			tmp = state->blocks[2];
+			crypto_aegis_block_and(&tmp, &state->blocks[3]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+			crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
+
+			crypto_aegis128_update_u(state, src);
+
+			memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+			dst += AEGIS_BLOCK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union aegis_block msg = {};
+		memcpy(msg.bytes, src, size);
+
+		tmp = state->blocks[2];
+		crypto_aegis_block_and(&tmp, &state->blocks[3]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+
+		crypto_aegis128_update_a(state, &msg);
+
+		crypto_aegis_block_xor(&msg, &tmp);
+
+		memcpy(dst, msg.bytes, size);
+	}
+}
+
+static void crypto_aegis128_decrypt_chunk(struct aegis_state *state, u8 *dst,
+					  const u8 *src, unsigned int size)
+{
+	union aegis_block tmp;
+
+	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			union aegis_block *dst_blk =
+					(union aegis_block *)dst;
+			const union aegis_block *src_blk =
+					(const union aegis_block *)src;
+
+			tmp = state->blocks[2];
+			crypto_aegis_block_and(&tmp, &state->blocks[3]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+			crypto_aegis_block_xor(&tmp, src_blk);
+
+			crypto_aegis128_update_a(state, &tmp);
+
+			*dst_blk = tmp;
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+			dst += AEGIS_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			tmp = state->blocks[2];
+			crypto_aegis_block_and(&tmp, &state->blocks[3]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+			crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
+
+			crypto_aegis128_update_a(state, &tmp);
+
+			memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+			dst += AEGIS_BLOCK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union aegis_block msg = {};
+		memcpy(msg.bytes, src, size);
+
+		tmp = state->blocks[2];
+		crypto_aegis_block_and(&tmp, &state->blocks[3]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+		crypto_aegis_block_xor(&msg, &tmp);
+
+		memset(msg.bytes + size, 0, AEGIS_BLOCK_SIZE - size);
+
+		crypto_aegis128_update_a(state, &msg);
+
+		memcpy(dst, msg.bytes, size);
+	}
+}
+
+static void crypto_aegis128_process_ad(struct aegis_state *state,
+				       struct scatterlist *sg_src,
+				       unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	union aegis_block buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= AEGIS_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = AEGIS_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+				crypto_aegis128_update_a(state, &buf);
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			crypto_aegis128_ad(state, src, left);
+			src += left & ~(AEGIS_BLOCK_SIZE - 1);
+			left &= AEGIS_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+
+		pos += left;
+		assoclen -= size;
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos);
+		crypto_aegis128_update_a(state, &buf);
+	}
+}
+
+static void crypto_aegis128_process_crypt(struct aegis_state *state,
+					  struct aead_request *req,
+					  const struct aegis128_ops *ops)
+{
+	struct skcipher_walk walk;
+	u8 *src, *dst;
+	unsigned int chunksize;
+
+	ops->skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		src = walk.src.virt.addr;
+		dst = walk.dst.virt.addr;
+		chunksize = walk.nbytes;
+
+		ops->crypt_chunk(state, dst, src, chunksize);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
+static void crypto_aegis128_final(struct aegis_state *state,
+				  union aegis_block *tag_xor,
+				  u64 assoclen, u64 cryptlen)
+{
+	u64 assocbits = assoclen * 8;
+	u64 cryptbits = cryptlen * 8;
+
+	union aegis_block tmp;
+	unsigned int i;
+
+	tmp.words64[0] = cpu_to_le64(assocbits);
+	tmp.words64[1] = cpu_to_le64(cryptbits);
+
+	crypto_aegis_block_xor(&tmp, &state->blocks[3]);
+
+	for (i = 0; i < 7; i++)
+		crypto_aegis128_update_a(state, &tmp);
+
+	for (i = 0; i < AEGIS128_STATE_BLOCKS; i++)
+		crypto_aegis_block_xor(tag_xor, &state->blocks[i]);
+}
+
+static int crypto_aegis128_setkey(struct crypto_aead *aead, const u8 *key,
+				  unsigned int keylen)
+{
+	struct aegis_ctx *ctx = crypto_aead_ctx(aead);
+
+	if (keylen != AEGIS128_KEY_SIZE) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE);
+	return 0;
+}
+
+static int crypto_aegis128_setauthsize(struct crypto_aead *tfm,
+				       unsigned int authsize)
+{
+	if (authsize > AEGIS128_MAX_AUTH_SIZE)
+		return -EINVAL;
+	if (authsize < AEGIS128_MIN_AUTH_SIZE)
+		return -EINVAL;
+	return 0;
+}
+
+static void crypto_aegis128_crypt(struct aead_request *req,
+				  union aegis_block *tag_xor,
+				  unsigned int cryptlen,
+				  const struct aegis128_ops *ops)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aegis_state state;
+
+	crypto_aegis128_init(&state, &ctx->key, req->iv);
+	crypto_aegis128_process_ad(&state, req->src, req->assoclen);
+	crypto_aegis128_process_crypt(&state, req, ops);
+	crypto_aegis128_final(&state, tag_xor, req->assoclen, cryptlen);
+}
+
+static int crypto_aegis128_encrypt(struct aead_request *req)
+{
+	static const struct aegis128_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_chunk = crypto_aegis128_encrypt_chunk,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union aegis_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_aegis128_crypt(req, &tag, cryptlen, &ops);
+
+	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
+				 authsize, 1);
+	return 0;
+}
+
+static int crypto_aegis128_decrypt(struct aead_request *req)
+{
+	static const struct aegis128_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_chunk = crypto_aegis128_decrypt_chunk,
+	};
+	static const u8 zeros[AEGIS128_MAX_AUTH_SIZE] = {};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union aegis_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
+				 authsize, 0);
+
+	crypto_aegis128_crypt(req, &tag, cryptlen, &ops);
+
+	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis128_init_tfm(struct crypto_aead *tfm)
+{
+	return 0;
+}
+
+static void crypto_aegis128_exit_tfm(struct crypto_aead *tfm)
+{
+}
+
+static struct aead_alg crypto_aegis128_alg = {
+	.setkey = crypto_aegis128_setkey,
+	.setauthsize = crypto_aegis128_setauthsize,
+	.encrypt = crypto_aegis128_encrypt,
+	.decrypt = crypto_aegis128_decrypt,
+	.init = crypto_aegis128_init_tfm,
+	.exit = crypto_aegis128_exit_tfm,
+
+	.ivsize = AEGIS128_NONCE_SIZE,
+	.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
+	.chunksize = AEGIS_BLOCK_SIZE,
+
+	.base = {
+		.cra_flags = CRYPTO_ALG_TYPE_AEAD,
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct aegis_ctx),
+		.cra_alignmask = 0,
+
+		.cra_priority = 100,
+
+		.cra_name = "aegis128",
+		.cra_driver_name = "aegis128-generic",
+
+		.cra_module = THIS_MODULE,
+	}
+};
+
+static int __init crypto_aegis128_module_init(void)
+{
+	return crypto_register_aead(&crypto_aegis128_alg);
+}
+
+static void __exit crypto_aegis128_module_exit(void)
+{
+	crypto_unregister_aead(&crypto_aegis128_alg);
+}
+
+module_init(crypto_aegis128_module_init);
+module_exit(crypto_aegis128_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm");
+MODULE_ALIAS_CRYPTO("aegis128");
+MODULE_ALIAS_CRYPTO("aegis128-generic");
diff --git a/crypto/aegis128l.c b/crypto/aegis128l.c
new file mode 100644
index 0000000..0cc1a75
--- /dev/null
+++ b/crypto/aegis128l.c
@@ -0,0 +1,527 @@
+/*
+ * The AEGIS-128L Authenticated-Encryption Algorithm
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+
+#include "aegis.h"
+
+#define AEGIS128L_CHUNK_BLOCKS 2
+#define AEGIS128L_CHUNK_SIZE (AEGIS128L_CHUNK_BLOCKS * AEGIS_BLOCK_SIZE)
+#define AEGIS128L_NONCE_SIZE 16
+#define AEGIS128L_STATE_BLOCKS 8
+#define AEGIS128L_KEY_SIZE 16
+#define AEGIS128L_MIN_AUTH_SIZE 8
+#define AEGIS128L_MAX_AUTH_SIZE 16
+
+union aegis_chunk {
+	union aegis_block blocks[AEGIS128L_CHUNK_BLOCKS];
+	u8 bytes[AEGIS128L_CHUNK_SIZE];
+};
+
+struct aegis_state {
+	union aegis_block blocks[AEGIS128L_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+	union aegis_block key;
+};
+
+struct aegis128l_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
+			    const u8 *src, unsigned int size);
+};
+
+static void crypto_aegis128l_update(struct aegis_state *state)
+{
+	union aegis_block tmp;
+	unsigned int i;
+
+	tmp = state->blocks[AEGIS128L_STATE_BLOCKS - 1];
+	for (i = AEGIS128L_STATE_BLOCKS - 1; i > 0; i--)
+		crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
+				    &state->blocks[i]);
+	crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
+}
+
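+/*
+ * Unlike AEGIS-128, the 128L variant absorbs two 16-byte message blocks per
+ * state update (one into state block 0, one into state block 4), which is
+ * where its doubled processing rate comes from.
+ */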
+static void crypto_aegis128l_update_a(struct aegis_state *state,
+				      const union aegis_chunk *msg)
+{
+	crypto_aegis128l_update(state);
+	crypto_aegis_block_xor(&state->blocks[0], &msg->blocks[0]);
+	crypto_aegis_block_xor(&state->blocks[4], &msg->blocks[1]);
+}
+
+static void crypto_aegis128l_update_u(struct aegis_state *state,
+				      const void *msg)
+{
+	crypto_aegis128l_update(state);
+	crypto_xor(state->blocks[0].bytes, msg + 0 * AEGIS_BLOCK_SIZE,
+			AEGIS_BLOCK_SIZE);
+	crypto_xor(state->blocks[4].bytes, msg + 1 * AEGIS_BLOCK_SIZE,
+			AEGIS_BLOCK_SIZE);
+}
+
+static void crypto_aegis128l_init(struct aegis_state *state,
+				  const union aegis_block *key,
+				  const u8 *iv)
+{
+	union aegis_block key_iv;
+	union aegis_chunk chunk;
+	unsigned int i;
+
+	memcpy(chunk.blocks[0].bytes, iv, AEGIS_BLOCK_SIZE);
+	chunk.blocks[1] = *key;
+
+	key_iv = *key;
+	crypto_aegis_block_xor(&key_iv, &chunk.blocks[0]);
+
+	state->blocks[0] = key_iv;
+	state->blocks[1] = crypto_aegis_const[1];
+	state->blocks[2] = crypto_aegis_const[0];
+	state->blocks[3] = crypto_aegis_const[1];
+	state->blocks[4] = key_iv;
+	state->blocks[5] = *key;
+	state->blocks[6] = *key;
+	state->blocks[7] = *key;
+
+	crypto_aegis_block_xor(&state->blocks[5], &crypto_aegis_const[0]);
+	crypto_aegis_block_xor(&state->blocks[6], &crypto_aegis_const[1]);
+	crypto_aegis_block_xor(&state->blocks[7], &crypto_aegis_const[0]);
+
+	for (i = 0; i < 10; i++)
+		crypto_aegis128l_update_a(state, &chunk);
+}
+
+static void crypto_aegis128l_ad(struct aegis_state *state,
+				const u8 *src, unsigned int size)
+{
+	if (AEGIS_ALIGNED(src)) {
+		const union aegis_chunk *src_chunk =
+				(const union aegis_chunk *)src;
+
+		while (size >= AEGIS128L_CHUNK_SIZE) {
+			crypto_aegis128l_update_a(state, src_chunk);
+
+			size -= AEGIS128L_CHUNK_SIZE;
+			src_chunk += 1;
+		}
+	} else {
+		while (size >= AEGIS128L_CHUNK_SIZE) {
+			crypto_aegis128l_update_u(state, src);
+
+			size -= AEGIS128L_CHUNK_SIZE;
+			src += AEGIS128L_CHUNK_SIZE;
+		}
+	}
+}
+
+static void crypto_aegis128l_encrypt_chunk(struct aegis_state *state, u8 *dst,
+					   const u8 *src, unsigned int size)
+{
+	union aegis_chunk tmp;
+	union aegis_block *tmp0 = &tmp.blocks[0];
+	union aegis_block *tmp1 = &tmp.blocks[1];
+
+	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
+		while (size >= AEGIS128L_CHUNK_SIZE) {
+			union aegis_chunk *dst_blk =
+					(union aegis_chunk *)dst;
+			const union aegis_chunk *src_blk =
+					(const union aegis_chunk *)src;
+
+			*tmp0 = state->blocks[2];
+			crypto_aegis_block_and(tmp0, &state->blocks[3]);
+			crypto_aegis_block_xor(tmp0, &state->blocks[6]);
+			crypto_aegis_block_xor(tmp0, &state->blocks[1]);
+			crypto_aegis_block_xor(tmp0, &src_blk->blocks[0]);
+
+			*tmp1 = state->blocks[6];
+			crypto_aegis_block_and(tmp1, &state->blocks[7]);
+			crypto_aegis_block_xor(tmp1, &state->blocks[5]);
+			crypto_aegis_block_xor(tmp1, &state->blocks[2]);
+			crypto_aegis_block_xor(tmp1, &src_blk->blocks[1]);
+
+			crypto_aegis128l_update_a(state, src_blk);
+
+			*dst_blk = tmp;
+
+			size -= AEGIS128L_CHUNK_SIZE;
+			src += AEGIS128L_CHUNK_SIZE;
+			dst += AEGIS128L_CHUNK_SIZE;
+		}
+	} else {
+		while (size >= AEGIS128L_CHUNK_SIZE) {
+			*tmp0 = state->blocks[2];
+			crypto_aegis_block_and(tmp0, &state->blocks[3]);
+			crypto_aegis_block_xor(tmp0, &state->blocks[6]);
+			crypto_aegis_block_xor(tmp0, &state->blocks[1]);
+			crypto_xor(tmp0->bytes, src + 0 * AEGIS_BLOCK_SIZE,
+				   AEGIS_BLOCK_SIZE);
+
+			*tmp1 = state->blocks[6];
+			crypto_aegis_block_and(tmp1, &state->blocks[7]);
+			crypto_aegis_block_xor(tmp1, &state->blocks[5]);
+			crypto_aegis_block_xor(tmp1, &state->blocks[2]);
+			crypto_xor(tmp1->bytes, src + 1 * AEGIS_BLOCK_SIZE,
+				   AEGIS_BLOCK_SIZE);
+
+			crypto_aegis128l_update_u(state, src);
+
+			memcpy(dst, tmp.bytes, AEGIS128L_CHUNK_SIZE);
+
+			size -= AEGIS128L_CHUNK_SIZE;
+			src += AEGIS128L_CHUNK_SIZE;
+			dst += AEGIS128L_CHUNK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union aegis_chunk msg = {};
+		memcpy(msg.bytes, src, size);
+
+		*tmp0 = state->blocks[2];
+		crypto_aegis_block_and(tmp0, &state->blocks[3]);
+		crypto_aegis_block_xor(tmp0, &state->blocks[6]);
+		crypto_aegis_block_xor(tmp0, &state->blocks[1]);
+
+		*tmp1 = state->blocks[6];
+		crypto_aegis_block_and(tmp1, &state->blocks[7]);
+		crypto_aegis_block_xor(tmp1, &state->blocks[5]);
+		crypto_aegis_block_xor(tmp1, &state->blocks[2]);
+
+		crypto_aegis128l_update_a(state, &msg);
+
+		crypto_aegis_block_xor(&msg.blocks[0], tmp0);
+		crypto_aegis_block_xor(&msg.blocks[1], tmp1);
+
+		memcpy(dst, msg.bytes, size);
+	}
+}
+
+static void crypto_aegis128l_decrypt_chunk(struct aegis_state *state, u8 *dst,
+					   const u8 *src, unsigned int size)
+{
+	union aegis_chunk tmp;
+	union aegis_block *tmp0 = &tmp.blocks[0];
+	union aegis_block *tmp1 = &tmp.blocks[1];
+
+	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
+		while (size >= AEGIS128L_CHUNK_SIZE) {
+			union aegis_chunk *dst_blk =
+					(union aegis_chunk *)dst;
+			const union aegis_chunk *src_blk =
+					(const union aegis_chunk *)src;
+
+			*tmp0 = state->blocks[2];
+			crypto_aegis_block_and(tmp0, &state->blocks[3]);
+			crypto_aegis_block_xor(tmp0, &state->blocks[6]);
+			crypto_aegis_block_xor(tmp0, &state->blocks[1]);
+			crypto_aegis_block_xor(tmp0, &src_blk->blocks[0]);
+
+			*tmp1 = state->blocks[6];
+			crypto_aegis_block_and(tmp1, &state->blocks[7]);
+			crypto_aegis_block_xor(tmp1, &state->blocks[5]);
+			crypto_aegis_block_xor(tmp1, &state->blocks[2]);
+			crypto_aegis_block_xor(tmp1, &src_blk->blocks[1]);
+
+			crypto_aegis128l_update_a(state, &tmp);
+
+			*dst_blk = tmp;
+
+			size -= AEGIS128L_CHUNK_SIZE;
+			src += AEGIS128L_CHUNK_SIZE;
+			dst += AEGIS128L_CHUNK_SIZE;
+		}
+	} else {
+		while (size >= AEGIS128L_CHUNK_SIZE) {
+			*tmp0 = state->blocks[2];
+			crypto_aegis_block_and(tmp0, &state->blocks[3]);
+			crypto_aegis_block_xor(tmp0, &state->blocks[6]);
+			crypto_aegis_block_xor(tmp0, &state->blocks[1]);
+			crypto_xor(tmp0->bytes, src + 0 * AEGIS_BLOCK_SIZE,
+				   AEGIS_BLOCK_SIZE);
+
+			*tmp1 = state->blocks[6];
+			crypto_aegis_block_and(tmp1, &state->blocks[7]);
+			crypto_aegis_block_xor(tmp1, &state->blocks[5]);
+			crypto_aegis_block_xor(tmp1, &state->blocks[2]);
+			crypto_xor(tmp1->bytes, src + 1 * AEGIS_BLOCK_SIZE,
+				   AEGIS_BLOCK_SIZE);
+
+			crypto_aegis128l_update_a(state, &tmp);
+
+			memcpy(dst, tmp.bytes, AEGIS128L_CHUNK_SIZE);
+
+			size -= AEGIS128L_CHUNK_SIZE;
+			src += AEGIS128L_CHUNK_SIZE;
+			dst += AEGIS128L_CHUNK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union aegis_chunk msg = {};
+		memcpy(msg.bytes, src, size);
+
+		*tmp0 = state->blocks[2];
+		crypto_aegis_block_and(tmp0, &state->blocks[3]);
+		crypto_aegis_block_xor(tmp0, &state->blocks[6]);
+		crypto_aegis_block_xor(tmp0, &state->blocks[1]);
+		crypto_aegis_block_xor(&msg.blocks[0], tmp0);
+
+		*tmp1 = state->blocks[6];
+		crypto_aegis_block_and(tmp1, &state->blocks[7]);
+		crypto_aegis_block_xor(tmp1, &state->blocks[5]);
+		crypto_aegis_block_xor(tmp1, &state->blocks[2]);
+		crypto_aegis_block_xor(&msg.blocks[1], tmp1);
+
+		memset(msg.bytes + size, 0, AEGIS128L_CHUNK_SIZE - size);
+
+		crypto_aegis128l_update_a(state, &msg);
+
+		memcpy(dst, msg.bytes, size);
+	}
+}
+
+static void crypto_aegis128l_process_ad(struct aegis_state *state,
+					struct scatterlist *sg_src,
+					unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	union aegis_chunk buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= AEGIS128L_CHUNK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = AEGIS128L_CHUNK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+				crypto_aegis128l_update_a(state, &buf);
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			crypto_aegis128l_ad(state, src, left);
+			src += left & ~(AEGIS128L_CHUNK_SIZE - 1);
+			left &= AEGIS128L_CHUNK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+
+		pos += left;
+		assoclen -= size;
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, AEGIS128L_CHUNK_SIZE - pos);
+		crypto_aegis128l_update_a(state, &buf);
+	}
+}
+
+static void crypto_aegis128l_process_crypt(struct aegis_state *state,
+					   struct aead_request *req,
+					   const struct aegis128l_ops *ops)
+{
+	struct skcipher_walk walk;
+	u8 *src, *dst;
+	unsigned int chunksize;
+
+	ops->skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		src = walk.src.virt.addr;
+		dst = walk.dst.virt.addr;
+		chunksize = walk.nbytes;
+
+		ops->crypt_chunk(state, dst, src, chunksize);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
+static void crypto_aegis128l_final(struct aegis_state *state,
+				   union aegis_block *tag_xor,
+				   u64 assoclen, u64 cryptlen)
+{
+	u64 assocbits = assoclen * 8;
+	u64 cryptbits = cryptlen * 8;
+
+	union aegis_chunk tmp;
+	unsigned int i;
+
+	tmp.blocks[0].words64[0] = cpu_to_le64(assocbits);
+	tmp.blocks[0].words64[1] = cpu_to_le64(cryptbits);
+
+	crypto_aegis_block_xor(&tmp.blocks[0], &state->blocks[2]);
+
+	tmp.blocks[1] = tmp.blocks[0];
+	for (i = 0; i < 7; i++)
+		crypto_aegis128l_update_a(state, &tmp);
+
+	for (i = 0; i < 7; i++)
+		crypto_aegis_block_xor(tag_xor, &state->blocks[i]);
+}
+
+static int crypto_aegis128l_setkey(struct crypto_aead *aead, const u8 *key,
+				   unsigned int keylen)
+{
+	struct aegis_ctx *ctx = crypto_aead_ctx(aead);
+
+	if (keylen != AEGIS128L_KEY_SIZE) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
+	return 0;
+}
+
+static int crypto_aegis128l_setauthsize(struct crypto_aead *tfm,
+					unsigned int authsize)
+{
+	if (authsize > AEGIS128L_MAX_AUTH_SIZE)
+		return -EINVAL;
+	if (authsize < AEGIS128L_MIN_AUTH_SIZE)
+		return -EINVAL;
+	return 0;
+}
+
+static void crypto_aegis128l_crypt(struct aead_request *req,
+				   union aegis_block *tag_xor,
+				   unsigned int cryptlen,
+				   const struct aegis128l_ops *ops)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aegis_state state;
+
+	crypto_aegis128l_init(&state, &ctx->key, req->iv);
+	crypto_aegis128l_process_ad(&state, req->src, req->assoclen);
+	crypto_aegis128l_process_crypt(&state, req, ops);
+	crypto_aegis128l_final(&state, tag_xor, req->assoclen, cryptlen);
+}
+
+static int crypto_aegis128l_encrypt(struct aead_request *req)
+{
+	static const struct aegis128l_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_chunk = crypto_aegis128l_encrypt_chunk,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union aegis_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_aegis128l_crypt(req, &tag, cryptlen, &ops);
+
+	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
+				 authsize, 1);
+	return 0;
+}
+
+static int crypto_aegis128l_decrypt(struct aead_request *req)
+{
+	static const struct aegis128l_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_chunk = crypto_aegis128l_decrypt_chunk,
+	};
+	static const u8 zeros[AEGIS128L_MAX_AUTH_SIZE] = {};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union aegis_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
+				 authsize, 0);
+
+	crypto_aegis128l_crypt(req, &tag, cryptlen, &ops);
+
+	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis128l_init_tfm(struct crypto_aead *tfm)
+{
+	return 0;
+}
+
+static void crypto_aegis128l_exit_tfm(struct crypto_aead *tfm)
+{
+}
+
+static struct aead_alg crypto_aegis128l_alg = {
+	.setkey = crypto_aegis128l_setkey,
+	.setauthsize = crypto_aegis128l_setauthsize,
+	.encrypt = crypto_aegis128l_encrypt,
+	.decrypt = crypto_aegis128l_decrypt,
+	.init = crypto_aegis128l_init_tfm,
+	.exit = crypto_aegis128l_exit_tfm,
+
+	.ivsize = AEGIS128L_NONCE_SIZE,
+	.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
+	.chunksize = AEGIS128L_CHUNK_SIZE,
+
+	.base = {
+		.cra_flags = CRYPTO_ALG_TYPE_AEAD,
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct aegis_ctx),
+		.cra_alignmask = 0,
+
+		.cra_priority = 100,
+
+		.cra_name = "aegis128l",
+		.cra_driver_name = "aegis128l-generic",
+
+		.cra_module = THIS_MODULE,
+	}
+};
+
+static int __init crypto_aegis128l_module_init(void)
+{
+	return crypto_register_aead(&crypto_aegis128l_alg);
+}
+
+static void __exit crypto_aegis128l_module_exit(void)
+{
+	crypto_unregister_aead(&crypto_aegis128l_alg);
+}
+
+module_init(crypto_aegis128l_module_init);
+module_exit(crypto_aegis128l_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm");
+MODULE_ALIAS_CRYPTO("aegis128l");
+MODULE_ALIAS_CRYPTO("aegis128l-generic");
diff --git a/crypto/aegis256.c b/crypto/aegis256.c
new file mode 100644
index 0000000..a489d74
--- /dev/null
+++ b/crypto/aegis256.c
@@ -0,0 +1,478 @@
+/*
+ * The AEGIS-256 Authenticated-Encryption Algorithm
+ *
+ * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+
+#include "aegis.h"
+
+#define AEGIS256_NONCE_SIZE 32
+#define AEGIS256_STATE_BLOCKS 6
+#define AEGIS256_KEY_SIZE 32
+#define AEGIS256_MIN_AUTH_SIZE 8
+#define AEGIS256_MAX_AUTH_SIZE 16
+
+struct aegis_state {
+	union aegis_block blocks[AEGIS256_STATE_BLOCKS];
+};
+
+struct aegis_ctx {
+	union aegis_block key[AEGIS256_KEY_SIZE / AEGIS_BLOCK_SIZE];
+};
+
+struct aegis256_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
+			    const u8 *src, unsigned int size);
+};
+
+static void crypto_aegis256_update(struct aegis_state *state)
+{
+	union aegis_block tmp;
+	unsigned int i;
+
+	tmp = state->blocks[AEGIS256_STATE_BLOCKS - 1];
+	for (i = AEGIS256_STATE_BLOCKS - 1; i > 0; i--)
+		crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
+				    &state->blocks[i]);
+	crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
+}
+
+static void crypto_aegis256_update_a(struct aegis_state *state,
+				     const union aegis_block *msg)
+{
+	crypto_aegis256_update(state);
+	crypto_aegis_block_xor(&state->blocks[0], msg);
+}
+
+static void crypto_aegis256_update_u(struct aegis_state *state, const void *msg)
+{
+	crypto_aegis256_update(state);
+	crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE);
+}
+
+static void crypto_aegis256_init(struct aegis_state *state,
+				 const union aegis_block *key,
+				 const u8 *iv)
+{
+	union aegis_block key_iv[2];
+	unsigned int i;
+
+	key_iv[0] = key[0];
+	key_iv[1] = key[1];
+	crypto_xor(key_iv[0].bytes, iv + 0 * AEGIS_BLOCK_SIZE,
+			AEGIS_BLOCK_SIZE);
+	crypto_xor(key_iv[1].bytes, iv + 1 * AEGIS_BLOCK_SIZE,
+			AEGIS_BLOCK_SIZE);
+
+	state->blocks[0] = key_iv[0];
+	state->blocks[1] = key_iv[1];
+	state->blocks[2] = crypto_aegis_const[1];
+	state->blocks[3] = crypto_aegis_const[0];
+	state->blocks[4] = key[0];
+	state->blocks[5] = key[1];
+
+	crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[0]);
+	crypto_aegis_block_xor(&state->blocks[5], &crypto_aegis_const[1]);
+
+	for (i = 0; i < 4; i++) {
+		crypto_aegis256_update_a(state, &key[0]);
+		crypto_aegis256_update_a(state, &key[1]);
+		crypto_aegis256_update_a(state, &key_iv[0]);
+		crypto_aegis256_update_a(state, &key_iv[1]);
+	}
+}
+
+static void crypto_aegis256_ad(struct aegis_state *state,
+			       const u8 *src, unsigned int size)
+{
+	if (AEGIS_ALIGNED(src)) {
+		const union aegis_block *src_blk =
+				(const union aegis_block *)src;
+
+		while (size >= AEGIS_BLOCK_SIZE) {
+			crypto_aegis256_update_a(state, src_blk);
+
+			size -= AEGIS_BLOCK_SIZE;
+			src_blk++;
+		}
+	} else {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			crypto_aegis256_update_u(state, src);
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+		}
+	}
+}
+
+static void crypto_aegis256_encrypt_chunk(struct aegis_state *state, u8 *dst,
+					  const u8 *src, unsigned int size)
+{
+	union aegis_block tmp;
+
+	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			union aegis_block *dst_blk =
+					(union aegis_block *)dst;
+			const union aegis_block *src_blk =
+					(const union aegis_block *)src;
+
+			tmp = state->blocks[2];
+			crypto_aegis_block_and(&tmp, &state->blocks[3]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[5]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+			crypto_aegis_block_xor(&tmp, src_blk);
+
+			crypto_aegis256_update_a(state, src_blk);
+
+			*dst_blk = tmp;
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+			dst += AEGIS_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			tmp = state->blocks[2];
+			crypto_aegis_block_and(&tmp, &state->blocks[3]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[5]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+			crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
+
+			crypto_aegis256_update_u(state, src);
+
+			memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+			dst += AEGIS_BLOCK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union aegis_block msg = {};
+		memcpy(msg.bytes, src, size);
+
+		tmp = state->blocks[2];
+		crypto_aegis_block_and(&tmp, &state->blocks[3]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[5]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+
+		crypto_aegis256_update_a(state, &msg);
+
+		crypto_aegis_block_xor(&msg, &tmp);
+
+		memcpy(dst, msg.bytes, size);
+	}
+}
+
+static void crypto_aegis256_decrypt_chunk(struct aegis_state *state, u8 *dst,
+					  const u8 *src, unsigned int size)
+{
+	union aegis_block tmp;
+
+	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			union aegis_block *dst_blk =
+					(union aegis_block *)dst;
+			const union aegis_block *src_blk =
+					(const union aegis_block *)src;
+
+			tmp = state->blocks[2];
+			crypto_aegis_block_and(&tmp, &state->blocks[3]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[5]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+			crypto_aegis_block_xor(&tmp, src_blk);
+
+			crypto_aegis256_update_a(state, &tmp);
+
+			*dst_blk = tmp;
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+			dst += AEGIS_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= AEGIS_BLOCK_SIZE) {
+			tmp = state->blocks[2];
+			crypto_aegis_block_and(&tmp, &state->blocks[3]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[5]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+			crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
+
+			crypto_aegis256_update_a(state, &tmp);
+
+			memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
+
+			size -= AEGIS_BLOCK_SIZE;
+			src += AEGIS_BLOCK_SIZE;
+			dst += AEGIS_BLOCK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union aegis_block msg = {};
+		memcpy(msg.bytes, src, size);
+
+		tmp = state->blocks[2];
+		crypto_aegis_block_and(&tmp, &state->blocks[3]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[5]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[4]);
+		crypto_aegis_block_xor(&tmp, &state->blocks[1]);
+		crypto_aegis_block_xor(&msg, &tmp);
+
+		memset(msg.bytes + size, 0, AEGIS_BLOCK_SIZE - size);
+
+		crypto_aegis256_update_a(state, &msg);
+
+		memcpy(dst, msg.bytes, size);
+	}
+}
+
+static void crypto_aegis256_process_ad(struct aegis_state *state,
+				       struct scatterlist *sg_src,
+				       unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	union aegis_block buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= AEGIS_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = AEGIS_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+				crypto_aegis256_update_a(state, &buf);
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			crypto_aegis256_ad(state, src, left);
+			src += left & ~(AEGIS_BLOCK_SIZE - 1);
+			left &= AEGIS_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+
+		pos += left;
+		assoclen -= size;
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos);
+		crypto_aegis256_update_a(state, &buf);
+	}
+}
+
+static void crypto_aegis256_process_crypt(struct aegis_state *state,
+					  struct aead_request *req,
+					  const struct aegis256_ops *ops)
+{
+	struct skcipher_walk walk;
+	u8 *src, *dst;
+	unsigned int chunksize;
+
+	ops->skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		src = walk.src.virt.addr;
+		dst = walk.dst.virt.addr;
+		chunksize = walk.nbytes;
+
+		ops->crypt_chunk(state, dst, src, chunksize);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
+static void crypto_aegis256_final(struct aegis_state *state,
+				  union aegis_block *tag_xor,
+				  u64 assoclen, u64 cryptlen)
+{
+	u64 assocbits = assoclen * 8;
+	u64 cryptbits = cryptlen * 8;
+
+	union aegis_block tmp;
+	unsigned int i;
+
+	tmp.words64[0] = cpu_to_le64(assocbits);
+	tmp.words64[1] = cpu_to_le64(cryptbits);
+
+	crypto_aegis_block_xor(&tmp, &state->blocks[3]);
+
+	for (i = 0; i < 7; i++)
+		crypto_aegis256_update_a(state, &tmp);
+
+	for (i = 0; i < AEGIS256_STATE_BLOCKS; i++)
+		crypto_aegis_block_xor(tag_xor, &state->blocks[i]);
+}
+
+static int crypto_aegis256_setkey(struct crypto_aead *aead, const u8 *key,
+				  unsigned int keylen)
+{
+	struct aegis_ctx *ctx = crypto_aead_ctx(aead);
+
+	if (keylen != AEGIS256_KEY_SIZE) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	memcpy(ctx->key[0].bytes, key, AEGIS_BLOCK_SIZE);
+	memcpy(ctx->key[1].bytes, key + AEGIS_BLOCK_SIZE,
+			AEGIS_BLOCK_SIZE);
+	return 0;
+}
+
+static int crypto_aegis256_setauthsize(struct crypto_aead *tfm,
+				       unsigned int authsize)
+{
+	if (authsize > AEGIS256_MAX_AUTH_SIZE)
+		return -EINVAL;
+	if (authsize < AEGIS256_MIN_AUTH_SIZE)
+		return -EINVAL;
+	return 0;
+}
+
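+/*
+ * Common driver for both directions: initialize the state from key and
+ * nonce, absorb the associated data, transform the payload via 'ops',
+ * then fold the lengths into the tag.
+ */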
+static void crypto_aegis256_crypt(struct aead_request *req,
+				  union aegis_block *tag_xor,
+				  unsigned int cryptlen,
+				  const struct aegis256_ops *ops)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aegis_state state;
+
+	crypto_aegis256_init(&state, ctx->key, req->iv);
+	crypto_aegis256_process_ad(&state, req->src, req->assoclen);
+	crypto_aegis256_process_crypt(&state, req, ops);
+	crypto_aegis256_final(&state, tag_xor, req->assoclen, cryptlen);
+}
+
+static int crypto_aegis256_encrypt(struct aead_request *req)
+{
+	static const struct aegis256_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_chunk = crypto_aegis256_encrypt_chunk,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union aegis_block tag = {};
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_aegis256_crypt(req, &tag, cryptlen, &ops);
+
+	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
+				 authsize, 1);
+	return 0;
+}
+
+static int crypto_aegis256_decrypt(struct aead_request *req)
+{
+	static const struct aegis256_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_chunk = crypto_aegis256_decrypt_chunk,
+	};
+	static const u8 zeros[AEGIS256_MAX_AUTH_SIZE] = {};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union aegis_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
+				 authsize, 0);
+
+	crypto_aegis256_crypt(req, &tag, cryptlen, &ops);
+
+	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_aegis256_init_tfm(struct crypto_aead *tfm)
+{
+	return 0;
+}
+
+static void crypto_aegis256_exit_tfm(struct crypto_aead *tfm)
+{
+}
+
+static struct aead_alg crypto_aegis256_alg = {
+	.setkey = crypto_aegis256_setkey,
+	.setauthsize = crypto_aegis256_setauthsize,
+	.encrypt = crypto_aegis256_encrypt,
+	.decrypt = crypto_aegis256_decrypt,
+	.init = crypto_aegis256_init_tfm,
+	.exit = crypto_aegis256_exit_tfm,
+
+	.ivsize = AEGIS256_NONCE_SIZE,
+	.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
+	.chunksize = AEGIS_BLOCK_SIZE,
+
+	.base = {
+		.cra_flags = CRYPTO_ALG_TYPE_AEAD,
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct aegis_ctx),
+		.cra_alignmask = 0,
+
+		.cra_priority = 100,
+
+		.cra_name = "aegis256",
+		.cra_driver_name = "aegis256-generic",
+
+		.cra_module = THIS_MODULE,
+	}
+};
+
+static int __init crypto_aegis256_module_init(void)
+{
+	return crypto_register_aead(&crypto_aegis256_alg);
+}
+
+static void __exit crypto_aegis256_module_exit(void)
+{
+	crypto_unregister_aead(&crypto_aegis256_alg);
+}
+
+module_init(crypto_aegis256_module_init);
+module_exit(crypto_aegis256_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm");
+MODULE_ALIAS_CRYPTO("aegis256");
+MODULE_ALIAS_CRYPTO("aegis256-generic");
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 7846c0c..49fa858 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -347,7 +347,6 @@
 	.sendpage	=	sock_no_sendpage,
 	.sendmsg	=	sock_no_sendmsg,
 	.recvmsg	=	sock_no_recvmsg,
-	.poll		=	sock_no_poll,
 
 	.bind		=	alg_bind,
 	.release	=	af_alg_release,
@@ -501,8 +500,8 @@
 		sg = sgl->sg;
 
 	if (!sg || sgl->cur >= MAX_SGL_ENTS) {
-		sgl = sock_kmalloc(sk, sizeof(*sgl) +
-				       sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1),
+		sgl = sock_kmalloc(sk,
+				   struct_size(sgl, sg, (MAX_SGL_ENTS + 1)),
 				   GFP_KERNEL);
 		if (!sgl)
 			return -ENOMEM;
@@ -1061,19 +1060,12 @@
 }
 EXPORT_SYMBOL_GPL(af_alg_async_cb);
 
-/**
- * af_alg_poll - poll system call handler
- */
-__poll_t af_alg_poll(struct file *file, struct socket *sock,
-			 poll_table *wait)
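+/*
+ * ->poll_mask() replacement for the old af_alg_poll(): the socket core
+ * now performs the waitqueue registration, so only the readiness mask
+ * is computed here.
+ */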
+__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
 	struct af_alg_ctx *ctx = ask->private;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;
 
 	if (!ctx->more || ctx->used)
 		mask |= EPOLLIN | EPOLLRDNORM;
@@ -1083,7 +1075,7 @@
 
 	return mask;
 }
-EXPORT_SYMBOL_GPL(af_alg_poll);
+EXPORT_SYMBOL_GPL(af_alg_poll_mask);
 
 /**
  * af_alg_alloc_areq - allocate struct af_alg_async_req
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 2a0271b..c0755cf 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -10,6 +10,7 @@
  *
  */
 
+#include <crypto/algapi.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/fips.h>
@@ -59,6 +60,15 @@
 	if (alg->cra_blocksize > PAGE_SIZE / 8)
 		return -EINVAL;
 
+	if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
+			       CRYPTO_ALG_TYPE_CIPHER) {
+		if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK)
+			return -EINVAL;
+
+		if (alg->cra_blocksize > MAX_CIPHER_BLOCKSIZE)
+			return -EINVAL;
+	}
+
 	if (alg->cra_priority < 0)
 		return -EINVAL;
 
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 4b07edd..330cf9f 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -375,7 +375,7 @@
 	.sendmsg	=	aead_sendmsg,
 	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	aead_recvmsg,
-	.poll		=	af_alg_poll,
+	.poll_mask	=	af_alg_poll_mask,
 };
 
 static int aead_check_key(struct socket *sock)
@@ -471,7 +471,7 @@
 	.sendmsg	=	aead_sendmsg_nokey,
 	.sendpage	=	aead_sendpage_nokey,
 	.recvmsg	=	aead_recvmsg_nokey,
-	.poll		=	af_alg_poll,
+	.poll_mask	=	af_alg_poll_mask,
 };
 
 static void *aead_bind(const char *name, u32 type, u32 mask)
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 6c9b192..bfcf595 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -288,7 +288,6 @@
 	.mmap		=	sock_no_mmap,
 	.bind		=	sock_no_bind,
 	.setsockopt	=	sock_no_setsockopt,
-	.poll		=	sock_no_poll,
 
 	.release	=	af_alg_release,
 	.sendmsg	=	hash_sendmsg,
@@ -396,7 +395,6 @@
 	.mmap		=	sock_no_mmap,
 	.bind		=	sock_no_bind,
 	.setsockopt	=	sock_no_setsockopt,
-	.poll		=	sock_no_poll,
 
 	.release	=	af_alg_release,
 	.sendmsg	=	hash_sendmsg_nokey,
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 150c2b6..22df379 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -106,7 +106,6 @@
 	.bind		=	sock_no_bind,
 	.accept		=	sock_no_accept,
 	.setsockopt	=	sock_no_setsockopt,
-	.poll		=	sock_no_poll,
 	.sendmsg	=	sock_no_sendmsg,
 	.sendpage	=	sock_no_sendpage,
 
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index c4e885d..15cf3c5 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -205,7 +205,7 @@
 	.sendmsg	=	skcipher_sendmsg,
 	.sendpage	=	af_alg_sendpage,
 	.recvmsg	=	skcipher_recvmsg,
-	.poll		=	af_alg_poll,
+	.poll_mask	=	af_alg_poll_mask,
 };
 
 static int skcipher_check_key(struct socket *sock)
@@ -301,7 +301,7 @@
 	.sendmsg	=	skcipher_sendmsg_nokey,
 	.sendpage	=	skcipher_sendpage_nokey,
 	.recvmsg	=	skcipher_recvmsg_nokey,
-	.poll		=	af_alg_poll,
+	.poll_mask	=	af_alg_poll_mask,
 };
 
 static void *skcipher_bind(const char *name, u32 type, u32 mask)
diff --git a/crypto/authenc.c b/crypto/authenc.c
index d3d6d72..4fa8d40 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -108,6 +108,7 @@
 				       CRYPTO_TFM_RES_MASK);
 
 out:
+	memzero_explicit(&keys, sizeof(keys));
 	return err;
 
 badkey:
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 15f91dd..50b8047 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -90,6 +90,7 @@
 					   CRYPTO_TFM_RES_MASK);
 
 out:
+	memzero_explicit(&keys, sizeof(keys));
 	return err;
 
 badkey:
diff --git a/crypto/cfb.c b/crypto/cfb.c
index 94ee39b..a0d68c0 100644
--- a/crypto/cfb.c
+++ b/crypto/cfb.c
@@ -53,9 +53,8 @@
 static void crypto_cfb_final(struct skcipher_walk *walk,
 			     struct crypto_skcipher *tfm)
 {
-	const unsigned int bsize = crypto_cfb_bsize(tfm);
 	const unsigned long alignmask = crypto_skcipher_alignmask(tfm);
-	u8 tmp[bsize + alignmask];
+	u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
 	u8 *stream = PTR_ALIGN(tmp + 0, alignmask + 1);
 	u8 *src = walk->src.virt.addr;
 	u8 *dst = walk->dst.virt.addr;
@@ -94,7 +93,7 @@
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *iv = walk->iv;
-	u8 tmp[bsize];
+	u8 tmp[MAX_CIPHER_BLOCKSIZE];
 
 	do {
 		crypto_cfb_encrypt_one(tfm, iv, tmp);
@@ -164,7 +163,7 @@
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *iv = walk->iv;
-	u8 tmp[bsize];
+	u8 tmp[MAX_CIPHER_BLOCKSIZE];
 
 	do {
 		crypto_cfb_encrypt_one(tfm, iv, tmp);
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 94fa355..57836c3 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -13,6 +13,7 @@
  *
  */
 
+#include <crypto/algapi.h>
 #include <linux/kernel.h>
 #include <linux/crypto.h>
 #include <linux/errno.h>
@@ -67,7 +68,7 @@
 {
 	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
 	unsigned int size = crypto_tfm_alg_blocksize(tfm);
-	u8 buffer[size + alignmask];
+	u8 buffer[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
 	u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
 
 	memcpy(tmp, src, size);
diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c
index 718cbce..00facd2 100644
--- a/crypto/crc32_generic.c
+++ b/crypto/crc32_generic.c
@@ -29,6 +29,7 @@
  * This is crypto api shash wrappers to crc32_le.
  */
 
+#include <asm/unaligned.h>
 #include <linux/crc32.h>
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
@@ -39,11 +40,6 @@
 #define CHKSUM_BLOCK_SIZE	1
 #define CHKSUM_DIGEST_SIZE	4
 
-static u32 __crc32_le(u32 crc, unsigned char const *p, size_t len)
-{
-	return crc32_le(crc, p, len);
-}
-
 /** No default init with ~0 */
 static int crc32_cra_init(struct crypto_tfm *tfm)
 {
@@ -54,7 +50,6 @@
 	return 0;
 }
 
-
 /*
  * Setting the seed allows arbitrary accumulators and flexible XOR policy
  * If your algorithm starts with ~0, then XOR with ~0 before you set
@@ -69,7 +64,7 @@
 		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
-	*mctx = le32_to_cpup((__le32 *)key);
+	*mctx = get_unaligned_le32(key);
 	return 0;
 }
 
@@ -88,7 +83,7 @@
 {
 	u32 *crcp = shash_desc_ctx(desc);
 
-	*crcp = __crc32_le(*crcp, data, len);
+	*crcp = crc32_le(*crcp, data, len);
 	return 0;
 }
 
@@ -96,7 +91,7 @@
 static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len,
 			 u8 *out)
 {
-	*(__le32 *)out = cpu_to_le32(__crc32_le(*crcp, data, len));
+	put_unaligned_le32(crc32_le(*crcp, data, len), out);
 	return 0;
 }
 
@@ -110,7 +105,7 @@
 {
 	u32 *crcp = shash_desc_ctx(desc);
 
-	*(__le32 *)out = cpu_to_le32p(crcp);
+	put_unaligned_le32(*crcp, out);
 	return 0;
 }
 
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
index 3723203..7283066 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -35,6 +35,7 @@
  *
  */
 
+#include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -82,7 +83,7 @@
 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
-	mctx->key = le32_to_cpu(*(__le32 *)key);
+	mctx->key = get_unaligned_le32(key);
 	return 0;
 }
 
@@ -99,13 +100,13 @@
 {
 	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
 
-	*(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
+	put_unaligned_le32(~ctx->crc, out);
 	return 0;
 }
 
 static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out)
 {
-	*(__le32 *)out = ~cpu_to_le32(__crc32c_le(*crcp, data, len));
+	put_unaligned_le32(~__crc32c_le(*crcp, data, len), out);
 	return 0;
 }
 
@@ -148,7 +149,6 @@
 		.cra_priority		=	100,
 		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
 		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
-		.cra_alignmask		=	3,
 		.cra_ctxsize		=	sizeof(struct chksum_ctx),
 		.cra_module		=	THIS_MODULE,
 		.cra_init		=	crc32c_cra_init,
diff --git a/crypto/ctr.c b/crypto/ctr.c
index 854d924..435b75b 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -58,7 +58,7 @@
 	unsigned int bsize = crypto_cipher_blocksize(tfm);
 	unsigned long alignmask = crypto_cipher_alignmask(tfm);
 	u8 *ctrblk = walk->iv;
-	u8 tmp[bsize + alignmask];
+	u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
 	u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
 	u8 *src = walk->src.virt.addr;
 	u8 *dst = walk->dst.virt.addr;
@@ -106,7 +106,7 @@
 	unsigned int nbytes = walk->nbytes;
 	u8 *ctrblk = walk->iv;
 	u8 *src = walk->src.virt.addr;
-	u8 tmp[bsize + alignmask];
+	u8 tmp[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
 	u8 *keystream = PTR_ALIGN(tmp + 0, alignmask + 1);
 
 	do {
diff --git a/crypto/cts.c b/crypto/cts.c
index 4773c18..4e28d83 100644
--- a/crypto/cts.c
+++ b/crypto/cts.c
@@ -40,6 +40,7 @@
  * rfc3962 includes errata information in its Appendix A.
  */
 
+#include <crypto/algapi.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -104,7 +105,7 @@
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct skcipher_request *subreq = &rctx->subreq;
 	int bsize = crypto_skcipher_blocksize(tfm);
-	u8 d[bsize * 2] __aligned(__alignof__(u32));
+	u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32));
 	struct scatterlist *sg;
 	unsigned int offset;
 	int lastn;
@@ -183,7 +184,7 @@
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct skcipher_request *subreq = &rctx->subreq;
 	int bsize = crypto_skcipher_blocksize(tfm);
-	u8 d[bsize * 2] __aligned(__alignof__(u32));
+	u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32));
 	struct scatterlist *sg;
 	unsigned int offset;
 	u8 *space;
diff --git a/crypto/ecc.c b/crypto/ecc.c
index 9c066b5..8155413 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -515,7 +515,7 @@
 static bool vli_mmod_fast(u64 *result, u64 *product,
 			  const u64 *curve_prime, unsigned int ndigits)
 {
-	u64 tmp[2 * ndigits];
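+	/* fixed upper bound (ECC_MAX_DIGITS) instead of a VLA over ndigits */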
+	u64 tmp[2 * ECC_MAX_DIGITS];
 
 	switch (ndigits) {
 	case 3:
@@ -536,7 +536,7 @@
 static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
 			      const u64 *curve_prime, unsigned int ndigits)
 {
-	u64 product[2 * ndigits];
+	u64 product[2 * ECC_MAX_DIGITS];
 
 	vli_mult(product, left, right, ndigits);
 	vli_mmod_fast(result, product, curve_prime, ndigits);
@@ -546,7 +546,7 @@
 static void vli_mod_square_fast(u64 *result, const u64 *left,
 				const u64 *curve_prime, unsigned int ndigits)
 {
-	u64 product[2 * ndigits];
+	u64 product[2 * ECC_MAX_DIGITS];
 
 	vli_square(product, left, ndigits);
 	vli_mmod_fast(result, product, curve_prime, ndigits);
@@ -560,8 +560,8 @@
 static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
 			unsigned int ndigits)
 {
-	u64 a[ndigits], b[ndigits];
-	u64 u[ndigits], v[ndigits];
+	u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
+	u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
 	u64 carry;
 	int cmp_result;
 
@@ -649,8 +649,8 @@
 				      u64 *curve_prime, unsigned int ndigits)
 {
 	/* t1 = x, t2 = y, t3 = z */
-	u64 t4[ndigits];
-	u64 t5[ndigits];
+	u64 t4[ECC_MAX_DIGITS];
+	u64 t5[ECC_MAX_DIGITS];
 
 	if (vli_is_zero(z1, ndigits))
 		return;
@@ -711,7 +711,7 @@
 static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime,
 		    unsigned int ndigits)
 {
-	u64 t1[ndigits];
+	u64 t1[ECC_MAX_DIGITS];
 
 	vli_mod_square_fast(t1, z, curve_prime, ndigits);    /* z^2 */
 	vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */
@@ -724,7 +724,7 @@
 				u64 *p_initial_z, u64 *curve_prime,
 				unsigned int ndigits)
 {
-	u64 z[ndigits];
+	u64 z[ECC_MAX_DIGITS];
 
 	vli_set(x2, x1, ndigits);
 	vli_set(y2, y1, ndigits);
@@ -750,7 +750,7 @@
 		     unsigned int ndigits)
 {
 	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
-	u64 t5[ndigits];
+	u64 t5[ECC_MAX_DIGITS];
 
 	/* t5 = x2 - x1 */
 	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
@@ -791,9 +791,9 @@
 		       unsigned int ndigits)
 {
 	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
-	u64 t5[ndigits];
-	u64 t6[ndigits];
-	u64 t7[ndigits];
+	u64 t5[ECC_MAX_DIGITS];
+	u64 t6[ECC_MAX_DIGITS];
+	u64 t7[ECC_MAX_DIGITS];
 
 	/* t5 = x2 - x1 */
 	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
@@ -846,9 +846,9 @@
 			   unsigned int ndigits)
 {
 	/* R0 and R1 */
-	u64 rx[2][ndigits];
-	u64 ry[2][ndigits];
-	u64 z[ndigits];
+	u64 rx[2][ECC_MAX_DIGITS];
+	u64 ry[2][ECC_MAX_DIGITS];
+	u64 z[ECC_MAX_DIGITS];
 	int i, nb;
 	int num_bits = vli_num_bits(scalar, ndigits);
 
@@ -943,13 +943,13 @@
 int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
 {
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
-	u64 priv[ndigits];
+	u64 priv[ECC_MAX_DIGITS];
 	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
 	unsigned int nbits = vli_num_bits(curve->n, ndigits);
 	int err;
 
 	/* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */
-	if (nbits < 160)
+	if (nbits < 160 || ndigits > ARRAY_SIZE(priv))
 		return -EINVAL;
 
 	/*
@@ -988,10 +988,10 @@
 {
 	int ret = 0;
 	struct ecc_point *pk;
-	u64 priv[ndigits];
+	u64 priv[ECC_MAX_DIGITS];
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
 
-	if (!private_key || !curve) {
+	if (!private_key || !curve || ndigits > ARRAY_SIZE(priv)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1025,30 +1025,25 @@
 {
 	int ret = 0;
 	struct ecc_point *product, *pk;
-	u64 *priv, *rand_z;
+	u64 priv[ECC_MAX_DIGITS];
+	u64 rand_z[ECC_MAX_DIGITS];
+	unsigned int nbytes;
 	const struct ecc_curve *curve = ecc_get_curve(curve_id);
 
-	if (!private_key || !public_key || !curve) {
+	if (!private_key || !public_key || !curve ||
+	    ndigits > ARRAY_SIZE(priv) || ndigits > ARRAY_SIZE(rand_z)) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	priv = kmalloc_array(ndigits, sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
 
-	rand_z = kmalloc_array(ndigits, sizeof(*rand_z), GFP_KERNEL);
-	if (!rand_z) {
-		ret = -ENOMEM;
-		goto kfree_out;
-	}
+	get_random_bytes(rand_z, nbytes);
 
 	pk = ecc_alloc_point(ndigits);
 	if (!pk) {
 		ret = -ENOMEM;
-		goto kfree_out;
+		goto out;
 	}
 
 	product = ecc_alloc_point(ndigits);
@@ -1057,8 +1052,6 @@
 		goto err_alloc_product;
 	}
 
-	get_random_bytes(rand_z, ndigits << ECC_DIGITS_TO_BYTES_SHIFT);
-
 	ecc_swap_digits(public_key, pk->x, ndigits);
 	ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
 	ecc_swap_digits(private_key, priv, ndigits);
@@ -1073,9 +1066,6 @@
 	ecc_free_point(product);
 err_alloc_product:
 	ecc_free_point(pk);
-kfree_out:
-	kzfree(priv);
-	kzfree(rand_z);
 out:
 	return ret;
 }
diff --git a/crypto/ecc.h b/crypto/ecc.h
index e4fd449..f75a86b 100644
--- a/crypto/ecc.h
+++ b/crypto/ecc.h
@@ -26,7 +26,9 @@
 #ifndef _CRYPTO_ECC_H
 #define _CRYPTO_ECC_H
 
-#define ECC_MAX_DIGITS	4 /* 256 */
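+/*
+ * NIST P-256 is the largest curve supported, so ECC_MAX_DIGITS bounds
+ * the fixed-size stack buffers that replace the VLAs in ecc.c.
+ */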
+#define ECC_CURVE_NIST_P192_DIGITS  3
+#define ECC_CURVE_NIST_P256_DIGITS  4
+#define ECC_MAX_DIGITS              ECC_CURVE_NIST_P256_DIGITS
 
 #define ECC_DIGITS_TO_BYTES_SHIFT 3
 
diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index d2ec33f..bf63001 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -30,8 +30,8 @@
 static unsigned int ecdh_supported_curve(unsigned int curve_id)
 {
 	switch (curve_id) {
-	case ECC_CURVE_NIST_P192: return 3;
-	case ECC_CURVE_NIST_P256: return 4;
+	case ECC_CURVE_NIST_P192: return ECC_CURVE_NIST_P192_DIGITS;
+	case ECC_CURVE_NIST_P256: return ECC_CURVE_NIST_P256_DIGITS;
 	default: return 0;
 	}
 }
diff --git a/crypto/morus1280.c b/crypto/morus1280.c
new file mode 100644
index 0000000..6180b25
--- /dev/null
+++ b/crypto/morus1280.c
@@ -0,0 +1,549 @@
+/*
+ * The MORUS-1280 Authenticated-Encryption Algorithm
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/morus_common.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+
+#define MORUS1280_WORD_SIZE 8
+#define MORUS1280_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS1280_WORD_SIZE)
+#define MORUS1280_BLOCK_ALIGN (__alignof__(__le64))
+#define MORUS1280_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS1280_BLOCK_ALIGN)
+
+struct morus1280_block {
+	u64 words[MORUS_BLOCK_WORDS];
+};
+
+union morus1280_block_in {
+	__le64 words[MORUS_BLOCK_WORDS];
+	u8 bytes[MORUS1280_BLOCK_SIZE];
+};
+
+struct morus1280_state {
+	struct morus1280_block s[MORUS_STATE_BLOCKS];
+};
+
+struct morus1280_ctx {
+	struct morus1280_block key;
+};
+
+struct morus1280_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_chunk)(struct morus1280_state *state,
+			    u8 *dst, const u8 *src, unsigned int size);
+};
+
+static const struct morus1280_block crypto_morus1280_const[1] = {
+	{ .words = {
+		U64_C(0x0d08050302010100),
+		U64_C(0x6279e99059372215),
+		U64_C(0xf12fc26d55183ddb),
+		U64_C(0xdd28b57342311120),
+	} },
+};
+
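+/*
+ * One MORUS round: b0 ^= (b1 & b2) ^ b3 ^ m, every word of b0 is
+ * rotated left by 'b' bits, and the words of b3 are rotated by 'w'
+ * positions.
+ */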
+static void crypto_morus1280_round(struct morus1280_block *b0,
+				   struct morus1280_block *b1,
+				   struct morus1280_block *b2,
+				   struct morus1280_block *b3,
+				   struct morus1280_block *b4,
+				   const struct morus1280_block *m,
+				   unsigned int b, unsigned int w)
+{
+	unsigned int i;
+	struct morus1280_block tmp;
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		b0->words[i] ^= b1->words[i] & b2->words[i];
+		b0->words[i] ^= b3->words[i];
+		b0->words[i] ^= m->words[i];
+		b0->words[i] = rol64(b0->words[i], b);
+	}
+
+	tmp = *b3;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
+}
+
+static void crypto_morus1280_update(struct morus1280_state *state,
+				    const struct morus1280_block *m)
+{
+	static const struct morus1280_block z = {};
+
+	struct morus1280_block *s = state->s;
+
+	crypto_morus1280_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z, 13, 1);
+	crypto_morus1280_round(&s[1], &s[2], &s[3], &s[4], &s[0], m,  46, 2);
+	crypto_morus1280_round(&s[2], &s[3], &s[4], &s[0], &s[1], m,  38, 3);
+	crypto_morus1280_round(&s[3], &s[4], &s[0], &s[1], &s[2], m,   7, 2);
+	crypto_morus1280_round(&s[4], &s[0], &s[1], &s[2], &s[3], m,   4, 1);
+}
+
+static void crypto_morus1280_load_a(struct morus1280_block *dst, const u8 *src)
+{
+	unsigned int i;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		dst->words[i] = le64_to_cpu(*(const __le64 *)src);
+		src += MORUS1280_WORD_SIZE;
+	}
+}
+
+static void crypto_morus1280_load_u(struct morus1280_block *dst, const u8 *src)
+{
+	unsigned int i;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		dst->words[i] = get_unaligned_le64(src);
+		src += MORUS1280_WORD_SIZE;
+	}
+}
+
+static void crypto_morus1280_load(struct morus1280_block *dst, const u8 *src)
+{
+	if (MORUS1280_ALIGNED(src))
+		crypto_morus1280_load_a(dst, src);
+	else
+		crypto_morus1280_load_u(dst, src);
+}
+
+static void crypto_morus1280_store_a(u8 *dst, const struct morus1280_block *src)
+{
+	unsigned int i;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		*(__le64 *)dst = cpu_to_le64(src->words[i]);
+		dst += MORUS1280_WORD_SIZE;
+	}
+}
+
+static void crypto_morus1280_store_u(u8 *dst, const struct morus1280_block *src)
+{
+	unsigned int i;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		put_unaligned_le64(src->words[i], dst);
+		dst += MORUS1280_WORD_SIZE;
+	}
+}
+
+static void crypto_morus1280_store(u8 *dst, const struct morus1280_block *src)
+{
+	if (MORUS1280_ALIGNED(dst))
+		crypto_morus1280_store_a(dst, src);
+	else
+		crypto_morus1280_store_u(dst, src);
+}
+
+static void crypto_morus1280_ad(struct morus1280_state *state, const u8 *src,
+				unsigned int size)
+{
+	struct morus1280_block m;
+
+	if (MORUS1280_ALIGNED(src)) {
+		while (size >= MORUS1280_BLOCK_SIZE) {
+			crypto_morus1280_load_a(&m, src);
+			crypto_morus1280_update(state, &m);
+
+			size -= MORUS1280_BLOCK_SIZE;
+			src += MORUS1280_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= MORUS1280_BLOCK_SIZE) {
+			crypto_morus1280_load_u(&m, src);
+			crypto_morus1280_update(state, &m);
+
+			size -= MORUS1280_BLOCK_SIZE;
+			src += MORUS1280_BLOCK_SIZE;
+		}
+	}
+}
+
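+/*
+ * Computes the keystream block used for both directions:
+ * blk ^= s0 ^ (s1 rotated left by three words) ^ (s2 & s3).
+ */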
+static void crypto_morus1280_core(const struct morus1280_state *state,
+				  struct morus1280_block *blk)
+{
+	unsigned int i;
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		blk->words[i] ^= state->s[0].words[i];
+		blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
+	}
+}
+
+static void crypto_morus1280_encrypt_chunk(struct morus1280_state *state,
+					   u8 *dst, const u8 *src,
+					   unsigned int size)
+{
+	struct morus1280_block c, m;
+
+	if (MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst)) {
+		while (size >= MORUS1280_BLOCK_SIZE) {
+			crypto_morus1280_load_a(&m, src);
+			c = m;
+			crypto_morus1280_core(state, &c);
+			crypto_morus1280_store_a(dst, &c);
+			crypto_morus1280_update(state, &m);
+
+			src += MORUS1280_BLOCK_SIZE;
+			dst += MORUS1280_BLOCK_SIZE;
+			size -= MORUS1280_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= MORUS1280_BLOCK_SIZE) {
+			crypto_morus1280_load_u(&m, src);
+			c = m;
+			crypto_morus1280_core(state, &c);
+			crypto_morus1280_store_u(dst, &c);
+			crypto_morus1280_update(state, &m);
+
+			src += MORUS1280_BLOCK_SIZE;
+			dst += MORUS1280_BLOCK_SIZE;
+			size -= MORUS1280_BLOCK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union morus1280_block_in tail;
+
+		memcpy(tail.bytes, src, size);
+		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
+
+		crypto_morus1280_load_a(&m, tail.bytes);
+		c = m;
+		crypto_morus1280_core(state, &c);
+		crypto_morus1280_store_a(tail.bytes, &c);
+		crypto_morus1280_update(state, &m);
+
+		memcpy(dst, tail.bytes, size);
+	}
+}
+
+static void crypto_morus1280_decrypt_chunk(struct morus1280_state *state,
+					   u8 *dst, const u8 *src,
+					   unsigned int size)
+{
+	struct morus1280_block m;
+
+	if (MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst)) {
+		while (size >= MORUS1280_BLOCK_SIZE) {
+			crypto_morus1280_load_a(&m, src);
+			crypto_morus1280_core(state, &m);
+			crypto_morus1280_store_a(dst, &m);
+			crypto_morus1280_update(state, &m);
+
+			src += MORUS1280_BLOCK_SIZE;
+			dst += MORUS1280_BLOCK_SIZE;
+			size -= MORUS1280_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= MORUS1280_BLOCK_SIZE) {
+			crypto_morus1280_load_u(&m, src);
+			crypto_morus1280_core(state, &m);
+			crypto_morus1280_store_u(dst, &m);
+			crypto_morus1280_update(state, &m);
+
+			src += MORUS1280_BLOCK_SIZE;
+			dst += MORUS1280_BLOCK_SIZE;
+			size -= MORUS1280_BLOCK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union morus1280_block_in tail;
+
+		memcpy(tail.bytes, src, size);
+		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
+
+		crypto_morus1280_load_a(&m, tail.bytes);
+		crypto_morus1280_core(state, &m);
+		crypto_morus1280_store_a(tail.bytes, &m);
+		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
+		crypto_morus1280_load_a(&m, tail.bytes);
+		crypto_morus1280_update(state, &m);
+
+		memcpy(dst, tail.bytes, size);
+	}
+}
+
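+/*
+ * State setup: s0 = zero-padded nonce, s1 = key, s2 = all-ones,
+ * s3 = zero, s4 = algorithm constant; the state is stirred with 16
+ * zero-block updates before the key is XORed back into s1.
+ */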
+static void crypto_morus1280_init(struct morus1280_state *state,
+				  const struct morus1280_block *key,
+				  const u8 *iv)
+{
+	static const struct morus1280_block z = {};
+
+	union morus1280_block_in tmp;
+	unsigned int i;
+
+	memcpy(tmp.bytes, iv, MORUS_NONCE_SIZE);
+	memset(tmp.bytes + MORUS_NONCE_SIZE, 0,
+	       MORUS1280_BLOCK_SIZE - MORUS_NONCE_SIZE);
+
+	crypto_morus1280_load(&state->s[0], tmp.bytes);
+	state->s[1] = *key;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		state->s[2].words[i] = U64_C(0xFFFFFFFFFFFFFFFF);
+	state->s[3] = z;
+	state->s[4] = crypto_morus1280_const[0];
+
+	for (i = 0; i < 16; i++)
+		crypto_morus1280_update(state, &z);
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		state->s[1].words[i] ^= key->words[i];
+}
+
+static void crypto_morus1280_process_ad(struct morus1280_state *state,
+					struct scatterlist *sg_src,
+					unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	struct morus1280_block m;
+	union morus1280_block_in buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= MORUS1280_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+
+				crypto_morus1280_load_a(&m, buf.bytes);
+				crypto_morus1280_update(state, &m);
+
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			crypto_morus1280_ad(state, src, left);
+			src += left & ~(MORUS1280_BLOCK_SIZE - 1);
+			left &= MORUS1280_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+
+		pos += left;
+		assoclen -= size;
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
+
+		crypto_morus1280_load_a(&m, buf.bytes);
+		crypto_morus1280_update(state, &m);
+	}
+}
+
+static void crypto_morus1280_process_crypt(struct morus1280_state *state,
+					   struct aead_request *req,
+					   const struct morus1280_ops *ops)
+{
+	struct skcipher_walk walk;
+	u8 *dst;
+	const u8 *src;
+
+	ops->skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		src = walk.src.virt.addr;
+		dst = walk.dst.virt.addr;
+
+		ops->crypt_chunk(state, dst, src, walk.nbytes);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
+static void crypto_morus1280_final(struct morus1280_state *state,
+				   struct morus1280_block *tag_xor,
+				   u64 assoclen, u64 cryptlen)
+{
+	u64 assocbits = assoclen * 8;
+	u64 cryptbits = cryptlen * 8;
+
+	struct morus1280_block tmp;
+	unsigned int i;
+
+	tmp.words[0] = cpu_to_le64(assocbits);
+	tmp.words[1] = cpu_to_le64(cryptbits);
+	tmp.words[2] = 0;
+	tmp.words[3] = 0;
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		state->s[4].words[i] ^= state->s[0].words[i];
+
+	for (i = 0; i < 10; i++)
+		crypto_morus1280_update(state, &tmp);
+
+	crypto_morus1280_core(state, tag_xor);
+}
+
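+/*
+ * MORUS-1280 accepts either a 256-bit key or a 128-bit key, which is
+ * repeated twice to fill the 256-bit key block.
+ */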
+static int crypto_morus1280_setkey(struct crypto_aead *aead, const u8 *key,
+				   unsigned int keylen)
+{
+	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
+	union morus1280_block_in tmp;
+
+	if (keylen == MORUS1280_BLOCK_SIZE) {
+		crypto_morus1280_load(&ctx->key, key);
+	} else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
+		memcpy(tmp.bytes, key, keylen);
+		memcpy(tmp.bytes + keylen, key, keylen);
+
+		crypto_morus1280_load(&ctx->key, tmp.bytes);
+	} else {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int crypto_morus1280_setauthsize(struct crypto_aead *tfm,
+					unsigned int authsize)
+{
+	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
+}
+
+static void crypto_morus1280_crypt(struct aead_request *req,
+				   struct morus1280_block *tag_xor,
+				   unsigned int cryptlen,
+				   const struct morus1280_ops *ops)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
+	struct morus1280_state state;
+
+	crypto_morus1280_init(&state, &ctx->key, req->iv);
+	crypto_morus1280_process_ad(&state, req->src, req->assoclen);
+	crypto_morus1280_process_crypt(&state, req, ops);
+	crypto_morus1280_final(&state, tag_xor, req->assoclen, cryptlen);
+}
+
+static int crypto_morus1280_encrypt(struct aead_request *req)
+{
+	static const struct morus1280_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_chunk = crypto_morus1280_encrypt_chunk,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus1280_block tag = {};
+	union morus1280_block_in tag_out;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
+	crypto_morus1280_store(tag_out.bytes, &tag);
+
+	scatterwalk_map_and_copy(tag_out.bytes, req->dst,
+				 req->assoclen + cryptlen, authsize, 1);
+	return 0;
+}
+
+static int crypto_morus1280_decrypt(struct aead_request *req)
+{
+	static const struct morus1280_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_chunk = crypto_morus1280_decrypt_chunk,
+	};
+	static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union morus1280_block_in tag_in;
+	struct morus1280_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag_in.bytes, req->src,
+				 req->assoclen + cryptlen, authsize, 0);
+
+	crypto_morus1280_load(&tag, tag_in.bytes);
+	crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
+	crypto_morus1280_store(tag_in.bytes, &tag);
+
+	return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_morus1280_init_tfm(struct crypto_aead *tfm)
+{
+	return 0;
+}
+
+static void crypto_morus1280_exit_tfm(struct crypto_aead *tfm)
+{
+}
+
+static struct aead_alg crypto_morus1280_alg = {
+	.setkey = crypto_morus1280_setkey,
+	.setauthsize = crypto_morus1280_setauthsize,
+	.encrypt = crypto_morus1280_encrypt,
+	.decrypt = crypto_morus1280_decrypt,
+	.init = crypto_morus1280_init_tfm,
+	.exit = crypto_morus1280_exit_tfm,
+
+	.ivsize = MORUS_NONCE_SIZE,
+	.maxauthsize = MORUS_MAX_AUTH_SIZE,
+	.chunksize = MORUS1280_BLOCK_SIZE,
+
+	.base = {
+		.cra_flags = CRYPTO_ALG_TYPE_AEAD,
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct morus1280_ctx),
+		.cra_alignmask = 0,
+
+		.cra_priority = 100,
+
+		.cra_name = "morus1280",
+		.cra_driver_name = "morus1280-generic",
+
+		.cra_module = THIS_MODULE,
+	}
+};
+
+static int __init crypto_morus1280_module_init(void)
+{
+	return crypto_register_aead(&crypto_morus1280_alg);
+}
+
+static void __exit crypto_morus1280_module_exit(void)
+{
+	crypto_unregister_aead(&crypto_morus1280_alg);
+}
+
+module_init(crypto_morus1280_module_init);
+module_exit(crypto_morus1280_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm");
+MODULE_ALIAS_CRYPTO("morus1280");
+MODULE_ALIAS_CRYPTO("morus1280-generic");
diff --git a/crypto/morus640.c b/crypto/morus640.c
new file mode 100644
index 0000000..9fbcde3
--- /dev/null
+++ b/crypto/morus640.c
@@ -0,0 +1,544 @@
+/*
+ * The MORUS-640 Authenticated-Encryption Algorithm
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/morus_common.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+
+#define MORUS640_WORD_SIZE 4
+#define MORUS640_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS640_WORD_SIZE)
+#define MORUS640_BLOCK_ALIGN (__alignof__(__le32))
+#define MORUS640_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS640_BLOCK_ALIGN)
+
+struct morus640_block {
+	u32 words[MORUS_BLOCK_WORDS];
+};
+
+union morus640_block_in {
+	__le32 words[MORUS_BLOCK_WORDS];
+	u8 bytes[MORUS640_BLOCK_SIZE];
+};
+
+struct morus640_state {
+	struct morus640_block s[MORUS_STATE_BLOCKS];
+};
+
+struct morus640_ctx {
+	struct morus640_block key;
+};
+
+struct morus640_ops {
+	int (*skcipher_walk_init)(struct skcipher_walk *walk,
+				  struct aead_request *req, bool atomic);
+
+	void (*crypt_chunk)(struct morus640_state *state,
+			    u8 *dst, const u8 *src, unsigned int size);
+};
+
+static const struct morus640_block crypto_morus640_const[2] = {
+	{ .words = {
+		U32_C(0x02010100),
+		U32_C(0x0d080503),
+		U32_C(0x59372215),
+		U32_C(0x6279e990),
+	} },
+	{ .words = {
+		U32_C(0x55183ddb),
+		U32_C(0xf12fc26d),
+		U32_C(0x42311120),
+		U32_C(0xdd28b573),
+	} },
+};
+
+static void crypto_morus640_round(struct morus640_block *b0,
+				  struct morus640_block *b1,
+				  struct morus640_block *b2,
+				  struct morus640_block *b3,
+				  struct morus640_block *b4,
+				  const struct morus640_block *m,
+				  unsigned int b, unsigned int w)
+{
+	unsigned int i;
+	struct morus640_block tmp;
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		b0->words[i] ^= b1->words[i] & b2->words[i];
+		b0->words[i] ^= b3->words[i];
+		b0->words[i] ^= m->words[i];
+		b0->words[i] = rol32(b0->words[i], b);
+	}
+
+	tmp = *b3;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
+}
+
+static void crypto_morus640_update(struct morus640_state *state,
+				   const struct morus640_block *m)
+{
+	static const struct morus640_block z = {};
+
+	struct morus640_block *s = state->s;
+
+	crypto_morus640_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z,  5, 1);
+	crypto_morus640_round(&s[1], &s[2], &s[3], &s[4], &s[0], m,  31, 2);
+	crypto_morus640_round(&s[2], &s[3], &s[4], &s[0], &s[1], m,   7, 3);
+	crypto_morus640_round(&s[3], &s[4], &s[0], &s[1], &s[2], m,  22, 2);
+	crypto_morus640_round(&s[4], &s[0], &s[1], &s[2], &s[3], m,  13, 1);
+}
+
+static void crypto_morus640_load_a(struct morus640_block *dst, const u8 *src)
+{
+	unsigned int i;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		dst->words[i] = le32_to_cpu(*(const __le32 *)src);
+		src += MORUS640_WORD_SIZE;
+	}
+}
+
+static void crypto_morus640_load_u(struct morus640_block *dst, const u8 *src)
+{
+	unsigned int i;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		dst->words[i] = get_unaligned_le32(src);
+		src += MORUS640_WORD_SIZE;
+	}
+}
+
+static void crypto_morus640_load(struct morus640_block *dst, const u8 *src)
+{
+	if (MORUS640_ALIGNED(src))
+		crypto_morus640_load_a(dst, src);
+	else
+		crypto_morus640_load_u(dst, src);
+}
+
+static void crypto_morus640_store_a(u8 *dst, const struct morus640_block *src)
+{
+	unsigned int i;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		*(__le32 *)dst = cpu_to_le32(src->words[i]);
+		dst += MORUS640_WORD_SIZE;
+	}
+}
+
+static void crypto_morus640_store_u(u8 *dst, const struct morus640_block *src)
+{
+	unsigned int i;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		put_unaligned_le32(src->words[i], dst);
+		dst += MORUS640_WORD_SIZE;
+	}
+}
+
+static void crypto_morus640_store(u8 *dst, const struct morus640_block *src)
+{
+	if (MORUS640_ALIGNED(dst))
+		crypto_morus640_store_a(dst, src);
+	else
+		crypto_morus640_store_u(dst, src);
+}
+
+static void crypto_morus640_ad(struct morus640_state *state, const u8 *src,
+			       unsigned int size)
+{
+	struct morus640_block m;
+
+	if (MORUS640_ALIGNED(src)) {
+		while (size >= MORUS640_BLOCK_SIZE) {
+			crypto_morus640_load_a(&m, src);
+			crypto_morus640_update(state, &m);
+
+			size -= MORUS640_BLOCK_SIZE;
+			src += MORUS640_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= MORUS640_BLOCK_SIZE) {
+			crypto_morus640_load_u(&m, src);
+			crypto_morus640_update(state, &m);
+
+			size -= MORUS640_BLOCK_SIZE;
+			src += MORUS640_BLOCK_SIZE;
+		}
+	}
+}
+
+static void crypto_morus640_core(const struct morus640_state *state,
+				 struct morus640_block *blk)
+{
+	unsigned int i;
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
+		blk->words[i] ^= state->s[0].words[i];
+		blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
+	}
+}
+
+static void crypto_morus640_encrypt_chunk(struct morus640_state *state, u8 *dst,
+					  const u8 *src, unsigned int size)
+{
+	struct morus640_block c, m;
+
+	if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
+		while (size >= MORUS640_BLOCK_SIZE) {
+			crypto_morus640_load_a(&m, src);
+			c = m;
+			crypto_morus640_core(state, &c);
+			crypto_morus640_store_a(dst, &c);
+			crypto_morus640_update(state, &m);
+
+			src += MORUS640_BLOCK_SIZE;
+			dst += MORUS640_BLOCK_SIZE;
+			size -= MORUS640_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= MORUS640_BLOCK_SIZE) {
+			crypto_morus640_load_u(&m, src);
+			c = m;
+			crypto_morus640_core(state, &c);
+			crypto_morus640_store_u(dst, &c);
+			crypto_morus640_update(state, &m);
+
+			src += MORUS640_BLOCK_SIZE;
+			dst += MORUS640_BLOCK_SIZE;
+			size -= MORUS640_BLOCK_SIZE;
+		}
+	}
+
+	if (size > 0) {
+		union morus640_block_in tail;
+
+		memcpy(tail.bytes, src, size);
+		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
+
+		crypto_morus640_load_a(&m, tail.bytes);
+		c = m;
+		crypto_morus640_core(state, &c);
+		crypto_morus640_store_a(tail.bytes, &c);
+		crypto_morus640_update(state, &m);
+
+		memcpy(dst, tail.bytes, size);
+	}
+}
+
+static void crypto_morus640_decrypt_chunk(struct morus640_state *state, u8 *dst,
+					  const u8 *src, unsigned int size)
+{
+	struct morus640_block m;
+
+	if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
+		while (size >= MORUS640_BLOCK_SIZE) {
+			crypto_morus640_load_a(&m, src);
+			crypto_morus640_core(state, &m);
+			crypto_morus640_store_a(dst, &m);
+			crypto_morus640_update(state, &m);
+
+			src += MORUS640_BLOCK_SIZE;
+			dst += MORUS640_BLOCK_SIZE;
+			size -= MORUS640_BLOCK_SIZE;
+		}
+	} else {
+		while (size >= MORUS640_BLOCK_SIZE) {
+			crypto_morus640_load_u(&m, src);
+			crypto_morus640_core(state, &m);
+			crypto_morus640_store_u(dst, &m);
+			crypto_morus640_update(state, &m);
+
+			src += MORUS640_BLOCK_SIZE;
+			dst += MORUS640_BLOCK_SIZE;
+			size -= MORUS640_BLOCK_SIZE;
+		}
+	}
+
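+	/*
+	 * Trailing partial block: decrypt into a zero-padded stack
+	 * buffer, then re-zero the padding before the state update so
+	 * that keystream bytes beyond 'size' do not enter the state.
+	 */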
+	if (size > 0) {
+		union morus640_block_in tail;
+
+		memcpy(tail.bytes, src, size);
+		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
+
+		crypto_morus640_load_a(&m, tail.bytes);
+		crypto_morus640_core(state, &m);
+		crypto_morus640_store_a(tail.bytes, &m);
+		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
+		crypto_morus640_load_a(&m, tail.bytes);
+		crypto_morus640_update(state, &m);
+
+		memcpy(dst, tail.bytes, size);
+	}
+}
+
+static void crypto_morus640_init(struct morus640_state *state,
+				 const struct morus640_block *key,
+				 const u8 *iv)
+{
+	static const struct morus640_block z = {};
+
+	unsigned int i;
+
+	crypto_morus640_load(&state->s[0], iv);
+	state->s[1] = *key;
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		state->s[2].words[i] = U32_C(0xFFFFFFFF);
+	state->s[3] = crypto_morus640_const[0];
+	state->s[4] = crypto_morus640_const[1];
+
+	for (i = 0; i < 16; i++)
+		crypto_morus640_update(state, &z);
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		state->s[1].words[i] ^= key->words[i];
+}
+
+static void crypto_morus640_process_ad(struct morus640_state *state,
+				       struct scatterlist *sg_src,
+				       unsigned int assoclen)
+{
+	struct scatter_walk walk;
+	struct morus640_block m;
+	union morus640_block_in buf;
+	unsigned int pos = 0;
+
+	scatterwalk_start(&walk, sg_src);
+	while (assoclen != 0) {
+		unsigned int size = scatterwalk_clamp(&walk, assoclen);
+		unsigned int left = size;
+		void *mapped = scatterwalk_map(&walk);
+		const u8 *src = (const u8 *)mapped;
+
+		if (pos + size >= MORUS640_BLOCK_SIZE) {
+			if (pos > 0) {
+				unsigned int fill = MORUS640_BLOCK_SIZE - pos;
+				memcpy(buf.bytes + pos, src, fill);
+
+				crypto_morus640_load_a(&m, buf.bytes);
+				crypto_morus640_update(state, &m);
+
+				pos = 0;
+				left -= fill;
+				src += fill;
+			}
+
+			crypto_morus640_ad(state, src, left);
+			src += left & ~(MORUS640_BLOCK_SIZE - 1);
+			left &= MORUS640_BLOCK_SIZE - 1;
+		}
+
+		memcpy(buf.bytes + pos, src, left);
+
+		pos += left;
+		assoclen -= size;
+		scatterwalk_unmap(mapped);
+		scatterwalk_advance(&walk, size);
+		scatterwalk_done(&walk, 0, assoclen);
+	}
+
+	if (pos > 0) {
+		memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
+
+		crypto_morus640_load_a(&m, buf.bytes);
+		crypto_morus640_update(state, &m);
+	}
+}
+
+static void crypto_morus640_process_crypt(struct morus640_state *state,
+					  struct aead_request *req,
+					  const struct morus640_ops *ops)
+{
+	struct skcipher_walk walk;
+	u8 *dst;
+	const u8 *src;
+
+	ops->skcipher_walk_init(&walk, req, false);
+
+	while (walk.nbytes) {
+		src = walk.src.virt.addr;
+		dst = walk.dst.virt.addr;
+
+		ops->crypt_chunk(state, dst, src, walk.nbytes);
+
+		skcipher_walk_done(&walk, 0);
+	}
+}
+
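+/*
+ * The 64-bit bit counts of associated data and message are split into
+ * 32-bit halves to fill the four words of the length block.
+ */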
+static void crypto_morus640_final(struct morus640_state *state,
+				  struct morus640_block *tag_xor,
+				  u64 assoclen, u64 cryptlen)
+{
+	u64 assocbits = assoclen * 8;
+	u64 cryptbits = cryptlen * 8;
+
+	u32 assocbits_lo = (u32)assocbits;
+	u32 assocbits_hi = (u32)(assocbits >> 32);
+	u32 cryptbits_lo = (u32)cryptbits;
+	u32 cryptbits_hi = (u32)(cryptbits >> 32);
+
+	struct morus640_block tmp;
+	unsigned int i;
+
+	tmp.words[0] = cpu_to_le32(assocbits_lo);
+	tmp.words[1] = cpu_to_le32(assocbits_hi);
+	tmp.words[2] = cpu_to_le32(cryptbits_lo);
+	tmp.words[3] = cpu_to_le32(cryptbits_hi);
+
+	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
+		state->s[4].words[i] ^= state->s[0].words[i];
+
+	for (i = 0; i < 10; i++)
+		crypto_morus640_update(state, &tmp);
+
+	crypto_morus640_core(state, tag_xor);
+}
+
+static int crypto_morus640_setkey(struct crypto_aead *aead, const u8 *key,
+				  unsigned int keylen)
+{
+	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
+
+	if (keylen != MORUS640_BLOCK_SIZE) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	crypto_morus640_load(&ctx->key, key);
+	return 0;
+}
+
+static int crypto_morus640_setauthsize(struct crypto_aead *tfm,
+				       unsigned int authsize)
+{
+	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
+}
+
+static void crypto_morus640_crypt(struct aead_request *req,
+				  struct morus640_block *tag_xor,
+				  unsigned int cryptlen,
+				  const struct morus640_ops *ops)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
+	struct morus640_state state;
+
+	crypto_morus640_init(&state, &ctx->key, req->iv);
+	crypto_morus640_process_ad(&state, req->src, req->assoclen);
+	crypto_morus640_process_crypt(&state, req, ops);
+	crypto_morus640_final(&state, tag_xor, req->assoclen, cryptlen);
+}
+
+static int crypto_morus640_encrypt(struct aead_request *req)
+{
+	static const struct morus640_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_encrypt,
+		.crypt_chunk = crypto_morus640_encrypt_chunk,
+	};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct morus640_block tag = {};
+	union morus640_block_in tag_out;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen;
+
+	crypto_morus640_crypt(req, &tag, cryptlen, &ops);
+	crypto_morus640_store(tag_out.bytes, &tag);
+
+	scatterwalk_map_and_copy(tag_out.bytes, req->dst,
+				 req->assoclen + cryptlen, authsize, 1);
+	return 0;
+}
+
+static int crypto_morus640_decrypt(struct aead_request *req)
+{
+	static const struct morus640_ops ops = {
+		.skcipher_walk_init = skcipher_walk_aead_decrypt,
+		.crypt_chunk = crypto_morus640_decrypt_chunk,
+	};
+	static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
+
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	union morus640_block_in tag_in;
+	struct morus640_block tag;
+	unsigned int authsize = crypto_aead_authsize(tfm);
+	unsigned int cryptlen = req->cryptlen - authsize;
+
+	scatterwalk_map_and_copy(tag_in.bytes, req->src,
+				 req->assoclen + cryptlen, authsize, 0);
+
+	crypto_morus640_load(&tag, tag_in.bytes);
+	crypto_morus640_crypt(req, &tag, cryptlen, &ops);
+	crypto_morus640_store(tag_in.bytes, &tag);
+
+	return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
+}
+
+static int crypto_morus640_init_tfm(struct crypto_aead *tfm)
+{
+	return 0;
+}
+
+static void crypto_morus640_exit_tfm(struct crypto_aead *tfm)
+{
+}
+
+static struct aead_alg crypto_morus640_alg = {
+	.setkey = crypto_morus640_setkey,
+	.setauthsize = crypto_morus640_setauthsize,
+	.encrypt = crypto_morus640_encrypt,
+	.decrypt = crypto_morus640_decrypt,
+	.init = crypto_morus640_init_tfm,
+	.exit = crypto_morus640_exit_tfm,
+
+	.ivsize = MORUS_NONCE_SIZE,
+	.maxauthsize = MORUS_MAX_AUTH_SIZE,
+	.chunksize = MORUS640_BLOCK_SIZE,
+
+	.base = {
+		.cra_flags = CRYPTO_ALG_TYPE_AEAD,
+		.cra_blocksize = 1,
+		.cra_ctxsize = sizeof(struct morus640_ctx),
+		.cra_alignmask = 0,
+
+		.cra_priority = 100,
+
+		.cra_name = "morus640",
+		.cra_driver_name = "morus640-generic",
+
+		.cra_module = THIS_MODULE,
+	}
+};
+
+static int __init crypto_morus640_module_init(void)
+{
+	return crypto_register_aead(&crypto_morus640_alg);
+}
+
+static void __exit crypto_morus640_module_exit(void)
+{
+	crypto_unregister_aead(&crypto_morus640_alg);
+}
+
+module_init(crypto_morus640_module_init);
+module_exit(crypto_morus640_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
+MODULE_DESCRIPTION("MORUS-640 AEAD algorithm");
+MODULE_ALIAS_CRYPTO("morus640");
+MODULE_ALIAS_CRYPTO("morus640-generic");
diff --git a/crypto/pcbc.c b/crypto/pcbc.c
index d9e45a9..ef802f6 100644
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
@@ -14,6 +14,7 @@
  *
  */
 
+#include <crypto/algapi.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -72,7 +73,7 @@
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *iv = walk->iv;
-	u8 tmpbuf[bsize];
+	u8 tmpbuf[MAX_CIPHER_BLOCKSIZE];
 
 	do {
 		memcpy(tmpbuf, src, bsize);
@@ -144,7 +145,7 @@
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *iv = walk->iv;
-	u8 tmpbuf[bsize] __aligned(__alignof__(u32));
+	u8 tmpbuf[MAX_CIPHER_BLOCKSIZE] __aligned(__alignof__(u32));
 
 	do {
 		memcpy(tmpbuf, src, bsize);
diff --git a/crypto/proc.c b/crypto/proc.c
index 822fcef..f4eb613 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -94,21 +94,9 @@
 	.show		= c_show
 };
 
-static int crypto_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &crypto_seq_ops);
-}
-        
-static const struct file_operations proc_crypto_ops = {
-	.open		= crypto_info_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release
-};
-
 void __init crypto_init_proc(void)
 {
-	proc_create("crypto", 0, NULL, &proc_crypto_ops);
+	proc_create_seq("crypto", 0, NULL, &crypto_seq_ops);
 }
 
 void __exit crypto_exit_proc(void)
diff --git a/crypto/rsa.c b/crypto/rsa.c
index b067f3a..4167980 100644
--- a/crypto/rsa.c
+++ b/crypto/rsa.c
@@ -215,7 +215,6 @@
 		goto err_free_m;
 	}
 
-	ret = -ENOMEM;
 	s = mpi_read_raw_from_sgl(req->src, req->src_len);
 	if (!s) {
 		ret = -ENOMEM;
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index 5074006..8c77bc7 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -21,9 +21,17 @@
 
 #include <asm/unaligned.h>
 #include <crypto/internal/skcipher.h>
-#include <crypto/salsa20.h>
 #include <linux/module.h>
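+/*
+ * The Salsa20 definitions below used to live in <crypto/salsa20.h>;
+ * with the generic implementation as the only remaining user they are
+ * kept private to this file.
+ */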
 
+#define SALSA20_IV_SIZE        8
+#define SALSA20_MIN_KEY_SIZE  16
+#define SALSA20_MAX_KEY_SIZE  32
+#define SALSA20_BLOCK_SIZE    64
+
+struct salsa20_ctx {
+	u32 initial_state[16];
+};
+
 static void salsa20_block(u32 *state, __le32 *stream)
 {
 	u32 x[16];
@@ -93,16 +101,15 @@
 	}
 }
 
-void crypto_salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
+static void salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
 			 const u8 *iv)
 {
 	memcpy(state, ctx->initial_state, sizeof(ctx->initial_state));
 	state[6] = get_unaligned_le32(iv + 0);
 	state[7] = get_unaligned_le32(iv + 4);
 }
-EXPORT_SYMBOL_GPL(crypto_salsa20_init);
 
-int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+static int salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			  unsigned int keysize)
 {
 	static const char sigma[16] = "expand 32-byte k";
@@ -143,7 +150,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(crypto_salsa20_setkey);
 
 static int salsa20_crypt(struct skcipher_request *req)
 {
@@ -155,7 +161,7 @@
 
 	err = skcipher_walk_virt(&walk, req, true);
 
-	crypto_salsa20_init(state, ctx, walk.iv);
+	salsa20_init(state, ctx, walk.iv);
 
 	while (walk.nbytes > 0) {
 		unsigned int nbytes = walk.nbytes;
@@ -183,7 +189,7 @@
 	.max_keysize		= SALSA20_MAX_KEY_SIZE,
 	.ivsize			= SALSA20_IV_SIZE,
 	.chunksize		= SALSA20_BLOCK_SIZE,
-	.setkey			= crypto_salsa20_setkey,
+	.setkey			= salsa20_setkey,
 	.encrypt		= salsa20_crypt,
 	.decrypt		= salsa20_crypt,
 };
diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c
index f537a27..c18eebf 100644
--- a/crypto/sm4_generic.c
+++ b/crypto/sm4_generic.c
@@ -190,21 +190,23 @@
 
 /* encrypt a block of text */
 
-static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	sm4_do_crypt(ctx->rkey_enc, (u32 *)out, (u32 *)in);
 }
+EXPORT_SYMBOL_GPL(crypto_sm4_encrypt);
 
 /* decrypt a block of text */
 
-static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	sm4_do_crypt(ctx->rkey_dec, (u32 *)out, (u32 *)in);
 }
+EXPORT_SYMBOL_GPL(crypto_sm4_decrypt);
 
 static struct crypto_alg sm4_alg = {
 	.cra_name		=	"sm4",
@@ -219,8 +221,8 @@
 			.cia_min_keysize	=	SM4_KEY_SIZE,
 			.cia_max_keysize	=	SM4_KEY_SIZE,
 			.cia_setkey		=	crypto_sm4_set_key,
-			.cia_encrypt		=	sm4_encrypt,
-			.cia_decrypt		=	sm4_decrypt
+			.cia_encrypt		=	crypto_sm4_encrypt,
+			.cia_decrypt		=	crypto_sm4_decrypt
 		}
 	}
 };
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 51fe7c8..d5bcdd9 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -158,9 +158,9 @@
 };
 
 static int do_mult_aead_op(struct test_mb_aead_data *data, int enc,
-				u32 num_mb)
+				u32 num_mb, int *rc)
 {
-	int i, rc[num_mb], err = 0;
+	int i, err = 0;
 
 	/* Fire up a bunch of concurrent requests */
 	for (i = 0; i < num_mb; i++) {
@@ -188,18 +188,26 @@
 {
 	unsigned long start, end;
 	int bcount;
-	int ret;
+	int ret = 0;
+	int *rc;
+
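+	/*
+	 * One result slot per concurrent request; allocated from the
+	 * heap to avoid a variable-length array on the kernel stack.
+	 */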
+	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
+	if (!rc)
+		return -ENOMEM;
 
 	for (start = jiffies, end = start + secs * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
-		ret = do_mult_aead_op(data, enc, num_mb);
+		ret = do_mult_aead_op(data, enc, num_mb, rc);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
 	pr_cont("%d operations in %d seconds (%ld bytes)\n",
 		bcount * num_mb, secs, (long)bcount * blen * num_mb);
-	return 0;
+
+out:
+	kfree(rc);
+	return ret;
 }
 
 static int test_mb_aead_cycles(struct test_mb_aead_data *data, int enc,
@@ -208,10 +216,15 @@
 	unsigned long cycles = 0;
 	int ret = 0;
 	int i;
+	int *rc;
+
+	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
+	if (!rc)
+		return -ENOMEM;
 
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
-		ret = do_mult_aead_op(data, enc, num_mb);
+		ret = do_mult_aead_op(data, enc, num_mb, rc);
 		if (ret)
 			goto out;
 	}
@@ -221,7 +234,7 @@
 		cycles_t start, end;
 
 		start = get_cycles();
-		ret = do_mult_aead_op(data, enc, num_mb);
+		ret = do_mult_aead_op(data, enc, num_mb, rc);
 		end = get_cycles();
 
 		if (ret)
@@ -230,11 +243,11 @@
 		cycles += end - start;
 	}
 
-out:
-	if (ret == 0)
-		pr_cont("1 operation in %lu cycles (%d bytes)\n",
-			(cycles + 4) / (8 * num_mb), blen);
+	pr_cont("1 operation in %lu cycles (%d bytes)\n",
+		(cycles + 4) / (8 * num_mb), blen);
 
+out:
+	kfree(rc);
 	return ret;
 }
 
@@ -705,9 +718,10 @@
 	char *xbuf[XBUFSIZE];
 };
 
-static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb)
+static inline int do_mult_ahash_op(struct test_mb_ahash_data *data, u32 num_mb,
+				   int *rc)
 {
-	int i, rc[num_mb], err = 0;
+	int i, err = 0;
 
 	/* Fire up a bunch of concurrent requests */
 	for (i = 0; i < num_mb; i++)
@@ -731,18 +745,26 @@
 {
 	unsigned long start, end;
 	int bcount;
-	int ret;
+	int ret = 0;
+	int *rc;
+
+	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
+	if (!rc)
+		return -ENOMEM;
 
 	for (start = jiffies, end = start + secs * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
-		ret = do_mult_ahash_op(data, num_mb);
+		ret = do_mult_ahash_op(data, num_mb, rc);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
 	pr_cont("%d operations in %d seconds (%ld bytes)\n",
 		bcount * num_mb, secs, (long)bcount * blen * num_mb);
-	return 0;
+
+out:
+	kfree(rc);
+	return ret;
 }
 
 static int test_mb_ahash_cycles(struct test_mb_ahash_data *data, int blen,
@@ -751,10 +773,15 @@
 	unsigned long cycles = 0;
 	int ret = 0;
 	int i;
+	int *rc;
+
+	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
+	if (!rc)
+		return -ENOMEM;
 
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
-		ret = do_mult_ahash_op(data, num_mb);
+		ret = do_mult_ahash_op(data, num_mb, rc);
 		if (ret)
 			goto out;
 	}
@@ -764,7 +791,7 @@
 		cycles_t start, end;
 
 		start = get_cycles();
-		ret = do_mult_ahash_op(data, num_mb);
+		ret = do_mult_ahash_op(data, num_mb, rc);
 		end = get_cycles();
 
 		if (ret)
@@ -773,11 +800,11 @@
 		cycles += end - start;
 	}
 
-out:
-	if (ret == 0)
-		pr_cont("1 operation in %lu cycles (%d bytes)\n",
-			(cycles + 4) / (8 * num_mb), blen);
+	pr_cont("1 operation in %lu cycles (%d bytes)\n",
+		(cycles + 4) / (8 * num_mb), blen);
 
+out:
+	kfree(rc);
 	return ret;
 }
 
@@ -1118,9 +1145,9 @@
 };
 
 static int do_mult_acipher_op(struct test_mb_skcipher_data *data, int enc,
-				u32 num_mb)
+				u32 num_mb, int *rc)
 {
-	int i, rc[num_mb], err = 0;
+	int i, err = 0;
 
 	/* Fire up a bunch of concurrent requests */
 	for (i = 0; i < num_mb; i++) {
@@ -1148,18 +1175,26 @@
 {
 	unsigned long start, end;
 	int bcount;
-	int ret;
+	int ret = 0;
+	int *rc;
+
+	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
+	if (!rc)
+		return -ENOMEM;
 
 	for (start = jiffies, end = start + secs * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
-		ret = do_mult_acipher_op(data, enc, num_mb);
+		ret = do_mult_acipher_op(data, enc, num_mb, rc);
 		if (ret)
-			return ret;
+			goto out;
 	}
 
 	pr_cont("%d operations in %d seconds (%ld bytes)\n",
 		bcount * num_mb, secs, (long)bcount * blen * num_mb);
-	return 0;
+
+out:
+	kfree(rc);
+	return ret;
 }
 
 static int test_mb_acipher_cycles(struct test_mb_skcipher_data *data, int enc,
@@ -1168,10 +1203,15 @@
 	unsigned long cycles = 0;
 	int ret = 0;
 	int i;
+	int *rc;
+
+	rc = kcalloc(num_mb, sizeof(*rc), GFP_KERNEL);
+	if (!rc)
+		return -ENOMEM;
 
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
-		ret = do_mult_acipher_op(data, enc, num_mb);
+		ret = do_mult_acipher_op(data, enc, num_mb, rc);
 		if (ret)
 			goto out;
 	}
@@ -1181,7 +1221,7 @@
 		cycles_t start, end;
 
 		start = get_cycles();
-		ret = do_mult_acipher_op(data, enc, num_mb);
+		ret = do_mult_acipher_op(data, enc, num_mb, rc);
 		end = get_cycles();
 
 		if (ret)
@@ -1190,11 +1230,11 @@
 		cycles += end - start;
 	}
 
-out:
-	if (ret == 0)
-		pr_cont("1 operation in %lu cycles (%d bytes)\n",
-			(cycles + 4) / (8 * num_mb), blen);
+	pr_cont("1 operation in %lu cycles (%d bytes)\n",
+		(cycles + 4) / (8 * num_mb), blen);
 
+out:
+	kfree(rc);
 	return ret;
 }
 
@@ -1606,7 +1646,7 @@
 	return ret;
 }
 
-static int do_test(const char *alg, u32 type, u32 mask, int m)
+static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 {
 	int i;
 	int ret = 0;
@@ -1621,7 +1661,7 @@
 		}
 
 		for (i = 1; i < 200; i++)
-			ret += do_test(NULL, 0, 0, i);
+			ret += do_test(NULL, 0, 0, i, num_mb);
 		break;
 
 	case 1:
@@ -1902,10 +1942,6 @@
 		ret += tcrypt_test("vmac(aes)");
 		break;
 
-	case 110:
-		ret += tcrypt_test("hmac(crc32)");
-		break;
-
 	case 111:
 		ret += tcrypt_test("hmac(sha3-224)");
 		break;
@@ -2903,7 +2939,7 @@
 			goto err_free_tv;
 	}
 
-	err = do_test(alg, type, mask, mode);
+	err = do_test(alg, type, mask, mode, num_mb);
 
 	if (err) {
 		printk(KERN_ERR "tcrypt: one or more tests failed!\n");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index af4a01c..d1d9984 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -84,10 +84,8 @@
 };
 
 struct cipher_test_suite {
-	struct {
-		const struct cipher_testvec *vecs;
-		unsigned int count;
-	} enc, dec;
+	const struct cipher_testvec *vecs;
+	unsigned int count;
 };
 
 struct comp_test_suite {
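
This is the structural half of a change that runs through the whole patch:
each cipher_testvec now carries both the plaintext and the ciphertext (see
the testmgr.h hunk further down), so a suite no longer needs separate enc and
dec vector lists, and the same array is driven in both directions. Assuming
the __VECS() helper already used in this file expands to
{ .vecs = tv, .count = ARRAY_SIZE(tv) }, a descriptor shrinks to the form
used throughout the table below:

    /* Sketch: one vector array now serves both directions... */
    .suite = {
    	.cipher = __VECS(des_tv_template),
    },

    /* ...which alg_test_skcipher() exercises twice (see its hunk below): */
    err = test_skcipher(tfm, ENCRYPT, suite->vecs, suite->count);
    if (!err)
    	err = test_skcipher(tfm, DECRYPT, suite->vecs, suite->count);
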
@@ -988,6 +986,7 @@
 	unsigned int i, j, k;
 	char *q;
 	const char *e;
+	const char *input, *result;
 	void *data;
 	char *xbuf[XBUFSIZE];
 	int ret = -ENOMEM;
@@ -1008,14 +1007,16 @@
 		if (fips_enabled && template[i].fips_skip)
 			continue;
 
+		input  = enc ? template[i].ptext : template[i].ctext;
+		result = enc ? template[i].ctext : template[i].ptext;
 		j++;
 
 		ret = -EINVAL;
-		if (WARN_ON(template[i].ilen > PAGE_SIZE))
+		if (WARN_ON(template[i].len > PAGE_SIZE))
 			goto out;
 
 		data = xbuf[0];
-		memcpy(data, template[i].input, template[i].ilen);
+		memcpy(data, input, template[i].len);
 
 		crypto_cipher_clear_flags(tfm, ~0);
 		if (template[i].wk)
@@ -1031,7 +1032,7 @@
 		} else if (ret)
 			continue;
 
-		for (k = 0; k < template[i].ilen;
+		for (k = 0; k < template[i].len;
 		     k += crypto_cipher_blocksize(tfm)) {
 			if (enc)
 				crypto_cipher_encrypt_one(tfm, data + k,
@@ -1042,10 +1043,10 @@
 		}
 
 		q = data;
-		if (memcmp(q, template[i].result, template[i].rlen)) {
+		if (memcmp(q, result, template[i].len)) {
 			printk(KERN_ERR "alg: cipher: Test %d failed "
 			       "on %s for %s\n", j, e, algo);
-			hexdump(q, template[i].rlen);
+			hexdump(q, template[i].len);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -1073,6 +1074,7 @@
 	struct scatterlist sgout[8];
 	const char *e, *d;
 	struct crypto_wait wait;
+	const char *input, *result;
 	void *data;
 	char iv[MAX_IVLEN];
 	char *xbuf[XBUFSIZE];
@@ -1116,19 +1118,21 @@
 		if (fips_enabled && template[i].fips_skip)
 			continue;
 
-		if (template[i].iv)
+		if (template[i].iv && !(template[i].generates_iv && enc))
 			memcpy(iv, template[i].iv, ivsize);
 		else
 			memset(iv, 0, MAX_IVLEN);
 
+		input  = enc ? template[i].ptext : template[i].ctext;
+		result = enc ? template[i].ctext : template[i].ptext;
 		j++;
 		ret = -EINVAL;
-		if (WARN_ON(align_offset + template[i].ilen > PAGE_SIZE))
+		if (WARN_ON(align_offset + template[i].len > PAGE_SIZE))
 			goto out;
 
 		data = xbuf[0];
 		data += align_offset;
-		memcpy(data, template[i].input, template[i].ilen);
+		memcpy(data, input, template[i].len);
 
 		crypto_skcipher_clear_flags(tfm, ~0);
 		if (template[i].wk)
@@ -1144,15 +1148,15 @@
 		} else if (ret)
 			continue;
 
-		sg_init_one(&sg[0], data, template[i].ilen);
+		sg_init_one(&sg[0], data, template[i].len);
 		if (diff_dst) {
 			data = xoutbuf[0];
 			data += align_offset;
-			sg_init_one(&sgout[0], data, template[i].ilen);
+			sg_init_one(&sgout[0], data, template[i].len);
 		}
 
 		skcipher_request_set_crypt(req, sg, (diff_dst) ? sgout : sg,
-					   template[i].ilen, iv);
+					   template[i].len, iv);
 		ret = crypto_wait_req(enc ? crypto_skcipher_encrypt(req) :
 				      crypto_skcipher_decrypt(req), &wait);
 
@@ -1163,17 +1167,16 @@
 		}
 
 		q = data;
-		if (memcmp(q, template[i].result, template[i].rlen)) {
+		if (memcmp(q, result, template[i].len)) {
 			pr_err("alg: skcipher%s: Test %d failed (invalid result) on %s for %s\n",
 			       d, j, e, algo);
-			hexdump(q, template[i].rlen);
+			hexdump(q, template[i].len);
 			ret = -EINVAL;
 			goto out;
 		}
 
-		if (template[i].iv_out &&
-		    memcmp(iv, template[i].iv_out,
-			   crypto_skcipher_ivsize(tfm))) {
+		if (template[i].generates_iv && enc &&
+		    memcmp(iv, template[i].iv, crypto_skcipher_ivsize(tfm))) {
 			pr_err("alg: skcipher%s: Test %d failed (invalid output IV) on %s for %s\n",
 			       d, j, e, algo);
 			hexdump(iv, crypto_skcipher_ivsize(tfm));
@@ -1194,11 +1197,13 @@
 		if (fips_enabled && template[i].fips_skip)
 			continue;
 
-		if (template[i].iv)
+		if (template[i].iv && !(template[i].generates_iv && enc))
 			memcpy(iv, template[i].iv, ivsize);
 		else
 			memset(iv, 0, MAX_IVLEN);
 
+		input  = enc ? template[i].ptext : template[i].ctext;
+		result = enc ? template[i].ctext : template[i].ptext;
 		j++;
 		crypto_skcipher_clear_flags(tfm, ~0);
 		if (template[i].wk)
@@ -1226,7 +1231,7 @@
 
 			q = xbuf[IDX[k] >> PAGE_SHIFT] + offset_in_page(IDX[k]);
 
-			memcpy(q, template[i].input + temp, template[i].tap[k]);
+			memcpy(q, input + temp, template[i].tap[k]);
 
 			if (offset_in_page(q) + template[i].tap[k] < PAGE_SIZE)
 				q[template[i].tap[k]] = 0;
@@ -1248,7 +1253,7 @@
 		}
 
 		skcipher_request_set_crypt(req, sg, (diff_dst) ? sgout : sg,
-					   template[i].ilen, iv);
+					   template[i].len, iv);
 
 		ret = crypto_wait_req(enc ? crypto_skcipher_encrypt(req) :
 				      crypto_skcipher_decrypt(req), &wait);
@@ -1269,8 +1274,7 @@
 				q = xbuf[IDX[k] >> PAGE_SHIFT] +
 				    offset_in_page(IDX[k]);
 
-			if (memcmp(q, template[i].result + temp,
-				   template[i].tap[k])) {
+			if (memcmp(q, result + temp, template[i].tap[k])) {
 				pr_err("alg: skcipher%s: Chunk test %d failed on %s at page %u for %s\n",
 				       d, j, e, k, algo);
 				hexdump(q, template[i].tap[k]);
@@ -1342,19 +1346,30 @@
 		     int ctcount, int dtcount)
 {
 	const char *algo = crypto_tfm_alg_driver_name(crypto_comp_tfm(tfm));
+	char *output, *decomp_output;
 	unsigned int i;
-	char result[COMP_BUF_SIZE];
 	int ret;
 
+	output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL);
+	if (!output)
+		return -ENOMEM;
+
+	decomp_output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL);
+	if (!decomp_output) {
+		kfree(output);
+		return -ENOMEM;
+	}
+
 	for (i = 0; i < ctcount; i++) {
 		int ilen;
 		unsigned int dlen = COMP_BUF_SIZE;
 
-		memset(result, 0, sizeof (result));
+		memset(output, 0, COMP_BUF_SIZE);
+		memset(decomp_output, 0, COMP_BUF_SIZE);
 
 		ilen = ctemplate[i].inlen;
 		ret = crypto_comp_compress(tfm, ctemplate[i].input,
-		                           ilen, result, &dlen);
+					   ilen, output, &dlen);
 		if (ret) {
 			printk(KERN_ERR "alg: comp: compression failed "
 			       "on test %d for %s: ret=%d\n", i + 1, algo,
@@ -1362,7 +1377,17 @@
 			goto out;
 		}
 
-		if (dlen != ctemplate[i].outlen) {
+		ilen = dlen;
+		dlen = COMP_BUF_SIZE;
+		ret = crypto_comp_decompress(tfm, output,
+					     ilen, decomp_output, &dlen);
+		if (ret) {
+			pr_err("alg: comp: decompression of compressed output failed on test %d for %s: ret=%d\n",
+			       i + 1, algo, -ret);
+			goto out;
+		}
+
+		if (dlen != ctemplate[i].inlen) {
 			printk(KERN_ERR "alg: comp: Compression test %d "
 			       "failed for %s: output len = %d\n", i + 1, algo,
 			       dlen);
@@ -1370,10 +1395,11 @@
 			goto out;
 		}
 
-		if (memcmp(result, ctemplate[i].output, dlen)) {
-			printk(KERN_ERR "alg: comp: Compression test %d "
-			       "failed for %s\n", i + 1, algo);
-			hexdump(result, dlen);
+		if (memcmp(decomp_output, ctemplate[i].input,
+			   ctemplate[i].inlen)) {
+			pr_err("alg: comp: decompressed output differs from input on test %d for %s\n",
+			       i + 1, algo);
+			hexdump(decomp_output, dlen);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -1383,11 +1409,11 @@
 		int ilen;
 		unsigned int dlen = COMP_BUF_SIZE;
 
-		memset(result, 0, sizeof (result));
+		memset(decomp_output, 0, COMP_BUF_SIZE);
 
 		ilen = dtemplate[i].inlen;
 		ret = crypto_comp_decompress(tfm, dtemplate[i].input,
-		                             ilen, result, &dlen);
+					     ilen, decomp_output, &dlen);
 		if (ret) {
 			printk(KERN_ERR "alg: comp: decompression failed "
 			       "on test %d for %s: ret=%d\n", i + 1, algo,
@@ -1403,10 +1429,10 @@
 			goto out;
 		}
 
-		if (memcmp(result, dtemplate[i].output, dlen)) {
+		if (memcmp(decomp_output, dtemplate[i].output, dlen)) {
 			printk(KERN_ERR "alg: comp: Decompression test %d "
 			       "failed for %s\n", i + 1, algo);
-			hexdump(result, dlen);
+			hexdump(decomp_output, dlen);
 			ret = -EINVAL;
 			goto out;
 		}
@@ -1415,11 +1441,13 @@
 	ret = 0;
 
 out:
+	kfree(decomp_output);
+	kfree(output);
 	return ret;
 }
 
 static int test_acomp(struct crypto_acomp *tfm,
-		      const struct comp_testvec *ctemplate,
+			      const struct comp_testvec *ctemplate,
 		      const struct comp_testvec *dtemplate,
 		      int ctcount, int dtcount)
 {
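
Note what the rewritten test_comp() loop above now checks: the compressor's
output is no longer compared against fixed expected bytes, presumably because
compressed output need not be bit-identical across implementations or library
versions (the zstd vectors added at the end of this patch are one such case).
The portable invariant is the round trip. A hedged sketch of that invariant,
with names local to this example:

    /* decompress(compress(input)) must reproduce input exactly. */
    static int comp_roundtrip_ok(struct crypto_comp *tfm, const u8 *in,
    			     unsigned int inlen, u8 *tmp, u8 *out)
    {
    	unsigned int clen = COMP_BUF_SIZE, dlen = COMP_BUF_SIZE;
    	int ret;

    	ret = crypto_comp_compress(tfm, in, inlen, tmp, &clen);
    	if (ret)
    		return ret;

    	ret = crypto_comp_decompress(tfm, tmp, clen, out, &dlen);
    	if (ret)
    		return ret;

    	/* byte-for-byte match against the original input */
    	return (dlen == inlen && !memcmp(out, in, inlen)) ? 0 : -EINVAL;
    }
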
@@ -1681,8 +1709,9 @@
 static int alg_test_cipher(const struct alg_test_desc *desc,
 			   const char *driver, u32 type, u32 mask)
 {
+	const struct cipher_test_suite *suite = &desc->suite.cipher;
 	struct crypto_cipher *tfm;
-	int err = 0;
+	int err;
 
 	tfm = crypto_alloc_cipher(driver, type, mask);
 	if (IS_ERR(tfm)) {
@@ -1691,18 +1720,10 @@
 		return PTR_ERR(tfm);
 	}
 
-	if (desc->suite.cipher.enc.vecs) {
-		err = test_cipher(tfm, ENCRYPT, desc->suite.cipher.enc.vecs,
-				  desc->suite.cipher.enc.count);
-		if (err)
-			goto out;
-	}
+	err = test_cipher(tfm, ENCRYPT, suite->vecs, suite->count);
+	if (!err)
+		err = test_cipher(tfm, DECRYPT, suite->vecs, suite->count);
 
-	if (desc->suite.cipher.dec.vecs)
-		err = test_cipher(tfm, DECRYPT, desc->suite.cipher.dec.vecs,
-				  desc->suite.cipher.dec.count);
-
-out:
 	crypto_free_cipher(tfm);
 	return err;
 }
@@ -1710,8 +1731,9 @@
 static int alg_test_skcipher(const struct alg_test_desc *desc,
 			     const char *driver, u32 type, u32 mask)
 {
+	const struct cipher_test_suite *suite = &desc->suite.cipher;
 	struct crypto_skcipher *tfm;
-	int err = 0;
+	int err;
 
 	tfm = crypto_alloc_skcipher(driver, type, mask);
 	if (IS_ERR(tfm)) {
@@ -1720,18 +1742,10 @@
 		return PTR_ERR(tfm);
 	}
 
-	if (desc->suite.cipher.enc.vecs) {
-		err = test_skcipher(tfm, ENCRYPT, desc->suite.cipher.enc.vecs,
-				    desc->suite.cipher.enc.count);
-		if (err)
-			goto out;
-	}
+	err = test_skcipher(tfm, ENCRYPT, suite->vecs, suite->count);
+	if (!err)
+		err = test_skcipher(tfm, DECRYPT, suite->vecs, suite->count);
 
-	if (desc->suite.cipher.dec.vecs)
-		err = test_skcipher(tfm, DECRYPT, desc->suite.cipher.dec.vecs,
-				    desc->suite.cipher.dec.count);
-
-out:
 	crypto_free_skcipher(tfm);
 	return err;
 }
@@ -1774,8 +1788,9 @@
 	return err;
 }
 
-static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
-			 u32 type, u32 mask)
+static int __alg_test_hash(const struct hash_testvec *template,
+			   unsigned int tcount, const char *driver,
+			   u32 type, u32 mask)
 {
 	struct crypto_ahash *tfm;
 	int err;
@@ -1787,16 +1802,51 @@
 		return PTR_ERR(tfm);
 	}
 
-	err = test_hash(tfm, desc->suite.hash.vecs,
-			desc->suite.hash.count, true);
+	err = test_hash(tfm, template, tcount, true);
 	if (!err)
-		err = test_hash(tfm, desc->suite.hash.vecs,
-				desc->suite.hash.count, false);
-
+		err = test_hash(tfm, template, tcount, false);
 	crypto_free_ahash(tfm);
 	return err;
 }
 
+static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
+			 u32 type, u32 mask)
+{
+	const struct hash_testvec *template = desc->suite.hash.vecs;
+	unsigned int tcount = desc->suite.hash.count;
+	unsigned int nr_unkeyed, nr_keyed;
+	int err;
+
+	/*
+	 * For OPTIONAL_KEY algorithms, we have to do all the unkeyed tests
+	 * first, before setting a key on the tfm.  To make this easier, we
+	 * require that the unkeyed test vectors (if any) are listed first.
+	 */
+
+	for (nr_unkeyed = 0; nr_unkeyed < tcount; nr_unkeyed++) {
+		if (template[nr_unkeyed].ksize)
+			break;
+	}
+	for (nr_keyed = 0; nr_unkeyed + nr_keyed < tcount; nr_keyed++) {
+		if (!template[nr_unkeyed + nr_keyed].ksize) {
+			pr_err("alg: hash: test vectors for %s out of order, unkeyed ones must come first\n",
+			       desc->alg);
+			return -EINVAL;
+		}
+	}
+
+	err = 0;
+	if (nr_unkeyed) {
+		err = __alg_test_hash(template, nr_unkeyed, driver, type, mask);
+		template += nr_unkeyed;
+	}
+
+	if (!err && nr_keyed)
+		err = __alg_test_hash(template, nr_keyed, driver, type, mask);
+
+	return err;
+}
+
 static int alg_test_crc32c(const struct alg_test_desc *desc,
 			   const char *driver, u32 type, u32 mask)
 {
@@ -2316,6 +2366,33 @@
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
+		.alg = "aegis128",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = {
+				.enc = __VECS(aegis128_enc_tv_template),
+				.dec = __VECS(aegis128_dec_tv_template),
+			}
+		}
+	}, {
+		.alg = "aegis128l",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = {
+				.enc = __VECS(aegis128l_enc_tv_template),
+				.dec = __VECS(aegis128l_dec_tv_template),
+			}
+		}
+	}, {
+		.alg = "aegis256",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = {
+				.enc = __VECS(aegis256_enc_tv_template),
+				.dec = __VECS(aegis256_dec_tv_template),
+			}
+		}
+	}, {
 		.alg = "ansi_cprng",
 		.test = alg_test_cprng,
 		.suite = {
@@ -2488,93 +2565,70 @@
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(aes_cbc_enc_tv_template),
-				.dec = __VECS(aes_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(aes_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(anubis)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(anubis_cbc_enc_tv_template),
-				.dec = __VECS(anubis_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(anubis_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(blowfish)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(bf_cbc_enc_tv_template),
-				.dec = __VECS(bf_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(bf_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(camellia)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(camellia_cbc_enc_tv_template),
-				.dec = __VECS(camellia_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(camellia_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(cast5)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cast5_cbc_enc_tv_template),
-				.dec = __VECS(cast5_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(cast5_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(cast6)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cast6_cbc_enc_tv_template),
-				.dec = __VECS(cast6_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(cast6_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(des)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(des_cbc_enc_tv_template),
-				.dec = __VECS(des_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(des_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(des3_ede)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(des3_ede_cbc_enc_tv_template),
-				.dec = __VECS(des3_ede_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(des3_ede_cbc_tv_template)
+		},
+	}, {
+		/* Same as cbc(aes) except the key is stored in
+		 * hardware secure memory which we reference by index
+		 */
+		.alg = "cbc(paes)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
 	}, {
 		.alg = "cbc(serpent)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(serpent_cbc_enc_tv_template),
-				.dec = __VECS(serpent_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(serpent_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbc(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(tf_cbc_enc_tv_template),
-				.dec = __VECS(tf_cbc_dec_tv_template)
-			}
-		}
+			.cipher = __VECS(tf_cbc_tv_template)
+		},
 	}, {
 		.alg = "cbcmac(aes)",
 		.fips_allowed = 1,
@@ -2596,11 +2650,8 @@
 		.alg = "chacha20",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(chacha20_enc_tv_template),
-				.dec = __VECS(chacha20_enc_tv_template),
-			}
-		}
+			.cipher = __VECS(chacha20_tv_template)
+		},
 	}, {
 		.alg = "cmac(aes)",
 		.fips_allowed = 1,
@@ -2643,92 +2694,69 @@
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(aes_ctr_enc_tv_template),
-				.dec = __VECS(aes_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(aes_ctr_tv_template)
 		}
 	}, {
 		.alg = "ctr(blowfish)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(bf_ctr_enc_tv_template),
-				.dec = __VECS(bf_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(bf_ctr_tv_template)
 		}
 	}, {
 		.alg = "ctr(camellia)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(camellia_ctr_enc_tv_template),
-				.dec = __VECS(camellia_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(camellia_ctr_tv_template)
 		}
 	}, {
 		.alg = "ctr(cast5)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cast5_ctr_enc_tv_template),
-				.dec = __VECS(cast5_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(cast5_ctr_tv_template)
 		}
 	}, {
 		.alg = "ctr(cast6)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cast6_ctr_enc_tv_template),
-				.dec = __VECS(cast6_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(cast6_ctr_tv_template)
 		}
 	}, {
 		.alg = "ctr(des)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(des_ctr_enc_tv_template),
-				.dec = __VECS(des_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(des_ctr_tv_template)
 		}
 	}, {
 		.alg = "ctr(des3_ede)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(des3_ede_ctr_enc_tv_template),
-				.dec = __VECS(des3_ede_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(des3_ede_ctr_tv_template)
 		}
 	}, {
+		/* Same as ctr(aes) except the key is stored in
+		 * hardware secure memory which we reference by index
+		 */
+		.alg = "ctr(paes)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
 		.alg = "ctr(serpent)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(serpent_ctr_enc_tv_template),
-				.dec = __VECS(serpent_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(serpent_ctr_tv_template)
 		}
 	}, {
 		.alg = "ctr(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(tf_ctr_enc_tv_template),
-				.dec = __VECS(tf_ctr_dec_tv_template)
-			}
+			.cipher = __VECS(tf_ctr_tv_template)
 		}
 	}, {
 		.alg = "cts(cbc(aes))",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cts_mode_enc_tv_template),
-				.dec = __VECS(cts_mode_dec_tv_template)
-			}
+			.cipher = __VECS(cts_mode_tv_template)
 		}
 	}, {
 		.alg = "deflate",
@@ -2876,64 +2904,43 @@
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(aes_enc_tv_template),
-				.dec = __VECS(aes_dec_tv_template)
-			}
+			.cipher = __VECS(aes_tv_template)
 		}
 	}, {
 		.alg = "ecb(anubis)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(anubis_enc_tv_template),
-				.dec = __VECS(anubis_dec_tv_template)
-			}
+			.cipher = __VECS(anubis_tv_template)
 		}
 	}, {
 		.alg = "ecb(arc4)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(arc4_enc_tv_template),
-				.dec = __VECS(arc4_dec_tv_template)
-			}
+			.cipher = __VECS(arc4_tv_template)
 		}
 	}, {
 		.alg = "ecb(blowfish)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(bf_enc_tv_template),
-				.dec = __VECS(bf_dec_tv_template)
-			}
+			.cipher = __VECS(bf_tv_template)
 		}
 	}, {
 		.alg = "ecb(camellia)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(camellia_enc_tv_template),
-				.dec = __VECS(camellia_dec_tv_template)
-			}
+			.cipher = __VECS(camellia_tv_template)
 		}
 	}, {
 		.alg = "ecb(cast5)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cast5_enc_tv_template),
-				.dec = __VECS(cast5_dec_tv_template)
-			}
+			.cipher = __VECS(cast5_tv_template)
 		}
 	}, {
 		.alg = "ecb(cast6)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cast6_enc_tv_template),
-				.dec = __VECS(cast6_dec_tv_template)
-			}
+			.cipher = __VECS(cast6_tv_template)
 		}
 	}, {
 		.alg = "ecb(cipher_null)",
@@ -2943,134 +2950,96 @@
 		.alg = "ecb(des)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(des_enc_tv_template),
-				.dec = __VECS(des_dec_tv_template)
-			}
+			.cipher = __VECS(des_tv_template)
 		}
 	}, {
 		.alg = "ecb(des3_ede)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(des3_ede_enc_tv_template),
-				.dec = __VECS(des3_ede_dec_tv_template)
-			}
+			.cipher = __VECS(des3_ede_tv_template)
 		}
 	}, {
 		.alg = "ecb(fcrypt)",
 		.test = alg_test_skcipher,
 		.suite = {
 			.cipher = {
-				.enc = {
-					.vecs = fcrypt_pcbc_enc_tv_template,
-					.count = 1
-				},
-				.dec = {
-					.vecs = fcrypt_pcbc_dec_tv_template,
-					.count = 1
-				}
+				.vecs = fcrypt_pcbc_tv_template,
+				.count = 1
 			}
 		}
 	}, {
 		.alg = "ecb(khazad)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(khazad_enc_tv_template),
-				.dec = __VECS(khazad_dec_tv_template)
-			}
+			.cipher = __VECS(khazad_tv_template)
 		}
 	}, {
+		/* Same as ecb(aes) except the key is stored in
+		 * hardware secure memory which we reference by index
+		 */
+		.alg = "ecb(paes)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
 		.alg = "ecb(seed)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(seed_enc_tv_template),
-				.dec = __VECS(seed_dec_tv_template)
-			}
+			.cipher = __VECS(seed_tv_template)
 		}
 	}, {
 		.alg = "ecb(serpent)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(serpent_enc_tv_template),
-				.dec = __VECS(serpent_dec_tv_template)
-			}
+			.cipher = __VECS(serpent_tv_template)
 		}
 	}, {
 		.alg = "ecb(sm4)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(sm4_enc_tv_template),
-				.dec = __VECS(sm4_dec_tv_template)
-			}
+			.cipher = __VECS(sm4_tv_template)
 		}
 	}, {
 		.alg = "ecb(speck128)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(speck128_enc_tv_template),
-				.dec = __VECS(speck128_dec_tv_template)
-			}
+			.cipher = __VECS(speck128_tv_template)
 		}
 	}, {
 		.alg = "ecb(speck64)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(speck64_enc_tv_template),
-				.dec = __VECS(speck64_dec_tv_template)
-			}
+			.cipher = __VECS(speck64_tv_template)
 		}
 	}, {
 		.alg = "ecb(tea)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(tea_enc_tv_template),
-				.dec = __VECS(tea_dec_tv_template)
-			}
+			.cipher = __VECS(tea_tv_template)
 		}
 	}, {
 		.alg = "ecb(tnepres)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(tnepres_enc_tv_template),
-				.dec = __VECS(tnepres_dec_tv_template)
-			}
+			.cipher = __VECS(tnepres_tv_template)
 		}
 	}, {
 		.alg = "ecb(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(tf_enc_tv_template),
-				.dec = __VECS(tf_dec_tv_template)
-			}
+			.cipher = __VECS(tf_tv_template)
 		}
 	}, {
 		.alg = "ecb(xeta)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(xeta_enc_tv_template),
-				.dec = __VECS(xeta_dec_tv_template)
-			}
+			.cipher = __VECS(xeta_tv_template)
 		}
 	}, {
 		.alg = "ecb(xtea)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(xtea_enc_tv_template),
-				.dec = __VECS(xtea_dec_tv_template)
-			}
+			.cipher = __VECS(xtea_tv_template)
 		}
 	}, {
 		.alg = "ecdh",
@@ -3097,12 +3066,6 @@
 			.hash = __VECS(ghash_tv_template)
 		}
 	}, {
-		.alg = "hmac(crc32)",
-		.test = alg_test_hash,
-		.suite = {
-			.hash = __VECS(bfin_crc_tv_template)
-		}
-	}, {
 		.alg = "hmac(md5)",
 		.test = alg_test_hash,
 		.suite = {
@@ -3192,55 +3155,37 @@
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(aes_kw_enc_tv_template),
-				.dec = __VECS(aes_kw_dec_tv_template)
-			}
+			.cipher = __VECS(aes_kw_tv_template)
 		}
 	}, {
 		.alg = "lrw(aes)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(aes_lrw_enc_tv_template),
-				.dec = __VECS(aes_lrw_dec_tv_template)
-			}
+			.cipher = __VECS(aes_lrw_tv_template)
 		}
 	}, {
 		.alg = "lrw(camellia)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(camellia_lrw_enc_tv_template),
-				.dec = __VECS(camellia_lrw_dec_tv_template)
-			}
+			.cipher = __VECS(camellia_lrw_tv_template)
 		}
 	}, {
 		.alg = "lrw(cast6)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cast6_lrw_enc_tv_template),
-				.dec = __VECS(cast6_lrw_dec_tv_template)
-			}
+			.cipher = __VECS(cast6_lrw_tv_template)
 		}
 	}, {
 		.alg = "lrw(serpent)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(serpent_lrw_enc_tv_template),
-				.dec = __VECS(serpent_lrw_dec_tv_template)
-			}
+			.cipher = __VECS(serpent_lrw_tv_template)
 		}
 	}, {
 		.alg = "lrw(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(tf_lrw_enc_tv_template),
-				.dec = __VECS(tf_lrw_dec_tv_template)
-			}
+			.cipher = __VECS(tf_lrw_tv_template)
 		}
 	}, {
 		.alg = "lz4",
@@ -3291,23 +3236,42 @@
 			.hash = __VECS(michael_mic_tv_template)
 		}
 	}, {
+		.alg = "morus1280",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = {
+				.enc = __VECS(morus1280_enc_tv_template),
+				.dec = __VECS(morus1280_dec_tv_template),
+			}
+		}
+	}, {
+		.alg = "morus640",
+		.test = alg_test_aead,
+		.suite = {
+			.aead = {
+				.enc = __VECS(morus640_enc_tv_template),
+				.dec = __VECS(morus640_dec_tv_template),
+			}
+		}
+	}, {
 		.alg = "ofb(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(aes_ofb_enc_tv_template),
-				.dec = __VECS(aes_ofb_dec_tv_template)
-			}
+			.cipher = __VECS(aes_ofb_tv_template)
 		}
 	}, {
+		/* Same as ofb(aes) except the key is stored in
+		 * hardware secure memory which we reference by index
+		 */
+		.alg = "ofb(paes)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
 		.alg = "pcbc(fcrypt)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(fcrypt_pcbc_enc_tv_template),
-				.dec = __VECS(fcrypt_pcbc_dec_tv_template)
-			}
+			.cipher = __VECS(fcrypt_pcbc_tv_template)
 		}
 	}, {
 		.alg = "pkcs1pad(rsa,sha224)",
@@ -3339,10 +3303,7 @@
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(aes_ctr_rfc3686_enc_tv_template),
-				.dec = __VECS(aes_ctr_rfc3686_dec_tv_template)
-			}
+			.cipher = __VECS(aes_ctr_rfc3686_tv_template)
 		}
 	}, {
 		.alg = "rfc4106(gcm(aes))",
@@ -3426,9 +3387,7 @@
 		.alg = "salsa20",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(salsa20_stream_enc_tv_template)
-			}
+			.cipher = __VECS(salsa20_stream_tv_template)
 		}
 	}, {
 		.alg = "sha1",
@@ -3552,66 +3511,60 @@
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(aes_xts_enc_tv_template),
-				.dec = __VECS(aes_xts_dec_tv_template)
-			}
+			.cipher = __VECS(aes_xts_tv_template)
 		}
 	}, {
 		.alg = "xts(camellia)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(camellia_xts_enc_tv_template),
-				.dec = __VECS(camellia_xts_dec_tv_template)
-			}
+			.cipher = __VECS(camellia_xts_tv_template)
 		}
 	}, {
 		.alg = "xts(cast6)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(cast6_xts_enc_tv_template),
-				.dec = __VECS(cast6_xts_dec_tv_template)
-			}
+			.cipher = __VECS(cast6_xts_tv_template)
 		}
 	}, {
+		/* Same as xts(aes) except the key is stored in
+		 * hardware secure memory which we reference by index
+		 */
+		.alg = "xts(paes)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
 		.alg = "xts(serpent)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(serpent_xts_enc_tv_template),
-				.dec = __VECS(serpent_xts_dec_tv_template)
-			}
+			.cipher = __VECS(serpent_xts_tv_template)
 		}
 	}, {
 		.alg = "xts(speck128)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(speck128_xts_enc_tv_template),
-				.dec = __VECS(speck128_xts_dec_tv_template)
-			}
+			.cipher = __VECS(speck128_xts_tv_template)
 		}
 	}, {
 		.alg = "xts(speck64)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(speck64_xts_enc_tv_template),
-				.dec = __VECS(speck64_xts_dec_tv_template)
-			}
+			.cipher = __VECS(speck64_xts_tv_template)
 		}
 	}, {
 		.alg = "xts(twofish)",
 		.test = alg_test_skcipher,
 		.suite = {
-			.cipher = {
-				.enc = __VECS(tf_xts_enc_tv_template),
-				.dec = __VECS(tf_xts_dec_tv_template)
-			}
+			.cipher = __VECS(tf_xts_tv_template)
 		}
 	}, {
+		.alg = "xts4096(paes)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
+		.alg = "xts512(paes)",
+		.test = alg_test_null,
+		.fips_allowed = 1,
+	}, {
 		.alg = "zlib-deflate",
 		.test = alg_test_comp,
 		.fips_allowed = 1,
@@ -3621,6 +3574,16 @@
 				.decomp = __VECS(zlib_deflate_decomp_tv_template)
 			}
 		}
+	}, {
+		.alg = "zstd",
+		.test = alg_test_comp,
+		.fips_allowed = 1,
+		.suite = {
+			.comp = {
+				.comp = __VECS(zstd_comp_tv_template),
+				.decomp = __VECS(zstd_decomp_tv_template)
+			}
+		}
 	}
 };
 
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 004c0a0..b950aa2 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -44,14 +44,13 @@
 };
 
 /*
- * cipher_testvec:	structure to describe a cipher test
- * @key:	A pointer to a key used by the test
- * @klen:	The length of @key
- * @iv:		A pointer to the IV used by the test
- * @input:	A pointer to data used as input
- * @ilen	The length of data in @input
- * @result:	A pointer to what the test need to produce
- * @rlen:	The length of data in @result
+ * cipher_testvec:	structure to describe a symmetric cipher test
+ * @key:	Pointer to key
+ * @klen:	Length of @key in bytes
+ * @iv:		Pointer to IV (optional for some ciphers)
+ * @ptext:	Pointer to plaintext
+ * @ctext:	Pointer to ciphertext
+ * @len:	Length of @ptext and @ctext in bytes
  * @fail:	If set to one, the test needs to fail
  * @wk:		Does the test need CRYPTO_TFM_REQ_WEAK_KEY
  * 		(e.g. the test needs to fail due to a weak key)
@@ -60,23 +59,23 @@
  * @also_non_np: 	if set to 1, the test will also be done without
  * 			splitting data into @np SGs
  * @fips_skip:	Skip the test vector in FIPS mode
+ * @generates_iv: Encryption should ignore the given IV, and output @iv.
+ *		  Decryption takes @iv.  Needed for AES Keywrap ("kw(aes)").
  */
-
 struct cipher_testvec {
 	const char *key;
 	const char *iv;
-	const char *iv_out;
-	const char *input;
-	const char *result;
+	const char *ptext;
+	const char *ctext;
 	unsigned short tap[MAX_TAP];
 	int np;
 	unsigned char also_non_np;
 	bool fail;
 	unsigned char wk; /* weak key flag */
 	unsigned char klen;
-	unsigned short ilen;
-	unsigned short rlen;
+	unsigned short len;
 	bool fips_skip;
+	bool generates_iv;
 };
 
 struct aead_testvec {
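
With @ptext, @ctext and a single @len in one record, an encrypt/decrypt
vector pair collapses into one entry, and testmgr selects the input side by
direction (input = enc ? ptext : ctext, as in the testmgr.c hunks above).
The first Applied Cryptography DES vector from the template below, written as
a single combined entry:

    {	/* one entry now drives both the encrypt and the decrypt test */
    	.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
    	.klen	= 8,
    	.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7",
    	.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d",
    	.len	= 8,
    },
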
@@ -5542,111 +5541,121 @@
 /*
  * DES test vectors.
  */
-static const struct cipher_testvec des_enc_tv_template[] = {
+static const struct cipher_testvec des_tv_template[] = {
 	{ /* From Applied Cryptography */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xe7",
-		.ilen	= 8,
-		.result	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7",
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d",
+		.len	= 8,
 	}, { /* Same key, different plaintext block */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x22\x33\x44\x55\x66\x77\x88\x99",
-		.ilen	= 8,
-		.result	= "\xf7\x9c\x89\x2a\x33\x8f\x4a\x8b",
-		.rlen	= 8,
+		.ptext	= "\x22\x33\x44\x55\x66\x77\x88\x99",
+		.ctext	= "\xf7\x9c\x89\x2a\x33\x8f\x4a\x8b",
+		.len	= 8,
 	}, { /* Sbox test from NBS */
 		.key	= "\x7c\xa1\x10\x45\x4a\x1a\x6e\x57",
 		.klen	= 8,
-		.input	= "\x01\xa1\xd6\xd0\x39\x77\x67\x42",
-		.ilen	= 8,
-		.result	= "\x69\x0f\x5b\x0d\x9a\x26\x93\x9b",
-		.rlen	= 8,
+		.ptext	= "\x01\xa1\xd6\xd0\x39\x77\x67\x42",
+		.ctext	= "\x69\x0f\x5b\x0d\x9a\x26\x93\x9b",
+		.len	= 8,
 	}, { /* Three blocks */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
 			  "\x22\x33\x44\x55\x66\x77\x88\x99"
 			  "\xca\xfe\xba\xbe\xfe\xed\xbe\xef",
-		.ilen	= 24,
-		.result	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
 			  "\xf7\x9c\x89\x2a\x33\x8f\x4a\x8b"
 			  "\xb4\x99\x26\xf7\x1f\xe1\xd4\x90",
-		.rlen	= 24,
+		.len	= 24,
 	}, { /* Weak key */
 		.fail	= true,
 		.wk	= 1,
 		.key	= "\x01\x01\x01\x01\x01\x01\x01\x01",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xe7",
-		.ilen	= 8,
-		.result	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7",
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d",
+		.len	= 8,
 	}, { /* Two blocks -- for testing encryption across pages */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
 			  "\x22\x33\x44\x55\x66\x77\x88\x99",
-		.ilen	= 16,
-		.result	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
 			  "\xf7\x9c\x89\x2a\x33\x8f\x4a\x8b",
-		.rlen	= 16,
+		.len	= 16,
 		.np	= 2,
 		.tap	= { 8, 8 }
+	}, {
+		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.klen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
+			  "\xa3\x99\x7b\xca\xaf\x69\xa0\xf5",
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
+			  "\x69\x0f\x5b\x0d\x9a\x26\x93\x9b",
+		.len	= 16,
+		.np	= 2,
+		.tap	= { 8, 8 }
+	}, {
+		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.klen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
+			  "\xa3\x99\x7b\xca\xaf\x69\xa0\xf5",
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
+			  "\x69\x0f\x5b\x0d\x9a\x26\x93\x9b",
+		.len	= 16,
+		.np	= 3,
+		.tap	= { 3, 12, 1 }
 	}, { /* Four blocks -- for testing encryption with chunking */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
 			  "\x22\x33\x44\x55\x66\x77\x88\x99"
 			  "\xca\xfe\xba\xbe\xfe\xed\xbe\xef"
 			  "\x22\x33\x44\x55\x66\x77\x88\x99",
-		.ilen	= 32,
-		.result	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
 			  "\xf7\x9c\x89\x2a\x33\x8f\x4a\x8b"
 			  "\xb4\x99\x26\xf7\x1f\xe1\xd4\x90"
 			  "\xf7\x9c\x89\x2a\x33\x8f\x4a\x8b",
-		.rlen	= 32,
+		.len	= 32,
 		.np	= 3,
 		.tap	= { 14, 10, 8 }
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
 			  "\x22\x33\x44\x55\x66\x77\x88\x99"
 			  "\xca\xfe\xba\xbe\xfe\xed\xbe\xef",
-		.ilen	= 24,
-		.result	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
 			  "\xf7\x9c\x89\x2a\x33\x8f\x4a\x8b"
 			  "\xb4\x99\x26\xf7\x1f\xe1\xd4\x90",
-		.rlen	= 24,
+		.len	= 24,
 		.np	= 4,
 		.tap	= { 2, 1, 3, 18 }
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
 			  "\x22\x33\x44\x55\x66\x77\x88\x99",
-		.ilen	= 16,
-		.result	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
 			  "\xf7\x9c\x89\x2a\x33\x8f\x4a\x8b",
-		.rlen	= 16,
+		.len	= 16,
 		.np	= 5,
 		.tap	= { 2, 2, 2, 2, 8 }
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xe7",
-		.ilen	= 8,
-		.result	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xe7",
+		.ctext	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d",
+		.len	= 8,
 		.np	= 8,
 		.tap	= { 1, 1, 1, 1, 1, 1, 1, 1 }
 	}, { /* Generated with Crypto++ */
 		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55",
 		.klen	= 8,
-		.input	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
 			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
 			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
 			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
@@ -5677,8 +5686,7 @@
 			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
 			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
 			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB",
-		.ilen	= 248,
-		.result	= "\x88\xCB\x1F\xAB\x2F\x2A\x49\x57"
+		.ctext	= "\x88\xCB\x1F\xAB\x2F\x2A\x49\x57"
 			  "\x92\xB9\x77\xFF\x2F\x47\x58\xDD"
 			  "\xD7\x8A\x91\x95\x26\x33\x78\xB2"
 			  "\x33\xBA\xB2\x3E\x02\xF5\x1F\xEF"
@@ -5709,180 +5717,68 @@
 			  "\x46\x31\x4C\x5E\x2E\x95\x61\xEF"
 			  "\xE1\x58\x39\x09\xB4\x8B\x40\xAC"
 			  "\x5F\x62\xC7\x72\xD9\xFC\xCB\x9A",
-		.rlen	= 248,
+		.len	= 248,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 248 - 10, 2, 8 },
 	},
 };
 
-static const struct cipher_testvec des_dec_tv_template[] = {
-	{ /* From Applied Cryptography */
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.input	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d",
-		.ilen	= 8,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xe7",
-		.rlen	= 8,
-	}, { /* Sbox test from NBS */
-		.key	= "\x7c\xa1\x10\x45\x4a\x1a\x6e\x57",
-		.klen	= 8,
-		.input	= "\x69\x0f\x5b\x0d\x9a\x26\x93\x9b",
-		.ilen	= 8,
-		.result	= "\x01\xa1\xd6\xd0\x39\x77\x67\x42",
-		.rlen	= 8,
-	}, { /* Two blocks, for chunking test */
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.input	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
-			  "\x69\x0f\x5b\x0d\x9a\x26\x93\x9b",
-		.ilen	= 16,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
-			  "\xa3\x99\x7b\xca\xaf\x69\xa0\xf5",
-		.rlen	= 16,
-		.np	= 2,
-		.tap	= { 8, 8 }
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.input	= "\xc9\x57\x44\x25\x6a\x5e\xd3\x1d"
-			  "\x69\x0f\x5b\x0d\x9a\x26\x93\x9b",
-		.ilen	= 16,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xe7"
-			  "\xa3\x99\x7b\xca\xaf\x69\xa0\xf5",
-		.rlen	= 16,
-		.np	= 3,
-		.tap	= { 3, 12, 1 }
-	}, { /* Generated with Crypto++ */
-		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55",
-		.klen	= 8,
-		.input	= "\x88\xCB\x1F\xAB\x2F\x2A\x49\x57"
-			  "\x92\xB9\x77\xFF\x2F\x47\x58\xDD"
-			  "\xD7\x8A\x91\x95\x26\x33\x78\xB2"
-			  "\x33\xBA\xB2\x3E\x02\xF5\x1F\xEF"
-			  "\x98\xC5\xA6\xD2\x7D\x79\xEC\xB3"
-			  "\x45\xF3\x4C\x61\xAC\x6C\xC2\x55"
-			  "\xE5\xD3\x06\x58\x8A\x42\x3E\xDD"
-			  "\x3D\x20\x45\xE9\x6F\x0D\x25\xA8"
-			  "\xA5\xC7\x69\xCE\xD5\x3B\x7B\xC9"
-			  "\x9E\x65\xE7\xA3\xF2\xE4\x18\x94"
-			  "\xD2\x81\xE9\x33\x2B\x2D\x49\xC4"
-			  "\xFE\xDA\x7F\xE2\xF2\x8C\x9C\xDC"
-			  "\x73\x58\x11\x1F\x81\xD7\x21\x1A"
-			  "\x80\xD0\x0D\xE8\x45\xD6\xD8\xD5"
-			  "\x2E\x51\x16\xCA\x09\x89\x54\x62"
-			  "\xF7\x04\x3D\x75\xB9\xA3\x84\xF4"
-			  "\x62\xF0\x02\x58\x83\xAF\x30\x87"
-			  "\x85\x3F\x01\xCD\x8E\x58\x42\xC4"
-			  "\x41\x73\xE0\x15\x0A\xE6\x2E\x80"
-			  "\x94\xF8\x5B\x3A\x4E\xDF\x51\xB2"
-			  "\x9D\xE4\xC4\x9D\xF7\x3F\xF8\x8E"
-			  "\x37\x22\x4D\x00\x2A\xEF\xC1\x0F"
-			  "\x14\xA0\x66\xAB\x79\x39\xD0\x8E"
-			  "\xE9\x95\x61\x74\x12\xED\x07\xD7"
-			  "\xDD\x95\xDC\x7B\x57\x25\x27\x9C"
-			  "\x51\x96\x16\xF7\x94\x61\xB8\x87"
-			  "\xF0\x21\x1B\x32\xFB\x07\x0F\x29"
-			  "\x56\xBD\x9D\x22\xA2\x9F\xA2\xB9"
-			  "\x46\x31\x4C\x5E\x2E\x95\x61\xEF"
-			  "\xE1\x58\x39\x09\xB4\x8B\x40\xAC"
-			  "\x5F\x62\xC7\x72\xD9\xFC\xCB\x9A",
-		.ilen	= 248,
-		.result	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
-			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
-			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
-			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
-			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
-			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
-			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
-			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
-			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
-			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
-			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
-			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
-			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
-			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
-			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
-			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
-			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
-			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
-			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
-			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
-			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
-			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
-			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
-			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
-			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
-			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
-			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
-			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
-			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
-			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
-			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB",
-		.rlen	= 248,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 248 - 10, 2, 8 },
-	},
-};
-
-static const struct cipher_testvec des_cbc_enc_tv_template[] = {
+static const struct cipher_testvec des_cbc_tv_template[] = {
 	{ /* From OpenSSL */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
 		.iv	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.input	= "\x37\x36\x35\x34\x33\x32\x31\x20"
+		.ptext	= "\x37\x36\x35\x34\x33\x32\x31\x20"
 			  "\x4e\x6f\x77\x20\x69\x73\x20\x74"
 			  "\x68\x65\x20\x74\x69\x6d\x65\x20",
-		.ilen	= 24,
-		.result	= "\xcc\xd1\x73\xff\xab\x20\x39\xf4"
+		.ctext	= "\xcc\xd1\x73\xff\xab\x20\x39\xf4"
 			  "\xac\xd8\xae\xfd\xdf\xd8\xa1\xeb"
 			  "\x46\x8e\x91\x15\x78\x88\xba\x68",
-		.rlen	= 24,
+		.len	= 24,
 	}, { /* FIPS Pub 81 */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
 		.iv	= "\x12\x34\x56\x78\x90\xab\xcd\xef",
-		.input	= "\x4e\x6f\x77\x20\x69\x73\x20\x74",
-		.ilen	= 8,
-		.result	= "\xe5\xc7\xcd\xde\x87\x2b\xf2\x7c",
-		.rlen	= 8,
+		.ptext	= "\x4e\x6f\x77\x20\x69\x73\x20\x74",
+		.ctext	= "\xe5\xc7\xcd\xde\x87\x2b\xf2\x7c",
+		.len	= 8,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
 		.iv	= "\xe5\xc7\xcd\xde\x87\x2b\xf2\x7c",
-		.input	= "\x68\x65\x20\x74\x69\x6d\x65\x20",
-		.ilen	= 8,
-		.result	= "\x43\xe9\x34\x00\x8c\x38\x9c\x0f",
-		.rlen	= 8,
+		.ptext	= "\x68\x65\x20\x74\x69\x6d\x65\x20",
+		.ctext	= "\x43\xe9\x34\x00\x8c\x38\x9c\x0f",
+		.len	= 8,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
 		.iv	= "\x43\xe9\x34\x00\x8c\x38\x9c\x0f",
-		.input	= "\x66\x6f\x72\x20\x61\x6c\x6c\x20",
-		.ilen	= 8,
-		.result	= "\x68\x37\x88\x49\x9a\x7c\x05\xf6",
-		.rlen	= 8,
+		.ptext	= "\x66\x6f\x72\x20\x61\x6c\x6c\x20",
+		.ctext	= "\x68\x37\x88\x49\x9a\x7c\x05\xf6",
+		.len	= 8,
+		.np	= 2,
+		.tap	= { 4, 4 },
+		.also_non_np = 1,
 	}, { /* Copy of openssl vector for chunk testing */
 	     /* From OpenSSL */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
 		.iv	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.input	= "\x37\x36\x35\x34\x33\x32\x31\x20"
+		.ptext	= "\x37\x36\x35\x34\x33\x32\x31\x20"
 			  "\x4e\x6f\x77\x20\x69\x73\x20\x74"
 			  "\x68\x65\x20\x74\x69\x6d\x65\x20",
-		.ilen	= 24,
-		.result	= "\xcc\xd1\x73\xff\xab\x20\x39\xf4"
+		.ctext	= "\xcc\xd1\x73\xff\xab\x20\x39\xf4"
 			  "\xac\xd8\xae\xfd\xdf\xd8\xa1\xeb"
 			  "\x46\x8e\x91\x15\x78\x88\xba\x68",
-		.rlen	= 24,
+		.len	= 24,
 		.np	= 2,
 		.tap	= { 13, 11 }
 	}, { /* Generated with Crypto++ */
 		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55",
 		.klen	= 8,
 		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47",
-		.input	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
 			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
 			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
 			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
@@ -5913,8 +5809,7 @@
 			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
 			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
 			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB",
-		.ilen	= 248,
-		.result	= "\x71\xCC\x56\x1C\x87\x2C\x43\x20"
+		.ctext	= "\x71\xCC\x56\x1C\x87\x2C\x43\x20"
 			  "\x1C\x20\x13\x09\xF9\x2B\x40\x47"
 			  "\x99\x10\xD1\x1B\x65\x33\x33\xBA"
 			  "\x88\x0D\xA2\xD1\x86\xFF\x4D\xF4"
@@ -5945,128 +5840,19 @@
 			  "\xD7\x07\x8A\xD7\x18\x92\x36\x8C"
 			  "\x82\xA9\xBD\x6A\x31\x91\x39\x11"
 			  "\xC6\x4A\xF3\x55\xC7\x29\x2E\x63",
-		.rlen	= 248,
+		.len	= 248,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 248 - 10, 2, 8 },
 	},
 };
 
-static const struct cipher_testvec des_cbc_dec_tv_template[] = {
-	{ /* FIPS Pub 81 */
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.iv	= "\x12\x34\x56\x78\x90\xab\xcd\xef",
-		.input	= "\xe5\xc7\xcd\xde\x87\x2b\xf2\x7c",
-		.ilen	= 8,
-		.result	= "\x4e\x6f\x77\x20\x69\x73\x20\x74",
-		.rlen	= 8,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.iv	= "\xe5\xc7\xcd\xde\x87\x2b\xf2\x7c",
-		.input	= "\x43\xe9\x34\x00\x8c\x38\x9c\x0f",
-		.ilen	= 8,
-		.result	= "\x68\x65\x20\x74\x69\x6d\x65\x20",
-		.rlen	= 8,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.iv	= "\x43\xe9\x34\x00\x8c\x38\x9c\x0f",
-		.input	= "\x68\x37\x88\x49\x9a\x7c\x05\xf6",
-		.ilen	= 8,
-		.result	= "\x66\x6f\x72\x20\x61\x6c\x6c\x20",
-		.rlen	= 8,
-	}, { /* Copy of above, for chunk testing */
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.iv	= "\x43\xe9\x34\x00\x8c\x38\x9c\x0f",
-		.input	= "\x68\x37\x88\x49\x9a\x7c\x05\xf6",
-		.ilen	= 8,
-		.result	= "\x66\x6f\x72\x20\x61\x6c\x6c\x20",
-		.rlen	= 8,
-		.np	= 2,
-		.tap	= { 4, 4 }
-	}, { /* Generated with Crypto++ */
-		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55",
-		.klen	= 8,
-		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47",
-		.input	= "\x71\xCC\x56\x1C\x87\x2C\x43\x20"
-			  "\x1C\x20\x13\x09\xF9\x2B\x40\x47"
-			  "\x99\x10\xD1\x1B\x65\x33\x33\xBA"
-			  "\x88\x0D\xA2\xD1\x86\xFF\x4D\xF4"
-			  "\x5A\x0C\x12\x96\x32\x57\xAA\x26"
-			  "\xA7\xF4\x32\x8D\xBC\x10\x31\x9E"
-			  "\x81\x72\x74\xDE\x30\x19\x69\x49"
-			  "\x54\x9C\xC3\xEB\x0B\x97\xDD\xD1"
-			  "\xE8\x6D\x0D\x05\x83\xA5\x12\x08"
-			  "\x47\xF8\x88\x03\x86\x51\x3C\xEF"
-			  "\xE7\x11\x73\x4D\x44\x2B\xE2\x16"
-			  "\xE8\xA5\x06\x50\x66\x70\x0E\x14"
-			  "\xBA\x21\x3B\xD5\x23\x5B\xA7\x8F"
-			  "\x56\xB6\xA7\x44\xDB\x86\xAB\x69"
-			  "\x33\x3C\xBE\x64\xC4\x22\xD3\xFE"
-			  "\x49\x90\x88\x6A\x09\x8F\x76\x59"
-			  "\xCB\xB7\xA0\x2D\x79\x75\x92\x8A"
-			  "\x82\x1D\xC2\xFE\x09\x1F\x78\x6B"
-			  "\x2F\xD6\xA4\x87\x1E\xC4\x53\x63"
-			  "\x80\x02\x61\x2F\xE3\x46\xB6\xB5"
-			  "\xAA\x95\xF4\xEE\xA7\x64\x2B\x4F"
-			  "\x20\xCF\xD2\x47\x4E\x39\x65\xB3"
-			  "\x11\x87\xA2\x6C\x49\x7E\x36\xC7"
-			  "\x62\x8B\x48\x0D\x6A\x64\x00\xBD"
-			  "\x71\x91\x8C\xE9\x70\x19\x01\x4F"
-			  "\x4E\x68\x23\xBA\xDA\x24\x2E\x45"
-			  "\x02\x14\x33\x21\xAE\x58\x4B\xCF"
-			  "\x3B\x4B\xE8\xF8\xF6\x4F\x34\x93"
-			  "\xD7\x07\x8A\xD7\x18\x92\x36\x8C"
-			  "\x82\xA9\xBD\x6A\x31\x91\x39\x11"
-			  "\xC6\x4A\xF3\x55\xC7\x29\x2E\x63",
-		.ilen	= 248,
-		.result	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
-			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
-			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
-			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
-			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
-			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
-			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
-			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
-			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
-			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
-			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
-			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
-			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
-			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
-			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
-			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
-			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
-			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
-			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
-			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
-			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
-			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
-			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
-			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
-			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
-			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
-			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
-			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
-			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
-			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
-			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB",
-		.rlen	= 248,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 248 - 10, 2, 8 },
-	},
-};
-
-static const struct cipher_testvec des_ctr_enc_tv_template[] = {
+static const struct cipher_testvec des_ctr_tv_template[] = {
 	{ /* Generated with Crypto++ */
 		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55",
 		.klen	= 8,
 		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
 			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
 			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
 			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
@@ -6097,8 +5883,7 @@
 			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
 			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
 			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB",
-		.ilen	= 248,
-		.result	= "\x2F\x96\x06\x0F\x50\xC9\x68\x03"
+		.ctext	= "\x2F\x96\x06\x0F\x50\xC9\x68\x03"
 			  "\x0F\x31\xD4\x64\xA5\x29\x77\x35"
 			  "\xBC\x7A\x9F\x19\xE7\x0D\x33\x3E"
 			  "\x12\x0B\x8C\xAE\x48\xAE\xD9\x02"
@@ -6129,7 +5914,7 @@
 			  "\x5C\xC4\x15\xC9\x9A\x21\xC5\xCD"
 			  "\x19\x7F\x99\x19\x53\xCE\x1D\x14"
 			  "\x69\x74\xA1\x06\x46\x0F\x4E\x75",
-		.rlen	= 248,
+		.len	= 248,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 248 - 10, 2, 8 },
@@ -6137,7 +5922,7 @@
 		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55",
 		.klen	= 8,
 		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47",
-		.input	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
 			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
 			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
 			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
@@ -6168,8 +5953,7 @@
 			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
 			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
 			  "\xC6\x2F\xBB\x24\x8D\x19\x82",
-		.ilen	= 247,
-		.result	= "\x62\xE5\xF4\xDC\x99\xE7\x89\xE3"
+		.ctext	= "\x62\xE5\xF4\xDC\x99\xE7\x89\xE3"
 			  "\xF4\x10\xCC\x21\x99\xEB\xDC\x15"
 			  "\x19\x13\x93\x27\x9D\xB6\x6F\x45"
 			  "\x17\x55\x61\x72\xC8\xD3\x7F\xA5"
@@ -6200,193 +5984,44 @@
 			  "\xDF\x85\x2D\xE1\xB2\xD6\xAB\x94"
 			  "\xA5\xA6\xE7\xB0\x51\x36\x52\x37"
 			  "\x91\x45\x05\x3E\x58\xBF\x32",
-		.rlen	= 247,
+		.len	= 247,
 		.also_non_np = 1,
 		.np	= 2,
 		.tap	= { 247 - 8, 8 },
 	},
 };
 
-static const struct cipher_testvec des_ctr_dec_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55",
-		.klen	= 8,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x2F\x96\x06\x0F\x50\xC9\x68\x03"
-			  "\x0F\x31\xD4\x64\xA5\x29\x77\x35"
-			  "\xBC\x7A\x9F\x19\xE7\x0D\x33\x3E"
-			  "\x12\x0B\x8C\xAE\x48\xAE\xD9\x02"
-			  "\x0A\xD4\xB0\xD6\x37\xB2\x65\x1C"
-			  "\x4B\x65\xEB\x24\xB5\x8E\xAD\x47"
-			  "\x0D\xDA\x79\x77\xA0\x29\xA0\x2B"
-			  "\xC8\x0F\x85\xDC\x03\x13\xA9\x04"
-			  "\x19\x40\xBE\xBE\x5C\x49\x4A\x69"
-			  "\xED\xE8\xE1\x9E\x14\x43\x74\xDE"
-			  "\xEC\x6E\x11\x3F\x36\xEF\x7B\xFB"
-			  "\xBE\x4C\x91\x43\x22\x65\x72\x48"
-			  "\xE2\x12\xED\x88\xAC\xA7\xC9\x91"
-			  "\x14\xA2\x36\x1C\x29\xFF\xC8\x4F"
-			  "\x72\x5C\x4B\xB0\x1E\x93\xC2\xFA"
-			  "\x9D\x53\x86\xA0\xAE\xC6\xB7\x3C"
-			  "\x59\x0C\xD0\x8F\xA6\xD8\xA4\x31"
-			  "\xB7\x30\x1C\x21\x38\xFB\x68\x8C"
-			  "\x2E\xF5\x6E\x73\xC3\x16\x5F\x12"
-			  "\x0C\x33\xB9\x1E\x7B\x70\xDE\x86"
-			  "\x32\xB3\xC1\x16\xAB\xD9\x49\x0B"
-			  "\x96\x28\x72\x6B\xF3\x30\xA9\xEB"
-			  "\x69\xE2\x1E\x58\x46\xA2\x8E\xC7"
-			  "\xC0\xEF\x07\xB7\x77\x2C\x00\x05"
-			  "\x46\xBD\xFE\x53\x81\x8B\xA4\x03"
-			  "\x20\x0F\xDB\x78\x0B\x1F\x53\x04"
-			  "\x4C\x60\x4C\xC3\x2A\x86\x86\x7E"
-			  "\x13\xD2\x26\xED\x5D\x3E\x9C\xF2"
-			  "\x5C\xC4\x15\xC9\x9A\x21\xC5\xCD"
-			  "\x19\x7F\x99\x19\x53\xCE\x1D\x14"
-			  "\x69\x74\xA1\x06\x46\x0F\x4E\x75",
-		.ilen	= 248,
-		.result	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
-			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
-			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
-			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
-			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
-			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
-			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
-			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
-			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
-			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
-			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
-			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
-			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
-			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
-			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
-			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
-			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
-			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
-			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
-			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
-			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
-			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
-			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
-			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
-			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
-			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
-			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
-			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
-			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
-			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
-			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB",
-		.rlen	= 248,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 248 - 10, 2, 8 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55",
-		.klen	= 8,
-		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47",
-		.input	= "\x62\xE5\xF4\xDC\x99\xE7\x89\xE3"
-			  "\xF4\x10\xCC\x21\x99\xEB\xDC\x15"
-			  "\x19\x13\x93\x27\x9D\xB6\x6F\x45"
-			  "\x17\x55\x61\x72\xC8\xD3\x7F\xA5"
-			  "\x32\xD0\xD3\x02\x15\xA4\x05\x23"
-			  "\x9C\x23\x61\x60\x77\x7B\x6C\x95"
-			  "\x26\x49\x42\x2E\xF3\xC1\x8C\x6D"
-			  "\xC8\x47\xD5\x94\xE7\x53\xC8\x23"
-			  "\x1B\xA5\x0B\xCB\x12\xD3\x7A\x12"
-			  "\xA4\x42\x15\x34\xF7\x5F\xDC\x58"
-			  "\x5B\x58\x4C\xAD\xD1\x33\x8E\xE6"
-			  "\xE5\xA0\xDA\x4D\x94\x3D\x63\xA8"
-			  "\x02\x82\xBB\x16\xB8\xDC\xB5\x58"
-			  "\xC3\x2D\x79\xE4\x25\x79\x43\xF9"
-			  "\x6D\xD3\xCA\xC0\xE8\x12\xD4\x7E"
-			  "\x04\x25\x79\xFD\x27\xFB\xC4\xEA"
-			  "\x32\x94\x48\x92\xF3\x68\x1A\x7F"
-			  "\x36\x33\x43\x79\xF7\xCA\xC2\x38"
-			  "\xC0\x68\xD4\x53\xA9\xCC\x43\x0C"
-			  "\x40\x57\x3E\xED\x00\x9F\x22\x6E"
-			  "\x80\x99\x0B\xCC\x40\x63\x46\x8A"
-			  "\xE8\xC4\x9B\x6D\x7A\x08\x6E\xA9"
-			  "\x6F\x84\xBC\xB3\xF4\x95\x0B\x2D"
-			  "\x6A\xBA\x37\x50\xC3\xCF\x9F\x7C"
-			  "\x59\x5E\xDE\x0B\x30\xFA\x34\x8A"
-			  "\xF8\xD1\xA2\xF8\x4E\xBD\x5D\x5E"
-			  "\x7D\x71\x99\xE0\xF6\xE5\x7C\xE0"
-			  "\x6D\xEE\x82\x89\x92\xD4\xF5\xD7"
-			  "\xDF\x85\x2D\xE1\xB2\xD6\xAB\x94"
-			  "\xA5\xA6\xE7\xB0\x51\x36\x52\x37"
-			  "\x91\x45\x05\x3E\x58\xBF\x32",
-		.ilen	= 247,
-		.result	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
-			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
-			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
-			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
-			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
-			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
-			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
-			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
-			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
-			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
-			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
-			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
-			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
-			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
-			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
-			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
-			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
-			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
-			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
-			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
-			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
-			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
-			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
-			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
-			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
-			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
-			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
-			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
-			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
-			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
-			  "\xC6\x2F\xBB\x24\x8D\x19\x82",
-		.rlen	= 247,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 247 - 8, 8 },
-	},
-};
-
-static const struct cipher_testvec des3_ede_enc_tv_template[] = {
+static const struct cipher_testvec des3_ede_tv_template[] = {
 	{ /* These are from openssl */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\x55\x55\x55\x55\x55\x55\x55\x55"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
 		.klen	= 24,
-		.input	= "\x73\x6f\x6d\x65\x64\x61\x74\x61",
-		.ilen	= 8,
-		.result	= "\x18\xd7\x48\xe5\x63\x62\x05\x72",
-		.rlen	= 8,
+		.ptext	= "\x73\x6f\x6d\x65\x64\x61\x74\x61",
+		.ctext	= "\x18\xd7\x48\xe5\x63\x62\x05\x72",
+		.len	= 8,
 	}, {
 		.key	= "\x03\x52\x02\x07\x67\x20\x82\x17"
 			  "\x86\x02\x87\x66\x59\x08\x21\x98"
 			  "\x64\x05\x6a\xbd\xfe\xa9\x34\x57",
 		.klen	= 24,
-		.input	= "\x73\x71\x75\x69\x67\x67\x6c\x65",
-		.ilen	= 8,
-		.result	= "\xc0\x7d\x2a\x0f\xa5\x66\xfa\x30",
-		.rlen	= 8,
+		.ptext	= "\x73\x71\x75\x69\x67\x67\x6c\x65",
+		.ctext	= "\xc0\x7d\x2a\x0f\xa5\x66\xfa\x30",
+		.len	= 8,
 	}, {
 		.key	= "\x10\x46\x10\x34\x89\x98\x80\x20"
 			  "\x91\x07\xd0\x15\x89\x19\x01\x01"
 			  "\x19\x07\x92\x10\x98\x1a\x01\x01",
 		.klen	= 24,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 8,
-		.result	= "\xe1\xef\x62\xc3\x32\xfe\x82\x5b",
-		.rlen	= 8,
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\xe1\xef\x62\xc3\x32\xfe\x82\x5b",
+		.len	= 8,
 	}, { /* Generated with Crypto++ */
 		.key	= "\xF3\x9C\xD6\xF3\x9C\xB9\x5A\x67"
 			  "\x00\x5A\x67\x00\x2D\xCE\xEB\x2D"
 			  "\xCE\xEB\xB4\x51\x72\xB4\x51\x72",
 		.klen	= 24,
-		.input	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
+		.ptext	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
 			  "\x8B\x12\x86\x69\xF0\x5B\xCF\x56"
 			  "\x39\xAD\x34\x9F\x66\xEA\x7D\xC4"
 			  "\x48\xD3\xBA\x0D\xB1\x18\xE3\x4A"
@@ -6448,8 +6083,7 @@
 			  "\xFB\x42\xF6\x59\x20\x54\x3F\x86"
 			  "\x69\x9D\x64\xCF\x56\xDA\xAD\x34"
 			  "\xB8\x03\xEA\x7D\xE1\x48\xD3\x47",
-		.ilen	= 496,
-		.result	= "\x4E\x9A\x40\x3D\x61\x7D\x17\xFA"
+		.ctext	= "\x4E\x9A\x40\x3D\x61\x7D\x17\xFA"
 			  "\x16\x86\x88\x0B\xD8\xAE\xF8\xE4"
 			  "\x81\x01\x04\x00\x76\xFA\xED\xD3"
 			  "\x44\x7E\x21\x9D\xF0\xFB\x2B\x64"
@@ -6511,186 +6145,21 @@
 			  "\x12\xE1\x71\x4A\xF9\x2A\xF5\xF6"
 			  "\x93\x03\xD7\x51\x09\xFA\xBE\x68"
 			  "\xD8\x45\xFF\x33\xBA\xBB\x2B\x63",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec des3_ede_dec_tv_template[] = {
-	{ /* These are from openssl */
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\x55\x55\x55\x55\x55\x55\x55\x55"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.klen	= 24,
-		.input	= "\x18\xd7\x48\xe5\x63\x62\x05\x72",
-		.ilen	= 8,
-		.result	= "\x73\x6f\x6d\x65\x64\x61\x74\x61",
-		.rlen	= 8,
-	}, {
-		.key	= "\x03\x52\x02\x07\x67\x20\x82\x17"
-			  "\x86\x02\x87\x66\x59\x08\x21\x98"
-			  "\x64\x05\x6a\xbd\xfe\xa9\x34\x57",
-		.klen	= 24,
-		.input	= "\xc0\x7d\x2a\x0f\xa5\x66\xfa\x30",
-		.ilen	= 8,
-		.result	= "\x73\x71\x75\x69\x67\x67\x6c\x65",
-		.rlen	= 8,
-	}, {
-		.key	= "\x10\x46\x10\x34\x89\x98\x80\x20"
-			  "\x91\x07\xd0\x15\x89\x19\x01\x01"
-			  "\x19\x07\x92\x10\x98\x1a\x01\x01",
-		.klen	= 24,
-		.input	= "\xe1\xef\x62\xc3\x32\xfe\x82\x5b",
-		.ilen	= 8,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 8,
-	}, { /* Generated with Crypto++ */
-		.key	= "\xF3\x9C\xD6\xF3\x9C\xB9\x5A\x67"
-			  "\x00\x5A\x67\x00\x2D\xCE\xEB\x2D"
-			  "\xCE\xEB\xB4\x51\x72\xB4\x51\x72",
-		.klen	= 24,
-		.input	= "\x4E\x9A\x40\x3D\x61\x7D\x17\xFA"
-			  "\x16\x86\x88\x0B\xD8\xAE\xF8\xE4"
-			  "\x81\x01\x04\x00\x76\xFA\xED\xD3"
-			  "\x44\x7E\x21\x9D\xF0\xFB\x2B\x64"
-			  "\xCA\x4E\x90\xE0\xC0\x63\x28\x92"
-			  "\xF3\x1F\xA4\x53\x2C\x77\xCC\x77"
-			  "\x69\x56\xD0\x19\xAD\x00\x2D\x97"
-			  "\xBC\xDE\x49\x6A\x82\xBC\x16\xE2"
-			  "\x2F\x3E\x72\xEE\xD1\xCE\xFC\x1B"
-			  "\xEA\x32\x56\xE4\x0B\xAF\x27\x36"
-			  "\xAF\x08\xB9\x61\xB7\x48\x23\x27"
-			  "\xEE\x4D\xC8\x79\x56\x06\xEB\xC7"
-			  "\x5B\xCA\x0A\xC6\x5E\x5C\xCB\xB6"
-			  "\x9D\xDA\x04\x59\xE2\x09\x48\x7E"
-			  "\x6B\x37\xC6\xFE\x92\xA9\x1E\x6E"
-			  "\x0D\x19\xFA\x33\x0F\xEE\x36\x68"
-			  "\x11\xBB\xF9\x5A\x73\xAB\x3A\xEA"
-			  "\xAC\x28\xD8\xD5\x27\xE8\x6B\x16"
-			  "\x45\x86\x50\x01\x70\x35\x99\x92"
-			  "\xDF\x0C\x07\x88\x8B\x7F\x9E\x4B"
-			  "\xD2\x04\x84\x90\xC4\x27\xDF\x0A"
-			  "\x49\xA8\xA7\x1A\x6D\x78\x16\xCA"
-			  "\xB3\x18\x5C\xC3\x93\x63\x5A\x68"
-			  "\x77\x02\xBA\xED\x62\x71\xB1\xD9"
-			  "\x5E\xE5\x6F\x1A\xCC\x1D\xBE\x2E"
-			  "\x11\xF3\xA6\x97\xCA\x8E\xBF\xB4"
-			  "\x56\xA1\x36\x6B\xB1\x0A\x3E\x70"
-			  "\xEA\xD7\xCD\x72\x7B\x79\xC8\xAD"
-			  "\x6B\xFE\xFB\xBA\x64\xAE\x19\xC1"
-			  "\x82\xCF\x8A\xA1\x50\x17\x7F\xB2"
-			  "\x6F\x7B\x0F\x52\xC5\x3E\x4A\x52"
-			  "\x3F\xD9\x3F\x01\xA6\x41\x1A\xB3"
-			  "\xB3\x7A\x0E\x8E\x75\xB2\xB1\x5F"
-			  "\xDB\xEA\x84\x13\x26\x6C\x85\x4E"
-			  "\xAE\x6B\xDC\xE7\xE7\xAD\xB0\x06"
-			  "\x5C\xBA\x92\xD0\x30\xBB\x8D\xD2"
-			  "\xAE\x4C\x70\x85\xA0\x07\xE3\x2C"
-			  "\xD1\x27\x9C\xCF\xDB\x13\xB7\xE5"
-			  "\xF9\x6A\x02\xD0\x39\x9D\xB6\xE7"
-			  "\xD1\x17\x25\x08\xF9\xA9\xA6\x67"
-			  "\x38\x80\xD1\x22\xAB\x1A\xD7\x26"
-			  "\xAD\xCA\x19\x1B\xFA\x18\xA7\x57"
-			  "\x31\xEC\xC9\xED\xDB\x79\xC0\x48"
-			  "\xAC\x31\x9F\x03\x8B\x62\x5B\x7E"
-			  "\x0E\xA6\xD0\x64\xEE\xEA\x00\xFC"
-			  "\x58\xC8\xDE\x51\x4E\x17\x15\x11"
-			  "\x66\x58\xB6\x90\xDC\xDF\xA1\x49"
-			  "\xCA\x79\xE9\x31\x31\x42\xDC\x56"
-			  "\x0B\xCD\xB6\x0D\xC7\x64\xF7\x19"
-			  "\xD9\x42\x05\x7F\xBC\x2F\xFC\x90"
-			  "\xAE\x29\x86\xAA\x43\x7A\x4F\x6B"
-			  "\xCE\xEA\xBC\x31\x8D\x65\x9D\x46"
-			  "\xEA\x77\xB4\xF9\x58\xEA\x5D\x84"
-			  "\xE4\xDC\x14\xBB\xBD\x15\x0E\xDA"
-			  "\xD8\xE4\xA4\x5D\x61\xF9\x58\x0F"
-			  "\xE4\x82\x77\xCE\x87\xC0\x09\xF0"
-			  "\xD6\x10\x9E\x34\xE1\x0C\x67\x55"
-			  "\x7B\x6D\xD5\x51\x4B\x00\xEE\xBA"
-			  "\xF2\x7B\xBE\x75\x07\x42\x9D\x99"
-			  "\x12\xE1\x71\x4A\xF9\x2A\xF5\xF6"
-			  "\x93\x03\xD7\x51\x09\xFA\xBE\x68"
-			  "\xD8\x45\xFF\x33\xBA\xBB\x2B\x63",
-		.ilen	= 496,
-		.result	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
-			  "\x8B\x12\x86\x69\xF0\x5B\xCF\x56"
-			  "\x39\xAD\x34\x9F\x66\xEA\x7D\xC4"
-			  "\x48\xD3\xBA\x0D\xB1\x18\xE3\x4A"
-			  "\xFE\x41\x28\x5C\x27\x8E\x11\x85"
-			  "\x6C\xF7\x5E\xC2\x55\x3C\xA0\x0B"
-			  "\x92\x65\xE9\x70\xDB\x4F\xD6\xB9"
-			  "\x00\xB4\x1F\xE6\x49\xFD\x44\x2F"
-			  "\x53\x3A\x8D\x14\x98\x63\xCA\x5D"
-			  "\xC1\xA8\x33\xA7\x0E\x91\x78\xEC"
-			  "\x77\xDE\x42\xD5\xBC\x07\x8B\x12"
-			  "\xE5\x4C\xF0\x5B\x22\x56\x39\x80"
-			  "\x6B\x9F\x66\xC9\x50\xC4\xAF\x36"
-			  "\xBA\x0D\x94\x7F\xE3\x4A\xDD\x41"
-			  "\x28\xB3\x1A\x8E\x11\xF8\x43\xF7"
-			  "\x5E\x21\x55\x3C\x87\x6E\x92\x65"
-			  "\xCC\x57\xDB\xA2\x35\xB9\x00\xEB"
-			  "\x72\xE6\x49\xD0\x44\x2F\xB6\x19"
-			  "\x8D\x14\xFF\x46\xCA\x5D\x24\xA8"
-			  "\x33\x9A\x6D\x91\x78\xC3\x77\xDE"
-			  "\xA1\x08\xBC\x07\xEE\x71\xE5\x4C"
-			  "\xD7\x5B\x22\xB5\x1C\x80\x6B\xF2"
-			  "\x45\xC9\x50\x3B\xAF\x36\x99\x60"
-			  "\x94\x7F\xC6\x4A\xDD\xA4\x0F\xB3"
-			  "\x1A\xED\x74\xF8\x43\x2A\x5E\x21"
-			  "\x88\x13\x87\x6E\xF1\x58\xCC\x57"
-			  "\x3E\xA2\x35\x9C\x67\xEB\x72\xC5"
-			  "\x49\xD0\xBB\x02\xB6\x19\xE0\x4B"
-			  "\xFF\x46\x29\x5D\x24\x8F\x16\x9A"
-			  "\x6D\xF4\x5F\xC3\xAA\x3D\xA1\x08"
-			  "\x93\x7A\xEE\x71\xD8\x4C\xD7\xBE"
-			  "\x01\xB5\x1C\xE7\x4E\xF2\x45\x2C"
-			  "\x50\x3B\x82\x15\x99\x60\xCB\x52"
-			  "\xC6\xA9\x30\xA4\x0F\x96\x79\xED"
-			  "\x74\xDF\x43\x2A\xBD\x04\x88\x13"
-			  "\xFA\x4D\xF1\x58\x23\x57\x3E\x81"
-			  "\x68\x9C\x67\xCE\x51\xC5\xAC\x37"
-			  "\xBB\x02\x95\x7C\xE0\x4B\xD2\x46"
-			  "\x29\xB0\x1B\x8F\x16\xF9\x40\xF4"
-			  "\x5F\x26\xAA\x3D\x84\x6F\x93\x7A"
-			  "\xCD\x54\xD8\xA3\x0A\xBE\x01\xE8"
-			  "\x73\xE7\x4E\xD1\x45\x2C\xB7\x1E"
-			  "\x82\x15\xFC\x47\xCB\x52\x25\xA9"
-			  "\x30\x9B\x62\x96\x79\xC0\x74\xDF"
-			  "\xA6\x09\xBD\x04\xEF\x76\xFA\x4D"
-			  "\xD4\x58\x23\x8A\x1D\x81\x68\xF3"
-			  "\x5A\xCE\x51\x38\xAC\x37\x9E\x61"
-			  "\x95\x7C\xC7\x4B\xD2\xA5\x0C\xB0"
-			  "\x1B\xE2\x75\xF9\x40\x2B\x5F\x26"
-			  "\x89\x10\x84\x6F\xF6\x59\xCD\x54"
-			  "\x3F\xA3\x0A\x9D\x64\xE8\x73\xDA"
-			  "\x4E\xD1\xB8\x03\xB7\x1E\xE1\x48"
-			  "\xFC\x47\x2E\x52\x25\x8C\x17\x9B"
-			  "\x62\xF5\x5C\xC0\xAB\x32\xA6\x09"
-			  "\x90\x7B\xEF\x76\xD9\x4D\xD4\xBF"
-			  "\x06\x8A\x1D\xE4\x4F\xF3\x5A\x2D"
-			  "\x51\x38\x83\x6A\x9E\x61\xC8\x53"
-			  "\xC7\xAE\x31\xA5\x0C\x97\x7E\xE2"
-			  "\x75\xDC\x40\x2B\xB2\x05\x89\x10"
-			  "\xFB\x42\xF6\x59\x20\x54\x3F\x86"
-			  "\x69\x9D\x64\xCF\x56\xDA\xAD\x34"
-			  "\xB8\x03\xEA\x7D\xE1\x48\xD3\x47",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec des3_ede_cbc_enc_tv_template[] = {
+static const struct cipher_testvec des3_ede_cbc_tv_template[] = {
 	{ /* Generated from openssl */
 		.key	= "\xE9\xC0\xFF\x2E\x76\x0B\x64\x24"
 			  "\x44\x4D\x99\x5A\x12\xD6\x40\xC0"
 			  "\xEA\xC2\x84\xE8\x14\x95\xDB\xE8",
 		.klen	= 24,
 		.iv	= "\x7D\x33\x88\x93\x0F\x93\xB2\x42",
-		.input	= "\x6f\x54\x20\x6f\x61\x4d\x79\x6e"
+		.ptext	= "\x6f\x54\x20\x6f\x61\x4d\x79\x6e"
 			  "\x53\x20\x63\x65\x65\x72\x73\x74"
 			  "\x54\x20\x6f\x6f\x4d\x20\x6e\x61"
 			  "\x20\x79\x65\x53\x72\x63\x74\x65"
@@ -6706,8 +6175,7 @@
 			  "\x20\x6f\x61\x4d\x79\x6e\x53\x20"
 			  "\x63\x65\x65\x72\x73\x74\x54\x20"
 			  "\x6f\x6f\x4d\x20\x6e\x61\x0a\x79",
-		.ilen	= 128,
-		.result	= "\x0e\x2d\xb6\x97\x3c\x56\x33\xf4"
+		.ctext	= "\x0e\x2d\xb6\x97\x3c\x56\x33\xf4"
 			  "\x67\x17\x21\xc7\x6e\x8a\xd5\x49"
 			  "\x74\xb3\x49\x05\xc5\x1c\xd0\xed"
 			  "\x12\x56\x5c\x53\x96\xb6\x00\x7d"
@@ -6723,7 +6191,7 @@
 			  "\xd6\xbc\x5a\xd3\x2d\x54\x43\xcc"
 			  "\x9d\xde\xa5\x70\xe9\x42\x45\x8a"
 			  "\x6b\xfa\xb1\x91\x13\xb0\xd9\x19",
-		.rlen	= 128,
+		.len	= 128,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x9C\xD6\xF3\x9C\xB9\x5A\x67\x00"
 			  "\x5A\x67\x00\x2D\xCE\xEB\x2D\xCE"
@@ -6731,7 +6199,7 @@
 		.klen	= 24,
 		.iv	= "\xB2\xD7\x48\xED\x06\x44\xF9\x12"
 			  "\xB7\x28\x4D\x83\x24\x59\xF2\x17",
-		.input	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
+		.ptext	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
 			  "\x8B\x12\x86\x69\xF0\x5B\xCF\x56"
 			  "\x39\xAD\x34\x9F\x66\xEA\x7D\xC4"
 			  "\x48\xD3\xBA\x0D\xB1\x18\xE3\x4A"
@@ -6793,8 +6261,7 @@
 			  "\xFB\x42\xF6\x59\x20\x54\x3F\x86"
 			  "\x69\x9D\x64\xCF\x56\xDA\xAD\x34"
 			  "\xB8\x03\xEA\x7D\xE1\x48\xD3\x47",
-		.ilen	= 496,
-		.result	= "\xF8\xF6\xB5\x60\x5C\x5A\x75\x84"
+		.ctext	= "\xF8\xF6\xB5\x60\x5C\x5A\x75\x84"
 			  "\x87\x81\x53\xBA\xC9\x6F\xEC\xD5"
 			  "\x1E\x68\x8E\x85\x12\x86\x1D\x38"
 			  "\x1C\x91\x40\xCC\x69\x6A\xD5\x35"
@@ -6856,194 +6323,14 @@
 			  "\x90\xE9\xFA\x4B\x00\x10\xAC\x58"
 			  "\x83\x70\xFF\x86\xE6\xAA\x0F\x1F"
 			  "\x95\x63\x73\xA2\x44\xAC\xF8\xA5",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec des3_ede_cbc_dec_tv_template[] = {
-	{ /* Generated from openssl */
-		.key	= "\xE9\xC0\xFF\x2E\x76\x0B\x64\x24"
-			  "\x44\x4D\x99\x5A\x12\xD6\x40\xC0"
-			  "\xEA\xC2\x84\xE8\x14\x95\xDB\xE8",
-		.klen	= 24,
-		.iv	= "\x7D\x33\x88\x93\x0F\x93\xB2\x42",
-		.input	= "\x0e\x2d\xb6\x97\x3c\x56\x33\xf4"
-			  "\x67\x17\x21\xc7\x6e\x8a\xd5\x49"
-			  "\x74\xb3\x49\x05\xc5\x1c\xd0\xed"
-			  "\x12\x56\x5c\x53\x96\xb6\x00\x7d"
-			  "\x90\x48\xfc\xf5\x8d\x29\x39\xcc"
-			  "\x8a\xd5\x35\x18\x36\x23\x4e\xd7"
-			  "\x76\xd1\xda\x0c\x94\x67\xbb\x04"
-			  "\x8b\xf2\x03\x6c\xa8\xcf\xb6\xea"
-			  "\x22\x64\x47\xaa\x8f\x75\x13\xbf"
-			  "\x9f\xc2\xc3\xf0\xc9\x56\xc5\x7a"
-			  "\x71\x63\x2e\x89\x7b\x1e\x12\xca"
-			  "\xe2\x5f\xaf\xd8\xa4\xf8\xc9\x7a"
-			  "\xd6\xf9\x21\x31\x62\x44\x45\xa6"
-			  "\xd6\xbc\x5a\xd3\x2d\x54\x43\xcc"
-			  "\x9d\xde\xa5\x70\xe9\x42\x45\x8a"
-			  "\x6b\xfa\xb1\x91\x13\xb0\xd9\x19",
-		.ilen	= 128,
-		.result	= "\x6f\x54\x20\x6f\x61\x4d\x79\x6e"
-			  "\x53\x20\x63\x65\x65\x72\x73\x74"
-			  "\x54\x20\x6f\x6f\x4d\x20\x6e\x61"
-			  "\x20\x79\x65\x53\x72\x63\x74\x65"
-			  "\x20\x73\x6f\x54\x20\x6f\x61\x4d"
-			  "\x79\x6e\x53\x20\x63\x65\x65\x72"
-			  "\x73\x74\x54\x20\x6f\x6f\x4d\x20"
-			  "\x6e\x61\x20\x79\x65\x53\x72\x63"
-			  "\x74\x65\x20\x73\x6f\x54\x20\x6f"
-			  "\x61\x4d\x79\x6e\x53\x20\x63\x65"
-			  "\x65\x72\x73\x74\x54\x20\x6f\x6f"
-			  "\x4d\x20\x6e\x61\x20\x79\x65\x53"
-			  "\x72\x63\x74\x65\x20\x73\x6f\x54"
-			  "\x20\x6f\x61\x4d\x79\x6e\x53\x20"
-			  "\x63\x65\x65\x72\x73\x74\x54\x20"
-			  "\x6f\x6f\x4d\x20\x6e\x61\x0a\x79",
-		.rlen	= 128,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x9C\xD6\xF3\x9C\xB9\x5A\x67\x00"
-			  "\x5A\x67\x00\x2D\xCE\xEB\x2D\xCE"
-			  "\xEB\xB4\x51\x72\xB4\x51\x72\x1F",
-		.klen	= 24,
-		.iv	= "\xB2\xD7\x48\xED\x06\x44\xF9\x12"
-			  "\xB7\x28\x4D\x83\x24\x59\xF2\x17",
-		.input	= "\xF8\xF6\xB5\x60\x5C\x5A\x75\x84"
-			  "\x87\x81\x53\xBA\xC9\x6F\xEC\xD5"
-			  "\x1E\x68\x8E\x85\x12\x86\x1D\x38"
-			  "\x1C\x91\x40\xCC\x69\x6A\xD5\x35"
-			  "\x0D\x7C\xB5\x07\x7C\x7B\x2A\xAF"
-			  "\x32\xBC\xA1\xB3\x84\x31\x1B\x3C"
-			  "\x0A\x2B\xFA\xD3\x9F\xB0\x8C\x37"
-			  "\x8F\x9D\xA7\x6D\x6C\xFA\xD7\x90"
-			  "\xE3\x69\x54\xED\x3A\xC4\xF1\x6B"
-			  "\xB1\xCC\xFB\x7D\xD8\x8E\x17\x0B"
-			  "\x9C\xF6\x4C\xD6\xFF\x03\x4E\xD9"
-			  "\xE6\xA5\xAD\x25\xE6\x17\x69\x63"
-			  "\x11\x35\x61\x94\x88\x7B\x1C\x48"
-			  "\xF1\x24\x20\x29\x6B\x93\x1A\x8E"
-			  "\x43\x03\x89\xD8\xB1\xDA\x47\x7B"
-			  "\x79\x3A\x83\x76\xDA\xAE\xC6\xBB"
-			  "\x22\xF8\xE8\x3D\x9A\x65\x54\xD8"
-			  "\x4C\xE9\xE7\xE4\x63\x2F\x5C\x73"
-			  "\x5A\xC3\xAE\x46\xA8\xCD\x57\xE6"
-			  "\x67\x88\xA5\x20\x6F\x5F\x97\xC7"
-			  "\xCC\x15\xA2\x0A\x93\xEA\x33\xE7"
-			  "\x03\x5F\xEC\x64\x30\x6F\xEE\xD7"
-			  "\x7E\xDF\xD6\xE9\x6F\x3F\xD6\x1E"
-			  "\xBE\x67\x6C\x5B\x97\xA0\x09\xE6"
-			  "\xEE\xFE\x55\xA3\x29\x65\xE0\x12"
-			  "\xA1\x6A\x8A\x6F\xF2\xE6\xF1\x96"
-			  "\x87\xFB\x9C\x05\xDD\x80\xEC\xFF"
-			  "\xC5\xED\x50\xFE\xFC\x91\xCD\xCE"
-			  "\x25\x2C\x5F\xD9\xAD\x95\x7D\x99"
-			  "\xF0\x05\xC4\x71\x46\x5F\xF9\x0D"
-			  "\xD2\x63\xDF\x9B\x96\x2E\x2B\xA6"
-			  "\x2B\x1C\xD5\xFB\x96\x24\x60\x60"
-			  "\x54\x40\xB8\x62\xA4\xF8\x46\x95"
-			  "\x73\x28\xA3\xA6\x16\x2B\x17\xE7"
-			  "\x7A\xF8\x62\x54\x3B\x64\x69\xE1"
-			  "\x71\x34\x29\x5B\x4E\x05\x9B\xFA"
-			  "\x5E\xF1\x96\xB7\xCE\x16\x9B\x59"
-			  "\xF1\x1A\x4C\x51\x26\xFD\x79\xE2"
-			  "\x3B\x8E\x71\x69\x6A\x91\xB6\x65"
-			  "\x32\x09\xB8\xE4\x09\x1F\xEA\x39"
-			  "\xCE\x20\x65\x9F\xD6\xD1\xC7\xF0"
-			  "\x73\x50\x08\x56\x20\x9B\x94\x23"
-			  "\x14\x39\xB7\x2B\xB1\x2D\x6D\x6F"
-			  "\x41\x5B\xCC\xE2\x18\xAE\x62\x89"
-			  "\x78\x8E\x67\x23\xD0\xFB\x2B\xE5"
-			  "\x25\xC9\x48\x97\xB5\xD3\x17\xD5"
-			  "\x6A\x9F\xA7\x48\x0C\x2B\x73\x3B"
-			  "\x57\x08\xAE\x91\xF2\xB7\x57\x89"
-			  "\xF4\xD0\xB0\x07\xB0\x42\x6C\xAF"
-			  "\x98\x1A\xE7\xD1\xAC\x1E\xB5\x02"
-			  "\xD4\x56\x42\x79\x79\x7F\x2A\x77"
-			  "\x25\xE9\x7D\xC1\x88\x19\x2B\x49"
-			  "\x6F\x46\x59\xAB\x56\x1F\x61\xE0"
-			  "\x0C\x24\x9C\xC9\x5B\x63\xA9\x12"
-			  "\xCF\x88\x96\xB6\xA8\x24\xC6\xA8"
-			  "\x21\x85\x1A\x62\x7E\x34\xBB\xEB"
-			  "\xBD\x02\x2A\xC7\xD8\x89\x80\xC5"
-			  "\xB1\xBB\x60\xA5\x22\xFC\x6F\x38"
-			  "\x02\x80\xA3\x28\x22\x75\xE1\xE9"
-			  "\x90\xE9\xFA\x4B\x00\x10\xAC\x58"
-			  "\x83\x70\xFF\x86\xE6\xAA\x0F\x1F"
-			  "\x95\x63\x73\xA2\x44\xAC\xF8\xA5",
-		.ilen	= 496,
-		.result	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
-			  "\x8B\x12\x86\x69\xF0\x5B\xCF\x56"
-			  "\x39\xAD\x34\x9F\x66\xEA\x7D\xC4"
-			  "\x48\xD3\xBA\x0D\xB1\x18\xE3\x4A"
-			  "\xFE\x41\x28\x5C\x27\x8E\x11\x85"
-			  "\x6C\xF7\x5E\xC2\x55\x3C\xA0\x0B"
-			  "\x92\x65\xE9\x70\xDB\x4F\xD6\xB9"
-			  "\x00\xB4\x1F\xE6\x49\xFD\x44\x2F"
-			  "\x53\x3A\x8D\x14\x98\x63\xCA\x5D"
-			  "\xC1\xA8\x33\xA7\x0E\x91\x78\xEC"
-			  "\x77\xDE\x42\xD5\xBC\x07\x8B\x12"
-			  "\xE5\x4C\xF0\x5B\x22\x56\x39\x80"
-			  "\x6B\x9F\x66\xC9\x50\xC4\xAF\x36"
-			  "\xBA\x0D\x94\x7F\xE3\x4A\xDD\x41"
-			  "\x28\xB3\x1A\x8E\x11\xF8\x43\xF7"
-			  "\x5E\x21\x55\x3C\x87\x6E\x92\x65"
-			  "\xCC\x57\xDB\xA2\x35\xB9\x00\xEB"
-			  "\x72\xE6\x49\xD0\x44\x2F\xB6\x19"
-			  "\x8D\x14\xFF\x46\xCA\x5D\x24\xA8"
-			  "\x33\x9A\x6D\x91\x78\xC3\x77\xDE"
-			  "\xA1\x08\xBC\x07\xEE\x71\xE5\x4C"
-			  "\xD7\x5B\x22\xB5\x1C\x80\x6B\xF2"
-			  "\x45\xC9\x50\x3B\xAF\x36\x99\x60"
-			  "\x94\x7F\xC6\x4A\xDD\xA4\x0F\xB3"
-			  "\x1A\xED\x74\xF8\x43\x2A\x5E\x21"
-			  "\x88\x13\x87\x6E\xF1\x58\xCC\x57"
-			  "\x3E\xA2\x35\x9C\x67\xEB\x72\xC5"
-			  "\x49\xD0\xBB\x02\xB6\x19\xE0\x4B"
-			  "\xFF\x46\x29\x5D\x24\x8F\x16\x9A"
-			  "\x6D\xF4\x5F\xC3\xAA\x3D\xA1\x08"
-			  "\x93\x7A\xEE\x71\xD8\x4C\xD7\xBE"
-			  "\x01\xB5\x1C\xE7\x4E\xF2\x45\x2C"
-			  "\x50\x3B\x82\x15\x99\x60\xCB\x52"
-			  "\xC6\xA9\x30\xA4\x0F\x96\x79\xED"
-			  "\x74\xDF\x43\x2A\xBD\x04\x88\x13"
-			  "\xFA\x4D\xF1\x58\x23\x57\x3E\x81"
-			  "\x68\x9C\x67\xCE\x51\xC5\xAC\x37"
-			  "\xBB\x02\x95\x7C\xE0\x4B\xD2\x46"
-			  "\x29\xB0\x1B\x8F\x16\xF9\x40\xF4"
-			  "\x5F\x26\xAA\x3D\x84\x6F\x93\x7A"
-			  "\xCD\x54\xD8\xA3\x0A\xBE\x01\xE8"
-			  "\x73\xE7\x4E\xD1\x45\x2C\xB7\x1E"
-			  "\x82\x15\xFC\x47\xCB\x52\x25\xA9"
-			  "\x30\x9B\x62\x96\x79\xC0\x74\xDF"
-			  "\xA6\x09\xBD\x04\xEF\x76\xFA\x4D"
-			  "\xD4\x58\x23\x8A\x1D\x81\x68\xF3"
-			  "\x5A\xCE\x51\x38\xAC\x37\x9E\x61"
-			  "\x95\x7C\xC7\x4B\xD2\xA5\x0C\xB0"
-			  "\x1B\xE2\x75\xF9\x40\x2B\x5F\x26"
-			  "\x89\x10\x84\x6F\xF6\x59\xCD\x54"
-			  "\x3F\xA3\x0A\x9D\x64\xE8\x73\xDA"
-			  "\x4E\xD1\xB8\x03\xB7\x1E\xE1\x48"
-			  "\xFC\x47\x2E\x52\x25\x8C\x17\x9B"
-			  "\x62\xF5\x5C\xC0\xAB\x32\xA6\x09"
-			  "\x90\x7B\xEF\x76\xD9\x4D\xD4\xBF"
-			  "\x06\x8A\x1D\xE4\x4F\xF3\x5A\x2D"
-			  "\x51\x38\x83\x6A\x9E\x61\xC8\x53"
-			  "\xC7\xAE\x31\xA5\x0C\x97\x7E\xE2"
-			  "\x75\xDC\x40\x2B\xB2\x05\x89\x10"
-			  "\xFB\x42\xF6\x59\x20\x54\x3F\x86"
-			  "\x69\x9D\x64\xCF\x56\xDA\xAD\x34"
-			  "\xB8\x03\xEA\x7D\xE1\x48\xD3\x47",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec des3_ede_ctr_enc_tv_template[] = {
+static const struct cipher_testvec des3_ede_ctr_tv_template[] = {
 	{ /* Generated with Crypto++ */
 		.key	= "\x9C\xD6\xF3\x9C\xB9\x5A\x67\x00"
 			  "\x5A\x67\x00\x2D\xCE\xEB\x2D\xCE"
@@ -7051,7 +6338,7 @@
 		.klen	= 24,
 		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
 			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
+		.ptext	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
 			  "\x8B\x12\x86\x69\xF0\x5B\xCF\x56"
 			  "\x39\xAD\x34\x9F\x66\xEA\x7D\xC4"
 			  "\x48\xD3\xBA\x0D\xB1\x18\xE3\x4A"
@@ -7113,8 +6400,7 @@
 			  "\xFB\x42\xF6\x59\x20\x54\x3F\x86"
 			  "\x69\x9D\x64\xCF\x56\xDA\xAD\x34"
 			  "\xB8\x03\xEA\x7D\xE1\x48\xD3\x47",
-		.ilen	= 496,
-		.result	= "\x07\xC2\x08\x20\x72\x1F\x49\xEF"
+		.ctext	= "\x07\xC2\x08\x20\x72\x1F\x49\xEF"
 			  "\x19\xCD\x6F\x32\x53\x05\x22\x15"
 			  "\xA2\x85\x2B\xDB\x85\xD2\xD8\xB9"
 			  "\xDD\x0D\x1B\x45\xCB\x69\x11\xD4"
@@ -7176,7 +6462,7 @@
 			  "\x46\xB9\x91\xB6\xE7\x3D\x51\x42"
 			  "\xFD\x51\xB0\xC6\x2C\x63\x13\x78"
 			  "\x5C\xEE\xFC\xCF\xC4\x70\x00\x34",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
@@ -7187,7 +6473,7 @@
 		.klen	= 24,
 		.iv	= "\xB2\xD7\x48\xED\x06\x44\xF9\x12"
 			  "\xB7\x28\x4D\x83\x24\x59\xF2\x17",
-		.input	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
+		.ptext	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
 			  "\x8B\x12\x86\x69\xF0\x5B\xCF\x56"
 			  "\x39\xAD\x34\x9F\x66\xEA\x7D\xC4"
 			  "\x48\xD3\xBA\x0D\xB1\x18\xE3\x4A"
@@ -7250,8 +6536,7 @@
 			  "\x69\x9D\x64\xCF\x56\xDA\xAD\x34"
 			  "\xB8\x03\xEA\x7D\xE1\x48\xD3\x47"
 			  "\x2E\xB1\x18",
-		.ilen	= 499,
-		.result	= "\x23\xFF\x5C\x99\x75\xBB\x1F\xD4"
+		.ctext	= "\x23\xFF\x5C\x99\x75\xBB\x1F\xD4"
 			  "\xBC\x27\x9D\x36\x60\xA9\xC9\xF7"
 			  "\x94\x9D\x1B\xFF\x8E\x95\x57\x89"
 			  "\x8C\x2E\x33\x70\x43\x61\xE6\xD2"
@@ -7314,285 +6599,7 @@
 			  "\x5C\x60\x1F\x6E\xA7\xE2\xDC\xE7"
 			  "\x32\x0F\x05\x2F\xF2\x4C\x95\x3B"
 			  "\xF2\x79\xD9",
-		.rlen	= 499,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 499 - 16, 16 },
-	},
-};
-
-static const struct cipher_testvec des3_ede_ctr_dec_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\x9C\xD6\xF3\x9C\xB9\x5A\x67\x00"
-			  "\x5A\x67\x00\x2D\xCE\xEB\x2D\xCE"
-			  "\xEB\xB4\x51\x72\xB4\x51\x72\x1F",
-		.klen	= 24,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x07\xC2\x08\x20\x72\x1F\x49\xEF"
-			  "\x19\xCD\x6F\x32\x53\x05\x22\x15"
-			  "\xA2\x85\x2B\xDB\x85\xD2\xD8\xB9"
-			  "\xDD\x0D\x1B\x45\xCB\x69\x11\xD4"
-			  "\xEA\xBE\xB2\x45\x5D\x0C\xAE\xBE"
-			  "\xA0\xC1\x27\xAC\x65\x9F\x53\x7E"
-			  "\xAF\xC2\x1B\xB5\xB8\x6D\x36\x0C"
-			  "\x25\xC0\xF8\x6D\x0B\x29\x01\xDA"
-			  "\x13\x78\xDC\x89\x12\x12\x43\xFA"
-			  "\xF6\x12\xEF\x8D\x87\x62\x78\x83"
-			  "\xE2\xBE\x41\x20\x4C\x6D\x35\x1B"
-			  "\xD1\x0C\x30\xCF\xE2\xDE\x2B\x03"
-			  "\xBF\x45\x73\xD4\xE5\x59\x95\xD1"
-			  "\xB3\x9B\x27\x62\x97\xBD\xDE\x7F"
-			  "\xA4\xD2\x39\x80\xAA\x50\x23\xF0"
-			  "\x74\x88\x3D\xA8\x6A\x18\x79\x3B"
-			  "\xC4\x96\x6C\x8D\x22\x40\x92\x6E"
-			  "\xD6\xAD\x2A\x1F\xDE\x63\xC0\xE7"
-			  "\x07\xF7\x2D\xF7\xB5\xF3\xF0\xCC"
-			  "\x01\x7C\x2A\x9B\xC2\x10\xCA\xAA"
-			  "\xFD\x2B\x3F\xC5\xF3\xF6\xFC\x9B"
-			  "\x45\xDB\x53\xE4\x5B\xF3\xC9\x7B"
-			  "\x8E\x52\xFF\xC8\x02\xB8\xAC\x9D"
-			  "\xA1\x00\x39\xDA\x3D\x2D\x0E\x01"
-			  "\x09\x7D\x8D\x5E\xBE\x53\xB9\xB0"
-			  "\x8E\xE7\xE2\x96\x6A\xB2\x78\xEA"
-			  "\xDE\x23\x8B\xA5\xFA\x5C\xE3\xDA"
-			  "\xBF\x8E\x31\x6A\x55\xD1\x6A\xB2"
-			  "\xB5\x46\x6F\xA5\xF0\xEE\xBA\x1F"
-			  "\x9F\x98\xB0\x66\x4F\xD0\x3F\xA9"
-			  "\xDF\x5F\x58\xC4\xF4\xFF\x75\x5C"
-			  "\x40\x3A\x09\x7E\x6E\x1C\x97\xD4"
-			  "\xCC\xE7\xE7\x71\xCF\x0B\x15\x08"
-			  "\x71\xFA\x07\x97\xCD\xE6\xCA\x1D"
-			  "\x14\x28\x0C\xCF\x99\x13\x7A\xF1"
-			  "\xEB\xFA\xFA\x92\x07\xDE\x1D\xA1"
-			  "\xD3\x36\x69\xFE\x51\x4D\x9F\x2E"
-			  "\x83\x37\x4F\x1F\x48\x30\xED\x04"
-			  "\x4D\xA4\xEF\x3A\xCA\x76\xF4\x1C"
-			  "\x41\x8F\x63\x37\x78\x2F\x86\xA6"
-			  "\xEF\x41\x7E\xD2\xAF\x88\xAB\x67"
-			  "\x52\x71\xC3\x8E\xF8\x26\x93\x72"
-			  "\xAA\xD6\x0E\xE7\x0B\x46\xB1\x3A"
-			  "\xB4\x08\xA9\xA8\xA0\xCF\x20\x0C"
-			  "\x52\xBC\x8B\x05\x56\xB2\xBC\x31"
-			  "\x9B\x74\xB9\x29\x29\x96\x9A\x50"
-			  "\xDC\x45\xDC\x1A\xEB\x0C\x64\xD4"
-			  "\xD3\x05\x7E\x59\x55\xC3\xF4\x90"
-			  "\xC2\xAB\xF8\x9B\x8A\xDA\xCE\xA1"
-			  "\xC3\xF4\xAD\x77\xDD\x44\xC8\xAC"
-			  "\xA3\xF1\xC9\xD2\x19\x5C\xB0\xCA"
-			  "\xA2\x34\xC1\xF7\x6C\xFD\xAC\x65"
-			  "\x32\xDC\x48\xC4\xF2\x00\x6B\x77"
-			  "\xF1\x7D\x76\xAC\xC0\x31\x63\x2A"
-			  "\xA5\x3A\x62\xC8\x91\xB1\x03\x65"
-			  "\xCB\x43\xD1\x06\xDF\xC3\x67\xBC"
-			  "\xDC\xE0\xCD\x35\xCE\x49\x65\xA0"
-			  "\x52\x7B\xA7\x0D\x07\xA9\x1B\xB0"
-			  "\x40\x77\x72\xC2\xEA\x0E\x3A\x78"
-			  "\x46\xB9\x91\xB6\xE7\x3D\x51\x42"
-			  "\xFD\x51\xB0\xC6\x2C\x63\x13\x78"
-			  "\x5C\xEE\xFC\xCF\xC4\x70\x00\x34",
-		.ilen	= 496,
-		.result	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
-			  "\x8B\x12\x86\x69\xF0\x5B\xCF\x56"
-			  "\x39\xAD\x34\x9F\x66\xEA\x7D\xC4"
-			  "\x48\xD3\xBA\x0D\xB1\x18\xE3\x4A"
-			  "\xFE\x41\x28\x5C\x27\x8E\x11\x85"
-			  "\x6C\xF7\x5E\xC2\x55\x3C\xA0\x0B"
-			  "\x92\x65\xE9\x70\xDB\x4F\xD6\xB9"
-			  "\x00\xB4\x1F\xE6\x49\xFD\x44\x2F"
-			  "\x53\x3A\x8D\x14\x98\x63\xCA\x5D"
-			  "\xC1\xA8\x33\xA7\x0E\x91\x78\xEC"
-			  "\x77\xDE\x42\xD5\xBC\x07\x8B\x12"
-			  "\xE5\x4C\xF0\x5B\x22\x56\x39\x80"
-			  "\x6B\x9F\x66\xC9\x50\xC4\xAF\x36"
-			  "\xBA\x0D\x94\x7F\xE3\x4A\xDD\x41"
-			  "\x28\xB3\x1A\x8E\x11\xF8\x43\xF7"
-			  "\x5E\x21\x55\x3C\x87\x6E\x92\x65"
-			  "\xCC\x57\xDB\xA2\x35\xB9\x00\xEB"
-			  "\x72\xE6\x49\xD0\x44\x2F\xB6\x19"
-			  "\x8D\x14\xFF\x46\xCA\x5D\x24\xA8"
-			  "\x33\x9A\x6D\x91\x78\xC3\x77\xDE"
-			  "\xA1\x08\xBC\x07\xEE\x71\xE5\x4C"
-			  "\xD7\x5B\x22\xB5\x1C\x80\x6B\xF2"
-			  "\x45\xC9\x50\x3B\xAF\x36\x99\x60"
-			  "\x94\x7F\xC6\x4A\xDD\xA4\x0F\xB3"
-			  "\x1A\xED\x74\xF8\x43\x2A\x5E\x21"
-			  "\x88\x13\x87\x6E\xF1\x58\xCC\x57"
-			  "\x3E\xA2\x35\x9C\x67\xEB\x72\xC5"
-			  "\x49\xD0\xBB\x02\xB6\x19\xE0\x4B"
-			  "\xFF\x46\x29\x5D\x24\x8F\x16\x9A"
-			  "\x6D\xF4\x5F\xC3\xAA\x3D\xA1\x08"
-			  "\x93\x7A\xEE\x71\xD8\x4C\xD7\xBE"
-			  "\x01\xB5\x1C\xE7\x4E\xF2\x45\x2C"
-			  "\x50\x3B\x82\x15\x99\x60\xCB\x52"
-			  "\xC6\xA9\x30\xA4\x0F\x96\x79\xED"
-			  "\x74\xDF\x43\x2A\xBD\x04\x88\x13"
-			  "\xFA\x4D\xF1\x58\x23\x57\x3E\x81"
-			  "\x68\x9C\x67\xCE\x51\xC5\xAC\x37"
-			  "\xBB\x02\x95\x7C\xE0\x4B\xD2\x46"
-			  "\x29\xB0\x1B\x8F\x16\xF9\x40\xF4"
-			  "\x5F\x26\xAA\x3D\x84\x6F\x93\x7A"
-			  "\xCD\x54\xD8\xA3\x0A\xBE\x01\xE8"
-			  "\x73\xE7\x4E\xD1\x45\x2C\xB7\x1E"
-			  "\x82\x15\xFC\x47\xCB\x52\x25\xA9"
-			  "\x30\x9B\x62\x96\x79\xC0\x74\xDF"
-			  "\xA6\x09\xBD\x04\xEF\x76\xFA\x4D"
-			  "\xD4\x58\x23\x8A\x1D\x81\x68\xF3"
-			  "\x5A\xCE\x51\x38\xAC\x37\x9E\x61"
-			  "\x95\x7C\xC7\x4B\xD2\xA5\x0C\xB0"
-			  "\x1B\xE2\x75\xF9\x40\x2B\x5F\x26"
-			  "\x89\x10\x84\x6F\xF6\x59\xCD\x54"
-			  "\x3F\xA3\x0A\x9D\x64\xE8\x73\xDA"
-			  "\x4E\xD1\xB8\x03\xB7\x1E\xE1\x48"
-			  "\xFC\x47\x2E\x52\x25\x8C\x17\x9B"
-			  "\x62\xF5\x5C\xC0\xAB\x32\xA6\x09"
-			  "\x90\x7B\xEF\x76\xD9\x4D\xD4\xBF"
-			  "\x06\x8A\x1D\xE4\x4F\xF3\x5A\x2D"
-			  "\x51\x38\x83\x6A\x9E\x61\xC8\x53"
-			  "\xC7\xAE\x31\xA5\x0C\x97\x7E\xE2"
-			  "\x75\xDC\x40\x2B\xB2\x05\x89\x10"
-			  "\xFB\x42\xF6\x59\x20\x54\x3F\x86"
-			  "\x69\x9D\x64\xCF\x56\xDA\xAD\x34"
-			  "\xB8\x03\xEA\x7D\xE1\x48\xD3\x47",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\x9C\xD6\xF3\x9C\xB9\x5A\x67\x00"
-			  "\x5A\x67\x00\x2D\xCE\xEB\x2D\xCE"
-			  "\xEB\xB4\x51\x72\xB4\x51\x72\x1F",
-		.klen	= 24,
-		.iv	= "\xB2\xD7\x48\xED\x06\x44\xF9\x12"
-			  "\xB7\x28\x4D\x83\x24\x59\xF2\x17",
-		.input	= "\x23\xFF\x5C\x99\x75\xBB\x1F\xD4"
-			  "\xBC\x27\x9D\x36\x60\xA9\xC9\xF7"
-			  "\x94\x9D\x1B\xFF\x8E\x95\x57\x89"
-			  "\x8C\x2E\x33\x70\x43\x61\xE6\xD2"
-			  "\x82\x33\x63\xB6\xC4\x34\x5E\xF8"
-			  "\x96\x07\xA7\xD2\x3B\x8E\xC9\xAA"
-			  "\x7C\xA0\x55\x89\x2E\xE1\x85\x25"
-			  "\x14\x04\xDA\x6B\xE0\xEE\x56\xCF"
-			  "\x08\x2E\x69\xD4\x54\xDE\x22\x84"
-			  "\x69\xA6\xA7\xD3\x3A\x9A\xE8\x05"
-			  "\x63\xDB\xBF\x46\x3A\x26\x2E\x0F"
-			  "\x58\x5C\x46\xEA\x07\x40\xDA\xE1"
-			  "\x14\x1D\xCD\x4F\x06\xC0\xCA\x54"
-			  "\x1E\xC9\x45\x85\x67\x7C\xC2\xB5"
-			  "\x97\x5D\x61\x78\x2E\x46\xEC\x6A"
-			  "\x53\xF4\xD0\xAE\xFA\xB4\x86\x29"
-			  "\x9F\x17\x33\x24\xD8\xB9\xB2\x05"
-			  "\x93\x88\xEA\xF7\xA0\x70\x69\x49"
-			  "\x88\x6B\x73\x40\x41\x8D\xD9\xD9"
-			  "\x7E\x78\xE9\xBE\x6C\x14\x22\x7A"
-			  "\x66\xE1\xDA\xED\x10\xFF\x69\x1D"
-			  "\xB9\xAA\xF2\x56\x72\x1B\x23\xE2"
-			  "\x45\x54\x8B\xA3\x70\x23\xB4\x5E"
-			  "\x8E\x96\xC9\x05\x00\xB3\xB6\xC2"
-			  "\x2A\x02\x43\x7A\x62\xD5\xC8\xD2"
-			  "\xC2\xD0\xE4\x78\xA1\x7B\x3E\xE8"
-			  "\x9F\x7F\x7D\x40\x54\x30\x3B\xC0"
-			  "\xA5\x54\xFD\xCA\x25\xEC\x44\x3E"
-			  "\x1A\x54\x7F\x88\xD0\xE1\xFE\x71"
-			  "\xCE\x05\x49\x89\xBA\xD6\x72\xE7"
-			  "\xD6\x5D\x3F\xA2\xD9\xAB\xC5\x02"
-			  "\xD6\x43\x22\xAF\xA2\xE4\x80\x85"
-			  "\xD7\x87\xB9\xEA\x43\xDB\xC8\xEF"
-			  "\x5C\x82\x2E\x98\x0D\x30\x41\x6B"
-			  "\x08\x48\x8D\xF0\xF8\x60\xD7\x9D"
-			  "\xE9\xDE\x40\xAD\x0D\xAD\x0D\x58"
-			  "\x2A\x98\x35\xFE\xF7\xDD\x4B\x40"
-			  "\xDE\xB0\x05\xD9\x7B\x09\x4D\xBC"
-			  "\x42\xC0\xF1\x15\x0B\xFA\x26\x6B"
-			  "\xC6\x12\x13\x4F\xCB\x35\xBA\x35"
-			  "\xDD\x7A\x36\x9C\x12\x57\x55\x83"
-			  "\x78\x58\x09\xD0\xB0\xCF\x7C\x5C"
-			  "\x38\xCF\xBD\x79\x5B\x13\x4D\x97"
-			  "\xC1\x85\x6F\x97\xC9\xE8\xC2\xA4"
-			  "\x98\xE2\xBD\x77\x6B\x53\x39\x1A"
-			  "\x28\x10\xE7\xE0\xE7\xDE\x9D\x69"
-			  "\x78\x6F\x8E\xD2\xD9\x5D\xD2\x15"
-			  "\x9E\xB5\x4D\x8C\xC0\x78\x22\x2F"
-			  "\x17\x11\x2E\x99\xD7\xE3\xA4\x4F"
-			  "\x65\xA5\x6B\x03\x2C\x35\x6F\xDA"
-			  "\x8A\x19\x08\xE1\x08\x48\x59\x51"
-			  "\x53\x4B\xD1\xDF\xDA\x14\x50\x5F"
-			  "\xDF\xB5\x8C\xDF\xC6\xFD\x85\xFA"
-			  "\xD4\xF9\x64\x45\x65\x0D\x7D\xF4"
-			  "\xC8\xCD\x3F\x32\xAF\xDD\x30\xED"
-			  "\x7B\xAA\xAC\xF0\xDA\x7F\xDF\x75"
-			  "\x1C\xA4\xF1\xCB\x5E\x4F\x0B\xB4"
-			  "\x97\x73\x28\xDE\xCF\xAF\x82\xBD"
-			  "\xC4\xBA\xB4\x9C\x0D\x16\x77\x42"
-			  "\x42\x39\x7C\x53\xA4\xD4\xDD\x40"
-			  "\x5C\x60\x1F\x6E\xA7\xE2\xDC\xE7"
-			  "\x32\x0F\x05\x2F\xF2\x4C\x95\x3B"
-			  "\xF2\x79\xD9",
-		.ilen	= 499,
-		.result	= "\x05\xEC\x77\xFB\x42\xD5\x59\x20"
-			  "\x8B\x12\x86\x69\xF0\x5B\xCF\x56"
-			  "\x39\xAD\x34\x9F\x66\xEA\x7D\xC4"
-			  "\x48\xD3\xBA\x0D\xB1\x18\xE3\x4A"
-			  "\xFE\x41\x28\x5C\x27\x8E\x11\x85"
-			  "\x6C\xF7\x5E\xC2\x55\x3C\xA0\x0B"
-			  "\x92\x65\xE9\x70\xDB\x4F\xD6\xB9"
-			  "\x00\xB4\x1F\xE6\x49\xFD\x44\x2F"
-			  "\x53\x3A\x8D\x14\x98\x63\xCA\x5D"
-			  "\xC1\xA8\x33\xA7\x0E\x91\x78\xEC"
-			  "\x77\xDE\x42\xD5\xBC\x07\x8B\x12"
-			  "\xE5\x4C\xF0\x5B\x22\x56\x39\x80"
-			  "\x6B\x9F\x66\xC9\x50\xC4\xAF\x36"
-			  "\xBA\x0D\x94\x7F\xE3\x4A\xDD\x41"
-			  "\x28\xB3\x1A\x8E\x11\xF8\x43\xF7"
-			  "\x5E\x21\x55\x3C\x87\x6E\x92\x65"
-			  "\xCC\x57\xDB\xA2\x35\xB9\x00\xEB"
-			  "\x72\xE6\x49\xD0\x44\x2F\xB6\x19"
-			  "\x8D\x14\xFF\x46\xCA\x5D\x24\xA8"
-			  "\x33\x9A\x6D\x91\x78\xC3\x77\xDE"
-			  "\xA1\x08\xBC\x07\xEE\x71\xE5\x4C"
-			  "\xD7\x5B\x22\xB5\x1C\x80\x6B\xF2"
-			  "\x45\xC9\x50\x3B\xAF\x36\x99\x60"
-			  "\x94\x7F\xC6\x4A\xDD\xA4\x0F\xB3"
-			  "\x1A\xED\x74\xF8\x43\x2A\x5E\x21"
-			  "\x88\x13\x87\x6E\xF1\x58\xCC\x57"
-			  "\x3E\xA2\x35\x9C\x67\xEB\x72\xC5"
-			  "\x49\xD0\xBB\x02\xB6\x19\xE0\x4B"
-			  "\xFF\x46\x29\x5D\x24\x8F\x16\x9A"
-			  "\x6D\xF4\x5F\xC3\xAA\x3D\xA1\x08"
-			  "\x93\x7A\xEE\x71\xD8\x4C\xD7\xBE"
-			  "\x01\xB5\x1C\xE7\x4E\xF2\x45\x2C"
-			  "\x50\x3B\x82\x15\x99\x60\xCB\x52"
-			  "\xC6\xA9\x30\xA4\x0F\x96\x79\xED"
-			  "\x74\xDF\x43\x2A\xBD\x04\x88\x13"
-			  "\xFA\x4D\xF1\x58\x23\x57\x3E\x81"
-			  "\x68\x9C\x67\xCE\x51\xC5\xAC\x37"
-			  "\xBB\x02\x95\x7C\xE0\x4B\xD2\x46"
-			  "\x29\xB0\x1B\x8F\x16\xF9\x40\xF4"
-			  "\x5F\x26\xAA\x3D\x84\x6F\x93\x7A"
-			  "\xCD\x54\xD8\xA3\x0A\xBE\x01\xE8"
-			  "\x73\xE7\x4E\xD1\x45\x2C\xB7\x1E"
-			  "\x82\x15\xFC\x47\xCB\x52\x25\xA9"
-			  "\x30\x9B\x62\x96\x79\xC0\x74\xDF"
-			  "\xA6\x09\xBD\x04\xEF\x76\xFA\x4D"
-			  "\xD4\x58\x23\x8A\x1D\x81\x68\xF3"
-			  "\x5A\xCE\x51\x38\xAC\x37\x9E\x61"
-			  "\x95\x7C\xC7\x4B\xD2\xA5\x0C\xB0"
-			  "\x1B\xE2\x75\xF9\x40\x2B\x5F\x26"
-			  "\x89\x10\x84\x6F\xF6\x59\xCD\x54"
-			  "\x3F\xA3\x0A\x9D\x64\xE8\x73\xDA"
-			  "\x4E\xD1\xB8\x03\xB7\x1E\xE1\x48"
-			  "\xFC\x47\x2E\x52\x25\x8C\x17\x9B"
-			  "\x62\xF5\x5C\xC0\xAB\x32\xA6\x09"
-			  "\x90\x7B\xEF\x76\xD9\x4D\xD4\xBF"
-			  "\x06\x8A\x1D\xE4\x4F\xF3\x5A\x2D"
-			  "\x51\x38\x83\x6A\x9E\x61\xC8\x53"
-			  "\xC7\xAE\x31\xA5\x0C\x97\x7E\xE2"
-			  "\x75\xDC\x40\x2B\xB2\x05\x89\x10"
-			  "\xFB\x42\xF6\x59\x20\x54\x3F\x86"
-			  "\x69\x9D\x64\xCF\x56\xDA\xAD\x34"
-			  "\xB8\x03\xEA\x7D\xE1\x48\xD3\x47"
-			  "\x2E\xB1\x18",
-		.rlen	= 499,
+		.len	= 499,
 		.also_non_np = 1,
 		.np	= 2,
 		.tap	= { 499 - 16, 16 },
@@ -7602,45 +6609,40 @@
 /*
  * Blowfish test vectors.
  */
-static const struct cipher_testvec bf_enc_tv_template[] = {
+static const struct cipher_testvec bf_tv_template[] = {
 	{ /* DES test vectors from OpenSSL */
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 8,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 8,
-		.result	= "\x4e\xf9\x97\x45\x61\x98\xdd\x78",
-		.rlen	= 8,
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\x4e\xf9\x97\x45\x61\x98\xdd\x78",
+		.len	= 8,
 	}, {
 		.key	= "\x1f\x1f\x1f\x1f\x0e\x0e\x0e\x0e",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.ilen	= 8,
-		.result	= "\xa7\x90\x79\x51\x08\xea\x3c\xae",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.ctext	= "\xa7\x90\x79\x51\x08\xea\x3c\xae",
+		.len	= 8,
 	}, {
 		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
 		.klen	= 8,
-		.input	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.ilen	= 8,
-		.result	= "\xe8\x7a\x24\x4e\x2c\xc8\x5e\x82",
-		.rlen	= 8,
+		.ptext	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.ctext	= "\xe8\x7a\x24\x4e\x2c\xc8\x5e\x82",
+		.len	= 8,
 	}, { /* Vary the keylength... */
 		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87"
 			  "\x78\x69\x5a\x4b\x3c\x2d\x1e\x0f",
 		.klen	= 16,
-		.input	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.ilen	= 8,
-		.result	= "\x93\x14\x28\x87\xee\x3b\xe1\x5c",
-		.rlen	= 8,
+		.ptext	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.ctext	= "\x93\x14\x28\x87\xee\x3b\xe1\x5c",
+		.len	= 8,
 	}, {
 		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87"
 			  "\x78\x69\x5a\x4b\x3c\x2d\x1e\x0f"
 			  "\x00\x11\x22\x33\x44",
 		.klen	= 21,
-		.input	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.ilen	= 8,
-		.result	= "\xe6\xf5\x1e\xd7\x9b\x9d\xb2\x1f",
-		.rlen	= 8,
+		.ptext	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.ctext	= "\xe6\xf5\x1e\xd7\x9b\x9d\xb2\x1f",
+		.len	= 8,
 	}, { /* Generated with bf488 */
 		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87"
 			  "\x78\x69\x5a\x4b\x3c\x2d\x1e\x0f"
@@ -7650,17 +6652,16 @@
 			  "\x1f\x1f\x1f\x1f\x0e\x0e\x0e\x0e"
 			  "\xff\xff\xff\xff\xff\xff\xff\xff",
 		.klen	= 56,
-		.input	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.ilen	= 8,
-		.result	= "\xc0\x45\x04\x01\x2e\x4e\x1f\x53",
-		.rlen	= 8,
+		.ptext	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
+		.ctext	= "\xc0\x45\x04\x01\x2e\x4e\x1f\x53",
+		.len	= 8,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
 			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 		.klen	= 32,
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -7723,8 +6724,7 @@
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
 			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06",
-		.ilen	= 504,
-		.result	= "\x96\x87\x3D\x0C\x7B\xFB\xBD\x1F"
+		.ctext	= "\x96\x87\x3D\x0C\x7B\xFB\xBD\x1F"
 			  "\xE3\xC1\x99\x6D\x39\xD4\xC2\x7D"
 			  "\xD7\x87\xA1\xF2\xDF\x51\x71\x26"
 			  "\xC2\xF4\x6D\xFF\xF6\xCD\x6B\x40"
@@ -7787,221 +6787,28 @@
 			  "\x54\x59\x09\xA5\x3D\xC5\x84\x68"
 			  "\x56\xEB\x36\x77\x3D\xAA\xB8\xF5"
 			  "\xC9\x1A\xFB\x5D\xDE\xBB\x43\xF4",
-		.rlen	= 504,
+		.len	= 504,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 504 - 10, 2, 8 },
 	},
 };
 
-static const struct cipher_testvec bf_dec_tv_template[] = {
-	{ /* DES test vectors from OpenSSL */
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 8,
-		.input	= "\x4e\xf9\x97\x45\x61\x98\xdd\x78",
-		.ilen	= 8,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 8,
-	}, {
-		.key	= "\x1f\x1f\x1f\x1f\x0e\x0e\x0e\x0e",
-		.klen	= 8,
-		.input	= "\xa7\x90\x79\x51\x08\xea\x3c\xae",
-		.ilen	= 8,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.rlen	= 8,
-	}, {
-		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
-		.klen	= 8,
-		.input	= "\xe8\x7a\x24\x4e\x2c\xc8\x5e\x82",
-		.ilen	= 8,
-		.result	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.rlen	= 8,
-	}, { /* Vary the keylength... */
-		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87"
-			  "\x78\x69\x5a\x4b\x3c\x2d\x1e\x0f",
-		.klen	= 16,
-		.input	= "\x93\x14\x28\x87\xee\x3b\xe1\x5c",
-		.ilen	= 8,
-		.result	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.rlen	= 8,
-	}, {
-		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87"
-			  "\x78\x69\x5a\x4b\x3c\x2d\x1e\x0f"
-			  "\x00\x11\x22\x33\x44",
-		.klen	= 21,
-		.input	= "\xe6\xf5\x1e\xd7\x9b\x9d\xb2\x1f",
-		.ilen	= 8,
-		.result	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.rlen	= 8,
-	}, { /* Generated with bf488, using OpenSSL, Libgcrypt and Nettle */
-		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87"
-			  "\x78\x69\x5a\x4b\x3c\x2d\x1e\x0f"
-			  "\x00\x11\x22\x33\x44\x55\x66\x77"
-			  "\x04\x68\x91\x04\xc2\xfd\x3b\x2f"
-			  "\x58\x40\x23\x64\x1a\xba\x61\x76"
-			  "\x1f\x1f\x1f\x1f\x0e\x0e\x0e\x0e"
-			  "\xff\xff\xff\xff\xff\xff\xff\xff",
-		.klen	= 56,
-		.input	= "\xc0\x45\x04\x01\x2e\x4e\x1f\x53",
-		.ilen	= 8,
-		.result	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.rlen	= 8,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.input	= "\x96\x87\x3D\x0C\x7B\xFB\xBD\x1F"
-			  "\xE3\xC1\x99\x6D\x39\xD4\xC2\x7D"
-			  "\xD7\x87\xA1\xF2\xDF\x51\x71\x26"
-			  "\xC2\xF4\x6D\xFF\xF6\xCD\x6B\x40"
-			  "\xE1\xB3\xBF\xD4\x38\x2B\xC8\x3B"
-			  "\xD3\xB2\xD4\x61\xC7\x9F\x06\xE9"
-			  "\xCD\xF3\x88\x39\x39\x7A\xDF\x19"
-			  "\xE8\x03\x2A\x0B\x9E\xA0\x2B\x86"
-			  "\x31\xF8\x9D\xB1\xEE\x78\x9D\xB5"
-			  "\xCD\x8B\x7C\x2E\xF5\xA2\x2D\x5D"
-			  "\x6E\x66\xAF\x38\x6C\xD3\x13\xED"
-			  "\x14\xEA\x5D\xD0\x17\x77\x0F\x4A"
-			  "\x50\xF2\xD0\x0F\xC8\xF7\x1E\x7B"
-			  "\x9D\x5B\x54\x65\x4F\x16\x8A\x97"
-			  "\xF3\xF6\xD4\xAA\x87\x36\x77\x72"
-			  "\x99\x4A\xB5\x5E\x88\xC3\xCD\x7D"
-			  "\x1D\x97\xF9\x11\xBD\xE0\x1F\x1F"
-			  "\x96\x3E\x4B\x22\xF4\xC0\xE6\xB8"
-			  "\x47\x82\x98\x23\x33\x36\xBC\x1B"
-			  "\x36\xE7\xF6\xCF\x97\x37\x16\xC0"
-			  "\x87\x31\x8B\xB0\xDB\x19\x42\xA5"
-			  "\x1F\x90\x7E\x66\x34\xDD\x5E\xE9"
-			  "\x4F\xB2\x2B\x9A\xDE\xB3\x5D\x71"
-			  "\x4D\x68\xF0\xDC\xA6\xEA\xE3\x9B"
-			  "\x60\x00\x55\x57\x06\x8B\xD5\xB3"
-			  "\x86\x30\x78\xDA\x33\x9A\x9D\xCC"
-			  "\xBA\x0B\x81\x06\x77\x43\xC7\xC9"
-			  "\xDB\x37\x60\x11\x45\x59\x6D\x2D"
-			  "\x90\x3D\x65\x3E\xD0\x13\xC6\x3C"
-			  "\x0E\x78\x7D\x9A\x00\xD6\x2F\x0B"
-			  "\x3B\x53\x19\x1E\xA8\x9B\x11\xD9"
-			  "\x98\xE4\x7F\xC3\x6E\x51\x24\x70"
-			  "\x9F\x04\x9C\xC2\x9E\x44\x84\xE3"
-			  "\xE0\x8A\x44\xA2\x5C\x94\x74\x34"
-			  "\x37\x52\x7C\x03\xE8\x8E\x97\xE1"
-			  "\x5B\x5C\x0E\xB0\x70\xFE\x54\x3F"
-			  "\xD8\x65\xA9\xC5\xCD\xEC\xF4\x45"
-			  "\x55\xC5\xA7\xA3\x19\x80\x28\x51"
-			  "\xBE\x64\x4A\xC1\xD4\xE1\xBE\xEB"
-			  "\x73\x4C\xB6\xF9\x5F\x6D\x82\xBC"
-			  "\x3E\x42\x14\x49\x88\x51\xBF\x68"
-			  "\x45\x75\x27\x1B\x0A\x72\xED\xAF"
-			  "\xDA\xC4\x4D\x67\x0D\xEE\x75\xE3"
-			  "\x34\xDD\x91\x19\x42\x3A\xCB\xDA"
-			  "\x38\xFA\x3C\x93\x62\xF2\xE3\x81"
-			  "\xB3\xE4\xBB\xF6\x0D\x0B\x1D\x09"
-			  "\x9C\x52\x0D\x50\x63\xA4\xB2\xD2"
-			  "\x82\xA0\x23\x3F\x1F\xB6\xED\x6E"
-			  "\xC2\x9C\x1C\xD0\x9A\x40\xB6\xFC"
-			  "\x36\x56\x6E\x85\x73\xD7\x52\xBA"
-			  "\x35\x5E\x32\x89\x5D\x42\xF5\x36"
-			  "\x52\x8D\x46\x7D\xC8\x71\xAD\x33"
-			  "\xE1\xAF\x6A\xA8\xEC\xBA\x1C\xDC"
-			  "\xFE\x88\xE6\x16\xE4\xC8\x13\x00"
-			  "\x3C\xDA\x59\x32\x38\x19\xD5\xEB"
-			  "\xB6\x7F\x78\x45\x1B\x8E\x07\x8C"
-			  "\x66\x52\x75\xFF\xAF\xCE\x2D\x2B"
-			  "\x22\x29\xCA\xB3\x5F\x7F\xE3\x29"
-			  "\xB2\xB8\x9D\xEB\x16\xC8\xC5\x1D"
-			  "\xC9\x0D\x59\x82\x27\x57\x9D\x42"
-			  "\x54\x59\x09\xA5\x3D\xC5\x84\x68"
-			  "\x56\xEB\x36\x77\x3D\xAA\xB8\xF5"
-			  "\xC9\x1A\xFB\x5D\xDE\xBB\x43\xF4",
-		.ilen	= 504,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06",
-		.rlen	= 504,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 504 - 10, 2, 8 },
-	},
-};
-
-static const struct cipher_testvec bf_cbc_enc_tv_template[] = {
+static const struct cipher_testvec bf_cbc_tv_template[] = {
 	{ /* From OpenSSL */
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
 		.klen	= 16,
 		.iv	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.input	= "\x37\x36\x35\x34\x33\x32\x31\x20"
+		.ptext	= "\x37\x36\x35\x34\x33\x32\x31\x20"
 			  "\x4e\x6f\x77\x20\x69\x73\x20\x74"
 			  "\x68\x65\x20\x74\x69\x6d\x65\x20"
 			  "\x66\x6f\x72\x20\x00\x00\x00\x00",
-		.ilen	= 32,
-		.result	= "\x6b\x77\xb4\xd6\x30\x06\xde\xe6"
+		.ctext	= "\x6b\x77\xb4\xd6\x30\x06\xde\xe6"
 			  "\x05\xb1\x56\xe2\x74\x03\x97\x93"
 			  "\x58\xde\xb9\xe7\x15\x46\x16\xd9"
 			  "\x59\xf1\x65\x2b\xd5\xff\x92\xcc",
-		.rlen	= 32,
+		.len	= 32,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -8009,7 +6816,7 @@
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -8072,8 +6879,7 @@
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
 			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06",
-		.ilen	= 504,
-		.result	= "\xB4\xFE\xA5\xBB\x3D\x2C\x27\x06"
+		.ctext	= "\xB4\xFE\xA5\xBB\x3D\x2C\x27\x06"
 			  "\x06\x2B\x3A\x92\xB2\xF5\x5E\x62"
 			  "\x84\xCD\xF7\x66\x7E\x41\x6C\x8E"
 			  "\x1B\xD9\x02\xB6\x48\xB0\x87\x25"
@@ -8136,171 +6942,14 @@
 			  "\xCD\xE9\xD5\x0C\xFE\x12\x39\xA9"
 			  "\x93\x9B\xEE\xB5\x97\x41\xD2\xA0"
 			  "\xB4\x98\xD8\x6B\x74\xE7\x65\xF4",
-		.rlen	= 504,
+		.len	= 504,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 504 - 10, 2, 8 },
 	},
 };
 
-static const struct cipher_testvec bf_cbc_dec_tv_template[] = {
-	{ /* From OpenSSL */
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
-		.klen	= 16,
-		.iv	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.input	= "\x6b\x77\xb4\xd6\x30\x06\xde\xe6"
-			  "\x05\xb1\x56\xe2\x74\x03\x97\x93"
-			  "\x58\xde\xb9\xe7\x15\x46\x16\xd9"
-			  "\x59\xf1\x65\x2b\xd5\xff\x92\xcc",
-		.ilen	= 32,
-		.result	= "\x37\x36\x35\x34\x33\x32\x31\x20"
-			  "\x4e\x6f\x77\x20\x69\x73\x20\x74"
-			  "\x68\x65\x20\x74\x69\x6d\x65\x20"
-			  "\x66\x6f\x72\x20\x00\x00\x00\x00",
-		.rlen	= 32,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\xB4\xFE\xA5\xBB\x3D\x2C\x27\x06"
-			  "\x06\x2B\x3A\x92\xB2\xF5\x5E\x62"
-			  "\x84\xCD\xF7\x66\x7E\x41\x6C\x8E"
-			  "\x1B\xD9\x02\xB6\x48\xB0\x87\x25"
-			  "\x01\x9C\x93\x63\x51\x60\x82\xD2"
-			  "\x4D\xE5\xC2\xB7\xAE\x60\xD8\xAD"
-			  "\x9F\xAB\x6C\xFA\x20\x05\xDA\x6F"
-			  "\x1F\xD1\xD8\x36\x0F\xB5\x16\x69"
-			  "\x3C\xAF\xB3\x30\x18\x33\xE6\xB5"
-			  "\x43\x29\x9D\x94\xF4\x2F\x0A\x65"
-			  "\x40\xB2\xB2\xB2\x42\x89\xEE\x8A"
-			  "\x60\xD3\x52\xA8\xED\x91\xDF\xE1"
-			  "\x91\x73\x7C\x28\xA1\x14\xC3\x4C"
-			  "\x82\x72\x4B\x7D\x7D\x32\xD5\x19"
-			  "\xE8\xB8\x6B\x30\x21\x09\x0E\x27"
-			  "\x10\x9D\x2D\x3A\x6A\x4B\x7B\xE6"
-			  "\x8D\x4E\x02\x32\xFF\x7F\x8E\x13"
-			  "\xB0\x96\xF4\xC2\xA1\x60\x8A\x69"
-			  "\xEF\x0F\x86\xD0\x25\x13\x1A\x7C"
-			  "\x6E\xF0\x41\xA3\xFB\xB3\xAB\x40"
-			  "\x7D\x19\xA0\x11\x4F\x3E\x1D\x43"
-			  "\x65\xFE\x15\x40\xD0\x62\x41\x02"
-			  "\xEA\x0C\x7A\xC3\x84\xEE\xB0\xBE"
-			  "\xBE\xC8\x57\x51\xCD\x4F\xAD\x5C"
-			  "\xCC\x79\xBA\x0D\x85\x3A\xED\x6B"
-			  "\xAC\x6B\xA3\x4D\xBC\xE8\x02\x6A"
-			  "\xC2\x6D\xBD\x5E\x89\x95\x86\x43"
-			  "\x2C\x17\x4B\xC6\x40\xA2\xBD\x24"
-			  "\x04\xF0\x86\x08\x78\x18\x42\xE0"
-			  "\x39\x1B\x22\x9E\x89\x4C\x04\x6B"
-			  "\x65\xC5\xB6\x0E\xF6\x63\xFC\xD7"
-			  "\xAE\x9E\x87\x13\xCC\xD3\x1A\xEC"
-			  "\xF0\x51\xCC\x93\x68\xFC\xE9\x19"
-			  "\x7C\x4E\x9B\xCC\x17\xAD\xD2\xFC"
-			  "\x97\x18\x92\xFF\x15\x11\xCE\xED"
-			  "\x04\x41\x05\xA3\x92\xFF\x3B\xE6"
-			  "\xB6\x8C\x90\xC6\xCD\x15\xA0\x04"
-			  "\x25\x8B\x5D\x5B\x5F\xDB\xAE\x68"
-			  "\xEF\xB3\x61\x18\xDB\x83\x9B\x39"
-			  "\xCA\x82\xD1\x88\xF0\xA2\x5C\x02"
-			  "\x87\xBD\x8D\x8F\xBB\x62\xF0\x35"
-			  "\x75\x6F\x06\x81\x0A\x97\x4D\xF0"
-			  "\x43\x12\x73\x77\xDB\x91\x83\x5B"
-			  "\xE7\x3A\xA6\x07\x7B\xBF\x2C\x50"
-			  "\x94\xDE\x7B\x65\xDA\x1C\xF1\x9F"
-			  "\x7E\x12\x40\xB2\x3E\x19\x23\xF1"
-			  "\x7C\x1B\x5F\xA8\xF3\xAC\x63\x87"
-			  "\xEB\x3E\x0C\xBE\xA3\x63\x97\x88"
-			  "\x8D\x27\xC6\x2A\xF8\xF2\x67\x9A"
-			  "\x0D\x14\x16\x2B\x6F\xCB\xD4\x76"
-			  "\x14\x48\x2E\xDE\x2A\x44\x5E\x45"
-			  "\xF1\x97\x82\xEF\xB7\xAE\xED\x3A"
-			  "\xED\x73\xD3\x79\xF7\x38\x1D\xD0"
-			  "\xC5\xF8\x69\x83\x28\x84\x87\x56"
-			  "\x3F\xAE\x81\x04\x79\x1F\xD1\x09"
-			  "\xC5\xE5\x05\x0D\x64\x16\xCE\x42"
-			  "\xC5\xF8\xDB\x57\x89\x33\x22\xFC"
-			  "\xB4\xD7\x94\xB9\xF3\xCC\x02\x90"
-			  "\x02\xBA\x55\x1E\x24\x3E\x02\x1D"
-			  "\xC6\xCD\x8F\xD9\xBD\xED\xB0\x51"
-			  "\xCD\xE9\xD5\x0C\xFE\x12\x39\xA9"
-			  "\x93\x9B\xEE\xB5\x97\x41\xD2\xA0"
-			  "\xB4\x98\xD8\x6B\x74\xE7\x65\xF4",
-		.ilen	= 504,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06",
-		.rlen	= 504,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 504 - 10, 2, 8 },
-	},
-};
-
-static const struct cipher_testvec bf_ctr_enc_tv_template[] = {
+static const struct cipher_testvec bf_ctr_tv_template[] = {
 	{ /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -8308,7 +6957,7 @@
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -8371,8 +7020,7 @@
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
 			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06",
-		.ilen	= 504,
-		.result	= "\xC7\xA3\xDF\xB9\x05\xF4\x9E\x8D"
+		.ctext	= "\xC7\xA3\xDF\xB9\x05\xF4\x9E\x8D"
 			  "\x9E\xDF\x38\x18\x83\x07\xEF\xC1"
 			  "\x93\x3C\xAA\xAA\xFE\x06\x42\xCC"
 			  "\x0D\x70\x86\x5A\x44\xAD\x85\x17"
@@ -8435,7 +7083,7 @@
 			  "\x64\xBB\x15\xB8\x56\xCF\xEE\xE5"
 			  "\x32\x44\x96\x1C\xD8\xEB\x95\xD2"
 			  "\xF3\x71\xEF\xEB\x4E\xBB\x4D\x29",
-		.rlen	= 504,
+		.len	= 504,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -8443,7 +7091,7 @@
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -8506,8 +7154,7 @@
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
 			  "\x2B\xC2\x59\xF0\x64\xFB\x92",
-		.ilen	= 503,
-		.result	= "\xC7\xA3\xDF\xB9\x05\xF4\x9E\x8D"
+		.ctext	= "\xC7\xA3\xDF\xB9\x05\xF4\x9E\x8D"
 			  "\x9E\xDF\x38\x18\x83\x07\xEF\xC1"
 			  "\x93\x3C\xAA\xAA\xFE\x06\x42\xCC"
 			  "\x0D\x70\x86\x5A\x44\xAD\x85\x17"
@@ -8570,7 +7217,7 @@
 			  "\x64\xBB\x15\xB8\x56\xCF\xEE\xE5"
 			  "\x32\x44\x96\x1C\xD8\xEB\x95\xD2"
 			  "\xF3\x71\xEF\xEB\x4E\xBB\x4D",
-		.rlen	= 503,
+		.len	= 503,
 		.also_non_np = 1,
 		.np	= 2,
 		.tap	= { 503 - 8, 8 },
@@ -8581,7 +7228,7 @@
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 		.klen	= 32,
 		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -8644,8 +7291,7 @@
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
 			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06",
-		.ilen	= 504,
-		.result	= "\x5F\x58\x6E\x60\x51\x6E\xDC\x3D"
+		.ctext	= "\x5F\x58\x6E\x60\x51\x6E\xDC\x3D"
 			  "\xD1\xBB\xF7\xB7\xFD\x04\x44\x82"
 			  "\xDC\x9F\x4B\x02\xF1\xD2\x5A\x6F"
 			  "\x25\xF9\x27\x21\xF2\xD2\x9A\x01"
@@ -8708,462 +7354,47 @@
 			  "\xB5\x77\xBA\x1C\xEE\x71\xFA\xB0"
 			  "\x16\x4C\x18\x6B\xF2\x69\xA0\x07"
 			  "\xEF\xBE\xEC\x69\xAC\xA8\x63\x9E",
-		.rlen	= 504,
-	},
-};
-
-static const struct cipher_testvec bf_ctr_dec_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\xC7\xA3\xDF\xB9\x05\xF4\x9E\x8D"
-			  "\x9E\xDF\x38\x18\x83\x07\xEF\xC1"
-			  "\x93\x3C\xAA\xAA\xFE\x06\x42\xCC"
-			  "\x0D\x70\x86\x5A\x44\xAD\x85\x17"
-			  "\xE4\x1F\x5E\xA5\x89\xAC\x32\xBC"
-			  "\x3D\xA7\xE9\x0A\x5C\x70\x4D\xDE"
-			  "\x99\x38\x07\xCA\x1D\x21\xC1\x11"
-			  "\x97\xEB\x98\x75\xC4\x73\x45\x83"
-			  "\x46\x1C\x9C\x91\x87\xC1\xA0\x56"
-			  "\x98\xA1\x8B\xDB\x22\x76\xBD\x62"
-			  "\xA4\xBC\xE8\x86\xDA\xD2\x51\x13"
-			  "\x13\xD2\x96\x68\x69\x10\x67\x0C"
-			  "\xD0\x17\x25\x7C\xB2\xAE\x4F\x93"
-			  "\xA6\x82\x20\xCF\x0F\xA6\x47\x79"
-			  "\x88\x09\x40\x59\xBD\x12\x64\xB5"
-			  "\x19\x38\x0D\xFF\x86\xD9\x42\x20"
-			  "\x81\x0D\x96\x99\xAF\x22\x1F\x94"
-			  "\x5C\x6E\xEC\xEA\xA3\x39\xCB\x09"
-			  "\x43\x19\x7F\xD0\xBB\x10\xC2\x49"
-			  "\xF7\xE9\xF2\xEE\xBF\xF7\xF8\xB3"
-			  "\x0E\x1A\xF1\x8D\x70\x82\x0C\x04"
-			  "\xFD\x29\x1A\xAC\xC0\x92\x48\x34"
-			  "\x6A\xE3\x1D\x4F\xFC\x1C\x72\x6A"
-			  "\x57\xCB\xAD\xD0\x98\xAB\xB1\x01"
-			  "\x03\x6A\x45\xDD\x07\x71\x5F\x5B"
-			  "\xB5\x4A\xE4\xE5\xB9\xB9\xBC\xAC"
-			  "\x44\xF7\x41\xA4\x5F\x2E\xE9\x28"
-			  "\xE3\x05\xD2\x94\x78\x4C\x33\x1B"
-			  "\xBD\xC1\x6E\x51\xD9\xAD\xD9\x86"
-			  "\x15\x4A\x78\xAE\x7B\xAD\x3B\xBC"
-			  "\x2F\xE0\x0E\xC5\x7B\x54\x97\x5F"
-			  "\x60\x51\x14\x65\xF9\x91\xE9\xDA"
-			  "\x9A\xBC\xFC\x19\x29\x67\xAA\x63"
-			  "\x5E\xF2\x48\x88\xEB\x79\xE1\xE4"
-			  "\xF7\xF6\x4C\xA9\xE2\x8C\x3B\xE0"
-			  "\xED\x52\xAE\x90\x8F\x5B\x98\x34"
-			  "\x29\x94\x34\x7F\xF9\x6C\x1E\xB6"
-			  "\xA4\xE7\x2D\x06\x54\x9D\xC3\x02"
-			  "\xC1\x90\xA4\x72\x31\x6B\x24\x51"
-			  "\x0B\xB3\x7C\x63\x15\xBA\xAF\x5D"
-			  "\x41\xE0\x37\x6D\xBE\x41\x58\xDE"
-			  "\xF2\x07\x62\x99\xBE\xC1\x8C\x0F"
-			  "\x0F\x28\xFB\x8F\x0E\x1D\x91\xE2"
-			  "\xDA\x99\x5C\x49\xBA\x9C\xA8\x86"
-			  "\x82\x63\x11\xB3\x54\x49\x00\x08"
-			  "\x07\xF2\xE8\x1F\x34\x49\x61\xF4"
-			  "\x81\xE9\xF6\xA9\x5A\x28\x60\x1F"
-			  "\x66\x99\x08\x06\xF2\xE8\x2D\xD1"
-			  "\xD0\x67\xBA\x32\x1F\x02\x86\x7B"
-			  "\xFB\x79\x3D\xC5\xB1\x7F\x15\xAF"
-			  "\xD7\xBF\x31\x46\x22\x7F\xAE\x5B"
-			  "\x8B\x95\x47\xC2\xB1\x62\xA1\xCE"
-			  "\x52\xAC\x9C\x8B\xC2\x49\x7F\xBC"
-			  "\x9C\x89\xB8\xB6\xCA\xE3\x8F\xEA"
-			  "\xAC\xB4\x5D\xE4\x50\xDC\x3A\xB5"
-			  "\x91\x04\x94\x99\x03\x3B\x42\x6D"
-			  "\x9C\x4A\x02\xF5\xB5\x38\x98\xA8"
-			  "\x5C\x97\x2E\x4D\x79\x67\x71\xAF"
-			  "\xF0\x70\x77\xFF\x2D\xDA\xA0\x9E"
-			  "\x23\x8D\xD6\xA6\x68\x10\x78\x9A"
-			  "\x64\xBB\x15\xB8\x56\xCF\xEE\xE5"
-			  "\x32\x44\x96\x1C\xD8\xEB\x95\xD2"
-			  "\xF3\x71\xEF\xEB\x4E\xBB\x4D\x29",
-		.ilen	= 504,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06",
-		.rlen	= 504,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\xC7\xA3\xDF\xB9\x05\xF4\x9E\x8D"
-			  "\x9E\xDF\x38\x18\x83\x07\xEF\xC1"
-			  "\x93\x3C\xAA\xAA\xFE\x06\x42\xCC"
-			  "\x0D\x70\x86\x5A\x44\xAD\x85\x17"
-			  "\xE4\x1F\x5E\xA5\x89\xAC\x32\xBC"
-			  "\x3D\xA7\xE9\x0A\x5C\x70\x4D\xDE"
-			  "\x99\x38\x07\xCA\x1D\x21\xC1\x11"
-			  "\x97\xEB\x98\x75\xC4\x73\x45\x83"
-			  "\x46\x1C\x9C\x91\x87\xC1\xA0\x56"
-			  "\x98\xA1\x8B\xDB\x22\x76\xBD\x62"
-			  "\xA4\xBC\xE8\x86\xDA\xD2\x51\x13"
-			  "\x13\xD2\x96\x68\x69\x10\x67\x0C"
-			  "\xD0\x17\x25\x7C\xB2\xAE\x4F\x93"
-			  "\xA6\x82\x20\xCF\x0F\xA6\x47\x79"
-			  "\x88\x09\x40\x59\xBD\x12\x64\xB5"
-			  "\x19\x38\x0D\xFF\x86\xD9\x42\x20"
-			  "\x81\x0D\x96\x99\xAF\x22\x1F\x94"
-			  "\x5C\x6E\xEC\xEA\xA3\x39\xCB\x09"
-			  "\x43\x19\x7F\xD0\xBB\x10\xC2\x49"
-			  "\xF7\xE9\xF2\xEE\xBF\xF7\xF8\xB3"
-			  "\x0E\x1A\xF1\x8D\x70\x82\x0C\x04"
-			  "\xFD\x29\x1A\xAC\xC0\x92\x48\x34"
-			  "\x6A\xE3\x1D\x4F\xFC\x1C\x72\x6A"
-			  "\x57\xCB\xAD\xD0\x98\xAB\xB1\x01"
-			  "\x03\x6A\x45\xDD\x07\x71\x5F\x5B"
-			  "\xB5\x4A\xE4\xE5\xB9\xB9\xBC\xAC"
-			  "\x44\xF7\x41\xA4\x5F\x2E\xE9\x28"
-			  "\xE3\x05\xD2\x94\x78\x4C\x33\x1B"
-			  "\xBD\xC1\x6E\x51\xD9\xAD\xD9\x86"
-			  "\x15\x4A\x78\xAE\x7B\xAD\x3B\xBC"
-			  "\x2F\xE0\x0E\xC5\x7B\x54\x97\x5F"
-			  "\x60\x51\x14\x65\xF9\x91\xE9\xDA"
-			  "\x9A\xBC\xFC\x19\x29\x67\xAA\x63"
-			  "\x5E\xF2\x48\x88\xEB\x79\xE1\xE4"
-			  "\xF7\xF6\x4C\xA9\xE2\x8C\x3B\xE0"
-			  "\xED\x52\xAE\x90\x8F\x5B\x98\x34"
-			  "\x29\x94\x34\x7F\xF9\x6C\x1E\xB6"
-			  "\xA4\xE7\x2D\x06\x54\x9D\xC3\x02"
-			  "\xC1\x90\xA4\x72\x31\x6B\x24\x51"
-			  "\x0B\xB3\x7C\x63\x15\xBA\xAF\x5D"
-			  "\x41\xE0\x37\x6D\xBE\x41\x58\xDE"
-			  "\xF2\x07\x62\x99\xBE\xC1\x8C\x0F"
-			  "\x0F\x28\xFB\x8F\x0E\x1D\x91\xE2"
-			  "\xDA\x99\x5C\x49\xBA\x9C\xA8\x86"
-			  "\x82\x63\x11\xB3\x54\x49\x00\x08"
-			  "\x07\xF2\xE8\x1F\x34\x49\x61\xF4"
-			  "\x81\xE9\xF6\xA9\x5A\x28\x60\x1F"
-			  "\x66\x99\x08\x06\xF2\xE8\x2D\xD1"
-			  "\xD0\x67\xBA\x32\x1F\x02\x86\x7B"
-			  "\xFB\x79\x3D\xC5\xB1\x7F\x15\xAF"
-			  "\xD7\xBF\x31\x46\x22\x7F\xAE\x5B"
-			  "\x8B\x95\x47\xC2\xB1\x62\xA1\xCE"
-			  "\x52\xAC\x9C\x8B\xC2\x49\x7F\xBC"
-			  "\x9C\x89\xB8\xB6\xCA\xE3\x8F\xEA"
-			  "\xAC\xB4\x5D\xE4\x50\xDC\x3A\xB5"
-			  "\x91\x04\x94\x99\x03\x3B\x42\x6D"
-			  "\x9C\x4A\x02\xF5\xB5\x38\x98\xA8"
-			  "\x5C\x97\x2E\x4D\x79\x67\x71\xAF"
-			  "\xF0\x70\x77\xFF\x2D\xDA\xA0\x9E"
-			  "\x23\x8D\xD6\xA6\x68\x10\x78\x9A"
-			  "\x64\xBB\x15\xB8\x56\xCF\xEE\xE5"
-			  "\x32\x44\x96\x1C\xD8\xEB\x95\xD2"
-			  "\xF3\x71\xEF\xEB\x4E\xBB\x4D",
-		.ilen	= 503,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92",
-		.rlen	= 503,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 503 - 8, 8 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x5F\x58\x6E\x60\x51\x6E\xDC\x3D"
-			  "\xD1\xBB\xF7\xB7\xFD\x04\x44\x82"
-			  "\xDC\x9F\x4B\x02\xF1\xD2\x5A\x6F"
-			  "\x25\xF9\x27\x21\xF2\xD2\x9A\x01"
-			  "\xBD\xAD\x3D\x93\x87\xCA\x0D\xFE"
-			  "\xB7\x2C\x17\x1F\x42\x8C\x13\xB2"
-			  "\x62\x44\x72\xB9\x5D\xC0\xF8\x37"
-			  "\xDF\xEA\x78\x81\x8F\xA6\x34\xB2"
-			  "\x07\x09\x7C\xB9\x3A\xA0\x2B\x18"
-			  "\x34\x6A\x9D\x3D\xA5\xEB\xF4\x60"
-			  "\xF8\x98\xA2\x39\x81\x23\x6C\xA9"
-			  "\x70\xCA\xCC\x45\xD8\x1F\xDF\x44"
-			  "\x2A\x67\x7A\x88\x28\xDC\x36\x83"
-			  "\x18\xD7\x48\x43\x17\x2B\x1B\xE6"
-			  "\x0B\x82\x59\x14\x26\x67\x08\x09"
-			  "\x5B\x5D\x38\xD0\x81\xCE\x54\x2A"
-			  "\xCD\x22\x94\x42\xF5\xBA\x74\x7E"
-			  "\xD9\x00\x40\xA9\x0D\x0B\xBD\x8E"
-			  "\xC4\x8E\x5E\x17\x8F\x48\xE2\xB8"
-			  "\xF4\xCC\x19\x76\xAB\x48\x29\xAA"
-			  "\x81\xD5\xCE\xD5\x8A\x3B\xC9\x21"
-			  "\xEF\x50\x4F\x04\x02\xBF\xE1\x1F"
-			  "\x59\x28\x1A\xE4\x18\x16\xA0\x29"
-			  "\xBF\x34\xA9\x2D\x28\x83\xC0\x5E"
-			  "\xEA\x44\xC4\x6E\xAB\x24\x79\x9D"
-			  "\x2D\xA1\xE8\x55\xCA\x74\xFC\xBD"
-			  "\xFE\xDD\xDA\xA5\xFB\x34\x90\x31"
-			  "\x0E\x62\x28\x9B\xDC\xD7\xA1\xBB"
-			  "\xF0\x1A\xB3\xE2\xD0\xFA\xBD\xE8"
-			  "\x5C\x5A\x10\x67\xF6\x6A\x17\x3F"
-			  "\xC5\xE9\x09\x08\xDD\x22\x77\x42"
-			  "\x26\x6A\x6A\x7A\x3F\x87\x80\x0C"
-			  "\xF0\xFF\x15\x8E\x84\x86\xC0\x10"
-			  "\x0F\x8D\x33\x06\xB8\x72\xA4\x47"
-			  "\x6B\xED\x2E\x05\x94\x6C\x5C\x5B"
-			  "\x13\xF6\x77\xEE\x3B\x16\xDF\xC2"
-			  "\x63\x66\x07\x6D\x3F\x6C\x51\x7C"
-			  "\x1C\xAC\x80\xB6\x58\x48\xB7\x9D"
-			  "\xB4\x19\xD8\x19\x45\x66\x27\x02"
-			  "\xA1\xA9\x99\xF3\x1F\xE5\xA7\x1D"
-			  "\x31\xE7\x1B\x0D\xFF\xBB\xB5\xA1"
-			  "\xF5\x9C\x45\x1E\x18\x19\xA1\xE7"
-			  "\xC2\xF1\xBF\x68\xC3\xEC\xCF\x53"
-			  "\x67\xA6\x2B\x7D\x3C\x6D\x24\xC3"
-			  "\xE8\xE6\x07\x5A\x09\xE0\x32\xA8"
-			  "\x52\xF6\xE9\xED\x0E\xC6\x0A\x6A"
-			  "\xFC\x60\x2A\xE0\x93\xCE\xB8\x2E"
-			  "\xA2\xA8\x0E\x79\x9E\x34\x5D\x37"
-			  "\x6F\x12\xFE\x48\x7B\xE7\xB9\x22"
-			  "\x29\xE8\xD7\xBE\x5D\xD1\x8B\xD9"
-			  "\x91\x51\x4E\x71\xF2\x98\x85\x16"
-			  "\x25\x7A\x76\x8A\x51\x0E\x65\x14"
-			  "\x81\xB5\x3A\x37\xFD\xEC\xB5\x8A"
-			  "\xE1\xCF\x41\x72\x14\x29\x4C\xF0"
-			  "\x20\xD9\x9A\xC5\x66\xA4\x03\x76"
-			  "\x5B\xA4\x15\x4F\x0E\x64\x39\x40"
-			  "\x25\xF9\x20\x22\xF5\x88\xF5\xBA"
-			  "\xE4\xDF\x45\x61\xBF\x8D\x7A\x24"
-			  "\x4B\x92\x71\xD9\x2F\x77\xA7\x95"
-			  "\xA8\x7F\x61\xD5\xA4\x57\xB0\xFB"
-			  "\xB5\x77\xBA\x1C\xEE\x71\xFA\xB0"
-			  "\x16\x4C\x18\x6B\xF2\x69\xA0\x07"
-			  "\xEF\xBE\xEC\x69\xAC\xA8\x63\x9E",
-		.ilen	= 504,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06",
-		.rlen	= 504,
+		.len	= 504,
 	},
 };
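
The hunks above and below all make the same mechanical change: the old
paired *_enc_tv_template/*_dec_tv_template arrays, which carried
.input/.ilen and .result/.rlen, collapse into a single array that names
the plaintext and ciphertext explicitly, so one vector can drive both
the encryption and the decryption test. A minimal sketch of the struct
shape these initializers assume; the field subset is inferred from the
initializers in this patch, and the authoritative definition (with
additional flag fields) lives in crypto/testmgr.h:

	/* Sketch only: subset of fields used by the vectors in this patch. */
	struct cipher_testvec {
		const char *key;	/* cipher key */
		const char *iv;		/* IV, for modes that take one */
		const char *ptext;	/* plaintext (was .input in the enc vectors) */
		const char *ctext;	/* ciphertext (was .result in the enc vectors) */
		unsigned char klen;	/* key length in bytes */
		unsigned short len;	/* length of ptext/ctext (was .ilen/.rlen) */
		unsigned char also_non_np;	/* also run over one contiguous buffer */
		int np;			/* number of scatterlist chunks */
		unsigned short tap[8];	/* chunk sizes; entries sum to .len */
	};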
 
 /*
  * Twofish test vectors.
  */
-static const struct cipher_testvec tf_enc_tv_template[] = {
+static const struct cipher_testvec tf_tv_template[] = {
 	{
 		.key	= zeroed_string,
 		.klen	= 16,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
+		.ptext	= zeroed_string,
+		.ctext	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
 			  "\xb6\xbf\xec\x2f\x2a\xe8\xc3\x5a",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
 			  "\x00\x11\x22\x33\x44\x55\x66\x77",
 		.klen	= 24,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\xcf\xd1\xd2\xe5\xa9\xbe\x9c\xdf"
+		.ptext	= zeroed_string,
+		.ctext	= "\xcf\xd1\xd2\xe5\xa9\xbe\x9c\xdf"
 			  "\x50\x1f\x13\xb8\x92\xbd\x22\x48",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
 			  "\x00\x11\x22\x33\x44\x55\x66\x77"
 			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
 		.klen	= 32,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\x37\x52\x7b\xe0\x05\x23\x34\xb8"
+		.ptext	= zeroed_string,
+		.ctext	= "\x37\x52\x7b\xe0\x05\x23\x34\xb8"
 			  "\x9f\x0c\xfc\xca\xe8\x7c\xfa\x20",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x3F\x85\x62\x3F\x1C\xF9\xD6\x1C"
 			  "\xF9\xD6\xB3\x90\x6D\x4A\x90\x6D"
 			  "\x4A\x27\x04\xE1\x27\x04\xE1\xBE"
 			  "\x9B\x78\xBE\x9B\x78\x55\x32\x0F",
 		.klen	= 32,
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -9225,8 +7456,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x88\xCB\x1E\xC2\xAF\x8A\x97\xFF"
+		.ctext	= "\x88\xCB\x1E\xC2\xAF\x8A\x97\xFF"
 			  "\xF6\x90\x46\x9C\x4A\x0F\x08\xDC"
 			  "\xDE\xAB\xAD\xFA\xFC\xA8\xC2\x3D"
 			  "\xE0\xE4\x8B\x3F\xD5\xA3\xF7\x14"
@@ -9288,224 +7518,52 @@
 			  "\xF0\x29\xD8\x59\x5D\x33\x37\xF9"
 			  "\x58\x33\x9B\x78\xC7\x58\x48\x6B"
 			  "\x2C\x75\x64\xC4\xCA\xC1\x7E\xD5",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
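
The .np/.tap pair kept at the end of the template above drives the
scatter-gather variant of the test: the buffer is split into .np chunks
of the listed sizes, and .also_non_np = 1 additionally runs the same
vector over a single contiguous buffer. By construction the chunk sizes
add back up to .len; for the 496-byte Twofish vector above:

	/* Illustration only: the three .tap chunks reassemble the full buffer. */
	static const unsigned short tap[3] = { 496 - 20, 4, 16 };	/* 476 + 4 + 16 == 496 */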
 
-static const struct cipher_testvec tf_dec_tv_template[] = {
-	{
-		.key	= zeroed_string,
-		.klen	= 16,
-		.input	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
-			  "\xb6\xbf\xec\x2f\x2a\xe8\xc3\x5a",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
-			  "\x00\x11\x22\x33\x44\x55\x66\x77",
-		.klen	= 24,
-		.input	= "\xcf\xd1\xd2\xe5\xa9\xbe\x9c\xdf"
-			  "\x50\x1f\x13\xb8\x92\xbd\x22\x48",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
-			  "\x00\x11\x22\x33\x44\x55\x66\x77"
-			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
-		.klen	= 32,
-		.input	= "\x37\x52\x7b\xe0\x05\x23\x34\xb8"
-			  "\x9f\x0c\xfc\xca\xe8\x7c\xfa\x20",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x3F\x85\x62\x3F\x1C\xF9\xD6\x1C"
-			  "\xF9\xD6\xB3\x90\x6D\x4A\x90\x6D"
-			  "\x4A\x27\x04\xE1\x27\x04\xE1\xBE"
-			  "\x9B\x78\xBE\x9B\x78\x55\x32\x0F",
-		.klen	= 32,
-		.input	= "\x88\xCB\x1E\xC2\xAF\x8A\x97\xFF"
-			  "\xF6\x90\x46\x9C\x4A\x0F\x08\xDC"
-			  "\xDE\xAB\xAD\xFA\xFC\xA8\xC2\x3D"
-			  "\xE0\xE4\x8B\x3F\xD5\xA3\xF7\x14"
-			  "\x34\x9E\xB6\x08\xB2\xDD\xA8\xF5"
-			  "\xDF\xFA\xC7\xE8\x09\x50\x76\x08"
-			  "\xA2\xB6\x6A\x59\xC0\x2B\x6D\x05"
-			  "\x89\xF6\x82\xF0\xD3\xDB\x06\x02"
-			  "\xB5\x11\x5C\x5E\x79\x1A\xAC\x43"
-			  "\x5C\xC0\x30\x4B\x6B\x16\xA1\x40"
-			  "\x80\x27\x88\xBA\x2C\x74\x42\xE0"
-			  "\x1B\xA5\x85\x08\xB9\xE6\x22\x7A"
-			  "\x36\x3B\x0D\x9F\xA0\x22\x6C\x2A"
-			  "\x91\x75\x47\xBC\x67\x21\x4E\xF9"
-			  "\xEA\xFF\xD9\xD5\xC0\xFC\x9E\x2C"
-			  "\x3E\xAD\xC6\x61\x0E\x93\x7A\x22"
-			  "\x09\xC8\x8D\xC1\x8E\xB4\x8B\x5C"
-			  "\xC6\x24\x42\xB8\x23\x66\x80\xA9"
-			  "\x32\x0B\x7A\x29\xBF\xB3\x0B\x63"
-			  "\x43\x27\x13\xA9\xBE\xEB\xBD\xF3"
-			  "\x33\x62\x70\xE2\x1B\x86\x7A\xA1"
-			  "\x51\x4A\x16\xFE\x29\x63\x7E\xD0"
-			  "\x7A\xA4\x6E\x2C\xF8\xC1\xDB\xE8"
-			  "\xCB\x4D\xD2\x8C\x04\x14\xB4\x66"
-			  "\x41\xB7\x3A\x96\x16\x7C\x1D\x5B"
-			  "\xB6\x41\x42\x64\x43\xEE\x6E\x7C"
-			  "\x8B\xAF\x01\x9C\xA4\x6E\x75\x8F"
-			  "\xDE\x10\x9F\xA6\xE7\xD6\x44\x97"
-			  "\x66\xA3\x96\x0F\x1C\x25\x60\xF5"
-			  "\x3C\x2E\x32\x69\x0E\x82\xFF\x27"
-			  "\x0F\xB5\x06\xDA\xD8\x31\x15\x6C"
-			  "\xDF\x18\x6C\x87\xF5\x3B\x11\x9A"
-			  "\x1B\x42\x1F\x5B\x29\x19\x96\x13"
-			  "\x68\x2E\x5E\x08\x1C\x8F\x32\x4B"
-			  "\x81\x77\x6D\xF4\xA0\x01\x42\xEC"
-			  "\xDD\x5B\xFD\x3A\x8E\x6A\x14\xFB"
-			  "\x83\x54\xDF\x0F\x86\xB7\xEA\x40"
-			  "\x46\x39\xF7\x2A\x89\x8D\x4E\x96"
-			  "\x5F\x5F\x6D\x76\xC6\x13\x9D\x3D"
-			  "\x1D\x5F\x0C\x7D\xE2\xBC\xC2\x16"
-			  "\x16\xBE\x89\x3E\xB0\x61\xA2\x5D"
-			  "\xAF\xD1\x40\x5F\x1A\xB8\x26\x41"
-			  "\xC6\xBD\x36\xEF\xED\x29\x50\x6D"
-			  "\x10\xEF\x26\xE8\xA8\x93\x11\x3F"
-			  "\x2D\x1F\x88\x20\x77\x45\xF5\x66"
-			  "\x08\xB9\xF1\xEF\xB1\x93\xA8\x81"
-			  "\x65\xC5\xCD\x3E\x8C\x06\x60\x2C"
-			  "\xB2\x10\x7A\xCA\x05\x25\x59\xDB"
-			  "\xC7\x28\xF5\x20\x35\x52\x9E\x62"
-			  "\xF8\x88\x24\x1C\x4D\x84\x12\x39"
-			  "\x39\xE4\x2E\xF4\xD4\x9D\x2B\xBC"
-			  "\x87\x66\xE6\xC0\x6B\x31\x9A\x66"
-			  "\x03\xDC\x95\xD8\x6B\xD0\x30\x8F"
-			  "\xDF\x8F\x8D\xFA\xEC\x1F\x08\xBD"
-			  "\xA3\x63\xE2\x71\x4F\x03\x94\x87"
-			  "\x50\xDF\x15\x1F\xED\x3A\xA3\x7F"
-			  "\x1F\x2A\xB5\xA1\x69\xAC\x4B\x0D"
-			  "\x84\x9B\x2A\xE9\x55\xDD\x46\x91"
-			  "\x15\x33\xF3\x2B\x9B\x46\x97\x00"
-			  "\xF0\x29\xD8\x59\x5D\x33\x37\xF9"
-			  "\x58\x33\x9B\x78\xC7\x58\x48\x6B"
-			  "\x2C\x75\x64\xC4\xCA\xC1\x7E\xD5",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec tf_cbc_enc_tv_template[] = {
+static const struct cipher_testvec tf_cbc_tv_template[] = {
 	{ /* Generated with Nettle */
 		.key	= zeroed_string,
 		.klen	= 16,
 		.iv	= zeroed_string,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
+		.ptext	= zeroed_string,
+		.ctext	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
 			  "\xb6\xbf\xec\x2f\x2a\xe8\xc3\x5a",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= zeroed_string,
 		.klen	= 16,
 		.iv	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
 			  "\xb6\xbf\xec\x2f\x2a\xe8\xc3\x5a",
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\xd4\x91\xdb\x16\xe7\xb1\xc3\x9e"
+		.ptext	= zeroed_string,
+		.ctext	= "\xd4\x91\xdb\x16\xe7\xb1\xc3\x9e"
 			  "\x86\xcb\x08\x6b\x78\x9f\x54\x19",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= zeroed_string,
 		.klen	= 16,
 		.iv	= "\xd4\x91\xdb\x16\xe7\xb1\xc3\x9e"
 			  "\x86\xcb\x08\x6b\x78\x9f\x54\x19",
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\x05\xef\x8c\x61\xa8\x11\x58\x26"
+		.ptext	= zeroed_string,
+		.ctext	= "\x05\xef\x8c\x61\xa8\x11\x58\x26"
 			  "\x34\xba\x5c\xb7\x10\x6a\xa6\x41",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= zeroed_string,
 		.klen	= 16,
 		.iv	= zeroed_string,
-		.input	= zeroed_string,
-		.ilen	= 48,
-		.result	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
+		.ptext	= zeroed_string,
+		.ctext	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
 			  "\xb6\xbf\xec\x2f\x2a\xe8\xc3\x5a"
 			  "\xd4\x91\xdb\x16\xe7\xb1\xc3\x9e"
 			  "\x86\xcb\x08\x6b\x78\x9f\x54\x19"
 			  "\x05\xef\x8c\x61\xa8\x11\x58\x26"
 			  "\x34\xba\x5c\xb7\x10\x6a\xa6\x41",
-		.rlen	= 48,
+		.len	= 48,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -9514,7 +7572,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -9576,8 +7634,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\xC8\xFF\xF2\x53\xA6\x27\x09\xD1"
+		.ctext	= "\xC8\xFF\xF2\x53\xA6\x27\x09\xD1"
 			  "\x33\x38\xC2\xC0\x0C\x14\x7E\xB5"
 			  "\x26\x1B\x05\x0C\x05\x12\x3F\xC0"
 			  "\xF9\x1C\x02\x28\x40\x96\x6F\xD0"
@@ -9639,197 +7696,14 @@
 			  "\x69\xA1\x62\xBD\x49\x3A\x9D\x91"
 			  "\x30\x70\x56\xA4\x37\xDD\x7C\xC0"
 			  "\x0A\xA3\x30\x10\x26\x25\x41\x2C",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec tf_cbc_dec_tv_template[] = {
-	{ /* Reverse of the first four above */
-		.key	= zeroed_string,
-		.klen	= 16,
-		.iv	= zeroed_string,
-		.input	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
-			  "\xb6\xbf\xec\x2f\x2a\xe8\xc3\x5a",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, {
-		.key	= zeroed_string,
-		.klen	= 16,
-		.iv	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
-			  "\xb6\xbf\xec\x2f\x2a\xe8\xc3\x5a",
-		.input	= "\xd4\x91\xdb\x16\xe7\xb1\xc3\x9e"
-			  "\x86\xcb\x08\x6b\x78\x9f\x54\x19",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, {
-		.key	= zeroed_string,
-		.klen	= 16,
-		.iv	= "\xd4\x91\xdb\x16\xe7\xb1\xc3\x9e"
-			  "\x86\xcb\x08\x6b\x78\x9f\x54\x19",
-		.input	= "\x05\xef\x8c\x61\xa8\x11\x58\x26"
-			  "\x34\xba\x5c\xb7\x10\x6a\xa6\x41",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, {
-		.key	= zeroed_string,
-		.klen	= 16,
-		.iv	= zeroed_string,
-		.input	= "\x9f\x58\x9f\x5c\xf6\x12\x2c\x32"
-			  "\xb6\xbf\xec\x2f\x2a\xe8\xc3\x5a"
-			  "\xd4\x91\xdb\x16\xe7\xb1\xc3\x9e"
-			  "\x86\xcb\x08\x6b\x78\x9f\x54\x19"
-			  "\x05\xef\x8c\x61\xa8\x11\x58\x26"
-			  "\x34\xba\x5c\xb7\x10\x6a\xa6\x41",
-		.ilen	= 48,
-		.result	= zeroed_string,
-		.rlen	= 48,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\xC8\xFF\xF2\x53\xA6\x27\x09\xD1"
-			  "\x33\x38\xC2\xC0\x0C\x14\x7E\xB5"
-			  "\x26\x1B\x05\x0C\x05\x12\x3F\xC0"
-			  "\xF9\x1C\x02\x28\x40\x96\x6F\xD0"
-			  "\x3D\x32\xDF\xDA\x56\x00\x6E\xEE"
-			  "\x5B\x2A\x72\x9D\xC2\x4D\x19\xBC"
-			  "\x8C\x53\xFA\x87\x6F\xDD\x81\xA3"
-			  "\xB1\xD3\x44\x65\xDF\xE7\x63\x38"
-			  "\x4A\xFC\xDC\xEC\x3F\x26\x8E\xB8"
-			  "\x43\xFC\xFE\x18\xB5\x11\x6D\x31"
-			  "\x81\x8B\x0D\x75\xF6\x80\xEC\x84"
-			  "\x04\xB9\xE6\x09\x63\xED\x39\xDB"
-			  "\xC3\xF6\x14\xD6\x6E\x5E\x8B\xBD"
-			  "\x3E\xFA\xD7\x98\x50\x6F\xD9\x63"
-			  "\x02\xCD\x0D\x39\x4B\x0D\xEC\x80"
-			  "\xE3\x6A\x17\xF4\xCC\xAD\xFF\x68"
-			  "\x45\xDD\xC8\x83\x1D\x41\x96\x0D"
-			  "\x91\x2E\x05\xD3\x59\x82\xE0\x43"
-			  "\x90\x4F\xB9\xF7\xAD\x6B\x2E\xAF"
-			  "\xA7\x84\x00\x53\xCD\x6F\xD1\x0C"
-			  "\x4E\xF9\x5A\x23\xFB\xCA\xC7\xD3"
-			  "\xA9\xAA\x9D\xB2\x3F\x66\xF1\xAC"
-			  "\x25\x21\x8F\xF7\xEF\xF2\x6A\xDF"
-			  "\xE8\xDA\x75\x1A\x8A\xF1\xDD\x38"
-			  "\x1F\xF9\x3D\x68\x4A\xBB\x9E\x34"
-			  "\x1F\x66\x1F\x9C\x2B\x54\xFF\x60"
-			  "\x7F\x29\x4B\x55\x80\x8F\x4E\xA7"
-			  "\xA6\x9A\x0A\xD9\x0D\x19\x00\xF8"
-			  "\x1F\xBC\x0C\x40\x6B\xEC\x99\x25"
-			  "\x94\x70\x74\x0E\x1D\xC5\xBC\x12"
-			  "\xF3\x42\xBE\x95\xBF\xFB\x4E\x55"
-			  "\x9A\xB9\xCE\x14\x16\x5B\xDC\xD3"
-			  "\x75\x42\x62\x04\x31\x1F\x95\x7C"
-			  "\x66\x1A\x97\xDC\x2F\x40\x5C\x39"
-			  "\x78\xE6\x02\xDB\x49\xE1\xC6\x47"
-			  "\xC2\x78\x9A\xBB\xF3\xBE\xCB\x93"
-			  "\xD8\xB8\xE8\xBB\x8C\xB3\x9B\xA7"
-			  "\xC2\x89\xF3\x91\x88\x83\x3D\xF0"
-			  "\x29\xA2\xCD\xB5\x79\x16\xC2\x40"
-			  "\x11\x03\x8E\x9C\xFD\xC9\x43\xC4"
-			  "\xC2\x19\xF0\x4A\x32\xEF\x0C\x2B"
-			  "\xD3\x2B\xE9\xD4\x4C\xDE\x95\xCF"
-			  "\x04\x03\xD3\x2C\x7F\x82\xC8\xFA"
-			  "\x0F\xD8\x7A\x39\x7B\x01\x41\x9C"
-			  "\x78\xB6\xC9\xBF\xF9\x78\x57\x88"
-			  "\xB1\xA5\xE1\xE0\xD9\x16\xD4\xC8"
-			  "\xEE\xC4\xBE\x7B\x55\x59\x00\x48"
-			  "\x1B\xBC\x14\xFA\x2A\x9D\xC9\x1C"
-			  "\xFB\x28\x3F\x95\xDD\xB7\xD6\xCE"
-			  "\x3A\x7F\x09\x0C\x0E\x69\x30\x7D"
-			  "\xBC\x68\x9C\x91\x2A\x59\x57\x04"
-			  "\xED\x1A\x1E\x00\xB1\x85\x92\x04"
-			  "\x28\x8C\x0C\x3C\xC1\xD5\x12\xF7"
-			  "\x4C\x3E\xB0\xE7\x86\x62\x68\x91"
-			  "\xFC\xC4\xE2\xCE\xA6\xDC\x5E\x93"
-			  "\x5D\x8D\x8C\x68\xB3\xB2\xB9\x64"
-			  "\x16\xB8\xC8\x6F\xD8\xEE\x21\xBD"
-			  "\xAC\x18\x0C\x7D\x0D\x05\xAB\xF1"
-			  "\xFA\xDD\xE2\x48\xDF\x4C\x02\x39"
-			  "\x69\xA1\x62\xBD\x49\x3A\x9D\x91"
-			  "\x30\x70\x56\xA4\x37\xDD\x7C\xC0"
-			  "\x0A\xA3\x30\x10\x26\x25\x41\x2C",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec tf_ctr_enc_tv_template[] = {
+static const struct cipher_testvec tf_ctr_tv_template[] = {
 	{ /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -9838,7 +7712,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -9900,8 +7774,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE"
+		.ctext	= "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE"
 			  "\x70\x9E\xC5\x4B\xC9\xD4\xA1\x30"
 			  "\x26\x9B\x89\xA1\xEE\x43\xE0\x52"
 			  "\x55\x17\x4E\xC7\x0E\x33\x1F\xF1"
@@ -9963,7 +7836,7 @@
 			  "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3"
 			  "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC"
 			  "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF",
-		.rlen	= 496,
+		.len	= 496,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -9972,7 +7845,7 @@
 		.klen	= 32,
 		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
 			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -10034,8 +7907,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\xEB\x44\xAF\x49\x27\xB8\xFB\x44"
+		.ctext	= "\xEB\x44\xAF\x49\x27\xB8\xFB\x44"
 			  "\x4C\xA6\xC3\x0C\x8B\xD0\x01\x0C"
 			  "\x53\xC8\x16\x38\xDE\x40\x4F\x91"
 			  "\x25\x6D\x4C\xA0\x9A\x87\x1E\xDA"
@@ -10097,7 +7969,7 @@
 			  "\x4C\xB6\xF8\xF4\x5F\x48\x52\x54"
 			  "\x94\x63\xA8\x4E\xCF\xD2\x1B\x1B"
 			  "\x22\x18\x6A\xAF\x6E\x3E\xE1\x0D",
-		.rlen	= 496,
+		.len	= 496,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -10106,7 +7978,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -10169,8 +8041,7 @@
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
 			  "\x2B\xC2\x59",
-		.ilen	= 499,
-		.result	= "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE"
+		.ctext	= "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE"
 			  "\x70\x9E\xC5\x4B\xC9\xD4\xA1\x30"
 			  "\x26\x9B\x89\xA1\xEE\x43\xE0\x52"
 			  "\x55\x17\x4E\xC7\x0E\x33\x1F\xF1"
@@ -10233,425 +8104,14 @@
 			  "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC"
 			  "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF"
 			  "\x6C\x82\x9D",
-		.rlen	= 499,
+		.len	= 499,
 		.also_non_np = 1,
 		.np	= 2,
 		.tap	= { 499 - 16, 16 },
 	},
 };
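
One of the CTR vectors above deliberately starts the counter block near
its maximum (an IV ending in ...\xFF\xFD) so that the counter wraps to
all-zero while the 496-byte message is being processed. A sketch of the
big-endian 128-bit increment that vector exercises; the helper name is
hypothetical, for illustration only:

	/* Hypothetical helper: big-endian increment of a 16-byte CTR
	 * block. With an IV of FF..FD it wraps to all-zero within the
	 * first few of the ~31 blocks the message spans. */
	static void ctr128_inc(unsigned char ctr[16])
	{
		int i;

		for (i = 15; i >= 0; i--)
			if (++ctr[i] != 0)
				break;
	}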
 
-static const struct cipher_testvec tf_ctr_dec_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE"
-			  "\x70\x9E\xC5\x4B\xC9\xD4\xA1\x30"
-			  "\x26\x9B\x89\xA1\xEE\x43\xE0\x52"
-			  "\x55\x17\x4E\xC7\x0E\x33\x1F\xF1"
-			  "\x9F\x8D\x40\x9F\x24\xFD\x92\xA0"
-			  "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA"
-			  "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60"
-			  "\x01\x41\x21\x12\x38\xAB\x52\x4F"
-			  "\xA8\x57\x20\xE0\x21\x6A\x17\x0D"
-			  "\x0E\xF9\x8E\x49\x42\x00\x3C\x94"
-			  "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29"
-			  "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC"
-			  "\x29\x35\x25\x2F\xE7\x11\x6C\x68"
-			  "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9"
-			  "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA"
-			  "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E"
-			  "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E"
-			  "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C"
-			  "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69"
-			  "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58"
-			  "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C"
-			  "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06"
-			  "\x02\xC5\x03\x9D\xC4\x48\x15\x66"
-			  "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB"
-			  "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A"
-			  "\x23\x61\x48\xEA\x80\x04\x27\xAA"
-			  "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A"
-			  "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23"
-			  "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D"
-			  "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D"
-			  "\x96\xBA\x36\x11\x45\x41\xDA\xCE"
-			  "\xA4\x48\x80\x8B\x06\xF4\x98\x89"
-			  "\x8B\x23\x08\x53\xF4\xD4\x5A\x24"
-			  "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0"
-			  "\xF8\xFE\x09\x0C\x75\x05\x38\x0B"
-			  "\x7C\x81\xDE\x9D\xE4\x61\x37\x63"
-			  "\x63\xAD\x12\xD2\x04\xB9\xCE\x45"
-			  "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74"
-			  "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5"
-			  "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4"
-			  "\xEB\x6E\x96\xE8\x43\x80\xB5\x51"
-			  "\x61\x2D\x48\xAA\x07\x65\x11\x8C"
-			  "\x48\xE3\x90\x7E\x78\x3A\xEC\x97"
-			  "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD"
-			  "\x83\x29\x0E\x1A\x81\x73\x7B\xE0"
-			  "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D"
-			  "\x49\xA4\x2F\x6E\xBE\x68\x99\x08"
-			  "\x99\xAA\x4C\x12\x04\xAE\x1F\x77"
-			  "\x35\x88\xF1\x65\x06\x0A\x0B\x4D"
-			  "\x47\xF9\x50\x38\x5D\x71\xF9\x6E"
-			  "\xDE\xEC\x61\x35\x2C\x4C\x96\x50"
-			  "\xE8\x28\x93\x9C\x7E\x01\xC6\x04"
-			  "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D"
-			  "\x11\xE9\x43\x83\x76\xAA\x53\x37"
-			  "\x0C\x1D\x39\x89\x53\x72\x09\x7E"
-			  "\xD9\x85\x16\x04\xA5\x2C\x05\x6F"
-			  "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9"
-			  "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D"
-			  "\x7C\x36\xC7\x71\x70\x9C\x10\xD8"
-			  "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3"
-			  "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC"
-			  "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\xEB\x44\xAF\x49\x27\xB8\xFB\x44"
-			  "\x4C\xA6\xC3\x0C\x8B\xD0\x01\x0C"
-			  "\x53\xC8\x16\x38\xDE\x40\x4F\x91"
-			  "\x25\x6D\x4C\xA0\x9A\x87\x1E\xDA"
-			  "\x88\x7E\x89\xE9\x67\x2B\x83\xA2"
-			  "\x5F\x2E\x23\x3E\x45\xB9\x77\x7B"
-			  "\xA6\x7E\x47\x36\x81\x9F\x9B\xF3"
-			  "\xE0\xF0\xD7\x47\xA9\xC8\xEF\x33"
-			  "\x0C\x43\xFE\x67\x50\x0A\x2C\x3E"
-			  "\xA0\xE1\x25\x8E\x80\x07\x4A\xC0"
-			  "\x64\x89\x9F\x6A\x27\x96\x07\xA6"
-			  "\x9B\xC8\x1B\x21\x60\xAE\x5D\x01"
-			  "\xE2\xCD\xC8\xAA\x6C\x9D\x1C\x34"
-			  "\x39\x18\x09\xA4\x82\x59\x78\xE7"
-			  "\xFC\x59\x65\xF2\x94\xFF\xFB\xE2"
-			  "\x3C\xDA\xB1\x90\x95\xBF\x91\xE3"
-			  "\xE6\x87\x31\x9E\x16\x85\xAD\xB1"
-			  "\x4C\xAE\x43\x4D\x19\x58\xB5\x5E"
-			  "\x2E\xF5\x09\xAA\x39\xF4\xC0\xB3"
-			  "\xD4\x4D\xDB\x73\x7A\xD4\xF1\xBF"
-			  "\x89\x16\x4D\x2D\xA2\x26\x33\x72"
-			  "\x18\x33\x7E\xD6\xD2\x16\xA4\x54"
-			  "\xF4\x8C\xB3\x52\xDF\x21\x9C\xEB"
-			  "\xBF\x49\xD3\xF9\x05\x06\xCB\xD2"
-			  "\xA9\xD2\x3B\x6E\x19\x8C\xBC\x19"
-			  "\xAB\x89\xD6\xD8\xCD\x56\x89\x5E"
-			  "\xAC\x00\xE3\x50\x63\x4A\x80\x9A"
-			  "\x05\xBC\x50\x39\xD3\x32\xD9\x0D"
-			  "\xE3\x20\x0D\x75\x54\xEC\xE6\x31"
-			  "\x14\xB9\x3A\x59\x00\x43\x37\x8E"
-			  "\x8C\x5A\x79\x62\x14\x76\x8A\xAE"
-			  "\x8F\xCC\xA1\x6C\x38\x78\xDD\x2D"
-			  "\x8B\x6D\xEA\xBD\x7B\x25\xFF\x60"
-			  "\xC9\x87\xB1\x79\x1E\xA5\x86\x68"
-			  "\x81\xB4\xE2\xC1\x05\x7D\x3A\x73"
-			  "\xD0\xDA\x75\x77\x9E\x05\x27\xF1"
-			  "\x08\xA9\x66\x64\x6C\xBC\x82\x17"
-			  "\x2C\x23\x5F\x62\x4D\x02\x1A\x58"
-			  "\xE7\xB7\x23\x6D\xE2\x20\xDA\xEF"
-			  "\xB4\xB3\x3F\xB2\x2B\x69\x98\x83"
-			  "\x95\x87\x13\x57\x60\xD7\xB5\xB1"
-			  "\xEE\x0A\x2F\x95\x36\x4C\x76\x5D"
-			  "\x5F\xD9\x19\xED\xB9\xA5\x48\xBF"
-			  "\xC8\xAB\x0F\x71\xCC\x61\x8E\x0A"
-			  "\xD0\x29\x44\xA8\xB9\xC1\xE8\xC8"
-			  "\xC9\xA8\x28\x81\xFB\x50\xF2\xF0"
-			  "\x26\xAE\x39\xB8\x91\xCD\xA8\xAC"
-			  "\xDE\x55\x1B\x50\x14\x53\x44\x17"
-			  "\x54\x46\xFC\xB1\xE4\x07\x6B\x9A"
-			  "\x01\x14\xF0\x2E\x2E\xDB\x46\x1B"
-			  "\x1A\x09\x97\xA9\xB6\x97\x79\x06"
-			  "\xFB\xCB\x85\xCF\xDD\xA1\x41\xB1"
-			  "\x00\xAA\xF7\xE0\x89\x73\xFB\xE5"
-			  "\xBF\x84\xDB\xC9\xCD\xC4\xA2\x0D"
-			  "\x3B\xAC\xF9\xDF\x96\xBF\x88\x23"
-			  "\x41\x67\xA1\x24\x99\x7E\xCC\x9B"
-			  "\x02\x8F\x6A\x49\xF6\x25\xBA\x7A"
-			  "\xF4\x78\xFD\x79\x62\x63\x4F\x14"
-			  "\xD6\x11\x11\x04\x05\x5F\x7E\xEA"
-			  "\x4C\xB6\xF8\xF4\x5F\x48\x52\x54"
-			  "\x94\x63\xA8\x4E\xCF\xD2\x1B\x1B"
-			  "\x22\x18\x6A\xAF\x6E\x3E\xE1\x0D",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\xDF\xDD\x69\xFA\xB0\x2E\xFD\xFE"
-			  "\x70\x9E\xC5\x4B\xC9\xD4\xA1\x30"
-			  "\x26\x9B\x89\xA1\xEE\x43\xE0\x52"
-			  "\x55\x17\x4E\xC7\x0E\x33\x1F\xF1"
-			  "\x9F\x8D\x40\x9F\x24\xFD\x92\xA0"
-			  "\xBC\x8F\x35\xDD\x67\x38\xD8\xAA"
-			  "\xCF\xF8\x48\xCA\xFB\xE4\x5C\x60"
-			  "\x01\x41\x21\x12\x38\xAB\x52\x4F"
-			  "\xA8\x57\x20\xE0\x21\x6A\x17\x0D"
-			  "\x0E\xF9\x8E\x49\x42\x00\x3C\x94"
-			  "\x14\xC0\xD0\x8D\x8A\x98\xEB\x29"
-			  "\xEC\xAE\x96\x44\xC0\x3C\x48\xDC"
-			  "\x29\x35\x25\x2F\xE7\x11\x6C\x68"
-			  "\xC8\x67\x0A\x2F\xF4\x07\xBE\xF9"
-			  "\x2C\x31\x87\x40\xAB\xB2\xB6\xFA"
-			  "\xD2\xC9\x6D\x5C\x50\xE9\xE6\x7E"
-			  "\xE3\x0A\xD2\xD5\x6D\x8D\x64\x9E"
-			  "\x70\xCE\x03\x76\xDD\xE0\xF0\x8C"
-			  "\x84\x86\x8B\x6A\xFE\xC7\xF9\x69"
-			  "\x2E\xFE\xFC\xC2\xC4\x1A\x55\x58"
-			  "\xB3\xBE\xE2\x7E\xED\x39\x42\x6C"
-			  "\xB4\x42\x97\x9A\xEC\xE1\x0A\x06"
-			  "\x02\xC5\x03\x9D\xC4\x48\x15\x66"
-			  "\x35\x6A\xC2\xC9\xA2\x26\x30\xBB"
-			  "\xDB\x2D\xC8\x08\x2B\xA0\x29\x1A"
-			  "\x23\x61\x48\xEA\x80\x04\x27\xAA"
-			  "\x69\x49\xE8\xE8\x4A\x83\x6B\x5A"
-			  "\xCA\x7C\xD3\xB1\xB5\x0B\xCC\x23"
-			  "\x74\x1F\xA9\x87\xCD\xED\xC0\x2D"
-			  "\xBF\xEB\xCF\x16\x2D\x2A\x2E\x1D"
-			  "\x96\xBA\x36\x11\x45\x41\xDA\xCE"
-			  "\xA4\x48\x80\x8B\x06\xF4\x98\x89"
-			  "\x8B\x23\x08\x53\xF4\xD4\x5A\x24"
-			  "\x8B\xF8\x43\x73\xD1\xEE\xC4\xB0"
-			  "\xF8\xFE\x09\x0C\x75\x05\x38\x0B"
-			  "\x7C\x81\xDE\x9D\xE4\x61\x37\x63"
-			  "\x63\xAD\x12\xD2\x04\xB9\xCE\x45"
-			  "\x5A\x1A\x6E\xB3\x78\x2A\xA4\x74"
-			  "\x86\xD0\xE3\xFF\xDA\x38\x9C\xB5"
-			  "\xB8\xB1\xDB\x38\x2F\xC5\x6A\xB4"
-			  "\xEB\x6E\x96\xE8\x43\x80\xB5\x51"
-			  "\x61\x2D\x48\xAA\x07\x65\x11\x8C"
-			  "\x48\xE3\x90\x7E\x78\x3A\xEC\x97"
-			  "\x05\x3D\x84\xE7\x90\x2B\xAA\xBD"
-			  "\x83\x29\x0E\x1A\x81\x73\x7B\xE0"
-			  "\x7A\x01\x4A\x37\x3B\x77\x7F\x8D"
-			  "\x49\xA4\x2F\x6E\xBE\x68\x99\x08"
-			  "\x99\xAA\x4C\x12\x04\xAE\x1F\x77"
-			  "\x35\x88\xF1\x65\x06\x0A\x0B\x4D"
-			  "\x47\xF9\x50\x38\x5D\x71\xF9\x6E"
-			  "\xDE\xEC\x61\x35\x2C\x4C\x96\x50"
-			  "\xE8\x28\x93\x9C\x7E\x01\xC6\x04"
-			  "\xB2\xD6\xBC\x6C\x17\xEB\xC1\x7D"
-			  "\x11\xE9\x43\x83\x76\xAA\x53\x37"
-			  "\x0C\x1D\x39\x89\x53\x72\x09\x7E"
-			  "\xD9\x85\x16\x04\xA5\x2C\x05\x6F"
-			  "\x17\x0C\x6E\x66\xAA\x84\xA7\xD9"
-			  "\xE2\xD9\xC4\xEB\x43\x3E\xB1\x8D"
-			  "\x7C\x36\xC7\x71\x70\x9C\x10\xD8"
-			  "\xE8\x47\x2A\x4D\xFD\xA1\xBC\xE3"
-			  "\xB9\x32\xE2\xC1\x82\xAC\xFE\xCC"
-			  "\xC5\xC9\x7F\x9E\xCF\x33\x7A\xDF"
-			  "\x6C\x82\x9D",
-		.ilen	= 499,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59",
-		.rlen	= 499,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 499 - 16, 16 },
-	},
-};
-
-static const struct cipher_testvec tf_lrw_enc_tv_template[] = {
+static const struct cipher_testvec tf_lrw_tv_template[] = {
 	/* Generated from AES-LRW test vectors */
 	{
 		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
@@ -10661,12 +8121,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\xa1\x6c\x50\x69\x26\xa4\xef\x7b"
+		.ctext	= "\xa1\x6c\x50\x69\x26\xa4\xef\x7b"
 			  "\x7c\xc6\x91\xeb\x72\xdd\x9b\xee",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
 			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
@@ -10675,12 +8134,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\xab\x72\x0a\xad\x3b\x0c\xf0\xc9"
+		.ctext	= "\xab\x72\x0a\xad\x3b\x0c\xf0\xc9"
 			  "\x42\x2f\xf1\xae\xf1\x3c\xb1\xbd",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
 			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
@@ -10689,12 +8147,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x85\xa7\x56\x67\x08\xfa\x42\xe1"
+		.ctext	= "\x85\xa7\x56\x67\x08\xfa\x42\xe1"
 			  "\x22\xe6\x82\xfc\xd9\xb4\xd7\xd4",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
 			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
@@ -10704,12 +8161,11 @@
 		.klen	= 40,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\xd2\xaf\x69\x35\x24\x1d\x0e\x1c"
+		.ctext	= "\xd2\xaf\x69\x35\x24\x1d\x0e\x1c"
 			  "\x84\x8b\x05\xe4\xa2\x2f\x16\xf5",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
 			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
@@ -10719,12 +8175,11 @@
 		.klen	= 40,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x4a\x23\x56\xd7\xff\x90\xd0\x9a"
+		.ctext	= "\x4a\x23\x56\xd7\xff\x90\xd0\x9a"
 			  "\x0d\x7c\x26\xfc\xf0\xf0\xf6\xe4",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
 			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
@@ -10735,12 +8190,11 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x30\xaf\x26\x05\x9d\x5d\x0a\x58"
+		.ctext	= "\x30\xaf\x26\x05\x9d\x5d\x0a\x58"
 			  "\xe2\xe7\xce\x8a\xb2\x56\x6d\x76",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
 			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
@@ -10751,12 +8205,11 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\xdf\xcf\xdc\xd2\xe1\xcf\x86\x75"
+		.ctext	= "\xdf\xcf\xdc\xd2\xe1\xcf\x86\x75"
 			  "\x17\x66\x5e\x0c\x14\xa1\x3d\x40",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
 			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
@@ -10767,7 +8220,7 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+		.ptext	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
 			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
 			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
 			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
@@ -10831,8 +8284,7 @@
 			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
 			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
 			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.ilen	= 512,
-		.result	= "\x30\x38\xeb\xaf\x12\x43\x1a\x89"
+		.ctext	= "\x30\x38\xeb\xaf\x12\x43\x1a\x89"
 			  "\x62\xa2\x36\xe5\xcf\x77\x1e\xd9"
 			  "\x08\xc3\x0d\xdd\x95\xab\x19\x96"
 			  "\x27\x52\x41\xc3\xca\xfb\xf6\xee"
@@ -10896,267 +8348,14 @@
 			  "\x81\x00\xd3\xfe\x4c\x3c\x05\x61"
 			  "\x80\x18\xc4\x6c\x03\xd3\xb7\xba"
 			  "\x11\xd7\xb8\x6e\xea\xe1\x80\x30",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
 	},
 };
 
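The consolidation pattern in these hunks is uniform: each *_enc_tv_template is renamed to a single *_tv_template, and the matching *_dec_tv_template (removed just below) is dropped outright, since its entries were, as its own comment notes, only the encryption vectors with input and result swapped. With one vector per transform, the test manager can exercise both directions from the same data; a minimal sketch of the idea, assuming kernel headers for u8, memcmp() and the error codes (check_one_direction and its crypt callback are hypothetical names, not testmgr code):

	static int check_one_direction(const struct cipher_testvec *vec, bool enc,
				       int (*crypt)(const u8 *src, u8 *dst,
						    unsigned int len))
	{
		/* Encrypting checks ptext -> ctext; decrypting checks the reverse. */
		const u8 *src  = enc ? vec->ptext : vec->ctext;
		const u8 *want = enc ? vec->ctext : vec->ptext;
		u8 dst[512];	/* the largest .len used by these templates */
		int err;

		if (vec->len > sizeof(dst))
			return -EOVERFLOW;
		err = crypt(src, dst, vec->len);
		if (err)
			return err;
		return memcmp(dst, want, vec->len) ? -EINVAL : 0;
	}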
-static const struct cipher_testvec tf_lrw_dec_tv_template[] = {
-	/* Generated from AES-LRW test vectors */
-	/* same as enc vectors with input and result reversed */
-	{
-		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
-			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
-			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
-			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\xa1\x6c\x50\x69\x26\xa4\xef\x7b"
-			  "\x7c\xc6\x91\xeb\x72\xdd\x9b\xee",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
-			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
-			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
-			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\xab\x72\x0a\xad\x3b\x0c\xf0\xc9"
-			  "\x42\x2f\xf1\xae\xf1\x3c\xb1\xbd",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
-			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
-			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
-			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x85\xa7\x56\x67\x08\xfa\x42\xe1"
-			  "\x22\xe6\x82\xfc\xd9\xb4\xd7\xd4",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
-			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
-			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
-			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
-			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\xd2\xaf\x69\x35\x24\x1d\x0e\x1c"
-			  "\x84\x8b\x05\xe4\xa2\x2f\x16\xf5",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
-			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
-			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
-			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
-			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x4a\x23\x56\xd7\xff\x90\xd0\x9a"
-			  "\x0d\x7c\x26\xfc\xf0\xf0\xf6\xe4",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\xaf\x26\x05\x9d\x5d\x0a\x58"
-			  "\xe2\xe7\xce\x8a\xb2\x56\x6d\x76",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
-			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
-			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
-			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
-			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
-			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\xdf\xcf\xdc\xd2\xe1\xcf\x86\x75"
-			  "\x17\x66\x5e\x0c\x14\xa1\x3d\x40",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x38\xeb\xaf\x12\x43\x1a\x89"
-			  "\x62\xa2\x36\xe5\xcf\x77\x1e\xd9"
-			  "\x08\xc3\x0d\xdd\x95\xab\x19\x96"
-			  "\x27\x52\x41\xc3\xca\xfb\xf6\xee"
-			  "\x40\x2d\xdf\xdd\x00\x0c\xb9\x0a"
-			  "\x3a\xf0\xc0\xd1\xda\x63\x9e\x45"
-			  "\x42\xe9\x29\xc0\xb4\x07\xb4\x31"
-			  "\x66\x77\x72\xb5\xb6\xb3\x57\x46"
-			  "\x34\x9a\xfe\x03\xaf\x6b\x36\x07"
-			  "\x63\x8e\xc2\x5d\xa6\x0f\xb6\x7d"
-			  "\xfb\x6d\x82\x51\xb6\x98\xd0\x71"
-			  "\xe7\x10\x7a\xdf\xb2\xbd\xf1\x1d"
-			  "\x72\x2b\x54\x13\xe3\x6d\x79\x37"
-			  "\xa9\x39\x2c\xdf\x21\xab\x87\xd5"
-			  "\xee\xef\x9a\x12\x50\x39\x2e\x1b"
-			  "\x7d\xe6\x6a\x27\x48\xb9\xe7\xac"
-			  "\xaa\xcd\x79\x5f\xf2\xf3\xa0\x08"
-			  "\x6f\x2c\xf4\x0e\xd1\xb8\x89\x25"
-			  "\x31\x9d\xef\xb1\x1d\x27\x55\x04"
-			  "\xc9\x8c\xb7\x68\xdc\xb6\x67\x8a"
-			  "\xdb\xcf\x22\xf2\x3b\x6f\xce\xbb"
-			  "\x26\xbe\x4f\x27\x04\x42\xd1\x44"
-			  "\x4c\x08\xa3\x95\x4c\x7f\x1a\xaf"
-			  "\x1d\x28\x14\xfd\xb1\x1a\x34\x18"
-			  "\xf5\x1e\x28\x69\x95\x6a\x5a\xba"
-			  "\x8e\xb2\x58\x1d\x28\x17\x13\x3d"
-			  "\x38\x7d\x14\x8d\xab\x5d\xf9\xe8"
-			  "\x3c\x0f\x2b\x0d\x2b\x08\xb4\x4b"
-			  "\x6b\x0d\xc8\xa7\x84\xc2\x3a\x1a"
-			  "\xb7\xbd\xda\x92\x29\xb8\x5b\x5a"
-			  "\x63\xa5\x99\x82\x09\x72\x8f\xc6"
-			  "\xa4\x62\x24\x69\x8c\x2d\x26\x00"
-			  "\x99\x83\x91\xd6\xc6\xcf\x57\x67"
-			  "\x38\xea\xf2\xfc\x29\xe0\x73\x39"
-			  "\xf9\x13\x94\x6d\xe2\x58\x28\x75"
-			  "\x3e\xae\x71\x90\x07\x70\x1c\x38"
-			  "\x5b\x4c\x1e\xb5\xa5\x3b\x20\xef"
-			  "\xb1\x4c\x3e\x1a\x72\x62\xbb\x22"
-			  "\x82\x09\xe3\x18\x3f\x4f\x48\xfc"
-			  "\xdd\xac\xfc\xb6\x09\xdb\xd2\x7b"
-			  "\xd6\xb7\x7e\x41\x2f\x14\xf5\x0e"
-			  "\xc3\xac\x4a\xed\xe7\x82\xef\x31"
-			  "\x1f\x1a\x51\x1e\x29\x60\xc8\x98"
-			  "\x93\x51\x1d\x3d\x62\x59\x83\x82"
-			  "\x0c\xf1\xd7\x8d\xac\x33\x44\x81"
-			  "\x3c\x59\xb7\xd4\x5b\x65\x82\xc4"
-			  "\xec\xdc\x24\xfd\x0e\x1a\x79\x94"
-			  "\x34\xb0\x62\xfa\x98\x49\x26\x1f"
-			  "\xf4\x9e\x40\x44\x5b\x1f\xf8\xbe"
-			  "\x36\xff\xc6\xc6\x9d\xf2\xd6\xcc"
-			  "\x63\x93\x29\xb9\x0b\x6d\xd7\x6c"
-			  "\xdb\xf6\x21\x80\xf7\x5a\x37\x15"
-			  "\x0c\xe3\x36\xc8\x74\x75\x20\x91"
-			  "\xdf\x52\x2d\x0c\xe7\x45\xff\x46"
-			  "\xb3\xf4\xec\xc2\xbd\xd3\x37\xb6"
-			  "\x26\xa2\x5d\x7d\x61\xbf\x10\x46"
-			  "\x57\x8d\x05\x96\x70\x0b\xd6\x41"
-			  "\x5c\xe9\xd3\x54\x81\x39\x3a\xdd"
-			  "\x5f\x92\x81\x6e\x35\x03\xd4\x72"
-			  "\x3d\x5a\xe7\xb9\x3b\x0c\x84\x23"
-			  "\x45\x5d\xec\x72\xc1\x52\xef\x2e"
-			  "\x81\x00\xd3\xfe\x4c\x3c\x05\x61"
-			  "\x80\x18\xc4\x6c\x03\xd3\xb7\xba"
-			  "\x11\xd7\xb8\x6e\xea\xe1\x80\x30",
-		.ilen	= 512,
-		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec tf_xts_enc_tv_template[] = {
+static const struct cipher_testvec tf_xts_tv_template[] = {
 	/* Generated from AES-XTS test vectors */
 {
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -11166,16 +8365,15 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 32,
-		.result	= "\x4b\xc9\x44\x4a\x11\xa3\xef\xac"
+		.ctext	= "\x4b\xc9\x44\x4a\x11\xa3\xef\xac"
 			  "\x30\x74\xe4\x44\x52\x77\x97\x43"
 			  "\xa7\x60\xb2\x45\x2e\xf9\x00\x90"
 			  "\x9f\xaa\xfd\x89\x6e\x9d\x4a\xe0",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
@@ -11184,16 +8382,15 @@
 		.klen	= 32,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x57\x0e\x8f\xe5\x2a\x35\x61\x4f"
+		.ctext	= "\x57\x0e\x8f\xe5\x2a\x35\x61\x4f"
 			  "\x32\xd3\xbd\x36\x05\x15\x44\x2c"
 			  "\x58\x06\xf7\xf8\x00\xa8\xb6\xd5"
 			  "\xc6\x28\x92\xdb\xd8\x34\xa2\xe9",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
@@ -11202,16 +8399,15 @@
 		.klen	= 32,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x96\x45\x8f\x8d\x7a\x75\xb1\xde"
+		.ctext	= "\x96\x45\x8f\x8d\x7a\x75\xb1\xde"
 			  "\x40\x0c\x89\x56\xf6\x4d\xa7\x07"
 			  "\x38\xbb\x5b\xe9\xcd\x84\xae\xb2"
 			  "\x7b\x6a\x62\xf4\x8c\xb5\x37\xea",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -11220,7 +8416,7 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -11284,8 +8480,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xa9\x78\xae\x1e\xea\xa2\x44\x4c"
+		.ctext	= "\xa9\x78\xae\x1e\xea\xa2\x44\x4c"
 			  "\xa2\x7a\x64\x1f\xaf\x46\xc1\xe0"
 			  "\x6c\xb2\xf3\x92\x9a\xd6\x7d\x58"
 			  "\xb8\x2d\xb9\x5d\x58\x07\x66\x50"
@@ -11349,7 +8544,7 @@
 			  "\x43\xc4\x46\x24\x22\x4f\x8f\x7e"
 			  "\xe5\xf4\x6d\x1e\x0e\x18\x7a\xbb"
 			  "\xa6\x8f\xfb\x49\x49\xd8\x7e\x5a",
-		.rlen	= 512,
+		.len	= 512,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -11362,7 +8557,7 @@
 		.klen	= 64,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -11426,8 +8621,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xd7\x4b\x93\x7d\x13\xa2\xa2\xe1"
+		.ctext	= "\xd7\x4b\x93\x7d\x13\xa2\xa2\xe1"
 			  "\x35\x39\x71\x88\x76\x1e\xc9\xea"
 			  "\x86\xad\xf3\x14\x48\x3d\x5e\xe9"
 			  "\xe9\x2d\xb2\x56\x59\x35\x9d\xec"
@@ -11491,350 +8685,7 @@
 			  "\xf3\xea\x67\x52\x78\xc2\xce\x70"
 			  "\xa4\x05\x0b\xb2\xb3\xa8\x30\x97"
 			  "\x37\x30\xe1\x91\x8d\xb3\x2a\xff",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec tf_xts_dec_tv_template[] = {
-	/* Generated from AES-XTS test vectors */
-	/* same as enc vectors with input and result reversed */
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x4b\xc9\x44\x4a\x11\xa3\xef\xac"
-			  "\x30\x74\xe4\x44\x52\x77\x97\x43"
-			  "\xa7\x60\xb2\x45\x2e\xf9\x00\x90"
-			  "\x9f\xaa\xfd\x89\x6e\x9d\x4a\xe0",
-		.ilen	= 32,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x57\x0e\x8f\xe5\x2a\x35\x61\x4f"
-			  "\x32\xd3\xbd\x36\x05\x15\x44\x2c"
-			  "\x58\x06\xf7\xf8\x00\xa8\xb6\xd5"
-			  "\xc6\x28\x92\xdb\xd8\x34\xa2\xe9",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x96\x45\x8f\x8d\x7a\x75\xb1\xde"
-			  "\x40\x0c\x89\x56\xf6\x4d\xa7\x07"
-			  "\x38\xbb\x5b\xe9\xcd\x84\xae\xb2"
-			  "\x7b\x6a\x62\xf4\x8c\xb5\x37\xea",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xa9\x78\xae\x1e\xea\xa2\x44\x4c"
-			  "\xa2\x7a\x64\x1f\xaf\x46\xc1\xe0"
-			  "\x6c\xb2\xf3\x92\x9a\xd6\x7d\x58"
-			  "\xb8\x2d\xb9\x5d\x58\x07\x66\x50"
-			  "\xea\x35\x35\x8c\xb2\x46\x61\x06"
-			  "\x5d\x65\xfc\x57\x8f\x69\x74\xab"
-			  "\x8a\x06\x69\xb5\x6c\xda\x66\xc7"
-			  "\x52\x90\xbb\x8e\x6d\x8b\xb5\xa2"
-			  "\x78\x1d\xc2\xa9\xc2\x73\x00\xc3"
-			  "\x32\x36\x7c\x97\x6b\x4e\x8a\x50"
-			  "\xe4\x91\x83\x96\x8f\xf4\x94\x1a"
-			  "\xa6\x27\xe1\x33\xcb\x91\xc6\x5f"
-			  "\x94\x75\xbc\xd7\x3e\x3e\x6f\x9e"
-			  "\xa9\x31\x80\x5e\xe5\xdb\xc8\x53"
-			  "\x01\x73\x68\x32\x25\x19\xfa\xfb"
-			  "\xe4\xcf\xb9\x3e\xa2\xa0\x8f\x31"
-			  "\xbf\x54\x06\x93\xa8\xb1\x0f\xb6"
-			  "\x7c\x3c\xde\x6f\x0f\xfb\x0c\x11"
-			  "\x39\x80\x39\x09\x97\x65\xf2\x83"
-			  "\xae\xe6\xa1\x6f\x47\xb8\x49\xde"
-			  "\x99\x36\x20\x7d\x97\x3b\xec\xfa"
-			  "\xb4\x33\x6e\x7a\xc7\x46\x84\x49"
-			  "\x91\xcd\xe1\x57\x0d\xed\x40\x08"
-			  "\x13\xf1\x4e\x3e\xa4\xa4\x5c\xe6"
-			  "\xd2\x0c\x20\x8f\x3e\xdf\x3f\x47"
-			  "\x9a\x2f\xde\x6d\x66\xc9\x99\x4a"
-			  "\x2d\x9e\x9d\x4b\x1a\x27\xa2\x12"
-			  "\x99\xf0\xf8\xb1\xb6\xf6\x57\xc3"
-			  "\xca\x1c\xa3\x8e\xed\x39\x28\xb5"
-			  "\x10\x1b\x4b\x08\x42\x00\x4a\xd3"
-			  "\xad\x5a\xc6\x8e\xc8\xbb\x95\xc4"
-			  "\x4b\xaa\xfe\xd5\x42\xa8\xa3\x6d"
-			  "\x3c\xf3\x34\x91\x2d\xb4\xdd\x20"
-			  "\x0c\x90\x6d\xa3\x9b\x66\x9d\x24"
-			  "\x02\xa6\xa9\x3f\x3f\x58\x5d\x47"
-			  "\x24\x65\x63\x7e\xbd\x8c\xe6\x52"
-			  "\x7d\xef\x33\x53\x63\xec\xaa\x0b"
-			  "\x64\x15\xa9\xa6\x1f\x10\x00\x38"
-			  "\x35\xa8\xe7\xbe\x23\x70\x22\xe0"
-			  "\xd3\xb9\xe6\xfd\xe6\xaa\x03\x50"
-			  "\xf3\x3c\x27\x36\x8b\xcc\xfe\x9c"
-			  "\x9c\xa3\xb3\xe7\x68\x9b\xa2\x71"
-			  "\xe0\x07\xd9\x1f\x68\x1f\xac\x5e"
-			  "\x7a\x74\x85\xa9\x6a\x90\xab\x2c"
-			  "\x38\x51\xbc\x1f\x43\x4a\x56\x1c"
-			  "\xf8\x47\x03\x4e\x67\xa8\x1f\x99"
-			  "\x04\x39\x73\x32\xb2\x86\x79\xe7"
-			  "\x14\x28\x70\xb8\xe2\x7d\x69\x85"
-			  "\xb6\x0f\xc5\xd0\xd0\x01\x5c\xe6"
-			  "\x09\x0f\x75\xf7\xb6\x81\xd2\x11"
-			  "\x20\x9c\xa1\xee\x11\x44\x79\xd0"
-			  "\xb2\x34\x77\xda\x10\x9a\x6f\x6f"
-			  "\xef\x7c\xd9\xdc\x35\xb7\x61\xdd"
-			  "\xf1\xa4\xc6\x1c\xbf\x05\x22\xac"
-			  "\xfe\x2f\x85\x00\x44\xdf\x33\x16"
-			  "\x35\xb6\xa3\xd3\x70\xdf\x69\x35"
-			  "\x6a\xc7\xb4\x99\x45\x27\xc8\x8e"
-			  "\x5a\x14\x30\xd0\x55\x3e\x4f\x64"
-			  "\x0d\x38\xe3\xdf\x8b\xa8\x93\x26"
-			  "\x75\xae\xf6\xb5\x23\x0b\x17\x31"
-			  "\xbf\x27\xb8\xb5\x94\x31\xa7\x8f"
-			  "\x43\xc4\x46\x24\x22\x4f\x8f\x7e"
-			  "\xe5\xf4\x6d\x1e\x0e\x18\x7a\xbb"
-			  "\xa6\x8f\xfb\x49\x49\xd8\x7e\x5a",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xd7\x4b\x93\x7d\x13\xa2\xa2\xe1"
-			  "\x35\x39\x71\x88\x76\x1e\xc9\xea"
-			  "\x86\xad\xf3\x14\x48\x3d\x5e\xe9"
-			  "\xe9\x2d\xb2\x56\x59\x35\x9d\xec"
-			  "\x84\xfa\x7e\x9d\x6d\x33\x36\x8f"
-			  "\xce\xf4\xa9\x21\x0b\x5f\x96\xec"
-			  "\xcb\xf9\x57\x68\x33\x88\x39\xbf"
-			  "\x2f\xbb\x59\x03\xbd\x66\x8b\x11"
-			  "\x11\x65\x51\x2e\xb8\x67\x05\xd1"
-			  "\x27\x11\x5c\xd4\xcc\x97\xc2\xb3"
-			  "\xa9\x55\xaf\x07\x56\xd1\xdc\xf5"
-			  "\x85\xdc\x46\xe6\xf0\x24\xeb\x93"
-			  "\x4d\xf0\x9b\xf5\x73\x1c\xda\x03"
-			  "\x22\xc8\x3a\x4f\xb4\x19\x91\x09"
-			  "\x54\x0b\xf6\xfe\x17\x3d\x1a\x53"
-			  "\x72\x60\x79\xcb\x0e\x32\x8a\x77"
-			  "\xd5\xed\xdb\x33\xd7\x62\x16\x69"
-			  "\x63\xe0\xab\xb5\xf6\x9c\x5f\x3d"
-			  "\x69\x35\x61\x86\xf8\x86\xb9\x89"
-			  "\x6e\x59\x35\xac\xf6\x6b\x33\xa0"
-			  "\xea\xef\x96\x62\xd8\xa9\xcf\x56"
-			  "\xbf\xdb\x8a\xfd\xa1\x82\x77\x73"
-			  "\x3d\x94\x4a\x49\x42\x6d\x08\x60"
-			  "\xa1\xea\xab\xb6\x88\x13\x94\xb8"
-			  "\x51\x98\xdb\x35\x85\xdf\xf6\xb9"
-			  "\x8f\xcd\xdf\x80\xd3\x40\x2d\x72"
-			  "\xb8\xb2\x6c\x02\x43\x35\x22\x2a"
-			  "\x31\xed\xcd\x16\x19\xdf\x62\x0f"
-			  "\x29\xcf\x87\x04\xec\x02\x4f\xe4"
-			  "\xa2\xed\x73\xc6\x69\xd3\x7e\x89"
-			  "\x0b\x76\x10\x7c\xd6\xf9\x6a\x25"
-			  "\xed\xcc\x60\x5d\x61\x20\xc1\x97"
-			  "\x56\x91\x57\x28\xbe\x71\x0d\xcd"
-			  "\xde\xc4\x9e\x55\x91\xbe\xd1\x28"
-			  "\x9b\x90\xeb\x73\xf3\x68\x51\xc6"
-			  "\xdf\x82\xcc\xd8\x1f\xce\x5b\x27"
-			  "\xc0\x60\x5e\x33\xd6\xa7\x20\xea"
-			  "\xb2\x54\xc7\x5d\x6a\x3b\x67\x47"
-			  "\xcf\xa0\xe3\xab\x86\xaf\xc1\x42"
-			  "\xe6\xb0\x23\x4a\xaf\x53\xdf\xa0"
-			  "\xad\x12\x32\x31\x03\xf7\x21\xbe"
-			  "\x2d\xd5\x82\x42\xb6\x4a\x3d\xcd"
-			  "\xd8\x81\x77\xa9\x49\x98\x6c\x09"
-			  "\xc5\xa3\x61\x12\x62\x85\x6b\xcd"
-			  "\xb3\xf4\x20\x0c\x41\xc4\x05\x37"
-			  "\x46\x5f\xeb\x71\x8b\xf1\xaf\x6e"
-			  "\xba\xf3\x50\x2e\xfe\xa8\x37\xeb"
-			  "\xe8\x8c\x4f\xa4\x0c\xf1\x31\xc8"
-			  "\x6e\x71\x4f\xa5\xd7\x97\x73\xe0"
-			  "\x93\x4a\x2f\xda\x7b\xe0\x20\x54"
-			  "\x1f\x8d\x85\x79\x0b\x7b\x5e\x75"
-			  "\xb9\x07\x67\xcc\xc8\xe7\x21\x15"
-			  "\xa7\xc8\x98\xff\x4b\x80\x1c\x12"
-			  "\xa8\x54\xe1\x38\x52\xe6\x74\x81"
-			  "\x97\x47\xa1\x41\x0e\xc0\x50\xe3"
-			  "\x55\x0e\xc3\xa7\x70\x77\xce\x07"
-			  "\xed\x8c\x88\xe6\xa1\x5b\x14\xec"
-			  "\xe6\xde\x06\x6d\x74\xc5\xd9\xfa"
-			  "\xe5\x2f\x5a\xff\xc8\x05\xee\x27"
-			  "\x35\x61\xbf\x0b\x19\x78\x9b\xd2"
-			  "\x04\xc7\x05\xb1\x79\xb4\xff\x5f"
-			  "\xf3\xea\x67\x52\x78\xc2\xce\x70"
-			  "\xa4\x05\x0b\xb2\xb3\xa8\x30\x97"
-			  "\x37\x30\xe1\x91\x8d\xb3\x2a\xff",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
@@ -11845,51 +8696,47 @@
  * Serpent test vectors.  These are backwards because Serpent writes
  * octet sequences in right-to-left mode.
  */
-static const struct cipher_testvec serpent_enc_tv_template[] = {
+static const struct cipher_testvec serpent_tv_template[] = {
 	{
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.ilen	= 16,
-		.result	= "\x12\x07\xfc\xce\x9b\xd0\xd6\x47"
+		.ctext	= "\x12\x07\xfc\xce\x9b\xd0\xd6\x47"
 			  "\x6a\xe9\x8f\xbe\xd1\x43\xa0\xe2",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 		.klen	= 16,
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.ilen	= 16,
-		.result	= "\x4c\x7d\x8a\x32\x80\x72\xa2\x2c"
+		.ctext	= "\x4c\x7d\x8a\x32\x80\x72\xa2\x2c"
 			  "\x82\x3e\x4a\x1f\x3a\xcd\xa1\x6d",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 		.klen	= 32,
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.ilen	= 16,
-		.result	= "\xde\x26\x9f\xf8\x33\xe4\x32\xb8"
+		.ctext	= "\xde\x26\x9f\xf8\x33\xe4\x32\xb8"
 			  "\x5b\x2e\x88\xd2\x70\x1c\xe7\x5c",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80",
 		.klen	= 16,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\xdd\xd2\x6b\x98\xa5\xff\xd8\x2c"
+		.ptext	= zeroed_string,
+		.ctext	= "\xdd\xd2\x6b\x98\xa5\xff\xd8\x2c"
 			  "\x05\x34\x5a\x9d\xad\xbf\xaf\x49",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
 			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
 			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
 		.klen	= 32,
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -11951,8 +8798,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\xFB\xB0\x5D\xDE\xC0\xFE\xFC\xEB"
+		.ctext	= "\xFB\xB0\x5D\xDE\xC0\xFE\xFC\xEB"
 			  "\xB1\x80\x10\x43\xDE\x62\x70\xBD"
 			  "\xFA\x8A\x93\xEA\x6B\xF7\xC5\xD7"
 			  "\x0C\xD1\xBB\x29\x25\x14\x4C\x22"
@@ -12014,281 +8860,93 @@
 			  "\x34\xC1\xC9\xF2\x28\x4A\xCD\x02"
 			  "\x75\x55\x9B\xFF\x36\x73\xAB\x7C"
 			  "\xF4\x46\x2E\xEB\xAC\xF3\xD2\xB7",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
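Several of the long vectors above end with .also_non_np = 1, .np = 3 and .tap = { 496 - 20, 4, 16 }. Judging by how these fields are used here, np is the number of scatterlist pieces the buffer is split into, tap[] gives the size of each piece (summing to .len), and also_non_np requests an extra run with the data in one contiguous buffer. A sketch of the chunking step, assuming <linux/scatterlist.h> (split_into_chunks is a hypothetical helper, not testmgr's actual code):

	static int split_into_chunks(const u8 *buf, unsigned int len,
				     const unsigned short *tap, int np,
				     struct scatterlist *sg)
	{
		unsigned int off = 0;
		int i;

		sg_init_table(sg, np);
		for (i = 0; i < np; i++) {
			if (off + tap[i] > len)
				return -EINVAL;
			/* each scatterlist entry covers tap[i] bytes of the buffer */
			sg_set_buf(&sg[i], buf + off, tap[i]);
			off += tap[i];
		}
		return off == len ? 0 : -EINVAL;	/* tap[] must cover .len exactly */
	}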
-static const struct cipher_testvec tnepres_enc_tv_template[] = {
+static const struct cipher_testvec tnepres_tv_template[] = {
+	{ /* KeySize=0 */
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.ctext	= "\x41\xcc\x6b\x31\x59\x31\x45\x97"
+			  "\x6d\x6f\xbb\x38\x4b\x37\x21\x28",
+		.len	= 16,
+	},
 	{ /* KeySize=128, PT=0, I=1 */
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.key    = "\x80\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen   = 16,
-		.ilen	= 16,
-		.result	= "\x49\xaf\xbf\xad\x9d\x5a\x34\x05"
+		.ctext	= "\x49\xaf\xbf\xad\x9d\x5a\x34\x05"
 			  "\x2c\xd8\xff\xa5\x98\x6b\xd2\xdd",
-		.rlen	= 16,
+		.len	= 16,
+	}, { /* KeySize=128 */
+		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.klen	= 16,
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.ctext	= "\xea\xf4\xd7\xfc\xd8\x01\x34\x47"
+			  "\x81\x45\x0b\xfa\x0c\xd6\xad\x6e",
+		.len	= 16,
+	}, { /* KeySize=128, I=121 */
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80",
+		.klen	= 16,
+		.ptext	= zeroed_string,
+		.ctext	= "\x3d\xda\xbf\xc0\x06\xda\xab\x06"
+			  "\x46\x2a\xf4\xef\x81\x54\x4e\x26",
+		.len	= 16,
 	}, { /* KeySize=192, PT=0, I=1 */
 		.key	= "\x80\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 24,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 16,
-		.result	= "\xe7\x8e\x54\x02\xc7\x19\x55\x68"
+		.ctext	= "\xe7\x8e\x54\x02\xc7\x19\x55\x68"
 			  "\xac\x36\x78\xf7\xa3\xf6\x0c\x66",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* KeySize=256, PT=0, I=1 */
 		.key	= "\x80\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 32,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 16,
-		.result	= "\xab\xed\x96\xe7\x66\xbf\x28\xcb"
+		.ctext	= "\xab\xed\x96\xe7\x66\xbf\x28\xcb"
 			  "\xc0\xeb\xd2\x1a\x82\xef\x08\x19",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* KeySize=256, I=257 */
 		.key	= "\x1f\x1e\x1d\x1c\x1b\x1a\x19\x18"
 			  "\x17\x16\x15\x14\x13\x12\x11\x10"
 			  "\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08"
 			  "\x07\x06\x05\x04\x03\x02\x01\x00",
 		.klen	= 32,
-		.input	= "\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08"
+		.ptext	= "\x0f\x0e\x0d\x0c\x0b\x0a\x09\x08"
 			  "\x07\x06\x05\x04\x03\x02\x01\x00",
-		.ilen	= 16,
-		.result	= "\x5c\xe7\x1c\x70\xd2\x88\x2e\x5b"
+		.ctext	= "\x5c\xe7\x1c\x70\xd2\x88\x2e\x5b"
 			  "\xb8\x32\xe4\x33\xf8\x9f\x26\xde",
-		.rlen	= 16,
-	},
-};
-
-
-static const struct cipher_testvec serpent_dec_tv_template[] = {
-	{
-		.input	= "\x12\x07\xfc\xce\x9b\xd0\xd6\x47"
-			  "\x6a\xe9\x8f\xbe\xd1\x43\xa0\xe2",
-		.ilen	= 16,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.rlen	= 16,
-	}, {
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.klen	= 16,
-		.input	= "\x4c\x7d\x8a\x32\x80\x72\xa2\x2c"
-			  "\x82\x3e\x4a\x1f\x3a\xcd\xa1\x6d",
-		.ilen	= 16,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.rlen	= 16,
-	}, {
+		.len	= 16,
+	}, { /* KeySize=256 */
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 		.klen	= 32,
-		.input	= "\xde\x26\x9f\xf8\x33\xe4\x32\xb8"
-			  "\x5b\x2e\x88\xd2\x70\x1c\xe7\x5c",
-		.ilen	= 16,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.rlen	= 16,
-	}, {
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80",
-		.klen	= 16,
-		.input	= "\xdd\xd2\x6b\x98\xa5\xff\xd8\x2c"
-			  "\x05\x34\x5a\x9d\xad\xbf\xaf\x49",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.input	= "\xFB\xB0\x5D\xDE\xC0\xFE\xFC\xEB"
-			  "\xB1\x80\x10\x43\xDE\x62\x70\xBD"
-			  "\xFA\x8A\x93\xEA\x6B\xF7\xC5\xD7"
-			  "\x0C\xD1\xBB\x29\x25\x14\x4C\x22"
-			  "\x77\xA6\x38\x00\xDB\xB9\xE2\x07"
-			  "\xD1\xAC\x82\xBA\xEA\x67\xAA\x39"
-			  "\x99\x34\x89\x5B\x54\xE9\x12\x13"
-			  "\x3B\x04\xE5\x12\x42\xC5\x79\xAB"
-			  "\x0D\xC7\x3C\x58\x2D\xA3\x98\xF6"
-			  "\xE4\x61\x9E\x17\x0B\xCE\xE8\xAA"
-			  "\xB5\x6C\x1A\x3A\x67\x52\x81\x6A"
-			  "\x04\xFF\x8A\x1B\x96\xFE\xE6\x87"
-			  "\x3C\xD4\x39\x7D\x36\x9B\x03\xD5"
-			  "\xB6\xA0\x75\x3C\x83\xE6\x1C\x73"
-			  "\x9D\x74\x2B\x77\x53\x2D\xE5\xBD"
-			  "\x69\xDA\x7A\x01\xF5\x6A\x70\x39"
-			  "\x30\xD4\x2C\xF2\x8E\x06\x4B\x39"
-			  "\xB3\x12\x1D\xB3\x17\x46\xE6\xD6"
-			  "\xB6\x31\x36\x34\x38\x3C\x1D\x69"
-			  "\x9F\x47\x28\x9A\x1D\x96\x70\x54"
-			  "\x8E\x88\xCB\xE0\xF5\x6A\xAE\x0A"
-			  "\x3C\xD5\x93\x1C\x21\xC9\x14\x3A"
-			  "\x23\x9C\x9B\x79\xC7\x75\xC8\x39"
-			  "\xA6\xAC\x65\x9A\x99\x37\xAF\x6D"
-			  "\xBD\xB5\x32\xFD\xD8\x9C\x95\x7B"
-			  "\xC6\x6A\x80\x64\xEA\xEF\x6D\x3F"
-			  "\xA9\xFE\x5B\x16\xA3\xCF\x32\xC8"
-			  "\xEF\x50\x22\x20\x93\x30\xBE\xE2"
-			  "\x38\x05\x65\xAF\xBA\xB6\xE4\x72"
-			  "\xA9\xEE\x05\x42\x88\xBD\x9D\x49"
-			  "\xAD\x93\xCA\x4D\x45\x11\x43\x4D"
-			  "\xB8\xF5\x74\x2B\x48\xE7\x21\xE4"
-			  "\x4E\x3A\x4C\xDE\x65\x7A\x5A\xAD"
-			  "\x86\xE6\x23\xEC\x6B\xA7\x17\xE6"
-			  "\xF6\xA1\xAC\x29\xAE\xF9\x9B\x69"
-			  "\x73\x65\x65\x51\xD6\x0B\x4E\x8C"
-			  "\x17\x15\x9D\xB0\xCF\xB2\x42\x2B"
-			  "\x51\xC3\x03\xE8\xB7\x7D\x2D\x39"
-			  "\xE8\x10\x93\x16\xC8\x68\x4C\x60"
-			  "\x87\x70\x14\xD0\x01\x57\xCB\x42"
-			  "\x13\x59\xB1\x7F\x12\x4F\xBB\xC7"
-			  "\xBD\x2B\xD4\xA9\x12\x26\x4F\xDE"
-			  "\xFD\x72\xEC\xD7\x6F\x97\x14\x90"
-			  "\x0E\x37\x13\xE6\x67\x1D\xE5\xFE"
-			  "\x9E\x18\x3C\x8F\x3A\x3F\x59\x9B"
-			  "\x71\x80\x05\x35\x3F\x40\x0B\x21"
-			  "\x76\xE5\xEF\x42\x6C\xDB\x31\x05"
-			  "\x5F\x05\xCF\x14\xE3\xF0\x61\xA2"
-			  "\x49\x03\x5E\x77\x2E\x20\xBA\xA1"
-			  "\xAF\x46\x51\xC0\x2B\xC4\x64\x1E"
-			  "\x65\xCC\x51\x58\x0A\xDF\xF0\x5F"
-			  "\x75\x9F\x48\xCD\x81\xEC\xC3\xF6"
-			  "\xED\xC9\x4B\x7B\x4E\x26\x23\xE1"
-			  "\xBB\xE9\x83\x0B\xCF\xE4\xDE\x00"
-			  "\x48\xFF\xBF\x6C\xB4\x72\x16\xEF"
-			  "\xC7\x46\xEE\x48\x8C\xB8\xAF\x45"
-			  "\x91\x76\xE7\x6E\x65\x3D\x15\x86"
-			  "\x10\xF8\xDB\x66\x97\x7C\x43\x4D"
-			  "\x79\x12\x4E\xCE\x06\xD1\xD1\x6A"
-			  "\x34\xC1\xC9\xF2\x28\x4A\xCD\x02"
-			  "\x75\x55\x9B\xFF\x36\x73\xAB\x7C"
-			  "\xF4\x46\x2E\xEB\xAC\xF3\xD2\xB7",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec tnepres_dec_tv_template[] = {
-	{
-		.input	= "\x41\xcc\x6b\x31\x59\x31\x45\x97"
-			  "\x6d\x6f\xbb\x38\x4b\x37\x21\x28",
-		.ilen	= 16,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.rlen	= 16,
-	}, {
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.klen	= 16,
-		.input	= "\xea\xf4\xd7\xfc\xd8\x01\x34\x47"
-			  "\x81\x45\x0b\xfa\x0c\xd6\xad\x6e",
-		.ilen	= 16,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.rlen	= 16,
-	}, {
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.klen	= 32,
-		.input	= "\x64\xa9\x1a\x37\xed\x9f\xe7\x49"
+		.ctext	= "\x64\xa9\x1a\x37\xed\x9f\xe7\x49"
 			  "\xa8\x4e\x76\xd6\xf5\x0d\x78\xee",
-		.ilen	= 16,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.rlen	= 16,
-	}, { /* KeySize=128, I=121 */
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80",
-		.klen	= 16,
-		.input	= "\x3d\xda\xbf\xc0\x06\xda\xab\x06"
-			  "\x46\x2a\xf4\xef\x81\x54\x4e\x26",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	},
+		.len	= 16,
+	}
 };
 
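Every hunk in this file applies the same mechanical rename: .input/.ilen become .ptext, .result/.rlen become .ctext, and a single .len replaces the ilen/rlen pair, which were always equal for these cipher vectors. The entry layout implied by the converted vectors is roughly the following (a sketch inferred from the fields used above; the real struct cipher_testvec in crypto/testmgr.h may add members and order or type them differently):

	struct cipher_testvec {
		const char *key;
		const char *iv;
		const char *ptext;		/* plaintext */
		const char *ctext;		/* expected ciphertext */
		unsigned char klen;		/* key length in bytes */
		unsigned short len;		/* length of ptext and ctext */
		unsigned char also_non_np;	/* also test unchunked */
		unsigned char np;		/* number of scatterlist chunks */
		unsigned short tap[8];		/* chunk sizes, summing to len */
	};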
-static const struct cipher_testvec serpent_cbc_enc_tv_template[] = {
+static const struct cipher_testvec serpent_cbc_tv_template[] = {
 	{ /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -12297,7 +8955,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -12359,8 +9017,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x80\xCF\x11\x41\x1A\xB9\x4B\x9C"
+		.ctext	= "\x80\xCF\x11\x41\x1A\xB9\x4B\x9C"
 			  "\xFF\xB7\x6C\xEA\xF0\xAF\x77\x6E"
 			  "\x71\x75\x95\x9D\x4E\x1C\xCF\xAD"
 			  "\x81\x34\xE9\x8F\xAE\x5A\x91\x1C"
@@ -12422,14 +9079,14 @@
 			  "\x02\xC4\xAF\xFA\xAD\x31\xF4\xBF"
 			  "\xFC\x66\xAA\x37\xF2\x37\x39\x6B"
 			  "\xBC\x08\x3A\xA2\x29\xB3\xDF\xD1",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec serpent_cbc_dec_tv_template[] = {
+static const struct cipher_testvec serpent_ctr_tv_template[] = {
 	{ /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -12438,70 +9095,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x80\xCF\x11\x41\x1A\xB9\x4B\x9C"
-			  "\xFF\xB7\x6C\xEA\xF0\xAF\x77\x6E"
-			  "\x71\x75\x95\x9D\x4E\x1C\xCF\xAD"
-			  "\x81\x34\xE9\x8F\xAE\x5A\x91\x1C"
-			  "\x38\x63\x35\x7E\x79\x18\x0A\xE8"
-			  "\x67\x06\x76\xD5\xFF\x22\x2F\xDA"
-			  "\xB6\x2D\x57\x13\xB6\x3C\xBC\x97"
-			  "\xFE\x53\x75\x35\x97\x7F\x51\xEA"
-			  "\xDF\x5D\xE8\x9D\xCC\xD9\xAE\xE7"
-			  "\x62\x67\xFF\x04\xC2\x18\x22\x5F"
-			  "\x2E\x06\xC1\xE2\x26\xCD\xC6\x1E"
-			  "\xE5\x2C\x4E\x87\x23\xDD\xF0\x41"
-			  "\x08\xA5\xB4\x3E\x07\x1E\x0B\xBB"
-			  "\x72\x84\xF8\x0A\x3F\x38\x5E\x91"
-			  "\x15\x26\xE1\xDB\xA4\x3D\x74\xD2"
-			  "\x41\x1E\x3F\xA9\xC6\x7D\x2A\xAB"
-			  "\x27\xDF\x89\x1D\x86\x3E\xF7\x5A"
-			  "\xF6\xE3\x0F\xC7\x6B\x4C\x96\x7C"
-			  "\x2D\x12\xA5\x05\x92\xCB\xD7\x4A"
-			  "\x4D\x1E\x88\x21\xE1\x63\xB4\xFC"
-			  "\x4A\xF2\xCD\x35\xB9\xD7\x70\x97"
-			  "\x5A\x5E\x7E\x96\x52\x20\xDC\x25"
-			  "\xE9\x6B\x36\xB4\xE0\x98\x85\x2C"
-			  "\x3C\xD2\xF7\x78\x8A\x73\x26\x9B"
-			  "\xAF\x0B\x11\xE8\x4D\x67\x23\xE9"
-			  "\x77\xDF\x58\xF6\x6F\x9E\xA4\xC5"
-			  "\x10\xA1\x82\x0E\x80\xA0\x8F\x4B"
-			  "\xA1\xC0\x12\x54\x4E\xC9\x20\x92"
-			  "\x11\x00\x10\x4E\xB3\x7C\xCA\x63"
-			  "\xE5\x3F\xD3\x41\x37\xCD\x74\xB7"
-			  "\xA5\x7C\x61\xB8\x0B\x7A\x7F\x4D"
-			  "\xFE\x96\x7D\x1B\xBE\x60\x37\xB7"
-			  "\x81\x92\x66\x67\x15\x1E\x39\x98"
-			  "\x52\xC0\xF4\x69\xC0\x99\x4F\x5A"
-			  "\x2E\x32\xAD\x7C\x8B\xE9\xAD\x05"
-			  "\x55\xF9\x0A\x1F\x97\x5C\xFA\x2B"
-			  "\xF4\x99\x76\x3A\x6E\x4D\xE1\x4C"
-			  "\x14\x4E\x6F\x87\xEE\x1A\x85\xA3"
-			  "\x96\xC6\x66\x49\xDA\x0D\x71\xAC"
-			  "\x04\x05\x46\xD3\x90\x0F\x64\x64"
-			  "\x01\x66\x2C\x62\x5D\x34\xD1\xCB"
-			  "\x3A\x24\xCE\x95\xEF\xAE\x2C\x97"
-			  "\x0E\x0C\x1D\x36\x49\xEB\xE9\x3D"
-			  "\x62\xA6\x19\x28\x9E\x26\xB4\x3F"
-			  "\xD7\x55\x42\x3C\xCD\x72\x0A\xF0"
-			  "\x7D\xE9\x95\x45\x86\xED\xB1\xE0"
-			  "\x8D\xE9\xC5\x86\x13\x24\x28\x7D"
-			  "\x74\xEF\xCA\x50\x12\x7E\x64\x8F"
-			  "\x1B\xF5\x5B\xFE\xE2\xAC\xFA\xE7"
-			  "\xBD\x38\x8C\x11\x20\xEF\xB1\xAA"
-			  "\x7B\xE5\xE5\x78\xAD\x9D\x2D\xA2"
-			  "\x8E\xDD\x48\xB3\xEF\x18\x92\x7E"
-			  "\xE6\x75\x0D\x54\x64\x11\xA3\x3A"
-			  "\xDB\x97\x0F\xD3\xDF\x07\xD3\x7E"
-			  "\x1E\xD1\x87\xE4\x74\xBB\x46\xF4"
-			  "\xBA\x23\x2D\x8D\x29\x07\x12\xCF"
-			  "\x34\xCD\x72\x7F\x01\x30\xE7\xA0"
-			  "\xF8\xDD\xA8\x08\xF0\xBC\xB1\xA2"
-			  "\xCC\xE1\x6B\x5F\xBE\xEA\xF1\xE4"
-			  "\x02\xC4\xAF\xFA\xAD\x31\xF4\xBF"
-			  "\xFC\x66\xAA\x37\xF2\x37\x39\x6B"
-			  "\xBC\x08\x3A\xA2\x29\xB3\xDF\xD1",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -12563,86 +9157,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec serpent_ctr_enc_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+		.ctext	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
 			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
 			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
 			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
@@ -12704,7 +9219,7 @@
 			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
 			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
 			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
-		.rlen	= 496,
+		.len	= 496,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -12713,7 +9228,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -12776,8 +9291,7 @@
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
 			  "\x2B\xC2\x59",
-		.ilen	= 499,
-		.result	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
+		.ctext	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
 			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
 			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
 			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
@@ -12840,7 +9354,7 @@
 			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
 			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
 			  "\x38\xE2\xE5",
-		.rlen	= 499,
+		.len	= 499,
 		.also_non_np = 1,
 		.np	= 2,
 		.tap	= { 499 - 16, 16 },
@@ -12852,7 +9366,7 @@
 		.klen	= 32,
 		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
 			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -12914,8 +9428,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
+		.ctext	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
 			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
 			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
 			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
@@ -12977,422 +9490,11 @@
 			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
 			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
 			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
-		.rlen	= 496,
+		.len	= 496,
 	},
 };
 
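Note the last serpent_ctr vector above: its IV is all 0xFF bytes except a final 0xFD, so encrypting 496 bytes (31 blocks) carries the big-endian counter across the 128-bit wraparound after the third block, presumably by design. For reference, the counter step in CTR mode is a plain ripple-carry increment over the 16 IV bytes (standalone illustration, not the kernel's implementation):

	static void ctr128_inc(unsigned char counter[16])
	{
		int i;

		/* add 1 to the big-endian 128-bit value, propagating carries left */
		for (i = 15; i >= 0; i--)
			if (++counter[i] != 0)
				break;
	}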
-static const struct cipher_testvec serpent_ctr_dec_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
-			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
-			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
-			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
-			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
-			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
-			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
-			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
-			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
-			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
-			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
-			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
-			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
-			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
-			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
-			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
-			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
-			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
-			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
-			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
-			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
-			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
-			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
-			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
-			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
-			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
-			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
-			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
-			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
-			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
-			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
-			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
-			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
-			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
-			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
-			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
-			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
-			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
-			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
-			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
-			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
-			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
-			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
-			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
-			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
-			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
-			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
-			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
-			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
-			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
-			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
-			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
-			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
-			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
-			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
-			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
-			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
-			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
-			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
-			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
-			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
-			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x84\x68\xEC\xF2\x1C\x88\x20\xCA"
-			  "\x37\x69\xE3\x3A\x22\x85\x48\x46"
-			  "\x70\xAA\x25\xB4\xCD\x8B\x04\x4E"
-			  "\x8D\x15\x2B\x98\xDF\x7B\x6D\xB9"
-			  "\xE0\x4A\x73\x00\x65\xB6\x1A\x0D"
-			  "\x5C\x60\xDF\x34\xDC\x60\x4C\xDF"
-			  "\xB5\x1F\x26\x8C\xDA\xC1\x11\xA8"
-			  "\x80\xFA\x37\x7A\x89\xAA\xAE\x7B"
-			  "\x92\x6E\xB9\xDC\xC9\x62\x4F\x88"
-			  "\x0A\x5D\x97\x2F\x6B\xAC\x03\x7C"
-			  "\x22\xF6\x55\x5A\xFA\x35\xA5\x17"
-			  "\xA1\x5C\x5E\x2B\x63\x2D\xB9\x91"
-			  "\x3E\x83\x26\x00\x4E\xD5\xBE\xCE"
-			  "\x79\xC4\x3D\xFC\x70\xA0\xAD\x96"
-			  "\xBA\x58\x2A\x1C\xDF\xC2\x3A\xA5"
-			  "\x7C\xB5\x12\x89\xED\xBF\xB6\x09"
-			  "\x13\x4F\x7D\x61\x3C\x5C\x27\xFC"
-			  "\x5D\xE1\x4F\xA1\xEA\xB3\xCA\xB9"
-			  "\xE6\xD0\x97\x81\xDE\xD1\xFB\x8A"
-			  "\x30\xDB\xA3\x5D\xEC\x25\x0B\x86"
-			  "\x71\xC8\xA7\x67\xE8\xBC\x7D\x4C"
-			  "\xAE\x82\xD3\x73\x31\x09\xCB\xB3"
-			  "\x4D\xD4\xC0\x8A\x2B\xFA\xA6\x55"
-			  "\x39\x0A\xBC\x6E\x75\xAB\xC2\xE2"
-			  "\x8A\xF2\x26\xCD\x63\x38\x35\xF7"
-			  "\xAE\x12\x83\xCD\x8A\x9E\x7E\x4C"
-			  "\xFE\x4D\xD7\xCE\x5C\x6E\x4C\xAF"
-			  "\xE3\xCD\x76\xA7\x87\xA1\x54\x7C"
-			  "\xEC\x32\xC7\x83\x2A\xFF\xF8\xEA"
-			  "\x87\xB2\x47\xA3\x9D\xC2\x9C\xA2"
-			  "\xB7\x2C\x7C\x1A\x24\xCB\x88\x61"
-			  "\xFF\xA7\x1A\x16\x01\xDD\x4B\xFC"
-			  "\x2E\xE0\x48\x67\x09\x42\xCC\x91"
-			  "\xBE\x20\x38\xC0\x5E\x3B\x95\x00"
-			  "\xA1\x96\x66\x0B\x8A\xE9\x9E\xF7"
-			  "\x6B\x34\x0A\x51\xC0\x3B\xEB\x71"
-			  "\x07\x97\x38\x4B\x5C\x56\x98\x67"
-			  "\x78\x9C\xD0\x0E\x2B\xB5\x67\x90"
-			  "\x75\xF8\xFE\x6D\x4E\x85\xCC\x0D"
-			  "\x18\x06\x15\x9D\x5A\x10\x13\x37"
-			  "\xA3\xD6\x68\xA2\xDF\x7E\xC7\x12"
-			  "\xC9\x0D\x4D\x91\xB0\x2A\x55\xFF"
-			  "\x6F\x73\x13\xDF\x28\xB5\x2A\x2C"
-			  "\xE4\xFC\x20\xD9\xF1\x7A\x82\xB1"
-			  "\xCB\x57\xB6\x3D\x8C\xF4\x8E\x27"
-			  "\x37\xDC\x35\xF3\x79\x01\x53\xA4"
-			  "\x7B\x37\xDE\x7C\x04\xAE\x50\xDB"
-			  "\x9B\x1E\x8C\x07\xA7\x52\x49\x50"
-			  "\x34\x25\x65\xDD\xA9\x8F\x7E\xBD"
-			  "\x7A\xC9\x36\xAE\xDE\x21\x48\x64"
-			  "\xC2\x02\xBA\xBE\x11\x1E\x3D\x9C"
-			  "\x98\x52\xCC\x04\xBD\x5E\x61\x26"
-			  "\x10\xD3\x21\xD9\x6E\x25\x98\x77"
-			  "\x8E\x98\x63\xF6\xF6\x52\xFB\x13"
-			  "\xAA\x30\xF2\xB9\xA4\x43\x53\x39"
-			  "\x1C\x97\x07\x7E\x6B\xFF\x3D\x43"
-			  "\xA6\x71\x6B\x66\x8F\x58\x3F\x71"
-			  "\x90\x47\x40\x92\xE6\x69\xD1\x96"
-			  "\x34\xB3\x3B\xE5\x43\xE4\xD5\x56"
-			  "\xB2\xE6\x7E\x86\x7A\x12\x17\x5B"
-			  "\x30\xF3\x9B\x0D\xFA\x57\xE4\x50"
-			  "\x40\x53\x77\x8C\x15\xF8\x8D\x13"
-			  "\x38\xE2\xE5",
-		.ilen	= 499,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59",
-		.rlen	= 499,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 499 - 16, 16 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x06\x9A\xF8\xB4\x53\x88\x62\xFC"
-			  "\x68\xB8\x2E\xDF\xC1\x05\x0F\x3D"
-			  "\xAF\x4D\x95\xAE\xC4\xE9\x1C\xDC"
-			  "\xF6\x2B\x8F\x90\x89\xF6\x7E\x1A"
-			  "\xA6\xB9\xE4\xF4\xFA\xCA\xE5\x7E"
-			  "\x71\x28\x06\x4F\xE8\x08\x39\xDA"
-			  "\xA5\x0E\xC8\xC0\xB8\x16\xE5\x69"
-			  "\xE5\xCA\xEC\x4F\x63\x2C\xC0\x9B"
-			  "\x9F\x3E\x39\x79\xF0\xCD\x64\x35"
-			  "\x4A\xD3\xC8\xA9\x31\xCD\x48\x5B"
-			  "\x92\x3D\x8F\x3F\x96\xBD\xB3\x18"
-			  "\x74\x2A\x5D\x29\x3F\x57\x8F\xE2"
-			  "\x67\x9A\xE0\xE5\xD4\x4A\xE2\x47"
-			  "\xBC\xF6\xEB\x14\xF3\x8C\x20\xC2"
-			  "\x7D\xE2\x43\x81\x86\x72\x2E\xB1"
-			  "\x39\xF6\x95\xE1\x1F\xCB\x76\x33"
-			  "\x5B\x7D\x23\x0F\x3A\x67\x2A\x2F"
-			  "\xB9\x37\x9D\xDD\x1F\x16\xA1\x3C"
-			  "\x70\xFE\x52\xAA\x93\x3C\xC4\x46"
-			  "\xB1\xE5\xFF\xDA\xAF\xE2\x84\xFE"
-			  "\x25\x92\xB2\x63\xBD\x49\x77\xB4"
-			  "\x22\xA4\x6A\xD5\x04\xE0\x45\x58"
-			  "\x1C\x34\x96\x7C\x03\x0C\x13\xA2"
-			  "\x05\x22\xE2\xCB\x5A\x35\x03\x09"
-			  "\x40\xD2\x82\x05\xCA\x58\x73\xF2"
-			  "\x29\x5E\x01\x47\x13\x32\x78\xBE"
-			  "\x06\xB0\x51\xDB\x6C\x31\xA0\x1C"
-			  "\x74\xBC\x8D\x25\xDF\xF8\x65\xD1"
-			  "\x38\x35\x11\x26\x4A\xB4\x06\x32"
-			  "\xFA\xD2\x07\x77\xB3\x74\x98\x80"
-			  "\x61\x59\xA8\x9F\xF3\x6F\x2A\xBF"
-			  "\xE6\xA5\x9A\xC4\x6B\xA6\x49\x6F"
-			  "\xBC\x47\xD9\xFB\xC6\xEF\x25\x65"
-			  "\x96\xAC\x9F\xE4\x81\x4B\xD8\xBA"
-			  "\xD6\x9B\xC9\x6D\x58\x40\x81\x02"
-			  "\x73\x44\x4E\x43\x6E\x37\xBB\x11"
-			  "\xE3\xF9\xB8\x2F\xEC\x76\x34\xEA"
-			  "\x90\xCD\xB7\x2E\x0E\x32\x71\xE8"
-			  "\xBB\x4E\x0B\x98\xA4\x17\x17\x5B"
-			  "\x07\xB5\x82\x3A\xC4\xE8\x42\x51"
-			  "\x5A\x4C\x4E\x7D\xBF\xC4\xC0\x4F"
-			  "\x68\xB8\xC6\x4A\x32\x6F\x0B\xD7"
-			  "\x85\xED\x6B\xFB\x72\xD2\xA5\x8F"
-			  "\xBF\xF9\xAC\x59\x50\xA8\x08\x70"
-			  "\xEC\xBD\x0A\xBF\xE5\x87\xA1\xC2"
-			  "\x92\x14\x78\xAF\xE8\xEA\x2E\xDD"
-			  "\xC1\x03\x9A\xAA\x89\x8B\x32\x46"
-			  "\x5B\x18\x27\xBA\x46\xAA\x64\xDE"
-			  "\xE3\xD5\xA3\xFC\x7B\x5B\x61\xDB"
-			  "\x7E\xDA\xEC\x30\x17\x19\xF8\x80"
-			  "\xB5\x5E\x27\xB5\x37\x3A\x1F\x28"
-			  "\x07\x73\xC3\x63\xCE\xFF\x8C\xFE"
-			  "\x81\x4E\xF8\x24\xF3\xB8\xC7\xE8"
-			  "\x16\x9A\xCC\x58\x2F\x88\x1C\x4B"
-			  "\xBB\x33\xA2\x73\xF0\x1C\x89\x0E"
-			  "\xDC\x34\x27\x89\x98\xCE\x1C\xA2"
-			  "\xD8\xB8\x90\xBE\xEC\x72\x28\x13"
-			  "\xAC\x7B\xF1\xD0\x7F\x7A\x28\x50"
-			  "\xB7\x99\x65\x8A\xC9\xC6\x21\x34"
-			  "\x7F\x67\x9D\xB7\x2C\xCC\xF5\x17"
-			  "\x2B\x89\xAC\xB0\xD7\x1E\x47\xB0"
-			  "\x61\xAF\xD4\x63\x6D\xB8\x2D\x20",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-	},
-};
-
-static const struct cipher_testvec serpent_lrw_enc_tv_template[] = {
+static const struct cipher_testvec serpent_lrw_tv_template[] = {
 	/* Generated from AES-LRW test vectors */
 	{
 		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
@@ -13402,12 +9504,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
+		.ctext	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
 			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
 			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
@@ -13416,12 +9517,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
+		.ctext	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
 			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
 			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
@@ -13430,12 +9530,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
+		.ctext	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
 			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
 			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
@@ -13445,12 +9544,11 @@
 		.klen	= 40,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
+		.ctext	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
 			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
 			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
@@ -13460,12 +9558,11 @@
 		.klen	= 40,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
+		.ctext	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
 			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
 			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
@@ -13476,12 +9573,11 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
+		.ctext	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
 			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
 			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
@@ -13492,12 +9588,11 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
+		.ctext	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
 			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
 			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
@@ -13508,7 +9603,7 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+		.ptext	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
 			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
 			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
 			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
@@ -13572,8 +9667,7 @@
 			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
 			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
 			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.ilen	= 512,
-		.result	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
+		.ctext	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
 			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
 			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
 			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
@@ -13637,267 +9731,14 @@
 			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
 			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
 			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
 	},
 };
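
The hunks above and below show the conversion pattern applied throughout this
file: the separate _enc_ and _dec_ arrays are folded into a single template
whose .ptext/.ctext/.len fields replace .input/.result/.ilen/.rlen. Deleting
the _dec_ arrays loses no coverage, because each combined vector can be run in
both directions. A minimal sketch of that idea (not the kernel's actual
testmgr code; check_vec, encrypt_fn and decrypt_fn are made-up names, and
vectors are assumed to fit in MAX_VEC_LEN bytes):

#include <string.h>

#define MAX_VEC_LEN 512

struct check_vec {
	const char *key, *iv, *ptext, *ctext;
	unsigned char klen;
	unsigned short len;
};

typedef void (*xfrm_fn)(const char *key, unsigned char klen, const char *iv,
			const char *in, char *out, unsigned short len);

static int check_both_ways(const struct check_vec *v,
			   xfrm_fn encrypt_fn, xfrm_fn decrypt_fn)
{
	char buf[MAX_VEC_LEN];

	/* forward direction: .ptext must encrypt to .ctext ... */
	encrypt_fn(v->key, v->klen, v->iv, v->ptext, buf, v->len);
	if (memcmp(buf, v->ctext, v->len))
		return -1;

	/* ... and the same vector, run backwards, covers decryption,
	 * which is why the *_dec_tv_template arrays can simply go away */
	decrypt_fn(v->key, v->klen, v->iv, v->ctext, buf, v->len);
	return memcmp(buf, v->ptext, v->len) ? -1 : 0;
}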
 
-static const struct cipher_testvec serpent_lrw_dec_tv_template[] = {
-	/* Generated from AES-LRW test vectors */
-	/* same as enc vectors with input and result reversed */
-	{
-		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
-			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
-			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
-			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x6f\xbf\xd4\xa4\x5d\x71\x16\x79"
-			  "\x63\x9c\xa6\x8e\x40\xbe\x0d\x8a",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
-			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
-			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
-			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\xfd\xb2\x66\x98\x80\x96\x55\xad"
-			  "\x08\x94\x54\x9c\x21\x7c\x69\xe3",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
-			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
-			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
-			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x14\x5e\x3d\x70\xc0\x6e\x9c\x34"
-			  "\x5b\x5e\xcf\x0f\xe4\x8c\x21\x5c",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
-			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
-			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
-			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
-			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x25\x39\xaa\xa5\xf0\x65\xc8\xdc"
-			  "\x5d\x45\x95\x30\x8f\xff\x2f\x1b",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
-			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
-			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
-			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
-			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x0c\x20\x20\x63\xd6\x8b\xfc\x8f"
-			  "\xc0\xe2\x17\xbb\xd2\x59\x6f\x26",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\xc1\x35\x2e\x53\xf0\x96\x4d\x9c"
-			  "\x2e\x18\xe6\x99\xcd\xd3\x15\x68",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
-			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
-			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
-			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
-			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
-			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x86\x0a\xc6\xa9\x1a\x9f\xe7\xe6"
-			  "\x64\x3b\x33\xd6\xd5\x84\xd6\xdf",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\xe3\x5a\x38\x0f\x4d\x92\x3a\x74"
-			  "\x15\xb1\x50\x8c\x9a\xd8\x99\x1d"
-			  "\x82\xec\xf1\x5f\x03\x6d\x02\x58"
-			  "\x90\x67\xfc\xdd\x8d\xe1\x38\x08"
-			  "\x7b\xc9\x9b\x4b\x04\x09\x50\x15"
-			  "\xce\xab\xda\x33\x30\x20\x12\xfa"
-			  "\x83\xc4\xa6\x9a\x2e\x7d\x90\xd9"
-			  "\xa6\xa6\x67\x43\xb4\xa7\xa8\x5c"
-			  "\xbb\x6a\x49\x2b\x8b\xf8\xd0\x22"
-			  "\xe5\x9e\xba\xe8\x8c\x67\xb8\x5b"
-			  "\x60\xbc\xf5\xa4\x95\x4e\x66\xe5"
-			  "\x6d\x8e\xa9\xf6\x65\x2e\x04\xf5"
-			  "\xba\xb5\xdb\x88\xc2\xf6\x7a\x4b"
-			  "\x89\x58\x7c\x9a\xae\x26\xe8\xb7"
-			  "\xb7\x28\xcc\xd6\xcc\xa5\x98\x4d"
-			  "\xb9\x91\xcb\xb4\xe4\x8b\x96\x47"
-			  "\x5f\x03\x8b\xdd\x94\xd1\xee\x12"
-			  "\xa7\x83\x80\xf2\xc1\x15\x74\x4f"
-			  "\x49\xf9\xb0\x7e\x6f\xdc\x73\x2f"
-			  "\xe2\xcf\xe0\x1b\x34\xa5\xa0\x52"
-			  "\xfb\x3c\x5d\x85\x91\xe6\x6d\x98"
-			  "\x04\xd6\xdd\x4c\x00\x64\xd9\x54"
-			  "\x5c\x3c\x08\x1d\x4c\x06\x9f\xb8"
-			  "\x1c\x4d\x8d\xdc\xa4\x3c\xb9\x3b"
-			  "\x9e\x85\xce\xc3\xa8\x4a\x0c\xd9"
-			  "\x04\xc3\x6f\x17\x66\xa9\x1f\x59"
-			  "\xd9\xe2\x19\x36\xa3\x88\xb8\x0b"
-			  "\x0f\x4a\x4d\xf8\xc8\x6f\xd5\x43"
-			  "\xeb\xa0\xab\x1f\x61\xc0\x06\xeb"
-			  "\x93\xb7\xb8\x6f\x0d\xbd\x07\x49"
-			  "\xb3\xac\x5d\xcf\x31\xa0\x27\x26"
-			  "\x21\xbe\x94\x2e\x19\xea\xf4\xee"
-			  "\xb5\x13\x89\xf7\x94\x0b\xef\x59"
-			  "\x44\xc5\x78\x8b\x3c\x3b\x71\x20"
-			  "\xf9\x35\x0c\x70\x74\xdc\x5b\xc2"
-			  "\xb4\x11\x0e\x2c\x61\xa1\x52\x46"
-			  "\x18\x11\x16\xc6\x86\x44\xa7\xaf"
-			  "\xd5\x0c\x7d\xa6\x9e\x25\x2d\x1b"
-			  "\x9a\x8f\x0f\xf8\x6a\x61\xa0\xea"
-			  "\x3f\x0e\x90\xd6\x8f\x83\x30\x64"
-			  "\xb5\x51\x2d\x08\x3c\xcd\x99\x36"
-			  "\x96\xd4\xb1\xb5\x48\x30\xca\x48"
-			  "\xf7\x11\xa8\xf5\x97\x8a\x6a\x6d"
-			  "\x12\x33\x2f\xc0\xe8\xda\xec\x8a"
-			  "\xe1\x88\x72\x63\xde\x20\xa3\xe1"
-			  "\x8e\xac\x84\x37\x35\xf5\xf7\x3f"
-			  "\x00\x02\x0e\xe4\xc1\x53\x68\x3f"
-			  "\xaa\xd5\xac\x52\x3d\x20\x2f\x4d"
-			  "\x7c\x83\xd0\xbd\xaa\x97\x35\x36"
-			  "\x98\x88\x59\x5d\xe7\x24\xe3\x90"
-			  "\x9d\x30\x47\xa7\xc3\x60\x35\xf4"
-			  "\xd5\xdb\x0e\x4d\x44\xc1\x81\x8b"
-			  "\xfd\xbd\xc3\x2b\xba\x68\xfe\x8d"
-			  "\x49\x5a\x3c\x8a\xa3\x01\xae\x25"
-			  "\x42\xab\xd2\x87\x1b\x35\xd6\xd2"
-			  "\xd7\x70\x1c\x1f\x72\xd1\xe1\x39"
-			  "\x1c\x58\xa2\xb4\xd0\x78\x55\x72"
-			  "\x76\x59\xea\xd9\xd7\x6e\x63\x8b"
-			  "\xcc\x9b\xa7\x74\x89\xfc\xa3\x68"
-			  "\x86\x28\xd1\xbb\x54\x8d\x66\xad"
-			  "\x2a\x92\xf9\x4e\x04\x3d\xae\xfd"
-			  "\x1b\x2b\x7f\xc3\x2f\x1a\x78\x0a"
-			  "\x5c\xc6\x84\xfe\x7c\xcb\x26\xfd"
-			  "\xd9\x51\x0f\xd7\x94\x2f\xc5\xa7",
-		.ilen	= 512,
-		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec serpent_xts_enc_tv_template[] = {
+static const struct cipher_testvec serpent_xts_tv_template[] = {
 	/* Generated from AES-XTS test vectors */
 	{
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -13907,16 +9748,15 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 32,
-		.result	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
+		.ctext	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
 			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
 			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
 			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
@@ -13925,16 +9765,15 @@
 		.klen	= 32,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
+		.ctext	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
 			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
 			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
 			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
@@ -13943,16 +9782,15 @@
 		.klen	= 32,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
+		.ctext	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
 			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
 			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
 			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -13961,7 +9799,7 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -14025,8 +9863,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
+		.ctext	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
 			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
 			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
 			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
@@ -14090,7 +9927,7 @@
 			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
 			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
 			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
-		.rlen	= 512,
+		.len	= 512,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -14103,7 +9940,7 @@
 		.klen	= 64,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -14167,8 +10004,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
+		.ctext	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
 			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
 			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
 			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
@@ -14232,350 +10068,7 @@
 			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
 			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
 			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec serpent_xts_dec_tv_template[] = {
-	/* Generated from AES-XTS test vectors */
-	/* same as enc vectors with input and result reversed */
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xe1\x08\xb8\x1d\x2c\xf5\x33\x64"
-			  "\xc8\x12\x04\xc7\xb3\x70\xe8\xc4"
-			  "\x6a\x31\xc5\xf3\x00\xca\xb9\x16"
-			  "\xde\xe2\x77\x66\xf7\xfe\x62\x08",
-		.ilen	= 32,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x1a\x0a\x09\x5f\xcd\x07\x07\x98"
-			  "\x41\x86\x12\xaf\xb3\xd7\x68\x13"
-			  "\xed\x81\xcd\x06\x87\x43\x1a\xbb"
-			  "\x13\x3d\xd6\x1e\x2b\xe1\x77\xbe",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xf9\x9b\x28\xb8\x5c\xaf\x8c\x61"
-			  "\xb6\x1c\x81\x8f\x2c\x87\x60\x89"
-			  "\x0d\x8d\x7a\xe8\x60\x48\xcc\x86"
-			  "\xc1\x68\x45\xaa\x00\xe9\x24\xc5",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xfe\x47\x4a\xc8\x60\x7e\xb4\x8b"
-			  "\x0d\x10\xf4\xb0\x0d\xba\xf8\x53"
-			  "\x65\x6e\x38\x4b\xdb\xaa\xb1\x9e"
-			  "\x28\xca\xb0\x22\xb3\x85\x75\xf4"
-			  "\x00\x5c\x75\x14\x06\xd6\x25\x82"
-			  "\xe6\xcb\x08\xf7\x29\x90\x23\x8e"
-			  "\xa4\x68\x57\xe4\xf0\xd8\x32\xf3"
-			  "\x80\x51\x67\xb5\x0b\x85\x69\xe8"
-			  "\x19\xfe\xc4\xc7\x3e\xea\x90\xd3"
-			  "\x8f\xa3\xf2\x0a\xac\x17\x4b\xa0"
-			  "\x63\x5a\x16\x0f\xf0\xce\x66\x1f"
-			  "\x2c\x21\x07\xf1\xa4\x03\xa3\x44"
-			  "\x41\x61\x87\x5d\x6b\xb3\xef\xd4"
-			  "\xfc\xaa\x32\x7e\x55\x58\x04\x41"
-			  "\xc9\x07\x33\xc6\xa2\x68\xd6\x5a"
-			  "\x55\x79\x4b\x6f\xcf\x89\xb9\x19"
-			  "\xe5\x54\x13\x15\xb2\x1a\xfa\x15"
-			  "\xc2\xf0\x06\x59\xfa\xa0\x25\x05"
-			  "\x58\xfa\x43\x91\x16\x85\x40\xbb"
-			  "\x0d\x34\x4d\xc5\x1e\x20\xd5\x08"
-			  "\xcd\x22\x22\x41\x11\x9f\x6c\x7c"
-			  "\x8d\x57\xc9\xba\x57\xe8\x2c\xf7"
-			  "\xa0\x42\xa8\xde\xfc\xa3\xca\x98"
-			  "\x4b\x43\xb1\xce\x4b\xbf\x01\x67"
-			  "\x6e\x29\x60\xbd\x10\x14\x84\x82"
-			  "\x83\x82\x0c\x63\x73\x92\x02\x7c"
-			  "\x55\x37\x20\x80\x17\x51\xc8\xbc"
-			  "\x46\x02\xcb\x38\x07\x6d\xe2\x85"
-			  "\xaa\x29\xaf\x24\x58\x0d\xf0\x75"
-			  "\x08\x0a\xa5\x34\x25\x16\xf3\x74"
-			  "\xa7\x0b\x97\xbe\xc1\xa9\xdc\x29"
-			  "\x1a\x0a\x56\xc1\x1a\x91\x97\x8c"
-			  "\x0b\xc7\x16\xed\x5a\x22\xa6\x2e"
-			  "\x8c\x2b\x4f\x54\x76\x47\x53\x8e"
-			  "\xe8\x00\xec\x92\xb9\x55\xe6\xa2"
-			  "\xf3\xe2\x4f\x6a\x66\x60\xd0\x87"
-			  "\xe6\xd1\xcc\xe3\x6a\xc5\x2d\x21"
-			  "\xcc\x9d\x6a\xb6\x75\xaa\xe2\x19"
-			  "\x21\x9f\xa1\x5e\x4c\xfd\x72\xf9"
-			  "\x94\x4e\x63\xc7\xae\xfc\xed\x47"
-			  "\xe2\xfe\x7a\x63\x77\xfe\x97\x82"
-			  "\xb1\x10\x6e\x36\x1d\xe1\xc4\x80"
-			  "\xec\x69\x41\xec\xa7\x8a\xe0\x2f"
-			  "\xe3\x49\x26\xa2\x41\xb2\x08\x0f"
-			  "\x28\xb4\xa7\x39\xa1\x99\x2d\x1e"
-			  "\x43\x42\x35\xd0\xcf\xec\x77\x67"
-			  "\xb2\x3b\x9e\x1c\x35\xde\x4f\x5e"
-			  "\x73\x3f\x5d\x6f\x07\x4b\x2e\x50"
-			  "\xab\x6c\x6b\xff\xea\x00\x67\xaa"
-			  "\x0e\x82\x32\xdd\x3d\xb5\xe5\x76"
-			  "\x2b\x77\x3f\xbe\x12\x75\xfb\x92"
-			  "\xc6\x89\x67\x4d\xca\xf7\xd4\x50"
-			  "\xc0\x74\x47\xcc\xd9\x0a\xd4\xc6"
-			  "\x3b\x17\x2e\xe3\x35\xbb\x53\xb5"
-			  "\x86\xad\x51\xcc\xd5\x96\xb8\xdc"
-			  "\x03\x57\xe6\x98\x52\x2f\x61\x62"
-			  "\xc4\x5c\x9c\x36\x71\x07\xfb\x94"
-			  "\xe3\x02\xc4\x2b\x08\x75\xc7\x35"
-			  "\xfb\x2e\x88\x7b\xbb\x67\x00\xe1"
-			  "\xc9\xdd\x99\xb2\x13\x53\x1a\x4e"
-			  "\x76\x87\x19\x04\x1a\x2f\x38\x3e"
-			  "\xef\x91\x64\x1d\x18\x07\x4e\x31"
-			  "\x88\x21\x7c\xb0\xa5\x12\x4c\x3c"
-			  "\xb0\x20\xbd\xda\xdf\xf9\x7c\xdd",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x2b\xc9\xb4\x6b\x10\x94\xa9\x32"
-			  "\xaa\xb0\x20\xc6\x44\x3d\x74\x1f"
-			  "\x75\x01\xa7\xf6\xf5\xf7\x62\x1b"
-			  "\x80\x1b\x82\xcb\x01\x59\x91\x7f"
-			  "\x80\x3a\x98\xf0\xd2\xca\xc4\xc3"
-			  "\x34\xfd\xe6\x11\xf9\x33\x45\x12"
-			  "\x48\xc5\x8c\x25\xf1\xc5\xc5\x23"
-			  "\xd3\x44\xb4\x73\xd5\x04\xc0\xb7"
-			  "\xca\x2f\xf5\xcd\xc5\xb4\xdd\xb0"
-			  "\xf4\x60\xe8\xfb\xc6\x9c\xc5\x78"
-			  "\xcd\xec\x7d\xdc\x19\x9c\x72\x64"
-			  "\x63\x0b\x38\x2e\x76\xdd\x2d\x36"
-			  "\x49\xb0\x1d\xea\x78\x9e\x00\xca"
-			  "\x20\xcc\x1b\x1e\x98\x74\xab\xed"
-			  "\x79\xf7\xd0\x6c\xd8\x93\x80\x29"
-			  "\xac\xa5\x5e\x34\xa9\xab\xa0\x55"
-			  "\x9a\xea\xaa\x95\x4d\x7b\xfe\x46"
-			  "\x26\x8a\xfd\x88\xa2\xa8\xa6\xae"
-			  "\x25\x42\x17\xbf\x76\x8f\x1c\x3d"
-			  "\xec\x9a\xda\x64\x96\xb5\x61\xff"
-			  "\x99\xeb\x12\x96\x85\x82\x9d\xd5"
-			  "\x81\x85\x14\xa8\x59\xac\x8c\x94"
-			  "\xbb\x3b\x85\x2b\xdf\xb3\x0c\xba"
-			  "\x82\xc6\x4d\xca\x86\xea\x53\x28"
-			  "\x4c\xe0\x4e\x31\xe3\x73\x2f\x79"
-			  "\x9d\x42\xe1\x03\xe3\x8b\xc4\xff"
-			  "\x05\xca\x81\x7b\xda\xa2\xde\x63"
-			  "\x3a\x10\xbe\xc2\xac\x32\xc4\x05"
-			  "\x47\x7e\xef\x67\xe2\x5f\x5b\xae"
-			  "\xed\xf1\x70\x34\x16\x9a\x07\x7b"
-			  "\xf2\x25\x2b\xb0\xf8\x3c\x15\x9a"
-			  "\xa6\x59\x55\x5f\xc1\xf4\x1e\xcd"
-			  "\x93\x1f\x06\xba\xd4\x9a\x22\x69"
-			  "\xfa\x8e\x95\x0d\xf3\x23\x59\x2c"
-			  "\xfe\x00\xba\xf0\x0e\xbc\x6d\xd6"
-			  "\x62\xf0\x7a\x0e\x83\x3e\xdb\x32"
-			  "\xfd\x43\x7d\xda\x42\x51\x87\x43"
-			  "\x9d\xf9\xef\xf4\x30\x97\xf8\x09"
-			  "\x88\xfc\x3f\x93\x70\xc1\x4a\xec"
-			  "\x27\x5f\x11\xac\x71\xc7\x48\x46"
-			  "\x2f\xf9\xdf\x8d\x9f\xf7\x2e\x56"
-			  "\x0d\x4e\xb0\x32\x76\xce\x86\x81"
-			  "\xcd\xdf\xe4\x00\xbf\xfd\x5f\x24"
-			  "\xaf\xf7\x9a\xde\xff\x18\xac\x14"
-			  "\x90\xc5\x01\x39\x34\x0f\x24\xf3"
-			  "\x13\x2f\x5e\x4f\x30\x9a\x36\x40"
-			  "\xec\xea\xbc\xcd\x9e\x0e\x5b\x23"
-			  "\x50\x88\x97\x40\x69\xb1\x37\xf5"
-			  "\xc3\x15\xf9\x3f\xb7\x79\x64\xe8"
-			  "\x7b\x10\x20\xb9\x2b\x46\x83\x5b"
-			  "\xd8\x39\xfc\xe4\xfa\x88\x52\xf2"
-			  "\x72\xb0\x97\x4e\x89\xb3\x48\x00"
-			  "\xc1\x16\x73\x50\x77\xba\xa6\x65"
-			  "\x20\x2d\xb0\x02\x27\x89\xda\x99"
-			  "\x45\xfb\xe9\xd3\x1d\x39\x2f\xd6"
-			  "\x2a\xda\x09\x12\x11\xaf\xe6\x57"
-			  "\x01\x04\x8a\xff\x86\x8b\xac\xf8"
-			  "\xee\xe4\x1c\x98\x5b\xcf\x6b\x76"
-			  "\xa3\x0e\x33\x74\x40\x18\x39\x72"
-			  "\x66\x50\x31\xfd\x70\xdf\xe8\x51"
-			  "\x96\x21\x36\xb2\x9b\xfa\x85\xd1"
-			  "\x30\x05\xc8\x92\x98\x80\xff\x7a"
-			  "\xaf\x43\x0b\xc5\x20\x41\x92\x20"
-			  "\xd4\xa0\x91\x98\x11\x5f\x4d\xb1",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
@@ -14587,17 +10080,16 @@
  * https://tools.ietf.org/html/draft-crypto-sm4-00#ref-GBT.32907-2016
  */
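
The second SM4 vector below encodes the spec's iterated example as one chained
160-byte message: each 16-byte ciphertext block is fed back in as the next
plaintext block, so .ptext and .ctext overlap by one block (per the referenced
draft, the vector keeps only the last 10 of the example's 1,000,000
iterations). A rough sketch of that construction, assuming a hypothetical
single-block primitive passed in as encrypt_block (nothing here is an API from
this file):

#include <string.h>

typedef void (*sm4_block_fn)(const unsigned char key[16],
			     const unsigned char in[16],
			     unsigned char out[16]);

/* caller seeds pt[0..15]; pt[16..159] and ct[0..159] are filled in */
static void build_chained_vec(sm4_block_fn encrypt_block,
			      const unsigned char key[16],
			      unsigned char pt[160], unsigned char ct[160])
{
	int i;

	for (i = 0; i < 160; i += 16) {
		encrypt_block(key, &pt[i], &ct[i]);
		if (i + 16 < 160)
			/* next plaintext block is this ciphertext block */
			memcpy(&pt[i + 16], &ct[i], 16);
	}
}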
 
-static const struct cipher_testvec sm4_enc_tv_template[] = {
+static const struct cipher_testvec sm4_tv_template[] = {
 	{ /* SM4 Appendix A: Example Calculations. Example 1. */
 		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
 			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.klen	= 16,
-		.input	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
+		.ptext	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
 			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
-		.ilen	= 16,
-		.result	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
+		.ctext	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
 			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /*
 	      *  SM4 Appendix A: Example Calculations.
 	      *  Last 10 iterations of Example 2.
@@ -14605,7 +10097,7 @@
 		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
 			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 		.klen	= 16,
-		.input	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
+		.ptext	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
 			  "\x81\xfc\xa8\xe\x38\x3e\xef\x80"
 			  "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
 			  "\xf0\xf1\x30\x4c\x1\x27\x5a\x8f"
@@ -14625,8 +10117,7 @@
 			  "\xed\xce\x0\x19\xe\x16\x2\x6e"
 			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
 			  "\x31\x51\xec\x47\xc3\x51\x83\xc1",
-		.ilen	= 160,
-		.result	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
+		.ctext	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
 			  "\xf0\xf1\x30\x4c\x1\x27\x5a\x8f"
 			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
 			  "\xad\x57\x15\xab\x31\x5d\xc\xef"
@@ -14646,70 +10137,7 @@
 			  "\x31\x51\xec\x47\xc3\x51\x83\xc1"
 			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
 			  "\x4\x2\xf8\x4\xc3\x3d\x3f\x66",
-		.rlen	= 160
-	}
-};
-
-static const struct cipher_testvec sm4_dec_tv_template[] = {
-	{ /* SM4 Appendix A: Example Calculations. Example 1. */
-		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
-			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
-		.klen	= 16,
-		.input	= "\x68\x1E\xDF\x34\xD2\x06\x96\x5E"
-			  "\x86\xB3\xE9\x4F\x53\x6E\x42\x46",
-		.ilen	= 16,
-		.result	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
-			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
-		.rlen	= 16,
-	}, { /*
-	      *  SM4 Appendix A: Example Calculations.
-	      *  Last 10 iterations of Example 2.
-	      */
-		.key    = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
-			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
-		.klen	= 16,
-		.input	= "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
-			  "\xf0\xf1\x30\x4c\x1\x27\x5a\x8f"
-			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
-			  "\xad\x57\x15\xab\x31\x5d\xc\xef"
-			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
-			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
-			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
-			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
-			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
-			  "\xa5\x37\x0\xbe\xe7\x7b\x48\xfb"
-			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
-			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
-			  "\x88\xa6\x6e\x6\x93\xca\x43\xa5"
-			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
-			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
-			  "\xed\xce\x0\x19\xe\x16\x2\x6e"
-			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
-			  "\x31\x51\xec\x47\xc3\x51\x83\xc1"
-			  "\x59\x52\x98\xc7\xc6\xfd\x27\x1f"
-			  "\x4\x2\xf8\x4\xc3\x3d\x3f\x66",
-		.ilen	= 160,
-		.result	= "\x99\x4a\xc3\xe7\xc3\x57\x89\x6a"
-			  "\x81\xfc\xa8\xe\x38\x3e\xef\x80"
-			  "\xb1\x98\xf2\xde\x3f\x4b\xae\xd1"
-			  "\xf0\xf1\x30\x4c\x1\x27\x5a\x8f"
-			  "\x45\xe1\x39\xb7\xae\xff\x1f\x27"
-			  "\xad\x57\x15\xab\x31\x5d\xc\xef"
-			  "\x8c\xc8\x80\xbd\x11\x98\xf3\x7b"
-			  "\xa2\xdd\x14\x20\xf9\xe8\xbb\x82"
-			  "\xf7\x32\xca\x4b\xa8\xf7\xb3\x4d"
-			  "\x27\xd1\xcd\xe6\xb6\x65\x5a\x23"
-			  "\xc2\xf3\x54\x84\x53\xe3\xb9\x20"
-			  "\xa5\x37\x0\xbe\xe7\x7b\x48\xfb"
-			  "\x21\x3d\x9e\x48\x1d\x9e\xf5\xbf"
-			  "\x77\xd5\xb4\x4a\x53\x71\x94\x7a"
-			  "\x88\xa6\x6e\x6\x93\xca\x43\xa5"
-			  "\xc4\xf6\xcd\x53\x4b\x7b\x8e\xfe"
-			  "\xb4\x28\x7c\x42\x29\x32\x5d\x88"
-			  "\xed\xce\x0\x19\xe\x16\x2\x6e"
-			  "\x87\xff\x2c\xac\xe8\xe7\xe9\xbf"
-			  "\x31\x51\xec\x47\xc3\x51\x83\xc1",
-		.rlen	= 160
+		.len	= 160
 	}
 };
 
@@ -14727,86 +10155,45 @@
  * the lowest memory address.
  */
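
For reference, the cipher behind these vectors is built from the public Speck
round function; a minimal sketch, assuming the usual 64-bit word size and
rotation constants 8 and 3 for Speck128 (an illustration only, not the
kernel's implementation; the /128, /192 and /256 variants differ only in key
schedule and round count):

#include <stdint.h>

static inline uint64_t ror64(uint64_t v, unsigned int n)
{
	return (v >> n) | (v << (64 - n));
}

static inline uint64_t rol64(uint64_t v, unsigned int n)
{
	return (v << n) | (v >> (64 - n));
}

/* one Speck128 round over the two 64-bit block words x (high) and y (low) */
static void speck128_round(uint64_t *x, uint64_t *y, uint64_t round_key)
{
	*x = (ror64(*x, 8) + *y) ^ round_key;
	*y = rol64(*y, 3) ^ *x;
}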
 
-static const struct cipher_testvec speck128_enc_tv_template[] = {
+static const struct cipher_testvec speck128_tv_template[] = {
 	{ /* Speck128/128 */
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 		.klen	= 16,
-		.input	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
+		.ptext	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
 			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
-		.ilen	= 16,
-		.result	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
+		.ctext	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
 			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* Speck128/192 */
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17",
 		.klen	= 24,
-		.input	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
+		.ptext	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
 			  "\x68\x69\x65\x66\x20\x48\x61\x72",
-		.ilen	= 16,
-		.result	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
+		.ctext	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
 			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* Speck128/256 */
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 		.klen	= 32,
-		.input	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
+		.ptext	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
 			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
-		.ilen	= 16,
-		.result	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
+		.ctext	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
 			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
-		.rlen	= 16,
-	},
-};
-
-static const struct cipher_testvec speck128_dec_tv_template[] = {
-	{ /* Speck128/128 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.klen	= 16,
-		.input	= "\x18\x0d\x57\x5c\xdf\xfe\x60\x78"
-			  "\x65\x32\x78\x79\x51\x98\x5d\xa6",
-		.ilen	= 16,
-		.result	= "\x20\x6d\x61\x64\x65\x20\x69\x74"
-			  "\x20\x65\x71\x75\x69\x76\x61\x6c",
-		.rlen	= 16,
-	}, { /* Speck128/192 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17",
-		.klen	= 24,
-		.input	= "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9"
-			  "\x66\x55\x13\x13\x3a\xcf\xe4\x1b",
-		.ilen	= 16,
-		.result	= "\x65\x6e\x74\x20\x74\x6f\x20\x43"
-			  "\x68\x69\x65\x66\x20\x48\x61\x72",
-		.rlen	= 16,
-	}, { /* Speck128/256 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.klen	= 32,
-		.input	= "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e"
-			  "\x3e\xf5\xc0\x05\x04\x01\x09\x41",
-		.ilen	= 16,
-		.result	= "\x70\x6f\x6f\x6e\x65\x72\x2e\x20"
-			  "\x49\x6e\x20\x74\x68\x6f\x73\x65",
-		.rlen	= 16,
+		.len	= 16,
 	},
 };
 
 /*
  * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the
- * result recomputed with Speck128 as the cipher
+ * ciphertext recomputed with Speck128 as the cipher
  */
-
-static const struct cipher_testvec speck128_xts_enc_tv_template[] = {
+static const struct cipher_testvec speck128_xts_tv_template[] = {
 	{
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -14815,16 +10202,15 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 32,
-		.result	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
+		.ctext	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
 			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
 			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
 			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
@@ -14833,16 +10219,15 @@
 		.klen	= 32,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
+		.ctext	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
 			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
 			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
 			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
@@ -14851,16 +10236,15 @@
 		.klen	= 32,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
+		.ctext	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
 			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
 			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
 			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -14869,7 +10253,7 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -14933,8 +10317,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
+		.ctext	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
 			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
 			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
 			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
@@ -14998,7 +10381,7 @@
 			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
 			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
 			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
-		.rlen	= 512,
+		.len	= 512,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -15011,7 +10394,7 @@
 		.klen	= 64,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -15075,8 +10458,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
+		.ctext	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
 			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
 			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
 			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
@@ -15140,400 +10522,36 @@
 			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
 			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
 			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
 	}
 };
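
The trailing .also_non_np/.np/.tap fields on the large vectors describe the
scatterlist layouts a message is exercised with: under the old testmgr
convention, np is the number of segments and tap[] lists their sizes, so
.tap = { 512 - 20, 4, 16 } splits a 512-byte message into 492-, 4- and
16-byte pieces, while also_non_np = 1 additionally runs it as one contiguous
buffer. A sketch of that splitting under those assumed semantics
(split_message() and emit_seg are made-up names, not kernel code):

static int split_message(const char *msg, unsigned short len,
			 const unsigned short *tap, int np,
			 void (*emit_seg)(const char *seg, unsigned short n))
{
	unsigned short off = 0;
	int i;

	for (i = 0; i < np; i++) {
		emit_seg(msg + off, tap[i]);
		off += tap[i];
	}

	/* the segment sizes must cover the whole message exactly */
	return off == len ? 0 : -1;
}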
 
-static const struct cipher_testvec speck128_xts_dec_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62"
-			  "\x3b\x99\x4a\x64\x74\x77\xac\xed"
-			  "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42"
-			  "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54",
-		.ilen	= 32,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xfb\x53\x81\x75\x6f\x9f\x34\xad"
-			  "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a"
-			  "\xd4\x84\xa4\x53\xd5\x88\x73\x1b"
-			  "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x21\x52\x84\x15\xd1\xf7\x21\x55"
-			  "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d"
-			  "\xda\x63\xb2\xf1\x82\xb0\x89\x59"
-			  "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82"
-			  "\x53\xd0\xed\x2d\x30\xc1\x20\xef"
-			  "\x70\x67\x5e\xff\x09\x70\xbb\xc1"
-			  "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48"
-			  "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7"
-			  "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9"
-			  "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44"
-			  "\x19\xc5\x58\x84\x63\xb9\x12\x68"
-			  "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c"
-			  "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd"
-			  "\x74\x79\x2e\xb4\x44\xd7\x69\xc4"
-			  "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d"
-			  "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb"
-			  "\x6d\x13\x65\xa0\xf9\x31\x12\xe2"
-			  "\x26\xd1\xec\x2b\x0a\x8b\x59\x99"
-			  "\xa7\x49\xa0\x0e\x09\x33\x85\x50"
-			  "\xc3\x23\xca\x7a\xdd\x13\x45\x5f"
-			  "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f"
-			  "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6"
-			  "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f"
-			  "\x79\x91\x8d\x36\x13\x7b\xd0\x4a"
-			  "\x6c\x39\xfb\x53\xb8\x6f\x02\x51"
-			  "\xa5\x20\xac\x24\x1c\x73\x59\x73"
-			  "\x58\x61\x3a\x87\x58\xb3\x20\x56"
-			  "\x39\x06\x2b\x4d\xd3\x20\x2b\x89"
-			  "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd"
-			  "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91"
-			  "\x09\x35\x71\x50\x65\xac\x92\xe3"
-			  "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92"
-			  "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9"
-			  "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d"
-			  "\x77\x04\x80\xa9\xbf\x38\xb5\xbd"
-			  "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8"
-			  "\x2a\x26\xcc\x49\x14\x6d\x55\x01"
-			  "\x06\x94\xd8\xb2\x2d\x53\x83\x1b"
-			  "\x8f\xd4\xdd\x57\x12\x7e\x18\xba"
-			  "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d"
-			  "\x24\xa9\x60\xa4\x97\x85\x86\x2a"
-			  "\x01\x00\x09\xf1\xcb\x4a\x24\x1c"
-			  "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4"
-			  "\x97\x1c\x10\xc6\x4d\x66\x4f\x98"
-			  "\x87\x30\xac\xd5\xea\x73\x49\x10"
-			  "\x80\xea\xe5\x5f\x4d\x5f\x03\x33"
-			  "\x66\x02\x35\x3d\x60\x06\x36\x4f"
-			  "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8"
-			  "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28"
-			  "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93"
-			  "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30"
-			  "\xcc\x75\xcf\x16\x26\xa9\x26\x3b"
-			  "\xe7\x68\x2f\x15\x21\x5b\xe4\x00"
-			  "\xbd\x48\x50\xcd\x75\x70\xc4\x62"
-			  "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b"
-			  "\x51\x66\x02\x69\x04\x97\x36\xd4"
-			  "\x75\xae\x0b\xa3\x42\xf8\xca\x79"
-			  "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2"
-			  "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd"
-			  "\xea\x15\x5a\xa0\x85\x7e\x81\x0d"
-			  "\x03\xe7\x05\x39\xf5\x05\x26\xee"
-			  "\xec\xaa\x1f\x3d\xc9\x98\x76\x01"
-			  "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4"
-			  "\x50\x65\x50\x6d\x04\x1f\xdf\x5a"
-			  "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca"
-			  "\x47\x26\xef\x39\xb8\xb4\xf2\xd1"
-			  "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1"
-			  "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb"
-			  "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73"
-			  "\x92\x99\xde\xd3\x76\xed\xcd\x63"
-			  "\x64\x3a\x22\x57\xc1\x43\x49\xd4"
-			  "\x79\x36\x31\x19\x62\xae\x10\x7e"
-			  "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa"
-			  "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0"
-			  "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00"
-			  "\xfc\x81\x99\x8a\x14\x62\xf5\x7e"
-			  "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec"
-			  "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6"
-			  "\x62\x62\x37\xfe\x0a\x4c\x4a\x37"
-			  "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e"
-			  "\x85\x3c\x4f\x26\x64\x85\xbc\x68"
-			  "\xb0\xe0\x86\x5e\x26\x41\xce\x11"
-			  "\x50\xda\x97\x14\xe9\x9e\xc7\x6d"
-			  "\x3b\xdc\x43\xde\x2b\x27\x69\x7d"
-			  "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31"
-			  "\x14\x4d\xf0\x74\x37\xfd\x07\x25"
-			  "\x96\x55\xe5\xfc\x9e\x27\x2a\x74"
-			  "\x1b\x83\x4d\x15\x83\xac\x57\xa0"
-			  "\xac\xa5\xd0\x38\xef\x19\x56\x53"
-			  "\x25\x4b\xfc\xce\x04\x23\xe5\x6b"
-			  "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5"
-			  "\xed\x22\x34\x1c\x5d\xed\x17\x06"
-			  "\x36\xa3\xe6\x77\xb9\x97\x46\xb8"
-			  "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc"
-			  "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82"
-			  "\x35\x91\x3d\x1b\xe4\x97\x9f\x92"
-			  "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1"
-			  "\x8d\x39\xfc\x42\xfb\x38\x80\xb9"
-			  "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1"
-			  "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7"
-			  "\xa1\xbf\xf7\xda\x95\x93\x4b\x78"
-			  "\x19\xf5\x94\xf9\xd2\x00\x33\x37"
-			  "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee"
-			  "\x42\xb2\x9e\x2c\x5f\x48\x23\x26"
-			  "\x15\x25\x17\x03\x3d\xfe\x2c\xfc"
-			  "\xeb\xba\xda\xe0\x00\x05\xb6\xa6"
-			  "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf"
-			  "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a"
-			  "\x49\xa1\xc3\xfa\x10\x52\xb9\x14"
-			  "\xad\xb7\x73\xf8\x78\x12\xc8\x59"
-			  "\x17\x80\x4c\x57\x39\xf1\x6d\x80"
-			  "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21"
-			  "\xec\xce\xb7\xc8\x02\x8a\xed\x53"
-			  "\x2c\x25\x68\x2e\x1f\x85\x5e\x67"
-			  "\xd1\x07\x7a\x3a\x89\x08\xe0\x34"
-			  "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40"
-			  "\x31\x15\x72\xa0\xf0\x73\xd9\x3b"
-			  "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2"
-			  "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8"
-			  "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6"
-			  "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58"
-			  "\xcc\x1f\x48\x49\x65\x47\x75\xe9"
-			  "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07"
-			  "\xf2\xec\x76\xd8\x8f\x09\xf3\x16"
-			  "\xa1\x51\x89\x3b\xeb\x96\x42\xac"
-			  "\x65\xe0\x67\x63\x29\xdc\xb4\x7d"
-			  "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb"
-			  "\x66\x8d\x13\xca\xe0\x59\x2a\x00"
-			  "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5"
-			  "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	}
-};
-
-static const struct cipher_testvec speck64_enc_tv_template[] = {
+static const struct cipher_testvec speck64_tv_template[] = {
 	{ /* Speck64/96 */
 		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
 			  "\x10\x11\x12\x13",
 		.klen	= 12,
-		.input	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
-		.ilen	= 8,
-		.result	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
-		.rlen	= 8,
+		.ptext	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
+		.ctext	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
+		.len	= 8,
 	}, { /* Speck64/128 */
 		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
 			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
 		.klen	= 16,
-		.input	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
-		.ilen	= 8,
-		.result	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
-		.rlen	= 8,
-	},
-};
-
-static const struct cipher_testvec speck64_dec_tv_template[] = {
-	{ /* Speck64/96 */
-		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
-			  "\x10\x11\x12\x13",
-		.klen	= 12,
-		.input	= "\x6c\x94\x75\x41\xec\x52\x79\x9f",
-		.ilen	= 8,
-		.result	= "\x65\x61\x6e\x73\x20\x46\x61\x74",
-		.rlen	= 8,
-	}, { /* Speck64/128 */
-		.key	= "\x00\x01\x02\x03\x08\x09\x0a\x0b"
-			  "\x10\x11\x12\x13\x18\x19\x1a\x1b",
-		.klen	= 16,
-		.input	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
-		.ilen	= 8,
-		.result	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
-		.rlen	= 8,
+		.ptext	= "\x2d\x43\x75\x74\x74\x65\x72\x3b",
+		.ctext	= "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c",
+		.len	= 8,
 	},
 };
 
 /*
- * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the result
- * recomputed with Speck64 as the cipher, and key lengths adjusted
+ * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the
+ * ciphertext recomputed with Speck64 as the cipher, and key lengths adjusted
  */
-
-static const struct cipher_testvec speck64_xts_enc_tv_template[] = {
+static const struct cipher_testvec speck64_xts_tv_template[] = {
 	{
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -15541,16 +10559,15 @@
 		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 32,
-		.result	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
+		.ctext	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
 			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
 			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
 			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
@@ -15558,16 +10575,15 @@
 		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
+		.ctext	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
 			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
 			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
 			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
@@ -15575,16 +10591,15 @@
 		.klen	= 24,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
+		.ctext	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
 			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
 			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
 			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -15592,7 +10607,7 @@
 		.klen	= 24,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -15656,8 +10671,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
+		.ctext	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
 			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
 			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
 			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
@@ -15721,7 +10735,7 @@
 			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
 			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
 			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
-		.rlen	= 512,
+		.len	= 512,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -15730,7 +10744,7 @@
 		.klen	= 32,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -15794,8 +10808,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
+		.ctext	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
 			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
 			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
 			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
@@ -15859,340 +10872,7 @@
 			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
 			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
 			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	}
-};
-
-static const struct cipher_testvec speck64_xts_dec_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 24,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x84\xaf\x54\x07\x19\xd4\x7c\xa6"
-			  "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2"
-			  "\x80\xf5\x72\xe7\xcd\xf0\x99\x22"
-			  "\x35\xa7\x2f\x06\xef\xdc\x51\xaa",
-		.ilen	= 32,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 24,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x12\x56\x73\xcd\x15\x87\xa8\x59"
-			  "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f"
-			  "\xb3\x12\x69\x7e\x36\xeb\x52\xff"
-			  "\x62\xdd\xba\x90\xb3\xe1\xee\x99",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 24,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c"
-			  "\x27\x36\xc0\xbf\x5d\xea\x36\x37"
-			  "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b"
-			  "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93",
-		.klen	= 24,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e"
-			  "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09"
-			  "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3"
-			  "\x11\xc7\x39\x96\xd0\x95\xf4\x56"
-			  "\xf4\xdd\x03\x38\x01\x44\x2c\xcf"
-			  "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66"
-			  "\xfe\x3d\xc6\xfb\x01\x23\x51\x43"
-			  "\xd5\xd2\x13\x86\x94\x34\xe9\x62"
-			  "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef"
-			  "\x76\x35\x04\x3f\xdb\x23\x9d\x0b"
-			  "\x85\x42\xb9\x02\xd6\xcc\xdb\x96"
-			  "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d"
-			  "\xae\xd2\x04\xd5\xda\xc1\x7e\x24"
-			  "\x8c\x73\xbe\x48\x7e\xcf\x65\x28"
-			  "\x29\xe5\xbe\x54\x30\xcb\x46\x95"
-			  "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe"
-			  "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69"
-			  "\xa1\x09\x95\x71\x26\xe9\xc4\xdf"
-			  "\xe6\x31\xc3\x46\xda\xaf\x0b\x41"
-			  "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3"
-			  "\x82\xc0\x37\x27\xfc\x91\xa7\x05"
-			  "\xfb\xc5\xdc\x2b\x74\x96\x48\x43"
-			  "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f"
-			  "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a"
-			  "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c"
-			  "\x07\xff\xf3\x72\x74\x48\xb5\x40"
-			  "\x50\xb5\xdd\x90\x43\x31\x18\x15"
-			  "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a"
-			  "\x29\x93\x90\x8b\xda\x07\xf0\x35"
-			  "\x6d\x90\x88\x09\x4e\x83\xf5\x5b"
-			  "\x94\x12\xbb\x33\x27\x1d\x3f\x23"
-			  "\x51\xa8\x7c\x07\xa2\xae\x77\xa6"
-			  "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f"
-			  "\x66\xdd\xcd\x75\x24\x8b\x33\xf7"
-			  "\x20\xdb\x83\x9b\x4f\x11\x63\x6e"
-			  "\xcf\x37\xef\xc9\x11\x01\x5c\x45"
-			  "\x32\x99\x7c\x3c\x9e\x42\x89\xe3"
-			  "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05"
-			  "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc"
-			  "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d"
-			  "\xa0\xa8\x89\x3b\x73\x39\xa5\x94"
-			  "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89"
-			  "\x10\xff\xaf\xef\xca\xdd\x4f\x80"
-			  "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7"
-			  "\x33\xca\x00\x8b\x8b\x3f\xea\xec"
-			  "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f"
-			  "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5"
-			  "\x64\xa3\xf1\x1a\x76\x28\xcc\x35"
-			  "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b"
-			  "\xc7\x1b\x53\x17\x02\xea\xd1\xad"
-			  "\x13\x51\x73\xc0\xa0\xb2\x05\x32"
-			  "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19"
-			  "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d"
-			  "\x59\xda\xee\x1a\x22\x18\xda\x0d"
-			  "\x88\x0f\x55\x8b\x72\x62\xfd\xc1"
-			  "\x69\x13\xcd\x0d\x5f\xc1\x09\x52"
-			  "\xee\xd6\xe3\x84\x4d\xee\xf6\x88"
-			  "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f"
-			  "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54"
-			  "\x7d\x69\x8d\x00\x62\x77\x0d\x14"
-			  "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3"
-			  "\x50\xf7\x5f\xf4\xc2\xca\x41\x97"
-			  "\x37\xbe\x75\x74\xcd\xf0\x75\x6e"
-			  "\x25\x23\x94\xbd\xda\x8d\xb0\xd4",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27",
-		.klen	= 32,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x55\xed\x71\xd3\x02\x8e\x15\x3b"
-			  "\xc6\x71\x29\x2d\x3e\x89\x9f\x59"
-			  "\x68\x6a\xcc\x8a\x56\x97\xf3\x95"
-			  "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c"
-			  "\x78\x16\xea\x80\xdb\x33\x75\x94"
-			  "\xf9\x29\xc4\x2b\x76\x75\x97\xc7"
-			  "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b"
-			  "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee"
-			  "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a"
-			  "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c"
-			  "\xf5\xec\x32\x74\xa3\xb8\x03\x88"
-			  "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f"
-			  "\x84\x5e\x46\xed\x20\x89\xb6\x44"
-			  "\x8d\xd0\xed\x54\x47\x16\xbe\x95"
-			  "\x8a\xb3\x6b\x72\xc4\x32\x52\x13"
-			  "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6"
-			  "\x44\x18\xdd\x8c\x6e\xca\x6e\x45"
-			  "\x8f\x1e\x10\x07\x57\x25\x98\x7b"
-			  "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8"
-			  "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb"
-			  "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff"
-			  "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e"
-			  "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d"
-			  "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65"
-			  "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a"
-			  "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a"
-			  "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78"
-			  "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3"
-			  "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e"
-			  "\x35\x10\x30\x82\x0d\xe7\xc5\x9b"
-			  "\xde\x44\x18\xbd\x9f\xd1\x45\xa9"
-			  "\x7b\x7a\x4a\xad\x35\x65\x27\xca"
-			  "\xb2\xc3\xd4\x9b\x71\x86\x70\xee"
-			  "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf"
-			  "\xfc\x42\xc8\x31\x59\xbe\x16\x60"
-			  "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14"
-			  "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef"
-			  "\x52\x7f\x29\x51\x94\x20\x67\x3c"
-			  "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63"
-			  "\xe7\xff\x73\x25\xd1\xdd\x96\x8a"
-			  "\x98\x52\x6d\xf3\xac\x3e\xf2\x18"
-			  "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed"
-			  "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e"
-			  "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad"
-			  "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa"
-			  "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81"
-			  "\x65\x53\x0f\x41\x11\xbd\x98\x99"
-			  "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d"
-			  "\x84\x98\xf9\x34\xed\x33\x2a\x1f"
-			  "\x82\xed\xc1\x73\x98\xd3\x02\xdc"
-			  "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76"
-			  "\x63\x51\x34\x9d\x96\x12\xae\xce"
-			  "\x83\xc9\x76\x5e\xa4\x1b\x53\x37"
-			  "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d"
-			  "\x54\x27\x74\xbb\x10\x86\x57\x46"
-			  "\x68\xe1\xed\x14\xe7\x9d\xfc\x84"
-			  "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf"
-			  "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d"
-			  "\x7b\x4f\x38\x55\x36\x71\x64\xc1"
-			  "\xfc\x5c\x75\x52\x33\x02\x18\xf8"
-			  "\x17\xe1\x2b\xc2\x43\x39\xbd\x76"
-			  "\x9b\x63\x76\x32\x2f\x19\x72\x10"
-			  "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5"
-			  "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
@@ -16200,37 +10880,34 @@
 };
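
For context, a unified template also needs only one reference from the self-test descriptors rather than one per direction. A sketch of that wiring, assuming the alg_test_desc layout and the __VECS() helper found in crypto/testmgr.c of this era — neither is part of this hunk:

  /*
   * Sketch: hooking a unified template into the self-tests.
   * "xts(speck64)", alg_test_skcipher and __VECS() are assumptions
   * modeled on crypto/testmgr.c, shown here only for orientation.
   */
  {
  	.alg = "xts(speck64)",
  	.test = alg_test_skcipher,
  	.suite = {
  		.cipher = __VECS(speck64_xts_tv_template)
  	}
  },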
 
 /* Cast6 test vectors from RFC 2612 */
-static const struct cipher_testvec cast6_enc_tv_template[] = {
+static const struct cipher_testvec cast6_tv_template[] = {
 	{
 		.key	= "\x23\x42\xbb\x9e\xfa\x38\x54\x2c"
 			  "\x0a\xf7\x56\x47\xf2\x9f\x61\x5d",
 		.klen	= 16,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\xc8\x42\xa0\x89\x72\xb4\x3d\x20"
+		.ptext	= zeroed_string,
+		.ctext	= "\xc8\x42\xa0\x89\x72\xb4\x3d\x20"
 			  "\x83\x6c\x91\xd1\xb7\x53\x0f\x6b",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x23\x42\xbb\x9e\xfa\x38\x54\x2c"
 			  "\xbe\xd0\xac\x83\x94\x0a\xc2\x98"
 			  "\xba\xc7\x7a\x77\x17\x94\x28\x63",
 		.klen	= 24,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\x1b\x38\x6c\x02\x10\xdc\xad\xcb"
+		.ptext	= zeroed_string,
+		.ctext	= "\x1b\x38\x6c\x02\x10\xdc\xad\xcb"
 			  "\xdd\x0e\x41\xaa\x08\xa7\xa7\xe8",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x23\x42\xbb\x9e\xfa\x38\x54\x2c"
 			  "\xbe\xd0\xac\x83\x94\x0a\xc2\x98"
 			  "\x8d\x7c\x47\xce\x26\x49\x08\x46"
 			  "\x1c\xc1\xb5\x13\x7a\xe6\xb6\x04",
 		.klen	= 32,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\x4f\x6a\x20\x38\x28\x68\x97\xb9"
+		.ptext	= zeroed_string,
+		.ctext	= "\x4f\x6a\x20\x38\x28\x68\x97\xb9"
 			  "\xc9\x87\x01\x36\x55\x33\x17\xfa",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* Generated from TF test vectors */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -16239,7 +10916,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -16301,8 +10978,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\xC3\x70\x22\x32\xF5\x80\xCB\x54"
+		.ctext	= "\xC3\x70\x22\x32\xF5\x80\xCB\x54"
 			  "\xFC\x30\xE0\xF6\xEB\x39\x57\xA6"
 			  "\xB6\xB9\xC5\xA4\x91\x55\x14\x97"
 			  "\xC1\x20\xFF\x6C\x5C\xF0\x67\xEA"
@@ -16364,185 +11040,14 @@
 			  "\x49\x2C\xF3\xD4\x90\xCC\x93\x4C"
 			  "\x84\x52\x6D\x68\xDE\xC6\x64\xB2"
 			  "\x11\x74\x93\x57\xB4\x7E\xC6\x00",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec cast6_dec_tv_template[] = {
-	{
-		.key	= "\x23\x42\xbb\x9e\xfa\x38\x54\x2c"
-			  "\x0a\xf7\x56\x47\xf2\x9f\x61\x5d",
-		.klen	= 16,
-		.input	= "\xc8\x42\xa0\x89\x72\xb4\x3d\x20"
-			  "\x83\x6c\x91\xd1\xb7\x53\x0f\x6b",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, {
-		.key	= "\x23\x42\xbb\x9e\xfa\x38\x54\x2c"
-			  "\xbe\xd0\xac\x83\x94\x0a\xc2\x98"
-			  "\xba\xc7\x7a\x77\x17\x94\x28\x63",
-		.klen	= 24,
-		.input	= "\x1b\x38\x6c\x02\x10\xdc\xad\xcb"
-			  "\xdd\x0e\x41\xaa\x08\xa7\xa7\xe8",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, {
-		.key	= "\x23\x42\xbb\x9e\xfa\x38\x54\x2c"
-			  "\xbe\xd0\xac\x83\x94\x0a\xc2\x98"
-			  "\x8d\x7c\x47\xce\x26\x49\x08\x46"
-			  "\x1c\xc1\xb5\x13\x7a\xe6\xb6\x04",
-		.klen	= 32,
-		.input	= "\x4f\x6a\x20\x38\x28\x68\x97\xb9"
-			  "\xc9\x87\x01\x36\x55\x33\x17\xfa",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, { /* Generated from TF test vectors */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\xC3\x70\x22\x32\xF5\x80\xCB\x54"
-			  "\xFC\x30\xE0\xF6\xEB\x39\x57\xA6"
-			  "\xB6\xB9\xC5\xA4\x91\x55\x14\x97"
-			  "\xC1\x20\xFF\x6C\x5C\xF0\x67\xEA"
-			  "\x2F\xED\xD8\xC9\xFB\x38\x3F\xFE"
-			  "\x93\xBE\xDC\x00\xD3\x7F\xAD\x4C"
-			  "\x5A\x08\x92\xD1\x47\x0C\xFA\x6C"
-			  "\xD0\x6A\x99\x10\x72\xF8\x47\x62"
-			  "\x81\x42\xF8\xD8\xF5\xBB\x94\x08"
-			  "\xAA\x97\xA2\x8B\x69\xB3\xD2\x7E"
-			  "\xBC\xB5\x00\x0C\xE5\x44\x4B\x58"
-			  "\xE8\x63\xDC\xB3\xC4\xE5\x23\x12"
-			  "\x5A\x72\x85\x47\x8B\xEC\x9F\x26"
-			  "\x84\xB6\xED\x10\x33\x63\x9B\x5F"
-			  "\x4D\x53\xEE\x94\x45\x8B\x60\x58"
-			  "\x86\x20\xF9\x1E\x82\x08\x3E\x58"
-			  "\x60\x1B\x34\x19\x02\xBE\x4E\x09"
-			  "\xBB\x7C\x15\xCC\x60\x27\x55\x7A"
-			  "\x12\xB8\xD8\x08\x89\x3C\xA6\xF3"
-			  "\xF1\xDD\xA7\x07\xA3\x12\x85\x28"
-			  "\xE9\x57\xAC\x80\x0C\x5C\x0F\x3A"
-			  "\x5D\xC2\x91\xC7\x90\xE4\x8C\x43"
-			  "\x92\xE4\x7C\x26\x69\x4D\x83\x68"
-			  "\x14\x96\x42\x47\xBD\xA9\xE4\x8A"
-			  "\x33\x19\xEB\x54\x8E\x0D\x4B\x6E"
-			  "\x91\x51\xB5\x36\x08\xDE\x1C\x06"
-			  "\x03\xBD\xDE\x81\x26\xF7\x99\xC2"
-			  "\xBA\xF7\x6D\x87\x0D\xE4\xA6\xCF"
-			  "\xC1\xF5\x27\x05\xB8\x02\x57\x72"
-			  "\xE6\x42\x13\x0B\xC6\x47\x05\x74"
-			  "\x24\x15\xF7\x0D\xC2\x23\x9D\xB9"
-			  "\x3C\x77\x18\x93\xBA\xB4\xFC\x8C"
-			  "\x98\x82\x67\x67\xB4\xD7\xD3\x43"
-			  "\x23\x08\x02\xB7\x9B\x99\x05\xFB"
-			  "\xD3\xB5\x00\x0A\xA9\x9D\x66\xD6"
-			  "\x2E\x49\x58\xD0\xA8\x57\x29\x7F"
-			  "\x0A\x0E\x7D\xFC\x92\x83\xCC\x67"
-			  "\xA2\xB1\x70\x3A\x8F\x87\x4A\x8D"
-			  "\x17\xE2\x58\x2B\x88\x0D\x68\x62"
-			  "\xBF\x35\xD1\x6F\xC0\xF0\x18\x62"
-			  "\xB2\xC7\x2D\x58\xC7\x16\xDE\x08"
-			  "\xEB\x84\x1D\x25\xA7\x38\x94\x06"
-			  "\x93\x9D\xF8\xFE\x88\x71\xE7\x84"
-			  "\x2C\xA0\x38\xA3\x1D\x48\xCF\x29"
-			  "\x0B\xBC\xD8\x50\x99\x1A\x26\xFB"
-			  "\x8E\x75\x3D\x73\xEB\x6A\xED\x29"
-			  "\xE0\x8E\xED\xFC\xFE\x6F\xF6\xBA"
-			  "\x41\xE2\x10\x4C\x01\x8B\x69\x2B"
-			  "\x25\x3F\x4D\x70\x7B\x92\xD6\x3B"
-			  "\xAC\xF9\x77\x18\xD9\x6A\x30\xA6"
-			  "\x2E\xFA\x30\xFF\xC8\xD5\x1D\x06"
-			  "\x59\x28\x1D\x86\x43\x04\x5D\x3B"
-			  "\x99\x4C\x04\x5A\x21\x17\x8B\x76"
-			  "\x8F\x72\xCB\xA1\x9C\x29\x4C\xC3"
-			  "\x65\xA2\x58\x2A\xC5\x66\x24\xBF"
-			  "\xBA\xE6\x0C\xDD\x34\x24\x74\xC8"
-			  "\x84\x0A\x66\x2C\xBE\x8F\x32\xA9"
-			  "\xE7\xE4\xA1\xD7\xDA\xAB\x23\x1E"
-			  "\xEB\xEE\x6C\x94\x6F\x9C\x2E\xD1"
-			  "\x49\x2C\xF3\xD4\x90\xCC\x93\x4C"
-			  "\x84\x52\x6D\x68\xDE\xC6\x64\xB2"
-			  "\x11\x74\x93\x57\xB4\x7E\xC6\x00",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec cast6_cbc_enc_tv_template[] = {
+static const struct cipher_testvec cast6_cbc_tv_template[] = {
 	{ /* Generated from TF test vectors */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -16551,7 +11056,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -16613,8 +11118,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\xDF\x77\x68\x96\xC7\xBA\xF8\xE2"
+		.ctext	= "\xDF\x77\x68\x96\xC7\xBA\xF8\xE2"
 			  "\x0E\x24\x99\x1A\xAA\xF3\xC6\x9F"
 			  "\xA0\x73\xB3\x70\xC3\x68\x64\x70"
 			  "\xAD\x33\x02\xFB\x88\x74\xAA\x78"
@@ -16676,14 +11180,14 @@
 			  "\xC5\x5C\xAD\xB6\x07\xB6\x84\xF3"
 			  "\x4D\x59\x7D\xC5\x28\x69\xFA\x92"
 			  "\x22\x46\x89\x2D\x0F\x2B\x08\x24",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec cast6_cbc_dec_tv_template[] = {
+static const struct cipher_testvec cast6_ctr_tv_template[] = {
 	{ /* Generated from TF test vectors */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -16692,155 +11196,13 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\xDF\x77\x68\x96\xC7\xBA\xF8\xE2"
-			  "\x0E\x24\x99\x1A\xAA\xF3\xC6\x9F"
-			  "\xA0\x73\xB3\x70\xC3\x68\x64\x70"
-			  "\xAD\x33\x02\xFB\x88\x74\xAA\x78"
-			  "\xC7\x47\x1A\x18\x61\x2D\xAC\x9F"
-			  "\x7E\x6F\xDF\x05\x13\x76\xA6\x72"
-			  "\xB7\x13\x09\x0F\x7D\x38\xDF\x25"
-			  "\x4E\xFD\x50\x45\xFA\x35\x6A\xC0"
-			  "\x57\x95\xE1\x21\x26\x10\x9A\x21"
-			  "\xA1\x8A\x51\x05\xD1\xB1\x78\x35"
-			  "\x98\xF5\xAE\xC0\xC1\x8B\x94\xFF"
-			  "\xD0\x69\x3F\x42\xC2\x01\xA7\x9B"
-			  "\x23\x16\x47\x72\x81\x13\x3A\x72"
-			  "\xEC\xD9\x40\x88\x00\x9C\xB0\xA8"
-			  "\x9C\xAC\xCE\x11\x73\x7B\x63\x3E"
-			  "\xA3\x63\x98\x7D\x35\xE4\xD9\x83"
-			  "\xE2\xD0\x52\x87\x0C\x1F\xB0\xB3"
-			  "\x41\x1A\x93\x8D\x76\x31\x9F\xF2"
-			  "\xFE\x09\xA3\x8F\x22\x6A\x3B\xB9"
-			  "\x6C\x9E\xE4\xA1\xA0\xC4\xE7\xA1"
-			  "\x21\x9C\x1A\xCA\x65\xDE\x44\x03"
-			  "\x99\xF2\xD2\x39\xE3\x3F\x0F\x37"
-			  "\x53\x50\x23\xA4\x81\x6E\xDA\xFB"
-			  "\xF8\x7B\x01\xD7\xB2\x32\x9C\xB8"
-			  "\xB1\x0E\x99\x17\xB5\x38\xF9\xD7"
-			  "\x86\x2D\x6E\x94\x5C\x99\x9D\xB3"
-			  "\xD3\x63\x4B\x2A\x7D\x44\x6A\xB2"
-			  "\xC1\x03\xE6\x5A\x37\xD8\x64\x18"
-			  "\xAA\x32\xCE\x29\xED\xC0\xA2\xCB"
-			  "\x8D\xAF\xCD\xBE\x8F\xB6\xEC\xB4"
-			  "\x89\x05\x81\x6E\x71\x4F\xC3\x28"
-			  "\x10\xC1\x62\xC4\x41\xE9\xD2\x39"
-			  "\xF3\x22\x39\x12\x2C\xC2\x95\x2D"
-			  "\xBF\x93\x58\x4B\x04\xD1\x8D\x57"
-			  "\xAE\xEB\x60\x03\x56\x35\xAD\x5A"
-			  "\xE9\xC3\xFF\x4E\x31\xE1\x37\xF8"
-			  "\x7D\xEE\x65\x8A\xB6\x88\x1A\x3E"
-			  "\x07\x09\x82\xBA\xF0\x80\x8A\xD0"
-			  "\xA0\x3F\x6A\xE9\x24\x87\x19\x65"
-			  "\x73\x3F\x12\x91\x47\x54\xBA\x39"
-			  "\x30\x5B\x1E\xE5\xC2\xF9\x3F\xEF"
-			  "\xD6\x75\xF9\xB8\x7C\x8B\x05\x76"
-			  "\xEE\xB7\x08\x25\x4B\xB6\x7B\x47"
-			  "\x72\xC0\x4C\xD4\xDA\xE0\x75\xF1"
-			  "\x7C\xE8\x94\x9E\x16\x6E\xB8\x12"
-			  "\xA1\xC1\x6E\x3B\x1C\x59\x41\x2D"
-			  "\x23\xFA\x7D\x77\xB8\x46\x75\xFE"
-			  "\x4F\x10\xD3\x09\x60\xA1\x36\x96"
-			  "\x5B\xC2\xDC\x6E\x84\x7D\x9B\x14"
-			  "\x80\x21\x83\x58\x3C\x76\xFD\x28"
-			  "\x1D\xF9\x93\x13\xD7\x0E\x62\x14"
-			  "\x5A\xC5\x4E\x08\xA5\x56\xA4\x3C"
-			  "\x68\x93\x44\x70\xDF\xCF\x4A\x51"
-			  "\x0B\x81\x29\x41\xE5\x62\x4D\x36"
-			  "\xB3\xEA\x94\xA6\xB9\xDD\x3F\x09"
-			  "\x62\x34\xA0\x6A\x7E\x7D\xF5\xF6"
-			  "\x01\x91\xB4\x27\xDA\x59\xD6\x17"
-			  "\x56\x4D\x82\x62\x37\xA3\x48\x01"
-			  "\x99\x91\x77\xB2\x08\x6B\x2C\x37"
-			  "\xC5\x5C\xAD\xB6\x07\xB6\x84\xF3"
-			  "\x4D\x59\x7D\xC5\x28\x69\xFA\x92"
-			  "\x22\x46\x89\x2D\x0F\x2B\x08\x24",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec cast6_ctr_enc_tv_template[] = {
-	{ /* Generated from TF test vectors */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A",
-		.ilen	= 17,
-		.result	= "\x26\x0A\xF1\xE2\x3F\x8A\xEF\xA3"
+		.ctext	= "\x26\x0A\xF1\xE2\x3F\x8A\xEF\xA3"
 			  "\x53\x9A\x5E\x1B\x2A\x1A\xC6\x0A"
 			  "\x57",
-		.rlen	= 17,
+		.len	= 17,
 	}, { /* Generated from TF test vectors */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -16849,7 +11211,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -16911,8 +11273,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x26\x0A\xF1\xE2\x3F\x8A\xEF\xA3"
+		.ctext	= "\x26\x0A\xF1\xE2\x3F\x8A\xEF\xA3"
 			  "\x53\x9A\x5E\x1B\x2A\x1A\xC6\x0A"
 			  "\x57\xA3\xEF\x47\x2A\xE8\x88\xA7"
 			  "\x3C\xD0\xEC\xB9\x94\x50\x7D\x56"
@@ -16974,171 +11335,14 @@
 			  "\x9B\x66\x8D\x32\xBA\x81\x34\x87"
 			  "\x0E\x74\x33\x30\x62\xB9\x89\xDF"
 			  "\xF9\xC5\xDD\x27\xB3\x39\xCB\xCB",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec cast6_ctr_dec_tv_template[] = {
-	{ /* Generated from TF test vectors */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x26\x0A\xF1\xE2\x3F\x8A\xEF\xA3"
-			  "\x53\x9A\x5E\x1B\x2A\x1A\xC6\x0A"
-			  "\x57",
-		.ilen	= 17,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A",
-		.rlen	= 17,
-	}, { /* Generated from TF test vectors */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x26\x0A\xF1\xE2\x3F\x8A\xEF\xA3"
-			  "\x53\x9A\x5E\x1B\x2A\x1A\xC6\x0A"
-			  "\x57\xA3\xEF\x47\x2A\xE8\x88\xA7"
-			  "\x3C\xD0\xEC\xB9\x94\x50\x7D\x56"
-			  "\xBC\xE1\xC1\xF5\xE1\xEE\x12\xF8"
-			  "\x4F\x03\x82\x3A\x93\x6B\x4C\xD3"
-			  "\xE3\xF3\xFA\xC2\x23\x55\x98\x20"
-			  "\x49\x76\x9B\x6B\xC1\x23\xBF\xE5"
-			  "\xD4\xC4\x2F\x61\xE1\x67\x2A\x30"
-			  "\x6F\x29\xCA\x54\xF8\x1B\xA6\x7D"
-			  "\x66\x45\xEE\xC8\x19\xBE\x50\xF0"
-			  "\x5F\x65\xF8\x1E\x4D\x07\x87\xD9"
-			  "\xD3\xD9\x1B\x09\x89\xFD\x42\xC5"
-			  "\xDB\xEB\x86\xF1\x67\x04\x0F\x5C"
-			  "\x81\xDF\x82\x12\xC7\x4C\x1B\x07"
-			  "\xDE\xE6\xFA\x29\x86\xD1\xB0\xBA"
-			  "\x3D\x6A\x69\x76\xEC\x0F\xB4\xE6"
-			  "\xCD\xA7\xF8\xA8\xB8\xE0\x33\xF5"
-			  "\x49\x61\x22\x52\x64\x8C\x46\x41"
-			  "\x1F\x48\x5F\x4F\xA2\x89\x36\x17"
-			  "\x20\xF8\x2F\x8F\x4B\xFA\xF2\xC0"
-			  "\x1E\x18\xA2\xF8\xB7\x6D\x98\xE3"
-			  "\x00\x14\x15\x59\xC1\x30\x64\xAF"
-			  "\xA8\x01\x38\xAB\xD4\x8B\xEC\x7C"
-			  "\x44\x9A\xC6\x2C\x2E\x2B\x2B\xF4"
-			  "\x02\x37\xC4\x69\xEF\x36\xC1\xF3"
-			  "\xA0\xFB\xFE\x29\xAD\x39\xCF\xD0"
-			  "\x51\x73\xA3\x22\x42\x41\xAB\xD2"
-			  "\x0F\x50\x14\xB9\x54\xD3\xD4\xFA"
-			  "\xBF\xC9\xBB\xCE\xC4\x1D\x2D\xAF"
-			  "\xC9\x3F\x07\x87\x42\x4B\x3A\x54"
-			  "\x34\x8E\x37\xA3\x03\x6F\x65\x66"
-			  "\xDB\x44\xC3\xE8\xD7\xDD\x7D\xDD"
-			  "\x61\xB4\x2B\x80\xA3\x98\x13\xF5"
-			  "\x5A\xD3\x34\x58\xC3\x6E\xF6\xB8"
-			  "\x0A\xC6\x50\x01\x8E\xD5\x6C\x7D"
-			  "\xFE\x16\xB6\xCF\xFC\x51\x40\xAE"
-			  "\xB3\x15\xAC\x90\x6F\x0B\x28\x3A"
-			  "\x60\x40\x38\x90\x20\x46\xC7\xB3"
-			  "\x0B\x12\x6D\x3B\x15\x14\xF9\xF4"
-			  "\x11\x41\x76\x6B\xB3\x60\x82\x3C"
-			  "\x84\xFB\x08\x2E\x92\x25\xCB\x79"
-			  "\x6F\x58\xC5\x94\x00\x00\x47\xB6"
-			  "\x9E\xDC\x0F\x29\x70\x46\x20\x76"
-			  "\x65\x75\x66\x5C\x00\x96\xB3\xE1"
-			  "\x0B\xA7\x11\x8B\x2E\x61\x4E\x45"
-			  "\x73\xFC\x91\xAB\x79\x41\x23\x14"
-			  "\x13\xB6\x72\x6C\x46\xB3\x03\x11"
-			  "\xE4\xF1\xEE\xC9\x7A\xCF\x96\x32"
-			  "\xB6\xF0\x8B\x97\xB4\xCF\x82\xB7"
-			  "\x15\x48\x44\x99\x09\xF6\xE0\xD7"
-			  "\xBC\xF1\x5B\x91\x4F\x30\x22\xA2"
-			  "\x45\xC4\x68\x55\xC2\xBE\xA7\xD2"
-			  "\x12\x53\x35\x9C\xF9\xE7\x35\x5D"
-			  "\x81\xE4\x86\x42\xC3\x58\xFB\xF0"
-			  "\x38\x9B\x8E\x5A\xEF\x83\x33\x0F"
-			  "\x00\x4E\x3F\x9F\xF5\x84\x62\xC4"
-			  "\x19\x35\x88\x22\x45\x59\x0E\x8F"
-			  "\xEC\x27\xDD\x4A\xA4\x1F\xBC\x41"
-			  "\x9B\x66\x8D\x32\xBA\x81\x34\x87"
-			  "\x0E\x74\x33\x30\x62\xB9\x89\xDF"
-			  "\xF9\xC5\xDD\x27\xB3\x39\xCB\xCB",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec cast6_lrw_enc_tv_template[] = {
+static const struct cipher_testvec cast6_lrw_tv_template[] = {
 	{ /* Generated from TF test vectors */
 		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
 			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
@@ -17149,7 +11353,7 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+		.ptext	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
 			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
 			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
 			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
@@ -17213,8 +11417,7 @@
 			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
 			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
 			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.ilen	= 512,
-		.result	= "\x55\x25\x09\x8B\xB5\xD5\xF8\xBF"
+		.ctext	= "\x55\x25\x09\x8B\xB5\xD5\xF8\xBF"
 			  "\x37\x4A\xFE\x3C\x47\xD8\xE6\xEB"
 			  "\xCA\xA4\x9B\xB0\xAB\x6D\x64\xCA"
 			  "\x58\xB6\x73\xF0\xD7\x52\x34\xEF"
@@ -17278,161 +11481,14 @@
 			  "\xC0\xD5\x33\x11\x56\xDE\xDC\xF5"
 			  "\x8D\xD9\xCD\x3B\x22\x67\x18\xC7"
 			  "\xC4\xF5\x99\x61\xBC\xBB\x5B\x46",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec cast6_lrw_dec_tv_template[] = {
-	{ /* Generated from TF test vectors */
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x55\x25\x09\x8B\xB5\xD5\xF8\xBF"
-			  "\x37\x4A\xFE\x3C\x47\xD8\xE6\xEB"
-			  "\xCA\xA4\x9B\xB0\xAB\x6D\x64\xCA"
-			  "\x58\xB6\x73\xF0\xD7\x52\x34\xEF"
-			  "\xFB\x3E\x96\x81\xB7\x71\x34\xA4"
-			  "\x55\x20\xBE\x39\x5A\x2B\xF9\xD1"
-			  "\x65\x0B\xDA\xD3\x7E\xB3\xA6\xF7"
-			  "\x2E\x0B\x5A\x52\xDB\x39\x8C\x9B"
-			  "\x61\x17\x5F\xAF\xB6\x5A\xC8\x08"
-			  "\xA7\xB7\x2A\x11\x7C\x97\x38\x9D"
-			  "\x59\x0E\x66\x59\x5E\xD8\x8B\xCE"
-			  "\x70\xE0\xC3\x42\xB0\x8C\x0F\xBA"
-			  "\xB2\x0D\x81\xB6\xBE\x61\x1C\x2D"
-			  "\x7E\xEA\x91\x25\xAC\xEC\xF8\x28"
-			  "\x80\x1D\xF0\x30\xBA\x62\x77\x7D"
-			  "\xDB\x15\x69\xDF\xFA\x2A\x81\x64"
-			  "\x95\x5B\xA4\x7F\x3E\x4F\xE3\x30"
-			  "\xB0\x5C\xC2\x05\xF8\xF0\x29\xE7"
-			  "\x0A\xA0\x66\xB2\x5D\x0F\x39\x2B"
-			  "\xB4\xB3\x00\xA9\xD0\xAB\x63\x61"
-			  "\x5E\xDB\xFC\x11\x74\x25\x96\x65"
-			  "\xE8\xE2\x34\x57\x77\x15\x5E\x70"
-			  "\xFF\x10\x90\xC3\x64\xF0\x11\x0A"
-			  "\x63\x3A\xD3\x55\x92\x15\x4B\x0C"
-			  "\xC7\x08\x89\x17\x3B\x99\xAD\x63"
-			  "\xE7\x06\xDF\x52\xBC\x15\x64\x45"
-			  "\x9D\x7A\xFB\x69\xBC\x2D\x6E\xA9"
-			  "\x35\xD9\xD8\xF5\x0C\xC4\xA2\x23"
-			  "\x9C\x18\x8B\xA8\x8C\xFE\xF8\x0E"
-			  "\xBD\xAB\x60\x1A\x51\x17\x54\x27"
-			  "\xB6\xE8\xBE\x0F\xA9\xA5\x82\x19"
-			  "\x2F\x6F\x20\xA7\x47\xED\x74\x6C"
-			  "\x4E\xC1\xF8\x8C\x14\xF3\xBB\x1F"
-			  "\xED\x4D\x8F\x7C\x37\xEF\x19\xA1"
-			  "\x07\x16\xDE\x76\xCC\x5E\x94\x02"
-			  "\xFB\xBF\xE4\x81\x50\xCE\xFC\x0F"
-			  "\x9E\xCF\x3D\xF6\x67\x00\xBF\xA7"
-			  "\x6E\x21\x58\x36\x06\xDE\xB3\xD4"
-			  "\xA2\xFA\xD8\x4E\xE0\xB9\x7F\x23"
-			  "\x51\x21\x2B\x32\x68\xAA\xF8\xA8"
-			  "\x93\x08\xB5\x6D\xE6\x43\x2C\xB7"
-			  "\x31\xB2\x0F\xD0\xA2\x51\xC0\x25"
-			  "\x30\xC7\x10\x3F\x97\x27\x01\x8E"
-			  "\xFA\xD8\x4F\x78\xD8\x2E\x1D\xEB"
-			  "\xA1\x37\x52\x0F\x7B\x5E\x87\xA8"
-			  "\x22\xE2\xE6\x92\xA7\x5F\x11\x32"
-			  "\xCC\x93\x34\xFC\xD1\x7E\xAE\x54"
-			  "\xBC\x6A\x1B\x91\xD1\x2E\x21\xEC"
-			  "\x5D\xF1\xC4\xF1\x55\x20\xBF\xE5"
-			  "\x96\x3D\x69\x91\x20\x4E\xF2\x61"
-			  "\xDA\x77\xFE\xEE\xC3\x74\x57\x2A"
-			  "\x78\x39\xB0\xE0\xCF\x12\x56\xD6"
-			  "\x05\xDC\xF9\x19\x66\x44\x1D\xF9"
-			  "\x82\x37\xD4\xC2\x60\xB6\x31\xDF"
-			  "\x0C\xAF\xBC\x8B\x55\x9A\xC8\x2D"
-			  "\xAB\xA7\x88\x7B\x41\xE8\x29\xC9"
-			  "\x9B\x8D\xA7\x00\x86\x25\xB6\x14"
-			  "\xF5\x13\x73\xD7\x4B\x6B\x83\xF3"
-			  "\xAF\x96\x00\xE4\xB7\x3C\x65\xA6"
-			  "\x15\xB7\x94\x7D\x4E\x70\x4C\x75"
-			  "\xF3\xB4\x02\xA9\x17\x1C\x7A\x0A"
-			  "\xC0\xD5\x33\x11\x56\xDE\xDC\xF5"
-			  "\x8D\xD9\xCD\x3B\x22\x67\x18\xC7"
-			  "\xC4\xF5\x99\x61\xBC\xBB\x5B\x46",
-		.ilen	= 512,
-		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec cast6_xts_enc_tv_template[] = {
+static const struct cipher_testvec cast6_xts_tv_template[] = {
 	{ /* Generated from TF test vectors */
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -17445,7 +11501,7 @@
 		.klen	= 64,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -17509,8 +11565,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xDE\x6F\x22\xA5\xE8\x39\xE8\x78"
+		.ctext	= "\xDE\x6F\x22\xA5\xE8\x39\xE8\x78"
 			  "\x88\x5A\x4F\x8D\x82\x76\x52\x6D"
 			  "\xB2\x41\x16\xF4\x2B\xA6\xEB\xF6"
 			  "\xE2\xC5\x62\x8D\x61\xA1\x01\xED"
@@ -17574,207 +11629,54 @@
 			  "\x45\x72\x80\x17\x81\xBD\x9D\x62"
 			  "\xA1\xAC\xE8\xCF\xC6\x74\xCF\xDC"
 			  "\x22\x60\x4E\xE8\xA4\x5D\x85\xB9",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec cast6_xts_dec_tv_template[] = {
-	{ /* Generated from TF test vectors */
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xDE\x6F\x22\xA5\xE8\x39\xE8\x78"
-			  "\x88\x5A\x4F\x8D\x82\x76\x52\x6D"
-			  "\xB2\x41\x16\xF4\x2B\xA6\xEB\xF6"
-			  "\xE2\xC5\x62\x8D\x61\xA1\x01\xED"
-			  "\xD9\x38\x01\xC1\x43\x63\x4E\x88"
-			  "\xC9\x4B\x5A\x88\x80\xB7\x5C\x71"
-			  "\x47\xEE\x11\xD8\xB7\x2D\x5D\x13"
-			  "\x1A\xB1\x68\x5B\x61\xA7\xA9\x81"
-			  "\x8B\x83\xA1\x6A\xAA\x36\xD6\xB6"
-			  "\x60\x54\x09\x32\xFE\x6A\x76\x2E"
-			  "\x28\xFF\xD5\xD6\xDD\x1D\x45\x7D"
-			  "\xF0\x8B\xF3\x32\x4E\x6C\x12\xCB"
-			  "\xB8\x25\x70\xF8\x40\xBC\x90\x1B"
-			  "\x11\xC3\x59\xAF\xF0\x2F\x92\xDD"
-			  "\xD3\x3B\xCF\x60\xA1\x78\x94\x57"
-			  "\xAF\x76\xC1\x67\xA6\x3C\xCD\x98"
-			  "\xB1\xF7\x27\xB9\xA3\xBD\x10\xEA"
-			  "\xCD\x8B\xC2\xF2\x14\xF2\xB2\x67"
-			  "\x05\xDD\x1D\x58\x6E\x2F\x95\x08"
-			  "\x3A\xF8\x78\x76\x82\x56\xA7\xEC"
-			  "\x51\x4B\x85\x77\xC2\x4C\x4A\x34"
-			  "\x71\x38\x17\x91\x44\xE8\xFC\x65"
-			  "\x99\x0D\x52\x91\xEE\xF8\xEF\x27"
-			  "\x2A\x9E\x6E\x78\xC4\x26\x87\xF4"
-			  "\x8A\xF0\x2D\x04\xE8\x14\x92\x5D"
-			  "\x59\x22\x9B\x29\x5C\x18\xF0\xC3"
-			  "\x47\xF3\x76\xD8\xE4\xF3\x1B\xD1"
-			  "\x70\xA3\x0D\xB5\x70\x02\x1D\xA3"
-			  "\x91\x3B\x49\x73\x18\xAB\xD4\xC9"
-			  "\xC3\x1E\xEF\x1F\xFE\xD5\x59\x8A"
-			  "\xD7\xF6\xC9\x71\x67\x79\xD7\x0E"
-			  "\xBE\x1F\x8E\xEC\x55\x7E\x4F\x24"
-			  "\xE6\x87\xEA\xFE\x96\x25\x67\x8E"
-			  "\x93\x03\xFA\xFF\xCE\xAF\xB2\x3C"
-			  "\x6F\xEB\x57\xFB\xD3\x28\x87\xA9"
-			  "\xCE\xC2\xF5\x9C\xC6\x67\xB5\x97"
-			  "\x49\xF7\x04\xCB\xEF\x84\x98\x33"
-			  "\xAF\x38\xD3\x04\x1C\x24\x71\x38"
-			  "\xC7\x71\xDD\x43\x0D\x12\x4A\x18"
-			  "\xBA\xC4\xAF\xBA\xB2\x5B\xEB\x95"
-			  "\x02\x43\x5D\xCE\x19\xCC\xCD\x66"
-			  "\x91\x0B\x8C\x7F\x51\xC4\xBF\x3C"
-			  "\x8B\xF1\xCC\xAA\x29\xD7\x87\xCB"
-			  "\x3E\xC5\xF3\xC9\x75\xE8\xA3\x5B"
-			  "\x30\x45\xA9\xB7\xAF\x80\x64\x6F"
-			  "\x75\x4A\xA7\xC0\x6D\x19\x6B\xDE"
-			  "\x17\xDE\x6D\xEA\x87\x9F\x95\xAE"
-			  "\xF5\x3C\xEE\x54\xB8\x27\x84\xF8"
-			  "\x97\xA3\xE1\x6F\x38\x24\x34\x88"
-			  "\xCE\xBD\x32\x52\xE0\x00\x6C\x94"
-			  "\xC9\xD7\x5D\x37\x81\x33\x2E\x7F"
-			  "\x4F\x7E\x2E\x0D\x94\xBD\xEA\x59"
-			  "\x34\x39\xA8\x35\x12\xB7\xBC\xAC"
-			  "\xEA\x52\x9C\x78\x02\x6D\x92\x36"
-			  "\xFB\x59\x2B\xA4\xEA\x7B\x1B\x83"
-			  "\xE1\x4D\x5E\x2A\x7E\x92\xB1\x64"
-			  "\xDE\xE0\x27\x4B\x0A\x6F\x4C\xE3"
-			  "\xB0\xEB\x31\xE4\x69\x95\xAB\x35"
-			  "\x8B\x2C\xF5\x6B\x7F\xF1\xA2\x82"
-			  "\xF8\xD9\x47\x82\xA9\x82\x03\x91"
-			  "\x69\x1F\xBE\x4C\xE7\xC7\x34\x2F"
-			  "\x45\x72\x80\x17\x81\xBD\x9D\x62"
-			  "\xA1\xAC\xE8\xCF\xC6\x74\xCF\xDC"
-			  "\x22\x60\x4E\xE8\xA4\x5D\x85\xB9",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	},
-};
-
-
 /*
  * AES test vectors.
  */
-static const struct cipher_testvec aes_enc_tv_template[] = {
+static const struct cipher_testvec aes_tv_template[] = {
 	{ /* From FIPS-197 */
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 		.klen	= 16,
-		.input	= "\x00\x11\x22\x33\x44\x55\x66\x77"
+		.ptext	= "\x00\x11\x22\x33\x44\x55\x66\x77"
 			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
-		.ilen	= 16,
-		.result	= "\x69\xc4\xe0\xd8\x6a\x7b\x04\x30"
+		.ctext	= "\x69\xc4\xe0\xd8\x6a\x7b\x04\x30"
 			  "\xd8\xcd\xb7\x80\x70\xb4\xc5\x5a",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17",
 		.klen	= 24,
-		.input	= "\x00\x11\x22\x33\x44\x55\x66\x77"
+		.ptext	= "\x00\x11\x22\x33\x44\x55\x66\x77"
 			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
-		.ilen	= 16,
-		.result	= "\xdd\xa9\x7c\xa4\x86\x4c\xdf\xe0"
+		.ctext	= "\xdd\xa9\x7c\xa4\x86\x4c\xdf\xe0"
 			  "\x6e\xaf\x70\xa0\xec\x0d\x71\x91",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
 		.klen	= 32,
-		.input	= "\x00\x11\x22\x33\x44\x55\x66\x77"
+		.ptext	= "\x00\x11\x22\x33\x44\x55\x66\x77"
 			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
-		.ilen	= 16,
-		.result	= "\x8e\xa2\xb7\xca\x51\x67\x45\xbf"
+		.ctext	= "\x8e\xa2\xb7\xca\x51\x67\x45\xbf"
 			  "\xea\xfc\x49\x90\x4b\x49\x60\x89",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* Generated with Crypto++ */
 		.key	= "\xA6\xC9\x83\xA6\xC9\xEC\x0F\x32"
 			  "\x55\x0F\x32\x55\x78\x9B\xBE\x78"
 			  "\x9B\xBE\xE1\x04\x27\xE1\x04\x27"
 			  "\x4A\x6D\x90\x4A\x6D\x90\xB3\xD6",
 		.klen	= 32,
-		.input	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
 			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
 			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
 			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
@@ -17836,8 +11738,7 @@
 			  "\xAE\x17\xA3\x0C\x75\x01\x6A\xD3"
 			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
 			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12",
-		.ilen	= 496,
-		.result	= "\x71\x73\xF7\xDB\x24\x93\x21\x6D"
+		.ctext	= "\x71\x73\xF7\xDB\x24\x93\x21\x6D"
 			  "\x61\x1E\xBB\x63\x42\x79\xDB\x64"
 			  "\x6F\x82\xC0\xCA\xA3\x9B\xFA\x0B"
 			  "\xD9\x08\xC7\x4A\x90\xAE\x8F\x5F"
@@ -17899,197 +11800,24 @@
 			  "\x88\xE6\x68\x47\xE3\x2B\xC5\xFF"
 			  "\x09\x79\xA0\x43\x5C\x0D\x08\x58"
 			  "\x17\xBB\xC0\x6B\x62\x3F\x56\xE9",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
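
Every hunk in this file applies the same mechanical conversion: each algorithm's paired *_enc_tv_template[] and *_dec_tv_template[] arrays collapse into one *_tv_template[], with the direction-specific .input/.ilen/.result/.rlen fields replaced by .ptext/.ctext/.len. The removed decryption arrays carry no information of their own — as their comments note, they are the encryption vectors with input and result swapped. As a rough sketch only — the field types below are assumptions inferred from the initializers visible in this diff, not copied from the kernel's actual crypto/testmgr.h — the merged vector layout looks like:

	#include <stdbool.h>

	/*
	 * Sketch of the merged test-vector layout; types are guesses
	 * based on the initializers in this diff.
	 */
	struct cipher_testvec {
		const char *key;	/* cipher key */
		const char *iv;		/* IV, for modes that take one */
		const char *ptext;	/* plaintext  (formerly .input or .result) */
		const char *ctext;	/* ciphertext (formerly .result or .input) */
		unsigned char klen;	/* key length in bytes */
		unsigned short len;	/* length of ptext and ctext in bytes */
		bool fips_skip;		/* skip this vector in FIPS mode */
		unsigned char also_non_np;	/* also run without splitting */
		unsigned char np;	/* number of scatterlist pieces */
		unsigned int tap[8];	/* piece sizes, e.g. { 3, 2, 3, 2, 3, 1, 1, 1 } */
	};
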
 
-static const struct cipher_testvec aes_dec_tv_template[] = {
-	{ /* From FIPS-197 */
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.klen	= 16,
-		.input	= "\x69\xc4\xe0\xd8\x6a\x7b\x04\x30"
-			  "\xd8\xcd\xb7\x80\x70\xb4\xc5\x5a",
-		.ilen	= 16,
-		.result	= "\x00\x11\x22\x33\x44\x55\x66\x77"
-			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
-		.rlen	= 16,
-	}, {
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17",
-		.klen	= 24,
-		.input	= "\xdd\xa9\x7c\xa4\x86\x4c\xdf\xe0"
-			  "\x6e\xaf\x70\xa0\xec\x0d\x71\x91",
-		.ilen	= 16,
-		.result	= "\x00\x11\x22\x33\x44\x55\x66\x77"
-			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
-		.rlen	= 16,
-	}, {
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.klen	= 32,
-		.input	= "\x8e\xa2\xb7\xca\x51\x67\x45\xbf"
-			  "\xea\xfc\x49\x90\x4b\x49\x60\x89",
-		.ilen	= 16,
-		.result	= "\x00\x11\x22\x33\x44\x55\x66\x77"
-			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
-		.rlen	= 16,
-	}, { /* Generated with Crypto++ */
-		.key	= "\xA6\xC9\x83\xA6\xC9\xEC\x0F\x32"
-			  "\x55\x0F\x32\x55\x78\x9B\xBE\x78"
-			  "\x9B\xBE\xE1\x04\x27\xE1\x04\x27"
-			  "\x4A\x6D\x90\x4A\x6D\x90\xB3\xD6",
-		.klen	= 32,
-		.input	= "\x71\x73\xF7\xDB\x24\x93\x21\x6D"
-			  "\x61\x1E\xBB\x63\x42\x79\xDB\x64"
-			  "\x6F\x82\xC0\xCA\xA3\x9B\xFA\x0B"
-			  "\xD9\x08\xC7\x4A\x90\xAE\x8F\x5F"
-			  "\x5E\x06\xF0\x5F\x31\x51\x18\x37"
-			  "\x45\xD7\xCA\x3A\xFD\x6C\x3F\xE1"
-			  "\xDD\x8D\x22\x65\x2B\x00\x50\xCE"
-			  "\xBA\x28\x67\xD7\xCE\x0E\x0D\xEA"
-			  "\x78\x69\x7F\xAE\x8F\x8B\x69\x37"
-			  "\x75\xE0\xDC\x96\xE0\xB7\xF4\x09"
-			  "\xCB\x6D\xA2\xFB\xDA\xAF\x09\xF8"
-			  "\x81\x82\x27\xFA\x45\x9C\x29\xA4"
-			  "\x22\x8B\x78\x69\x5B\x46\xF9\x39"
-			  "\x1B\xCC\xF9\x1D\x09\xEB\xBC\x5C"
-			  "\x41\x72\x51\x97\x1D\x07\x49\xA0"
-			  "\x1B\x8E\x65\x4B\xB2\x6A\x12\x03"
-			  "\x6A\x60\x95\xAC\xBD\xAC\x1A\x64"
-			  "\xDE\x5A\xA5\xF0\x83\x2F\xCB\xCA"
-			  "\x22\x74\xA6\x6C\x9B\x73\xCE\x3F"
-			  "\xE1\x8B\x22\x17\x59\x0C\x47\x89"
-			  "\x33\xA1\xD6\x47\x03\x19\x4F\xA8"
-			  "\x67\x69\xF0\x5B\xF0\x20\xAD\x06"
-			  "\x27\x81\x92\xD8\xC5\xBA\x98\x12"
-			  "\xBE\x24\xB5\x2F\x75\x02\xC2\xAD"
-			  "\x12\x2F\x07\x32\xEE\x39\xAF\x64"
-			  "\x05\x8F\xB3\xD4\xEB\x1B\x46\x6E"
-			  "\xD9\x21\xF9\xC4\xB7\xC9\x45\x68"
-			  "\xB4\xA1\x74\x9F\x82\x47\xEB\xCC"
-			  "\xBD\x0A\x14\x95\x0F\x8B\xA8\x2F"
-			  "\x4B\x1B\xA7\xBF\x82\xA6\x43\x0C"
-			  "\xB9\x39\x4A\xA8\x10\x6F\x50\x7B"
-			  "\x25\xFB\x26\x81\xE0\x2F\xF0\x96"
-			  "\x8D\x8B\xAC\x92\x0F\xF6\xED\x64"
-			  "\x63\x29\x4C\x8E\x18\x13\xC5\xBF"
-			  "\xFC\xA0\xD9\xBF\x7C\x3A\x0E\x29"
-			  "\x6F\xD1\x6C\x6F\xA5\xDA\xBF\xB1"
-			  "\x30\xEA\x44\x2D\xC3\x8F\x16\xE1"
-			  "\x66\xFA\xA3\x21\x3E\xFC\x13\xCA"
-			  "\xF0\xF6\xF0\x59\xBD\x8F\x38\x50"
-			  "\x31\xCB\x69\x3F\x96\x15\xD6\xF5"
-			  "\xAE\xFF\xF6\xAA\x41\x85\x4C\x10"
-			  "\x58\xE3\xF9\x44\xE6\x28\xDA\x9A"
-			  "\xDC\x6A\x80\x34\x73\x97\x1B\xC5"
-			  "\xCA\x26\x16\x77\x0E\x60\xAB\x89"
-			  "\x0F\x04\x27\xBD\xCE\x3E\x71\xB4"
-			  "\xA0\xD7\x22\x7E\xDB\xEB\x24\x70"
-			  "\x42\x71\x51\x78\x70\xB3\xE0\x3D"
-			  "\x84\x8E\x8D\x7B\xD0\x6D\xEA\x92"
-			  "\x11\x08\x42\x4F\xE5\xAD\x26\x92"
-			  "\xD2\x00\xAE\xA8\xE3\x4B\x37\x47"
-			  "\x22\xC1\x95\xC1\x63\x7F\xCB\x03"
-			  "\xF3\xE3\xD7\x9D\x60\xC7\xBC\xEA"
-			  "\x35\xA2\xFD\x45\x52\x39\x13\x6F"
-			  "\xC1\x53\xF3\x53\xDF\x33\x84\xD7"
-			  "\xD2\xC8\x37\xB0\x75\xE3\x41\x46"
-			  "\xB3\xC7\x83\x2E\x8A\xBB\xA4\xE5"
-			  "\x7F\x3C\xFD\x8B\xEB\xEA\x63\xBD"
-			  "\xB7\x46\xE7\xBF\x09\x9C\x0D\x0F"
-			  "\x40\x86\x7F\x51\xE1\x11\x9C\xCB"
-			  "\x88\xE6\x68\x47\xE3\x2B\xC5\xFF"
-			  "\x09\x79\xA0\x43\x5C\x0D\x08\x58"
-			  "\x17\xBB\xC0\x6B\x62\x3F\x56\xE9",
-		.ilen	= 496,
-		.result	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
-			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
-			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
-			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
-			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
-			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
-			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
-			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
-			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
-			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
-			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
-			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
-			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
-			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
-			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
-			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
-			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
-			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
-			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
-			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
-			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
-			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
-			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
-			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
-			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
-			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
-			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
-			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
-			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
-			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
-			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB"
-			  "\x54\xE0\x49\xB2\x1B\xA7\x10\x79"
-			  "\x05\x6E\xD7\x40\xCC\x35\x9E\x07"
-			  "\x93\xFC\x65\xF1\x5A\xC3\x2C\xB8"
-			  "\x21\x8A\x16\x7F\xE8\x51\xDD\x46"
-			  "\xAF\x18\xA4\x0D\x76\x02\x6B\xD4"
-			  "\x3D\xC9\x32\x9B\x04\x90\xF9\x62"
-			  "\xEE\x57\xC0\x29\xB5\x1E\x87\x13"
-			  "\x7C\xE5\x4E\xDA\x43\xAC\x15\xA1"
-			  "\x0A\x73\xFF\x68\xD1\x3A\xC6\x2F"
-			  "\x98\x01\x8D\xF6\x5F\xEB\x54\xBD"
-			  "\x26\xB2\x1B\x84\x10\x79\xE2\x4B"
-			  "\xD7\x40\xA9\x12\x9E\x07\x70\xFC"
-			  "\x65\xCE\x37\xC3\x2C\x95\x21\x8A"
-			  "\xF3\x5C\xE8\x51\xBA\x23\xAF\x18"
-			  "\x81\x0D\x76\xDF\x48\xD4\x3D\xA6"
-			  "\x0F\x9B\x04\x6D\xF9\x62\xCB\x34"
-			  "\xC0\x29\x92\x1E\x87\xF0\x59\xE5"
-			  "\x4E\xB7\x20\xAC\x15\x7E\x0A\x73"
-			  "\xDC\x45\xD1\x3A\xA3\x0C\x98\x01"
-			  "\x6A\xF6\x5F\xC8\x31\xBD\x26\x8F"
-			  "\x1B\x84\xED\x56\xE2\x4B\xB4\x1D"
-			  "\xA9\x12\x7B\x07\x70\xD9\x42\xCE"
-			  "\x37\xA0\x09\x95\xFE\x67\xF3\x5C"
-			  "\xC5\x2E\xBA\x23\x8C\x18\x81\xEA"
-			  "\x53\xDF\x48\xB1\x1A\xA6\x0F\x78"
-			  "\x04\x6D\xD6\x3F\xCB\x34\x9D\x06"
-			  "\x92\xFB\x64\xF0\x59\xC2\x2B\xB7"
-			  "\x20\x89\x15\x7E\xE7\x50\xDC\x45"
-			  "\xAE\x17\xA3\x0C\x75\x01\x6A\xD3"
-			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
-			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec aes_cbc_enc_tv_template[] = {
+static const struct cipher_testvec aes_cbc_tv_template[] = {
 	{ /* From RFC 3602 */
 		.key    = "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
 			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
 		.klen   = 16,
 		.iv	= "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
 			  "\xb4\x22\xda\x80\x2c\x9f\xac\x41",
-		.input	= "Single block msg",
-		.ilen   = 16,
-		.result = "\xe3\x53\x77\x9c\x10\x79\xae\xb8"
+		.ptext	= "Single block msg",
+		.ctext	= "\xe3\x53\x77\x9c\x10\x79\xae\xb8"
 			  "\x27\x08\x94\x2d\xbe\x77\x18\x1a",
-		.rlen   = 16,
+		.len	= 16,
 		.also_non_np = 1,
 		.np	= 8,
 		.tap	= { 3, 2, 3, 2, 3, 1, 1, 1 },
@@ -18099,16 +11827,15 @@
 		.klen   = 16,
 		.iv     = "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
 			  "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58",
-		.input  = "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.ilen   = 32,
-		.result = "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a"
+		.ctext	= "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a"
 			  "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a"
 			  "\x75\x86\x60\x2d\x25\x3c\xff\xf9"
 			  "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1",
-		.rlen   = 32,
+		.len	= 32,
 	}, { /* From NIST SP800-38A */
 		.key	= "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
 			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
@@ -18116,7 +11843,7 @@
 		.klen	= 24,
 		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.input	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
 			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
 			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
 			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
@@ -18124,8 +11851,7 @@
 			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
 			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
 			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.ilen	= 64,
-		.result	= "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d"
+		.ctext	= "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d"
 			  "\x71\x78\x18\x3a\x9f\xa0\x71\xe8"
 			  "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4"
 			  "\xe5\xe7\x38\x76\x3f\x69\x14\x5a"
@@ -18133,7 +11859,7 @@
 			  "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0"
 			  "\x08\xb0\xe2\x79\x88\x59\x88\x81"
 			  "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd",
-		.rlen	= 64,
+		.len	= 64,
 	}, {
 		.key	= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
 			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
@@ -18142,7 +11868,7 @@
 		.klen	= 32,
 		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.input	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
 			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
 			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
 			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
@@ -18150,8 +11876,7 @@
 			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
 			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
 			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.ilen	= 64,
-		.result	= "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba"
+		.ctext	= "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba"
 			  "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6"
 			  "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d"
 			  "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d"
@@ -18159,7 +11884,7 @@
 			  "\xa5\x30\xe2\x63\x04\x23\x14\x61"
 			  "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc"
 			  "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b",
-		.rlen	= 64,
+		.len	= 64,
 	}, { /* Generated with Crypto++ */
 		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55"
 			  "\x0F\x32\x55\x78\x9B\xBE\x78\x9B"
@@ -18168,7 +11893,7 @@
 		.klen	= 32,
 		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47"
 			  "\xE2\x7D\x18\xD6\x71\x0C\xA7\x42",
-		.input	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
 			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
 			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
 			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
@@ -18230,8 +11955,7 @@
 			  "\xAE\x17\xA3\x0C\x75\x01\x6A\xD3"
 			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
 			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12",
-		.ilen	= 496,
-		.result	= "\xEA\x65\x8A\x19\xB0\x66\xC1\x3F"
+		.ctext	= "\xEA\x65\x8A\x19\xB0\x66\xC1\x3F"
 			  "\xCE\xF1\x97\x75\xC1\xFD\xB5\xAF"
 			  "\x52\x65\xF7\xFF\xBC\xD8\x2D\x9F"
 			  "\x2F\xB9\x26\x9B\x6F\x10\xB7\xB8"
@@ -18293,229 +12017,7 @@
 			  "\xBC\x06\x41\xE3\x01\xB4\x4E\x0A"
 			  "\xE0\x1F\x91\xF8\x82\x96\x2D\x65"
 			  "\xA3\xAA\x13\xCC\x50\xFF\x7B\x02",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec aes_cbc_dec_tv_template[] = {
-	{ /* From RFC 3602 */
-		.key    = "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
-			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
-		.klen   = 16,
-		.iv     = "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
-			  "\xb4\x22\xda\x80\x2c\x9f\xac\x41",
-		.input  = "\xe3\x53\x77\x9c\x10\x79\xae\xb8"
-			  "\x27\x08\x94\x2d\xbe\x77\x18\x1a",
-		.ilen   = 16,
-		.result = "Single block msg",
-		.rlen   = 16,
-		.also_non_np = 1,
-		.np	= 8,
-		.tap	= { 3, 2, 3, 2, 3, 1, 1, 1 },
-	}, {
-		.key    = "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
-			  "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
-		.klen   = 16,
-		.iv     = "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
-			  "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58",
-		.input  = "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a"
-			  "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a"
-			  "\x75\x86\x60\x2d\x25\x3c\xff\xf9"
-			  "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1",
-		.ilen   = 32,
-		.result = "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.rlen   = 32,
-	}, { /* From NIST SP800-38A */
-		.key	= "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
-			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
-			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
-		.klen	= 24,
-		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.input	= "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d"
-			  "\x71\x78\x18\x3a\x9f\xa0\x71\xe8"
-			  "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4"
-			  "\xe5\xe7\x38\x76\x3f\x69\x14\x5a"
-			  "\x57\x1b\x24\x20\x12\xfb\x7a\xe0"
-			  "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0"
-			  "\x08\xb0\xe2\x79\x88\x59\x88\x81"
-			  "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd",
-		.ilen	= 64,
-		.result	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
-			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
-			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
-			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
-			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
-			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
-			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
-			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.rlen	= 64,
-	}, {
-		.key	= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
-			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
-			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
-			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
-		.klen	= 32,
-		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.input	= "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba"
-			  "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6"
-			  "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d"
-			  "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d"
-			  "\x39\xf2\x33\x69\xa9\xd9\xba\xcf"
-			  "\xa5\x30\xe2\x63\x04\x23\x14\x61"
-			  "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc"
-			  "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b",
-		.ilen	= 64,
-		.result	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
-			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
-			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
-			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
-			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
-			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
-			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
-			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.rlen	= 64,
-	}, { /* Generated with Crypto++ */
-		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55"
-			  "\x0F\x32\x55\x78\x9B\xBE\x78\x9B"
-			  "\xBE\xE1\x04\x27\xE1\x04\x27\x4A"
-			  "\x6D\x90\x4A\x6D\x90\xB3\xD6\xF9",
-		.klen	= 32,
-		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47"
-			  "\xE2\x7D\x18\xD6\x71\x0C\xA7\x42",
-		.input	= "\xEA\x65\x8A\x19\xB0\x66\xC1\x3F"
-			  "\xCE\xF1\x97\x75\xC1\xFD\xB5\xAF"
-			  "\x52\x65\xF7\xFF\xBC\xD8\x2D\x9F"
-			  "\x2F\xB9\x26\x9B\x6F\x10\xB7\xB8"
-			  "\x26\xA1\x02\x46\xA2\xAD\xC6\xC0"
-			  "\x11\x15\xFF\x6D\x1E\x82\x04\xA6"
-			  "\xB1\x74\xD1\x08\x13\xFD\x90\x7C"
-			  "\xF5\xED\xD3\xDB\x5A\x0A\x0C\x2F"
-			  "\x0A\x70\xF1\x88\x07\xCF\x21\x26"
-			  "\x40\x40\x8A\xF5\x53\xF7\x24\x4F"
-			  "\x83\x38\x43\x5F\x08\x99\xEB\xE3"
-			  "\xDC\x02\x64\x67\x50\x6E\x15\xC3"
-			  "\x01\x1A\xA0\x81\x13\x65\xA6\x73"
-			  "\x71\xA6\x3B\x91\x83\x77\xBE\xFA"
-			  "\xDB\x71\x73\xA6\xC1\xAE\x43\xC3"
-			  "\x36\xCE\xD6\xEB\xF9\x30\x1C\x4F"
-			  "\x80\x38\x5E\x9C\x6E\xAB\x98\x2F"
-			  "\x53\xAF\xCF\xC8\x9A\xB8\x86\x43"
-			  "\x3E\x86\xE7\xA1\xF4\x2F\x30\x40"
-			  "\x03\xA8\x6C\x50\x42\x9F\x77\x59"
-			  "\x89\xA0\xC5\xEC\x9A\xB8\xDD\x99"
-			  "\x16\x24\x02\x07\x48\xAE\xF2\x31"
-			  "\x34\x0E\xC3\x85\xFE\x1C\x95\x99"
-			  "\x87\x58\x98\x8B\xE7\xC6\xC5\x70"
-			  "\x73\x81\x07\x7C\x56\x2F\xD8\x1B"
-			  "\xB7\xB9\x2B\xAB\xE3\x01\x87\x0F"
-			  "\xD8\xBB\xC0\x0D\xAC\x2C\x2F\x98"
-			  "\x3C\x0B\xA2\x99\x4A\x8C\xF7\x04"
-			  "\xE0\xE0\xCF\xD1\x81\x5B\xFE\xF5"
-			  "\x24\x04\xFD\xB8\xDF\x13\xD8\xCD"
-			  "\xF1\xE3\x3D\x98\x50\x02\x77\x9E"
-			  "\xBC\x22\xAB\xFA\xC2\x43\x1F\x66"
-			  "\x20\x02\x23\xDA\xDF\xA0\x89\xF6"
-			  "\xD8\xF3\x45\x24\x53\x6F\x16\x77"
-			  "\x02\x3E\x7B\x36\x5F\xA0\x3B\x78"
-			  "\x63\xA2\xBD\xB5\xA4\xCA\x1E\xD3"
-			  "\x57\xBC\x0B\x9F\x43\x51\x28\x4F"
-			  "\x07\x50\x6C\x68\x12\x07\xCF\xFA"
-			  "\x6B\x72\x0B\xEB\xF8\x88\x90\x2C"
-			  "\x7E\xF5\x91\xD1\x03\xD8\xD5\xBD"
-			  "\x22\x39\x7B\x16\x03\x01\x69\xAF"
-			  "\x3D\x38\x66\x28\x0C\xBE\x5B\xC5"
-			  "\x03\xB4\x2F\x51\x8A\x56\x17\x2B"
-			  "\x88\x42\x6D\x40\x68\x8F\xD0\x11"
-			  "\x19\xF9\x1F\x43\x79\x95\x31\xFA"
-			  "\x28\x7A\x3D\xF7\x66\xEB\xEF\xAC"
-			  "\x06\xB2\x01\xAD\xDB\x68\xDB\xEC"
-			  "\x8D\x53\x6E\x72\x68\xA3\xC7\x63"
-			  "\x43\x2B\x78\xE0\x04\x29\x8F\x72"
-			  "\xB2\x2C\xE6\x84\x03\x30\x6D\xCD"
-			  "\x26\x92\x37\xE1\x2F\xBB\x8B\x9D"
-			  "\xE4\x4C\xF6\x93\xBC\xD9\xAD\x44"
-			  "\x52\x65\xC7\xB0\x0E\x3F\x0E\x61"
-			  "\x56\x5D\x1C\x6D\xA7\x05\x2E\xBC"
-			  "\x58\x08\x15\xAB\x12\xAB\x17\x4A"
-			  "\x5E\x1C\xF2\xCD\xB8\xA2\xAE\xFB"
-			  "\x9B\x2E\x0E\x85\x34\x80\x0E\x3F"
-			  "\x4C\xB8\xDB\xCE\x1C\x90\xA1\x61"
-			  "\x6C\x69\x09\x35\x9E\xD4\xF4\xAD"
-			  "\xBC\x06\x41\xE3\x01\xB4\x4E\x0A"
-			  "\xE0\x1F\x91\xF8\x82\x96\x2D\x65"
-			  "\xA3\xAA\x13\xCC\x50\xFF\x7B\x02",
-		.ilen	= 496,
-		.result	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
-			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
-			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
-			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
-			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
-			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
-			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
-			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
-			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
-			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
-			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
-			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
-			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
-			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
-			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
-			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
-			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
-			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
-			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
-			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
-			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
-			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
-			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
-			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
-			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
-			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
-			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
-			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
-			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
-			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
-			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB"
-			  "\x54\xE0\x49\xB2\x1B\xA7\x10\x79"
-			  "\x05\x6E\xD7\x40\xCC\x35\x9E\x07"
-			  "\x93\xFC\x65\xF1\x5A\xC3\x2C\xB8"
-			  "\x21\x8A\x16\x7F\xE8\x51\xDD\x46"
-			  "\xAF\x18\xA4\x0D\x76\x02\x6B\xD4"
-			  "\x3D\xC9\x32\x9B\x04\x90\xF9\x62"
-			  "\xEE\x57\xC0\x29\xB5\x1E\x87\x13"
-			  "\x7C\xE5\x4E\xDA\x43\xAC\x15\xA1"
-			  "\x0A\x73\xFF\x68\xD1\x3A\xC6\x2F"
-			  "\x98\x01\x8D\xF6\x5F\xEB\x54\xBD"
-			  "\x26\xB2\x1B\x84\x10\x79\xE2\x4B"
-			  "\xD7\x40\xA9\x12\x9E\x07\x70\xFC"
-			  "\x65\xCE\x37\xC3\x2C\x95\x21\x8A"
-			  "\xF3\x5C\xE8\x51\xBA\x23\xAF\x18"
-			  "\x81\x0D\x76\xDF\x48\xD4\x3D\xA6"
-			  "\x0F\x9B\x04\x6D\xF9\x62\xCB\x34"
-			  "\xC0\x29\x92\x1E\x87\xF0\x59\xE5"
-			  "\x4E\xB7\x20\xAC\x15\x7E\x0A\x73"
-			  "\xDC\x45\xD1\x3A\xA3\x0C\x98\x01"
-			  "\x6A\xF6\x5F\xC8\x31\xBD\x26\x8F"
-			  "\x1B\x84\xED\x56\xE2\x4B\xB4\x1D"
-			  "\xA9\x12\x7B\x07\x70\xD9\x42\xCE"
-			  "\x37\xA0\x09\x95\xFE\x67\xF3\x5C"
-			  "\xC5\x2E\xBA\x23\x8C\x18\x81\xEA"
-			  "\x53\xDF\x48\xB1\x1A\xA6\x0F\x78"
-			  "\x04\x6D\xD6\x3F\xCB\x34\x9D\x06"
-			  "\x92\xFB\x64\xF0\x59\xC2\x2B\xB7"
-			  "\x20\x89\x15\x7E\xE7\x50\xDC\x45"
-			  "\xAE\x17\xA3\x0C\x75\x01\x6A\xD3"
-			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
-			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
@@ -20225,7 +13727,7 @@
 	},
 };
 
-static const struct cipher_testvec aes_lrw_enc_tv_template[] = {
+static const struct cipher_testvec aes_lrw_tv_template[] = {
 	/* from http://grouper.ieee.org/groups/1619/email/pdf00017.pdf */
 	{ /* LRW-32-AES 1 */
 		.key    = "\x45\x62\xac\x25\xf8\x28\x17\x6d"
@@ -20235,12 +13737,11 @@
 		.klen   = 32,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input  = "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen   = 16,
-		.result = "\xf1\xb2\x73\xcd\x65\xa3\xdf\x5f"
+		.ctext	= "\xf1\xb2\x73\xcd\x65\xa3\xdf\x5f"
 			  "\xe9\x5d\x48\x92\x54\x63\x4e\xb8",
-		.rlen   = 16,
+		.len	= 16,
 	}, { /* LRW-32-AES 2 */
 		.key    = "\x59\x70\x47\x14\xf5\x57\x47\x8c"
 			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
@@ -20249,12 +13750,11 @@
 		.klen   = 32,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input  = "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen   = 16,
-		.result = "\x00\xc8\x2b\xae\x95\xbb\xcd\xe5"
+		.ctext	= "\x00\xc8\x2b\xae\x95\xbb\xcd\xe5"
 			  "\x27\x4f\x07\x69\xb2\x60\xe1\x36",
-		.rlen   = 16,
+		.len	= 16,
 	}, { /* LRW-32-AES 3 */
 		.key    = "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
 			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
@@ -20263,12 +13763,11 @@
 		.klen   = 32,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input  = "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen   = 16,
-		.result = "\x76\x32\x21\x83\xed\x8f\xf1\x82"
+		.ctext	= "\x76\x32\x21\x83\xed\x8f\xf1\x82"
 			  "\xf9\x59\x62\x03\x69\x0e\x5e\x01",
-		.rlen   = 16,
+		.len	= 16,
 	}, { /* LRW-32-AES 4 */
 		.key    = "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
 			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
@@ -20278,12 +13777,11 @@
 		.klen   = 40,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input  = "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen   = 16,
-		.result = "\x9c\x0f\x15\x2f\x55\xa2\xd8\xf0"
+		.ctext	= "\x9c\x0f\x15\x2f\x55\xa2\xd8\xf0"
 			  "\xd6\x7b\x8f\x9e\x28\x22\xbc\x41",
-		.rlen   = 16,
+		.len	= 16,
 	}, { /* LRW-32-AES 5 */
 		.key    = "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
 			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
@@ -20293,12 +13791,11 @@
 		.klen   = 40,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input  = "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen   = 16,
-		.result = "\xd4\x27\x6a\x7f\x14\x91\x3d\x65"
+		.ctext	= "\xd4\x27\x6a\x7f\x14\x91\x3d\x65"
 			  "\xc8\x60\x48\x02\x87\xe3\x34\x06",
-		.rlen   = 16,
+		.len	= 16,
 	}, { /* LRW-32-AES 6 */
 		.key    = "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
 			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
@@ -20309,12 +13806,11 @@
 		.klen   = 48,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input  = "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen   = 16,
-		.result = "\xbd\x06\xb8\xe1\xdb\x98\x89\x9e"
+		.ctext	= "\xbd\x06\xb8\xe1\xdb\x98\x89\x9e"
 			  "\xc4\x98\xe4\x91\xcf\x1c\x70\x2b",
-		.rlen   = 16,
+		.len	= 16,
 	}, { /* LRW-32-AES 7 */
 		.key    = "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
 			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
@@ -20325,12 +13821,11 @@
 		.klen   = 48,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input  = "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen   = 16,
-		.result = "\x5b\x90\x8e\xc1\xab\xdd\x67\x5f"
+		.ctext	= "\x5b\x90\x8e\xc1\xab\xdd\x67\x5f"
 			  "\x3d\x69\x8a\x95\x53\xc8\x9c\xe5",
-		.rlen   = 16,
+		.len	= 16,
 	}, {
 /* http://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html */
 		.key    = "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
@@ -20342,7 +13837,7 @@
 		.klen   = 48,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input  = "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+		.ptext	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
 			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
 			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
 			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
@@ -20406,8 +13901,7 @@
 			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
 			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
 			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.ilen   = 512,
-		.result = "\x1a\x1d\xa9\x30\xad\xf9\x2f\x9b"
+		.ctext	= "\x1a\x1d\xa9\x30\xad\xf9\x2f\x9b"
 			  "\xb6\x1d\xae\xef\xf0\x2f\xf8\x5a"
 			  "\x39\x3c\xbf\x2a\xb2\x45\xb2\x23"
 			  "\x1b\x63\x3c\xcf\xaa\xbe\xcf\x4e"
@@ -20471,268 +13965,14 @@
 			  "\xb7\x4f\xf8\x92\x4c\xb7\x3c\x29"
 			  "\xcd\x7e\x2b\x5d\x43\xea\x42\xe7"
 			  "\x74\x3f\x7d\x58\x88\x75\xde\x3e",
-		.rlen   = 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
 	}
 };
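
Because each merged vector now carries plaintext and ciphertext side by side, a single entry can drive the test in both directions. A minimal, hypothetical harness over the struct sketched earlier — encrypt/decrypt function pointers and check_vector are illustrative stand-ins, not the kernel's testmgr code — might look like:

	#include <string.h>

	/* Stand-in for whichever cipher implementation is under test. */
	typedef void (*xform_fn)(const char *key, unsigned int klen,
				 const char *iv,
				 const char *in, char *out, unsigned int len);

	static int check_vector(const struct cipher_testvec *v,
				xform_fn encrypt, xform_fn decrypt)
	{
		char buf[512];	/* 512 is the largest .len used above */

		/* Encrypt ptext and compare against the stored ctext. */
		encrypt(v->key, v->klen, v->iv, v->ptext, buf, v->len);
		if (memcmp(buf, v->ctext, v->len))
			return -1;	/* encryption output mismatch */

		/* Decrypt ctext and compare against the stored ptext. */
		decrypt(v->key, v->klen, v->iv, v->ctext, buf, v->len);
		if (memcmp(buf, v->ptext, v->len))
			return -1;	/* decryption output mismatch */

		return 0;
	}
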
 
-static const struct cipher_testvec aes_lrw_dec_tv_template[] = {
-	/* from http://grouper.ieee.org/groups/1619/email/pdf00017.pdf */
-	/* same as enc vectors with input and result reversed */
-	{ /* LRW-32-AES 1 */
-		.key    = "\x45\x62\xac\x25\xf8\x28\x17\x6d"
-			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
-			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
-			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
-		.klen   = 32,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input  = "\xf1\xb2\x73\xcd\x65\xa3\xdf\x5f"
-			  "\xe9\x5d\x48\x92\x54\x63\x4e\xb8",
-		.ilen   = 16,
-		.result = "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen   = 16,
-	}, { /* LRW-32-AES 2 */
-		.key    = "\x59\x70\x47\x14\xf5\x57\x47\x8c"
-			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
-			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
-			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
-		.klen   = 32,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input  = "\x00\xc8\x2b\xae\x95\xbb\xcd\xe5"
-			  "\x27\x4f\x07\x69\xb2\x60\xe1\x36",
-		.ilen   = 16,
-		.result  = "\x30\x31\x32\x33\x34\x35\x36\x37"
-			   "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen   = 16,
-	}, { /* LRW-32-AES 3 */
-		.key    = "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
-			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
-			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
-			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
-		.klen   = 32,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input  = "\x76\x32\x21\x83\xed\x8f\xf1\x82"
-			  "\xf9\x59\x62\x03\x69\x0e\x5e\x01",
-		.ilen   = 16,
-		.result = "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen   = 16,
-	}, { /* LRW-32-AES 4 */
-		.key    = "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
-			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
-			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
-			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
-			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
-		.klen   = 40,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input  = "\x9c\x0f\x15\x2f\x55\xa2\xd8\xf0"
-			  "\xd6\x7b\x8f\x9e\x28\x22\xbc\x41",
-		.ilen   = 16,
-		.result = "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen   = 16,
-	}, { /* LRW-32-AES 5 */
-		.key    = "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
-			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
-			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
-			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
-			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
-		.klen   = 40,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input  = "\xd4\x27\x6a\x7f\x14\x91\x3d\x65"
-			  "\xc8\x60\x48\x02\x87\xe3\x34\x06",
-		.ilen   = 16,
-		.result = "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen   = 16,
-	}, { /* LRW-32-AES 6 */
-		.key    = "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen   = 48,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input  = "\xbd\x06\xb8\xe1\xdb\x98\x89\x9e"
-			  "\xc4\x98\xe4\x91\xcf\x1c\x70\x2b",
-		.ilen   = 16,
-		.result = "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen   = 16,
-	}, { /* LRW-32-AES 7 */
-		.key    = "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
-			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
-			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
-			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
-			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
-			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
-		.klen   = 48,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input  = "\x5b\x90\x8e\xc1\xab\xdd\x67\x5f"
-			  "\x3d\x69\x8a\x95\x53\xc8\x9c\xe5",
-		.ilen   = 16,
-		.result = "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen   = 16,
-	}, {
-/* http://www.mail-archive.com/stds-p1619@listserv.ieee.org/msg00173.html */
-		.key    = "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen   = 48,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x1a\x1d\xa9\x30\xad\xf9\x2f\x9b"
-			  "\xb6\x1d\xae\xef\xf0\x2f\xf8\x5a"
-			  "\x39\x3c\xbf\x2a\xb2\x45\xb2\x23"
-			  "\x1b\x63\x3c\xcf\xaa\xbe\xcf\x4e"
-			  "\xfa\xe8\x29\xc2\x20\x68\x2b\x3c"
-			  "\x2e\x8b\xf7\x6e\x25\xbd\xe3\x3d"
-			  "\x66\x27\xd6\xaf\xd6\x64\x3e\xe3"
-			  "\xe8\x58\x46\x97\x39\x51\x07\xde"
-			  "\xcb\x37\xbc\xa9\xc0\x5f\x75\xc3"
-			  "\x0e\x84\x23\x1d\x16\xd4\x1c\x59"
-			  "\x9c\x1a\x02\x55\xab\x3a\x97\x1d"
-			  "\xdf\xdd\xc7\x06\x51\xd7\x70\xae"
-			  "\x23\xc6\x8c\xf5\x1e\xa0\xe5\x82"
-			  "\xb8\xb2\xbf\x04\xa0\x32\x8e\x68"
-			  "\xeb\xaf\x6e\x2d\x94\x22\x2f\xce"
-			  "\x4c\xb5\x59\xe2\xa2\x2f\xa0\x98"
-			  "\x1a\x97\xc6\xd4\xb5\x00\x59\xf2"
-			  "\x84\x14\x72\xb1\x9a\x6e\xa3\x7f"
-			  "\xea\x20\xe7\xcb\x65\x77\x3a\xdf"
-			  "\xc8\x97\x67\x15\xc2\x2a\x27\xcc"
-			  "\x18\x55\xa1\x24\x0b\x24\x24\xaf"
-			  "\x5b\xec\x68\xb8\xc8\xf5\xba\x63"
-			  "\xff\xed\x89\xce\xd5\x3d\x88\xf3"
-			  "\x25\xef\x05\x7c\x3a\xef\xeb\xd8"
-			  "\x7a\x32\x0d\xd1\x1e\x58\x59\x99"
-			  "\x90\x25\xb5\x26\xb0\xe3\x2b\x6c"
-			  "\x4c\xa9\x8b\x84\x4f\x5e\x01\x50"
-			  "\x41\x30\x58\xc5\x62\x74\x52\x1d"
-			  "\x45\x24\x6a\x42\x64\x4f\x97\x1c"
-			  "\xa8\x66\xb5\x6d\x79\xd4\x0d\x48"
-			  "\xc5\x5f\xf3\x90\x32\xdd\xdd\xe1"
-			  "\xe4\xa9\x9f\xfc\xc3\x52\x5a\x46"
-			  "\xe4\x81\x84\x95\x36\x59\x7a\x6b"
-			  "\xaa\xb3\x60\xad\xce\x9f\x9f\x28"
-			  "\xe0\x01\x75\x22\xc4\x4e\xa9\x62"
-			  "\x5c\x62\x0d\x00\xcb\x13\xe8\x43"
-			  "\x72\xd4\x2d\x53\x46\xb5\xd1\x16"
-			  "\x22\x18\xdf\x34\x33\xf5\xd6\x1c"
-			  "\xb8\x79\x78\x97\x94\xff\x72\x13"
-			  "\x4c\x27\xfc\xcb\xbf\x01\x53\xa6"
-			  "\xb4\x50\x6e\xde\xdf\xb5\x43\xa4"
-			  "\x59\xdf\x52\xf9\x7c\xe0\x11\x6f"
-			  "\x2d\x14\x8e\x24\x61\x2c\xe1\x17"
-			  "\xcc\xce\x51\x0c\x19\x8a\x82\x30"
-			  "\x94\xd5\x3d\x6a\x53\x06\x5e\xbd"
-			  "\xb7\xeb\xfa\xfd\x27\x51\xde\x85"
-			  "\x1e\x86\x53\x11\x53\x94\x00\xee"
-			  "\x2b\x8c\x08\x2a\xbf\xdd\xae\x11"
-			  "\xcb\x1e\xa2\x07\x9a\x80\xcf\x62"
-			  "\x9b\x09\xdc\x95\x3c\x96\x8e\xb1"
-			  "\x09\xbd\xe4\xeb\xdb\xca\x70\x7a"
-			  "\x9e\xfa\x31\x18\x45\x3c\x21\x33"
-			  "\xb0\xb3\x2b\xea\xf3\x71\x2d\xe1"
-			  "\x03\xad\x1b\x48\xd4\x67\x27\xf0"
-			  "\x62\xe4\x3d\xfb\x9b\x08\x76\xe7"
-			  "\xdd\x2b\x01\x39\x04\x5a\x58\x7a"
-			  "\xf7\x11\x90\xec\xbd\x51\x5c\x32"
-			  "\x6b\xd7\x35\x39\x02\x6b\xf2\xa6"
-			  "\xd0\x0d\x07\xe1\x06\xc4\x5b\x7d"
-			  "\xe4\x6a\xd7\xee\x15\x1f\x83\xb4"
-			  "\xa3\xa7\x5e\xc3\x90\xb7\xef\xd3"
-			  "\xb7\x4f\xf8\x92\x4c\xb7\x3c\x29"
-			  "\xcd\x7e\x2b\x5d\x43\xea\x42\xe7"
-			  "\x74\x3f\x7d\x58\x88\x75\xde\x3e",
-		.ilen   = 512,
-		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.rlen   = 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	}
-};
-
-static const struct cipher_testvec aes_xts_enc_tv_template[] = {
+static const struct cipher_testvec aes_xts_tv_template[] = {
 	/* http://grouper.ieee.org/groups/1619/email/pdf00086.pdf */
 	{ /* XTS-AES 1 */
 		.key    = "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -20743,16 +13983,15 @@
 		.fips_skip = 1,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input  = "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen   = 32,
-		.result = "\x91\x7c\xf6\x9e\xbd\x68\xb2\xec"
+		.ctext	= "\x91\x7c\xf6\x9e\xbd\x68\xb2\xec"
 			  "\x9b\x9f\xe9\xa3\xea\xdd\xa6\x92"
 			  "\xcd\x43\xd2\xf5\x95\x98\xed\x85"
 			  "\x8c\x02\xc2\x65\x2f\xbf\x92\x2e",
-		.rlen   = 32,
+		.len	= 32,
 	}, { /* XTS-AES 2 */
 		.key    = "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
@@ -20761,16 +14000,15 @@
 		.klen   = 32,
 		.iv     = "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input  = "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen   = 32,
-		.result = "\xc4\x54\x18\x5e\x6a\x16\x93\x6e"
+		.ctext	= "\xc4\x54\x18\x5e\x6a\x16\x93\x6e"
 			  "\x39\x33\x40\x38\xac\xef\x83\x8b"
 			  "\xfb\x18\x6f\xff\x74\x80\xad\xc4"
 			  "\x28\x93\x82\xec\xd6\xd3\x94\xf0",
-		.rlen   = 32,
+		.len	= 32,
 	}, { /* XTS-AES 3 */
 		.key    = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
@@ -20779,16 +14017,15 @@
 		.klen   = 32,
 		.iv     = "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input  = "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen   = 32,
-		.result = "\xaf\x85\x33\x6b\x59\x7a\xfc\x1a"
+		.ctext	= "\xaf\x85\x33\x6b\x59\x7a\xfc\x1a"
 			  "\x90\x0b\x2e\xb2\x1e\xc9\x49\xd2"
 			  "\x92\xdf\x4c\x04\x7e\x0b\x21\x53"
 			  "\x21\x86\xa5\x97\x1a\x22\x7a\x89",
-		.rlen   = 32,
+		.len	= 32,
 	}, { /* XTS-AES 4 */
 		.key    = "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -20797,7 +14034,7 @@
 		.klen   = 32,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input  = "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -20861,8 +14098,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen   = 512,
-		.result = "\x27\xa7\x47\x9b\xef\xa1\xd4\x76"
+		.ctext	= "\x27\xa7\x47\x9b\xef\xa1\xd4\x76"
 			  "\x48\x9f\x30\x8c\xd4\xcf\xa6\xe2"
 			  "\xa9\x6e\x4b\xbe\x32\x08\xff\x25"
 			  "\x28\x7d\xd3\x81\x96\x16\xe8\x9c"
@@ -20926,7 +14162,7 @@
 			  "\xf2\x62\x73\x57\x79\xa4\x18\xf2"
 			  "\x0a\x28\x2d\xf9\x20\x14\x7b\xea"
 			  "\xbe\x42\x1e\xe5\x31\x9d\x05\x68",
-		.rlen   = 512,
+		.len	= 512,
 	}, { /* XTS-AES 10, XTS-AES-256, data unit 512 bytes */
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -20939,7 +14175,7 @@
 		.klen	= 64,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -21003,8 +14239,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\x1c\x3b\x3a\x10\x2f\x77\x03\x86"
+		.ctext	= "\x1c\x3b\x3a\x10\x2f\x77\x03\x86"
 			  "\xe4\x83\x6c\x99\xe3\x70\xcf\x9b"
 			  "\xea\x00\x80\x3f\x5e\x48\x23\x57"
 			  "\xa4\xae\x12\xd4\x14\xa3\xe6\x3b"
@@ -21068,365 +14303,21 @@
 			  "\x2f\xa7\x55\xc8\x24\xbb\x5e\x54"
 			  "\xc4\xf3\x6f\xfd\xa9\xfc\xea\x70"
 			  "\xb9\xc6\xe6\x93\xe1\x48\xc1\x51",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
 	}
 };
 
-static const struct cipher_testvec aes_xts_dec_tv_template[] = {
-	/* http://grouper.ieee.org/groups/1619/email/pdf00086.pdf */
-	{ /* XTS-AES 1 */
-		.key    = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen   = 32,
-		.fips_skip = 1,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input = "\x91\x7c\xf6\x9e\xbd\x68\xb2\xec"
-			 "\x9b\x9f\xe9\xa3\xea\xdd\xa6\x92"
-			 "\xcd\x43\xd2\xf5\x95\x98\xed\x85"
-			 "\x8c\x02\xc2\x65\x2f\xbf\x92\x2e",
-		.ilen   = 32,
-		.result  = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			   "\x00\x00\x00\x00\x00\x00\x00\x00"
-			   "\x00\x00\x00\x00\x00\x00\x00\x00"
-			   "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen   = 32,
-	}, { /* XTS-AES 2 */
-		.key    = "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen   = 32,
-		.iv     = "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input  = "\xc4\x54\x18\x5e\x6a\x16\x93\x6e"
-			  "\x39\x33\x40\x38\xac\xef\x83\x8b"
-			  "\xfb\x18\x6f\xff\x74\x80\xad\xc4"
-			  "\x28\x93\x82\xec\xd6\xd3\x94\xf0",
-		.ilen   = 32,
-		.result = "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen   = 32,
-	}, { /* XTS-AES 3 */
-		.key    = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen   = 32,
-		.iv     = "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input = "\xaf\x85\x33\x6b\x59\x7a\xfc\x1a"
-			  "\x90\x0b\x2e\xb2\x1e\xc9\x49\xd2"
-			  "\x92\xdf\x4c\x04\x7e\x0b\x21\x53"
-			  "\x21\x86\xa5\x97\x1a\x22\x7a\x89",
-		.ilen   = 32,
-		.result  = "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen   = 32,
-	}, { /* XTS-AES 4 */
-		.key    = "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen   = 32,
-		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input  = "\x27\xa7\x47\x9b\xef\xa1\xd4\x76"
-			  "\x48\x9f\x30\x8c\xd4\xcf\xa6\xe2"
-			  "\xa9\x6e\x4b\xbe\x32\x08\xff\x25"
-			  "\x28\x7d\xd3\x81\x96\x16\xe8\x9c"
-			  "\xc7\x8c\xf7\xf5\xe5\x43\x44\x5f"
-			  "\x83\x33\xd8\xfa\x7f\x56\x00\x00"
-			  "\x05\x27\x9f\xa5\xd8\xb5\xe4\xad"
-			  "\x40\xe7\x36\xdd\xb4\xd3\x54\x12"
-			  "\x32\x80\x63\xfd\x2a\xab\x53\xe5"
-			  "\xea\x1e\x0a\x9f\x33\x25\x00\xa5"
-			  "\xdf\x94\x87\xd0\x7a\x5c\x92\xcc"
-			  "\x51\x2c\x88\x66\xc7\xe8\x60\xce"
-			  "\x93\xfd\xf1\x66\xa2\x49\x12\xb4"
-			  "\x22\x97\x61\x46\xae\x20\xce\x84"
-			  "\x6b\xb7\xdc\x9b\xa9\x4a\x76\x7a"
-			  "\xae\xf2\x0c\x0d\x61\xad\x02\x65"
-			  "\x5e\xa9\x2d\xc4\xc4\xe4\x1a\x89"
-			  "\x52\xc6\x51\xd3\x31\x74\xbe\x51"
-			  "\xa1\x0c\x42\x11\x10\xe6\xd8\x15"
-			  "\x88\xed\xe8\x21\x03\xa2\x52\xd8"
-			  "\xa7\x50\xe8\x76\x8d\xef\xff\xed"
-			  "\x91\x22\x81\x0a\xae\xb9\x9f\x91"
-			  "\x72\xaf\x82\xb6\x04\xdc\x4b\x8e"
-			  "\x51\xbc\xb0\x82\x35\xa6\xf4\x34"
-			  "\x13\x32\xe4\xca\x60\x48\x2a\x4b"
-			  "\xa1\xa0\x3b\x3e\x65\x00\x8f\xc5"
-			  "\xda\x76\xb7\x0b\xf1\x69\x0d\xb4"
-			  "\xea\xe2\x9c\x5f\x1b\xad\xd0\x3c"
-			  "\x5c\xcf\x2a\x55\xd7\x05\xdd\xcd"
-			  "\x86\xd4\x49\x51\x1c\xeb\x7e\xc3"
-			  "\x0b\xf1\x2b\x1f\xa3\x5b\x91\x3f"
-			  "\x9f\x74\x7a\x8a\xfd\x1b\x13\x0e"
-			  "\x94\xbf\xf9\x4e\xff\xd0\x1a\x91"
-			  "\x73\x5c\xa1\x72\x6a\xcd\x0b\x19"
-			  "\x7c\x4e\x5b\x03\x39\x36\x97\xe1"
-			  "\x26\x82\x6f\xb6\xbb\xde\x8e\xcc"
-			  "\x1e\x08\x29\x85\x16\xe2\xc9\xed"
-			  "\x03\xff\x3c\x1b\x78\x60\xf6\xde"
-			  "\x76\xd4\xce\xcd\x94\xc8\x11\x98"
-			  "\x55\xef\x52\x97\xca\x67\xe9\xf3"
-			  "\xe7\xff\x72\xb1\xe9\x97\x85\xca"
-			  "\x0a\x7e\x77\x20\xc5\xb3\x6d\xc6"
-			  "\xd7\x2c\xac\x95\x74\xc8\xcb\xbc"
-			  "\x2f\x80\x1e\x23\xe5\x6f\xd3\x44"
-			  "\xb0\x7f\x22\x15\x4b\xeb\xa0\xf0"
-			  "\x8c\xe8\x89\x1e\x64\x3e\xd9\x95"
-			  "\xc9\x4d\x9a\x69\xc9\xf1\xb5\xf4"
-			  "\x99\x02\x7a\x78\x57\x2a\xee\xbd"
-			  "\x74\xd2\x0c\xc3\x98\x81\xc2\x13"
-			  "\xee\x77\x0b\x10\x10\xe4\xbe\xa7"
-			  "\x18\x84\x69\x77\xae\x11\x9f\x7a"
-			  "\x02\x3a\xb5\x8c\xca\x0a\xd7\x52"
-			  "\xaf\xe6\x56\xbb\x3c\x17\x25\x6a"
-			  "\x9f\x6e\x9b\xf1\x9f\xdd\x5a\x38"
-			  "\xfc\x82\xbb\xe8\x72\xc5\x53\x9e"
-			  "\xdb\x60\x9e\xf4\xf7\x9c\x20\x3e"
-			  "\xbb\x14\x0f\x2e\x58\x3c\xb2\xad"
-			  "\x15\xb4\xaa\x5b\x65\x50\x16\xa8"
-			  "\x44\x92\x77\xdb\xd4\x77\xef\x2c"
-			  "\x8d\x6c\x01\x7d\xb7\x38\xb1\x8d"
-			  "\xeb\x4a\x42\x7d\x19\x23\xce\x3f"
-			  "\xf2\x62\x73\x57\x79\xa4\x18\xf2"
-			  "\x0a\x28\x2d\xf9\x20\x14\x7b\xea"
-			  "\xbe\x42\x1e\xe5\x31\x9d\x05\x68",
-		.ilen   = 512,
-		.result = "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen   = 512,
-	}, { /* XTS-AES 10, XTS-AES-256, data unit 512 bytes */
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x1c\x3b\x3a\x10\x2f\x77\x03\x86"
-			  "\xe4\x83\x6c\x99\xe3\x70\xcf\x9b"
-			  "\xea\x00\x80\x3f\x5e\x48\x23\x57"
-			  "\xa4\xae\x12\xd4\x14\xa3\xe6\x3b"
-			  "\x5d\x31\xe2\x76\xf8\xfe\x4a\x8d"
-			  "\x66\xb3\x17\xf9\xac\x68\x3f\x44"
-			  "\x68\x0a\x86\xac\x35\xad\xfc\x33"
-			  "\x45\xbe\xfe\xcb\x4b\xb1\x88\xfd"
-			  "\x57\x76\x92\x6c\x49\xa3\x09\x5e"
-			  "\xb1\x08\xfd\x10\x98\xba\xec\x70"
-			  "\xaa\xa6\x69\x99\xa7\x2a\x82\xf2"
-			  "\x7d\x84\x8b\x21\xd4\xa7\x41\xb0"
-			  "\xc5\xcd\x4d\x5f\xff\x9d\xac\x89"
-			  "\xae\xba\x12\x29\x61\xd0\x3a\x75"
-			  "\x71\x23\xe9\x87\x0f\x8a\xcf\x10"
-			  "\x00\x02\x08\x87\x89\x14\x29\xca"
-			  "\x2a\x3e\x7a\x7d\x7d\xf7\xb1\x03"
-			  "\x55\x16\x5c\x8b\x9a\x6d\x0a\x7d"
-			  "\xe8\xb0\x62\xc4\x50\x0d\xc4\xcd"
-			  "\x12\x0c\x0f\x74\x18\xda\xe3\xd0"
-			  "\xb5\x78\x1c\x34\x80\x3f\xa7\x54"
-			  "\x21\xc7\x90\xdf\xe1\xde\x18\x34"
-			  "\xf2\x80\xd7\x66\x7b\x32\x7f\x6c"
-			  "\x8c\xd7\x55\x7e\x12\xac\x3a\x0f"
-			  "\x93\xec\x05\xc5\x2e\x04\x93\xef"
-			  "\x31\xa1\x2d\x3d\x92\x60\xf7\x9a"
-			  "\x28\x9d\x6a\x37\x9b\xc7\x0c\x50"
-			  "\x84\x14\x73\xd1\xa8\xcc\x81\xec"
-			  "\x58\x3e\x96\x45\xe0\x7b\x8d\x96"
-			  "\x70\x65\x5b\xa5\xbb\xcf\xec\xc6"
-			  "\xdc\x39\x66\x38\x0a\xd8\xfe\xcb"
-			  "\x17\xb6\xba\x02\x46\x9a\x02\x0a"
-			  "\x84\xe1\x8e\x8f\x84\x25\x20\x70"
-			  "\xc1\x3e\x9f\x1f\x28\x9b\xe5\x4f"
-			  "\xbc\x48\x14\x57\x77\x8f\x61\x60"
-			  "\x15\xe1\x32\x7a\x02\xb1\x40\xf1"
-			  "\x50\x5e\xb3\x09\x32\x6d\x68\x37"
-			  "\x8f\x83\x74\x59\x5c\x84\x9d\x84"
-			  "\xf4\xc3\x33\xec\x44\x23\x88\x51"
-			  "\x43\xcb\x47\xbd\x71\xc5\xed\xae"
-			  "\x9b\xe6\x9a\x2f\xfe\xce\xb1\xbe"
-			  "\xc9\xde\x24\x4f\xbe\x15\x99\x2b"
-			  "\x11\xb7\x7c\x04\x0f\x12\xbd\x8f"
-			  "\x6a\x97\x5a\x44\xa0\xf9\x0c\x29"
-			  "\xa9\xab\xc3\xd4\xd8\x93\x92\x72"
-			  "\x84\xc5\x87\x54\xcc\xe2\x94\x52"
-			  "\x9f\x86\x14\xdc\xd2\xab\xa9\x91"
-			  "\x92\x5f\xed\xc4\xae\x74\xff\xac"
-			  "\x6e\x33\x3b\x93\xeb\x4a\xff\x04"
-			  "\x79\xda\x9a\x41\x0e\x44\x50\xe0"
-			  "\xdd\x7a\xe4\xc6\xe2\x91\x09\x00"
-			  "\x57\x5d\xa4\x01\xfc\x07\x05\x9f"
-			  "\x64\x5e\x8b\x7e\x9b\xfd\xef\x33"
-			  "\x94\x30\x54\xff\x84\x01\x14\x93"
-			  "\xc2\x7b\x34\x29\xea\xed\xb4\xed"
-			  "\x53\x76\x44\x1a\x77\xed\x43\x85"
-			  "\x1a\xd7\x7f\x16\xf5\x41\xdf\xd2"
-			  "\x69\xd5\x0d\x6a\x5f\x14\xfb\x0a"
-			  "\xab\x1c\xbb\x4c\x15\x50\xbe\x97"
-			  "\xf7\xab\x40\x66\x19\x3c\x4c\xaa"
-			  "\x77\x3d\xad\x38\x01\x4b\xd2\x09"
-			  "\x2f\xa7\x55\xc8\x24\xbb\x5e\x54"
-			  "\xc4\xf3\x6f\xfd\xa9\xfc\xea\x70"
-			  "\xb9\xc6\xe6\x93\xe1\x48\xc1\x51",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	}
-};
-
-
-static const struct cipher_testvec aes_ctr_enc_tv_template[] = {
+static const struct cipher_testvec aes_ctr_tv_template[] = {
 	{ /* From NIST Special Publication 800-38A, Appendix F.5 */
 		.key	= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
 			  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
 		.klen	= 16,
 		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.input	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
 			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
 			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
 			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
@@ -21434,8 +14325,7 @@
 			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
 			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
 			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.ilen	= 64,
-		.result	= "\x87\x4d\x61\x91\xb6\x20\xe3\x26"
+		.ctext	= "\x87\x4d\x61\x91\xb6\x20\xe3\x26"
 			  "\x1b\xef\x68\x64\x99\x0d\xb6\xce"
 			  "\x98\x06\xf6\x6b\x79\x70\xfd\xff"
 			  "\x86\x17\x18\x7b\xb9\xff\xfd\xff"
@@ -21443,7 +14333,7 @@
 			  "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab"
 			  "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1"
 			  "\x79\x21\x70\xa0\xf3\x00\x9c\xee",
-		.rlen	= 64,
+		.len	= 64,
 	}, {
 		.key	= "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
 			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
@@ -21451,7 +14341,7 @@
 		.klen	= 24,
 		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.input	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
 			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
 			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
 			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
@@ -21459,8 +14349,7 @@
 			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
 			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
 			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.ilen	= 64,
-		.result	= "\x1a\xbc\x93\x24\x17\x52\x1c\xa2"
+		.ctext	= "\x1a\xbc\x93\x24\x17\x52\x1c\xa2"
 			  "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b"
 			  "\x09\x03\x39\xec\x0a\xa6\xfa\xef"
 			  "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94"
@@ -21468,7 +14357,7 @@
 			  "\xd1\xbd\x1d\x66\x56\x20\xab\xf7"
 			  "\x4f\x78\xa7\xf6\xd2\x98\x09\x58"
 			  "\x5a\x97\xda\xec\x58\xc6\xb0\x50",
-		.rlen	= 64,
+		.len	= 64,
 	}, {
 		.key	= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
 			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
@@ -21477,7 +14366,7 @@
 		.klen	= 32,
 		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.input	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
 			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
 			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
 			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
@@ -21485,8 +14374,7 @@
 			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
 			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
 			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.ilen	= 64,
-		.result	= "\x60\x1e\xc3\x13\x77\x57\x89\xa5"
+		.ctext	= "\x60\x1e\xc3\x13\x77\x57\x89\xa5"
 			  "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28"
 			  "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a"
 			  "\xca\x84\xe9\x90\xca\xca\xf5\xc5"
@@ -21494,7 +14382,7 @@
 			  "\xe8\x70\x17\xba\x2d\x84\x98\x8d"
 			  "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6"
 			  "\x13\xc2\xdd\x08\x45\x79\x41\xa6",
-		.rlen	= 64,
+		.len	= 64,
 	}, { /* Generated with Crypto++ */
 		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55"
 			  "\x0F\x32\x55\x78\x9B\xBE\x78\x9B"
@@ -21503,7 +14391,7 @@
 		.klen	= 32,
 		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
 			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
 			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
 			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
 			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
@@ -21565,8 +14453,7 @@
 			  "\xAE\x17\xA3\x0C\x75\x01\x6A\xD3"
 			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
 			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12",
-		.ilen	= 496,
-		.result	= "\x04\xF3\xD3\x88\x17\xEF\xDC\xEF"
+		.ctext	= "\x04\xF3\xD3\x88\x17\xEF\xDC\xEF"
 			  "\x8B\x04\xF8\x3A\x66\x8D\x1A\x53"
 			  "\x57\x1F\x4B\x23\xE4\xA0\xAF\xF9"
 			  "\x69\x95\x35\x98\x8D\x4D\x8C\xC1"
@@ -21628,7 +14515,7 @@
 			  "\x10\x09\x9B\x46\x9B\xF2\x2C\x2B"
 			  "\xFA\x3A\x05\x4C\xFA\xD1\xFF\xFE"
 			  "\xF1\x4C\xE5\xB2\x91\x64\x0C\x51",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
@@ -21640,7 +14527,7 @@
 		.klen	= 32,
 		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47"
 			  "\xE2\x7D\x18\xD6\x71\x0C\xA7\x42",
-		.input	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
 			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
 			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
 			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
@@ -21703,8 +14590,7 @@
 			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
 			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12"
 			  "\x7B\xE4\x4D",
-		.ilen	= 499,
-		.result	= "\xDA\x4E\x3F\xBC\xE8\xB6\x3A\xA2"
+		.ctext	= "\xDA\x4E\x3F\xBC\xE8\xB6\x3A\xA2"
 			  "\xD5\x4D\x84\x4A\xA9\x0C\xE1\xA5"
 			  "\xB8\x73\xBC\xF9\xBB\x59\x2F\x44"
 			  "\x8B\xAB\x82\x6C\xB4\x32\x9A\xDE"
@@ -21767,396 +14653,39 @@
 			  "\xD0\xE9\x54\x99\x5D\xBA\x3B\x11"
 			  "\xD8\xFE\xC9\x5B\x5C\x25\xE5\x76"
 			  "\xFB\xF2\x3F",
-		.rlen	= 499,
+		.len	= 499,
 		.also_non_np = 1,
 		.np	= 2,
 		.tap	= { 499 - 16, 16 },
 	},
 };
 
-static const struct cipher_testvec aes_ctr_dec_tv_template[] = {
-	{ /* From NIST Special Publication 800-38A, Appendix F.5 */
-		.key	= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
-			  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
-		.klen	= 16,
-		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.input	= "\x87\x4d\x61\x91\xb6\x20\xe3\x26"
-			  "\x1b\xef\x68\x64\x99\x0d\xb6\xce"
-			  "\x98\x06\xf6\x6b\x79\x70\xfd\xff"
-			  "\x86\x17\x18\x7b\xb9\xff\xfd\xff"
-			  "\x5a\xe4\xdf\x3e\xdb\xd5\xd3\x5e"
-			  "\x5b\x4f\x09\x02\x0d\xb0\x3e\xab"
-			  "\x1e\x03\x1d\xda\x2f\xbe\x03\xd1"
-			  "\x79\x21\x70\xa0\xf3\x00\x9c\xee",
-		.ilen	= 64,
-		.result	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
-			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
-			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
-			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
-			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
-			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
-			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
-			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.rlen	= 64,
-	}, {
-		.key	= "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
-			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
-			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
-		.klen	= 24,
-		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.input	= "\x1a\xbc\x93\x24\x17\x52\x1c\xa2"
-			  "\x4f\x2b\x04\x59\xfe\x7e\x6e\x0b"
-			  "\x09\x03\x39\xec\x0a\xa6\xfa\xef"
-			  "\xd5\xcc\xc2\xc6\xf4\xce\x8e\x94"
-			  "\x1e\x36\xb2\x6b\xd1\xeb\xc6\x70"
-			  "\xd1\xbd\x1d\x66\x56\x20\xab\xf7"
-			  "\x4f\x78\xa7\xf6\xd2\x98\x09\x58"
-			  "\x5a\x97\xda\xec\x58\xc6\xb0\x50",
-		.ilen	= 64,
-		.result	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
-			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
-			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
-			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
-			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
-			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
-			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
-			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.rlen	= 64,
-	}, {
-		.key	= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
-			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
-			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
-			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
-		.klen	= 32,
-		.iv	= "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.input	= "\x60\x1e\xc3\x13\x77\x57\x89\xa5"
-			  "\xb7\xa7\xf5\x04\xbb\xf3\xd2\x28"
-			  "\xf4\x43\xe3\xca\x4d\x62\xb5\x9a"
-			  "\xca\x84\xe9\x90\xca\xca\xf5\xc5"
-			  "\x2b\x09\x30\xda\xa2\x3d\xe9\x4c"
-			  "\xe8\x70\x17\xba\x2d\x84\x98\x8d"
-			  "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6"
-			  "\x13\xc2\xdd\x08\x45\x79\x41\xa6",
-		.ilen	= 64,
-		.result	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
-			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
-			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
-			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
-			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
-			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
-			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
-			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.rlen	= 64,
-	}, { /* Generated with Crypto++ */
-		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55"
-			  "\x0F\x32\x55\x78\x9B\xBE\x78\x9B"
-			  "\xBE\xE1\x04\x27\xE1\x04\x27\x4A"
-			  "\x6D\x90\x4A\x6D\x90\xB3\xD6\xF9",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x04\xF3\xD3\x88\x17\xEF\xDC\xEF"
-			  "\x8B\x04\xF8\x3A\x66\x8D\x1A\x53"
-			  "\x57\x1F\x4B\x23\xE4\xA0\xAF\xF9"
-			  "\x69\x95\x35\x98\x8D\x4D\x8C\xC1"
-			  "\xF0\xB2\x7F\x80\xBB\x54\x28\xA2"
-			  "\x7A\x1B\x9F\x77\xEC\x0E\x6E\xDE"
-			  "\xF0\xEC\xB8\xE4\x20\x62\xEE\xDB"
-			  "\x5D\xF5\xDD\xE3\x54\xFC\xDD\xEB"
-			  "\x6A\xEE\x65\xA1\x21\xD6\xD7\x81"
-			  "\x47\x61\x12\x4D\xC2\x8C\xFA\x78"
-			  "\x1F\x28\x02\x01\xC3\xFC\x1F\xEC"
-			  "\x0F\x10\x4F\xB3\x12\x45\xC6\x3B"
-			  "\x7E\x08\xF9\x5A\xD0\x5D\x73\x2D"
-			  "\x58\xA4\xE5\xCB\x1C\xB4\xCE\x74"
-			  "\x32\x41\x1F\x31\x9C\x08\xA2\x5D"
-			  "\x67\xEB\x72\x1D\xF8\xE7\x70\x54"
-			  "\x34\x4B\x31\x69\x84\x66\x96\x44"
-			  "\x56\xCC\x1E\xD9\xE6\x13\x6A\xB9"
-			  "\x2D\x0A\x05\x45\x2D\x90\xCC\xDF"
-			  "\x16\x5C\x5F\x79\x34\x52\x54\xFE"
-			  "\xFE\xCD\xAD\x04\x2E\xAD\x86\x06"
-			  "\x1F\x37\xE8\x28\xBC\xD3\x8F\x5B"
-			  "\x92\x66\x87\x3B\x8A\x0A\x1A\xCC"
-			  "\x6E\xAB\x9F\x0B\xFA\x5C\xE6\xFD"
-			  "\x3C\x98\x08\x12\xEC\xAA\x9E\x11"
-			  "\xCA\xB2\x1F\xCE\x5E\x5B\xB2\x72"
-			  "\x9C\xCC\x5D\xC5\xE0\x32\xC0\x56"
-			  "\xD5\x45\x16\xD2\xAF\x13\x66\xF7"
-			  "\x8C\x67\xAC\x79\xB2\xAF\x56\x27"
-			  "\x3F\xCC\xFE\xCB\x1E\xC0\x75\xF1"
-			  "\xA7\xC9\xC3\x1D\x8E\xDD\xF9\xD4"
-			  "\x42\xC8\x21\x08\x16\xF7\x01\xD7"
-			  "\xAC\x8E\x3F\x1D\x56\xC1\x06\xE4"
-			  "\x9C\x62\xD6\xA5\x6A\x50\x44\xB3"
-			  "\x35\x1C\x82\xB9\x10\xF9\x42\xA1"
-			  "\xFC\x74\x9B\x44\x4F\x25\x02\xE3"
-			  "\x08\xF5\xD4\x32\x39\x08\x11\xE8"
-			  "\xD2\x6B\x50\x53\xD4\x08\xD1\x6B"
-			  "\x3A\x4A\x68\x7B\x7C\xCD\x46\x5E"
-			  "\x0D\x07\x19\xDB\x67\xD7\x98\x91"
-			  "\xD7\x17\x10\x9B\x7B\x8A\x9B\x33"
-			  "\xAE\xF3\x00\xA6\xD4\x15\xD9\xEA"
-			  "\x85\x99\x22\xE8\x91\x38\x70\x83"
-			  "\x93\x01\x24\x6C\xFA\x9A\xB9\x07"
-			  "\xEA\x8D\x3B\xD9\x2A\x43\x59\x16"
-			  "\x2F\x69\xEE\x84\x36\x44\x76\x98"
-			  "\xF3\x04\x2A\x7C\x74\x3D\x29\x2B"
-			  "\x0D\xAD\x8F\x44\x82\x9E\x57\x8D"
-			  "\xAC\xED\x18\x1F\x50\xA4\xF5\x98"
-			  "\x1F\xBD\x92\x91\x1B\x2D\xA6\xD6"
-			  "\xD2\xE3\x02\xAA\x92\x3B\xC6\xB3"
-			  "\x1B\x39\x72\xD5\x26\xCA\x04\xE0"
-			  "\xFC\x58\x78\xBB\xB1\x3F\xA1\x9C"
-			  "\x42\x24\x3E\x2E\x22\xBB\x4B\xBA"
-			  "\xF4\x52\x0A\xE6\xAE\x47\xB4\x7D"
-			  "\x1D\xA8\xBE\x81\x1A\x75\xDA\xAC"
-			  "\xA6\x25\x1E\xEF\x3A\xC0\x6C\x63"
-			  "\xEF\xDC\xC9\x79\x10\x26\xE8\x61"
-			  "\x29\xFC\xA4\x05\xDF\x7D\x5C\x63"
-			  "\x10\x09\x9B\x46\x9B\xF2\x2C\x2B"
-			  "\xFA\x3A\x05\x4C\xFA\xD1\xFF\xFE"
-			  "\xF1\x4C\xE5\xB2\x91\x64\x0C\x51",
-		.ilen	= 496,
-		.result	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
-			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
-			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
-			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
-			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
-			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
-			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
-			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
-			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
-			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
-			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
-			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
-			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
-			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
-			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
-			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
-			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
-			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
-			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
-			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
-			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
-			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
-			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
-			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
-			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
-			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
-			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
-			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
-			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
-			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
-			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB"
-			  "\x54\xE0\x49\xB2\x1B\xA7\x10\x79"
-			  "\x05\x6E\xD7\x40\xCC\x35\x9E\x07"
-			  "\x93\xFC\x65\xF1\x5A\xC3\x2C\xB8"
-			  "\x21\x8A\x16\x7F\xE8\x51\xDD\x46"
-			  "\xAF\x18\xA4\x0D\x76\x02\x6B\xD4"
-			  "\x3D\xC9\x32\x9B\x04\x90\xF9\x62"
-			  "\xEE\x57\xC0\x29\xB5\x1E\x87\x13"
-			  "\x7C\xE5\x4E\xDA\x43\xAC\x15\xA1"
-			  "\x0A\x73\xFF\x68\xD1\x3A\xC6\x2F"
-			  "\x98\x01\x8D\xF6\x5F\xEB\x54\xBD"
-			  "\x26\xB2\x1B\x84\x10\x79\xE2\x4B"
-			  "\xD7\x40\xA9\x12\x9E\x07\x70\xFC"
-			  "\x65\xCE\x37\xC3\x2C\x95\x21\x8A"
-			  "\xF3\x5C\xE8\x51\xBA\x23\xAF\x18"
-			  "\x81\x0D\x76\xDF\x48\xD4\x3D\xA6"
-			  "\x0F\x9B\x04\x6D\xF9\x62\xCB\x34"
-			  "\xC0\x29\x92\x1E\x87\xF0\x59\xE5"
-			  "\x4E\xB7\x20\xAC\x15\x7E\x0A\x73"
-			  "\xDC\x45\xD1\x3A\xA3\x0C\x98\x01"
-			  "\x6A\xF6\x5F\xC8\x31\xBD\x26\x8F"
-			  "\x1B\x84\xED\x56\xE2\x4B\xB4\x1D"
-			  "\xA9\x12\x7B\x07\x70\xD9\x42\xCE"
-			  "\x37\xA0\x09\x95\xFE\x67\xF3\x5C"
-			  "\xC5\x2E\xBA\x23\x8C\x18\x81\xEA"
-			  "\x53\xDF\x48\xB1\x1A\xA6\x0F\x78"
-			  "\x04\x6D\xD6\x3F\xCB\x34\x9D\x06"
-			  "\x92\xFB\x64\xF0\x59\xC2\x2B\xB7"
-			  "\x20\x89\x15\x7E\xE7\x50\xDC\x45"
-			  "\xAE\x17\xA3\x0C\x75\x01\x6A\xD3"
-			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
-			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55"
-			  "\x0F\x32\x55\x78\x9B\xBE\x78\x9B"
-			  "\xBE\xE1\x04\x27\xE1\x04\x27\x4A"
-			  "\x6D\x90\x4A\x6D\x90\xB3\xD6\xF9",
-		.klen	= 32,
-		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47"
-			  "\xE2\x7D\x18\xD6\x71\x0C\xA7\x42",
-		.input	= "\xDA\x4E\x3F\xBC\xE8\xB6\x3A\xA2"
-			  "\xD5\x4D\x84\x4A\xA9\x0C\xE1\xA5"
-			  "\xB8\x73\xBC\xF9\xBB\x59\x2F\x44"
-			  "\x8B\xAB\x82\x6C\xB4\x32\x9A\xDE"
-			  "\x5A\x0B\xDB\x7A\x6B\xF2\x38\x9F"
-			  "\x06\xF7\xF7\xFF\xFF\xC0\x8A\x2E"
-			  "\x76\xEA\x06\x32\x23\xF3\x59\x2E"
-			  "\x75\xDE\x71\x86\x3C\x98\x23\x44"
-			  "\x5B\xF2\xFA\x6A\x00\xBB\xC1\xAD"
-			  "\x58\xBD\x3E\x6F\x2E\xB4\x19\x04"
-			  "\x70\x8B\x92\x55\x23\xE9\x6A\x3A"
-			  "\x78\x7A\x1B\x10\x85\x52\x9C\x12"
-			  "\xE4\x55\x81\x21\xCE\x53\xD0\x3B"
-			  "\x63\x77\x2C\x74\xD1\xF5\x60\xF3"
-			  "\xA1\xDE\x44\x3C\x8F\x4D\x2F\xDD"
-			  "\x8A\xFE\x3C\x42\x8E\xD3\xF2\x8E"
-			  "\xA8\x28\x69\x65\x31\xE1\x45\x83"
-			  "\xE4\x49\xC4\x9C\xA7\x28\xAA\x21"
-			  "\xCD\x5D\x0F\x15\xB7\x93\x07\x26"
-			  "\xB0\x65\x6D\x91\x90\x23\x7A\xC6"
-			  "\xDB\x68\xB0\xA1\x8E\xA4\x76\x4E"
-			  "\xC6\x91\x83\x20\x92\x4D\x63\x7A"
-			  "\x45\x18\x18\x74\x19\xAD\x71\x01"
-			  "\x6B\x23\xAD\x9D\x4E\xE4\x6E\x46"
-			  "\xC9\x73\x7A\xF9\x02\x95\xF4\x07"
-			  "\x0E\x7A\xA6\xC5\xAE\xFA\x15\x2C"
-			  "\x51\x71\xF1\xDC\x22\xB6\xAC\xD8"
-			  "\x19\x24\x44\xBC\x0C\xFB\x3C\x2D"
-			  "\xB1\x50\x47\x15\x0E\xDB\xB6\xD7"
-			  "\xE8\x61\xE5\x95\x52\x1E\x3E\x49"
-			  "\x70\xE9\x66\x04\x4C\xE1\xAF\xBD"
-			  "\xDD\x15\x3B\x20\x59\x24\xFF\xB0"
-			  "\x39\xAA\xE7\xBF\x23\xA3\x6E\xD5"
-			  "\x15\xF0\x61\x4F\xAE\x89\x10\x58"
-			  "\x5A\x33\x95\x52\x2A\xB5\x77\x9C"
-			  "\xA5\x43\x80\x40\x27\x2D\xAE\xD9"
-			  "\x3F\xE0\x80\x94\x78\x79\xCB\x7E"
-			  "\xAD\x12\x44\x4C\xEC\x27\xB0\xEE"
-			  "\x0B\x05\x2A\x82\x99\x58\xBB\x7A"
-			  "\x8D\x6D\x9D\x8E\xE2\x8E\xE7\x93"
-			  "\x2F\xB3\x09\x8D\x06\xD5\xEE\x70"
-			  "\x16\xAE\x35\xC5\x52\x0F\x46\x1F"
-			  "\x71\xF9\x5E\xF2\x67\xDC\x98\x2F"
-			  "\xA3\x23\xAA\xD5\xD0\x49\xF4\xA6"
-			  "\xF6\xB8\x32\xCD\xD6\x85\x73\x60"
-			  "\x59\x20\xE7\x55\x0E\x91\xE2\x0C"
-			  "\x3F\x1C\xEB\x3D\xDF\x52\x64\xF2"
-			  "\x7D\x8B\x5D\x63\x16\xB9\xB2\x5D"
-			  "\x5E\xAB\xB2\x97\xAB\x78\x44\xE7"
-			  "\xC6\x72\x20\xC5\x90\x9B\xDC\x5D"
-			  "\xB0\xEF\x44\xEF\x87\x31\x8D\xF4"
-			  "\xFB\x81\x5D\xF7\x96\x96\xD4\x50"
-			  "\x89\xA7\xF6\xB9\x67\x76\x40\x9E"
-			  "\x9D\x40\xD5\x2C\x30\xB8\x01\x8F"
-			  "\xE4\x7B\x71\x48\xA9\xA0\xA0\x1D"
-			  "\x87\x52\xA4\x91\xA9\xD7\xA9\x51"
-			  "\xD9\x59\xF7\xCC\x63\x22\xC1\x8D"
-			  "\x84\x7B\xD8\x22\x32\x5C\x6F\x1D"
-			  "\x6E\x9F\xFA\xDD\x49\x40\xDC\x37"
-			  "\x14\x8C\xE1\x80\x1B\xDD\x36\x2A"
-			  "\xD0\xE9\x54\x99\x5D\xBA\x3B\x11"
-			  "\xD8\xFE\xC9\x5B\x5C\x25\xE5\x76"
-			  "\xFB\xF2\x3F",
-		.ilen	= 499,
-		.result	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
-			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
-			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
-			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
-			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
-			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
-			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
-			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
-			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
-			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
-			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
-			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
-			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
-			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
-			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
-			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
-			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
-			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
-			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
-			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
-			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
-			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
-			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
-			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
-			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
-			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
-			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
-			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
-			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
-			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
-			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB"
-			  "\x54\xE0\x49\xB2\x1B\xA7\x10\x79"
-			  "\x05\x6E\xD7\x40\xCC\x35\x9E\x07"
-			  "\x93\xFC\x65\xF1\x5A\xC3\x2C\xB8"
-			  "\x21\x8A\x16\x7F\xE8\x51\xDD\x46"
-			  "\xAF\x18\xA4\x0D\x76\x02\x6B\xD4"
-			  "\x3D\xC9\x32\x9B\x04\x90\xF9\x62"
-			  "\xEE\x57\xC0\x29\xB5\x1E\x87\x13"
-			  "\x7C\xE5\x4E\xDA\x43\xAC\x15\xA1"
-			  "\x0A\x73\xFF\x68\xD1\x3A\xC6\x2F"
-			  "\x98\x01\x8D\xF6\x5F\xEB\x54\xBD"
-			  "\x26\xB2\x1B\x84\x10\x79\xE2\x4B"
-			  "\xD7\x40\xA9\x12\x9E\x07\x70\xFC"
-			  "\x65\xCE\x37\xC3\x2C\x95\x21\x8A"
-			  "\xF3\x5C\xE8\x51\xBA\x23\xAF\x18"
-			  "\x81\x0D\x76\xDF\x48\xD4\x3D\xA6"
-			  "\x0F\x9B\x04\x6D\xF9\x62\xCB\x34"
-			  "\xC0\x29\x92\x1E\x87\xF0\x59\xE5"
-			  "\x4E\xB7\x20\xAC\x15\x7E\x0A\x73"
-			  "\xDC\x45\xD1\x3A\xA3\x0C\x98\x01"
-			  "\x6A\xF6\x5F\xC8\x31\xBD\x26\x8F"
-			  "\x1B\x84\xED\x56\xE2\x4B\xB4\x1D"
-			  "\xA9\x12\x7B\x07\x70\xD9\x42\xCE"
-			  "\x37\xA0\x09\x95\xFE\x67\xF3\x5C"
-			  "\xC5\x2E\xBA\x23\x8C\x18\x81\xEA"
-			  "\x53\xDF\x48\xB1\x1A\xA6\x0F\x78"
-			  "\x04\x6D\xD6\x3F\xCB\x34\x9D\x06"
-			  "\x92\xFB\x64\xF0\x59\xC2\x2B\xB7"
-			  "\x20\x89\x15\x7E\xE7\x50\xDC\x45"
-			  "\xAE\x17\xA3\x0C\x75\x01\x6A\xD3"
-			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
-			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12"
-			  "\x7B\xE4\x4D",
-		.rlen	= 499,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 499 - 16, 16 },
-	},
-};
-
-static const struct cipher_testvec aes_ctr_rfc3686_enc_tv_template[] = {
+static const struct cipher_testvec aes_ctr_rfc3686_tv_template[] = {
 	{ /* From RFC 3686 */
 		.key	= "\xae\x68\x52\xf8\x12\x10\x67\xcc"
 			  "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e"
 			  "\x00\x00\x00\x30",
 		.klen	= 20,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "Single block msg",
-		.ilen	= 16,
-		.result = "\xe4\x09\x5d\x4f\xb7\xa7\xb3\x79"
+		.ptext	= "Single block msg",
+		.ctext	= "\xe4\x09\x5d\x4f\xb7\xa7\xb3\x79"
 			  "\x2d\x61\x75\xa3\x26\x13\x11\xb8",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x7e\x24\x06\x78\x17\xfa\xe0\xd7"
 			  "\x43\xd6\xce\x1f\x32\x53\x91\x63"
 			  "\x00\x6c\xb6\xdb",
 		.klen	= 20,
 		.iv	= "\xc0\x54\x3b\x59\xda\x48\xd9\x0b",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.ilen	= 32,
-		.result = "\x51\x04\xa1\x06\x16\x8a\x72\xd9"
+		.ctext	= "\x51\x04\xa1\x06\x16\x8a\x72\xd9"
 			  "\x79\x0d\x41\xee\x8e\xda\xd3\x88"
 			  "\xeb\x2e\x1e\xfc\x46\xda\x57\xc8"
 			  "\xfc\xe6\x30\xdf\x91\x41\xbe\x28",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x16\xaf\x5b\x14\x5f\xc9\xf5\x79"
 			  "\xc1\x75\xf9\x3e\x3b\xfb\x0e\xed"
@@ -22164,11 +14693,10 @@
 			  "\x00\x00\x00\x48",
 		.klen	= 28,
 		.iv	= "\x36\x73\x3c\x14\x7d\x6d\x93\xcb",
-		.input	= "Single block msg",
-		.ilen	= 16,
-		.result	= "\x4b\x55\x38\x4f\xe2\x59\xc9\xc8"
+		.ptext	= "Single block msg",
+		.ctext	= "\x4b\x55\x38\x4f\xe2\x59\xc9\xc8"
 			  "\x4e\x79\x35\xa0\x03\xcb\xe9\x28",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x7c\x5c\xb2\x40\x1b\x3d\xc3\x3c"
 			  "\x19\xe7\x34\x08\x19\xe0\xf6\x9c"
@@ -22176,16 +14704,15 @@
 			  "\x00\x96\xb0\x3b",
 		.klen	= 28,
 		.iv	= "\x02\x0c\x6e\xad\xc2\xcb\x50\x0d",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.ilen	= 32,
-		.result	= "\x45\x32\x43\xfc\x60\x9b\x23\x32"
+		.ctext	= "\x45\x32\x43\xfc\x60\x9b\x23\x32"
 			  "\x7e\xdf\xaa\xfa\x71\x31\xcd\x9f"
 			  "\x84\x90\x70\x1c\x5a\xd4\xa7\x9c"
 			  "\xfc\x1f\xe0\xff\x42\xf4\xfb\x00",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x77\x6b\xef\xf2\x85\x1d\xb0\x6f"
 			  "\x4c\x8a\x05\x42\xc8\x69\x6f\x6c"
@@ -22194,11 +14721,10 @@
 			  "\x00\x00\x00\x60",
 		.klen	= 36,
 		.iv	= "\xdb\x56\x72\xc9\x7a\xa8\xf0\xb2",
-		.input	= "Single block msg",
-		.ilen	= 16,
-		.result = "\x14\x5a\xd0\x1d\xbf\x82\x4e\xc7"
+		.ptext	= "Single block msg",
+		.ctext	= "\x14\x5a\xd0\x1d\xbf\x82\x4e\xc7"
 			  "\x56\x08\x63\xdc\x71\xe3\xe0\xc0",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xf6\xd6\x6d\x6b\xd5\x2d\x59\xbb"
 			  "\x07\x96\x36\x58\x79\xef\xf8\x86"
@@ -22207,16 +14733,15 @@
 			  "\x00\xfa\xac\x24",
 		.klen	= 36,
 		.iv	= "\xc1\x58\x5e\xf1\x5a\x43\xd8\x75",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.ilen	= 32,
-		.result = "\xf0\x5e\x23\x1b\x38\x94\x61\x2c"
+		.ctext	= "\xf0\x5e\x23\x1b\x38\x94\x61\x2c"
 			  "\x49\xee\x00\x0b\x80\x4e\xb2\xa9"
 			  "\xb8\x30\x6b\x50\x8f\x83\x9d\x6a"
 			  "\x55\x30\x83\x1d\x93\x44\xaf\x1c",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 	// generated using Crypto++
 		.key = "\x00\x01\x02\x03\x04\x05\x06\x07"
@@ -22226,7 +14751,7 @@
 			"\x00\x00\x00\x00",
 		.klen = 32 + 4,
 		.iv = "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input =
+		.ptext =
 			"\x00\x01\x02\x03\x04\x05\x06\x07"
 			"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			"\x10\x11\x12\x13\x14\x15\x16\x17"
@@ -22740,8 +15265,7 @@
 			"\x10\x2f\x4e\x6d\x8c\xab\xca\xe9"
 			"\x08\x27\x46\x65\x84\xa3\xc2\xe1"
 			"\x00\x21\x42\x63",
-		.ilen = 4100,
-		.result =
+		.ctext =
 			"\xf0\x5c\x74\xad\x4e\xbc\x99\xe2"
 			"\xae\xff\x91\x3a\x44\xcf\x38\x32"
 			"\x1e\xad\xa7\xcd\xa1\x39\x95\xaa"
@@ -23255,104 +15779,13 @@
 			"\x41\x01\x18\x5d\x5d\x07\x97\xa6"
 			"\x4b\xef\x31\x18\xea\xac\xb1\x84"
 			"\x21\xed\xda\x86",
-		.rlen = 4100,
+		.len	= 4100,
 		.np	= 2,
 		.tap	= { 4064, 36 },
 	},
 };
 
-static const struct cipher_testvec aes_ctr_rfc3686_dec_tv_template[] = {
-	{ /* From RFC 3686 */
-		.key	= "\xae\x68\x52\xf8\x12\x10\x67\xcc"
-			  "\x4b\xf7\xa5\x76\x55\x77\xf3\x9e"
-			  "\x00\x00\x00\x30",
-		.klen	= 20,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xe4\x09\x5d\x4f\xb7\xa7\xb3\x79"
-			  "\x2d\x61\x75\xa3\x26\x13\x11\xb8",
-		.ilen	= 16,
-		.result	= "Single block msg",
-		.rlen	= 16,
-	}, {
-		.key	= "\x7e\x24\x06\x78\x17\xfa\xe0\xd7"
-			  "\x43\xd6\xce\x1f\x32\x53\x91\x63"
-			  "\x00\x6c\xb6\xdb",
-		.klen	= 20,
-		.iv	= "\xc0\x54\x3b\x59\xda\x48\xd9\x0b",
-		.input	= "\x51\x04\xa1\x06\x16\x8a\x72\xd9"
-			  "\x79\x0d\x41\xee\x8e\xda\xd3\x88"
-			  "\xeb\x2e\x1e\xfc\x46\xda\x57\xc8"
-			  "\xfc\xe6\x30\xdf\x91\x41\xbe\x28",
-		.ilen	= 32,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.rlen	= 32,
-	}, {
-		.key	= "\x16\xaf\x5b\x14\x5f\xc9\xf5\x79"
-			  "\xc1\x75\xf9\x3e\x3b\xfb\x0e\xed"
-			  "\x86\x3d\x06\xcc\xfd\xb7\x85\x15"
-			  "\x00\x00\x00\x48",
-		.klen	= 28,
-		.iv	= "\x36\x73\x3c\x14\x7d\x6d\x93\xcb",
-		.input	= "\x4b\x55\x38\x4f\xe2\x59\xc9\xc8"
-			  "\x4e\x79\x35\xa0\x03\xcb\xe9\x28",
-		.ilen	= 16,
-		.result	= "Single block msg",
-		.rlen	= 16,
-	}, {
-		.key	= "\x7c\x5c\xb2\x40\x1b\x3d\xc3\x3c"
-			  "\x19\xe7\x34\x08\x19\xe0\xf6\x9c"
-			  "\x67\x8c\x3d\xb8\xe6\xf6\xa9\x1a"
-			  "\x00\x96\xb0\x3b",
-		.klen	= 28,
-		.iv	= "\x02\x0c\x6e\xad\xc2\xcb\x50\x0d",
-		.input	= "\x45\x32\x43\xfc\x60\x9b\x23\x32"
-			  "\x7e\xdf\xaa\xfa\x71\x31\xcd\x9f"
-			  "\x84\x90\x70\x1c\x5a\xd4\xa7\x9c"
-			  "\xfc\x1f\xe0\xff\x42\xf4\xfb\x00",
-		.ilen	= 32,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.rlen	= 32,
-	}, {
-		.key	= "\x77\x6b\xef\xf2\x85\x1d\xb0\x6f"
-			  "\x4c\x8a\x05\x42\xc8\x69\x6f\x6c"
-			  "\x6a\x81\xaf\x1e\xec\x96\xb4\xd3"
-			  "\x7f\xc1\xd6\x89\xe6\xc1\xc1\x04"
-			  "\x00\x00\x00\x60",
-		.klen	= 36,
-		.iv	= "\xdb\x56\x72\xc9\x7a\xa8\xf0\xb2",
-		.input	= "\x14\x5a\xd0\x1d\xbf\x82\x4e\xc7"
-			  "\x56\x08\x63\xdc\x71\xe3\xe0\xc0",
-		.ilen	= 16,
-		.result	= "Single block msg",
-		.rlen	= 16,
-	}, {
-		.key	= "\xf6\xd6\x6d\x6b\xd5\x2d\x59\xbb"
-			  "\x07\x96\x36\x58\x79\xef\xf8\x86"
-			  "\xc6\x6d\xd5\x1a\x5b\x6a\x99\x74"
-			  "\x4b\x50\x59\x0c\x87\xa2\x38\x84"
-			  "\x00\xfa\xac\x24",
-		.klen	= 36,
-		.iv	= "\xc1\x58\x5e\xf1\x5a\x43\xd8\x75",
-		.input	= "\xf0\x5e\x23\x1b\x38\x94\x61\x2c"
-			  "\x49\xee\x00\x0b\x80\x4e\xb2\xa9"
-			  "\xb8\x30\x6b\x50\x8f\x83\x9d\x6a"
-			  "\x55\x30\x83\x1d\x93\x44\xaf\x1c",
-		.ilen	= 32,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.rlen	= 32,
-	},
-};
-
-static const struct cipher_testvec aes_ofb_enc_tv_template[] = {
+static const struct cipher_testvec aes_ofb_tv_template[] = {
 	 /* From NIST Special Publication 800-38A, Appendix F.5 */
 	{
 		.key	= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
@@ -23360,7 +15793,7 @@
 		.klen	= 16,
 		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07\x08"
 			  "\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.input = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
 			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
 			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
 			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
@@ -23368,8 +15801,7 @@
 			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
 			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
 			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.ilen	= 64,
-		.result = "\x3b\x3f\xd9\x2e\xb7\x2d\xad\x20"
+		.ctext	= "\x3b\x3f\xd9\x2e\xb7\x2d\xad\x20"
 			  "\x33\x34\x49\xf8\xe8\x3c\xfb\x4a"
 			  "\x77\x89\x50\x8d\x16\x91\x8f\x03\xf5"
 			  "\x3c\x52\xda\xc5\x4e\xd8\x25"
@@ -23377,36 +15809,7 @@
 			  "\x44\xf7\xa8\x22\x60\xed\xcc"
 			  "\x30\x4c\x65\x28\xf6\x59\xc7\x78"
 			  "\x66\xa5\x10\xd9\xc1\xd6\xae\x5e",
-		.rlen	= 64,
-	}
-};
-
-static const struct cipher_testvec aes_ofb_dec_tv_template[] = {
-	 /* From NIST Special Publication 800-38A, Appendix F.5 */
-	{
-		.key	= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
-			  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
-		.klen	= 16,
-		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07\x08"
-			  "\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.input = "\x3b\x3f\xd9\x2e\xb7\x2d\xad\x20"
-			  "\x33\x34\x49\xf8\xe8\x3c\xfb\x4a"
-			  "\x77\x89\x50\x8d\x16\x91\x8f\x03\xf5"
-			  "\x3c\x52\xda\xc5\x4e\xd8\x25"
-			  "\x97\x40\x05\x1e\x9c\x5f\xec\xf6\x43"
-			  "\x44\xf7\xa8\x22\x60\xed\xcc"
-			  "\x30\x4c\x65\x28\xf6\x59\xc7\x78"
-			  "\x66\xa5\x10\xd9\xc1\xd6\xae\x5e",
-		.ilen	= 64,
-		.result = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
-			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
-			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
-			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
-			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
-			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
-			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
-			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
-		.rlen	= 64,
+		.len	= 64,
 	}
 };
 
@@ -27377,6 +19780,6241 @@
 	},
 };
 
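+/*
+ * AEGIS-128 test vectors - generated via reference implementation from
+ * SUPERCOP (https://bench.cr.yp.to/supercop.html):
+ *
+ *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
+ *   (see crypto_aead/aegis128/)
+ */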
+static const struct aead_testvec aegis128_enc_tv_template[] = {
+	{
+		.key	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
+		.klen	= 16,
+		.iv	= "\x1e\x92\x1c\xcf\x88\x3d\x54\x0d"
+			  "\x40\x6d\x59\x48\xfc\x92\x61\x03",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x07\xa5\x11\xf2\x9d\x40\xb8\x6d"
+			  "\xda\xb8\x12\x34\x4c\x53\xd9\x72",
+		.rlen	= 16,
+	}, {
+		.key	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
+		.klen	= 16,
+		.iv	= "\x5a\xb7\x56\x6e\x98\xb9\xfd\x29"
+			  "\xc1\x47\x0b\xda\xf6\xb6\x23\x09",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x79",
+		.ilen	= 1,
+		.result	= "\x9e\x78\x52\xae\xcb\x9e\xe4\xd3"
+			  "\x9a\xd7\x5d\xd7\xaa\x9a\xe9\x5a"
+			  "\xcc",
+		.rlen	= 17,
+	}, {
+		.key	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
+		.klen	= 16,
+		.iv	= "\x97\xdb\x90\x0e\xa8\x35\xa5\x45"
+			  "\x42\x21\xbd\x6b\xf0\xda\xe6\x0f",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47",
+		.ilen	= 15,
+		.result	= "\xc3\x80\x83\x04\x5f\xaa\x61\xc7"
+			  "\xca\xdd\x6f\xac\x85\x08\xb5\x35"
+			  "\x2b\xc2\x3e\x0b\x1b\x39\x37\x2b"
+			  "\x7a\x21\x16\xb3\xe6\x67\x66",
+		.rlen	= 31,
+	}, {
+		.key	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
+		.klen	= 16,
+		.iv	= "\xd3\x00\xc9\xad\xb8\xb0\x4e\x61"
+			  "\xc3\xfb\x6f\xfd\xea\xff\xa9\x15",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.ilen	= 16,
+		.result	= "\x23\x25\x30\xe5\x6a\xb6\x36\x7d"
+			  "\x38\xfd\x3a\xd2\xc2\x58\xa9\x11"
+			  "\x1e\xa8\x30\x9c\x16\xa4\xdb\x65"
+			  "\x51\x10\x16\x27\x70\x9b\x64\x29",
+		.rlen	= 32,
+	}, {
+		.key	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
+		.klen	= 16,
+		.iv	= "\x10\x25\x03\x4c\xc8\x2c\xf7\x7d"
+			  "\x44\xd5\x21\x8e\xe4\x23\x6b\x1c",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f"
+			  "\xd3",
+		.ilen	= 17,
+		.result	= "\x2a\x8d\x56\x91\xc6\xf3\x56\xa5"
+			  "\x1f\xf0\x89\x2e\x13\xad\xe6\xf6"
+			  "\x46\x80\xb1\x0e\x18\x30\x40\x97"
+			  "\x03\xdf\x64\x3c\xbe\x93\x9e\xc9"
+			  "\x3b",
+		.rlen	= 33,
+	}, {
+		.key	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
+		.klen	= 16,
+		.iv	= "\x4c\x49\x3d\xec\xd8\xa8\xa0\x98"
+			  "\xc5\xb0\xd3\x1f\xde\x48\x2e\x22",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25"
+			  "\xfe\x8d\x45\x19\x1e\xc0\x0b\x99"
+			  "\x88\x11\x39\x12\x1c\x3a\xbb",
+		.ilen	= 31,
+		.result	= "\x4e\xf6\xfa\x13\xde\x43\x63\x4c"
+			  "\xe2\x04\x3e\xe4\x85\x14\xb6\x3f"
+			  "\xb1\x8f\x4c\xdb\x41\xa2\x14\x99"
+			  "\xf5\x53\x0f\x73\x86\x7e\x97\xa1"
+			  "\x4b\x56\x5b\x94\xce\xcd\x74\xcd"
+			  "\x75\xc4\x53\x01\x89\x45\x59",
+		.rlen	= 47,
+	}, {
+		.key	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
+		.klen	= 16,
+		.iv	= "\x89\x6e\x77\x8b\xe8\x23\x49\xb4"
+			  "\x45\x8a\x85\xb1\xd8\x6c\xf1\x28",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b"
+			  "\x28\x50\x51\x9d\x24\x60\x8d\xb3"
+			  "\x49\x3e\x17\xea\xf6\x99\x5a\xdd",
+		.ilen	= 32,
+		.result	= "\xa4\x9a\xb7\xfd\xa0\xd4\xd6\x47"
+			  "\x95\xf4\x58\x38\x14\x83\x27\x01"
+			  "\x4c\xed\x32\x2c\xf7\xd6\x31\xf7"
+			  "\x38\x1b\x2c\xc9\xb6\x31\xce\xaa"
+			  "\xa5\x3c\x1a\x18\x5c\xce\xb9\xdf"
+			  "\x51\x52\x77\xf2\x5e\x85\x80\x41",
+		.rlen	= 48,
+	}, {
+		.key	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
+		.klen	= 16,
+		.iv	= "\xc5\x93\xb0\x2a\xf8\x9f\xf1\xd0"
+			  "\xc6\x64\x37\x42\xd2\x90\xb3\x2e",
+		.assoc	= "\xd5",
+		.alen	= 1,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xfb\xd4\x83\x71\x9e\x63\xad\x60"
+			  "\xb9\xf9\xeb\x34\x52\x49\xcf\xb7",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
+		.klen	= 16,
+		.iv	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34",
+		.assoc	= "\x11\x81\x78\x32\x4d\xb9\x44\x73"
+			  "\x68\x75\x16\xf8\xcb\x7e\xa7",
+		.alen	= 15,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x0c\xaf\x2e\x96\xf6\x97\x08\x71"
+			  "\x7d\x3a\x84\xc4\x44\x57\x77\x7e",
+		.rlen	= 16,
+	}, {
+		.key	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
+		.klen	= 16,
+		.iv	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
+		.assoc	= "\x4e\xa5\xb2\xd1\x5d\x35\xed\x8f"
+			  "\xe8\x4f\xc8\x89\xc5\xa2\x69\xbc",
+		.alen	= 16,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xc7\x87\x09\x3b\xc7\x19\x74\x22"
+			  "\x22\xa5\x67\x10\xb2\x36\xb3\x45",
+		.rlen	= 16,
+	}, {
+		.key	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
+		.klen	= 16,
+		.iv	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41",
+		.assoc	= "\x8a\xca\xec\x70\x6d\xb1\x96\xab"
+			  "\x69\x29\x7a\x1b\xbf\xc7\x2c\xc2"
+			  "\x07",
+		.alen	= 17,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x02\xc6\x3b\x46\x65\xb2\xef\x91"
+			  "\x31\xf0\x45\x48\x8a\x2a\xed\xe4",
+		.rlen	= 16,
+	}, {
+		.key	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
+		.klen	= 16,
+		.iv	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47",
+		.assoc	= "\xc7\xef\x26\x10\x7d\x2c\x3f\xc6"
+			  "\xea\x03\x2c\xac\xb9\xeb\xef\xc9"
+			  "\x31\x6b\x08\x12\xfc\xd8\x37\x2d"
+			  "\xe0\x17\x3a\x2e\x83\x5c\x8f",
+		.alen	= 31,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x20\x85\xa8\xd0\x91\x48\x85\xf3"
+			  "\x5a\x16\xc0\x57\x68\x47\xdd\xcb",
+		.rlen	= 16,
+	}, {
+		.key	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
+		.klen	= 16,
+		.iv	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d",
+		.assoc	= "\x03\x14\x5f\xaf\x8d\xa8\xe7\xe2"
+			  "\x6b\xde\xde\x3e\xb3\x10\xb1\xcf"
+			  "\x5c\x2d\x14\x96\x01\x78\xb9\x47"
+			  "\xa1\x44\x19\x06\x5d\xbb\x2e\x2f",
+		.alen	= 32,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x6a\xf8\x8d\x9c\x42\x75\x35\x79"
+			  "\xc1\x96\xbd\x31\x6e\x69\x1b\x50",
+		.rlen	= 16,
+	}, {
+		.key	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
+		.klen	= 16,
+		.iv	= "\x31\x6f\x0b\xe6\x59\x85\xe6\x77"
+			  "\xcc\x81\x63\xab\xae\x6b\x43\x54",
+		.assoc	= "\x40",
+		.alen	= 1,
+		.input	= "\x4f",
+		.ilen	= 1,
+		.result	= "\x01\x24\xb1\xba\xf6\xd3\xdf\x83"
+			  "\x70\x45\xe3\x2a\x9d\x5c\x63\x98"
+			  "\x39",
+		.rlen	= 17,
+	}, {
+		.key	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
+		.klen	= 16,
+		.iv	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a",
+		.assoc	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37",
+		.alen	= 15,
+		.input	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67",
+		.ilen	= 15,
+		.result	= "\x18\x78\xc2\x6e\xe1\xf7\xe6\x8a"
+			  "\xca\x0e\x62\x00\xa8\x21\xb5\x21"
+			  "\x3d\x36\xdb\xf7\xcc\x31\x94\x9c"
+			  "\x98\xbd\x71\x7a\xef\xa4\xfa",
+		.rlen	= 31,
+	}, {
+		.key	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
+		.klen	= 16,
+		.iv	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
+		.assoc	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
+		.alen	= 16,
+		.input	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.ilen	= 16,
+		.result	= "\xea\xd1\x81\x75\xb4\x13\x1d\x86"
+			  "\xd4\x17\x26\xe5\xd6\x89\x39\x04"
+			  "\xa9\x6c\xca\xac\x40\x73\xb2\x4c"
+			  "\x9c\xb9\x0e\x79\x4c\x40\x65\xc6",
+		.rlen	= 32,
+	}, {
+		.key	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
+		.klen	= 16,
+		.iv	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66",
+		.assoc	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05",
+		.alen	= 17,
+		.input	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69"
+			  "\xd0",
+		.ilen	= 17,
+		.result	= "\xf4\xb2\x84\xd1\x81\xfa\x98\x1c"
+			  "\x38\x2d\x69\x90\x1c\x71\x38\x98"
+			  "\x9f\xe1\x19\x3b\x63\x91\xaf\x6e"
+			  "\x4b\x07\x2c\xac\x53\xc5\xd5\xfe"
+			  "\x93",
+		.rlen	= 33,
+	}, {
+		.key	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
+		.klen	= 16,
+		.iv	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d",
+		.assoc	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a",
+		.alen	= 31,
+		.input	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70"
+			  "\xfa\xa9\xd0\x4d\x5c\x40\x23\xcd"
+			  "\x98\x34\xab\x37\x56\xae\x32",
+		.ilen	= 31,
+		.result	= "\xa0\xe7\x0a\x60\xe7\xb8\x8a\xdb"
+			  "\x94\xd3\x93\xf2\x41\x86\x16\xdd"
+			  "\x4c\xe8\xe7\xe0\x62\x48\x89\x40"
+			  "\xc0\x49\x9b\x63\x32\xec\x8b\xdb"
+			  "\xdc\xa6\xea\x2c\xc2\x7f\xf5\x04"
+			  "\xcb\xe5\x47\xbb\xa7\xd1\x9d",
+		.rlen	= 47,
+	}, {
+		.key	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
+		.klen	= 16,
+		.iv	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73",
+		.assoc	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
+		.alen	= 32,
+		.input	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76"
+			  "\x24\x6b\xdc\xd1\x61\xe0\xa5\xe7"
+			  "\x5a\x61\x8a\x0f\x30\x0d\xd1\xec",
+		.ilen	= 32,
+		.result	= "\x62\xdc\x2d\x68\x2d\x71\xbb\x33"
+			  "\x13\xdf\xc0\x46\xf6\x61\x94\xa7"
+			  "\x60\xd3\xd4\xca\xd9\xbe\x82\xf3"
+			  "\xf1\x5b\xa0\xfa\x15\xba\xda\xea"
+			  "\x87\x68\x47\x08\x5d\xdd\x83\xb0"
+			  "\x60\xf4\x93\x20\xdf\x34\x8f\xea",
+		.rlen	= 48,
+	}, {
+		.key	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
+		.klen	= 16,
+		.iv	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79",
+		.assoc	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d",
+		.alen	= 33,
+		.input	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c"
+			  "\x4f\x2e\xe8\x55\x66\x80\x27\x00"
+			  "\x1b\x8f\x68\xe7\x0a\x6c\x71\xc3"
+			  "\x21\x78\x55\x9d\x9c\x65\x7b\xcd"
+			  "\x0a\x34\x97\xff\x47\x37\xb0\x2a"
+			  "\x80\x0d\x19\x98\x33\xa9\x7a\xe3"
+			  "\x2e\x4c\xc6\xf3\x8c\x88\x42\x01"
+			  "\xbd",
+		.ilen	= 65,
+		.result	= "\x84\xc5\x21\xab\xe1\xeb\xbb\x6d"
+			  "\xaa\x2a\xaf\xeb\x3b\x3b\x69\xe7"
+			  "\x2c\x47\xef\x9d\xb7\x53\x36\xb7"
+			  "\xb6\xf5\xe5\xa8\xc9\x9e\x02\xd7"
+			  "\x83\x88\xc2\xbd\x2f\xf9\x10\xc0"
+			  "\xf5\xa1\x6e\xd3\x97\x64\x82\xa3"
+			  "\xfb\xda\x2c\xb1\x94\xa1\x58\x32"
+			  "\xe8\xd4\x39\xfc\x9e\x26\xf9\xf1"
+			  "\x61\xe6\xae\x07\xf2\xe0\xa7\x44"
+			  "\x96\x28\x3b\xee\x6b\xc6\x16\x31"
+			  "\x3f",
+		.rlen	= 81,
+	}, {
+		.key	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
+		.klen	= 16,
+		.iv	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f",
+		.assoc	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
+			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
+			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
+			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
+			  "\x54",
+		.alen	= 65,
+		.input	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82"
+			  "\x79\xf0\xf3\xd9\x6c\x20\xa9\x1a"
+			  "\xdc\xbc\x47\xc0\xe4\xcb\x10\x99"
+			  "\x2f",
+		.ilen	= 33,
+		.result	= "\x8f\x23\x47\xfb\xf2\xac\x23\x83"
+			  "\x77\x09\xac\x74\xef\xd2\x56\xae"
+			  "\x20\x7b\x7b\xca\x45\x8e\xc8\xc2"
+			  "\x50\xbd\xc7\x44\x1c\x54\x98\xd8"
+			  "\x1f\xd0\x9a\x79\xaa\xf9\xe1\xb3"
+			  "\xb4\x98\x5a\x9b\xe4\x4d\xbf\x4e"
+			  "\x39",
+		.rlen	= 49,
+	}, {
+		.key	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
+		.klen	= 16,
+		.iv	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
+		.assoc	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
+		.alen	= 16,
+		.input	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.ilen	= 16,
+		.result	= "\x42\xc3\x58\xfb\x29\xe2\x4a\x56"
+			  "\xf1\xf5\xe1\x51\x55\x4b\x0a\x45"
+			  "\x46\xb5\x8d\xac\xb6\x34\xd8\x8b"
+			  "\xde\x20\x59\x77\xc1\x74\x90",
+		.rlen	= 31,
+	}, {
+		.key	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
+		.klen	= 16,
+		.iv	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
+		.assoc	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
+		.alen	= 16,
+		.input	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.ilen	= 16,
+		.result	= "\xb2\xfb\xf6\x97\x69\x7a\xe9\xec"
+			  "\xe2\x94\xa1\x8b\xa0\x2b\x60\x72"
+			  "\x1d\x04\xdd\x6a\xef\x46\x8f\x68"
+			  "\xe9\xe0\x17\x45\x70\x12",
+		.rlen	= 30,
+	}, {
+		.key	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
+		.klen	= 16,
+		.iv	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
+		.assoc	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
+		.alen	= 16,
+		.input	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.ilen	= 16,
+		.result	= "\x47\xda\x54\x42\x51\x72\xc4\x8b"
+			  "\xf5\x57\x0f\x2f\x49\x0e\x11\x3b"
+			  "\x78\x93\xec\xfc\xf4\xff\xe1\x2d",
+		.rlen	= 24,
+	},
+};
+
+/*
+ * AEGIS-128 test vectors - generated via reference implementation from
+ * SUPERCOP (https://bench.cr.yp.to/supercop.html):
+ *
+ *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
+ *   (see crypto_aead/aegis128/)
+ */
+static const struct aead_testvec aegis128_dec_tv_template[] = {
+	{
+		.key	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
+		.klen	= 16,
+		.iv	= "\x1e\x92\x1c\xcf\x88\x3d\x54\x0d"
+			  "\x40\x6d\x59\x48\xfc\x92\x61\x03",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x07\xa5\x11\xf2\x9d\x40\xb8\x6d"
+			  "\xda\xb8\x12\x34\x4c\x53\xd9\x72",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
+		.klen	= 16,
+		.iv	= "\x5a\xb7\x56\x6e\x98\xb9\xfd\x29"
+			  "\xc1\x47\x0b\xda\xf6\xb6\x23\x09",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x9e\x78\x52\xae\xcb\x9e\xe4\xd3"
+			  "\x9a\xd7\x5d\xd7\xaa\x9a\xe9\x5a"
+			  "\xcc",
+		.ilen	= 17,
+		.result	= "\x79",
+		.rlen	= 1,
+	}, {
+		.key	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
+		.klen	= 16,
+		.iv	= "\x97\xdb\x90\x0e\xa8\x35\xa5\x45"
+			  "\x42\x21\xbd\x6b\xf0\xda\xe6\x0f",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xc3\x80\x83\x04\x5f\xaa\x61\xc7"
+			  "\xca\xdd\x6f\xac\x85\x08\xb5\x35"
+			  "\x2b\xc2\x3e\x0b\x1b\x39\x37\x2b"
+			  "\x7a\x21\x16\xb3\xe6\x67\x66",
+		.ilen	= 31,
+		.result	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47",
+		.rlen	= 15,
+	}, {
+		.key	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
+		.klen	= 16,
+		.iv	= "\xd3\x00\xc9\xad\xb8\xb0\x4e\x61"
+			  "\xc3\xfb\x6f\xfd\xea\xff\xa9\x15",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x23\x25\x30\xe5\x6a\xb6\x36\x7d"
+			  "\x38\xfd\x3a\xd2\xc2\x58\xa9\x11"
+			  "\x1e\xa8\x30\x9c\x16\xa4\xdb\x65"
+			  "\x51\x10\x16\x27\x70\x9b\x64\x29",
+		.ilen	= 32,
+		.result	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.rlen	= 16,
+	}, {
+		.key	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
+		.klen	= 16,
+		.iv	= "\x10\x25\x03\x4c\xc8\x2c\xf7\x7d"
+			  "\x44\xd5\x21\x8e\xe4\x23\x6b\x1c",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x2a\x8d\x56\x91\xc6\xf3\x56\xa5"
+			  "\x1f\xf0\x89\x2e\x13\xad\xe6\xf6"
+			  "\x46\x80\xb1\x0e\x18\x30\x40\x97"
+			  "\x03\xdf\x64\x3c\xbe\x93\x9e\xc9"
+			  "\x3b",
+		.ilen	= 33,
+		.result	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f"
+			  "\xd3",
+		.rlen	= 17,
+	}, {
+		.key	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
+		.klen	= 16,
+		.iv	= "\x4c\x49\x3d\xec\xd8\xa8\xa0\x98"
+			  "\xc5\xb0\xd3\x1f\xde\x48\x2e\x22",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x4e\xf6\xfa\x13\xde\x43\x63\x4c"
+			  "\xe2\x04\x3e\xe4\x85\x14\xb6\x3f"
+			  "\xb1\x8f\x4c\xdb\x41\xa2\x14\x99"
+			  "\xf5\x53\x0f\x73\x86\x7e\x97\xa1"
+			  "\x4b\x56\x5b\x94\xce\xcd\x74\xcd"
+			  "\x75\xc4\x53\x01\x89\x45\x59",
+		.ilen	= 47,
+		.result	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25"
+			  "\xfe\x8d\x45\x19\x1e\xc0\x0b\x99"
+			  "\x88\x11\x39\x12\x1c\x3a\xbb",
+		.rlen	= 31,
+	}, {
+		.key	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
+		.klen	= 16,
+		.iv	= "\x89\x6e\x77\x8b\xe8\x23\x49\xb4"
+			  "\x45\x8a\x85\xb1\xd8\x6c\xf1\x28",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xa4\x9a\xb7\xfd\xa0\xd4\xd6\x47"
+			  "\x95\xf4\x58\x38\x14\x83\x27\x01"
+			  "\x4c\xed\x32\x2c\xf7\xd6\x31\xf7"
+			  "\x38\x1b\x2c\xc9\xb6\x31\xce\xaa"
+			  "\xa5\x3c\x1a\x18\x5c\xce\xb9\xdf"
+			  "\x51\x52\x77\xf2\x5e\x85\x80\x41",
+		.ilen	= 48,
+		.result	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b"
+			  "\x28\x50\x51\x9d\x24\x60\x8d\xb3"
+			  "\x49\x3e\x17\xea\xf6\x99\x5a\xdd",
+		.rlen	= 32,
+	}, {
+		.key	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
+		.klen	= 16,
+		.iv	= "\xc5\x93\xb0\x2a\xf8\x9f\xf1\xd0"
+			  "\xc6\x64\x37\x42\xd2\x90\xb3\x2e",
+		.assoc	= "\xd5",
+		.alen	= 1,
+		.input	= "\xfb\xd4\x83\x71\x9e\x63\xad\x60"
+			  "\xb9\xf9\xeb\x34\x52\x49\xcf\xb7",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
+		.klen	= 16,
+		.iv	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34",
+		.assoc	= "\x11\x81\x78\x32\x4d\xb9\x44\x73"
+			  "\x68\x75\x16\xf8\xcb\x7e\xa7",
+		.alen	= 15,
+		.input	= "\x0c\xaf\x2e\x96\xf6\x97\x08\x71"
+			  "\x7d\x3a\x84\xc4\x44\x57\x77\x7e",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
+		.klen	= 16,
+		.iv	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
+		.assoc	= "\x4e\xa5\xb2\xd1\x5d\x35\xed\x8f"
+			  "\xe8\x4f\xc8\x89\xc5\xa2\x69\xbc",
+		.alen	= 16,
+		.input	= "\xc7\x87\x09\x3b\xc7\x19\x74\x22"
+			  "\x22\xa5\x67\x10\xb2\x36\xb3\x45",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
+		.klen	= 16,
+		.iv	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41",
+		.assoc	= "\x8a\xca\xec\x70\x6d\xb1\x96\xab"
+			  "\x69\x29\x7a\x1b\xbf\xc7\x2c\xc2"
+			  "\x07",
+		.alen	= 17,
+		.input	= "\x02\xc6\x3b\x46\x65\xb2\xef\x91"
+			  "\x31\xf0\x45\x48\x8a\x2a\xed\xe4",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
+		.klen	= 16,
+		.iv	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47",
+		.assoc	= "\xc7\xef\x26\x10\x7d\x2c\x3f\xc6"
+			  "\xea\x03\x2c\xac\xb9\xeb\xef\xc9"
+			  "\x31\x6b\x08\x12\xfc\xd8\x37\x2d"
+			  "\xe0\x17\x3a\x2e\x83\x5c\x8f",
+		.alen	= 31,
+		.input	= "\x20\x85\xa8\xd0\x91\x48\x85\xf3"
+			  "\x5a\x16\xc0\x57\x68\x47\xdd\xcb",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
+		.klen	= 16,
+		.iv	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d",
+		.assoc	= "\x03\x14\x5f\xaf\x8d\xa8\xe7\xe2"
+			  "\x6b\xde\xde\x3e\xb3\x10\xb1\xcf"
+			  "\x5c\x2d\x14\x96\x01\x78\xb9\x47"
+			  "\xa1\x44\x19\x06\x5d\xbb\x2e\x2f",
+		.alen	= 32,
+		.input	= "\x6a\xf8\x8d\x9c\x42\x75\x35\x79"
+			  "\xc1\x96\xbd\x31\x6e\x69\x1b\x50",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
+		.klen	= 16,
+		.iv	= "\x31\x6f\x0b\xe6\x59\x85\xe6\x77"
+			  "\xcc\x81\x63\xab\xae\x6b\x43\x54",
+		.assoc	= "\x40",
+		.alen	= 1,
+		.input	= "\x01\x24\xb1\xba\xf6\xd3\xdf\x83"
+			  "\x70\x45\xe3\x2a\x9d\x5c\x63\x98"
+			  "\x39",
+		.ilen	= 17,
+		.result	= "\x4f",
+		.rlen	= 1,
+	}, {
+		.key	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
+		.klen	= 16,
+		.iv	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a",
+		.assoc	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37",
+		.alen	= 15,
+		.input	= "\x18\x78\xc2\x6e\xe1\xf7\xe6\x8a"
+			  "\xca\x0e\x62\x00\xa8\x21\xb5\x21"
+			  "\x3d\x36\xdb\xf7\xcc\x31\x94\x9c"
+			  "\x98\xbd\x71\x7a\xef\xa4\xfa",
+		.ilen	= 31,
+		.result	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67",
+		.rlen	= 15,
+	}, {
+		.key	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
+		.klen	= 16,
+		.iv	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
+		.assoc	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
+		.alen	= 16,
+		.input	= "\xea\xd1\x81\x75\xb4\x13\x1d\x86"
+			  "\xd4\x17\x26\xe5\xd6\x89\x39\x04"
+			  "\xa9\x6c\xca\xac\x40\x73\xb2\x4c"
+			  "\x9c\xb9\x0e\x79\x4c\x40\x65\xc6",
+		.ilen	= 32,
+		.result	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
+		.klen	= 16,
+		.iv	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66",
+		.assoc	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05",
+		.alen	= 17,
+		.input	= "\xf4\xb2\x84\xd1\x81\xfa\x98\x1c"
+			  "\x38\x2d\x69\x90\x1c\x71\x38\x98"
+			  "\x9f\xe1\x19\x3b\x63\x91\xaf\x6e"
+			  "\x4b\x07\x2c\xac\x53\xc5\xd5\xfe"
+			  "\x93",
+		.ilen	= 33,
+		.result	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69"
+			  "\xd0",
+		.rlen	= 17,
+	}, {
+		.key	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
+		.klen	= 16,
+		.iv	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d",
+		.assoc	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a",
+		.alen	= 31,
+		.input	= "\xa0\xe7\x0a\x60\xe7\xb8\x8a\xdb"
+			  "\x94\xd3\x93\xf2\x41\x86\x16\xdd"
+			  "\x4c\xe8\xe7\xe0\x62\x48\x89\x40"
+			  "\xc0\x49\x9b\x63\x32\xec\x8b\xdb"
+			  "\xdc\xa6\xea\x2c\xc2\x7f\xf5\x04"
+			  "\xcb\xe5\x47\xbb\xa7\xd1\x9d",
+		.ilen	= 47,
+		.result	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70"
+			  "\xfa\xa9\xd0\x4d\x5c\x40\x23\xcd"
+			  "\x98\x34\xab\x37\x56\xae\x32",
+		.rlen	= 31,
+	}, {
+		.key	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
+		.klen	= 16,
+		.iv	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73",
+		.assoc	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
+		.alen	= 32,
+		.input	= "\x62\xdc\x2d\x68\x2d\x71\xbb\x33"
+			  "\x13\xdf\xc0\x46\xf6\x61\x94\xa7"
+			  "\x60\xd3\xd4\xca\xd9\xbe\x82\xf3"
+			  "\xf1\x5b\xa0\xfa\x15\xba\xda\xea"
+			  "\x87\x68\x47\x08\x5d\xdd\x83\xb0"
+			  "\x60\xf4\x93\x20\xdf\x34\x8f\xea",
+		.ilen	= 48,
+		.result	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76"
+			  "\x24\x6b\xdc\xd1\x61\xe0\xa5\xe7"
+			  "\x5a\x61\x8a\x0f\x30\x0d\xd1\xec",
+		.rlen	= 32,
+	}, {
+		.key	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
+		.klen	= 16,
+		.iv	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79",
+		.assoc	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d",
+		.alen	= 33,
+		.input	= "\x84\xc5\x21\xab\xe1\xeb\xbb\x6d"
+			  "\xaa\x2a\xaf\xeb\x3b\x3b\x69\xe7"
+			  "\x2c\x47\xef\x9d\xb7\x53\x36\xb7"
+			  "\xb6\xf5\xe5\xa8\xc9\x9e\x02\xd7"
+			  "\x83\x88\xc2\xbd\x2f\xf9\x10\xc0"
+			  "\xf5\xa1\x6e\xd3\x97\x64\x82\xa3"
+			  "\xfb\xda\x2c\xb1\x94\xa1\x58\x32"
+			  "\xe8\xd4\x39\xfc\x9e\x26\xf9\xf1"
+			  "\x61\xe6\xae\x07\xf2\xe0\xa7\x44"
+			  "\x96\x28\x3b\xee\x6b\xc6\x16\x31"
+			  "\x3f",
+		.ilen	= 81,
+		.result	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c"
+			  "\x4f\x2e\xe8\x55\x66\x80\x27\x00"
+			  "\x1b\x8f\x68\xe7\x0a\x6c\x71\xc3"
+			  "\x21\x78\x55\x9d\x9c\x65\x7b\xcd"
+			  "\x0a\x34\x97\xff\x47\x37\xb0\x2a"
+			  "\x80\x0d\x19\x98\x33\xa9\x7a\xe3"
+			  "\x2e\x4c\xc6\xf3\x8c\x88\x42\x01"
+			  "\xbd",
+		.rlen	= 65,
+	}, {
+		.key	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
+		.klen	= 16,
+		.iv	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f",
+		.assoc	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
+			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
+			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
+			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
+			  "\x54",
+		.alen	= 65,
+		.input	= "\x8f\x23\x47\xfb\xf2\xac\x23\x83"
+			  "\x77\x09\xac\x74\xef\xd2\x56\xae"
+			  "\x20\x7b\x7b\xca\x45\x8e\xc8\xc2"
+			  "\x50\xbd\xc7\x44\x1c\x54\x98\xd8"
+			  "\x1f\xd0\x9a\x79\xaa\xf9\xe1\xb3"
+			  "\xb4\x98\x5a\x9b\xe4\x4d\xbf\x4e"
+			  "\x39",
+		.ilen	= 49,
+		.result	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82"
+			  "\x79\xf0\xf3\xd9\x6c\x20\xa9\x1a"
+			  "\xdc\xbc\x47\xc0\xe4\xcb\x10\x99"
+			  "\x2f",
+		.rlen	= 33,
+	}, {
+		.key	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
+		.klen	= 16,
+		.iv	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
+		.assoc	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
+		.alen	= 16,
+		.input	= "\x42\xc3\x58\xfb\x29\xe2\x4a\x56"
+			  "\xf1\xf5\xe1\x51\x55\x4b\x0a\x45"
+			  "\x46\xb5\x8d\xac\xb6\x34\xd8\x8b"
+			  "\xde\x20\x59\x77\xc1\x74\x90",
+		.ilen	= 31,
+		.result	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.rlen	= 16,
+	}, {
+		.key	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
+		.klen	= 16,
+		.iv	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
+		.assoc	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
+		.alen	= 16,
+		.input	= "\xb2\xfb\xf6\x97\x69\x7a\xe9\xec"
+			  "\xe2\x94\xa1\x8b\xa0\x2b\x60\x72"
+			  "\x1d\x04\xdd\x6a\xef\x46\x8f\x68"
+			  "\xe9\xe0\x17\x45\x70\x12",
+		.ilen	= 30,
+		.result	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.rlen	= 16,
+	}, {
+		.key	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
+		.klen	= 16,
+		.iv	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
+		.assoc	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
+		.alen	= 16,
+		.input	= "\x47\xda\x54\x42\x51\x72\xc4\x8b"
+			  "\xf5\x57\x0f\x2f\x49\x0e\x11\x3b"
+			  "\x78\x93\xec\xfc\xf4\xff\xe1\x2d",
+		.ilen	= 24,
+		.result	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.rlen	= 16,
+	},
+};
+
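  [editor's note] The decryption vectors above are the encryption vectors with .input/.result
  (and .ilen/.rlen) swapped: each ciphertext is the plaintext plus an appended authentication
  tag (16 bytes in most entries, truncated to 14 and 8 bytes in the final two). As a hedged
  illustration of how such a vector can be reproduced outside the kernel, the sketch below
  calls the standard CAESAR AEAD entry point that the SUPERCOP reference code exports; the
  build setup (compiling the crypto_aead/aegis128/ sources into the program) is an assumption,
  and the expected output is the 16-byte tag of the first vector above:

	/*
	 * Illustrative sketch, not part of the patch: regenerate the first
	 * AEGIS-128 vector (empty plaintext, empty AAD) with the SUPERCOP
	 * reference implementation.
	 */
	#include <stdio.h>

	/* CAESAR AEAD API, provided by the reference implementation. */
	int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
				const unsigned char *m, unsigned long long mlen,
				const unsigned char *ad, unsigned long long adlen,
				const unsigned char *nsec,
				const unsigned char *npub, const unsigned char *k);

	int main(void)
	{
		const unsigned char key[16] = {
			0x0f, 0xc9, 0x8e, 0x67, 0x44, 0x9e, 0xaa, 0x86,
			0x20, 0x36, 0x2c, 0x24, 0xfe, 0xc9, 0x30, 0x81,
		};
		const unsigned char iv[16] = {
			0x1e, 0x92, 0x1c, 0xcf, 0x88, 0x3d, 0x54, 0x0d,
			0x40, 0x6d, 0x59, 0x48, 0xfc, 0x92, 0x61, 0x03,
		};
		const unsigned char msg[1] = { 0 };	/* unused, mlen == 0 */
		unsigned char ct[16];			/* tag only */
		unsigned long long ctlen = 0;
		unsigned long long i;

		crypto_aead_encrypt(ct, &ctlen, msg, 0, msg, 0, NULL, iv, key);
		for (i = 0; i < ctlen; i++)
			printf("\\x%02x", ct[i]);	/* "\x07\xa5\x11\xf2..." */
		printf("\n");
		return 0;
	}
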
+/*
+ * AEGIS-128L test vectors - generated via reference implementation from
+ * SUPERCOP (https://bench.cr.yp.to/supercop.html):
+ *
+ *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
+ *   (see crypto_aead/aegis128l/)
+ */
+static const struct aead_testvec aegis128l_enc_tv_template[] = {
+	{
+		.key	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
+		.klen	= 16,
+		.iv	= "\x1e\x92\x1c\xcf\x88\x3d\x54\x0d"
+			  "\x40\x6d\x59\x48\xfc\x92\x61\x03",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x30\x4f\xf3\xe9\xb1\xfa\x81\xa6"
+			  "\x20\x72\x78\xdd\x93\xc8\x57\xef",
+		.rlen	= 16,
+	}, {
+		.key	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
+		.klen	= 16,
+		.iv	= "\x5a\xb7\x56\x6e\x98\xb9\xfd\x29"
+			  "\xc1\x47\x0b\xda\xf6\xb6\x23\x09",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x79",
+		.ilen	= 1,
+		.result	= "\xa9\x24\xa0\xb6\x2d\xdd\x29\xdb"
+			  "\x40\xb3\x71\xc5\x22\x58\x31\x77"
+			  "\x6d",
+		.rlen	= 17,
+	}, {
+		.key	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
+		.klen	= 16,
+		.iv	= "\x97\xdb\x90\x0e\xa8\x35\xa5\x45"
+			  "\x42\x21\xbd\x6b\xf0\xda\xe6\x0f",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47",
+		.ilen	= 15,
+		.result	= "\xbb\x0a\x53\xc4\xaa\x7e\xa4\x03"
+			  "\x2b\xee\x62\x99\x7b\x98\x13\x1f"
+			  "\xe0\x76\x4c\x2e\x53\x99\x4f\xbe"
+			  "\xe1\xa8\x04\x7f\xe1\x71\xbe",
+		.rlen	= 31,
+	}, {
+		.key	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
+		.klen	= 16,
+		.iv	= "\xd3\x00\xc9\xad\xb8\xb0\x4e\x61"
+			  "\xc3\xfb\x6f\xfd\xea\xff\xa9\x15",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.ilen	= 16,
+		.result	= "\x66\xdf\x6e\x71\xc0\x6e\xa4\x4c"
+			  "\x9d\xb7\x8c\x9a\xdb\x1f\xd2\x2e"
+			  "\x23\xb6\xa4\xfb\xd3\x86\xdd\xbb"
+			  "\xde\x54\x9b\xf5\x92\x8b\x93\xc5",
+		.rlen	= 32,
+	}, {
+		.key	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
+		.klen	= 16,
+		.iv	= "\x10\x25\x03\x4c\xc8\x2c\xf7\x7d"
+			  "\x44\xd5\x21\x8e\xe4\x23\x6b\x1c",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f"
+			  "\xd3",
+		.ilen	= 17,
+		.result	= "\x4f\xc3\x69\xb6\xd3\xa4\x64\x8b"
+			  "\x71\xc3\x8a\x91\x22\x4f\x1b\xd2"
+			  "\x33\x6d\x86\xbc\xf8\x2f\x06\xf9"
+			  "\x82\x64\xc7\x72\x00\x30\xfc\xf0"
+			  "\xf8",
+		.rlen	= 33,
+	}, {
+		.key	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
+		.klen	= 16,
+		.iv	= "\x4c\x49\x3d\xec\xd8\xa8\xa0\x98"
+			  "\xc5\xb0\xd3\x1f\xde\x48\x2e\x22",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25"
+			  "\xfe\x8d\x45\x19\x1e\xc0\x0b\x99"
+			  "\x88\x11\x39\x12\x1c\x3a\xbb",
+		.ilen	= 31,
+		.result	= "\xe3\x93\x15\xae\x5f\x9d\x3c\xb5"
+			  "\xd6\x9d\xee\xee\xcf\xaa\xaf\xe1"
+			  "\x45\x10\x96\xe0\xbf\x55\x0f\x4c"
+			  "\x1a\xfd\xf4\xda\x4e\x10\xde\xc9"
+			  "\x0e\x6f\xc7\x3c\x49\x94\x41\xfc"
+			  "\x59\x28\x88\x3c\x79\x10\x6b",
+		.rlen	= 47,
+	}, {
+		.key	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
+		.klen	= 16,
+		.iv	= "\x89\x6e\x77\x8b\xe8\x23\x49\xb4"
+			  "\x45\x8a\x85\xb1\xd8\x6c\xf1\x28",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b"
+			  "\x28\x50\x51\x9d\x24\x60\x8d\xb3"
+			  "\x49\x3e\x17\xea\xf6\x99\x5a\xdd",
+		.ilen	= 32,
+		.result	= "\x1c\x8e\x22\x34\xfd\xab\xe6\x0d"
+			  "\x1c\x9f\x06\x54\x8b\x0b\xb4\x40"
+			  "\xde\x11\x59\x3e\xfd\x74\xf6\x42"
+			  "\x97\x17\xf7\x24\xb6\x7e\xc4\xc6"
+			  "\x06\xa3\x94\xda\x3d\x7f\x55\x0a"
+			  "\x92\x07\x2f\xa6\xf3\x6b\x2c\xfc",
+		.rlen	= 48,
+	}, {
+		.key	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
+		.klen	= 16,
+		.iv	= "\xc5\x93\xb0\x2a\xf8\x9f\xf1\xd0"
+			  "\xc6\x64\x37\x42\xd2\x90\xb3\x2e",
+		.assoc	= "\xd5",
+		.alen	= 1,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xa0\x2a\xb4\x9a\x91\x00\x15\xb8"
+			  "\x0f\x9a\x15\x60\x0e\x9b\x13\x8f",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
+		.klen	= 16,
+		.iv	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34",
+		.assoc	= "\x11\x81\x78\x32\x4d\xb9\x44\x73"
+			  "\x68\x75\x16\xf8\xcb\x7e\xa7",
+		.alen	= 15,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x4c\x26\xad\x9c\x14\xfd\x9c\x8c"
+			  "\x84\xfb\x26\xfb\xd5\xca\x62\x39",
+		.rlen	= 16,
+	}, {
+		.key	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
+		.klen	= 16,
+		.iv	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
+		.assoc	= "\x4e\xa5\xb2\xd1\x5d\x35\xed\x8f"
+			  "\xe8\x4f\xc8\x89\xc5\xa2\x69\xbc",
+		.alen	= 16,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x45\x85\x0e\x0f\xf4\xae\x96\xa1"
+			  "\x99\x4d\x6d\xb4\x67\x32\xb0\x3a",
+		.rlen	= 16,
+	}, {
+		.key	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
+		.klen	= 16,
+		.iv	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41",
+		.assoc	= "\x8a\xca\xec\x70\x6d\xb1\x96\xab"
+			  "\x69\x29\x7a\x1b\xbf\xc7\x2c\xc2"
+			  "\x07",
+		.alen	= 17,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x33\xb1\x42\x97\x8e\x16\x7b\x63"
+			  "\x06\xba\x5b\xcb\xae\x6d\x8b\x56",
+		.rlen	= 16,
+	}, {
+		.key	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
+		.klen	= 16,
+		.iv	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47",
+		.assoc	= "\xc7\xef\x26\x10\x7d\x2c\x3f\xc6"
+			  "\xea\x03\x2c\xac\xb9\xeb\xef\xc9"
+			  "\x31\x6b\x08\x12\xfc\xd8\x37\x2d"
+			  "\xe0\x17\x3a\x2e\x83\x5c\x8f",
+		.alen	= 31,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xda\x44\x08\x8c\x2a\xa5\x07\x35"
+			  "\x0b\x54\x4e\x6d\xe3\xfd\xc4\x5f",
+		.rlen	= 16,
+	}, {
+		.key	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
+		.klen	= 16,
+		.iv	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d",
+		.assoc	= "\x03\x14\x5f\xaf\x8d\xa8\xe7\xe2"
+			  "\x6b\xde\xde\x3e\xb3\x10\xb1\xcf"
+			  "\x5c\x2d\x14\x96\x01\x78\xb9\x47"
+			  "\xa1\x44\x19\x06\x5d\xbb\x2e\x2f",
+		.alen	= 32,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x1b\xb1\xf1\xa8\x9e\xc2\xb2\x88"
+			  "\x40\x7f\x7b\x19\x7a\x52\x8c\xf0",
+		.rlen	= 16,
+	}, {
+		.key	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
+		.klen	= 16,
+		.iv	= "\x31\x6f\x0b\xe6\x59\x85\xe6\x77"
+			  "\xcc\x81\x63\xab\xae\x6b\x43\x54",
+		.assoc	= "\x40",
+		.alen	= 1,
+		.input	= "\x4f",
+		.ilen	= 1,
+		.result	= "\x6e\xc8\xfb\x15\x9d\x98\x49\xc9"
+			  "\xa0\x98\x09\x85\xbe\x56\x8e\x79"
+			  "\xf4",
+		.rlen	= 17,
+	}, {
+		.key	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
+		.klen	= 16,
+		.iv	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a",
+		.assoc	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37",
+		.alen	= 15,
+		.input	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67",
+		.ilen	= 15,
+		.result	= "\x99\x2e\x84\x50\x64\x5c\xab\x29"
+			  "\x20\xba\xb9\x2f\x62\x3a\xce\x2a"
+			  "\x75\x25\x3b\xe3\x40\xe0\x1d\xfc"
+			  "\x20\x63\x0b\x49\x7e\x97\x08",
+		.rlen	= 31,
+	}, {
+		.key	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
+		.klen	= 16,
+		.iv	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
+		.assoc	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
+		.alen	= 16,
+		.input	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.ilen	= 16,
+		.result	= "\xd9\x8e\xfd\x50\x8f\x02\x9f\xee"
+			  "\x78\x08\x12\xec\x09\xaf\x53\x14"
+			  "\x90\x3e\x3d\x76\xad\x71\x21\x08"
+			  "\x77\xe5\x4b\x15\xc2\xe6\xbc\xdb",
+		.rlen	= 32,
+	}, {
+		.key	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
+		.klen	= 16,
+		.iv	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66",
+		.assoc	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05",
+		.alen	= 17,
+		.input	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69"
+			  "\xd0",
+		.ilen	= 17,
+		.result	= "\xf3\xe7\x95\x86\xcf\x34\x95\x96"
+			  "\x17\xfe\x1b\xae\x1b\x31\xf2\x1a"
+			  "\xbd\xbc\xc9\x4e\x11\x29\x09\x5c"
+			  "\x05\xd3\xb4\x2e\x4a\x74\x59\x49"
+			  "\x7d",
+		.rlen	= 33,
+	}, {
+		.key	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
+		.klen	= 16,
+		.iv	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d",
+		.assoc	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a",
+		.alen	= 31,
+		.input	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70"
+			  "\xfa\xa9\xd0\x4d\x5c\x40\x23\xcd"
+			  "\x98\x34\xab\x37\x56\xae\x32",
+		.ilen	= 31,
+		.result	= "\x06\x96\xb2\xbf\x63\xf4\x1e\x24"
+			  "\x0d\x19\x15\x61\x65\x3b\x06\x26"
+			  "\x71\xe8\x7e\x16\xdb\x96\x01\x01"
+			  "\x52\xcd\x49\x5b\x07\x33\x4e\xe7"
+			  "\xaa\x91\xf5\xd5\xc6\xfe\x41\xb5"
+			  "\xed\x90\xce\xb9\xcd\xcc\xa1",
+		.rlen	= 47,
+	}, {
+		.key	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
+		.klen	= 16,
+		.iv	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73",
+		.assoc	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
+		.alen	= 32,
+		.input	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76"
+			  "\x24\x6b\xdc\xd1\x61\xe0\xa5\xe7"
+			  "\x5a\x61\x8a\x0f\x30\x0d\xd1\xec",
+		.ilen	= 32,
+		.result	= "\xf9\xd7\xee\x17\xfd\x24\xcd\xf1"
+			  "\xbc\x0f\x35\x97\x97\x0c\x4b\x18"
+			  "\xce\x58\xc8\x3b\xd4\x85\x93\x79"
+			  "\xcc\x9c\xea\xc1\x73\x13\x0b\x4c"
+			  "\xcc\x6f\x28\xf8\xa4\x4e\xb8\x56"
+			  "\x64\x4e\x47\xce\xb2\xb4\x92\xb4",
+		.rlen	= 48,
+	}, {
+		.key	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
+		.klen	= 16,
+		.iv	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79",
+		.assoc	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d",
+		.alen	= 33,
+		.input	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c"
+			  "\x4f\x2e\xe8\x55\x66\x80\x27\x00"
+			  "\x1b\x8f\x68\xe7\x0a\x6c\x71\xc3"
+			  "\x21\x78\x55\x9d\x9c\x65\x7b\xcd"
+			  "\x0a\x34\x97\xff\x47\x37\xb0\x2a"
+			  "\x80\x0d\x19\x98\x33\xa9\x7a\xe3"
+			  "\x2e\x4c\xc6\xf3\x8c\x88\x42\x01"
+			  "\xbd",
+		.ilen	= 65,
+		.result	= "\x58\xfa\x3a\x3d\xd9\x88\x63\xe8"
+			  "\xc5\x78\x50\x8b\x4a\xc9\xdf\x7f"
+			  "\x4b\xfa\xc8\x2e\x67\x43\xf3\x63"
+			  "\x42\x8e\x99\x5a\x9c\x0b\x84\x77"
+			  "\xbc\x46\x76\x48\x82\xc7\x57\x96"
+			  "\xe1\x65\xd1\xed\x1d\xdd\x80\x24"
+			  "\xa6\x4d\xa9\xf1\x53\x8b\x5e\x0e"
+			  "\x26\xb9\xcc\x37\xe5\x43\xe1\x5a"
+			  "\x8a\xd6\x8c\x5a\xe4\x95\xd1\x8d"
+			  "\xf7\x33\x64\xc1\xd3\xf2\xfc\x35"
+			  "\x01",
+		.rlen	= 81,
+	}, {
+		.key	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
+		.klen	= 16,
+		.iv	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f",
+		.assoc	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
+			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
+			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
+			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
+			  "\x54",
+		.alen	= 65,
+		.input	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82"
+			  "\x79\xf0\xf3\xd9\x6c\x20\xa9\x1a"
+			  "\xdc\xbc\x47\xc0\xe4\xcb\x10\x99"
+			  "\x2f",
+		.ilen	= 33,
+		.result	= "\x4c\xa9\xac\x71\xed\x10\xa6\x24"
+			  "\xb7\xa7\xdf\x8b\xf5\xc2\x41\xcb"
+			  "\x05\xc9\xd6\x97\xb6\x10\x7f\x17"
+			  "\xc2\xc0\x93\xcf\xe0\x94\xfd\x99"
+			  "\xf2\x62\x25\x28\x01\x23\x6f\x8b"
+			  "\x04\x52\xbc\xb0\x3e\x66\x52\x90"
+			  "\x9f",
+		.rlen	= 49,
+	}, {
+		.key	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
+		.klen	= 16,
+		.iv	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
+		.assoc	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
+		.alen	= 16,
+		.input	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.ilen	= 16,
+		.result	= "\x6d\xed\x04\x7a\x2f\x0c\x30\xa5"
+			  "\x96\xe6\x97\xe4\x10\xeb\x40\x95"
+			  "\xc5\x9a\xdf\x31\xd5\xa5\xa6\xec"
+			  "\x05\xa8\x31\x50\x11\x19\x44",
+		.rlen	= 31,
+	}, {
+		.key	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
+		.klen	= 16,
+		.iv	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
+		.assoc	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
+		.alen	= 16,
+		.input	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.ilen	= 16,
+		.result	= "\x30\x95\x7d\xea\xdc\x62\xc0\x88"
+			  "\xa1\xe3\x8d\x8c\xac\x04\x10\xa7"
+			  "\xfa\xfa\x07\xbd\xa0\xf0\x36\xeb"
+			  "\x21\x93\x2e\x31\x84\x83",
+		.rlen	= 30,
+	}, {
+		.key	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
+		.klen	= 16,
+		.iv	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
+		.assoc	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
+		.alen	= 16,
+		.input	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.ilen	= 16,
+		.result	= "\x93\xcd\xee\xd4\xcb\x9d\x8d\x16"
+			  "\x63\x0d\x43\xd5\x49\xca\xa8\x85"
+			  "\x49\xc0\xae\x13\xbc\x26\x1d\x4b",
+		.rlen	= 24,
+	},
+};
+
+static const struct aead_testvec aegis128l_dec_tv_template[] = {
+	{
+		.key	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
+		.klen	= 16,
+		.iv	= "\x1e\x92\x1c\xcf\x88\x3d\x54\x0d"
+			  "\x40\x6d\x59\x48\xfc\x92\x61\x03",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x30\x4f\xf3\xe9\xb1\xfa\x81\xa6"
+			  "\x20\x72\x78\xdd\x93\xc8\x57\xef",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
+		.klen	= 16,
+		.iv	= "\x5a\xb7\x56\x6e\x98\xb9\xfd\x29"
+			  "\xc1\x47\x0b\xda\xf6\xb6\x23\x09",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xa9\x24\xa0\xb6\x2d\xdd\x29\xdb"
+			  "\x40\xb3\x71\xc5\x22\x58\x31\x77"
+			  "\x6d",
+		.ilen	= 17,
+		.result	= "\x79",
+		.rlen	= 1,
+	}, {
+		.key	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
+		.klen	= 16,
+		.iv	= "\x97\xdb\x90\x0e\xa8\x35\xa5\x45"
+			  "\x42\x21\xbd\x6b\xf0\xda\xe6\x0f",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xbb\x0a\x53\xc4\xaa\x7e\xa4\x03"
+			  "\x2b\xee\x62\x99\x7b\x98\x13\x1f"
+			  "\xe0\x76\x4c\x2e\x53\x99\x4f\xbe"
+			  "\xe1\xa8\x04\x7f\xe1\x71\xbe",
+		.ilen	= 31,
+		.result	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47",
+		.rlen	= 15,
+	}, {
+		.key	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
+		.klen	= 16,
+		.iv	= "\xd3\x00\xc9\xad\xb8\xb0\x4e\x61"
+			  "\xc3\xfb\x6f\xfd\xea\xff\xa9\x15",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x66\xdf\x6e\x71\xc0\x6e\xa4\x4c"
+			  "\x9d\xb7\x8c\x9a\xdb\x1f\xd2\x2e"
+			  "\x23\xb6\xa4\xfb\xd3\x86\xdd\xbb"
+			  "\xde\x54\x9b\xf5\x92\x8b\x93\xc5",
+		.ilen	= 32,
+		.result	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.rlen	= 16,
+	}, {
+		.key	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
+		.klen	= 16,
+		.iv	= "\x10\x25\x03\x4c\xc8\x2c\xf7\x7d"
+			  "\x44\xd5\x21\x8e\xe4\x23\x6b\x1c",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x4f\xc3\x69\xb6\xd3\xa4\x64\x8b"
+			  "\x71\xc3\x8a\x91\x22\x4f\x1b\xd2"
+			  "\x33\x6d\x86\xbc\xf8\x2f\x06\xf9"
+			  "\x82\x64\xc7\x72\x00\x30\xfc\xf0"
+			  "\xf8",
+		.ilen	= 33,
+		.result	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f"
+			  "\xd3",
+		.rlen	= 17,
+	}, {
+		.key	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
+		.klen	= 16,
+		.iv	= "\x4c\x49\x3d\xec\xd8\xa8\xa0\x98"
+			  "\xc5\xb0\xd3\x1f\xde\x48\x2e\x22",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xe3\x93\x15\xae\x5f\x9d\x3c\xb5"
+			  "\xd6\x9d\xee\xee\xcf\xaa\xaf\xe1"
+			  "\x45\x10\x96\xe0\xbf\x55\x0f\x4c"
+			  "\x1a\xfd\xf4\xda\x4e\x10\xde\xc9"
+			  "\x0e\x6f\xc7\x3c\x49\x94\x41\xfc"
+			  "\x59\x28\x88\x3c\x79\x10\x6b",
+		.ilen	= 47,
+		.result	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25"
+			  "\xfe\x8d\x45\x19\x1e\xc0\x0b\x99"
+			  "\x88\x11\x39\x12\x1c\x3a\xbb",
+		.rlen	= 31,
+	}, {
+		.key	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
+		.klen	= 16,
+		.iv	= "\x89\x6e\x77\x8b\xe8\x23\x49\xb4"
+			  "\x45\x8a\x85\xb1\xd8\x6c\xf1\x28",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x1c\x8e\x22\x34\xfd\xab\xe6\x0d"
+			  "\x1c\x9f\x06\x54\x8b\x0b\xb4\x40"
+			  "\xde\x11\x59\x3e\xfd\x74\xf6\x42"
+			  "\x97\x17\xf7\x24\xb6\x7e\xc4\xc6"
+			  "\x06\xa3\x94\xda\x3d\x7f\x55\x0a"
+			  "\x92\x07\x2f\xa6\xf3\x6b\x2c\xfc",
+		.ilen	= 48,
+		.result	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b"
+			  "\x28\x50\x51\x9d\x24\x60\x8d\xb3"
+			  "\x49\x3e\x17\xea\xf6\x99\x5a\xdd",
+		.rlen	= 32,
+	}, {
+		.key	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
+		.klen	= 16,
+		.iv	= "\xc5\x93\xb0\x2a\xf8\x9f\xf1\xd0"
+			  "\xc6\x64\x37\x42\xd2\x90\xb3\x2e",
+		.assoc	= "\xd5",
+		.alen	= 1,
+		.input	= "\xa0\x2a\xb4\x9a\x91\x00\x15\xb8"
+			  "\x0f\x9a\x15\x60\x0e\x9b\x13\x8f",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
+		.klen	= 16,
+		.iv	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34",
+		.assoc	= "\x11\x81\x78\x32\x4d\xb9\x44\x73"
+			  "\x68\x75\x16\xf8\xcb\x7e\xa7",
+		.alen	= 15,
+		.input	= "\x4c\x26\xad\x9c\x14\xfd\x9c\x8c"
+			  "\x84\xfb\x26\xfb\xd5\xca\x62\x39",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
+		.klen	= 16,
+		.iv	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
+		.assoc	= "\x4e\xa5\xb2\xd1\x5d\x35\xed\x8f"
+			  "\xe8\x4f\xc8\x89\xc5\xa2\x69\xbc",
+		.alen	= 16,
+		.input	= "\x45\x85\x0e\x0f\xf4\xae\x96\xa1"
+			  "\x99\x4d\x6d\xb4\x67\x32\xb0\x3a",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
+		.klen	= 16,
+		.iv	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41",
+		.assoc	= "\x8a\xca\xec\x70\x6d\xb1\x96\xab"
+			  "\x69\x29\x7a\x1b\xbf\xc7\x2c\xc2"
+			  "\x07",
+		.alen	= 17,
+		.input	= "\x33\xb1\x42\x97\x8e\x16\x7b\x63"
+			  "\x06\xba\x5b\xcb\xae\x6d\x8b\x56",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
+		.klen	= 16,
+		.iv	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47",
+		.assoc	= "\xc7\xef\x26\x10\x7d\x2c\x3f\xc6"
+			  "\xea\x03\x2c\xac\xb9\xeb\xef\xc9"
+			  "\x31\x6b\x08\x12\xfc\xd8\x37\x2d"
+			  "\xe0\x17\x3a\x2e\x83\x5c\x8f",
+		.alen	= 31,
+		.input	= "\xda\x44\x08\x8c\x2a\xa5\x07\x35"
+			  "\x0b\x54\x4e\x6d\xe3\xfd\xc4\x5f",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
+		.klen	= 16,
+		.iv	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d",
+		.assoc	= "\x03\x14\x5f\xaf\x8d\xa8\xe7\xe2"
+			  "\x6b\xde\xde\x3e\xb3\x10\xb1\xcf"
+			  "\x5c\x2d\x14\x96\x01\x78\xb9\x47"
+			  "\xa1\x44\x19\x06\x5d\xbb\x2e\x2f",
+		.alen	= 32,
+		.input	= "\x1b\xb1\xf1\xa8\x9e\xc2\xb2\x88"
+			  "\x40\x7f\x7b\x19\x7a\x52\x8c\xf0",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
+		.klen	= 16,
+		.iv	= "\x31\x6f\x0b\xe6\x59\x85\xe6\x77"
+			  "\xcc\x81\x63\xab\xae\x6b\x43\x54",
+		.assoc	= "\x40",
+		.alen	= 1,
+		.input	= "\x6e\xc8\xfb\x15\x9d\x98\x49\xc9"
+			  "\xa0\x98\x09\x85\xbe\x56\x8e\x79"
+			  "\xf4",
+		.ilen	= 17,
+		.result	= "\x4f",
+		.rlen	= 1,
+	}, {
+		.key	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
+		.klen	= 16,
+		.iv	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a",
+		.assoc	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37",
+		.alen	= 15,
+		.input	= "\x99\x2e\x84\x50\x64\x5c\xab\x29"
+			  "\x20\xba\xb9\x2f\x62\x3a\xce\x2a"
+			  "\x75\x25\x3b\xe3\x40\xe0\x1d\xfc"
+			  "\x20\x63\x0b\x49\x7e\x97\x08",
+		.ilen	= 31,
+		.result	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67",
+		.rlen	= 15,
+	}, {
+		.key	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
+		.klen	= 16,
+		.iv	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
+		.assoc	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
+		.alen	= 16,
+		.input	= "\xd9\x8e\xfd\x50\x8f\x02\x9f\xee"
+			  "\x78\x08\x12\xec\x09\xaf\x53\x14"
+			  "\x90\x3e\x3d\x76\xad\x71\x21\x08"
+			  "\x77\xe5\x4b\x15\xc2\xe6\xbc\xdb",
+		.ilen	= 32,
+		.result	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
+		.klen	= 16,
+		.iv	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66",
+		.assoc	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05",
+		.alen	= 17,
+		.input	= "\xf3\xe7\x95\x86\xcf\x34\x95\x96"
+			  "\x17\xfe\x1b\xae\x1b\x31\xf2\x1a"
+			  "\xbd\xbc\xc9\x4e\x11\x29\x09\x5c"
+			  "\x05\xd3\xb4\x2e\x4a\x74\x59\x49"
+			  "\x7d",
+		.ilen	= 33,
+		.result	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69"
+			  "\xd0",
+		.rlen	= 17,
+	}, {
+		.key	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
+		.klen	= 16,
+		.iv	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d",
+		.assoc	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a",
+		.alen	= 31,
+		.input	= "\x06\x96\xb2\xbf\x63\xf4\x1e\x24"
+			  "\x0d\x19\x15\x61\x65\x3b\x06\x26"
+			  "\x71\xe8\x7e\x16\xdb\x96\x01\x01"
+			  "\x52\xcd\x49\x5b\x07\x33\x4e\xe7"
+			  "\xaa\x91\xf5\xd5\xc6\xfe\x41\xb5"
+			  "\xed\x90\xce\xb9\xcd\xcc\xa1",
+		.ilen	= 47,
+		.result	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70"
+			  "\xfa\xa9\xd0\x4d\x5c\x40\x23\xcd"
+			  "\x98\x34\xab\x37\x56\xae\x32",
+		.rlen	= 31,
+	}, {
+		.key	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
+		.klen	= 16,
+		.iv	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73",
+		.assoc	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
+		.alen	= 32,
+		.input	= "\xf9\xd7\xee\x17\xfd\x24\xcd\xf1"
+			  "\xbc\x0f\x35\x97\x97\x0c\x4b\x18"
+			  "\xce\x58\xc8\x3b\xd4\x85\x93\x79"
+			  "\xcc\x9c\xea\xc1\x73\x13\x0b\x4c"
+			  "\xcc\x6f\x28\xf8\xa4\x4e\xb8\x56"
+			  "\x64\x4e\x47\xce\xb2\xb4\x92\xb4",
+		.ilen	= 48,
+		.result	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76"
+			  "\x24\x6b\xdc\xd1\x61\xe0\xa5\xe7"
+			  "\x5a\x61\x8a\x0f\x30\x0d\xd1\xec",
+		.rlen	= 32,
+	}, {
+		.key	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
+		.klen	= 16,
+		.iv	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79",
+		.assoc	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d",
+		.alen	= 33,
+		.input	= "\x58\xfa\x3a\x3d\xd9\x88\x63\xe8"
+			  "\xc5\x78\x50\x8b\x4a\xc9\xdf\x7f"
+			  "\x4b\xfa\xc8\x2e\x67\x43\xf3\x63"
+			  "\x42\x8e\x99\x5a\x9c\x0b\x84\x77"
+			  "\xbc\x46\x76\x48\x82\xc7\x57\x96"
+			  "\xe1\x65\xd1\xed\x1d\xdd\x80\x24"
+			  "\xa6\x4d\xa9\xf1\x53\x8b\x5e\x0e"
+			  "\x26\xb9\xcc\x37\xe5\x43\xe1\x5a"
+			  "\x8a\xd6\x8c\x5a\xe4\x95\xd1\x8d"
+			  "\xf7\x33\x64\xc1\xd3\xf2\xfc\x35"
+			  "\x01",
+		.ilen	= 81,
+		.result	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c"
+			  "\x4f\x2e\xe8\x55\x66\x80\x27\x00"
+			  "\x1b\x8f\x68\xe7\x0a\x6c\x71\xc3"
+			  "\x21\x78\x55\x9d\x9c\x65\x7b\xcd"
+			  "\x0a\x34\x97\xff\x47\x37\xb0\x2a"
+			  "\x80\x0d\x19\x98\x33\xa9\x7a\xe3"
+			  "\x2e\x4c\xc6\xf3\x8c\x88\x42\x01"
+			  "\xbd",
+		.rlen	= 65,
+	}, {
+		.key	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
+		.klen	= 16,
+		.iv	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f",
+		.assoc	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
+			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
+			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
+			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
+			  "\x54",
+		.alen	= 65,
+		.input	= "\x4c\xa9\xac\x71\xed\x10\xa6\x24"
+			  "\xb7\xa7\xdf\x8b\xf5\xc2\x41\xcb"
+			  "\x05\xc9\xd6\x97\xb6\x10\x7f\x17"
+			  "\xc2\xc0\x93\xcf\xe0\x94\xfd\x99"
+			  "\xf2\x62\x25\x28\x01\x23\x6f\x8b"
+			  "\x04\x52\xbc\xb0\x3e\x66\x52\x90"
+			  "\x9f",
+		.ilen	= 49,
+		.result	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82"
+			  "\x79\xf0\xf3\xd9\x6c\x20\xa9\x1a"
+			  "\xdc\xbc\x47\xc0\xe4\xcb\x10\x99"
+			  "\x2f",
+		.rlen	= 33,
+	}, {
+		.key	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
+		.klen	= 16,
+		.iv	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
+		.assoc	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
+		.alen	= 16,
+		.input	= "\x6d\xed\x04\x7a\x2f\x0c\x30\xa5"
+			  "\x96\xe6\x97\xe4\x10\xeb\x40\x95"
+			  "\xc5\x9a\xdf\x31\xd5\xa5\xa6\xec"
+			  "\x05\xa8\x31\x50\x11\x19\x44",
+		.ilen	= 31,
+		.result	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.rlen	= 16,
+	}, {
+		.key	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
+		.klen	= 16,
+		.iv	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
+		.assoc	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
+		.alen	= 16,
+		.input	= "\x30\x95\x7d\xea\xdc\x62\xc0\x88"
+			  "\xa1\xe3\x8d\x8c\xac\x04\x10\xa7"
+			  "\xfa\xfa\x07\xbd\xa0\xf0\x36\xeb"
+			  "\x21\x93\x2e\x31\x84\x83",
+		.ilen	= 30,
+		.result	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.rlen	= 16,
+	}, {
+		.key	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
+		.klen	= 16,
+		.iv	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
+		.assoc	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
+		.alen	= 16,
+		.input	= "\x93\xcd\xee\xd4\xcb\x9d\x8d\x16"
+			  "\x63\x0d\x43\xd5\x49\xca\xa8\x85"
+			  "\x49\xc0\xae\x13\xbc\x26\x1d\x4b",
+		.ilen	= 24,
+		.result	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.rlen	= 16,
+	},
+};
+
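  [editor's note] As with AEGIS-128, each aegis128l_dec_tv_template entry is the matching
  aegis128l_enc_tv_template entry with .input/.result and .ilen/.rlen exchanged. Most
  encryption vectors append the full 16-byte tag (rlen == ilen + 16); the last two entries
  of each table exercise truncated tags of 14 and 8 bytes. A minimal standalone check of
  that pattern, using a pared-down mirror of testmgr.h's aead_testvec (the real struct has
  more members), might look like this sketch:

	#include <assert.h>
	#include <stddef.h>

	/* Pared-down mirror of the testmgr.h struct; illustrative only. */
	struct aead_testvec {
		const char *key, *iv, *assoc, *input, *result;
		unsigned char klen;
		unsigned short alen, ilen, rlen;
	};

	/*
	 * Every AEGIS encryption vector appends an authentication tag of
	 * 8..16 bytes to the plaintext, so the ciphertext length exceeds
	 * the plaintext length by exactly the tag size.
	 */
	static void check_enc_vectors(const struct aead_testvec *tv, size_t n)
	{
		size_t i;

		for (i = 0; i < n; i++) {
			unsigned int tag = tv[i].rlen - tv[i].ilen;

			assert(tag >= 8 && tag <= 16);
		}
	}
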
+/*
+ * AEGIS-256 test vectors - generated via reference implementation from
+ * SUPERCOP (https://bench.cr.yp.to/supercop.html):
+ *
+ *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
+ *   (see crypto_aead/aegis256/)
+ */
+static const struct aead_testvec aegis256_enc_tv_template[] = {
+	{
+		.key	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81"
+			  "\xca\xb0\x82\x21\x41\xa8\xe0\x06"
+			  "\x30\x0b\x37\xf6\xb6\x17\xe7\xb5",
+		.klen	= 32,
+		.iv	= "\x1e\x92\x1c\xcf\x88\x3d\x54\x0d"
+			  "\x40\x6d\x59\x48\xfc\x92\x61\x03"
+			  "\x95\x61\x05\x42\x82\x50\xc0\x0c"
+			  "\x60\x16\x6f\xec\x6d\x2f\xcf\x6b",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xd5\x65\x3a\xa9\x03\x51\xd7\xaa"
+			  "\xfa\x4b\xd8\xa2\x41\x9b\xc1\xb2",
+		.rlen	= 16,
+	}, {
+		.key	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87"
+			  "\xf4\x72\x8e\xa5\x46\x48\x62\x20"
+			  "\xf1\x38\x16\xce\x90\x76\x87\x8c",
+		.klen	= 32,
+		.iv	= "\x5a\xb7\x56\x6e\x98\xb9\xfd\x29"
+			  "\xc1\x47\x0b\xda\xf6\xb6\x23\x09"
+			  "\xbf\x23\x11\xc6\x87\xf0\x42\x26"
+			  "\x22\x44\x4e\xc4\x47\x8e\x6e\x41",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x79",
+		.ilen	= 1,
+		.result	= "\x84\xa2\x8f\xad\xdb\x8d\x2c\x16"
+			  "\x9e\x89\xd9\x06\xa6\xa8\x14\x29"
+			  "\x8b",
+		.rlen	= 17,
+	}, {
+		.key	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e"
+			  "\x1f\x35\x9a\x29\x4b\xe8\xe4\x39"
+			  "\xb3\x66\xf5\xa6\x6a\xd5\x26\x62",
+		.klen	= 32,
+		.iv	= "\x97\xdb\x90\x0e\xa8\x35\xa5\x45"
+			  "\x42\x21\xbd\x6b\xf0\xda\xe6\x0f"
+			  "\xe9\xe5\x1d\x4a\x8c\x90\xc4\x40"
+			  "\xe3\x71\x2d\x9c\x21\xed\x0e\x18",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47",
+		.ilen	= 15,
+		.result	= "\x09\x94\x1f\xa6\x13\xc3\x74\x75"
+			  "\x17\xad\x8a\x0e\xd8\x66\x9a\x28"
+			  "\xd7\x30\x66\x09\x2a\xdc\xfa\x2a"
+			  "\x9f\x3b\xd7\xdd\x66\xd1\x2b",
+		.rlen	= 31,
+	}, {
+		.key	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94"
+			  "\x49\xf7\xa5\xad\x50\x88\x66\x53"
+			  "\x74\x94\xd4\x7f\x44\x34\xc5\x39",
+		.klen	= 32,
+		.iv	= "\xd3\x00\xc9\xad\xb8\xb0\x4e\x61"
+			  "\xc3\xfb\x6f\xfd\xea\xff\xa9\x15"
+			  "\x14\xa8\x28\xce\x92\x30\x46\x59"
+			  "\xa4\x9f\x0b\x75\xfb\x4c\xad\xee",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.ilen	= 16,
+		.result	= "\x8a\x46\xa2\x22\x8c\x03\xab\x6f"
+			  "\x54\x63\x4e\x7f\xc9\x8e\xfa\x70"
+			  "\x7b\xe5\x8d\x78\xbc\xe9\xb6\xa1"
+			  "\x29\x17\xc8\x3b\x52\xa4\x98\x72",
+		.rlen	= 32,
+	}, {
+		.key	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a"
+			  "\x74\xb9\xb1\x32\x55\x28\xe8\x6d"
+			  "\x35\xc1\xb3\x57\x1f\x93\x64\x0f",
+		.klen	= 32,
+		.iv	= "\x10\x25\x03\x4c\xc8\x2c\xf7\x7d"
+			  "\x44\xd5\x21\x8e\xe4\x23\x6b\x1c"
+			  "\x3e\x6a\x34\x53\x97\xd0\xc8\x73"
+			  "\x66\xcd\xea\x4d\xd5\xab\x4c\xc5",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f"
+			  "\xd3",
+		.ilen	= 17,
+		.result	= "\x71\x6b\x37\x0b\x02\x61\x28\x12"
+			  "\x83\xab\x66\x90\x84\xc7\xd1\xc5"
+			  "\xb2\x7a\xb4\x7b\xb4\xfe\x02\xb2"
+			  "\xc0\x00\x39\x13\xb5\x51\x68\x44"
+			  "\xad",
+		.rlen	= 33,
+	}, {
+		.key	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0"
+			  "\x9e\x7c\xbc\xb6\x5b\xc8\x6a\x86"
+			  "\xf7\xef\x91\x30\xf9\xf2\x04\xe6",
+		.klen	= 32,
+		.iv	= "\x4c\x49\x3d\xec\xd8\xa8\xa0\x98"
+			  "\xc5\xb0\xd3\x1f\xde\x48\x2e\x22"
+			  "\x69\x2c\x3f\xd7\x9c\x70\x4a\x8d"
+			  "\x27\xfa\xc9\x26\xaf\x0a\xeb\x9c",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25"
+			  "\xfe\x8d\x45\x19\x1e\xc0\x0b\x99"
+			  "\x88\x11\x39\x12\x1c\x3a\xbb",
+		.ilen	= 31,
+		.result	= "\xaf\xa4\x34\x0d\x59\xe6\x1c\x2f"
+			  "\x06\x3b\x52\x18\x49\x75\x1b\xf0"
+			  "\x53\x09\x72\x7b\x45\x79\xe0\xbe"
+			  "\x89\x85\x23\x15\xb8\x79\x07\x4c"
+			  "\x53\x7a\x15\x37\x0a\xee\xb7\xfb"
+			  "\xc4\x1f\x12\x27\xcf\x77\x90",
+		.rlen	= 47,
+	}, {
+		.key	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6"
+			  "\xc8\x3e\xc8\x3a\x60\x68\xec\xa0"
+			  "\xb8\x1c\x70\x08\xd3\x51\xa3\xbd",
+		.klen	= 32,
+		.iv	= "\x89\x6e\x77\x8b\xe8\x23\x49\xb4"
+			  "\x45\x8a\x85\xb1\xd8\x6c\xf1\x28"
+			  "\x93\xef\x4b\x5b\xa1\x10\xcc\xa6"
+			  "\xe8\x28\xa8\xfe\x89\x69\x8b\x72",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b"
+			  "\x28\x50\x51\x9d\x24\x60\x8d\xb3"
+			  "\x49\x3e\x17\xea\xf6\x99\x5a\xdd",
+		.ilen	= 32,
+		.result	= "\xe2\xc9\x0b\x33\x31\x02\xb3\xb4"
+			  "\x33\xfe\xeb\xa8\xb7\x9b\xb2\xd7"
+			  "\xeb\x0f\x05\x2b\xba\xb3\xca\xef"
+			  "\xf6\xd1\xb6\xc0\xb9\x9b\x85\xc5"
+			  "\xbf\x7a\x3e\xcc\x31\x76\x09\x80"
+			  "\x32\x5d\xbb\xe8\x38\x0e\x77\xd3",
+		.rlen	= 48,
+	}, {
+		.key	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad"
+			  "\xf3\x00\xd4\xbf\x65\x08\x6e\xb9"
+			  "\x7a\x4a\x4f\xe0\xad\xb0\x42\x93",
+		.klen	= 32,
+		.iv	= "\xc5\x93\xb0\x2a\xf8\x9f\xf1\xd0"
+			  "\xc6\x64\x37\x42\xd2\x90\xb3\x2e"
+			  "\xbd\xb1\x57\xe0\xa6\xb0\x4e\xc0"
+			  "\xaa\x55\x87\xd6\x63\xc8\x2a\x49",
+		.assoc	= "\xd5",
+		.alen	= 1,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x96\x43\x30\xca\x6c\x4f\xd7\x12"
+			  "\xba\xd9\xb3\x18\x86\xdf\xc3\x52",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3"
+			  "\x1d\xc3\xdf\x43\x6a\xa8\xf0\xd3"
+			  "\x3b\x77\x2e\xb9\x87\x0f\xe1\x6a",
+		.klen	= 32,
+		.iv	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34"
+			  "\xe8\x73\x62\x64\xab\x50\xd0\xda"
+			  "\x6b\x83\x66\xaf\x3e\x27\xc9\x1f",
+		.assoc	= "\x11\x81\x78\x32\x4d\xb9\x44\x73"
+			  "\x68\x75\x16\xf8\xcb\x7e\xa7",
+		.alen	= 15,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x2f\xab\x45\xe2\xa7\x46\xc5\x83"
+			  "\x11\x9f\xb0\x74\xee\xc7\x03\xdd",
+		.rlen	= 16,
+	}, {
+		.key	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9"
+			  "\x47\x85\xeb\xc7\x6f\x48\x72\xed"
+			  "\xfc\xa5\x0d\x91\x61\x6e\x81\x40",
+		.klen	= 32,
+		.iv	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b"
+			  "\x12\x35\x6e\xe8\xb0\xf0\x52\xf3"
+			  "\x2d\xb0\x45\x87\x18\x86\x68\xf6",
+		.assoc	= "\x4e\xa5\xb2\xd1\x5d\x35\xed\x8f"
+			  "\xe8\x4f\xc8\x89\xc5\xa2\x69\xbc",
+		.alen	= 16,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x16\x44\x73\x33\x5d\xf2\xb9\x04"
+			  "\x6b\x79\x98\xef\xdb\xd5\xc5\xf1",
+		.rlen	= 16,
+	}, {
+		.key	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf"
+			  "\x72\x47\xf6\x4b\x74\xe8\xf4\x06"
+			  "\xbe\xd3\xec\x6a\x3b\xcd\x20\x17",
+		.klen	= 32,
+		.iv	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
+			  "\x3c\xf8\x79\x6c\xb6\x90\xd4\x0d"
+			  "\xee\xde\x23\x60\xf2\xe5\x08\xcc",
+		.assoc	= "\x8a\xca\xec\x70\x6d\xb1\x96\xab"
+			  "\x69\x29\x7a\x1b\xbf\xc7\x2c\xc2"
+			  "\x07",
+		.alen	= 17,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xa4\x9b\xb8\x47\xc0\xed\x7a\x45"
+			  "\x98\x54\x8c\xed\x3d\x17\xf0\xdd",
+		.rlen	= 16,
+	}, {
+		.key	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6"
+			  "\x9c\x0a\x02\xd0\x79\x88\x76\x20"
+			  "\x7f\x00\xca\x42\x15\x2c\xbf\xed",
+		.klen	= 32,
+		.iv	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
+			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
+			  "\xaf\x0b\x02\x38\xcc\x44\xa7\xa3",
+		.assoc	= "\xc7\xef\x26\x10\x7d\x2c\x3f\xc6"
+			  "\xea\x03\x2c\xac\xb9\xeb\xef\xc9"
+			  "\x31\x6b\x08\x12\xfc\xd8\x37\x2d"
+			  "\xe0\x17\x3a\x2e\x83\x5c\x8f",
+		.alen	= 31,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x20\x24\xe2\x33\x5c\x60\xc9\xf0"
+			  "\xa4\x96\x2f\x0d\x53\xc2\xf8\xfc",
+		.rlen	= 16,
+	}, {
+		.key	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc"
+			  "\xc6\xcc\x0e\x54\x7f\x28\xf8\x3a"
+			  "\x40\x2e\xa9\x1a\xf0\x8b\x5e\xc4",
+		.klen	= 32,
+		.iv	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
+			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
+			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a",
+		.assoc	= "\x03\x14\x5f\xaf\x8d\xa8\xe7\xe2"
+			  "\x6b\xde\xde\x3e\xb3\x10\xb1\xcf"
+			  "\x5c\x2d\x14\x96\x01\x78\xb9\x47"
+			  "\xa1\x44\x19\x06\x5d\xbb\x2e\x2f",
+		.alen	= 32,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x6f\x4a\xb9\xe0\xff\x51\xa3\xf1"
+			  "\xd2\x64\x3e\x66\x6a\xb2\x03\xc0",
+		.rlen	= 16,
+	}, {
+		.key	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2"
+			  "\xf1\x8e\x19\xd8\x84\xc8\x7a\x53"
+			  "\x02\x5b\x88\xf3\xca\xea\xfe\x9b",
+		.klen	= 32,
+		.iv	= "\x31\x6f\x0b\xe6\x59\x85\xe6\x77"
+			  "\xcc\x81\x63\xab\xae\x6b\x43\x54"
+			  "\xbb\x3f\x9c\xf9\xc5\x70\x5a\x5a"
+			  "\x32\x67\xc0\xe9\x80\x02\xe5\x50",
+		.assoc	= "\x40",
+		.alen	= 1,
+		.input	= "\x4f",
+		.ilen	= 1,
+		.result	= "\x2c\xfb\xad\x7e\xbe\xa0\x9a\x5b"
+			  "\x7a\x3f\x81\xf7\xfc\x1b\x79\x83"
+			  "\xc7",
+		.rlen	= 17,
+	}, {
+		.key	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8"
+			  "\x1b\x50\x25\x5d\x89\x68\xfc\x6d"
+			  "\xc3\x89\x67\xcb\xa4\x49\x9d\x71",
+		.klen	= 32,
+		.iv	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a"
+			  "\xe6\x01\xa8\x7e\xca\x10\xdc\x73"
+			  "\xf4\x94\x9f\xc1\x5a\x61\x85\x27",
+		.assoc	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37",
+		.alen	= 15,
+		.input	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67",
+		.ilen	= 15,
+		.result	= "\x1f\x7f\xca\x3c\x2b\xe7\x27\xba"
+			  "\x7e\x98\x83\x02\x34\x23\xf7\x94"
+			  "\xde\x35\xe6\x1d\x14\x18\xe5\x38"
+			  "\x14\x80\x6a\xa7\x1b\xae\x1d",
+		.rlen	= 31,
+	}, {
+		.key	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf"
+			  "\x46\x13\x31\xe1\x8e\x08\x7e\x87"
+			  "\x85\xb6\x46\xa3\x7e\xa8\x3c\x48",
+		.klen	= 32,
+		.iv	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60"
+			  "\x10\xc3\xb3\x02\xcf\xb0\x5e\x8d"
+			  "\xb5\xc2\x7e\x9a\x35\xc0\x24\xfd",
+		.assoc	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
+		.alen	= 16,
+		.input	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.ilen	= 16,
+		.result	= "\x05\x86\x9e\xd7\x2b\xa3\x97\x01"
+			  "\xbe\x28\x98\x10\x6f\xe9\x61\x32"
+			  "\x96\xbb\xb1\x2e\x8f\x0c\x44\xb9"
+			  "\x46\x2d\x55\xe3\x42\x67\xf2\xaf",
+		.rlen	= 32,
+	}, {
+		.key	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5"
+			  "\x70\xd5\x3c\x65\x93\xa8\x00\xa0"
+			  "\x46\xe4\x25\x7c\x58\x08\xdb\x1e",
+		.klen	= 32,
+		.iv	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
+			  "\x3b\x86\xbf\x86\xd4\x50\xe0\xa7"
+			  "\x76\xef\x5c\x72\x0f\x1f\xc3\xd4",
+		.assoc	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05",
+		.alen	= 17,
+		.input	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69"
+			  "\xd0",
+		.ilen	= 17,
+		.result	= "\x9c\xe0\x06\x7b\x86\xcf\x2e\xd8"
+			  "\x45\x65\x1b\x72\x9b\xaa\xa3\x1e"
+			  "\x87\x9d\x26\xdf\xff\x81\x11\xd2"
+			  "\x47\x41\xb9\x24\xc1\x8a\xa3\x8b"
+			  "\x55",
+		.rlen	= 33,
+	}, {
+		.key	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb"
+			  "\x9a\x97\x48\xe9\x98\x48\x82\xba"
+			  "\x07\x11\x04\x54\x32\x67\x7b\xf5",
+		.klen	= 32,
+		.iv	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
+			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
+			  "\x38\x1d\x3b\x4a\xe9\x7e\x62\xaa",
+		.assoc	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a",
+		.alen	= 31,
+		.input	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70"
+			  "\xfa\xa9\xd0\x4d\x5c\x40\x23\xcd"
+			  "\x98\x34\xab\x37\x56\xae\x32",
+		.ilen	= 31,
+		.result	= "\xa0\xc8\xde\x83\x0d\xc3\x4e\xd5"
+			  "\x69\x7f\x7a\xdd\x8c\x46\xda\xba"
+			  "\x0a\x5c\x0e\x7f\xac\xee\x02\xd2"
+			  "\xe5\x4b\x0a\xba\xb8\xa4\x7b\x66"
+			  "\xde\xae\xdb\xc2\xc0\x0b\xf7\x2b"
+			  "\xdf\xb8\xea\xd8\xa9\x38\xed",
+		.rlen	= 47,
+	}, {
+		.key	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1"
+			  "\xc5\x5a\x53\x6e\x9d\xe8\x04\xd4"
+			  "\xc9\x3f\xe2\x2d\x0c\xc6\x1a\xcb",
+		.klen	= 32,
+		.iv	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
+			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
+			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81",
+		.assoc	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
+		.alen	= 32,
+		.input	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76"
+			  "\x24\x6b\xdc\xd1\x61\xe0\xa5\xe7"
+			  "\x5a\x61\x8a\x0f\x30\x0d\xd1\xec",
+		.ilen	= 32,
+		.result	= "\xd3\x68\x14\x70\x3c\x01\x43\x86"
+			  "\x02\xab\xbe\x75\xaa\xe7\xf5\x53"
+			  "\x5c\x05\xbd\x9b\x19\xbb\x2a\x61"
+			  "\x8f\x69\x05\x75\x8e\xca\x60\x0c"
+			  "\x5b\xa2\x48\x61\x32\x74\x11\x2b"
+			  "\xf6\xcf\x06\x78\x6f\x78\x1a\x4a",
+		.rlen	= 48,
+	}, {
+		.key	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7"
+			  "\xef\x1c\x5f\xf2\xa3\x88\x86\xed"
+			  "\x8a\x6d\xc1\x05\xe7\x25\xb9\xa2",
+		.klen	= 32,
+		.iv	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
+			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
+			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58",
+		.assoc	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d",
+		.alen	= 33,
+		.input	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c"
+			  "\x4f\x2e\xe8\x55\x66\x80\x27\x00"
+			  "\x1b\x8f\x68\xe7\x0a\x6c\x71\xc3"
+			  "\x21\x78\x55\x9d\x9c\x65\x7b\xcd"
+			  "\x0a\x34\x97\xff\x47\x37\xb0\x2a"
+			  "\x80\x0d\x19\x98\x33\xa9\x7a\xe3"
+			  "\x2e\x4c\xc6\xf3\x8c\x88\x42\x01"
+			  "\xbd",
+		.ilen	= 65,
+		.result	= "\x07\x0a\x35\xb0\x82\x03\x5a\xd2"
+			  "\x15\x3a\x6c\x72\x83\x9b\xb1\x75"
+			  "\xea\xf2\xfc\xff\xc6\xf1\x13\xa4"
+			  "\x1a\x93\x33\x79\x97\x82\x81\xc0"
+			  "\x96\xc2\x00\xab\x39\xae\xa1\x62"
+			  "\x53\xa3\x86\xc9\x07\x8c\xaf\x22"
+			  "\x47\x31\x29\xca\x4a\x95\xf5\xd5"
+			  "\x20\x63\x5a\x54\x80\x2c\x4a\x63"
+			  "\xfb\x18\x73\x31\x4f\x08\x21\x5d"
+			  "\x20\xe9\xc3\x7e\xea\x25\x77\x3a"
+			  "\x65",
+		.rlen	= 81,
+	}, {
+		.key	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe"
+			  "\x19\xde\x6b\x76\xa8\x28\x08\x07"
+			  "\x4b\x9a\xa0\xdd\xc1\x84\x58\x79",
+		.klen	= 32,
+		.iv	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
+			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
+			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e",
+		.assoc	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
+			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
+			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
+			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
+			  "\x54",
+		.alen	= 65,
+		.input	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82"
+			  "\x79\xf0\xf3\xd9\x6c\x20\xa9\x1a"
+			  "\xdc\xbc\x47\xc0\xe4\xcb\x10\x99"
+			  "\x2f",
+		.ilen	= 33,
+		.result	= "\x33\xc1\xda\xfa\x15\x21\x07\x8e"
+			  "\x93\x68\xea\x64\x7b\x3d\x4b\x6b"
+			  "\x71\x5e\x5e\x6b\x92\xaa\x65\xc2"
+			  "\x7a\x2a\xc1\xa9\x0a\xa1\x24\x81"
+			  "\x26\x3a\x5a\x09\xe8\xce\x73\x72"
+			  "\xde\x7b\x58\x9e\x85\xb9\xa4\x28"
+			  "\xda",
+		.rlen	= 49,
+	}, {
+		.key	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04"
+			  "\x44\xa1\x76\xfb\xad\xc8\x8a\x21"
+			  "\x0d\xc8\x7f\xb6\x9b\xe3\xf8\x4f",
+		.klen	= 32,
+		.iv	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85"
+			  "\x0e\x51\xf9\x1c\xee\x70\x6a\x27"
+			  "\x3d\xd3\xb7\xac\x51\xfa\xdf\x05",
+		.assoc	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
+		.alen	= 16,
+		.input	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.ilen	= 16,
+		.result	= "\x3e\xf8\x86\x3d\x39\xf8\x96\x02"
+			  "\x0f\xdf\xc9\x6e\x37\x1e\x57\x99"
+			  "\x07\x2a\x1a\xac\xd1\xda\xfd\x3b"
+			  "\xc7\xff\xbd\xbc\x85\x09\x0b",
+		.rlen	= 31,
+	}, {
+		.key	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a"
+			  "\x6e\x63\x82\x7f\xb2\x68\x0c\x3a"
+			  "\xce\xf5\x5e\x8e\x75\x42\x97\x26",
+		.klen	= 32,
+		.iv	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c"
+			  "\x39\x14\x05\xa0\xf3\x10\xec\x41"
+			  "\xff\x01\x95\x84\x2b\x59\x7f\xdb",
+		.assoc	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
+		.alen	= 16,
+		.input	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.ilen	= 16,
+		.result	= "\x2f\xc4\xd8\x0d\xa6\x07\xef\x2e"
+			  "\x6c\xd9\x84\x63\x70\x97\x61\x37"
+			  "\x08\x2f\x16\x90\x9e\x62\x30\x0d"
+			  "\x62\xd5\xc8\xf0\x46\x1a",
+		.rlen	= 30,
+	}, {
+		.key	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10"
+			  "\x98\x25\x8d\x03\xb7\x08\x8e\x54"
+			  "\x90\x23\x3d\x67\x4f\xa1\x36\xfc",
+		.klen	= 32,
+		.iv	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92"
+			  "\x63\xd6\x10\x24\xf8\xb0\x6e\x5a"
+			  "\xc0\x2e\x74\x5d\x06\xb8\x1e\xb2",
+		.assoc	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
+		.alen	= 16,
+		.input	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.ilen	= 16,
+		.result	= "\xce\xf3\x17\x87\x49\xc2\x00\x46"
+			  "\xc6\x12\x5c\x8f\x81\x38\xaa\x55"
+			  "\xf8\x67\x75\xf1\x75\xe3\x2a\x24",
+		.rlen	= 24,
+	},
+};
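+
+/*
+ * In the encryption vectors above, rlen = ilen + tag length: the tag is
+ * 16 bytes by default, and the last three vectors exercise truncated
+ * tags of 15, 14 and 8 bytes, so the expected authsize for each entry
+ * can be recovered as rlen - ilen.
+ */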
+
+static const struct aead_testvec aegis256_dec_tv_template[] = {
+	{
+		.key	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81"
+			  "\xca\xb0\x82\x21\x41\xa8\xe0\x06"
+			  "\x30\x0b\x37\xf6\xb6\x17\xe7\xb5",
+		.klen	= 32,
+		.iv	= "\x1e\x92\x1c\xcf\x88\x3d\x54\x0d"
+			  "\x40\x6d\x59\x48\xfc\x92\x61\x03"
+			  "\x95\x61\x05\x42\x82\x50\xc0\x0c"
+			  "\x60\x16\x6f\xec\x6d\x2f\xcf\x6b",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xd5\x65\x3a\xa9\x03\x51\xd7\xaa"
+			  "\xfa\x4b\xd8\xa2\x41\x9b\xc1\xb2",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87"
+			  "\xf4\x72\x8e\xa5\x46\x48\x62\x20"
+			  "\xf1\x38\x16\xce\x90\x76\x87\x8c",
+		.klen	= 32,
+		.iv	= "\x5a\xb7\x56\x6e\x98\xb9\xfd\x29"
+			  "\xc1\x47\x0b\xda\xf6\xb6\x23\x09"
+			  "\xbf\x23\x11\xc6\x87\xf0\x42\x26"
+			  "\x22\x44\x4e\xc4\x47\x8e\x6e\x41",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x84\xa2\x8f\xad\xdb\x8d\x2c\x16"
+			  "\x9e\x89\xd9\x06\xa6\xa8\x14\x29"
+			  "\x8b",
+		.ilen	= 17,
+		.result	= "\x79",
+		.rlen	= 1,
+	}, {
+		.key	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e"
+			  "\x1f\x35\x9a\x29\x4b\xe8\xe4\x39"
+			  "\xb3\x66\xf5\xa6\x6a\xd5\x26\x62",
+		.klen	= 32,
+		.iv	= "\x97\xdb\x90\x0e\xa8\x35\xa5\x45"
+			  "\x42\x21\xbd\x6b\xf0\xda\xe6\x0f"
+			  "\xe9\xe5\x1d\x4a\x8c\x90\xc4\x40"
+			  "\xe3\x71\x2d\x9c\x21\xed\x0e\x18",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x09\x94\x1f\xa6\x13\xc3\x74\x75"
+			  "\x17\xad\x8a\x0e\xd8\x66\x9a\x28"
+			  "\xd7\x30\x66\x09\x2a\xdc\xfa\x2a"
+			  "\x9f\x3b\xd7\xdd\x66\xd1\x2b",
+		.ilen	= 31,
+		.result	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47",
+		.rlen	= 15,
+	}, {
+		.key	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94"
+			  "\x49\xf7\xa5\xad\x50\x88\x66\x53"
+			  "\x74\x94\xd4\x7f\x44\x34\xc5\x39",
+		.klen	= 32,
+		.iv	= "\xd3\x00\xc9\xad\xb8\xb0\x4e\x61"
+			  "\xc3\xfb\x6f\xfd\xea\xff\xa9\x15"
+			  "\x14\xa8\x28\xce\x92\x30\x46\x59"
+			  "\xa4\x9f\x0b\x75\xfb\x4c\xad\xee",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x8a\x46\xa2\x22\x8c\x03\xab\x6f"
+			  "\x54\x63\x4e\x7f\xc9\x8e\xfa\x70"
+			  "\x7b\xe5\x8d\x78\xbc\xe9\xb6\xa1"
+			  "\x29\x17\xc8\x3b\x52\xa4\x98\x72",
+		.ilen	= 32,
+		.result	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.rlen	= 16,
+	}, {
+		.key	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a"
+			  "\x74\xb9\xb1\x32\x55\x28\xe8\x6d"
+			  "\x35\xc1\xb3\x57\x1f\x93\x64\x0f",
+		.klen	= 32,
+		.iv	= "\x10\x25\x03\x4c\xc8\x2c\xf7\x7d"
+			  "\x44\xd5\x21\x8e\xe4\x23\x6b\x1c"
+			  "\x3e\x6a\x34\x53\x97\xd0\xc8\x73"
+			  "\x66\xcd\xea\x4d\xd5\xab\x4c\xc5",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x71\x6b\x37\x0b\x02\x61\x28\x12"
+			  "\x83\xab\x66\x90\x84\xc7\xd1\xc5"
+			  "\xb2\x7a\xb4\x7b\xb4\xfe\x02\xb2"
+			  "\xc0\x00\x39\x13\xb5\x51\x68\x44"
+			  "\xad",
+		.ilen	= 33,
+		.result	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f"
+			  "\xd3",
+		.rlen	= 17,
+	}, {
+		.key	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0"
+			  "\x9e\x7c\xbc\xb6\x5b\xc8\x6a\x86"
+			  "\xf7\xef\x91\x30\xf9\xf2\x04\xe6",
+		.klen	= 32,
+		.iv	= "\x4c\x49\x3d\xec\xd8\xa8\xa0\x98"
+			  "\xc5\xb0\xd3\x1f\xde\x48\x2e\x22"
+			  "\x69\x2c\x3f\xd7\x9c\x70\x4a\x8d"
+			  "\x27\xfa\xc9\x26\xaf\x0a\xeb\x9c",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xaf\xa4\x34\x0d\x59\xe6\x1c\x2f"
+			  "\x06\x3b\x52\x18\x49\x75\x1b\xf0"
+			  "\x53\x09\x72\x7b\x45\x79\xe0\xbe"
+			  "\x89\x85\x23\x15\xb8\x79\x07\x4c"
+			  "\x53\x7a\x15\x37\x0a\xee\xb7\xfb"
+			  "\xc4\x1f\x12\x27\xcf\x77\x90",
+		.ilen	= 47,
+		.result	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25"
+			  "\xfe\x8d\x45\x19\x1e\xc0\x0b\x99"
+			  "\x88\x11\x39\x12\x1c\x3a\xbb",
+		.rlen	= 31,
+	}, {
+		.key	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6"
+			  "\xc8\x3e\xc8\x3a\x60\x68\xec\xa0"
+			  "\xb8\x1c\x70\x08\xd3\x51\xa3\xbd",
+		.klen	= 32,
+		.iv	= "\x89\x6e\x77\x8b\xe8\x23\x49\xb4"
+			  "\x45\x8a\x85\xb1\xd8\x6c\xf1\x28"
+			  "\x93\xef\x4b\x5b\xa1\x10\xcc\xa6"
+			  "\xe8\x28\xa8\xfe\x89\x69\x8b\x72",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xe2\xc9\x0b\x33\x31\x02\xb3\xb4"
+			  "\x33\xfe\xeb\xa8\xb7\x9b\xb2\xd7"
+			  "\xeb\x0f\x05\x2b\xba\xb3\xca\xef"
+			  "\xf6\xd1\xb6\xc0\xb9\x9b\x85\xc5"
+			  "\xbf\x7a\x3e\xcc\x31\x76\x09\x80"
+			  "\x32\x5d\xbb\xe8\x38\x0e\x77\xd3",
+		.ilen	= 48,
+		.result	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b"
+			  "\x28\x50\x51\x9d\x24\x60\x8d\xb3"
+			  "\x49\x3e\x17\xea\xf6\x99\x5a\xdd",
+		.rlen	= 32,
+	}, {
+		.key	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad"
+			  "\xf3\x00\xd4\xbf\x65\x08\x6e\xb9"
+			  "\x7a\x4a\x4f\xe0\xad\xb0\x42\x93",
+		.klen	= 32,
+		.iv	= "\xc5\x93\xb0\x2a\xf8\x9f\xf1\xd0"
+			  "\xc6\x64\x37\x42\xd2\x90\xb3\x2e"
+			  "\xbd\xb1\x57\xe0\xa6\xb0\x4e\xc0"
+			  "\xaa\x55\x87\xd6\x63\xc8\x2a\x49",
+		.assoc	= "\xd5",
+		.alen	= 1,
+		.input	= "\x96\x43\x30\xca\x6c\x4f\xd7\x12"
+			  "\xba\xd9\xb3\x18\x86\xdf\xc3\x52",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3"
+			  "\x1d\xc3\xdf\x43\x6a\xa8\xf0\xd3"
+			  "\x3b\x77\x2e\xb9\x87\x0f\xe1\x6a",
+		.klen	= 32,
+		.iv	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34"
+			  "\xe8\x73\x62\x64\xab\x50\xd0\xda"
+			  "\x6b\x83\x66\xaf\x3e\x27\xc9\x1f",
+		.assoc	= "\x11\x81\x78\x32\x4d\xb9\x44\x73"
+			  "\x68\x75\x16\xf8\xcb\x7e\xa7",
+		.alen	= 15,
+		.input	= "\x2f\xab\x45\xe2\xa7\x46\xc5\x83"
+			  "\x11\x9f\xb0\x74\xee\xc7\x03\xdd",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9"
+			  "\x47\x85\xeb\xc7\x6f\x48\x72\xed"
+			  "\xfc\xa5\x0d\x91\x61\x6e\x81\x40",
+		.klen	= 32,
+		.iv	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b"
+			  "\x12\x35\x6e\xe8\xb0\xf0\x52\xf3"
+			  "\x2d\xb0\x45\x87\x18\x86\x68\xf6",
+		.assoc	= "\x4e\xa5\xb2\xd1\x5d\x35\xed\x8f"
+			  "\xe8\x4f\xc8\x89\xc5\xa2\x69\xbc",
+		.alen	= 16,
+		.input	= "\x16\x44\x73\x33\x5d\xf2\xb9\x04"
+			  "\x6b\x79\x98\xef\xdb\xd5\xc5\xf1",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf"
+			  "\x72\x47\xf6\x4b\x74\xe8\xf4\x06"
+			  "\xbe\xd3\xec\x6a\x3b\xcd\x20\x17",
+		.klen	= 32,
+		.iv	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
+			  "\x3c\xf8\x79\x6c\xb6\x90\xd4\x0d"
+			  "\xee\xde\x23\x60\xf2\xe5\x08\xcc",
+		.assoc	= "\x8a\xca\xec\x70\x6d\xb1\x96\xab"
+			  "\x69\x29\x7a\x1b\xbf\xc7\x2c\xc2"
+			  "\x07",
+		.alen	= 17,
+		.input	= "\xa4\x9b\xb8\x47\xc0\xed\x7a\x45"
+			  "\x98\x54\x8c\xed\x3d\x17\xf0\xdd",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6"
+			  "\x9c\x0a\x02\xd0\x79\x88\x76\x20"
+			  "\x7f\x00\xca\x42\x15\x2c\xbf\xed",
+		.klen	= 32,
+		.iv	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
+			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
+			  "\xaf\x0b\x02\x38\xcc\x44\xa7\xa3",
+		.assoc	= "\xc7\xef\x26\x10\x7d\x2c\x3f\xc6"
+			  "\xea\x03\x2c\xac\xb9\xeb\xef\xc9"
+			  "\x31\x6b\x08\x12\xfc\xd8\x37\x2d"
+			  "\xe0\x17\x3a\x2e\x83\x5c\x8f",
+		.alen	= 31,
+		.input	= "\x20\x24\xe2\x33\x5c\x60\xc9\xf0"
+			  "\xa4\x96\x2f\x0d\x53\xc2\xf8\xfc",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc"
+			  "\xc6\xcc\x0e\x54\x7f\x28\xf8\x3a"
+			  "\x40\x2e\xa9\x1a\xf0\x8b\x5e\xc4",
+		.klen	= 32,
+		.iv	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
+			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
+			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a",
+		.assoc	= "\x03\x14\x5f\xaf\x8d\xa8\xe7\xe2"
+			  "\x6b\xde\xde\x3e\xb3\x10\xb1\xcf"
+			  "\x5c\x2d\x14\x96\x01\x78\xb9\x47"
+			  "\xa1\x44\x19\x06\x5d\xbb\x2e\x2f",
+		.alen	= 32,
+		.input	= "\x6f\x4a\xb9\xe0\xff\x51\xa3\xf1"
+			  "\xd2\x64\x3e\x66\x6a\xb2\x03\xc0",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2"
+			  "\xf1\x8e\x19\xd8\x84\xc8\x7a\x53"
+			  "\x02\x5b\x88\xf3\xca\xea\xfe\x9b",
+		.klen	= 32,
+		.iv	= "\x31\x6f\x0b\xe6\x59\x85\xe6\x77"
+			  "\xcc\x81\x63\xab\xae\x6b\x43\x54"
+			  "\xbb\x3f\x9c\xf9\xc5\x70\x5a\x5a"
+			  "\x32\x67\xc0\xe9\x80\x02\xe5\x50",
+		.assoc	= "\x40",
+		.alen	= 1,
+		.input	= "\x2c\xfb\xad\x7e\xbe\xa0\x9a\x5b"
+			  "\x7a\x3f\x81\xf7\xfc\x1b\x79\x83"
+			  "\xc7",
+		.ilen	= 17,
+		.result	= "\x4f",
+		.rlen	= 1,
+	}, {
+		.key	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8"
+			  "\x1b\x50\x25\x5d\x89\x68\xfc\x6d"
+			  "\xc3\x89\x67\xcb\xa4\x49\x9d\x71",
+		.klen	= 32,
+		.iv	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a"
+			  "\xe6\x01\xa8\x7e\xca\x10\xdc\x73"
+			  "\xf4\x94\x9f\xc1\x5a\x61\x85\x27",
+		.assoc	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37",
+		.alen	= 15,
+		.input	= "\x1f\x7f\xca\x3c\x2b\xe7\x27\xba"
+			  "\x7e\x98\x83\x02\x34\x23\xf7\x94"
+			  "\xde\x35\xe6\x1d\x14\x18\xe5\x38"
+			  "\x14\x80\x6a\xa7\x1b\xae\x1d",
+		.ilen	= 31,
+		.result	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67",
+		.rlen	= 15,
+	}, {
+		.key	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf"
+			  "\x46\x13\x31\xe1\x8e\x08\x7e\x87"
+			  "\x85\xb6\x46\xa3\x7e\xa8\x3c\x48",
+		.klen	= 32,
+		.iv	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60"
+			  "\x10\xc3\xb3\x02\xcf\xb0\x5e\x8d"
+			  "\xb5\xc2\x7e\x9a\x35\xc0\x24\xfd",
+		.assoc	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
+		.alen	= 16,
+		.input	= "\x05\x86\x9e\xd7\x2b\xa3\x97\x01"
+			  "\xbe\x28\x98\x10\x6f\xe9\x61\x32"
+			  "\x96\xbb\xb1\x2e\x8f\x0c\x44\xb9"
+			  "\x46\x2d\x55\xe3\x42\x67\xf2\xaf",
+		.ilen	= 32,
+		.result	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5"
+			  "\x70\xd5\x3c\x65\x93\xa8\x00\xa0"
+			  "\x46\xe4\x25\x7c\x58\x08\xdb\x1e",
+		.klen	= 32,
+		.iv	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
+			  "\x3b\x86\xbf\x86\xd4\x50\xe0\xa7"
+			  "\x76\xef\x5c\x72\x0f\x1f\xc3\xd4",
+		.assoc	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05",
+		.alen	= 17,
+		.input	= "\x9c\xe0\x06\x7b\x86\xcf\x2e\xd8"
+			  "\x45\x65\x1b\x72\x9b\xaa\xa3\x1e"
+			  "\x87\x9d\x26\xdf\xff\x81\x11\xd2"
+			  "\x47\x41\xb9\x24\xc1\x8a\xa3\x8b"
+			  "\x55",
+		.ilen	= 33,
+		.result	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69"
+			  "\xd0",
+		.rlen	= 17,
+	}, {
+		.key	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb"
+			  "\x9a\x97\x48\xe9\x98\x48\x82\xba"
+			  "\x07\x11\x04\x54\x32\x67\x7b\xf5",
+		.klen	= 32,
+		.iv	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
+			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
+			  "\x38\x1d\x3b\x4a\xe9\x7e\x62\xaa",
+		.assoc	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a",
+		.alen	= 31,
+		.input	= "\xa0\xc8\xde\x83\x0d\xc3\x4e\xd5"
+			  "\x69\x7f\x7a\xdd\x8c\x46\xda\xba"
+			  "\x0a\x5c\x0e\x7f\xac\xee\x02\xd2"
+			  "\xe5\x4b\x0a\xba\xb8\xa4\x7b\x66"
+			  "\xde\xae\xdb\xc2\xc0\x0b\xf7\x2b"
+			  "\xdf\xb8\xea\xd8\xa9\x38\xed",
+		.ilen	= 47,
+		.result	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70"
+			  "\xfa\xa9\xd0\x4d\x5c\x40\x23\xcd"
+			  "\x98\x34\xab\x37\x56\xae\x32",
+		.rlen	= 31,
+	}, {
+		.key	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1"
+			  "\xc5\x5a\x53\x6e\x9d\xe8\x04\xd4"
+			  "\xc9\x3f\xe2\x2d\x0c\xc6\x1a\xcb",
+		.klen	= 32,
+		.iv	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
+			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
+			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81",
+		.assoc	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
+		.alen	= 32,
+		.input	= "\xd3\x68\x14\x70\x3c\x01\x43\x86"
+			  "\x02\xab\xbe\x75\xaa\xe7\xf5\x53"
+			  "\x5c\x05\xbd\x9b\x19\xbb\x2a\x61"
+			  "\x8f\x69\x05\x75\x8e\xca\x60\x0c"
+			  "\x5b\xa2\x48\x61\x32\x74\x11\x2b"
+			  "\xf6\xcf\x06\x78\x6f\x78\x1a\x4a",
+		.ilen	= 48,
+		.result	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76"
+			  "\x24\x6b\xdc\xd1\x61\xe0\xa5\xe7"
+			  "\x5a\x61\x8a\x0f\x30\x0d\xd1\xec",
+		.rlen	= 32,
+	}, {
+		.key	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7"
+			  "\xef\x1c\x5f\xf2\xa3\x88\x86\xed"
+			  "\x8a\x6d\xc1\x05\xe7\x25\xb9\xa2",
+		.klen	= 32,
+		.iv	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
+			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
+			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58",
+		.assoc	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d",
+		.alen	= 33,
+		.input	= "\x07\x0a\x35\xb0\x82\x03\x5a\xd2"
+			  "\x15\x3a\x6c\x72\x83\x9b\xb1\x75"
+			  "\xea\xf2\xfc\xff\xc6\xf1\x13\xa4"
+			  "\x1a\x93\x33\x79\x97\x82\x81\xc0"
+			  "\x96\xc2\x00\xab\x39\xae\xa1\x62"
+			  "\x53\xa3\x86\xc9\x07\x8c\xaf\x22"
+			  "\x47\x31\x29\xca\x4a\x95\xf5\xd5"
+			  "\x20\x63\x5a\x54\x80\x2c\x4a\x63"
+			  "\xfb\x18\x73\x31\x4f\x08\x21\x5d"
+			  "\x20\xe9\xc3\x7e\xea\x25\x77\x3a"
+			  "\x65",
+		.ilen	= 81,
+		.result	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c"
+			  "\x4f\x2e\xe8\x55\x66\x80\x27\x00"
+			  "\x1b\x8f\x68\xe7\x0a\x6c\x71\xc3"
+			  "\x21\x78\x55\x9d\x9c\x65\x7b\xcd"
+			  "\x0a\x34\x97\xff\x47\x37\xb0\x2a"
+			  "\x80\x0d\x19\x98\x33\xa9\x7a\xe3"
+			  "\x2e\x4c\xc6\xf3\x8c\x88\x42\x01"
+			  "\xbd",
+		.rlen	= 65,
+	}, {
+		.key	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe"
+			  "\x19\xde\x6b\x76\xa8\x28\x08\x07"
+			  "\x4b\x9a\xa0\xdd\xc1\x84\x58\x79",
+		.klen	= 32,
+		.iv	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
+			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
+			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e",
+		.assoc	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
+			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
+			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
+			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
+			  "\x54",
+		.alen	= 65,
+		.input	= "\x33\xc1\xda\xfa\x15\x21\x07\x8e"
+			  "\x93\x68\xea\x64\x7b\x3d\x4b\x6b"
+			  "\x71\x5e\x5e\x6b\x92\xaa\x65\xc2"
+			  "\x7a\x2a\xc1\xa9\x0a\xa1\x24\x81"
+			  "\x26\x3a\x5a\x09\xe8\xce\x73\x72"
+			  "\xde\x7b\x58\x9e\x85\xb9\xa4\x28"
+			  "\xda",
+		.ilen	= 49,
+		.result	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82"
+			  "\x79\xf0\xf3\xd9\x6c\x20\xa9\x1a"
+			  "\xdc\xbc\x47\xc0\xe4\xcb\x10\x99"
+			  "\x2f",
+		.rlen	= 33,
+	}, {
+		.key	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04"
+			  "\x44\xa1\x76\xfb\xad\xc8\x8a\x21"
+			  "\x0d\xc8\x7f\xb6\x9b\xe3\xf8\x4f",
+		.klen	= 32,
+		.iv	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85"
+			  "\x0e\x51\xf9\x1c\xee\x70\x6a\x27"
+			  "\x3d\xd3\xb7\xac\x51\xfa\xdf\x05",
+		.assoc	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
+		.alen	= 16,
+		.input	= "\x3e\xf8\x86\x3d\x39\xf8\x96\x02"
+			  "\x0f\xdf\xc9\x6e\x37\x1e\x57\x99"
+			  "\x07\x2a\x1a\xac\xd1\xda\xfd\x3b"
+			  "\xc7\xff\xbd\xbc\x85\x09\x0b",
+		.ilen	= 31,
+		.result	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.rlen	= 16,
+	}, {
+		.key	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a"
+			  "\x6e\x63\x82\x7f\xb2\x68\x0c\x3a"
+			  "\xce\xf5\x5e\x8e\x75\x42\x97\x26",
+		.klen	= 32,
+		.iv	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c"
+			  "\x39\x14\x05\xa0\xf3\x10\xec\x41"
+			  "\xff\x01\x95\x84\x2b\x59\x7f\xdb",
+		.assoc	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
+		.alen	= 16,
+		.input	= "\x2f\xc4\xd8\x0d\xa6\x07\xef\x2e"
+			  "\x6c\xd9\x84\x63\x70\x97\x61\x37"
+			  "\x08\x2f\x16\x90\x9e\x62\x30\x0d"
+			  "\x62\xd5\xc8\xf0\x46\x1a",
+		.ilen	= 30,
+		.result	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.rlen	= 16,
+	}, {
+		.key	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10"
+			  "\x98\x25\x8d\x03\xb7\x08\x8e\x54"
+			  "\x90\x23\x3d\x67\x4f\xa1\x36\xfc",
+		.klen	= 32,
+		.iv	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92"
+			  "\x63\xd6\x10\x24\xf8\xb0\x6e\x5a"
+			  "\xc0\x2e\x74\x5d\x06\xb8\x1e\xb2",
+		.assoc	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
+		.alen	= 16,
+		.input	= "\xce\xf3\x17\x87\x49\xc2\x00\x46"
+			  "\xc6\x12\x5c\x8f\x81\x38\xaa\x55"
+			  "\xf8\x67\x75\xf1\x75\xe3\x2a\x24",
+		.ilen	= 24,
+		.result	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.rlen	= 16,
+	},
+};
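+
+/*
+ * The decryption vectors above mirror the encryption vectors: key, iv
+ * and assoc are identical while input and result are swapped, so each
+ * entry decrypts (and authenticates) the ciphertext produced by the
+ * matching encryption vector, including the truncated-tag entries.
+ */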
+
+/*
+ * MORUS-640 test vectors - generated via reference implementation from
+ * SUPERCOP (https://bench.cr.yp.to/supercop.html):
+ *
+ *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
+ *   (see crypto_aead/morus640128v2/)
+ */
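+
+/*
+ * A vector can be regenerated against the reference code via the
+ * standard SUPERCOP/CAESAR AEAD interface (a sketch only; buffer sizes
+ * and the exact symbol names depend on how the reference implementation
+ * is built):
+ *
+ *	unsigned char c[MLEN + 16];	// ciphertext = plaintext + tag
+ *	unsigned long long clen;
+ *
+ *	crypto_aead_encrypt(c, &clen, m, mlen, ad, adlen,
+ *			    NULL, npub, k);
+ *
+ * with k = .key, npub = .iv, ad/adlen = .assoc/.alen and
+ * m/mlen = .input/.ilen; .result/.rlen then correspond to c/clen.
+ */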
+static const struct aead_testvec morus640_enc_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 16,
+		.iv	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x89\x62\x7d\xf3\x07\x9d\x52\x05"
+			  "\x53\xc3\x04\x60\x93\xb4\x37\x9a",
+		.rlen	= 16,
+	}, {
+		.key	= "\x3c\x24\x39\x9f\x10\x7b\xa8\x1b"
+			  "\x80\xda\xb2\x91\xf9\x24\xc2\x06",
+		.klen	= 16,
+		.iv	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x69",
+		.ilen	= 1,
+		.result	= "\xa8\x8d\xe4\x90\xb5\x50\x8f\x78"
+			  "\xb6\x10\x9a\x59\x5f\x61\x37\x70"
+			  "\x09",
+		.rlen	= 17,
+	}, {
+		.key	= "\x79\x49\x73\x3e\x20\xf7\x51\x37"
+			  "\x01\xb4\x64\x22\xf3\x48\x85\x0c",
+		.klen	= 16,
+		.iv	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xa6\xa4\x1e\x76\xec\xd4\x50\xcc"
+			  "\x62\x58\xe9\x8f\xef\xa4\x17",
+		.ilen	= 15,
+		.result	= "\x76\xdd\xb9\x05\x3d\xce\x61\x38"
+			  "\xf3\xef\xf7\xe5\xd7\xfd\x70\xa5"
+			  "\xcf\x9d\x64\xb8\x0a\x9f\xfd\x8b"
+			  "\xd4\x6e\xfe\xd9\xc8\x63\x4b",
+		.rlen	= 31,
+	}, {
+		.key	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47\x12",
+		.klen	= 16,
+		.iv	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xe2\xc9\x58\x15\xfc\x4f\xf8\xe8"
+			  "\xe3\x32\x9b\x21\xe9\xc8\xd9\x97",
+		.ilen	= 16,
+		.result	= "\xdc\x72\xe8\x14\xfb\x63\xad\x72"
+			  "\x1f\x57\x9a\x1f\x88\x81\xdb\xd6"
+			  "\xc1\x91\x9d\xb9\x25\xc4\x99\x4c"
+			  "\x97\xcd\x8a\x0c\x9d\x68\x00\x1c",
+		.rlen	= 32,
+	}, {
+		.key	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.klen	= 16,
+		.iv	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x1f\xee\x92\xb4\x0c\xcb\xa1\x04"
+			  "\x64\x0c\x4d\xb2\xe3\xec\x9c\x9d"
+			  "\x09",
+		.ilen	= 17,
+		.result	= "\x6b\x4f\x3b\x90\x9a\xa2\xb3\x82"
+			  "\x0a\xb8\x55\xee\xeb\x73\x4d\x7f"
+			  "\x54\x11\x3a\x8a\x31\xa3\xb5\xf2"
+			  "\xcd\x49\xdb\xf3\xee\x26\xbd\xa2"
+			  "\x0d",
+		.rlen	= 33,
+	}, {
+		.key	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f",
+		.klen	= 16,
+		.iv	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x5c\x13\xcb\x54\x1c\x47\x4a\x1f"
+			  "\xe5\xe6\xff\x44\xdd\x11\x5f\xa3"
+			  "\x33\xdd\xc2\xf8\xdd\x18\x2b\x93"
+			  "\x57\x05\x01\x1c\x66\x22\xd3",
+		.ilen	= 31,
+		.result	= "\x59\xd1\x0f\x6b\xee\x27\x84\x92"
+			  "\xb7\xa9\xb5\xdd\x02\xa4\x12\xa5"
+			  "\x50\x32\xb4\x9a\x2e\x35\x83\x55"
+			  "\x36\x12\x12\xed\xa3\x31\xc5\x30"
+			  "\xa7\xe2\x4a\x6d\x05\x59\x43\x91"
+			  "\x75\xfa\x6c\x17\xc6\x73\xca",
+		.rlen	= 47,
+	}, {
+		.key	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25",
+		.klen	= 16,
+		.iv	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x98\x37\x05\xf3\x2c\xc2\xf3\x3b"
+			  "\x66\xc0\xb1\xd5\xd7\x35\x21\xaa"
+			  "\x5d\x9f\xce\x7c\xe2\xb8\xad\xad"
+			  "\x19\x33\xe0\xf4\x40\x81\x72\x28",
+		.ilen	= 32,
+		.result	= "\xdb\x49\x68\x0f\x91\x5b\x21\xb1"
+			  "\xcf\x50\xb2\x4c\x32\xe1\xa6\x69"
+			  "\xc0\xfb\x44\x1f\xa0\x9a\xeb\x39"
+			  "\x1b\xde\x68\x38\xcc\x27\x52\xc5"
+			  "\xf6\x3e\x74\xea\x66\x5b\x5f\x0c"
+			  "\x65\x9e\x58\xe6\x52\xa2\xfe\x59",
+		.rlen	= 48,
+	}, {
+		.key	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b",
+		.klen	= 16,
+		.iv	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
+		.assoc	= "\xc5",
+		.alen	= 1,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x56\xe7\x24\x52\xdd\x95\x60\x5b"
+			  "\x09\x48\x39\x69\x9c\xb3\x62\x46",
+		.rlen	= 16,
+	}, {
+		.key	= "\xe4\x25\xcd\xfa\x80\xdd\x46\xde"
+			  "\x07\xd1\x90\x8b\xcf\x23\x15\x31",
+		.klen	= 16,
+		.iv	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
+		.assoc	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76",
+		.alen	= 15,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xdd\xfa\x6c\x1f\x5d\x86\x87\x01"
+			  "\x13\xe5\x73\x46\x46\xf2\x5c\xe1",
+		.rlen	= 16,
+	}, {
+		.key	= "\x20\x4a\x07\x99\x91\x58\xee\xfa"
+			  "\x88\xab\x42\x1c\xc9\x47\xd7\x38",
+		.klen	= 16,
+		.iv	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
+		.assoc	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
+		.alen	= 16,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xa6\x1b\xb9\xd7\x5e\x3c\xcf\xac"
+			  "\xa9\x21\x45\x0b\x16\x52\xf7\xe1",
+		.rlen	= 16,
+	}, {
+		.key	= "\x5d\x6f\x41\x39\xa1\xd4\x97\x16"
+			  "\x09\x85\xf4\xae\xc3\x6b\x9a\x3e",
+		.klen	= 16,
+		.iv	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
+		.assoc	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
+			  "\x3c",
+		.alen	= 17,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x15\xff\xde\x3b\x34\xfc\xf6\xf9"
+			  "\xbb\xa8\x62\xad\x0a\xf5\x48\x60",
+		.rlen	= 16,
+	}, {
+		.key	= "\x99\x93\x7a\xd8\xb1\x50\x40\x31"
+			  "\x8a\x60\xa6\x3f\xbd\x90\x5d\x44",
+		.klen	= 16,
+		.iv	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
+		.assoc	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
+			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
+			  "\xaf\x0b\x02\x38\xcc\x44\xa7",
+		.alen	= 31,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xd2\x9d\xf8\x3b\xd7\x84\xe9\x2d"
+			  "\x4b\xef\x75\x16\x0a\x99\xae\x6b",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd6\xb8\xb4\x77\xc1\xcb\xe9\x4d"
+			  "\x0a\x3a\x58\xd1\xb7\xb4\x1f\x4a",
+		.klen	= 16,
+		.iv	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
+		.assoc	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
+			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
+			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a",
+		.alen	= 32,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xe4\x8d\xa7\xa7\x45\xc1\x31\x4f"
+			  "\xce\xfb\xaf\xd6\xc2\xe6\xee\xc0",
+		.rlen	= 16,
+	}, {
+		.key	= "\x12\xdd\xee\x17\xd1\x47\x92\x69"
+			  "\x8b\x14\x0a\x62\xb1\xd9\xe2\x50",
+		.klen	= 16,
+		.iv	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
+		.assoc	= "\x31",
+		.alen	= 1,
+		.input	= "\x40",
+		.ilen	= 1,
+		.result	= "\xe2\x67\x38\x4f\xb9\xad\x7d\x38"
+			  "\x01\xfe\x84\x14\x85\xf8\xd1\xe3"
+			  "\x22",
+		.rlen	= 17,
+	}, {
+		.key	= "\x4f\x01\x27\xb6\xe1\xc3\x3a\x85"
+			  "\x0c\xee\xbc\xf4\xab\xfd\xa5\x57",
+		.klen	= 16,
+		.iv	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
+		.assoc	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06",
+		.alen	= 15,
+		.input	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37",
+		.ilen	= 15,
+		.result	= "\x77\x32\x61\xeb\xb4\x33\x29\x92"
+			  "\x29\x95\xc5\x8e\x85\x76\xab\xfc"
+			  "\x07\x95\xa7\x44\x74\xf7\x22\xff"
+			  "\xd8\xd8\x36\x3d\x8a\x7f\x9e",
+		.rlen	= 31,
+	}, {
+		.key	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67\x5d",
+		.klen	= 16,
+		.iv	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
+		.assoc	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
+		.alen	= 16,
+		.input	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
+		.ilen	= 16,
+		.result	= "\xd8\xfd\x44\x45\xf6\x42\x12\x38"
+			  "\xf2\x0b\xea\x4f\x9e\x11\x61\x07"
+			  "\x48\x67\x98\x18\x9b\xd0\x0c\x59"
+			  "\x67\xa4\x11\xb3\x2b\xd6\xc1\x70",
+		.rlen	= 32,
+	}, {
+		.key	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.klen	= 16,
+		.iv	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
+		.assoc	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
+			  "\x3b",
+		.alen	= 17,
+		.input	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05",
+		.ilen	= 17,
+		.result	= "\xb1\xab\x53\x4e\xc7\x40\x16\xb6"
+			  "\x71\x3a\x00\x9f\x41\x88\xb0\xb2"
+			  "\x71\x83\x85\x5f\xc8\x79\x0a\x99"
+			  "\x99\xdc\x89\x1c\x88\xd2\x3e\xf9"
+			  "\x83",
+		.rlen	= 33,
+	}, {
+		.key	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69",
+		.klen	= 16,
+		.iv	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
+		.assoc	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
+			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
+			  "\x38\x1d\x3b\x4a\xe9\x7e\x62",
+		.alen	= 31,
+		.input	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a",
+		.ilen	= 31,
+		.result	= "\x29\xc4\xf0\x03\xc1\x86\xdf\x06"
+			  "\x5c\x7b\xef\x64\x87\x00\xd1\x37"
+			  "\xa7\x08\xbc\x7f\x8f\x41\x54\xd0"
+			  "\x3e\xf1\xc3\xa2\x96\x84\xdd\x2a"
+			  "\x2d\x21\x30\xf9\x02\xdb\x06\x0c"
+			  "\xf1\x5a\x66\x69\xe0\xca\x83",
+		.rlen	= 47,
+	}, {
+		.key	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70",
+		.klen	= 16,
+		.iv	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
+		.assoc	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
+			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
+			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81",
+		.alen	= 32,
+		.input	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
+		.ilen	= 32,
+		.result	= "\xe2\x2e\x44\xdf\xd3\x60\x6d\xb2"
+			  "\x70\x57\x37\xc5\xc2\x4f\x8d\x14"
+			  "\xc6\xbf\x8b\xec\xf5\x62\x67\xf2"
+			  "\x2f\xa1\xe6\xd6\xa7\xb1\x8c\x54"
+			  "\xe5\x6b\x49\xf9\x6e\x90\xc3\xaa"
+			  "\x7a\x00\x2e\x4d\x7f\x31\x2e\x81",
+		.rlen	= 48,
+	}, {
+		.key	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76",
+		.klen	= 16,
+		.iv	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
+		.assoc	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
+			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
+			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58"
+			  "\x1a",
+		.alen	= 33,
+		.input	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d\x4d\x54\x51\x84\x61\xf6\x8e"
+			  "\x03\x31\xf2\x25\x16\xcc\xaa\xc6"
+			  "\x75\x73\x20\x30\x59\x54\xb2\xf0"
+			  "\x3a\x4b\xe0\x23\x8e\xa6\x08\x35"
+			  "\x8a",
+		.ilen	= 65,
+		.result	= "\xc7\xca\x26\x61\x57\xee\xa2\xb9"
+			  "\xb1\x37\xde\x95\x06\x90\x11\x08"
+			  "\x4d\x30\x9f\x24\xc0\x56\xb7\xe1"
+			  "\x0b\x9f\xd2\x57\xe9\xd2\xb1\x76"
+			  "\x56\x9a\xb4\x58\xc5\x08\xfc\xb5"
+			  "\xf2\x31\x9b\xc9\xcd\xb3\x64\xdb"
+			  "\x6f\x50\xbf\xf4\x73\x9d\xfb\x6b"
+			  "\xef\x35\x25\x48\xed\xcf\x29\xa8"
+			  "\xac\xc3\xb9\xcb\x61\x8f\x73\x92"
+			  "\x2c\x7a\x6f\xda\xf9\x09\x6f\xe1"
+			  "\xc4",
+		.rlen	= 81,
+	}, {
+		.key	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c",
+		.klen	= 16,
+		.iv	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
+		.assoc	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
+			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
+			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e"
+			  "\x28\xce\x57\x34\xcd\x6e\x84\x4c"
+			  "\x17\x3c\xe1\xb2\xa8\x0b\xbb\xf1"
+			  "\x96\x41\x0d\x69\xe8\x54\x0a\xc8"
+			  "\x15\x4e\x91\x92\x89\x4b\xb7\x9b"
+			  "\x21",
+		.alen	= 65,
+		.input	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac",
+		.ilen	= 33,
+		.result	= "\x57\xcd\x3d\x46\xc5\xf9\x68\x3b"
+			  "\x2c\x0f\xb4\x7e\x7b\x64\x3e\x40"
+			  "\xf3\x78\x63\x34\x89\x79\x39\x6b"
+			  "\x61\x64\x4a\x9a\xfa\x70\xa4\xd3"
+			  "\x54\x0b\xea\x05\xa6\x95\x64\xed"
+			  "\x3d\x69\xa2\x0c\x27\x56\x2f\x34"
+			  "\x66",
+		.rlen	= 49,
+	}, {
+		.key	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82",
+		.klen	= 16,
+		.iv	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
+		.assoc	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
+		.alen	= 16,
+		.input	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
+		.ilen	= 16,
+		.result	= "\xfc\x85\x06\x28\x8f\xe8\x23\x1f"
+			  "\x33\x98\x87\xde\x08\xb6\xb6\xae"
+			  "\x3e\xa4\xf8\x19\xf1\x92\x60\x39"
+			  "\xb9\x6b\x3f\xdf\xc8\xcb\x30",
+		.rlen	= 31,
+	}, {
+		.key	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.klen	= 16,
+		.iv	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
+		.assoc	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
+		.alen	= 16,
+		.input	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
+		.ilen	= 16,
+		.result	= "\x74\x7d\x70\x07\xe9\xba\x01\xee"
+			  "\x6c\xc6\x6f\x50\x25\x33\xbe\x50"
+			  "\x17\xb8\x17\x62\xed\x80\xa2\xf5"
+			  "\x03\xde\x85\x71\x5d\x34",
+		.rlen	= 30,
+	}, {
+		.key	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.klen	= 16,
+		.iv	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
+		.assoc	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
+		.alen	= 16,
+		.input	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
+		.ilen	= 16,
+		.result	= "\xf4\xb3\x85\xf9\xac\xde\xb1\x38"
+			  "\x29\xfd\x6c\x7c\x49\xe5\x1d\xaf"
+			  "\xba\xea\xd4\xfa\x3f\x11\x33\x98",
+		.rlen	= 24,
+	}, {
+		.key	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.klen	= 16,
+		.iv	= "\xbb\x3a\xf7\x57\xc6\x36\x7c\x22"
+			  "\x36\xab\xde\xc6\x6d\x32\x70\x17",
+		.assoc	= "\xcb\x03\x85\xbf\x0a\xd5\x26\xa9"
+			  "\x56\xe1\x0a\xeb\x6c\xfb\xa1\x98",
+		.alen	= 16,
+		.input	= "\xda\xcc\x14\x27\x4e\x74\xd1\x30"
+			  "\x76\x18\x37\x0f\x6a\xc4\xd1\x1a",
+		.ilen	= 16,
+		.result	= "\xe6\x5c\x49\x4f\x78\xf3\x62\x86"
+			  "\xe1\xb7\xa5\xc3\x32\x88\x3c\x8c"
+			  "\x6e",
+		.rlen	= 17,
+	},
+};
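+
+/*
+ * As with AEGIS-256 above, rlen = ilen + tag length, with 16-byte tags
+ * by default and truncated tags in the last few vectors; MORUS-640 uses
+ * a 16-byte key and 16-byte nonce, hence klen = 16 throughout.
+ */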
+
+static const struct aead_testvec morus640_dec_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 16,
+		.iv	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x89\x62\x7d\xf3\x07\x9d\x52\x05"
+			  "\x53\xc3\x04\x60\x93\xb4\x37\x9a",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x3c\x24\x39\x9f\x10\x7b\xa8\x1b"
+			  "\x80\xda\xb2\x91\xf9\x24\xc2\x06",
+		.klen	= 16,
+		.iv	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xa8\x8d\xe4\x90\xb5\x50\x8f\x78"
+			  "\xb6\x10\x9a\x59\x5f\x61\x37\x70"
+			  "\x09",
+		.ilen	= 17,
+		.result	= "\x69",
+		.rlen	= 1,
+	}, {
+		.key	= "\x79\x49\x73\x3e\x20\xf7\x51\x37"
+			  "\x01\xb4\x64\x22\xf3\x48\x85\x0c",
+		.klen	= 16,
+		.iv	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x76\xdd\xb9\x05\x3d\xce\x61\x38"
+			  "\xf3\xef\xf7\xe5\xd7\xfd\x70\xa5"
+			  "\xcf\x9d\x64\xb8\x0a\x9f\xfd\x8b"
+			  "\xd4\x6e\xfe\xd9\xc8\x63\x4b",
+		.ilen	= 31,
+		.result	= "\xa6\xa4\x1e\x76\xec\xd4\x50\xcc"
+			  "\x62\x58\xe9\x8f\xef\xa4\x17",
+		.rlen	= 15,
+	}, {
+		.key	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47\x12",
+		.klen	= 16,
+		.iv	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xdc\x72\xe8\x14\xfb\x63\xad\x72"
+			  "\x1f\x57\x9a\x1f\x88\x81\xdb\xd6"
+			  "\xc1\x91\x9d\xb9\x25\xc4\x99\x4c"
+			  "\x97\xcd\x8a\x0c\x9d\x68\x00\x1c",
+		.ilen	= 32,
+		.result	= "\xe2\xc9\x58\x15\xfc\x4f\xf8\xe8"
+			  "\xe3\x32\x9b\x21\xe9\xc8\xd9\x97",
+		.rlen	= 16,
+	}, {
+		.key	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.klen	= 16,
+		.iv	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x6b\x4f\x3b\x90\x9a\xa2\xb3\x82"
+			  "\x0a\xb8\x55\xee\xeb\x73\x4d\x7f"
+			  "\x54\x11\x3a\x8a\x31\xa3\xb5\xf2"
+			  "\xcd\x49\xdb\xf3\xee\x26\xbd\xa2"
+			  "\x0d",
+		.ilen	= 33,
+		.result	= "\x1f\xee\x92\xb4\x0c\xcb\xa1\x04"
+			  "\x64\x0c\x4d\xb2\xe3\xec\x9c\x9d"
+			  "\x09",
+		.rlen	= 17,
+	}, {
+		.key	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f",
+		.klen	= 16,
+		.iv	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x59\xd1\x0f\x6b\xee\x27\x84\x92"
+			  "\xb7\xa9\xb5\xdd\x02\xa4\x12\xa5"
+			  "\x50\x32\xb4\x9a\x2e\x35\x83\x55"
+			  "\x36\x12\x12\xed\xa3\x31\xc5\x30"
+			  "\xa7\xe2\x4a\x6d\x05\x59\x43\x91"
+			  "\x75\xfa\x6c\x17\xc6\x73\xca",
+		.ilen	= 47,
+		.result	= "\x5c\x13\xcb\x54\x1c\x47\x4a\x1f"
+			  "\xe5\xe6\xff\x44\xdd\x11\x5f\xa3"
+			  "\x33\xdd\xc2\xf8\xdd\x18\x2b\x93"
+			  "\x57\x05\x01\x1c\x66\x22\xd3",
+		.rlen	= 31,
+	}, {
+		.key	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25",
+		.klen	= 16,
+		.iv	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xdb\x49\x68\x0f\x91\x5b\x21\xb1"
+			  "\xcf\x50\xb2\x4c\x32\xe1\xa6\x69"
+			  "\xc0\xfb\x44\x1f\xa0\x9a\xeb\x39"
+			  "\x1b\xde\x68\x38\xcc\x27\x52\xc5"
+			  "\xf6\x3e\x74\xea\x66\x5b\x5f\x0c"
+			  "\x65\x9e\x58\xe6\x52\xa2\xfe\x59",
+		.ilen	= 48,
+		.result	= "\x98\x37\x05\xf3\x2c\xc2\xf3\x3b"
+			  "\x66\xc0\xb1\xd5\xd7\x35\x21\xaa"
+			  "\x5d\x9f\xce\x7c\xe2\xb8\xad\xad"
+			  "\x19\x33\xe0\xf4\x40\x81\x72\x28",
+		.rlen	= 32,
+	}, {
+		.key	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b",
+		.klen	= 16,
+		.iv	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
+		.assoc	= "\xc5",
+		.alen	= 1,
+		.input	= "\x56\xe7\x24\x52\xdd\x95\x60\x5b"
+			  "\x09\x48\x39\x69\x9c\xb3\x62\x46",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xe4\x25\xcd\xfa\x80\xdd\x46\xde"
+			  "\x07\xd1\x90\x8b\xcf\x23\x15\x31",
+		.klen	= 16,
+		.iv	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
+		.assoc	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76",
+		.alen	= 15,
+		.input	= "\xdd\xfa\x6c\x1f\x5d\x86\x87\x01"
+			  "\x13\xe5\x73\x46\x46\xf2\x5c\xe1",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x20\x4a\x07\x99\x91\x58\xee\xfa"
+			  "\x88\xab\x42\x1c\xc9\x47\xd7\x38",
+		.klen	= 16,
+		.iv	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
+		.assoc	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
+		.alen	= 16,
+		.input	= "\xa6\x1b\xb9\xd7\x5e\x3c\xcf\xac"
+			  "\xa9\x21\x45\x0b\x16\x52\xf7\xe1",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x5d\x6f\x41\x39\xa1\xd4\x97\x16"
+			  "\x09\x85\xf4\xae\xc3\x6b\x9a\x3e",
+		.klen	= 16,
+		.iv	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
+		.assoc	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
+			  "\x3c",
+		.alen	= 17,
+		.input	= "\x15\xff\xde\x3b\x34\xfc\xf6\xf9"
+			  "\xbb\xa8\x62\xad\x0a\xf5\x48\x60",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x99\x93\x7a\xd8\xb1\x50\x40\x31"
+			  "\x8a\x60\xa6\x3f\xbd\x90\x5d\x44",
+		.klen	= 16,
+		.iv	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
+		.assoc	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
+			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
+			  "\xaf\x0b\x02\x38\xcc\x44\xa7",
+		.alen	= 31,
+		.input	= "\xd2\x9d\xf8\x3b\xd7\x84\xe9\x2d"
+			  "\x4b\xef\x75\x16\x0a\x99\xae\x6b",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xd6\xb8\xb4\x77\xc1\xcb\xe9\x4d"
+			  "\x0a\x3a\x58\xd1\xb7\xb4\x1f\x4a",
+		.klen	= 16,
+		.iv	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
+		.assoc	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
+			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
+			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a",
+		.alen	= 32,
+		.input	= "\xe4\x8d\xa7\xa7\x45\xc1\x31\x4f"
+			  "\xce\xfb\xaf\xd6\xc2\xe6\xee\xc0",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x12\xdd\xee\x17\xd1\x47\x92\x69"
+			  "\x8b\x14\x0a\x62\xb1\xd9\xe2\x50",
+		.klen	= 16,
+		.iv	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
+		.assoc	= "\x31",
+		.alen	= 1,
+		.input	= "\xe2\x67\x38\x4f\xb9\xad\x7d\x38"
+			  "\x01\xfe\x84\x14\x85\xf8\xd1\xe3"
+			  "\x22",
+		.ilen	= 17,
+		.result	= "\x40",
+		.rlen	= 1,
+	}, {
+		.key	= "\x4f\x01\x27\xb6\xe1\xc3\x3a\x85"
+			  "\x0c\xee\xbc\xf4\xab\xfd\xa5\x57",
+		.klen	= 16,
+		.iv	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
+		.assoc	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06",
+		.alen	= 15,
+		.input	= "\x77\x32\x61\xeb\xb4\x33\x29\x92"
+			  "\x29\x95\xc5\x8e\x85\x76\xab\xfc"
+			  "\x07\x95\xa7\x44\x74\xf7\x22\xff"
+			  "\xd8\xd8\x36\x3d\x8a\x7f\x9e",
+		.ilen	= 31,
+		.result	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37",
+		.rlen	= 15,
+	}, {
+		.key	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67\x5d",
+		.klen	= 16,
+		.iv	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
+		.assoc	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
+		.alen	= 16,
+		.input	= "\xd8\xfd\x44\x45\xf6\x42\x12\x38"
+			  "\xf2\x0b\xea\x4f\x9e\x11\x61\x07"
+			  "\x48\x67\x98\x18\x9b\xd0\x0c\x59"
+			  "\x67\xa4\x11\xb3\x2b\xd6\xc1\x70",
+		.ilen	= 32,
+		.result	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
+		.rlen	= 16,
+	}, {
+		.key	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.klen	= 16,
+		.iv	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
+		.assoc	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
+			  "\x3b",
+		.alen	= 17,
+		.input	= "\xb1\xab\x53\x4e\xc7\x40\x16\xb6"
+			  "\x71\x3a\x00\x9f\x41\x88\xb0\xb2"
+			  "\x71\x83\x85\x5f\xc8\x79\x0a\x99"
+			  "\x99\xdc\x89\x1c\x88\xd2\x3e\xf9"
+			  "\x83",
+		.ilen	= 33,
+		.result	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05",
+		.rlen	= 17,
+	}, {
+		.key	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69",
+		.klen	= 16,
+		.iv	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
+		.assoc	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
+			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
+			  "\x38\x1d\x3b\x4a\xe9\x7e\x62",
+		.alen	= 31,
+		.input	= "\x29\xc4\xf0\x03\xc1\x86\xdf\x06"
+			  "\x5c\x7b\xef\x64\x87\x00\xd1\x37"
+			  "\xa7\x08\xbc\x7f\x8f\x41\x54\xd0"
+			  "\x3e\xf1\xc3\xa2\x96\x84\xdd\x2a"
+			  "\x2d\x21\x30\xf9\x02\xdb\x06\x0c"
+			  "\xf1\x5a\x66\x69\xe0\xca\x83",
+		.ilen	= 47,
+		.result	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a",
+		.rlen	= 31,
+	}, {
+		.key	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70",
+		.klen	= 16,
+		.iv	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
+		.assoc	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
+			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
+			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81",
+		.alen	= 32,
+		.input	= "\xe2\x2e\x44\xdf\xd3\x60\x6d\xb2"
+			  "\x70\x57\x37\xc5\xc2\x4f\x8d\x14"
+			  "\xc6\xbf\x8b\xec\xf5\x62\x67\xf2"
+			  "\x2f\xa1\xe6\xd6\xa7\xb1\x8c\x54"
+			  "\xe5\x6b\x49\xf9\x6e\x90\xc3\xaa"
+			  "\x7a\x00\x2e\x4d\x7f\x31\x2e\x81",
+		.ilen	= 48,
+		.result	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
+		.rlen	= 32,
+	}, {
+		.key	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76",
+		.klen	= 16,
+		.iv	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
+		.assoc	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
+			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
+			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58"
+			  "\x1a",
+		.alen	= 33,
+		.input	= "\xc7\xca\x26\x61\x57\xee\xa2\xb9"
+			  "\xb1\x37\xde\x95\x06\x90\x11\x08"
+			  "\x4d\x30\x9f\x24\xc0\x56\xb7\xe1"
+			  "\x0b\x9f\xd2\x57\xe9\xd2\xb1\x76"
+			  "\x56\x9a\xb4\x58\xc5\x08\xfc\xb5"
+			  "\xf2\x31\x9b\xc9\xcd\xb3\x64\xdb"
+			  "\x6f\x50\xbf\xf4\x73\x9d\xfb\x6b"
+			  "\xef\x35\x25\x48\xed\xcf\x29\xa8"
+			  "\xac\xc3\xb9\xcb\x61\x8f\x73\x92"
+			  "\x2c\x7a\x6f\xda\xf9\x09\x6f\xe1"
+			  "\xc4",
+		.ilen	= 81,
+		.result	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d\x4d\x54\x51\x84\x61\xf6\x8e"
+			  "\x03\x31\xf2\x25\x16\xcc\xaa\xc6"
+			  "\x75\x73\x20\x30\x59\x54\xb2\xf0"
+			  "\x3a\x4b\xe0\x23\x8e\xa6\x08\x35"
+			  "\x8a",
+		.rlen	= 65,
+	}, {
+		.key	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c",
+		.klen	= 16,
+		.iv	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
+		.assoc	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
+			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
+			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e"
+			  "\x28\xce\x57\x34\xcd\x6e\x84\x4c"
+			  "\x17\x3c\xe1\xb2\xa8\x0b\xbb\xf1"
+			  "\x96\x41\x0d\x69\xe8\x54\x0a\xc8"
+			  "\x15\x4e\x91\x92\x89\x4b\xb7\x9b"
+			  "\x21",
+		.alen	= 65,
+		.input	= "\x57\xcd\x3d\x46\xc5\xf9\x68\x3b"
+			  "\x2c\x0f\xb4\x7e\x7b\x64\x3e\x40"
+			  "\xf3\x78\x63\x34\x89\x79\x39\x6b"
+			  "\x61\x64\x4a\x9a\xfa\x70\xa4\xd3"
+			  "\x54\x0b\xea\x05\xa6\x95\x64\xed"
+			  "\x3d\x69\xa2\x0c\x27\x56\x2f\x34"
+			  "\x66",
+		.ilen	= 49,
+		.result	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac",
+		.rlen	= 33,
+	}, {
+		.key	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82",
+		.klen	= 16,
+		.iv	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
+		.assoc	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
+		.alen	= 16,
+		.input	= "\xfc\x85\x06\x28\x8f\xe8\x23\x1f"
+			  "\x33\x98\x87\xde\x08\xb6\xb6\xae"
+			  "\x3e\xa4\xf8\x19\xf1\x92\x60\x39"
+			  "\xb9\x6b\x3f\xdf\xc8\xcb\x30",
+		.ilen	= 31,
+		.result	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
+		.rlen	= 16,
+	}, {
+		.key	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.klen	= 16,
+		.iv	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
+		.assoc	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
+		.alen	= 16,
+		.input	= "\x74\x7d\x70\x07\xe9\xba\x01\xee"
+			  "\x6c\xc6\x6f\x50\x25\x33\xbe\x50"
+			  "\x17\xb8\x17\x62\xed\x80\xa2\xf5"
+			  "\x03\xde\x85\x71\x5d\x34",
+		.ilen	= 30,
+		.result	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
+		.rlen	= 16,
+	}, {
+		.key	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.klen	= 16,
+		.iv	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
+		.assoc	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
+		.alen	= 16,
+		.input	= "\xf4\xb3\x85\xf9\xac\xde\xb1\x38"
+			  "\x29\xfd\x6c\x7c\x49\xe5\x1d\xaf"
+			  "\xba\xea\xd4\xfa\x3f\x11\x33\x98",
+		.ilen	= 24,
+		.result	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
+		.rlen	= 16,
+	}, {
+		.key	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.klen	= 16,
+		.iv	= "\xbb\x3a\xf7\x57\xc6\x36\x7c\x22"
+			  "\x36\xab\xde\xc6\x6d\x32\x70\x17",
+		.assoc	= "\xcb\x03\x85\xbf\x0a\xd5\x26\xa9"
+			  "\x56\xe1\x0a\xeb\x6c\xfb\xa1\x98",
+		.alen	= 16,
+		.input	= "\xe6\x5c\x49\x4f\x78\xf3\x62\x86"
+			  "\xe1\xb7\xa5\xc3\x32\x88\x3c\x8c"
+			  "\x6e",
+		.ilen	= 17,
+		.result	= "\xda\xcc\x14\x27\x4e\x74\xd1\x30"
+			  "\x76\x18\x37\x0f\x6a\xc4\xd1\x1a",
+		.rlen	= 16,
+	},
+};
+
+/*
+ * MORUS-1280 test vectors - generated via reference implementation from
+ * SUPERCOP (https://bench.cr.yp.to/supercop.html):
+ *
+ *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
+ *   (see crypto_aead/morus1280128v2/ and crypto_aead/morus1280256v2/)
+ */
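+
+/*
+ * The two reference directories correspond to the two key sizes
+ * MORUS-1280 accepts: morus1280128v2 covers 128-bit keys (klen = 16, as
+ * in the vectors immediately below) and morus1280256v2 covers 256-bit
+ * keys.
+ */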
+static const struct aead_testvec morus1280_enc_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 16,
+		.iv	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x91\x85\x0f\xf5\x52\x9e\xce\xce"
+			  "\x65\x99\xc7\xbf\xd3\x76\xe8\x98",
+		.rlen	= 16,
+	}, {
+		.key	= "\x3c\x24\x39\x9f\x10\x7b\xa8\x1b"
+			  "\x80\xda\xb2\x91\xf9\x24\xc2\x06",
+		.klen	= 16,
+		.iv	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x69",
+		.ilen	= 1,
+		.result	= "\x88\xc3\x4c\xf0\x2f\x43\x76\x13"
+			  "\x96\xda\x76\x34\x33\x4e\xd5\x39"
+			  "\x73",
+		.rlen	= 17,
+	}, {
+		.key	= "\x79\x49\x73\x3e\x20\xf7\x51\x37"
+			  "\x01\xb4\x64\x22\xf3\x48\x85\x0c",
+		.klen	= 16,
+		.iv	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xa6\xa4\x1e\x76\xec\xd4\x50\xcc"
+			  "\x62\x58\xe9\x8f\xef\xa4\x17\x91"
+			  "\xb4\x96\x9f\x6b\xce\x38\xa5\x46"
+			  "\x13\x7d\x64\x93\xd7\x05\xf5",
+		.ilen	= 31,
+		.result	= "\x3e\x5c\x3b\x58\x3b\x7d\x2a\x22"
+			  "\x75\x0b\x24\xa6\x0e\xc3\xde\x52"
+			  "\x97\x0b\x64\xd4\xce\x90\x52\xf7"
+			  "\xef\xdb\x6a\x38\xd2\xa8\xa1\x0d"
+			  "\xe0\x61\x33\x24\xc6\x4d\x51\xbc"
+			  "\xa4\x21\x74\xcf\x19\x16\x59",
+		.rlen	= 47,
+	}, {
+		.key	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47\x12",
+		.klen	= 16,
+		.iv	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xe2\xc9\x58\x15\xfc\x4f\xf8\xe8"
+			  "\xe3\x32\x9b\x21\xe9\xc8\xd9\x97"
+			  "\xde\x58\xab\xf0\xd3\xd8\x27\x60"
+			  "\xd5\xaa\x43\x6b\xb1\x64\x95\xa4",
+		.ilen	= 32,
+		.result	= "\x30\x82\x9c\x2b\x67\xcb\xf9\x1f"
+			  "\xde\x9f\x77\xb2\xda\x92\x61\x5c"
+			  "\x09\x0b\x2d\x9a\x26\xaa\x1c\x06"
+			  "\xab\x74\xb7\x2b\x95\x5f\x9f\xa1"
+			  "\x9a\xff\x50\xa0\xa2\xff\xc5\xad"
+			  "\x21\x8e\x84\x5c\x12\x61\xb2\xae",
+		.rlen	= 48,
+	}, {
+		.key	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.klen	= 16,
+		.iv	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x1f\xee\x92\xb4\x0c\xcb\xa1\x04"
+			  "\x64\x0c\x4d\xb2\xe3\xec\x9c\x9d"
+			  "\x09\x1a\xb7\x74\xd8\x78\xa9\x79"
+			  "\x96\xd8\x22\x43\x8c\xc3\x34\x7b"
+			  "\xc4",
+		.ilen	= 33,
+		.result	= "\x67\x5d\x8e\x45\xc8\x39\xf5\x17"
+			  "\xc1\x1d\x2a\xdd\x88\x67\xda\x1f"
+			  "\x6d\xe8\x37\x28\x5a\xc1\x5e\x9f"
+			  "\xa6\xec\xc6\x92\x05\x4b\xc0\xa3"
+			  "\x63\xef\x88\xa4\x9b\x0a\x5c\xed"
+			  "\x2b\x6a\xac\x63\x52\xaa\x10\x94"
+			  "\xd0",
+		.rlen	= 49,
+	}, {
+		.key	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f",
+		.klen	= 16,
+		.iv	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x5c\x13\xcb\x54\x1c\x47\x4a\x1f"
+			  "\xe5\xe6\xff\x44\xdd\x11\x5f\xa3"
+			  "\x33\xdd\xc2\xf8\xdd\x18\x2b\x93"
+			  "\x57\x05\x01\x1c\x66\x22\xd3\x51"
+			  "\xd3\xdf\x18\xc9\x30\x66\xed\xb1"
+			  "\x96\x58\xd5\x8c\x64\x8c\x7c\xf5"
+			  "\x01\xd0\x74\x5f\x9b\xaa\xf6\xd1"
+			  "\xe6\x16\xa2\xac\xde\x47\x40",
+		.ilen	= 63,
+		.result	= "\x7d\x61\x1a\x35\x20\xcc\x07\x88"
+			  "\x03\x98\x87\xcf\xc0\x6e\x4d\x19"
+			  "\xe3\xd4\x0b\xfb\x29\x8f\x49\x1a"
+			  "\x3a\x06\x77\xce\x71\x2c\xcd\xdd"
+			  "\xed\xf6\xc9\xbe\xa6\x3b\xb8\xfc"
+			  "\x6c\xbe\x77\xed\x74\x0e\x20\x85"
+			  "\xd0\x65\xde\x24\x6f\xe3\x25\xc5"
+			  "\xdf\x5b\x0f\xbd\x8a\x88\x78\xc9"
+			  "\xe5\x81\x37\xde\x84\x7a\xf6\x84"
+			  "\x99\x7a\x72\x9c\x54\x31\xa1",
+		.rlen	= 79,
+	}, {
+		.key	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25",
+		.klen	= 16,
+		.iv	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x98\x37\x05\xf3\x2c\xc2\xf3\x3b"
+			  "\x66\xc0\xb1\xd5\xd7\x35\x21\xaa"
+			  "\x5d\x9f\xce\x7c\xe2\xb8\xad\xad"
+			  "\x19\x33\xe0\xf4\x40\x81\x72\x28"
+			  "\xe1\x8b\x1c\xf8\x91\x78\xff\xaf"
+			  "\xb0\x68\x69\xf2\x27\x35\x91\x84"
+			  "\x2e\x37\x5b\x00\x04\xff\x16\x9c"
+			  "\xb5\x19\x39\xeb\xd9\xcd\x29\x9a",
+		.ilen	= 64,
+		.result	= "\x05\xc5\xb1\xf9\x1b\xb9\xab\x2c"
+			  "\xa5\x07\x12\xa7\x12\x39\x60\x66"
+			  "\x30\x81\x4a\x03\x78\x28\x45\x52"
+			  "\xd2\x2b\x24\xfd\x8b\xa5\xb7\x66"
+			  "\x6f\x45\xd7\x3b\x67\x6f\x51\xb9"
+			  "\xc0\x3d\x6c\xca\x1e\xae\xff\xb6"
+			  "\x79\xa9\xe4\x82\x5d\x4c\x2d\xdf"
+			  "\xeb\x71\x40\xc9\x2c\x40\x45\x6d"
+			  "\x73\x77\x01\xf3\x4f\xf3\x9d\x2a"
+			  "\x5d\x57\xa8\xa1\x18\xa2\xad\xcb",
+		.rlen	= 80,
+	}, {
+		.key	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b",
+		.klen	= 16,
+		.iv	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
+		.assoc	= "\xc5",
+		.alen	= 1,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x4d\xbf\x11\xac\x7f\x97\x0b\x2e"
+			  "\x89\x3b\x9d\x0f\x83\x1c\x08\xc3",
+		.rlen	= 16,
+	}, {
+		.key	= "\xe4\x25\xcd\xfa\x80\xdd\x46\xde"
+			  "\x07\xd1\x90\x8b\xcf\x23\x15\x31",
+		.klen	= 16,
+		.iv	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
+		.assoc	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34"
+			  "\xe8\x73\x62\x64\xab\x50\xd0\xda"
+			  "\x6b\x83\x66\xaf\x3e\x27\xc9",
+		.alen	= 31,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x5b\xc0\x8d\x54\xe4\xec\xbe\x38"
+			  "\x03\x12\xf9\xcc\x9e\x46\x42\x92",
+		.rlen	= 16,
+	}, {
+		.key	= "\x20\x4a\x07\x99\x91\x58\xee\xfa"
+			  "\x88\xab\x42\x1c\xc9\x47\xd7\x38",
+		.klen	= 16,
+		.iv	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
+		.assoc	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b"
+			  "\x12\x35\x6e\xe8\xb0\xf0\x52\xf3"
+			  "\x2d\xb0\x45\x87\x18\x86\x68\xf6",
+		.alen	= 32,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x48\xc5\xc3\x4c\x40\x2e\x2f\xc2"
+			  "\x6d\x65\xe0\x67\x9c\x1d\xa0\xf0",
+		.rlen	= 16,
+	}, {
+		.key	= "\x5d\x6f\x41\x39\xa1\xd4\x97\x16"
+			  "\x09\x85\xf4\xae\xc3\x6b\x9a\x3e",
+		.klen	= 16,
+		.iv	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
+		.assoc	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
+			  "\x3c\xf8\x79\x6c\xb6\x90\xd4\x0d"
+			  "\xee\xde\x23\x60\xf2\xe5\x08\xcc"
+			  "\x97",
+		.alen	= 33,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x28\x64\x78\x51\x55\xd8\x56\x4a"
+			  "\x58\x3e\xf7\xbe\xee\x21\xfe\x94",
+		.rlen	= 16,
+	}, {
+		.key	= "\x99\x93\x7a\xd8\xb1\x50\x40\x31"
+			  "\x8a\x60\xa6\x3f\xbd\x90\x5d\x44",
+		.klen	= 16,
+		.iv	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
+		.assoc	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
+			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
+			  "\xaf\x0b\x02\x38\xcc\x44\xa7\xa3"
+			  "\xa6\xbf\x31\x93\x60\xcd\xda\x63"
+			  "\x2c\xb1\xaa\x19\xc8\x19\xf8\xeb"
+			  "\x03\xa1\xe8\xbe\x37\x54\xec\xa2"
+			  "\xcd\x2c\x45\x58\xbd\x8e\x80",
+		.alen	= 63,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xb3\xa6\x00\x4e\x09\x20\xac\x21"
+			  "\x77\x72\x69\x76\x2d\x36\xe5\xc8",
+		.rlen	= 16,
+	}, {
+		.key	= "\xd6\xb8\xb4\x77\xc1\xcb\xe9\x4d"
+			  "\x0a\x3a\x58\xd1\xb7\xb4\x1f\x4a",
+		.klen	= 16,
+		.iv	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
+		.assoc	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
+			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
+			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a"
+			  "\xb4\x6b\x35\xc2\xc1\xdf\xed\x60"
+			  "\x46\xc1\x3e\x7f\x8c\xc2\x0e\x7a"
+			  "\x30\x08\xd0\x5f\xa0\xaa\x0c\x6d"
+			  "\x9c\x2f\xdb\x97\xb8\x15\x69\x01",
+		.alen	= 64,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x65\x33\x7b\xa1\x63\xf4\x20\xdd"
+			  "\xe4\xb9\x4a\xaa\x9a\x21\xaa\x14",
+		.rlen	= 16,
+	}, {
+		.key	= "\x12\xdd\xee\x17\xd1\x47\x92\x69"
+			  "\x8b\x14\x0a\x62\xb1\xd9\xe2\x50",
+		.klen	= 16,
+		.iv	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
+		.assoc	= "\x31",
+		.alen	= 1,
+		.input	= "\x40",
+		.ilen	= 1,
+		.result	= "\x1d\x47\x17\x34\x86\xf5\x54\x1a"
+			  "\x6d\x28\xb8\x5d\x6c\xcf\xa0\xb9"
+			  "\xbf",
+		.rlen	= 17,
+	}, {
+		.key	= "\x4f\x01\x27\xb6\xe1\xc3\x3a\x85"
+			  "\x0c\xee\xbc\xf4\xab\xfd\xa5\x57",
+		.klen	= 16,
+		.iv	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
+		.assoc	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a"
+			  "\xe6\x01\xa8\x7e\xca\x10\xdc\x73"
+			  "\xf4\x94\x9f\xc1\x5a\x61\x85",
+		.alen	= 31,
+		.input	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37\xdb"
+			  "\xb0\xb2\x2b\x9f\x0b\xb8\xbd\x7a"
+			  "\x24\xa0\xd6\xb7\x11\x79\x6c",
+		.ilen	= 31,
+		.result	= "\x78\x90\x52\xae\x0f\xf7\x2e\xef"
+			  "\x63\x09\x08\x58\xb5\x56\xbd\x72"
+			  "\x6e\x42\xcf\x27\x04\x7c\xdb\x92"
+			  "\x18\xe9\xa4\x33\x90\xba\x62\xb5"
+			  "\x70\xd3\x88\x9b\x4f\x05\xa7\x51"
+			  "\x85\x87\x17\x09\x42\xed\x4e",
+		.rlen	= 47,
+	}, {
+		.key	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67\x5d",
+		.klen	= 16,
+		.iv	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
+		.assoc	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60"
+			  "\x10\xc3\xb3\x02\xcf\xb0\x5e\x8d"
+			  "\xb5\xc2\x7e\x9a\x35\xc0\x24\xfd",
+		.alen	= 32,
+		.input	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2"
+			  "\xdb\x74\x36\x23\x11\x58\x3f\x93"
+			  "\xe5\xcd\xb5\x90\xeb\xd8\x0c\xb3",
+		.ilen	= 32,
+		.result	= "\x1d\x2c\x57\xe0\x50\x38\x3d\x41"
+			  "\x2e\x71\xc8\x3b\x92\x43\x58\xaf"
+			  "\x5a\xfb\xad\x8f\xd9\xd5\x8a\x5e"
+			  "\xdb\xf3\xcd\x3a\x2b\xe1\x2c\x1a"
+			  "\xb0\xed\xe3\x0c\x6e\xf9\xf2\xd6"
+			  "\x90\xe6\xb1\x0e\xa5\x8a\xac\xb7",
+		.rlen	= 48,
+	}, {
+		.key	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.klen	= 16,
+		.iv	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
+		.assoc	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
+			  "\x3b\x86\xbf\x86\xd4\x50\xe0\xa7"
+			  "\x76\xef\x5c\x72\x0f\x1f\xc3\xd4"
+			  "\xee",
+		.alen	= 33,
+		.input	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05\x36\x42\xa7\x16\xf8\xc1\xad"
+			  "\xa7\xfb\x94\x68\xc5\x37\xab\x8a"
+			  "\x72",
+		.ilen	= 33,
+		.result	= "\x59\x10\x84\x1c\x83\x4c\x8b\xfc"
+			  "\xfd\x2e\x4b\x46\x84\xff\x78\x4e"
+			  "\x50\xda\x5c\xb9\x61\x1d\xf5\xb9"
+			  "\xfe\xbb\x7f\xae\x8c\xc1\x24\xbd"
+			  "\x8c\x6f\x1f\x9b\xce\xc6\xc1\x37"
+			  "\x08\x06\x5a\xe5\x96\x10\x95\xc2"
+			  "\x5e",
+		.rlen	= 49,
+	}, {
+		.key	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69",
+		.klen	= 16,
+		.iv	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
+		.assoc	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
+			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
+			  "\x38\x1d\x3b\x4a\xe9\x7e\x62\xaa"
+			  "\xfd\xc9\x4a\xa9\xa9\x39\x4b\x54"
+			  "\xc8\x0e\x24\x7f\x5e\x10\x7a\x45"
+			  "\x10\x0b\x56\x85\xad\x54\xaa\x66"
+			  "\xa8\x43\xcd\xd4\x9b\xb7\xfa",
+		.alen	= 63,
+		.input	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a\x60"
+			  "\x80\xf4\x4b\xf4\xc1\x3d\xd0\x93"
+			  "\xcf\x12\xc9\x59\x8f\x7a\x7f\xa8"
+			  "\x1b\xa5\x50\xed\x87\xa9\x72\x59"
+			  "\x9c\x44\xb2\xa4\x99\x98\x34",
+		.ilen	= 63,
+		.result	= "\x9a\x12\xbc\xdf\x72\xa8\x56\x22"
+			  "\x49\x2d\x07\x92\xfc\x3d\x6d\x5f"
+			  "\xef\x36\x19\xae\x91\xfa\xd6\x63"
+			  "\x46\xea\x8a\x39\x14\x21\xa6\x37"
+			  "\x18\xfc\x97\x3e\x16\xa5\x4d\x39"
+			  "\x45\x2e\x69\xcc\x9c\x5f\xdf\x6d"
+			  "\x5e\xa2\xbf\xac\x83\x32\x72\x52"
+			  "\x58\x58\x23\x40\xfd\xa5\xc2\xe6"
+			  "\xe9\x5a\x50\x98\x00\x58\xc9\x86"
+			  "\x4f\x20\x37\xdb\x7b\x22\xa3",
+		.rlen	= 79,
+	}, {
+		.key	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70",
+		.klen	= 16,
+		.iv	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
+		.assoc	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
+			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
+			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81"
+			  "\x0b\x76\x4f\xd7\x0a\x4b\x5e\x51"
+			  "\xe3\x1d\xb9\xe5\x21\xb9\x8f\xd4"
+			  "\x3d\x72\x3e\x26\x16\xa9\xca\x32"
+			  "\x77\x47\x63\x14\x95\x3d\xe4\x34",
+		.alen	= 64,
+		.input	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37"
+			  "\x8f\xa1\x50\x23\x22\x4f\xe3\x91"
+			  "\xe9\x21\x5e\xbf\x52\x23\x95\x37"
+			  "\x48\x0c\x38\x8f\xf0\xff\x92\x24"
+			  "\x6b\x47\x49\xe3\x94\x1f\x1e\x01",
+		.ilen	= 64,
+		.result	= "\xe6\xeb\x92\x5a\x5b\xf0\x2d\xbb"
+			  "\x23\xec\x35\xe3\xae\xc9\xfb\x0b"
+			  "\x90\x14\x46\xeb\xa8\x8d\xb0\x9b"
+			  "\x39\xda\x8b\x48\xec\xb2\x00\x4e"
+			  "\x80\x6f\x46\x4f\x9b\x1e\xbb\x35"
+			  "\xea\x5a\xbc\xa2\x36\xa5\x89\x45"
+			  "\xc2\xd6\xd7\x15\x0b\xf6\x6c\x56"
+			  "\xec\x99\x7d\x61\xb3\x15\x93\xed"
+			  "\x83\x1e\xd9\x48\x84\x0b\x37\xfe"
+			  "\x95\x74\x44\xd5\x54\xa6\x27\x06",
+		.rlen	= 80,
+	}, {
+		.key	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76",
+		.klen	= 16,
+		.iv	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
+		.assoc	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
+			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
+			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58"
+			  "\x1a\x22\x53\x05\x6b\x5c\x71\x4f"
+			  "\xfd\x2d\x4d\x4c\xe5\x62\xa5\x63"
+			  "\x6a\xda\x26\xc8\x7f\xff\xea\xfd"
+			  "\x46\x4a\xfa\x53\x8f\xc4\xcd\x68"
+			  "\x58",
+		.alen	= 65,
+		.input	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d\x4d\x54\x51\x84\x61\xf6\x8e"
+			  "\x03\x31\xf2\x25\x16\xcc\xaa\xc6"
+			  "\x75\x73\x20\x30\x59\x54\xb2\xf0"
+			  "\x3a\x4b\xe0\x23\x8e\xa6\x08\x35"
+			  "\x8a\xdf\x27\xa0\xe4\x60\x99\xae"
+			  "\x8e\x43\xd9\x39\x7b\x10\x40\x67"
+			  "\x5c\x7e\xc9\x70\x63\x34\xca\x59"
+			  "\xfe\x86\xbc\xb7\x9c\x39\xf3\x6d"
+			  "\x6a\x41\x64\x6f\x16\x7f\x65\x7e"
+			  "\x89\x84\x68\xeb\xb0\x51\xbe\x55"
+			  "\x33\x16\x59\x6c\x3b\xef\x88\xad"
+			  "\x2f\xab\xbc\x25\x76\x87\x41\x2f"
+			  "\x36",
+		.ilen	= 129,
+		.result	= "\x89\x24\x27\x86\xdc\xd7\x6b\xd9"
+			  "\xd1\xcd\xdc\x16\xdd\x2c\xc1\xfb"
+			  "\x52\xb5\xb3\xab\x50\x99\x3f\xa0"
+			  "\x38\xa4\x74\xa5\x04\x15\x63\x05"
+			  "\x8f\x54\x81\x06\x5a\x6b\xa4\x63"
+			  "\x6d\xa7\x21\xcb\xff\x42\x30\x8e"
+			  "\x3b\xd1\xca\x3f\x4b\x1a\xb8\xc3"
+			  "\x42\x01\xe6\xbc\x75\x15\x87\xee"
+			  "\xc9\x8e\x65\x01\xd9\xd8\xb5\x9f"
+			  "\x48\x86\xa6\x5f\x2c\xc7\xb5\xb0"
+			  "\xed\x5d\x14\x7c\x3f\x40\xb1\x0b"
+			  "\x72\xef\x94\x8d\x7a\x85\x56\xe5"
+			  "\x56\x08\x15\x56\xba\xaf\xbd\xf0"
+			  "\x20\xef\xa0\xf6\xa9\xad\xa2\xc9"
+			  "\x1c\x3b\x28\x51\x7e\x77\xb2\x18"
+			  "\x4f\x61\x64\x37\x22\x36\x6d\x78"
+			  "\xed\xed\x35\xe8\x83\xa5\xec\x25"
+			  "\x6b\xff\x5f\x1a\x09\x96\x3d\xdc"
+			  "\x20",
+		.rlen	= 145,
+	}, {
+		.key	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c",
+		.klen	= 16,
+		.iv	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
+		.assoc	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
+			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
+			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e"
+			  "\x28\xce\x57\x34\xcd\x6e\x84\x4c"
+			  "\x17\x3c\xe1\xb2\xa8\x0b\xbb\xf1"
+			  "\x96\x41\x0d\x69\xe8\x54\x0a\xc8"
+			  "\x15\x4e\x91\x92\x89\x4b\xb7\x9b"
+			  "\x21\xf7\x42\x89\xac\x12\x2a\x54"
+			  "\x69\xee\x18\xc7\x8d\xed\xe8\xfd"
+			  "\xbb\x04\x28\xe6\x8a\x3c\x98\xc1"
+			  "\x04\x2d\xa9\xa1\x24\x83\xff\xe9"
+			  "\x55\x7a\xf0\xd1\xf6\x63\x05\xe1"
+			  "\xd9\x1e\x75\x72\xc1\x9f\xae\x32"
+			  "\xe1\x6b\xcd\x9e\x61\x19\x23\x86"
+			  "\xd9\xd2\xaf\x8e\xd5\xd3\xa8\xa9"
+			  "\x51",
+		.alen	= 129,
+		.input	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
+			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
+			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
+			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
+			  "\x54",
+		.ilen	= 65,
+		.result	= "\x36\x78\xb9\x22\xde\x62\x35\x55"
+			  "\x1a\x7a\xf5\x45\xbc\xd7\x15\x82"
+			  "\x01\xe9\x5a\x07\xea\x46\xaf\x91"
+			  "\xcb\x73\xa5\xee\xe1\xb4\xbf\xc2"
+			  "\xdb\xd2\x9d\x59\xde\xfc\x83\x00"
+			  "\xf5\x46\xac\x97\xd5\x57\xa9\xb9"
+			  "\x1f\x8c\xe8\xca\x68\x8b\x91\x0c"
+			  "\x01\xbe\x0a\xaf\x7c\xf6\x67\xa4"
+			  "\xbf\xbc\x88\x3f\x5d\xd1\xf9\x19"
+			  "\x0f\x9d\xb2\xaf\xb9\x6e\x17\xdf"
+			  "\xa2",
+		.rlen	= 81,
+	}, {
+		.key	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82",
+		.klen	= 16,
+		.iv	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
+		.assoc	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85"
+			  "\x0e\x51\xf9\x1c\xee\x70\x6a\x27"
+			  "\x3d\xd3\xb7\xac\x51\xfa\xdf\x05",
+		.alen	= 32,
+		.input	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07"
+			  "\xd9\x02\x7c\x3d\x2f\x18\x4b\x2d"
+			  "\x6e\xde\xee\xa2\x08\x12\xc7\xba",
+		.ilen	= 32,
+		.result	= "\x08\x1b\x95\x0e\x41\x95\x02\x4b"
+			  "\x9c\xbb\xa8\xd0\x7c\xd3\x44\x6e"
+			  "\x89\x14\x33\x70\x0a\xbc\xea\x39"
+			  "\x88\xaa\x2b\xd5\x73\x11\x55\xf5"
+			  "\x33\x33\x9c\xd7\x42\x34\x49\x8e"
+			  "\x2f\x03\x30\x05\x47\xaf\x34",
+		.rlen	= 47,
+	}, {
+		.key	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.klen	= 16,
+		.iv	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
+		.assoc	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c"
+			  "\x39\x14\x05\xa0\xf3\x10\xec\x41"
+			  "\xff\x01\x95\x84\x2b\x59\x7f\xdb",
+		.alen	= 32,
+		.input	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d"
+			  "\x03\xc4\x88\xc1\x35\xb8\xcd\x47"
+			  "\x2f\x0c\xcd\x7a\xe2\x71\x66\x91",
+		.ilen	= 32,
+		.result	= "\x97\xca\xf4\xe0\x8d\x89\xbf\x68"
+			  "\x0c\x60\xb9\x27\xdf\xaa\x41\xc6"
+			  "\x25\xd8\xf7\x1f\x10\x15\x48\x61"
+			  "\x4c\x95\x00\xdf\x51\x9b\x7f\xe6"
+			  "\x24\x40\x9e\xbe\x3b\xeb\x1b\x98"
+			  "\xb9\x9c\xe5\xef\xf2\x05",
+		.rlen	= 46,
+	}, {
+		.key	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.klen	= 16,
+		.iv	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
+		.assoc	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92"
+			  "\x63\xd6\x10\x24\xf8\xb0\x6e\x5a"
+			  "\xc0\x2e\x74\x5d\x06\xb8\x1e\xb2",
+		.alen	= 32,
+		.input	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13"
+			  "\x2e\x86\x93\x45\x3a\x58\x4f\x61"
+			  "\xf0\x3a\xac\x53\xbc\xd0\x06\x68",
+		.ilen	= 32,
+		.result	= "\x63\x4c\x2a\x8e\xb4\x6b\x63\x0d"
+			  "\xb5\xec\x9b\x4e\x12\x23\xa3\xcf"
+			  "\x1a\x5a\x70\x15\x5a\x10\x40\x51"
+			  "\xca\x47\x4c\x9d\xc9\x97\xf4\x77"
+			  "\xdb\xc8\x10\x2d\xdc\x65\x20\x3f",
+		.rlen	= 40,
+	}, {
+		.key	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.klen	= 16,
+		.iv	= "\xbb\x3a\xf7\x57\xc6\x36\x7c\x22"
+			  "\x36\xab\xde\xc6\x6d\x32\x70\x17",
+		.assoc	= "\xcb\x03\x85\xbf\x0a\xd5\x26\xa9"
+			  "\x56\xe1\x0a\xeb\x6c\xfb\xa1\x98"
+			  "\x8d\x98\x1c\xa8\xfe\x50\xf0\x74"
+			  "\x81\x5c\x53\x35\xe0\x17\xbd\x88",
+		.alen	= 32,
+		.input	= "\xda\xcc\x14\x27\x4e\x74\xd1\x30"
+			  "\x76\x18\x37\x0f\x6a\xc4\xd1\x1a"
+			  "\x58\x49\x9f\xc9\x3f\xf8\xd1\x7a"
+			  "\xb2\x67\x8b\x2b\x96\x2f\xa5\x3e",
+		.ilen	= 32,
+		.result	= "\xf1\x62\x44\xc7\x5f\x19\xca\x43"
+			  "\x47\x2c\xaf\x68\x82\xbd\x51\xef"
+			  "\x3d\x65\xd8\x45\x2d\x06\x07\x78"
+			  "\x08\x2e\xb3\x23\xcd\x81\x12\x55"
+			  "\x1a",
+		.rlen	= 33,
+	}, {
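+		/* 256-bit key (.klen = 32) vectors start here; the
+		 * vectors above all use a 128-bit key.
+		 */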
+		.key	= "\xe9\x95\xa2\x8f\x93\x13\x7b\xb7"
+			  "\x96\x4e\x63\x33\x69\x8d\x02\x9b"
+			  "\x23\xf9\x22\xeb\x80\xa0\xb1\x81"
+			  "\xe2\x73\xc3\x21\x4d\x47\x8d\xf4",
+		.klen	= 32,
+		.iv	= "\xf8\x5e\x31\xf7\xd7\xb2\x25\x3e"
+			  "\xb7\x85\x90\x58\x67\x57\x33\x1d",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xdf\x2f\x83\xc0\x45\x4a\x2c\xcf"
+			  "\xb9\xd2\x41\xf6\x80\xa1\x52\x70",
+		.rlen	= 16,
+	}, {
+		.key	= "\x25\xba\xdc\x2e\xa3\x8f\x24\xd3"
+			  "\x17\x29\x15\xc5\x63\xb2\xc5\xa1"
+			  "\x4d\xbc\x2d\x6f\x85\x40\x33\x9a"
+			  "\xa3\xa0\xa1\xfa\x27\xa6\x2c\xca",
+		.klen	= 32,
+		.iv	= "\x34\x83\x6a\x96\xe7\x2d\xce\x5a"
+			  "\x38\x5f\x42\xe9\x61\x7b\xf5\x23",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x53",
+		.ilen	= 1,
+		.result	= "\x01\xd8\x55\x3c\xc0\x5a\x4b\xc7"
+			  "\x01\xf4\x08\xe3\x0d\xf7\xf0\x78"
+			  "\x53",
+		.rlen	= 17,
+	}, {
+		.key	= "\x62\xdf\x16\xcd\xb3\x0a\xcc\xef"
+			  "\x98\x03\xc7\x56\x5d\xd6\x87\xa8"
+			  "\x77\x7e\x39\xf3\x8a\xe0\xb5\xb4"
+			  "\x65\xce\x80\xd2\x01\x05\xcb\xa1",
+		.klen	= 32,
+		.iv	= "\x71\xa8\xa4\x35\xf7\xa9\x76\x75"
+			  "\xb8\x39\xf4\x7a\x5b\x9f\xb8\x29",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x8f\x3a\xc1\x05\x7f\xe7\xcb\x83"
+			  "\xf9\xa6\x4d\xc3\x58\x31\x19\x2c"
+			  "\xd7\x90\xc2\x56\x4e\xd8\x57\xc7"
+			  "\xf6\xf0\x27\xb4\x25\x4c\x83",
+		.ilen	= 31,
+		.result	= "\xc2\x4b\x41\x0f\x2d\xb9\x62\x07"
+			  "\xff\x8e\x74\xf8\xa1\xa6\xd5\x37"
+			  "\xa5\x64\x31\x5c\xca\x73\x9b\x43"
+			  "\xe6\x70\x63\x46\x95\xcb\xf7\xb5"
+			  "\x20\x8c\x75\x7a\x2a\x17\x2f\xa9"
+			  "\xb8\x4d\x11\x42\xd1\xf8\xf1",
+		.rlen	= 47,
+	}, {
+		.key	= "\x9e\x03\x4f\x6d\xc3\x86\x75\x0a"
+			  "\x19\xdd\x79\xe8\x57\xfb\x4a\xae"
+			  "\xa2\x40\x45\x77\x90\x80\x37\xce"
+			  "\x26\xfb\x5f\xaa\xdb\x64\x6b\x77",
+		.klen	= 32,
+		.iv	= "\xae\xcc\xde\xd5\x07\x25\x1f\x91"
+			  "\x39\x14\xa6\x0c\x55\xc4\x7b\x30",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xcc\x5f\xfb\xa4\x8f\x63\x74\x9f"
+			  "\x7a\x81\xff\x55\x52\x56\xdc\x33"
+			  "\x01\x52\xcd\xdb\x53\x78\xd9\xe1"
+			  "\xb7\x1d\x06\x8d\xff\xab\x22\x98",
+		.ilen	= 32,
+		.result	= "\xbb\x01\x7c\xd1\x2c\x33\x7b\x37"
+			  "\x0a\xee\xc4\x30\x19\xd7\x3a\x6f"
+			  "\xf8\x2b\x67\xf5\x3b\x84\x87\x2a"
+			  "\xfb\x07\x7a\x82\xb5\xe4\x85\x26"
+			  "\x1e\xa8\xe5\x04\x54\xce\xe5\x5f"
+			  "\xb5\x3f\xc1\xd5\x7f\xbd\xd2\xa6",
+		.rlen	= 48,
+	}, {
+		.key	= "\xdb\x28\x89\x0c\xd3\x01\x1e\x26"
+			  "\x9a\xb7\x2b\x79\x51\x1f\x0d\xb4"
+			  "\xcc\x03\x50\xfc\x95\x20\xb9\xe7"
+			  "\xe8\x29\x3e\x83\xb5\xc3\x0a\x4e",
+		.klen	= 32,
+		.iv	= "\xea\xf1\x18\x74\x17\xa0\xc8\xad"
+			  "\xba\xee\x58\x9d\x4f\xe8\x3d\x36",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x08\x84\x34\x44\x9f\xde\x1c\xbb"
+			  "\xfb\x5b\xb1\xe6\x4c\x7a\x9f\x39"
+			  "\x2c\x14\xd9\x5f\x59\x18\x5b\xfb"
+			  "\x79\x4b\xe5\x65\xd9\x0a\xc1\x6f"
+			  "\x2e",
+		.ilen	= 33,
+		.result	= "\xc2\xf4\x40\x55\xf9\x59\xff\x73"
+			  "\x08\xf5\x98\x92\x0c\x7b\x35\x9a"
+			  "\xa8\xf4\x42\x7e\x6f\x93\xca\x22"
+			  "\x23\x06\x1e\xf8\x89\x22\xf4\x46"
+			  "\x7c\x7c\x67\x75\xab\xe5\x75\xaa"
+			  "\x15\xd7\x83\x19\xfd\x31\x59\x5b"
+			  "\x32",
+		.rlen	= 49,
+	}, {
+		.key	= "\x17\x4d\xc3\xab\xe3\x7d\xc7\x42"
+			  "\x1b\x91\xdd\x0a\x4b\x43\xcf\xba"
+			  "\xf6\xc5\x5c\x80\x9a\xc0\x3b\x01"
+			  "\xa9\x56\x1d\x5b\x8f\x22\xa9\x25",
+		.klen	= 32,
+		.iv	= "\x27\x16\x51\x13\x27\x1c\x71\xc9"
+			  "\x3b\xc8\x0a\x2f\x49\x0c\x00\x3c",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x45\xa8\x6e\xe3\xaf\x5a\xc5\xd7"
+			  "\x7c\x35\x63\x77\x46\x9f\x61\x3f"
+			  "\x56\xd7\xe4\xe3\x5e\xb8\xdc\x14"
+			  "\x3a\x79\xc4\x3e\xb3\x69\x61\x46"
+			  "\x3c\xb6\x83\x4e\xb4\x26\xc7\x73"
+			  "\x22\xda\x52\x8b\x7d\x11\x98\xea"
+			  "\x62\xe1\x14\x1e\xdc\xfe\x0f\xad"
+			  "\x20\x76\x5a\xdc\x4e\x71\x13",
+		.ilen	= 63,
+		.result	= "\xc9\x82\x3b\x4b\x87\x84\xa5\xdb"
+			  "\xa0\x8c\xd3\x3e\x7f\x8d\xe8\x28"
+			  "\x2a\xdc\xfa\x01\x84\x87\x9a\x70"
+			  "\x81\x75\x37\x0a\xd2\x75\xa9\xb6"
+			  "\x21\x72\xee\x7e\x65\x95\xe5\xcc"
+			  "\x01\xb7\x39\xa6\x51\x15\xca\xff"
+			  "\x61\xdc\x97\x38\xcc\xf4\xca\xc7"
+			  "\x83\x9b\x05\x11\x72\x60\xf0\xb4"
+			  "\x7e\x06\xab\x0a\xc0\xbb\x59\x23"
+			  "\xaa\x2d\xfc\x4e\x35\x05\x59",
+		.rlen	= 79,
+	}, {
+		.key	= "\x54\x71\xfd\x4b\xf3\xf9\x6f\x5e"
+			  "\x9c\x6c\x8f\x9c\x45\x68\x92\xc1"
+			  "\x21\x87\x67\x04\x9f\x60\xbd\x1b"
+			  "\x6a\x84\xfc\x34\x6a\x81\x48\xfb",
+		.klen	= 32,
+		.iv	= "\x63\x3b\x8b\xb3\x37\x98\x1a\xe5"
+			  "\xbc\xa2\xbc\xc0\x43\x31\xc2\x42",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x81\xcd\xa8\x82\xbf\xd6\x6e\xf3"
+			  "\xfd\x0f\x15\x09\x40\xc3\x24\x45"
+			  "\x81\x99\xf0\x67\x63\x58\x5e\x2e"
+			  "\xfb\xa6\xa3\x16\x8d\xc8\x00\x1c"
+			  "\x4b\x62\x87\x7c\x15\x38\xda\x70"
+			  "\x3d\xea\xe7\xf2\x40\xba\xae\x79"
+			  "\x8f\x48\xfc\xbf\x45\x53\x2e\x78"
+			  "\xef\x79\xf0\x1b\x49\xf7\xfd\x9c",
+		.ilen	= 64,
+		.result	= "\x11\x7c\x7d\xef\xce\x29\x95\xec"
+			  "\x7e\x9f\x42\xa6\x26\x07\xa1\x75"
+			  "\x2f\x4e\x09\x9a\xf6\x6b\xc2\xfa"
+			  "\x0d\xd0\x17\xdc\x25\x1e\x9b\xdc"
+			  "\x5f\x8c\x1c\x60\x15\x4f\x9b\x20"
+			  "\x7b\xff\xcd\x82\x60\x84\xf4\xa5"
+			  "\x20\x9a\x05\x19\x5b\x02\x0a\x72"
+			  "\x43\x11\x26\x58\xcf\xc5\x41\xcf"
+			  "\x13\xcc\xde\x32\x92\xfa\x86\xf2"
+			  "\xaf\x16\xe8\x8f\xca\xb6\xfd\x54",
+		.rlen	= 80,
+	}, {
+		.key	= "\x90\x96\x36\xea\x03\x74\x18\x7a"
+			  "\x1d\x46\x42\x2d\x3f\x8c\x54\xc7"
+			  "\x4b\x4a\x73\x89\xa4\x00\x3f\x34"
+			  "\x2c\xb1\xdb\x0c\x44\xe0\xe8\xd2",
+		.klen	= 32,
+		.iv	= "\xa0\x5f\xc5\x52\x47\x13\xc2\x01"
+			  "\x3d\x7c\x6e\x52\x3d\x55\x85\x48",
+		.assoc	= "\xaf",
+		.alen	= 1,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x9b\xc5\x3b\x20\x0a\x88\x56\xbe"
+			  "\x69\xdf\xc4\xc4\x02\x46\x3a\xf0",
+		.rlen	= 16,
+	}, {
+		.key	= "\xcd\xbb\x70\x89\x13\xf0\xc1\x95"
+			  "\x9e\x20\xf4\xbf\x39\xb1\x17\xcd"
+			  "\x76\x0c\x7f\x0d\xa9\xa0\xc1\x4e"
+			  "\xed\xdf\xb9\xe4\x1e\x3f\x87\xa8",
+		.klen	= 32,
+		.iv	= "\xdc\x84\xfe\xf1\x58\x8f\x6b\x1c"
+			  "\xbe\x57\x20\xe3\x37\x7a\x48\x4f",
+		.assoc	= "\xeb\x4d\x8d\x59\x9c\x2e\x15\xa3"
+			  "\xde\x8d\x4d\x07\x36\x43\x78\xd0"
+			  "\x0b\x6d\x84\x4f\x2c\xf0\x82\x5b"
+			  "\x4e\xf6\x29\xd1\x8b\x6f\x56",
+		.alen	= 31,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xe0\x6d\xa1\x07\x98\x2f\x40\x2d"
+			  "\x2e\x9a\xd6\x61\x43\xc0\x74\x69",
+		.rlen	= 16,
+	}, {
+		.key	= "\x0a\xe0\xaa\x29\x24\x6c\x6a\xb1"
+			  "\x1f\xfa\xa6\x50\x33\xd5\xda\xd3"
+			  "\xa0\xce\x8a\x91\xae\x40\x43\x68"
+			  "\xae\x0d\x98\xbd\xf8\x9e\x26\x7f",
+		.klen	= 32,
+		.iv	= "\x19\xa9\x38\x91\x68\x0b\x14\x38"
+			  "\x3f\x31\xd2\x74\x31\x9e\x0a\x55",
+		.assoc	= "\x28\x72\xc7\xf8\xac\xaa\xbe\xbf"
+			  "\x5f\x67\xff\x99\x30\x67\x3b\xd6"
+			  "\x35\x2f\x90\xd3\x31\x90\x04\x74"
+			  "\x0f\x23\x08\xa9\x65\xce\xf6\xea",
+		.alen	= 32,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\xb9\x57\x13\x3e\x82\x31\x61\x65"
+			  "\x0d\x7f\x6c\x96\x93\x5c\x50\xe2",
+		.rlen	= 16,
+	}, {
+		.key	= "\x46\x04\xe3\xc8\x34\xe7\x12\xcd"
+			  "\xa0\xd4\x58\xe2\x2d\xf9\x9c\xda"
+			  "\xca\x91\x96\x15\xb4\xe0\xc5\x81"
+			  "\x70\x3a\x77\x95\xd2\xfd\xc5\x55",
+		.klen	= 32,
+		.iv	= "\x55\xcd\x72\x30\x78\x86\xbd\x54"
+			  "\xc0\x0b\x84\x06\x2b\xc2\xcd\x5b",
+		.assoc	= "\x64\x97\x00\x98\xbc\x25\x67\xdb"
+			  "\xe0\x41\xb1\x2a\x2a\x8c\xfe\xdd"
+			  "\x5f\xf2\x9c\x58\x36\x30\x86\x8e"
+			  "\xd1\x51\xe6\x81\x3f\x2d\x95\xc1"
+			  "\x01",
+		.alen	= 33,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x81\x96\x34\xde\xbb\x36\xdd\x3e"
+			  "\x4e\x5e\xcb\x44\x21\xb8\x3f\xf1",
+		.rlen	= 16,
+	}, {
+		.key	= "\x83\x29\x1d\x67\x44\x63\xbb\xe9"
+			  "\x20\xaf\x0a\x73\x27\x1e\x5f\xe0"
+			  "\xf5\x53\xa1\x9a\xb9\x80\x47\x9b"
+			  "\x31\x68\x56\x6e\xac\x5c\x65\x2c",
+		.klen	= 32,
+		.iv	= "\x92\xf2\xac\xcf\x88\x02\x65\x70"
+			  "\x41\xe5\x36\x97\x25\xe7\x90\x61",
+		.assoc	= "\xa1\xbb\x3a\x37\xcc\xa1\x10\xf7"
+			  "\x61\x1c\x63\xbc\x24\xb0\xc0\xe3"
+			  "\x8a\xb4\xa7\xdc\x3b\xd0\x08\xa8"
+			  "\x92\x7f\xc5\x5a\x19\x8c\x34\x97"
+			  "\x0f\x95\x9b\x18\xe4\x8d\xb4\x24"
+			  "\xb9\x33\x28\x18\xe1\x9d\x14\xe0"
+			  "\x64\xb2\x89\x7d\x78\xa8\x05\x7e"
+			  "\x07\x8c\xfc\x88\x2d\xb8\x53",
+		.alen	= 63,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x2e\x99\xb6\x79\x57\x56\x80\x36"
+			  "\x8e\xc4\x1c\x12\x7d\x71\x36\x0c",
+		.rlen	= 16,
+	}, {
+		.key	= "\xbf\x4e\x57\x07\x54\xdf\x64\x05"
+			  "\xa1\x89\xbc\x04\x21\x42\x22\xe6"
+			  "\x1f\x15\xad\x1e\xbe\x20\xc9\xb4"
+			  "\xf3\x95\x35\x46\x86\xbb\x04\x03",
+		.klen	= 32,
+		.iv	= "\xce\x17\xe5\x6f\x98\x7e\x0e\x8c"
+			  "\xc2\xbf\xe8\x29\x1f\x0b\x52\x68",
+		.assoc	= "\xdd\xe0\x74\xd6\xdc\x1d\xb8\x13"
+			  "\xe2\xf6\x15\x4d\x1e\xd4\x83\xe9"
+			  "\xb4\x76\xb3\x60\x40\x70\x8a\xc1"
+			  "\x53\xac\xa4\x32\xf3\xeb\xd3\x6e"
+			  "\x1e\x42\xa0\x46\x45\x9f\xc7\x22"
+			  "\xd3\x43\xbc\x7e\xa5\x47\x2a\x6f"
+			  "\x91\x19\x70\x1e\xe1\xfe\x25\x49"
+			  "\xd6\x8f\x93\xc7\x28\x3f\x3d\x03",
+		.alen	= 64,
+		.input	= "",
+		.ilen	= 0,
+		.result	= "\x7b\x25\x3d\x47\xd4\xa7\x08\xce"
+			  "\x3b\x89\x40\x36\xba\x6d\x0e\xa2",
+		.rlen	= 16,
+	}, {
+		.key	= "\xfc\x72\x90\xa6\x64\x5a\x0d\x21"
+			  "\x22\x63\x6e\x96\x1b\x67\xe4\xec"
+			  "\x49\xd7\xb9\xa2\xc3\xc0\x4b\xce"
+			  "\xb4\xc3\x14\x1e\x61\x1a\xa3\xd9",
+		.klen	= 32,
+		.iv	= "\x0b\x3c\x1f\x0e\xa8\xf9\xb7\xa7"
+			  "\x42\x9a\x9a\xba\x19\x30\x15\x6e",
+		.assoc	= "\x1a",
+		.alen	= 1,
+		.input	= "\x29",
+		.ilen	= 1,
+		.result	= "\xe6\x09\x6f\x95\x9a\x18\xc8\xf6"
+			  "\x17\x75\x81\x16\xdf\x26\xff\x67"
+			  "\x92",
+		.rlen	= 17,
+	}, {
+		.key	= "\x38\x97\xca\x45\x74\xd6\xb6\x3c"
+			  "\xa3\x3d\x20\x27\x15\x8b\xa7\xf2"
+			  "\x74\x9a\xc4\x27\xc8\x60\xcd\xe8"
+			  "\x75\xf0\xf2\xf7\x3b\x79\x42\xb0",
+		.klen	= 32,
+		.iv	= "\x47\x60\x59\xad\xb8\x75\x60\xc3"
+			  "\xc3\x74\x4c\x4c\x13\x54\xd8\x74",
+		.assoc	= "\x56\x29\xe7\x15\xfc\x14\x0a\x4a"
+			  "\xe4\xaa\x79\x70\x12\x1d\x08\xf6"
+			  "\x09\xfb\xca\x69\x4b\xb0\x8e\xf5"
+			  "\xd6\x07\x62\xe3\xa8\xa9\x12",
+		.alen	= 31,
+		.input	= "\x66\xf3\x75\x7d\x40\xb3\xb4\xd1"
+			  "\x04\xe1\xa6\x94\x10\xe6\x39\x77"
+			  "\xd3\xac\x4d\x8a\x8c\x58\x6e\xfb"
+			  "\x06\x13\x9a\xd9\x5e\xc0\xfa",
+		.ilen	= 31,
+		.result	= "\x82\xc0\x56\xf0\xd7\xc4\xc9\xfd"
+			  "\x3c\xd1\x2a\xd4\x15\x86\x9d\xda"
+			  "\xea\x6c\x6f\xa1\x33\xb0\x7a\x01"
+			  "\x57\xe7\xf3\x7b\x73\xe7\x54\x10"
+			  "\xc6\x91\xe2\xc6\xa0\x69\xe7\xe6"
+			  "\x76\xc3\xf5\x3a\x76\xfd\x4a",
+		.rlen	= 47,
+	}, {
+		.key	= "\x75\xbc\x04\xe5\x84\x52\x5e\x58"
+			  "\x24\x17\xd2\xb9\x0e\xaf\x6a\xf9"
+			  "\x9e\x5c\xd0\xab\xcd\x00\x4f\x01"
+			  "\x37\x1e\xd1\xcf\x15\xd8\xe2\x86",
+		.klen	= 32,
+		.iv	= "\x84\x85\x92\x4d\xc8\xf1\x08\xdf"
+			  "\x44\x4e\xff\xdd\x0d\x78\x9a\x7a",
+		.assoc	= "\x93\x4e\x21\xb4\x0c\x90\xb3\x66"
+			  "\x65\x84\x2b\x01\x0b\x42\xcb\xfc"
+			  "\x33\xbd\xd6\xed\x50\x50\x10\x0e"
+			  "\x97\x35\x41\xbb\x82\x08\xb1\xf2",
+		.alen	= 32,
+		.input	= "\xa2\x17\xaf\x1c\x50\x2e\x5d\xed"
+			  "\x85\xbb\x58\x26\x0a\x0b\xfc\x7d"
+			  "\xfe\x6e\x59\x0e\x91\xf8\xf0\x15"
+			  "\xc8\x40\x78\xb1\x38\x1f\x99\xa7",
+		.ilen	= 32,
+		.result	= "\x01\x47\x8e\x6c\xf6\x64\x89\x3a"
+			  "\x71\xce\xe4\xaa\x45\x70\xe6\x84"
+			  "\x62\x48\x08\x64\x86\x6a\xdf\xec"
+			  "\xb4\xa0\xfb\x34\x03\x0c\x19\xf4"
+			  "\x2b\x7b\x36\x73\xec\x54\xa9\x1e"
+			  "\x30\x85\xdb\xe4\xac\xe9\x2c\xca",
+		.rlen	= 48,
+	}, {
+		.key	= "\xb1\xe1\x3e\x84\x94\xcd\x07\x74"
+			  "\xa5\xf2\x84\x4a\x08\xd4\x2c\xff"
+			  "\xc8\x1e\xdb\x2f\xd2\xa0\xd1\x1b"
+			  "\xf8\x4c\xb0\xa8\xef\x37\x81\x5d",
+		.klen	= 32,
+		.iv	= "\xc0\xaa\xcc\xec\xd8\x6c\xb1\xfb"
+			  "\xc5\x28\xb1\x6e\x07\x9d\x5d\x81",
+		.assoc	= "\xd0\x73\x5a\x54\x1d\x0b\x5b\x82"
+			  "\xe5\x5f\xdd\x93\x05\x66\x8e\x02"
+			  "\x5e\x80\xe1\x71\x55\xf0\x92\x28"
+			  "\x59\x62\x20\x94\x5c\x67\x50\xc8"
+			  "\x58",
+		.alen	= 33,
+		.input	= "\xdf\x3c\xe9\xbc\x61\xaa\x06\x09"
+			  "\x06\x95\x0a\xb7\x04\x2f\xbe\x84"
+			  "\x28\x30\x64\x92\x96\x98\x72\x2e"
+			  "\x89\x6e\x57\x8a\x13\x7e\x38\x7e"
+			  "\xdb",
+		.ilen	= 33,
+		.result	= "\x85\xe0\xf8\x0f\x8e\x49\xe3\x60"
+			  "\xcb\x4a\x54\x94\xcf\xf5\x7e\x34"
+			  "\xe9\xf8\x80\x65\x53\xd0\x72\x70"
+			  "\x4f\x7d\x9d\xd1\x15\x6f\xb9\x2c"
+			  "\xfa\xe8\xdd\xac\x2e\xe1\x3f\x67"
+			  "\x63\x0f\x1a\x59\xb7\x89\xdb\xf4"
+			  "\xc3",
+		.rlen	= 49,
+	}, {
+		.key	= "\xee\x05\x77\x23\xa5\x49\xb0\x90"
+			  "\x26\xcc\x36\xdc\x02\xf8\xef\x05"
+			  "\xf3\xe1\xe7\xb3\xd8\x40\x53\x35"
+			  "\xb9\x79\x8f\x80\xc9\x96\x20\x33",
+		.klen	= 32,
+		.iv	= "\xfd\xce\x06\x8b\xe9\xe8\x5a\x17"
+			  "\x46\x02\x63\x00\x01\xc1\x20\x87",
+		.assoc	= "\x0c\x98\x94\xf3\x2d\x87\x04\x9e"
+			  "\x66\x39\x8f\x24\xff\x8a\x50\x08"
+			  "\x88\x42\xed\xf6\x5a\x90\x14\x42"
+			  "\x1a\x90\xfe\x6c\x36\xc6\xf0\x9f"
+			  "\x66\xa0\xb5\x2d\x2c\xf8\x25\x15"
+			  "\x55\x90\xa2\x7e\x77\x94\x96\x3a"
+			  "\x71\x1c\xf7\x44\xee\xa8\xc3\x42"
+			  "\xe2\xa3\x84\x04\x0b\xe1\xce",
+		.alen	= 63,
+		.input	= "\x1b\x61\x23\x5b\x71\x26\xae\x25"
+			  "\x87\x6f\xbc\x49\xfe\x53\x81\x8a"
+			  "\x53\xf2\x70\x17\x9b\x38\xf4\x48"
+			  "\x4b\x9b\x36\x62\xed\xdd\xd8\x54"
+			  "\xea\xcb\xb6\x79\x45\xfc\xaa\x54"
+			  "\x5c\x94\x47\x58\xa7\xff\x9c\x9e"
+			  "\x7c\xb6\xf1\xac\xc8\xfd\x8b\x35"
+			  "\xd5\xa4\x6a\xd4\x09\xc2\x08",
+		.ilen	= 63,
+		.result	= "\x00\xe5\x5b\x87\x5c\x20\x22\x8a"
+			  "\xda\x1f\xd3\xff\xbb\xb2\xb0\xf8"
+			  "\xef\xe9\xeb\x9e\x7c\x80\xf4\x2b"
+			  "\x59\xc0\x79\xbc\x17\xa0\x15\x01"
+			  "\xf5\x72\xfb\x5a\xe7\xaf\x07\xe3"
+			  "\x1b\x49\x21\x34\x23\x63\x55\x5e"
+			  "\xee\x4f\x34\x17\xfa\xfe\xa5\x0c"
+			  "\xed\x0b\x23\xea\x9b\xda\x57\x2f"
+			  "\xf6\xa9\xae\x0d\x4e\x40\x96\x45"
+			  "\x7f\xfa\xf0\xbf\xc4\x98\x78",
+		.rlen	= 79,
+	}, {
+		.key	= "\x2a\x2a\xb1\xc3\xb5\xc5\x59\xac"
+			  "\xa7\xa6\xe8\x6d\xfc\x1d\xb2\x0b"
+			  "\x1d\xa3\xf3\x38\xdd\xe0\xd5\x4e"
+			  "\x7b\xa7\x6e\x58\xa3\xf5\xbf\x0a",
+		.klen	= 32,
+		.iv	= "\x39\xf3\x3f\x2b\xf9\x64\x03\x33"
+			  "\xc7\xdd\x15\x91\xfb\xe6\xe2\x8d",
+		.assoc	= "\x49\xbc\xce\x92\x3d\x02\xad\xba"
+			  "\xe7\x13\x41\xb6\xf9\xaf\x13\x0f"
+			  "\xb2\x04\xf8\x7a\x5f\x30\x96\x5b"
+			  "\xdc\xbd\xdd\x44\x10\x25\x8f\x75"
+			  "\x75\x4d\xb9\x5b\x8e\x0a\x38\x13"
+			  "\x6f\x9f\x36\xe4\x3a\x3e\xac\xc9"
+			  "\x9d\x83\xde\xe5\x57\xfd\xe3\x0e"
+			  "\xb1\xa7\x1b\x44\x05\x67\xb7\x37",
+		.alen	= 64,
+		.input	= "\x58\x85\x5c\xfa\x81\xa1\x57\x40"
+			  "\x08\x4a\x6e\xda\xf8\x78\x44\x90"
+			  "\x7d\xb5\x7b\x9b\xa1\xd8\x76\x62"
+			  "\x0c\xc9\x15\x3b\xc7\x3c\x77\x2b"
+			  "\xf8\x78\xba\xa7\xa6\x0e\xbd\x52"
+			  "\x76\xa3\xdc\xbe\x6b\xa8\xb1\x2d"
+			  "\xa9\x1d\xd8\x4e\x31\x53\xab\x00"
+			  "\xa5\xa7\x01\x13\x04\x49\xf2\x04",
+		.ilen	= 64,
+		.result	= "\x28\xdd\xb9\x4a\x12\xc7\x0a\xe1"
+			  "\x58\x06\x1a\x9b\x8c\x67\xdf\xeb"
+			  "\x35\x35\x60\x9d\x06\x40\x65\xc1"
+			  "\x93\xe8\xb3\x82\x50\x29\xdd\xb5"
+			  "\x2b\xcb\xde\x18\x78\x6b\x42\xbe"
+			  "\x6d\x24\xd0\xb2\x7d\xd7\x08\x8f"
+			  "\x4a\x18\x98\xad\x8c\xf2\x97\xb4"
+			  "\xf4\x77\xe4\xbf\x41\x3b\xc4\x06"
+			  "\xce\x9e\x34\x81\xf0\x89\x11\x13"
+			  "\x02\x65\xa1\x7c\xdf\x07\x33\x06",
+		.rlen	= 80,
+	}, {
+		.key	= "\x67\x4f\xeb\x62\xc5\x40\x01\xc7"
+			  "\x28\x80\x9a\xfe\xf6\x41\x74\x12"
+			  "\x48\x65\xfe\xbc\xe2\x80\x57\x68"
+			  "\x3c\xd4\x4d\x31\x7d\x54\x5f\xe1",
+		.klen	= 32,
+		.iv	= "\x76\x18\x79\xca\x09\xdf\xac\x4e"
+			  "\x48\xb7\xc7\x23\xf5\x0a\xa5\x93",
+		.assoc	= "\x85\xe1\x08\x32\x4d\x7e\x56\xd5"
+			  "\x68\xed\xf3\x47\xf3\xd3\xd6\x15"
+			  "\xdd\xc7\x04\xfe\x64\xd0\x18\x75"
+			  "\x9d\xeb\xbc\x1d\xea\x84\x2e\x4c"
+			  "\x83\xf9\xbe\x8a\xef\x1c\x4b\x10"
+			  "\x89\xaf\xcb\x4b\xfe\xe7\xc1\x58"
+			  "\xca\xea\xc6\x87\xc0\x53\x03\xd9"
+			  "\x80\xaa\xb2\x83\xff\xee\xa1\x6a"
+			  "\x04",
+		.alen	= 65,
+		.input	= "\x94\xaa\x96\x9a\x91\x1d\x00\x5c"
+			  "\x88\x24\x20\x6b\xf2\x9c\x06\x96"
+			  "\xa7\x77\x87\x1f\xa6\x78\xf8\x7b"
+			  "\xcd\xf6\xf4\x13\xa1\x9b\x16\x02"
+			  "\x07\x24\xbf\xd5\x08\x20\xd0\x4f"
+			  "\x90\xb3\x70\x24\x2f\x51\xc7\xbb"
+			  "\xd6\x84\xc0\xef\x9a\xa8\xca\xcc"
+			  "\x74\xab\x97\x53\xfe\xd0\xdb\x37"
+			  "\x37\x6a\x0e\x9f\x3f\xa3\x2a\xe3"
+			  "\x1b\x34\x6d\x51\x72\x2b\x17\xe7"
+			  "\x4d\xaa\x2c\x18\xda\xa3\x33\x89"
+			  "\x2a\x9f\xf4\xd2\xed\x76\x3d\x3f"
+			  "\x3c\x15\x9d\x8e\x4f\x3c\x27\xb0"
+			  "\x42\x3f\x2f\x8a\xd4\xc2\x10\xb2"
+			  "\x27\x7f\xe3\x34\x80\x02\x49\x4b"
+			  "\x07\x68\x22\x2a\x88\x25\x53\xb2"
+			  "\x2f",
+		.ilen	= 129,
+		.result	= "\x85\x39\x69\x35\xfb\xf9\xb0\xa6"
+			  "\x85\x43\x88\xd0\xd7\x78\x60\x19"
+			  "\x3e\x1f\xb1\xa4\xd6\xc5\x96\xec"
+			  "\xf7\x84\x85\xc7\x27\x0f\x74\x57"
+			  "\x28\x9e\xdd\x90\x3c\x43\x12\xc5"
+			  "\x51\x3d\x39\x8f\xa5\xf4\xe0\x0b"
+			  "\x57\x04\xf1\x6d\xfe\x9b\x84\x27"
+			  "\xe8\xeb\x4d\xda\x02\x0a\xc5\x49"
+			  "\x1a\x55\x5e\x50\x56\x4d\x94\xda"
+			  "\x20\xf8\x12\x54\x50\xb3\x11\xda"
+			  "\xed\x44\x27\x67\xd5\xd1\x8b\x4b"
+			  "\x38\x67\x56\x65\x59\xda\xe6\x97"
+			  "\x81\xae\x2f\x92\x3b\xae\x22\x1c"
+			  "\x91\x59\x38\x18\x00\xe8\xba\x92"
+			  "\x04\x19\x56\xdf\xb0\x82\xeb\x6f"
+			  "\x2e\xdb\x54\x3c\x4b\xbb\x60\x90"
+			  "\x4c\x50\x10\x62\xba\x7a\xb1\x68"
+			  "\x37\xd7\x87\x4e\xe4\x66\x09\x1f"
+			  "\xa5",
+		.rlen	= 145,
+	}, {
+		.key	= "\xa3\x73\x24\x01\xd5\xbc\xaa\xe3"
+			  "\xa9\x5a\x4c\x90\xf0\x65\x37\x18"
+			  "\x72\x28\x0a\x40\xe7\x20\xd9\x82"
+			  "\xfe\x02\x2b\x09\x57\xb3\xfe\xb7",
+		.klen	= 32,
+		.iv	= "\xb3\x3d\xb3\x69\x19\x5b\x54\x6a"
+			  "\xc9\x91\x79\xb4\xef\x2e\x68\x99",
+		.assoc	= "\xc2\x06\x41\xd1\x5d\xfa\xff\xf1"
+			  "\xe9\xc7\xa5\xd9\xed\xf8\x98\x1b"
+			  "\x07\x89\x10\x82\x6a\x70\x9a\x8f"
+			  "\x5e\x19\x9b\xf5\xc5\xe3\xcd\x22"
+			  "\x92\xa5\xc2\xb8\x51\x2e\x5e\x0e"
+			  "\xa4\xbe\x5f\xb1\xc1\x90\xd7\xe7"
+			  "\xf7\x52\xae\x28\x29\xa8\x22\xa4"
+			  "\x4f\xae\x48\xc2\xfa\x75\x8b\x9e"
+			  "\xce\x83\x2a\x88\x07\x55\xbb\x89"
+			  "\xf6\xdf\xac\xdf\x83\x08\xbf\x7d"
+			  "\xac\x30\x8b\x8e\x02\xac\x00\xf1"
+			  "\x30\x46\xe1\xbc\x75\xbf\x49\xbb"
+			  "\x26\x4e\x29\xf0\x2f\x21\xc6\x13"
+			  "\x92\xd9\x3d\x11\xe4\x10\x00\x8e"
+			  "\xd4\xd4\x58\x65\xa6\x2b\xe3\x25"
+			  "\xb1\x8f\x15\x93\xe7\x71\xb9\x2c"
+			  "\x4b",
+		.alen	= 129,
+		.input	= "\xd1\xcf\xd0\x39\xa1\x99\xa9\x78"
+			  "\x09\xfe\xd2\xfd\xec\xc1\xc9\x9d"
+			  "\xd2\x39\x93\xa3\xab\x18\x7a\x95"
+			  "\x8f\x24\xd3\xeb\x7b\xfa\xb5\xd8"
+			  "\x15\xd1\xc3\x04\x69\x32\xe3\x4d"
+			  "\xaa\xc2\x04\x8b\xf2\xfa\xdc\x4a"
+			  "\x02\xeb\xa8\x90\x03\xfd\xea\x97"
+			  "\x43\xaf\x2e\x92\xf8\x57\xc5\x6a"
+			  "\x00",
+		.ilen	= 65,
+		.result	= "\x7d\xde\x53\x22\xe4\x23\x3b\x30"
+			  "\x78\xde\x35\x90\x7a\xd9\x0b\x93"
+			  "\xf6\x0e\x0b\xed\x40\xee\x10\x9c"
+			  "\x96\x3a\xd3\x34\xb2\xd0\x67\xcf"
+			  "\x63\x7f\x2d\x0c\xcf\x96\xec\x64"
+			  "\x1a\x87\xcc\x7d\x2c\x5e\x81\x4b"
+			  "\xd2\x8f\x4c\x7c\x00\xb1\xb4\xe0"
+			  "\x87\x4d\xb1\xbc\xd8\x78\x2c\x17"
+			  "\xf2\x3b\xd8\x28\x40\xe2\x76\xf6"
+			  "\x20\x13\x83\x46\xaf\xff\xe3\x0f"
+			  "\x72",
+		.rlen	= 81,
+	}, {
+		.key	= "\xe0\x98\x5e\xa1\xe5\x38\x53\xff"
+			  "\x2a\x35\xfe\x21\xea\x8a\xfa\x1e"
+			  "\x9c\xea\x15\xc5\xec\xc0\x5b\x9b"
+			  "\xbf\x2f\x0a\xe1\x32\x12\x9d\x8e",
+		.klen	= 32,
+		.iv	= "\xef\x61\xed\x08\x29\xd7\xfd\x86"
+			  "\x4a\x6b\x2b\x46\xe9\x53\x2a\xa0",
+		.assoc	= "\xfe\x2a\x7b\x70\x6d\x75\xa7\x0d"
+			  "\x6a\xa2\x57\x6a\xe7\x1c\x5b\x21"
+			  "\x31\x4b\x1b\x07\x6f\x10\x1c\xa8"
+			  "\x20\x46\x7a\xce\x9f\x42\x6d\xf9",
+		.alen	= 32,
+		.input	= "\x0d\xf4\x09\xd8\xb1\x14\x51\x94"
+			  "\x8a\xd8\x84\x8e\xe6\xe5\x8c\xa3"
+			  "\xfc\xfc\x9e\x28\xb0\xb8\xfc\xaf"
+			  "\x50\x52\xb1\xc4\x55\x59\x55\xaf",
+		.ilen	= 32,
+		.result	= "\x5a\xcd\x8c\x57\xf2\x6a\xb6\xbe"
+			  "\x53\xc7\xaa\x9a\x60\x74\x9c\xc4"
+			  "\xa2\xc2\xd0\x6d\xe1\x03\x63\xdc"
+			  "\xbb\x51\x7e\x9c\x89\x73\xde\x4e"
+			  "\x24\xf8\x52\x7c\x15\x41\x0e\xba"
+			  "\x69\x0e\x36\x5f\x2f\x22\x8c",
+		.rlen	= 47,
+	}, {
+		.key	= "\x1c\xbd\x98\x40\xf5\xb3\xfc\x1b"
+			  "\xaa\x0f\xb0\xb3\xe4\xae\xbc\x24"
+			  "\xc7\xac\x21\x49\xf1\x60\xdd\xb5"
+			  "\x80\x5d\xe9\xba\x0c\x71\x3c\x64",
+		.klen	= 32,
+		.iv	= "\x2c\x86\x26\xa8\x39\x52\xa6\xa2"
+			  "\xcb\x45\xdd\xd7\xe3\x77\xed\xa6",
+		.assoc	= "\x3b\x4f\xb5\x10\x7d\xf1\x50\x29"
+			  "\xeb\x7c\x0a\xfb\xe1\x40\x1e\x27"
+			  "\x5c\x0d\x27\x8b\x74\xb0\x9e\xc2"
+			  "\xe1\x74\x59\xa6\x79\xa1\x0c\xd0",
+		.alen	= 32,
+		.input	= "\x4a\x18\x43\x77\xc1\x90\xfa\xb0"
+			  "\x0b\xb2\x36\x20\xe0\x09\x4e\xa9"
+			  "\x26\xbe\xaa\xac\xb5\x58\x7e\xc8"
+			  "\x11\x7f\x90\x9c\x2f\xb8\xf4\x85",
+		.ilen	= 32,
+		.result	= "\x47\xd6\xce\x78\xd6\xbf\x4a\x51"
+			  "\xb8\xda\x92\x3c\xfd\xda\xac\x8e"
+			  "\x8d\x88\xd7\x4d\x90\xe5\xeb\xa1"
+			  "\xab\xd6\x7c\x76\xad\xea\x7d\x76"
+			  "\x53\xee\xb0\xcd\xd0\x02\xbb\x70"
+			  "\x5b\x6f\x7b\xe2\x8c\xe8",
+		.rlen	= 46,
+	}, {
+		.key	= "\x59\xe1\xd2\xdf\x05\x2f\xa4\x37"
+			  "\x2b\xe9\x63\x44\xde\xd3\x7f\x2b"
+			  "\xf1\x6f\x2d\xcd\xf6\x00\x5f\xcf"
+			  "\x42\x8a\xc8\x92\xe6\xd0\xdc\x3b",
+		.klen	= 32,
+		.iv	= "\x68\xab\x60\x47\x49\xce\x4f\xbe"
+			  "\x4c\x20\x8f\x68\xdd\x9c\xb0\xac",
+		.assoc	= "\x77\x74\xee\xaf\x8d\x6d\xf9\x45"
+			  "\x6c\x56\xbc\x8d\xdb\x65\xe0\x2e"
+			  "\x86\xd0\x32\x0f\x79\x50\x20\xdb"
+			  "\xa2\xa1\x37\x7e\x53\x00\xab\xa6",
+		.alen	= 32,
+		.input	= "\x86\x3d\x7d\x17\xd1\x0c\xa3\xcc"
+			  "\x8c\x8d\xe8\xb1\xda\x2e\x11\xaf"
+			  "\x51\x80\xb5\x30\xba\xf8\x00\xe2"
+			  "\xd3\xad\x6f\x75\x09\x18\x93\x5c",
+		.ilen	= 32,
+		.result	= "\x9f\xa9\x2b\xa4\x8f\x00\x05\x2b"
+			  "\xe7\x68\x81\x51\xbb\xfb\xdf\x60"
+			  "\xbb\xac\xe8\xc1\xdc\x68\xae\x68"
+			  "\x3a\xcd\x7a\x06\x49\xfe\x80\x11"
+			  "\xe6\x61\x99\xe2\xdd\xbe\x2c\xbf",
+		.rlen	= 40,
+	}, {
+		.key	= "\x96\x06\x0b\x7f\x15\xab\x4d\x53"
+			  "\xac\xc3\x15\xd6\xd8\xf7\x42\x31"
+			  "\x1b\x31\x38\x51\xfc\xa0\xe1\xe8"
+			  "\x03\xb8\xa7\x6b\xc0\x2f\x7b\x11",
+		.klen	= 32,
+		.iv	= "\xa5\xcf\x9a\xe6\x59\x4a\xf7\xd9"
+			  "\xcd\xfa\x41\xfa\xd7\xc0\x72\xb2",
+		.assoc	= "\xb4\x99\x28\x4e\x9d\xe8\xa2\x60"
+			  "\xed\x30\x6e\x1e\xd5\x89\xa3\x34"
+			  "\xb1\x92\x3e\x93\x7e\xf0\xa2\xf5"
+			  "\x64\xcf\x16\x57\x2d\x5f\x4a\x7d",
+		.alen	= 32,
+		.input	= "\xc3\x62\xb7\xb6\xe2\x87\x4c\xe7"
+			  "\x0d\x67\x9a\x43\xd4\x52\xd4\xb5"
+			  "\x7b\x43\xc1\xb5\xbf\x98\x82\xfc"
+			  "\x94\xda\x4e\x4d\xe4\x77\x32\x32",
+		.ilen	= 32,
+		.result	= "\xe2\x34\xfa\x25\xfd\xfb\x89\x5e"
+			  "\x5b\x4e\x0b\x15\x6e\x39\xfb\x0c"
+			  "\x73\xc7\xd9\x6b\xbe\xce\x9b\x70"
+			  "\xc7\x4f\x96\x16\x03\xfc\xea\xfb"
+			  "\x56",
+		.rlen	= 33,
+	},
+};
+
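+/*
+ * MORUS-1280 decryption test vectors. These mirror the encryption
+ * vectors above: key, IV and associated data are unchanged, while
+ * .input now carries the ciphertext plus the 16-byte tag and .result
+ * the expected plaintext.
+ */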
+static const struct aead_testvec morus1280_dec_tv_template[] = {
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 16,
+		.iv	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
+			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x91\x85\x0f\xf5\x52\x9e\xce\xce"
+			  "\x65\x99\xc7\xbf\xd3\x76\xe8\x98",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x3c\x24\x39\x9f\x10\x7b\xa8\x1b"
+			  "\x80\xda\xb2\x91\xf9\x24\xc2\x06",
+		.klen	= 16,
+		.iv	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
+			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x88\xc3\x4c\xf0\x2f\x43\x76\x13"
+			  "\x96\xda\x76\x34\x33\x4e\xd5\x39"
+			  "\x73",
+		.ilen	= 17,
+		.result	= "\x69",
+		.rlen	= 1,
+	}, {
+		.key	= "\x79\x49\x73\x3e\x20\xf7\x51\x37"
+			  "\x01\xb4\x64\x22\xf3\x48\x85\x0c",
+		.klen	= 16,
+		.iv	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
+			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x3e\x5c\x3b\x58\x3b\x7d\x2a\x22"
+			  "\x75\x0b\x24\xa6\x0e\xc3\xde\x52"
+			  "\x97\x0b\x64\xd4\xce\x90\x52\xf7"
+			  "\xef\xdb\x6a\x38\xd2\xa8\xa1\x0d"
+			  "\xe0\x61\x33\x24\xc6\x4d\x51\xbc"
+			  "\xa4\x21\x74\xcf\x19\x16\x59",
+		.ilen	= 47,
+		.result	= "\xa6\xa4\x1e\x76\xec\xd4\x50\xcc"
+			  "\x62\x58\xe9\x8f\xef\xa4\x17\x91"
+			  "\xb4\x96\x9f\x6b\xce\x38\xa5\x46"
+			  "\x13\x7d\x64\x93\xd7\x05\xf5",
+		.rlen	= 31,
+	}, {
+		.key	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
+			  "\x82\x8e\x16\xb4\xed\x6d\x47\x12",
+		.klen	= 16,
+		.iv	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
+			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x30\x82\x9c\x2b\x67\xcb\xf9\x1f"
+			  "\xde\x9f\x77\xb2\xda\x92\x61\x5c"
+			  "\x09\x0b\x2d\x9a\x26\xaa\x1c\x06"
+			  "\xab\x74\xb7\x2b\x95\x5f\x9f\xa1"
+			  "\x9a\xff\x50\xa0\xa2\xff\xc5\xad"
+			  "\x21\x8e\x84\x5c\x12\x61\xb2\xae",
+		.ilen	= 48,
+		.result	= "\xe2\xc9\x58\x15\xfc\x4f\xf8\xe8"
+			  "\xe3\x32\x9b\x21\xe9\xc8\xd9\x97"
+			  "\xde\x58\xab\xf0\xd3\xd8\x27\x60"
+			  "\xd5\xaa\x43\x6b\xb1\x64\x95\xa4",
+		.rlen	= 32,
+	}, {
+		.key	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
+			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
+		.klen	= 16,
+		.iv	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
+			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x67\x5d\x8e\x45\xc8\x39\xf5\x17"
+			  "\xc1\x1d\x2a\xdd\x88\x67\xda\x1f"
+			  "\x6d\xe8\x37\x28\x5a\xc1\x5e\x9f"
+			  "\xa6\xec\xc6\x92\x05\x4b\xc0\xa3"
+			  "\x63\xef\x88\xa4\x9b\x0a\x5c\xed"
+			  "\x2b\x6a\xac\x63\x52\xaa\x10\x94"
+			  "\xd0",
+		.ilen	= 49,
+		.result	= "\x1f\xee\x92\xb4\x0c\xcb\xa1\x04"
+			  "\x64\x0c\x4d\xb2\xe3\xec\x9c\x9d"
+			  "\x09\x1a\xb7\x74\xd8\x78\xa9\x79"
+			  "\x96\xd8\x22\x43\x8c\xc3\x34\x7b"
+			  "\xc4",
+		.rlen	= 33,
+	}, {
+		.key	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
+			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f",
+		.klen	= 16,
+		.iv	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
+			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x7d\x61\x1a\x35\x20\xcc\x07\x88"
+			  "\x03\x98\x87\xcf\xc0\x6e\x4d\x19"
+			  "\xe3\xd4\x0b\xfb\x29\x8f\x49\x1a"
+			  "\x3a\x06\x77\xce\x71\x2c\xcd\xdd"
+			  "\xed\xf6\xc9\xbe\xa6\x3b\xb8\xfc"
+			  "\x6c\xbe\x77\xed\x74\x0e\x20\x85"
+			  "\xd0\x65\xde\x24\x6f\xe3\x25\xc5"
+			  "\xdf\x5b\x0f\xbd\x8a\x88\x78\xc9"
+			  "\xe5\x81\x37\xde\x84\x7a\xf6\x84"
+			  "\x99\x7a\x72\x9c\x54\x31\xa1",
+		.ilen	= 79,
+		.result	= "\x5c\x13\xcb\x54\x1c\x47\x4a\x1f"
+			  "\xe5\xe6\xff\x44\xdd\x11\x5f\xa3"
+			  "\x33\xdd\xc2\xf8\xdd\x18\x2b\x93"
+			  "\x57\x05\x01\x1c\x66\x22\xd3\x51"
+			  "\xd3\xdf\x18\xc9\x30\x66\xed\xb1"
+			  "\x96\x58\xd5\x8c\x64\x8c\x7c\xf5"
+			  "\x01\xd0\x74\x5f\x9b\xaa\xf6\xd1"
+			  "\xe6\x16\xa2\xac\xde\x47\x40",
+		.rlen	= 63,
+	}, {
+		.key	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
+			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25",
+		.klen	= 16,
+		.iv	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
+			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x05\xc5\xb1\xf9\x1b\xb9\xab\x2c"
+			  "\xa5\x07\x12\xa7\x12\x39\x60\x66"
+			  "\x30\x81\x4a\x03\x78\x28\x45\x52"
+			  "\xd2\x2b\x24\xfd\x8b\xa5\xb7\x66"
+			  "\x6f\x45\xd7\x3b\x67\x6f\x51\xb9"
+			  "\xc0\x3d\x6c\xca\x1e\xae\xff\xb6"
+			  "\x79\xa9\xe4\x82\x5d\x4c\x2d\xdf"
+			  "\xeb\x71\x40\xc9\x2c\x40\x45\x6d"
+			  "\x73\x77\x01\xf3\x4f\xf3\x9d\x2a"
+			  "\x5d\x57\xa8\xa1\x18\xa2\xad\xcb",
+		.ilen	= 80,
+		.result	= "\x98\x37\x05\xf3\x2c\xc2\xf3\x3b"
+			  "\x66\xc0\xb1\xd5\xd7\x35\x21\xaa"
+			  "\x5d\x9f\xce\x7c\xe2\xb8\xad\xad"
+			  "\x19\x33\xe0\xf4\x40\x81\x72\x28"
+			  "\xe1\x8b\x1c\xf8\x91\x78\xff\xaf"
+			  "\xb0\x68\x69\xf2\x27\x35\x91\x84"
+			  "\x2e\x37\x5b\x00\x04\xff\x16\x9c"
+			  "\xb5\x19\x39\xeb\xd9\xcd\x29\x9a",
+		.rlen	= 64,
+	}, {
+		.key	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
+			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b",
+		.klen	= 16,
+		.iv	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
+			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
+		.assoc	= "\xc5",
+		.alen	= 1,
+		.input	= "\x4d\xbf\x11\xac\x7f\x97\x0b\x2e"
+			  "\x89\x3b\x9d\x0f\x83\x1c\x08\xc3",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xe4\x25\xcd\xfa\x80\xdd\x46\xde"
+			  "\x07\xd1\x90\x8b\xcf\x23\x15\x31",
+		.klen	= 16,
+		.iv	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
+			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
+		.assoc	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
+			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34"
+			  "\xe8\x73\x62\x64\xab\x50\xd0\xda"
+			  "\x6b\x83\x66\xaf\x3e\x27\xc9",
+		.alen	= 31,
+		.input	= "\x5b\xc0\x8d\x54\xe4\xec\xbe\x38"
+			  "\x03\x12\xf9\xcc\x9e\x46\x42\x92",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x20\x4a\x07\x99\x91\x58\xee\xfa"
+			  "\x88\xab\x42\x1c\xc9\x47\xd7\x38",
+		.klen	= 16,
+		.iv	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
+			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
+		.assoc	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
+			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b"
+			  "\x12\x35\x6e\xe8\xb0\xf0\x52\xf3"
+			  "\x2d\xb0\x45\x87\x18\x86\x68\xf6",
+		.alen	= 32,
+		.input	= "\x48\xc5\xc3\x4c\x40\x2e\x2f\xc2"
+			  "\x6d\x65\xe0\x67\x9c\x1d\xa0\xf0",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x5d\x6f\x41\x39\xa1\xd4\x97\x16"
+			  "\x09\x85\xf4\xae\xc3\x6b\x9a\x3e",
+		.klen	= 16,
+		.iv	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
+			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
+		.assoc	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
+			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
+			  "\x3c\xf8\x79\x6c\xb6\x90\xd4\x0d"
+			  "\xee\xde\x23\x60\xf2\xe5\x08\xcc"
+			  "\x97",
+		.alen	= 33,
+		.input	= "\x28\x64\x78\x51\x55\xd8\x56\x4a"
+			  "\x58\x3e\xf7\xbe\xee\x21\xfe\x94",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x99\x93\x7a\xd8\xb1\x50\x40\x31"
+			  "\x8a\x60\xa6\x3f\xbd\x90\x5d\x44",
+		.klen	= 16,
+		.iv	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
+			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
+		.assoc	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
+			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
+			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
+			  "\xaf\x0b\x02\x38\xcc\x44\xa7\xa3"
+			  "\xa6\xbf\x31\x93\x60\xcd\xda\x63"
+			  "\x2c\xb1\xaa\x19\xc8\x19\xf8\xeb"
+			  "\x03\xa1\xe8\xbe\x37\x54\xec\xa2"
+			  "\xcd\x2c\x45\x58\xbd\x8e\x80",
+		.alen	= 63,
+		.input	= "\xb3\xa6\x00\x4e\x09\x20\xac\x21"
+			  "\x77\x72\x69\x76\x2d\x36\xe5\xc8",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xd6\xb8\xb4\x77\xc1\xcb\xe9\x4d"
+			  "\x0a\x3a\x58\xd1\xb7\xb4\x1f\x4a",
+		.klen	= 16,
+		.iv	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
+			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
+		.assoc	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
+			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
+			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
+			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a"
+			  "\xb4\x6b\x35\xc2\xc1\xdf\xed\x60"
+			  "\x46\xc1\x3e\x7f\x8c\xc2\x0e\x7a"
+			  "\x30\x08\xd0\x5f\xa0\xaa\x0c\x6d"
+			  "\x9c\x2f\xdb\x97\xb8\x15\x69\x01",
+		.alen	= 64,
+		.input	= "\x65\x33\x7b\xa1\x63\xf4\x20\xdd"
+			  "\xe4\xb9\x4a\xaa\x9a\x21\xaa\x14",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x12\xdd\xee\x17\xd1\x47\x92\x69"
+			  "\x8b\x14\x0a\x62\xb1\xd9\xe2\x50",
+		.klen	= 16,
+		.iv	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
+			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
+		.assoc	= "\x31",
+		.alen	= 1,
+		.input	= "\x1d\x47\x17\x34\x86\xf5\x54\x1a"
+			  "\x6d\x28\xb8\x5d\x6c\xcf\xa0\xb9"
+			  "\xbf",
+		.ilen	= 17,
+		.result	= "\x40",
+		.rlen	= 1,
+	}, {
+		.key	= "\x4f\x01\x27\xb6\xe1\xc3\x3a\x85"
+			  "\x0c\xee\xbc\xf4\xab\xfd\xa5\x57",
+		.klen	= 16,
+		.iv	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
+			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
+		.assoc	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
+			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a"
+			  "\xe6\x01\xa8\x7e\xca\x10\xdc\x73"
+			  "\xf4\x94\x9f\xc1\x5a\x61\x85",
+		.alen	= 31,
+		.input	= "\x78\x90\x52\xae\x0f\xf7\x2e\xef"
+			  "\x63\x09\x08\x58\xb5\x56\xbd\x72"
+			  "\x6e\x42\xcf\x27\x04\x7c\xdb\x92"
+			  "\x18\xe9\xa4\x33\x90\xba\x62\xb5"
+			  "\x70\xd3\x88\x9b\x4f\x05\xa7\x51"
+			  "\x85\x87\x17\x09\x42\xed\x4e",
+		.ilen	= 47,
+		.result	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
+			  "\x6d\x92\x42\x61\xa7\x58\x37\xdb"
+			  "\xb0\xb2\x2b\x9f\x0b\xb8\xbd\x7a"
+			  "\x24\xa0\xd6\xb7\x11\x79\x6c",
+		.rlen	= 31,
+	}, {
+		.key	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
+			  "\x8d\xc8\x6e\x85\xa5\x21\x67\x5d",
+		.klen	= 16,
+		.iv	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
+			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
+		.assoc	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
+			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60"
+			  "\x10\xc3\xb3\x02\xcf\xb0\x5e\x8d"
+			  "\xb5\xc2\x7e\x9a\x35\xc0\x24\xfd",
+		.alen	= 32,
+		.input	= "\x1d\x2c\x57\xe0\x50\x38\x3d\x41"
+			  "\x2e\x71\xc8\x3b\x92\x43\x58\xaf"
+			  "\x5a\xfb\xad\x8f\xd9\xd5\x8a\x5e"
+			  "\xdb\xf3\xcd\x3a\x2b\xe1\x2c\x1a"
+			  "\xb0\xed\xe3\x0c\x6e\xf9\xf2\xd6"
+			  "\x90\xe6\xb1\x0e\xa5\x8a\xac\xb7",
+		.ilen	= 48,
+		.result	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
+			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2"
+			  "\xdb\x74\x36\x23\x11\x58\x3f\x93"
+			  "\xe5\xcd\xb5\x90\xeb\xd8\x0c\xb3",
+		.rlen	= 32,
+	}, {
+		.key	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
+			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
+		.klen	= 16,
+		.iv	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
+			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
+		.assoc	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
+			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
+			  "\x3b\x86\xbf\x86\xd4\x50\xe0\xa7"
+			  "\x76\xef\x5c\x72\x0f\x1f\xc3\xd4"
+			  "\xee",
+		.alen	= 33,
+		.input	= "\x59\x10\x84\x1c\x83\x4c\x8b\xfc"
+			  "\xfd\x2e\x4b\x46\x84\xff\x78\x4e"
+			  "\x50\xda\x5c\xb9\x61\x1d\xf5\xb9"
+			  "\xfe\xbb\x7f\xae\x8c\xc1\x24\xbd"
+			  "\x8c\x6f\x1f\x9b\xce\xc6\xc1\x37"
+			  "\x08\x06\x5a\xe5\x96\x10\x95\xc2"
+			  "\x5e",
+		.ilen	= 49,
+		.result	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
+			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
+			  "\x05\x36\x42\xa7\x16\xf8\xc1\xad"
+			  "\xa7\xfb\x94\x68\xc5\x37\xab\x8a"
+			  "\x72",
+		.rlen	= 33,
+	}, {
+		.key	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
+			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69",
+		.klen	= 16,
+		.iv	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
+			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
+		.assoc	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
+			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
+			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
+			  "\x38\x1d\x3b\x4a\xe9\x7e\x62\xaa"
+			  "\xfd\xc9\x4a\xa9\xa9\x39\x4b\x54"
+			  "\xc8\x0e\x24\x7f\x5e\x10\x7a\x45"
+			  "\x10\x0b\x56\x85\xad\x54\xaa\x66"
+			  "\xa8\x43\xcd\xd4\x9b\xb7\xfa",
+		.alen	= 63,
+		.input	= "\x9a\x12\xbc\xdf\x72\xa8\x56\x22"
+			  "\x49\x2d\x07\x92\xfc\x3d\x6d\x5f"
+			  "\xef\x36\x19\xae\x91\xfa\xd6\x63"
+			  "\x46\xea\x8a\x39\x14\x21\xa6\x37"
+			  "\x18\xfc\x97\x3e\x16\xa5\x4d\x39"
+			  "\x45\x2e\x69\xcc\x9c\x5f\xdf\x6d"
+			  "\x5e\xa2\xbf\xac\x83\x32\x72\x52"
+			  "\x58\x58\x23\x40\xfd\xa5\xc2\xe6"
+			  "\xe9\x5a\x50\x98\x00\x58\xc9\x86"
+			  "\x4f\x20\x37\xdb\x7b\x22\xa3",
+		.ilen	= 79,
+		.result	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
+			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
+			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
+			  "\x68\x28\x73\x40\x9f\x96\x4a\x60"
+			  "\x80\xf4\x4b\xf4\xc1\x3d\xd0\x93"
+			  "\xcf\x12\xc9\x59\x8f\x7a\x7f\xa8"
+			  "\x1b\xa5\x50\xed\x87\xa9\x72\x59"
+			  "\x9c\x44\xb2\xa4\x99\x98\x34",
+		.rlen	= 63,
+	}, {
+		.key	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
+			  "\x10\x57\x85\x39\x93\x8f\xaf\x70",
+		.klen	= 16,
+		.iv	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
+			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
+		.assoc	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
+			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
+			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
+			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81"
+			  "\x0b\x76\x4f\xd7\x0a\x4b\x5e\x51"
+			  "\xe3\x1d\xb9\xe5\x21\xb9\x8f\xd4"
+			  "\x3d\x72\x3e\x26\x16\xa9\xca\x32"
+			  "\x77\x47\x63\x14\x95\x3d\xe4\x34",
+		.alen	= 64,
+		.input	= "\xe6\xeb\x92\x5a\x5b\xf0\x2d\xbb"
+			  "\x23\xec\x35\xe3\xae\xc9\xfb\x0b"
+			  "\x90\x14\x46\xeb\xa8\x8d\xb0\x9b"
+			  "\x39\xda\x8b\x48\xec\xb2\x00\x4e"
+			  "\x80\x6f\x46\x4f\x9b\x1e\xbb\x35"
+			  "\xea\x5a\xbc\xa2\x36\xa5\x89\x45"
+			  "\xc2\xd6\xd7\x15\x0b\xf6\x6c\x56"
+			  "\xec\x99\x7d\x61\xb3\x15\x93\xed"
+			  "\x83\x1e\xd9\x48\x84\x0b\x37\xfe"
+			  "\x95\x74\x44\xd5\x54\xa6\x27\x06",
+		.ilen	= 80,
+		.result	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
+			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
+			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
+			  "\x29\x56\x52\x19\x79\xf5\xe9\x37"
+			  "\x8f\xa1\x50\x23\x22\x4f\xe3\x91"
+			  "\xe9\x21\x5e\xbf\x52\x23\x95\x37"
+			  "\x48\x0c\x38\x8f\xf0\xff\x92\x24"
+			  "\x6b\x47\x49\xe3\x94\x1f\x1e\x01",
+		.rlen	= 64,
+	}, {
+		.key	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
+			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76",
+		.klen	= 16,
+		.iv	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
+			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
+		.assoc	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
+			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
+			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
+			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58"
+			  "\x1a\x22\x53\x05\x6b\x5c\x71\x4f"
+			  "\xfd\x2d\x4d\x4c\xe5\x62\xa5\x63"
+			  "\x6a\xda\x26\xc8\x7f\xff\xea\xfd"
+			  "\x46\x4a\xfa\x53\x8f\xc4\xcd\x68"
+			  "\x58",
+		.alen	= 65,
+		.input	= "\x89\x24\x27\x86\xdc\xd7\x6b\xd9"
+			  "\xd1\xcd\xdc\x16\xdd\x2c\xc1\xfb"
+			  "\x52\xb5\xb3\xab\x50\x99\x3f\xa0"
+			  "\x38\xa4\x74\xa5\x04\x15\x63\x05"
+			  "\x8f\x54\x81\x06\x5a\x6b\xa4\x63"
+			  "\x6d\xa7\x21\xcb\xff\x42\x30\x8e"
+			  "\x3b\xd1\xca\x3f\x4b\x1a\xb8\xc3"
+			  "\x42\x01\xe6\xbc\x75\x15\x87\xee"
+			  "\xc9\x8e\x65\x01\xd9\xd8\xb5\x9f"
+			  "\x48\x86\xa6\x5f\x2c\xc7\xb5\xb0"
+			  "\xed\x5d\x14\x7c\x3f\x40\xb1\x0b"
+			  "\x72\xef\x94\x8d\x7a\x85\x56\xe5"
+			  "\x56\x08\x15\x56\xba\xaf\xbd\xf0"
+			  "\x20\xef\xa0\xf6\xa9\xad\xa2\xc9"
+			  "\x1c\x3b\x28\x51\x7e\x77\xb2\x18"
+			  "\x4f\x61\x64\x37\x22\x36\x6d\x78"
+			  "\xed\xed\x35\xe8\x83\xa5\xec\x25"
+			  "\x6b\xff\x5f\x1a\x09\x96\x3d\xdc"
+			  "\x20",
+		.ilen	= 145,
+		.result	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
+			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
+			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
+			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
+			  "\x9d\x4d\x54\x51\x84\x61\xf6\x8e"
+			  "\x03\x31\xf2\x25\x16\xcc\xaa\xc6"
+			  "\x75\x73\x20\x30\x59\x54\xb2\xf0"
+			  "\x3a\x4b\xe0\x23\x8e\xa6\x08\x35"
+			  "\x8a\xdf\x27\xa0\xe4\x60\x99\xae"
+			  "\x8e\x43\xd9\x39\x7b\x10\x40\x67"
+			  "\x5c\x7e\xc9\x70\x63\x34\xca\x59"
+			  "\xfe\x86\xbc\xb7\x9c\x39\xf3\x6d"
+			  "\x6a\x41\x64\x6f\x16\x7f\x65\x7e"
+			  "\x89\x84\x68\xeb\xb0\x51\xbe\x55"
+			  "\x33\x16\x59\x6c\x3b\xef\x88\xad"
+			  "\x2f\xab\xbc\x25\x76\x87\x41\x2f"
+			  "\x36",
+		.rlen	= 129,
+	}, {
+		.key	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
+			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c",
+		.klen	= 16,
+		.iv	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
+			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
+		.assoc	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
+			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
+			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
+			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e"
+			  "\x28\xce\x57\x34\xcd\x6e\x84\x4c"
+			  "\x17\x3c\xe1\xb2\xa8\x0b\xbb\xf1"
+			  "\x96\x41\x0d\x69\xe8\x54\x0a\xc8"
+			  "\x15\x4e\x91\x92\x89\x4b\xb7\x9b"
+			  "\x21\xf7\x42\x89\xac\x12\x2a\x54"
+			  "\x69\xee\x18\xc7\x8d\xed\xe8\xfd"
+			  "\xbb\x04\x28\xe6\x8a\x3c\x98\xc1"
+			  "\x04\x2d\xa9\xa1\x24\x83\xff\xe9"
+			  "\x55\x7a\xf0\xd1\xf6\x63\x05\xe1"
+			  "\xd9\x1e\x75\x72\xc1\x9f\xae\x32"
+			  "\xe1\x6b\xcd\x9e\x61\x19\x23\x86"
+			  "\xd9\xd2\xaf\x8e\xd5\xd3\xa8\xa9"
+			  "\x51",
+		.alen	= 129,
+		.input	= "\x36\x78\xb9\x22\xde\x62\x35\x55"
+			  "\x1a\x7a\xf5\x45\xbc\xd7\x15\x82"
+			  "\x01\xe9\x5a\x07\xea\x46\xaf\x91"
+			  "\xcb\x73\xa5\xee\xe1\xb4\xbf\xc2"
+			  "\xdb\xd2\x9d\x59\xde\xfc\x83\x00"
+			  "\xf5\x46\xac\x97\xd5\x57\xa9\xb9"
+			  "\x1f\x8c\xe8\xca\x68\x8b\x91\x0c"
+			  "\x01\xbe\x0a\xaf\x7c\xf6\x67\xa4"
+			  "\xbf\xbc\x88\x3f\x5d\xd1\xf9\x19"
+			  "\x0f\x9d\xb2\xaf\xb9\x6e\x17\xdf"
+			  "\xa2",
+		.ilen	= 81,
+		.result	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
+			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
+			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
+			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
+			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
+			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
+			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
+			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
+			  "\x54",
+		.rlen	= 65,
+	}, {
+		.key	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
+			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82",
+		.klen	= 16,
+		.iv	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
+			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
+		.assoc	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
+			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85"
+			  "\x0e\x51\xf9\x1c\xee\x70\x6a\x27"
+			  "\x3d\xd3\xb7\xac\x51\xfa\xdf\x05",
+		.alen	= 32,
+		.input	= "\x08\x1b\x95\x0e\x41\x95\x02\x4b"
+			  "\x9c\xbb\xa8\xd0\x7c\xd3\x44\x6e"
+			  "\x89\x14\x33\x70\x0a\xbc\xea\x39"
+			  "\x88\xaa\x2b\xd5\x73\x11\x55\xf5"
+			  "\x33\x33\x9c\xd7\x42\x34\x49\x8e"
+			  "\x2f\x03\x30\x05\x47\xaf\x34",
+		.ilen	= 47,
+		.result	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
+			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07"
+			  "\xd9\x02\x7c\x3d\x2f\x18\x4b\x2d"
+			  "\x6e\xde\xee\xa2\x08\x12\xc7\xba",
+		.rlen	= 32,
+	}, {
+		.key	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
+			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
+		.klen	= 16,
+		.iv	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
+			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
+		.assoc	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
+			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c"
+			  "\x39\x14\x05\xa0\xf3\x10\xec\x41"
+			  "\xff\x01\x95\x84\x2b\x59\x7f\xdb",
+		.alen	= 32,
+		.input	= "\x97\xca\xf4\xe0\x8d\x89\xbf\x68"
+			  "\x0c\x60\xb9\x27\xdf\xaa\x41\xc6"
+			  "\x25\xd8\xf7\x1f\x10\x15\x48\x61"
+			  "\x4c\x95\x00\xdf\x51\x9b\x7f\xe6"
+			  "\x24\x40\x9e\xbe\x3b\xeb\x1b\x98"
+			  "\xb9\x9c\xe5\xef\xf2\x05",
+		.ilen	= 46,
+		.result	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
+			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d"
+			  "\x03\xc4\x88\xc1\x35\xb8\xcd\x47"
+			  "\x2f\x0c\xcd\x7a\xe2\x71\x66\x91",
+		.rlen	= 32,
+	}, {
+		.key	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
+			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
+		.klen	= 16,
+		.iv	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
+			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
+		.assoc	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
+			  "\xd5\x07\x58\x59\x72\xd7\xde\x92"
+			  "\x63\xd6\x10\x24\xf8\xb0\x6e\x5a"
+			  "\xc0\x2e\x74\x5d\x06\xb8\x1e\xb2",
+		.alen	= 32,
+		.input	= "\x63\x4c\x2a\x8e\xb4\x6b\x63\x0d"
+			  "\xb5\xec\x9b\x4e\x12\x23\xa3\xcf"
+			  "\x1a\x5a\x70\x15\x5a\x10\x40\x51"
+			  "\xca\x47\x4c\x9d\xc9\x97\xf4\x77"
+			  "\xdb\xc8\x10\x2d\xdc\x65\x20\x3f",
+		.ilen	= 40,
+		.result	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
+			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13"
+			  "\x2e\x86\x93\x45\x3a\x58\x4f\x61"
+			  "\xf0\x3a\xac\x53\xbc\xd0\x06\x68",
+		.rlen	= 32,
+	}, {
+		.key	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
+			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
+		.klen	= 16,
+		.iv	= "\xbb\x3a\xf7\x57\xc6\x36\x7c\x22"
+			  "\x36\xab\xde\xc6\x6d\x32\x70\x17",
+		.assoc	= "\xcb\x03\x85\xbf\x0a\xd5\x26\xa9"
+			  "\x56\xe1\x0a\xeb\x6c\xfb\xa1\x98"
+			  "\x8d\x98\x1c\xa8\xfe\x50\xf0\x74"
+			  "\x81\x5c\x53\x35\xe0\x17\xbd\x88",
+		.alen	= 32,
+		.input	= "\xf1\x62\x44\xc7\x5f\x19\xca\x43"
+			  "\x47\x2c\xaf\x68\x82\xbd\x51\xef"
+			  "\x3d\x65\xd8\x45\x2d\x06\x07\x78"
+			  "\x08\x2e\xb3\x23\xcd\x81\x12\x55"
+			  "\x1a",
+		.ilen	= 33,
+		.result	= "\xda\xcc\x14\x27\x4e\x74\xd1\x30"
+			  "\x76\x18\x37\x0f\x6a\xc4\xd1\x1a"
+			  "\x58\x49\x9f\xc9\x3f\xf8\xd1\x7a"
+			  "\xb2\x67\x8b\x2b\x96\x2f\xa5\x3e",
+		.rlen	= 32,
+	}, {
+		.key	= "\xe9\x95\xa2\x8f\x93\x13\x7b\xb7"
+			  "\x96\x4e\x63\x33\x69\x8d\x02\x9b"
+			  "\x23\xf9\x22\xeb\x80\xa0\xb1\x81"
+			  "\xe2\x73\xc3\x21\x4d\x47\x8d\xf4",
+		.klen	= 32,
+		.iv	= "\xf8\x5e\x31\xf7\xd7\xb2\x25\x3e"
+			  "\xb7\x85\x90\x58\x67\x57\x33\x1d",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xdf\x2f\x83\xc0\x45\x4a\x2c\xcf"
+			  "\xb9\xd2\x41\xf6\x80\xa1\x52\x70",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x25\xba\xdc\x2e\xa3\x8f\x24\xd3"
+			  "\x17\x29\x15\xc5\x63\xb2\xc5\xa1"
+			  "\x4d\xbc\x2d\x6f\x85\x40\x33\x9a"
+			  "\xa3\xa0\xa1\xfa\x27\xa6\x2c\xca",
+		.klen	= 32,
+		.iv	= "\x34\x83\x6a\x96\xe7\x2d\xce\x5a"
+			  "\x38\x5f\x42\xe9\x61\x7b\xf5\x23",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x01\xd8\x55\x3c\xc0\x5a\x4b\xc7"
+			  "\x01\xf4\x08\xe3\x0d\xf7\xf0\x78"
+			  "\x53",
+		.ilen	= 17,
+		.result	= "\x53",
+		.rlen	= 1,
+	}, {
+		.key	= "\x62\xdf\x16\xcd\xb3\x0a\xcc\xef"
+			  "\x98\x03\xc7\x56\x5d\xd6\x87\xa8"
+			  "\x77\x7e\x39\xf3\x8a\xe0\xb5\xb4"
+			  "\x65\xce\x80\xd2\x01\x05\xcb\xa1",
+		.klen	= 32,
+		.iv	= "\x71\xa8\xa4\x35\xf7\xa9\x76\x75"
+			  "\xb8\x39\xf4\x7a\x5b\x9f\xb8\x29",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xc2\x4b\x41\x0f\x2d\xb9\x62\x07"
+			  "\xff\x8e\x74\xf8\xa1\xa6\xd5\x37"
+			  "\xa5\x64\x31\x5c\xca\x73\x9b\x43"
+			  "\xe6\x70\x63\x46\x95\xcb\xf7\xb5"
+			  "\x20\x8c\x75\x7a\x2a\x17\x2f\xa9"
+			  "\xb8\x4d\x11\x42\xd1\xf8\xf1",
+		.ilen	= 47,
+		.result	= "\x8f\x3a\xc1\x05\x7f\xe7\xcb\x83"
+			  "\xf9\xa6\x4d\xc3\x58\x31\x19\x2c"
+			  "\xd7\x90\xc2\x56\x4e\xd8\x57\xc7"
+			  "\xf6\xf0\x27\xb4\x25\x4c\x83",
+		.rlen	= 31,
+	}, {
+		.key	= "\x9e\x03\x4f\x6d\xc3\x86\x75\x0a"
+			  "\x19\xdd\x79\xe8\x57\xfb\x4a\xae"
+			  "\xa2\x40\x45\x77\x90\x80\x37\xce"
+			  "\x26\xfb\x5f\xaa\xdb\x64\x6b\x77",
+		.klen	= 32,
+		.iv	= "\xae\xcc\xde\xd5\x07\x25\x1f\x91"
+			  "\x39\x14\xa6\x0c\x55\xc4\x7b\x30",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xbb\x01\x7c\xd1\x2c\x33\x7b\x37"
+			  "\x0a\xee\xc4\x30\x19\xd7\x3a\x6f"
+			  "\xf8\x2b\x67\xf5\x3b\x84\x87\x2a"
+			  "\xfb\x07\x7a\x82\xb5\xe4\x85\x26"
+			  "\x1e\xa8\xe5\x04\x54\xce\xe5\x5f"
+			  "\xb5\x3f\xc1\xd5\x7f\xbd\xd2\xa6",
+		.ilen	= 48,
+		.result	= "\xcc\x5f\xfb\xa4\x8f\x63\x74\x9f"
+			  "\x7a\x81\xff\x55\x52\x56\xdc\x33"
+			  "\x01\x52\xcd\xdb\x53\x78\xd9\xe1"
+			  "\xb7\x1d\x06\x8d\xff\xab\x22\x98",
+		.rlen	= 32,
+	}, {
+		.key	= "\xdb\x28\x89\x0c\xd3\x01\x1e\x26"
+			  "\x9a\xb7\x2b\x79\x51\x1f\x0d\xb4"
+			  "\xcc\x03\x50\xfc\x95\x20\xb9\xe7"
+			  "\xe8\x29\x3e\x83\xb5\xc3\x0a\x4e",
+		.klen	= 32,
+		.iv	= "\xea\xf1\x18\x74\x17\xa0\xc8\xad"
+			  "\xba\xee\x58\x9d\x4f\xe8\x3d\x36",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xc2\xf4\x40\x55\xf9\x59\xff\x73"
+			  "\x08\xf5\x98\x92\x0c\x7b\x35\x9a"
+			  "\xa8\xf4\x42\x7e\x6f\x93\xca\x22"
+			  "\x23\x06\x1e\xf8\x89\x22\xf4\x46"
+			  "\x7c\x7c\x67\x75\xab\xe5\x75\xaa"
+			  "\x15\xd7\x83\x19\xfd\x31\x59\x5b"
+			  "\x32",
+		.ilen	= 49,
+		.result	= "\x08\x84\x34\x44\x9f\xde\x1c\xbb"
+			  "\xfb\x5b\xb1\xe6\x4c\x7a\x9f\x39"
+			  "\x2c\x14\xd9\x5f\x59\x18\x5b\xfb"
+			  "\x79\x4b\xe5\x65\xd9\x0a\xc1\x6f"
+			  "\x2e",
+		.rlen	= 33,
+	}, {
+		.key	= "\x17\x4d\xc3\xab\xe3\x7d\xc7\x42"
+			  "\x1b\x91\xdd\x0a\x4b\x43\xcf\xba"
+			  "\xf6\xc5\x5c\x80\x9a\xc0\x3b\x01"
+			  "\xa9\x56\x1d\x5b\x8f\x22\xa9\x25",
+		.klen	= 32,
+		.iv	= "\x27\x16\x51\x13\x27\x1c\x71\xc9"
+			  "\x3b\xc8\x0a\x2f\x49\x0c\x00\x3c",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\xc9\x82\x3b\x4b\x87\x84\xa5\xdb"
+			  "\xa0\x8c\xd3\x3e\x7f\x8d\xe8\x28"
+			  "\x2a\xdc\xfa\x01\x84\x87\x9a\x70"
+			  "\x81\x75\x37\x0a\xd2\x75\xa9\xb6"
+			  "\x21\x72\xee\x7e\x65\x95\xe5\xcc"
+			  "\x01\xb7\x39\xa6\x51\x15\xca\xff"
+			  "\x61\xdc\x97\x38\xcc\xf4\xca\xc7"
+			  "\x83\x9b\x05\x11\x72\x60\xf0\xb4"
+			  "\x7e\x06\xab\x0a\xc0\xbb\x59\x23"
+			  "\xaa\x2d\xfc\x4e\x35\x05\x59",
+		.ilen	= 79,
+		.result	= "\x45\xa8\x6e\xe3\xaf\x5a\xc5\xd7"
+			  "\x7c\x35\x63\x77\x46\x9f\x61\x3f"
+			  "\x56\xd7\xe4\xe3\x5e\xb8\xdc\x14"
+			  "\x3a\x79\xc4\x3e\xb3\x69\x61\x46"
+			  "\x3c\xb6\x83\x4e\xb4\x26\xc7\x73"
+			  "\x22\xda\x52\x8b\x7d\x11\x98\xea"
+			  "\x62\xe1\x14\x1e\xdc\xfe\x0f\xad"
+			  "\x20\x76\x5a\xdc\x4e\x71\x13",
+		.rlen	= 63,
+	}, {
+		.key	= "\x54\x71\xfd\x4b\xf3\xf9\x6f\x5e"
+			  "\x9c\x6c\x8f\x9c\x45\x68\x92\xc1"
+			  "\x21\x87\x67\x04\x9f\x60\xbd\x1b"
+			  "\x6a\x84\xfc\x34\x6a\x81\x48\xfb",
+		.klen	= 32,
+		.iv	= "\x63\x3b\x8b\xb3\x37\x98\x1a\xe5"
+			  "\xbc\xa2\xbc\xc0\x43\x31\xc2\x42",
+		.assoc	= "",
+		.alen	= 0,
+		.input	= "\x11\x7c\x7d\xef\xce\x29\x95\xec"
+			  "\x7e\x9f\x42\xa6\x26\x07\xa1\x75"
+			  "\x2f\x4e\x09\x9a\xf6\x6b\xc2\xfa"
+			  "\x0d\xd0\x17\xdc\x25\x1e\x9b\xdc"
+			  "\x5f\x8c\x1c\x60\x15\x4f\x9b\x20"
+			  "\x7b\xff\xcd\x82\x60\x84\xf4\xa5"
+			  "\x20\x9a\x05\x19\x5b\x02\x0a\x72"
+			  "\x43\x11\x26\x58\xcf\xc5\x41\xcf"
+			  "\x13\xcc\xde\x32\x92\xfa\x86\xf2"
+			  "\xaf\x16\xe8\x8f\xca\xb6\xfd\x54",
+		.ilen	= 80,
+		.result	= "\x81\xcd\xa8\x82\xbf\xd6\x6e\xf3"
+			  "\xfd\x0f\x15\x09\x40\xc3\x24\x45"
+			  "\x81\x99\xf0\x67\x63\x58\x5e\x2e"
+			  "\xfb\xa6\xa3\x16\x8d\xc8\x00\x1c"
+			  "\x4b\x62\x87\x7c\x15\x38\xda\x70"
+			  "\x3d\xea\xe7\xf2\x40\xba\xae\x79"
+			  "\x8f\x48\xfc\xbf\x45\x53\x2e\x78"
+			  "\xef\x79\xf0\x1b\x49\xf7\xfd\x9c",
+		.rlen	= 64,
+	}, {
+		.key	= "\x90\x96\x36\xea\x03\x74\x18\x7a"
+			  "\x1d\x46\x42\x2d\x3f\x8c\x54\xc7"
+			  "\x4b\x4a\x73\x89\xa4\x00\x3f\x34"
+			  "\x2c\xb1\xdb\x0c\x44\xe0\xe8\xd2",
+		.klen	= 32,
+		.iv	= "\xa0\x5f\xc5\x52\x47\x13\xc2\x01"
+			  "\x3d\x7c\x6e\x52\x3d\x55\x85\x48",
+		.assoc	= "\xaf",
+		.alen	= 1,
+		.input	= "\x9b\xc5\x3b\x20\x0a\x88\x56\xbe"
+			  "\x69\xdf\xc4\xc4\x02\x46\x3a\xf0",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xcd\xbb\x70\x89\x13\xf0\xc1\x95"
+			  "\x9e\x20\xf4\xbf\x39\xb1\x17\xcd"
+			  "\x76\x0c\x7f\x0d\xa9\xa0\xc1\x4e"
+			  "\xed\xdf\xb9\xe4\x1e\x3f\x87\xa8",
+		.klen	= 32,
+		.iv	= "\xdc\x84\xfe\xf1\x58\x8f\x6b\x1c"
+			  "\xbe\x57\x20\xe3\x37\x7a\x48\x4f",
+		.assoc	= "\xeb\x4d\x8d\x59\x9c\x2e\x15\xa3"
+			  "\xde\x8d\x4d\x07\x36\x43\x78\xd0"
+			  "\x0b\x6d\x84\x4f\x2c\xf0\x82\x5b"
+			  "\x4e\xf6\x29\xd1\x8b\x6f\x56",
+		.alen	= 31,
+		.input	= "\xe0\x6d\xa1\x07\x98\x2f\x40\x2d"
+			  "\x2e\x9a\xd6\x61\x43\xc0\x74\x69",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x0a\xe0\xaa\x29\x24\x6c\x6a\xb1"
+			  "\x1f\xfa\xa6\x50\x33\xd5\xda\xd3"
+			  "\xa0\xce\x8a\x91\xae\x40\x43\x68"
+			  "\xae\x0d\x98\xbd\xf8\x9e\x26\x7f",
+		.klen	= 32,
+		.iv	= "\x19\xa9\x38\x91\x68\x0b\x14\x38"
+			  "\x3f\x31\xd2\x74\x31\x9e\x0a\x55",
+		.assoc	= "\x28\x72\xc7\xf8\xac\xaa\xbe\xbf"
+			  "\x5f\x67\xff\x99\x30\x67\x3b\xd6"
+			  "\x35\x2f\x90\xd3\x31\x90\x04\x74"
+			  "\x0f\x23\x08\xa9\x65\xce\xf6\xea",
+		.alen	= 32,
+		.input	= "\xb9\x57\x13\x3e\x82\x31\x61\x65"
+			  "\x0d\x7f\x6c\x96\x93\x5c\x50\xe2",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x46\x04\xe3\xc8\x34\xe7\x12\xcd"
+			  "\xa0\xd4\x58\xe2\x2d\xf9\x9c\xda"
+			  "\xca\x91\x96\x15\xb4\xe0\xc5\x81"
+			  "\x70\x3a\x77\x95\xd2\xfd\xc5\x55",
+		.klen	= 32,
+		.iv	= "\x55\xcd\x72\x30\x78\x86\xbd\x54"
+			  "\xc0\x0b\x84\x06\x2b\xc2\xcd\x5b",
+		.assoc	= "\x64\x97\x00\x98\xbc\x25\x67\xdb"
+			  "\xe0\x41\xb1\x2a\x2a\x8c\xfe\xdd"
+			  "\x5f\xf2\x9c\x58\x36\x30\x86\x8e"
+			  "\xd1\x51\xe6\x81\x3f\x2d\x95\xc1"
+			  "\x01",
+		.alen	= 33,
+		.input	= "\x81\x96\x34\xde\xbb\x36\xdd\x3e"
+			  "\x4e\x5e\xcb\x44\x21\xb8\x3f\xf1",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\x83\x29\x1d\x67\x44\x63\xbb\xe9"
+			  "\x20\xaf\x0a\x73\x27\x1e\x5f\xe0"
+			  "\xf5\x53\xa1\x9a\xb9\x80\x47\x9b"
+			  "\x31\x68\x56\x6e\xac\x5c\x65\x2c",
+		.klen	= 32,
+		.iv	= "\x92\xf2\xac\xcf\x88\x02\x65\x70"
+			  "\x41\xe5\x36\x97\x25\xe7\x90\x61",
+		.assoc	= "\xa1\xbb\x3a\x37\xcc\xa1\x10\xf7"
+			  "\x61\x1c\x63\xbc\x24\xb0\xc0\xe3"
+			  "\x8a\xb4\xa7\xdc\x3b\xd0\x08\xa8"
+			  "\x92\x7f\xc5\x5a\x19\x8c\x34\x97"
+			  "\x0f\x95\x9b\x18\xe4\x8d\xb4\x24"
+			  "\xb9\x33\x28\x18\xe1\x9d\x14\xe0"
+			  "\x64\xb2\x89\x7d\x78\xa8\x05\x7e"
+			  "\x07\x8c\xfc\x88\x2d\xb8\x53",
+		.alen	= 63,
+		.input	= "\x2e\x99\xb6\x79\x57\x56\x80\x36"
+			  "\x8e\xc4\x1c\x12\x7d\x71\x36\x0c",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xbf\x4e\x57\x07\x54\xdf\x64\x05"
+			  "\xa1\x89\xbc\x04\x21\x42\x22\xe6"
+			  "\x1f\x15\xad\x1e\xbe\x20\xc9\xb4"
+			  "\xf3\x95\x35\x46\x86\xbb\x04\x03",
+		.klen	= 32,
+		.iv	= "\xce\x17\xe5\x6f\x98\x7e\x0e\x8c"
+			  "\xc2\xbf\xe8\x29\x1f\x0b\x52\x68",
+		.assoc	= "\xdd\xe0\x74\xd6\xdc\x1d\xb8\x13"
+			  "\xe2\xf6\x15\x4d\x1e\xd4\x83\xe9"
+			  "\xb4\x76\xb3\x60\x40\x70\x8a\xc1"
+			  "\x53\xac\xa4\x32\xf3\xeb\xd3\x6e"
+			  "\x1e\x42\xa0\x46\x45\x9f\xc7\x22"
+			  "\xd3\x43\xbc\x7e\xa5\x47\x2a\x6f"
+			  "\x91\x19\x70\x1e\xe1\xfe\x25\x49"
+			  "\xd6\x8f\x93\xc7\x28\x3f\x3d\x03",
+		.alen	= 64,
+		.input	= "\x7b\x25\x3d\x47\xd4\xa7\x08\xce"
+			  "\x3b\x89\x40\x36\xba\x6d\x0e\xa2",
+		.ilen	= 16,
+		.result	= "",
+		.rlen	= 0,
+	}, {
+		.key	= "\xfc\x72\x90\xa6\x64\x5a\x0d\x21"
+			  "\x22\x63\x6e\x96\x1b\x67\xe4\xec"
+			  "\x49\xd7\xb9\xa2\xc3\xc0\x4b\xce"
+			  "\xb4\xc3\x14\x1e\x61\x1a\xa3\xd9",
+		.klen	= 32,
+		.iv	= "\x0b\x3c\x1f\x0e\xa8\xf9\xb7\xa7"
+			  "\x42\x9a\x9a\xba\x19\x30\x15\x6e",
+		.assoc	= "\x1a",
+		.alen	= 1,
+		.input	= "\xe6\x09\x6f\x95\x9a\x18\xc8\xf6"
+			  "\x17\x75\x81\x16\xdf\x26\xff\x67"
+			  "\x92",
+		.ilen	= 17,
+		.result	= "\x29",
+		.rlen	= 1,
+	}, {
+		.key	= "\x38\x97\xca\x45\x74\xd6\xb6\x3c"
+			  "\xa3\x3d\x20\x27\x15\x8b\xa7\xf2"
+			  "\x74\x9a\xc4\x27\xc8\x60\xcd\xe8"
+			  "\x75\xf0\xf2\xf7\x3b\x79\x42\xb0",
+		.klen	= 32,
+		.iv	= "\x47\x60\x59\xad\xb8\x75\x60\xc3"
+			  "\xc3\x74\x4c\x4c\x13\x54\xd8\x74",
+		.assoc	= "\x56\x29\xe7\x15\xfc\x14\x0a\x4a"
+			  "\xe4\xaa\x79\x70\x12\x1d\x08\xf6"
+			  "\x09\xfb\xca\x69\x4b\xb0\x8e\xf5"
+			  "\xd6\x07\x62\xe3\xa8\xa9\x12",
+		.alen	= 31,
+		.input	= "\x82\xc0\x56\xf0\xd7\xc4\xc9\xfd"
+			  "\x3c\xd1\x2a\xd4\x15\x86\x9d\xda"
+			  "\xea\x6c\x6f\xa1\x33\xb0\x7a\x01"
+			  "\x57\xe7\xf3\x7b\x73\xe7\x54\x10"
+			  "\xc6\x91\xe2\xc6\xa0\x69\xe7\xe6"
+			  "\x76\xc3\xf5\x3a\x76\xfd\x4a",
+		.ilen	= 47,
+		.result	= "\x66\xf3\x75\x7d\x40\xb3\xb4\xd1"
+			  "\x04\xe1\xa6\x94\x10\xe6\x39\x77"
+			  "\xd3\xac\x4d\x8a\x8c\x58\x6e\xfb"
+			  "\x06\x13\x9a\xd9\x5e\xc0\xfa",
+		.rlen	= 31,
+	}, {
+		.key	= "\x75\xbc\x04\xe5\x84\x52\x5e\x58"
+			  "\x24\x17\xd2\xb9\x0e\xaf\x6a\xf9"
+			  "\x9e\x5c\xd0\xab\xcd\x00\x4f\x01"
+			  "\x37\x1e\xd1\xcf\x15\xd8\xe2\x86",
+		.klen	= 32,
+		.iv	= "\x84\x85\x92\x4d\xc8\xf1\x08\xdf"
+			  "\x44\x4e\xff\xdd\x0d\x78\x9a\x7a",
+		.assoc	= "\x93\x4e\x21\xb4\x0c\x90\xb3\x66"
+			  "\x65\x84\x2b\x01\x0b\x42\xcb\xfc"
+			  "\x33\xbd\xd6\xed\x50\x50\x10\x0e"
+			  "\x97\x35\x41\xbb\x82\x08\xb1\xf2",
+		.alen	= 32,
+		.input	= "\x01\x47\x8e\x6c\xf6\x64\x89\x3a"
+			  "\x71\xce\xe4\xaa\x45\x70\xe6\x84"
+			  "\x62\x48\x08\x64\x86\x6a\xdf\xec"
+			  "\xb4\xa0\xfb\x34\x03\x0c\x19\xf4"
+			  "\x2b\x7b\x36\x73\xec\x54\xa9\x1e"
+			  "\x30\x85\xdb\xe4\xac\xe9\x2c\xca",
+		.ilen	= 48,
+		.result	= "\xa2\x17\xaf\x1c\x50\x2e\x5d\xed"
+			  "\x85\xbb\x58\x26\x0a\x0b\xfc\x7d"
+			  "\xfe\x6e\x59\x0e\x91\xf8\xf0\x15"
+			  "\xc8\x40\x78\xb1\x38\x1f\x99\xa7",
+		.rlen	= 32,
+	}, {
+		.key	= "\xb1\xe1\x3e\x84\x94\xcd\x07\x74"
+			  "\xa5\xf2\x84\x4a\x08\xd4\x2c\xff"
+			  "\xc8\x1e\xdb\x2f\xd2\xa0\xd1\x1b"
+			  "\xf8\x4c\xb0\xa8\xef\x37\x81\x5d",
+		.klen	= 32,
+		.iv	= "\xc0\xaa\xcc\xec\xd8\x6c\xb1\xfb"
+			  "\xc5\x28\xb1\x6e\x07\x9d\x5d\x81",
+		.assoc	= "\xd0\x73\x5a\x54\x1d\x0b\x5b\x82"
+			  "\xe5\x5f\xdd\x93\x05\x66\x8e\x02"
+			  "\x5e\x80\xe1\x71\x55\xf0\x92\x28"
+			  "\x59\x62\x20\x94\x5c\x67\x50\xc8"
+			  "\x58",
+		.alen	= 33,
+		.input	= "\x85\xe0\xf8\x0f\x8e\x49\xe3\x60"
+			  "\xcb\x4a\x54\x94\xcf\xf5\x7e\x34"
+			  "\xe9\xf8\x80\x65\x53\xd0\x72\x70"
+			  "\x4f\x7d\x9d\xd1\x15\x6f\xb9\x2c"
+			  "\xfa\xe8\xdd\xac\x2e\xe1\x3f\x67"
+			  "\x63\x0f\x1a\x59\xb7\x89\xdb\xf4"
+			  "\xc3",
+		.ilen	= 49,
+		.result	= "\xdf\x3c\xe9\xbc\x61\xaa\x06\x09"
+			  "\x06\x95\x0a\xb7\x04\x2f\xbe\x84"
+			  "\x28\x30\x64\x92\x96\x98\x72\x2e"
+			  "\x89\x6e\x57\x8a\x13\x7e\x38\x7e"
+			  "\xdb",
+		.rlen	= 33,
+	}, {
+		.key	= "\xee\x05\x77\x23\xa5\x49\xb0\x90"
+			  "\x26\xcc\x36\xdc\x02\xf8\xef\x05"
+			  "\xf3\xe1\xe7\xb3\xd8\x40\x53\x35"
+			  "\xb9\x79\x8f\x80\xc9\x96\x20\x33",
+		.klen	= 32,
+		.iv	= "\xfd\xce\x06\x8b\xe9\xe8\x5a\x17"
+			  "\x46\x02\x63\x00\x01\xc1\x20\x87",
+		.assoc	= "\x0c\x98\x94\xf3\x2d\x87\x04\x9e"
+			  "\x66\x39\x8f\x24\xff\x8a\x50\x08"
+			  "\x88\x42\xed\xf6\x5a\x90\x14\x42"
+			  "\x1a\x90\xfe\x6c\x36\xc6\xf0\x9f"
+			  "\x66\xa0\xb5\x2d\x2c\xf8\x25\x15"
+			  "\x55\x90\xa2\x7e\x77\x94\x96\x3a"
+			  "\x71\x1c\xf7\x44\xee\xa8\xc3\x42"
+			  "\xe2\xa3\x84\x04\x0b\xe1\xce",
+		.alen	= 63,
+		.input	= "\x00\xe5\x5b\x87\x5c\x20\x22\x8a"
+			  "\xda\x1f\xd3\xff\xbb\xb2\xb0\xf8"
+			  "\xef\xe9\xeb\x9e\x7c\x80\xf4\x2b"
+			  "\x59\xc0\x79\xbc\x17\xa0\x15\x01"
+			  "\xf5\x72\xfb\x5a\xe7\xaf\x07\xe3"
+			  "\x1b\x49\x21\x34\x23\x63\x55\x5e"
+			  "\xee\x4f\x34\x17\xfa\xfe\xa5\x0c"
+			  "\xed\x0b\x23\xea\x9b\xda\x57\x2f"
+			  "\xf6\xa9\xae\x0d\x4e\x40\x96\x45"
+			  "\x7f\xfa\xf0\xbf\xc4\x98\x78",
+		.ilen	= 79,
+		.result	= "\x1b\x61\x23\x5b\x71\x26\xae\x25"
+			  "\x87\x6f\xbc\x49\xfe\x53\x81\x8a"
+			  "\x53\xf2\x70\x17\x9b\x38\xf4\x48"
+			  "\x4b\x9b\x36\x62\xed\xdd\xd8\x54"
+			  "\xea\xcb\xb6\x79\x45\xfc\xaa\x54"
+			  "\x5c\x94\x47\x58\xa7\xff\x9c\x9e"
+			  "\x7c\xb6\xf1\xac\xc8\xfd\x8b\x35"
+			  "\xd5\xa4\x6a\xd4\x09\xc2\x08",
+		.rlen	= 63,
+	}, {
+		.key	= "\x2a\x2a\xb1\xc3\xb5\xc5\x59\xac"
+			  "\xa7\xa6\xe8\x6d\xfc\x1d\xb2\x0b"
+			  "\x1d\xa3\xf3\x38\xdd\xe0\xd5\x4e"
+			  "\x7b\xa7\x6e\x58\xa3\xf5\xbf\x0a",
+		.klen	= 32,
+		.iv	= "\x39\xf3\x3f\x2b\xf9\x64\x03\x33"
+			  "\xc7\xdd\x15\x91\xfb\xe6\xe2\x8d",
+		.assoc	= "\x49\xbc\xce\x92\x3d\x02\xad\xba"
+			  "\xe7\x13\x41\xb6\xf9\xaf\x13\x0f"
+			  "\xb2\x04\xf8\x7a\x5f\x30\x96\x5b"
+			  "\xdc\xbd\xdd\x44\x10\x25\x8f\x75"
+			  "\x75\x4d\xb9\x5b\x8e\x0a\x38\x13"
+			  "\x6f\x9f\x36\xe4\x3a\x3e\xac\xc9"
+			  "\x9d\x83\xde\xe5\x57\xfd\xe3\x0e"
+			  "\xb1\xa7\x1b\x44\x05\x67\xb7\x37",
+		.alen	= 64,
+		.input	= "\x28\xdd\xb9\x4a\x12\xc7\x0a\xe1"
+			  "\x58\x06\x1a\x9b\x8c\x67\xdf\xeb"
+			  "\x35\x35\x60\x9d\x06\x40\x65\xc1"
+			  "\x93\xe8\xb3\x82\x50\x29\xdd\xb5"
+			  "\x2b\xcb\xde\x18\x78\x6b\x42\xbe"
+			  "\x6d\x24\xd0\xb2\x7d\xd7\x08\x8f"
+			  "\x4a\x18\x98\xad\x8c\xf2\x97\xb4"
+			  "\xf4\x77\xe4\xbf\x41\x3b\xc4\x06"
+			  "\xce\x9e\x34\x81\xf0\x89\x11\x13"
+			  "\x02\x65\xa1\x7c\xdf\x07\x33\x06",
+		.ilen	= 80,
+		.result	= "\x58\x85\x5c\xfa\x81\xa1\x57\x40"
+			  "\x08\x4a\x6e\xda\xf8\x78\x44\x90"
+			  "\x7d\xb5\x7b\x9b\xa1\xd8\x76\x62"
+			  "\x0c\xc9\x15\x3b\xc7\x3c\x77\x2b"
+			  "\xf8\x78\xba\xa7\xa6\x0e\xbd\x52"
+			  "\x76\xa3\xdc\xbe\x6b\xa8\xb1\x2d"
+			  "\xa9\x1d\xd8\x4e\x31\x53\xab\x00"
+			  "\xa5\xa7\x01\x13\x04\x49\xf2\x04",
+		.rlen	= 64,
+	}, {
+		.key	= "\x67\x4f\xeb\x62\xc5\x40\x01\xc7"
+			  "\x28\x80\x9a\xfe\xf6\x41\x74\x12"
+			  "\x48\x65\xfe\xbc\xe2\x80\x57\x68"
+			  "\x3c\xd4\x4d\x31\x7d\x54\x5f\xe1",
+		.klen	= 32,
+		.iv	= "\x76\x18\x79\xca\x09\xdf\xac\x4e"
+			  "\x48\xb7\xc7\x23\xf5\x0a\xa5\x93",
+		.assoc	= "\x85\xe1\x08\x32\x4d\x7e\x56\xd5"
+			  "\x68\xed\xf3\x47\xf3\xd3\xd6\x15"
+			  "\xdd\xc7\x04\xfe\x64\xd0\x18\x75"
+			  "\x9d\xeb\xbc\x1d\xea\x84\x2e\x4c"
+			  "\x83\xf9\xbe\x8a\xef\x1c\x4b\x10"
+			  "\x89\xaf\xcb\x4b\xfe\xe7\xc1\x58"
+			  "\xca\xea\xc6\x87\xc0\x53\x03\xd9"
+			  "\x80\xaa\xb2\x83\xff\xee\xa1\x6a"
+			  "\x04",
+		.alen	= 65,
+		.input	= "\x85\x39\x69\x35\xfb\xf9\xb0\xa6"
+			  "\x85\x43\x88\xd0\xd7\x78\x60\x19"
+			  "\x3e\x1f\xb1\xa4\xd6\xc5\x96\xec"
+			  "\xf7\x84\x85\xc7\x27\x0f\x74\x57"
+			  "\x28\x9e\xdd\x90\x3c\x43\x12\xc5"
+			  "\x51\x3d\x39\x8f\xa5\xf4\xe0\x0b"
+			  "\x57\x04\xf1\x6d\xfe\x9b\x84\x27"
+			  "\xe8\xeb\x4d\xda\x02\x0a\xc5\x49"
+			  "\x1a\x55\x5e\x50\x56\x4d\x94\xda"
+			  "\x20\xf8\x12\x54\x50\xb3\x11\xda"
+			  "\xed\x44\x27\x67\xd5\xd1\x8b\x4b"
+			  "\x38\x67\x56\x65\x59\xda\xe6\x97"
+			  "\x81\xae\x2f\x92\x3b\xae\x22\x1c"
+			  "\x91\x59\x38\x18\x00\xe8\xba\x92"
+			  "\x04\x19\x56\xdf\xb0\x82\xeb\x6f"
+			  "\x2e\xdb\x54\x3c\x4b\xbb\x60\x90"
+			  "\x4c\x50\x10\x62\xba\x7a\xb1\x68"
+			  "\x37\xd7\x87\x4e\xe4\x66\x09\x1f"
+			  "\xa5",
+		.ilen	= 145,
+		.result	= "\x94\xaa\x96\x9a\x91\x1d\x00\x5c"
+			  "\x88\x24\x20\x6b\xf2\x9c\x06\x96"
+			  "\xa7\x77\x87\x1f\xa6\x78\xf8\x7b"
+			  "\xcd\xf6\xf4\x13\xa1\x9b\x16\x02"
+			  "\x07\x24\xbf\xd5\x08\x20\xd0\x4f"
+			  "\x90\xb3\x70\x24\x2f\x51\xc7\xbb"
+			  "\xd6\x84\xc0\xef\x9a\xa8\xca\xcc"
+			  "\x74\xab\x97\x53\xfe\xd0\xdb\x37"
+			  "\x37\x6a\x0e\x9f\x3f\xa3\x2a\xe3"
+			  "\x1b\x34\x6d\x51\x72\x2b\x17\xe7"
+			  "\x4d\xaa\x2c\x18\xda\xa3\x33\x89"
+			  "\x2a\x9f\xf4\xd2\xed\x76\x3d\x3f"
+			  "\x3c\x15\x9d\x8e\x4f\x3c\x27\xb0"
+			  "\x42\x3f\x2f\x8a\xd4\xc2\x10\xb2"
+			  "\x27\x7f\xe3\x34\x80\x02\x49\x4b"
+			  "\x07\x68\x22\x2a\x88\x25\x53\xb2"
+			  "\x2f",
+		.rlen	= 129,
+	}, {
+		.key	= "\xa3\x73\x24\x01\xd5\xbc\xaa\xe3"
+			  "\xa9\x5a\x4c\x90\xf0\x65\x37\x18"
+			  "\x72\x28\x0a\x40\xe7\x20\xd9\x82"
+			  "\xfe\x02\x2b\x09\x57\xb3\xfe\xb7",
+		.klen	= 32,
+		.iv	= "\xb3\x3d\xb3\x69\x19\x5b\x54\x6a"
+			  "\xc9\x91\x79\xb4\xef\x2e\x68\x99",
+		.assoc	= "\xc2\x06\x41\xd1\x5d\xfa\xff\xf1"
+			  "\xe9\xc7\xa5\xd9\xed\xf8\x98\x1b"
+			  "\x07\x89\x10\x82\x6a\x70\x9a\x8f"
+			  "\x5e\x19\x9b\xf5\xc5\xe3\xcd\x22"
+			  "\x92\xa5\xc2\xb8\x51\x2e\x5e\x0e"
+			  "\xa4\xbe\x5f\xb1\xc1\x90\xd7\xe7"
+			  "\xf7\x52\xae\x28\x29\xa8\x22\xa4"
+			  "\x4f\xae\x48\xc2\xfa\x75\x8b\x9e"
+			  "\xce\x83\x2a\x88\x07\x55\xbb\x89"
+			  "\xf6\xdf\xac\xdf\x83\x08\xbf\x7d"
+			  "\xac\x30\x8b\x8e\x02\xac\x00\xf1"
+			  "\x30\x46\xe1\xbc\x75\xbf\x49\xbb"
+			  "\x26\x4e\x29\xf0\x2f\x21\xc6\x13"
+			  "\x92\xd9\x3d\x11\xe4\x10\x00\x8e"
+			  "\xd4\xd4\x58\x65\xa6\x2b\xe3\x25"
+			  "\xb1\x8f\x15\x93\xe7\x71\xb9\x2c"
+			  "\x4b",
+		.alen	= 129,
+		.input	= "\x7d\xde\x53\x22\xe4\x23\x3b\x30"
+			  "\x78\xde\x35\x90\x7a\xd9\x0b\x93"
+			  "\xf6\x0e\x0b\xed\x40\xee\x10\x9c"
+			  "\x96\x3a\xd3\x34\xb2\xd0\x67\xcf"
+			  "\x63\x7f\x2d\x0c\xcf\x96\xec\x64"
+			  "\x1a\x87\xcc\x7d\x2c\x5e\x81\x4b"
+			  "\xd2\x8f\x4c\x7c\x00\xb1\xb4\xe0"
+			  "\x87\x4d\xb1\xbc\xd8\x78\x2c\x17"
+			  "\xf2\x3b\xd8\x28\x40\xe2\x76\xf6"
+			  "\x20\x13\x83\x46\xaf\xff\xe3\x0f"
+			  "\x72",
+		.ilen	= 81,
+		.result	= "\xd1\xcf\xd0\x39\xa1\x99\xa9\x78"
+			  "\x09\xfe\xd2\xfd\xec\xc1\xc9\x9d"
+			  "\xd2\x39\x93\xa3\xab\x18\x7a\x95"
+			  "\x8f\x24\xd3\xeb\x7b\xfa\xb5\xd8"
+			  "\x15\xd1\xc3\x04\x69\x32\xe3\x4d"
+			  "\xaa\xc2\x04\x8b\xf2\xfa\xdc\x4a"
+			  "\x02\xeb\xa8\x90\x03\xfd\xea\x97"
+			  "\x43\xaf\x2e\x92\xf8\x57\xc5\x6a"
+			  "\x00",
+		.rlen	= 65,
+	}, {
+		.key	= "\xe0\x98\x5e\xa1\xe5\x38\x53\xff"
+			  "\x2a\x35\xfe\x21\xea\x8a\xfa\x1e"
+			  "\x9c\xea\x15\xc5\xec\xc0\x5b\x9b"
+			  "\xbf\x2f\x0a\xe1\x32\x12\x9d\x8e",
+		.klen	= 32,
+		.iv	= "\xef\x61\xed\x08\x29\xd7\xfd\x86"
+			  "\x4a\x6b\x2b\x46\xe9\x53\x2a\xa0",
+		.assoc	= "\xfe\x2a\x7b\x70\x6d\x75\xa7\x0d"
+			  "\x6a\xa2\x57\x6a\xe7\x1c\x5b\x21"
+			  "\x31\x4b\x1b\x07\x6f\x10\x1c\xa8"
+			  "\x20\x46\x7a\xce\x9f\x42\x6d\xf9",
+		.alen	= 32,
+		.input	= "\x5a\xcd\x8c\x57\xf2\x6a\xb6\xbe"
+			  "\x53\xc7\xaa\x9a\x60\x74\x9c\xc4"
+			  "\xa2\xc2\xd0\x6d\xe1\x03\x63\xdc"
+			  "\xbb\x51\x7e\x9c\x89\x73\xde\x4e"
+			  "\x24\xf8\x52\x7c\x15\x41\x0e\xba"
+			  "\x69\x0e\x36\x5f\x2f\x22\x8c",
+		.ilen	= 47,
+		.result	= "\x0d\xf4\x09\xd8\xb1\x14\x51\x94"
+			  "\x8a\xd8\x84\x8e\xe6\xe5\x8c\xa3"
+			  "\xfc\xfc\x9e\x28\xb0\xb8\xfc\xaf"
+			  "\x50\x52\xb1\xc4\x55\x59\x55\xaf",
+		.rlen	= 32,
+	}, {
+		.key	= "\x1c\xbd\x98\x40\xf5\xb3\xfc\x1b"
+			  "\xaa\x0f\xb0\xb3\xe4\xae\xbc\x24"
+			  "\xc7\xac\x21\x49\xf1\x60\xdd\xb5"
+			  "\x80\x5d\xe9\xba\x0c\x71\x3c\x64",
+		.klen	= 32,
+		.iv	= "\x2c\x86\x26\xa8\x39\x52\xa6\xa2"
+			  "\xcb\x45\xdd\xd7\xe3\x77\xed\xa6",
+		.assoc	= "\x3b\x4f\xb5\x10\x7d\xf1\x50\x29"
+			  "\xeb\x7c\x0a\xfb\xe1\x40\x1e\x27"
+			  "\x5c\x0d\x27\x8b\x74\xb0\x9e\xc2"
+			  "\xe1\x74\x59\xa6\x79\xa1\x0c\xd0",
+		.alen	= 32,
+		.input	= "\x47\xd6\xce\x78\xd6\xbf\x4a\x51"
+			  "\xb8\xda\x92\x3c\xfd\xda\xac\x8e"
+			  "\x8d\x88\xd7\x4d\x90\xe5\xeb\xa1"
+			  "\xab\xd6\x7c\x76\xad\xea\x7d\x76"
+			  "\x53\xee\xb0\xcd\xd0\x02\xbb\x70"
+			  "\x5b\x6f\x7b\xe2\x8c\xe8",
+		.ilen	= 46,
+		.result	= "\x4a\x18\x43\x77\xc1\x90\xfa\xb0"
+			  "\x0b\xb2\x36\x20\xe0\x09\x4e\xa9"
+			  "\x26\xbe\xaa\xac\xb5\x58\x7e\xc8"
+			  "\x11\x7f\x90\x9c\x2f\xb8\xf4\x85",
+		.rlen	= 32,
+	}, {
+		.key	= "\x59\xe1\xd2\xdf\x05\x2f\xa4\x37"
+			  "\x2b\xe9\x63\x44\xde\xd3\x7f\x2b"
+			  "\xf1\x6f\x2d\xcd\xf6\x00\x5f\xcf"
+			  "\x42\x8a\xc8\x92\xe6\xd0\xdc\x3b",
+		.klen	= 32,
+		.iv	= "\x68\xab\x60\x47\x49\xce\x4f\xbe"
+			  "\x4c\x20\x8f\x68\xdd\x9c\xb0\xac",
+		.assoc	= "\x77\x74\xee\xaf\x8d\x6d\xf9\x45"
+			  "\x6c\x56\xbc\x8d\xdb\x65\xe0\x2e"
+			  "\x86\xd0\x32\x0f\x79\x50\x20\xdb"
+			  "\xa2\xa1\x37\x7e\x53\x00\xab\xa6",
+		.alen	= 32,
+		.input	= "\x9f\xa9\x2b\xa4\x8f\x00\x05\x2b"
+			  "\xe7\x68\x81\x51\xbb\xfb\xdf\x60"
+			  "\xbb\xac\xe8\xc1\xdc\x68\xae\x68"
+			  "\x3a\xcd\x7a\x06\x49\xfe\x80\x11"
+			  "\xe6\x61\x99\xe2\xdd\xbe\x2c\xbf",
+		.ilen	= 40,
+		.result	= "\x86\x3d\x7d\x17\xd1\x0c\xa3\xcc"
+			  "\x8c\x8d\xe8\xb1\xda\x2e\x11\xaf"
+			  "\x51\x80\xb5\x30\xba\xf8\x00\xe2"
+			  "\xd3\xad\x6f\x75\x09\x18\x93\x5c",
+		.rlen	= 32,
+	}, {
+		.key	= "\x96\x06\x0b\x7f\x15\xab\x4d\x53"
+			  "\xac\xc3\x15\xd6\xd8\xf7\x42\x31"
+			  "\x1b\x31\x38\x51\xfc\xa0\xe1\xe8"
+			  "\x03\xb8\xa7\x6b\xc0\x2f\x7b\x11",
+		.klen	= 32,
+		.iv	= "\xa5\xcf\x9a\xe6\x59\x4a\xf7\xd9"
+			  "\xcd\xfa\x41\xfa\xd7\xc0\x72\xb2",
+		.assoc	= "\xb4\x99\x28\x4e\x9d\xe8\xa2\x60"
+			  "\xed\x30\x6e\x1e\xd5\x89\xa3\x34"
+			  "\xb1\x92\x3e\x93\x7e\xf0\xa2\xf5"
+			  "\x64\xcf\x16\x57\x2d\x5f\x4a\x7d",
+		.alen	= 32,
+		.input	= "\xe2\x34\xfa\x25\xfd\xfb\x89\x5e"
+			  "\x5b\x4e\x0b\x15\x6e\x39\xfb\x0c"
+			  "\x73\xc7\xd9\x6b\xbe\xce\x9b\x70"
+			  "\xc7\x4f\x96\x16\x03\xfc\xea\xfb"
+			  "\x56",
+		.ilen	= 33,
+		.result	= "\xc3\x62\xb7\xb6\xe2\x87\x4c\xe7"
+			  "\x0d\x67\x9a\x43\xd4\x52\xd4\xb5"
+			  "\x7b\x43\xc1\xb5\xbf\x98\x82\xfc"
+			  "\x94\xda\x4e\x4d\xe4\x77\x32\x32",
+		.rlen	= 32,
+	},
+};
+
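The template entries above pair a key, an iv, associated data and an input
with the expected result, each with an explicit length field; judging by the
lengths, these read as decryption checks (ciphertext plus tag in, plaintext
out). As a rough, hypothetical illustration only — not part of this patch —
one such vector could be driven through the kernel AEAD API along these
lines; the algorithm name, buffer layout and the helper itself are
placeholders:

    #include <crypto/aead.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>

    /* Hypothetical helper: decrypt one vector in place. */
    static int check_one_aead_vector(const u8 *key, unsigned int klen,
                                     u8 *iv, u8 *buf,
                                     unsigned int alen, unsigned int ilen)
    {
            struct crypto_aead *tfm;
            struct aead_request *req;
            struct scatterlist sg;
            DECLARE_CRYPTO_WAIT(wait);
            int err;

            tfm = crypto_alloc_aead("gcm(aes)", 0, 0); /* stand-in name */
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);
            err = crypto_aead_setkey(tfm, key, klen);
            if (err)
                    goto out;
            req = aead_request_alloc(tfm, GFP_KERNEL);
            if (!req) {
                    err = -ENOMEM;
                    goto out;
            }
            /* buf holds the assoc data followed by ciphertext + tag */
            sg_init_one(&sg, buf, alen + ilen);
            aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
                                      crypto_req_done, &wait);
            aead_request_set_ad(req, alen);
            aead_request_set_crypt(req, &sg, &sg, ilen, iv);
            err = crypto_wait_req(crypto_aead_decrypt(req), &wait);
            aead_request_free(req);
    out:
            crypto_free_aead(tfm);
            return err; /* caller memcmp()s the payload against .result */
    }
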
 /*
  * All key wrapping test vectors taken from
  * http://csrc.nist.gov/groups/STM/cavp/documents/mac/kwtestvectors.zip
@@ -27385,35 +26023,31 @@
  * semiblock of the ciphertext from the test vector. For decryption, iv is
  * the first semiblock of the ciphertext.
  */
-static const struct cipher_testvec aes_kw_enc_tv_template[] = {
+static const struct cipher_testvec aes_kw_tv_template[] = {
 	{
 		.key	= "\x75\x75\xda\x3a\x93\x60\x7c\xc2"
 			  "\xbf\xd8\xce\xc7\xaa\xdf\xd9\xa6",
 		.klen	= 16,
-		.input	= "\x42\x13\x6d\x3c\x38\x4a\x3e\xea"
+		.ptext	= "\x42\x13\x6d\x3c\x38\x4a\x3e\xea"
 			  "\xc9\x5a\x06\x6f\xd2\x8f\xed\x3f",
-		.ilen	= 16,
-		.result	= "\xf6\x85\x94\x81\x6f\x64\xca\xa3"
+		.ctext	= "\xf6\x85\x94\x81\x6f\x64\xca\xa3"
 			  "\xf5\x6f\xab\xea\x25\x48\xf5\xfb",
-		.rlen	= 16,
-		.iv_out	= "\x03\x1f\x6b\xd7\xe6\x1e\x64\x3d",
-	},
-};
-
-static const struct cipher_testvec aes_kw_dec_tv_template[] = {
-	{
+		.len	= 16,
+		.iv	= "\x03\x1f\x6b\xd7\xe6\x1e\x64\x3d",
+		.generates_iv = true,
+	}, {
 		.key	= "\x80\xaa\x99\x73\x27\xa4\x80\x6b"
 			  "\x6a\x7a\x41\xa5\x2b\x86\xc3\x71"
 			  "\x03\x86\xf9\x32\x78\x6e\xf7\x96"
 			  "\x76\xfa\xfb\x90\xb8\x26\x3c\x5f",
 		.klen	= 32,
-		.input	= "\xd3\x3d\x3d\x97\x7b\xf0\xa9\x15"
-			  "\x59\xf9\x9c\x8a\xcd\x29\x3d\x43",
-		.ilen	= 16,
-		.result	= "\x0a\x25\x6b\xa7\x5c\xfa\x03\xaa"
+		.ptext	= "\x0a\x25\x6b\xa7\x5c\xfa\x03\xaa"
 			  "\xa0\x2b\xa9\x42\x03\xf1\x5b\xaa",
-		.rlen	= 16,
+		.ctext	= "\xd3\x3d\x3d\x97\x7b\xf0\xa9\x15"
+			  "\x59\xf9\x9c\x8a\xcd\x29\x3d\x43",
+		.len	= 16,
 		.iv	= "\x42\x3c\x96\x0d\x8a\x2a\xc4\xc1",
+		.generates_iv = true,
 	},
 };
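
The aes_kw conversion above shows the new combined form: a single table now
covers both directions, with ptext/ctext/len replacing the old
input/ilen/result/rlen pairs, and generates_iv marking vectors whose iv
field is an encryption *output* (the first ciphertext semiblock, per the
comment above) rather than an input. A minimal sketch of consuming such a
merged entry, with hypothetical do_encrypt()/do_decrypt() helpers standing
in for the real crypto API plumbing:

    /* Hypothetical sketch: run one merged vector in both directions. */
    static int check_cipher_vector(const struct cipher_testvec *v)
    {
            u8 buf[512], iv_out[16];

            /* Encrypt: ptext -> ctext; a generated iv is extra output. */
            memcpy(buf, v->ptext, v->len);
            if (do_encrypt(v->key, v->klen, iv_out, buf, v->len) ||
                memcmp(buf, v->ctext, v->len))
                    return -EINVAL;
            if (v->generates_iv && memcmp(iv_out, v->iv, 8))
                    return -EINVAL; /* 8-byte key-wrap semiblock */

            /* Decrypt: ctext -> ptext, feeding the same iv back in. */
            memcpy(buf, v->ctext, v->len);
            if (do_decrypt(v->key, v->klen, v->iv, buf, v->len) ||
                memcmp(buf, v->ptext, v->len))
                    return -EINVAL;
            return 0;
    }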
 
@@ -28340,36 +26974,33 @@
 };
 
 /* Cast5 test vectors from RFC 2144 */
-static const struct cipher_testvec cast5_enc_tv_template[] = {
+static const struct cipher_testvec cast5_tv_template[] = {
 	{
 		.key	= "\x01\x23\x45\x67\x12\x34\x56\x78"
 			  "\x23\x45\x67\x89\x34\x56\x78\x9a",
 		.klen	= 16,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.ilen	= 8,
-		.result	= "\x23\x8b\x4f\xe5\x84\x7e\x44\xb2",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.ctext	= "\x23\x8b\x4f\xe5\x84\x7e\x44\xb2",
+		.len	= 8,
 	}, {
 		.key	= "\x01\x23\x45\x67\x12\x34\x56\x78"
 			  "\x23\x45",
 		.klen	= 10,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.ilen	= 8,
-		.result	= "\xeb\x6a\x71\x1a\x2c\x02\x27\x1b",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.ctext	= "\xeb\x6a\x71\x1a\x2c\x02\x27\x1b",
+		.len	= 8,
 	}, {
 		.key	= "\x01\x23\x45\x67\x12",
 		.klen	= 5,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.ilen	= 8,
-		.result	= "\x7a\xc8\x16\xd1\x6e\x9b\x30\x2e",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.ctext	= "\x7a\xc8\x16\xd1\x6e\x9b\x30\x2e",
+		.len	= 8,
 	}, { /* Generated from TF test vectors */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A",
 		.klen	= 16,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -28431,8 +27062,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x8D\xFC\x81\x9C\xCB\xAA\x5A\x1C"
+		.ctext	= "\x8D\xFC\x81\x9C\xCB\xAA\x5A\x1C"
 			  "\x7E\x95\xCF\x40\xAB\x4D\x6F\xEA"
 			  "\xD3\xD9\xB0\x9A\xB7\xC7\xE0\x2E"
 			  "\xD1\x39\x34\x92\x8F\xFA\x14\xF1"
@@ -28494,181 +27124,20 @@
 			  "\x5D\x0B\x3F\x03\x8F\x30\xF9\xAE"
 			  "\x4F\xFE\x24\x9C\x9A\x02\xE5\x57"
 			  "\xF5\xBC\x25\xD6\x02\x56\x57\x1C",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
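
Note the also_non_np/np/tap trio carries over into the merged template
unchanged. Per testmgr's apparent convention, np names the number of
scatterlist pieces and tap[] their sizes, so this 496-byte vector is also
exercised split as 476 + 4 + 16 bytes (and, via also_non_np, once
contiguously). Roughly, under those assumed semantics and with a
hypothetical buf:

    #include <linux/scatterlist.h>

    /* Present the same 496 bytes as three scatterlist pieces. */
    static void split_per_tap(u8 *buf, struct scatterlist sg[3])
    {
            sg_init_table(sg, 3);
            sg_set_buf(&sg[0], buf,       496 - 20); /* tap[0] */
            sg_set_buf(&sg[1], buf + 476, 4);        /* tap[1] */
            sg_set_buf(&sg[2], buf + 480, 16);       /* tap[2] */
    }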
 
-static const struct cipher_testvec cast5_dec_tv_template[] = {
-	{
-		.key	= "\x01\x23\x45\x67\x12\x34\x56\x78"
-			  "\x23\x45\x67\x89\x34\x56\x78\x9a",
-		.klen	= 16,
-		.input	= "\x23\x8b\x4f\xe5\x84\x7e\x44\xb2",
-		.ilen	= 8,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.rlen	= 8,
-	}, {
-		.key	= "\x01\x23\x45\x67\x12\x34\x56\x78"
-			  "\x23\x45",
-		.klen	= 10,
-		.input	= "\xeb\x6a\x71\x1a\x2c\x02\x27\x1b",
-		.ilen	= 8,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.rlen	= 8,
-	}, {
-		.key	= "\x01\x23\x45\x67\x12",
-		.klen	= 5,
-		.input	= "\x7a\xc8\x16\xd1\x6e\x9b\x30\x2e",
-		.ilen	= 8,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.rlen	= 8,
-	}, { /* Generated from TF test vectors */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A",
-		.klen	= 16,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x8D\xFC\x81\x9C\xCB\xAA\x5A\x1C"
-			  "\x7E\x95\xCF\x40\xAB\x4D\x6F\xEA"
-			  "\xD3\xD9\xB0\x9A\xB7\xC7\xE0\x2E"
-			  "\xD1\x39\x34\x92\x8F\xFA\x14\xF1"
-			  "\xD5\xD2\x7B\x59\x1F\x35\x28\xC2"
-			  "\x20\xD9\x42\x06\xC9\x0B\x10\x04"
-			  "\xF8\x79\xCD\x32\x86\x75\x4C\xB6"
-			  "\x7B\x1C\x52\xB1\x91\x64\x22\x4B"
-			  "\x13\xC7\xAE\x98\x0E\xB5\xCF\x6F"
-			  "\x3F\xF4\x43\x96\x73\x0D\xA2\x05"
-			  "\xDB\xFD\x28\x90\x2C\x56\xB9\x37"
-			  "\x5B\x69\x0C\xAD\x84\x67\xFF\x15"
-			  "\x4A\xD4\xA7\xD3\xDD\x99\x47\x3A"
-			  "\xED\x34\x35\x78\x6B\x91\xC9\x32"
-			  "\xE1\xBF\xBC\xB4\x04\x85\x6A\x39"
-			  "\xC0\xBA\x51\xD0\x0F\x4E\xD1\xE2"
-			  "\x1C\xFD\x0E\x05\x07\xF4\x10\xED"
-			  "\xA2\x17\xFF\xF5\x64\xC6\x1A\x22"
-			  "\xAD\x78\xE7\xD7\x11\xE9\x99\xB9"
-			  "\xAA\xEC\x6F\xF8\x3B\xBF\xCE\x77"
-			  "\x93\xE8\xAD\x1D\x50\x6C\xAE\xBC"
-			  "\xBA\x5C\x80\xD1\x91\x65\x51\x1B"
-			  "\xE8\x0A\xCD\x99\x96\x71\x3D\xB6"
-			  "\x78\x75\x37\x55\xC1\xF5\x90\x40"
-			  "\x34\xF4\x7E\xC8\xCC\x3A\x5F\x6E"
-			  "\x36\xA1\xA1\xC2\x3A\x72\x42\x8E"
-			  "\x0E\x37\x88\xE8\xCE\x83\xCB\xAD"
-			  "\xE0\x69\x77\x50\xC7\x0C\x99\xCA"
-			  "\x19\x5B\x30\x25\x9A\xEF\x9B\x0C"
-			  "\xEF\x8F\x74\x4C\xCF\x49\x4E\xB9"
-			  "\xC5\xAE\x9E\x2E\x78\x9A\xB9\x48"
-			  "\xD5\x81\xE4\x37\x1D\xBF\x27\xD9"
-			  "\xC5\xD6\x65\x43\x45\x8C\xBB\xB6"
-			  "\x55\xF4\x06\xBB\x49\x53\x8B\x1B"
-			  "\x07\xA9\x96\x69\x5B\xCB\x0F\xBC"
-			  "\x93\x85\x90\x0F\x0A\x68\x40\x2A"
-			  "\x95\xED\x2D\x88\xBF\x71\xD0\xBB"
-			  "\xEC\xB0\x77\x6C\x79\xFC\x3C\x05"
-			  "\x49\x3F\xB8\x24\xEF\x8E\x09\xA2"
-			  "\x1D\xEF\x92\x02\x96\xD4\x7F\xC8"
-			  "\x03\xB2\xCA\xDB\x17\x5C\x52\xCF"
-			  "\xDD\x70\x37\x63\xAA\xA5\x83\x20"
-			  "\x52\x02\xF6\xB9\xE7\x6E\x0A\xB6"
-			  "\x79\x03\xA0\xDA\xA3\x79\x21\xBD"
-			  "\xE3\x37\x3A\xC0\xF7\x2C\x32\xBE"
-			  "\x8B\xE8\xA6\x00\xC7\x32\xD5\x06"
-			  "\xBB\xE3\xAB\x06\x21\x82\xB8\x32"
-			  "\x31\x34\x2A\xA7\x1F\x64\x99\xBF"
-			  "\xFA\xDA\x3D\x75\xF7\x48\xD5\x48"
-			  "\x4B\x52\x7E\xF6\x7C\xAB\x67\x59"
-			  "\xC5\xDC\xA8\xC6\x63\x85\x4A\xDF"
-			  "\xF0\x40\x5F\xCF\xE3\x58\x52\x67"
-			  "\x7A\x24\x32\xC5\xEC\x9E\xA9\x6F"
-			  "\x58\x56\xDD\x94\x1F\x71\x8D\xF4"
-			  "\x6E\xFF\x2C\xA7\xA5\xD8\xBA\xAF"
-			  "\x1D\x8B\xA2\x46\xB5\xC4\x9F\x57"
-			  "\x8D\xD8\xB3\x3C\x02\x0D\xBB\x84"
-			  "\xC7\xBD\xB4\x9A\x6E\xBB\xB1\x37"
-			  "\x95\x79\xC4\xA7\xEA\x1D\xDC\x33"
-			  "\x5D\x0B\x3F\x03\x8F\x30\xF9\xAE"
-			  "\x4F\xFE\x24\x9C\x9A\x02\xE5\x57"
-			  "\xF5\xBC\x25\xD6\x02\x56\x57\x1C",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec cast5_cbc_enc_tv_template[] = {
+static const struct cipher_testvec cast5_cbc_tv_template[] = {
 	{ /* Generated from TF test vectors */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A",
 		.klen	= 16,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -28730,8 +27199,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\x05\x28\xCE\x61\x90\x80\xE1\x78"
+		.ctext	= "\x05\x28\xCE\x61\x90\x80\xE1\x78"
 			  "\xB9\x2A\x97\x7C\xB0\x83\xD8\x1A"
 			  "\xDE\x58\x7F\xD7\xFD\x72\xB8\xFB"
 			  "\xDA\xF0\x6E\x77\x14\x47\x82\xBA"
@@ -28793,171 +27261,32 @@
 			  "\x8C\xF5\x3F\x19\xF4\x80\x45\xA7"
 			  "\x15\x5F\xDB\xE9\xB1\x83\xD2\xE6"
 			  "\x1D\x18\x66\x44\x5B\x8F\x14\xEB",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec cast5_cbc_dec_tv_template[] = {
+static const struct cipher_testvec cast5_ctr_tv_template[] = {
 	{ /* Generated from TF test vectors */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A",
 		.klen	= 16,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x05\x28\xCE\x61\x90\x80\xE1\x78"
-			  "\xB9\x2A\x97\x7C\xB0\x83\xD8\x1A"
-			  "\xDE\x58\x7F\xD7\xFD\x72\xB8\xFB"
-			  "\xDA\xF0\x6E\x77\x14\x47\x82\xBA"
-			  "\x29\x0E\x25\x6E\xB4\x39\xD9\x7F"
-			  "\x05\xA7\xA7\x3A\xC1\x5D\x9E\x39"
-			  "\xA7\xFB\x0D\x05\x00\xF3\x58\x67"
-			  "\x60\xEC\x73\x77\x46\x85\x9B\x6A"
-			  "\x08\x3E\xBE\x59\xFB\xE4\x96\x34"
-			  "\xB4\x05\x49\x1A\x97\x43\xAD\xA0"
-			  "\xA9\x1E\x6E\x74\xF1\x94\xEC\xA8"
-			  "\xB5\x8A\x20\xEA\x89\x6B\x19\xAA"
-			  "\xA7\xF1\x33\x67\x90\x23\x0D\xEE"
-			  "\x81\xD5\x78\x4F\xD3\x63\xEA\x46"
-			  "\xB5\xB2\x6E\xBB\xCA\x76\x06\x10"
-			  "\x96\x2A\x0A\xBA\xF9\x41\x5A\x1D"
-			  "\x36\x7C\x56\x14\x54\x83\xFA\xA1"
-			  "\x27\xDD\xBA\x8A\x90\x29\xD6\xA6"
-			  "\xFA\x48\x3E\x1E\x23\x6E\x98\xA8"
-			  "\xA7\xD9\x67\x92\x5C\x13\xB4\x71"
-			  "\xA8\xAA\x89\x4A\xA4\xB3\x49\x7C"
-			  "\x7D\x7F\xCE\x6F\x29\x2E\x7E\x37"
-			  "\xC8\x52\x60\xD9\xE7\xCA\x60\x98"
-			  "\xED\xCD\xE8\x60\x83\xAD\x34\x4D"
-			  "\x96\x4A\x99\x2B\xB7\x14\x75\x66"
-			  "\x6C\x2C\x1A\xBA\x4B\xBB\x49\x56"
-			  "\xE1\x86\xA2\x0E\xD0\xF0\x07\xD3"
-			  "\x18\x38\x09\x9C\x0E\x8B\x86\x07"
-			  "\x90\x12\x37\x49\x27\x98\x69\x18"
-			  "\xB0\xCC\xFB\xD3\xBD\x04\xA0\x85"
-			  "\x4B\x22\x97\x07\xB6\x97\xE9\x95"
-			  "\x0F\x88\x36\xA9\x44\x00\xC6\xE9"
-			  "\x27\x53\x5C\x5B\x1F\xD3\xE2\xEE"
-			  "\xD0\xCD\x63\x30\xA9\xC0\xDD\x49"
-			  "\xFE\x16\xA4\x07\x0D\xE2\x5D\x97"
-			  "\xDE\x89\xBA\x2E\xF3\xA9\x5E\xBE"
-			  "\x03\x55\x0E\x02\x41\x4A\x45\x06"
-			  "\xBE\xEA\x32\xF2\xDC\x91\x5C\x20"
-			  "\x94\x02\x30\xD2\xFC\x29\xFA\x8E"
-			  "\x34\xA0\x31\xB8\x34\xBA\xAE\x54"
-			  "\xB5\x88\x1F\xDC\x43\xDC\x22\x9F"
-			  "\xDC\xCE\xD3\xFA\xA4\xA8\xBC\x8A"
-			  "\xC7\x5A\x43\x21\xA5\xB1\xDB\xC3"
-			  "\x84\x3B\xB4\x9B\xB5\xA7\xF1\x0A"
-			  "\xB6\x37\x21\x19\x55\xC2\xBD\x99"
-			  "\x49\x24\xBB\x7C\xB3\x8E\xEF\xD2"
-			  "\x3A\xCF\xA0\x31\x28\x0E\x25\xA2"
-			  "\x11\xB4\x18\x17\x1A\x65\x92\x56"
-			  "\xE8\xE0\x52\x9C\x61\x18\x2A\xB1"
-			  "\x1A\x01\x22\x45\x17\x62\x52\x6C"
-			  "\x91\x44\xCF\x98\xC7\xC0\x79\x26"
-			  "\x32\x66\x6F\x23\x7F\x94\x36\x88"
-			  "\x3C\xC9\xD0\xB7\x45\x30\x31\x86"
-			  "\x3D\xC6\xA3\x98\x62\x84\x1A\x8B"
-			  "\x16\x88\xC7\xA3\xE9\x4F\xE0\x86"
-			  "\xA4\x93\xA8\x34\x5A\xCA\xDF\xCA"
-			  "\x46\x38\xD2\xF4\xE0\x2D\x1E\xC9"
-			  "\x7C\xEF\x53\xB7\x60\x72\x41\xBF"
-			  "\x29\x00\x87\x02\xAF\x44\x4C\xB7"
-			  "\x8C\xF5\x3F\x19\xF4\x80\x45\xA7"
-			  "\x15\x5F\xDB\xE9\xB1\x83\xD2\xE6"
-			  "\x1D\x18\x66\x44\x5B\x8F\x14\xEB",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec cast5_ctr_enc_tv_template[] = {
-	{ /* Generated from TF test vectors */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A",
-		.klen	= 16,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A",
-		.ilen	= 17,
-		.result	= "\xFF\xC4\x2E\x82\x3D\xF8\xA8\x39"
+		.ctext	= "\xFF\xC4\x2E\x82\x3D\xF8\xA8\x39"
 			  "\x7C\x52\xC4\xD3\xBB\x62\xC6\xA8"
 			  "\x0C",
-		.rlen	= 17,
+		.len	= 17,
 	}, { /* Generated from TF test vectors */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A",
 		.klen	= 16,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -29019,8 +27348,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\xFF\xC4\x2E\x82\x3D\xF8\xA8\x39"
+		.ctext	= "\xFF\xC4\x2E\x82\x3D\xF8\xA8\x39"
 			  "\x7C\x52\xC4\xD3\xBB\x62\xC6\xA8"
 			  "\x0C\x63\xA5\x55\xE3\xF8\x1C\x7F"
 			  "\xDC\x59\xF9\xA0\x52\xAD\x83\xDF"
@@ -29082,158 +27410,7 @@
 			  "\x91\x01\xD7\x21\x23\x28\x1E\xCC"
 			  "\x8C\x98\xDB\xDE\xFC\x72\x94\xAA"
 			  "\xC0\x0D\x96\xAA\x23\xF8\xFE\x13",
-		.rlen	= 496,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 496 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec cast5_ctr_dec_tv_template[] = {
-	{ /* Generated from TF test vectors */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A",
-		.klen	= 16,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\xFF\xC4\x2E\x82\x3D\xF8\xA8\x39"
-			  "\x7C\x52\xC4\xD3\xBB\x62\xC6\xA8"
-			  "\x0C",
-		.ilen	= 17,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A",
-		.rlen	= 17,
-	}, { /* Generated from TF test vectors */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A",
-		.klen	= 16,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F",
-		.input	= "\xFF\xC4\x2E\x82\x3D\xF8\xA8\x39"
-			  "\x7C\x52\xC4\xD3\xBB\x62\xC6\xA8"
-			  "\x0C\x63\xA5\x55\xE3\xF8\x1C\x7F"
-			  "\xDC\x59\xF9\xA0\x52\xAD\x83\xDF"
-			  "\xD5\x3B\x53\x4A\xAA\x1F\x49\x44"
-			  "\xE8\x20\xCC\xF8\x97\xE6\xE0\x3C"
-			  "\x5A\xD2\x83\xEC\xEE\x25\x3F\xCF"
-			  "\x0D\xC2\x79\x80\x99\x6E\xFF\x7B"
-			  "\x64\xB0\x7B\x86\x29\x1D\x9F\x17"
-			  "\x10\xA5\xA5\xEB\x16\x55\x9E\xE3"
-			  "\x88\x18\x52\x56\x48\x58\xD1\x6B"
-			  "\xE8\x74\x6E\x48\xB0\x2E\x69\x63"
-			  "\x32\xAA\xAC\x26\x55\x45\x94\xDE"
-			  "\x30\x26\x26\xE6\x08\x82\x2F\x5F"
-			  "\xA7\x15\x94\x07\x75\x2D\xC6\x3A"
-			  "\x1B\xA0\x39\xFB\xBA\xB9\x06\x56"
-			  "\xF6\x9F\xF1\x2F\x9B\xF3\x89\x8B"
-			  "\x08\xC8\x9D\x5E\x6B\x95\x09\xC7"
-			  "\x98\xB7\x62\xA4\x1D\x25\xFA\xC5"
-			  "\x62\xC8\x5D\x6B\xB4\x85\x88\x7F"
-			  "\x3B\x29\xF9\xB4\x32\x62\x69\xBF"
-			  "\x32\xB8\xEB\xFD\x0E\x26\xAA\xA3"
-			  "\x44\x67\x90\x20\xAC\x41\xDF\x43"
-			  "\xC6\xC7\x19\x9F\x2C\x28\x74\xEB"
-			  "\x3E\x7F\x7A\x80\x5B\xE4\x08\x60"
-			  "\xC7\xC9\x71\x34\x44\xCE\x05\xFD"
-			  "\xA8\x91\xA8\x44\x5E\xD3\x89\x2C"
-			  "\xAE\x59\x0F\x07\x88\x79\x53\x26"
-			  "\xAF\xAC\xCB\x1D\x6F\x08\x25\x62"
-			  "\xD0\x82\x65\x66\xE4\x2A\x29\x1C"
-			  "\x9C\x64\x5F\x49\x9D\xF8\x62\xF9"
-			  "\xED\xC4\x13\x52\x75\xDC\xE4\xF9"
-			  "\x68\x0F\x8A\xCD\xA6\x8D\x75\xAA"
-			  "\x49\xA1\x86\x86\x37\x5C\x6B\x3D"
-			  "\x56\xE5\x6F\xBE\x27\xC0\x10\xF8"
-			  "\x3C\x4D\x17\x35\x14\xDC\x1C\xA0"
-			  "\x6E\xAE\xD1\x10\xDD\x83\x06\xC2"
-			  "\x23\xD3\xC7\x27\x15\x04\x2C\x27"
-			  "\xDD\x1F\x2E\x97\x09\x9C\x33\x7D"
-			  "\xAC\x50\x1B\x2E\xC9\x52\x0C\x14"
-			  "\x4B\x78\xC4\xDE\x07\x6A\x12\x02"
-			  "\x6E\xD7\x4B\x91\xB9\x88\x4D\x02"
-			  "\xC3\xB5\x04\xBC\xE0\x67\xCA\x18"
-			  "\x22\xA1\xAE\x9A\x21\xEF\xB2\x06"
-			  "\x35\xCD\xEC\x37\x70\x2D\xFC\x1E"
-			  "\xA8\x31\xE7\xFC\xE5\x8E\x88\x66"
-			  "\x16\xB5\xC8\x45\x21\x37\xBD\x24"
-			  "\xA9\xD5\x36\x12\x9F\x6E\x67\x80"
-			  "\x87\x54\xD5\xAF\x97\xE1\x15\xA7"
-			  "\x11\xF0\x63\x7B\xE1\x44\x14\x1C"
-			  "\x06\x32\x05\x8C\x6C\xDB\x9B\x36"
-			  "\x6A\x6B\xAD\x3A\x27\x55\x20\x4C"
-			  "\x76\x36\x43\xE8\x16\x60\xB5\xF3"
-			  "\xDF\x5A\xC6\xA5\x69\x78\x59\x51"
-			  "\x54\x68\x65\x06\x84\xDE\x3D\xAE"
-			  "\x38\x91\xBD\xCC\xA2\x8A\xEC\xE6"
-			  "\x9E\x83\xAE\x1E\x8E\x34\x5D\xDE"
-			  "\x91\xCE\x8F\xED\x40\xF7\xC8\x8B"
-			  "\x9A\x13\x4C\xAD\x89\x97\x9E\xD1"
-			  "\x91\x01\xD7\x21\x23\x28\x1E\xCC"
-			  "\x8C\x98\xDB\xDE\xFC\x72\x94\xAA"
-			  "\xC0\x0D\x96\xAA\x23\xF8\xFE\x13",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
+		.len	= 496,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 496 - 20, 4, 16 },
@@ -29243,408 +27420,190 @@
 /*
  * ARC4 test vectors from OpenSSL
  */
-static const struct cipher_testvec arc4_enc_tv_template[] = {
+static const struct cipher_testvec arc4_tv_template[] = {
 	{
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.ilen	= 8,
-		.result	= "\x75\xb7\x87\x80\x99\xe0\xc5\x96",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
+		.ctext	= "\x75\xb7\x87\x80\x99\xe0\xc5\x96",
+		.len	= 8,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 8,
-		.result	= "\x74\x94\xc2\xe7\x10\x4b\x08\x79",
-		.rlen	= 8,
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\x74\x94\xc2\xe7\x10\x4b\x08\x79",
+		.len	= 8,
 	}, {
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 8,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 8,
-		.result	= "\xde\x18\x89\x41\xa3\x37\x5d\x3a",
-		.rlen	= 8,
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\xde\x18\x89\x41\xa3\x37\x5d\x3a",
+		.len	= 8,
 	}, {
 		.key	= "\xef\x01\x23\x45",
 		.klen	= 4,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00",
-		.ilen	= 20,
-		.result	= "\xd6\xa1\x41\xa7\xec\x3c\x38\xdf"
+		.ctext	= "\xd6\xa1\x41\xa7\xec\x3c\x38\xdf"
 			  "\xbd\x61\x5a\x11\x62\xe1\xc7\xba"
 			  "\x36\xb6\x78\x58",
-		.rlen	= 20,
+		.len	= 20,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
 		.klen	= 8,
-		.input	= "\x12\x34\x56\x78\x9A\xBC\xDE\xF0"
+		.ptext	= "\x12\x34\x56\x78\x9A\xBC\xDE\xF0"
 			  "\x12\x34\x56\x78\x9A\xBC\xDE\xF0"
 			  "\x12\x34\x56\x78\x9A\xBC\xDE\xF0"
 			  "\x12\x34\x56\x78",
-		.ilen	= 28,
-		.result	= "\x66\xa0\x94\x9f\x8a\xf7\xd6\x89"
+		.ctext	= "\x66\xa0\x94\x9f\x8a\xf7\xd6\x89"
 			  "\x1f\x7f\x83\x2b\xa8\x33\xc0\x0c"
 			  "\x89\x2e\xbe\x30\x14\x3c\xe2\x87"
 			  "\x40\x01\x1e\xcf",
-		.rlen	= 28,
+		.len	= 28,
 	}, {
 		.key	= "\xef\x01\x23\x45",
 		.klen	= 4,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00",
-		.ilen	= 10,
-		.result	= "\xd6\xa1\x41\xa7\xec\x3c\x38\xdf"
+		.ctext	= "\xd6\xa1\x41\xa7\xec\x3c\x38\xdf"
 			  "\xbd\x61",
-		.rlen	= 10,
+		.len	= 10,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
 			"\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 16,
-		.input	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
-		.ilen	= 8,
-		.result	= "\x69\x72\x36\x59\x1B\x52\x42\xB1",
-		.rlen	= 8,
-	},
-};
-
-static const struct cipher_testvec arc4_dec_tv_template[] = {
-	{
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.input	= "\x75\xb7\x87\x80\x99\xe0\xc5\x96",
-		.ilen	= 8,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.rlen	= 8,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.input	= "\x74\x94\xc2\xe7\x10\x4b\x08\x79",
-		.ilen	= 8,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 8,
-	}, {
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 8,
-		.input	= "\xde\x18\x89\x41\xa3\x37\x5d\x3a",
-		.ilen	= 8,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 8,
-	}, {
-		.key	= "\xef\x01\x23\x45",
-		.klen	= 4,
-		.input	= "\xd6\xa1\x41\xa7\xec\x3c\x38\xdf"
-			  "\xbd\x61\x5a\x11\x62\xe1\xc7\xba"
-			  "\x36\xb6\x78\x58",
-		.ilen	= 20,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00",
-		.rlen	= 20,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef",
-		.klen	= 8,
-		.input	= "\x66\xa0\x94\x9f\x8a\xf7\xd6\x89"
-			  "\x1f\x7f\x83\x2b\xa8\x33\xc0\x0c"
-			  "\x89\x2e\xbe\x30\x14\x3c\xe2\x87"
-			  "\x40\x01\x1e\xcf",
-		.ilen	= 28,
-		.result	= "\x12\x34\x56\x78\x9A\xBC\xDE\xF0"
-			  "\x12\x34\x56\x78\x9A\xBC\xDE\xF0"
-			  "\x12\x34\x56\x78\x9A\xBC\xDE\xF0"
-			  "\x12\x34\x56\x78",
-		.rlen	= 28,
-	}, {
-		.key	= "\xef\x01\x23\x45",
-		.klen	= 4,
-		.input	= "\xd6\xa1\x41\xa7\xec\x3c\x38\xdf"
-			  "\xbd\x61",
-		.ilen	= 10,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00",
-		.rlen	= 10,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
-			"\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 16,
-		.input	= "\x69\x72\x36\x59\x1B\x52\x42\xB1",
-		.ilen	= 8,
-		.result	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
-		.rlen	= 8,
+		.ptext	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF",
+		.ctext	= "\x69\x72\x36\x59\x1B\x52\x42\xB1",
+		.len	= 8,
 	},
 };
 
 /*
  * TEA test vectors
  */
-static const struct cipher_testvec tea_enc_tv_template[] = {
+static const struct cipher_testvec tea_tv_template[] = {
 	{
 		.key    = zeroed_string,
 		.klen	= 16,
-		.input  = zeroed_string,
-		.ilen	= 8,
-		.result	= "\x0a\x3a\xea\x41\x40\xa9\xba\x94",
-		.rlen	= 8,
+		.ptext	= zeroed_string,
+		.ctext	= "\x0a\x3a\xea\x41\x40\xa9\xba\x94",
+		.len	= 8,
 	}, {
 		.key	= "\x2b\x02\x05\x68\x06\x14\x49\x76"
 			  "\x77\x5d\x0e\x26\x6c\x28\x78\x43",
 		.klen	= 16,
-		.input	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
-		.ilen	= 8,
-		.result	= "\x77\x5d\x2a\x6a\xf6\xce\x92\x09",
-		.rlen	= 8,
+		.ptext	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
+		.ctext	= "\x77\x5d\x2a\x6a\xf6\xce\x92\x09",
+		.len	= 8,
 	}, {
 		.key	= "\x09\x65\x43\x11\x66\x44\x39\x25"
 			  "\x51\x3a\x16\x10\x0a\x08\x12\x6e",
 		.klen	= 16,
-		.input	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
+		.ptext	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
 			  "\x65\x73\x74\x5f\x76\x65\x63\x74",
-		.ilen	= 16,
-		.result	= "\xbe\x7a\xbb\x81\x95\x2d\x1f\x1e"
+		.ctext	= "\xbe\x7a\xbb\x81\x95\x2d\x1f\x1e"
 			  "\xdd\x89\xa1\x25\x04\x21\xdf\x95",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x4d\x76\x32\x17\x05\x3f\x75\x2c"
 			  "\x5d\x04\x16\x36\x15\x72\x63\x2f",
 		.klen	= 16,
-		.input	= "\x54\x65\x61\x20\x69\x73\x20\x67"
+		.ptext	= "\x54\x65\x61\x20\x69\x73\x20\x67"
 			  "\x6f\x6f\x64\x20\x66\x6f\x72\x20"
 			  "\x79\x6f\x75\x21\x21\x21\x20\x72"
 			  "\x65\x61\x6c\x6c\x79\x21\x21\x21",
-		.ilen	= 32,
-		.result	= "\xe0\x4d\x5d\x3c\xb7\x8c\x36\x47"
+		.ctext	= "\xe0\x4d\x5d\x3c\xb7\x8c\x36\x47"
 			  "\x94\x18\x95\x91\xa9\xfc\x49\xf8"
 			  "\x44\xd1\x2d\xc2\x99\xb8\x08\x2a"
 			  "\x07\x89\x73\xc2\x45\x92\xc6\x90",
-		.rlen	= 32,
-	}
-};
-
-static const struct cipher_testvec tea_dec_tv_template[] = {
-	{
-		.key    = zeroed_string,
-		.klen	= 16,
-		.input	= "\x0a\x3a\xea\x41\x40\xa9\xba\x94",
-		.ilen	= 8,
-		.result = zeroed_string,
-		.rlen	= 8,
-	}, {
-		.key	= "\x2b\x02\x05\x68\x06\x14\x49\x76"
-			  "\x77\x5d\x0e\x26\x6c\x28\x78\x43",
-		.klen	= 16,
-		.input	= "\x77\x5d\x2a\x6a\xf6\xce\x92\x09",
-		.ilen	= 8,
-		.result	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
-		.rlen	= 8,
-	}, {
-		.key	= "\x09\x65\x43\x11\x66\x44\x39\x25"
-			  "\x51\x3a\x16\x10\x0a\x08\x12\x6e",
-		.klen	= 16,
-		.input	= "\xbe\x7a\xbb\x81\x95\x2d\x1f\x1e"
-			  "\xdd\x89\xa1\x25\x04\x21\xdf\x95",
-		.ilen   = 16,
-		.result	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
-			  "\x65\x73\x74\x5f\x76\x65\x63\x74",
-		.rlen	= 16,
-	}, {
-		.key	= "\x4d\x76\x32\x17\x05\x3f\x75\x2c"
-			  "\x5d\x04\x16\x36\x15\x72\x63\x2f",
-		.klen	= 16,
-		.input	= "\xe0\x4d\x5d\x3c\xb7\x8c\x36\x47"
-			  "\x94\x18\x95\x91\xa9\xfc\x49\xf8"
-			  "\x44\xd1\x2d\xc2\x99\xb8\x08\x2a"
-			  "\x07\x89\x73\xc2\x45\x92\xc6\x90",
-		.ilen	= 32,
-		.result	= "\x54\x65\x61\x20\x69\x73\x20\x67"
-			  "\x6f\x6f\x64\x20\x66\x6f\x72\x20"
-			  "\x79\x6f\x75\x21\x21\x21\x20\x72"
-			  "\x65\x61\x6c\x6c\x79\x21\x21\x21",
-		.rlen	= 32,
+		.len	= 32,
 	}
 };
 
 /*
  * XTEA test vectors
  */
-static const struct cipher_testvec xtea_enc_tv_template[] = {
+static const struct cipher_testvec xtea_tv_template[] = {
 	{
 		.key    = zeroed_string,
 		.klen	= 16,
-		.input  = zeroed_string,
-		.ilen	= 8,
-		.result	= "\xd8\xd4\xe9\xde\xd9\x1e\x13\xf7",
-		.rlen	= 8,
+		.ptext	= zeroed_string,
+		.ctext	= "\xd8\xd4\xe9\xde\xd9\x1e\x13\xf7",
+		.len	= 8,
 	}, {
 		.key	= "\x2b\x02\x05\x68\x06\x14\x49\x76"
 			  "\x77\x5d\x0e\x26\x6c\x28\x78\x43",
 		.klen	= 16,
-		.input	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
-		.ilen	= 8,
-		.result	= "\x94\xeb\xc8\x96\x84\x6a\x49\xa8",
-		.rlen	= 8,
+		.ptext	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
+		.ctext	= "\x94\xeb\xc8\x96\x84\x6a\x49\xa8",
+		.len	= 8,
 	}, {
 		.key	= "\x09\x65\x43\x11\x66\x44\x39\x25"
 			  "\x51\x3a\x16\x10\x0a\x08\x12\x6e",
 		.klen	= 16,
-		.input	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
+		.ptext	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
 			  "\x65\x73\x74\x5f\x76\x65\x63\x74",
-		.ilen	= 16,
-		.result	= "\x3e\xce\xae\x22\x60\x56\xa8\x9d"
+		.ctext	= "\x3e\xce\xae\x22\x60\x56\xa8\x9d"
 			  "\x77\x4d\xd4\xb4\x87\x24\xe3\x9a",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x4d\x76\x32\x17\x05\x3f\x75\x2c"
 			  "\x5d\x04\x16\x36\x15\x72\x63\x2f",
 		.klen	= 16,
-		.input	= "\x54\x65\x61\x20\x69\x73\x20\x67"
+		.ptext	= "\x54\x65\x61\x20\x69\x73\x20\x67"
 			  "\x6f\x6f\x64\x20\x66\x6f\x72\x20"
 			  "\x79\x6f\x75\x21\x21\x21\x20\x72"
 			  "\x65\x61\x6c\x6c\x79\x21\x21\x21",
-		.ilen	= 32,
-		.result	= "\x99\x81\x9f\x5d\x6f\x4b\x31\x3a"
+		.ctext	= "\x99\x81\x9f\x5d\x6f\x4b\x31\x3a"
 			  "\x86\xff\x6f\xd0\xe3\x87\x70\x07"
 			  "\x4d\xb8\xcf\xf3\x99\x50\xb3\xd4"
 			  "\x73\xa2\xfa\xc9\x16\x59\x5d\x81",
-		.rlen	= 32,
-	}
-};
-
-static const struct cipher_testvec xtea_dec_tv_template[] = {
-	{
-		.key    = zeroed_string,
-		.klen	= 16,
-		.input	= "\xd8\xd4\xe9\xde\xd9\x1e\x13\xf7",
-		.ilen	= 8,
-		.result = zeroed_string,
-		.rlen	= 8,
-	}, {
-		.key	= "\x2b\x02\x05\x68\x06\x14\x49\x76"
-			  "\x77\x5d\x0e\x26\x6c\x28\x78\x43",
-		.klen	= 16,
-		.input	= "\x94\xeb\xc8\x96\x84\x6a\x49\xa8",
-		.ilen	= 8,
-		.result	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
-		.rlen	= 8,
-	}, {
-		.key	= "\x09\x65\x43\x11\x66\x44\x39\x25"
-			  "\x51\x3a\x16\x10\x0a\x08\x12\x6e",
-		.klen	= 16,
-		.input	= "\x3e\xce\xae\x22\x60\x56\xa8\x9d"
-			  "\x77\x4d\xd4\xb4\x87\x24\xe3\x9a",
-		.ilen	= 16,
-		.result	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
-			  "\x65\x73\x74\x5f\x76\x65\x63\x74",
-		.rlen	= 16,
-	}, {
-		.key	= "\x4d\x76\x32\x17\x05\x3f\x75\x2c"
-			  "\x5d\x04\x16\x36\x15\x72\x63\x2f",
-		.klen	= 16,
-		.input	= "\x99\x81\x9f\x5d\x6f\x4b\x31\x3a"
-			  "\x86\xff\x6f\xd0\xe3\x87\x70\x07"
-			  "\x4d\xb8\xcf\xf3\x99\x50\xb3\xd4"
-			  "\x73\xa2\xfa\xc9\x16\x59\x5d\x81",
-		.ilen	= 32,
-		.result	= "\x54\x65\x61\x20\x69\x73\x20\x67"
-			  "\x6f\x6f\x64\x20\x66\x6f\x72\x20"
-			  "\x79\x6f\x75\x21\x21\x21\x20\x72"
-			  "\x65\x61\x6c\x6c\x79\x21\x21\x21",
-		.rlen	= 32,
+		.len	= 32,
 	}
 };
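
These XTEA outputs can be reproduced from the well-known reference rounds (32 cycles of two Feistel half-rounds, delta 0x9e3779b9); how the 8-byte block and 128-bit key map onto 32-bit words is an endianness detail the sketch below leaves to the caller:

	#include <stdint.h>

	/* Reference XTEA encryption of one 64-bit block v[2]. */
	static void xtea_encrypt_block(uint32_t v[2], const uint32_t key[4])
	{
		uint32_t v0 = v[0], v1 = v[1], sum = 0;
		const uint32_t delta = 0x9e3779b9;
		int i;

		for (i = 0; i < 32; i++) {
			v0 += (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + key[sum & 3]);
			sum += delta;
			v1 += (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + key[(sum >> 11) & 3]);
		}
		v[0] = v0;
		v[1] = v1;
	}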
 
 /*
  * KHAZAD test vectors.
  */
-static const struct cipher_testvec khazad_enc_tv_template[] = {
+static const struct cipher_testvec khazad_tv_template[] = {
 	{
 		.key	= "\x80\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 16,
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 8,
-		.result	= "\x49\xa4\xce\x32\xac\x19\x0e\x3f",
-		.rlen	= 8,
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\x49\xa4\xce\x32\xac\x19\x0e\x3f",
+		.len	= 8,
 	}, {
 		.key	= "\x38\x38\x38\x38\x38\x38\x38\x38"
 			  "\x38\x38\x38\x38\x38\x38\x38\x38",
 		.klen	= 16,
-		.input	= "\x38\x38\x38\x38\x38\x38\x38\x38",
-		.ilen	= 8,
-		.result	= "\x7e\x82\x12\xa1\xd9\x5b\xe4\xf9",
-		.rlen	= 8,
+		.ptext	= "\x38\x38\x38\x38\x38\x38\x38\x38",
+		.ctext	= "\x7e\x82\x12\xa1\xd9\x5b\xe4\xf9",
+		.len	= 8,
 	}, {
 		.key	= "\xa2\xa2\xa2\xa2\xa2\xa2\xa2\xa2"
 			"\xa2\xa2\xa2\xa2\xa2\xa2\xa2\xa2",
 		.klen	= 16,
-		.input	= "\xa2\xa2\xa2\xa2\xa2\xa2\xa2\xa2",
-		.ilen	= 8,
-		.result	= "\xaa\xbe\xc1\x95\xc5\x94\x1a\x9c",
-		.rlen	= 8,
+		.ptext	= "\xa2\xa2\xa2\xa2\xa2\xa2\xa2\xa2",
+		.ctext	= "\xaa\xbe\xc1\x95\xc5\x94\x1a\x9c",
+		.len	= 8,
 	}, {
 		.key	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f"
 			"\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
 		.klen	= 16,
-		.input	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
-		.ilen	= 8,
-		.result = "\x04\x74\xf5\x70\x50\x16\xd3\xb8",
-		.rlen	= 8,
+		.ptext	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
+		.ctext	= "\x04\x74\xf5\x70\x50\x16\xd3\xb8",
+		.len	= 8,
 	}, {
 		.key	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f"
 			"\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
 		.klen	= 16,
-		.input	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f"
+		.ptext	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f"
 			"\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
-		.ilen	= 16,
-		.result = "\x04\x74\xf5\x70\x50\x16\xd3\xb8"
+		.ctext	= "\x04\x74\xf5\x70\x50\x16\xd3\xb8"
 			"\x04\x74\xf5\x70\x50\x16\xd3\xb8",
-		.rlen	= 16,
-	},
-};
-
-static const struct cipher_testvec khazad_dec_tv_template[] = {
-	{
-		.key	= "\x80\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 16,
-		.input	= "\x49\xa4\xce\x32\xac\x19\x0e\x3f",
-		.ilen	= 8,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 8,
-	}, {
-		.key	= "\x38\x38\x38\x38\x38\x38\x38\x38"
-			  "\x38\x38\x38\x38\x38\x38\x38\x38",
-		.klen	= 16,
-		.input	= "\x7e\x82\x12\xa1\xd9\x5b\xe4\xf9",
-		.ilen	= 8,
-		.result	= "\x38\x38\x38\x38\x38\x38\x38\x38",
-		.rlen	= 8,
-	}, {
-		.key	= "\xa2\xa2\xa2\xa2\xa2\xa2\xa2\xa2"
-			"\xa2\xa2\xa2\xa2\xa2\xa2\xa2\xa2",
-		.klen	= 16,
-		.input	= "\xaa\xbe\xc1\x95\xc5\x94\x1a\x9c",
-		.ilen	= 8,
-		.result	= "\xa2\xa2\xa2\xa2\xa2\xa2\xa2\xa2",
-		.rlen	= 8,
-	}, {
-		.key	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f"
-			"\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
-		.klen	= 16,
-		.input  = "\x04\x74\xf5\x70\x50\x16\xd3\xb8",
-		.ilen	= 8,
-		.result	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
-		.rlen	= 8,
-	}, {
-		.key	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f"
-			"\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
-		.klen	= 16,
-		.input  = "\x04\x74\xf5\x70\x50\x16\xd3\xb8"
-			"\x04\x74\xf5\x70\x50\x16\xd3\xb8",
-		.ilen	= 16,
-		.result	= "\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f"
-			"\x2f\x2f\x2f\x2f\x2f\x2f\x2f\x2f",
-		.rlen	= 16,
+		.len	= 16,
 	},
 };
 
@@ -29652,53 +27611,49 @@
  * Anubis test vectors.
  */
 
-static const struct cipher_testvec anubis_enc_tv_template[] = {
+static const struct cipher_testvec anubis_tv_template[] = {
 	{
 		.key	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
 			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe",
 		.klen	= 16,
-		.input	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
+		.ptext	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
 			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe",
-		.ilen	= 16,
-		.result	= "\x6d\xc5\xda\xa2\x26\x7d\x62\x6f"
+		.ctext	= "\x6d\xc5\xda\xa2\x26\x7d\x62\x6f"
 			  "\x08\xb7\x52\x8e\x6e\x6e\x86\x90",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 
 		.key	= "\x03\x03\x03\x03\x03\x03\x03\x03"
 			  "\x03\x03\x03\x03\x03\x03\x03\x03"
 			  "\x03\x03\x03\x03",
 		.klen	= 20,
-		.input	= "\x03\x03\x03\x03\x03\x03\x03\x03"
+		.ptext	= "\x03\x03\x03\x03\x03\x03\x03\x03"
 			  "\x03\x03\x03\x03\x03\x03\x03\x03",
-		.ilen	= 16,
-		.result	= "\xdb\xf1\x42\xf4\xd1\x8a\xc7\x49"
+		.ctext	= "\xdb\xf1\x42\xf4\xd1\x8a\xc7\x49"
 			  "\x87\x41\x6f\x82\x0a\x98\x64\xae",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x24\x24\x24\x24\x24\x24\x24\x24"
 			  "\x24\x24\x24\x24\x24\x24\x24\x24"
 			  "\x24\x24\x24\x24\x24\x24\x24\x24"
 			  "\x24\x24\x24\x24",
 		.klen	= 28,
-		.input	= "\x24\x24\x24\x24\x24\x24\x24\x24"
+		.ptext	= "\x24\x24\x24\x24\x24\x24\x24\x24"
 			  "\x24\x24\x24\x24\x24\x24\x24\x24",
-		.ilen	= 16,
-		.result	= "\xfd\x1b\x4a\xe3\xbf\xf0\xad\x3d"
+		.ctext	= "\xfd\x1b\x4a\xe3\xbf\xf0\xad\x3d"
 			  "\x06\xd3\x61\x27\xfd\x13\x9e\xde",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x25\x25\x25\x25\x25\x25\x25\x25"
 			  "\x25\x25\x25\x25\x25\x25\x25\x25"
 			  "\x25\x25\x25\x25\x25\x25\x25\x25"
 			  "\x25\x25\x25\x25\x25\x25\x25\x25",
 		.klen	= 32,
-		.input	= "\x25\x25\x25\x25\x25\x25\x25\x25"
+		.ptext	= "\x25\x25\x25\x25\x25\x25\x25\x25"
 			  "\x25\x25\x25\x25\x25\x25\x25\x25",
-		.ilen	= 16,
-		.result	= "\x1a\x91\xfb\x2b\xb7\x78\x6b\xc4"
+		.ctext	= "\x1a\x91\xfb\x2b\xb7\x78\x6b\xc4"
 			"\x17\xd9\xff\x40\x3b\x0e\xe5\xfe",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x35\x35\x35\x35\x35\x35\x35\x35"
 			  "\x35\x35\x35\x35\x35\x35\x35\x35"
@@ -29706,93 +27661,28 @@
 			  "\x35\x35\x35\x35\x35\x35\x35\x35"
 			  "\x35\x35\x35\x35\x35\x35\x35\x35",
 		.klen	= 40,
-		.input	= "\x35\x35\x35\x35\x35\x35\x35\x35"
+		.ptext	= "\x35\x35\x35\x35\x35\x35\x35\x35"
 			  "\x35\x35\x35\x35\x35\x35\x35\x35",
-		.ilen	= 16,
-		.result = "\xa5\x2c\x85\x6f\x9c\xba\xa0\x97"
+		.ctext	= "\xa5\x2c\x85\x6f\x9c\xba\xa0\x97"
 			  "\x9e\xc6\x84\x0f\x17\x21\x07\xee",
-		.rlen	= 16,
+		.len	= 16,
 	},
 };
 
-static const struct cipher_testvec anubis_dec_tv_template[] = {
+static const struct cipher_testvec anubis_cbc_tv_template[] = {
 	{
 		.key	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
 			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe",
 		.klen	= 16,
-		.input	= "\x6d\xc5\xda\xa2\x26\x7d\x62\x6f"
-			  "\x08\xb7\x52\x8e\x6e\x6e\x86\x90",
-		.ilen	= 16,
-		.result	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
-			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe",
-		.rlen	= 16,
-	}, {
-
-		.key	= "\x03\x03\x03\x03\x03\x03\x03\x03"
-			  "\x03\x03\x03\x03\x03\x03\x03\x03"
-			  "\x03\x03\x03\x03",
-		.klen	= 20,
-		.input	= "\xdb\xf1\x42\xf4\xd1\x8a\xc7\x49"
-			  "\x87\x41\x6f\x82\x0a\x98\x64\xae",
-		.ilen	= 16,
-		.result	= "\x03\x03\x03\x03\x03\x03\x03\x03"
-			  "\x03\x03\x03\x03\x03\x03\x03\x03",
-		.rlen	= 16,
-	}, {
-		.key	= "\x24\x24\x24\x24\x24\x24\x24\x24"
-			  "\x24\x24\x24\x24\x24\x24\x24\x24"
-			  "\x24\x24\x24\x24\x24\x24\x24\x24"
-			  "\x24\x24\x24\x24",
-		.klen	= 28,
-		.input	= "\xfd\x1b\x4a\xe3\xbf\xf0\xad\x3d"
-			  "\x06\xd3\x61\x27\xfd\x13\x9e\xde",
-		.ilen	= 16,
-		.result	= "\x24\x24\x24\x24\x24\x24\x24\x24"
-			  "\x24\x24\x24\x24\x24\x24\x24\x24",
-		.rlen	= 16,
-	}, {
-		.key	= "\x25\x25\x25\x25\x25\x25\x25\x25"
-			  "\x25\x25\x25\x25\x25\x25\x25\x25"
-			  "\x25\x25\x25\x25\x25\x25\x25\x25"
-			  "\x25\x25\x25\x25\x25\x25\x25\x25",
-		.klen	= 32,
-		.input	= "\x1a\x91\xfb\x2b\xb7\x78\x6b\xc4"
-			"\x17\xd9\xff\x40\x3b\x0e\xe5\xfe",
-		.ilen	= 16,
-		.result	= "\x25\x25\x25\x25\x25\x25\x25\x25"
-			  "\x25\x25\x25\x25\x25\x25\x25\x25",
-		.rlen	= 16,
-	}, {
-		.key	= "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35",
-		.input = "\xa5\x2c\x85\x6f\x9c\xba\xa0\x97"
-			 "\x9e\xc6\x84\x0f\x17\x21\x07\xee",
-		.klen	= 40,
-		.ilen	= 16,
-		.result	= "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35",
-		.rlen	= 16,
-	},
-};
-
-static const struct cipher_testvec anubis_cbc_enc_tv_template[] = {
-	{
-		.key	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
-			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe",
-		.klen	= 16,
-		.input	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
+		.ptext	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
 			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
 			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
 			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe",
-		.ilen	= 32,
-		.result	= "\x6d\xc5\xda\xa2\x26\x7d\x62\x6f"
+		.ctext	= "\x6d\xc5\xda\xa2\x26\x7d\x62\x6f"
 			  "\x08\xb7\x52\x8e\x6e\x6e\x86\x90"
 			  "\x86\xd8\xb5\x6f\x98\x5e\x8a\x66"
 			  "\x4f\x1f\x78\xa1\xbb\x37\xf1\xbe",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x35\x35\x35\x35\x35\x35\x35\x35"
 			  "\x35\x35\x35\x35\x35\x35\x35\x35"
@@ -29800,263 +27690,114 @@
 			  "\x35\x35\x35\x35\x35\x35\x35\x35"
 			  "\x35\x35\x35\x35\x35\x35\x35\x35",
 		.klen	= 40,
-		.input	= "\x35\x35\x35\x35\x35\x35\x35\x35"
+		.ptext	= "\x35\x35\x35\x35\x35\x35\x35\x35"
 			  "\x35\x35\x35\x35\x35\x35\x35\x35"
 			  "\x35\x35\x35\x35\x35\x35\x35\x35"
 			  "\x35\x35\x35\x35\x35\x35\x35\x35",
-		.ilen	= 32,
-		.result = "\xa5\x2c\x85\x6f\x9c\xba\xa0\x97"
+		.ctext	= "\xa5\x2c\x85\x6f\x9c\xba\xa0\x97"
 			  "\x9e\xc6\x84\x0f\x17\x21\x07\xee"
 			  "\xa2\xbc\x06\x98\xc6\x4b\xda\x75"
 			  "\x2e\xaa\xbe\x58\xce\x01\x5b\xc7",
-		.rlen	= 32,
-	},
-};
-
-static const struct cipher_testvec anubis_cbc_dec_tv_template[] = {
-	{
-		.key	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
-			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe",
-		.klen	= 16,
-		.input	= "\x6d\xc5\xda\xa2\x26\x7d\x62\x6f"
-			  "\x08\xb7\x52\x8e\x6e\x6e\x86\x90"
-			  "\x86\xd8\xb5\x6f\x98\x5e\x8a\x66"
-			  "\x4f\x1f\x78\xa1\xbb\x37\xf1\xbe",
-		.ilen	= 32,
-		.result	= "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
-			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
-			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
-			  "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe",
-		.rlen	= 32,
-	}, {
-		.key	= "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35",
-		.klen	= 40,
-		.input = "\xa5\x2c\x85\x6f\x9c\xba\xa0\x97"
-			  "\x9e\xc6\x84\x0f\x17\x21\x07\xee"
-			  "\xa2\xbc\x06\x98\xc6\x4b\xda\x75"
-			  "\x2e\xaa\xbe\x58\xce\x01\x5b\xc7",
-		.ilen	= 32,
-		.result	= "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35"
-			  "\x35\x35\x35\x35\x35\x35\x35\x35",
-		.rlen	= 32,
+		.len	= 32,
 	},
 };
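
The CBC table differs from the ECB table above only by chaining: each 16-byte plaintext block is XORed with the previous ciphertext block (the IV for the first) before the block cipher runs, which is why the second half of each 32-byte vector no longer repeats the ECB output. A generic sketch, assuming an in-place 16-byte-block encryptor blk_enc and a length that is a whole number of blocks:

	#include <string.h>	/* memcpy */

	static void cbc_encrypt(void (*blk_enc)(const unsigned char *key,
						unsigned char blk[16]),
				const unsigned char *key,
				const unsigned char iv[16],
				const unsigned char *pt, unsigned char *ct,
				unsigned int len)
	{
		unsigned char chain[16];
		unsigned int i, j;

		memcpy(chain, iv, 16);
		for (i = 0; i < len; i += 16) {
			for (j = 0; j < 16; j++)
				chain[j] ^= pt[i + j];	/* mix in plaintext */
			blk_enc(key, chain);		/* encrypt in place */
			memcpy(&ct[i], chain, 16);	/* feeds the next block */
		}
	}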
 
 /*
  * XETA test vectors
  */
-static const struct cipher_testvec xeta_enc_tv_template[] = {
+static const struct cipher_testvec xeta_tv_template[] = {
 	{
 		.key    = zeroed_string,
 		.klen	= 16,
-		.input  = zeroed_string,
-		.ilen	= 8,
-		.result	= "\xaa\x22\x96\xe5\x6c\x61\xf3\x45",
-		.rlen	= 8,
+		.ptext	= zeroed_string,
+		.ctext	= "\xaa\x22\x96\xe5\x6c\x61\xf3\x45",
+		.len	= 8,
 	}, {
 		.key	= "\x2b\x02\x05\x68\x06\x14\x49\x76"
 			  "\x77\x5d\x0e\x26\x6c\x28\x78\x43",
 		.klen	= 16,
-		.input	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
-		.ilen	= 8,
-		.result	= "\x82\x3e\xeb\x35\xdc\xdd\xd9\xc3",
-		.rlen	= 8,
+		.ptext	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
+		.ctext	= "\x82\x3e\xeb\x35\xdc\xdd\xd9\xc3",
+		.len	= 8,
 	}, {
 		.key	= "\x09\x65\x43\x11\x66\x44\x39\x25"
 			  "\x51\x3a\x16\x10\x0a\x08\x12\x6e",
 		.klen	= 16,
-		.input	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
+		.ptext	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
 			  "\x65\x73\x74\x5f\x76\x65\x63\x74",
-		.ilen	= 16,
-		.result	= "\xe2\x04\xdb\xf2\x89\x85\x9e\xea"
+		.ctext	= "\xe2\x04\xdb\xf2\x89\x85\x9e\xea"
 			  "\x61\x35\xaa\xed\xb5\xcb\x71\x2c",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x4d\x76\x32\x17\x05\x3f\x75\x2c"
 			  "\x5d\x04\x16\x36\x15\x72\x63\x2f",
 		.klen	= 16,
-		.input	= "\x54\x65\x61\x20\x69\x73\x20\x67"
+		.ptext	= "\x54\x65\x61\x20\x69\x73\x20\x67"
 			  "\x6f\x6f\x64\x20\x66\x6f\x72\x20"
 			  "\x79\x6f\x75\x21\x21\x21\x20\x72"
 			  "\x65\x61\x6c\x6c\x79\x21\x21\x21",
-		.ilen	= 32,
-		.result	= "\x0b\x03\xcd\x8a\xbe\x95\xfd\xb1"
+		.ctext	= "\x0b\x03\xcd\x8a\xbe\x95\xfd\xb1"
 			  "\xc1\x44\x91\x0b\xa5\xc9\x1b\xb4"
 			  "\xa9\xda\x1e\x9e\xb1\x3e\x2a\x8f"
 			  "\xea\xa5\x6a\x85\xd1\xf4\xa8\xa5",
-		.rlen	= 32,
-	}
-};
-
-static const struct cipher_testvec xeta_dec_tv_template[] = {
-	{
-		.key    = zeroed_string,
-		.klen	= 16,
-		.input	= "\xaa\x22\x96\xe5\x6c\x61\xf3\x45",
-		.ilen	= 8,
-		.result = zeroed_string,
-		.rlen	= 8,
-	}, {
-		.key	= "\x2b\x02\x05\x68\x06\x14\x49\x76"
-			  "\x77\x5d\x0e\x26\x6c\x28\x78\x43",
-		.klen	= 16,
-		.input	= "\x82\x3e\xeb\x35\xdc\xdd\xd9\xc3",
-		.ilen	= 8,
-		.result	= "\x74\x65\x73\x74\x20\x6d\x65\x2e",
-		.rlen	= 8,
-	}, {
-		.key	= "\x09\x65\x43\x11\x66\x44\x39\x25"
-			  "\x51\x3a\x16\x10\x0a\x08\x12\x6e",
-		.klen	= 16,
-		.input	= "\xe2\x04\xdb\xf2\x89\x85\x9e\xea"
-			  "\x61\x35\xaa\xed\xb5\xcb\x71\x2c",
-		.ilen	= 16,
-		.result	= "\x6c\x6f\x6e\x67\x65\x72\x5f\x74"
-			  "\x65\x73\x74\x5f\x76\x65\x63\x74",
-		.rlen	= 16,
-	}, {
-		.key	= "\x4d\x76\x32\x17\x05\x3f\x75\x2c"
-			  "\x5d\x04\x16\x36\x15\x72\x63\x2f",
-		.klen	= 16,
-		.input	= "\x0b\x03\xcd\x8a\xbe\x95\xfd\xb1"
-			  "\xc1\x44\x91\x0b\xa5\xc9\x1b\xb4"
-			  "\xa9\xda\x1e\x9e\xb1\x3e\x2a\x8f"
-			  "\xea\xa5\x6a\x85\xd1\xf4\xa8\xa5",
-		.ilen	= 32,
-		.result	= "\x54\x65\x61\x20\x69\x73\x20\x67"
-			  "\x6f\x6f\x64\x20\x66\x6f\x72\x20"
-			  "\x79\x6f\x75\x21\x21\x21\x20\x72"
-			  "\x65\x61\x6c\x6c\x79\x21\x21\x21",
-		.rlen	= 32,
+		.len	= 32,
 	}
 };
 
 /*
  * FCrypt test vectors
  */
-static const struct cipher_testvec fcrypt_pcbc_enc_tv_template[] = {
+static const struct cipher_testvec fcrypt_pcbc_tv_template[] = {
 	{ /* http://www.openafs.org/pipermail/openafs-devel/2000-December/005320.html */
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 8,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 8,
-		.result	= "\x0E\x09\x00\xC7\x3E\xF7\xED\x41",
-		.rlen	= 8,
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\x0E\x09\x00\xC7\x3E\xF7\xED\x41",
+		.len	= 8,
 	}, {
 		.key	= "\x11\x44\x77\xAA\xDD\x00\x33\x66",
 		.klen	= 8,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x12\x34\x56\x78\x9A\xBC\xDE\xF0",
-		.ilen	= 8,
-		.result	= "\xD8\xED\x78\x74\x77\xEC\x06\x80",
-		.rlen	= 8,
+		.ptext	= "\x12\x34\x56\x78\x9A\xBC\xDE\xF0",
+		.ctext	= "\xD8\xED\x78\x74\x77\xEC\x06\x80",
+		.len	= 8,
 	}, { /* From Arla */
 		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
 		.klen	= 8,
 		.iv	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.input	= "The quick brown fox jumps over the lazy dogs.\0\0",
-		.ilen	= 48,
-		.result	= "\x00\xf0\x0e\x11\x75\xe6\x23\x82"
+		.ptext	= "The quick brown fox jumps over the lazy dogs.\0\0",
+		.ctext	= "\x00\xf0\x0e\x11\x75\xe6\x23\x82"
 			  "\xee\xac\x98\x62\x44\x51\xe4\x84"
 			  "\xc3\x59\xd8\xaa\x64\x60\xae\xf7"
 			  "\xd2\xd9\x13\x79\x72\xa3\x45\x03"
 			  "\x23\xb5\x62\xd7\x0c\xf5\x27\xd1"
 			  "\xf8\x91\x3c\xac\x44\x22\x92\xef",
-		.rlen	= 48,
+		.len	= 48,
 	}, {
 		.key	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
 		.klen	= 8,
 		.iv	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
-		.input	= "The quick brown fox jumps over the lazy dogs.\0\0",
-		.ilen	= 48,
-		.result	= "\xca\x90\xf5\x9d\xcb\xd4\xd2\x3c"
+		.ptext	= "The quick brown fox jumps over the lazy dogs.\0\0",
+		.ctext	= "\xca\x90\xf5\x9d\xcb\xd4\xd2\x3c"
 			  "\x01\x88\x7f\x3e\x31\x6e\x62\x9d"
 			  "\xd8\xe0\x57\xa3\x06\x3a\x42\x58"
 			  "\x2a\x28\xfe\x72\x52\x2f\xdd\xe0"
 			  "\x19\x89\x09\x1c\x2a\x8e\x8c\x94"
 			  "\xfc\xc7\x68\xe4\x88\xaa\xde\x0f",
-		.rlen	= 48,
+		.len	= 48,
 	}, { /* split-page version */
 		.key	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
 		.klen	= 8,
 		.iv	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
-		.input	= "The quick brown fox jumps over the lazy dogs.\0\0",
-		.ilen	= 48,
-		.result	= "\xca\x90\xf5\x9d\xcb\xd4\xd2\x3c"
+		.ptext	= "The quick brown fox jumps over the lazy dogs.\0\0",
+		.ctext	= "\xca\x90\xf5\x9d\xcb\xd4\xd2\x3c"
 			  "\x01\x88\x7f\x3e\x31\x6e\x62\x9d"
 			  "\xd8\xe0\x57\xa3\x06\x3a\x42\x58"
 			  "\x2a\x28\xfe\x72\x52\x2f\xdd\xe0"
 			  "\x19\x89\x09\x1c\x2a\x8e\x8c\x94"
 			  "\xfc\xc7\x68\xe4\x88\xaa\xde\x0f",
-		.rlen	= 48,
-		.np	= 2,
-		.tap	= { 20, 28 },
-	}
-};
-
-static const struct cipher_testvec fcrypt_pcbc_dec_tv_template[] = {
-	{ /* http://www.openafs.org/pipermail/openafs-devel/2000-December/005320.html */
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 8,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x0E\x09\x00\xC7\x3E\xF7\xED\x41",
-		.ilen	= 8,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 8,
-	}, {
-		.key	= "\x11\x44\x77\xAA\xDD\x00\x33\x66",
-		.klen	= 8,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xD8\xED\x78\x74\x77\xEC\x06\x80",
-		.ilen	= 8,
-		.result	= "\x12\x34\x56\x78\x9A\xBC\xDE\xF0",
-		.rlen	= 8,
-	}, { /* From Arla */
-		.key	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
-		.klen	= 8,
-		.iv	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.input	= "\x00\xf0\x0e\x11\x75\xe6\x23\x82"
-			  "\xee\xac\x98\x62\x44\x51\xe4\x84"
-			  "\xc3\x59\xd8\xaa\x64\x60\xae\xf7"
-			  "\xd2\xd9\x13\x79\x72\xa3\x45\x03"
-			  "\x23\xb5\x62\xd7\x0c\xf5\x27\xd1"
-			  "\xf8\x91\x3c\xac\x44\x22\x92\xef",
-		.ilen	= 48,
-		.result	= "The quick brown fox jumps over the lazy dogs.\0\0",
-		.rlen	= 48,
-	}, {
-		.key	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.klen	= 8,
-		.iv	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
-		.input	= "\xca\x90\xf5\x9d\xcb\xd4\xd2\x3c"
-			  "\x01\x88\x7f\x3e\x31\x6e\x62\x9d"
-			  "\xd8\xe0\x57\xa3\x06\x3a\x42\x58"
-			  "\x2a\x28\xfe\x72\x52\x2f\xdd\xe0"
-			  "\x19\x89\x09\x1c\x2a\x8e\x8c\x94"
-			  "\xfc\xc7\x68\xe4\x88\xaa\xde\x0f",
-		.ilen	= 48,
-		.result	= "The quick brown fox jumps over the lazy dogs.\0\0",
-		.rlen	= 48,
-	}, { /* split-page version */
-		.key	= "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.klen	= 8,
-		.iv	= "\xf0\xe1\xd2\xc3\xb4\xa5\x96\x87",
-		.input	= "\xca\x90\xf5\x9d\xcb\xd4\xd2\x3c"
-			  "\x01\x88\x7f\x3e\x31\x6e\x62\x9d"
-			  "\xd8\xe0\x57\xa3\x06\x3a\x42\x58"
-			  "\x2a\x28\xfe\x72\x52\x2f\xdd\xe0"
-			  "\x19\x89\x09\x1c\x2a\x8e\x8c\x94"
-			  "\xfc\xc7\x68\xe4\x88\xaa\xde\x0f",
-		.ilen	= 48,
-		.result	= "The quick brown fox jumps over the lazy dogs.\0\0",
-		.rlen	= 48,
+		.len	= 48,
 		.np	= 2,
 		.tap	= { 20, 28 },
 	}
@@ -30065,47 +27806,44 @@
 /*
  * CAMELLIA test vectors.
  */
-static const struct cipher_testvec camellia_enc_tv_template[] = {
+static const struct cipher_testvec camellia_tv_template[] = {
 	{
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
 		.klen	= 16,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.ilen	= 16,
-		.result	= "\x67\x67\x31\x38\x54\x96\x69\x73"
+		.ctext	= "\x67\x67\x31\x38\x54\x96\x69\x73"
 			  "\x08\x57\x06\x56\x48\xea\xbe\x43",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
 			  "\x00\x11\x22\x33\x44\x55\x66\x77",
 		.klen	= 24,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.ilen	= 16,
-		.result	= "\xb4\x99\x34\x01\xb3\xe9\x96\xf8"
+		.ctext	= "\xb4\x99\x34\x01\xb3\xe9\x96\xf8"
 			  "\x4e\xe5\xce\xe7\xd7\x9b\x09\xb9",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
 			  "\x00\x11\x22\x33\x44\x55\x66\x77"
 			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
 		.klen	= 32,
-		.input	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
+		.ptext	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
 			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.ilen	= 16,
-		.result	= "\x9a\xcc\x23\x7d\xff\x16\xd7\x6c"
+		.ctext	= "\x9a\xcc\x23\x7d\xff\x16\xd7\x6c"
 			  "\x20\xef\x7c\x91\x9e\x3a\x75\x09",
-		.rlen	= 16,
+		.len	= 16,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x3F\x85\x62\x3F\x1C\xF9\xD6\x1C"
 			  "\xF9\xD6\xB3\x90\x6D\x4A\x90\x6D"
 			  "\x4A\x27\x04\xE1\x27\x04\xE1\xBE"
 			  "\x9B\x78\xBE\x9B\x78\x55\x32\x0F",
 		.klen	= 32,
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -30231,8 +27969,7 @@
 			  "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
 			  "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
 			  "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
-		.ilen	= 1008,
-		.result	= "\xED\xCD\xDB\xB8\x68\xCE\xBD\xEA"
+		.ctext	= "\xED\xCD\xDB\xB8\x68\xCE\xBD\xEA"
 			  "\x9D\x9D\xCD\x9F\x4F\xFC\x4D\xB7"
 			  "\xA5\xFF\x6F\x43\x0F\xBA\x32\x04"
 			  "\xB3\xC2\xB9\x03\xAA\x91\x56\x29"
@@ -30358,341 +28095,39 @@
 			  "\xAE\xEF\x3E\x82\x12\x0B\x74\x72"
 			  "\xF8\xB2\xAA\x7A\xD6\xFF\xFA\x55"
 			  "\x33\x1A\xBB\xD3\xA2\x7E\x97\x66",
-		.rlen	= 1008,
+		.len	= 1008,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 1008 - 20, 4, 16 },
 	},
 };
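
The .also_non_np/.np/.tap triple on the 1008-byte vector drives scatter-gather coverage: the buffer is presented as np scatterlist pieces whose sizes are the tap[] entries (here 988 + 4 + 16 = 1008), with also_non_np requesting an extra pass over the contiguous buffer. A toy walk over such a split, under those assumed field semantics — process_piece is a hypothetical consumer:

	void process_piece(const unsigned char *p, unsigned int n);	/* hypothetical */

	/* Visit a len-byte buffer as np pieces of tap[i] bytes each. */
	static void walk_split(const unsigned char *buf, unsigned int len,
			       unsigned int np, const unsigned int *tap)
	{
		unsigned int i, off = 0;

		for (i = 0; i < np; i++) {
			process_piece(buf + off, tap[i]);
			off += tap[i];
		}
		/* A well-formed template guarantees off == len here. */
	}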
 
-static const struct cipher_testvec camellia_dec_tv_template[] = {
-	{
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.klen	= 16,
-		.input	= "\x67\x67\x31\x38\x54\x96\x69\x73"
-			  "\x08\x57\x06\x56\x48\xea\xbe\x43",
-		.ilen	= 16,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.rlen	= 16,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
-			  "\x00\x11\x22\x33\x44\x55\x66\x77",
-		.klen	= 24,
-		.input	= "\xb4\x99\x34\x01\xb3\xe9\x96\xf8"
-			  "\x4e\xe5\xce\xe7\xd7\x9b\x09\xb9",
-		.ilen	= 16,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.rlen	= 16,
-	}, {
-		.key	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10"
-			  "\x00\x11\x22\x33\x44\x55\x66\x77"
-			  "\x88\x99\xaa\xbb\xcc\xdd\xee\xff",
-		.klen	= 32,
-		.input	= "\x9a\xcc\x23\x7d\xff\x16\xd7\x6c"
-			  "\x20\xef\x7c\x91\x9e\x3a\x75\x09",
-		.ilen	= 16,
-		.result	= "\x01\x23\x45\x67\x89\xab\xcd\xef"
-			  "\xfe\xdc\xba\x98\x76\x54\x32\x10",
-		.rlen	= 16,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x3F\x85\x62\x3F\x1C\xF9\xD6\x1C"
-			  "\xF9\xD6\xB3\x90\x6D\x4A\x90\x6D"
-			  "\x4A\x27\x04\xE1\x27\x04\xE1\xBE"
-			  "\x9B\x78\xBE\x9B\x78\x55\x32\x0F",
-		.klen	= 32,
-		.input	= "\xED\xCD\xDB\xB8\x68\xCE\xBD\xEA"
-			  "\x9D\x9D\xCD\x9F\x4F\xFC\x4D\xB7"
-			  "\xA5\xFF\x6F\x43\x0F\xBA\x32\x04"
-			  "\xB3\xC2\xB9\x03\xAA\x91\x56\x29"
-			  "\x0D\xD0\xFD\xC4\x65\xA5\x69\xB9"
-			  "\xF1\xF6\xB1\xA5\xB2\x75\x4F\x8A"
-			  "\x8D\x7D\x1B\x9B\xC7\x68\x72\xF8"
-			  "\x01\x9B\x17\x0A\x29\xE7\x61\x28"
-			  "\x7F\xA7\x50\xCA\x20\x2C\x96\x3B"
-			  "\x6E\x5C\x5D\x3F\xB5\x7F\xF3\x2B"
-			  "\x04\xEF\x9D\xD4\xCE\x41\x28\x8E"
-			  "\x83\x54\xAE\x7C\x82\x46\x10\xC9"
-			  "\xC4\x8A\x1E\x1F\x4C\xA9\xFC\xEC"
-			  "\x3C\x8C\x30\xFC\x59\xD2\x54\xC4"
-			  "\x6F\x50\xC6\xCA\x8C\x14\x5B\x9C"
-			  "\x18\x56\x5B\xF8\x33\x0E\x4A\xDB"
-			  "\xEC\xB5\x6E\x5B\x31\xC4\x0E\x98"
-			  "\x9F\x32\xBA\xA2\x18\xCF\x55\x43"
-			  "\xFE\x80\x8F\x60\xCF\x05\x30\x9B"
-			  "\x70\x50\x1E\x9C\x08\x87\xE6\x20"
-			  "\xD2\xF3\x27\xF8\x2A\x8D\x12\xB2"
-			  "\xBC\x5F\xFE\x52\x52\xF6\x7F\xB6"
-			  "\xB8\x30\x86\x3B\x0F\x94\x1E\x79"
-			  "\x13\x94\x35\xA2\xB1\x35\x5B\x05"
-			  "\x2A\x98\x6B\x96\x4C\xB1\x20\xBE"
-			  "\xB6\x14\xC2\x06\xBF\xFD\x5F\x2A"
-			  "\xF5\x33\xC8\x19\x45\x14\x44\x5D"
-			  "\xFE\x94\x7B\xBB\x63\x13\x57\xC3"
-			  "\x2A\x8F\x6C\x11\x2A\x07\xA7\x6A"
-			  "\xBF\x20\xD3\x99\xC6\x00\x0B\xBF"
-			  "\x83\x46\x25\x3A\xB0\xF6\xC5\xC8"
-			  "\x00\xCA\xE5\x28\x4A\x7C\x95\x9C"
-			  "\x7B\x43\xAB\xF9\xE4\xF8\x74\xAB"
-			  "\xA7\xB8\x9C\x0F\x53\x7B\xB6\x74"
-			  "\x60\x64\x0D\x1C\x80\xD1\x20\x9E"
-			  "\xDC\x14\x27\x9B\xFC\xBD\x5C\x96"
-			  "\xD2\x51\xDC\x96\xEE\xE5\xEA\x2B"
-			  "\x02\x7C\xAA\x3C\xDC\x9D\x7B\x01"
-			  "\x20\xC3\xE1\x0B\xDD\xAB\xF3\x1E"
-			  "\x19\xA8\x84\x29\x5F\xCC\xC3\x5B"
-			  "\xE4\x33\x59\xDC\x12\xEB\x2B\x4D"
-			  "\x5B\x55\x23\xB7\x40\x31\xDE\xEE"
-			  "\x18\xC9\x3C\x4D\xBC\xED\xE0\x42"
-			  "\xAD\xDE\xA0\xA3\xC3\xFE\x44\xD3"
-			  "\xE1\x9A\xDA\xAB\x32\xFC\x1A\xBF"
-			  "\x63\xA9\xF0\x6A\x08\x46\xBD\x48"
-			  "\x83\x06\xAB\x82\x99\x01\x16\x1A"
-			  "\x03\x36\xC5\x59\x6B\xB8\x8C\x9F"
-			  "\xC6\x51\x3D\xE5\x7F\xBF\xAB\xBC"
-			  "\xC9\xA1\x88\x34\x5F\xA9\x7C\x3B"
-			  "\x9F\x1B\x98\x2B\x4F\xFB\x9B\xF0"
-			  "\xCD\xB6\x45\xB2\x29\x2E\x34\x23"
-			  "\xA9\x97\xC0\x22\x8C\x42\x9B\x5F"
-			  "\x40\xC8\xD7\x3D\x82\x9A\x6F\xAA"
-			  "\x74\x83\x29\x05\xE8\xC4\x4D\x01"
-			  "\xB5\xE5\x84\x3F\x7F\xD3\xE0\x99"
-			  "\xDA\xE7\x6F\x30\xFD\xAA\x92\x30"
-			  "\xA5\x46\x8B\xA2\xE6\x58\x62\x7C"
-			  "\x2C\x35\x1B\x38\x85\x7D\xE8\xF3"
-			  "\x87\x4F\xDA\xD8\x5F\xFC\xB6\x44"
-			  "\xD0\xE3\x9B\x8B\xBF\xD6\xB8\xC4"
-			  "\x73\xAE\x1D\x8B\x5B\x74\x8B\xCB"
-			  "\xA4\xAD\xCF\x5D\xD4\x58\xC9\xCD"
-			  "\xF7\x90\x68\xCF\xC9\x11\x52\x3E"
-			  "\xE8\xA1\xA3\x78\x8B\xD0\xAC\x0A"
-			  "\xD4\xC9\xA3\xA5\x55\x30\xC8\x3E"
-			  "\xED\x28\x39\xE9\x63\xED\x41\x70"
-			  "\x51\xE3\xC4\xA0\xFC\xD5\x43\xCB"
-			  "\x4D\x65\xC8\xFD\x3A\x91\x8F\x60"
-			  "\x8A\xA6\x6D\x9D\x3E\x01\x23\x4B"
-			  "\x50\x47\xC9\xDC\x9B\xDE\x37\xC5"
-			  "\xBF\x67\xB1\x6B\x78\x38\xD5\x7E"
-			  "\xB6\xFF\x67\x83\x3B\x6E\xBE\x23"
-			  "\x45\xFA\x1D\x69\x44\xFD\xC6\xB9"
-			  "\xD0\x4A\x92\xD1\xBE\xF6\x4A\xB7"
-			  "\xCA\xA8\xA2\x9E\x13\x87\x57\x92"
-			  "\x64\x7C\x85\x0B\xB3\x29\x37\xD8"
-			  "\xE6\xAA\xAF\xC4\x03\x67\xA3\xBF"
-			  "\x2E\x45\x83\xB6\xD8\x54\x00\x89"
-			  "\xF6\xBC\x3A\x7A\x88\x58\x51\xED"
-			  "\xF4\x4E\x01\xA5\xC3\x2E\xD9\x42"
-			  "\xBD\x6E\x0D\x0B\x21\xB0\x1A\xCC"
-			  "\xA4\xD3\x3F\xDC\x9B\x81\xD8\xF1"
-			  "\xEA\x7A\x6A\xB7\x07\xC9\x6D\x91"
-			  "\x6D\x3A\xF5\x5F\xA6\xFF\x87\x1E"
-			  "\x3F\xDD\xC0\x72\xEA\xAC\x08\x15"
-			  "\x21\xE6\xC6\xB6\x0D\xD8\x51\x86"
-			  "\x2A\x03\x73\xF7\x29\xD4\xC4\xE4"
-			  "\x7F\x95\x10\xF7\xAB\x3F\x92\x23"
-			  "\xD3\xCE\x9C\x2E\x46\x3B\x63\x43"
-			  "\xBB\xC2\x82\x7A\x83\xD5\x55\xE2"
-			  "\xE7\x9B\x2F\x92\xAF\xFD\x81\x56"
-			  "\x79\xFD\x3E\xF9\x46\xE0\x25\xD4"
-			  "\x38\xDE\xBC\x2C\xC4\x7A\x2A\x8F"
-			  "\x94\x4F\xD0\xAD\x9B\x37\x18\xD4"
-			  "\x0E\x4D\x0F\x02\x3A\xDC\x5A\xA2"
-			  "\x39\x25\x55\x20\x5A\xA6\x02\x9F"
-			  "\xE6\x77\x21\x77\xE5\x4B\x7B\x0B"
-			  "\x30\xF8\x5F\x33\x0F\x49\xCD\xFF"
-			  "\xF2\xE4\x35\xF9\xF0\x63\xC3\x7E"
-			  "\xF1\xA6\x73\xB4\xDF\xE7\xBB\x78"
-			  "\xFF\x21\xA9\xF3\xF3\xCF\x5D\xBA"
-			  "\xED\x87\x98\xAC\xFE\x48\x97\x6D"
-			  "\xA6\x7F\x69\x31\xB1\xC4\xFF\x14"
-			  "\xC6\x76\xD4\x10\xDD\xF6\x49\x2C"
-			  "\x9C\xC8\x6D\x76\xC0\x8F\x5F\x55"
-			  "\x2F\x3C\x8A\x30\xAA\xC3\x16\x55"
-			  "\xC6\xFC\x8D\x8B\xB9\xE5\x80\x6C"
-			  "\xC8\x7E\xBD\x65\x58\x36\xD5\xBC"
-			  "\xF0\x33\x52\x29\x70\xF9\x5C\xE9"
-			  "\xAC\x1F\xB5\x73\x56\x66\x54\xAF"
-			  "\x1B\x8F\x7D\xED\xAB\x03\xCE\xE3"
-			  "\xAE\x47\xB6\x69\x86\xE9\x01\x31"
-			  "\x83\x18\x3D\xF4\x74\x7B\xF9\x42"
-			  "\x4C\xFD\x75\x4A\x6D\xF0\x03\xA6"
-			  "\x2B\x20\x63\xDA\x49\x65\x5E\x8B"
-			  "\xC0\x19\xE3\x8D\xD9\xF3\xB0\x34"
-			  "\xD3\x52\xFC\x68\x00\x43\x1B\x37"
-			  "\x31\x93\x51\x1C\x63\x97\x70\xB0"
-			  "\x99\x78\x83\x13\xFD\xCF\x53\x81"
-			  "\x36\x46\xB5\x42\x52\x2F\x32\xEB"
-			  "\x4A\x3D\xF1\x8F\x1C\x54\x2E\xFC"
-			  "\x41\x75\x5A\x8C\x8E\x6F\xE7\x1A"
-			  "\xAE\xEF\x3E\x82\x12\x0B\x74\x72"
-			  "\xF8\xB2\xAA\x7A\xD6\xFF\xFA\x55"
-			  "\x33\x1A\xBB\xD3\xA2\x7E\x97\x66",
-		.ilen	= 1008,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
-			  "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
-			  "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
-			  "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
-			  "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
-			  "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
-			  "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
-			  "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
-			  "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
-			  "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
-			  "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
-			  "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
-			  "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
-			  "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
-			  "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
-			  "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
-			  "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
-			  "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
-			  "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
-			  "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
-			  "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
-			  "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
-			  "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
-			  "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
-			  "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
-			  "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
-			  "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
-			  "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
-			  "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
-			  "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
-			  "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
-			  "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
-			  "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
-			  "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
-			  "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
-			  "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
-			  "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
-			  "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
-			  "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
-			  "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
-			  "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
-			  "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
-			  "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
-			  "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
-			  "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
-			  "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
-			  "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
-			  "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
-			  "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
-			  "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
-			  "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
-			  "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
-			  "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
-			  "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
-			  "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
-			  "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
-			  "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
-			  "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
-			  "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
-			  "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
-			  "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
-			  "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
-			  "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
-			  "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
-		.rlen	= 1008,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 1008 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec camellia_cbc_enc_tv_template[] = {
+static const struct cipher_testvec camellia_cbc_tv_template[] = {
 	{
 		.key    = "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
 			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
 		.klen   = 16,
 		.iv	= "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
 			  "\xb4\x22\xda\x80\x2c\x9f\xac\x41",
-		.input	= "Single block msg",
-		.ilen   = 16,
-		.result = "\xea\x32\x12\x76\x3b\x50\x10\xe7"
+		.ptext	= "Single block msg",
+		.ctext	= "\xea\x32\x12\x76\x3b\x50\x10\xe7"
 			  "\x18\xf6\xfd\x5d\xf6\x8f\x13\x51",
-		.rlen   = 16,
+		.len	= 16,
 	}, {
 		.key    = "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
 			  "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
 		.klen   = 16,
 		.iv     = "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
 			  "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58",
-		.input  = "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.ilen   = 32,
-		.result = "\xa5\xdf\x6e\x50\xda\x70\x6c\x01"
+		.ctext	= "\xa5\xdf\x6e\x50\xda\x70\x6c\x01"
 			  "\x4a\xab\xf3\xf2\xd6\xfc\x6c\xfd"
 			  "\x19\xb4\x3e\x57\x1c\x02\x5e\xa0"
 			  "\x15\x78\xe0\x5e\xf2\xcb\x87\x16",
-		.rlen   = 32,
+		.len	= 32,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -30701,7 +28136,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -30827,8 +28262,7 @@
 			  "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
 			  "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
 			  "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
-		.ilen	= 1008,
-		.result	= "\xCD\x3E\x2A\x3B\x3E\x94\xC5\x77"
+		.ctext	= "\xCD\x3E\x2A\x3B\x3E\x94\xC5\x77"
 			  "\xBA\xBB\x5B\xB1\xDE\x7B\xA4\x40"
 			  "\x88\x39\xE3\xFD\x94\x4B\x25\x58"
 			  "\xE1\x4B\xC4\x18\x7A\xFD\x17\x2B"
@@ -30954,310 +28388,14 @@
 			  "\x43\x94\x23\x7E\xEE\xF0\xA5\x79"
 			  "\x55\x01\xD4\x58\xB2\xF2\x85\x49"
 			  "\x70\xC5\xB9\x0B\x3B\x7A\x6E\x6C",
-		.rlen	= 1008,
+		.len	= 1008,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 1008 - 20, 4, 16 },
 	},
 };
 
-static const struct cipher_testvec camellia_cbc_dec_tv_template[] = {
-	{
-		.key    = "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
-			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
-		.klen   = 16,
-		.iv	= "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
-			  "\xb4\x22\xda\x80\x2c\x9f\xac\x41",
-		.input	= "\xea\x32\x12\x76\x3b\x50\x10\xe7"
-			  "\x18\xf6\xfd\x5d\xf6\x8f\x13\x51",
-		.ilen   = 16,
-		.result = "Single block msg",
-		.rlen   = 16,
-	}, {
-		.key    = "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
-			  "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
-		.klen   = 16,
-		.iv     = "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
-			  "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58",
-		.input = "\xa5\xdf\x6e\x50\xda\x70\x6c\x01"
-			  "\x4a\xab\xf3\xf2\xd6\xfc\x6c\xfd"
-			  "\x19\xb4\x3e\x57\x1c\x02\x5e\xa0"
-			  "\x15\x78\xe0\x5e\xf2\xcb\x87\x16",
-		.ilen   = 32,
-		.result = "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
-		.rlen   = 32,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\xCD\x3E\x2A\x3B\x3E\x94\xC5\x77"
-			  "\xBA\xBB\x5B\xB1\xDE\x7B\xA4\x40"
-			  "\x88\x39\xE3\xFD\x94\x4B\x25\x58"
-			  "\xE1\x4B\xC4\x18\x7A\xFD\x17\x2B"
-			  "\xB9\xF9\xC2\x27\x6A\xB6\x31\x27"
-			  "\xA6\xAD\xEF\xE5\x5D\xE4\x02\x01"
-			  "\x56\x2E\x10\xC2\x2C\xFF\xC6\x83"
-			  "\xB5\xDC\x4F\x63\xAD\x0E\x63\x5E"
-			  "\x56\xC8\x18\x3D\x79\x86\x97\xEF"
-			  "\x57\x0E\x63\xA1\xC1\x41\x48\xB8"
-			  "\x98\xB7\x51\x6D\x18\xF6\x19\x82"
-			  "\x37\x49\x88\xA4\xEF\x91\x21\x47"
-			  "\x03\x28\xEA\x42\xF4\xFB\x7A\x58"
-			  "\x28\x90\x77\x46\xD8\xD2\x35\x16"
-			  "\x44\xA9\x9E\x49\x52\x2A\xE4\x16"
-			  "\x5D\xF7\x65\xEB\x0F\xC9\x29\xE6"
-			  "\xCF\x76\x91\x89\x8A\x94\x39\xFA"
-			  "\x6B\x5F\x63\x53\x74\x43\x91\xF5"
-			  "\x3F\xBC\x88\x53\xB2\x1A\x02\x3F"
-			  "\x9D\x32\x84\xEB\x56\x28\xD6\x06"
-			  "\xD5\xB2\x20\xA9\xFC\xC3\x76\x62"
-			  "\x32\xCC\x86\xC8\x36\x67\x5E\x7E"
-			  "\xA4\xAA\x15\x63\x6B\xA9\x86\xAF"
-			  "\x1A\x52\x82\x36\x5F\xF4\x3F\x7A"
-			  "\x9B\x78\x62\x3B\x02\x28\x60\xB3"
-			  "\xBA\x82\xB1\xDD\xC9\x60\x8F\x47"
-			  "\xF1\x6B\xFE\xE5\x39\x34\xA0\x28"
-			  "\xA4\xB3\xC9\x7E\xED\x28\x8D\x70"
-			  "\xB2\x1D\xFD\xC6\x00\xCF\x1A\x94"
-			  "\x28\xF8\xC1\x34\xB7\x58\xA5\x6C"
-			  "\x1A\x9D\xE4\xE4\xF6\xB9\xB4\xB0"
-			  "\x5D\x51\x54\x9A\x53\xA0\xF9\x32"
-			  "\xBD\x31\x54\x14\x7B\x33\xEE\x17"
-			  "\xD3\xC7\x1F\x48\xBF\x0B\x22\xA2"
-			  "\x7D\x0C\xDF\xD0\x2E\x98\xFA\xD2"
-			  "\xFA\xCF\x24\x1D\x99\x9B\xD0\x7E"
-			  "\xF4\x4F\x88\xFF\x45\x99\x4A\xF4"
-			  "\xF2\x0A\x5B\x3B\x21\xAB\x92\xAE"
-			  "\x40\x78\x91\x95\xC4\x2F\xA3\xE8"
-			  "\x18\xC7\x07\xA6\xC8\xC0\x66\x33"
-			  "\x35\xC0\xB4\xA0\xF8\xEE\x1E\xF3"
-			  "\x40\xF5\x40\x54\xF1\x84\x8C\xEA"
-			  "\x27\x38\x1F\xF8\x77\xC7\xDF\xD8"
-			  "\x1D\xE2\xD9\x59\x40\x4F\x59\xD4"
-			  "\xF8\x17\x99\x8D\x58\x2D\x72\x44"
-			  "\x9D\x1D\x91\x64\xD6\x3F\x0A\x82"
-			  "\xC7\x57\x3D\xEF\xD3\x41\xFA\xA7"
-			  "\x68\xA3\xB8\xA5\x93\x74\x2E\x85"
-			  "\x4C\x9D\x69\x59\xCE\x15\xAE\xBF"
-			  "\x9C\x8F\x14\x64\x5D\x7F\xCF\x0B"
-			  "\xCE\x43\x5D\x28\xC0\x2F\xFB\x18"
-			  "\x79\x9A\xFC\x43\x16\x7C\x6B\x7B"
-			  "\x38\xB8\x48\x36\x66\x4E\x20\x43"
-			  "\xBA\x76\x13\x9A\xC3\xF2\xEB\x52"
-			  "\xD7\xDC\xB2\x67\x63\x14\x25\xCD"
-			  "\xB1\x13\x4B\xDE\x8C\x59\x21\x84"
-			  "\x81\x8D\x97\x23\x45\x33\x7C\xF3"
-			  "\xC5\xBC\x79\x95\xAA\x84\x68\x31"
-			  "\x2D\x1A\x68\xFE\xEC\x92\x94\xDA"
-			  "\x94\x2A\x6F\xD6\xFE\xE5\x76\x97"
-			  "\xF4\x6E\xEE\xCB\x2B\x95\x4E\x36"
-			  "\x5F\x74\x8C\x86\x5B\x71\xD0\x20"
-			  "\x78\x1A\x7F\x18\x8C\xD9\xCD\xF5"
-			  "\x21\x41\x56\x72\x13\xE1\x86\x07"
-			  "\x07\x26\xF3\x4F\x7B\xEA\xB5\x18"
-			  "\xFE\x94\x2D\x9F\xE0\x72\x18\x65"
-			  "\xB2\xA5\x63\x48\xB4\x13\x22\xF7"
-			  "\x25\xF1\x80\xA8\x7F\x54\x86\x7B"
-			  "\x39\xAE\x95\x0C\x09\x32\x22\x2D"
-			  "\x4D\x73\x39\x0C\x09\x2C\x7C\x10"
-			  "\xD0\x4B\x53\xF6\x90\xC5\x99\x2F"
-			  "\x15\xE1\x7F\xC6\xC5\x7A\x52\x14"
-			  "\x65\xEE\x93\x54\xD0\x66\x15\x3C"
-			  "\x4C\x68\xFD\x64\x0F\xF9\x10\x39"
-			  "\x46\x7A\xDD\x97\x20\xEE\xC7\xD2"
-			  "\x98\x4A\xB6\xE6\xF5\xA8\x1F\x4F"
-			  "\xDB\xAB\x6D\xD5\x9B\x34\x16\x97"
-			  "\x2F\x64\xE5\x37\xEF\x0E\xA1\xE9"
-			  "\xBE\x31\x31\x96\x8B\x40\x18\x75"
-			  "\x11\x75\x14\x32\xA5\x2D\x1B\x6B"
-			  "\xDB\x59\xEB\xFA\x3D\x8E\x7C\xC4"
-			  "\xDE\x68\xC8\x9F\xC9\x99\xE3\xC6"
-			  "\x71\xB0\x12\x57\x89\x0D\xC0\x2B"
-			  "\x9F\x12\x6A\x04\x67\xF1\x95\x31"
-			  "\x59\xFD\x84\x95\x2C\x9C\x5B\xEC"
-			  "\x09\xB0\x43\x96\x4A\x64\x80\x40"
-			  "\xB9\x72\x19\xDD\x70\x42\xFA\xB1"
-			  "\x4A\x2C\x0C\x0A\x60\x6E\xE3\x7C"
-			  "\x37\x5A\xBE\xA4\x62\xCF\x29\xAB"
-			  "\x7F\x4D\xA6\xB3\xE2\xB6\x64\xC6"
-			  "\x33\x0B\xF3\xD5\x01\x38\x74\xA4"
-			  "\x67\x1E\x75\x68\xC3\xAD\x76\xE9"
-			  "\xE9\xBC\xF0\xEB\xD8\xFD\x31\x8A"
-			  "\x5F\xC9\x18\x94\x4B\x86\x66\xFC"
-			  "\xBD\x0B\x3D\xB3\x9F\xFA\x1F\xD9"
-			  "\x78\xC4\xE3\x24\x1C\x67\xA2\xF8"
-			  "\x43\xBC\x76\x75\xBF\x6C\x05\xB3"
-			  "\x32\xE8\x7C\x80\xDB\xC7\xB6\x61"
-			  "\x1A\x3E\x2B\xA7\x25\xED\x8F\xA0"
-			  "\x00\x4B\xF8\x90\xCA\xD8\xFB\x12"
-			  "\xAC\x1F\x18\xE9\xD2\x5E\xA2\x8E"
-			  "\xE4\x84\x6B\x9D\xEB\x1E\x6B\xA3"
-			  "\x7B\xDC\xCE\x15\x97\x27\xB2\x65"
-			  "\xBC\x0E\x47\xAB\x55\x13\x53\xAB"
-			  "\x0E\x34\x55\x02\x5F\x27\xC5\x89"
-			  "\xDF\xC5\x70\xC4\xDD\x76\x82\xEE"
-			  "\x68\xA6\x09\xB0\xE5\x5E\xF1\x0C"
-			  "\xE3\xF3\x09\x9B\xFE\x65\x4B\xB8"
-			  "\x30\xEC\xD5\x7C\x6A\xEC\x1D\xD2"
-			  "\x93\xB7\xA1\x1A\x02\xD4\xC0\xD6"
-			  "\x8D\x4D\x83\x9A\xED\x29\x4E\x14"
-			  "\x86\xD5\x3C\x1A\xD5\xB9\x0A\x6A"
-			  "\x72\x22\xD5\x92\x38\xF1\xA1\x86"
-			  "\xB2\x41\x51\xCA\x4E\xAB\x8F\xD3"
-			  "\x80\x56\xC3\xD7\x65\xE1\xB3\x86"
-			  "\xCB\xCE\x98\xA1\xD4\x59\x1C\x06"
-			  "\x01\xED\xF8\x29\x91\x19\x5C\x9A"
-			  "\xEE\x28\x1B\x48\xD7\x32\xEF\x9F"
-			  "\x6C\x2B\x66\x4E\x78\xD5\x8B\x72"
-			  "\x80\xE7\x29\xDC\x23\x55\x98\x54"
-			  "\xB1\xFF\x3E\x95\x56\xA8\x78\x78"
-			  "\xEF\xC4\xA5\x11\x2D\x2B\xD8\x93"
-			  "\x30\x6E\x7E\x51\xBB\x42\x5F\x03"
-			  "\x43\x94\x23\x7E\xEE\xF0\xA5\x79"
-			  "\x55\x01\xD4\x58\xB2\xF2\x85\x49"
-			  "\x70\xC5\xB9\x0B\x3B\x7A\x6E\x6C",
-		.ilen	= 1008,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
-			  "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
-			  "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
-			  "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
-			  "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
-			  "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
-			  "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
-			  "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
-			  "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
-			  "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
-			  "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
-			  "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
-			  "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
-			  "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
-			  "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
-			  "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
-			  "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
-			  "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
-			  "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
-			  "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
-			  "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
-			  "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
-			  "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
-			  "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
-			  "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
-			  "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
-			  "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
-			  "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
-			  "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
-			  "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
-			  "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
-			  "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
-			  "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
-			  "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
-			  "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
-			  "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
-			  "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
-			  "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
-			  "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
-			  "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
-			  "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
-			  "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
-			  "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
-			  "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
-			  "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
-			  "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
-			  "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
-			  "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
-			  "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
-			  "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
-			  "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
-			  "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
-			  "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
-			  "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
-			  "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
-			  "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
-			  "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
-			  "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
-			  "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
-			  "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
-			  "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
-			  "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
-			  "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
-			  "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
-		.rlen	= 1008,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 1008 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec camellia_ctr_enc_tv_template[] = {
+static const struct cipher_testvec camellia_ctr_tv_template[] = {
 	{ /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -31266,7 +28404,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -31328,8 +28466,7 @@
 			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
 			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
 			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.ilen	= 496,
-		.result	= "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11"
+		.ctext	= "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11"
 			  "\xB7\x74\x6F\x5C\x97\xFB\x36\xFE"
 			  "\xDE\x71\x58\xD4\x15\xD1\xC1\xA4"
 			  "\xC9\x28\x74\xA6\x6B\xC7\x95\xA6"
@@ -31391,7 +28528,7 @@
 			  "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48"
 			  "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0"
 			  "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D",
-		.rlen	= 496,
+		.len	= 496,
 	}, { /* Generated with Crypto++ */
 		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
 			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
@@ -31400,7 +28537,7 @@
 		.klen	= 32,
 		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
 			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -31527,8 +28664,7 @@
 			  "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
 			  "\x72\x09\xA0\x14\xAB\x42\xD9\x4D"
 			  "\xE4\x7B\x12",
-		.ilen	= 1011,
-		.result	= "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11"
+		.ctext	= "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11"
 			  "\xB7\x74\x6F\x5C\x97\xFB\x36\xFE"
 			  "\xDE\x71\x58\xD4\x15\xD1\xC1\xA4"
 			  "\xC9\x28\x74\xA6\x6B\xC7\x95\xA6"
@@ -31655,7 +28791,7 @@
 			  "\x45\xE3\x35\x0D\x69\x91\x54\x1C"
 			  "\xE7\x2C\x49\x08\x8B\x72\xFA\x5C"
 			  "\xF1\x6B\xD9",
-		.rlen	= 1011,
+		.len	= 1011,
 		.also_non_np = 1,
 		.np	= 2,
 		.tap	= { 1011 - 16, 16 },
@@ -31667,7 +28803,7 @@
 		.klen	= 32,
 		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
 			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
+		.ptext	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
 			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
 			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
 			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
@@ -31793,8 +28929,7 @@
 			  "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
 			  "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
 			  "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
-		.ilen	= 1008,
-		.result	= "\x85\x79\x6C\x8B\x2B\x6D\x14\xF9"
+		.ctext	= "\x85\x79\x6C\x8B\x2B\x6D\x14\xF9"
 			  "\xA6\x83\xB6\x80\x5B\x3A\xF3\x7E"
 			  "\x30\x29\xEB\x1F\xDC\x19\x5F\xEB"
 			  "\xF7\xC4\x27\x04\x51\x87\xD7\x6F"
@@ -31920,678 +29055,11 @@
 			  "\xC5\x9B\x03\x70\x29\x2A\x49\x09"
 			  "\x67\xA1\xEA\xD6\x3A\x5B\xBF\x71"
 			  "\x1D\x48\x64\x6C\xFB\xC0\x9E\x36",
-		.rlen	= 1008,
+		.len	= 1008,
 	},
 };
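
The CTR vectors exercise keystream generation: a big-endian block counter seeded from the IV is encrypted and XORed into the data 16 bytes at a time. Note the 1011-byte case (a partial final block) and the last vector, whose IV ends in ...\xFF\xFD so that the counter wraps and carries through every byte mid-message. A rough sketch, assuming a 16-byte block encryptor:

	#include <string.h>	/* memcpy */

	static void ctr_crypt(void (*blk_enc)(const unsigned char *key,
					      const unsigned char in[16],
					      unsigned char out[16]),
			      const unsigned char *key,
			      const unsigned char iv[16],
			      const unsigned char *in, unsigned char *out,
			      unsigned int len)
	{
		unsigned char ctr[16], ks[16];
		unsigned int i, j;
		int k;

		memcpy(ctr, iv, 16);
		for (i = 0; i < len; i += 16) {
			blk_enc(key, ctr, ks);		/* keystream block */
			for (j = 0; j < 16 && i + j < len; j++)
				out[i + j] = in[i + j] ^ ks[j];	/* short tail OK */
			for (k = 15; k >= 0; k--)	/* big-endian increment */
				if (++ctr[k])
					break;
		}
	}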
 
-static const struct cipher_testvec camellia_ctr_dec_tv_template[] = {
-	{ /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11"
-			  "\xB7\x74\x6F\x5C\x97\xFB\x36\xFE"
-			  "\xDE\x71\x58\xD4\x15\xD1\xC1\xA4"
-			  "\xC9\x28\x74\xA6\x6B\xC7\x95\xA6"
-			  "\x6C\x77\xF7\x2F\xDF\xC7\xBB\x85"
-			  "\x60\xFC\xE8\x94\xE8\xB5\x09\x2C"
-			  "\x1E\x43\xEF\x6C\xE9\x98\xC5\xA0"
-			  "\x7B\x13\xE5\x7F\xF8\x49\x9A\x8C"
-			  "\xE6\x7B\x08\xC3\x32\x66\x55\x4E"
-			  "\xA5\x44\x1D\x2C\x18\xC7\x29\x1F"
-			  "\x61\x28\x4A\xE3\xCD\xE5\x47\xB2"
-			  "\x82\x2F\x66\x83\x91\x51\xAE\xD7"
-			  "\x1C\x91\x3C\x57\xE3\x1D\x5A\xC9"
-			  "\xFD\xC5\x58\x58\xEF\xCC\x33\xC9"
-			  "\x0F\xEA\x26\x32\xD1\x15\x19\x2D"
-			  "\x25\xB4\x7F\xB0\xDF\xFB\x88\x60"
-			  "\x4E\x4D\x06\x7D\xCC\x1F\xED\x3B"
-			  "\x68\x84\xD5\xB3\x1B\xE7\xB9\xA1"
-			  "\x68\x8B\x2C\x1A\x44\xDA\x63\xD3"
-			  "\x29\xE9\x59\x32\x1F\x30\x1C\x43"
-			  "\xEA\x3A\xA3\x6B\x54\x3C\xAA\x11"
-			  "\xAD\x38\x20\xC9\xB9\x8A\x64\x66"
-			  "\x5A\x07\x49\xDF\xA1\x9C\xF9\x76"
-			  "\x36\x65\xB6\x81\x8F\x76\x09\xE5"
-			  "\xEB\xD1\x29\xA4\xE4\xF4\x4C\xCD"
-			  "\xAF\xFC\xB9\x16\xD9\xC3\x73\x6A"
-			  "\x33\x12\xF8\x7E\xBC\xCC\x7D\x80"
-			  "\xBF\x3C\x25\x06\x13\x84\xFA\x35"
-			  "\xF7\x40\xFA\xA1\x44\x13\x70\xD8"
-			  "\x01\xF9\x85\x15\x63\xEC\x7D\xB9"
-			  "\x02\xD8\xBA\x41\x6C\x92\x68\x66"
-			  "\x95\xDD\xD6\x42\xE7\xBB\xE1\xFD"
-			  "\x28\x3E\x94\xB6\xBD\xA7\xBF\x47"
-			  "\x58\x8D\xFF\x19\x30\x75\x0D\x48"
-			  "\x94\xE9\xA6\xCD\xB3\x8E\x1E\xCD"
-			  "\x59\xBC\x1A\xAC\x3C\x4F\xA9\xEB"
-			  "\xF4\xA7\xE4\x75\x4A\x18\x40\xC9"
-			  "\x1E\xEC\x06\x9C\x28\x4B\xF7\x2B"
-			  "\xE2\xEF\xD6\x42\x2E\xBB\xFC\x0A"
-			  "\x79\xA2\x99\x28\x93\x1B\x00\x57"
-			  "\x35\x1E\x1A\x93\x90\xA4\x68\x95"
-			  "\x5E\x57\x40\xD5\xA9\xAA\x19\x48"
-			  "\xEC\xFF\x76\x77\xDC\x78\x89\x76"
-			  "\xE5\x3B\x00\xEC\x58\x4D\xD1\xE3"
-			  "\xC8\x6C\x2C\x45\x5E\x5F\xD9\x4E"
-			  "\x71\xA5\x36\x6D\x03\xF1\xC7\xD5"
-			  "\xF3\x63\xC0\xD8\xCB\x2B\xF1\xA8"
-			  "\xB9\x2B\xE6\x0B\xB9\x65\x78\xA0"
-			  "\xC4\x46\xE6\x9B\x8B\x43\x2D\xAB"
-			  "\x70\xA6\xE0\x59\x1E\xAC\x9D\xE0"
-			  "\x76\x44\x45\xF3\x24\x11\x57\x98"
-			  "\x9A\x86\xB4\x12\x80\x28\x86\x20"
-			  "\x23\x9D\x2D\xE9\x38\x32\xB1\xE1"
-			  "\xCF\x0A\x23\x73\x7D\xC5\x80\x3D"
-			  "\x9F\x6D\xA0\xD0\xEE\x93\x8A\x79"
-			  "\x3A\xDD\x1D\xBB\x9E\x26\x5D\x01"
-			  "\x44\xD0\xD4\x4E\xC3\xF1\xE4\x38"
-			  "\x09\x62\x0A\x1A\x4E\xD2\x63\x0F"
-			  "\x6E\x3E\xD2\xA4\x3A\xF4\xF3\xFF"
-			  "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48"
-			  "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0"
-			  "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D",
-		.ilen	= 496,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7",
-		.rlen	= 496,
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xE2\x24\x89\xEE\x53\xB8\x1D\x5F"
-			  "\xC4\x29\x8E\xF3\x35\x9A\xFF\x64",
-		.input	= "\xF3\x06\x3A\x84\xCD\xBA\x8E\x11"
-			  "\xB7\x74\x6F\x5C\x97\xFB\x36\xFE"
-			  "\xDE\x71\x58\xD4\x15\xD1\xC1\xA4"
-			  "\xC9\x28\x74\xA6\x6B\xC7\x95\xA6"
-			  "\x6C\x77\xF7\x2F\xDF\xC7\xBB\x85"
-			  "\x60\xFC\xE8\x94\xE8\xB5\x09\x2C"
-			  "\x1E\x43\xEF\x6C\xE9\x98\xC5\xA0"
-			  "\x7B\x13\xE5\x7F\xF8\x49\x9A\x8C"
-			  "\xE6\x7B\x08\xC3\x32\x66\x55\x4E"
-			  "\xA5\x44\x1D\x2C\x18\xC7\x29\x1F"
-			  "\x61\x28\x4A\xE3\xCD\xE5\x47\xB2"
-			  "\x82\x2F\x66\x83\x91\x51\xAE\xD7"
-			  "\x1C\x91\x3C\x57\xE3\x1D\x5A\xC9"
-			  "\xFD\xC5\x58\x58\xEF\xCC\x33\xC9"
-			  "\x0F\xEA\x26\x32\xD1\x15\x19\x2D"
-			  "\x25\xB4\x7F\xB0\xDF\xFB\x88\x60"
-			  "\x4E\x4D\x06\x7D\xCC\x1F\xED\x3B"
-			  "\x68\x84\xD5\xB3\x1B\xE7\xB9\xA1"
-			  "\x68\x8B\x2C\x1A\x44\xDA\x63\xD3"
-			  "\x29\xE9\x59\x32\x1F\x30\x1C\x43"
-			  "\xEA\x3A\xA3\x6B\x54\x3C\xAA\x11"
-			  "\xAD\x38\x20\xC9\xB9\x8A\x64\x66"
-			  "\x5A\x07\x49\xDF\xA1\x9C\xF9\x76"
-			  "\x36\x65\xB6\x81\x8F\x76\x09\xE5"
-			  "\xEB\xD1\x29\xA4\xE4\xF4\x4C\xCD"
-			  "\xAF\xFC\xB9\x16\xD9\xC3\x73\x6A"
-			  "\x33\x12\xF8\x7E\xBC\xCC\x7D\x80"
-			  "\xBF\x3C\x25\x06\x13\x84\xFA\x35"
-			  "\xF7\x40\xFA\xA1\x44\x13\x70\xD8"
-			  "\x01\xF9\x85\x15\x63\xEC\x7D\xB9"
-			  "\x02\xD8\xBA\x41\x6C\x92\x68\x66"
-			  "\x95\xDD\xD6\x42\xE7\xBB\xE1\xFD"
-			  "\x28\x3E\x94\xB6\xBD\xA7\xBF\x47"
-			  "\x58\x8D\xFF\x19\x30\x75\x0D\x48"
-			  "\x94\xE9\xA6\xCD\xB3\x8E\x1E\xCD"
-			  "\x59\xBC\x1A\xAC\x3C\x4F\xA9\xEB"
-			  "\xF4\xA7\xE4\x75\x4A\x18\x40\xC9"
-			  "\x1E\xEC\x06\x9C\x28\x4B\xF7\x2B"
-			  "\xE2\xEF\xD6\x42\x2E\xBB\xFC\x0A"
-			  "\x79\xA2\x99\x28\x93\x1B\x00\x57"
-			  "\x35\x1E\x1A\x93\x90\xA4\x68\x95"
-			  "\x5E\x57\x40\xD5\xA9\xAA\x19\x48"
-			  "\xEC\xFF\x76\x77\xDC\x78\x89\x76"
-			  "\xE5\x3B\x00\xEC\x58\x4D\xD1\xE3"
-			  "\xC8\x6C\x2C\x45\x5E\x5F\xD9\x4E"
-			  "\x71\xA5\x36\x6D\x03\xF1\xC7\xD5"
-			  "\xF3\x63\xC0\xD8\xCB\x2B\xF1\xA8"
-			  "\xB9\x2B\xE6\x0B\xB9\x65\x78\xA0"
-			  "\xC4\x46\xE6\x9B\x8B\x43\x2D\xAB"
-			  "\x70\xA6\xE0\x59\x1E\xAC\x9D\xE0"
-			  "\x76\x44\x45\xF3\x24\x11\x57\x98"
-			  "\x9A\x86\xB4\x12\x80\x28\x86\x20"
-			  "\x23\x9D\x2D\xE9\x38\x32\xB1\xE1"
-			  "\xCF\x0A\x23\x73\x7D\xC5\x80\x3D"
-			  "\x9F\x6D\xA0\xD0\xEE\x93\x8A\x79"
-			  "\x3A\xDD\x1D\xBB\x9E\x26\x5D\x01"
-			  "\x44\xD0\xD4\x4E\xC3\xF1\xE4\x38"
-			  "\x09\x62\x0A\x1A\x4E\xD2\x63\x0F"
-			  "\x6E\x3E\xD2\xA4\x3A\xF4\xF3\xFF"
-			  "\x7E\x42\xEC\xB6\x6F\x4D\x6B\x48"
-			  "\xE6\xA6\x50\x80\x78\x9E\xF1\xB0"
-			  "\x4D\xB2\x0D\x3D\xFC\x40\x25\x4D"
-			  "\x93\x11\x1C\xE9\xD2\x9F\x6E\x90"
-			  "\xE5\x41\x4A\xE2\x3C\x45\x29\x35"
-			  "\xEC\xD6\x47\x50\xCB\x7B\xA2\x32"
-			  "\xF7\x8B\x62\xF1\xE3\x9A\xFE\xC7"
-			  "\x1D\x8C\x02\x72\x68\x09\xE9\xB6"
-			  "\x4A\x80\xE6\xB1\x56\xDF\x90\xD4"
-			  "\x93\x74\xA4\xCE\x20\x23\xBF\x48"
-			  "\xA5\xDE\x1B\xFA\x40\x69\x31\x98"
-			  "\x62\x6E\xA5\xC7\xBF\x0C\x62\xE5"
-			  "\x6D\xE1\x93\xF1\x83\x10\x1C\xCA"
-			  "\xF6\x5C\x19\xF8\x90\x78\xCB\xE4"
-			  "\x0B\x3A\xB5\xF8\x43\x86\xD3\x3F"
-			  "\xBA\x83\x34\x3C\x42\xCC\x7D\x28"
-			  "\x29\x63\x4F\xD8\x02\x17\xC5\x07"
-			  "\x2C\xA4\xAC\x79\xCB\xC3\xA9\x09"
-			  "\x81\x45\x18\xED\xE4\xCB\x42\x3B"
-			  "\x87\x2D\x23\xDC\xC5\xBA\x45\xBD"
-			  "\x92\xE5\x02\x97\x96\xCE\xAD\xEC"
-			  "\xBA\xD8\x76\xF8\xCA\xC1\x31\xEC"
-			  "\x1E\x4F\x3F\x83\xF8\x33\xE8\x6E"
-			  "\xCC\xF8\x5F\xDD\x65\x50\x99\x69"
-			  "\xAF\x48\xCE\xA5\xBA\xB6\x14\x9F"
-			  "\x05\x93\xB2\xE6\x59\xC8\x28\xFE"
-			  "\x8F\x37\xF9\x64\xB9\xA5\x56\x8F"
-			  "\xF1\x1B\x90\xEF\xAE\xEB\xFC\x09"
-			  "\x11\x7A\xF2\x19\x0A\x0A\x9A\x3C"
-			  "\xE2\x5E\x29\xFA\x31\x9B\xC1\x74"
-			  "\x1E\x10\x3E\x07\xA9\x31\x6D\xF8"
-			  "\x81\xF5\xD5\x8A\x04\x23\x51\xAC"
-			  "\xA2\xE2\x63\xFD\x27\x1F\x79\x5B"
-			  "\x1F\xE8\xDA\x11\x49\x4D\x1C\xBA"
-			  "\x54\xCC\x0F\xBA\x92\x69\xE5\xCB"
-			  "\x41\x1A\x67\xA6\x40\x82\x70\x8C"
-			  "\x19\x79\x08\xA4\x51\x20\x7D\xC9"
-			  "\x12\x27\xAE\x20\x0D\x2C\xA1\x6D"
-			  "\xF4\x55\xD4\xE7\xE6\xD4\x28\x08"
-			  "\x00\x70\x12\x56\x56\x50\xAD\x14"
-			  "\x5C\x3E\xA2\xD1\x36\x3F\x36\x48"
-			  "\xED\xB1\x57\x3E\x5D\x15\xF6\x1E"
-			  "\x53\xE9\xA4\x3E\xED\x7D\xCF\x7D"
-			  "\x29\xAF\xF3\x1E\x51\xA8\x9F\x85"
-			  "\x8B\xF0\xBB\xCE\xCC\x39\xC3\x64"
-			  "\x4B\xF2\xAD\x70\x19\xD4\x44\x8F"
-			  "\x91\x76\xE8\x15\x66\x34\x9F\xF6"
-			  "\x0F\x15\xA4\xA8\x24\xF8\x58\xB1"
-			  "\x38\x46\x47\xC7\x9B\xCA\xE9\x42"
-			  "\x44\xAA\xE6\xB5\x9C\x91\xA4\xD3"
-			  "\x16\xA0\xED\x42\xBE\xB5\x06\x19"
-			  "\xBE\x67\xE8\xBC\x22\x32\xA4\x1E"
-			  "\x93\xEB\xBE\xE9\xE1\x93\xE5\x31"
-			  "\x3A\xA2\x75\xDF\xE3\x6B\xE7\xCC"
-			  "\xB4\x70\x20\xE0\x6D\x82\x7C\xC8"
-			  "\x94\x5C\x5E\x37\x18\xAD\xED\x8B"
-			  "\x44\x86\xCA\x5E\x07\xB7\x70\x8D"
-			  "\x40\x48\x19\x73\x7C\x78\x64\x0B"
-			  "\xDB\x01\xCA\xAE\x63\x19\xE9\xD1"
-			  "\x6B\x2C\x84\x10\x45\x42\x2E\xC3"
-			  "\xDF\x7F\xAA\xE8\x87\x1B\x63\x46"
-			  "\x74\x28\x9D\x05\x30\x20\x62\x41"
-			  "\xC0\x9F\x2C\x36\x2B\x78\xD7\x26"
-			  "\xDF\x58\x51\xED\xFA\xDC\x87\x79"
-			  "\xBF\x8C\xBF\xC4\x0F\xE5\x05\xDA"
-			  "\x45\xE3\x35\x0D\x69\x91\x54\x1C"
-			  "\xE7\x2C\x49\x08\x8B\x72\xFA\x5C"
-			  "\xF1\x6B\xD9",
-		.ilen	= 1011,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
-			  "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
-			  "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
-			  "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
-			  "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
-			  "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
-			  "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
-			  "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
-			  "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
-			  "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
-			  "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
-			  "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
-			  "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
-			  "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
-			  "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
-			  "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
-			  "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
-			  "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
-			  "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
-			  "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
-			  "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
-			  "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
-			  "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
-			  "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
-			  "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
-			  "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
-			  "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
-			  "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
-			  "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
-			  "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
-			  "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
-			  "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
-			  "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
-			  "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
-			  "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
-			  "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
-			  "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
-			  "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
-			  "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
-			  "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
-			  "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
-			  "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
-			  "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
-			  "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
-			  "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
-			  "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
-			  "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
-			  "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
-			  "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
-			  "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
-			  "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
-			  "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
-			  "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
-			  "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
-			  "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
-			  "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
-			  "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
-			  "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
-			  "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
-			  "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
-			  "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
-			  "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
-			  "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
-			  "\x72\x09\xA0\x14\xAB\x42\xD9\x4D"
-			  "\xE4\x7B\x12",
-		.rlen	= 1011,
-		.also_non_np = 1,
-		.np	= 2,
-		.tap	= { 1011 - 16, 16 },
-	}, { /* Generated with Crypto++ */
-		.key	= "\x85\x62\x3F\x1C\xF9\xD6\x1C\xF9"
-			  "\xD6\xB3\x90\x6D\x4A\x90\x6D\x4A"
-			  "\x27\x04\xE1\x27\x04\xE1\xBE\x9B"
-			  "\x78\xBE\x9B\x78\x55\x32\x0F\x55",
-		.klen	= 32,
-		.iv	= "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"
-			  "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD",
-		.input	= "\x85\x79\x6C\x8B\x2B\x6D\x14\xF9"
-			  "\xA6\x83\xB6\x80\x5B\x3A\xF3\x7E"
-			  "\x30\x29\xEB\x1F\xDC\x19\x5F\xEB"
-			  "\xF7\xC4\x27\x04\x51\x87\xD7\x6F"
-			  "\xB8\x4E\x07\xFB\xAC\x3B\x08\xB4"
-			  "\x4D\xCB\xE8\xE1\x71\x7D\x4F\x48"
-			  "\xCD\x81\x64\xA5\xC4\x07\x1A\x9A"
-			  "\x4B\x62\x90\x0E\xC8\xB3\x2B\x6B"
-			  "\x8F\x9C\x6E\x72\x4B\xBA\xEF\x07"
-			  "\x2C\x56\x07\x5E\x37\x30\x60\xA9"
-			  "\xE3\xEF\xD6\x69\xE1\xA1\x77\x64"
-			  "\x93\x75\x7A\xB7\x7A\x3B\xE9\x43"
-			  "\x23\x35\x95\x91\x80\x8A\xC7\xCF"
-			  "\xC3\xD5\xBF\xE7\xFE\x4C\x06\x6B"
-			  "\x05\x19\x48\xE2\x62\xBA\x4F\xF2"
-			  "\xFB\xEE\xE4\xCB\x79\x9D\xA3\x10"
-			  "\x1D\x29\x8C\x1D\x7A\x88\x5A\xDD"
-			  "\x4E\xB6\x18\xAA\xCD\xE6\x33\x96"
-			  "\xD9\x0F\x90\x5A\x78\x76\x4D\x77"
-			  "\x3C\x20\x89\x3B\xA3\xF9\x07\xFD"
-			  "\xE4\xE8\x20\x2D\x15\x0A\x63\x49"
-			  "\xF5\x4F\x89\xD8\xDE\xA1\x28\x78"
-			  "\x28\x07\x09\x1B\x03\x94\x1D\x4B"
-			  "\x82\x28\x1E\x1D\x95\xBA\xAC\x85"
-			  "\x71\x6E\x3C\x18\x4B\x77\x74\x79"
-			  "\xBF\x67\x0A\x53\x3C\x94\xD9\x60"
-			  "\xE9\x6D\x40\x34\xA0\x2A\x53\x5D"
-			  "\x27\xD5\x47\xF9\xC3\x4B\x27\x29"
-			  "\xE4\x76\x9C\x3F\xA7\x1C\x87\xFC"
-			  "\x6E\x0F\xCF\x9B\x60\xF0\xF0\x8B"
-			  "\x70\x1C\x84\x81\x72\x4D\xB4\x98"
-			  "\x23\x62\xE7\x6A\x2B\xFC\xA5\xB2"
-			  "\xFF\xF5\x71\x07\xCD\x90\x23\x13"
-			  "\x19\xD7\x79\x36\x6C\x9D\x55\x8B"
-			  "\x93\x78\x86\x05\x69\x46\xD0\xC5"
-			  "\x39\x09\xEB\x79\xEF\xFA\x9F\xAE"
-			  "\xF3\xD5\x44\xC3\xFD\x86\xD2\x7C"
-			  "\x83\x4B\xD8\x75\x9C\x18\x04\x7B"
-			  "\x73\xAD\x72\xA4\xF6\xAB\xCF\x4B"
-			  "\xCC\x01\x45\x90\xA6\x43\x05\x0C"
-			  "\x6C\x4F\x62\x77\x57\x97\x9F\xEE"
-			  "\x75\xA7\x3C\x38\xD1\x0F\x3D\x0E"
-			  "\x2C\x43\x98\xFB\x13\x65\x73\xE4"
-			  "\x3C\x1E\xD6\x90\x08\xF7\xE0\x99"
-			  "\x3B\xF1\x9D\x6C\x48\xA9\x0E\x32"
-			  "\x17\xC2\xCC\x20\xA1\x19\x26\xAA"
-			  "\xE0\x75\x2F\xFB\x54\x66\x0A\xDF"
-			  "\xB5\xF2\x1F\xC1\x34\x3C\x30\x56"
-			  "\xE8\xDC\xF7\x92\x6B\xBF\x17\x24"
-			  "\xEC\x94\xB5\x3B\xD6\xCE\xA2\x54"
-			  "\x10\x7F\x50\xDE\x69\x77\xD5\x37"
-			  "\xFE\x9C\x10\x83\xC5\xEB\xC9\x53"
-			  "\xB7\xF3\xC4\x20\xAF\x0A\x7E\x57"
-			  "\x3A\xE6\x75\xFE\x89\x00\x6E\x48"
-			  "\xFB\x99\x17\x2C\xF6\x64\x40\x95"
-			  "\x5E\xDC\x7A\xA6\x70\xC7\xF4\xDD"
-			  "\x52\x05\x24\x34\xF9\x0E\xC8\x64"
-			  "\x6D\xE2\xD8\x80\x53\x31\x4C\xFE"
-			  "\xB4\x3A\x5F\x19\xCF\x42\x1B\x22"
-			  "\x0B\x2D\x7B\xF1\xC5\x43\xF7\x5E"
-			  "\x12\xA8\x01\x64\x16\x0B\x26\x5A"
-			  "\x0C\x95\x0F\x40\xC5\x5A\x06\x7C"
-			  "\xCF\xF5\xD5\xB7\x7A\x34\x23\xB6"
-			  "\xAA\x9E\xA8\x98\xA2\xF8\x3D\xD3"
-			  "\x3F\x23\x69\x63\x56\x96\x45\xD6"
-			  "\x74\x23\x1D\x5C\x63\xCC\xD8\x78"
-			  "\x16\xE2\x9C\xD2\x80\x02\xF2\x28"
-			  "\x69\x2F\xC4\xA8\x15\x15\x24\x3B"
-			  "\xCB\xF0\x14\xE4\x62\xC8\xF3\xD1"
-			  "\x03\x58\x1B\x33\x77\x74\x1F\xB4"
-			  "\x07\x86\xF2\x21\xB7\x41\xAE\xBF"
-			  "\x25\xC2\xFF\x51\xEF\xEA\xCE\xC4"
-			  "\x5F\xD9\xB8\x18\x6A\xF0\x0F\x0D"
-			  "\xF8\x04\xBB\x6D\x62\x33\x87\x26"
-			  "\x4F\x2F\x14\x6E\xDC\xDB\x66\x09"
-			  "\x2A\xEF\x7D\x84\x10\xAC\x82\x5E"
-			  "\xD2\xE4\xAD\x74\x7A\x6D\xCC\x3A"
-			  "\x7B\x62\xD8\xD6\x07\x2D\xF7\xDF"
-			  "\x9B\xB3\x82\xCF\x9C\x1D\x76\x5C"
-			  "\xAC\x7B\xD4\x9B\x45\xA1\x64\x11"
-			  "\x66\xF1\xA7\x0B\xF9\xDD\x00\xDD"
-			  "\xA4\x45\x3D\x3E\x03\xC9\x2E\xCB"
-			  "\xC3\x14\x84\x72\xFD\x41\xDC\xBD"
-			  "\x75\xBE\xA8\xE5\x16\x48\x64\x39"
-			  "\xCA\xF3\xE6\xDC\x25\x24\xF1\x6D"
-			  "\xB2\x8D\xC5\x38\x54\xD3\x5D\x6D"
-			  "\x0B\x29\x10\x15\x0E\x13\x3B\xAC"
-			  "\x7E\xCC\x9E\x3E\x18\x48\xA6\x02"
-			  "\xEF\x03\xB2\x2E\xE3\xD2\x70\x21"
-			  "\xB4\x19\x26\xBE\x3A\x3D\x05\xE0"
-			  "\xF8\x09\xAF\xE4\x31\x26\x92\x2F"
-			  "\x8F\x55\xAC\xED\x0B\xB2\xA5\x34"
-			  "\xBE\x50\xB1\x02\x22\x96\xE3\x40"
-			  "\x7B\x70\x50\x6E\x3B\xD5\xE5\xA0"
-			  "\x8E\xA2\xAD\x14\x60\x5C\x7A\x2B"
-			  "\x3D\x1B\x7F\xC1\xC0\x2C\x56\x36"
-			  "\xD2\x0A\x32\x06\x97\x34\xB9\xF4"
-			  "\x6F\x9F\x7E\x80\xD0\x9D\xF7\x6A"
-			  "\x21\xC1\xA2\x6A\xB1\x96\x5B\x4D"
-			  "\x7A\x15\x6C\xC4\x4E\xB8\xE0\x9E"
-			  "\x6C\x50\xF3\x9C\xC9\xB5\x23\xB7"
-			  "\xF1\xD4\x29\x4A\x23\xC4\xAD\x1E"
-			  "\x2C\x07\xD2\x43\x5F\x57\x93\xCA"
-			  "\x85\xF9\x9F\xAD\x4C\xF1\xE4\xB1"
-			  "\x1A\x8E\x28\xA4\xB6\x52\x77\x7E"
-			  "\x68\xC6\x47\xB9\x76\xCC\x65\x5F"
-			  "\x0B\xF9\x67\x93\xD8\x0E\x9A\x37"
-			  "\x5F\x41\xED\x64\x6C\xAD\x5F\xED"
-			  "\x3F\x8D\xFB\x8E\x1E\xA0\xE4\x1F"
-			  "\xC2\xC7\xED\x18\x43\xE1\x20\x86"
-			  "\x5D\xBC\x30\x70\x22\xA1\xDC\x53"
-			  "\x10\x3A\x8D\x47\x82\xCD\x7F\x59"
-			  "\x03\x2D\x6D\xF5\xE7\x79\xD4\x07"
-			  "\x68\x2A\xA5\x42\x19\x4D\xAF\xF5"
-			  "\xED\x47\x83\xBC\x5F\x62\x84\xDA"
-			  "\xDA\x41\xFF\xB0\x1D\x64\xA3\xC8"
-			  "\xBD\x4E\xE0\xB8\x7F\xEE\x55\x0A"
-			  "\x4E\x61\xB2\x51\xF6\x9C\x95\xF6"
-			  "\x92\xBB\xF6\xC5\xF0\x09\x86\xDE"
-			  "\x37\x9E\x29\xF9\x2A\x18\x73\x0D"
-			  "\xDC\x7E\x6B\x7B\x1B\x43\x8C\xEA"
-			  "\x13\xC8\x1A\x47\x0A\x2D\x6D\x56"
-			  "\xCD\xD2\xE7\x53\x1A\xAB\x1C\x3C"
-			  "\xC5\x9B\x03\x70\x29\x2A\x49\x09"
-			  "\x67\xA1\xEA\xD6\x3A\x5B\xBF\x71"
-			  "\x1D\x48\x64\x6C\xFB\xC0\x9E\x36",
-		.ilen	= 1008,
-		.result	= "\x56\xED\x84\x1B\x8F\x26\xBD\x31"
-			  "\xC8\x5F\xF6\x6A\x01\x98\x0C\xA3"
-			  "\x3A\xD1\x45\xDC\x73\x0A\x7E\x15"
-			  "\xAC\x20\xB7\x4E\xE5\x59\xF0\x87"
-			  "\x1E\x92\x29\xC0\x34\xCB\x62\xF9"
-			  "\x6D\x04\x9B\x0F\xA6\x3D\xD4\x48"
-			  "\xDF\x76\x0D\x81\x18\xAF\x23\xBA"
-			  "\x51\xE8\x5C\xF3\x8A\x21\x95\x2C"
-			  "\xC3\x37\xCE\x65\xFC\x70\x07\x9E"
-			  "\x12\xA9\x40\xD7\x4B\xE2\x79\x10"
-			  "\x84\x1B\xB2\x26\xBD\x54\xEB\x5F"
-			  "\xF6\x8D\x01\x98\x2F\xC6\x3A\xD1"
-			  "\x68\xFF\x73\x0A\xA1\x15\xAC\x43"
-			  "\xDA\x4E\xE5\x7C\x13\x87\x1E\xB5"
-			  "\x29\xC0\x57\xEE\x62\xF9\x90\x04"
-			  "\x9B\x32\xC9\x3D\xD4\x6B\x02\x76"
-			  "\x0D\xA4\x18\xAF\x46\xDD\x51\xE8"
-			  "\x7F\x16\x8A\x21\xB8\x2C\xC3\x5A"
-			  "\xF1\x65\xFC\x93\x07\x9E\x35\xCC"
-			  "\x40\xD7\x6E\x05\x79\x10\xA7\x1B"
-			  "\xB2\x49\xE0\x54\xEB\x82\x19\x8D"
-			  "\x24\xBB\x2F\xC6\x5D\xF4\x68\xFF"
-			  "\x96\x0A\xA1\x38\xCF\x43\xDA\x71"
-			  "\x08\x7C\x13\xAA\x1E\xB5\x4C\xE3"
-			  "\x57\xEE\x85\x1C\x90\x27\xBE\x32"
-			  "\xC9\x60\xF7\x6B\x02\x99\x0D\xA4"
-			  "\x3B\xD2\x46\xDD\x74\x0B\x7F\x16"
-			  "\xAD\x21\xB8\x4F\xE6\x5A\xF1\x88"
-			  "\x1F\x93\x2A\xC1\x35\xCC\x63\xFA"
-			  "\x6E\x05\x9C\x10\xA7\x3E\xD5\x49"
-			  "\xE0\x77\x0E\x82\x19\xB0\x24\xBB"
-			  "\x52\xE9\x5D\xF4\x8B\x22\x96\x2D"
-			  "\xC4\x38\xCF\x66\xFD\x71\x08\x9F"
-			  "\x13\xAA\x41\xD8\x4C\xE3\x7A\x11"
-			  "\x85\x1C\xB3\x27\xBE\x55\xEC\x60"
-			  "\xF7\x8E\x02\x99\x30\xC7\x3B\xD2"
-			  "\x69\x00\x74\x0B\xA2\x16\xAD\x44"
-			  "\xDB\x4F\xE6\x7D\x14\x88\x1F\xB6"
-			  "\x2A\xC1\x58\xEF\x63\xFA\x91\x05"
-			  "\x9C\x33\xCA\x3E\xD5\x6C\x03\x77"
-			  "\x0E\xA5\x19\xB0\x47\xDE\x52\xE9"
-			  "\x80\x17\x8B\x22\xB9\x2D\xC4\x5B"
-			  "\xF2\x66\xFD\x94\x08\x9F\x36\xCD"
-			  "\x41\xD8\x6F\x06\x7A\x11\xA8\x1C"
-			  "\xB3\x4A\xE1\x55\xEC\x83\x1A\x8E"
-			  "\x25\xBC\x30\xC7\x5E\xF5\x69\x00"
-			  "\x97\x0B\xA2\x39\xD0\x44\xDB\x72"
-			  "\x09\x7D\x14\xAB\x1F\xB6\x4D\xE4"
-			  "\x58\xEF\x86\x1D\x91\x28\xBF\x33"
-			  "\xCA\x61\xF8\x6C\x03\x9A\x0E\xA5"
-			  "\x3C\xD3\x47\xDE\x75\x0C\x80\x17"
-			  "\xAE\x22\xB9\x50\xE7\x5B\xF2\x89"
-			  "\x20\x94\x2B\xC2\x36\xCD\x64\xFB"
-			  "\x6F\x06\x9D\x11\xA8\x3F\xD6\x4A"
-			  "\xE1\x78\x0F\x83\x1A\xB1\x25\xBC"
-			  "\x53\xEA\x5E\xF5\x8C\x00\x97\x2E"
-			  "\xC5\x39\xD0\x67\xFE\x72\x09\xA0"
-			  "\x14\xAB\x42\xD9\x4D\xE4\x7B\x12"
-			  "\x86\x1D\xB4\x28\xBF\x56\xED\x61"
-			  "\xF8\x8F\x03\x9A\x31\xC8\x3C\xD3"
-			  "\x6A\x01\x75\x0C\xA3\x17\xAE\x45"
-			  "\xDC\x50\xE7\x7E\x15\x89\x20\xB7"
-			  "\x2B\xC2\x59\xF0\x64\xFB\x92\x06"
-			  "\x9D\x34\xCB\x3F\xD6\x6D\x04\x78"
-			  "\x0F\xA6\x1A\xB1\x48\xDF\x53\xEA"
-			  "\x81\x18\x8C\x23\xBA\x2E\xC5\x5C"
-			  "\xF3\x67\xFE\x95\x09\xA0\x37\xCE"
-			  "\x42\xD9\x70\x07\x7B\x12\xA9\x1D"
-			  "\xB4\x4B\xE2\x56\xED\x84\x1B\x8F"
-			  "\x26\xBD\x31\xC8\x5F\xF6\x6A\x01"
-			  "\x98\x0C\xA3\x3A\xD1\x45\xDC\x73"
-			  "\x0A\x7E\x15\xAC\x20\xB7\x4E\xE5"
-			  "\x59\xF0\x87\x1E\x92\x29\xC0\x34"
-			  "\xCB\x62\xF9\x6D\x04\x9B\x0F\xA6"
-			  "\x3D\xD4\x48\xDF\x76\x0D\x81\x18"
-			  "\xAF\x23\xBA\x51\xE8\x5C\xF3\x8A"
-			  "\x21\x95\x2C\xC3\x37\xCE\x65\xFC"
-			  "\x70\x07\x9E\x12\xA9\x40\xD7\x4B"
-			  "\xE2\x79\x10\x84\x1B\xB2\x26\xBD"
-			  "\x54\xEB\x5F\xF6\x8D\x01\x98\x2F"
-			  "\xC6\x3A\xD1\x68\xFF\x73\x0A\xA1"
-			  "\x15\xAC\x43\xDA\x4E\xE5\x7C\x13"
-			  "\x87\x1E\xB5\x29\xC0\x57\xEE\x62"
-			  "\xF9\x90\x04\x9B\x32\xC9\x3D\xD4"
-			  "\x6B\x02\x76\x0D\xA4\x18\xAF\x46"
-			  "\xDD\x51\xE8\x7F\x16\x8A\x21\xB8"
-			  "\x2C\xC3\x5A\xF1\x65\xFC\x93\x07"
-			  "\x9E\x35\xCC\x40\xD7\x6E\x05\x79"
-			  "\x10\xA7\x1B\xB2\x49\xE0\x54\xEB"
-			  "\x82\x19\x8D\x24\xBB\x2F\xC6\x5D"
-			  "\xF4\x68\xFF\x96\x0A\xA1\x38\xCF"
-			  "\x43\xDA\x71\x08\x7C\x13\xAA\x1E"
-			  "\xB5\x4C\xE3\x57\xEE\x85\x1C\x90"
-			  "\x27\xBE\x32\xC9\x60\xF7\x6B\x02"
-			  "\x99\x0D\xA4\x3B\xD2\x46\xDD\x74"
-			  "\x0B\x7F\x16\xAD\x21\xB8\x4F\xE6"
-			  "\x5A\xF1\x88\x1F\x93\x2A\xC1\x35"
-			  "\xCC\x63\xFA\x6E\x05\x9C\x10\xA7"
-			  "\x3E\xD5\x49\xE0\x77\x0E\x82\x19"
-			  "\xB0\x24\xBB\x52\xE9\x5D\xF4\x8B"
-			  "\x22\x96\x2D\xC4\x38\xCF\x66\xFD"
-			  "\x71\x08\x9F\x13\xAA\x41\xD8\x4C"
-			  "\xE3\x7A\x11\x85\x1C\xB3\x27\xBE"
-			  "\x55\xEC\x60\xF7\x8E\x02\x99\x30"
-			  "\xC7\x3B\xD2\x69\x00\x74\x0B\xA2"
-			  "\x16\xAD\x44\xDB\x4F\xE6\x7D\x14"
-			  "\x88\x1F\xB6\x2A\xC1\x58\xEF\x63"
-			  "\xFA\x91\x05\x9C\x33\xCA\x3E\xD5"
-			  "\x6C\x03\x77\x0E\xA5\x19\xB0\x47"
-			  "\xDE\x52\xE9\x80\x17\x8B\x22\xB9"
-			  "\x2D\xC4\x5B\xF2\x66\xFD\x94\x08"
-			  "\x9F\x36\xCD\x41\xD8\x6F\x06\x7A"
-			  "\x11\xA8\x1C\xB3\x4A\xE1\x55\xEC"
-			  "\x83\x1A\x8E\x25\xBC\x30\xC7\x5E"
-			  "\xF5\x69\x00\x97\x0B\xA2\x39\xD0"
-			  "\x44\xDB\x72\x09\x7D\x14\xAB\x1F"
-			  "\xB6\x4D\xE4\x58\xEF\x86\x1D\x91"
-			  "\x28\xBF\x33\xCA\x61\xF8\x6C\x03"
-			  "\x9A\x0E\xA5\x3C\xD3\x47\xDE\x75"
-			  "\x0C\x80\x17\xAE\x22\xB9\x50\xE7"
-			  "\x5B\xF2\x89\x20\x94\x2B\xC2\x36"
-			  "\xCD\x64\xFB\x6F\x06\x9D\x11\xA8"
-			  "\x3F\xD6\x4A\xE1\x78\x0F\x83\x1A"
-			  "\xB1\x25\xBC\x53\xEA\x5E\xF5\x8C"
-			  "\x00\x97\x2E\xC5\x39\xD0\x67\xFE"
-			  "\x72\x09\xA0\x14\xAB\x42\xD9\x4D",
-		.rlen	= 1008,
-	},
-};
-
-static const struct cipher_testvec camellia_lrw_enc_tv_template[] = {
+static const struct cipher_testvec camellia_lrw_tv_template[] = {
 	/* Generated from AES-LRW test vectors */
 	{
 		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
@@ -32601,12 +29069,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x92\x68\x19\xd7\xb7\x5b\x0a\x31"
+		.ctext	= "\x92\x68\x19\xd7\xb7\x5b\x0a\x31"
 			  "\x97\xcc\x72\xbe\x99\x17\xeb\x3e",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
 			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
@@ -32615,12 +29082,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x73\x09\xb7\x50\xb6\x77\x30\x50"
+		.ctext	= "\x73\x09\xb7\x50\xb6\x77\x30\x50"
 			  "\x5c\x8a\x9c\x26\x77\x9d\xfc\x4a",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
 			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
@@ -32629,12 +29095,11 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x90\xae\x83\xe0\x22\xb9\x60\x91"
+		.ctext	= "\x90\xae\x83\xe0\x22\xb9\x60\x91"
 			  "\xfa\xa9\xb7\x98\xe3\xed\x87\x01",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
 			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
@@ -32644,12 +29109,11 @@
 		.klen	= 40,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x99\xe9\x6e\xd4\xc9\x21\xa5\xf0"
+		.ctext	= "\x99\xe9\x6e\xd4\xc9\x21\xa5\xf0"
 			  "\xd8\x83\xef\xd9\x07\x16\x5f\x35",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
 			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
@@ -32659,12 +29123,11 @@
 		.klen	= 40,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x42\x88\xf4\xcb\x21\x11\x6d\x8e"
+		.ctext	= "\x42\x88\xf4\xcb\x21\x11\x6d\x8e"
 			  "\xde\x1a\xf2\x29\xf1\x4a\xe0\x15",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
 			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
@@ -32675,12 +29138,11 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x40\xaa\x34\x86\x4a\x8f\x78\xb9"
+		.ctext	= "\x40\xaa\x34\x86\x4a\x8f\x78\xb9"
 			  "\xdb\xdb\x0f\x3d\x48\x70\xbe\x8d",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
 			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
@@ -32691,12 +29153,11 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x30\x31\x32\x33\x34\x35\x36\x37"
+		.ptext	= "\x30\x31\x32\x33\x34\x35\x36\x37"
 			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.ilen	= 16,
-		.result	= "\x04\xab\x28\x37\x31\x7a\x26\xab"
+		.ctext	= "\x04\xab\x28\x37\x31\x7a\x26\xab"
 			  "\xa1\x70\x1b\x9c\xe7\xdd\x83\xff",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
 			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
@@ -32707,7 +29168,7 @@
 		.klen	= 48,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
+		.ptext	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
 			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
 			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
 			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
@@ -32771,8 +29232,7 @@
 			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
 			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
 			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.ilen	= 512,
-		.result	= "\x90\x69\x8e\xf2\x14\x86\x59\xf9"
+		.ctext	= "\x90\x69\x8e\xf2\x14\x86\x59\xf9"
 			  "\xec\xe7\xfa\x3f\x48\x9d\x7f\x96"
 			  "\x67\x76\xac\x2c\xd2\x63\x18\x93"
 			  "\x13\xf8\xf1\xf6\x71\x77\xb3\xee"
@@ -32836,267 +29296,14 @@
 			  "\x93\x6c\x01\xf7\xcc\x4e\x20\xd1"
 			  "\xb2\x1a\xd8\x4c\xbd\x1d\x10\xe9"
 			  "\x5a\xa8\x92\x7f\xba\xe6\x0c\x95",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
 	},
 };
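A note on the .also_non_np/.np/.tap fields kept in the unified template above: as far as I can tell, testmgr uses them to replay the same vector split into np scatter-gather chunks of the tap byte counts, expecting output identical to the single-shot run. A minimal user-space sketch of that equivalence check, with a toy stateful stream transform standing in for the real cipher (stream_xor() and struct stream_state are illustrative names, not kernel API):

	#include <assert.h>
	#include <stddef.h>
	#include <string.h>

	/* Toy stateful transform: XOR each byte with a running counter. */
	struct stream_state { unsigned char ctr; };

	static void stream_xor(struct stream_state *st, const unsigned char *in,
			       unsigned char *out, size_t n)
	{
		size_t i;

		for (i = 0; i < n; i++)
			out[i] = in[i] ^ st->ctr++;
	}

	int main(void)
	{
		unsigned char in[512], one_shot[512], chunked[512];
		/* Mirror the template's split: .tap = { 512 - 20, 4, 16 } */
		const size_t tap[3] = { 512 - 20, 4, 16 };
		struct stream_state a = { 0 }, b = { 0 };
		size_t i, off = 0;
		int c;

		for (i = 0; i < sizeof(in); i++)
			in[i] = (unsigned char)i;

		/* One pass over the whole buffer... */
		stream_xor(&a, in, one_shot, sizeof(in));

		/* ...must match three passes over the tap-sized pieces. */
		for (c = 0; c < 3; c++) {
			stream_xor(&b, in + off, chunked + off, tap[c]);
			off += tap[c];
		}

		assert(off == sizeof(in));
		assert(memcmp(one_shot, chunked, sizeof(in)) == 0);
		return 0;
	}

The point of the { 512 - 20, 4, 16 } split is that a correct implementation must produce the same bytes no matter how the buffer is sliced across scatterlist entries.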
 
-static const struct cipher_testvec camellia_lrw_dec_tv_template[] = {
-	/* Generated from AES-LRW test vectors */
-	/* same as enc vectors with input and result reversed */
-	{
-		.key	= "\x45\x62\xac\x25\xf8\x28\x17\x6d"
-			  "\x4c\x26\x84\x14\xb5\x68\x01\x85"
-			  "\x25\x8e\x2a\x05\xe7\x3e\x9d\x03"
-			  "\xee\x5a\x83\x0c\xcc\x09\x4c\x87",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x92\x68\x19\xd7\xb7\x5b\x0a\x31"
-			  "\x97\xcc\x72\xbe\x99\x17\xeb\x3e",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x59\x70\x47\x14\xf5\x57\x47\x8c"
-			  "\xd7\x79\xe8\x0f\x54\x88\x79\x44"
-			  "\x0d\x48\xf0\xb7\xb1\x5a\x53\xea"
-			  "\x1c\xaa\x6b\x29\xc2\xca\xfb\xaf",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\x73\x09\xb7\x50\xb6\x77\x30\x50"
-			  "\x5c\x8a\x9c\x26\x77\x9d\xfc\x4a",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xd8\x2a\x91\x34\xb2\x6a\x56\x50"
-			  "\x30\xfe\x69\xe2\x37\x7f\x98\x47"
-			  "\xcd\xf9\x0b\x16\x0c\x64\x8f\xb6"
-			  "\xb0\x0d\x0d\x1b\xae\x85\x87\x1f",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x90\xae\x83\xe0\x22\xb9\x60\x91"
-			  "\xfa\xa9\xb7\x98\xe3\xed\x87\x01",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x0f\x6a\xef\xf8\xd3\xd2\xbb\x15"
-			  "\x25\x83\xf7\x3c\x1f\x01\x28\x74"
-			  "\xca\xc6\xbc\x35\x4d\x4a\x65\x54"
-			  "\x90\xae\x61\xcf\x7b\xae\xbd\xcc"
-			  "\xad\xe4\x94\xc5\x4a\x29\xae\x70",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x99\xe9\x6e\xd4\xc9\x21\xa5\xf0"
-			  "\xd8\x83\xef\xd9\x07\x16\x5f\x35",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\x8a\xd4\xee\x10\x2f\xbd\x81\xff"
-			  "\xf8\x86\xce\xac\x93\xc5\xad\xc6"
-			  "\xa0\x19\x07\xc0\x9d\xf7\xbb\xdd"
-			  "\x52\x13\xb2\xb7\xf0\xff\x11\xd8"
-			  "\xd6\x08\xd0\xcd\x2e\xb1\x17\x6f",
-		.klen	= 40,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x42\x88\xf4\xcb\x21\x11\x6d\x8e"
-			  "\xde\x1a\xf2\x29\xf1\x4a\xe0\x15",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x40\xaa\x34\x86\x4a\x8f\x78\xb9"
-			  "\xdb\xdb\x0f\x3d\x48\x70\xbe\x8d",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xfb\x76\x15\xb2\x3d\x80\x89\x1d"
-			  "\xd4\x70\x98\x0b\xc7\x95\x84\xc8"
-			  "\xb2\xfb\x64\xce\x60\x97\x87\x8d"
-			  "\x17\xfc\xe4\x5a\x49\xe8\x30\xb7"
-			  "\x6e\x78\x17\xe7\x2d\x5e\x12\xd4"
-			  "\x60\x64\x04\x7a\xf1\x2f\x9e\x0c",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x02\x00\x00\x00\x00",
-		.input	= "\x04\xab\x28\x37\x31\x7a\x26\xab"
-			  "\xa1\x70\x1b\x9c\xe7\xdd\x83\xff",
-		.ilen	= 16,
-		.result	= "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x41\x42\x43\x44\x45\x46",
-		.rlen	= 16,
-	}, {
-		.key	= "\xf8\xd4\x76\xff\xd6\x46\xee\x6c"
-			  "\x23\x84\xcb\x1c\x77\xd6\x19\x5d"
-			  "\xfe\xf1\xa9\xf3\x7b\xbc\x8d\x21"
-			  "\xa7\x9c\x21\xf8\xcb\x90\x02\x89"
-			  "\xa8\x45\x34\x8e\xc8\xc5\xb5\xf1"
-			  "\x26\xf5\x0e\x76\xfe\xfd\x1b\x1e",
-		.klen	= 48,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x90\x69\x8e\xf2\x14\x86\x59\xf9"
-			  "\xec\xe7\xfa\x3f\x48\x9d\x7f\x96"
-			  "\x67\x76\xac\x2c\xd2\x63\x18\x93"
-			  "\x13\xf8\xf1\xf6\x71\x77\xb3\xee"
-			  "\x93\xb2\xcc\xf3\x26\xc1\x16\x4f"
-			  "\xd4\xe8\x43\xc1\x68\xa3\x3e\x06"
-			  "\x38\x51\xff\xa8\xb9\xa4\xeb\xb1"
-			  "\x62\xdd\x78\x81\xea\x1d\xef\x04"
-			  "\x1d\x07\xc1\x67\xc8\xd6\x77\xa1"
-			  "\x84\x95\xf4\x9a\xd9\xbc\x2d\xe2"
-			  "\xf6\x80\xfc\x91\x2a\xbc\x42\xa0"
-			  "\x40\x41\x69\xaa\x71\xc0\x37\xec"
-			  "\x39\xf3\xf2\xec\x82\xc3\x88\x79"
-			  "\xbc\xc3\xaa\xb7\xcf\x6a\x72\x80"
-			  "\x4c\xf4\x84\x8f\x13\x9e\x94\x5c"
-			  "\xe5\xb2\x91\xbb\x92\x51\x4d\xf1"
-			  "\xd6\x0d\x71\x6b\x7a\xc2\x2f\x12"
-			  "\x6f\x75\xc7\x80\x99\x50\x84\xcf"
-			  "\xa8\xeb\xd6\xe1\x1c\x59\x81\x7e"
-			  "\xb9\xb3\xde\x7a\x93\x14\x12\xa2"
-			  "\xf7\x43\xb3\x9d\x1a\x87\x65\x91"
-			  "\x42\x08\x40\x82\x06\x1c\x2d\x55"
-			  "\x6e\x48\xd5\x74\x07\x6e\x9d\x80"
-			  "\xeb\xb4\x97\xa1\x36\xdf\xfa\x74"
-			  "\x79\x7f\x5a\x75\xe7\x71\xc8\x8c"
-			  "\x7e\xf8\x3a\x77\xcd\x32\x05\xf9"
-			  "\x3d\xd4\xe9\xa2\xbb\xc4\x8b\x83"
-			  "\x42\x5c\x82\xfa\xe9\x4b\x96\x3b"
-			  "\x7f\x89\x8b\xf9\xf1\x87\xda\xf0"
-			  "\x87\xef\x13\x5d\xf0\xe2\xc5\xc1"
-			  "\xed\x14\xa9\x57\x19\x63\x40\x04"
-			  "\x24\xeb\x6e\x19\xd1\x3d\x70\x78"
-			  "\xeb\xda\x55\x70\x2c\x4f\x41\x5b"
-			  "\x56\x9f\x1a\xd3\xac\xf1\xc0\xc3"
-			  "\x21\xec\xd7\xd2\x55\x32\x7c\x2e"
-			  "\x3c\x48\x8e\xb4\x85\x35\x47\xfe"
-			  "\xe2\x88\x79\x98\x6a\xc9\x8d\xff"
-			  "\xe9\x89\x6e\xb8\xe2\x97\x00\xbd"
-			  "\xa4\x8f\xba\xd0\x8c\xcb\x79\x99"
-			  "\xb3\xb2\xb2\x7a\xc3\xb7\xef\x75"
-			  "\x23\x52\x76\xc3\x50\x6e\x66\xf8"
-			  "\xa2\xe2\xce\xba\x40\x21\x3f\xc9"
-			  "\x0a\x32\x7f\xf7\x08\x8c\x66\xcf"
-			  "\xd3\xdf\x57\x59\x83\xb8\xe1\x85"
-			  "\xd6\x8f\xfb\x48\x1f\x3a\xc4\x2f"
-			  "\xb4\x2d\x58\xab\xd8\x7f\x5e\x3a"
-			  "\xbc\x62\x3e\xe2\x6a\x52\x0d\x76"
-			  "\x2f\x1c\x1a\x30\xed\x95\x2a\x44"
-			  "\x35\xa5\x83\x04\x84\x01\x99\x56"
-			  "\xb7\xe3\x10\x96\xfa\xdc\x19\xdd"
-			  "\xe2\x7f\xcb\xa0\x49\x1b\xff\x4c"
-			  "\x73\xf6\xbb\x94\x00\xe8\xa9\x3d"
-			  "\xe2\x20\xe9\x3f\xfa\x07\x5d\x77"
-			  "\x06\xd5\x4f\x4d\x02\xb8\x40\x1b"
-			  "\x30\xed\x1a\x50\x19\xef\xc4\x2c"
-			  "\x02\xd9\xc5\xd3\x11\x33\x37\xe5"
-			  "\x2b\xa3\x95\xa6\xee\xd8\x74\x1d"
-			  "\x68\xa0\xeb\xbf\xdd\x5e\x99\x96"
-			  "\x91\xc3\x94\x24\xa5\x12\xa2\x37"
-			  "\xb3\xac\xcf\x2a\xfd\x55\x34\xfe"
-			  "\x79\x92\x3e\xe6\x1b\x49\x57\x5d"
-			  "\x93\x6c\x01\xf7\xcc\x4e\x20\xd1"
-			  "\xb2\x1a\xd8\x4c\xbd\x1d\x10\xe9"
-			  "\x5a\xa8\x92\x7f\xba\xe6\x0c\x95",
-		.ilen	= 512,
-		.result	= "\x05\x11\xb7\x18\xab\xc6\x2d\xac"
-			  "\x70\x5d\xf6\x22\x94\xcd\xe5\x6c"
-			  "\x17\x6b\xf6\x1c\xf0\xf3\x6e\xf8"
-			  "\x50\x38\x1f\x71\x49\xb6\x57\xd6"
-			  "\x8f\xcb\x8d\x6b\xe3\xa6\x29\x90"
-			  "\xfe\x2a\x62\x82\xae\x6d\x8b\xf6"
-			  "\xad\x1e\x9e\x20\x5f\x38\xbe\x04"
-			  "\xda\x10\x8e\xed\xa2\xa4\x87\xab"
-			  "\xda\x6b\xb4\x0c\x75\xba\xd3\x7c"
-			  "\xc9\xac\x42\x31\x95\x7c\xc9\x04"
-			  "\xeb\xd5\x6e\x32\x69\x8a\xdb\xa6"
-			  "\x15\xd7\x3f\x4f\x2f\x66\x69\x03"
-			  "\x9c\x1f\x54\x0f\xde\x1f\xf3\x65"
-			  "\x4c\x96\x12\xed\x7c\x92\x03\x01"
-			  "\x6f\xbc\x35\x93\xac\xf1\x27\xf1"
-			  "\xb4\x96\x82\x5a\x5f\xb0\xa0\x50"
-			  "\x89\xa4\x8e\x66\x44\x85\xcc\xfd"
-			  "\x33\x14\x70\xe3\x96\xb2\xc3\xd3"
-			  "\xbb\x54\x5a\x1a\xf9\x74\xa2\xc5"
-			  "\x2d\x64\x75\xdd\xb4\x54\xe6\x74"
-			  "\x8c\xd3\x9d\x9e\x86\xab\x51\x53"
-			  "\xb7\x93\x3e\x6f\xd0\x4e\x2c\x40"
-			  "\xf6\xa8\x2e\x3e\x9d\xf4\x66\xa5"
-			  "\x76\x12\x73\x44\x1a\x56\xd7\x72"
-			  "\x88\xcd\x21\x8c\x4c\x0f\xfe\xda"
-			  "\x95\xe0\x3a\xa6\xa5\x84\x46\xcd"
-			  "\xd5\x3e\x9d\x3a\xe2\x67\xe6\x60"
-			  "\x1a\xe2\x70\x85\x58\xc2\x1b\x09"
-			  "\xe1\xd7\x2c\xca\xad\xa8\x8f\xf9"
-			  "\xac\xb3\x0e\xdb\xca\x2e\xe2\xb8"
-			  "\x51\x71\xd9\x3c\x6c\xf1\x56\xf8"
-			  "\xea\x9c\xf1\xfb\x0c\xe6\xb7\x10"
-			  "\x1c\xf8\xa9\x7c\xe8\x53\x35\xc1"
-			  "\x90\x3e\x76\x4a\x74\xa4\x21\x2c"
-			  "\xf6\x2c\x4e\x0f\x94\x3a\x88\x2e"
-			  "\x41\x09\x6a\x33\x7d\xf6\xdd\x3f"
-			  "\x8d\x23\x31\x74\x84\xeb\x88\x6e"
-			  "\xcc\xb9\xbc\x22\x83\x19\x07\x22"
-			  "\xa5\x2d\xdf\xa5\xf3\x80\x85\x78"
-			  "\x84\x39\x6a\x6d\x6a\x99\x4f\xa5"
-			  "\x15\xfe\x46\xb0\xe4\x6c\xa5\x41"
-			  "\x3c\xce\x8f\x42\x60\x71\xa7\x75"
-			  "\x08\x40\x65\x8a\x82\xbf\xf5\x43"
-			  "\x71\x96\xa9\x4d\x44\x8a\x20\xbe"
-			  "\xfa\x4d\xbb\xc0\x7d\x31\x96\x65"
-			  "\xe7\x75\xe5\x3e\xfd\x92\x3b\xc9"
-			  "\x55\xbb\x16\x7e\xf7\xc2\x8c\xa4"
-			  "\x40\x1d\xe5\xef\x0e\xdf\xe4\x9a"
-			  "\x62\x73\x65\xfd\x46\x63\x25\x3d"
-			  "\x2b\xaf\xe5\x64\xfe\xa5\x5c\xcf"
-			  "\x24\xf3\xb4\xac\x64\xba\xdf\x4b"
-			  "\xc6\x96\x7d\x81\x2d\x8d\x97\xf7"
-			  "\xc5\x68\x77\x84\x32\x2b\xcc\x85"
-			  "\x74\x96\xf0\x12\x77\x61\xb9\xeb"
-			  "\x71\xaa\x82\xcb\x1c\xdb\x89\xc8"
-			  "\xc6\xb5\xe3\x5c\x7d\x39\x07\x24"
-			  "\xda\x39\x87\x45\xc0\x2b\xbb\x01"
-			  "\xac\xbc\x2a\x5c\x7f\xfc\xe8\xce"
-			  "\x6d\x9c\x6f\xed\xd3\xc1\xa1\xd6"
-			  "\xc5\x55\xa9\x66\x2f\xe1\xc8\x32"
-			  "\xa6\x5d\xa4\x3a\x98\x73\xe8\x45"
-			  "\xa4\xc7\xa8\xb4\xf6\x13\x03\xf6"
-			  "\xe9\x2e\xc4\x29\x0f\x84\xdb\xc4"
-			  "\x21\xc4\xc2\x75\x67\x89\x37\x0a",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	},
-};
-
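Every *_dec_tv_template deleted in this file was a byte-for-byte mirror of its enc counterpart with input and result swapped, so folding .input/.ilen/.result/.rlen into .ptext/.ctext/.len lets one entry drive both directions. A hedged user-space sketch of that idea, with a toy repeating-key XOR in place of Camellia (xor_crypt() and check_vec() are hypothetical helpers, not testmgr functions):

	#include <stdio.h>
	#include <string.h>

	struct cipher_testvec {
		const char *key;
		const char *ptext;	/* plaintext */
		const char *ctext;	/* expected ciphertext */
		unsigned char klen;
		unsigned short len;	/* one length covers both buffers */
	};

	/* Toy cipher: XOR with the repeating key; it is its own inverse. */
	static void xor_crypt(const struct cipher_testvec *v,
			      const char *in, char *out)
	{
		unsigned short i;

		for (i = 0; i < v->len; i++)
			out[i] = in[i] ^ v->key[i % v->klen];
	}

	/* One vector checks both the encrypt and the decrypt direction. */
	static int check_vec(const struct cipher_testvec *v)
	{
		char buf[256];

		xor_crypt(v, v->ptext, buf);
		if (memcmp(buf, v->ctext, v->len))
			return -1;
		xor_crypt(v, v->ctext, buf);
		return memcmp(buf, v->ptext, v->len) ? -1 : 0;
	}

	int main(void)
	{
		static const struct cipher_testvec tv = {
			.key	= "\x01\x02",
			.klen	= 2,
			.ptext	= "\x30\x31\x32\x33",
			.ctext	= "\x31\x33\x33\x31",
			.len	= 4,
		};

		printf("unified vector: %s\n",
		       check_vec(&tv) ? "FAIL" : "ok");
		return 0;
	}

With a real cipher the harness would call the decrypt path rather than rely on an involution, but either way the dec arrays carried no information not already present in the enc arrays.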
-static const struct cipher_testvec camellia_xts_enc_tv_template[] = {
+static const struct cipher_testvec camellia_xts_tv_template[] = {
 	/* Generated from AES-XTS test vectors */
 	{
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -33106,16 +29313,15 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 32,
-		.result	= "\x06\xcb\xa5\xf1\x04\x63\xb2\x41"
+		.ctext	= "\x06\xcb\xa5\xf1\x04\x63\xb2\x41"
 			  "\xdc\xca\xfa\x09\xba\x74\xb9\x05"
 			  "\x78\xba\xa4\xf8\x67\x4d\x7e\xad"
 			  "\x20\x18\xf5\x0c\x41\x16\x2a\x61",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
 			  "\x11\x11\x11\x11\x11\x11\x11\x11"
@@ -33124,16 +29330,15 @@
 		.klen	= 32,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\xc2\xb9\xdc\x44\x1d\xdf\xf2\x86"
+		.ctext	= "\xc2\xb9\xdc\x44\x1d\xdf\xf2\x86"
 			  "\x8d\x35\x42\x0a\xa5\x5e\x3d\x4f"
 			  "\xb5\x37\x06\xff\xbd\xd4\x91\x70"
 			  "\x80\x1f\xb2\x39\x10\x89\x44\xf5",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
 			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
@@ -33142,16 +29347,15 @@
 		.klen	= 32,
 		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44"
 			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.ilen	= 32,
-		.result	= "\x52\x1f\x9d\xf5\x5a\x58\x5a\x7e"
+		.ctext	= "\x52\x1f\x9d\xf5\x5a\x58\x5a\x7e"
 			  "\x9f\xd0\x8e\x02\x9c\x9a\x6a\xa7"
 			  "\xb4\x3b\xce\xe7\x17\xaa\x89\x6a"
 			  "\x35\x3c\x6b\xb5\x61\x1c\x79\x38",
-		.rlen	= 32,
+		.len	= 32,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -33160,7 +29364,7 @@
 		.klen	= 32,
 		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -33224,8 +29428,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\xc7\xf9\x0a\xaa\xcb\xb5\x8f\x33"
+		.ctext	= "\xc7\xf9\x0a\xaa\xcb\xb5\x8f\x33"
 			  "\x60\xc3\xe9\x47\x90\xb7\x50\x57"
 			  "\xa3\xad\x81\x2f\xf5\x22\x96\x02"
 			  "\xaa\x7f\xea\xac\x29\x78\xca\x2a"
@@ -33289,7 +29492,7 @@
 			  "\xcc\x06\xdb\xe7\x82\x29\x63\xd1"
 			  "\x52\x84\x4f\xee\x27\xe8\x02\xd4"
 			  "\x34\x3c\x69\xc2\xbd\x20\xe6\x7a",
-		.rlen	= 512,
+		.len	= 512,
 	}, {
 		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
 			  "\x23\x53\x60\x28\x74\x71\x35\x26"
@@ -33302,7 +29505,7 @@
 		.klen	= 64,
 		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			  "\x10\x11\x12\x13\x14\x15\x16\x17"
 			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
@@ -33366,8 +29569,7 @@
 			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
 			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
 			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.ilen	= 512,
-		.result	= "\x49\xcd\xb8\xbf\x2f\x73\x37\x28"
+		.ctext	= "\x49\xcd\xb8\xbf\x2f\x73\x37\x28"
 			  "\x9a\x7f\x6e\x57\x55\xb8\x07\x88"
 			  "\x4a\x0d\x8b\x55\x60\xed\xb6\x7b"
 			  "\xf1\x74\xac\x96\x05\x7b\x32\xca"
@@ -33431,350 +29633,7 @@
 			  "\xb1\x02\x0a\x5c\x79\x19\x3b\x75"
 			  "\xb7\x16\xd8\x12\x5c\xcd\x7d\x4e"
 			  "\xd5\xc6\x99\xcc\x4e\x6c\x94\x95",
-		.rlen	= 512,
-		.also_non_np = 1,
-		.np	= 3,
-		.tap	= { 512 - 20, 4, 16 },
-	},
-};
-
-static const struct cipher_testvec camellia_xts_dec_tv_template[] = {
-	/* Generated from AES-XTS test vectors */
-	/* same as enc vectors with input and result reversed */
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x06\xcb\xa5\xf1\x04\x63\xb2\x41"
-			  "\xdc\xca\xfa\x09\xba\x74\xb9\x05"
-			  "\x78\xba\xa4\xf8\x67\x4d\x7e\xad"
-			  "\x20\x18\xf5\x0c\x41\x16\x2a\x61",
-		.ilen	= 32,
-		.result	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.rlen	= 32,
-	}, {
-		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x11\x11\x11\x11\x11\x11\x11\x11"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xc2\xb9\xdc\x44\x1d\xdf\xf2\x86"
-			  "\x8d\x35\x42\x0a\xa5\x5e\x3d\x4f"
-			  "\xb5\x37\x06\xff\xbd\xd4\x91\x70"
-			  "\x80\x1f\xb2\x39\x10\x89\x44\xf5",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
-			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22"
-			  "\x22\x22\x22\x22\x22\x22\x22\x22",
-		.klen	= 32,
-		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x52\x1f\x9d\xf5\x5a\x58\x5a\x7e"
-			  "\x9f\xd0\x8e\x02\x9c\x9a\x6a\xa7"
-			  "\xb4\x3b\xce\xe7\x17\xaa\x89\x6a"
-			  "\x35\x3c\x6b\xb5\x61\x1c\x79\x38",
-		.ilen	= 32,
-		.result	= "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44"
-			  "\x44\x44\x44\x44\x44\x44\x44\x44",
-		.rlen	= 32,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95",
-		.klen	= 32,
-		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\xc7\xf9\x0a\xaa\xcb\xb5\x8f\x33"
-			  "\x60\xc3\xe9\x47\x90\xb7\x50\x57"
-			  "\xa3\xad\x81\x2f\xf5\x22\x96\x02"
-			  "\xaa\x7f\xea\xac\x29\x78\xca\x2a"
-			  "\x7c\xcd\x31\x1a\x3c\x40\x0a\x73"
-			  "\x09\x66\xad\x72\x0e\x4d\x5d\x77"
-			  "\xbc\xb8\x76\x80\x37\x59\xa9\x01"
-			  "\x9e\xfb\xdb\x6c\x93\xef\xb6\x8d"
-			  "\x1e\xc1\x94\xa8\xd4\xb5\xb0\x01"
-			  "\xd5\x01\x97\x28\xcd\x7a\x1f\xe8"
-			  "\x08\xda\x76\x00\x65\xcf\x7b\x31"
-			  "\xc6\xfa\xf2\x3b\x00\xa7\x6a\x9e"
-			  "\x6c\x43\x80\x87\xe0\xbb\x4e\xe5"
-			  "\xdc\x8a\xdf\xc3\x1d\x1b\x41\x04"
-			  "\xfb\x54\xdd\x29\x27\xc2\x65\x17"
-			  "\x36\x88\xb0\x85\x8d\x73\x7e\x4b"
-			  "\x1d\x16\x8a\x52\xbc\xa6\xbc\xa4"
-			  "\x8c\xd1\x04\x16\xbf\x8c\x01\x0f"
-			  "\x7e\x6b\x59\x15\x29\xd1\x9b\xd3"
-			  "\x6c\xee\xac\xdc\x45\x58\xca\x5b"
-			  "\x70\x0e\x6a\x12\x86\x82\x79\x9f"
-			  "\x16\xd4\x9d\x67\xcd\x70\x65\x26"
-			  "\x21\x72\x1e\xa1\x94\x8a\x83\x0c"
-			  "\x92\x42\x58\x5e\xa2\xc5\x31\xf3"
-			  "\x7b\xd1\x31\xd4\x15\x80\x31\x61"
-			  "\x5c\x53\x10\xdd\xea\xc8\x83\x5c"
-			  "\x7d\xa7\x05\x66\xcc\x1e\xbb\x05"
-			  "\x47\xae\xb4\x0f\x84\xd8\xf6\xb5"
-			  "\xa1\xc6\x52\x00\x52\xe8\xdc\xd9"
-			  "\x16\x31\xb2\x47\x91\x67\xaa\x28"
-			  "\x2c\x29\x85\xa3\xf7\xf2\x24\x93"
-			  "\x23\x80\x1f\xa8\x1b\x82\x8d\xdc"
-			  "\x9f\x0b\xcd\xb4\x3c\x20\xbc\xec"
-			  "\x4f\xc7\xee\xf8\xfd\xd9\xfb\x7e"
-			  "\x3f\x0d\x23\xfa\x3f\xa7\xcc\x66"
-			  "\x1c\xfe\xa6\x86\xf6\xf7\x85\xc7"
-			  "\x43\xc1\xd4\xfc\xe4\x79\xc9\x1d"
-			  "\xf8\x89\xcd\x20\x27\x84\x5d\x5c"
-			  "\x8e\x4f\x1f\xeb\x08\x21\x4f\xa3"
-			  "\xe0\x7e\x0b\x9c\xe7\x42\xcf\xb7"
-			  "\x3f\x43\xcc\x86\x71\x34\x6a\xd9"
-			  "\x5e\xec\x8f\x36\xc9\x0a\x03\xfe"
-			  "\x18\x41\xdc\x9e\x2e\x75\x20\x3e"
-			  "\xcc\x77\xe0\x8f\xe8\x43\x37\x4c"
-			  "\xed\x1a\x5a\xb3\xfa\x43\xc9\x71"
-			  "\x9f\xc5\xce\xcf\xff\xe7\x77\x1e"
-			  "\x35\x93\xde\x6b\xc0\x6a\x7e\xa9"
-			  "\x34\xb8\x27\x74\x08\xda\xf2\x4a"
-			  "\x23\x5b\x9f\x55\x3a\x57\x82\x52"
-			  "\xea\x6d\xc3\xc7\xf2\xc8\xb5\xdc"
-			  "\xc5\xb9\xbb\xaa\xf2\x29\x9f\x49"
-			  "\x7a\xef\xfe\xdc\x9f\xc9\x28\xe2"
-			  "\x96\x0b\x35\x84\x05\x0d\xd6\x2a"
-			  "\xea\x5a\xbf\x69\xde\xee\x4f\x8f"
-			  "\x84\xb9\xcf\xa7\x57\xea\xe0\xe8"
-			  "\x96\xef\x0f\x0e\xec\xc7\xa6\x74"
-			  "\xb1\xfe\x7a\x6d\x11\xdd\x0e\x15"
-			  "\x4a\x1e\x73\x7f\x55\xea\xf6\xe1"
-			  "\x5b\xb6\x71\xda\xb0\x0c\xba\x26"
-			  "\x5c\x48\x38\x6d\x1c\x32\xb2\x7d"
-			  "\x05\x87\xc2\x1e\x7e\x2d\xd4\x33"
-			  "\xcc\x06\xdb\xe7\x82\x29\x63\xd1"
-			  "\x52\x84\x4f\xee\x27\xe8\x02\xd4"
-			  "\x34\x3c\x69\xc2\xbd\x20\xe6\x7a",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
-	}, {
-		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
-			  "\x23\x53\x60\x28\x74\x71\x35\x26"
-			  "\x62\x49\x77\x57\x24\x70\x93\x69"
-			  "\x99\x59\x57\x49\x66\x96\x76\x27"
-			  "\x31\x41\x59\x26\x53\x58\x97\x93"
-			  "\x23\x84\x62\x64\x33\x83\x27\x95"
-			  "\x02\x88\x41\x97\x16\x93\x99\x37"
-			  "\x51\x05\x82\x09\x74\x94\x45\x92",
-		.klen	= 64,
-		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x49\xcd\xb8\xbf\x2f\x73\x37\x28"
-			  "\x9a\x7f\x6e\x57\x55\xb8\x07\x88"
-			  "\x4a\x0d\x8b\x55\x60\xed\xb6\x7b"
-			  "\xf1\x74\xac\x96\x05\x7b\x32\xca"
-			  "\xd1\x4e\xf1\x58\x29\x16\x24\x6c"
-			  "\xf2\xb3\xe4\x88\x84\xac\x4d\xee"
-			  "\x97\x07\x82\xf0\x07\x12\x38\x0a"
-			  "\x67\x62\xaf\xfd\x85\x9f\x0a\x55"
-			  "\xa5\x20\xc5\x60\xe4\x68\x53\xa4"
-			  "\x0e\x2e\x65\xe3\xe4\x0c\x30\x7c"
-			  "\x1c\x01\x4f\x55\xa9\x13\xeb\x25"
-			  "\x21\x87\xbc\xd3\xe7\x67\x4f\x38"
-			  "\xa8\x14\x25\x71\xe9\x2e\x4c\x21"
-			  "\x41\x82\x0c\x45\x39\x35\xa8\x75"
-			  "\x03\x29\x01\x84\x8c\xab\x48\xbe"
-			  "\x11\x56\x22\x67\xb7\x67\x1a\x09"
-			  "\xa1\x72\x25\x41\x3c\x39\x65\x80"
-			  "\x7d\x2f\xf8\x2c\x73\x04\x58\x9d"
-			  "\xdd\x16\x8b\x63\x70\x4e\xc5\x17"
-			  "\x21\xe0\x84\x51\x4b\x6f\x05\x52"
-			  "\xe3\x63\x34\xfa\xa4\xaf\x33\x20"
-			  "\xc1\xae\x32\xc4\xb8\x2b\xdb\x76"
-			  "\xd9\x02\x31\x2f\xa3\xc6\xd0\x7b"
-			  "\xaf\x1b\x84\xe3\x9b\xbf\xa6\xe0"
-			  "\xb8\x8a\x13\x88\x71\xf4\x11\xa5"
-			  "\xe9\xa9\x10\x33\xe0\xbe\x49\x89"
-			  "\x41\x22\xf5\x9d\x80\x3e\x3b\x76"
-			  "\x01\x16\x50\x6e\x7c\x6a\x81\xe9"
-			  "\x13\x2c\xde\xb2\x5f\x79\xba\xb2"
-			  "\xb1\x75\xae\xd2\x07\x98\x4b\x69"
-			  "\xae\x7d\x5b\x90\xc2\x6c\xe6\x98"
-			  "\xd3\x4c\xa1\xa3\x9c\xc9\x33\x6a"
-			  "\x0d\x23\xb1\x79\x25\x13\x4b\xe5"
-			  "\xaf\x93\x20\x5c\x7f\x06\x7a\x34"
-			  "\x0b\x78\xe3\x67\x26\xe0\xad\x95"
-			  "\xc5\x4e\x26\x22\xcf\x73\x77\x62"
-			  "\x3e\x10\xd7\x90\x4b\x52\x1c\xc9"
-			  "\xef\x38\x52\x18\x0e\x29\x7e\xef"
-			  "\x34\xfe\x31\x95\xc5\xbc\xa8\xe2"
-			  "\xa8\x4e\x9f\xea\xa6\xf0\xfe\x5d"
-			  "\xc5\x39\x86\xed\x2f\x6d\xa0\xfe"
-			  "\x96\xcd\x41\x10\x78\x4e\x0c\xc9"
-			  "\xc3\x6d\x0f\xb7\xe8\xe0\x62\xab"
-			  "\x8b\xf1\x21\x89\xa1\x12\xaa\xfa"
-			  "\x9d\x70\xbe\x4c\xa8\x98\x89\x01"
-			  "\xb9\xe2\x61\xde\x0c\x4a\x0b\xaa"
-			  "\x89\xf5\x14\x79\x18\x8f\x3b\x0d"
-			  "\x21\x17\xf8\x59\x15\x24\x64\x22"
-			  "\x57\x48\x80\xd5\x3d\x92\x30\x07"
-			  "\xd9\xa1\x4a\x23\x16\x43\x48\x0e"
-			  "\x2b\x2d\x1b\x87\xef\x7e\xbd\xfa"
-			  "\x49\xbc\x7e\x68\x6e\xa8\x46\x95"
-			  "\xad\x5e\xfe\x0a\xa8\xd3\x1a\x5d"
-			  "\x6b\x84\xf3\x00\xba\x52\x05\x02"
-			  "\xe3\x96\x4e\xb6\x79\x3f\x43\xd3"
-			  "\x4d\x3f\xd6\xab\x0a\xc4\x75\x2d"
-			  "\xd1\x08\xc3\x6a\xc8\x37\x29\xa0"
-			  "\xcc\x9a\x05\xdd\x5c\xe1\xff\x66"
-			  "\xf2\x7a\x1d\xf2\xaf\xa9\x48\x89"
-			  "\xf5\x21\x0f\x02\x48\x83\x74\xbf"
-			  "\x2e\xe6\x93\x7b\xa0\xf4\xb1\x2b"
-			  "\xb1\x02\x0a\x5c\x79\x19\x3b\x75"
-			  "\xb7\x16\xd8\x12\x5c\xcd\x7d\x4e"
-			  "\xd5\xc6\x99\xcc\x4e\x6c\x94\x95",
-		.ilen	= 512,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
-			  "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-			  "\x10\x11\x12\x13\x14\x15\x16\x17"
-			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
-			  "\x20\x21\x22\x23\x24\x25\x26\x27"
-			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
-			  "\x30\x31\x32\x33\x34\x35\x36\x37"
-			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
-			  "\x40\x41\x42\x43\x44\x45\x46\x47"
-			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
-			  "\x50\x51\x52\x53\x54\x55\x56\x57"
-			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
-			  "\x60\x61\x62\x63\x64\x65\x66\x67"
-			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
-			  "\x70\x71\x72\x73\x74\x75\x76\x77"
-			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
-			  "\x80\x81\x82\x83\x84\x85\x86\x87"
-			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
-			  "\x90\x91\x92\x93\x94\x95\x96\x97"
-			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
-			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
-			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
-			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
-			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
-			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
-			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
-			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
-			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
-			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
-			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
-			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
-			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
-		.rlen	= 512,
+		.len	= 512,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 512 - 20, 4, 16 },
@@ -33784,91 +29643,45 @@
 /*
  * SEED test vectors
  */
-static const struct cipher_testvec seed_enc_tv_template[] = {
+static const struct cipher_testvec seed_tv_template[] = {
 	{
 		.key    = zeroed_string,
 		.klen	= 16,
-		.input  = "\x00\x01\x02\x03\x04\x05\x06\x07"
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.ilen	= 16,
-		.result	= "\x5e\xba\xc6\xe0\x05\x4e\x16\x68"
+		.ctext	= "\x5e\xba\xc6\xe0\x05\x4e\x16\x68"
 			  "\x19\xaf\xf1\xcc\x6d\x34\x6c\xdb",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
 			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
 		.klen	= 16,
-		.input	= zeroed_string,
-		.ilen	= 16,
-		.result	= "\xc1\x1f\x22\xf2\x01\x40\x50\x50"
+		.ptext	= zeroed_string,
+		.ctext	= "\xc1\x1f\x22\xf2\x01\x40\x50\x50"
 			  "\x84\x48\x35\x97\xe4\x37\x0f\x43",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x47\x06\x48\x08\x51\xe6\x1b\xe8"
 			  "\x5d\x74\xbf\xb3\xfd\x95\x61\x85",
 		.klen	= 16,
-		.input	= "\x83\xa2\xf8\xa2\x88\x64\x1f\xb9"
+		.ptext	= "\x83\xa2\xf8\xa2\x88\x64\x1f\xb9"
 			  "\xa4\xe9\xa5\xcc\x2f\x13\x1c\x7d",
-		.ilen	= 16,
-		.result	= "\xee\x54\xd1\x3e\xbc\xae\x70\x6d"
+		.ctext	= "\xee\x54\xd1\x3e\xbc\xae\x70\x6d"
 			  "\x22\x6b\xc3\x14\x2c\xd4\x0d\x4a",
-		.rlen	= 16,
+		.len	= 16,
 	}, {
 		.key	= "\x28\xdb\xc3\xbc\x49\xff\xd8\x7d"
 			  "\xcf\xa5\x09\xb1\x1d\x42\x2b\xe7",
 		.klen	= 16,
-		.input	= "\xb4\x1e\x6b\xe2\xeb\xa8\x4a\x14"
+		.ptext	= "\xb4\x1e\x6b\xe2\xeb\xa8\x4a\x14"
 			  "\x8e\x2e\xed\x84\x59\x3c\x5e\xc7",
-		.ilen	= 16,
-		.result	= "\x9b\x9b\x7b\xfc\xd1\x81\x3c\xb9"
+		.ctext	= "\x9b\x9b\x7b\xfc\xd1\x81\x3c\xb9"
 			  "\x5d\x0b\x36\x18\xf4\x0f\x51\x22",
-		.rlen	= 16,
+		.len	= 16,
 	}
 };
 
-static const struct cipher_testvec seed_dec_tv_template[] = {
-	{
-		.key    = zeroed_string,
-		.klen	= 16,
-		.input	= "\x5e\xba\xc6\xe0\x05\x4e\x16\x68"
-			  "\x19\xaf\xf1\xcc\x6d\x34\x6c\xdb",
-		.ilen	= 16,
-		.result	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.rlen	= 16,
-	}, {
-		.key	= "\x00\x01\x02\x03\x04\x05\x06\x07"
-			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
-		.klen	= 16,
-		.input	= "\xc1\x1f\x22\xf2\x01\x40\x50\x50"
-			  "\x84\x48\x35\x97\xe4\x37\x0f\x43",
-		.ilen	= 16,
-		.result	= zeroed_string,
-		.rlen	= 16,
-	}, {
-		.key	= "\x47\x06\x48\x08\x51\xe6\x1b\xe8"
-			  "\x5d\x74\xbf\xb3\xfd\x95\x61\x85",
-		.klen	= 16,
-		.input	= "\xee\x54\xd1\x3e\xbc\xae\x70\x6d"
-			  "\x22\x6b\xc3\x14\x2c\xd4\x0d\x4a",
-		.ilen	= 16,
-		.result	= "\x83\xa2\xf8\xa2\x88\x64\x1f\xb9"
-			  "\xa4\xe9\xa5\xcc\x2f\x13\x1c\x7d",
-		.rlen	= 16,
-	}, {
-		.key	= "\x28\xdb\xc3\xbc\x49\xff\xd8\x7d"
-			  "\xcf\xa5\x09\xb1\x1d\x42\x2b\xe7",
-		.klen	= 16,
-		.input	= "\x9b\x9b\x7b\xfc\xd1\x81\x3c\xb9"
-			  "\x5d\x0b\x36\x18\xf4\x0f\x51\x22",
-		.ilen	= 16,
-		.result	= "\xb4\x1e\x6b\xe2\xeb\xa8\x4a\x14"
-			  "\x8e\x2e\xed\x84\x59\x3c\x5e\xc7",
-		.rlen	= 16,
-	}
-};
-
-static const struct cipher_testvec salsa20_stream_enc_tv_template[] = {
+static const struct cipher_testvec salsa20_stream_tv_template[] = {
 	/*
 	* Testvectors from verified.test-vectors submitted to ECRYPT.
 	* They are truncated to size 39, 64, 111, 129 to test a variety
@@ -33879,24 +29692,23 @@
 			"\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F",
 		.klen	= 16,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 39,
-		.result	= "\x2D\xD5\xC3\xF7\xBA\x2B\x20\xF7"
+		.ctext	= "\x2D\xD5\xC3\xF7\xBA\x2B\x20\xF7"
 			 "\x68\x02\x41\x0C\x68\x86\x88\x89"
 			 "\x5A\xD8\xC1\xBD\x4E\xA6\xC9\xB1"
 			 "\x40\xFB\x9B\x90\xE2\x10\x49\xBF"
 			 "\x58\x3F\x52\x79\x70\xEB\xC1",
-		.rlen	= 39,
+		.len	= 39,
 	}, { /* Set 5, vector 0 */
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 16,
 		.iv     = "\x80\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -33904,8 +29716,7 @@
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 64,
-		.result	= "\xB6\x6C\x1E\x44\x46\xDD\x95\x57"
+		.ctext	= "\xB6\x6C\x1E\x44\x46\xDD\x95\x57"
 			 "\xE5\x78\xE2\x23\xB0\xB7\x68\x01"
 			 "\x7B\x23\xB2\x67\xBB\x02\x34\xAE"
 			 "\x46\x26\xBF\x44\x3F\x21\x97\x76"
@@ -33913,7 +29724,7 @@
 			 "\xCD\x0D\xE9\xA9\x53\x8F\x4A\x09"
 			 "\xCA\x9A\xC0\x73\x2E\x30\xBC\xF9"
 			 "\x8E\x4F\x13\xE4\xB9\xE2\x01\xD9",
-		.rlen	= 64,
+		.len	= 64,
 	}, { /* Set 3, vector 27 */
 		.key	= "\x1B\x1C\x1D\x1E\x1F\x20\x21\x22"
 			"\x23\x24\x25\x26\x27\x28\x29\x2A"
@@ -33921,7 +29732,7 @@
 			"\x33\x34\x35\x36\x37\x38\x39\x3A",
 		.klen	= 32,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -33935,8 +29746,7 @@
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 111,
-		.result	= "\xAE\x39\x50\x8E\xAC\x9A\xEC\xE7"
+		.ctext	= "\xAE\x39\x50\x8E\xAC\x9A\xEC\xE7"
 			 "\xBF\x97\xBB\x20\xB9\xDE\xE4\x1F"
 			 "\x87\xD9\x47\xF8\x28\x91\x35\x98"
 			 "\xDB\x72\xCC\x23\x29\x48\x56\x5E"
@@ -33950,7 +29760,7 @@
 			 "\xB1\x41\x3F\x19\x2F\xC4\x3B\xC6"
 			 "\x95\x46\x45\x54\xE9\x75\x03\x08"
 			 "\x44\xAF\xE5\x8A\x81\x12\x09",
-		.rlen	= 111,
+		.len	= 111,
 	}, { /* Set 5, vector 27 */
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -33958,7 +29768,7 @@
 			"\x00\x00\x00\x00\x00\x00\x00\x00",
 		.klen	= 32,
 		.iv     = "\x00\x00\x00\x10\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -33975,8 +29785,7 @@
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00",
-		.ilen	= 129,
-		.result	= "\xD2\xDB\x1A\x5C\xF1\xC1\xAC\xDB"
+		.ctext	= "\xD2\xDB\x1A\x5C\xF1\xC1\xAC\xDB"
 			 "\xE8\x1A\x7A\x43\x40\xEF\x53\x43"
 			 "\x5E\x7F\x4B\x1A\x50\x52\x3F\x8D"
 			 "\x28\x3D\xCF\x85\x1D\x69\x6E\x60"
@@ -33993,7 +29802,7 @@
 			 "\x2E\x40\x48\x75\xE9\xE2\x21\x45"
 			 "\x0B\xC9\xB6\xB5\x66\xBC\x9A\x59"
 			 "\x5A",
-		.rlen	= 129,
+		.len	= 129,
 	}, { /* large test vector generated using Crypto++ */
 		.key =  "\x00\x01\x02\x03\x04\x05\x06\x07"
 			"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
@@ -34002,7 +29811,7 @@
 		.klen = 32,
 		.iv =	"\x00\x00\x00\x00\x00\x00\x00\x00"
 			"\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input =
+		.ptext =
 			"\x00\x01\x02\x03\x04\x05\x06\x07"
 			"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
 			"\x10\x11\x12\x13\x14\x15\x16\x17"
@@ -34516,8 +30325,7 @@
 			"\x10\x2f\x4e\x6d\x8c\xab\xca\xe9"
 			"\x08\x27\x46\x65\x84\xa3\xc2\xe1"
 			"\x00\x21\x42\x63",
-		.ilen = 4100,
-		.result =
+		.ctext =
 			"\xb5\x81\xf5\x64\x18\x73\xe3\xf0"
 			"\x4c\x13\xf2\x77\x18\x60\x65\x5e"
 			"\x29\x01\xce\x98\x55\x53\xf9\x0c"
@@ -35031,13 +30839,13 @@
 			"\xfc\x3f\x09\x7a\x0b\xdc\xc5\x1b"
 			"\x87\x13\xc6\x5b\x59\x8d\xf2\xc8"
 			"\xaf\xdf\x11\x95",
-		.rlen	= 4100,
+		.len	= 4100,
 		.np	= 2,
 		.tap	= { 4064, 36 },
 	},
 };
 
-static const struct cipher_testvec chacha20_enc_tv_template[] = {
+static const struct cipher_testvec chacha20_tv_template[] = {
 	{ /* RFC7539 A.2. Test Vector #1 */
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -35046,7 +30854,7 @@
 		.klen	= 32,
 		.iv     = "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.input	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -35054,8 +30862,7 @@
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.ilen	= 64,
-		.result	= "\x76\xb8\xe0\xad\xa0\xf1\x3d\x90"
+		.ctext	= "\x76\xb8\xe0\xad\xa0\xf1\x3d\x90"
 			  "\x40\x5d\x6a\xe5\x53\x86\xbd\x28"
 			  "\xbd\xd2\x19\xb8\xa0\x8d\xed\x1a"
 			  "\xa8\x36\xef\xcc\x8b\x77\x0d\xc7"
@@ -35063,7 +30870,7 @@
 			  "\x77\x24\xe0\x3f\xb8\xd8\x4a\x37"
 			  "\x6a\x43\xb8\xf4\x15\x18\xa1\x1c"
 			  "\xc3\x87\xb6\x69\xb2\xee\x65\x86",
-		.rlen	= 64,
+		.len	= 64,
 	}, { /* RFC7539 A.2. Test Vector #2 */
 		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -35072,7 +30879,7 @@
 		.klen	= 32,
 		.iv     = "\x01\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+		.ptext	= "\x41\x6e\x79\x20\x73\x75\x62\x6d"
 			  "\x69\x73\x73\x69\x6f\x6e\x20\x74"
 			  "\x6f\x20\x74\x68\x65\x20\x49\x45"
 			  "\x54\x46\x20\x69\x6e\x74\x65\x6e"
@@ -35119,8 +30926,7 @@
 			  "\x20\x77\x68\x69\x63\x68\x20\x61"
 			  "\x72\x65\x20\x61\x64\x64\x72\x65"
 			  "\x73\x73\x65\x64\x20\x74\x6f",
-		.ilen	= 375,
-		.result	= "\xa3\xfb\xf0\x7d\xf3\xfa\x2f\xde"
+		.ctext	= "\xa3\xfb\xf0\x7d\xf3\xfa\x2f\xde"
 			  "\x4f\x37\x6c\xa2\x3e\x82\x73\x70"
 			  "\x41\x60\x5d\x9f\x4f\x4f\x57\xbd"
 			  "\x8c\xff\x2c\x1d\x4b\x79\x55\xec"
@@ -35167,7 +30973,7 @@
 			  "\x7a\xc6\x1d\xd2\x9c\x6f\x21\xba"
 			  "\x5b\x86\x2f\x37\x30\xe3\x7c\xfd"
 			  "\xc4\xfd\x80\x6c\x22\xf2\x21",
-		.rlen	= 375,
+		.len	= 375,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 375 - 20, 4, 16 },
@@ -35180,7 +30986,7 @@
 		.klen	= 32,
 		.iv     = "\x2a\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x02",
-		.input	= "\x27\x54\x77\x61\x73\x20\x62\x72"
+		.ptext	= "\x27\x54\x77\x61\x73\x20\x62\x72"
 			  "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
 			  "\x6e\x64\x20\x74\x68\x65\x20\x73"
 			  "\x6c\x69\x74\x68\x79\x20\x74\x6f"
@@ -35196,8 +31002,7 @@
 			  "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
 			  "\x72\x61\x74\x68\x73\x20\x6f\x75"
 			  "\x74\x67\x72\x61\x62\x65\x2e",
-		.ilen	= 127,
-		.result	= "\x62\xe6\x34\x7f\x95\xed\x87\xa4"
+		.ctext	= "\x62\xe6\x34\x7f\x95\xed\x87\xa4"
 			  "\x5f\xfa\xe7\x42\x6f\x27\xa1\xdf"
 			  "\x5f\xb6\x91\x10\x04\x4c\x0d\x73"
 			  "\x11\x8e\xff\xa9\x5b\x01\xe5\xcf"
@@ -35213,7 +31018,7 @@
 			  "\x50\xd6\x15\x4b\x6d\xa7\x31\xb1"
 			  "\x87\xb5\x8d\xfd\x72\x8a\xfa\x36"
 			  "\x75\x7a\x79\x7a\xc1\x88\xd1",
-		.rlen	= 127,
+		.len	= 127,
 	}, { /* Self-made test vector for long data */
 		.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
 			  "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
@@ -35222,7 +31027,7 @@
 		.klen	= 32,
 		.iv     = "\x1c\x00\x00\x00\x00\x00\x00\x00"
 			  "\x00\x00\x00\x00\x00\x00\x00\x01",
-		.input	= "\x49\xee\xe0\xdc\x24\x90\x40\xcd"
+		.ptext	= "\x49\xee\xe0\xdc\x24\x90\x40\xcd"
 			  "\xc5\x40\x8f\x47\x05\xbc\xdd\x81"
 			  "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb"
 			  "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8"
@@ -35383,8 +31188,7 @@
 			  "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f"
 			  "\x92\xd6\x41\x7c\x5b\x2a\x00\x79"
 			  "\x72",
-		.ilen	= 1281,
-		.result	= "\x45\xe8\xe0\xb6\x9c\xca\xfd\x87"
+		.ctext	= "\x45\xe8\xe0\xb6\x9c\xca\xfd\x87"
 			  "\xe8\x1d\x37\x96\x8a\xe3\x40\x35"
 			  "\xcf\x5e\x3a\x46\x3d\xfb\xd0\x69"
 			  "\xde\xaf\x7a\xd5\x0d\xe9\x52\xec"
@@ -35545,7 +31349,7 @@
 			  "\x23\x45\x89\x42\xa0\x30\xeb\xbf"
 			  "\xa1\xed\xad\xd5\x76\xfa\x24\x8f"
 			  "\x98",
-		.rlen	= 1281,
+		.len	= 1281,
 		.also_non_np = 1,
 		.np	= 3,
 		.tap	= { 1200, 1, 80 },
@@ -35555,30 +31359,28 @@
 /*
  * CTS (Cipher Text Stealing) mode tests
  */
-static const struct cipher_testvec cts_mode_enc_tv_template[] = {
+static const struct cipher_testvec cts_mode_tv_template[] = {
 	{ /* from rfc3962 */
 		.klen	= 16,
 		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
 			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.ilen	= 17,
-		.input  = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
 			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
 			  "\x20",
-		.rlen	= 17,
-		.result	= "\xc6\x35\x35\x68\xf2\xbf\x8c\xb4"
+		.len	= 17,
+		.ctext	= "\xc6\x35\x35\x68\xf2\xbf\x8c\xb4"
 			  "\xd8\xa5\x80\x36\x2d\xa7\xff\x7f"
 			  "\x97",
 	}, {
 		.klen	= 16,
 		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
 			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.ilen   = 31,
-		.input  = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
 			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
 			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
 			  "\x20\x47\x61\x75\x27\x73\x20",
-		.rlen   = 31,
-		.result = "\xfc\x00\x78\x3e\x0e\xfd\xb2\xc1"
+		.len	= 31,
+		.ctext	= "\xfc\x00\x78\x3e\x0e\xfd\xb2\xc1"
 			  "\xd4\x45\xd4\xc8\xef\xf7\xed\x22"
 			  "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
 			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5",
@@ -35586,13 +31388,12 @@
 		.klen	= 16,
 		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
 			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.ilen   = 32,
-		.input  = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
 			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
 			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
 			  "\x20\x47\x61\x75\x27\x73\x20\x43",
-		.rlen   = 32,
-		.result = "\x39\x31\x25\x23\xa7\x86\x62\xd5"
+		.len	= 32,
+		.ctext	= "\x39\x31\x25\x23\xa7\x86\x62\xd5"
 			  "\xbe\x7f\xcb\xcc\x98\xeb\xf5\xa8"
 			  "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
 			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5\x84",
@@ -35600,15 +31401,14 @@
 		.klen	= 16,
 		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
 			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.ilen   = 47,
-		.input  = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
 			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
 			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
 			  "\x20\x47\x61\x75\x27\x73\x20\x43"
 			  "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
 			  "\x70\x6c\x65\x61\x73\x65\x2c",
-		.rlen   = 47,
-		.result = "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
+		.len	= 47,
+		.ctext	= "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
 			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5\x84"
 			  "\xb3\xff\xfd\x94\x0c\x16\xa1\x8c"
 			  "\x1b\x55\x49\xd2\xf8\x38\x02\x9e"
@@ -35618,15 +31418,14 @@
 		.klen	= 16,
 		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
 			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.ilen   = 48,
-		.input  = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
 			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
 			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
 			  "\x20\x47\x61\x75\x27\x73\x20\x43"
 			  "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
 			  "\x70\x6c\x65\x61\x73\x65\x2c\x20",
-		.rlen   = 48,
-		.result = "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
+		.len	= 48,
+		.ctext	= "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
 			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5\x84"
 			  "\x9d\xad\x8b\xbb\x96\xc4\xcd\xc0"
 			  "\x3b\xc1\x03\xe1\xa1\x94\xbb\xd8"
@@ -35636,8 +31435,7 @@
 		.klen	= 16,
 		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
 			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.ilen   = 64,
-		.input  = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
 			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
 			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
 			  "\x20\x47\x61\x75\x27\x73\x20\x43"
@@ -35645,110 +31443,8 @@
 			  "\x70\x6c\x65\x61\x73\x65\x2c\x20"
 			  "\x61\x6e\x64\x20\x77\x6f\x6e\x74"
 			  "\x6f\x6e\x20\x73\x6f\x75\x70\x2e",
-		.rlen   = 64,
-		.result = "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
-			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5\x84"
-			  "\x39\x31\x25\x23\xa7\x86\x62\xd5"
-			  "\xbe\x7f\xcb\xcc\x98\xeb\xf5\xa8"
-			  "\x48\x07\xef\xe8\x36\xee\x89\xa5"
-			  "\x26\x73\x0d\xbc\x2f\x7b\xc8\x40"
-			  "\x9d\xad\x8b\xbb\x96\xc4\xcd\xc0"
-			  "\x3b\xc1\x03\xe1\xa1\x94\xbb\xd8",
-	}
-};
-
-static const struct cipher_testvec cts_mode_dec_tv_template[] = {
-	{ /* from rfc3962 */
-		.klen	= 16,
-		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
-			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.rlen	= 17,
-		.result = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
-			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
-			  "\x20",
-		.ilen	= 17,
-		.input	= "\xc6\x35\x35\x68\xf2\xbf\x8c\xb4"
-			  "\xd8\xa5\x80\x36\x2d\xa7\xff\x7f"
-			  "\x97",
-	}, {
-		.klen	= 16,
-		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
-			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.rlen   = 31,
-		.result = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
-			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
-			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
-			  "\x20\x47\x61\x75\x27\x73\x20",
-		.ilen   = 31,
-		.input  = "\xfc\x00\x78\x3e\x0e\xfd\xb2\xc1"
-			  "\xd4\x45\xd4\xc8\xef\xf7\xed\x22"
-			  "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
-			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5",
-	}, {
-		.klen	= 16,
-		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
-			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.rlen   = 32,
-		.result = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
-			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
-			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
-			  "\x20\x47\x61\x75\x27\x73\x20\x43",
-		.ilen   = 32,
-		.input  = "\x39\x31\x25\x23\xa7\x86\x62\xd5"
-			  "\xbe\x7f\xcb\xcc\x98\xeb\xf5\xa8"
-			  "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
-			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5\x84",
-	}, {
-		.klen	= 16,
-		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
-			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.rlen   = 47,
-		.result = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
-			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
-			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
-			  "\x20\x47\x61\x75\x27\x73\x20\x43"
-			  "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
-			  "\x70\x6c\x65\x61\x73\x65\x2c",
-		.ilen   = 47,
-		.input  = "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
-			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5\x84"
-			  "\xb3\xff\xfd\x94\x0c\x16\xa1\x8c"
-			  "\x1b\x55\x49\xd2\xf8\x38\x02\x9e"
-			  "\x39\x31\x25\x23\xa7\x86\x62\xd5"
-			  "\xbe\x7f\xcb\xcc\x98\xeb\xf5",
-	}, {
-		.klen	= 16,
-		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
-			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.rlen   = 48,
-		.result = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
-			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
-			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
-			  "\x20\x47\x61\x75\x27\x73\x20\x43"
-			  "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
-			  "\x70\x6c\x65\x61\x73\x65\x2c\x20",
-		.ilen   = 48,
-		.input  = "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
-			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5\x84"
-			  "\x9d\xad\x8b\xbb\x96\xc4\xcd\xc0"
-			  "\x3b\xc1\x03\xe1\xa1\x94\xbb\xd8"
-			  "\x39\x31\x25\x23\xa7\x86\x62\xd5"
-			  "\xbe\x7f\xcb\xcc\x98\xeb\xf5\xa8",
-	}, {
-		.klen	= 16,
-		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
-			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
-		.rlen   = 64,
-		.result = "\x49\x20\x77\x6f\x75\x6c\x64\x20"
-			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
-			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
-			  "\x20\x47\x61\x75\x27\x73\x20\x43"
-			  "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
-			  "\x70\x6c\x65\x61\x73\x65\x2c\x20"
-			  "\x61\x6e\x64\x20\x77\x6f\x6e\x74"
-			  "\x6f\x6e\x20\x73\x6f\x75\x70\x2e",
-		.ilen   = 64,
-		.input  = "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
+		.len	= 64,
+		.ctext	= "\x97\x68\x72\x68\xd6\xec\xcc\xc0"
 			  "\xc0\x7b\x25\xe2\x5e\xcf\xe5\x84"
 			  "\x39\x31\x25\x23\xa7\x86\x62\xd5"
 			  "\xbe\x7f\xcb\xcc\x98\xeb\xf5\xa8"
@@ -36058,6 +31754,15 @@
  */
 static const struct hash_testvec crc32_tv_template[] = {
 	{
+		.psize = 0,
+		.digest = "\x00\x00\x00\x00",
+	},
+	{
+		.plaintext = "abcdefg",
+		.psize = 7,
+		.digest = "\xd8\xb5\x46\xac",
+	},
+	{
 		.key = "\x87\xa9\xcb\xed",
 		.ksize = 4,
 		.psize = 0,
@@ -36494,6 +32199,11 @@
 		.digest = "\x00\x00\x00\x00",
 	},
 	{
+		.plaintext = "abcdefg",
+		.psize = 7,
+		.digest = "\x41\xf4\x27\xe6",
+	},
+	{
 		.key = "\x87\xa9\xcb\xed",
 		.ksize = 4,
 		.psize = 0,
@@ -36921,94 +32631,6 @@
 	}
 };
 
-/*
- * Blakcifn CRC test vectors
- */
-static const struct hash_testvec bfin_crc_tv_template[] = {
-	{
-		.psize = 0,
-		.digest = "\x00\x00\x00\x00",
-	},
-	{
-		.key = "\x87\xa9\xcb\xed",
-		.ksize = 4,
-		.psize = 0,
-		.digest = "\x87\xa9\xcb\xed",
-	},
-	{
-		.key = "\xff\xff\xff\xff",
-		.ksize = 4,
-		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
-			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
-			     "\x11\x12\x13\x14\x15\x16\x17\x18"
-			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
-			     "\x21\x22\x23\x24\x25\x26\x27\x28",
-		.psize = 40,
-		.digest = "\x84\x0c\x8d\xa2",
-	},
-	{
-		.key = "\xff\xff\xff\xff",
-		.ksize = 4,
-		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
-			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
-			     "\x11\x12\x13\x14\x15\x16\x17\x18"
-			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
-			     "\x21\x22\x23\x24\x25\x26",
-		.psize = 38,
-		.digest = "\x8c\x58\xec\xb7",
-	},
-	{
-		.key = "\xff\xff\xff\xff",
-		.ksize = 4,
-		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
-			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
-			     "\x11\x12\x13\x14\x15\x16\x17\x18"
-			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
-			     "\x21\x22\x23\x24\x25\x26\x27",
-		.psize = 39,
-		.digest = "\xdc\x50\x28\x7b",
-	},
-	{
-		.key = "\xff\xff\xff\xff",
-		.ksize = 4,
-		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
-			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
-			     "\x11\x12\x13\x14\x15\x16\x17\x18"
-			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
-			     "\x21\x22\x23\x24\x25\x26\x27\x28"
-			     "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"
-			     "\x31\x32\x33\x34\x35\x36\x37\x38"
-			     "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40"
-			     "\x41\x42\x43\x44\x45\x46\x47\x48"
-			     "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50"
-			     "\x51\x52\x53\x54\x55\x56\x57\x58"
-			     "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60"
-			     "\x61\x62\x63\x64\x65\x66\x67\x68"
-			     "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
-			     "\x71\x72\x73\x74\x75\x76\x77\x78"
-			     "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80"
-			     "\x81\x82\x83\x84\x85\x86\x87\x88"
-			     "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90"
-			     "\x91\x92\x93\x94\x95\x96\x97\x98"
-			     "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0"
-			     "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8"
-			     "\xa9\xaa\xab\xac\xad\xae\xaf\xb0"
-			     "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8"
-			     "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0"
-			     "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8"
-			     "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0"
-			     "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8"
-			     "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0"
-			     "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
-			     "\xe9\xea\xeb\xec\xed\xee\xef\xf0",
-		.psize = 240,
-		.digest = "\x10\x19\x4a\x5c",
-		.np = 2,
-		.tap = { 31, 209 }
-	},
-
-};
-
 static const struct comp_testvec lz4_comp_tv_template[] = {
 	{
 		.inlen	= 255,
@@ -37131,4 +32753,75 @@
 	},
 };
 
+static const struct comp_testvec zstd_comp_tv_template[] = {
+	{
+		.inlen	= 68,
+		.outlen	= 39,
+		.input	= "The algorithm is zstd. "
+			  "The algorithm is zstd. "
+			  "The algorithm is zstd.",
+		.output	= "\x28\xb5\x2f\xfd\x00\x50\xf5\x00\x00\xb8\x54\x68\x65"
+			  "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73"
+			  "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01"
+			  ,
+	},
+	{
+		.inlen	= 244,
+		.outlen	= 151,
+		.input	= "zstd, short for Zstandard, is a fast lossless "
+			  "compression algorithm, targeting real-time "
+			  "compression scenarios at zlib-level and better "
+			  "compression ratios. The zstd compression library "
+			  "provides in-memory compression and decompression "
+			  "functions.",
+		.output	= "\x28\xb5\x2f\xfd\x00\x50\x75\x04\x00\x42\x4b\x1e\x17"
+			  "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32"
+			  "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f"
+			  "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad"
+			  "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60"
+			  "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86"
+			  "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90"
+			  "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64"
+			  "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30"
+			  "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc"
+			  "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e"
+			  "\x20\xa9\x0e\x82\xb9\x43\x45\x01",
+	},
+};
+
+static const struct comp_testvec zstd_decomp_tv_template[] = {
+	{
+		.inlen	= 43,
+		.outlen	= 68,
+		.input	= "\x28\xb5\x2f\xfd\x04\x50\xf5\x00\x00\xb8\x54\x68\x65"
+			  "\x20\x61\x6c\x67\x6f\x72\x69\x74\x68\x6d\x20\x69\x73"
+			  "\x20\x7a\x73\x74\x64\x2e\x20\x01\x00\x55\x73\x36\x01"
+			  "\x6b\xf4\x13\x35",
+		.output	= "The algorithm is zstd. "
+			  "The algorithm is zstd. "
+			  "The algorithm is zstd.",
+	},
+	{
+		.inlen	= 155,
+		.outlen	= 244,
+		.input	= "\x28\xb5\x2f\xfd\x04\x50\x75\x04\x00\x42\x4b\x1e\x17"
+			  "\x90\x81\x31\x00\xf2\x2f\xe4\x36\xc9\xef\x92\x88\x32"
+			  "\xc9\xf2\x24\x94\xd8\x68\x9a\x0f\x00\x0c\xc4\x31\x6f"
+			  "\x0d\x0c\x38\xac\x5c\x48\x03\xcd\x63\x67\xc0\xf3\xad"
+			  "\x4e\x90\xaa\x78\xa0\xa4\xc5\x99\xda\x2f\xb6\x24\x60"
+			  "\xe2\x79\x4b\xaa\xb6\x6b\x85\x0b\xc9\xc6\x04\x66\x86"
+			  "\xe2\xcc\xe2\x25\x3f\x4f\x09\xcd\xb8\x9d\xdb\xc1\x90"
+			  "\xa9\x11\xbc\x35\x44\x69\x2d\x9c\x64\x4f\x13\x31\x64"
+			  "\xcc\xfb\x4d\x95\x93\x86\x7f\x33\x7f\x1a\xef\xe9\x30"
+			  "\xf9\x67\xa1\x94\x0a\x69\x0f\x60\xcd\xc3\xab\x99\xdc"
+			  "\x42\xed\x97\x05\x00\x33\xc3\x15\x95\x3a\x06\xa0\x0e"
+			  "\x20\xa9\x0e\x82\xb9\x43\x45\x01\xaa\x6d\xda\x0d",
+		.output	= "zstd, short for Zstandard, is a fast lossless "
+			  "compression algorithm, targeting real-time "
+			  "compression scenarios at zlib-level and better "
+			  "compression ratios. The zstd compression library "
+			  "provides in-memory compression and decompression "
+			  "functions.",
+	},
+};
 #endif	/* _CRYPTO_TESTMGR_H */
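
The testmgr.h hunks above fold the old paired _enc_/_dec_ vector arrays into
single templates: every cipher here is an invertible transform, so one
(.ptext, .ctext, .len) triple can drive the encryption check and, reversed,
the decryption check. A minimal sketch of that idea, with caller-supplied
stand-ins for the cipher under test rather than the real testmgr plumbing:

#include <linux/errno.h>
#include <linux/string.h>	/* memcmp() */

/* Sketch only: enc/dec are hypothetical stand-ins, not testmgr APIs. */
typedef void (*xform_fn)(const char *key, unsigned int klen,
			 const char *in, char *out, unsigned int len);

static int check_vector_both_ways(const char *key, unsigned int klen,
				  const char *ptext, const char *ctext,
				  unsigned int len,
				  xform_fn enc, xform_fn dec)
{
	char buf[512];

	if (len > sizeof(buf))
		return -EINVAL;
	enc(key, klen, ptext, buf, len);	/* forward: ptext -> ctext */
	if (memcmp(buf, ctext, len))
		return -EINVAL;
	dec(key, klen, ctext, buf, len);	/* reverse: ctext -> ptext */
	return memcmp(buf, ptext, len) ? -EINVAL : 0;
}
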
diff --git a/crypto/zstd.c b/crypto/zstd.c
new file mode 100644
index 0000000..9a76b3e
--- /dev/null
+++ b/crypto/zstd.c
@@ -0,0 +1,265 @@
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2017-present, Facebook, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/vmalloc.h>
+#include <linux/zstd.h>
+#include <crypto/internal/scompress.h>
+
+
+#define ZSTD_DEF_LEVEL	3
+
+struct zstd_ctx {
+	ZSTD_CCtx *cctx;
+	ZSTD_DCtx *dctx;
+	void *cwksp;
+	void *dwksp;
+};
+
+static ZSTD_parameters zstd_params(void)
+{
+	return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
+}
+
+static int zstd_comp_init(struct zstd_ctx *ctx)
+{
+	int ret = 0;
+	const ZSTD_parameters params = zstd_params();
+	const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
+
+	ctx->cwksp = vzalloc(wksp_size);
+	if (!ctx->cwksp) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
+	if (!ctx->cctx) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+out:
+	return ret;
+out_free:
+	vfree(ctx->cwksp);
+	goto out;
+}
+
+static int zstd_decomp_init(struct zstd_ctx *ctx)
+{
+	int ret = 0;
+	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+
+	ctx->dwksp = vzalloc(wksp_size);
+	if (!ctx->dwksp) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
+	if (!ctx->dctx) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+out:
+	return ret;
+out_free:
+	vfree(ctx->dwksp);
+	goto out;
+}
+
+static void zstd_comp_exit(struct zstd_ctx *ctx)
+{
+	vfree(ctx->cwksp);
+	ctx->cwksp = NULL;
+	ctx->cctx = NULL;
+}
+
+static void zstd_decomp_exit(struct zstd_ctx *ctx)
+{
+	vfree(ctx->dwksp);
+	ctx->dwksp = NULL;
+	ctx->dctx = NULL;
+}
+
+static int __zstd_init(void *ctx)
+{
+	int ret;
+
+	ret = zstd_comp_init(ctx);
+	if (ret)
+		return ret;
+	ret = zstd_decomp_init(ctx);
+	if (ret)
+		zstd_comp_exit(ctx);
+	return ret;
+}
+
+static void *zstd_alloc_ctx(struct crypto_scomp *tfm)
+{
+	int ret;
+	struct zstd_ctx *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	ret = __zstd_init(ctx);
+	if (ret) {
+		kfree(ctx);
+		return ERR_PTR(ret);
+	}
+
+	return ctx;
+}
+
+static int zstd_init(struct crypto_tfm *tfm)
+{
+	struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __zstd_init(ctx);
+}
+
+static void __zstd_exit(void *ctx)
+{
+	zstd_comp_exit(ctx);
+	zstd_decomp_exit(ctx);
+}
+
+static void zstd_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+	__zstd_exit(ctx);
+	kzfree(ctx);
+}
+
+static void zstd_exit(struct crypto_tfm *tfm)
+{
+	struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	__zstd_exit(ctx);
+}
+
+static int __zstd_compress(const u8 *src, unsigned int slen,
+			   u8 *dst, unsigned int *dlen, void *ctx)
+{
+	size_t out_len;
+	struct zstd_ctx *zctx = ctx;
+	const ZSTD_parameters params = zstd_params();
+
+	out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params);
+	if (ZSTD_isError(out_len))
+		return -EINVAL;
+	*dlen = out_len;
+	return 0;
+}
+
+static int zstd_compress(struct crypto_tfm *tfm, const u8 *src,
+			 unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __zstd_compress(src, slen, dst, dlen, ctx);
+}
+
+static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src,
+			  unsigned int slen, u8 *dst, unsigned int *dlen,
+			  void *ctx)
+{
+	return __zstd_compress(src, slen, dst, dlen, ctx);
+}
+
+static int __zstd_decompress(const u8 *src, unsigned int slen,
+			     u8 *dst, unsigned int *dlen, void *ctx)
+{
+	size_t out_len;
+	struct zstd_ctx *zctx = ctx;
+
+	out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
+	if (ZSTD_isError(out_len))
+		return -EINVAL;
+	*dlen = out_len;
+	return 0;
+}
+
+static int zstd_decompress(struct crypto_tfm *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct zstd_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __zstd_decompress(src, slen, dst, dlen, ctx);
+}
+
+static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+			    unsigned int slen, u8 *dst, unsigned int *dlen,
+			    void *ctx)
+{
+	return __zstd_decompress(src, slen, dst, dlen, ctx);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "zstd",
+	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize		= sizeof(struct zstd_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_init		= zstd_init,
+	.cra_exit		= zstd_exit,
+	.cra_u			= { .compress = {
+	.coa_compress		= zstd_compress,
+	.coa_decompress		= zstd_decompress } }
+};
+
+static struct scomp_alg scomp = {
+	.alloc_ctx		= zstd_alloc_ctx,
+	.free_ctx		= zstd_free_ctx,
+	.compress		= zstd_scompress,
+	.decompress		= zstd_sdecompress,
+	.base			= {
+		.cra_name	= "zstd",
+		.cra_driver_name = "zstd-scomp",
+		.cra_module	 = THIS_MODULE,
+	}
+};
+
+static int __init zstd_mod_init(void)
+{
+	int ret;
+
+	ret = crypto_register_alg(&alg);
+	if (ret)
+		return ret;
+
+	ret = crypto_register_scomp(&scomp);
+	if (ret)
+		crypto_unregister_alg(&alg);
+
+	return ret;
+}
+
+static void __exit zstd_mod_fini(void)
+{
+	crypto_unregister_alg(&alg);
+	crypto_unregister_scomp(&scomp);
+}
+
+module_init(zstd_mod_init);
+module_exit(zstd_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Zstd Compression Algorithm");
+MODULE_ALIAS_CRYPTO("zstd");
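
crypto/zstd.c above registers the algorithm twice: once under the legacy
crypto_comp interface and once as an scomp with driver name "zstd-scomp"
(the "\x28\xb5\x2f\xfd" prefix in the test vectors is the standard zstd
frame magic). From another kernel module, the usual way in is the
crypto_comp API; a minimal sketch, assuming dst was sized generously
enough to hold the compressed frame:

#include <linux/crypto.h>
#include <linux/err.h>

/* Compress slen bytes of src into dst (capacity dmax); returns the
 * compressed size, or a negative errno. */
static int zstd_compress_buf(const u8 *src, unsigned int slen,
			     u8 *dst, unsigned int dmax)
{
	struct crypto_comp *tfm;
	unsigned int dlen = dmax;	/* in: capacity, out: bytes written */
	int ret;

	tfm = crypto_alloc_comp("zstd", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_comp_compress(tfm, src, slen, dst, &dlen);
	crypto_free_comp(tfm);

	return ret ? ret : dlen;
}
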
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 2d8de2f..cdd3136 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -82,11 +82,11 @@
 #ifdef CONFIG_ACPI_PROCFS_POWER
 extern struct proc_dir_entry *acpi_lock_ac_dir(void);
 extern void *acpi_unlock_ac_dir(struct proc_dir_entry *acpi_ac_dir);
-static int acpi_ac_open_fs(struct inode *inode, struct file *file);
 #endif
 
 
 static int ac_sleep_before_get_state_ms;
+static int ac_check_pmic = 1;
 
 static struct acpi_driver acpi_ac_driver = {
 	.name = "ac",
@@ -111,16 +111,6 @@
 
 #define to_acpi_ac(x) power_supply_get_drvdata(x)
 
-#ifdef CONFIG_ACPI_PROCFS_POWER
-static const struct file_operations acpi_ac_fops = {
-	.owner = THIS_MODULE,
-	.open = acpi_ac_open_fs,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-#endif
-
 /* --------------------------------------------------------------------------
                                AC Adapter Management
    -------------------------------------------------------------------------- */
@@ -209,11 +199,6 @@
 	return 0;
 }
 
-static int acpi_ac_open_fs(struct inode *inode, struct file *file)
-{
-	return single_open(file, acpi_ac_seq_show, PDE_DATA(inode));
-}
-
 static int acpi_ac_add_fs(struct acpi_ac *ac)
 {
 	struct proc_dir_entry *entry = NULL;
@@ -228,9 +213,8 @@
 	}
 
 	/* 'state' [R] */
-	entry = proc_create_data(ACPI_AC_FILE_STATE,
-				 S_IRUGO, acpi_device_dir(ac->device),
-				 &acpi_ac_fops, ac);
+	entry = proc_create_single_data(ACPI_AC_FILE_STATE, S_IRUGO,
+			acpi_device_dir(ac->device), acpi_ac_seq_show, ac);
 	if (!entry)
 		return -ENODEV;
 	return 0;
@@ -310,21 +294,43 @@
 	return NOTIFY_OK;
 }
 
-static int thinkpad_e530_quirk(const struct dmi_system_id *d)
+static int __init thinkpad_e530_quirk(const struct dmi_system_id *d)
 {
 	ac_sleep_before_get_state_ms = 1000;
 	return 0;
 }
 
-static const struct dmi_system_id ac_dmi_table[] = {
+static int __init ac_do_not_check_pmic_quirk(const struct dmi_system_id *d)
+{
+	ac_check_pmic = 0;
+	return 0;
+}
+
+static const struct dmi_system_id ac_dmi_table[]  __initconst = {
 	{
+	/* Thinkpad e530 */
 	.callback = thinkpad_e530_quirk,
-	.ident = "thinkpad e530",
 	.matches = {
 		DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
 		DMI_MATCH(DMI_PRODUCT_NAME, "32597CG"),
 		},
 	},
+	{
+		/* ECS EF20EA */
+		.callback = ac_do_not_check_pmic_quirk,
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "EF20EA"),
+		},
+	},
+	{
+		/* Lenovo Ideapad Miix 320 */
+		.callback = ac_do_not_check_pmic_quirk,
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80XF"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"),
+		},
+	},
 	{},
 };
 
@@ -384,7 +390,6 @@
 		kfree(ac);
 	}
 
-	dmi_check_system(ac_dmi_table);
 	return result;
 }
 
@@ -442,13 +447,17 @@
 	if (acpi_disabled)
 		return -ENODEV;
 
-	for (i = 0; i < ARRAY_SIZE(acpi_ac_blacklist); i++)
-		if (acpi_dev_present(acpi_ac_blacklist[i].hid, "1",
-				     acpi_ac_blacklist[i].hrv)) {
-			pr_info(PREFIX "AC: found native %s PMIC, not loading\n",
-				acpi_ac_blacklist[i].hid);
-			return -ENODEV;
-		}
+	dmi_check_system(ac_dmi_table);
+
+	if (ac_check_pmic) {
+		for (i = 0; i < ARRAY_SIZE(acpi_ac_blacklist); i++)
+			if (acpi_dev_present(acpi_ac_blacklist[i].hid, "1",
+					     acpi_ac_blacklist[i].hrv)) {
+				pr_info(PREFIX "AC: found native %s PMIC, not loading\n",
+					acpi_ac_blacklist[i].hid);
+				return -ENODEV;
+			}
+	}
 
 #ifdef CONFIG_ACPI_PROCFS_POWER
 	acpi_ac_dir = acpi_lock_ac_dir();
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index d553b00..2664452 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/clk-provider.h>
+#include <linux/platform_data/clk-st.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/clkdev.h>
@@ -72,6 +73,47 @@
 }
 
 #ifdef CONFIG_X86_AMD_PLATFORM_DEVICE
+
+static int misc_check_res(struct acpi_resource *ares, void *data)
+{
+	struct resource res;
+
+	return !acpi_dev_resource_memory(ares, &res);
+}
+
+static int st_misc_setup(struct apd_private_data *pdata)
+{
+	struct acpi_device *adev = pdata->adev;
+	struct platform_device *clkdev;
+	struct st_clk_data *clk_data;
+	struct resource_entry *rentry;
+	struct list_head resource_list;
+	int ret;
+
+	clk_data = devm_kzalloc(&adev->dev, sizeof(*clk_data), GFP_KERNEL);
+	if (!clk_data)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list, misc_check_res,
+				     NULL);
+	if (ret < 0)
+		return -ENOENT;
+
+	list_for_each_entry(rentry, &resource_list, node) {
+		clk_data->base = devm_ioremap(&adev->dev, rentry->res->start,
+					      resource_size(rentry->res));
+		break;
+	}
+
+	acpi_dev_free_resource_list(&resource_list);
+
+	clkdev = platform_device_register_data(&adev->dev, "clk-st",
+					       PLATFORM_DEVID_NONE, clk_data,
+					       sizeof(*clk_data));
+	return PTR_ERR_OR_ZERO(clkdev);
+}
+
 static const struct apd_device_desc cz_i2c_desc = {
 	.setup = acpi_apd_setup,
 	.fixed_clk_rate = 133000000,
@@ -94,6 +136,10 @@
 	.fixed_clk_rate = 48000000,
 	.properties = uart_properties,
 };
+
+static const struct apd_device_desc st_misc_desc = {
+	.setup = st_misc_setup,
+};
 #endif
 
 #ifdef CONFIG_ARM64
@@ -179,6 +225,7 @@
 	{ "AMD0020", APD_ADDR(cz_uart_desc) },
 	{ "AMDI0020", APD_ADDR(cz_uart_desc) },
 	{ "AMD0030", },
+	{ "AMD0040", APD_ADDR(st_misc_desc)},
 #endif
 #ifdef CONFIG_ARM64
 	{ "APMC0D0F", APD_ADDR(xgene_i2c_desc) },
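
st_misc_setup() above maps the device's MMIO resource and hands it to a
"clk-st" platform device via platform data. A hypothetical sketch of the
consumer side, assuming the matching clk-st driver reads the base back
with dev_get_platdata():

#include <linux/errno.h>
#include <linux/platform_device.h>
#include <linux/platform_data/clk-st.h>

static int clk_st_probe_sketch(struct platform_device *pdev)
{
	struct st_clk_data *st_data = dev_get_platdata(&pdev->dev);

	if (!st_data || !st_data->base)
		return -EINVAL;
	/* register the fixed clocks against st_data->base here ... */
	return 0;
}
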
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 2bcffec..cb6ac5c 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -69,6 +69,10 @@
 #define LPSS_SAVE_CTX			BIT(4)
 #define LPSS_NO_D3_DELAY		BIT(5)
 
+/* The Crystal Cove PMIC shares the same ACPI ID between different platforms */
+#define BYT_CRC_HRV			2
+#define CHT_CRC_HRV			3
+
 struct lpss_private_data;
 
 struct lpss_device_desc {
@@ -162,7 +166,7 @@
 	if (!adev->pnp.unique_id || strcmp(adev->pnp.unique_id, "1"))
 		return;
 
-	if (!acpi_dev_present("INT33FD", NULL, -1))
+	if (!acpi_dev_present("INT33FD", NULL, BYT_CRC_HRV))
 		pwm_add_table(byt_pwm_lookup, ARRAY_SIZE(byt_pwm_lookup));
 }
 
diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
index 4bde16f..9560030 100644
--- a/drivers/acpi/acpi_watchdog.c
+++ b/drivers/acpi/acpi_watchdog.c
@@ -12,35 +12,51 @@
 #define pr_fmt(fmt) "ACPI: watchdog: " fmt
 
 #include <linux/acpi.h>
-#include <linux/dmi.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
 
 #include "internal.h"
 
-static const struct dmi_system_id acpi_watchdog_skip[] = {
-	{
-		/*
-		 * On Lenovo Z50-70 there are two issues with the WDAT
-		 * table. First some of the instructions use RTC SRAM
-		 * to store persistent information. This does not work well
-		 * with Linux RTC driver. Second, more important thing is
-		 * that the instructions do not actually reset the system.
-		 *
-		 * On this particular system iTCO_wdt seems to work just
-		 * fine so we prefer that over WDAT for now.
-		 *
-		 * See also https://bugzilla.kernel.org/show_bug.cgi?id=199033.
-		 */
-		.ident = "Lenovo Z50-70",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "20354"),
-			DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Z50-70"),
-		},
-	},
-	{}
-};
+#ifdef CONFIG_RTC_MC146818_LIB
+#include <linux/mc146818rtc.h>
+
+/*
+ * There are several systems where the WDAT table is accessing RTC SRAM to
+ * store persistent information. This does not work well with the Linux RTC
+ * driver, so on those systems we skip the WDAT driver and prefer iTCO_wdt
+ * instead.
+ *
+ * See also https://bugzilla.kernel.org/show_bug.cgi?id=199033.
+ */
+static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat)
+{
+	const struct acpi_wdat_entry *entries;
+	int i;
+
+	entries = (struct acpi_wdat_entry *)(wdat + 1);
+	for (i = 0; i < wdat->entries; i++) {
+		const struct acpi_generic_address *gas;
+
+		gas = &entries[i].register_region;
+		if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+			switch (gas->address) {
+			case RTC_PORT(0):
+			case RTC_PORT(1):
+			case RTC_PORT(2):
+			case RTC_PORT(3):
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+#else
+static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat)
+{
+	return false;
+}
+#endif
 
 static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void)
 {
@@ -50,9 +66,6 @@
 	if (acpi_disabled)
 		return NULL;
 
-	if (dmi_check_system(acpi_watchdog_skip))
-		return NULL;
-
 	status = acpi_get_table(ACPI_SIG_WDAT, 0,
 				(struct acpi_table_header **)&wdat);
 	if (ACPI_FAILURE(status)) {
@@ -60,6 +73,11 @@
 		return NULL;
 	}
 
+	if (acpi_watchdog_uses_rtc(wdat)) {
+		pr_info("Skipping WDAT on this system because it uses RTC SRAM\n");
+		return NULL;
+	}
+
 	return wdat;
 }
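
The DMI blocklist is replaced by a generic scan: any WDAT action whose
register region is a system-I/O access to one of the CMOS/RTC ports marks
the whole table as unusable. For reference, the port macro in the switch
above comes from the architecture's mc146818rtc.h; on x86 it expands as
below, so ports 0x70-0x73 are the ones being rejected (other architectures
may define it differently):

#define RTC_PORT(x)	(0x70 + (x))	/* x86: RTC_PORT(0..3) = 0x70..0x73 */
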
 
diff --git a/drivers/acpi/acpica/acapps.h b/drivers/acpi/acpica/acapps.h
index a2a8512..5a9c2fe 100644
--- a/drivers/acpi/acpica/acapps.h
+++ b/drivers/acpi/acpica/acapps.h
@@ -143,6 +143,8 @@
 fl_split_input_pathname(char *input_path,
 			char **out_directory_path, char **out_filename);
 
+char *fl_get_file_basename(char *file_pathname);
+
 char *ad_generate_filename(char *prefix, char *table_id);
 
 void
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 0bc5500..1e62045 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -82,7 +82,7 @@
  * interrupt level
  */
 ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock);	/* For GPE data structs and registers */
-ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock);	/* For ACPI H/W except GPE registers */
+ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock);	/* For ACPI H/W except GPE registers */
 ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);
 
 /* Mutex for _OSI support */
diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
index 170802c..dc94de9 100644
--- a/drivers/acpi/acpica/dbnames.c
+++ b/drivers/acpi/acpica/dbnames.c
@@ -189,9 +189,15 @@
 	}
 
 	acpi_db_set_output_destination(ACPI_DB_DUPLICATE_OUTPUT);
-	acpi_os_printf("ACPI Namespace (from %4.4s (%p) subtree):\n",
-		       ((struct acpi_namespace_node *)subtree_entry)->name.
-		       ascii, subtree_entry);
+
+	if (((struct acpi_namespace_node *)subtree_entry)->parent) {
+		acpi_os_printf("ACPI Namespace (from %4.4s (%p) subtree):\n",
+			       ((struct acpi_namespace_node *)subtree_entry)->
+			       name.ascii, subtree_entry);
+	} else {
+		acpi_os_printf("ACPI Namespace (from %s):\n",
+			       ACPI_NAMESPACE_ROOT);
+	}
 
 	/* Display the subtree */
 
diff --git a/drivers/acpi/acpica/dbtest.c b/drivers/acpi/acpica/dbtest.c
index 3892680..8a54624 100644
--- a/drivers/acpi/acpica/dbtest.c
+++ b/drivers/acpi/acpica/dbtest.c
@@ -30,6 +30,8 @@
 static acpi_status
 acpi_db_test_string_type(struct acpi_namespace_node *node, u32 byte_length);
 
+static acpi_status acpi_db_test_package_type(struct acpi_namespace_node *node);
+
 static acpi_status
 acpi_db_read_from_object(struct acpi_namespace_node *node,
 			 acpi_object_type expected_type,
@@ -273,6 +275,11 @@
 		bit_length = byte_length * 8;
 		break;
 
+	case ACPI_TYPE_PACKAGE:
+
+		local_type = ACPI_TYPE_PACKAGE;
+		break;
+
 	case ACPI_TYPE_FIELD_UNIT:
 	case ACPI_TYPE_BUFFER_FIELD:
 	case ACPI_TYPE_LOCAL_REGION_FIELD:
@@ -305,6 +312,7 @@
 
 	acpi_os_printf("%14s: %4.4s",
 		       acpi_ut_get_type_name(node->type), node->name.ascii);
+
 	if (!obj_desc) {
 		acpi_os_printf(" Ignoring, no attached object\n");
 		return (AE_OK);
@@ -322,14 +330,13 @@
 		case ACPI_ADR_SPACE_SYSTEM_MEMORY:
 		case ACPI_ADR_SPACE_SYSTEM_IO:
 		case ACPI_ADR_SPACE_PCI_CONFIG:
-		case ACPI_ADR_SPACE_EC:
 
 			break;
 
 		default:
 
 			acpi_os_printf
-			    ("    %s space is not supported [%4.4s]\n",
+			    ("    %s space is not supported in this command [%4.4s]\n",
 			     acpi_ut_get_region_name(region_obj->region.
 						     space_id),
 			     region_obj->region.node->name.ascii);
@@ -359,6 +366,11 @@
 		status = acpi_db_test_buffer_type(node, bit_length);
 		break;
 
+	case ACPI_TYPE_PACKAGE:
+
+		status = acpi_db_test_package_type(node);
+		break;
+
 	default:
 
 		acpi_os_printf(" Ignoring, type not implemented (%2.2X)",
@@ -366,6 +378,13 @@
 		break;
 	}
 
+	/* Exit on error, but don't abort the namespace walk */
+
+	if (ACPI_FAILURE(status)) {
+		status = AE_OK;
+		goto exit;
+	}
+
 	switch (node->type) {
 	case ACPI_TYPE_LOCAL_REGION_FIELD:
 
@@ -373,12 +392,14 @@
 		acpi_os_printf(" (%s)",
 			       acpi_ut_get_region_name(region_obj->region.
 						       space_id));
+
 		break;
 
 	default:
 		break;
 	}
 
+exit:
 	acpi_os_printf("\n");
 	return (status);
 }
@@ -431,7 +452,6 @@
 	if (temp1->integer.value == value_to_write) {
 		value_to_write = 0;
 	}
-
 	/* Write a new value */
 
 	write_value.type = ACPI_TYPE_INTEGER;
@@ -708,6 +728,35 @@
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_db_test_package_type
+ *
+ * PARAMETERS:  node                - Parent NS node for the object
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Test read for a Package object.
+ *
+ ******************************************************************************/
+
+static acpi_status acpi_db_test_package_type(struct acpi_namespace_node *node)
+{
+	union acpi_object *temp1 = NULL;
+	acpi_status status;
+
+	/* Read the original value */
+
+	status = acpi_db_read_from_object(node, ACPI_TYPE_PACKAGE, &temp1);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	acpi_os_printf(" %8.8X Elements", temp1->package.count);
+	acpi_os_free(temp1);
+	return (status);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_db_read_from_object
  *
  * PARAMETERS:  node                - Parent NS node for the object
@@ -746,8 +795,8 @@
 	acpi_gbl_method_executing = TRUE;
 	status = acpi_evaluate_object(read_handle, NULL,
 				      &param_objects, &return_obj);
-	acpi_gbl_method_executing = FALSE;
 
+	acpi_gbl_method_executing = FALSE;
 	if (ACPI_FAILURE(status)) {
 		acpi_os_printf("Could not read from object, %s",
 			       acpi_format_exception(status));
@@ -760,6 +809,7 @@
 	case ACPI_TYPE_INTEGER:
 	case ACPI_TYPE_BUFFER:
 	case ACPI_TYPE_STRING:
+	case ACPI_TYPE_PACKAGE:
 		/*
 		 * Did we receive the type we wanted? Most important for the
 		 * Integer/Buffer case (when a field is larger than an Integer,
@@ -771,6 +821,7 @@
 			     acpi_ut_get_type_name(expected_type),
 			     acpi_ut_get_type_name(ret_value->type));
 
+			acpi_os_free(return_obj.pointer);
 			return (AE_TYPE);
 		}
 
diff --git a/drivers/acpi/acpica/dswscope.c b/drivers/acpi/acpica/dswscope.c
index d1422f9..7592176 100644
--- a/drivers/acpi/acpica/dswscope.c
+++ b/drivers/acpi/acpica/dswscope.c
@@ -115,7 +115,7 @@
 				      acpi_ut_get_type_name(old_scope_info->
 							    common.value)));
 	} else {
-		ACPI_DEBUG_PRINT_RAW((ACPI_DB_EXEC, "[\\___] (%s)", "ROOT"));
+		ACPI_DEBUG_PRINT_RAW((ACPI_DB_EXEC, ACPI_NAMESPACE_ROOT));
 	}
 
 	ACPI_DEBUG_PRINT_RAW((ACPI_DB_EXEC,
@@ -166,14 +166,14 @@
 
 	new_scope_info = walk_state->scope_info;
 	if (new_scope_info) {
-		ACPI_DEBUG_PRINT_RAW((ACPI_DB_EXEC,
-				      "[%4.4s] (%s)\n",
+		ACPI_DEBUG_PRINT_RAW((ACPI_DB_EXEC, "[%4.4s] (%s)\n",
 				      acpi_ut_get_node_name(new_scope_info->
 							    scope.node),
 				      acpi_ut_get_type_name(new_scope_info->
 							    common.value)));
 	} else {
-		ACPI_DEBUG_PRINT_RAW((ACPI_DB_EXEC, "[\\___] (ROOT)\n"));
+		ACPI_DEBUG_PRINT_RAW((ACPI_DB_EXEC, "%s\n",
+				      ACPI_NAMESPACE_ROOT));
 	}
 
 	acpi_ut_delete_generic_state(scope_info);
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index abbd590..e10fec9 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -634,6 +634,12 @@
 
 	flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
 
+	if (!gpe_event_info) {
+		gpe_event_info = acpi_ev_get_gpe_event_info(gpe_device, gpe_number);
+		if (!gpe_event_info)
+			goto error_exit;
+	}
+
 	/* Get the info block for the entire GPE register */
 
 	gpe_register_info = gpe_event_info->register_info;
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index c80e3bdf..b2d5f66 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -639,6 +639,28 @@
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_dispatch_gpe
+ *
+ * PARAMETERS:  gpe_device          - Parent GPE Device. NULL for GPE0/GPE1
+ *              gpe_number          - GPE level within the GPE block
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Detect and dispatch a General Purpose Event to either a function
+ *              (e.g. EC) or method (e.g. _Lxx/_Exx) handler.
+ *
+ ******************************************************************************/
+void acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)
+{
+	ACPI_FUNCTION_TRACE(acpi_dispatch_gpe);
+
+	acpi_ev_detect_gpe(gpe_device, NULL, gpe_number);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_dispatch_gpe)
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_finish_gpe
  *
  * PARAMETERS:  gpe_device          - Namespace node for the GPE Block
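
The new acpi_dispatch_gpe() export lets a driver that detects a GPE
outside the normal interrupt path hand it straight to ACPICA. A minimal
usage sketch; handle_wakeup_event() is a hypothetical caller, and the GPE
number would come from the device's declaration:

#include <linux/acpi.h>

static void handle_wakeup_event(u32 gpe_number)
{
	/*
	 * A NULL gpe_device selects the FADT GPE0/GPE1 blocks; the
	 * evgpe.c hunk above lets ACPICA look up the event info itself
	 * when it is called this way.
	 */
	acpi_dispatch_gpe(NULL, gpe_number);
}
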
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index 27a86ad..3de794b 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -390,14 +390,14 @@
 			  ACPI_BITMASK_ALL_FIXED_STATUS,
 			  ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));
 
-	lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+	lock_flags = acpi_os_acquire_raw_lock(acpi_gbl_hardware_lock);
 
 	/* Clear the fixed events in PM1 A/B */
 
 	status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
 					ACPI_BITMASK_ALL_FIXED_STATUS);
 
-	acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
+	acpi_os_release_raw_lock(acpi_gbl_hardware_lock, lock_flags);
 
 	if (ACPI_FAILURE(status)) {
 		goto exit;
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index 5d13968..6e39a77 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -227,7 +227,7 @@
 		return_ACPI_STATUS(AE_BAD_PARAMETER);
 	}
 
-	lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+	lock_flags = acpi_os_acquire_raw_lock(acpi_gbl_hardware_lock);
 
 	/*
 	 * At this point, we know that the parent register is one of the
@@ -288,7 +288,7 @@
 
 unlock_and_exit:
 
-	acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
+	acpi_os_release_raw_lock(acpi_gbl_hardware_lock, lock_flags);
 	return_ACPI_STATUS(status);
 }
 
diff --git a/drivers/acpi/acpica/rsdump.c b/drivers/acpi/acpica/rsdump.c
index b12a0b1..6601e71 100644
--- a/drivers/acpi/acpica/rsdump.c
+++ b/drivers/acpi/acpica/rsdump.c
@@ -539,7 +539,7 @@
 
 static void acpi_rs_dump_byte_list(u16 length, u8 * data)
 {
-	u8 i;
+	u16 i;
 
 	for (i = 0; i < length; i++) {
 		acpi_os_printf("%25s%2.2X : %2.2X\n", "Byte", i, data[i]);
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index c5085b7..5f8e7b5 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -88,7 +88,7 @@
  * DESCRIPTION: This function is called to verify and install an ACPI table.
  *              When this function is called by "Load" or "LoadTable" opcodes,
  *              or by acpi_load_table() API, the "Reload" parameter is set.
- *              After sucessfully returning from this function, table is
+ *              After successfully returning from this function, table is
  *              "INSTALLED" but not "VALIDATED".
  *
  ******************************************************************************/
diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c
index 148aeb8..fffa6f5 100644
--- a/drivers/acpi/acpica/utbuffer.c
+++ b/drivers/acpi/acpica/utbuffer.c
@@ -53,7 +53,7 @@
 
 		/* Print current offset */
 
-		acpi_os_printf("%6.4X: ", (base_offset + i));
+		acpi_os_printf("%8.4X: ", (base_offset + i));
 
 		/* Print 16 hex chars */
 
@@ -219,7 +219,7 @@
 
 		/* Print current offset */
 
-		fprintf(file, "%6.4X: ", (base_offset + i));
+		fprintf(file, "%8.4X: ", (base_offset + i));
 
 		/* Print 16 hex chars */
 
diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c
index d2d93e3..2e465e6 100644
--- a/drivers/acpi/acpica/utmutex.c
+++ b/drivers/acpi/acpica/utmutex.c
@@ -52,7 +52,7 @@
 		return_ACPI_STATUS (status);
 	}
 
-	status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
+	status = acpi_os_create_raw_lock(&acpi_gbl_hardware_lock);
 	if (ACPI_FAILURE (status)) {
 		return_ACPI_STATUS (status);
 	}
@@ -109,7 +109,7 @@
 	/* Delete the spinlocks */
 
 	acpi_os_delete_lock(acpi_gbl_gpe_lock);
-	acpi_os_delete_lock(acpi_gbl_hardware_lock);
+	acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
 	acpi_os_delete_lock(acpi_gbl_reference_count_lock);
 
 	/* Delete the reader/writer lock */
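
Switching acpi_gbl_hardware_lock to the raw OSL lock primitives matters
on PREEMPT_RT, where ordinary spinlocks become sleeping locks but the PM
register paths guarded above must stay atomic. A minimal sketch of the
Linux pattern the acpi_os_*_raw_lock() helpers are presumably built on:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(hw_lock);	/* stays a spinning lock on RT */

static void poke_pm1_registers(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&hw_lock, flags);
	/* short, non-sleeping hardware register access goes here */
	raw_spin_unlock_irqrestore(&hw_lock, flags);
}
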
diff --git a/drivers/acpi/acpica/utobject.c b/drivers/acpi/acpica/utobject.c
index 5b78fe0..ae6d8cc 100644
--- a/drivers/acpi/acpica/utobject.c
+++ b/drivers/acpi/acpica/utobject.c
@@ -8,6 +8,7 @@
  *****************************************************************************/
 
 #include <acpi/acpi.h>
+#include <linux/kmemleak.h>
 #include "accommon.h"
 #include "acnamesp.h"
 
@@ -70,6 +71,7 @@
 	if (!object) {
 		return_PTR(NULL);
 	}
+	kmemleak_not_leak(object);
 
 	switch (type) {
 	case ACPI_TYPE_REGION:
diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index 35ffd8d..a98c334 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -470,6 +470,7 @@
 		case 'X':
 
 			type |= ACPI_FORMAT_UPPER;
+			/* FALLTHROUGH */
 
 		case 'x':
 
diff --git a/drivers/acpi/acpica/utstring.c b/drivers/acpi/acpica/utstring.c
index bd57a77..5bef0b0 100644
--- a/drivers/acpi/acpica/utstring.c
+++ b/drivers/acpi/acpica/utstring.c
@@ -141,7 +141,7 @@
 	 * Special case for the root node. This can happen if we get an
 	 * error during the execution of module-level code.
 	 */
-	if (ACPI_COMPARE_NAME(name, "\\___")) {
+	if (ACPI_COMPARE_NAME(name, ACPI_ROOT_PATHNAME)) {
 		return;
 	}
 
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 1efefe9..02c6fd9 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -481,7 +481,7 @@
 		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
 
-			ghes_edac_report_mem_error(ghes, sev, mem_err);
+			ghes_edac_report_mem_error(sev, mem_err);
 
 			arch_apei_report_mem_error(sev, mem_err);
 			ghes_handle_memory_failure(gdata, sev);
@@ -1087,10 +1087,6 @@
 		goto err;
 	}
 
-	rc = ghes_edac_register(ghes, &ghes_dev->dev);
-	if (rc < 0)
-		goto err;
-
 	switch (generic->notify.type) {
 	case ACPI_HEST_NOTIFY_POLLED:
 		timer_setup(&ghes->timer, ghes_poll_func, TIMER_DEFERRABLE);
@@ -1102,14 +1098,14 @@
 		if (rc) {
 			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
 			       generic->header.source_id);
-			goto err_edac_unreg;
+			goto err;
 		}
 		rc = request_irq(ghes->irq, ghes_irq_func, IRQF_SHARED,
 				 "GHES IRQ", ghes);
 		if (rc) {
 			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
 			       generic->header.source_id);
-			goto err_edac_unreg;
+			goto err;
 		}
 		break;
 
@@ -1132,14 +1128,16 @@
 	default:
 		BUG();
 	}
+
 	platform_set_drvdata(ghes_dev, ghes);
 
+	ghes_edac_register(ghes, &ghes_dev->dev);
+
 	/* Handle any pending errors right away */
 	ghes_proc(ghes);
 
 	return 0;
-err_edac_unreg:
-	ghes_edac_unregister(ghes);
+
 err:
 	if (ghes) {
 		ghes_fini(ghes);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index bdb24d6..b0113a5 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -74,6 +74,8 @@
 static bool battery_driver_registered;
 static int battery_bix_broken_package;
 static int battery_notification_delay_ms;
+static int battery_ac_is_broken;
+static int battery_check_pmic = 1;
 static unsigned int cache_time = 1000;
 module_param(cache_time, uint, 0644);
 MODULE_PARM_DESC(cache_time, "cache time in milliseconds");
@@ -81,14 +83,6 @@
 #ifdef CONFIG_ACPI_PROCFS_POWER
 extern struct proc_dir_entry *acpi_lock_battery_dir(void);
 extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
-
-enum acpi_battery_files {
-	info_tag = 0,
-	state_tag,
-	alarm_tag,
-	ACPI_BATTERY_NUMFILES,
-};
-
 #endif
 
 static const struct acpi_device_id battery_device_ids[] = {
@@ -215,6 +209,20 @@
 		battery->full_charge_capacity < battery->design_capacity;
 }
 
+static int acpi_battery_handle_discharging(struct acpi_battery *battery)
+{
+	/*
+	 * Some devices wrongly report discharging if the battery's charge level
+	 * was above the device's start charging threshold at the moment the
+	 * AC adapter was plugged in, and the device thus did not start a new
+	 * charge cycle.
+	 */
+	if ((battery_ac_is_broken || power_supply_is_system_supplied()) &&
+	    battery->rate_now == 0)
+		return POWER_SUPPLY_STATUS_NOT_CHARGING;
+
+	return POWER_SUPPLY_STATUS_DISCHARGING;
+}
+
 static int acpi_battery_get_property(struct power_supply *psy,
 				     enum power_supply_property psp,
 				     union power_supply_propval *val)
@@ -230,7 +238,7 @@
 	switch (psp) {
 	case POWER_SUPPLY_PROP_STATUS:
 		if (battery->state & ACPI_BATTERY_STATE_DISCHARGING)
-			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+			val->intval = acpi_battery_handle_discharging(battery);
 		else if (battery->state & ACPI_BATTERY_STATE_CHARGING)
 			val->intval = POWER_SUPPLY_STATUS_CHARGING;
 		else if (acpi_battery_is_charged(battery))
@@ -985,9 +993,10 @@
 		"mA" : "mW";
 }
 
-static int acpi_battery_print_info(struct seq_file *seq, int result)
+static int acpi_battery_info_proc_show(struct seq_file *seq, void *offset)
 {
 	struct acpi_battery *battery = seq->private;
+	int result = acpi_battery_update(battery, false);
 
 	if (result)
 		goto end;
@@ -1041,9 +1050,10 @@
 	return result;
 }
 
-static int acpi_battery_print_state(struct seq_file *seq, int result)
+static int acpi_battery_state_proc_show(struct seq_file *seq, void *offset)
 {
 	struct acpi_battery *battery = seq->private;
+	int result = acpi_battery_update(battery, false);
 
 	if (result)
 		goto end;
@@ -1088,9 +1098,10 @@
 	return result;
 }
 
-static int acpi_battery_print_alarm(struct seq_file *seq, int result)
+static int acpi_battery_alarm_proc_show(struct seq_file *seq, void *offset)
 {
 	struct acpi_battery *battery = seq->private;
+	int result = acpi_battery_update(battery, false);
 
 	if (result)
 		goto end;
@@ -1142,82 +1153,22 @@
 	return result;
 }
 
-typedef int(*print_func)(struct seq_file *seq, int result);
-
-static print_func acpi_print_funcs[ACPI_BATTERY_NUMFILES] = {
-	acpi_battery_print_info,
-	acpi_battery_print_state,
-	acpi_battery_print_alarm,
-};
-
-static int acpi_battery_read(int fid, struct seq_file *seq)
+static int acpi_battery_alarm_proc_open(struct inode *inode, struct file *file)
 {
-	struct acpi_battery *battery = seq->private;
-	int result = acpi_battery_update(battery, false);
-	return acpi_print_funcs[fid](seq, result);
+	return single_open(file, acpi_battery_alarm_proc_show, PDE_DATA(inode));
 }
 
-#define DECLARE_FILE_FUNCTIONS(_name) \
-static int acpi_battery_read_##_name(struct seq_file *seq, void *offset) \
-{ \
-	return acpi_battery_read(_name##_tag, seq); \
-} \
-static int acpi_battery_##_name##_open_fs(struct inode *inode, struct file *file) \
-{ \
-	return single_open(file, acpi_battery_read_##_name, PDE_DATA(inode)); \
-}
-
-DECLARE_FILE_FUNCTIONS(info);
-DECLARE_FILE_FUNCTIONS(state);
-DECLARE_FILE_FUNCTIONS(alarm);
-
-#undef DECLARE_FILE_FUNCTIONS
-
-#define FILE_DESCRIPTION_RO(_name) \
-	{ \
-	.name = __stringify(_name), \
-	.mode = S_IRUGO, \
-	.ops = { \
-		.open = acpi_battery_##_name##_open_fs, \
-		.read = seq_read, \
-		.llseek = seq_lseek, \
-		.release = single_release, \
-		.owner = THIS_MODULE, \
-		}, \
-	}
-
-#define FILE_DESCRIPTION_RW(_name) \
-	{ \
-	.name = __stringify(_name), \
-	.mode = S_IFREG | S_IRUGO | S_IWUSR, \
-	.ops = { \
-		.open = acpi_battery_##_name##_open_fs, \
-		.read = seq_read, \
-		.llseek = seq_lseek, \
-		.write = acpi_battery_write_##_name, \
-		.release = single_release, \
-		.owner = THIS_MODULE, \
-		}, \
-	}
-
-static const struct battery_file {
-	struct file_operations ops;
-	umode_t mode;
-	const char *name;
-} acpi_battery_file[] = {
-	FILE_DESCRIPTION_RO(info),
-	FILE_DESCRIPTION_RO(state),
-	FILE_DESCRIPTION_RW(alarm),
+static const struct file_operations acpi_battery_alarm_fops = {
+	.owner		= THIS_MODULE,
+	.open		= acpi_battery_alarm_proc_open,
+	.read		= seq_read,
+	.write		= acpi_battery_write_alarm,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 
-#undef FILE_DESCRIPTION_RO
-#undef FILE_DESCRIPTION_RW
-
 static int acpi_battery_add_fs(struct acpi_device *device)
 {
-	struct proc_dir_entry *entry = NULL;
-	int i;
-
 	printk(KERN_WARNING PREFIX "Deprecated procfs I/F for battery is loaded,"
 			" please retry with CONFIG_ACPI_PROCFS_POWER cleared\n");
 	if (!acpi_device_dir(device)) {
@@ -1227,28 +1178,24 @@
 			return -ENODEV;
 	}
 
-	for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) {
-		entry = proc_create_data(acpi_battery_file[i].name,
-					 acpi_battery_file[i].mode,
-					 acpi_device_dir(device),
-					 &acpi_battery_file[i].ops,
-					 acpi_driver_data(device));
-		if (!entry)
-			return -ENODEV;
-	}
+	if (!proc_create_single_data("info", S_IRUGO, acpi_device_dir(device),
+			acpi_battery_info_proc_show, acpi_driver_data(device)))
+		return -ENODEV;
+	if (!proc_create_single_data("state", S_IRUGO, acpi_device_dir(device),
+			acpi_battery_state_proc_show, acpi_driver_data(device)))
+		return -ENODEV;
+	if (!proc_create_data("alarm", S_IFREG | S_IRUGO | S_IWUSR,
+			acpi_device_dir(device), &acpi_battery_alarm_fops,
+			acpi_driver_data(device)))
+		return -ENODEV;
 	return 0;
 }
 
 static void acpi_battery_remove_fs(struct acpi_device *device)
 {
-	int i;
 	if (!acpi_device_dir(device))
 		return;
-	for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i)
-		remove_proc_entry(acpi_battery_file[i].name,
-				  acpi_device_dir(device));
-
-	remove_proc_entry(acpi_device_bid(device), acpi_battery_dir);
+	remove_proc_subtree(acpi_device_bid(device), acpi_battery_dir);
 	acpi_device_dir(device) = NULL;
 }
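
The conversion above relies on proc_create_single_data(), which hides the
single_open()/seq_file boilerplate behind one call: the caller supplies only a
show() callback and an opaque data pointer, and that pointer comes back as
seq->private. A minimal sketch of the pattern (foo, foo_proc_show and
foo_add_proc are illustrative names, not part of this patch):

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	struct foo {
		int state;
	};

	static int foo_proc_show(struct seq_file *seq, void *offset)
	{
		/* the data pointer passed at creation time */
		struct foo *foo = seq->private;

		seq_printf(seq, "state: %d\n", foo->state);
		return 0;
	}

	static int foo_add_proc(struct foo *foo, struct proc_dir_entry *dir)
	{
		/* replaces the open/read/llseek/release file_operations */
		if (!proc_create_single_data("foo", S_IRUGO, dir,
					     foo_proc_show, foo))
			return -ENODEV;
		return 0;
	}
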
 
@@ -1332,23 +1279,64 @@
 	return 0;
 }
 
+static int __init
+battery_ac_is_broken_quirk(const struct dmi_system_id *d)
+{
+	battery_ac_is_broken = 1;
+	return 0;
+}
+
+static int __init
+battery_do_not_check_pmic_quirk(const struct dmi_system_id *d)
+{
+	battery_check_pmic = 0;
+	return 0;
+}
+
 static const struct dmi_system_id bat_dmi_table[] __initconst = {
 	{
+		/* NEC LZ750/LS */
 		.callback = battery_bix_broken_package_quirk,
-		.ident = "NEC LZ750/LS",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "NEC"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "PC-LZ750LS"),
 		},
 	},
 	{
+		/* Acer Aspire V5-573G */
 		.callback = battery_notification_delay_quirk,
-		.ident = "Acer Aspire V5-573G",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire V5-573G"),
 		},
 	},
+	{
+		/* Point of View mobii wintab p800w */
+		.callback = battery_ac_is_broken_quirk,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+			DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
+			DMI_MATCH(DMI_BIOS_VERSION, "3BAIR1013"),
+			/* Above matches are too generic, add bios-date match */
+			DMI_MATCH(DMI_BIOS_DATE, "08/22/2014"),
+		},
+	},
+	{
+		/* ECS EF20EA */
+		.callback = battery_do_not_check_pmic_quirk,
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "EF20EA"),
+		},
+	},
+	{
+		/* Lenovo Ideapad Miix 320 */
+		.callback = battery_do_not_check_pmic_quirk,
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80XF"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"),
+		},
+	},
 	{},
 };
 
@@ -1488,16 +1476,18 @@
 	unsigned int i;
 	int result;
 
-	for (i = 0; i < ARRAY_SIZE(acpi_battery_blacklist); i++)
-		if (acpi_dev_present(acpi_battery_blacklist[i], "1", -1)) {
-			pr_info(PREFIX ACPI_BATTERY_DEVICE_NAME
-				": found native %s PMIC, not loading\n",
-				acpi_battery_blacklist[i]);
-			return;
-		}
-
 	dmi_check_system(bat_dmi_table);
 
+	if (battery_check_pmic) {
+		for (i = 0; i < ARRAY_SIZE(acpi_battery_blacklist); i++)
+			if (acpi_dev_present(acpi_battery_blacklist[i], "1", -1)) {
+				pr_info(PREFIX ACPI_BATTERY_DEVICE_NAME
+					": found native %s PMIC, not loading\n",
+					acpi_battery_blacklist[i]);
+				return;
+			}
+	}
+
 #ifdef CONFIG_ACPI_PROCFS_POWER
 	acpi_battery_dir = acpi_lock_battery_dir();
 	if (!acpi_battery_dir)
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index f1cc4f9..2345a5e 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -263,19 +263,6 @@
 	return 0;
 }
 
-static int acpi_button_state_open_fs(struct inode *inode, struct file *file)
-{
-	return single_open(file, acpi_button_state_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations acpi_button_state_fops = {
-	.owner = THIS_MODULE,
-	.open = acpi_button_state_open_fs,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
 static int acpi_button_add_fs(struct acpi_device *device)
 {
 	struct acpi_button *button = acpi_driver_data(device);
@@ -311,9 +298,9 @@
 	}
 
 	/* create /proc/acpi/button/lid/LID/state */
-	entry = proc_create_data(ACPI_BUTTON_FILE_STATE,
-				 S_IRUGO, acpi_device_dir(device),
-				 &acpi_button_state_fops, device);
+	entry = proc_create_single_data(ACPI_BUTTON_FILE_STATE, S_IRUGO,
+			acpi_device_dir(device), acpi_button_state_seq_show,
+			device);
 	if (!entry) {
 		ret = -ENODEV;
 		goto remove_dev_dir;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 735c74a..d9ce4b1 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -39,6 +39,7 @@
 
 #include <linux/cpufreq.h>
 #include <linux/delay.h>
+#include <linux/iopoll.h>
 #include <linux/ktime.h>
 #include <linux/rwsem.h>
 #include <linux/wait.h>
@@ -49,7 +50,7 @@
 	struct mbox_chan *pcc_channel;
 	void __iomem *pcc_comm_addr;
 	bool pcc_channel_acquired;
-	ktime_t deadline;
+	unsigned int deadline_us;
 	unsigned int pcc_mpar, pcc_mrtt, pcc_nominal;
 
 	bool pending_pcc_write_cmd;	/* Any pending/batched PCC write cmds? */
@@ -156,6 +157,9 @@
 show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_perf);
 show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_perf);
 show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_nonlinear_perf);
+show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, lowest_freq);
+show_cppc_data(cppc_get_perf_caps, cppc_perf_caps, nominal_freq);
+
 show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, reference_perf);
 show_cppc_data(cppc_get_perf_ctrs, cppc_perf_fb_ctrs, wraparound_time);
 
@@ -183,6 +187,8 @@
 	&lowest_perf.attr,
 	&lowest_nonlinear_perf.attr,
 	&nominal_perf.attr,
+	&nominal_freq.attr,
+	&lowest_freq.attr,
 	NULL
 };
 
@@ -193,42 +199,31 @@
 
 static int check_pcc_chan(int pcc_ss_id, bool chk_err_bit)
 {
-	int ret = -EIO, status = 0;
+	int ret, status;
 	struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
 	struct acpi_pcct_shared_memory __iomem *generic_comm_base =
 		pcc_ss_data->pcc_comm_addr;
-	ktime_t next_deadline = ktime_add(ktime_get(),
-					  pcc_ss_data->deadline);
 
 	if (!pcc_ss_data->platform_owns_pcc)
 		return 0;
 
-	/* Retry in case the remote processor was too slow to catch up. */
-	while (!ktime_after(ktime_get(), next_deadline)) {
-		/*
-		 * Per spec, prior to boot the PCC space wil be initialized by
-		 * platform and should have set the command completion bit when
-		 * PCC can be used by OSPM
-		 */
-		status = readw_relaxed(&generic_comm_base->status);
-		if (status & PCC_CMD_COMPLETE_MASK) {
-			ret = 0;
-			if (chk_err_bit && (status & PCC_ERROR_MASK))
-				ret = -EIO;
-			break;
-		}
-		/*
-		 * Reducing the bus traffic in case this loop takes longer than
-		 * a few retries.
-		 */
-		udelay(3);
+	/*
+	 * Poll the PCC status register every 3us (delay_us) for at most
+	 * deadline_us (timeout_us), until the command complete bit (cond) is set.
+	 */
+	ret = readw_relaxed_poll_timeout(&generic_comm_base->status, status,
+					status & PCC_CMD_COMPLETE_MASK, 3,
+					pcc_ss_data->deadline_us);
+
+	if (likely(!ret)) {
+		pcc_ss_data->platform_owns_pcc = false;
+		if (chk_err_bit && (status & PCC_ERROR_MASK))
+			ret = -EIO;
 	}
 
-	if (likely(!ret))
-		pcc_ss_data->platform_owns_pcc = false;
-	else
-		pr_err("PCC check channel failed for ss: %d. Status=%x\n",
-		       pcc_ss_id, status);
+	if (unlikely(ret))
+		pr_err("PCC check channel failed for ss: %d. ret=%d\n",
+		       pcc_ss_id, ret);
 
 	return ret;
 }
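
readw_relaxed_poll_timeout() comes from <linux/iopoll.h>: it re-reads the
register with readw_relaxed() every delay_us microseconds, stores the result
in the val argument, and stops once the cond expression evaluates true or
timeout_us expires, returning 0 on success and -ETIMEDOUT otherwise. A minimal
sketch of the same pattern (wait_ready() and READY_BIT are made-up names):

	#include <linux/bitops.h>
	#include <linux/io.h>
	#include <linux/iopoll.h>

	#define READY_BIT	BIT(0)	/* illustrative */

	static int wait_ready(void __iomem *reg)
	{
		u16 val;

		/*
		 * Re-read every 3us, give up after 1000us; val holds the
		 * last value read even in the timeout case.
		 */
		return readw_relaxed_poll_timeout(reg, val, val & READY_BIT,
						  3, 1000);
	}
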
@@ -580,7 +575,7 @@
 		 * So add an arbitrary amount of wait on top of Nominal.
 		 */
 		usecs_lat = NUM_RETRIES * cppc_ss->latency;
-		pcc_data[pcc_ss_idx]->deadline = ns_to_ktime(usecs_lat * NSEC_PER_USEC);
+		pcc_data[pcc_ss_idx]->deadline_us = usecs_lat;
 		pcc_data[pcc_ss_idx]->pcc_mrtt = cppc_ss->min_turnaround_time;
 		pcc_data[pcc_ss_idx]->pcc_mpar = cppc_ss->max_access_rate;
 		pcc_data[pcc_ss_idx]->pcc_nominal = cppc_ss->latency;
@@ -613,7 +608,6 @@
 	return false;
 }
 
-
 /**
  * pcc_data_alloc() - Allocate the pcc_data memory for pcc subspace
  *
@@ -641,6 +635,34 @@
 
 	return 0;
 }
+
+/* Check if CPPC revision + num_ent combination is supported */
+static bool is_cppc_supported(int revision, int num_ent)
+{
+	int expected_num_ent;
+
+	switch (revision) {
+	case CPPC_V2_REV:
+		expected_num_ent = CPPC_V2_NUM_ENT;
+		break;
+	case CPPC_V3_REV:
+		expected_num_ent = CPPC_V3_NUM_ENT;
+		break;
+	default:
+		pr_debug("Firmware exports unsupported CPPC revision: %d\n",
+			revision);
+		return false;
+	}
+
+	if (expected_num_ent != num_ent) {
+		pr_debug("Firmware exports %d entries. Expected: %d for CPPC rev:%d\n",
+			num_ent, expected_num_ent, revision);
+		return false;
+	}
+
+	return true;
+}
+
 /*
  * An example CPC table looks like the following.
  *
@@ -731,14 +753,6 @@
 				cpc_obj->type);
 		goto out_free;
 	}
-
-	/* Only support CPPCv2. Bail otherwise. */
-	if (num_ent != CPPC_NUM_ENT) {
-		pr_debug("Firmware exports %d entries. Expected: %d\n",
-				num_ent, CPPC_NUM_ENT);
-		goto out_free;
-	}
-
 	cpc_ptr->num_entries = num_ent;
 
 	/* Second entry should be revision. */
@@ -750,12 +764,10 @@
 				cpc_obj->type);
 		goto out_free;
 	}
+	cpc_ptr->version = cpc_rev;
 
-	if (cpc_rev != CPPC_REV) {
-		pr_debug("Firmware exports revision:%d. Expected:%d\n",
-				cpc_rev, CPPC_REV);
+	if (!is_cppc_supported(cpc_rev, num_ent))
 		goto out_free;
-	}
 
 	/* Iterate through remaining entries in _CPC */
 	for (i = 2; i < num_ent; i++) {
@@ -808,6 +820,18 @@
 		}
 	}
 	per_cpu(cpu_pcc_subspace_idx, pr->id) = pcc_subspace_id;
+
+	/*
+	 * Initialize the remaining cpc_regs as unsupported.
+	 * Example: In case FW exposes CPPC v2, the below loop will initialize
+	 * LOWEST_FREQ and NOMINAL_FREQ regs as unsupported
+	 */
+	for (i = num_ent - 2; i < MAX_CPC_REG_ENT; i++) {
+		cpc_ptr->cpc_regs[i].type = ACPI_TYPE_INTEGER;
+		cpc_ptr->cpc_regs[i].cpc_entry.int_value = 0;
+	}
+
 	/* Store CPU Logical ID */
 	cpc_ptr->cpu_id = pr->id;
 
@@ -1037,26 +1061,34 @@
 {
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
 	struct cpc_register_resource *highest_reg, *lowest_reg,
-		*lowest_non_linear_reg, *nominal_reg;
-	u64 high, low, nom, min_nonlinear;
+		*lowest_non_linear_reg, *nominal_reg,
+		*low_freq_reg = NULL, *nom_freq_reg = NULL;
+	u64 high, low, nom, min_nonlinear, low_f = 0, nom_f = 0;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
-	struct cppc_pcc_data *pcc_ss_data;
+	struct cppc_pcc_data *pcc_ss_data = NULL;
 	int ret = 0, regs_in_pcc = 0;
 
-	if (!cpc_desc || pcc_ss_id < 0) {
+	if (!cpc_desc) {
 		pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
 		return -ENODEV;
 	}
 
-	pcc_ss_data = pcc_data[pcc_ss_id];
 	highest_reg = &cpc_desc->cpc_regs[HIGHEST_PERF];
 	lowest_reg = &cpc_desc->cpc_regs[LOWEST_PERF];
 	lowest_non_linear_reg = &cpc_desc->cpc_regs[LOW_NON_LINEAR_PERF];
 	nominal_reg = &cpc_desc->cpc_regs[NOMINAL_PERF];
+	low_freq_reg = &cpc_desc->cpc_regs[LOWEST_FREQ];
+	nom_freq_reg = &cpc_desc->cpc_regs[NOMINAL_FREQ];
 
 	/* Are any of the regs PCC? */
 	if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) ||
-		CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg)) {
+		CPC_IN_PCC(lowest_non_linear_reg) || CPC_IN_PCC(nominal_reg) ||
+		CPC_IN_PCC(low_freq_reg) || CPC_IN_PCC(nom_freq_reg)) {
+		if (pcc_ss_id < 0) {
+			pr_debug("Invalid pcc_ss_id\n");
+			return -ENODEV;
+		}
+		pcc_ss_data = pcc_data[pcc_ss_id];
 		regs_in_pcc = 1;
 		down_write(&pcc_ss_data->pcc_lock);
 		/* Ring doorbell once to update PCC subspace */
@@ -1081,6 +1113,17 @@
 	if (!high || !low || !nom || !min_nonlinear)
 		ret = -EFAULT;
 
+	/* Read optional lowest and nominal frequencies if present */
+	if (CPC_SUPPORTED(low_freq_reg))
+		cpc_read(cpunum, low_freq_reg, &low_f);
+
+	if (CPC_SUPPORTED(nom_freq_reg))
+		cpc_read(cpunum, nom_freq_reg, &nom_f);
+
+	perf_caps->lowest_freq = low_f;
+	perf_caps->nominal_freq = nom_f;
+
 out_err:
 	if (regs_in_pcc)
 		up_write(&pcc_ss_data->pcc_lock);
@@ -1101,16 +1144,15 @@
 	struct cpc_register_resource *delivered_reg, *reference_reg,
 		*ref_perf_reg, *ctr_wrap_reg;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
-	struct cppc_pcc_data *pcc_ss_data;
+	struct cppc_pcc_data *pcc_ss_data = NULL;
 	u64 delivered, reference, ref_perf, ctr_wrap_time;
 	int ret = 0, regs_in_pcc = 0;
 
-	if (!cpc_desc || pcc_ss_id < 0) {
+	if (!cpc_desc) {
 		pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
 		return -ENODEV;
 	}
 
-	pcc_ss_data = pcc_data[pcc_ss_id];
 	delivered_reg = &cpc_desc->cpc_regs[DELIVERED_CTR];
 	reference_reg = &cpc_desc->cpc_regs[REFERENCE_CTR];
 	ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF];
@@ -1126,6 +1168,11 @@
 	/* Are any of the regs PCC ?*/
 	if (CPC_IN_PCC(delivered_reg) || CPC_IN_PCC(reference_reg) ||
 		CPC_IN_PCC(ctr_wrap_reg) || CPC_IN_PCC(ref_perf_reg)) {
+		if (pcc_ss_id < 0) {
+			pr_debug("Invalid pcc_ss_id\n");
+			return -ENODEV;
+		}
+		pcc_ss_data = pcc_data[pcc_ss_id];
 		down_write(&pcc_ss_data->pcc_lock);
 		regs_in_pcc = 1;
 		/* Ring doorbell once to update PCC subspace */
@@ -1176,15 +1223,14 @@
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
 	struct cpc_register_resource *desired_reg;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
-	struct cppc_pcc_data *pcc_ss_data;
+	struct cppc_pcc_data *pcc_ss_data = NULL;
 	int ret = 0;
 
-	if (!cpc_desc || pcc_ss_id < 0) {
+	if (!cpc_desc) {
 		pr_debug("No CPC descriptor for CPU:%d\n", cpu);
 		return -ENODEV;
 	}
 
-	pcc_ss_data = pcc_data[pcc_ss_id];
 	desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
 
 	/*
@@ -1195,6 +1241,11 @@
 	 * achieve that goal here
 	 */
 	if (CPC_IN_PCC(desired_reg)) {
+		if (pcc_ss_id < 0) {
+			pr_debug("Invalid pcc_ss_id\n");
+			return -ENODEV;
+		}
+		pcc_ss_data = pcc_data[pcc_ss_id];
 		down_read(&pcc_ss_data->pcc_lock); /* BEGIN Phase-I */
 		if (pcc_ss_data->platform_owns_pcc) {
 			ret = check_pcc_chan(pcc_ss_id, false);
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 3d96e4d..a7c2673 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1257,10 +1257,7 @@
 	struct acpi_device *adev = ACPI_COMPANION(dev);
 
 	if (!adev)
-		return -ENODEV;
-
-	if (dev->pm_domain)
-		return -EEXIST;
+		return 0;
 
 	/*
 	 * Only attach the power domain to the first device if the
@@ -1268,7 +1265,7 @@
 	 * management twice.
 	 */
 	if (!acpi_device_is_first_physical_node(adev, dev))
-		return -EBUSY;
+		return 0;
 
 	acpi_add_pm_notifier(adev, dev, acpi_pm_notify_work_func);
 	dev_pm_domain_set(dev, &acpi_general_pm_domain);
@@ -1278,7 +1275,7 @@
 	}
 
 	dev->pm_domain->detach = acpi_dev_pm_detach;
-	return 0;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_pm_attach);
 #endif /* CONFIG_PM */
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 30a5729..bb94cf0 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1034,6 +1034,12 @@
 		acpi_ec_start(first_ec, true);
 }
 
+void acpi_ec_dispatch_gpe(void)
+{
+	if (first_ec)
+		acpi_dispatch_gpe(NULL, first_ec->gpe);
+}
+
 /* --------------------------------------------------------------------------
                                 Event Management
    -------------------------------------------------------------------------- */
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 1d0a501..530a3f6 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -188,6 +188,7 @@
 int acpi_ec_dsdt_probe(void);
 void acpi_ec_block_transactions(void);
 void acpi_ec_unblock_transactions(void);
+void acpi_ec_dispatch_gpe(void);
 int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
 			      acpi_handle handle, acpi_ec_query_func func,
 			      void *data);
diff --git a/drivers/acpi/reboot.c b/drivers/acpi/reboot.c
index 71769fd..6fa9c2a 100644
--- a/drivers/acpi/reboot.c
+++ b/drivers/acpi/reboot.c
@@ -8,8 +8,8 @@
 {
 	struct acpi_generic_address *rr;
 	struct pci_bus *bus0;
-	u8 reset_value;
 	unsigned int devfn;
+	u8 reset_value;
 
 	if (acpi_disabled)
 		return;
@@ -40,7 +40,7 @@
 		/* Form PCI device/function pair. */
 		devfn = PCI_DEVFN((rr->address >> 32) & 0xffff,
 				  (rr->address >> 16) & 0xffff);
-		printk(KERN_DEBUG "Resetting with ACPI PCI RESET_REG.");
+		printk(KERN_DEBUG "Resetting with ACPI PCI RESET_REG.\n");
 		/* Write the value that resets us. */
 		pci_bus_write_config_byte(bus0, devfn,
 				(rr->address & 0xffff), reset_value);
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 974e584..5d0486f 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -989,6 +989,13 @@
 	    !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) {
 		pm_system_cancel_wakeup();
 		s2idle_wakeup = true;
+		/*
+		 * On some platforms with the LPS0 _DSM device noirq resume
+		 * takes too much time for EC wakeup events to survive, so look
+		 * for them now.
+		 */
+		if (lps0_device_handle)
+			acpi_ec_dispatch_gpe();
 	}
 }
 
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 849c4fb..4a3410a 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -222,7 +222,7 @@
  * acpi_parse_entries_array - for each proc_num find a suitable subtable
  *
  * @id: table id (for debugging purposes)
- * @table_size: single entry size
+ * @table_size: size of the root table
  * @table_header: where does the table start?
  * @proc: array of acpi_subtable_proc struct containing entry id
  *        and associated handler with it
@@ -233,6 +233,11 @@
  * on it. Assumption is that there's only single handler for particular
  * entry id.
  *
+ * The table_size is not the size of the complete ACPI table (the length
+ * field in the header struct), but only the size of the root table; i.e.,
+ * the offset from the very first byte of the complete ACPI table to the
+ * first byte of the very first subtable.
+ *
  * On success returns sum of all matching entries for all proc handlers.
  * Otherwise, -ENODEV or -EINVAL is returned.
  */
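
For instance, the MADT walkers pass sizeof(struct acpi_table_madt) as
table_size, since the subtables start immediately after that fixed header.
A sketch of such a call site (acpi_parse_lapic is a hypothetical handler):

	static int __init count_lapics(struct acpi_table_header *table)
	{
		struct acpi_subtable_proc proc[] = {
			{
				.id = ACPI_MADT_TYPE_LOCAL_APIC,
				.handler = acpi_parse_lapic,	/* hypothetical */
			},
		};

		/* table_size == offset of the first subtable */
		return acpi_parse_entries_array(ACPI_SIG_MADT,
						sizeof(struct acpi_table_madt),
						table, proc, 1, 0);
	}
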
@@ -400,7 +405,7 @@
 		return -ENODEV;
 }
 
-/* 
+/*
  * The BIOS is supposed to supply a single APIC/MADT,
  * but some report two.  Provide a knob to use either.
  * (don't you wish instance 0 and 1 were not the same?)
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 4a3ac31..41b7064 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -20,6 +20,7 @@
 #include <linux/sizes.h>
 #include <linux/limits.h>
 #include <linux/clk/clk-conf.h>
+#include <linux/platform_device.h>
 
 #include <asm/irq.h>
 
@@ -101,8 +102,8 @@
 	if (strlen(driver_override)) {
 		dev->driver_override = driver_override;
 	} else {
-	       kfree(driver_override);
-	       dev->driver_override = NULL;
+		kfree(driver_override);
+		dev->driver_override = NULL;
 	}
 	device_unlock(_dev);
 
@@ -193,15 +194,18 @@
 /*
  * Primecells are part of the Advanced Microcontroller Bus Architecture,
  * so we call the bus "amba".
+ * DMA configuration for the platform and AMBA buses is the same, so we
+ * reuse the platform bus's DMA config routine here.
  */
 struct bus_type amba_bustype = {
 	.name		= "amba",
 	.dev_groups	= amba_dev_groups,
 	.match		= amba_match,
 	.uevent		= amba_uevent,
+	.dma_configure	= platform_dma_configure,
 	.pm		= &amba_pm,
-	.force_dma	= true,
 };
+EXPORT_SYMBOL_GPL(amba_bustype);
 
 static int __init amba_init(void)
 {
@@ -248,7 +252,7 @@
 			break;
 
 		ret = dev_pm_domain_attach(dev, true);
-		if (ret == -EPROBE_DEFER)
+		if (ret)
 			break;
 
 		ret = amba_get_enable_pclk(pcdev);
@@ -375,7 +379,7 @@
 	}
 
 	ret = dev_pm_domain_attach(&dev->dev, true);
-	if (ret == -EPROBE_DEFER) {
+	if (ret) {
 		iounmap(tmp);
 		goto err_release;
 	}
diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
index 7dce379..ee4880b 100644
--- a/drivers/android/Kconfig
+++ b/drivers/android/Kconfig
@@ -10,7 +10,7 @@
 
 config ANDROID_BINDER_IPC
 	bool "Android Binder IPC Driver"
-	depends on MMU
+	depends on MMU && !M68K
 	default n
 	---help---
 	  Binder is used in Android for both communication between processes,
@@ -32,19 +32,6 @@
 	  created. Each binder device has its own context manager, and is
 	  therefore logically separated from the other devices.
 
-config ANDROID_BINDER_IPC_32BIT
-	bool "Use old (Android 4.4 and earlier) 32-bit binder API"
-	depends on !64BIT && ANDROID_BINDER_IPC
-	default y
-	---help---
-	  The Binder API has been changed to support both 32 and 64bit
-	  applications in a mixed environment.
-
-	  Enable this to support an old 32-bit Android user-space (v4.4 and
-	  earlier).
-
-	  Note that enabling this will break newer Android user-space.
-
 config ANDROID_BINDER_IPC_SELFTEST
 	bool "Android Binder IPC Driver Selftest"
 	depends on ANDROID_BINDER_IPC
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index e578eee..95283f3 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -72,10 +72,6 @@
 #include <linux/security.h>
 #include <linux/spinlock.h>
 
-#ifdef CONFIG_ANDROID_BINDER_IPC_32BIT
-#define BINDER_IPC_32BIT 1
-#endif
-
 #include <uapi/linux/android/binder.h>
 #include "binder_alloc.h"
 #include "binder_trace.h"
@@ -2058,8 +2054,8 @@
 	struct binder_object_header *hdr;
 	size_t object_size = 0;
 
-	if (offset > buffer->data_size - sizeof(*hdr) ||
-	    buffer->data_size < sizeof(*hdr) ||
+	if (buffer->data_size < sizeof(*hdr) ||
+	    offset > buffer->data_size - sizeof(*hdr) ||
 	    !IS_ALIGNED(offset, sizeof(u32)))
 		return 0;
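
The reordering matters because data_size and the subtraction are unsigned:
when buffer->data_size < sizeof(*hdr), data_size - sizeof(*hdr) wraps around
to a huge value and the offset test never rejects anything. Checking the
buffer size first closes that hole. A standalone illustration:

	#include <stddef.h>
	#include <stdio.h>

	int main(void)
	{
		size_t data_size = 4, hdr_size = 8, offset = 1024;

		/* old order: 4 - 8 wraps to SIZE_MAX - 3, so even a wild
		 * offset is not rejected (prints 0) */
		printf("%d\n", offset > data_size - hdr_size);

		/* new order: undersized buffers are rejected up front
		 * (prints 1) */
		printf("%d\n", data_size < hdr_size ||
			       offset > data_size - hdr_size);
		return 0;
	}
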
 
@@ -3925,10 +3921,11 @@
 			binder_inner_proc_unlock(proc);
 			if (put_user(e->cmd, (uint32_t __user *)ptr))
 				return -EFAULT;
+			cmd = e->cmd;
 			e->cmd = BR_OK;
 			ptr += sizeof(uint32_t);
 
-			binder_stat_br(proc, thread, e->cmd);
+			binder_stat_br(proc, thread, cmd);
 		} break;
 		case BINDER_WORK_TRANSACTION_COMPLETE: {
 			binder_inner_proc_unlock(proc);
@@ -4696,7 +4693,7 @@
 	binder_defer_work(proc, BINDER_DEFERRED_PUT_FILES);
 }
 
-static int binder_vm_fault(struct vm_fault *vmf)
+static vm_fault_t binder_vm_fault(struct vm_fault *vmf)
 {
 	return VM_FAULT_SIGBUS;
 }
@@ -4730,7 +4727,9 @@
 		failure_string = "bad vm_flags";
 		goto err_bad_arg;
 	}
-	vma->vm_flags = (vma->vm_flags | VM_DONTCOPY) & ~VM_MAYWRITE;
+	vma->vm_flags |= VM_DONTCOPY | VM_MIXEDMAP;
+	vma->vm_flags &= ~VM_MAYWRITE;
+
 	vma->vm_ops = &binder_vm_ops;
 	vma->vm_private_data = proc;
 
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 5a426c8..4f382d5 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -219,7 +219,7 @@
 		mm = alloc->vma_vm_mm;
 
 	if (mm) {
-		down_write(&mm->mmap_sem);
+		down_read(&mm->mmap_sem);
 		vma = alloc->vma;
 	}
 
@@ -288,7 +288,7 @@
 		/* vm_insert_page does not seem to increment the refcount */
 	}
 	if (mm) {
-		up_write(&mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
 	return 0;
@@ -321,7 +321,7 @@
 	}
 err_no_vma:
 	if (mm) {
-		up_write(&mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 		mmput(mm);
 	}
 	return vma ? -ENOMEM : -ESRCH;
diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
index 940ddbc..583e366 100644
--- a/drivers/ata/acard-ahci.c
+++ b/drivers/ata/acard-ahci.c
@@ -271,7 +271,7 @@
 	 * Fill in command table information.  First, the header,
 	 * a SATA Register - Host to Device command FIS.
 	 */
-	cmd_tbl = pp->cmd_tbl + qc->tag * AHCI_CMD_TBL_SZ;
+	cmd_tbl = pp->cmd_tbl + qc->hw_tag * AHCI_CMD_TBL_SZ;
 
 	ata_tf_to_fis(&qc->tf, qc->dev->link->pmp, 1, cmd_tbl);
 	if (is_atapi) {
@@ -294,7 +294,7 @@
 	if (is_atapi)
 		opts |= AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH;
 
-	ahci_fill_cmd_slot(pp, qc->tag, opts);
+	ahci_fill_cmd_slot(pp, qc->hw_tag, opts);
 }
 
 static bool acard_ahci_qc_fill_rtf(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 824bd39..1609eba 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -390,7 +390,7 @@
  */
 #define AHCI_SHT(drv_name)						\
 	ATA_NCQ_SHT(drv_name),						\
-	.can_queue		= AHCI_MAX_CMDS - 1,			\
+	.can_queue		= AHCI_MAX_CMDS,			\
 	.sg_tablesize		= AHCI_MAX_SG,				\
 	.dma_boundary		= AHCI_DMA_BOUNDARY,			\
 	.shost_attrs		= ahci_shost_attrs,			\
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index e5d9097..965842a0 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1649,7 +1649,7 @@
 	 * Fill in command table information.  First, the header,
 	 * a SATA Register - Host to Device command FIS.
 	 */
-	cmd_tbl = pp->cmd_tbl + qc->tag * AHCI_CMD_TBL_SZ;
+	cmd_tbl = pp->cmd_tbl + qc->hw_tag * AHCI_CMD_TBL_SZ;
 
 	ata_tf_to_fis(&qc->tf, qc->dev->link->pmp, 1, cmd_tbl);
 	if (is_atapi) {
@@ -1670,7 +1670,7 @@
 	if (is_atapi)
 		opts |= AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH;
 
-	ahci_fill_cmd_slot(pp, qc->tag, opts);
+	ahci_fill_cmd_slot(pp, qc->hw_tag, opts);
 }
 
 static void ahci_fbs_dec_intr(struct ata_port *ap)
@@ -2006,7 +2006,7 @@
 	pp->active_link = qc->dev->link;
 
 	if (ata_is_ncq(qc->tf.protocol))
-		writel(1 << qc->tag, port_mmio + PORT_SCR_ACT);
+		writel(1 << qc->hw_tag, port_mmio + PORT_SCR_ACT);
 
 	if (pp->fbs_enabled && pp->fbs_last_dev != qc->dev->link->pmp) {
 		u32 fbs = readl(port_mmio + PORT_FBS);
@@ -2016,7 +2016,7 @@
 		pp->fbs_last_dev = qc->dev->link->pmp;
 	}
 
-	writel(1 << qc->tag, port_mmio + PORT_CMD_ISSUE);
+	writel(1 << qc->hw_tag, port_mmio + PORT_CMD_ISSUE);
 
 	ahci_sw_activity(qc->dev->link);
 
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 346b163..c41b9eea 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -759,7 +759,7 @@
 	tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
 	tf->flags |= tf_flags;
 
-	if (ata_ncq_enabled(dev) && likely(tag != ATA_TAG_INTERNAL)) {
+	if (ata_ncq_enabled(dev) && !ata_tag_internal(tag)) {
 		/* yay, NCQ */
 		if (!lba_48_ok(block, n_block))
 			return -ERANGE;
@@ -1570,8 +1570,9 @@
 	u8 command = tf->command;
 	int auto_timeout = 0;
 	struct ata_queued_cmd *qc;
-	unsigned int tag, preempted_tag;
-	u32 preempted_sactive, preempted_qc_active;
+	unsigned int preempted_tag;
+	u32 preempted_sactive;
+	u64 preempted_qc_active;
 	int preempted_nr_active_links;
 	DECLARE_COMPLETION_ONSTACK(wait);
 	unsigned long flags;
@@ -1587,20 +1588,10 @@
 	}
 
 	/* initialize internal qc */
+	qc = __ata_qc_from_tag(ap, ATA_TAG_INTERNAL);
 
-	/* XXX: Tag 0 is used for drivers with legacy EH as some
-	 * drivers choke if any other tag is given.  This breaks
-	 * ata_tag_internal() test for those drivers.  Don't use new
-	 * EH stuff without converting to it.
-	 */
-	if (ap->ops->error_handler)
-		tag = ATA_TAG_INTERNAL;
-	else
-		tag = 0;
-
-	qc = __ata_qc_from_tag(ap, tag);
-
-	qc->tag = tag;
+	qc->tag = ATA_TAG_INTERNAL;
+	qc->hw_tag = 0;
 	qc->scsicmd = NULL;
 	qc->ap = ap;
 	qc->dev = dev;
@@ -2295,7 +2286,7 @@
 		return 0;
 	}
 	if (ap->flags & ATA_FLAG_NCQ) {
-		hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE - 1);
+		hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE);
 		dev->flags |= ATA_DFLAG_NCQ;
 	}
 
@@ -3573,9 +3564,11 @@
 	DPRINTK("xfer_shift=%u, xfer_mode=0x%x\n",
 		dev->xfer_shift, (int)dev->xfer_mode);
 
-	ata_dev_info(dev, "configured for %s%s\n",
-		     ata_mode_string(ata_xfer_mode2mask(dev->xfer_mode)),
-		     dev_err_whine);
+	if (!(ehc->i.flags & ATA_EHI_QUIET) ||
+	    ehc->i.flags & ATA_EHI_DID_HARDRESET)
+		ata_dev_info(dev, "configured for %s%s\n",
+			     ata_mode_string(ata_xfer_mode2mask(dev->xfer_mode)),
+			     dev_err_whine);
 
 	return 0;
 
@@ -4557,9 +4550,6 @@
 	{ "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
 	{ "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM, },
 
-	/* Sandisk devices which are known to not handle LPM well */
-	{ "SanDisk SD7UB3Q*G1001",	NULL,	ATA_HORKAGE_NOLPM, },
-
 	/* devices that don't properly handle queued TRIM commands */
 	{ "Micron_M500IT_*",		"MU01",	ATA_HORKAGE_NO_NCQ_TRIM |
 						ATA_HORKAGE_ZERO_AFTER_TRIM, },
@@ -5136,7 +5126,7 @@
 	}
 
 	qc = __ata_qc_from_tag(ap, tag);
-	qc->tag = tag;
+	qc->tag = qc->hw_tag = tag;
 	qc->scsicmd = NULL;
 	qc->ap = ap;
 	qc->dev = dev;
@@ -5166,7 +5156,7 @@
 
 	qc->flags = 0;
 	tag = qc->tag;
-	if (likely(ata_tag_valid(tag))) {
+	if (ata_tag_valid(tag)) {
 		qc->tag = ATA_TAG_POISON;
 		if (ap->flags & ATA_FLAG_SAS_HOST)
 			ata_sas_free_tag(tag, ap);
@@ -5188,7 +5178,7 @@
 
 	/* command should be marked inactive atomically with qc completion */
 	if (ata_is_ncq(qc->tf.protocol)) {
-		link->sactive &= ~(1 << qc->tag);
+		link->sactive &= ~(1 << qc->hw_tag);
 		if (!link->sactive)
 			ap->nr_active_links--;
 	} else {
@@ -5206,7 +5196,7 @@
 	 * is called. (when rc != 0 and atapi request sense is needed)
 	 */
 	qc->flags &= ~ATA_QCFLAG_ACTIVE;
-	ap->qc_active &= ~(1 << qc->tag);
+	ap->qc_active &= ~(1ULL << qc->tag);
 
 	/* call completion callback */
 	qc->complete_fn(qc);
@@ -5363,29 +5353,29 @@
  *	RETURNS:
  *	Number of completed commands on success, -errno otherwise.
  */
-int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active)
+int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active)
 {
 	int nr_done = 0;
-	u32 done_mask;
+	u64 done_mask;
 
 	done_mask = ap->qc_active ^ qc_active;
 
 	if (unlikely(done_mask & qc_active)) {
-		ata_port_err(ap, "illegal qc_active transition (%08x->%08x)\n",
+		ata_port_err(ap, "illegal qc_active transition (%08llx->%08llx)\n",
 			     ap->qc_active, qc_active);
 		return -EINVAL;
 	}
 
 	while (done_mask) {
 		struct ata_queued_cmd *qc;
-		unsigned int tag = __ffs(done_mask);
+		unsigned int tag = __ffs64(done_mask);
 
 		qc = ata_qc_from_tag(ap, tag);
 		if (qc) {
 			ata_qc_complete(qc);
 			nr_done++;
 		}
-		done_mask &= ~(1 << tag);
+		done_mask &= ~(1ULL << tag);
 	}
 
 	return nr_done;
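
With the internal command now occupying bit 32 of qc_active, the mask is u64
and has to be manipulated with 64-bit-safe primitives: __ffs64() rather than
__ffs(), and 1ULL << tag rather than 1 << tag (which is undefined for
tag == 32 when int is 32 bits wide). The loop shape in isolation:

	#include <linux/bitops.h>

	static void complete_done_tags(u64 done_mask)
	{
		while (done_mask) {
			unsigned int tag = __ffs64(done_mask);

			/* ... look up and complete the qc for this tag ... */

			done_mask &= ~(1ULL << tag);
		}
	}
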
@@ -5416,11 +5406,11 @@
 	WARN_ON_ONCE(ap->ops->error_handler && ata_tag_valid(link->active_tag));
 
 	if (ata_is_ncq(prot)) {
-		WARN_ON_ONCE(link->sactive & (1 << qc->tag));
+		WARN_ON_ONCE(link->sactive & (1 << qc->hw_tag));
 
 		if (!link->sactive)
 			ap->nr_active_links++;
-		link->sactive |= 1 << qc->tag;
+		link->sactive |= 1 << qc->hw_tag;
 	} else {
 		WARN_ON_ONCE(link->sactive);
 
@@ -5429,7 +5419,7 @@
 	}
 
 	qc->flags |= ATA_QCFLAG_ACTIVE;
-	ap->qc_active |= 1 << qc->tag;
+	ap->qc_active |= 1ULL << qc->tag;
 
 	/*
 	 * We guarantee to LLDs that they will have at least one
@@ -6428,7 +6418,7 @@
 {
 	spin_lock_init(&host->lock);
 	mutex_init(&host->eh_mutex);
-	host->n_tags = ATA_MAX_QUEUE - 1;
+	host->n_tags = ATA_MAX_QUEUE;
 	host->dev = dev;
 	host->ops = ops;
 }
@@ -6510,7 +6500,7 @@
 {
 	int i, rc;
 
-	host->n_tags = clamp(sht->can_queue, 1, ATA_MAX_QUEUE - 1);
+	host->n_tags = clamp(sht->can_queue, 1, ATA_MAX_QUEUE);
 
 	/* host must have been started */
 	if (!(host->flags & ATA_HOST_STARTED)) {
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 513b260b..d541214 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -500,57 +500,6 @@
 	mutex_unlock(&ap->host->eh_mutex);
 }
 
-/**
- *	ata_scsi_timed_out - SCSI layer time out callback
- *	@cmd: timed out SCSI command
- *
- *	Handles SCSI layer timeout.  We race with normal completion of
- *	the qc for @cmd.  If the qc is already gone, we lose and let
- *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
- *	timed out and EH should be invoked.  Prevent ata_qc_complete()
- *	from finishing it by setting EH_SCHEDULED and return
- *	EH_NOT_HANDLED.
- *
- *	TODO: kill this function once old EH is gone.
- *
- *	LOCKING:
- *	Called from timer context
- *
- *	RETURNS:
- *	EH_HANDLED or EH_NOT_HANDLED
- */
-enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
-{
-	struct Scsi_Host *host = cmd->device->host;
-	struct ata_port *ap = ata_shost_to_port(host);
-	unsigned long flags;
-	struct ata_queued_cmd *qc;
-	enum blk_eh_timer_return ret;
-
-	DPRINTK("ENTER\n");
-
-	if (ap->ops->error_handler) {
-		ret = BLK_EH_NOT_HANDLED;
-		goto out;
-	}
-
-	ret = BLK_EH_HANDLED;
-	spin_lock_irqsave(ap->lock, flags);
-	qc = ata_qc_from_tag(ap, ap->link.active_tag);
-	if (qc) {
-		WARN_ON(qc->scsicmd != cmd);
-		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
-		qc->err_mask |= AC_ERR_TIMEOUT;
-		ret = BLK_EH_NOT_HANDLED;
-	}
-	spin_unlock_irqrestore(ap->lock, flags);
-
- out:
-	DPRINTK("EXIT, ret=%d\n", ret);
-	return ret;
-}
-EXPORT_SYMBOL(ata_scsi_timed_out);
-
 static void ata_eh_unload(struct ata_port *ap)
 {
 	struct ata_link *link;
@@ -873,9 +822,12 @@
 	int nr = 0;
 
 	/* count only non-internal commands */
-	for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++)
+	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
+		if (ata_tag_internal(tag))
+			continue;
 		if (ata_qc_from_tag(ap, tag))
 			nr++;
+	}
 
 	return nr;
 }
@@ -900,7 +852,7 @@
 		/* No progress during the last interval, tag all
 		 * in-flight qcs as timed out and freeze the port.
 		 */
-		for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) {
+		for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
 			struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
 			if (qc)
 				qc->err_mask |= AC_ERR_TIMEOUT;
@@ -1054,7 +1006,8 @@
 	/* we're gonna abort all commands, no need for fast drain */
 	ata_eh_set_pending(ap, 0);
 
-	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
+	/* include internal tag in iteration */
+	for (tag = 0; tag <= ATA_MAX_QUEUE; tag++) {
 		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
 
 		if (qc && (!link || qc->dev->link == link)) {
@@ -1483,6 +1436,10 @@
 		return "invalid argument";
 	if (err_mask & AC_ERR_DEV)
 		return "device error";
+	if (err_mask & AC_ERR_NCQ)
+		return "NCQ error";
+	if (err_mask & AC_ERR_NODEV_HINT)
+		return "Polling detection error";
 	return "unknown error";
 }
 
@@ -1866,10 +1823,10 @@
 	if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
 		int ret = scsi_check_sense(qc->scsicmd);
 		/*
-		 * SUCCESS here means that the sense code could
+		 * SUCCESS here means that the sense code could be
 		 * evaluated and should be passed to the upper layers
 		 * for correct evaluation.
-		 * FAILED means the sense code could not interpreted
+		 * FAILED means the sense code could not be interpreted
 		 * and the device would need to be reset.
 		 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
 		 * command would need to be retried.
@@ -2150,6 +2107,21 @@
 }
 
 /**
+ *      ata_eh_quiet - check if we need to be quiet about a command error
+ *      @qc: qc to check
+ *
+ *      Look at the qc flags and its scsi command request flags to determine
+ *      if we need to be quiet about the command failure.
+ */
+static inline bool ata_eh_quiet(struct ata_queued_cmd *qc)
+{
+	if (qc->scsicmd &&
+	    qc->scsicmd->request->rq_flags & RQF_QUIET)
+		qc->flags |= ATA_QCFLAG_QUIET;
+	return qc->flags & ATA_QCFLAG_QUIET;
+}
+
+/**
  *	ata_eh_link_autopsy - analyze error and determine recovery action
  *	@link: host link to perform autopsy on
  *
@@ -2166,7 +2138,7 @@
 	struct ata_eh_context *ehc = &link->eh_context;
 	struct ata_device *dev;
 	unsigned int all_err_mask = 0, eflags = 0;
-	int tag;
+	int tag, nr_failed = 0, nr_quiet = 0;
 	u32 serror;
 	int rc;
 
@@ -2218,12 +2190,16 @@
 		if (qc->err_mask & ~AC_ERR_OTHER)
 			qc->err_mask &= ~AC_ERR_OTHER;
 
-		/* SENSE_VALID trumps dev/unknown error and revalidation */
+		/*
+		 * SENSE_VALID trumps dev/unknown error and revalidation. Upper
+		 * layers will determine whether the command is worth retrying
+		 * based on the sense data and device class/type. Otherwise,
+		 * determine directly if the command is worth retrying using its
+		 * error mask and flags.
+		 */
 		if (qc->flags & ATA_QCFLAG_SENSE_VALID)
 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
-
-		/* determine whether the command is worth retrying */
-		if (ata_eh_worth_retry(qc))
+		else if (ata_eh_worth_retry(qc))
 			qc->flags |= ATA_QCFLAG_RETRY;
 
 		/* accumulate error info */
@@ -2232,8 +2208,17 @@
 		if (qc->flags & ATA_QCFLAG_IO)
 			eflags |= ATA_EFLAG_IS_IO;
 		trace_ata_eh_link_autopsy_qc(qc);
+
+		/* Count quiet errors */
+		if (ata_eh_quiet(qc))
+			nr_quiet++;
+		nr_failed++;
 	}
 
+	/* If all failed commands requested silence, then be quiet */
+	if (nr_quiet == nr_failed)
+		ehc->i.flags |= ATA_EHI_QUIET;
+
 	/* enforce default EH actions */
 	if (ap->pflags & ATA_PFLAG_FROZEN ||
 	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 89a9d4a..ce5019d 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -872,6 +872,9 @@
 
 		qc->sg = scsi_sglist(cmd);
 		qc->n_elem = scsi_sg_count(cmd);
+
+		if (cmd->request->rq_flags & RQF_QUIET)
+			qc->flags |= ATA_QCFLAG_QUIET;
 	} else {
 		cmd->result = (DID_OK << 16) | (QUEUE_FULL << 1);
 		cmd->scsi_done(cmd);
@@ -1316,7 +1319,7 @@
 		int depth;
 
 		depth = min(sdev->host->can_queue, ata_id_queue_depth(dev->id));
-		depth = min(ATA_MAX_QUEUE - 1, depth);
+		depth = min(ATA_MAX_QUEUE, depth);
 		scsi_change_queue_depth(sdev, depth);
 	}
 
@@ -1429,7 +1432,7 @@
 	/* limit and apply queue depth */
 	queue_depth = min(queue_depth, sdev->host->can_queue);
 	queue_depth = min(queue_depth, ata_id_queue_depth(dev->id));
-	queue_depth = min(queue_depth, ATA_MAX_QUEUE - 1);
+	queue_depth = min(queue_depth, ATA_MAX_QUEUE);
 
 	if (sdev->queue_depth == queue_depth)
 		return -EINVAL;
@@ -1895,7 +1898,7 @@
 	qc->nbytes = n_block * scmd->device->sector_size;
 
 	rc = ata_build_rw_tf(&qc->tf, qc->dev, block, n_block, tf_flags,
-			     qc->tag, class);
+			     qc->hw_tag, class);
 
 	if (likely(rc == 0))
 		return 0;
@@ -3233,7 +3236,7 @@
 
 	/* For NCQ commands copy the tag value */
 	if (ata_is_ncq(tf->protocol))
-		tf->nsect = qc->tag << 3;
+		tf->nsect = qc->hw_tag << 3;
 
 	/* enforce correct master/slave bit */
 	tf->device = dev->devno ?
@@ -3513,7 +3516,7 @@
 		tf->protocol = ATA_PROT_NCQ;
 		tf->command = ATA_CMD_FPDMA_SEND;
 		tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
-		tf->nsect = qc->tag << 3;
+		tf->nsect = qc->hw_tag << 3;
 		tf->hob_feature = (size / 512) >> 8;
 		tf->feature = size / 512;
 
@@ -3733,7 +3736,7 @@
 		tf->protocol = ATA_PROT_NCQ;
 		tf->command = ATA_CMD_FPDMA_RECV;
 		tf->hob_nsect = ATA_SUBCMD_FPDMA_RECV_ZAC_MGMT_IN & 0x1f;
-		tf->nsect = qc->tag << 3;
+		tf->nsect = qc->hw_tag << 3;
 		tf->feature = sect & 0xff;
 		tf->hob_feature = (sect >> 8) & 0xff;
 		tf->auxiliary = ATA_SUBCMD_ZAC_MGMT_IN_REPORT_ZONES | (options << 8);
@@ -3812,7 +3815,7 @@
 		tf->protocol = ATA_PROT_NCQ_NODATA;
 		tf->command = ATA_CMD_NCQ_NON_DATA;
 		tf->feature = ATA_SUBCMD_NCQ_NON_DATA_ZAC_MGMT_OUT;
-		tf->nsect = qc->tag << 3;
+		tf->nsect = qc->hw_tag << 3;
 		tf->auxiliary = sa | ((u16)all << 8);
 	} else {
 		tf->protocol = ATA_PROT_NODATA;
@@ -5117,7 +5120,7 @@
 		tag = tag < max_queue ? tag : 0;
 
 		/* the last tag is reserved for internal command. */
-		if (tag == ATA_TAG_INTERNAL)
+		if (ata_tag_internal(tag))
 			continue;
 
 		if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) {
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index de4ddd0..b3ed8f9 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -35,7 +35,7 @@
 static int eject_tray(struct ata_device *dev)
 {
 	struct ata_taskfile tf;
-	static const char cdb[] = {  GPCMD_START_STOP_UNIT,
+	static const char cdb[ATAPI_CDB_LEN] = { GPCMD_START_STOP_UNIT,
 		0, 0, 0,
 		0x02,     /* LoEj */
 		0, 0, 0, 0, 0, 0, 0,
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 3ba843f..ef8aaeb 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -224,17 +224,14 @@
 			       const char * const list[])
 {
 	unsigned char model_num[ATA_ID_PROD_LEN + 1];
-	int i = 0;
+	int i;
 
 	ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num));
 
-	while (list[i] != NULL) {
-		if (!strcmp(list[i], model_num)) {
-			pr_warn("%s is not supported for %s\n",
-				modestr, list[i]);
-			return 1;
-		}
-		i++;
+	i = match_string(list, -1, model_num);
+	if (i >= 0) {
+		pr_warn("%s is not supported for %s\n", modestr, list[i]);
+		return 1;
 	}
 	return 0;
 }
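
match_string() (declared in <linux/string.h>) returns the index of the first
array entry equal to the given string, or -EINVAL if there is none; a size of
-1 means the array is NULL-terminated, which matches how these mode lists are
laid out. A minimal sketch (bad_models is an illustrative table):

	#include <linux/string.h>
	#include <linux/types.h>

	static const char * const bad_models[] = {
		"MODEL-A",
		"MODEL-B",
		NULL,
	};

	static bool model_is_listed(const char *model)
	{
		/* -1: walk the array until the NULL terminator */
		return match_string(bad_models, -1, model) >= 0;
	}
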
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index ce3d667..6f142aa 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -761,7 +761,7 @@
 	if (tag > 0) {
 		dev_info(ap->dev,
 			 "%s tag=%u cmd=0x%02x dma dir=%s proto=%s dmacr=0x%08x\n",
-			 __func__, qc->tag, qc->tf.command,
+			 __func__, qc->hw_tag, qc->tf.command,
 			 get_dma_dir_descript(qc->dma_dir),
 			 get_prot_descript(qc->tf.protocol),
 			 sata_dwc_readl(&hsdev->sata_dwc_regs->dmacr));
@@ -789,7 +789,7 @@
 {
 	u8 status = 0;
 	u32 mask = 0x0;
-	u8 tag = qc->tag;
+	u8 tag = qc->hw_tag;
 	struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap);
 	struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
 	hsdev->sactive_queued = 0;
@@ -997,7 +997,7 @@
 
 static void sata_dwc_bmdma_setup(struct ata_queued_cmd *qc)
 {
-	u8 tag = qc->tag;
+	u8 tag = qc->hw_tag;
 
 	if (ata_is_ncq(qc->tf.protocol)) {
 		dev_dbg(qc->ap->dev, "%s: ap->link.sactive=0x%08x tag=%d\n",
@@ -1059,7 +1059,7 @@
 
 static void sata_dwc_bmdma_start(struct ata_queued_cmd *qc)
 {
-	u8 tag = qc->tag;
+	u8 tag = qc->hw_tag;
 
 	if (ata_is_ncq(qc->tf.protocol)) {
 		dev_dbg(qc->ap->dev, "%s: ap->link.sactive=0x%08x tag=%d\n",
@@ -1074,17 +1074,17 @@
 static unsigned int sata_dwc_qc_issue(struct ata_queued_cmd *qc)
 {
 	u32 sactive;
-	u8 tag = qc->tag;
+	u8 tag = qc->hw_tag;
 	struct ata_port *ap = qc->ap;
 	struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
 
 #ifdef DEBUG_NCQ
-	if (qc->tag > 0 || ap->link.sactive > 1)
+	if (qc->hw_tag > 0 || ap->link.sactive > 1)
 		dev_info(ap->dev,
 			 "%s ap id=%d cmd(0x%02x)=%s qc tag=%d prot=%s ap active_tag=0x%08x ap sactive=0x%08x\n",
 			 __func__, ap->print_id, qc->tf.command,
 			 ata_get_cmd_descript(qc->tf.command),
-			 qc->tag, get_prot_descript(qc->tf.protocol),
+			 qc->hw_tag, get_prot_descript(qc->tf.protocol),
 			 ap->link.active_tag, ap->link.sactive);
 #endif
 
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 95bf3ab..b8d9cfc 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -519,7 +519,7 @@
 	struct sata_fsl_port_priv *pp = ap->private_data;
 	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
 	void __iomem *hcr_base = host_priv->hcr_base;
-	unsigned int tag = sata_fsl_tag(qc->tag, hcr_base);
+	unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base);
 	struct command_desc *cd;
 	u32 desc_info = CMD_DESC_RES | CMD_DESC_SNOOP_ENABLE;
 	u32 num_prde = 0;
@@ -566,7 +566,7 @@
 	struct ata_port *ap = qc->ap;
 	struct sata_fsl_host_priv *host_priv = ap->host->private_data;
 	void __iomem *hcr_base = host_priv->hcr_base;
-	unsigned int tag = sata_fsl_tag(qc->tag, hcr_base);
+	unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base);
 
 	VPRINTK("xx_qc_issue called,CQ=0x%x,CA=0x%x,CE=0x%x,CC=0x%x\n",
 		ioread32(CQ + hcr_base),
@@ -595,7 +595,7 @@
 	struct sata_fsl_port_priv *pp = qc->ap->private_data;
 	struct sata_fsl_host_priv *host_priv = qc->ap->host->private_data;
 	void __iomem *hcr_base = host_priv->hcr_base;
-	unsigned int tag = sata_fsl_tag(qc->tag, hcr_base);
+	unsigned int tag = sata_fsl_tag(qc->hw_tag, hcr_base);
 	struct command_desc *cd;
 
 	cd = pp->cmdentry + tag;
@@ -1266,7 +1266,7 @@
 	}
 
 	VPRINTK("Status of all queues :\n");
-	VPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x,CQ=0x%x,apqa=0x%x\n",
+	VPRINTK("done_mask/CC = 0x%x, CA = 0x%x, CE=0x%x,CQ=0x%x,apqa=0x%llx\n",
 		done_mask,
 		ioread32(hcr_base + CA),
 		ioread32(hcr_base + CE),
@@ -1293,7 +1293,7 @@
 		ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
 		return;
 
-	} else if ((ap->qc_active & (1 << ATA_TAG_INTERNAL))) {
+	} else if ((ap->qc_active & (1ULL << ATA_TAG_INTERNAL))) {
 		iowrite32(1, hcr_base + CC);
 		qc = ata_qc_from_tag(ap, ATA_TAG_INTERNAL);
 
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 42d4589..cddf96f 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -1802,7 +1802,7 @@
 	struct mv_sg *mv_sg, *last_sg = NULL;
 	unsigned int si;
 
-	mv_sg = pp->sg_tbl[qc->tag];
+	mv_sg = pp->sg_tbl[qc->hw_tag];
 	for_each_sg(qc->sg, sg, qc->n_elem, si) {
 		dma_addr_t addr = sg_dma_address(sg);
 		u32 sg_len = sg_dma_len(sg);
@@ -1903,9 +1903,9 @@
 	writel(0, port_mmio + BMDMA_CMD);
 
 	/* load PRD table addr. */
-	writel((pp->sg_tbl_dma[qc->tag] >> 16) >> 16,
+	writel((pp->sg_tbl_dma[qc->hw_tag] >> 16) >> 16,
 		port_mmio + BMDMA_PRD_HIGH);
-	writelfl(pp->sg_tbl_dma[qc->tag],
+	writelfl(pp->sg_tbl_dma[qc->hw_tag],
 		port_mmio + BMDMA_PRD_LOW);
 
 	/* issue r/w command */
@@ -2071,17 +2071,17 @@
 	 */
 	if (!(tf->flags & ATA_TFLAG_WRITE))
 		flags |= CRQB_FLAG_READ;
-	WARN_ON(MV_MAX_Q_DEPTH <= qc->tag);
-	flags |= qc->tag << CRQB_TAG_SHIFT;
+	WARN_ON(MV_MAX_Q_DEPTH <= qc->hw_tag);
+	flags |= qc->hw_tag << CRQB_TAG_SHIFT;
 	flags |= (qc->dev->link->pmp & 0xf) << CRQB_PMP_SHIFT;
 
 	/* get current queue index from software */
 	in_index = pp->req_idx;
 
 	pp->crqb[in_index].sg_addr =
-		cpu_to_le32(pp->sg_tbl_dma[qc->tag] & 0xffffffff);
+		cpu_to_le32(pp->sg_tbl_dma[qc->hw_tag] & 0xffffffff);
 	pp->crqb[in_index].sg_addr_hi =
-		cpu_to_le32((pp->sg_tbl_dma[qc->tag] >> 16) >> 16);
+		cpu_to_le32((pp->sg_tbl_dma[qc->hw_tag] >> 16) >> 16);
 	pp->crqb[in_index].ctrl_flags = cpu_to_le16(flags);
 
 	cw = &pp->crqb[in_index].ata_cmd[0];
@@ -2164,17 +2164,17 @@
 	if (!(tf->flags & ATA_TFLAG_WRITE))
 		flags |= CRQB_FLAG_READ;
 
-	WARN_ON(MV_MAX_Q_DEPTH <= qc->tag);
-	flags |= qc->tag << CRQB_TAG_SHIFT;
-	flags |= qc->tag << CRQB_HOSTQ_SHIFT;
+	WARN_ON(MV_MAX_Q_DEPTH <= qc->hw_tag);
+	flags |= qc->hw_tag << CRQB_TAG_SHIFT;
+	flags |= qc->hw_tag << CRQB_HOSTQ_SHIFT;
 	flags |= (qc->dev->link->pmp & 0xf) << CRQB_PMP_SHIFT;
 
 	/* get current queue index from software */
 	in_index = pp->req_idx;
 
 	crqb = (struct mv_crqb_iie *) &pp->crqb[in_index];
-	crqb->addr = cpu_to_le32(pp->sg_tbl_dma[qc->tag] & 0xffffffff);
-	crqb->addr_hi = cpu_to_le32((pp->sg_tbl_dma[qc->tag] >> 16) >> 16);
+	crqb->addr = cpu_to_le32(pp->sg_tbl_dma[qc->hw_tag] & 0xffffffff);
+	crqb->addr_hi = cpu_to_le32((pp->sg_tbl_dma[qc->hw_tag] >> 16) >> 16);
 	crqb->flags = cpu_to_le32(flags);
 
 	crqb->ata_cmd[0] = cpu_to_le32(
@@ -2539,7 +2539,7 @@
 	failed_links = hweight16(new_map);
 
 	ata_port_info(ap,
-		      "%s: pmp_map=%04x qc_map=%04x failed_links=%d nr_active_links=%d\n",
+		      "%s: pmp_map=%04x qc_map=%04llx failed_links=%d nr_active_links=%d\n",
 		      __func__, pp->delayed_eh_pmp_map,
 		      ap->qc_active, failed_links,
 		      ap->nr_active_links);
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 8c683dd..10ae11a 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -400,7 +400,7 @@
 
 static struct scsi_host_template nv_swncq_sht = {
 	ATA_NCQ_SHT(DRV_NAME),
-	.can_queue		= ATA_MAX_QUEUE,
+	.can_queue		= ATA_MAX_QUEUE - 1,
 	.sg_tablesize		= LIBATA_MAX_PRD,
 	.dma_boundary		= ATA_DMA_BOUNDARY,
 	.slave_configure	= nv_swncq_slave_config,
@@ -740,32 +740,16 @@
 	sdev1 = ap->host->ports[1]->link.device[0].sdev;
 	if ((port0->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) ||
 	    (port1->flags & NV_ADMA_ATAPI_SETUP_COMPLETE)) {
-		/** We have to set the DMA mask to 32-bit if either port is in
-		    ATAPI mode, since they are on the same PCI device which is
-		    used for DMA mapping. If we set the mask we also need to set
-		    the bounce limit on both ports to ensure that the block
-		    layer doesn't feed addresses that cause DMA mapping to
-		    choke. If either SCSI device is not allocated yet, it's OK
-		    since that port will discover its correct setting when it
-		    does get allocated.
-		    Note: Setting 32-bit mask should not fail. */
-		if (sdev0)
-			blk_queue_bounce_limit(sdev0->request_queue,
-					       ATA_DMA_MASK);
-		if (sdev1)
-			blk_queue_bounce_limit(sdev1->request_queue,
-					       ATA_DMA_MASK);
-
-		dma_set_mask(&pdev->dev, ATA_DMA_MASK);
+		/*
+		 * We have to set the DMA mask to 32-bit if either port is in
+		 * ATAPI mode, since they are on the same PCI device which is
+		 * used for DMA mapping.  If either SCSI device is not allocated
+		 * yet, it's OK since that port will discover its correct
+		 * setting when it does get allocated.
+		 */
+		rc = dma_set_mask(&pdev->dev, ATA_DMA_MASK);
 	} else {
-		/** This shouldn't fail as it was set to this value before */
-		dma_set_mask(&pdev->dev, pp->adma_dma_mask);
-		if (sdev0)
-			blk_queue_bounce_limit(sdev0->request_queue,
-					       pp->adma_dma_mask);
-		if (sdev1)
-			blk_queue_bounce_limit(sdev1->request_queue,
-					       pp->adma_dma_mask);
+		rc = dma_set_mask(&pdev->dev, pp->adma_dma_mask);
 	}
 
 	blk_queue_segment_boundary(sdev->request_queue, segment_boundary);
@@ -1131,12 +1115,11 @@
 
 	VPRINTK("ENTER\n");
 
-	/* Ensure DMA mask is set to 32-bit before allocating legacy PRD and
-	   pad buffers */
-	rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
-	if (rc)
-		return rc;
-	rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+	/*
+	 * Ensure DMA mask is set to 32-bit before allocating legacy PRD and
+	 * pad buffers.
+	 */
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 	if (rc)
 		return rc;
 
@@ -1156,13 +1139,16 @@
 	pp->notifier_clear_block = pp->gen_block +
 	       NV_ADMA_NOTIFIER_CLEAR + (4 * ap->port_no);
 
-	/* Now that the legacy PRD and padding buffer are allocated we can
-	   safely raise the DMA mask to allocate the CPB/APRD table.
-	   These are allowed to fail since we store the value that ends up
-	   being used to set as the bounce limit in slave_config later if
-	   needed. */
-	dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
-	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+	/*
+	 * Now that the legacy PRD and padding buffer are allocated we can
+	 * try to raise the DMA mask to allocate the CPB/APRD table.
+	 */
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (rc)
+			return rc;
+	}
 	pp->adma_dma_mask = *dev->dma_mask;
 
 	mem = dmam_alloc_coherent(dev, NV_ADMA_PORT_PRIV_DMA_SZ,
@@ -1356,11 +1342,11 @@
 
 	for_each_sg(qc->sg, sg, qc->n_elem, si) {
 		aprd = (si < 5) ? &cpb->aprd[si] :
-			       &pp->aprd[NV_ADMA_SGTBL_LEN * qc->tag + (si-5)];
+			&pp->aprd[NV_ADMA_SGTBL_LEN * qc->hw_tag + (si-5)];
 		nv_adma_fill_aprd(qc, sg, si, aprd);
 	}
 	if (si > 5)
-		cpb->next_aprd = cpu_to_le64(((u64)(pp->aprd_dma + NV_ADMA_SGTBL_SZ * qc->tag)));
+		cpb->next_aprd = cpu_to_le64(((u64)(pp->aprd_dma + NV_ADMA_SGTBL_SZ * qc->hw_tag)));
 	else
 		cpb->next_aprd = cpu_to_le64(0);
 }
@@ -1385,7 +1371,7 @@
 static void nv_adma_qc_prep(struct ata_queued_cmd *qc)
 {
 	struct nv_adma_port_priv *pp = qc->ap->private_data;
-	struct nv_adma_cpb *cpb = &pp->cpb[qc->tag];
+	struct nv_adma_cpb *cpb = &pp->cpb[qc->hw_tag];
 	u8 ctl_flags = NV_CPB_CTL_CPB_VALID |
 		       NV_CPB_CTL_IEN;
 
@@ -1403,7 +1389,7 @@
 	wmb();
 
 	cpb->len		= 3;
-	cpb->tag		= qc->tag;
+	cpb->tag		= qc->hw_tag;
 	cpb->next_cpb_idx	= 0;
 
 	/* turn on NCQ flags for NCQ commands */
@@ -1466,9 +1452,9 @@
 		pp->last_issue_ncq = curr_ncq;
 	}
 
-	writew(qc->tag, mmio + NV_ADMA_APPEND);
+	writew(qc->hw_tag, mmio + NV_ADMA_APPEND);
 
-	DPRINTK("Issued tag %u\n", qc->tag);
+	DPRINTK("Issued tag %u\n", qc->hw_tag);
 
 	return 0;
 }
@@ -1730,8 +1716,8 @@
 
 	/* queue is full */
 	WARN_ON(dq->tail - dq->head == ATA_MAX_QUEUE);
-	dq->defer_bits |= (1 << qc->tag);
-	dq->tag[dq->tail++ & (ATA_MAX_QUEUE - 1)] = qc->tag;
+	dq->defer_bits |= (1 << qc->hw_tag);
+	dq->tag[dq->tail++ & (ATA_MAX_QUEUE - 1)] = qc->hw_tag;
 }
 
 static struct ata_queued_cmd *nv_swncq_qc_from_dq(struct ata_port *ap)
@@ -1796,7 +1782,7 @@
 	u32 sactive;
 	u32 done_mask;
 
-	ata_port_err(ap, "EH in SWNCQ mode,QC:qc_active 0x%X sactive 0x%X\n",
+	ata_port_err(ap, "EH in SWNCQ mode,QC:qc_active 0x%llX sactive 0x%X\n",
 		     ap->qc_active, ap->link.sactive);
 	ata_port_err(ap,
 		"SWNCQ:qc_active 0x%X defer_bits 0x%X last_issue_tag 0x%x\n  "
@@ -2010,7 +1996,7 @@
 	struct ata_bmdma_prd *prd;
 	unsigned int si, idx;
 
-	prd = pp->prd + ATA_MAX_PRD * qc->tag;
+	prd = pp->prd + ATA_MAX_PRD * qc->hw_tag;
 
 	idx = 0;
 	for_each_sg(qc->sg, sg, qc->n_elem, si) {
@@ -2048,16 +2034,16 @@
 
 	DPRINTK("Enter\n");
 
-	writel((1 << qc->tag), pp->sactive_block);
-	pp->last_issue_tag = qc->tag;
-	pp->dhfis_bits &= ~(1 << qc->tag);
-	pp->dmafis_bits &= ~(1 << qc->tag);
-	pp->qc_active |= (0x1 << qc->tag);
+	writel((1 << qc->hw_tag), pp->sactive_block);
+	pp->last_issue_tag = qc->hw_tag;
+	pp->dhfis_bits &= ~(1 << qc->hw_tag);
+	pp->dmafis_bits &= ~(1 << qc->hw_tag);
+	pp->qc_active |= (0x1 << qc->hw_tag);
 
 	ap->ops->sff_tf_load(ap, &qc->tf);	 /* load tf registers */
 	ap->ops->sff_exec_command(ap, &qc->tf);
 
-	DPRINTK("Issued tag %u\n", qc->tag);
+	DPRINTK("Issued tag %u\n", qc->hw_tag);
 
 	return 0;
 }
@@ -2207,7 +2193,7 @@
 	rw = qc->tf.flags & ATA_TFLAG_WRITE;
 
 	/* load PRD table addr. */
-	iowrite32(pp->prd_dma + ATA_PRD_TBL_SZ * qc->tag,
+	iowrite32(pp->prd_dma + ATA_PRD_TBL_SZ * qc->hw_tag,
 		  ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
 
 	/* specify data direction, triple-check start bit is clear */
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 010ca10..319f517 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -849,7 +849,7 @@
 	struct sil24_sge *sge;
 	u16 ctrl = 0;
 
-	cb = &pp->cmd_block[sil24_tag(qc->tag)];
+	cb = &pp->cmd_block[sil24_tag(qc->hw_tag)];
 
 	if (!ata_is_atapi(qc->tf.protocol)) {
 		prb = &cb->ata.prb;
@@ -891,7 +891,7 @@
 	struct ata_port *ap = qc->ap;
 	struct sil24_port_priv *pp = ap->private_data;
 	void __iomem *port = sil24_port_base(ap);
-	unsigned int tag = sil24_tag(qc->tag);
+	unsigned int tag = sil24_tag(qc->hw_tag);
 	dma_addr_t paddr;
 	void __iomem *activate;
 
@@ -911,7 +911,7 @@
 
 static bool sil24_qc_fill_rtf(struct ata_queued_cmd *qc)
 {
-	sil24_read_tf(qc->ap, qc->tag, &qc->result_tf);
+	sil24_read_tf(qc->ap, qc->hw_tag, &qc->result_tf);
 	return true;
 }
 
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 29b0eb4..3e63a90 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -70,81 +70,25 @@
 	  If unsure, say Y.
 
 config PREVENT_FIRMWARE_BUILD
-	bool "Prevent firmware from being built"
+	bool "Disable drivers features which enable custom firmware building"
 	default y
 	help
-	  Say yes to avoid building firmware. Firmware is usually shipped
-	  with the driver and only when updating the firmware should a
-	  rebuild be made.
-	  If unsure, say Y here.
+	  Say yes to disable driver features which enable building custom
+	  driver firmware at kernel build time. These drivers do not use the
+	  kernel firmware API to load firmware (CONFIG_FW_LOADER); instead
+	  they use their own custom loading mechanism. The required firmware
+	  is usually shipped with the driver; building the driver firmware
+	  should only be needed if you have an updated firmware source.
 
-config FW_LOADER
-	tristate "Userspace firmware loading support" if EXPERT
-	default y
-	---help---
-	  This option is provided for the case where none of the in-tree modules
-	  require userspace firmware loading support, but a module built
-	  out-of-tree does.
+	  Firmware should not be built as part of the kernel these days, so
+	  you should always prevent this and say Y here. There are only two
+	  old drivers which enable building of their firmware at kernel
+	  build time:
 
-config EXTRA_FIRMWARE
-	string "External firmware blobs to build into the kernel binary"
-	depends on FW_LOADER
-	help
-	  Various drivers in the kernel source tree may require firmware,
-	  which is generally available in your distribution's linux-firmware
-	  package.
+	    o CONFIG_WANXL through CONFIG_WANXL_BUILD_FIRMWARE
+	    o CONFIG_SCSI_AIC79XX through CONFIG_AIC79XX_BUILD_FIRMWARE
 
-	  The linux-firmware package should install firmware into
-	  /lib/firmware/ on your system, so they can be loaded by userspace
-	  helpers on request.
-
-	  This option allows firmware to be built into the kernel for the case
-	  where the user either cannot or doesn't want to provide it from
-	  userspace at runtime (for example, when the firmware in question is
-	  required for accessing the boot device, and the user doesn't want to
-	  use an initrd).
-
-	  This option is a string and takes the (space-separated) names of the
-	  firmware files -- the same names that appear in MODULE_FIRMWARE()
-	  and request_firmware() in the source. These files should exist under
-	  the directory specified by the EXTRA_FIRMWARE_DIR option, which is
-	  /lib/firmware by default.
-
-	  For example, you might set CONFIG_EXTRA_FIRMWARE="usb8388.bin", copy
-	  the usb8388.bin file into /lib/firmware, and build the kernel. Then
-	  any request_firmware("usb8388.bin") will be satisfied internally
-	  without needing to call out to userspace.
-
-	  WARNING: If you include additional firmware files into your binary
-	  kernel image that are not available under the terms of the GPL,
-	  then it may be a violation of the GPL to distribute the resulting
-	  image since it combines both GPL and non-GPL work. You should
-	  consult a lawyer of your own before distributing such an image.
-
-config EXTRA_FIRMWARE_DIR
-	string "Firmware blobs root directory"
-	depends on EXTRA_FIRMWARE != ""
-	default "/lib/firmware"
-	help
-	  This option controls the directory in which the kernel build system
-	  looks for the firmware files listed in the EXTRA_FIRMWARE option.
-
-config FW_LOADER_USER_HELPER
-	bool
-
-config FW_LOADER_USER_HELPER_FALLBACK
-	bool "Fallback user-helper invocation for firmware loading"
-	depends on FW_LOADER
-	select FW_LOADER_USER_HELPER
-	help
-	  This option enables / disables the invocation of user-helper
-	  (e.g. udev) for loading firmware files as a fallback after the
-	  direct file loading in kernel fails.  The user-mode helper is
-	  no longer required unless you have a special firmware file that
-	  resides in a non-standard path. Moreover, the udev support has
-	  been deprecated upstream.
-
-	  If you are unsure about this, say N here.
+source "drivers/base/firmware_loader/Kconfig"
 
 config WANT_DEV_COREDUMP
 	bool
diff --git a/drivers/base/base.h b/drivers/base/base.h
index d800de6..a75c302 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -161,3 +161,6 @@
 extern void device_links_no_driver(struct device *dev);
 extern bool device_links_busy(struct device *dev);
 extern void device_links_unbind_consumers(struct device *dev);
+
+/* device pm support */
+void device_pm_move_to_tail(struct device *dev);
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index ef61833..8bfd27e 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -184,10 +184,10 @@
 
 	dev = bus_find_device_by_name(bus, NULL, buf);
 	if (dev && dev->driver == drv) {
-		if (dev->parent)	/* Needed for USB */
+		if (dev->parent && dev->bus->need_parent_lock)
 			device_lock(dev->parent);
 		device_release_driver(dev);
-		if (dev->parent)
+		if (dev->parent && dev->bus->need_parent_lock)
 			device_unlock(dev->parent);
 		err = count;
 	}
@@ -211,12 +211,12 @@
 
 	dev = bus_find_device_by_name(bus, NULL, buf);
 	if (dev && dev->driver == NULL && driver_match_device(drv, dev)) {
-		if (dev->parent)	/* Needed for USB */
+		if (dev->parent && bus->need_parent_lock)
 			device_lock(dev->parent);
 		device_lock(dev);
 		err = driver_probe_device(drv, dev);
 		device_unlock(dev);
-		if (dev->parent)
+		if (dev->parent && bus->need_parent_lock)
 			device_unlock(dev->parent);
 
 		if (err > 0) {
@@ -735,10 +735,10 @@
 	int ret = 0;
 
 	if (!dev->driver) {
-		if (dev->parent)	/* Needed for USB */
+		if (dev->parent && dev->bus->need_parent_lock)
 			device_lock(dev->parent);
 		ret = device_attach(dev);
-		if (dev->parent)
+		if (dev->parent && dev->bus->need_parent_lock)
 			device_unlock(dev->parent);
 	}
 	return ret < 0 ? ret : 0;
@@ -770,10 +770,10 @@
 int device_reprobe(struct device *dev)
 {
 	if (dev->driver) {
-		if (dev->parent)        /* Needed for USB */
+		if (dev->parent && dev->bus->need_parent_lock)
 			device_lock(dev->parent);
 		device_release_driver(dev);
-		if (dev->parent)
+		if (dev->parent && dev->bus->need_parent_lock)
 			device_unlock(dev->parent);
 	}
 	return bus_rescan_devices_helper(dev, NULL);
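
A hedged sketch of the new opt-in, assuming a hypothetical example bus that,
like USB, must hold the parent lock across probe/remove (field and behaviour
as introduced by this series):

	static struct bus_type example_bus_type = {
		.name             = "example",
		/* driver core now takes dev->parent's lock for us */
		.need_parent_lock = true,
	};
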
diff --git a/drivers/base/core.c b/drivers/base/core.c
index b610816..36622b5 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -145,6 +145,26 @@
 }
 
 /**
+ * device_pm_move_to_tail - Move set of devices to the end of device lists
+ * @dev: Device to move
+ *
+ * This is a device_reorder_to_tail() wrapper taking the requisite locks.
+ *
+ * It moves the @dev along with all of its children and all of its consumers
+ * to the ends of the device_kset and dpm_list, recursively.
+ */
+void device_pm_move_to_tail(struct device *dev)
+{
+	int idx;
+
+	idx = device_links_read_lock();
+	device_pm_lock();
+	device_reorder_to_tail(dev, NULL);
+	device_pm_unlock();
+	device_links_read_unlock(idx);
+}
+
+/**
  * device_link_add - Create a link between two devices.
  * @consumer: Consumer end of the link.
  * @supplier: Supplier end of the link.
@@ -1467,7 +1487,7 @@
 
 	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
 	if (!dir)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	dir->class = class;
 	kobject_init(&dir->kobj, &class_dir_ktype);
@@ -1477,7 +1497,7 @@
 	retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name);
 	if (retval < 0) {
 		kobject_put(&dir->kobj);
-		return NULL;
+		return ERR_PTR(retval);
 	}
 	return &dir->kobj;
 }
@@ -1784,6 +1804,10 @@
 
 	parent = get_device(dev->parent);
 	kobj = get_device_parent(dev, parent);
+	if (IS_ERR(kobj)) {
+		error = PTR_ERR(kobj);
+		goto parent_error;
+	}
 	if (kobj)
 		dev->kobj.parent = kobj;
 
@@ -1882,6 +1906,7 @@
 	kobject_del(&dev->kobj);
  Error:
 	cleanup_glue_dir(dev, glue_dir);
+parent_error:
 	put_device(parent);
 name_error:
 	kfree(dev->p);
@@ -2406,7 +2431,7 @@
 	kfree(dev);
 }
 
-static struct device *
+static __printf(6, 0) struct device *
 device_create_groups_vargs(struct class *class, struct device *parent,
 			   dev_t devt, void *drvdata,
 			   const struct attribute_group **groups,
@@ -2684,7 +2709,7 @@
 /**
  * device_move - moves a device to a new parent
  * @dev: the pointer to the struct device to be moved
- * @new_parent: the new parent of the device (can by NULL)
+ * @new_parent: the new parent of the device (can be NULL)
  * @dpm_order: how to reorder the dpm_list
  */
 int device_move(struct device *dev, struct device *new_parent,
@@ -2701,6 +2726,11 @@
 	device_pm_lock();
 	new_parent = get_device(new_parent);
 	new_parent_kobj = get_device_parent(dev, new_parent);
+	if (IS_ERR(new_parent_kobj)) {
+		error = PTR_ERR(new_parent_kobj);
+		put_device(new_parent);
+		goto out;
+	}
 
 	pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev),
 		 __func__, new_parent ? dev_name(new_parent) : "<NULL>");
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index c9f5408..fb4e2df 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -122,9 +122,7 @@
 		 * the list is a good order for suspend but deferred
 		 * probe makes that very unsafe.
 		 */
-		device_pm_lock();
-		device_pm_move_last(dev);
-		device_pm_unlock();
+		device_pm_move_to_tail(dev);
 
 		dev_dbg(dev, "Retrying from deferred list\n");
 		if (initcall_debug && !initcalls_done)
@@ -582,7 +580,7 @@
 	pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
 		 drv->bus->name, __func__, dev_name(dev), drv->name);
 
-	pm_runtime_get_suppliers(dev);
+	pm_runtime_resume_suppliers(dev);
 	if (dev->parent)
 		pm_runtime_get_sync(dev->parent);
 
@@ -593,7 +591,6 @@
 	if (dev->parent)
 		pm_runtime_put(dev->parent);
 
-	pm_runtime_put_suppliers(dev);
 	return ret;
 }
 
@@ -817,13 +814,13 @@
 		return ret;
 	} /* ret > 0 means positive match */
 
-	if (dev->parent)	/* Needed for USB */
+	if (dev->parent && dev->bus->need_parent_lock)
 		device_lock(dev->parent);
 	device_lock(dev);
 	if (!dev->driver)
 		driver_probe_device(drv, dev);
 	device_unlock(dev);
-	if (dev->parent)
+	if (dev->parent && dev->bus->need_parent_lock)
 		device_unlock(dev->parent);
 
 	return 0;
@@ -919,7 +916,7 @@
 				    struct device_driver *drv,
 				    struct device *parent)
 {
-	if (parent)
+	if (parent && dev->bus->need_parent_lock)
 		device_lock(parent);
 
 	device_lock(dev);
@@ -927,7 +924,7 @@
 		__device_release_driver(dev, parent);
 
 	device_unlock(dev);
-	if (parent)
+	if (parent && dev->bus->need_parent_lock)
 		device_unlock(parent);
 }
 
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 95b6728..f98a097 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -84,9 +84,14 @@
 static __always_inline struct devres * alloc_dr(dr_release_t release,
 						size_t size, gfp_t gfp, int nid)
 {
-	size_t tot_size = sizeof(struct devres) + size;
+	size_t tot_size;
 	struct devres *dr;
 
+	/* We must catch any near-SIZE_MAX cases that could overflow. */
+	if (unlikely(check_add_overflow(sizeof(struct devres), size,
+					&tot_size)))
+		return NULL;
+
 	dr = kmalloc_node_track_caller(tot_size, gfp, nid);
 	if (unlikely(!dr))
 		return NULL;
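
check_add_overflow() (from <linux/overflow.h>) evaluates to true when the sum
would wrap; a minimal sketch of the same pattern, with hypothetical names:

	#include <linux/overflow.h>

	static void *example_alloc(size_t payload)
	{
		size_t tot_size;

		/* reject near-SIZE_MAX requests instead of wrapping */
		if (check_add_overflow(sizeof(struct example_hdr), payload,
				       &tot_size))
			return NULL;
		return kmalloc(tot_size, GFP_KERNEL);
	}
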
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index d82566d..f831a58 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -329,36 +329,13 @@
 #endif
 
 /*
- * Common configuration to enable DMA API use for a device
+ * enables DMA API use for a device
  */
-#include <linux/pci.h>
-
 int dma_configure(struct device *dev)
 {
-	struct device *bridge = NULL, *dma_dev = dev;
-	enum dev_dma_attr attr;
-	int ret = 0;
-
-	if (dev_is_pci(dev)) {
-		bridge = pci_get_host_bridge_device(to_pci_dev(dev));
-		dma_dev = bridge;
-		if (IS_ENABLED(CONFIG_OF) && dma_dev->parent &&
-		    dma_dev->parent->of_node)
-			dma_dev = dma_dev->parent;
-	}
-
-	if (dma_dev->of_node) {
-		ret = of_dma_configure(dev, dma_dev->of_node);
-	} else if (has_acpi_companion(dma_dev)) {
-		attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode));
-		if (attr != DEV_DMA_NOT_SUPPORTED)
-			ret = acpi_dma_configure(dev, attr);
-	}
-
-	if (bridge)
-		pci_put_host_bridge_device(bridge);
-
-	return ret;
+	if (dev->bus->dma_configure)
+		return dev->bus->dma_configure(dev);
+	return 0;
 }
 
 void dma_deconfigure(struct device *dev)
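
dma_configure() is now a thin dispatcher, so a bus that supports DMA opts in
via the new callback; a sketch with a hypothetical bus (the real platform-bus
wiring appears later in this diff):

	static struct bus_type example_bus_type = {
		.name          = "example",
		/* called by dma_configure() at driver-binding time */
		.dma_configure = example_bus_dma_configure,
	};
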
diff --git a/drivers/base/driver.c b/drivers/base/driver.c
index ba91255..857c8f1 100644
--- a/drivers/base/driver.c
+++ b/drivers/base/driver.c
@@ -148,7 +148,11 @@
 	int ret;
 	struct device_driver *other;
 
-	BUG_ON(!drv->bus->p);
+	if (!drv->bus->p) {
+		pr_err("Driver '%s' was unable to register with bus_type '%s' because the bus was not initialized.\n",
+			   drv->name, drv->bus->name);
+		return -EINVAL;
+	}
 
 	if ((drv->bus->probe && drv->probe) ||
 	    (drv->bus->remove && drv->remove) ||
diff --git a/drivers/base/firmware_loader/Kconfig b/drivers/base/firmware_loader/Kconfig
new file mode 100644
index 0000000..eb15d97
--- /dev/null
+++ b/drivers/base/firmware_loader/Kconfig
@@ -0,0 +1,154 @@
+menu "Firmware loader"
+
+config FW_LOADER
+	tristate "Firmware loading facility" if EXPERT
+	default y
+	help
+	  This enables the firmware loading facility in the kernel. The kernel
+	  will first look for built-in firmware, if it has any. Next, it will
+	  look for the requested firmware in a series of filesystem paths:
+
+		o firmware_class path module parameter or kernel boot param
+		o /lib/firmware/updates/UTS_RELEASE
+		o /lib/firmware/updates
+		o /lib/firmware/UTS_RELEASE
+		o /lib/firmware
+
+	  Enabling this feature only increases your kernel image by about
+	  828 bytes; enable this option unless you are certain you don't
+	  need firmware.
+
+	  You typically want this built in (=y), but you can also enable it
+	  as a module, in which case the firmware_class module will be built.
+	  Be sure to build this in if you are going to enable built-in
+	  firmware (CONFIG_EXTRA_FIRMWARE).
+
+if FW_LOADER
+
+config EXTRA_FIRMWARE
+	string "Build named firmware blobs into the kernel binary"
+	help
+	  Device drivers which require firmware can typically deal with
+	  having the kernel load firmware from the various supported
+	  /lib/firmware/ paths. This option enables you to build firmware
+	  files into the kernel. Built-in firmware is searched before any
+	  filesystem lookup in the supported /lib/firmware paths documented
+	  under CONFIG_FW_LOADER.
+
+	  This may be useful for testing or if the firmware is required early on
+	  in boot and cannot rely on the firmware being placed in an initrd or
+	  initramfs.
+
+	  This option is a string and takes the (space-separated) names of the
+	  firmware files -- the same names that appear in MODULE_FIRMWARE()
+	  and request_firmware() in the source. These files should exist under
+	  the directory specified by the EXTRA_FIRMWARE_DIR option, which is
+	  /lib/firmware by default.
+
+	  For example, you might set CONFIG_EXTRA_FIRMWARE="usb8388.bin", copy
+	  the usb8388.bin file into /lib/firmware, and build the kernel. Then
+	  any request_firmware("usb8388.bin") will be satisfied internally
+	  inside the kernel without ever looking at your filesystem at runtime.
+
+	  WARNING: If you include additional firmware files into your binary
+	  kernel image that are not available under the terms of the GPL,
+	  then it may be a violation of the GPL to distribute the resulting
+	  image since it combines both GPL and non-GPL work. You should
+	  consult a lawyer of your own before distributing such an image.
+
+config EXTRA_FIRMWARE_DIR
+	string "Firmware blobs root directory"
+	depends on EXTRA_FIRMWARE != ""
+	default "/lib/firmware"
+	help
+	  This option controls the directory in which the kernel build system
+	  looks for the firmware files listed in the EXTRA_FIRMWARE option.
+
+config FW_LOADER_USER_HELPER
+	bool "Enable the firmware sysfs fallback mechanism"
+	help
+	  This option enables a sysfs interface which lets userspace load
+	  firmware into the kernel as a fallback mechanism, used if and only
+	  if the kernel's direct filesystem lookup for the firmware failed
+	  using the different /lib/firmware/ paths, the path specified in the
+	  firmware_class path module parameter, or the firmware_class path
+	  kernel boot parameter if the firmware_class is built-in. For details
+	  on how to work with the sysfs fallback mechanism refer to
+	  Documentation/driver-api/firmware/fallback-mechanisms.rst.
+
+	  The direct filesystem lookup for firmware is always used first now.
+
+	  If the kernel's direct filesystem lookup for firmware fails to find
+	  the requested firmware a sysfs fallback loading facility is made
+	  available and userspace is informed about this through uevents.
+	  The uevent can be suppressed if the driver explicitly requested it;
+	  this is known as the driver using the custom fallback mechanism.
+	  If the custom fallback mechanism is used, userspace must always
+	  acknowledge failure to find firmware, as the timeout for the fallback
+	  mechanism is disabled and failed requests will linger forever.
+
+	  This used to be the default firmware loading facility, and udev used
+	  to listen for uevents to load firmware for the kernel. The firmware
+	  loading functionality has since been removed from udev, so it can no
+	  longer be relied upon as a fallback mechanism. Linux no longer
+	  relies on or uses a fallback mechanism in userspace. If you need to
+	  rely on one, refer to the permissively licensed firmwared:
+
+	  https://github.com/teg/firmwared
+
+	  Since this was the default firmware loading facility at one point,
+	  old userspace may exist which relies upon it, and as such this
+	  mechanism can never be removed from the kernel.
+
+	  You should only enable this functionality if you are certain you
+	  require a fallback mechanism and have a userspace mechanism ready to
+	  load firmware in case it is not found. One main reason for this may
+	  be that you have drivers which require firmware to be built-in and
+	  for whatever reason cannot place the required firmware in initramfs.
+	  Another reason kernels may have this feature enabled is to support a
+	  driver which explicitly relies on this fallback mechanism. Only two
+	  drivers need this today:
+
+	    o CONFIG_LEDS_LP55XX_COMMON
+	    o CONFIG_DELL_RBU
+
+	  Outside of supporting the above drivers, another reason for needing
+	  this may be that your firmware resides outside of the paths the kernel
+	  looks in and cannot possibly be specified using the firmware_class
+	  path module parameter or kernel firmware_class path boot parameter
+	  if firmware_class is built-in.
+
+	  A modern use case may be to temporarily mount a custom partition
+	  during provisioning which is only accessible to userspace, and then
+	  to use it to look for and fetch the required firmware. Such driver
+	  functionality may never even be desirable upstream, and as such may
+	  only need to be supported as an interface for provisioning. Since
+	  udev's firmware loading facility has been removed, you can use
+	  firmwared or a fork of it to customize how you want to load firmware
+	  based on the uevents issued.
+
+	  Enabling this option will increase your kernel image size by about
+	  13436 bytes.
+
+	  If you are unsure about this, say N here, unless you are a Linux
+	  distribution and need to support the above two drivers, or you are
+	  certain you need to support some really custom firmware loading
+	  facility in userspace.
+
+config FW_LOADER_USER_HELPER_FALLBACK
+	bool "Force the firmware sysfs fallback mechanism when possible"
+	depends on FW_LOADER_USER_HELPER
+	help
+	  Enabling this option forces the sysfs userspace fallback mechanism
+	  to be used for all firmware requests which do not explicitly disable
+	  a fallback mechanism. The one firmware call which does prohibit a
+	  fallback mechanism is request_firmware_direct(). This option is kept
+	  for backward compatibility purposes given this precise mechanism can
+	  also be enabled by setting the proc sysctl value to true:
+
+	       /proc/sys/kernel/firmware_config/force_sysfs_fallback
+
+	  If you are unsure about this, say N here.
+
+endif # FW_LOADER
+endmenu
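
As the help text above notes, the one request primitive that always bypasses
this sysfs fallback is request_firmware_direct(); a sketch with a hypothetical
firmware name:

	/* Never triggers the sysfs fallback, even with
	 * CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y or the
	 * force_sysfs_fallback sysctl enabled.
	 */
	err = request_firmware_direct(&fw, "example/board-cal.bin", dev);
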
diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index 3583541..b676a99 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -125,7 +125,7 @@
 }
 
 /**
- * firmware_timeout_store - set number of seconds to wait for firmware
+ * firmware_timeout_store() - set number of seconds to wait for firmware
  * @class: device class pointer
  * @attr: device attribute pointer
  * @buf: buffer to scan for timeout value
@@ -239,7 +239,7 @@
 }
 
 /**
- * firmware_loading_store - set value in the 'loading' control file
+ * firmware_loading_store() - set value in the 'loading' control file
  * @dev: device pointer
  * @attr: device attribute pointer
  * @buf: buffer to scan for loading control value
@@ -431,7 +431,7 @@
 }
 
 /**
- * firmware_data_write - write method for firmware
+ * firmware_data_write() - write method for firmware
  * @filp: open sysfs file
  * @kobj: kobject for the device
  * @bin_attr: bin_attr structure
@@ -512,7 +512,7 @@
 
 static struct fw_sysfs *
 fw_create_instance(struct firmware *firmware, const char *fw_name,
-		   struct device *device, unsigned int opt_flags)
+		   struct device *device, enum fw_opt opt_flags)
 {
 	struct fw_sysfs *fw_sysfs;
 	struct device *f_dev;
@@ -537,7 +537,7 @@
 }
 
 /**
- * fw_load_sysfs_fallback - load a firmware via the sysfs fallback mechanism
+ * fw_load_sysfs_fallback() - load a firmware via the sysfs fallback mechanism
  * @fw_sysfs: firmware sysfs information for the firmware to load
  * @opt_flags: flags of options, FW_OPT_*
  * @timeout: timeout to wait for the load
@@ -545,7 +545,7 @@
  * In charge of constructing a sysfs fallback interface for firmware loading.
  **/
 static int fw_load_sysfs_fallback(struct fw_sysfs *fw_sysfs,
-				  unsigned int opt_flags, long timeout)
+				  enum fw_opt opt_flags, long timeout)
 {
 	int retval = 0;
 	struct device *f_dev = &fw_sysfs->dev;
@@ -599,7 +599,7 @@
 
 static int fw_load_from_user_helper(struct firmware *firmware,
 				    const char *name, struct device *device,
-				    unsigned int opt_flags)
+				    enum fw_opt opt_flags)
 {
 	struct fw_sysfs *fw_sysfs;
 	long timeout;
@@ -640,7 +640,7 @@
 	return ret;
 }
 
-static bool fw_force_sysfs_fallback(unsigned int opt_flags)
+static bool fw_force_sysfs_fallback(enum fw_opt opt_flags)
 {
 	if (fw_fallback_config.force_sysfs_fallback)
 		return true;
@@ -649,7 +649,7 @@
 	return true;
 }
 
-static bool fw_run_sysfs_fallback(unsigned int opt_flags)
+static bool fw_run_sysfs_fallback(enum fw_opt opt_flags)
 {
 	if (fw_fallback_config.ignore_sysfs_fallback) {
 		pr_info_once("Ignoring firmware sysfs fallback due to sysctl knob\n");
@@ -662,14 +662,39 @@
 	return fw_force_sysfs_fallback(opt_flags);
 }
 
-int fw_sysfs_fallback(struct firmware *fw, const char *name,
-		      struct device *device,
-		      unsigned int opt_flags,
-		      int ret)
+/**
+ * firmware_fallback_sysfs() - use the fallback mechanism to find firmware
+ * @fw: pointer to firmware image
+ * @name: name of firmware file to look for
+ * @device: device for which firmware is being loaded
+ * @opt_flags: options to control firmware loading behaviour
+ * @ret: return value from direct lookup which triggered the fallback mechanism
+ *
+ * This function is called if direct lookup for the firmware failed; it
+ * enables a fallback mechanism through userspace by exposing a sysfs loading
+ * interface. Userspace is in charge of loading the firmware through the sysfs
+ * loading interface. This sysfs fallback mechanism may be disabled completely
+ * on a system by setting the proc sysctl value ignore_sysfs_fallback to true.
+ * If this is false, we check if the internal API caller set the
+ * @FW_OPT_NOFALLBACK flag; if so, that also disables the fallback mechanism.
+ * A system may want to enforce the sysfs fallback mechanism at all times; it
+ * can do this by setting ignore_sysfs_fallback to false and
+ * force_sysfs_fallback to true. Enabling force_sysfs_fallback is functionally
+ * equivalent to building a kernel with CONFIG_FW_LOADER_USER_HELPER_FALLBACK.
+ **/
+int firmware_fallback_sysfs(struct firmware *fw, const char *name,
+			    struct device *device,
+			    enum fw_opt opt_flags,
+			    int ret)
 {
 	if (!fw_run_sysfs_fallback(opt_flags))
 		return ret;
 
-	dev_warn(device, "Falling back to user helper\n");
+	if (!(opt_flags & FW_OPT_NO_WARN))
+		dev_warn(device, "Falling back to syfs fallback for: %s\n",
+				 name);
+	else
+		dev_dbg(device, "Falling back to sysfs fallback for: %s\n",
+				name);
 	return fw_load_from_user_helper(fw, name, device, opt_flags);
 }
diff --git a/drivers/base/firmware_loader/fallback.h b/drivers/base/firmware_loader/fallback.h
index f825567..2106350 100644
--- a/drivers/base/firmware_loader/fallback.h
+++ b/drivers/base/firmware_loader/fallback.h
@@ -5,6 +5,8 @@
 #include <linux/firmware.h>
 #include <linux/device.h>
 
+#include "firmware.h"
+
 /**
  * struct firmware_fallback_config - firmware fallback configuration settings
  *
@@ -29,10 +31,10 @@
 };
 
 #ifdef CONFIG_FW_LOADER_USER_HELPER
-int fw_sysfs_fallback(struct firmware *fw, const char *name,
-		      struct device *device,
-		      unsigned int opt_flags,
-		      int ret);
+int firmware_fallback_sysfs(struct firmware *fw, const char *name,
+			    struct device *device,
+			    enum fw_opt opt_flags,
+			    int ret);
 void kill_pending_fw_fallback_reqs(bool only_kill_custom);
 
 void fw_fallback_set_cache_timeout(void);
@@ -41,10 +43,10 @@
 int register_sysfs_loader(void);
 void unregister_sysfs_loader(void);
 #else /* CONFIG_FW_LOADER_USER_HELPER */
-static inline int fw_sysfs_fallback(struct firmware *fw, const char *name,
-				    struct device *device,
-				    unsigned int opt_flags,
-				    int ret)
+static inline int firmware_fallback_sysfs(struct firmware *fw, const char *name,
+					  struct device *device,
+					  enum fw_opt opt_flags,
+					  int ret)
 {
 	/* Keep carrying over the same error */
 	return ret;
diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h
index 64acbb1..4c1395f 100644
--- a/drivers/base/firmware_loader/firmware.h
+++ b/drivers/base/firmware_loader/firmware.h
@@ -2,6 +2,7 @@
 #ifndef __FIRMWARE_LOADER_H
 #define __FIRMWARE_LOADER_H
 
+#include <linux/bitops.h>
 #include <linux/firmware.h>
 #include <linux/types.h>
 #include <linux/kref.h>
@@ -10,13 +11,33 @@
 
 #include <generated/utsrelease.h>
 
-/* firmware behavior options */
-#define FW_OPT_UEVENT			(1U << 0)
-#define FW_OPT_NOWAIT			(1U << 1)
-#define FW_OPT_USERHELPER		(1U << 2)
-#define FW_OPT_NO_WARN			(1U << 3)
-#define FW_OPT_NOCACHE			(1U << 4)
-#define FW_OPT_NOFALLBACK		(1U << 5)
+/**
+ * enum fw_opt - options to control firmware loading behaviour
+ *
+ * @FW_OPT_UEVENT: Enables the fallback mechanism to send a kobject uevent
+ *	when the firmware is not found. Userspace is in charge of loading the
+ *	firmware using the sysfs loading facility.
+ * @FW_OPT_NOWAIT: Used to describe that the firmware request is asynchronous.
+ * @FW_OPT_USERHELPER: Enable the fallback mechanism, in case the direct
+ *	filesystem lookup fails to find the firmware.  For details refer to
+ *	firmware_fallback_sysfs().
+ * @FW_OPT_NO_WARN: Quiet, avoid printing warning messages.
+ * @FW_OPT_NOCACHE: Disables firmware caching. Firmware caching is used to
+ *	cache the firmware upon suspend, so that upon resume races against
+ *	firmware file lookups on storage are avoided. Used for calls where the
+ *	file may be too big, or where the driver takes charge of its own
+ *	firmware caching mechanism.
+ * @FW_OPT_NOFALLBACK: Disable the fallback mechanism. Takes precedence over
+ *	&FW_OPT_UEVENT and &FW_OPT_USERHELPER.
+ */
+enum fw_opt {
+	FW_OPT_UEVENT =         BIT(0),
+	FW_OPT_NOWAIT =         BIT(1),
+	FW_OPT_USERHELPER =     BIT(2),
+	FW_OPT_NO_WARN =        BIT(3),
+	FW_OPT_NOCACHE =        BIT(4),
+	FW_OPT_NOFALLBACK =     BIT(5),
+};
 
 enum fw_status {
 	FW_STATUS_UNKNOWN,
@@ -110,6 +131,6 @@
 }
 
 int assign_fw(struct firmware *fw, struct device *device,
-	      unsigned int opt_flags);
+	      enum fw_opt opt_flags);
 
 #endif /* __FIRMWARE_LOADER_H */
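
The options combine as a bitmask; for instance, a quiet asynchronous request
that must never hit the sysfs fallback could pass (hypothetical combination):

	enum fw_opt opt_flags = FW_OPT_UEVENT | FW_OPT_NOWAIT |
				FW_OPT_NO_WARN | FW_OPT_NOFALLBACK;
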
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index eb34089e..0943e70 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -443,7 +443,7 @@
 #endif
 
 int assign_fw(struct firmware *fw, struct device *device,
-	      unsigned int opt_flags)
+	      enum fw_opt opt_flags)
 {
 	struct fw_priv *fw_priv = fw->priv;
 	int ret;
@@ -558,7 +558,7 @@
 static int
 _request_firmware(const struct firmware **firmware_p, const char *name,
 		  struct device *device, void *buf, size_t size,
-		  unsigned int opt_flags)
+		  enum fw_opt opt_flags)
 {
 	struct firmware *fw = NULL;
 	int ret;
@@ -581,7 +581,7 @@
 			dev_warn(device,
 				 "Direct firmware load for %s failed with error %d\n",
 				 name, ret);
-		ret = fw_sysfs_fallback(fw, name, device, opt_flags, ret);
+		ret = firmware_fallback_sysfs(fw, name, device, opt_flags, ret);
 	} else
 		ret = assign_fw(fw, device, opt_flags);
 
@@ -597,7 +597,7 @@
 }
 
 /**
- * request_firmware: - send firmware request and wait for it
+ * request_firmware() - send firmware request and wait for it
  * @firmware_p: pointer to firmware image
  * @name: name of firmware file
  * @device: device for which firmware is being loaded
@@ -632,7 +632,34 @@
 EXPORT_SYMBOL(request_firmware);
 
 /**
- * request_firmware_direct: - load firmware directly without usermode helper
+ * firmware_request_nowarn() - request firmware, without warning if not found
+ * @firmware: pointer to firmware image
+ * @name: name of firmware file
+ * @device: device for which firmware is being loaded
+ *
+ * This function is similar in behaviour to request_firmware(), except
+ * it doesn't produce warning messages when the file is not found.
+ * The sysfs fallback mechanism is enabled if direct filesystem lookup fails;
+ * however, failures to find the firmware file with it are still
+ * suppressed. It is therefore up to the driver to check the return value
+ * of this call and to decide when to inform users of errors.
+ **/
+int firmware_request_nowarn(const struct firmware **firmware, const char *name,
+			    struct device *device)
+{
+	int ret;
+
+	/* Need to pin this module until return */
+	__module_get(THIS_MODULE);
+	ret = _request_firmware(firmware, name, device, NULL, 0,
+				FW_OPT_UEVENT | FW_OPT_NO_WARN);
+	module_put(THIS_MODULE);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(firmware_request_nowarn);
+
+/**
+ * request_firmware_direct() - load firmware directly without usermode helper
  * @firmware_p: pointer to firmware image
  * @name: name of firmware file
  * @device: device for which firmware is being loaded
@@ -657,7 +684,7 @@
 EXPORT_SYMBOL_GPL(request_firmware_direct);
 
 /**
- * firmware_request_cache: - cache firmware for suspend so resume can use it
+ * firmware_request_cache() - cache firmware for suspend so resume can use it
  * @name: name of firmware file
  * @device: device for which firmware should be cached for
  *
@@ -681,7 +708,7 @@
 EXPORT_SYMBOL_GPL(firmware_request_cache);
 
 /**
- * request_firmware_into_buf - load firmware into a previously allocated buffer
+ * request_firmware_into_buf() - load firmware into a previously allocated buffer
  * @firmware_p: pointer to firmware image
  * @name: name of firmware file
  * @device: device for which firmware is being loaded and DMA region allocated
@@ -713,7 +740,7 @@
 EXPORT_SYMBOL(request_firmware_into_buf);
 
 /**
- * release_firmware: - release the resource associated with a firmware image
+ * release_firmware() - release the resource associated with a firmware image
  * @fw: firmware resource to release
  **/
 void release_firmware(const struct firmware *fw)
@@ -734,7 +761,7 @@
 	struct device *device;
 	void *context;
 	void (*cont)(const struct firmware *fw, void *context);
-	unsigned int opt_flags;
+	enum fw_opt opt_flags;
 };
 
 static void request_firmware_work_func(struct work_struct *work)
@@ -755,7 +782,7 @@
 }
 
 /**
- * request_firmware_nowait - asynchronous version of request_firmware
+ * request_firmware_nowait() - asynchronous version of request_firmware
  * @module: module requesting the firmware
  * @uevent: sends uevent to copy the firmware image if this flag
  *	is non-zero else the firmware copy must be done manually.
@@ -824,7 +851,7 @@
 static ASYNC_DOMAIN_EXCLUSIVE(fw_cache_domain);
 
 /**
- * cache_firmware - cache one firmware image in kernel memory space
+ * cache_firmware() - cache one firmware image in kernel memory space
  * @fw_name: the firmware image name
  *
  * Cache firmware in kernel memory so that drivers can use it when
@@ -866,7 +893,7 @@
 }
 
 /**
- * uncache_firmware - remove one cached firmware image
+ * uncache_firmware() - remove one cached firmware image
  * @fw_name: the firmware image name
  *
  * Uncache one firmware image which has been cached successfully
@@ -1042,7 +1069,7 @@
 }
 
 /**
- * device_cache_fw_images - cache devices' firmware
+ * device_cache_fw_images() - cache devices' firmware
  *
  * If one device called request_firmware or its nowait version
  * successfully before, the firmware names are recored into the
@@ -1075,7 +1102,7 @@
 }
 
 /**
- * device_uncache_fw_images - uncache devices' firmware
+ * device_uncache_fw_images() - uncache devices' firmware
  *
  * uncache all firmwares which have been cached successfully
  * by device_uncache_fw_images earlier
@@ -1092,7 +1119,7 @@
 }
 
 /**
- * device_uncache_fw_images_delay - uncache devices firmwares
+ * device_uncache_fw_images_delay() - uncache devices firmwares
  * @delay: number of milliseconds to delay uncache device firmwares
  *
  * uncache all devices's firmwares which has been cached successfully
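
A hedged usage sketch for the new firmware_request_nowarn() export, assuming
a driver for which the firmware file is genuinely optional:

	const struct firmware *fw;
	int err;

	err = firmware_request_nowarn(&fw, "example/optional.bin", dev);
	if (err)
		return 0;	/* quietly fall back to built-in defaults */
	/* ... consume fw->data / fw->size ... */
	release_firmware(fw);
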
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index bffe861..f5e5601 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -649,13 +649,19 @@
 static
 int register_memory(struct memory_block *memory)
 {
+	int ret;
+
 	memory->dev.bus = &memory_subsys;
 	memory->dev.id = memory->start_section_nr / sections_per_block;
 	memory->dev.release = memory_block_release;
 	memory->dev.groups = memory_memblk_attr_groups;
 	memory->dev.offline = memory->state == MEM_OFFLINE;
 
-	return device_register(&memory->dev);
+	ret = device_register(&memory->dev);
+	if (ret)
+		put_device(&memory->dev);
+
+	return ret;
 }
 
 static int init_memory_block(struct memory_block **memory,
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 8e22073..60d6cc6 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -101,6 +101,9 @@
 		chip->irq_set_affinity = msi_domain_set_affinity;
 	if (!chip->irq_write_msi_msg)
 		chip->irq_write_msi_msg = platform_msi_write_msg;
+	if (WARN_ON((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
+		    !(chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)))
+		info->flags &= ~MSI_FLAG_LEVEL_CAPABLE;
 }
 
 static void platform_msi_free_descs(struct device *dev, int base, int nvec)
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 8075ddc..dff82a3 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -572,17 +572,16 @@
 		return ret;
 
 	ret = dev_pm_domain_attach(_dev, true);
-	if (ret != -EPROBE_DEFER) {
-		if (drv->probe) {
-			ret = drv->probe(dev);
-			if (ret)
-				dev_pm_domain_detach(_dev, true);
-		} else {
-			/* don't fail if just dev_pm_domain_attach failed */
-			ret = 0;
-		}
+	if (ret)
+		goto out;
+
+	if (drv->probe) {
+		ret = drv->probe(dev);
+		if (ret)
+			dev_pm_domain_detach(_dev, true);
 	}
 
+out:
 	if (drv->prevent_deferred_probe && ret == -EPROBE_DEFER) {
 		dev_warn(_dev, "probe deferral not supported\n");
 		ret = -ENXIO;
@@ -1130,6 +1129,22 @@
 
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
 
+int platform_dma_configure(struct device *dev)
+{
+	enum dev_dma_attr attr;
+	int ret = 0;
+
+	if (dev->of_node) {
+		ret = of_dma_configure(dev, dev->of_node, true);
+	} else if (has_acpi_companion(dev)) {
+		attr = acpi_get_dma_attr(to_acpi_device_node(dev->fwnode));
+		if (attr != DEV_DMA_NOT_SUPPORTED)
+			ret = acpi_dma_configure(dev, attr);
+	}
+
+	return ret;
+}
+
 static const struct dev_pm_ops platform_dev_pm_ops = {
 	.runtime_suspend = pm_generic_runtime_suspend,
 	.runtime_resume = pm_generic_runtime_resume,
@@ -1141,8 +1156,8 @@
 	.dev_groups	= platform_dev_groups,
 	.match		= platform_match,
 	.uevent		= platform_uevent,
+	.dma_configure	= platform_dma_configure,
 	.pm		= &platform_dev_pm_ops,
-	.force_dma	= true,
 };
 EXPORT_SYMBOL_GPL(platform_bus_type);
 
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index f6a9ad5..7ae62b6 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -98,17 +98,21 @@
  * Callers must ensure proper synchronization of this function with power
  * management callbacks.
  *
- * Returns 0 on successfully attached PM domain or negative error code.
+ * Returns 0 on a successfully attached PM domain, or when it is found that
+ * the device doesn't need a PM domain, else a negative error code.
  */
 int dev_pm_domain_attach(struct device *dev, bool power_on)
 {
 	int ret;
 
+	if (dev->pm_domain)
+		return 0;
+
 	ret = acpi_dev_pm_attach(dev, power_on);
-	if (ret)
+	if (!ret)
 		ret = genpd_dev_pm_attach(dev);
 
-	return ret;
+	return ret < 0 ? ret : 0;
 }
 EXPORT_SYMBOL_GPL(dev_pm_domain_attach);
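
Callers may now treat 0 as "attached, or no PM domain needed"; a sketch of
the updated probe-path pattern (mirrored by the platform-bus change later in
this diff):

	ret = dev_pm_domain_attach(dev, true);
	if (ret)
		return ret;	/* includes -EPROBE_DEFER */
	/* ret == 0: attached, or device needs no PM domain */
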
 
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 1ea0e25..6f403d6 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_qos.h>
@@ -1315,7 +1316,6 @@
 #endif /* CONFIG_PM_SLEEP */
 
 static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev,
-					struct generic_pm_domain *genpd,
 					struct gpd_timing_data *td)
 {
 	struct generic_pm_domain_data *gpd_data;
@@ -1377,24 +1377,19 @@
 			    struct gpd_timing_data *td)
 {
 	struct generic_pm_domain_data *gpd_data;
-	int ret = 0;
+	int ret;
 
 	dev_dbg(dev, "%s()\n", __func__);
 
 	if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
 		return -EINVAL;
 
-	gpd_data = genpd_alloc_dev_data(dev, genpd, td);
+	gpd_data = genpd_alloc_dev_data(dev, td);
 	if (IS_ERR(gpd_data))
 		return PTR_ERR(gpd_data);
 
 	genpd_lock(genpd);
 
-	if (genpd->prepared_count > 0) {
-		ret = -EAGAIN;
-		goto out;
-	}
-
 	ret = genpd->attach_dev ? genpd->attach_dev(genpd, dev) : 0;
 	if (ret)
 		goto out;
@@ -1418,23 +1413,21 @@
 }
 
 /**
- * __pm_genpd_add_device - Add a device to an I/O PM domain.
+ * pm_genpd_add_device - Add a device to an I/O PM domain.
  * @genpd: PM domain to add the device to.
  * @dev: Device to be added.
- * @td: Set of PM QoS timing parameters to attach to the device.
  */
-int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
-			  struct gpd_timing_data *td)
+int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev)
 {
 	int ret;
 
 	mutex_lock(&gpd_list_lock);
-	ret = genpd_add_device(genpd, dev, td);
+	ret = genpd_add_device(genpd, dev, NULL);
 	mutex_unlock(&gpd_list_lock);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(__pm_genpd_add_device);
+EXPORT_SYMBOL_GPL(pm_genpd_add_device);
 
 static int genpd_remove_device(struct generic_pm_domain *genpd,
 			       struct device *dev)
@@ -1481,13 +1474,13 @@
 
 /**
  * pm_genpd_remove_device - Remove a device from an I/O PM domain.
- * @genpd: PM domain to remove the device from.
  * @dev: Device to be removed.
  */
-int pm_genpd_remove_device(struct generic_pm_domain *genpd,
-			   struct device *dev)
+int pm_genpd_remove_device(struct device *dev)
 {
-	if (!genpd || genpd != genpd_lookup_dev(dev))
+	struct generic_pm_domain *genpd = genpd_lookup_dev(dev);
+
+	if (!genpd)
 		return -EINVAL;
 
 	return genpd_remove_device(genpd, dev);
@@ -1696,6 +1689,9 @@
 			return ret;
 	}
 
+	device_initialize(&genpd->dev);
+	dev_set_name(&genpd->dev, "%s", genpd->name);
+
 	mutex_lock(&gpd_list_lock);
 	list_add(&genpd->gpd_list_node, &gpd_list);
 	mutex_unlock(&gpd_list_lock);
@@ -1892,14 +1888,33 @@
 
 	mutex_lock(&gpd_list_lock);
 
-	if (genpd_present(genpd)) {
-		ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
-		if (!ret) {
-			genpd->provider = &np->fwnode;
-			genpd->has_provider = true;
+	if (!genpd_present(genpd))
+		goto unlock;
+
+	genpd->dev.of_node = np;
+
+	/* Parse genpd OPP table */
+	if (genpd->set_performance_state) {
+		ret = dev_pm_opp_of_add_table(&genpd->dev);
+		if (ret) {
+			dev_err(&genpd->dev, "Failed to add OPP table: %d\n",
+				ret);
+			goto unlock;
 		}
 	}
 
+	ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
+	if (ret) {
+		if (genpd->set_performance_state)
+			dev_pm_opp_of_remove_table(&genpd->dev);
+
+		goto unlock;
+	}
+
+	genpd->provider = &np->fwnode;
+	genpd->has_provider = true;
+
+unlock:
 	mutex_unlock(&gpd_list_lock);
 
 	return ret;
@@ -1914,6 +1929,7 @@
 int of_genpd_add_provider_onecell(struct device_node *np,
 				  struct genpd_onecell_data *data)
 {
+	struct generic_pm_domain *genpd;
 	unsigned int i;
 	int ret = -EINVAL;
 
@@ -1926,13 +1942,27 @@
 		data->xlate = genpd_xlate_onecell;
 
 	for (i = 0; i < data->num_domains; i++) {
-		if (!data->domains[i])
+		genpd = data->domains[i];
+
+		if (!genpd)
 			continue;
-		if (!genpd_present(data->domains[i]))
+		if (!genpd_present(genpd))
 			goto error;
 
-		data->domains[i]->provider = &np->fwnode;
-		data->domains[i]->has_provider = true;
+		genpd->dev.of_node = np;
+
+		/* Parse genpd OPP table */
+		if (genpd->set_performance_state) {
+			ret = dev_pm_opp_of_add_table_indexed(&genpd->dev, i);
+			if (ret) {
+				dev_err(&genpd->dev, "Failed to add OPP table for index %d: %d\n",
+					i, ret);
+				goto error;
+			}
+		}
+
+		genpd->provider = &np->fwnode;
+		genpd->has_provider = true;
 	}
 
 	ret = genpd_add_provider(np, data->xlate, data);
@@ -1945,10 +1975,16 @@
 
 error:
 	while (i--) {
-		if (!data->domains[i])
+		genpd = data->domains[i];
+
+		if (!genpd)
 			continue;
-		data->domains[i]->provider = NULL;
-		data->domains[i]->has_provider = false;
+
+		genpd->provider = NULL;
+		genpd->has_provider = false;
+
+		if (genpd->set_performance_state)
+			dev_pm_opp_of_remove_table(&genpd->dev);
 	}
 
 	mutex_unlock(&gpd_list_lock);
@@ -1975,10 +2011,17 @@
 			 * provider, set the 'has_provider' to false
 			 * so that the PM domain can be safely removed.
 			 */
-			list_for_each_entry(gpd, &gpd_list, gpd_list_node)
-				if (gpd->provider == &np->fwnode)
+			list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
+				if (gpd->provider == &np->fwnode) {
 					gpd->has_provider = false;
 
+					if (!gpd->set_performance_state)
+						continue;
+
+					dev_pm_opp_of_remove_table(&gpd->dev);
+				}
+			}
+
 			list_del(&cp->link);
 			of_node_put(cp->node);
 			kfree(cp);
@@ -2185,31 +2228,25 @@
  * Parse device's OF node to find a PM domain specifier. If such is found,
  * attaches the device to retrieved pm_domain ops.
  *
- * Both generic and legacy Samsung-specific DT bindings are supported to keep
- * backwards compatibility with existing DTBs.
- *
- * Returns 0 on successfully attached PM domain or negative error code. Note
- * that if a power-domain exists for the device, but it cannot be found or
- * turned on, then return -EPROBE_DEFER to ensure that the device is not
- * probed and to re-try again later.
+ * Returns 1 on a successfully attached PM domain, 0 when the device doesn't
+ * need a PM domain, or a negative error code in case of failures. Note that
+ * if a power-domain exists for the device, but it cannot be found or turned
+ * on, then -EPROBE_DEFER is returned to ensure that the device is not probed
+ * and to re-try again later.
  */
 int genpd_dev_pm_attach(struct device *dev)
 {
 	struct of_phandle_args pd_args;
 	struct generic_pm_domain *pd;
-	unsigned int i;
 	int ret;
 
 	if (!dev->of_node)
-		return -ENODEV;
-
-	if (dev->pm_domain)
-		return -EEXIST;
+		return 0;
 
 	ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
 					"#power-domain-cells", 0, &pd_args);
 	if (ret < 0)
-		return ret;
+		return 0;
 
 	mutex_lock(&gpd_list_lock);
 	pd = genpd_get_from_provider(&pd_args);
@@ -2223,21 +2260,14 @@
 
 	dev_dbg(dev, "adding to PM domain %s\n", pd->name);
 
-	for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
-		ret = genpd_add_device(pd, dev, NULL);
-		if (ret != -EAGAIN)
-			break;
-
-		mdelay(i);
-		cond_resched();
-	}
+	ret = genpd_add_device(pd, dev, NULL);
 	mutex_unlock(&gpd_list_lock);
 
 	if (ret < 0) {
 		if (ret != -EPROBE_DEFER)
 			dev_err(dev, "failed to add to PM domain %s: %d",
 				pd->name, ret);
-		goto out;
+		return ret;
 	}
 
 	dev->pm_domain->detach = genpd_dev_pm_detach;
@@ -2246,8 +2276,11 @@
 	genpd_lock(pd);
 	ret = genpd_power_on(pd, 0);
 	genpd_unlock(pd);
-out:
-	return ret ? -EPROBE_DEFER : 0;
+
+	if (ret)
+		genpd_remove_device(pd, dev);
+
+	return ret ? -EPROBE_DEFER : 1;
 }
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
 
@@ -2361,6 +2394,55 @@
 }
 EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states);
 
+/**
+ * of_genpd_opp_to_performance_state() - Gets performance state of device's
+ * power domain corresponding to a DT node's "required-opps" property.
+ *
+ * @dev: Device for which the performance-state needs to be found.
+ * @opp_node: DT node where the "required-opps" property is present. This can be
+ *	the device node itself (if it doesn't have an OPP table) or a node
+ *	within the OPP table of a device (if device has an OPP table).
+ *
+ * Returns performance state corresponding to the "required-opps" property of
+ * a DT node. This calls platform specific genpd->opp_to_performance_state()
+ * callback to translate power domain OPP to performance state.
+ *
+ * Returns performance state on success and 0 on failure.
+ */
+unsigned int of_genpd_opp_to_performance_state(struct device *dev,
+					       struct device_node *opp_node)
+{
+	struct generic_pm_domain *genpd;
+	struct dev_pm_opp *opp;
+	int state = 0;
+
+	genpd = dev_to_genpd(dev);
+	if (IS_ERR(genpd))
+		return 0;
+
+	if (unlikely(!genpd->set_performance_state))
+		return 0;
+
+	genpd_lock(genpd);
+
+	opp = of_dev_pm_opp_find_required_opp(&genpd->dev, opp_node);
+	if (IS_ERR(opp)) {
+		dev_err(dev, "Failed to find required OPP: %ld\n",
+			PTR_ERR(opp));
+		goto unlock;
+	}
+
+	state = genpd->opp_to_performance_state(genpd, opp);
+	dev_pm_opp_put(opp);
+
+unlock:
+	genpd_unlock(genpd);
+
+	return state;
+}
+EXPORT_SYMBOL_GPL(of_genpd_opp_to_performance_state);
+
 #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
 
 
@@ -2628,6 +2710,19 @@
 	return ret;
 }
 
+static int genpd_perf_state_show(struct seq_file *s, void *data)
+{
+	struct generic_pm_domain *genpd = s->private;
+
+	if (genpd_lock_interruptible(genpd))
+		return -ERESTARTSYS;
+
+	seq_printf(s, "%u\n", genpd->performance_state);
+
+	genpd_unlock(genpd);
+	return 0;
+}
+
 #define define_genpd_open_function(name) \
 static int genpd_##name##_open(struct inode *inode, struct file *file) \
 { \
@@ -2641,6 +2736,7 @@
 define_genpd_open_function(active_time);
 define_genpd_open_function(total_idle_time);
 define_genpd_open_function(devices);
+define_genpd_open_function(perf_state);
 
 #define define_genpd_debugfs_fops(name) \
 static const struct file_operations genpd_##name##_fops = { \
@@ -2657,6 +2753,7 @@
 define_genpd_debugfs_fops(active_time);
 define_genpd_debugfs_fops(total_idle_time);
 define_genpd_debugfs_fops(devices);
+define_genpd_debugfs_fops(perf_state);
 
 static int __init genpd_debug_init(void)
 {
@@ -2690,6 +2787,9 @@
 				d, genpd, &genpd_total_idle_time_fops);
 		debugfs_create_file("devices", 0444,
 				d, genpd, &genpd_devices_fops);
+		if (genpd->set_performance_state)
+			debugfs_create_file("perf_state", 0444,
+					    d, genpd, &genpd_perf_state_fops);
 	}
 
 	return 0;
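
With the timing-data variant folded away and removal keyed off the device
alone, a hedged sketch of the simplified genpd API, assuming a hypothetical
domain:

	err = pm_genpd_add_device(&example_genpd, dev);
	if (err)
		return err;
	/* ... */
	pm_genpd_remove_device(dev);	/* domain is looked up internally */
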
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e5e0670..3f68e29 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -192,34 +192,31 @@
 	list_move_tail(&dev->power.entry, &dpm_list);
 }
 
-static ktime_t initcall_debug_start(struct device *dev)
+static ktime_t initcall_debug_start(struct device *dev, void *cb)
 {
-	ktime_t calltime = 0;
+	if (!pm_print_times_enabled)
+		return 0;
 
-	if (pm_print_times_enabled) {
-		pr_info("calling  %s+ @ %i, parent: %s\n",
-			dev_name(dev), task_pid_nr(current),
-			dev->parent ? dev_name(dev->parent) : "none");
-		calltime = ktime_get();
-	}
-
-	return calltime;
+	dev_info(dev, "calling %pF @ %i, parent: %s\n", cb,
+		 task_pid_nr(current),
+		 dev->parent ? dev_name(dev->parent) : "none");
+	return ktime_get();
 }
 
 static void initcall_debug_report(struct device *dev, ktime_t calltime,
-				  int error, pm_message_t state,
-				  const char *info)
+				  void *cb, int error)
 {
 	ktime_t rettime;
 	s64 nsecs;
 
+	if (!pm_print_times_enabled)
+		return;
+
 	rettime = ktime_get();
 	nsecs = (s64) ktime_to_ns(ktime_sub(rettime, calltime));
 
-	if (pm_print_times_enabled) {
-		pr_info("call %s+ returned %d after %Ld usecs\n", dev_name(dev),
-			error, (unsigned long long)nsecs >> 10);
-	}
+	dev_info(dev, "%pF returned %d after %Ld usecs\n", cb, error,
+		 (unsigned long long)nsecs >> 10);
 }
 
 /**
@@ -446,7 +443,7 @@
 	if (!cb)
 		return 0;
 
-	calltime = initcall_debug_start(dev);
+	calltime = initcall_debug_start(dev, cb);
 
 	pm_dev_dbg(dev, state, info);
 	trace_device_pm_callback_start(dev, info, state.event);
@@ -454,7 +451,7 @@
 	trace_device_pm_callback_end(dev, error);
 	suspend_report_result(cb, error);
 
-	initcall_debug_report(dev, calltime, error, state, info);
+	initcall_debug_report(dev, calltime, cb, error);
 
 	return error;
 }
@@ -1664,14 +1661,14 @@
 	int error;
 	ktime_t calltime;
 
-	calltime = initcall_debug_start(dev);
+	calltime = initcall_debug_start(dev, cb);
 
 	trace_device_pm_callback_start(dev, info, state.event);
 	error = cb(dev, state);
 	trace_device_pm_callback_end(dev, error);
 	suspend_report_result(cb, error);
 
-	initcall_debug_report(dev, calltime, error, state, info);
+	initcall_debug_report(dev, calltime, cb, error);
 
 	return error;
 }
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 86e67e7..c511def 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -56,14 +56,6 @@
 {
 }
 
-static inline void device_wakeup_arm_wake_irqs(void)
-{
-}
-
-static inline void device_wakeup_disarm_wake_irqs(void)
-{
-}
-
 #endif /* CONFIG_PM_SLEEP */
 
 /*
@@ -95,28 +87,6 @@
 
 static inline int dpm_sysfs_add(struct device *dev) { return 0; }
 static inline void dpm_sysfs_remove(struct device *dev) {}
-static inline void rpm_sysfs_remove(struct device *dev) {}
-static inline int wakeup_sysfs_add(struct device *dev) { return 0; }
-static inline void wakeup_sysfs_remove(struct device *dev) {}
-static inline int pm_qos_sysfs_add(struct device *dev) { return 0; }
-static inline void pm_qos_sysfs_remove(struct device *dev) {}
-
-static inline void dev_pm_arm_wake_irq(struct wake_irq *wirq)
-{
-}
-
-static inline void dev_pm_disarm_wake_irq(struct wake_irq *wirq)
-{
-}
-
-static inline void dev_pm_enable_wake_irq_check(struct device *dev,
-						bool can_change_status)
-{
-}
-
-static inline void dev_pm_disable_wake_irq_check(struct device *dev)
-{
-}
 
 #endif
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 8bef3cb..c6030f1 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1563,37 +1563,16 @@
 }
 
 /**
- * pm_runtime_get_suppliers - Resume and reference-count supplier devices.
+ * pm_runtime_resume_suppliers - Resume supplier devices.
  * @dev: Consumer device.
  */
-void pm_runtime_get_suppliers(struct device *dev)
+void pm_runtime_resume_suppliers(struct device *dev)
 {
-	struct device_link *link;
 	int idx;
 
 	idx = device_links_read_lock();
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
-		if (link->flags & DL_FLAG_PM_RUNTIME)
-			pm_runtime_get_sync(link->supplier);
-
-	device_links_read_unlock(idx);
-}
-
-/**
- * pm_runtime_put_suppliers - Drop references to supplier devices.
- * @dev: Consumer device.
- */
-void pm_runtime_put_suppliers(struct device *dev)
-{
-	struct device_link *link;
-	int idx;
-
-	idx = device_links_read_lock();
-
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
-		if (link->flags & DL_FLAG_PM_RUNTIME)
-			pm_runtime_put(link->supplier);
+	rpm_get_suppliers(dev);
 
 	device_links_read_unlock(idx);
 }
@@ -1607,6 +1586,8 @@
 
 void pm_runtime_drop_link(struct device *dev)
 {
+	rpm_put_suppliers(dev);
+
 	spin_lock_irq(&dev->power.lock);
 	WARN_ON(dev->power.links_count == 0);
 	dev->power.links_count--;
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index ea01621..5fa1898 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -57,7 +57,7 @@
 /* A preserved old value of the events counter. */
 static unsigned int saved_count;
 
-static DEFINE_SPINLOCK(events_lock);
+static DEFINE_RAW_SPINLOCK(events_lock);
 
 static void pm_wakeup_timer_fn(struct timer_list *t);
 
@@ -183,11 +183,10 @@
 	spin_lock_init(&ws->lock);
 	timer_setup(&ws->timer, pm_wakeup_timer_fn, 0);
 	ws->active = false;
-	ws->last_time = ktime_get();
 
-	spin_lock_irqsave(&events_lock, flags);
+	raw_spin_lock_irqsave(&events_lock, flags);
 	list_add_rcu(&ws->entry, &wakeup_sources);
-	spin_unlock_irqrestore(&events_lock, flags);
+	raw_spin_unlock_irqrestore(&events_lock, flags);
 }
 EXPORT_SYMBOL_GPL(wakeup_source_add);
 
@@ -202,9 +201,9 @@
 	if (WARN_ON(!ws))
 		return;
 
-	spin_lock_irqsave(&events_lock, flags);
+	raw_spin_lock_irqsave(&events_lock, flags);
 	list_del_rcu(&ws->entry);
-	spin_unlock_irqrestore(&events_lock, flags);
+	raw_spin_unlock_irqrestore(&events_lock, flags);
 	synchronize_srcu(&wakeup_srcu);
 }
 EXPORT_SYMBOL_GPL(wakeup_source_remove);
@@ -843,7 +842,7 @@
 	unsigned long flags;
 	bool ret = false;
 
-	spin_lock_irqsave(&events_lock, flags);
+	raw_spin_lock_irqsave(&events_lock, flags);
 	if (events_check_enabled) {
 		unsigned int cnt, inpr;
 
@@ -851,10 +850,10 @@
 		ret = (cnt != saved_count || inpr > 0);
 		events_check_enabled = !ret;
 	}
-	spin_unlock_irqrestore(&events_lock, flags);
+	raw_spin_unlock_irqrestore(&events_lock, flags);
 
 	if (ret) {
-		pr_info("PM: Wakeup pending, aborting suspend\n");
+		pr_debug("PM: Wakeup pending, aborting suspend\n");
 		pm_print_active_wakeup_sources();
 	}
 
@@ -940,13 +939,13 @@
 	unsigned long flags;
 
 	events_check_enabled = false;
-	spin_lock_irqsave(&events_lock, flags);
+	raw_spin_lock_irqsave(&events_lock, flags);
 	split_counters(&cnt, &inpr);
 	if (cnt == count && inpr == 0) {
 		saved_count = count;
 		events_check_enabled = true;
 	}
-	spin_unlock_irqrestore(&events_lock, flags);
+	raw_spin_unlock_irqrestore(&events_lock, flags);
 	return events_check_enabled;
 }
 
@@ -1029,32 +1028,75 @@
 	return 0;
 }
 
-/**
- * wakeup_sources_stats_show - Print wakeup sources statistics information.
- * @m: seq_file to print the statistics into.
- */
-static int wakeup_sources_stats_show(struct seq_file *m, void *unused)
+static void *wakeup_sources_stats_seq_start(struct seq_file *m,
+					loff_t *pos)
 {
 	struct wakeup_source *ws;
-	int srcuidx;
+	loff_t n = *pos;
+	int *srcuidx = m->private;
 
-	seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t"
-		"expire_count\tactive_since\ttotal_time\tmax_time\t"
-		"last_change\tprevent_suspend_time\n");
+	if (n == 0) {
+		seq_puts(m, "name\t\tactive_count\tevent_count\twakeup_count\t"
+			"expire_count\tactive_since\ttotal_time\tmax_time\t"
+			"last_change\tprevent_suspend_time\n");
+	}
 
-	srcuidx = srcu_read_lock(&wakeup_srcu);
-	list_for_each_entry_rcu(ws, &wakeup_sources, entry)
-		print_wakeup_source_stats(m, ws);
-	srcu_read_unlock(&wakeup_srcu, srcuidx);
+	*srcuidx = srcu_read_lock(&wakeup_srcu);
+	list_for_each_entry_rcu(ws, &wakeup_sources, entry) {
+		if (n-- <= 0)
+			return ws;
+	}
 
-	print_wakeup_source_stats(m, &deleted_ws);
+	return NULL;
+}
+
+static void *wakeup_sources_stats_seq_next(struct seq_file *m,
+					void *v, loff_t *pos)
+{
+	struct wakeup_source *ws = v;
+	struct wakeup_source *next_ws = NULL;
+
+	++(*pos);
+
+	list_for_each_entry_continue_rcu(ws, &wakeup_sources, entry) {
+		next_ws = ws;
+		break;
+	}
+
+	return next_ws;
+}
+
+static void wakeup_sources_stats_seq_stop(struct seq_file *m, void *v)
+{
+	int *srcuidx = m->private;
+
+	srcu_read_unlock(&wakeup_srcu, *srcuidx);
+}
+
+/**
+ * wakeup_sources_stats_seq_show - Print wakeup sources statistics information.
+ * @m: seq_file to print the statistics into.
+ * @v: wakeup_source of each iteration
+ */
+static int wakeup_sources_stats_seq_show(struct seq_file *m, void *v)
+{
+	struct wakeup_source *ws = v;
+
+	print_wakeup_source_stats(m, ws);
 
 	return 0;
 }
 
+static const struct seq_operations wakeup_sources_stats_seq_ops = {
+	.start = wakeup_sources_stats_seq_start,
+	.next  = wakeup_sources_stats_seq_next,
+	.stop  = wakeup_sources_stats_seq_stop,
+	.show  = wakeup_sources_stats_seq_show,
+};
+
 static int wakeup_sources_stats_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, wakeup_sources_stats_show, NULL);
+	return seq_open_private(file, &wakeup_sources_stats_seq_ops, sizeof(int));
 }
 
 static const struct file_operations wakeup_sources_stats_fops = {
@@ -1062,7 +1104,7 @@
 	.open = wakeup_sources_stats_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = single_release,
+	.release = seq_release_private,
 };
 
 static int __init wakeup_sources_debugfs_init(void)
diff --git a/drivers/base/property.c b/drivers/base/property.c
index 8f205f6..240ab52 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -56,6 +56,72 @@
 	return NULL;
 }
 
+static const void *property_get_pointer(const struct property_entry *prop)
+{
+	switch (prop->type) {
+	case DEV_PROP_U8:
+		if (prop->is_array)
+			return prop->pointer.u8_data;
+		return &prop->value.u8_data;
+	case DEV_PROP_U16:
+		if (prop->is_array)
+			return prop->pointer.u16_data;
+		return &prop->value.u16_data;
+	case DEV_PROP_U32:
+		if (prop->is_array)
+			return prop->pointer.u32_data;
+		return &prop->value.u32_data;
+	case DEV_PROP_U64:
+		if (prop->is_array)
+			return prop->pointer.u64_data;
+		return &prop->value.u64_data;
+	case DEV_PROP_STRING:
+		if (prop->is_array)
+			return prop->pointer.str;
+		return &prop->value.str;
+	default:
+		return NULL;
+	}
+}
+
+static void property_set_pointer(struct property_entry *prop, const void *pointer)
+{
+	switch (prop->type) {
+	case DEV_PROP_U8:
+		if (prop->is_array)
+			prop->pointer.u8_data = pointer;
+		else
+			prop->value.u8_data = *((u8 *)pointer);
+		break;
+	case DEV_PROP_U16:
+		if (prop->is_array)
+			prop->pointer.u16_data = pointer;
+		else
+			prop->value.u16_data = *((u16 *)pointer);
+		break;
+	case DEV_PROP_U32:
+		if (prop->is_array)
+			prop->pointer.u32_data = pointer;
+		else
+			prop->value.u32_data = *((u32 *)pointer);
+		break;
+	case DEV_PROP_U64:
+		if (prop->is_array)
+			prop->pointer.u64_data = pointer;
+		else
+			prop->value.u64_data = *((u64 *)pointer);
+		break;
+	case DEV_PROP_STRING:
+		if (prop->is_array)
+			prop->pointer.str = pointer;
+		else
+			prop->value.str = pointer;
+		break;
+	default:
+		break;
+	}
+}
+
 static const void *pset_prop_find(const struct property_set *pset,
 				  const char *propname, size_t length)
 {
@@ -65,10 +131,7 @@
 	prop = pset_prop_get(pset, propname);
 	if (!prop)
 		return ERR_PTR(-EINVAL);
-	if (prop->is_array)
-		pointer = prop->pointer.raw_data;
-	else
-		pointer = &prop->value.raw_data;
+	pointer = property_get_pointer(prop);
 	if (!pointer)
 		return ERR_PTR(-ENODATA);
 	if (length > prop->length)
@@ -698,16 +761,17 @@
 
 static void property_entry_free_data(const struct property_entry *p)
 {
+	const void *pointer = property_get_pointer(p);
 	size_t i, nval;
 
 	if (p->is_array) {
-		if (p->is_string && p->pointer.str) {
+		if (p->type == DEV_PROP_STRING && p->pointer.str) {
 			nval = p->length / sizeof(const char *);
 			for (i = 0; i < nval; i++)
 				kfree(p->pointer.str[i]);
 		}
-		kfree(p->pointer.raw_data);
-	} else if (p->is_string) {
+		kfree(pointer);
+	} else if (p->type == DEV_PROP_STRING) {
 		kfree(p->value.str);
 	}
 	kfree(p->name);
@@ -716,7 +780,7 @@
 static int property_copy_string_array(struct property_entry *dst,
 				      const struct property_entry *src)
 {
-	char **d;
+	const char **d;
 	size_t nval = src->length / sizeof(*d);
 	int i;
 
@@ -734,40 +798,44 @@
 		}
 	}
 
-	dst->pointer.raw_data = d;
+	dst->pointer.str = d;
 	return 0;
 }
 
 static int property_entry_copy_data(struct property_entry *dst,
 				    const struct property_entry *src)
 {
+	const void *pointer = property_get_pointer(src);
+	const void *new;
 	int error;
 
 	if (src->is_array) {
 		if (!src->length)
 			return -ENODATA;
 
-		if (src->is_string) {
+		if (src->type == DEV_PROP_STRING) {
 			error = property_copy_string_array(dst, src);
 			if (error)
 				return error;
+			new = dst->pointer.str;
 		} else {
-			dst->pointer.raw_data = kmemdup(src->pointer.raw_data,
-							src->length, GFP_KERNEL);
-			if (!dst->pointer.raw_data)
+			new = kmemdup(pointer, src->length, GFP_KERNEL);
+			if (!new)
 				return -ENOMEM;
 		}
-	} else if (src->is_string) {
-		dst->value.str = kstrdup(src->value.str, GFP_KERNEL);
-		if (!dst->value.str && src->value.str)
+	} else if (src->type == DEV_PROP_STRING) {
+		new = kstrdup(src->value.str, GFP_KERNEL);
+		if (!new && src->value.str)
 			return -ENOMEM;
 	} else {
-		dst->value.raw_data = src->value.raw_data;
+		new = pointer;
 	}
 
 	dst->length = src->length;
 	dst->is_array = src->is_array;
-	dst->is_string = src->is_string;
+	dst->type = src->type;
+
+	property_set_pointer(dst, new);
 
 	dst->name = kstrdup(src->name, GFP_KERNEL);
 	if (!dst->name)
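
The property.c changes retire the old is_string flag and raw_data union member in favour of a type enum plus one pair of accessors, so every union access is dispatched on the stored tag in a single place. A compact, plain-C sketch of that tagged-union pattern (names hypothetical):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    enum val_type { VAL_U32, VAL_STRING };

    struct entry {
            enum val_type type;
            bool is_array;
            union {
                    const void *ptr;        /* array payload */
                    uint32_t u32_val;       /* inline scalar */
                    const char *str;        /* inline string */
            } v;
    };

    /* Single choke point for reading the union, keyed on the tag. */
    static const void *entry_get_pointer(const struct entry *e)
    {
            switch (e->type) {
            case VAL_U32:
                    return e->is_array ? e->v.ptr
                                       : (const void *)&e->v.u32_val;
            case VAL_STRING:
                    return e->is_array ? e->v.ptr
                                       : (const void *)&e->v.str;
            default:
                    return NULL;
            }
    }

Note that the scalar string case returns the address of the pointer, not the pointer itself, exactly as property_get_pointer() does above for DEV_PROP_STRING.
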
diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c
index 5cadfd3..8741fb5 100644
--- a/drivers/base/regmap/regmap-mmio.c
+++ b/drivers/base/regmap/regmap-mmio.c
@@ -206,7 +206,8 @@
 
 	if (!IS_ERR(ctx->clk)) {
 		clk_unprepare(ctx->clk);
-		clk_put(ctx->clk);
+		if (!ctx->attached_clk)
+			clk_put(ctx->clk);
 	}
 	kfree(context);
 }
diff --git a/drivers/base/regmap/regmap-slimbus.c b/drivers/base/regmap/regmap-slimbus.c
index c90bee8..91d501e 100644
--- a/drivers/base/regmap/regmap-slimbus.c
+++ b/drivers/base/regmap/regmap-slimbus.c
@@ -41,7 +41,7 @@
 static const struct regmap_bus *regmap_get_slimbus(struct slim_device *slim,
 					const struct regmap_config *config)
 {
-	if (config->val_bits == 8 && config->reg_bits == 8)
+	if (config->val_bits == 8 && config->reg_bits == 16)
 		return &regmap_slimbus_bus;
 
 	return ERR_PTR(-ENOTSUPP);
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index e6986c7..fc1f4ac 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -207,7 +207,7 @@
 
 	core->irq = bcma_of_get_irq(parent, core, 0);
 
-	of_dma_configure(&core->dev, node);
+	of_dma_configure(&core->dev, node, false);
 }
 
 unsigned int bcma_core_irq(struct bcma_device *core, int num)
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index f781eff..6ca77d6 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -1179,7 +1179,6 @@
 
   if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
 	return DAC960_Failure(Controller, "DMA mask out of range");
-  Controller->BounceBufferLimit = DMA_BIT_MASK(32);
 
   if ((hw_type == DAC960_PD_Controller) || (hw_type == DAC960_P_Controller)) {
     CommandMailboxesSize =  0;
@@ -1380,11 +1379,8 @@
   dma_addr_t	CommandMailboxDMA;
   DAC960_V2_CommandStatus_T CommandStatus;
 
-	if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)))
-		Controller->BounceBufferLimit = DMA_BIT_MASK(64);
-	else if (!pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
-		Controller->BounceBufferLimit = DMA_BIT_MASK(32);
-	else
+	if (pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(64)) &&
+	    pci_set_dma_mask(Controller->PCIDevice, DMA_BIT_MASK(32)))
 		return DAC960_Failure(Controller, "DMA mask out of range");
 
   /* This is a temporary dma mapping, used only in the scope of this function */
@@ -2540,7 +2536,6 @@
 		continue;
   	}
   	Controller->RequestQueue[n] = RequestQueue;
-  	blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
   	RequestQueue->queuedata = Controller;
 	blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
 	blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
@@ -6451,19 +6446,6 @@
   return 0;
 }
 
-static int dac960_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, dac960_proc_show, NULL);
-}
-
-static const struct file_operations dac960_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= dac960_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int dac960_initial_status_proc_show(struct seq_file *m, void *v)
 {
 	DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
@@ -6471,19 +6453,6 @@
 	return 0;
 }
 
-static int dac960_initial_status_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, dac960_initial_status_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations dac960_initial_status_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= dac960_initial_status_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int dac960_current_status_proc_show(struct seq_file *m, void *v)
 {
   DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private;
@@ -6517,19 +6486,6 @@
 	return 0;
 }
 
-static int dac960_current_status_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, dac960_current_status_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations dac960_current_status_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= dac960_current_status_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int dac960_user_command_proc_show(struct seq_file *m, void *v)
 {
 	DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
@@ -6584,17 +6540,19 @@
 
 	if (DAC960_ProcDirectoryEntry == NULL) {
 		DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL);
-		proc_create("status", 0, DAC960_ProcDirectoryEntry,
-			    &dac960_proc_fops);
+		proc_create_single("status", 0, DAC960_ProcDirectoryEntry,
+				dac960_proc_show);
 	}
 
 	snprintf(Controller->ControllerName, sizeof(Controller->ControllerName),
 		 "c%d", Controller->ControllerNumber);
 	ControllerProcEntry = proc_mkdir(Controller->ControllerName,
 					 DAC960_ProcDirectoryEntry);
-	proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
-	proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller);
-	proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
+	proc_create_single_data("initial_status", 0, ControllerProcEntry,
+			dac960_initial_status_proc_show, Controller);
+	proc_create_single_data("current_status", 0, ControllerProcEntry,
+			dac960_current_status_proc_show, Controller);
+	proc_create_data("user_command", 0600, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
 	Controller->ControllerProcEntry = ControllerProcEntry;
 }
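
The DAC960 hunks are one instance of the new proc_create_single{,_data}() helpers, which move the open wrapper and file_operations for read-only, single-shot proc files into the proc core. A minimal sketch (my_dev and its field are hypothetical):

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    struct my_dev {
            int state;
    };

    static int my_status_show(struct seq_file *m, void *v)
    {
            struct my_dev *dev = m->private; /* data passed at create time */

            seq_printf(m, "state: %d\n", dev->state);
            return 0;
    }

    static int my_proc_init(struct my_dev *dev)
    {
            /* one call replaces single_open(), the fops and the wrapper */
            if (!proc_create_single_data("my_status", 0444, NULL,
                                         my_status_show, dev))
                    return -ENOMEM;
            return 0;
    }

Files that also need a write handler, like user_command above, keep their full file_operations.
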
 
diff --git a/drivers/block/DAC960.h b/drivers/block/DAC960.h
index 21aff47..1439e65 100644
--- a/drivers/block/DAC960.h
+++ b/drivers/block/DAC960.h
@@ -2295,7 +2295,6 @@
   unsigned short MaxBlocksPerCommand;
   unsigned short ControllerScatterGatherLimit;
   unsigned short DriverScatterGatherLimit;
-  u64		BounceBufferLimit;
   unsigned int CombinedStatusBufferLength;
   unsigned int InitialStatusLength;
   unsigned int CurrentStatusLength;
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 6797e6c..429ebb8 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -159,14 +159,14 @@
 	return single_open(file, aoedisk_debugfs_show, inode->i_private);
 }
 
-static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
-static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
-static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
+static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
+static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
+static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
 static struct device_attribute dev_attr_firmware_version = {
-	.attr = { .name = "firmware-version", .mode = S_IRUGO },
+	.attr = { .name = "firmware-version", .mode = 0444 },
 	.show = aoedisk_show_fwver,
 };
-static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
+static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);
 
 static struct attribute *aoe_attrs[] = {
 	&dev_attr_state.attr,
@@ -388,7 +388,6 @@
 			d->aoemajor, d->aoeminor);
 		goto err_mempool;
 	}
-	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	spin_lock_irqsave(&d->lock, flags);
 	WARN_ON(!(d->flags & DEVFL_GD_NOW));
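
The S_IRUGO/S_IWUSR conversions that run through the rest of this series are mechanical: checkpatch now prefers literal octal modes, which read more directly than OR-ed macro chains. The equivalences used here:

    /*
     * S_IRUGO  = S_IRUSR|S_IRGRP|S_IROTH = 0400|0040|0004 = 0444  r--r--r--
     * S_IWUSR                                             = 0200  -w-------
     * S_IRUSR|S_IRGRP                                     = 0440  r--r-----
     * S_IRUGO|S_IWUSR                                     = 0644  rw-r--r--
     */
    static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL); /* was S_IRUGO */
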
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 540bb60..096882e 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1032,8 +1032,9 @@
 	iter.bi_size = cnt;
 
 	__bio_for_each_segment(bv, bio, iter, iter) {
-		char *p = page_address(bv.bv_page) + bv.bv_offset;
+		char *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
 		skb_copy_bits(skb, soff, p, bv.bv_len);
+		kunmap_atomic(p);
 		soff += bv.bv_len;
 	}
 }
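
The aoecmd change swaps page_address() for kmap_atomic(): page_address() is only valid for pages with a permanent kernel mapping, while kmap_atomic() also handles highmem pages via a short-lived mapping. The shape of the pattern, as a hypothetical helper:

    #include <linux/highmem.h>
    #include <linux/string.h>

    /*
     * Copy @len bytes from @src into @page at byte offset @off.
     * Works for highmem pages; no sleeping between map and unmap.
     */
    static void copy_to_page(struct page *page, size_t off,
                             const void *src, size_t len)
    {
            char *p = kmap_atomic(page);    /* temporary per-CPU mapping */

            memcpy(p + off, src, len);
            kunmap_atomic(p);
    }
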
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 66cb0f8..bb97659 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -331,15 +331,15 @@
  * And now the modules code and kernel interface.
  */
 static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
-module_param(rd_nr, int, S_IRUGO);
+module_param(rd_nr, int, 0444);
 MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
 
 unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
-module_param(rd_size, ulong, S_IRUGO);
+module_param(rd_size, ulong, 0444);
 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
 
 static int max_part = 1;
-module_param(max_part, int, S_IRUGO);
+module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
 
 MODULE_LICENSE("GPL");
@@ -402,6 +402,10 @@
 	set_capacity(disk, rd_size * 2);
 	disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
 
+	/* Tell the block layer that this is not a rotational device */
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+
 	return brd;
 
 out_free_queue:
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 9f4e6f5..11a85b7 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -977,7 +977,7 @@
 	bm_page_unlock_io(device, idx);
 
 	if (ctx->flags & BM_AIO_COPY_PAGES)
-		mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);
+		mempool_free(bio->bi_io_vec[0].bv_page, &drbd_md_io_page_pool);
 
 	bio_put(bio);
 
@@ -1014,7 +1014,8 @@
 	bm_set_page_unchanged(b->bm_pages[page_nr]);
 
 	if (ctx->flags & BM_AIO_COPY_PAGES) {
-		page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_RECLAIM);
+		page = mempool_alloc(&drbd_md_io_page_pool,
+				GFP_NOIO | __GFP_HIGHMEM);
 		copy_highpage(page, b->bm_pages[page_nr]);
 		bm_store_page_idx(page, page_nr);
 	} else
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index ab21976..5d5e8d6 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -481,9 +481,9 @@
 		goto fail;
 	resource->debugfs_res_connections = dentry;
 
-	dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
-			resource->debugfs_res, resource,
-			&in_flight_summary_fops);
+	dentry = debugfs_create_file("in_flight_summary", 0440,
+				     resource->debugfs_res, resource,
+				     &in_flight_summary_fops);
 	if (IS_ERR_OR_NULL(dentry))
 		goto fail;
 	resource->debugfs_res_in_flight_summary = dentry;
@@ -645,16 +645,16 @@
 		goto fail;
 	connection->debugfs_conn = dentry;
 
-	dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
-			connection->debugfs_conn, connection,
-			&connection_callback_history_fops);
+	dentry = debugfs_create_file("callback_history", 0440,
+				     connection->debugfs_conn, connection,
+				     &connection_callback_history_fops);
 	if (IS_ERR_OR_NULL(dentry))
 		goto fail;
 	connection->debugfs_conn_callback_history = dentry;
 
-	dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
-			connection->debugfs_conn, connection,
-			&connection_oldest_requests_fops);
+	dentry = debugfs_create_file("oldest_requests", 0440,
+				     connection->debugfs_conn, connection,
+				     &connection_oldest_requests_fops);
 	if (IS_ERR_OR_NULL(dentry))
 		goto fail;
 	connection->debugfs_conn_oldest_requests = dentry;
@@ -824,7 +824,7 @@
 	device->debugfs_minor = dentry;
 
 #define DCF(name)	do {					\
-	dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP,	\
+	dentry = debugfs_create_file(#name, 0440,	\
 			device->debugfs_vol, device,		\
 			&device_ ## name ## _fops);		\
 	if (IS_ERR_OR_NULL(dentry))				\
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 06ecee1..bc4ed2e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1405,8 +1405,8 @@
 extern struct kmem_cache *drbd_ee_cache;	/* peer requests */
 extern struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
 extern struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
-extern mempool_t *drbd_request_mempool;
-extern mempool_t *drbd_ee_mempool;
+extern mempool_t drbd_request_mempool;
+extern mempool_t drbd_ee_mempool;
 
 /* drbd's page pool, used to buffer data received from the peer,
  * or data requested by the peer.
@@ -1432,16 +1432,16 @@
  * 128 should be plenty, currently we probably can get away with as few as 1.
  */
 #define DRBD_MIN_POOL_PAGES	128
-extern mempool_t *drbd_md_io_page_pool;
+extern mempool_t drbd_md_io_page_pool;
 
 /* We also need to make sure we get a bio
  * when we need it for housekeeping purposes */
-extern struct bio_set *drbd_md_io_bio_set;
+extern struct bio_set drbd_md_io_bio_set;
 /* to allocate from that set */
 extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
 
 /* And a bio_set for cloning */
-extern struct bio_set *drbd_io_bio_set;
+extern struct bio_set drbd_io_bio_set;
 
 extern struct mutex resources_mutex;
 
@@ -1643,7 +1643,7 @@
 
 /* drbd_proc.c */
 extern struct proc_dir_entry *drbd_proc;
-extern const struct file_operations drbd_proc_fops;
+int drbd_seq_show(struct seq_file *seq, void *v);
 
 /* drbd_actlog.c */
 extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 185f1ef..7655d61 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -124,11 +124,11 @@
 struct kmem_cache *drbd_ee_cache;	/* peer requests */
 struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
 struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
-mempool_t *drbd_request_mempool;
-mempool_t *drbd_ee_mempool;
-mempool_t *drbd_md_io_page_pool;
-struct bio_set *drbd_md_io_bio_set;
-struct bio_set *drbd_io_bio_set;
+mempool_t drbd_request_mempool;
+mempool_t drbd_ee_mempool;
+mempool_t drbd_md_io_page_pool;
+struct bio_set drbd_md_io_bio_set;
+struct bio_set drbd_io_bio_set;
 
 /* I do not use a standard mempool, because:
    1) I want to hand out the pre-allocated objects first.
@@ -153,10 +153,10 @@
 {
 	struct bio *bio;
 
-	if (!drbd_md_io_bio_set)
+	if (!bioset_initialized(&drbd_md_io_bio_set))
 		return bio_alloc(gfp_mask, 1);
 
-	bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
+	bio = bio_alloc_bioset(gfp_mask, 1, &drbd_md_io_bio_set);
 	if (!bio)
 		return NULL;
 	return bio;
@@ -2097,16 +2097,11 @@
 
 	/* D_ASSERT(device, atomic_read(&drbd_pp_vacant)==0); */
 
-	if (drbd_io_bio_set)
-		bioset_free(drbd_io_bio_set);
-	if (drbd_md_io_bio_set)
-		bioset_free(drbd_md_io_bio_set);
-	if (drbd_md_io_page_pool)
-		mempool_destroy(drbd_md_io_page_pool);
-	if (drbd_ee_mempool)
-		mempool_destroy(drbd_ee_mempool);
-	if (drbd_request_mempool)
-		mempool_destroy(drbd_request_mempool);
+	bioset_exit(&drbd_io_bio_set);
+	bioset_exit(&drbd_md_io_bio_set);
+	mempool_exit(&drbd_md_io_page_pool);
+	mempool_exit(&drbd_ee_mempool);
+	mempool_exit(&drbd_request_mempool);
 	if (drbd_ee_cache)
 		kmem_cache_destroy(drbd_ee_cache);
 	if (drbd_request_cache)
@@ -2116,11 +2111,6 @@
 	if (drbd_al_ext_cache)
 		kmem_cache_destroy(drbd_al_ext_cache);
 
-	drbd_io_bio_set      = NULL;
-	drbd_md_io_bio_set   = NULL;
-	drbd_md_io_page_pool = NULL;
-	drbd_ee_mempool      = NULL;
-	drbd_request_mempool = NULL;
 	drbd_ee_cache        = NULL;
 	drbd_request_cache   = NULL;
 	drbd_bm_ext_cache    = NULL;
@@ -2133,18 +2123,7 @@
 {
 	struct page *page;
 	const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * drbd_minor_count;
-	int i;
-
-	/* prepare our caches and mempools */
-	drbd_request_mempool = NULL;
-	drbd_ee_cache        = NULL;
-	drbd_request_cache   = NULL;
-	drbd_bm_ext_cache    = NULL;
-	drbd_al_ext_cache    = NULL;
-	drbd_pp_pool         = NULL;
-	drbd_md_io_page_pool = NULL;
-	drbd_md_io_bio_set   = NULL;
-	drbd_io_bio_set      = NULL;
+	int i, ret;
 
 	/* caches */
 	drbd_request_cache = kmem_cache_create(
@@ -2168,26 +2147,26 @@
 		goto Enomem;
 
 	/* mempools */
-	drbd_io_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (drbd_io_bio_set == NULL)
+	ret = bioset_init(&drbd_io_bio_set, BIO_POOL_SIZE, 0, 0);
+	if (ret)
 		goto Enomem;
 
-	drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0,
-					   BIOSET_NEED_BVECS);
-	if (drbd_md_io_bio_set == NULL)
+	ret = bioset_init(&drbd_md_io_bio_set, DRBD_MIN_POOL_PAGES, 0,
+			  BIOSET_NEED_BVECS);
+	if (ret)
 		goto Enomem;
 
-	drbd_md_io_page_pool = mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0);
-	if (drbd_md_io_page_pool == NULL)
+	ret = mempool_init_page_pool(&drbd_md_io_page_pool, DRBD_MIN_POOL_PAGES, 0);
+	if (ret)
 		goto Enomem;
 
-	drbd_request_mempool = mempool_create_slab_pool(number,
-		drbd_request_cache);
-	if (drbd_request_mempool == NULL)
+	ret = mempool_init_slab_pool(&drbd_request_mempool, number,
+				     drbd_request_cache);
+	if (ret)
 		goto Enomem;
 
-	drbd_ee_mempool = mempool_create_slab_pool(number, drbd_ee_cache);
-	if (drbd_ee_mempool == NULL)
+	ret = mempool_init_slab_pool(&drbd_ee_mempool, number, drbd_ee_cache);
+	if (ret)
 		goto Enomem;
 
 	/* drbd's page pool */
@@ -3010,7 +2989,7 @@
 		goto fail;
 
 	err = -ENOMEM;
-	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
+	drbd_proc = proc_create_single("drbd", S_IFREG | 0444 , NULL, drbd_seq_show);
 	if (!drbd_proc)	{
 		pr_err("unable to register proc file\n");
 		goto fail;
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 582caeb..74ef292 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -33,18 +33,7 @@
 #include <linux/drbd.h>
 #include "drbd_int.h"
 
-static int drbd_proc_open(struct inode *inode, struct file *file);
-static int drbd_proc_release(struct inode *inode, struct file *file);
-
-
 struct proc_dir_entry *drbd_proc;
-const struct file_operations drbd_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= drbd_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= drbd_proc_release,
-};
 
 static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
 {
@@ -235,7 +224,7 @@
 	}
 }
 
-static int drbd_seq_show(struct seq_file *seq, void *v)
+int drbd_seq_show(struct seq_file *seq, void *v)
 {
 	int i, prev_i = -1;
 	const char *sn;
@@ -345,24 +334,3 @@
 
 	return 0;
 }
-
-static int drbd_proc_open(struct inode *inode, struct file *file)
-{
-	int err;
-
-	if (try_module_get(THIS_MODULE)) {
-		err = single_open(file, drbd_seq_show, NULL);
-		if (err)
-			module_put(THIS_MODULE);
-		return err;
-	}
-	return -ENODEV;
-}
-
-static int drbd_proc_release(struct inode *inode, struct file *file)
-{
-	module_put(THIS_MODULE);
-	return single_release(inode, file);
-}
-
-/* PROC FS stuff end */
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c72dee0..be9450f 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -378,7 +378,7 @@
 	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
 		return NULL;
 
-	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
+	peer_req = mempool_alloc(&drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
 	if (!peer_req) {
 		if (!(gfp_mask & __GFP_NOWARN))
 			drbd_err(device, "%s: allocation failed\n", __func__);
@@ -409,7 +409,7 @@
 	return peer_req;
 
  fail:
-	mempool_free(peer_req, drbd_ee_mempool);
+	mempool_free(peer_req, &drbd_ee_mempool);
 	return NULL;
 }
 
@@ -426,7 +426,7 @@
 		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
 		drbd_al_complete_io(device, &peer_req->i);
 	}
-	mempool_free(peer_req, drbd_ee_mempool);
+	mempool_free(peer_req, &drbd_ee_mempool);
 }
 
 int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index a500e73..a47e498 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -55,7 +55,7 @@
 {
 	struct drbd_request *req;
 
-	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
+	req = mempool_alloc(&drbd_request_mempool, GFP_NOIO);
 	if (!req)
 		return NULL;
 	memset(req, 0, sizeof(*req));
@@ -184,7 +184,7 @@
 		}
 	}
 
-	mempool_free(req, drbd_request_mempool);
+	mempool_free(req, &drbd_request_mempool);
 }
 
 static void wake_all_senders(struct drbd_connection *connection)
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index cb97b3b..94c6540 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -269,7 +269,7 @@
 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
 {
 	struct bio *bio;
-	bio = bio_clone_fast(bio_src, GFP_NOIO, drbd_io_bio_set);
+	bio = bio_clone_fast(bio_src, GFP_NOIO, &drbd_io_bio_set);
 
 	req->private_bio = bio;
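
All of the drbd pool churn follows one theme of this cycle: mempool_t and struct bio_set are now embedded in their owner and set up with mempool_init*()/bioset_init(), rather than allocated via mempool_create*()/bioset_create(). That removes a pointer dereference per allocation and makes teardown unconditional. A minimal sketch with hypothetical pool names:

    #include <linux/bio.h>
    #include <linux/mempool.h>

    static struct bio_set my_bio_set;       /* embedded, not a pointer */
    static mempool_t my_req_pool;

    static int my_pools_init(void)
    {
            int ret;

            ret = bioset_init(&my_bio_set, BIO_POOL_SIZE, 0,
                              BIOSET_NEED_BVECS);
            if (ret)
                    return ret;

            /* 16 preallocated 128-byte objects, kmalloc-backed */
            ret = mempool_init_kmalloc_pool(&my_req_pool, 16, 128);
            if (ret)
                    bioset_exit(&my_bio_set);
            return ret;
    }

    static void my_pools_exit(void)
    {
            /* both exits are safe on a never-initialized, zeroed object */
            mempool_exit(&my_req_pool);
            bioset_exit(&my_bio_set);
    }
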
 
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8ec7235..8871b50 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4450,7 +4450,7 @@
 	return sprintf(buf, "%X\n", UDP->cmos);
 }
 
-static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
+static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL);
 
 static struct attribute *floppy_dev_attrs[] = {
 	&dev_attr_cmos.attr,
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 55cf554..4838b0d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -732,7 +732,7 @@
 	return loop_attr_show(d, b, loop_attr_##_name##_show);		\
 }									\
 static struct device_attribute loop_attr_##_name =			\
-	__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
+	__ATTR(_name, 0444, loop_attr_do_show_##_name, NULL);
 
 static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
 {
@@ -809,16 +809,17 @@
 	.attrs= loop_attrs,
 };
 
-static int loop_sysfs_init(struct loop_device *lo)
+static void loop_sysfs_init(struct loop_device *lo)
 {
-	return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
-				  &loop_attribute_group);
+	lo->sysfs_inited = !sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
+						&loop_attribute_group);
 }
 
 static void loop_sysfs_exit(struct loop_device *lo)
 {
-	sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
-			   &loop_attribute_group);
+	if (lo->sysfs_inited)
+		sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
+				   &loop_attribute_group);
 }
 
 static void loop_config_discard(struct loop_device *lo)
@@ -1677,9 +1678,9 @@
  * And now the modules code and kernel interface.
  */
 static int max_loop;
-module_param(max_loop, int, S_IRUGO);
+module_param(max_loop, int, 0444);
 MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
-module_param(max_part, int, S_IRUGO);
+module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index b78de98..4d42c7a 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -58,6 +58,7 @@
 	struct kthread_worker	worker;
 	struct task_struct	*worker_task;
 	bool			use_dio;
+	bool			sysfs_inited;
 
 	struct request_queue	*lo_queue;
 	struct blk_mq_tag_set	tag_set;
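
The loop driver now records whether sysfs_create_group() actually succeeded, because sysfs_remove_group() WARNs when asked to remove a group that was never created. Reduced to its core (names hypothetical):

    #include <linux/sysfs.h>

    static struct attribute *my_attrs[] = { NULL }; /* list elided */
    static const struct attribute_group my_attr_group = {
            .attrs = my_attrs,
    };

    struct my_dev {
            struct kobject *kobj;   /* e.g. &disk_to_dev(disk)->kobj */
            bool sysfs_inited;      /* did the group really get created? */
    };

    static void my_sysfs_init(struct my_dev *d)
    {
            d->sysfs_inited = !sysfs_create_group(d->kobj, &my_attr_group);
    }

    static void my_sysfs_exit(struct my_dev *d)
    {
            if (d->sysfs_inited)
                    sysfs_remove_group(d->kobj, &my_attr_group);
    }
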
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 769c551..c73626d 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2285,7 +2285,7 @@
 	return size;
 }
 
-static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
+static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL);
 
 /* debugfs entries */
 
@@ -2566,10 +2566,9 @@
 		return -1;
 	}
 
-	debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
-							&mtip_flags_fops);
-	debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
-							&mtip_regs_fops);
+	debugfs_create_file("flags", 0444, dd->dfs_node, dd, &mtip_flags_fops);
+	debugfs_create_file("registers", 0444, dd->dfs_node, dd,
+			    &mtip_regs_fops);
 
 	return 0;
 }
@@ -2726,15 +2725,11 @@
 	blk_mq_end_request(rq, cmd->status);
 }
 
-static void mtip_abort_cmd(struct request *req, void *data,
-							bool reserved)
+static void mtip_abort_cmd(struct request *req, void *data, bool reserved)
 {
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 	struct driver_data *dd = data;
 
-	if (!blk_mq_request_started(req))
-		return;
-
 	dbg_printk(MTIP_DRV_NAME " Aborting request, tag = %d\n", req->tag);
 
 	clear_bit(req->tag, dd->port->cmds_to_issue);
@@ -2742,14 +2737,10 @@
 	mtip_softirq_done_fn(req);
 }
 
-static void mtip_queue_cmd(struct request *req, void *data,
-							bool reserved)
+static void mtip_queue_cmd(struct request *req, void *data, bool reserved)
 {
 	struct driver_data *dd = data;
 
-	if (!blk_mq_request_started(req))
-		return;
-
 	set_bit(req->tag, dd->port->cmds_to_issue);
 	blk_abort_request(req);
 }
@@ -3720,7 +3711,8 @@
 		struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req);
 
 		cmd->status = BLK_STS_TIMEOUT;
-		return BLK_EH_HANDLED;
+		blk_mq_complete_request(req);
+		return BLK_EH_DONE;
 	}
 
 	if (test_bit(req->tag, dd->port->cmds_to_issue))
@@ -3862,7 +3854,6 @@
 	blk_queue_max_hw_sectors(dd->queue, 0xffff);
 	blk_queue_max_segment_size(dd->queue, 0x400000);
 	blk_queue_io_min(dd->queue, 4096);
-	blk_queue_bounce_limit(dd->queue, dd->pdev->dma_mask);
 
 	/* Signal trim support */
 	if (dd->trim_supp == true) {
@@ -4273,7 +4264,7 @@
 	if (!dd->isr_workq) {
 		dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
 		rv = -ENOMEM;
-		goto block_initialize_err;
+		goto setmask_err;
 	}
 
 	memset(cpu_list, 0, sizeof(cpu_list));
@@ -4614,7 +4605,7 @@
 	}
 	if (dfs_parent) {
 		dfs_device_status = debugfs_create_file("device_status",
-					S_IRUGO, dfs_parent, NULL,
+					0444, dfs_parent, NULL,
 					&mtip_device_status_fops);
 		if (IS_ERR_OR_NULL(dfs_device_status)) {
 			pr_err("Error creating device_status node\n");
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index afbc202..3ed1ef8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -166,16 +166,19 @@
 }
 
 static const struct device_attribute pid_attr = {
-	.attr = { .name = "pid", .mode = S_IRUGO},
+	.attr = { .name = "pid", .mode = 0444},
 	.show = pid_show,
 };
 
 static void nbd_dev_remove(struct nbd_device *nbd)
 {
 	struct gendisk *disk = nbd->disk;
+	struct request_queue *q;
+
 	if (disk) {
+		q = disk->queue;
 		del_gendisk(disk);
-		blk_cleanup_queue(disk->queue);
+		blk_cleanup_queue(q);
 		blk_mq_free_tag_set(&nbd->tag_set);
 		disk->private_data = NULL;
 		put_disk(disk);
@@ -213,7 +216,15 @@
 	}
 	if (!nsock->dead) {
 		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
-		atomic_dec(&nbd->config->live_connections);
+		if (atomic_dec_return(&nbd->config->live_connections) == 0) {
+			if (test_and_clear_bit(NBD_DISCONNECT_REQUESTED,
+					       &nbd->config->runtime_flags)) {
+				set_bit(NBD_DISCONNECTED,
+					&nbd->config->runtime_flags);
+				dev_info(nbd_to_dev(nbd),
+					"Disconnected due to user request.\n");
+			}
+		}
 	}
 	nsock->dead = true;
 	nsock->pending = NULL;
@@ -231,9 +242,22 @@
 static void nbd_size_update(struct nbd_device *nbd)
 {
 	struct nbd_config *config = nbd->config;
+	struct block_device *bdev = bdget_disk(nbd->disk, 0);
+
+	if (config->flags & NBD_FLAG_SEND_TRIM) {
+		nbd->disk->queue->limits.discard_granularity = config->blksize;
+		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
+	}
 	blk_queue_logical_block_size(nbd->disk->queue, config->blksize);
 	blk_queue_physical_block_size(nbd->disk->queue, config->blksize);
 	set_capacity(nbd->disk, config->bytesize >> 9);
+	if (bdev) {
+		if (bdev->bd_disk)
+			bd_set_size(bdev, config->bytesize);
+		else
+			bdev->bd_invalidated = 1;
+		bdput(bdev);
+	}
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 }
 
@@ -243,6 +267,8 @@
 	struct nbd_config *config = nbd->config;
 	config->blksize = blocksize;
 	config->bytesize = blocksize * nr_blocks;
+	if (nbd->task_recv != NULL)
+		nbd_size_update(nbd);
 }
 
 static void nbd_complete_rq(struct request *req)
@@ -286,13 +312,15 @@
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
 		cmd->status = BLK_STS_TIMEOUT;
-		return BLK_EH_HANDLED;
+		goto done;
 	}
 	config = nbd->config;
 
 	if (config->num_connections > 1) {
 		dev_err_ratelimited(nbd_to_dev(nbd),
-				    "Connection timed out, retrying\n");
+				    "Connection timed out, retrying (%d/%d alive)\n",
+				    atomic_read(&config->live_connections),
+				    config->num_connections);
 		/*
 		 * Hooray we have more connections, requeue this IO, the submit
 		 * path will put it on a real connection.
@@ -314,7 +342,7 @@
 			}
 			blk_mq_requeue_request(req, true);
 			nbd_config_put(nbd);
-			return BLK_EH_NOT_HANDLED;
+			return BLK_EH_DONE;
 		}
 	} else {
 		dev_err_ratelimited(nbd_to_dev(nbd),
@@ -324,8 +352,9 @@
 	cmd->status = BLK_STS_IOERR;
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
-
-	return BLK_EH_HANDLED;
+done:
+	blk_mq_complete_request(req);
+	return BLK_EH_DONE;
 }
 
 /*
@@ -647,11 +676,8 @@
 
 static void nbd_clear_req(struct request *req, void *data, bool reserved)
 {
-	struct nbd_cmd *cmd;
+	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
 
-	if (!blk_mq_request_started(req))
-		return;
-	cmd = blk_mq_rq_to_pdu(req);
 	cmd->status = BLK_STS_IOERR;
 	blk_mq_complete_request(req);
 }
@@ -714,10 +740,9 @@
 		return 0;
 	if (test_bit(NBD_DISCONNECTED, &config->runtime_flags))
 		return 0;
-	wait_event_timeout(config->conn_wait,
-			   atomic_read(&config->live_connections),
-			   config->dead_conn_timeout);
-	return atomic_read(&config->live_connections);
+	return wait_event_timeout(config->conn_wait,
+				  atomic_read(&config->live_connections) > 0,
+				  config->dead_conn_timeout) > 0;
 }
 
 static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
@@ -950,10 +975,6 @@
 	if (bdev->bd_openers > 1)
 		return;
 	bd_set_size(bdev, 0);
-	if (max_part > 0) {
-		blkdev_reread_part(bdev);
-		bdev->bd_invalidated = 1;
-	}
 }
 
 static void nbd_parse_flags(struct nbd_device *nbd)
@@ -1040,6 +1061,8 @@
 		nbd->config = NULL;
 
 		nbd->tag_set.timeout = 0;
+		nbd->disk->queue->limits.discard_granularity = 0;
+		blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
 		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 
 		mutex_unlock(&nbd->config_lock);
@@ -1109,7 +1132,6 @@
 	if (ret)
 		return ret;
 
-	bd_set_size(bdev, config->bytesize);
 	if (max_part)
 		bdev->bd_invalidated = 1;
 	mutex_unlock(&nbd->config_lock);
@@ -1118,7 +1140,7 @@
 	if (ret)
 		sock_shutdown(nbd);
 	mutex_lock(&nbd->config_lock);
-	bd_set_size(bdev, 0);
+	nbd_bdev_reset(bdev);
 	/* user requested, ignore socket errors */
 	if (test_bit(NBD_DISCONNECT_REQUESTED, &config->runtime_flags))
 		ret = 0;
@@ -1269,6 +1291,9 @@
 		refcount_set(&nbd->config_refs, 1);
 		refcount_inc(&nbd->refs);
 		mutex_unlock(&nbd->config_lock);
+		bdev->bd_invalidated = 1;
+	} else if (nbd_disconnected(nbd->config)) {
+		bdev->bd_invalidated = 1;
 	}
 out:
 	mutex_unlock(&nbd_index_mutex);
@@ -1490,8 +1515,8 @@
 	 */
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
-	disk->queue->limits.discard_granularity = 512;
-	blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
+	disk->queue->limits.discard_granularity = 0;
+	blk_queue_max_discard_sectors(disk->queue, 0);
 	blk_queue_max_segment_size(disk->queue, UINT_MAX);
 	blk_queue_max_segments(disk->queue, USHRT_MAX);
 	blk_queue_max_hw_sectors(disk->queue, 65536);
@@ -1755,6 +1780,7 @@
 	}
 	mutex_lock(&nbd->config_lock);
 	nbd_disconnect(nbd);
+	nbd_clear_sock(nbd);
 	mutex_unlock(&nbd->config_lock);
 	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
 			       &nbd->config->runtime_flags))
@@ -2093,7 +2119,8 @@
 	if (nbds_max > 1UL << (MINORBITS - part_shift))
 		return -EINVAL;
 	recv_workqueue = alloc_workqueue("knbd-recv",
-					 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+					 WQ_MEM_RECLAIM | WQ_HIGHPRI |
+					 WQ_UNBOUND, 0);
 	if (!recv_workqueue)
 		return -ENOMEM;
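
The nbd_wait_disconnected fix above leans on the return value of wait_event_timeout(): it is 0 only when the timeout elapsed with the condition still false, and positive otherwise, so "> 0" already answers "did a connection come back" without the old racy re-read of the counter afterwards. As a generic sketch:

    #include <linux/atomic.h>
    #include <linux/wait.h>

    /* True iff @ready became positive within @timeout jiffies. */
    static bool wait_until_ready(wait_queue_head_t *wq, atomic_t *ready,
                                 unsigned long timeout)
    {
            return wait_event_timeout(*wq, atomic_read(ready) > 0,
                                      timeout) > 0;
    }
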
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index a765532..2bdadd7 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -157,23 +157,23 @@
 };
 
 static int g_no_sched;
-module_param_named(no_sched, g_no_sched, int, S_IRUGO);
+module_param_named(no_sched, g_no_sched, int, 0444);
 MODULE_PARM_DESC(no_sched, "No io scheduler");
 
 static int g_submit_queues = 1;
-module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
+module_param_named(submit_queues, g_submit_queues, int, 0444);
 MODULE_PARM_DESC(submit_queues, "Number of submission queues");
 
 static int g_home_node = NUMA_NO_NODE;
-module_param_named(home_node, g_home_node, int, S_IRUGO);
+module_param_named(home_node, g_home_node, int, 0444);
 MODULE_PARM_DESC(home_node, "Home node for the device");
 
 #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
 static char g_timeout_str[80];
-module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
+module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
 
 static char g_requeue_str[80];
-module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO);
+module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
 #endif
 
 static int g_queue_mode = NULL_Q_MQ;
@@ -203,27 +203,27 @@
 	.get	= param_get_int,
 };
 
-device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
+device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
 MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
 
 static int g_gb = 250;
-module_param_named(gb, g_gb, int, S_IRUGO);
+module_param_named(gb, g_gb, int, 0444);
 MODULE_PARM_DESC(gb, "Size in GB");
 
 static int g_bs = 512;
-module_param_named(bs, g_bs, int, S_IRUGO);
+module_param_named(bs, g_bs, int, 0444);
 MODULE_PARM_DESC(bs, "Block size (in bytes)");
 
 static int nr_devices = 1;
-module_param(nr_devices, int, S_IRUGO);
+module_param(nr_devices, int, 0444);
 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
 
 static bool g_blocking;
-module_param_named(blocking, g_blocking, bool, S_IRUGO);
+module_param_named(blocking, g_blocking, bool, 0444);
 MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
 
 static bool shared_tags;
-module_param(shared_tags, bool, S_IRUGO);
+module_param(shared_tags, bool, 0444);
 MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
 
 static int g_irqmode = NULL_IRQ_SOFTIRQ;
@@ -239,19 +239,19 @@
 	.get	= param_get_int,
 };
 
-device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
+device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
 MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
 
 static unsigned long g_completion_nsec = 10000;
-module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
+module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
 MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
 
 static int g_hw_queue_depth = 64;
-module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
+module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
 MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
 
 static bool g_use_per_node_hctx;
-module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
+module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
 MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
 
 static struct nullb_device *null_alloc_dev(void);
@@ -1365,7 +1365,8 @@
 static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
 {
 	pr_info("null: rq %p timed out\n", rq);
-	return BLK_EH_HANDLED;
+	blk_mq_complete_request(rq);
+	return BLK_EH_DONE;
 }
 
 static int null_rq_prep_fn(struct request_queue *q, struct request *req)
@@ -1427,7 +1428,8 @@
 static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
 {
 	pr_info("null: rq %p timed out\n", rq);
-	return BLK_EH_HANDLED;
+	blk_mq_complete_request(rq);
+	return BLK_EH_DONE;
 }
 
 static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 27a44b9..8961b190 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -740,7 +740,7 @@
 {
 	struct request *rq;
 
-	rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index c61d20c..b3f83cd 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -97,8 +97,8 @@
 static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
 static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
 static struct mutex ctl_mutex;	/* Serialize open/close/setup/teardown */
-static mempool_t *psd_pool;
-static struct bio_set *pkt_bio_set;
+static mempool_t psd_pool;
+static struct bio_set pkt_bio_set;
 
 static struct class	*class_pktcdvd = NULL;    /* /sys/class/pktcdvd */
 static struct dentry	*pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
@@ -478,8 +478,8 @@
 	if (!pd->dfs_d_root)
 		return;
 
-	pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
-				pd->dfs_d_root, pd, &debug_fops);
+	pd->dfs_f_info = debugfs_create_file("info", 0444,
+					     pd->dfs_d_root, pd, &debug_fops);
 }
 
 static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
@@ -631,7 +631,7 @@
 static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
 {
 	rb_erase(&node->rb_node, &pd->bio_queue);
-	mempool_free(node, pd->rb_pool);
+	mempool_free(node, &pd->rb_pool);
 	pd->bio_queue_size--;
 	BUG_ON(pd->bio_queue_size < 0);
 }
@@ -704,13 +704,13 @@
 	int ret = 0;
 
 	rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
-			     REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
+			     REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
-				      __GFP_RECLAIM);
+				      GFP_NOIO);
 		if (ret)
 			goto out;
 	}
@@ -1285,7 +1285,7 @@
 	 * Fill-in bvec with data from orig_bios.
 	 */
 	spin_lock(&pkt->lock);
-	bio_copy_data(pkt->w_bio, pkt->orig_bios.head);
+	bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
 
 	pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
 	spin_unlock(&pkt->lock);
@@ -2303,14 +2303,14 @@
 	psd->bio->bi_status = bio->bi_status;
 	bio_put(bio);
 	bio_endio(psd->bio);
-	mempool_free(psd, psd_pool);
+	mempool_free(psd, &psd_pool);
 	pkt_bio_finished(pd);
 }
 
 static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, pkt_bio_set);
-	struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
+	struct bio *cloned_bio = bio_clone_fast(bio, GFP_NOIO, &pkt_bio_set);
+	struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
 
 	psd->pd = pd;
 	psd->bio = bio;
@@ -2381,7 +2381,7 @@
 	/*
 	 * No matching packet found. Store the bio in the work queue.
 	 */
-	node = mempool_alloc(pd->rb_pool, GFP_NOIO);
+	node = mempool_alloc(&pd->rb_pool, GFP_NOIO);
 	node->bio = bio;
 	spin_lock(&pd->lock);
 	BUG_ON(pd->bio_queue_size < 0);
@@ -2451,7 +2451,7 @@
 
 			split = bio_split(bio, last_zone -
 					  bio->bi_iter.bi_sector,
-					  GFP_NOIO, pkt_bio_set);
+					  GFP_NOIO, &pkt_bio_set);
 			bio_chain(split, bio);
 		} else {
 			split = bio;
@@ -2538,18 +2538,6 @@
 	return 0;
 }
 
-static int pkt_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pkt_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations pkt_proc_fops = {
-	.open	= pkt_seq_open,
-	.read	= seq_read,
-	.llseek	= seq_lseek,
-	.release = single_release
-};
-
 static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 {
 	int i;
@@ -2604,7 +2592,7 @@
 		goto out_mem;
 	}
 
-	proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd);
+	proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd);
 	pkt_dbg(1, pd, "writer mapped to %s\n", bdevname(bdev, b));
 	return 0;
 
@@ -2707,9 +2695,9 @@
 	if (!pd)
 		goto out_mutex;
 
-	pd->rb_pool = mempool_create_kmalloc_pool(PKT_RB_POOL_SIZE,
-						  sizeof(struct pkt_rb_node));
-	if (!pd->rb_pool)
+	ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE,
+					sizeof(struct pkt_rb_node));
+	if (ret)
 		goto out_mem;
 
 	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
@@ -2766,7 +2754,7 @@
 out_mem2:
 	put_disk(disk);
 out_mem:
-	mempool_destroy(pd->rb_pool);
+	mempool_exit(&pd->rb_pool);
 	kfree(pd);
 out_mutex:
 	mutex_unlock(&ctl_mutex);
@@ -2817,7 +2805,7 @@
 	blk_cleanup_queue(pd->disk->queue);
 	put_disk(pd->disk);
 
-	mempool_destroy(pd->rb_pool);
+	mempool_exit(&pd->rb_pool);
 	kfree(pd);
 
 	/* This is safe: open() is still holding a reference. */
@@ -2914,14 +2902,14 @@
 
 	mutex_init(&ctl_mutex);
 
-	psd_pool = mempool_create_kmalloc_pool(PSD_POOL_SIZE,
-					sizeof(struct packet_stacked_data));
-	if (!psd_pool)
-		return -ENOMEM;
-	pkt_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!pkt_bio_set) {
-		mempool_destroy(psd_pool);
-		return -ENOMEM;
+	ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE,
+				    sizeof(struct packet_stacked_data));
+	if (ret)
+		return ret;
+	ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0);
+	if (ret) {
+		mempool_exit(&psd_pool);
+		return ret;
 	}
 
 	ret = register_blkdev(pktdev_major, DRIVER_NAME);
@@ -2954,8 +2942,8 @@
 out:
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
 out2:
-	mempool_destroy(psd_pool);
-	bioset_free(pkt_bio_set);
+	mempool_exit(&psd_pool);
+	bioset_exit(&pkt_bio_set);
 	return ret;
 }
 
@@ -2968,8 +2956,8 @@
 	pkt_sysfs_cleanup();
 
 	unregister_blkdev(pktdev_major, DRIVER_NAME);
-	mempool_destroy(psd_pool);
-	bioset_free(pkt_bio_set);
+	mempool_exit(&psd_pool);
+	bioset_exit(&pkt_bio_set);
 }
 
 MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 075662f..afe1508 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -465,8 +465,6 @@
 	priv->queue = queue;
 	queue->queuedata = dev;
 
-	blk_queue_bounce_limit(queue, BLK_BOUNCE_HIGH);
-
 	blk_queue_max_hw_sectors(queue, dev->bounce_size >> 9);
 	blk_queue_segment_boundary(queue, -1UL);
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 6a55959..8fa4533 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -521,26 +521,13 @@
 	return 0;
 }
 
-static int ps3vram_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ps3vram_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ps3vram_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ps3vram_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void ps3vram_proc_init(struct ps3_system_bus_device *dev)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 	struct proc_dir_entry *pde;
 
-	pde = proc_create_data(DEVICE_NAME, 0444, NULL, &ps3vram_proc_fops,
-			       priv);
+	pde = proc_create_single_data(DEVICE_NAME, 0444, NULL,
+			ps3vram_proc_show, priv);
 	if (!pde)
 		dev_warn(&dev->core, "failed to create /proc entry\n");
 }
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 33b36fe..af35404 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -424,7 +424,7 @@
  * single-major requires >= 0.75 version of userspace rbd utility.
  */
 static bool single_major = true;
-module_param(single_major, bool, S_IRUGO);
+module_param(single_major, bool, 0444);
 MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
 
 static ssize_t rbd_add(struct bus_type *bus, const char *buf,
@@ -468,11 +468,11 @@
 	return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
 }
 
-static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
-static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
-static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
-static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
-static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
+static BUS_ATTR(add, 0200, NULL, rbd_add);
+static BUS_ATTR(remove, 0200, NULL, rbd_remove);
+static BUS_ATTR(add_single_major, 0200, NULL, rbd_add_single_major);
+static BUS_ATTR(remove_single_major, 0200, NULL, rbd_remove_single_major);
+static BUS_ATTR(supported_features, 0444, rbd_supported_features_show, NULL);
 
 static struct attribute *rbd_bus_attrs[] = {
 	&bus_attr_add.attr,
@@ -4204,22 +4204,22 @@
 	return size;
 }
 
-static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
-static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
-static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
-static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
-static DEVICE_ATTR(client_addr, S_IRUGO, rbd_client_addr_show, NULL);
-static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
-static DEVICE_ATTR(cluster_fsid, S_IRUGO, rbd_cluster_fsid_show, NULL);
-static DEVICE_ATTR(config_info, S_IRUSR, rbd_config_info_show, NULL);
-static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
-static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
-static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
-static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
-static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
-static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
-static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
-static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
+static DEVICE_ATTR(size, 0444, rbd_size_show, NULL);
+static DEVICE_ATTR(features, 0444, rbd_features_show, NULL);
+static DEVICE_ATTR(major, 0444, rbd_major_show, NULL);
+static DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL);
+static DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL);
+static DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL);
+static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
+static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
+static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
+static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
+static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
+static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL);
+static DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL);
+static DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL);
 
 static struct attribute *rbd_attrs[] = {
 	&dev_attr_size.attr,
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 34997df..09537be 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -247,19 +247,19 @@
 	if (IS_ERR_OR_NULL(card->debugfs_dir))
 		goto failed_debugfs_dir;
 
-	debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+	debugfs_stats = debugfs_create_file("stats", 0444,
 					    card->debugfs_dir, card,
 					    &debugfs_stats_fops);
 	if (IS_ERR_OR_NULL(debugfs_stats))
 		goto failed_debugfs_stats;
 
-	debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+	debugfs_pci_regs = debugfs_create_file("pci_regs", 0444,
 					       card->debugfs_dir, card,
 					       &debugfs_pci_regs_fops);
 	if (IS_ERR_OR_NULL(debugfs_pci_regs))
 		goto failed_debugfs_pci_regs;
 
-	debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+	debugfs_cram = debugfs_create_file("cram", 0644,
 					   card->debugfs_dir, card,
 					   &debugfs_cram_fops);
 	if (IS_ERR_OR_NULL(debugfs_cram))
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 08586dc..4d90e5e 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -567,7 +567,7 @@
 	if (!crq)
 		return NULL;
 
-	rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, GFP_KERNEL);
+	rq = blk_get_request(host->oob_q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(rq)) {
 		spin_lock_irqsave(&host->lock, flags);
 		carm_put_request(host, crq);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4a07593c..23752dc 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -298,7 +298,7 @@
 	struct request *req;
 	int err;
 
-	req = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL);
+	req = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -371,7 +371,7 @@
 	return err;
 }
 
-static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL);
 
 /* The queue's logical block size must be set before calling this */
 static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
@@ -576,10 +576,10 @@
 }
 
 static const struct device_attribute dev_attr_cache_type_ro =
-	__ATTR(cache_type, S_IRUGO,
+	__ATTR(cache_type, 0444,
 	       virtblk_cache_type_show, NULL);
 static const struct device_attribute dev_attr_cache_type_rw =
-	__ATTR(cache_type, S_IRUGO|S_IWUSR,
+	__ATTR(cache_type, 0644,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
 static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 987d665..b55b245 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -98,7 +98,7 @@
  * backend, 4KB page granularity is used.
  */
 unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 /*
  * The LRU mechanism to clean the lists of persistent grants needs to
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 21c1be1..66412eed 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -367,7 +367,7 @@
 out:									\
 		return sprintf(buf, format, result);			\
 	}								\
-	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+	static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 VBD_SHOW_ALLRING(oo_req,  "%llu\n");
 VBD_SHOW_ALLRING(rd_req,  "%llu\n");
@@ -403,7 +403,7 @@
 									\
 		return sprintf(buf, format, ##args);			\
 	}								\
-	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+	static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
 VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
 VBD_SHOW(mode, "%s\n", be->mode);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2a8e781..ae00a82 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -129,13 +129,12 @@
  */
 
 static unsigned int xen_blkif_max_segments = 32;
-module_param_named(max_indirect_segments, xen_blkif_max_segments, uint,
-		   S_IRUGO);
+module_param_named(max_indirect_segments, xen_blkif_max_segments, uint, 0444);
 MODULE_PARM_DESC(max_indirect_segments,
 		 "Maximum amount of segments in indirect requests (default is 32)");
 
 static unsigned int xen_blkif_max_queues = 4;
-module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);
+module_param_named(max_queues, xen_blkif_max_queues, uint, 0444);
 MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
 
 /*
@@ -143,7 +142,7 @@
  * backend, 4KB page granularity is used.
  */
 static unsigned int xen_blkif_max_ring_order;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 
 #define BLK_RING_SIZE(info)	\
diff --git a/drivers/bus/fsl-mc/fsl-mc-msi.c b/drivers/bus/fsl-mc/fsl-mc-msi.c
index ec35e25..8b9c66d 100644
--- a/drivers/bus/fsl-mc/fsl-mc-msi.c
+++ b/drivers/bus/fsl-mc/fsl-mc-msi.c
@@ -163,6 +163,8 @@
 {
 	struct irq_domain *domain;
 
+	if (WARN_ON((info->flags & MSI_FLAG_LEVEL_CAPABLE)))
+		info->flags &= ~MSI_FLAG_LEVEL_CAPABLE;
 	if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
 		fsl_mc_msi_update_dom_ops(info);
 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index bfc566d..9adc8c3 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2192,7 +2192,7 @@
 
 		len = nr * CD_FRAMESIZE_RAW;
 
-		rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+		rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
 		if (IS_ERR(rq)) {
 			ret = PTR_ERR(rq);
 			break;
diff --git a/drivers/char/apm-emulation.c b/drivers/char/apm-emulation.c
index a5e2f9e..53436c0 100644
--- a/drivers/char/apm-emulation.c
+++ b/drivers/char/apm-emulation.c
@@ -461,19 +461,6 @@
 
 	return 0;
 }
-
-static int proc_apm_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_apm_show, NULL);
-}
-
-static const struct file_operations apm_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_apm_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 static int kapmd(void *arg)
@@ -657,7 +644,7 @@
 	wake_up_process(kapmd_tsk);
 
 #ifdef CONFIG_PROC_FS
-	proc_create("apm", 0, NULL, &apm_proc_fops);
+	proc_create_single("apm", 0, NULL, proc_apm_show);
 #endif
 
 	ret = misc_register(&apm_device);
diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c
index eb53cba..a5ecf6d 100644
--- a/drivers/char/ds1620.c
+++ b/drivers/char/ds1620.c
@@ -345,18 +345,6 @@
 		   fan_state[netwinder_get_fan()]);
 	return 0;
 }
-
-static int ds1620_proc_therm_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ds1620_proc_therm_show, NULL);
-}
-
-static const struct file_operations ds1620_proc_therm_fops = {
-	.open		= ds1620_proc_therm_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 static const struct file_operations ds1620_fops = {
@@ -404,7 +392,7 @@
 		return ret;
 
 #ifdef THERM_USE_PROC
-	if (!proc_create("therm", 0, NULL, &ds1620_proc_therm_fops))
+	if (!proc_create_single("therm", 0, NULL, ds1620_proc_therm_show))
 		printk(KERN_ERR "therm: unable to register /proc/therm\n");
 #endif
 
diff --git a/drivers/char/efirtc.c b/drivers/char/efirtc.c
index dc62568..d9aab64 100644
--- a/drivers/char/efirtc.c
+++ b/drivers/char/efirtc.c
@@ -358,19 +358,6 @@
 
 	return 0;
 }
-
-static int efi_rtc_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, efi_rtc_proc_show, NULL);
-}
-
-static const struct file_operations efi_rtc_proc_fops = {
-	.open		= efi_rtc_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init 
 efi_rtc_init(void)
 {
@@ -386,7 +373,7 @@
 		return ret;
 	}
 
-	dir = proc_create("driver/efirtc", 0, NULL, &efi_rtc_proc_fops);
+	dir = proc_create_single("driver/efirtc", 0, NULL, efi_rtc_proc_show);
 	if (dir == NULL) {
 		printk(KERN_ERR "efirtc: can't create /proc/driver/efirtc.\n");
 		misc_deregister(&efi_rtc_dev);
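
The apm-emulation, ds1620 and efirtc hunks are all instances of the same
tree-wide conversion: proc_create_single() takes the seq_file show
routine directly, so the per-driver open callback and file_operations
table disappear.  The new idiom, sketched with a hypothetical entry:

	#include <linux/init.h>
	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "status: ok\n");	/* whatever the driver reports */
		return 0;
	}

	static int __init demo_proc_init(void)
	{
		/* One call replaces single_open() plus the fops table. */
		if (!proc_create_single("driver/demo", 0, NULL, demo_show))
			return -ENOMEM;
		return 0;
	}
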
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index d53541e..c34b257 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -347,6 +347,7 @@
 	tristate "STMicroelectronics STM32 random number generator"
 	depends on HW_RANDOM && (ARCH_STM32 || COMPILE_TEST)
 	depends on HAS_IOMEM
+	default HW_RANDOM
 	help
 	  This driver provides kernel-side support for the Random Number
 	  Generator hardware found on STM32 microcontrollers.
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index 92dd4e9..f841151 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -435,7 +435,7 @@
 			*data = np->test_data & 0xffffffff;
 			len = 4;
 		} else {
-			dev_err(&np->op->dev, "RNG error, restesting\n");
+			dev_err(&np->op->dev, "RNG error, retesting\n");
 			np->flags &= ~N2RNG_FLAG_READY;
 			if (!(np->flags & N2RNG_FLAG_SHUTDOWN))
 				schedule_delayed_work(&np->work, 0);
diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
index 0d2328d..042860d 100644
--- a/drivers/char/hw_random/stm32-rng.c
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -187,8 +187,13 @@
 }
 #endif
 
-static UNIVERSAL_DEV_PM_OPS(stm32_rng_pm_ops, stm32_rng_runtime_suspend,
-			    stm32_rng_runtime_resume, NULL);
+static const struct dev_pm_ops stm32_rng_pm_ops = {
+	SET_RUNTIME_PM_OPS(stm32_rng_runtime_suspend,
+			   stm32_rng_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+};
 
 static const struct of_device_id stm32_rng_match[] = {
 	{
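
The stm32-rng hunk swaps UNIVERSAL_DEV_PM_OPS, which reuses one pair of
callbacks for both runtime PM and system sleep, for an explicit
dev_pm_ops that routes system suspend/resume through the runtime PM
path.  The shape of the pattern, for a hypothetical driver:

	#include <linux/pm.h>
	#include <linux/pm_runtime.h>

	static int demo_runtime_suspend(struct device *dev)
	{
		/* Gate clocks, quiesce the block. */
		return 0;
	}

	static int demo_runtime_resume(struct device *dev)
	{
		/* Ungate clocks, reprogram the block. */
		return 0;
	}

	static const struct dev_pm_ops demo_pm_ops = {
		SET_RUNTIME_PM_OPS(demo_runtime_suspend,
				   demo_runtime_resume, NULL)
		/* System sleep reuses the runtime PM callbacks. */
		SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
					pm_runtime_force_resume)
	};
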
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index 6e9df55..ffe9b0c 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -135,7 +135,7 @@
 	 * is always enabled if CPUID rng_en is set.  There is no
 	 * RNG configuration like it used to be the case in this
 	 * register */
-	if ((c->x86 == 6) && (c->x86_model >= 0x0f)) {
+	if (((c->x86 == 6) && (c->x86_model >= 0x0f)) || (c->x86 > 6)) {
 		if (!boot_cpu_has(X86_FEATURE_XSTORE_EN)) {
 			pr_err(PFX "can't enable hardware RNG "
 				"if XSTORE is not enabled\n");
diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig
index 3bda116..c108441 100644
--- a/drivers/char/ipmi/Kconfig
+++ b/drivers/char/ipmi/Kconfig
@@ -22,14 +22,6 @@
 
 if IPMI_HANDLER
 
-config IPMI_PROC_INTERFACE
-       bool 'Provide an interface for IPMI stats in /proc (deprecated)'
-       depends on PROC_FS
-       default y
-       help
-         Do not use this any more, use sysfs for this info.  It will be
-	 removed in future kernel versions.
-
 config IPMI_PANIC_EVENT
        bool 'Generate a panic event to all BMCs on a panic'
        help
@@ -111,6 +103,21 @@
 	  The driver implements the BMC side of the KCS controller; it
 	  provides access to the KCS IO space for the BMC side.
 
+config NPCM7XX_KCS_IPMI_BMC
+	depends on ARCH_NPCM7XX || COMPILE_TEST
+	select IPMI_KCS_BMC
+	select REGMAP_MMIO
+	tristate "NPCM7xx KCS IPMI BMC driver"
+	help
+	  Provides a driver for the KCS (Keyboard Controller Style) IPMI
+	  interface found on Nuvoton NPCM7xx SOCs.
+
+	  The driver implements the BMC side of the KCS controller; it
+	  provides access to the KCS IO space for the BMC side.
+
+	  This support can also be built as a module.  If so, the module
+	  will be called kcs_bmc_npcm7xx.
+
 config ASPEED_BT_IPMI_BMC
 	depends on ARCH_ASPEED || COMPILE_TEST
        depends on REGMAP && REGMAP_MMIO && MFD_SYSCON
diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile
index 21e9e87..7a3baf3 100644
--- a/drivers/char/ipmi/Makefile
+++ b/drivers/char/ipmi/Makefile
@@ -24,3 +24,4 @@
 obj-$(CONFIG_IPMI_KCS_BMC) += kcs_bmc.o
 obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
 obj-$(CONFIG_ASPEED_KCS_IPMI_BMC) += kcs_bmc_aspeed.o
+obj-$(CONFIG_NPCM7XX_KCS_IPMI_BMC) += kcs_bmc_npcm7xx.o
diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index fd4ea8d8..a339766 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -504,11 +504,12 @@
 		if (status & BT_H_BUSY)		/* clear a leftover H_BUSY */
 			BT_CONTROL(BT_H_BUSY);
 
+		bt->timeout = bt->BT_CAP_req2rsp;
+
 		/* Read BT capabilities if it hasn't been done yet */
 		if (!bt->BT_CAP_outreqs)
 			BT_STATE_CHANGE(BT_STATE_CAPABILITIES_BEGIN,
 					SI_SM_CALL_WITHOUT_DELAY);
-		bt->timeout = bt->BT_CAP_req2rsp;
 		BT_SI_SM_RETURN(SI_SM_IDLE);
 
 	case BT_STATE_XACTION_START:
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
index 8ecfd47..1a486ae 100644
--- a/drivers/char/ipmi/ipmi_devintf.c
+++ b/drivers/char/ipmi/ipmi_devintf.c
@@ -26,7 +26,7 @@
 
 struct ipmi_file_private
 {
-	ipmi_user_t          user;
+	struct ipmi_user     *user;
 	spinlock_t           recv_msg_lock;
 	struct list_head     recv_msgs;
 	struct file          *file;
@@ -37,7 +37,6 @@
 	unsigned int         default_retry_time_ms;
 };
 
-static DEFINE_MUTEX(ipmi_mutex);
 static void file_receive_handler(struct ipmi_recv_msg *msg,
 				 void                 *handler_data)
 {
@@ -45,17 +44,15 @@
 	int                      was_empty;
 	unsigned long            flags;
 
-	spin_lock_irqsave(&(priv->recv_msg_lock), flags);
-
-	was_empty = list_empty(&(priv->recv_msgs));
-	list_add_tail(&(msg->link), &(priv->recv_msgs));
+	spin_lock_irqsave(&priv->recv_msg_lock, flags);
+	was_empty = list_empty(&priv->recv_msgs);
+	list_add_tail(&msg->link, &priv->recv_msgs);
+	spin_unlock_irqrestore(&priv->recv_msg_lock, flags);
 
 	if (was_empty) {
 		wake_up_interruptible(&priv->wait);
 		kill_fasync(&priv->fasync_queue, SIGIO, POLL_IN);
 	}
-
-	spin_unlock_irqrestore(&(priv->recv_msg_lock), flags);
 }
 
 static __poll_t ipmi_poll(struct file *file, poll_table *wait)
@@ -68,7 +65,7 @@
 
 	spin_lock_irqsave(&priv->recv_msg_lock, flags);
 
-	if (!list_empty(&(priv->recv_msgs)))
+	if (!list_empty(&priv->recv_msgs))
 		mask |= (EPOLLIN | EPOLLRDNORM);
 
 	spin_unlock_irqrestore(&priv->recv_msg_lock, flags);
@@ -79,13 +76,8 @@
 static int ipmi_fasync(int fd, struct file *file, int on)
 {
 	struct ipmi_file_private *priv = file->private_data;
-	int                      result;
 
-	mutex_lock(&ipmi_mutex); /* could race against open() otherwise */
-	result = fasync_helper(fd, file, on, &priv->fasync_queue);
-	mutex_unlock(&ipmi_mutex);
-
-	return (result);
+	return fasync_helper(fd, file, on, &priv->fasync_queue);
 }
 
 static const struct ipmi_user_hndl ipmi_hndlrs =
@@ -99,18 +91,16 @@
 	int                      rv;
 	struct ipmi_file_private *priv;
 
-
 	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
-	mutex_lock(&ipmi_mutex);
 	priv->file = file;
 
 	rv = ipmi_create_user(if_num,
 			      &ipmi_hndlrs,
 			      priv,
-			      &(priv->user));
+			      &priv->user);
 	if (rv) {
 		kfree(priv);
 		goto out;
@@ -118,8 +108,8 @@
 
 	file->private_data = priv;
 
-	spin_lock_init(&(priv->recv_msg_lock));
-	INIT_LIST_HEAD(&(priv->recv_msgs));
+	spin_lock_init(&priv->recv_msg_lock);
+	INIT_LIST_HEAD(&priv->recv_msgs);
 	init_waitqueue_head(&priv->wait);
 	priv->fasync_queue = NULL;
 	mutex_init(&priv->recv_mutex);
@@ -129,7 +119,6 @@
 	priv->default_retry_time_ms = 0;
 
 out:
-	mutex_unlock(&ipmi_mutex);
 	return rv;
 }
 
@@ -137,7 +126,7 @@
 {
 	struct ipmi_file_private *priv = file->private_data;
 	int                      rv;
-	struct  ipmi_recv_msg *msg, *next;
+	struct ipmi_recv_msg *msg, *next;
 
 	rv = ipmi_destroy_user(priv->user);
 	if (rv)
@@ -146,13 +135,12 @@
 	list_for_each_entry_safe(msg, next, &priv->recv_msgs, link)
 		ipmi_free_recv_msg(msg);
 
-
 	kfree(priv);
 
 	return 0;
 }
 
-static int handle_send_req(ipmi_user_t     user,
+static int handle_send_req(struct ipmi_user *user,
 			   struct ipmi_req *req,
 			   int             retries,
 			   unsigned int    retry_time_ms)
@@ -189,8 +177,7 @@
 
 		if (copy_from_user(msg.data,
 				   req->msg.data,
-				   req->msg.data_len))
-		{
+				   req->msg.data_len)) {
 			rv = -EFAULT;
 			goto out;
 		}
@@ -233,25 +220,24 @@
 	mutex_lock(&priv->recv_mutex);
 
 	/* Grab the message off the list. */
-	spin_lock_irqsave(&(priv->recv_msg_lock), flags);
+	spin_lock_irqsave(&priv->recv_msg_lock, flags);
 	if (list_empty(&(priv->recv_msgs))) {
-		spin_unlock_irqrestore(&(priv->recv_msg_lock), flags);
+		spin_unlock_irqrestore(&priv->recv_msg_lock, flags);
 		rv = -EAGAIN;
 		goto recv_err;
 	}
 	entry = priv->recv_msgs.next;
 	msg = list_entry(entry, struct ipmi_recv_msg, link);
 	list_del(entry);
-	spin_unlock_irqrestore(&(priv->recv_msg_lock), flags);
+	spin_unlock_irqrestore(&priv->recv_msg_lock, flags);
 
 	addr_len = ipmi_addr_length(msg->addr.addr_type);
-	if (rsp->addr_len < addr_len)
-	{
+	if (rsp->addr_len < addr_len) {
 		rv = -EINVAL;
 		goto recv_putback_on_err;
 	}
 
-	if (copy_to_user(rsp->addr, &(msg->addr), addr_len)) {
+	if (copy_to_user(rsp->addr, &msg->addr, addr_len)) {
 		rv = -EFAULT;
 		goto recv_putback_on_err;
 	}
@@ -273,8 +259,7 @@
 
 		if (copy_to_user(rsp->msg.data,
 				 msg->msg.data,
-				 msg->msg.data_len))
-		{
+				 msg->msg.data_len)) {
 			rv = -EFAULT;
 			goto recv_putback_on_err;
 		}
@@ -294,9 +279,9 @@
 recv_putback_on_err:
 	/* If we got an error, put the message back onto
 	   the head of the queue. */
-	spin_lock_irqsave(&(priv->recv_msg_lock), flags);
-	list_add(entry, &(priv->recv_msgs));
-	spin_unlock_irqrestore(&(priv->recv_msg_lock), flags);
+	spin_lock_irqsave(&priv->recv_msg_lock, flags);
+	list_add(entry, &priv->recv_msgs);
+	spin_unlock_irqrestore(&priv->recv_msg_lock, flags);
 recv_err:
 	mutex_unlock(&priv->recv_mutex);
 	return rv;
@@ -307,9 +292,9 @@
 	return copy_to_user(to, rsp, sizeof(struct ipmi_recv)) ? -EFAULT : 0;
 }
 
-static int ipmi_ioctl(struct file   *file,
-		      unsigned int  cmd,
-		      unsigned long data)
+static long ipmi_ioctl(struct file   *file,
+		       unsigned int  cmd,
+		       unsigned long data)
 {
 	int                      rv = -EINVAL;
 	struct ipmi_file_private *priv = file->private_data;
@@ -320,16 +305,20 @@
 	case IPMICTL_SEND_COMMAND:
 	{
 		struct ipmi_req req;
+		int retries;
+		unsigned int retry_time_ms;
 
 		if (copy_from_user(&req, arg, sizeof(req))) {
 			rv = -EFAULT;
 			break;
 		}
 
-		rv = handle_send_req(priv->user,
-				     &req,
-				     priv->default_retries,
-				     priv->default_retry_time_ms);
+		mutex_lock(&priv->recv_mutex);
+		retries = priv->default_retries;
+		retry_time_ms = priv->default_retry_time_ms;
+		mutex_unlock(&priv->recv_mutex);
+
+		rv = handle_send_req(priv->user, &req, retries, retry_time_ms);
 		break;
 	}
 
@@ -569,8 +558,10 @@
 			break;
 		}
 
+		mutex_lock(&priv->recv_mutex);
 		priv->default_retries = parms.retries;
 		priv->default_retry_time_ms = parms.retry_time_ms;
+		mutex_unlock(&priv->recv_mutex);
 		rv = 0;
 		break;
 	}
@@ -579,8 +570,10 @@
 	{
 		struct ipmi_timing_parms parms;
 
+		mutex_lock(&priv->recv_mutex);
 		parms.retries = priv->default_retries;
 		parms.retry_time_ms = priv->default_retry_time_ms;
+		mutex_unlock(&priv->recv_mutex);
 
 		if (copy_to_user(arg, &parms, sizeof(parms))) {
 			rv = -EFAULT;
@@ -615,30 +608,16 @@
 		rv = ipmi_set_maintenance_mode(priv->user, mode);
 		break;
 	}
+
+	default:
+		rv = -ENOTTY;
+		break;
 	}
   
 	return rv;
 }
 
-/*
- * Note: it doesn't make sense to take the BKL here but
- *       not in compat_ipmi_ioctl. -arnd
- */
-static long ipmi_unlocked_ioctl(struct file   *file,
-			        unsigned int  cmd,
-			        unsigned long data)
-{
-	int ret;
-
-	mutex_lock(&ipmi_mutex);
-	ret = ipmi_ioctl(file, cmd, data);
-	mutex_unlock(&ipmi_mutex);
-
-	return ret;
-}
-
 #ifdef CONFIG_COMPAT
-
 /*
  * The following code contains code for supporting 32-bit compatible
  * ioctls on 64-bit kernels.  This allows running 32-bit apps on the
@@ -749,15 +728,21 @@
 	{
 		struct ipmi_req	rp;
 		struct compat_ipmi_req r32;
+		int retries;
+		unsigned int retry_time_ms;
 
 		if (copy_from_user(&r32, compat_ptr(arg), sizeof(r32)))
 			return -EFAULT;
 
 		get_compat_ipmi_req(&rp, &r32);
 
+		mutex_lock(&priv->recv_mutex);
+		retries = priv->default_retries;
+		retry_time_ms = priv->default_retry_time_ms;
+		mutex_unlock(&priv->recv_mutex);
+
 		return handle_send_req(priv->user, &rp,
-				priv->default_retries,
-				priv->default_retry_time_ms);
+				       retries, retry_time_ms);
 	}
 	case COMPAT_IPMICTL_SEND_COMMAND_SETTIME:
 	{
@@ -791,25 +776,13 @@
 		return ipmi_ioctl(filep, cmd, arg);
 	}
 }
-
-static long unlocked_compat_ipmi_ioctl(struct file *filep, unsigned int cmd,
-				       unsigned long arg)
-{
-	int ret;
-
-	mutex_lock(&ipmi_mutex);
-	ret = compat_ipmi_ioctl(filep, cmd, arg);
-	mutex_unlock(&ipmi_mutex);
-
-	return ret;
-}
 #endif
 
 static const struct file_operations ipmi_fops = {
 	.owner		= THIS_MODULE,
-	.unlocked_ioctl	= ipmi_unlocked_ioctl,
+	.unlocked_ioctl	= ipmi_ioctl,
 #ifdef CONFIG_COMPAT
-	.compat_ioctl   = unlocked_compat_ipmi_ioctl,
+	.compat_ioctl   = compat_ipmi_ioctl,
 #endif
 	.open		= ipmi_open,
 	.release	= ipmi_release,
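
With the BKL-style ipmi_mutex gone, the only cross-thread state left in
this ioctl path is the pair of per-file retry defaults, so the patch
snapshots them under priv->recv_mutex instead of serializing whole
ioctls.  The pattern in isolation, with hypothetical names:

	#include <linux/mutex.h>

	struct demo_private {			/* hypothetical per-open state */
		struct mutex lock;
		int          retries;
		unsigned int retry_time_ms;
	};

	static long demo_do_send(int retries, unsigned int retry_time_ms);

	static long demo_send(struct demo_private *priv)
	{
		int retries;
		unsigned int retry_time_ms;

		/* Take a consistent snapshot; the send itself runs unlocked. */
		mutex_lock(&priv->lock);
		retries = priv->retries;
		retry_time_ms = priv->retry_time_ms;
		mutex_unlock(&priv->lock);

		return demo_do_send(retries, retry_time_ms);
	}
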
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 3611489..51832b8 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -37,11 +37,30 @@
 static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void);
 static int ipmi_init_msghandler(void);
 static void smi_recv_tasklet(unsigned long);
-static void handle_new_recv_msgs(ipmi_smi_t intf);
-static void need_waiter(ipmi_smi_t intf);
-static int handle_one_recv_msg(ipmi_smi_t          intf,
+static void handle_new_recv_msgs(struct ipmi_smi *intf);
+static void need_waiter(struct ipmi_smi *intf);
+static int handle_one_recv_msg(struct ipmi_smi *intf,
 			       struct ipmi_smi_msg *msg);
 
+#ifdef DEBUG
+static void ipmi_debug_msg(const char *title, unsigned char *data,
+			   unsigned int len)
+{
+	int i, pos;
+	char buf[100];
+
+	pos = scnprintf(buf, sizeof(buf), "%s: ", title);
+	for (i = 0; i < len; i++)
+		pos += scnprintf(buf + pos, sizeof(buf) - pos,
+				 " %2.2x", data[i]);
+	pr_debug("%s\n", buf);
+}
+#else
+static void ipmi_debug_msg(const char *title, unsigned char *data,
+			   unsigned int len)
+{ }
+#endif
+
 static int initialized;
 
 enum ipmi_panic_event_op {
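
ipmi_debug_msg() accumulates a hex dump into a fixed 100-byte buffer;
that is only safe when each step reports the number of bytes actually
stored, which is what scnprintf() guarantees (snprintf() returns the
would-be length and can push the offset past the end of the buffer).
The idiom as a stand-alone sketch:

	#include <linux/kernel.h>

	/* Render up to len bytes as " xx" pairs without overrunning buf. */
	static void demo_hex_line(char *buf, size_t size,
				  const unsigned char *data, unsigned int len)
	{
		unsigned int i;
		int pos = 0;

		for (i = 0; i < len; i++)
			/* scnprintf() never reports more than the space left. */
			pos += scnprintf(buf + pos, size - pos,
					 " %2.2x", data[i]);
	}
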
@@ -112,14 +131,13 @@
 MODULE_PARM_DESC(panic_op, "Sets if the IPMI driver will attempt to store panic information in the event log in the event of a panic.  Set to 'none' for no, 'event' for a single event, or 'string' for a generic event and the panic string in IPMI OEM events.");
 
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-static struct proc_dir_entry *proc_ipmi_root;
-#endif /* CONFIG_IPMI_PROC_INTERFACE */
+#define MAX_EVENTS_IN_QUEUE	25
 
 /* Remain in auto-maintenance mode for this amount of time (in ms). */
-#define IPMI_MAINTENANCE_MODE_TIMEOUT 30000
-
-#define MAX_EVENTS_IN_QUEUE	25
+static unsigned long maintenance_mode_timeout_ms = 30000;
+module_param(maintenance_mode_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(maintenance_mode_timeout_ms,
+		 "The time (milliseconds) after the last maintenance message that the connection stays in maintenance mode.");
 
 /*
 * Don't let a message sit in a queue forever, always time it with at least
@@ -127,6 +145,31 @@
  */
 #define MAX_MSG_TIMEOUT		60000
 
+/*
+ * Timeout times below are in milliseconds, and are done off a 1
+ * second timer.  So setting the value to 1000 would mean anything
+ * between 0 and 1000ms.  So really the only reasonable minimum
+ * setting is 2000ms, which is between 1 and 2 seconds.
+ */
+
+/* The default timeout for message retries. */
+static unsigned long default_retry_ms = 2000;
+module_param(default_retry_ms, ulong, 0644);
+MODULE_PARM_DESC(default_retry_ms,
+		 "The time (milliseconds) between retry sends");
+
+/* The default timeout for maintenance mode message retries. */
+static unsigned long default_maintenance_retry_ms = 3000;
+module_param(default_maintenance_retry_ms, ulong, 0644);
+MODULE_PARM_DESC(default_maintenance_retry_ms,
+		 "The time (milliseconds) between retry sends in maintenance mode");
+
+/* The default maximum number of retries */
+static unsigned int default_max_retries = 4;
+module_param(default_max_retries, uint, 0644);
+MODULE_PARM_DESC(default_max_retries,
+		 "The time (milliseconds) between retry sends in maintenance mode");
+
 /* Call every ~1000 ms. */
 #define IPMI_TIMEOUT_TIME	1000
 
@@ -150,8 +193,12 @@
 struct ipmi_user {
 	struct list_head link;
 
-	/* Set to false when the user is destroyed. */
-	bool valid;
+	/*
+	 * Set to NULL when the user is destroyed; otherwise a pointer
+	 * to the structure itself, so srcu_dereference can be used
+	 * on it.
+	 */
+	struct ipmi_user *self;
+	struct srcu_struct release_barrier;
 
 	struct kref refcount;
 
@@ -160,16 +207,33 @@
 	void             *handler_data;
 
 	/* The interface this user is bound to. */
-	ipmi_smi_t intf;
+	struct ipmi_smi *intf;
 
 	/* Does this interface receive IPMI events? */
 	bool gets_events;
 };
 
+static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index)
+	__acquires(user->release_barrier)
+{
+	struct ipmi_user *ruser;
+
+	*index = srcu_read_lock(&user->release_barrier);
+	ruser = srcu_dereference(user->self, &user->release_barrier);
+	if (!ruser)
+		srcu_read_unlock(&user->release_barrier, *index);
+	return ruser;
+}
+
+static void release_ipmi_user(struct ipmi_user *user, int index)
+{
+	srcu_read_unlock(&user->release_barrier, index);
+}
+
 struct cmd_rcvr {
 	struct list_head link;
 
-	ipmi_user_t   user;
+	struct ipmi_user *user;
 	unsigned char netfn;
 	unsigned char cmd;
 	unsigned int  chans;
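
The struct ipmi_user rework replaces the old "valid" flag with a self
pointer read under SRCU: a lookup that races with destruction either
sees NULL or pins the user until it drops the read lock, and the
destroy path can then wait out every reader with synchronize_srcu().
Reduced to its essentials (hypothetical names, init/cleanup of the
srcu_struct elided):

	#include <linux/srcu.h>

	struct demo_user {
		struct demo_user __rcu *self;	/* NULL once torn down */
		struct srcu_struct release_barrier;
	};

	static struct demo_user *demo_acquire(struct demo_user *u, int *idx)
	{
		struct demo_user *r;

		*idx = srcu_read_lock(&u->release_barrier);
		r = srcu_dereference(u->self, &u->release_barrier);
		if (!r)		/* destruction already started */
			srcu_read_unlock(&u->release_barrier, *idx);
		return r;
	}

	static void demo_destroy(struct demo_user *u)
	{
		rcu_assign_pointer(u->self, NULL);
		/* Wait for every reader that saw a non-NULL self. */
		synchronize_srcu(&u->release_barrier);
	}
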
@@ -247,13 +311,6 @@
 	unsigned char lun;
 };
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-struct ipmi_proc_entry {
-	char                   *name;
-	struct ipmi_proc_entry *next;
-};
-#endif
-
 /*
  * Note that the product id, manufacturer id, guid, and device id are
  * immutable in this structure, so dyn_mutex is not required for
@@ -275,7 +332,7 @@
 };
 #define to_bmc_device(x) container_of((x), struct bmc_device, pdev.dev)
 
-static int bmc_get_device_id(ipmi_smi_t intf, struct bmc_device *bmc,
+static int bmc_get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc,
 			     struct ipmi_device_id *id,
 			     bool *guid_set, guid_t *guid);
 
@@ -397,10 +454,11 @@
 	struct list_head link;
 
 	/*
-	 * The list of upper layers that are using me.  seq_lock
-	 * protects this.
+	 * The list of upper layers that are using me.  seq_lock write
+	 * protects this.  Read protection is with srcu.
 	 */
 	struct list_head users;
+	struct srcu_struct users_srcu;
 
 	/* Used for wake ups at startup. */
 	wait_queue_head_t waitq;
@@ -420,24 +478,9 @@
 	bool in_bmc_register;  /* Handle recursive situations.  Yuck. */
 	struct work_struct bmc_reg_work;
 
-	/*
-	 * This is the lower-layer's sender routine.  Note that you
-	 * must either be holding the ipmi_interfaces_mutex or be in
-	 * an umpreemptible region to use this.  You must fetch the
-	 * value into a local variable and make sure it is not NULL.
-	 */
 	const struct ipmi_smi_handlers *handlers;
 	void                     *send_info;
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	/* A list of proc entries for this interface. */
-	struct mutex           proc_entry_lock;
-	struct ipmi_proc_entry *proc_entries;
-
-	struct proc_dir_entry *proc_dir;
-	char                  proc_dir_name[10];
-#endif
-
 	/* Driver-model device for the system interface. */
 	struct device          *si_dev;
 
@@ -503,6 +546,13 @@
 	spinlock_t maintenance_mode_lock; /* Used in a timer... */
 
 	/*
+	 * If we are doing maintenance on something on IPMB, extend
+	 * the timeout time to avoid timeouts writing firmware and
+	 * such.
+	 */
+	int ipmb_maintenance_mode_timeout;
+
+	/*
 	 * A cheap hack, if this is non-null and a message to an
 	 * interface comes in with a NULL user, call this routine with
 	 * it.  Note that the message will still be freed by the
@@ -510,7 +560,8 @@
 	 *
 	 * Protected by bmc_reg_mutex.
 	 */
-	void (*null_user_handler)(ipmi_smi_t intf, struct ipmi_recv_msg *msg);
+	void (*null_user_handler)(struct ipmi_smi *intf,
+				  struct ipmi_recv_msg *msg);
 
 	/*
 	 * When we are scanning the channels for an SMI, this will
@@ -536,12 +587,12 @@
 };
 #define to_si_intf_from_dev(device) container_of(device, struct ipmi_smi, dev)
 
-static void __get_guid(ipmi_smi_t intf);
-static void __ipmi_bmc_unregister(ipmi_smi_t intf);
-static int __ipmi_bmc_register(ipmi_smi_t intf,
+static void __get_guid(struct ipmi_smi *intf);
+static void __ipmi_bmc_unregister(struct ipmi_smi *intf);
+static int __ipmi_bmc_register(struct ipmi_smi *intf,
 			       struct ipmi_device_id *id,
 			       bool guid_set, guid_t *guid, int intf_num);
-static int __scan_channels(ipmi_smi_t intf, struct ipmi_device_id *id);
+static int __scan_channels(struct ipmi_smi *intf, struct ipmi_device_id *id);
 
 
 /**
@@ -560,6 +611,7 @@
 
 static LIST_HEAD(ipmi_interfaces);
 static DEFINE_MUTEX(ipmi_interfaces_mutex);
+DEFINE_STATIC_SRCU(ipmi_interfaces_srcu);
 
 /*
  * List of watchers that want to know when smi's are added and deleted.
@@ -620,7 +672,7 @@
 	}
 }
 
-static void clean_up_interface_data(ipmi_smi_t intf)
+static void clean_up_interface_data(struct ipmi_smi *intf)
 {
 	int              i;
 	struct cmd_rcvr  *rcvr, *rcvr2;
@@ -652,7 +704,7 @@
 
 static void intf_free(struct kref *ref)
 {
-	ipmi_smi_t intf = container_of(ref, struct ipmi_smi, refcount);
+	struct ipmi_smi *intf = container_of(ref, struct ipmi_smi, refcount);
 
 	clean_up_interface_data(intf);
 	kfree(intf);
@@ -660,65 +712,39 @@
 
 struct watcher_entry {
 	int              intf_num;
-	ipmi_smi_t       intf;
+	struct ipmi_smi  *intf;
 	struct list_head link;
 };
 
 int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher)
 {
-	ipmi_smi_t intf;
-	LIST_HEAD(to_deliver);
-	struct watcher_entry *e, *e2;
+	struct ipmi_smi *intf;
+	int index;
 
 	mutex_lock(&smi_watchers_mutex);
 
-	mutex_lock(&ipmi_interfaces_mutex);
-
-	/* Build a list of things to deliver. */
-	list_for_each_entry(intf, &ipmi_interfaces, link) {
-		if (intf->intf_num == -1)
-			continue;
-		e = kmalloc(sizeof(*e), GFP_KERNEL);
-		if (!e)
-			goto out_err;
-		kref_get(&intf->refcount);
-		e->intf = intf;
-		e->intf_num = intf->intf_num;
-		list_add_tail(&e->link, &to_deliver);
-	}
-
-	/* We will succeed, so add it to the list. */
 	list_add(&watcher->link, &smi_watchers);
 
-	mutex_unlock(&ipmi_interfaces_mutex);
+	index = srcu_read_lock(&ipmi_interfaces_srcu);
+	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
+		int intf_num = READ_ONCE(intf->intf_num);
 
-	list_for_each_entry_safe(e, e2, &to_deliver, link) {
-		list_del(&e->link);
-		watcher->new_smi(e->intf_num, e->intf->si_dev);
-		kref_put(&e->intf->refcount, intf_free);
-		kfree(e);
+		if (intf_num == -1)
+			continue;
+		watcher->new_smi(intf_num, intf->si_dev);
 	}
+	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 
 	mutex_unlock(&smi_watchers_mutex);
 
 	return 0;
-
- out_err:
-	mutex_unlock(&ipmi_interfaces_mutex);
-	mutex_unlock(&smi_watchers_mutex);
-	list_for_each_entry_safe(e, e2, &to_deliver, link) {
-		list_del(&e->link);
-		kref_put(&e->intf->refcount, intf_free);
-		kfree(e);
-	}
-	return -ENOMEM;
 }
 EXPORT_SYMBOL(ipmi_smi_watcher_register);
 
 int ipmi_smi_watcher_unregister(struct ipmi_smi_watcher *watcher)
 {
 	mutex_lock(&smi_watchers_mutex);
-	list_del(&(watcher->link));
+	list_del(&watcher->link);
 	mutex_unlock(&smi_watchers_mutex);
 	return 0;
 }
@@ -732,12 +758,14 @@
 {
 	struct ipmi_smi_watcher *w;
 
+	mutex_lock(&smi_watchers_mutex);
 	list_for_each_entry(w, &smi_watchers, link) {
 		if (try_module_get(w->owner)) {
 			w->new_smi(i, dev);
 			module_put(w->owner);
 		}
 	}
+	mutex_unlock(&smi_watchers_mutex);
 }
 
 static int
@@ -831,18 +859,17 @@
 }
 EXPORT_SYMBOL(ipmi_addr_length);
 
-static void deliver_response(struct ipmi_recv_msg *msg)
+static int deliver_response(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
 {
-	if (!msg->user) {
-		ipmi_smi_t    intf = msg->user_msg_data;
+	int rv = 0;
 
+	if (!msg->user) {
 		/* Special handling for NULL users. */
 		if (intf->null_user_handler) {
 			intf->null_user_handler(intf, msg);
-			ipmi_inc_stat(intf, handled_local_responses);
 		} else {
 			/* No handler, so give up. */
-			ipmi_inc_stat(intf, unhandled_local_responses);
+			rv = -EINVAL;
 		}
 		ipmi_free_recv_msg(msg);
 	} else if (!oops_in_progress) {
@@ -851,21 +878,40 @@
 		 * receive handler doesn't have much meaning and has a deadlock
 		 * risk.  At this moment, simply skip it in that case.
 		 */
+		int index;
+		struct ipmi_user *user = acquire_ipmi_user(msg->user, &index);
 
-		ipmi_user_t user = msg->user;
-		user->handler->ipmi_recv_hndl(msg, user->handler_data);
+		if (user) {
+			user->handler->ipmi_recv_hndl(msg, user->handler_data);
+			release_ipmi_user(msg->user, index);
+		} else {
+			/* User went away, give up. */
+			ipmi_free_recv_msg(msg);
+			rv = -EINVAL;
+		}
 	}
+
+	return rv;
 }
 
-static void
-deliver_err_response(struct ipmi_recv_msg *msg, int err)
+static void deliver_local_response(struct ipmi_smi *intf,
+				   struct ipmi_recv_msg *msg)
+{
+	if (deliver_response(intf, msg))
+		ipmi_inc_stat(intf, unhandled_local_responses);
+	else
+		ipmi_inc_stat(intf, handled_local_responses);
+}
+
+static void deliver_err_response(struct ipmi_smi *intf,
+				 struct ipmi_recv_msg *msg, int err)
 {
 	msg->recv_type = IPMI_RESPONSE_RECV_TYPE;
 	msg->msg_data[0] = err;
 	msg->msg.netfn |= 1; /* Convert to a response. */
 	msg->msg.data_len = 1;
 	msg->msg.data = msg->msg_data;
-	deliver_response(msg);
+	deliver_local_response(intf, msg);
 }
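
deliver_response() now reports whether delivery happened instead of
updating statistics itself, and deliver_local_response() folds the
result into the handled/unhandled counters at the one layer that knows
which counters apply.  The shape of the split, with hypothetical types:

	#include <linux/atomic.h>
	#include <linux/errno.h>

	struct demo_msg  { void (*recv)(struct demo_msg *msg); };
	struct demo_intf { atomic_t handled, unhandled; };

	static int demo_deliver(struct demo_msg *msg)
	{
		if (!msg->recv)
			return -EINVAL;	/* no consumer; caller counts it */
		msg->recv(msg);
		return 0;
	}

	static void demo_deliver_local(struct demo_intf *intf,
				       struct demo_msg *msg)
	{
		if (demo_deliver(msg))
			atomic_inc(&intf->unhandled);
		else
			atomic_inc(&intf->handled);
	}
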
 
 /*
@@ -873,7 +919,7 @@
  * message with the given timeout to the sequence table.  This must be
  * called with the interface's seq_lock held.
  */
-static int intf_next_seq(ipmi_smi_t           intf,
+static int intf_next_seq(struct ipmi_smi      *intf,
 			 struct ipmi_recv_msg *recv_msg,
 			 unsigned long        timeout,
 			 int                  retries,
@@ -884,6 +930,11 @@
 	int          rv = 0;
 	unsigned int i;
 
+	if (timeout == 0)
+		timeout = default_retry_ms;
+	if (retries < 0)
+		retries = default_max_retries;
+
 	for (i = intf->curr_seq; (i+1)%IPMI_IPMB_NUM_SEQ != intf->curr_seq;
 					i = (i+1)%IPMI_IPMB_NUM_SEQ) {
 		if (!intf->seq_table[i].inuse)
@@ -921,7 +972,7 @@
  * guard against message coming in after their timeout and the
  * sequence number being reused).
  */
-static int intf_find_seq(ipmi_smi_t           intf,
+static int intf_find_seq(struct ipmi_smi      *intf,
 			 unsigned char        seq,
 			 short                channel,
 			 unsigned char        cmd,
@@ -935,26 +986,26 @@
 	if (seq >= IPMI_IPMB_NUM_SEQ)
 		return -EINVAL;
 
-	spin_lock_irqsave(&(intf->seq_lock), flags);
+	spin_lock_irqsave(&intf->seq_lock, flags);
 	if (intf->seq_table[seq].inuse) {
 		struct ipmi_recv_msg *msg = intf->seq_table[seq].recv_msg;
 
 		if ((msg->addr.channel == channel) && (msg->msg.cmd == cmd)
 				&& (msg->msg.netfn == netfn)
-				&& (ipmi_addr_equal(addr, &(msg->addr)))) {
+				&& (ipmi_addr_equal(addr, &msg->addr))) {
 			*recv_msg = msg;
 			intf->seq_table[seq].inuse = 0;
 			rv = 0;
 		}
 	}
-	spin_unlock_irqrestore(&(intf->seq_lock), flags);
+	spin_unlock_irqrestore(&intf->seq_lock, flags);
 
 	return rv;
 }
 
 
 /* Start the timer for a specific sequence table entry. */
-static int intf_start_seq_timer(ipmi_smi_t intf,
+static int intf_start_seq_timer(struct ipmi_smi *intf,
 				long       msgid)
 {
 	int           rv = -ENODEV;
@@ -965,24 +1016,24 @@
 
 	GET_SEQ_FROM_MSGID(msgid, seq, seqid);
 
-	spin_lock_irqsave(&(intf->seq_lock), flags);
+	spin_lock_irqsave(&intf->seq_lock, flags);
 	/*
 	 * We do this verification because the user can be deleted
 	 * while a message is outstanding.
 	 */
 	if ((intf->seq_table[seq].inuse)
 				&& (intf->seq_table[seq].seqid == seqid)) {
-		struct seq_table *ent = &(intf->seq_table[seq]);
+		struct seq_table *ent = &intf->seq_table[seq];
 		ent->timeout = ent->orig_timeout;
 		rv = 0;
 	}
-	spin_unlock_irqrestore(&(intf->seq_lock), flags);
+	spin_unlock_irqrestore(&intf->seq_lock, flags);
 
 	return rv;
 }
 
 /* Got an error for the send message for a specific sequence number. */
-static int intf_err_seq(ipmi_smi_t   intf,
+static int intf_err_seq(struct ipmi_smi *intf,
 			long         msgid,
 			unsigned int err)
 {
@@ -995,23 +1046,23 @@
 
 	GET_SEQ_FROM_MSGID(msgid, seq, seqid);
 
-	spin_lock_irqsave(&(intf->seq_lock), flags);
+	spin_lock_irqsave(&intf->seq_lock, flags);
 	/*
 	 * We do this verification because the user can be deleted
 	 * while a message is outstanding.
 	 */
 	if ((intf->seq_table[seq].inuse)
 				&& (intf->seq_table[seq].seqid == seqid)) {
-		struct seq_table *ent = &(intf->seq_table[seq]);
+		struct seq_table *ent = &intf->seq_table[seq];
 
 		ent->inuse = 0;
 		msg = ent->recv_msg;
 		rv = 0;
 	}
-	spin_unlock_irqrestore(&(intf->seq_lock), flags);
+	spin_unlock_irqrestore(&intf->seq_lock, flags);
 
 	if (msg)
-		deliver_err_response(msg, err);
+		deliver_err_response(intf, msg, err);
 
 	return rv;
 }
@@ -1020,12 +1071,12 @@
 int ipmi_create_user(unsigned int          if_num,
 		     const struct ipmi_user_hndl *handler,
 		     void                  *handler_data,
-		     ipmi_user_t           *user)
+		     struct ipmi_user      **user)
 {
 	unsigned long flags;
-	ipmi_user_t   new_user;
-	int           rv = 0;
-	ipmi_smi_t    intf;
+	struct ipmi_user *new_user;
+	int           rv = 0, index;
+	struct ipmi_smi *intf;
 
 	/*
 	 * There is no module usecount here, because it's not
@@ -1059,7 +1110,7 @@
 	if (!new_user)
 		return -ENOMEM;
 
-	mutex_lock(&ipmi_interfaces_mutex);
+	index = srcu_read_lock(&ipmi_interfaces_srcu);
 	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
 		if (intf->intf_num == if_num)
 			goto found;
@@ -1069,6 +1120,10 @@
 	goto out_kfree;
 
  found:
+	rv = init_srcu_struct(&new_user->release_barrier);
+	if (rv)
+		goto out_kfree;
+
 	/* Note that each existing user holds a refcount to the interface. */
 	kref_get(&intf->refcount);
 
@@ -1078,26 +1133,7 @@
 	new_user->intf = intf;
 	new_user->gets_events = false;
 
-	if (!try_module_get(intf->handlers->owner)) {
-		rv = -ENODEV;
-		goto out_kref;
-	}
-
-	if (intf->handlers->inc_usecount) {
-		rv = intf->handlers->inc_usecount(intf->send_info);
-		if (rv) {
-			module_put(intf->handlers->owner);
-			goto out_kref;
-		}
-	}
-
-	/*
-	 * Hold the lock so intf->handlers is guaranteed to be good
-	 * until now
-	 */
-	mutex_unlock(&ipmi_interfaces_mutex);
-
-	new_user->valid = true;
+	rcu_assign_pointer(new_user->self, new_user);
 	spin_lock_irqsave(&intf->seq_lock, flags);
 	list_add_rcu(&new_user->link, &intf->users);
 	spin_unlock_irqrestore(&intf->seq_lock, flags);
@@ -1106,13 +1142,12 @@
 		if (atomic_inc_return(&intf->event_waiters) == 1)
 			need_waiter(intf);
 	}
+	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 	*user = new_user;
 	return 0;
 
-out_kref:
-	kref_put(&intf->refcount, intf_free);
 out_kfree:
-	mutex_unlock(&ipmi_interfaces_mutex);
+	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 	kfree(new_user);
 	return rv;
 }
@@ -1120,26 +1155,25 @@
 
 int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data)
 {
-	int           rv = 0;
-	ipmi_smi_t    intf;
-	const struct ipmi_smi_handlers *handlers;
+	int rv, index;
+	struct ipmi_smi *intf;
 
-	mutex_lock(&ipmi_interfaces_mutex);
+	index = srcu_read_lock(&ipmi_interfaces_srcu);
 	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
 		if (intf->intf_num == if_num)
 			goto found;
 	}
+	srcu_read_unlock(&ipmi_interfaces_srcu, index);
+
 	/* Not found, return an error */
-	rv = -EINVAL;
-	mutex_unlock(&ipmi_interfaces_mutex);
-	return rv;
+	return -EINVAL;
 
 found:
-	handlers = intf->handlers;
-	rv = -ENOSYS;
-	if (handlers->get_smi_info)
-		rv = handlers->get_smi_info(intf->send_info, data);
-	mutex_unlock(&ipmi_interfaces_mutex);
+	if (!intf->handlers->get_smi_info)
+		rv = -ENOTTY;
+	else
+		rv = intf->handlers->get_smi_info(intf->send_info, data);
+	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 
 	return rv;
 }
@@ -1147,19 +1181,34 @@
 
 static void free_user(struct kref *ref)
 {
-	ipmi_user_t user = container_of(ref, struct ipmi_user, refcount);
+	struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount);
 	kfree(user);
 }
 
-int ipmi_destroy_user(ipmi_user_t user)
+static void _ipmi_destroy_user(struct ipmi_user *user)
 {
-	ipmi_smi_t       intf = user->intf;
+	struct ipmi_smi  *intf = user->intf;
 	int              i;
 	unsigned long    flags;
 	struct cmd_rcvr  *rcvr;
 	struct cmd_rcvr  *rcvrs = NULL;
 
-	user->valid = false;
+	if (!acquire_ipmi_user(user, &i)) {
+		/*
+		 * The user has already been cleaned up, just make sure
+		 * nothing is using it and return.
+		 */
+		synchronize_srcu(&user->release_barrier);
+		return;
+	}
+
+	rcu_assign_pointer(user->self, NULL);
+	release_ipmi_user(user, i);
+
+	synchronize_srcu(&user->release_barrier);
+
+	if (user->handler->shutdown)
+		user->handler->shutdown(user->handler_data);
 
 	if (user->handler->ipmi_watchdog_pretimeout)
 		atomic_dec(&intf->event_waiters);
@@ -1184,7 +1233,7 @@
 	 * Remove the user from the command receiver's table.  First
 	 * we build a list of everything (not using the standard link,
 	 * since other things may be using it till we do
-	 * synchronize_rcu()) then free everything in that list.
+	 * synchronize_srcu()) then free everything in that list.
 	 */
 	mutex_lock(&intf->cmd_rcvrs_mutex);
 	list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) {
@@ -1202,109 +1251,156 @@
 		kfree(rcvr);
 	}
 
-	mutex_lock(&ipmi_interfaces_mutex);
-	if (intf->handlers) {
-		module_put(intf->handlers->owner);
-		if (intf->handlers->dec_usecount)
-			intf->handlers->dec_usecount(intf->send_info);
-	}
-	mutex_unlock(&ipmi_interfaces_mutex);
-
 	kref_put(&intf->refcount, intf_free);
+}
 
+int ipmi_destroy_user(struct ipmi_user *user)
+{
+	_ipmi_destroy_user(user);
+
+	cleanup_srcu_struct(&user->release_barrier);
 	kref_put(&user->refcount, free_user);
 
 	return 0;
 }
 EXPORT_SYMBOL(ipmi_destroy_user);
 
-int ipmi_get_version(ipmi_user_t   user,
+int ipmi_get_version(struct ipmi_user *user,
 		     unsigned char *major,
 		     unsigned char *minor)
 {
 	struct ipmi_device_id id;
-	int rv;
+	int rv, index;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
 
 	rv = bmc_get_device_id(user->intf, NULL, &id, NULL, NULL);
-	if (rv)
-		return rv;
+	if (!rv) {
+		*major = ipmi_version_major(&id);
+		*minor = ipmi_version_minor(&id);
+	}
+	release_ipmi_user(user, index);
 
-	*major = ipmi_version_major(&id);
-	*minor = ipmi_version_minor(&id);
-
-	return 0;
+	return rv;
 }
 EXPORT_SYMBOL(ipmi_get_version);
 
-int ipmi_set_my_address(ipmi_user_t   user,
+int ipmi_set_my_address(struct ipmi_user *user,
 			unsigned int  channel,
 			unsigned char address)
 {
+	int index, rv = 0;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
+
 	if (channel >= IPMI_MAX_CHANNELS)
-		return -EINVAL;
-	user->intf->addrinfo[channel].address = address;
-	return 0;
+		rv = -EINVAL;
+	else
+		user->intf->addrinfo[channel].address = address;
+	release_ipmi_user(user, index);
+
+	return rv;
 }
 EXPORT_SYMBOL(ipmi_set_my_address);
 
-int ipmi_get_my_address(ipmi_user_t   user,
+int ipmi_get_my_address(struct ipmi_user *user,
 			unsigned int  channel,
 			unsigned char *address)
 {
+	int index, rv = 0;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
+
 	if (channel >= IPMI_MAX_CHANNELS)
-		return -EINVAL;
-	*address = user->intf->addrinfo[channel].address;
-	return 0;
+		rv = -EINVAL;
+	else
+		*address = user->intf->addrinfo[channel].address;
+	release_ipmi_user(user, index);
+
+	return rv;
 }
 EXPORT_SYMBOL(ipmi_get_my_address);
 
-int ipmi_set_my_LUN(ipmi_user_t   user,
+int ipmi_set_my_LUN(struct ipmi_user *user,
 		    unsigned int  channel,
 		    unsigned char LUN)
 {
+	int index, rv = 0;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
+
 	if (channel >= IPMI_MAX_CHANNELS)
-		return -EINVAL;
-	user->intf->addrinfo[channel].lun = LUN & 0x3;
+		rv = -EINVAL;
+	else
+		user->intf->addrinfo[channel].lun = LUN & 0x3;
+	release_ipmi_user(user, index);
+
-	return 0;
+	return rv;
 }
 EXPORT_SYMBOL(ipmi_set_my_LUN);
 
-int ipmi_get_my_LUN(ipmi_user_t   user,
+int ipmi_get_my_LUN(struct ipmi_user *user,
 		    unsigned int  channel,
 		    unsigned char *address)
 {
+	int index, rv = 0;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
+
 	if (channel >= IPMI_MAX_CHANNELS)
-		return -EINVAL;
-	*address = user->intf->addrinfo[channel].lun;
-	return 0;
+		rv = -EINVAL;
+	else
+		*address = user->intf->addrinfo[channel].lun;
+	release_ipmi_user(user, index);
+
+	return rv;
 }
 EXPORT_SYMBOL(ipmi_get_my_LUN);
 
-int ipmi_get_maintenance_mode(ipmi_user_t user)
+int ipmi_get_maintenance_mode(struct ipmi_user *user)
 {
-	int           mode;
+	int mode, index;
 	unsigned long flags;
 
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
+
 	spin_lock_irqsave(&user->intf->maintenance_mode_lock, flags);
 	mode = user->intf->maintenance_mode;
 	spin_unlock_irqrestore(&user->intf->maintenance_mode_lock, flags);
+	release_ipmi_user(user, index);
 
 	return mode;
 }
 EXPORT_SYMBOL(ipmi_get_maintenance_mode);
 
-static void maintenance_mode_update(ipmi_smi_t intf)
+static void maintenance_mode_update(struct ipmi_smi *intf)
 {
 	if (intf->handlers->set_maintenance_mode)
 		intf->handlers->set_maintenance_mode(
 			intf->send_info, intf->maintenance_mode_enable);
 }
 
-int ipmi_set_maintenance_mode(ipmi_user_t user, int mode)
+int ipmi_set_maintenance_mode(struct ipmi_user *user, int mode)
 {
-	int           rv = 0;
+	int rv = 0, index;
 	unsigned long flags;
-	ipmi_smi_t    intf = user->intf;
+	struct ipmi_smi *intf = user->intf;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
 
 	spin_lock_irqsave(&intf->maintenance_mode_lock, flags);
 	if (intf->maintenance_mode != mode) {
@@ -1332,17 +1428,23 @@
 	}
  out_unlock:
 	spin_unlock_irqrestore(&intf->maintenance_mode_lock, flags);
+	release_ipmi_user(user, index);
 
 	return rv;
 }
 EXPORT_SYMBOL(ipmi_set_maintenance_mode);
 
-int ipmi_set_gets_events(ipmi_user_t user, bool val)
+int ipmi_set_gets_events(struct ipmi_user *user, bool val)
 {
 	unsigned long        flags;
-	ipmi_smi_t           intf = user->intf;
+	struct ipmi_smi      *intf = user->intf;
 	struct ipmi_recv_msg *msg, *msg2;
 	struct list_head     msgs;
+	int index;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
 
 	INIT_LIST_HEAD(&msgs);
 
@@ -1383,7 +1485,7 @@
 		list_for_each_entry_safe(msg, msg2, &msgs, link) {
 			msg->user = user;
 			kref_get(&user->refcount);
-			deliver_response(msg);
+			deliver_local_response(intf, msg);
 		}
 
 		spin_lock_irqsave(&intf->events_lock, flags);
@@ -1392,12 +1494,13 @@
 
  out:
 	spin_unlock_irqrestore(&intf->events_lock, flags);
+	release_ipmi_user(user, index);
 
 	return 0;
 }
 EXPORT_SYMBOL(ipmi_set_gets_events);
 
-static struct cmd_rcvr *find_cmd_rcvr(ipmi_smi_t    intf,
+static struct cmd_rcvr *find_cmd_rcvr(struct ipmi_smi *intf,
 				      unsigned char netfn,
 				      unsigned char cmd,
 				      unsigned char chan)
@@ -1412,7 +1515,7 @@
 	return NULL;
 }
 
-static int is_cmd_rcvr_exclusive(ipmi_smi_t    intf,
+static int is_cmd_rcvr_exclusive(struct ipmi_smi *intf,
 				 unsigned char netfn,
 				 unsigned char cmd,
 				 unsigned int  chans)
@@ -1427,19 +1530,24 @@
 	return 1;
 }
 
-int ipmi_register_for_cmd(ipmi_user_t   user,
+int ipmi_register_for_cmd(struct ipmi_user *user,
 			  unsigned char netfn,
 			  unsigned char cmd,
 			  unsigned int  chans)
 {
-	ipmi_smi_t      intf = user->intf;
+	struct ipmi_smi *intf = user->intf;
 	struct cmd_rcvr *rcvr;
-	int             rv = 0;
+	int rv = 0, index;
 
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
 
 	rcvr = kmalloc(sizeof(*rcvr), GFP_KERNEL);
-	if (!rcvr)
-		return -ENOMEM;
+	if (!rcvr) {
+		rv = -ENOMEM;
+		goto out_release;
+	}
 	rcvr->cmd = cmd;
 	rcvr->netfn = netfn;
 	rcvr->chans = chans;
@@ -1457,24 +1565,30 @@
 
 	list_add_rcu(&rcvr->link, &intf->cmd_rcvrs);
 
- out_unlock:
+out_unlock:
 	mutex_unlock(&intf->cmd_rcvrs_mutex);
 	if (rv)
 		kfree(rcvr);
+out_release:
+	release_ipmi_user(user, index);
 
 	return rv;
 }
 EXPORT_SYMBOL(ipmi_register_for_cmd);
 
-int ipmi_unregister_for_cmd(ipmi_user_t   user,
+int ipmi_unregister_for_cmd(struct ipmi_user *user,
 			    unsigned char netfn,
 			    unsigned char cmd,
 			    unsigned int  chans)
 {
-	ipmi_smi_t      intf = user->intf;
+	struct ipmi_smi *intf = user->intf;
 	struct cmd_rcvr *rcvr;
 	struct cmd_rcvr *rcvrs = NULL;
-	int i, rv = -ENOENT;
+	int i, rv = -ENOENT, index;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
 
 	mutex_lock(&intf->cmd_rcvrs_mutex);
 	for (i = 0; i < IPMI_NUM_CHANNELS; i++) {
@@ -1495,12 +1609,14 @@
 	}
 	mutex_unlock(&intf->cmd_rcvrs_mutex);
 	synchronize_rcu();
+	release_ipmi_user(user, index);
 	while (rcvrs) {
 		atomic_dec(&intf->event_waiters);
 		rcvr = rcvrs;
 		rcvrs = rcvr->next;
 		kfree(rcvr);
 	}
+
 	return rv;
 }
 EXPORT_SYMBOL(ipmi_unregister_for_cmd);
@@ -1535,21 +1651,19 @@
 		smi_msg->data[3] = 0;
 	smi_msg->data[i+3] = ipmb_addr->slave_addr;
 	smi_msg->data[i+4] = (msg->netfn << 2) | (ipmb_addr->lun & 0x3);
-	smi_msg->data[i+5] = ipmb_checksum(&(smi_msg->data[i+3]), 2);
+	smi_msg->data[i+5] = ipmb_checksum(&smi_msg->data[i + 3], 2);
 	smi_msg->data[i+6] = source_address;
 	smi_msg->data[i+7] = (ipmb_seq << 2) | source_lun;
 	smi_msg->data[i+8] = msg->cmd;
 
 	/* Now tack on the data to the message. */
 	if (msg->data_len > 0)
-		memcpy(&(smi_msg->data[i+9]), msg->data,
-		       msg->data_len);
+		memcpy(&smi_msg->data[i + 9], msg->data, msg->data_len);
 	smi_msg->data_size = msg->data_len + 9;
 
 	/* Now calculate the checksum and tack it on. */
 	smi_msg->data[i+smi_msg->data_size]
-		= ipmb_checksum(&(smi_msg->data[i+6]),
-				smi_msg->data_size-6);
+		= ipmb_checksum(&smi_msg->data[i + 6], smi_msg->data_size - 6);
 
 	/*
 	 * Add on the checksum size and the offset from the
@@ -1574,21 +1688,19 @@
 	smi_msg->data[3] = lan_addr->session_handle;
 	smi_msg->data[4] = lan_addr->remote_SWID;
 	smi_msg->data[5] = (msg->netfn << 2) | (lan_addr->lun & 0x3);
-	smi_msg->data[6] = ipmb_checksum(&(smi_msg->data[4]), 2);
+	smi_msg->data[6] = ipmb_checksum(&smi_msg->data[4], 2);
 	smi_msg->data[7] = lan_addr->local_SWID;
 	smi_msg->data[8] = (ipmb_seq << 2) | source_lun;
 	smi_msg->data[9] = msg->cmd;
 
 	/* Now tack on the data to the message. */
 	if (msg->data_len > 0)
-		memcpy(&(smi_msg->data[10]), msg->data,
-		       msg->data_len);
+		memcpy(&smi_msg->data[10], msg->data, msg->data_len);
 	smi_msg->data_size = msg->data_len + 10;
 
 	/* Now calculate the checksum and tack it on. */
 	smi_msg->data[smi_msg->data_size]
-		= ipmb_checksum(&(smi_msg->data[7]),
-				smi_msg->data_size-7);
+		= ipmb_checksum(&smi_msg->data[7], smi_msg->data_size - 7);
 
 	/*
 	 * Add on the checksum size and the offset from the
@@ -1599,7 +1711,7 @@
 	smi_msg->msgid = msgid;
 }
 
-static struct ipmi_smi_msg *smi_add_send_msg(ipmi_smi_t intf,
+static struct ipmi_smi_msg *smi_add_send_msg(struct ipmi_smi *intf,
 					     struct ipmi_smi_msg *smi_msg,
 					     int priority)
 {
@@ -1617,7 +1729,8 @@
 }
 
 
-static void smi_send(ipmi_smi_t intf, const struct ipmi_smi_handlers *handlers,
+static void smi_send(struct ipmi_smi *intf,
+		     const struct ipmi_smi_handlers *handlers,
 		     struct ipmi_smi_msg *smi_msg, int priority)
 {
 	int run_to_completion = intf->run_to_completion;
@@ -1636,14 +1749,348 @@
 		handlers->sender(intf->send_info, smi_msg);
 }
 
+static bool is_maintenance_mode_cmd(struct kernel_ipmi_msg *msg)
+{
+	return (((msg->netfn == IPMI_NETFN_APP_REQUEST)
+		 && ((msg->cmd == IPMI_COLD_RESET_CMD)
+		     || (msg->cmd == IPMI_WARM_RESET_CMD)))
+		|| (msg->netfn == IPMI_NETFN_FIRMWARE_REQUEST));
+}
+
+static int i_ipmi_req_sysintf(struct ipmi_smi        *intf,
+			      struct ipmi_addr       *addr,
+			      long                   msgid,
+			      struct kernel_ipmi_msg *msg,
+			      struct ipmi_smi_msg    *smi_msg,
+			      struct ipmi_recv_msg   *recv_msg,
+			      int                    retries,
+			      unsigned int           retry_time_ms)
+{
+	struct ipmi_system_interface_addr *smi_addr;
+
+	if (msg->netfn & 1)
+		/* Responses are not allowed to the SMI. */
+		return -EINVAL;
+
+	smi_addr = (struct ipmi_system_interface_addr *) addr;
+	if (smi_addr->lun > 3) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	memcpy(&recv_msg->addr, smi_addr, sizeof(*smi_addr));
+
+	if ((msg->netfn == IPMI_NETFN_APP_REQUEST)
+	    && ((msg->cmd == IPMI_SEND_MSG_CMD)
+		|| (msg->cmd == IPMI_GET_MSG_CMD)
+		|| (msg->cmd == IPMI_READ_EVENT_MSG_BUFFER_CMD))) {
+		/*
+		 * We don't let the user do these, since we manage
+		 * the sequence numbers.
+		 */
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	if (is_maintenance_mode_cmd(msg)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&intf->maintenance_mode_lock, flags);
+		intf->auto_maintenance_timeout
+			= maintenance_mode_timeout_ms;
+		if (!intf->maintenance_mode
+		    && !intf->maintenance_mode_enable) {
+			intf->maintenance_mode_enable = true;
+			maintenance_mode_update(intf);
+		}
+		spin_unlock_irqrestore(&intf->maintenance_mode_lock,
+				       flags);
+	}
+
+	if (msg->data_len + 2 > IPMI_MAX_MSG_LENGTH) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EMSGSIZE;
+	}
+
+	smi_msg->data[0] = (msg->netfn << 2) | (smi_addr->lun & 0x3);
+	smi_msg->data[1] = msg->cmd;
+	smi_msg->msgid = msgid;
+	smi_msg->user_data = recv_msg;
+	if (msg->data_len > 0)
+		memcpy(&smi_msg->data[2], msg->data, msg->data_len);
+	smi_msg->data_size = msg->data_len + 2;
+	ipmi_inc_stat(intf, sent_local_commands);
+
+	return 0;
+}
+
+static int i_ipmi_req_ipmb(struct ipmi_smi        *intf,
+			   struct ipmi_addr       *addr,
+			   long                   msgid,
+			   struct kernel_ipmi_msg *msg,
+			   struct ipmi_smi_msg    *smi_msg,
+			   struct ipmi_recv_msg   *recv_msg,
+			   unsigned char          source_address,
+			   unsigned char          source_lun,
+			   int                    retries,
+			   unsigned int           retry_time_ms)
+{
+	struct ipmi_ipmb_addr *ipmb_addr;
+	unsigned char ipmb_seq;
+	long seqid;
+	int broadcast = 0;
+	struct ipmi_channel *chans;
+	int rv = 0;
+
+	if (addr->channel >= IPMI_MAX_CHANNELS) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	chans = READ_ONCE(intf->channel_list)->c;
+
+	if (chans[addr->channel].medium != IPMI_CHANNEL_MEDIUM_IPMB) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	if (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE) {
+		/*
+		 * Broadcasts add a zero at the beginning of the
+		 * message, but otherwise is the same as an IPMB
+		 * address.
+		 */
+		addr->addr_type = IPMI_IPMB_ADDR_TYPE;
+		broadcast = 1;
+		retries = 0; /* Don't retry broadcasts. */
+	}
+
+	/*
+	 * 9 for the header and 1 for the checksum, plus
+	 * possibly one for the broadcast.
+	 */
+	if ((msg->data_len + 10 + broadcast) > IPMI_MAX_MSG_LENGTH) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EMSGSIZE;
+	}
+
+	ipmb_addr = (struct ipmi_ipmb_addr *) addr;
+	if (ipmb_addr->lun > 3) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	memcpy(&recv_msg->addr, ipmb_addr, sizeof(*ipmb_addr));
+
+	if (recv_msg->msg.netfn & 0x1) {
+		/*
+		 * It's a response, so use the user's sequence
+		 * from msgid.
+		 */
+		ipmi_inc_stat(intf, sent_ipmb_responses);
+		format_ipmb_msg(smi_msg, msg, ipmb_addr, msgid,
+				msgid, broadcast,
+				source_address, source_lun);
+
+		/*
+		 * Save the receive message so we can use it
+		 * to deliver the response.
+		 */
+		smi_msg->user_data = recv_msg;
+	} else {
+		/* It's a command, so get a sequence for it. */
+		unsigned long flags;
+
+		spin_lock_irqsave(&intf->seq_lock, flags);
+
+		if (is_maintenance_mode_cmd(msg))
+			intf->ipmb_maintenance_mode_timeout =
+				maintenance_mode_timeout_ms;
+
+		if (intf->ipmb_maintenance_mode_timeout && retry_time_ms == 0)
+			/* Different default in maintenance mode */
+			retry_time_ms = default_maintenance_retry_ms;
+
+		/*
+		 * Create a sequence number, applying the default
+		 * timeout and retry count when none were supplied.
+		 */
+		rv = intf_next_seq(intf,
+				   recv_msg,
+				   retry_time_ms,
+				   retries,
+				   broadcast,
+				   &ipmb_seq,
+				   &seqid);
+		if (rv)
+			/*
+			 * We have used up all the sequence numbers,
+			 * probably, so abort.
+			 */
+			goto out_err;
+
+		ipmi_inc_stat(intf, sent_ipmb_commands);
+
+		/*
+		 * Store the sequence number in the message,
+		 * so that when the send message response
+		 * comes back we can start the timer.
+		 */
+		format_ipmb_msg(smi_msg, msg, ipmb_addr,
+				STORE_SEQ_IN_MSGID(ipmb_seq, seqid),
+				ipmb_seq, broadcast,
+				source_address, source_lun);
+
+		/*
+		 * Copy the message into the recv message data, so we
+		 * can retransmit it later if necessary.
+		 */
+		memcpy(recv_msg->msg_data, smi_msg->data,
+		       smi_msg->data_size);
+		recv_msg->msg.data = recv_msg->msg_data;
+		recv_msg->msg.data_len = smi_msg->data_size;
+
+		/*
+		 * We don't unlock until here, because we need
+		 * to copy the completed message into the
+		 * recv_msg before we release the lock.
+		 * Otherwise, race conditions may bite us.  I
+		 * know that's pretty paranoid, but I prefer
+		 * to be correct.
+		 */
+out_err:
+		spin_unlock_irqrestore(&intf->seq_lock, flags);
+	}
+
+	return rv;
+}
+
+static int i_ipmi_req_lan(struct ipmi_smi        *intf,
+			  struct ipmi_addr       *addr,
+			  long                   msgid,
+			  struct kernel_ipmi_msg *msg,
+			  struct ipmi_smi_msg    *smi_msg,
+			  struct ipmi_recv_msg   *recv_msg,
+			  unsigned char          source_lun,
+			  int                    retries,
+			  unsigned int           retry_time_ms)
+{
+	struct ipmi_lan_addr  *lan_addr;
+	unsigned char ipmb_seq;
+	long seqid;
+	struct ipmi_channel *chans;
+	int rv = 0;
+
+	if (addr->channel >= IPMI_MAX_CHANNELS) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	chans = READ_ONCE(intf->channel_list)->c;
+
+	if ((chans[addr->channel].medium
+				!= IPMI_CHANNEL_MEDIUM_8023LAN)
+			&& (chans[addr->channel].medium
+			    != IPMI_CHANNEL_MEDIUM_ASYNC)) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	/* 11 for the header and 1 for the checksum. */
+	if ((msg->data_len + 12) > IPMI_MAX_MSG_LENGTH) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EMSGSIZE;
+	}
+
+	lan_addr = (struct ipmi_lan_addr *) addr;
+	if (lan_addr->lun > 3) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	memcpy(&recv_msg->addr, lan_addr, sizeof(*lan_addr));
+
+	if (recv_msg->msg.netfn & 0x1) {
+		/*
+		 * It's a response, so use the user's sequence
+		 * from msgid.
+		 */
+		ipmi_inc_stat(intf, sent_lan_responses);
+		format_lan_msg(smi_msg, msg, lan_addr, msgid,
+			       msgid, source_lun);
+
+		/*
+		 * Save the receive message so we can use it
+		 * to deliver the response.
+		 */
+		smi_msg->user_data = recv_msg;
+	} else {
+		/* It's a command, so get a sequence for it. */
+		unsigned long flags;
+
+		spin_lock_irqsave(&intf->seq_lock, flags);
+
+		/*
+		 * Create a sequence number, applying the default
+		 * timeout and retry count when none were supplied.
+		 */
+		rv = intf_next_seq(intf,
+				   recv_msg,
+				   retry_time_ms,
+				   retries,
+				   0,
+				   &ipmb_seq,
+				   &seqid);
+		if (rv)
+			/*
+			 * We have used up all the sequence numbers,
+			 * probably, so abort.
+			 */
+			goto out_err;
+
+		ipmi_inc_stat(intf, sent_lan_commands);
+
+		/*
+		 * Store the sequence number in the message,
+		 * so that when the send message response
+		 * comes back we can start the timer.
+		 */
+		format_lan_msg(smi_msg, msg, lan_addr,
+			       STORE_SEQ_IN_MSGID(ipmb_seq, seqid),
+			       ipmb_seq, source_lun);
+
+		/*
+		 * Copy the message into the recv message data, so we
+		 * can retransmit it later if necessary.
+		 */
+		memcpy(recv_msg->msg_data, smi_msg->data,
+		       smi_msg->data_size);
+		recv_msg->msg.data = recv_msg->msg_data;
+		recv_msg->msg.data_len = smi_msg->data_size;
+
+		/*
+		 * We don't unlock until here, because we need
+		 * to copy the completed message into the
+		 * recv_msg before we release the lock.
+		 * Otherwise, race conditions may bite us.  I
+		 * know that's pretty paranoid, but I prefer
+		 * to be correct.
+		 */
+out_err:
+		spin_unlock_irqrestore(&intf->seq_lock, flags);
+	}
+
+	return rv;
+}
+
 /*
  * Separate from ipmi_request so that the user does not have to be
  * supplied in certain circumstances (mainly at panic time).  If
  * messages are supplied, they will be freed, even if an error
  * occurs.
  */
-static int i_ipmi_request(ipmi_user_t          user,
-			  ipmi_smi_t           intf,
+static int i_ipmi_request(struct ipmi_user     *user,
+			  struct ipmi_smi      *intf,
 			  struct ipmi_addr     *addr,
 			  long                 msgid,
 			  struct kernel_ipmi_msg *msg,
@@ -1656,18 +2103,18 @@
 			  int                  retries,
 			  unsigned int         retry_time_ms)
 {
-	int                      rv = 0;
-	struct ipmi_smi_msg      *smi_msg;
-	struct ipmi_recv_msg     *recv_msg;
-	unsigned long            flags;
-
+	struct ipmi_smi_msg *smi_msg;
+	struct ipmi_recv_msg *recv_msg;
+	int rv = 0;
 
 	if (supplied_recv)
 		recv_msg = supplied_recv;
 	else {
 		recv_msg = ipmi_alloc_recv_msg();
-		if (recv_msg == NULL)
-			return -ENOMEM;
+		if (recv_msg == NULL) {
+			rv = -ENOMEM;
+			goto out;
+		}
 	}
 	recv_msg->user_msg_data = user_msg_data;
 
@@ -1677,7 +2124,8 @@
 		smi_msg = ipmi_alloc_smi_msg();
 		if (smi_msg == NULL) {
 			ipmi_free_recv_msg(recv_msg);
-			return -ENOMEM;
+			rv = -ENOMEM;
+			goto out;
 		}
 	}
 
@@ -1689,6 +2137,7 @@
 
 	recv_msg->user = user;
 	if (user)
+		/* The put happens when the message is freed. */
 		kref_get(&user->refcount);
 	recv_msg->msgid = msgid;
 	/*
@@ -1698,343 +2147,37 @@
 	recv_msg->msg = *msg;
 
 	if (addr->addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) {
-		struct ipmi_system_interface_addr *smi_addr;
-
-		if (msg->netfn & 1) {
-			/* Responses are not allowed to the SMI. */
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		smi_addr = (struct ipmi_system_interface_addr *) addr;
-		if (smi_addr->lun > 3) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		memcpy(&recv_msg->addr, smi_addr, sizeof(*smi_addr));
-
-		if ((msg->netfn == IPMI_NETFN_APP_REQUEST)
-		    && ((msg->cmd == IPMI_SEND_MSG_CMD)
-			|| (msg->cmd == IPMI_GET_MSG_CMD)
-			|| (msg->cmd == IPMI_READ_EVENT_MSG_BUFFER_CMD))) {
-			/*
-			 * We don't let the user do these, since we manage
-			 * the sequence numbers.
-			 */
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		if (((msg->netfn == IPMI_NETFN_APP_REQUEST)
-		      && ((msg->cmd == IPMI_COLD_RESET_CMD)
-			  || (msg->cmd == IPMI_WARM_RESET_CMD)))
-		     || (msg->netfn == IPMI_NETFN_FIRMWARE_REQUEST)) {
-			spin_lock_irqsave(&intf->maintenance_mode_lock, flags);
-			intf->auto_maintenance_timeout
-				= IPMI_MAINTENANCE_MODE_TIMEOUT;
-			if (!intf->maintenance_mode
-			    && !intf->maintenance_mode_enable) {
-				intf->maintenance_mode_enable = true;
-				maintenance_mode_update(intf);
-			}
-			spin_unlock_irqrestore(&intf->maintenance_mode_lock,
-					       flags);
-		}
-
-		if ((msg->data_len + 2) > IPMI_MAX_MSG_LENGTH) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EMSGSIZE;
-			goto out_err;
-		}
-
-		smi_msg->data[0] = (msg->netfn << 2) | (smi_addr->lun & 0x3);
-		smi_msg->data[1] = msg->cmd;
-		smi_msg->msgid = msgid;
-		smi_msg->user_data = recv_msg;
-		if (msg->data_len > 0)
-			memcpy(&(smi_msg->data[2]), msg->data, msg->data_len);
-		smi_msg->data_size = msg->data_len + 2;
-		ipmi_inc_stat(intf, sent_local_commands);
+		rv = i_ipmi_req_sysintf(intf, addr, msgid, msg, smi_msg,
+					recv_msg, retries, retry_time_ms);
 	} else if (is_ipmb_addr(addr) || is_ipmb_bcast_addr(addr)) {
-		struct ipmi_ipmb_addr *ipmb_addr;
-		unsigned char         ipmb_seq;
-		long                  seqid;
-		int                   broadcast = 0;
-		struct ipmi_channel   *chans;
-
-		if (addr->channel >= IPMI_MAX_CHANNELS) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		chans = READ_ONCE(intf->channel_list)->c;
-
-		if (chans[addr->channel].medium != IPMI_CHANNEL_MEDIUM_IPMB) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		if (retries < 0) {
-		    if (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE)
-			retries = 0; /* Don't retry broadcasts. */
-		    else
-			retries = 4;
-		}
-		if (addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE) {
-		    /*
-		     * Broadcasts add a zero at the beginning of the
-		     * message, but otherwise is the same as an IPMB
-		     * address.
-		     */
-		    addr->addr_type = IPMI_IPMB_ADDR_TYPE;
-		    broadcast = 1;
-		}
-
-
-		/* Default to 1 second retries. */
-		if (retry_time_ms == 0)
-		    retry_time_ms = 1000;
-
-		/*
-		 * 9 for the header and 1 for the checksum, plus
-		 * possibly one for the broadcast.
-		 */
-		if ((msg->data_len + 10 + broadcast) > IPMI_MAX_MSG_LENGTH) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EMSGSIZE;
-			goto out_err;
-		}
-
-		ipmb_addr = (struct ipmi_ipmb_addr *) addr;
-		if (ipmb_addr->lun > 3) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		memcpy(&recv_msg->addr, ipmb_addr, sizeof(*ipmb_addr));
-
-		if (recv_msg->msg.netfn & 0x1) {
-			/*
-			 * It's a response, so use the user's sequence
-			 * from msgid.
-			 */
-			ipmi_inc_stat(intf, sent_ipmb_responses);
-			format_ipmb_msg(smi_msg, msg, ipmb_addr, msgid,
-					msgid, broadcast,
-					source_address, source_lun);
-
-			/*
-			 * Save the receive message so we can use it
-			 * to deliver the response.
-			 */
-			smi_msg->user_data = recv_msg;
-		} else {
-			/* It's a command, so get a sequence for it. */
-
-			spin_lock_irqsave(&(intf->seq_lock), flags);
-
-			/*
-			 * Create a sequence number with a 1 second
-			 * timeout and 4 retries.
-			 */
-			rv = intf_next_seq(intf,
-					   recv_msg,
-					   retry_time_ms,
-					   retries,
-					   broadcast,
-					   &ipmb_seq,
-					   &seqid);
-			if (rv) {
-				/*
-				 * We have used up all the sequence numbers,
-				 * probably, so abort.
-				 */
-				spin_unlock_irqrestore(&(intf->seq_lock),
-						       flags);
-				goto out_err;
-			}
-
-			ipmi_inc_stat(intf, sent_ipmb_commands);
-
-			/*
-			 * Store the sequence number in the message,
-			 * so that when the send message response
-			 * comes back we can start the timer.
-			 */
-			format_ipmb_msg(smi_msg, msg, ipmb_addr,
-					STORE_SEQ_IN_MSGID(ipmb_seq, seqid),
-					ipmb_seq, broadcast,
-					source_address, source_lun);
-
-			/*
-			 * Copy the message into the recv message data, so we
-			 * can retransmit it later if necessary.
-			 */
-			memcpy(recv_msg->msg_data, smi_msg->data,
-			       smi_msg->data_size);
-			recv_msg->msg.data = recv_msg->msg_data;
-			recv_msg->msg.data_len = smi_msg->data_size;
-
-			/*
-			 * We don't unlock until here, because we need
-			 * to copy the completed message into the
-			 * recv_msg before we release the lock.
-			 * Otherwise, race conditions may bite us.  I
-			 * know that's pretty paranoid, but I prefer
-			 * to be correct.
-			 */
-			spin_unlock_irqrestore(&(intf->seq_lock), flags);
-		}
+		rv = i_ipmi_req_ipmb(intf, addr, msgid, msg, smi_msg, recv_msg,
+				     source_address, source_lun,
+				     retries, retry_time_ms);
 	} else if (is_lan_addr(addr)) {
-		struct ipmi_lan_addr  *lan_addr;
-		unsigned char         ipmb_seq;
-		long                  seqid;
-		struct ipmi_channel   *chans;
-
-		if (addr->channel >= IPMI_MAX_CHANNELS) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		chans = READ_ONCE(intf->channel_list)->c;
-
-		if ((chans[addr->channel].medium
-				!= IPMI_CHANNEL_MEDIUM_8023LAN)
-		    && (chans[addr->channel].medium
-				!= IPMI_CHANNEL_MEDIUM_ASYNC)) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		retries = 4;
-
-		/* Default to 1 second retries. */
-		if (retry_time_ms == 0)
-		    retry_time_ms = 1000;
-
-		/* 11 for the header and 1 for the checksum. */
-		if ((msg->data_len + 12) > IPMI_MAX_MSG_LENGTH) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EMSGSIZE;
-			goto out_err;
-		}
-
-		lan_addr = (struct ipmi_lan_addr *) addr;
-		if (lan_addr->lun > 3) {
-			ipmi_inc_stat(intf, sent_invalid_commands);
-			rv = -EINVAL;
-			goto out_err;
-		}
-
-		memcpy(&recv_msg->addr, lan_addr, sizeof(*lan_addr));
-
-		if (recv_msg->msg.netfn & 0x1) {
-			/*
-			 * It's a response, so use the user's sequence
-			 * from msgid.
-			 */
-			ipmi_inc_stat(intf, sent_lan_responses);
-			format_lan_msg(smi_msg, msg, lan_addr, msgid,
-				       msgid, source_lun);
-
-			/*
-			 * Save the receive message so we can use it
-			 * to deliver the response.
-			 */
-			smi_msg->user_data = recv_msg;
-		} else {
-			/* It's a command, so get a sequence for it. */
-
-			spin_lock_irqsave(&(intf->seq_lock), flags);
-
-			/*
-			 * Create a sequence number with a 1 second
-			 * timeout and 4 retries.
-			 */
-			rv = intf_next_seq(intf,
-					   recv_msg,
-					   retry_time_ms,
-					   retries,
-					   0,
-					   &ipmb_seq,
-					   &seqid);
-			if (rv) {
-				/*
-				 * We have used up all the sequence numbers,
-				 * probably, so abort.
-				 */
-				spin_unlock_irqrestore(&(intf->seq_lock),
-						       flags);
-				goto out_err;
-			}
-
-			ipmi_inc_stat(intf, sent_lan_commands);
-
-			/*
-			 * Store the sequence number in the message,
-			 * so that when the send message response
-			 * comes back we can start the timer.
-			 */
-			format_lan_msg(smi_msg, msg, lan_addr,
-				       STORE_SEQ_IN_MSGID(ipmb_seq, seqid),
-				       ipmb_seq, source_lun);
-
-			/*
-			 * Copy the message into the recv message data, so we
-			 * can retransmit it later if necessary.
-			 */
-			memcpy(recv_msg->msg_data, smi_msg->data,
-			       smi_msg->data_size);
-			recv_msg->msg.data = recv_msg->msg_data;
-			recv_msg->msg.data_len = smi_msg->data_size;
-
-			/*
-			 * We don't unlock until here, because we need
-			 * to copy the completed message into the
-			 * recv_msg before we release the lock.
-			 * Otherwise, race conditions may bite us.  I
-			 * know that's pretty paranoid, but I prefer
-			 * to be correct.
-			 */
-			spin_unlock_irqrestore(&(intf->seq_lock), flags);
-		}
+		rv = i_ipmi_req_lan(intf, addr, msgid, msg, smi_msg, recv_msg,
+				    source_lun, retries, retry_time_ms);
 	} else {
 		/* Unknown address type. */
 		ipmi_inc_stat(intf, sent_invalid_commands);
 		rv = -EINVAL;
-		goto out_err;
 	}
 
-#ifdef DEBUG_MSGING
-	{
-		int m;
-		for (m = 0; m < smi_msg->data_size; m++)
-			printk(" %2.2x", smi_msg->data[m]);
-		printk("\n");
+	if (rv) {
+out_err:
+		ipmi_free_smi_msg(smi_msg);
+		ipmi_free_recv_msg(recv_msg);
+	} else {
+		ipmi_debug_msg("Send", smi_msg->data, smi_msg->data_size);
+
+		smi_send(intf, intf->handlers, smi_msg, priority);
 	}
-#endif
-
-	smi_send(intf, intf->handlers, smi_msg, priority);
 	rcu_read_unlock();
 
-	return 0;
-
- out_err:
-	rcu_read_unlock();
-	ipmi_free_smi_msg(smi_msg);
-	ipmi_free_recv_msg(recv_msg);
+out:
 	return rv;
 }
 
-static int check_addr(ipmi_smi_t       intf,
+static int check_addr(struct ipmi_smi  *intf,
 		      struct ipmi_addr *addr,
 		      unsigned char    *saddr,
 		      unsigned char    *lun)
@@ -2046,7 +2189,7 @@
 	return 0;
 }
 
-int ipmi_request_settime(ipmi_user_t      user,
+int ipmi_request_settime(struct ipmi_user *user,
 			 struct ipmi_addr *addr,
 			 long             msgid,
 			 struct kernel_ipmi_msg  *msg,
@@ -2056,29 +2199,36 @@
 			 unsigned int     retry_time_ms)
 {
 	unsigned char saddr = 0, lun = 0;
-	int           rv;
+	int rv, index;
 
 	if (!user)
 		return -EINVAL;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
+
 	rv = check_addr(user->intf, addr, &saddr, &lun);
-	if (rv)
-		return rv;
-	return i_ipmi_request(user,
-			      user->intf,
-			      addr,
-			      msgid,
-			      msg,
-			      user_msg_data,
-			      NULL, NULL,
-			      priority,
-			      saddr,
-			      lun,
-			      retries,
-			      retry_time_ms);
+	if (!rv)
+		rv = i_ipmi_request(user,
+				    user->intf,
+				    addr,
+				    msgid,
+				    msg,
+				    user_msg_data,
+				    NULL, NULL,
+				    priority,
+				    saddr,
+				    lun,
+				    retries,
+				    retry_time_ms);
+
+	release_ipmi_user(user, index);
+	return rv;
 }
 EXPORT_SYMBOL(ipmi_request_settime);
 
-int ipmi_request_supply_msgs(ipmi_user_t          user,
+int ipmi_request_supply_msgs(struct ipmi_user     *user,
 			     struct ipmi_addr     *addr,
 			     long                 msgid,
 			     struct kernel_ipmi_msg *msg,
@@ -2088,29 +2238,37 @@
 			     int                  priority)
 {
 	unsigned char saddr = 0, lun = 0;
-	int           rv;
+	int rv, index;
 
 	if (!user)
 		return -EINVAL;
+
+	user = acquire_ipmi_user(user, &index);
+	if (!user)
+		return -ENODEV;
+
 	rv = check_addr(user->intf, addr, &saddr, &lun);
-	if (rv)
-		return rv;
-	return i_ipmi_request(user,
-			      user->intf,
-			      addr,
-			      msgid,
-			      msg,
-			      user_msg_data,
-			      supplied_smi,
-			      supplied_recv,
-			      priority,
-			      saddr,
-			      lun,
-			      -1, 0);
+	if (!rv)
+		rv = i_ipmi_request(user,
+				    user->intf,
+				    addr,
+				    msgid,
+				    msg,
+				    user_msg_data,
+				    supplied_smi,
+				    supplied_recv,
+				    priority,
+				    saddr,
+				    lun,
+				    -1, 0);
+
+	release_ipmi_user(user, index);
+	return rv;
 }
 EXPORT_SYMBOL(ipmi_request_supply_msgs);
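
Both request entry points above now bracket their work with
acquire_ipmi_user()/release_ipmi_user(), the standard SRCU read-side
idiom: srcu_read_lock() returns an index that must be handed back to
srcu_read_unlock() on the same srcu_struct, and readers may sleep, which
plain RCU forbids.  A minimal sketch of the pattern with hypothetical
names (example_srcu and example_data are not part of the driver):

#include <linux/srcu.h>

DEFINE_STATIC_SRCU(example_srcu);
static void __rcu *example_data;

static void example_reader(void)
{
	int index = srcu_read_lock(&example_srcu);
	void *p = srcu_dereference(example_data, &example_srcu);

	/* ... use p; sleeping is legal inside an SRCU read section ... */
	srcu_read_unlock(&example_srcu, index);
}

static void example_writer(void *newp)
{
	rcu_assign_pointer(example_data, newp);
	/* Wait out all pre-existing readers before freeing old data. */
	synchronize_srcu(&example_srcu);
}
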
 
-static void bmc_device_id_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg)
+static void bmc_device_id_handler(struct ipmi_smi *intf,
+				  struct ipmi_recv_msg *msg)
 {
 	int rv;
 
@@ -2142,7 +2300,7 @@
 }
 
 static int
-send_get_device_id_cmd(ipmi_smi_t intf)
+send_get_device_id_cmd(struct ipmi_smi *intf)
 {
 	struct ipmi_system_interface_addr si;
 	struct kernel_ipmi_msg msg;
@@ -2170,7 +2328,7 @@
 			      -1, 0);
 }
 
-static int __get_device_id(ipmi_smi_t intf, struct bmc_device *bmc)
+static int __get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc)
 {
 	int rv;
 
@@ -2204,7 +2362,7 @@
  * Except for the first time this is called (in ipmi_register_smi()),
  * this will always return good data.
  */
-static int __bmc_get_device_id(ipmi_smi_t intf, struct bmc_device *bmc,
+static int __bmc_get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc,
 			       struct ipmi_device_id *id,
 			       bool *guid_set, guid_t *guid, int intf_num)
 {
@@ -2337,223 +2495,13 @@
 	return rv;
 }
 
-static int bmc_get_device_id(ipmi_smi_t intf, struct bmc_device *bmc,
+static int bmc_get_device_id(struct ipmi_smi *intf, struct bmc_device *bmc,
 			     struct ipmi_device_id *id,
 			     bool *guid_set, guid_t *guid)
 {
 	return __bmc_get_device_id(intf, bmc, id, guid_set, guid, -1);
 }
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-static int smi_ipmb_proc_show(struct seq_file *m, void *v)
-{
-	ipmi_smi_t intf = m->private;
-	int        i;
-
-	seq_printf(m, "%x", intf->addrinfo[0].address);
-	for (i = 1; i < IPMI_MAX_CHANNELS; i++)
-		seq_printf(m, " %x", intf->addrinfo[i].address);
-	seq_putc(m, '\n');
-
-	return 0;
-}
-
-static int smi_ipmb_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smi_ipmb_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations smi_ipmb_proc_ops = {
-	.open		= smi_ipmb_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int smi_version_proc_show(struct seq_file *m, void *v)
-{
-	ipmi_smi_t intf = m->private;
-	struct ipmi_device_id id;
-	int rv;
-
-	rv = bmc_get_device_id(intf, NULL, &id, NULL, NULL);
-	if (rv)
-		return rv;
-
-	seq_printf(m, "%u.%u\n",
-		   ipmi_version_major(&id),
-		   ipmi_version_minor(&id));
-
-	return 0;
-}
-
-static int smi_version_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smi_version_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations smi_version_proc_ops = {
-	.open		= smi_version_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int smi_stats_proc_show(struct seq_file *m, void *v)
-{
-	ipmi_smi_t intf = m->private;
-
-	seq_printf(m, "sent_invalid_commands:       %u\n",
-		       ipmi_get_stat(intf, sent_invalid_commands));
-	seq_printf(m, "sent_local_commands:         %u\n",
-		       ipmi_get_stat(intf, sent_local_commands));
-	seq_printf(m, "handled_local_responses:     %u\n",
-		       ipmi_get_stat(intf, handled_local_responses));
-	seq_printf(m, "unhandled_local_responses:   %u\n",
-		       ipmi_get_stat(intf, unhandled_local_responses));
-	seq_printf(m, "sent_ipmb_commands:          %u\n",
-		       ipmi_get_stat(intf, sent_ipmb_commands));
-	seq_printf(m, "sent_ipmb_command_errs:      %u\n",
-		       ipmi_get_stat(intf, sent_ipmb_command_errs));
-	seq_printf(m, "retransmitted_ipmb_commands: %u\n",
-		       ipmi_get_stat(intf, retransmitted_ipmb_commands));
-	seq_printf(m, "timed_out_ipmb_commands:     %u\n",
-		       ipmi_get_stat(intf, timed_out_ipmb_commands));
-	seq_printf(m, "timed_out_ipmb_broadcasts:   %u\n",
-		       ipmi_get_stat(intf, timed_out_ipmb_broadcasts));
-	seq_printf(m, "sent_ipmb_responses:         %u\n",
-		       ipmi_get_stat(intf, sent_ipmb_responses));
-	seq_printf(m, "handled_ipmb_responses:      %u\n",
-		       ipmi_get_stat(intf, handled_ipmb_responses));
-	seq_printf(m, "invalid_ipmb_responses:      %u\n",
-		       ipmi_get_stat(intf, invalid_ipmb_responses));
-	seq_printf(m, "unhandled_ipmb_responses:    %u\n",
-		       ipmi_get_stat(intf, unhandled_ipmb_responses));
-	seq_printf(m, "sent_lan_commands:           %u\n",
-		       ipmi_get_stat(intf, sent_lan_commands));
-	seq_printf(m, "sent_lan_command_errs:       %u\n",
-		       ipmi_get_stat(intf, sent_lan_command_errs));
-	seq_printf(m, "retransmitted_lan_commands:  %u\n",
-		       ipmi_get_stat(intf, retransmitted_lan_commands));
-	seq_printf(m, "timed_out_lan_commands:      %u\n",
-		       ipmi_get_stat(intf, timed_out_lan_commands));
-	seq_printf(m, "sent_lan_responses:          %u\n",
-		       ipmi_get_stat(intf, sent_lan_responses));
-	seq_printf(m, "handled_lan_responses:       %u\n",
-		       ipmi_get_stat(intf, handled_lan_responses));
-	seq_printf(m, "invalid_lan_responses:       %u\n",
-		       ipmi_get_stat(intf, invalid_lan_responses));
-	seq_printf(m, "unhandled_lan_responses:     %u\n",
-		       ipmi_get_stat(intf, unhandled_lan_responses));
-	seq_printf(m, "handled_commands:            %u\n",
-		       ipmi_get_stat(intf, handled_commands));
-	seq_printf(m, "invalid_commands:            %u\n",
-		       ipmi_get_stat(intf, invalid_commands));
-	seq_printf(m, "unhandled_commands:          %u\n",
-		       ipmi_get_stat(intf, unhandled_commands));
-	seq_printf(m, "invalid_events:              %u\n",
-		       ipmi_get_stat(intf, invalid_events));
-	seq_printf(m, "events:                      %u\n",
-		       ipmi_get_stat(intf, events));
-	seq_printf(m, "failed rexmit LAN msgs:      %u\n",
-		       ipmi_get_stat(intf, dropped_rexmit_lan_commands));
-	seq_printf(m, "failed rexmit IPMB msgs:     %u\n",
-		       ipmi_get_stat(intf, dropped_rexmit_ipmb_commands));
-	return 0;
-}
-
-static int smi_stats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smi_stats_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations smi_stats_proc_ops = {
-	.open		= smi_stats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name,
-			    const struct file_operations *proc_ops,
-			    void *data)
-{
-	int                    rv = 0;
-	struct proc_dir_entry  *file;
-	struct ipmi_proc_entry *entry;
-
-	/* Create a list element. */
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
-	entry->name = kstrdup(name, GFP_KERNEL);
-	if (!entry->name) {
-		kfree(entry);
-		return -ENOMEM;
-	}
-
-	file = proc_create_data(name, 0, smi->proc_dir, proc_ops, data);
-	if (!file) {
-		kfree(entry->name);
-		kfree(entry);
-		rv = -ENOMEM;
-	} else {
-		mutex_lock(&smi->proc_entry_lock);
-		/* Stick it on the list. */
-		entry->next = smi->proc_entries;
-		smi->proc_entries = entry;
-		mutex_unlock(&smi->proc_entry_lock);
-	}
-
-	return rv;
-}
-EXPORT_SYMBOL(ipmi_smi_add_proc_entry);
-
-static int add_proc_entries(ipmi_smi_t smi, int num)
-{
-	int rv = 0;
-
-	sprintf(smi->proc_dir_name, "%d", num);
-	smi->proc_dir = proc_mkdir(smi->proc_dir_name, proc_ipmi_root);
-	if (!smi->proc_dir)
-		rv = -ENOMEM;
-
-	if (rv == 0)
-		rv = ipmi_smi_add_proc_entry(smi, "stats",
-					     &smi_stats_proc_ops,
-					     smi);
-
-	if (rv == 0)
-		rv = ipmi_smi_add_proc_entry(smi, "ipmb",
-					     &smi_ipmb_proc_ops,
-					     smi);
-
-	if (rv == 0)
-		rv = ipmi_smi_add_proc_entry(smi, "version",
-					     &smi_version_proc_ops,
-					     smi);
-
-	return rv;
-}
-
-static void remove_proc_entries(ipmi_smi_t smi)
-{
-	struct ipmi_proc_entry *entry;
-
-	mutex_lock(&smi->proc_entry_lock);
-	while (smi->proc_entries) {
-		entry = smi->proc_entries;
-		smi->proc_entries = entry->next;
-
-		remove_proc_entry(entry->name, smi->proc_dir);
-		kfree(entry->name);
-		kfree(entry);
-	}
-	mutex_unlock(&smi->proc_entry_lock);
-	remove_proc_entry(smi->proc_dir_name, proc_ipmi_root);
-}
-#endif /* CONFIG_IPMI_PROC_INTERFACE */
-
 static ssize_t device_id_show(struct device *dev,
 			      struct device_attribute *attr,
 			      char *buf)
@@ -2885,7 +2833,7 @@
 /*
  * Must be called with intf->bmc_reg_mutex held.
  */
-static void __ipmi_bmc_unregister(ipmi_smi_t intf)
+static void __ipmi_bmc_unregister(struct ipmi_smi *intf)
 {
 	struct bmc_device *bmc = intf->bmc;
 
@@ -2905,7 +2853,7 @@
 	intf->bmc_registered = false;
 }
 
-static void ipmi_bmc_unregister(ipmi_smi_t intf)
+static void ipmi_bmc_unregister(struct ipmi_smi *intf)
 {
 	mutex_lock(&intf->bmc_reg_mutex);
 	__ipmi_bmc_unregister(intf);
@@ -2915,7 +2863,7 @@
 /*
  * Must be called with intf->bmc_reg_mutex held.
  */
-static int __ipmi_bmc_register(ipmi_smi_t intf,
+static int __ipmi_bmc_register(struct ipmi_smi *intf,
 			       struct ipmi_device_id *id,
 			       bool guid_set, guid_t *guid, int intf_num)
 {
@@ -3077,7 +3025,7 @@
 }
 
 static int
-send_guid_cmd(ipmi_smi_t intf, int chan)
+send_guid_cmd(struct ipmi_smi *intf, int chan)
 {
 	struct kernel_ipmi_msg            msg;
 	struct ipmi_system_interface_addr si;
@@ -3104,7 +3052,7 @@
 			      -1, 0);
 }
 
-static void guid_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg)
+static void guid_handler(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
 {
 	struct bmc_device *bmc = intf->bmc;
 
@@ -3139,7 +3087,7 @@
 	wake_up(&intf->waitq);
 }
 
-static void __get_guid(ipmi_smi_t intf)
+static void __get_guid(struct ipmi_smi *intf)
 {
 	int rv;
 	struct bmc_device *bmc = intf->bmc;
@@ -3160,7 +3108,7 @@
 }
 
 static int
-send_channel_info_cmd(ipmi_smi_t intf, int chan)
+send_channel_info_cmd(struct ipmi_smi *intf, int chan)
 {
 	struct kernel_ipmi_msg            msg;
 	unsigned char                     data[1];
@@ -3190,7 +3138,7 @@
 }
 
 static void
-channel_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg)
+channel_handler(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
 {
 	int rv = 0;
 	int ch;
@@ -3262,7 +3210,7 @@
 /*
  * Must be holding intf->bmc_reg_mutex to call this.
  */
-static int __scan_channels(ipmi_smi_t intf, struct ipmi_device_id *id)
+static int __scan_channels(struct ipmi_smi *intf, struct ipmi_device_id *id)
 {
 	int rv;
 
@@ -3306,7 +3254,7 @@
 	return 0;
 }
 
-static void ipmi_poll(ipmi_smi_t intf)
+static void ipmi_poll(struct ipmi_smi *intf)
 {
 	if (intf->handlers->poll)
 		intf->handlers->poll(intf->send_info);
@@ -3314,7 +3262,7 @@
 	handle_new_recv_msgs(intf);
 }
 
-void ipmi_poll_interface(ipmi_user_t user)
+void ipmi_poll_interface(struct ipmi_user *user)
 {
 	ipmi_poll(user->intf);
 }
@@ -3322,7 +3270,8 @@
 
 static void redo_bmc_reg(struct work_struct *work)
 {
-	ipmi_smi_t intf = container_of(work, struct ipmi_smi, bmc_reg_work);
+	struct ipmi_smi *intf = container_of(work, struct ipmi_smi,
+					     bmc_reg_work);
 
 	if (!intf->in_shutdown)
 		bmc_get_device_id(intf, NULL, NULL, NULL, NULL);
@@ -3337,8 +3286,7 @@
 {
 	int              i, j;
 	int              rv;
-	ipmi_smi_t       intf;
-	ipmi_smi_t       tintf;
+	struct ipmi_smi *intf, *tintf;
 	struct list_head *link;
 	struct ipmi_device_id id;
 
@@ -3362,6 +3310,13 @@
 	if (!intf)
 		return -ENOMEM;
 
+	rv = init_srcu_struct(&intf->users_srcu);
+	if (rv) {
+		kfree(intf);
+		return rv;
+	}
+
 	intf->bmc = &intf->tmp_bmc;
 	INIT_LIST_HEAD(&intf->bmc->intfs);
 	mutex_init(&intf->bmc->dyn_mutex);
@@ -3386,9 +3341,6 @@
 		intf->seq_table[j].seqid = 0;
 	}
 	intf->curr_seq = 0;
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	mutex_init(&intf->proc_entry_lock);
-#endif
 	spin_lock_init(&intf->waiting_rcv_msgs_lock);
 	INIT_LIST_HEAD(&intf->waiting_rcv_msgs);
 	tasklet_init(&intf->recv_tasklet,
@@ -3410,11 +3362,6 @@
 	for (i = 0; i < IPMI_NUM_STATS; i++)
 		atomic_set(&intf->stats[i], 0);
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	intf->proc_dir = NULL;
-#endif
-
-	mutex_lock(&smi_watchers_mutex);
 	mutex_lock(&ipmi_interfaces_mutex);
 	/* Look for a hole in the numbers. */
 	i = 0;
@@ -3445,25 +3392,14 @@
 	mutex_lock(&intf->bmc_reg_mutex);
 	rv = __scan_channels(intf, &id);
 	mutex_unlock(&intf->bmc_reg_mutex);
-	if (rv)
-		goto out;
-
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	rv = add_proc_entries(intf, i);
-#endif
 
  out:
 	if (rv) {
 		ipmi_bmc_unregister(intf);
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-		if (intf->proc_dir)
-			remove_proc_entries(intf);
-#endif
-		intf->handlers = NULL;
 		list_del_rcu(&intf->link);
 		mutex_unlock(&ipmi_interfaces_mutex);
-		mutex_unlock(&smi_watchers_mutex);
-		synchronize_rcu();
+		synchronize_srcu(&ipmi_interfaces_srcu);
+		cleanup_srcu_struct(&intf->users_srcu);
 		kref_put(&intf->refcount, intf_free);
 	} else {
 		/*
@@ -3474,16 +3410,16 @@
 		smp_wmb();
 		intf->intf_num = i;
 		mutex_unlock(&ipmi_interfaces_mutex);
+
 		/* After this point the interface is legal to use. */
 		call_smi_watchers(i, intf->si_dev);
-		mutex_unlock(&smi_watchers_mutex);
 	}
 
 	return rv;
 }
 EXPORT_SYMBOL(ipmi_register_smi);
 
-static void deliver_smi_err_response(ipmi_smi_t intf,
+static void deliver_smi_err_response(struct ipmi_smi *intf,
 				     struct ipmi_smi_msg *msg,
 				     unsigned char err)
 {
@@ -3495,7 +3431,7 @@
 	handle_one_recv_msg(intf, msg);
 }
 
-static void cleanup_smi_msgs(ipmi_smi_t intf)
+static void cleanup_smi_msgs(struct ipmi_smi *intf)
 {
 	int              i;
 	struct seq_table *ent;
@@ -3528,60 +3464,58 @@
 	}
 
 	for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) {
-		ent = &(intf->seq_table[i]);
+		ent = &intf->seq_table[i];
 		if (!ent->inuse)
 			continue;
-		deliver_err_response(ent->recv_msg, IPMI_ERR_UNSPECIFIED);
+		deliver_err_response(intf, ent->recv_msg, IPMI_ERR_UNSPECIFIED);
 	}
 }
 
-int ipmi_unregister_smi(ipmi_smi_t intf)
+void ipmi_unregister_smi(struct ipmi_smi *intf)
 {
 	struct ipmi_smi_watcher *w;
-	int intf_num = intf->intf_num;
-	ipmi_user_t user;
+	int intf_num = intf->intf_num, index;
 
-	mutex_lock(&smi_watchers_mutex);
 	mutex_lock(&ipmi_interfaces_mutex);
 	intf->intf_num = -1;
 	intf->in_shutdown = true;
 	list_del_rcu(&intf->link);
 	mutex_unlock(&ipmi_interfaces_mutex);
-	synchronize_rcu();
+	synchronize_srcu(&ipmi_interfaces_srcu);
 
-	cleanup_smi_msgs(intf);
-
-	/* Clean up the effects of users on the lower-level software. */
-	mutex_lock(&ipmi_interfaces_mutex);
-	rcu_read_lock();
-	list_for_each_entry_rcu(user, &intf->users, link) {
-		module_put(intf->handlers->owner);
-		if (intf->handlers->dec_usecount)
-			intf->handlers->dec_usecount(intf->send_info);
-	}
-	rcu_read_unlock();
-	intf->handlers = NULL;
-	mutex_unlock(&ipmi_interfaces_mutex);
-
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	remove_proc_entries(intf);
-#endif
-	ipmi_bmc_unregister(intf);
+	/* At this point no users can be added to the interface. */
 
 	/*
 	 * Call all the watcher interfaces to tell them that
-	 * an interface is gone.
+	 * an interface is going away.
 	 */
+	mutex_lock(&smi_watchers_mutex);
 	list_for_each_entry(w, &smi_watchers, link)
 		w->smi_gone(intf_num);
 	mutex_unlock(&smi_watchers_mutex);
 
+	index = srcu_read_lock(&intf->users_srcu);
+	while (!list_empty(&intf->users)) {
+		struct ipmi_user *user =
+			container_of(list_next_rcu(&intf->users),
+				     struct ipmi_user, link);
+
+		_ipmi_destroy_user(user);
+	}
+	srcu_read_unlock(&intf->users_srcu, index);
+
+	intf->handlers->shutdown(intf->send_info);
+
+	cleanup_smi_msgs(intf);
+
+	ipmi_bmc_unregister(intf);
+
+	cleanup_srcu_struct(&intf->users_srcu);
 	kref_put(&intf->refcount, intf_free);
-	return 0;
 }
 EXPORT_SYMBOL(ipmi_unregister_smi);
 
-static int handle_ipmb_get_msg_rsp(ipmi_smi_t          intf,
+static int handle_ipmb_get_msg_rsp(struct ipmi_smi *intf,
 				   struct ipmi_smi_msg *msg)
 {
 	struct ipmi_ipmb_addr ipmb_addr;
@@ -3616,7 +3550,7 @@
 			  msg->rsp[3] & 0x0f,
 			  msg->rsp[8],
 			  (msg->rsp[4] >> 2) & (~1),
-			  (struct ipmi_addr *) &(ipmb_addr),
+			  (struct ipmi_addr *) &ipmb_addr,
 			  &recv_msg)) {
 		/*
 		 * We were unable to find the sequence number,
@@ -3626,9 +3560,7 @@
 		return 0;
 	}
 
-	memcpy(recv_msg->msg_data,
-	       &(msg->rsp[9]),
-	       msg->rsp_size - 9);
+	memcpy(recv_msg->msg_data, &msg->rsp[9], msg->rsp_size - 9);
 	/*
 	 * The other fields matched, so no need to set them, except
 	 * for netfn, which needs to be the response that was
@@ -3638,13 +3570,15 @@
 	recv_msg->msg.data = recv_msg->msg_data;
 	recv_msg->msg.data_len = msg->rsp_size - 10;
 	recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE;
-	ipmi_inc_stat(intf, handled_ipmb_responses);
-	deliver_response(recv_msg);
+	if (deliver_response(intf, recv_msg))
+		ipmi_inc_stat(intf, unhandled_ipmb_responses);
+	else
+		ipmi_inc_stat(intf, handled_ipmb_responses);
 
 	return 0;
 }
 
-static int handle_ipmb_get_msg_cmd(ipmi_smi_t          intf,
+static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf,
 				   struct ipmi_smi_msg *msg)
 {
 	struct cmd_rcvr          *rcvr;
@@ -3652,7 +3586,7 @@
 	unsigned char            netfn;
 	unsigned char            cmd;
 	unsigned char            chan;
-	ipmi_user_t              user = NULL;
+	struct ipmi_user         *user = NULL;
 	struct ipmi_ipmb_addr    *ipmb_addr;
 	struct ipmi_recv_msg     *recv_msg;
 
@@ -3689,24 +3623,17 @@
 		msg->data[2] = msg->rsp[3];
 		msg->data[3] = msg->rsp[6];
 		msg->data[4] = ((netfn + 1) << 2) | (msg->rsp[7] & 0x3);
-		msg->data[5] = ipmb_checksum(&(msg->data[3]), 2);
+		msg->data[5] = ipmb_checksum(&msg->data[3], 2);
 		msg->data[6] = intf->addrinfo[msg->rsp[3] & 0xf].address;
 		/* rqseq/lun */
 		msg->data[7] = (msg->rsp[7] & 0xfc) | (msg->rsp[4] & 0x3);
 		msg->data[8] = msg->rsp[8]; /* cmd */
 		msg->data[9] = IPMI_INVALID_CMD_COMPLETION_CODE;
-		msg->data[10] = ipmb_checksum(&(msg->data[6]), 4);
+		msg->data[10] = ipmb_checksum(&msg->data[6], 4);
 		msg->data_size = 11;
 
-#ifdef DEBUG_MSGING
-	{
-		int m;
-		printk("Invalid command:");
-		for (m = 0; m < msg->data_size; m++)
-			printk(" %2.2x", msg->data[m]);
-		printk("\n");
-	}
-#endif
+		ipmi_debug_msg("Invalid command:", msg->data, msg->data_size);
+
 		rcu_read_lock();
 		if (!intf->in_shutdown) {
 			smi_send(intf, intf->handlers, msg, 0);
@@ -3719,9 +3646,6 @@
 		}
 		rcu_read_unlock();
 	} else {
-		/* Deliver the message to the user. */
-		ipmi_inc_stat(intf, handled_commands);
-
 		recv_msg = ipmi_alloc_recv_msg();
 		if (!recv_msg) {
 			/*
@@ -3755,17 +3679,19 @@
 			 * at the end also needs to be removed.
 			 */
 			recv_msg->msg.data_len = msg->rsp_size - 10;
-			memcpy(recv_msg->msg_data,
-			       &(msg->rsp[9]),
+			memcpy(recv_msg->msg_data, &msg->rsp[9],
 			       msg->rsp_size - 10);
-			deliver_response(recv_msg);
+			if (deliver_response(intf, recv_msg))
+				ipmi_inc_stat(intf, unhandled_commands);
+			else
+				ipmi_inc_stat(intf, handled_commands);
 		}
 	}
 
 	return rv;
 }
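
The ipmb_checksum() calls above compute the IPMB checksum from the IPMI
specification: a two's-complement sum chosen so that the covered bytes
plus the checksum byte add up to zero modulo 256.  A sketch consistent
with that definition (the driver's own helper may differ in minor
detail):

/* Two's-complement IPMB checksum: pick csum so that
 * (sum of covered bytes + csum) % 256 == 0. */
static unsigned char example_ipmb_checksum(const unsigned char *data,
					   int size)
{
	unsigned char csum = 0;

	for (; size > 0; size--, data++)
		csum += *data;

	return -csum;
}
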
 
-static int handle_lan_get_msg_rsp(ipmi_smi_t          intf,
+static int handle_lan_get_msg_rsp(struct ipmi_smi *intf,
 				  struct ipmi_smi_msg *msg)
 {
 	struct ipmi_lan_addr  lan_addr;
@@ -3804,7 +3730,7 @@
 			  msg->rsp[3] & 0x0f,
 			  msg->rsp[10],
 			  (msg->rsp[6] >> 2) & (~1),
-			  (struct ipmi_addr *) &(lan_addr),
+			  (struct ipmi_addr *) &lan_addr,
 			  &recv_msg)) {
 		/*
 		 * We were unable to find the sequence number,
@@ -3814,9 +3740,7 @@
 		return 0;
 	}
 
-	memcpy(recv_msg->msg_data,
-	       &(msg->rsp[11]),
-	       msg->rsp_size - 11);
+	memcpy(recv_msg->msg_data, &msg->rsp[11], msg->rsp_size - 11);
 	/*
 	 * The other fields matched, so no need to set them, except
 	 * for netfn, which needs to be the response that was
@@ -3826,13 +3750,15 @@
 	recv_msg->msg.data = recv_msg->msg_data;
 	recv_msg->msg.data_len = msg->rsp_size - 12;
 	recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE;
-	ipmi_inc_stat(intf, handled_lan_responses);
-	deliver_response(recv_msg);
+	if (deliver_response(intf, recv_msg))
+		ipmi_inc_stat(intf, unhandled_lan_responses);
+	else
+		ipmi_inc_stat(intf, handled_lan_responses);
 
 	return 0;
 }
 
-static int handle_lan_get_msg_cmd(ipmi_smi_t          intf,
+static int handle_lan_get_msg_cmd(struct ipmi_smi *intf,
 				  struct ipmi_smi_msg *msg)
 {
 	struct cmd_rcvr          *rcvr;
@@ -3840,7 +3766,7 @@
 	unsigned char            netfn;
 	unsigned char            cmd;
 	unsigned char            chan;
-	ipmi_user_t              user = NULL;
+	struct ipmi_user         *user = NULL;
 	struct ipmi_lan_addr     *lan_addr;
 	struct ipmi_recv_msg     *recv_msg;
 
@@ -3878,9 +3804,6 @@
 		 */
 		rv = 0;
 	} else {
-		/* Deliver the message to the user. */
-		ipmi_inc_stat(intf, handled_commands);
-
 		recv_msg = ipmi_alloc_recv_msg();
 		if (!recv_msg) {
 			/*
@@ -3916,10 +3839,12 @@
 			 * at the end also needs to be removed.
 			 */
 			recv_msg->msg.data_len = msg->rsp_size - 12;
-			memcpy(recv_msg->msg_data,
-			       &(msg->rsp[11]),
+			memcpy(recv_msg->msg_data, &msg->rsp[11],
 			       msg->rsp_size - 12);
-			deliver_response(recv_msg);
+			if (deliver_response(intf, recv_msg))
+				ipmi_inc_stat(intf, unhandled_commands);
+			else
+				ipmi_inc_stat(intf, handled_commands);
 		}
 	}
 
@@ -3932,7 +3857,7 @@
  * the OEM.  See IPMI 2.0 specification, Chapter 6 and
  * Chapter 22, sections 22.6 and 22.24 for more details.
  */
-static int handle_oem_get_msg_cmd(ipmi_smi_t          intf,
+static int handle_oem_get_msg_cmd(struct ipmi_smi *intf,
 				  struct ipmi_smi_msg *msg)
 {
 	struct cmd_rcvr       *rcvr;
@@ -3940,7 +3865,7 @@
 	unsigned char         netfn;
 	unsigned char         cmd;
 	unsigned char         chan;
-	ipmi_user_t           user = NULL;
+	struct ipmi_user *user = NULL;
 	struct ipmi_system_interface_addr *smi_addr;
 	struct ipmi_recv_msg  *recv_msg;
 
@@ -3987,9 +3912,6 @@
 
 		rv = 0;
 	} else {
-		/* Deliver the message to the user. */
-		ipmi_inc_stat(intf, handled_commands);
-
 		recv_msg = ipmi_alloc_recv_msg();
 		if (!recv_msg) {
 			/*
@@ -4007,7 +3929,7 @@
 			 * requirements
 			 */
 			smi_addr = ((struct ipmi_system_interface_addr *)
-				    &(recv_msg->addr));
+				    &recv_msg->addr);
 			smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
 			smi_addr->channel = IPMI_BMC_CHANNEL;
 			smi_addr->lun = msg->rsp[0] & 3;
@@ -4024,10 +3946,12 @@
 			 * the Channel Byte in the "GET MESSAGE" command
 			 */
 			recv_msg->msg.data_len = msg->rsp_size - 4;
-			memcpy(recv_msg->msg_data,
-			       &(msg->rsp[4]),
+			memcpy(recv_msg->msg_data, &msg->rsp[4],
 			       msg->rsp_size - 4);
-			deliver_response(recv_msg);
+			if (deliver_response(intf, recv_msg))
+				ipmi_inc_stat(intf, unhandled_commands);
+			else
+				ipmi_inc_stat(intf, handled_commands);
 		}
 	}
 
@@ -4040,26 +3964,25 @@
 	struct ipmi_system_interface_addr *smi_addr;
 
 	recv_msg->msgid = 0;
-	smi_addr = (struct ipmi_system_interface_addr *) &(recv_msg->addr);
+	smi_addr = (struct ipmi_system_interface_addr *) &recv_msg->addr;
 	smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
 	smi_addr->channel = IPMI_BMC_CHANNEL;
 	smi_addr->lun = msg->rsp[0] & 3;
 	recv_msg->recv_type = IPMI_ASYNC_EVENT_RECV_TYPE;
 	recv_msg->msg.netfn = msg->rsp[0] >> 2;
 	recv_msg->msg.cmd = msg->rsp[1];
-	memcpy(recv_msg->msg_data, &(msg->rsp[3]), msg->rsp_size - 3);
+	memcpy(recv_msg->msg_data, &msg->rsp[3], msg->rsp_size - 3);
 	recv_msg->msg.data = recv_msg->msg_data;
 	recv_msg->msg.data_len = msg->rsp_size - 3;
 }
 
-static int handle_read_event_rsp(ipmi_smi_t          intf,
+static int handle_read_event_rsp(struct ipmi_smi *intf,
 				 struct ipmi_smi_msg *msg)
 {
 	struct ipmi_recv_msg *recv_msg, *recv_msg2;
 	struct list_head     msgs;
-	ipmi_user_t          user;
-	int                  rv = 0;
-	int                  deliver_count = 0;
+	struct ipmi_user     *user;
+	int rv = 0, deliver_count = 0, index;
 	unsigned long        flags;
 
 	if (msg->rsp_size < 19) {
@@ -4083,7 +4006,7 @@
 	 * Allocate and fill in one message for every user that is
 	 * getting events.
 	 */
-	rcu_read_lock();
+	index = srcu_read_lock(&intf->users_srcu);
 	list_for_each_entry_rcu(user, &intf->users, link) {
 		if (!user->gets_events)
 			continue;
@@ -4110,15 +4033,15 @@
 		copy_event_into_recv_msg(recv_msg, msg);
 		recv_msg->user = user;
 		kref_get(&user->refcount);
-		list_add_tail(&(recv_msg->link), &msgs);
+		list_add_tail(&recv_msg->link, &msgs);
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&intf->users_srcu, index);
 
 	if (deliver_count) {
 		/* Now deliver all the messages. */
 		list_for_each_entry_safe(recv_msg, recv_msg2, &msgs, link) {
 			list_del(&recv_msg->link);
-			deliver_response(recv_msg);
+			deliver_local_response(intf, recv_msg);
 		}
 	} else if (intf->waiting_events_count < MAX_EVENTS_IN_QUEUE) {
 		/*
@@ -4137,7 +4060,7 @@
 		}
 
 		copy_event_into_recv_msg(recv_msg, msg);
-		list_add_tail(&(recv_msg->link), &(intf->waiting_events));
+		list_add_tail(&recv_msg->link, &intf->waiting_events);
 		intf->waiting_events_count++;
 	} else if (!intf->event_msg_printed) {
 		/*
@@ -4150,16 +4073,16 @@
 	}
 
  out:
-	spin_unlock_irqrestore(&(intf->events_lock), flags);
+	spin_unlock_irqrestore(&intf->events_lock, flags);
 
 	return rv;
 }
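
The kref_get() in handle_read_event_rsp() above (paired with a put when
the receive message is freed) follows the usual kref ownership rule:
every structure that stores a pointer to the user also holds a
reference, and the final kref_put() runs the release function.  A
generic sketch with placeholder names (struct example and free_example()
are illustrative, not the driver's types):

#include <linux/kref.h>
#include <linux/slab.h>

struct example {
	struct kref refcount;
	/* ... payload ... */
};

static void free_example(struct kref *kref)
{
	kfree(container_of(kref, struct example, refcount));
}

/* Every new holder of the pointer takes a reference... */
static void example_take(struct example *e)
{
	kref_get(&e->refcount);
}

/* ...and drops it when done; the last put frees the object. */
static void example_drop(struct example *e)
{
	kref_put(&e->refcount, free_example);
}
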
 
-static int handle_bmc_rsp(ipmi_smi_t          intf,
+static int handle_bmc_rsp(struct ipmi_smi *intf,
 			  struct ipmi_smi_msg *msg)
 {
 	struct ipmi_recv_msg *recv_msg;
-	struct ipmi_user     *user;
+	struct ipmi_system_interface_addr *smi_addr;
 
 	recv_msg = (struct ipmi_recv_msg *) msg->user_data;
 	if (recv_msg == NULL) {
@@ -4168,32 +4091,19 @@
 		return 0;
 	}
 
-	user = recv_msg->user;
-	/* Make sure the user still exists. */
-	if (user && !user->valid) {
-		/* The user for the message went away, so give up. */
-		ipmi_inc_stat(intf, unhandled_local_responses);
-		ipmi_free_recv_msg(recv_msg);
-	} else {
-		struct ipmi_system_interface_addr *smi_addr;
-
-		ipmi_inc_stat(intf, handled_local_responses);
-		recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE;
-		recv_msg->msgid = msg->msgid;
-		smi_addr = ((struct ipmi_system_interface_addr *)
-			    &(recv_msg->addr));
-		smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
-		smi_addr->channel = IPMI_BMC_CHANNEL;
-		smi_addr->lun = msg->rsp[0] & 3;
-		recv_msg->msg.netfn = msg->rsp[0] >> 2;
-		recv_msg->msg.cmd = msg->rsp[1];
-		memcpy(recv_msg->msg_data,
-		       &(msg->rsp[2]),
-		       msg->rsp_size - 2);
-		recv_msg->msg.data = recv_msg->msg_data;
-		recv_msg->msg.data_len = msg->rsp_size - 2;
-		deliver_response(recv_msg);
-	}
+	recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE;
+	recv_msg->msgid = msg->msgid;
+	smi_addr = ((struct ipmi_system_interface_addr *)
+		    &recv_msg->addr);
+	smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
+	smi_addr->channel = IPMI_BMC_CHANNEL;
+	smi_addr->lun = msg->rsp[0] & 3;
+	recv_msg->msg.netfn = msg->rsp[0] >> 2;
+	recv_msg->msg.cmd = msg->rsp[1];
+	memcpy(recv_msg->msg_data, &msg->rsp[2], msg->rsp_size - 2);
+	recv_msg->msg.data = recv_msg->msg_data;
+	recv_msg->msg.data_len = msg->rsp_size - 2;
+	deliver_local_response(intf, recv_msg);
 
 	return 0;
 }
@@ -4203,19 +4113,13 @@
  * 0 if the message should be freed, or -1 if the message should not
  * be freed or requeued.
  */
-static int handle_one_recv_msg(ipmi_smi_t          intf,
+static int handle_one_recv_msg(struct ipmi_smi *intf,
 			       struct ipmi_smi_msg *msg)
 {
 	int requeue;
 	int chan;
 
-#ifdef DEBUG_MSGING
-	int m;
-	printk("Recv:");
-	for (m = 0; m < msg->rsp_size; m++)
-		printk(" %2.2x", msg->rsp[m]);
-	printk("\n");
-#endif
+	ipmi_debug_msg("Recv:", msg->rsp, msg->rsp_size);
 	if (msg->rsp_size < 2) {
 		/* Message is too small to be correct. */
 		dev_warn(intf->si_dev,
@@ -4252,7 +4156,7 @@
 		 * It's a response to a response we sent.  For this we
 		 * deliver a send message response to the user.
 		 */
-		struct ipmi_recv_msg     *recv_msg = msg->user_data;
+		struct ipmi_recv_msg *recv_msg = msg->user_data;
 
 		requeue = 0;
 		if (msg->rsp_size < 2)
@@ -4267,15 +4171,11 @@
 		if (!recv_msg)
 			goto out;
 
-		/* Make sure the user still exists. */
-		if (!recv_msg->user || !recv_msg->user->valid)
-			goto out;
-
 		recv_msg->recv_type = IPMI_RESPONSE_RESPONSE_TYPE;
 		recv_msg->msg.data = recv_msg->msg_data;
 		recv_msg->msg.data_len = 1;
 		recv_msg->msg_data[0] = msg->rsp[2];
-		deliver_response(recv_msg);
+		deliver_local_response(intf, recv_msg);
 	} else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2))
 		   && (msg->rsp[1] == IPMI_GET_MSG_CMD)) {
 		struct ipmi_channel   *chans;
@@ -4367,7 +4267,7 @@
 /*
  * If there are messages in the queue or pretimeouts, handle them.
  */
-static void handle_new_recv_msgs(ipmi_smi_t intf)
+static void handle_new_recv_msgs(struct ipmi_smi *intf)
 {
 	struct ipmi_smi_msg  *smi_msg;
 	unsigned long        flags = 0;
@@ -4412,22 +4312,23 @@
 	 * deliver pretimeouts to all the users.
 	 */
 	if (atomic_add_unless(&intf->watchdog_pretimeouts_to_deliver, -1, 0)) {
-		ipmi_user_t user;
+		struct ipmi_user *user;
+		int index;
 
-		rcu_read_lock();
+		index = srcu_read_lock(&intf->users_srcu);
 		list_for_each_entry_rcu(user, &intf->users, link) {
 			if (user->handler->ipmi_watchdog_pretimeout)
 				user->handler->ipmi_watchdog_pretimeout(
 					user->handler_data);
 		}
-		rcu_read_unlock();
+		srcu_read_unlock(&intf->users_srcu, index);
 	}
 }
 
 static void smi_recv_tasklet(unsigned long val)
 {
 	unsigned long flags = 0; /* keep us warning-free. */
-	ipmi_smi_t intf = (ipmi_smi_t) val;
+	struct ipmi_smi *intf = (struct ipmi_smi *) val;
 	int run_to_completion = intf->run_to_completion;
 	struct ipmi_smi_msg *newmsg = NULL;
 
@@ -4469,7 +4370,7 @@
 }
 
 /* Handle a new message from the lower layer. */
-void ipmi_smi_msg_received(ipmi_smi_t          intf,
+void ipmi_smi_msg_received(struct ipmi_smi *intf,
 			   struct ipmi_smi_msg *msg)
 {
 	unsigned long flags = 0; /* keep us warning-free. */
@@ -4550,7 +4451,7 @@
 }
 EXPORT_SYMBOL(ipmi_smi_msg_received);
 
-void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf)
+void ipmi_smi_watchdog_pretimeout(struct ipmi_smi *intf)
 {
 	if (intf->in_shutdown)
 		return;
@@ -4561,7 +4462,7 @@
 EXPORT_SYMBOL(ipmi_smi_watchdog_pretimeout);
 
 static struct ipmi_smi_msg *
-smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg,
+smi_from_recv_msg(struct ipmi_smi *intf, struct ipmi_recv_msg *recv_msg,
 		  unsigned char seq, long seqid)
 {
 	struct ipmi_smi_msg *smi_msg = ipmi_alloc_smi_msg();
@@ -4576,26 +4477,18 @@
 	smi_msg->data_size = recv_msg->msg.data_len;
 	smi_msg->msgid = STORE_SEQ_IN_MSGID(seq, seqid);
 
-#ifdef DEBUG_MSGING
-	{
-		int m;
-		printk("Resend: ");
-		for (m = 0; m < smi_msg->data_size; m++)
-			printk(" %2.2x", smi_msg->data[m]);
-		printk("\n");
-	}
-#endif
+	ipmi_debug_msg("Resend: ", smi_msg->data, smi_msg->data_size);
+
 	return smi_msg;
 }
 
-static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent,
+static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
 			      struct list_head *timeouts,
 			      unsigned long timeout_period,
 			      int slot, unsigned long *flags,
 			      unsigned int *waiting_msgs)
 {
-	struct ipmi_recv_msg     *msg;
-	const struct ipmi_smi_handlers *handlers;
+	struct ipmi_recv_msg *msg;
 
 	if (intf->in_shutdown)
 		return;
@@ -4653,8 +4546,7 @@
 		 * only for messages to the local MC, which don't get
 		 * resent.
 		 */
-		handlers = intf->handlers;
-		if (handlers) {
+		if (intf->handlers) {
 			if (is_lan_addr(&ent->recv_msg->addr))
 				ipmi_inc_stat(intf,
 					      retransmitted_lan_commands);
@@ -4662,7 +4554,7 @@
 				ipmi_inc_stat(intf,
 					      retransmitted_ipmb_commands);
 
-			smi_send(intf, handlers, smi_msg, 0);
+			smi_send(intf, intf->handlers, smi_msg, 0);
 		} else
 			ipmi_free_smi_msg(smi_msg);
 
@@ -4670,7 +4562,7 @@
 	}
 }
 
-static unsigned int ipmi_timeout_handler(ipmi_smi_t intf,
+static unsigned int ipmi_timeout_handler(struct ipmi_smi *intf,
 					 unsigned long timeout_period)
 {
 	struct list_head     timeouts;
@@ -4694,14 +4586,20 @@
 	 */
 	INIT_LIST_HEAD(&timeouts);
 	spin_lock_irqsave(&intf->seq_lock, flags);
+	if (intf->ipmb_maintenance_mode_timeout) {
+		if (intf->ipmb_maintenance_mode_timeout <= timeout_period)
+			intf->ipmb_maintenance_mode_timeout = 0;
+		else
+			intf->ipmb_maintenance_mode_timeout -= timeout_period;
+	}
 	for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++)
-		check_msg_timeout(intf, &(intf->seq_table[i]),
+		check_msg_timeout(intf, &intf->seq_table[i],
 				  &timeouts, timeout_period, i,
 				  &flags, &waiting_msgs);
 	spin_unlock_irqrestore(&intf->seq_lock, flags);
 
 	list_for_each_entry_safe(msg, msg2, &timeouts, link)
-		deliver_err_response(msg, IPMI_TIMEOUT_COMPLETION_CODE);
+		deliver_err_response(intf, msg, IPMI_TIMEOUT_COMPLETION_CODE);
 
 	/*
 	 * Maintenance mode handling.  Check the timeout
@@ -4731,7 +4629,7 @@
 	return waiting_msgs;
 }
 
-static void ipmi_request_event(ipmi_smi_t intf)
+static void ipmi_request_event(struct ipmi_smi *intf)
 {
 	/* No event requests when in maintenance mode. */
 	if (intf->maintenance_mode_enable)
@@ -4747,13 +4645,13 @@
 
 static void ipmi_timeout(struct timer_list *unused)
 {
-	ipmi_smi_t intf;
-	int nt = 0;
+	struct ipmi_smi *intf;
+	int nt = 0, index;
 
 	if (atomic_read(&stop_operation))
 		return;
 
-	rcu_read_lock();
+	index = srcu_read_lock(&ipmi_interfaces_srcu);
 	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
 		int lnt = 0;
 
@@ -4776,13 +4674,13 @@
 
 		nt += lnt;
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 
 	if (nt)
 		mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
 }
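
ipmi_timeout() above is a timer callback in the post-4.15 style:
timer_setup() binds the handler, mod_timer() (re)arms it, and the
handler receives the struct timer_list pointer instead of an opaque
unsigned long.  A minimal sketch of the idiom (all names here are
illustrative, not the driver's):

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list example_timer;

static void example_timeout(struct timer_list *t)
{
	/* ... do the periodic work, then re-arm while still needed ... */
	mod_timer(&example_timer, jiffies + msecs_to_jiffies(1000));
}

static void example_start(void)
{
	timer_setup(&example_timer, example_timeout, 0);
	mod_timer(&example_timer, jiffies + msecs_to_jiffies(1000));
}

static void example_stop(void)
{
	del_timer_sync(&example_timer);
}
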
 
-static void need_waiter(ipmi_smi_t intf)
+static void need_waiter(struct ipmi_smi *intf)
 {
 	/* Racy, but worst case we start the timer twice. */
 	if (!timer_pending(&ipmi_timer))
@@ -4853,8 +4751,8 @@
 /*
  * Inside a panic, send a message and wait for a response.
  */
-static void ipmi_panic_request_and_wait(ipmi_smi_t           intf,
-					struct ipmi_addr     *addr,
+static void ipmi_panic_request_and_wait(struct ipmi_smi *intf,
+					struct ipmi_addr *addr,
 					struct kernel_ipmi_msg *msg)
 {
 	struct ipmi_smi_msg  smi_msg;
@@ -4885,7 +4783,8 @@
 		ipmi_poll(intf);
 }
 
-static void event_receiver_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg)
+static void event_receiver_fetcher(struct ipmi_smi *intf,
+				   struct ipmi_recv_msg *msg)
 {
 	if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE)
 	    && (msg->msg.netfn == IPMI_NETFN_SENSOR_EVENT_RESPONSE)
@@ -4897,7 +4796,7 @@
 	}
 }
 
-static void device_id_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg)
+static void device_id_fetcher(struct ipmi_smi *intf, struct ipmi_recv_msg *msg)
 {
 	if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE)
 	    && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE)
@@ -4912,13 +4811,15 @@
 	}
 }
 
-static void send_panic_events(char *str)
+static void send_panic_events(struct ipmi_smi *intf, char *str)
 {
-	struct kernel_ipmi_msg            msg;
-	ipmi_smi_t                        intf;
-	unsigned char                     data[16];
+	struct kernel_ipmi_msg msg;
+	unsigned char data[16];
 	struct ipmi_system_interface_addr *si;
-	struct ipmi_addr                  addr;
+	struct ipmi_addr addr;
+	char *p = str;
+	struct ipmi_ipmb_addr *ipmb;
+	int j;
 
 	if (ipmi_send_panic_event == IPMI_SEND_PANIC_EVENT_NONE)
 		return;
@@ -4949,15 +4850,8 @@
 		data[7] = str[2];
 	}
 
-	/* For every registered interface, send the event. */
-	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
-		if (!intf->handlers || !intf->handlers->poll)
-			/* Interface is not ready or can't run at panic time. */
-			continue;
-
-		/* Send the event announcing the panic. */
-		ipmi_panic_request_and_wait(intf, &addr, &msg);
-	}
+	/* Send the event announcing the panic. */
+	ipmi_panic_request_and_wait(intf, &addr, &msg);
 
 	/*
 	 * On every interface, dump a bunch of OEM event holding the
@@ -4966,111 +4860,100 @@
 	if (ipmi_send_panic_event != IPMI_SEND_PANIC_EVENT_STRING || !str)
 		return;
 
-	/* For every registered interface, send the event. */
-	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
-		char                  *p = str;
-		struct ipmi_ipmb_addr *ipmb;
-		int                   j;
+	/*
+	 * intf_num is used as a marker to tell if the
+	 * interface is valid.  Thus we need a read barrier to
+	 * make sure data fetched before checking intf_num
+	 * won't be used.
+	 */
+	smp_rmb();
 
-		if (intf->intf_num == -1)
-			/* Interface was not ready yet. */
-			continue;
+	/*
+	 * First job here is to figure out where to send the
+	 * OEM events.  There's no way in IPMI to send OEM
+	 * events using an event send command, so we have to
+	 * find the SEL to put them in and stick them in
+	 * there.
+	 */
 
-		/*
-		 * intf_num is used as an marker to tell if the
-		 * interface is valid.  Thus we need a read barrier to
-		 * make sure data fetched before checking intf_num
-		 * won't be used.
-		 */
-		smp_rmb();
+	/* Get capabilities from the get device id. */
+	intf->local_sel_device = 0;
+	intf->local_event_generator = 0;
+	intf->event_receiver = 0;
 
-		/*
-		 * First job here is to figure out where to send the
-		 * OEM events.  There's no way in IPMI to send OEM
-		 * events using an event send command, so we have to
-		 * find the SEL to put them in and stick them in
-		 * there.
-		 */
+	/* Request the device info from the local MC. */
+	msg.netfn = IPMI_NETFN_APP_REQUEST;
+	msg.cmd = IPMI_GET_DEVICE_ID_CMD;
+	msg.data = NULL;
+	msg.data_len = 0;
+	intf->null_user_handler = device_id_fetcher;
+	ipmi_panic_request_and_wait(intf, &addr, &msg);
 
-		/* Get capabilities from the get device id. */
-		intf->local_sel_device = 0;
-		intf->local_event_generator = 0;
-		intf->event_receiver = 0;
-
-		/* Request the device info from the local MC. */
-		msg.netfn = IPMI_NETFN_APP_REQUEST;
-		msg.cmd = IPMI_GET_DEVICE_ID_CMD;
+	if (intf->local_event_generator) {
+		/* Request the event receiver from the local MC. */
+		msg.netfn = IPMI_NETFN_SENSOR_EVENT_REQUEST;
+		msg.cmd = IPMI_GET_EVENT_RECEIVER_CMD;
 		msg.data = NULL;
 		msg.data_len = 0;
-		intf->null_user_handler = device_id_fetcher;
+		intf->null_user_handler = event_receiver_fetcher;
 		ipmi_panic_request_and_wait(intf, &addr, &msg);
+	}
+	intf->null_user_handler = NULL;
 
-		if (intf->local_event_generator) {
-			/* Request the event receiver from the local MC. */
-			msg.netfn = IPMI_NETFN_SENSOR_EVENT_REQUEST;
-			msg.cmd = IPMI_GET_EVENT_RECEIVER_CMD;
-			msg.data = NULL;
-			msg.data_len = 0;
-			intf->null_user_handler = event_receiver_fetcher;
-			ipmi_panic_request_and_wait(intf, &addr, &msg);
-		}
-		intf->null_user_handler = NULL;
-
+	/*
+	 * Validate the event receiver.  The low bit must not
+	 * be 1 (it must be a valid IPMB address), it cannot
+	 * be zero, and it must not be my address.
+	 */
+	if (((intf->event_receiver & 1) == 0)
+	    && (intf->event_receiver != 0)
+	    && (intf->event_receiver != intf->addrinfo[0].address)) {
 		/*
-		 * Validate the event receiver.  The low bit must not
-		 * be 1 (it must be a valid IPMB address), it cannot
-		 * be zero, and it must not be my address.
+		 * The event receiver is valid, send an IPMB
+		 * message.
 		 */
-		if (((intf->event_receiver & 1) == 0)
-		    && (intf->event_receiver != 0)
-		    && (intf->event_receiver != intf->addrinfo[0].address)) {
-			/*
-			 * The event receiver is valid, send an IPMB
-			 * message.
-			 */
-			ipmb = (struct ipmi_ipmb_addr *) &addr;
-			ipmb->addr_type = IPMI_IPMB_ADDR_TYPE;
-			ipmb->channel = 0; /* FIXME - is this right? */
-			ipmb->lun = intf->event_receiver_lun;
-			ipmb->slave_addr = intf->event_receiver;
-		} else if (intf->local_sel_device) {
-			/*
-			 * The event receiver was not valid (or was
-			 * me), but I am an SEL device, just dump it
-			 * in my SEL.
-			 */
-			si = (struct ipmi_system_interface_addr *) &addr;
-			si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
-			si->channel = IPMI_BMC_CHANNEL;
-			si->lun = 0;
-		} else
-			continue; /* No where to send the event. */
+		ipmb = (struct ipmi_ipmb_addr *) &addr;
+		ipmb->addr_type = IPMI_IPMB_ADDR_TYPE;
+		ipmb->channel = 0; /* FIXME - is this right? */
+		ipmb->lun = intf->event_receiver_lun;
+		ipmb->slave_addr = intf->event_receiver;
+	} else if (intf->local_sel_device) {
+		/*
+		 * The event receiver was not valid (or was
+		 * me), but I am an SEL device, just dump it
+		 * in my SEL.
+		 */
+		si = (struct ipmi_system_interface_addr *) &addr;
+		si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
+		si->channel = IPMI_BMC_CHANNEL;
+		si->lun = 0;
+	} else
+		return; /* Nowhere to send the event. */
 
-		msg.netfn = IPMI_NETFN_STORAGE_REQUEST; /* Storage. */
-		msg.cmd = IPMI_ADD_SEL_ENTRY_CMD;
-		msg.data = data;
-		msg.data_len = 16;
+	msg.netfn = IPMI_NETFN_STORAGE_REQUEST; /* Storage. */
+	msg.cmd = IPMI_ADD_SEL_ENTRY_CMD;
+	msg.data = data;
+	msg.data_len = 16;
 
-		j = 0;
-		while (*p) {
-			int size = strlen(p);
+	j = 0;
+	while (*p) {
+		int size = strlen(p);
 
-			if (size > 11)
-				size = 11;
-			data[0] = 0;
-			data[1] = 0;
-			data[2] = 0xf0; /* OEM event without timestamp. */
-			data[3] = intf->addrinfo[0].address;
-			data[4] = j++; /* sequence # */
-			/*
-			 * Always give 11 bytes, so strncpy will fill
-			 * it with zeroes for me.
-			 */
-			strncpy(data+5, p, 11);
-			p += size;
+		if (size > 11)
+			size = 11;
+		data[0] = 0;
+		data[1] = 0;
+		data[2] = 0xf0; /* OEM event without timestamp. */
+		data[3] = intf->addrinfo[0].address;
+		data[4] = j++; /* sequence # */
+		/*
+		 * Always give 11 bytes, so strncpy will fill
+		 * it with zeroes for me.
+		 */
+		strncpy(data+5, p, 11);
+		p += size;
 
-			ipmi_panic_request_and_wait(intf, &addr, &msg);
-		}
+		ipmi_panic_request_and_wait(intf, &addr, &msg);
 	}
 }
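
The "always give 11 bytes" comment above leans on a strncpy() property
that is easy to forget: when the source is shorter than n, strncpy()
pads the rest of the n-byte destination with NUL bytes, so each OEM
event record is fully initialized.  A runnable user-space demonstration:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[16];

	memset(buf, 0xff, sizeof(buf));
	/* strncpy() copies at most 11 bytes and NUL-pads the remainder
	 * of those 11 bytes when the source string is shorter. */
	strncpy(buf, "panic", 11);

	for (int i = 0; i < 11; i++)
		printf("%02x ", (unsigned char)buf[i]);
	printf("\n");	/* prints: 70 61 6e 69 63 00 00 00 00 00 00 */
	return 0;
}
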
 
@@ -5080,7 +4963,8 @@
 		       unsigned long         event,
 		       void                  *ptr)
 {
-	ipmi_smi_t intf;
+	struct ipmi_smi *intf;
+	struct ipmi_user *user;
 
 	if (has_panicked)
 		return NOTIFY_DONE;
@@ -5088,10 +4972,13 @@
 
 	/* For every registered interface, set it to run to completion. */
 	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
-		if (!intf->handlers)
+		if (!intf->handlers || intf->intf_num == -1)
 			/* Interface is not ready. */
 			continue;
 
+		if (!intf->handlers->poll)
+			continue;
+
 		/*
 		 * If we were interrupted while locking xmit_msgs_lock or
 		 * waiting_rcv_msgs_lock, the corresponding list may be
@@ -5113,9 +5000,15 @@
 		if (intf->handlers->set_run_to_completion)
 			intf->handlers->set_run_to_completion(intf->send_info,
 							      1);
-	}
 
-	send_panic_events(ptr);
+		list_for_each_entry_rcu(user, &intf->users, link) {
+			if (user->handler->ipmi_panic_handler)
+				user->handler->ipmi_panic_handler(
+					user->handler_data);
+		}
+
+		send_panic_events(intf, ptr);
+	}
 
 	return NOTIFY_DONE;
 }
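
panic_event() above runs off the kernel's panic notifier chain, so it
must not sleep, take blocking locks, or allocate.  Hooking that chain is
the standard atomic-notifier registration pattern; a sketch with
placeholder names (on kernels of this vintage panic_notifier_list is
declared in <linux/kernel.h>):

#include <linux/kernel.h>
#include <linux/notifier.h>

static int example_panic_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	/* ptr carries the panic string; keep this path lock-free. */
	return NOTIFY_DONE;
}

static struct notifier_block example_panic_block = {
	.notifier_call	= example_panic_event,
	.priority	= 200,	/* run early in the chain */
};

static void example_register(void)
{
	atomic_notifier_chain_register(&panic_notifier_list,
				       &example_panic_block);
}
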
@@ -5141,16 +5034,6 @@
 
 	pr_info("ipmi message handler version " IPMI_DRIVER_VERSION "\n");
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	proc_ipmi_root = proc_mkdir("ipmi", NULL);
-	if (!proc_ipmi_root) {
-	    pr_err(PFX "Unable to create IPMI proc dir");
-	    driver_unregister(&ipmidriver.driver);
-	    return -ENOMEM;
-	}
-
-#endif /* CONFIG_IPMI_PROC_INTERFACE */
-
 	timer_setup(&ipmi_timer, ipmi_timeout, 0);
 	mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
 
@@ -5189,10 +5072,6 @@
 	atomic_inc(&stop_operation);
 	del_timer_sync(&ipmi_timer);
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	proc_remove(proc_ipmi_root);
-#endif /* CONFIG_IPMI_PROC_INTERFACE */
-
 	driver_unregister(&ipmidriver.driver);
 
 	initialized = 0;
diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c
index 7996337..f6e1941 100644
--- a/drivers/char/ipmi/ipmi_poweroff.c
+++ b/drivers/char/ipmi/ipmi_poweroff.c
@@ -39,9 +39,9 @@
 
 /* Our local state. */
 static int ready;
-static ipmi_user_t ipmi_user;
+static struct ipmi_user *ipmi_user;
 static int ipmi_ifnum;
-static void (*specific_poweroff_func)(ipmi_user_t user);
+static void (*specific_poweroff_func)(struct ipmi_user *user);
 
 /* Holds the old poweroff function so we can restore it on removal. */
 static void (*old_poweroff_func)(void);
@@ -118,7 +118,7 @@
 };
 
 
-static int ipmi_request_wait_for_response(ipmi_user_t            user,
+static int ipmi_request_wait_for_response(struct ipmi_user       *user,
 					  struct ipmi_addr       *addr,
 					  struct kernel_ipmi_msg *send_msg)
 {
@@ -138,7 +138,7 @@
 }
 
 /* Wait for message to complete, spinning. */
-static int ipmi_request_in_rc_mode(ipmi_user_t            user,
+static int ipmi_request_in_rc_mode(struct ipmi_user       *user,
 				   struct ipmi_addr       *addr,
 				   struct kernel_ipmi_msg *send_msg)
 {
@@ -178,9 +178,9 @@
 #define IPMI_MOTOROLA_MANUFACTURER_ID		0x0000A1
 #define IPMI_MOTOROLA_PPS_IPMC_PRODUCT_ID	0x0051
 
-static void (*atca_oem_poweroff_hook)(ipmi_user_t user);
+static void (*atca_oem_poweroff_hook)(struct ipmi_user *user);
 
-static void pps_poweroff_atca(ipmi_user_t user)
+static void pps_poweroff_atca(struct ipmi_user *user)
 {
 	struct ipmi_system_interface_addr smi_addr;
 	struct kernel_ipmi_msg            send_msg;
@@ -208,7 +208,7 @@
 	return;
 }
 
-static int ipmi_atca_detect(ipmi_user_t user)
+static int ipmi_atca_detect(struct ipmi_user *user)
 {
 	struct ipmi_system_interface_addr smi_addr;
 	struct kernel_ipmi_msg            send_msg;
@@ -245,7 +245,7 @@
 	return !rv;
 }
 
-static void ipmi_poweroff_atca(ipmi_user_t user)
+static void ipmi_poweroff_atca(struct ipmi_user *user)
 {
 	struct ipmi_system_interface_addr smi_addr;
 	struct kernel_ipmi_msg            send_msg;
@@ -309,13 +309,13 @@
 #define IPMI_CPI1_PRODUCT_ID		0x000157
 #define IPMI_CPI1_MANUFACTURER_ID	0x0108
 
-static int ipmi_cpi1_detect(ipmi_user_t user)
+static int ipmi_cpi1_detect(struct ipmi_user *user)
 {
 	return ((mfg_id == IPMI_CPI1_MANUFACTURER_ID)
 		&& (prod_id == IPMI_CPI1_PRODUCT_ID));
 }
 
-static void ipmi_poweroff_cpi1(ipmi_user_t user)
+static void ipmi_poweroff_cpi1(struct ipmi_user *user)
 {
 	struct ipmi_system_interface_addr smi_addr;
 	struct ipmi_ipmb_addr             ipmb_addr;
@@ -424,7 +424,7 @@
  */
 
 #define DELL_IANA_MFR_ID {0xA2, 0x02, 0x00}
-static int ipmi_dell_chassis_detect(ipmi_user_t user)
+static int ipmi_dell_chassis_detect(struct ipmi_user *user)
 {
 	const char ipmi_version_major = ipmi_version & 0xF;
 	const char ipmi_version_minor = (ipmi_version >> 4) & 0xF;
@@ -445,7 +445,7 @@
 
 #define HP_IANA_MFR_ID 0x0b
 #define HP_BMC_PROD_ID 0x8201
-static int ipmi_hp_chassis_detect(ipmi_user_t user)
+static int ipmi_hp_chassis_detect(struct ipmi_user *user)
 {
 	if (mfg_id == HP_IANA_MFR_ID
 		&& prod_id == HP_BMC_PROD_ID
@@ -461,13 +461,13 @@
 #define IPMI_NETFN_CHASSIS_REQUEST	0
 #define IPMI_CHASSIS_CONTROL_CMD	0x02
 
-static int ipmi_chassis_detect(ipmi_user_t user)
+static int ipmi_chassis_detect(struct ipmi_user *user)
 {
 	/* Chassis support, use it. */
 	return (capabilities & 0x80);
 }
 
-static void ipmi_poweroff_chassis(ipmi_user_t user)
+static void ipmi_poweroff_chassis(struct ipmi_user *user)
 {
 	struct ipmi_system_interface_addr smi_addr;
 	struct kernel_ipmi_msg            send_msg;
@@ -517,8 +517,8 @@
 /* Table of possible power off functions. */
 struct poweroff_function {
 	char *platform_type;
-	int  (*detect)(ipmi_user_t user);
-	void (*poweroff_func)(ipmi_user_t user);
+	int  (*detect)(struct ipmi_user *user);
+	void (*poweroff_func)(struct ipmi_user *user);
 };
 
 static struct poweroff_function poweroff_functions[] = {
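
With the ipmi_user_t typedef gone, both callbacks in this table take a
struct ipmi_user * directly.  The table is scanned at detection time;
the scan loop itself is outside this hunk, so the following is a
sketch of the assumed dispatch, reusing the names declared above:

	/* Assumed shape of the detection scan; not part of this hunk. */
	static void pick_poweroff_function(struct ipmi_user *user)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(poweroff_functions); i++) {
			if (poweroff_functions[i].detect(user)) {
				specific_poweroff_func =
					poweroff_functions[i].poweroff_func;
				break;
			}
		}
	}
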
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index ff870aa..ad353be 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -122,8 +122,8 @@
 };
 
 struct smi_info {
-	int                    intf_num;
-	ipmi_smi_t             intf;
+	int                    si_num;
+	struct ipmi_smi        *intf;
 	struct si_sm_data      *si_sm;
 	const struct si_sm_handlers *handlers;
 	spinlock_t             si_lock;
@@ -261,7 +261,6 @@
 static bool unload_when_empty = true;
 
 static int try_smi_init(struct smi_info *smi);
-static void shutdown_one_si(struct smi_info *smi_info);
 static void cleanup_one_si(struct smi_info *smi_info);
 static void cleanup_ipmi_si(void);
 
@@ -287,10 +286,7 @@
 			     struct ipmi_smi_msg *msg)
 {
 	/* Deliver the message to the upper layer. */
-	if (smi_info->intf)
-		ipmi_smi_msg_received(smi_info->intf, msg);
-	else
-		ipmi_free_smi_msg(msg);
+	ipmi_smi_msg_received(smi_info->intf, msg);
 }
 
 static void return_hosed_msg(struct smi_info *smi_info, int cCode)
@@ -471,8 +467,7 @@
 
 		start_clear_flags(smi_info);
 		smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
-		if (smi_info->intf)
-			ipmi_smi_watchdog_pretimeout(smi_info->intf);
+		ipmi_smi_watchdog_pretimeout(smi_info->intf);
 	} else if (smi_info->msg_flags & RECEIVE_MSG_AVAIL) {
 		/* Messages available. */
 		smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
@@ -798,8 +793,7 @@
 	 * We prefer handling attn over new messages.  But don't do
 	 * this if there is not yet an upper layer to handle anything.
 	 */
-	if (likely(smi_info->intf) &&
-	    (si_sm_result == SI_SM_ATTN || smi_info->got_attn)) {
+	if (si_sm_result == SI_SM_ATTN || smi_info->got_attn) {
 		unsigned char msg[2];
 
 		if (smi_info->si_state != SI_NORMAL) {
@@ -962,8 +956,8 @@
 {
 	unsigned int max_busy_us = 0;
 
-	if (smi_info->intf_num < num_max_busy_us)
-		max_busy_us = kipmid_max_busy_us[smi_info->intf_num];
+	if (smi_info->si_num < num_max_busy_us)
+		max_busy_us = kipmid_max_busy_us[smi_info->si_num];
 	if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY)
 		ipmi_si_set_not_busy(busy_until);
 	else if (!ipmi_si_is_busy(busy_until)) {
@@ -1143,8 +1137,8 @@
 	return IRQ_HANDLED;
 }
 
-static int smi_start_processing(void       *send_info,
-				ipmi_smi_t intf)
+static int smi_start_processing(void            *send_info,
+				struct ipmi_smi *intf)
 {
 	struct smi_info *new_smi = send_info;
 	int             enable = 0;
@@ -1165,8 +1159,8 @@
 	/*
 	 * Check if the user forcefully enabled the daemon.
 	 */
-	if (new_smi->intf_num < num_force_kipmid)
-		enable = force_kipmid[new_smi->intf_num];
+	if (new_smi->si_num < num_force_kipmid)
+		enable = force_kipmid[new_smi->si_num];
 	/*
 	 * The BT interface is efficient enough to not need a thread,
 	 * and there is no need for a thread if we have interrupts.
@@ -1176,7 +1170,7 @@
 
 	if (enable) {
 		new_smi->thread = kthread_run(ipmi_thread, new_smi,
-					      "kipmi%d", new_smi->intf_num);
+					      "kipmi%d", new_smi->si_num);
 		if (IS_ERR(new_smi->thread)) {
 			dev_notice(new_smi->io.dev, "Could not start"
 				   " kernel thread due to error %ld, only using"
@@ -1209,9 +1203,11 @@
 		atomic_set(&smi_info->req_events, 0);
 }
 
+static void shutdown_smi(void *send_info);
 static const struct ipmi_smi_handlers handlers = {
 	.owner                  = THIS_MODULE,
 	.start_processing       = smi_start_processing,
+	.shutdown               = shutdown_smi,
 	.get_smi_info		= get_smi_info,
 	.sender			= sender,
 	.request_events		= request_events,
@@ -1592,102 +1588,6 @@
 	return rv;
 }
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-static int smi_type_proc_show(struct seq_file *m, void *v)
-{
-	struct smi_info *smi = m->private;
-
-	seq_printf(m, "%s\n", si_to_str[smi->io.si_type]);
-
-	return 0;
-}
-
-static int smi_type_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smi_type_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations smi_type_proc_ops = {
-	.open		= smi_type_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int smi_si_stats_proc_show(struct seq_file *m, void *v)
-{
-	struct smi_info *smi = m->private;
-
-	seq_printf(m, "interrupts_enabled:    %d\n",
-		       smi->io.irq && !smi->interrupt_disabled);
-	seq_printf(m, "short_timeouts:        %u\n",
-		       smi_get_stat(smi, short_timeouts));
-	seq_printf(m, "long_timeouts:         %u\n",
-		       smi_get_stat(smi, long_timeouts));
-	seq_printf(m, "idles:                 %u\n",
-		       smi_get_stat(smi, idles));
-	seq_printf(m, "interrupts:            %u\n",
-		       smi_get_stat(smi, interrupts));
-	seq_printf(m, "attentions:            %u\n",
-		       smi_get_stat(smi, attentions));
-	seq_printf(m, "flag_fetches:          %u\n",
-		       smi_get_stat(smi, flag_fetches));
-	seq_printf(m, "hosed_count:           %u\n",
-		       smi_get_stat(smi, hosed_count));
-	seq_printf(m, "complete_transactions: %u\n",
-		       smi_get_stat(smi, complete_transactions));
-	seq_printf(m, "events:                %u\n",
-		       smi_get_stat(smi, events));
-	seq_printf(m, "watchdog_pretimeouts:  %u\n",
-		       smi_get_stat(smi, watchdog_pretimeouts));
-	seq_printf(m, "incoming_messages:     %u\n",
-		       smi_get_stat(smi, incoming_messages));
-	return 0;
-}
-
-static int smi_si_stats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smi_si_stats_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations smi_si_stats_proc_ops = {
-	.open		= smi_si_stats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int smi_params_proc_show(struct seq_file *m, void *v)
-{
-	struct smi_info *smi = m->private;
-
-	seq_printf(m,
-		   "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n",
-		   si_to_str[smi->io.si_type],
-		   addr_space_to_str[smi->io.addr_type],
-		   smi->io.addr_data,
-		   smi->io.regspacing,
-		   smi->io.regsize,
-		   smi->io.regshift,
-		   smi->io.irq,
-		   smi->io.slave_addr);
-
-	return 0;
-}
-
-static int smi_params_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smi_params_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations smi_params_proc_ops = {
-	.open		= smi_params_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif
-
 #define IPMI_SI_ATTR(name) \
 static ssize_t ipmi_##name##_show(struct device *dev,			\
 				  struct device_attribute *attr,	\
@@ -2006,14 +1906,8 @@
 
 	list_add_tail(&new_smi->link, &smi_infos);
 
-	if (initialized) {
+	if (initialized)
 		rv = try_smi_init(new_smi);
-		if (rv) {
-			cleanup_one_si(new_smi);
-			mutex_unlock(&smi_infos_lock);
-			return rv;
-		}
-	}
 out_err:
 	mutex_unlock(&smi_infos_lock);
 	return rv;
@@ -2056,19 +1950,19 @@
 		goto out_err;
 	}
 
-	new_smi->intf_num = smi_num;
+	new_smi->si_num = smi_num;
 
 	/* Do this early so it's available for logs. */
 	if (!new_smi->io.dev) {
 		init_name = kasprintf(GFP_KERNEL, "ipmi_si.%d",
-				      new_smi->intf_num);
+				      new_smi->si_num);
 
 		/*
 		 * If we don't already have a device from something
 		 * else (like PCI), then register a new one.
 		 */
 		new_smi->pdev = platform_device_alloc("ipmi_si",
-						      new_smi->intf_num);
+						      new_smi->si_num);
 		if (!new_smi->pdev) {
 			pr_err(PFX "Unable to allocate platform device\n");
 			rv = -ENOMEM;
@@ -2182,35 +2076,6 @@
 		goto out_err;
 	}
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	rv = ipmi_smi_add_proc_entry(new_smi->intf, "type",
-				     &smi_type_proc_ops,
-				     new_smi);
-	if (rv) {
-		dev_err(new_smi->io.dev,
-			"Unable to create proc entry: %d\n", rv);
-		goto out_err;
-	}
-
-	rv = ipmi_smi_add_proc_entry(new_smi->intf, "si_stats",
-				     &smi_si_stats_proc_ops,
-				     new_smi);
-	if (rv) {
-		dev_err(new_smi->io.dev,
-			"Unable to create proc entry: %d\n", rv);
-		goto out_err;
-	}
-
-	rv = ipmi_smi_add_proc_entry(new_smi->intf, "params",
-				     &smi_params_proc_ops,
-				     new_smi);
-	if (rv) {
-		dev_err(new_smi->io.dev,
-			"Unable to create proc entry: %d\n", rv);
-		goto out_err;
-	}
-#endif
-
 	/* Don't increment till we know we have succeeded. */
 	smi_num++;
 
@@ -2223,7 +2088,8 @@
 	return 0;
 
 out_err:
-	shutdown_one_si(new_smi);
+	ipmi_unregister_smi(new_smi->intf);
+	new_smi->intf = NULL;
 
 	kfree(init_name);
 
@@ -2301,20 +2167,9 @@
 }
 module_init(init_ipmi_si);
 
-static void shutdown_one_si(struct smi_info *smi_info)
+static void shutdown_smi(void *send_info)
 {
-	int           rv = 0;
-
-	if (smi_info->intf) {
-		ipmi_smi_t intf = smi_info->intf;
-
-		smi_info->intf = NULL;
-		rv = ipmi_unregister_smi(intf);
-		if (rv) {
-			pr_err(PFX "Unable to unregister device: errno=%d\n",
-			       rv);
-		}
-	}
+	struct smi_info *smi_info = send_info;
 
 	if (smi_info->dev_group_added) {
 		device_remove_group(smi_info->io.dev, &ipmi_si_dev_attr_group);
@@ -2372,6 +2227,10 @@
 	smi_info->si_sm = NULL;
 }
 
+/*
+ * Must be called with smi_infos_lock held, to serialize the
+ * smi_info->intf check.
+ */
 static void cleanup_one_si(struct smi_info *smi_info)
 {
 	if (!smi_info)
@@ -2379,7 +2238,10 @@
 
 	list_del(&smi_info->link);
 
-	shutdown_one_si(smi_info);
+	if (smi_info->intf) {
+		ipmi_unregister_smi(smi_info->intf);
+		smi_info->intf = NULL;
+	}
 
 	if (smi_info->pdev) {
 		if (smi_info->pdev_registered)
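
The key structural change in ipmi_si is that teardown now runs through
the new .shutdown operation: instead of the driver calling
shutdown_one_si() itself, ipmi_unregister_smi() is expected to invoke
handlers->shutdown(send_info) once message delivery has quiesced (the
core-side call is in ipmi_msghandler, not shown in this hunk).
Sketched from the driver's point of view:

	/*
	 * Registration hands the core a shutdown hook; the driver only
	 * ever calls ipmi_unregister_smi() and lets the core call back.
	 */
	static const struct ipmi_smi_handlers example_handlers = {
		.owner            = THIS_MODULE,
		.start_processing = smi_start_processing,
		.shutdown         = shutdown_smi,	/* invoked by the core */
		/* ... sender, request_events, etc. ... */
	};
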
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 35a82f4..22f634e 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -193,8 +193,7 @@
 			     unsigned char *data, unsigned int len);
 
 struct ssif_info {
-	ipmi_smi_t          intf;
-	int                 intf_num;
+	struct ipmi_smi     *intf;
 	spinlock_t	    lock;
 	struct ipmi_smi_msg *waiting_msg;
 	struct ipmi_smi_msg *curr_msg;
@@ -290,8 +289,6 @@
 
 static bool initialized;
 
-static atomic_t next_intf = ATOMIC_INIT(0);
-
 static void return_hosed_msg(struct ssif_info *ssif_info,
 			     struct ipmi_smi_msg *msg);
 static void start_next_msg(struct ssif_info *ssif_info, unsigned long *flags);
@@ -315,17 +312,13 @@
 static void deliver_recv_msg(struct ssif_info *ssif_info,
 			     struct ipmi_smi_msg *msg)
 {
-	ipmi_smi_t    intf = ssif_info->intf;
-
-	if (!intf) {
-		ipmi_free_smi_msg(msg);
-	} else if (msg->rsp_size < 0) {
+	if (msg->rsp_size < 0) {
 		return_hosed_msg(ssif_info, msg);
 		pr_err(PFX
 		       "Malformed message in deliver_recv_msg: rsp_size = %d\n",
 		       msg->rsp_size);
 	} else {
-		ipmi_smi_msg_received(intf, msg);
+		ipmi_smi_msg_received(ssif_info->intf, msg);
 	}
 }
 
@@ -452,12 +445,10 @@
 static void handle_flags(struct ssif_info *ssif_info, unsigned long *flags)
 {
 	if (ssif_info->msg_flags & WDT_PRE_TIMEOUT_INT) {
-		ipmi_smi_t intf = ssif_info->intf;
 		/* Watchdog pre-timeout */
 		ssif_inc_stat(ssif_info, watchdog_pretimeouts);
 		start_clear_flags(ssif_info, flags);
-		if (intf)
-			ipmi_smi_watchdog_pretimeout(intf);
+		ipmi_smi_watchdog_pretimeout(ssif_info->intf);
 	} else if (ssif_info->msg_flags & RECEIVE_MSG_AVAIL)
 		/* Messages available. */
 		start_recv_msg_fetch(ssif_info, flags);
@@ -1094,27 +1085,8 @@
 	}
 }
 
-static int inc_usecount(void *send_info)
-{
-	struct ssif_info *ssif_info = send_info;
-
-	if (!i2c_get_adapter(i2c_adapter_id(ssif_info->client->adapter)))
-		return -ENODEV;
-
-	i2c_use_client(ssif_info->client);
-	return 0;
-}
-
-static void dec_usecount(void *send_info)
-{
-	struct ssif_info *ssif_info = send_info;
-
-	i2c_release_client(ssif_info->client);
-	i2c_put_adapter(ssif_info->client->adapter);
-}
-
-static int ssif_start_processing(void *send_info,
-				 ipmi_smi_t intf)
+static int ssif_start_processing(void            *send_info,
+				 struct ipmi_smi *intf)
 {
 	struct ssif_info *ssif_info = send_info;
 
@@ -1225,25 +1197,9 @@
 	.attrs		= ipmi_ssif_dev_attrs,
 };
 
-static int ssif_remove(struct i2c_client *client)
+static void shutdown_ssif(void *send_info)
 {
-	struct ssif_info *ssif_info = i2c_get_clientdata(client);
-	struct ssif_addr_info *addr_info;
-	int rv;
-
-	if (!ssif_info)
-		return 0;
-
-	/*
-	 * After this point, we won't deliver anything asychronously
-	 * to the message handler.  We can unregister ourself.
-	 */
-	rv = ipmi_unregister_smi(ssif_info->intf);
-	if (rv) {
-		pr_err(PFX "Unable to unregister device: errno=%d\n", rv);
-		return rv;
-	}
-	ssif_info->intf = NULL;
+	struct ssif_info *ssif_info = send_info;
 
 	device_remove_group(&ssif_info->client->dev, &ipmi_ssif_dev_attr_group);
 	dev_set_drvdata(&ssif_info->client->dev, NULL);
@@ -1259,6 +1215,30 @@
 		kthread_stop(ssif_info->thread);
 	}
 
+	/*
+	 * No message can be outstanding now, we have removed the
+	 * upper layer and it permitted us to do so.
+	 */
+	kfree(ssif_info);
+}
+
+static int ssif_remove(struct i2c_client *client)
+{
+	struct ssif_info *ssif_info = i2c_get_clientdata(client);
+	struct ipmi_smi *intf;
+	struct ssif_addr_info *addr_info;
+
+	if (!ssif_info)
+		return 0;
+
+	/*
+	 * After this point, we won't deliver anything asynchronously

+	 * to the message handler.  We can unregister ourself.
+	 */
+	intf = ssif_info->intf;
+	ssif_info->intf = NULL;
+	ipmi_unregister_smi(intf);
+
 	list_for_each_entry(addr_info, &ssif_infos, link) {
 		if (addr_info->client == client) {
 			addr_info->client = NULL;
@@ -1266,11 +1246,6 @@
 		}
 	}
 
-	/*
-	 * No message can be outstanding now, we have removed the
-	 * upper layer and it permitted us to do so.
-	 */
-	kfree(ssif_info);
 	return 0;
 }
 
@@ -1341,72 +1316,6 @@
 	return rv;
 }
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-static int smi_type_proc_show(struct seq_file *m, void *v)
-{
-	seq_puts(m, "ssif\n");
-
-	return 0;
-}
-
-static int smi_type_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smi_type_proc_show, inode->i_private);
-}
-
-static const struct file_operations smi_type_proc_ops = {
-	.open		= smi_type_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int smi_stats_proc_show(struct seq_file *m, void *v)
-{
-	struct ssif_info *ssif_info = m->private;
-
-	seq_printf(m, "sent_messages:          %u\n",
-		   ssif_get_stat(ssif_info, sent_messages));
-	seq_printf(m, "sent_messages_parts:    %u\n",
-		   ssif_get_stat(ssif_info, sent_messages_parts));
-	seq_printf(m, "send_retries:           %u\n",
-		   ssif_get_stat(ssif_info, send_retries));
-	seq_printf(m, "send_errors:            %u\n",
-		   ssif_get_stat(ssif_info, send_errors));
-	seq_printf(m, "received_messages:      %u\n",
-		   ssif_get_stat(ssif_info, received_messages));
-	seq_printf(m, "received_message_parts: %u\n",
-		   ssif_get_stat(ssif_info, received_message_parts));
-	seq_printf(m, "receive_retries:        %u\n",
-		   ssif_get_stat(ssif_info, receive_retries));
-	seq_printf(m, "receive_errors:         %u\n",
-		   ssif_get_stat(ssif_info, receive_errors));
-	seq_printf(m, "flag_fetches:           %u\n",
-		   ssif_get_stat(ssif_info, flag_fetches));
-	seq_printf(m, "hosed:                  %u\n",
-		   ssif_get_stat(ssif_info, hosed));
-	seq_printf(m, "events:                 %u\n",
-		   ssif_get_stat(ssif_info, events));
-	seq_printf(m, "watchdog_pretimeouts:   %u\n",
-		   ssif_get_stat(ssif_info, watchdog_pretimeouts));
-	seq_printf(m, "alerts:                 %u\n",
-		   ssif_get_stat(ssif_info, alerts));
-	return 0;
-}
-
-static int smi_stats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, smi_stats_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations smi_stats_proc_ops = {
-	.open		= smi_stats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif
-
 static int strcmp_nospace(char *s1, char *s2)
 {
 	while (*s1 && *s2) {
@@ -1678,8 +1587,6 @@
 	}
 
  found:
-	ssif_info->intf_num = atomic_inc_return(&next_intf);
-
 	if (ssif_dbg_probe) {
 		pr_info("ssif_probe: i2c_probe found device at i2c address %x\n",
 			client->addr);
@@ -1697,11 +1604,10 @@
 
 	ssif_info->handlers.owner = THIS_MODULE;
 	ssif_info->handlers.start_processing = ssif_start_processing;
+	ssif_info->handlers.shutdown = shutdown_ssif;
 	ssif_info->handlers.get_smi_info = get_smi_info;
 	ssif_info->handlers.sender = sender;
 	ssif_info->handlers.request_events = request_events;
-	ssif_info->handlers.inc_usecount = inc_usecount;
-	ssif_info->handlers.dec_usecount = dec_usecount;
 
 	{
 		unsigned int thread_num;
@@ -1740,24 +1646,6 @@
 		goto out_remove_attr;
 	}
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-	rv = ipmi_smi_add_proc_entry(ssif_info->intf, "type",
-				     &smi_type_proc_ops,
-				     ssif_info);
-	if (rv) {
-		pr_err(PFX "Unable to create proc entry: %d\n", rv);
-		goto out_err_unreg;
-	}
-
-	rv = ipmi_smi_add_proc_entry(ssif_info->intf, "ssif_stats",
-				     &smi_stats_proc_ops,
-				     ssif_info);
-	if (rv) {
-		pr_err(PFX "Unable to create proc entry: %d\n", rv);
-		goto out_err_unreg;
-	}
-#endif
-
  out:
 	if (rv) {
 		/*
@@ -1775,11 +1663,6 @@
 	kfree(resp);
 	return rv;
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-out_err_unreg:
-	ipmi_unregister_smi(ssif_info->intf);
-#endif
-
 out_remove_attr:
 	device_remove_group(&ssif_info->client->dev, &ipmi_ssif_dev_attr_group);
 	dev_set_drvdata(&ssif_info->client->dev, NULL);
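
ipmi_ssif drops its inc_usecount()/dec_usecount() pair for the same
reason: with handlers.owner set, the core can pin the interface module
itself while users exist.  A sketch of the assumed core-side pattern
(illustrative; the actual call sites live in ipmi_msghandler):

	#include <linux/module.h>

	static int example_pin(const struct ipmi_smi_handlers *handlers)
	{
		if (!try_module_get(handlers->owner))
			return -ENODEV;
		/* ... interface may be used safely here ... */
		module_put(handlers->owner);
		return 0;
	}
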
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 22bc287..ca1c5c5 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -125,7 +125,7 @@
 static DEFINE_MUTEX(ipmi_watchdog_mutex);
 static bool nowayout = WATCHDOG_NOWAYOUT;
 
-static ipmi_user_t watchdog_user;
+static struct ipmi_user *watchdog_user;
 static int watchdog_ifnum;
 
 /* Default the timeout to 10 seconds. */
@@ -153,7 +153,7 @@
 static char data_to_read;
 static DECLARE_WAIT_QUEUE_HEAD(read_q);
 static struct fasync_struct *fasync_q;
-static char pretimeout_since_last_heartbeat;
+static atomic_t pretimeout_since_last_heartbeat;
 static char expect_close;
 
 static int ifnum_to_use = -1;
@@ -303,9 +303,6 @@
 /* Default state of the timer. */
 static unsigned char ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
 
-/* If shutting down via IPMI, we ignore the heartbeat. */
-static int ipmi_ignore_heartbeat;
-
 /* Is someone using the watchdog?  Only one user is allowed. */
 static unsigned long ipmi_wdog_open;
 
@@ -329,35 +326,33 @@
 static int nmi_handler_registered;
 #endif
 
-static int ipmi_heartbeat(void);
+static int __ipmi_heartbeat(void);
 
 /*
- * We use a mutex to make sure that only one thing can send a set
- * timeout at one time, because we only have one copy of the data.
- * The mutex is claimed when the set_timeout is sent and freed
- * when both messages are free.
+ * We use a mutex to make sure that only one thing can send a
+ * message at one time.  The mutex is claimed when a message is sent
+ * and freed when both the send and receive messages are free.
  */
-static atomic_t set_timeout_tofree = ATOMIC_INIT(0);
-static DEFINE_MUTEX(set_timeout_lock);
-static DECLARE_COMPLETION(set_timeout_wait);
-static void set_timeout_free_smi(struct ipmi_smi_msg *msg)
+static atomic_t msg_tofree = ATOMIC_INIT(0);
+static DECLARE_COMPLETION(msg_wait);
+static void msg_free_smi(struct ipmi_smi_msg *msg)
 {
-    if (atomic_dec_and_test(&set_timeout_tofree))
-	    complete(&set_timeout_wait);
+	if (atomic_dec_and_test(&msg_tofree))
+		complete(&msg_wait);
 }
-static void set_timeout_free_recv(struct ipmi_recv_msg *msg)
+static void msg_free_recv(struct ipmi_recv_msg *msg)
 {
-    if (atomic_dec_and_test(&set_timeout_tofree))
-	    complete(&set_timeout_wait);
+	if (atomic_dec_and_test(&msg_tofree))
+		complete(&msg_wait);
 }
-static struct ipmi_smi_msg set_timeout_smi_msg = {
-	.done = set_timeout_free_smi
+static struct ipmi_smi_msg smi_msg = {
+	.done = msg_free_smi
 };
-static struct ipmi_recv_msg set_timeout_recv_msg = {
-	.done = set_timeout_free_recv
+static struct ipmi_recv_msg recv_msg = {
+	.done = msg_free_recv
 };
 
-static int i_ipmi_set_timeout(struct ipmi_smi_msg  *smi_msg,
+static int __ipmi_set_timeout(struct ipmi_smi_msg  *smi_msg,
 			      struct ipmi_recv_msg *recv_msg,
 			      int                  *send_heartbeat_now)
 {
@@ -368,9 +363,6 @@
 	int                               hbnow = 0;
 
 
-	/* These can be cleared as we are setting the timeout. */
-	pretimeout_since_last_heartbeat = 0;
-
 	data[0] = 0;
 	WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);
 
@@ -414,46 +406,48 @@
 				      smi_msg,
 				      recv_msg,
 				      1);
-	if (rv) {
-		printk(KERN_WARNING PFX "set timeout error: %d\n",
-		       rv);
-	}
+	if (rv)
+		pr_warn(PFX "set timeout error: %d\n", rv);
+	else if (send_heartbeat_now)
+		*send_heartbeat_now = hbnow;
 
-	if (send_heartbeat_now)
-	    *send_heartbeat_now = hbnow;
+	return rv;
+}
+
+static int _ipmi_set_timeout(int do_heartbeat)
+{
+	int send_heartbeat_now;
+	int rv;
+
+	if (!watchdog_user)
+		return -ENODEV;
+
+	atomic_set(&msg_tofree, 2);
+
+	rv = __ipmi_set_timeout(&smi_msg,
+				&recv_msg,
+				&send_heartbeat_now);
+	if (rv)
+		return rv;
+
+	wait_for_completion(&msg_wait);
+
+	if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
+		|| ((send_heartbeat_now)
+		    && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
+		rv = __ipmi_heartbeat();
 
 	return rv;
 }
 
 static int ipmi_set_timeout(int do_heartbeat)
 {
-	int send_heartbeat_now;
 	int rv;
 
+	mutex_lock(&ipmi_watchdog_mutex);
+	rv = _ipmi_set_timeout(do_heartbeat);
+	mutex_unlock(&ipmi_watchdog_mutex);
 
-	/* We can only send one of these at a time. */
-	mutex_lock(&set_timeout_lock);
-
-	atomic_set(&set_timeout_tofree, 2);
-
-	rv = i_ipmi_set_timeout(&set_timeout_smi_msg,
-				&set_timeout_recv_msg,
-				&send_heartbeat_now);
-	if (rv) {
-		mutex_unlock(&set_timeout_lock);
-		goto out;
-	}
-
-	wait_for_completion(&set_timeout_wait);
-
-	mutex_unlock(&set_timeout_lock);
-
-	if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
-	    || ((send_heartbeat_now)
-		&& (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
-		rv = ipmi_heartbeat();
-
-out:
 	return rv;
 }
 
@@ -531,13 +525,12 @@
 	while (atomic_read(&panic_done_count) != 0)
 		ipmi_poll_interface(watchdog_user);
 	atomic_add(1, &panic_done_count);
-	rv = i_ipmi_set_timeout(&panic_halt_smi_msg,
+	rv = __ipmi_set_timeout(&panic_halt_smi_msg,
 				&panic_halt_recv_msg,
 				&send_heartbeat_now);
 	if (rv) {
 		atomic_sub(1, &panic_done_count);
-		printk(KERN_WARNING PFX
-		       "Unable to extend the watchdog timeout.");
+		pr_warn(PFX "Unable to extend the watchdog timeout.");
 	} else {
 		if (send_heartbeat_now)
 			panic_halt_ipmi_heartbeat();
@@ -546,69 +539,22 @@
 		ipmi_poll_interface(watchdog_user);
 }
 
-/*
- * We use a mutex to make sure that only one thing can send a
- * heartbeat at one time, because we only have one copy of the data.
- * The semaphore is claimed when the set_timeout is sent and freed
- * when both messages are free.
- */
-static atomic_t heartbeat_tofree = ATOMIC_INIT(0);
-static DEFINE_MUTEX(heartbeat_lock);
-static DECLARE_COMPLETION(heartbeat_wait);
-static void heartbeat_free_smi(struct ipmi_smi_msg *msg)
+static int __ipmi_heartbeat(void)
 {
-    if (atomic_dec_and_test(&heartbeat_tofree))
-	    complete(&heartbeat_wait);
-}
-static void heartbeat_free_recv(struct ipmi_recv_msg *msg)
-{
-    if (atomic_dec_and_test(&heartbeat_tofree))
-	    complete(&heartbeat_wait);
-}
-static struct ipmi_smi_msg heartbeat_smi_msg = {
-	.done = heartbeat_free_smi
-};
-static struct ipmi_recv_msg heartbeat_recv_msg = {
-	.done = heartbeat_free_recv
-};
-
-static int ipmi_heartbeat(void)
-{
-	struct kernel_ipmi_msg            msg;
-	int                               rv;
+	struct kernel_ipmi_msg msg;
+	int rv;
 	struct ipmi_system_interface_addr addr;
-	int				  timeout_retries = 0;
-
-	if (ipmi_ignore_heartbeat)
-		return 0;
-
-	if (ipmi_start_timer_on_heartbeat) {
-		ipmi_start_timer_on_heartbeat = 0;
-		ipmi_watchdog_state = action_val;
-		return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
-	} else if (pretimeout_since_last_heartbeat) {
-		/*
-		 * A pretimeout occurred, make sure we set the timeout.
-		 * We don't want to set the action, though, we want to
-		 * leave that alone (thus it can't be combined with the
-		 * above operation.
-		 */
-		return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
-	}
-
-	mutex_lock(&heartbeat_lock);
+	int timeout_retries = 0;
 
 restart:
-	atomic_set(&heartbeat_tofree, 2);
-
 	/*
 	 * Don't reset the timer if we have the timer turned off, that
 	 * re-enables the watchdog.
 	 */
-	if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) {
-		mutex_unlock(&heartbeat_lock);
+	if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
 		return 0;
-	}
+
+	atomic_set(&msg_tofree, 2);
 
 	addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
 	addr.channel = IPMI_BMC_CHANNEL;
@@ -623,26 +569,23 @@
 				      0,
 				      &msg,
 				      NULL,
-				      &heartbeat_smi_msg,
-				      &heartbeat_recv_msg,
+				      &smi_msg,
+				      &recv_msg,
 				      1);
 	if (rv) {
-		mutex_unlock(&heartbeat_lock);
-		printk(KERN_WARNING PFX "heartbeat failure: %d\n",
-		       rv);
+		pr_warn(PFX "heartbeat send failure: %d\n", rv);
 		return rv;
 	}
 
 	/* Wait for the heartbeat to be sent. */
-	wait_for_completion(&heartbeat_wait);
+	wait_for_completion(&msg_wait);
 
-	if (heartbeat_recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)  {
+	if (recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
 		timeout_retries++;
 		if (timeout_retries > 3) {
-			printk(KERN_ERR PFX ": Unable to restore the IPMI"
-			       " watchdog's settings, giving up.\n");
+			pr_err(PFX ": Unable to restore the IPMI watchdog's settings, giving up.\n");
 			rv = -EIO;
-			goto out_unlock;
+			goto out;
 		}
 
 		/*
@@ -651,18 +594,17 @@
 		 * to restore the timer's info.  Note that we still hold
 		 * the heartbeat lock, to keep a heartbeat from happening
 		 * in this process, so must say no heartbeat to avoid a
 		 * deadlock on this mutex.
 		 */
-		rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
+		rv = _ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
 		if (rv) {
-			printk(KERN_ERR PFX ": Unable to send the command to"
-			       " set the watchdog's settings, giving up.\n");
-			goto out_unlock;
+			pr_err(PFX ": Unable to send the command to set the watchdog's settings, giving up.\n");
+			goto out;
 		}
 
-		/* We might need a new heartbeat, so do it now */
+		/* Might need a heartbeat send, go ahead and do it. */
 		goto restart;
-	} else if (heartbeat_recv_msg.msg.data[0] != 0) {
+	} else if (recv_msg.msg.data[0] != 0) {
 		/*
 		 * Got an error in the heartbeat response.  It was already
 		 * reported in ipmi_wdog_msg_handler, but we should return
@@ -671,8 +613,43 @@
 		rv = -EINVAL;
 	}
 
-out_unlock:
-	mutex_unlock(&heartbeat_lock);
+out:
+	return rv;
+}
+
+static int _ipmi_heartbeat(void)
+{
+	int rv;
+
+	if (!watchdog_user)
+		return -ENODEV;
+
+	if (ipmi_start_timer_on_heartbeat) {
+		ipmi_start_timer_on_heartbeat = 0;
+		ipmi_watchdog_state = action_val;
+		rv = _ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
+	} else if (atomic_cmpxchg(&pretimeout_since_last_heartbeat, 1, 0)) {
+		/*
+		 * A pretimeout occurred, make sure we set the timeout.
+		 * We don't want to set the action, though, we want to
+		 * leave that alone (thus it can't be combined with the
+		 * above operation).
+		 */
+		rv = _ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
+	} else {
+		rv = __ipmi_heartbeat();
+	}
+
+	return rv;
+}
+
+static int ipmi_heartbeat(void)
+{
+	int rv;
+
+	mutex_lock(&ipmi_watchdog_mutex);
+	rv = _ipmi_heartbeat();
+	mutex_unlock(&ipmi_watchdog_mutex);
 
 	return rv;
 }
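
The watchdog refactor settles into a common kernel layering
convention, sketched generically (names illustrative):

	/*
	 * foo()   - public entry point, takes the lock
	 * _foo()  - lock assumed held, validates driver state
	 * __foo() - innermost worker, no locking or state checks
	 */

so the heartbeat and set_timeout trios can call each other under
ipmi_watchdog_mutex without deadlocking.  The ioctl cases below switch
to the single-underscore variants, which assumes the unlocked_ioctl
wrapper already holds ipmi_watchdog_mutex (the wrapper itself is not
shown in this hunk).
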
@@ -700,7 +677,7 @@
 		if (i)
 			return -EFAULT;
 		timeout = val;
-		return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
+		return _ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
 
 	case WDIOC_GETTIMEOUT:
 		i = copy_to_user(argp, &timeout, sizeof(timeout));
@@ -713,7 +690,7 @@
 		if (i)
 			return -EFAULT;
 		pretimeout = val;
-		return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
+		return _ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
 
 	case WDIOC_GETPRETIMEOUT:
 		i = copy_to_user(argp, &pretimeout, sizeof(pretimeout));
@@ -722,7 +699,7 @@
 		return 0;
 
 	case WDIOC_KEEPALIVE:
-		return ipmi_heartbeat();
+		return _ipmi_heartbeat();
 
 	case WDIOC_SETOPTIONS:
 		i = copy_from_user(&val, argp, sizeof(int));
@@ -730,13 +707,13 @@
 			return -EFAULT;
 		if (val & WDIOS_DISABLECARD) {
 			ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
-			ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
+			_ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
 			ipmi_start_timer_on_heartbeat = 0;
 		}
 
 		if (val & WDIOS_ENABLECARD) {
 			ipmi_watchdog_state = action_val;
-			ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
+			_ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
 		}
 		return 0;
 
@@ -810,7 +787,7 @@
 	 * Reading returns if the pretimeout has gone off, and it only does
 	 * it once per pretimeout.
 	 */
-	spin_lock(&ipmi_read_lock);
+	spin_lock_irq(&ipmi_read_lock);
 	if (!data_to_read) {
 		if (file->f_flags & O_NONBLOCK) {
 			rv = -EAGAIN;
@@ -821,9 +798,9 @@
 		add_wait_queue(&read_q, &wait);
 		while (!data_to_read) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			spin_unlock(&ipmi_read_lock);
+			spin_unlock_irq(&ipmi_read_lock);
 			schedule();
-			spin_lock(&ipmi_read_lock);
+			spin_lock_irq(&ipmi_read_lock);
 		}
 		remove_wait_queue(&read_q, &wait);
 
@@ -835,7 +812,7 @@
 	data_to_read = 0;
 
  out:
-	spin_unlock(&ipmi_read_lock);
+	spin_unlock_irq(&ipmi_read_lock);
 
 	if (rv == 0) {
 		if (copy_to_user(buf, &data_to_read, 1))
@@ -873,10 +850,10 @@
 
 	poll_wait(file, &read_q, wait);
 
-	spin_lock(&ipmi_read_lock);
+	spin_lock_irq(&ipmi_read_lock);
 	if (data_to_read)
 		mask |= (EPOLLIN | EPOLLRDNORM);
-	spin_unlock(&ipmi_read_lock);
+	spin_unlock_irq(&ipmi_read_lock);
 
 	return mask;
 }
@@ -894,11 +871,13 @@
 {
 	if (iminor(ino) == WATCHDOG_MINOR) {
 		if (expect_close == 42) {
+			mutex_lock(&ipmi_watchdog_mutex);
 			ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
-			ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
+			_ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
+			mutex_unlock(&ipmi_watchdog_mutex);
 		} else {
-			printk(KERN_CRIT PFX
-			       "Unexpected close, not stopping watchdog!\n");
+			pr_crit(PFX
+				"Unexpected close, not stopping watchdog!\n");
 			ipmi_heartbeat();
 		}
 		clear_bit(0, &ipmi_wdog_open);
@@ -932,11 +911,9 @@
 {
 	if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER &&
 			msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)
-		printk(KERN_INFO PFX "response: The IPMI controller appears"
-		       " to have been reset, will attempt to reinitialize"
-		       " the watchdog timer\n");
+		pr_info(PFX "response: The IPMI controller appears to have been reset, will attempt to reinitialize the watchdog timer\n");
 	else if (msg->msg.data[0] != 0)
-		printk(KERN_ERR PFX "response: Error %x on cmd %x\n",
+		pr_err(PFX "response: Error %x on cmd %x\n",
 		       msg->msg.data[0],
 		       msg->msg.cmd);
 
@@ -950,12 +927,13 @@
 			if (atomic_inc_and_test(&preop_panic_excl))
 				panic("Watchdog pre-timeout");
 		} else if (preop_val == WDOG_PREOP_GIVE_DATA) {
-			spin_lock(&ipmi_read_lock);
+			unsigned long flags;
+
+			spin_lock_irqsave(&ipmi_read_lock, flags);
 			data_to_read = 1;
 			wake_up_interruptible(&read_q);
 			kill_fasync(&fasync_q, SIGIO, POLL_IN);
-
-			spin_unlock(&ipmi_read_lock);
+			spin_unlock_irqrestore(&ipmi_read_lock, flags);
 		}
 	}
 
@@ -963,12 +941,34 @@
 	 * On some machines, the heartbeat will give an error and not
 	 * work unless we re-enable the timer.  So do so.
 	 */
-	pretimeout_since_last_heartbeat = 1;
+	atomic_set(&pretimeout_since_last_heartbeat, 1);
+}
+
+static void ipmi_wdog_panic_handler(void *user_data)
+{
+	static int panic_event_handled;
+
+	/*
+	 * On a panic, if we have a panic timeout, make sure to extend
+	 * the watchdog timer to a reasonable value to complete the
+	 * panic, if the watchdog timer is running.  Plus the
+	 * pretimeout is meaningless at panic time.
+	 */
+	if (watchdog_user && !panic_event_handled &&
+	    ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
+		/* Make sure we do this only once. */
+		panic_event_handled = 1;
+
+		timeout = panic_wdt_timeout;
+		pretimeout = 0;
+		panic_halt_ipmi_set_timeout();
+	}
 }
 
 static const struct ipmi_user_hndl ipmi_hndlrs = {
 	.ipmi_recv_hndl           = ipmi_wdog_msg_handler,
-	.ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler
+	.ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler,
+	.ipmi_panic_handler       = ipmi_wdog_panic_handler
 };
 
 static void ipmi_register_watchdog(int ipmi_intf)
@@ -985,7 +985,7 @@
 
 	rv = ipmi_create_user(ipmi_intf, &ipmi_hndlrs, NULL, &watchdog_user);
 	if (rv < 0) {
-		printk(KERN_CRIT PFX "Unable to register with ipmi\n");
+		pr_crit(PFX "Unable to register with ipmi\n");
 		goto out;
 	}
 
@@ -1002,7 +1002,7 @@
 	if (rv < 0) {
 		ipmi_destroy_user(watchdog_user);
 		watchdog_user = NULL;
-		printk(KERN_CRIT PFX "Unable to register misc device\n");
+		pr_crit(PFX "Unable to register misc device\n");
 	}
 
 #ifdef HAVE_DIE_NMI
@@ -1024,9 +1024,8 @@
 
 		rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
 		if (rv) {
-			printk(KERN_WARNING PFX "Error starting timer to"
-			       " test NMI: 0x%x.  The NMI pretimeout will"
-			       " likely not work\n", rv);
+			pr_warn(PFX "Error starting timer to test NMI: 0x%x.  The NMI pretimeout will likely not work\n",
+				rv);
 			rv = 0;
 			goto out_restore;
 		}
@@ -1034,9 +1033,7 @@
 		msleep(1500);
 
 		if (testing_nmi != 2) {
-			printk(KERN_WARNING PFX "IPMI NMI didn't seem to"
-			       " occur.  The NMI pretimeout will"
-			       " likely not work\n");
+			pr_warn(PFX "IPMI NMI didn't seem to occur.  The NMI pretimeout will likely not work\n");
 		}
  out_restore:
 		testing_nmi = 0;
@@ -1052,7 +1049,7 @@
 		start_now = 0; /* Disable this function after first startup. */
 		ipmi_watchdog_state = action_val;
 		ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
-		printk(KERN_INFO PFX "Starting now!\n");
+		pr_info(PFX "Starting now!\n");
 	} else {
 		/* Stop the timer now. */
 		ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
@@ -1063,34 +1060,38 @@
 static void ipmi_unregister_watchdog(int ipmi_intf)
 {
 	int rv;
+	struct ipmi_user *loc_user = watchdog_user;
 
-	if (!watchdog_user)
-		goto out;
+	if (!loc_user)
+		return;
 
 	if (watchdog_ifnum != ipmi_intf)
-		goto out;
+		return;
 
 	/* Make sure no one can call us any more. */
 	misc_deregister(&ipmi_wdog_miscdev);
 
+	watchdog_user = NULL;
+
 	/*
 	 * Wait to make sure the message makes it out.  The lower layer has
 	 * pointers to our buffers, we want to make sure they are done before
 	 * we release our memory.
 	 */
-	while (atomic_read(&set_timeout_tofree))
-		schedule_timeout_uninterruptible(1);
+	while (atomic_read(&msg_tofree))
+		msg_free_smi(NULL);
+
+	mutex_lock(&ipmi_watchdog_mutex);
 
 	/* Disconnect from IPMI. */
-	rv = ipmi_destroy_user(watchdog_user);
-	if (rv) {
-		printk(KERN_WARNING PFX "error unlinking from IPMI: %d\n",
-		       rv);
-	}
-	watchdog_user = NULL;
+	rv = ipmi_destroy_user(loc_user);
+	if (rv)
+		pr_warn(PFX "error unlinking from IPMI: %d\n",  rv);
 
- out:
-	return;
+	/* If it comes back, restart it properly. */
+	ipmi_start_timer_on_heartbeat = 1;
+
+	mutex_unlock(&ipmi_watchdog_mutex);
 }
 
 #ifdef HAVE_DIE_NMI
@@ -1124,7 +1125,7 @@
 		/* On some machines, the heartbeat will give
 		   an error and not work unless we re-enable
 		   the timer.   So do so. */
-		pretimeout_since_last_heartbeat = 1;
+		atomic_set(&pretimeout_since_last_heartbeat, 1);
 		if (atomic_inc_and_test(&preop_panic_excl))
 			nmi_panic(regs, PFX "pre-timeout");
 	}
@@ -1167,36 +1168,6 @@
 	.priority	= 0
 };
 
-static int wdog_panic_handler(struct notifier_block *this,
-			      unsigned long         event,
-			      void                  *unused)
-{
-	static int panic_event_handled;
-
-	/* On a panic, if we have a panic timeout, make sure to extend
-	   the watchdog timer to a reasonable value to complete the
-	   panic, if the watchdog timer is running.  Plus the
-	   pretimeout is meaningless at panic time. */
-	if (watchdog_user && !panic_event_handled &&
-	    ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
-		/* Make sure we do this only once. */
-		panic_event_handled = 1;
-
-		timeout = panic_wdt_timeout;
-		pretimeout = 0;
-		panic_halt_ipmi_set_timeout();
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block wdog_panic_notifier = {
-	.notifier_call	= wdog_panic_handler,
-	.next		= NULL,
-	.priority	= 150	/* priority: INT_MAX >= x >= 0 */
-};
-
-
 static void ipmi_new_smi(int if_num, struct device *device)
 {
 	ipmi_register_watchdog(if_num);
@@ -1288,9 +1259,7 @@
 	if (preaction_val == WDOG_PRETIMEOUT_NMI) {
 		do_nmi = 1;
 		if (preop_val == WDOG_PREOP_GIVE_DATA) {
-			printk(KERN_WARNING PFX "Pretimeout op is to give data"
-			       " but NMI pretimeout is enabled, setting"
-			       " pretimeout op to none\n");
+			pr_warn(PFX "Pretimeout op is to give data but NMI pretimeout is enabled, setting pretimeout op to none\n");
 			preop_op("preop_none", NULL);
 			do_nmi = 0;
 		}
@@ -1299,8 +1268,7 @@
 		rv = register_nmi_handler(NMI_UNKNOWN, ipmi_nmi, 0,
 						"ipmi");
 		if (rv) {
-			printk(KERN_WARNING PFX
-			       "Can't register nmi handler\n");
+			pr_warn(PFX "Can't register nmi handler\n");
 			return;
 		} else
 			nmi_handler_registered = 1;
@@ -1317,27 +1285,24 @@
 
 	if (action_op(action, NULL)) {
 		action_op("reset", NULL);
-		printk(KERN_INFO PFX "Unknown action '%s', defaulting to"
-		       " reset\n", action);
+		pr_info(PFX "Unknown action '%s', defaulting to reset\n",
+			action);
 	}
 
 	if (preaction_op(preaction, NULL)) {
 		preaction_op("pre_none", NULL);
-		printk(KERN_INFO PFX "Unknown preaction '%s', defaulting to"
-		       " none\n", preaction);
+		pr_info(PFX "Unknown preaction '%s', defaulting to none\n",
+			preaction);
 	}
 
 	if (preop_op(preop, NULL)) {
 		preop_op("preop_none", NULL);
-		printk(KERN_INFO PFX "Unknown preop '%s', defaulting to"
-		       " none\n", preop);
+		pr_info(PFX "Unknown preop '%s', defaulting to none\n", preop);
 	}
 
 	check_parms();
 
 	register_reboot_notifier(&wdog_reboot_notifier);
-	atomic_notifier_chain_register(&panic_notifier_list,
-			&wdog_panic_notifier);
 
 	rv = ipmi_smi_watcher_register(&smi_watcher);
 	if (rv) {
@@ -1345,14 +1310,12 @@
 		if (nmi_handler_registered)
 			unregister_nmi_handler(NMI_UNKNOWN, "ipmi");
 #endif
-		atomic_notifier_chain_unregister(&panic_notifier_list,
-						 &wdog_panic_notifier);
 		unregister_reboot_notifier(&wdog_reboot_notifier);
-		printk(KERN_WARNING PFX "can't register smi watcher\n");
+		pr_warn(PFX "can't register smi watcher\n");
 		return rv;
 	}
 
-	printk(KERN_INFO PFX "driver initialized\n");
+	pr_info(PFX "driver initialized\n");
 
 	return 0;
 }
@@ -1367,8 +1330,6 @@
 		unregister_nmi_handler(NMI_UNKNOWN, "ipmi");
 #endif
 
-	atomic_notifier_chain_unregister(&panic_notifier_list,
-					 &wdog_panic_notifier);
 	unregister_reboot_notifier(&wdog_reboot_notifier);
 }
 module_exit(ipmi_wdog_exit);
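
One detail worth calling out from the watchdog conversion: turning
pretimeout_since_last_heartbeat into an atomic_t lets the heartbeat
path consume the flag with a single test-and-clear instead of a racy
read-then-write.  Minimal sketch of the idiom:

	#include <linux/atomic.h>

	static atomic_t flag = ATOMIC_INIT(0);

	/* Returns true if the flag was set, and clears it atomically. */
	static bool test_and_clear_flag(void)
	{
		return atomic_cmpxchg(&flag, 1, 0) == 1;
	}
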
diff --git a/drivers/char/ipmi/kcs_bmc_npcm7xx.c b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
new file mode 100644
index 0000000..722f739
--- /dev/null
+++ b/drivers/char/ipmi/kcs_bmc_npcm7xx.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Nuvoton Corporation.
+ * Copyright (c) 2018, Intel Corporation.
+ */
+
+#define pr_fmt(fmt) "nuvoton-kcs-bmc: " fmt
+
+#include <linux/atomic.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include "kcs_bmc.h"
+
+#define DEVICE_NAME	"npcm-kcs-bmc"
+#define KCS_CHANNEL_MAX	3
+
+#define KCS1ST		0x0C
+#define KCS2ST		0x1E
+#define KCS3ST		0x30
+
+#define KCS1DO		0x0E
+#define KCS2DO		0x20
+#define KCS3DO		0x32
+
+#define KCS1DI		0x10
+#define KCS2DI		0x22
+#define KCS3DI		0x34
+
+#define KCS1CTL		0x18
+#define KCS2CTL		0x2A
+#define KCS3CTL		0x3C
+#define    KCS_CTL_IBFIE	BIT(0)
+
+#define KCS1IE		0x1C
+#define KCS2IE		0x2E
+#define KCS3IE		0x40
+#define    KCS_IE_IRQE          BIT(0)
+#define    KCS_IE_HIRQE         BIT(3)
+
+/*
+ * 7.2.4 Core KCS Registers
+ * Registers in this module are 8 bits. An 8-bit register must be accessed
+ * by an 8-bit read or write.
+ *
+ * sts: KCS Channel n Status Register (KCSnST).
+ * dob: KCS Channel n Data Out Buffer Register (KCSnDO).
+ * dib: KCS Channel n Data In Buffer Register (KCSnDI).
+ * ctl: KCS Channel n Control Register (KCSnCTL).
+ * ie : KCS Channel n Interrupt Enable Register (KCSnIE).
+ */
+struct npcm7xx_kcs_reg {
+	u32 sts;
+	u32 dob;
+	u32 dib;
+	u32 ctl;
+	u32 ie;
+};
+
+struct npcm7xx_kcs_bmc {
+	struct regmap *map;
+
+	const struct npcm7xx_kcs_reg *reg;
+};
+
+static const struct npcm7xx_kcs_reg npcm7xx_kcs_reg_tbl[KCS_CHANNEL_MAX] = {
+	{ .sts = KCS1ST, .dob = KCS1DO, .dib = KCS1DI, .ctl = KCS1CTL, .ie = KCS1IE },
+	{ .sts = KCS2ST, .dob = KCS2DO, .dib = KCS2DI, .ctl = KCS2CTL, .ie = KCS2IE },
+	{ .sts = KCS3ST, .dob = KCS3DO, .dib = KCS3DI, .ctl = KCS3CTL, .ie = KCS3IE },
+};
+
+static u8 npcm7xx_kcs_inb(struct kcs_bmc *kcs_bmc, u32 reg)
+{
+	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	u32 val = 0;
+	int rc;
+
+	rc = regmap_read(priv->map, reg, &val);
+	WARN(rc != 0, "regmap_read() failed: %d\n", rc);
+
+	return rc == 0 ? (u8)val : 0;
+}
+
+static void npcm7xx_kcs_outb(struct kcs_bmc *kcs_bmc, u32 reg, u8 data)
+{
+	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+	int rc;
+
+	rc = regmap_write(priv->map, reg, data);
+	WARN(rc != 0, "regmap_write() failed: %d\n", rc);
+}
+
+static void npcm7xx_kcs_enable_channel(struct kcs_bmc *kcs_bmc, bool enable)
+{
+	struct npcm7xx_kcs_bmc *priv = kcs_bmc_priv(kcs_bmc);
+
+	regmap_update_bits(priv->map, priv->reg->ctl, KCS_CTL_IBFIE,
+			   enable ? KCS_CTL_IBFIE : 0);
+
+	regmap_update_bits(priv->map, priv->reg->ie, KCS_IE_IRQE | KCS_IE_HIRQE,
+			   enable ? KCS_IE_IRQE | KCS_IE_HIRQE : 0);
+}
+
+static irqreturn_t npcm7xx_kcs_irq(int irq, void *arg)
+{
+	struct kcs_bmc *kcs_bmc = arg;
+
+	if (!kcs_bmc_handle_event(kcs_bmc))
+		return IRQ_HANDLED;
+
+	return IRQ_NONE;
+}
+
+static int npcm7xx_kcs_config_irq(struct kcs_bmc *kcs_bmc,
+				  struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int irq;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	return devm_request_irq(dev, irq, npcm7xx_kcs_irq, IRQF_SHARED,
+				dev_name(dev), kcs_bmc);
+}
+
+static int npcm7xx_kcs_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct npcm7xx_kcs_bmc *priv;
+	struct kcs_bmc *kcs_bmc;
+	u32 chan;
+	int rc;
+
+	rc = of_property_read_u32(dev->of_node, "kcs_chan", &chan);
+	if (rc != 0 || chan == 0 || chan > KCS_CHANNEL_MAX) {
+		dev_err(dev, "no valid 'kcs_chan' configured\n");
+		return -ENODEV;
+	}
+
+	kcs_bmc = kcs_bmc_alloc(dev, sizeof(*priv), chan);
+	if (!kcs_bmc)
+		return -ENOMEM;
+
+	priv = kcs_bmc_priv(kcs_bmc);
+	priv->map = syscon_node_to_regmap(dev->parent->of_node);
+	if (IS_ERR(priv->map)) {
+		dev_err(dev, "Couldn't get regmap\n");
+		return -ENODEV;
+	}
+	priv->reg = &npcm7xx_kcs_reg_tbl[chan - 1];
+
+	kcs_bmc->ioreg.idr = priv->reg->dib;
+	kcs_bmc->ioreg.odr = priv->reg->dob;
+	kcs_bmc->ioreg.str = priv->reg->sts;
+	kcs_bmc->io_inputb = npcm7xx_kcs_inb;
+	kcs_bmc->io_outputb = npcm7xx_kcs_outb;
+
+	dev_set_drvdata(dev, kcs_bmc);
+
+	npcm7xx_kcs_enable_channel(kcs_bmc, true);
+	rc = npcm7xx_kcs_config_irq(kcs_bmc, pdev);
+	if (rc)
+		return rc;
+
+	rc = misc_register(&kcs_bmc->miscdev);
+	if (rc) {
+		dev_err(dev, "Unable to register device\n");
+		return rc;
+	}
+
+	pr_info("channel=%u idr=0x%x odr=0x%x str=0x%x\n",
+		chan,
+		kcs_bmc->ioreg.idr, kcs_bmc->ioreg.odr, kcs_bmc->ioreg.str);
+
+	return 0;
+}
+
+static int npcm7xx_kcs_remove(struct platform_device *pdev)
+{
+	struct kcs_bmc *kcs_bmc = dev_get_drvdata(&pdev->dev);
+
+	misc_deregister(&kcs_bmc->miscdev);
+
+	return 0;
+}
+
+static const struct of_device_id npcm_kcs_bmc_match[] = {
+	{ .compatible = "nuvoton,npcm750-kcs-bmc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, npcm_kcs_bmc_match);
+
+static struct platform_driver npcm_kcs_bmc_driver = {
+	.driver = {
+		.name		= DEVICE_NAME,
+		.of_match_table	= npcm_kcs_bmc_match,
+	},
+	.probe	= npcm7xx_kcs_probe,
+	.remove	= npcm7xx_kcs_remove,
+};
+module_platform_driver(npcm_kcs_bmc_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Avi Fishman <avifishman70@gmail.com>");
+MODULE_AUTHOR("Haiyue Wang <haiyue.wang@linux.intel.com>");
+MODULE_DESCRIPTION("NPCM7xx device interface to the KCS BMC device");
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index 1bb9e7c..53cfe57 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -95,19 +95,6 @@
 	.stop  = misc_seq_stop,
 	.show  = misc_seq_show,
 };
-
-static int misc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &misc_seq_ops);
-}
-
-static const struct file_operations misc_proc_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = misc_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
 #endif
 
 static int misc_open(struct inode *inode, struct file *file)
@@ -282,7 +269,7 @@
 	int err;
 	struct proc_dir_entry *ret;
 
-	ret = proc_create("misc", 0, NULL, &misc_proc_fops);
+	ret = proc_create_seq("misc", 0, NULL, &misc_seq_ops);
 	misc_class = class_create(THIS_MODULE, "misc");
 	err = PTR_ERR(misc_class);
 	if (IS_ERR(misc_class))
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index 7b75669..058876b 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -191,7 +191,7 @@
  *
  * Creates a mspec page and maps it to user space.
  */
-static int
+static vm_fault_t
 mspec_fault(struct vm_fault *vmf)
 {
 	unsigned long paddr, maddr;
@@ -223,14 +223,7 @@
 
 	pfn = paddr >> PAGE_SHIFT;
 
-	/*
-	 * vm_insert_pfn can fail with -EBUSY, but in that case it will
-	 * be because another thread has installed the pte first, so it
-	 * is no problem.
-	 */
-	vm_insert_pfn(vmf->vma, vmf->address, pfn);
-
-	return VM_FAULT_NOPAGE;
+	return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
 }
 
 static const struct vm_operations_struct mspec_vm_ops = {
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 678fa97..25264d6 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -389,22 +389,9 @@
 	return 0;
 }
 
-static int nvram_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, nvram_proc_read, NULL);
-}
-
-static const struct file_operations nvram_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= nvram_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int nvram_add_proc_fs(void)
 {
-	if (!proc_create("driver/nvram", 0, NULL, &nvram_proc_fops))
+	if (!proc_create_single("driver/nvram", 0, NULL, nvram_proc_read))
 		return -ENOMEM;
 	return 0;
 }
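
The misc and nvram hunks above are instances of one tree-wide cleanup:
single_open()/seq_open() boilerplate plus a file_operations block is
collapsed into one proc helper call, proc_create_single() for a plain
show function and proc_create_seq() for full seq_operations.  Minimal
sketch of the single-show form (my_show is an illustrative name):

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int my_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "example\n");
		return 0;
	}

	static int __init my_init(void)
	{
		if (!proc_create_single("driver/example", 0, NULL, my_show))
			return -ENOMEM;
		return 0;
	}
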
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index aa502e9..66b0419 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -2616,19 +2616,6 @@
 	return 0;
 }
 
-static int mgslpc_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mgslpc_proc_show, NULL);
-}
-
-static const struct file_operations mgslpc_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= mgslpc_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int rx_alloc_buffers(MGSLPC_INFO *info)
 {
 	/* each buffer has header and data */
@@ -2815,7 +2802,7 @@
 	.tiocmget = tiocmget,
 	.tiocmset = tiocmset,
 	.get_icount = mgslpc_get_icount,
-	.proc_fops = &mgslpc_proc_fops,
+	.proc_show = mgslpc_proc_show,
 };
 
 static int __init synclink_cs_init(void)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index cd888d4..a8fb002 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -402,8 +402,7 @@
 /*
  * Static global variables
  */
-static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+static DECLARE_WAIT_QUEUE_HEAD(random_wait);
 static struct fasync_struct *fasync;
 
 static DEFINE_SPINLOCK(random_ready_list_lock);
@@ -722,8 +721,8 @@
 
 		/* should we wake readers? */
 		if (entropy_bits >= random_read_wakeup_bits &&
-		    wq_has_sleeper(&random_read_wait)) {
-			wake_up_interruptible(&random_read_wait);
+		    wq_has_sleeper(&random_wait)) {
+			wake_up_interruptible_poll(&random_wait, POLLIN);
 			kill_fasync(&fasync, SIGIO, POLL_IN);
 		}
 		/* If the input pool is getting full, send some
@@ -1397,7 +1396,7 @@
 	trace_debit_entropy(r->name, 8 * ibytes);
 	if (ibytes &&
 	    (r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) {
-		wake_up_interruptible(&random_write_wait);
+		wake_up_interruptible_poll(&random_wait, POLLOUT);
 		kill_fasync(&fasync, SIGIO, POLL_OUT);
 	}
 
@@ -1839,7 +1838,7 @@
 		if (nonblock)
 			return -EAGAIN;
 
-		wait_event_interruptible(random_read_wait,
+		wait_event_interruptible(random_wait,
 			ENTROPY_BITS(&input_pool) >=
 			random_read_wakeup_bits);
 		if (signal_pending(current))
@@ -1876,14 +1875,17 @@
 	return ret;
 }
 
-static __poll_t
-random_poll(struct file *file, poll_table * wait)
+static struct wait_queue_head *
+random_get_poll_head(struct file *file, __poll_t events)
 {
-	__poll_t mask;
+	return &random_wait;
+}
 
-	poll_wait(file, &random_read_wait, wait);
-	poll_wait(file, &random_write_wait, wait);
-	mask = 0;
+static __poll_t
+random_poll_mask(struct file *file, __poll_t events)
+{
+	__poll_t mask = 0;
+
 	if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)
 		mask |= EPOLLIN | EPOLLRDNORM;
 	if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
@@ -1990,7 +1992,8 @@
 const struct file_operations random_fops = {
 	.read  = random_read,
 	.write = random_write,
-	.poll  = random_poll,
+	.get_poll_head  = random_get_poll_head,
+	.poll_mask  = random_poll_mask,
 	.unlocked_ioctl = random_ioctl,
 	.fasync = random_fasync,
 	.llseek = noop_llseek,
@@ -2323,7 +2326,7 @@
 	 * We'll be woken up again once below random_write_wakeup_thresh,
 	 * or when the calling thread is about to terminate.
 	 */
-	wait_event_interruptible(random_write_wait, kthread_should_stop() ||
+	wait_event_interruptible(random_wait, kthread_should_stop() ||
 			ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
 	mix_pool_bytes(poolp, buffer, count);
 	credit_entropy_bits(poolp, entropy);
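
The random.c conversion swaps one ->poll() method for the newer
->get_poll_head()/->poll_mask() pair: the VFS first asks which wait
queue to sleep on, then asks for the currently-ready event mask, so
the driver never calls poll_wait() itself.  Sketch with illustrative
names (data_ready() stands in for a real readiness check):

	static DECLARE_WAIT_QUEUE_HEAD(my_wait);

	static bool data_ready(void)
	{
		return true;	/* stub for the real condition */
	}

	static struct wait_queue_head *my_get_poll_head(struct file *file,
							__poll_t events)
	{
		return &my_wait;
	}

	static __poll_t my_poll_mask(struct file *file, __poll_t events)
	{
		return data_ready() ? (EPOLLIN | EPOLLRDNORM) : 0;
	}
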
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 57dc546..94fedee 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -171,7 +171,7 @@
 #endif
 
 #ifdef CONFIG_PROC_FS
-static int rtc_proc_open(struct inode *inode, struct file *file);
+static int rtc_proc_show(struct seq_file *seq, void *v);
 #endif
 
 /*
@@ -832,16 +832,6 @@
 	.fops		= &rtc_fops,
 };
 
-#ifdef CONFIG_PROC_FS
-static const struct file_operations rtc_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= rtc_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif
-
 static resource_size_t rtc_size;
 
 static struct resource * __init rtc_request_region(resource_size_t size)
@@ -982,7 +972,7 @@
 	}
 
 #ifdef CONFIG_PROC_FS
-	ent = proc_create("driver/rtc", 0, NULL, &rtc_proc_fops);
+	ent = proc_create_single("driver/rtc", 0, NULL, rtc_proc_show);
 	if (!ent)
 		printk(KERN_WARNING "rtc: Failed to register with procfs.\n");
 #endif
@@ -1201,11 +1191,6 @@
 #undef YN
 #undef NY
 }
-
-static int rtc_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, rtc_proc_show, NULL);
-}
 #endif
 
 static void rtc_get_rtc_time(struct rtc_time *rtc_tm)
diff --git a/drivers/char/toshiba.c b/drivers/char/toshiba.c
index 5488516..802376f 100644
--- a/drivers/char/toshiba.c
+++ b/drivers/char/toshiba.c
@@ -326,19 +326,6 @@
 		key);
 	return 0;
 }
-
-static int proc_toshiba_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_toshiba_show, NULL);
-}
-
-static const struct file_operations proc_toshiba_fops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_toshiba_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 
@@ -524,7 +511,7 @@
 	{
 		struct proc_dir_entry *pde;
 
-		pde = proc_create("toshiba", 0, NULL, &proc_toshiba_fops);
+		pde = proc_create_single("toshiba", 0, NULL, proc_toshiba_show);
 		if (!pde) {
 			misc_deregister(&tosh_device);
 			return -ENOMEM;
diff --git a/drivers/clk/bcm/clk-bcm2835-aux.c b/drivers/clk/bcm/clk-bcm2835-aux.c
index 77e276d..f225ad2 100644
--- a/drivers/clk/bcm/clk-bcm2835-aux.c
+++ b/drivers/clk/bcm/clk-bcm2835-aux.c
@@ -40,8 +40,10 @@
 	if (IS_ERR(reg))
 		return PTR_ERR(reg);
 
-	onecell = devm_kmalloc(dev, sizeof(*onecell) + sizeof(*onecell->hws) *
-			       BCM2835_AUX_CLOCK_COUNT, GFP_KERNEL);
+	onecell = devm_kmalloc(dev,
+			       struct_size(onecell, hws,
+					   BCM2835_AUX_CLOCK_COUNT),
+			       GFP_KERNEL);
 	if (!onecell)
 		return -ENOMEM;
 	onecell->num = BCM2835_AUX_CLOCK_COUNT;
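
This and the clk conversions that follow all use struct_size() from
<linux/overflow.h>: for a struct ending in a trailing array,
struct_size(p, member, n) computes sizeof(*p) + n * sizeof(*p->member)
with the arithmetic checked for overflow (saturating to SIZE_MAX so
the allocation fails cleanly instead of wrapping).  Minimal sketch:

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct clk_hw;		/* only a pointer array, so this suffices */

	struct example {
		unsigned int num;
		struct clk_hw *hws[];	/* trailing flexible array */
	};

	static struct example *example_alloc(unsigned int n)
	{
		struct example *p;

		p = kzalloc(struct_size(p, hws, n), GFP_KERNEL);
		if (p)
			p->num = n;
		return p;
	}
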
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index fa0d5c8..6d4e69e 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -2147,8 +2147,8 @@
 	size_t i;
 	int ret;
 
-	cprman = devm_kzalloc(dev, sizeof(*cprman) +
-			      sizeof(*cprman->onecell.hws) * asize,
+	cprman = devm_kzalloc(dev,
+			      struct_size(cprman, onecell.hws, asize),
 			      GFP_KERNEL);
 	if (!cprman)
 		return -ENOMEM;
diff --git a/drivers/clk/bcm/clk-iproc-asiu.c b/drivers/clk/bcm/clk-iproc-asiu.c
index 4360e48..6fb8af5 100644
--- a/drivers/clk/bcm/clk-iproc-asiu.c
+++ b/drivers/clk/bcm/clk-iproc-asiu.c
@@ -197,8 +197,8 @@
 	if (WARN_ON(!asiu))
 		return;
 
-	asiu->clk_data = kzalloc(sizeof(*asiu->clk_data->hws) * num_clks +
-				 sizeof(*asiu->clk_data), GFP_KERNEL);
+	asiu->clk_data = kzalloc(struct_size(asiu->clk_data, hws, num_clks),
+				 GFP_KERNEL);
 	if (WARN_ON(!asiu->clk_data))
 		goto err_clks;
 	asiu->clk_data->num = num_clks;
diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c
index 43a58ae..274441e 100644
--- a/drivers/clk/bcm/clk-iproc-pll.c
+++ b/drivers/clk/bcm/clk-iproc-pll.c
@@ -744,8 +744,7 @@
 	if (WARN_ON(!pll))
 		return;
 
-	clk_data = kzalloc(sizeof(*clk_data->hws) * num_clks +
-				sizeof(*clk_data), GFP_KERNEL);
+	clk_data = kzalloc(struct_size(clk_data, hws, num_clks), GFP_KERNEL);
 	if (WARN_ON(!clk_data))
 		goto err_clk_data;
 	clk_data->num = num_clks;
diff --git a/drivers/clk/berlin/bg2.c b/drivers/clk/berlin/bg2.c
index e7331ac..45fb888 100644
--- a/drivers/clk/berlin/bg2.c
+++ b/drivers/clk/berlin/bg2.c
@@ -509,8 +509,7 @@
 	u8 avpll_flags = 0;
 	int n, ret;
 
-	clk_data = kzalloc(sizeof(*clk_data) +
-			   sizeof(*clk_data->hws) * MAX_CLKS, GFP_KERNEL);
+	clk_data = kzalloc(struct_size(clk_data, hws, MAX_CLKS), GFP_KERNEL);
 	if (!clk_data)
 		return;
 	clk_data->num = MAX_CLKS;
diff --git a/drivers/clk/berlin/bg2q.c b/drivers/clk/berlin/bg2q.c
index 67c270b..db7364e 100644
--- a/drivers/clk/berlin/bg2q.c
+++ b/drivers/clk/berlin/bg2q.c
@@ -295,8 +295,7 @@
 	struct clk_hw **hws;
 	int n, ret;
 
-	clk_data = kzalloc(sizeof(*clk_data) +
-			   sizeof(*clk_data->hws) * MAX_CLKS, GFP_KERNEL);
+	clk_data = kzalloc(struct_size(clk_data, hws, MAX_CLKS), GFP_KERNEL);
 	if (!clk_data)
 		return;
 	clk_data->num = MAX_CLKS;
diff --git a/drivers/clk/clk-asm9260.c b/drivers/clk/clk-asm9260.c
index bf0582c..44b5441 100644
--- a/drivers/clk/clk-asm9260.c
+++ b/drivers/clk/clk-asm9260.c
@@ -273,8 +273,7 @@
 	int n;
 	u32 accuracy = 0;
 
-	clk_data = kzalloc(sizeof(*clk_data) +
-			   sizeof(*clk_data->hws) * MAX_CLKS, GFP_KERNEL);
+	clk_data = kzalloc(struct_size(clk_data, hws, MAX_CLKS), GFP_KERNEL);
 	if (!clk_data)
 		return;
 	clk_data->num = MAX_CLKS;
diff --git a/drivers/clk/clk-aspeed.c b/drivers/clk/clk-aspeed.c
index 5eb50c3..7abe423 100644
--- a/drivers/clk/clk-aspeed.c
+++ b/drivers/clk/clk-aspeed.c
@@ -627,9 +627,9 @@
 	if (!scu_base)
 		return;
 
-	aspeed_clk_data = kzalloc(sizeof(*aspeed_clk_data) +
-			sizeof(*aspeed_clk_data->hws) * ASPEED_NUM_CLKS,
-			GFP_KERNEL);
+	aspeed_clk_data = kzalloc(struct_size(aspeed_clk_data, hws,
+					      ASPEED_NUM_CLKS),
+				  GFP_KERNEL);
 	if (!aspeed_clk_data)
 		return;
 
diff --git a/drivers/clk/clk-clps711x.c b/drivers/clk/clk-clps711x.c
index 9193f64..2c04396 100644
--- a/drivers/clk/clk-clps711x.c
+++ b/drivers/clk/clk-clps711x.c
@@ -54,9 +54,9 @@
 	if (!base)
 		return ERR_PTR(-ENOMEM);
 
-	clps711x_clk = kzalloc(sizeof(*clps711x_clk) +
-			sizeof(*clps711x_clk->clk_data.hws) * CLPS711X_CLK_MAX,
-			GFP_KERNEL);
+	clps711x_clk = kzalloc(struct_size(clps711x_clk, clk_data.hws,
+					   CLPS711X_CLK_MAX),
+			       GFP_KERNEL);
 	if (!clps711x_clk)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/clk/clk-efm32gg.c b/drivers/clk/clk-efm32gg.c
index f674778..f37cf08 100644
--- a/drivers/clk/clk-efm32gg.c
+++ b/drivers/clk/clk-efm32gg.c
@@ -25,8 +25,8 @@
 	void __iomem *base;
 	struct clk_hw **hws;
 
-	clk_data = kzalloc(sizeof(*clk_data) +
-			   sizeof(*clk_data->hws) * CMU_MAX_CLKS, GFP_KERNEL);
+	clk_data = kzalloc(struct_size(clk_data, hws, CMU_MAX_CLKS),
+			   GFP_KERNEL);
 
 	if (!clk_data)
 		return;
diff --git a/drivers/clk/clk-gemini.c b/drivers/clk/clk-gemini.c
index 5e66e6c..b51069e 100644
--- a/drivers/clk/clk-gemini.c
+++ b/drivers/clk/clk-gemini.c
@@ -399,9 +399,9 @@
 	int ret;
 	int i;
 
-	gemini_clk_data = kzalloc(sizeof(*gemini_clk_data) +
-			sizeof(*gemini_clk_data->hws) * GEMINI_NUM_CLKS,
-			GFP_KERNEL);
+	gemini_clk_data = kzalloc(struct_size(gemini_clk_data, hws,
+					      GEMINI_NUM_CLKS),
+				  GFP_KERNEL);
 	if (!gemini_clk_data)
 		return;
 
diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c
index fbaa84a..d44e0ee 100644
--- a/drivers/clk/clk-s2mps11.c
+++ b/drivers/clk/clk-s2mps11.c
@@ -147,8 +147,8 @@
 	if (!s2mps11_clks)
 		return -ENOMEM;
 
-	clk_data = devm_kzalloc(&pdev->dev, sizeof(*clk_data) +
-				sizeof(*clk_data->hws) * S2MPS11_CLKS_NUM,
+	clk_data = devm_kzalloc(&pdev->dev,
+				struct_size(clk_data, hws, S2MPS11_CLKS_NUM),
 				GFP_KERNEL);
 	if (!clk_data)
 		return -ENOMEM;
diff --git a/drivers/clk/clk-scmi.c b/drivers/clk/clk-scmi.c
index 488c213..bb2a6f2 100644
--- a/drivers/clk/clk-scmi.c
+++ b/drivers/clk/clk-scmi.c
@@ -137,8 +137,8 @@
 		return -EINVAL;
 	}
 
-	clk_data = devm_kzalloc(dev, sizeof(*clk_data) +
-				sizeof(*clk_data->hws) * count, GFP_KERNEL);
+	clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, count),
+				GFP_KERNEL);
 	if (!clk_data)
 		return -ENOMEM;
 
diff --git a/drivers/clk/clk-stm32h7.c b/drivers/clk/clk-stm32h7.c
index db2b162..d3271ec 100644
--- a/drivers/clk/clk-stm32h7.c
+++ b/drivers/clk/clk-stm32h7.c
@@ -1201,9 +1201,8 @@
 	const char *hse_clk, *lse_clk, *i2s_clk;
 	struct regmap *pdrm;
 
-	clk_data = kzalloc(sizeof(*clk_data) +
-			sizeof(*clk_data->hws) * STM32H7_MAX_CLKS,
-			GFP_KERNEL);
+	clk_data = kzalloc(struct_size(clk_data, hws, STM32H7_MAX_CLKS),
+			   GFP_KERNEL);
 	if (!clk_data)
 		return;
 
diff --git a/drivers/clk/clk-stm32mp1.c b/drivers/clk/clk-stm32mp1.c
index edd3cf4..83e8cd81 100644
--- a/drivers/clk/clk-stm32mp1.c
+++ b/drivers/clk/clk-stm32mp1.c
@@ -2060,9 +2060,8 @@
 
 	max_binding =  data->maxbinding;
 
-	clk_data = kzalloc(sizeof(*clk_data) +
-				  sizeof(*clk_data->hws) * max_binding,
-				  GFP_KERNEL);
+	clk_data = kzalloc(struct_size(clk_data, hws, max_binding),
+			   GFP_KERNEL);
 	if (!clk_data)
 		return -ENOMEM;
 
diff --git a/drivers/clk/davinci/da8xx-cfgchip.c b/drivers/clk/davinci/da8xx-cfgchip.c
index c971111..aae62a5 100644
--- a/drivers/clk/davinci/da8xx-cfgchip.c
+++ b/drivers/clk/davinci/da8xx-cfgchip.c
@@ -650,8 +650,8 @@
 	struct da8xx_usb0_clk48 *usb0;
 	struct da8xx_usb1_clk48 *usb1;
 
-	clk_data = devm_kzalloc(dev, sizeof(*clk_data) + 2 *
-				sizeof(*clk_data->hws), GFP_KERNEL);
+	clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, 2),
+				GFP_KERNEL);
 	if (!clk_data)
 		return -ENOMEM;
 
diff --git a/drivers/clk/mvebu/armada-37xx-periph.c b/drivers/clk/mvebu/armada-37xx-periph.c
index 87213ea..6860bd5 100644
--- a/drivers/clk/mvebu/armada-37xx-periph.c
+++ b/drivers/clk/mvebu/armada-37xx-periph.c
@@ -667,9 +667,10 @@
 	if (!driver_data)
 		return -ENOMEM;
 
-	driver_data->hw_data = devm_kzalloc(dev, sizeof(*driver_data->hw_data) +
-			    sizeof(*driver_data->hw_data->hws) * num_periph,
-			    GFP_KERNEL);
+	driver_data->hw_data = devm_kzalloc(dev,
+					    struct_size(driver_data->hw_data,
+							hws, num_periph),
+					    GFP_KERNEL);
 	if (!driver_data->hw_data)
 		return -ENOMEM;
 	driver_data->hw_data->num = num_periph;
diff --git a/drivers/clk/mvebu/armada-37xx-tbg.c b/drivers/clk/mvebu/armada-37xx-tbg.c
index aa80db1..7ff041f 100644
--- a/drivers/clk/mvebu/armada-37xx-tbg.c
+++ b/drivers/clk/mvebu/armada-37xx-tbg.c
@@ -91,8 +91,8 @@
 	void __iomem *reg;
 	int i, ret;
 
-	hw_tbg_data = devm_kzalloc(&pdev->dev, sizeof(*hw_tbg_data)
-				   + sizeof(*hw_tbg_data->hws) * NUM_TBG,
+	hw_tbg_data = devm_kzalloc(&pdev->dev,
+				   struct_size(hw_tbg_data, hws, NUM_TBG),
 				   GFP_KERNEL);
 	if (!hw_tbg_data)
 		return -ENOMEM;
diff --git a/drivers/clk/qcom/clk-spmi-pmic-div.c b/drivers/clk/qcom/clk-spmi-pmic-div.c
index 8672ab8..c90dfdd 100644
--- a/drivers/clk/qcom/clk-spmi-pmic-div.c
+++ b/drivers/clk/qcom/clk-spmi-pmic-div.c
@@ -239,8 +239,7 @@
 	if (!nclks)
 		return -EINVAL;
 
-	cc = devm_kzalloc(dev, sizeof(*cc) + sizeof(*cc->clks) * nclks,
-			  GFP_KERNEL);
+	cc = devm_kzalloc(dev, struct_size(cc, clks, nclks), GFP_KERNEL);
 	if (!cc)
 		return -ENOMEM;
 	cc->nclks = nclks;
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c
index 4e88e98..69a7c75 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.c
+++ b/drivers/clk/renesas/renesas-cpg-mssr.c
@@ -258,8 +258,9 @@
 		dev_err(dev, "Cannot get %s clock %u: %ld", type, clkidx,
 		       PTR_ERR(clk));
 	else
-		dev_dbg(dev, "clock (%u, %u) is %pC at %pCr Hz\n",
-			clkspec->args[0], clkspec->args[1], clk, clk);
+		dev_dbg(dev, "clock (%u, %u) is %pC at %lu Hz\n",
+			clkspec->args[0], clkspec->args[1], clk,
+			clk_get_rate(clk));
 	return clk;
 }
 
@@ -326,7 +327,7 @@
 	if (IS_ERR_OR_NULL(clk))
 		goto fail;
 
-	dev_dbg(dev, "Core clock %pC at %pCr Hz\n", clk, clk);
+	dev_dbg(dev, "Core clock %pC at %lu Hz\n", clk, clk_get_rate(clk));
 	priv->clks[id] = clk;
 	return;
 
@@ -392,7 +393,7 @@
 	if (IS_ERR(clk))
 		goto fail;
 
-	dev_dbg(dev, "Module clock %pC at %pCr Hz\n", clk, clk);
+	dev_dbg(dev, "Module clock %pC at %lu Hz\n", clk, clk_get_rate(clk));
 	priv->clks[id] = clk;
 	priv->smstpcr_saved[clock->index / 32].mask |= BIT(clock->index % 32);
 	return;
diff --git a/drivers/clk/samsung/clk-exynos-audss.c b/drivers/clk/samsung/clk-exynos-audss.c
index b4b057c..f659c5c 100644
--- a/drivers/clk/samsung/clk-exynos-audss.c
+++ b/drivers/clk/samsung/clk-exynos-audss.c
@@ -149,8 +149,8 @@
 	epll = ERR_PTR(-ENODEV);
 
 	clk_data = devm_kzalloc(dev,
-				sizeof(*clk_data) +
-				sizeof(*clk_data->hws) * EXYNOS_AUDSS_MAX_CLKS,
+				struct_size(clk_data, hws,
+					    EXYNOS_AUDSS_MAX_CLKS),
 				GFP_KERNEL);
 	if (!clk_data)
 		return -ENOMEM;
diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c
index f29fb58..9c95390 100644
--- a/drivers/clk/samsung/clk-exynos-clkout.c
+++ b/drivers/clk/samsung/clk-exynos-clkout.c
@@ -61,8 +61,7 @@
 	int ret;
 	int i;
 
-	clkout = kzalloc(sizeof(*clkout) +
-			 sizeof(*clkout->data.hws) * EXYNOS_CLKOUT_NR_CLKS,
+	clkout = kzalloc(struct_size(clkout, data.hws, EXYNOS_CLKOUT_NR_CLKS),
 			 GFP_KERNEL);
 	if (!clkout)
 		return;
diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c
index 5305ace..162de44 100644
--- a/drivers/clk/samsung/clk-exynos5433.c
+++ b/drivers/clk/samsung/clk-exynos5433.c
@@ -5505,8 +5505,8 @@
 
 	info = of_device_get_match_data(dev);
 
-	data = devm_kzalloc(dev, sizeof(*data) +
-			    sizeof(*data->ctx.clk_data.hws) * info->nr_clk_ids,
+	data = devm_kzalloc(dev,
+			    struct_size(data, ctx.clk_data.hws, info->nr_clk_ids),
 			    GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
diff --git a/drivers/clk/samsung/clk-s3c2410-dclk.c b/drivers/clk/samsung/clk-s3c2410-dclk.c
index 077df3e..66a9047 100644
--- a/drivers/clk/samsung/clk-s3c2410-dclk.c
+++ b/drivers/clk/samsung/clk-s3c2410-dclk.c
@@ -247,9 +247,10 @@
 	struct clk_hw **clk_table;
 	int ret, i;
 
-	s3c24xx_dclk = devm_kzalloc(&pdev->dev, sizeof(*s3c24xx_dclk) +
-			    sizeof(*s3c24xx_dclk->clk_data.hws) * DCLK_MAX_CLKS,
-			    GFP_KERNEL);
+	s3c24xx_dclk = devm_kzalloc(&pdev->dev,
+				    struct_size(s3c24xx_dclk, clk_data.hws,
+						DCLK_MAX_CLKS),
+				    GFP_KERNEL);
 	if (!s3c24xx_dclk)
 		return -ENOMEM;
 
diff --git a/drivers/clk/samsung/clk-s5pv210-audss.c b/drivers/clk/samsung/clk-s5pv210-audss.c
index b964141..22b18e7 100644
--- a/drivers/clk/samsung/clk-s5pv210-audss.c
+++ b/drivers/clk/samsung/clk-s5pv210-audss.c
@@ -81,8 +81,7 @@
 	}
 
 	clk_data = devm_kzalloc(&pdev->dev,
-				sizeof(*clk_data) +
-				sizeof(*clk_data->hws) * AUDSS_MAX_CLKS,
+				struct_size(clk_data, hws, AUDSS_MAX_CLKS),
 				GFP_KERNEL);
 
 	if (!clk_data)
diff --git a/drivers/clk/x86/Makefile b/drivers/clk/x86/Makefile
index 1367afb..00303bc 100644
--- a/drivers/clk/x86/Makefile
+++ b/drivers/clk/x86/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_PMC_ATOM)		+= clk-pmc-atom.o
+obj-$(CONFIG_X86_AMD_PLATFORM_DEVICE)	+= clk-st.o
 clk-x86-lpss-objs		:= clk-lpt.o
 obj-$(CONFIG_X86_INTEL_LPSS)	+= clk-x86-lpss.o
-obj-$(CONFIG_PMC_ATOM)		+= clk-pmc-atom.o
diff --git a/drivers/clk/x86/clk-st.c b/drivers/clk/x86/clk-st.c
new file mode 100644
index 0000000..fb62f39
--- /dev/null
+++ b/drivers/clk/x86/clk-st.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: MIT
+/*
+ * clock framework for AMD Stoney based clocks
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/platform_data/clk-st.h>
+#include <linux/platform_device.h>
+
+/* Clock Driving Strength 2 register */
+#define CLKDRVSTR2	0x28
+/* Clock Control 1 register */
+#define MISCCLKCNTL1	0x40
+/* Auxiliary clock1 enable bit */
+#define OSCCLKENB	2
+/* 25 MHz auxiliary output clock freq bit */
+#define OSCOUT1CLK25MHZ	16
+
+#define ST_CLK_48M	0
+#define ST_CLK_25M	1
+#define ST_CLK_MUX	2
+#define ST_CLK_GATE	3
+#define ST_MAX_CLKS	4
+
+static const char * const clk_oscout1_parents[] = { "clk48MHz", "clk25MHz" };
+static struct clk_hw *hws[ST_MAX_CLKS];
+
+static int st_clk_probe(struct platform_device *pdev)
+{
+	struct st_clk_data *st_data;
+
+	st_data = dev_get_platdata(&pdev->dev);
+	if (!st_data || !st_data->base)
+		return -EINVAL;
+
+	hws[ST_CLK_48M] = clk_hw_register_fixed_rate(NULL, "clk48MHz", NULL, 0,
+						     48000000);
+	hws[ST_CLK_25M] = clk_hw_register_fixed_rate(NULL, "clk25MHz", NULL, 0,
+						     25000000);
+
+	hws[ST_CLK_MUX] = clk_hw_register_mux(NULL, "oscout1_mux",
+		clk_oscout1_parents, ARRAY_SIZE(clk_oscout1_parents),
+		0, st_data->base + CLKDRVSTR2, OSCOUT1CLK25MHZ, 3, 0, NULL);
+
+	clk_set_parent(hws[ST_CLK_MUX]->clk, hws[ST_CLK_25M]->clk);
+
+	hws[ST_CLK_GATE] = clk_hw_register_gate(NULL, "oscout1", "oscout1_mux",
+		0, st_data->base + MISCCLKCNTL1, OSCCLKENB,
+		CLK_GATE_SET_TO_DISABLE, NULL);
+
+	clk_hw_register_clkdev(hws[ST_CLK_GATE], "oscout1", NULL);
+
+	return 0;
+}
+
+static int st_clk_remove(struct platform_device *pdev)
+{
+	int i;
+
+	for (i = 0; i < ST_MAX_CLKS; i++)
+		clk_hw_unregister(hws[i]);
+	return 0;
+}
+
+static struct platform_driver st_clk_driver = {
+	.driver = {
+		.name = "clk-st",
+		.suppress_bind_attrs = true,
+	},
+	.probe = st_clk_probe,
+	.remove = st_clk_remove,
+};
+builtin_platform_driver(st_clk_driver);
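
Because the gate is registered with a clkdev lookup rather than through the device tree, a consumer can reach the output by its connection id. A hedged sketch of consumer usage; the consumer device here is hypothetical, and only the "oscout1" name comes from the driver above:

    struct clk *oscout1;
    int ret;

    oscout1 = clk_get(dev, "oscout1");  /* matched via clk_hw_register_clkdev() */
    if (IS_ERR(oscout1))
            return PTR_ERR(oscout1);

    ret = clk_prepare_enable(oscout1);  /* ungates the auxiliary output */
    if (ret) {
            clk_put(oscout1);
            return ret;
    }
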
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 8e8a097..dec0dd8 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -450,8 +450,10 @@
 	  Support for Mediatek timer driver.
 
 config SPRD_TIMER
-	bool "Spreadtrum timer driver" if COMPILE_TEST
+	bool "Spreadtrum timer driver" if EXPERT
 	depends on HAS_IOMEM
+	depends on (ARCH_SPRD || COMPILE_TEST)
+	default ARCH_SPRD
 	select TIMER_OF
 	help
 	  Enables support for the Spreadtrum timer driver.
diff --git a/drivers/clocksource/arc_timer.c b/drivers/clocksource/arc_timer.c
index 471b428..20da9b1 100644
--- a/drivers/clocksource/arc_timer.c
+++ b/drivers/clocksource/arc_timer.c
@@ -61,6 +61,20 @@
 	unsigned long flags;
 	u32 l, h;
 
+	/*
+	 * From a programming model point of view, there seems to be just one
+	 * instance of MCIP_CMD/MCIP_READBACK; micro-architecturally, however,
+	 * there is an instance PER ARC CORE (not per cluster), and there is
+	 * dedicated hardware decode logic (per core) inside ARConnect to
+	 * handle simultaneous read/write accesses from cores via those two
+	 * registers. So several concurrent commands to ARConnect are OK as
+	 * long as they access different sub-components (like GFRC, inter-core
+	 * interrupt, etc.). The hardware also supports multiple cores
+	 * accessing GFRC simultaneously.
+	 * That's why it is safe to disable hard interrupts on the local CPU
+	 * before accessing GFRC, instead of taking the global MCIP spinlock
+	 * defined in arch/arc/kernel/mcip.c.
+	 */
 	local_irq_save(flags);
 
 	__mcip_cmd(CMD_GFRC_READ_LO, 0);
diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c
index 986b679..54f8a33 100644
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -5,6 +5,9 @@
  *
  * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
  */
+
+#define pr_fmt(fmt) "mips-gic-timer: " fmt
+
 #include <linux/clk.h>
 #include <linux/clockchips.h>
 #include <linux/cpu.h>
@@ -136,8 +139,7 @@
 
 	ret = setup_percpu_irq(gic_timer_irq, &gic_compare_irqaction);
 	if (ret < 0) {
-		pr_err("GIC timer IRQ %d setup failed: %d\n",
-		       gic_timer_irq, ret);
+		pr_err("IRQ %d setup failed (%d)\n", gic_timer_irq, ret);
 		return ret;
 	}
 
@@ -176,7 +178,7 @@
 
 	ret = clocksource_register_hz(&gic_clocksource, gic_frequency);
 	if (ret < 0)
-		pr_warn("GIC: Unable to register clocksource\n");
+		pr_warn("Unable to register clocksource\n");
 
 	return ret;
 }
@@ -188,7 +190,7 @@
 
 	if (!mips_gic_present() || !node->parent ||
 	    !of_device_is_compatible(node->parent, "mti,gic")) {
-		pr_warn("No DT definition for the mips gic driver\n");
+		pr_warn("No DT definition\n");
 		return -ENXIO;
 	}
 
@@ -196,7 +198,7 @@
 	if (!IS_ERR(clk)) {
 		ret = clk_prepare_enable(clk);
 		if (ret < 0) {
-			pr_err("GIC failed to enable clock\n");
+			pr_err("Failed to enable clock\n");
 			clk_put(clk);
 			return ret;
 		}
@@ -204,12 +206,12 @@
 		gic_frequency = clk_get_rate(clk);
 	} else if (of_property_read_u32(node, "clock-frequency",
 					&gic_frequency)) {
-		pr_err("GIC frequency not specified.\n");
+		pr_err("Frequency not specified\n");
 		return -EINVAL;
 	}
 	gic_timer_irq = irq_of_parse_and_map(node, 0);
 	if (!gic_timer_irq) {
-		pr_err("GIC timer IRQ not specified.\n");
+		pr_err("IRQ not specified\n");
 		return -EINVAL;
 	}
 
@@ -220,7 +222,7 @@
 	ret = gic_clockevent_init();
 	if (!ret && !IS_ERR(clk)) {
 		if (clk_notifier_register(clk, &gic_clk_nb) < 0)
-			pr_warn("GIC: Unable to register clock notifier\n");
+			pr_warn("Unable to register clock notifier\n");
 	}
 
 	/* And finally start the counter */
diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c
index a03434e..f6ddae3 100644
--- a/drivers/clocksource/mxs_timer.c
+++ b/drivers/clocksource/mxs_timer.c
@@ -1,24 +1,10 @@
-/*
- *  Copyright (C) 2000-2001 Deep Blue Solutions
- *  Copyright (C) 2002 Shane Nay (shane@minirl.com)
- *  Copyright (C) 2006-2007 Pavel Pisa (ppisa@pikron.com)
- *  Copyright (C) 2008 Juergen Beisert (kernel@pengutronix.de)
- *  Copyright (C) 2010 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+//  Copyright (C) 2000-2001 Deep Blue Solutions
+//  Copyright (C) 2002 Shane Nay (shane@minirl.com)
+//  Copyright (C) 2006-2007 Pavel Pisa (ppisa@pikron.com)
+//  Copyright (C) 2008 Juergen Beisert (kernel@pengutronix.de)
+//  Copyright (C) 2010 Freescale Semiconductor, Inc. All Rights Reserved.
 
 #include <linux/err.h>
 #include <linux/interrupt.h>
diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c
index 6ec6d79..165fbbb 100644
--- a/drivers/clocksource/timer-imx-gpt.c
+++ b/drivers/clocksource/timer-imx-gpt.c
@@ -1,25 +1,9 @@
-/*
- *  linux/arch/arm/plat-mxc/time.c
- *
- *  Copyright (C) 2000-2001 Deep Blue Solutions
- *  Copyright (C) 2002 Shane Nay (shane@minirl.com)
- *  Copyright (C) 2006-2007 Pavel Pisa (ppisa@pikron.com)
- *  Copyright (C) 2008 Juergen Beisert (kernel@pengutronix.de)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- * MA 02110-1301, USA.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+//  Copyright (C) 2000-2001 Deep Blue Solutions
+//  Copyright (C) 2002 Shane Nay (shane@minirl.com)
+//  Copyright (C) 2006-2007 Pavel Pisa (ppisa@pikron.com)
+//  Copyright (C) 2008 Juergen Beisert (kernel@pengutronix.de)
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c
index 6c83184..b7aa2b8 100644
--- a/drivers/clocksource/timer-imx-tpm.c
+++ b/drivers/clocksource/timer-imx-tpm.c
@@ -1,12 +1,7 @@
-/*
- * Copyright 2016 Freescale Semiconductor, Inc.
- * Copyright 2017 NXP
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2016 Freescale Semiconductor, Inc.
+// Copyright 2017 NXP
 
 #include <linux/clk.h>
 #include <linux/clockchips.h>
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 8615594b..e718b8c 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -260,19 +260,6 @@
 	return 0;
 }
 
-static int cn_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cn_proc_show, NULL);
-}
-
-static const struct file_operations cn_file_ops = {
-	.owner   = THIS_MODULE,
-	.open    = cn_proc_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release
-};
-
 static struct cn_dev cdev = {
 	.input   = cn_rx_skb,
 };
@@ -297,7 +284,7 @@
 
 	cn_already_initialized = 1;
 
-	proc_create("connector", S_IRUGO, init_net.proc_net, &cn_file_ops);
+	proc_create_single("connector", S_IRUGO, init_net.proc_net, cn_proc_show);
 
 	return 0;
 }
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 96b35b8..c7ce928 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -124,6 +124,17 @@
 	depends on ARCH_OMAP2PLUS
 	default ARCH_OMAP2PLUS
 
+config ARM_QCOM_CPUFREQ_KRYO
+	bool "Qualcomm Kryo based CPUFreq"
+	depends on ARM64
+	depends on QCOM_QFPROM
+	depends on QCOM_SMEM
+	select PM_OPP
+	help
+	  This adds the CPUFreq driver for Qualcomm Kryo SoC based boards.
+
+	  If in doubt, say N.
+
 config ARM_S3C_CPUFREQ
 	bool
 	help
@@ -264,7 +275,7 @@
 	default y
 
 config ARM_TEGRA20_CPUFREQ
-	bool "Tegra20 CPUFreq support"
+	tristate "Tegra20 CPUFreq support"
 	depends on ARCH_TEGRA
 	default y
 	help
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 8d24ade..fb4a2ec 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -65,6 +65,7 @@
 obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ)	+= omap-cpufreq.o
 obj-$(CONFIG_ARM_PXA2xx_CPUFREQ)	+= pxa2xx-cpufreq.o
 obj-$(CONFIG_PXA3xx)			+= pxa3xx-cpufreq.o
+obj-$(CONFIG_ARM_QCOM_CPUFREQ_KRYO)	+= qcom-cpufreq-kryo.o
 obj-$(CONFIG_ARM_S3C2410_CPUFREQ)	+= s3c2410-cpufreq.o
 obj-$(CONFIG_ARM_S3C2412_CPUFREQ)	+= s3c2412-cpufreq.o
 obj-$(CONFIG_ARM_S3C2416_CPUFREQ)	+= s3c2416-cpufreq.o
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index 72a2975..739da90 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -23,6 +23,8 @@
 #include <linux/regmap.h>
 #include <linux/slab.h>
 
+#include "cpufreq-dt.h"
+
 /* Power management in North Bridge register set */
 #define ARMADA_37XX_NB_L0L1	0x18
 #define ARMADA_37XX_NB_L2L3	0x1C
@@ -56,6 +58,16 @@
  */
 #define LOAD_LEVEL_NR	4
 
+struct armada37xx_cpufreq_state {
+	struct regmap *regmap;
+	u32 nb_l0l1;
+	u32 nb_l2l3;
+	u32 nb_dyn_mod;
+	u32 nb_cpu_load;
+};
+
+static struct armada37xx_cpufreq_state *armada37xx_cpufreq_state;
+
 struct armada_37xx_dvfs {
 	u32 cpu_freq_max;
 	u8 divider[LOAD_LEVEL_NR];
@@ -136,7 +148,7 @@
 	clk_set_parent(clk, parent);
 }
 
-static void __init armada37xx_cpufreq_disable_dvfs(struct regmap *base)
+static void armada37xx_cpufreq_disable_dvfs(struct regmap *base)
 {
 	unsigned int reg = ARMADA_37XX_NB_DYN_MOD,
 		mask = ARMADA_37XX_NB_DFS_EN;
@@ -162,10 +174,47 @@
 	regmap_update_bits(base, reg, mask, mask);
 }
 
+static int armada37xx_cpufreq_suspend(struct cpufreq_policy *policy)
+{
+	struct armada37xx_cpufreq_state *state = armada37xx_cpufreq_state;
+
+	regmap_read(state->regmap, ARMADA_37XX_NB_L0L1, &state->nb_l0l1);
+	regmap_read(state->regmap, ARMADA_37XX_NB_L2L3, &state->nb_l2l3);
+	regmap_read(state->regmap, ARMADA_37XX_NB_CPU_LOAD,
+		    &state->nb_cpu_load);
+	regmap_read(state->regmap, ARMADA_37XX_NB_DYN_MOD, &state->nb_dyn_mod);
+
+	return 0;
+}
+
+static int armada37xx_cpufreq_resume(struct cpufreq_policy *policy)
+{
+	struct armada37xx_cpufreq_state *state = armada37xx_cpufreq_state;
+
+	/* Ensure DVFS is disabled otherwise the following registers are RO */
+	armada37xx_cpufreq_disable_dvfs(state->regmap);
+
+	regmap_write(state->regmap, ARMADA_37XX_NB_L0L1, state->nb_l0l1);
+	regmap_write(state->regmap, ARMADA_37XX_NB_L2L3, state->nb_l2l3);
+	regmap_write(state->regmap, ARMADA_37XX_NB_CPU_LOAD,
+		     state->nb_cpu_load);
+
+	/*
+	 * The NB_DYN_MOD register is the one that actually re-enables DVFS if
+	 * it was enabled before the suspend operation. It must be written
+	 * last, otherwise the other registers are not writable.
+	 */
+	regmap_write(state->regmap, ARMADA_37XX_NB_DYN_MOD, state->nb_dyn_mod);
+
+	return 0;
+}
+
 static int __init armada37xx_cpufreq_driver_init(void)
 {
+	struct cpufreq_dt_platform_data pdata;
 	struct armada_37xx_dvfs *dvfs;
 	struct platform_device *pdev;
+	unsigned long freq;
 	unsigned int cur_frequency;
 	struct regmap *nb_pm_base;
 	struct device *cpu_dev;
@@ -207,33 +256,58 @@
 	}
 
 	dvfs = armada_37xx_cpu_freq_info_get(cur_frequency);
-	if (!dvfs)
+	if (!dvfs) {
+		clk_put(clk);
 		return -EINVAL;
+	}
+
+	armada37xx_cpufreq_state = kmalloc(sizeof(*armada37xx_cpufreq_state),
+					   GFP_KERNEL);
+	if (!armada37xx_cpufreq_state) {
+		clk_put(clk);
+		return -ENOMEM;
+	}
+
+	armada37xx_cpufreq_state->regmap = nb_pm_base;
 
 	armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider);
 	clk_put(clk);
 
 	for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
 	     load_lvl++) {
-		unsigned long freq = cur_frequency / dvfs->divider[load_lvl];
+		freq = cur_frequency / dvfs->divider[load_lvl];
 
 		ret = dev_pm_opp_add(cpu_dev, freq, 0);
-		if (ret) {
-			/* clean-up the already added opp before leaving */
-			while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) {
-				freq = cur_frequency / dvfs->divider[load_lvl];
-				dev_pm_opp_remove(cpu_dev, freq);
-			}
-			return ret;
-		}
+		if (ret)
+			goto remove_opp;
 	}
 
 	/* Now that everything is setup, enable the DVFS at hardware level */
 	armada37xx_cpufreq_enable_dvfs(nb_pm_base);
 
-	pdev = platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
+	pdata.suspend = armada37xx_cpufreq_suspend;
+	pdata.resume = armada37xx_cpufreq_resume;
 
-	return PTR_ERR_OR_ZERO(pdev);
+	pdev = platform_device_register_data(NULL, "cpufreq-dt", -1, &pdata,
+					     sizeof(pdata));
+	ret = PTR_ERR_OR_ZERO(pdev);
+	if (ret)
+		goto disable_dvfs;
+
+	return 0;
+
+disable_dvfs:
+	armada37xx_cpufreq_disable_dvfs(nb_pm_base);
+remove_opp:
+	/* clean-up the already added opp before leaving */
+	while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) {
+		freq = cur_frequency / dvfs->divider[load_lvl];
+		dev_pm_opp_remove(cpu_dev, freq);
+	}
+
+	kfree(armada37xx_cpufreq_state);
+
+	return ret;
 }
 /* late_initcall, to guarantee the driver is loaded after A37xx clock driver */
 late_initcall(armada37xx_cpufreq_driver_init);
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index b15115a..3464580 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -42,9 +42,6 @@
  */
 static struct cppc_cpudata **all_cpu_data;
 
-/* Capture the max KHz from DMI */
-static u64 cppc_dmi_max_khz;
-
 /* Callback function used to retrieve the max frequency from DMI */
 static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
 {
@@ -75,6 +72,64 @@
 	return (1000 * mhz);
 }
 
+/*
+ * If the CPPC lowest_freq and nominal_freq registers are exposed, then we
+ * can use them to convert perf to freq and vice versa.
+ *
+ * If the perf/freq point lies between Nominal and Lowest, we can treat
+ * (Lowest perf, Lowest freq) and (Nominal perf, Nominal freq) as the 2D
+ * co-ordinates of a line and extrapolate the rest.
+ * For perf/freq > Nominal, we use the perf:freq ratio at Nominal for the
+ * conversion.
+ */
+static unsigned int cppc_cpufreq_perf_to_khz(struct cppc_cpudata *cpu,
+					unsigned int perf)
+{
+	static u64 max_khz;
+	struct cppc_perf_caps *caps = &cpu->perf_caps;
+	u64 mul, div;
+
+	if (caps->lowest_freq && caps->nominal_freq) {
+		if (perf >= caps->nominal_perf) {
+			mul = caps->nominal_freq;
+			div = caps->nominal_perf;
+		} else {
+			mul = caps->nominal_freq - caps->lowest_freq;
+			div = caps->nominal_perf - caps->lowest_perf;
+		}
+	} else {
+		if (!max_khz)
+			max_khz = cppc_get_dmi_max_khz();
+		mul = max_khz;
+		div = cpu->perf_caps.highest_perf;
+	}
+	return (u64)perf * mul / div;
+}
+
+static unsigned int cppc_cpufreq_khz_to_perf(struct cppc_cpudata *cpu,
+					unsigned int freq)
+{
+	static u64 max_khz;
+	struct cppc_perf_caps *caps = &cpu->perf_caps;
+	u64  mul, div;
+
+	if (caps->lowest_freq && caps->nominal_freq) {
+		if (freq >= caps->nominal_freq) {
+			mul = caps->nominal_perf;
+			div = caps->nominal_freq;
+		} else {
+			mul = caps->lowest_perf;
+			div = caps->lowest_freq;
+		}
+	} else {
+		if (!max_khz)
+			max_khz = cppc_get_dmi_max_khz();
+		mul = cpu->perf_caps.highest_perf;
+		div = max_khz;
+	}
+
+	return (u64)freq * mul / div;
+}
+
 static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
 		unsigned int target_freq,
 		unsigned int relation)
@@ -86,7 +141,7 @@
 
 	cpu = all_cpu_data[policy->cpu];
 
-	desired_perf = (u64)target_freq * cpu->perf_caps.highest_perf / cppc_dmi_max_khz;
+	desired_perf = cppc_cpufreq_khz_to_perf(cpu, target_freq);
 	/* Return if it is exactly the same perf */
 	if (desired_perf == cpu->perf_ctrls.desired_perf)
 		return ret;
@@ -186,24 +241,24 @@
 		return ret;
 	}
 
-	cppc_dmi_max_khz = cppc_get_dmi_max_khz();
+	/* Convert the lowest and nominal freq from MHz to KHz */
+	cpu->perf_caps.lowest_freq *= 1000;
+	cpu->perf_caps.nominal_freq *= 1000;
 
 	/*
 	 * Set min to lowest nonlinear perf to avoid any efficiency penalty (see
 	 * Section 8.4.7.1.1.5 of ACPI 6.1 spec)
 	 */
-	policy->min = cpu->perf_caps.lowest_nonlinear_perf * cppc_dmi_max_khz /
-		cpu->perf_caps.highest_perf;
-	policy->max = cppc_dmi_max_khz;
+	policy->min = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.lowest_nonlinear_perf);
+	policy->max = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.highest_perf);
 
 	/*
 	 * Set cpuinfo.min_freq to Lowest to make the full range of performance
 	 * available if userspace wants to use any perf between lowest & lowest
 	 * nonlinear perf
 	 */
-	policy->cpuinfo.min_freq = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz /
-		cpu->perf_caps.highest_perf;
-	policy->cpuinfo.max_freq = cppc_dmi_max_khz;
+	policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.lowest_perf);
+	policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.highest_perf);
 
 	policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu_num);
 	policy->shared_type = cpu->shared_type;
@@ -229,7 +284,8 @@
 	cpu->cur_policy = policy;
 
 	/* Set policy->cur to max now. The governors will adjust later. */
-	policy->cur = cppc_dmi_max_khz;
+	policy->cur = cppc_cpufreq_perf_to_khz(cpu,
+					cpu->perf_caps.highest_perf);
 	cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
 
 	ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls);
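
The two helpers above convert using the lowest/nominal capability pairs when firmware exposes them, and fall back to the DMI-derived maximum otherwise. A worked example with hypothetical capability values, not taken from any real platform:

    /* Assume, after the MHz -> KHz scaling done in ->init():
     *   lowest_perf  = 100, lowest_freq  = 300000 KHz
     *   nominal_perf = 300, nominal_freq = 900000 KHz
     *
     * perf 200, below nominal:
     *   mul = 900000 - 300000 = 600000, div = 300 - 100 = 200
     *   freq = 200 * 600000 / 200 = 600000 KHz
     *
     * perf 350, above nominal, so the ratio at nominal applies:
     *   mul = 900000, div = 300
     *   freq = 350 * 900000 / 300 = 1050000 KHz
     */
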
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 3b585e4..fe14c57 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -66,8 +66,6 @@
 	{ .compatible = "renesas,r8a7792", },
 	{ .compatible = "renesas,r8a7793", },
 	{ .compatible = "renesas,r8a7794", },
-	{ .compatible = "renesas,r8a7795", },
-	{ .compatible = "renesas,r8a7796", },
 	{ .compatible = "renesas,sh73a0", },
 
 	{ .compatible = "rockchip,rk2928", },
@@ -118,6 +116,9 @@
 
 	{ .compatible = "nvidia,tegra124", },
 
+	{ .compatible = "qcom,apq8096", },
+	{ .compatible = "qcom,msm8996", },
+
 	{ .compatible = "st,stih407", },
 	{ .compatible = "st,stih410", },
 
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 190ea0d..0a9ebf0 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -346,8 +346,14 @@
 	if (ret)
 		return ret;
 
-	if (data && data->have_governor_per_policy)
-		dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY;
+	if (data) {
+		if (data->have_governor_per_policy)
+			dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY;
+
+		dt_cpufreq_driver.resume = data->resume;
+		if (data->suspend)
+			dt_cpufreq_driver.suspend = data->suspend;
+	}
 
 	ret = cpufreq_register_driver(&dt_cpufreq_driver);
 	if (ret)
diff --git a/drivers/cpufreq/cpufreq-dt.h b/drivers/cpufreq/cpufreq-dt.h
index 54d774e..d5aeea1 100644
--- a/drivers/cpufreq/cpufreq-dt.h
+++ b/drivers/cpufreq/cpufreq-dt.h
@@ -12,8 +12,13 @@
 
 #include <linux/types.h>
 
+struct cpufreq_policy;
+
 struct cpufreq_dt_platform_data {
 	bool have_governor_per_policy;
+
+	int (*suspend)(struct cpufreq_policy *policy);
+	int (*resume)(struct cpufreq_policy *policy);
 };
 
 #endif /* __CPUFREQ_DT_H__ */
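
These hooks are what the Armada 37xx change above plugs into: a platform that registers the generic cpufreq-dt device can now hand it suspend/resume callbacks through the platform data. A minimal sketch; the callback names are placeholders:

    static struct cpufreq_dt_platform_data pdata = {
            .suspend = my_soc_cpufreq_suspend,  /* hypothetical callbacks */
            .resume  = my_soc_cpufreq_resume,
    };

    pdev = platform_device_register_data(NULL, "cpufreq-dt", -1,
                                         &pdata, sizeof(pdata));
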
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 075d18f..b0dfd32 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -89,16 +89,7 @@
  * The mutex locks both lists.
  */
 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
-static struct srcu_notifier_head cpufreq_transition_notifier_list;
-
-static bool init_cpufreq_transition_notifier_list_called;
-static int __init init_cpufreq_transition_notifier_list(void)
-{
-	srcu_init_notifier_head(&cpufreq_transition_notifier_list);
-	init_cpufreq_transition_notifier_list_called = true;
-	return 0;
-}
-pure_initcall(init_cpufreq_transition_notifier_list);
+SRCU_NOTIFIER_HEAD_STATIC(cpufreq_transition_notifier_list);
 
 static int off __read_mostly;
 static int cpufreq_disabled(void)
@@ -300,8 +291,19 @@
 #endif
 }
 
-static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
-		struct cpufreq_freqs *freqs, unsigned int state)
+/**
+ * cpufreq_notify_transition - Notify frequency transition and adjust_jiffies.
+ * @policy: cpufreq policy whose CPUs are affected by the transition.
+ * @freqs: contains details of the frequency update.
+ * @state: set to CPUFREQ_PRECHANGE or CPUFREQ_POSTCHANGE.
+ *
+ * This function calls the transition notifiers and the "adjust_jiffies"
+ * function. It is called twice on all CPU frequency changes that have
+ * external effects.
+ */
+static void cpufreq_notify_transition(struct cpufreq_policy *policy,
+				      struct cpufreq_freqs *freqs,
+				      unsigned int state)
 {
 	BUG_ON(irqs_disabled());
 
@@ -313,52 +315,42 @@
 		 state, freqs->new);
 
 	switch (state) {
-
 	case CPUFREQ_PRECHANGE:
-		/* detect if the driver reported a value as "old frequency"
+		/*
+		 * Detect if the driver reported a value as "old frequency"
 		 * which is not equal to what the cpufreq core thinks is
 		 * "old frequency".
 		 */
 		if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
-			if ((policy) && (policy->cpu == freqs->cpu) &&
-			    (policy->cur) && (policy->cur != freqs->old)) {
+			if (policy->cur && (policy->cur != freqs->old)) {
 				pr_debug("Warning: CPU frequency is %u, cpufreq assumed %u kHz\n",
 					 freqs->old, policy->cur);
 				freqs->old = policy->cur;
 			}
 		}
-		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
-				CPUFREQ_PRECHANGE, freqs);
+
+		for_each_cpu(freqs->cpu, policy->cpus) {
+			srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
+						 CPUFREQ_PRECHANGE, freqs);
+		}
+
 		adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
 		break;
 
 	case CPUFREQ_POSTCHANGE:
 		adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
-		pr_debug("FREQ: %lu - CPU: %lu\n",
-			 (unsigned long)freqs->new, (unsigned long)freqs->cpu);
-		trace_cpu_frequency(freqs->new, freqs->cpu);
-		cpufreq_stats_record_transition(policy, freqs->new);
-		srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
-				CPUFREQ_POSTCHANGE, freqs);
-		if (likely(policy) && likely(policy->cpu == freqs->cpu))
-			policy->cur = freqs->new;
-		break;
-	}
-}
+		pr_debug("FREQ: %u - CPUs: %*pbl\n", freqs->new,
+			 cpumask_pr_args(policy->cpus));
 
-/**
- * cpufreq_notify_transition - call notifier chain and adjust_jiffies
- * on frequency transition.
- *
- * This function calls the transition notifiers and the "adjust_jiffies"
- * function. It is called twice on all CPU frequency changes that have
- * external effects.
- */
-static void cpufreq_notify_transition(struct cpufreq_policy *policy,
-		struct cpufreq_freqs *freqs, unsigned int state)
-{
-	for_each_cpu(freqs->cpu, policy->cpus)
-		__cpufreq_notify_transition(policy, freqs, state);
+		for_each_cpu(freqs->cpu, policy->cpus) {
+			trace_cpu_frequency(freqs->new, freqs->cpu);
+			srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
+						 CPUFREQ_POSTCHANGE, freqs);
+		}
+
+		cpufreq_stats_record_transition(policy, freqs->new);
+		policy->cur = freqs->new;
+	}
 }
 
 /* Do post notifications when there are chances that transition has failed */
@@ -696,6 +688,8 @@
 	struct cpufreq_policy new_policy;				\
 									\
 	memcpy(&new_policy, policy, sizeof(*policy));			\
+	new_policy.min = policy->user_policy.min;			\
+	new_policy.max = policy->user_policy.max;			\
 									\
 	ret = sscanf(buf, "%u", &new_policy.object);			\
 	if (ret != 1)							\
@@ -1764,8 +1758,6 @@
 	if (cpufreq_disabled())
 		return -EINVAL;
 
-	WARN_ON(!init_cpufreq_transition_notifier_list_called);
-
 	switch (list) {
 	case CPUFREQ_TRANSITION_NOTIFIER:
 		mutex_lock(&cpufreq_fast_switch_lock);
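
The SRCU_NOTIFIER_HEAD_STATIC() switch above is why the pure_initcall and its "was it called yet" flag can go: the notifier head is now fully initialized at compile time. A sketch of the resulting usage pattern, with my_notifier_list as a placeholder name:

    SRCU_NOTIFIER_HEAD_STATIC(my_notifier_list);  /* no runtime init needed */

    err = srcu_notifier_chain_register(&my_notifier_list, &nb);
    ...
    srcu_notifier_call_chain(&my_notifier_list, event, data);
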
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index ca38229..871bf9c 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -278,7 +278,7 @@
 	struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
 	u64 delta_ns, lst;
 
-	if (!cpufreq_can_do_remote_dvfs(policy_dbs->policy))
+	if (!cpufreq_this_cpu_can_update(policy_dbs->policy))
 		return;
 
 	/*
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 17e566af..08960a5 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1939,13 +1939,51 @@
 	return 0;
 }
 
+/* Use of trace in passive mode:
+ *
+ * In passive mode the trace core_busy field (also known as the
+ * performance field, and labelled as such on the graphs; also known as
+ * core_avg_perf) is not needed and so is re-assigned to indicate if the
+ * driver call was via the normal or fast switch path. Various graphs
+ * output from the intel_pstate_tracer.py utility that include core_busy
+ * (or performance or core_avg_perf) have a fixed y-axis from 0 to 100%,
+ * so we use 10 to indicate the normal path through the driver, and
+ * 90 to indicate the fast switch path through the driver.
+ * The scaled_busy field is not used, and is set to 0.
+ */
+
+#define	INTEL_PSTATE_TRACE_TARGET 10
+#define	INTEL_PSTATE_TRACE_FAST_SWITCH 90
+
+static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, int old_pstate)
+{
+	struct sample *sample;
+
+	if (!trace_pstate_sample_enabled())
+		return;
+
+	if (!intel_pstate_sample(cpu, ktime_get()))
+		return;
+
+	sample = &cpu->sample;
+	trace_pstate_sample(trace_type,
+		0,
+		old_pstate,
+		cpu->pstate.current_pstate,
+		sample->mperf,
+		sample->aperf,
+		sample->tsc,
+		get_avg_frequency(cpu),
+		fp_toint(cpu->iowait_boost * 100));
+}
+
 static int intel_cpufreq_target(struct cpufreq_policy *policy,
 				unsigned int target_freq,
 				unsigned int relation)
 {
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 	struct cpufreq_freqs freqs;
-	int target_pstate;
+	int target_pstate, old_pstate;
 
 	update_turbo_state();
 
@@ -1965,12 +2003,14 @@
 		break;
 	}
 	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+	old_pstate = cpu->pstate.current_pstate;
 	if (target_pstate != cpu->pstate.current_pstate) {
 		cpu->pstate.current_pstate = target_pstate;
 		wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
 			      pstate_funcs.get_val(cpu, target_pstate));
 	}
 	freqs.new = target_pstate * cpu->pstate.scaling;
+	intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_TARGET, old_pstate);
 	cpufreq_freq_transition_end(policy, &freqs, false);
 
 	return 0;
@@ -1980,13 +2020,15 @@
 					      unsigned int target_freq)
 {
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
-	int target_pstate;
+	int target_pstate, old_pstate;
 
 	update_turbo_state();
 
 	target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
 	target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
+	old_pstate = cpu->pstate.current_pstate;
 	intel_pstate_update_pstate(cpu, target_pstate);
+	intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate);
 	return target_pstate * cpu->pstate.scaling;
 }
 
diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c
new file mode 100644
index 0000000..d049fe4
--- /dev/null
+++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+/*
+ * In certain QCOM SoCs like apq8096 and msm8996 that have KRYO processors,
+ * the CPU frequency subset and the voltage value of each OPP vary based
+ * on the silicon variant in use. The Qualcomm Process Voltage Scaling
+ * Tables define the voltage and frequency value based on the msm-id in
+ * SMEM combined with the speedbin blown into the efuse.
+ * The qcom-cpufreq-kryo driver reads the msm-id and efuse value from the
+ * SoC to provide the OPP framework with the required information.
+ * This is used to determine the voltage and frequency value of each OPP in
+ * the operating-points-v2 table when it is parsed by the OPP framework.
+ */
+
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nvmem-consumer.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/smem.h>
+
+#define MSM_ID_SMEM	137
+
+enum _msm_id {
+	MSM8996V3 = 0xF6ul,
+	APQ8096V3 = 0x123ul,
+	MSM8996SG = 0x131ul,
+	APQ8096SG = 0x138ul,
+};
+
+enum _msm8996_version {
+	MSM8996_V3,
+	MSM8996_SG,
+	NUM_OF_MSM8996_VERSIONS,
+};
+
+static enum _msm8996_version qcom_cpufreq_kryo_get_msm_id(void)
+{
+	size_t len;
+	u32 *msm_id;
+	enum _msm8996_version version;
+
+	msm_id = qcom_smem_get(QCOM_SMEM_HOST_ANY, MSM_ID_SMEM, &len);
+	if (IS_ERR(msm_id))
+		return NUM_OF_MSM8996_VERSIONS;
+
+	/* The first 4 bytes hold the format; the actual msm-id follows them */
+	msm_id++;
+
+	switch ((enum _msm_id)*msm_id) {
+	case MSM8996V3:
+	case APQ8096V3:
+		version = MSM8996_V3;
+		break;
+	case MSM8996SG:
+	case APQ8096SG:
+		version = MSM8996_SG;
+		break;
+	default:
+		version = NUM_OF_MSM8996_VERSIONS;
+	}
+
+	return version;
+}
+
+static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
+{
+	struct opp_table *opp_tables[NR_CPUS] = {0};
+	struct platform_device *cpufreq_dt_pdev;
+	enum _msm8996_version msm8996_version;
+	struct nvmem_cell *speedbin_nvmem;
+	struct device_node *np;
+	struct device *cpu_dev;
+	unsigned cpu;
+	u8 *speedbin;
+	u32 versions;
+	size_t len;
+	int ret;
+
+	cpu_dev = get_cpu_device(0);
+	if (NULL == cpu_dev)
+		return -ENODEV;
+
+	msm8996_version = qcom_cpufreq_kryo_get_msm_id();
+	if (NUM_OF_MSM8996_VERSIONS == msm8996_version) {
+		dev_err(cpu_dev, "Not Snapdragon 820/821!");
+		return -ENODEV;
+	}
+
+	np = dev_pm_opp_of_get_opp_desc_node(cpu_dev);
+	if (IS_ERR(np))
+		return PTR_ERR(np);
+
+	ret = of_device_is_compatible(np, "operating-points-v2-kryo-cpu");
+	if (!ret) {
+		of_node_put(np);
+		return -ENOENT;
+	}
+
+	speedbin_nvmem = of_nvmem_cell_get(np, NULL);
+	of_node_put(np);
+	if (IS_ERR(speedbin_nvmem)) {
+		dev_err(cpu_dev, "Could not get nvmem cell: %ld\n",
+			PTR_ERR(speedbin_nvmem));
+		return PTR_ERR(speedbin_nvmem);
+	}
+
+	speedbin = nvmem_cell_read(speedbin_nvmem, &len);
+	nvmem_cell_put(speedbin_nvmem);
+	if (IS_ERR(speedbin))
+		return PTR_ERR(speedbin);
+
+	switch (msm8996_version) {
+	case MSM8996_V3:
+		versions = 1 << (unsigned int)(*speedbin);
+		break;
+	case MSM8996_SG:
+		versions = 1 << ((unsigned int)(*speedbin) + 4);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	for_each_possible_cpu(cpu) {
+		cpu_dev = get_cpu_device(cpu);
+		if (NULL == cpu_dev) {
+			ret = -ENODEV;
+			goto free_opp;
+		}
+
+		opp_tables[cpu] = dev_pm_opp_set_supported_hw(cpu_dev,
+							      &versions, 1);
+		if (IS_ERR(opp_tables[cpu])) {
+			ret = PTR_ERR(opp_tables[cpu]);
+			dev_err(cpu_dev, "Failed to set supported hardware\n");
+			goto free_opp;
+		}
+	}
+
+	cpufreq_dt_pdev = platform_device_register_simple("cpufreq-dt", -1,
+							  NULL, 0);
+	if (!IS_ERR(cpufreq_dt_pdev))
+		return 0;
+
+	ret = PTR_ERR(cpufreq_dt_pdev);
+	dev_err(cpu_dev, "Failed to register platform device\n");
+
+free_opp:
+	for_each_possible_cpu(cpu) {
+		if (IS_ERR_OR_NULL(opp_tables[cpu]))
+			break;
+		dev_pm_opp_put_supported_hw(opp_tables[cpu]);
+	}
+
+	return ret;
+}
+
+static struct platform_driver qcom_cpufreq_kryo_driver = {
+	.probe = qcom_cpufreq_kryo_probe,
+	.driver = {
+		.name = "qcom-cpufreq-kryo",
+	},
+};
+
+static const struct of_device_id qcom_cpufreq_kryo_match_list[] __initconst = {
+	{ .compatible = "qcom,apq8096", },
+	{ .compatible = "qcom,msm8996", },
+	{ /* sentinel */ },
+};
+
+/*
+ * Since the driver depends on smem and nvmem drivers, which may
+ * return EPROBE_DEFER, all the real activity is done in the probe,
+ * which may be deferred as well. The init here only registers
+ * the driver and the platform device.
+ */
+static int __init qcom_cpufreq_kryo_init(void)
+{
+	struct device_node *np = of_find_node_by_path("/");
+	const struct of_device_id *match;
+	int ret;
+
+	if (!np)
+		return -ENODEV;
+
+	match = of_match_node(qcom_cpufreq_kryo_match_list, np);
+	of_node_put(np);
+	if (!match)
+		return -ENODEV;
+
+	ret = platform_driver_register(&qcom_cpufreq_kryo_driver);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = PTR_ERR_OR_ZERO(platform_device_register_simple(
+		"qcom-cpufreq-kryo", -1, NULL, 0));
+	if (0 == ret)
+		return 0;
+
+	platform_driver_unregister(&qcom_cpufreq_kryo_driver);
+	return ret;
+}
+module_init(qcom_cpufreq_kryo_init);
+
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/s3c2440-cpufreq.c b/drivers/cpufreq/s3c2440-cpufreq.c
index d0d75b6..d2f67b7 100644
--- a/drivers/cpufreq/s3c2440-cpufreq.c
+++ b/drivers/cpufreq/s3c2440-cpufreq.c
@@ -143,7 +143,7 @@
 {
 	unsigned long clkdiv, camdiv;
 
-	s3c_freq_dbg("%s: divsiors: h=%d, p=%d\n", __func__,
+	s3c_freq_dbg("%s: divisors: h=%d, p=%d\n", __func__,
 		     cfg->divs.h_divisor, cfg->divs.p_divisor);
 
 	clkdiv = __raw_readl(S3C2410_CLKDIVN);
diff --git a/drivers/cpufreq/speedstep-lib.c b/drivers/cpufreq/speedstep-lib.c
index e3a9962..cabb6f4 100644
--- a/drivers/cpufreq/speedstep-lib.c
+++ b/drivers/cpufreq/speedstep-lib.c
@@ -252,7 +252,7 @@
  *********************************************************************/
 
 /* Keep in sync with the x86_cpu_id tables in the different modules */
-unsigned int speedstep_detect_processor(void)
+enum speedstep_processor speedstep_detect_processor(void)
 {
 	struct cpuinfo_x86 *c = &cpu_data(0);
 	u32 ebx, msr_lo, msr_hi;
diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c
index 2bd6284..05f57dc 100644
--- a/drivers/cpufreq/tegra20-cpufreq.c
+++ b/drivers/cpufreq/tegra20-cpufreq.c
@@ -16,16 +16,13 @@
  *
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/cpufreq.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/err.h>
 #include <linux/clk.h>
-#include <linux/io.h>
+#include <linux/cpufreq.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
 
 static struct cpufreq_frequency_table freq_table[] = {
 	{ .frequency = 216000 },
@@ -39,25 +36,27 @@
 	{ .frequency = CPUFREQ_TABLE_END },
 };
 
-#define NUM_CPUS	2
-
-static struct clk *cpu_clk;
-static struct clk *pll_x_clk;
-static struct clk *pll_p_clk;
-static struct clk *emc_clk;
-static bool pll_x_prepared;
+struct tegra20_cpufreq {
+	struct device *dev;
+	struct cpufreq_driver driver;
+	struct clk *cpu_clk;
+	struct clk *pll_x_clk;
+	struct clk *pll_p_clk;
+	bool pll_x_prepared;
+};
 
 static unsigned int tegra_get_intermediate(struct cpufreq_policy *policy,
 					   unsigned int index)
 {
-	unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000;
+	struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data();
+	unsigned int ifreq = clk_get_rate(cpufreq->pll_p_clk) / 1000;
 
 	/*
 	 * Don't switch to intermediate freq if:
 	 * - we are already at it, i.e. policy->cur == ifreq
 	 * - index corresponds to ifreq
 	 */
-	if ((freq_table[index].frequency == ifreq) || (policy->cur == ifreq))
+	if (freq_table[index].frequency == ifreq || policy->cur == ifreq)
 		return 0;
 
 	return ifreq;
@@ -66,6 +65,7 @@
 static int tegra_target_intermediate(struct cpufreq_policy *policy,
 				     unsigned int index)
 {
+	struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data();
 	int ret;
 
 	/*
@@ -78,47 +78,37 @@
 	 * Also, we wouldn't be using pll_x anymore and must not take extra
 	 * reference to it, as it can be disabled now to save some power.
 	 */
-	clk_prepare_enable(pll_x_clk);
+	clk_prepare_enable(cpufreq->pll_x_clk);
 
-	ret = clk_set_parent(cpu_clk, pll_p_clk);
+	ret = clk_set_parent(cpufreq->cpu_clk, cpufreq->pll_p_clk);
 	if (ret)
-		clk_disable_unprepare(pll_x_clk);
+		clk_disable_unprepare(cpufreq->pll_x_clk);
 	else
-		pll_x_prepared = true;
+		cpufreq->pll_x_prepared = true;
 
 	return ret;
 }
 
 static int tegra_target(struct cpufreq_policy *policy, unsigned int index)
 {
+	struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data();
 	unsigned long rate = freq_table[index].frequency;
-	unsigned int ifreq = clk_get_rate(pll_p_clk) / 1000;
-	int ret = 0;
-
-	/*
-	 * Vote on memory bus frequency based on cpu frequency
-	 * This sets the minimum frequency, display or avp may request higher
-	 */
-	if (rate >= 816000)
-		clk_set_rate(emc_clk, 600000000); /* cpu 816 MHz, emc max */
-	else if (rate >= 456000)
-		clk_set_rate(emc_clk, 300000000); /* cpu 456 MHz, emc 150Mhz */
-	else
-		clk_set_rate(emc_clk, 100000000);  /* emc 50Mhz */
+	unsigned int ifreq = clk_get_rate(cpufreq->pll_p_clk) / 1000;
+	int ret;
 
 	/*
 	 * target freq == pll_p, don't need to take extra reference to pll_x_clk
 	 * as it isn't used anymore.
 	 */
 	if (rate == ifreq)
-		return clk_set_parent(cpu_clk, pll_p_clk);
+		return clk_set_parent(cpufreq->cpu_clk, cpufreq->pll_p_clk);
 
-	ret = clk_set_rate(pll_x_clk, rate * 1000);
+	ret = clk_set_rate(cpufreq->pll_x_clk, rate * 1000);
 	/* Restore to earlier frequency on error, i.e. pll_x */
 	if (ret)
-		pr_err("Failed to change pll_x to %lu\n", rate);
+		dev_err(cpufreq->dev, "Failed to change pll_x to %lu\n", rate);
 
-	ret = clk_set_parent(cpu_clk, pll_x_clk);
+	ret = clk_set_parent(cpufreq->cpu_clk, cpufreq->pll_x_clk);
 	/* This shouldn't fail while changing or restoring */
 	WARN_ON(ret);
 
@@ -126,9 +116,9 @@
 	 * Drop count to pll_x clock only if we switched to intermediate freq
 	 * earlier while transitioning to a target frequency.
 	 */
-	if (pll_x_prepared) {
-		clk_disable_unprepare(pll_x_clk);
-		pll_x_prepared = false;
+	if (cpufreq->pll_x_prepared) {
+		clk_disable_unprepare(cpufreq->pll_x_clk);
+		cpufreq->pll_x_prepared = false;
 	}
 
 	return ret;
@@ -136,81 +126,111 @@
 
 static int tegra_cpu_init(struct cpufreq_policy *policy)
 {
+	struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data();
 	int ret;
 
-	if (policy->cpu >= NUM_CPUS)
-		return -EINVAL;
-
-	clk_prepare_enable(emc_clk);
-	clk_prepare_enable(cpu_clk);
+	clk_prepare_enable(cpufreq->cpu_clk);
 
 	/* FIXME: what's the actual transition time? */
 	ret = cpufreq_generic_init(policy, freq_table, 300 * 1000);
 	if (ret) {
-		clk_disable_unprepare(cpu_clk);
-		clk_disable_unprepare(emc_clk);
+		clk_disable_unprepare(cpufreq->cpu_clk);
 		return ret;
 	}
 
-	policy->clk = cpu_clk;
+	policy->clk = cpufreq->cpu_clk;
 	policy->suspend_freq = freq_table[0].frequency;
 	return 0;
 }
 
 static int tegra_cpu_exit(struct cpufreq_policy *policy)
 {
-	clk_disable_unprepare(cpu_clk);
-	clk_disable_unprepare(emc_clk);
+	struct tegra20_cpufreq *cpufreq = cpufreq_get_driver_data();
+
+	clk_disable_unprepare(cpufreq->cpu_clk);
 	return 0;
 }
 
-static struct cpufreq_driver tegra_cpufreq_driver = {
-	.flags			= CPUFREQ_NEED_INITIAL_FREQ_CHECK,
-	.verify			= cpufreq_generic_frequency_table_verify,
-	.get_intermediate	= tegra_get_intermediate,
-	.target_intermediate	= tegra_target_intermediate,
-	.target_index		= tegra_target,
-	.get			= cpufreq_generic_get,
-	.init			= tegra_cpu_init,
-	.exit			= tegra_cpu_exit,
-	.name			= "tegra",
-	.attr			= cpufreq_generic_attr,
-	.suspend		= cpufreq_generic_suspend,
-};
-
-static int __init tegra_cpufreq_init(void)
+static int tegra20_cpufreq_probe(struct platform_device *pdev)
 {
-	cpu_clk = clk_get_sys(NULL, "cclk");
-	if (IS_ERR(cpu_clk))
-		return PTR_ERR(cpu_clk);
+	struct tegra20_cpufreq *cpufreq;
+	int err;
 
-	pll_x_clk = clk_get_sys(NULL, "pll_x");
-	if (IS_ERR(pll_x_clk))
-		return PTR_ERR(pll_x_clk);
+	cpufreq = devm_kzalloc(&pdev->dev, sizeof(*cpufreq), GFP_KERNEL);
+	if (!cpufreq)
+		return -ENOMEM;
 
-	pll_p_clk = clk_get_sys(NULL, "pll_p");
-	if (IS_ERR(pll_p_clk))
-		return PTR_ERR(pll_p_clk);
+	cpufreq->cpu_clk = clk_get_sys(NULL, "cclk");
+	if (IS_ERR(cpufreq->cpu_clk))
+		return PTR_ERR(cpufreq->cpu_clk);
 
-	emc_clk = clk_get_sys("cpu", "emc");
-	if (IS_ERR(emc_clk)) {
-		clk_put(cpu_clk);
-		return PTR_ERR(emc_clk);
+	cpufreq->pll_x_clk = clk_get_sys(NULL, "pll_x");
+	if (IS_ERR(cpufreq->pll_x_clk)) {
+		err = PTR_ERR(cpufreq->pll_x_clk);
+		goto put_cpu;
 	}
 
-	return cpufreq_register_driver(&tegra_cpufreq_driver);
+	cpufreq->pll_p_clk = clk_get_sys(NULL, "pll_p");
+	if (IS_ERR(cpufreq->pll_p_clk)) {
+		err = PTR_ERR(cpufreq->pll_p_clk);
+		goto put_pll_x;
+	}
+
+	cpufreq->dev = &pdev->dev;
+	cpufreq->driver.get = cpufreq_generic_get;
+	cpufreq->driver.attr = cpufreq_generic_attr;
+	cpufreq->driver.init = tegra_cpu_init;
+	cpufreq->driver.exit = tegra_cpu_exit;
+	cpufreq->driver.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK;
+	cpufreq->driver.verify = cpufreq_generic_frequency_table_verify;
+	cpufreq->driver.suspend = cpufreq_generic_suspend;
+	cpufreq->driver.driver_data = cpufreq;
+	cpufreq->driver.target_index = tegra_target;
+	cpufreq->driver.get_intermediate = tegra_get_intermediate;
+	cpufreq->driver.target_intermediate = tegra_target_intermediate;
+	snprintf(cpufreq->driver.name, CPUFREQ_NAME_LEN, "tegra");
+
+	err = cpufreq_register_driver(&cpufreq->driver);
+	if (err)
+		goto put_pll_p;
+
+	platform_set_drvdata(pdev, cpufreq);
+
+	return 0;
+
+put_pll_p:
+	clk_put(cpufreq->pll_p_clk);
+put_pll_x:
+	clk_put(cpufreq->pll_x_clk);
+put_cpu:
+	clk_put(cpufreq->cpu_clk);
+
+	return err;
 }
 
-static void __exit tegra_cpufreq_exit(void)
+static int tegra20_cpufreq_remove(struct platform_device *pdev)
 {
-        cpufreq_unregister_driver(&tegra_cpufreq_driver);
-	clk_put(emc_clk);
-	clk_put(cpu_clk);
+	struct tegra20_cpufreq *cpufreq = platform_get_drvdata(pdev);
+
+	cpufreq_unregister_driver(&cpufreq->driver);
+
+	clk_put(cpufreq->pll_p_clk);
+	clk_put(cpufreq->pll_x_clk);
+	clk_put(cpufreq->cpu_clk);
+
+	return 0;
 }
 
+static struct platform_driver tegra20_cpufreq_driver = {
+	.probe		= tegra20_cpufreq_probe,
+	.remove		= tegra20_cpufreq_remove,
+	.driver		= {
+		.name	= "tegra20-cpufreq",
+	},
+};
+module_platform_driver(tegra20_cpufreq_driver);
 
+MODULE_ALIAS("platform:tegra20-cpufreq");
 MODULE_AUTHOR("Colin Cross <ccross@android.com>");
-MODULE_DESCRIPTION("cpufreq driver for Nvidia Tegra2");
+MODULE_DESCRIPTION("NVIDIA Tegra20 cpufreq driver");
 MODULE_LICENSE("GPL");
-module_init(tegra_cpufreq_init);
-module_exit(tegra_cpufreq_exit);
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 5d359af..9fed1b8 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -8,8 +8,10 @@
  * This code is licenced under the GPL.
  */
 
-#include <linux/mutex.h>
+#include <linux/cpu.h>
 #include <linux/cpuidle.h>
+#include <linux/mutex.h>
+#include <linux/pm_qos.h>
 
 #include "cpuidle.h"
 
@@ -93,3 +95,16 @@
 
 	return ret;
 }
+
+/**
+ * cpuidle_governor_latency_req - Compute a latency constraint for CPU
+ * @cpu: Target CPU
+ */
+int cpuidle_governor_latency_req(unsigned int cpu)
+{
+	int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+	struct device *device = get_cpu_device(cpu);
+	int device_req = dev_pm_qos_raw_read_value(device);
+
+	return device_req < global_req ? device_req : global_req;
+}
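
The new cpuidle_governor_latency_req() helper collapses the system-wide
PM_QOS_CPU_DMA_LATENCY request and the per-CPU device resume-latency
request into a single upper bound, so the ladder and menu hunks below can
drop their open-coded copies. A simplified sketch of how a governor
applies the bound when walking idle states (disabled-state and residency
checks omitted, function name assumed):

	static int pick_deepest(struct cpuidle_driver *drv,
				struct cpuidle_device *dev)
	{
		int latency_req = cpuidle_governor_latency_req(dev->cpu);
		int i, idx = 0;

		for (i = 1; i < drv->state_count; i++) {
			/* a deeper state must still wake within the bound */
			if (drv->states[i].exit_latency > latency_req)
				break;
			idx = i;
		}
		return idx;
	}
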
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index b24883f..704880a 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -14,10 +14,8 @@
 
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
-#include <linux/pm_qos.h>
 #include <linux/jiffies.h>
 #include <linux/tick.h>
-#include <linux/cpu.h>
 
 #include <asm/io.h>
 #include <linux/uaccess.h>
@@ -69,16 +67,10 @@
 			       struct cpuidle_device *dev, bool *dummy)
 {
 	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
-	struct device *device = get_cpu_device(dev->cpu);
 	struct ladder_device_state *last_state;
 	int last_residency, last_idx = ldev->last_state_idx;
 	int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
-	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
-	int resume_latency = dev_pm_qos_raw_read_value(device);
-
-	if (resume_latency < latency_req &&
-	    resume_latency != PM_QOS_RESUME_LATENCY_NO_CONSTRAINT)
-		latency_req = resume_latency;
+	int latency_req = cpuidle_governor_latency_req(dev->cpu);
 
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0)) {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 1bfe03c..1aef60d 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -12,7 +12,6 @@
 
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
-#include <linux/pm_qos.h>
 #include <linux/time.h>
 #include <linux/ktime.h>
 #include <linux/hrtimer.h>
@@ -21,7 +20,6 @@
 #include <linux/sched/loadavg.h>
 #include <linux/sched/stat.h>
 #include <linux/math64.h>
-#include <linux/cpu.h>
 
 /*
  * Please note when changing the tuning values:
@@ -286,15 +284,13 @@
 		       bool *stop_tick)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
-	struct device *device = get_cpu_device(dev->cpu);
-	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+	int latency_req = cpuidle_governor_latency_req(dev->cpu);
 	int i;
 	int first_idx;
 	int idx;
 	unsigned int interactivity_req;
 	unsigned int expected_interval;
 	unsigned long nr_iowaiters, cpu_load;
-	int resume_latency = dev_pm_qos_raw_read_value(device);
 	ktime_t delta_next;
 
 	if (data->needs_update) {
@@ -302,10 +298,6 @@
 		data->needs_update = 0;
 	}
 
-	if (resume_latency < latency_req &&
-	    resume_latency != PM_QOS_RESUME_LATENCY_NO_CONSTRAINT)
-		latency_req = resume_latency;
-
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0)) {
 		*stop_tick = false;
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index d1ea1a0..43cccf6 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -302,6 +302,7 @@
 	select CRYPTO_AEAD
 	select CRYPTO_AES
 	select CRYPTO_CCM
+	select CRYPTO_CTR
 	select CRYPTO_GCM
 	select CRYPTO_BLKCIPHER
 	help
@@ -419,7 +420,7 @@
 config CRYPTO_DEV_S5P
 	tristate "Support for Samsung S5PV210/Exynos crypto accelerator"
 	depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
-	depends on HAS_IOMEM && HAS_DMA
+	depends on HAS_IOMEM
 	select CRYPTO_AES
 	select CRYPTO_BLKCIPHER
 	help
@@ -466,7 +467,6 @@
 
 config CRYPTO_DEV_ATMEL_AUTHENC
 	tristate "Support for Atmel IPSEC/SSL hw accelerator"
-	depends on HAS_DMA
 	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_AUTHENC
 	select CRYPTO_DEV_ATMEL_AES
@@ -479,7 +479,6 @@
 
 config CRYPTO_DEV_ATMEL_AES
 	tristate "Support for Atmel AES hw accelerator"
-	depends on HAS_DMA
 	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_AES
 	select CRYPTO_AEAD
@@ -494,7 +493,6 @@
 
 config CRYPTO_DEV_ATMEL_TDES
 	tristate "Support for Atmel DES/TDES hw accelerator"
-	depends on HAS_DMA
 	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_DES
 	select CRYPTO_BLKCIPHER
@@ -508,7 +506,6 @@
 
 config CRYPTO_DEV_ATMEL_SHA
 	tristate "Support for Atmel SHA hw accelerator"
-	depends on HAS_DMA
 	depends on ARCH_AT91 || COMPILE_TEST
 	select CRYPTO_HASH
 	help
@@ -574,7 +571,8 @@
 
 config CRYPTO_DEV_QCE
 	tristate "Qualcomm crypto engine accelerator"
-	depends on (ARCH_QCOM || COMPILE_TEST) && HAS_DMA && HAS_IOMEM
+	depends on ARCH_QCOM || COMPILE_TEST
+	depends on HAS_IOMEM
 	select CRYPTO_AES
 	select CRYPTO_DES
 	select CRYPTO_ECB
@@ -598,7 +596,6 @@
 config CRYPTO_DEV_IMGTEC_HASH
 	tristate "Imagination Technologies hardware hash accelerator"
 	depends on MIPS || COMPILE_TEST
-	depends on HAS_DMA
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
@@ -650,7 +647,6 @@
 
 config CRYPTO_DEV_MEDIATEK
 	tristate "MediaTek's EIP97 Cryptographic Engine driver"
-	depends on HAS_DMA
 	depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST
 	select CRYPTO_AES
 	select CRYPTO_AEAD
@@ -688,9 +684,10 @@
 
 config CRYPTO_DEV_SAFEXCEL
 	tristate "Inside Secure's SafeXcel cryptographic engine driver"
-	depends on HAS_DMA && OF
+	depends on OF
 	depends on (ARM64 && ARCH_MVEBU) || (COMPILE_TEST && 64BIT)
 	select CRYPTO_AES
+	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_HASH
 	select CRYPTO_HMAC
@@ -706,7 +703,6 @@
 config CRYPTO_DEV_ARTPEC6
 	tristate "Support for Axis ARTPEC-6/7 hardware crypto acceleration."
 	depends on ARM && (ARCH_ARTPEC || COMPILE_TEST)
-	depends on HAS_DMA
 	depends on OF
 	select CRYPTO_AEAD
 	select CRYPTO_AES
diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c
index ea83d0b..f5c07498 100644
--- a/drivers/crypto/amcc/crypto4xx_alg.c
+++ b/drivers/crypto/amcc/crypto4xx_alg.c
@@ -31,6 +31,7 @@
 #include <crypto/gcm.h>
 #include <crypto/sha.h>
 #include <crypto/ctr.h>
+#include <crypto/skcipher.h>
 #include "crypto4xx_reg_def.h"
 #include "crypto4xx_core.h"
 #include "crypto4xx_sa.h"
@@ -74,51 +75,57 @@
 	sa->sa_command_1.bf.copy_hdr = cp_hdr;
 }
 
-int crypto4xx_encrypt(struct ablkcipher_request *req)
+static inline int crypto4xx_crypt(struct skcipher_request *req,
+				  const unsigned int ivlen, bool decrypt)
 {
-	struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	unsigned int ivlen = crypto_ablkcipher_ivsize(
-		crypto_ablkcipher_reqtfm(req));
-	__le32 iv[ivlen];
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
+	__le32 iv[AES_IV_SIZE];
 
 	if (ivlen)
-		crypto4xx_memcpy_to_le32(iv, req->info, ivlen);
+		crypto4xx_memcpy_to_le32(iv, req->iv, ivlen);
 
 	return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
-		req->nbytes, iv, ivlen, ctx->sa_out, ctx->sa_len, 0);
+		req->cryptlen, iv, ivlen, decrypt ? ctx->sa_in : ctx->sa_out,
+		ctx->sa_len, 0, NULL);
 }
 
-int crypto4xx_decrypt(struct ablkcipher_request *req)
+int crypto4xx_encrypt_noiv(struct skcipher_request *req)
 {
-	struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	unsigned int ivlen = crypto_ablkcipher_ivsize(
-		crypto_ablkcipher_reqtfm(req));
-	__le32 iv[ivlen];
+	return crypto4xx_crypt(req, 0, false);
+}
 
-	if (ivlen)
-		crypto4xx_memcpy_to_le32(iv, req->info, ivlen);
+int crypto4xx_encrypt_iv(struct skcipher_request *req)
+{
+	return crypto4xx_crypt(req, AES_IV_SIZE, false);
+}
 
-	return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
-		req->nbytes, iv, ivlen, ctx->sa_in, ctx->sa_len, 0);
+int crypto4xx_decrypt_noiv(struct skcipher_request *req)
+{
+	return crypto4xx_crypt(req, 0, true);
+}
+
+int crypto4xx_decrypt_iv(struct skcipher_request *req)
+{
+	return crypto4xx_crypt(req, AES_IV_SIZE, true);
 }
 
 /**
  * AES Functions
  */
-static int crypto4xx_setkey_aes(struct crypto_ablkcipher *cipher,
+static int crypto4xx_setkey_aes(struct crypto_skcipher *cipher,
 				const u8 *key,
 				unsigned int keylen,
 				unsigned char cm,
 				u8 fb)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
 	struct dynamic_sa_ctl *sa;
 	int    rc;
 
 	if (keylen != AES_KEYSIZE_256 &&
 		keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_128) {
-		crypto_ablkcipher_set_flags(cipher,
+		crypto_skcipher_set_flags(cipher,
 				CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
@@ -134,7 +141,8 @@
 	/* Setup SA */
 	sa = ctx->sa_in;
 
-	set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, SA_NOT_SAVE_IV,
+	set_dynamic_sa_command_0(sa, SA_NOT_SAVE_HASH, (cm == CRYPTO_MODE_CBC ?
+				 SA_SAVE_IV : SA_NOT_SAVE_IV),
 				 SA_LOAD_HASH_FROM_SA, SA_LOAD_IV_FROM_STATE,
 				 SA_NO_HEADER_PROC, SA_HASH_ALG_NULL,
 				 SA_CIPHER_ALG_AES, SA_PAD_TYPE_ZERO,
@@ -158,39 +166,38 @@
 	return 0;
 }
 
-int crypto4xx_setkey_aes_cbc(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_aes_cbc(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen)
 {
 	return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_CBC,
 				    CRYPTO_FEEDBACK_MODE_NO_FB);
 }
 
-int crypto4xx_setkey_aes_cfb(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_aes_cfb(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen)
 {
 	return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_CFB,
 				    CRYPTO_FEEDBACK_MODE_128BIT_CFB);
 }
 
-int crypto4xx_setkey_aes_ecb(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_aes_ecb(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen)
 {
 	return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_ECB,
 				    CRYPTO_FEEDBACK_MODE_NO_FB);
 }
 
-int crypto4xx_setkey_aes_ofb(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_aes_ofb(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen)
 {
 	return crypto4xx_setkey_aes(cipher, key, keylen, CRYPTO_MODE_OFB,
 				    CRYPTO_FEEDBACK_MODE_64BIT_OFB);
 }
 
-int crypto4xx_setkey_rfc3686(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_rfc3686(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
 	int rc;
 
 	rc = crypto4xx_setkey_aes(cipher, key, keylen - CTR_RFC3686_NONCE_SIZE,
@@ -204,35 +211,117 @@
 	return 0;
 }
 
-int crypto4xx_rfc3686_encrypt(struct ablkcipher_request *req)
+int crypto4xx_rfc3686_encrypt(struct skcipher_request *req)
 {
-	struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
 	__le32 iv[AES_IV_SIZE / 4] = {
 		ctx->iv_nonce,
-		cpu_to_le32p((u32 *) req->info),
-		cpu_to_le32p((u32 *) (req->info + 4)),
+		cpu_to_le32p((u32 *) req->iv),
+		cpu_to_le32p((u32 *) (req->iv + 4)),
 		cpu_to_le32(1) };
 
 	return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
-				  req->nbytes, iv, AES_IV_SIZE,
-				  ctx->sa_out, ctx->sa_len, 0);
+				  req->cryptlen, iv, AES_IV_SIZE,
+				  ctx->sa_out, ctx->sa_len, 0, NULL);
 }
 
-int crypto4xx_rfc3686_decrypt(struct ablkcipher_request *req)
+int crypto4xx_rfc3686_decrypt(struct skcipher_request *req)
 {
-	struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
 	__le32 iv[AES_IV_SIZE / 4] = {
 		ctx->iv_nonce,
-		cpu_to_le32p((u32 *) req->info),
-		cpu_to_le32p((u32 *) (req->info + 4)),
+		cpu_to_le32p((u32 *) req->iv),
+		cpu_to_le32p((u32 *) (req->iv + 4)),
 		cpu_to_le32(1) };
 
 	return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
-				  req->nbytes, iv, AES_IV_SIZE,
-				  ctx->sa_out, ctx->sa_len, 0);
+				  req->cryptlen, iv, AES_IV_SIZE,
+				  ctx->sa_out, ctx->sa_len, 0, NULL);
+}
+
+static int
+crypto4xx_ctr_crypt(struct skcipher_request *req, bool encrypt)
+{
+	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
+	struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
+	size_t iv_len = crypto_skcipher_ivsize(cipher);
+	unsigned int counter = be32_to_cpup((__be32 *)(req->iv + iv_len - 4));
+	unsigned int nblks = ALIGN(req->cryptlen, AES_BLOCK_SIZE) /
+			AES_BLOCK_SIZE;
+
+	/*
+	 * The hardware uses only the last 32 bits as the counter while the
+	 * kernel tests (aes_ctr_enc_tv_template[4] for example) expect that
+	 * the whole IV is a counter.  So fall back if the counter is going to
+	 * overflow.
+	 */
+	if (counter + nblks < counter) {
+		struct skcipher_request *subreq = skcipher_request_ctx(req);
+		int ret;
+
+		skcipher_request_set_tfm(subreq, ctx->sw_cipher.cipher);
+		skcipher_request_set_callback(subreq, req->base.flags,
+			NULL, NULL);
+		skcipher_request_set_crypt(subreq, req->src, req->dst,
+			req->cryptlen, req->iv);
+		ret = encrypt ? crypto_skcipher_encrypt(subreq)
+			: crypto_skcipher_decrypt(subreq);
+		skcipher_request_zero(subreq);
+		return ret;
+	}
+
+	return encrypt ? crypto4xx_encrypt_iv(req)
+		       : crypto4xx_decrypt_iv(req);
+}
+
+static int crypto4xx_sk_setup_fallback(struct crypto4xx_ctx *ctx,
+				       struct crypto_skcipher *cipher,
+				       const u8 *key,
+				       unsigned int keylen)
+{
+	int rc;
+
+	crypto_skcipher_clear_flags(ctx->sw_cipher.cipher,
+				    CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(ctx->sw_cipher.cipher,
+		crypto_skcipher_get_flags(cipher) & CRYPTO_TFM_REQ_MASK);
+	rc = crypto_skcipher_setkey(ctx->sw_cipher.cipher, key, keylen);
+	crypto_skcipher_clear_flags(cipher, CRYPTO_TFM_RES_MASK);
+	crypto_skcipher_set_flags(cipher,
+		crypto_skcipher_get_flags(ctx->sw_cipher.cipher) &
+			CRYPTO_TFM_RES_MASK);
+
+	return rc;
+}
+
+int crypto4xx_setkey_aes_ctr(struct crypto_skcipher *cipher,
+			     const u8 *key, unsigned int keylen)
+{
+	struct crypto4xx_ctx *ctx = crypto_skcipher_ctx(cipher);
+	int rc;
+
+	rc = crypto4xx_sk_setup_fallback(ctx, cipher, key, keylen);
+	if (rc)
+		return rc;
+
+	return crypto4xx_setkey_aes(cipher, key, keylen,
+		CRYPTO_MODE_CTR, CRYPTO_FEEDBACK_MODE_NO_FB);
+}
+
+int crypto4xx_encrypt_ctr(struct skcipher_request *req)
+{
+	return crypto4xx_ctr_crypt(req, true);
+}
+
+int crypto4xx_decrypt_ctr(struct skcipher_request *req)
+{
+	return crypto4xx_ctr_crypt(req, false);
 }
 
 static inline bool crypto4xx_aead_need_fallback(struct aead_request *req,
+						unsigned int len,
 						bool is_ccm, bool decrypt)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -242,14 +331,14 @@
 		return true;
 
 	/*
-	 * hardware does not handle cases where cryptlen
-	 * is less than a block
+	 * hardware does not handle cases where plaintext
+	 * is less than a block.
 	 */
-	if (req->cryptlen < AES_BLOCK_SIZE)
+	if (len < AES_BLOCK_SIZE)
 		return true;
 
-	/* assoc len needs to be a multiple of 4 */
-	if (req->assoclen & 0x3)
+	/* assoc len needs to be a multiple of 4 and <= 1020 */
+	if (req->assoclen & 0x3 || req->assoclen > 1020)
 		return true;
 
 	/* CCM supports only counter field length of 2 and 4 bytes */
@@ -262,13 +351,7 @@
 static int crypto4xx_aead_fallback(struct aead_request *req,
 	struct crypto4xx_ctx *ctx, bool do_decrypt)
 {
-	char aead_req_data[sizeof(struct aead_request) +
-			   crypto_aead_reqsize(ctx->sw_cipher.aead)]
-		__aligned(__alignof__(struct aead_request));
-
-	struct aead_request *subreq = (void *) aead_req_data;
-
-	memset(subreq, 0, sizeof(aead_req_data));
+	struct aead_request *subreq = aead_request_ctx(req);
 
 	aead_request_set_tfm(subreq, ctx->sw_cipher.aead);
 	aead_request_set_callback(subreq, req->base.flags,
@@ -280,10 +363,10 @@
 			    crypto_aead_encrypt(subreq);
 }
 
-static int crypto4xx_setup_fallback(struct crypto4xx_ctx *ctx,
-				    struct crypto_aead *cipher,
-				    const u8 *key,
-				    unsigned int keylen)
+static int crypto4xx_aead_setup_fallback(struct crypto4xx_ctx *ctx,
+					 struct crypto_aead *cipher,
+					 const u8 *key,
+					 unsigned int keylen)
 {
 	int rc;
 
@@ -311,7 +394,7 @@
 	struct dynamic_sa_ctl *sa;
 	int rc = 0;
 
-	rc = crypto4xx_setup_fallback(ctx, cipher, key, keylen);
+	rc = crypto4xx_aead_setup_fallback(ctx, cipher, key, keylen);
 	if (rc)
 		return rc;
 
@@ -366,19 +449,20 @@
 static int crypto4xx_crypt_aes_ccm(struct aead_request *req, bool decrypt)
 {
 	struct crypto4xx_ctx *ctx  = crypto_tfm_ctx(req->base.tfm);
+	struct crypto4xx_aead_reqctx *rctx = aead_request_ctx(req);
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	unsigned int len = req->cryptlen;
 	__le32 iv[16];
-	u32 tmp_sa[ctx->sa_len * 4];
+	u32 tmp_sa[SA_AES128_CCM_LEN + 4];
 	struct dynamic_sa_ctl *sa = (struct dynamic_sa_ctl *)tmp_sa;
-
-	if (crypto4xx_aead_need_fallback(req, true, decrypt))
-		return crypto4xx_aead_fallback(req, ctx, decrypt);
+	unsigned int len = req->cryptlen;
 
 	if (decrypt)
 		len -= crypto_aead_authsize(aead);
 
-	memcpy(tmp_sa, decrypt ? ctx->sa_in : ctx->sa_out, sizeof(tmp_sa));
+	if (crypto4xx_aead_need_fallback(req, len, true, decrypt))
+		return crypto4xx_aead_fallback(req, ctx, decrypt);
+
+	memcpy(tmp_sa, decrypt ? ctx->sa_in : ctx->sa_out, ctx->sa_len * 4);
 	sa->sa_command_0.bf.digest_len = crypto_aead_authsize(aead) >> 2;
 
 	if (req->iv[0] == 1) {
@@ -391,7 +475,7 @@
 
 	return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
 				  len, iv, sizeof(iv),
-				  sa, ctx->sa_len, req->assoclen);
+				  sa, ctx->sa_len, req->assoclen, rctx->dst);
 }
 
 int crypto4xx_encrypt_aes_ccm(struct aead_request *req)
@@ -470,7 +554,7 @@
 		return -EINVAL;
 	}
 
-	rc = crypto4xx_setup_fallback(ctx, cipher, key, keylen);
+	rc = crypto4xx_aead_setup_fallback(ctx, cipher, key, keylen);
 	if (rc)
 		return rc;
 
@@ -523,22 +607,23 @@
 					  bool decrypt)
 {
 	struct crypto4xx_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	unsigned int len = req->cryptlen;
+	struct crypto4xx_aead_reqctx *rctx = aead_request_ctx(req);
 	__le32 iv[4];
+	unsigned int len = req->cryptlen;
 
-	if (crypto4xx_aead_need_fallback(req, false, decrypt))
+	if (decrypt)
+		len -= crypto_aead_authsize(crypto_aead_reqtfm(req));
+
+	if (crypto4xx_aead_need_fallback(req, len, false, decrypt))
 		return crypto4xx_aead_fallback(req, ctx, decrypt);
 
 	crypto4xx_memcpy_to_le32(iv, req->iv, GCM_AES_IV_SIZE);
 	iv[3] = cpu_to_le32(1);
 
-	if (decrypt)
-		len -= crypto_aead_authsize(crypto_aead_reqtfm(req));
-
 	return crypto4xx_build_pd(&req->base, ctx, req->src, req->dst,
 				  len, iv, sizeof(iv),
 				  decrypt ? ctx->sa_in : ctx->sa_out,
-				  ctx->sa_len, req->assoclen);
+				  ctx->sa_len, req->assoclen, rctx->dst);
 }
 
 int crypto4xx_encrypt_aes_gcm(struct aead_request *req)
@@ -623,7 +708,7 @@
 
 	return crypto4xx_build_pd(&req->base, ctx, req->src, &dst,
 				  req->nbytes, NULL, 0, ctx->sa_in,
-				  ctx->sa_len, 0);
+				  ctx->sa_len, 0, NULL);
 }
 
 int crypto4xx_hash_final(struct ahash_request *req)
@@ -642,7 +727,7 @@
 
 	return crypto4xx_build_pd(&req->base, ctx, req->src, &dst,
 				  req->nbytes, NULL, 0, ctx->sa_in,
-				  ctx->sa_len, 0);
+				  ctx->sa_len, 0, NULL);
 }
 
 /**
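
The new ctr(aes) path above defers to the software skcipher whenever the
engine's 32-bit counter would wrap inside a single request, because the
hardware does not carry into the upper IV words. The check in
crypto4xx_ctr_crypt() reduces to an unsigned-overflow test on the last
big-endian IV word; the same logic as a standalone sketch (function name
assumed):

	#include <crypto/aes.h>
	#include <linux/kernel.h>

	static bool ctr32_would_wrap(const u8 *iv, unsigned int ivsize,
				     unsigned int cryptlen)
	{
		u32 counter = be32_to_cpup((const __be32 *)(iv + ivsize - 4));
		u32 nblks = DIV_ROUND_UP(cryptlen, AES_BLOCK_SIZE);

		/* unsigned addition wraps exactly when the engine would */
		return counter + nblks < counter;
	}
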
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index 76f459a..9cb234c 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -41,6 +41,7 @@
 #include <crypto/gcm.h>
 #include <crypto/sha.h>
 #include <crypto/scatterwalk.h>
+#include <crypto/skcipher.h>
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
 #include "crypto4xx_reg_def.h"
@@ -526,31 +527,38 @@
 	}
 }
 
-static void crypto4xx_ablkcipher_done(struct crypto4xx_device *dev,
+static void crypto4xx_cipher_done(struct crypto4xx_device *dev,
 				     struct pd_uinfo *pd_uinfo,
 				     struct ce_pd *pd)
 {
-	struct crypto4xx_ctx *ctx;
-	struct ablkcipher_request *ablk_req;
+	struct skcipher_request *req;
 	struct scatterlist *dst;
 	dma_addr_t addr;
 
-	ablk_req = ablkcipher_request_cast(pd_uinfo->async_req);
-	ctx  = crypto_tfm_ctx(ablk_req->base.tfm);
+	req = skcipher_request_cast(pd_uinfo->async_req);
 
 	if (pd_uinfo->using_sd) {
-		crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo, ablk_req->nbytes,
-					  ablk_req->dst);
+		crypto4xx_copy_pkt_to_dst(dev, pd, pd_uinfo,
+					  req->cryptlen, req->dst);
 	} else {
 		dst = pd_uinfo->dest_va;
 		addr = dma_map_page(dev->core_dev->device, sg_page(dst),
 				    dst->offset, dst->length, DMA_FROM_DEVICE);
 	}
+
+	if (pd_uinfo->sa_va->sa_command_0.bf.save_iv == SA_SAVE_IV) {
+		struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+
+		crypto4xx_memcpy_from_le32((u32 *)req->iv,
+			pd_uinfo->sr_va->save_iv,
+			crypto_skcipher_ivsize(skcipher));
+	}
+
 	crypto4xx_ret_sg_desc(dev, pd_uinfo);
 
 	if (pd_uinfo->state & PD_ENTRY_BUSY)
-		ablkcipher_request_complete(ablk_req, -EINPROGRESS);
-	ablkcipher_request_complete(ablk_req, 0);
+		skcipher_request_complete(req, -EINPROGRESS);
+	skcipher_request_complete(req, 0);
 }
 
 static void crypto4xx_ahash_done(struct crypto4xx_device *dev,
@@ -580,7 +588,7 @@
 	struct scatterlist *dst = pd_uinfo->dest_va;
 	size_t cp_len = crypto_aead_authsize(
 		crypto_aead_reqtfm(aead_req));
-	u32 icv[cp_len];
+	u32 icv[AES_BLOCK_SIZE];
 	int err = 0;
 
 	if (pd_uinfo->using_sd) {
@@ -595,7 +603,7 @@
 	if (pd_uinfo->sa_va->sa_command_0.bf.dir == DIR_OUTBOUND) {
 		/* append icv at the end */
 		crypto4xx_memcpy_from_le32(icv, pd_uinfo->sr_va->save_digest,
-					   cp_len);
+					   sizeof(icv));
 
 		scatterwalk_map_and_copy(icv, dst, aead_req->cryptlen,
 					 cp_len, 1);
@@ -605,7 +613,7 @@
 			aead_req->assoclen + aead_req->cryptlen -
 			cp_len, cp_len, 0);
 
-		crypto4xx_memcpy_from_le32(icv, icv, cp_len);
+		crypto4xx_memcpy_from_le32(icv, icv, sizeof(icv));
 
 		if (crypto_memneq(icv, pd_uinfo->sr_va->save_digest, cp_len))
 			err = -EBADMSG;
@@ -641,8 +649,8 @@
 	struct pd_uinfo *pd_uinfo = &dev->pdr_uinfo[idx];
 
 	switch (crypto_tfm_alg_type(pd_uinfo->async_req->tfm)) {
-	case CRYPTO_ALG_TYPE_ABLKCIPHER:
-		crypto4xx_ablkcipher_done(dev, pd_uinfo, pd);
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		crypto4xx_cipher_done(dev, pd_uinfo, pd);
 		break;
 	case CRYPTO_ALG_TYPE_AEAD:
 		crypto4xx_aead_done(dev, pd_uinfo, pd);
@@ -687,9 +695,9 @@
 		       const __le32 *iv, const u32 iv_len,
 		       const struct dynamic_sa_ctl *req_sa,
 		       const unsigned int sa_len,
-		       const unsigned int assoclen)
+		       const unsigned int assoclen,
+		       struct scatterlist *_dst)
 {
-	struct scatterlist _dst[2];
 	struct crypto4xx_device *dev = ctx->dev;
 	struct dynamic_sa_ctl *sa;
 	struct ce_gd *gd;
@@ -936,15 +944,27 @@
 	ctx->sa_len = 0;
 }
 
-static int crypto4xx_ablk_init(struct crypto_tfm *tfm)
+static int crypto4xx_sk_init(struct crypto_skcipher *sk)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
+	struct skcipher_alg *alg = crypto_skcipher_alg(sk);
 	struct crypto4xx_alg *amcc_alg;
-	struct crypto4xx_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto4xx_ctx *ctx =  crypto_skcipher_ctx(sk);
+
+	if (alg->base.cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
+		ctx->sw_cipher.cipher =
+			crypto_alloc_skcipher(alg->base.cra_name, 0,
+					      CRYPTO_ALG_NEED_FALLBACK |
+					      CRYPTO_ALG_ASYNC);
+		if (IS_ERR(ctx->sw_cipher.cipher))
+			return PTR_ERR(ctx->sw_cipher.cipher);
+
+		crypto_skcipher_set_reqsize(sk,
+			sizeof(struct skcipher_request) + 32 +
+			crypto_skcipher_reqsize(ctx->sw_cipher.cipher));
+	}
 
 	amcc_alg = container_of(alg, struct crypto4xx_alg, alg.u.cipher);
 	crypto4xx_ctx_init(amcc_alg, ctx);
-	tfm->crt_ablkcipher.reqsize = sizeof(struct crypto4xx_ctx);
 	return 0;
 }
 
@@ -953,9 +973,13 @@
 	crypto4xx_free_sa(ctx);
 }
 
-static void crypto4xx_ablk_exit(struct crypto_tfm *tfm)
+static void crypto4xx_sk_exit(struct crypto_skcipher *sk)
 {
-	crypto4xx_common_exit(crypto_tfm_ctx(tfm));
+	struct crypto4xx_ctx *ctx =  crypto_skcipher_ctx(sk);
+
+	crypto4xx_common_exit(ctx);
+	if (ctx->sw_cipher.cipher)
+		crypto_free_skcipher(ctx->sw_cipher.cipher);
 }
 
 static int crypto4xx_aead_init(struct crypto_aead *tfm)
@@ -972,9 +996,9 @@
 
 	amcc_alg = container_of(alg, struct crypto4xx_alg, alg.u.aead);
 	crypto4xx_ctx_init(amcc_alg, ctx);
-	crypto_aead_set_reqsize(tfm, sizeof(struct aead_request) +
-				max(sizeof(struct crypto4xx_ctx), 32 +
-				crypto_aead_reqsize(ctx->sw_cipher.aead)));
+	crypto_aead_set_reqsize(tfm, max(sizeof(struct aead_request) + 32 +
+				crypto_aead_reqsize(ctx->sw_cipher.aead),
+				sizeof(struct crypto4xx_aead_reqctx)));
 	return 0;
 }
 
@@ -1012,7 +1036,7 @@
 			break;
 
 		default:
-			rc = crypto_register_alg(&alg->alg.u.cipher);
+			rc = crypto_register_skcipher(&alg->alg.u.cipher);
 			break;
 		}
 
@@ -1041,7 +1065,7 @@
 			break;
 
 		default:
-			crypto_unregister_alg(&alg->alg.u.cipher);
+			crypto_unregister_skcipher(&alg->alg.u.cipher);
 		}
 		kfree(alg);
 	}
@@ -1103,126 +1127,131 @@
  */
 static struct crypto4xx_alg_common crypto4xx_alg[] = {
 	/* Crypto AES modes */
-	{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
-		.cra_name 	= "cbc(aes)",
-		.cra_driver_name = "cbc-aes-ppc4xx",
-		.cra_priority 	= CRYPTO4XX_CRYPTO_PRIORITY,
-		.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize 	= AES_BLOCK_SIZE,
-		.cra_ctxsize 	= sizeof(struct crypto4xx_ctx),
-		.cra_type 	= &crypto_ablkcipher_type,
-		.cra_init	= crypto4xx_ablk_init,
-		.cra_exit	= crypto4xx_ablk_exit,
-		.cra_module 	= THIS_MODULE,
-		.cra_u 		= {
-			.ablkcipher = {
-				.min_keysize 	= AES_MIN_KEY_SIZE,
-				.max_keysize 	= AES_MAX_KEY_SIZE,
-				.ivsize		= AES_IV_SIZE,
-				.setkey 	= crypto4xx_setkey_aes_cbc,
-				.encrypt 	= crypto4xx_encrypt,
-				.decrypt 	= crypto4xx_decrypt,
-			}
-		}
-	}},
-	{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
-		.cra_name	= "cfb(aes)",
-		.cra_driver_name = "cfb-aes-ppc4xx",
-		.cra_priority	= CRYPTO4XX_CRYPTO_PRIORITY,
-		.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_ctxsize	= sizeof(struct crypto4xx_ctx),
-		.cra_type	= &crypto_ablkcipher_type,
-		.cra_init	= crypto4xx_ablk_init,
-		.cra_exit	= crypto4xx_ablk_exit,
-		.cra_module	= THIS_MODULE,
-		.cra_u		= {
-			.ablkcipher = {
-				.min_keysize	= AES_MIN_KEY_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE,
-				.ivsize		= AES_IV_SIZE,
-				.setkey		= crypto4xx_setkey_aes_cfb,
-				.encrypt	= crypto4xx_encrypt,
-				.decrypt	= crypto4xx_decrypt,
-			}
-		}
+	{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
+		.base = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "cbc-aes-ppc4xx",
+			.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto4xx_ctx),
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize	= AES_IV_SIZE,
+		.setkey = crypto4xx_setkey_aes_cbc,
+		.encrypt = crypto4xx_encrypt_iv,
+		.decrypt = crypto4xx_decrypt_iv,
+		.init = crypto4xx_sk_init,
+		.exit = crypto4xx_sk_exit,
 	} },
-	{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
-		.cra_name	= "rfc3686(ctr(aes))",
-		.cra_driver_name = "rfc3686-ctr-aes-ppc4xx",
-		.cra_priority	= CRYPTO4XX_CRYPTO_PRIORITY,
-		.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_ctxsize	= sizeof(struct crypto4xx_ctx),
-		.cra_type	= &crypto_ablkcipher_type,
-		.cra_init	= crypto4xx_ablk_init,
-		.cra_exit	= crypto4xx_ablk_exit,
-		.cra_module	= THIS_MODULE,
-		.cra_u		= {
-			.ablkcipher = {
-				.min_keysize	= AES_MIN_KEY_SIZE +
-						  CTR_RFC3686_NONCE_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE +
-						  CTR_RFC3686_NONCE_SIZE,
-				.ivsize		= CTR_RFC3686_IV_SIZE,
-				.setkey		= crypto4xx_setkey_rfc3686,
-				.encrypt	= crypto4xx_rfc3686_encrypt,
-				.decrypt	= crypto4xx_rfc3686_decrypt,
-			}
-		}
+	{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
+		.base = {
+			.cra_name = "cfb(aes)",
+			.cra_driver_name = "cfb-aes-ppc4xx",
+			.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto4xx_ctx),
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize	= AES_IV_SIZE,
+		.setkey	= crypto4xx_setkey_aes_cfb,
+		.encrypt = crypto4xx_encrypt_iv,
+		.decrypt = crypto4xx_decrypt_iv,
+		.init = crypto4xx_sk_init,
+		.exit = crypto4xx_sk_exit,
 	} },
-	{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
-		.cra_name	= "ecb(aes)",
-		.cra_driver_name = "ecb-aes-ppc4xx",
-		.cra_priority	= CRYPTO4XX_CRYPTO_PRIORITY,
-		.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_ctxsize	= sizeof(struct crypto4xx_ctx),
-		.cra_type	= &crypto_ablkcipher_type,
-		.cra_init	= crypto4xx_ablk_init,
-		.cra_exit	= crypto4xx_ablk_exit,
-		.cra_module	= THIS_MODULE,
-		.cra_u		= {
-			.ablkcipher = {
-				.min_keysize	= AES_MIN_KEY_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE,
-				.setkey		= crypto4xx_setkey_aes_ecb,
-				.encrypt	= crypto4xx_encrypt,
-				.decrypt	= crypto4xx_decrypt,
-			}
-		}
+	{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
+		.base = {
+			.cra_name = "ctr(aes)",
+			.cra_driver_name = "ctr-aes-ppc4xx",
+			.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_NEED_FALLBACK |
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto4xx_ctx),
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize	= AES_IV_SIZE,
+		.setkey	= crypto4xx_setkey_aes_ctr,
+		.encrypt = crypto4xx_encrypt_ctr,
+		.decrypt = crypto4xx_decrypt_ctr,
+		.init = crypto4xx_sk_init,
+		.exit = crypto4xx_sk_exit,
 	} },
-	{ .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = {
-		.cra_name	= "ofb(aes)",
-		.cra_driver_name = "ofb-aes-ppc4xx",
-		.cra_priority	= CRYPTO4XX_CRYPTO_PRIORITY,
-		.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
-				  CRYPTO_ALG_ASYNC |
-				  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_ctxsize	= sizeof(struct crypto4xx_ctx),
-		.cra_type	= &crypto_ablkcipher_type,
-		.cra_init	= crypto4xx_ablk_init,
-		.cra_exit	= crypto4xx_ablk_exit,
-		.cra_module	= THIS_MODULE,
-		.cra_u		= {
-			.ablkcipher = {
-				.min_keysize	= AES_MIN_KEY_SIZE,
-				.max_keysize	= AES_MAX_KEY_SIZE,
-				.ivsize		= AES_IV_SIZE,
-				.setkey		= crypto4xx_setkey_aes_ofb,
-				.encrypt	= crypto4xx_encrypt,
-				.decrypt	= crypto4xx_decrypt,
-			}
-		}
+	{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
+		.base = {
+			.cra_name = "rfc3686(ctr(aes))",
+			.cra_driver_name = "rfc3686-ctr-aes-ppc4xx",
+			.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto4xx_ctx),
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+		.ivsize	= CTR_RFC3686_IV_SIZE,
+		.setkey = crypto4xx_setkey_rfc3686,
+		.encrypt = crypto4xx_rfc3686_encrypt,
+		.decrypt = crypto4xx_rfc3686_decrypt,
+		.init = crypto4xx_sk_init,
+		.exit = crypto4xx_sk_exit,
+	} },
+	{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "ecb-aes-ppc4xx",
+			.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto4xx_ctx),
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.setkey	= crypto4xx_setkey_aes_ecb,
+		.encrypt = crypto4xx_encrypt_noiv,
+		.decrypt = crypto4xx_decrypt_noiv,
+		.init = crypto4xx_sk_init,
+		.exit = crypto4xx_sk_exit,
+	} },
+	{ .type = CRYPTO_ALG_TYPE_SKCIPHER, .u.cipher = {
+		.base = {
+			.cra_name = "ofb(aes)",
+			.cra_driver_name = "ofb-aes-ppc4xx",
+			.cra_priority = CRYPTO4XX_CRYPTO_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC |
+				CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct crypto4xx_ctx),
+			.cra_module = THIS_MODULE,
+		},
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize	= AES_IV_SIZE,
+		.setkey	= crypto4xx_setkey_aes_ofb,
+		.encrypt = crypto4xx_encrypt_iv,
+		.decrypt = crypto4xx_decrypt_iv,
+		.init = crypto4xx_sk_init,
+		.exit = crypto4xx_sk_exit,
 	} },
 
 	/* AEAD */
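
Several hunks above (tmp_sa[] in the CCM path, icv[] in
crypto4xx_aead_done) also replace variable-length arrays with
compile-time worst-case bounds, in line with the kernel-wide VLA removal.
The pattern, as a hedged sketch with an assumed bound:

	#include <crypto/algapi.h>	/* crypto_memneq() */
	#include <linux/kernel.h>
	#include <linux/string.h>

	#define MAX_ICV_BYTES	16	/* assumed worst case: one AES block */

	static int check_icv(const u8 *computed, const u8 *expected,
			     size_t cp_len)
	{
		u8 icv[MAX_ICV_BYTES];	/* fixed bound, not u8 icv[cp_len] */

		if (WARN_ON(cp_len > sizeof(icv)))
			return -EINVAL;
		memcpy(icv, computed, cp_len);
		return crypto_memneq(icv, expected, cp_len) ? -EBADMSG : 0;
	}
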
diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h
index 23b726d..e2ca567 100644
--- a/drivers/crypto/amcc/crypto4xx_core.h
+++ b/drivers/crypto/amcc/crypto4xx_core.h
@@ -25,6 +25,7 @@
 #include <linux/ratelimit.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include "crypto4xx_reg_def.h"
 #include "crypto4xx_sa.h"
 
@@ -127,14 +128,19 @@
 	__le32 iv_nonce;
 	u32 sa_len;
 	union {
+		struct crypto_skcipher *cipher;
 		struct crypto_aead *aead;
 	} sw_cipher;
 };
 
+struct crypto4xx_aead_reqctx {
+	struct scatterlist dst[2];
+};
+
 struct crypto4xx_alg_common {
 	u32 type;
 	union {
-		struct crypto_alg cipher;
+		struct skcipher_alg cipher;
 		struct ahash_alg hash;
 		struct aead_alg aead;
 	} u;
@@ -157,21 +163,28 @@
 		       const __le32 *iv, const u32 iv_len,
 		       const struct dynamic_sa_ctl *sa,
 		       const unsigned int sa_len,
-		       const unsigned int assoclen);
-int crypto4xx_setkey_aes_cbc(struct crypto_ablkcipher *cipher,
+		       const unsigned int assoclen,
+		       struct scatterlist *dst_tmp);
+int crypto4xx_setkey_aes_cbc(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen);
-int crypto4xx_setkey_aes_cfb(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_aes_cfb(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen);
-int crypto4xx_setkey_aes_ecb(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_aes_ctr(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen);
-int crypto4xx_setkey_aes_ofb(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_aes_ecb(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen);
-int crypto4xx_setkey_rfc3686(struct crypto_ablkcipher *cipher,
+int crypto4xx_setkey_aes_ofb(struct crypto_skcipher *cipher,
 			     const u8 *key, unsigned int keylen);
-int crypto4xx_encrypt(struct ablkcipher_request *req);
-int crypto4xx_decrypt(struct ablkcipher_request *req);
-int crypto4xx_rfc3686_encrypt(struct ablkcipher_request *req);
-int crypto4xx_rfc3686_decrypt(struct ablkcipher_request *req);
+int crypto4xx_setkey_rfc3686(struct crypto_skcipher *cipher,
+			     const u8 *key, unsigned int keylen);
+int crypto4xx_encrypt_ctr(struct skcipher_request *req);
+int crypto4xx_decrypt_ctr(struct skcipher_request *req);
+int crypto4xx_encrypt_iv(struct skcipher_request *req);
+int crypto4xx_decrypt_iv(struct skcipher_request *req);
+int crypto4xx_encrypt_noiv(struct skcipher_request *req);
+int crypto4xx_decrypt_noiv(struct skcipher_request *req);
+int crypto4xx_rfc3686_encrypt(struct skcipher_request *req);
+int crypto4xx_rfc3686_decrypt(struct skcipher_request *req);
 int crypto4xx_sha1_alg_init(struct crypto_tfm *tfm);
 int crypto4xx_hash_digest(struct ahash_request *req);
 int crypto4xx_hash_final(struct ahash_request *req);
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 7207a53..d676679 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -769,15 +769,18 @@
  * @src_nents: number of segments in input s/w scatterlist
  * @dst_nents: number of segments in output s/w scatterlist
  * @iv_dma: dma address of iv for checking continuity and link table
+ * @iv_dir: DMA mapping direction for IV
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
  * @sec4_sg_dma: bus physical mapped address of h/w link table
  * @sec4_sg: pointer to h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
+ *	     and IV
  */
 struct ablkcipher_edesc {
 	int src_nents;
 	int dst_nents;
 	dma_addr_t iv_dma;
+	enum dma_data_direction iv_dir;
 	int sec4_sg_bytes;
 	dma_addr_t sec4_sg_dma;
 	struct sec4_sg_entry *sec4_sg;
@@ -787,7 +790,8 @@
 static void caam_unmap(struct device *dev, struct scatterlist *src,
 		       struct scatterlist *dst, int src_nents,
 		       int dst_nents,
-		       dma_addr_t iv_dma, int ivsize, dma_addr_t sec4_sg_dma,
+		       dma_addr_t iv_dma, int ivsize,
+		       enum dma_data_direction iv_dir, dma_addr_t sec4_sg_dma,
 		       int sec4_sg_bytes)
 {
 	if (dst != src) {
@@ -799,7 +803,7 @@
 	}
 
 	if (iv_dma)
-		dma_unmap_single(dev, iv_dma, ivsize, DMA_TO_DEVICE);
+		dma_unmap_single(dev, iv_dma, ivsize, iv_dir);
 	if (sec4_sg_bytes)
 		dma_unmap_single(dev, sec4_sg_dma, sec4_sg_bytes,
 				 DMA_TO_DEVICE);
@@ -810,7 +814,7 @@
 		       struct aead_request *req)
 {
 	caam_unmap(dev, req->src, req->dst,
-		   edesc->src_nents, edesc->dst_nents, 0, 0,
+		   edesc->src_nents, edesc->dst_nents, 0, 0, DMA_NONE,
 		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
 }
 
@@ -823,7 +827,7 @@
 
 	caam_unmap(dev, req->src, req->dst,
 		   edesc->src_nents, edesc->dst_nents,
-		   edesc->iv_dma, ivsize,
+		   edesc->iv_dma, ivsize, edesc->iv_dir,
 		   edesc->sec4_sg_dma, edesc->sec4_sg_bytes);
 }
 
@@ -912,6 +916,18 @@
 	scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
 				 ivsize, 0);
 
+	/* In case an initial IV was generated, copy it into the GIVCIPHER request */
+	if (edesc->iv_dir == DMA_FROM_DEVICE) {
+		u8 *iv;
+		struct skcipher_givcrypt_request *greq;
+
+		greq = container_of(req, struct skcipher_givcrypt_request,
+				    creq);
+		iv = (u8 *)edesc->hw_desc + desc_bytes(edesc->hw_desc) +
+		     edesc->sec4_sg_bytes;
+		memcpy(greq->giv, iv, ivsize);
+	}
+
 	kfree(edesc);
 
 	ablkcipher_request_complete(req, err);
@@ -922,10 +938,10 @@
 {
 	struct ablkcipher_request *req = context;
 	struct ablkcipher_edesc *edesc;
+#ifdef DEBUG
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 
-#ifdef DEBUG
 	dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
@@ -943,14 +959,6 @@
 		     edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 
 	ablkcipher_unmap(jrdev, edesc, req);
-
-	/*
-	 * The crypto API expects us to set the IV (req->info) to the last
-	 * ciphertext block.
-	 */
-	scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize,
-				 ivsize, 0);
-
 	kfree(edesc);
 
 	ablkcipher_request_complete(req, err);
@@ -1099,15 +1107,14 @@
  */
 static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
 				struct ablkcipher_edesc *edesc,
-				struct ablkcipher_request *req,
-				bool iv_contig)
+				struct ablkcipher_request *req)
 {
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 	u32 *desc = edesc->hw_desc;
-	u32 out_options = 0, in_options;
-	dma_addr_t dst_dma, src_dma;
-	int len, sec4_sg_index = 0;
+	u32 out_options = 0;
+	dma_addr_t dst_dma;
+	int len;
 
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
@@ -1123,30 +1130,18 @@
 	len = desc_len(sh_desc);
 	init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE);
 
-	if (iv_contig) {
-		src_dma = edesc->iv_dma;
-		in_options = 0;
-	} else {
-		src_dma = edesc->sec4_sg_dma;
-		sec4_sg_index += edesc->src_nents + 1;
-		in_options = LDST_SGF;
-	}
-	append_seq_in_ptr(desc, src_dma, req->nbytes + ivsize, in_options);
+	append_seq_in_ptr(desc, edesc->sec4_sg_dma, req->nbytes + ivsize,
+			  LDST_SGF);
 
 	if (likely(req->src == req->dst)) {
-		if (edesc->src_nents == 1 && iv_contig) {
-			dst_dma = sg_dma_address(req->src);
-		} else {
-			dst_dma = edesc->sec4_sg_dma +
-				sizeof(struct sec4_sg_entry);
-			out_options = LDST_SGF;
-		}
+		dst_dma = edesc->sec4_sg_dma + sizeof(struct sec4_sg_entry);
+		out_options = LDST_SGF;
 	} else {
 		if (edesc->dst_nents == 1) {
 			dst_dma = sg_dma_address(req->dst);
 		} else {
-			dst_dma = edesc->sec4_sg_dma +
-				sec4_sg_index * sizeof(struct sec4_sg_entry);
+			dst_dma = edesc->sec4_sg_dma + (edesc->src_nents + 1) *
+				  sizeof(struct sec4_sg_entry);
 			out_options = LDST_SGF;
 		}
 	}
@@ -1158,13 +1153,12 @@
  */
 static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
 				    struct ablkcipher_edesc *edesc,
-				    struct ablkcipher_request *req,
-				    bool iv_contig)
+				    struct ablkcipher_request *req)
 {
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 	u32 *desc = edesc->hw_desc;
-	u32 out_options, in_options;
+	u32 in_options;
 	dma_addr_t dst_dma, src_dma;
 	int len, sec4_sg_index = 0;
 
@@ -1190,15 +1184,9 @@
 	}
 	append_seq_in_ptr(desc, src_dma, req->nbytes, in_options);
 
-	if (iv_contig) {
-		dst_dma = edesc->iv_dma;
-		out_options = 0;
-	} else {
-		dst_dma = edesc->sec4_sg_dma +
-			  sec4_sg_index * sizeof(struct sec4_sg_entry);
-		out_options = LDST_SGF;
-	}
-	append_seq_out_ptr(desc, dst_dma, req->nbytes + ivsize, out_options);
+	dst_dma = edesc->sec4_sg_dma + sec4_sg_index *
+		  sizeof(struct sec4_sg_entry);
+	append_seq_out_ptr(desc, dst_dma, req->nbytes + ivsize, LDST_SGF);
 }
 
 /*
@@ -1287,7 +1275,7 @@
 			GFP_DMA | flags);
 	if (!edesc) {
 		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0);
+			   0, DMA_NONE, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1491,8 +1479,7 @@
  * allocate and map the ablkcipher extended descriptor for ablkcipher
  */
 static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
-						       *req, int desc_bytes,
-						       bool *iv_contig_out)
+						       *req, int desc_bytes)
 {
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
@@ -1501,8 +1488,8 @@
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct ablkcipher_edesc *edesc;
-	dma_addr_t iv_dma = 0;
-	bool in_contig;
+	dma_addr_t iv_dma;
+	u8 *iv;
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 	int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
 
@@ -1546,33 +1533,20 @@
 		}
 	}
 
-	iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, iv_dma)) {
-		dev_err(jrdev, "unable to map IV\n");
-		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	if (mapped_src_nents == 1 &&
-	    iv_dma + ivsize == sg_dma_address(req->src)) {
-		in_contig = true;
-		sec4_sg_ents = 0;
-	} else {
-		in_contig = false;
-		sec4_sg_ents = 1 + mapped_src_nents;
-	}
+	sec4_sg_ents = 1 + mapped_src_nents;
 	dst_sg_idx = sec4_sg_ents;
 	sec4_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
 	sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);
 
-	/* allocate space for base edesc and hw desc commands, link tables */
-	edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes,
+	/*
+	 * allocate space for base edesc and hw desc commands, link tables, IV
+	 */
+	edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize,
 			GFP_DMA | flags);
 	if (!edesc) {
 		dev_err(jrdev, "could not allocate extended descriptor\n");
-		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, 0, 0);
+		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, DMA_NONE, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1581,13 +1555,24 @@
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
 			 desc_bytes;
+	edesc->iv_dir = DMA_TO_DEVICE;
 
-	if (!in_contig) {
-		dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
-		sg_to_sec4_sg_last(req->src, mapped_src_nents,
-				   edesc->sec4_sg + 1, 0);
+	/* Make sure IV is located in a DMAable area */
+	iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
+	memcpy(iv, req->info, ivsize);
+
+	iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_TO_DEVICE);
+	if (dma_mapping_error(jrdev, iv_dma)) {
+		dev_err(jrdev, "unable to map IV\n");
+		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, DMA_NONE, 0, 0);
+		kfree(edesc);
+		return ERR_PTR(-ENOMEM);
 	}
 
+	dma_to_sec4_sg_one(edesc->sec4_sg, iv_dma, ivsize, 0);
+	sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg + 1, 0);
+
 	if (mapped_dst_nents > 1) {
 		sg_to_sec4_sg_last(req->dst, mapped_dst_nents,
 				   edesc->sec4_sg + dst_sg_idx, 0);
@@ -1598,7 +1583,7 @@
 	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
 		dev_err(jrdev, "unable to map S/G table\n");
 		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, 0, 0);
+			   iv_dma, ivsize, DMA_TO_DEVICE, 0, 0);
 		kfree(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1611,7 +1596,6 @@
 		       sec4_sg_bytes, 1);
 #endif
 
-	*iv_contig_out = in_contig;
 	return edesc;
 }
 
@@ -1621,19 +1605,16 @@
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *jrdev = ctx->jrdev;
-	bool iv_contig;
 	u32 *desc;
 	int ret = 0;
 
 	/* allocate extended descriptor */
-	edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN *
-				       CAAM_CMD_SZ, &iv_contig);
+	edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
 	/* Create and submit job descriptor*/
-	init_ablkcipher_job(ctx->sh_desc_enc,
-		ctx->sh_desc_enc_dma, edesc, req, iv_contig);
+	init_ablkcipher_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ",
 		       DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc,
@@ -1657,20 +1638,25 @@
 	struct ablkcipher_edesc *edesc;
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 	struct device *jrdev = ctx->jrdev;
-	bool iv_contig;
 	u32 *desc;
 	int ret = 0;
 
 	/* allocate extended descriptor */
-	edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN *
-				       CAAM_CMD_SZ, &iv_contig);
+	edesc = ablkcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
+	/*
+	 * The crypto API expects us to set the IV (req->info) to the last
+	 * ciphertext block.
+	 */
+	scatterwalk_map_and_copy(req->info, req->src, req->nbytes - ivsize,
+				 ivsize, 0);
+
 	/* Create and submit job descriptor*/
-	init_ablkcipher_job(ctx->sh_desc_dec,
-		ctx->sh_desc_dec_dma, edesc, req, iv_contig);
+	init_ablkcipher_job(ctx->sh_desc_dec, ctx->sh_desc_dec_dma, edesc, req);
 	desc = edesc->hw_desc;
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR, "ablkcipher jobdesc@"__stringify(__LINE__)": ",
@@ -1695,8 +1681,7 @@
  */
 static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
 				struct skcipher_givcrypt_request *greq,
-				int desc_bytes,
-				bool *iv_contig_out)
+				int desc_bytes)
 {
 	struct ablkcipher_request *req = &greq->creq;
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
@@ -1706,8 +1691,8 @@
 		       GFP_KERNEL : GFP_ATOMIC;
 	int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
 	struct ablkcipher_edesc *edesc;
-	dma_addr_t iv_dma = 0;
-	bool out_contig;
+	dma_addr_t iv_dma;
+	u8 *iv;
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 	int dst_sg_idx, sec4_sg_ents, sec4_sg_bytes;
 
@@ -1752,36 +1737,20 @@
 		}
 	}
 
-	/*
-	 * Check if iv can be contiguous with source and destination.
-	 * If so, include it. If not, create scatterlist.
-	 */
-	iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, iv_dma)) {
-		dev_err(jrdev, "unable to map IV\n");
-		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
-			   0, 0, 0);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	sec4_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
 	dst_sg_idx = sec4_sg_ents;
-	if (mapped_dst_nents == 1 &&
-	    iv_dma + ivsize == sg_dma_address(req->dst)) {
-		out_contig = true;
-	} else {
-		out_contig = false;
-		sec4_sg_ents += 1 + mapped_dst_nents;
-	}
+	sec4_sg_ents += 1 + mapped_dst_nents;
 
-	/* allocate space for base edesc and hw desc commands, link tables */
+	/*
+	 * allocate space for base edesc and hw desc commands, link tables, IV
+	 */
 	sec4_sg_bytes = sec4_sg_ents * sizeof(struct sec4_sg_entry);
-	edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes,
+	edesc = kzalloc(sizeof(*edesc) + desc_bytes + sec4_sg_bytes + ivsize,
 			GFP_DMA | flags);
 	if (!edesc) {
 		dev_err(jrdev, "could not allocate extended descriptor\n");
-		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, 0, 0);
+		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, DMA_NONE, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -1790,24 +1759,33 @@
 	edesc->sec4_sg_bytes = sec4_sg_bytes;
 	edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) +
 			 desc_bytes;
+	edesc->iv_dir = DMA_FROM_DEVICE;
+
+	/* Make sure IV is located in a DMAable area */
+	iv = (u8 *)edesc->hw_desc + desc_bytes + sec4_sg_bytes;
+	iv_dma = dma_map_single(jrdev, iv, ivsize, DMA_FROM_DEVICE);
+	if (dma_mapping_error(jrdev, iv_dma)) {
+		dev_err(jrdev, "unable to map IV\n");
+		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, DMA_NONE, 0, 0);
+		kfree(edesc);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	if (mapped_src_nents > 1)
 		sg_to_sec4_sg_last(req->src, mapped_src_nents, edesc->sec4_sg,
 				   0);
 
-	if (!out_contig) {
-		dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx,
-				   iv_dma, ivsize, 0);
-		sg_to_sec4_sg_last(req->dst, mapped_dst_nents,
-				   edesc->sec4_sg + dst_sg_idx + 1, 0);
-	}
+	dma_to_sec4_sg_one(edesc->sec4_sg + dst_sg_idx, iv_dma, ivsize, 0);
+	sg_to_sec4_sg_last(req->dst, mapped_dst_nents, edesc->sec4_sg +
+			   dst_sg_idx + 1, 0);
 
 	edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg,
 					    sec4_sg_bytes, DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
 		dev_err(jrdev, "unable to map S/G table\n");
 		caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, 0, 0);
+			   iv_dma, ivsize, DMA_FROM_DEVICE, 0, 0);
 		kfree(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1820,7 +1798,6 @@
 		       sec4_sg_bytes, 1);
 #endif
 
-	*iv_contig_out = out_contig;
 	return edesc;
 }
 
@@ -1831,19 +1808,17 @@
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct device *jrdev = ctx->jrdev;
-	bool iv_contig = false;
 	u32 *desc;
 	int ret = 0;
 
 	/* allocate extended descriptor */
-	edesc = ablkcipher_giv_edesc_alloc(creq, DESC_JOB_IO_LEN *
-				       CAAM_CMD_SZ, &iv_contig);
+	edesc = ablkcipher_giv_edesc_alloc(creq, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
 	/* Create and submit job descriptor*/
 	init_ablkcipher_giv_job(ctx->sh_desc_givenc, ctx->sh_desc_givenc_dma,
-				edesc, req, iv_contig);
+				edesc, req);
 #ifdef DEBUG
 	print_hex_dump(KERN_ERR,
 		       "ablkcipher jobdesc@" __stringify(__LINE__) ": ",
diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
index 8ae7a1b..a408edd 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -1093,7 +1093,7 @@
 	read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
 				    (0x6 << MOVE_LEN_SHIFT));
 	write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
-				     (0x8 << MOVE_LEN_SHIFT));
+				     (0x8 << MOVE_LEN_SHIFT) | MOVE_WAITCOMP);
 
 	/* Will read assoclen + cryptlen bytes */
 	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
@@ -1178,7 +1178,7 @@
 	read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
 				    (0x6 << MOVE_LEN_SHIFT));
 	write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
-				     (0x8 << MOVE_LEN_SHIFT));
+				     (0x8 << MOVE_LEN_SHIFT) | MOVE_WAITCOMP);
 
 	/* Will read assoclen + cryptlen bytes */
 	append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index cacda08..6e61cc9 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -728,7 +728,7 @@
  * @assoclen: associated data length, in CAAM endianness
  * @assoclen_dma: bus physical mapped address of req->assoclen
  * @drv_req: driver-specific request structure
- * @sgt: the h/w link table
+ * @sgt: the h/w link table, followed by IV
  */
 struct aead_edesc {
 	int src_nents;
@@ -739,9 +739,6 @@
 	unsigned int assoclen;
 	dma_addr_t assoclen_dma;
 	struct caam_drv_req drv_req;
-#define CAAM_QI_MAX_AEAD_SG						\
-	((CAAM_QI_MEMCACHE_SIZE - offsetof(struct aead_edesc, sgt)) /	\
-	 sizeof(struct qm_sg_entry))
 	struct qm_sg_entry sgt[0];
 };
 
@@ -753,7 +750,7 @@
  * @qm_sg_bytes: length of dma mapped h/w link table
  * @qm_sg_dma: bus physical mapped address of h/w link table
  * @drv_req: driver-specific request structure
- * @sgt: the h/w link table
+ * @sgt: the h/w link table, followed by IV
  */
 struct ablkcipher_edesc {
 	int src_nents;
@@ -762,9 +759,6 @@
 	int qm_sg_bytes;
 	dma_addr_t qm_sg_dma;
 	struct caam_drv_req drv_req;
-#define CAAM_QI_MAX_ABLKCIPHER_SG					    \
-	((CAAM_QI_MEMCACHE_SIZE - offsetof(struct ablkcipher_edesc, sgt)) / \
-	 sizeof(struct qm_sg_entry))
 	struct qm_sg_entry sgt[0];
 };
 
@@ -986,17 +980,8 @@
 		}
 	}
 
-	if ((alg->caam.rfc3686 && encrypt) || !alg->caam.geniv) {
+	if ((alg->caam.rfc3686 && encrypt) || !alg->caam.geniv)
 		ivsize = crypto_aead_ivsize(aead);
-		iv_dma = dma_map_single(qidev, req->iv, ivsize, DMA_TO_DEVICE);
-		if (dma_mapping_error(qidev, iv_dma)) {
-			dev_err(qidev, "unable to map IV\n");
-			caam_unmap(qidev, req->src, req->dst, src_nents,
-				   dst_nents, 0, 0, op_type, 0, 0);
-			qi_cache_free(edesc);
-			return ERR_PTR(-ENOMEM);
-		}
-	}
 
 	/*
 	 * Create S/G table: req->assoclen, [IV,] req->src [, req->dst].
@@ -1004,16 +989,33 @@
 	 */
 	qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
 		     (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
-	if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
-			qm_sg_ents, CAAM_QI_MAX_AEAD_SG);
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, op_type, 0, 0);
+	sg_table = &edesc->sgt[0];
+	qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
+	if (unlikely(offsetof(struct aead_edesc, sgt) + qm_sg_bytes + ivsize >
+		     CAAM_QI_MEMCACHE_SIZE)) {
+		dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
+			qm_sg_ents, ivsize);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0, 0);
 		qi_cache_free(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
-	sg_table = &edesc->sgt[0];
-	qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
+
+	if (ivsize) {
+		u8 *iv = (u8 *)(sg_table + qm_sg_ents);
+
+		/* Make sure IV is located in a DMAable area */
+		memcpy(iv, req->iv, ivsize);
+
+		iv_dma = dma_map_single(qidev, iv, ivsize, DMA_TO_DEVICE);
+		if (dma_mapping_error(qidev, iv_dma)) {
+			dev_err(qidev, "unable to map IV\n");
+			caam_unmap(qidev, req->src, req->dst, src_nents,
+				   dst_nents, 0, 0, 0, 0, 0);
+			qi_cache_free(edesc);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
 
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
@@ -1166,15 +1168,27 @@
 #endif
 
 	ablkcipher_unmap(qidev, edesc, req);
-	qi_cache_free(edesc);
+
+	/* In case initial IV was generated, copy it into the GIVCIPHER request */
+	if (edesc->drv_req.drv_ctx->op_type == GIVENCRYPT) {
+		u8 *iv;
+		struct skcipher_givcrypt_request *greq;
+
+		greq = container_of(req, struct skcipher_givcrypt_request,
+				    creq);
+		iv = (u8 *)edesc->sgt + edesc->qm_sg_bytes;
+		memcpy(greq->giv, iv, ivsize);
+	}
 
 	/*
 	 * The crypto API expects us to set the IV (req->info) to the last
 	 * ciphertext block. This is used e.g. by the CTS mode.
 	 */
-	scatterwalk_map_and_copy(req->info, req->dst, req->nbytes - ivsize,
-				 ivsize, 0);
+	if (edesc->drv_req.drv_ctx->op_type != DECRYPT)
+		scatterwalk_map_and_copy(req->info, req->dst, req->nbytes -
+					 ivsize, ivsize, 0);
 
+	qi_cache_free(edesc);
 	ablkcipher_request_complete(req, status);
 }
 
@@ -1189,9 +1203,9 @@
 	int src_nents, mapped_src_nents, dst_nents = 0, mapped_dst_nents = 0;
 	struct ablkcipher_edesc *edesc;
 	dma_addr_t iv_dma;
-	bool in_contig;
+	u8 *iv;
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
-	int dst_sg_idx, qm_sg_ents;
+	int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
 	struct qm_sg_entry *sg_table, *fd_sgt;
 	struct caam_drv_ctx *drv_ctx;
 	enum optype op_type = encrypt ? ENCRYPT : DECRYPT;
@@ -1238,55 +1252,53 @@
 		}
 	}
 
-	iv_dma = dma_map_single(qidev, req->info, ivsize, DMA_TO_DEVICE);
-	if (dma_mapping_error(qidev, iv_dma)) {
-		dev_err(qidev, "unable to map IV\n");
+	qm_sg_ents = 1 + mapped_src_nents;
+	dst_sg_idx = qm_sg_ents;
+
+	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
+	qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
+	if (unlikely(offsetof(struct ablkcipher_edesc, sgt) + qm_sg_bytes +
+		     ivsize > CAAM_QI_MEMCACHE_SIZE)) {
+		dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
+			qm_sg_ents, ivsize);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
 			   0, 0, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	if (mapped_src_nents == 1 &&
-	    iv_dma + ivsize == sg_dma_address(req->src)) {
-		in_contig = true;
-		qm_sg_ents = 0;
-	} else {
-		in_contig = false;
-		qm_sg_ents = 1 + mapped_src_nents;
-	}
-	dst_sg_idx = qm_sg_ents;
-
-	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
-	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
-			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, op_type, 0, 0);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	/* allocate space for base edesc and link tables */
+	/* allocate space for base edesc, link tables and IV */
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	if (unlikely(!edesc)) {
 		dev_err(qidev, "could not allocate extended descriptor\n");
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, op_type, 0, 0);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Make sure IV is located in a DMAable area */
+	sg_table = &edesc->sgt[0];
+	iv = (u8 *)(sg_table + qm_sg_ents);
+	memcpy(iv, req->info, ivsize);
+
+	iv_dma = dma_map_single(qidev, iv, ivsize, DMA_TO_DEVICE);
+	if (dma_mapping_error(qidev, iv_dma)) {
+		dev_err(qidev, "unable to map IV\n");
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0, 0);
+		qi_cache_free(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
 
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
 	edesc->iv_dma = iv_dma;
-	sg_table = &edesc->sgt[0];
-	edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
+	edesc->qm_sg_bytes = qm_sg_bytes;
 	edesc->drv_req.app_ctx = req;
 	edesc->drv_req.cbk = ablkcipher_done;
 	edesc->drv_req.drv_ctx = drv_ctx;
 
-	if (!in_contig) {
-		dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
-		sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
-	}
+	dma_to_qm_sg_one(sg_table, iv_dma, ivsize, 0);
+	sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table + 1, 0);
 
 	if (mapped_dst_nents > 1)
 		sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
@@ -1304,20 +1316,12 @@
 
 	fd_sgt = &edesc->drv_req.fd_sgt[0];
 
-	if (!in_contig)
-		dma_to_qm_sg_one_last_ext(&fd_sgt[1], edesc->qm_sg_dma,
-					  ivsize + req->nbytes, 0);
-	else
-		dma_to_qm_sg_one_last(&fd_sgt[1], iv_dma, ivsize + req->nbytes,
-				      0);
+	dma_to_qm_sg_one_last_ext(&fd_sgt[1], edesc->qm_sg_dma,
+				  ivsize + req->nbytes, 0);
 
 	if (req->src == req->dst) {
-		if (!in_contig)
-			dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma +
-					     sizeof(*sg_table), req->nbytes, 0);
-		else
-			dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->src),
-					 req->nbytes, 0);
+		dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma +
+				     sizeof(*sg_table), req->nbytes, 0);
 	} else if (mapped_dst_nents > 1) {
 		dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
 				     sizeof(*sg_table), req->nbytes, 0);
@@ -1341,10 +1345,10 @@
 	int src_nents, mapped_src_nents, dst_nents, mapped_dst_nents;
 	struct ablkcipher_edesc *edesc;
 	dma_addr_t iv_dma;
-	bool out_contig;
+	u8 *iv;
 	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 	struct qm_sg_entry *sg_table, *fd_sgt;
-	int dst_sg_idx, qm_sg_ents;
+	int dst_sg_idx, qm_sg_ents, qm_sg_bytes;
 	struct caam_drv_ctx *drv_ctx;
 
 	drv_ctx = get_drv_ctx(ctx, GIVENCRYPT);
@@ -1392,46 +1396,45 @@
 		mapped_dst_nents = src_nents;
 	}
 
-	iv_dma = dma_map_single(qidev, creq->giv, ivsize, DMA_FROM_DEVICE);
-	if (dma_mapping_error(qidev, iv_dma)) {
-		dev_err(qidev, "unable to map IV\n");
+	qm_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
+	dst_sg_idx = qm_sg_ents;
+
+	qm_sg_ents += 1 + mapped_dst_nents;
+	qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
+	if (unlikely(offsetof(struct ablkcipher_edesc, sgt) + qm_sg_bytes +
+		     ivsize > CAAM_QI_MEMCACHE_SIZE)) {
+		dev_err(qidev, "No space for %d S/G entries and/or %dB IV\n",
+			qm_sg_ents, ivsize);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
 			   0, 0, 0, 0);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	qm_sg_ents = mapped_src_nents > 1 ? mapped_src_nents : 0;
-	dst_sg_idx = qm_sg_ents;
-	if (mapped_dst_nents == 1 &&
-	    iv_dma + ivsize == sg_dma_address(req->dst)) {
-		out_contig = true;
-	} else {
-		out_contig = false;
-		qm_sg_ents += 1 + mapped_dst_nents;
-	}
-
-	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
-			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, GIVENCRYPT, 0, 0);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	/* allocate space for base edesc and link tables */
+	/* allocate space for base edesc, link tables and IV */
 	edesc = qi_cache_alloc(GFP_DMA | flags);
 	if (!edesc) {
 		dev_err(qidev, "could not allocate extended descriptor\n");
-		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
-			   iv_dma, ivsize, GIVENCRYPT, 0, 0);
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0, 0);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Make sure IV is located in a DMAable area */
+	sg_table = &edesc->sgt[0];
+	iv = (u8 *)(sg_table + qm_sg_ents);
+	iv_dma = dma_map_single(qidev, iv, ivsize, DMA_FROM_DEVICE);
+	if (dma_mapping_error(qidev, iv_dma)) {
+		dev_err(qidev, "unable to map IV\n");
+		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents, 0,
+			   0, 0, 0, 0);
+		qi_cache_free(edesc);
 		return ERR_PTR(-ENOMEM);
 	}
 
 	edesc->src_nents = src_nents;
 	edesc->dst_nents = dst_nents;
 	edesc->iv_dma = iv_dma;
-	sg_table = &edesc->sgt[0];
-	edesc->qm_sg_bytes = qm_sg_ents * sizeof(*sg_table);
+	edesc->qm_sg_bytes = qm_sg_bytes;
 	edesc->drv_req.app_ctx = req;
 	edesc->drv_req.cbk = ablkcipher_done;
 	edesc->drv_req.drv_ctx = drv_ctx;
@@ -1439,11 +1442,9 @@
 	if (mapped_src_nents > 1)
 		sg_to_qm_sg_last(req->src, mapped_src_nents, sg_table, 0);
 
-	if (!out_contig) {
-		dma_to_qm_sg_one(sg_table + dst_sg_idx, iv_dma, ivsize, 0);
-		sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table +
-				 dst_sg_idx + 1, 0);
-	}
+	dma_to_qm_sg_one(sg_table + dst_sg_idx, iv_dma, ivsize, 0);
+	sg_to_qm_sg_last(req->dst, mapped_dst_nents, sg_table + dst_sg_idx + 1,
+			 0);
 
 	edesc->qm_sg_dma = dma_map_single(qidev, sg_table, edesc->qm_sg_bytes,
 					  DMA_TO_DEVICE);
@@ -1464,13 +1465,8 @@
 		dma_to_qm_sg_one(&fd_sgt[1], sg_dma_address(req->src),
 				 req->nbytes, 0);
 
-	if (!out_contig)
-		dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
-				     sizeof(*sg_table), ivsize + req->nbytes,
-				     0);
-	else
-		dma_to_qm_sg_one(&fd_sgt[0], sg_dma_address(req->dst),
-				 ivsize + req->nbytes, 0);
+	dma_to_qm_sg_one_ext(&fd_sgt[0], edesc->qm_sg_dma + dst_sg_idx *
+			     sizeof(*sg_table), ivsize + req->nbytes, 0);
 
 	return edesc;
 }
@@ -1480,6 +1476,7 @@
 	struct ablkcipher_edesc *edesc;
 	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
 	struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
+	int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
 	int ret;
 
 	if (unlikely(caam_congested))
@@ -1490,6 +1487,14 @@
 	if (IS_ERR(edesc))
 		return PTR_ERR(edesc);
 
+	/*
+	 * The crypto API expects us to set the IV (req->info) to the last
+	 * ciphertext block.
+	 */
+	if (!encrypt)
+		scatterwalk_map_and_copy(req->info, req->src, req->nbytes -
+					 ivsize, ivsize, 0);
+
 	ret = caam_qi_enqueue(ctx->qidev, &edesc->drv_req);
 	if (!ret) {
 		ret = -EINPROGRESS;
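
The hunks above move the IV copy into the tail of the qi_cache entry, right
after the S/G table, so it always sits in DMA-able memory (req->iv may live
on the caller's stack), and a single bound check now covers header, S/G
table and IV together. A standalone sketch of that layout check, in
userspace C, with an assumed CAAM_QI_MEMCACHE_SIZE and a trimmed-down
edesc:

#include <stddef.h>
#include <stdio.h>

#define CAAM_QI_MEMCACHE_SIZE	768	/* assumed slab entry size */

struct qm_sg_entry { unsigned long long addr; unsigned int len, off; };

struct edesc_demo {			/* trimmed-down ablkcipher_edesc */
	int src_nents, dst_nents;
	unsigned long long iv_dma;
	int qm_sg_bytes;
	struct qm_sg_entry sgt[];	/* S/G table, then the IV copy */
};

int main(void)
{
	int qm_sg_ents = 4, ivsize = 16;
	size_t qm_sg_bytes = qm_sg_ents * sizeof(struct qm_sg_entry);
	size_t need = offsetof(struct edesc_demo, sgt) + qm_sg_bytes + ivsize;

	if (need > CAAM_QI_MEMCACHE_SIZE) {
		fprintf(stderr, "No space for %d S/G entries and/or %dB IV\n",
			qm_sg_ents, ivsize);
		return 1;
	}
	printf("fits: %zu of %d bytes; IV copied at offset %zu\n",
	       need, CAAM_QI_MEMCACHE_SIZE, need - (size_t)ivsize);
	return 0;
}
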
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 7a897209..578ea63 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -66,7 +66,7 @@
 	struct caam_rsa_key *key = &ctx->key;
 	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
 	size_t p_sz = key->p_sz;
-	size_t q_sz = key->p_sz;
+	size_t q_sz = key->q_sz;
 
 	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
 	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
@@ -83,7 +83,7 @@
 	struct caam_rsa_key *key = &ctx->key;
 	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
 	size_t p_sz = key->p_sz;
-	size_t q_sz = key->p_sz;
+	size_t q_sz = key->q_sz;
 
 	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
 	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
@@ -166,18 +166,71 @@
 	akcipher_request_complete(req, err);
 }
 
+static int caam_rsa_count_leading_zeros(struct scatterlist *sgl,
+					unsigned int nbytes,
+					unsigned int flags)
+{
+	struct sg_mapping_iter miter;
+	int lzeros, ents;
+	unsigned int len;
+	unsigned int tbytes = nbytes;
+	const u8 *buff;
+
+	ents = sg_nents_for_len(sgl, nbytes);
+	if (ents < 0)
+		return ents;
+
+	sg_miter_start(&miter, sgl, ents, SG_MITER_FROM_SG | flags);
+
+	lzeros = 0;
+	len = 0;
+	while (nbytes > 0) {
+		while (len && !*buff) {
+			lzeros++;
+			len--;
+			buff++;
+		}
+
+		if (len && *buff)
+			break;
+
+		sg_miter_next(&miter);
+		buff = miter.addr;
+		len = miter.length;
+
+		nbytes -= lzeros;
+		lzeros = 0;
+	}
+
+	miter.consumed = lzeros;
+	sg_miter_stop(&miter);
+	nbytes -= lzeros;
+
+	return tbytes - nbytes;
+}
+
 static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 					 size_t desclen)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct device *dev = ctx->dev;
+	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct rsa_edesc *edesc;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
+	int sg_flags = (flags == GFP_ATOMIC) ? SG_MITER_ATOMIC : 0;
 	int sgc;
 	int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes;
 	int src_nents, dst_nents;
+	int lzeros;
+
+	lzeros = caam_rsa_count_leading_zeros(req->src, req->src_len, sg_flags);
+	if (lzeros < 0)
+		return ERR_PTR(lzeros);
+
+	req->src_len -= lzeros;
+	req->src = scatterwalk_ffwd(req_ctx->src, req->src, lzeros);
 
 	src_nents = sg_nents_for_len(req->src, req->src_len);
 	dst_nents = sg_nents_for_len(req->dst, req->dst_len);
@@ -344,7 +397,7 @@
 	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
 	int sec4_sg_index = 0;
 	size_t p_sz = key->p_sz;
-	size_t q_sz = key->p_sz;
+	size_t q_sz = key->q_sz;
 
 	pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE);
 	if (dma_mapping_error(dev, pdb->d_dma)) {
@@ -419,7 +472,7 @@
 	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
 	int sec4_sg_index = 0;
 	size_t p_sz = key->p_sz;
-	size_t q_sz = key->p_sz;
+	size_t q_sz = key->q_sz;
 
 	pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
 	if (dma_mapping_error(dev, pdb->p_dma)) {
@@ -730,19 +783,12 @@
  */
 static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes)
 {
-	u8 *val;
 
 	caam_rsa_drop_leading_zeros(&buf, nbytes);
 	if (!*nbytes)
 		return NULL;
 
-	val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL);
-	if (!val)
-		return NULL;
-
-	memcpy(val, buf, *nbytes);
-
-	return val;
+	return kmemdup(buf, *nbytes, GFP_DMA | GFP_KERNEL);
 }
 
 static int caam_rsa_check_key_length(unsigned int len)
@@ -953,6 +999,7 @@
 	.max_size = caam_rsa_max_size,
 	.init = caam_rsa_init_tfm,
 	.exit = caam_rsa_exit_tfm,
+	.reqsize = sizeof(struct caam_rsa_req_ctx),
 	.base = {
 		.cra_name = "rsa",
 		.cra_driver_name = "rsa-caam",
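
caam_rsa_count_leading_zeros() walks the input scatterlist with an sg_miter
and counts the big-endian leading zero bytes, which rsa_edesc_alloc() then
skips with scatterwalk_ffwd(), because the accelerator rejects inputs wider
than the modulus. The same stripping on a flat buffer, just to show the
arithmetic (the scatterlist plumbing is the driver's):

#include <stdio.h>
#include <stddef.h>

static size_t count_leading_zeros(const unsigned char *buf, size_t len)
{
	size_t lzeros = 0;

	while (lzeros < len && !buf[lzeros])
		lzeros++;
	return lzeros;
}

int main(void)
{
	/* big-endian integer 0x1234 padded to four bytes */
	unsigned char msg[] = { 0x00, 0x00, 0x12, 0x34 };
	size_t lz = count_leading_zeros(msg, sizeof(msg));

	printf("skip %zu bytes, %zu significant\n", lz, sizeof(msg) - lz);
	return 0;
}
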
diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h
index fd145c4..82645bc 100644
--- a/drivers/crypto/caam/caampkc.h
+++ b/drivers/crypto/caam/caampkc.h
@@ -96,6 +96,14 @@
 };
 
 /**
+ * caam_rsa_req_ctx - per request context.
+ * @src: input scatterlist (stripped of leading zeros)
+ */
+struct caam_rsa_req_ctx {
+	struct scatterlist src[2];
+};
+
+/**
  * rsa_edesc - s/w-extended rsa descriptor
  * @src_nents     : number of segments in input scatterlist
  * @dst_nents     : number of segments in output scatterlist
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index e4cc636..538c01f 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -322,9 +322,9 @@
 
 	/*
 	 * De-initialize RNG state handles initialized by this driver.
-	 * In case of DPAA 2.x, RNG is managed by MC firmware.
+	 * In case of SoCs with Management Complex, RNG is managed by MC f/w.
 	 */
-	if (!caam_dpaa2 && ctrlpriv->rng4_sh_init)
+	if (!ctrlpriv->mc_en && ctrlpriv->rng4_sh_init)
 		deinstantiate_rng(ctrldev, ctrlpriv->rng4_sh_init);
 
 	/* Shut down debug views */
@@ -396,11 +396,56 @@
 	clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC);
 }
 
+static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl)
+{
+	static const struct {
+		u16 ip_id;
+		u8 maj_rev;
+		u8 era;
+	} id[] = {
+		{0x0A10, 1, 1},
+		{0x0A10, 2, 2},
+		{0x0A12, 1, 3},
+		{0x0A14, 1, 3},
+		{0x0A14, 2, 4},
+		{0x0A16, 1, 4},
+		{0x0A10, 3, 4},
+		{0x0A11, 1, 4},
+		{0x0A18, 1, 4},
+		{0x0A11, 2, 5},
+		{0x0A12, 2, 5},
+		{0x0A13, 1, 5},
+		{0x0A1C, 1, 5}
+	};
+	u32 ccbvid, id_ms;
+	u8 maj_rev, era;
+	u16 ip_id;
+	int i;
+
+	ccbvid = rd_reg32(&ctrl->perfmon.ccb_id);
+	era = (ccbvid & CCBVID_ERA_MASK) >> CCBVID_ERA_SHIFT;
+	if (era)	/* This is '0' prior to CAAM ERA-6 */
+		return era;
+
+	id_ms = rd_reg32(&ctrl->perfmon.caam_id_ms);
+	ip_id = (id_ms & SECVID_MS_IPID_MASK) >> SECVID_MS_IPID_SHIFT;
+	maj_rev = (id_ms & SECVID_MS_MAJ_REV_MASK) >> SECVID_MS_MAJ_REV_SHIFT;
+
+	for (i = 0; i < ARRAY_SIZE(id); i++)
+		if (id[i].ip_id == ip_id && id[i].maj_rev == maj_rev)
+			return id[i].era;
+
+	return -ENOTSUPP;
+}
+
 /**
  * caam_get_era() - Return the ERA of the SEC on SoC, based
- * on "sec-era" propery in the DTS. This property is updated by u-boot.
+ * on "sec-era" optional property in the DTS. This property is updated
+ * by u-boot.
+ * In case this property is not passed an attempt to retrieve the CAAM
+ * era via register reads will be made.
  **/
-int caam_get_era(void)
+static int caam_get_era(struct caam_ctrl __iomem *ctrl)
 {
 	struct device_node *caam_node;
 	int ret;
@@ -410,9 +455,11 @@
 	ret = of_property_read_u32(caam_node, "fsl,sec-era", &prop);
 	of_node_put(caam_node);
 
-	return ret ? -ENOTSUPP : prop;
+	if (!ret)
+		return prop;
+
+	return caam_get_era_from_hw(ctrl);
 }
-EXPORT_SYMBOL(caam_get_era);
 
 static const struct of_device_id caam_match[] = {
 	{
@@ -571,11 +618,15 @@
 	/*
 	 * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
 	 * long pointers in master configuration register.
-	 * In case of DPAA 2.x, Management Complex firmware performs
+	 * In case of SoCs with Management Complex, MC f/w performs
 	 * the configuration.
 	 */
 	caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);
-	if (!caam_dpaa2)
+	np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc");
+	ctrlpriv->mc_en = !!np;
+	of_node_put(np);
+
+	if (!ctrlpriv->mc_en)
 		clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR,
 			      MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
 			      MCFGR_WDENABLE | MCFGR_LARGE_BURST |
@@ -623,7 +674,7 @@
 		goto iounmap_ctrl;
 	}
 
-	ctrlpriv->era = caam_get_era();
+	ctrlpriv->era = caam_get_era(ctrl);
 
 	ret = of_platform_populate(nprop, caam_match, NULL, dev);
 	if (ret) {
@@ -686,9 +737,9 @@
 	/*
 	 * If SEC has RNG version >= 4 and RNG state handle has not been
 	 * already instantiated, do RNG instantiation
-	 * In case of DPAA 2.x, RNG is managed by MC firmware.
+	 * In case of SoCs with Management Complex, RNG is managed by MC f/w.
 	 */
-	if (!caam_dpaa2 &&
+	if (!ctrlpriv->mc_en &&
 	    (cha_vid_ls & CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT >= 4) {
 		ctrlpriv->rng4_sh_init =
 			rd_reg32(&ctrl->r4tst[0].rdsta);
@@ -757,9 +808,8 @@
 	/* Report "alive" for developer to see */
 	dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id,
 		 ctrlpriv->era);
-	dev_info(dev, "job rings = %d, qi = %d, dpaa2 = %s\n",
-		 ctrlpriv->total_jobrs, ctrlpriv->qi_present,
-		 caam_dpaa2 ? "yes" : "no");
+	dev_info(dev, "job rings = %d, qi = %d\n",
+		 ctrlpriv->total_jobrs, ctrlpriv->qi_present);
 
 #ifdef CONFIG_DEBUG_FS
 	debugfs_create_file("rq_dequeued", S_IRUSR | S_IRGRP | S_IROTH,
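
caam_get_era_from_hw() first trusts the era field of CCBVID, which is
non-zero from Era 6 onwards, and only then falls back to matching
(IP ID, major revision) pairs. A userspace rendering of that fallback,
with the table values copied from the hunk above:

#include <stdio.h>

struct era_map { unsigned short ip_id; unsigned char maj_rev, era; };

static const struct era_map id[] = {
	{0x0A10, 1, 1}, {0x0A10, 2, 2}, {0x0A12, 1, 3}, {0x0A14, 1, 3},
	{0x0A14, 2, 4}, {0x0A16, 1, 4}, {0x0A10, 3, 4}, {0x0A11, 1, 4},
	{0x0A18, 1, 4}, {0x0A11, 2, 5}, {0x0A12, 2, 5}, {0x0A13, 1, 5},
	{0x0A1C, 1, 5}
};

static int era_from_id(unsigned short ip_id, unsigned char maj_rev)
{
	for (size_t i = 0; i < sizeof(id) / sizeof(id[0]); i++)
		if (id[i].ip_id == ip_id && id[i].maj_rev == maj_rev)
			return id[i].era;
	return -1;			/* -ENOTSUPP in the driver */
}

int main(void)
{
	printf("era = %d\n", era_from_id(0x0A14, 2));	/* prints 4 */
	return 0;
}
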
diff --git a/drivers/crypto/caam/ctrl.h b/drivers/crypto/caam/ctrl.h
index be693a2..f3ecd67 100644
--- a/drivers/crypto/caam/ctrl.h
+++ b/drivers/crypto/caam/ctrl.h
@@ -9,8 +9,6 @@
 #define CTRL_H
 
 /* Prototypes for backend-level services exposed to APIs */
-int caam_get_era(void);
-
 extern bool caam_dpaa2;
 
 #endif /* CTRL_H */
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 7696a77..babc78a 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -82,6 +82,7 @@
 	 */
 	u8 total_jobrs;		/* Total Job Rings in device */
 	u8 qi_present;		/* Nonzero if QI present in device */
+	u8 mc_en;		/* Nonzero if MC f/w is active */
 	int secvio_irq;		/* Security violation interrupt number */
 	int virt_en;		/* Virtualization enabled in CAAM */
 	int era;		/* CAAM Era (internal HW revision) */
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index b948082..67f7f8c 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -657,9 +657,8 @@
 {
 	int ret;
 	struct qm_mcc_initcgr opts;
-	const u64 cpus = *(u64 *)qman_affine_cpus();
-	const int num_cpus = hweight64(cpus);
-	const u64 val = num_cpus * MAX_RSP_FQ_BACKLOG_PER_CPU;
+	const u64 val = (u64)cpumask_weight(qman_affine_cpus()) *
+			MAX_RSP_FQ_BACKLOG_PER_CPU;
 
 	ret = qman_alloc_cgrid(&qipriv.cgr.cgrid);
 	if (ret) {
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index fee3638..4fb91ba 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -312,11 +312,17 @@
 
 	/* Component Instantiation Parameters			fe0-fff */
 	u32 rtic_id;		/* RVID - RTIC Version ID	*/
+#define CCBVID_ERA_MASK		0xff000000
+#define CCBVID_ERA_SHIFT	24
 	u32 ccb_id;		/* CCBVID - CCB Version ID	*/
 	u32 cha_id_ms;		/* CHAVID - CHA Version ID Most Significant*/
 	u32 cha_id_ls;		/* CHAVID - CHA Version ID Least Significant*/
 	u32 cha_num_ms;		/* CHANUM - CHA Number Most Significant	*/
 	u32 cha_num_ls;		/* CHANUM - CHA Number Least Significant*/
+#define SECVID_MS_IPID_MASK	0xffff0000
+#define SECVID_MS_IPID_SHIFT	16
+#define SECVID_MS_MAJ_REV_MASK	0x0000ff00
+#define SECVID_MS_MAJ_REV_SHIFT	8
 	u32 caam_id_ms;		/* CAAMVID - CAAM Version ID MS	*/
 	u32 caam_id_ls;		/* CAAMVID - CAAM Version ID LS	*/
 };
diff --git a/drivers/crypto/cavium/zip/common.h b/drivers/crypto/cavium/zip/common.h
index dc451e0..58fb3ed 100644
--- a/drivers/crypto/cavium/zip/common.h
+++ b/drivers/crypto/cavium/zip/common.h
@@ -46,8 +46,10 @@
 #ifndef __COMMON_H__
 #define __COMMON_H__
 
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -149,6 +151,25 @@
 	u32   sizeofzops;
 };
 
+static inline int zip_poll_result(union zip_zres_s *result)
+{
+	int retries = 1000;
+
+	while (!result->s.compcode) {
+		if (!--retries) {
+			pr_err("ZIP ERR: request timed out");
+			return -ETIMEDOUT;
+		}
+		udelay(10);
+		/*
+		 * Force re-reading of compcode which is updated
+		 * by the ZIP coprocessor.
+		 */
+		rmb();
+	}
+	return 0;
+}
+
 /* error messages */
 #define zip_err(fmt, args...) pr_err("ZIP ERR:%s():%d: " \
 			      fmt "\n", __func__, __LINE__, ## args)
diff --git a/drivers/crypto/cavium/zip/zip_crypto.c b/drivers/crypto/cavium/zip/zip_crypto.c
index 8df4d26..b92b6e7 100644
--- a/drivers/crypto/cavium/zip/zip_crypto.c
+++ b/drivers/crypto/cavium/zip/zip_crypto.c
@@ -124,7 +124,7 @@
 		 struct zip_kernel_ctx *zip_ctx)
 {
 	struct zip_operation  *zip_ops   = NULL;
-	struct zip_state      zip_state;
+	struct zip_state      *zip_state;
 	struct zip_device     *zip = NULL;
 	int ret;
 
@@ -135,20 +135,23 @@
 	if (!zip)
 		return -ENODEV;
 
-	memset(&zip_state, 0, sizeof(struct zip_state));
+	zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
+	if (!zip_state)
+		return -ENOMEM;
+
 	zip_ops = &zip_ctx->zip_comp;
 
 	zip_ops->input_len  = slen;
 	zip_ops->output_len = *dlen;
 	memcpy(zip_ops->input, src, slen);
 
-	ret = zip_deflate(zip_ops, &zip_state, zip);
+	ret = zip_deflate(zip_ops, zip_state, zip);
 
 	if (!ret) {
 		*dlen = zip_ops->output_len;
 		memcpy(dst, zip_ops->output, *dlen);
 	}
-
+	kfree(zip_state);
 	return ret;
 }
 
@@ -157,7 +160,7 @@
 		   struct zip_kernel_ctx *zip_ctx)
 {
 	struct zip_operation  *zip_ops   = NULL;
-	struct zip_state      zip_state;
+	struct zip_state      *zip_state;
 	struct zip_device     *zip = NULL;
 	int ret;
 
@@ -168,7 +171,10 @@
 	if (!zip)
 		return -ENODEV;
 
-	memset(&zip_state, 0, sizeof(struct zip_state));
+	zip_state = kzalloc(sizeof(*zip_state), GFP_ATOMIC);
+	if (!zip_state)
+		return -ENOMEM;
+
 	zip_ops = &zip_ctx->zip_decomp;
 	memcpy(zip_ops->input, src, slen);
 
@@ -179,13 +185,13 @@
 	zip_ops->input_len  = slen;
 	zip_ops->output_len = *dlen;
 
-	ret = zip_inflate(zip_ops, &zip_state, zip);
+	ret = zip_inflate(zip_ops, zip_state, zip);
 
 	if (!ret) {
 		*dlen = zip_ops->output_len;
 		memcpy(dst, zip_ops->output, *dlen);
 	}
-
+	kfree(zip_state);
 	return ret;
 }
 
diff --git a/drivers/crypto/cavium/zip/zip_deflate.c b/drivers/crypto/cavium/zip/zip_deflate.c
index 9a944b8..d7133f8 100644
--- a/drivers/crypto/cavium/zip/zip_deflate.c
+++ b/drivers/crypto/cavium/zip/zip_deflate.c
@@ -129,8 +129,8 @@
 	/* Stats update for compression requests submitted */
 	atomic64_inc(&zip_dev->stats.comp_req_submit);
 
-	while (!result_ptr->s.compcode)
-		continue;
+	/* Wait for completion or error */
+	zip_poll_result(result_ptr);
 
 	/* Stats update for compression requests completed */
 	atomic64_inc(&zip_dev->stats.comp_req_complete);
diff --git a/drivers/crypto/cavium/zip/zip_device.c b/drivers/crypto/cavium/zip/zip_device.c
index ccf21fb..f174ec2 100644
--- a/drivers/crypto/cavium/zip/zip_device.c
+++ b/drivers/crypto/cavium/zip/zip_device.c
@@ -87,12 +87,12 @@
 	 * Distribute the instructions between the enabled queues based on
 	 * the CPU id.
 	 */
-	if (smp_processor_id() % 2 == 0)
+	if (raw_smp_processor_id() % 2 == 0)
 		queue = 0;
 	else
 		queue = 1;
 
-	zip_dbg("CPU Core: %d Queue number:%d", smp_processor_id(), queue);
+	zip_dbg("CPU Core: %d Queue number:%d", raw_smp_processor_id(), queue);
 
 	/* Take cmd buffer lock */
 	spin_lock(&zip_dev->iq[queue].lock);
diff --git a/drivers/crypto/cavium/zip/zip_inflate.c b/drivers/crypto/cavium/zip/zip_inflate.c
index 50cbdd8..7e0d73e 100644
--- a/drivers/crypto/cavium/zip/zip_inflate.c
+++ b/drivers/crypto/cavium/zip/zip_inflate.c
@@ -143,8 +143,8 @@
 	/* Decompression requests submitted stats update */
 	atomic64_inc(&zip_dev->stats.decomp_req_submit);
 
-	while (!result_ptr->s.compcode)
-		continue;
+	/* Wait for completion or error */
+	zip_poll_result(result_ptr);
 
 	/* Decompression requests completed stats update */
 	atomic64_inc(&zip_dev->stats.decomp_req_complete);
diff --git a/drivers/crypto/cavium/zip/zip_main.c b/drivers/crypto/cavium/zip/zip_main.c
index 1cd8aa48..be055b9 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -113,7 +113,7 @@
  */
 int zip_get_node_id(void)
 {
-	return cpu_to_node(smp_processor_id());
+	return cpu_to_node(raw_smp_processor_id());
 }
 
 /* Initializes the ZIP h/w sub-system */
@@ -469,6 +469,8 @@
 	struct zip_stats  *st;
 
 	for (index = 0; index < MAX_ZIP_DEVICES; index++) {
+		u64 pending = 0;
+
 		if (zip_dev[index]) {
 			zip = zip_dev[index];
 			st  = &zip->stats;
@@ -476,16 +478,15 @@
 			/* Get all the pending requests */
 			for (q = 0; q < ZIP_NUM_QUEUES; q++) {
 				val = zip_reg_read((zip->reg_base +
-						    ZIP_DBG_COREX_STA(q)));
-				val = (val >> 32);
-				val = val & 0xffffff;
-				atomic64_add(val, &st->pending_req);
+						    ZIP_DBG_QUEX_STA(q)));
+				pending += val >> 32 & 0xffffff;
 			}
 
-			avg_chunk = (atomic64_read(&st->comp_in_bytes) /
-				     atomic64_read(&st->comp_req_complete));
-			avg_cr = (atomic64_read(&st->comp_in_bytes) /
-				  atomic64_read(&st->comp_out_bytes));
+			val = atomic64_read(&st->comp_req_complete);
+			avg_chunk = (val) ? atomic64_read(&st->comp_in_bytes) / val : 0;
+
+			val = atomic64_read(&st->comp_out_bytes);
+			avg_cr = (val) ? atomic64_read(&st->comp_in_bytes) / val : 0;
 			seq_printf(s, "        ZIP Device %d Stats\n"
 				      "-----------------------------------\n"
 				      "Comp Req Submitted        : \t%lld\n"
@@ -513,10 +514,7 @@
 				       (u64)atomic64_read(&st->decomp_in_bytes),
 				       (u64)atomic64_read(&st->decomp_out_bytes),
 				       (u64)atomic64_read(&st->decomp_bad_reqs),
-				       (u64)atomic64_read(&st->pending_req));
-
-			/* Reset pending requests  count */
-			atomic64_set(&st->pending_req, 0);
+				       pending);
 		}
 	}
 	return 0;
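
The reworked stats path divides only when the divisor is non-zero, since
comp_req_complete and comp_out_bytes both read 0 before the first request
finishes. The guard in isolation:

#include <stdio.h>

static unsigned long long safe_avg(unsigned long long total,
				   unsigned long long count)
{
	return count ? total / count : 0;
}

int main(void)
{
	printf("avg chunk = %llu\n", safe_avg(4096, 0));	/* 0, no trap */
	printf("avg chunk = %llu\n", safe_avg(4096, 16));	/* 256 */
	return 0;
}
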
diff --git a/drivers/crypto/cavium/zip/zip_main.h b/drivers/crypto/cavium/zip/zip_main.h
index 64e051f..e1e4fa9 100644
--- a/drivers/crypto/cavium/zip/zip_main.h
+++ b/drivers/crypto/cavium/zip/zip_main.h
@@ -74,7 +74,6 @@
 	atomic64_t    comp_req_complete;
 	atomic64_t    decomp_req_submit;
 	atomic64_t    decomp_req_complete;
-	atomic64_t    pending_req;
 	atomic64_t    comp_in_bytes;
 	atomic64_t    comp_out_bytes;
 	atomic64_t    decomp_in_bytes;
diff --git a/drivers/crypto/cavium/zip/zip_regs.h b/drivers/crypto/cavium/zip/zip_regs.h
index d0be682..874e023 100644
--- a/drivers/crypto/cavium/zip/zip_regs.h
+++ b/drivers/crypto/cavium/zip/zip_regs.h
@@ -443,7 +443,7 @@
 
 static inline u64 ZIP_COREX_BIST_STATUS(u64 param1)
 {
-	if (((param1 <= 1)))
+	if (param1 <= 1)
 		return 0x0520ull + (param1 & 1) * 0x8ull;
 	pr_err("ZIP_COREX_BIST_STATUS: %llu\n", param1);
 	return 0;
@@ -537,7 +537,7 @@
 
 static inline u64 ZIP_DBG_COREX_INST(u64 param1)
 {
-	if (((param1 <= 1)))
+	if (param1 <= 1)
 		return 0x0640ull + (param1 & 1) * 0x8ull;
 	pr_err("ZIP_DBG_COREX_INST: %llu\n", param1);
 	return 0;
@@ -568,7 +568,7 @@
 
 static inline u64 ZIP_DBG_COREX_STA(u64 param1)
 {
-	if (((param1 <= 1)))
+	if (param1 <= 1)
 		return 0x0680ull + (param1 & 1) * 0x8ull;
 	pr_err("ZIP_DBG_COREX_STA: %llu\n", param1);
 	return 0;
@@ -599,7 +599,7 @@
 
 static inline u64 ZIP_DBG_QUEX_STA(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x1800ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_DBG_QUEX_STA: %llu\n", param1);
 	return 0;
@@ -817,7 +817,7 @@
 
 static inline u64 ZIP_MSIX_PBAX(u64 param1)
 {
-	if (((param1 == 0)))
+	if (param1 == 0)
 		return 0x0000838000FF0000ull;
 	pr_err("ZIP_MSIX_PBAX: %llu\n", param1);
 	return 0;
@@ -846,7 +846,7 @@
 
 static inline u64 ZIP_MSIX_VECX_ADDR(u64 param1)
 {
-	if (((param1 <= 17)))
+	if (param1 <= 17)
 		return 0x0000838000F00000ull + (param1 & 31) * 0x10ull;
 	pr_err("ZIP_MSIX_VECX_ADDR: %llu\n", param1);
 	return 0;
@@ -875,7 +875,7 @@
 
 static inline u64 ZIP_MSIX_VECX_CTL(u64 param1)
 {
-	if (((param1 <= 17)))
+	if (param1 <= 17)
 		return 0x0000838000F00008ull + (param1 & 31) * 0x10ull;
 	pr_err("ZIP_MSIX_VECX_CTL: %llu\n", param1);
 	return 0;
@@ -900,7 +900,7 @@
 
 static inline u64 ZIP_QUEX_DONE(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x2000ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_DONE: %llu\n", param1);
 	return 0;
@@ -925,7 +925,7 @@
 
 static inline u64 ZIP_QUEX_DONE_ACK(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x2200ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_DONE_ACK: %llu\n", param1);
 	return 0;
@@ -950,7 +950,7 @@
 
 static inline u64 ZIP_QUEX_DONE_ENA_W1C(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x2600ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_DONE_ENA_W1C: %llu\n", param1);
 	return 0;
@@ -975,7 +975,7 @@
 
 static inline u64 ZIP_QUEX_DONE_ENA_W1S(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x2400ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_DONE_ENA_W1S: %llu\n", param1);
 	return 0;
@@ -1004,7 +1004,7 @@
 
 static inline u64 ZIP_QUEX_DONE_WAIT(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x2800ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_DONE_WAIT: %llu\n", param1);
 	return 0;
@@ -1029,7 +1029,7 @@
 
 static inline u64 ZIP_QUEX_DOORBELL(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x4000ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_DOORBELL: %llu\n", param1);
 	return 0;
@@ -1058,7 +1058,7 @@
 
 static inline u64 ZIP_QUEX_ERR_ENA_W1C(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x3600ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_ERR_ENA_W1C: %llu\n", param1);
 	return 0;
@@ -1087,7 +1087,7 @@
 
 static inline u64 ZIP_QUEX_ERR_ENA_W1S(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x3400ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_ERR_ENA_W1S: %llu\n", param1);
 	return 0;
@@ -1120,7 +1120,7 @@
 
 static inline u64 ZIP_QUEX_ERR_INT(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x3000ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_ERR_INT: %llu\n", param1);
 	return 0;
@@ -1150,7 +1150,7 @@
 
 static inline u64 ZIP_QUEX_ERR_INT_W1S(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x3200ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_ERR_INT_W1S: %llu\n", param1);
 	return 0;
@@ -1179,7 +1179,7 @@
 
 static inline u64 ZIP_QUEX_GCFG(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x1A00ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_GCFG: %llu\n", param1);
 	return 0;
@@ -1204,7 +1204,7 @@
 
 static inline u64 ZIP_QUEX_MAP(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x1400ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_MAP: %llu\n", param1);
 	return 0;
@@ -1236,7 +1236,7 @@
 
 static inline u64 ZIP_QUEX_SBUF_ADDR(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x1000ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_SBUF_ADDR: %llu\n", param1);
 	return 0;
@@ -1276,7 +1276,7 @@
 
 static inline u64 ZIP_QUEX_SBUF_CTL(u64 param1)
 {
-	if (((param1 <= 7)))
+	if (param1 <= 7)
 		return 0x1200ull + (param1 & 7) * 0x8ull;
 	pr_err("ZIP_QUEX_SBUF_CTL: %llu\n", param1);
 	return 0;
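
Every helper cleaned up above has the same shape: validate the index, then
return base plus the masked index times an 8-byte stride. A generic
standalone version, using the base and mask of ZIP_QUEX_DOORBELL:

#include <stdio.h>
#include <stdint.h>

static uint64_t zip_quex_reg(uint64_t base, uint64_t idx, uint64_t mask)
{
	if (idx <= mask)		/* mask doubles as the max index */
		return base + (idx & mask) * 0x8ull;
	fprintf(stderr, "bad queue index %llu\n", (unsigned long long)idx);
	return 0;
}

int main(void)
{
	/* ZIP_QUEX_DOORBELL(3) == 0x4000 + 3 * 8 */
	printf("0x%llx\n",
	       (unsigned long long)zip_quex_reg(0x4000, 3, 7));
	return 0;
}
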
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index d95ec52..ff478d8 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -22,11 +22,17 @@
 #include <linux/delay.h>
 #include <linux/hw_random.h>
 #include <linux/ccp.h>
+#include <linux/firmware.h>
 
 #include "sp-dev.h"
 #include "psp-dev.h"
 
+#define SEV_VERSION_GREATER_OR_EQUAL(_maj, _min)		\
+	((psp_master->api_major) > (_maj) ||			\
+	 ((psp_master->api_major) == (_maj) && (psp_master->api_minor) >= (_min)))
+
 #define DEVICE_NAME	"sev"
+#define SEV_FW_FILE	"amd/sev.fw"
 
 static DEFINE_MUTEX(sev_cmd_mutex);
 static struct sev_misc_dev *misc_dev;
@@ -112,6 +118,8 @@
 	case SEV_CMD_RECEIVE_UPDATE_DATA:	return sizeof(struct sev_data_receive_update_data);
 	case SEV_CMD_RECEIVE_UPDATE_VMSA:	return sizeof(struct sev_data_receive_update_vmsa);
 	case SEV_CMD_LAUNCH_UPDATE_SECRET:	return sizeof(struct sev_data_launch_secret);
+	case SEV_CMD_DOWNLOAD_FIRMWARE:		return sizeof(struct sev_data_download_firmware);
+	case SEV_CMD_GET_ID:			return sizeof(struct sev_data_get_id);
 	default:				return 0;
 	}
 
@@ -378,6 +386,79 @@
 }
 EXPORT_SYMBOL_GPL(psp_copy_user_blob);
 
+static int sev_get_api_version(void)
+{
+	struct sev_user_data_status *status;
+	int error, ret;
+
+	status = &psp_master->status_cmd_buf;
+	ret = sev_platform_status(status, &error);
+	if (ret) {
+		dev_err(psp_master->dev,
+			"SEV: failed to get status. Error: %#x\n", error);
+		return 1;
+	}
+
+	psp_master->api_major = status->api_major;
+	psp_master->api_minor = status->api_minor;
+	psp_master->build = status->build;
+
+	return 0;
+}
+
+/* Don't fail if SEV FW couldn't be updated. Continue with existing SEV FW */
+static int sev_update_firmware(struct device *dev)
+{
+	struct sev_data_download_firmware *data;
+	const struct firmware *firmware;
+	int ret, error, order;
+	struct page *p;
+	u64 data_size;
+
+	ret = request_firmware(&firmware, SEV_FW_FILE, dev);
+	if (ret < 0)
+		return -1;
+
+	/*
+	 * SEV FW expects the physical address given to it to be 32
+	 * byte aligned. Memory allocated has structure placed at the
+	 * beginning followed by the firmware being passed to the SEV
+	 * FW. Allocate enough memory for data structure + alignment
+	 * padding + SEV FW.
+	 */
+	data_size = ALIGN(sizeof(struct sev_data_download_firmware), 32);
+
+	order = get_order(firmware->size + data_size);
+	p = alloc_pages(GFP_KERNEL, order);
+	if (!p) {
+		ret = -1;
+		goto fw_err;
+	}
+
+	/*
+	 * Copy firmware data to a kernel allocated contiguous
+	 * memory region.
+	 */
+	data = page_address(p);
+	memcpy(page_address(p) + data_size, firmware->data, firmware->size);
+
+	data->address = __psp_pa(page_address(p) + data_size);
+	data->len = firmware->size;
+
+	ret = sev_do_cmd(SEV_CMD_DOWNLOAD_FIRMWARE, data, &error);
+	if (ret)
+		dev_dbg(dev, "Failed to update SEV firmware: %#x\n", error);
+	else
+		dev_info(dev, "SEV firmware update successful\n");
+
+	__free_pages(p, order);
+
+fw_err:
+	release_firmware(firmware);
+
+	return ret;
+}
+
 static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
 {
 	struct sev_user_data_pek_cert_import input;
@@ -430,6 +511,46 @@
 	return ret;
 }
 
+static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp)
+{
+	struct sev_data_get_id *data;
+	u64 data_size, user_size;
+	void *id_blob, *mem;
+	int ret;
+
+	/* SEV GET_ID available from SEV API v0.16 and up */
+	if (!SEV_VERSION_GREATER_OR_EQUAL(0, 16))
+		return -ENOTSUPP;
+
+	/* SEV FW expects the buffer it fills with the ID to be
+	 * 8-byte aligned. Memory allocated should be enough to
+	 * hold data structure + alignment padding + memory
+	 * where SEV FW writes the ID.
+	 */
+	data_size = ALIGN(sizeof(struct sev_data_get_id), 8);
+	user_size = sizeof(struct sev_user_data_get_id);
+
+	mem = kzalloc(data_size + user_size, GFP_KERNEL);
+	if (!mem)
+		return -ENOMEM;
+
+	data = mem;
+	id_blob = mem + data_size;
+
+	data->address = __psp_pa(id_blob);
+	data->len = user_size;
+
+	ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, data, &argp->error);
+	if (!ret) {
+		if (copy_to_user((void __user *)argp->data, id_blob, data->len))
+			ret = -EFAULT;
+	}
+
+	kfree(mem);
+
+	return ret;
+}
+
 static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
 {
 	struct sev_user_data_pdh_cert_export input;
@@ -567,6 +688,9 @@
 	case SEV_PDH_CERT_EXPORT:
 		ret = sev_ioctl_do_pdh_export(&input);
 		break;
+	case SEV_GET_ID:
+		ret = sev_ioctl_do_get_id(&input);
+		break;
 	default:
 		ret = -EINVAL;
 		goto out;
@@ -750,7 +874,6 @@
 
 void psp_pci_init(void)
 {
-	struct sev_user_data_status *status;
 	struct sp_device *sp;
 	int error, rc;
 
@@ -760,6 +883,13 @@
 
 	psp_master = sp->psp_data;
 
+	if (sev_get_api_version())
+		goto err;
+
+	if (SEV_VERSION_GREATER_OR_EQUAL(0, 15) &&
+	    sev_update_firmware(psp_master->dev) == 0)
+		sev_get_api_version();
+
 	/* Initialize the platform */
 	rc = sev_platform_init(&error);
 	if (rc) {
@@ -767,16 +897,9 @@
 		goto err;
 	}
 
-	/* Display SEV firmware version */
-	status = &psp_master->status_cmd_buf;
-	rc = sev_platform_status(status, &error);
-	if (rc) {
-		dev_err(sp->dev, "SEV: failed to get status error %#x\n", error);
-		goto err;
-	}
+	dev_info(sp->dev, "SEV API:%d.%d build:%d\n", psp_master->api_major,
+		 psp_master->api_minor, psp_master->build);
 
-	dev_info(sp->dev, "SEV API:%d.%d build:%d\n", status->api_major,
-		 status->api_minor, status->build);
 	return;
 
 err:
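
sev_update_firmware() places the command structure at the start of the
allocation and the image at the next 32-byte boundary, as its alignment
comment requires. Just that offset computation, standalone; the struct
fields mirror sev_data_download_firmware and ALIGN() mirrors the kernel
macro:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

struct sev_data_download_firmware_demo {
	uint64_t address;	/* physical address of the image */
	uint32_t len;		/* image length in bytes */
} __attribute__((packed));

int main(void)
{
	size_t hdr = sizeof(struct sev_data_download_firmware_demo);
	size_t data_size = ALIGN(hdr, 32);

	printf("header %zu bytes, image starts at offset %zu\n",
	       hdr, data_size);
	return 0;
}
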
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
index c81f0b1..c7e9098a 100644
--- a/drivers/crypto/ccp/psp-dev.h
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -78,6 +78,10 @@
 	struct sev_misc_dev *sev_misc;
 	struct sev_user_data_status status_cmd_buf;
 	struct sev_data_init init_cmd_buf;
+
+	u8 api_major;
+	u8 api_minor;
+	u8 build;
 };
 
 #endif /* __PSP_DEV_H */
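
With api_major, api_minor and build cached in psp_master, a feature gate is
a tuple comparison, and it has to be lexicographic rather than
component-wise, which is what the SEV_VERSION_GREATER_OR_EQUAL() definition
above implements: API 1.2 satisfies ">= 0.16" even though 2 < 16.
Standalone:

#include <stdio.h>

static int version_ge(int maj, int min, int want_maj, int want_min)
{
	return maj > want_maj || (maj == want_maj && min >= want_min);
}

int main(void)
{
	printf("%d\n", version_ge(1, 2, 0, 16));	/* 1: supported */
	printf("%d\n", version_ge(0, 15, 0, 16));	/* 0: too old */
	return 0;
}
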
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index df98f7a..d2810c1 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -42,6 +42,7 @@
 	int cipher_mode;
 	int flow_mode;
 	unsigned int flags;
+	bool hw_key;
 	struct cc_user_key_info user;
 	struct cc_hw_key_info hw;
 	struct crypto_shash *shash_tfm;
@@ -49,6 +50,13 @@
 
 static void cc_cipher_complete(struct device *dev, void *cc_req, int err);
 
+static inline bool cc_is_hw_key(struct crypto_tfm *tfm)
+{
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+
+	return ctx_p->hw_key;
+}
+
 static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size)
 {
 	switch (ctx_p->flow_mode) {
@@ -211,7 +219,7 @@
 	u8	key3[DES_KEY_SIZE];
 };
 
-static enum cc_hw_crypto_key hw_key_to_cc_hw_key(int slot_num)
+static enum cc_hw_crypto_key cc_slot_to_hw_key(int slot_num)
 {
 	switch (slot_num) {
 	case 0:
@@ -226,6 +234,74 @@
 	return END_OF_KEYS;
 }
 
+static int cc_cipher_sethkey(struct crypto_skcipher *sktfm, const u8 *key,
+			     unsigned int keylen)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(sktfm);
+	struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx_p->drvdata);
+	struct cc_hkey_info hki;
+
+	dev_dbg(dev, "Setting HW key in context @%p for %s. keylen=%u\n",
+		ctx_p, crypto_tfm_alg_name(tfm), keylen);
+	dump_byte_array("key", (u8 *)key, keylen);
+
+	/* STAT_PHASE_0: Init and sanity checks */
+
+	/* This checks the size of the hardware key token */
+	if (keylen != sizeof(hki)) {
+		dev_err(dev, "Unsupported HW key size %d.\n", keylen);
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	if (ctx_p->flow_mode != S_DIN_to_AES) {
+		dev_err(dev, "HW key not supported for non-AES flows\n");
+		return -EINVAL;
+	}
+
+	memcpy(&hki, key, keylen);
+
+	/* The real key len for crypto op is the size of the HW key
+	 * referenced by the HW key slot, not the hardware key token
+	 */
+	keylen = hki.keylen;
+
+	if (validate_keys_sizes(ctx_p, keylen)) {
+		dev_err(dev, "Unsupported key size %d.\n", keylen);
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx_p->hw.key1_slot = cc_slot_to_hw_key(hki.hw_key1);
+	if (ctx_p->hw.key1_slot == END_OF_KEYS) {
+		dev_err(dev, "Unsupported hw key1 number (%d)\n", hki.hw_key1);
+		return -EINVAL;
+	}
+
+	if (ctx_p->cipher_mode == DRV_CIPHER_XTS ||
+	    ctx_p->cipher_mode == DRV_CIPHER_ESSIV ||
+	    ctx_p->cipher_mode == DRV_CIPHER_BITLOCKER) {
+		if (hki.hw_key1 == hki.hw_key2) {
+			dev_err(dev, "Illegal hw key numbers (%d,%d)\n",
+				hki.hw_key1, hki.hw_key2);
+			return -EINVAL;
+		}
+		ctx_p->hw.key2_slot = cc_slot_to_hw_key(hki.hw_key2);
+		if (ctx_p->hw.key2_slot == END_OF_KEYS) {
+			dev_err(dev, "Unsupported hw key2 number (%d)\n",
+				hki.hw_key2);
+			return -EINVAL;
+		}
+	}
+
+	ctx_p->keylen = keylen;
+	ctx_p->hw_key = true;
+	dev_dbg(dev, "cc_is_hw_key ret 0");
+
+	return 0;
+}
+
 static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
 			    unsigned int keylen)
 {
@@ -250,44 +326,7 @@
 		return -EINVAL;
 	}
 
-	if (cc_is_hw_key(tfm)) {
-		/* setting HW key slots */
-		struct arm_hw_key_info *hki = (struct arm_hw_key_info *)key;
-
-		if (ctx_p->flow_mode != S_DIN_to_AES) {
-			dev_err(dev, "HW key not supported for non-AES flows\n");
-			return -EINVAL;
-		}
-
-		ctx_p->hw.key1_slot = hw_key_to_cc_hw_key(hki->hw_key1);
-		if (ctx_p->hw.key1_slot == END_OF_KEYS) {
-			dev_err(dev, "Unsupported hw key1 number (%d)\n",
-				hki->hw_key1);
-			return -EINVAL;
-		}
-
-		if (ctx_p->cipher_mode == DRV_CIPHER_XTS ||
-		    ctx_p->cipher_mode == DRV_CIPHER_ESSIV ||
-		    ctx_p->cipher_mode == DRV_CIPHER_BITLOCKER) {
-			if (hki->hw_key1 == hki->hw_key2) {
-				dev_err(dev, "Illegal hw key numbers (%d,%d)\n",
-					hki->hw_key1, hki->hw_key2);
-				return -EINVAL;
-			}
-			ctx_p->hw.key2_slot =
-				hw_key_to_cc_hw_key(hki->hw_key2);
-			if (ctx_p->hw.key2_slot == END_OF_KEYS) {
-				dev_err(dev, "Unsupported hw key2 number (%d)\n",
-					hki->hw_key2);
-				return -EINVAL;
-			}
-		}
-
-		ctx_p->keylen = keylen;
-		dev_dbg(dev, "cc_is_hw_key ret 0");
-
-		return 0;
-	}
+	ctx_p->hw_key = false;
 
 	/*
 	 * Verify DES weak keys
@@ -735,6 +774,241 @@
 /* Block cipher alg */
 static const struct cc_alg_template skcipher_algs[] = {
 	{
+		.name = "xts(paes)",
+		.driver_name = "xts-paes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_XTS,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_630,
+	},
+	{
+		.name = "xts512(paes)",
+		.driver_name = "xts-paes-du512-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_XTS,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 512,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "xts4096(paes)",
+		.driver_name = "xts-paes-du4096-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_XTS,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 4096,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "essiv(paes)",
+		.driver_name = "essiv-paes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_ESSIV,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "essiv512(paes)",
+		.driver_name = "essiv-paes-du512-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_ESSIV,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 512,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "essiv4096(paes)",
+		.driver_name = "essiv-paes-du4096-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_ESSIV,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 4096,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "bitlocker(paes)",
+		.driver_name = "bitlocker-paes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_BITLOCKER,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "bitlocker512(paes)",
+		.driver_name = "bitlocker-paes-du512-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_BITLOCKER,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 512,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "bitlocker4096(paes)",
+		.driver_name = "bitlocker-paes-du4096-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize =  CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_BITLOCKER,
+		.flow_mode = S_DIN_to_AES,
+		.data_unit = 4096,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "ecb(paes)",
+		.driver_name = "ecb-paes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = 0,
+			},
+		.cipher_mode = DRV_CIPHER_ECB,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "cbc(paes)",
+		.driver_name = "cbc-paes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+		},
+		.cipher_mode = DRV_CIPHER_CBC,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "ofb(paes)",
+		.driver_name = "ofb-paes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_OFB,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "cts1(cbc(paes))",
+		.driver_name = "cts1-cbc-paes-ccree",
+		.blocksize = AES_BLOCK_SIZE,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_CBC_CTS,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
+		.name = "ctr(paes)",
+		.driver_name = "ctr-paes-ccree",
+		.blocksize = 1,
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.template_skcipher = {
+			.setkey = cc_cipher_sethkey,
+			.encrypt = cc_cipher_encrypt,
+			.decrypt = cc_cipher_decrypt,
+			.min_keysize = CC_HW_KEY_SIZE,
+			.max_keysize = CC_HW_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			},
+		.cipher_mode = DRV_CIPHER_CTR,
+		.flow_mode = S_DIN_to_AES,
+		.min_hw_rev = CC_HW_REV_712,
+	},
+	{
 		.name = "xts(aes)",
 		.driver_name = "xts-aes-ccree",
 		.blocksize = AES_BLOCK_SIZE,
diff --git a/drivers/crypto/ccree/cc_cipher.h b/drivers/crypto/ccree/cc_cipher.h
index 2a2a6f4..68444cf 100644
--- a/drivers/crypto/ccree/cc_cipher.h
+++ b/drivers/crypto/ccree/cc_cipher.h
@@ -13,18 +13,6 @@
 #include "cc_driver.h"
 #include "cc_buffer_mgr.h"
 
-/* Crypto cipher flags */
-#define CC_CRYPTO_CIPHER_KEY_KFDE0	BIT(0)
-#define CC_CRYPTO_CIPHER_KEY_KFDE1	BIT(1)
-#define CC_CRYPTO_CIPHER_KEY_KFDE2	BIT(2)
-#define CC_CRYPTO_CIPHER_KEY_KFDE3	BIT(3)
-#define CC_CRYPTO_CIPHER_DU_SIZE_512B	BIT(4)
-
-#define CC_CRYPTO_CIPHER_KEY_KFDE_MASK (CC_CRYPTO_CIPHER_KEY_KFDE0 | \
-					CC_CRYPTO_CIPHER_KEY_KFDE1 | \
-					CC_CRYPTO_CIPHER_KEY_KFDE2 | \
-					CC_CRYPTO_CIPHER_KEY_KFDE3)
-
 struct cipher_req_ctx {
 	struct async_gen_req_ctx gen_ctx;
 	enum cc_req_dma_buf_type dma_buf_type;
@@ -42,18 +30,12 @@
 
 int cc_cipher_free(struct cc_drvdata *drvdata);
 
-struct arm_hw_key_info {
-	int hw_key1;
-	int hw_key2;
-};
+struct cc_hkey_info {
+	u16 keylen;
+	u8 hw_key1;
+	u8 hw_key2;
+} __packed;
 
-/*
- * This is a stub function that will replaced when we
- * implement secure keys
- */
-static inline bool cc_is_hw_key(struct crypto_tfm *tfm)
-{
-	return false;
-}
+#define CC_HW_KEY_SIZE sizeof(struct cc_hkey_info)
 
 #endif /*__CC_CIPHER_H__*/
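
For the new paes algorithms the buffer passed to setkey() carries no key
material: it is this packed 4-byte cc_hkey_info token naming the real key
length and the HW key slots. A sketch of how a caller would pack one, with
illustrative slot numbers and key length:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct cc_hkey_info_demo {
	uint16_t keylen;	/* real key length in the HW slot */
	uint8_t hw_key1;
	uint8_t hw_key2;
} __attribute__((packed));

int main(void)
{
	struct cc_hkey_info_demo hki = {
		.keylen = 64,	/* e.g. XTS with two 32-byte halves */
		.hw_key1 = 0,	/* KFDE0 slot */
		.hw_key2 = 1,	/* KFDE1 slot; must differ for XTS */
	};
	uint8_t token[sizeof(hki)];

	memcpy(token, &hki, sizeof(hki));
	printf("token is %zu bytes\n", sizeof(token));
	return 0;
}
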
diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c
index 08f8db4..5ca184e 100644
--- a/drivers/crypto/ccree/cc_debugfs.c
+++ b/drivers/crypto/ccree/cc_debugfs.c
@@ -26,7 +26,8 @@
 static struct dentry *cc_debugfs_dir;
 
 static struct debugfs_reg32 debug_regs[] = {
-	CC_DEBUG_REG(HOST_SIGNATURE),
+	{ .name = "SIGNATURE" }, /* Must be 0th */
+	{ .name = "VERSION" }, /* Must be 1st */
 	CC_DEBUG_REG(HOST_IRR),
 	CC_DEBUG_REG(HOST_POWER_DOWN_EN),
 	CC_DEBUG_REG(AXIM_MON_ERR),
@@ -34,7 +35,6 @@
 	CC_DEBUG_REG(HOST_IMR),
 	CC_DEBUG_REG(AXIM_CFG),
 	CC_DEBUG_REG(AXIM_CACHE_PARAMS),
-	CC_DEBUG_REG(HOST_VERSION),
 	CC_DEBUG_REG(GPR_HOST),
 	CC_DEBUG_REG(AXIM_MON_COMP),
 };
@@ -58,6 +58,9 @@
 	struct debugfs_regset32 *regset;
 	struct dentry *file;
 
+	debug_regs[0].offset = drvdata->sig_offset;
+	debug_regs[1].offset = drvdata->ver_offset;
+
 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index 89ce013..bd974fe 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -168,14 +168,14 @@
 	val = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS));
 
 	if (is_probe)
-		dev_info(dev, "Cache params previous: 0x%08X\n", val);
+		dev_dbg(dev, "Cache params previous: 0x%08X\n", val);
 
 	cc_iowrite(drvdata, CC_REG(AXIM_CACHE_PARAMS), cache_params);
 	val = cc_ioread(drvdata, CC_REG(AXIM_CACHE_PARAMS));
 
 	if (is_probe)
-		dev_info(dev, "Cache params current: 0x%08X (expect: 0x%08X)\n",
-			 val, cache_params);
+		dev_dbg(dev, "Cache params current: 0x%08X (expect: 0x%08X)\n",
+			val, cache_params);
 
 	return 0;
 }
@@ -190,6 +190,7 @@
 	u64 dma_mask;
 	const struct cc_hw_data *hw_rev;
 	const struct of_device_id *dev_id;
+	struct clk *clk;
 	int rc = 0;
 
 	new_drvdata = devm_kzalloc(dev, sizeof(*new_drvdata), GFP_KERNEL);
@@ -207,15 +208,36 @@
 	if (hw_rev->rev >= CC_HW_REV_712) {
 		new_drvdata->hash_len_sz = HASH_LEN_SIZE_712;
 		new_drvdata->axim_mon_offset = CC_REG(AXIM_MON_COMP);
+		new_drvdata->sig_offset = CC_REG(HOST_SIGNATURE_712);
+		new_drvdata->ver_offset = CC_REG(HOST_VERSION_712);
 	} else {
 		new_drvdata->hash_len_sz = HASH_LEN_SIZE_630;
 		new_drvdata->axim_mon_offset = CC_REG(AXIM_MON_COMP8);
+		new_drvdata->sig_offset = CC_REG(HOST_SIGNATURE_630);
+		new_drvdata->ver_offset = CC_REG(HOST_VERSION_630);
 	}
 
 	platform_set_drvdata(plat_dev, new_drvdata);
 	new_drvdata->plat_dev = plat_dev;
 
-	new_drvdata->clk = of_clk_get(np, 0);
+	clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(clk))
+		switch (PTR_ERR(clk)) {
+		/* Clock is optional so this might be fine */
+		case -ENOENT:
+			break;
+
+		/* Clock not available, let's try again soon */
+		case -EPROBE_DEFER:
+			return -EPROBE_DEFER;
+
+		default:
+			dev_err(dev, "Error getting clock: %ld\n",
+				PTR_ERR(clk));
+			return PTR_ERR(clk);
+		}
+	new_drvdata->clk = clk;
+
 	new_drvdata->coherent = of_dma_is_coherent(np);
 
 	/* Get device resources */
@@ -265,7 +287,7 @@
 	}
 
 	if (rc) {
-		dev_err(dev, "Failed in dma_set_mask, mask=%pad\n", &dma_mask);
+		dev_err(dev, "Failed in dma_set_mask, mask=%llx\n", dma_mask);
 		return rc;
 	}
 
@@ -276,7 +298,7 @@
 	}
 
 	/* Verify correct mapping */
-	signature_val = cc_ioread(new_drvdata, CC_REG(HOST_SIGNATURE));
+	signature_val = cc_ioread(new_drvdata, new_drvdata->sig_offset);
 	if (signature_val != hw_rev->sig) {
 		dev_err(dev, "Invalid CC signature: SIGNATURE=0x%08X != expected=0x%08X\n",
 			signature_val, hw_rev->sig);
@@ -287,7 +309,7 @@
 
 	/* Display HW versions */
 	dev_info(dev, "ARM CryptoCell %s Driver: HW version 0x%08X, Driver version %s\n",
-		 hw_rev->name, cc_ioread(new_drvdata, CC_REG(HOST_VERSION)),
+		 hw_rev->name, cc_ioread(new_drvdata, new_drvdata->ver_offset),
 		 DRV_MODULE_VERSION);
 
 	rc = init_cc_regs(new_drvdata, true);
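
The probe rework above treats the clock as optional: -ENOENT means run
without one, -EPROBE_DEFER is passed up so probing retries once the clock
provider appears, and anything else is fatal. The triage on its own;
EPROBE_DEFER is kernel-internal and defined here only so the sketch builds:

#include <errno.h>
#include <stdio.h>

#define EPROBE_DEFER	517	/* kernel-internal, not in userspace errno.h */

static int classify_clk_err(long err)
{
	switch (err) {
	case -ENOENT:		/* clock is optional: proceed without it */
		return 0;
	case -EPROBE_DEFER:	/* provider not ready: retry probe later */
		return -EPROBE_DEFER;
	default:		/* genuine failure */
		return (int)err;
	}
}

int main(void)
{
	printf("%d\n", classify_clk_err(-ENOENT));		/* 0 */
	printf("%d\n", classify_clk_err(-EPROBE_DEFER));	/* -517 */
	return 0;
}
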
diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h
index 2048fde..95f82b2 100644
--- a/drivers/crypto/ccree/cc_driver.h
+++ b/drivers/crypto/ccree/cc_driver.h
@@ -129,6 +129,8 @@
 	enum cc_hw_rev hw_rev;
 	u32 hash_len_sz;
 	u32 axim_mon_offset;
+	u32 sig_offset;
+	u32 ver_offset;
 };
 
 struct cc_crypto_alg {
diff --git a/drivers/crypto/ccree/cc_host_regs.h b/drivers/crypto/ccree/cc_host_regs.h
index f510018..616b2e1 100644
--- a/drivers/crypto/ccree/cc_host_regs.h
+++ b/drivers/crypto/ccree/cc_host_regs.h
@@ -45,7 +45,8 @@
 #define CC_HOST_ICR_DSCRPTR_WATERMARK_QUEUE0_CLEAR_BIT_SIZE	0x1UL
 #define CC_HOST_ICR_AXIM_COMP_INT_CLEAR_BIT_SHIFT	0x17UL
 #define CC_HOST_ICR_AXIM_COMP_INT_CLEAR_BIT_SIZE	0x1UL
-#define CC_HOST_SIGNATURE_REG_OFFSET	0xA24UL
+#define CC_HOST_SIGNATURE_712_REG_OFFSET	0xA24UL
+#define CC_HOST_SIGNATURE_630_REG_OFFSET	0xAC8UL
 #define CC_HOST_SIGNATURE_VALUE_BIT_SHIFT	0x0UL
 #define CC_HOST_SIGNATURE_VALUE_BIT_SIZE	0x20UL
 #define CC_HOST_BOOT_REG_OFFSET	0xA28UL
@@ -105,7 +106,8 @@
 #define CC_HOST_BOOT_ONLY_ENCRYPT_LOCAL_BIT_SIZE	0x1UL
 #define CC_HOST_BOOT_AES_EXISTS_LOCAL_BIT_SHIFT	0x1EUL
 #define CC_HOST_BOOT_AES_EXISTS_LOCAL_BIT_SIZE	0x1UL
-#define CC_HOST_VERSION_REG_OFFSET	0xA40UL
+#define CC_HOST_VERSION_712_REG_OFFSET	0xA40UL
+#define CC_HOST_VERSION_630_REG_OFFSET	0xAD8UL
 #define CC_HOST_VERSION_VALUE_BIT_SHIFT	0x0UL
 #define CC_HOST_VERSION_VALUE_BIT_SIZE	0x20UL
 #define CC_HOST_KFDE0_VALID_REG_OFFSET	0xA60UL
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 59fe6631e..b916c4e 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -203,13 +203,8 @@
 					 int err)
 {
 	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm));
 
-	chcr_aead_dma_unmap(&u_ctx->lldi.pdev->dev, req, reqctx->op);
-	if (reqctx->b0_dma)
-		dma_unmap_single(&u_ctx->lldi.pdev->dev, reqctx->b0_dma,
-				 reqctx->b0_len, DMA_BIDIRECTIONAL);
+	chcr_aead_common_exit(req);
 	if (reqctx->verify == VERIFY_SW) {
 		chcr_verify_tag(req, input, &err);
 		reqctx->verify = VERIFY_HW;
@@ -638,7 +633,6 @@
 		src = sg_next(src);
 		srcskip = 0;
 	}
-
 	if (sg_dma_len(dst) == dstskip) {
 		dst = sg_next(dst);
 		dstskip = 0;
@@ -688,6 +682,7 @@
 	int err;
 
 	SKCIPHER_REQUEST_ON_STACK(subreq, cipher);
+
 	skcipher_request_set_tfm(subreq, cipher);
 	skcipher_request_set_callback(subreq, flags, NULL, NULL);
 	skcipher_request_set_crypt(subreq, src, dst,
@@ -760,13 +755,13 @@
 
 	nents = sg_nents_xlen(reqctx->dstsg,  wrparam->bytes, CHCR_DST_SG_SIZE,
 			      reqctx->dst_ofst);
-	dst_size = get_space_for_phys_dsgl(nents + 1);
+	dst_size = get_space_for_phys_dsgl(nents);
 	kctx_len = roundup(ablkctx->enckey_len, 16);
 	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
 	nents = sg_nents_xlen(reqctx->srcsg, wrparam->bytes,
 				  CHCR_SRC_SG_SIZE, reqctx->src_ofst);
-	temp = reqctx->imm ? roundup(IV + wrparam->req->nbytes, 16) :
-				     (sgl_len(nents + MIN_CIPHER_SG) * 8);
+	temp = reqctx->imm ? roundup(wrparam->bytes, 16) :
+				     (sgl_len(nents) * 8);
 	transhdr_len += temp;
 	transhdr_len = roundup(transhdr_len, 16);
 	skb = alloc_skb(SGE_MAX_WR_LEN, flags);
@@ -788,7 +783,7 @@
 							 ablkctx->ciph_mode,
 							 0, 0, IV >> 1);
 	chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0,
-							  0, 0, dst_size);
+							  0, 1, dst_size);
 
 	chcr_req->key_ctx.ctx_hdr = ablkctx->key_ctx_hdr;
 	if ((reqctx->op == CHCR_DECRYPT_OP) &&
@@ -818,8 +813,8 @@
 	chcr_add_cipher_dst_ent(wrparam->req, phys_cpl, wrparam, wrparam->qid);
 
 	atomic_inc(&adap->chcr_stats.cipher_rqst);
-	temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size + kctx_len
-		+(reqctx->imm ? (IV + wrparam->bytes) : 0);
+	temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size + kctx_len + IV
+		+ (reqctx->imm ? (wrparam->bytes) : 0);
 	create_wreq(c_ctx(tfm), chcr_req, &(wrparam->req->base), reqctx->imm, 0,
 		    transhdr_len, temp,
 			ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC);
@@ -1022,7 +1017,7 @@
 	ret = crypto_cipher_setkey(cipher, key, keylen);
 	if (ret)
 		goto out;
-	/*H/W sends the encrypted IV in dsgl when AADIVDROP bit is 0*/
+	crypto_cipher_encrypt_one(cipher, iv, iv);
 	for (i = 0; i < round8; i++)
 		gf128mul_x8_ble((le128 *)iv, (le128 *)iv);
 
@@ -1113,16 +1108,8 @@
 		goto complete;
 	}
 
-	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
-					    c_ctx(tfm)->tx_qidx))) {
-		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
-			err = -EBUSY;
-			goto unmap;
-		}
-
-	}
 	if (!reqctx->imm) {
-		bytes = chcr_sg_ent_in_wr(reqctx->srcsg, reqctx->dstsg, 1,
+		bytes = chcr_sg_ent_in_wr(reqctx->srcsg, reqctx->dstsg, 0,
 					  CIP_SPACE_LEFT(ablkctx->enckey_len),
 					  reqctx->src_ofst, reqctx->dst_ofst);
 		if ((bytes + reqctx->processed) >= req->nbytes)
@@ -1133,11 +1120,7 @@
 		/* CTR mode counter overflow */
 		bytes  = req->nbytes - reqctx->processed;
 	}
-	dma_sync_single_for_cpu(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev,
-				reqctx->iv_dma, IV, DMA_BIDIRECTIONAL);
 	err = chcr_update_cipher_iv(req, fw6_pld, reqctx->iv);
-	dma_sync_single_for_device(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev,
-				   reqctx->iv_dma, IV, DMA_BIDIRECTIONAL);
 	if (err)
 		goto unmap;
 
@@ -1212,7 +1195,6 @@
 
 		dnents = sg_nents_xlen(req->dst, req->nbytes,
 				       CHCR_DST_SG_SIZE, 0);
-		dnents += 1; // IV
 		phys_dsgl = get_space_for_phys_dsgl(dnents);
 		kctx_len = roundup(ablkctx->enckey_len, 16);
 		transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
@@ -1225,8 +1207,7 @@
 	}
 
 	if (!reqctx->imm) {
-		bytes = chcr_sg_ent_in_wr(req->src, req->dst,
-					  MIN_CIPHER_SG,
+		bytes = chcr_sg_ent_in_wr(req->src, req->dst, 0,
 					  CIP_SPACE_LEFT(ablkctx->enckey_len),
 					  0, 0);
 		if ((bytes + reqctx->processed) >= req->nbytes)
@@ -1293,13 +1274,14 @@
 {
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct sk_buff *skb = NULL;
-	int err;
+	int err, isfull = 0;
 	struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
 
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    c_ctx(tfm)->tx_qidx))) {
+		isfull = 1;
 		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
+			return -ENOSPC;
 	}
 
 	err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx],
@@ -1309,7 +1291,7 @@
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx);
 	chcr_send_wr(skb);
-	return -EINPROGRESS;
+	return isfull ? -EBUSY : -EINPROGRESS;
 }
 
 static int chcr_aes_decrypt(struct ablkcipher_request *req)
@@ -1317,12 +1299,13 @@
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
 	struct sk_buff *skb = NULL;
-	int err;
+	int err, isfull = 0;
 
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    c_ctx(tfm)->tx_qidx))) {
+		isfull = 1;
 		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
+			return -ENOSPC;
 	}
 
 	err = process_cipher(req, u_ctx->lldi.rxq_ids[c_ctx(tfm)->rx_qidx],
@@ -1332,7 +1315,7 @@
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, c_ctx(tfm)->tx_qidx);
 	chcr_send_wr(skb);
-	return -EINPROGRESS;
+	return isfull ? -EBUSY : -EINPROGRESS;
 }
 
 static int chcr_device_init(struct chcr_context *ctx)
@@ -1574,14 +1557,15 @@
 	u8 remainder = 0, bs;
 	unsigned int nbytes = req->nbytes;
 	struct hash_wr_param params;
-	int error;
+	int error, isfull = 0;
 
 	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
 	u_ctx = ULD_CTX(h_ctx(rtfm));
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    h_ctx(rtfm)->tx_qidx))) {
+		isfull = 1;
 		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
+			return -ENOSPC;
 	}
 
 	if (nbytes + req_ctx->reqlen >= bs) {
@@ -1633,7 +1617,7 @@
 	set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
 	chcr_send_wr(skb);
 
-	return -EINPROGRESS;
+	return isfull ? -EBUSY : -EINPROGRESS;
 unmap:
 	chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
 	return error;
@@ -1710,15 +1694,16 @@
 	struct sk_buff *skb;
 	struct hash_wr_param params;
 	u8  bs;
-	int error;
+	int error, isfull = 0;
 
 	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
 	u_ctx = ULD_CTX(h_ctx(rtfm));
 
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    h_ctx(rtfm)->tx_qidx))) {
+		isfull = 1;
 		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
+			return -ENOSPC;
 	}
 	chcr_init_hctx_per_wr(req_ctx);
 	error = chcr_hash_dma_map(&u_ctx->lldi.pdev->dev, req);
@@ -1777,7 +1762,7 @@
 	set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
 	chcr_send_wr(skb);
 
-	return -EINPROGRESS;
+	return isfull ? -EBUSY : -EINPROGRESS;
 unmap:
 	chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
 	return error;
@@ -1791,7 +1776,7 @@
 	struct sk_buff *skb;
 	struct hash_wr_param params;
 	u8  bs;
-	int error;
+	int error, isfull = 0;
 
 	rtfm->init(req);
 	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
@@ -1799,8 +1784,9 @@
 	u_ctx = ULD_CTX(h_ctx(rtfm));
 	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 					    h_ctx(rtfm)->tx_qidx))) {
+		isfull = 1;
 		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
+			return -ENOSPC;
 	}
 
 	chcr_init_hctx_per_wr(req_ctx);
@@ -1856,7 +1842,7 @@
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, h_ctx(rtfm)->tx_qidx);
 	chcr_send_wr(skb);
-	return -EINPROGRESS;
+	return isfull ? -EBUSY : -EINPROGRESS;
 unmap:
 	chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
 	return error;
@@ -1875,11 +1861,6 @@
 
 	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
 	u_ctx = ULD_CTX(h_ctx(rtfm));
-	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
-					    h_ctx(rtfm)->tx_qidx))) {
-		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
-	}
 	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
 	params.kctx_len = roundup(params.alg_prm.result_size, 16);
 	if (is_hmac(crypto_ahash_tfm(rtfm))) {
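
[Editor's note: the recurring change in the hunks above reworks the queue-full convention. A request that may not be backlogged is now rejected with -ENOSPC, while a backloggable request is still submitted and the caller learns the queue was full via a final -EBUSY instead of -EINPROGRESS. A compilable user-space sketch of that return-code pattern follows; the queue model and flag value are stand-ins.]

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define REQ_MAY_BACKLOG 0x1

static bool queue_full(void) { return true; }	/* pretend HW queue is full */

static int submit_request(unsigned int flags)
{
	int isfull = 0;

	if (queue_full()) {
		isfull = 1;
		if (!(flags & REQ_MAY_BACKLOG))
			return -ENOSPC;	/* caller must retry later */
	}

	/* ... build and send the work request here ... */

	return isfull ? -EBUSY : -EINPROGRESS;
}

int main(void)
{
	printf("no backlog: %d\n", submit_request(0));
	printf("backlog ok: %d\n", submit_request(REQ_MAY_BACKLOG));
	return 0;
}
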
@@ -2192,22 +2173,35 @@
 	}
 }
 
-static int chcr_aead_common_init(struct aead_request *req,
-				 unsigned short op_type)
+inline void chcr_aead_common_exit(struct aead_request *req)
+{
+	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm));
+
+	chcr_aead_dma_unmap(&u_ctx->lldi.pdev->dev, req, reqctx->op);
+}
+
+static int chcr_aead_common_init(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
-	int error = -EINVAL;
 	unsigned int authsize = crypto_aead_authsize(tfm);
+	int error = -EINVAL;
 
 	/* validate key size */
 	if (aeadctx->enckey_len == 0)
 		goto err;
-	if (op_type && req->cryptlen < authsize)
+	if (reqctx->op && req->cryptlen < authsize)
 		goto err;
+	if (reqctx->b0_len)
+		reqctx->scratch_pad = reqctx->iv + IV;
+	else
+		reqctx->scratch_pad = NULL;
+
 	error = chcr_aead_dma_map(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev, req,
-				  op_type);
+				  reqctx->op);
 	if (error) {
 		error = -ENOMEM;
 		goto err;
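
[Editor's note: chcr_aead_common_exit() and the reworked chcr_aead_common_init() above hinge on two moves: the operation type now lives in the per-request context instead of being threaded through every helper, and the CCM scratch pad is carved out of the tail of the enlarged IV buffer so a single DMA mapping can cover both. A rough standalone sketch of that layout, assuming the field names from the diff and faking everything else:]

#include <stddef.h>
#include <stdio.h>

#define IV 16
#define MAX_SCRATCH_PAD_SIZE 32

struct aead_reqctx {
	unsigned short op;	/* encrypt/decrypt, set once at entry */
	unsigned int b0_len;	/* CCM B0 + AAD length field, 0 otherwise */
	unsigned char iv[IV + MAX_SCRATCH_PAD_SIZE];
	unsigned char *scratch_pad;
};

static int aead_common_init(struct aead_reqctx *reqctx)
{
	/* B0 (when present) sits right after the IV, so one DMA
	 * mapping of reqctx->iv can cover both regions. */
	reqctx->scratch_pad = reqctx->b0_len ? reqctx->iv + IV : NULL;
	return 0;
}

int main(void)
{
	struct aead_reqctx r = { .op = 0, .b0_len = 18 };

	aead_common_init(&r);
	printf("scratch pad offset: %td\n", r.scratch_pad - r.iv);
	return 0;
}
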
@@ -2244,7 +2238,7 @@
 	aead_request_set_tfm(subreq, aeadctx->sw_cipher);
 	aead_request_set_callback(subreq, req->base.flags,
 				  req->base.complete, req->base.data);
-	 aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+	aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
 				 req->iv);
 	 aead_request_set_ad(subreq, req->assoclen);
 	return op_type ? crypto_aead_decrypt(subreq) :
@@ -2253,8 +2247,7 @@
 
 static struct sk_buff *create_authenc_wr(struct aead_request *req,
 					 unsigned short qid,
-					 int size,
-					 unsigned short op_type)
+					 int size)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
@@ -2278,18 +2271,20 @@
 	if (req->cryptlen == 0)
 		return NULL;
 
-	reqctx->b0_dma = 0;
-	if (subtype == CRYPTO_ALG_SUB_TYPE_CBC_NULL ||
-	subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) {
-		null = 1;
-		assoclen = 0;
-	}
-	error = chcr_aead_common_init(req, op_type);
+	reqctx->b0_len = 0;
+	error = chcr_aead_common_init(req);
 	if (error)
 		return ERR_PTR(error);
+
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CBC_NULL ||
+		subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL) {
+		null = 1;
+		assoclen = 0;
+		reqctx->aad_nents = 0;
+	}
 	dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
 	dnents += sg_nents_xlen(req->dst, req->cryptlen +
-		(op_type ? -authsize : authsize), CHCR_DST_SG_SIZE,
+		(reqctx->op ? -authsize : authsize), CHCR_DST_SG_SIZE,
 		req->assoclen);
 	dnents += MIN_AUTH_SG; // For IV
 
@@ -2306,11 +2301,10 @@
 	transhdr_len = roundup(transhdr_len, 16);
 
 	if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE,
-				    transhdr_len, op_type)) {
+				    transhdr_len, reqctx->op)) {
 		atomic_inc(&adap->chcr_stats.fallback);
-		chcr_aead_dma_unmap(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev, req,
-				    op_type);
-		return ERR_PTR(chcr_aead_fallback(req, op_type));
+		chcr_aead_common_exit(req);
+		return ERR_PTR(chcr_aead_fallback(req, reqctx->op));
 	}
 	skb = alloc_skb(SGE_MAX_WR_LEN, flags);
 	if (!skb) {
@@ -2320,7 +2314,7 @@
 
 	chcr_req = __skb_put_zero(skb, transhdr_len);
 
-	temp  = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
+	temp  = (reqctx->op == CHCR_ENCRYPT_OP) ? 0 : authsize;
 
 	/*
 	 * Input order is AAD, IV and Payload, where IV should be included as
@@ -2344,8 +2338,8 @@
 		temp = CHCR_SCMD_CIPHER_MODE_AES_CTR;
 	else
 		temp = CHCR_SCMD_CIPHER_MODE_AES_CBC;
-	chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type,
-					(op_type == CHCR_ENCRYPT_OP) ? 1 : 0,
+	chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(reqctx->op,
+					(reqctx->op == CHCR_ENCRYPT_OP) ? 1 : 0,
 					temp,
 					actx->auth_mode, aeadctx->hmac_ctrl,
 					IV >> 1);
@@ -2353,7 +2347,7 @@
 					 0, 0, dst_size);
 
 	chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr;
-	if (op_type == CHCR_ENCRYPT_OP ||
+	if (reqctx->op == CHCR_ENCRYPT_OP ||
 		subtype == CRYPTO_ALG_SUB_TYPE_CTR_SHA ||
 		subtype == CRYPTO_ALG_SUB_TYPE_CTR_NULL)
 		memcpy(chcr_req->key_ctx.key, aeadctx->key,
@@ -2376,20 +2370,18 @@
 	}
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
 	ulptx = (struct ulptx_sgl *)((u8 *)(phys_cpl + 1) + dst_size);
-	chcr_add_aead_dst_ent(req, phys_cpl, assoclen, op_type, qid);
-	chcr_add_aead_src_ent(req, ulptx, assoclen, op_type);
+	chcr_add_aead_dst_ent(req, phys_cpl, assoclen, qid);
+	chcr_add_aead_src_ent(req, ulptx, assoclen);
 	atomic_inc(&adap->chcr_stats.cipher_rqst);
 	temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size +
 		kctx_len + (reqctx->imm ? (assoclen + IV + req->cryptlen) : 0);
 	create_wreq(a_ctx(tfm), chcr_req, &req->base, reqctx->imm, size,
 		   transhdr_len, temp, 0);
 	reqctx->skb = skb;
-	reqctx->op = op_type;
 
 	return skb;
 err:
-	chcr_aead_dma_unmap(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev, req,
-			    op_type);
+	chcr_aead_common_exit(req);
 
 	return ERR_PTR(error);
 }
@@ -2408,11 +2400,14 @@
 				-authsize : authsize);
 	if (!req->cryptlen || !dst_size)
 		return 0;
-	reqctx->iv_dma = dma_map_single(dev, reqctx->iv, IV,
+	reqctx->iv_dma = dma_map_single(dev, reqctx->iv, (IV + reqctx->b0_len),
 					DMA_BIDIRECTIONAL);
 	if (dma_mapping_error(dev, reqctx->iv_dma))
 		return -ENOMEM;
-
+	if (reqctx->b0_len)
+		reqctx->b0_dma = reqctx->iv_dma + IV;
+	else
+		reqctx->b0_dma = 0;
 	if (req->src == req->dst) {
 		error = dma_map_sg(dev, req->src, sg_nents(req->src),
 				   DMA_BIDIRECTIONAL);
@@ -2452,7 +2447,7 @@
 	if (!req->cryptlen || !dst_size)
 		return;
 
-	dma_unmap_single(dev, reqctx->iv_dma, IV,
+	dma_unmap_single(dev, reqctx->iv_dma, (IV + reqctx->b0_len),
 					DMA_BIDIRECTIONAL);
 	if (req->src == req->dst) {
 		dma_unmap_sg(dev, req->src, sg_nents(req->src),
@@ -2467,8 +2462,7 @@
 
 void chcr_add_aead_src_ent(struct aead_request *req,
 			   struct ulptx_sgl *ulptx,
-			   unsigned int assoclen,
-			   unsigned short op_type)
+			   unsigned int assoclen)
 {
 	struct ulptx_walk ulp_walk;
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
@@ -2476,7 +2470,7 @@
 	if (reqctx->imm) {
 		u8 *buf = (u8 *)ulptx;
 
-		if (reqctx->b0_dma) {
+		if (reqctx->b0_len) {
 			memcpy(buf, reqctx->scratch_pad, reqctx->b0_len);
 			buf += reqctx->b0_len;
 		}
@@ -2489,7 +2483,7 @@
 				   buf, req->cryptlen, req->assoclen);
 	} else {
 		ulptx_walk_init(&ulp_walk, ulptx);
-		if (reqctx->b0_dma)
+		if (reqctx->b0_len)
 			ulptx_walk_add_page(&ulp_walk, reqctx->b0_len,
 					    &reqctx->b0_dma);
 		ulptx_walk_add_sg(&ulp_walk, req->src, assoclen, 0);
@@ -2503,7 +2497,6 @@
 void chcr_add_aead_dst_ent(struct aead_request *req,
 			   struct cpl_rx_phys_dsgl *phys_cpl,
 			   unsigned int assoclen,
-			   unsigned short op_type,
 			   unsigned short qid)
 {
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
@@ -2513,32 +2506,30 @@
 	u32 temp;
 
 	dsgl_walk_init(&dsgl_walk, phys_cpl);
-	if (reqctx->b0_dma)
+	if (reqctx->b0_len)
 		dsgl_walk_add_page(&dsgl_walk, reqctx->b0_len, &reqctx->b0_dma);
 	dsgl_walk_add_sg(&dsgl_walk, req->dst, assoclen, 0);
 	dsgl_walk_add_page(&dsgl_walk, IV, &reqctx->iv_dma);
-	temp = req->cryptlen + (op_type ? -authsize : authsize);
+	temp = req->cryptlen + (reqctx->op ? -authsize : authsize);
 	dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, req->assoclen);
 	dsgl_walk_end(&dsgl_walk, qid);
 }
 
 void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
-			     struct ulptx_sgl *ulptx,
+			     void *ulptx,
 			     struct  cipher_wr_param *wrparam)
 {
 	struct ulptx_walk ulp_walk;
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
+	u8 *buf = ulptx;
 
+	memcpy(buf, reqctx->iv, IV);
+	buf += IV;
 	if (reqctx->imm) {
-		u8 *buf = (u8 *)ulptx;
-
-		memcpy(buf, reqctx->iv, IV);
-		buf += IV;
 		sg_pcopy_to_buffer(req->src, sg_nents(req->src),
 				   buf, wrparam->bytes, reqctx->processed);
 	} else {
-		ulptx_walk_init(&ulp_walk, ulptx);
-		ulptx_walk_add_page(&ulp_walk, IV, &reqctx->iv_dma);
+		ulptx_walk_init(&ulp_walk, (struct ulptx_sgl *)buf);
 		ulptx_walk_add_sg(&ulp_walk, reqctx->srcsg, wrparam->bytes,
 				  reqctx->src_ofst);
 		reqctx->srcsg = ulp_walk.last_sg;
@@ -2556,7 +2547,6 @@
 	struct dsgl_walk dsgl_walk;
 
 	dsgl_walk_init(&dsgl_walk, phys_cpl);
-	dsgl_walk_add_page(&dsgl_walk, IV, &reqctx->iv_dma);
 	dsgl_walk_add_sg(&dsgl_walk, reqctx->dstsg, wrparam->bytes,
 			 reqctx->dst_ofst);
 	reqctx->dstsg = dsgl_walk.last_sg;
@@ -2630,12 +2620,6 @@
 			struct ablkcipher_request *req)
 {
 	int error;
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
-
-	reqctx->iv_dma = dma_map_single(dev, reqctx->iv, IV,
-					DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(dev, reqctx->iv_dma))
-		return -ENOMEM;
 
 	if (req->src == req->dst) {
 		error = dma_map_sg(dev, req->src, sg_nents(req->src),
@@ -2658,17 +2642,12 @@
 
 	return 0;
 err:
-	dma_unmap_single(dev, reqctx->iv_dma, IV, DMA_BIDIRECTIONAL);
 	return -ENOMEM;
 }
 
 void chcr_cipher_dma_unmap(struct device *dev,
 			   struct ablkcipher_request *req)
 {
-	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
-
-	dma_unmap_single(dev, reqctx->iv_dma, IV,
-					DMA_BIDIRECTIONAL);
 	if (req->src == req->dst) {
 		dma_unmap_sg(dev, req->src, sg_nents(req->src),
 				   DMA_BIDIRECTIONAL);
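
[Editor's note: the cipher-path hunks above stop DMA-mapping the IV on its own. The IV is instead copied inline at the head of the ULPTX payload, ahead of the immediate data or the SGL, which is why iv_dma disappears from chcr_cipher_dma_map()/chcr_cipher_dma_unmap() and why CIPHER_TRANSHDR_SIZE grows by AES_BLOCK_SIZE below. A toy illustration of the buffer layout; the struct and sizes are made up.]

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define IV_LEN 16

struct wr_buf {
	uint8_t payload[64];
};

/* Place the IV first, then the immediate data (the SGL case would
 * start its ulptx_sgl at the same post-IV position). */
static size_t fill_src(struct wr_buf *wr, const uint8_t *iv,
		       const uint8_t *data, size_t len)
{
	uint8_t *pos = wr->payload;

	memcpy(pos, iv, IV_LEN);
	pos += IV_LEN;
	memcpy(pos, data, len);
	return IV_LEN + len;
}

int main(void)
{
	struct wr_buf wr;
	uint8_t iv[IV_LEN] = { 0 };
	uint8_t msg[4] = { 1, 2, 3, 4 };
	size_t n = fill_src(&wr, iv, msg, sizeof(msg));

	printf("work request payload bytes: %zu\n", n);
	return 0;
}
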
@@ -2738,7 +2717,8 @@
 static int ccm_format_packet(struct aead_request *req,
 			     struct chcr_aead_ctx *aeadctx,
 			     unsigned int sub_type,
-			     unsigned short op_type)
+			     unsigned short op_type,
+			     unsigned int assoclen)
 {
 	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
 	int rc = 0;
@@ -2748,13 +2728,13 @@
 		memcpy(reqctx->iv + 1, &aeadctx->salt[0], 3);
 		memcpy(reqctx->iv + 4, req->iv, 8);
 		memset(reqctx->iv + 12, 0, 4);
-		*((unsigned short *)(reqctx->scratch_pad + 16)) =
-			htons(req->assoclen - 8);
 	} else {
 		memcpy(reqctx->iv, req->iv, 16);
-		*((unsigned short *)(reqctx->scratch_pad + 16)) =
-			htons(req->assoclen);
 	}
+	if (assoclen)
+		*((unsigned short *)(reqctx->scratch_pad + 16)) =
+				htons(assoclen);
+
 	generate_b0(req, aeadctx, op_type);
 	/* zero the ctr value */
 	memset(reqctx->iv + 15 - reqctx->iv[0], 0, reqctx->iv[0] + 1);
@@ -2836,8 +2816,7 @@
 
 static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 					  unsigned short qid,
-					  int size,
-					  unsigned short op_type)
+					  int size)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
@@ -2855,22 +2834,20 @@
 		GFP_ATOMIC;
 	struct adapter *adap = padap(a_ctx(tfm)->dev);
 
-	reqctx->b0_dma = 0;
 	sub_type = get_aead_subtype(tfm);
 	if (sub_type == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309)
 		assoclen -= 8;
-	error = chcr_aead_common_init(req, op_type);
+	reqctx->b0_len = CCM_B0_SIZE + (assoclen ? CCM_AAD_FIELD_SIZE : 0);
+	error = chcr_aead_common_init(req);
 	if (error)
 		return ERR_PTR(error);
 
-
-	reqctx->b0_len = CCM_B0_SIZE + (assoclen ? CCM_AAD_FIELD_SIZE : 0);
-	error = aead_ccm_validate_input(op_type, req, aeadctx, sub_type);
+	error = aead_ccm_validate_input(reqctx->op, req, aeadctx, sub_type);
 	if (error)
 		goto err;
 	dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
 	dnents += sg_nents_xlen(req->dst, req->cryptlen
-			+ (op_type ? -authsize : authsize),
+			+ (reqctx->op ? -authsize : authsize),
 			CHCR_DST_SG_SIZE, req->assoclen);
 	dnents += MIN_CCM_SG; // For IV and B0
 	dst_size = get_space_for_phys_dsgl(dnents);
@@ -2886,11 +2863,10 @@
 	transhdr_len = roundup(transhdr_len, 16);
 
 	if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE -
-				    reqctx->b0_len, transhdr_len, op_type)) {
+				    reqctx->b0_len, transhdr_len, reqctx->op)) {
 		atomic_inc(&adap->chcr_stats.fallback);
-		chcr_aead_dma_unmap(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev, req,
-				    op_type);
-		return ERR_PTR(chcr_aead_fallback(req, op_type));
+		chcr_aead_common_exit(req);
+		return ERR_PTR(chcr_aead_fallback(req, reqctx->op));
 	}
 	skb = alloc_skb(SGE_MAX_WR_LEN,  flags);
 
@@ -2901,7 +2877,7 @@
 
 	chcr_req = (struct chcr_wr *) __skb_put_zero(skb, transhdr_len);
 
-	fill_sec_cpl_for_aead(&chcr_req->sec_cpl, dst_size, req, op_type);
+	fill_sec_cpl_for_aead(&chcr_req->sec_cpl, dst_size, req, reqctx->op);
 
 	chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr;
 	memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len);
@@ -2910,21 +2886,11 @@
 
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
 	ulptx = (struct ulptx_sgl *)((u8 *)(phys_cpl + 1) + dst_size);
-	error = ccm_format_packet(req, aeadctx, sub_type, op_type);
+	error = ccm_format_packet(req, aeadctx, sub_type, reqctx->op, assoclen);
 	if (error)
 		goto dstmap_fail;
-
-	reqctx->b0_dma = dma_map_single(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev,
-					&reqctx->scratch_pad, reqctx->b0_len,
-					DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev,
-			      reqctx->b0_dma)) {
-		error = -ENOMEM;
-		goto dstmap_fail;
-	}
-
-	chcr_add_aead_dst_ent(req, phys_cpl, assoclen, op_type, qid);
-	chcr_add_aead_src_ent(req, ulptx, assoclen, op_type);
+	chcr_add_aead_dst_ent(req, phys_cpl, assoclen, qid);
+	chcr_add_aead_src_ent(req, ulptx, assoclen);
 
 	atomic_inc(&adap->chcr_stats.aead_rqst);
 	temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size +
@@ -2933,20 +2899,18 @@
 	create_wreq(a_ctx(tfm), chcr_req, &req->base, reqctx->imm, 0,
 		    transhdr_len, temp, 0);
 	reqctx->skb = skb;
-	reqctx->op = op_type;
 
 	return skb;
 dstmap_fail:
 	kfree_skb(skb);
 err:
-	chcr_aead_dma_unmap(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev, req, op_type);
+	chcr_aead_common_exit(req);
 	return ERR_PTR(error);
 }
 
 static struct sk_buff *create_gcm_wr(struct aead_request *req,
 				     unsigned short qid,
-				     int size,
-				     unsigned short op_type)
+				     int size)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
@@ -2966,13 +2930,13 @@
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106)
 		assoclen = req->assoclen - 8;
 
-	reqctx->b0_dma = 0;
-	error = chcr_aead_common_init(req, op_type);
+	reqctx->b0_len = 0;
+	error = chcr_aead_common_init(req);
 	if (error)
 		return ERR_PTR(error);
 	dnents = sg_nents_xlen(req->dst, assoclen, CHCR_DST_SG_SIZE, 0);
 	dnents += sg_nents_xlen(req->dst, req->cryptlen +
-				(op_type ? -authsize : authsize),
+				(reqctx->op ? -authsize : authsize),
 				CHCR_DST_SG_SIZE, req->assoclen);
 	dnents += MIN_GCM_SG; // For IV
 	dst_size = get_space_for_phys_dsgl(dnents);
@@ -2986,11 +2950,11 @@
 	transhdr_len += temp;
 	transhdr_len = roundup(transhdr_len, 16);
 	if (chcr_aead_need_fallback(req, dnents, T6_MAX_AAD_SIZE,
-			    transhdr_len, op_type)) {
+			    transhdr_len, reqctx->op)) {
+
 		atomic_inc(&adap->chcr_stats.fallback);
-		chcr_aead_dma_unmap(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev, req,
-				    op_type);
-		return ERR_PTR(chcr_aead_fallback(req, op_type));
+		chcr_aead_common_exit(req);
+		return ERR_PTR(chcr_aead_fallback(req, reqctx->op));
 	}
 	skb = alloc_skb(SGE_MAX_WR_LEN, flags);
 	if (!skb) {
@@ -3001,7 +2965,7 @@
 	chcr_req = __skb_put_zero(skb, transhdr_len);
 
 	//Offset of tag from end
-	temp = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
+	temp = (reqctx->op == CHCR_ENCRYPT_OP) ? 0 : authsize;
 	chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(
 					a_ctx(tfm)->dev->rx_channel_id, 2,
 					(assoclen + 1));
@@ -3014,7 +2978,7 @@
 			FILL_SEC_CPL_AUTHINSERT(0, assoclen + IV + 1,
 						temp, temp);
 	chcr_req->sec_cpl.seqno_numivs =
-			FILL_SEC_CPL_SCMD0_SEQNO(op_type, (op_type ==
+			FILL_SEC_CPL_SCMD0_SEQNO(reqctx->op, (reqctx->op ==
 					CHCR_ENCRYPT_OP) ? 1 : 0,
 					CHCR_SCMD_CIPHER_MODE_AES_GCM,
 					CHCR_SCMD_AUTH_MODE_GHASH,
@@ -3040,19 +3004,18 @@
 	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
 	ulptx = (struct ulptx_sgl *)((u8 *)(phys_cpl + 1) + dst_size);
 
-	chcr_add_aead_dst_ent(req, phys_cpl, assoclen, op_type, qid);
-	chcr_add_aead_src_ent(req, ulptx, assoclen, op_type);
+	chcr_add_aead_dst_ent(req, phys_cpl, assoclen, qid);
+	chcr_add_aead_src_ent(req, ulptx, assoclen);
 	atomic_inc(&adap->chcr_stats.aead_rqst);
 	temp = sizeof(struct cpl_rx_phys_dsgl) + dst_size +
 		kctx_len + (reqctx->imm ? (assoclen + IV + req->cryptlen) : 0);
 	create_wreq(a_ctx(tfm), chcr_req, &req->base, reqctx->imm, size,
 		    transhdr_len, temp, reqctx->verify);
 	reqctx->skb = skb;
-	reqctx->op = op_type;
 	return skb;
 
 err:
-	chcr_aead_dma_unmap(&ULD_CTX(a_ctx(tfm))->lldi.pdev->dev, req, op_type);
+	chcr_aead_common_exit(req);
 	return ERR_PTR(error);
 }
 
@@ -3461,6 +3424,7 @@
 	}
 	{
 		SHASH_DESC_ON_STACK(shash, base_hash);
+
 		shash->tfm = base_hash;
 		shash->flags = crypto_shash_get_flags(base_hash);
 		bs = crypto_shash_blocksize(base_hash);
@@ -3585,13 +3549,13 @@
 }
 
 static int chcr_aead_op(struct aead_request *req,
-			unsigned short op_type,
 			int size,
 			create_wr_t create_wr_fn)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct uld_ctx *u_ctx;
 	struct sk_buff *skb;
+	int isfull = 0;
 
 	if (!a_ctx(tfm)->dev) {
 		pr_err("chcr : %s : No crypto device.\n", __func__);
@@ -3600,13 +3564,13 @@
 	u_ctx = ULD_CTX(a_ctx(tfm));
 	if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
 				   a_ctx(tfm)->tx_qidx)) {
+		isfull = 1;
 		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
+			return -ENOSPC;
 	}
 
 	/* Form a WR from req */
-	skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size,
-			   op_type);
+	skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size);
 
 	if (IS_ERR(skb) || !skb)
 		return PTR_ERR(skb);
@@ -3614,7 +3578,7 @@
 	skb->dev = u_ctx->lldi.ports[0];
 	set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx);
 	chcr_send_wr(skb);
-	return -EINPROGRESS;
+	return isfull ? -EBUSY : -EINPROGRESS;
 }
 
 static int chcr_aead_encrypt(struct aead_request *req)
@@ -3623,21 +3587,19 @@
 	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
 
 	reqctx->verify = VERIFY_HW;
+	reqctx->op = CHCR_ENCRYPT_OP;
 
 	switch (get_aead_subtype(tfm)) {
 	case CRYPTO_ALG_SUB_TYPE_CTR_SHA:
 	case CRYPTO_ALG_SUB_TYPE_CBC_SHA:
 	case CRYPTO_ALG_SUB_TYPE_CBC_NULL:
 	case CRYPTO_ALG_SUB_TYPE_CTR_NULL:
-		return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0,
-				    create_authenc_wr);
+		return chcr_aead_op(req, 0, create_authenc_wr);
 	case CRYPTO_ALG_SUB_TYPE_AEAD_CCM:
 	case CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309:
-		return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0,
-				    create_aead_ccm_wr);
+		return chcr_aead_op(req, 0, create_aead_ccm_wr);
 	default:
-		return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0,
-				    create_gcm_wr);
+		return chcr_aead_op(req, 0, create_gcm_wr);
 	}
 }
 
@@ -3655,21 +3617,18 @@
 		size = 0;
 		reqctx->verify = VERIFY_HW;
 	}
-
+	reqctx->op = CHCR_DECRYPT_OP;
 	switch (get_aead_subtype(tfm)) {
 	case CRYPTO_ALG_SUB_TYPE_CBC_SHA:
 	case CRYPTO_ALG_SUB_TYPE_CTR_SHA:
 	case CRYPTO_ALG_SUB_TYPE_CBC_NULL:
 	case CRYPTO_ALG_SUB_TYPE_CTR_NULL:
-		return chcr_aead_op(req, CHCR_DECRYPT_OP, size,
-				    create_authenc_wr);
+		return chcr_aead_op(req, size, create_authenc_wr);
 	case CRYPTO_ALG_SUB_TYPE_AEAD_CCM:
 	case CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309:
-		return chcr_aead_op(req, CHCR_DECRYPT_OP, size,
-				    create_aead_ccm_wr);
+		return chcr_aead_op(req, size, create_aead_ccm_wr);
 	default:
-		return chcr_aead_op(req, CHCR_DECRYPT_OP, size,
-				    create_gcm_wr);
+		return chcr_aead_op(req, size, create_gcm_wr);
 	}
 }
 
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index dba3dff..1871500 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -146,7 +146,7 @@
 	 kctx_len)
 #define CIPHER_TRANSHDR_SIZE(kctx_len, sge_pairs) \
 	(TRANSHDR_SIZE((kctx_len)) + (sge_pairs) +\
-	 sizeof(struct cpl_rx_phys_dsgl))
+	 sizeof(struct cpl_rx_phys_dsgl) + AES_BLOCK_SIZE)
 #define HASH_TRANSHDR_SIZE(kctx_len)\
 	(TRANSHDR_SIZE(kctx_len) + DUMMY_BYTES)
 
@@ -259,7 +259,6 @@
 					ULP_TX_SC_MORE_V((immdatalen)))
 #define MAX_NK 8
 #define MAX_DSGL_ENT			32
-#define MIN_CIPHER_SG			1 /* IV */
 #define MIN_AUTH_SG			1 /* IV */
 #define MIN_GCM_SG			1 /* IV */
 #define MIN_DIGEST_SG			1 /*Partial Buffer*/
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index 1a20424..de3a9c0 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -56,7 +56,7 @@
 #define MAX_SALT                4
 #define CIP_WR_MIN_LEN (sizeof(struct chcr_wr) + \
 		    sizeof(struct cpl_rx_phys_dsgl) + \
-		    sizeof(struct ulptx_sgl))
+		    sizeof(struct ulptx_sgl) + 16) //IV
 
 #define HASH_WR_MIN_LEN (sizeof(struct chcr_wr) + \
 			DUMMY_BYTES + \
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index c8e8972..54835cb 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -190,8 +190,8 @@
 	short int dst_nents;
 	u16 imm;
 	u16 verify;
-	u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
-	unsigned char scratch_pad[MAX_SCRATCH_PAD_SIZE];
+	u8 iv[CHCR_MAX_CRYPTO_IV_LEN + MAX_SCRATCH_PAD_SIZE];
+	u8 *scratch_pad;
 };
 
 struct ulptx_walk {
@@ -295,7 +295,6 @@
 	unsigned int src_ofst;
 	unsigned int dst_ofst;
 	unsigned int op;
-	dma_addr_t iv_dma;
 	u16 imm;
 	u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
 };
@@ -312,8 +311,7 @@
 
 typedef struct sk_buff *(*create_wr_t)(struct aead_request *req,
 				       unsigned short qid,
-				       int size,
-				       unsigned short op_type);
+				       int size);
 
 void chcr_verify_tag(struct aead_request *req, u8 *input, int *err);
 int chcr_aead_dma_map(struct device *dev, struct aead_request *req,
@@ -322,12 +320,12 @@
 			 unsigned short op_type);
 void chcr_add_aead_dst_ent(struct aead_request *req,
 			   struct cpl_rx_phys_dsgl *phys_cpl,
-			   unsigned int assoclen, unsigned short op_type,
+			   unsigned int assoclen,
 			   unsigned short qid);
 void chcr_add_aead_src_ent(struct aead_request *req, struct ulptx_sgl *ulptx,
-			   unsigned int assoclen, unsigned short op_type);
+			   unsigned int assoclen);
 void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
-			     struct ulptx_sgl *ulptx,
+			     void *ulptx,
 			     struct  cipher_wr_param *wrparam);
 int chcr_cipher_dma_map(struct device *dev, struct ablkcipher_request *req);
 void chcr_cipher_dma_unmap(struct device *dev, struct ablkcipher_request *req);
@@ -340,4 +338,5 @@
 			   struct hash_wr_param *param);
 int chcr_hash_dma_map(struct device *dev, struct ahash_request *req);
 void chcr_hash_dma_unmap(struct device *dev, struct ahash_request *req);
+void chcr_aead_common_exit(struct aead_request *req);
 #endif /* __CHCR_CRYPTO_H__ */
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
index 8e0aa3f..461b97e 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -346,18 +346,23 @@
 				struct net_device *dev,
 				void *pos)
 {
+	struct cpl_tx_pkt_core *cpl;
+	struct sge_eth_txq *q;
 	struct adapter *adap;
 	struct port_info *pi;
-	struct sge_eth_txq *q;
-	struct cpl_tx_pkt_core *cpl;
-	u64 cntrl = 0;
 	u32 ctrl0, qidx;
+	u64 cntrl = 0;
+	int left;
 
 	pi = netdev_priv(dev);
 	adap = pi->adapter;
 	qidx = skb->queue_mapping;
 	q = &adap->sge.ethtxq[qidx + pi->first_qset];
 
+	left = (void *)q->q.stat - pos;
+	if (!left)
+		pos = q->q.desc;
+
 	cpl = (struct cpl_tx_pkt_core *)pos;
 
 	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
@@ -382,18 +387,17 @@
 				void *pos,
 				struct ipsec_sa_entry *sa_entry)
 {
-	struct adapter *adap;
-	struct port_info *pi;
-	struct sge_eth_txq *q;
-	unsigned int len, qidx;
 	struct _key_ctx *key_ctx;
 	int left, eoq, key_len;
+	struct sge_eth_txq *q;
+	struct adapter *adap;
+	struct port_info *pi;
+	unsigned int qidx;
 
 	pi = netdev_priv(dev);
 	adap = pi->adapter;
 	qidx = skb->queue_mapping;
 	q = &adap->sge.ethtxq[qidx + pi->first_qset];
-	len = sa_entry->enckey_len + sizeof(struct cpl_tx_pkt_core);
 	key_len = sa_entry->kctx_len;
 
 	/* end of queue, reset pos to start of queue */
@@ -411,19 +415,14 @@
 	pos += sizeof(struct _key_ctx);
 	left -= sizeof(struct _key_ctx);
 
-	if (likely(len <= left)) {
+	if (likely(key_len <= left)) {
 		memcpy(key_ctx->key, sa_entry->key, key_len);
 		pos += key_len;
 	} else {
-		if (key_len <= left) {
-			memcpy(pos, sa_entry->key, key_len);
-			pos += key_len;
-		} else {
-			memcpy(pos, sa_entry->key, left);
-			memcpy(q->q.desc, sa_entry->key + left,
-			       key_len - left);
-			pos = (u8 *)q->q.desc + (key_len - left);
-		}
+		memcpy(pos, sa_entry->key, left);
+		memcpy(q->q.desc, sa_entry->key + left,
+		       key_len - left);
+		pos = (u8 *)q->q.desc + (key_len - left);
 	}
 	/* Copy CPL TX PKT XT */
 	pos = copy_cpltx_pktxt(skb, dev, pos);
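
[Editor's note: the simplification above removes a dead branch. The old `len` bound also counted the CPL header, so the outer space check could never be the right one for the key copy; the fix compares the key length itself and keeps only the straight-copy and wraparound cases. A standalone sketch of that descriptor-ring wraparound copy, with illustrative sizes:]

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define QUEUE_BYTES 64

static uint8_t queue[QUEUE_BYTES];

static uint8_t *copy_key(uint8_t *pos, const uint8_t *key, size_t key_len)
{
	size_t left = (size_t)(queue + QUEUE_BYTES - pos);

	if (key_len <= left) {
		memcpy(pos, key, key_len);
		return pos + key_len;
	}

	/* split copy: tail of the ring, then wrap to its start */
	memcpy(pos, key, left);
	memcpy(queue, key + left, key_len - left);
	return queue + (key_len - left);
}

int main(void)
{
	uint8_t key[32] = { 0xAB };
	uint8_t *end = copy_key(queue + 48, key, sizeof(key));

	printf("resumed at offset %td after wrap\n", end - queue);
	return 0;
}
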
diff --git a/drivers/crypto/chelsio/chtls/chtls.h b/drivers/crypto/chelsio/chtls/chtls.h
index 778c194..a53a0e6 100644
--- a/drivers/crypto/chelsio/chtls/chtls.h
+++ b/drivers/crypto/chelsio/chtls/chtls.h
@@ -67,11 +67,6 @@
 	CPL_RET_UNKNOWN_TID = 4    /* unexpected unknown TID */
 };
 
-#define TLS_RCV_ST_READ_HEADER		0xF0
-#define TLS_RCV_ST_READ_BODY		0xF1
-#define TLS_RCV_ST_READ_DONE		0xF2
-#define TLS_RCV_ST_READ_NB		0xF3
-
 #define LISTEN_INFO_HASH_SIZE 32
 #define RSPQ_HASH_BITS 5
 struct listen_info {
@@ -279,6 +274,7 @@
 #define TLSRX_HDR_PKT_MAC_ERROR_F        TLSRX_HDR_PKT_MAC_ERROR_V(1U)
 
 #define TLSRX_HDR_PKT_ERROR_M           0x1F
+#define CONTENT_TYPE_ERROR		0x7F
 
 struct ulp_mem_rw {
 	__be32 cmd;
@@ -348,8 +344,8 @@
 	ULPCB_FLAG_HOLD      = 1 << 3,	/* skb not ready for Tx yet */
 	ULPCB_FLAG_COMPL     = 1 << 4,	/* request WR completion */
 	ULPCB_FLAG_URG       = 1 << 5,	/* urgent data */
-	ULPCB_FLAG_TLS_ND    = 1 << 6, /* payload of zero length */
-	ULPCB_FLAG_NO_HDR    = 1 << 7, /* not a ofld wr */
+	ULPCB_FLAG_TLS_HDR   = 1 << 6,  /* payload with tls hdr */
+	ULPCB_FLAG_NO_HDR    = 1 << 7,  /* not a ofld wr */
 };
 
 /* The ULP mode/submode of an skbuff */
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index 82a473a..2bb6f03 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -1537,6 +1537,10 @@
 	struct sock *sk;
 
 	sk = lookup_tid(cdev->tids, hwtid);
+	if (unlikely(!sk)) {
+		pr_err("can't find conn. for hwtid %u.\n", hwtid);
+		return -EINVAL;
+	}
 	skb_dst_set(skb, NULL);
 	process_cpl_msg(chtls_recv_data, sk, skb);
 	return 0;
@@ -1585,6 +1589,10 @@
 	struct sock *sk;
 
 	sk = lookup_tid(cdev->tids, hwtid);
+	if (unlikely(!sk)) {
+		pr_err("can't find conn. for hwtid %u.\n", hwtid);
+		return -EINVAL;
+	}
 	skb_dst_set(skb, NULL);
 	process_cpl_msg(chtls_recv_pdu, sk, skb);
 	return 0;
@@ -1600,12 +1608,14 @@
 
 static void chtls_rx_hdr(struct sock *sk, struct sk_buff *skb)
 {
-	struct cpl_rx_tls_cmp *cmp_cpl = cplhdr(skb);
+	struct tlsrx_cmp_hdr *tls_hdr_pkt;
+	struct cpl_rx_tls_cmp *cmp_cpl;
 	struct sk_buff *skb_rec;
 	struct chtls_sock *csk;
 	struct chtls_hws *tlsk;
 	struct tcp_sock *tp;
 
+	cmp_cpl = cplhdr(skb);
 	csk = rcu_dereference_sk_user_data(sk);
 	tlsk = &csk->tlshws;
 	tp = tcp_sk(sk);
@@ -1615,16 +1625,18 @@
 
 	skb_reset_transport_header(skb);
 	__skb_pull(skb, sizeof(*cmp_cpl));
+	tls_hdr_pkt = (struct tlsrx_cmp_hdr *)skb->data;
+	if (tls_hdr_pkt->res_to_mac_error & TLSRX_HDR_PKT_ERROR_M)
+		tls_hdr_pkt->type = CONTENT_TYPE_ERROR;
 	if (!skb->data_len)
-		__skb_trim(skb, CPL_RX_TLS_CMP_LENGTH_G
-				(ntohl(cmp_cpl->pdulength_length)));
+		__skb_trim(skb, TLS_HEADER_LENGTH);
 
 	tp->rcv_nxt +=
 		CPL_RX_TLS_CMP_PDULENGTH_G(ntohl(cmp_cpl->pdulength_length));
 
+	ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_HDR;
 	skb_rec = __skb_dequeue(&tlsk->sk_recv_queue);
 	if (!skb_rec) {
-		ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_ND;
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 	} else {
 		chtls_set_hdrlen(skb, tlsk->pldlen);
@@ -1646,6 +1658,10 @@
 	struct sock *sk;
 
 	sk = lookup_tid(cdev->tids, hwtid);
+	if (unlikely(!sk)) {
+		pr_err("can't find conn. for hwtid %u.\n", hwtid);
+		return -EINVAL;
+	}
 	skb_dst_set(skb, NULL);
 	process_cpl_msg(chtls_rx_hdr, sk, skb);
 
@@ -2105,6 +2121,10 @@
 	struct sock *sk;
 
 	sk = lookup_tid(cdev->tids, hwtid);
+	if (unlikely(!sk)) {
+		pr_err("can't find conn. for hwtid %u.\n", hwtid);
+		return -EINVAL;
+	}
 	process_cpl_msg(chtls_rx_ack, sk, skb);
 
 	return 0;
diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index fdd9218..51fc682 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1533,31 +1533,13 @@
 				}
 			}
 		}
-		if (hws->rstate == TLS_RCV_ST_READ_BODY) {
-			if (skb_copy_datagram_msg(skb, offset,
-						  msg, avail)) {
-				if (!copied) {
-					copied = -EFAULT;
-					break;
-				}
-			}
-		} else {
-			struct tlsrx_cmp_hdr *tls_hdr_pkt =
-				(struct tlsrx_cmp_hdr *)skb->data;
-
-			if ((tls_hdr_pkt->res_to_mac_error &
-			    TLSRX_HDR_PKT_ERROR_M))
-				tls_hdr_pkt->type = 0x7F;
-
-			/* CMP pld len is for recv seq */
-			hws->rcvpld = skb->hdr_len;
-			if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
-				if (!copied) {
-					copied = -EFAULT;
-					break;
-				}
+		if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
+			if (!copied) {
+				copied = -EFAULT;
+				break;
 			}
 		}
+
 		copied += avail;
 		len -= avail;
 		hws->copied_seq += avail;
@@ -1565,32 +1547,20 @@
 		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
 			tp->urg_data = 0;
 
-		if (hws->rstate == TLS_RCV_ST_READ_BODY &&
-		    (avail + offset) >= skb->len) {
+		if ((avail + offset) >= skb->len) {
 			if (likely(skb))
 				chtls_free_skb(sk, skb);
 			buffers_freed++;
-			hws->rstate = TLS_RCV_ST_READ_HEADER;
-			atomic_inc(&adap->chcr_stats.tls_pdu_rx);
-			tp->copied_seq += hws->rcvpld;
+			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
+				tp->copied_seq += skb->len;
+				hws->rcvpld = skb->hdr_len;
+			} else {
+				tp->copied_seq += hws->rcvpld;
+			}
 			hws->copied_seq = 0;
 			if (copied >= target &&
 			    !skb_peek(&sk->sk_receive_queue))
 				break;
-		} else {
-			if (likely(skb)) {
-				if (ULP_SKB_CB(skb)->flags &
-				    ULPCB_FLAG_TLS_ND)
-					hws->rstate =
-						TLS_RCV_ST_READ_HEADER;
-				else
-					hws->rstate =
-						TLS_RCV_ST_READ_BODY;
-				chtls_free_skb(sk, skb);
-			}
-			buffers_freed++;
-			tp->copied_seq += avail;
-			hws->copied_seq = 0;
 		}
 	} while (len > 0);
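
[Editor's note: the chtls receive rework above drops the TLS_RCV_ST_* header/body state machine entirely. Whether an skb completes a PDU is now conveyed per-skb via ULPCB_FLAG_TLS_HDR, and copied_seq advances by skb->len for header-tagged skbs or by the previously saved rcvpld otherwise. A minimal sketch of that per-skb decision; field names are illustrative, not the driver's.]

#include <stdio.h>

#define ULPCB_FLAG_TLS_HDR 0x40

struct rx_skb { unsigned int len, hdr_len, flags; };

static unsigned int advance(const struct rx_skb *skb, unsigned int *rcvpld)
{
	if (skb->flags & ULPCB_FLAG_TLS_HDR) {
		*rcvpld = skb->hdr_len;	/* remember the PDU length */
		return skb->len;
	}
	return *rcvpld;			/* body skb: use saved length */
}

int main(void)
{
	struct rx_skb hdr = { 5, 1024, ULPCB_FLAG_TLS_HDR };
	struct rx_skb body = { 512, 0, 0 };
	unsigned int rcvpld = 0;

	printf("%u %u\n", advance(&hdr, &rcvpld), advance(&body, &rcvpld));
	return 0;
}
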
 
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index 1a6cbb2..9b07f91 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -216,7 +216,6 @@
 	cdev->lldi = lldi;
 	cdev->pdev = lldi->pdev;
 	cdev->tids = lldi->tids;
-	cdev->ports = (struct net_device **)(cdev + 1);
 	cdev->ports = lldi->ports;
 	cdev->mtus = lldi->mtus;
 	cdev->tids = lldi->tids;
@@ -442,7 +441,7 @@
 static int do_chtls_getsockopt(struct sock *sk, char __user *optval,
 			       int __user *optlen)
 {
-	struct tls_crypto_info crypto_info;
+	struct tls_crypto_info crypto_info = { 0 };
 
 	crypto_info.version = TLS_1_2_VERSION;
 	if (copy_to_user(optval, &crypto_info, sizeof(struct tls_crypto_info)))
@@ -492,9 +491,13 @@
 
 	switch (tmp_crypto_info.cipher_type) {
 	case TLS_CIPHER_AES_GCM_128: {
-		rc = copy_from_user(crypto_info, optval,
-				    sizeof(struct
-					   tls12_crypto_info_aes_gcm_128));
+		/* Obtain version and type from previous copy */
+		crypto_info[0] = tmp_crypto_info;
+		/* Now copy the following data */
+		rc = copy_from_user((char *)crypto_info + sizeof(*crypto_info),
+				optval + sizeof(*crypto_info),
+				sizeof(struct tls12_crypto_info_aes_gcm_128)
+				- sizeof(*crypto_info));
 
 		if (rc) {
 			rc = -EFAULT;
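
[Editor's note: the two chtls_main.c fixes above close small holes. The getsockopt reply is now zero-initialized so uninitialized stack bytes cannot leak to user space, and the setsockopt path no longer re-copies the already-validated version/cipher header from user memory, fetching only the bytes that follow it. A sketch of the "keep the validated header, copy only the tail" pattern; the types and the copy primitive are stand-ins for the kernel's copy_from_user().]

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct crypto_hdr { uint16_t version; uint16_t cipher_type; };
struct crypto_full { struct crypto_hdr hdr; uint8_t key[16]; };

/* stand-in for copy_from_user(); returns 0 on success */
static int copy_from_user_sim(void *dst, const void *src, size_t n)
{
	memcpy(dst, src, n);
	return 0;
}

static int set_crypto_info(struct crypto_full *dst,
			   const struct crypto_hdr *validated_hdr,
			   const void *user_buf)
{
	/* Reuse the header we already copied and validated ... */
	dst->hdr = *validated_hdr;
	/* ... and copy only the bytes that follow it. */
	return copy_from_user_sim((char *)dst + sizeof(dst->hdr),
				  (const char *)user_buf + sizeof(dst->hdr),
				  sizeof(*dst) - sizeof(dst->hdr));
}

int main(void)
{
	struct crypto_full user = { { 0x0303, 51 }, { 0 } }, kernel;
	struct crypto_hdr hdr = user.hdr;	/* first, validated copy */

	if (!set_crypto_info(&kernel, &hdr, &user))
		printf("cipher_type=%u\n", kernel.hdr.cipher_type);
	return 0;
}
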
diff --git a/drivers/crypto/exynos-rng.c b/drivers/crypto/exynos-rng.c
index 86f5f45..2cfabb9 100644
--- a/drivers/crypto/exynos-rng.c
+++ b/drivers/crypto/exynos-rng.c
@@ -319,8 +319,7 @@
 
 static int __maybe_unused exynos_rng_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct exynos_rng_dev *rng = platform_get_drvdata(pdev);
+	struct exynos_rng_dev *rng = dev_get_drvdata(dev);
 	int ret;
 
 	/* If we were never seeded then after resume it will be the same */
@@ -350,8 +349,7 @@
 
 static int __maybe_unused exynos_rng_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct exynos_rng_dev *rng = platform_get_drvdata(pdev);
+	struct exynos_rng_dev *rng = dev_get_drvdata(dev);
 	int ret;
 
 	/* Never seeded so nothing to do */
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index b6be620..4e86f86 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -20,6 +20,7 @@
 #include <linux/platform_device.h>
 #include <linux/workqueue.h>
 
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 
@@ -352,6 +353,7 @@
 	/* H/W capabilities selection */
 	val = EIP197_FUNCTION_RSVD;
 	val |= EIP197_PROTOCOL_ENCRYPT_ONLY | EIP197_PROTOCOL_HASH_ONLY;
+	val |= EIP197_PROTOCOL_ENCRYPT_HASH | EIP197_PROTOCOL_HASH_DECRYPT;
 	val |= EIP197_ALG_AES_ECB | EIP197_ALG_AES_CBC;
 	val |= EIP197_ALG_SHA1 | EIP197_ALG_HMAC_SHA1;
 	val |= EIP197_ALG_SHA2 | EIP197_ALG_HMAC_SHA2;
@@ -537,6 +539,27 @@
 	       EIP197_HIA_CDR(priv, ring) + EIP197_HIA_xDR_PREP_COUNT);
 }
 
+inline int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv,
+				       struct safexcel_result_desc *rdesc)
+{
+	if (likely(!rdesc->result_data.error_code))
+		return 0;
+
+	if (rdesc->result_data.error_code & 0x407f) {
+		/* Fatal error (bits 0-7, 14) */
+		dev_err(priv->dev,
+			"cipher: result: result descriptor error (%d)\n",
+			rdesc->result_data.error_code);
+		return -EIO;
+	} else if (rdesc->result_data.error_code == BIT(9)) {
+		/* Authentication failed */
+		return -EBADMSG;
+	}
+
+	/* All other non-fatal errors */
+	return -EINVAL;
+}
+
 void safexcel_complete(struct safexcel_crypto_priv *priv, int ring)
 {
 	struct safexcel_command_desc *cdesc;
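
[Editor's note: safexcel_rdesc_check_errors() above centralizes result-descriptor error handling. Error bits covered by mask 0x407f (bits 0-6 and bit 14) are treated as fatal and return -EIO, bit 9 alone means the ICV verification failed and returns -EBADMSG, and anything else is a non-fatal -EINVAL. A tiny user-space rendering of that classification, with the surrounding driver context faked:]

#include <errno.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

static int check_errors(unsigned int error_code)
{
	if (!error_code)
		return 0;
	if (error_code & 0x407f)
		return -EIO;		/* fatal descriptor error */
	if (error_code == BIT(9))
		return -EBADMSG;	/* authentication failed */
	return -EINVAL;			/* other non-fatal error */
}

int main(void)
{
	printf("%d %d %d %d\n", check_errors(0), check_errors(BIT(14)),
	       check_errors(BIT(9)), check_errors(BIT(10)));
	return 0;
}
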
@@ -770,6 +793,9 @@
 	&safexcel_alg_hmac_sha1,
 	&safexcel_alg_hmac_sha224,
 	&safexcel_alg_hmac_sha256,
+	&safexcel_alg_authenc_hmac_sha1_cbc_aes,
+	&safexcel_alg_authenc_hmac_sha224_cbc_aes,
+	&safexcel_alg_authenc_hmac_sha256_cbc_aes,
 };
 
 static int safexcel_register_algorithms(struct safexcel_crypto_priv *priv)
@@ -781,6 +807,8 @@
 
 		if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
 			ret = crypto_register_skcipher(&safexcel_algs[i]->alg.skcipher);
+		else if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_AEAD)
+			ret = crypto_register_aead(&safexcel_algs[i]->alg.aead);
 		else
 			ret = crypto_register_ahash(&safexcel_algs[i]->alg.ahash);
 
@@ -794,6 +822,8 @@
 	for (j = 0; j < i; j++) {
 		if (safexcel_algs[j]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
 			crypto_unregister_skcipher(&safexcel_algs[j]->alg.skcipher);
+		else if (safexcel_algs[j]->type == SAFEXCEL_ALG_TYPE_AEAD)
+			crypto_unregister_aead(&safexcel_algs[j]->alg.aead);
 		else
 			crypto_unregister_ahash(&safexcel_algs[j]->alg.ahash);
 	}
@@ -808,6 +838,8 @@
 	for (i = 0; i < ARRAY_SIZE(safexcel_algs); i++) {
 		if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
 			crypto_unregister_skcipher(&safexcel_algs[i]->alg.skcipher);
+		else if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_AEAD)
+			crypto_unregister_aead(&safexcel_algs[i]->alg.aead);
 		else
 			crypto_unregister_ahash(&safexcel_algs[i]->alg.ahash);
 	}
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index b470a84..8b3ee9b 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -11,8 +11,10 @@
 #ifndef __SAFEXCEL_H__
 #define __SAFEXCEL_H__
 
+#include <crypto/aead.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
+#include <crypto/sha.h>
 #include <crypto/skcipher.h>
 
 #define EIP197_HIA_VERSION_LE			0xca35
@@ -20,7 +22,7 @@
 
 /* Static configuration */
 #define EIP197_DEFAULT_RING_SIZE		400
-#define EIP197_MAX_TOKENS			5
+#define EIP197_MAX_TOKENS			8
 #define EIP197_MAX_RINGS			4
 #define EIP197_FETCH_COUNT			1
 #define EIP197_MAX_BATCH_SZ			64
@@ -28,6 +30,17 @@
 #define EIP197_GFP_FLAGS(base)	((base).flags & CRYPTO_TFM_REQ_MAY_SLEEP ? \
 				 GFP_KERNEL : GFP_ATOMIC)
 
+/* Custom on-stack requests (for invalidation) */
+#define EIP197_SKCIPHER_REQ_SIZE	sizeof(struct skcipher_request) + \
+					sizeof(struct safexcel_cipher_req)
+#define EIP197_AHASH_REQ_SIZE		sizeof(struct ahash_request) + \
+					sizeof(struct safexcel_ahash_req)
+#define EIP197_AEAD_REQ_SIZE		sizeof(struct aead_request) + \
+					sizeof(struct safexcel_cipher_req)
+#define EIP197_REQUEST_ON_STACK(name, type, size) \
+	char __##name##_desc[size] CRYPTO_MINALIGN_ATTR; \
+	struct type##_request *name = (void *)__##name##_desc
+
 /* Register base offsets */
 #define EIP197_HIA_AIC(priv)		((priv)->base + (priv)->offsets.hia_aic)
 #define EIP197_HIA_AIC_G(priv)		((priv)->base + (priv)->offsets.hia_aic_g)
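
[Editor's note: the new EIP197_REQUEST_ON_STACK() macro above reserves aligned stack storage for a request followed by the driver's per-request context, mirroring the crypto API's *_REQUEST_ON_STACK helpers, presumably so invalidation requests need no allocation. A standalone analogue of the pattern; the aligned(8) attribute and the simplified types stand in for CRYPTO_MINALIGN_ATTR and the real request structures.]

#include <stdio.h>

struct request { int id; };
struct priv_ctx { int state; };

#define REQUEST_ON_STACK(name, size) \
	char __##name##_desc[size] __attribute__((aligned(8))); \
	struct request *name = (void *)__##name##_desc

int main(void)
{
	REQUEST_ON_STACK(req, sizeof(struct request) + sizeof(struct priv_ctx));

	req->id = 1;
	printf("on-stack request id=%d\n", req->id);
	return 0;
}
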
@@ -274,7 +287,7 @@
 	u32 control0;
 	u32 control1;
 
-	__le32 data[12];
+	__le32 data[24];
 } __packed;
 
 /* control0 */
@@ -286,8 +299,8 @@
 #define CONTEXT_CONTROL_TYPE_CRYPTO_IN		0x5
 #define CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT	0x6
 #define CONTEXT_CONTROL_TYPE_DECRYPT_HASH_IN	0x7
-#define CONTEXT_CONTROL_TYPE_HASH_ENCRYPT_OUT	0x14
-#define CONTEXT_CONTROL_TYPE_HASH_DECRYPT_OUT	0x15
+#define CONTEXT_CONTROL_TYPE_HASH_ENCRYPT_OUT	0xe
+#define CONTEXT_CONTROL_TYPE_HASH_DECRYPT_IN	0xf
 #define CONTEXT_CONTROL_RESTART_HASH		BIT(4)
 #define CONTEXT_CONTROL_NO_FINISH_HASH		BIT(5)
 #define CONTEXT_CONTROL_SIZE(n)			((n) << 8)
@@ -391,11 +404,15 @@
 	u8 opcode:4;
 } __packed;
 
+#define EIP197_TOKEN_HASH_RESULT_VERIFY		BIT(16)
+
 #define EIP197_TOKEN_STAT_LAST_HASH		BIT(0)
 #define EIP197_TOKEN_STAT_LAST_PACKET		BIT(1)
 #define EIP197_TOKEN_OPCODE_DIRECTION		0x0
 #define EIP197_TOKEN_OPCODE_INSERT		0x2
 #define EIP197_TOKEN_OPCODE_NOOP		EIP197_TOKEN_OPCODE_INSERT
+#define EIP197_TOKEN_OPCODE_RETRIEVE		0x4
+#define EIP197_TOKEN_OPCODE_VERIFY		0xd
 #define EIP197_TOKEN_OPCODE_BYPASS		GENMASK(3, 0)
 
 static inline void eip197_noop_token(struct safexcel_token *token)
@@ -479,6 +496,7 @@
 
 enum safexcel_alg_type {
 	SAFEXCEL_ALG_TYPE_SKCIPHER,
+	SAFEXCEL_ALG_TYPE_AEAD,
 	SAFEXCEL_ALG_TYPE_AHASH,
 };
 
@@ -581,6 +599,16 @@
 	bool exit_inv;
 };
 
+struct safexcel_ahash_export_state {
+	u64 len;
+	u64 processed;
+
+	u32 digest;
+
+	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+	u8 cache[SHA256_BLOCK_SIZE];
+};
+
 /*
  * Template structure to describe the algorithms in order to register them.
  * It also has the purpose to contain our private structure and is actually
@@ -591,6 +619,7 @@
 	enum safexcel_alg_type type;
 	union {
 		struct skcipher_alg skcipher;
+		struct aead_alg aead;
 		struct ahash_alg ahash;
 	} alg;
 };
@@ -601,6 +630,8 @@
 };
 
 void safexcel_dequeue(struct safexcel_crypto_priv *priv, int ring);
+int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv,
+				struct safexcel_result_desc *rdesc);
 void safexcel_complete(struct safexcel_crypto_priv *priv, int ring);
 int safexcel_invalidate_cache(struct crypto_async_request *async,
 			      struct safexcel_crypto_priv *priv,
@@ -625,6 +656,8 @@
 						bool first, bool last,
 						dma_addr_t data, u32 len);
 void safexcel_inv_complete(struct crypto_async_request *req, int error);
+int safexcel_hmac_setkey(const char *alg, const u8 *key, unsigned int keylen,
+			 void *istate, void *ostate);
 
 /* available algorithms */
 extern struct safexcel_alg_template safexcel_alg_ecb_aes;
@@ -635,5 +668,8 @@
 extern struct safexcel_alg_template safexcel_alg_hmac_sha1;
 extern struct safexcel_alg_template safexcel_alg_hmac_sha224;
 extern struct safexcel_alg_template safexcel_alg_hmac_sha256;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_aes;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_aes;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_aes;
 
 #endif
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index bafb605..6bb60fd 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -12,8 +12,12 @@
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
 
+#include <crypto/aead.h>
 #include <crypto/aes.h>
+#include <crypto/authenc.h>
+#include <crypto/sha.h>
 #include <crypto/skcipher.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
 
 #include "safexcel.h"
@@ -28,9 +32,16 @@
 	struct safexcel_crypto_priv *priv;
 
 	u32 mode;
+	bool aead;
 
 	__le32 key[8];
 	unsigned int key_len;
+
+	/* All the below is AEAD specific */
+	u32 alg;
+	u32 state_sz;
+	u32 ipad[SHA256_DIGEST_SIZE / sizeof(u32)];
+	u32 opad[SHA256_DIGEST_SIZE / sizeof(u32)];
 };
 
 struct safexcel_cipher_req {
@@ -38,18 +49,16 @@
 	bool needs_inv;
 };
 
-static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx,
-				  struct crypto_async_request *async,
-				  struct safexcel_command_desc *cdesc,
-				  u32 length)
+static void safexcel_skcipher_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
+				    struct safexcel_command_desc *cdesc,
+				    u32 length)
 {
-	struct skcipher_request *req = skcipher_request_cast(async);
 	struct safexcel_token *token;
 	unsigned offset = 0;
 
 	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
 		offset = AES_BLOCK_SIZE / sizeof(u32);
-		memcpy(cdesc->control_data.token, req->iv, AES_BLOCK_SIZE);
+		memcpy(cdesc->control_data.token, iv, AES_BLOCK_SIZE);
 
 		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
 	}
@@ -65,8 +74,64 @@
 				EIP197_TOKEN_INS_TYPE_OUTPUT;
 }
 
-static int safexcel_aes_setkey(struct crypto_skcipher *ctfm, const u8 *key,
-			       unsigned int len)
+static void safexcel_aead_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
+				struct safexcel_command_desc *cdesc,
+				enum safexcel_cipher_direction direction,
+				u32 cryptlen, u32 assoclen, u32 digestsize)
+{
+	struct safexcel_token *token;
+	unsigned offset = 0;
+
+	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
+		offset = AES_BLOCK_SIZE / sizeof(u32);
+		memcpy(cdesc->control_data.token, iv, AES_BLOCK_SIZE);
+
+		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
+	}
+
+	token = (struct safexcel_token *)(cdesc->control_data.token + offset);
+
+	if (direction == SAFEXCEL_DECRYPT)
+		cryptlen -= digestsize;
+
+	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+	token[0].packet_length = assoclen;
+	token[0].instructions = EIP197_TOKEN_INS_TYPE_HASH |
+				EIP197_TOKEN_INS_TYPE_OUTPUT;
+
+	token[1].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+	token[1].packet_length = cryptlen;
+	token[1].stat = EIP197_TOKEN_STAT_LAST_HASH;
+	token[1].instructions = EIP197_TOKEN_INS_LAST |
+				EIP197_TOKEN_INS_TYPE_CRYTO |
+				EIP197_TOKEN_INS_TYPE_HASH |
+				EIP197_TOKEN_INS_TYPE_OUTPUT;
+
+	if (direction == SAFEXCEL_ENCRYPT) {
+		token[2].opcode = EIP197_TOKEN_OPCODE_INSERT;
+		token[2].packet_length = digestsize;
+		token[2].stat = EIP197_TOKEN_STAT_LAST_HASH |
+				EIP197_TOKEN_STAT_LAST_PACKET;
+		token[2].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+					EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+	} else {
+		token[2].opcode = EIP197_TOKEN_OPCODE_RETRIEVE;
+		token[2].packet_length = digestsize;
+		token[2].stat = EIP197_TOKEN_STAT_LAST_HASH |
+				EIP197_TOKEN_STAT_LAST_PACKET;
+		token[2].instructions = EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+
+		token[3].opcode = EIP197_TOKEN_OPCODE_VERIFY;
+		token[3].packet_length = digestsize |
+					 EIP197_TOKEN_HASH_RESULT_VERIFY;
+		token[3].stat = EIP197_TOKEN_STAT_LAST_HASH |
+				EIP197_TOKEN_STAT_LAST_PACKET;
+		token[3].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT;
+	}
+}
+
+static int safexcel_skcipher_aes_setkey(struct crypto_skcipher *ctfm,
+					const u8 *key, unsigned int len)
 {
 	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -98,41 +163,123 @@
 	return 0;
 }
 
+static int safexcel_aead_aes_setkey(struct crypto_aead *ctfm, const u8 *key,
+				    unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_ahash_export_state istate, ostate;
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct crypto_authenc_keys keys;
+
+	if (crypto_authenc_extractkeys(&keys, key, len) != 0)
+		goto badkey;
+
+	if (keys.enckeylen > sizeof(ctx->key))
+		goto badkey;
+
+	/* Encryption key */
+	if (priv->version == EIP197 && ctx->base.ctxr_dma &&
+	    memcmp(ctx->key, keys.enckey, keys.enckeylen))
+		ctx->base.needs_inv = true;
+
+	/* Auth key */
+	switch (ctx->alg) {
+	case CONTEXT_CONTROL_CRYPTO_ALG_SHA1:
+		if (safexcel_hmac_setkey("safexcel-sha1", keys.authkey,
+					 keys.authkeylen, &istate, &ostate))
+			goto badkey;
+		break;
+	case CONTEXT_CONTROL_CRYPTO_ALG_SHA224:
+		if (safexcel_hmac_setkey("safexcel-sha224", keys.authkey,
+					 keys.authkeylen, &istate, &ostate))
+			goto badkey;
+		break;
+	case CONTEXT_CONTROL_CRYPTO_ALG_SHA256:
+		if (safexcel_hmac_setkey("safexcel-sha256", keys.authkey,
+					 keys.authkeylen, &istate, &ostate))
+			goto badkey;
+		break;
+	default:
+		dev_err(priv->dev, "aead: unsupported hash algorithm\n");
+		goto badkey;
+	}
+
+	crypto_aead_set_flags(ctfm, crypto_aead_get_flags(ctfm) &
+				    CRYPTO_TFM_RES_MASK);
+
+	if (priv->version == EIP197 && ctx->base.ctxr_dma &&
+	    (memcmp(ctx->ipad, istate.state, ctx->state_sz) ||
+	     memcmp(ctx->opad, ostate.state, ctx->state_sz)))
+		ctx->base.needs_inv = true;
+
+	/* Now copy the keys into the context */
+	memcpy(ctx->key, keys.enckey, keys.enckeylen);
+	ctx->key_len = keys.enckeylen;
+
+	memcpy(ctx->ipad, &istate.state, ctx->state_sz);
+	memcpy(ctx->opad, &ostate.state, ctx->state_sz);
+
+	memzero_explicit(&keys, sizeof(keys));
+	return 0;
+
+badkey:
+	crypto_aead_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	memzero_explicit(&keys, sizeof(keys));
+	return -EINVAL;
+}
+
 static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
 				    struct crypto_async_request *async,
+				    struct safexcel_cipher_req *sreq,
 				    struct safexcel_command_desc *cdesc)
 {
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	struct skcipher_request *req = skcipher_request_cast(async);
-	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
 	int ctrl_size;
 
-	if (sreq->direction == SAFEXCEL_ENCRYPT)
+	if (ctx->aead) {
+		if (sreq->direction == SAFEXCEL_ENCRYPT)
+			cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT;
+		else
+			cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_HASH_DECRYPT_IN;
+	} else {
 		cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_CRYPTO_OUT;
-	else
-		cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_CRYPTO_IN;
+
+		/* The decryption control type is a combination of the
+		 * encryption type and CONTEXT_CONTROL_TYPE_NULL_IN, for all
+		 * types.
+		 */
+		if (sreq->direction == SAFEXCEL_DECRYPT)
+			cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_NULL_IN;
+	}
 
 	cdesc->control_data.control0 |= CONTEXT_CONTROL_KEY_EN;
 	cdesc->control_data.control1 |= ctx->mode;
 
+	if (ctx->aead)
+		cdesc->control_data.control0 |= CONTEXT_CONTROL_DIGEST_HMAC |
+						ctx->alg;
+
 	switch (ctx->key_len) {
 	case AES_KEYSIZE_128:
 		cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES128;
-		ctrl_size = 4;
 		break;
 	case AES_KEYSIZE_192:
 		cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES192;
-		ctrl_size = 6;
 		break;
 	case AES_KEYSIZE_256:
 		cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES256;
-		ctrl_size = 8;
 		break;
 	default:
 		dev_err(priv->dev, "aes keysize not supported: %u\n",
 			ctx->key_len);
 		return -EINVAL;
 	}
+
+	ctrl_size = ctx->key_len / sizeof(u32);
+	if (ctx->aead)
+		/* Take into account the ipad+opad digests */
+		ctrl_size += ctx->state_sz / sizeof(u32) * 2;
 	cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(ctrl_size);
 
 	return 0;
@@ -140,9 +287,12 @@
 
 static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
 				      struct crypto_async_request *async,
+				      struct scatterlist *src,
+				      struct scatterlist *dst,
+				      unsigned int cryptlen,
+				      struct safexcel_cipher_req *sreq,
 				      bool *should_complete, int *ret)
 {
-	struct skcipher_request *req = skcipher_request_cast(async);
 	struct safexcel_result_desc *rdesc;
 	int ndesc = 0;
 
@@ -158,12 +308,8 @@
 			break;
 		}
 
-		if (rdesc->result_data.error_code) {
-			dev_err(priv->dev,
-				"cipher: result: result descriptor error (%d)\n",
-				rdesc->result_data.error_code);
-			*ret = -EIO;
-		}
+		if (likely(!*ret))
+			*ret = safexcel_rdesc_check_errors(priv, rdesc);
 
 		ndesc++;
 	} while (!rdesc->last_seg);
@@ -171,16 +317,16 @@
 	safexcel_complete(priv, ring);
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
-	if (req->src == req->dst) {
-		dma_unmap_sg(priv->dev, req->src,
-			     sg_nents_for_len(req->src, req->cryptlen),
+	if (src == dst) {
+		dma_unmap_sg(priv->dev, src,
+			     sg_nents_for_len(src, cryptlen),
 			     DMA_BIDIRECTIONAL);
 	} else {
-		dma_unmap_sg(priv->dev, req->src,
-			     sg_nents_for_len(req->src, req->cryptlen),
+		dma_unmap_sg(priv->dev, src,
+			     sg_nents_for_len(src, cryptlen),
 			     DMA_TO_DEVICE);
-		dma_unmap_sg(priv->dev, req->dst,
-			     sg_nents_for_len(req->dst, req->cryptlen),
+		dma_unmap_sg(priv->dev, dst,
+			     sg_nents_for_len(dst, cryptlen),
 			     DMA_FROM_DEVICE);
 	}
 
@@ -189,39 +335,43 @@
 	return ndesc;
 }
 
-static int safexcel_aes_send(struct crypto_async_request *async,
-			     int ring, struct safexcel_request *request,
-			     int *commands, int *results)
+static int safexcel_aes_send(struct crypto_async_request *base, int ring,
+			     struct safexcel_request *request,
+			     struct safexcel_cipher_req *sreq,
+			     struct scatterlist *src, struct scatterlist *dst,
+			     unsigned int cryptlen, unsigned int assoclen,
+			     unsigned int digestsize, u8 *iv, int *commands,
+			     int *results)
 {
-	struct skcipher_request *req = skcipher_request_cast(async);
-	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct safexcel_command_desc *cdesc;
 	struct safexcel_result_desc *rdesc;
 	struct scatterlist *sg;
-	int nr_src, nr_dst, n_cdesc = 0, n_rdesc = 0, queued = req->cryptlen;
+	unsigned int totlen = cryptlen + assoclen;
+	int nr_src, nr_dst, n_cdesc = 0, n_rdesc = 0, queued = totlen;
 	int i, ret = 0;
 
-	if (req->src == req->dst) {
-		nr_src = dma_map_sg(priv->dev, req->src,
-				    sg_nents_for_len(req->src, req->cryptlen),
+	if (src == dst) {
+		nr_src = dma_map_sg(priv->dev, src,
+				    sg_nents_for_len(src, totlen),
 				    DMA_BIDIRECTIONAL);
 		nr_dst = nr_src;
 		if (!nr_src)
 			return -EINVAL;
 	} else {
-		nr_src = dma_map_sg(priv->dev, req->src,
-				    sg_nents_for_len(req->src, req->cryptlen),
+		nr_src = dma_map_sg(priv->dev, src,
+				    sg_nents_for_len(src, totlen),
 				    DMA_TO_DEVICE);
 		if (!nr_src)
 			return -EINVAL;
 
-		nr_dst = dma_map_sg(priv->dev, req->dst,
-				    sg_nents_for_len(req->dst, req->cryptlen),
+		nr_dst = dma_map_sg(priv->dev, dst,
+				    sg_nents_for_len(dst, totlen),
 				    DMA_FROM_DEVICE);
 		if (!nr_dst) {
-			dma_unmap_sg(priv->dev, req->src,
-				     sg_nents_for_len(req->src, req->cryptlen),
+			dma_unmap_sg(priv->dev, src,
+				     sg_nents_for_len(src, totlen),
 				     DMA_TO_DEVICE);
 			return -EINVAL;
 		}
@@ -229,10 +379,17 @@
 
 	memcpy(ctx->base.ctxr->data, ctx->key, ctx->key_len);
 
+	if (ctx->aead) {
+		memcpy(ctx->base.ctxr->data + ctx->key_len / sizeof(u32),
+		       ctx->ipad, ctx->state_sz);
+		memcpy(ctx->base.ctxr->data + (ctx->key_len + ctx->state_sz) / sizeof(u32),
+		       ctx->opad, ctx->state_sz);
+	}
+
 	spin_lock_bh(&priv->ring[ring].egress_lock);
 
 	/* command descriptors */
-	for_each_sg(req->src, sg, nr_src, i) {
+	for_each_sg(src, sg, nr_src, i) {
 		int len = sg_dma_len(sg);
 
 		/* Do not overflow the request */
@@ -240,7 +397,7 @@
 			len = queued;
 
 		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc, !(queued - len),
-					   sg_dma_address(sg), len, req->cryptlen,
+					   sg_dma_address(sg), len, totlen,
 					   ctx->base.ctxr_dma);
 		if (IS_ERR(cdesc)) {
 			/* No space left in the command descriptor ring */
@@ -250,8 +407,14 @@
 		n_cdesc++;
 
 		if (n_cdesc == 1) {
-			safexcel_context_control(ctx, async, cdesc);
-			safexcel_cipher_token(ctx, async, cdesc, req->cryptlen);
+			safexcel_context_control(ctx, base, sreq, cdesc);
+			if (ctx->aead)
+				safexcel_aead_token(ctx, iv, cdesc,
+						    sreq->direction, cryptlen,
+						    assoclen, digestsize);
+			else
+				safexcel_skcipher_token(ctx, iv, cdesc,
+							cryptlen);
 		}
 
 		queued -= len;
@@ -260,7 +423,7 @@
 	}
 
 	/* result descriptors */
-	for_each_sg(req->dst, sg, nr_dst, i) {
+	for_each_sg(dst, sg, nr_dst, i) {
 		bool first = !i, last = (i == nr_dst - 1);
 		u32 len = sg_dma_len(sg);
 
@@ -276,7 +439,7 @@
 
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
-	request->req = &req->base;
+	request->req = base;
 
 	*commands = n_cdesc;
 	*results = n_rdesc;
@@ -291,16 +454,16 @@
 
 	spin_unlock_bh(&priv->ring[ring].egress_lock);
 
-	if (req->src == req->dst) {
-		dma_unmap_sg(priv->dev, req->src,
-			     sg_nents_for_len(req->src, req->cryptlen),
+	if (src == dst) {
+		dma_unmap_sg(priv->dev, src,
+			     sg_nents_for_len(src, totlen),
 			     DMA_BIDIRECTIONAL);
 	} else {
-		dma_unmap_sg(priv->dev, req->src,
-			     sg_nents_for_len(req->src, req->cryptlen),
+		dma_unmap_sg(priv->dev, src,
+			     sg_nents_for_len(src, totlen),
 			     DMA_TO_DEVICE);
-		dma_unmap_sg(priv->dev, req->dst,
-			     sg_nents_for_len(req->dst, req->cryptlen),
+		dma_unmap_sg(priv->dev, dst,
+			     sg_nents_for_len(dst, totlen),
 			     DMA_FROM_DEVICE);
 	}
 
@@ -309,11 +472,10 @@
 
 static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 				      int ring,
-				      struct crypto_async_request *async,
+				      struct crypto_async_request *base,
 				      bool *should_complete, int *ret)
 {
-	struct skcipher_request *req = skcipher_request_cast(async);
-	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm);
 	struct safexcel_result_desc *rdesc;
 	int ndesc = 0, enq_ret;
 
@@ -354,7 +516,7 @@
 	ctx->base.ring = ring;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
-	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
+	enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
 	if (enq_ret != -EINPROGRESS)
@@ -368,9 +530,10 @@
 	return ndesc;
 }
 
-static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
-				  struct crypto_async_request *async,
-				  bool *should_complete, int *ret)
+static int safexcel_skcipher_handle_result(struct safexcel_crypto_priv *priv,
+					   int ring,
+					   struct crypto_async_request *async,
+					   bool *should_complete, int *ret)
 {
 	struct skcipher_request *req = skcipher_request_cast(async);
 	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
@@ -381,24 +544,48 @@
 		err = safexcel_handle_inv_result(priv, ring, async,
 						 should_complete, ret);
 	} else {
-		err = safexcel_handle_req_result(priv, ring, async,
+		err = safexcel_handle_req_result(priv, ring, async, req->src,
+						 req->dst, req->cryptlen, sreq,
 						 should_complete, ret);
 	}
 
 	return err;
 }
 
-static int safexcel_cipher_send_inv(struct crypto_async_request *async,
+static int safexcel_aead_handle_result(struct safexcel_crypto_priv *priv,
+				       int ring,
+				       struct crypto_async_request *async,
+				       bool *should_complete, int *ret)
+{
+	struct aead_request *req = aead_request_cast(async);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct safexcel_cipher_req *sreq = aead_request_ctx(req);
+	int err;
+
+	if (sreq->needs_inv) {
+		sreq->needs_inv = false;
+		err = safexcel_handle_inv_result(priv, ring, async,
+						 should_complete, ret);
+	} else {
+		err = safexcel_handle_req_result(priv, ring, async, req->src,
+						 req->dst,
+						 req->cryptlen + crypto_aead_authsize(tfm),
+						 sreq, should_complete, ret);
+	}
+
+	return err;
+}
+
+static int safexcel_cipher_send_inv(struct crypto_async_request *base,
 				    int ring, struct safexcel_request *request,
 				    int *commands, int *results)
 {
-	struct skcipher_request *req = skcipher_request_cast(async);
-	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret;
 
-	ret = safexcel_invalidate_cache(async, priv,
-					ctx->base.ctxr_dma, ring, request);
+	ret = safexcel_invalidate_cache(base, priv, ctx->base.ctxr_dma, ring,
+					request);
 	if (unlikely(ret))
 		return ret;
 
@@ -408,9 +595,9 @@
 	return 0;
 }
 
-static int safexcel_send(struct crypto_async_request *async,
-			 int ring, struct safexcel_request *request,
-			 int *commands, int *results)
+static int safexcel_skcipher_send(struct crypto_async_request *async, int ring,
+				  struct safexcel_request *request,
+				  int *commands, int *results)
 {
 	struct skcipher_request *req = skcipher_request_cast(async);
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
@@ -421,59 +608,108 @@
 	BUG_ON(priv->version == EIP97 && sreq->needs_inv);
 
 	if (sreq->needs_inv)
-		ret = safexcel_cipher_send_inv(async, ring, request,
-					       commands, results);
+		ret = safexcel_cipher_send_inv(async, ring, request, commands,
+					       results);
 	else
-		ret = safexcel_aes_send(async, ring, request,
+		ret = safexcel_aes_send(async, ring, request, sreq, req->src,
+					req->dst, req->cryptlen, 0, 0, req->iv,
 					commands, results);
 	return ret;
 }
 
-static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
+static int safexcel_aead_send(struct crypto_async_request *async, int ring,
+			      struct safexcel_request *request, int *commands,
+			      int *results)
+{
+	struct aead_request *req = aead_request_cast(async);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct safexcel_cipher_req *sreq = aead_request_ctx(req);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret;
+
+	BUG_ON(priv->version == EIP97 && sreq->needs_inv);
+
+	if (sreq->needs_inv)
+		ret = safexcel_cipher_send_inv(async, ring, request, commands,
+					       results);
+	else
+		ret = safexcel_aes_send(async, ring, request, sreq, req->src,
+					req->dst, req->cryptlen, req->assoclen,
+					crypto_aead_authsize(tfm), req->iv,
+					commands, results);
+	return ret;
+}
+
+static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm,
+				    struct crypto_async_request *base,
+				    struct safexcel_cipher_req *sreq,
+				    struct safexcel_inv_result *result)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm));
-	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
-	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
-	memset(req, 0, sizeof(struct skcipher_request));
+	init_completion(&result->completion);
 
-	/* create invalidation request */
-	init_completion(&result.completion);
-	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-				      safexcel_inv_complete, &result);
-
-	skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm));
-	ctx = crypto_tfm_ctx(req->base.tfm);
+	ctx = crypto_tfm_ctx(base->tfm);
 	ctx->base.exit_inv = true;
 	sreq->needs_inv = true;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
-	crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
+	crypto_enqueue_request(&priv->ring[ring].queue, base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
 	queue_work(priv->ring[ring].workqueue,
 		   &priv->ring[ring].work_data.work);
 
-	wait_for_completion(&result.completion);
+	wait_for_completion(&result->completion);
 
-	if (result.error) {
+	if (result->error) {
 		dev_warn(priv->dev,
 			"cipher: sync: invalidate: completion error %d\n",
-			 result.error);
-		return result.error;
+			 result->error);
+		return result->error;
 	}
 
 	return 0;
 }
 
-static int safexcel_aes(struct skcipher_request *req,
+static int safexcel_skcipher_exit_inv(struct crypto_tfm *tfm)
+{
+	EIP197_REQUEST_ON_STACK(req, skcipher, EIP197_SKCIPHER_REQ_SIZE);
+	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+	struct safexcel_inv_result result = {};
+
+	memset(req, 0, sizeof(struct skcipher_request));
+
+	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      safexcel_inv_complete, &result);
+	skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm));
+
+	return safexcel_cipher_exit_inv(tfm, &req->base, sreq, &result);
+}
+
+static int safexcel_aead_exit_inv(struct crypto_tfm *tfm)
+{
+	EIP197_REQUEST_ON_STACK(req, aead, EIP197_AEAD_REQ_SIZE);
+	struct safexcel_cipher_req *sreq = aead_request_ctx(req);
+	struct safexcel_inv_result result = {};
+
+	memset(req, 0, sizeof(struct aead_request));
+
+	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				  safexcel_inv_complete, &result);
+	aead_request_set_tfm(req, __crypto_aead_cast(tfm));
+
+	return safexcel_cipher_exit_inv(tfm, &req->base, sreq, &result);
+}
+
+static int safexcel_aes(struct crypto_async_request *base,
+			struct safexcel_cipher_req *sreq,
 			enum safexcel_cipher_direction dir, u32 mode)
 {
-	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret, ring;
 
@@ -489,7 +725,7 @@
 	} else {
 		ctx->base.ring = safexcel_select_ring(priv);
 		ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
-						 EIP197_GFP_FLAGS(req->base),
+						 EIP197_GFP_FLAGS(*base),
 						 &ctx->base.ctxr_dma);
 		if (!ctx->base.ctxr)
 			return -ENOMEM;
@@ -498,7 +734,7 @@
 	ring = ctx->base.ring;
 
 	spin_lock_bh(&priv->ring[ring].queue_lock);
-	ret = crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
+	ret = crypto_enqueue_request(&priv->ring[ring].queue, base);
 	spin_unlock_bh(&priv->ring[ring].queue_lock);
 
 	queue_work(priv->ring[ring].workqueue,
@@ -509,14 +745,14 @@
 
 static int safexcel_ecb_aes_encrypt(struct skcipher_request *req)
 {
-	return safexcel_aes(req, SAFEXCEL_ENCRYPT,
-			    CONTEXT_CONTROL_CRYPTO_MODE_ECB);
+	return safexcel_aes(&req->base, skcipher_request_ctx(req),
+			    SAFEXCEL_ENCRYPT, CONTEXT_CONTROL_CRYPTO_MODE_ECB);
 }
 
 static int safexcel_ecb_aes_decrypt(struct skcipher_request *req)
 {
-	return safexcel_aes(req, SAFEXCEL_DECRYPT,
-			    CONTEXT_CONTROL_CRYPTO_MODE_ECB);
+	return safexcel_aes(&req->base, skcipher_request_ctx(req),
+			    SAFEXCEL_DECRYPT, CONTEXT_CONTROL_CRYPTO_MODE_ECB);
 }
 
 static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm)
@@ -526,13 +762,27 @@
 		container_of(tfm->__crt_alg, struct safexcel_alg_template,
 			     alg.skcipher.base);
 
-	ctx->priv = tmpl->priv;
-	ctx->base.send = safexcel_send;
-	ctx->base.handle_result = safexcel_handle_result;
-
 	crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
 				    sizeof(struct safexcel_cipher_req));
 
+	ctx->priv = tmpl->priv;
+
+	ctx->base.send = safexcel_skcipher_send;
+	ctx->base.handle_result = safexcel_skcipher_handle_result;
+	return 0;
+}
+
+static int safexcel_cipher_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	memzero_explicit(ctx->key, sizeof(ctx->key));
+
+	/* context not allocated, skip invalidation */
+	if (!ctx->base.ctxr)
+		return -ENOMEM;
+
+	memzero_explicit(ctx->base.ctxr->data, sizeof(ctx->base.ctxr->data));
 	return 0;
 }
 
@@ -542,18 +792,34 @@
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	int ret;
 
-	memzero_explicit(ctx->key, 8 * sizeof(u32));
-
-	/* context not allocated, skip invalidation */
-	if (!ctx->base.ctxr)
+	if (safexcel_cipher_cra_exit(tfm))
 		return;
 
-	memzero_explicit(ctx->base.ctxr->data, 8 * sizeof(u32));
+	if (priv->version == EIP197) {
+		ret = safexcel_skcipher_exit_inv(tfm);
+		if (ret)
+			dev_warn(priv->dev, "skcipher: invalidation error %d\n",
+				 ret);
+	} else {
+		dma_pool_free(priv->context_pool, ctx->base.ctxr,
+			      ctx->base.ctxr_dma);
+	}
+}
+
+static void safexcel_aead_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	int ret;
+
+	if (safexcel_cipher_cra_exit(tfm))
+		return;
 
 	if (priv->version == EIP197) {
-		ret = safexcel_cipher_exit_inv(tfm);
+		ret = safexcel_aead_exit_inv(tfm);
 		if (ret)
-			dev_warn(priv->dev, "cipher: invalidation error %d\n", ret);
+			dev_warn(priv->dev, "aead: invalidation error %d\n",
+				 ret);
 	} else {
 		dma_pool_free(priv->context_pool, ctx->base.ctxr,
 			      ctx->base.ctxr_dma);
@@ -563,7 +829,7 @@
 struct safexcel_alg_template safexcel_alg_ecb_aes = {
 	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
 	.alg.skcipher = {
-		.setkey = safexcel_aes_setkey,
+		.setkey = safexcel_skcipher_aes_setkey,
 		.encrypt = safexcel_ecb_aes_encrypt,
 		.decrypt = safexcel_ecb_aes_decrypt,
 		.min_keysize = AES_MIN_KEY_SIZE,
@@ -586,20 +852,20 @@
 
 static int safexcel_cbc_aes_encrypt(struct skcipher_request *req)
 {
-	return safexcel_aes(req, SAFEXCEL_ENCRYPT,
-			    CONTEXT_CONTROL_CRYPTO_MODE_CBC);
+	return safexcel_aes(&req->base, skcipher_request_ctx(req),
+			    SAFEXCEL_ENCRYPT, CONTEXT_CONTROL_CRYPTO_MODE_CBC);
 }
 
 static int safexcel_cbc_aes_decrypt(struct skcipher_request *req)
 {
-	return safexcel_aes(req, SAFEXCEL_DECRYPT,
-			    CONTEXT_CONTROL_CRYPTO_MODE_CBC);
+	return safexcel_aes(&req->base, skcipher_request_ctx(req),
+			    SAFEXCEL_DECRYPT, CONTEXT_CONTROL_CRYPTO_MODE_CBC);
 }
 
 struct safexcel_alg_template safexcel_alg_cbc_aes = {
 	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
 	.alg.skcipher = {
-		.setkey = safexcel_aes_setkey,
+		.setkey = safexcel_skcipher_aes_setkey,
 		.encrypt = safexcel_cbc_aes_encrypt,
 		.decrypt = safexcel_cbc_aes_decrypt,
 		.min_keysize = AES_MIN_KEY_SIZE,
@@ -620,3 +886,139 @@
 		},
 	},
 };
+
+static int safexcel_aead_encrypt(struct aead_request *req)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+
+	return safexcel_aes(&req->base, creq, SAFEXCEL_ENCRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_CBC);
+}
+
+static int safexcel_aead_decrypt(struct aead_request *req)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+
+	return safexcel_aes(&req->base, creq, SAFEXCEL_DECRYPT,
+			    CONTEXT_CONTROL_CRYPTO_MODE_CBC);
+}
+
+static int safexcel_aead_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_alg_template *tmpl =
+		container_of(tfm->__crt_alg, struct safexcel_alg_template,
+			     alg.aead.base);
+
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+				sizeof(struct safexcel_cipher_req));
+
+	ctx->priv = tmpl->priv;
+
+	ctx->aead = true;
+	ctx->base.send = safexcel_aead_send;
+	ctx->base.handle_result = safexcel_aead_handle_result;
+	return 0;
+}
+
+static int safexcel_aead_sha1_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_cra_init(tfm);
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
+	ctx->state_sz = SHA1_DIGEST_SIZE;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_aes = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.alg.aead = {
+		.setkey = safexcel_aead_aes_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = AES_BLOCK_SIZE,
+		.maxauthsize = SHA1_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha1),cbc(aes))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha1_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha256_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_cra_init(tfm);
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA256;
+	ctx->state_sz = SHA256_DIGEST_SIZE;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_aes = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.alg.aead = {
+		.setkey = safexcel_aead_aes_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = AES_BLOCK_SIZE,
+		.maxauthsize = SHA256_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha256),cbc(aes))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha256-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha256_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha224_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_cra_init(tfm);
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA224;
+	ctx->state_sz = SHA256_DIGEST_SIZE;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_aes = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.alg.aead = {
+		.setkey = safexcel_aead_aes_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = AES_BLOCK_SIZE,
+		.maxauthsize = SHA224_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha224),cbc(aes))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha224-cbc-aes",
+			.cra_priority = 300,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha224_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
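
For context: the AEAD context-control sizing earlier in this file packs the
context record as the AES key words followed by the precomputed inner and
outer HMAC digests. A minimal sketch of that sizing math, using a
hypothetical helper name (illustration only, not driver code):

    #include <linux/types.h>

    /* Context record size in 32-bit words for an AEAD transform:
     * AES key words plus the ipad and opad digest words, matching
     * the ctrl_size computation in safexcel_context_control().
     */
    static inline u32 eip197_aead_ctrl_words(u32 key_len, u32 state_sz)
    {
            /* key_len and state_sz are byte counts */
            return key_len / sizeof(u32) + 2 * state_sz / sizeof(u32);
    }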
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 317b9e4..d138d6b 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -50,16 +50,6 @@
 	u8 cache_next[SHA256_BLOCK_SIZE] __aligned(sizeof(u32));
 };
 
-struct safexcel_ahash_export_state {
-	u64 len;
-	u64 processed;
-
-	u32 digest;
-
-	u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
-	u8 cache[SHA256_BLOCK_SIZE];
-};
-
 static void safexcel_hash_token(struct safexcel_command_desc *cdesc,
 				u32 input_length, u32 result_length)
 {
@@ -146,11 +136,8 @@
 		dev_err(priv->dev,
 			"hash: result: could not retrieve the result descriptor\n");
 		*ret = PTR_ERR(rdesc);
-	} else if (rdesc->result_data.error_code) {
-		dev_err(priv->dev,
-			"hash: result: result descriptor error (%d)\n",
-			rdesc->result_data.error_code);
-		*ret = -EINVAL;
+	} else {
+		*ret = safexcel_rdesc_check_errors(priv, rdesc);
 	}
 
 	safexcel_complete(priv, ring);
@@ -480,7 +467,7 @@
 {
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm));
+	EIP197_REQUEST_ON_STACK(req, ahash, EIP197_AHASH_REQ_SIZE);
 	struct safexcel_ahash_req *rctx = ahash_request_ctx(req);
 	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
@@ -912,8 +899,8 @@
 	return crypto_ahash_export(areq, state);
 }
 
-static int safexcel_hmac_setkey(const char *alg, const u8 *key,
-				unsigned int keylen, void *istate, void *ostate)
+int safexcel_hmac_setkey(const char *alg, const u8 *key, unsigned int keylen,
+			 void *istate, void *ostate)
 {
 	struct ahash_request *areq;
 	struct crypto_ahash *tfm;
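
Exporting safexcel_hmac_setkey() above lets the AEAD setkey path reuse the
standard HMAC key preprocessing: pad the key to one hash block, XOR it with
the 0x36/0x5c constants, and hash one block each to seed the inner and outer
states. A sketch of the pad derivation, assuming the key has already been
hashed down to at most one block (illustration only, not the driver's code):

    #include <linux/string.h>
    #include <linux/types.h>

    #define HMAC_IPAD_VALUE 0x36
    #define HMAC_OPAD_VALUE 0x5c

    static void hmac_prepare_pads(const u8 *key, unsigned int keylen,
                                  unsigned int blocksize, u8 *ipad, u8 *opad)
    {
            unsigned int i;

            /* keylen <= blocksize; longer keys are hashed down first */
            memset(ipad, 0, blocksize);
            memcpy(ipad, key, keylen);
            memcpy(opad, ipad, blocksize);

            for (i = 0; i < blocksize; i++) {
                    ipad[i] ^= HMAC_IPAD_VALUE;
                    opad[i] ^= HMAC_OPAD_VALUE;
            }
    }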
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index 1e87637..36afd6d 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -334,7 +334,7 @@
 		return -EPROTO;
 	case CSB_CC_SEQUENCE:
 		/* should not happen, we don't use chained CRBs */
-		CSB_ERR(csb, "CRB seqeunce number error");
+		CSB_ERR(csb, "CRB sequence number error");
 		return -EPROTO;
 	case CSB_CC_UNKNOWN_CODE:
 		CSB_ERR(csb, "Unknown subfunction code");
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index ad02aa6..d1a1c74 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -1087,7 +1087,7 @@
 
 	if (test_bit(FLAGS_SGS_COPIED, &dd->flags))
 		free_pages((unsigned long)sg_virt(ctx->sg),
-			   get_order(ctx->sg->length));
+			   get_order(ctx->sg->length + ctx->bufcnt));
 
 	if (test_bit(FLAGS_SGS_ALLOCED, &dd->flags))
 		kfree(ctx->sg);
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index a4df966..321d5e2 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -1169,8 +1169,7 @@
 #ifdef CONFIG_PM
 static int spacc_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spacc_engine *engine = platform_get_drvdata(pdev);
+	struct spacc_engine *engine = dev_get_drvdata(dev);
 
 	/*
 	 * We only support standby mode. All we have to do is gate the clock to
@@ -1184,8 +1183,7 @@
 
 static int spacc_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spacc_engine *engine = platform_get_drvdata(pdev);
+	struct spacc_engine *engine = dev_get_drvdata(dev);
 
 	return clk_enable(engine->clk);
 }
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
index f172171..ba197f3 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
@@ -329,5 +329,7 @@
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel");
+MODULE_FIRMWARE(ADF_C3XXX_FW);
+MODULE_FIRMWARE(ADF_C3XXX_MMP);
 MODULE_DESCRIPTION("Intel(R) QuickAssist Technology");
 MODULE_VERSION(ADF_DRV_VERSION);
diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c
index 58a984c9..59a5a0d 100644
--- a/drivers/crypto/qat/qat_c62x/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62x/adf_drv.c
@@ -329,5 +329,7 @@
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel");
+MODULE_FIRMWARE(ADF_C62X_FW);
+MODULE_FIRMWARE(ADF_C62X_MMP);
 MODULE_DESCRIPTION("Intel(R) QuickAssist Technology");
 MODULE_VERSION(ADF_DRV_VERSION);
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index 2ce01f0..be5c5a9 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -332,5 +332,6 @@
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel");
 MODULE_FIRMWARE(ADF_DH895XCC_FW);
+MODULE_FIRMWARE(ADF_DH895XCC_MMP);
 MODULE_DESCRIPTION("Intel(R) QuickAssist Technology");
 MODULE_VERSION(ADF_DRV_VERSION);
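
For reference, MODULE_FIRMWARE() only records a firmware file name in the
module's .modinfo section; tools such as modinfo and initramfs generators
read it to learn which blobs a module needs. It does not itself load the
firmware. A minimal sketch with placeholder names:

    #include <linux/module.h>

    #define DEMO_FW_IMAGE "demo/demo_fw.bin"
    #define DEMO_FW_MMP   "demo/demo_mmp.bin"

    /* Declares the dependency; loading still happens at runtime
     * via request_firmware() or equivalent.
     */
    MODULE_FIRMWARE(DEMO_FW_IMAGE);
    MODULE_FIRMWARE(DEMO_FW_MMP);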
diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c
index 96072b9..d7316f7 100644
--- a/drivers/crypto/vmx/aes.c
+++ b/drivers/crypto/vmx/aes.c
@@ -48,8 +48,6 @@
 		       alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-	       crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback));
 
 	crypto_cipher_set_flags(fallback,
 				crypto_cipher_get_flags((struct
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 7394d35..5285ece 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -52,9 +52,6 @@
 		       alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-		crypto_skcipher_driver_name(fallback));
-
 
 	crypto_skcipher_set_flags(
 		fallback,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index fc60d00..cd777c7 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -50,8 +50,6 @@
 		       alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-		crypto_skcipher_driver_name(fallback));
 
 	crypto_skcipher_set_flags(
 		fallback,
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
index 8cd6e62..8bd9aff 100644
--- a/drivers/crypto/vmx/aes_xts.c
+++ b/drivers/crypto/vmx/aes_xts.c
@@ -53,8 +53,6 @@
 			alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-		crypto_skcipher_driver_name(fallback));
 
 	crypto_skcipher_set_flags(
 		fallback,
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
index 0b4a293..d6a9f63 100644
--- a/drivers/crypto/vmx/aesp8-ppc.pl
+++ b/drivers/crypto/vmx/aesp8-ppc.pl
@@ -1,12 +1,51 @@
 #! /usr/bin/env perl
-# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-#
-# Licensed under the OpenSSL license (the "License").  You may not use
-# this file except in compliance with the License.  You can obtain a copy
-# in the file LICENSE in the source distribution or at
-# https://www.openssl.org/source/license.html
+# SPDX-License-Identifier: GPL-2.0
 
+# This code is taken from CRYPTOGAMs[1] and is included here using the option
+# in the license to distribute the code under the GPL. Therefore this program
+# is free software; you can redistribute it and/or modify it under the terms of
+# the GNU General Public License version 2 as published by the Free Software
+# Foundation.
 #
+# [1] https://www.openssl.org/~appro/cryptogams/
+
+# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#       * Redistributions of source code must retain copyright notices,
+#         this list of conditions and the following disclaimer.
+#
+#       * Redistributions in binary form must reproduce the above
+#         copyright notice, this list of conditions and the following
+#         disclaimer in the documentation and/or other materials
+#         provided with the distribution.
+#
+#       * Neither the name of the CRYPTOGAMS nor the names of its
+#         copyright holder and contributors may be used to endorse or
+#         promote products derived from this software without specific
+#         prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
index 27a94a1..1c4b5b8 100644
--- a/drivers/crypto/vmx/ghash.c
+++ b/drivers/crypto/vmx/ghash.c
@@ -64,8 +64,6 @@
 		       alg, PTR_ERR(fallback));
 		return PTR_ERR(fallback);
 	}
-	printk(KERN_INFO "Using '%s' as fallback implementation.\n",
-	       crypto_tfm_alg_driver_name(crypto_shash_tfm(fallback)));
 
 	crypto_shash_set_flags(fallback,
 			       crypto_shash_get_flags((struct crypto_shash
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl
index d8429cb..f746af2 100644
--- a/drivers/crypto/vmx/ghashp8-ppc.pl
+++ b/drivers/crypto/vmx/ghashp8-ppc.pl
@@ -1,5 +1,14 @@
 #!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
 #
+# The original headers, including the original license headers, are
+# included below for completeness.
+
 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index aff2c15..de2f829 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -594,7 +594,7 @@
 	if (!count)
 		return ERR_PTR(-EINVAL);
 
-	dev_dax = kzalloc(sizeof(*dev_dax) + sizeof(*res) * count, GFP_KERNEL);
+	dev_dax = kzalloc(struct_size(dev_dax, res, count), GFP_KERNEL);
 	if (!dev_dax)
 		return ERR_PTR(-ENOMEM);
 
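The kzalloc() conversion above (and the matching ones in the dma drivers
further down) replaces open-coded trailing-array sizing with struct_size()
from <linux/overflow.h>, which saturates to SIZE_MAX on overflow so the
allocation fails instead of being undersized. A self-contained sketch of
the pattern, with placeholder names:

    #include <linux/overflow.h>
    #include <linux/slab.h>

    struct demo {
            int count;
            u64 items[];            /* flexible array member */
    };

    static struct demo *demo_alloc(size_t n)
    {
            /* struct_size(d, items, n) computes
             * sizeof(*d) + n * sizeof(d->items[0]), checked for overflow.
             */
            struct demo *d = kzalloc(struct_size(d, items, n), GFP_KERNEL);

            if (d)
                    d->count = n;
            return d;
    }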
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 2b2332b..1d7bd96 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -74,42 +74,42 @@
 
 /**
  * __bdev_dax_supported() - Check if the device supports dax for filesystem
- * @sb: The superblock of the device
+ * @bdev: block device to check
  * @blocksize: The block size of the device
  *
  * This is a library function for filesystems to check if the block device
  * can be mounted with dax option.
  *
- * Return: negative errno if unsupported, 0 if supported.
+ * Return: true if supported, false if unsupported
  */
-int __bdev_dax_supported(struct super_block *sb, int blocksize)
+bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
 {
-	struct block_device *bdev = sb->s_bdev;
 	struct dax_device *dax_dev;
 	pgoff_t pgoff;
 	int err, id;
 	void *kaddr;
 	pfn_t pfn;
 	long len;
+	char buf[BDEVNAME_SIZE];
 
 	if (blocksize != PAGE_SIZE) {
-		pr_debug("VFS (%s): error: unsupported blocksize for dax\n",
-				sb->s_id);
-		return -EINVAL;
+		pr_debug("%s: error: unsupported blocksize for dax\n",
+				bdevname(bdev, buf));
+		return false;
 	}
 
 	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
 	if (err) {
-		pr_debug("VFS (%s): error: unaligned partition for dax\n",
-				sb->s_id);
-		return err;
+		pr_debug("%s: error: unaligned partition for dax\n",
+				bdevname(bdev, buf));
+		return false;
 	}
 
 	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
 	if (!dax_dev) {
-		pr_debug("VFS (%s): error: device does not support dax\n",
-				sb->s_id);
-		return -EOPNOTSUPP;
+		pr_debug("%s: error: device does not support dax\n",
+				bdevname(bdev, buf));
+		return false;
 	}
 
 	id = dax_read_lock();
@@ -119,9 +119,9 @@
 	put_dax(dax_dev);
 
 	if (len < 1) {
-		pr_debug("VFS (%s): error: dax access failed (%ld)\n",
-				sb->s_id, len);
-		return len < 0 ? len : -EIO;
+		pr_debug("%s: error: dax access failed (%ld)\n",
+				bdevname(bdev, buf), len);
+		return false;
 	}
 
 	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
@@ -137,12 +137,12 @@
 	} else if (pfn_t_devmap(pfn)) {
 		/* pass */;
 	} else {
-		pr_debug("VFS (%s): error: dax support not enabled\n",
-				sb->s_id);
-		return -EOPNOTSUPP;
+		pr_debug("%s: error: dax support not enabled\n",
+				bdevname(bdev, buf));
+		return false;
 	}
 
-	return 0;
+	return true;
 }
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
 #endif
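
With the signature change above, filesystem callers test a boolean instead
of interpreting an errno; the detailed failure reason now appears only in
the pr_debug() output. A sketch of the call-site shape, assuming the
bdev_dax_supported() wrapper mirrors the new prototype:

    #include <linux/dax.h>
    #include <linux/fs.h>

    static bool demo_mount_wants_dax(struct super_block *sb, int blocksize)
    {
            /* true only when the block device fully supports dax */
            return bdev_dax_supported(sb->s_bdev, blocksize);
    }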
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index d615a89..05e33f9 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -62,8 +62,6 @@
 	struct rb_node node;
 };
 
-#ifdef CONFIG_SW_SYNC
-
 extern const struct file_operations sw_sync_debugfs_fops;
 
 void sync_timeline_debug_add(struct sync_timeline *obj);
@@ -72,12 +70,4 @@
 void sync_file_debug_remove(struct sync_file *fence);
 void sync_dump(void);
 
-#else
-# define sync_timeline_debug_add(obj)
-# define sync_timeline_debug_remove(obj)
-# define sync_file_debug_add(fence)
-# define sync_file_debug_remove(fence)
-# define sync_dump()
-#endif
-
 #endif /* _LINUX_SYNC_H */
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
index 3956a01..72878ac 100644
--- a/drivers/dma/bcm-sba-raid.c
+++ b/drivers/dma/bcm-sba-raid.c
@@ -1499,9 +1499,8 @@
 
 	for (i = 0; i < sba->max_req; i++) {
 		req = devm_kzalloc(sba->dev,
-				sizeof(*req) +
-				sba->max_cmd_per_req * sizeof(req->cmds[0]),
-				GFP_KERNEL);
+				   struct_size(req, cmds, sba->max_cmd_per_req),
+				   GFP_KERNEL);
 		if (!req) {
 			ret = -ENOMEM;
 			goto fail_free_cmds_pool;
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 85ea92f..9bc722c 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1074,8 +1074,7 @@
 		return NULL;
 	}
 
-	edesc = kzalloc(sizeof(*edesc) + sg_len * sizeof(edesc->pset[0]),
-			GFP_ATOMIC);
+	edesc = kzalloc(struct_size(edesc, pset, sg_len), GFP_ATOMIC);
 	if (!edesc)
 		return NULL;
 
@@ -1192,8 +1191,7 @@
 			nslots = 2;
 	}
 
-	edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]),
-			GFP_ATOMIC);
+	edesc = kzalloc(struct_size(edesc, pset, nslots), GFP_ATOMIC);
 	if (!edesc)
 		return NULL;
 
@@ -1315,8 +1313,7 @@
 		}
 	}
 
-	edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]),
-			GFP_ATOMIC);
+	edesc = kzalloc(struct_size(edesc, pset, nslots), GFP_ATOMIC);
 	if (!edesc)
 		return NULL;
 
diff --git a/drivers/dma/moxart-dma.c b/drivers/dma/moxart-dma.c
index e1a5c22..e04499c1f 100644
--- a/drivers/dma/moxart-dma.c
+++ b/drivers/dma/moxart-dma.c
@@ -309,7 +309,7 @@
 		return NULL;
 	}
 
-	d = kzalloc(sizeof(*d) + sg_len * sizeof(d->sg[0]), GFP_ATOMIC);
+	d = kzalloc(struct_size(d, sg, sg_len), GFP_ATOMIC);
 	if (!d)
 		return NULL;
 
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
index 5055933..2f9974d 100644
--- a/drivers/dma/nbpfaxi.c
+++ b/drivers/dma/nbpfaxi.c
@@ -1305,8 +1305,8 @@
 	cfg = of_device_get_match_data(dev);
 	num_channels = cfg->num_channels;
 
-	nbpf = devm_kzalloc(dev, sizeof(*nbpf) + num_channels *
-			    sizeof(nbpf->chan[0]), GFP_KERNEL);
+	nbpf = devm_kzalloc(dev, struct_size(nbpf, chan, num_channels),
+			    GFP_KERNEL);
 	if (!nbpf)
 		return -ENOMEM;
 
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index d21c198..9483000 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -917,7 +917,7 @@
 	}
 
 	/* Now allocate and setup the descriptor. */
-	d = kzalloc(sizeof(*d) + sglen * sizeof(d->sg[0]), GFP_ATOMIC);
+	d = kzalloc(struct_size(d, sg, sglen), GFP_ATOMIC);
 	if (!d)
 		return NULL;
 
diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index 000c7019..d64edeb 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -398,7 +398,7 @@
 		}
 		of_node_get(child);
 		new_pdev->dev.of_node = child;
-		of_dma_configure(&new_pdev->dev, child);
+		of_dma_configure(&new_pdev->dev, child, true);
 		/*
 		 * It is assumed that calling of_msi_configure is safe on
 		 * platforms with or without MSI support.
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
index c7a89c2..b31d07c 100644
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -557,7 +557,7 @@
 		}
 	}
 
-	txd = kzalloc(sizeof(*txd) + j * sizeof(txd->sg[0]), GFP_ATOMIC);
+	txd = kzalloc(struct_size(txd, sg, j), GFP_ATOMIC);
 	if (!txd) {
 		dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", &c->vc);
 		return NULL;
@@ -627,7 +627,7 @@
 	if (sglen == 0)
 		return NULL;
 
-	txd = kzalloc(sizeof(*txd) + sglen * sizeof(txd->sg[0]), GFP_ATOMIC);
+	txd = kzalloc(struct_size(txd, sg, sglen), GFP_ATOMIC);
 	if (!txd) {
 		dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", &c->vc);
 		return NULL;
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index 31a1451..1bb1a8e 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -269,7 +269,7 @@
 	struct usb_dmac_desc *desc;
 	unsigned long flags;
 
-	desc = kzalloc(sizeof(*desc) + sg_len * sizeof(desc->sg[0]), gfp);
+	desc = kzalloc(struct_size(desc, sg, sg_len), gfp);
 	if (!desc)
 		return -ENOMEM;
 
diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
index b106e8a..52ebccb 100644
--- a/drivers/dma/sprd-dma.c
+++ b/drivers/dma/sprd-dma.c
@@ -805,8 +805,8 @@
 		return ret;
 	}
 
-	sdev = devm_kzalloc(&pdev->dev, sizeof(*sdev) +
-			    sizeof(*dma_chn) * chn_count,
+	sdev = devm_kzalloc(&pdev->dev,
+			    struct_size(sdev, channels, chn_count),
 			    GFP_KERNEL);
 	if (!sdev)
 		return -ENOMEM;
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index da2da53..57304b2 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -232,6 +232,7 @@
 config EDAC_SKX
 	tristate "Intel Skylake server Integrated MC"
 	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
+	depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
 	select DMI
 	help
	  Support for error detection and correction on the Intel
@@ -374,7 +375,7 @@
 
 config EDAC_ALTERA
 	bool "Altera SOCFPGA ECC"
-	depends on EDAC=y && ARCH_SOCFPGA
+	depends on EDAC=y && (ARCH_SOCFPGA || ARCH_STRATIX10)
 	help
 	  Support for error detection and correction on the
 	  Altera SOCs. This must be selected for SDRAM ECC.
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 11d6419..d0d5c4d 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
+ *  Copyright (C) 2017-2018, Intel Corporation. All rights reserved
  *  Copyright Altera Corporation (C) 2014-2016. All rights reserved.
  *  Copyright 2011-2012 Calxeda, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * Adapted from the highbank_mc_edac driver.
  */
 
 #include <asm/cacheflush.h>
@@ -26,6 +14,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/kernel.h>
 #include <linux/mfd/syscon.h>
+#include <linux/notifier.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
@@ -80,6 +69,25 @@
 	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
 };
 
+static const struct altr_sdram_prv_data s10_data = {
+	.ecc_ctrl_offset    = S10_ECCCTRL1_OFST,
+	.ecc_ctl_en_mask    = A10_ECCCTRL1_ECC_EN,
+	.ecc_stat_offset    = S10_INTSTAT_OFST,
+	.ecc_stat_ce_mask   = A10_INTSTAT_SBEERR,
+	.ecc_stat_ue_mask   = A10_INTSTAT_DBEERR,
+	.ecc_saddr_offset   = S10_SERRADDR_OFST,
+	.ecc_daddr_offset   = S10_DERRADDR_OFST,
+	.ecc_irq_en_offset  = S10_ERRINTEN_OFST,
+	.ecc_irq_en_mask    = A10_ECC_IRQ_EN_MASK,
+	.ecc_irq_clr_offset = S10_INTSTAT_OFST,
+	.ecc_irq_clr_mask   = (A10_INTSTAT_SBEERR | A10_INTSTAT_DBEERR),
+	.ecc_cnt_rst_offset = S10_ECCCTRL1_OFST,
+	.ecc_cnt_rst_mask   = A10_ECC_CNT_RESET_MASK,
+	.ce_ue_trgr_offset  = S10_DIAGINTTEST_OFST,
+	.ce_set_mask        = A10_DIAGINT_TSERRA_MASK,
+	.ue_set_mask        = A10_DIAGINT_TDERRA_MASK,
+};
+
 /*********************** EDAC Memory Controller Functions ****************/
 
 /* The SDRAM controller uses the EDAC Memory Controller framework.       */
@@ -231,6 +239,7 @@
 static const struct of_device_id altr_sdram_ctrl_of_match[] = {
 	{ .compatible = "altr,sdram-edac", .data = &c5_data},
 	{ .compatible = "altr,sdram-edac-a10", .data = &a10_data},
+	{ .compatible = "altr,sdram-edac-s10", .data = &s10_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
@@ -477,6 +486,292 @@
 	return 0;
 }
 
+/**************** Stratix 10 EDAC Memory Controller Functions ************/
+
+/**
+ * s10_protected_reg_write - Write to a protected SMC register.
+ * @context: Not used.
+ * @reg: Address of register
+ * @val: Value to write
+ * Return: INTEL_SIP_SMC_STATUS_OK (0) on success
+ *	   INTEL_SIP_SMC_REG_ERROR on error
+ *	   INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported
+ */
+static int s10_protected_reg_write(void *context, unsigned int reg,
+				   unsigned int val)
+{
+	struct arm_smccc_res result;
+
+	arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, reg, val, 0, 0,
+		      0, 0, 0, &result);
+
+	return (int)result.a0;
+}
+
+/**
+ * s10_protected_reg_read - Read the status of a protected SMC register.
+ * @context: Not used.
+ * @reg: Address of register
+ * @val: Value read
+ * Return: INTEL_SIP_SMC_STATUS_OK (0) on success
+ *	   INTEL_SIP_SMC_REG_ERROR on error
+ *	   INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported
+ */
+static int s10_protected_reg_read(void *context, unsigned int reg,
+				  unsigned int *val)
+{
+	struct arm_smccc_res result;
+
+	arm_smccc_smc(INTEL_SIP_SMC_REG_READ, reg, 0, 0, 0,
+		      0, 0, 0, &result);
+
+	*val = (unsigned int)result.a1;
+
+	return (int)result.a0;
+}
+
+static bool s10_sdram_writeable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case S10_ECCCTRL1_OFST:
+	case S10_ERRINTEN_OFST:
+	case S10_INTMODE_OFST:
+	case S10_INTSTAT_OFST:
+	case S10_DIAGINTTEST_OFST:
+	case S10_SYSMGR_ECC_INTMASK_VAL_OFST:
+	case S10_SYSMGR_ECC_INTMASK_SET_OFST:
+	case S10_SYSMGR_ECC_INTMASK_CLR_OFST:
+		return true;
+	}
+	return false;
+}
+
+static bool s10_sdram_readable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case S10_ECCCTRL1_OFST:
+	case S10_ERRINTEN_OFST:
+	case S10_INTMODE_OFST:
+	case S10_INTSTAT_OFST:
+	case S10_DERRADDR_OFST:
+	case S10_SERRADDR_OFST:
+	case S10_DIAGINTTEST_OFST:
+	case S10_SYSMGR_ECC_INTMASK_VAL_OFST:
+	case S10_SYSMGR_ECC_INTMASK_SET_OFST:
+	case S10_SYSMGR_ECC_INTMASK_CLR_OFST:
+	case S10_SYSMGR_ECC_INTSTAT_SERR_OFST:
+	case S10_SYSMGR_ECC_INTSTAT_DERR_OFST:
+		return true;
+	}
+	return false;
+}
+
+static bool s10_sdram_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case S10_ECCCTRL1_OFST:
+	case S10_ERRINTEN_OFST:
+	case S10_INTMODE_OFST:
+	case S10_INTSTAT_OFST:
+	case S10_DERRADDR_OFST:
+	case S10_SERRADDR_OFST:
+	case S10_DIAGINTTEST_OFST:
+	case S10_SYSMGR_ECC_INTMASK_VAL_OFST:
+	case S10_SYSMGR_ECC_INTMASK_SET_OFST:
+	case S10_SYSMGR_ECC_INTMASK_CLR_OFST:
+	case S10_SYSMGR_ECC_INTSTAT_SERR_OFST:
+	case S10_SYSMGR_ECC_INTSTAT_DERR_OFST:
+		return true;
+	}
+	return false;
+}
+
+static const struct regmap_config s10_sdram_regmap_cfg = {
+	.name = "s10_ddr",
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = 0xffffffff,
+	.writeable_reg = s10_sdram_writeable_reg,
+	.readable_reg = s10_sdram_readable_reg,
+	.volatile_reg = s10_sdram_volatile_reg,
+	.reg_read = s10_protected_reg_read,
+	.reg_write = s10_protected_reg_write,
+	.use_single_rw = true,
+};
+
+static int altr_s10_sdram_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *id;
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	struct altr_sdram_mc_data *drvdata;
+	const struct altr_sdram_prv_data *priv;
+	struct regmap *regmap;
+	struct dimm_info *dimm;
+	u32 read_reg;
+	int irq, ret = 0;
+	unsigned long mem_size;
+
+	id = of_match_device(altr_sdram_ctrl_of_match, &pdev->dev);
+	if (!id)
+		return -ENODEV;
+
+	/* Grab specific offsets and masks for Stratix10 */
+	priv = of_match_node(altr_sdram_ctrl_of_match,
+			     pdev->dev.of_node)->data;
+
+	regmap = devm_regmap_init(&pdev->dev, NULL, (void *)priv,
+				  &s10_sdram_regmap_cfg);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	/* Validate the SDRAM controller has ECC enabled */
+	if (regmap_read(regmap, priv->ecc_ctrl_offset, &read_reg) ||
+	    ((read_reg & priv->ecc_ctl_en_mask) != priv->ecc_ctl_en_mask)) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "No ECC/ECC disabled [0x%08X]\n", read_reg);
+		return -ENODEV;
+	}
+
+	/* Grab memory size from device tree. */
+	mem_size = get_total_mem();
+	if (!mem_size) {
+		edac_printk(KERN_ERR, EDAC_MC, "Unable to calculate memory size\n");
+		return -ENODEV;
+	}
+
+	/* Ensure the SDRAM Interrupt is disabled */
+	if (regmap_update_bits(regmap, priv->ecc_irq_en_offset,
+			       priv->ecc_irq_en_mask, 0)) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Error disabling SDRAM ECC IRQ\n");
+		return -ENODEV;
+	}
+
+	/* Toggle to clear the SDRAM Error count */
+	if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset,
+			       priv->ecc_cnt_rst_mask,
+			       priv->ecc_cnt_rst_mask)) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Error clearing SDRAM ECC count\n");
+		return -ENODEV;
+	}
+
+	if (regmap_update_bits(regmap, priv->ecc_cnt_rst_offset,
+			       priv->ecc_cnt_rst_mask, 0)) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Error clearing SDRAM ECC count\n");
+		return -ENODEV;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "No irq %d in DT\n", irq);
+		return -ENODEV;
+	}
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = true;
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = false;
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+			    sizeof(struct altr_sdram_mc_data));
+	if (!mci)
+		return -ENOMEM;
+
+	mci->pdev = &pdev->dev;
+	drvdata = mci->pvt_info;
+	drvdata->mc_vbase = regmap;
+	drvdata->data = priv;
+	platform_set_drvdata(pdev, mci);
+
+	if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL)) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Unable to get managed device resource\n");
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	mci->mtype_cap = MEM_FLAG_DDR3;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = EDAC_MOD_STR;
+	mci->ctl_name = dev_name(&pdev->dev);
+	mci->scrub_mode = SCRUB_SW_SRC;
+	mci->dev_name = dev_name(&pdev->dev);
+
+	dimm = *mci->dimms;
+	dimm->nr_pages = ((mem_size - 1) >> PAGE_SHIFT) + 1;
+	dimm->grain = 8;
+	dimm->dtype = DEV_X8;
+	dimm->mtype = MEM_DDR3;
+	dimm->edac_mode = EDAC_SECDED;
+
+	ret = edac_mc_add_mc(mci);
+	if (ret < 0)
+		goto err;
+
+	ret = devm_request_irq(&pdev->dev, irq, altr_sdram_mc_err_handler,
+			       IRQF_SHARED, dev_name(&pdev->dev), mci);
+	if (ret < 0) {
+		edac_mc_printk(mci, KERN_ERR,
+			       "Unable to request irq %d\n", irq);
+		ret = -ENODEV;
+		goto err2;
+	}
+
+	if (regmap_write(regmap, S10_SYSMGR_ECC_INTMASK_CLR_OFST,
+			 S10_DDR0_IRQ_MASK)) {
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "Error clearing SDRAM ECC count\n");
+		return -ENODEV;
+	}
+
+	if (regmap_update_bits(drvdata->mc_vbase, priv->ecc_irq_en_offset,
+			       priv->ecc_irq_en_mask, priv->ecc_irq_en_mask)) {
+		edac_mc_printk(mci, KERN_ERR,
+			       "Error enabling SDRAM ECC IRQ\n");
+		ret = -ENODEV;
+		goto err2;
+	}
+
+	altr_sdr_mc_create_debugfs_nodes(mci);
+
+	devres_close_group(&pdev->dev, NULL);
+
+	return 0;
+
+err2:
+	edac_mc_del_mc(&pdev->dev);
+err:
+	devres_release_group(&pdev->dev, NULL);
+free:
+	edac_mc_free(mci);
+	edac_printk(KERN_ERR, EDAC_MC,
+		    "EDAC Probe Failed; Error %d\n", ret);
+
+	return ret;
+}
+
+static int altr_s10_sdram_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+	edac_mc_del_mc(&pdev->dev);
+	edac_mc_free(mci);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+/************** </Stratix10 EDAC Memory Controller Functions> ***********/
+
 /*
  * If you want to suspend, need to disable EDAC by removing it
  * from the device tree or defconfig.
@@ -508,6 +803,20 @@
 
 module_platform_driver(altr_sdram_edac_driver);
 
+static struct platform_driver altr_s10_sdram_edac_driver = {
+	.probe = altr_s10_sdram_probe,
+	.remove = altr_s10_sdram_remove,
+	.driver = {
+		.name = "altr_s10_sdram_edac",
+#ifdef CONFIG_PM
+		.pm = &altr_sdram_pm_ops,
+#endif
+		.of_match_table = altr_sdram_ctrl_of_match,
+	},
+};
+
+module_platform_driver(altr_s10_sdram_edac_driver);
+
 /************************* EDAC Parent Probe *************************/
 
 static const struct of_device_id altr_edac_device_of_match[];
@@ -1106,7 +1415,7 @@
 
 static void ocram_free_mem(void *p, size_t size, void *other)
 {
-	gen_pool_free((struct gen_pool *)other, (u32)p, size);
+	gen_pool_free((struct gen_pool *)other, (unsigned long)p, size);
 }
 
 static const struct edac_device_prv_data ocramecc_data = {
@@ -1925,6 +2234,171 @@
 };
 module_platform_driver(altr_edac_a10_driver);
 
+/************** Stratix 10 EDAC Device Controller Functions *************/
+
+#define to_s10edac(p, m) container_of(p, struct altr_stratix10_edac, m)
+
+/*
+ * A double bit error is handled through SError, which is fatal. This
+ * handler is registered as a panic notifier to print the ECC error info
+ * as part of the panic output.
+ */
+static int s10_edac_dberr_handler(struct notifier_block *this,
+				  unsigned long event, void *ptr)
+{
+	struct altr_stratix10_edac *edac = to_s10edac(this, panic_notifier);
+	int err_addr, dberror;
+
+	s10_protected_reg_read(edac, S10_SYSMGR_ECC_INTSTAT_DERR_OFST,
+			       &dberror);
+	/* Remember the UE Errors for a reboot */
+	s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, dberror);
+	if (dberror & S10_DDR0_IRQ_MASK) {
+		s10_protected_reg_read(edac, S10_DERRADDR_OFST, &err_addr);
+		/* Remember the UE Error address */
+		s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST,
+					err_addr);
+		edac_printk(KERN_ERR, EDAC_MC,
+			    "EDAC: [Uncorrectable errors @ 0x%08X]\n\n",
+			    err_addr);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void altr_edac_s10_irq_handler(struct irq_desc *desc)
+{
+	struct altr_stratix10_edac *edac = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	int irq = irq_desc_get_irq(desc);
+	int bit, sm_offset, irq_status;
+
+	sm_offset = S10_SYSMGR_ECC_INTSTAT_SERR_OFST;
+
+	chained_irq_enter(chip, desc);
+
+	s10_protected_reg_read(NULL, sm_offset, &irq_status);
+
+	for_each_set_bit(bit, (unsigned long *)&irq_status, 32) {
+		irq = irq_linear_revmap(edac->domain, bit);
+		if (irq)
+			generic_handle_irq(irq);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void s10_eccmgr_irq_mask(struct irq_data *d)
+{
+	struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d);
+
+	s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_SET_OFST,
+				BIT(d->hwirq));
+}
+
+static void s10_eccmgr_irq_unmask(struct irq_data *d)
+{
+	struct altr_stratix10_edac *edac = irq_data_get_irq_chip_data(d);
+
+	s10_protected_reg_write(edac, S10_SYSMGR_ECC_INTMASK_CLR_OFST,
+				BIT(d->hwirq));
+}
+
+static int s10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq,
+				    irq_hw_number_t hwirq)
+{
+	struct altr_stratix10_edac *edac = d->host_data;
+
+	irq_set_chip_and_handler(irq, &edac->irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, edac);
+	irq_set_noprobe(irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops s10_eccmgr_ic_ops = {
+	.map = s10_eccmgr_irqdomain_map,
+	.xlate = irq_domain_xlate_twocell,
+};
+
+static int altr_edac_s10_probe(struct platform_device *pdev)
+{
+	struct altr_stratix10_edac *edac;
+	struct device_node *child;
+	int dberror, err_addr;
+
+	edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
+	if (!edac)
+		return -ENOMEM;
+
+	edac->dev = &pdev->dev;
+	platform_set_drvdata(pdev, edac);
+	INIT_LIST_HEAD(&edac->s10_ecc_devices);
+
+	edac->irq_chip.name = pdev->dev.of_node->name;
+	edac->irq_chip.irq_mask = s10_eccmgr_irq_mask;
+	edac->irq_chip.irq_unmask = s10_eccmgr_irq_unmask;
+	edac->domain = irq_domain_add_linear(pdev->dev.of_node, 64,
+					     &s10_eccmgr_ic_ops, edac);
+	if (!edac->domain) {
+		dev_err(&pdev->dev, "Error adding IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	edac->sb_irq = platform_get_irq(pdev, 0);
+	if (edac->sb_irq < 0) {
+		dev_err(&pdev->dev, "No SBERR IRQ resource\n");
+		return edac->sb_irq;
+	}
+
+	irq_set_chained_handler_and_data(edac->sb_irq,
+					 altr_edac_s10_irq_handler,
+					 edac);
+
+	edac->panic_notifier.notifier_call = s10_edac_dberr_handler;
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &edac->panic_notifier);
+
+	/* Print a message if an uncorrectable error occurred previously. */
+	s10_protected_reg_read(edac, S10_SYSMGR_UE_VAL_OFST, &dberror);
+	if (dberror) {
+		s10_protected_reg_read(edac, S10_SYSMGR_UE_ADDR_OFST,
+				       &err_addr);
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "Previous Boot UE detected[0x%X] @ 0x%X\n",
+			    dberror, err_addr);
+		/* Reset the sticky registers */
+		s10_protected_reg_write(edac, S10_SYSMGR_UE_VAL_OFST, 0);
+		s10_protected_reg_write(edac, S10_SYSMGR_UE_ADDR_OFST, 0);
+	}
+
+	for_each_child_of_node(pdev->dev.of_node, child) {
+		if (!of_device_is_available(child))
+			continue;
+
+		if (of_device_is_compatible(child, "altr,sdram-edac-s10"))
+			of_platform_populate(pdev->dev.of_node,
+					     altr_sdram_ctrl_of_match,
+					     NULL, &pdev->dev);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id altr_edac_s10_of_match[] = {
+	{ .compatible = "altr,socfpga-s10-ecc-manager" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_s10_of_match);
+
+static struct platform_driver altr_edac_s10_driver = {
+	.probe =  altr_edac_s10_probe,
+	.driver = {
+		.name = "socfpga_s10_ecc_manager",
+		.of_match_table = altr_edac_s10_of_match,
+	},
+};
+module_platform_driver(altr_edac_s10_driver);
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Thor Thayer");
 MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index cbc9629..81f0554 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -1,23 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- *
+ * Copyright (C) 2017-2018, Intel Corporation
  * Copyright (C) 2015 Altera Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _ALTERA_EDAC_H
 #define _ALTERA_EDAC_H
 
+#include <linux/arm-smccc.h>
 #include <linux/edac.h>
 #include <linux/types.h>
 
@@ -94,6 +84,7 @@
 /* SDRAM Controller Address Width Register */
 #define CV_DRAMADDRW               0xFFC2502C
 #define A10_DRAMADDRW              0xFFCFA0A8
+#define S10_DRAMADDRW              0xF80110E0
 
 /* SDRAM Controller Address Widths Field Register */
 #define DRAMADDRW_COLBIT_MASK      0x001F
@@ -115,6 +106,7 @@
 /* SDRAM Controller Interface Data Width Register */
 #define CV_DRAMIFWIDTH             0xFFC25030
 #define A10_DRAMIFWIDTH            0xFFCFB008
+#define S10_DRAMIFWIDTH            0xF8011008
 
 /* SDRAM Controller Interface Data Width Defines */
 #define CV_DRAMIFWIDTH_16B_ECC     24
@@ -164,6 +156,34 @@
 #define A10_INTMASK_CLR_OFST       0x10
 #define A10_DDR0_IRQ_MASK          BIT(17)
 
+/************* Stratix10 Defines **************/
+
+/* SDRAM Controller EccCtrl Register */
+#define S10_ECCCTRL1_OFST          0xF8011100
+
+/* SDRAM Controller DRAM IRQ Register */
+#define S10_ERRINTEN_OFST          0xF8011110
+
+/* SDRAM Interrupt Mode Register */
+#define S10_INTMODE_OFST           0xF801111C
+
+/* SDRAM Controller Error Status Register */
+#define S10_INTSTAT_OFST           0xF8011120
+
+/* SDRAM Controller ECC Error Address Register */
+#define S10_DERRADDR_OFST          0xF801112C
+#define S10_SERRADDR_OFST          0xF8011130
+
+/* SDRAM Controller ECC Diagnostic Register */
+#define S10_DIAGINTTEST_OFST       0xF8011124
+
+/* SDRAM Single Bit Error Count Compare Set Register */
+#define S10_SERRCNTREG_OFST        0xF801113C
+
+/* Sticky registers for Uncorrected Errors */
+#define S10_SYSMGR_UE_VAL_OFST     0xFFD12220
+#define S10_SYSMGR_UE_ADDR_OFST    0xFFD12224
+
 struct altr_sdram_prv_data {
 	int ecc_ctrl_offset;
 	int ecc_ctl_en_mask;
@@ -296,6 +316,18 @@
 /* A10 ECC Controller memory initialization timeout */
 #define ALTR_A10_ECC_INIT_WATCHDOG_10US      10000
 
+/************* Stratix10 Defines **************/
+
+/* Stratix10 ECC Manager Defines */
+#define S10_SYSMGR_ECC_INTMASK_VAL_OFST   0xFFD12090
+#define S10_SYSMGR_ECC_INTMASK_SET_OFST   0xFFD12094
+#define S10_SYSMGR_ECC_INTMASK_CLR_OFST   0xFFD12098
+
+#define S10_SYSMGR_ECC_INTSTAT_SERR_OFST  0xFFD1209C
+#define S10_SYSMGR_ECC_INTSTAT_DERR_OFST  0xFFD120A0
+
+#define S10_DDR0_IRQ_MASK                 BIT(16)
+
 struct altr_edac_device_dev;
 
 struct edac_device_prv_data {
@@ -340,4 +372,78 @@
 	struct list_head	a10_ecc_devices;
 };
 
+/*
+ * Functions specified by ARM SMC Calling convention:
+ *
+ * FAST call executes atomic operations and returns when the requested
+ * operation has completed.
+ * STD call starts an operation which can be preempted by a non-secure
+ * interrupt. The call can return before the requested operation has
+ * completed.
+ *
+ * a0..a7 are used as register names in the descriptions below; on arm32
+ * that translates to r0..r7 and on arm64 to w0..w7.
+ */
+
+#define INTEL_SIP_SMC_STD_CALL_VAL(func_num) \
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_STD_CALL, ARM_SMCCC_SMC_64, \
+	ARM_SMCCC_OWNER_SIP, (func_num))
+
+#define INTEL_SIP_SMC_FAST_CALL_VAL(func_num) \
+	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64, \
+	ARM_SMCCC_OWNER_SIP, (func_num))
+
+#define INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION		0xFFFFFFFF
+#define INTEL_SIP_SMC_STATUS_OK				0x0
+#define INTEL_SIP_SMC_REG_ERROR				0x5
+
+/*
+ * Request INTEL_SIP_SMC_REG_READ
+ *
+ * Read a protected register using SMCCC
+ *
+ * Call register usage:
+ * a0: INTEL_SIP_SMC_REG_READ.
+ * a1: register address.
+ * a2-7: not used.
+ *
+ * Return status:
+ * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or
+ *     INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION
+ * a1: Value in the register
+ * a2-3: not used.
+ */
+#define INTEL_SIP_SMC_FUNCID_REG_READ 7
+#define INTEL_SIP_SMC_REG_READ \
+	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_READ)
+
+/*
+ * Request INTEL_SIP_SMC_REG_WRITE
+ *
+ * Write a protected register using SMCCC
+ *
+ * Call register usage:
+ * a0: INTEL_SIP_SMC_REG_WRITE.
+ * a1: register address
+ * a2: value to program into register.
+ * a3-7: not used.
+ *
+ * Return status:
+ * a0: INTEL_SIP_SMC_STATUS_OK, INTEL_SIP_SMC_REG_ERROR, or
+ *     INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION
+ * a1-3: not used.
+ */
+#define INTEL_SIP_SMC_FUNCID_REG_WRITE 8
+#define INTEL_SIP_SMC_REG_WRITE \
+	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE)
+
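+/*
+ * A sketch, not part of this patch, of how a protected register read per
+ * the calling convention above might be issued; the driver's
+ * s10_protected_reg_read/write helpers are the real interface.
+ */
+#if 0
+static int example_s10_reg_read(u64 addr, u32 *val)
+{
+	struct arm_smccc_res res;
+
+	/* a0 = function ID, a1 = register address, a2..a7 unused */
+	arm_smccc_smc(INTEL_SIP_SMC_REG_READ, addr, 0, 0, 0, 0, 0, 0, &res);
+	if (res.a0 != INTEL_SIP_SMC_STATUS_OK)
+		return -EIO;
+
+	*val = (u32)res.a1;	/* a1 carries the register value */
+	return 0;
+}
+#endif
+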
+struct altr_stratix10_edac {
+	struct device		*dev;
+	int sb_irq;
+	struct irq_domain	*domain;
+	struct irq_chip		irq_chip;
+	struct list_head	s10_ecc_devices;
+	struct notifier_block	panic_notifier;
+};
+
 #endif	/* #ifndef _ALTERA_EDAC_H */
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 68b6ee1..473aeec 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -91,6 +91,7 @@
 		struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
 						       mci->n_layers,
 						       dimm_fill->count, 0, 0);
+		u16 rdr_mask = BIT(7) | BIT(13);
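+		/* type_detail bits (SMBIOS): BIT(7) = synchronous, BIT(13) = registered */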
 
 		if (entry->size == 0xffff) {
 			pr_info("Can't get DIMM%i size\n",
@@ -99,22 +100,21 @@
 		} else if (entry->size == 0x7fff) {
 			dimm->nr_pages = MiB_TO_PAGES(entry->extended_size);
 		} else {
-			if (entry->size & 1 << 15)
-				dimm->nr_pages = MiB_TO_PAGES((entry->size &
-							       0x7fff) << 10);
+			if (entry->size & BIT(15))
+				dimm->nr_pages = MiB_TO_PAGES((entry->size & 0x7fff) << 10);
 			else
 				dimm->nr_pages = MiB_TO_PAGES(entry->size);
 		}
 
 		switch (entry->memory_type) {
 		case 0x12:
-			if (entry->type_detail & 1 << 13)
+			if (entry->type_detail & BIT(13))
 				dimm->mtype = MEM_RDDR;
 			else
 				dimm->mtype = MEM_DDR;
 			break;
 		case 0x13:
-			if (entry->type_detail & 1 << 13)
+			if (entry->type_detail & BIT(13))
 				dimm->mtype = MEM_RDDR2;
 			else
 				dimm->mtype = MEM_DDR2;
@@ -123,20 +123,29 @@
 			dimm->mtype = MEM_FB_DDR2;
 			break;
 		case 0x18:
-			if (entry->type_detail & 1 << 13)
+			if (entry->type_detail & BIT(12))
+				dimm->mtype = MEM_NVDIMM;
+			else if (entry->type_detail & BIT(13))
 				dimm->mtype = MEM_RDDR3;
 			else
 				dimm->mtype = MEM_DDR3;
 			break;
+		case 0x1a:
+			if (entry->type_detail & BIT(12))
+				dimm->mtype = MEM_NVDIMM;
+			else if (entry->type_detail & BIT(13))
+				dimm->mtype = MEM_RDDR4;
+			else
+				dimm->mtype = MEM_DDR4;
+			break;
 		default:
-			if (entry->type_detail & 1 << 6)
+			if (entry->type_detail & BIT(6))
 				dimm->mtype = MEM_RMBS;
-			else if ((entry->type_detail & ((1 << 7) | (1 << 13)))
-				 == ((1 << 7) | (1 << 13)))
+			else if ((entry->type_detail & rdr_mask) == rdr_mask)
 				dimm->mtype = MEM_RDR;
-			else if (entry->type_detail & 1 << 7)
+			else if (entry->type_detail & BIT(7))
 				dimm->mtype = MEM_SDR;
-			else if (entry->type_detail & 1 << 9)
+			else if (entry->type_detail & BIT(9))
 				dimm->mtype = MEM_EDO;
 			else
 				dimm->mtype = MEM_UNKNOWN;
@@ -172,8 +181,7 @@
 	}
 }
 
-void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
-				struct cper_sec_mem_err *mem_err)
+void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 {
 	enum hw_event_mc_err_type type;
 	struct edac_raw_error_desc *e;
@@ -183,10 +191,8 @@
 	char *p;
 	u8 grain_bits;
 
-	if (!pvt) {
-		pr_err("Internal error: Can't find EDAC structure\n");
+	if (!pvt)
 		return;
-	}
 
 	/*
 	 * We can do the locking below because GHES defers error processing
@@ -434,12 +440,16 @@
 	struct mem_ctl_info *mci;
 	struct edac_mc_layer layers[1];
 	struct ghes_edac_dimm_fill dimm_fill;
-	int idx;
+	int idx = -1;
 
-	/* Check if safe to enable on this system */
-	idx = acpi_match_platform_list(plat_list);
-	if (!force_load && idx < 0)
-		return 0;
+	if (IS_ENABLED(CONFIG_X86)) {
+		/* Check if safe to enable on this system */
+		idx = acpi_match_platform_list(plat_list);
+		if (!force_load && idx < 0)
+			return -ENODEV;
+	} else {
+		idx = 0;
+	}
 
 	/*
 	 * We have only one logical memory controller to which all DIMMs belong.
@@ -519,6 +529,9 @@
 {
 	struct mem_ctl_info *mci;
 
+	if (!ghes_pvt)
+		return;
+
 	mci = ghes_pvt->mci;
 	edac_mc_del_mc(mci->pdev);
 	edac_mc_free(mci);
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 8c55401..4d0ea35 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1743,7 +1743,7 @@
 		err = "write parity error";
 		break;
 	case 19:
-		err = "redundacy loss";
+		err = "redundancy loss";
 		break;
 	case 20:
 		err = "reserved";
diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index 939d259..7db234d 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -112,8 +112,7 @@
 {
 	struct fw_node *node;
 
-	node = kzalloc(sizeof(*node) + port_count * sizeof(node->ports[0]),
-		       GFP_ATOMIC);
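+	/* struct_size() adds overflow checking to the same size computation */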
+	node = kzalloc(struct_size(node, ports, port_count), GFP_ATOMIC);
 	if (node == NULL)
 		return NULL;
 
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 3098410..781a4a3 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -174,6 +174,11 @@
 	depends on UEFI_CPER && ( ARM || ARM64 )
 	default y
 
+config UEFI_CPER_X86
+	bool
+	depends on UEFI_CPER && X86
+	default y
+
 config EFI_DEV_PATH_PARSER
 	bool
 	depends on ACPI
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index cb80537..5f9f503 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -31,3 +31,4 @@
 obj-$(CONFIG_ARM64)			+= $(arm-obj-y)
 obj-$(CONFIG_EFI_CAPSULE_LOADER)	+= capsule-loader.o
 obj-$(CONFIG_UEFI_CPER_ARM)		+= cper-arm.o
+obj-$(CONFIG_UEFI_CPER_X86)		+= cper-x86.o
diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index adaa9a3..60a9571 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -13,6 +13,9 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: all properties are treated as u8 arrays. To get the value of any
+ * of them, the caller must use device_property_read_u8_array().
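+ *
+ * For example (illustrative; the property name is hypothetical):
+ *
+ *	u8 buf[16];
+ *	int ret = device_property_read_u8_array(dev, "device_type", buf,
+ *						sizeof(buf));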
  */
 
 #define pr_fmt(fmt) "apple-properties: " fmt
@@ -96,12 +99,13 @@
 		entry[i].name = key;
 		entry[i].length = val_len - sizeof(val_len);
 		entry[i].is_array = !!entry[i].length;
-		entry[i].pointer.raw_data = ptr + key_len + sizeof(val_len);
+		entry[i].type = DEV_PROP_U8;
+		entry[i].pointer.u8_data = ptr + key_len + sizeof(val_len);
 
 		if (dump_properties) {
 			dev_info(dev, "property: %s\n", entry[i].name);
 			print_hex_dump(KERN_INFO, pr_fmt(), DUMP_PREFIX_OFFSET,
-				16, 1, entry[i].pointer.raw_data,
+				16, 1, entry[i].pointer.u8_data,
 				entry[i].length, true);
 		}
 
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index e456f46..9668898 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -134,10 +134,16 @@
 
 	/* Indicate capsule binary uploading is done */
 	cap_info->index = NO_FURTHER_WRITE_ACTION;
-	pr_info("Successfully upload capsule file with reboot type '%s'\n",
-		!cap_info->reset_type ? "RESET_COLD" :
-		cap_info->reset_type == 1 ? "RESET_WARM" :
-		"RESET_SHUTDOWN");
+
+	if (cap_info->header.flags & EFI_CAPSULE_PERSIST_ACROSS_RESET) {
+		pr_info("Successfully uploaded capsule file with reboot type '%s'\n",
+			!cap_info->reset_type ? "RESET_COLD" :
+			cap_info->reset_type == 1 ? "RESET_WARM" :
+			"RESET_SHUTDOWN");
+	} else {
+		pr_info("Successfully processed capsule file\n");
+	}
+
 	return 0;
 }
 
diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c
index 698e5c8..5028113 100644
--- a/drivers/firmware/efi/cper-arm.c
+++ b/drivers/firmware/efi/cper-arm.c
@@ -30,8 +30,6 @@
 #include <acpi/ghes.h>
 #include <ras/ras_event.h>
 
-#define INDENT_SP	" "
-
 static const char * const arm_reg_ctx_strs[] = {
 	"AArch32 general purpose registers",
 	"AArch32 EL1 context registers",
@@ -283,7 +281,7 @@
 			pfx, proc->psci_state);
 	}
 
-	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+	snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
 
 	err_info = (struct cper_arm_err_info *)(proc + 1);
 	for (i = 0; i < proc->err_info_num; i++) {
@@ -310,7 +308,7 @@
 		if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) {
 			printk("%serror_info: 0x%016llx\n", newpfx,
 			       err_info->error_info);
-			snprintf(infopfx, sizeof(infopfx), "%s%s", newpfx, INDENT_SP);
+			snprintf(infopfx, sizeof(infopfx), "%s ", newpfx);
 			cper_print_arm_err_info(infopfx, err_info->type,
 						err_info->error_info);
 		}
diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c
new file mode 100644
index 0000000..2531de4
--- /dev/null
+++ b/drivers/firmware/efi/cper-x86.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018, Advanced Micro Devices, Inc.
+
+#include <linux/cper.h>
+
+/*
+ * We don't need a "CPER_IA" prefix since these are all locally defined.
+ * This will save us a lot of line space.
+ */
+#define VALID_LAPIC_ID			BIT_ULL(0)
+#define VALID_CPUID_INFO		BIT_ULL(1)
+#define VALID_PROC_ERR_INFO_NUM(bits)	(((bits) & GENMASK_ULL(7, 2)) >> 2)
+#define VALID_PROC_CXT_INFO_NUM(bits)	(((bits) & GENMASK_ULL(13, 8)) >> 8)
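+/*
+ * Example: validation_bits == 0x0407 means the local APIC ID and CPUID info
+ * are valid (bits 0 and 1), one error information structure follows
+ * (bits 7:2 == 1) and four context structures follow (bits 13:8 == 4).
+ */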
+
+#define INFO_ERR_STRUCT_TYPE_CACHE					\
+	GUID_INIT(0xA55701F5, 0xE3EF, 0x43DE, 0xAC, 0x72, 0x24, 0x9B,	\
+		  0x57, 0x3F, 0xAD, 0x2C)
+#define INFO_ERR_STRUCT_TYPE_TLB					\
+	GUID_INIT(0xFC06B535, 0x5E1F, 0x4562, 0x9F, 0x25, 0x0A, 0x3B,	\
+		  0x9A, 0xDB, 0x63, 0xC3)
+#define INFO_ERR_STRUCT_TYPE_BUS					\
+	GUID_INIT(0x1CF3F8B3, 0xC5B1, 0x49a2, 0xAA, 0x59, 0x5E, 0xEF,	\
+		  0x92, 0xFF, 0xA6, 0x3C)
+#define INFO_ERR_STRUCT_TYPE_MS						\
+	GUID_INIT(0x48AB7F57, 0xDC34, 0x4f6c, 0xA7, 0xD3, 0xB0, 0xB5,	\
+		  0xB0, 0xA7, 0x43, 0x14)
+
+#define INFO_VALID_CHECK_INFO		BIT_ULL(0)
+#define INFO_VALID_TARGET_ID		BIT_ULL(1)
+#define INFO_VALID_REQUESTOR_ID		BIT_ULL(2)
+#define INFO_VALID_RESPONDER_ID		BIT_ULL(3)
+#define INFO_VALID_IP			BIT_ULL(4)
+
+#define CHECK_VALID_TRANS_TYPE		BIT_ULL(0)
+#define CHECK_VALID_OPERATION		BIT_ULL(1)
+#define CHECK_VALID_LEVEL		BIT_ULL(2)
+#define CHECK_VALID_PCC			BIT_ULL(3)
+#define CHECK_VALID_UNCORRECTED		BIT_ULL(4)
+#define CHECK_VALID_PRECISE_IP		BIT_ULL(5)
+#define CHECK_VALID_RESTARTABLE_IP	BIT_ULL(6)
+#define CHECK_VALID_OVERFLOW		BIT_ULL(7)
+
+#define CHECK_VALID_BUS_PART_TYPE	BIT_ULL(8)
+#define CHECK_VALID_BUS_TIME_OUT	BIT_ULL(9)
+#define CHECK_VALID_BUS_ADDR_SPACE	BIT_ULL(10)
+
+#define CHECK_VALID_BITS(check)		(((check) & GENMASK_ULL(15, 0)))
+#define CHECK_TRANS_TYPE(check)		(((check) & GENMASK_ULL(17, 16)) >> 16)
+#define CHECK_OPERATION(check)		(((check) & GENMASK_ULL(21, 18)) >> 18)
+#define CHECK_LEVEL(check)		(((check) & GENMASK_ULL(24, 22)) >> 22)
+#define CHECK_PCC			BIT_ULL(25)
+#define CHECK_UNCORRECTED		BIT_ULL(26)
+#define CHECK_PRECISE_IP		BIT_ULL(27)
+#define CHECK_RESTARTABLE_IP		BIT_ULL(28)
+#define CHECK_OVERFLOW			BIT_ULL(29)
+
+#define CHECK_BUS_PART_TYPE(check)	(((check) & GENMASK_ULL(31, 30)) >> 30)
+#define CHECK_BUS_TIME_OUT		BIT_ULL(32)
+#define CHECK_BUS_ADDR_SPACE(check)	(((check) & GENMASK_ULL(34, 33)) >> 33)
+
+#define CHECK_VALID_MS_ERR_TYPE		BIT_ULL(0)
+#define CHECK_VALID_MS_PCC		BIT_ULL(1)
+#define CHECK_VALID_MS_UNCORRECTED	BIT_ULL(2)
+#define CHECK_VALID_MS_PRECISE_IP	BIT_ULL(3)
+#define CHECK_VALID_MS_RESTARTABLE_IP	BIT_ULL(4)
+#define CHECK_VALID_MS_OVERFLOW		BIT_ULL(5)
+
+#define CHECK_MS_ERR_TYPE(check)	(((check) & GENMASK_ULL(18, 16)) >> 16)
+#define CHECK_MS_PCC			BIT_ULL(19)
+#define CHECK_MS_UNCORRECTED		BIT_ULL(20)
+#define CHECK_MS_PRECISE_IP		BIT_ULL(21)
+#define CHECK_MS_RESTARTABLE_IP		BIT_ULL(22)
+#define CHECK_MS_OVERFLOW		BIT_ULL(23)
+
+#define CTX_TYPE_MSR			1
+#define CTX_TYPE_MMREG			7
+
+enum err_types {
+	ERR_TYPE_CACHE = 0,
+	ERR_TYPE_TLB,
+	ERR_TYPE_BUS,
+	ERR_TYPE_MS,
+	N_ERR_TYPES
+};
+
+static enum err_types cper_get_err_type(const guid_t *err_type)
+{
+	if (guid_equal(err_type, &INFO_ERR_STRUCT_TYPE_CACHE))
+		return ERR_TYPE_CACHE;
+	else if (guid_equal(err_type, &INFO_ERR_STRUCT_TYPE_TLB))
+		return ERR_TYPE_TLB;
+	else if (guid_equal(err_type, &INFO_ERR_STRUCT_TYPE_BUS))
+		return ERR_TYPE_BUS;
+	else if (guid_equal(err_type, &INFO_ERR_STRUCT_TYPE_MS))
+		return ERR_TYPE_MS;
+	else
+		return N_ERR_TYPES;
+}
+
+static const char * const ia_check_trans_type_strs[] = {
+	"Instruction",
+	"Data Access",
+	"Generic",
+};
+
+static const char * const ia_check_op_strs[] = {
+	"generic error",
+	"generic read",
+	"generic write",
+	"data read",
+	"data write",
+	"instruction fetch",
+	"prefetch",
+	"eviction",
+	"snoop",
+};
+
+static const char * const ia_check_bus_part_type_strs[] = {
+	"Local Processor originated request",
+	"Local Processor responded to request",
+	"Local Processor observed",
+	"Generic",
+};
+
+static const char * const ia_check_bus_addr_space_strs[] = {
+	"Memory Access",
+	"Reserved",
+	"I/O",
+	"Other Transaction",
+};
+
+static const char * const ia_check_ms_error_type_strs[] = {
+	"No Error",
+	"Unclassified",
+	"Microcode ROM Parity Error",
+	"External Error",
+	"FRC Error",
+	"Internal Unclassified",
+};
+
+static const char * const ia_reg_ctx_strs[] = {
+	"Unclassified Data",
+	"MSR Registers (Machine Check and other MSRs)",
+	"32-bit Mode Execution Context",
+	"64-bit Mode Execution Context",
+	"FXSAVE Context",
+	"32-bit Mode Debug Registers (DR0-DR7)",
+	"64-bit Mode Debug Registers (DR0-DR7)",
+	"Memory Mapped Registers",
+};
+
+static inline void print_bool(char *str, const char *pfx, u64 check, u64 bit)
+{
+	printk("%s%s: %s\n", pfx, str, (check & bit) ? "true" : "false");
+}
+
+static void print_err_info_ms(const char *pfx, u16 validation_bits, u64 check)
+{
+	if (validation_bits & CHECK_VALID_MS_ERR_TYPE) {
+		u8 err_type = CHECK_MS_ERR_TYPE(check);
+
+		printk("%sError Type: %u, %s\n", pfx, err_type,
+		       err_type < ARRAY_SIZE(ia_check_ms_error_type_strs) ?
+		       ia_check_ms_error_type_strs[err_type] : "unknown");
+	}
+
+	if (validation_bits & CHECK_VALID_MS_PCC)
+		print_bool("Processor Context Corrupt", pfx, check, CHECK_MS_PCC);
+
+	if (validation_bits & CHECK_VALID_MS_UNCORRECTED)
+		print_bool("Uncorrected", pfx, check, CHECK_MS_UNCORRECTED);
+
+	if (validation_bits & CHECK_VALID_MS_PRECISE_IP)
+		print_bool("Precise IP", pfx, check, CHECK_MS_PRECISE_IP);
+
+	if (validation_bits & CHECK_VALID_MS_RESTARTABLE_IP)
+		print_bool("Restartable IP", pfx, check, CHECK_MS_RESTARTABLE_IP);
+
+	if (validation_bits & CHECK_VALID_MS_OVERFLOW)
+		print_bool("Overflow", pfx, check, CHECK_MS_OVERFLOW);
+}
+
+static void print_err_info(const char *pfx, u8 err_type, u64 check)
+{
+	u16 validation_bits = CHECK_VALID_BITS(check);
+
+	/*
+	 * The MS Check structure varies a lot from the others, so use a
+	 * separate function for decoding.
+	 */
+	if (err_type == ERR_TYPE_MS)
+		return print_err_info_ms(pfx, validation_bits, check);
+
+	if (validation_bits & CHECK_VALID_TRANS_TYPE) {
+		u8 trans_type = CHECK_TRANS_TYPE(check);
+
+		printk("%sTransaction Type: %u, %s\n", pfx, trans_type,
+		       trans_type < ARRAY_SIZE(ia_check_trans_type_strs) ?
+		       ia_check_trans_type_strs[trans_type] : "unknown");
+	}
+
+	if (validation_bits & CHECK_VALID_OPERATION) {
+		u8 op = CHECK_OPERATION(check);
+
+		/*
+		 * CACHE has more operation types than TLB or BUS, though the
+		 * names and their order are the same.
+		 */
+		u8 max_ops = (err_type == ERR_TYPE_CACHE) ? 9 : 7;
+
+		printk("%sOperation: %u, %s\n", pfx, op,
+		       op < max_ops ? ia_check_op_strs[op] : "unknown");
+	}
+
+	if (validation_bits & CHECK_VALID_LEVEL)
+		printk("%sLevel: %llu\n", pfx, CHECK_LEVEL(check));
+
+	if (validation_bits & CHECK_VALID_PCC)
+		print_bool("Processor Context Corrupt", pfx, check, CHECK_PCC);
+
+	if (validation_bits & CHECK_VALID_UNCORRECTED)
+		print_bool("Uncorrected", pfx, check, CHECK_UNCORRECTED);
+
+	if (validation_bits & CHECK_VALID_PRECISE_IP)
+		print_bool("Precise IP", pfx, check, CHECK_PRECISE_IP);
+
+	if (validation_bits & CHECK_VALID_RESTARTABLE_IP)
+		print_bool("Restartable IP", pfx, check, CHECK_RESTARTABLE_IP);
+
+	if (validation_bits & CHECK_VALID_OVERFLOW)
+		print_bool("Overflow", pfx, check, CHECK_OVERFLOW);
+
+	if (err_type != ERR_TYPE_BUS)
+		return;
+
+	if (validation_bits & CHECK_VALID_BUS_PART_TYPE) {
+		u8 part_type = CHECK_BUS_PART_TYPE(check);
+
+		printk("%sParticipation Type: %u, %s\n", pfx, part_type,
+		       part_type < ARRAY_SIZE(ia_check_bus_part_type_strs) ?
+		       ia_check_bus_part_type_strs[part_type] : "unknown");
+	}
+
+	if (validation_bits & CHECK_VALID_BUS_TIME_OUT)
+		print_bool("Time Out", pfx, check, CHECK_BUS_TIME_OUT);
+
+	if (validation_bits & CHECK_VALID_BUS_ADDR_SPACE) {
+		u8 addr_space = CHECK_BUS_ADDR_SPACE(check);
+
+		printk("%sAddress Space: %u, %s\n", pfx, addr_space,
+		       addr_space < ARRAY_SIZE(ia_check_bus_addr_space_strs) ?
+		       ia_check_bus_addr_space_strs[addr_space] : "unknown");
+	}
+}
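+
+/*
+ * Worked example: for a cache error with check == 0x00810005, valid bits 0
+ * and 2 are set, so transaction type and level are decoded; bits 17:16 == 1
+ * give "Data Access" and bits 24:22 == 2 give cache level 2.
+ */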
+
+void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc)
+{
+	int i;
+	struct cper_ia_err_info *err_info;
+	struct cper_ia_proc_ctx *ctx_info;
+	char newpfx[64], infopfx[64];
+	u8 err_type;
+
+	if (proc->validation_bits & VALID_LAPIC_ID)
+		printk("%sLocal APIC_ID: 0x%llx\n", pfx, proc->lapic_id);
+
+	if (proc->validation_bits & VALID_CPUID_INFO) {
+		printk("%sCPUID Info:\n", pfx);
+		print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, proc->cpuid,
+			       sizeof(proc->cpuid), 0);
+	}
+
+	snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
+
+	err_info = (struct cper_ia_err_info *)(proc + 1);
+	for (i = 0; i < VALID_PROC_ERR_INFO_NUM(proc->validation_bits); i++) {
+		printk("%sError Information Structure %d:\n", pfx, i);
+
+		err_type = cper_get_err_type(&err_info->err_type);
+		printk("%sError Structure Type: %s\n", newpfx,
+		       err_type < ARRAY_SIZE(cper_proc_error_type_strs) ?
+		       cper_proc_error_type_strs[err_type] : "unknown");
+
+		if (err_type >= N_ERR_TYPES) {
+			printk("%sError Structure Type: %pUl\n", newpfx,
+			       &err_info->err_type);
+		}
+
+		if (err_info->validation_bits & INFO_VALID_CHECK_INFO) {
+			printk("%sCheck Information: 0x%016llx\n", newpfx,
+			       err_info->check_info);
+
+			if (err_type < N_ERR_TYPES) {
+				snprintf(infopfx, sizeof(infopfx), "%s ",
+					 newpfx);
+
+				print_err_info(infopfx, err_type,
+					       err_info->check_info);
+			}
+		}
+
+		if (err_info->validation_bits & INFO_VALID_TARGET_ID) {
+			printk("%sTarget Identifier: 0x%016llx\n",
+			       newpfx, err_info->target_id);
+		}
+
+		if (err_info->validation_bits & INFO_VALID_REQUESTOR_ID) {
+			printk("%sRequestor Identifier: 0x%016llx\n",
+			       newpfx, err_info->requestor_id);
+		}
+
+		if (err_info->validation_bits & INFO_VALID_RESPONDER_ID) {
+			printk("%sResponder Identifier: 0x%016llx\n",
+			       newpfx, err_info->responder_id);
+		}
+
+		if (err_info->validation_bits & INFO_VALID_IP) {
+			printk("%sInstruction Pointer: 0x%016llx\n",
+			       newpfx, err_info->ip);
+		}
+
+		err_info++;
+	}
+
+	ctx_info = (struct cper_ia_proc_ctx *)err_info;
+	for (i = 0; i < VALID_PROC_CXT_INFO_NUM(proc->validation_bits); i++) {
+		int size = sizeof(*ctx_info) + ctx_info->reg_arr_size;
+		int groupsize = 4;
+
+		printk("%sContext Information Structure %d:\n", pfx, i);
+
+		printk("%sRegister Context Type: %s\n", newpfx,
+		       ctx_info->reg_ctx_type < ARRAY_SIZE(ia_reg_ctx_strs) ?
+		       ia_reg_ctx_strs[ctx_info->reg_ctx_type] : "unknown");
+
+		printk("%sRegister Array Size: 0x%04x\n", newpfx,
+		       ctx_info->reg_arr_size);
+
+		if (ctx_info->reg_ctx_type == CTX_TYPE_MSR) {
+			groupsize = 8; /* MSRs are 8 bytes wide. */
+			printk("%sMSR Address: 0x%08x\n", newpfx,
+			       ctx_info->msr_addr);
+		}
+
+		if (ctx_info->reg_ctx_type == CTX_TYPE_MMREG) {
+			printk("%sMM Register Address: 0x%016llx\n", newpfx,
+			       ctx_info->mm_reg_addr);
+		}
+
+		printk("%sRegister Array:\n", newpfx);
+		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
+			       (ctx_info + 1), ctx_info->reg_arr_size, 0);
+
+		ctx_info = (struct cper_ia_proc_ctx *)((long)ctx_info + size);
+	}
+}
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index c165933..3bf0dca 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -37,8 +37,6 @@
 #include <acpi/ghes.h>
 #include <ras/ras_event.h>
 
-#define INDENT_SP	" "
-
 static char rcd_decode_str[CPER_REC_LEN];
 
 /*
@@ -433,7 +431,7 @@
 	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
 		printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
 
-	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+	snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
 	if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
 		struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
 
@@ -470,6 +468,16 @@
 		else
 			goto err_section_too_small;
 #endif
+#if defined(CONFIG_UEFI_CPER_X86)
+	} else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
+		struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
+
+		printk("%ssection_type: IA32/X64 processor error\n", newpfx);
+		if (gdata->error_data_length >= sizeof(*ia_err))
+			cper_print_proc_ia(newpfx, ia_err);
+		else
+			goto err_section_too_small;
+#endif
 	} else {
 		const void *err = acpi_hest_get_payload(gdata);
 
@@ -500,7 +508,7 @@
 		       "It has been corrected by h/w "
 		       "and requires no further action");
 	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
-	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+	snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
 
 	apei_estatus_for_each_section(estatus, gdata) {
 		cper_estatus_print_section(newpfx, gdata, sec_no);
diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c
index 8f07eb4..72d9dfb 100644
--- a/drivers/firmware/efi/libstub/secureboot.c
+++ b/drivers/firmware/efi/libstub/secureboot.c
@@ -30,6 +30,9 @@
 
 /*
  * Determine whether we're in secure boot mode.
+ *
+ * Please keep the logic in sync with
+ * arch/x86/xen/efi.c:xen_efi_get_secureboot().
  */
 enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg)
 {
diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c
index 9d08cea..caa37a6 100644
--- a/drivers/firmware/efi/libstub/tpm.c
+++ b/drivers/firmware/efi/libstub/tpm.c
@@ -59,7 +59,7 @@
 
 #endif
 
-void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
+static void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
 {
 	efi_guid_t tcg2_guid = EFI_TCG2_PROTOCOL_GUID;
 	efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID;
diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig
index f16b381..a456a00 100644
--- a/drivers/firmware/google/Kconfig
+++ b/drivers/firmware/google/Kconfig
@@ -55,6 +55,14 @@
 	  the EBDA on Google servers.  If found, this log is exported to
 	  userland in the file /sys/firmware/log.
 
+config GOOGLE_FRAMEBUFFER_COREBOOT
+	tristate "Coreboot Framebuffer"
+	depends on FB_SIMPLE
+	depends on GOOGLE_COREBOOT_TABLE
+	help
+	  This option enables the kernel to search for a framebuffer in
+	  the coreboot table.  If found, it is registered with simplefb.
+
 config GOOGLE_MEMCONSOLE_COREBOOT
 	tristate "Firmware Memory Console"
 	depends on GOOGLE_COREBOOT_TABLE
diff --git a/drivers/firmware/google/Makefile b/drivers/firmware/google/Makefile
index dcd3675..d0b3fba 100644
--- a/drivers/firmware/google/Makefile
+++ b/drivers/firmware/google/Makefile
@@ -4,6 +4,7 @@
 obj-$(CONFIG_GOOGLE_COREBOOT_TABLE)        += coreboot_table.o
 obj-$(CONFIG_GOOGLE_COREBOOT_TABLE_ACPI)   += coreboot_table-acpi.o
 obj-$(CONFIG_GOOGLE_COREBOOT_TABLE_OF)     += coreboot_table-of.o
+obj-$(CONFIG_GOOGLE_FRAMEBUFFER_COREBOOT)  += framebuffer-coreboot.o
 obj-$(CONFIG_GOOGLE_MEMCONSOLE)            += memconsole.o
 obj-$(CONFIG_GOOGLE_MEMCONSOLE_COREBOOT)   += memconsole-coreboot.o
 obj-$(CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY) += memconsole-x86-legacy.o
diff --git a/drivers/firmware/google/coreboot_table-acpi.c b/drivers/firmware/google/coreboot_table-acpi.c
index fb98db2..77197fe 100644
--- a/drivers/firmware/google/coreboot_table-acpi.c
+++ b/drivers/firmware/google/coreboot_table-acpi.c
@@ -53,7 +53,7 @@
 	if (!ptr)
 		return -ENOMEM;
 
-	return coreboot_table_init(ptr);
+	return coreboot_table_init(&pdev->dev, ptr);
 }
 
 static int coreboot_table_acpi_remove(struct platform_device *pdev)
diff --git a/drivers/firmware/google/coreboot_table-of.c b/drivers/firmware/google/coreboot_table-of.c
index 727acdc..f15bf40 100644
--- a/drivers/firmware/google/coreboot_table-of.c
+++ b/drivers/firmware/google/coreboot_table-of.c
@@ -34,7 +34,7 @@
 	if (!ptr)
 		return -ENOMEM;
 
-	return coreboot_table_init(ptr);
+	return coreboot_table_init(&pdev->dev, ptr);
 }
 
 static int coreboot_table_of_remove(struct platform_device *pdev)
diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
index 0019d3e..19db570 100644
--- a/drivers/firmware/google/coreboot_table.c
+++ b/drivers/firmware/google/coreboot_table.c
@@ -4,6 +4,7 @@
  * Module providing coreboot table access.
  *
  * Copyright 2017 Google Inc.
+ * Copyright 2017 Samuel Holland <samuel@sholland.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License v2.0 as published by
@@ -15,37 +16,96 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/device.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #include "coreboot_table.h"
 
-struct coreboot_table_entry {
-	u32 tag;
-	u32 size;
-};
+#define CB_DEV(d) container_of(d, struct coreboot_device, dev)
+#define CB_DRV(d) container_of(d, struct coreboot_driver, drv)
 
 static struct coreboot_table_header __iomem *ptr_header;
 
-/*
- * This function parses the coreboot table for an entry that contains the base
- * address of the given entry tag. The coreboot table consists of a header
- * directly followed by a number of small, variable-sized entries, which each
- * contain an identifying tag and their length as the first two fields.
- */
-int coreboot_table_find(int tag, void *data, size_t data_size)
+static int coreboot_bus_match(struct device *dev, struct device_driver *drv)
 {
-	struct coreboot_table_header header;
-	struct coreboot_table_entry entry;
+	struct coreboot_device *device = CB_DEV(dev);
+	struct coreboot_driver *driver = CB_DRV(drv);
+
+	return device->entry.tag == driver->tag;
+}
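+
+/*
+ * E.g. the vpd driver below sets .tag = CB_TAG_VPD, so it binds only to
+ * the coreboot table entry carrying that tag.
+ */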
+
+static int coreboot_bus_probe(struct device *dev)
+{
+	int ret = -ENODEV;
+	struct coreboot_device *device = CB_DEV(dev);
+	struct coreboot_driver *driver = CB_DRV(dev->driver);
+
+	if (driver->probe)
+		ret = driver->probe(device);
+
+	return ret;
+}
+
+static int coreboot_bus_remove(struct device *dev)
+{
+	int ret = 0;
+	struct coreboot_device *device = CB_DEV(dev);
+	struct coreboot_driver *driver = CB_DRV(dev->driver);
+
+	if (driver->remove)
+		ret = driver->remove(device);
+
+	return ret;
+}
+
+static struct bus_type coreboot_bus_type = {
+	.name		= "coreboot",
+	.match		= coreboot_bus_match,
+	.probe		= coreboot_bus_probe,
+	.remove		= coreboot_bus_remove,
+};
+
+static int __init coreboot_bus_init(void)
+{
+	return bus_register(&coreboot_bus_type);
+}
+module_init(coreboot_bus_init);
+
+static void coreboot_device_release(struct device *dev)
+{
+	struct coreboot_device *device = CB_DEV(dev);
+
+	kfree(device);
+}
+
+int coreboot_driver_register(struct coreboot_driver *driver)
+{
+	driver->drv.bus = &coreboot_bus_type;
+
+	return driver_register(&driver->drv);
+}
+EXPORT_SYMBOL(coreboot_driver_register);
+
+void coreboot_driver_unregister(struct coreboot_driver *driver)
+{
+	driver_unregister(&driver->drv);
+}
+EXPORT_SYMBOL(coreboot_driver_unregister);
+
+int coreboot_table_init(struct device *dev, void __iomem *ptr)
+{
+	int i, ret;
 	void *ptr_entry;
-	int i;
+	struct coreboot_device *device;
+	struct coreboot_table_entry entry;
+	struct coreboot_table_header header;
 
-	if (!ptr_header)
-		return -EPROBE_DEFER;
-
+	ptr_header = ptr;
 	memcpy_fromio(&header, ptr_header, sizeof(header));
 
 	if (strncmp(header.signature, "LBIO", sizeof(header.signature))) {
@@ -54,37 +114,41 @@
 	}
 
 	ptr_entry = (void *)ptr_header + header.header_bytes;
-
 	for (i = 0; i < header.table_entries; i++) {
 		memcpy_fromio(&entry, ptr_entry, sizeof(entry));
-		if (entry.tag == tag) {
-			if (data_size < entry.size)
-				return -EINVAL;
 
-			memcpy_fromio(data, ptr_entry, entry.size);
+		device = kzalloc(sizeof(struct device) + entry.size, GFP_KERNEL);
+		if (!device) {
+			ret = -ENOMEM;
+			break;
+		}
 
-			return 0;
+		dev_set_name(&device->dev, "coreboot%d", i);
+		device->dev.parent = dev;
+		device->dev.bus = &coreboot_bus_type;
+		device->dev.release = coreboot_device_release;
+		memcpy_fromio(&device->entry, ptr_entry, entry.size);
+
+		ret = device_register(&device->dev);
+		if (ret) {
+			put_device(&device->dev);
+			break;
 		}
 
 		ptr_entry += entry.size;
 	}
 
-	return -ENOENT;
-}
-EXPORT_SYMBOL(coreboot_table_find);
-
-int coreboot_table_init(void __iomem *ptr)
-{
-	ptr_header = ptr;
-
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(coreboot_table_init);
 
 int coreboot_table_exit(void)
 {
-	if (ptr_header)
+	if (ptr_header) {
+		bus_unregister(&coreboot_bus_type);
 		iounmap(ptr_header);
+		ptr_header = NULL;
+	}
 
 	return 0;
 }
diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h
index 6eff1ae..8ad95a9 100644
--- a/drivers/firmware/google/coreboot_table.h
+++ b/drivers/firmware/google/coreboot_table.h
@@ -3,7 +3,9 @@
  *
  * Internal header for coreboot table access.
  *
+ * Copyright 2014 Gerd Hoffmann <kraxel@redhat.com>
  * Copyright 2017 Google Inc.
+ * Copyright 2017 Samuel Holland <samuel@sholland.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License v2.0 as published by
@@ -20,14 +22,6 @@
 
 #include <linux/io.h>
 
-/* List of coreboot entry structures that is used */
-struct lb_cbmem_ref {
-	uint32_t tag;
-	uint32_t size;
-
-	uint64_t cbmem_addr;
-};
-
 /* Coreboot table header structure */
 struct coreboot_table_header {
 	char signature[4];
@@ -38,11 +32,67 @@
 	u32 table_entries;
 };
 
-/* Retrieve coreboot table entry with tag *tag* and copy it to data */
-int coreboot_table_find(int tag, void *data, size_t data_size);
+/* List of coreboot entry structures that are used */
+/* Generic */
+struct coreboot_table_entry {
+	u32 tag;
+	u32 size;
+};
+
+/* Points to a CBMEM entry */
+struct lb_cbmem_ref {
+	u32 tag;
+	u32 size;
+
+	u64 cbmem_addr;
+};
+
+/* Describes framebuffer setup by coreboot */
+struct lb_framebuffer {
+	u32 tag;
+	u32 size;
+
+	u64 physical_address;
+	u32 x_resolution;
+	u32 y_resolution;
+	u32 bytes_per_line;
+	u8  bits_per_pixel;
+	u8  red_mask_pos;
+	u8  red_mask_size;
+	u8  green_mask_pos;
+	u8  green_mask_size;
+	u8  blue_mask_pos;
+	u8  blue_mask_size;
+	u8  reserved_mask_pos;
+	u8  reserved_mask_size;
+};
+
+/* A device, together with the coreboot table entry that describes it. */
+struct coreboot_device {
+	struct device dev;
+	union {
+		struct coreboot_table_entry entry;
+		struct lb_cbmem_ref cbmem_ref;
+		struct lb_framebuffer framebuffer;
+	};
+};
+
+/* A driver for handling devices described in coreboot tables. */
+struct coreboot_driver {
+	int (*probe)(struct coreboot_device *);
+	int (*remove)(struct coreboot_device *);
+	struct device_driver drv;
+	u32 tag;
+};
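+
+/*
+ * Minimal client sketch (illustrative; the tag value is hypothetical — see
+ * the framebuffer, memconsole and vpd drivers in this patch for real users):
+ *
+ *	static int foo_probe(struct coreboot_device *dev)
+ *	{
+ *		return 0;
+ *	}
+ *
+ *	static struct coreboot_driver foo_driver = {
+ *		.probe	= foo_probe,
+ *		.drv	= { .name = "foo" },
+ *		.tag	= 0x99,
+ *	};
+ *
+ *	// registered via coreboot_driver_register(&foo_driver);
+ */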
+
+/* Register a driver that uses the data from a coreboot table. */
+int coreboot_driver_register(struct coreboot_driver *driver);
+
+/* Unregister a driver that uses the data from a coreboot table. */
+void coreboot_driver_unregister(struct coreboot_driver *driver);
 
 /* Initialize coreboot table module given a pointer to iomem */
-int coreboot_table_init(void __iomem *ptr);
+int coreboot_table_init(struct device *dev, void __iomem *ptr);
 
 /* Cleanup coreboot table module */
 int coreboot_table_exit(void);
diff --git a/drivers/firmware/google/framebuffer-coreboot.c b/drivers/firmware/google/framebuffer-coreboot.c
new file mode 100644
index 0000000..b8b49c0
--- /dev/null
+++ b/drivers/firmware/google/framebuffer-coreboot.c
@@ -0,0 +1,115 @@
+/*
+ * framebuffer-coreboot.c
+ *
+ * Memory based framebuffer accessed through coreboot table.
+ *
+ * Copyright 2012-2013 David Herrmann <dh.herrmann@gmail.com>
+ * Copyright 2017 Google Inc.
+ * Copyright 2017 Samuel Holland <samuel@sholland.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License v2.0 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/platform_data/simplefb.h>
+#include <linux/platform_device.h>
+
+#include "coreboot_table.h"
+
+#define CB_TAG_FRAMEBUFFER 0x12
+
+static const struct simplefb_format formats[] = SIMPLEFB_FORMATS;
+
+static int framebuffer_probe(struct coreboot_device *dev)
+{
+	int i;
+	u32 length;
+	struct lb_framebuffer *fb = &dev->framebuffer;
+	struct platform_device *pdev;
+	struct resource res;
+	struct simplefb_platform_data pdata = {
+		.width = fb->x_resolution,
+		.height = fb->y_resolution,
+		.stride = fb->bytes_per_line,
+		.format = NULL,
+	};
+
+	for (i = 0; i < ARRAY_SIZE(formats); ++i) {
+		if (fb->bits_per_pixel     == formats[i].bits_per_pixel &&
+		    fb->red_mask_pos       == formats[i].red.offset &&
+		    fb->red_mask_size      == formats[i].red.length &&
+		    fb->green_mask_pos     == formats[i].green.offset &&
+		    fb->green_mask_size    == formats[i].green.length &&
+		    fb->blue_mask_pos      == formats[i].blue.offset &&
+		    fb->blue_mask_size     == formats[i].blue.length &&
+		    fb->reserved_mask_pos  == formats[i].transp.offset &&
+		    fb->reserved_mask_size == formats[i].transp.length)
+			pdata.format = formats[i].name;
+	}
+	if (!pdata.format)
+		return -ENODEV;
+
+	memset(&res, 0, sizeof(res));
+	res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res.name = "Coreboot Framebuffer";
+	res.start = fb->physical_address;
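+	/* visible frame covers height * stride bytes, rounded up to a page */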
+	length = PAGE_ALIGN(fb->y_resolution * fb->bytes_per_line);
+	res.end = res.start + length - 1;
+	if (res.end <= res.start)
+		return -EINVAL;
+
+	pdev = platform_device_register_resndata(&dev->dev,
+						 "simple-framebuffer", 0,
+						 &res, 1, &pdata,
+						 sizeof(pdata));
+	if (IS_ERR(pdev))
+		pr_warn("coreboot: could not register framebuffer\n");
+	else
+		dev_set_drvdata(&dev->dev, pdev);
+
+	return PTR_ERR_OR_ZERO(pdev);
+}
+
+static int framebuffer_remove(struct coreboot_device *dev)
+{
+	struct platform_device *pdev = dev_get_drvdata(&dev->dev);
+
+	platform_device_unregister(pdev);
+
+	return 0;
+}
+
+static struct coreboot_driver framebuffer_driver = {
+	.probe = framebuffer_probe,
+	.remove = framebuffer_remove,
+	.drv = {
+		.name = "framebuffer",
+	},
+	.tag = CB_TAG_FRAMEBUFFER,
+};
+
+static int __init coreboot_framebuffer_init(void)
+{
+	return coreboot_driver_register(&framebuffer_driver);
+}
+
+static void coreboot_framebuffer_exit(void)
+{
+	coreboot_driver_unregister(&framebuffer_driver);
+}
+
+module_init(coreboot_framebuffer_init);
+module_exit(coreboot_framebuffer_exit);
+
+MODULE_AUTHOR("Samuel Holland <samuel@sholland.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/google/memconsole-coreboot.c b/drivers/firmware/google/memconsole-coreboot.c
index 5273888..b29e107 100644
--- a/drivers/firmware/google/memconsole-coreboot.c
+++ b/drivers/firmware/google/memconsole-coreboot.c
@@ -15,9 +15,9 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/platform_device.h>
 
 #include "memconsole.h"
 #include "coreboot_table.h"
@@ -73,18 +73,19 @@
 	return done;
 }
 
-static int memconsole_coreboot_init(phys_addr_t physaddr)
+static int memconsole_probe(struct coreboot_device *dev)
 {
 	struct cbmem_cons __iomem *tmp_cbmc;
 
-	tmp_cbmc = memremap(physaddr, sizeof(*tmp_cbmc), MEMREMAP_WB);
+	tmp_cbmc = memremap(dev->cbmem_ref.cbmem_addr,
+			    sizeof(*tmp_cbmc), MEMREMAP_WB);
 
 	if (!tmp_cbmc)
 		return -ENOMEM;
 
 	/* Read size only once to prevent overrun attack through /dev/mem. */
 	cbmem_console_size = tmp_cbmc->size_dont_access_after_boot;
-	cbmem_console = memremap(physaddr,
+	cbmem_console = memremap(dev->cbmem_ref.cbmem_addr,
 				 cbmem_console_size + sizeof(*cbmem_console),
 				 MEMREMAP_WB);
 	memunmap(tmp_cbmc);
@@ -93,26 +94,11 @@
 		return -ENOMEM;
 
 	memconsole_setup(memconsole_coreboot_read);
-	return 0;
-}
-
-static int memconsole_probe(struct platform_device *pdev)
-{
-	int ret;
-	struct lb_cbmem_ref entry;
-
-	ret = coreboot_table_find(CB_TAG_CBMEM_CONSOLE, &entry, sizeof(entry));
-	if (ret)
-		return ret;
-
-	ret = memconsole_coreboot_init(entry.cbmem_addr);
-	if (ret)
-		return ret;
 
 	return memconsole_sysfs_init();
 }
 
-static int memconsole_remove(struct platform_device *pdev)
+static int memconsole_remove(struct coreboot_device *dev)
 {
 	memconsole_exit();
 
@@ -122,28 +108,27 @@
 	return 0;
 }
 
-static struct platform_driver memconsole_driver = {
+static struct coreboot_driver memconsole_driver = {
 	.probe = memconsole_probe,
 	.remove = memconsole_remove,
-	.driver = {
+	.drv = {
 		.name = "memconsole",
 	},
+	.tag = CB_TAG_CBMEM_CONSOLE,
 };
 
-static int __init platform_memconsole_init(void)
+static void coreboot_memconsole_exit(void)
 {
-	struct platform_device *pdev;
-
-	pdev = platform_device_register_simple("memconsole", -1, NULL, 0);
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
-
-	platform_driver_register(&memconsole_driver);
-
-	return 0;
+	coreboot_driver_unregister(&memconsole_driver);
 }
 
-module_init(platform_memconsole_init);
+static int __init coreboot_memconsole_init(void)
+{
+	return coreboot_driver_register(&memconsole_driver);
+}
+
+module_exit(coreboot_memconsole_exit);
+module_init(coreboot_memconsole_init);
 
 MODULE_AUTHOR("Google, Inc.");
 MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/google/vpd.c b/drivers/firmware/google/vpd.c
index e4b40f2..e9db895 100644
--- a/drivers/firmware/google/vpd.c
+++ b/drivers/firmware/google/vpd.c
@@ -286,20 +286,15 @@
 	return 0;
 }
 
-static int vpd_probe(struct platform_device *pdev)
+static int vpd_probe(struct coreboot_device *dev)
 {
 	int ret;
-	struct lb_cbmem_ref entry;
-
-	ret = coreboot_table_find(CB_TAG_VPD, &entry, sizeof(entry));
-	if (ret)
-		return ret;
 
 	vpd_kobj = kobject_create_and_add("vpd", firmware_kobj);
 	if (!vpd_kobj)
 		return -ENOMEM;
 
-	ret = vpd_sections_init(entry.cbmem_addr);
+	ret = vpd_sections_init(dev->cbmem_ref.cbmem_addr);
 	if (ret) {
 		kobject_put(vpd_kobj);
 		return ret;
@@ -308,7 +303,7 @@
 	return 0;
 }
 
-static int vpd_remove(struct platform_device *pdev)
+static int vpd_remove(struct coreboot_device *dev)
 {
 	vpd_section_destroy(&ro_vpd);
 	vpd_section_destroy(&rw_vpd);
@@ -318,41 +313,27 @@
 	return 0;
 }
 
-static struct platform_driver vpd_driver = {
+static struct coreboot_driver vpd_driver = {
 	.probe = vpd_probe,
 	.remove = vpd_remove,
-	.driver = {
+	.drv = {
 		.name = "vpd",
 	},
+	.tag = CB_TAG_VPD,
 };
 
-static struct platform_device *vpd_pdev;
-
-static int __init vpd_platform_init(void)
+static int __init coreboot_vpd_init(void)
 {
-	int ret;
-
-	ret = platform_driver_register(&vpd_driver);
-	if (ret)
-		return ret;
-
-	vpd_pdev = platform_device_register_simple("vpd", -1, NULL, 0);
-	if (IS_ERR(vpd_pdev)) {
-		platform_driver_unregister(&vpd_driver);
-		return PTR_ERR(vpd_pdev);
-	}
-
-	return 0;
+	return coreboot_driver_register(&vpd_driver);
 }
 
-static void __exit vpd_platform_exit(void)
+static void __exit coreboot_vpd_exit(void)
 {
-	platform_device_unregister(vpd_pdev);
-	platform_driver_unregister(&vpd_driver);
+	coreboot_driver_unregister(&vpd_driver);
 }
 
-module_init(vpd_platform_init);
-module_exit(vpd_platform_exit);
+module_init(coreboot_vpd_init);
+module_exit(coreboot_vpd_exit);
 
 MODULE_AUTHOR("Google, Inc.");
 MODULE_LICENSE("GPL");
diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index f47ef84..ee9c542 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -53,7 +53,6 @@
 config FPGA_MGR_ZYNQ_FPGA
 	tristate "Xilinx Zynq FPGA"
 	depends on ARCH_ZYNQ || COMPILE_TEST
-	depends on HAS_DMA
 	help
 	  FPGA manager driver support for Xilinx Zynq FPGAs.
 
@@ -70,6 +69,13 @@
 	help
 	  FPGA manager driver support for Lattice iCE40 FPGAs over SPI.
 
+config FPGA_MGR_MACHXO2_SPI
+	tristate "Lattice MachXO2 SPI"
+	depends on SPI
+	help
+	  FPGA manager driver support for Lattice MachXO2 configuration
+	  over slave SPI interface.
+
 config FPGA_MGR_TS73XX
 	tristate "Technologic Systems TS-73xx SBC FPGA Manager"
 	depends on ARCH_EP93XX && MACH_TS72XX
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index 3cb276a..f9803da 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -10,6 +10,7 @@
 obj-$(CONFIG_FPGA_MGR_ALTERA_CVP)	+= altera-cvp.o
 obj-$(CONFIG_FPGA_MGR_ALTERA_PS_SPI)	+= altera-ps-spi.o
 obj-$(CONFIG_FPGA_MGR_ICE40_SPI)	+= ice40-spi.o
+obj-$(CONFIG_FPGA_MGR_MACHXO2_SPI)	+= machxo2-spi.o
 obj-$(CONFIG_FPGA_MGR_SOCFPGA)		+= socfpga.o
 obj-$(CONFIG_FPGA_MGR_SOCFPGA_A10)	+= socfpga-a10.o
 obj-$(CONFIG_FPGA_MGR_TS73XX)		+= ts73xx-fpga.o
diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c
index 77b04e4..dd4edd8 100644
--- a/drivers/fpga/altera-cvp.c
+++ b/drivers/fpga/altera-cvp.c
@@ -401,6 +401,7 @@
 			    const struct pci_device_id *dev_id)
 {
 	struct altera_cvp_conf *conf;
+	struct fpga_manager *mgr;
 	u16 cmd, val;
 	int ret;
 
@@ -452,16 +453,24 @@
 	snprintf(conf->mgr_name, sizeof(conf->mgr_name), "%s @%s",
 		 ALTERA_CVP_MGR_NAME, pci_name(pdev));
 
-	ret = fpga_mgr_register(&pdev->dev, conf->mgr_name,
-				&altera_cvp_ops, conf);
-	if (ret)
+	mgr = fpga_mgr_create(&pdev->dev, conf->mgr_name,
+			      &altera_cvp_ops, conf);
+	if (!mgr)
+		return -ENOMEM;
+
+	pci_set_drvdata(pdev, mgr);
+
+	ret = fpga_mgr_register(mgr);
+	if (ret) {
+		fpga_mgr_free(mgr);
 		goto err_unmap;
+	}
 
 	ret = driver_create_file(&altera_cvp_driver.driver,
 				 &driver_attr_chkcfg);
 	if (ret) {
 		dev_err(&pdev->dev, "Can't create sysfs chkcfg file\n");
-		fpga_mgr_unregister(&pdev->dev);
+		fpga_mgr_unregister(mgr);
 		goto err_unmap;
 	}
 
@@ -483,7 +492,7 @@
 	u16 cmd;
 
 	driver_remove_file(&altera_cvp_driver.driver, &driver_attr_chkcfg);
-	fpga_mgr_unregister(&pdev->dev);
+	fpga_mgr_unregister(mgr);
 	pci_iounmap(pdev, conf->map);
 	pci_release_region(pdev, CVP_BAR);
 	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
diff --git a/drivers/fpga/altera-fpga2sdram.c b/drivers/fpga/altera-fpga2sdram.c
index d4eeb74..23660cc 100644
--- a/drivers/fpga/altera-fpga2sdram.c
+++ b/drivers/fpga/altera-fpga2sdram.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA to SDRAM Bridge Driver for Altera SoCFPGA Devices
  *
  *  Copyright (C) 2013-2016 Altera Corporation, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
@@ -106,6 +95,7 @@
 {
 	struct device *dev = &pdev->dev;
 	struct alt_fpga2sdram_data *priv;
+	struct fpga_bridge *br;
 	u32 enable;
 	struct regmap *sysmgr;
 	int ret = 0;
@@ -131,10 +121,18 @@
 	/* Get f2s bridge configuration saved in handoff register */
 	regmap_read(sysmgr, SYSMGR_ISWGRP_HANDOFF3, &priv->mask);
 
-	ret = fpga_bridge_register(dev, F2S_BRIDGE_NAME,
-				   &altera_fpga2sdram_br_ops, priv);
-	if (ret)
+	br = fpga_bridge_create(dev, F2S_BRIDGE_NAME,
+				&altera_fpga2sdram_br_ops, priv);
+	if (!br)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, br);
+
+	ret = fpga_bridge_register(br);
+	if (ret) {
+		fpga_bridge_free(br);
 		return ret;
+	}
 
 	dev_info(dev, "driver initialized with handoff %08x\n", priv->mask);
 
@@ -146,7 +144,7 @@
 				 (enable ? "enabling" : "disabling"));
 			ret = _alt_fpga2sdram_enable_set(priv, enable);
 			if (ret) {
-				fpga_bridge_unregister(&pdev->dev);
+				fpga_bridge_unregister(br);
 				return ret;
 			}
 		}
@@ -157,7 +155,9 @@
 
 static int alt_fpga_bridge_remove(struct platform_device *pdev)
 {
-	fpga_bridge_unregister(&pdev->dev);
+	struct fpga_bridge *br = platform_get_drvdata(pdev);
+
+	fpga_bridge_unregister(br);
 
 	return 0;
 }
diff --git a/drivers/fpga/altera-freeze-bridge.c b/drivers/fpga/altera-freeze-bridge.c
index 6159cfcf..ffd586c 100644
--- a/drivers/fpga/altera-freeze-bridge.c
+++ b/drivers/fpga/altera-freeze-bridge.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA Freeze Bridge Controller
  *
  *  Copyright (C) 2016 Altera Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/delay.h>
 #include <linux/io.h>
@@ -221,8 +210,10 @@
 	struct device_node *np = pdev->dev.of_node;
 	void __iomem *base_addr;
 	struct altera_freeze_br_data *priv;
+	struct fpga_bridge *br;
 	struct resource *res;
 	u32 status, revision;
+	int ret;
 
 	if (!np)
 		return -ENODEV;
@@ -254,13 +245,27 @@
 
 	priv->base_addr = base_addr;
 
-	return fpga_bridge_register(dev, FREEZE_BRIDGE_NAME,
-				    &altera_freeze_br_br_ops, priv);
+	br = fpga_bridge_create(dev, FREEZE_BRIDGE_NAME,
+				&altera_freeze_br_br_ops, priv);
+	if (!br)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, br);
+
+	ret = fpga_bridge_register(br);
+	if (ret) {
+		fpga_bridge_free(br);
+		return ret;
+	}
+
+	return 0;
 }
 
 static int altera_freeze_br_remove(struct platform_device *pdev)
 {
-	fpga_bridge_unregister(&pdev->dev);
+	struct fpga_bridge *br = platform_get_drvdata(pdev);
+
+	fpga_bridge_unregister(br);
 
 	return 0;
 }
diff --git a/drivers/fpga/altera-hps2fpga.c b/drivers/fpga/altera-hps2fpga.c
index 406d2f1..a974d3f 100644
--- a/drivers/fpga/altera-hps2fpga.c
+++ b/drivers/fpga/altera-hps2fpga.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA to/from HPS Bridge Driver for Altera SoCFPGA Devices
  *
@@ -6,18 +7,6 @@
  * Includes this patch from the mailing list:
  *   fpga: altera-hps2fpga: fix HPS2FPGA bridge visibility to L3 masters
  *   Signed-off-by: Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
@@ -139,6 +128,7 @@
 	struct device *dev = &pdev->dev;
 	struct altera_hps2fpga_data *priv;
 	const struct of_device_id *of_id;
+	struct fpga_bridge *br;
 	u32 enable;
 	int ret;
 
@@ -190,11 +180,24 @@
 		}
 	}
 
-	ret = fpga_bridge_register(dev, priv->name, &altera_hps2fpga_br_ops,
-				   priv);
-err:
+	br = fpga_bridge_create(dev, priv->name, &altera_hps2fpga_br_ops, priv);
+	if (!br) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	platform_set_drvdata(pdev, br);
+
+	ret = fpga_bridge_register(br);
 	if (ret)
-		clk_disable_unprepare(priv->clk);
+		goto err_free;
+
+	return 0;
+
+err_free:
+	fpga_bridge_free(br);
+err:
+	clk_disable_unprepare(priv->clk);
 
 	return ret;
 }
@@ -204,7 +207,7 @@
 	struct fpga_bridge *bridge = platform_get_drvdata(pdev);
 	struct altera_hps2fpga_data *priv = bridge->priv;
 
-	fpga_bridge_unregister(&pdev->dev);
+	fpga_bridge_unregister(bridge);
 
 	clk_disable_unprepare(priv->clk);
 
diff --git a/drivers/fpga/altera-pr-ip-core-plat.c b/drivers/fpga/altera-pr-ip-core-plat.c
index 8fb36b8..b293d83 100644
--- a/drivers/fpga/altera-pr-ip-core-plat.c
+++ b/drivers/fpga/altera-pr-ip-core-plat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for Altera Partial Reconfiguration IP Core
  *
@@ -5,18 +6,6 @@
  *
  * Based on socfpga-a10.c Copyright (C) 2015-2016 Altera Corporation
  *  by Alan Tull <atull@opensource.altera.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/fpga/altera-pr-ip-core.h>
 #include <linux/module.h>
diff --git a/drivers/fpga/altera-pr-ip-core.c b/drivers/fpga/altera-pr-ip-core.c
index a7b31f9..65e0b6a 100644
--- a/drivers/fpga/altera-pr-ip-core.c
+++ b/drivers/fpga/altera-pr-ip-core.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for Altera Partial Reconfiguration IP Core
  *
@@ -5,18 +6,6 @@
  *
  * Based on socfpga-a10.c Copyright (C) 2015-2016 Altera Corporation
  *  by Alan Tull <atull@opensource.altera.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/delay.h>
 #include <linux/fpga/altera-pr-ip-core.h>
@@ -187,6 +176,8 @@
 int alt_pr_register(struct device *dev, void __iomem *reg_base)
 {
 	struct alt_pr_priv *priv;
+	struct fpga_manager *mgr;
+	int ret;
 	u32 val;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -201,15 +192,27 @@
 		(val & ALT_PR_CSR_STATUS_MSK) >> ALT_PR_CSR_STATUS_SFT,
 		(int)(val & ALT_PR_CSR_PR_START));
 
-	return fpga_mgr_register(dev, dev_name(dev), &alt_pr_ops, priv);
+	mgr = fpga_mgr_create(dev, dev_name(dev), &alt_pr_ops, priv);
+	if (!mgr)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, mgr);
+
+	ret = fpga_mgr_register(mgr);
+	if (ret)
+		fpga_mgr_free(mgr);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(alt_pr_register);
 
 int alt_pr_unregister(struct device *dev)
 {
+	struct fpga_manager *mgr = dev_get_drvdata(dev);
+
 	dev_dbg(dev, "%s\n", __func__);
 
-	fpga_mgr_unregister(dev);
+	fpga_mgr_unregister(mgr);
 
 	return 0;
 }
diff --git a/drivers/fpga/altera-ps-spi.c b/drivers/fpga/altera-ps-spi.c
index 06d212a..24b25c6 100644
--- a/drivers/fpga/altera-ps-spi.c
+++ b/drivers/fpga/altera-ps-spi.c
@@ -238,6 +238,8 @@
 {
 	struct altera_ps_conf *conf;
 	const struct of_device_id *of_id;
+	struct fpga_manager *mgr;
+	int ret;
 
 	conf = devm_kzalloc(&spi->dev, sizeof(*conf), GFP_KERNEL);
 	if (!conf)
@@ -273,13 +275,25 @@
 	snprintf(conf->mgr_name, sizeof(conf->mgr_name), "%s %s",
 		 dev_driver_string(&spi->dev), dev_name(&spi->dev));
 
-	return fpga_mgr_register(&spi->dev, conf->mgr_name,
-				 &altera_ps_ops, conf);
+	mgr = fpga_mgr_create(&spi->dev, conf->mgr_name,
+			      &altera_ps_ops, conf);
+	if (!mgr)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, mgr);
+
+	ret = fpga_mgr_register(mgr);
+	if (ret)
+		fpga_mgr_free(mgr);
+
+	return ret;
 }
 
 static int altera_ps_remove(struct spi_device *spi)
 {
-	fpga_mgr_unregister(&spi->dev);
+	struct fpga_manager *mgr = spi_get_drvdata(spi);
+
+	fpga_mgr_unregister(mgr);
 
 	return 0;
 }
diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
index 31bd2c5..24b8f98 100644
--- a/drivers/fpga/fpga-bridge.c
+++ b/drivers/fpga/fpga-bridge.c
@@ -1,20 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA Bridge Framework Driver
  *
  *  Copyright (C) 2013-2016 Altera Corporation, All Rights Reserved.
  *  Copyright (C) 2017 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/fpga/fpga-bridge.h>
 #include <linux/idr.h>
@@ -132,6 +121,7 @@
 /**
  * fpga_bridge_get - get an exclusive reference to a fpga bridge
  * @dev:	parent device that fpga bridge was registered with
+ * @info:	fpga image info
  *
  * Given a device, get an exclusive reference to a fpga bridge.
  *
@@ -328,28 +318,29 @@
 ATTRIBUTE_GROUPS(fpga_bridge);
 
 /**
- * fpga_bridge_register - register a fpga bridge driver
+ * fpga_bridge_create - create and initialize a struct fpga_bridge
  * @dev:	FPGA bridge device from pdev
  * @name:	FPGA bridge name
  * @br_ops:	pointer to structure of fpga bridge ops
  * @priv:	FPGA bridge private data
  *
- * Return: 0 for success, error code otherwise.
+ * Return: struct fpga_bridge or NULL
  */
-int fpga_bridge_register(struct device *dev, const char *name,
-			 const struct fpga_bridge_ops *br_ops, void *priv)
+struct fpga_bridge *fpga_bridge_create(struct device *dev, const char *name,
+				       const struct fpga_bridge_ops *br_ops,
+				       void *priv)
 {
 	struct fpga_bridge *bridge;
 	int id, ret = 0;
 
 	if (!name || !strlen(name)) {
 		dev_err(dev, "Attempt to register with no name!\n");
-		return -EINVAL;
+		return NULL;
 	}
 
 	bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
 	if (!bridge)
-		return -ENOMEM;
+		return NULL;
 
 	id = ida_simple_get(&fpga_bridge_ida, 0, 0, GFP_KERNEL);
 	if (id < 0) {
@@ -370,40 +361,62 @@
 	bridge->dev.parent = dev;
 	bridge->dev.of_node = dev->of_node;
 	bridge->dev.id = id;
-	dev_set_drvdata(dev, bridge);
 
 	ret = dev_set_name(&bridge->dev, "br%d", id);
 	if (ret)
 		goto error_device;
 
-	ret = device_add(&bridge->dev);
-	if (ret)
-		goto error_device;
-
-	of_platform_populate(dev->of_node, NULL, NULL, dev);
-
-	dev_info(bridge->dev.parent, "fpga bridge [%s] registered\n",
-		 bridge->name);
-
-	return 0;
+	return bridge;
 
 error_device:
 	ida_simple_remove(&fpga_bridge_ida, id);
 error_kfree:
 	kfree(bridge);
 
-	return ret;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(fpga_bridge_create);
+
+/**
+ * fpga_bridge_free - free a fpga bridge and its id
+ * @bridge:	FPGA bridge struct created by fpga_bridge_create
+ */
+void fpga_bridge_free(struct fpga_bridge *bridge)
+{
+	ida_simple_remove(&fpga_bridge_ida, bridge->dev.id);
+	kfree(bridge);
+}
+EXPORT_SYMBOL_GPL(fpga_bridge_free);
+
+/**
+ * fpga_bridge_register - register a fpga bridge
+ * @bridge:	FPGA bridge struct created by fpga_bridge_create
+ *
+ * Return: 0 for success, error code otherwise.
+ */
+int fpga_bridge_register(struct fpga_bridge *bridge)
+{
+	struct device *dev = &bridge->dev;
+	int ret;
+
+	ret = device_add(dev);
+	if (ret)
+		return ret;
+
+	of_platform_populate(dev->of_node, NULL, NULL, dev);
+
+	dev_info(dev->parent, "fpga bridge [%s] registered\n", bridge->name);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(fpga_bridge_register);
 
 /**
- * fpga_bridge_unregister - unregister a fpga bridge driver
- * @dev: FPGA bridge device from pdev
+ * fpga_bridge_unregister - unregister and free a fpga bridge
+ * @bridge:	FPGA bridge struct created by fpga_bridge_create
  */
-void fpga_bridge_unregister(struct device *dev)
+void fpga_bridge_unregister(struct fpga_bridge *bridge)
 {
-	struct fpga_bridge *bridge = dev_get_drvdata(dev);
-
 	/*
 	 * If the low level driver provides a method for putting bridge into
 	 * a desired state upon unregister, do it.
@@ -419,8 +432,7 @@
 {
 	struct fpga_bridge *bridge = to_fpga_bridge(dev);
 
-	ida_simple_remove(&fpga_bridge_ida, bridge->dev.id);
-	kfree(bridge);
+	fpga_bridge_free(bridge);
 }
 
 static int __init fpga_bridge_dev_init(void)
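
For reference, every bridge driver converted in this series follows the same
shape: allocate with fpga_bridge_create(), stash the handle in drvdata, then
fpga_bridge_register(); fpga_bridge_free() is called by hand only when
registration never happened, because once registered the class release hook
(last hunk above) frees the bridge on unregister. A minimal sketch of that
pattern; my_br_ops and the probe/remove names are hypothetical, not code from
this diff:

	#include <linux/fpga/fpga-bridge.h>
	#include <linux/platform_device.h>

	static const struct fpga_bridge_ops my_br_ops; /* callbacks elided */

	static int my_bridge_probe(struct platform_device *pdev)
	{
		struct fpga_bridge *br;
		int ret;

		/* create only allocates; nothing is user-visible yet */
		br = fpga_bridge_create(&pdev->dev, "my bridge", &my_br_ops, NULL);
		if (!br)
			return -ENOMEM;

		platform_set_drvdata(pdev, br);

		ret = fpga_bridge_register(br);
		if (ret)
			fpga_bridge_free(br); /* register failed: free by hand */

		return ret;
	}

	static int my_bridge_remove(struct platform_device *pdev)
	{
		/* the device release callback does the freeing from here on */
		fpga_bridge_unregister(platform_get_drvdata(pdev));

		return 0;
	}
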
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index 9939d2c..c1564cf 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA Manager Core
  *
@@ -6,18 +7,6 @@
  *
  * With code from the mailing list:
  * Copyright (C) 2013 Xilinx, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/firmware.h>
 #include <linux/fpga/fpga-mgr.h>
@@ -32,6 +21,12 @@
 static DEFINE_IDA(fpga_mgr_ida);
 static struct class *fpga_mgr_class;
 
+/**
+ * fpga_image_info_alloc - Allocate a FPGA image info struct
+ * @dev: owning device
+ *
+ * Return: struct fpga_image_info or NULL
+ */
 struct fpga_image_info *fpga_image_info_alloc(struct device *dev)
 {
 	struct fpga_image_info *info;
@@ -50,6 +45,10 @@
 }
 EXPORT_SYMBOL_GPL(fpga_image_info_alloc);
 
+/**
+ * fpga_image_info_free - Free a FPGA image info struct
+ * @info: FPGA image info struct to free
+ */
 void fpga_image_info_free(struct fpga_image_info *info)
 {
 	struct device *dev;
@@ -234,7 +233,7 @@
 /**
  * fpga_mgr_buf_load - load fpga from image in buffer
  * @mgr:	fpga manager
- * @flags:	flags setting fpga confuration modes
+ * @info:	fpga image info
  * @buf:	buffer contain fpga image
  * @count:	byte count of buf
  *
@@ -343,6 +342,16 @@
 	return ret;
 }
 
+/**
+ * fpga_mgr_load - load FPGA from scatter/gather table, buffer, or firmware
+ * @mgr:	fpga manager
+ * @info:	fpga image information.
+ *
+ * Load the FPGA from an image which is indicated in @info.  If successful, the
+ * FPGA ends up in operating mode.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
 int fpga_mgr_load(struct fpga_manager *mgr, struct fpga_image_info *info)
 {
 	if (info->sgt)
@@ -429,11 +438,9 @@
 }
 
 /**
- * fpga_mgr_get - get a reference to a fpga mgr
+ * fpga_mgr_get - Given a device, get a reference to a fpga mgr.
  * @dev:	parent device that fpga mgr was registered with
  *
- * Given a device, get a reference to a fpga mgr.
- *
  * Return: fpga manager struct or IS_ERR() condition containing error code.
  */
 struct fpga_manager *fpga_mgr_get(struct device *dev)
@@ -453,10 +460,9 @@
 }
 
 /**
- * of_fpga_mgr_get - get a reference to a fpga mgr
- * @node:	device node
+ * of_fpga_mgr_get - Given a device node, get a reference to a fpga mgr.
  *
- * Given a device node, get a reference to a fpga mgr.
+ * @node:	device node
  *
  * Return: fpga manager struct or IS_ERR() condition containing error code.
  */
@@ -489,7 +495,10 @@
  * @mgr:	fpga manager
  *
  * Given a pointer to FPGA Manager (from fpga_mgr_get() or
- * of_fpga_mgr_put()) attempt to get the mutex.
+ * of_fpga_mgr_get()) attempt to get the mutex. The user should call
+ * fpga_mgr_lock() and verify that it returns 0 before attempting to
+ * program the FPGA.  Likewise, the user should call fpga_mgr_unlock()
+ * when done programming the FPGA.
  *
  * Return: 0 for success or -EBUSY
  */
@@ -505,7 +514,7 @@
 EXPORT_SYMBOL_GPL(fpga_mgr_lock);
 
 /**
- * fpga_mgr_unlock - Unlock FPGA manager
+ * fpga_mgr_unlock - Unlock FPGA manager after done programming
  * @mgr:	fpga manager
  */
 void fpga_mgr_unlock(struct fpga_manager *mgr)
@@ -515,17 +524,17 @@
 EXPORT_SYMBOL_GPL(fpga_mgr_unlock);
 
 /**
- * fpga_mgr_register - register a low level fpga manager driver
+ * fpga_mgr_create - create and initialize a FPGA manager struct
  * @dev:	fpga manager device from pdev
  * @name:	fpga manager name
  * @mops:	pointer to structure of fpga manager ops
  * @priv:	fpga manager private data
  *
- * Return: 0 on success, negative error code otherwise.
+ * Return: pointer to struct fpga_manager or NULL
  */
-int fpga_mgr_register(struct device *dev, const char *name,
-		      const struct fpga_manager_ops *mops,
-		      void *priv)
+struct fpga_manager *fpga_mgr_create(struct device *dev, const char *name,
+				     const struct fpga_manager_ops *mops,
+				     void *priv)
 {
 	struct fpga_manager *mgr;
 	int id, ret;
@@ -534,17 +543,17 @@
 	    !mops->write_init || (!mops->write && !mops->write_sg) ||
 	    (mops->write && mops->write_sg)) {
 		dev_err(dev, "Attempt to register without fpga_manager_ops\n");
-		return -EINVAL;
+		return NULL;
 	}
 
 	if (!name || !strlen(name)) {
 		dev_err(dev, "Attempt to register with no name!\n");
-		return -EINVAL;
+		return NULL;
 	}
 
 	mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
 	if (!mgr)
-		return -ENOMEM;
+		return NULL;
 
 	id = ida_simple_get(&fpga_mgr_ida, 0, 0, GFP_KERNEL);
 	if (id < 0) {
@@ -558,25 +567,56 @@
 	mgr->mops = mops;
 	mgr->priv = priv;
 
-	/*
-	 * Initialize framework state by requesting low level driver read state
-	 * from device.  FPGA may be in reset mode or may have been programmed
-	 * by bootloader or EEPROM.
-	 */
-	mgr->state = mgr->mops->state(mgr);
-
 	device_initialize(&mgr->dev);
 	mgr->dev.class = fpga_mgr_class;
 	mgr->dev.groups = mops->groups;
 	mgr->dev.parent = dev;
 	mgr->dev.of_node = dev->of_node;
 	mgr->dev.id = id;
-	dev_set_drvdata(dev, mgr);
 
 	ret = dev_set_name(&mgr->dev, "fpga%d", id);
 	if (ret)
 		goto error_device;
 
+	return mgr;
+
+error_device:
+	ida_simple_remove(&fpga_mgr_ida, id);
+error_kfree:
+	kfree(mgr);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(fpga_mgr_create);
+
+/**
+ * fpga_mgr_free - deallocate a FPGA manager
+ * @mgr:	fpga manager struct created by fpga_mgr_create
+ */
+void fpga_mgr_free(struct fpga_manager *mgr)
+{
+	ida_simple_remove(&fpga_mgr_ida, mgr->dev.id);
+	kfree(mgr);
+}
+EXPORT_SYMBOL_GPL(fpga_mgr_free);
+
+/**
+ * fpga_mgr_register - register a FPGA manager
+ * @mgr:	fpga manager struct created by fpga_mgr_create
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int fpga_mgr_register(struct fpga_manager *mgr)
+{
+	int ret;
+
+	/*
+	 * Initialize framework state by requesting low level driver read state
+	 * from device.  FPGA may be in reset mode or may have been programmed
+	 * by bootloader or EEPROM.
+	 */
+	mgr->state = mgr->mops->state(mgr);
+
 	ret = device_add(&mgr->dev);
 	if (ret)
 		goto error_device;
@@ -586,22 +626,18 @@
 	return 0;
 
 error_device:
-	ida_simple_remove(&fpga_mgr_ida, id);
-error_kfree:
-	kfree(mgr);
+	ida_simple_remove(&fpga_mgr_ida, mgr->dev.id);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(fpga_mgr_register);
 
 /**
- * fpga_mgr_unregister - unregister a low level fpga manager driver
- * @dev:	fpga manager device from pdev
+ * fpga_mgr_unregister - unregister and free a FPGA manager
+ * @mgr:	fpga manager struct
  */
-void fpga_mgr_unregister(struct device *dev)
+void fpga_mgr_unregister(struct fpga_manager *mgr)
 {
-	struct fpga_manager *mgr = dev_get_drvdata(dev);
-
 	dev_info(&mgr->dev, "%s %s\n", __func__, mgr->name);
 
 	/*
@@ -619,8 +655,7 @@
 {
 	struct fpga_manager *mgr = to_fpga_manager(dev);
 
-	ida_simple_remove(&fpga_mgr_ida, mgr->dev.id);
-	kfree(mgr);
+	fpga_mgr_free(mgr);
 }
 
 static int __init fpga_mgr_class_init(void)
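
The reworked fpga_mgr_lock()/fpga_mgr_unlock() kernel-doc above spells out the
consumer contract: take the lock, verify it returned 0, program, unlock. A
sketch of that flow using the fpga_image_info helpers documented in this hunk;
program_fpga and its firmware parameter are hypothetical names, and error
handling is kept minimal:

	#include <linux/fpga/fpga-mgr.h>

	static int program_fpga(struct device *dev, struct fpga_manager *mgr,
				const char *firmware)
	{
		struct fpga_image_info *info;
		int ret;

		info = fpga_image_info_alloc(dev);
		if (!info)
			return -ENOMEM;

		/* fpga_mgr_load() dispatches on info->sgt, buf or firmware */
		info->firmware_name = devm_kstrdup(dev, firmware, GFP_KERNEL);
		if (!info->firmware_name) {
			ret = -ENOMEM;
			goto out;
		}

		ret = fpga_mgr_lock(mgr);	/* 0 for success or -EBUSY */
		if (!ret) {
			ret = fpga_mgr_load(mgr, info);
			fpga_mgr_unlock(mgr);
		}
	out:
		fpga_image_info_free(info);

		return ret;
	}
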
diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
index edab2a2..6d214d7 100644
--- a/drivers/fpga/fpga-region.c
+++ b/drivers/fpga/fpga-region.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA Region - Device Tree support for FPGA programming under Linux
  *
  *  Copyright (C) 2013-2016 Altera Corporation
  *  Copyright (C) 2017 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-
 #include <linux/fpga/fpga-bridge.h>
 #include <linux/fpga/fpga-mgr.h>
 #include <linux/fpga/fpga-region.h>
@@ -93,8 +81,16 @@
 
 /**
  * fpga_region_program_fpga - program FPGA
+ *
  * @region: FPGA region
+ *
  * Program an FPGA using fpga image info (region->info).
+ * If the region has a get_bridges function, the exclusive reference for the
+ * bridges will be held if programming succeeds.  This is intended to prevent
+ * reprogramming the region until the caller considers it safe to do so.
+ * The caller will need to call fpga_bridges_put() before attempting to
+ * reprogram the region.
+ *
  * Return 0 for success or negative error code.
  */
 int fpga_region_program_fpga(struct fpga_region *region)
@@ -162,45 +158,86 @@
 }
 EXPORT_SYMBOL_GPL(fpga_region_program_fpga);
 
-int fpga_region_register(struct device *dev, struct fpga_region *region)
+/**
+ * fpga_region_create - alloc and init a struct fpga_region
+ * @dev: device parent
+ * @mgr: manager that programs this region
+ * @get_bridges: optional function to get bridges to a list
+ *
+ * Return: struct fpga_region or NULL
+ */
+struct fpga_region *fpga_region_create(struct device *dev,
+				       struct fpga_manager *mgr,
+				       int (*get_bridges)(struct fpga_region *))
 {
+	struct fpga_region *region;
 	int id, ret = 0;
 
+	region = kzalloc(sizeof(*region), GFP_KERNEL);
+	if (!region)
+		return NULL;
+
 	id = ida_simple_get(&fpga_region_ida, 0, 0, GFP_KERNEL);
 	if (id < 0)
-		return id;
+		goto err_free;
 
+	region->mgr = mgr;
+	region->get_bridges = get_bridges;
 	mutex_init(&region->mutex);
 	INIT_LIST_HEAD(&region->bridge_list);
+
 	device_initialize(&region->dev);
-	region->dev.groups = region->groups;
 	region->dev.class = fpga_region_class;
 	region->dev.parent = dev;
 	region->dev.of_node = dev->of_node;
 	region->dev.id = id;
-	dev_set_drvdata(dev, region);
 
 	ret = dev_set_name(&region->dev, "region%d", id);
 	if (ret)
 		goto err_remove;
 
-	ret = device_add(&region->dev);
-	if (ret)
-		goto err_remove;
-
-	return 0;
+	return region;
 
 err_remove:
 	ida_simple_remove(&fpga_region_ida, id);
-	return ret;
+err_free:
+	kfree(region);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(fpga_region_create);
+
+/**
+ * fpga_region_free - free a struct fpga_region
+ * @region: FPGA region created by fpga_region_create
+ */
+void fpga_region_free(struct fpga_region *region)
+{
+	ida_simple_remove(&fpga_region_ida, region->dev.id);
+	kfree(region);
+}
+EXPORT_SYMBOL_GPL(fpga_region_free);
+
+/**
+ * fpga_region_register - register a FPGA region
+ * @region: FPGA region created by fpga_region_create
+ * Return: 0 or -errno
+ */
+int fpga_region_register(struct fpga_region *region)
+{
+	return device_add(&region->dev);
 }
 EXPORT_SYMBOL_GPL(fpga_region_register);
 
-int fpga_region_unregister(struct fpga_region *region)
+/**
+ * fpga_region_unregister - unregister and free a FPGA region
+ * @region: FPGA region
+ */
+void fpga_region_unregister(struct fpga_region *region)
 {
 	device_unregister(&region->dev);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(fpga_region_unregister);
 
@@ -208,7 +245,7 @@
 {
 	struct fpga_region *region = to_fpga_region(dev);
 
-	ida_simple_remove(&fpga_region_ida, region->dev.id);
+	fpga_region_free(region);
 }
 
 /**
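
The region lifetime follows the same rule as the bridge and manager above:
fpga_region_free() is called directly only if fpga_region_register() was never
reached or failed; after a successful register, fpga_region_unregister() drops
the device and fpga_region_dev_release() does the freeing. A minimal sketch,
with my_region_setup hypothetical and the optional get_bridges callback left
NULL:

	#include <linux/err.h>
	#include <linux/fpga/fpga-region.h>

	static struct fpga_region *my_region_setup(struct device *dev,
						   struct fpga_manager *mgr)
	{
		struct fpga_region *region;
		int ret;

		/* get_bridges is optional and may be NULL */
		region = fpga_region_create(dev, mgr, NULL);
		if (!region)
			return ERR_PTR(-ENOMEM);

		ret = fpga_region_register(region);
		if (ret) {
			fpga_region_free(region); /* never registered */
			return ERR_PTR(ret);
		}

		/* later: fpga_region_unregister(region) frees it */
		return region;
	}
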
diff --git a/drivers/fpga/ice40-spi.c b/drivers/fpga/ice40-spi.c
index 7fca820..5981c7e 100644
--- a/drivers/fpga/ice40-spi.c
+++ b/drivers/fpga/ice40-spi.c
@@ -133,6 +133,7 @@
 {
 	struct device *dev = &spi->dev;
 	struct ice40_fpga_priv *priv;
+	struct fpga_manager *mgr;
 	int ret;
 
 	priv = devm_kzalloc(&spi->dev, sizeof(*priv), GFP_KERNEL);
@@ -174,14 +175,26 @@
 		return ret;
 	}
 
-	/* Register with the FPGA manager */
-	return fpga_mgr_register(dev, "Lattice iCE40 FPGA Manager",
-				 &ice40_fpga_ops, priv);
+	mgr = fpga_mgr_create(dev, "Lattice iCE40 FPGA Manager",
+			      &ice40_fpga_ops, priv);
+	if (!mgr)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, mgr);
+
+	ret = fpga_mgr_register(mgr);
+	if (ret)
+		fpga_mgr_free(mgr);
+
+	return ret;
 }
 
 static int ice40_fpga_remove(struct spi_device *spi)
 {
-	fpga_mgr_unregister(&spi->dev);
+	struct fpga_manager *mgr = spi_get_drvdata(spi);
+
+	fpga_mgr_unregister(mgr);
+
 	return 0;
 }
 
diff --git a/drivers/fpga/machxo2-spi.c b/drivers/fpga/machxo2-spi.c
new file mode 100644
index 0000000..a582e00
--- /dev/null
+++ b/drivers/fpga/machxo2-spi.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lattice MachXO2 Slave SPI Driver
+ *
+ * Manage Lattice FPGA firmware that is loaded over SPI using
+ * the slave serial configuration interface.
+ *
+ * Copyright (C) 2018 Paolo Pisati <p.pisati@gmail.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/fpga/fpga-mgr.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/spi/spi.h>
+
+/* MachXO2 Programming Guide - sysCONFIG Programming Commands */
+#define IDCODE_PUB		{0xe0, 0x00, 0x00, 0x00}
+#define ISC_ENABLE		{0xc6, 0x08, 0x00, 0x00}
+#define ISC_ERASE		{0x0e, 0x04, 0x00, 0x00}
+#define ISC_PROGRAMDONE		{0x5e, 0x00, 0x00, 0x00}
+#define LSC_INITADDRESS		{0x46, 0x00, 0x00, 0x00}
+#define LSC_PROGINCRNV		{0x70, 0x00, 0x00, 0x01}
+#define LSC_READ_STATUS		{0x3c, 0x00, 0x00, 0x00}
+#define LSC_REFRESH		{0x79, 0x00, 0x00, 0x00}
+
+/*
+ * Max CCLK in Slave SPI mode according to 'MachXO2 Family Data
+ * Sheet' sysCONFIG Port Timing Specifications (3-36)
+ */
+#define MACHXO2_MAX_SPEED		66000000
+
+#define MACHXO2_LOW_DELAY_USEC		5
+#define MACHXO2_HIGH_DELAY_USEC		200
+#define MACHXO2_REFRESH_USEC		4800
+#define MACHXO2_MAX_BUSY_LOOP		128
+#define MACHXO2_MAX_REFRESH_LOOP	16
+
+#define MACHXO2_PAGE_SIZE		16
+#define MACHXO2_BUF_SIZE		(MACHXO2_PAGE_SIZE + 4)
+
+/* Status register bits, errors and error mask */
+#define BUSY	12
+#define DONE	8
+#define DVER	27
+#define ENAB	9
+#define ERRBITS	23
+#define ERRMASK	7
+#define FAIL	13
+
+#define ENOERR	0 /* no error */
+#define EID	1
+#define ECMD	2
+#define ECRC	3
+#define EPREAM	4 /* preamble error */
+#define EABRT	5 /* abort error */
+#define EOVERFL	6 /* overflow error */
+#define ESDMEOF	7 /* SDM EOF */
+
+static inline u8 get_err(unsigned long *status)
+{
+	return (*status >> ERRBITS) & ERRMASK;
+}
+
+static int get_status(struct spi_device *spi, unsigned long *status)
+{
+	struct spi_message msg;
+	struct spi_transfer rx, tx;
+	static const u8 cmd[] = LSC_READ_STATUS;
+	int ret;
+
+	memset(&rx, 0, sizeof(rx));
+	memset(&tx, 0, sizeof(tx));
+	tx.tx_buf = cmd;
+	tx.len = sizeof(cmd);
+	rx.rx_buf = status;
+	rx.len = 4;
+	spi_message_init(&msg);
+	spi_message_add_tail(&tx, &msg);
+	spi_message_add_tail(&rx, &msg);
+	ret = spi_sync(spi, &msg);
+	if (ret)
+		return ret;
+
+	*status = be32_to_cpu(*status);
+
+	return 0;
+}
+
+#ifdef DEBUG
+static const char *get_err_string(u8 err)
+{
+	switch (err) {
+	case ENOERR:	return "No Error";
+	case EID:	return "ID ERR";
+	case ECMD:	return "CMD ERR";
+	case ECRC:	return "CRC ERR";
+	case EPREAM:	return "Preamble ERR";
+	case EABRT:	return "Abort ERR";
+	case EOVERFL:	return "Overflow ERR";
+	case ESDMEOF:	return "SDM EOF";
+	}
+
+	return "Default switch case";
+}
+#endif
+
+static void dump_status_reg(unsigned long *status)
+{
+#ifdef DEBUG
+	pr_debug("machxo2 status: 0x%08lX - done=%d, cfgena=%d, busy=%d, fail=%d, devver=%d, err=%s\n",
+		 *status, test_bit(DONE, status), test_bit(ENAB, status),
+		 test_bit(BUSY, status), test_bit(FAIL, status),
+		 test_bit(DVER, status), get_err_string(get_err(status)));
+#endif
+}
+
+static int wait_until_not_busy(struct spi_device *spi)
+{
+	unsigned long status;
+	int ret, loop = 0;
+
+	do {
+		ret = get_status(spi, &status);
+		if (ret)
+			return ret;
+		if (++loop >= MACHXO2_MAX_BUSY_LOOP)
+			return -EBUSY;
+	} while (test_bit(BUSY, &status));
+
+	return 0;
+}
+
+static int machxo2_cleanup(struct fpga_manager *mgr)
+{
+	struct spi_device *spi = mgr->priv;
+	struct spi_message msg;
+	struct spi_transfer tx[2];
+	static const u8 erase[] = ISC_ERASE;
+	static const u8 refresh[] = LSC_REFRESH;
+	int ret;
+
+	memset(tx, 0, sizeof(tx));
+	spi_message_init(&msg);
+	tx[0].tx_buf = &erase;
+	tx[0].len = sizeof(erase);
+	spi_message_add_tail(&tx[0], &msg);
+	ret = spi_sync(spi, &msg);
+	if (ret)
+		goto fail;
+
+	ret = wait_until_not_busy(spi);
+	if (ret)
+		goto fail;
+
+	spi_message_init(&msg);
+	tx[1].tx_buf = &refresh;
+	tx[1].len = sizeof(refresh);
+	tx[1].delay_usecs = MACHXO2_REFRESH_USEC;
+	spi_message_add_tail(&tx[1], &msg);
+	ret = spi_sync(spi, &msg);
+	if (ret)
+		goto fail;
+
+	return 0;
+fail:
+	dev_err(&mgr->dev, "Cleanup failed\n");
+
+	return ret;
+}
+
+static enum fpga_mgr_states machxo2_spi_state(struct fpga_manager *mgr)
+{
+	struct spi_device *spi = mgr->priv;
+	unsigned long status;
+
+	get_status(spi, &status);
+	if (!test_bit(BUSY, &status) && test_bit(DONE, &status) &&
+	    get_err(&status) == ENOERR)
+		return FPGA_MGR_STATE_OPERATING;
+
+	return FPGA_MGR_STATE_UNKNOWN;
+}
+
+static int machxo2_write_init(struct fpga_manager *mgr,
+			      struct fpga_image_info *info,
+			      const char *buf, size_t count)
+{
+	struct spi_device *spi = mgr->priv;
+	struct spi_message msg;
+	struct spi_transfer tx[3];
+	static const u8 enable[] = ISC_ENABLE;
+	static const u8 erase[] = ISC_ERASE;
+	static const u8 initaddr[] = LSC_INITADDRESS;
+	unsigned long status;
+	int ret;
+
+	if ((info->flags & FPGA_MGR_PARTIAL_RECONFIG)) {
+		dev_err(&mgr->dev,
+			"Partial reconfiguration is not supported\n");
+		return -ENOTSUPP;
+	}
+
+	get_status(spi, &status);
+	dump_status_reg(&status);
+	memset(tx, 0, sizeof(tx));
+	spi_message_init(&msg);
+	tx[0].tx_buf = &enable;
+	tx[0].len = sizeof(enable);
+	tx[0].delay_usecs = MACHXO2_LOW_DELAY_USEC;
+	spi_message_add_tail(&tx[0], &msg);
+
+	tx[1].tx_buf = &erase;
+	tx[1].len = sizeof(erase);
+	spi_message_add_tail(&tx[1], &msg);
+	ret = spi_sync(spi, &msg);
+	if (ret)
+		goto fail;
+
+	ret = wait_until_not_busy(spi);
+	if (ret)
+		goto fail;
+
+	get_status(spi, &status);
+	if (test_bit(FAIL, &status))
+		goto fail;
+	dump_status_reg(&status);
+
+	spi_message_init(&msg);
+	tx[2].tx_buf = &initaddr;
+	tx[2].len = sizeof(initaddr);
+	spi_message_add_tail(&tx[2], &msg);
+	ret = spi_sync(spi, &msg);
+	if (ret)
+		goto fail;
+
+	get_status(spi, &status);
+	dump_status_reg(&status);
+
+	return 0;
+fail:
+	dev_err(&mgr->dev, "Error during FPGA init.\n");
+
+	return ret;
+}
+
+static int machxo2_write(struct fpga_manager *mgr, const char *buf,
+			 size_t count)
+{
+	struct spi_device *spi = mgr->priv;
+	struct spi_message msg;
+	struct spi_transfer tx;
+	static const u8 progincr[] = LSC_PROGINCRNV;
+	u8 payload[MACHXO2_BUF_SIZE];
+	unsigned long status;
+	int i, ret;
+
+	if (count % MACHXO2_PAGE_SIZE != 0) {
+		dev_err(&mgr->dev, "Malformed payload.\n");
+		return -EINVAL;
+	}
+	get_status(spi, &status);
+	dump_status_reg(&status);
+	memcpy(payload, &progincr, sizeof(progincr));
+	for (i = 0; i < count; i += MACHXO2_PAGE_SIZE) {
+		memcpy(&payload[sizeof(progincr)], &buf[i], MACHXO2_PAGE_SIZE);
+		memset(&tx, 0, sizeof(tx));
+		spi_message_init(&msg);
+		tx.tx_buf = payload;
+		tx.len = MACHXO2_BUF_SIZE;
+		tx.delay_usecs = MACHXO2_HIGH_DELAY_USEC;
+		spi_message_add_tail(&tx, &msg);
+		ret = spi_sync(spi, &msg);
+		if (ret) {
+			dev_err(&mgr->dev, "Error loading the bitstream.\n");
+			return ret;
+		}
+	}
+	get_status(spi, &status);
+	dump_status_reg(&status);
+
+	return 0;
+}
+
+static int machxo2_write_complete(struct fpga_manager *mgr,
+				  struct fpga_image_info *info)
+{
+	struct spi_device *spi = mgr->priv;
+	struct spi_message msg;
+	struct spi_transfer tx[2];
+	static const u8 progdone[] = ISC_PROGRAMDONE;
+	static const u8 refresh[] = LSC_REFRESH;
+	unsigned long status;
+	int ret, refreshloop = 0;
+
+	memset(tx, 0, sizeof(tx));
+	spi_message_init(&msg);
+	tx[0].tx_buf = &progdone;
+	tx[0].len = sizeof(progdone);
+	spi_message_add_tail(&tx[0], &msg);
+	ret = spi_sync(spi, &msg);
+	if (ret)
+		goto fail;
+	ret = wait_until_not_busy(spi);
+	if (ret)
+		goto fail;
+
+	get_status(spi, &status);
+	dump_status_reg(&status);
+	if (!test_bit(DONE, &status)) {
+		machxo2_cleanup(mgr);
+		goto fail;
+	}
+
+	do {
+		spi_message_init(&msg);
+		tx[1].tx_buf = &refresh;
+		tx[1].len = sizeof(refresh);
+		tx[1].delay_usecs = MACHXO2_REFRESH_USEC;
+		spi_message_add_tail(&tx[1], &msg);
+		ret = spi_sync(spi, &msg);
+		if (ret)
+			goto fail;
+
+		/* check refresh status */
+		get_status(spi, &status);
+		dump_status_reg(&status);
+		if (!test_bit(BUSY, &status) && test_bit(DONE, &status) &&
+		    get_err(&status) == ENOERR)
+			break;
+		if (++refreshloop == MACHXO2_MAX_REFRESH_LOOP) {
+			machxo2_cleanup(mgr);
+			goto fail;
+		}
+	} while (1);
+
+	get_status(spi, &status);
+	dump_status_reg(&status);
+
+	return 0;
+fail:
+	dev_err(&mgr->dev, "Refresh failed.\n");
+
+	return ret;
+}
+
+static const struct fpga_manager_ops machxo2_ops = {
+	.state = machxo2_spi_state,
+	.write_init = machxo2_write_init,
+	.write = machxo2_write,
+	.write_complete = machxo2_write_complete,
+};
+
+static int machxo2_spi_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct fpga_manager *mgr;
+	int ret;
+
+	if (spi->max_speed_hz > MACHXO2_MAX_SPEED) {
+		dev_err(dev, "Speed is too high\n");
+		return -EINVAL;
+	}
+
+	mgr = fpga_mgr_create(dev, "Lattice MachXO2 SPI FPGA Manager",
+			      &machxo2_ops, spi);
+	if (!mgr)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, mgr);
+
+	ret = fpga_mgr_register(mgr);
+	if (ret)
+		fpga_mgr_free(mgr);
+
+	return ret;
+}
+
+static int machxo2_spi_remove(struct spi_device *spi)
+{
+	struct fpga_manager *mgr = spi_get_drvdata(spi);
+
+	fpga_mgr_unregister(mgr);
+
+	return 0;
+}
+
+static const struct of_device_id of_match[] = {
+	{ .compatible = "lattice,machxo2-slave-spi", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, of_match);
+
+static const struct spi_device_id lattice_ids[] = {
+	{ "machxo2-slave-spi", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, lattice_ids);
+
+static struct spi_driver machxo2_spi_driver = {
+	.driver = {
+		.name = "machxo2-slave-spi",
+		.of_match_table = of_match_ptr(of_match),
+	},
+	.probe = machxo2_spi_probe,
+	.remove = machxo2_spi_remove,
+	.id_table = lattice_ids,
+};
+
+module_spi_driver(machxo2_spi_driver);
+
+MODULE_AUTHOR("Paolo Pisati <p.pisati@gmail.com>");
+MODULE_DESCRIPTION("Load Lattice FPGA firmware over SPI");
+MODULE_LICENSE("GPL v2");
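
All of the new MachXO2 driver's health checks reduce to one predicate on the
LSC_READ_STATUS word (see machxo2_spi_state() and the refresh loop in
machxo2_write_complete()): not BUSY, DONE set, error field clear. A standalone
restatement of that predicate with the bit positions copied from the defines
above; machxo2_status_ok is a hypothetical name, not a function in the driver:

	#include <linux/bitops.h>
	#include <linux/types.h>

	static bool machxo2_status_ok(unsigned long status)
	{
		u8 err = (status >> 23) & 7;	/* ERRBITS/ERRMASK; 0 == ENOERR */

		return !test_bit(12, &status) &&	/* BUSY clear */
			test_bit(8, &status) &&		/* DONE set */
			err == 0;
	}
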
diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c
index 119ff75..35fabb8 100644
--- a/drivers/fpga/of-fpga-region.c
+++ b/drivers/fpga/of-fpga-region.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA Region - Device Tree support for FPGA programming under Linux
  *
  *  Copyright (C) 2013-2016 Altera Corporation
  *  Copyright (C) 2017 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-
 #include <linux/fpga/fpga-bridge.h>
 #include <linux/fpga/fpga-mgr.h>
 #include <linux/fpga/fpga-region.h>
@@ -422,27 +410,25 @@
 	if (IS_ERR(mgr))
 		return -EPROBE_DEFER;
 
-	region = devm_kzalloc(dev, sizeof(*region), GFP_KERNEL);
+	region = fpga_region_create(dev, mgr, of_fpga_region_get_bridges);
 	if (!region) {
 		ret = -ENOMEM;
 		goto eprobe_mgr_put;
 	}
 
-	region->mgr = mgr;
-
-	/* Specify how to get bridges for this type of region. */
-	region->get_bridges = of_fpga_region_get_bridges;
-
-	ret = fpga_region_register(dev, region);
+	ret = fpga_region_register(region);
 	if (ret)
-		goto eprobe_mgr_put;
+		goto eprobe_free;
 
 	of_platform_populate(np, fpga_region_of_match, NULL, &region->dev);
+	dev_set_drvdata(dev, region);
 
 	dev_info(dev, "FPGA Region probed\n");
 
 	return 0;
 
+eprobe_free:
+	fpga_region_free(region);
 eprobe_mgr_put:
 	fpga_mgr_put(mgr);
 	return ret;
diff --git a/drivers/fpga/socfpga-a10.c b/drivers/fpga/socfpga-a10.c
index a46e343..be30c48 100644
--- a/drivers/fpga/socfpga-a10.c
+++ b/drivers/fpga/socfpga-a10.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA Manager Driver for Altera Arria10 SoCFPGA
  *
  * Copyright (C) 2015-2016 Altera Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-
 #include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/delay.h>
@@ -482,6 +470,7 @@
 	struct device *dev = &pdev->dev;
 	struct a10_fpga_priv *priv;
 	void __iomem *reg_base;
+	struct fpga_manager *mgr;
 	struct resource *res;
 	int ret;
 
@@ -519,9 +508,16 @@
 		return -EBUSY;
 	}
 
-	ret = fpga_mgr_register(dev, "SoCFPGA Arria10 FPGA Manager",
-				 &socfpga_a10_fpga_mgr_ops, priv);
+	mgr = fpga_mgr_create(dev, "SoCFPGA Arria10 FPGA Manager",
+			      &socfpga_a10_fpga_mgr_ops, priv);
+	if (!mgr)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, mgr);
+
+	ret = fpga_mgr_register(mgr);
 	if (ret) {
+		fpga_mgr_free(mgr);
 		clk_disable_unprepare(priv->clk);
 		return ret;
 	}
@@ -534,7 +530,7 @@
 	struct fpga_manager *mgr = platform_get_drvdata(pdev);
 	struct a10_fpga_priv *priv = mgr->priv;
 
-	fpga_mgr_unregister(&pdev->dev);
+	fpga_mgr_unregister(mgr);
 	clk_disable_unprepare(priv->clk);
 
 	return 0;
diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c
index b6672e6..959d71f 100644
--- a/drivers/fpga/socfpga.c
+++ b/drivers/fpga/socfpga.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * FPGA Manager Driver for Altera SOCFPGA
  *
  *  Copyright (C) 2013-2015 Altera Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #include <linux/completion.h>
 #include <linux/delay.h>
@@ -555,6 +544,7 @@
 {
 	struct device *dev = &pdev->dev;
 	struct socfpga_fpga_priv *priv;
+	struct fpga_manager *mgr;
 	struct resource *res;
 	int ret;
 
@@ -581,13 +571,25 @@
 	if (ret)
 		return ret;
 
-	return fpga_mgr_register(dev, "Altera SOCFPGA FPGA Manager",
-				 &socfpga_fpga_ops, priv);
+	mgr = fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
+			      &socfpga_fpga_ops, priv);
+	if (!mgr)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, mgr);
+
+	ret = fpga_mgr_register(mgr);
+	if (ret)
+		fpga_mgr_free(mgr);
+
+	return ret;
 }
 
 static int socfpga_fpga_remove(struct platform_device *pdev)
 {
-	fpga_mgr_unregister(&pdev->dev);
+	struct fpga_manager *mgr = platform_get_drvdata(pdev);
+
+	fpga_mgr_unregister(mgr);
 
 	return 0;
 }
diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c
index f6a96b4..08efd18 100644
--- a/drivers/fpga/ts73xx-fpga.c
+++ b/drivers/fpga/ts73xx-fpga.c
@@ -116,7 +116,9 @@
 {
 	struct device *kdev = &pdev->dev;
 	struct ts73xx_fpga_priv *priv;
+	struct fpga_manager *mgr;
 	struct resource *res;
+	int ret;
 
 	priv = devm_kzalloc(kdev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -131,13 +133,25 @@
 		return PTR_ERR(priv->io_base);
 	}
 
-	return fpga_mgr_register(kdev, "TS-73xx FPGA Manager",
-				 &ts73xx_fpga_ops, priv);
+	mgr = fpga_mgr_create(kdev, "TS-73xx FPGA Manager",
+			      &ts73xx_fpga_ops, priv);
+	if (!mgr)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, mgr);
+
+	ret = fpga_mgr_register(mgr);
+	if (ret)
+		fpga_mgr_free(mgr);
+
+	return ret;
 }
 
 static int ts73xx_fpga_remove(struct platform_device *pdev)
 {
-	fpga_mgr_unregister(&pdev->dev);
+	struct fpga_manager *mgr = platform_get_drvdata(pdev);
+
+	fpga_mgr_unregister(mgr);
 
 	return 0;
 }
diff --git a/drivers/fpga/xilinx-pr-decoupler.c b/drivers/fpga/xilinx-pr-decoupler.c
index 0d77430..07ba153 100644
--- a/drivers/fpga/xilinx-pr-decoupler.c
+++ b/drivers/fpga/xilinx-pr-decoupler.c
@@ -94,6 +94,7 @@
 static int xlnx_pr_decoupler_probe(struct platform_device *pdev)
 {
 	struct xlnx_pr_decoupler_data *priv;
+	struct fpga_bridge *br;
 	int err;
 	struct resource *res;
 
@@ -120,16 +121,27 @@
 
 	clk_disable(priv->clk);
 
-	err = fpga_bridge_register(&pdev->dev, "Xilinx PR Decoupler",
-				   &xlnx_pr_decoupler_br_ops, priv);
+	br = fpga_bridge_create(&pdev->dev, "Xilinx PR Decoupler",
+				&xlnx_pr_decoupler_br_ops, priv);
+	if (!br) {
+		err = -ENOMEM;
+		goto err_clk;
+	}
 
+	platform_set_drvdata(pdev, br);
+
+	err = fpga_bridge_register(br);
 	if (err) {
 		dev_err(&pdev->dev, "unable to register Xilinx PR Decoupler");
-		clk_unprepare(priv->clk);
-		return err;
+		goto err_clk;
 	}
 
 	return 0;
+
+err_clk:
+	clk_unprepare(priv->clk);
+
+	return err;
 }
 
 static int xlnx_pr_decoupler_remove(struct platform_device *pdev)
@@ -137,7 +149,7 @@
 	struct fpga_bridge *bridge = platform_get_drvdata(pdev);
 	struct xlnx_pr_decoupler_data *p = bridge->priv;
 
-	fpga_bridge_unregister(&pdev->dev);
+	fpga_bridge_unregister(bridge);
 
 	clk_unprepare(p->clk);
 
diff --git a/drivers/fpga/xilinx-spi.c b/drivers/fpga/xilinx-spi.c
index 9b62a4c..8d19459 100644
--- a/drivers/fpga/xilinx-spi.c
+++ b/drivers/fpga/xilinx-spi.c
@@ -143,6 +143,8 @@
 static int xilinx_spi_probe(struct spi_device *spi)
 {
 	struct xilinx_spi_conf *conf;
+	struct fpga_manager *mgr;
+	int ret;
 
 	conf = devm_kzalloc(&spi->dev, sizeof(*conf), GFP_KERNEL);
 	if (!conf)
@@ -165,13 +167,25 @@
 		return PTR_ERR(conf->done);
 	}
 
-	return fpga_mgr_register(&spi->dev, "Xilinx Slave Serial FPGA Manager",
-				 &xilinx_spi_ops, conf);
+	mgr = fpga_mgr_create(&spi->dev, "Xilinx Slave Serial FPGA Manager",
+			      &xilinx_spi_ops, conf);
+	if (!mgr)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, mgr);
+
+	ret = fpga_mgr_register(mgr);
+	if (ret)
+		fpga_mgr_free(mgr);
+
+	return ret;
 }
 
 static int xilinx_spi_remove(struct spi_device *spi)
 {
-	fpga_mgr_unregister(&spi->dev);
+	struct fpga_manager *mgr = spi_get_drvdata(spi);
+
+	fpga_mgr_unregister(mgr);
 
 	return 0;
 }
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index 70b15b3..3110e00 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -558,6 +558,7 @@
 {
 	struct device *dev = &pdev->dev;
 	struct zynq_fpga_priv *priv;
+	struct fpga_manager *mgr;
 	struct resource *res;
 	int err;
 
@@ -613,10 +614,17 @@
 
 	clk_disable(priv->clk);
 
-	err = fpga_mgr_register(dev, "Xilinx Zynq FPGA Manager",
-				&zynq_fpga_ops, priv);
+	mgr = fpga_mgr_create(dev, "Xilinx Zynq FPGA Manager",
+			      &zynq_fpga_ops, priv);
+	if (!mgr)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, mgr);
+
+	err = fpga_mgr_register(mgr);
 	if (err) {
 		dev_err(dev, "unable to register FPGA manager\n");
+		fpga_mgr_free(mgr);
 		clk_unprepare(priv->clk);
 		return err;
 	}
@@ -632,7 +640,7 @@
 	mgr = platform_get_drvdata(pdev);
 	priv = mgr->priv;
 
-	fpga_mgr_unregister(&pdev->dev);
+	fpga_mgr_unregister(mgr);
 
 	clk_unprepare(priv->clk);
 
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
index 761d827..d3cf950 100644
--- a/drivers/gpio/gpio-uniphier.c
+++ b/drivers/gpio/gpio-uniphier.c
@@ -371,8 +371,7 @@
 		return ret;
 
 	nregs = uniphier_gpio_get_nbanks(ngpios) * 2 + 3;
-	priv = devm_kzalloc(dev,
-			    sizeof(*priv) + sizeof(priv->saved_vals[0]) * nregs,
+	priv = devm_kzalloc(dev, struct_size(priv, saved_vals, nregs),
 			    GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index d8ccb50..55d596f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -4023,8 +4023,7 @@
 	if (count < 0)
 		return ERR_PTR(count);
 
-	descs = kzalloc(sizeof(*descs) + sizeof(descs->desc[0]) * count,
-			GFP_KERNEL);
+	descs = kzalloc(struct_size(descs, desc, count), GFP_KERNEL);
 	if (!descs)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index deeefa7..2a72d2f 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -49,16 +49,17 @@
 
 	  If in doubt, say "N".
 
-config DRM_DEBUG_MM_SELFTEST
-	tristate "kselftests for DRM range manager (struct drm_mm)"
+config DRM_DEBUG_SELFTEST
+	tristate "kselftests for DRM"
 	depends on DRM
 	depends on DEBUG_KERNEL
 	select PRIME_NUMBERS
 	select DRM_LIB_RANDOM
+	select DRM_KMS_HELPER
 	default n
 	help
-	  This option provides a kernel module that can be used to test
-	  the DRM range manager (drm_mm) and its API. This option is not
+	  This option provides kernel modules that can be used to run
+	  various selftests on parts of the DRM api. This option is not
 	  useful for distributions or general kernels, but only for kernel
 	  developers working on DRM and associated drivers.
 
@@ -267,6 +268,8 @@
 
 source "drivers/gpu/drm/imx/Kconfig"
 
+source "drivers/gpu/drm/v3d/Kconfig"
+
 source "drivers/gpu/drm/vc4/Kconfig"
 
 source "drivers/gpu/drm/etnaviv/Kconfig"
@@ -289,6 +292,8 @@
 
 source "drivers/gpu/drm/tve200/Kconfig"
 
+source "drivers/gpu/drm/xen/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 50093ff..ef9f3da 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -43,7 +43,7 @@
 drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
 
 obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
-obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += selftests/
+obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
 
 obj-$(CONFIG_DRM)	+= drm.o
 obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
@@ -61,6 +61,7 @@
 obj-$(CONFIG_DRM_I810)	+= i810/
 obj-$(CONFIG_DRM_I915)	+= i915/
 obj-$(CONFIG_DRM_MGAG200) += mgag200/
+obj-$(CONFIG_DRM_V3D)  += v3d/
 obj-$(CONFIG_DRM_VC4)  += vc4/
 obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
 obj-$(CONFIG_DRM_SIS)   += sis/
@@ -103,3 +104,4 @@
 obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
 obj-$(CONFIG_DRM_PL111) += pl111/
 obj-$(CONFIG_DRM_TVE200) += tve200/
+obj-$(CONFIG_DRM_XEN) += xen/
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2ca2b51..bfd332c 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,13 +56,18 @@
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
-	ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
-	amdgpu_amdkfd_gfx_v7.o
+	ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
 
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
-	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
+	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
+	vega20_reg_init.o
+
+# add DF block
+amdgpu-y += \
+	df_v1_7.o \
+	df_v3_6.o
 
 # add GMC block
 amdgpu-y += \
@@ -126,11 +131,20 @@
 	vcn_v1_0.o
 
 # add amdkfd interfaces
+amdgpu-y += amdgpu_amdkfd.o
+
+ifneq ($(CONFIG_HSA_AMD),)
 amdgpu-y += \
-	 amdgpu_amdkfd.o \
 	 amdgpu_amdkfd_fence.o \
 	 amdgpu_amdkfd_gpuvm.o \
-	 amdgpu_amdkfd_gfx_v8.o
+	 amdgpu_amdkfd_gfx_v8.o \
+	 amdgpu_amdkfd_gfx_v9.o
+
+ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
+amdgpu-y += amdgpu_amdkfd_gfx_v7.o
+endif
+
+endif
 
 # add cgs
 amdgpu-y += amdgpu_cgs.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c8b605f..a59c075 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -129,6 +129,7 @@
 extern int amdgpu_compute_multipipe;
 extern int amdgpu_gpu_recovery;
 extern int amdgpu_emu_mode;
+extern uint amdgpu_smu_memory_pool_size;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
 extern int amdgpu_si_support;
@@ -137,6 +138,7 @@
 extern int amdgpu_cik_support;
 #endif
 
+#define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_DEFAULT_GTT_SIZE_MB		3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
@@ -222,10 +224,10 @@
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
 
-int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_clockgating_state state);
-int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_powergating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_powergating_state state);
 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
@@ -681,6 +683,8 @@
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
 
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
 
@@ -771,9 +775,18 @@
 	u32 starting_offsets_start;
 	u32 reg_list_format_size_bytes;
 	u32 reg_list_size_bytes;
+	u32 reg_list_format_direct_reg_list_length;
+	u32 save_restore_list_cntl_size_bytes;
+	u32 save_restore_list_gpm_size_bytes;
+	u32 save_restore_list_srm_size_bytes;
 
 	u32 *register_list_format;
 	u32 *register_restore;
+	u8 *save_restore_list_cntl;
+	u8 *save_restore_list_gpm;
+	u8 *save_restore_list_srm;
+
+	bool is_rlc_v2_1;
 };
 
 #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
@@ -867,6 +880,8 @@
 
 	/* gfx configure feature */
 	uint32_t double_offchip_lds_buf;
+	/* cached value of DB_DEBUG2 */
+	uint32_t db_debug2;
 };
 
 struct amdgpu_cu_info {
@@ -938,6 +953,12 @@
 	uint32_t			ce_feature_version;
 	uint32_t			pfp_feature_version;
 	uint32_t			rlc_feature_version;
+	uint32_t			rlc_srlc_fw_version;
+	uint32_t			rlc_srlc_feature_version;
+	uint32_t			rlc_srlg_fw_version;
+	uint32_t			rlc_srlg_feature_version;
+	uint32_t			rlc_srls_fw_version;
+	uint32_t			rlc_srls_feature_version;
 	uint32_t			mec_feature_version;
 	uint32_t			mec2_feature_version;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
@@ -1204,6 +1225,8 @@
 	/* invalidate hdp read cache */
 	void (*invalidate_hdp)(struct amdgpu_device *adev,
 			       struct amdgpu_ring *ring);
+	/* check if the asic needs a full reset or if soft reset will work */
+	bool (*need_full_reset)(struct amdgpu_device *adev);
 };
 
 /*
@@ -1368,7 +1391,19 @@
 	void (*detect_hw_virt)(struct amdgpu_device *adev);
 };
 
-
+struct amdgpu_df_funcs {
+	void (*init)(struct amdgpu_device *adev);
+	void (*enable_broadcast_mode)(struct amdgpu_device *adev,
+				      bool enable);
+	u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
+	u32 (*get_hbm_channel_number)(struct amdgpu_device *adev);
+	void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
+						 bool enable);
+	void (*get_clockgating_state)(struct amdgpu_device *adev,
+				      u32 *flags);
+	void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
+					    bool enable);
+};
 /* Define the HW IP blocks will be used in driver , add more if necessary */
 enum amd_hw_ip_block_type {
 	GC_HWIP = 1,
@@ -1398,6 +1433,7 @@
 struct amd_powerplay {
 	void *pp_handle;
 	const struct amd_pm_funcs *pp_funcs;
+	uint32_t pp_feature;
 };
 
 #define AMDGPU_RESET_MAGIC_NUM 64
@@ -1590,6 +1626,7 @@
 	uint32_t 		*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
 
 	const struct amdgpu_nbio_funcs	*nbio_funcs;
+	const struct amdgpu_df_funcs	*df_funcs;
 
 	/* delayed work_func for deferring clockgating during resume */
 	struct delayed_work     late_init_work;
@@ -1764,6 +1801,7 @@
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
 #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
+#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
 #define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
 #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
@@ -1790,6 +1828,7 @@
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
+#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
 #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
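
The new amdgpu_df_funcs table is selected per ASIC during early SoC init,
matching the df_v1_7.o/df_v3_6.o objects added in the Makefile hunk earlier. A
hedged sketch of the wiring; the exact placement and the exported table names
are assumptions here, not taken from this diff:

	#include "amdgpu.h"

	/* assumed to be exported by the new df_v1_7/df_v3_6 files */
	extern const struct amdgpu_df_funcs df_v1_7_funcs;
	extern const struct amdgpu_df_funcs df_v3_6_funcs;

	static void my_set_df_funcs(struct amdgpu_device *adev)
	{
		if (adev->asic_type == CHIP_VEGA20)
			adev->df_funcs = &df_v3_6_funcs;
		else
			adev->df_funcs = &df_v1_7_funcs;
	}
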
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index a29362f..428e5eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -290,12 +290,11 @@
 	else if (r)
 		return r;
 
-	r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
-			0x5289, 0, &acp_base);
-	if (r == -ENODEV)
-		return 0;
-	else if (r)
-		return r;
+	if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289)
+		return -EINVAL;
+
+	acp_base = adev->rmmio_base;
+
 	if (adev->asic_type != CHIP_STONEY) {
 		adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
 		if (adev->acp.acp_genpd == NULL)
@@ -513,7 +512,7 @@
 	if (adev->acp.acp_genpd) {
 		for (i = 0; i < ACP_DEVS ; i++) {
 			dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
-			ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
+			ret = pm_genpd_remove_device(dev);
 			/* If removal fails, dont giveup and try rest */
 			if (ret)
 				dev_err(dev, "remove dev from genpd failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 4d36203..8f6f455 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -50,15 +50,21 @@
 		kgd2kfd = NULL;
 	}
 
+
 #elif defined(CONFIG_HSA_AMD)
+
 	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
 	if (ret)
 		kgd2kfd = NULL;
 
 #else
+	kgd2kfd = NULL;
 	ret = -ENOENT;
 #endif
+
+#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD)
 	amdgpu_amdkfd_gpuvm_init_mem_limits();
+#endif
 
 	return ret;
 }
@@ -92,8 +98,12 @@
 	case CHIP_POLARIS11:
 		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
 		break;
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
+		break;
 	default:
-		dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
+		dev_info(adev->dev, "kfd not supported on this ASIC\n");
 		return;
 	}
 
@@ -175,6 +185,28 @@
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
+		if (adev->asic_type >= CHIP_VEGA10) {
+			/* On SOC15 the BIF is involved in routing
+			 * doorbells using the low 12 bits of the
+			 * address. Communicate the assignments to
+			 * KFD. KFD uses two doorbell pages per
+			 * process in case of 64-bit doorbells so we
+			 * can use each doorbell assignment twice.
+			 */
+			gpu_resources.sdma_doorbell[0][0] =
+				AMDGPU_DOORBELL64_sDMA_ENGINE0;
+			gpu_resources.sdma_doorbell[0][1] =
+				AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
+			gpu_resources.sdma_doorbell[1][0] =
+				AMDGPU_DOORBELL64_sDMA_ENGINE1;
+			gpu_resources.sdma_doorbell[1][1] =
+				AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
+			/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
+			 * SDMA, IH and VCN. So don't use them for the CP.
+			 */
+			gpu_resources.reserved_doorbell_mask = 0x1f0;
+			gpu_resources.reserved_doorbell_val  = 0x0f0;
+		}
 
 		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
@@ -217,13 +249,19 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct amdgpu_bo *bo = NULL;
+	struct amdgpu_bo_param bp;
 	int r;
 	uint64_t gpu_addr_tmp = 0;
 	void *cpu_ptr_tmp = NULL;
 
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
-			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
-			     NULL, &bo);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
+	r = amdgpu_bo_create(adev, &bp, &bo);
 	if (r) {
 		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
@@ -432,3 +470,44 @@
 
 	return false;
 }
+
+#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD)
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+	return false;
+}
+
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+{
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+					struct amdgpu_vm *vm)
+{
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+	return NULL;
+}
+
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+{
+	return 0;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
+{
+	return NULL;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
+{
+	return NULL;
+}
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+	return NULL;
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c2c2bea..a8418a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/mmu_context.h>
+#include <linux/workqueue.h>
 #include <kgd_kfd_interface.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 #include "amdgpu_sync.h"
@@ -59,7 +60,9 @@
 
 	uint32_t mapping_flags;
 
+	atomic_t invalid;
 	struct amdkfd_process_info *process_info;
+	struct page **user_pages;
 
 	struct amdgpu_sync sync;
 
@@ -84,6 +87,9 @@
 	struct list_head vm_list_head;
 	/* List head for all KFD BOs that belong to a KFD process. */
 	struct list_head kfd_bo_list;
+	/* List of userptr BOs that are valid or invalid */
+	struct list_head userptr_valid_list;
+	struct list_head userptr_inval_list;
 	/* Lock to protect kfd_bo_list */
 	struct mutex lock;
 
@@ -91,6 +97,11 @@
 	unsigned int n_vms;
 	/* Eviction Fence */
 	struct amdgpu_amdkfd_fence *eviction_fence;
+
+	/* MMU-notifier related fields */
+	atomic_t evicted_bos;
+	struct delayed_work restore_userptr_work;
+	struct pid *pid;
 };
 
 int amdgpu_amdkfd_init(void);
@@ -104,12 +115,14 @@
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len);
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
@@ -143,14 +156,14 @@
 
 /* GPUVM API */
 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-					  void **process_info,
-					  struct dma_fence **ef);
+					void **process_info,
+					struct dma_fence **ef);
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
-					   struct file *filp,
-					   void **vm, void **process_info,
-					   struct dma_fence **ef);
+					struct file *filp,
+					void **vm, void **process_info,
+					struct dma_fence **ef);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm);
+				struct amdgpu_vm *vm);
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
 uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ea54e53..0ff36d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -98,8 +98,6 @@
 static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 					unsigned int vmid);
 
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-				uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
@@ -183,7 +181,6 @@
 	.free_pasid = amdgpu_pasid_free,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-	.init_pipeline = kgd_init_pipeline,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -309,13 +306,6 @@
 	return 0;
 }
 
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-				uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
-	/* amdgpu owns the per-pipe state */
-	return 0;
-}
-
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 89264c9..6ef9762 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -57,8 +57,6 @@
 		uint32_t sh_mem_bases);
 static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 		unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-		uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr,
@@ -141,7 +139,6 @@
 	.free_pasid = amdgpu_pasid_free,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-	.init_pipeline = kgd_init_pipeline,
 	.init_interrupts = kgd_init_interrupts,
 	.hqd_load = kgd_hqd_load,
 	.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -270,13 +267,6 @@
 	return 0;
 }
 
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-				uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
-	/* amdgpu owns the per-pipe state */
-	return 0;
-}
-
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
new file mode 100644
index 0000000..f0c0d39
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "soc15_hw_ip.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+/* HACK: MMHUB and GC both have VM-related registers with the same
+ * names but different offsets. Define the MMHUB registers we need here
+ * with a prefix. A proper solution would be to move the functions
+ * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
+ * respectively.
+ */
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ				0x06f3
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX		0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK				0x0705
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX		0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32		0x072b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX	0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32		0x072c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX	0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32		0x074b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX	0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32		0x074c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX	0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32		0x076b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX	0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32		0x076c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX	0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32		0x0727
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX	0
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32		0x0728
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX	0
+
+#define V9_PIPE_PER_MEC		(4)
+#define V9_QUEUES_PER_PIPE_MEC	(8)
+
+enum hqd_dequeue_request_type {
+	NO_ACTION = 0,
+	DRAIN_PIPE,
+	RESET_WAVES
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t sh_mem_config,
+		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+		uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+		unsigned int vmid);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+		uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+				enum kfd_preempt_type reset_type,
+				unsigned int utimeout, uint32_t pipe_id,
+				uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+				unsigned int utimeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					uint32_t cntl_val,
+					uint32_t addr_hi,
+					uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+					uint32_t gfx_index_val,
+					uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+		uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+		uint8_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+
+/* Because REG_GET_FIELD() is used, this function is kept in the
+ * ASIC-specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}
+
+static const struct kfd2kgd_calls kfd2kgd = {
+	.init_gtt_mem_allocation = alloc_gtt_mem,
+	.free_gtt_mem = free_gtt_mem,
+	.get_local_mem_info = get_local_mem_info,
+	.get_gpu_clock_counter = get_gpu_clock_counter,
+	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_pasid_alloc,
+	.free_pasid = amdgpu_pasid_free,
+	.program_sh_mem_settings = kgd_program_sh_mem_settings,
+	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+	.init_interrupts = kgd_init_interrupts,
+	.hqd_load = kgd_hqd_load,
+	.hqd_sdma_load = kgd_hqd_sdma_load,
+	.hqd_dump = kgd_hqd_dump,
+	.hqd_sdma_dump = kgd_hqd_sdma_dump,
+	.hqd_is_occupied = kgd_hqd_is_occupied,
+	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+	.hqd_destroy = kgd_hqd_destroy,
+	.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+	.address_watch_disable = kgd_address_watch_disable,
+	.address_watch_execute = kgd_address_watch_execute,
+	.wave_control_execute = kgd_wave_control_execute,
+	.address_watch_get_offset = kgd_address_watch_get_offset,
+	.get_atc_vmid_pasid_mapping_pasid =
+			get_atc_vmid_pasid_mapping_pasid,
+	.get_atc_vmid_pasid_mapping_valid =
+			get_atc_vmid_pasid_mapping_valid,
+	.get_fw_version = get_fw_version,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = amdgpu_amdkfd_get_tile_config,
+	.get_cu_info = get_cu_info,
+	.get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+	.create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+	.acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+	.destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+	.get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+	.set_vm_context_page_table_base = set_vm_context_page_table_base,
+	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+	.map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+	.unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+	.sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+	.map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+	.invalidate_tlbs = invalidate_tlbs,
+	.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+	.submit_ib = amdgpu_amdkfd_submit_ib,
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+	return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+	return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+			uint32_t queue, uint32_t vmid)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	soc15_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
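+	/* pipe_id is a global compute pipe index; compute MECs are
+	 * numbered from 1 (ME 0 is the gfx engine), hence the +1.
+	 */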
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static uint32_t get_queue_mask(struct amdgpu_device *adev,
+			       uint32_t pipe_id, uint32_t queue_id)
+{
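+	/* CP_PQ_WPTR_POLL_CNTL1 has one enable bit per queue: map this
+	 * (pipe, queue) pair to its bit, wrapping modulo 32.
+	 */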
+	unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
+			    queue_id) & 31;
+
+	return ((uint32_t)1) << bit;
+}
+
+static void release_queue(struct kgd_dev *kgd)
+{
+	unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+					uint32_t sh_mem_config,
+					uint32_t sh_mem_ape1_base,
+					uint32_t sh_mem_ape1_limit,
+					uint32_t sh_mem_bases)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+	/* APE1 no longer exists on GFX9 */
+
+	unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+					unsigned int vmid)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	/*
+	 * We have to assume that there is no outstanding mapping.
+	 * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+	 * a mapping is in progress or because a mapping finished
+	 * and the SW cleared it.
+	 * So the protocol is to always wait & clear.
+	 */
+	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+			ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+	/*
+	 * We need to do this twice, once for GFX and once for MMHUB.
+	 * For the ATC, add 16 to the VMID for MMHUB; the IH uses
+	 * different registers. The ATC_VMID0..15 registers are separate
+	 * from ATC_VMID16..31.
+	 */
+
+	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+	       pasid_mapping);
+
+	while (!(RREG32(SOC15_REG_OFFSET(
+				ATHUB, 0,
+				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+		 (1U << vmid)))
+		cpu_relax();
+
+	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+	       1U << vmid);
+
+	/* Mapping vmid to pasid also for IH block */
+	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+	       pasid_mapping);
+
+	WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
+	       pasid_mapping);
+
+	while (!(RREG32(SOC15_REG_OFFSET(
+				ATHUB, 0,
+				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+		 (1U << (vmid + 16))))
+		cpu_relax();
+
+	WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+				mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+	       1U << (vmid + 16));
+
+	/* Mapping vmid to pasid also for IH block */
+	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+	       pasid_mapping);
+	return 0;
+}
+
+/* TODO: The RING0 form of the field is obsolete. It seems to date
+ * back to SI, but it still works.
+ */
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t mec;
+	uint32_t pipe;
+
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+	lock_srbm(kgd, mec, pipe, 0, 0);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+		CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+		CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+	unlock_srbm(kgd);
+
+	return 0;
+}
+
+static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+				unsigned int engine_id,
+				unsigned int queue_id)
+{
+	uint32_t base[2] = {
+		SOC15_REG_OFFSET(SDMA0, 0,
+				 mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+		SOC15_REG_OFFSET(SDMA1, 0,
+				 mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
+	};
+	uint32_t retval;
+
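+	/* Per-queue RLC register blocks are laid out at a fixed stride;
+	 * index from the engine's RLC0 base by queue_id.
+	 */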
+	retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
+					       mmSDMA0_RLC0_RB_CNTL);
+
+	pr_debug("sdma base address: 0x%x\n", retval);
+
+	return retval;
+}
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+	return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct v9_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg, hqd_base, data;
+
+	m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	/* HIQ is set during driver init period with vmid set to 0 */
+	if (m->cp_hqd_vmid == 0) {
+		uint32_t value, mec, pipe;
+
+		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+			mec, pipe, queue_id);
+		value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+			((mec << 5) | (pipe << 3) | queue_id | 0x80));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+	}
+
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+	hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+	for (reg = hqd_base;
+	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+		WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+	/* Activate doorbell logic before triggering WPTR poll. */
+	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+	if (wptr) {
+		/* Don't read wptr with get_user because the user
+		 * context may not be accessible (if this function
+		 * runs in a work queue). Instead trigger a one-shot
+		 * polling read from memory in the CP. This assumes
+		 * that wptr is GPU-accessible in the queue's VMID via
+		 * ATC or SVM. WPTR==RPTR before starting the poll so
+		 * the CP starts fetching new commands from the right
+		 * place.
+		 *
+		 * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+		 * tricky. Assume that the queue didn't overflow. The
+		 * number of valid bits in the 32-bit RPTR depends on
+		 * the queue size. The remaining bits are taken from
+		 * the saved 64-bit WPTR. If the WPTR wrapped, add the
+		 * queue size.
+		 */
+		uint32_t queue_size =
+			2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+					   CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+		uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+		if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+			guessed_wptr += queue_size;
+		guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+		guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+		       lower_32_bits(guessed_wptr));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+		       upper_32_bits(guessed_wptr));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+		       lower_32_bits((uintptr_t)wptr));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+		       upper_32_bits((uintptr_t)wptr));
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+		       get_queue_mask(adev, pipe_id, queue_id));
+	}
+
+	/* Start the EOP fetcher */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+	       REG_SET_FIELD(m->cp_hqd_eop_rptr,
+			     CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+
+	release_queue(kgd);
+
+	return 0;
+}
+
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+			uint32_t pipe_id, uint32_t queue_id,
+			uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do {				\
+		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
+			break;				\
+		(*dump)[i][0] = (addr) << 2;		\
+		(*dump)[i++][1] = RREG32(addr);		\
+	} while (0)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+	     reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+		DUMP_REG(reg);
+
+	release_queue(kgd);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+			     uint32_t __user *wptr, struct mm_struct *mm)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_sdma_mqd *m;
+	uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+	unsigned long end_jiffies;
+	uint32_t data;
+	uint64_t data64;
+	uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+					    m->sdma_queue_id);
+	sdmax_gfx_context_cntl = m->sdma_engine_id ?
+		SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
+		SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+	end_jiffies = msecs_to_jiffies(2000) + jiffies;
+	while (true) {
+		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies))
+			return -ETIME;
+		usleep_range(500, 1000);
+	}
+	data = RREG32(sdmax_gfx_context_cntl);
+	data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+			     RESUME_CTX, 0);
+	WREG32(sdmax_gfx_context_cntl, data);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+	       m->sdmax_rlcx_doorbell_offset);
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+			     ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+				m->sdmax_rlcx_rb_rptr_hi);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+	if (read_user_wptr(mm, wptr64, data64)) {
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       lower_32_bits(data64));
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		       upper_32_bits(data64));
+	} else {
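+		/* Couldn't read the saved user wptr; set wptr == rptr so
+		 * the engine comes up idle instead of fetching stale
+		 * commands.
+		 */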
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+		       m->sdmax_rlcx_rb_rptr);
+		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+		       m->sdmax_rlcx_rb_rptr_hi);
+	}
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+			m->sdmax_rlcx_rb_base_hi);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+			m->sdmax_rlcx_rb_rptr_addr_lo);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+			m->sdmax_rlcx_rb_rptr_addr_hi);
+
+	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+			     RB_ENABLE, 1);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+	return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+			     uint32_t engine_id, uint32_t queue_id,
+			     uint32_t (**dump)[2], uint32_t *n_regs)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+	uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+	if (*dump == NULL)
+		return -ENOMEM;
+
+	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+	     reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+	     reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+		DUMP_REG(sdma_base_addr + reg);
+
+	WARN_ON_ONCE(i != HQD_N_REGS);
+	*n_regs = i;
+
+	return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+				uint32_t pipe_id, uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t act;
+	bool retval = false;
+	uint32_t low, high;
+
+	acquire_queue(kgd, pipe_id, queue_id);
+	act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+	if (act) {
+		low = lower_32_bits(queue_address >> 8);
+		high = upper_32_bits(queue_address >> 8);
+
+		if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
+		   high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+			retval = true;
+	}
+	release_queue(kgd);
+	return retval;
+}
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_sdma_mqd *m;
+	uint32_t sdma_base_addr;
+	uint32_t sdma_rlc_rb_cntl;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+					    m->sdma_queue_id);
+
+	sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+	if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+		return true;
+
+	return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+				enum kfd_preempt_type reset_type,
+				unsigned int utimeout, uint32_t pipe_id,
+				uint32_t queue_id)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	enum hqd_dequeue_request_type type;
+	unsigned long end_jiffies;
+	uint32_t temp;
+	struct v9_mqd *m = get_mqd(mqd);
+
+	acquire_queue(kgd, pipe_id, queue_id);
+
+	if (m->cp_hqd_vmid == 0)
+		WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
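+	/* Map the KFD preemption type to a CP dequeue request; unknown
+	 * types fall back to draining the pipe.
+	 */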
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+
+	end_jiffies = (utimeout * HZ / 1000) + jiffies;
+	while (true) {
+		temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+			break;
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue preemption time out.\n");
+			release_queue(kgd);
+			return -ETIME;
+		}
+		usleep_range(500, 1000);
+	}
+
+	release_queue(kgd);
+	return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+				unsigned int utimeout)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct v9_sdma_mqd *m;
+	uint32_t sdma_base_addr;
+	uint32_t temp;
+	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+	m = get_sdma_mqd(mqd);
+	sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+					    m->sdma_queue_id);
+
+	temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+	temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+	while (true) {
+		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+			break;
+		if (time_after(jiffies, end_jiffies))
+			return -ETIME;
+		usleep_range(500, 1000);
+	}
+
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+	m->sdmax_rlcx_rb_rptr_hi =
+		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+
+	return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+							uint8_t vmid)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+								uint8_t vmid)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+		     + vmid);
+	return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
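+	/* One request bit per VMID selects what to invalidate; the
+	 * remaining fields choose a legacy flush and which TLB levels
+	 * (L1 PTEs, L2 PTEs/PDEs) to drop.
+	 */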
+	uint32_t req = (1 << vmid) |
+		(0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
+		VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
+
+	mutex_lock(&adev->srbm_mutex);
+
+	/* Use legacy-mode TLB invalidation.
+	 *
+	 * Currently on Raven the code below is broken for anything but
+	 * legacy mode due to an MMHUB power-gating problem. A workaround
+	 * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+	 * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+	 * bit.
+	 *
+	 * TODO 1: agree on the right set of invalidation registers for
+	 * KFD use. Use the last one for now. Invalidate both GC and
+	 * MMHUB.
+	 *
+	 * TODO 2: support range-based invalidation; requires a kfd2kgd
+	 * interface change.
+	 */
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+				0xffffffff);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+				0x0000001f);
+
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+				mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+				0xffffffff);
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+				mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+				0x0000001f);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
+
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
+				req);
+
+	while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
+					(1 << vmid)))
+		cpu_relax();
+
+	while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
+					mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
+					(1 << vmid)))
+		cpu_relax();
+
+	mutex_unlock(&adev->srbm_mutex);
+}
+
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+{
+	signed long r;
+	uint32_t seq;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+	spin_lock(&adev->gfx.kiq.ring_lock);
+	amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+	amdgpu_ring_write(ring,
+			PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+			PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
+			PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+			PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+	amdgpu_fence_emit_polling(ring, &seq);
+	amdgpu_ring_commit(ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+	if (r < 1) {
+		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	int vmid;
+	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+	if (ring->ready)
+		return invalidate_tlbs_with_kiq(adev, pasid);
+
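+	/* No KIQ available: scan the KFD VMIDs for one whose ATC
+	 * mapping matches this PASID and invalidate it directly.
+	 */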
+	for (vmid = 0; vmid < 16; vmid++) {
+		if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+			continue;
+		if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
+			if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
+				== pasid) {
+				write_vmid_invalidate_request(kgd, vmid);
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("non kfd vmid %d\n", vmid);
+		return 0;
+	}
+
+	write_vmid_invalidate_request(kgd, vmid);
+	return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+	return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					uint32_t cntl_val,
+					uint32_t addr_hi,
+					uint32_t addr_lo)
+{
+	return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+					uint32_t gfx_index_val,
+					uint32_t sq_cmd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint32_t data = 0;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		INSTANCE_BROADCAST_WRITES, 1);
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		SH_BROADCAST_WRITES, 1);
+	data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+		SE_BROADCAST_WRITES, 1);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
+	return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+					unsigned int watch_point_id,
+					unsigned int reg_offset)
+{
+	return 0;
+}
+
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid)
+{
+	/* No longer needed on GFXv9. The scratch base address is
+	 * passed to the shader by the CP; setting it up is the user
+	 * mode driver's responsibility.
+	 */
+}
+
+/* FIXME: Does this need to be ASIC-specific code? */
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+	const union amdgpu_firmware_header *hdr;
+
+	switch (type) {
+	case KGD_ENGINE_PFP:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
+		break;
+
+	case KGD_ENGINE_ME:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
+		break;
+
+	case KGD_ENGINE_CE:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC1:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC2:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
+		break;
+
+	case KGD_ENGINE_RLC:
+		hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA1:
+		hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA2:
+		hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
+		break;
+
+	default:
+		return 0;
+	}
+
+	if (hdr == NULL)
+		return 0;
+
+	/* Only 12 bits are in use */
+	return hdr->common.ucode_version;
+}
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+		uint32_t page_table_base)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
+		AMDGPU_PTE_VALID;
+
+	if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+		pr_err("trying to set page table base for wrong VMID %u\n",
+		       vmid);
+		return;
+	}
+
+	/* TODO: take advantage of per-process address space size. For
+	 * now, all processes share the same address space size, like
+	 * on GFX8 and older.
+	 */
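+	/* Program both the MMHUB and GC hubs for this VMID: a full
+	 * 0..max_pfn address range pointing at the same page-table base.
+	 */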
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+			lower_32_bits(adev->vm_manager.max_pfn - 1));
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+			upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+			lower_32_bits(adev->vm_manager.max_pfn - 1));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+			upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1d6e147..ff8fd75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -23,6 +23,8 @@
 #define pr_fmt(fmt) "kfd2kgd: " fmt
 
 #include <linux/list.h>
+#include <linux/pagemap.h>
+#include <linux/sched/mm.h>
 #include <drm/drmP.h>
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -33,10 +35,20 @@
  */
 #define VI_BO_SIZE_ALIGN (0x8000)
 
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
+	uint64_t max_userptr_mem_limit;
 	int64_t system_mem_used;
+	int64_t userptr_mem_used;
 	spinlock_t mem_limit_lock;
 } kfd_mem_limit;
 
@@ -57,6 +69,7 @@
 
 #define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
 
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
 
 
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +91,7 @@
 
 /* Set memory usage limits. Currently, the limits are
  *  System (kernel) memory - 3/8th System RAM
+ *  Userptr memory - 3/4th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 {
@@ -90,8 +104,10 @@
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
 	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
-	pr_debug("Kernel memory limit %lluM\n",
-		(kfd_mem_limit.max_system_mem_limit >> 20));
+	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
+	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+		(kfd_mem_limit.max_system_mem_limit >> 20),
+		(kfd_mem_limit.max_userptr_mem_limit >> 20));
 }
 
 static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
@@ -111,6 +127,16 @@
 			goto err_no_mem;
 		}
 		kfd_mem_limit.system_mem_used += (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
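+		/* Userptr BOs: the TTM acc_size counts against the kernel
+		 * memory limit, the user pages against the userptr limit.
+		 */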
+		if ((kfd_mem_limit.system_mem_used + acc_size >
+			kfd_mem_limit.max_system_mem_limit) ||
+			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
+			kfd_mem_limit.max_userptr_mem_limit)) {
+			ret = -ENOMEM;
+			goto err_no_mem;
+		}
+		kfd_mem_limit.system_mem_used += acc_size;
+		kfd_mem_limit.userptr_mem_used += size;
 	}
 err_no_mem:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +152,16 @@
 				       sizeof(struct amdgpu_bo));
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
-	if (domain == AMDGPU_GEM_DOMAIN_GTT)
+	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		kfd_mem_limit.system_mem_used -= acc_size;
+		kfd_mem_limit.userptr_mem_used -= size;
+	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+		  "kfd userptr memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -138,12 +170,17 @@
 {
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 
-	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -=
 			(bo->tbo.acc_size + amdgpu_bo_size(bo));
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+		  "kfd userptr memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -506,7 +543,8 @@
 }
 
 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
-				struct amdkfd_process_info *process_info)
+				struct amdkfd_process_info *process_info,
+				bool userptr)
 {
 	struct ttm_validate_buffer *entry = &mem->validate_list;
 	struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +553,95 @@
 	entry->shared = true;
 	entry->bo = &bo->tbo;
 	mutex_lock(&process_info->lock);
-	list_add_tail(&entry->head, &process_info->kfd_bo_list);
+	if (userptr)
+		list_add_tail(&entry->head, &process_info->userptr_valid_list);
+	else
+		list_add_tail(&entry->head, &process_info->kfd_bo_list);
 	mutex_unlock(&process_info->lock);
 }
 
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
+			   uint64_t user_addr)
+{
+	struct amdkfd_process_info *process_info = mem->process_info;
+	struct amdgpu_bo *bo = mem->bo;
+	struct ttm_operation_ctx ctx = { true, false };
+	int ret = 0;
+
+	mutex_lock(&process_info->lock);
+
+	ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
+	if (ret) {
+		pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+		goto out;
+	}
+
+	ret = amdgpu_mn_register(bo, user_addr);
+	if (ret) {
+		pr_err("%s: Failed to register MMU notifier: %d\n",
+		       __func__, ret);
+		goto out;
+	}
+
+	/* If no restore worker is running concurrently, user_pages
+	 * should not be allocated
+	 */
+	WARN(mem->user_pages, "Leaking user_pages array");
+
+	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+					   sizeof(struct page *),
+					   GFP_KERNEL | __GFP_ZERO);
+	if (!mem->user_pages) {
+		pr_err("%s: Failed to allocate pages array\n", __func__);
+		ret = -ENOMEM;
+		goto unregister_out;
+	}
+
+	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+	if (ret) {
+		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+		goto free_out;
+	}
+
+	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
+
+	ret = amdgpu_bo_reserve(bo, true);
+	if (ret) {
+		pr_err("%s: Failed to reserve BO\n", __func__);
+		goto release_out;
+	}
+	amdgpu_ttm_placement_from_domain(bo, mem->domain);
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (ret)
+		pr_err("%s: failed to validate BO\n", __func__);
+	amdgpu_bo_unreserve(bo);
+
+release_out:
+	if (ret)
+		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+free_out:
+	kvfree(mem->user_pages);
+	mem->user_pages = NULL;
+unregister_out:
+	if (ret)
+		amdgpu_mn_unregister(bo);
+out:
+	mutex_unlock(&process_info->lock);
+	return ret;
+}
+
 /* Reserving a BO and its page table BOs must happen atomically to
  * avoid deadlocks. Some operations update multiple VMs at once. Track
  * all the reservation info in a context structure. Optionally a sync
@@ -748,7 +871,8 @@
 }
 
 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
-		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
+		bool no_update_pte)
 {
 	int ret;
 
@@ -762,6 +886,9 @@
 		return ret;
 	}
 
+	if (no_update_pte)
+		return 0;
+
 	ret = update_gpuvm_pte(adev, entry, sync);
 	if (ret) {
 		pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +947,8 @@
 		mutex_init(&info->lock);
 		INIT_LIST_HEAD(&info->vm_list_head);
 		INIT_LIST_HEAD(&info->kfd_bo_list);
+		INIT_LIST_HEAD(&info->userptr_valid_list);
+		INIT_LIST_HEAD(&info->userptr_inval_list);
 
 		info->eviction_fence =
 			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +959,11 @@
 			goto create_evict_fence_fail;
 		}
 
+		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+		atomic_set(&info->evicted_bos, 0);
+		INIT_DELAYED_WORK(&info->restore_userptr_work,
+				  amdgpu_amdkfd_restore_userptr_worker);
+
 		*process_info = info;
 		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
@@ -872,6 +1006,7 @@
 		dma_fence_put(*ef);
 		*ef = NULL;
 		*process_info = NULL;
+		put_pid(info->pid);
 create_evict_fence_fail:
 		mutex_destroy(&info->lock);
 		kfree(info);
@@ -967,8 +1102,12 @@
 	/* Release per-process resources when last compute VM is destroyed */
 	if (!process_info->n_vms) {
 		WARN_ON(!list_empty(&process_info->kfd_bo_list));
+		WARN_ON(!list_empty(&process_info->userptr_valid_list));
+		WARN_ON(!list_empty(&process_info->userptr_inval_list));
 
 		dma_fence_put(&process_info->eviction_fence->base);
+		cancel_delayed_work_sync(&process_info->restore_userptr_work);
+		put_pid(process_info->pid);
 		mutex_destroy(&process_info->lock);
 		kfree(process_info);
 	}
@@ -1003,9 +1142,11 @@
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	uint64_t user_addr = 0;
 	struct amdgpu_bo *bo;
+	struct amdgpu_bo_param bp;
 	int byte_align;
-	u32 alloc_domain;
+	u32 domain, alloc_domain;
 	u64 alloc_flags;
 	uint32_t mapping_flags;
 	int ret;
@@ -1014,14 +1155,21 @@
 	 * Check on which domain to allocate BO
 	 */
 	if (flags & ALLOC_MEM_FLAGS_VRAM) {
-		alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
 		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
 		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
 			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
 			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
 	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
-		alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 		alloc_flags = 0;
+	} else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
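+		/* Userptr BOs are created in the CPU domain and validated
+		 * into GTT later; *offset carries the user VA on input.
+		 */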
+		domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+		alloc_flags = 0;
+		if (!offset || !*offset)
+			return -EINVAL;
+		user_addr = *offset;
 	} else {
 		return -EINVAL;
 	}
@@ -1069,8 +1217,14 @@
 	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
 			va, size, domain_string(alloc_domain));
 
-	ret = amdgpu_bo_create(adev, size, byte_align,
-				alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = byte_align;
+	bp.domain = alloc_domain;
+	bp.flags = alloc_flags;
+	bp.type = ttm_bo_type_device;
+	bp.resv = NULL;
+	ret = amdgpu_bo_create(adev, &bp, &bo);
 	if (ret) {
 		pr_debug("Failed to create BO on domain %s. ret %d\n",
 				domain_string(alloc_domain), ret);
@@ -1078,18 +1232,34 @@
 	}
 	bo->kfd_bo = *mem;
 	(*mem)->bo = bo;
+	if (user_addr)
+		bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
 
 	(*mem)->va = va;
-	(*mem)->domain = alloc_domain;
+	(*mem)->domain = domain;
 	(*mem)->mapped_to_gpu_memory = 0;
 	(*mem)->process_info = avm->process_info;
-	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+	if (user_addr) {
+		ret = init_user_pages(*mem, current->mm, user_addr);
+		if (ret) {
+			mutex_lock(&avm->process_info->lock);
+			list_del(&(*mem)->validate_list.head);
+			mutex_unlock(&avm->process_info->lock);
+			goto allocate_init_user_pages_failed;
+		}
+	}
 
 	if (offset)
 		*offset = amdgpu_bo_mmap_offset(bo);
 
 	return 0;
 
+allocate_init_user_pages_failed:
+	amdgpu_bo_unref(&bo);
+	/* Don't unreserve system mem limit twice */
+	goto err_reserve_system_mem;
 err_bo_create:
 	unreserve_system_mem_limit(adev, size, alloc_domain);
 err_reserve_system_mem:
@@ -1122,12 +1292,24 @@
 	 * be freed anyway
 	 */
 
+	/* No more MMU notifiers */
+	amdgpu_mn_unregister(mem->bo);
+
 	/* Make sure restore workers don't access the BO any more */
 	bo_list_entry = &mem->validate_list;
 	mutex_lock(&process_info->lock);
 	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);
 
+	/* Free user pages if necessary */
+	if (mem->user_pages) {
+		pr_debug("%s: Freeing user_pages array\n", __func__);
+		if (mem->user_pages[0])
+			release_pages(mem->user_pages,
+					mem->bo->tbo.ttm->num_pages);
+		kvfree(mem->user_pages);
+	}
+
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
 		return ret;
@@ -1173,21 +1355,32 @@
 	struct kfd_bo_va_list *bo_va_entry = NULL;
 	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
 	unsigned long bo_size;
+	bool is_invalid_userptr = false;
 
-	/* Make sure restore is not running concurrently.
+	bo = mem->bo;
+	if (!bo) {
+		pr_err("Invalid BO when mapping memory to GPU\n");
+		return -EINVAL;
+	}
+
+	/* Make sure restore is not running concurrently. Since we
+	 * don't map invalid userptr BOs, we rely on the next restore
+	 * worker to do the mapping
 	 */
 	mutex_lock(&mem->process_info->lock);
 
-	mutex_lock(&mem->lock);
-
-	bo = mem->bo;
-
-	if (!bo) {
-		pr_err("Invalid BO when mapping memory to GPU\n");
-		ret = -EINVAL;
-		goto out;
+	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
+	 * sure that the MMU notifier is no longer running
+	 * concurrently and the queues are actually stopped
+	 */
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		down_write(&current->mm->mmap_sem);
+		is_invalid_userptr = atomic_read(&mem->invalid);
+		up_write(&current->mm->mmap_sem);
 	}
 
+	mutex_lock(&mem->lock);
+
 	domain = mem->domain;
 	bo_size = bo->tbo.mem.size;
 
@@ -1200,6 +1393,14 @@
 	if (unlikely(ret))
 		goto out;
 
+	/* Userptr can be marked as "not invalid", but not actually be
+	 * validated yet (still in the system domain). In that case
+	 * the queues are still stopped and we can leave mapping for
+	 * the next restore worker
+	 */
+	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+		is_invalid_userptr = true;
+
 	if (check_if_add_bo_to_vm(avm, mem)) {
 		ret = add_bo_to_vm(adev, mem, avm, false,
 				&bo_va_entry);
@@ -1217,7 +1418,8 @@
 			goto add_bo_to_vm_failed;
 	}
 
-	if (mem->mapped_to_gpu_memory == 0) {
+	if (mem->mapped_to_gpu_memory == 0 &&
+	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
 		/* Validate BO only once. The eviction fence gets added to BO
 		 * the first time it is mapped. Validate will wait for all
 		 * background evictions to complete.
@@ -1235,7 +1437,8 @@
 					entry->va, entry->va + bo_size,
 					entry);
 
-			ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+			ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+					      is_invalid_userptr);
 			if (ret) {
 				pr_err("Failed to map radeon bo to gpuvm\n");
 				goto map_bo_to_gpuvm_failed;
@@ -1418,6 +1621,337 @@
 	return ret;
 }
 
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory. Therefore this is as
+ * simple as possible, using atomic counters.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
+				struct mm_struct *mm)
+{
+	struct amdkfd_process_info *process_info = mem->process_info;
+	int invalid, evicted_bos;
+	int r = 0;
+
+	invalid = atomic_inc_return(&mem->invalid);
+	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
+	if (evicted_bos == 1) {
+		/* First eviction, stop the queues */
+		r = kgd2kfd->quiesce_mm(mm);
+		if (r)
+			pr_err("Failed to quiesce KFD\n");
+		schedule_delayed_work(&process_info->restore_userptr_work,
+			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+	}
+
+	return r;
+}
+
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+				     struct mm_struct *mm)
+{
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct ttm_operation_ctx ctx = { false, false };
+	int invalid, ret;
+
+	/* Move all invalidated BOs to the userptr_inval_list and
+	 * release their user pages by migration to the CPU domain
+	 */
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_valid_list,
+				 validate_list.head) {
+		if (!atomic_read(&mem->invalid))
+			continue; /* BO is still valid */
+
+		bo = mem->bo;
+
+		if (amdgpu_bo_reserve(bo, true))
+			return -EAGAIN;
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+		amdgpu_bo_unreserve(bo);
+		if (ret) {
+			pr_err("%s: Failed to invalidate userptr BO\n",
+			       __func__);
+			return -EAGAIN;
+		}
+
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_inval_list);
+	}
+
+	if (list_empty(&process_info->userptr_inval_list))
+		return 0; /* All evicted userptr BOs were freed */
+
+	/* Go through userptr_inval_list and update any invalid user_pages */
+	list_for_each_entry(mem, &process_info->userptr_inval_list,
+			    validate_list.head) {
+		invalid = atomic_read(&mem->invalid);
+		if (!invalid)
+			/* BO hasn't been invalidated since the last
+			 * revalidation attempt. Keep its BO list.
+			 */
+			continue;
+
+		bo = mem->bo;
+
+		if (!mem->user_pages) {
+			mem->user_pages =
+				kvmalloc_array(bo->tbo.ttm->num_pages,
+						 sizeof(struct page *),
+						 GFP_KERNEL | __GFP_ZERO);
+			if (!mem->user_pages) {
+				pr_err("%s: Failed to allocate pages array\n",
+				       __func__);
+				return -ENOMEM;
+			}
+		} else if (mem->user_pages[0]) {
+			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+		}
+
+		/* Get updated user pages */
+		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+						   mem->user_pages);
+		if (ret) {
+			mem->user_pages[0] = NULL;
+			pr_info("%s: Failed to get user pages: %d\n",
+				__func__, ret);
+			/* Pretend it succeeded. It will fail later
+			 * with a VM fault if the GPU tries to access
+			 * it. Better than hanging indefinitely with
+			 * stalled user mode queues.
+			 */
+		}
+
+		/* Mark the BO as valid unless it was invalidated
+		 * again concurrently
+		 */
+		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
+			return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list, and moves them back to the
+ * userptr_valid_list. Also updates GPUVM page tables with new page
+ * addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+	struct amdgpu_bo_list_entry *pd_bo_list_entries;
+	struct list_head resv_list, duplicates;
+	struct ww_acquire_ctx ticket;
+	struct amdgpu_sync sync;
+
+	struct amdgpu_vm *peer_vm;
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct ttm_operation_ctx ctx = { false, false };
+	int i, ret;
+
+	pd_bo_list_entries = kcalloc(process_info->n_vms,
+				     sizeof(struct amdgpu_bo_list_entry),
+				     GFP_KERNEL);
+	if (!pd_bo_list_entries) {
+		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&resv_list);
+	INIT_LIST_HEAD(&duplicates);
+
+	/* Get all the page directory BOs that need to be reserved */
+	i = 0;
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
+				    &pd_bo_list_entries[i++]);
+	/* Add the userptr_inval_list entries to resv_list */
+	list_for_each_entry(mem, &process_info->userptr_inval_list,
+			    validate_list.head) {
+		list_add_tail(&mem->resv_list.head, &resv_list);
+		mem->resv_list.bo = mem->validate_list.bo;
+		mem->resv_list.shared = mem->validate_list.shared;
+	}
+
+	/* Reserve all BOs and page tables for validation */
+	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+	WARN(!list_empty(&duplicates), "Duplicates should be empty");
+	if (ret)
+		goto out;
+
+	amdgpu_sync_create(&sync);
+
+	/* Avoid triggering eviction fences when unmapping invalid
+	 * userptr BOs (waits for all fences, doesn't use
+	 * FENCE_OWNER_VM)
+	 */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
+						process_info->eviction_fence,
+						NULL, NULL);
+
+	ret = process_validate_vms(process_info);
+	if (ret)
+		goto unreserve_out;
+
+	/* Validate BOs and update GPUVM page tables */
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+		struct kfd_bo_va_list *bo_va_entry;
+
+		bo = mem->bo;
+
+		/* Copy pages array and validate the BO if we got user pages */
+		if (mem->user_pages[0]) {
+			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
+						     mem->user_pages);
+			amdgpu_ttm_placement_from_domain(bo, mem->domain);
+			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+			if (ret) {
+				pr_err("%s: failed to validate BO\n", __func__);
+				goto unreserve_out;
+			}
+		}
+
+		/* Validate succeeded, now the BO owns the pages, free
+		 * our copy of the pointer array. Put this BO back on
+		 * the userptr_valid_list. If we need to revalidate
+		 * it, we need to start from scratch.
+		 */
+		kvfree(mem->user_pages);
+		mem->user_pages = NULL;
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_valid_list);
+
+		/* Update mapping. If the BO was not validated
+		 * (because we couldn't get user pages), this will
+		 * clear the page table entries, which will result in
+		 * VM faults if the GPU tries to access the invalid
+		 * memory.
+		 */
+		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
+			if (!bo_va_entry->is_mapped)
+				continue;
+
+			ret = update_gpuvm_pte((struct amdgpu_device *)
+					       bo_va_entry->kgd_dev,
+					       bo_va_entry, &sync);
+			if (ret) {
+				pr_err("%s: update PTE failed\n", __func__);
+				/* make sure this gets validated again */
+				atomic_inc(&mem->invalid);
+				goto unreserve_out;
+			}
+		}
+	}
+
+	/* Update page directories */
+	ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_bo_fence(peer_vm->root.base.bo,
+				&process_info->eviction_fence->base, true);
+	ttm_eu_backoff_reservation(&ticket, &resv_list);
+	amdgpu_sync_wait(&sync, false);
+	amdgpu_sync_free(&sync);
+out:
+	kfree(pd_bo_list_entries);
+
+	return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct amdkfd_process_info *process_info =
+		container_of(dwork, struct amdkfd_process_info,
+			     restore_userptr_work);
+	struct task_struct *usertask;
+	struct mm_struct *mm;
+	int evicted_bos;
+
+	evicted_bos = atomic_read(&process_info->evicted_bos);
+	if (!evicted_bos)
+		return;
+
+	/* Reference task and mm in case of concurrent process termination */
+	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+	if (!usertask)
+		return;
+	mm = get_task_mm(usertask);
+	if (!mm) {
+		put_task_struct(usertask);
+		return;
+	}
+
+	mutex_lock(&process_info->lock);
+
+	if (update_invalid_user_pages(process_info, mm))
+		goto unlock_out;
+	/* userptr_inval_list can be empty if all evicted userptr BOs
+	 * have been freed. In that case there is nothing to validate
+	 * and we can just restart the queues.
+	 */
+	if (!list_empty(&process_info->userptr_inval_list)) {
+		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
+			goto unlock_out; /* Concurrent eviction, try again */
+
+		if (validate_invalid_user_pages(process_info))
+			goto unlock_out;
+	}
+	/* Final check for concurrent eviction and atomic update. If
+	 * another eviction happens after successful update, it will
+	 * be a first eviction that calls quiesce_mm. The eviction
+	 * reference counting inside KFD will handle this case.
+	 */
+	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
+	    evicted_bos)
+		goto unlock_out;
+	evicted_bos = 0;
+	if (kgd2kfd->resume_mm(mm)) {
+		pr_err("%s: Failed to resume KFD\n", __func__);
+		/* No recovery from this failure. Probably the CP is
+		 * hanging. No point trying again.
+		 */
+	}
+unlock_out:
+	mutex_unlock(&process_info->lock);
+	mmput(mm);
+	put_task_struct(usertask);
+
+	/* If validation failed, reschedule another attempt */
+	if (evicted_bos)
+		schedule_delayed_work(&process_info->restore_userptr_work,
+			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+}
+
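This worker is the consumer half of the scheme; the eviction side (driven from
the MMU notifier) is expected to bump both counters and kick the delayed work.
A hedged sketch of what that producer side looks like (the helper name is
illustrative, not taken verbatim from this patch):

    /* Hypothetical producer-side sketch: invalidate one userptr BO and
     * make sure the restore worker will eventually run.
     */
    static void evict_userptr_bo(struct amdkfd_process_info *process_info,
                                 struct kgd_mem *mem)
    {
            atomic_inc(&mem->invalid);
            if (atomic_inc_return(&process_info->evicted_bos) == 1)
                    schedule_delayed_work(&process_info->restore_userptr_work,
                            msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
    }
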
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
  *   KFD process identified by process_info
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index a0f48cb..2369158 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -322,3 +322,47 @@
 
 	return ret;
 }
+
+union gfx_info {
+	struct  atom_gfx_info_v2_4 v24;
+};
+
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
+{
+	struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	int index;
+	uint8_t frev, crev;
+	uint16_t data_offset;
+
+	index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+					    gfx_info);
+	if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
+				   &frev, &crev, &data_offset)) {
+		union gfx_info *gfx_info = (union gfx_info *)
+			(mode_info->atom_context->bios + data_offset);
+		switch (crev) {
+		case 4:
+			adev->gfx.config.max_shader_engines = gfx_info->v24.gc_num_se;
+			adev->gfx.config.max_cu_per_sh = gfx_info->v24.gc_num_cu_per_sh;
+			adev->gfx.config.max_sh_per_se = gfx_info->v24.gc_num_sh_per_se;
+			adev->gfx.config.max_backends_per_se = gfx_info->v24.gc_num_rb_per_se;
+			adev->gfx.config.max_texture_channel_caches = gfx_info->v24.gc_num_tccs;
+			adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+			adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+			adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+			adev->gfx.config.gs_prim_buffer_depth =
+				le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+			adev->gfx.config.double_offchip_lds_buf =
+				gfx_info->v24.gc_double_offchip_lds_buffer;
+			adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+			adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+			adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+			adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+			return 0;
+		default:
+			return -EINVAL;
+		}
+
+	}
+	return -EINVAL;
+}
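
A caller would typically invoke this during early ASIC init and fall back to
hardcoded per-ASIC values when no usable table revision is found; a minimal
sketch, assuming a hypothetical fallback helper:

    /* Hypothetical caller sketch, not from this patch */
    if (amdgpu_atomfirmware_get_gfx_info(adev)) {
            dev_warn(adev->dev,
                     "no usable gfx_info table, using per-ASIC defaults\n");
            gfx_set_default_config(adev);   /* hypothetical fallback */
    }
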
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 7689c96..20f158f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -30,5 +30,6 @@
 int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
 int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 1ae5ae8..1bcb2b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -550,7 +550,7 @@
  * look up whether we are the integrated or discrete GPU (all asics).
  * Returns the client id.
  */
-static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
+static enum vga_switcheroo_client_id amdgpu_atpx_get_client_id(struct pci_dev *pdev)
 {
 	if (amdgpu_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
 		return VGA_SWITCHEROO_IGD;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 02b849b..19cfff3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -75,13 +75,20 @@
 {
 	struct amdgpu_bo *dobj = NULL;
 	struct amdgpu_bo *sobj = NULL;
+	struct amdgpu_bo_param bp;
 	uint64_t saddr, daddr;
 	int r, n;
 	int time;
 
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = sdomain;
+	bp.flags = 0;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
 	n = AMDGPU_BENCHMARK_ITERATIONS;
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0,
-			     ttm_bo_type_kernel, NULL, &sobj);
+	r = amdgpu_bo_create(adev, &bp, &sobj);
 	if (r) {
 		goto out_cleanup;
 	}
@@ -93,8 +100,8 @@
 	if (r) {
 		goto out_cleanup;
 	}
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
-			     ttm_bo_type_kernel, NULL, &dobj);
+	bp.domain = ddomain;
+	r = amdgpu_bo_create(adev, &bp, &dobj);
 	if (r) {
 		goto out_cleanup;
 	}
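
This hunk is one instance of a tree-wide conversion of amdgpu_bo_create() from
a long positional argument list to a parameter structure, which lets callers
fill the struct once and tweak a single field (bp.domain above) between
allocations. Judging from the fields used across this series, the structure
looks roughly like the sketch below; exact field order and types are inferred,
not guaranteed by this excerpt:

    /* Approximate shape of the new parameter struct, inferred from its
     * users in this series; treat layout as illustrative.
     */
    struct amdgpu_bo_param {
            unsigned long                   size;
            int                             byte_align;
            u32                             domain;
            u32                             preferred_domain;
            u64                             flags;
            enum ttm_bo_type                type;
            struct reservation_object       *resv;
    };
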
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 71a57b2..e950730 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -23,7 +23,6 @@
  */
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <linux/pci.h>
 #include <drm/drmP.h>
 #include <linux/firmware.h>
 #include <drm/amdgpu_drm.h>
@@ -109,121 +108,6 @@
 	WARN(1, "Invalid indirect register space");
 }
 
-static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device,
-				       enum cgs_resource_type resource_type,
-				       uint64_t size,
-				       uint64_t offset,
-				       uint64_t *resource_base)
-{
-	CGS_FUNC_ADEV;
-
-	if (resource_base == NULL)
-		return -EINVAL;
-
-	switch (resource_type) {
-	case CGS_RESOURCE_TYPE_MMIO:
-		if (adev->rmmio_size == 0)
-			return -ENOENT;
-		if ((offset + size) > adev->rmmio_size)
-			return -EINVAL;
-		*resource_base = adev->rmmio_base;
-		return 0;
-	case CGS_RESOURCE_TYPE_DOORBELL:
-		if (adev->doorbell.size == 0)
-			return -ENOENT;
-		if ((offset + size) > adev->doorbell.size)
-			return -EINVAL;
-		*resource_base = adev->doorbell.base;
-		return 0;
-	case CGS_RESOURCE_TYPE_FB:
-	case CGS_RESOURCE_TYPE_IO:
-	case CGS_RESOURCE_TYPE_ROM:
-	default:
-		return -EINVAL;
-	}
-}
-
-static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device,
-						  unsigned table, uint16_t *size,
-						  uint8_t *frev, uint8_t *crev)
-{
-	CGS_FUNC_ADEV;
-	uint16_t data_start;
-
-	if (amdgpu_atom_parse_data_header(
-		    adev->mode_info.atom_context, table, size,
-		    frev, crev, &data_start))
-		return (uint8_t*)adev->mode_info.atom_context->bios +
-			data_start;
-
-	return NULL;
-}
-
-static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table,
-					      uint8_t *frev, uint8_t *crev)
-{
-	CGS_FUNC_ADEV;
-
-	if (amdgpu_atom_parse_cmd_header(
-		    adev->mode_info.atom_context, table,
-		    frev, crev))
-		return 0;
-
-	return -EINVAL;
-}
-
-static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table,
-					  void *args)
-{
-	CGS_FUNC_ADEV;
-
-	return amdgpu_atom_execute_table(
-		adev->mode_info.atom_context, table, args);
-}
-
-static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
-				  enum amd_ip_block_type block_type,
-				  enum amd_clockgating_state state)
-{
-	CGS_FUNC_ADEV;
-	int i, r = -1;
-
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
-			continue;
-
-		if (adev->ip_blocks[i].version->type == block_type) {
-			r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
-								(void *)adev,
-									state);
-			break;
-		}
-	}
-	return r;
-}
-
-static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device,
-				  enum amd_ip_block_type block_type,
-				  enum amd_powergating_state state)
-{
-	CGS_FUNC_ADEV;
-	int i, r = -1;
-
-	for (i = 0; i < adev->num_ip_blocks; i++) {
-		if (!adev->ip_blocks[i].status.valid)
-			continue;
-
-		if (adev->ip_blocks[i].version->type == block_type) {
-			r = adev->ip_blocks[i].version->funcs->set_powergating_state(
-								(void *)adev,
-									state);
-			break;
-		}
-	}
-	return r;
-}
-
-
 static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
 {
 	CGS_FUNC_ADEV;
@@ -271,18 +155,6 @@
 	return result;
 }
 
-static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
-{
-	CGS_FUNC_ADEV;
-	if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
-		release_firmware(adev->pm.fw);
-		adev->pm.fw = NULL;
-		return 0;
-	}
-	/* cannot release other firmware because they are not created by cgs */
-	return -EINVAL;
-}
-
 static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device,
 					enum cgs_ucode_id type)
 {
@@ -326,34 +198,6 @@
 	return fw_version;
 }
 
-static int amdgpu_cgs_enter_safe_mode(struct cgs_device *cgs_device,
-					bool en)
-{
-	CGS_FUNC_ADEV;
-
-	if (adev->gfx.rlc.funcs->enter_safe_mode == NULL ||
-		adev->gfx.rlc.funcs->exit_safe_mode == NULL)
-		return 0;
-
-	if (en)
-		adev->gfx.rlc.funcs->enter_safe_mode(adev);
-	else
-		adev->gfx.rlc.funcs->exit_safe_mode(adev);
-
-	return 0;
-}
-
-static void amdgpu_cgs_lock_grbm_idx(struct cgs_device *cgs_device,
-					bool lock)
-{
-	CGS_FUNC_ADEV;
-
-	if (lock)
-		mutex_lock(&adev->grbm_idx_mutex);
-	else
-		mutex_unlock(&adev->grbm_idx_mutex);
-}
-
 static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 					enum cgs_ucode_id type,
 					struct cgs_firmware_info *info)
@@ -541,6 +385,9 @@
 			case CHIP_POLARIS12:
 				strcpy(fw_name, "amdgpu/polaris12_smc.bin");
 				break;
+			case CHIP_VEGAM:
+				strcpy(fw_name, "amdgpu/vegam_smc.bin");
+				break;
 			case CHIP_VEGA10:
 				if ((adev->pdev->device == 0x687f) &&
 					((adev->pdev->revision == 0xc0) ||
@@ -553,6 +400,9 @@
 			case CHIP_VEGA12:
 				strcpy(fw_name, "amdgpu/vega12_smc.bin");
 				break;
+			case CHIP_VEGA20:
+				strcpy(fw_name, "amdgpu/vega20_smc.bin");
+				break;
 			default:
 				DRM_ERROR("SMC firmware not supported\n");
 				return -EINVAL;
@@ -598,97 +448,12 @@
 	return 0;
 }
 
-static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
-{
-	CGS_FUNC_ADEV;
-	return amdgpu_sriov_vf(adev);
-}
-
-static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
-					  struct cgs_display_info *info)
-{
-	CGS_FUNC_ADEV;
-	struct cgs_mode_info *mode_info;
-
-	if (info == NULL)
-		return -EINVAL;
-
-	mode_info = info->mode_info;
-	if (mode_info)
-		/* if the displays are off, vblank time is max */
-		mode_info->vblank_time_us = 0xffffffff;
-
-	if (!amdgpu_device_has_dc_support(adev)) {
-		struct amdgpu_crtc *amdgpu_crtc;
-		struct drm_device *ddev = adev->ddev;
-		struct drm_crtc *crtc;
-		uint32_t line_time_us, vblank_lines;
-
-		if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
-			list_for_each_entry(crtc,
-					&ddev->mode_config.crtc_list, head) {
-				amdgpu_crtc = to_amdgpu_crtc(crtc);
-				if (crtc->enabled) {
-					info->active_display_mask |= (1 << amdgpu_crtc->crtc_id);
-					info->display_count++;
-				}
-				if (mode_info != NULL &&
-					crtc->enabled && amdgpu_crtc->enabled &&
-					amdgpu_crtc->hw_mode.clock) {
-					line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
-								amdgpu_crtc->hw_mode.clock;
-					vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
-								amdgpu_crtc->hw_mode.crtc_vdisplay +
-								(amdgpu_crtc->v_border * 2);
-					mode_info->vblank_time_us = vblank_lines * line_time_us;
-					mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
-					/* we have issues with mclk switching with refresh rates
-					 * over 120 hz on the non-DC code.
-					 */
-					if (mode_info->refresh_rate > 120)
-						mode_info->vblank_time_us = 0;
-					mode_info = NULL;
-				}
-			}
-		}
-	} else {
-		info->display_count = adev->pm.pm_display_cfg.num_display;
-		if (mode_info != NULL) {
-			mode_info->vblank_time_us = adev->pm.pm_display_cfg.min_vblank_time;
-			mode_info->refresh_rate = adev->pm.pm_display_cfg.vrefresh;
-		}
-	}
-	return 0;
-}
-
-
-static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled)
-{
-	CGS_FUNC_ADEV;
-
-	adev->pm.dpm_enabled = enabled;
-
-	return 0;
-}
-
 static const struct cgs_ops amdgpu_cgs_ops = {
 	.read_register = amdgpu_cgs_read_register,
 	.write_register = amdgpu_cgs_write_register,
 	.read_ind_register = amdgpu_cgs_read_ind_register,
 	.write_ind_register = amdgpu_cgs_write_ind_register,
-	.get_pci_resource = amdgpu_cgs_get_pci_resource,
-	.atom_get_data_table = amdgpu_cgs_atom_get_data_table,
-	.atom_get_cmd_table_revs = amdgpu_cgs_atom_get_cmd_table_revs,
-	.atom_exec_cmd_table = amdgpu_cgs_atom_exec_cmd_table,
 	.get_firmware_info = amdgpu_cgs_get_firmware_info,
-	.rel_firmware = amdgpu_cgs_rel_firmware,
-	.set_powergating_state = amdgpu_cgs_set_powergating_state,
-	.set_clockgating_state = amdgpu_cgs_set_clockgating_state,
-	.get_active_displays_info = amdgpu_cgs_get_active_displays_info,
-	.notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
-	.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
-	.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
-	.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
 };
 
 struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 96501ff..8e66851 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -691,7 +691,7 @@
 	return ret;
 }
 
-static int amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector,
 					     struct drm_display_mode *mode)
 {
 	struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
@@ -843,7 +843,7 @@
 	return ret;
 }
 
-static int amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector,
 					    struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
@@ -1172,7 +1172,7 @@
 		amdgpu_connector->use_digital = true;
 }
 
-static int amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
 					    struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
@@ -1448,7 +1448,7 @@
 	return ret;
 }
 
-static int amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector,
 					   struct drm_display_mode *mode)
 {
 	struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
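
The four hunks above only change return types: the drm_connector_helper_funcs
mode_valid callback is declared to return enum drm_mode_status, and returning a
plain int relied on implicit conversion. With the proper type, a callback reads
like this minimal sketch (the clock limit is an arbitrary example, not an
amdgpu constraint):

    static enum drm_mode_status
    example_mode_valid(struct drm_connector *connector,
                       struct drm_display_mode *mode)
    {
            if (mode->clock > 340000)       /* kHz; illustrative limit */
                    return MODE_CLOCK_HIGH;
            return MODE_OK;
    }
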
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc34b50..9c1d491 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -382,8 +382,7 @@
 
 	p->bytes_moved += ctx.bytes_moved;
 	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-	    bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
+	    amdgpu_bo_in_cpu_visible_vram(bo))
 		p->bytes_moved_vis += ctx.bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -411,7 +410,6 @@
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo *bo = candidate->robj;
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		u64 initial_bytes_moved, bytes_moved;
 		bool update_bytes_moved_vis;
 		uint32_t other;
 
@@ -435,18 +433,14 @@
 			continue;
 
 		/* Good we can try to move this BO somewhere else */
-		amdgpu_ttm_placement_from_domain(bo, other);
 		update_bytes_moved_vis =
 			adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
-			bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
-		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
+			amdgpu_bo_in_cpu_visible_vram(bo);
+		amdgpu_ttm_placement_from_domain(bo, other);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
-			initial_bytes_moved;
-		p->bytes_moved += bytes_moved;
+		p->bytes_moved += ctx.bytes_moved;
 		if (update_bytes_moved_vis)
-			p->bytes_moved_vis += bytes_moved;
+			p->bytes_moved_vis += ctx.bytes_moved;
 
 		if (unlikely(r))
 			break;
@@ -536,7 +530,7 @@
 	if (p->bo_list) {
 		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 		if (p->bo_list->first_userptr != p->bo_list->num_entries)
-			p->mn = amdgpu_mn_get(p->adev);
+			p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 	}
 
 	INIT_LIST_HEAD(&duplicates);
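
Both hunks in this file replace the old pattern of sampling the device-global
adev->num_bytes_moved counter before and after validation with the bytes_moved
counter that TTM now accumulates in the operation context. The before/after
shape of the pattern, sketched:

    /* Old pattern (removed above): infer per-BO movement by diffing a
     * device-global counter around the validate call.
     */
    initial = atomic64_read(&adev->num_bytes_moved);
    r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    p->bytes_moved += atomic64_read(&adev->num_bytes_moved) - initial;

    /* New pattern: TTM accumulates movement in the operation context,
     * so concurrent validations no longer pollute the accounting.
     */
    r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
    p->bytes_moved += ctx.bytes_moved;
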
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3fabf9f9..c5bb362 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -91,7 +91,7 @@
 			continue;
 
 		r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
-					  rq, amdgpu_sched_jobs, &ctx->guilty);
+					  rq, &ctx->guilty);
 		if (r)
 			goto failed;
 	}
@@ -111,8 +111,9 @@
 	return r;
 }
 
-static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
+static void amdgpu_ctx_fini(struct kref *ref)
 {
+	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
@@ -125,13 +126,11 @@
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 
-	for (i = 0; i < adev->num_rings; i++)
-		drm_sched_entity_fini(&adev->rings[i]->sched,
-				      &ctx->rings[i].entity);
-
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
 
 	mutex_destroy(&ctx->lock);
+
+	kfree(ctx);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -170,12 +169,20 @@
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
+	u32 i;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
 
-	amdgpu_ctx_fini(ctx);
+	for (i = 0; i < ctx->adev->num_rings; i++) {
 
-	kfree(ctx);
+		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+			continue;
+
+		drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
+			&ctx->rings[i].entity);
+	}
+
+	amdgpu_ctx_fini(ref);
 }
 
 static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
@@ -437,16 +444,72 @@
 	idr_init(&mgr->ctx_handles);
 }
 
+void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
+{
+	struct amdgpu_ctx *ctx;
+	struct idr *idp;
+	uint32_t id, i;
+
+	idp = &mgr->ctx_handles;
+
+	idr_for_each_entry(idp, ctx, id) {
+
+		if (!ctx->adev)
+			return;
+
+		for (i = 0; i < ctx->adev->num_rings; i++) {
+
+			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+				continue;
+
+			if (kref_read(&ctx->refcount) == 1)
+				drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
+						  &ctx->rings[i].entity);
+			else
+				DRM_ERROR("ctx %p is still alive\n", ctx);
+		}
+	}
+}
+
+void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
+{
+	struct amdgpu_ctx *ctx;
+	struct idr *idp;
+	uint32_t id, i;
+
+	idp = &mgr->ctx_handles;
+
+	idr_for_each_entry(idp, ctx, id) {
+
+		if (!ctx->adev)
+			return;
+
+		for (i = 0; i < ctx->adev->num_rings; i++) {
+
+			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
+				continue;
+
+			if (kref_read(&ctx->refcount) == 1)
+				drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
+					&ctx->rings[i].entity);
+			else
+				DRM_ERROR("ctx %p is still alive\n", ctx);
+		}
+	}
+}
+
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 {
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
 	uint32_t id;
 
+	amdgpu_ctx_mgr_entity_cleanup(mgr);
+
 	idp = &mgr->ctx_handles;
 
 	idr_for_each_entry(idp, ctx, id) {
-		if (kref_put(&ctx->refcount, amdgpu_ctx_do_release) != 1)
+		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
 			DRM_ERROR("ctx %p is still alive\n", ctx);
 	}
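
The net effect of this refactor is that context teardown is split: scheduler
entities are drained (drm_sched_entity_do_release) and cleaned up
(drm_sched_entity_cleanup) while the ring schedulers still exist, and the final
kref release only frees the remaining context state. The two release paths pair
up roughly like this:

    /* Normal runtime release: tear down entities, then free the ctx */
    kref_put(&ctx->refcount, amdgpu_ctx_do_release);

    /* Manager teardown: entities were already cleaned up by
     * amdgpu_ctx_mgr_entity_cleanup(), so the final put only frees.
     */
    kref_put(&ctx->refcount, amdgpu_ctx_fini);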
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 448d69f..f5fb937 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -28,8 +28,13 @@
 #include <linux/debugfs.h>
 #include "amdgpu.h"
 
-/*
- * Debugfs
+/**
+ * amdgpu_debugfs_add_files - Add simple debugfs entries
+ *
+ * @adev:  Device to attach debugfs entries to
+ * @files:  Array of function callbacks that respond to reads
+ * @nfiles: Number of callbacks to register
+ *
  */
 int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
 			     const struct drm_info_list *files,
@@ -64,7 +69,33 @@
 
 #if defined(CONFIG_DEBUG_FS)
 
-
+/**
+ * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
+ *
+ * @read: True if reading
+ * @f: open file handle
+ * @buf: User buffer to write/read to
+ * @size: Number of bytes to write/read
+ * @pos:  Offset to seek to
+ *
+ * This debugfs entry attaches special meaning to the offset being sought.
+ * Various bits have different meanings:
+ *
+ * Bit 62:  Indicates a GRBM bank switch is needed
+ * Bit 61:  Indicates an SRBM bank switch is needed (implies bit 62 is
+ *          zero)
+ * Bits 24..33: The SE or ME selector if needed
+ * Bits 34..43: The SH (or SA) or PIPE selector if needed
+ * Bits 44..53: The INSTANCE (or CU/WGP) or QUEUE selector if needed
+ *
+ * Bit 23:  Indicates that the PM power gating lock should be held
+ * 			This is necessary to read registers that might be
+ * 			unreliable during a power gating transition.
+ *
+ * The lower bits are the BYTE offset of the register to read.  This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
 		char __user *buf, size_t size, loff_t *pos)
 {
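
From userspace, the documented offset encoding means a banked register read is
a matter of OR-ing the selectors into the pread() offset. A hedged example
(selector values are arbitrary, and the debugfs path assumes DRM minor 0):

    /* Hypothetical userspace sketch: read one register on SE 1,
     * instance 2, using the bit layout documented above.
     */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            uint32_t value;
            uint64_t pos = (1ULL << 62)             /* GRBM bank switch */
                         | ((uint64_t)1 << 24)      /* SE selector */
                         | ((uint64_t)2 << 44)      /* INSTANCE selector */
                         | 0x1234;                  /* byte offset of register */
            int fd = open("/sys/kernel/debug/dri/0/amdgpu_regs", O_RDONLY);

            if (fd < 0 || pread(fd, &value, sizeof(value), pos) != sizeof(value))
                    return 1;
            printf("reg = 0x%08x\n", value);
            close(fd);
            return 0;
    }
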
@@ -164,19 +195,37 @@
 	return result;
 }
 
-
+/**
+ * amdgpu_debugfs_regs_read - Callback for reading MMIO registers
+ */
 static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
 	return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
 }
 
+/**
+ * amdgpu_debugfs_regs_write - Callback for writing MMIO registers
+ */
 static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
 	return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
 }
 
+
+/**
+ * amdgpu_debugfs_regs_pcie_read - Read from a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read.  This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -204,6 +253,18 @@
 	return result;
 }
 
+/**
+ * amdgpu_debugfs_regs_pcie_write - Write to a PCIE register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write.  This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
@@ -232,6 +293,18 @@
 	return result;
 }
 
+/**
+ * amdgpu_debugfs_regs_didt_read - Read from a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read.  This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -259,6 +332,18 @@
 	return result;
 }
 
+/**
+ * amdgpu_debugfs_regs_didt_write - Write to a DIDT register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write.  This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
@@ -287,6 +372,18 @@
 	return result;
 }
 
+/**
+ * amdgpu_debugfs_regs_smc_read - Read from a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to read.  This
+ * allows reading multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -314,6 +411,18 @@
 	return result;
 }
 
+/**
+ * amdgpu_debugfs_regs_smc_write - Write to a SMC register
+ *
+ * @f: open file handle
+ * @buf: User buffer to write data from
+ * @size: Number of bytes to write
+ * @pos:  Offset to seek to
+ *
+ * The lower bits are the BYTE offset of the register to write.  This
+ * allows writing multiple registers in a single call and having
+ * the returned size reflect that.
+ */
 static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
@@ -342,6 +451,20 @@
 	return result;
 }
 
+/**
+ * amdgpu_debugfs_gca_config_read - Read from gfx config data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * This file is used to access configuration data in a somewhat
+ * stable fashion.  The format is a series of DWORDs with the first
+ * indicating which revision it is.  New content is appended to the
+ * end so that older software can still read the data.
+ */
 static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -418,6 +541,19 @@
 	return result;
 }
 
+/**
+ * amdgpu_debugfs_sensor_read - Read from the powerplay sensors
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The offset is treated as the BYTE address of one of the sensors
+ * enumerated in amd/include/kgd_pp_interface.h under the
+ * 'amd_pp_sensors' enumeration.  For instance, to read the UVD VCLK
+ * you would use the offset 3 * 4 = 12.
+ */
 static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
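
Using the comment's own example, reading sensor index 3 (the UVD VCLK) from
userspace would look like the fragment below (hedged sketch; the debugfs path
assumes DRM minor 0):

    /* Hypothetical userspace sketch for the amdgpu_sensors file;
     * sensor index 3 maps to byte offset 3 * 4 = 12.
     */
    uint32_t vclk;
    int fd = open("/sys/kernel/debug/dri/0/amdgpu_sensors", O_RDONLY);

    if (fd >= 0 && pread(fd, &vclk, sizeof(vclk), 3 * 4) == sizeof(vclk))
            printf("UVD VCLK: %u\n", vclk);
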
@@ -428,7 +564,7 @@
 	if (size & 3 || *pos & 0x3)
 		return -EINVAL;
 
-	if (amdgpu_dpm == 0)
+	if (!adev->pm.dpm_enabled)
 		return -EINVAL;
 
 	/* convert offset to sensor number */
@@ -457,6 +593,27 @@
 	return !r ? outsize : r;
 }
 
+/**
+ * amdgpu_debugfs_wave_read - Read WAVE STATUS data
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The offset being sought selects the wave for which status data
+ * will be returned.  The bits are used as follows:
+ *
+ * Bits 0..6: 	Byte offset into data
+ * Bits 7..14:	SE selector
+ * Bits 15..22:	SH/SA selector
+ * Bits 23..30: CU/{WGP+SIMD} selector
+ * Bits 31..36: WAVE ID selector
+ * Bits 37..44: SIMD ID selector
+ *
+ * The returned data begins with one DWORD of version information,
+ * followed by WAVE STATUS registers relevant to the GFX IP version
+ * being used.  See gfx_v8_0_read_wave_data() for an example output.
+ */
 static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -507,6 +664,28 @@
 	return result;
 }
 
+/**
+ * amdgpu_debugfs_gpr_read - Read wave gprs
+ *
+ * @f: open file handle
+ * @buf: User buffer to store read data in
+ * @size: Number of bytes to read
+ * @pos:  Offset to seek to
+ *
+ * The offset being sought selects the wave for which register data
+ * will be returned.  The bits are used as follows:
+ *
+ * Bits 0..11:	Byte offset into data
+ * Bits 12..19:	SE selector
+ * Bits 20..27:	SH/SA selector
+ * Bits 28..35: CU/{WGP+SIMD} selector
+ * Bits 36..43: WAVE ID selector
+ * Bits 44..51: SIMD ID selector
+ * Bits 52..59: Thread selector
+ * Bits 60..61: Bank selector (VGPR=0,SGPR=1)
+ *
+ * The returned data comes from the SGPR or VGPR register bank for
+ * the selected operational unit.
+ */
 static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
@@ -637,6 +816,12 @@
 	"amdgpu_gpr",
 };
 
+/**
+ * amdgpu_debugfs_regs_init - Initialize debugfs entries that provide
+ * register access.
+ *
+ * @adev: The device to attach the debugfs entries to
+ */
 int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
 {
 	struct drm_minor *minor = adev->ddev->primary;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 34af664..290e279 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -83,8 +83,10 @@
 	"POLARIS10",
 	"POLARIS11",
 	"POLARIS12",
+	"VEGAM",
 	"VEGA10",
 	"VEGA12",
+	"VEGA20",
 	"RAVEN",
 	"LAST",
 };
@@ -690,6 +692,8 @@
 {
 	u64 size_af, size_bf;
 
+	mc->gart_size += adev->pm.smu_prv_buffer_size;
+
 	size_af = adev->gmc.mc_mask - mc->vram_end;
 	size_bf = mc->vram_start;
 	if (size_bf > size_af) {
@@ -907,6 +911,46 @@
 	}
 }
 
+static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
+{
+	struct sysinfo si;
+	bool is_os_64 = (sizeof(void *) == 8);
+	uint64_t total_memory;
+	uint64_t dram_size_seven_GB = 0x1B8000000;
+	uint64_t dram_size_three_GB = 0xB8000000;
+
+	if (amdgpu_smu_memory_pool_size == 0)
+		return;
+
+	if (!is_os_64) {
+		DRM_WARN("Not 64-bit OS, feature not supported\n");
+		goto def_value;
+	}
+	si_meminfo(&si);
+	total_memory = (uint64_t)si.totalram * si.mem_unit;
+
+	if ((amdgpu_smu_memory_pool_size == 1) ||
+		(amdgpu_smu_memory_pool_size == 2)) {
+		if (total_memory < dram_size_three_GB)
+			goto def_value1;
+	} else if ((amdgpu_smu_memory_pool_size == 4) ||
+		(amdgpu_smu_memory_pool_size == 8)) {
+		if (total_memory < dram_size_seven_GB)
+			goto def_value1;
+	} else {
+		DRM_WARN("Smu memory pool size not supported\n");
+		goto def_value;
+	}
+	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
+
+	return;
+
+def_value1:
+	DRM_WARN("No enough system memory\n");
+def_value:
+	adev->pm.smu_prv_buffer_size = 0;
+}
+
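Concretely, each accepted value of the module parameter maps to value << 28
bytes, i.e. 256 MB per unit: 1 and 2 (256 MB / 512 MB) require roughly 3 GB of
system RAM, while 4 and 8 (1 GB / 2 GB) require roughly 7 GB; any other
non-zero value, or a 32-bit kernel, falls back to a pool size of zero.
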
 /**
  * amdgpu_device_check_arguments - validate module params
  *
@@ -948,6 +992,8 @@
 		amdgpu_vm_fragment_size = -1;
 	}
 
+	amdgpu_device_check_smu_prv_buffer_size(adev);
+
 	amdgpu_device_check_vm_size(adev);
 
 	amdgpu_device_check_block_size(adev);
@@ -1039,10 +1085,11 @@
  * the hardware IP specified.
  * Returns the error code from the last instance.
  */
-int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_clockgating_state state)
 {
+	struct amdgpu_device *adev = dev;
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1072,10 +1119,11 @@
  * the hardware IP specified.
  * Returns the error code from the last instance.
  */
-int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
+int amdgpu_device_ip_set_powergating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_powergating_state state)
 {
+	struct amdgpu_device *adev = dev;
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
@@ -1320,9 +1368,10 @@
 	case CHIP_TOPAZ:
 	case CHIP_TONGA:
 	case CHIP_FIJI:
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 #ifdef CONFIG_DRM_AMDGPU_SI
@@ -1339,6 +1388,7 @@
 	case CHIP_KABINI:
 	case CHIP_MULLINS:
 #endif
+	case CHIP_VEGA20:
 	default:
 		return 0;
 	case CHIP_VEGA10:
@@ -1428,9 +1478,10 @@
 	case CHIP_TOPAZ:
 	case CHIP_TONGA:
 	case CHIP_FIJI:
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 		if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY)
@@ -1472,6 +1523,7 @@
 #endif
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		if (adev->asic_type == CHIP_RAVEN)
 			adev->family = AMDGPU_FAMILY_RV;
@@ -1499,6 +1551,8 @@
 			return -EAGAIN;
 	}
 
+	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
 			DRM_ERROR("disabled ip block: %d <%s>\n",
@@ -1654,12 +1708,17 @@
 	if (amdgpu_emu_mode == 1)
 		return 0;
 
+	r = amdgpu_ib_ring_tests(adev);
+	if (r)
+		DRM_ERROR("ib ring test failed (%d).\n", r);
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
 		/* skip CG for VCE/UVD, it's handled specially */
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* enable clockgating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1704,8 +1763,8 @@
 		}
 	}
 
-	mod_delayed_work(system_wq, &adev->late_init_work,
-			msecs_to_jiffies(AMDGPU_RESUME_MS));
+	queue_delayed_work(system_wq, &adev->late_init_work,
+			   msecs_to_jiffies(AMDGPU_RESUME_MS));
 
 	amdgpu_device_fill_reset_magic(adev);
 
@@ -1759,6 +1818,7 @@
 
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+			adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
 			adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* ungate blocks before hw fini so that we can shutdown the blocks safely */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1850,6 +1910,12 @@
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_request_full_gpu(adev, false);
 
+	/* ungate SMC block powergating */
+	if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
+		amdgpu_device_ip_set_powergating_state(adev,
+						       AMD_IP_BLOCK_TYPE_SMC,
+						       AMD_CG_STATE_UNGATE);
+
 	/* ungate SMC block first */
 	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
 						   AMD_CG_STATE_UNGATE);
@@ -2086,16 +2152,15 @@
 	case CHIP_MULLINS:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 	case CHIP_TONGA:
 	case CHIP_FIJI:
-#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
-		return amdgpu_dc != 0;
-#endif
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case CHIP_RAVEN:
 #endif
@@ -2375,10 +2440,6 @@
 		goto failed;
 	}
 
-	r = amdgpu_ib_ring_tests(adev);
-	if (r)
-		DRM_ERROR("ib ring test failed (%d).\n", r);
-
 	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_init_data_exchange(adev);
 
@@ -2539,7 +2600,7 @@
 	/* unpin the front buffers and cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-		struct amdgpu_framebuffer *rfb = to_amdgpu_framebuffer(crtc->primary->fb);
+		struct drm_framebuffer *fb = crtc->primary->fb;
 		struct amdgpu_bo *robj;
 
 		if (amdgpu_crtc->cursor_bo) {
@@ -2551,10 +2612,10 @@
 			}
 		}
 
-		if (rfb == NULL || rfb->obj == NULL) {
+		if (fb == NULL || fb->obj[0] == NULL) {
 			continue;
 		}
-		robj = gem_to_amdgpu_bo(rfb->obj);
+		robj = gem_to_amdgpu_bo(fb->obj[0]);
 		/* don't unpin kernel fb objects */
 		if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
 			r = amdgpu_bo_reserve(robj, true);
@@ -2640,11 +2701,6 @@
 	}
 	amdgpu_fence_driver_resume(adev);
 
-	if (resume) {
-		r = amdgpu_ib_ring_tests(adev);
-		if (r)
-			DRM_ERROR("ib ring test failed (%d).\n", r);
-	}
 
 	r = amdgpu_device_ip_late_init(adev);
 	if (r)
@@ -2736,6 +2792,9 @@
 	if (amdgpu_sriov_vf(adev))
 		return true;
 
+	if (amdgpu_asic_need_full_reset(adev))
+		return true;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
@@ -2792,6 +2851,9 @@
 {
 	int i;
 
+	if (amdgpu_asic_need_full_reset(adev))
+		return true;
+
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if (!adev->ip_blocks[i].status.valid)
 			continue;
@@ -3087,20 +3149,19 @@
 
 	/* now we are okay to resume SMC/CP/SDMA */
 	r = amdgpu_device_ip_reinit_late_sriov(adev);
-	amdgpu_virt_release_full_gpu(adev, true);
 	if (r)
 		goto error;
 
 	amdgpu_irq_gpu_reset_resume_helper(adev);
 	r = amdgpu_ib_ring_tests(adev);
 
+error:
+	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		atomic_inc(&adev->vram_lost_counter);
 		r = amdgpu_device_handle_vram_lost(adev);
 	}
 
-error:
-
 	return r;
 }
 
@@ -3117,7 +3178,6 @@
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job, bool force)
 {
-	struct drm_atomic_state *state = NULL;
 	int i, r, resched;
 
 	if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -3140,10 +3200,6 @@
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 
-	/* store modesetting */
-	if (amdgpu_device_has_dc_support(adev))
-		state = drm_atomic_helper_suspend(adev->ddev);
-
 	/* block all schedulers and reset given job's ring */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -3183,10 +3239,7 @@
 		kthread_unpark(ring->sched.thread);
 	}
 
-	if (amdgpu_device_has_dc_support(adev)) {
-		if (drm_atomic_helper_resume(adev->ddev, state))
-			dev_info(adev->dev, "drm resume failed:%d\n", r);
-	} else {
+	if (!amdgpu_device_has_dc_support(adev)) {
 		drm_helper_resume_force_mode(adev->ddev);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 93f700a..76ee8e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_fb_helper.h>
 
 static void amdgpu_display_flip_callback(struct dma_fence *f,
@@ -151,8 +152,6 @@
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-	struct amdgpu_framebuffer *old_amdgpu_fb;
-	struct amdgpu_framebuffer *new_amdgpu_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_flip_work *work;
 	struct amdgpu_bo *new_abo;
@@ -174,15 +173,13 @@
 	work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
 
 	/* schedule unpin of the old buffer */
-	old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-	obj = old_amdgpu_fb->obj;
+	obj = crtc->primary->fb->obj[0];
 
 	/* take a reference to the old object */
 	work->old_abo = gem_to_amdgpu_bo(obj);
 	amdgpu_bo_ref(work->old_abo);
 
-	new_amdgpu_fb = to_amdgpu_framebuffer(fb);
-	obj = new_amdgpu_fb->obj;
+	obj = fb->obj[0];
 	new_abo = gem_to_amdgpu_bo(obj);
 
 	/* pin the new buffer */
@@ -192,7 +189,7 @@
 		goto cleanup;
 	}
 
-	r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
+	r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base);
 	if (unlikely(r != 0)) {
 		DRM_ERROR("failed to pin new abo buffer before flip\n");
 		goto unreserve;
@@ -482,31 +479,12 @@
 	return true;
 }
 
-static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
-{
-	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
-
-	drm_gem_object_put_unlocked(amdgpu_fb->obj);
-	drm_framebuffer_cleanup(fb);
-	kfree(amdgpu_fb);
-}
-
-static int amdgpu_display_user_framebuffer_create_handle(
-			struct drm_framebuffer *fb,
-			struct drm_file *file_priv,
-			unsigned int *handle)
-{
-	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
-
-	return drm_gem_handle_create(file_priv, amdgpu_fb->obj, handle);
-}
-
 static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
-	.destroy = amdgpu_display_user_framebuffer_destroy,
-	.create_handle = amdgpu_display_user_framebuffer_create_handle,
+	.destroy = drm_gem_fb_destroy,
+	.create_handle = drm_gem_fb_create_handle,
 };
 
-uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev)
 {
 	uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
 
@@ -526,11 +504,11 @@
 				    struct drm_gem_object *obj)
 {
 	int ret;
-	rfb->obj = obj;
+	rfb->base.obj[0] = obj;
 	drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
 	ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
 	if (ret) {
-		rfb->obj = NULL;
+		rfb->base.obj[0] = NULL;
 		return ret;
 	}
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 2b11d80..f66e3e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,7 +23,7 @@
 #ifndef __AMDGPU_DISPLAY_H__
 #define __AMDGPU_DISPLAY_H__
 
-uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
+uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev);
 struct drm_framebuffer *
 amdgpu_display_user_framebuffer_create(struct drm_device *dev,
 				       struct drm_file *file_priv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
index e997ebbe43..def1010 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
@@ -115,6 +115,26 @@
 	pr_cont("\n");
 }
 
+void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev)
+{
+	struct drm_device *ddev = adev->ddev;
+	struct drm_crtc *crtc;
+	struct amdgpu_crtc *amdgpu_crtc;
+
+	adev->pm.dpm.new_active_crtcs = 0;
+	adev->pm.dpm.new_active_crtc_count = 0;
+	if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
+		list_for_each_entry(crtc,
+				    &ddev->mode_config.crtc_list, head) {
+			amdgpu_crtc = to_amdgpu_crtc(crtc);
+			if (amdgpu_crtc->enabled) {
+				adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
+				adev->pm.dpm.new_active_crtc_count++;
+			}
+		}
+	}
+}
+
 
 u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index 643d008..dd6203a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -52,8 +52,6 @@
 	AMDGPU_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
 };
 
-#define SCLK_DEEP_SLEEP_MASK 0x8
-
 struct amdgpu_ps {
 	u32 caps; /* vbios flags */
 	u32 class; /* vbios flags */
@@ -349,12 +347,6 @@
 		((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
 			(adev)->powerplay.pp_handle, msg_id))
 
-#define amdgpu_dpm_notify_smu_memory_info(adev, virtual_addr_low, \
-			virtual_addr_hi, mc_addr_low, mc_addr_hi, size) \
-		((adev)->powerplay.pp_funcs->notify_smu_memory_info)( \
-			(adev)->powerplay.pp_handle, virtual_addr_low, \
-			virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
-
 #define amdgpu_dpm_get_power_profile_mode(adev, buf) \
 		((adev)->powerplay.pp_funcs->get_power_profile_mode(\
 			(adev)->powerplay.pp_handle, buf))
@@ -445,6 +437,8 @@
 	uint32_t                pcie_gen_mask;
 	uint32_t                pcie_mlw_mask;
 	struct amd_pp_display_configuration pm_display_cfg;/* set by dc */
+	uint32_t                smu_prv_buffer_size;
+	struct amdgpu_bo        *smu_prv_buffer;
 };
 
 #define R600_SSTU_DFLT                               0
@@ -482,6 +476,7 @@
 				struct amdgpu_ps *rps);
 u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
 u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
+void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
 bool amdgpu_is_uvd_state(u32 class, u32 class2);
 void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
 			      u32 *p, u32 *u);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0b19482..b0bf2f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -75,9 +75,10 @@
  * - 3.23.0 - Add query for VRAM lost counter
  * - 3.24.0 - Add high priority compute support for gfx9
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
+ * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	25
+#define KMS_DRIVER_MINOR	26
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
@@ -121,7 +122,7 @@
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
-uint amdgpu_pp_feature_mask = 0xffffbfff;
+uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */
 int amdgpu_ngg = 0;
 int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
@@ -132,6 +133,7 @@
 int amdgpu_compute_multipipe = -1;
 int amdgpu_gpu_recovery = -1; /* auto */
 int amdgpu_emu_mode = 0;
+uint amdgpu_smu_memory_pool_size = 0;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -316,6 +318,11 @@
 module_param_named(cik_support, amdgpu_cik_support, int, 0444);
 #endif
 
+MODULE_PARM_DESC(smu_memory_pool_size,
+	"reserve gtt for smu debug usage, 0 = disable,"
+		"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
+module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -534,6 +541,9 @@
 	{0x1002, 0x6995, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x6997, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
 	{0x1002, 0x699F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS12},
+	/* VEGAM */
+	{0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+	{0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
 	/* Vega 10 */
 	{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
@@ -550,6 +560,13 @@
 	{0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
 	{0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
 	{0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+	/* Vega 20 */
+	{0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT},
 	/* Raven */
 	{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 1206301..bc5fd8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -137,7 +137,7 @@
 	/* need to align pitch with crtc limits */
 	mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
 						  fb_tiled);
-	domain = amdgpu_display_framebuffer_domains(adev);
+	domain = amdgpu_display_supported_domains(adev);
 
 	height = ALIGN(mode_cmd->height, 8);
 	size = mode_cmd->pitches[0] * height;
@@ -292,9 +292,9 @@
 
 	drm_fb_helper_unregister_fbi(&rfbdev->helper);
 
-	if (rfb->obj) {
-		amdgpufb_destroy_pinned_object(rfb->obj);
-		rfb->obj = NULL;
+	if (rfb->base.obj[0]) {
+		amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
+		rfb->base.obj[0] = NULL;
 		drm_framebuffer_unregister_private(&rfb->base);
 		drm_framebuffer_cleanup(&rfb->base);
 	}
@@ -377,7 +377,7 @@
 	if (!adev->mode_info.rfbdev)
 		return 0;
 
-	robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj);
+	robj = gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]);
 	size += amdgpu_bo_size(robj);
 	return size;
 }
@@ -386,7 +386,7 @@
 {
 	if (!adev->mode_info.rfbdev)
 		return false;
-	if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.obj))
+	if (robj == gem_to_amdgpu_bo(adev->mode_info.rfbdev->rfb.base.obj[0]))
 		return true;
 	return false;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 97449e0..39ec6b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -131,7 +131,8 @@
  * Emits a fence command on the requested ring (all asics).
  * Returns 0 on success, -ENOMEM on failure.
  */
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
+		      unsigned flags)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
@@ -149,7 +150,7 @@
 		       adev->fence_context + ring->idx,
 		       seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
-			       seq, AMDGPU_FENCE_FLAG_INT);
+			       seq, flags | AMDGPU_FENCE_FLAG_INT);
 
 	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	/* This function can't be called concurrently anyway, otherwise
@@ -375,14 +376,14 @@
 	struct amdgpu_device *adev = ring->adev;
 	uint64_t index;
 
-	if (ring != &adev->uvd.ring) {
+	if (ring != &adev->uvd.inst[ring->me].ring) {
 		ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs];
 		ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4);
 	} else {
 		/* put fence directly behind firmware */
 		index = ALIGN(adev->uvd.fw->size, 8);
-		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
-		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
+		ring->fence_drv.cpu_addr = adev->uvd.inst[ring->me].cpu_addr + index;
+		ring->fence_drv.gpu_addr = adev->uvd.inst[ring->me].gpu_addr + index;
 	}
 	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
 	amdgpu_irq_get(adev, irq_src, irq_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index cf0f186..17d6b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -113,12 +113,17 @@
 	int r;
 
 	if (adev->gart.robj == NULL) {
-		r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
-				     AMDGPU_GEM_DOMAIN_VRAM,
-				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     ttm_bo_type_kernel, NULL,
-				     &adev->gart.robj);
+		struct amdgpu_bo_param bp;
+
+		memset(&bp, 0, sizeof(bp));
+		bp.size = adev->gart.table_size;
+		bp.byte_align = PAGE_SIZE;
+		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+		bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+			AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+		bp.type = ttm_bo_type_kernel;
+		bp.resv = NULL;
+		r = amdgpu_bo_create(adev, &bp, &adev->gart.robj);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 46b9ea4..2c8e273 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -48,17 +48,25 @@
 			     struct drm_gem_object **obj)
 {
 	struct amdgpu_bo *bo;
+	struct amdgpu_bo_param bp;
 	int r;
 
+	memset(&bp, 0, sizeof(bp));
 	*obj = NULL;
 	/* At least align on page size */
 	if (alignment < PAGE_SIZE) {
 		alignment = PAGE_SIZE;
 	}
 
+	bp.size = size;
+	bp.byte_align = alignment;
+	bp.type = type;
+	bp.resv = resv;
+	bp.preferred_domain = initial_domain;
 retry:
-	r = amdgpu_bo_create(adev, size, alignment, initial_domain,
-			     flags, type, resv, &bo);
+	bp.flags = flags;
+	bp.domain = initial_domain;
+	r = amdgpu_bo_create(adev, &bp, &bo);
 	if (r) {
 		if (r != -ERESTARTSYS) {
 			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
@@ -221,12 +229,7 @@
 		return -EINVAL;
 
 	/* reject invalid gem domains */
-	if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU |
-				 AMDGPU_GEM_DOMAIN_GTT |
-				 AMDGPU_GEM_DOMAIN_VRAM |
-				 AMDGPU_GEM_DOMAIN_GDS |
-				 AMDGPU_GEM_DOMAIN_GWS |
-				 AMDGPU_GEM_DOMAIN_OA))
+	if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK)
 		return -EINVAL;
 
 	/* create a gem object to contain this object in */
@@ -771,16 +774,23 @@
 }
 
 #if defined(CONFIG_DEBUG_FS)
+
+#define amdgpu_debugfs_gem_bo_print_flag(m, bo, flag)		\
+	do {							\
+		if (bo->flags & (AMDGPU_GEM_CREATE_ ## flag))	\
+			seq_printf((m), " " #flag);		\
+	} while (0)
+
 static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
 {
 	struct drm_gem_object *gobj = ptr;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
 	struct seq_file *m = data;
 
+	struct dma_buf_attachment *attachment;
+	struct dma_buf *dma_buf;
 	unsigned domain;
 	const char *placement;
 	unsigned pin_count;
-	uint64_t offset;
 
 	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
 	switch (domain) {
@@ -798,13 +808,27 @@
 	seq_printf(m, "\t0x%08x: %12ld byte %s",
 		   id, amdgpu_bo_size(bo), placement);
 
-	offset = READ_ONCE(bo->tbo.mem.start);
-	if (offset != AMDGPU_BO_INVALID_OFFSET)
-		seq_printf(m, " @ 0x%010Lx", offset);
-
 	pin_count = READ_ONCE(bo->pin_count);
 	if (pin_count)
 		seq_printf(m, " pin count %d", pin_count);
+
+	dma_buf = READ_ONCE(bo->gem_base.dma_buf);
+	attachment = READ_ONCE(bo->gem_base.import_attach);
+
+	if (attachment)
+		seq_printf(m, " imported from %p", dma_buf);
+	else if (dma_buf)
+		seq_printf(m, " exported as %p", dma_buf);
+
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_ACCESS_REQUIRED);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, NO_CPU_ACCESS);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, CPU_GTT_USWC);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CLEARED);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, SHADOW);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, VM_ALWAYS_VALID);
+	amdgpu_debugfs_gem_bo_print_flag(m, bo, EXPLICIT_SYNC);
+
 	seq_printf(m, "\n");
 
 	return 0;
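
Why bp.flags and bp.domain are reassigned below the retry: label while the other fields are set once: the error path degrades the request and jumps back, so the two mutable fields must be re-seeded from the locals on every pass. A sketch of that control flow; the exact downgrade step is an assumption read off the visible CPU_ACCESS_REQUIRED test and is not shown in full by this hunk:

    retry:
            bp.flags = flags;               /* locals may have been downgraded */
            bp.domain = initial_domain;
            r = amdgpu_bo_create(adev, &bp, &bo);
            if (r && r != -ERESTARTSYS) {
                    if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
                            flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
                            goto retry;     /* retry without CPU access */
                    }
            }
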
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 311589e..f70eeed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -127,6 +127,7 @@
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
+	unsigned fence_flags = 0;
 
 	unsigned i;
 	int r = 0;
@@ -227,7 +228,10 @@
 #endif
 		amdgpu_asic_invalidate_hdp(adev, ring);
 
-	r = amdgpu_fence_emit(ring, f);
+	if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
+		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+
+	r = amdgpu_fence_emit(ring, f, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vmid)
@@ -242,7 +246,7 @@
 	/* wrap the last IB with fence */
 	if (job && job->uf_addr) {
 		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
-				       AMDGPU_FENCE_FLAG_64BIT);
+				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
 	}
 
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 4b7824d..91517b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -31,6 +31,7 @@
 #include "amdgpu_sched.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
+#include "atom.h"
 
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
@@ -214,6 +215,18 @@
 		fw_info->ver = adev->gfx.rlc_fw_version;
 		fw_info->feature = adev->gfx.rlc_feature_version;
 		break;
+	case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL:
+		fw_info->ver = adev->gfx.rlc_srlc_fw_version;
+		fw_info->feature = adev->gfx.rlc_srlc_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM:
+		fw_info->ver = adev->gfx.rlc_srlg_fw_version;
+		fw_info->feature = adev->gfx.rlc_srlg_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM:
+		fw_info->ver = adev->gfx.rlc_srls_fw_version;
+		fw_info->feature = adev->gfx.rlc_srls_feature_version;
+		break;
 	case AMDGPU_INFO_FW_GFX_MEC:
 		if (query_fw->index == 0) {
 			fw_info->ver = adev->gfx.mec_fw_version;
@@ -273,12 +286,15 @@
 	struct drm_crtc *crtc;
 	uint32_t ui32 = 0;
 	uint64_t ui64 = 0;
-	int i, found;
+	int i, j, found;
 	int ui32_size = sizeof(ui32);
 
 	if (!info->return_size || !info->return_pointer)
 		return -EINVAL;
 
+	/* Ensure IB tests are run on ring */
+	flush_delayed_work(&adev->late_init_work);
+
 	switch (info->query) {
 	case AMDGPU_INFO_ACCEL_WORKING:
 		ui32 = adev->accel_working;
@@ -332,7 +348,8 @@
 			break;
 		case AMDGPU_HW_IP_UVD:
 			type = AMD_IP_BLOCK_TYPE_UVD;
-			ring_mask = adev->uvd.ring.ready ? 1 : 0;
+			for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+				ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
 			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
 			ib_size_alignment = 16;
 			break;
@@ -345,8 +362,11 @@
 			break;
 		case AMDGPU_HW_IP_UVD_ENC:
 			type = AMD_IP_BLOCK_TYPE_UVD;
-			for (i = 0; i < adev->uvd.num_enc_rings; i++)
-				ring_mask |= ((adev->uvd.ring_enc[i].ready ? 1 : 0) << i);
+			for (i = 0; i < adev->uvd.num_uvd_inst; i++)
+				for (j = 0; j < adev->uvd.num_enc_rings; j++)
+					ring_mask |=
+					((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
+					(j + i * adev->uvd.num_enc_rings));
 			ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
 			ib_size_alignment = 1;
 			break;
@@ -701,10 +721,7 @@
 		}
 	}
 	case AMDGPU_INFO_SENSOR: {
-		struct pp_gpu_power query = {0};
-		int query_size = sizeof(query);
-
-		if (amdgpu_dpm == 0)
+		if (!adev->pm.dpm_enabled)
 			return -ENOENT;
 
 		switch (info->sensor_info.type) {
@@ -746,10 +763,10 @@
 			/* get average GPU power */
 			if (amdgpu_dpm_read_sensor(adev,
 						   AMDGPU_PP_SENSOR_GPU_POWER,
-						   (void *)&query, &query_size)) {
+						   (void *)&ui32, &ui32_size)) {
 				return -EINVAL;
 			}
-			ui32 = query.average_gpu_power >> 8;
+			ui32 >>= 8;
 			break;
 		case AMDGPU_INFO_SENSOR_VDDNB:
 			/* get VDDNB in millivolts */
@@ -913,8 +930,7 @@
 		return;
 
 	pm_runtime_get_sync(dev->dev);
-
-	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+	amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
 
 	if (adev->asic_type != CHIP_RAVEN) {
 		amdgpu_uvd_free_handles(adev, file_priv);
@@ -935,6 +951,8 @@
 	pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
 
 	amdgpu_vm_fini(adev, &fpriv->vm);
+	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
+
 	if (pasid)
 		amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
 	amdgpu_bo_unref(&pd);
@@ -1088,6 +1106,7 @@
 	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_amdgpu_info_firmware fw_info;
 	struct drm_amdgpu_query_fw query_fw;
+	struct atom_context *ctx = adev->mode_info.atom_context;
 	int ret, i;
 
 	/* VCE */
@@ -1146,6 +1165,30 @@
 	seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
 		   fw_info.feature, fw_info.ver);
 
+	/* RLC SAVE RESTORE LIST CNTL */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC SRLC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* RLC SAVE RESTORE LIST GPM MEM */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC SRLG feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* RLC SAVE RESTORE LIST SRM MEM */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC SRLS feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
 	/* MEC */
 	query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
 	query_fw.index = 0;
@@ -1210,6 +1253,9 @@
 	seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
 		   fw_info.feature, fw_info.ver);
 
+	seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version);
+
 	return 0;
 }
 
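The UVD_ENC ring mask above flattens (instance, encode ring) pairs into single bit positions with bit = j + i * num_enc_rings. A worked illustration, assuming two instances with two encode rings each (the counts are illustrative):

    /*
     * bit = j + i * num_enc_rings, e.g. with num_enc_rings = 2:
     *   inst 0, enc ring 0 -> bit 0
     *   inst 0, enc ring 1 -> bit 1
     *   inst 1, enc ring 0 -> bit 2
     *   inst 1, enc ring 1 -> bit 3
     */
    static unsigned int uvd_enc_ring_bit(unsigned int i, unsigned int j,
                                         unsigned int num_enc_rings)
    {
            return j + i * num_enc_rings;
    }
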
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index bd67f4c..83e344f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -36,12 +36,14 @@
 #include <drm/drm.h>
 
 #include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
 
 struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
 	struct mmu_notifier	mn;
+	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
 	struct work_struct	work;
@@ -185,7 +187,7 @@
 }
 
 /**
- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
+ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
  * @mn: our notifier
 * @mm: the mm this callback is about
@@ -195,10 +197,10 @@
  * We block for all BOs between start and end to be idle and
 * unmap them by moving them into the system domain again.
  */
-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
-					     struct mm_struct *mm,
-					     unsigned long start,
-					     unsigned long end)
+static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+						 struct mm_struct *mm,
+						 unsigned long start,
+						 unsigned long end)
 {
 	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -220,6 +222,49 @@
 }
 
 /**
+ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We temporarily evict all BOs between start and end. This
+ * necessitates evicting all user-mode queues of the process. The BOs
+ * are restored in amdgpu_mn_invalidate_range_end_hsa.
+ */
+static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+						 struct mm_struct *mm,
+						 unsigned long start,
+						 unsigned long end)
+{
+	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+	struct interval_tree_node *it;
+
+	/* notification is exclusive, but interval is inclusive */
+	end -= 1;
+
+	amdgpu_mn_read_lock(rmn);
+
+	it = interval_tree_iter_first(&rmn->objects, start, end);
+	while (it) {
+		struct amdgpu_mn_node *node;
+		struct amdgpu_bo *bo;
+
+		node = container_of(it, struct amdgpu_mn_node, it);
+		it = interval_tree_iter_next(it, start, end);
+
+		list_for_each_entry(bo, &node->bos, mn_list) {
+			struct kgd_mem *mem = bo->kfd_bo;
+
+			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+							 start, end))
+				amdgpu_amdkfd_evict_userptr(mem, mm);
+		}
+	}
+}
+
+/**
  * amdgpu_mn_invalidate_range_end - callback to notify about mm change
  *
  * @mn: our notifier
@@ -239,23 +284,39 @@
 	amdgpu_mn_read_unlock(rmn);
 }
 
-static const struct mmu_notifier_ops amdgpu_mn_ops = {
-	.release = amdgpu_mn_release,
-	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
-	.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+	[AMDGPU_MN_TYPE_GFX] = {
+		.release = amdgpu_mn_release,
+		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
+		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+	},
+	[AMDGPU_MN_TYPE_HSA] = {
+		.release = amdgpu_mn_release,
+		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
+		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+	},
 };
 
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
+ */
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
+
 /**
  * amdgpu_mn_get - create notifier context
  *
  * @adev: amdgpu device pointer
+ * @type: type of MMU notifier context
  *
  * Creates a notifier context for current->mm.
  */
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+				enum amdgpu_mn_type type)
 {
 	struct mm_struct *mm = current->mm;
 	struct amdgpu_mn *rmn;
+	unsigned long key = AMDGPU_MN_KEY(mm, type);
 	int r;
 
 	mutex_lock(&adev->mn_lock);
@@ -264,8 +325,8 @@
 		return ERR_PTR(-EINTR);
 	}
 
-	hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
-		if (rmn->mm == mm)
+	hash_for_each_possible(adev->mn_hash, rmn, node, key)
+		if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
 			goto release_locks;
 
 	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -276,8 +337,9 @@
 
 	rmn->adev = adev;
 	rmn->mm = mm;
-	rmn->mn.ops = &amdgpu_mn_ops;
 	init_rwsem(&rmn->lock);
+	rmn->type = type;
+	rmn->mn.ops = &amdgpu_mn_ops[type];
 	rmn->objects = RB_ROOT_CACHED;
 	mutex_init(&rmn->read_lock);
 	atomic_set(&rmn->recursion, 0);
@@ -286,7 +348,7 @@
 	if (r)
 		goto free_rmn;
 
-	hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
+	hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
 
 release_locks:
 	up_write(&mm->mmap_sem);
@@ -315,15 +377,21 @@
 {
 	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	enum amdgpu_mn_type type =
+		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 	struct amdgpu_mn *rmn;
-	struct amdgpu_mn_node *node = NULL;
+	struct amdgpu_mn_node *node = NULL, *new_node;
 	struct list_head bos;
 	struct interval_tree_node *it;
 
-	rmn = amdgpu_mn_get(adev);
+	rmn = amdgpu_mn_get(adev, type);
 	if (IS_ERR(rmn))
 		return PTR_ERR(rmn);
 
+	new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+	if (!new_node)
+		return -ENOMEM;
+
 	INIT_LIST_HEAD(&bos);
 
 	down_write(&rmn->lock);
@@ -337,13 +405,10 @@
 		list_splice(&node->bos, &bos);
 	}
 
-	if (!node) {
-		node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
-		if (!node) {
-			up_write(&rmn->lock);
-			return -ENOMEM;
-		}
-	}
+	if (!node)
+		node = new_node;
+	else
+		kfree(new_node);
 
 	bo->mn = rmn;
 
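AMDGPU_MN_KEY works because an mm_struct allocation is aligned to far more than the two notifier types, so the low bits of the pointer are guaranteed zero and adding the enum value can never make two different (mm, type) pairs collide in the hash. A standalone illustration of the invariant; the alignment assertion is an assumption about the allocator, not driver code:

    #include <assert.h>
    #include <stdint.h>

    enum mn_type { MN_TYPE_GFX, MN_TYPE_HSA }; /* mirrors amdgpu_mn_type */

    static uintptr_t mn_key(const void *mm, enum mn_type type)
    {
            /* a slab-allocated mm_struct is at least 2-byte aligned */
            assert(((uintptr_t)mm & 1) == 0);
            return (uintptr_t)mm + type;
    }
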
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index d0095a3..eb0f432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -29,16 +29,23 @@
  */
 struct amdgpu_mn;
 
+enum amdgpu_mn_type {
+	AMDGPU_MN_TYPE_GFX,
+	AMDGPU_MN_TYPE_HSA,
+};
+
 #if defined(CONFIG_MMU_NOTIFIER)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+					      enum amdgpu_mn_type type)
 {
 	return NULL;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index d6416ee..b9e9e8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -308,7 +308,6 @@
 
 struct amdgpu_framebuffer {
 	struct drm_framebuffer base;
-	struct drm_gem_object *obj;
 
 	/* caching for later use */
 	uint64_t address;
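
With the driver-private obj pointer gone, the backing GEM object of an amdgpu framebuffer is reached through the drm_framebuffer core, which is what the fbdev hunks at the top of this section switched to:

    /* plane 0 of the core framebuffer replaces the removed rfb->obj */
    struct amdgpu_bo *abo = gem_to_amdgpu_bo(rfb->base.obj[0]);
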
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6d08cde..6a9e46a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -191,14 +191,21 @@
 			      u32 domain, struct amdgpu_bo **bo_ptr,
 			      u64 *gpu_addr, void **cpu_addr)
 {
+	struct amdgpu_bo_param bp;
 	bool free = false;
 	int r;
 
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = align;
+	bp.domain = domain;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
+
 	if (!*bo_ptr) {
-		r = amdgpu_bo_create(adev, size, align, domain,
-				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     ttm_bo_type_kernel, NULL, bo_ptr);
+		r = amdgpu_bo_create(adev, &bp, bo_ptr);
 		if (r) {
 			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
 				r);
@@ -341,27 +348,25 @@
 	return false;
 }
 
-static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
-			       int byte_align, u32 domain,
-			       u64 flags, enum ttm_bo_type type,
-			       struct reservation_object *resv,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+			       struct amdgpu_bo_param *bp,
 			       struct amdgpu_bo **bo_ptr)
 {
 	struct ttm_operation_ctx ctx = {
-		.interruptible = (type != ttm_bo_type_kernel),
+		.interruptible = (bp->type != ttm_bo_type_kernel),
 		.no_wait_gpu = false,
-		.resv = resv,
+		.resv = bp->resv,
 		.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
 	};
 	struct amdgpu_bo *bo;
-	unsigned long page_align;
+	unsigned long page_align, size = bp->size;
 	size_t acc_size;
 	int r;
 
-	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
+	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
 	size = ALIGN(size, PAGE_SIZE);
 
-	if (!amdgpu_bo_validate_size(adev, size, domain))
+	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
 		return -ENOMEM;
 
 	*bo_ptr = NULL;
@@ -375,18 +380,14 @@
 	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
 	INIT_LIST_HEAD(&bo->shadow_list);
 	INIT_LIST_HEAD(&bo->va);
-	bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
-					 AMDGPU_GEM_DOMAIN_GTT |
-					 AMDGPU_GEM_DOMAIN_CPU |
-					 AMDGPU_GEM_DOMAIN_GDS |
-					 AMDGPU_GEM_DOMAIN_GWS |
-					 AMDGPU_GEM_DOMAIN_OA);
+	bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
+		bp->domain;
 	bo->allowed_domains = bo->preferred_domains;
-	if (type != ttm_bo_type_kernel &&
+	if (bp->type != ttm_bo_type_kernel &&
 	    bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
-	bo->flags = flags;
+	bo->flags = bp->flags;
 
 #ifdef CONFIG_X86_32
 	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
@@ -417,11 +418,13 @@
 #endif
 
 	bo->tbo.bdev = &adev->mman.bdev;
-	amdgpu_ttm_placement_from_domain(bo, domain);
+	amdgpu_ttm_placement_from_domain(bo, bp->domain);
+	if (bp->type == ttm_bo_type_kernel)
+		bo->tbo.priority = 1;
 
-	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
+	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
 				 &bo->placement, page_align, &ctx, acc_size,
-				 NULL, resv, &amdgpu_ttm_bo_destroy);
+				 NULL, bp->resv, &amdgpu_ttm_bo_destroy);
 	if (unlikely(r != 0))
 		return r;
 
@@ -433,10 +436,7 @@
 	else
 		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
 
-	if (type == ttm_bo_type_kernel)
-		bo->tbo.priority = 1;
-
-	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
+	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
 	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
 		struct dma_fence *fence;
 
@@ -449,20 +449,20 @@
 		bo->tbo.moving = dma_fence_get(fence);
 		dma_fence_put(fence);
 	}
-	if (!resv)
+	if (!bp->resv)
 		amdgpu_bo_unreserve(bo);
 	*bo_ptr = bo;
 
 	trace_amdgpu_bo_create(bo);
 
 	/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
-	if (type == ttm_bo_type_device)
+	if (bp->type == ttm_bo_type_device)
 		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 
 	return 0;
 
 fail_unreserve:
-	if (!resv)
+	if (!bp->resv)
 		ww_mutex_unlock(&bo->tbo.resv->lock);
 	amdgpu_bo_unref(&bo);
 	return r;
@@ -472,16 +472,22 @@
 				   unsigned long size, int byte_align,
 				   struct amdgpu_bo *bo)
 {
+	struct amdgpu_bo_param bp;
 	int r;
 
 	if (bo->shadow)
 		return 0;
 
-	r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
-				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
-				AMDGPU_GEM_CREATE_SHADOW,
-				ttm_bo_type_kernel,
-				bo->tbo.resv, &bo->shadow);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = byte_align;
+	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+		AMDGPU_GEM_CREATE_SHADOW;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = bo->tbo.resv;
+
+	r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
 	if (!r) {
 		bo->shadow->parent = amdgpu_bo_ref(bo);
 		mutex_lock(&adev->shadow_list_lock);
@@ -492,28 +498,26 @@
 	return r;
 }
 
-int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
-		     int byte_align, u32 domain,
-		     u64 flags, enum ttm_bo_type type,
-		     struct reservation_object *resv,
+int amdgpu_bo_create(struct amdgpu_device *adev,
+		     struct amdgpu_bo_param *bp,
 		     struct amdgpu_bo **bo_ptr)
 {
-	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
+	u64 flags = bp->flags;
 	int r;
 
-	r = amdgpu_bo_do_create(adev, size, byte_align, domain,
-				parent_flags, type, resv, bo_ptr);
+	bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
+	r = amdgpu_bo_do_create(adev, bp, bo_ptr);
 	if (r)
 		return r;
 
 	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
-		if (!resv)
+		if (!bp->resv)
 			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
 							NULL));
 
-		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
+		r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));
 
-		if (!resv)
+		if (!bp->resv)
 			reservation_object_unlock((*bo_ptr)->tbo.resv);
 
 		if (r)
@@ -689,8 +693,21 @@
 		return -EINVAL;
 
 	/* A shared bo cannot be migrated to VRAM */
-	if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM))
-		return -EINVAL;
+	if (bo->prime_shared_count) {
+		if (domain & AMDGPU_GEM_DOMAIN_GTT)
+			domain = AMDGPU_GEM_DOMAIN_GTT;
+		else
+			return -EINVAL;
+	}
+
+	/* This assumes only APU display buffers are pinned with (VRAM|GTT).
+	 * See function amdgpu_display_supported_domains()
+	 */
+	if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+		domain = AMDGPU_GEM_DOMAIN_VRAM;
+		if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
+			domain = AMDGPU_GEM_DOMAIN_GTT;
+	}
 
 	if (bo->pin_count) {
 		uint32_t mem_type = bo->tbo.mem.mem_type;
@@ -838,6 +855,13 @@
 	return amdgpu_ttm_init(adev);
 }
 
+int amdgpu_bo_late_init(struct amdgpu_device *adev)
+{
+	amdgpu_ttm_late_init(adev);
+
+	return 0;
+}
+
 void amdgpu_bo_fini(struct amdgpu_device *adev)
 {
 	amdgpu_ttm_fini(adev);
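
The reworked pin path above makes two separate decisions: a prime-shared BO now falls back to GTT instead of failing outright, and a VRAM|GTT request (which, per the comment, only APU display buffers use) resolves to a single domain based on VRAM size. Restated as a pure helper for readability; the function is a sketch, while the constants and rules are from the diff:

    static int resolve_pin_domain(u32 domain, bool prime_shared,
                                  u64 real_vram_size, u32 *out)
    {
            if (prime_shared) {
                    if (!(domain & AMDGPU_GEM_DOMAIN_GTT))
                            return -EINVAL; /* shared BOs cannot be pinned to VRAM */
                    domain = AMDGPU_GEM_DOMAIN_GTT;
            }

            /* APU display buffers pin with VRAM|GTT; pick one domain */
            if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT))
                    domain = (real_vram_size <= AMDGPU_SG_THRESHOLD) ?
                            AMDGPU_GEM_DOMAIN_GTT : AMDGPU_GEM_DOMAIN_VRAM;

            *out = domain;
            return 0;
    }
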
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 546f77cb7..540e03f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -33,6 +33,16 @@
 
 #define AMDGPU_BO_INVALID_OFFSET	LONG_MAX
 
+struct amdgpu_bo_param {
+	unsigned long			size;
+	int				byte_align;
+	u32				domain;
+	u32				preferred_domain;
+	u64				flags;
+	enum ttm_bo_type		type;
+	struct reservation_object	*resv;
+};
+
 /* bo virtual addresses in a vm */
 struct amdgpu_bo_va_mapping {
 	struct amdgpu_bo_va		*bo_va;
@@ -196,6 +206,27 @@
 }
 
 /**
+ * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
+ */
+static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
+	struct drm_mm_node *node = bo->tbo.mem.mm_node;
+	unsigned long pages_left;
+
+	if (bo->tbo.mem.mem_type != TTM_PL_VRAM)
+		return false;
+
+	for (pages_left = bo->tbo.mem.num_pages; pages_left;
+	     pages_left -= node->size, node++)
+		if (node->start < fpfn)
+			return true;
+
+	return false;
+}
+
+/**
  * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
  */
 static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
@@ -203,10 +234,8 @@
 	return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
 }
 
-int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
-		     int byte_align, u32 domain,
-		     u64 flags, enum ttm_bo_type type,
-		     struct reservation_object *resv,
+int amdgpu_bo_create(struct amdgpu_device *adev,
+		     struct amdgpu_bo_param *bp,
 		     struct amdgpu_bo **bo_ptr);
 int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
 			      unsigned long size, int align,
@@ -230,6 +259,7 @@
 int amdgpu_bo_unpin(struct amdgpu_bo *bo);
 int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
 int amdgpu_bo_init(struct amdgpu_device *adev);
+int amdgpu_bo_late_init(struct amdgpu_device *adev);
 void amdgpu_bo_fini(struct amdgpu_device *adev);
 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
 				struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 361975c..b455da4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -77,6 +77,37 @@
 	}
 }
 
+/**
+ * DOC: power_dpm_state
+ *
+ * This is a legacy interface and is only provided for backwards compatibility.
+ * The amdgpu driver provides a sysfs API for adjusting certain power
+ * related parameters.  The file power_dpm_state is used for this.
+ * It accepts the following arguments:
+ * - battery
+ * - balanced
+ * - performance
+ *
+ * battery
+ *
+ * On older GPUs, the vbios provided a special power state for battery
+ * operation.  Selecting battery switched to this state.  This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ * balanced
+ *
+ * On older GPUs, the vbios provided a special power state for balanced
+ * operation.  Selecting balanced switched to this state.  This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ * performance
+ *
+ * On older GPUs, the vbios provided a special power state for performance
+ * operation.  Selecting performance switched to this state.  This is no
+ * longer provided on newer GPUs so the option does nothing in that case.
+ *
+ */
+
 static ssize_t amdgpu_get_dpm_state(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf)
@@ -131,6 +162,59 @@
 	return count;
 }
 
+
+/**
+ * DOC: power_dpm_force_performance_level
+ *
+ * The amdgpu driver provides a sysfs API for adjusting certain power
+ * related parameters.  The file power_dpm_force_performance_level is
+ * used for this.  It accepts the following arguments:
+ * - auto
+ * - low
+ * - high
+ * - manual
+ * - profile_standard
+ * - profile_min_sclk
+ * - profile_min_mclk
+ * - profile_peak
+ *
+ * auto
+ *
+ * When auto is selected, the driver will attempt to dynamically select
+ * the optimal power profile for current conditions in the driver.
+ *
+ * low
+ *
+ * When low is selected, the clocks are forced to the lowest power state.
+ *
+ * high
+ *
+ * When high is selected, the clocks are forced to the highest power state.
+ *
+ * manual
+ *
+ * When manual is selected, the user can manually adjust which power states
+ * are enabled for each clock domain via the sysfs pp_dpm_mclk, pp_dpm_sclk,
+ * and pp_dpm_pcie files and adjust the power state transition heuristics
+ * via the pp_power_profile_mode sysfs file.
+ *
+ * profile_standard
+ * profile_min_sclk
+ * profile_min_mclk
+ * profile_peak
+ *
+ * When the profiling modes are selected, clock and power gating are
+ * disabled and the clocks are set for different profiling cases. This
+ * mode is recommended for profiling specific work loads where you do
+ * not want clock or power gating, or clock fluctuation, to interfere
+ * with your results. profile_standard sets the clocks to a fixed clock
+ * level which varies from asic to asic.  profile_min_sclk forces the sclk
+ * to the lowest level.  profile_min_mclk forces the mclk to the lowest level.
+ * profile_peak sets all clocks (mclk, sclk, pcie) to the highest levels.
+ *
+ */
+
 static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
 						struct device_attribute *attr,
 								char *buf)
@@ -324,6 +408,17 @@
 	return count;
 }
 
+/**
+ * DOC: pp_table
+ *
+ * The amdgpu driver provides a sysfs API for uploading new powerplay
+ * tables.  The file pp_table is used for this.  Reading the file
+ * will dump the current power play table.  Writing to the file
+ * will attempt to upload a new powerplay table and re-initialize
+ * powerplay using that new table.
+ *
+ */
+
 static ssize_t amdgpu_get_pp_table(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -360,6 +455,29 @@
 	return count;
 }
 
+/**
+ * DOC: pp_od_clk_voltage
+ *
+ * The amdgpu driver provides a sysfs API for adjusting the clocks and voltages
+ * in each power level within a power state.  The pp_od_clk_voltage is used for
+ * this.
+ *
+ * Reading the file will display:
+ * - a list of engine clock levels and voltages labeled OD_SCLK
+ * - a list of memory clock levels and voltages labeled OD_MCLK
+ * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
+ *
+ * To manually adjust these settings, first select manual using
+ * power_dpm_force_performance_level. Enter a new value for each
+ * level by writing a string that contains "s/m level clock voltage" to
+ * the file.  E.g., "s 1 500 820" will update sclk level 1 to be 500 MHz
+ * at 820 mV; "m 0 350 810" will update mclk level 0 to be 350 MHz at
+ * 810 mV.  When you have edited all of the states as needed, write
+ * "c" (commit) to the file to commit your changes.  If you want to reset to the
+ * default power levels, write "r" (reset) to the file to reset them.
+ *
+ */
+
 static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
 		struct device_attribute *attr,
 		const char *buf,
@@ -437,6 +555,7 @@
 	if (adev->powerplay.pp_funcs->print_clock_levels) {
 		size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
 		size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+		size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size);
 		return size;
 	} else {
 		return snprintf(buf, PAGE_SIZE, "\n");
@@ -444,6 +563,23 @@
 
 }
 
+/**
+ * DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
+ *
+ * The amdgpu driver provides a sysfs API for adjusting what power levels
+ * are enabled for a given power state.  The files pp_dpm_sclk, pp_dpm_mclk,
+ * and pp_dpm_pcie are used for this.
+ *
+ * Reading back the files will show you the available power levels within
+ * the power state and the clock information for those levels.
+ *
+ * To manually adjust these states, first select manual using
+ * power_dpm_force_performance_level.
+ * Secondly, enter a new value for each level by writing a space-separated
+ * list of level indices to the file, e.g. "echo 4 5 6 > pp_dpm_sclk"
+ * will enable sclk levels 4, 5, and 6.
+ */
+
 static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -466,23 +602,27 @@
 	struct amdgpu_device *adev = ddev->dev_private;
 	int ret;
 	long level;
-	uint32_t i, mask = 0;
-	char sub_str[2];
+	uint32_t mask = 0;
+	char *sub_str = NULL;
+	char *tmp;
+	char buf_cpy[count + 1];	/* input plus the terminating NUL copied below */
+	const char delimiter[3] = {' ', '\n', '\0'};
 
-	for (i = 0; i < strlen(buf); i++) {
-		if (*(buf + i) == '\n')
-			continue;
-		sub_str[0] = *(buf + i);
-		sub_str[1] = '\0';
-		ret = kstrtol(sub_str, 0, &level);
+	memcpy(buf_cpy, buf, count+1);
+	tmp = buf_cpy;
+	while (tmp[0]) {
+		sub_str =  strsep(&tmp, delimiter);
+		if (strlen(sub_str)) {
+			ret = kstrtol(sub_str, 0, &level);
 
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		mask |= 1 << level;
+			if (ret) {
+				count = -EINVAL;
+				goto fail;
+			}
+			mask |= 1 << level;
+		} else {
+			break;
+		}
 	}
-
 	if (adev->powerplay.pp_funcs->force_clock_level)
 		amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
 
@@ -512,21 +652,26 @@
 	struct amdgpu_device *adev = ddev->dev_private;
 	int ret;
 	long level;
-	uint32_t i, mask = 0;
-	char sub_str[2];
+	uint32_t mask = 0;
+	char *sub_str = NULL;
+	char *tmp;
+	char buf_cpy[count + 1];	/* input plus the terminating NUL copied below */
+	const char delimiter[3] = {' ', '\n', '\0'};
 
-	for (i = 0; i < strlen(buf); i++) {
-		if (*(buf + i) == '\n')
-			continue;
-		sub_str[0] = *(buf + i);
-		sub_str[1] = '\0';
-		ret = kstrtol(sub_str, 0, &level);
+	memcpy(buf_cpy, buf, count+1);
+	tmp = buf_cpy;
+	while (tmp[0]) {
+		sub_str =  strsep(&tmp, delimiter);
+		if (strlen(sub_str)) {
+			ret = kstrtol(sub_str, 0, &level);
 
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		mask |= 1 << level;
+			if (ret) {
+				count = -EINVAL;
+				goto fail;
+			}
+			mask |= 1 << level;
+		} else {
+			break;
+		}
 	}
 	if (adev->powerplay.pp_funcs->force_clock_level)
 		amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
@@ -557,21 +702,27 @@
 	struct amdgpu_device *adev = ddev->dev_private;
 	int ret;
 	long level;
-	uint32_t i, mask = 0;
-	char sub_str[2];
+	uint32_t mask = 0;
+	char *sub_str = NULL;
+	char *tmp;
+	char buf_cpy[count + 1];	/* input plus the terminating NUL copied below */
+	const char delimiter[3] = {' ', '\n', '\0'};
 
-	for (i = 0; i < strlen(buf); i++) {
-		if (*(buf + i) == '\n')
-			continue;
-		sub_str[0] = *(buf + i);
-		sub_str[1] = '\0';
-		ret = kstrtol(sub_str, 0, &level);
+	memcpy(buf_cpy, buf, count+1);
+	tmp = buf_cpy;
 
-		if (ret) {
-			count = -EINVAL;
-			goto fail;
-		}
-		mask |= 1 << level;
+	while (tmp[0]) {
+		sub_str =  strsep(&tmp, delimiter);
+		if (strlen(sub_str)) {
+			ret = kstrtol(sub_str, 0, &level);
+
+			if (ret) {
+				count = -EINVAL;
+				goto fail;
+			}
+			mask |= 1 << level;
+		} else {
+			break;
+		}
 	}
 	if (adev->powerplay.pp_funcs->force_clock_level)
 		amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
@@ -668,6 +819,26 @@
 	return count;
 }
 
+/**
+ * DOC: pp_power_profile_mode
+ *
+ * The amdgpu driver provides a sysfs API for adjusting the heuristics
+ * related to switching between power levels in a power state.  The file
+ * pp_power_profile_mode is used for this.
+ *
+ * Reading this file outputs a list of all of the predefined power profiles
+ * and the relevant heuristics settings for that profile.
+ *
+ * To select a profile or create a custom profile, first select manual using
+ * power_dpm_force_performance_level.  Writing the number of a predefined
+ * profile to pp_power_profile_mode will enable those heuristics.  To
+ * create a custom set of heuristics, write a string of numbers to the file
+ * starting with the number of the custom profile along with a setting
+ * for each heuristic parameter.  Due to differences across asic families
+ * the heuristic parameters vary from family to family.
+ *
+ */
+
 static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
 		struct device_attribute *attr,
 		char *buf)
@@ -1020,8 +1191,8 @@
 {
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	struct drm_device *ddev = adev->ddev;
-	struct pp_gpu_power query = {0};
-	int r, size = sizeof(query);
+	u32 query = 0;
+	int r, size = sizeof(u32);
 	unsigned uw;
 
 	/* Can't get power when the card is off */
@@ -1041,7 +1212,7 @@
 		return r;
 
 	/* convert to microwatts */
-	uw = (query.average_gpu_power >> 8) * 1000000;
+	uw = (query >> 8) * 1000000 + (query & 0xff) * 1000;
 
 	return snprintf(buf, PAGE_SIZE, "%u\n", uw);
 }
@@ -1109,6 +1280,46 @@
 	return count;
 }
 
+
+/**
+ * DOC: hwmon
+ *
+ * The amdgpu driver exposes the following sensor interfaces:
+ * - GPU temperature (via the on-die sensor)
+ * - GPU voltage
+ * - Northbridge voltage (APUs only)
+ * - GPU power
+ * - GPU fan
+ *
+ * hwmon interfaces for GPU temperature:
+ * - temp1_input: the on die GPU temperature in millidegrees Celsius
+ * - temp1_crit: temperature critical max value in millidegrees Celsius
+ * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
+ *
+ * hwmon interfaces for GPU voltage:
+ * - in0_input: the voltage on the GPU in millivolts
+ * - in1_input: the voltage on the Northbridge in millivolts
+ *
+ * hwmon interfaces for GPU power:
+ * - power1_average: average power used by the GPU in microWatts
+ * - power1_cap_min: minimum cap supported in microWatts
+ * - power1_cap_max: maximum cap supported in microWatts
+ * - power1_cap: selected power cap in microWatts
+ *
+ * hwmon interfaces for GPU fan:
+ * - pwm1: pulse width modulation fan level (0-255)
+ * - pwm1_enable: pulse width modulation fan control method
+ *                0: no fan speed control
+ *                1: manual fan speed control using pwm interface
+ *                2: automatic fan speed control
+ * - pwm1_min: pulse width modulation fan control minimum level (0)
+ * - pwm1_max: pulse width modulation fan control maximum level (255)
+ * - fan1_input: fan speed in RPM
+ *
+ * You can use hwmon tools like sensors to view this information on your system.
+ *
+ */
+
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
@@ -1153,19 +1364,14 @@
 	struct amdgpu_device *adev = dev_get_drvdata(dev);
 	umode_t effective_mode = attr->mode;
 
-	/* handle non-powerplay limitations */
-	if (!adev->powerplay.pp_handle) {
-		/* Skip fan attributes if fan is not present */
-		if (adev->pm.no_fan &&
-		    (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
-		     attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
-		     attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
-		     attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
-			return 0;
-		/* requires powerplay */
-		if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
-			return 0;
-	}
+	/* Skip fan attributes if fan is not present */
+	if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+	    attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+	    attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+	    attr == &sensor_dev_attr_pwm1_min.dev_attr.attr ||
+	    attr == &sensor_dev_attr_fan1_input.dev_attr.attr))
+		return 0;
 
 	/* Skip limit attributes if DPM is not enabled */
 	if (!adev->pm.dpm_enabled &&
@@ -1658,9 +1864,6 @@
 
 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 {
-	struct drm_device *ddev = adev->ddev;
-	struct drm_crtc *crtc;
-	struct amdgpu_crtc *amdgpu_crtc;
 	int i = 0;
 
 	if (!adev->pm.dpm_enabled)
@@ -1676,21 +1879,25 @@
 	}
 
 	if (adev->powerplay.pp_funcs->dispatch_tasks) {
+		if (!amdgpu_device_has_dc_support(adev)) {
+			mutex_lock(&adev->pm.mutex);
+			amdgpu_dpm_get_active_displays(adev);
+			adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs;
+			adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
+			adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev);
+			/* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */
+			if (adev->pm.pm_display_cfg.vrefresh > 120)
+				adev->pm.pm_display_cfg.min_vblank_time = 0;
+			if (adev->powerplay.pp_funcs->display_configuration_change)
+				adev->powerplay.pp_funcs->display_configuration_change(
+								adev->powerplay.pp_handle,
+								&adev->pm.pm_display_cfg);
+			mutex_unlock(&adev->pm.mutex);
+		}
 		amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
 	} else {
 		mutex_lock(&adev->pm.mutex);
-		adev->pm.dpm.new_active_crtcs = 0;
-		adev->pm.dpm.new_active_crtc_count = 0;
-		if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
-			list_for_each_entry(crtc,
-					    &ddev->mode_config.crtc_list, head) {
-				amdgpu_crtc = to_amdgpu_crtc(crtc);
-				if (amdgpu_crtc->enabled) {
-					adev->pm.dpm.new_active_crtcs |= (1 << amdgpu_crtc->crtc_id);
-					adev->pm.dpm.new_active_crtc_count++;
-				}
-			}
-		}
+		amdgpu_dpm_get_active_displays(adev);
 		/* update battery/ac status */
 		if (power_supply_is_system_supplied() > 0)
 			adev->pm.dpm.ac_power = true;
@@ -1711,7 +1918,7 @@
 static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev)
 {
 	uint32_t value;
-	struct pp_gpu_power query = {0};
+	uint32_t query = 0;
 	int size;
 
 	/* sanity check PP is enabled */
@@ -1734,17 +1941,9 @@
 		seq_printf(m, "\t%u mV (VDDGFX)\n", value);
 	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
 		seq_printf(m, "\t%u mV (VDDNB)\n", value);
-	size = sizeof(query);
-	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size)) {
-		seq_printf(m, "\t%u.%u W (VDDC)\n", query.vddc_power >> 8,
-				query.vddc_power & 0xff);
-		seq_printf(m, "\t%u.%u W (VDDCI)\n", query.vddci_power >> 8,
-				query.vddci_power & 0xff);
-		seq_printf(m, "\t%u.%u W (max GPU)\n", query.max_gpu_power >> 8,
-				query.max_gpu_power & 0xff);
-		seq_printf(m, "\t%u.%u W (average GPU)\n", query.average_gpu_power >> 8,
-				query.average_gpu_power & 0xff);
-	}
+	size = sizeof(uint32_t);
+	if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, (void *)&query, &size))
+		seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
 	size = sizeof(value);
 	seq_printf(m, "\n");
 
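The GPU power sensor now returns one 32-bit value instead of the removed pp_gpu_power struct: whole watts in the upper bits, with the low byte treated as milliwatts by the microwatt conversion above (that unit reading is inferred from the arithmetic, not from a documented register format). A worked example:

    u32 query = (42 << 8) | 128;             /* 42 W plus a low byte of 128 */
    unsigned int uw = (query >> 8) * 1000000 /* 42000000 uW */
                    + (query & 0xff) * 1000; /*   128000 uW */
    /* uw == 42128000, reported by hwmon as 42.128 W average GPU power */
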
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 4b584cb7..4683626 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -102,12 +102,18 @@
 	struct reservation_object *resv = attach->dmabuf->resv;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_bo *bo;
+	struct amdgpu_bo_param bp;
 	int ret;
 
+	memset(&bp, 0, sizeof(bp));
+	bp.size = attach->dmabuf->size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
+	bp.flags = 0;
+	bp.type = ttm_bo_type_sg;
+	bp.resv = resv;
 	ww_mutex_lock(&resv->lock, NULL);
-	ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
-			       AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
-			       resv, &bo);
+	ret = amdgpu_bo_create(adev, &bp, &bo);
 	if (ret)
 		goto error;
 
@@ -209,7 +215,7 @@
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	struct ttm_operation_ctx ctx = { true, false };
-	u32 domain = amdgpu_display_framebuffer_domains(adev);
+	u32 domain = amdgpu_display_supported_domains(adev);
 	int ret;
 	bool reads = (direction == DMA_BIDIRECTIONAL ||
 		      direction == DMA_FROM_DEVICE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index c7d43e0..9f1a5bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -52,6 +52,7 @@
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		psp_v3_1_set_psp_funcs(psp);
 		break;
 	case CHIP_RAVEN:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 262c126..8af16e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -66,6 +66,8 @@
 			       u32 ring,
 			       struct amdgpu_ring **out_ring)
 {
+	u32 instance;
+
 	switch (mapper->hw_ip) {
 	case AMDGPU_HW_IP_GFX:
 		*out_ring = &adev->gfx.gfx_ring[ring];
@@ -77,13 +79,16 @@
 		*out_ring = &adev->sdma.instance[ring].ring;
 		break;
 	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.ring;
+		instance = ring;
+		*out_ring = &adev->uvd.inst[instance].ring;
 		break;
 	case AMDGPU_HW_IP_VCE:
 		*out_ring = &adev->vce.ring[ring];
 		break;
 	case AMDGPU_HW_IP_UVD_ENC:
-		*out_ring = &adev->uvd.ring_enc[ring];
+		instance = ring / adev->uvd.num_enc_rings;
+		*out_ring =
+			&adev->uvd.inst[instance].ring_enc[ring % adev->uvd.num_enc_rings];
 		break;
 	case AMDGPU_HW_IP_VCN_DEC:
 		*out_ring = &adev->vcn.ring_dec;
@@ -240,13 +245,14 @@
 		ip_num_rings = adev->sdma.num_instances;
 		break;
 	case AMDGPU_HW_IP_UVD:
-		ip_num_rings = 1;
+		ip_num_rings = adev->uvd.num_uvd_inst;
 		break;
 	case AMDGPU_HW_IP_VCE:
 		ip_num_rings = adev->vce.num_rings;
 		break;
 	case AMDGPU_HW_IP_UVD_ENC:
-		ip_num_rings = adev->uvd.num_enc_rings;
+		ip_num_rings =
+			adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst;
 		break;
 	case AMDGPU_HW_IP_VCN_DEC:
 		ip_num_rings = 1;
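
The identity-mapping case above is the inverse of the kms.c bit packing: a flat UVD_ENC ring index picked by userspace is split back into (instance, per-instance ring) with a divide and a modulo, and the total ring count reported for the IP becomes num_enc_rings * num_uvd_inst to match. A tiny sketch of the decomposition:

    static void uvd_enc_unflatten(u32 flat, u32 num_enc_rings,
                                  u32 *instance, u32 *ring)
    {
            *instance = flat / num_enc_rings;  /* which UVD instance */
            *ring = flat % num_enc_rings;      /* ring within that instance */
    }
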
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d5f526f..c6850b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -362,6 +362,7 @@
 
 	dma_fence_put(ring->vmid_wait);
 	ring->vmid_wait = NULL;
+	ring->me = 0;
 
 	ring->adev->rings[ring->idx] = NULL;
 }
@@ -459,6 +460,26 @@
 	spin_unlock(&adev->ring_lru_list_lock);
 }
 
+/**
+ * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper
+ *
+ * @adev: amdgpu_device pointer
+ * @reg0: register to write
+ * @reg1: register to wait on
+ * @ref: reference value to write/wait on
+ * @mask: mask to wait on
+ *
+ * Helper for rings that don't support write and wait in a
+ * single oneshot packet.
+ */
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+						uint32_t reg0, uint32_t reg1,
+						uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 /*
  * Debugfs info
  */
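
The new helper exists for engines whose packet format cannot write one register and wait on another in a single operation; such a backend can point the new ->emit_reg_write_reg_wait callback (added to amdgpu_ring_funcs later in this series) at the generic helper. An illustrative wiring, with the example_* backend names being placeholders:

    static const struct amdgpu_ring_funcs example_ring_funcs = {
            /* ... other callbacks ... */
            .emit_wreg = example_ring_emit_wreg,
            .emit_reg_wait = example_ring_emit_reg_wait,
            .emit_reg_write_reg_wait =
                    amdgpu_ring_emit_reg_write_reg_wait_helper,
    };
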
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1a59118..1513124c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -29,7 +29,7 @@
 #include <drm/drm_print.h>
 
 /* max number of rings */
-#define AMDGPU_MAX_RINGS		18
+#define AMDGPU_MAX_RINGS		21
 #define AMDGPU_MAX_GFX_RINGS		1
 #define AMDGPU_MAX_COMPUTE_RINGS	8
 #define AMDGPU_MAX_VCE_RINGS		3
@@ -42,6 +42,7 @@
 
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
+#define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)
 
 enum amdgpu_ring_type {
 	AMDGPU_RING_TYPE_GFX,
@@ -90,7 +91,8 @@
 				   unsigned irq_type);
 void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
 void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
+		      unsigned flags);
 int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
 void amdgpu_fence_process(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
@@ -154,6 +156,9 @@
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 	void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
 			      uint32_t val, uint32_t mask);
+	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
+					uint32_t reg0, uint32_t reg1,
+					uint32_t ref, uint32_t mask);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
 	/* priority functions */
 	void (*set_priority) (struct amdgpu_ring *ring,
@@ -228,6 +233,10 @@
 			int *blacklist, int num_blacklist,
 			bool lru_pipe_order, struct amdgpu_ring **ring);
 void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring,
+						uint32_t reg0, uint32_t reg1,
+						uint32_t ref, uint32_t mask);
+
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 	int i = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 2dbe875..d167e8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -33,6 +33,7 @@
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 	struct amdgpu_bo *vram_obj = NULL;
 	struct amdgpu_bo **gtt_obj = NULL;
+	struct amdgpu_bo_param bp;
 	uint64_t gart_addr, vram_addr;
 	unsigned n, size;
 	int i, r;
@@ -58,9 +59,15 @@
 		r = 1;
 		goto out_cleanup;
 	}
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	bp.flags = 0;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
 
-	r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
-			     ttm_bo_type_kernel, NULL, &vram_obj);
+	r = amdgpu_bo_create(adev, &bp, &vram_obj);
 	if (r) {
 		DRM_ERROR("Failed to create VRAM object\n");
 		goto out_cleanup;
@@ -79,9 +86,8 @@
 		void **vram_start, **vram_end;
 		struct dma_fence *fence = NULL;
 
-		r = amdgpu_bo_create(adev, size, PAGE_SIZE,
-				     AMDGPU_GEM_DOMAIN_GTT, 0,
-				     ttm_bo_type_kernel, NULL, gtt_obj + i);
+		bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+		r = amdgpu_bo_create(adev, &bp, gtt_obj + i);
 		if (r) {
 			DRM_ERROR("Failed to create GTT object %d\n", i);
 			goto out_lclean;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 532263a..e96e26d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -275,7 +275,7 @@
 			     ),
 
 	    TP_fast_assign(
-			   __entry->bo = bo_va->base.bo;
+			   __entry->bo = bo_va ? bo_va->base.bo : NULL;
 			   __entry->start = mapping->start;
 			   __entry->last = mapping->last;
 			   __entry->offset = mapping->offset;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 205da3f..e93a0a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -63,16 +63,44 @@
 /*
  * Global memory.
  */
+
+/**
+ * amdgpu_ttm_mem_global_init - Initialize and acquire reference to
+ * memory object
+ *
+ * @ref: Object for initialization.
+ *
+ * This is called by drm_global_item_ref() when an object is being
+ * initialized.
+ */
 static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref)
 {
 	return ttm_mem_global_init(ref->object);
 }
 
+/**
+ * amdgpu_ttm_mem_global_release - Drop reference to a memory object
+ *
+ * @ref: Object being removed
+ *
+ * This is called by drm_global_item_unref() when an object is being
+ * released.
+ */
 static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
 {
 	ttm_mem_global_release(ref->object);
 }
 
+/**
+ * amdgpu_ttm_global_init - Initialize global TTM memory reference
+ * structures
+ *
+ * @adev: AMDGPU device for which the global structures need to be
+ * registered.
+ *
+ * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
+ * during bring up.
+ */
 static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 {
 	struct drm_global_reference *global_ref;
@@ -80,7 +108,9 @@
 	struct drm_sched_rq *rq;
 	int r;
 
+	/* ensure reference is false in case init fails */
 	adev->mman.mem_global_referenced = false;
+
 	global_ref = &adev->mman.mem_global_ref;
 	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
 	global_ref->size = sizeof(struct ttm_mem_global);
@@ -111,7 +141,7 @@
 	ring = adev->mman.buffer_funcs_ring;
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
 	r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r) {
 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
 		goto error_entity;
@@ -146,6 +176,18 @@
 	return 0;
 }
 
+/**
+ * amdgpu_init_mem_type - Initialize a memory manager for a specific
+ * type of memory request
+ *
+ * @bdev: The TTM BO device object (contains a reference to amdgpu_device)
+ * @type: The type of memory requested
+ * @man:  The memory type manager to initialize for @type
+ *
+ * This is called by ttm_bo_init_mm() when a buffer object is being
+ * initialized.
+ */
 static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 				struct ttm_mem_type_manager *man)
 {
@@ -161,6 +203,7 @@
 		man->default_caching = TTM_PL_FLAG_CACHED;
 		break;
 	case TTM_PL_TT:
+		/* GTT memory  */
 		man->func = &amdgpu_gtt_mgr_func;
 		man->gpu_offset = adev->gmc.gart_start;
 		man->available_caching = TTM_PL_MASK_CACHING;
@@ -193,6 +236,14 @@
 	return 0;
 }
 
+/**
+ * amdgpu_evict_flags - Compute placement flags
+ *
+ * @bo: The buffer object to evict
+ * @placement: Possible destination(s) for evicted BO
+ *
+ * Fill in placement data when ttm_bo_evict() is called
+ */
 static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement)
 {
@@ -204,12 +255,14 @@
 		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
 	};
 
+	/* Don't handle scatter gather BOs */
 	if (bo->type == ttm_bo_type_sg) {
 		placement->num_placement = 0;
 		placement->num_busy_placement = 0;
 		return;
 	}
 
+	/* Object isn't an AMDGPU object so ignore */
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
 		placement->placement = &placements;
 		placement->busy_placement = &placements;
@@ -217,26 +270,16 @@
 		placement->num_busy_placement = 1;
 		return;
 	}
+
 	abo = ttm_to_amdgpu_bo(bo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (!adev->mman.buffer_funcs_enabled) {
+			/* Move to system memory */
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 		} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
-			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
-			unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
-			struct drm_mm_node *node = bo->mem.mm_node;
-			unsigned long pages_left;
-
-			for (pages_left = bo->mem.num_pages;
-			     pages_left;
-			     pages_left -= node->size, node++) {
-				if (node->start < fpfn)
-					break;
-			}
-
-			if (!pages_left)
-				goto gtt;
+			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
+			   amdgpu_bo_in_cpu_visible_vram(abo)) {
 
 			/* Try evicting to the CPU inaccessible part of VRAM
 			 * first, but only set GTT as busy placement, so this
@@ -245,12 +288,12 @@
 			 */
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
 							 AMDGPU_GEM_DOMAIN_GTT);
-			abo->placements[0].fpfn = fpfn;
+			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
 			abo->placements[0].lpfn = 0;
 			abo->placement.busy_placement = &abo->placements[1];
 			abo->placement.num_busy_placement = 1;
 		} else {
-gtt:
+			/* Move to GTT memory */
 			amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
 		}
 		break;
@@ -261,6 +304,15 @@
 	*placement = abo->placement;
 }
 
+/**
+ * amdgpu_verify_access - Verify access for a mmap call
+ *
+ * @bo:		The buffer object to map
+ * @filp:	The file pointer from the process performing the mmap
+ *
+ * This is called by ttm_bo_mmap() to verify whether a process
+ * has the right to mmap a BO to their process space.
+ */
 static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
@@ -278,6 +330,15 @@
 					  filp->private_data);
 }
 
+/**
+ * amdgpu_move_null - Register memory for a buffer object
+ *
+ * @bo:      The bo to assign the memory to
+ * @new_mem: The memory to be assigned
+ *
+ * Assign the memory from new_mem to the memory of the buffer object
+ * bo.
+ */
 static void amdgpu_move_null(struct ttm_buffer_object *bo,
 			     struct ttm_mem_reg *new_mem)
 {
@@ -288,6 +349,10 @@
 	new_mem->mm_node = NULL;
 }
 
+/**
+ * amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT
+ * buffer.
+ */
 static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 				    struct drm_mm_node *mm_node,
 				    struct ttm_mem_reg *mem)
@@ -302,9 +367,10 @@
 }
 
 /**
- * amdgpu_find_mm_node - Helper function finds the drm_mm_node
- *  corresponding to @offset. It also modifies the offset to be
- *  within the drm_mm_node returned
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies the offset to be
+ * within the drm_mm_node returned
  */
 static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
 					       unsigned long *offset)
@@ -443,7 +509,12 @@
 	return r;
 }
 
-
+/**
+ * amdgpu_move_blit - Copy an entire buffer to another buffer
+ *
+ * This is a helper called by amdgpu_bo_move() and
+ * amdgpu_move_vram_ram() to help move buffers to and from VRAM.
+ */
 static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 			    bool evict, bool no_wait_gpu,
 			    struct ttm_mem_reg *new_mem,
@@ -478,6 +549,11 @@
 	return r;
 }
 
+/**
+ * amdgpu_move_vram_ram - Copy VRAM buffer to RAM buffer
+ *
+ * Called by amdgpu_bo_move().
+ */
 static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
 				struct ttm_operation_ctx *ctx,
 				struct ttm_mem_reg *new_mem)
@@ -490,6 +566,8 @@
 	int r;
 
 	adev = amdgpu_ttm_adev(bo->bdev);
+
+	/* create space/pages for new_mem in GTT space */
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
 	placement.num_placement = 1;
@@ -504,25 +582,36 @@
 		return r;
 	}
 
+	/* set caching flags */
 	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
 
+	/* Bind the memory to the GTT space */
 	r = ttm_tt_bind(bo->ttm, &tmp_mem, ctx);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
+
+	/* blit VRAM to GTT */
 	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
+
+	/* move BO (in tmp_mem) to new_mem */
 	r = ttm_bo_move_ttm(bo, ctx, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
 }
 
+/**
+ * amdgpu_move_ram_vram - Copy buffer from RAM to VRAM
+ *
+ * Called by amdgpu_bo_move().
+ */
 static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
 				struct ttm_operation_ctx *ctx,
 				struct ttm_mem_reg *new_mem)
@@ -535,6 +624,8 @@
 	int r;
 
 	adev = amdgpu_ttm_adev(bo->bdev);
+
+	/* make space in GTT for old_mem buffer */
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
 	placement.num_placement = 1;
@@ -548,10 +639,14 @@
 	if (unlikely(r)) {
 		return r;
 	}
+
+	/* move/bind old memory to GTT space */
 	r = ttm_bo_move_ttm(bo, ctx, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
+
+	/* copy to VRAM */
 	r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
@@ -561,6 +656,11 @@
 	return r;
 }
 
+/**
+ * amdgpu_bo_move - Move a buffer object to a new memory location
+ *
+ * Called by ttm_bo_handle_move_mem()
+ */
 static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 			  struct ttm_operation_ctx *ctx,
 			  struct ttm_mem_reg *new_mem)
@@ -626,6 +726,11 @@
 	return 0;
 }
 
+/**
+ * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
+ *
+ * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
+ */
 static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
@@ -695,7 +800,7 @@
 	struct ttm_dma_tt	ttm;
 	u64			offset;
 	uint64_t		userptr;
-	struct mm_struct	*usermm;
+	struct task_struct	*usertask;
 	uint32_t		userflags;
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
@@ -703,17 +808,29 @@
 	uint32_t		last_set_pages;
 };
 
+/**
+ * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
+ *
+ * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
+ * This wraps the get_user_pages() call to supply device accessible
+ * pages that back user memory.
+ */
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	struct mm_struct *mm = gtt->usertask->mm;
 	unsigned int flags = 0;
 	unsigned pinned = 0;
 	int r;
 
+	if (!mm) /* Happens during process shutdown */
+		return -ESRCH;
+
 	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
 		flags |= FOLL_WRITE;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 
 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
 		/* check that we only use anonymous memory
@@ -721,13 +838,14 @@
 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
 		struct vm_area_struct *vma;
 
-		vma = find_vma(gtt->usermm, gtt->userptr);
+		vma = find_vma(mm, gtt->userptr);
 		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&current->mm->mmap_sem);
+			up_read(&mm->mmap_sem);
 			return -EPERM;
 		}
 	}
 
+	/* Pin the pages; get_user_pages() may pin fewer pages per call
+	 * than requested, so loop until all of them are pinned.
+	 */
 	do {
 		unsigned num_pages = ttm->num_pages - pinned;
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
@@ -739,7 +857,12 @@
 		list_add(&guptask.list, &gtt->guptasks);
 		spin_unlock(&gtt->guptasklock);
 
-		r = get_user_pages(userptr, num_pages, flags, p, NULL);
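+		/* The userptr BO may belong to a task other than the caller
+		 * (gtt->usertask); use the _remote variant in that case.
+		 */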
+		if (mm == current->mm)
+			r = get_user_pages(userptr, num_pages, flags, p, NULL);
+		else
+			r = get_user_pages_remote(gtt->usertask,
+					mm, userptr, num_pages,
+					flags, p, NULL, NULL);
 
 		spin_lock(&gtt->guptasklock);
 		list_del(&guptask.list);
@@ -752,15 +875,23 @@
 
 	} while (pinned < ttm->num_pages);
 
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	return 0;
 
 release_pages:
 	release_pages(pages, pinned);
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	return r;
 }
 
+/**
+ * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ *
+ * Called by amdgpu_cs_list_validate().  This creates the page list
+ * that backs user memory and will ultimately be mapped into the device
+ * address space.
+ */
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -775,6 +906,11 @@
 	}
 }
 
+/**
+ * amdgpu_ttm_tt_mark_user_pages - Mark pages as dirty
+ *
+ * Called while unpinning userptr pages
+ */
 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -793,7 +929,12 @@
 	}
 }
 
-/* prepare the sg table with the user pages */
+/**
+ * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
+ *
+ * Called by amdgpu_ttm_backend_bind()
+ */
 static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -805,17 +946,20 @@
 	enum dma_data_direction direction = write ?
 		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 
+	/* Allocate an SG array and squash pages into it */
 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
 				      ttm->num_pages << PAGE_SHIFT,
 				      GFP_KERNEL);
 	if (r)
 		goto release_sg;
 
+	/* Map SG to device */
 	r = -ENOMEM;
 	nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 	if (nents != ttm->sg->nents)
 		goto release_sg;
 
+	/* convert SG to linear array of pages and dma addresses */
 	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 					 gtt->ttm.dma_address, ttm->num_pages);
 
@@ -826,6 +970,9 @@
 	return r;
 }
 
+/**
+ * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
+ */
 static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
@@ -839,14 +986,60 @@
 	if (!ttm->sg->sgl)
 		return;
 
-	/* free the sg table and pages again */
+	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
+	/* mark the pages as dirty */
 	amdgpu_ttm_tt_mark_user_pages(ttm);
 
 	sg_free_table(ttm->sg);
 }
 
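+/**
+ * amdgpu_ttm_gart_bind - Bind pages into the GART page tables
+ *
+ * Wrapper around amdgpu_gart_bind() which handles the special GFX9
+ * MQD case: the first page keeps the BO's PTE flags while the rest
+ * of the buffer is bound with the NC memory type.
+ */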
+int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+				struct ttm_buffer_object *tbo,
+				uint64_t flags)
+{
+	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
+	struct ttm_tt *ttm = tbo->ttm;
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int r;
+
+	if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+		uint64_t page_idx = 1;
+
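+		/* bind the first page with the unmodified PTE flags */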
+		r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
+				ttm->pages, gtt->ttm.dma_address, flags);
+		if (r)
+			goto gart_bind_fail;
+
+		/* Patch mtype of the second part BO */
+		flags &= ~AMDGPU_PTE_MTYPE_MASK;
+		flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC);
+
+		r = amdgpu_gart_bind(adev,
+				gtt->offset + (page_idx << PAGE_SHIFT),
+				ttm->num_pages - page_idx,
+				&ttm->pages[page_idx],
+				&(gtt->ttm.dma_address[page_idx]), flags);
+	} else {
+		r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
+				     ttm->pages, gtt->ttm.dma_address, flags);
+	}
+
+gart_bind_fail:
+	if (r)
+		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
+			  ttm->num_pages, gtt->offset);
+
+	return r;
+}
+
+/**
+ * amdgpu_ttm_backend_bind - Bind GTT memory
+ *
+ * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
+ * This handles binding GTT memory to the device address space.
+ */
 static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
 {
@@ -877,7 +1070,10 @@
 		return 0;
 	}
 
+	/* compute PTE flags relevant to this BO memory */
 	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
+
+	/* bind pages into GART page tables */
 	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
 	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
 		ttm->pages, gtt->ttm.dma_address, flags);
@@ -888,6 +1084,9 @@
 	return r;
 }
 
+/**
+ * amdgpu_ttm_alloc_gart - Allocate GART memory for buffer object
+ */
 int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@@ -903,6 +1102,7 @@
 	    amdgpu_gtt_mgr_has_gart_addr(&bo->mem))
 		return 0;
 
+	/* allocate GTT space */
 	tmp = bo->mem;
 	tmp.mm_node = NULL;
 	placement.num_placement = 1;
@@ -918,10 +1118,12 @@
 	if (unlikely(r))
 		return r;
 
+	/* compute PTE flags for this buffer object */
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp);
+
+	/* Bind pages */
 	gtt->offset = (u64)tmp.start << PAGE_SHIFT;
-	r = amdgpu_gart_bind(adev, gtt->offset, bo->ttm->num_pages,
-			     bo->ttm->pages, gtt->ttm.dma_address, flags);
+	r = amdgpu_ttm_gart_bind(adev, bo, flags);
 	if (unlikely(r)) {
 		ttm_bo_mem_put(bo, &tmp);
 		return r;
@@ -935,31 +1137,40 @@
 	return 0;
 }
 
+/**
+ * amdgpu_ttm_recover_gart - Rebind GTT pages
+ *
+ * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
+ * rebind GTT pages during a GPU reset.
+ */
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
-	struct amdgpu_ttm_tt *gtt = (void *)tbo->ttm;
 	uint64_t flags;
 	int r;
 
-	if (!gtt)
+	if (!tbo->ttm)
 		return 0;
 
-	flags = amdgpu_ttm_tt_pte_flags(adev, &gtt->ttm.ttm, &tbo->mem);
-	r = amdgpu_gart_bind(adev, gtt->offset, gtt->ttm.ttm.num_pages,
-			     gtt->ttm.ttm.pages, gtt->ttm.dma_address, flags);
-	if (r)
-		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
-			  gtt->ttm.ttm.num_pages, gtt->offset);
+	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, &tbo->mem);
+	r = amdgpu_ttm_gart_bind(adev, tbo, flags);
+
 	return r;
 }
 
+/**
+ * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
+ *
+ * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
+ * ttm_tt_destroy().
+ */
 static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	int r;
 
+	/* if the pages have userptr pinning then clear that first */
 	if (gtt->userptr)
 		amdgpu_ttm_tt_unpin_userptr(ttm);
 
@@ -978,6 +1189,9 @@
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
+	if (gtt->usertask)
+		put_task_struct(gtt->usertask);
+
 	ttm_dma_tt_fini(&gtt->ttm);
 	kfree(gtt);
 }
@@ -988,6 +1202,13 @@
 	.destroy = &amdgpu_ttm_backend_destroy,
 };
 
+/**
+ * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
+ *
+ * @bo: The buffer object to create a GTT ttm_tt object around
+ *
+ * Called by ttm_tt_create().
+ */
 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 					   uint32_t page_flags)
 {
@@ -1001,6 +1222,8 @@
 		return NULL;
 	}
 	gtt->ttm.ttm.func = &amdgpu_backend_func;
+
+	/* allocate space for the uninitialized page entries */
 	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
 		kfree(gtt);
 		return NULL;
@@ -1008,6 +1231,12 @@
 	return &gtt->ttm.ttm;
 }
 
+/**
+ * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
+ *
+ * Map the pages of a ttm_tt object to an address space visible
+ * to the underlying device.
+ */
 static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 			struct ttm_operation_ctx *ctx)
 {
@@ -1015,6 +1244,7 @@
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
+	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
 	if (gtt && gtt->userptr) {
 		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
 		if (!ttm->sg)
@@ -1039,9 +1269,17 @@
 	}
 #endif
 
+	/* fall back to the generic helper to populate the page array
+	 * and map the pages to the device
+	 */
 	return ttm_populate_and_map_pages(adev->dev, &gtt->ttm, ctx);
 }
 
+/**
+ * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
+ *
+ * Unmaps pages of a ttm_tt object from the device address space and
+ * unpopulates the page array backing it.
+ */
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 {
 	struct amdgpu_device *adev;
@@ -1067,9 +1305,21 @@
 	}
 #endif
 
+	/* fall back to generic helper to unmap and unpopulate array */
 	ttm_unmap_and_unpopulate_pages(adev->dev, &gtt->ttm);
 }
 
+/**
+ * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current task
+ *
+ * @ttm: The ttm_tt object to bind this userptr object to
+ * @addr: The address in the current task's VM space to use
+ * @flags: Requirements of userptr object.
+ *
+ * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
+ * to the current task.
+ */
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 			      uint32_t flags)
 {
@@ -1079,8 +1329,13 @@
 		return -EINVAL;
 
 	gtt->userptr = addr;
-	gtt->usermm = current->mm;
 	gtt->userflags = flags;
+
+	if (gtt->usertask)
+		put_task_struct(gtt->usertask);
+	gtt->usertask = current->group_leader;
+	get_task_struct(gtt->usertask);
+
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
 	atomic_set(&gtt->mmu_invalidations, 0);
@@ -1089,6 +1344,9 @@
 	return 0;
 }
 
+/**
+ * amdgpu_ttm_tt_get_usermm - Return the mm_struct backing a userptr ttm_tt object
+ */
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1096,9 +1354,18 @@
 	if (gtt == NULL)
 		return NULL;
 
-	return gtt->usermm;
+	if (gtt->usertask == NULL)
+		return NULL;
+
+	return gtt->usertask->mm;
 }
 
+/**
+ * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies
+ * inside an address range of the current task.
+ */
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
@@ -1109,10 +1376,16 @@
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
+	/* Return false if no part of the ttm_tt object lies within
+	 * the range
+	 */
 	size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
+	/* Search the list of tasks that hold this mapping and see
+	 * if current is one of them.  If it is, return false.
+	 */
 	spin_lock(&gtt->guptasklock);
 	list_for_each_entry(entry, &gtt->guptasks, list) {
 		if (entry->task == current) {
@@ -1127,6 +1400,10 @@
 	return true;
 }
 
+/**
+ * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
+ */
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated)
 {
@@ -1137,6 +1414,12 @@
 	return prev_invalidated != *last_invalidated;
 }
 
+/**
+ * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt
+ * object been invalidated since the last time they've been set?
+ */
 bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1147,6 +1430,9 @@
 	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
 }
 
+/**
+ * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
+ */
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1157,6 +1443,12 @@
 	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
 }
 
+/**
+ * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
+ *
+ * @adev: amdgpu device
+ * @ttm: The ttm_tt object to compute the flags for
+ * @mem: The memory region backing this ttm_tt object
+ */
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem)
 {
@@ -1181,6 +1473,16 @@
 	return flags;
 }
 
+/**
+ * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
+ * object.
+ *
+ * Return true if eviction is sensible.  Called by
+ * ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space()
+ * which tries to evict buffer objects until it can find space
+ * for a new object and by ttm_bo_force_list_clean() which is
+ * used to clean out a memory space.
+ */
 static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 					    const struct ttm_place *place)
 {
@@ -1227,6 +1529,19 @@
 	return ttm_bo_eviction_valuable(bo, place);
 }
 
+/**
+ * amdgpu_ttm_access_memory - Read or write memory that backs a buffer object.
+ *
+ * @bo:  The buffer object to read/write
+ * @offset:  Offset into buffer object
+ * @buf:  Secondary buffer to write/read from
+ * @len: Length in bytes of access
+ * @write:  true if writing
+ *
+ * This is used to access VRAM that backs a buffer object via MMIO
+ * access for debugging purposes.
+ */
 static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 				    unsigned long offset,
 				    void *buf, int len, int write)
@@ -1329,6 +1644,7 @@
 static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 {
 	struct ttm_operation_ctx ctx = { false, false };
+	struct amdgpu_bo_param bp;
 	int r = 0;
 	int i;
 	u64 vram_size = adev->gmc.visible_vram_size;
@@ -1336,17 +1652,21 @@
 	u64 size = adev->fw_vram_usage.size;
 	struct amdgpu_bo *bo;
 
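+	/* describe the firmware reserved BO to amdgpu_bo_create() */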
+	memset(&bp, 0, sizeof(bp));
+	bp.size = adev->fw_vram_usage.size;
+	bp.byte_align = PAGE_SIZE;
+	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
 	adev->fw_vram_usage.va = NULL;
 	adev->fw_vram_usage.reserved_bo = NULL;
 
 	if (adev->fw_vram_usage.size > 0 &&
 		adev->fw_vram_usage.size <= vram_size) {
 
-		r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
-				     AMDGPU_GEM_DOMAIN_VRAM,
-				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
-				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-				     ttm_bo_type_kernel, NULL,
+		r = amdgpu_bo_create(adev, &bp,
 				     &adev->fw_vram_usage.reserved_bo);
 		if (r)
 			goto error_create;
@@ -1398,13 +1718,22 @@
 	adev->fw_vram_usage.reserved_bo = NULL;
 	return r;
 }
-
+/**
+ * amdgpu_ttm_init - Init the memory management (ttm) as well as various
+ * gtt/vram related fields.
+ *
+ * This initializes all of the memory space pools that the TTM layer
+ * will need, such as the GTT space (system memory mapped to the device),
+ * VRAM (on-board memory), and the on-chip memories (GDS, GWS, OA) which
+ * can be mapped per VMID.
+ */
 int amdgpu_ttm_init(struct amdgpu_device *adev)
 {
 	uint64_t gtt_size;
 	int r;
 	u64 vis_vram_limit;
 
+	/* initialize global references for vram/gtt */
 	r = amdgpu_ttm_global_init(adev);
 	if (r) {
 		return r;
@@ -1425,6 +1754,7 @@
 	/* We opt to avoid OOM on system pages allocations */
 	adev->mman.bdev.no_retry = true;
 
+	/* Initialize VRAM pool with all of VRAM divided into pages */
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
 				adev->gmc.real_vram_size >> PAGE_SHIFT);
 	if (r) {
@@ -1454,15 +1784,23 @@
 		return r;
 	}
 
-	r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_VRAM,
-				    &adev->stolen_vga_memory,
-				    NULL, NULL);
-	if (r)
-		return r;
+	/* Allocate memory for VGA emulation and pre-OS scanout buffers to
+	 * avoid display artifacts while transitioning between pre-OS
+	 * and driver.
+	 */
+	if (adev->gmc.stolen_size) {
+		r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_VRAM,
+					    &adev->stolen_vga_memory,
+					    NULL, NULL);
+		if (r)
+			return r;
+	}
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
+	/* Compute GTT size, either based on 3/4 of the size of RAM
+	 * or whatever the user passed on module init.
+	 */
 	if (amdgpu_gtt_size == -1) {
 		struct sysinfo si;
 
@@ -1473,6 +1811,8 @@
 	}
 	else
 		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+
+	/* Initialize GTT memory pool */
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT);
 	if (r) {
 		DRM_ERROR("Failed initializing GTT heap.\n");
@@ -1481,6 +1821,7 @@
 	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
 		 (unsigned)(gtt_size / (1024 * 1024)));
 
+	/* Initialize various on-chip memory pools */
 	adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
 	adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT;
 	adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT;
@@ -1520,6 +1861,7 @@
 		}
 	}
 
+	/* Register debugfs entries for amdgpu_ttm */
 	r = amdgpu_ttm_debugfs_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init debugfs\n");
@@ -1528,13 +1870,25 @@
 	return 0;
 }
 
+/**
+ * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm
+ */
+void amdgpu_ttm_late_init(struct amdgpu_device *adev)
+{
+	/* return the VGA stolen memory (if any) back to VRAM */
+	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+}
+
+/**
+ * amdgpu_ttm_fini - De-initialize the TTM memory pools
+ */
 void amdgpu_ttm_fini(struct amdgpu_device *adev)
 {
 	if (!adev->mman.initialized)
 		return;
 
 	amdgpu_ttm_debugfs_fini(adev);
-	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
 	if (adev->mman.aper_base_kaddr)
 		iounmap(adev->mman.aper_base_kaddr);
@@ -1856,6 +2210,11 @@
 #endif
 };
 
+/**
+ * amdgpu_ttm_vram_read - Linear read access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
 static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 				    size_t size, loff_t *pos)
 {
@@ -1895,6 +2254,11 @@
 	return result;
 }
 
+/**
+ * amdgpu_ttm_vram_write - Linear write access to VRAM
+ *
+ * Accesses VRAM via MMIO for debugging purposes.
+ */
 static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
 				    size_t size, loff_t *pos)
 {
@@ -1943,6 +2307,9 @@
 
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 
+/**
+ * amdgpu_ttm_gtt_read - Linear read access to GTT memory
+ */
 static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf,
 				   size_t size, loff_t *pos)
 {
@@ -1990,6 +2357,13 @@
 
 #endif
 
+/**
+ * amdgpu_iomem_read - Virtual read access to GPU mapped memory
+ *
+ * This function is used to read memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
 static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
 				 size_t size, loff_t *pos)
 {
@@ -1998,6 +2372,7 @@
 	ssize_t result = 0;
 	int r;
 
+	/* retrieve the IOMMU domain if any for this device */
 	dom = iommu_get_domain_for_dev(adev->dev);
 
 	while (size) {
@@ -2010,6 +2385,10 @@
 
 		bytes = bytes < size ? bytes : size;
 
+		/* Translate the bus address to a physical address.  If
+		 * the domain is NULL, it means there is no IOMMU active
+		 * and the address translation is the identity.
+		 */
 		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
 
 		pfn = addr >> PAGE_SHIFT;
@@ -2034,6 +2413,13 @@
 	return result;
 }
 
+/**
+ * amdgpu_iomem_write - Virtual write access to GPU mapped memory
+ *
+ * This function is used to write memory that has been mapped to the
+ * GPU and the known addresses are not physical addresses but instead
+ * bus addresses (e.g., what you'd put in an IB or ring buffer).
+ */
 static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
 				 size_t size, loff_t *pos)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 6ea7de8..e969c87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -77,6 +77,7 @@
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 
 int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_late_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
 					bool enable);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 5916cc2..f55f72a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -161,8 +161,38 @@
 			  le32_to_cpu(rlc_hdr->reg_list_format_separate_array_offset_bytes));
 		DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
 			  le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
-		DRM_DEBUG("reg_list_separate_size_bytes: %u\n",
-			  le32_to_cpu(rlc_hdr->reg_list_separate_size_bytes));
+		DRM_DEBUG("reg_list_separate_array_offset_bytes: %u\n",
+			  le32_to_cpu(rlc_hdr->reg_list_separate_array_offset_bytes));
+		if (version_minor == 1) {
+			const struct rlc_firmware_header_v2_1 *v2_1 =
+				container_of(rlc_hdr, struct rlc_firmware_header_v2_1, v2_0);
+			DRM_DEBUG("reg_list_format_direct_reg_list_length: %u\n",
+				  le32_to_cpu(v2_1->reg_list_format_direct_reg_list_length));
+			DRM_DEBUG("save_restore_list_cntl_ucode_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_cntl_ucode_ver));
+			DRM_DEBUG("save_restore_list_cntl_feature_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_cntl_feature_ver));
+			DRM_DEBUG("save_restore_list_cntl_size_bytes %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_cntl_size_bytes));
+			DRM_DEBUG("save_restore_list_cntl_offset_bytes: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_cntl_offset_bytes));
+			DRM_DEBUG("save_restore_list_gpm_ucode_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_gpm_ucode_ver));
+			DRM_DEBUG("save_restore_list_gpm_feature_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_gpm_feature_ver));
+			DRM_DEBUG("save_restore_list_gpm_size_bytes %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_gpm_size_bytes));
+			DRM_DEBUG("save_restore_list_gpm_offset_bytes: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_gpm_offset_bytes));
+			DRM_DEBUG("save_restore_list_srm_ucode_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_srm_ucode_ver));
+			DRM_DEBUG("save_restore_list_srm_feature_ver: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_srm_feature_ver));
+			DRM_DEBUG("save_restore_list_srm_size_bytes %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_srm_size_bytes));
+			DRM_DEBUG("save_restore_list_srm_offset_bytes: %u\n",
+				  le32_to_cpu(v2_1->save_restore_list_srm_offset_bytes));
+		}
 	} else {
 		DRM_ERROR("Unknown RLC ucode version: %u.%u\n", version_major, version_minor);
 	}
@@ -265,6 +295,7 @@
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		if (!load_type)
 			return AMDGPU_FW_LOAD_DIRECT;
 		else
@@ -276,6 +307,8 @@
 			return AMDGPU_FW_LOAD_DIRECT;
 		else
 			return AMDGPU_FW_LOAD_PSP;
+	case CHIP_VEGA20:
+		return AMDGPU_FW_LOAD_DIRECT;
 	default:
 		DRM_ERROR("Unknown firmware load type\n");
 	}
@@ -307,7 +340,10 @@
 	    (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2 &&
 	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1_JT &&
-	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT)) {
+	     ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT &&
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL &&
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM &&
+	     ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
 		ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes);
 
 		memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data +
@@ -329,6 +365,18 @@
 					      le32_to_cpu(header->ucode_array_offset_bytes) +
 					      le32_to_cpu(cp_hdr->jt_offset) * 4),
 		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) {
+		ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes;
+		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl,
+		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM) {
+		ucode->ucode_size = adev->gfx.rlc.save_restore_list_gpm_size_bytes;
+		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_gpm,
+		       ucode->ucode_size);
+	} else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM) {
+		ucode->ucode_size = adev->gfx.rlc.save_restore_list_srm_size_bytes;
+		memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_srm,
+		       ucode->ucode_size);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 30b5500..08e3857 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -98,6 +98,24 @@
 	uint32_t reg_list_separate_array_offset_bytes; /* payload offset from the start of the header */
 };
 
+/* version_major=2, version_minor=1 */
+struct rlc_firmware_header_v2_1 {
+	struct rlc_firmware_header_v2_0 v2_0;
+	uint32_t reg_list_format_direct_reg_list_length; /* length of direct reg list format array */
+	uint32_t save_restore_list_cntl_ucode_ver;
+	uint32_t save_restore_list_cntl_feature_ver;
+	uint32_t save_restore_list_cntl_size_bytes;
+	uint32_t save_restore_list_cntl_offset_bytes;
+	uint32_t save_restore_list_gpm_ucode_ver;
+	uint32_t save_restore_list_gpm_feature_ver;
+	uint32_t save_restore_list_gpm_size_bytes;
+	uint32_t save_restore_list_gpm_offset_bytes;
+	uint32_t save_restore_list_srm_ucode_ver;
+	uint32_t save_restore_list_srm_feature_ver;
+	uint32_t save_restore_list_srm_size_bytes;
+	uint32_t save_restore_list_srm_offset_bytes;
+};
+
 /* version_major=1, version_minor=0 */
 struct sdma_firmware_header_v1_0 {
 	struct common_firmware_header header;
@@ -148,6 +166,7 @@
 	struct gfx_firmware_header_v1_0 gfx;
 	struct rlc_firmware_header_v1_0 rlc;
 	struct rlc_firmware_header_v2_0 rlc_v2_0;
+	struct rlc_firmware_header_v2_1 rlc_v2_1;
 	struct sdma_firmware_header_v1_0 sdma;
 	struct sdma_firmware_header_v1_1 sdma_v1_1;
 	struct gpu_info_firmware_header_v1_0 gpu_info;
@@ -168,6 +187,9 @@
 	AMDGPU_UCODE_ID_CP_MEC2,
 	AMDGPU_UCODE_ID_CP_MEC2_JT,
 	AMDGPU_UCODE_ID_RLC_G,
+	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
+	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
+	AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
 	AMDGPU_UCODE_ID_STORAGE,
 	AMDGPU_UCODE_ID_SMC,
 	AMDGPU_UCODE_ID_UVD,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 627542b..bcf68f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -66,15 +66,18 @@
 #define FIRMWARE_POLARIS10	"amdgpu/polaris10_uvd.bin"
 #define FIRMWARE_POLARIS11	"amdgpu/polaris11_uvd.bin"
 #define FIRMWARE_POLARIS12	"amdgpu/polaris12_uvd.bin"
+#define FIRMWARE_VEGAM		"amdgpu/vegam_uvd.bin"
 
 #define FIRMWARE_VEGA10		"amdgpu/vega10_uvd.bin"
 #define FIRMWARE_VEGA12		"amdgpu/vega12_uvd.bin"
+#define FIRMWARE_VEGA20		"amdgpu/vega20_uvd.bin"
 
-#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
-#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
-#define mmUVD_GPCOM_VCPU_CMD_VEGA10 (0x03c3 + 0x7e00)
-#define mmUVD_NO_OP_VEGA10 (0x03ff + 0x7e00)
-#define mmUVD_ENGINE_CNTL_VEGA10 (0x03c6 + 0x7e00)
+/* These are common relative offsets for all asics, from uvd_7_0_offset.h */
+#define UVD_GPCOM_VCPU_CMD		0x03c3
+#define UVD_GPCOM_VCPU_DATA0	0x03c4
+#define UVD_GPCOM_VCPU_DATA1	0x03c5
+#define UVD_NO_OP				0x03ff
+#define UVD_BASE_SI				0x3800
 
 /**
  * amdgpu_uvd_cs_ctx - Command submission parser context
@@ -109,9 +112,11 @@
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
 
 MODULE_FIRMWARE(FIRMWARE_VEGA10);
 MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 
@@ -123,9 +128,9 @@
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
 	unsigned version_major, version_minor, family_id;
-	int i, r;
+	int i, j, r;
 
-	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
+	INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
 
 	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -172,6 +177,12 @@
 	case CHIP_VEGA12:
 		fw_name = FIRMWARE_VEGA12;
 		break;
+	case CHIP_VEGAM:
+		fw_name = FIRMWARE_VEGAM;
+		break;
+	case CHIP_VEGA20:
+		fw_name = FIRMWARE_VEGA20;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -226,28 +237,30 @@
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
 		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
 
-	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.vcpu_bo,
-				    &adev->uvd.gpu_addr, &adev->uvd.cpu_addr);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
-		return r;
-	}
+	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
 
-	ring = &adev->uvd.ring;
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity,
-				  rq, amdgpu_sched_jobs, NULL);
-	if (r != 0) {
-		DRM_ERROR("Failed setting up UVD run queue.\n");
-		return r;
-	}
+		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
+					    AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
+					    &adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
+		if (r) {
+			dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
+			return r;
+		}
 
-	for (i = 0; i < adev->uvd.max_handles; ++i) {
-		atomic_set(&adev->uvd.handles[i], 0);
-		adev->uvd.filp[i] = NULL;
-	}
+		ring = &adev->uvd.inst[j].ring;
+		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
+					  rq, NULL);
+		if (r != 0) {
+			DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
+			return r;
+		}
 
+		for (i = 0; i < adev->uvd.max_handles; ++i) {
+			atomic_set(&adev->uvd.inst[j].handles[i], 0);
+			adev->uvd.inst[j].filp[i] = NULL;
+		}
+	}
 	/* from uvd v5.0 HW addressing capacity increased to 64 bits */
 	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
 		adev->uvd.address_64_bit = true;
@@ -274,20 +287,22 @@
 
 int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
-	int i;
-	kfree(adev->uvd.saved_bo);
+	int i, j;
 
-	drm_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
+	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+		kfree(adev->uvd.inst[j].saved_bo);
 
-	amdgpu_bo_free_kernel(&adev->uvd.vcpu_bo,
-			      &adev->uvd.gpu_addr,
-			      (void **)&adev->uvd.cpu_addr);
+		drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
 
-	amdgpu_ring_fini(&adev->uvd.ring);
+		amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
+				      &adev->uvd.inst[j].gpu_addr,
+				      (void **)&adev->uvd.inst[j].cpu_addr);
 
-	for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
-		amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
+		amdgpu_ring_fini(&adev->uvd.inst[j].ring);
 
+		for (i = 0; i < AMDGPU_MAX_UVD_ENC_RINGS; ++i)
+			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+	}
 	release_firmware(adev->uvd.fw);
 
 	return 0;
@@ -297,32 +312,33 @@
 {
 	unsigned size;
 	void *ptr;
-	int i;
+	int i, j;
 
-	if (adev->uvd.vcpu_bo == NULL)
-		return 0;
+	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+		if (adev->uvd.inst[j].vcpu_bo == NULL)
+			continue;
 
-	cancel_delayed_work_sync(&adev->uvd.idle_work);
+		cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
 
-	/* only valid for physical mode */
-	if (adev->asic_type < CHIP_POLARIS10) {
-		for (i = 0; i < adev->uvd.max_handles; ++i)
-			if (atomic_read(&adev->uvd.handles[i]))
-				break;
+		/* only valid for physical mode */
+		if (adev->asic_type < CHIP_POLARIS10) {
+			for (i = 0; i < adev->uvd.max_handles; ++i)
+				if (atomic_read(&adev->uvd.inst[j].handles[i]))
+					break;
 
-		if (i == adev->uvd.max_handles)
-			return 0;
+			if (i == adev->uvd.max_handles)
+				continue;
+		}
+
+		size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
+		ptr = adev->uvd.inst[j].cpu_addr;
+
+		adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
+		if (!adev->uvd.inst[j].saved_bo)
+			return -ENOMEM;
+
+		memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size);
 	}
-
-	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
-	ptr = adev->uvd.cpu_addr;
-
-	adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
-	if (!adev->uvd.saved_bo)
-		return -ENOMEM;
-
-	memcpy_fromio(adev->uvd.saved_bo, ptr, size);
-
 	return 0;
 }
 
@@ -330,59 +346,65 @@
 {
 	unsigned size;
 	void *ptr;
+	int i;
 
-	if (adev->uvd.vcpu_bo == NULL)
-		return -EINVAL;
+	for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+		if (adev->uvd.inst[i].vcpu_bo == NULL)
+			return -EINVAL;
 
-	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
-	ptr = adev->uvd.cpu_addr;
+		size = amdgpu_bo_size(adev->uvd.inst[i].vcpu_bo);
+		ptr = adev->uvd.inst[i].cpu_addr;
 
-	if (adev->uvd.saved_bo != NULL) {
-		memcpy_toio(ptr, adev->uvd.saved_bo, size);
-		kfree(adev->uvd.saved_bo);
-		adev->uvd.saved_bo = NULL;
-	} else {
-		const struct common_firmware_header *hdr;
-		unsigned offset;
+		if (adev->uvd.inst[i].saved_bo != NULL) {
+			memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
+			kfree(adev->uvd.inst[i].saved_bo);
+			adev->uvd.inst[i].saved_bo = NULL;
+		} else {
+			const struct common_firmware_header *hdr;
+			unsigned offset;
 
-		hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
-		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
-			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
-			memcpy_toio(adev->uvd.cpu_addr, adev->uvd.fw->data + offset,
-				    le32_to_cpu(hdr->ucode_size_bytes));
-			size -= le32_to_cpu(hdr->ucode_size_bytes);
-			ptr += le32_to_cpu(hdr->ucode_size_bytes);
+			hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
+			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+				memcpy_toio(adev->uvd.inst[i].cpu_addr, adev->uvd.fw->data + offset,
+					    le32_to_cpu(hdr->ucode_size_bytes));
+				size -= le32_to_cpu(hdr->ucode_size_bytes);
+				ptr += le32_to_cpu(hdr->ucode_size_bytes);
+			}
+			memset_io(ptr, 0, size);
+			/* to restore uvd fence seq */
+			amdgpu_fence_driver_force_completion(&adev->uvd.inst[i].ring);
 		}
-		memset_io(ptr, 0, size);
-		/* to restore uvd fence seq */
-		amdgpu_fence_driver_force_completion(&adev->uvd.ring);
 	}
-
 	return 0;
 }
 
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 {
-	struct amdgpu_ring *ring = &adev->uvd.ring;
-	int i, r;
+	struct amdgpu_ring *ring;
+	int i, j, r;
 
-	for (i = 0; i < adev->uvd.max_handles; ++i) {
-		uint32_t handle = atomic_read(&adev->uvd.handles[i]);
-		if (handle != 0 && adev->uvd.filp[i] == filp) {
-			struct dma_fence *fence;
+	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+		ring = &adev->uvd.inst[j].ring;
 
-			r = amdgpu_uvd_get_destroy_msg(ring, handle,
-						       false, &fence);
-			if (r) {
-				DRM_ERROR("Error destroying UVD (%d)!\n", r);
-				continue;
+		for (i = 0; i < adev->uvd.max_handles; ++i) {
+			uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
+			if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
+				struct dma_fence *fence;
+
+				r = amdgpu_uvd_get_destroy_msg(ring, handle,
+							       false, &fence);
+				if (r) {
+					DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
+					continue;
+				}
+
+				dma_fence_wait(fence, false);
+				dma_fence_put(fence);
+
+				adev->uvd.inst[j].filp[i] = NULL;
+				atomic_set(&adev->uvd.inst[j].handles[i], 0);
 			}
-
-			dma_fence_wait(fence, false);
-			dma_fence_put(fence);
-
-			adev->uvd.filp[i] = NULL;
-			atomic_set(&adev->uvd.handles[i], 0);
 		}
 	}
 }
@@ -657,15 +679,16 @@
 	void *ptr;
 	long r;
 	int i;
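+	/* ring->me identifies the UVD instance this job runs on */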
+	uint32_t ip_instance = ctx->parser->job->ring->me;
 
 	if (offset & 0x3F) {
-		DRM_ERROR("UVD messages must be 64 byte aligned!\n");
+		DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
 		return -EINVAL;
 	}
 
 	r = amdgpu_bo_kmap(bo, &ptr);
 	if (r) {
-		DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
+		DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
 		return r;
 	}
 
@@ -675,7 +698,7 @@
 	handle = msg[2];
 
 	if (handle == 0) {
-		DRM_ERROR("Invalid UVD handle!\n");
+		DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
 		return -EINVAL;
 	}
 
@@ -686,18 +709,18 @@
 
 		/* try to alloc a new handle */
 		for (i = 0; i < adev->uvd.max_handles; ++i) {
-			if (atomic_read(&adev->uvd.handles[i]) == handle) {
-				DRM_ERROR("Handle 0x%x already in use!\n", handle);
+			if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
+				DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
 				return -EINVAL;
 			}
 
-			if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
-				adev->uvd.filp[i] = ctx->parser->filp;
+			if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
+				adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
 				return 0;
 			}
 		}
 
-		DRM_ERROR("No more free UVD handles!\n");
+		DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
 		return -ENOSPC;
 
 	case 1:
@@ -709,27 +732,27 @@
 
 		/* validate the handle */
 		for (i = 0; i < adev->uvd.max_handles; ++i) {
-			if (atomic_read(&adev->uvd.handles[i]) == handle) {
-				if (adev->uvd.filp[i] != ctx->parser->filp) {
-					DRM_ERROR("UVD handle collision detected!\n");
+			if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
+				if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
+					DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
 					return -EINVAL;
 				}
 				return 0;
 			}
 		}
 
-		DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
+		DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
 		return -ENOENT;
 
 	case 2:
 		/* it's a destroy msg, free the handle */
 		for (i = 0; i < adev->uvd.max_handles; ++i)
-			atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
+			atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
 		amdgpu_bo_kunmap(bo);
 		return 0;
 
 	default:
-		DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+		DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
 		return -EINVAL;
 	}
 	BUG();
@@ -800,7 +823,7 @@
 		}
 
 		if ((cmd == 0 || cmd == 0x3) &&
-		    (start >> 28) != (ctx->parser->adev->uvd.gpu_addr >> 28)) {
+		    (start >> 28) != (ctx->parser->adev->uvd.inst->gpu_addr >> 28)) {
 			DRM_ERROR("msg/fb buffer %LX-%LX out of 256MB segment!\n",
 				  start, end);
 			return -EINVAL;
@@ -968,6 +991,8 @@
 	uint64_t addr;
 	long r;
 	int i;
+	unsigned offset_idx = 0;
+	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
 	amdgpu_bo_kunmap(bo);
 	amdgpu_bo_unpin(bo);
@@ -987,17 +1012,16 @@
 		goto err;
 
 	if (adev->asic_type >= CHIP_VEGA10) {
-		data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0_VEGA10, 0);
-		data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1_VEGA10, 0);
-		data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD_VEGA10, 0);
-		data[3] = PACKET0(mmUVD_NO_OP_VEGA10, 0);
-	} else {
-		data[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
-		data[1] = PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0);
-		data[2] = PACKET0(mmUVD_GPCOM_VCPU_CMD, 0);
-		data[3] = PACKET0(mmUVD_NO_OP, 0);
+		offset_idx = 1 + ring->me;
+		offset[1] = adev->reg_offset[UVD_HWIP][0][1];
+		offset[2] = adev->reg_offset[UVD_HWIP][1][1];
 	}
 
+	data[0] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA0, 0);
+	data[1] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_DATA1, 0);
+	data[2] = PACKET0(offset[offset_idx] + UVD_GPCOM_VCPU_CMD, 0);
+	data[3] = PACKET0(offset[offset_idx] + UVD_NO_OP, 0);
+
 	ib = &job->ibs[0];
 	addr = amdgpu_bo_gpu_offset(bo);
 	ib->ptr[0] = data[0];
@@ -1033,7 +1057,7 @@
 		if (r)
 			goto err_free;
 
-		r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
+		r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
 		if (r)
 			goto err_free;
@@ -1121,8 +1145,15 @@
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =
-		container_of(work, struct amdgpu_device, uvd.idle_work.work);
-	unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
+		container_of(work, struct amdgpu_device, uvd.inst->idle_work.work);
+	unsigned fences = 0, i, j;
+
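+	/* count fences emitted on every UVD instance and its enc rings */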
+	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+		fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
+		for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
+			fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
+		}
+	}
 
 	if (fences == 0) {
 		if (adev->pm.dpm_enabled) {
@@ -1136,7 +1167,7 @@
 							       AMD_CG_STATE_GATE);
 		}
 	} else {
-		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+		schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
 	}
 }
 
@@ -1148,7 +1179,7 @@
 	if (amdgpu_sriov_vf(adev))
 		return;
 
-	set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+	set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
 	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, true);
@@ -1165,7 +1196,7 @@
 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
 {
 	if (!amdgpu_sriov_vf(ring->adev))
-		schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+		schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
 }
 
 /**
@@ -1179,27 +1210,28 @@
 {
 	struct dma_fence *fence;
 	long r;
+	uint32_t ip_instance = ring->me;
 
 	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r);
 		goto error;
 	}
 
 	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r);
 		goto error;
 	}
 
 	r = dma_fence_wait_timeout(fence, false, timeout);
 	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out.\n");
+		DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance);
 		r = -ETIMEDOUT;
 	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r);
 	} else {
-		DRM_DEBUG("ib test on ring %d succeeded\n",  ring->idx);
+		DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx);
 		r = 0;
 	}
 
@@ -1227,7 +1259,7 @@
 		 * necessarily linear. So we need to count
 		 * all non-zero handles.
 		 */
-		if (atomic_read(&adev->uvd.handles[i]))
+		if (atomic_read(&adev->uvd.inst->handles[i]))
 			used_handles++;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 32ea20b..b1579fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -31,30 +31,37 @@
 #define AMDGPU_UVD_SESSION_SIZE		(50*1024)
 #define AMDGPU_UVD_FIRMWARE_OFFSET	256
 
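+/* Some asics (e.g. Vega20) carry more than one UVD instance */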
+#define AMDGPU_MAX_UVD_INSTANCES			2
+
 #define AMDGPU_UVD_FIRMWARE_SIZE(adev)    \
 	(AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(((const struct common_firmware_header *)(adev)->uvd.fw->data)->ucode_size_bytes) + \
 			       8) - AMDGPU_UVD_FIRMWARE_OFFSET)
 
-struct amdgpu_uvd {
+struct amdgpu_uvd_inst {
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;
 	uint64_t		gpu_addr;
-	unsigned		fw_version;
 	void			*saved_bo;
-	unsigned		max_handles;
 	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES];
 	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];
 	struct delayed_work	idle_work;
-	const struct firmware	*fw;	/* UVD firmware */
 	struct amdgpu_ring	ring;
 	struct amdgpu_ring	ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
 	struct amdgpu_irq_src	irq;
-	bool			address_64_bit;
-	bool			use_ctx_buf;
 	struct drm_sched_entity entity;
 	struct drm_sched_entity entity_enc;
 	uint32_t                srbm_soft_reset;
+};
+
+struct amdgpu_uvd {
+	const struct firmware	*fw;	/* UVD firmware */
+	unsigned		fw_version;
+	unsigned		max_handles;
 	unsigned		num_enc_rings;
+	uint8_t		num_uvd_inst;
+	bool			address_64_bit;
+	bool			use_ctx_buf;
+	struct amdgpu_uvd_inst		inst[AMDGPU_MAX_UVD_INSTANCES];
 };
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a33804b..23d960e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -51,11 +51,13 @@
 #define FIRMWARE_FIJI		"amdgpu/fiji_vce.bin"
 #define FIRMWARE_STONEY		"amdgpu/stoney_vce.bin"
 #define FIRMWARE_POLARIS10	"amdgpu/polaris10_vce.bin"
-#define FIRMWARE_POLARIS11         "amdgpu/polaris11_vce.bin"
-#define FIRMWARE_POLARIS12         "amdgpu/polaris12_vce.bin"
+#define FIRMWARE_POLARIS11	"amdgpu/polaris11_vce.bin"
+#define FIRMWARE_POLARIS12	"amdgpu/polaris12_vce.bin"
+#define FIRMWARE_VEGAM		"amdgpu/vegam_vce.bin"
 
 #define FIRMWARE_VEGA10		"amdgpu/vega10_vce.bin"
 #define FIRMWARE_VEGA12		"amdgpu/vega12_vce.bin"
+#define FIRMWARE_VEGA20		"amdgpu/vega20_vce.bin"
 
 #ifdef CONFIG_DRM_AMDGPU_CIK
 MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -71,9 +73,11 @@
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 MODULE_FIRMWARE(FIRMWARE_POLARIS12);
+MODULE_FIRMWARE(FIRMWARE_VEGAM);
 
 MODULE_FIRMWARE(FIRMWARE_VEGA10);
 MODULE_FIRMWARE(FIRMWARE_VEGA12);
+MODULE_FIRMWARE(FIRMWARE_VEGA20);
 
 static void amdgpu_vce_idle_work_handler(struct work_struct *work);
 
@@ -132,12 +136,18 @@
 	case CHIP_POLARIS12:
 		fw_name = FIRMWARE_POLARIS12;
 		break;
+	case CHIP_VEGAM:
+		fw_name = FIRMWARE_VEGAM;
+		break;
 	case CHIP_VEGA10:
 		fw_name = FIRMWARE_VEGA10;
 		break;
 	case CHIP_VEGA12:
 		fw_name = FIRMWARE_VEGA12;
 		break;
+	case CHIP_VEGA20:
+		fw_name = FIRMWARE_VEGA20;
+		break;
 
 	default:
 		return -EINVAL;
@@ -181,7 +191,7 @@
 	ring = &adev->vce.ring[0];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 	r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCE run queue.\n");
 		return r;
@@ -755,6 +765,18 @@
 			if (r)
 				goto out;
 			break;
+
+		case 0x0500000d: /* MV buffer */
+			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3,
+							idx + 2, 0, 0);
+			if (r)
+				goto out;
+
+			r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8,
+							idx + 7, 0, 0);
+			if (r)
+				goto out;
+			break;
 		}
 
 		idx += len / 4;
@@ -860,6 +882,18 @@
 				goto out;
 			break;
 
+		case 0x0500000d: /* MV buffer */
+			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3,
+							idx + 2, *size, 0);
+			if (r)
+				goto out;
+
+			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8,
+							idx + 7, *size / 12, 0);
+			if (r)
+				goto out;
+			break;
+
 		default:
 			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
 			r = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 58e4953..8851bcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -105,7 +105,7 @@
 	ring = &adev->vcn.ring_dec;
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 	r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCN dec run queue.\n");
 		return r;
@@ -114,7 +114,7 @@
 	ring = &adev->vcn.ring_enc[0];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
 	r = drm_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCN enc run queue.\n");
 		return r;
@@ -205,13 +205,18 @@
 	struct amdgpu_device *adev =
 		container_of(work, struct amdgpu_device, vcn.idle_work.work);
 	unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec);
+	unsigned i;
+
+	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+		fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
+	}
 
 	if (fences == 0) {
-		if (adev->pm.dpm_enabled) {
-			/* might be used when with pg/cg
+		if (adev->pm.dpm_enabled)
 			amdgpu_dpm_enable_uvd(adev, false);
-			*/
-		}
+		else
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+							       AMD_PG_STATE_GATE);
 	} else {
 		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
 	}
@@ -223,9 +228,11 @@
 	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
 
-	if (set_clocks && adev->pm.dpm_enabled) {
+	if (set_clocks) {
-		/* might be used when with pg/cg
-		amdgpu_dpm_enable_uvd(adev, true);
-		*/
+		if (adev->pm.dpm_enabled)
+			amdgpu_dpm_enable_uvd(adev, true);
+		else
+			amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+							       AMD_PG_STATE_UNGATE);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 2fd7db8..181e6af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -45,6 +45,17 @@
 #define VCN_ENC_CMD_REG_WRITE		0x0000000b
 #define VCN_ENC_CMD_REG_WAIT		0x0000000c
 
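+/* Common UVD/VCN engine status and powergating register values */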
+enum engine_status_constants {
+	UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON = 0x2AAAA0,
+	UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON = 0x00000002,
+	UVD_STATUS__UVD_BUSY = 0x00000004,
+	GB_ADDR_CONFIG_DEFAULT = 0x26010011,
+	UVD_STATUS__IDLE = 0x2,
+	UVD_STATUS__BUSY = 0x5,
+	UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF = 0x1,
+	UVD_STATUS__RBC_BUSY = 0x1,
+};
+
 struct amdgpu_vcn {
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index da55a78..ccba88c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -94,6 +94,34 @@
 	struct dma_fence_cb cb;
 };
 
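+/**
+ * amdgpu_vm_bo_base_init - Attach a per-VM BO to its VM
+ *
+ * Links @base into the BO's list of per-VM BOs and, when the BO shares
+ * the root PD's reservation object but currently sits outside its
+ * preferred domains, moves it to the VM's evicted list so it gets
+ * validated again on next VM use.
+ */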
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+				   struct amdgpu_vm *vm,
+				   struct amdgpu_bo *bo)
+{
+	base->vm = vm;
+	base->bo = bo;
+	INIT_LIST_HEAD(&base->bo_list);
+	INIT_LIST_HEAD(&base->vm_status);
+
+	if (!bo)
+		return;
+	list_add_tail(&base->bo_list, &bo->va);
+
+	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
+		return;
+
+	if (bo->preferred_domains &
+	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
+		return;
+
+	/*
+	 * We checked all the prerequisites, but it looks like this per-VM BO
+	 * is currently evicted. Add the BO to the evicted list to make sure
+	 * it is validated on next VM use to avoid faults.
+	 */
+	list_move_tail(&base->vm_status, &vm->evicted);
+}
+
 /**
  * amdgpu_vm_level_shift - return the addr shift for each level
  *
@@ -196,24 +224,16 @@
 			      void *param)
 {
 	struct ttm_bo_global *glob = adev->mman.bdev.glob;
-	int r;
+	struct amdgpu_vm_bo_base *bo_base, *tmp;
+	int r = 0;
 
-	spin_lock(&vm->status_lock);
-	while (!list_empty(&vm->evicted)) {
-		struct amdgpu_vm_bo_base *bo_base;
-		struct amdgpu_bo *bo;
+	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
+		struct amdgpu_bo *bo = bo_base->bo;
 
-		bo_base = list_first_entry(&vm->evicted,
-					   struct amdgpu_vm_bo_base,
-					   vm_status);
-		spin_unlock(&vm->status_lock);
-
-		bo = bo_base->bo;
-		BUG_ON(!bo);
 		if (bo->parent) {
 			r = validate(param, bo);
 			if (r)
-				return r;
+				break;
 
 			spin_lock(&glob->lru_lock);
 			ttm_bo_move_to_lru_tail(&bo->tbo);
@@ -222,22 +242,29 @@
 			spin_unlock(&glob->lru_lock);
 		}
 
-		if (bo->tbo.type == ttm_bo_type_kernel &&
-		    vm->use_cpu_for_update) {
-			r = amdgpu_bo_kmap(bo, NULL);
-			if (r)
-				return r;
-		}
-
-		spin_lock(&vm->status_lock);
-		if (bo->tbo.type != ttm_bo_type_kernel)
+		if (bo->tbo.type != ttm_bo_type_kernel) {
+			spin_lock(&vm->moved_lock);
 			list_move(&bo_base->vm_status, &vm->moved);
-		else
+			spin_unlock(&vm->moved_lock);
+		} else {
 			list_move(&bo_base->vm_status, &vm->relocated);
+		}
 	}
-	spin_unlock(&vm->status_lock);
 
-	return 0;
+	spin_lock(&glob->lru_lock);
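+	/* Move idle per-VM BOs to the LRU tail so they are evicted last */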
+	list_for_each_entry(bo_base, &vm->idle, vm_status) {
+		struct amdgpu_bo *bo = bo_base->bo;
+
+		if (!bo->parent)
+			continue;
+
+		ttm_bo_move_to_lru_tail(&bo->tbo);
+		if (bo->shadow)
+			ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
+	}
+	spin_unlock(&glob->lru_lock);
+
+	return r;
 }
 
 /**
@@ -249,13 +276,7 @@
  */
 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 {
-	bool ready;
-
-	spin_lock(&vm->status_lock);
-	ready = list_empty(&vm->evicted);
-	spin_unlock(&vm->status_lock);
-
-	return ready;
+	return list_empty(&vm->evicted);
 }
 
 /**
@@ -412,11 +433,16 @@
 		struct amdgpu_bo *pt;
 
 		if (!entry->base.bo) {
-			r = amdgpu_bo_create(adev,
-					     amdgpu_vm_bo_size(adev, level),
-					     AMDGPU_GPU_PAGE_SIZE,
-					     AMDGPU_GEM_DOMAIN_VRAM, flags,
-					     ttm_bo_type_kernel, resv, &pt);
+			struct amdgpu_bo_param bp;
+
+			memset(&bp, 0, sizeof(bp));
+			bp.size = amdgpu_vm_bo_size(adev, level);
+			bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+			bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+			bp.flags = flags;
+			bp.type = ttm_bo_type_kernel;
+			bp.resv = resv;
+			r = amdgpu_bo_create(adev, &bp, &pt);
 			if (r)
 				return r;
 
@@ -441,12 +467,8 @@
 			*/
 			pt->parent = amdgpu_bo_ref(parent->base.bo);
 
-			entry->base.vm = vm;
-			entry->base.bo = pt;
-			list_add_tail(&entry->base.bo_list, &pt->va);
-			spin_lock(&vm->status_lock);
-			list_add(&entry->base.vm_status, &vm->relocated);
-			spin_unlock(&vm->status_lock);
+			amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+			list_move(&entry->base.vm_status, &vm->relocated);
 		}
 
 		if (level < AMDGPU_VM_PTB) {
@@ -628,7 +650,7 @@
 		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
 	if (vm_flush_needed || pasid_mapping_needed) {
-		r = amdgpu_fence_emit(ring, &fence);
+		r = amdgpu_fence_emit(ring, &fence, 0);
 		if (r)
 			return r;
 	}
@@ -893,10 +915,8 @@
 		if (!entry->base.bo)
 			continue;
 
-		spin_lock(&vm->status_lock);
-		if (list_empty(&entry->base.vm_status))
-			list_add(&entry->base.vm_status, &vm->relocated);
-		spin_unlock(&vm->status_lock);
+		if (!entry->base.moved)
+			list_move(&entry->base.vm_status, &vm->relocated);
 		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
 	}
 }
@@ -926,6 +946,14 @@
 	params.adev = adev;
 
 	if (vm->use_cpu_for_update) {
+		struct amdgpu_vm_bo_base *bo_base;
+
+		list_for_each_entry(bo_base, &vm->relocated, vm_status) {
+			r = amdgpu_bo_kmap(bo_base->bo, NULL);
+			if (unlikely(r))
+				return r;
+		}
+
 		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 		if (unlikely(r))
 			return r;
@@ -941,7 +969,6 @@
 		params.func = amdgpu_vm_do_set_ptes;
 	}
 
-	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->relocated)) {
 		struct amdgpu_vm_bo_base *bo_base, *parent;
 		struct amdgpu_vm_pt *pt, *entry;
@@ -950,14 +977,12 @@
 		bo_base = list_first_entry(&vm->relocated,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
-		list_del_init(&bo_base->vm_status);
-		spin_unlock(&vm->status_lock);
+		bo_base->moved = false;
+		list_move(&bo_base->vm_status, &vm->idle);
 
 		bo = bo_base->bo->parent;
-		if (!bo) {
-			spin_lock(&vm->status_lock);
+		if (!bo)
 			continue;
-		}
 
 		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
 					  bo_list);
@@ -966,12 +991,10 @@
 
 		amdgpu_vm_update_pde(&params, vm, pt, entry);
 
-		spin_lock(&vm->status_lock);
 		if (!vm->use_cpu_for_update &&
 		    (ndw - params.ib->length_dw) < 32)
 			break;
 	}
-	spin_unlock(&vm->status_lock);
 
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
@@ -1074,9 +1097,7 @@
 		if (entry->huge) {
 			/* Add the entry to the relocated list to update it. */
 			entry->huge = false;
-			spin_lock(&p->vm->status_lock);
 			list_move(&entry->base.vm_status, &p->vm->relocated);
-			spin_unlock(&p->vm->status_lock);
 		}
 		return;
 	}
@@ -1555,9 +1576,22 @@
 		amdgpu_asic_flush_hdp(adev, NULL);
 	}
 
-	spin_lock(&vm->status_lock);
+	spin_lock(&vm->moved_lock);
 	list_del_init(&bo_va->base.vm_status);
-	spin_unlock(&vm->status_lock);
+	spin_unlock(&vm->moved_lock);
+
+	/* If the BO is not in its preferred location, add it back to
+	 * the evicted list so that it gets validated again on the
+	 * next command submission.
+	 */
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		uint32_t mem_type = bo->tbo.mem.mem_type;
+
+		if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
+			list_add_tail(&bo_va->base.vm_status, &vm->evicted);
+		else
+			list_add(&bo_va->base.vm_status, &vm->idle);
+	}
 
 	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	bo_va->cleared = clear;
@@ -1766,19 +1800,18 @@
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm)
 {
+	struct amdgpu_bo_va *bo_va, *tmp;
+	struct list_head moved;
 	bool clear;
-	int r = 0;
+	int r;
 
-	spin_lock(&vm->status_lock);
-	while (!list_empty(&vm->moved)) {
-		struct amdgpu_bo_va *bo_va;
-		struct reservation_object *resv;
+	INIT_LIST_HEAD(&moved);
+	spin_lock(&vm->moved_lock);
+	list_splice_init(&vm->moved, &moved);
+	spin_unlock(&vm->moved_lock);
 
-		bo_va = list_first_entry(&vm->moved,
-			struct amdgpu_bo_va, base.vm_status);
-		spin_unlock(&vm->status_lock);
-
-		resv = bo_va->base.bo->tbo.resv;
+	list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
+		struct reservation_object *resv = bo_va->base.bo->tbo.resv;
 
 		/* Per VM BOs never need to be cleared in the page tables */
 		if (resv == vm->root.base.bo->tbo.resv)
@@ -1791,17 +1824,19 @@
 			clear = true;
 
 		r = amdgpu_vm_bo_update(adev, bo_va, clear);
-		if (r)
+		if (r) {
+			spin_lock(&vm->moved_lock);
+			list_splice(&moved, &vm->moved);
+			spin_unlock(&vm->moved_lock);
 			return r;
+		}
 
 		if (!clear && resv != vm->root.base.bo->tbo.resv)
 			reservation_object_unlock(resv);
 
-		spin_lock(&vm->status_lock);
 	}
-	spin_unlock(&vm->status_lock);
 
-	return r;
+	return 0;
 }
 
 /**
@@ -1827,36 +1862,12 @@
 	if (bo_va == NULL) {
 		return NULL;
 	}
-	bo_va->base.vm = vm;
-	bo_va->base.bo = bo;
-	INIT_LIST_HEAD(&bo_va->base.bo_list);
-	INIT_LIST_HEAD(&bo_va->base.vm_status);
+	amdgpu_vm_bo_base_init(&bo_va->base, vm, bo);
 
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->valids);
 	INIT_LIST_HEAD(&bo_va->invalids);
 
-	if (!bo)
-		return bo_va;
-
-	list_add_tail(&bo_va->base.bo_list, &bo->va);
-
-	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
-		return bo_va;
-
-	if (bo->preferred_domains &
-	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
-		return bo_va;
-
-	/*
-	 * We checked all the prerequisites, but it looks like this per VM BO
-	 * is currently evicted. add the BO to the evicted list to make sure it
-	 * is validated on next VM use to avoid fault.
-	 * */
-	spin_lock(&vm->status_lock);
-	list_move_tail(&bo_va->base.vm_status, &vm->evicted);
-	spin_unlock(&vm->status_lock);
-
 	return bo_va;
 }
 
@@ -1884,11 +1895,11 @@
 	if (mapping->flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
 
-	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-		spin_lock(&vm->status_lock);
-		if (list_empty(&bo_va->base.vm_status))
-			list_add(&bo_va->base.vm_status, &vm->moved);
-		spin_unlock(&vm->status_lock);
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
+	    !bo_va->base.moved) {
+		spin_lock(&vm->moved_lock);
+		list_move(&bo_va->base.vm_status, &vm->moved);
+		spin_unlock(&vm->moved_lock);
 	}
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
@@ -2198,9 +2209,9 @@
 
 	list_del(&bo_va->base.bo_list);
 
-	spin_lock(&vm->status_lock);
+	spin_lock(&vm->moved_lock);
 	list_del(&bo_va->base.vm_status);
-	spin_unlock(&vm->status_lock);
+	spin_unlock(&vm->moved_lock);
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
@@ -2234,33 +2245,34 @@
 {
 	struct amdgpu_vm_bo_base *bo_base;
 
+	/* A shadow BO doesn't have a BO base; its validation needs its parent */
+	if (bo->parent && bo->parent->shadow == bo)
+		bo = bo->parent;
+
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
 		struct amdgpu_vm *vm = bo_base->vm;
+		bool was_moved = bo_base->moved;
 
 		bo_base->moved = true;
 		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-			spin_lock(&bo_base->vm->status_lock);
 			if (bo->tbo.type == ttm_bo_type_kernel)
 				list_move(&bo_base->vm_status, &vm->evicted);
 			else
 				list_move_tail(&bo_base->vm_status,
 					       &vm->evicted);
-			spin_unlock(&bo_base->vm->status_lock);
 			continue;
 		}
 
+		if (was_moved)
+			continue;
+
 		if (bo->tbo.type == ttm_bo_type_kernel) {
-			spin_lock(&bo_base->vm->status_lock);
-			if (list_empty(&bo_base->vm_status))
-				list_add(&bo_base->vm_status, &vm->relocated);
-			spin_unlock(&bo_base->vm->status_lock);
-			continue;
+			list_move(&bo_base->vm_status, &vm->relocated);
+		} else {
+			spin_lock(&bo_base->vm->moved_lock);
+			list_move(&bo_base->vm_status, &vm->moved);
+			spin_unlock(&bo_base->vm->moved_lock);
 		}
-
-		spin_lock(&bo_base->vm->status_lock);
-		if (list_empty(&bo_base->vm_status))
-			list_add(&bo_base->vm_status, &vm->moved);
-		spin_unlock(&bo_base->vm->status_lock);
 	}
 }
 
@@ -2355,6 +2367,8 @@
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid)
 {
+	struct amdgpu_bo_param bp;
+	struct amdgpu_bo *root;
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
 	unsigned ring_instance;
@@ -2367,10 +2381,11 @@
 	vm->va = RB_ROOT_CACHED;
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
-	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->evicted);
 	INIT_LIST_HEAD(&vm->relocated);
+	spin_lock_init(&vm->moved_lock);
 	INIT_LIST_HEAD(&vm->moved);
+	INIT_LIST_HEAD(&vm->idle);
 	INIT_LIST_HEAD(&vm->freed);
 
 	/* create scheduler entity for page table updates */
@@ -2380,7 +2395,7 @@
 	ring = adev->vm_manager.vm_pte_rings[ring_instance];
 	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
 	r = drm_sched_entity_init(&ring->sched, &vm->entity,
-				  rq, amdgpu_sched_jobs, NULL);
+				  rq, NULL);
 	if (r)
 		return r;
 
@@ -2409,24 +2424,28 @@
 		flags |= AMDGPU_GEM_CREATE_SHADOW;
 
 	size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
-	r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
-			     ttm_bo_type_kernel, NULL, &vm->root.base.bo);
+	memset(&bp, 0, sizeof(bp));
+	bp.size = size;
+	bp.byte_align = align;
+	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	bp.flags = flags;
+	bp.type = ttm_bo_type_kernel;
+	bp.resv = NULL;
+	r = amdgpu_bo_create(adev, &bp, &root);
 	if (r)
 		goto error_free_sched_entity;
 
-	r = amdgpu_bo_reserve(vm->root.base.bo, true);
+	r = amdgpu_bo_reserve(root, true);
 	if (r)
 		goto error_free_root;
 
-	r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+	r = amdgpu_vm_clear_bo(adev, vm, root,
 			       adev->vm_manager.root_level,
 			       vm->pte_support_ats);
 	if (r)
 		goto error_unreserve;
 
-	vm->root.base.vm = vm;
-	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
-	list_add_tail(&vm->root.base.vm_status, &vm->evicted);
+	amdgpu_vm_bo_base_init(&vm->root.base, vm, root);
 	amdgpu_bo_unreserve(vm->root.base.bo);
 
 	if (pasid) {
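
Both allocation sites in this file now pass the new amdgpu_bo_param bundle instead of a long positional argument list. A minimal sketch of the reworked calling convention (field values are illustrative):

	struct amdgpu_bo_param bp;
	struct amdgpu_bo *bo;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = AMDGPU_GPU_PAGE_SIZE;	/* illustrative size */
	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp.flags = 0;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	r = amdgpu_bo_create(adev, &bp, &bo);
	if (r)
		return r;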
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 30f0803..061b99a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -75,11 +75,12 @@
 /* PDE Block Fragment Size for VEGA10 */
 #define AMDGPU_PDE_BFS(a)	((uint64_t)a << 59)
 
-/* VEGA10 only */
+
+/* For GFX9 */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
 
-/* For Raven */
+#define AMDGPU_MTYPE_NC 0
 #define AMDGPU_MTYPE_CC 2
 
 #define AMDGPU_PTE_DEFAULT_ATC  (AMDGPU_PTE_SYSTEM      \
@@ -167,9 +168,6 @@
 	/* tree of virtual addresses mapped */
 	struct rb_root_cached	va;
 
-	/* protecting invalidated */
-	spinlock_t		status_lock;
-
 	/* BOs who needs a validation */
 	struct list_head	evicted;
 
@@ -178,6 +176,10 @@
 
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
+	spinlock_t		moved_lock;
+
+	/* All BOs of this VM not currently in the state machine */
+	struct list_head	idle;
 
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
@@ -186,9 +188,6 @@
 	struct amdgpu_vm_pt     root;
 	struct dma_fence	*last_update;
 
-	/* protecting freed */
-	spinlock_t		freed_lock;
-
 	/* Scheduler entity for page table updates */
 	struct drm_sched_entity	entity;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 47ef3e6..a266dcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -5903,7 +5903,7 @@
 	pi->pcie_dpm_key_disabled = 0;
 	pi->thermal_sclk_dpm_enabled = 0;
 
-	if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+	if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
 		pi->caps_sclk_ds = true;
 	else
 		pi->caps_sclk_ds = false;
@@ -6255,7 +6255,7 @@
 	int ret;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!amdgpu_dpm)
+	if (!adev->pm.dpm_enabled)
 		return 0;
 
 	/* init the sysfs and debugfs files late */
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 0df2203..8ff4c60 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1735,6 +1735,12 @@
 	}
 }
 
+static bool cik_need_full_reset(struct amdgpu_device *adev)
+{
+	/* change this when we support soft reset */
+	return true;
+}
+
 static const struct amdgpu_asic_funcs cik_asic_funcs =
 {
 	.read_disabled_bios = &cik_read_disabled_bios,
@@ -1748,6 +1754,7 @@
 	.get_config_memsize = &cik_get_config_memsize,
 	.flush_hdp = &cik_flush_hdp,
 	.invalidate_hdp = &cik_invalidate_hdp,
+	.need_full_reset = &cik_need_full_reset,
 };
 
 static int cik_common_early_init(void *handle)
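
CIK reports that it always needs a full reset until soft reset is wired up. A hypothetical caller on the reset path would consult the new hook roughly like this (the surrounding logic is illustrative, not from this patch):

	/* illustrative only: pick the reset strategy per ASIC */
	if (adev->asic_funcs->need_full_reset(adev)) {
		/* tear down and re-init all IP blocks */
	} else {
		/* try the lighter per-block soft reset first */
	}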
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 452f88e..ada241b 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1823,7 +1823,6 @@
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_framebuffer *amdgpu_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *abo;
@@ -1842,18 +1841,15 @@
 		return 0;
 	}
 
-	if (atomic) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	} else {
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = amdgpu_fb->obj;
+	obj = target_fb->obj[0];
 	abo = gem_to_amdgpu_bo(obj);
 	r = amdgpu_bo_reserve(abo, false);
 	if (unlikely(r != 0))
@@ -2043,8 +2039,7 @@
 	WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
@@ -2526,11 +2521,9 @@
 	dce_v10_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index a7c1c58..a5b96ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -173,6 +173,7 @@
 							ARRAY_SIZE(polaris11_golden_settings_a11));
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		amdgpu_device_program_register_sequence(adev,
 							polaris10_golden_settings_a11,
 							ARRAY_SIZE(polaris10_golden_settings_a11));
@@ -473,6 +474,7 @@
 		num_crtc = 2;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		num_crtc = 6;
 		break;
 	case CHIP_POLARIS11:
@@ -1445,6 +1447,7 @@
 		adev->mode_info.audio.num_pins = 7;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		adev->mode_info.audio.num_pins = 8;
 		break;
 	case CHIP_POLARIS11:
@@ -1862,7 +1865,6 @@
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_framebuffer *amdgpu_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *abo;
@@ -1881,18 +1883,15 @@
 		return 0;
 	}
 
-	if (atomic) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	} else {
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = amdgpu_fb->obj;
+	obj = target_fb->obj[0];
 	abo = gem_to_amdgpu_bo(obj);
 	r = amdgpu_bo_reserve(abo, false);
 	if (unlikely(r != 0))
@@ -2082,8 +2081,7 @@
 	WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
@@ -2253,7 +2251,8 @@
 
 	if ((adev->asic_type == CHIP_POLARIS10) ||
 	    (adev->asic_type == CHIP_POLARIS11) ||
-	    (adev->asic_type == CHIP_POLARIS12)) {
+	    (adev->asic_type == CHIP_POLARIS12) ||
+	    (adev->asic_type == CHIP_VEGAM)) {
 		struct amdgpu_encoder *amdgpu_encoder =
 			to_amdgpu_encoder(amdgpu_crtc->encoder);
 		struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
@@ -2601,11 +2600,9 @@
 	dce_v11_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
@@ -2673,7 +2670,8 @@
 
 	if ((adev->asic_type == CHIP_POLARIS10) ||
 	    (adev->asic_type == CHIP_POLARIS11) ||
-	    (adev->asic_type == CHIP_POLARIS12)) {
+	    (adev->asic_type == CHIP_POLARIS12) ||
+	    (adev->asic_type == CHIP_VEGAM)) {
 		struct amdgpu_encoder *amdgpu_encoder =
 			to_amdgpu_encoder(amdgpu_crtc->encoder);
 		int encoder_mode =
@@ -2830,6 +2828,7 @@
 		adev->mode_info.num_dig = 9;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		adev->mode_info.num_hpd = 6;
 		adev->mode_info.num_dig = 6;
 		break;
@@ -2949,7 +2948,8 @@
 	amdgpu_atombios_encoder_init_dig(adev);
 	if ((adev->asic_type == CHIP_POLARIS10) ||
 	    (adev->asic_type == CHIP_POLARIS11) ||
-	    (adev->asic_type == CHIP_POLARIS12)) {
+	    (adev->asic_type == CHIP_POLARIS12) ||
+	    (adev->asic_type == CHIP_VEGAM)) {
 		amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk,
 						   DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS);
 		amdgpu_atombios_crtc_set_dce_clock(adev, 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 9f67b7f..394cc1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -1780,7 +1780,6 @@
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_framebuffer *amdgpu_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *abo;
@@ -1798,18 +1797,15 @@
 		return 0;
 	}
 
-	if (atomic) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	} else {
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = amdgpu_fb->obj;
+	obj = target_fb->obj[0];
 	abo = gem_to_amdgpu_bo(obj);
 	r = amdgpu_bo_reserve(abo, false);
 	if (unlikely(r != 0))
@@ -1978,8 +1974,7 @@
 	WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
@@ -2414,11 +2409,9 @@
 	dce_v6_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index f55422c..c9b9ab8 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1754,7 +1754,6 @@
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_framebuffer *amdgpu_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct amdgpu_bo *abo;
@@ -1773,18 +1772,15 @@
 		return 0;
 	}
 
-	if (atomic) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	} else {
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = amdgpu_fb->obj;
+	obj = target_fb->obj[0];
 	abo = gem_to_amdgpu_bo(obj);
 	r = amdgpu_bo_reserve(abo, false);
 	if (unlikely(r != 0))
@@ -1955,8 +1951,7 @@
 	WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		amdgpu_fb = to_amdgpu_framebuffer(fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r != 0))
 			return r;
@@ -2430,11 +2425,9 @@
 	dce_v8_0_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index b51f05d..dbf2ccd 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -168,11 +168,9 @@
 	dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct amdgpu_framebuffer *amdgpu_fb;
 		struct amdgpu_bo *abo;
 
-		amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
-		abo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve abo before unpin\n");
@@ -329,7 +327,7 @@
 	return 0;
 }
 
-static int dce_virtual_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status dce_virtual_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	return MODE_OK;
@@ -462,8 +460,9 @@
 		break;
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_VEGAM:
 		dce_v11_0_disable_dce(adev);
 		break;
 	case CHIP_TOPAZ:
@@ -474,6 +473,7 @@
 		break;
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		break;
 	default:
 		DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
new file mode 100644
index 0000000..9935371
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v1_7.h"
+
+#include "df/df_1_7_default.h"
+#include "df/df_1_7_offset.h"
+#include "df/df_1_7_sh_mask.h"
+
+static u32 df_v1_7_channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
+
+static void df_v1_7_init(struct amdgpu_device *adev)
+{
+}
+
+static void df_v1_7_enable_broadcast_mode(struct amdgpu_device *adev,
+					  bool enable)
+{
+	u32 tmp;
+
+	if (enable) {
+		tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+		tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+	} else {
+		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+			     mmFabricConfigAccessControl_DEFAULT);
+	}
+}
+
+static u32 df_v1_7_get_fb_channel_number(struct amdgpu_device *adev)
+{
+	u32 tmp;
+
+	tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
+	tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+	tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+	return tmp;
+}
+
+static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+	int fb_channel_number;
+
+	fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+
+	return df_v1_7_channel_number[fb_channel_number];
+}
+
+static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+						     bool enable)
+{
+	u32 tmp;
+
+	/* Put DF in broadcast mode */
+	adev->df_funcs->enable_broadcast_mode(adev, true);
+
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+		tmp |= DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY;
+		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+	} else {
+		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+		tmp |= DF_V1_7_MGCG_DISABLE;
+		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+	}
+
+	/* Exit broadcast mode */
+	adev->df_funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
+					  u32 *flags)
+{
+	u32 tmp;
+
+	/* AMD_CG_SUPPORT_DF_MGCG */
+	tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+	if (tmp & DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY)
+		*flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+static void df_v1_7_enable_ecc_force_par_wr_rmw(struct amdgpu_device *adev,
+						bool enable)
+{
+	WREG32_FIELD15(DF, 0, DF_CS_AON0_CoherentSlaveModeCtrlA0,
+		       ForceParWrRMW, enable);
+}
+
+const struct amdgpu_df_funcs df_v1_7_funcs = {
+	.init = df_v1_7_init,
+	.enable_broadcast_mode = df_v1_7_enable_broadcast_mode,
+	.get_fb_channel_number = df_v1_7_get_fb_channel_number,
+	.get_hbm_channel_number = df_v1_7_get_hbm_channel_number,
+	.update_medium_grain_clock_gating = df_v1_7_update_medium_grain_clock_gating,
+	.get_clockgating_state = df_v1_7_get_clockgating_state,
+	.enable_ecc_force_par_wr_rmw = df_v1_7_enable_ecc_force_par_wr_rmw,
+};
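
The IntLvNumChan field read from DramBaseAddress0 is an encoding, not a count; df_v1_7_channel_number[] decodes it (e.g. an encoding of 5 means 8 HBM channels). Consumers go through the function table, roughly:

	/* e.g. memory controller setup querying the decoded channel count */
	u32 channels = adev->df_funcs->get_hbm_channel_number(adev);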
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.h b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
new file mode 100644
index 0000000..7462110
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V1_7_H__
+#define __DF_V1_7_H__
+
+#include "soc15_common.h"
+
+enum DF_V1_7_MGCG {
+	DF_V1_7_MGCG_DISABLE = 0,
+	DF_V1_7_MGCG_ENABLE_00_CYCLE_DELAY = 1,
+	DF_V1_7_MGCG_ENABLE_01_CYCLE_DELAY = 2,
+	DF_V1_7_MGCG_ENABLE_15_CYCLE_DELAY = 13,
+	DF_V1_7_MGCG_ENABLE_31_CYCLE_DELAY = 14,
+	DF_V1_7_MGCG_ENABLE_63_CYCLE_DELAY = 15
+};
+
+extern const struct amdgpu_df_funcs df_v1_7_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
new file mode 100644
index 0000000..60608b3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v3_6.h"
+
+#include "df/df_3_6_default.h"
+#include "df/df_3_6_offset.h"
+#include "df/df_3_6_sh_mask.h"
+
+static u32 df_v3_6_channel_number[] = {1, 2, 0, 4, 0, 8, 0,
+				       16, 32, 0, 0, 0, 2, 4, 8};
+
+static void df_v3_6_init(struct amdgpu_device *adev)
+{
+}
+
+static void df_v3_6_enable_broadcast_mode(struct amdgpu_device *adev,
+					  bool enable)
+{
+	u32 tmp;
+
+	if (enable) {
+		tmp = RREG32_SOC15(DF, 0, mmFabricConfigAccessControl);
+		tmp &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
+		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl, tmp);
+	} else {
+		WREG32_SOC15(DF, 0, mmFabricConfigAccessControl,
+			     mmFabricConfigAccessControl_DEFAULT);
+	}
+}
+
+static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
+{
+	u32 tmp;
+
+	tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
+	tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+	tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
+
+	return tmp;
+}
+
+static u32 df_v3_6_get_hbm_channel_number(struct amdgpu_device *adev)
+{
+	int fb_channel_number;
+
+	fb_channel_number = adev->df_funcs->get_fb_channel_number(adev);
+	if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
+		fb_channel_number = 0;
+
+	return df_v3_6_channel_number[fb_channel_number];
+}
+
+static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
+						     bool enable)
+{
+	u32 tmp;
+
+	/* Put DF in broadcast mode */
+	adev->df_funcs->enable_broadcast_mode(adev, true);
+
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
+		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+		tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+	} else {
+		tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+		tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+		tmp |= DF_V3_6_MGCG_DISABLE;
+		WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+	}
+
+	/* Exit broadcast mode */
+	adev->df_funcs->enable_broadcast_mode(adev, false);
+}
+
+static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
+					  u32 *flags)
+{
+	u32 tmp;
+
+	/* AMD_CG_SUPPORT_DF_MGCG */
+	tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
+	if (tmp & DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY)
+		*flags |= AMD_CG_SUPPORT_DF_MGCG;
+}
+
+const struct amdgpu_df_funcs df_v3_6_funcs = {
+	.init = df_v3_6_init,
+	.enable_broadcast_mode = df_v3_6_enable_broadcast_mode,
+	.get_fb_channel_number = df_v3_6_get_fb_channel_number,
+	.get_hbm_channel_number = df_v3_6_get_hbm_channel_number,
+	.update_medium_grain_clock_gating =
+			df_v3_6_update_medium_grain_clock_gating,
+	.get_clockgating_state = df_v3_6_get_clockgating_state,
+};
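
DF 3.6 supports more interleave encodings, so its decode table is larger and the raw field is clamped before indexing (the >= comparison above guards the exact-size case, since valid indices stop one short of the array size). A standalone sketch of the decode, with a hypothetical helper name:

	static u32 decode_hbm_channels(u32 fb_channel_number)
	{
		/* unknown encodings fall back to entry 0 */
		if (fb_channel_number >= ARRAY_SIZE(df_v3_6_channel_number))
			fb_channel_number = 0;

		return df_v3_6_channel_number[fb_channel_number];
	}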
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.h b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
new file mode 100644
index 0000000..e79c58e
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __DF_V3_6_H__
+#define __DF_V3_6_H__
+
+#include "soc15_common.h"
+
+enum DF_V3_6_MGCG {
+	DF_V3_6_MGCG_DISABLE = 0,
+	DF_V3_6_MGCG_ENABLE_00_CYCLE_DELAY = 1,
+	DF_V3_6_MGCG_ENABLE_01_CYCLE_DELAY = 2,
+	DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY = 13,
+	DF_V3_6_MGCG_ENABLE_31_CYCLE_DELAY = 14,
+	DF_V3_6_MGCG_ENABLE_63_CYCLE_DELAY = 15
+};
+
+extern const struct amdgpu_df_funcs df_v3_6_funcs;
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e14263f..818874b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -125,18 +125,6 @@
 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
 
-MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
-MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
-
 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
@@ -149,6 +137,18 @@
 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
 
+MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
+
 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
@@ -161,6 +161,13 @@
 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
 
+MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
+MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vegam_me.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
+MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
+
 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 {
 	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
@@ -292,6 +299,37 @@
 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 };
 
+static const u32 golden_settings_vegam_a11[] =
+{
+	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
+	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
+	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
+	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
+	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
+	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
+	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
+	mmSQ_CONFIG, 0x07f80000, 0x01180000,
+	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
+	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
+	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
+};
+
+static const u32 vegam_golden_common_all[] =
+{
+	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
+	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
+	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
+	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
+	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
+};
+
 static const u32 golden_settings_polaris11_a11[] =
 {
 	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
@@ -712,6 +750,14 @@
 							tonga_golden_common_all,
 							ARRAY_SIZE(tonga_golden_common_all));
 		break;
+	case CHIP_VEGAM:
+		amdgpu_device_program_register_sequence(adev,
+							golden_settings_vegam_a11,
+							ARRAY_SIZE(golden_settings_vegam_a11));
+		amdgpu_device_program_register_sequence(adev,
+							vegam_golden_common_all,
+							ARRAY_SIZE(vegam_golden_common_all));
+		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 		amdgpu_device_program_register_sequence(adev,
@@ -918,17 +964,20 @@
 	case CHIP_FIJI:
 		chip_name = "fiji";
 		break;
-	case CHIP_POLARIS11:
-		chip_name = "polaris11";
+	case CHIP_STONEY:
+		chip_name = "stoney";
 		break;
 	case CHIP_POLARIS10:
 		chip_name = "polaris10";
 		break;
+	case CHIP_POLARIS11:
+		chip_name = "polaris11";
+		break;
 	case CHIP_POLARIS12:
 		chip_name = "polaris12";
 		break;
-	case CHIP_STONEY:
-		chip_name = "stoney";
+	case CHIP_VEGAM:
+		chip_name = "vegam";
 		break;
 	default:
 		BUG();
@@ -1770,6 +1819,7 @@
 		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		ret = amdgpu_atombios_get_gfx_info(adev);
 		if (ret)
 			return ret;
@@ -1957,12 +2007,13 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	switch (adev->asic_type) {
-	case CHIP_FIJI:
 	case CHIP_TONGA:
+	case CHIP_CARRIZO:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
-	case CHIP_POLARIS10:
-	case CHIP_CARRIZO:
+	case CHIP_VEGAM:
 		adev->gfx.mec.num_mec = 2;
 		break;
 	case CHIP_TOPAZ:
@@ -2323,6 +2374,7 @@
 
 		break;
 	case CHIP_FIJI:
+	case CHIP_VEGAM:
 		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 				PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
 				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
@@ -3504,6 +3556,7 @@
 {
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
+	case CHIP_VEGAM:
 		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
 			  RB_XSEL2(1) | PKR_MAP(2) |
 			  PKR_XSEL(1) | PKR_YSEL(1) |
@@ -4071,7 +4124,8 @@
 		gfx_v8_0_init_power_gating(adev);
 		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
 	} else if ((adev->asic_type == CHIP_POLARIS11) ||
-		   (adev->asic_type == CHIP_POLARIS12)) {
+		   (adev->asic_type == CHIP_POLARIS12) ||
+		   (adev->asic_type == CHIP_VEGAM)) {
 		gfx_v8_0_init_csb(adev);
 		gfx_v8_0_init_save_restore_list(adev);
 		gfx_v8_0_enable_save_restore_machine(adev);
@@ -4146,7 +4200,8 @@
 	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
 	if (adev->asic_type == CHIP_POLARIS11 ||
 	    adev->asic_type == CHIP_POLARIS10 ||
-	    adev->asic_type == CHIP_POLARIS12) {
+	    adev->asic_type == CHIP_POLARIS12 ||
+	    adev->asic_type == CHIP_VEGAM) {
 		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
 		tmp &= ~0x3;
 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
@@ -5498,7 +5553,8 @@
 						       bool enable)
 {
 	if ((adev->asic_type == CHIP_POLARIS11) ||
-	    (adev->asic_type == CHIP_POLARIS12))
+	    (adev->asic_type == CHIP_POLARIS12) ||
+	    (adev->asic_type == CHIP_VEGAM))
 		/* Send msg to SMU via Powerplay */
 		amdgpu_device_ip_set_powergating_state(adev,
 						       AMD_IP_BLOCK_TYPE_SMC,
@@ -5588,6 +5644,7 @@
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
 			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
 		else
@@ -6154,6 +6211,7 @@
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9d39fd5..d7530fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -27,6 +27,7 @@
 #include "amdgpu_gfx.h"
 #include "soc15.h"
 #include "soc15d.h"
+#include "amdgpu_atomfirmware.h"
 
 #include "gc/gc_9_0_offset.h"
 #include "gc/gc_9_0_sh_mask.h"
@@ -41,7 +42,6 @@
 #define GFX9_MEC_HPD_SIZE 2048
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
-#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34
 
 #define mmPWR_MISC_CNTL_STATUS					0x0183
 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
@@ -64,6 +64,13 @@
 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
 
+MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega20_me.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
+
 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
 MODULE_FIRMWARE("amdgpu/raven_me.bin");
@@ -73,29 +80,22 @@
 
 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
 {
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
 };
 
 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@@ -109,6 +109,20 @@
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800)
 };
 
+static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
+{
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
+};
+
 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
 {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
@@ -185,6 +199,30 @@
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
 };
 
+static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
+{
+	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
+};
+
+static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
+{
+	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
+};
+
 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
@@ -218,6 +256,14 @@
 						golden_settings_gc_9_2_1_vg12,
 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
 		break;
+	case CHIP_VEGA20:
+		soc15_program_register_sequence(adev,
+						golden_settings_gc_9_0,
+						ARRAY_SIZE(golden_settings_gc_9_0));
+		soc15_program_register_sequence(adev,
+						golden_settings_gc_9_0_vg20,
+						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
+		break;
 	case CHIP_RAVEN:
 		soc15_program_register_sequence(adev,
 						 golden_settings_gc_9_1,
@@ -401,6 +447,27 @@
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
+{
+	const struct rlc_firmware_header_v2_1 *rlc_hdr;
+
+	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
+	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
+	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
+	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
+	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
+	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
+	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
+	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
+	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
+	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
+	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
+	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
+	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
+	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
+			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
+}
+
 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 {
 	const char *chip_name;
@@ -412,6 +479,8 @@
 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
 	unsigned int *tmp = NULL;
 	unsigned int i = 0;
+	uint16_t version_major;
+	uint16_t version_minor;
 
 	DRM_DEBUG("\n");
 
@@ -422,6 +491,9 @@
 	case CHIP_VEGA12:
 		chip_name = "vega12";
 		break;
+	case CHIP_VEGA20:
+		chip_name = "vega20";
+		break;
 	case CHIP_RAVEN:
 		chip_name = "raven";
 		break;
@@ -468,6 +540,12 @@
 		goto out;
 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
+
+	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
+	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
+	if (version_major == 2 && version_minor == 1)
+		adev->gfx.rlc.is_rlc_v2_1 = true;
+
 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
 	adev->gfx.rlc.save_and_restore_offset =
@@ -508,6 +586,9 @@
 	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
 
+	if (adev->gfx.rlc.is_rlc_v2_1)
+		gfx_v9_0_init_rlc_ext_microcode(adev);
+
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
 	if (err)
@@ -566,6 +647,26 @@
 		adev->firmware.fw_size +=
 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
 
+		if (adev->gfx.rlc.is_rlc_v2_1) {
+			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
+			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
+			info->fw = adev->gfx.rlc_fw;
+			adev->firmware.fw_size +=
+				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
+
+			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
+			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
+			info->fw = adev->gfx.rlc_fw;
+			adev->firmware.fw_size +=
+				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
+
+			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
+			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
+			info->fw = adev->gfx.rlc_fw;
+			adev->firmware.fw_size +=
+				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
+		}
+
 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
 		info->fw = adev->gfx.mec_fw;
@@ -1013,9 +1114,10 @@
 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
 };
 
-static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
+static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 {
 	u32 gb_addr_config;
+	int err;
 
 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
 
@@ -1037,6 +1139,20 @@
 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
 		DRM_INFO("fix gfx.config for vega12\n");
 		break;
+	case CHIP_VEGA20:
+		adev->gfx.config.max_hw_contexts = 8;
+		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
+		gb_addr_config &= ~0xf3e777ff;
+		gb_addr_config |= 0x22014042;
+		/* check vbios table if gpu info is not available */
+		err = amdgpu_atomfirmware_get_gfx_info(adev);
+		if (err)
+			return err;
+		break;
 	case CHIP_RAVEN:
 		adev->gfx.config.max_hw_contexts = 8;
 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -1086,6 +1202,8 @@
 					adev->gfx.config.gb_addr_config,
 					GB_ADDR_CONFIG,
 					PIPE_INTERLEAVE_SIZE));
+
+	return 0;
 }
 
 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
@@ -1319,6 +1437,7 @@
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		adev->gfx.mec.num_mec = 2;
 		break;
@@ -1446,7 +1565,9 @@
 
 	adev->gfx.ce_ram_size = 0x8000;
 
-	gfx_v9_0_gpu_early_init(adev);
+	r = gfx_v9_0_gpu_early_init(adev);
+	if (r)
+		return r;
 
 	r = gfx_v9_0_ngg_init(adev);
 	if (r)
@@ -1600,6 +1721,7 @@
 
 	gfx_v9_0_setup_rb(adev);
 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
+	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
 
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -1616,7 +1738,10 @@
 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
 			WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
-			tmp = adev->gmc.shared_aperture_start >> 48;
+			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
+				(adev->gmc.private_aperture_start >> 48));
+			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
+				(adev->gmc.shared_aperture_start >> 48));
 			WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
 		}
 	}
@@ -1708,55 +1833,42 @@
 			adev->gfx.rlc.clear_state_size);
 }
 
-static void gfx_v9_0_parse_ind_reg_list(int *register_list_format,
+static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
 				int indirect_offset,
 				int list_size,
 				int *unique_indirect_regs,
 				int *unique_indirect_reg_count,
-				int max_indirect_reg_count,
 				int *indirect_start_offsets,
-				int *indirect_start_offsets_count,
-				int max_indirect_start_offsets_count)
+				int *indirect_start_offsets_count)
 {
 	int idx;
-	bool new_entry = true;
 
 	for (; indirect_offset < list_size; indirect_offset++) {
+		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
+		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
 
-		if (new_entry) {
-			new_entry = false;
-			indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
-			*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
-			BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count);
+		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
+			indirect_offset += 2;
+
+			/* look for the matching index */
+			for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
+				if (unique_indirect_regs[idx] ==
+					register_list_format[indirect_offset] ||
+					!unique_indirect_regs[idx])
+					break;
+			}
+
+			BUG_ON(idx >= *unique_indirect_reg_count);
+
+			if (!unique_indirect_regs[idx])
+				unique_indirect_regs[idx] = register_list_format[indirect_offset];
+
+			indirect_offset++;
 		}
-
-		if (register_list_format[indirect_offset] == 0xFFFFFFFF) {
-			new_entry = true;
-			continue;
-		}
-
-		indirect_offset += 2;
-
-		/* look for the matching indice */
-		for (idx = 0; idx < *unique_indirect_reg_count; idx++) {
-			if (unique_indirect_regs[idx] ==
-				register_list_format[indirect_offset])
-				break;
-		}
-
-		if (idx >= *unique_indirect_reg_count) {
-			unique_indirect_regs[*unique_indirect_reg_count] =
-				register_list_format[indirect_offset];
-			idx = *unique_indirect_reg_count;
-			*unique_indirect_reg_count = *unique_indirect_reg_count + 1;
-			BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count);
-		}
-
-		register_list_format[indirect_offset] = idx;
 	}
 }
 
-static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev)
+static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
 {
 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
 	int unique_indirect_reg_count = 0;
@@ -1765,7 +1877,7 @@
 	int indirect_start_offsets_count = 0;
 
 	int list_size = 0;
-	int i = 0;
+	int i = 0, j = 0;
 	u32 tmp = 0;
 
 	u32 *register_list_format =
@@ -1776,15 +1888,14 @@
 		adev->gfx.rlc.reg_list_format_size_bytes);
 
 	/* setup unique_indirect_regs array and indirect_start_offsets array */
-	gfx_v9_0_parse_ind_reg_list(register_list_format,
-				GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH,
-				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
-				unique_indirect_regs,
-				&unique_indirect_reg_count,
-				ARRAY_SIZE(unique_indirect_regs),
-				indirect_start_offsets,
-				&indirect_start_offsets_count,
-				ARRAY_SIZE(indirect_start_offsets));
+	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
+	gfx_v9_1_parse_ind_reg_list(register_list_format,
+				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
+				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
+				    unique_indirect_regs,
+				    &unique_indirect_reg_count,
+				    indirect_start_offsets,
+				    &indirect_start_offsets_count);
 
 	/* enable auto inc in case it is disabled */
 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
@@ -1798,19 +1909,37 @@
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
 			adev->gfx.rlc.register_restore[i]);
 
-	/* load direct register */
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 0);
-	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
-			adev->gfx.rlc.register_restore[i]);
-
 	/* load indirect register */
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
 		adev->gfx.rlc.reg_list_format_start);
-	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
+
+	/* direct register portion */
+	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
 			register_list_format[i]);
 
+	/* indirect register portion */
+	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
+		if (register_list_format[i] == 0xFFFFFFFF) {
+			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+			continue;
+		}
+
+		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
+
+		for (j = 0; j < unique_indirect_reg_count; j++) {
+			if (register_list_format[i] == unique_indirect_regs[j]) {
+				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
+				break;
+			}
+		}
+
+		BUG_ON(j >= unique_indirect_reg_count);
+
+		i++;
+	}
+
 	/* set save/restore list size */
 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
 	list_size = list_size >> 1;
@@ -1823,14 +1952,19 @@
 		adev->gfx.rlc.starting_offsets_start);
 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
-			indirect_start_offsets[i]);
+		       indirect_start_offsets[i]);
 
 	/* load unique indirect regs*/
 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i,
-			unique_indirect_regs[i] & 0x3FFFF);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i,
-			unique_indirect_regs[i] >> 20);
+		if (unique_indirect_regs[i] != 0) {
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
+			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
+			       unique_indirect_regs[i] & 0x3FFFF);
+
+			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
+			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
+			       unique_indirect_regs[i] >> 20);
+		}
 	}
 
 	kfree(register_list_format);
@@ -2010,6 +2144,9 @@
 
 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
 {
+	if (!adev->gfx.rlc.is_rlc_v2_1)
+		return;
+
 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
 			      AMD_PG_SUPPORT_GFX_SMG |
 			      AMD_PG_SUPPORT_GFX_DMG |
@@ -2017,27 +2154,12 @@
 			      AMD_PG_SUPPORT_GDS |
 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
 		gfx_v9_0_init_csb(adev);
-		gfx_v9_0_init_rlc_save_restore_list(adev);
+		gfx_v9_1_init_rlc_save_restore_list(adev);
 		gfx_v9_0_enable_save_restore_machine(adev);
 
-		if (adev->asic_type == CHIP_RAVEN) {
-			WREG32(mmRLC_JUMP_TABLE_RESTORE,
-				adev->gfx.rlc.cp_table_gpu_addr >> 8);
-			gfx_v9_0_init_gfx_power_gating(adev);
-
-			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
-				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
-				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
-			} else {
-				gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
-				gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
-			}
-
-			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
-				gfx_v9_0_enable_cp_power_gating(adev, true);
-			else
-				gfx_v9_0_enable_cp_power_gating(adev, false);
-		}
+		WREG32(mmRLC_JUMP_TABLE_RESTORE,
+		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
+		gfx_v9_0_init_gfx_power_gating(adev);
 	}
 }
 
@@ -3061,6 +3183,9 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i;
 
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+					       AMD_PG_STATE_UNGATE);
+
 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
 
@@ -3279,6 +3404,11 @@
 	if (r)
 		return r;
 
+	r = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
+						   AMD_PG_STATE_GATE);
+	if (r)
+		return r;
+
 	return 0;
 }
 
@@ -3339,8 +3469,7 @@
 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
 						bool enable)
 {
-	/* TODO: double check if we need to perform under safe mdoe */
-	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
+	gfx_v9_0_enter_rlc_safe_mode(adev);
 
 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
@@ -3351,7 +3480,7 @@
 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
 	}
 
-	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
+	gfx_v9_0_exit_rlc_safe_mode(adev);
 }
 
 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
@@ -3605,6 +3734,7 @@
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		gfx_v9_0_update_gfx_clock_gating(adev,
 						 state == AMD_CG_STATE_GATE ? true : false);
@@ -3742,7 +3872,7 @@
 	}
 
 	amdgpu_ring_write(ring, header);
-BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 		(2 << 0) |
@@ -3774,13 +3904,16 @@
 {
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
+	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
 
 	/* RELEASE_MEM - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
-				 EOP_TC_ACTION_EN |
-				 EOP_TC_WB_ACTION_EN |
-				 EOP_TC_MD_ACTION_EN |
+	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
+					       EOP_TC_NC_ACTION_EN) :
+					      (EOP_TCL1_ACTION_EN |
+					       EOP_TC_ACTION_EN |
+					       EOP_TC_WB_ACTION_EN |
+					       EOP_TC_MD_ACTION_EN)) |
 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
 				 EVENT_INDEX(5)));
 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
@@ -4137,6 +4270,20 @@
 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						  uint32_t reg0, uint32_t reg1,
+						  uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+	if (amdgpu_sriov_vf(ring->adev))
+		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				      ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -4458,6 +4605,7 @@
 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4492,6 +4640,7 @@
 	.set_priority = gfx_v9_0_ring_set_priority_compute,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4522,6 +4671,7 @@
 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
 };
 
 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -4577,6 +4727,7 @@
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
 		break;
@@ -4686,6 +4837,7 @@
 
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
+	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 5617cf6..79f9ac2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -819,12 +819,33 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_bo_late_init(adev);
+
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
 		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
 
+static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+	u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+	unsigned size;
+
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+	} else {
+		u32 viewport = RREG32(mmVIEWPORT_SIZE);
+		size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+			REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+			4);
+	}
+	/* return 0 if the pre-OS buffer uses up most of vram */
+	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+		return 0;
+	return size;
+}
+
 static int gmc_v6_0_sw_init(void *handle)
 {
 	int r;
@@ -851,8 +872,6 @@
 
 	adev->gmc.mc_mask = 0xffffffffffULL;
 
-	adev->gmc.stolen_size = 256 * 1024;
-
 	adev->need_dma32 = false;
 	dma_bits = adev->need_dma32 ? 32 : 40;
 	r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits));
@@ -878,6 +897,8 @@
 	if (r)
 		return r;
 
+	adev->gmc.stolen_size = gmc_v6_0_get_vbios_fb_size(adev);
+
 	r = amdgpu_bo_init(adev);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 80054f3..7147bfe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -958,12 +958,33 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_bo_late_init(adev);
+
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
 		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
 
+static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+	u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+	unsigned size;
+
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+	} else {
+		u32 viewport = RREG32(mmVIEWPORT_SIZE);
+		size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+			REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+			4);
+	}
+	/* return 0 if the pre-OS buffer uses up most of vram */
+	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+		return 0;
+	return size;
+}
+
 static int gmc_v7_0_sw_init(void *handle)
 {
 	int r;
@@ -998,8 +1019,6 @@
 	 */
 	adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
 
-	adev->gmc.stolen_size = 256 * 1024;
-
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * IGP - can handle 40-bits
@@ -1030,6 +1049,8 @@
 	if (r)
 		return r;
 
+	adev->gmc.stolen_size = gmc_v7_0_get_vbios_fb_size(adev);
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index d71d4cb..1edbe6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -138,6 +138,7 @@
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		amdgpu_device_program_register_sequence(adev,
 							golden_settings_polaris11_a11,
 							ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -231,6 +232,7 @@
 	case CHIP_FIJI:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
+	case CHIP_VEGAM:
 		return 0;
 	default: BUG();
 	}
@@ -567,9 +569,10 @@
 	/* set the gart size */
 	if (amdgpu_gart_size == -1) {
 		switch (adev->asic_type) {
-		case CHIP_POLARIS11: /* all engines support GPUVM */
 		case CHIP_POLARIS10: /* all engines support GPUVM */
+		case CHIP_POLARIS11: /* all engines support GPUVM */
 		case CHIP_POLARIS12: /* all engines support GPUVM */
+		case CHIP_VEGAM:     /* all engines support GPUVM */
 		default:
 			adev->gmc.gart_size = 256ULL << 20;
 			break;
@@ -1049,12 +1052,33 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_bo_late_init(adev);
+
 	if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
 		return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
 	else
 		return 0;
 }
 
+static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+	u32 d1vga_control = RREG32(mmD1VGA_CONTROL);
+	unsigned size;
+
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+	} else {
+		u32 viewport = RREG32(mmVIEWPORT_SIZE);
+		size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+			REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+			4);
+	}
+	/* return 0 if the pre-OS buffer uses up most of vram */
+	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+		return 0;
+	return size;
+}
+
 #define mmMC_SEQ_MISC0_FIJI 0xA71
 
 static int gmc_v8_0_sw_init(void *handle)
@@ -1068,7 +1092,8 @@
 	} else {
 		u32 tmp;
 
-		if (adev->asic_type == CHIP_FIJI)
+		if ((adev->asic_type == CHIP_FIJI) ||
+		    (adev->asic_type == CHIP_VEGAM))
 			tmp = RREG32(mmMC_SEQ_MISC0_FIJI);
 		else
 			tmp = RREG32(mmMC_SEQ_MISC0);
@@ -1096,8 +1121,6 @@
 	 */
 	adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
 
-	adev->gmc.stolen_size = 256 * 1024;
-
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
 	 * IGP - can handle 40-bits
@@ -1128,6 +1151,8 @@
 	if (r)
 		return r;
 
+	adev->gmc.stolen_size = gmc_v8_0_get_vbios_fb_size(adev);
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e687363..3c0a85d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -43,19 +43,13 @@
 #include "gfxhub_v1_0.h"
 #include "mmhub_v1_0.h"
 
-#define mmDF_CS_AON0_DramBaseAddress0                                                                  0x0044
-#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX                                                         0
-//DF_CS_AON0_DramBaseAddress0
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT                                                        0x0
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT                                                    0x1
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT                                                      0x4
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT                                                      0x8
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT                                                      0xc
-#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK                                                          0x00000001L
-#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK                                                      0x00000002L
-#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK                                                        0x000000F0L
-#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK                                                        0x00000700L
-#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK                                                        0xFFFFF000L
+/* add these here since we already include dce12 headers and these are for DCN */
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
+#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT                                        0x0
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT                                       0x10
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK                                          0x00003FFFL
+#define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK                                         0x3FFF0000L
 
 /* XXX Move this macro to VEGA10 header file, which is like vid.h for VI.*/
 #define AMDGPU_NUM_OF_VMIDS			8
@@ -385,11 +379,9 @@
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+					    hub->vm_inv_eng0_ack + eng,
+					    req, 1 << vmid);
 
 	return pd_addr;
 }
@@ -556,8 +548,7 @@
 	adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
 	adev->gmc.shared_aperture_end =
 		adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
-	adev->gmc.private_aperture_start =
-		adev->gmc.shared_aperture_end + 1;
+	adev->gmc.private_aperture_start = 0x1000000000000000ULL;
 	adev->gmc.private_aperture_end =
 		adev->gmc.private_aperture_start + (4ULL << 30) - 1;
 
@@ -659,6 +650,11 @@
 	unsigned i;
 	int r;
 
+	/*
+	 * TODO - Uncomment once GART corruption issue is fixed.
+	 */
+	/* amdgpu_bo_late_init(adev); */
+
 	for(i = 0; i < adev->num_rings; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		unsigned vmhub = ring->funcs->vmhub;
@@ -679,6 +675,7 @@
 			DRM_INFO("ECC is active.\n");
 		} else if (r == 0) {
 			DRM_INFO("ECC is not present.\n");
+			adev->df_funcs->enable_ecc_force_par_wr_rmw(adev, false);
 		} else {
 			DRM_ERROR("gmc_v9_0_ecc_available() failed. r: %d\n", r);
 			return r;
@@ -697,10 +694,7 @@
 	amdgpu_device_vram_location(adev, &adev->gmc, base);
 	amdgpu_device_gart_location(adev, mc);
 	/* base offset of vram pages */
-	if (adev->flags & AMD_IS_APU)
-		adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
-	else
-		adev->vm_manager.vram_base_offset = 0;
+	adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev);
 }
 
 /**
@@ -714,7 +708,6 @@
  */
 static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
 {
-	u32 tmp;
 	int chansize, numchan;
 	int r;
 
@@ -727,39 +720,7 @@
 		else
 			chansize = 128;
 
-		tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
-		tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
-		tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
-		switch (tmp) {
-		case 0:
-		default:
-			numchan = 1;
-			break;
-		case 1:
-			numchan = 2;
-			break;
-		case 2:
-			numchan = 0;
-			break;
-		case 3:
-			numchan = 4;
-			break;
-		case 4:
-			numchan = 0;
-			break;
-		case 5:
-			numchan = 8;
-			break;
-		case 6:
-			numchan = 0;
-			break;
-		case 7:
-			numchan = 16;
-			break;
-		case 8:
-			numchan = 2;
-			break;
-		}
+		numchan = adev->df_funcs->get_hbm_channel_number(adev);
 		adev->gmc.vram_width = numchan * chansize;
 	}
 
@@ -792,6 +753,7 @@
 		switch (adev->asic_type) {
 		case CHIP_VEGA10:  /* all engines support GPUVM */
 		case CHIP_VEGA12:  /* all engines support GPUVM */
+		case CHIP_VEGA20:
 		default:
 			adev->gmc.gart_size = 512ULL << 20;
 			break;
@@ -826,6 +788,52 @@
 	return amdgpu_gart_table_vram_alloc(adev);
 }
 
+static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
+{
+#if 0
+	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
+#endif
+	unsigned size;
+
+	/*
+	 * TODO Remove once GART corruption is resolved
+	 * Check related code in gmc_v9_0_sw_fini
+	 */
+	size = 9 * 1024 * 1024;
+
+#if 0
+	if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
+		size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */
+	} else {
+		u32 viewport;
+
+		switch (adev->asic_type) {
+		case CHIP_RAVEN:
+			viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
+			size = (REG_GET_FIELD(viewport,
+					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
+				REG_GET_FIELD(viewport,
+					      HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
+				4);
+			break;
+		case CHIP_VEGA10:
+		case CHIP_VEGA12:
+		default:
+			viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
+			size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
+				REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
+				4);
+			break;
+		}
+	}
+	/* return 0 if the pre-OS buffer uses up most of vram */
+	if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024))
+		return 0;
+
+#endif
+	return size;
+}
+
 static int gmc_v9_0_sw_init(void *handle)
 {
 	int r;
@@ -851,6 +859,7 @@
 		break;
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		/*
 		 * To fulfill 4-level page support,
 		 * vm size is 256TB (48bit), maximum size of Vega10,
@@ -877,12 +886,6 @@
 	 */
 	adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
 
-	/*
-	 * It needs to reserve 8M stolen memory for vega10
-	 * TODO: Figure out how to avoid that...
-	 */
-	adev->gmc.stolen_size = 8 * 1024 * 1024;
-
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 44-bits.
 	 * IGP - can handle 44-bits
@@ -907,6 +910,8 @@
 	if (r)
 		return r;
 
+	adev->gmc.stolen_size = gmc_v9_0_get_vbios_fb_size(adev);
+
 	/* Memory manager */
 	r = amdgpu_bo_init(adev);
 	if (r)
@@ -950,6 +955,18 @@
 	amdgpu_gem_force_release(adev);
 	amdgpu_vm_manager_fini(adev);
 	gmc_v9_0_gart_fini(adev);
+
+	/*
+	* TODO:
+	* Currently there is a bug where some memory client outside
+	* of the driver writes to the first 8M of VRAM on S3 resume;
+	* this overwrites GART, which by default gets placed in the first 8M,
+	* and causes VM_FAULTS once GTT is accessed.
+	* Keep the stolen memory reservation while this is not solved.
+	* Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init
+	*/
+	amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
+
 	amdgpu_bo_fini(adev);
 
 	return 0;
@@ -960,6 +977,7 @@
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
+	case CHIP_VEGA20:
 		soc15_program_register_sequence(adev,
 						golden_settings_mmhub_1_0_0,
 						ARRAY_SIZE(golden_settings_mmhub_1_0_0));
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 26ba984..17f7f07 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -2817,7 +2817,7 @@
 		pi->caps_tcp_ramping = true;
 	}
 
-	if (amdgpu_pp_feature_mask & SCLK_DEEP_SLEEP_MASK)
+	if (adev->powerplay.pp_feature & PP_SCLK_DEEP_SLEEP_MASK)
 		pi->caps_sclk_ds = true;
 	else
 		pi->caps_sclk_ds = false;
@@ -2974,7 +2974,7 @@
 	/* powerdown unused blocks for now */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!amdgpu_dpm)
+	if (!adev->pm.dpm_enabled)
 		return 0;
 
 	kv_dpm_powergate_acp(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 43f9257..3d53c44 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -734,6 +734,7 @@
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		mmhub_v1_0_update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 4933486..078f70f 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -260,8 +260,10 @@
 	} while (timeout > 1);
 
 flr_done:
-	if (locked)
+	if (locked) {
+		adev->in_gpu_reset = 0;
 		mutex_unlock(&adev->lock_reset);
+	}
 
 	/* Trigger recovery for world switch failure if no TDR */
 	if (amdgpu_lockup_timeout == 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index df34dc7..365517c 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -34,10 +34,19 @@
 #define smnCPM_CONTROL                                                                                  0x11180460
 #define smnPCIE_CNTL2                                                                                   0x11180070
 
+/* vega20 */
+#define mmRCC_DEV0_EPF0_STRAP0_VG20                                                                         0x0011
+#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX                                                                2
+
 static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
 {
         u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
 
+	if (adev->asic_type == CHIP_VEGA20)
+		tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20);
+	else
+		tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
+
 	tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
 	tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;
 
@@ -75,10 +84,14 @@
 			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
 
 	u32 doorbell_range = RREG32(reg);
+	u32 range = 2;
+
+	if (adev->asic_type == CHIP_VEGA20)
+		range = 8;
 
 	if (use_doorbell) {
 		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index);
-		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2);
+		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range);
 	} else
 		doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0);
 
@@ -133,6 +146,9 @@
 {
 	uint32_t def, data;
 
+	if (adev->asic_type == CHIP_VEGA20)
+		return;
+
 	/* NBIF_MGCG_CTRL_LCLK */
 	def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 8da6da9..0cf48d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -40,11 +40,20 @@
     GFX_CTRL_CMD_ID_INIT_GPCOM_RING = 0x00020000,   /* initialize GPCOM ring */
     GFX_CTRL_CMD_ID_DESTROY_RINGS   = 0x00030000,   /* destroy rings */
     GFX_CTRL_CMD_ID_CAN_INIT_RINGS  = 0x00040000,   /* is it allowed to initialized the rings */
+    GFX_CTRL_CMD_ID_ENABLE_INT      = 0x00050000,   /* enable PSP-to-Gfx interrupt */
+    GFX_CTRL_CMD_ID_DISABLE_INT     = 0x00060000,   /* disable PSP-to-Gfx interrupt */
+    GFX_CTRL_CMD_ID_MODE1_RST       = 0x00070000,   /* trigger the Mode 1 reset */
 
     GFX_CTRL_CMD_ID_MAX             = 0x000F0000,   /* max command ID */
 };
 
 
+/*-----------------------------------------------------------------------------
+    NOTE:   All physical addresses used in this interface are actually
+            GPU Virtual Addresses.
+*/
+
+
 /* Control registers of the TEE Gfx interface. These are located in
 *  SRBM-to-PSP mailbox registers (total 8 registers).
 */
@@ -55,8 +64,8 @@
     volatile uint32_t   rbi_rptr;         /* +8   Read pointer (index) of RBI ring */
     volatile uint32_t   gpcom_wptr;       /* +12  Write pointer (index) of GPCOM ring */
     volatile uint32_t   gpcom_rptr;       /* +16  Read pointer (index) of GPCOM ring */
-    volatile uint32_t   ring_addr_lo;     /* +20  bits [31:0] of physical address of ring buffer */
-    volatile uint32_t   ring_addr_hi;     /* +24  bits [63:32] of physical address of ring buffer */
+    volatile uint32_t   ring_addr_lo;     /* +20  bits [31:0] of GPU Virtual address of ring buffer (VMID=0) */
+    volatile uint32_t   ring_addr_hi;     /* +24  bits [63:32] of GPU Virtual address of ring buffer (VMID=0) */
     volatile uint32_t   ring_buf_size;    /* +28  Ring buffer size (in bytes) */
 
 };
@@ -78,6 +87,8 @@
     GFX_CMD_ID_LOAD_ASD     = 0x00000004,   /* load ASD Driver */
     GFX_CMD_ID_SETUP_TMR    = 0x00000005,   /* setup TMR region */
     GFX_CMD_ID_LOAD_IP_FW   = 0x00000006,   /* load HW IP FW */
+    GFX_CMD_ID_DESTROY_TMR  = 0x00000007,   /* destroy TMR region */
+    GFX_CMD_ID_SAVE_RESTORE = 0x00000008,   /* save/restore HW IP FW */
 
 };
 
@@ -85,11 +96,11 @@
 /* Command to load Trusted Application binary into PSP OS. */
 struct psp_gfx_cmd_load_ta
 {
-    uint32_t        app_phy_addr_lo;        /* bits [31:0] of the physical address of the TA binary (must be 4 KB aligned) */
-    uint32_t        app_phy_addr_hi;        /* bits [63:32] of the physical address of the TA binary */
+    uint32_t        app_phy_addr_lo;        /* bits [31:0] of the GPU Virtual address of the TA binary (must be 4 KB aligned) */
+    uint32_t        app_phy_addr_hi;        /* bits [63:32] of the GPU Virtual address of the TA binary */
     uint32_t        app_len;                /* length of the TA binary in bytes */
-    uint32_t        cmd_buf_phy_addr_lo;    /* bits [31:0] of the physical address of CMD buffer (must be 4 KB aligned) */
-    uint32_t        cmd_buf_phy_addr_hi;    /* bits [63:32] of the physical address of CMD buffer */
+    uint32_t        cmd_buf_phy_addr_lo;    /* bits [31:0] of the GPU Virtual address of CMD buffer (must be 4 KB aligned) */
+    uint32_t        cmd_buf_phy_addr_hi;    /* bits [63:32] of the GPU Virtual address of CMD buffer */
     uint32_t        cmd_buf_len;            /* length of the CMD buffer in bytes; must be multiple of 4 KB */
 
     /* Note: CmdBufLen can be set to 0. In this case no persistent CMD buffer is provided
@@ -111,8 +122,8 @@
 */
 struct psp_gfx_buf_desc
 {
-    uint32_t        buf_phy_addr_lo;       /* bits [31:0] of physical address of the buffer (must be 4 KB aligned) */
-    uint32_t        buf_phy_addr_hi;       /* bits [63:32] of physical address of the buffer */
+    uint32_t        buf_phy_addr_lo;       /* bits [31:0] of GPU Virtual address of the buffer (must be 4 KB aligned) */
+    uint32_t        buf_phy_addr_hi;       /* bits [63:32] of GPU Virtual address of the buffer */
     uint32_t        buf_size;              /* buffer size in bytes (must be multiple of 4 KB and no bigger than 64 MB) */
 
 };
@@ -145,8 +156,8 @@
 /* Command to setup TMR region. */
 struct psp_gfx_cmd_setup_tmr
 {
-    uint32_t        buf_phy_addr_lo;       /* bits [31:0] of physical address of TMR buffer (must be 4 KB aligned) */
-    uint32_t        buf_phy_addr_hi;       /* bits [63:32] of physical address of TMR buffer */
+    uint32_t        buf_phy_addr_lo;       /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
+    uint32_t        buf_phy_addr_hi;       /* bits [63:32] of GPU Virtual address of TMR buffer */
     uint32_t        buf_size;              /* buffer size in bytes (must be multiple of 4 KB) */
 
 };
@@ -174,18 +185,32 @@
     GFX_FW_TYPE_ISP         = 16,
     GFX_FW_TYPE_ACP         = 17,
     GFX_FW_TYPE_SMU         = 18,
+    GFX_FW_TYPE_MMSCH       = 19,
+    GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM        = 20,
+    GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM        = 21,
+    GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL           = 22,
+    GFX_FW_TYPE_MAX         = 23
 };
 
 /* Command to load HW IP FW. */
 struct psp_gfx_cmd_load_ip_fw
 {
-    uint32_t                fw_phy_addr_lo;    /* bits [31:0] of physical address of FW location (must be 4 KB aligned) */
-    uint32_t                fw_phy_addr_hi;    /* bits [63:32] of physical address of FW location */
+    uint32_t                fw_phy_addr_lo;    /* bits [31:0] of GPU Virtual address of FW location (must be 4 KB aligned) */
+    uint32_t                fw_phy_addr_hi;    /* bits [63:32] of GPU Virtual address of FW location */
     uint32_t                fw_size;           /* FW buffer size in bytes */
     enum psp_gfx_fw_type    fw_type;           /* FW type */
 
 };
 
+/* Command to save/restore HW IP FW. */
+struct psp_gfx_cmd_save_restore_ip_fw
+{
+    uint32_t                save_fw;              /* if set, command is used for saving fw otherwise for restoring */
+    uint32_t                save_restore_addr_lo; /* bits [31:0] of FB address of GART memory used as save/restore buffer (must be 4 KB aligned) */
+    uint32_t                save_restore_addr_hi; /* bits [63:32] of FB address of GART memory used as save/restore buffer */
+    uint32_t                buf_size;             /* Size of the save/restore buffer in bytes */
+    enum psp_gfx_fw_type    fw_type;              /* FW type */
+};
 
 /* All GFX ring buffer commands. */
 union psp_gfx_commands
@@ -195,7 +220,7 @@
     struct psp_gfx_cmd_invoke_cmd       cmd_invoke_cmd;
     struct psp_gfx_cmd_setup_tmr        cmd_setup_tmr;
     struct psp_gfx_cmd_load_ip_fw       cmd_load_ip_fw;
-
+    struct psp_gfx_cmd_save_restore_ip_fw cmd_save_restore_ip_fw;
 };
 
 
@@ -226,8 +251,8 @@
 
     /* These fields are used for RBI only. They are all 0 in GPCOM commands
     */
-    uint32_t        resp_buf_addr_lo;   /* +12 bits [31:0] of physical address of response buffer (must be 4 KB aligned) */
-    uint32_t        resp_buf_addr_hi;   /* +16 bits [63:32] of physical address of response buffer */
+    uint32_t        resp_buf_addr_lo;   /* +12 bits [31:0] of GPU Virtual address of response buffer (must be 4 KB aligned) */
+    uint32_t        resp_buf_addr_hi;   /* +16 bits [63:32] of GPU Virtual address of response buffer */
     uint32_t        resp_offset;        /* +20 offset within response buffer */
     uint32_t        resp_buf_size;      /* +24 total size of the response buffer in bytes */
 
@@ -251,19 +276,19 @@
 /* Structure of the Ring Buffer Frame */
 struct psp_gfx_rb_frame
 {
-    uint32_t    cmd_buf_addr_lo;    /* +0  bits [31:0] of physical address of command buffer (must be 4 KB aligned) */
-    uint32_t    cmd_buf_addr_hi;    /* +4  bits [63:32] of physical address of command buffer */
+    uint32_t    cmd_buf_addr_lo;    /* +0  bits [31:0] of GPU Virtual address of command buffer (must be 4 KB aligned) */
+    uint32_t    cmd_buf_addr_hi;    /* +4  bits [63:32] of GPU Virtual address of command buffer */
     uint32_t    cmd_buf_size;       /* +8  command buffer size in bytes */
-    uint32_t    fence_addr_lo;      /* +12 bits [31:0] of physical address of Fence for this frame */
-    uint32_t    fence_addr_hi;      /* +16 bits [63:32] of physical address of Fence for this frame */
+    uint32_t    fence_addr_lo;      /* +12 bits [31:0] of GPU Virtual address of Fence for this frame */
+    uint32_t    fence_addr_hi;      /* +16 bits [63:32] of GPU Virtual address of Fence for this frame */
     uint32_t    fence_value;        /* +20 Fence value */
     uint32_t    sid_lo;             /* +24 bits [31:0] of SID value (used only for RBI frames) */
     uint32_t    sid_hi;             /* +28 bits [63:32] of SID value (used only for RBI frames) */
     uint8_t     vmid;               /* +32 VMID value used for mapping of all addresses for this frame */
     uint8_t     frame_type;         /* +33 1: destroy context frame, 0: all other frames; used only for RBI frames */
     uint8_t     reserved1[2];       /* +34 reserved, must be 0 */
-    uint32_t    reserved2[7];       /* +40 reserved, must be 0 */
-    /* total 64 bytes */
+    uint32_t    reserved2[7];       /* +36 reserved, must be 0 */
+                /* total 64 bytes */
 };
 
 #endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 8873d83..0ff136d 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -70,6 +70,15 @@
 	case AMDGPU_UCODE_ID_RLC_G:
 		*type = GFX_FW_TYPE_RLC_G;
 		break;
+	case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
+		*type = GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL;
+		break;
+	case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
+		*type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
+		break;
+	case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
+		*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
+		break;
 	case AMDGPU_UCODE_ID_SMC:
 		*type = GFX_FW_TYPE_SMU;
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 196e75d..0c768e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -41,6 +41,9 @@
 MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
 MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
 MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
+
 
 #define smnMP1_FIRMWARE_FLAGS 0x3010028
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index be20a38..aa9ab29 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -62,6 +62,8 @@
 MODULE_FIRMWARE("amdgpu/polaris11_sdma1.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_sdma.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vegam_sdma1.bin");
 
 
 static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -209,6 +211,7 @@
 		break;
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		amdgpu_device_program_register_sequence(adev,
 							golden_settings_polaris11_a11,
 							ARRAY_SIZE(golden_settings_polaris11_a11));
@@ -275,15 +278,18 @@
 	case CHIP_FIJI:
 		chip_name = "fiji";
 		break;
-	case CHIP_POLARIS11:
-		chip_name = "polaris11";
-		break;
 	case CHIP_POLARIS10:
 		chip_name = "polaris10";
 		break;
+	case CHIP_POLARIS11:
+		chip_name = "polaris11";
+		break;
 	case CHIP_POLARIS12:
 		chip_name = "polaris12";
 		break;
+	case CHIP_VEGAM:
+		chip_name = "vegam";
+		break;
 	case CHIP_CARRIZO:
 		chip_name = "carrizo";
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 399f876..ca53b3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -42,6 +42,8 @@
 MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
 MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
 MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin");
 MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
 
 #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L
@@ -107,6 +109,28 @@
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0)
 };
 
+static const struct soc15_reg_golden golden_settings_sdma_4_2[] =
+{
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0)
+};
+
 static const struct soc15_reg_golden golden_settings_sdma_rv1[] =
 {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002),
@@ -139,6 +163,11 @@
 						golden_settings_sdma_vg12,
 						ARRAY_SIZE(golden_settings_sdma_vg12));
 		break;
+	case CHIP_VEGA20:
+		soc15_program_register_sequence(adev,
+						golden_settings_sdma_4_2,
+						ARRAY_SIZE(golden_settings_sdma_4_2));
+		break;
 	case CHIP_RAVEN:
 		soc15_program_register_sequence(adev,
 						 golden_settings_sdma_4_1,
@@ -182,6 +211,9 @@
 	case CHIP_VEGA12:
 		chip_name = "vega12";
 		break;
+	case CHIP_VEGA20:
+		chip_name = "vega20";
+		break;
 	case CHIP_RAVEN:
 		chip_name = "raven";
 		break;
@@ -360,6 +392,31 @@
 
 }
 
+static void sdma_v4_0_wait_reg_mem(struct amdgpu_ring *ring,
+				   int mem_space, int hdp,
+				   uint32_t addr0, uint32_t addr1,
+				   uint32_t ref, uint32_t mask,
+				   uint32_t inv)
+{
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
+			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
+	if (mem_space) {
+		/* memory */
+		amdgpu_ring_write(ring, addr0);
+		amdgpu_ring_write(ring, addr1);
+	} else {
+		/* registers */
+		amdgpu_ring_write(ring, addr0 << 2);
+		amdgpu_ring_write(ring, addr1 << 2);
+	}
+	amdgpu_ring_write(ring, ref); /* reference */
+	amdgpu_ring_write(ring, mask); /* mask */
+	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
+}
+
 /**
  * sdma_v4_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
  *
@@ -378,15 +435,10 @@
 	else
 		ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1;
 
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
-	amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_done_offset(adev)) << 2);
-	amdgpu_ring_write(ring, (adev->nbio_funcs->get_hdp_flush_req_offset(adev)) << 2);
-	amdgpu_ring_write(ring, ref_and_mask); /* reference */
-	amdgpu_ring_write(ring, ref_and_mask); /* mask */
-	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
+	sdma_v4_0_wait_reg_mem(ring, 0, 1,
+			       adev->nbio_funcs->get_hdp_flush_done_offset(adev),
+			       adev->nbio_funcs->get_hdp_flush_req_offset(adev),
+			       ref_and_mask, ref_and_mask, 10);
 }
 
 /**
@@ -1114,16 +1166,10 @@
 	uint64_t addr = ring->fence_drv.gpu_addr;
 
 	/* wait for idle */
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
-			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
-	amdgpu_ring_write(ring, addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-	amdgpu_ring_write(ring, seq); /* reference */
-	amdgpu_ring_write(ring, 0xffffffff); /* mask */
-	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+	sdma_v4_0_wait_reg_mem(ring, 1, 0,
+			       addr & 0xfffffffc,
+			       upper_32_bits(addr) & 0xffffffff,
+			       seq, 0xffffffff, 4);
 }
 
 
@@ -1154,15 +1200,7 @@
 static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 					 uint32_t val, uint32_t mask)
 {
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
-			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
-	amdgpu_ring_write(ring, reg << 2);
-	amdgpu_ring_write(ring, 0);
-	amdgpu_ring_write(ring, val); /* reference */
-	amdgpu_ring_write(ring, mask); /* mask */
-	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
-			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+	sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
 }
 
 static int sdma_v4_0_early_init(void *handle)
@@ -1510,6 +1548,7 @@
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 	case CHIP_RAVEN:
 		sdma_v4_0_update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
@@ -1605,6 +1644,7 @@
 	.pad_ib = sdma_v4_0_ring_pad_ib,
 	.emit_wreg = sdma_v4_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
 static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index a675ec6..c364ef9 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1252,6 +1252,12 @@
 	}
 }
 
+static bool si_need_full_reset(struct amdgpu_device *adev)
+{
+	/* change this when we support soft reset */
+	return true;
+}
+
 static int si_get_pcie_lanes(struct amdgpu_device *adev)
 {
 	u32 link_width_cntl;
@@ -1332,6 +1338,7 @@
 	.get_config_memsize = &si_get_config_memsize,
 	.flush_hdp = &si_flush_hdp,
 	.invalidate_hdp = &si_invalidate_hdp,
+	.need_full_reset = &si_need_full_reset,
 };
 
 static uint32_t si_get_rev_id(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 797d505..b12d7c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -7580,7 +7580,7 @@
 	int ret;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!amdgpu_dpm)
+	if (!adev->pm.dpm_enabled)
 		return 0;
 
 	ret = si_set_temperature_range(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 51cf8a3..68b4a22 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -41,8 +41,6 @@
 #include "sdma1/sdma1_4_0_offset.h"
 #include "hdp/hdp_4_0_offset.h"
 #include "hdp/hdp_4_0_sh_mask.h"
-#include "mp/mp_9_0_offset.h"
-#include "mp/mp_9_0_sh_mask.h"
 #include "smuio/smuio_9_0_offset.h"
 #include "smuio/smuio_9_0_sh_mask.h"
 
@@ -52,6 +50,8 @@
 #include "gmc_v9_0.h"
 #include "gfxhub_v1_0.h"
 #include "mmhub_v1_0.h"
+#include "df_v1_7.h"
+#include "df_v3_6.h"
 #include "vega10_ih.h"
 #include "sdma_v4_0.h"
 #include "uvd_v7_0.h"
@@ -60,33 +60,6 @@
 #include "dce_virtual.h"
 #include "mxgpu_ai.h"
 
-#define mmFabricConfigAccessControl                                                                    0x0410
-#define mmFabricConfigAccessControl_BASE_IDX                                                           0
-#define mmFabricConfigAccessControl_DEFAULT                                      0x00000000
-//FabricConfigAccessControl
-#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT                                                     0x0
-#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT                                                0x1
-#define FabricConfigAccessControl__CfgRegInstID__SHIFT                                                        0x10
-#define FabricConfigAccessControl__CfgRegInstAccEn_MASK                                                       0x00000001L
-#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK                                                  0x00000002L
-#define FabricConfigAccessControl__CfgRegInstID_MASK                                                          0x00FF0000L
-
-
-#define mmDF_PIE_AON0_DfGlobalClkGater                                                                 0x00fc
-#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX                                                        0
-//DF_PIE_AON0_DfGlobalClkGater
-#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT                                                         0x0
-#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK                                                           0x0000000FL
-
-enum {
-	DF_MGCG_DISABLE = 0,
-	DF_MGCG_ENABLE_00_CYCLE_DELAY =1,
-	DF_MGCG_ENABLE_01_CYCLE_DELAY =2,
-	DF_MGCG_ENABLE_15_CYCLE_DELAY =13,
-	DF_MGCG_ENABLE_31_CYCLE_DELAY =14,
-	DF_MGCG_ENABLE_63_CYCLE_DELAY =15
-};
-
 #define mmMP0_MISC_CGTT_CTRL0                                                                   0x01b9
 #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX                                                          0
 #define mmMP0_MISC_LIGHT_SLEEP_CTRL                                                             0x01ba
@@ -313,6 +286,7 @@
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STALLED_STAT1)},
 	{ SOC15_REG_ENTRY(GC, 0, mmCP_CPC_STATUS)},
 	{ SOC15_REG_ENTRY(GC, 0, mmGB_ADDR_CONFIG)},
+	{ SOC15_REG_ENTRY(GC, 0, mmDB_DEBUG2)},
 };
 
 static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
@@ -341,6 +315,8 @@
 	} else {
 		if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG))
 			return adev->gfx.config.gb_addr_config;
+		else if (reg_offset == SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2))
+			return adev->gfx.config.db_debug2;
 		return RREG32(reg_offset);
 	}
 }
@@ -512,15 +488,24 @@
 	case CHIP_RAVEN:
 		vega10_reg_base_init(adev);
 		break;
+	case CHIP_VEGA20:
+		vega20_reg_base_init(adev);
+		break;
 	default:
 		return -EINVAL;
 	}
 
 	if (adev->flags & AMD_IS_APU)
 		adev->nbio_funcs = &nbio_v7_0_funcs;
+	else if (adev->asic_type == CHIP_VEGA20)
+		adev->nbio_funcs = &nbio_v7_0_funcs;
 	else
 		adev->nbio_funcs = &nbio_v6_1_funcs;
 
+	if (adev->asic_type == CHIP_VEGA20)
+		adev->df_funcs = &df_v3_6_funcs;
+	else
+		adev->df_funcs = &df_v1_7_funcs;
 	adev->nbio_funcs->detect_hw_virt(adev);
 
 	if (amdgpu_sriov_vf(adev))
@@ -529,12 +514,15 @@
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
-		amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
-		if (!amdgpu_sriov_vf(adev))
-			amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+		if (adev->asic_type != CHIP_VEGA20) {
+			amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+			if (!amdgpu_sriov_vf(adev))
+				amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
+		}
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
@@ -593,6 +581,12 @@
 			HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
 }
 
+static bool soc15_need_full_reset(struct amdgpu_device *adev)
+{
+	/* change this when we implement soft reset */
+	return true;
+}
+
 static const struct amdgpu_asic_funcs soc15_asic_funcs =
 {
 	.read_disabled_bios = &soc15_read_disabled_bios,
@@ -606,6 +600,7 @@
 	.get_config_memsize = &soc15_get_config_memsize,
 	.flush_hdp = &soc15_flush_hdp,
 	.invalidate_hdp = &soc15_invalidate_hdp,
+	.need_full_reset = &soc15_need_full_reset,
 };
 
 static int soc15_common_early_init(void *handle)
@@ -675,6 +670,27 @@
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x14;
 		break;
+	case CHIP_VEGA20:
+		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			AMD_CG_SUPPORT_GFX_CGCG |
+			AMD_CG_SUPPORT_GFX_CGLS |
+			AMD_CG_SUPPORT_GFX_3D_CGCG |
+			AMD_CG_SUPPORT_GFX_3D_CGLS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_MC_LS |
+			AMD_CG_SUPPORT_MC_MGCG |
+			AMD_CG_SUPPORT_SDMA_MGCG |
+			AMD_CG_SUPPORT_SDMA_LS |
+			AMD_CG_SUPPORT_BIF_MGCG |
+			AMD_CG_SUPPORT_BIF_LS |
+			AMD_CG_SUPPORT_HDP_MGCG |
+			AMD_CG_SUPPORT_ROM_MGCG |
+			AMD_CG_SUPPORT_VCE_MGCG |
+			AMD_CG_SUPPORT_UVD_MGCG;
+		adev->pg_flags = 0;
+		adev->external_rev_id = adev->rev_id + 0x28;
+		break;
 	case CHIP_RAVEN:
 		adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
 			AMD_CG_SUPPORT_GFX_MGLS |
@@ -694,8 +710,15 @@
 			AMD_CG_SUPPORT_MC_MGCG |
 			AMD_CG_SUPPORT_MC_LS |
 			AMD_CG_SUPPORT_SDMA_MGCG |
-			AMD_CG_SUPPORT_SDMA_LS;
-		adev->pg_flags = AMD_PG_SUPPORT_SDMA;
+			AMD_CG_SUPPORT_SDMA_LS |
+			AMD_CG_SUPPORT_VCN_MGCG;
+
+		adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN;
+
+		if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
+			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+				AMD_PG_SUPPORT_CP |
+				AMD_PG_SUPPORT_RLC_SMU_HS;
 
 		adev->external_rev_id = 0x1;
 		break;
@@ -871,32 +894,6 @@
 		WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0), data);
 }
 
-static void soc15_update_df_medium_grain_clock_gating(struct amdgpu_device *adev,
-						       bool enable)
-{
-	uint32_t data;
-
-	/* Put DF on broadcast mode */
-	data = RREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl));
-	data &= ~FabricConfigAccessControl__CfgRegInstAccEn_MASK;
-	WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl), data);
-
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
-		data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
-		data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
-		data |= DF_MGCG_ENABLE_15_CYCLE_DELAY;
-		WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data);
-	} else {
-		data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
-		data &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
-		data |= DF_MGCG_DISABLE;
-		WREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater), data);
-	}
-
-	WREG32(SOC15_REG_OFFSET(DF, 0, mmFabricConfigAccessControl),
-	       mmFabricConfigAccessControl_DEFAULT);
-}
-
 static int soc15_common_set_clockgating_state(void *handle,
 					    enum amd_clockgating_state state)
 {
@@ -908,6 +905,7 @@
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		adev->nbio_funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		adev->nbio_funcs->update_medium_grain_light_sleep(adev,
@@ -920,7 +918,7 @@
 				state == AMD_CG_STATE_GATE ? true : false);
 		soc15_update_rom_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
-		soc15_update_df_medium_grain_clock_gating(adev,
+		adev->df_funcs->update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE ? true : false);
 		break;
 	case CHIP_RAVEN:
@@ -973,10 +971,7 @@
 	if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK))
 		*flags |= AMD_CG_SUPPORT_ROM_MGCG;
 
-	/* AMD_CG_SUPPORT_DF_MGCG */
-	data = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater));
-	if (data & DF_MGCG_ENABLE_15_CYCLE_DELAY)
-		*flags |= AMD_CG_SUPPORT_DF_MGCG;
+	adev->df_funcs->get_clockgating_state(adev, flags);
 }
 
 static int soc15_common_set_powergating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index f70da8a..1f714b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -55,5 +55,6 @@
 					     const u32 array_size);
 
 int vega10_reg_base_init(struct amdgpu_device *adev);
+int vega20_reg_base_init(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index def8650..0942f49 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -47,6 +47,21 @@
 #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \
 	WREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value)
 
+#define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \
+	do {							\
+		uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+		uint32_t loop = adev->usec_timeout;		\
+		while ((tmp_ & (mask)) != (expected_value)) {	\
+			udelay(2);				\
+			tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
+			loop--;					\
+			if (!loop) {				\
+				ret = -ETIMEDOUT;		\
+				break;				\
+			}					\
+		}						\
+	} while (0)
+
 #endif
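
For reference, the SOC15_WAIT_ON_RREG macro added above polls a register until (value & mask) equals the expected value, retrying every 2 usecs for up to adev->usec_timeout iterations. It only assigns `ret` on timeout, so callers must initialize `ret` to 0 themselves. A minimal usage sketch, assuming `adev` is in scope; the register and mask names are illustrative placeholders, not taken from this patch:

	int r = 0;	/* pre-initialize: the macro leaves r untouched on success */

	/* Illustrative only: wait for a hypothetical BUSY bit to clear. */
	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, 0, UVD_STATUS__BUSY_MASK, r);
	if (r == -ETIMEDOUT)
		DRM_ERROR("timed out waiting for UVD to go idle\n");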
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 7f408f8..8dc2910 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -159,6 +159,7 @@
 #define		EOP_TC_WB_ACTION_EN                     (1 << 15) /* L2 */
 #define		EOP_TCL1_ACTION_EN                      (1 << 16)
 #define		EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
+#define		EOP_TC_NC_ACTION_EN			(1 << 19)
 #define		EOP_TC_MD_ACTION_EN			(1 << 21) /* L2 metadata */
 
 #define		DATA_SEL(x)                             ((x) << 29)
@@ -268,6 +269,11 @@
 			 * x=1: tmz_end
 			 */
 
+#define	PACKET3_INVALIDATE_TLBS				0x98
+#              define PACKET3_INVALIDATE_TLBS_DST_SEL(x)     ((x) << 0)
+#              define PACKET3_INVALIDATE_TLBS_ALL_HUB(x)     ((x) << 4)
+#              define PACKET3_INVALIDATE_TLBS_PASID(x)       ((x) << 5)
+#              define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x)  ((x) << 29)
 #define PACKET3_SET_RESOURCES				0xA0
 /* 1. header
  * 2. CONTROL
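
The PACKET3_INVALIDATE_TLBS fields above are plain shifts, so they OR together into the packet's payload dword. A hedged sketch of emitting such a packet, using the PACKET3() header macro from this file; `pasid` and `flush_type` are illustrative placeholders:

	/* Illustrative only: request a per-PASID TLB flush on all VM hubs. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));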
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 948bb943..6fed3d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -93,6 +93,7 @@
 static int uvd_v4_2_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	adev->uvd.num_uvd_inst = 1;
 
 	uvd_v4_2_set_ring_funcs(adev);
 	uvd_v4_2_set_irq_funcs(adev);
@@ -107,7 +108,7 @@
 	int r;
 
 	/* UVD TRAP */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
 	if (r)
 		return r;
 
@@ -119,9 +120,9 @@
 	if (r)
 		return r;
 
-	ring = &adev->uvd.ring;
+	ring = &adev->uvd.inst->ring;
 	sprintf(ring->name, "uvd");
-	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
 
 	return r;
 }
@@ -150,7 +151,7 @@
 static int uvd_v4_2_hw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 	uint32_t tmp;
 	int r;
 
@@ -208,7 +209,7 @@
 static int uvd_v4_2_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v4_2_stop(adev);
@@ -251,7 +252,7 @@
  */
 static int uvd_v4_2_start(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 	uint32_t rb_bufsz;
 	int i, j, r;
 	u32 tmp;
@@ -523,6 +524,18 @@
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
+static void uvd_v4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	int i;
+
+	WARN_ON(ring->wptr % 2 || count % 2);
+
+	for (i = 0; i < count / 2; i++) {
+		amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+		amdgpu_ring_write(ring, 0);
+	}
+}
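
A UVD NOP is a two-dword sequence (a PACKET0 write to mmUVD_NO_OP plus a zero payload), which is why this helper replaces the generic single-dword fill and the static .nop entry is dropped from the ring funcs below; the same helper is added for uvd_v5_0 and uvd_v6_0 later in this patch. A minimal sketch of its use, with an illustrative count:

	/* Illustrative only: pad with four dwords, i.e. two
	 * PACKET0(mmUVD_NO_OP)/0 pairs; count must stay even, as the
	 * WARN_ON above enforces.
	 */
	uvd_v4_2_ring_insert_nop(ring, 4);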
+
 /**
  * uvd_v4_2_mc_resume - memory controller programming
  *
@@ -536,7 +549,7 @@
 	uint32_t size;
 
 	/* program the VCPU memory controller bits 0-27 */
-	addr = (adev->uvd.gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
+	addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3;
 	size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3;
 	WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr);
 	WREG32(mmUVD_VCPU_CACHE_SIZE0, size);
@@ -553,11 +566,11 @@
 	WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
 
 	/* bits 28-31 */
-	addr = (adev->uvd.gpu_addr >> 28) & 0xF;
+	addr = (adev->uvd.inst->gpu_addr >> 28) & 0xF;
 	WREG32(mmUVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
 
 	/* bits 32-39 */
-	addr = (adev->uvd.gpu_addr >> 32) & 0xFF;
+	addr = (adev->uvd.inst->gpu_addr >> 32) & 0xFF;
 	WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
 
 	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
@@ -664,7 +677,7 @@
 				      struct amdgpu_iv_entry *entry)
 {
 	DRM_DEBUG("IH: UVD TRAP\n");
-	amdgpu_fence_process(&adev->uvd.ring);
+	amdgpu_fence_process(&adev->uvd.inst->ring);
 	return 0;
 }
 
@@ -688,7 +701,7 @@
 
 	if (state == AMD_PG_STATE_GATE) {
 		uvd_v4_2_stop(adev);
-		if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) {
+		if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
 			if (!(RREG32_SMC(ixCURRENT_PG_STATUS) &
 				CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) {
 				WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK   |
@@ -699,7 +712,7 @@
 		}
 		return 0;
 	} else {
-		if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) {
+		if (adev->pg_flags & AMD_PG_SUPPORT_UVD && !adev->pm.dpm_enabled) {
 			if (RREG32_SMC(ixCURRENT_PG_STATUS) &
 				CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
 				WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK   |
@@ -732,7 +745,6 @@
 static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
-	.nop = PACKET0(mmUVD_NO_OP, 0),
 	.support_64bit_ptrs = false,
 	.get_rptr = uvd_v4_2_ring_get_rptr,
 	.get_wptr = uvd_v4_2_ring_get_wptr,
@@ -745,7 +757,7 @@
 	.emit_fence = uvd_v4_2_ring_emit_fence,
 	.test_ring = uvd_v4_2_ring_test_ring,
 	.test_ib = amdgpu_uvd_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = uvd_v4_2_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = amdgpu_uvd_ring_begin_use,
 	.end_use = amdgpu_uvd_ring_end_use,
@@ -753,7 +765,7 @@
 
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
 {
-	adev->uvd.ring.funcs = &uvd_v4_2_ring_funcs;
+	adev->uvd.inst->ring.funcs = &uvd_v4_2_ring_funcs;
 }
 
 static const struct amdgpu_irq_src_funcs uvd_v4_2_irq_funcs = {
@@ -763,8 +775,8 @@
 
 static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev)
 {
-	adev->uvd.irq.num_types = 1;
-	adev->uvd.irq.funcs = &uvd_v4_2_irq_funcs;
+	adev->uvd.inst->irq.num_types = 1;
+	adev->uvd.inst->irq.funcs = &uvd_v4_2_irq_funcs;
 }
 
 const struct amdgpu_ip_block_version uvd_v4_2_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 6445d55..341ee6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -89,6 +89,7 @@
 static int uvd_v5_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	adev->uvd.num_uvd_inst = 1;
 
 	uvd_v5_0_set_ring_funcs(adev);
 	uvd_v5_0_set_irq_funcs(adev);
@@ -103,7 +104,7 @@
 	int r;
 
 	/* UVD TRAP */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
 	if (r)
 		return r;
 
@@ -115,9 +116,9 @@
 	if (r)
 		return r;
 
-	ring = &adev->uvd.ring;
+	ring = &adev->uvd.inst->ring;
 	sprintf(ring->name, "uvd");
-	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
 
 	return r;
 }
@@ -144,7 +145,7 @@
 static int uvd_v5_0_hw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 	uint32_t tmp;
 	int r;
 
@@ -204,7 +205,7 @@
 static int uvd_v5_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v5_0_stop(adev);
@@ -253,9 +254,9 @@
 
 	/* program memory controller bits 0-27 */
 	WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
-			lower_32_bits(adev->uvd.gpu_addr));
+			lower_32_bits(adev->uvd.inst->gpu_addr));
 	WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
-			upper_32_bits(adev->uvd.gpu_addr));
+			upper_32_bits(adev->uvd.inst->gpu_addr));
 
 	offset = AMDGPU_UVD_FIRMWARE_OFFSET;
 	size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
@@ -287,7 +288,7 @@
  */
 static int uvd_v5_0_start(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 	uint32_t rb_bufsz, tmp;
 	uint32_t lmi_swap_cntl;
 	uint32_t mp_swap_cntl;
@@ -540,6 +541,18 @@
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
+static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	int i;
+
+	WARN_ON(ring->wptr % 2 || count % 2);
+
+	for (i = 0; i < count / 2; i++) {
+		amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+		amdgpu_ring_write(ring, 0);
+	}
+}
+
 static bool uvd_v5_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -586,7 +599,7 @@
 				      struct amdgpu_iv_entry *entry)
 {
 	DRM_DEBUG("IH: UVD TRAP\n");
-	amdgpu_fence_process(&adev->uvd.ring);
+	amdgpu_fence_process(&adev->uvd.inst->ring);
 	return 0;
 }
 
@@ -840,7 +853,6 @@
 static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
-	.nop = PACKET0(mmUVD_NO_OP, 0),
 	.support_64bit_ptrs = false,
 	.get_rptr = uvd_v5_0_ring_get_rptr,
 	.get_wptr = uvd_v5_0_ring_get_wptr,
@@ -853,7 +865,7 @@
 	.emit_fence = uvd_v5_0_ring_emit_fence,
 	.test_ring = uvd_v5_0_ring_test_ring,
 	.test_ib = amdgpu_uvd_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = uvd_v5_0_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = amdgpu_uvd_ring_begin_use,
 	.end_use = amdgpu_uvd_ring_end_use,
@@ -861,7 +873,7 @@
 
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
 {
-	adev->uvd.ring.funcs = &uvd_v5_0_ring_funcs;
+	adev->uvd.inst->ring.funcs = &uvd_v5_0_ring_funcs;
 }
 
 static const struct amdgpu_irq_src_funcs uvd_v5_0_irq_funcs = {
@@ -871,8 +883,8 @@
 
 static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev)
 {
-	adev->uvd.irq.num_types = 1;
-	adev->uvd.irq.funcs = &uvd_v5_0_irq_funcs;
+	adev->uvd.inst->irq.num_types = 1;
+	adev->uvd.inst->irq.funcs = &uvd_v5_0_irq_funcs;
 }
 
 const struct amdgpu_ip_block_version uvd_v5_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index f26f515..bfddf97 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -62,7 +62,7 @@
 static inline bool uvd_v6_0_enc_support(struct amdgpu_device *adev)
 {
 	return ((adev->asic_type >= CHIP_POLARIS10) &&
-			(adev->asic_type <= CHIP_POLARIS12) &&
+			(adev->asic_type <= CHIP_VEGAM) &&
 			(!adev->uvd.fw_version || adev->uvd.fw_version >= FW_1_130_16));
 }
 
@@ -91,7 +91,7 @@
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	if (ring == &adev->uvd.ring_enc[0])
+	if (ring == &adev->uvd.inst->ring_enc[0])
 		return RREG32(mmUVD_RB_RPTR);
 	else
 		return RREG32(mmUVD_RB_RPTR2);
@@ -121,7 +121,7 @@
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	if (ring == &adev->uvd.ring_enc[0])
+	if (ring == &adev->uvd.inst->ring_enc[0])
 		return RREG32(mmUVD_RB_WPTR);
 	else
 		return RREG32(mmUVD_RB_WPTR2);
@@ -152,7 +152,7 @@
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	if (ring == &adev->uvd.ring_enc[0])
+	if (ring == &adev->uvd.inst->ring_enc[0])
 		WREG32(mmUVD_RB_WPTR,
 			lower_32_bits(ring->wptr));
 	else
@@ -375,6 +375,7 @@
 static int uvd_v6_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	adev->uvd.num_uvd_inst = 1;
 
 	if (!(adev->flags & AMD_IS_APU) &&
 	    (RREG32_SMC(ixCC_HARVEST_FUSES) & CC_HARVEST_FUSES__UVD_DISABLE_MASK))
@@ -399,14 +400,14 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* UVD TRAP */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq);
 	if (r)
 		return r;
 
 	/* UVD ENC TRAP */
 	if (uvd_v6_0_enc_support(adev)) {
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-			r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.irq);
+			r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 119, &adev->uvd.inst->irq);
 			if (r)
 				return r;
 		}
@@ -418,18 +419,18 @@
 
 	if (!uvd_v6_0_enc_support(adev)) {
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
-			adev->uvd.ring_enc[i].funcs = NULL;
+			adev->uvd.inst->ring_enc[i].funcs = NULL;
 
-		adev->uvd.irq.num_types = 1;
+		adev->uvd.inst->irq.num_types = 1;
 		adev->uvd.num_enc_rings = 0;
 
 		DRM_INFO("UVD ENC is disabled\n");
 	} else {
 		struct drm_sched_rq *rq;
-		ring = &adev->uvd.ring_enc[0];
+		ring = &adev->uvd.inst->ring_enc[0];
 		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-		r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
-					  rq, amdgpu_sched_jobs, NULL);
+		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst->entity_enc,
+					  rq, NULL);
 		if (r) {
 			DRM_ERROR("Failed setting up UVD ENC run queue.\n");
 			return r;
@@ -440,17 +441,17 @@
 	if (r)
 		return r;
 
-	ring = &adev->uvd.ring;
+	ring = &adev->uvd.inst->ring;
 	sprintf(ring->name, "uvd");
-	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
 	if (r)
 		return r;
 
 	if (uvd_v6_0_enc_support(adev)) {
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-			ring = &adev->uvd.ring_enc[i];
+			ring = &adev->uvd.inst->ring_enc[i];
 			sprintf(ring->name, "uvd_enc%d", i);
-			r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
+			r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
 			if (r)
 				return r;
 		}
@@ -469,10 +470,10 @@
 		return r;
 
 	if (uvd_v6_0_enc_support(adev)) {
-		drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
+		drm_sched_entity_fini(&adev->uvd.inst->ring_enc[0].sched, &adev->uvd.inst->entity_enc);
 
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
-			amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
+			amdgpu_ring_fini(&adev->uvd.inst->ring_enc[i]);
 	}
 
 	return amdgpu_uvd_sw_fini(adev);
@@ -488,7 +489,7 @@
 static int uvd_v6_0_hw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 	uint32_t tmp;
 	int i, r;
 
@@ -532,7 +533,7 @@
 
 	if (uvd_v6_0_enc_support(adev)) {
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-			ring = &adev->uvd.ring_enc[i];
+			ring = &adev->uvd.inst->ring_enc[i];
 			ring->ready = true;
 			r = amdgpu_ring_test_ring(ring);
 			if (r) {
@@ -563,7 +564,7 @@
 static int uvd_v6_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v6_0_stop(adev);
@@ -611,9 +612,9 @@
 
 	/* program memory controller bits 0-27 */
 	WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
-			lower_32_bits(adev->uvd.gpu_addr));
+			lower_32_bits(adev->uvd.inst->gpu_addr));
 	WREG32(mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
-			upper_32_bits(adev->uvd.gpu_addr));
+			upper_32_bits(adev->uvd.inst->gpu_addr));
 
 	offset = AMDGPU_UVD_FIRMWARE_OFFSET;
 	size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
@@ -726,7 +727,7 @@
  */
 static int uvd_v6_0_start(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring = &adev->uvd.inst->ring;
 	uint32_t rb_bufsz, tmp;
 	uint32_t lmi_swap_cntl;
 	uint32_t mp_swap_cntl;
@@ -866,14 +867,14 @@
 	WREG32_FIELD(UVD_RBC_RB_CNTL, RB_NO_FETCH, 0);
 
 	if (uvd_v6_0_enc_support(adev)) {
-		ring = &adev->uvd.ring_enc[0];
+		ring = &adev->uvd.inst->ring_enc[0];
 		WREG32(mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
 		WREG32(mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
 		WREG32(mmUVD_RB_BASE_LO, ring->gpu_addr);
 		WREG32(mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
 		WREG32(mmUVD_RB_SIZE, ring->ring_size / 4);
 
-		ring = &adev->uvd.ring_enc[1];
+		ring = &adev->uvd.inst->ring_enc[1];
 		WREG32(mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
 		WREG32(mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
 		WREG32(mmUVD_RB_BASE_LO2, ring->gpu_addr);
@@ -964,6 +965,16 @@
 }
 
 /**
+ * uvd_v6_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	/* The firmware doesn't seem to like touching registers at this point. */
+}
+
+/**
  * uvd_v6_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -1089,6 +1100,18 @@
 	amdgpu_ring_write(ring, 0xE);
 }
 
+static void uvd_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	int i;
+
+	WARN_ON(ring->wptr % 2 || count % 2);
+
+	for (i = 0; i < count / 2; i++) {
+		amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP, 0));
+		amdgpu_ring_write(ring, 0);
+	}
+}
+
 static void uvd_v6_0_enc_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 {
 	uint32_t seq = ring->fence_drv.sync_seq;
@@ -1148,10 +1171,10 @@
 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
 
 	if (srbm_soft_reset) {
-		adev->uvd.srbm_soft_reset = srbm_soft_reset;
+		adev->uvd.inst->srbm_soft_reset = srbm_soft_reset;
 		return true;
 	} else {
-		adev->uvd.srbm_soft_reset = 0;
+		adev->uvd.inst->srbm_soft_reset = 0;
 		return false;
 	}
 }
@@ -1160,7 +1183,7 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!adev->uvd.srbm_soft_reset)
+	if (!adev->uvd.inst->srbm_soft_reset)
 		return 0;
 
 	uvd_v6_0_stop(adev);
@@ -1172,9 +1195,9 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	u32 srbm_soft_reset;
 
-	if (!adev->uvd.srbm_soft_reset)
+	if (!adev->uvd.inst->srbm_soft_reset)
 		return 0;
-	srbm_soft_reset = adev->uvd.srbm_soft_reset;
+	srbm_soft_reset = adev->uvd.inst->srbm_soft_reset;
 
 	if (srbm_soft_reset) {
 		u32 tmp;
@@ -1202,7 +1225,7 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!adev->uvd.srbm_soft_reset)
+	if (!adev->uvd.inst->srbm_soft_reset)
 		return 0;
 
 	mdelay(5);
@@ -1228,17 +1251,17 @@
 
 	switch (entry->src_id) {
 	case 124:
-		amdgpu_fence_process(&adev->uvd.ring);
+		amdgpu_fence_process(&adev->uvd.inst->ring);
 		break;
 	case 119:
 		if (likely(uvd_v6_0_enc_support(adev)))
-			amdgpu_fence_process(&adev->uvd.ring_enc[0]);
+			amdgpu_fence_process(&adev->uvd.inst->ring_enc[0]);
 		else
 			int_handled = false;
 		break;
 	case 120:
 		if (likely(uvd_v6_0_enc_support(adev)))
-			amdgpu_fence_process(&adev->uvd.ring_enc[1]);
+			amdgpu_fence_process(&adev->uvd.inst->ring_enc[1]);
 		else
 			int_handled = false;
 		break;
@@ -1521,22 +1544,22 @@
 static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
-	.nop = PACKET0(mmUVD_NO_OP, 0),
 	.support_64bit_ptrs = false,
 	.get_rptr = uvd_v6_0_ring_get_rptr,
 	.get_wptr = uvd_v6_0_ring_get_wptr,
 	.set_wptr = uvd_v6_0_ring_set_wptr,
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_frame_size =
-		6 + 6 + /* hdp flush / invalidate */
+		6 + /* hdp invalidate */
 		10 + /* uvd_v6_0_ring_emit_pipeline_sync */
 		14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
 	.emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
 	.emit_ib = uvd_v6_0_ring_emit_ib,
 	.emit_fence = uvd_v6_0_ring_emit_fence,
+	.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
 	.test_ring = uvd_v6_0_ring_test_ring,
 	.test_ib = amdgpu_uvd_ring_test_ib,
-	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_nop = uvd_v6_0_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.begin_use = amdgpu_uvd_ring_begin_use,
 	.end_use = amdgpu_uvd_ring_end_use,
@@ -1552,7 +1575,7 @@
 	.get_wptr = uvd_v6_0_ring_get_wptr,
 	.set_wptr = uvd_v6_0_ring_set_wptr,
 	.emit_frame_size =
-		6 + 6 + /* hdp flush / invalidate */
+		6 + /* hdp invalidate */
 		10 + /* uvd_v6_0_ring_emit_pipeline_sync */
 		VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */
 		14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
@@ -1561,6 +1584,7 @@
 	.emit_fence = uvd_v6_0_ring_emit_fence,
 	.emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
 	.emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
+	.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
 	.test_ring = uvd_v6_0_ring_test_ring,
 	.test_ib = amdgpu_uvd_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -1600,10 +1624,10 @@
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
 {
 	if (adev->asic_type >= CHIP_POLARIS10) {
-		adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs;
+		adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_vm_funcs;
 		DRM_INFO("UVD is enabled in VM mode\n");
 	} else {
-		adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs;
+		adev->uvd.inst->ring.funcs = &uvd_v6_0_ring_phys_funcs;
 		DRM_INFO("UVD is enabled in physical mode\n");
 	}
 }
@@ -1613,7 +1637,7 @@
 	int i;
 
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i)
-		adev->uvd.ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
+		adev->uvd.inst->ring_enc[i].funcs = &uvd_v6_0_enc_ring_vm_funcs;
 
 	DRM_INFO("UVD ENC is enabled in VM mode\n");
 }
@@ -1626,11 +1650,11 @@
 static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev)
 {
 	if (uvd_v6_0_enc_support(adev))
-		adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1;
+		adev->uvd.inst->irq.num_types = adev->uvd.num_enc_rings + 1;
 	else
-		adev->uvd.irq.num_types = 1;
+		adev->uvd.inst->irq.num_types = 1;
 
-	adev->uvd.irq.funcs = &uvd_v6_0_irq_funcs;
+	adev->uvd.inst->irq.funcs = &uvd_v6_0_irq_funcs;
 }
 
 const struct amdgpu_ip_block_version uvd_v6_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index eddc57f..57d32f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -40,6 +40,8 @@
 #include "mmhub/mmhub_1_0_offset.h"
 #include "mmhub/mmhub_1_0_sh_mask.h"
 
+#define UVD7_MAX_HW_INSTANCES_VEGA20			2
+
 static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -47,6 +49,11 @@
 static void uvd_v7_0_stop(struct amdgpu_device *adev);
 static int uvd_v7_0_sriov_start(struct amdgpu_device *adev);
 
+static int amdgpu_ih_clientid_uvds[] = {
+	SOC15_IH_CLIENTID_UVD,
+	SOC15_IH_CLIENTID_UVD1
+};
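
The amdgpu_ih_clientid_uvds table maps a UVD instance index to its interrupt client ID, which lets the per-instance loops below register the decode trap (src_id 124) and the encode traps (src_id 119+) against the right client. A hedged sketch of the lookup, mirroring the sw_init changes that follow:

	/* Illustrative only: hook instance j's decode trap via its client ID. */
	r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], 124,
			      &adev->uvd.inst[j].irq);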
+
 /**
  * uvd_v7_0_ring_get_rptr - get read pointer
  *
@@ -58,7 +65,7 @@
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+	return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_RPTR);
 }
 
 /**
@@ -72,10 +79,10 @@
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	if (ring == &adev->uvd.ring_enc[0])
-		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
+	if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR);
 	else
-		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
+		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2);
 }
 
 /**
@@ -89,7 +96,7 @@
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
+	return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR);
 }
 
 /**
@@ -106,10 +113,10 @@
 	if (ring->use_doorbell)
 		return adev->wb.wb[ring->wptr_offs];
 
-	if (ring == &adev->uvd.ring_enc[0])
-		return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+	if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR);
 	else
-		return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+		return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2);
 }
 
 /**
@@ -123,7 +130,7 @@
 {
 	struct amdgpu_device *adev = ring->adev;
 
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
 }
 
 /**
@@ -144,11 +151,11 @@
 		return;
 	}
 
-	if (ring == &adev->uvd.ring_enc[0])
-		WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
+	if (ring == &adev->uvd.inst[ring->me].ring_enc[0])
+		WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR,
 			lower_32_bits(ring->wptr));
 	else
-		WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
+		WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2,
 			lower_32_bits(ring->wptr));
 }
 
@@ -170,8 +177,8 @@
 
 	r = amdgpu_ring_alloc(ring, 16);
 	if (r) {
-		DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n",
-			  ring->idx, r);
+		DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n",
+			  ring->me, ring->idx, r);
 		return r;
 	}
 	amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
@@ -184,11 +191,11 @@
 	}
 
 	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
-			 ring->idx, i);
+		DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
+			 ring->me, ring->idx, i);
 	} else {
-		DRM_ERROR("amdgpu: ring %d test failed\n",
-			  ring->idx);
+		DRM_ERROR("amdgpu: (%d)ring %d test failed\n",
+			  ring->me, ring->idx);
 		r = -ETIMEDOUT;
 	}
 
@@ -342,24 +349,24 @@
 
 	r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r);
 		goto error;
 	}
 
 	r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r);
 		goto error;
 	}
 
 	r = dma_fence_wait_timeout(fence, false, timeout);
 	if (r == 0) {
-		DRM_ERROR("amdgpu: IB test timed out.\n");
+		DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me);
 		r = -ETIMEDOUT;
 	} else if (r < 0) {
-		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+		DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r);
 	} else {
-		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+		DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx);
 		r = 0;
 	}
 error:
@@ -370,6 +377,10 @@
 static int uvd_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	if (adev->asic_type == CHIP_VEGA20)
+		adev->uvd.num_uvd_inst = UVD7_MAX_HW_INSTANCES_VEGA20;
+	else
+		adev->uvd.num_uvd_inst = 1;
 
 	if (amdgpu_sriov_vf(adev))
 		adev->uvd.num_enc_rings = 1;
@@ -386,19 +397,21 @@
 {
 	struct amdgpu_ring *ring;
 	struct drm_sched_rq *rq;
-	int i, r;
+	int i, j, r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	/* UVD TRAP */
-	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq);
-	if (r)
-		return r;
-
-	/* UVD ENC TRAP */
-	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
+	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+		/* UVD TRAP */
+		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], 124, &adev->uvd.inst[j].irq);
 		if (r)
 			return r;
+
+		/* UVD ENC TRAP */
+		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_uvds[j], i + 119, &adev->uvd.inst[j].irq);
+			if (r)
+				return r;
+		}
 	}
 
 	r = amdgpu_uvd_sw_init(adev);
@@ -415,43 +428,48 @@
 		DRM_INFO("PSP loading UVD firmware\n");
 	}
 
-	ring = &adev->uvd.ring_enc[0];
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
-				  rq, amdgpu_sched_jobs, NULL);
-	if (r) {
-		DRM_ERROR("Failed setting up UVD ENC run queue.\n");
-		return r;
+	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+		ring = &adev->uvd.inst[j].ring_enc[0];
+		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+		r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity_enc,
+					  rq, NULL);
+		if (r) {
+			DRM_ERROR("(%d)Failed setting up UVD ENC run queue.\n", j);
+			return r;
+		}
 	}
 
 	r = amdgpu_uvd_resume(adev);
 	if (r)
 		return r;
-	if (!amdgpu_sriov_vf(adev)) {
-		ring = &adev->uvd.ring;
-		sprintf(ring->name, "uvd");
-		r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
-		if (r)
-			return r;
-	}
 
-	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-		ring = &adev->uvd.ring_enc[i];
-		sprintf(ring->name, "uvd_enc%d", i);
-		if (amdgpu_sriov_vf(adev)) {
-			ring->use_doorbell = true;
-
-			/* currently only use the first enconding ring for
-			 * sriov, so set unused location for other unused rings.
-			 */
-			if (i == 0)
-				ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
-			else
-				ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
+	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+		if (!amdgpu_sriov_vf(adev)) {
+			ring = &adev->uvd.inst[j].ring;
+			sprintf(ring->name, "uvd<%d>", j);
+			r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+			if (r)
+				return r;
 		}
-		r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.irq, 0);
-		if (r)
-			return r;
+
+		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+			ring = &adev->uvd.inst[j].ring_enc[i];
+			sprintf(ring->name, "uvd_enc%d<%d>", i, j);
+			if (amdgpu_sriov_vf(adev)) {
+				ring->use_doorbell = true;
+
+			/* currently only use the first encoding ring for
+			 * sriov, so set an unused location for the other rings.
+				 */
+				if (i == 0)
+					ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2;
+				else
+					ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1;
+			}
+			r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
+			if (r)
+				return r;
+		}
 	}
 
 	r = amdgpu_virt_alloc_mm_table(adev);
@@ -463,7 +481,7 @@
 
 static int uvd_v7_0_sw_fini(void *handle)
 {
-	int i, r;
+	int i, j, r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	amdgpu_virt_free_mm_table(adev);
@@ -472,11 +490,12 @@
 	if (r)
 		return r;
 
-	drm_sched_entity_fini(&adev->uvd.ring_enc[0].sched, &adev->uvd.entity_enc);
+	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+		drm_sched_entity_fini(&adev->uvd.inst[j].ring_enc[0].sched, &adev->uvd.inst[j].entity_enc);
 
-	for (i = 0; i < adev->uvd.num_enc_rings; ++i)
-		amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
-
+		for (i = 0; i < adev->uvd.num_enc_rings; ++i)
+			amdgpu_ring_fini(&adev->uvd.inst[j].ring_enc[i]);
+	}
 	return amdgpu_uvd_sw_fini(adev);
 }
 
@@ -490,9 +509,9 @@
 static int uvd_v7_0_hw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring;
 	uint32_t tmp;
-	int i, r;
+	int i, j, r;
 
 	if (amdgpu_sriov_vf(adev))
 		r = uvd_v7_0_sriov_start(adev);
@@ -501,57 +520,60 @@
 	if (r)
 		goto done;
 
-	if (!amdgpu_sriov_vf(adev)) {
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->ready = false;
-			goto done;
+	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
+		ring = &adev->uvd.inst[j].ring;
+
+		if (!amdgpu_sriov_vf(adev)) {
+			ring->ready = true;
+			r = amdgpu_ring_test_ring(ring);
+			if (r) {
+				ring->ready = false;
+				goto done;
+			}
+
+			r = amdgpu_ring_alloc(ring, 10);
+			if (r) {
+				DRM_ERROR("amdgpu: (%d)ring failed to lock UVD ring (%d).\n", j, r);
+				goto done;
+			}
+
+			tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+				mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
+			amdgpu_ring_write(ring, tmp);
+			amdgpu_ring_write(ring, 0xFFFFF);
+
+			tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+				mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
+			amdgpu_ring_write(ring, tmp);
+			amdgpu_ring_write(ring, 0xFFFFF);
+
+			tmp = PACKET0(SOC15_REG_OFFSET(UVD, j,
+				mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
+			amdgpu_ring_write(ring, tmp);
+			amdgpu_ring_write(ring, 0xFFFFF);
+
+			/* Clear timeout status bits */
+			amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+				mmUVD_SEMA_TIMEOUT_STATUS), 0));
+			amdgpu_ring_write(ring, 0x8);
+
+			amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, j,
+				mmUVD_SEMA_CNTL), 0));
+			amdgpu_ring_write(ring, 3);
+
+			amdgpu_ring_commit(ring);
 		}
 
-		r = amdgpu_ring_alloc(ring, 10);
-		if (r) {
-			DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
-			goto done;
-		}
-
-		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-			mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL), 0);
-		amdgpu_ring_write(ring, tmp);
-		amdgpu_ring_write(ring, 0xFFFFF);
-
-		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-			mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL), 0);
-		amdgpu_ring_write(ring, tmp);
-		amdgpu_ring_write(ring, 0xFFFFF);
-
-		tmp = PACKET0(SOC15_REG_OFFSET(UVD, 0,
-			mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL), 0);
-		amdgpu_ring_write(ring, tmp);
-		amdgpu_ring_write(ring, 0xFFFFF);
-
-		/* Clear timeout status bits */
-		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-			mmUVD_SEMA_TIMEOUT_STATUS), 0));
-		amdgpu_ring_write(ring, 0x8);
-
-		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0,
-			mmUVD_SEMA_CNTL), 0));
-		amdgpu_ring_write(ring, 3);
-
-		amdgpu_ring_commit(ring);
-	}
-
-	for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
-		ring = &adev->uvd.ring_enc[i];
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->ready = false;
-			goto done;
+		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+			ring = &adev->uvd.inst[j].ring_enc[i];
+			ring->ready = true;
+			r = amdgpu_ring_test_ring(ring);
+			if (r) {
+				ring->ready = false;
+				goto done;
+			}
 		}
 	}
-
 done:
 	if (!r)
 		DRM_INFO("UVD and UVD ENC initialized successfully.\n");
@@ -569,7 +591,7 @@
 static int uvd_v7_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	int i;
 
 	if (!amdgpu_sriov_vf(adev))
 		uvd_v7_0_stop(adev);
@@ -578,7 +600,8 @@
 		DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
 	}
 
-	ring->ready = false;
+	for (i = 0; i < adev->uvd.num_uvd_inst; ++i)
+		adev->uvd.inst[i].ring.ready = false;
 
 	return 0;
 }
@@ -618,48 +641,51 @@
 {
 	uint32_t size = AMDGPU_UVD_FIRMWARE_SIZE(adev);
 	uint32_t offset;
+	int i;
 
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
-			lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
-			upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-		offset = 0;
-	} else {
-		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
-			lower_32_bits(adev->uvd.gpu_addr));
-		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
-			upper_32_bits(adev->uvd.gpu_addr));
-		offset = size;
+	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+				lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+				upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+			offset = 0;
+		} else {
+			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+				lower_32_bits(adev->uvd.inst[i].gpu_addr));
+			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+				upper_32_bits(adev->uvd.inst[i].gpu_addr));
+			offset = size;
+		}
+
+		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+					AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size);
+
+		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+				lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+				upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
+		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
+
+		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+				lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+		WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+				upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
+		WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2,
+				AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+		WREG32_SOC15(UVD, i, mmUVD_UDEC_ADDR_CONFIG,
+				adev->gfx.config.gb_addr_config);
+		WREG32_SOC15(UVD, i, mmUVD_UDEC_DB_ADDR_CONFIG,
+				adev->gfx.config.gb_addr_config);
+		WREG32_SOC15(UVD, i, mmUVD_UDEC_DBW_ADDR_CONFIG,
+				adev->gfx.config.gb_addr_config);
+
+		WREG32_SOC15(UVD, i, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
 	}
-
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
-				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
-
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
-			lower_32_bits(adev->uvd.gpu_addr + offset));
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
-			upper_32_bits(adev->uvd.gpu_addr + offset));
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21));
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE);
-
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
-			lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
-			upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21));
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
-			AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
-
-	WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
-			adev->gfx.config.gb_addr_config);
-	WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
-			adev->gfx.config.gb_addr_config);
-	WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
-			adev->gfx.config.gb_addr_config);
-
-	WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles);
 }
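
The instance index threaded through uvd_v7_0_mc_resume above feeds directly into the SOC15 register helpers, which look up the per-IP, per-instance base in adev->reg_offset (compare the WREG32_SOC15_OFFSET definition in soc15_common.h earlier in this patch). A sketch of the expansion, under the assumption that WREG32_SOC15 follows the same pattern without the extra offset:

	/* Conceptually, WREG32_SOC15(UVD, i, mmUVD_STATUS, v) becomes: */
	WREG32(adev->reg_offset[UVD_HWIP][i][mmUVD_STATUS_BASE_IDX] + mmUVD_STATUS, v);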
 
 static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev,
@@ -669,6 +695,7 @@
 	uint64_t addr = table->gpu_addr;
 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)table->cpu_addr;
 	uint32_t size;
+	int i;
 
 	size = header->header_size + header->vce_table_size + header->uvd_table_size;
 
@@ -688,11 +715,12 @@
 	/* 4, set resp to zero */
 	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0);
 
-	WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0);
-	adev->wb.wb[adev->uvd.ring_enc[0].wptr_offs] = 0;
-	adev->uvd.ring_enc[0].wptr = 0;
-	adev->uvd.ring_enc[0].wptr_old = 0;
-
+	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+		WDOORBELL32(adev->uvd.inst[i].ring_enc[0].doorbell_index, 0);
+		adev->wb.wb[adev->uvd.inst[i].ring_enc[0].wptr_offs] = 0;
+		adev->uvd.inst[i].ring_enc[0].wptr = 0;
+		adev->uvd.inst[i].ring_enc[0].wptr_old = 0;
+	}
 	/* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */
 	WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001);
 
@@ -725,6 +753,7 @@
 	struct mmsch_v1_0_cmd_end end = { {0} };
 	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
 	struct mmsch_v1_0_init_header *header = (struct mmsch_v1_0_init_header *)init_table;
+	uint8_t i = 0;
 
 	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
 	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
@@ -742,120 +771,121 @@
 
 		init_table += header->uvd_table_offset;
 
-		ring = &adev->uvd.ring;
-		ring->wptr = 0;
-		size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
+		for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+			ring = &adev->uvd.inst[i].ring;
+			ring->wptr = 0;
+			size = AMDGPU_GPU_PAGE_ALIGN(adev->uvd.fw->size + 4);
 
-		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
-						   0xFFFFFFFF, 0x00000004);
-		/* mc resume*/
-		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
-						    lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
-						    upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
-			offset = 0;
-		} else {
-			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
-						    lower_32_bits(adev->uvd.gpu_addr));
-			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
-						    upper_32_bits(adev->uvd.gpu_addr));
-			offset = size;
+			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+							   0xFFFFFFFF, 0x00000004);
+			/* mc resume*/
+			if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+							    lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+							    upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr));
+				offset = 0;
+			} else {
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+							    lower_32_bits(adev->uvd.inst[i].gpu_addr));
+				MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+							    upper_32_bits(adev->uvd.inst[i].gpu_addr));
+				offset = size;
+			}
+
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+						    AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0), size);
+
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+						    lower_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+						    upper_32_bits(adev->uvd.inst[i].gpu_addr + offset));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
+
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+						    lower_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+						    upper_32_bits(adev->uvd.inst[i].gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+						    AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
+
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
+			/* mc resume end*/
+
+			/* disable clock gating */
+			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_CGC_CTRL),
+							   ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
+
+			/* disable interrupt */
+			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+							   ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
+
+			/* stall UMC and register bus before resetting VCPU */
+			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+							   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+							   UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+			/* put LMI, VCPU, RBC etc... into reset */
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+						    (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+							       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+							       UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+							       UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+							       UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+							       UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+							       UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+							       UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
+
+			/* initialize UVD memory controller */
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL),
+						    (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+							       UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+							       UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+							       UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+							       UVD_LMI_CTRL__REQ_MODE_MASK |
+							       0x00100000L));
+
+			/* take all subblocks out of reset, except VCPU */
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET),
+						    UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
+			/* enable VCPU clock */
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL),
+						    UVD_VCPU_CNTL__CLK_EN_MASK);
+
+			/* enable master interrupt */
+			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN),
+							   ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+							   (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+			/* clear bit 4 of UVD_STATUS */
+			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+							   ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
+
+			/* force RBC into idle state */
+			size = order_base_2(ring->ring_size);
+			tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
+			tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+			ring = &adev->uvd.inst[i].ring_enc[0];
+			ring->wptr = 0;
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO), ring->gpu_addr);
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE), ring->ring_size / 4);
+
+			/* boot up the VCPU */
+			MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_SOFT_RESET), 0);
+
+			/* enable UMC */
+			MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+											   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
+
+			MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0x02, 0x02);
 		}
-
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0),
-					    AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size);
-
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
-					    lower_32_bits(adev->uvd.gpu_addr + offset));
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
-					    upper_32_bits(adev->uvd.gpu_addr + offset));
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21));
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE);
-
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
-					    lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
-					    upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE));
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21));
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2),
-					    AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40));
-
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles);
-		/* mc resume end*/
-
-		/* disable clock gating */
-		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL),
-						   ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK, 0);
-
-		/* disable interupt */
-		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
-						   ~UVD_MASTINT_EN__VCPU_EN_MASK, 0);
-
-		/* stall UMC and register bus before resetting VCPU */
-		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
-						   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
-						   UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-
-		/* put LMI, VCPU, RBC etc... into reset */
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-					    (uint32_t)(UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
-						       UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
-						       UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
-						       UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
-						       UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
-						       UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
-						       UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
-						       UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK));
-
-		/* initialize UVD memory controller */
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL),
-					    (uint32_t)((0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
-						       UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
-						       UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
-						       UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
-						       UVD_LMI_CTRL__REQ_MODE_MASK |
-						       0x00100000L));
-
-		/* take all subblocks out of reset, except VCPU */
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-					    UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-
-		/* enable VCPU clock */
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL),
-					    UVD_VCPU_CNTL__CLK_EN_MASK);
-
-		/* enable master interrupt */
-		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
-						   ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
-						   (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
-
-		/* clear the bit 4 of UVD_STATUS */
-		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS),
-						   ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT), 0);
-
-		/* force RBC into idle state */
-		size = order_base_2(ring->ring_size);
-		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, size);
-		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp);
-
-		ring = &adev->uvd.ring_enc[0];
-		ring->wptr = 0;
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr);
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr));
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4);
-
-		/* boot up the VCPU */
-		MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0);
-
-		/* enable UMC */
-		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
-										   ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, 0);
-
-		MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0x02, 0x02);
-
 		/* add end packet */
 		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
 		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
@@ -874,15 +904,17 @@
  */
 static int uvd_v7_0_start(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring = &adev->uvd.ring;
+	struct amdgpu_ring *ring;
 	uint32_t rb_bufsz, tmp;
 	uint32_t lmi_swap_cntl;
 	uint32_t mp_swap_cntl;
-	int i, j, r;
+	int i, j, k, r;
 
-	/* disable DPG */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
-			~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+	for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+		/* disable DPG */
+		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_POWER_STATUS), 0,
+				~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+	}
 
 	/* disable byte swapping */
 	lmi_swap_cntl = 0;
@@ -890,157 +922,159 @@
 
 	uvd_v7_0_mc_resume(adev);
 
-	/* disable clock gating */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), 0,
-			~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
+	for (k = 0; k < adev->uvd.num_uvd_inst; ++k) {
+		ring = &adev->uvd.inst[k].ring;
+		/* disable clock gating */
+		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_CGC_CTRL), 0,
+				~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK);
 
-	/* disable interupt */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
-			~UVD_MASTINT_EN__VCPU_EN_MASK);
+		/* disable interrupt */
+		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN), 0,
+				~UVD_MASTINT_EN__VCPU_EN_MASK);
 
-	/* stall UMC and register bus before resetting VCPU */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
-			UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
-			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-	mdelay(1);
+		/* stall UMC and register bus before resetting VCPU */
+		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2),
+				UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+		mdelay(1);
 
-	/* put LMI, VCPU, RBC etc... into reset */
-	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
-		UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
-	mdelay(5);
+		/* put LMI, VCPU, RBC etc... into reset */
+		WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+			UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+			UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+			UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+			UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+			UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+			UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+			UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+			UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+		mdelay(5);
 
-	/* initialize UVD memory controller */
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
-		(0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
-		UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
-		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
-		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
-		UVD_LMI_CTRL__REQ_MODE_MASK |
-		0x00100000L);
+		/* initialize UVD memory controller */
+		WREG32_SOC15(UVD, k, mmUVD_LMI_CTRL,
+			(0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+			UVD_LMI_CTRL__REQ_MODE_MASK |
+			0x00100000L);
 
 #ifdef __BIG_ENDIAN
-	/* swap (8 in 32) RB and IB */
-	lmi_swap_cntl = 0xa;
-	mp_swap_cntl = 0;
+		/* swap (8 in 32) RB and IB */
+		lmi_swap_cntl = 0xa;
+		mp_swap_cntl = 0;
 #endif
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
-	WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+		WREG32_SOC15(UVD, k, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+		WREG32_SOC15(UVD, k, mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
 
-	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
-	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
-	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
-	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
-	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
-	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
+		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA0, 0x40c2040);
+		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXA1, 0x0);
+		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB0, 0x40c2040);
+		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUXB1, 0x0);
+		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_ALU, 0);
+		WREG32_SOC15(UVD, k, mmUVD_MPC_SET_MUX, 0x88);
 
-	/* take all subblocks out of reset, except VCPU */
-	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
-			UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-	mdelay(5);
+		/* take all subblocks out of reset, except VCPU */
+		WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET,
+				UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+		mdelay(5);
 
-	/* enable VCPU clock */
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
-			UVD_VCPU_CNTL__CLK_EN_MASK);
+		/* enable VCPU clock */
+		WREG32_SOC15(UVD, k, mmUVD_VCPU_CNTL,
+				UVD_VCPU_CNTL__CLK_EN_MASK);
 
-	/* enable UMC */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
-			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+		/* enable UMC */
+		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_LMI_CTRL2), 0,
+				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
-	/* boot up the VCPU */
-	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
-	mdelay(10);
+		/* boot up the VCPU */
+		WREG32_SOC15(UVD, k, mmUVD_SOFT_RESET, 0);
+		mdelay(10);
 
-	for (i = 0; i < 10; ++i) {
-		uint32_t status;
+		for (i = 0; i < 10; ++i) {
+			uint32_t status;
 
-		for (j = 0; j < 100; ++j) {
-			status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+			for (j = 0; j < 100; ++j) {
+				status = RREG32_SOC15(UVD, k, mmUVD_STATUS);
+				if (status & 2)
+					break;
+				mdelay(10);
+			}
+			r = 0;
 			if (status & 2)
 				break;
+
+			DRM_ERROR("UVD(%d) not responding, trying to reset the VCPU!!!\n", k);
+			WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET),
+					UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+					~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
 			mdelay(10);
+			WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_SOFT_RESET), 0,
+					~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+			mdelay(10);
+			r = -1;
 		}
-		r = 0;
-		if (status & 2)
-			break;
 
-		DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
-		WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-				UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
-				~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-		mdelay(10);
-		WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
-				~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-		mdelay(10);
-		r = -1;
+		if (r) {
+			DRM_ERROR("UVD(%d) not responding, giving up!!!\n", k);
+			return r;
+		}
+		/* enable master interrupt */
+		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_MASTINT_EN),
+			(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+			~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+		/* clear the bit 4 of UVD_STATUS */
+		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_STATUS), 0,
+				~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+		/* force RBC into idle state */
+		rb_bufsz = order_base_2(ring->ring_size);
+		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_CNTL, tmp);
+
+		/* set the write pointer delay */
+		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+		/* set the wb address */
+		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR_ADDR,
+				(upper_32_bits(ring->gpu_addr) >> 2));
+
+		/* program the RB_BASE for ring buffer */
+		WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+				lower_32_bits(ring->gpu_addr));
+		WREG32_SOC15(UVD, k, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+				upper_32_bits(ring->gpu_addr));
+
+		/* Initialize the ring buffer's read and write pointers */
+		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR, 0);
+
+		ring->wptr = RREG32_SOC15(UVD, k, mmUVD_RBC_RB_RPTR);
+		WREG32_SOC15(UVD, k, mmUVD_RBC_RB_WPTR,
+				lower_32_bits(ring->wptr));
+
+		WREG32_P(SOC15_REG_OFFSET(UVD, k, mmUVD_RBC_RB_CNTL), 0,
+				~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+		ring = &adev->uvd.inst[k].ring_enc[0];
+		WREG32_SOC15(UVD, k, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+		WREG32_SOC15(UVD, k, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+		WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO, ring->gpu_addr);
+		WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+		WREG32_SOC15(UVD, k, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+		ring = &adev->uvd.inst[k].ring_enc[1];
+		WREG32_SOC15(UVD, k, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+		WREG32_SOC15(UVD, k, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+		WREG32_SOC15(UVD, k, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+		WREG32_SOC15(UVD, k, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+		WREG32_SOC15(UVD, k, mmUVD_RB_SIZE2, ring->ring_size / 4);
 	}
-
-	if (r) {
-		DRM_ERROR("UVD not responding, giving up!!!\n");
-		return r;
-	}
-	/* enable master interrupt */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
-		(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
-		~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
-
-	/* clear the bit 4 of UVD_STATUS */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
-			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
-
-	/* force RBC into idle state */
-	rb_bufsz = order_base_2(ring->ring_size);
-	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
-	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
-	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
-	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
-	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
-	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
-
-	/* set the write pointer delay */
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
-
-	/* set the wb address */
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
-			(upper_32_bits(ring->gpu_addr) >> 2));
-
-	/* programm the RB_BASE for ring buffer */
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
-			lower_32_bits(ring->gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
-			upper_32_bits(ring->gpu_addr));
-
-	/* Initialize the ring buffer's read and write pointers */
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
-
-	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
-			lower_32_bits(ring->wptr));
-
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
-			~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
-
-	ring = &adev->uvd.ring_enc[0];
-	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
-	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
-	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
-	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
-
-	ring = &adev->uvd.ring_enc[1];
-	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
-	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
-	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
-	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
-
 	return 0;
 }
 
@@ -1053,26 +1087,30 @@
  */
 static void uvd_v7_0_stop(struct amdgpu_device *adev)
 {
-	/* force RBC into idle state */
-	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
+	uint8_t i = 0;
 
-	/* Stall UMC and register bus before resetting VCPU */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
-			UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
-			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
-	mdelay(1);
+	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
+		/* force RBC into idle state */
+		WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, 0x11010101);
 
-	/* put VCPU into reset */
-	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
-			UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-	mdelay(5);
+		/* Stall UMC and register bus before resetting VCPU */
+		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2),
+				UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+		mdelay(1);
 
-	/* disable VCPU clock */
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
+		/* put VCPU into reset */
+		WREG32_SOC15(UVD, i, mmUVD_SOFT_RESET,
+				UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+		mdelay(5);
 
-	/* Unstall UMC and register bus */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
-			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+		/* disable VCPU clock */
+		WREG32_SOC15(UVD, i, mmUVD_VCPU_CNTL, 0x0);
+
+		/* Unstall UMC and register bus */
+		WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0,
+				~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+	}
 }
 
 /**
@@ -1091,26 +1129,26 @@
 	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
 
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
 	amdgpu_ring_write(ring, seq);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, addr & 0xffffffff);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
 	amdgpu_ring_write(ring, 0);
 
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
 	amdgpu_ring_write(ring, 2);
 }
 
@@ -1136,6 +1174,16 @@
 }
 
 /**
+ * uvd_v7_0_ring_emit_hdp_flush - skip HDP flushing
+ *
+ * @ring: amdgpu_ring pointer
+ */
+static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	/* The firmware doesn't seem to like touching registers at this point. */
+}
+
+/**
  * uvd_v7_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -1149,30 +1197,30 @@
 	unsigned i;
 	int r;
 
-	WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
+	WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD);
 	r = amdgpu_ring_alloc(ring, 3);
 	if (r) {
-		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
-			  ring->idx, r);
+		DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n",
+			  ring->me, ring->idx, r);
 		return r;
 	}
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0));
 	amdgpu_ring_write(ring, 0xDEADBEEF);
 	amdgpu_ring_commit(ring);
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID);
+		tmp = RREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID);
 		if (tmp == 0xDEADBEEF)
 			break;
 		DRM_UDELAY(1);
 	}
 
 	if (i < adev->usec_timeout) {
-		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
-			 ring->idx, i);
+		DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n",
+			 ring->me, ring->idx, i);
 	} else {
-		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
+		DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n",
+			  ring->me, ring->idx, tmp);
 		r = -EINVAL;
 	}
 	return r;
@@ -1193,17 +1241,17 @@
 	struct amdgpu_device *adev = ring->adev;
 
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0));
 	amdgpu_ring_write(ring, vmid);
 
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_RBC_IB_SIZE), 0));
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
@@ -1231,13 +1279,13 @@
 	struct amdgpu_device *adev = ring->adev;
 
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, reg << 2);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
 	amdgpu_ring_write(ring, val);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
 	amdgpu_ring_write(ring, 8);
 }
 
@@ -1247,16 +1295,16 @@
 	struct amdgpu_device *adev = ring->adev;
 
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA0), 0));
 	amdgpu_ring_write(ring, reg << 2);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_DATA1), 0));
 	amdgpu_ring_write(ring, val);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GP_SCRATCH8), 0));
 	amdgpu_ring_write(ring, mask);
 	amdgpu_ring_write(ring,
-		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+		PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_GPCOM_VCPU_CMD), 0));
 	amdgpu_ring_write(ring, 12);
 }
 
@@ -1277,12 +1325,15 @@
 
 static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-	int i;
 	struct amdgpu_device *adev = ring->adev;
+	int i;
 
-	for (i = 0; i < count; i++)
-		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
+	WARN_ON(ring->wptr % 2 || count % 2);
 
+	for (i = 0; i < count / 2; i++) {
+		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_NO_OP), 0));
+		amdgpu_ring_write(ring, 0);
+	}
 }
 
 static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
@@ -1349,16 +1400,16 @@
 
 	if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) ||
 	    REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) ||
-	    (RREG32_SOC15(UVD, 0, mmUVD_STATUS) &
+	    (RREG32_SOC15(UVD, ring->me, mmUVD_STATUS) &
 		    AMDGPU_UVD_STATUS_BUSY_MASK))
 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
 				SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
 
 	if (srbm_soft_reset) {
-		adev->uvd.srbm_soft_reset = srbm_soft_reset;
+		adev->uvd.inst[ring->me].srbm_soft_reset = srbm_soft_reset;
 		return true;
 	} else {
-		adev->uvd.srbm_soft_reset = 0;
+		adev->uvd.inst[ring->me].srbm_soft_reset = 0;
 		return false;
 	}
 }
@@ -1367,7 +1418,7 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!adev->uvd.srbm_soft_reset)
+	if (!adev->uvd.inst[ring->me].srbm_soft_reset)
 		return 0;
 
 	uvd_v7_0_stop(adev);
@@ -1379,9 +1430,9 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	u32 srbm_soft_reset;
 
-	if (!adev->uvd.srbm_soft_reset)
+	if (!adev->uvd.inst[ring->me].srbm_soft_reset)
 		return 0;
-	srbm_soft_reset = adev->uvd.srbm_soft_reset;
+	srbm_soft_reset = adev->uvd.inst[ring->me].srbm_soft_reset;
 
 	if (srbm_soft_reset) {
 		u32 tmp;
@@ -1409,7 +1460,7 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!adev->uvd.srbm_soft_reset)
+	if (!adev->uvd.inst[ring->me].srbm_soft_reset)
 		return 0;
 
 	mdelay(5);
@@ -1431,17 +1482,32 @@
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
+	uint32_t ip_instance;
+
+	switch (entry->client_id) {
+	case SOC15_IH_CLIENTID_UVD:
+		ip_instance = 0;
+		break;
+	case SOC15_IH_CLIENTID_UVD1:
+		ip_instance = 1;
+		break;
+	default:
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
 	DRM_DEBUG("IH: UVD TRAP\n");
+
 	switch (entry->src_id) {
 	case 124:
-		amdgpu_fence_process(&adev->uvd.ring);
+		amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring);
 		break;
 	case 119:
-		amdgpu_fence_process(&adev->uvd.ring_enc[0]);
+		amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[0]);
 		break;
 	case 120:
 		if (!amdgpu_sriov_vf(adev))
-			amdgpu_fence_process(&adev->uvd.ring_enc[1]);
+			amdgpu_fence_process(&adev->uvd.inst[ip_instance].ring_enc[1]);
 		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
@@ -1457,9 +1523,9 @@
 {
 	uint32_t data, data1, data2, suvd_flags;
 
-	data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL);
-	data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
-	data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL);
+	data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL);
+	data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
+	data2 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL);
 
 	data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK |
 		  UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK);
@@ -1503,18 +1569,18 @@
 			UVD_SUVD_CGC_CTRL__SDB_MODE_MASK);
 	data1 |= suvd_flags;
 
-	WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data);
-	WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0);
-	WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
-	WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2);
+	WREG32_SOC15(UVD, ring->me, mmUVD_CGC_CTRL, data);
+	WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, 0);
+	WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
+	WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_CTRL, data2);
 }
 
 static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev)
 {
 	uint32_t data, data1, cgc_flags, suvd_flags;
 
-	data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE);
-	data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE);
+	data = RREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE);
+	data1 = RREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE);
 
 	cgc_flags = UVD_CGC_GATE__SYS_MASK |
 		UVD_CGC_GATE__UDEC_MASK |
@@ -1546,8 +1612,8 @@
 	data |= cgc_flags;
 	data1 |= suvd_flags;
 
-	WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data);
-	WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1);
+	WREG32_SOC15(UVD, ring->me, mmUVD_CGC_GATE, data);
+	WREG32_SOC15(UVD, ring->me, mmUVD_SUVD_CGC_GATE, data1);
 }
 
 static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
@@ -1606,7 +1672,7 @@
 	if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
 		return 0;
 
-	WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
+	WREG32_SOC15(UVD, ring->me, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
 
 	if (state == AMD_PG_STATE_GATE) {
 		uvd_v7_0_stop(adev);
@@ -1647,14 +1713,13 @@
 static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_UVD,
 	.align_mask = 0xf,
-	.nop = PACKET0(0x81ff, 0),
 	.support_64bit_ptrs = false,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = uvd_v7_0_ring_get_rptr,
 	.get_wptr = uvd_v7_0_ring_get_wptr,
 	.set_wptr = uvd_v7_0_ring_set_wptr,
 	.emit_frame_size =
-		6 + 6 + /* hdp flush / invalidate */
+		6 + /* hdp invalidate */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
 		8 + /* uvd_v7_0_ring_emit_vm_flush */
@@ -1663,6 +1728,7 @@
 	.emit_ib = uvd_v7_0_ring_emit_ib,
 	.emit_fence = uvd_v7_0_ring_emit_fence,
 	.emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
+	.emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
 	.test_ring = uvd_v7_0_ring_test_ring,
 	.test_ib = amdgpu_uvd_ring_test_ib,
 	.insert_nop = uvd_v7_0_ring_insert_nop,
@@ -1671,6 +1737,7 @@
 	.end_use = amdgpu_uvd_ring_end_use,
 	.emit_wreg = uvd_v7_0_ring_emit_wreg,
 	.emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
 static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
@@ -1702,22 +1769,32 @@
 	.end_use = amdgpu_uvd_ring_end_use,
 	.emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
 	.emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
 static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
 {
-	adev->uvd.ring.funcs = &uvd_v7_0_ring_vm_funcs;
-	DRM_INFO("UVD is enabled in VM mode\n");
+	int i;
+
+	for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+		adev->uvd.inst[i].ring.funcs = &uvd_v7_0_ring_vm_funcs;
+		adev->uvd.inst[i].ring.me = i;
+		DRM_INFO("UVD(%d) is enabled in VM mode\n", i);
+	}
 }
 
 static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; i < adev->uvd.num_enc_rings; ++i)
-		adev->uvd.ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+	for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
+		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
+			adev->uvd.inst[j].ring_enc[i].funcs = &uvd_v7_0_enc_ring_vm_funcs;
+			adev->uvd.inst[j].ring_enc[i].me = j;
+		}
 
-	DRM_INFO("UVD ENC is enabled in VM mode\n");
+		DRM_INFO("UVD(%d) ENC is enabled in VM mode\n", j);
+	}
 }
 
 static const struct amdgpu_irq_src_funcs uvd_v7_0_irq_funcs = {
@@ -1727,8 +1804,12 @@
 
 static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev)
 {
-	adev->uvd.irq.num_types = adev->uvd.num_enc_rings + 1;
-	adev->uvd.irq.funcs = &uvd_v7_0_irq_funcs;
+	int i;
+
+	for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
+		adev->uvd.inst[i].irq.num_types = adev->uvd.num_enc_rings + 1;
+		adev->uvd.inst[i].irq.funcs = &uvd_v7_0_irq_funcs;
+	}
 }
 
 const struct amdgpu_ip_block_version uvd_v7_0_ip_block =
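
The uvd_v7_0.c changes above all apply one pattern: every hardcoded instance `0` in the SOC15 register helpers becomes either a loop counter (`k`/`i` in the start/stop paths) or `ring->me`, which uvd_v7_0_set_ring_funcs() stamps on each ring so that the shared ring callbacks address the right register aperture. A minimal sketch of that pattern, with simplified stand-in types and base addresses rather than the real amdgpu structs:

/* Minimal sketch of the multi-instance pattern above.  The types and
 * base addresses are simplified stand-ins, not the amdgpu ones. */
#include <stdio.h>

#define NUM_INST 2

struct ring {
	unsigned int me;		/* owning instance index */
};

struct device {
	unsigned int num_inst;
	struct ring ring[NUM_INST];
};

/* stand-in for SOC15_REG_OFFSET(UVD, inst, reg) */
static unsigned int reg_offset(unsigned int inst, unsigned int reg)
{
	static const unsigned int base[NUM_INST] = { 0x7800, 0x7e00 };
	return base[inst] + reg;
}

/* a ring callback no longer hardcodes instance 0 */
static void ring_touch_reg(const struct ring *ring, unsigned int reg)
{
	printf("instance %u -> reg 0x%x\n", ring->me,
	       reg_offset(ring->me, reg));
}

int main(void)
{
	struct device dev = { .num_inst = NUM_INST };
	unsigned int k;

	/* init paths loop over all instances instead of touching only 0 */
	for (k = 0; k < dev.num_inst; ++k)
		dev.ring[k].me = k;

	ring_touch_reg(&dev.ring[0], 0x10);
	ring_touch_reg(&dev.ring[1], 0x10);
	return 0;
}
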
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 428d192..0999c84 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -388,7 +388,8 @@
 	default:
 		if ((adev->asic_type == CHIP_POLARIS10) ||
 		    (adev->asic_type == CHIP_POLARIS11) ||
-		    (adev->asic_type == CHIP_POLARIS12))
+		    (adev->asic_type == CHIP_POLARIS12) ||
+		    (adev->asic_type == CHIP_VEGAM))
 			return AMDGPU_VCE_HARVEST_VCE1;
 
 		return 0;
@@ -467,8 +468,8 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	vce_v3_0_override_vce_clock_gating(adev, true);
-	if (!(adev->flags & AMD_IS_APU))
-		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+
+	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
 
 	for (i = 0; i < adev->vce.num_rings; i++)
 		adev->vce.ring[i].ready = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 73fd48d..8fd1b74 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -1081,6 +1081,7 @@
 	.end_use = amdgpu_vce_ring_end_use,
 	.emit_wreg = vce_v4_0_emit_wreg,
 	.emit_reg_wait = vce_v4_0_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
 static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
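
Several ring-funcs tables in this series (UVD, VCE, and VCN, dec and enc rings alike) gain an .emit_reg_write_reg_wait callback pointing at a generic helper. Judging by the two primitives every one of these tables already provides, the helper presumably just chains them: emit a register write, then emit a wait for a register to reach a masked reference value. A hedged sketch of that composition, with stand-in types and printf bodies in place of real packet emission:

/* Sketch of a write-then-wait helper composed from the emit_wreg and
 * emit_reg_wait callbacks the tables above already supply.  Types are
 * stand-ins; printf stands in for real packet emission. */
#include <stdint.h>
#include <stdio.h>

struct ring;

struct ring_funcs {
	void (*emit_wreg)(struct ring *r, uint32_t reg, uint32_t val);
	void (*emit_reg_wait)(struct ring *r, uint32_t reg,
			      uint32_t ref, uint32_t mask);
};

struct ring {
	const struct ring_funcs *funcs;
};

static void emit_wreg(struct ring *r, uint32_t reg, uint32_t val)
{
	printf("WREG 0x%x <- 0x%x\n", reg, val);
}

static void emit_reg_wait(struct ring *r, uint32_t reg,
			  uint32_t ref, uint32_t mask)
{
	printf("WAIT (reg 0x%x & 0x%x) == 0x%x\n", reg, mask, ref);
}

/* generic helper: write reg0, then wait until (reg1 & mask) == ref */
static void emit_reg_write_reg_wait(struct ring *r, uint32_t reg0,
				    uint32_t reg1, uint32_t ref,
				    uint32_t mask)
{
	r->funcs->emit_wreg(r, reg0, ref);
	r->funcs->emit_reg_wait(r, reg1, ref, mask);
}

int main(void)
{
	const struct ring_funcs funcs = { emit_wreg, emit_reg_wait };
	struct ring ring = { &funcs };

	emit_reg_write_reg_wait(&ring, 0x10, 0x14, 0x1, 0x1);
	return 0;
}
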
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 8c13267..110b294 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -35,7 +35,6 @@
 #include "mmhub/mmhub_9_1_offset.h"
 #include "mmhub/mmhub_9_1_sh_mask.h"
 
-static int vcn_v1_0_start(struct amdgpu_device *adev);
 static int vcn_v1_0_stop(struct amdgpu_device *adev);
 static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
@@ -146,10 +145,6 @@
 	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
 	int i, r;
 
-	r = vcn_v1_0_start(adev);
-	if (r)
-		goto done;
-
 	ring->ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
@@ -185,11 +180,9 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
-	int r;
 
-	r = vcn_v1_0_stop(adev);
-	if (r)
-		return r;
+	if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
+		vcn_v1_0_stop(adev);
 
 	ring->ready = false;
 
@@ -288,14 +281,14 @@
  *
  * Disable clock gating for VCN block
  */
-static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev)
 {
 	uint32_t data;
 
 	/* JPEG disable CGC */
 	data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
 
-	if (sw)
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
 		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 	else
 		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
@@ -310,7 +303,7 @@
 
 	/* UVD disable CGC */
 	data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
-	if (sw)
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
 		data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 	else
 		data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
@@ -415,13 +408,13 @@
  *
  * Enable clock gating for VCN block
  */
-static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev)
 {
 	uint32_t data = 0;
 
 	/* enable JPEG CGC */
 	data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
-	if (sw)
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
 		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 	else
 		data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
@@ -435,7 +428,7 @@
 
 	/* enable UVD CGC */
 	data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
-	if (sw)
+	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
 		data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
 	else
 		data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
@@ -480,6 +473,94 @@
 	WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
 }
 
+static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev)
+{
+	uint32_t data = 0;
+	int ret;
+
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+
+		WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+		SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, UVD_PGFSM_STATUS__UVDM_UVDU_PWR_ON, 0xFFFFFF, ret);
+	} else {
+		data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+			| 1 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+		WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+		SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, 0,  0xFFFFFFFF, ret);
+	}
+
+	/* poll UVD_PGFSM_STATUS to confirm UVDM_PWR_STATUS and UVDU_PWR_STATUS are 0 (power on) */
+
+	data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+	data &= ~0x103;
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
+		data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | UVD_POWER_STATUS__UVD_PG_EN_MASK;
+
+	WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+}
+
+static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev)
+{
+	uint32_t data = 0;
+	int ret;
+
+	if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
+		/* Before power off, this indicator has to be turned on */
+		data = RREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS);
+		data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
+		data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
+		WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data);
+
+
+		data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDIL_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDIR_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
+			| 2 << UVD_PGFSM_CONFIG__UVDW_PWR_CONFIG__SHIFT);
+
+		WREG32_SOC15(VCN, 0, mmUVD_PGFSM_CONFIG, data);
+
+		data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDIL_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDIR_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
+			| 2 << UVD_PGFSM_STATUS__UVDW_PWR_STATUS__SHIFT);
+		SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_PGFSM_STATUS, data, 0xFFFFFFFF, ret);
+	}
+}
+
 /**
  * vcn_v1_0_start - start VCN block
  *
@@ -499,8 +580,9 @@
 
 	vcn_v1_0_mc_resume(adev);
 
+	vcn_1_0_disable_static_power_gating(adev);
 	/* disable clock gating */
-	vcn_v1_0_disable_clock_gating(adev, true);
+	vcn_v1_0_disable_clock_gating(adev);
 
 	/* disable interupt */
 	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
@@ -680,16 +762,45 @@
 	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
 			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
-	/* enable clock gating */
-	vcn_v1_0_enable_clock_gating(adev, true);
+	WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0);
 
+	vcn_v1_0_enable_clock_gating(adev);
+	vcn_1_0_enable_static_power_gating(adev);
 	return 0;
 }
 
+bool vcn_v1_0_is_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2);
+}
+
+int vcn_v1_0_wait_for_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int ret = 0;
+
+	SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret);
+
+	return ret;
+}
+
 static int vcn_v1_0_set_clockgating_state(void *handle,
 					  enum amd_clockgating_state state)
 {
-	/* needed for driver unload*/
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool enable = (state == AMD_CG_STATE_GATE);
+
+	if (enable) {
+		/* wait for STATUS to clear */
+		if (!vcn_v1_0_is_idle(handle))
+			return -EBUSY;
+		vcn_v1_0_enable_clock_gating(adev);
+	} else {
+		/* disable HW gating and enable Sw gating */
+		vcn_v1_0_disable_clock_gating(adev);
+	}
 	return 0;
 }
 
@@ -1048,16 +1159,36 @@
 	return 0;
 }
 
-static void vcn_v1_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-	int i;
 	struct amdgpu_device *adev = ring->adev;
+	int i;
 
-	for (i = 0; i < count; i++)
+	WARN_ON(ring->wptr % 2 || count % 2);
+
+	for (i = 0; i < count / 2; i++) {
 		amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0));
-
+		amdgpu_ring_write(ring, 0);
+	}
 }
 
+static int vcn_v1_0_set_powergating_state(void *handle,
+					  enum amd_powergating_state state)
+{
+	/* This doesn't actually powergate the VCN block.
+	 * That is done in the dpm code via the SMC; this
+	 * function only re-inits the block as necessary.
+	 * The actual gating still happens in the dpm code.
+	 * We should revisit this when there is a cleaner
+	 * line between the smc and the hw blocks.
+	 */
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (state == AMD_PG_STATE_GATE)
+		return vcn_v1_0_stop(adev);
+	else
+		return vcn_v1_0_start(adev);
+}
 
 static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
 	.name = "vcn_v1_0",
@@ -1069,20 +1200,19 @@
 	.hw_fini = vcn_v1_0_hw_fini,
 	.suspend = vcn_v1_0_suspend,
 	.resume = vcn_v1_0_resume,
-	.is_idle = NULL /* vcn_v1_0_is_idle */,
-	.wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */,
+	.is_idle = vcn_v1_0_is_idle,
+	.wait_for_idle = vcn_v1_0_wait_for_idle,
 	.check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
 	.pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
 	.soft_reset = NULL /* vcn_v1_0_soft_reset */,
 	.post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
 	.set_clockgating_state = vcn_v1_0_set_clockgating_state,
-	.set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */,
+	.set_powergating_state = vcn_v1_0_set_powergating_state,
 };
 
 static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
 	.type = AMDGPU_RING_TYPE_VCN_DEC,
 	.align_mask = 0xf,
-	.nop = PACKET0(0x81ff, 0),
 	.support_64bit_ptrs = false,
 	.vmhub = AMDGPU_MMHUB,
 	.get_rptr = vcn_v1_0_dec_ring_get_rptr,
@@ -1101,7 +1231,7 @@
 	.emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
 	.test_ring = amdgpu_vcn_dec_ring_test_ring,
 	.test_ib = amdgpu_vcn_dec_ring_test_ib,
-	.insert_nop = vcn_v1_0_ring_insert_nop,
+	.insert_nop = vcn_v1_0_dec_ring_insert_nop,
 	.insert_start = vcn_v1_0_dec_ring_insert_start,
 	.insert_end = vcn_v1_0_dec_ring_insert_end,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
@@ -1109,6 +1239,7 @@
 	.end_use = amdgpu_vcn_ring_end_use,
 	.emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
 	.emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
 static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
@@ -1139,6 +1270,7 @@
 	.end_use = amdgpu_vcn_ring_end_use,
 	.emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
 	.emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
 static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
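
The two PGFSM helpers added above share one shape: write a per-tile power request into UVD_PGFSM_CONFIG, then spin on UVD_PGFSM_STATUS until the masked value matches the expected pattern, which is what the SOC15_WAIT_ON_RREG macro does for them. A self-contained sketch of that poll-until-match idiom (the simulated register below merely settles after a few reads; real code reads hardware and delays between polls):

/* Poll-until-match idiom in the style of SOC15_WAIT_ON_RREG.  The
 * "register" here is simulated; real code would read hardware. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static int reads;

static uint32_t read_status(void)
{
	/* simulate hardware that reports "powered on" after 3 reads */
	return (++reads >= 3) ? 0x00000000 : 0x00000fff;
}

static int wait_on_status(uint32_t expect, uint32_t mask,
			  unsigned int timeout)
{
	unsigned int i;

	for (i = 0; i < timeout; ++i) {
		if ((read_status() & mask) == expect)
			return 0;	/* matched */
		/* a real implementation would udelay() here */
	}
	return -ETIMEDOUT;
}

int main(void)
{
	printf("result: %d after %d reads\n",
	       wait_on_status(0, 0xFFFFFF, 10), reads);
	return 0;
}
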
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
new file mode 100644
index 0000000..52778de
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "soc15.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "vega20_ip_offset.h"
+
+int vega20_reg_base_init(struct amdgpu_device *adev)
+{
+	/* HW has more IP blocks, only initialize the blocks used by our driver */
+	uint32_t i;
+	for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+		adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+		adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
+		adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
+		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+		adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DCE_BASE.instance[i]));
+		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
+		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
+		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+	}
+	return 0;
+}
+
+
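
vega20_reg_base_init() only fills a lookup table: for every IP block it points reg_offset[hwip][instance] at that instance's table of segment base addresses, and the SOC15 register macros then resolve a register by adding its offset to the matching segment base. A rough stand-in illustration of that lookup (all names and addresses below are invented; see soc15_common.h for the real macros):

/* Rough illustration of how a reg_offset table like the one filled by
 * vega20_reg_base_init() is consumed.  All names/addresses invented. */
#include <stdint.h>
#include <stdio.h>

#define MAX_INSTANCE 2
#define MAX_SEGMENT  4

enum { UVD_HWIP, NUM_HWIP };

struct ip_base { uint32_t segment[MAX_SEGMENT]; };
struct ip_map  { struct ip_base instance[MAX_INSTANCE]; };

static const struct ip_map UVD_BASE = {
	.instance = { {{ 0x7800, 0 }}, {{ 0x7e00, 0 }} },
};

struct device {
	const uint32_t *reg_offset[NUM_HWIP][MAX_INSTANCE];
};

int main(void)
{
	struct device dev;
	uint32_t i, reg = 0x1fd, base_idx = 0;	/* invented register */

	/* same move as vega20_reg_base_init(): one pointer per instance */
	for (i = 0; i < MAX_INSTANCE; ++i)
		dev.reg_offset[UVD_HWIP][i] =
			(const uint32_t *)&UVD_BASE.instance[i];

	/* a SOC15_REG_OFFSET-style lookup: segment base + register */
	printf("inst1 reg at 0x%x\n",
	       dev.reg_offset[UVD_HWIP][1][base_idx] + reg);
	return 0;
}
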
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 126f127..4ac1288 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -305,9 +305,10 @@
 							stoney_mgcg_cgcg_init,
 							ARRAY_SIZE(stoney_mgcg_cgcg_init));
 		break;
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 	default:
 		break;
 	}
@@ -728,33 +729,59 @@
 		return r;
 
 	tmp = RREG32_SMC(cntl_reg);
-	tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
-		CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
+
+	if (adev->flags & AMD_IS_APU)
+		tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK;
+	else
+		tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
+				CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
 	tmp |= dividers.post_divider;
 	WREG32_SMC(cntl_reg, tmp);
 
 	for (i = 0; i < 100; i++) {
-		if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK)
-			break;
+		tmp = RREG32_SMC(status_reg);
+		if (adev->flags & AMD_IS_APU) {
+			if (tmp & 0x10000)
+				break;
+		} else {
+			if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK)
+				break;
+		}
 		mdelay(10);
 	}
 	if (i == 100)
 		return -ETIMEDOUT;
-
 	return 0;
 }
 
+#define ixGNB_CLK1_DFS_CNTL 0xD82200F0
+#define ixGNB_CLK1_STATUS   0xD822010C
+#define ixGNB_CLK2_DFS_CNTL 0xD8220110
+#define ixGNB_CLK2_STATUS   0xD822012C
+#define ixGNB_CLK3_DFS_CNTL 0xD8220130
+#define ixGNB_CLK3_STATUS   0xD822014C
+
 static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
 {
 	int r;
 
-	r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
-	if (r)
-		return r;
+	if (adev->flags & AMD_IS_APU) {
+		r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS);
+		if (r)
+			return r;
 
-	r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
-	if (r)
-		return r;
+		r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS);
+		if (r)
+			return r;
+	} else {
+		r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
+		if (r)
+			return r;
+
+		r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
+		if (r)
+			return r;
+	}
 
 	return 0;
 }
@@ -764,6 +791,22 @@
 	int r, i;
 	struct atom_clock_dividers dividers;
 	u32 tmp;
+	u32 reg_ctrl;
+	u32 reg_status;
+	u32 status_mask;
+	u32 reg_mask;
+
+	if (adev->flags & AMD_IS_APU) {
+		reg_ctrl = ixGNB_CLK3_DFS_CNTL;
+		reg_status = ixGNB_CLK3_STATUS;
+		status_mask = 0x00010000;
+		reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+	} else {
+		reg_ctrl = ixCG_ECLK_CNTL;
+		reg_status = ixCG_ECLK_STATUS;
+		status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK;
+		reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+	}
 
 	r = amdgpu_atombios_get_clock_dividers(adev,
 					       COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
@@ -772,24 +815,25 @@
 		return r;
 
 	for (i = 0; i < 100; i++) {
-		if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+		if (RREG32_SMC(reg_status) & status_mask)
 			break;
 		mdelay(10);
 	}
+
 	if (i == 100)
 		return -ETIMEDOUT;
 
-	tmp = RREG32_SMC(ixCG_ECLK_CNTL);
-	tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK |
-		CG_ECLK_CNTL__ECLK_DIVIDER_MASK);
+	tmp = RREG32_SMC(reg_ctrl);
+	tmp &= ~reg_mask;
 	tmp |= dividers.post_divider;
-	WREG32_SMC(ixCG_ECLK_CNTL, tmp);
+	WREG32_SMC(reg_ctrl, tmp);
 
 	for (i = 0; i < 100; i++) {
-		if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+		if (RREG32_SMC(reg_status) & status_mask)
 			break;
 		mdelay(10);
 	}
+
 	if (i == 100)
 		return -ETIMEDOUT;
 
@@ -876,6 +920,27 @@
 	}
 }
 
+static bool vi_need_full_reset(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		/* CZ has hang issues with full reset at the moment */
+		return false;
+	case CHIP_FIJI:
+	case CHIP_TONGA:
+		/* XXX: soft reset should work on fiji and tonga */
+		return true;
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_TOPAZ:
+	default:
+		/* change this when we support soft reset */
+		return true;
+	}
+}
+
 static const struct amdgpu_asic_funcs vi_asic_funcs =
 {
 	.read_disabled_bios = &vi_read_disabled_bios,
@@ -889,6 +954,7 @@
 	.get_config_memsize = &vi_get_config_memsize,
 	.flush_hdp = &vi_flush_hdp,
 	.invalidate_hdp = &vi_invalidate_hdp,
+	.need_full_reset = &vi_need_full_reset,
 };
 
 #define CZ_REV_BRISTOL(rev)	 \
@@ -1031,6 +1097,30 @@
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x64;
 		break;
+	case CHIP_VEGAM:
+		adev->cg_flags = 0;
+			/*AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_RLC_LS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_GFX_CGCG |
+			AMD_CG_SUPPORT_GFX_CGLS |
+			AMD_CG_SUPPORT_GFX_3D_CGCG |
+			AMD_CG_SUPPORT_GFX_3D_CGLS |
+			AMD_CG_SUPPORT_SDMA_MGCG |
+			AMD_CG_SUPPORT_SDMA_LS |
+			AMD_CG_SUPPORT_BIF_MGCG |
+			AMD_CG_SUPPORT_BIF_LS |
+			AMD_CG_SUPPORT_HDP_MGCG |
+			AMD_CG_SUPPORT_HDP_LS |
+			AMD_CG_SUPPORT_ROM_MGCG |
+			AMD_CG_SUPPORT_MC_MGCG |
+			AMD_CG_SUPPORT_MC_LS |
+			AMD_CG_SUPPORT_DRM_LS |
+			AMD_CG_SUPPORT_UVD_MGCG |
+			AMD_CG_SUPPORT_VCE_MGCG;*/
+		adev->pg_flags = 0;
+		adev->external_rev_id = adev->rev_id + 0x6E;
+		break;
 	case CHIP_CARRIZO:
 		adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
 			AMD_CG_SUPPORT_GFX_MGCG |
@@ -1422,6 +1512,7 @@
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		vi_common_set_clockgating_state_by_smu(adev, state);
 	default:
 		break;
@@ -1551,9 +1642,10 @@
 			amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block);
 		}
 		break;
-	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 		amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
 		amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
 		amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
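
The vi.c clock changes follow one refactor: the APU and dGPU paths differ only in which control/status registers, status bit, and divider mask they touch (the GNB_CLK*_DFS registers vs the CG_*CLK ones), so vi_set_vce_clocks() now picks the register set up front and keeps a single program-and-poll body. A small sketch of that select-once/share-body shape (the APU values below are the GNB_CLK3 defines from the hunk; the dGPU values are invented placeholders):

/* Select-once/share-body pattern from vi_set_vce_clocks().  The dGPU
 * register numbers here are invented placeholders. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct clk_regs {
	uint32_t ctrl, status, status_mask, divider_mask;
};

static void program_eclk(bool is_apu, uint32_t post_divider)
{
	/* choose the register set up front... */
	const struct clk_regs r = is_apu ?
		(struct clk_regs){ 0xD8220130, 0xD822014C, 0x00010000, 0xff } :
		(struct clk_regs){ 0x00000140, 0x00000144, 0x00000001, 0xff };

	/* ...then write the sequence once: RMW divider, poll status */
	printf("RMW 0x%08x: clear 0x%x, or in 0x%x; poll 0x%08x & 0x%x\n",
	       r.ctrl, r.divider_mask, post_divider, r.status,
	       r.status_mask);
}

int main(void)
{
	program_eclk(true, 4);
	program_eclk(false, 4);
	return 0;
}
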
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d02422..ffd096f 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -30,12 +30,14 @@
 		kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
 		kfd_process.o kfd_queue.o kfd_mqd_manager.o \
 		kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
+		kfd_mqd_manager_v9.o \
 		kfd_kernel_queue.o kfd_kernel_queue_cik.o \
-		kfd_kernel_queue_vi.o kfd_packet_manager.o \
-		kfd_process_queue_manager.o kfd_device_queue_manager.o \
-		kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
+		kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
+		kfd_packet_manager.o kfd_process_queue_manager.o \
+		kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \
+		kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \
 		kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
-		kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+		kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 amdkfd-y += kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 3d5ccb3..49df6c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -27,18 +27,28 @@
 static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 					const uint32_t *ih_ring_entry)
 {
-	unsigned int pasid;
 	const struct cik_ih_ring_entry *ihre =
 			(const struct cik_ih_ring_entry *)ih_ring_entry;
+	unsigned int vmid, pasid;
 
+	/* Only handle interrupts from KFD VMIDs */
+	vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
+	if (vmid < dev->vm_info.first_vmid_kfd ||
+	    vmid > dev->vm_info.last_vmid_kfd)
+		return 0;
+
+	/* If there is no valid PASID, it's likely a firmware bug */
 	pasid = (ihre->ring_id & 0xffff0000) >> 16;
+	if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+		return 0;
 
-	/* Do not process in ISR, just request it to be forwarded to WQ. */
-	return (pasid != 0) &&
-		(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+	/* Interrupt types we care about: various signals and faults.
+	 * They will be forwarded to a work queue (see below).
+	 */
+	return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
 		ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
 		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
-		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
+		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
 }
 
 static void cik_event_interrupt_wq(struct kfd_dev *dev,
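
The reworked ISR above decodes two fields out of the 32-bit ring_id word before deciding whether to forward the event: per the masks in the hunk, bits 15:8 carry the VMID and bits 31:16 the PASID. Extracting them is plain shift-and-mask:

/* Field extraction exactly as the masks in the ISR above imply:
 * ring_id bits 15:8 = VMID, bits 31:16 = PASID. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ring_id = 0x00420500;	/* example value, made up */
	unsigned int vmid  = (ring_id & 0x0000ff00) >> 8;
	unsigned int pasid = (ring_id & 0xffff0000) >> 16;

	printf("vmid=%u pasid=%u\n", vmid, pasid);	/* vmid=5 pasid=66 */
	return 0;
}
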
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 48769d1..37ce6dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -33,7 +33,8 @@
 #define	APE1_MTYPE(x)					((x) << 7)
 
 /* valid for both DEFAULT_MTYPE and APE1_MTYPE */
-#define	MTYPE_CACHED					0
+#define	MTYPE_CACHED_NV					0
+#define	MTYPE_CACHED					1
 #define	MTYPE_NONCACHED					3
 
 #define	DEFAULT_CP_HQD_PERSISTENT_STATE			(0x33U << 8)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
new file mode 100644
index 0000000..f68aef0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -0,0 +1,560 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+static const uint32_t cwsr_trap_gfx8_hex[] = {
+	0xbf820001, 0xbf820125,
+	0xb8f4f802, 0x89748674,
+	0xb8f5f803, 0x8675ff75,
+	0x00000400, 0xbf850011,
+	0xc00a1e37, 0x00000000,
+	0xbf8c007f, 0x87777978,
+	0xbf840002, 0xb974f802,
+	0xbe801d78, 0xb8f5f803,
+	0x8675ff75, 0x000001ff,
+	0xbf850002, 0x80708470,
+	0x82718071, 0x8671ff71,
+	0x0000ffff, 0xb974f802,
+	0xbe801f70, 0xb8f5f803,
+	0x8675ff75, 0x00000100,
+	0xbf840006, 0xbefa0080,
+	0xb97a0203, 0x8671ff71,
+	0x0000ffff, 0x80f08870,
+	0x82f18071, 0xbefa0080,
+	0xb97a0283, 0xbef60068,
+	0xbef70069, 0xb8fa1c07,
+	0x8e7a9c7a, 0x87717a71,
+	0xb8fa03c7, 0x8e7a9b7a,
+	0x87717a71, 0xb8faf807,
+	0x867aff7a, 0x00007fff,
+	0xb97af807, 0xbef2007e,
+	0xbef3007f, 0xbefe0180,
+	0xbf900004, 0x877a8474,
+	0xb97af802, 0xbf8e0002,
+	0xbf88fffe, 0xbef8007e,
+	0x8679ff7f, 0x0000ffff,
+	0x8779ff79, 0x00040000,
+	0xbefa0080, 0xbefb00ff,
+	0x00807fac, 0x867aff7f,
+	0x08000000, 0x8f7a837a,
+	0x877b7a7b, 0x867aff7f,
+	0x70000000, 0x8f7a817a,
+	0x877b7a7b, 0xbeef007c,
+	0xbeee0080, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x806e7a6e,
+	0xbefa0084, 0xbefa00ff,
+	0x01000000, 0xbefe007c,
+	0xbefc006e, 0xc0611bfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611c3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611c7c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611cbc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611cfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611d3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xb8f5f803,
+	0xbefe007c, 0xbefc006e,
+	0xc0611d7c, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xbefe007c, 0xbefc006e,
+	0xc0611dbc, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xbefe007c, 0xbefc006e,
+	0xc0611dfc, 0x0000007c,
+	0x806e846e, 0xbefc007e,
+	0xb8eff801, 0xbefe007c,
+	0xbefc006e, 0xc0611bfc,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611b3c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0xbefe007c,
+	0xbefc006e, 0xc0611b7c,
+	0x0000007c, 0x806e846e,
+	0xbefc007e, 0x867aff7f,
+	0x04000000, 0xbef30080,
+	0x8773737a, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8f51605, 0x80758175,
+	0x8e758475, 0x8e7a8275,
+	0xbefa00ff, 0x01000000,
+	0xbef60178, 0x80786e78,
+	0x82798079, 0xbefc0080,
+	0xbe802b00, 0xbe822b02,
+	0xbe842b04, 0xbe862b06,
+	0xbe882b08, 0xbe8a2b0a,
+	0xbe8c2b0c, 0xbe8e2b0e,
+	0xc06b003c, 0x00000000,
+	0xc06b013c, 0x00000010,
+	0xc06b023c, 0x00000020,
+	0xc06b033c, 0x00000030,
+	0x8078c078, 0x82798079,
+	0x807c907c, 0xbf0a757c,
+	0xbf85ffeb, 0xbef80176,
+	0xbeee0080, 0xbefe00c1,
+	0xbeff00c1, 0xbefa00ff,
+	0x01000000, 0xe0724000,
+	0x6e1e0000, 0xe0724100,
+	0x6e1e0100, 0xe0724200,
+	0x6e1e0200, 0xe0724300,
+	0x6e1e0300, 0xbefe00c1,
+	0xbeff00c1, 0xb8f54306,
+	0x8675c175, 0xbf84002c,
+	0xbf8a0000, 0x867aff73,
+	0x04000000, 0xbf840028,
+	0x8e758675, 0x8e758275,
+	0xbefa0075, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8fa1605, 0x807a817a,
+	0x8e7a867a, 0x806e7a6e,
+	0x806eff6e, 0x00000080,
+	0xbefa00ff, 0x01000000,
+	0xbefc0080, 0xd28c0002,
+	0x000100c1, 0xd28d0003,
+	0x000204c1, 0xd1060002,
+	0x00011103, 0x7e0602ff,
+	0x00000200, 0xbefc00ff,
+	0x00010000, 0xbe80007b,
+	0x867bff7b, 0xff7fffff,
+	0x877bff7b, 0x00058000,
+	0xd8ec0000, 0x00000002,
+	0xbf8c007f, 0xe0765000,
+	0x6e1e0002, 0x32040702,
+	0xd0c9006a, 0x0000eb02,
+	0xbf87fff7, 0xbefb0000,
+	0xbeee00ff, 0x00000400,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8f52a05, 0x80758175,
+	0x8e758275, 0x8e7a8875,
+	0xbefa00ff, 0x01000000,
+	0xbefc0084, 0xbf0a757c,
+	0xbf840015, 0xbf11017c,
+	0x8075ff75, 0x00001000,
+	0x7e000300, 0x7e020301,
+	0x7e040302, 0x7e060303,
+	0xe0724000, 0x6e1e0000,
+	0xe0724100, 0x6e1e0100,
+	0xe0724200, 0x6e1e0200,
+	0xe0724300, 0x6e1e0300,
+	0x807c847c, 0x806eff6e,
+	0x00000400, 0xbf0a757c,
+	0xbf85ffef, 0xbf9c0000,
+	0xbf8200ca, 0xbef8007e,
+	0x8679ff7f, 0x0000ffff,
+	0x8779ff79, 0x00040000,
+	0xbefa0080, 0xbefb00ff,
+	0x00807fac, 0x8676ff7f,
+	0x08000000, 0x8f768376,
+	0x877b767b, 0x8676ff7f,
+	0x70000000, 0x8f768176,
+	0x877b767b, 0x8676ff7f,
+	0x04000000, 0xbf84001e,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8f34306, 0x8673c173,
+	0xbf840019, 0x8e738673,
+	0x8e738273, 0xbefa0073,
+	0xb8f22a05, 0x80728172,
+	0x8e728a72, 0xb8f61605,
+	0x80768176, 0x8e768676,
+	0x80727672, 0x8072ff72,
+	0x00000080, 0xbefa00ff,
+	0x01000000, 0xbefc0080,
+	0xe0510000, 0x721e0000,
+	0xe0510100, 0x721e0000,
+	0x807cff7c, 0x00000200,
+	0x8072ff72, 0x00000200,
+	0xbf0a737c, 0xbf85fff6,
+	0xbef20080, 0xbefe00c1,
+	0xbeff00c1, 0xb8f32a05,
+	0x80738173, 0x8e738273,
+	0x8e7a8873, 0xbefa00ff,
+	0x01000000, 0xbef60072,
+	0x8072ff72, 0x00000400,
+	0xbefc0084, 0xbf11087c,
+	0x8073ff73, 0x00008000,
+	0xe0524000, 0x721e0000,
+	0xe0524100, 0x721e0100,
+	0xe0524200, 0x721e0200,
+	0xe0524300, 0x721e0300,
+	0xbf8c0f70, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0x807c847c,
+	0x8072ff72, 0x00000400,
+	0xbf0a737c, 0xbf85ffee,
+	0xbf9c0000, 0xe0524000,
+	0x761e0000, 0xe0524100,
+	0x761e0100, 0xe0524200,
+	0x761e0200, 0xe0524300,
+	0x761e0300, 0xb8f22a05,
+	0x80728172, 0x8e728a72,
+	0xb8f61605, 0x80768176,
+	0x8e768676, 0x80727672,
+	0x80f2c072, 0xb8f31605,
+	0x80738173, 0x8e738473,
+	0x8e7a8273, 0xbefa00ff,
+	0x01000000, 0xbefc0073,
+	0xc031003c, 0x00000072,
+	0x80f2c072, 0xbf8c007f,
+	0x80fc907c, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff1, 0xb8f22a05,
+	0x80728172, 0x8e728a72,
+	0xb8f61605, 0x80768176,
+	0x8e768676, 0x80727672,
+	0xbefa0084, 0xbefa00ff,
+	0x01000000, 0xc0211cfc,
+	0x00000072, 0x80728472,
+	0xc0211c3c, 0x00000072,
+	0x80728472, 0xc0211c7c,
+	0x00000072, 0x80728472,
+	0xc0211bbc, 0x00000072,
+	0x80728472, 0xc0211bfc,
+	0x00000072, 0x80728472,
+	0xc0211d3c, 0x00000072,
+	0x80728472, 0xc0211d7c,
+	0x00000072, 0x80728472,
+	0xc0211a3c, 0x00000072,
+	0x80728472, 0xc0211a7c,
+	0x00000072, 0x80728472,
+	0xc0211dfc, 0x00000072,
+	0x80728472, 0xc0211b3c,
+	0x00000072, 0x80728472,
+	0xc0211b7c, 0x00000072,
+	0x80728472, 0xbf8c007f,
+	0xbefc0073, 0xbefe006e,
+	0xbeff006f, 0x867375ff,
+	0x000003ff, 0xb9734803,
+	0x867375ff, 0xfffff800,
+	0x8f738b73, 0xb973a2c3,
+	0xb977f801, 0x8673ff71,
+	0xf0000000, 0x8f739c73,
+	0x8e739073, 0xbef60080,
+	0x87767376, 0x8673ff71,
+	0x08000000, 0x8f739b73,
+	0x8e738f73, 0x87767376,
+	0x8673ff74, 0x00800000,
+	0x8f739773, 0xb976f807,
+	0x8671ff71, 0x0000ffff,
+	0x86fe7e7e, 0x86ea6a6a,
+	0xb974f802, 0xbf8a0000,
+	0x95807370, 0xbf810000,
+};
+
+
+static const uint32_t cwsr_trap_gfx9_hex[] = {
+	0xbf820001, 0xbf82015a,
+	0xb8f8f802, 0x89788678,
+	0xb8f1f803, 0x866eff71,
+	0x00000400, 0xbf850034,
+	0x866eff71, 0x00000800,
+	0xbf850003, 0x866eff71,
+	0x00000100, 0xbf840008,
+	0x866eff78, 0x00002000,
+	0xbf840001, 0xbf810000,
+	0x8778ff78, 0x00002000,
+	0x80ec886c, 0x82ed806d,
+	0xb8eef807, 0x866fff6e,
+	0x001f8000, 0x8e6f8b6f,
+	0x8977ff77, 0xfc000000,
+	0x87776f77, 0x896eff6e,
+	0x001f8000, 0xb96ef807,
+	0xb8f0f812, 0xb8f1f813,
+	0x8ef08870, 0xc0071bb8,
+	0x00000000, 0xbf8cc07f,
+	0xc0071c38, 0x00000008,
+	0xbf8cc07f, 0x86ee6e6e,
+	0xbf840001, 0xbe801d6e,
+	0xb8f1f803, 0x8671ff71,
+	0x000001ff, 0xbf850002,
+	0x806c846c, 0x826d806d,
+	0x866dff6d, 0x0000ffff,
+	0x8f6e8b77, 0x866eff6e,
+	0x001f8000, 0xb96ef807,
+	0x86fe7e7e, 0x86ea6a6a,
+	0xb978f802, 0xbe801f6c,
+	0x866dff6d, 0x0000ffff,
+	0xbef00080, 0xb9700283,
+	0xb8f02407, 0x8e709c70,
+	0x876d706d, 0xb8f003c7,
+	0x8e709b70, 0x876d706d,
+	0xb8f0f807, 0x8670ff70,
+	0x00007fff, 0xb970f807,
+	0xbeee007e, 0xbeef007f,
+	0xbefe0180, 0xbf900004,
+	0x87708478, 0xb970f802,
+	0xbf8e0002, 0xbf88fffe,
+	0xb8f02a05, 0x80708170,
+	0x8e708a70, 0xb8f11605,
+	0x80718171, 0x8e718671,
+	0x80707170, 0x80707e70,
+	0x8271807f, 0x8671ff71,
+	0x0000ffff, 0xc0471cb8,
+	0x00000040, 0xbf8cc07f,
+	0xc04b1d38, 0x00000048,
+	0xbf8cc07f, 0xc0431e78,
+	0x00000058, 0xbf8cc07f,
+	0xc0471eb8, 0x0000005c,
+	0xbf8cc07f, 0xbef4007e,
+	0x8675ff7f, 0x0000ffff,
+	0x8775ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x00807fac, 0x8670ff7f,
+	0x08000000, 0x8f708370,
+	0x87777077, 0x8670ff7f,
+	0x70000000, 0x8f708170,
+	0x87777077, 0xbefb007c,
+	0xbefa0080, 0xb8fa2a05,
+	0x807a817a, 0x8e7a8a7a,
+	0xb8f01605, 0x80708170,
+	0x8e708670, 0x807a707a,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xbefe007c,
+	0xbefc007a, 0xc0611efa,
+	0x0000007c, 0xbf8cc07f,
+	0x807a847a, 0xbefc007e,
+	0xbefe007c, 0xbefc007a,
+	0xc0611b3a, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
+	0xbefc007e, 0xbefe007c,
+	0xbefc007a, 0xc0611b7a,
+	0x0000007c, 0xbf8cc07f,
+	0x807a847a, 0xbefc007e,
+	0xbefe007c, 0xbefc007a,
+	0xc0611bba, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
+	0xbefc007e, 0xbefe007c,
+	0xbefc007a, 0xc0611bfa,
+	0x0000007c, 0xbf8cc07f,
+	0x807a847a, 0xbefc007e,
+	0xbefe007c, 0xbefc007a,
+	0xc0611e3a, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
+	0xbefc007e, 0xb8f1f803,
+	0xbefe007c, 0xbefc007a,
+	0xc0611c7a, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
+	0xbefc007e, 0xbefe007c,
+	0xbefc007a, 0xc0611a3a,
+	0x0000007c, 0xbf8cc07f,
+	0x807a847a, 0xbefc007e,
+	0xbefe007c, 0xbefc007a,
+	0xc0611a7a, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
+	0xbefc007e, 0xb8fbf801,
+	0xbefe007c, 0xbefc007a,
+	0xc0611efa, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
+	0xbefc007e, 0x8670ff7f,
+	0x04000000, 0xbeef0080,
+	0x876f6f70, 0xb8fa2a05,
+	0x807a817a, 0x8e7a8a7a,
+	0xb8f11605, 0x80718171,
+	0x8e718471, 0x8e768271,
+	0xbef600ff, 0x01000000,
+	0xbef20174, 0x80747a74,
+	0x82758075, 0xbefc0080,
+	0xbf800000, 0xbe802b00,
+	0xbe822b02, 0xbe842b04,
+	0xbe862b06, 0xbe882b08,
+	0xbe8a2b0a, 0xbe8c2b0c,
+	0xbe8e2b0e, 0xc06b003a,
+	0x00000000, 0xbf8cc07f,
+	0xc06b013a, 0x00000010,
+	0xbf8cc07f, 0xc06b023a,
+	0x00000020, 0xbf8cc07f,
+	0xc06b033a, 0x00000030,
+	0xbf8cc07f, 0x8074c074,
+	0x82758075, 0x807c907c,
+	0xbf0a717c, 0xbf85ffe7,
+	0xbef40172, 0xbefa0080,
+	0xbefe00c1, 0xbeff00c1,
+	0xbee80080, 0xbee90080,
+	0xbef600ff, 0x01000000,
+	0xe0724000, 0x7a1d0000,
+	0xe0724100, 0x7a1d0100,
+	0xe0724200, 0x7a1d0200,
+	0xe0724300, 0x7a1d0300,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8f14306, 0x8671c171,
+	0xbf84002c, 0xbf8a0000,
+	0x8670ff6f, 0x04000000,
+	0xbf840028, 0x8e718671,
+	0x8e718271, 0xbef60071,
+	0xb8fa2a05, 0x807a817a,
+	0x8e7a8a7a, 0xb8f01605,
+	0x80708170, 0x8e708670,
+	0x807a707a, 0x807aff7a,
+	0x00000080, 0xbef600ff,
+	0x01000000, 0xbefc0080,
+	0xd28c0002, 0x000100c1,
+	0xd28d0003, 0x000204c1,
+	0xd1060002, 0x00011103,
+	0x7e0602ff, 0x00000200,
+	0xbefc00ff, 0x00010000,
+	0xbe800077, 0x8677ff77,
+	0xff7fffff, 0x8777ff77,
+	0x00058000, 0xd8ec0000,
+	0x00000002, 0xbf8cc07f,
+	0xe0765000, 0x7a1d0002,
+	0x68040702, 0xd0c9006a,
+	0x0000e302, 0xbf87fff7,
+	0xbef70000, 0xbefa00ff,
+	0x00000400, 0xbefe00c1,
+	0xbeff00c1, 0xb8f12a05,
+	0x80718171, 0x8e718271,
+	0x8e768871, 0xbef600ff,
+	0x01000000, 0xbefc0084,
+	0xbf0a717c, 0xbf840015,
+	0xbf11017c, 0x8071ff71,
+	0x00001000, 0x7e000300,
+	0x7e020301, 0x7e040302,
+	0x7e060303, 0xe0724000,
+	0x7a1d0000, 0xe0724100,
+	0x7a1d0100, 0xe0724200,
+	0x7a1d0200, 0xe0724300,
+	0x7a1d0300, 0x807c847c,
+	0x807aff7a, 0x00000400,
+	0xbf0a717c, 0xbf85ffef,
+	0xbf9c0000, 0xbf8200d9,
+	0xbef4007e, 0x8675ff7f,
+	0x0000ffff, 0x8775ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x00807fac,
+	0x866eff7f, 0x08000000,
+	0x8f6e836e, 0x87776e77,
+	0x866eff7f, 0x70000000,
+	0x8f6e816e, 0x87776e77,
+	0x866eff7f, 0x04000000,
+	0xbf84001e, 0xbefe00c1,
+	0xbeff00c1, 0xb8ef4306,
+	0x866fc16f, 0xbf840019,
+	0x8e6f866f, 0x8e6f826f,
+	0xbef6006f, 0xb8f82a05,
+	0x80788178, 0x8e788a78,
+	0xb8ee1605, 0x806e816e,
+	0x8e6e866e, 0x80786e78,
+	0x8078ff78, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0xbefc0080, 0xe0510000,
+	0x781d0000, 0xe0510100,
+	0x781d0000, 0x807cff7c,
+	0x00000200, 0x8078ff78,
+	0x00000200, 0xbf0a6f7c,
+	0xbf85fff6, 0xbef80080,
+	0xbefe00c1, 0xbeff00c1,
+	0xb8ef2a05, 0x806f816f,
+	0x8e6f826f, 0x8e76886f,
+	0xbef600ff, 0x01000000,
+	0xbeee0078, 0x8078ff78,
+	0x00000400, 0xbefc0084,
+	0xbf11087c, 0x806fff6f,
+	0x00008000, 0xe0524000,
+	0x781d0000, 0xe0524100,
+	0x781d0100, 0xe0524200,
+	0x781d0200, 0xe0524300,
+	0x781d0300, 0xbf8c0f70,
+	0x7e000300, 0x7e020301,
+	0x7e040302, 0x7e060303,
+	0x807c847c, 0x8078ff78,
+	0x00000400, 0xbf0a6f7c,
+	0xbf85ffee, 0xbf9c0000,
+	0xe0524000, 0x6e1d0000,
+	0xe0524100, 0x6e1d0100,
+	0xe0524200, 0x6e1d0200,
+	0xe0524300, 0x6e1d0300,
+	0xb8f82a05, 0x80788178,
+	0x8e788a78, 0xb8ee1605,
+	0x806e816e, 0x8e6e866e,
+	0x80786e78, 0x80f8c078,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f846f, 0x8e76826f,
+	0xbef600ff, 0x01000000,
+	0xbefc006f, 0xc031003a,
+	0x00000078, 0x80f8c078,
+	0xbf8cc07f, 0x80fc907c,
+	0xbf800000, 0xbe802d00,
+	0xbe822d02, 0xbe842d04,
+	0xbe862d06, 0xbe882d08,
+	0xbe8a2d0a, 0xbe8c2d0c,
+	0xbe8e2d0e, 0xbf06807c,
+	0xbf84fff0, 0xb8f82a05,
+	0x80788178, 0x8e788a78,
+	0xb8ee1605, 0x806e816e,
+	0x8e6e866e, 0x80786e78,
+	0xbef60084, 0xbef600ff,
+	0x01000000, 0xc0211bfa,
+	0x00000078, 0x80788478,
+	0xc0211b3a, 0x00000078,
+	0x80788478, 0xc0211b7a,
+	0x00000078, 0x80788478,
+	0xc0211eba, 0x00000078,
+	0x80788478, 0xc0211efa,
+	0x00000078, 0x80788478,
+	0xc0211c3a, 0x00000078,
+	0x80788478, 0xc0211c7a,
+	0x00000078, 0x80788478,
+	0xc0211a3a, 0x00000078,
+	0x80788478, 0xc0211a7a,
+	0x00000078, 0x80788478,
+	0xc0211cfa, 0x00000078,
+	0x80788478, 0xbf8cc07f,
+	0xbefc006f, 0xbefe007a,
+	0xbeff007b, 0x866f71ff,
+	0x000003ff, 0xb96f4803,
+	0x866f71ff, 0xfffff800,
+	0x8f6f8b6f, 0xb96fa2c3,
+	0xb973f801, 0xb8ee2a05,
+	0x806e816e, 0x8e6e8a6e,
+	0xb8ef1605, 0x806f816f,
+	0x8e6f866f, 0x806e6f6e,
+	0x806e746e, 0x826f8075,
+	0x866fff6f, 0x0000ffff,
+	0xc0071cb7, 0x00000040,
+	0xc00b1d37, 0x00000048,
+	0xc0031e77, 0x00000058,
+	0xc0071eb7, 0x0000005c,
+	0xbf8cc07f, 0x866fff6d,
+	0xf0000000, 0x8f6f9c6f,
+	0x8e6f906f, 0xbeee0080,
+	0x876e6f6e, 0x866fff6d,
+	0x08000000, 0x8f6f9b6f,
+	0x8e6f8f6f, 0x876e6f6e,
+	0x866fff70, 0x00800000,
+	0x8f6f976f, 0xb96ef807,
+	0x866dff6d, 0x0000ffff,
+	0x86fe7e7e, 0x86ea6a6a,
+	0xb970f802, 0xbf8a0000,
+	0x95806f6c, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index 997a383d..a2a04bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -20,9 +20,12 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#if 0
-HW (VI) source code for CWSR trap handler
-#Version 18 + multiple trap handler
+/* To compile this assembly code:
+ * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex
+ */
+
+/* HW (VI) source code for CWSR trap handler */
+/* Version 18 + multiple trap handler */
 
 // this performance-optimal version was originally from Seven Xu at SRDC
 
@@ -98,6 +101,7 @@
 /**************************************************************************/
 var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
 var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
 var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
 
 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT    = 12
@@ -149,7 +153,7 @@
 var s_save_spi_init_hi              =   exec_hi
 
                                                 //tba_lo and tba_hi need to be saved/restored
-var s_save_pc_lo            =   ttmp0           //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_lo            =   ttmp0           //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
 var s_save_pc_hi            =   ttmp1
 var s_save_exec_lo          =   ttmp2
 var s_save_exec_hi          =   ttmp3
@@ -319,6 +323,10 @@
         s_sendmsg   sendmsg(MSG_SAVEWAVE)  //send SPI a message and wait for SPI's write to EXEC
     end
 
+    // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+    s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+    s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
   L_SLEEP:
     s_sleep 0x2                // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0
 
@@ -1007,8 +1015,6 @@
 
     s_waitcnt       lgkmcnt(0)                                                                                      //from now on, it is safe to restore STATUS and IB_STS
 
-    s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff      //pc[47:32]        //Do it here in order not to affect STATUS
-
     //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
     if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
         s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8            //pc[31:0]+8     //two back-to-back s_trap are used (first for save and second for restore)
@@ -1044,6 +1050,7 @@
     s_lshr_b32      s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
     s_setreg_b32    hwreg(HW_REG_IB_STS),   s_restore_tmp
 
+    s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff      //pc[47:32]        //Do it here in order not to affect STATUS
     s_and_b64    exec, exec, exec  // Restore STATUS.EXECZ, not writable by s_setreg_b32
     s_and_b64    vcc, vcc, vcc  // Restore STATUS.VCCZ, not writable by s_setreg_b32
     s_setreg_b32    hwreg(HW_REG_STATUS),   s_restore_status     // SCC is included, which is changed by previous salu
@@ -1127,258 +1134,3 @@
 function get_hwreg_size_bytes
     return 128 //HWREG size 128 bytes
 end
-
-
-#endif
-
-static const uint32_t cwsr_trap_gfx8_hex[] = {
-	0xbf820001, 0xbf820123,
-	0xb8f4f802, 0x89748674,
-	0xb8f5f803, 0x8675ff75,
-	0x00000400, 0xbf850011,
-	0xc00a1e37, 0x00000000,
-	0xbf8c007f, 0x87777978,
-	0xbf840002, 0xb974f802,
-	0xbe801d78, 0xb8f5f803,
-	0x8675ff75, 0x000001ff,
-	0xbf850002, 0x80708470,
-	0x82718071, 0x8671ff71,
-	0x0000ffff, 0xb974f802,
-	0xbe801f70, 0xb8f5f803,
-	0x8675ff75, 0x00000100,
-	0xbf840006, 0xbefa0080,
-	0xb97a0203, 0x8671ff71,
-	0x0000ffff, 0x80f08870,
-	0x82f18071, 0xbefa0080,
-	0xb97a0283, 0xbef60068,
-	0xbef70069, 0xb8fa1c07,
-	0x8e7a9c7a, 0x87717a71,
-	0xb8fa03c7, 0x8e7a9b7a,
-	0x87717a71, 0xb8faf807,
-	0x867aff7a, 0x00007fff,
-	0xb97af807, 0xbef2007e,
-	0xbef3007f, 0xbefe0180,
-	0xbf900004, 0xbf8e0002,
-	0xbf88fffe, 0xbef8007e,
-	0x8679ff7f, 0x0000ffff,
-	0x8779ff79, 0x00040000,
-	0xbefa0080, 0xbefb00ff,
-	0x00807fac, 0x867aff7f,
-	0x08000000, 0x8f7a837a,
-	0x877b7a7b, 0x867aff7f,
-	0x70000000, 0x8f7a817a,
-	0x877b7a7b, 0xbeef007c,
-	0xbeee0080, 0xb8ee2a05,
-	0x806e816e, 0x8e6e8a6e,
-	0xb8fa1605, 0x807a817a,
-	0x8e7a867a, 0x806e7a6e,
-	0xbefa0084, 0xbefa00ff,
-	0x01000000, 0xbefe007c,
-	0xbefc006e, 0xc0611bfc,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0xbefe007c,
-	0xbefc006e, 0xc0611c3c,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0xbefe007c,
-	0xbefc006e, 0xc0611c7c,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0xbefe007c,
-	0xbefc006e, 0xc0611cbc,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0xbefe007c,
-	0xbefc006e, 0xc0611cfc,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0xbefe007c,
-	0xbefc006e, 0xc0611d3c,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0xb8f5f803,
-	0xbefe007c, 0xbefc006e,
-	0xc0611d7c, 0x0000007c,
-	0x806e846e, 0xbefc007e,
-	0xbefe007c, 0xbefc006e,
-	0xc0611dbc, 0x0000007c,
-	0x806e846e, 0xbefc007e,
-	0xbefe007c, 0xbefc006e,
-	0xc0611dfc, 0x0000007c,
-	0x806e846e, 0xbefc007e,
-	0xb8eff801, 0xbefe007c,
-	0xbefc006e, 0xc0611bfc,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0xbefe007c,
-	0xbefc006e, 0xc0611b3c,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0xbefe007c,
-	0xbefc006e, 0xc0611b7c,
-	0x0000007c, 0x806e846e,
-	0xbefc007e, 0x867aff7f,
-	0x04000000, 0xbef30080,
-	0x8773737a, 0xb8ee2a05,
-	0x806e816e, 0x8e6e8a6e,
-	0xb8f51605, 0x80758175,
-	0x8e758475, 0x8e7a8275,
-	0xbefa00ff, 0x01000000,
-	0xbef60178, 0x80786e78,
-	0x82798079, 0xbefc0080,
-	0xbe802b00, 0xbe822b02,
-	0xbe842b04, 0xbe862b06,
-	0xbe882b08, 0xbe8a2b0a,
-	0xbe8c2b0c, 0xbe8e2b0e,
-	0xc06b003c, 0x00000000,
-	0xc06b013c, 0x00000010,
-	0xc06b023c, 0x00000020,
-	0xc06b033c, 0x00000030,
-	0x8078c078, 0x82798079,
-	0x807c907c, 0xbf0a757c,
-	0xbf85ffeb, 0xbef80176,
-	0xbeee0080, 0xbefe00c1,
-	0xbeff00c1, 0xbefa00ff,
-	0x01000000, 0xe0724000,
-	0x6e1e0000, 0xe0724100,
-	0x6e1e0100, 0xe0724200,
-	0x6e1e0200, 0xe0724300,
-	0x6e1e0300, 0xbefe00c1,
-	0xbeff00c1, 0xb8f54306,
-	0x8675c175, 0xbf84002c,
-	0xbf8a0000, 0x867aff73,
-	0x04000000, 0xbf840028,
-	0x8e758675, 0x8e758275,
-	0xbefa0075, 0xb8ee2a05,
-	0x806e816e, 0x8e6e8a6e,
-	0xb8fa1605, 0x807a817a,
-	0x8e7a867a, 0x806e7a6e,
-	0x806eff6e, 0x00000080,
-	0xbefa00ff, 0x01000000,
-	0xbefc0080, 0xd28c0002,
-	0x000100c1, 0xd28d0003,
-	0x000204c1, 0xd1060002,
-	0x00011103, 0x7e0602ff,
-	0x00000200, 0xbefc00ff,
-	0x00010000, 0xbe80007b,
-	0x867bff7b, 0xff7fffff,
-	0x877bff7b, 0x00058000,
-	0xd8ec0000, 0x00000002,
-	0xbf8c007f, 0xe0765000,
-	0x6e1e0002, 0x32040702,
-	0xd0c9006a, 0x0000eb02,
-	0xbf87fff7, 0xbefb0000,
-	0xbeee00ff, 0x00000400,
-	0xbefe00c1, 0xbeff00c1,
-	0xb8f52a05, 0x80758175,
-	0x8e758275, 0x8e7a8875,
-	0xbefa00ff, 0x01000000,
-	0xbefc0084, 0xbf0a757c,
-	0xbf840015, 0xbf11017c,
-	0x8075ff75, 0x00001000,
-	0x7e000300, 0x7e020301,
-	0x7e040302, 0x7e060303,
-	0xe0724000, 0x6e1e0000,
-	0xe0724100, 0x6e1e0100,
-	0xe0724200, 0x6e1e0200,
-	0xe0724300, 0x6e1e0300,
-	0x807c847c, 0x806eff6e,
-	0x00000400, 0xbf0a757c,
-	0xbf85ffef, 0xbf9c0000,
-	0xbf8200ca, 0xbef8007e,
-	0x8679ff7f, 0x0000ffff,
-	0x8779ff79, 0x00040000,
-	0xbefa0080, 0xbefb00ff,
-	0x00807fac, 0x8676ff7f,
-	0x08000000, 0x8f768376,
-	0x877b767b, 0x8676ff7f,
-	0x70000000, 0x8f768176,
-	0x877b767b, 0x8676ff7f,
-	0x04000000, 0xbf84001e,
-	0xbefe00c1, 0xbeff00c1,
-	0xb8f34306, 0x8673c173,
-	0xbf840019, 0x8e738673,
-	0x8e738273, 0xbefa0073,
-	0xb8f22a05, 0x80728172,
-	0x8e728a72, 0xb8f61605,
-	0x80768176, 0x8e768676,
-	0x80727672, 0x8072ff72,
-	0x00000080, 0xbefa00ff,
-	0x01000000, 0xbefc0080,
-	0xe0510000, 0x721e0000,
-	0xe0510100, 0x721e0000,
-	0x807cff7c, 0x00000200,
-	0x8072ff72, 0x00000200,
-	0xbf0a737c, 0xbf85fff6,
-	0xbef20080, 0xbefe00c1,
-	0xbeff00c1, 0xb8f32a05,
-	0x80738173, 0x8e738273,
-	0x8e7a8873, 0xbefa00ff,
-	0x01000000, 0xbef60072,
-	0x8072ff72, 0x00000400,
-	0xbefc0084, 0xbf11087c,
-	0x8073ff73, 0x00008000,
-	0xe0524000, 0x721e0000,
-	0xe0524100, 0x721e0100,
-	0xe0524200, 0x721e0200,
-	0xe0524300, 0x721e0300,
-	0xbf8c0f70, 0x7e000300,
-	0x7e020301, 0x7e040302,
-	0x7e060303, 0x807c847c,
-	0x8072ff72, 0x00000400,
-	0xbf0a737c, 0xbf85ffee,
-	0xbf9c0000, 0xe0524000,
-	0x761e0000, 0xe0524100,
-	0x761e0100, 0xe0524200,
-	0x761e0200, 0xe0524300,
-	0x761e0300, 0xb8f22a05,
-	0x80728172, 0x8e728a72,
-	0xb8f61605, 0x80768176,
-	0x8e768676, 0x80727672,
-	0x80f2c072, 0xb8f31605,
-	0x80738173, 0x8e738473,
-	0x8e7a8273, 0xbefa00ff,
-	0x01000000, 0xbefc0073,
-	0xc031003c, 0x00000072,
-	0x80f2c072, 0xbf8c007f,
-	0x80fc907c, 0xbe802d00,
-	0xbe822d02, 0xbe842d04,
-	0xbe862d06, 0xbe882d08,
-	0xbe8a2d0a, 0xbe8c2d0c,
-	0xbe8e2d0e, 0xbf06807c,
-	0xbf84fff1, 0xb8f22a05,
-	0x80728172, 0x8e728a72,
-	0xb8f61605, 0x80768176,
-	0x8e768676, 0x80727672,
-	0xbefa0084, 0xbefa00ff,
-	0x01000000, 0xc0211cfc,
-	0x00000072, 0x80728472,
-	0xc0211c3c, 0x00000072,
-	0x80728472, 0xc0211c7c,
-	0x00000072, 0x80728472,
-	0xc0211bbc, 0x00000072,
-	0x80728472, 0xc0211bfc,
-	0x00000072, 0x80728472,
-	0xc0211d3c, 0x00000072,
-	0x80728472, 0xc0211d7c,
-	0x00000072, 0x80728472,
-	0xc0211a3c, 0x00000072,
-	0x80728472, 0xc0211a7c,
-	0x00000072, 0x80728472,
-	0xc0211dfc, 0x00000072,
-	0x80728472, 0xc0211b3c,
-	0x00000072, 0x80728472,
-	0xc0211b7c, 0x00000072,
-	0x80728472, 0xbf8c007f,
-	0x8671ff71, 0x0000ffff,
-	0xbefc0073, 0xbefe006e,
-	0xbeff006f, 0x867375ff,
-	0x000003ff, 0xb9734803,
-	0x867375ff, 0xfffff800,
-	0x8f738b73, 0xb973a2c3,
-	0xb977f801, 0x8673ff71,
-	0xf0000000, 0x8f739c73,
-	0x8e739073, 0xbef60080,
-	0x87767376, 0x8673ff71,
-	0x08000000, 0x8f739b73,
-	0x8e738f73, 0x87767376,
-	0x8673ff74, 0x00800000,
-	0x8f739773, 0xb976f807,
-	0x86fe7e7e, 0x86ea6a6a,
-	0xb974f802, 0xbf8a0000,
-	0x95807370, 0xbf810000,
-};
-
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
new file mode 100644
index 0000000..998be96
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -0,0 +1,1214 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex
+ */
+
+/* HW (GFX9) source code for CWSR trap handler */
+/* Version 18 + multiple trap handler */
+
+// this performance-optimal version was originally from Seven Xu at SRDC
+
+// Revision #18	 --...
+/* Rev History
+** #1. Branch from gc dv.   //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged), #57-58(merged, skipped - already fixed by PV)
+** #4. SR Memory Layout:
+**			 1. VGPR-SGPR-HWREG-{LDS}
+**			 2. tba_hi.bits.26 - reconfigured as the first-wave-in-TG bit, to defer the LDS save for a threadgroup.. performance concern..
+** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
+** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer? (No need, already matches the swizzle pattern; needs more investigation)
+** #7. Update: 1. don't barrier if noLDS
+** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
+**	       2. Fix SQ issue by s_sleep 2
+** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last
+**	       2. optimize s_buffer save by bursting 16 sgprs...
+** #10. Update 1. Optimize restore sgpr by bursting 16 sgprs.
+** #11. Update 1. Add 2 more timestamps for debug version
+** #12. Update 1. Add VGPR SR using DWx4; some cases improve and some cases drop performance
+** #13. Integ  1. Always use MUBUF for PV trap shader...
+** #14. Update 1. s_buffer_store soft clause...
+** #15. Update 1. PERF - scalar write with glc:0/mtype0 to allow L2 combine; a big perf improvement.
+** #16. Update 1. PERF - UNROLL LDS_DMA got a 2500-cycle saving in the IP tree
+** #17. Update 1. FUNC - LDS_DMA has issues with ATC, replaced with ds_read/buffer_store for the save part [TODO: restore part]
+**	       2. PERF - Save LDS before saving VGPRs to cover the long LDS save latency...
+** #18. Update 1. FUNC - Implicitly restore STATUS.VCCZ, which is not writable by s_setreg_b32
+**	       2. FUNC - Handle non-CWSR traps
+*/
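+
+/* Save-area layout sketch (derived from the offsets used by the save/restore
+ * routines below; all sizes are per-wave and depend on the wave's allocation):
+ *   offset 0                         : VGPRs (256 bytes per VGPR, 64 lanes * 4B)
+ *   offset get_vgpr_size_bytes()     : SGPRs
+ *   offset VGPR + SGPR               : HWREGs (128 bytes, get_hwreg_size_bytes())
+ *   offset VGPR + SGPR + HWREG       : LDS (saved by the first wave in the TG only)
+ */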
+
+var G8SR_WDMEM_HWREG_OFFSET = 0
+var G8SR_WDMEM_SGPR_OFFSET  = 128  // in bytes
+
+// Keep these definitions the same as in the app shader. These 2 timestamps are part of the app shader: one should come before any save and one after restore.
+
+var G8SR_DEBUG_TIMESTAMP = 0
+var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4	// ts_save_d timestamp offset relative to SGPR_SR_memory_offset
+var s_g8sr_ts_save_s	= s[34:35]   // save start
+var s_g8sr_ts_sq_save_msg  = s[36:37]	// The save shader send SAVEWAVE msg to spi
+var s_g8sr_ts_spi_wrexec   = s[38:39]	// the SPI write the sr address to SQ
+var s_g8sr_ts_save_d	= s[40:41]   // save end
+var s_g8sr_ts_restore_s = s[42:43]   // restore start
+var s_g8sr_ts_restore_d = s[44:45]   // restore end
+
+var G8SR_VGPR_SR_IN_DWX4 = 0
+var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000	 // DWx4 stride is 4*4Bytes
+var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4  = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
+
+
+/*************************************************************************/
+/*		    control on how to run the shader			 */
+/*************************************************************************/
+//any hack that needs to be made to run this code in EMU (either because various EMU code is not ready, or because there is no compute save & restore in the EMU run)
+var EMU_RUN_HACK		    =	0
+var EMU_RUN_HACK_RESTORE_NORMAL	    =	0
+var EMU_RUN_HACK_SAVE_NORMAL_EXIT   =	0
+var EMU_RUN_HACK_SAVE_SINGLE_WAVE   =	0
+var EMU_RUN_HACK_SAVE_FIRST_TIME    =	0		    //for interrupted restore in which the first save is through EMU_RUN_HACK
+var SAVE_LDS			    =	1
+var WG_BASE_ADDR_LO		    =	0x9000a000
+var WG_BASE_ADDR_HI		    =	0x0
+var WAVE_SPACE			    =	0x5000		    //memory size that each wave occupies in workgroup state mem
+var CTX_SAVE_CONTROL		    =	0x0
+var CTX_RESTORE_CONTROL		    =	CTX_SAVE_CONTROL
+var SIM_RUN_HACK		    =	0		    //any hack that needs to be made to run this code in SIM (either because various RTL code is not ready, or because there is no compute save & restore in the RTL run)
+var SGPR_SAVE_USE_SQC		    =	1		    //use SQC D$ to do the write
+var USE_MTBUF_INSTEAD_OF_MUBUF	    =	0		    //because TC EMU currently asserts on 0 of the overloaded DFMT field (used to carry 4 more bits of stride for MUBUF opcodes)
+var SWIZZLE_EN			    =	0		    //whether we use swizzled buffer addressing
+var ACK_SQC_STORE		    =	1		    //workaround for suspected SQC store bug causing incorrect stores under concurrency
+
+/**************************************************************************/
+/*			variables					  */
+/**************************************************************************/
+var SQ_WAVE_STATUS_INST_ATC_SHIFT  = 23
+var SQ_WAVE_STATUS_INST_ATC_MASK   = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT  = 1
+var SQ_WAVE_STATUS_SPI_PRIO_MASK   = 0x00000006
+var SQ_WAVE_STATUS_HALT_MASK       = 0x2000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT	= 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE	= 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT	= 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE	= 6
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT	= 24
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE	= 3			//FIXME	 sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK    =	0x400
+var SQ_WAVE_TRAPSTS_EXCE_MASK	    =	0x1FF			// Exception mask
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT   =	10
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK   =	0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT  =	8
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK	=   0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT	=   0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE	=   10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK	=   0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT	=   11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE	=   21
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK	=   0x800
+
+var SQ_WAVE_IB_STS_RCNT_SHIFT		=   16			//FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT	=   15			//FIXME
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK	= 0x1F8000
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG	= 0x00007FFF	//FIXME
+
+var SQ_BUF_RSRC_WORD1_ATC_SHIFT	    =	24
+var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT   =	27
+
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT	=   26			// bits [31:26] unused by SPI debug data
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK	=   0xFC000000
+
+/*	Save	    */
+var S_SAVE_BUF_RSRC_WORD1_STRIDE	=   0x00040000		//stride is 4 bytes
+var S_SAVE_BUF_RSRC_WORD3_MISC		=   0x00807FAC		//SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+
+var S_SAVE_SPI_INIT_ATC_MASK		=   0x08000000		//bit[27]: ATC bit
+var S_SAVE_SPI_INIT_ATC_SHIFT		=   27
+var S_SAVE_SPI_INIT_MTYPE_MASK		=   0x70000000		//bit[30:28]: Mtype
+var S_SAVE_SPI_INIT_MTYPE_SHIFT		=   28
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK	=   0x04000000		//bit[26]: FirstWaveInTG
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT	=   26
+
+var S_SAVE_PC_HI_RCNT_SHIFT		=   28			//FIXME	 check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK		=   0xF0000000		//FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT	=   27			//FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK	=   0x08000000		//FIXME
+
+var s_save_spi_init_lo		    =	exec_lo
+var s_save_spi_init_hi		    =	exec_hi
+
+var s_save_pc_lo	    =	ttmp0		//{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_hi	    =	ttmp1
+var s_save_exec_lo	    =	ttmp2
+var s_save_exec_hi	    =	ttmp3
+var s_save_tmp		    =	ttmp4
+var s_save_trapsts	    =	ttmp5		//not really used until the end of the SAVE routine
+var s_save_xnack_mask_lo    =	ttmp6
+var s_save_xnack_mask_hi    =	ttmp7
+var s_save_buf_rsrc0	    =	ttmp8
+var s_save_buf_rsrc1	    =	ttmp9
+var s_save_buf_rsrc2	    =	ttmp10
+var s_save_buf_rsrc3	    =	ttmp11
+var s_save_status	    =	ttmp12
+var s_save_mem_offset	    =	ttmp14
+var s_save_alloc_size	    =	s_save_trapsts		//conflict
+var s_save_m0		    =	ttmp15
+var s_save_ttmps_lo	    =	s_save_tmp		//no conflict
+var s_save_ttmps_hi	    =	s_save_trapsts		//no conflict
+
+/*	Restore	    */
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE	    =	S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC	    =	S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_ATC_MASK		    =	0x08000000	    //bit[27]: ATC bit
+var S_RESTORE_SPI_INIT_ATC_SHIFT	    =	27
+var S_RESTORE_SPI_INIT_MTYPE_MASK	    =	0x70000000	    //bit[30:28]: Mtype
+var S_RESTORE_SPI_INIT_MTYPE_SHIFT	    =	28
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK	    =	0x04000000	    //bit[26]: FirstWaveInTG
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT	    =	26
+
+var S_RESTORE_PC_HI_RCNT_SHIFT		    =	S_SAVE_PC_HI_RCNT_SHIFT
+var S_RESTORE_PC_HI_RCNT_MASK		    =	S_SAVE_PC_HI_RCNT_MASK
+var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT	    =	S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK	    =	S_SAVE_PC_HI_FIRST_REPLAY_MASK
+
+var s_restore_spi_init_lo		    =	exec_lo
+var s_restore_spi_init_hi		    =	exec_hi
+
+var s_restore_mem_offset	=   ttmp12
+var s_restore_alloc_size	=   ttmp3
+var s_restore_tmp		=   ttmp2
+var s_restore_mem_offset_save	=   s_restore_tmp	//no conflict
+
+var s_restore_m0	    =	s_restore_alloc_size	//no conflict
+
+var s_restore_mode	    =	ttmp7
+
+var s_restore_pc_lo	    =	ttmp0
+var s_restore_pc_hi	    =	ttmp1
+var s_restore_exec_lo	    =	ttmp14
+var s_restore_exec_hi	    = 	ttmp15
+var s_restore_status	    =	ttmp4
+var s_restore_trapsts	    =	ttmp5
+var s_restore_xnack_mask_lo =	xnack_mask_lo
+var s_restore_xnack_mask_hi =	xnack_mask_hi
+var s_restore_buf_rsrc0	    =	ttmp8
+var s_restore_buf_rsrc1	    =	ttmp9
+var s_restore_buf_rsrc2	    =	ttmp10
+var s_restore_buf_rsrc3	    =	ttmp11
+var s_restore_ttmps_lo	    =	s_restore_tmp		//no conflict
+var s_restore_ttmps_hi	    =	s_restore_alloc_size	//no conflict
+
+/**************************************************************************/
+/*			trap handler entry points			  */
+/**************************************************************************/
+/* Shader Main*/
+
+shader main
+  asic(GFX9)
+  type(CS)
+
+
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))		    //hack to use trap_id for determining save/restore
+	//FIXME VCCZ un-init assertion s_getreg_b32	s_save_status, hwreg(HW_REG_STATUS)	    //save STATUS since we will change SCC
+	s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000		    //change SCC
+	s_cmp_eq_u32 s_save_tmp, 0x007e0000			    //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
+	s_cbranch_scc0 L_JUMP_TO_RESTORE			    //do not need to recover STATUS here  since we are going to RESTORE
+	//FIXME	 s_setreg_b32	hwreg(HW_REG_STATUS),	s_save_status	    //need to recover STATUS since we are going to SAVE
+	s_branch L_SKIP_RESTORE					    //NOT restore, SAVE actually
+    else
+	s_branch L_SKIP_RESTORE					    //NOT restore. might be a regular trap or save
+    end
+
+L_JUMP_TO_RESTORE:
+    s_branch L_RESTORE						    //restore
+
+L_SKIP_RESTORE:
+
+    s_getreg_b32    s_save_status, hwreg(HW_REG_STATUS)				    //save STATUS since we will change SCC
+    s_andn2_b32	    s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK	    //mask SPI_PRIO out of the saved STATUS; the handler raises it below and must not restore it
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK    //check whether this is for save
+    s_cbranch_scc1  L_SAVE					//this is the operation for save
+
+    // *********    Handle non-CWSR traps	*******************
+if (!EMU_RUN_HACK)
+    // Illegal instruction is a non-maskable exception which blocks context save.
+    // Halt the wavefront and return from the trap.
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+    s_cbranch_scc1  L_HALT_WAVE
+
+    // If TRAPSTS.MEM_VIOL is asserted then we cannot fetch from the TMA.
+    // Instead, halt the wavefront and return from the trap.
+    s_and_b32       ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
+    s_cbranch_scc0  L_FETCH_2ND_TRAP
+
+L_HALT_WAVE:
+    // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
+    // We cannot prevent further faults so just terminate the wavefront.
+    s_and_b32       ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+    s_cbranch_scc0  L_NOT_ALREADY_HALTED
+    s_endpgm
+L_NOT_ALREADY_HALTED:
+    s_or_b32        s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+    // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+    // Rewind the PC to prevent this from occurring. The debugger compensates for this.
+    s_sub_u32       ttmp0, ttmp0, 0x8
+    s_subb_u32      ttmp1, ttmp1, 0x0
+
+L_FETCH_2ND_TRAP:
+    // Preserve and clear scalar XNACK state before issuing scalar reads.
+    // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
+    s_getreg_b32    ttmp2, hwreg(HW_REG_IB_STS)
+    s_and_b32       ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+    s_lshl_b32      ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+    s_andn2_b32     ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
+    s_or_b32        ttmp11, ttmp11, ttmp3
+
+    s_andn2_b32     ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+    s_setreg_b32    hwreg(HW_REG_IB_STS), ttmp2
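+    // Bit bookkeeping sketch: IB_STS holds FIRST_REPLAY at bit 15 and RCNT at
+    // bits [20:16] (together mask 0x1F8000); shifting left by (26 - 15) = 11
+    // moves the combined 6-bit field into ttmp11[31:26], which the SPI debug
+    // data leaves unused.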
+
+    // Read second-level TBA/TMA from first-level TMA and jump if available.
+    // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+    // ttmp12 holds SQ_WAVE_STATUS
+    s_getreg_b32    ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
+    s_getreg_b32    ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
+    s_lshl_b64      [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
+    s_load_dwordx2  [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
+    s_waitcnt       lgkmcnt(0)
+    s_load_dwordx2  [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
+    s_waitcnt       lgkmcnt(0)
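+    // Note: the SQ_SHADER_TMA registers presumably hold a 256-byte-aligned
+    // address shifted right by 8; the s_lshl_b64 above rebuilds the byte
+    // address before the two glc loads fetch the second-level TBA (+0x0)
+    // and TMA (+0x8).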
+    s_and_b64       [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+    s_cbranch_scc0  L_NO_NEXT_TRAP // second-level trap handler has not been set
+    s_setpc_b64     [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    s_and_b32	    s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
+    s_cbranch_scc1  L_EXCP_CASE	  // Exception, jump back to the shader program directly.
+    s_add_u32	    ttmp0, ttmp0, 4   // S_TRAP case, add 4 to ttmp0
+    s_addc_u32	ttmp1, ttmp1, 0
+L_EXCP_CASE:
+    s_and_b32	ttmp1, ttmp1, 0xFFFF
+
+    // Restore SQ_WAVE_IB_STS.
+    s_lshr_b32      ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+    s_and_b32       ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+    s_setreg_b32    hwreg(HW_REG_IB_STS), ttmp2
+
+    // Restore SQ_WAVE_STATUS.
+    s_and_b64       exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+    s_and_b64       vcc, vcc, vcc    // Restore STATUS.VCCZ, not writable by s_setreg_b32
+    s_setreg_b32    hwreg(HW_REG_STATUS), s_save_status
+
+    s_rfe_b64       [ttmp0, ttmp1]
+end
+    // *********	End handling of non-CWSR traps	 *******************
+
+/**************************************************************************/
+/*			save routine					  */
+/**************************************************************************/
+
+L_SAVE:
+
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime	s_g8sr_ts_save_s
+	s_waitcnt lgkmcnt(0)	     //FIXME, will cause xnack??
+end
+
+    s_and_b32	    s_save_pc_hi, s_save_pc_hi, 0x0000ffff    //pc[47:32]
+
+    s_mov_b32	    s_save_tmp, 0							    //clear saveCtx bit
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp	    //clear saveCtx bit
+
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)		    //save RCNT
+    s_lshl_b32	    s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+    s_or_b32	    s_save_pc_hi, s_save_pc_hi, s_save_tmp
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)   //save FIRST_REPLAY
+    s_lshl_b32	    s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+    s_or_b32	    s_save_pc_hi, s_save_pc_hi, s_save_tmp
+    s_getreg_b32    s_save_tmp, hwreg(HW_REG_IB_STS)					    //clear RCNT and FIRST_REPLAY in IB_STS
+    s_and_b32	    s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+
+    s_setreg_b32    hwreg(HW_REG_IB_STS), s_save_tmp
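+    // At this point s_save_pc_hi carries PC[47:32] in bits [15:0],
+    // FIRST_REPLAY in bit 27 and RCNT in bits [31:28] (see S_SAVE_PC_HI_*
+    // above), so the replay state is saved alongside the PC.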
+
+    /*	    inform SPI the readiness and wait for SPI's go signal */
+    s_mov_b32	    s_save_exec_lo, exec_lo						    //save EXEC and use EXEC for the go signal from SPI
+    s_mov_b32	    s_save_exec_hi, exec_hi
+    s_mov_b64	    exec,   0x0								    //clear EXEC to get ready to receive
+
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime  s_g8sr_ts_sq_save_msg
+	s_waitcnt lgkmcnt(0)
+end
+
+    if (EMU_RUN_HACK)
+
+    else
+	s_sendmsg   sendmsg(MSG_SAVEWAVE)  //send SPI a message and wait for SPI's write to EXEC
+    end
+
+    // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+    s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+    s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
+  L_SLEEP:
+    s_sleep 0x2		       // sleep 1 (64clk) is not enough for 8 waves per SIMD and would hang SQ: the 7th/8th waves cannot get arbitration to execute instructions while the other waves are stuck in the sleep loop waiting for wrexec!=0
+
+    if (EMU_RUN_HACK)
+
+    else
+	s_cbranch_execz L_SLEEP
+    end
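+    // Handshake recap: EXEC was cleared before MSG_SAVEWAVE; SPI answers by
+    // writing the save-area address/control bits into EXEC, so the loop above
+    // spins while EXEC is still zero and, on exit, s_save_spi_init_lo/hi
+    // (aliases of exec_lo/hi) hold the SPI-provided values.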
+
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime  s_g8sr_ts_spi_wrexec
+	s_waitcnt lgkmcnt(0)
+end
+
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+	//calculate wd_addr using absolute thread id
+	v_readlane_b32 s_save_tmp, v9, 0
+	s_lshr_b32 s_save_tmp, s_save_tmp, 6
+	s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
+	s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+	s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+	s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+    else
+    end
+    if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+	s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+	s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+	s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+    else
+    end
+
+    // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+    // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+    get_vgpr_size_bytes(s_save_ttmps_lo)
+    get_sgpr_size_bytes(s_save_ttmps_hi)
+    s_add_u32	    s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+    s_add_u32	    s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+    s_addc_u32	    s_save_ttmps_hi, s_save_spi_init_hi, 0x0
+    s_and_b32	    s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
+    s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
+    ack_sqc_store_workaround()
+    s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
+    ack_sqc_store_workaround()
+    s_store_dword   ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
+    ack_sqc_store_workaround()
+    s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
+    ack_sqc_store_workaround()
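+    // ack_sqc_store_workaround() is defined later in this file; with
+    // ACK_SQC_STORE set it is expected to drain the scalar stores (e.g. via
+    // s_waitcnt lgkmcnt(0)) so the suspected SQC store bug cannot corrupt
+    // the ttmp writes above.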
+
+    /*	    setup Resource Constants    */
+    s_mov_b32	    s_save_buf_rsrc0,	s_save_spi_init_lo							//base_addr_lo
+    s_and_b32	    s_save_buf_rsrc1,	s_save_spi_init_hi, 0x0000FFFF						//base_addr_hi
+    s_or_b32	    s_save_buf_rsrc1,	s_save_buf_rsrc1,  S_SAVE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32	    s_save_buf_rsrc2,	0									//NUM_RECORDS initial value = 0 (in bytes), although not necessarily initialized
+    s_mov_b32	    s_save_buf_rsrc3,	S_SAVE_BUF_RSRC_WORD3_MISC
+    s_and_b32	    s_save_tmp,		s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+    s_lshr_b32	    s_save_tmp,		s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)	    //get ATC bit into position
+    s_or_b32	    s_save_buf_rsrc3,	s_save_buf_rsrc3,  s_save_tmp						//or ATC
+    s_and_b32	    s_save_tmp,		s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+    s_lshr_b32	    s_save_tmp,		s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)	    //get MTYPE bits into position
+    s_or_b32	    s_save_buf_rsrc3,	s_save_buf_rsrc3,  s_save_tmp						//or MTYPE
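+    // s_save_buf_rsrc0..3 now form a complete buffer resource descriptor:
+    // word0 = base[31:0], word1 = base[47:32] | 4-byte stride,
+    // word2 = NUM_RECORDS (set per use below), word3 = misc | ATC | MTYPE.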
+
+    //FIXME  right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi  (might need to save them before using them?)
+    s_mov_b32	    s_save_m0,		m0								    //save M0
+
+    /*	    global mem offset		*/
+    s_mov_b32	    s_save_mem_offset,	0x0									//mem offset initial value = 0
+
+
+
+
+    /*	    save HW registers	*/
+    //////////////////////////////
+
+  L_SAVE_HWREG:
+	// HWREG SR memory offset : size(VGPR)+size(SGPR)
+       get_vgpr_size_bytes(s_save_mem_offset)
+       get_sgpr_size_bytes(s_save_tmp)
+       s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+    s_mov_b32	    s_save_buf_rsrc2, 0x4				//NUM_RECORDS	in bytes
+    if (SWIZZLE_EN)
+	s_add_u32	s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0			    //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000				    //NUM_RECORDS in bytes
+    end
+
+
+    write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)			//M0
+
+    if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
+	s_add_u32 s_save_pc_lo, s_save_pc_lo, 4		    //pc[31:0]+4
+	s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0	    //carry bit over
+    end
+
+    write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)		    //PC
+    write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)		//EXEC
+    write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+    write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)		//STATUS
+
+    //s_save_trapsts conflicts with s_save_alloc_size
+    s_getreg_b32    s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+    write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)		//TRAPSTS
+
+    write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset)	    //XNACK_MASK_LO
+    write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset)	    //XNACK_MASK_HI
+
+    //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
+    s_getreg_b32    s_save_m0, hwreg(HW_REG_MODE)						    //MODE
+    write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+
+
+    /*	    the first wave in the threadgroup	 */
+    s_and_b32	    s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK	// extract first wave bit
+    s_mov_b32	     s_save_exec_hi, 0x0
+    s_or_b32	     s_save_exec_hi, s_save_tmp, s_save_exec_hi				 // save first wave bit to s_save_exec_hi.bits[26]
+
+
+    /*		save SGPRs	*/
+	// Save SGPRs before the LDS save, so that s0 to s4 can be used during the LDS save...
+    //////////////////////////////
+
+    // SGPR SR memory offset : size(VGPR)
+    get_vgpr_size_bytes(s_save_mem_offset)
+    // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)		//sgpr_size
+    s_add_u32	    s_save_alloc_size, s_save_alloc_size, 1
+    s_lshl_b32	    s_save_alloc_size, s_save_alloc_size, 4			    //Number of SGPRs = (sgpr_size + 1) * 16   (non-zero value)
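+    // Worked example: an sgpr_size field of 6 gives (6 + 1) * 16 = 112 SGPRs,
+    // the "112 SGPR allocated case" mentioned in the restore path below.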
+
+    if (SGPR_SAVE_USE_SQC)
+	s_lshl_b32	s_save_buf_rsrc2,   s_save_alloc_size, 2		    //NUM_RECORDS in bytes
+    else
+	s_lshl_b32	s_save_buf_rsrc2,   s_save_alloc_size, 8		    //NUM_RECORDS in bytes (64 threads)
+    end
+
+    if (SWIZZLE_EN)
+	s_add_u32	s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0			    //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000				    //NUM_RECORDS in bytes
+    end
+
+
+    // backup s_save_buf_rsrc0,1 to s_save_xnack_mask_lo/hi, since the write_16sgpr_to_mem function will change rsrc0
+    //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+    s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+    s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+    s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+    s_mov_b32	    m0, 0x0			    //SGPR initial index value =0
+    s_nop	    0x0				    //Manually inserted wait states
+  L_SAVE_SGPR_LOOP:
+    // SGPR is allocated in 16 SGPR granularity
+    s_movrels_b64   s0, s0     //s0 = s[0+m0], s1 = s[1+m0]
+    s_movrels_b64   s2, s2     //s2 = s[2+m0], s3 = s[3+m0]
+    s_movrels_b64   s4, s4     //s4 = s[4+m0], s5 = s[5+m0]
+    s_movrels_b64   s6, s6     //s6 = s[6+m0], s7 = s[7+m0]
+    s_movrels_b64   s8, s8     //s8 = s[8+m0], s9 = s[9+m0]
+    s_movrels_b64   s10, s10   //s10 = s[10+m0], s11 = s[11+m0]
+    s_movrels_b64   s12, s12   //s12 = s[12+m0], s13 = s[13+m0]
+    s_movrels_b64   s14, s14   //s14 = s[14+m0], s15 = s[15+m0]
+
+    write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4
+    s_add_u32	    m0, m0, 16							    //next sgpr index
+    s_cmp_lt_u32    m0, s_save_alloc_size					    //scc = (m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_SGPR_LOOP					//SGPR save is complete?
+    // restore s_save_buf_rsrc0,1
+    //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo
+    s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo
+
+
+
+
+    /*		save the first 4 VGPRs, which the LDS save can then use   */
+	// each wave allocates at least 4 VGPRs...
+    /////////////////////////////////////////////////////////////////////////////////////
+
+    s_mov_b32	    s_save_mem_offset, 0
+    s_mov_b32	    exec_lo, 0xFFFFFFFF						    //need every thread from now on
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+    s_mov_b32	    xnack_mask_lo, 0x0
+    s_mov_b32	    xnack_mask_hi, 0x0
+
+    if (SWIZZLE_EN)
+	s_add_u32	s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0			    //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000				    //NUM_RECORDS in bytes
+    end
+
+
+    // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+	// the const stride for DWx4 is 4*4 bytes
+	s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+	s_or_b32  s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4  // const stride to 4*4 bytes
+
+	buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+	s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+	s_or_b32  s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE  // reset const stride to 4 bytes
+else
+	buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256
+	buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*2
+	buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*3
+end
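+    // Each buffer_store_dword above writes one VGPR for all 64 lanes
+    // (64 lanes * 4 bytes = 256 bytes), hence the offset:256 stepping
+    // between v0..v3.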
+
+
+
+    /*		save LDS	*/
+    //////////////////////////////
+
+  L_SAVE_LDS:
+
+	// Change EXEC to all threads...
+    s_mov_b32	    exec_lo, 0xFFFFFFFF	  //need every thread from now on
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)		    //lds_size
+    s_and_b32	    s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF		    //lds_size is zero?
+    s_cbranch_scc0  L_SAVE_LDS_DONE									       //no lds used? jump to L_SAVE_LDS_DONE
+
+    s_barrier		    //LDS is used? wait for other waves in the same TG
+    s_and_b32	    s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK		       //exec is still used here
+    s_cbranch_scc0  L_SAVE_LDS_DONE
+
+	// first wave does the LDS save;
+
+    s_lshl_b32	    s_save_alloc_size, s_save_alloc_size, 6			    //LDS size in dwords = lds_size * 64dw
+    s_lshl_b32	    s_save_alloc_size, s_save_alloc_size, 2			    //LDS size in bytes
+    s_mov_b32	    s_save_buf_rsrc2,  s_save_alloc_size			    //NUM_RECORDS in bytes
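+    // i.e. LDS is allocated in 64-dword granules:
+    // bytes = (lds_size << 6) dwords * 4 = lds_size * 256.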
+
+    // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+    //
+    get_vgpr_size_bytes(s_save_mem_offset)
+    get_sgpr_size_bytes(s_save_tmp)
+    s_add_u32  s_save_mem_offset, s_save_mem_offset, s_save_tmp
+    s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+
+    if (SWIZZLE_EN)
+	s_add_u32	s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0	      //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000		      //NUM_RECORDS in bytes
+    end
+
+    s_mov_b32	    m0, 0x0						  //lds_offset initial value = 0
+
+
+var LDS_DMA_ENABLE = 0
+var UNROLL = 0
+if UNROLL==0 && LDS_DMA_ENABLE==1
+	s_mov_b32  s3, 256*2
+	s_nop 0
+	s_nop 0
+	s_nop 0
+  L_SAVE_LDS_LOOP:
+	//TODO: it looks like the 2-instruction buffer_store/load clause for s/r will hurt performance???
+    if (SAVE_LDS)     //SPI always alloc LDS space in 128DW granularity
+	    buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1		// first 64DW
+	    buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+    end
+
+    s_add_u32	    m0, m0, s3						//every buffer_store_lds does 256 bytes
+    s_add_u32	    s_save_mem_offset, s_save_mem_offset, s3				//mem offset increased by 256 bytes
+    s_cmp_lt_u32    m0, s_save_alloc_size						//scc=(m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_LDS_LOOP							//LDS save is complete?
+
+elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROLL, has icache misses
+      // store from highest LDS address to lowest
+      s_mov_b32	 s3, 256*2
+      s_sub_u32	 m0, s_save_alloc_size, s3
+      s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
+      s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9   // how many 128-DW chunks...
+      s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size   // store from highest addr to lowest
+      s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4   // PC offset increment, each LDS save block costs 6*4 bytes of instructions
+      s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4   //3*4 bytes for the 3 instructions below (s_add, s_addc, s_setpc)
+      s_nop 0
+      s_nop 0
+      s_nop 0	//pad 3 dw to let LDS_DMA align with 64Bytes
+      s_getpc_b64 s[0:1]			      // reuse s[0:1], since s[0:1] already saved
+      s_add_u32	  s0, s0,s_save_alloc_size
+      s_addc_u32  s1, s1, 0
+      s_setpc_b64 s[0:1]
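+      // Computed-jump sketch: the loop below unrolls into 128 store blocks of
+      // 6*4 bytes each; adding (128 - needed_blocks) * 24 bytes (plus 3*4 for
+      // the add/addc/setpc themselves) to the PC lands on the first block
+      // that is actually required, storing LDS from the highest address down.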
+
+
+       for var i =0; i< 128; i++
+	    // be careful to make this a 64-byte aligned address, which could improve performance...
+	    buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0		// first 64DW
+	    buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256		  // second 64DW
+
+	    if i!=127
+		s_sub_u32  m0, m0, s3	// use an SGPR to shrink the 2-DW instruction to 1 DW, i.e. pack more LDS_DMA instructions into one cache line
+		s_sub_u32  s_save_mem_offset, s_save_mem_offset, s3
+	    end
+       end
+
+else   // BUFFER_STORE
+      v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+      v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2	// tid
+      v_mul_i32_i24 v2, v3, 8	// tid*8
+      v_mov_b32 v3, 256*2
+      s_mov_b32 m0, 0x10000
+      s_mov_b32 s0, s_save_buf_rsrc3
+      s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF	  // disable add_tid
+      s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000   //DFMT
+
+L_SAVE_LDS_LOOP_VECTOR:
+      ds_read_b64 v[0:1], v2	//x =LDS[a], byte address
+      s_waitcnt lgkmcnt(0)
+      buffer_store_dwordx2  v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1  glc:1  slc:1
+//	s_waitcnt vmcnt(0)
+//	v_add_u32 v2, vcc[0:1], v2, v3
+      v_add_u32 v2, v2, v3
+      v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+      s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
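+      // Each lane moves 8 bytes per iteration (ds_read_b64 +
+      // buffer_store_dwordx2), so the stride v3 = 256*2 covers
+      // 64 lanes * 8 bytes.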
+
+      // restore rsrc3
+      s_mov_b32 s_save_buf_rsrc3, s0
+
+end
+
+L_SAVE_LDS_DONE:
+
+
+    /*		save VGPRs - the rest of the VGPRs	*/
+    //////////////////////////////////////////////////////////////////////////////////////
+  L_SAVE_VGPR:
+    // VGPR SR memory offset: 0
+    // TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+    s_mov_b32	    s_save_mem_offset, (0+256*4)				    // for the remaining VGPRs (first 4 already saved)
+    s_mov_b32	    exec_lo, 0xFFFFFFFF						    //need every thread from now on
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)		    //vgpr_size
+    s_add_u32	    s_save_alloc_size, s_save_alloc_size, 1
+    s_lshl_b32	    s_save_alloc_size, s_save_alloc_size, 2			    //Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)	  //FIXME for GFX, zero is possible
+    s_lshl_b32	    s_save_buf_rsrc2,  s_save_alloc_size, 8			    //NUM_RECORDS in bytes (64 threads*4)
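+    // Worked example: a vgpr_size field of 3 gives (3 + 1) * 4 = 16 VGPRs and
+    // NUM_RECORDS = 16 << 8 = 4096 bytes (one dword per lane per VGPR).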
+    if (SWIZZLE_EN)
+	s_add_u32	s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0			    //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_save_buf_rsrc2,  0x1000000				    //NUM_RECORDS in bytes
+    end
+
+
+    // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+	// the const stride for DWx4 is 4*4 bytes
+	s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+	s_or_b32  s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4  // const stride to 4*4 bytes
+
+	s_mov_b32	  m0, 4	    // skip first 4 VGPRs
+	s_cmp_lt_u32	  m0, s_save_alloc_size
+	s_cbranch_scc0	  L_SAVE_VGPR_LOOP_END	    // no more vgprs
+
+	s_set_gpr_idx_on  m0, 0x1   // This will change M0
+	s_add_u32	  s_save_alloc_size, s_save_alloc_size, 0x1000	// because the above instruction changed m0
+L_SAVE_VGPR_LOOP:
+	v_mov_b32	  v0, v0   // v0 = v[0+m0]
+	v_mov_b32	  v1, v1
+	v_mov_b32	  v2, v2
+	v_mov_b32	  v3, v3
+
+
+	buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	s_add_u32	  m0, m0, 4
+	s_add_u32	  s_save_mem_offset, s_save_mem_offset, 256*4
+	s_cmp_lt_u32	  m0, s_save_alloc_size
+    s_cbranch_scc1  L_SAVE_VGPR_LOOP						    //VGPR save is complete?
+    s_set_gpr_idx_off
+L_SAVE_VGPR_LOOP_END:
+
+	s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+	s_or_b32  s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE  // reset const stride to 4 bytes
+else
+    // VGPR store using dw burst
+    s_mov_b32	      m0, 0x4	//VGPR initial index value = 4 (v0-v3 already saved)
+    s_cmp_lt_u32      m0, s_save_alloc_size
+    s_cbranch_scc0    L_SAVE_VGPR_END
+
+
+    s_set_gpr_idx_on	m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+    s_add_u32	    s_save_alloc_size, s_save_alloc_size, 0x1000		    //add 0x1000 since we compare m0 against it later
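+    // With s_set_gpr_idx_on, m0[15:12] = 0x1 selects source-relative (VSRC0)
+    // indexing while m0[7:0] stays the VGPR index; adding 0x1000 to
+    // s_save_alloc_size keeps the s_cmp_lt_u32 against the full m0 value
+    // consistent.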
+
+  L_SAVE_VGPR_LOOP:
+    v_mov_b32	    v0, v0		//v0 = v[0+m0]
+    v_mov_b32	    v1, v1		//v1 = v[1+m0]
+    v_mov_b32	    v2, v2		//v2 = v[2+m0]
+    v_mov_b32	    v3, v3		//v3 = v[3+m0]
+
+    if(USE_MTBUF_INSTEAD_OF_MUBUF)
+	tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+    else
+	buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256
+	buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*2
+	buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1  offset:256*3
+    end
+
+    s_add_u32	    m0, m0, 4							    //next vgpr index
+    s_add_u32	    s_save_mem_offset, s_save_mem_offset, 256*4			    //every buffer_store_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_save_alloc_size					    //scc = (m0 < s_save_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_SAVE_VGPR_LOOP						    //VGPR save is complete?
+    s_set_gpr_idx_off
+end
+
+L_SAVE_VGPR_END:
+
+
+
+
+
+
+    /*	   S_PGM_END_SAVED  */				    //FIXME  graphics ONLY
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
+	s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff    //pc[47:32]
+	s_add_u32 s_save_pc_lo, s_save_pc_lo, 4		    //pc[31:0]+4
+	s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0	    //carry bit over
+	s_rfe_b64 s_save_pc_lo				    //Return to the main shader program
+    else
+    end
+
+// Save Done timestamp
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime	s_g8sr_ts_save_d
+	// SGPR SR memory offset : size(VGPR)
+	get_vgpr_size_bytes(s_save_mem_offset)
+	s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET
+	s_waitcnt lgkmcnt(0)	     //FIXME, will cause xnack??
+	// Need reset rsrc2??
+	s_mov_b32 m0, s_save_mem_offset
+	s_mov_b32 s_save_buf_rsrc2,  0x1000000					//NUM_RECORDS in bytes
+	s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0	    glc:1
+end
+
+
+    s_branch	L_END_PGM
+
+
+
+/**************************************************************************/
+/*			restore routine					  */
+/**************************************************************************/
+
+L_RESTORE:
+    /*	    Setup Resource Constants    */
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+	//calculate wd_addr using absolute thread id
+	v_readlane_b32 s_restore_tmp, v9, 0
+	s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
+	s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
+	s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
+	s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
+	s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
+    else
+    end
+
+if G8SR_DEBUG_TIMESTAMP
+	s_memrealtime	s_g8sr_ts_restore_s
+	s_waitcnt lgkmcnt(0)	     //FIXME, will cause xnack??
+	// tma_lo/hi are sgpr 110, 111, which are not used in the 112-SGPR allocation case...
+	s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
+	s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1]   //backup ts to ttmp0/1, since exec will be finally restored..
+end
+
+
+
+    s_mov_b32	    s_restore_buf_rsrc0,    s_restore_spi_init_lo							    //base_addr_lo
+    s_and_b32	    s_restore_buf_rsrc1,    s_restore_spi_init_hi, 0x0000FFFF						    //base_addr_hi
+    s_or_b32	    s_restore_buf_rsrc1,    s_restore_buf_rsrc1,  S_RESTORE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32	    s_restore_buf_rsrc2,    0										    //NUM_RECORDS initial value = 0 (in bytes)
+    s_mov_b32	    s_restore_buf_rsrc3,    S_RESTORE_BUF_RSRC_WORD3_MISC
+    s_and_b32	    s_restore_tmp,	    s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+    s_lshr_b32	    s_restore_tmp,	    s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)	    //get ATC bit into position
+    s_or_b32	    s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp						    //or ATC
+    s_and_b32	    s_restore_tmp,	    s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+    s_lshr_b32	    s_restore_tmp,	    s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)   //get MTYPE bits into position
+    s_or_b32	    s_restore_buf_rsrc3,    s_restore_buf_rsrc3,  s_restore_tmp						    //or MTYPE
+
+    /*	    global mem offset		*/
+//  s_mov_b32	    s_restore_mem_offset, 0x0				    //mem offset initial value = 0
+
+    /*	    the first wave in the threadgroup	 */
+    s_and_b32	    s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+    s_cbranch_scc0  L_RESTORE_VGPR
+
+    /*		restore LDS	*/
+    //////////////////////////////
+  L_RESTORE_LDS:
+
+    s_mov_b32	    exec_lo, 0xFFFFFFFF							    //need every thread from now on   //be consistent with SAVE although can be moved ahead
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)		//lds_size
+    s_and_b32	    s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF		    //lds_size is zero?
+    s_cbranch_scc0  L_RESTORE_VGPR							    //no lds used? jump to L_RESTORE_VGPR
+    s_lshl_b32	    s_restore_alloc_size, s_restore_alloc_size, 6			    //LDS size in dwords = lds_size * 64dw
+    s_lshl_b32	    s_restore_alloc_size, s_restore_alloc_size, 2			    //LDS size in bytes
+    s_mov_b32	    s_restore_buf_rsrc2,    s_restore_alloc_size			    //NUM_RECORDS in bytes
+
+    // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+    //
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32  s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_add_u32  s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()	     //FIXME, Check if offset overflow???
+
+
+    if (SWIZZLE_EN)
+	s_add_u32	s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0			    //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_restore_buf_rsrc2,  0x1000000					    //NUM_RECORDS in bytes
+    end
+    s_mov_b32	    m0, 0x0								    //lds_offset initial value = 0
+
+  L_RESTORE_LDS_LOOP:
+    if (SAVE_LDS)
+	buffer_load_dword   v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1		       // first 64DW
+	buffer_load_dword   v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256	       // second 64DW
+    end
+    s_add_u32	    m0, m0, 256*2						// 128 DW
+    s_add_u32	    s_restore_mem_offset, s_restore_mem_offset, 256*2		//mem offset increased by 128DW
+    s_cmp_lt_u32    m0, s_restore_alloc_size					//scc=(m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_LDS_LOOP							    //LDS restore is complete?
+
+
+    /*		restore VGPRs	    */
+    //////////////////////////////
+  L_RESTORE_VGPR:
+	// VGPR SR memory offset : 0
+    s_mov_b32	    s_restore_mem_offset, 0x0
+    s_mov_b32	    exec_lo, 0xFFFFFFFF							    //need every thread from now on   //be consistent with SAVE although can be moved ahead
+    s_mov_b32	    exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)	//vgpr_size
+    s_add_u32	    s_restore_alloc_size, s_restore_alloc_size, 1
+    s_lshl_b32	    s_restore_alloc_size, s_restore_alloc_size, 2			    //Number of VGPRs = (vgpr_size + 1) * 4    (non-zero value)
+    s_lshl_b32	    s_restore_buf_rsrc2,  s_restore_alloc_size, 8			    //NUM_RECORDS in bytes (64 threads*4)
+    if (SWIZZLE_EN)
+	s_add_u32	s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0			    //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_restore_buf_rsrc2,  0x1000000					    //NUM_RECORDS in bytes
+    end
+
+if G8SR_VGPR_SR_IN_DWX4
+     get_vgpr_size_bytes(s_restore_mem_offset)
+     s_sub_u32	       s_restore_mem_offset, s_restore_mem_offset, 256*4
+
+     // the const stride for DWx4 is 4*4 bytes
+     s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+     s_or_b32  s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4  // const stride to 4*4 bytes
+
+     s_mov_b32	       m0, s_restore_alloc_size
+     s_set_gpr_idx_on  m0, 0x8	  // Note.. This will change m0
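+     // after s_set_gpr_idx_on, m0 = (alloc_size & 0xFF) | 0x8000 (M0[7:0]
+     // holds the index, M0[15:12] the mode), so the loop below is done
+     // once the index bits reach zero, i.e. m0 == 0x8000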
+
+L_RESTORE_VGPR_LOOP:
+     buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+     s_waitcnt vmcnt(0)
+     s_sub_u32	       m0, m0, 4
+     v_mov_b32	       v0, v0	// v[0+m0] = v0
+     v_mov_b32	       v1, v1
+     v_mov_b32	       v2, v2
+     v_mov_b32	       v3, v3
+     s_sub_u32	       s_restore_mem_offset, s_restore_mem_offset, 256*4
+     s_cmp_eq_u32      m0, 0x8000
+     s_cbranch_scc0    L_RESTORE_VGPR_LOOP
+     s_set_gpr_idx_off
+
+     s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF   // reset const stride to 0
+     s_or_b32  s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE  // restore const stride to 4 bytes
+
+else
+    // VGPR load using dw burst
+    s_mov_b32	    s_restore_mem_offset_save, s_restore_mem_offset	// restore starts at v4; v0..v3 are done last
+    s_add_u32	    s_restore_mem_offset, s_restore_mem_offset, 256*4
+    s_mov_b32	    m0, 4				//VGPR initial index value = 4
+    s_set_gpr_idx_on  m0, 0x8			    //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+    s_add_u32	    s_restore_alloc_size, s_restore_alloc_size, 0x8000			    //add 0x8000 since we compare m0 against it later
+
+  L_RESTORE_VGPR_LOOP:
+    if(USE_MTBUF_INSTEAD_OF_MUBUF)
+	tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+    else
+	buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+	buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
+	buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
+	buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
+    end
+    s_waitcnt	    vmcnt(0)								    //ensure data ready
+    v_mov_b32	    v0, v0								    //v[0+m0] = v0
+    v_mov_b32	    v1, v1
+    v_mov_b32	    v2, v2
+    v_mov_b32	    v3, v3
+    s_add_u32	    m0, m0, 4								    //next vgpr index
+    s_add_u32	    s_restore_mem_offset, s_restore_mem_offset, 256*4				//every buffer_load_dword does 256 bytes
+    s_cmp_lt_u32    m0, s_restore_alloc_size						    //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+    s_cbranch_scc1  L_RESTORE_VGPR_LOOP							    //VGPR restore (except v0) is complete?
+    s_set_gpr_idx_off
+											    /* VGPR restore on v0 */
+    if(USE_MTBUF_INSTEAD_OF_MUBUF)
+	tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+    else
+	buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1
+	buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256
+	buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256*2
+	buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save    slc:1 glc:1 offset:256*3
+    end
+
+end
+
+    /*		restore SGPRs	    */
+    //////////////////////////////
+
+    // SGPR SR memory offset : size(VGPR)
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4	   // restore SGPR from S[n] to S[0], by 16 sgprs group
+    // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)		    //sgpr_size
+    s_add_u32	    s_restore_alloc_size, s_restore_alloc_size, 1
+    s_lshl_b32	    s_restore_alloc_size, s_restore_alloc_size, 4			    //Number of SGPRs = (sgpr_size + 1) * 16   (non-zero value)
+
+    if (SGPR_SAVE_USE_SQC)
+	s_lshl_b32	s_restore_buf_rsrc2,	s_restore_alloc_size, 2			    //NUM_RECORDS in bytes
+    else
+	s_lshl_b32	s_restore_buf_rsrc2,	s_restore_alloc_size, 8			    //NUM_RECORDS in bytes (64 threads)
+    end
+    if (SWIZZLE_EN)
+	s_add_u32	s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0			    //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_restore_buf_rsrc2,  0x1000000					    //NUM_RECORDS in bytes
+    end
+
+    s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+    read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)	 //PV: further performance improvement can be made
+    s_waitcnt	    lgkmcnt(0)								    //ensure data ready
+
+    s_sub_u32 m0, m0, 16    // Restore from S[n] to S[0]
+    s_nop 0 // hazard SALU M0=> S_MOVREL
+
+    s_movreld_b64   s0, s0	//s[0+m0] = s0
+    s_movreld_b64   s2, s2
+    s_movreld_b64   s4, s4
+    s_movreld_b64   s6, s6
+    s_movreld_b64   s8, s8
+    s_movreld_b64   s10, s10
+    s_movreld_b64   s12, s12
+    s_movreld_b64   s14, s14
+
+    s_cmp_eq_u32    m0, 0		//scc = (m0 == 0) ? 1 : 0
+    s_cbranch_scc0  L_RESTORE_SGPR_LOOP		    //SGPR restore is complete?
+
+    /*	    restore HW registers    */
+    //////////////////////////////
+  L_RESTORE_HWREG:
+
+
+if G8SR_DEBUG_TIMESTAMP
+      s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
+      s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
+end
+
+    // HWREG SR memory offset : size(VGPR)+size(SGPR)
+    get_vgpr_size_bytes(s_restore_mem_offset)
+    get_sgpr_size_bytes(s_restore_tmp)
+    s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+    s_mov_b32	    s_restore_buf_rsrc2, 0x4						    //NUM_RECORDS in bytes
+    if (SWIZZLE_EN)
+	s_add_u32	s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0			    //FIXME need to use swizzle to enable bounds checking?
+    else
+	s_mov_b32	s_restore_buf_rsrc2,  0x1000000					    //NUM_RECORDS in bytes
+    end
+
+    read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)		    //M0
+    read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)		//PC
+    read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+    read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)		    //EXEC
+    read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+    read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset)		    //STATUS
+    read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset)		    //TRAPSTS
+    read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset)		    //XNACK_MASK_LO
+    read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset)		    //XNACK_MASK_HI
+    read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)		//MODE
+
+    s_waitcnt	    lgkmcnt(0)											    //from now on, it is safe to restore STATUS and IB_STS
+
+    //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
+    if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+	s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8		 //pc[31:0]+8	  //two back-to-back s_trap are used (first for save and second for restore)
+	s_addc_u32  s_restore_pc_hi, s_restore_pc_hi, 0x0	 //carry bit over
+    end
+    if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
+	s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4		 //pc[31:0]+4	  // save is hack through s_trap but restore is normal
+	s_addc_u32  s_restore_pc_hi, s_restore_pc_hi, 0x0	 //carry bit over
+    end
+
+    s_mov_b32	    m0,		s_restore_m0
+    s_mov_b32	    exec_lo,	s_restore_exec_lo
+    s_mov_b32	    exec_hi,	s_restore_exec_hi
+
+    s_and_b32	    s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+    s_and_b32	    s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+    s_lshr_b32	    s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+    s_setreg_b32    hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+    //s_setreg_b32  hwreg(HW_REG_TRAPSTS),  s_restore_trapsts	   //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
+    s_setreg_b32    hwreg(HW_REG_MODE),	    s_restore_mode
+
+    // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+    // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+    get_vgpr_size_bytes(s_restore_ttmps_lo)
+    get_sgpr_size_bytes(s_restore_ttmps_hi)
+    s_add_u32	    s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+    s_add_u32	    s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+    s_addc_u32	    s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+    s_and_b32	    s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+    s_load_dwordx2  [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
+    s_load_dwordx4  [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
+    s_load_dword    ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
+    s_load_dwordx2  [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
+    s_waitcnt	    lgkmcnt(0)
+
+    //reuse s_restore_m0 as a temp register
+    s_and_b32	    s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
+    s_lshr_b32	    s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
+    s_lshl_b32	    s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
+    s_mov_b32	    s_restore_tmp, 0x0										    //IB_STS is zero
+    s_or_b32	    s_restore_tmp, s_restore_tmp, s_restore_m0
+    s_and_b32	    s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+    s_lshr_b32	    s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+    s_lshl_b32	    s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+    s_or_b32	    s_restore_tmp, s_restore_tmp, s_restore_m0
+    s_and_b32	    s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+    s_lshr_b32	    s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+    s_setreg_b32    hwreg(HW_REG_IB_STS),   s_restore_tmp
+
+    s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff	//pc[47:32]	   //Do it here in order not to affect STATUS
+    s_and_b64	 exec, exec, exec  // Restore STATUS.EXECZ, not writable by s_setreg_b32
+    s_and_b64	 vcc, vcc, vcc	// Restore STATUS.VCCZ, not writable by s_setreg_b32
+    s_setreg_b32    hwreg(HW_REG_STATUS),   s_restore_status	 // SCC is included, which is changed by previous salu
+
+    s_barrier							//barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+if G8SR_DEBUG_TIMESTAMP
+    s_memrealtime s_g8sr_ts_restore_d
+    s_waitcnt lgkmcnt(0)
+end
+
+//  s_rfe_b64 s_restore_pc_lo					//Return to the main shader program and resume execution
+    s_rfe_restore_b64  s_restore_pc_lo, s_restore_m0		// s_restore_m0[0] is used to set STATUS.inst_atc
+
+
+/**************************************************************************/
+/*			the END						  */
+/**************************************************************************/
+L_END_PGM:
+    s_endpgm
+
+end
+
+
+/**************************************************************************/
+/*			the helper functions				  */
+/**************************************************************************/
+
+//Only used for saving hwregs to memory
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+	s_mov_b32 exec_lo, m0			//assuming exec_lo is not needed anymore from this point on
+	s_mov_b32 m0, s_mem_offset
+	s_buffer_store_dword s, s_rsrc, m0	glc:1
+	ack_sqc_store_workaround()
+	s_add_u32	s_mem_offset, s_mem_offset, 4
+	s_mov_b32   m0, exec_lo
+end
+
+
+// HWREGs are saved before SGPRs, so all HWREGs can be used.
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+	s_buffer_store_dwordx4 s[0], s_rsrc, 0	glc:1
+	ack_sqc_store_workaround()
+	s_buffer_store_dwordx4 s[4], s_rsrc, 16	 glc:1
+	ack_sqc_store_workaround()
+	s_buffer_store_dwordx4 s[8], s_rsrc, 32	 glc:1
+	ack_sqc_store_workaround()
+	s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+	ack_sqc_store_workaround()
+	s_add_u32	s_rsrc[0], s_rsrc[0], 4*16
+	s_addc_u32	s_rsrc[1], s_rsrc[1], 0x0	      // +scc
+end
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+    s_buffer_load_dword s, s_rsrc, s_mem_offset	    glc:1
+    s_add_u32	    s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+    s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset	glc:1
+    s_sub_u32	    s_mem_offset, s_mem_offset, 4*16
+end
+
+
+
+function get_lds_size_bytes(s_lds_size_byte)
+    // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+    s_getreg_b32   s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)		// lds_size
+    s_lshl_b32	   s_lds_size_byte, s_lds_size_byte, 8			    //LDS size in bytes = lds_size * 64 DW * 4 bytes    // granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte)
+    s_getreg_b32   s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)	 //vgpr_size
+    s_add_u32	   s_vgpr_size_byte, s_vgpr_size_byte, 1
+    s_lshl_b32	   s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //size in bytes = (vgpr_size + 1) * 4 VGPRs * 64 threads * 4 bytes	(non-zero value)   //FIXME for GFX, zero is possible
+end
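+// get_vgpr_size_bytes example: a vgpr_size field of 3 gives (3+1)*4 = 16 VGPRs,
+// i.e. 16 VGPRs * 64 threads * 4 bytes = (3+1) << (2+8) = 4096 bytes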
+
+function get_sgpr_size_bytes(s_sgpr_size_byte)
+    s_getreg_b32   s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)	 //sgpr_size
+    s_add_u32	   s_sgpr_size_byte, s_sgpr_size_byte, 1
+    s_lshl_b32	   s_sgpr_size_byte, s_sgpr_size_byte, 6 //size in bytes = (sgpr_size + 1) * 16 SGPRs * 4 bytes   (non-zero value)
+end
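+// get_sgpr_size_bytes example: an sgpr_size field of 5 gives (5+1)*16 = 96 SGPRs,
+// i.e. 96 SGPRs * 4 bytes = (5+1) << 6 = 384 bytes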
+
+function get_hwreg_size_bytes
+    return 128 //HWREG size 128 bytes
+end
+
+function ack_sqc_store_workaround
+    if ACK_SQC_STORE
+        s_waitcnt lgkmcnt(0)
+    end
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 59808a3..f64c555 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -233,7 +233,7 @@
 	pr_debug("Queue Size: 0x%llX, %u\n",
 			q_properties->queue_size, args->ring_size);
 
-	pr_debug("Queue r/w Pointers: %p, %p\n",
+	pr_debug("Queue r/w Pointers: %px, %px\n",
 			q_properties->read_ptr,
 			q_properties->write_ptr);
 
@@ -292,8 +292,16 @@
 
 
 	/* Return gpu_id as doorbell offset for mmap usage */
-	args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
+	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
+	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
 	args->doorbell_offset <<= PAGE_SHIFT;
+	if (KFD_IS_SOC15(dev->device_info->asic_family))
+		/* On SOC15 ASICs, doorbell allocation must be
+		 * per-device, and independent from the per-process
+		 * queue_id. Return the doorbell offset within the
+		 * doorbell aperture to user mode.
+		 */
+		args->doorbell_offset |= q_properties.doorbell_off;
 
 	mutex_unlock(&p->mutex);
 
@@ -1296,8 +1304,8 @@
 		return -EINVAL;
 	}
 
-	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
-			      GFP_KERNEL);
+	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+				    GFP_KERNEL);
 	if (!devices_arr)
 		return -ENOMEM;
 
@@ -1405,8 +1413,8 @@
 		return -EINVAL;
 	}
 
-	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
-			      GFP_KERNEL);
+	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+				    GFP_KERNEL);
 	if (!devices_arr)
 		return -ENOMEM;
 
@@ -1645,23 +1653,33 @@
 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct kfd_process *process;
+	struct kfd_dev *dev = NULL;
+	unsigned long vm_pgoff;
+	unsigned int gpu_id;
 
 	process = kfd_get_process(current);
 	if (IS_ERR(process))
 		return PTR_ERR(process);
 
-	if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
-			KFD_MMAP_DOORBELL_MASK) {
-		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
-		return kfd_doorbell_mmap(process, vma);
-	} else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
-			KFD_MMAP_EVENTS_MASK) {
-		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
+	vm_pgoff = vma->vm_pgoff;
+	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
+	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+	if (gpu_id)
+		dev = kfd_device_by_id(gpu_id);
+
+	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+	case KFD_MMAP_TYPE_DOORBELL:
+		if (!dev)
+			return -ENODEV;
+		return kfd_doorbell_mmap(dev, process, vma);
+
+	case KFD_MMAP_TYPE_EVENTS:
 		return kfd_event_mmap(process, vma);
-	} else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
-			KFD_MMAP_RESERVED_MEM_MASK) {
-		vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
-		return kfd_reserved_mem_mmap(process, vma);
+
+	case KFD_MMAP_TYPE_RESERVED_MEM:
+		if (!dev)
+			return -ENODEV;
+		return kfd_reserved_mem_mmap(dev, process, vma);
 	}
 
 	return -EFAULT;
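
With this encoding the offset returned by the create-queue ioctl can be passed
to mmap() unchanged; kfd_mmap() above strips the type and GPU-ID bits again
before dispatching. A minimal user-space sketch, assuming a hypothetical
kfd_fd and a db_slice_size equal to the device's kfd_doorbell_process_slice()
(error handling elided):

	/* doorbell_offset as returned by AMDKFD_IOC_CREATE_QUEUE already
	 * carries KFD_MMAP_TYPE_DOORBELL and the GPU ID in its high bits.
	 */
	void *doorbells = mmap(NULL, db_slice_size, PROT_READ | PROT_WRITE,
			       MAP_SHARED, kfd_fd, doorbell_offset);
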
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 4f126ef..296b3f2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -132,6 +132,9 @@
 #define fiji_cache_info  carrizo_cache_info
 #define polaris10_cache_info carrizo_cache_info
 #define polaris11_cache_info carrizo_cache_info
+/* TODO - check & update Vega10 cache details */
+#define vega10_cache_info carrizo_cache_info
+#define raven_cache_info carrizo_cache_info
 
 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
 		struct crat_subtype_computeunit *cu)
@@ -603,6 +606,14 @@
 		pcache_info = polaris11_cache_info;
 		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
 		break;
+	case CHIP_VEGA10:
+		pcache_info = vega10_cache_info;
+		num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+		break;
+	case CHIP_RAVEN:
+		pcache_info = raven_cache_info;
+		num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 334669996..7ee6cec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -20,16 +20,13 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
-#include <linux/amd-iommu.h>
-#endif
 #include <linux/bsearch.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_pm4_headers_vi.h"
-#include "cwsr_trap_handler_gfx8.asm"
+#include "cwsr_trap_handler.h"
 #include "kfd_iommu.h"
 
 #define MQD_SIZE_ALIGNED 768
@@ -41,6 +38,7 @@
 	.max_pasid_bits = 16,
 	/* max num of queues for KV.TODO should be a dynamic value */
 	.max_no_of_hqd	= 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -55,6 +53,7 @@
 	.max_pasid_bits = 16,
 	/* max num of queues for CZ.TODO should be a dynamic value */
 	.max_no_of_hqd	= 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -70,6 +69,7 @@
 	.max_pasid_bits = 16,
 	/* max num of queues for KV.TODO should be a dynamic value */
 	.max_no_of_hqd	= 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -83,6 +83,7 @@
 	.asic_family = CHIP_TONGA,
 	.max_pasid_bits = 16,
 	.max_no_of_hqd  = 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -96,6 +97,7 @@
 	.asic_family = CHIP_TONGA,
 	.max_pasid_bits = 16,
 	.max_no_of_hqd  = 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -109,6 +111,7 @@
 	.asic_family = CHIP_FIJI,
 	.max_pasid_bits = 16,
 	.max_no_of_hqd  = 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -122,6 +125,7 @@
 	.asic_family = CHIP_FIJI,
 	.max_pasid_bits = 16,
 	.max_no_of_hqd  = 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -136,6 +140,7 @@
 	.asic_family = CHIP_POLARIS10,
 	.max_pasid_bits = 16,
 	.max_no_of_hqd  = 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -149,6 +154,7 @@
 	.asic_family = CHIP_POLARIS10,
 	.max_pasid_bits = 16,
 	.max_no_of_hqd  = 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -162,6 +168,7 @@
 	.asic_family = CHIP_POLARIS11,
 	.max_pasid_bits = 16,
 	.max_no_of_hqd  = 24,
+	.doorbell_size  = 4,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
@@ -171,6 +178,34 @@
 	.needs_pci_atomics = true,
 };
 
+static const struct kfd_device_info vega10_device_info = {
+	.asic_family = CHIP_VEGA10,
+	.max_pasid_bits = 16,
+	.max_no_of_hqd  = 24,
+	.doorbell_size  = 8,
+	.ih_ring_entry_size = 8 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_v9,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = true,
+	.needs_iommu_device = false,
+	.needs_pci_atomics = false,
+};
+
+static const struct kfd_device_info vega10_vf_device_info = {
+	.asic_family = CHIP_VEGA10,
+	.max_pasid_bits = 16,
+	.max_no_of_hqd  = 24,
+	.doorbell_size  = 8,
+	.ih_ring_entry_size = 8 * sizeof(uint32_t),
+	.event_interrupt_class = &event_interrupt_class_v9,
+	.num_of_watch_points = 4,
+	.mqd_size_aligned = MQD_SIZE_ALIGNED,
+	.supports_cwsr = true,
+	.needs_iommu_device = false,
+	.needs_pci_atomics = false,
+};
+
 
 struct kfd_deviceid {
 	unsigned short did;
@@ -250,6 +285,15 @@
 	{ 0x67EB, &polaris11_device_info },	/* Polaris11 */
 	{ 0x67EF, &polaris11_device_info },	/* Polaris11 */
 	{ 0x67FF, &polaris11_device_info },	/* Polaris11 */
+	{ 0x6860, &vega10_device_info },	/* Vega10 */
+	{ 0x6861, &vega10_device_info },	/* Vega10 */
+	{ 0x6862, &vega10_device_info },	/* Vega10 */
+	{ 0x6863, &vega10_device_info },	/* Vega10 */
+	{ 0x6864, &vega10_device_info },	/* Vega10 */
+	{ 0x6867, &vega10_device_info },	/* Vega10 */
+	{ 0x6868, &vega10_device_info },	/* Vega10 */
+	{ 0x686C, &vega10_vf_device_info },	/* Vega10  vf*/
+	{ 0x687F, &vega10_device_info },	/* Vega10 */
 };
 
 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -279,7 +323,7 @@
 	struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
 {
 	struct kfd_dev *kfd;
-
+	int ret;
 	const struct kfd_device_info *device_info =
 					lookup_device_info(pdev->device);
 
@@ -288,19 +332,18 @@
 		return NULL;
 	}
 
-	if (device_info->needs_pci_atomics) {
-		/* Allow BIF to recode atomics to PCIe 3.0
-		 * AtomicOps. 32 and 64-bit requests are possible and
-		 * must be supported.
-		 */
-		if (pci_enable_atomic_ops_to_root(pdev,
-				PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
-				PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
-			dev_info(kfd_device,
-				"skipped device %x:%x, PCI rejects atomics",
-				 pdev->vendor, pdev->device);
-			return NULL;
-		}
+	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
+	 * 32 and 64-bit requests are possible and must be
+	 * supported.
+	 */
+	ret = pci_enable_atomic_ops_to_root(pdev,
+			PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+			PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+	if (device_info->needs_pci_atomics && ret < 0) {
+		dev_info(kfd_device,
+			 "skipped device %x:%x, PCI rejects atomics\n",
+			 pdev->vendor, pdev->device);
+		return NULL;
 	}
 
 	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
@@ -323,10 +366,16 @@
 static void kfd_cwsr_init(struct kfd_dev *kfd)
 {
 	if (cwsr_enable && kfd->device_info->supports_cwsr) {
-		BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+		if (kfd->device_info->asic_family < CHIP_VEGA10) {
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+		} else {
+			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
+			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+		}
 
-		kfd->cwsr_isa = cwsr_trap_gfx8_hex;
-		kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
 		kfd->cwsr_enabled = true;
 	}
 }
@@ -541,6 +590,44 @@
 	spin_unlock(&kfd->interrupt_lock);
 }
 
+int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+{
+	struct kfd_process *p;
+	int r;
+
+	/* Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function increments the process ref count.
+	 */
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return -ESRCH;
+
+	r = kfd_process_evict_queues(p);
+
+	kfd_unref_process(p);
+	return r;
+}
+
+int kgd2kfd_resume_mm(struct mm_struct *mm)
+{
+	struct kfd_process *p;
+	int r;
+
+	/* Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function increments the process ref count.
+	 */
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return -ESRCH;
+
+	r = kfd_process_restore_queues(p);
+
+	kfd_unref_process(p);
+	return r;
+}
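+
+/* Hypothetical caller pattern (e.g. from the amdgpu eviction worker),
+ * showing how the two entry points pair up; the surrounding steps are
+ * illustrative only:
+ *
+ *	if (!kgd2kfd_quiesce_mm(mm)) {
+ *		... move/validate the process BOs ...
+ *		kgd2kfd_resume_mm(mm);
+ *	}
+ */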
+
 /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
  *   prepare for safe eviction of KFD BOs that belong to the specified
  *   process.
@@ -652,7 +739,7 @@
 	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
 		return -ENOMEM;
 
-	*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
 	if ((*mem_obj) == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d55d29d..668ad07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -110,6 +110,57 @@
 						qpd->sh_mem_bases);
 }
 
+static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+{
+	struct kfd_dev *dev = qpd->dqm->dev;
+
+	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
+		/* On pre-SOC15 chips we need to use the queue ID to
+		 * preserve the user mode ABI.
+		 */
+		q->doorbell_id = q->properties.queue_id;
+	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+		/* For SDMA queues on SOC15, use static doorbell
+		 * assignments based on the engine and queue.
+		 */
+		q->doorbell_id = dev->shared_resources.sdma_doorbell
+			[q->properties.sdma_engine_id]
+			[q->properties.sdma_queue_id];
+	} else {
+		/* For CP queues on SOC15 reserve a free doorbell ID */
+		unsigned int found;
+
+		found = find_first_zero_bit(qpd->doorbell_bitmap,
+					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+			pr_debug("No doorbells available");
+			return -EBUSY;
+		}
+		set_bit(found, qpd->doorbell_bitmap);
+		q->doorbell_id = found;
+	}
+
+	q->properties.doorbell_off =
+		kfd_doorbell_id_to_offset(dev, q->process,
+					  q->doorbell_id);
+
+	return 0;
+}
+
+static void deallocate_doorbell(struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	unsigned int old;
+	struct kfd_dev *dev = qpd->dqm->dev;
+
+	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
+	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
+		return;
+
+	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
+	WARN_ON(!old);
+}
+
 static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
@@ -145,15 +196,19 @@
 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
 				struct qcm_process_device *qpd)
 {
-	uint32_t len;
+	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
+	int ret;
 
 	if (!qpd->ib_kaddr)
 		return -ENOMEM;
 
-	len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+	if (ret)
+		return ret;
 
 	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
-				qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
+				pmf->release_mem_size / sizeof(uint32_t));
 }
 
 static void deallocate_vmid(struct device_queue_manager *dqm,
@@ -301,10 +356,14 @@
 	if (retval)
 		return retval;
 
+	retval = allocate_doorbell(qpd, q);
+	if (retval)
+		goto out_deallocate_hqd;
+
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (retval)
-		goto out_deallocate_hqd;
+		goto out_deallocate_doorbell;
 
 	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
 			q->pipe, q->queue);
@@ -324,6 +383,8 @@
 
 out_uninit_mqd:
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+	deallocate_doorbell(qpd, q);
 out_deallocate_hqd:
 	deallocate_hqd(dqm, q);
 
@@ -357,6 +418,8 @@
 	}
 	dqm->total_queue_count--;
 
+	deallocate_doorbell(qpd, q);
+
 	retval = mqd->destroy_mqd(mqd, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 				KFD_UNMAP_LATENCY_MS,
@@ -861,6 +924,10 @@
 	q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
 	q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
 
+	retval = allocate_doorbell(qpd, q);
+	if (retval)
+		goto out_deallocate_sdma_queue;
+
 	pr_debug("SDMA id is:    %d\n", q->sdma_id);
 	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
 	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -869,7 +936,7 @@
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (retval)
-		goto out_deallocate_sdma_queue;
+		goto out_deallocate_doorbell;
 
 	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
 	if (retval)
@@ -879,6 +946,8 @@
 
 out_uninit_mqd:
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
 	deallocate_sdma_queue(dqm, q->sdma_id);
 
@@ -1070,12 +1139,17 @@
 		q->properties.sdma_engine_id =
 			q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
 	}
+
+	retval = allocate_doorbell(qpd, q);
+	if (retval)
+		goto out_deallocate_sdma_queue;
+
 	mqd = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
 
 	if (!mqd) {
 		retval = -ENOMEM;
-		goto out_deallocate_sdma_queue;
+		goto out_deallocate_doorbell;
 	}
 	/*
 	 * Eviction state logic: we only mark active queues as evicted
@@ -1093,7 +1167,7 @@
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
 	if (retval)
-		goto out_deallocate_sdma_queue;
+		goto out_deallocate_doorbell;
 
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
@@ -1117,6 +1191,8 @@
 	mutex_unlock(&dqm->lock);
 	return retval;
 
+out_deallocate_doorbell:
+	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1257,6 +1333,8 @@
 		goto failed;
 	}
 
+	deallocate_doorbell(qpd, q);
+
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		dqm->sdma_queue_count--;
 		deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1308,7 +1386,10 @@
 				   void __user *alternate_aperture_base,
 				   uint64_t alternate_aperture_size)
 {
-	bool retval;
+	bool retval = true;
+
+	if (!dqm->asic_ops.set_cache_memory_policy)
+		return retval;
 
 	mutex_lock(&dqm->lock);
 
@@ -1577,6 +1658,11 @@
 	case CHIP_POLARIS11:
 		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
 		break;
+
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		device_queue_manager_init_v9(&dqm->asic_ops);
+		break;
 	default:
 		WARN(1, "Unexpected ASIC family %u",
 		     dev->device_info->asic_family);
@@ -1627,6 +1713,18 @@
 	int pipe, queue;
 	int r = 0;
 
+	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
+		KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
+	if (!r) {
+		seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
+				KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
+				KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
+				KFD_CIK_HIQ_QUEUE);
+		seq_reg_dump(m, dump, n_regs);
+
+		kfree(dump);
+	}
+
 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 412beff..59a6b19 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -200,6 +200,8 @@
 		struct device_queue_manager_asic_ops *asic_ops);
 void device_queue_manager_init_vi_tonga(
 		struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v9(
+		struct device_queue_manager_asic_ops *asic_ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 unsigned int get_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
new file mode 100644
index 0000000..79e5bcf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "vega10_enum.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd);
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+			    struct qcm_process_device *qpd);
+
+void device_queue_manager_init_v9(
+	struct device_queue_manager_asic_ops *asic_ops)
+{
+	asic_ops->update_qpd = update_qpd_v9;
+	asic_ops->init_sdma_vm = init_sdma_vm_v9;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+	uint32_t shared_base = pdd->lds_base >> 48;
+	uint32_t private_base = pdd->scratch_base >> 48;
+
+	return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+		private_base;
+}
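+
+/* e.g. with lds_base = 0x1ULL << 48 and scratch_base = 0x2ULL << 48 (the
+ * GFXv9 apertures set up in kfd_flat_memory.c), shared_base = 0x0001 and
+ * private_base = 0x0002, so sh_mem_bases packs to 0x00010002, assuming
+ * SH_MEM_BASES__SHARED_BASE__SHIFT == 16.
+ */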
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+			 struct qcm_process_device *qpd)
+{
+	struct kfd_process_device *pdd;
+
+	pdd = qpd_to_pdd(qpd);
+
+	/* check if sh_mem_config register already configured */
+	if (qpd->sh_mem_config == 0) {
+		qpd->sh_mem_config =
+				SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+					SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+		if (vega10_noretry &&
+		    !dqm->dev->device_info->needs_iommu_device)
+			qpd->sh_mem_config |=
+				1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+		qpd->sh_mem_ape1_limit = 0;
+		qpd->sh_mem_ape1_base = 0;
+	}
+
+	qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+
+	pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+
+	return 0;
+}
+
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+			    struct qcm_process_device *qpd)
+{
+	/* Not needed on SDMAv4 any more */
+	q->properties.sdma_vm_addr = 0;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index ebb4da14..c3744d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -33,7 +33,6 @@
 
 static DEFINE_IDA(doorbell_ida);
 static unsigned int max_doorbell_slices;
-#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
 
 /*
  * Each device exposes a doorbell aperture, a PCI MMIO aperture that
@@ -50,9 +49,9 @@
  */
 
 /* # of doorbell bytes allocated for each process. */
-static inline size_t doorbell_process_allocation(void)
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
 {
-	return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
+	return roundup(kfd->device_info->doorbell_size *
 			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
 			PAGE_SIZE);
 }
@@ -72,16 +71,16 @@
 
 	doorbell_start_offset =
 			roundup(kfd->shared_resources.doorbell_start_offset,
-					doorbell_process_allocation());
+					kfd_doorbell_process_slice(kfd));
 
 	doorbell_aperture_size =
 			rounddown(kfd->shared_resources.doorbell_aperture_size,
-					doorbell_process_allocation());
+					kfd_doorbell_process_slice(kfd));
 
 	if (doorbell_aperture_size > doorbell_start_offset)
 		doorbell_process_limit =
 			(doorbell_aperture_size - doorbell_start_offset) /
-						doorbell_process_allocation();
+						kfd_doorbell_process_slice(kfd);
 	else
 		return -ENOSPC;
 
@@ -95,7 +94,7 @@
 	kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
 
 	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
-						doorbell_process_allocation());
+					   kfd_doorbell_process_slice(kfd));
 
 	if (!kfd->doorbell_kernel_ptr)
 		return -ENOMEM;
@@ -127,21 +126,16 @@
 		iounmap(kfd->doorbell_kernel_ptr);
 }
 
-int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+		      struct vm_area_struct *vma)
 {
 	phys_addr_t address;
-	struct kfd_dev *dev;
 
 	/*
 	 * For simplicity we only allow mapping of the entire doorbell
 	 * allocation of a single device & process.
 	 */
-	if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
-		return -EINVAL;
-
-	/* Find kfd device according to gpu id */
-	dev = kfd_device_by_id(vma->vm_pgoff);
-	if (!dev)
+	if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
 		return -EINVAL;
 
 	/* Calculate physical address of doorbell */
@@ -158,19 +152,19 @@
 		 "     vm_flags            == 0x%04lX\n"
 		 "     size                == 0x%04lX\n",
 		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
-		 doorbell_process_allocation());
+		 kfd_doorbell_process_slice(dev));
 
 
 	return io_remap_pfn_range(vma,
 				vma->vm_start,
 				address >> PAGE_SHIFT,
-				doorbell_process_allocation(),
+				kfd_doorbell_process_slice(dev),
 				vma->vm_page_prot);
 }
 
 
 /* get kernel iomem pointer for a doorbell */
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 					unsigned int *doorbell_off)
 {
 	u32 inx;
@@ -185,6 +179,8 @@
 	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
 		return NULL;
 
+	inx *= kfd->device_info->doorbell_size / sizeof(u32);
+
 	/*
 	 * Calculating the kernel doorbell offset using the first
 	 * doorbell page.
@@ -210,7 +206,7 @@
 	mutex_unlock(&kfd->doorbell_mutex);
 }
 
-inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
+void write_kernel_doorbell(void __iomem *db, u32 value)
 {
 	if (db) {
 		writel(value, db);
@@ -218,30 +214,37 @@
 	}
 }
 
-/*
- * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1
- * to doorbells with the process's doorbell page
- */
-unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+void write_kernel_doorbell64(void __iomem *db, u64 value)
+{
+	if (db) {
+		WARN(((unsigned long)db & 7) != 0,
+		     "Unaligned 64-bit doorbell");
+		writeq(value, (u64 __iomem *)db);
+		pr_debug("writing %llu to doorbell address %p\n", value, db);
+	}
+}
+
+unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
 					struct kfd_process *process,
-					unsigned int queue_id)
+					unsigned int doorbell_id)
 {
 	/*
 	 * doorbell_id_offset accounts for doorbells taken by KGD.
-	 * index * doorbell_process_allocation/sizeof(u32) adjusts to
-	 * the process's doorbells.
+	 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
+	 * the process's doorbells. The offset returned is in dword
+	 * units regardless of the ASIC-dependent doorbell size.
 	 */
 	return kfd->doorbell_id_offset +
 		process->doorbell_index
-		* doorbell_process_allocation() / sizeof(u32) +
-		queue_id;
+		* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+		doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
 }
 
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
 {
 	uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
 				kfd->shared_resources.doorbell_start_offset) /
-					doorbell_process_allocation() + 1;
+					kfd_doorbell_process_slice(kfd) + 1;
 
 	return num_of_elems;
 
@@ -251,7 +254,7 @@
 					struct kfd_process *process)
 {
 	return dev->doorbell_base +
-		process->doorbell_index * doorbell_process_allocation();
+		process->doorbell_index * kfd_doorbell_process_slice(dev);
 }
 
 int kfd_alloc_process_doorbells(struct kfd_process *process)
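
A worked example of the dword-offset math above, assuming the usual 1024
queues per process, 4 KB pages and the 8-byte SOC15 doorbells: the
per-process slice is roundup(8 * 1024, PAGE_SIZE) = 8192 bytes, so
(illustrative values only):

	unsigned int off = kfd->doorbell_id_offset	/* doorbells owned by KGD   */
			 + doorbell_index * 8192 / 4	/* process slice, in dwords */
			 + doorbell_id * 8 / 4;		/* 2 dwords per doorbell    */
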
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 4890a90..5562e94 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -345,7 +345,7 @@
 	case KFD_EVENT_TYPE_DEBUG:
 		ret = create_signal_event(devkfd, p, ev);
 		if (!ret) {
-			*event_page_offset = KFD_MMAP_EVENTS_MASK;
+			*event_page_offset = KFD_MMAP_TYPE_EVENTS;
 			*event_page_offset <<= PAGE_SHIFT;
 			*event_slot_index = ev->event_id;
 		}
@@ -496,7 +496,7 @@
 			pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
 					     partial_id, valid_id_bits);
 
-		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
 			/* With relatively few events, it's faster to
 			 * iterate over the event IDR
 			 */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 66852de..97d5423 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -275,23 +275,35 @@
  * for FLAT_* / S_LOAD operations.
  */
 
-#define MAKE_GPUVM_APP_BASE(gpu_num) \
+#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
 	(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
 
 #define MAKE_GPUVM_APP_LIMIT(base, size) \
 	(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
 
-#define MAKE_SCRATCH_APP_BASE() \
+#define MAKE_SCRATCH_APP_BASE_VI() \
 	(((uint64_t)(0x1UL) << 61) + 0x100000000L)
 
 #define MAKE_SCRATCH_APP_LIMIT(base) \
 	(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
-#define MAKE_LDS_APP_BASE() \
+#define MAKE_LDS_APP_BASE_VI() \
 	(((uint64_t)(0x1UL) << 61) + 0x0)
 #define MAKE_LDS_APP_LIMIT(base) \
 	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
 
+/* On GFXv9 the LDS and scratch apertures are programmed independently
+ * using the high 16 bits of the 64-bit virtual address. They must be
+ * in the hole, which will be the case as long as the high 16 bits are
+ * not 0.
+ *
+ * The aperture sizes are still 4GB implicitly.
+ *
+ * A GPUVM aperture is not applicable on GFXv9.
+ */
+#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
+#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
+
 /* User mode manages most of the SVM aperture address space. The low
  * 16MB are reserved for kernel use (CWSR trap handler and kernel IB
  * for now).
@@ -300,6 +312,55 @@
 #define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
 #define SVM_IB_BASE   (SVM_CWSR_BASE - PAGE_SIZE)
 
+static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
+{
+	/*
+	 * node id cannot be 0 - the three MSBs of the
+	 * aperture must not be 0
+	 */
+	pdd->lds_base = MAKE_LDS_APP_BASE_VI();
+	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+	if (!pdd->dev->device_info->needs_iommu_device) {
+		/* dGPUs: SVM aperture starting at 0
+		 * with small reserved space for kernel.
+		 * Set them to CANONICAL addresses.
+		 */
+		pdd->gpuvm_base = SVM_USER_BASE;
+		pdd->gpuvm_limit =
+			pdd->dev->shared_resources.gpuvm_size - 1;
+	} else {
+		/* set them to non CANONICAL addresses, and no SVM is
+		 * allocated.
+		 */
+		pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
+		pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
+				pdd->dev->shared_resources.gpuvm_size);
+	}
+
+	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
+	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
+static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
+{
+	pdd->lds_base = MAKE_LDS_APP_BASE_V9();
+	pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+	/* Raven needs SVM to support graphics handles, etc. Leave the small
+	 * reserved space before SVM on Raven as well, even though we don't
+	 * have to.
+	 * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
+	 * are used in Thunk to reserve SVM.
+	 */
+	pdd->gpuvm_base = SVM_USER_BASE;
+	pdd->gpuvm_limit =
+		pdd->dev->shared_resources.gpuvm_size - 1;
+
+	pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
+	pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
 int kfd_init_apertures(struct kfd_process *process)
 {
 	uint8_t id  = 0;
@@ -307,9 +368,7 @@
 	struct kfd_process_device *pdd;
 
 	/*Iterating over all devices*/
-	while (kfd_topology_enum_kfd_devices(id, &dev) == 0 &&
-		id < NUM_OF_SUPPORTED_GPUS) {
-
+	while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
 		if (!dev) {
 			id++; /* Skip non GPU devices */
 			continue;
@@ -318,7 +377,7 @@
 		pdd = kfd_create_process_device_data(dev, process);
 		if (!pdd) {
 			pr_err("Failed to create process device data\n");
-			return -1;
+			return -ENOMEM;
 		}
 		/*
 		 * For 64 bit process apertures will be statically reserved in
@@ -330,32 +389,30 @@
 			pdd->gpuvm_base = pdd->gpuvm_limit = 0;
 			pdd->scratch_base = pdd->scratch_limit = 0;
 		} else {
-			/* Same LDS and scratch apertures can be used
-			 * on all GPUs. This allows using more dGPUs
-			 * than placement options for apertures.
-			 */
-			pdd->lds_base = MAKE_LDS_APP_BASE();
-			pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+			switch (dev->device_info->asic_family) {
+			case CHIP_KAVERI:
+			case CHIP_HAWAII:
+			case CHIP_CARRIZO:
+			case CHIP_TONGA:
+			case CHIP_FIJI:
+			case CHIP_POLARIS10:
+			case CHIP_POLARIS11:
+				kfd_init_apertures_vi(pdd, id);
+				break;
+			case CHIP_VEGA10:
+			case CHIP_RAVEN:
+				kfd_init_apertures_v9(pdd, id);
+				break;
+			default:
+				WARN(1, "Unexpected ASIC family %u",
+				     dev->device_info->asic_family);
+				return -EINVAL;
+			}
 
-			pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
-			pdd->scratch_limit =
-				MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
-
-			if (dev->device_info->needs_iommu_device) {
-				/* APUs: GPUVM aperture in
-				 * non-canonical address space
+			if (!dev->device_info->needs_iommu_device) {
+				/* dGPUs: the reserved space for kernel
+				 * before SVM
 				 */
-				pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
-				pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(
-					pdd->gpuvm_base,
-					dev->shared_resources.gpuvm_size);
-			} else {
-				/* dGPUs: SVM aperture starting at 0
-				 * with small reserved space for kernel
-				 */
-				pdd->gpuvm_base = SVM_USER_BASE;
-				pdd->gpuvm_limit =
-					dev->shared_resources.gpuvm_size - 1;
 				pdd->qpd.cwsr_base = SVM_CWSR_BASE;
 				pdd->qpd.ib_base = SVM_IB_BASE;
 			}
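
For reference, the GFXv9 aperture macros introduced above evaluate to fixed
values; a quick check that both bases land in the hole (high 16 bits
non-zero) with the implicit 4 GB size:

	LDS base      = 0x1ULL << 48		  = 0x0001000000000000
	LDS limit     = base | 0xFFFFFFFF	  = 0x00010000FFFFFFFF
	scratch base  = 0x2ULL << 48		  = 0x0002000000000000
	scratch limit = scratch base | 0xFFFFFFFF = 0x00020000FFFFFFFF
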
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
new file mode 100644
index 0000000..37029ba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "soc15_int.h"
+
+
+static bool event_interrupt_isr_v9(struct kfd_dev *dev,
+					const uint32_t *ih_ring_entry)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	const uint32_t *data = ih_ring_entry;
+
+	/* Only handle interrupts from KFD VMIDs */
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	if (vmid < dev->vm_info.first_vmid_kfd ||
+	    vmid > dev->vm_info.last_vmid_kfd)
+		return 0;
+
+	/* If there is no valid PASID, it's likely a firmware bug */
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+	if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+		return 0;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+
+	pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
+		 client_id, source_id, pasid);
+	pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+		 data[0], data[1], data[2], data[3],
+		 data[4], data[5], data[6], data[7]);
+
+	/* Interrupt types we care about: various signals and faults.
+	 * They will be forwarded to a work queue (see below).
+	 */
+	return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+		source_id == SOC15_INTSRC_SDMA_TRAP ||
+		source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+		source_id == SOC15_INTSRC_CP_BAD_OPCODE;
+}
+
+static void event_interrupt_wq_v9(struct kfd_dev *dev,
+					const uint32_t *ih_ring_entry)
+{
+	uint16_t source_id, client_id, pasid, vmid;
+	uint32_t context_id;
+
+	source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+	client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+	pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+	vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+	context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+	if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+		kfd_signal_event_interrupt(pasid, context_id, 32);
+	else if (source_id == SOC15_INTSRC_SDMA_TRAP)
+		kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28);
+	else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG)
+		kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24);
+	else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+		kfd_signal_hw_exception_event(pasid);
+	else if (client_id == SOC15_IH_CLIENTID_VMC ||
+		 client_id == SOC15_IH_CLIENTID_UTCL2) {
+		/* TODO */
+	}
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v9 = {
+	.interrupt_isr = event_interrupt_isr_v9,
+	.interrupt_wq = event_interrupt_wq_v9,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 035c351..db6d933 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -139,10 +139,12 @@
 {
 	struct kfd_dev *dev = container_of(work, struct kfd_dev,
 						interrupt_work);
+	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
 
-	uint32_t ih_ring_entry[DIV_ROUND_UP(
-				dev->device_info->ih_ring_entry_size,
-				sizeof(uint32_t))];
+	if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
+		dev_err_once(kfd_chardev(), "Ring entry too small\n");
+		return;
+	}
 
 	while (dequeue_ih_ring_entry(dev, ih_ring_entry))
 		dev->device_info->event_interrupt_class->interrupt_wq(dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 69f4964..476951d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -99,7 +99,7 @@
 	kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
 	kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
 
-	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
+	retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
 					&kq->wptr_mem);
 
 	if (retval != 0)
@@ -208,6 +208,7 @@
 	size_t available_size;
 	size_t queue_size_dwords;
 	uint32_t wptr, rptr;
+	uint64_t wptr64;
 	unsigned int *queue_address;
 
 	/* When rptr == wptr, the buffer is empty.
@@ -216,7 +217,8 @@
 	 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
 	 */
 	rptr = *kq->rptr_kernel;
-	wptr = *kq->wptr_kernel;
+	wptr = kq->pending_wptr;
+	wptr64 = kq->pending_wptr64;
 	queue_address = (unsigned int *)kq->pq_kernel_addr;
 	queue_size_dwords = kq->queue->properties.queue_size / 4;
 
@@ -232,29 +234,33 @@
 		 * make sure calling functions know
 		 * acquire_packet_buffer() failed
 		 */
-		*buffer_ptr = NULL;
-		return -ENOMEM;
+		goto err_no_space;
 	}
 
 	if (wptr + packet_size_in_dwords >= queue_size_dwords) {
 		/* make sure after rolling back to position 0, there is
 		 * still enough space.
 		 */
-		if (packet_size_in_dwords >= rptr) {
-			*buffer_ptr = NULL;
-			return -ENOMEM;
-		}
+		if (packet_size_in_dwords >= rptr)
+			goto err_no_space;
+
 		/* fill nops, roll back and start at position 0 */
 		while (wptr > 0) {
 			queue_address[wptr] = kq->nop_packet;
 			wptr = (wptr + 1) % queue_size_dwords;
+			wptr64++;
 		}
 	}
 
 	*buffer_ptr = &queue_address[wptr];
 	kq->pending_wptr = wptr + packet_size_in_dwords;
+	kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
 
 	return 0;
+
+err_no_space:
+	*buffer_ptr = NULL;
+	return -ENOMEM;
 }
 
 static void submit_packet(struct kernel_queue *kq)
@@ -270,14 +276,18 @@
 	pr_debug("\n");
 #endif
 
-	*kq->wptr_kernel = kq->pending_wptr;
-	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
-				kq->pending_wptr);
+	kq->ops_asic_specific.submit_packet(kq);
 }
 
 static void rollback_packet(struct kernel_queue *kq)
 {
-	kq->pending_wptr = *kq->queue->properties.write_ptr;
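+	/* wptr_kernel and wptr64_kernel alias the same buffer; the 32-bit
+	 * ring index is recovered modulo the queue size in dwords.
+	 */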
+	if (kq->dev->device_info->doorbell_size == 8) {
+		kq->pending_wptr64 = *kq->wptr64_kernel;
+		kq->pending_wptr = *kq->wptr_kernel %
+			(kq->queue->properties.queue_size / 4);
+	} else {
+		kq->pending_wptr = *kq->wptr_kernel;
+	}
 }
 
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -308,6 +318,11 @@
 	case CHIP_HAWAII:
 		kernel_queue_init_cik(&kq->ops_asic_specific);
 		break;
+
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		kernel_queue_init_v9(&kq->ops_asic_specific);
+		break;
 	default:
 		WARN(1, "Unexpected ASIC family %u",
 		     dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 5940531..97aff20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -72,6 +72,7 @@
 	struct kfd_dev		*dev;
 	struct mqd_manager	*mqd;
 	struct queue		*queue;
+	uint64_t		pending_wptr64;
 	uint32_t		pending_wptr;
 	unsigned int		nop_packet;
 
@@ -79,7 +80,10 @@
 	uint32_t		*rptr_kernel;
 	uint64_t		rptr_gpu_addr;
 	struct kfd_mem_obj	*wptr_mem;
-	uint32_t		*wptr_kernel;
+	union {
+		uint64_t	*wptr64_kernel;
+		uint32_t	*wptr_kernel;
+	};
 	uint64_t		wptr_gpu_addr;
 	struct kfd_mem_obj	*pq;
 	uint64_t		pq_gpu_addr;
@@ -97,5 +101,6 @@
 
 void kernel_queue_init_cik(struct kernel_queue_ops *ops);
 void kernel_queue_init_vi(struct kernel_queue_ops *ops);
+void kernel_queue_init_v9(struct kernel_queue_ops *ops);
 
 #endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
index a90eb44..19e54ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
@@ -26,11 +26,13 @@
 static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
 			enum kfd_queue_type type, unsigned int queue_size);
 static void uninitialize_cik(struct kernel_queue *kq);
+static void submit_packet_cik(struct kernel_queue *kq);
 
 void kernel_queue_init_cik(struct kernel_queue_ops *ops)
 {
 	ops->initialize = initialize_cik;
 	ops->uninitialize = uninitialize_cik;
+	ops->submit_packet = submit_packet_cik;
 }
 
 static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
@@ -42,3 +44,10 @@
 static void uninitialize_cik(struct kernel_queue *kq)
 {
 }
+
+static void submit_packet_cik(struct kernel_queue *kq)
+{
+	*kq->wptr_kernel = kq->pending_wptr;
+	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+				kq->pending_wptr);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
new file mode 100644
index 0000000..684a3bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_ai.h"
+#include "kfd_pm4_opcodes.h"
+
+static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
+			enum kfd_queue_type type, unsigned int queue_size);
+static void uninitialize_v9(struct kernel_queue *kq);
+static void submit_packet_v9(struct kernel_queue *kq);
+
+void kernel_queue_init_v9(struct kernel_queue_ops *ops)
+{
+	ops->initialize = initialize_v9;
+	ops->uninitialize = uninitialize_v9;
+	ops->submit_packet = submit_packet_v9;
+}
+
+static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
+			enum kfd_queue_type type, unsigned int queue_size)
+{
+	int retval;
+
+	retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+	if (retval)
+		return false;
+
+	kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+	kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+	memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+
+	return true;
+}
+
+static void uninitialize_v9(struct kernel_queue *kq)
+{
+	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+}
+
+static void submit_packet_v9(struct kernel_queue *kq)
+{
+	*kq->wptr64_kernel = kq->pending_wptr64;
+	write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+				kq->pending_wptr64);
+}
+
+static int pm_map_process_v9(struct packet_manager *pm,
+		uint32_t *buffer, struct qcm_process_device *qpd)
+{
+	struct pm4_mes_map_process *packet;
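+	/* page_table_base holds a 4 KiB page frame number; shifting it by
+	 * 12 forms the byte address programmed into the packet.
+	 */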
+	uint64_t vm_page_table_base_addr =
+		(uint64_t)(qpd->page_table_base) << 12;
+
+	packet = (struct pm4_mes_map_process *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+					sizeof(struct pm4_mes_map_process));
+	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+	packet->bitfields2.process_quantum = 1;
+	packet->bitfields2.pasid = qpd->pqm->process->pasid;
+	packet->bitfields14.gds_size = qpd->gds_size;
+	packet->bitfields14.num_gws = qpd->num_gws;
+	packet->bitfields14.num_oac = qpd->num_oac;
+	packet->bitfields14.sdma_enable = 1;
+	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+	packet->sh_mem_config = qpd->sh_mem_config;
+	packet->sh_mem_bases = qpd->sh_mem_bases;
+	packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+	packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
+	packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+	packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+
+	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+	packet->vm_context_page_table_base_addr_lo32 =
+			lower_32_bits(vm_page_table_base_addr);
+	packet->vm_context_page_table_base_addr_hi32 =
+			upper_32_bits(vm_page_table_base_addr);
+
+	return 0;
+}
+
+static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_mes_runlist *packet;
+
+	int concurrent_proc_cnt = 0;
+	struct kfd_dev *kfd = pm->dqm->dev;
+
+	/* Determine the number of processes to map together to HW:
+	 * it cannot exceed the number of VMIDs available to the
+	 * scheduler, and is the smaller of the number of processes in
+	 * the runlist and the kfd module parameter hws_max_conc_proc.
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has already been done in
+	 * kgd2kfd_device_init().
+	 */
+	concurrent_proc_cnt = min(pm->dqm->processes_count,
+			kfd->max_proc_per_quantum);
+
+	packet = (struct pm4_mes_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+						sizeof(struct pm4_mes_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.valid = 1;
+	packet->bitfields4.process_cnt = concurrent_proc_cnt;
+	packet->ordinal2 = lower_32_bits(ib);
+	packet->ib_base_hi = upper_32_bits(ib);
+
+	return 0;
+}
+
+static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+		struct queue *q, bool is_static)
+{
+	struct pm4_mes_map_queues *packet;
+	bool use_static = is_static;
+
+	packet = (struct pm4_mes_map_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+					sizeof(struct pm4_mes_map_queues));
+	packet->bitfields2.alloc_format =
+		alloc_format__mes_map_queues__one_per_pipe_vi;
+	packet->bitfields2.num_queues = 1;
+	packet->bitfields2.queue_sel =
+		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+	packet->bitfields2.engine_sel =
+		engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_compute_vi;
+
+	switch (q->properties.type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		if (use_static)
+			packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_latency_static_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.queue_type =
+			queue_type__mes_map_queues__debug_interface_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+				engine_sel__mes_map_queues__sdma0_vi;
+		use_static = false; /* no static queues under SDMA */
+		break;
+	default:
+		WARN(1, "queue type %d", q->properties.type);
+		return -EINVAL;
+	}
+	packet->bitfields3.doorbell_offset =
+			q->properties.doorbell_off;
+
+	packet->mqd_addr_lo =
+			lower_32_bits(q->gart_mqd_addr);
+
+	packet->mqd_addr_hi =
+			upper_32_bits(q->gart_mqd_addr);
+
+	packet->wptr_addr_lo =
+			lower_32_bits((uint64_t)q->properties.write_ptr);
+
+	packet->wptr_addr_hi =
+			upper_32_bits((uint64_t)q->properties.write_ptr);
+
+	return 0;
+}
+
+static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+			enum kfd_queue_type type,
+			enum kfd_unmap_queues_filter filter,
+			uint32_t filter_param, bool reset,
+			unsigned int sdma_engine)
+{
+	struct pm4_mes_unmap_queues *packet;
+
+	packet = (struct pm4_mes_unmap_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+					sizeof(struct pm4_mes_unmap_queues));
+	switch (type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__compute;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+		break;
+	default:
+		WARN(1, "queue type %d", type);
+		return -EINVAL;
+	}
+
+	if (reset)
+		packet->bitfields2.action =
+			action__mes_unmap_queues__reset_queues;
+	else
+		packet->bitfields2.action =
+			action__mes_unmap_queues__preempt_queues;
+
+	switch (filter) {
+	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+		packet->bitfields2.num_queues = 1;
+		packet->bitfields3b.doorbell_offset0 = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+		packet->bitfields3a.pasid = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_queues;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+		/* in this case, we do not preempt static queues */
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+		break;
+	default:
+		WARN(1, "filter %d", filter);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t fence_address,	uint32_t fence_value)
+{
+	struct pm4_mes_query_status *packet;
+
+	packet = (struct pm4_mes_query_status *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+					sizeof(struct pm4_mes_query_status));
+
+	packet->bitfields2.context_id = 0;
+	packet->bitfields2.interrupt_sel =
+			interrupt_sel__mes_query_status__completion_status;
+	packet->bitfields2.command =
+			command__mes_query_status__fence_only_after_write_ack;
+
+	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+	packet->data_hi = upper_32_bits((uint64_t)fence_value);
+	packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+	return 0;
+}
+
+static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
+{
+	struct pm4_mec_release_mem *packet;
+
+	packet = (struct pm4_mec_release_mem *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
+
+	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+					sizeof(struct pm4_mec_release_mem));
+
+	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
+	packet->bitfields2.tcl1_action_ena = 1;
+	packet->bitfields2.tc_action_ena = 1;
+	packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
+
+	packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
+	packet->bitfields3.int_sel =
+		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
+
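+	/* The low address field is specified in dwords, hence the >> 2 */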
+	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+	packet->address_hi = upper_32_bits(gpu_addr);
+
+	packet->data_lo = 0;
+
+	return 0;
+}
+
+const struct packet_manager_funcs kfd_v9_pm_funcs = {
+	.map_process		= pm_map_process_v9,
+	.runlist		= pm_runlist_v9,
+	.set_resources		= pm_set_resources_vi,
+	.map_queues		= pm_map_queues_v9,
+	.unmap_queues		= pm_unmap_queues_v9,
+	.query_status		= pm_query_status_v9,
+	.release_mem		= pm_release_mem_v9,
+	.map_process_size	= sizeof(struct pm4_mes_map_process),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index f1d4828..bf20c6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -22,15 +22,20 @@
  */
 
 #include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_opcodes.h"
 
 static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
 			enum kfd_queue_type type, unsigned int queue_size);
 static void uninitialize_vi(struct kernel_queue *kq);
+static void submit_packet_vi(struct kernel_queue *kq);
 
 void kernel_queue_init_vi(struct kernel_queue_ops *ops)
 {
 	ops->initialize = initialize_vi;
 	ops->uninitialize = uninitialize_vi;
+	ops->submit_packet = submit_packet_vi;
 }
 
 static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
@@ -54,3 +59,317 @@
 {
 	kfd_gtt_sa_free(kq->dev, kq->eop_mem);
 }
+
+static void submit_packet_vi(struct kernel_queue *kq)
+{
+	*kq->wptr_kernel = kq->pending_wptr;
+	write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+				kq->pending_wptr);
+}
+
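+/* Build a PM4 type-3 packet header; per the PM4 encoding, the count
+ * field holds the packet size in dwords minus two.
+ */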
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+	union PM4_MES_TYPE_3_HEADER header;
+
+	header.u32All = 0;
+	header.opcode = opcode;
+	header.count = packet_size / 4 - 2;
+	header.type = PM4_TYPE_3;
+
+	return header.u32All;
+}
+
+static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
+				struct qcm_process_device *qpd)
+{
+	struct pm4_mes_map_process *packet;
+
+	packet = (struct pm4_mes_map_process *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+					sizeof(struct pm4_mes_map_process));
+	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+	packet->bitfields2.process_quantum = 1;
+	packet->bitfields2.pasid = qpd->pqm->process->pasid;
+	packet->bitfields3.page_table_base = qpd->page_table_base;
+	packet->bitfields10.gds_size = qpd->gds_size;
+	packet->bitfields10.num_gws = qpd->num_gws;
+	packet->bitfields10.num_oac = qpd->num_oac;
+	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+	packet->sh_mem_config = qpd->sh_mem_config;
+	packet->sh_mem_bases = qpd->sh_mem_bases;
+	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
+	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+
+	packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
+
+	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+	return 0;
+}
+
+static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+	struct pm4_mes_runlist *packet;
+	int concurrent_proc_cnt = 0;
+	struct kfd_dev *kfd = pm->dqm->dev;
+
+	if (WARN_ON(!ib))
+		return -EFAULT;
+
+	/* Determine the number of processes to map together to HW:
+	 * it cannot exceed the number of VMIDs available to the
+	 * scheduler, and is the smaller of the number of processes in
+	 * the runlist and the kfd module parameter hws_max_conc_proc.
+	 * Note: the arbitration between the number of VMIDs and
+	 * hws_max_conc_proc has already been done in
+	 * kgd2kfd_device_init().
+	 */
+	concurrent_proc_cnt = min(pm->dqm->processes_count,
+			kfd->max_proc_per_quantum);
+
+	packet = (struct pm4_mes_runlist *)buffer;
+
+	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+						sizeof(struct pm4_mes_runlist));
+
+	packet->bitfields4.ib_size = ib_size_in_dwords;
+	packet->bitfields4.chain = chain ? 1 : 0;
+	packet->bitfields4.offload_polling = 0;
+	packet->bitfields4.valid = 1;
+	packet->bitfields4.process_cnt = concurrent_proc_cnt;
+	packet->ordinal2 = lower_32_bits(ib);
+	packet->bitfields3.ib_base_hi = upper_32_bits(ib);
+
+	return 0;
+}
+
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+				struct scheduling_resources *res)
+{
+	struct pm4_mes_set_resources *packet;
+
+	packet = (struct pm4_mes_set_resources *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+					sizeof(struct pm4_mes_set_resources));
+
+	packet->bitfields2.queue_type =
+			queue_type__mes_set_resources__hsa_interface_queue_hiq;
+	packet->bitfields2.vmid_mask = res->vmid_mask;
+	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+	packet->bitfields7.oac_mask = res->oac_mask;
+	packet->bitfields8.gds_heap_base = res->gds_heap_base;
+	packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+	return 0;
+}
+
+static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+		struct queue *q, bool is_static)
+{
+	struct pm4_mes_map_queues *packet;
+	bool use_static = is_static;
+
+	packet = (struct pm4_mes_map_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+					sizeof(struct pm4_mes_map_queues));
+	packet->bitfields2.alloc_format =
+		alloc_format__mes_map_queues__one_per_pipe_vi;
+	packet->bitfields2.num_queues = 1;
+	packet->bitfields2.queue_sel =
+		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+	packet->bitfields2.engine_sel =
+		engine_sel__mes_map_queues__compute_vi;
+	packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_compute_vi;
+
+	switch (q->properties.type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+		if (use_static)
+			packet->bitfields2.queue_type =
+		queue_type__mes_map_queues__normal_latency_static_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.queue_type =
+			queue_type__mes_map_queues__debug_interface_queue_vi;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+				engine_sel__mes_map_queues__sdma0_vi;
+		use_static = false; /* no static queues under SDMA */
+		break;
+	default:
+		WARN(1, "queue type %d", q->properties.type);
+		return -EINVAL;
+	}
+	packet->bitfields3.doorbell_offset =
+			q->properties.doorbell_off;
+
+	packet->mqd_addr_lo =
+			lower_32_bits(q->gart_mqd_addr);
+
+	packet->mqd_addr_hi =
+			upper_32_bits(q->gart_mqd_addr);
+
+	packet->wptr_addr_lo =
+			lower_32_bits((uint64_t)q->properties.write_ptr);
+
+	packet->wptr_addr_hi =
+			upper_32_bits((uint64_t)q->properties.write_ptr);
+
+	return 0;
+}
+
+static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+			enum kfd_queue_type type,
+			enum kfd_unmap_queues_filter filter,
+			uint32_t filter_param, bool reset,
+			unsigned int sdma_engine)
+{
+	struct pm4_mes_unmap_queues *packet;
+
+	packet = (struct pm4_mes_unmap_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+					sizeof(struct pm4_mes_unmap_queues));
+	switch (type) {
+	case KFD_QUEUE_TYPE_COMPUTE:
+	case KFD_QUEUE_TYPE_DIQ:
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__compute;
+		break;
+	case KFD_QUEUE_TYPE_SDMA:
+		packet->bitfields2.engine_sel =
+			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+		break;
+	default:
+		WARN(1, "queue type %d", type);
+		return -EINVAL;
+	}
+
+	if (reset)
+		packet->bitfields2.action =
+			action__mes_unmap_queues__reset_queues;
+	else
+		packet->bitfields2.action =
+			action__mes_unmap_queues__preempt_queues;
+
+	switch (filter) {
+	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+		packet->bitfields2.num_queues = 1;
+		packet->bitfields3b.doorbell_offset0 = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+		packet->bitfields3a.pasid = filter_param;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_queues;
+		break;
+	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+		/* in this case, we do not preempt static queues */
+		packet->bitfields2.queue_sel =
+			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+		break;
+	default:
+		WARN(1, "filter %d", filter);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t fence_address,	uint32_t fence_value)
+{
+	struct pm4_mes_query_status *packet;
+
+	packet = (struct pm4_mes_query_status *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+					sizeof(struct pm4_mes_query_status));
+
+	packet->bitfields2.context_id = 0;
+	packet->bitfields2.interrupt_sel =
+			interrupt_sel__mes_query_status__completion_status;
+	packet->bitfields2.command =
+			command__mes_query_status__fence_only_after_write_ack;
+
+	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+	packet->data_hi = upper_32_bits((uint64_t)fence_value);
+	packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+	return 0;
+}
+
+static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer)
+{
+	struct pm4_mec_release_mem *packet;
+
+	packet = (struct pm4_mec_release_mem *)buffer;
+	memset(buffer, 0, sizeof(*packet));
+
+	packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+						 sizeof(*packet));
+
+	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+	packet->bitfields2.tcl1_action_ena = 1;
+	packet->bitfields2.tc_action_ena = 1;
+	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+	packet->bitfields2.atc = 0;
+
+	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+	packet->bitfields3.int_sel =
+		int_sel___release_mem__send_interrupt_after_write_confirm;
+
+	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+	packet->address_hi = upper_32_bits(gpu_addr);
+
+	packet->data_lo = 0;
+
+	return 0;
+}
+
+const struct packet_manager_funcs kfd_vi_pm_funcs = {
+	.map_process		= pm_map_process_vi,
+	.runlist		= pm_runlist_vi,
+	.set_resources		= pm_set_resources_vi,
+	.map_queues		= pm_map_queues_vi,
+	.unmap_queues		= pm_unmap_queues_vi,
+	.query_status		= pm_query_status_vi,
+	.release_mem		= pm_release_mem_vi,
+	.map_process_size	= sizeof(struct pm4_mes_map_process),
+	.runlist_size		= sizeof(struct pm4_mes_runlist),
+	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
+	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
+	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
+	.query_status_size	= sizeof(struct pm4_mes_query_status),
+	.release_mem_size	= sizeof(struct pm4_mec_release_mem)
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index e0c07d2..76bf2dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -43,6 +43,8 @@
 	.interrupt	= kgd2kfd_interrupt,
 	.suspend	= kgd2kfd_suspend,
 	.resume		= kgd2kfd_resume,
+	.quiesce_mm	= kgd2kfd_quiesce_mm,
+	.resume_mm	= kgd2kfd_resume_mm,
 	.schedule_evict_and_restore_process =
 			  kgd2kfd_schedule_evict_and_restore_process,
 };
@@ -81,6 +83,11 @@
 MODULE_PARM_DESC(ignore_crat,
 	"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
 
+int vega10_noretry;
+module_param_named(noretry, vega10_noretry, int, 0644);
+MODULE_PARM_DESC(noretry,
+	"Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
+
 static int amdkfd_init_completed;
 
 int kgd2kfd_init(unsigned int interface_version,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index ee7061e..4b8eb50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -38,6 +38,9 @@
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 		return mqd_manager_init_vi_tonga(type, dev);
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		return mqd_manager_init_v9(type, dev);
 	default:
 		WARN(1, "Unexpected ASIC family %u",
 		     dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index c00c325..06eaa21 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -79,10 +79,6 @@
 	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
 	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
 
-	m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
-	/* Although WinKFD writes this, I suspect it should not be necessary */
-	m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
-
 	m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
 				QUANTUM_DURATION(10);
 
@@ -412,7 +408,7 @@
 	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
 		return NULL;
 
-	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
 	if (!mqd)
 		return NULL;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
new file mode 100644
index 0000000..684054f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v9_structs.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+	return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+	return (struct v9_sdma_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	int retval;
+	uint64_t addr;
+	struct v9_mqd *m;
+	struct kfd_dev *kfd = mm->dev;
+
+	/* From v9 onwards, for CWSR, the control stack is located on the
+	 * next page boundary after the MQD, so we use the GTT allocation
+	 * function instead of the sub-allocation function.
+	 */
+	if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
+		*mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+		if (!*mqd_mem_obj)
+			return -ENOMEM;
+		retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
+			ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+				ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
+			&((*mqd_mem_obj)->gtt_mem),
+			&((*mqd_mem_obj)->gpu_addr),
+			(void *)&((*mqd_mem_obj)->cpu_ptr));
+	} else
+		retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
+				mqd_mem_obj);
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
+	addr = (*mqd_mem_obj)->gpu_addr;
+
+	memset(m, 0, sizeof(struct v9_mqd));
+
+	m->header = 0xC0310800;
+	m->compute_pipelinestat_enable = 1;
+	m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+	m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+	m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+			0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+	m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+	m->cp_mqd_base_addr_lo        = lower_32_bits(addr);
+	m->cp_mqd_base_addr_hi        = upper_32_bits(addr);
+
+	m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+			1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+			10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+	m->cp_hqd_pipe_priority = 1;
+	m->cp_hqd_queue_priority = 15;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		m->cp_hqd_aql_control =
+			1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+	}
+
+	if (q->tba_addr) {
+		m->compute_pgm_rsrc2 |=
+			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+	}
+
+	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->cp_hqd_ctx_save_base_addr_lo =
+			lower_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_base_addr_hi =
+			upper_32_bits(q->ctx_save_restore_area_address);
+		m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+	}
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = addr;
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+			uint32_t pipe_id, uint32_t queue_id,
+			struct queue_properties *p, struct mm_struct *mms)
+{
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, 0, mms);
+}
+
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+		      struct queue_properties *q)
+{
+	struct v9_mqd *m;
+
+	m = get_mqd(mqd);
+
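+	/* QUEUE_SIZE is encoded as log2 of the ring size in dwords, minus 1 */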
+	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+	m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
+	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+	m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+	m->cp_hqd_pq_doorbell_control =
+		q->doorbell_off <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+			m->cp_hqd_pq_doorbell_control);
+
+	m->cp_hqd_ib_control =
+		3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+		1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;
+
+	/*
+	 * HW does not clamp this field correctly. Maximum EOP queue size
+	 * is constrained by per-SE EOP done signal count, which is 8-bit.
+	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+	 * is safe, giving a maximum field value of 0xA.
+	 */
+	m->cp_hqd_eop_control = min(0xA,
+		order_base_2(q->eop_ring_buffer_size / 4) - 1);
+	m->cp_hqd_eop_base_addr_lo =
+			lower_32_bits(q->eop_ring_buffer_address >> 8);
+	m->cp_hqd_eop_base_addr_hi =
+			upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+	m->cp_hqd_iq_timer = 0;
+
+	m->cp_hqd_vmid = q->vmid;
+
+	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
+				1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
+		m->cp_hqd_pq_doorbell_control |= 1 <<
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+	}
+	if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+		m->cp_hqd_ctx_save_control = 0;
+
+	q->is_active = (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0 &&
+			!q->is_evicted);
+
+	return 0;
+}
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+			enum kfd_preempt_type type,
+			unsigned int timeout, uint32_t pipe_id,
+			uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_destroy
+		(mm->dev->kgd, mqd, type, timeout,
+		pipe_id, queue_id);
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+			struct kfd_mem_obj *mqd_mem_obj)
+{
+	struct kfd_dev *kfd = mm->dev;
+
+	if (mqd_mem_obj->gtt_mem) {
+		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
+		kfree(mqd_mem_obj);
+	} else {
+		kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+	}
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+			uint64_t queue_address,	uint32_t pipe_id,
+			uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_is_occupied(
+		mm->dev->kgd, queue_address,
+		pipe_id, queue_id);
+}
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+			struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+	if (retval != 0)
+		return retval;
+
+	m = get_mqd(*mqd);
+
+	m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+			1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+
+	return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+			struct queue_properties *q)
+{
+	struct v9_mqd *m;
+	int retval = update_mqd(mm, mqd, q);
+
+	if (retval != 0)
+		return retval;
+
+	/* TODO: possibly redundant, since update_mqd already sets this */
+	m = get_mqd(mqd);
+	m->cp_hqd_vmid = q->vmid;
+	return retval;
+}
+
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+		struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+		struct queue_properties *q)
+{
+	int retval;
+	struct v9_sdma_mqd *m;
+
+	retval = kfd_gtt_sa_allocate(mm->dev,
+			sizeof(struct v9_sdma_mqd),
+			mqd_mem_obj);
+
+	if (retval != 0)
+		return -ENOMEM;
+
+	m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+	memset(m, 0, sizeof(struct v9_sdma_mqd));
+
+	*mqd = m;
+	if (gart_addr)
+		*gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+	retval = mm->update_mqd(mm, m, q);
+
+	return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct kfd_mem_obj *mqd_mem_obj)
+{
+	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		uint32_t pipe_id, uint32_t queue_id,
+		struct queue_properties *p, struct mm_struct *mms)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+					       (uint32_t __user *)p->write_ptr,
+					       mms);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		struct queue_properties *q)
+{
+	struct v9_sdma_mqd *m;
+
+	m = get_sdma_mqd(mqd);
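+	/* RB_SIZE is encoded as log2 of the ring buffer size in dwords */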
+	m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+	m->sdmax_rlcx_doorbell_offset =
+		q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdma_engine_id = q->sdma_engine_id;
+	m->sdma_queue_id = q->sdma_queue_id;
+	m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+	q->is_active = (q->queue_size > 0 &&
+			q->queue_address != 0 &&
+			q->queue_percent > 0 &&
+			!q->is_evicted);
+
+	return 0;
+}
+
+/*
+ * The preempt type is ignored here because there is only one way
+ * to preempt an SDMA queue.
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+		enum kfd_preempt_type type,
+		unsigned int timeout, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+		uint64_t queue_address, uint32_t pipe_id,
+		uint32_t queue_id)
+{
+	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v9_mqd), false);
+	return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+	seq_hex_dump(m, "    ", DUMP_PREFIX_OFFSET, 32, 4,
+		     data, sizeof(struct v9_sdma_mqd), false);
+	return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+		struct kfd_dev *dev)
+{
+	struct mqd_manager *mqd;
+
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;
+
+	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
+	if (!mqd)
+		return NULL;
+
+	mqd->dev = dev;
+
+	switch (type) {
+	case KFD_MQD_TYPE_CP:
+	case KFD_MQD_TYPE_COMPUTE:
+		mqd->init_mqd = init_mqd;
+		mqd->uninit_mqd = uninit_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_HIQ:
+		mqd->init_mqd = init_mqd_hiq;
+		mqd->uninit_mqd = uninit_mqd;
+		mqd->load_mqd = load_mqd;
+		mqd->update_mqd = update_mqd_hiq;
+		mqd->destroy_mqd = destroy_mqd;
+		mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+		break;
+	case KFD_MQD_TYPE_SDMA:
+		mqd->init_mqd = init_mqd_sdma;
+		mqd->uninit_mqd = uninit_mqd_sdma;
+		mqd->load_mqd = load_mqd_sdma;
+		mqd->update_mqd = update_mqd_sdma;
+		mqd->destroy_mqd = destroy_mqd_sdma;
+		mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+		break;
+	default:
+		kfree(mqd);
+		return NULL;
+	}
+
+	return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 89e4242..481307b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -394,7 +394,7 @@
 	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
 		return NULL;
 
-	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
 	if (!mqd)
 		return NULL;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 89ba4c6..c317feb4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -26,8 +26,6 @@
 #include "kfd_device_queue_manager.h"
 #include "kfd_kernel_queue.h"
 #include "kfd_priv.h"
-#include "kfd_pm4_headers_vi.h"
-#include "kfd_pm4_opcodes.h"
 
 static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
 				unsigned int buffer_size_bytes)
@@ -39,18 +37,6 @@
 	*wptr = temp;
 }
 
-static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
-{
-	union PM4_MES_TYPE_3_HEADER header;
-
-	header.u32All = 0;
-	header.opcode = opcode;
-	header.count = packet_size / 4 - 2;
-	header.type = PM4_TYPE_3;
-
-	return header.u32All;
-}
-
 static void pm_calc_rlib_size(struct packet_manager *pm,
 				unsigned int *rlib_size,
 				bool *over_subscription)
@@ -80,9 +66,9 @@
 		pr_debug("Over subscribed runlist\n");
 	}
 
-	map_queue_size = sizeof(struct pm4_mes_map_queues);
+	map_queue_size = pm->pmf->map_queues_size;
 	/* calculate run list ib allocation size */
-	*rlib_size = process_count * sizeof(struct pm4_mes_map_process) +
+	*rlib_size = process_count * pm->pmf->map_process_size +
 		     queue_count * map_queue_size;
 
 	/*
@@ -90,7 +76,7 @@
 	 * when over subscription
 	 */
 	if (*over_subscription)
-		*rlib_size += sizeof(struct pm4_mes_runlist);
+		*rlib_size += pm->pmf->runlist_size;
 
 	pr_debug("runlist ib size %d\n", *rlib_size);
 }
@@ -108,12 +94,14 @@
 
 	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
 
+	mutex_lock(&pm->lock);
+
 	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
 					&pm->ib_buffer_obj);
 
 	if (retval) {
 		pr_err("Failed to allocate runlist IB\n");
-		return retval;
+		goto out;
 	}
 
 	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
@@ -121,140 +109,12 @@
 
 	memset(*rl_buffer, 0, *rl_buffer_size);
 	pm->allocated = true;
+
+out:
+	mutex_unlock(&pm->lock);
 	return retval;
 }
 
-static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
-			uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
-	struct pm4_mes_runlist *packet;
-	int concurrent_proc_cnt = 0;
-	struct kfd_dev *kfd = pm->dqm->dev;
-
-	if (WARN_ON(!ib))
-		return -EFAULT;
-
-	/* Determine the number of processes to map together to HW:
-	 * it can not exceed the number of VMIDs available to the
-	 * scheduler, and it is determined by the smaller of the number
-	 * of processes in the runlist and kfd module parameter
-	 * hws_max_conc_proc.
-	 * Note: the arbitration between the number of VMIDs and
-	 * hws_max_conc_proc has been done in
-	 * kgd2kfd_device_init().
-	 */
-	concurrent_proc_cnt = min(pm->dqm->processes_count,
-			kfd->max_proc_per_quantum);
-
-	packet = (struct pm4_mes_runlist *)buffer;
-
-	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
-	packet->header.u32All = build_pm4_header(IT_RUN_LIST,
-						sizeof(struct pm4_mes_runlist));
-
-	packet->bitfields4.ib_size = ib_size_in_dwords;
-	packet->bitfields4.chain = chain ? 1 : 0;
-	packet->bitfields4.offload_polling = 0;
-	packet->bitfields4.valid = 1;
-	packet->bitfields4.process_cnt = concurrent_proc_cnt;
-	packet->ordinal2 = lower_32_bits(ib);
-	packet->bitfields3.ib_base_hi = upper_32_bits(ib);
-
-	return 0;
-}
-
-static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
-				struct qcm_process_device *qpd)
-{
-	struct pm4_mes_map_process *packet;
-
-	packet = (struct pm4_mes_map_process *)buffer;
-
-	memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
-	packet->header.u32All = build_pm4_header(IT_MAP_PROCESS,
-					sizeof(struct pm4_mes_map_process));
-	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
-	packet->bitfields2.process_quantum = 1;
-	packet->bitfields2.pasid = qpd->pqm->process->pasid;
-	packet->bitfields3.page_table_base = qpd->page_table_base;
-	packet->bitfields10.gds_size = qpd->gds_size;
-	packet->bitfields10.num_gws = qpd->num_gws;
-	packet->bitfields10.num_oac = qpd->num_oac;
-	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
-	packet->sh_mem_config = qpd->sh_mem_config;
-	packet->sh_mem_bases = qpd->sh_mem_bases;
-	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
-	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
-
-	packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
-
-	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
-	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
-	return 0;
-}
-
-static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
-		struct queue *q, bool is_static)
-{
-	struct pm4_mes_map_queues *packet;
-	bool use_static = is_static;
-
-	packet = (struct pm4_mes_map_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
-	packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
-						sizeof(struct pm4_mes_map_queues));
-	packet->bitfields2.alloc_format =
-		alloc_format__mes_map_queues__one_per_pipe_vi;
-	packet->bitfields2.num_queues = 1;
-	packet->bitfields2.queue_sel =
-		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
-	packet->bitfields2.engine_sel =
-		engine_sel__mes_map_queues__compute_vi;
-	packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_compute_vi;
-
-	switch (q->properties.type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-		if (use_static)
-			packet->bitfields2.queue_type =
-		queue_type__mes_map_queues__normal_latency_static_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.queue_type =
-			queue_type__mes_map_queues__debug_interface_queue_vi;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
-				engine_sel__mes_map_queues__sdma0_vi;
-		use_static = false; /* no static queues under SDMA */
-		break;
-	default:
-		WARN(1, "queue type %d", q->properties.type);
-		return -EINVAL;
-	}
-	packet->bitfields3.doorbell_offset =
-			q->properties.doorbell_off;
-
-	packet->mqd_addr_lo =
-			lower_32_bits(q->gart_mqd_addr);
-
-	packet->mqd_addr_hi =
-			upper_32_bits(q->gart_mqd_addr);
-
-	packet->wptr_addr_lo =
-			lower_32_bits((uint64_t)q->properties.write_ptr);
-
-	packet->wptr_addr_hi =
-			upper_32_bits((uint64_t)q->properties.write_ptr);
-
-	return 0;
-}
-
 static int pm_create_runlist_ib(struct packet_manager *pm,
 				struct list_head *queues,
 				uint64_t *rl_gpu_addr,
@@ -292,12 +152,12 @@
 			return -ENOMEM;
 		}
 
-		retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
+		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
 		if (retval)
 			return retval;
 
 		proccesses_mapped++;
-		inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process),
+		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
 				alloc_size_bytes);
 
 		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
@@ -307,7 +167,7 @@
 			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
 				kq->queue->queue, qpd->is_debug);
 
-			retval = pm_create_map_queue(pm,
+			retval = pm->pmf->map_queues(pm,
 						&rl_buffer[rl_wptr],
 						kq->queue,
 						qpd->is_debug);
@@ -315,7 +175,7 @@
 				return retval;
 
 			inc_wptr(&rl_wptr,
-				sizeof(struct pm4_mes_map_queues),
+				pm->pmf->map_queues_size,
 				alloc_size_bytes);
 		}
 
@@ -326,7 +186,7 @@
 			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
 				q->queue, qpd->is_debug);
 
-			retval = pm_create_map_queue(pm,
+			retval = pm->pmf->map_queues(pm,
 						&rl_buffer[rl_wptr],
 						q,
 						qpd->is_debug);
@@ -335,7 +195,7 @@
 				return retval;
 
 			inc_wptr(&rl_wptr,
-				sizeof(struct pm4_mes_map_queues),
+				pm->pmf->map_queues_size,
 				alloc_size_bytes);
 		}
 	}
@@ -343,7 +203,7 @@
 	pr_debug("Finished map process and queues to runlist\n");
 
 	if (is_over_subscription)
-		retval = pm_create_runlist(pm, &rl_buffer[rl_wptr],
+		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
 					*rl_gpu_addr,
 					alloc_size_bytes / sizeof(uint32_t),
 					true);
@@ -355,45 +215,29 @@
 	return retval;
 }
 
-/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
- *     of this packet
- *     @gpu_addr - GPU address of the packet. It's a virtual address.
- *     @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
- *     Return - length of the packet
- */
-uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
-{
-	struct pm4_mec_release_mem *packet;
-
-	WARN_ON(!buffer);
-
-	packet = (struct pm4_mec_release_mem *)buffer;
-	memset(buffer, 0, sizeof(*packet));
-
-	packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
-						 sizeof(*packet));
-
-	packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
-	packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
-	packet->bitfields2.tcl1_action_ena = 1;
-	packet->bitfields2.tc_action_ena = 1;
-	packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
-	packet->bitfields2.atc = 0;
-
-	packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
-	packet->bitfields3.int_sel =
-		int_sel___release_mem__send_interrupt_after_write_confirm;
-
-	packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
-	packet->address_hi = upper_32_bits(gpu_addr);
-
-	packet->data_lo = 0;
-
-	return sizeof(*packet) / sizeof(unsigned int);
-}
-
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 {
+	switch (dqm->dev->device_info->asic_family) {
+	case CHIP_KAVERI:
+	case CHIP_HAWAII:
+		/* PM4 packet structures on CIK are the same as on VI */
+	case CHIP_CARRIZO:
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+		pm->pmf = &kfd_vi_pm_funcs;
+		break;
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		pm->pmf = &kfd_v9_pm_funcs;
+		break;
+	default:
+		WARN(1, "Unexpected ASIC family %u",
+		     dqm->dev->device_info->asic_family);
+		return -EINVAL;
+	}
+
 	pm->dqm = dqm;
 	mutex_init(&pm->lock);
 	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
@@ -415,38 +259,25 @@
 int pm_send_set_resources(struct packet_manager *pm,
 				struct scheduling_resources *res)
 {
-	struct pm4_mes_set_resources *packet;
+	uint32_t *buffer, size;
 	int retval = 0;
 
+	size = pm->pmf->set_resources_size;
 	mutex_lock(&pm->lock);
 	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
-					sizeof(*packet) / sizeof(uint32_t),
-					(unsigned int **)&packet);
-	if (!packet) {
+					size / sizeof(uint32_t),
+					(unsigned int **)&buffer);
+	if (!buffer) {
 		pr_err("Failed to allocate buffer on kernel queue\n");
 		retval = -ENOMEM;
 		goto out;
 	}
 
-	memset(packet, 0, sizeof(struct pm4_mes_set_resources));
-	packet->header.u32All = build_pm4_header(IT_SET_RESOURCES,
-					sizeof(struct pm4_mes_set_resources));
-
-	packet->bitfields2.queue_type =
-			queue_type__mes_set_resources__hsa_interface_queue_hiq;
-	packet->bitfields2.vmid_mask = res->vmid_mask;
-	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
-	packet->bitfields7.oac_mask = res->oac_mask;
-	packet->bitfields8.gds_heap_base = res->gds_heap_base;
-	packet->bitfields8.gds_heap_size = res->gds_heap_size;
-
-	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
-	packet->gws_mask_hi = upper_32_bits(res->gws_mask);
-
-	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
-	packet->queue_mask_hi = upper_32_bits(res->queue_mask);
-
-	pm->priv_queue->ops.submit_packet(pm->priv_queue);
+	retval = pm->pmf->set_resources(pm, buffer, res);
+	if (!retval)
+		pm->priv_queue->ops.submit_packet(pm->priv_queue);
+	else
+		pm->priv_queue->ops.rollback_packet(pm->priv_queue);
 
 out:
 	mutex_unlock(&pm->lock);
@@ -468,7 +299,7 @@
 
 	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
 
-	packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t);
+	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
 	mutex_lock(&pm->lock);
 
 	retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
@@ -476,7 +307,7 @@
 	if (retval)
 		goto fail_acquire_packet_buffer;
 
-	retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
+	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
 					rl_ib_size / sizeof(uint32_t), false);
 	if (retval)
 		goto fail_create_runlist;
@@ -499,37 +330,29 @@
 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 			uint32_t fence_value)
 {
-	int retval;
-	struct pm4_mes_query_status *packet;
+	uint32_t *buffer, size;
+	int retval = 0;
 
 	if (WARN_ON(!fence_address))
 		return -EFAULT;
 
+	size = pm->pmf->query_status_size;
 	mutex_lock(&pm->lock);
-	retval = pm->priv_queue->ops.acquire_packet_buffer(
-			pm->priv_queue,
-			sizeof(struct pm4_mes_query_status) / sizeof(uint32_t),
-			(unsigned int **)&packet);
-	if (retval)
-		goto fail_acquire_packet_buffer;
+	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+			size / sizeof(uint32_t), (unsigned int **)&buffer);
+	if (!buffer) {
+		pr_err("Failed to allocate buffer on kernel queue\n");
+		retval = -ENOMEM;
+		goto out;
+	}
 
-	packet->header.u32All = build_pm4_header(IT_QUERY_STATUS,
-					sizeof(struct pm4_mes_query_status));
+	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
+	if (!retval)
+		pm->priv_queue->ops.submit_packet(pm->priv_queue);
+	else
+		pm->priv_queue->ops.rollback_packet(pm->priv_queue);
 
-	packet->bitfields2.context_id = 0;
-	packet->bitfields2.interrupt_sel =
-			interrupt_sel__mes_query_status__completion_status;
-	packet->bitfields2.command =
-			command__mes_query_status__fence_only_after_write_ack;
-
-	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
-	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
-	packet->data_hi = upper_32_bits((uint64_t)fence_value);
-	packet->data_lo = lower_32_bits((uint64_t)fence_value);
-
-	pm->priv_queue->ops.submit_packet(pm->priv_queue);
-
-fail_acquire_packet_buffer:
+out:
 	mutex_unlock(&pm->lock);
 	return retval;
 }
@@ -539,82 +362,27 @@
 			uint32_t filter_param, bool reset,
 			unsigned int sdma_engine)
 {
-	int retval;
-	uint32_t *buffer;
-	struct pm4_mes_unmap_queues *packet;
+	uint32_t *buffer, size;
+	int retval = 0;
 
+	size = pm->pmf->unmap_queues_size;
 	mutex_lock(&pm->lock);
-	retval = pm->priv_queue->ops.acquire_packet_buffer(
-			pm->priv_queue,
-			sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t),
-			&buffer);
-	if (retval)
-		goto err_acquire_packet_buffer;
-
-	packet = (struct pm4_mes_unmap_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
-	pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n",
-		filter, reset, type);
-	packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
-					sizeof(struct pm4_mes_unmap_queues));
-	switch (type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__compute;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-		packet->bitfields2.engine_sel =
-			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
-		break;
-	default:
-		WARN(1, "queue type %d", type);
-		retval = -EINVAL;
-		goto err_invalid;
+	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+			size / sizeof(uint32_t), (unsigned int **)&buffer);
+	if (!buffer) {
+		pr_err("Failed to allocate buffer on kernel queue\n");
+		retval = -ENOMEM;
+		goto out;
 	}
 
-	if (reset)
-		packet->bitfields2.action =
-				action__mes_unmap_queues__reset_queues;
+	retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
+				       reset, sdma_engine);
+	if (!retval)
+		pm->priv_queue->ops.submit_packet(pm->priv_queue);
 	else
-		packet->bitfields2.action =
-				action__mes_unmap_queues__preempt_queues;
+		pm->priv_queue->ops.rollback_packet(pm->priv_queue);
 
-	switch (filter) {
-	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
-		packet->bitfields2.queue_sel =
-				queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
-		packet->bitfields2.num_queues = 1;
-		packet->bitfields3b.doorbell_offset0 = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
-		packet->bitfields2.queue_sel =
-				queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
-		packet->bitfields3a.pasid = filter_param;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
-		packet->bitfields2.queue_sel =
-				queue_sel__mes_unmap_queues__unmap_all_queues;
-		break;
-	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
-		/* in this case, we do not preempt static queues */
-		packet->bitfields2.queue_sel =
-				queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
-		break;
-	default:
-		WARN(1, "filter %d", filter);
-		retval = -EINVAL;
-		goto err_invalid;
-	}
-
-	pm->priv_queue->ops.submit_packet(pm->priv_queue);
-
-	mutex_unlock(&pm->lock);
-	return 0;
-
-err_invalid:
-	pm->priv_queue->ops.rollback_packet(pm->priv_queue);
-err_acquire_packet_buffer:
+out:
 	mutex_unlock(&pm->lock);
 	return retval;
 }
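
The three senders above now share an identical acquire/build/submit-or-rollback shape around the per-ASIC pmf callbacks. A minimal sketch of that shared pattern, purely illustrative (the patch itself keeps the three functions separate; pm_send_packet and its build callback are hypothetical names):

	/* hypothetical helper capturing the common packet-send pattern */
	static int pm_send_packet(struct packet_manager *pm, uint32_t size,
				  int (*build)(struct packet_manager *pm,
					       uint32_t *buffer))
	{
		uint32_t *buffer;
		int retval;

		mutex_lock(&pm->lock);
		pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
				size / sizeof(uint32_t),
				(unsigned int **)&buffer);
		if (!buffer) {
			mutex_unlock(&pm->lock);
			return -ENOMEM;
		}
		retval = build(pm, buffer);	/* fill in the PM4 packet */
		if (!retval)
			pm->priv_queue->ops.submit_packet(pm->priv_queue);
		else
			pm->priv_queue->ops.rollback_packet(pm->priv_queue);
		mutex_unlock(&pm->lock);
		return retval;
	}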
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
new file mode 100644
index 0000000..f2bcf5c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -0,0 +1,583 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_H
+#define F32_MES_PM4_PACKETS_H
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+	struct {
+		uint32_t reserved1 : 8; /* < reserved */
+		uint32_t opcode    : 8; /* < IT opcode */
+		uint32_t count     : 14; /* < number of DWORDs - 1 in the
+					 *   information body.
+					 */
+		uint32_t type      : 2; /* < packet identifier.
+					 *   It should be 3 for type 3 packets
+					 */
+	};
+	uint32_t u32All;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/*--------------------MES_SET_RESOURCES--------------------*/
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum mes_set_resources_queue_type_enum {
+	queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+	queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+	queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+
+struct pm4_mes_set_resources {
+	union {
+		union PM4_MES_TYPE_3_HEADER	header;		/* header */
+		uint32_t			ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t vmid_mask:16;
+			uint32_t unmap_latency:8;
+			uint32_t reserved1:5;
+			enum mes_set_resources_queue_type_enum queue_type:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t queue_mask_lo;
+	uint32_t queue_mask_hi;
+	uint32_t gws_mask_lo;
+	uint32_t gws_mask_hi;
+
+	union {
+		struct {
+			uint32_t oac_mask:16;
+			uint32_t reserved2:16;
+		} bitfields7;
+		uint32_t ordinal7;
+	};
+
+	union {
+		struct {
+			uint32_t gds_heap_base:6;
+			uint32_t reserved3:5;
+			uint32_t gds_heap_size:6;
+			uint32_t reserved4:15;
+		} bitfields8;
+		uint32_t ordinal8;
+	};
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST--------------------*/
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_mes_runlist {
+	union {
+		union PM4_MES_TYPE_3_HEADER header; /* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t reserved1:2;
+			uint32_t ib_base_lo:30;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t ib_base_hi;
+
+	union {
+		struct {
+			uint32_t ib_size:20;
+			uint32_t chain:1;
+			uint32_t offload_polling:1;
+			uint32_t reserved2:1;
+			uint32_t valid:1;
+			uint32_t process_cnt:4;
+			uint32_t reserved3:4;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_mes_map_process {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:8;
+			uint32_t diq_enable:1;
+			uint32_t process_quantum:7;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	uint32_t vm_context_page_table_base_addr_lo32;
+
+	uint32_t vm_context_page_table_base_addr_hi32;
+
+	uint32_t sh_mem_bases;
+
+	uint32_t sh_mem_config;
+
+	uint32_t sq_shader_tba_lo;
+
+	uint32_t sq_shader_tba_hi;
+
+	uint32_t sq_shader_tma_lo;
+
+	uint32_t sq_shader_tma_hi;
+
+	uint32_t reserved6;
+
+	uint32_t gds_addr_lo;
+
+	uint32_t gds_addr_hi;
+
+	union {
+		struct {
+			uint32_t num_gws:6;
+			uint32_t reserved7:1;
+			uint32_t sdma_enable:1;
+			uint32_t num_oac:4;
+			uint32_t reserved8:4;
+			uint32_t gds_size:6;
+			uint32_t num_queues:10;
+		} bitfields14;
+		uint32_t ordinal14;
+	};
+
+	uint32_t completion_signal_lo;
+
+	uint32_t completion_signal_hi;
+
+};
+
+#endif
+
+/*--------------------MES_MAP_PROCESS_VM--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED
+#define PM4_MES_MAP_PROCESS_VM_DEFINED
+
+struct PM4_MES_MAP_PROCESS_VM {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
+	};
+
+	uint32_t reserved1;
+
+	uint32_t vm_context_cntl;
+
+	uint32_t reserved2;
+
+	uint32_t vm_context_page_table_end_addr_lo32;
+
+	uint32_t vm_context_page_table_end_addr_hi32;
+
+	uint32_t vm_context_page_table_start_addr_lo32;
+
+	uint32_t vm_context_page_table_start_addr_hi32;
+
+	uint32_t reserved3;
+
+	uint32_t reserved4;
+
+	uint32_t reserved5;
+
+	uint32_t reserved6;
+
+	uint32_t reserved7;
+
+	uint32_t reserved8;
+
+	uint32_t completion_signal_lo32;
+
+	uint32_t completion_signal_hi32;
+
+};
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
+#define PM4_MES_MAP_QUEUES_VI_DEFINED
+enum mes_map_queues_queue_sel_enum {
+	queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
+	queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
+};
+
+enum mes_map_queues_queue_type_enum {
+	queue_type__mes_map_queues__normal_compute_vi = 0,
+	queue_type__mes_map_queues__debug_interface_queue_vi = 1,
+	queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
+	queue_type__mes_map_queues__low_latency_static_queue_vi = 3
+};
+
+enum mes_map_queues_alloc_format_enum {
+	alloc_format__mes_map_queues__one_per_pipe_vi = 0,
+	alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
+};
+
+enum mes_map_queues_engine_sel_enum {
+	engine_sel__mes_map_queues__compute_vi = 0,
+	engine_sel__mes_map_queues__sdma0_vi = 2,
+	engine_sel__mes_map_queues__sdma1_vi = 3
+};
+
+
+struct pm4_mes_map_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t reserved1:4;
+			enum mes_map_queues_queue_sel_enum queue_sel:2;
+			uint32_t reserved2:15;
+			enum mes_map_queues_queue_type_enum queue_type:3;
+			enum mes_map_queues_alloc_format_enum alloc_format:2;
+			enum mes_map_queues_engine_sel_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t reserved3:1;
+			uint32_t check_disable:1;
+			uint32_t doorbell_offset:26;
+			uint32_t reserved4:4;
+		} bitfields3;
+		uint32_t ordinal3;
+	};
+
+	uint32_t mqd_addr_lo;
+	uint32_t mqd_addr_hi;
+	uint32_t wptr_addr_lo;
+	uint32_t wptr_addr_hi;
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum mes_query_status_interrupt_sel_enum {
+	interrupt_sel__mes_query_status__completion_status = 0,
+	interrupt_sel__mes_query_status__process_status = 1,
+	interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum mes_query_status_command_enum {
+	command__mes_query_status__interrupt_only = 0,
+	command__mes_query_status__fence_only_immediate = 1,
+	command__mes_query_status__fence_only_after_write_ack = 2,
+	command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum mes_query_status_engine_sel_enum {
+	engine_sel__mes_query_status__compute = 0,
+	engine_sel__mes_query_status__sdma0_queue = 2,
+	engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_mes_query_status {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			uint32_t context_id:28;
+			enum mes_query_status_interrupt_sel_enum	interrupt_sel:2;
+			enum mes_query_status_command_enum command:2;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved1:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved2:2;
+			uint32_t doorbell_offset:26;
+			enum mes_query_status_engine_sel_enum engine_sel:3;
+			uint32_t reserved3:1;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	uint32_t addr_lo;
+	uint32_t addr_hi;
+	uint32_t data_lo;
+	uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+	action__mes_unmap_queues__preempt_queues = 0,
+	action__mes_unmap_queues__reset_queues = 1,
+	action__mes_unmap_queues__disable_process_queues = 2,
+	action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+	queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+	queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+	engine_sel__mes_unmap_queues__compute = 0,
+	engine_sel__mes_unmap_queues__sdma0 = 2,
+	engine_sel__mes_unmap_queues__sdma1 = 3
+};
+
+struct pm4_mes_unmap_queues {
+	union {
+		union PM4_MES_TYPE_3_HEADER   header;            /* header */
+		uint32_t            ordinal1;
+	};
+
+	union {
+		struct {
+			enum mes_unmap_queues_action_enum action:2;
+			uint32_t reserved1:2;
+			enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+			uint32_t reserved2:20;
+			enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+			uint32_t num_queues:3;
+		} bitfields2;
+		uint32_t ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t pasid:16;
+			uint32_t reserved3:16;
+		} bitfields3a;
+		struct {
+			uint32_t reserved4:2;
+			uint32_t doorbell_offset0:26;
+			uint32_t reserved5:4;
+		} bitfields3b;
+		uint32_t ordinal3;
+	};
+
+	union {
+		struct {
+			uint32_t reserved6:2;
+			uint32_t doorbell_offset1:26;
+			uint32_t reserved7:4;
+		} bitfields4;
+		uint32_t ordinal4;
+	};
+
+	union {
+		struct {
+			uint32_t reserved8:2;
+			uint32_t doorbell_offset2:26;
+			uint32_t reserved9:4;
+		} bitfields5;
+		uint32_t ordinal5;
+	};
+
+	union {
+		struct {
+			uint32_t reserved10:2;
+			uint32_t doorbell_offset3:26;
+			uint32_t reserved11:4;
+		} bitfields6;
+		uint32_t ordinal6;
+	};
+};
+#endif
+
+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+
+enum mec_release_mem_event_index_enum {
+	event_index__mec_release_mem__end_of_pipe = 5,
+	event_index__mec_release_mem__shader_done = 6
+};
+
+enum mec_release_mem_cache_policy_enum {
+	cache_policy__mec_release_mem__lru = 0,
+	cache_policy__mec_release_mem__stream = 1
+};
+
+enum mec_release_mem_pq_exe_status_enum {
+	pq_exe_status__mec_release_mem__default = 0,
+	pq_exe_status__mec_release_mem__phase_update = 1
+};
+
+enum mec_release_mem_dst_sel_enum {
+	dst_sel__mec_release_mem__memory_controller = 0,
+	dst_sel__mec_release_mem__tc_l2 = 1,
+	dst_sel__mec_release_mem__queue_write_pointer_register = 2,
+	dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum mec_release_mem_int_sel_enum {
+	int_sel__mec_release_mem__none = 0,
+	int_sel__mec_release_mem__send_interrupt_only = 1,
+	int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
+	int_sel__mec_release_mem__send_data_after_write_confirm = 3,
+	int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
+	int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5,
+	int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6
+};
+
+enum mec_release_mem_data_sel_enum {
+	data_sel__mec_release_mem__none = 0,
+	data_sel__mec_release_mem__send_32_bit_low = 1,
+	data_sel__mec_release_mem__send_64_bit_data = 2,
+	data_sel__mec_release_mem__send_gpu_clock_counter = 3,
+	data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4,
+	data_sel__mec_release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4_mec_release_mem {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;     /* header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int event_type:6;
+			unsigned int reserved1:2;
+			enum mec_release_mem_event_index_enum event_index:4;
+			unsigned int tcl1_vol_action_ena:1;
+			unsigned int tc_vol_action_ena:1;
+			unsigned int reserved2:1;
+			unsigned int tc_wb_action_ena:1;
+			unsigned int tcl1_action_ena:1;
+			unsigned int tc_action_ena:1;
+			uint32_t reserved3:1;
+			uint32_t tc_nc_action_ena:1;
+			uint32_t tc_wc_action_ena:1;
+			uint32_t tc_md_action_ena:1;
+			uint32_t reserved4:3;
+			enum mec_release_mem_cache_policy_enum cache_policy:2;
+			uint32_t reserved5:2;
+			enum mec_release_mem_pq_exe_status_enum pq_exe_status:1;
+			uint32_t reserved6:2;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			uint32_t reserved7:16;
+			enum mec_release_mem_dst_sel_enum dst_sel:2;
+			uint32_t reserved8:6;
+			enum mec_release_mem_int_sel_enum int_sel:3;
+			uint32_t reserved9:2;
+			enum mec_release_mem_data_sel_enum data_sel:3;
+		} bitfields3;
+		unsigned int ordinal3;
+	};
+
+	union {
+		struct {
+			uint32_t reserved10:2;
+			unsigned int address_lo_32b:30;
+		} bitfields4;
+		struct {
+			uint32_t reserved11:3;
+			uint32_t address_lo_64b:29;
+		} bitfields4b;
+		uint32_t reserved12;
+		unsigned int ordinal4;
+	};
+
+	union {
+		uint32_t address_hi;
+		uint32_t reserved13;
+		uint32_t ordinal5;
+	};
+
+	union {
+		uint32_t data_lo;
+		uint32_t cmp_data_lo;
+		struct {
+			uint32_t dw_offset:16;
+			uint32_t num_dwords:16;
+		} bitfields6c;
+		uint32_t reserved14;
+		uint32_t ordinal6;
+	};
+
+	union {
+		uint32_t data_hi;
+		uint32_t cmp_data_hi;
+		uint32_t reserved15;
+		uint32_t reserved16;
+		uint32_t ordinal7;
+	};
+
+	uint32_t int_ctxid;
+
+};
+
+#endif
+
+enum {
+	CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+#endif
+
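
The count field of PM4_MES_TYPE_3_HEADER holds the number of body DWORDs minus one, so a builder derives it from the total packet size. A hedged sketch consistent with the header layout above (the shared helper this patch actually adds is pm_build_pm4_header, declared in kfd_priv.h below; this standalone version is illustrative):

	static unsigned int build_header_sketch(unsigned int opcode,
						size_t packet_size)
	{
		union PM4_MES_TYPE_3_HEADER header;

		header.u32All = 0;
		header.opcode = opcode;
		/* total DWORDs minus the header DWORD, minus one */
		header.count = packet_size / sizeof(uint32_t) - 2;
		header.type = 3;	/* type-3 packet */

		return header.u32All;
	}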
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 96a9cc0..5e3990b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -39,11 +39,37 @@
 
 #include "amd_shared.h"
 
+#define KFD_MAX_RING_ENTRY_SIZE	8
+
 #define KFD_SYSFS_FILE_MODE 0444
 
-#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull
-#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull
-#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull
+/* GPU ID hash width in bits */
+#define KFD_GPU_ID_HASH_WIDTH 16
+
+/* Use upper bits of mmap offset to store KFD driver specific information.
+ * BITS[63:62] - Encode MMAP type
+ * BITS[61:46] - Encode gpu_id, identifying which GPU the offset belongs to
+ * BITS[45:0]  - MMAP offset value
+ *
+ * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
+ *  defines are w.r.t. PAGE_SIZE
+ */
+#define KFD_MMAP_TYPE_SHIFT	(62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
+
+#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
+				<< KFD_MMAP_GPU_ID_SHIFT)
+#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
+				& KFD_MMAP_GPU_ID_MASK)
+#define KFD_MMAP_GPU_ID_GET(offset)    ((offset & KFD_MMAP_GPU_ID_MASK) \
+				>> KFD_MMAP_GPU_ID_SHIFT)
+
+#define KFD_MMAP_OFFSET_VALUE_MASK	(0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
+#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
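
Putting the layout together: a user-space-style sketch of encoding and decoding one of these offsets (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12; the gpu_id value is hypothetical):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT	12	/* assumption: 4 KiB pages */
	#define TYPE_SHIFT	(62 - PAGE_SHIFT)
	#define GPU_ID_SHIFT	(46 - PAGE_SHIFT)
	#define GPU_ID_MASK	(((1ULL << 16) - 1) << GPU_ID_SHIFT)

	int main(void)
	{
		uint64_t gpu_id = 0xbeef;	/* hypothetical GPU ID hash */
		uint64_t pgoff = (0x3ULL << TYPE_SHIFT) |	/* doorbell */
				 ((gpu_id << GPU_ID_SHIFT) & GPU_ID_MASK);

		printf("type 0x%llx gpu_id 0x%llx\n",
		       (unsigned long long)((pgoff >> TYPE_SHIFT) & 0x3),
		       (unsigned long long)((pgoff & GPU_ID_MASK) >>
					    GPU_ID_SHIFT));
		return 0;
	}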
 
 /*
  * When working with cp scheduler we should assign the HIQ manually or via
@@ -55,9 +81,6 @@
 #define KFD_CIK_HIQ_PIPE 4
 #define KFD_CIK_HIQ_QUEUE 0
 
-/* GPU ID hash width in bits */
-#define KFD_GPU_ID_HASH_WIDTH 16
-
 /* Macro for allocating structures */
 #define kfd_alloc_struct(ptr_to_struct)	\
 	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
@@ -116,6 +139,11 @@
  */
 extern int ignore_crat;
 
+/*
+ * Set sh_mem_config.retry_disable on Vega10
+ */
+extern int vega10_noretry;
+
 /**
  * enum kfd_sched_policy
  *
@@ -148,6 +176,8 @@
 	cache_policy_noncoherent
 };
 
+#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
+
 struct kfd_event_interrupt_class {
 	bool (*interrupt_isr)(struct kfd_dev *dev,
 				const uint32_t *ih_ring_entry);
@@ -160,6 +190,7 @@
 	const struct kfd_event_interrupt_class *event_interrupt_class;
 	unsigned int max_pasid_bits;
 	unsigned int max_no_of_hqd;
+	unsigned int doorbell_size;
 	size_t ih_ring_entry_size;
 	uint8_t num_of_watch_points;
 	uint16_t mqd_size_aligned;
@@ -173,6 +204,7 @@
 	uint32_t range_end;
 	uint64_t gpu_addr;
 	uint32_t *cpu_ptr;
+	void *gtt_mem;
 };
 
 struct kfd_vmid_info {
@@ -364,7 +396,7 @@
 	uint32_t queue_percent;
 	uint32_t *read_ptr;
 	uint32_t *write_ptr;
-	uint32_t __iomem *doorbell_ptr;
+	void __iomem *doorbell_ptr;
 	uint32_t doorbell_off;
 	bool is_interop;
 	bool is_evicted;
@@ -427,6 +459,7 @@
 	uint32_t queue;
 
 	unsigned int sdma_id;
+	unsigned int doorbell_id;
 
 	struct kfd_process	*process;
 	struct kfd_dev		*device;
@@ -501,6 +534,9 @@
 	/* IB memory */
 	uint64_t ib_base;
 	void *ib_kaddr;
+
+	/* doorbell resources per process per device */
+	unsigned long *doorbell_bitmap;
 };
 
 /* KFD Memory Eviction */
@@ -512,6 +548,8 @@
 /* Approx. time before evicting the process again */
 #define PROCESS_ACTIVE_TIME_MS 10
 
+int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
 
@@ -681,6 +719,8 @@
 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
 void kfd_unref_process(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p);
+int kfd_process_restore_queues(struct kfd_process *p);
 void kfd_suspend_all_processes(void);
 int kfd_resume_all_processes(void);
 
@@ -693,7 +733,7 @@
 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 							struct kfd_process *p);
 
-int kfd_reserved_mem_mmap(struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 			  struct vm_area_struct *vma);
 
 /* KFD process API for creating and translating handles */
@@ -721,17 +761,20 @@
 void kfd_pasid_free(unsigned int pasid);
 
 /* Doorbells */
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
 int kfd_doorbell_init(struct kfd_dev *kfd);
 void kfd_doorbell_fini(struct kfd_dev *kfd);
-int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+		      struct vm_area_struct *vma);
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 					unsigned int *doorbell_off);
 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
 u32 read_kernel_doorbell(u32 __iomem *db);
-void write_kernel_doorbell(u32 __iomem *db, u32 value);
-unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+void write_kernel_doorbell(void __iomem *db, u32 value);
+void write_kernel_doorbell64(void __iomem *db, u64 value);
+unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
 					struct kfd_process *process,
-					unsigned int queue_id);
+					unsigned int doorbell_id);
 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
 					struct kfd_process *process);
 int kfd_alloc_process_doorbells(struct kfd_process *process);
@@ -788,6 +831,8 @@
 		struct kfd_dev *dev);
 struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
 		struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+		struct kfd_dev *dev);
 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
 void device_queue_manager_uninit(struct device_queue_manager *dqm);
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -832,8 +877,42 @@
 	bool allocated;
 	struct kfd_mem_obj *ib_buffer_obj;
 	unsigned int ib_size_bytes;
+
+	const struct packet_manager_funcs *pmf;
 };
 
+struct packet_manager_funcs {
+	/* Support ASIC-specific packet formats for PM4 packets */
+	int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
+			struct qcm_process_device *qpd);
+	int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t ib, size_t ib_size_in_dwords, bool chain);
+	int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
+			struct scheduling_resources *res);
+	int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
+			struct queue *q, bool is_static);
+	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
+			enum kfd_queue_type type,
+			enum kfd_unmap_queues_filter mode,
+			uint32_t filter_param, bool reset,
+			unsigned int sdma_engine);
+	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
+			uint64_t fence_address,	uint32_t fence_value);
+	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
+
+	/* Packet sizes */
+	int map_process_size;
+	int runlist_size;
+	int set_resources_size;
+	int map_queues_size;
+	int unmap_queues_size;
+	int query_status_size;
+	int release_mem_size;
+};
+
+extern const struct packet_manager_funcs kfd_vi_pm_funcs;
+extern const struct packet_manager_funcs kfd_v9_pm_funcs;
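
The function tables are bound once per device; a hedged sketch of the selection (the actual logic lives in pm_init, which is not shown in this hunk, and the asic_family test here is an assumption):

	/* sketch only: choosing the packet format table by ASIC family */
	if (KFD_IS_SOC15(dqm->dev->device_info->asic_family))
		pm->pmf = &kfd_v9_pm_funcs;
	else
		pm->pmf = &kfd_vi_pm_funcs;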
+
 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
 void pm_uninit(struct packet_manager *pm);
 int pm_send_set_resources(struct packet_manager *pm,
@@ -849,12 +928,17 @@
 
 void pm_release_ib(struct packet_manager *pm);
 
-uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+/* Following PM funcs can be shared among VI and AI */
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+				struct scheduling_resources *res);
 
 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 
 /* Events */
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+
 extern const struct kfd_device_global_init_class device_global_init_class_cik;
 
 void kfd_event_init_process(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1711ad0..1d80b4f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -332,6 +332,7 @@
 			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
 				get_order(KFD_CWSR_TBA_TMA_SIZE));
 
+		kfree(pdd->qpd.doorbell_bitmap);
 		idr_destroy(&pdd->alloc_idr);
 
 		kfree(pdd);
@@ -451,7 +452,8 @@
 		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
 			continue;
 
-		offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
+		offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
+			<< PAGE_SHIFT;
 		qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
 			KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
 			MAP_SHARED, offset);
@@ -585,6 +587,31 @@
 	return ERR_PTR(err);
 }
 
+static int init_doorbell_bitmap(struct qcm_process_device *qpd,
+			struct kfd_dev *dev)
+{
+	unsigned int i;
+
+	if (!KFD_IS_SOC15(dev->device_info->asic_family))
+		return 0;
+
+	qpd->doorbell_bitmap =
+		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+				     BITS_PER_BYTE), GFP_KERNEL);
+	if (!qpd->doorbell_bitmap)
+		return -ENOMEM;
+
+	/* Mask out any reserved doorbells */
+	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
+		if ((dev->shared_resources.reserved_doorbell_mask & i) ==
+		    dev->shared_resources.reserved_doorbell_val) {
+			set_bit(i, qpd->doorbell_bitmap);
+			pr_debug("reserved doorbell 0x%03x\n", i);
+		}
+
+	return 0;
+}
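
The mask/val pair works like a wildcard match: doorbell i is reserved when its masked bits equal the reserved value. A hypothetical pair, for illustration only:

	/* hypothetical: reserve every doorbell whose bits [8:6] are 0b101 */
	unsigned int mask = 0x1c0, val = 0x140, i;

	for (i = 0; i < 512; i++)
		if ((mask & i) == val)
			pr_debug("doorbell 0x%03x reserved\n", i);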
+
 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
 							struct kfd_process *p)
 {
@@ -606,6 +633,12 @@
 	if (!pdd)
 		return NULL;
 
+	if (init_doorbell_bitmap(&pdd->qpd, dev)) {
+		pr_err("Failed to init doorbell for process\n");
+		kfree(pdd);
+		return NULL;
+	}
+
 	pdd->dev = dev;
 	INIT_LIST_HEAD(&pdd->qpd.queues_list);
 	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
@@ -808,7 +841,7 @@
  * Eviction is reference-counted per process-device. This means multiple
  * evictions from different sources can be nested safely.
  */
-static int process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p)
 {
 	struct kfd_process_device *pdd;
 	int r = 0;
@@ -844,7 +877,7 @@
 }
 
 /* process_restore_queues - Restore all user queues of a process */
-static  int process_restore_queues(struct kfd_process *p)
+int kfd_process_restore_queues(struct kfd_process *p)
 {
 	struct kfd_process_device *pdd;
 	int r, ret = 0;
@@ -886,7 +919,7 @@
 	flush_delayed_work(&p->restore_work);
 
 	pr_debug("Started evicting pasid %d\n", p->pasid);
-	ret = process_evict_queues(p);
+	ret = kfd_process_evict_queues(p);
 	if (!ret) {
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);
@@ -946,7 +979,7 @@
 		return;
 	}
 
-	ret = process_restore_queues(p);
+	ret = kfd_process_restore_queues(p);
 	if (!ret)
 		pr_debug("Finished restoring pasid %d\n", p->pasid);
 	else
@@ -963,7 +996,7 @@
 		cancel_delayed_work_sync(&p->eviction_work);
 		cancel_delayed_work_sync(&p->restore_work);
 
-		if (process_evict_queues(p))
+		if (kfd_process_evict_queues(p))
 			pr_err("Failed to suspend process %d\n", p->pasid);
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);
@@ -989,15 +1022,12 @@
 	return ret;
 }
 
-int kfd_reserved_mem_mmap(struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
 			  struct vm_area_struct *vma)
 {
-	struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
 	struct kfd_process_device *pdd;
 	struct qcm_process_device *qpd;
 
-	if (!dev)
-		return -EINVAL;
 	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
 		pr_err("Incorrect CWSR mapping size.\n");
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 7817e32..d65ce04 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -119,9 +119,6 @@
 	/* Doorbell initialized in user space */
 	q_properties->doorbell_ptr = NULL;
 
-	q_properties->doorbell_off =
-			kfd_queue_id_to_doorbell(dev, pqm->process, qid);
-
 	/* let DQM handle it */
 	q_properties->vmid = 0;
 	q_properties->queue_id = qid;
@@ -244,10 +241,20 @@
 	}
 
 	if (retval != 0) {
-		pr_err("DQM create queue failed\n");
+		pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
+			pqm->process->pasid, type, retval);
 		goto err_create_queue;
 	}
 
+	if (q)
+		/* Return the doorbell offset within the doorbell page
+		 * to the caller so it can be passed up to user mode
+		 * (in bytes).
+		 */
+		properties->doorbell_off =
+			(q->properties.doorbell_off * sizeof(uint32_t)) &
+			(kfd_doorbell_process_slice(dev) - 1);
+
 	pr_debug("PQM After DQM create queue\n");
 
 	list_add(&pqn->process_queue_list, &pqm->queues);
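
Concretely (hypothetical numbers, assuming kfd_doorbell_process_slice() returns 4 KiB): a queue assigned doorbell index 9 reports byte offset 9 * 4 = 36, and the mask with slice - 1 (0xfff) keeps the result inside the per-process doorbell page:

	unsigned int doorbell_id = 9;		/* hypothetical index */
	size_t slice = 4096;			/* assumed slice size */
	unsigned int off = (doorbell_id * sizeof(uint32_t)) &
			   (slice - 1);		/* == 36 */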
@@ -313,8 +320,11 @@
 		dqm = pqn->q->device->dqm;
 		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
 		if (retval) {
-			pr_debug("Destroy queue failed, returned %d\n", retval);
-			goto err_destroy_queue;
+			pr_err("Pasid %d destroy queue %d failed, ret %d\n",
+				pqm->process->pasid,
+				pqn->q->properties.queue_id, retval);
+			if (retval != -ETIME)
+				goto err_destroy_queue;
 		}
 		uninit_queue(pqn->q);
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index a5315d4..6dcd621 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -36,8 +36,8 @@
 	pr_debug("Queue Address: 0x%llX\n", q->queue_address);
 	pr_debug("Queue Id: %u\n", q->queue_id);
 	pr_debug("Queue Process Vmid: %u\n", q->vmid);
-	pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr);
-	pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr);
+	pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr);
+	pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr);
 	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
 	pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
 }
@@ -53,8 +53,8 @@
 	pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
 	pr_debug("Queue Id: %u\n", q->properties.queue_id);
 	pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
-	pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr);
-	pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr);
+	pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr);
+	pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr);
 	pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
 	pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
 	pr_debug("Queue MQD Address: 0x%p\n", q->mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ac28abc..bc95d4df 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1239,6 +1239,12 @@
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
 			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
 		break;
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+		break;
 	default:
 		WARN(1, "Unexpected ASIC family %u",
 		     dev->gpu->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index eb54cfc..7d9c3f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -45,6 +45,7 @@
 
 #define HSA_CAP_DOORBELL_TYPE_PRE_1_0		0x0
 #define HSA_CAP_DOORBELL_TYPE_1_0		0x1
+#define HSA_CAP_DOORBELL_TYPE_2_0		0x2
 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000
 
 struct kfd_node_properties {
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
new file mode 100644
index 0000000..0bc0b25
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HSA_SOC15_INT_H_INCLUDED
+#define HSA_SOC15_INT_H_INCLUDED
+
+#include "soc15_ih_clientid.h"
+
+#define SOC15_INTSRC_CP_END_OF_PIPE	181
+#define SOC15_INTSRC_CP_BAD_OPCODE	183
+#define SOC15_INTSRC_SQ_INTERRUPT_MSG	239
+#define SOC15_INTSRC_VMC_FAULT		0
+#define SOC15_INTSRC_SDMA_TRAP		224
+
+
+#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
+#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
+#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff)
+#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf)
+#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1)
+#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff)
+#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4]))
+#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5]))
+#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6]))
+#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7]))
+
+#endif
+
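
A hedged sketch of consuming these macros on an interrupt ring entry (ih_ring_entry is assumed to point at the 8-DWORD entry; the source-ID comparison mirrors the constants above):

	const uint32_t *entry = ih_ring_entry;	/* assumed 8 DWORDs */
	unsigned int source = SOC15_SOURCE_ID_FROM_IH_ENTRY(entry);
	unsigned int pasid = SOC15_PASID_FROM_IH_ENTRY(entry);

	if (source == SOC15_INTSRC_CP_END_OF_PIPE)
		pr_debug("end-of-pipe interrupt for pasid %u\n", pasid);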
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 5b124a6..d5d4586 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -9,14 +9,6 @@
 	  support for AMDGPU. This adds required support for Vega and
 	  Raven ASICs.
 
-config DRM_AMD_DC_PRE_VEGA
-	bool "DC support for Polaris and older ASICs"
-	default y
-	help
-	  Choose this option to enable the new DC support for older asics
-	  by default. This includes Polaris, Carrizo, Tonga, Bonaire,
-	  and Hawaii.
-
 config DRM_AMD_DC_FBC
 	bool "AMD FBC - Enable Frame Buffer Compression"
 	depends on DRM_AMD_DC
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2757944..f9b9ab9 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -433,11 +433,6 @@
 
 	init_data.dce_environment = DCE_ENV_PRODUCTION_DRV;
 
-	if (amdgpu_dc_log)
-		init_data.log_mask = DC_DEFAULT_LOG_MASK;
-	else
-		init_data.log_mask = DC_MIN_LOG_MASK;
-
 	/*
 	 * TODO debug why this doesn't work on Raven
 	 */
@@ -649,18 +644,6 @@
 static int dm_resume(void *handle)
 {
 	struct amdgpu_device *adev = handle;
-	struct amdgpu_display_manager *dm = &adev->dm;
-	int ret = 0;
-
-	/* power on hardware */
-	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
-
-	ret = amdgpu_dm_display_resume(adev);
-	return ret;
-}
-
-int amdgpu_dm_display_resume(struct amdgpu_device *adev)
-{
 	struct drm_device *ddev = adev->ddev;
 	struct amdgpu_display_manager *dm = &adev->dm;
 	struct amdgpu_dm_connector *aconnector;
@@ -671,10 +654,12 @@
 	struct drm_plane *plane;
 	struct drm_plane_state *new_plane_state;
 	struct dm_plane_state *dm_new_plane_state;
-
-	int ret = 0;
+	int ret;
 	int i;
 
+	/* power on hardware */
+	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
+
 	/* program HPD filter */
 	dc_resume(dm->dc);
 
@@ -688,8 +673,7 @@
 	amdgpu_dm_irq_resume_early(adev);
 
 	/* Do detection*/
-	list_for_each_entry(connector,
-			&ddev->mode_config.connector_list, head) {
+	list_for_each_entry(connector, &ddev->mode_config.connector_list, head) {
 		aconnector = to_amdgpu_dm_connector(connector);
 
 		/*
@@ -711,7 +695,7 @@
 	}
 
 	/* Force mode set in atomic commit */
-	for_each_new_crtc_in_state(adev->dm.cached_state, crtc, new_crtc_state, i)
+	for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i)
 		new_crtc_state->active_changed = true;
 
 	/*
@@ -719,7 +703,7 @@
 	 * them here, since they were duplicated as part of the suspend
 	 * procedure.
 	 */
-	for_each_new_crtc_in_state(adev->dm.cached_state, crtc, new_crtc_state, i) {
+	for_each_new_crtc_in_state(dm->cached_state, crtc, new_crtc_state, i) {
 		dm_new_crtc_state = to_dm_crtc_state(new_crtc_state);
 		if (dm_new_crtc_state->stream) {
 			WARN_ON(kref_read(&dm_new_crtc_state->stream->refcount) > 1);
@@ -728,7 +712,7 @@
 		}
 	}
 
-	for_each_new_plane_in_state(adev->dm.cached_state, plane, new_plane_state, i) {
+	for_each_new_plane_in_state(dm->cached_state, plane, new_plane_state, i) {
 		dm_new_plane_state = to_dm_plane_state(new_plane_state);
 		if (dm_new_plane_state->dc_state) {
 			WARN_ON(kref_read(&dm_new_plane_state->dc_state->refcount) > 1);
@@ -737,9 +721,9 @@
 		}
 	}
 
-	ret = drm_atomic_helper_resume(ddev, adev->dm.cached_state);
+	ret = drm_atomic_helper_resume(ddev, dm->cached_state);
 
-	adev->dm.cached_state = NULL;
+	dm->cached_state = NULL;
 
 	amdgpu_dm_irq_resume_late(adev);
 
@@ -927,6 +911,7 @@
 		drm_mode_connector_update_edid_property(connector, NULL);
 		aconnector->num_modes = 0;
 		aconnector->dc_sink = NULL;
+		aconnector->edid = NULL;
 	}
 
 	mutex_unlock(&dev->mode_config.mutex);
@@ -1131,6 +1116,7 @@
 
 	if (adev->asic_type == CHIP_VEGA10 ||
 	    adev->asic_type == CHIP_VEGA12 ||
+	    adev->asic_type == CHIP_VEGA20 ||
 	    adev->asic_type == CHIP_RAVEN)
 		client_id = SOC15_IH_CLIENTID_DCE;
 
@@ -1529,8 +1515,10 @@
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		if (dce110_register_irq_handlers(dm->adev)) {
 			DRM_ERROR("DM: Failed to initialize IRQ\n");
 			goto fail;
@@ -1549,7 +1537,7 @@
 		break;
 #endif
 	default:
-		DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type);
+		DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
 		goto fail;
 	}
 
@@ -1657,7 +1645,6 @@
 	if (ret == 0) {
 		if (s3_state) {
 			dm_resume(adev);
-			amdgpu_dm_display_resume(adev);
 			drm_kms_helper_hotplug_event(adev->ddev);
 		} else
 			dm_suspend(adev);
@@ -1722,6 +1709,7 @@
 		adev->mode_info.plane_type = dm_plane_type_default;
 		break;
 	case CHIP_POLARIS10:
+	case CHIP_VEGAM:
 		adev->mode_info.num_crtc = 6;
 		adev->mode_info.num_hpd = 6;
 		adev->mode_info.num_dig = 6;
@@ -1729,6 +1717,7 @@
 		break;
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
+	case CHIP_VEGA20:
 		adev->mode_info.num_crtc = 6;
 		adev->mode_info.num_hpd = 6;
 		adev->mode_info.num_dig = 6;
@@ -1743,7 +1732,7 @@
 		break;
 #endif
 	default:
-		DRM_ERROR("Usupported ASIC type: 0x%X\n", adev->asic_type);
+		DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type);
 		return -EINVAL;
 	}
 
@@ -1848,7 +1837,7 @@
 static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb,
 		       uint64_t *tiling_flags)
 {
-	struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->obj);
+	struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]);
 	int r = amdgpu_bo_reserve(rbo, false);
 
 	if (unlikely(r)) {
@@ -1977,6 +1966,7 @@
 
 	if (adev->asic_type == CHIP_VEGA10 ||
 	    adev->asic_type == CHIP_VEGA12 ||
+	    adev->asic_type == CHIP_VEGA20 ||
 	    adev->asic_type == CHIP_RAVEN) {
 		/* Fill GFX9 params */
 		plane_state->tiling_info.gfx9.num_pipes =
@@ -2017,7 +2007,6 @@
 	const struct amdgpu_framebuffer *amdgpu_fb =
 		to_amdgpu_framebuffer(plane_state->fb);
 	const struct drm_crtc *crtc = plane_state->crtc;
-	struct dc_transfer_func *input_tf;
 	int ret = 0;
 
 	if (!fill_rects_from_plane_state(plane_state, dc_plane_state))
@@ -2031,13 +2020,6 @@
 	if (ret)
 		return ret;
 
-	input_tf = dc_create_transfer_func();
-
-	if (input_tf == NULL)
-		return -ENOMEM;
-
-	dc_plane_state->in_transfer_func = input_tf;
-
 	/*
 	 * Always set input transfer function, since plane state is refreshed
 	 * every time.
@@ -2206,7 +2188,6 @@
 					     const struct drm_connector *connector)
 {
 	struct dc_crtc_timing *timing_out = &stream->timing;
-	struct dc_transfer_func *tf = dc_create_transfer_func();
 
 	memset(timing_out, 0, sizeof(struct dc_crtc_timing));
 
@@ -2250,9 +2231,8 @@
 
 	stream->output_color_space = get_output_color_space(timing_out);
 
-	tf->type = TF_TYPE_PREDEFINED;
-	tf->tf = TRANSFER_FUNCTION_SRGB;
-	stream->out_transfer_func = tf;
+	stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
+	stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
 }
 
 static void fill_audio_info(struct audio_info *audio_info,
@@ -2488,6 +2468,9 @@
 
 	update_stream_signal(stream);
 
+	if (dm_state && dm_state->freesync_capable)
+		stream->ignore_msa_timing_param = true;
+
 	return stream;
 }
 
@@ -2710,18 +2693,15 @@
 	const struct dc_link *link = aconnector->dc_link;
 	struct amdgpu_device *adev = connector->dev->dev_private;
 	struct amdgpu_display_manager *dm = &adev->dm;
+
 #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
 	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 
 	if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) &&
-	    link->type != dc_connection_none) {
-		amdgpu_dm_register_backlight_device(dm);
-
-		if (dm->backlight_dev) {
-			backlight_device_unregister(dm->backlight_dev);
-			dm->backlight_dev = NULL;
-		}
-
+	    link->type != dc_connection_none &&
+	    dm->backlight_dev) {
+		backlight_device_unregister(dm->backlight_dev);
+		dm->backlight_dev = NULL;
 	}
 #endif
 	drm_connector_unregister(connector);
@@ -2855,7 +2835,7 @@
 	create_eml_sink(aconnector);
 }
 
-int amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
+enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
 				   struct drm_display_mode *mode)
 {
 	int result = MODE_ERROR;
@@ -3058,8 +3038,7 @@
 	}
 
 	afb = to_amdgpu_framebuffer(new_state->fb);
-
-	obj = afb->obj;
+	obj = new_state->fb->obj[0];
 	rbo = gem_to_amdgpu_bo(obj);
 	adev = amdgpu_ttm_adev(rbo->tbo.bdev);
 	r = amdgpu_bo_reserve(rbo, false);
@@ -3067,12 +3046,11 @@
 		return r;
 
 	if (plane->type != DRM_PLANE_TYPE_CURSOR)
-		domain = amdgpu_display_framebuffer_domains(adev);
+		domain = amdgpu_display_supported_domains(adev);
 	else
 		domain = AMDGPU_GEM_DOMAIN_VRAM;
 
 	r = amdgpu_bo_pin(rbo, domain, &afb->address);
-
 	amdgpu_bo_unreserve(rbo);
 
 	if (unlikely(r != 0)) {
@@ -3123,14 +3101,12 @@
 				       struct drm_plane_state *old_state)
 {
 	struct amdgpu_bo *rbo;
-	struct amdgpu_framebuffer *afb;
 	int r;
 
 	if (!old_state->fb)
 		return;
 
-	afb = to_amdgpu_framebuffer(old_state->fb);
-	rbo = gem_to_amdgpu_bo(afb->obj);
+	rbo = gem_to_amdgpu_bo(old_state->fb->obj[0]);
 	r = amdgpu_bo_reserve(rbo, false);
 	if (unlikely(r)) {
 		DRM_ERROR("failed to reserve rbo before unpin\n");
@@ -3773,7 +3749,7 @@
 static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
 			       struct dc_cursor_position *position)
 {
-	struct amdgpu_crtc *amdgpu_crtc = amdgpu_crtc = to_amdgpu_crtc(crtc);
+	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	int x, y;
 	int xorigin = 0, yorigin = 0;
 
@@ -3905,7 +3881,7 @@
 	int r, vpos, hpos;
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(fb);
-	struct amdgpu_bo *abo = gem_to_amdgpu_bo(afb->obj);
+	struct amdgpu_bo *abo = gem_to_amdgpu_bo(fb->obj[0]);
 	struct amdgpu_device *adev = crtc->dev->dev_private;
 	bool async_flip = (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
 	struct dc_flip_addrs addr = { {0} };
@@ -3986,6 +3962,96 @@
 	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 }
 
+/*
+ * TODO this whole function needs to go
+ *
+ * dc_surface_update is needlessly complex. See if we can just replace this
+ * with a dc_plane_state and follow the atomic model a bit more closely here.
+ */
+static bool commit_planes_to_stream(
+		struct dc *dc,
+		struct dc_plane_state **plane_states,
+		uint8_t new_plane_count,
+		struct dm_crtc_state *dm_new_crtc_state,
+		struct dm_crtc_state *dm_old_crtc_state,
+		struct dc_state *state)
+{
+	/* no need to dynamically allocate this. it's pretty small */
+	struct dc_surface_update updates[MAX_SURFACES];
+	struct dc_flip_addrs *flip_addr;
+	struct dc_plane_info *plane_info;
+	struct dc_scaling_info *scaling_info;
+	int i;
+	struct dc_stream_state *dc_stream = dm_new_crtc_state->stream;
+	struct dc_stream_update *stream_update =
+			kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL);
+
+	if (!stream_update) {
+		BREAK_TO_DEBUGGER();
+		return false;
+	}
+
+	flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs),
+			    GFP_KERNEL);
+	plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info),
+			     GFP_KERNEL);
+	scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info),
+			       GFP_KERNEL);
+
+	if (!flip_addr || !plane_info || !scaling_info) {
+		kfree(flip_addr);
+		kfree(plane_info);
+		kfree(scaling_info);
+		kfree(stream_update);
+		return false;
+	}
+
+	memset(updates, 0, sizeof(updates));
+
+	stream_update->src = dc_stream->src;
+	stream_update->dst = dc_stream->dst;
+	stream_update->out_transfer_func = dc_stream->out_transfer_func;
+
+	for (i = 0; i < new_plane_count; i++) {
+		updates[i].surface = plane_states[i];
+		updates[i].gamma =
+			(struct dc_gamma *)plane_states[i]->gamma_correction;
+		updates[i].in_transfer_func = plane_states[i]->in_transfer_func;
+		flip_addr[i].address = plane_states[i]->address;
+		flip_addr[i].flip_immediate = plane_states[i]->flip_immediate;
+		plane_info[i].color_space = plane_states[i]->color_space;
+		plane_info[i].format = plane_states[i]->format;
+		plane_info[i].plane_size = plane_states[i]->plane_size;
+		plane_info[i].rotation = plane_states[i]->rotation;
+		plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror;
+		plane_info[i].stereo_format = plane_states[i]->stereo_format;
+		plane_info[i].tiling_info = plane_states[i]->tiling_info;
+		plane_info[i].visible = plane_states[i]->visible;
+		plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha;
+		plane_info[i].dcc = plane_states[i]->dcc;
+		scaling_info[i].scaling_quality = plane_states[i]->scaling_quality;
+		scaling_info[i].src_rect = plane_states[i]->src_rect;
+		scaling_info[i].dst_rect = plane_states[i]->dst_rect;
+		scaling_info[i].clip_rect = plane_states[i]->clip_rect;
+
+		updates[i].flip_addr = &flip_addr[i];
+		updates[i].plane_info = &plane_info[i];
+		updates[i].scaling_info = &scaling_info[i];
+	}
+
+	dc_commit_updates_for_stream(
+			dc,
+			updates,
+			new_plane_count,
+			dc_stream, stream_update, plane_states, state);
+
+	kfree(flip_addr);
+	kfree(plane_info);
+	kfree(scaling_info);
+	kfree(stream_update);
+	return true;
+}
+
 static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 				    struct drm_device *dev,
 				    struct amdgpu_display_manager *dm,
@@ -4001,6 +4067,8 @@
 	struct drm_crtc_state *new_pcrtc_state =
 			drm_atomic_get_new_crtc_state(state, pcrtc);
 	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(new_pcrtc_state);
+	struct dm_crtc_state *dm_old_crtc_state =
+			to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc));
 	struct dm_atomic_state *dm_state = to_dm_atomic_state(state);
 	int planes_count = 0;
 	unsigned long flags;
@@ -4037,7 +4105,7 @@
 		}
 		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 
-		if (!pflip_needed) {
+		if (!pflip_needed || plane->type == DRM_PLANE_TYPE_OVERLAY) {
 			WARN_ON(!dm_new_plane_state->dc_state);
 
 			plane_states_constructed[planes_count] = dm_new_plane_state->dc_state;
@@ -4079,10 +4147,12 @@
 			spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
 		}
 
-		if (false == dc_commit_planes_to_stream(dm->dc,
+
+		if (false == commit_planes_to_stream(dm->dc,
 							plane_states_constructed,
 							planes_count,
-							dc_stream_attach,
+							acrtc_state,
+							dm_old_crtc_state,
 							dm_state->context))
 			dm_error("%s: Failed to attach plane!\n", __func__);
 	} else {
@@ -4307,8 +4377,10 @@
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
 		struct dc_stream_status *status = NULL;
 
-		if (acrtc)
+		if (acrtc) {
 			new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base);
+			old_crtc_state = drm_atomic_get_old_crtc_state(state, &acrtc->base);
+		}
 
 		/* Skip any modesets/resets */
 		if (!acrtc || drm_atomic_crtc_needs_modeset(new_crtc_state))
@@ -4331,11 +4403,12 @@
 		WARN_ON(!status->plane_count);
 
 		/*TODO How it works with MPO ?*/
-		if (!dc_commit_planes_to_stream(
+		if (!commit_planes_to_stream(
 				dm->dc,
 				status->plane_states,
 				status->plane_count,
-				dm_new_crtc_state->stream,
+				dm_new_crtc_state,
+				to_dm_crtc_state(old_crtc_state),
 				dm_state->context))
 			dm_error("%s: Failed to update stream scaling!\n", __func__);
 	}
@@ -4582,7 +4655,6 @@
 			drm_old_conn_state = drm_atomic_get_old_connector_state(state,
 								    &aconnector->base);
 
-
 			if (IS_ERR(drm_new_conn_state)) {
 				ret = PTR_ERR_OR_ZERO(drm_new_conn_state);
 				break;
@@ -4769,7 +4841,8 @@
 
 		/* Remove any changed/removed planes */
 		if (!enable) {
-			if (pflip_needed)
+			if (pflip_needed &&
+			    plane->type != DRM_PLANE_TYPE_OVERLAY)
 				continue;
 
 			if (!old_plane_crtc)
@@ -4816,7 +4889,8 @@
 			if (!dm_new_crtc_state->stream)
 				continue;
 
-			if (pflip_needed)
+			if (pflip_needed &&
+			    plane->type != DRM_PLANE_TYPE_OVERLAY)
 				continue;
 
 			WARN_ON(dm_new_plane_state->dc_state);
@@ -5023,17 +5097,24 @@
 					   struct edid *edid)
 {
 	int i;
-	uint64_t val_capable;
 	bool edid_check_required;
 	struct detailed_timing *timing;
 	struct detailed_non_pixel *data;
 	struct detailed_data_monitor_range *range;
 	struct amdgpu_dm_connector *amdgpu_dm_connector =
 			to_amdgpu_dm_connector(connector);
+	struct dm_connector_state *dm_con_state;
 
 	struct drm_device *dev = connector->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 
+	if (!connector->state) {
+		DRM_ERROR("%s - Connector has no state", __func__);
+		return;
+	}
+
+	dm_con_state = to_dm_connector_state(connector->state);
+
 	edid_check_required = false;
 	if (!amdgpu_dm_connector->dc_sink) {
 		DRM_ERROR("dc_sink NULL, could not add free_sync module.\n");
@@ -5052,7 +5133,7 @@
 						amdgpu_dm_connector);
 		}
 	}
-	val_capable = 0;
+	dm_con_state->freesync_capable = false;
 	if (edid_check_required == true && (edid->version > 1 ||
 	   (edid->version == 1 && edid->revision > 1))) {
 		for (i = 0; i < 4; i++) {
@@ -5088,7 +5169,7 @@
 					amdgpu_dm_connector->min_vfreq * 1000000;
 			amdgpu_dm_connector->caps.max_refresh_in_micro_hz =
 					amdgpu_dm_connector->max_vfreq * 1000000;
-				val_capable = 1;
+			dm_con_state->freesync_capable = true;
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index b68400c..d5aa89a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -28,7 +28,6 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
-#include "dc.h"
 
 /*
  * This file contains the definition for amdgpu_display_manager
@@ -53,6 +52,7 @@
 struct amdgpu_device;
 struct drm_device;
 struct amdgpu_dm_irq_handler_data;
+struct dc;
 
 struct amdgpu_dm_prev_state {
 	struct drm_framebuffer *fb;
@@ -220,6 +220,7 @@
 	uint8_t underscan_hborder;
 	bool underscan_enable;
 	struct mod_freesync_user_enable user_enable;
+	bool freesync_capable;
 };
 
 #define to_dm_connector_state(x)\
@@ -246,7 +247,7 @@
 				     struct dc_link *link,
 				     int link_index);
 
-int amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
+enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connector,
 				   struct drm_display_mode *mode);
 
 void dm_restore_drm_connector_state(struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
index 25f064c..b329393 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c
@@ -25,6 +25,7 @@
 
 #include "amdgpu_mode.h"
 #include "amdgpu_dm.h"
+#include "dc.h"
 #include "modules/color/color_gamma.h"
 
 #define MAX_DRM_LUT_VALUE 0xFFFF
@@ -87,9 +88,9 @@
 			g = drm_color_lut_extract(lut[i].green, 16);
 			b = drm_color_lut_extract(lut[i].blue, 16);
 
-			gamma->entries.red[i] = dal_fixed31_32_from_int(r);
-			gamma->entries.green[i] = dal_fixed31_32_from_int(g);
-			gamma->entries.blue[i] = dal_fixed31_32_from_int(b);
+			gamma->entries.red[i] = dc_fixpt_from_int(r);
+			gamma->entries.green[i] = dc_fixpt_from_int(g);
+			gamma->entries.blue[i] = dc_fixpt_from_int(b);
 		}
 		return;
 	}
@@ -100,9 +101,9 @@
 		g = drm_color_lut_extract(lut[i].green, 16);
 		b = drm_color_lut_extract(lut[i].blue, 16);
 
-		gamma->entries.red[i] = dal_fixed31_32_from_fraction(r, MAX_DRM_LUT_VALUE);
-		gamma->entries.green[i] = dal_fixed31_32_from_fraction(g, MAX_DRM_LUT_VALUE);
-		gamma->entries.blue[i] = dal_fixed31_32_from_fraction(b, MAX_DRM_LUT_VALUE);
+		gamma->entries.red[i] = dc_fixpt_from_fraction(r, MAX_DRM_LUT_VALUE);
+		gamma->entries.green[i] = dc_fixpt_from_fraction(g, MAX_DRM_LUT_VALUE);
+		gamma->entries.blue[i] = dc_fixpt_from_fraction(b, MAX_DRM_LUT_VALUE);
 	}
 }
 
@@ -207,7 +208,7 @@
 	for (i = 0; i < 12; i++) {
 		/* Skip 4th element */
 		if (i % 4 == 3) {
-			stream->gamut_remap_matrix.matrix[i] = dal_fixed31_32_zero;
+			stream->gamut_remap_matrix.matrix[i] = dc_fixpt_zero;
 			continue;
 		}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index ca0b08b..bd44935 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -330,11 +330,6 @@
 	return true;
 }
 
-bool dm_helpers_dc_conn_log(struct dc_context *ctx, struct log_entry *entry, enum dc_log_type event)
-{
-	return true;
-}
-
 void dm_dtn_log_begin(struct dc_context *ctx)
 {}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
index 89342b4..0229c7ed 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
@@ -37,8 +37,17 @@
 
 unsigned long long dm_get_timestamp(struct dc_context *ctx)
 {
-	/* TODO: return actual timestamp */
-	return 0;
+	struct timespec64 time;
+
+	getrawmonotonic64(&time);
+	return timespec64_to_ns(&time);
+}
+
+unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx,
+		unsigned long long current_time_stamp,
+		unsigned long long last_time_stamp)
+{
+	return current_time_stamp - last_time_stamp;
 }
 
 void dm_perf_trace_timestamp(const char *func_name, unsigned int line)
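
With this change dm_get_timestamp() returns real nanoseconds from the raw
monotonic clock (getrawmonotonic64() is the API this kernel generation
provides; later kernels spell it ktime_get_raw_ts64()), and
dm_get_elapse_time_in_ns() is just the subtraction of two such stamps. A
user-space analogue of the same pattern, assuming Linux's CLOCK_MONOTONIC_RAW:

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    /* User-space analogue of dm_get_timestamp(): raw monotonic time in ns. */
    static uint64_t get_timestamp_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
    }

    int main(void)
    {
            uint64_t start = get_timestamp_ns();
            /* ... work being profiled ... */
            uint64_t end = get_timestamp_ns();

            /* dm_get_elapse_time_in_ns() is exactly this subtraction. */
            printf("elapsed: %llu ns\n", (unsigned long long)(end - start));
            return 0;
    }
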
diff --git a/drivers/gpu/drm/amd/display/dc/basics/Makefile b/drivers/gpu/drm/amd/display/dc/basics/Makefile
index bca33bd..b49ea96 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/basics/Makefile
@@ -24,7 +24,7 @@
 # It provides the general basic services required by other DAL
 # subcomponents.
 
-BASICS = conversion.o fixpt31_32.o fixpt32_32.o \
+BASICS = conversion.o fixpt31_32.o \
 	logger.o log_helpers.o vector.o
 
 AMD_DAL_BASICS = $(addprefix $(AMDDALPATH)/dc/basics/,$(BASICS))
diff --git a/drivers/gpu/drm/amd/display/dc/basics/conversion.c b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
index 3109649..50b47f1 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/conversion.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/conversion.c
@@ -41,22 +41,22 @@
 
 	uint16_t result;
 
-	uint16_t d = (uint16_t)dal_fixed31_32_floor(
-		dal_fixed31_32_abs(
+	uint16_t d = (uint16_t)dc_fixpt_floor(
+		dc_fixpt_abs(
 			arg));
 
 	if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor))
-		numerator = (uint16_t)dal_fixed31_32_round(
-			dal_fixed31_32_mul_int(
+		numerator = (uint16_t)dc_fixpt_round(
+			dc_fixpt_mul_int(
 				arg,
 				divisor));
 	else {
-		numerator = dal_fixed31_32_floor(
-			dal_fixed31_32_sub(
-				dal_fixed31_32_from_int(
+		numerator = dc_fixpt_floor(
+			dc_fixpt_sub(
+				dc_fixpt_from_int(
 					1LL << integer_bits),
-				dal_fixed31_32_recip(
-					dal_fixed31_32_from_int(
+				dc_fixpt_recip(
+					dc_fixpt_from_int(
 						divisor))));
 	}
 
@@ -66,8 +66,8 @@
 		result = (uint16_t)(
 		(1 << (integer_bits + fractional_bits + 1)) + numerator);
 
-	if ((result != 0) && dal_fixed31_32_lt(
-		arg, dal_fixed31_32_zero))
+	if ((result != 0) && dc_fixpt_lt(
+		arg, dc_fixpt_zero))
 		result |= 1 << (integer_bits + fractional_bits);
 
 	return result;
@@ -84,15 +84,15 @@
 	uint32_t buffer_size)
 {
 	const struct fixed31_32 min_2_13 =
-		dal_fixed31_32_from_fraction(S2D13_MIN, DIVIDER);
+		dc_fixpt_from_fraction(S2D13_MIN, DIVIDER);
 	const struct fixed31_32 max_2_13 =
-		dal_fixed31_32_from_fraction(S2D13_MAX, DIVIDER);
+		dc_fixpt_from_fraction(S2D13_MAX, DIVIDER);
 	uint32_t i;
 
 	for (i = 0; i < buffer_size; ++i) {
 		uint32_t reg_value =
 				fixed_point_to_int_frac(
-					dal_fixed31_32_clamp(
+					dc_fixpt_clamp(
 						flt[i],
 						min_2_13,
 						max_2_13),
diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
index 8a9bba8..e61dd97d 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/fixpt31_32.c
@@ -26,13 +26,13 @@
 #include "dm_services.h"
 #include "include/fixed31_32.h"
 
-static inline uint64_t abs_i64(
-	int64_t arg)
+static inline unsigned long long abs_i64(
+	long long arg)
 {
 	if (arg > 0)
-		return (uint64_t)arg;
+		return (unsigned long long)arg;
 	else
-		return (uint64_t)(-arg);
+		return (unsigned long long)(-arg);
 }
 
 /*
@@ -40,12 +40,12 @@
  * result = dividend / divisor
  * *remainder = dividend % divisor
  */
-static inline uint64_t complete_integer_division_u64(
-	uint64_t dividend,
-	uint64_t divisor,
-	uint64_t *remainder)
+static inline unsigned long long complete_integer_division_u64(
+	unsigned long long dividend,
+	unsigned long long divisor,
+	unsigned long long *remainder)
 {
-	uint64_t result;
+	unsigned long long result;
 
 	ASSERT(divisor);
 
@@ -64,30 +64,28 @@
 #define GET_FRACTIONAL_PART(x) \
 	(FRACTIONAL_PART_MASK & (x))
 
-struct fixed31_32 dal_fixed31_32_from_fraction(
-	int64_t numerator,
-	int64_t denominator)
+struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator)
 {
 	struct fixed31_32 res;
 
 	bool arg1_negative = numerator < 0;
 	bool arg2_negative = denominator < 0;
 
-	uint64_t arg1_value = arg1_negative ? -numerator : numerator;
-	uint64_t arg2_value = arg2_negative ? -denominator : denominator;
+	unsigned long long arg1_value = arg1_negative ? -numerator : numerator;
+	unsigned long long arg2_value = arg2_negative ? -denominator : denominator;
 
-	uint64_t remainder;
+	unsigned long long remainder;
 
 	/* determine integer part */
 
-	uint64_t res_value = complete_integer_division_u64(
+	unsigned long long res_value = complete_integer_division_u64(
 		arg1_value, arg2_value, &remainder);
 
 	ASSERT(res_value <= LONG_MAX);
 
 	/* determine fractional part */
 	{
-		uint32_t i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
+		unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART;
 
 		do {
 			remainder <<= 1;
@@ -103,14 +101,14 @@
 
 	/* round up LSB */
 	{
-		uint64_t summand = (remainder << 1) >= arg2_value;
+		unsigned long long summand = (remainder << 1) >= arg2_value;
 
 		ASSERT(res_value <= LLONG_MAX - summand);
 
 		res_value += summand;
 	}
 
-	res.value = (int64_t)res_value;
+	res.value = (long long)res_value;
 
 	if (arg1_negative ^ arg2_negative)
 		res.value = -res.value;
@@ -118,79 +116,23 @@
 	return res;
 }
 
-struct fixed31_32 dal_fixed31_32_from_int_nonconst(
-	int64_t arg)
-{
-	struct fixed31_32 res;
-
-	ASSERT((LONG_MIN <= arg) && (arg <= LONG_MAX));
-
-	res.value = arg << FIXED31_32_BITS_PER_FRACTIONAL_PART;
-
-	return res;
-}
-
-struct fixed31_32 dal_fixed31_32_shl(
-	struct fixed31_32 arg,
-	uint8_t shift)
-{
-	struct fixed31_32 res;
-
-	ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) ||
-		((arg.value < 0) && (arg.value >= LLONG_MIN >> shift)));
-
-	res.value = arg.value << shift;
-
-	return res;
-}
-
-struct fixed31_32 dal_fixed31_32_add(
-	struct fixed31_32 arg1,
-	struct fixed31_32 arg2)
-{
-	struct fixed31_32 res;
-
-	ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) ||
-		((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value)));
-
-	res.value = arg1.value + arg2.value;
-
-	return res;
-}
-
-struct fixed31_32 dal_fixed31_32_sub(
-	struct fixed31_32 arg1,
-	struct fixed31_32 arg2)
-{
-	struct fixed31_32 res;
-
-	ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) ||
-		((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value)));
-
-	res.value = arg1.value - arg2.value;
-
-	return res;
-}
-
-struct fixed31_32 dal_fixed31_32_mul(
-	struct fixed31_32 arg1,
-	struct fixed31_32 arg2)
+struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2)
 {
 	struct fixed31_32 res;
 
 	bool arg1_negative = arg1.value < 0;
 	bool arg2_negative = arg2.value < 0;
 
-	uint64_t arg1_value = arg1_negative ? -arg1.value : arg1.value;
-	uint64_t arg2_value = arg2_negative ? -arg2.value : arg2.value;
+	unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value;
+	unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value;
 
-	uint64_t arg1_int = GET_INTEGER_PART(arg1_value);
-	uint64_t arg2_int = GET_INTEGER_PART(arg2_value);
+	unsigned long long arg1_int = GET_INTEGER_PART(arg1_value);
+	unsigned long long arg2_int = GET_INTEGER_PART(arg2_value);
 
-	uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value);
-	uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value);
+	unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value);
+	unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value);
 
-	uint64_t tmp;
+	unsigned long long tmp;
 
 	res.value = arg1_int * arg2_int;
 
@@ -200,22 +142,22 @@
 
 	tmp = arg1_int * arg2_fra;
 
-	ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
 
 	res.value += tmp;
 
 	tmp = arg2_int * arg1_fra;
 
-	ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
 
 	res.value += tmp;
 
 	tmp = arg1_fra * arg2_fra;
 
 	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
-		(tmp >= (uint64_t)dal_fixed31_32_half.value);
+		(tmp >= (unsigned long long)dc_fixpt_half.value);
 
-	ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
 
 	res.value += tmp;
 
@@ -225,18 +167,17 @@
 	return res;
 }
 
-struct fixed31_32 dal_fixed31_32_sqr(
-	struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg)
 {
 	struct fixed31_32 res;
 
-	uint64_t arg_value = abs_i64(arg.value);
+	unsigned long long arg_value = abs_i64(arg.value);
 
-	uint64_t arg_int = GET_INTEGER_PART(arg_value);
+	unsigned long long arg_int = GET_INTEGER_PART(arg_value);
 
-	uint64_t arg_fra = GET_FRACTIONAL_PART(arg_value);
+	unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value);
 
-	uint64_t tmp;
+	unsigned long long tmp;
 
 	res.value = arg_int * arg_int;
 
@@ -246,28 +187,27 @@
 
 	tmp = arg_int * arg_fra;
 
-	ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
 
 	res.value += tmp;
 
-	ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
 
 	res.value += tmp;
 
 	tmp = arg_fra * arg_fra;
 
 	tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) +
-		(tmp >= (uint64_t)dal_fixed31_32_half.value);
+		(tmp >= (unsigned long long)dc_fixpt_half.value);
 
-	ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
+	ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value));
 
 	res.value += tmp;
 
 	return res;
 }
 
-struct fixed31_32 dal_fixed31_32_recip(
-	struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg)
 {
 	/*
 	 * @note
@@ -276,41 +216,40 @@
 
 	ASSERT(arg.value);
 
-	return dal_fixed31_32_from_fraction(
-		dal_fixed31_32_one.value,
+	return dc_fixpt_from_fraction(
+		dc_fixpt_one.value,
 		arg.value);
 }
 
-struct fixed31_32 dal_fixed31_32_sinc(
-	struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg)
 {
 	struct fixed31_32 square;
 
-	struct fixed31_32 res = dal_fixed31_32_one;
+	struct fixed31_32 res = dc_fixpt_one;
 
-	int32_t n = 27;
+	int n = 27;
 
 	struct fixed31_32 arg_norm = arg;
 
-	if (dal_fixed31_32_le(
-		dal_fixed31_32_two_pi,
-		dal_fixed31_32_abs(arg))) {
-		arg_norm = dal_fixed31_32_sub(
+	if (dc_fixpt_le(
+		dc_fixpt_two_pi,
+		dc_fixpt_abs(arg))) {
+		arg_norm = dc_fixpt_sub(
 			arg_norm,
-			dal_fixed31_32_mul_int(
-				dal_fixed31_32_two_pi,
-				(int32_t)div64_s64(
+			dc_fixpt_mul_int(
+				dc_fixpt_two_pi,
+				(int)div64_s64(
 					arg_norm.value,
-					dal_fixed31_32_two_pi.value)));
+					dc_fixpt_two_pi.value)));
 	}
 
-	square = dal_fixed31_32_sqr(arg_norm);
+	square = dc_fixpt_sqr(arg_norm);
 
 	do {
-		res = dal_fixed31_32_sub(
-			dal_fixed31_32_one,
-			dal_fixed31_32_div_int(
-				dal_fixed31_32_mul(
+		res = dc_fixpt_sub(
+			dc_fixpt_one,
+			dc_fixpt_div_int(
+				dc_fixpt_mul(
 					square,
 					res),
 				n * (n - 1)));
@@ -319,37 +258,35 @@
 	} while (n > 2);
 
 	if (arg.value != arg_norm.value)
-		res = dal_fixed31_32_div(
-			dal_fixed31_32_mul(res, arg_norm),
+		res = dc_fixpt_div(
+			dc_fixpt_mul(res, arg_norm),
 			arg);
 
 	return res;
 }
 
-struct fixed31_32 dal_fixed31_32_sin(
-	struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg)
 {
-	return dal_fixed31_32_mul(
+	return dc_fixpt_mul(
 		arg,
-		dal_fixed31_32_sinc(arg));
+		dc_fixpt_sinc(arg));
 }
 
-struct fixed31_32 dal_fixed31_32_cos(
-	struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg)
 {
 	/* TODO implement argument normalization */
 
-	const struct fixed31_32 square = dal_fixed31_32_sqr(arg);
+	const struct fixed31_32 square = dc_fixpt_sqr(arg);
 
-	struct fixed31_32 res = dal_fixed31_32_one;
+	struct fixed31_32 res = dc_fixpt_one;
 
-	int32_t n = 26;
+	int n = 26;
 
 	do {
-		res = dal_fixed31_32_sub(
-			dal_fixed31_32_one,
-			dal_fixed31_32_div_int(
-				dal_fixed31_32_mul(
+		res = dc_fixpt_sub(
+			dc_fixpt_one,
+			dc_fixpt_div_int(
+				dc_fixpt_mul(
 					square,
 					res),
 				n * (n - 1)));
@@ -367,37 +304,35 @@
  *
  * Calculated as Taylor series.
  */
-static struct fixed31_32 fixed31_32_exp_from_taylor_series(
-	struct fixed31_32 arg)
+static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg)
 {
-	uint32_t n = 9;
+	unsigned int n = 9;
 
-	struct fixed31_32 res = dal_fixed31_32_from_fraction(
+	struct fixed31_32 res = dc_fixpt_from_fraction(
 		n + 2,
 		n + 1);
 	/* TODO find correct res */
 
-	ASSERT(dal_fixed31_32_lt(arg, dal_fixed31_32_one));
+	ASSERT(dc_fixpt_lt(arg, dc_fixpt_one));
 
 	do
-		res = dal_fixed31_32_add(
-			dal_fixed31_32_one,
-			dal_fixed31_32_div_int(
-				dal_fixed31_32_mul(
+		res = dc_fixpt_add(
+			dc_fixpt_one,
+			dc_fixpt_div_int(
+				dc_fixpt_mul(
 					arg,
 					res),
 				n));
 	while (--n != 1);
 
-	return dal_fixed31_32_add(
-		dal_fixed31_32_one,
-		dal_fixed31_32_mul(
+	return dc_fixpt_add(
+		dc_fixpt_one,
+		dc_fixpt_mul(
 			arg,
 			res));
 }
 
-struct fixed31_32 dal_fixed31_32_exp(
-	struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg)
 {
 	/*
 	 * @brief
@@ -406,44 +341,43 @@
 	 * where m = round(x / ln(2)), r = x - m * ln(2)
 	 */
 
-	if (dal_fixed31_32_le(
-		dal_fixed31_32_ln2_div_2,
-		dal_fixed31_32_abs(arg))) {
-		int32_t m = dal_fixed31_32_round(
-			dal_fixed31_32_div(
+	if (dc_fixpt_le(
+		dc_fixpt_ln2_div_2,
+		dc_fixpt_abs(arg))) {
+		int m = dc_fixpt_round(
+			dc_fixpt_div(
 				arg,
-				dal_fixed31_32_ln2));
+				dc_fixpt_ln2));
 
-		struct fixed31_32 r = dal_fixed31_32_sub(
+		struct fixed31_32 r = dc_fixpt_sub(
 			arg,
-			dal_fixed31_32_mul_int(
-				dal_fixed31_32_ln2,
+			dc_fixpt_mul_int(
+				dc_fixpt_ln2,
 				m));
 
 		ASSERT(m != 0);
 
-		ASSERT(dal_fixed31_32_lt(
-			dal_fixed31_32_abs(r),
-			dal_fixed31_32_one));
+		ASSERT(dc_fixpt_lt(
+			dc_fixpt_abs(r),
+			dc_fixpt_one));
 
 		if (m > 0)
-			return dal_fixed31_32_shl(
+			return dc_fixpt_shl(
 				fixed31_32_exp_from_taylor_series(r),
-				(uint8_t)m);
+				(unsigned char)m);
 		else
-			return dal_fixed31_32_div_int(
+			return dc_fixpt_div_int(
 				fixed31_32_exp_from_taylor_series(r),
 				1LL << -m);
 	} else if (arg.value != 0)
 		return fixed31_32_exp_from_taylor_series(arg);
 	else
-		return dal_fixed31_32_one;
+		return dc_fixpt_one;
 }
 
-struct fixed31_32 dal_fixed31_32_log(
-	struct fixed31_32 arg)
+struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg)
 {
-	struct fixed31_32 res = dal_fixed31_32_neg(dal_fixed31_32_one);
+	struct fixed31_32 res = dc_fixpt_neg(dc_fixpt_one);
 	/* TODO improve 1st estimation */
 
 	struct fixed31_32 error;
@@ -453,15 +387,15 @@
 	/* TODO if arg is zero, return -INF */
 
 	do {
-		struct fixed31_32 res1 = dal_fixed31_32_add(
-			dal_fixed31_32_sub(
+		struct fixed31_32 res1 = dc_fixpt_add(
+			dc_fixpt_sub(
 				res,
-				dal_fixed31_32_one),
-			dal_fixed31_32_div(
+				dc_fixpt_one),
+			dc_fixpt_div(
 				arg,
-				dal_fixed31_32_exp(res)));
+				dc_fixpt_exp(res)));
 
-		error = dal_fixed31_32_sub(
+		error = dc_fixpt_sub(
 			res,
 			res1);
 
@@ -472,78 +406,23 @@
 	return res;
 }
 
-struct fixed31_32 dal_fixed31_32_pow(
-	struct fixed31_32 arg1,
-	struct fixed31_32 arg2)
-{
-	return dal_fixed31_32_exp(
-		dal_fixed31_32_mul(
-			dal_fixed31_32_log(arg1),
-			arg2));
-}
-
-int32_t dal_fixed31_32_floor(
-	struct fixed31_32 arg)
-{
-	uint64_t arg_value = abs_i64(arg.value);
-
-	if (arg.value >= 0)
-		return (int32_t)GET_INTEGER_PART(arg_value);
-	else
-		return -(int32_t)GET_INTEGER_PART(arg_value);
-}
-
-int32_t dal_fixed31_32_round(
-	struct fixed31_32 arg)
-{
-	uint64_t arg_value = abs_i64(arg.value);
-
-	const int64_t summand = dal_fixed31_32_half.value;
-
-	ASSERT(LLONG_MAX - (int64_t)arg_value >= summand);
-
-	arg_value += summand;
-
-	if (arg.value >= 0)
-		return (int32_t)GET_INTEGER_PART(arg_value);
-	else
-		return -(int32_t)GET_INTEGER_PART(arg_value);
-}
-
-int32_t dal_fixed31_32_ceil(
-	struct fixed31_32 arg)
-{
-	uint64_t arg_value = abs_i64(arg.value);
-
-	const int64_t summand = dal_fixed31_32_one.value -
-		dal_fixed31_32_epsilon.value;
-
-	ASSERT(LLONG_MAX - (int64_t)arg_value >= summand);
-
-	arg_value += summand;
-
-	if (arg.value >= 0)
-		return (int32_t)GET_INTEGER_PART(arg_value);
-	else
-		return -(int32_t)GET_INTEGER_PART(arg_value);
-}
 
 /* this function is a generic helper to translate fixed point value to
  * specified integer format that will consist of integer_bits integer part and
  * fractional_bits fractional part. For example it is used in
- * dal_fixed31_32_u2d19 to receive 2 bits integer part and 19 bits fractional
+ * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional
  * part in 32 bits. It is used in hw programming (scaler)
  */
 
-static inline uint32_t ux_dy(
-	int64_t value,
-	uint32_t integer_bits,
-	uint32_t fractional_bits)
+static inline unsigned int ux_dy(
+	long long value,
+	unsigned int integer_bits,
+	unsigned int fractional_bits)
 {
 	/* 1. create mask of integer part */
-	uint32_t result = (1 << integer_bits) - 1;
+	unsigned int result = (1 << integer_bits) - 1;
 	/* 2. mask out fractional part */
-	uint32_t fractional_part = FRACTIONAL_PART_MASK & value;
+	unsigned int fractional_part = FRACTIONAL_PART_MASK & value;
 	/* 3. shrink fixed point integer part to be of integer_bits width*/
 	result &= GET_INTEGER_PART(value);
 	/* 4. make space for fractional part to be filled in after integer */
@@ -554,13 +433,13 @@
 	return result | fractional_part;
 }
 
-static inline uint32_t clamp_ux_dy(
-	int64_t value,
-	uint32_t integer_bits,
-	uint32_t fractional_bits,
-	uint32_t min_clamp)
+static inline unsigned int clamp_ux_dy(
+	long long value,
+	unsigned int integer_bits,
+	unsigned int fractional_bits,
+	unsigned int min_clamp)
 {
-	uint32_t truncated_val = ux_dy(value, integer_bits, fractional_bits);
+	unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits);
 
 	if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART)))
 		return (1 << (integer_bits + fractional_bits)) - 1;
@@ -570,35 +449,30 @@
 		return min_clamp;
 }
 
-uint32_t dal_fixed31_32_u2d19(
-	struct fixed31_32 arg)
+unsigned int dc_fixpt_u2d19(struct fixed31_32 arg)
 {
 	return ux_dy(arg.value, 2, 19);
 }
 
-uint32_t dal_fixed31_32_u0d19(
-	struct fixed31_32 arg)
+unsigned int dc_fixpt_u0d19(struct fixed31_32 arg)
 {
 	return ux_dy(arg.value, 0, 19);
 }
 
-uint32_t dal_fixed31_32_clamp_u0d14(
-	struct fixed31_32 arg)
+unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg)
 {
 	return clamp_ux_dy(arg.value, 0, 14, 1);
 }
 
-uint32_t dal_fixed31_32_clamp_u0d10(
-	struct fixed31_32 arg)
+unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg)
 {
 	return clamp_ux_dy(arg.value, 0, 10, 1);
 }
 
-int32_t dal_fixed31_32_s4d19(
-	struct fixed31_32 arg)
+int dc_fixpt_s4d19(struct fixed31_32 arg)
 {
 	if (arg.value < 0)
-		return -(int32_t)ux_dy(dal_fixed31_32_abs(arg).value, 4, 19);
+		return -(int)ux_dy(dc_fixpt_abs(arg).value, 4, 19);
 	else
 		return ux_dy(arg.value, 4, 19);
 }
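
The dal_fixed31_32_* to dc_fixpt_* rename above is mechanical, but
dc_fixpt_from_fraction() deserves a closer look: one 64-bit division yields the
integer part, then the remainder is doubled 32 times to produce the fractional
bits, and the last bit is rounded. A compact user-space sketch for
non-negative inputs (the real function adds sign handling and overflow
ASSERTs):

    #include <stdint.h>
    #include <stdio.h>

    #define FRAC_BITS 32

    static uint64_t fixpt_from_fraction(uint64_t num, uint64_t den)
    {
            uint64_t res = num / den;         /* integer part */
            uint64_t rem = num % den;
            unsigned int i = FRAC_BITS;

            do {                              /* one fractional bit per pass */
                    rem <<= 1;
                    res <<= 1;
                    if (rem >= den) {
                            res |= 1;
                            rem -= den;
                    }
            } while (--i != 0);

            res += (rem << 1) >= den;         /* round the least significant bit */
            return res;
    }

    int main(void)
    {
            uint64_t third = fixpt_from_fraction(1, 3);

            printf("1/3 -> 0x%016llx (%.10f)\n", (unsigned long long)third,
                   (double)third / 4294967296.0);
            return 0;
    }
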
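Similarly, dc_fixpt_exp() relies on the range reduction quoted in its comment:
with m = round(x / ln 2) and r = x - m * ln 2, e^x = 2^m * e^r, so the Taylor
series in fixed31_32_exp_from_taylor_series() only ever sees |r| < 1, where it
converges quickly; the 2^m factor then becomes a left shift (dc_fixpt_shl) or
an integer divide. The same idea in plain floating point, for illustration
only:

    #include <math.h>
    #include <stdio.h>

    /* exp(x) via range reduction: m = round(x / ln 2), r = x - m * ln 2,
     * so exp(x) = 2^m * exp(r) with |r| <= ln(2)/2. */
    static double exp_reduced(double x)
    {
            int m = (int)lround(x / M_LN2);
            double r = x - m * M_LN2;
            double series = 1.0, term = 1.0;
            int n;

            for (n = 1; n <= 12; n++) {   /* short Taylor series for exp(r) */
                    term *= r / n;
                    series += term;
            }
            return ldexp(series, m);      /* apply the 2^m factor */
    }

    int main(void)
    {
            printf("exp_reduced(3.0) = %.9f  libm exp(3.0) = %.9f\n",
                   exp_reduced(3.0), exp(3.0));
            return 0;
    }
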
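Finally, ux_dy() at the bottom of the file packs a Q31.32 value into the small
uX.dY register fields the scaler hardware expects; dc_fixpt_u2d19() is the
2-integer-bit, 19-fractional-bit case. A sketch of that packing under the same
bit layout:

    #include <stdint.h>
    #include <stdio.h>

    #define FRAC_BITS 32
    #define FRAC_MASK 0xFFFFFFFFULL

    /* Pack a Q31.32 value into integer_bits.fractional_bits, mirroring ux_dy(). */
    static uint32_t pack_ux_dy(uint64_t value, unsigned int integer_bits,
                               unsigned int fractional_bits)
    {
            uint32_t result = (1u << integer_bits) - 1;    /* integer-part mask */
            uint32_t frac = (uint32_t)(value & FRAC_MASK); /* fractional part   */

            result &= (uint32_t)(value >> FRAC_BITS);      /* keep low int bits */
            result <<= fractional_bits;                    /* make room         */
            frac >>= FRAC_BITS - fractional_bits;          /* truncate fraction */
            return result | frac;
    }

    int main(void)
    {
            /* 1.5 in Q31.32: integer bit 0 set, top fractional bit set. */
            uint64_t one_and_half = (1ULL << FRAC_BITS) | (1ULL << (FRAC_BITS - 1));

            /* dc_fixpt_u2d19() is pack_ux_dy(value, 2, 19). Expect 0xc0000. */
            printf("1.5 as u2.19: 0x%05x\n", pack_ux_dy(one_and_half, 2, 19));
            return 0;
    }
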
diff --git a/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c b/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c
deleted file mode 100644
index 4d3aaa8..0000000
--- a/drivers/gpu/drm/amd/display/dc/basics/fixpt32_32.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-#include "dm_services.h"
-#include "include/fixed32_32.h"
-
-static uint64_t u64_div(uint64_t n, uint64_t d)
-{
-	uint32_t i = 0;
-	uint64_t r;
-	uint64_t q = div64_u64_rem(n, d, &r);
-
-	for (i = 0; i < 32; ++i) {
-		uint64_t sbit = q & (1ULL<<63);
-
-		r <<= 1;
-		r |= sbit ? 1 : 0;
-		q <<= 1;
-		if (r >= d) {
-			r -= d;
-			q |= 1;
-		}
-	}
-
-	if (2*r >= d)
-		q += 1;
-	return q;
-}
-
-struct fixed32_32 dal_fixed32_32_from_fraction(uint32_t n, uint32_t d)
-{
-	struct fixed32_32 fx;
-
-	fx.value = u64_div((uint64_t)n << 32, (uint64_t)d << 32);
-	return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_add(
-	struct fixed32_32 lhs,
-	struct fixed32_32 rhs)
-{
-	struct fixed32_32 fx = {lhs.value + rhs.value};
-
-	ASSERT(fx.value >= rhs.value);
-	return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_add_int(struct fixed32_32 lhs, uint32_t rhs)
-{
-	struct fixed32_32 fx = {lhs.value + ((uint64_t)rhs << 32)};
-
-	ASSERT(fx.value >= (uint64_t)rhs << 32);
-	return fx;
-
-}
-struct fixed32_32 dal_fixed32_32_sub(
-	struct fixed32_32 lhs,
-	struct fixed32_32 rhs)
-{
-	struct fixed32_32 fx;
-
-	ASSERT(lhs.value >= rhs.value);
-	fx.value = lhs.value - rhs.value;
-	return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_sub_int(struct fixed32_32 lhs, uint32_t rhs)
-{
-	struct fixed32_32 fx;
-
-	ASSERT(lhs.value >= ((uint64_t)rhs<<32));
-	fx.value = lhs.value - ((uint64_t)rhs<<32);
-	return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_mul(
-	struct fixed32_32 lhs,
-	struct fixed32_32 rhs)
-{
-	struct fixed32_32 fx;
-	uint64_t lhs_int = lhs.value>>32;
-	uint64_t lhs_frac = (uint32_t)lhs.value;
-	uint64_t rhs_int = rhs.value>>32;
-	uint64_t rhs_frac = (uint32_t)rhs.value;
-	uint64_t ahbh = lhs_int * rhs_int;
-	uint64_t ahbl = lhs_int * rhs_frac;
-	uint64_t albh = lhs_frac * rhs_int;
-	uint64_t albl = lhs_frac * rhs_frac;
-
-	ASSERT((ahbh>>32) == 0);
-
-	fx.value = (ahbh<<32) + ahbl + albh + (albl>>32);
-	return fx;
-
-}
-
-struct fixed32_32 dal_fixed32_32_mul_int(struct fixed32_32 lhs, uint32_t rhs)
-{
-	struct fixed32_32 fx;
-	uint64_t lhsi = (lhs.value>>32) * (uint64_t)rhs;
-	uint64_t lhsf;
-
-	ASSERT((lhsi>>32) == 0);
-	lhsf = ((uint32_t)lhs.value) * (uint64_t)rhs;
-	ASSERT((lhsi<<32) + lhsf >= lhsf);
-	fx.value = (lhsi<<32) + lhsf;
-	return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_div(
-	struct fixed32_32 lhs,
-	struct fixed32_32 rhs)
-{
-	struct fixed32_32 fx;
-
-	fx.value = u64_div(lhs.value, rhs.value);
-	return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_div_int(struct fixed32_32 lhs, uint32_t rhs)
-{
-	struct fixed32_32 fx;
-
-	fx.value = u64_div(lhs.value, (uint64_t)rhs << 32);
-	return fx;
-}
-
-uint32_t dal_fixed32_32_ceil(struct fixed32_32 v)
-{
-	ASSERT((uint32_t)v.value ? (v.value >> 32) + 1 >= 1 : true);
-	return (v.value>>32) + ((uint32_t)v.value ? 1 : 0);
-}
-
-uint32_t dal_fixed32_32_round(struct fixed32_32 v)
-{
-	ASSERT(v.value + (1ULL<<31) >= (1ULL<<31));
-	return (v.value + (1ULL<<31))>>32;
-}
-
diff --git a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c
index 854678a..0214515 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c
@@ -94,7 +94,6 @@
 			dm_logger_append(&entry, "%2.2X ", hex_data[i]);
 
 	dm_logger_append(&entry, "^\n");
-	dm_helpers_dc_conn_log(ctx, &entry, event);
 
 fail:
 	dm_logger_close(&entry);
diff --git a/drivers/gpu/drm/amd/display/dc/basics/logger.c b/drivers/gpu/drm/amd/display/dc/basics/logger.c
index 31bee05..738a818 100644
--- a/drivers/gpu/drm/amd/display/dc/basics/logger.c
+++ b/drivers/gpu/drm/amd/display/dc/basics/logger.c
@@ -61,7 +61,7 @@
 		{LOG_EVENT_UNDERFLOW,       "Underflow"},
 		{LOG_IF_TRACE,              "InterfaceTrace"},
 		{LOG_DTN,                   "DTN"},
-		{LOG_PROFILING,             "Profiling"}
+		{LOG_DISPLAYSTATS,          "DisplayStats"}
 };
 
 
@@ -402,3 +402,4 @@
 		entry->max_buf_bytes = 0;
 	}
 }
+
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 10a5807..b8cef7a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1330,6 +1330,9 @@
 			case 2:
 				result = get_firmware_info_v3_2(bp, info);
 				break;
+			case 3:
+				result = get_firmware_info_v3_2(bp, info);
+				break;
 			default:
 				break;
 			}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
index 4b5fdd5..651e1fd 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c
@@ -24,7 +24,7 @@
  */
 
 #include "dm_services.h"
-
+#include "amdgpu.h"
 #include "atom.h"
 
 #include "include/bios_parser_interface.h"
@@ -35,16 +35,16 @@
 #include "bios_parser_types_internal.h"
 
 #define EXEC_BIOS_CMD_TABLE(command, params)\
-	(cgs_atom_exec_cmd_table(bp->base.ctx->cgs_device, \
+	(amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
 		GetIndexIntoMasterTable(COMMAND, command), \
-		&params) == 0)
+		(uint32_t *)&params) == 0)
 
 #define BIOS_CMD_TABLE_REVISION(command, frev, crev)\
-	cgs_atom_get_cmd_table_revs(bp->base.ctx->cgs_device, \
+	amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
 		GetIndexIntoMasterTable(COMMAND, command), &frev, &crev)
 
 #define BIOS_CMD_TABLE_PARA_REVISION(command)\
-	bios_cmd_table_para_revision(bp->base.ctx->cgs_device, \
+	bios_cmd_table_para_revision(bp->base.ctx->driver_context, \
 		GetIndexIntoMasterTable(COMMAND, command))
 
 static void init_dig_encoder_control(struct bios_parser *bp);
@@ -82,16 +82,18 @@
 	init_set_dce_clock(bp);
 }
 
-static uint32_t bios_cmd_table_para_revision(void *cgs_device,
+static uint32_t bios_cmd_table_para_revision(void *dev,
 					     uint32_t index)
 {
+	struct amdgpu_device *adev = dev;
 	uint8_t frev, crev;
 
-	if (cgs_atom_get_cmd_table_revs(cgs_device,
+	if (amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context,
 					index,
-					&frev, &crev) != 0)
+					&frev, &crev))
+		return crev;
+	else
 		return 0;
-	return crev;
 }
 
 /*******************************************************************************
@@ -368,7 +370,7 @@
 	uint8_t crev;
 
 	if (BIOS_CMD_TABLE_REVISION(UNIPHYTransmitterControl,
-			frev, crev) != 0)
+			frev, crev) == false)
 		BREAK_TO_DEBUGGER();
 	switch (crev) {
 	case 2:
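
One subtlety in the conversion above: cgs_atom_get_cmd_table_revs() signalled
success by returning 0, while amdgpu_atom_parse_cmd_header() returns true on
success, which is why both the revision helper and the == false checks flip
polarity. The resulting dispatch pattern, reduced to a standalone sketch (the
parser stub is invented):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Invented stand-in for amdgpu_atom_parse_cmd_header(): true on success. */
    static bool parse_cmd_header(int index, uint8_t *frev, uint8_t *crev)
    {
            *frev = 1;
            *crev = 6;     /* pretend the BIOS reports parameter revision 6 */
            return true;
    }

    /* Same shape as bios_cmd_table_para_revision() after the conversion. */
    static uint32_t cmd_table_para_revision(int index)
    {
            uint8_t frev, crev;

            if (parse_cmd_header(index, &frev, &crev))
                    return crev;  /* success: hand back the parameter revision */
            else
                    return 0;     /* failure: callers treat 0 as unknown */
    }

    int main(void)
    {
            printf("table 7 parameter revision: %u\n", cmd_table_para_revision(7));
            return 0;
    }
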
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
index 3f63f71..752b08a 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c
@@ -26,14 +26,18 @@
 #include "dm_services.h"
 
 #include "ObjectID.h"
-#include "atomfirmware.h"
 
+#include "atomfirmware.h"
+#include "atom.h"
 #include "include/bios_parser_interface.h"
 
 #include "command_table2.h"
 #include "command_table_helper2.h"
 #include "bios_parser_helper.h"
 #include "bios_parser_types_internal2.h"
+#include "amdgpu.h"
+
+
 #define DC_LOGGER \
 	bp->base.ctx->logger
 
@@ -43,16 +47,16 @@
 		->FieldName)-(char *)0)/sizeof(uint16_t))
 
 #define EXEC_BIOS_CMD_TABLE(fname, params)\
-	(cgs_atom_exec_cmd_table(bp->base.ctx->cgs_device, \
+	(amdgpu_atom_execute_table(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
 		GET_INDEX_INTO_MASTER_TABLE(command, fname), \
-		&params) == 0)
+		(uint32_t *)&params) == 0)
 
 #define BIOS_CMD_TABLE_REVISION(fname, frev, crev)\
-	cgs_atom_get_cmd_table_revs(bp->base.ctx->cgs_device, \
+	amdgpu_atom_parse_cmd_header(((struct amdgpu_device *)bp->base.ctx->driver_context)->mode_info.atom_context, \
 		GET_INDEX_INTO_MASTER_TABLE(command, fname), &frev, &crev)
 
 #define BIOS_CMD_TABLE_PARA_REVISION(fname)\
-	bios_cmd_table_para_revision(bp->base.ctx->cgs_device, \
+	bios_cmd_table_para_revision(bp->base.ctx->driver_context, \
 			GET_INDEX_INTO_MASTER_TABLE(command, fname))
 
 static void init_dig_encoder_control(struct bios_parser *bp);
@@ -86,16 +90,18 @@
 	init_get_smu_clock_info(bp);
 }
 
-static uint32_t bios_cmd_table_para_revision(void *cgs_device,
+static uint32_t bios_cmd_table_para_revision(void *dev,
 					     uint32_t index)
 {
+	struct amdgpu_device *adev = dev;
 	uint8_t frev, crev;
 
-	if (cgs_atom_get_cmd_table_revs(cgs_device,
+	if (amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context,
 					index,
-					&frev, &crev) != 0)
+					&frev, &crev))
+		return crev;
+	else
 		return 0;
-	return crev;
 }
 
 /******************************************************************************
@@ -201,7 +207,7 @@
 	uint8_t frev;
 	uint8_t crev;
 
-	if (BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) != 0)
+	if (BIOS_CMD_TABLE_REVISION(dig1transmittercontrol, frev, crev) == false)
 		BREAK_TO_DEBUGGER();
 	switch (crev) {
 	case 6:
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
index 2979358..253bbb1 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper.c
@@ -51,6 +51,7 @@
 		return true;
 
 	case DCE_VERSION_11_2:
+	case DCE_VERSION_11_22:
 		*h = dal_cmd_tbl_helper_dce112_get_table();
 		return true;
 
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index 9a4d30d..bbbcef5 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -52,6 +52,7 @@
 		return true;
 
 	case DCE_VERSION_11_2:
+	case DCE_VERSION_11_22:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
 		return true;
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h b/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h
new file mode 100644
index 0000000..fc3f98f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/calcs/calcs_logger.h
@@ -0,0 +1,579 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef _CALCS_CALCS_LOGGER_H_
+#define _CALCS_CALCS_LOGGER_H_
+#define DC_LOGGER \
+	logger
+
+static void print_bw_calcs_dceip(struct dal_logger *logger, const struct bw_calcs_dceip *dceip)
+{
+
+	DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+	DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_dceip");
+	DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+	DC_LOG_BANDWIDTH_CALCS("	[enum]   bw_calcs_version version %d", dceip->version);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] large_cursor: %d", dceip->large_cursor);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] dmif_pipe_en_fbc_chunk_tracker: %d", dceip->dmif_pipe_en_fbc_chunk_tracker);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] display_write_back_supported: %d", dceip->display_write_back_supported);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] argb_compression_support: %d", dceip->argb_compression_support);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] pre_downscaler_enabled: %d", dceip->pre_downscaler_enabled);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] underlay_downscale_prefetch_enabled: %d",
+				dceip->underlay_downscale_prefetch_enabled);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] graphics_lb_nodownscaling_multi_line_prefetching: %d",
+				dceip->graphics_lb_nodownscaling_multi_line_prefetching);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] limit_excessive_outstanding_dmif_requests: %d",
+				dceip->limit_excessive_outstanding_dmif_requests);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] cursor_max_outstanding_group_num: %d",
+				dceip->cursor_max_outstanding_group_num);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] lines_interleaved_into_lb: %d", dceip->lines_interleaved_into_lb);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] low_power_tiling_mode: %d", dceip->low_power_tiling_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] chunk_width: %d", dceip->chunk_width);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_graphics_pipes: %d", dceip->number_of_graphics_pipes);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_underlay_pipes: %d", dceip->number_of_underlay_pipes);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] max_dmif_buffer_allocated: %d", dceip->max_dmif_buffer_allocated);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] graphics_dmif_size: %d", dceip->graphics_dmif_size);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] underlay_luma_dmif_size: %d", dceip->underlay_luma_dmif_size);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] underlay_chroma_dmif_size: %d", dceip->underlay_chroma_dmif_size);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] scatter_gather_lines_of_pte_prefetching_in_linear_mode: %d",
+				dceip->scatter_gather_lines_of_pte_prefetching_in_linear_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] display_write_back420_luma_mcifwr_buffer_size: %d",
+				dceip->display_write_back420_luma_mcifwr_buffer_size);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] display_write_back420_chroma_mcifwr_buffer_size: %d",
+				dceip->display_write_back420_chroma_mcifwr_buffer_size);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] scatter_gather_pte_request_rows_in_tiling_mode: %d",
+				dceip->scatter_gather_pte_request_rows_in_tiling_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] underlay_vscaler_efficiency10_bit_per_component: %d",
+				bw_fixed_to_int(dceip->underlay_vscaler_efficiency10_bit_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] underlay_vscaler_efficiency12_bit_per_component: %d",
+				bw_fixed_to_int(dceip->underlay_vscaler_efficiency12_bit_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] graphics_vscaler_efficiency6_bit_per_component: %d",
+				bw_fixed_to_int(dceip->graphics_vscaler_efficiency6_bit_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] graphics_vscaler_efficiency8_bit_per_component: %d",
+				bw_fixed_to_int(dceip->graphics_vscaler_efficiency8_bit_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] graphics_vscaler_efficiency10_bit_per_component: %d",
+				bw_fixed_to_int(dceip->graphics_vscaler_efficiency10_bit_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] graphics_vscaler_efficiency12_bit_per_component: %d",
+				bw_fixed_to_int(dceip->graphics_vscaler_efficiency12_bit_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] alpha_vscaler_efficiency: %d",
+				bw_fixed_to_int(dceip->alpha_vscaler_efficiency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lb_write_pixels_per_dispclk: %d",
+				bw_fixed_to_int(dceip->lb_write_pixels_per_dispclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lb_size_per_component444: %d",
+				bw_fixed_to_int(dceip->lb_size_per_component444));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_and_dram_clock_state_change_gated_before_cursor: %d",
+				bw_fixed_to_int(dceip->stutter_and_dram_clock_state_change_gated_before_cursor));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] underlay420_luma_lb_size_per_component: %d",
+				bw_fixed_to_int(dceip->underlay420_luma_lb_size_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] underlay420_chroma_lb_size_per_component: %d",
+				bw_fixed_to_int(dceip->underlay420_chroma_lb_size_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] underlay422_lb_size_per_component: %d",
+				bw_fixed_to_int(dceip->underlay422_lb_size_per_component));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] cursor_chunk_width: %d", bw_fixed_to_int(dceip->cursor_chunk_width));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] cursor_dcp_buffer_lines: %d",
+				bw_fixed_to_int(dceip->cursor_dcp_buffer_lines));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] underlay_maximum_width_efficient_for_tiling: %d",
+				bw_fixed_to_int(dceip->underlay_maximum_width_efficient_for_tiling));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] underlay_maximum_height_efficient_for_tiling: %d",
+				bw_fixed_to_int(dceip->underlay_maximum_height_efficient_for_tiling));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display: %d",
+				bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation: %d",
+				bw_fixed_to_int(dceip->peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] minimum_outstanding_pte_request_limit: %d",
+				bw_fixed_to_int(dceip->minimum_outstanding_pte_request_limit));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] maximum_total_outstanding_pte_requests_allowed_by_saw: %d",
+				bw_fixed_to_int(dceip->maximum_total_outstanding_pte_requests_allowed_by_saw));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] linear_mode_line_request_alternation_slice: %d",
+				bw_fixed_to_int(dceip->linear_mode_line_request_alternation_slice));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] request_efficiency: %d", bw_fixed_to_int(dceip->request_efficiency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk_per_request: %d", bw_fixed_to_int(dceip->dispclk_per_request));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk_ramping_factor: %d",
+				bw_fixed_to_int(dceip->dispclk_ramping_factor));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] display_pipe_throughput_factor: %d",
+				bw_fixed_to_int(dceip->display_pipe_throughput_factor));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mcifwr_all_surfaces_burst_time: %d",
+				bw_fixed_to_int(dceip->mcifwr_all_surfaces_burst_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmif_request_buffer_size: %d",
+				bw_fixed_to_int(dceip->dmif_request_buffer_size));
+
+
+}
+
+static void print_bw_calcs_vbios(struct dal_logger *logger, const struct bw_calcs_vbios *vbios)
+{
+
+	DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+	DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_vbios vbios");
+	DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines memory_type: %d", vbios->memory_type);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines memory_type: %d", vbios->memory_type);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] dram_channel_width_in_bits: %d", vbios->dram_channel_width_in_bits);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_dram_channels: %d", vbios->number_of_dram_channels);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_dram_banks: %d", vbios->number_of_dram_banks);
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] low_yclk: %d", bw_fixed_to_int(vbios->low_yclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid_yclk: %d", bw_fixed_to_int(vbios->mid_yclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] high_yclk: %d", bw_fixed_to_int(vbios->high_yclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] low_sclk: %d", bw_fixed_to_int(vbios->low_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid1_sclk: %d", bw_fixed_to_int(vbios->mid1_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid2_sclk: %d", bw_fixed_to_int(vbios->mid2_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid3_sclk: %d", bw_fixed_to_int(vbios->mid3_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid4_sclk: %d", bw_fixed_to_int(vbios->mid4_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid5_sclk: %d", bw_fixed_to_int(vbios->mid5_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid6_sclk: %d", bw_fixed_to_int(vbios->mid6_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] high_sclk: %d", bw_fixed_to_int(vbios->high_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] low_voltage_max_dispclk: %d",
+				bw_fixed_to_int(vbios->low_voltage_max_dispclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid_voltage_max_dispclk;: %d",
+				bw_fixed_to_int(vbios->mid_voltage_max_dispclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] high_voltage_max_dispclk;: %d",
+				bw_fixed_to_int(vbios->high_voltage_max_dispclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] low_voltage_max_phyclk: %d",
+				bw_fixed_to_int(vbios->low_voltage_max_phyclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mid_voltage_max_phyclk: %d",
+				bw_fixed_to_int(vbios->mid_voltage_max_phyclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] high_voltage_max_phyclk: %d",
+				bw_fixed_to_int(vbios->high_voltage_max_phyclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] data_return_bus_width: %d", bw_fixed_to_int(vbios->data_return_bus_width));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] trc: %d", bw_fixed_to_int(vbios->trc));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmifmc_urgent_latency: %d", bw_fixed_to_int(vbios->dmifmc_urgent_latency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_self_refresh_exit_latency: %d",
+				bw_fixed_to_int(vbios->stutter_self_refresh_exit_latency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_self_refresh_entry_latency: %d",
+				bw_fixed_to_int(vbios->stutter_self_refresh_entry_latency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] nbp_state_change_latency: %d",
+				bw_fixed_to_int(vbios->nbp_state_change_latency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mcifwrmc_urgent_latency: %d",
+				bw_fixed_to_int(vbios->mcifwrmc_urgent_latency));
+	DC_LOG_BANDWIDTH_CALCS("	[bool] scatter_gather_enable: %d", vbios->scatter_gather_enable);
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] down_spread_percentage: %d",
+				bw_fixed_to_int(vbios->down_spread_percentage));
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] cursor_width: %d", vbios->cursor_width);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] average_compression_rate: %d", vbios->average_compression_rate);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_request_slots_gmc_reserves_for_dmif_per_channel: %d",
+				vbios->number_of_request_slots_gmc_reserves_for_dmif_per_channel);
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] blackout_duration: %d", bw_fixed_to_int(vbios->blackout_duration));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] maximum_blackout_recovery_time: %d",
+				bw_fixed_to_int(vbios->maximum_blackout_recovery_time));
+
+
+}
+
+static void print_bw_calcs_data(struct dal_logger *logger, struct bw_calcs_data *data)
+{
+
+	int i, j, k;
+
+	DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+	DC_LOG_BANDWIDTH_CALCS("struct bw_calcs_data data");
+	DC_LOG_BANDWIDTH_CALCS("#####################################################################");
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_displays: %d", data->number_of_displays);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines underlay_surface_type: %d", data->underlay_surface_type);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines panning_and_bezel_adjustment: %d",
+				data->panning_and_bezel_adjustment);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines graphics_tiling_mode: %d", data->graphics_tiling_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] graphics_lb_bpc: %d", data->graphics_lb_bpc);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] underlay_lb_bpc: %d", data->underlay_lb_bpc);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines underlay_tiling_mode: %d", data->underlay_tiling_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines d0_underlay_mode: %d", data->d0_underlay_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] d1_display_write_back_dwb_enable: %d", data->d1_display_write_back_dwb_enable);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines d1_underlay_mode: %d", data->d1_underlay_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] cpup_state_change_enable: %d", data->cpup_state_change_enable);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] cpuc_state_change_enable: %d", data->cpuc_state_change_enable);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] nbp_state_change_enable: %d", data->nbp_state_change_enable);
+	DC_LOG_BANDWIDTH_CALCS("	[bool] stutter_mode_enable: %d", data->stutter_mode_enable);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] y_clk_level: %d", data->y_clk_level);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] sclk_level: %d", data->sclk_level);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_underlay_surfaces: %d", data->number_of_underlay_surfaces);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_dram_wrchannels: %d", data->number_of_dram_wrchannels);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] chunk_request_delay: %d", data->chunk_request_delay);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] number_of_dram_channels: %d", data->number_of_dram_channels);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines underlay_micro_tile_mode: %d", data->underlay_micro_tile_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines graphics_micro_tile_mode: %d", data->graphics_micro_tile_mode);
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] max_phyclk: %d", bw_fixed_to_int(data->max_phyclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dram_efficiency: %d", bw_fixed_to_int(data->dram_efficiency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_width_after_surface_type: %d",
+				bw_fixed_to_int(data->src_width_after_surface_type));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_height_after_surface_type: %d",
+				bw_fixed_to_int(data->src_height_after_surface_type));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] hsr_after_surface_type: %d",
+				bw_fixed_to_int(data->hsr_after_surface_type));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] vsr_after_surface_type: %d", bw_fixed_to_int(data->vsr_after_surface_type));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_width_after_rotation: %d",
+				bw_fixed_to_int(data->src_width_after_rotation));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_height_after_rotation: %d",
+				bw_fixed_to_int(data->src_height_after_rotation));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] hsr_after_rotation: %d", bw_fixed_to_int(data->hsr_after_rotation));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] vsr_after_rotation: %d", bw_fixed_to_int(data->vsr_after_rotation));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] source_height_pixels: %d", bw_fixed_to_int(data->source_height_pixels));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] hsr_after_stereo: %d", bw_fixed_to_int(data->hsr_after_stereo));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] vsr_after_stereo: %d", bw_fixed_to_int(data->vsr_after_stereo));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] source_width_in_lb: %d", bw_fixed_to_int(data->source_width_in_lb));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lb_line_pitch: %d", bw_fixed_to_int(data->lb_line_pitch));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] underlay_maximum_source_efficient_for_tiling: %d",
+				bw_fixed_to_int(data->underlay_maximum_source_efficient_for_tiling));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] num_lines_at_frame_start: %d",
+				bw_fixed_to_int(data->num_lines_at_frame_start));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] min_dmif_size_in_time: %d", bw_fixed_to_int(data->min_dmif_size_in_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] min_mcifwr_size_in_time: %d",
+				bw_fixed_to_int(data->min_mcifwr_size_in_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_requests_for_dmif_size: %d",
+				bw_fixed_to_int(data->total_requests_for_dmif_size));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] peak_pte_request_to_eviction_ratio_limiting: %d",
+				bw_fixed_to_int(data->peak_pte_request_to_eviction_ratio_limiting));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] useful_pte_per_pte_request: %d",
+				bw_fixed_to_int(data->useful_pte_per_pte_request));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_pte_request_rows: %d",
+				bw_fixed_to_int(data->scatter_gather_pte_request_rows));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_row_height: %d",
+				bw_fixed_to_int(data->scatter_gather_row_height));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_pte_requests_in_vblank: %d",
+				bw_fixed_to_int(data->scatter_gather_pte_requests_in_vblank));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] inefficient_linear_pitch_in_bytes: %d",
+				bw_fixed_to_int(data->inefficient_linear_pitch_in_bytes));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] cursor_total_data: %d", bw_fixed_to_int(data->cursor_total_data));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] cursor_total_request_groups: %d",
+				bw_fixed_to_int(data->cursor_total_request_groups));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_total_pte_requests: %d",
+				bw_fixed_to_int(data->scatter_gather_total_pte_requests));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_total_pte_request_groups: %d",
+				bw_fixed_to_int(data->scatter_gather_total_pte_request_groups));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] tile_width_in_pixels: %d", bw_fixed_to_int(data->tile_width_in_pixels));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmif_total_number_of_data_request_page_close_open: %d",
+				bw_fixed_to_int(data->dmif_total_number_of_data_request_page_close_open));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mcifwr_total_number_of_data_request_page_close_open: %d",
+				bw_fixed_to_int(data->mcifwr_total_number_of_data_request_page_close_open));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] bytes_per_page_close_open: %d",
+				bw_fixed_to_int(data->bytes_per_page_close_open));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mcifwr_total_page_close_open_time: %d",
+				bw_fixed_to_int(data->mcifwr_total_page_close_open_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_requests_for_adjusted_dmif_size: %d",
+				bw_fixed_to_int(data->total_requests_for_adjusted_dmif_size));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_dmifmc_urgent_trips: %d",
+				bw_fixed_to_int(data->total_dmifmc_urgent_trips));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_dmifmc_urgent_latency: %d",
+				bw_fixed_to_int(data->total_dmifmc_urgent_latency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_display_reads_required_data: %d",
+				bw_fixed_to_int(data->total_display_reads_required_data));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_display_reads_required_dram_access_data: %d",
+				bw_fixed_to_int(data->total_display_reads_required_dram_access_data));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_display_writes_required_data: %d",
+				bw_fixed_to_int(data->total_display_writes_required_data));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_display_writes_required_dram_access_data: %d",
+				bw_fixed_to_int(data->total_display_writes_required_dram_access_data));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] display_reads_required_data: %d",
+				bw_fixed_to_int(data->display_reads_required_data));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] display_reads_required_dram_access_data: %d",
+				bw_fixed_to_int(data->display_reads_required_dram_access_data));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmif_total_page_close_open_time: %d",
+				bw_fixed_to_int(data->dmif_total_page_close_open_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] min_cursor_memory_interface_buffer_size_in_time: %d",
+				bw_fixed_to_int(data->min_cursor_memory_interface_buffer_size_in_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] min_read_buffer_size_in_time: %d",
+				bw_fixed_to_int(data->min_read_buffer_size_in_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] display_reads_time_for_data_transfer: %d",
+				bw_fixed_to_int(data->display_reads_time_for_data_transfer));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] display_writes_time_for_data_transfer: %d",
+				bw_fixed_to_int(data->display_writes_time_for_data_transfer));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmif_required_dram_bandwidth: %d",
+				bw_fixed_to_int(data->dmif_required_dram_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mcifwr_required_dram_bandwidth: %d",
+				bw_fixed_to_int(data->mcifwr_required_dram_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] required_dmifmc_urgent_latency_for_page_close_open: %d",
+				bw_fixed_to_int(data->required_dmifmc_urgent_latency_for_page_close_open));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] required_mcifmcwr_urgent_latency: %d",
+				bw_fixed_to_int(data->required_mcifmcwr_urgent_latency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] required_dram_bandwidth_gbyte_per_second: %d",
+				bw_fixed_to_int(data->required_dram_bandwidth_gbyte_per_second));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dram_bandwidth: %d", bw_fixed_to_int(data->dram_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmif_required_sclk: %d", bw_fixed_to_int(data->dmif_required_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mcifwr_required_sclk: %d", bw_fixed_to_int(data->mcifwr_required_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] required_sclk: %d", bw_fixed_to_int(data->required_sclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] downspread_factor: %d", bw_fixed_to_int(data->downspread_factor));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] v_scaler_efficiency: %d", bw_fixed_to_int(data->v_scaler_efficiency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scaler_limits_factor: %d", bw_fixed_to_int(data->scaler_limits_factor));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] display_pipe_pixel_throughput: %d",
+				bw_fixed_to_int(data->display_pipe_pixel_throughput));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_dispclk_required_with_ramping: %d",
+				bw_fixed_to_int(data->total_dispclk_required_with_ramping));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_dispclk_required_without_ramping: %d",
+				bw_fixed_to_int(data->total_dispclk_required_without_ramping));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_read_request_bandwidth: %d",
+				bw_fixed_to_int(data->total_read_request_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_write_request_bandwidth: %d",
+				bw_fixed_to_int(data->total_write_request_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk_required_for_total_read_request_bandwidth: %d",
+				bw_fixed_to_int(data->dispclk_required_for_total_read_request_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_dispclk_required_with_ramping_with_request_bandwidth: %d",
+				bw_fixed_to_int(data->total_dispclk_required_with_ramping_with_request_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_dispclk_required_without_ramping_with_request_bandwidth: %d",
+				bw_fixed_to_int(data->total_dispclk_required_without_ramping_with_request_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk: %d", bw_fixed_to_int(data->dispclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] blackout_recovery_time: %d", bw_fixed_to_int(data->blackout_recovery_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] min_pixels_per_data_fifo_entry: %d",
+				bw_fixed_to_int(data->min_pixels_per_data_fifo_entry));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] sclk_deep_sleep: %d", bw_fixed_to_int(data->sclk_deep_sleep));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] chunk_request_time: %d", bw_fixed_to_int(data->chunk_request_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] cursor_request_time: %d", bw_fixed_to_int(data->cursor_request_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] line_source_pixels_transfer_time: %d",
+				bw_fixed_to_int(data->line_source_pixels_transfer_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmifdram_access_efficiency: %d",
+				bw_fixed_to_int(data->dmifdram_access_efficiency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mcifwrdram_access_efficiency: %d",
+				bw_fixed_to_int(data->mcifwrdram_access_efficiency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_average_bandwidth_no_compression: %d",
+				bw_fixed_to_int(data->total_average_bandwidth_no_compression));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_average_bandwidth: %d",
+				bw_fixed_to_int(data->total_average_bandwidth));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] total_stutter_cycle_duration: %d",
+				bw_fixed_to_int(data->total_stutter_cycle_duration));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_burst_time: %d", bw_fixed_to_int(data->stutter_burst_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] time_in_self_refresh: %d", bw_fixed_to_int(data->time_in_self_refresh));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_efficiency: %d", bw_fixed_to_int(data->stutter_efficiency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] worst_number_of_trips_to_memory: %d",
+				bw_fixed_to_int(data->worst_number_of_trips_to_memory));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] immediate_flip_time: %d", bw_fixed_to_int(data->immediate_flip_time));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] latency_for_non_dmif_clients: %d",
+				bw_fixed_to_int(data->latency_for_non_dmif_clients));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] latency_for_non_mcifwr_clients: %d",
+				bw_fixed_to_int(data->latency_for_non_mcifwr_clients));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmifmc_urgent_latency_supported_in_high_sclk_and_yclk: %d",
+				bw_fixed_to_int(data->dmifmc_urgent_latency_supported_in_high_sclk_and_yclk));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] nbp_state_dram_speed_change_margin: %d",
+				bw_fixed_to_int(data->nbp_state_dram_speed_change_margin));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] display_reads_time_for_data_transfer_and_urgent_latency: %d",
+				bw_fixed_to_int(data->display_reads_time_for_data_transfer_and_urgent_latency));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dram_speed_change_margin: %d",
+				bw_fixed_to_int(data->dram_speed_change_margin));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] min_vblank_dram_speed_change_margin: %d",
+				bw_fixed_to_int(data->min_vblank_dram_speed_change_margin));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] min_stutter_refresh_duration: %d",
+				bw_fixed_to_int(data->min_stutter_refresh_duration));
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] total_stutter_dmif_buffer_size: %d", data->total_stutter_dmif_buffer_size);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] total_bytes_requested: %d", data->total_bytes_requested);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] min_stutter_dmif_buffer_size: %d", data->min_stutter_dmif_buffer_size);
+	DC_LOG_BANDWIDTH_CALCS("	[uint32_t] num_stutter_bursts: %d", data->num_stutter_bursts);
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] v_blank_nbp_state_dram_speed_change_latency_supported: %d",
+				bw_fixed_to_int(data->v_blank_nbp_state_dram_speed_change_latency_supported));
+	DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] nbp_state_dram_speed_change_latency_supported: %d",
+				bw_fixed_to_int(data->nbp_state_dram_speed_change_latency_supported));
+
+	for (i = 0; i < maximum_number_of_surfaces; i++) {
+		DC_LOG_BANDWIDTH_CALCS("	[bool] fbc_en[%d]:%d\n", i, data->fbc_en[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] lpt_en[%d]:%d", i, data->lpt_en[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] displays_match_flag[%d]:%d", i, data->displays_match_flag[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] use_alpha[%d]:%d", i, data->use_alpha[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] orthogonal_rotation[%d]:%d", i, data->orthogonal_rotation[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] enable[%d]:%d", i, data->enable[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] access_one_channel_only[%d]:%d", i, data->access_one_channel_only[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] scatter_gather_enable_for_pipe[%d]:%d",
+					i, data->scatter_gather_enable_for_pipe[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] interlace_mode[%d]:%d",
+					i, data->interlace_mode[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] display_pstate_change_enable[%d]:%d",
+					i, data->display_pstate_change_enable[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bool] line_buffer_prefetch[%d]:%d", i, data->line_buffer_prefetch[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[uint32_t] bytes_per_pixel[%d]:%d", i, data->bytes_per_pixel[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[uint32_t] max_chunks_non_fbc_mode[%d]:%d",
+					i, data->max_chunks_non_fbc_mode[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[uint32_t] lb_bpc[%d]:%d", i, data->lb_bpc[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[uint32_t] output_bpphdmi[%d]:%d", i, data->output_bpphdmi[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[uint32_t] output_bppdp4_lane_hbr[%d]:%d", i, data->output_bppdp4_lane_hbr[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[uint32_t] output_bppdp4_lane_hbr2[%d]:%d",
+					i, data->output_bppdp4_lane_hbr2[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[uint32_t] output_bppdp4_lane_hbr3[%d]:%d",
+					i, data->output_bppdp4_lane_hbr3[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[enum] bw_defines stereo_mode[%d]:%d", i, data->stereo_mode[i]);
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmif_buffer_transfer_time[%d]:%d",
+					i, bw_fixed_to_int(data->dmif_buffer_transfer_time[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] displays_with_same_mode[%d]:%d",
+					i, bw_fixed_to_int(data->displays_with_same_mode[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_dmif_buffer_size[%d]:%d",
+					i, bw_fixed_to_int(data->stutter_dmif_buffer_size[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_refresh_duration[%d]:%d",
+					i, bw_fixed_to_int(data->stutter_refresh_duration[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_exit_watermark[%d]:%d",
+					i, bw_fixed_to_int(data->stutter_exit_watermark[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_entry_watermark[%d]:%d",
+					i, bw_fixed_to_int(data->stutter_entry_watermark[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] h_total[%d]:%d", i, bw_fixed_to_int(data->h_total[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] v_total[%d]:%d", i, bw_fixed_to_int(data->v_total[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] pixel_rate[%d]:%d", i, bw_fixed_to_int(data->pixel_rate[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_width[%d]:%d", i, bw_fixed_to_int(data->src_width[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] pitch_in_pixels[%d]:%d",
+					i, bw_fixed_to_int(data->pitch_in_pixels[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] pitch_in_pixels_after_surface_type[%d]:%d",
+					i, bw_fixed_to_int(data->pitch_in_pixels_after_surface_type[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_height[%d]:%d", i, bw_fixed_to_int(data->src_height[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scale_ratio[%d]:%d", i, bw_fixed_to_int(data->scale_ratio[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] h_taps[%d]:%d", i, bw_fixed_to_int(data->h_taps[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] v_taps[%d]:%d", i, bw_fixed_to_int(data->v_taps[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] h_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->h_scale_ratio[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] v_scale_ratio[%d]:%d", i, bw_fixed_to_int(data->v_scale_ratio[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] rotation_angle[%d]:%d",
+					i, bw_fixed_to_int(data->rotation_angle[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] compression_rate[%d]:%d",
+					i, bw_fixed_to_int(data->compression_rate[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] hsr[%d]:%d", i, bw_fixed_to_int(data->hsr[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] vsr[%d]:%d", i, bw_fixed_to_int(data->vsr[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] source_width_rounded_up_to_chunks[%d]:%d",
+					i, bw_fixed_to_int(data->source_width_rounded_up_to_chunks[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] source_width_pixels[%d]:%d",
+					i, bw_fixed_to_int(data->source_width_pixels[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] source_height_rounded_up_to_chunks[%d]:%d",
+					i, bw_fixed_to_int(data->source_height_rounded_up_to_chunks[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] display_bandwidth[%d]:%d",
+					i, bw_fixed_to_int(data->display_bandwidth[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] request_bandwidth[%d]:%d",
+					i, bw_fixed_to_int(data->request_bandwidth[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] bytes_per_request[%d]:%d",
+					i, bw_fixed_to_int(data->bytes_per_request[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] useful_bytes_per_request[%d]:%d",
+					i, bw_fixed_to_int(data->useful_bytes_per_request[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lines_interleaved_in_mem_access[%d]:%d",
+					i, bw_fixed_to_int(data->lines_interleaved_in_mem_access[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] latency_hiding_lines[%d]:%d",
+					i, bw_fixed_to_int(data->latency_hiding_lines[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lb_partitions[%d]:%d",
+					i, bw_fixed_to_int(data->lb_partitions[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lb_partitions_max[%d]:%d",
+					i, bw_fixed_to_int(data->lb_partitions_max[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk_required_with_ramping[%d]:%d",
+					i, bw_fixed_to_int(data->dispclk_required_with_ramping[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk_required_without_ramping[%d]:%d",
+					i, bw_fixed_to_int(data->dispclk_required_without_ramping[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] data_buffer_size[%d]:%d",
+					i, bw_fixed_to_int(data->data_buffer_size[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] outstanding_chunk_request_limit[%d]:%d",
+					i, bw_fixed_to_int(data->outstanding_chunk_request_limit[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] urgent_watermark[%d]:%d",
+					i, bw_fixed_to_int(data->urgent_watermark[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] nbp_state_change_watermark[%d]:%d",
+					i, bw_fixed_to_int(data->nbp_state_change_watermark[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] v_filter_init[%d]:%d", i, bw_fixed_to_int(data->v_filter_init[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] stutter_cycle_duration[%d]:%d",
+					i, bw_fixed_to_int(data->stutter_cycle_duration[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] average_bandwidth[%d]:%d",
+					i, bw_fixed_to_int(data->average_bandwidth[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] average_bandwidth_no_compression[%d]:%d",
+					i, bw_fixed_to_int(data->average_bandwidth_no_compression[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_pte_request_limit[%d]:%d",
+					i, bw_fixed_to_int(data->scatter_gather_pte_request_limit[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lb_size_per_component[%d]:%d",
+					i, bw_fixed_to_int(data->lb_size_per_component[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] memory_chunk_size_in_bytes[%d]:%d",
+					i, bw_fixed_to_int(data->memory_chunk_size_in_bytes[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] pipe_chunk_size_in_bytes[%d]:%d",
+					i, bw_fixed_to_int(data->pipe_chunk_size_in_bytes[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] number_of_trips_to_memory_for_getting_apte_row[%d]:%d",
+					i, bw_fixed_to_int(data->number_of_trips_to_memory_for_getting_apte_row[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] adjusted_data_buffer_size[%d]:%d",
+					i, bw_fixed_to_int(data->adjusted_data_buffer_size[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] adjusted_data_buffer_size_in_memory[%d]:%d",
+					i, bw_fixed_to_int(data->adjusted_data_buffer_size_in_memory[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] pixels_per_data_fifo_entry[%d]:%d",
+					i, bw_fixed_to_int(data->pixels_per_data_fifo_entry[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_pte_requests_in_row[%d]:%d",
+					i, bw_fixed_to_int(data->scatter_gather_pte_requests_in_row[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] pte_request_per_chunk[%d]:%d",
+					i, bw_fixed_to_int(data->pte_request_per_chunk[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_page_width[%d]:%d",
+					i, bw_fixed_to_int(data->scatter_gather_page_width[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] scatter_gather_page_height[%d]:%d",
+					i, bw_fixed_to_int(data->scatter_gather_page_height[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lb_lines_in_per_line_out_in_beginning_of_frame[%d]:%d",
+					i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_beginning_of_frame[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] lb_lines_in_per_line_out_in_middle_of_frame[%d]:%d",
+					i, bw_fixed_to_int(data->lb_lines_in_per_line_out_in_middle_of_frame[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] cursor_width_pixels[%d]:%d",
+					i, bw_fixed_to_int(data->cursor_width_pixels[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] minimum_latency_hiding[%d]:%d",
+					i, bw_fixed_to_int(data->minimum_latency_hiding[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] maximum_latency_hiding[%d]:%d",
+					i, bw_fixed_to_int(data->maximum_latency_hiding[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] minimum_latency_hiding_with_cursor[%d]:%d",
+					i, bw_fixed_to_int(data->minimum_latency_hiding_with_cursor[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] maximum_latency_hiding_with_cursor[%d]:%d",
+					i, bw_fixed_to_int(data->maximum_latency_hiding_with_cursor[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_pixels_for_first_output_pixel[%d]:%d",
+					i, bw_fixed_to_int(data->src_pixels_for_first_output_pixel[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_pixels_for_last_output_pixel[%d]:%d",
+					i, bw_fixed_to_int(data->src_pixels_for_last_output_pixel[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_data_for_first_output_pixel[%d]:%d",
+					i, bw_fixed_to_int(data->src_data_for_first_output_pixel[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] src_data_for_last_output_pixel[%d]:%d",
+					i, bw_fixed_to_int(data->src_data_for_last_output_pixel[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] active_time[%d]:%d", i, bw_fixed_to_int(data->active_time[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] horizontal_blank_and_chunk_granularity_factor[%d]:%d",
+					i, bw_fixed_to_int(data->horizontal_blank_and_chunk_granularity_factor[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] cursor_latency_hiding[%d]:%d",
+					i, bw_fixed_to_int(data->cursor_latency_hiding[i]));
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] v_blank_dram_speed_change_margin[%d]:%d",
+					i, bw_fixed_to_int(data->v_blank_dram_speed_change_margin[i]));
+	}
+
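+	/* The 3-D tables below are indexed [surface][j][k]; the 3x8 inner
+	 * dimensions are assumed to map to yclk/sclk level combinations.
+	 */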
+	for (i = 0; i < maximum_number_of_surfaces; i++) {
+		for (j = 0; j < 3; j++) {
+			for (k = 0; k < 8; k++) {
+
+				DC_LOG_BANDWIDTH_CALCS("\n	[bw_fixed] line_source_transfer_time[%d][%d][%d]:%d",
+					i, j, k, bw_fixed_to_int(data->line_source_transfer_time[i][j][k]));
+				DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dram_speed_change_line_source_transfer_time[%d][%d][%d]:%d",
+					i, j, k,
+					bw_fixed_to_int(data->dram_speed_change_line_source_transfer_time[i][j][k]));
+			}
+		}
+	}
+
+	for (i = 0; i < 3; i++) {
+		for (j = 0; j < 8; j++) {
+
+			DC_LOG_BANDWIDTH_CALCS("\n	[uint32_t] num_displays_with_margin[%d][%d]:%d",
+					i, j, data->num_displays_with_margin[i][j]);
+			DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmif_burst_time[%d][%d]:%d",
+					i, j, bw_fixed_to_int(data->dmif_burst_time[i][j]));
+			DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] mcifwr_burst_time[%d][%d]:%d",
+					i, j, bw_fixed_to_int(data->mcifwr_burst_time[i][j]));
+			DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] min_dram_speed_change_margin[%d][%d]:%d",
+					i, j, bw_fixed_to_int(data->min_dram_speed_change_margin[i][j]));
+			DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk_required_for_dram_speed_change[%d][%d]:%d",
+					i, j, bw_fixed_to_int(data->dispclk_required_for_dram_speed_change[i][j]));
+			DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] blackout_duration_margin[%d][%d]:%d",
+					i, j, bw_fixed_to_int(data->blackout_duration_margin[i][j]));
+			DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk_required_for_blackout_duration[%d][%d]:%d",
+					i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_duration[i][j]));
+			DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dispclk_required_for_blackout_recovery[%d][%d]:%d",
+					i, j, bw_fixed_to_int(data->dispclk_required_for_blackout_recovery[i][j]));
+		}
+	}
+
+	for (i = 0; i < 6; i++) {
+		DC_LOG_BANDWIDTH_CALCS("	[bw_fixed] dmif_required_sclk_for_urgent_latency[%d]:%d",
+					i, bw_fixed_to_int(data->dmif_required_sclk_for_urgent_latency[i]));
+	}
+}
+
+#endif /* _CALCS_CALCS_LOGGER_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
index 7243c37..31d167b 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/custom_float.c
@@ -36,41 +36,41 @@
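+	/* Biased exponent offset in the usual IEEE-754 style:
+	 * 2^(exponenta_bits - 1) - 1, e.g. 127 for an 8-bit exponent.
+	 */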
 	uint32_t exp_offset = (1 << (format->exponenta_bits - 1)) - 1;
 
 	const struct fixed31_32 mantissa_constant_plus_max_fraction =
-		dal_fixed31_32_from_fraction(
+		dc_fixpt_from_fraction(
 			(1LL << (format->mantissa_bits + 1)) - 1,
 			1LL << format->mantissa_bits);
 
 	struct fixed31_32 mantiss;
 
-	if (dal_fixed31_32_eq(
+	if (dc_fixpt_eq(
 		value,
-		dal_fixed31_32_zero)) {
+		dc_fixpt_zero)) {
 		*negative = false;
 		*mantissa = 0;
 		*exponenta = 0;
 		return true;
 	}
 
-	if (dal_fixed31_32_lt(
+	if (dc_fixpt_lt(
 		value,
-		dal_fixed31_32_zero)) {
+		dc_fixpt_zero)) {
 		*negative = format->sign;
-		value = dal_fixed31_32_neg(value);
+		value = dc_fixpt_neg(value);
 	} else {
 		*negative = false;
 	}
 
-	if (dal_fixed31_32_lt(
+	if (dc_fixpt_lt(
 		value,
-		dal_fixed31_32_one)) {
+		dc_fixpt_one)) {
 		uint32_t i = 1;
 
 		do {
-			value = dal_fixed31_32_shl(value, 1);
+			value = dc_fixpt_shl(value, 1);
 			++i;
-		} while (dal_fixed31_32_lt(
+		} while (dc_fixpt_lt(
 			value,
-			dal_fixed31_32_one));
+			dc_fixpt_one));
 
 		--i;
 
@@ -81,15 +81,15 @@
 		}
 
 		*exponenta = exp_offset - i;
-	} else if (dal_fixed31_32_le(
+	} else if (dc_fixpt_le(
 		mantissa_constant_plus_max_fraction,
 		value)) {
 		uint32_t i = 1;
 
 		do {
-			value = dal_fixed31_32_shr(value, 1);
+			value = dc_fixpt_shr(value, 1);
 			++i;
-		} while (dal_fixed31_32_lt(
+		} while (dc_fixpt_lt(
 			mantissa_constant_plus_max_fraction,
 			value));
 
@@ -98,23 +98,23 @@
 		*exponenta = exp_offset;
 	}
 
-	mantiss = dal_fixed31_32_sub(
+	mantiss = dc_fixpt_sub(
 		value,
-		dal_fixed31_32_one);
+		dc_fixpt_one);
 
-	if (dal_fixed31_32_lt(
+	if (dc_fixpt_lt(
 			mantiss,
-			dal_fixed31_32_zero) ||
-		dal_fixed31_32_lt(
-			dal_fixed31_32_one,
+			dc_fixpt_zero) ||
+		dc_fixpt_lt(
+			dc_fixpt_one,
 			mantiss))
-		mantiss = dal_fixed31_32_zero;
+		mantiss = dc_fixpt_zero;
 	else
-		mantiss = dal_fixed31_32_shl(
+		mantiss = dc_fixpt_shl(
 			mantiss,
 			format->mantissa_bits);
 
-	*mantissa = dal_fixed31_32_floor(mantiss);
+	*mantissa = dc_fixpt_floor(mantiss);
 
 	return true;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index 0cbab81..2c4e8f0 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -28,6 +28,7 @@
 #include "dc.h"
 #include "core_types.h"
 #include "dal_asic_id.h"
+#include "calcs_logger.h"
 
 /*
  * NOTE:
@@ -52,11 +53,14 @@
 		return BW_CALCS_VERSION_CARRIZO;
 
 	case FAMILY_VI:
+		if (ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev))
+			return BW_CALCS_VERSION_POLARIS12;
 		if (ASIC_REV_IS_POLARIS10_P(asic_id.hw_internal_rev))
 			return BW_CALCS_VERSION_POLARIS10;
-		if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev) ||
-				ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev))
+		if (ASIC_REV_IS_POLARIS11_M(asic_id.hw_internal_rev))
 			return BW_CALCS_VERSION_POLARIS11;
+		if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev))
+			return BW_CALCS_VERSION_VEGAM;
 		return BW_CALCS_VERSION_INVALID;
 
 	case FAMILY_AI:
@@ -2145,6 +2149,9 @@
 		dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0); /* todo: this is a bug */
 		break;
 	case BW_CALCS_VERSION_POLARIS10:
+		/* TODO: Treat VEGAM the same as P10 for now.
+		 * Need to tune the parameters for VEGAM if needed.
+		 */
+	case BW_CALCS_VERSION_VEGAM:
 		vbios.memory_type = bw_def_gddr5;
 		vbios.dram_channel_width_in_bits = 32;
 		vbios.number_of_dram_channels = asic_id.vram_width / vbios.dram_channel_width_in_bits;
@@ -2373,6 +2380,122 @@
 		dceip.scatter_gather_pte_request_rows_in_tiling_mode = 2;
 		dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
 		break;
+	case BW_CALCS_VERSION_POLARIS12:
+		vbios.memory_type = bw_def_gddr5;
+		vbios.dram_channel_width_in_bits = 32;
+		vbios.number_of_dram_channels = asic_id.vram_width / vbios.dram_channel_width_in_bits;
+		vbios.number_of_dram_banks = 8;
+		vbios.high_yclk = bw_int_to_fixed(6000);
+		vbios.mid_yclk = bw_int_to_fixed(3200);
+		vbios.low_yclk = bw_int_to_fixed(1000);
+		vbios.low_sclk = bw_int_to_fixed(678);
+		vbios.mid1_sclk = bw_int_to_fixed(864);
+		vbios.mid2_sclk = bw_int_to_fixed(900);
+		vbios.mid3_sclk = bw_int_to_fixed(920);
+		vbios.mid4_sclk = bw_int_to_fixed(940);
+		vbios.mid5_sclk = bw_int_to_fixed(960);
+		vbios.mid6_sclk = bw_int_to_fixed(980);
+		vbios.high_sclk = bw_int_to_fixed(1049);
+		vbios.low_voltage_max_dispclk = bw_int_to_fixed(459);
+		vbios.mid_voltage_max_dispclk = bw_int_to_fixed(654);
+		vbios.high_voltage_max_dispclk = bw_int_to_fixed(1108);
+		vbios.low_voltage_max_phyclk = bw_int_to_fixed(540);
+		vbios.mid_voltage_max_phyclk = bw_int_to_fixed(810);
+		vbios.high_voltage_max_phyclk = bw_int_to_fixed(810);
+		vbios.data_return_bus_width = bw_int_to_fixed(32);
+		vbios.trc = bw_int_to_fixed(48);
+		if (vbios.number_of_dram_channels == 2) // 64-bit
+			vbios.dmifmc_urgent_latency = bw_int_to_fixed(4);
+		else
+			vbios.dmifmc_urgent_latency = bw_int_to_fixed(3);
+		vbios.stutter_self_refresh_exit_latency = bw_int_to_fixed(5);
+		vbios.stutter_self_refresh_entry_latency = bw_int_to_fixed(0);
+		vbios.nbp_state_change_latency = bw_int_to_fixed(250);
+		vbios.mcifwrmc_urgent_latency = bw_int_to_fixed(10);
+		vbios.scatter_gather_enable = false;
+		vbios.down_spread_percentage = bw_frc_to_fixed(5, 10);
+		vbios.cursor_width = 32;
+		vbios.average_compression_rate = 4;
+		vbios.number_of_request_slots_gmc_reserves_for_dmif_per_channel = 256;
+		vbios.blackout_duration = bw_int_to_fixed(0); /* us */
+		vbios.maximum_blackout_recovery_time = bw_int_to_fixed(0);
+
+		dceip.max_average_percent_of_ideal_port_bw_display_can_use_in_normal_system_operation = 100;
+		dceip.max_average_percent_of_ideal_drambw_display_can_use_in_normal_system_operation = 100;
+		dceip.percent_of_ideal_port_bw_received_after_urgent_latency = 100;
+		dceip.large_cursor = false;
+		dceip.dmif_request_buffer_size = bw_int_to_fixed(768);
+		dceip.dmif_pipe_en_fbc_chunk_tracker = false;
+		dceip.cursor_max_outstanding_group_num = 1;
+		dceip.lines_interleaved_into_lb = 2;
+		dceip.chunk_width = 256;
+		dceip.number_of_graphics_pipes = 5;
+		dceip.number_of_underlay_pipes = 0;
+		dceip.low_power_tiling_mode = 0;
+		dceip.display_write_back_supported = true;
+		dceip.argb_compression_support = true;
+		dceip.underlay_vscaler_efficiency6_bit_per_component =
+			bw_frc_to_fixed(35556, 10000);
+		dceip.underlay_vscaler_efficiency8_bit_per_component =
+			bw_frc_to_fixed(34286, 10000);
+		dceip.underlay_vscaler_efficiency10_bit_per_component =
+			bw_frc_to_fixed(32, 10);
+		dceip.underlay_vscaler_efficiency12_bit_per_component =
+			bw_int_to_fixed(3);
+		dceip.graphics_vscaler_efficiency6_bit_per_component =
+			bw_frc_to_fixed(35, 10);
+		dceip.graphics_vscaler_efficiency8_bit_per_component =
+			bw_frc_to_fixed(34286, 10000);
+		dceip.graphics_vscaler_efficiency10_bit_per_component =
+			bw_frc_to_fixed(32, 10);
+		dceip.graphics_vscaler_efficiency12_bit_per_component =
+			bw_int_to_fixed(3);
+		dceip.alpha_vscaler_efficiency = bw_int_to_fixed(3);
+		dceip.max_dmif_buffer_allocated = 4;
+		dceip.graphics_dmif_size = 12288;
+		dceip.underlay_luma_dmif_size = 19456;
+		dceip.underlay_chroma_dmif_size = 23552;
+		dceip.pre_downscaler_enabled = true;
+		dceip.underlay_downscale_prefetch_enabled = true;
+		dceip.lb_write_pixels_per_dispclk = bw_int_to_fixed(1);
+		dceip.lb_size_per_component444 = bw_int_to_fixed(245952);
+		dceip.graphics_lb_nodownscaling_multi_line_prefetching = true;
+		dceip.stutter_and_dram_clock_state_change_gated_before_cursor =
+			bw_int_to_fixed(1);
+		dceip.underlay420_luma_lb_size_per_component = bw_int_to_fixed(
+			82176);
+		dceip.underlay420_chroma_lb_size_per_component =
+			bw_int_to_fixed(164352);
+		dceip.underlay422_lb_size_per_component = bw_int_to_fixed(
+			82176);
+		dceip.cursor_chunk_width = bw_int_to_fixed(64);
+		dceip.cursor_dcp_buffer_lines = bw_int_to_fixed(4);
+		dceip.underlay_maximum_width_efficient_for_tiling =
+			bw_int_to_fixed(1920);
+		dceip.underlay_maximum_height_efficient_for_tiling =
+			bw_int_to_fixed(1080);
+		dceip.peak_pte_request_to_eviction_ratio_limiting_multiple_displays_or_single_rotated_display =
+			bw_frc_to_fixed(3, 10);
+		dceip.peak_pte_request_to_eviction_ratio_limiting_single_display_no_rotation =
+			bw_int_to_fixed(25);
+		dceip.minimum_outstanding_pte_request_limit = bw_int_to_fixed(
+			2);
+		dceip.maximum_total_outstanding_pte_requests_allowed_by_saw =
+			bw_int_to_fixed(128);
+		dceip.limit_excessive_outstanding_dmif_requests = true;
+		dceip.linear_mode_line_request_alternation_slice =
+			bw_int_to_fixed(64);
+		dceip.scatter_gather_lines_of_pte_prefetching_in_linear_mode =
+			32;
+		dceip.display_write_back420_luma_mcifwr_buffer_size = 12288;
+		dceip.display_write_back420_chroma_mcifwr_buffer_size = 8192;
+		dceip.request_efficiency = bw_frc_to_fixed(8, 10);
+		dceip.dispclk_per_request = bw_int_to_fixed(2);
+		dceip.dispclk_ramping_factor = bw_frc_to_fixed(105, 100);
+		dceip.display_pipe_throughput_factor = bw_frc_to_fixed(105, 100);
+		dceip.scatter_gather_pte_request_rows_in_tiling_mode = 2;
+		dceip.mcifwr_all_surfaces_burst_time = bw_int_to_fixed(0);
+		break;
 	case BW_CALCS_VERSION_STONEY:
 		vbios.memory_type = bw_def_gddr5;
 		vbios.dram_channel_width_in_bits = 64;
@@ -2815,6 +2938,19 @@
 				data->bytes_per_pixel[num_displays + 4] = 4;
 				break;
 			}
+		} else if (pipe[i].stream->dst.width != 0 &&
+					pipe[i].stream->dst.height != 0 &&
+					pipe[i].stream->src.width != 0 &&
+					pipe[i].stream->src.height != 0) {
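+			/* Scaled path: derive taps and ratios from src/dst sizes.
+			 * No scaling on an axis means one tap; otherwise two. */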
+			data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.width);
+			data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4];
+			data->src_height[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->src.height);
+			data->h_taps[num_displays + 4] = pipe[i].stream->src.width == pipe[i].stream->dst.width ? bw_int_to_fixed(1) : bw_int_to_fixed(2);
+			data->v_taps[num_displays + 4] = pipe[i].stream->src.height == pipe[i].stream->dst.height ? bw_int_to_fixed(1) : bw_int_to_fixed(2);
+			data->h_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.width, pipe[i].stream->dst.width);
+			data->v_scale_ratio[num_displays + 4] = bw_frc_to_fixed(pipe[i].stream->src.height, pipe[i].stream->dst.height);
+			data->rotation_angle[num_displays + 4] = bw_int_to_fixed(0);
+			data->bytes_per_pixel[num_displays + 4] = 4;
 		} else {
 			data->src_width[num_displays + 4] = bw_int_to_fixed(pipe[i].stream->timing.h_addressable);
 			data->pitch_in_pixels[num_displays + 4] = data->src_width[num_displays + 4];
@@ -2873,6 +3009,11 @@
 		struct bw_fixed mid_yclk = vbios->mid_yclk;
 		struct bw_fixed low_yclk = vbios->low_yclk;
 
+		if (ctx->dc->debug.bandwidth_calcs_trace) {
+			print_bw_calcs_dceip(ctx->logger, dceip);
+			print_bw_calcs_vbios(ctx->logger, vbios);
+			print_bw_calcs_data(ctx->logger, data);
+		}
 		calculate_bandwidth(dceip, vbios, data);
 
 		yclk_lvl = data->y_clk_level;
@@ -2968,7 +3109,33 @@
 			bw_fixed_to_int(bw_mul(data->
 				stutter_exit_watermark[9], bw_int_to_fixed(1000)));
 
-
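+		/* The stutter-entry watermarks mirror the stutter-exit mapping
+		 * above: data indices 4-6 fill marks 0-2, marks 3-4 come from
+		 * the underlay entries (0-1) when slave planes exist and from
+		 * entries 7-8 otherwise, and index 9 fills mark 5. */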
+		calcs_output->stutter_entry_wm_ns[0].a_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+		calcs_output->stutter_entry_wm_ns[1].a_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+		calcs_output->stutter_entry_wm_ns[2].a_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+		if (ctx->dc->caps.max_slave_planes) {
+			calcs_output->stutter_entry_wm_ns[3].a_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+			calcs_output->stutter_entry_wm_ns[4].a_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+		} else {
+			calcs_output->stutter_entry_wm_ns[3].a_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+			calcs_output->stutter_entry_wm_ns[4].a_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+		}
+		calcs_output->stutter_entry_wm_ns[5].a_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[9], bw_int_to_fixed(1000)));
 
 		calcs_output->urgent_wm_ns[0].a_mark =
 			bw_fixed_to_int(bw_mul(data->
@@ -3063,7 +3230,33 @@
 				bw_fixed_to_int(bw_mul(data->
 					stutter_exit_watermark[9], bw_int_to_fixed(1000)));
 
-
+		calcs_output->stutter_entry_wm_ns[0].b_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+		calcs_output->stutter_entry_wm_ns[1].b_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+		calcs_output->stutter_entry_wm_ns[2].b_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+		if (ctx->dc->caps.max_slave_planes) {
+			calcs_output->stutter_entry_wm_ns[3].b_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+			calcs_output->stutter_entry_wm_ns[4].b_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+		} else {
+			calcs_output->stutter_entry_wm_ns[3].b_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+			calcs_output->stutter_entry_wm_ns[4].b_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+		}
+		calcs_output->stutter_entry_wm_ns[5].b_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[9], bw_int_to_fixed(1000)));
 
 			calcs_output->urgent_wm_ns[0].b_mark =
 				bw_fixed_to_int(bw_mul(data->
@@ -3156,6 +3349,34 @@
 				bw_fixed_to_int(bw_mul(data->
 					stutter_exit_watermark[9], bw_int_to_fixed(1000)));
 
+		calcs_output->stutter_entry_wm_ns[0].c_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+		calcs_output->stutter_entry_wm_ns[1].c_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+		calcs_output->stutter_entry_wm_ns[2].c_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+		if (ctx->dc->caps.max_slave_planes) {
+			calcs_output->stutter_entry_wm_ns[3].c_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+			calcs_output->stutter_entry_wm_ns[4].c_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+		} else {
+			calcs_output->stutter_entry_wm_ns[3].c_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+			calcs_output->stutter_entry_wm_ns[4].c_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+		}
+		calcs_output->stutter_entry_wm_ns[5].c_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[9], bw_int_to_fixed(1000)));
+
 			calcs_output->urgent_wm_ns[0].c_mark =
 				bw_fixed_to_int(bw_mul(data->
 					urgent_watermark[4], bw_int_to_fixed(1000)));
@@ -3260,6 +3481,33 @@
 			bw_fixed_to_int(bw_mul(data->
 				stutter_exit_watermark[9], bw_int_to_fixed(1000)));
 
+		calcs_output->stutter_entry_wm_ns[0].d_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[4], bw_int_to_fixed(1000)));
+		calcs_output->stutter_entry_wm_ns[1].d_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[5], bw_int_to_fixed(1000)));
+		calcs_output->stutter_entry_wm_ns[2].d_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[6], bw_int_to_fixed(1000)));
+		if (ctx->dc->caps.max_slave_planes) {
+			calcs_output->stutter_entry_wm_ns[3].d_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[0], bw_int_to_fixed(1000)));
+			calcs_output->stutter_entry_wm_ns[4].d_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[1], bw_int_to_fixed(1000)));
+		} else {
+			calcs_output->stutter_entry_wm_ns[3].d_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[7], bw_int_to_fixed(1000)));
+			calcs_output->stutter_entry_wm_ns[4].d_mark =
+				bw_fixed_to_int(bw_mul(data->
+					stutter_entry_watermark[8], bw_int_to_fixed(1000)));
+		}
+		calcs_output->stutter_entry_wm_ns[5].d_mark =
+			bw_fixed_to_int(bw_mul(data->
+				stutter_entry_watermark[9], bw_int_to_fixed(1000)));
 
 		calcs_output->urgent_wm_ns[0].d_mark =
 			bw_fixed_to_int(bw_mul(data->
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
index 4bb43a3..49a4ea4 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
@@ -873,14 +873,14 @@
 			}
 
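+			/* A 90/270 degree rotation swaps which axis each scale
+			 * ratio constrains, hence the crossed checks below. */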
 			if (pipe->plane_state->rotation % 2 == 0) {
-				ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value
+				ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dc_fixpt_one.value
 					|| v->scaler_rec_out_width[input_idx] == v->viewport_width[input_idx]);
-				ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value
+				ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dc_fixpt_one.value
 					|| v->scaler_recout_height[input_idx] == v->viewport_height[input_idx]);
 			} else {
-				ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dal_fixed31_32_one.value
+				ASSERT(pipe->plane_res.scl_data.ratios.horz.value != dc_fixpt_one.value
 					|| v->scaler_recout_height[input_idx] == v->viewport_width[input_idx]);
-				ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dal_fixed31_32_one.value
+				ASSERT(pipe->plane_res.scl_data.ratios.vert.value != dc_fixpt_one.value
 					|| v->scaler_rec_out_width[input_idx] == v->viewport_height[input_idx]);
 			}
 			v->dcc_enable[input_idx] = pipe->plane_state->dcc.enable ? dcn_bw_yes : dcn_bw_no;
@@ -1459,39 +1459,39 @@
 void dcn_bw_sync_calcs_and_dml(struct dc *dc)
 {
 	kernel_fpu_begin();
-	DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %d ns\n"
-			"sr_enter_plus_exit_time: %d ns\n"
-			"urgent_latency: %d ns\n"
-			"write_back_latency: %d ns\n"
-			"percent_of_ideal_drambw_received_after_urg_latency: %d %\n"
+	DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n"
+			"sr_enter_plus_exit_time: %f ns\n"
+			"urgent_latency: %f ns\n"
+			"write_back_latency: %f ns\n"
+			"percent_of_ideal_drambw_received_after_urg_latency: %f %%\n"
 			"max_request_size: %d bytes\n"
-			"dcfclkv_max0p9: %d kHz\n"
-			"dcfclkv_nom0p8: %d kHz\n"
-			"dcfclkv_mid0p72: %d kHz\n"
-			"dcfclkv_min0p65: %d kHz\n"
-			"max_dispclk_vmax0p9: %d kHz\n"
-			"max_dispclk_vnom0p8: %d kHz\n"
-			"max_dispclk_vmid0p72: %d kHz\n"
-			"max_dispclk_vmin0p65: %d kHz\n"
-			"max_dppclk_vmax0p9: %d kHz\n"
-			"max_dppclk_vnom0p8: %d kHz\n"
-			"max_dppclk_vmid0p72: %d kHz\n"
-			"max_dppclk_vmin0p65: %d kHz\n"
-			"socclk: %d kHz\n"
-			"fabric_and_dram_bandwidth_vmax0p9: %d MB/s\n"
-			"fabric_and_dram_bandwidth_vnom0p8: %d MB/s\n"
-			"fabric_and_dram_bandwidth_vmid0p72: %d MB/s\n"
-			"fabric_and_dram_bandwidth_vmin0p65: %d MB/s\n"
-			"phyclkv_max0p9: %d kHz\n"
-			"phyclkv_nom0p8: %d kHz\n"
-			"phyclkv_mid0p72: %d kHz\n"
-			"phyclkv_min0p65: %d kHz\n"
-			"downspreading: %d %\n"
+			"dcfclkv_max0p9: %f kHz\n"
+			"dcfclkv_nom0p8: %f kHz\n"
+			"dcfclkv_mid0p72: %f kHz\n"
+			"dcfclkv_min0p65: %f kHz\n"
+			"max_dispclk_vmax0p9: %f kHz\n"
+			"max_dispclk_vnom0p8: %f kHz\n"
+			"max_dispclk_vmid0p72: %f kHz\n"
+			"max_dispclk_vmin0p65: %f kHz\n"
+			"max_dppclk_vmax0p9: %f kHz\n"
+			"max_dppclk_vnom0p8: %f kHz\n"
+			"max_dppclk_vmid0p72: %f kHz\n"
+			"max_dppclk_vmin0p65: %f kHz\n"
+			"socclk: %f kHz\n"
+			"fabric_and_dram_bandwidth_vmax0p9: %f MB/s\n"
+			"fabric_and_dram_bandwidth_vnom0p8: %f MB/s\n"
+			"fabric_and_dram_bandwidth_vmid0p72: %f MB/s\n"
+			"fabric_and_dram_bandwidth_vmin0p65: %f MB/s\n"
+			"phyclkv_max0p9: %f kHz\n"
+			"phyclkv_nom0p8: %f kHz\n"
+			"phyclkv_mid0p72: %f kHz\n"
+			"phyclkv_min0p65: %f kHz\n"
+			"downspreading: %f %%\n"
 			"round_trip_ping_latency_cycles: %d DCFCLK Cycles\n"
 			"urgent_out_of_order_return_per_channel: %d Bytes\n"
 			"number_of_channels: %d\n"
 			"vmm_page_size: %d Bytes\n"
-			"dram_clock_change_latency: %d ns\n"
+			"dram_clock_change_latency: %f ns\n"
 			"return_bus_width: %d Bytes\n",
 			dc->dcn_soc->sr_exit_time * 1000,
 			dc->dcn_soc->sr_enter_plus_exit_time * 1000,
@@ -1527,11 +1527,11 @@
 			dc->dcn_soc->vmm_page_size,
 			dc->dcn_soc->dram_clock_change_latency * 1000,
 			dc->dcn_soc->return_bus_width);
-	DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %d\n"
-			"det_buffer_size_in_kbyte: %d\n"
-			"dpp_output_buffer_pixels: %d\n"
-			"opp_output_buffer_lines: %d\n"
-			"pixel_chunk_size_in_kbyte: %d\n"
+	DC_LOG_BANDWIDTH_CALCS("rob_buffer_size_in_kbyte: %f\n"
+			"det_buffer_size_in_kbyte: %f\n"
+			"dpp_output_buffer_pixels: %f\n"
+			"opp_output_buffer_lines: %f\n"
+			"pixel_chunk_size_in_kbyte: %f\n"
 			"pte_enable: %d\n"
 			"pte_chunk_size: %d kbytes\n"
 			"meta_chunk_size: %d kbytes\n"
@@ -1550,13 +1550,13 @@
 			"max_pscl_tolb_throughput: %d pixels/dppclk\n"
 			"max_lb_tovscl_throughput: %d pixels/dppclk\n"
 			"max_vscl_tohscl_throughput: %d pixels/dppclk\n"
-			"max_hscl_ratio: %d\n"
-			"max_vscl_ratio: %d\n"
+			"max_hscl_ratio: %f\n"
+			"max_vscl_ratio: %f\n"
 			"max_hscl_taps: %d\n"
 			"max_vscl_taps: %d\n"
 			"pte_buffer_size_in_requests: %d\n"
-			"dispclk_ramping_margin: %d %\n"
-			"under_scan_factor: %d %\n"
+			"dispclk_ramping_margin: %f %%\n"
+			"under_scan_factor: %f %%\n"
 			"max_inter_dcn_tile_repeaters: %d\n"
 			"can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one: %d\n"
 			"bug_forcing_luma_and_chroma_request_to_same_size_fixed: %d\n"
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 9cd3566..644b2187 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -936,95 +936,6 @@
 	return true;
 }
 
-/*
- * TODO this whole function needs to go
- *
- * dc_surface_update is needlessly complex. See if we can just replace this
- * with a dc_plane_state and follow the atomic model a bit more closely here.
- */
-bool dc_commit_planes_to_stream(
-		struct dc *dc,
-		struct dc_plane_state **plane_states,
-		uint8_t new_plane_count,
-		struct dc_stream_state *dc_stream,
-		struct dc_state *state)
-{
-	/* no need to dynamically allocate this. it's pretty small */
-	struct dc_surface_update updates[MAX_SURFACES];
-	struct dc_flip_addrs *flip_addr;
-	struct dc_plane_info *plane_info;
-	struct dc_scaling_info *scaling_info;
-	int i;
-	struct dc_stream_update *stream_update =
-			kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL);
-
-	if (!stream_update) {
-		BREAK_TO_DEBUGGER();
-		return false;
-	}
-
-	flip_addr = kcalloc(MAX_SURFACES, sizeof(struct dc_flip_addrs),
-			    GFP_KERNEL);
-	plane_info = kcalloc(MAX_SURFACES, sizeof(struct dc_plane_info),
-			     GFP_KERNEL);
-	scaling_info = kcalloc(MAX_SURFACES, sizeof(struct dc_scaling_info),
-			       GFP_KERNEL);
-
-	if (!flip_addr || !plane_info || !scaling_info) {
-		kfree(flip_addr);
-		kfree(plane_info);
-		kfree(scaling_info);
-		kfree(stream_update);
-		return false;
-	}
-
-	memset(updates, 0, sizeof(updates));
-
-	stream_update->src = dc_stream->src;
-	stream_update->dst = dc_stream->dst;
-	stream_update->out_transfer_func = dc_stream->out_transfer_func;
-
-	for (i = 0; i < new_plane_count; i++) {
-		updates[i].surface = plane_states[i];
-		updates[i].gamma =
-			(struct dc_gamma *)plane_states[i]->gamma_correction;
-		updates[i].in_transfer_func = plane_states[i]->in_transfer_func;
-		flip_addr[i].address = plane_states[i]->address;
-		flip_addr[i].flip_immediate = plane_states[i]->flip_immediate;
-		plane_info[i].color_space = plane_states[i]->color_space;
-		plane_info[i].input_tf = plane_states[i]->input_tf;
-		plane_info[i].format = plane_states[i]->format;
-		plane_info[i].plane_size = plane_states[i]->plane_size;
-		plane_info[i].rotation = plane_states[i]->rotation;
-		plane_info[i].horizontal_mirror = plane_states[i]->horizontal_mirror;
-		plane_info[i].stereo_format = plane_states[i]->stereo_format;
-		plane_info[i].tiling_info = plane_states[i]->tiling_info;
-		plane_info[i].visible = plane_states[i]->visible;
-		plane_info[i].per_pixel_alpha = plane_states[i]->per_pixel_alpha;
-		plane_info[i].dcc = plane_states[i]->dcc;
-		scaling_info[i].scaling_quality = plane_states[i]->scaling_quality;
-		scaling_info[i].src_rect = plane_states[i]->src_rect;
-		scaling_info[i].dst_rect = plane_states[i]->dst_rect;
-		scaling_info[i].clip_rect = plane_states[i]->clip_rect;
-
-		updates[i].flip_addr = &flip_addr[i];
-		updates[i].plane_info = &plane_info[i];
-		updates[i].scaling_info = &scaling_info[i];
-	}
-
-	dc_commit_updates_for_stream(
-			dc,
-			updates,
-			new_plane_count,
-			dc_stream, stream_update, plane_states, state);
-
-	kfree(flip_addr);
-	kfree(plane_info);
-	kfree(scaling_info);
-	kfree(stream_update);
-	return true;
-}
-
 struct dc_state *dc_create_state(void)
 {
 	struct dc_state *context = kzalloc(sizeof(struct dc_state),
@@ -1107,9 +1018,6 @@
 	if (u->plane_info->color_space != u->surface->color_space)
 		update_flags->bits.color_space_change = 1;
 
-	if (u->plane_info->input_tf != u->surface->input_tf)
-		update_flags->bits.input_tf_change = 1;
-
 	if (u->plane_info->horizontal_mirror != u->surface->horizontal_mirror)
 		update_flags->bits.horizontal_mirror_change = 1;
 
@@ -1243,12 +1151,20 @@
 	if (u->input_csc_color_matrix)
 		update_flags->bits.input_csc_change = 1;
 
-	if (update_flags->bits.in_transfer_func_change
-			|| update_flags->bits.input_csc_change) {
+	if (u->coeff_reduction_factor)
+		update_flags->bits.coeff_reduction_change = 1;
+
+	if (update_flags->bits.in_transfer_func_change) {
 		type = UPDATE_TYPE_MED;
 		elevate_update_type(&overall_type, type);
 	}
 
+	if (update_flags->bits.input_csc_change
+			|| update_flags->bits.coeff_reduction_change) {
+		type = UPDATE_TYPE_FULL;
+		elevate_update_type(&overall_type, type);
+	}
+
 	return overall_type;
 }
 
@@ -1297,7 +1213,7 @@
 	type = check_update_surfaces_for_stream(dc, updates, surface_count, stream_update, stream_status);
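+	/* A full update now raises every per-surface update flag (raw =
+	 * 0xFFFFFFFF) rather than just full_update, so that all
+	 * reprogramming paths are taken. */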
 	if (type == UPDATE_TYPE_FULL)
 		for (i = 0; i < surface_count; i++)
-			updates[i].surface->update_flags.bits.full_update = 1;
+			updates[i].surface->update_flags.raw = 0xFFFFFFFF;
 
 	return type;
 }
@@ -1375,6 +1291,12 @@
 					pipe_ctx->stream_res.abm->funcs->set_abm_level(
 							pipe_ctx->stream_res.abm, stream->abm_level);
 			}
+
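+			/* Reprogram the vline interrupt from the stream's
+			 * periodic vsync delta when the timing generator
+			 * supports it. */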
+			if (stream_update && stream_update->periodic_fn_vsync_delta &&
+					pipe_ctx->stream_res.tg->funcs->program_vline_interrupt)
+				pipe_ctx->stream_res.tg->funcs->program_vline_interrupt(
+						pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing,
+						pipe_ctx->stream->periodic_fn_vsync_delta);
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
index 5a552cb3..267c767 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c
@@ -36,8 +36,9 @@
 #include "hw_sequencer.h"
 
 #include "resource.h"
-#define DC_LOGGER \
-	logger
+
+#define DC_LOGGER_INIT(logger)
+
 
 #define SURFACE_TRACE(...) do {\
 		if (dc->debug.surface_trace) \
@@ -60,8 +61,7 @@
 		int surface_count)
 {
 	int i;
-	struct dc  *core_dc = dc;
-	struct dal_logger *logger =  core_dc->ctx->logger;
+	DC_LOGGER_INIT(dc->ctx->logger);
 
 	for (i = 0; i < surface_count; i++) {
 		const struct dc_plane_state *plane_state = plane_states[i];
@@ -72,8 +72,8 @@
 				"plane_state->visible = %d;\n"
 				"plane_state->flip_immediate = %d;\n"
 				"plane_state->address.type = %d;\n"
-				"plane_state->address.grph.addr.quad_part = 0x%X;\n"
-				"plane_state->address.grph.meta_addr.quad_part = 0x%X;\n"
+				"plane_state->address.grph.addr.quad_part = 0x%llX;\n"
+				"plane_state->address.grph.meta_addr.quad_part = 0x%llX;\n"
 				"plane_state->scaling_quality.h_taps = %d;\n"
 				"plane_state->scaling_quality.v_taps = %d;\n"
 				"plane_state->scaling_quality.h_taps_c = %d;\n"
@@ -155,7 +155,6 @@
 				"plane_state->tiling_info.gfx8.pipe_config = %d;\n"
 				"plane_state->tiling_info.gfx8.array_mode = %d;\n"
 				"plane_state->color_space = %d;\n"
-				"plane_state->input_tf = %d;\n"
 				"plane_state->dcc.enable = %d;\n"
 				"plane_state->format = %d;\n"
 				"plane_state->rotation = %d;\n"
@@ -163,7 +162,6 @@
 				plane_state->tiling_info.gfx8.pipe_config,
 				plane_state->tiling_info.gfx8.array_mode,
 				plane_state->color_space,
-				plane_state->input_tf,
 				plane_state->dcc.enable,
 				plane_state->format,
 				plane_state->rotation,
@@ -183,8 +181,7 @@
 		int surface_count)
 {
 	int i;
-	struct dc  *core_dc = dc;
-	struct dal_logger *logger =  core_dc->ctx->logger;
+	DC_LOGGER_INIT(dc->ctx->logger);
 
 	for (i = 0; i < surface_count; i++) {
 		const struct dc_surface_update *update = &updates[i];
@@ -192,8 +189,8 @@
 		SURFACE_TRACE("Update %d\n", i);
 		if (update->flip_addr) {
 			SURFACE_TRACE("flip_addr->address.type = %d;\n"
-					"flip_addr->address.grph.addr.quad_part = 0x%X;\n"
-					"flip_addr->address.grph.meta_addr.quad_part = 0x%X;\n"
+					"flip_addr->address.grph.addr.quad_part = 0x%llX;\n"
+					"flip_addr->address.grph.meta_addr.quad_part = 0x%llX;\n"
 					"flip_addr->flip_immediate = %d;\n",
 					update->flip_addr->address.type,
 					update->flip_addr->address.grph.addr.quad_part,
@@ -204,16 +201,15 @@
 		if (update->plane_info) {
 			SURFACE_TRACE(
 					"plane_info->color_space = %d;\n"
-					"plane_info->input_tf = %d;\n"
 					"plane_info->format = %d;\n"
 					"plane_info->plane_size.grph.surface_pitch = %d;\n"
 					"plane_info->plane_size.grph.surface_size.height = %d;\n"
 					"plane_info->plane_size.grph.surface_size.width = %d;\n"
 					"plane_info->plane_size.grph.surface_size.x = %d;\n"
 					"plane_info->plane_size.grph.surface_size.y = %d;\n"
-					"plane_info->rotation = %d;\n",
+					"plane_info->rotation = %d;\n"
+					"plane_info->stereo_format = %d;\n",
 					update->plane_info->color_space,
-					update->plane_info->input_tf,
 					update->plane_info->format,
 					update->plane_info->plane_size.grph.surface_pitch,
 					update->plane_info->plane_size.grph.surface_size.height,
@@ -303,8 +299,7 @@
 
 void post_surface_trace(struct dc *dc)
 {
-	struct dc  *core_dc = dc;
-	struct dal_logger *logger =  core_dc->ctx->logger;
+	DC_LOGGER_INIT(dc->ctx->logger);
 
 	SURFACE_TRACE("post surface process.\n");
 
@@ -316,10 +311,10 @@
 {
 	int i;
 	struct dc  *core_dc = dc;
-	struct dal_logger *logger =  core_dc->ctx->logger;
 	int h_pos[MAX_PIPES], v_pos[MAX_PIPES];
 	struct crtc_position position;
 	unsigned int underlay_idx = core_dc->res_pool->underlay_pipe_index;
+	DC_LOGGER_INIT(dc->ctx->logger);
 
 
 	for (i = 0; i < core_dc->res_pool->pipe_count; i++) {
@@ -354,9 +349,7 @@
 		struct dc_state *context)
 {
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
-	struct dc  *core_dc = dc;
-	struct dal_logger *logger =  core_dc->ctx->logger;
-
+	DC_LOGGER_INIT(dc->ctx->logger);
 	CLOCK_TRACE("Current: dispclk_khz:%d  max_dppclk_khz:%d  dcfclk_khz:%d\n"
 			"dcfclk_deep_sleep_khz:%d  fclk_khz:%d  socclk_khz:%d\n",
 			context->bw.dcn.calc_clk.dispclk_khz,
@@ -371,6 +364,7 @@
 			context->bw.dcn.calc_clk.dppclk_khz,
 			context->bw.dcn.calc_clk.dcfclk_khz,
 			context->bw.dcn.calc_clk.dcfclk_deep_sleep_khz,
-			context->bw.dcn.calc_clk.fclk_khz);
+			context->bw.dcn.calc_clk.fclk_khz,
+			context->bw.dcn.calc_clk.socclk_khz);
 #endif
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index ebc96b7..83d1215 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -208,6 +208,7 @@
 	case COLOR_SPACE_YCBCR709:
 	case COLOR_SPACE_YCBCR601_LIMITED:
 	case COLOR_SPACE_YCBCR709_LIMITED:
+	case COLOR_SPACE_2020_YCBCR:
 		*black_color = black_color_format[BLACK_COLOR_FORMAT_YUV_CV];
 		break;
 
@@ -216,7 +217,25 @@
 			black_color_format[BLACK_COLOR_FORMAT_RGB_LIMITED];
 		break;
 
-	default:
+	/*
+	 * Remove the default label and add a case for every color space, so
+	 * that the compiler (via -Wswitch) warns us whenever a new color
+	 * space is added to the enum but not handled here.
+	 */
+	case COLOR_SPACE_UNKNOWN:
+	case COLOR_SPACE_SRGB:
+	case COLOR_SPACE_XR_RGB:
+	case COLOR_SPACE_MSREF_SCRGB:
+	case COLOR_SPACE_XV_YCC_709:
+	case COLOR_SPACE_XV_YCC_601:
+	case COLOR_SPACE_2020_RGB_FULLRANGE:
+	case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
+	case COLOR_SPACE_ADOBERGB:
+	case COLOR_SPACE_DCIP3:
+	case COLOR_SPACE_DISPLAYNATIVE:
+	case COLOR_SPACE_DOLBYVISION:
+	case COLOR_SPACE_APPCTRL:
+	case COLOR_SPACE_CUSTOMPOINTS:
 		/* default is sRGB black (full range). */
 		*black_color =
 			black_color_format[BLACK_COLOR_FORMAT_RGB_FULLRANGE];
@@ -230,6 +249,9 @@
 {
 	int counter;
 
+	/* Not applicable if the pipe is not primary; saves ~300 ms of boot time. */
+	if (!tg->funcs->is_blanked)
+		return true;
 	for (counter = 0; counter < 100; counter++) {
 		if (tg->funcs->is_blanked(tg))
 			break;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 6d1c498..2fa5218 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -45,8 +45,9 @@
 #include "dce/dce_11_0_d.h"
 #include "dce/dce_11_0_enum.h"
 #include "dce/dce_11_0_sh_mask.h"
-#define DC_LOGGER \
-	dc_ctx->logger
+
+#define DC_LOGGER_INIT(logger)
+
 
 #define LINK_INFO(...) \
 	DC_LOG_HW_HOTPLUG(  \
@@ -468,6 +469,13 @@
 	link->dpcd_sink_count = 0;
 }
 
+static void link_disconnect_remap(struct dc_sink *prev_sink, struct dc_link *link)
+{
+	dc_sink_release(link->local_sink);
+	link->local_sink = prev_sink;
+}
+
+
 static bool detect_dp(
 	struct dc_link *link,
 	struct display_sink_capability *sink_caps,
@@ -550,6 +558,17 @@
 	return true;
 }
 
+static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid)
+{
+	if (old_edid->length != new_edid->length)
+		return false;
+
+	if (new_edid->length == 0)
+		return false;
+
+	return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0);
+}
+
 bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 {
 	struct dc_sink_init_data sink_init_data = { 0 };
@@ -557,11 +576,15 @@
 	uint8_t i;
 	bool converter_disable_audio = false;
 	struct audio_support *aud_support = &link->dc->res_pool->audio_support;
+	bool same_edid = false;
 	enum dc_edid_status edid_status;
 	struct dc_context *dc_ctx = link->ctx;
 	struct dc_sink *sink = NULL;
+	struct dc_sink *prev_sink = NULL;
+	struct dpcd_caps prev_dpcd_caps;
+	bool same_dpcd = true;
 	enum dc_connection_type new_connection_type = dc_connection_none;
-
+	DC_LOGGER_INIT(link->ctx->logger);
 	if (link->connector_signal == SIGNAL_TYPE_VIRTUAL)
 		return false;
 
@@ -574,6 +597,11 @@
 			link->local_sink)
 		return true;
 
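+	/* Hold a reference to the current sink and snapshot its DPCD caps so
+	 * detection can tell whether anything actually changed. */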
+	prev_sink = link->local_sink;
+	if (prev_sink != NULL) {
+		dc_sink_retain(prev_sink);
+		memcpy(&prev_dpcd_caps, &link->dpcd_caps, sizeof(struct dpcd_caps));
+	}
 	link_disconnect_sink(link);
 
 	if (new_connection_type != dc_connection_none) {
@@ -615,14 +643,25 @@
 				link,
 				&sink_caps,
 				&converter_disable_audio,
-				aud_support, reason))
+				aud_support, reason)) {
+				if (prev_sink != NULL)
+					dc_sink_release(prev_sink);
 				return false;
+			}
 
+			// Check if the DPCD block is the same
+			if (prev_sink != NULL) {
+				if (memcmp(&link->dpcd_caps, &prev_dpcd_caps, sizeof(struct dpcd_caps)))
+					same_dpcd = false;
+			}
 			/* Active dongle downstream unplug */
 			if (link->type == dc_connection_active_dongle
 					&& link->dpcd_caps.sink_count.
-					bits.SINK_COUNT == 0)
+					bits.SINK_COUNT == 0) {
+				if (prev_sink != NULL)
+					dc_sink_release(prev_sink);
 				return true;
+			}
 
 			if (link->type == dc_connection_mst_branch) {
 				LINK_INFO("link=%d, mst branch is now Connected\n",
@@ -630,9 +669,11 @@
 				/* Need to setup mst link_cap struct here
 				 * otherwise dc_link_detect() will leave mst link_cap
 				 * empty which leads to allocate_mst_payload() has "0"
-				 * pbn_per_slot value leading to exception on dal_fixed31_32_div()
+				 * pbn_per_slot value leading to exception on dc_fixpt_div()
 				 */
 				link->verified_link_cap = link->reported_link_cap;
+				if (prev_sink != NULL)
+					dc_sink_release(prev_sink);
 				return false;
 			}
 
@@ -642,6 +683,8 @@
 		default:
 			DC_ERROR("Invalid connector type! signal:%d\n",
 				link->connector_signal);
+			if (prev_sink != NULL)
+				dc_sink_release(prev_sink);
 			return false;
 		} /* switch() */
 
@@ -664,6 +707,8 @@
 		sink = dc_sink_create(&sink_init_data);
 		if (!sink) {
 			DC_ERROR("Failed to create sink!\n");
+			if (prev_sink != NULL)
+				dc_sink_release(prev_sink);
 			return false;
 		}
 
@@ -687,23 +732,34 @@
 			break;
 		}
 
-		if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
-			sink_caps.transaction_type ==
-			DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
-			/*
-			 * TODO debug why Dell 2413 doesn't like
-			 *  two link trainings
-			 */
+		// Check if the EDID is the same
+		if ((prev_sink != NULL) && ((edid_status == EDID_THE_SAME) || (edid_status == EDID_OK)))
+			same_edid = is_same_edid(&prev_sink->dc_edid, &sink->dc_edid);
 
-			/* deal with non-mst cases */
-			dp_hbr_verify_link_cap(link, &link->reported_link_cap);
+		// If both the EDID and DPCD are the same, discard the new sink and revert to the original one
+		if ((same_edid) && (same_dpcd)) {
+			link_disconnect_remap(prev_sink, link);
+			sink = prev_sink;
+			prev_sink = NULL;
+		} else {
+			if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
+					sink_caps.transaction_type ==
+						DDC_TRANSACTION_TYPE_I2C_OVER_AUX) {
+				/*
+				 * TODO debug why Dell 2413 doesn't like
+				 *  two link trainings
+				 */
+
+				/* deal with non-mst cases */
+				dp_hbr_verify_link_cap(link, &link->reported_link_cap);
+			}
+
+			/* HDMI-DVI Dongle */
+			if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A &&
+					!sink->edid_caps.edid_hdmi)
+				sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
 		}
 
-		/* HDMI-DVI Dongle */
-		if (sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A &&
-				!sink->edid_caps.edid_hdmi)
-			sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK;
-
 		/* Connectivity log: detection */
 		for (i = 0; i < sink->dc_edid.length / EDID_BLOCK_SIZE; i++) {
 			CONN_DATA_DETECT(link,
@@ -761,10 +817,14 @@
 		sink_caps.signal = SIGNAL_TYPE_NONE;
 	}
 
-	LINK_INFO("link=%d, dc_sink_in=%p is now %s\n",
+	LINK_INFO("link=%d, dc_sink_in=%p is now %s prev_sink=%p dpcd same=%d edid same=%d\n",
 		link->link_index, sink,
 		(sink_caps.signal == SIGNAL_TYPE_NONE ?
-			"Disconnected":"Connected"));
+			"Disconnected":"Connected"), prev_sink,
+			same_dpcd, same_edid);
+
+	if (prev_sink != NULL)
+		dc_sink_release(prev_sink);
 
 	return true;
 }
@@ -927,6 +987,7 @@
 	struct integrated_info info = {{{ 0 }}};
 	struct dc_bios *bios = init_params->dc->ctx->dc_bios;
 	const struct dc_vbios_funcs *bp_funcs = bios->funcs;
+	DC_LOGGER_INIT(dc_ctx->logger);
 
 	link->irq_source_hpd = DC_IRQ_SOURCE_INVALID;
 	link->irq_source_hpd_rx = DC_IRQ_SOURCE_INVALID;
@@ -1135,7 +1196,8 @@
 {
 	union dpcd_edp_config edp_config_set;
 	bool panel_mode_edp = false;
-	struct dc_context *dc_ctx = link->ctx;
+	DC_LOGGER_INIT(link->ctx->logger);
+
 	memset(&edp_config_set, '\0', sizeof(union dpcd_edp_config));
 
 	if (DP_PANEL_MODE_DEFAULT != panel_mode) {
@@ -1183,16 +1245,21 @@
 {
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->sink->link;
-	union down_spread_ctrl downspread;
+	union down_spread_ctrl old_downspread;
+	union down_spread_ctrl new_downspread;
 
 	core_link_read_dpcd(link, DP_DOWNSPREAD_CTRL,
-			&downspread.raw, sizeof(downspread));
+			&old_downspread.raw, sizeof(old_downspread));
 
-	downspread.bits.IGNORE_MSA_TIMING_PARAM =
+	new_downspread.raw = old_downspread.raw;
+
+	new_downspread.bits.IGNORE_MSA_TIMING_PARAM =
 			(stream->ignore_msa_timing_param) ? 1 : 0;
 
-	core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
-			&downspread.raw, sizeof(downspread));
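+	/* Read-modify-write: only issue the DPCD write when the value
+	 * actually changes. */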
+	if (new_downspread.raw != old_downspread.raw) {
+		core_link_write_dpcd(link, DP_DOWNSPREAD_CTRL,
+			&new_downspread.raw, sizeof(new_downspread));
+	}
 }
 
 static enum dc_status enable_link_dp(
@@ -1843,9 +1910,22 @@
 
 static bool dp_active_dongle_validate_timing(
 		const struct dc_crtc_timing *timing,
-		const struct dc_dongle_caps *dongle_caps)
+		const struct dpcd_caps *dpcd_caps)
 {
 	unsigned int required_pix_clk = timing->pix_clk_khz;
+	const struct dc_dongle_caps *dongle_caps = &dpcd_caps->dongle_caps;
+
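+	/* VGA and DVI converters carry RGB only, so reject any YCbCr
+	 * timing up front. */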
+	switch (dpcd_caps->dongle_type) {
+	case DISPLAY_DONGLE_DP_VGA_CONVERTER:
+	case DISPLAY_DONGLE_DP_DVI_CONVERTER:
+	case DISPLAY_DONGLE_DP_DVI_DONGLE:
+		if (timing->pixel_encoding == PIXEL_ENCODING_RGB)
+			return true;
+		else
+			return false;
+	default:
+		break;
+	}
 
 	if (dongle_caps->dongle_type != DISPLAY_DONGLE_DP_HDMI_CONVERTER ||
 		dongle_caps->extendedCapValid == false)
@@ -1911,7 +1991,7 @@
 		const struct dc_crtc_timing *timing)
 {
 	uint32_t max_pix_clk = stream->sink->dongle_max_pix_clk;
-	struct dc_dongle_caps *dongle_caps = &link->dpcd_caps.dongle_caps;
+	struct dpcd_caps *dpcd_caps = &link->dpcd_caps;
 
 	/* A hack to avoid failing any modes for EDID override feature on
 	 * topology change such as lower quality cable for DP or different dongle
@@ -1924,7 +2004,7 @@
 		return DC_EXCEED_DONGLE_CAP;
 
 	/* Active Dongle*/
-	if (!dp_active_dongle_validate_timing(timing, dongle_caps))
+	if (!dp_active_dongle_validate_timing(timing, dpcd_caps))
 		return DC_EXCEED_DONGLE_CAP;
 
 	switch (stream->signal) {
@@ -1950,10 +2030,10 @@
 	struct dc  *core_dc = link->ctx->dc;
 	struct abm *abm = core_dc->res_pool->abm;
 	struct dmcu *dmcu = core_dc->res_pool->dmcu;
-	struct dc_context *dc_ctx = link->ctx;
 	unsigned int controller_id = 0;
 	bool use_smooth_brightness = true;
 	int i;
+	DC_LOGGER_INIT(link->ctx->logger);
 
 	if ((dmcu == NULL) ||
 		(abm == NULL) ||
@@ -1961,7 +2041,7 @@
 		return false;
 
 	if (stream) {
-		if (stream->bl_pwm_level == 0)
+		if (stream->bl_pwm_level == EDP_BACKLIGHT_RAMP_DISABLE_LEVEL)
 			frame_ramp = 0;
 
 		((struct dc_stream_state *)stream)->bl_pwm_level = level;
@@ -2038,10 +2118,10 @@
 			&stream->sink->link->cur_link_settings;
 	uint32_t link_rate_in_mbps =
 			link_settings->link_rate * LINK_RATE_REF_FREQ_IN_MHZ;
-	struct fixed31_32 mbps = dal_fixed31_32_from_int(
+	struct fixed31_32 mbps = dc_fixpt_from_int(
 			link_rate_in_mbps * link_settings->lane_count);
 
-	return dal_fixed31_32_div_int(mbps, 54);
+	return dc_fixpt_div_int(mbps, 54);
 }
 
 static int get_color_depth(enum dc_color_depth color_depth)
@@ -2082,7 +2162,7 @@
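+	/*
+	 * Standard MST PBN math: kbps -> MBps (/8 /1000), PBN units of
+	 * 54/64 MBps (*64/54) and a 0.6% margin (*1006/1000), i.e.
+	 * peak_kbps = kbps * 64 * 1006 / (54 * 8 * 1000 * 1000).
+	 */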
 	numerator = 64 * PEAK_FACTOR_X1000;
 	denominator = 54 * 8 * 1000 * 1000;
 	kbps *= numerator;
-	peak_kbps = dal_fixed31_32_from_fraction(kbps, denominator);
+	peak_kbps = dc_fixpt_from_fraction(kbps, denominator);
 
 	return peak_kbps;
 }
@@ -2149,8 +2229,8 @@
 	struct fixed31_32 avg_time_slots_per_mtp;
 	struct fixed31_32 pbn;
 	struct fixed31_32 pbn_per_slot;
-	struct dc_context *dc_ctx = link->ctx;
 	uint8_t i;
+	DC_LOGGER_INIT(link->ctx->logger);
 
 	/* enable_link_dp_mst already checks link->enabled_stream_count
 	 * and stream is in link->stream[]. This is called during set mode,
@@ -2178,11 +2258,11 @@
 			link->mst_stream_alloc_table.stream_count);
 
 	for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
-		DC_LOG_MST("stream_enc[%d]: 0x%x      "
+		DC_LOG_MST("stream_enc[%d]: %p      "
 		"stream[%d].vcp_id: %d      "
 		"stream[%d].slot_count: %d\n",
 		i,
-		link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
+		(void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
 		i,
 		link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
 		i,
@@ -2209,7 +2289,7 @@
 	/* slot X.Y for only current stream */
 	pbn_per_slot = get_pbn_per_slot(stream);
 	pbn = get_pbn_from_timing(pipe_ctx);
-	avg_time_slots_per_mtp = dal_fixed31_32_div(pbn, pbn_per_slot);
+	avg_time_slots_per_mtp = dc_fixpt_div(pbn, pbn_per_slot);
 
 	stream_encoder->funcs->set_mst_bandwidth(
 		stream_encoder,
@@ -2226,10 +2306,10 @@
 	struct link_encoder *link_encoder = link->link_enc;
 	struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc;
 	struct dp_mst_stream_allocation_table proposed_table = {0};
-	struct fixed31_32 avg_time_slots_per_mtp = dal_fixed31_32_from_int(0);
+	struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0);
 	uint8_t i;
 	bool mst_mode = (link->type == dc_connection_mst_branch);
-	struct dc_context *dc_ctx = link->ctx;
+	DC_LOGGER_INIT(link->ctx->logger);
 
 	/* deallocate_mst_payload is called before disabling the link. On a mode
 	 * change or monitor disable/enable, a new stream is created which is not in link
@@ -2268,11 +2348,11 @@
 			link->mst_stream_alloc_table.stream_count);
 
 	for (i = 0; i < MAX_CONTROLLER_NUM; i++) {
-		DC_LOG_MST("stream_enc[%d]: 0x%x      "
+		DC_LOG_MST("stream_enc[%d]: %p      "
 		"stream[%d].vcp_id: %d      "
 		"stream[%d].slot_count: %d\n",
 		i,
-		link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
+		(void *) link->mst_stream_alloc_table.stream_allocations[i].stream_enc,
 		i,
 		link->mst_stream_alloc_table.stream_allocations[i].vcp_id,
 		i,
@@ -2302,8 +2382,8 @@
 		struct pipe_ctx *pipe_ctx)
 {
 	struct dc  *core_dc = pipe_ctx->stream->ctx->dc;
-	struct dc_context *dc_ctx = pipe_ctx->stream->ctx;
 	enum dc_status status;
+	DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
 
 	/* eDP lit up by bios already, no need to enable again. */
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP &&
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 3b50535..7d609c7 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1378,8 +1378,8 @@
 {
 	uint32_t bits_per_channel = 0;
 	uint32_t kbps;
-	switch (timing->display_color_depth) {
 
+	switch (timing->display_color_depth) {
 	case COLOR_DEPTH_666:
 		bits_per_channel = 6;
 		break;
@@ -1401,14 +1401,20 @@
 	default:
 		break;
 	}
+
 	ASSERT(bits_per_channel != 0);
 
 	kbps = timing->pix_clk_khz;
 	kbps *= bits_per_channel;
 
-	if (timing->flags.Y_ONLY != 1)
+	if (timing->flags.Y_ONLY != 1) {
 		/* Only the YOnly format reduces bandwidth to 1/3 of RGB */
 		kbps *= 3;
+		if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+			kbps /= 2;
+		else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+			kbps = kbps * 2 / 3;
+	}
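+	/*
+	 * Example with hypothetical numbers: a 594000 kHz pixel clock at
+	 * 8 bits per channel needs 594000 * 8 * 3 = 14256000 kbps for
+	 * RGB/4:4:4, half of that (7128000) for YCbCr 4:2:0 and two
+	 * thirds (9504000) for YCbCr 4:2:2.
+	 */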
 
 	return kbps;
 
@@ -2278,6 +2284,8 @@
 	union edp_configuration_cap edp_config_cap;
 	union dp_downstream_port_present ds_port = { 0 };
 	enum dc_status status = DC_ERROR_UNEXPECTED;
+	uint32_t read_dpcd_retry_cnt = 3;
+	int i;
 
 	memset(dpcd_data, '\0', sizeof(dpcd_data));
 	memset(&down_strm_port_count,
@@ -2285,11 +2293,15 @@
 	memset(&edp_config_cap, '\0',
 		sizeof(union edp_configuration_cap));
 
-	status = core_link_read_dpcd(
-			link,
-			DP_DPCD_REV,
-			dpcd_data,
-			sizeof(dpcd_data));
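+	/*
+	 * Retry the capability read a few times rather than failing on
+	 * the first AUX error; some sinks need a moment after hotplug
+	 * before DPCD reads succeed.
+	 */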
+	for (i = 0; i < read_dpcd_retry_cnt; i++) {
+		status = core_link_read_dpcd(
+				link,
+				DP_DPCD_REV,
+				dpcd_data,
+				sizeof(dpcd_data));
+		if (status == DC_OK)
+			break;
+	}
 
 	if (status != DC_OK) {
 		dm_error("%s: Read dpcd data failed.\n", __func__);
@@ -2376,6 +2388,10 @@
 void detect_edp_sink_caps(struct dc_link *link)
 {
 	retrieve_link_cap(link);
+
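+	/*
+	 * If the panel does not report a link rate, fall back to HBR2
+	 * (LINK_RATE_HIGH2) so link training has a starting point.
+	 */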
+	if (link->reported_link_cap.link_rate == LINK_RATE_UNKNOWN)
+		link->reported_link_cap.link_rate = LINK_RATE_HIGH2;
+
 	link->verified_link_cap = link->reported_link_cap;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
index 7c866a7..82cd1d6 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
@@ -11,8 +11,6 @@
 #include "dc_link_dp.h"
 #include "dc_link_ddc.h"
 #include "dm_helpers.h"
-#include "dce/dce_link_encoder.h"
-#include "dce/dce_stream_encoder.h"
 #include "dpcd_defs.h"
 
 enum dc_status core_link_read_dpcd(
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index ba3487e..751f3ac 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -45,8 +45,9 @@
 #include "dcn10/dcn10_resource.h"
 #endif
 #include "dce120/dce120_resource.h"
-#define DC_LOGGER \
-	ctx->logger
+
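+/*
+ * Expands to nothing here; it only keeps the shared DC_LOGGER_INIT()
+ * function prologue used elsewhere in dc compiling in this file.
+ */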
+#define DC_LOGGER_INIT(logger)
+
 enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
 {
 	enum dce_version dc_version = DCE_VERSION_UNKNOWN;
@@ -78,6 +79,8 @@
 				ASIC_REV_IS_POLARIS12_V(asic_id.hw_internal_rev)) {
 			dc_version = DCE_VERSION_11_2;
 		}
+		if (ASIC_REV_IS_VEGAM(asic_id.hw_internal_rev))
+			dc_version = DCE_VERSION_11_22;
 		break;
 	case FAMILY_AI:
 		dc_version = DCE_VERSION_12_0;
@@ -124,6 +127,7 @@
 			num_virtual_links, dc, asic_id);
 		break;
 	case DCE_VERSION_11_2:
+	case DCE_VERSION_11_22:
 		res_pool = dce112_create_resource_pool(
 			num_virtual_links, dc);
 		break;
@@ -492,9 +496,9 @@
 	data->viewport_c.x = data->viewport.x / vpc_div;
 	data->viewport_c.y = data->viewport.y / vpc_div;
 	data->inits.h_c = (data->viewport.x % vpc_div) != 0 ?
-			dal_fixed31_32_half : dal_fixed31_32_zero;
+			dc_fixpt_half : dc_fixpt_zero;
 	data->inits.v_c = (data->viewport.y % vpc_div) != 0 ?
-			dal_fixed31_32_half : dal_fixed31_32_zero;
+			dc_fixpt_half : dc_fixpt_zero;
 	/* Round up, assume original video size always even dimensions */
 	data->viewport_c.width = (data->viewport.width + vpc_div - 1) / vpc_div;
 	data->viewport_c.height = (data->viewport.height + vpc_div - 1) / vpc_div;
@@ -623,10 +627,10 @@
 			pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270)
 		rect_swap_helper(&surf_src);
 
-	pipe_ctx->plane_res.scl_data.ratios.horz = dal_fixed31_32_from_fraction(
+	pipe_ctx->plane_res.scl_data.ratios.horz = dc_fixpt_from_fraction(
 					surf_src.width,
 					plane_state->dst_rect.width);
-	pipe_ctx->plane_res.scl_data.ratios.vert = dal_fixed31_32_from_fraction(
+	pipe_ctx->plane_res.scl_data.ratios.vert = dc_fixpt_from_fraction(
 					surf_src.height,
 					plane_state->dst_rect.height);
 
@@ -648,6 +652,14 @@
 		pipe_ctx->plane_res.scl_data.ratios.horz_c.value /= 2;
 		pipe_ctx->plane_res.scl_data.ratios.vert_c.value /= 2;
 	}
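+	/*
+	 * Truncate the ratios to 19 fractional bits, presumably matching
+	 * the precision of the scaler's ratio registers so SW and HW
+	 * computations stay in agreement.
+	 */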
+	pipe_ctx->plane_res.scl_data.ratios.horz = dc_fixpt_truncate(
+			pipe_ctx->plane_res.scl_data.ratios.horz, 19);
+	pipe_ctx->plane_res.scl_data.ratios.vert = dc_fixpt_truncate(
+			pipe_ctx->plane_res.scl_data.ratios.vert, 19);
+	pipe_ctx->plane_res.scl_data.ratios.horz_c = dc_fixpt_truncate(
+			pipe_ctx->plane_res.scl_data.ratios.horz_c, 19);
+	pipe_ctx->plane_res.scl_data.ratios.vert_c = dc_fixpt_truncate(
+			pipe_ctx->plane_res.scl_data.ratios.vert_c, 19);
 }
 
 static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *recout_skip)
@@ -684,32 +696,33 @@
 	 * 	init_bot = init + scaling_ratio
 	 * 	init_c = init + truncated_vp_c_offset(from calculate viewport)
 	 */
-	data->inits.h = dal_fixed31_32_div_int(
-			dal_fixed31_32_add_int(data->ratios.horz, data->taps.h_taps + 1), 2);
+	data->inits.h = dc_fixpt_truncate(dc_fixpt_div_int(
+			dc_fixpt_add_int(data->ratios.horz, data->taps.h_taps + 1), 2), 19);
 
-	data->inits.h_c = dal_fixed31_32_add(data->inits.h_c, dal_fixed31_32_div_int(
-			dal_fixed31_32_add_int(data->ratios.horz_c, data->taps.h_taps_c + 1), 2));
+	data->inits.h_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.h_c, dc_fixpt_div_int(
+			dc_fixpt_add_int(data->ratios.horz_c, data->taps.h_taps_c + 1), 2)), 19);
 
-	data->inits.v = dal_fixed31_32_div_int(
-			dal_fixed31_32_add_int(data->ratios.vert, data->taps.v_taps + 1), 2);
+	data->inits.v = dc_fixpt_truncate(dc_fixpt_div_int(
+			dc_fixpt_add_int(data->ratios.vert, data->taps.v_taps + 1), 2), 19);
 
-	data->inits.v_c = dal_fixed31_32_add(data->inits.v_c, dal_fixed31_32_div_int(
-			dal_fixed31_32_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2));
+	data->inits.v_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.v_c, dc_fixpt_div_int(
+			dc_fixpt_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2)), 19);
 
 
 	/* Adjust for viewport end clip-off */
 	if ((data->viewport.x + data->viewport.width) < (src.x + src.width) && !flip_horz_scan_dir) {
 		int vp_clip = src.x + src.width - data->viewport.width - data->viewport.x;
-		int int_part = dal_fixed31_32_floor(
-				dal_fixed31_32_sub(data->inits.h, data->ratios.horz));
+		int int_part = dc_fixpt_floor(
+				dc_fixpt_sub(data->inits.h, data->ratios.horz));
 
 		int_part = int_part > 0 ? int_part : 0;
 		data->viewport.width += int_part < vp_clip ? int_part : vp_clip;
 	}
 	if ((data->viewport.y + data->viewport.height) < (src.y + src.height) && !flip_vert_scan_dir) {
 		int vp_clip = src.y + src.height - data->viewport.height - data->viewport.y;
-		int int_part = dal_fixed31_32_floor(
-				dal_fixed31_32_sub(data->inits.v, data->ratios.vert));
+		int int_part = dc_fixpt_floor(
+				dc_fixpt_sub(data->inits.v, data->ratios.vert));
 
 		int_part = int_part > 0 ? int_part : 0;
 		data->viewport.height += int_part < vp_clip ? int_part : vp_clip;
@@ -717,8 +730,8 @@
 	if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div && !flip_horz_scan_dir) {
 		int vp_clip = (src.x + src.width) / vpc_div -
 				data->viewport_c.width - data->viewport_c.x;
-		int int_part = dal_fixed31_32_floor(
-				dal_fixed31_32_sub(data->inits.h_c, data->ratios.horz_c));
+		int int_part = dc_fixpt_floor(
+				dc_fixpt_sub(data->inits.h_c, data->ratios.horz_c));
 
 		int_part = int_part > 0 ? int_part : 0;
 		data->viewport_c.width += int_part < vp_clip ? int_part : vp_clip;
@@ -726,8 +739,8 @@
 	if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div && !flip_vert_scan_dir) {
 		int vp_clip = (src.y + src.height) / vpc_div -
 				data->viewport_c.height - data->viewport_c.y;
-		int int_part = dal_fixed31_32_floor(
-				dal_fixed31_32_sub(data->inits.v_c, data->ratios.vert_c));
+		int int_part = dc_fixpt_floor(
+				dc_fixpt_sub(data->inits.v_c, data->ratios.vert_c));
 
 		int_part = int_part > 0 ? int_part : 0;
 		data->viewport_c.height += int_part < vp_clip ? int_part : vp_clip;
@@ -737,9 +750,9 @@
 	if (data->viewport.x && !flip_horz_scan_dir) {
 		int int_part;
 
-		data->inits.h = dal_fixed31_32_add(data->inits.h, dal_fixed31_32_mul_int(
+		data->inits.h = dc_fixpt_add(data->inits.h, dc_fixpt_mul_int(
 				data->ratios.horz, recout_skip->width));
-		int_part = dal_fixed31_32_floor(data->inits.h) - data->viewport.x;
+		int_part = dc_fixpt_floor(data->inits.h) - data->viewport.x;
 		if (int_part < data->taps.h_taps) {
 			int int_adj = data->viewport.x >= (data->taps.h_taps - int_part) ?
 						(data->taps.h_taps - int_part) : data->viewport.x;
@@ -752,15 +765,15 @@
 			int_part = data->taps.h_taps;
 		}
 		data->inits.h.value &= 0xffffffff;
-		data->inits.h = dal_fixed31_32_add_int(data->inits.h, int_part);
+		data->inits.h = dc_fixpt_add_int(data->inits.h, int_part);
 	}
 
 	if (data->viewport_c.x && !flip_horz_scan_dir) {
 		int int_part;
 
-		data->inits.h_c = dal_fixed31_32_add(data->inits.h_c, dal_fixed31_32_mul_int(
+		data->inits.h_c = dc_fixpt_add(data->inits.h_c, dc_fixpt_mul_int(
 				data->ratios.horz_c, recout_skip->width));
-		int_part = dal_fixed31_32_floor(data->inits.h_c) - data->viewport_c.x;
+		int_part = dc_fixpt_floor(data->inits.h_c) - data->viewport_c.x;
 		if (int_part < data->taps.h_taps_c) {
 			int int_adj = data->viewport_c.x >= (data->taps.h_taps_c - int_part) ?
 					(data->taps.h_taps_c - int_part) : data->viewport_c.x;
@@ -773,15 +786,15 @@
 			int_part = data->taps.h_taps_c;
 		}
 		data->inits.h_c.value &= 0xffffffff;
-		data->inits.h_c = dal_fixed31_32_add_int(data->inits.h_c, int_part);
+		data->inits.h_c = dc_fixpt_add_int(data->inits.h_c, int_part);
 	}
 
 	if (data->viewport.y && !flip_vert_scan_dir) {
 		int int_part;
 
-		data->inits.v = dal_fixed31_32_add(data->inits.v, dal_fixed31_32_mul_int(
+		data->inits.v = dc_fixpt_add(data->inits.v, dc_fixpt_mul_int(
 				data->ratios.vert, recout_skip->height));
-		int_part = dal_fixed31_32_floor(data->inits.v) - data->viewport.y;
+		int_part = dc_fixpt_floor(data->inits.v) - data->viewport.y;
 		if (int_part < data->taps.v_taps) {
 			int int_adj = data->viewport.y >= (data->taps.v_taps - int_part) ?
 						(data->taps.v_taps - int_part) : data->viewport.y;
@@ -794,15 +807,15 @@
 			int_part = data->taps.v_taps;
 		}
 		data->inits.v.value &= 0xffffffff;
-		data->inits.v = dal_fixed31_32_add_int(data->inits.v, int_part);
+		data->inits.v = dc_fixpt_add_int(data->inits.v, int_part);
 	}
 
 	if (data->viewport_c.y && !flip_vert_scan_dir) {
 		int int_part;
 
-		data->inits.v_c = dal_fixed31_32_add(data->inits.v_c, dal_fixed31_32_mul_int(
+		data->inits.v_c = dc_fixpt_add(data->inits.v_c, dc_fixpt_mul_int(
 				data->ratios.vert_c, recout_skip->height));
-		int_part = dal_fixed31_32_floor(data->inits.v_c) - data->viewport_c.y;
+		int_part = dc_fixpt_floor(data->inits.v_c) - data->viewport_c.y;
 		if (int_part < data->taps.v_taps_c) {
 			int int_adj = data->viewport_c.y >= (data->taps.v_taps_c - int_part) ?
 					(data->taps.v_taps_c - int_part) : data->viewport_c.y;
@@ -815,12 +828,12 @@
 			int_part = data->taps.v_taps_c;
 		}
 		data->inits.v_c.value &= 0xffffffff;
-		data->inits.v_c = dal_fixed31_32_add_int(data->inits.v_c, int_part);
+		data->inits.v_c = dc_fixpt_add_int(data->inits.v_c, int_part);
 	}
 
 	/* Interlaced inits based on final vert inits */
-	data->inits.v_bot = dal_fixed31_32_add(data->inits.v, data->ratios.vert);
-	data->inits.v_c_bot = dal_fixed31_32_add(data->inits.v_c, data->ratios.vert_c);
+	data->inits.v_bot = dc_fixpt_add(data->inits.v, data->ratios.vert);
+	data->inits.v_c_bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c);
 
 	if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 ||
 			pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) {
@@ -835,7 +848,7 @@
 	struct dc_crtc_timing *timing = &pipe_ctx->stream->timing;
 	struct view recout_skip = { 0 };
 	bool res = false;
-	struct dc_context *ctx = pipe_ctx->stream->ctx;
+	DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger);
 	/* Important: scaling ratio calculation requires pixel format,
 	 * lb depth calculation requires recout and taps require scaling ratios.
 	 * Inits require viewport, taps, ratios and recout of split pipe
@@ -843,6 +856,9 @@
 	pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface(
 			pipe_ctx->plane_state->format);
 
+	if (pipe_ctx->stream->timing.flags.INTERLACE)
+		pipe_ctx->stream->dst.height *= 2;
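+	/* Doubled so the scaling math below sees the full frame height;
+	 * halved back again before this function returns. */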
+
 	calculate_scaling_ratios(pipe_ctx);
 
 	calculate_viewport(pipe_ctx);
@@ -863,6 +879,8 @@
 
 	pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right;
 	pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
+	if (pipe_ctx->stream->timing.flags.INTERLACE)
+		pipe_ctx->plane_res.scl_data.v_active *= 2;
 
 
 	/* Taps calculations */
@@ -908,6 +926,9 @@
 				plane_state->dst_rect.x,
 				plane_state->dst_rect.y);
 
+	if (pipe_ctx->stream->timing.flags.INTERLACE)
+		pipe_ctx->stream->dst.height /= 2;
+
 	return res;
 }
 
@@ -1294,6 +1315,19 @@
 }
 
 
+static bool is_hdr_static_meta_changed(struct dc_stream_state *cur_stream,
+	struct dc_stream_state *new_stream)
+{
+	if (cur_stream == NULL)
+		return true;
+
+	if (memcmp(&cur_stream->hdr_static_metadata,
+			&new_stream->hdr_static_metadata,
+			sizeof(struct dc_info_packet)) != 0)
+		return true;
+
+	return false;
+}
 
 static bool is_timing_changed(struct dc_stream_state *cur_stream,
 		struct dc_stream_state *new_stream)
@@ -1329,6 +1363,9 @@
 	if (is_timing_changed(stream_a, stream_b))
 		return false;
 
+	if (is_hdr_static_meta_changed(stream_a, stream_b))
+		return false;
+
 	return true;
 }
 
@@ -1599,18 +1636,6 @@
 	return DC_OK;
 }
 
-static void copy_pipe_ctx(
-	const struct pipe_ctx *from_pipe_ctx, struct pipe_ctx *to_pipe_ctx)
-{
-	struct dc_plane_state *plane_state = to_pipe_ctx->plane_state;
-	struct dc_stream_state *stream = to_pipe_ctx->stream;
-
-	*to_pipe_ctx = *from_pipe_ctx;
-	to_pipe_ctx->stream = stream;
-	if (plane_state != NULL)
-		to_pipe_ctx->plane_state = plane_state;
-}
-
 static struct dc_stream_state *find_pll_sharable_stream(
 		struct dc_stream_state *stream_needs_pll,
 		struct dc_state *context)
@@ -1703,7 +1728,7 @@
 		pipe_idx = acquire_first_split_pipe(&context->res_ctx, pool, stream);
 #endif
 
-	if (pipe_idx < 0)
+	if (pipe_idx < 0 || context->res_ctx.pipe_ctx[pipe_idx].stream_res.tg == NULL)
 		return DC_NO_CONTROLLER_RESOURCE;
 
 	pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
@@ -1752,26 +1777,6 @@
 	return DC_ERROR_UNEXPECTED;
 }
 
-/* first stream in the context is used to populate the rest */
-void validate_guaranteed_copy_streams(
-		struct dc_state *context,
-		int max_streams)
-{
-	int i;
-
-	for (i = 1; i < max_streams; i++) {
-		context->streams[i] = context->streams[0];
-
-		copy_pipe_ctx(&context->res_ctx.pipe_ctx[0],
-			      &context->res_ctx.pipe_ctx[i]);
-		context->res_ctx.pipe_ctx[i].stream =
-				context->res_ctx.pipe_ctx[0].stream;
-
-		dc_stream_retain(context->streams[i]);
-		context->stream_count++;
-	}
-}
-
 void dc_resource_state_copy_construct_current(
 		const struct dc *dc,
 		struct dc_state *dst_ctx)
@@ -1798,9 +1803,9 @@
 		return DC_ERROR_UNEXPECTED;
 
 	if (dc->res_pool->funcs->validate_global) {
-			result = dc->res_pool->funcs->validate_global(dc, new_ctx);
-			if (result != DC_OK)
-				return result;
+		result = dc->res_pool->funcs->validate_global(dc, new_ctx);
+		if (result != DC_OK)
+			return result;
 	}
 
 	for (i = 0; i < new_ctx->stream_count; i++) {
@@ -1843,7 +1848,7 @@
 }
 
 static void patch_gamut_packet_checksum(
-		struct encoder_info_packet *gamut_packet)
+		struct dc_info_packet *gamut_packet)
 {
 	/* For gamut we recalc checksum */
 	if (gamut_packet->valid) {
@@ -1862,12 +1867,11 @@
 }
 
 static void set_avi_info_frame(
-		struct encoder_info_packet *info_packet,
+		struct dc_info_packet *info_packet,
 		struct pipe_ctx *pipe_ctx)
 {
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	enum dc_color_space color_space = COLOR_SPACE_UNKNOWN;
-	struct info_frame info_frame = { {0} };
 	uint32_t pixel_encoding = 0;
 	enum scanning_type scan_type = SCANNING_TYPE_NODATA;
 	enum dc_aspect_ratio aspect = ASPECT_RATIO_NO_DATA;
@@ -1877,22 +1881,24 @@
 	unsigned int cn0_cn1_value = 0;
 	uint8_t *check_sum = NULL;
 	uint8_t byte_index = 0;
-	union hdmi_info_packet *hdmi_info = &info_frame.avi_info_packet.info_packet_hdmi;
+	union hdmi_info_packet hdmi_info;
 	union display_content_support support = {0};
 	unsigned int vic = pipe_ctx->stream->timing.vic;
 	enum dc_timing_3d_format format;
 
+	memset(&hdmi_info, 0, sizeof(union hdmi_info_packet));
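+	/* The infoframe is assembled in this zeroed stack copy and only
+	 * published into info_packet once the checksum is final. */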
+
 	color_space = pipe_ctx->stream->output_color_space;
 	if (color_space == COLOR_SPACE_UNKNOWN)
 		color_space = (stream->timing.pixel_encoding == PIXEL_ENCODING_RGB) ?
 			COLOR_SPACE_SRGB:COLOR_SPACE_YCBCR709;
 
 	/* Initialize header */
-	hdmi_info->bits.header.info_frame_type = HDMI_INFOFRAME_TYPE_AVI;
+	hdmi_info.bits.header.info_frame_type = HDMI_INFOFRAME_TYPE_AVI;
 	/* InfoFrameVersion_3 is defined by CEA861F (Section 6.4), but shall
 	 * not be used in HDMI 2.0 (Section 10.1) */
-	hdmi_info->bits.header.version = 2;
-	hdmi_info->bits.header.length = HDMI_AVI_INFOFRAME_SIZE;
+	hdmi_info.bits.header.version = 2;
+	hdmi_info.bits.header.length = HDMI_AVI_INFOFRAME_SIZE;
 
 	/*
 	 * IDO-defined (Y2,Y1,Y0 = 1,1,1) shall not be used by devices built
@@ -1918,39 +1924,39 @@
 
 	/* Y0_Y1_Y2 : The pixel encoding */
 	/* H14b AVI InfoFrame has extension on Y-field from 2 bits to 3 bits */
-	hdmi_info->bits.Y0_Y1_Y2 = pixel_encoding;
+	hdmi_info.bits.Y0_Y1_Y2 = pixel_encoding;
 
 	/* A0 = 1 Active Format Information valid */
-	hdmi_info->bits.A0 = ACTIVE_FORMAT_VALID;
+	hdmi_info.bits.A0 = ACTIVE_FORMAT_VALID;
 
 	/* B0, B1 = 3; Bar info data is valid */
-	hdmi_info->bits.B0_B1 = BAR_INFO_BOTH_VALID;
+	hdmi_info.bits.B0_B1 = BAR_INFO_BOTH_VALID;
 
-	hdmi_info->bits.SC0_SC1 = PICTURE_SCALING_UNIFORM;
+	hdmi_info.bits.SC0_SC1 = PICTURE_SCALING_UNIFORM;
 
 	/* S0, S1 : Underscan / Overscan */
 	/* TODO: un-hardcode scan type */
 	scan_type = SCANNING_TYPE_UNDERSCAN;
-	hdmi_info->bits.S0_S1 = scan_type;
+	hdmi_info.bits.S0_S1 = scan_type;
 
 	/* C0, C1 : Colorimetry */
 	if (color_space == COLOR_SPACE_YCBCR709 ||
 			color_space == COLOR_SPACE_YCBCR709_LIMITED)
-		hdmi_info->bits.C0_C1 = COLORIMETRY_ITU709;
+		hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709;
 	else if (color_space == COLOR_SPACE_YCBCR601 ||
 			color_space == COLOR_SPACE_YCBCR601_LIMITED)
-		hdmi_info->bits.C0_C1 = COLORIMETRY_ITU601;
+		hdmi_info.bits.C0_C1 = COLORIMETRY_ITU601;
 	else {
-		hdmi_info->bits.C0_C1 = COLORIMETRY_NO_DATA;
+		hdmi_info.bits.C0_C1 = COLORIMETRY_NO_DATA;
 	}
 	if (color_space == COLOR_SPACE_2020_RGB_FULLRANGE ||
 			color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE ||
 			color_space == COLOR_SPACE_2020_YCBCR) {
-		hdmi_info->bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR;
-		hdmi_info->bits.C0_C1   = COLORIMETRY_EXTENDED;
+		hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_BT2020RGBYCBCR;
+		hdmi_info.bits.C0_C1   = COLORIMETRY_EXTENDED;
 	} else if (color_space == COLOR_SPACE_ADOBERGB) {
-		hdmi_info->bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB;
-		hdmi_info->bits.C0_C1   = COLORIMETRY_EXTENDED;
+		hdmi_info.bits.EC0_EC2 = COLORIMETRYEX_ADOBERGB;
+		hdmi_info.bits.C0_C1   = COLORIMETRY_EXTENDED;
 	}
 
 	/* TODO: un-hardcode aspect ratio */
@@ -1959,18 +1965,18 @@
 	switch (aspect) {
 	case ASPECT_RATIO_4_3:
 	case ASPECT_RATIO_16_9:
-		hdmi_info->bits.M0_M1 = aspect;
+		hdmi_info.bits.M0_M1 = aspect;
 		break;
 
 	case ASPECT_RATIO_NO_DATA:
 	case ASPECT_RATIO_64_27:
 	case ASPECT_RATIO_256_135:
 	default:
-		hdmi_info->bits.M0_M1 = 0;
+		hdmi_info.bits.M0_M1 = 0;
 	}
 
 	/* Active Format Aspect ratio - same as Picture Aspect Ratio. */
-	hdmi_info->bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE;
+	hdmi_info.bits.R0_R3 = ACTIVE_FORMAT_ASPECT_RATIO_SAME_AS_PICTURE;
 
 	/* TODO: un-hardcode cn0_cn1 and itc */
 
@@ -2013,8 +2019,8 @@
 				}
 			}
 		}
-		hdmi_info->bits.CN0_CN1 = cn0_cn1_value;
-		hdmi_info->bits.ITC = itc_value;
+		hdmi_info.bits.CN0_CN1 = cn0_cn1_value;
+		hdmi_info.bits.ITC = itc_value;
 	}
 
 	/* TODO : We should handle YCC quantization */
@@ -2023,19 +2029,19 @@
 			stream->sink->edid_caps.qy_bit == 1) {
 		if (color_space == COLOR_SPACE_SRGB ||
 			color_space == COLOR_SPACE_2020_RGB_FULLRANGE) {
-			hdmi_info->bits.Q0_Q1   = RGB_QUANTIZATION_FULL_RANGE;
-			hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE;
+			hdmi_info.bits.Q0_Q1   = RGB_QUANTIZATION_FULL_RANGE;
+			hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_FULL_RANGE;
 		} else if (color_space == COLOR_SPACE_SRGB_LIMITED ||
 					color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE) {
-			hdmi_info->bits.Q0_Q1   = RGB_QUANTIZATION_LIMITED_RANGE;
-			hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
+			hdmi_info.bits.Q0_Q1   = RGB_QUANTIZATION_LIMITED_RANGE;
+			hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
 		} else {
-			hdmi_info->bits.Q0_Q1   = RGB_QUANTIZATION_DEFAULT_RANGE;
-			hdmi_info->bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
+			hdmi_info.bits.Q0_Q1   = RGB_QUANTIZATION_DEFAULT_RANGE;
+			hdmi_info.bits.YQ0_YQ1 = YYC_QUANTIZATION_LIMITED_RANGE;
 		}
 	} else {
-		hdmi_info->bits.Q0_Q1   = RGB_QUANTIZATION_DEFAULT_RANGE;
-		hdmi_info->bits.YQ0_YQ1   = YYC_QUANTIZATION_LIMITED_RANGE;
+		hdmi_info.bits.Q0_Q1   = RGB_QUANTIZATION_DEFAULT_RANGE;
+		hdmi_info.bits.YQ0_YQ1   = YYC_QUANTIZATION_LIMITED_RANGE;
 	}
 
 	///VIC
@@ -2060,51 +2066,49 @@
 			break;
 		}
 	}
-	hdmi_info->bits.VIC0_VIC7 = vic;
+	hdmi_info.bits.VIC0_VIC7 = vic;
 
 	/* pixel repetition
 	 * PR0 - PR3 start from 0 whereas pHwPathMode->mode.timing.flags.pixel
 	 * repetition starts from 1 */
-	hdmi_info->bits.PR0_PR3 = 0;
+	hdmi_info.bits.PR0_PR3 = 0;
 
 	/* Bar Info
 	 * barTop:    Line Number of End of Top Bar.
 	 * barBottom: Line Number of Start of Bottom Bar.
 	 * barLeft:   Pixel Number of End of Left Bar.
 	 * barRight:  Pixel Number of Start of Right Bar. */
-	hdmi_info->bits.bar_top = stream->timing.v_border_top;
-	hdmi_info->bits.bar_bottom = (stream->timing.v_total
+	hdmi_info.bits.bar_top = stream->timing.v_border_top;
+	hdmi_info.bits.bar_bottom = (stream->timing.v_total
 			- stream->timing.v_border_bottom + 1);
-	hdmi_info->bits.bar_left  = stream->timing.h_border_left;
-	hdmi_info->bits.bar_right = (stream->timing.h_total
+	hdmi_info.bits.bar_left  = stream->timing.h_border_left;
+	hdmi_info.bits.bar_right = (stream->timing.h_total
 			- stream->timing.h_border_right + 1);
 
 	/* check_sum - Calculate AFMT_AVI_INFO0 ~ AFMT_AVI_INFO3 */
-	check_sum = &info_frame.avi_info_packet.info_packet_hdmi.packet_raw_data.sb[0];
+	check_sum = &hdmi_info.packet_raw_data.sb[0];
 
 	*check_sum = HDMI_INFOFRAME_TYPE_AVI + HDMI_AVI_INFOFRAME_SIZE + 2;
 
 	for (byte_index = 1; byte_index <= HDMI_AVI_INFOFRAME_SIZE; byte_index++)
-		*check_sum += hdmi_info->packet_raw_data.sb[byte_index];
+		*check_sum += hdmi_info.packet_raw_data.sb[byte_index];
 
 	/* one byte complement */
 	*check_sum = (uint8_t) (0x100 - *check_sum);
 
 	/* Store in hw_path_mode */
-	info_packet->hb0 = hdmi_info->packet_raw_data.hb0;
-	info_packet->hb1 = hdmi_info->packet_raw_data.hb1;
-	info_packet->hb2 = hdmi_info->packet_raw_data.hb2;
+	info_packet->hb0 = hdmi_info.packet_raw_data.hb0;
+	info_packet->hb1 = hdmi_info.packet_raw_data.hb1;
+	info_packet->hb2 = hdmi_info.packet_raw_data.hb2;
 
-	for (byte_index = 0; byte_index < sizeof(info_frame.avi_info_packet.
-				info_packet_hdmi.packet_raw_data.sb); byte_index++)
-		info_packet->sb[byte_index] = info_frame.avi_info_packet.
-				info_packet_hdmi.packet_raw_data.sb[byte_index];
+	for (byte_index = 0; byte_index < sizeof(hdmi_info.packet_raw_data.sb); byte_index++)
+		info_packet->sb[byte_index] = hdmi_info.packet_raw_data.sb[byte_index];
 
 	info_packet->valid = true;
 }
 
 static void set_vendor_info_packet(
-		struct encoder_info_packet *info_packet,
+		struct dc_info_packet *info_packet,
 		struct dc_stream_state *stream)
 {
 	uint32_t length = 0;
@@ -2217,7 +2221,7 @@
 }
 
 static void set_spd_info_packet(
-		struct encoder_info_packet *info_packet,
+		struct dc_info_packet *info_packet,
 		struct dc_stream_state *stream)
 {
 	/* SPD info packet for FreeSync */
@@ -2338,104 +2342,19 @@
 }
 
 static void set_hdr_static_info_packet(
-		struct encoder_info_packet *info_packet,
+		struct dc_info_packet *info_packet,
 		struct dc_stream_state *stream)
 {
-	uint16_t i = 0;
-	enum signal_type signal = stream->signal;
-	uint32_t data;
+	/* HDR Static Metadata info packet for HDR10 */
 
-	if (!stream->hdr_static_metadata.hdr_supported)
+	if (!stream->hdr_static_metadata.valid)
 		return;
 
-	if (dc_is_hdmi_signal(signal)) {
-		info_packet->valid = true;
-
-		info_packet->hb0 = 0x87;
-		info_packet->hb1 = 0x01;
-		info_packet->hb2 = 0x1A;
-		i = 1;
-	} else if (dc_is_dp_signal(signal)) {
-		info_packet->valid = true;
-
-		info_packet->hb0 = 0x00;
-		info_packet->hb1 = 0x87;
-		info_packet->hb2 = 0x1D;
-		info_packet->hb3 = (0x13 << 2);
-		i = 2;
-	}
-
-	data = stream->hdr_static_metadata.is_hdr;
-	info_packet->sb[i++] = data ? 0x02 : 0x00;
-	info_packet->sb[i++] = 0x00;
-
-	data = stream->hdr_static_metadata.chromaticity_green_x / 2;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.chromaticity_green_y / 2;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.chromaticity_blue_x / 2;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.chromaticity_blue_y / 2;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.chromaticity_red_x / 2;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.chromaticity_red_y / 2;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.chromaticity_white_point_x / 2;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.chromaticity_white_point_y / 2;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.max_luminance;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.min_luminance;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.maximum_content_light_level;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	data = stream->hdr_static_metadata.maximum_frame_average_light_level;
-	info_packet->sb[i++] = data & 0xFF;
-	info_packet->sb[i++] = (data & 0xFF00) >> 8;
-
-	if (dc_is_hdmi_signal(signal)) {
-		uint32_t checksum = 0;
-
-		checksum += info_packet->hb0;
-		checksum += info_packet->hb1;
-		checksum += info_packet->hb2;
-
-		for (i = 1; i <= info_packet->hb2; i++)
-			checksum += info_packet->sb[i];
-
-		info_packet->sb[0] = 0x100 - checksum;
-	} else if (dc_is_dp_signal(signal)) {
-		info_packet->sb[0] = 0x01;
-		info_packet->sb[1] = 0x1A;
-	}
+	*info_packet = stream->hdr_static_metadata;
 }
 
 static void set_vsc_info_packet(
-		struct encoder_info_packet *info_packet,
+		struct dc_info_packet *info_packet,
 		struct dc_stream_state *stream)
 {
 	unsigned int vscPacketRevision = 0;
@@ -2650,6 +2569,8 @@
 	if (is_timing_changed(pipe_ctx_old->stream, pipe_ctx->stream))
 		return true;
 
+	if (is_hdr_static_meta_changed(pipe_ctx_old->stream, pipe_ctx->stream))
+		return true;
 
 	return false;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index ce0747e..3732a1d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -101,14 +101,16 @@
 	stream->status.link = stream->sink->link;
 
 	update_stream_signal(stream);
+
+	stream->out_transfer_func = dc_create_transfer_func();
+	if (stream->out_transfer_func != NULL)
+		stream->out_transfer_func->type = TF_TYPE_BYPASS;
 }
 
 static void destruct(struct dc_stream_state *stream)
 {
 	dc_sink_release(stream->sink);
 	if (stream->out_transfer_func != NULL) {
-		dc_transfer_func_release(
-				stream->out_transfer_func);
+		dc_transfer_func_release(stream->out_transfer_func);
 		stream->out_transfer_func = NULL;
 	}
 }
@@ -176,6 +178,7 @@
 	int i;
 	struct dc  *core_dc;
 	struct resource_context *res_ctx;
+	struct pipe_ctx *pipe_to_program = NULL;
 
 	if (NULL == stream) {
 		dm_error("DC: dc_stream is NULL!\n");
@@ -203,9 +206,17 @@
 		if (pipe_ctx->top_pipe && pipe_ctx->plane_state != pipe_ctx->top_pipe->plane_state)
 			continue;
 
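+		/*
+		 * Take the pipe control lock once, on the first pipe being
+		 * programmed, and hold it so the cursor attributes apply to
+		 * all affected pipes atomically.
+		 */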
+		if (!pipe_to_program) {
+			pipe_to_program = pipe_ctx;
+			core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true);
+		}
 
 		core_dc->hwss.set_cursor_attribute(pipe_ctx);
 	}
+
+	if (pipe_to_program)
+		core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false);
+
 	return true;
 }
 
@@ -216,6 +227,7 @@
 	int i;
 	struct dc  *core_dc;
 	struct resource_context *res_ctx;
+	struct pipe_ctx *pipe_to_program = NULL;
 
 	if (NULL == stream) {
 		dm_error("DC: dc_stream is NULL!\n");
@@ -241,9 +253,17 @@
 				!pipe_ctx->plane_res.ipp)
 			continue;
 
+		if (!pipe_to_program) {
+			pipe_to_program = pipe_ctx;
+			core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, true);
+		}
+
 		core_dc->hwss.set_cursor_position(pipe_ctx);
 	}
 
+	if (pipe_to_program)
+		core_dc->hwss.pipe_control_lock(core_dc, pipe_to_program, false);
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
index 132eef3..68a71ad 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c
@@ -38,6 +38,12 @@
 static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state)
 {
 	plane_state->ctx = ctx;
+
+	plane_state->gamma_correction = dc_create_gamma();
+	if (plane_state->gamma_correction != NULL)
+		plane_state->gamma_correction->is_identity = true;
+
+	plane_state->in_transfer_func = dc_create_transfer_func();
+	if (plane_state->in_transfer_func != NULL)
+		plane_state->in_transfer_func->type = TF_TYPE_BYPASS;
 }
 
 static void destruct(struct dc_plane_state *plane_state)
@@ -175,7 +181,7 @@
 	kref_put(&tf->refcount, dc_transfer_func_free);
 }
 
-struct dc_transfer_func *dc_create_transfer_func(void)
+struct dc_transfer_func *dc_create_transfer_func(void)
 {
 	struct dc_transfer_func *tf = kvzalloc(sizeof(*tf), GFP_KERNEL);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index fa4b3c8..9cfde0c 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -38,7 +38,7 @@
 #include "inc/compressor.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.1.38"
+#define DC_VER "3.1.44"
 
 #define MAX_SURFACES 3
 #define MAX_STREAMS 6
@@ -75,6 +75,7 @@
 	bool dynamic_audio;
 	bool is_apu;
 	bool dual_link_dvi;
+	bool post_blend_color_processing;
 };
 
 struct dc_dcc_surface_param {
@@ -202,6 +203,8 @@
 	bool timing_trace;
 	bool clock_trace;
 	bool validation_trace;
+	bool bandwidth_calcs_trace;
+	int max_downscale_src_width;
 
 	/* stutter efficiency related */
 	bool disable_stutter;
@@ -238,6 +241,8 @@
 	bool az_endpoint_mute_only;
 	bool always_use_regamma;
 	bool p010_mpo_support;
+	bool recovery_enabled;
+
 };
 struct dc_state;
 struct resource_pool;
@@ -332,20 +337,6 @@
 	TRANSFER_FUNC_POINTS = 1025
 };
 
-// Moved here from color module for linux
-enum color_transfer_func {
-	transfer_func_unknown,
-	transfer_func_srgb,
-	transfer_func_bt709,
-	transfer_func_pq2084,
-	transfer_func_pq2084_interim,
-	transfer_func_linear_0_1,
-	transfer_func_linear_0_125,
-	transfer_func_dolbyvision,
-	transfer_func_gamma_22,
-	transfer_func_gamma_26
-};
-
 struct dc_hdr_static_metadata {
 	/* display chromaticities and white point in units of 0.00001 */
 	unsigned int chromaticity_green_x;
@@ -361,9 +352,6 @@
 	uint32_t max_luminance;
 	uint32_t maximum_content_light_level;
 	uint32_t maximum_frame_average_light_level;
-
-	bool hdr_supported;
-	bool is_hdr;
 };
 
 enum dc_transfer_func_type {
@@ -419,7 +407,6 @@
 		/* Medium updates */
 		uint32_t dcc_change:1;
 		uint32_t color_space_change:1;
-		uint32_t input_tf_change:1;
 		uint32_t horizontal_mirror_change:1;
 		uint32_t per_pixel_alpha_change:1;
 		uint32_t rotation_change:1;
@@ -428,6 +415,7 @@
 		uint32_t position_change:1;
 		uint32_t in_transfer_func_change:1;
 		uint32_t input_csc_change:1;
+		uint32_t coeff_reduction_change:1;
 		uint32_t output_tf_change:1;
 		uint32_t pixel_format_change:1;
 
@@ -460,7 +448,7 @@
 	struct dc_gamma *gamma_correction;
 	struct dc_transfer_func *in_transfer_func;
 	struct dc_bias_and_scale *bias_and_scale;
-	struct csc_transform input_csc_color_matrix;
+	struct dc_csc_transform input_csc_color_matrix;
 	struct fixed31_32 coeff_reduction_factor;
 	uint32_t sdr_white_level;
 
@@ -468,7 +456,6 @@
 	struct dc_hdr_static_metadata hdr_static_ctx;
 
 	enum dc_color_space color_space;
-	enum color_transfer_func input_tf;
 
 	enum surface_pixel_format format;
 	enum dc_rotation_angle rotation;
@@ -498,7 +485,6 @@
 	enum dc_rotation_angle rotation;
 	enum plane_stereo_format stereo_format;
 	enum dc_color_space color_space;
-	enum color_transfer_func input_tf;
 	unsigned int sdr_white_level;
 	bool horizontal_mirror;
 	bool visible;
@@ -517,19 +503,18 @@
 	struct dc_plane_state *surface;
 
 	/* isr safe update parameters.  null means no updates */
-	struct dc_flip_addrs *flip_addr;
-	struct dc_plane_info *plane_info;
-	struct dc_scaling_info *scaling_info;
+	const struct dc_flip_addrs *flip_addr;
+	const struct dc_plane_info *plane_info;
+	const struct dc_scaling_info *scaling_info;
 
 	/* following updates require alloc/sleep/spin that is not isr safe,
 	 * null means no updates
 	 */
-	struct dc_gamma *gamma;
-	enum color_transfer_func color_input_tf;
-	struct dc_transfer_func *in_transfer_func;
+	const struct dc_gamma *gamma;
+	const struct dc_transfer_func *in_transfer_func;
 
-	struct csc_transform *input_csc_color_matrix;
-	struct fixed31_32 *coeff_reduction_factor;
+	const struct dc_csc_transform *input_csc_color_matrix;
+	const struct fixed31_32 *coeff_reduction_factor;
 };
 
 /*
@@ -699,6 +684,7 @@
 	struct dc_cursor_attributes attributes;
 };
 
+
 /*******************************************************************************
  * Interrupt interfaces
  ******************************************************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
index 2726b02..90bccd5 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h
@@ -26,6 +26,8 @@
 #ifndef DC_DP_TYPES_H
 #define DC_DP_TYPES_H
 
+#include "os_types.h"
+
 enum dc_lane_count {
 	LANE_COUNT_UNKNOWN = 0,
 	LANE_COUNT_ONE = 1,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index 48e1fcf5..bd0fda0 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -117,6 +117,65 @@
 	return reg_val;
 }
 
+uint32_t generic_reg_get6(const struct dc_context *ctx, uint32_t addr,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+		uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+		uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+		uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+		uint8_t shift6, uint32_t mask6, uint32_t *field_value6)
+{
+	uint32_t reg_val = dm_read_reg(ctx, addr);
+	*field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1);
+	*field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2);
+	*field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3);
+	*field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4);
+	*field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5);
+	*field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6);
+	return reg_val;
+}
+
+uint32_t generic_reg_get7(const struct dc_context *ctx, uint32_t addr,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+		uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+		uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+		uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+		uint8_t shift6, uint32_t mask6, uint32_t *field_value6,
+		uint8_t shift7, uint32_t mask7, uint32_t *field_value7)
+{
+	uint32_t reg_val = dm_read_reg(ctx, addr);
+	*field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1);
+	*field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2);
+	*field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3);
+	*field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4);
+	*field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5);
+	*field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6);
+	*field_value7 = get_reg_field_value_ex(reg_val, mask7, shift7);
+	return reg_val;
+}
+
+uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+		uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+		uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+		uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+		uint8_t shift6, uint32_t mask6, uint32_t *field_value6,
+		uint8_t shift7, uint32_t mask7, uint32_t *field_value7,
+		uint8_t shift8, uint32_t mask8, uint32_t *field_value8)
+{
+	uint32_t reg_val = dm_read_reg(ctx, addr);
+	*field_value1 = get_reg_field_value_ex(reg_val, mask1, shift1);
+	*field_value2 = get_reg_field_value_ex(reg_val, mask2, shift2);
+	*field_value3 = get_reg_field_value_ex(reg_val, mask3, shift3);
+	*field_value4 = get_reg_field_value_ex(reg_val, mask4, shift4);
+	*field_value5 = get_reg_field_value_ex(reg_val, mask5, shift5);
+	*field_value6 = get_reg_field_value_ex(reg_val, mask6, shift6);
+	*field_value7 = get_reg_field_value_ex(reg_val, mask7, shift7);
+	*field_value8 = get_reg_field_value_ex(reg_val, mask8, shift8);
+	return reg_val;
+}
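+
+/*
+ * Sketch of intended use (hypothetical shifts/masks/locals):
+ *	uint32_t v1, v2, v3, v4, v5, v6;
+ *
+ *	generic_reg_get6(ctx, addr,
+ *			shift1, mask1, &v1, shift2, mask2, &v2,
+ *			shift3, mask3, &v3, shift4, mask4, &v4,
+ *			shift5, mask5, &v5, shift6, mask6, &v6);
+ *
+ * One MMIO read feeds all six decoded fields.
+ */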
 /* note: a va version of this is a pretty bad idea; the output parameters are passed by
  * pointer, so the compiler cannot check for size mismatches and stack corruption is easy to hit
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index b83a7dc..b1f7057 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -423,6 +423,11 @@
 	GAMMA_CS_TFM_1D = 3,
 };
 
+struct dc_csc_transform {
+	uint16_t matrix[12];
+	bool enable_adjustment;
+};
+
 struct dc_gamma {
 	struct kref refcount;
 	enum dc_gamma_type type;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index dc34515..8a716baa 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -51,6 +51,14 @@
 	struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM];
 };
 
+struct time_stamp {
+	uint64_t edp_poweroff;
+	uint64_t edp_poweron;
+};
+
+struct link_trace {
+	struct time_stamp time_stamp;
+};
 /*
  * A link contains one or more sinks and their connected status.
  * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
@@ -114,6 +122,7 @@
 
 	struct dc_link_status link_status;
 
+	struct link_trace link_trace;
 };
 
 const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index d017df5..d7e6d53 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -58,18 +58,20 @@
 
 	struct freesync_context freesync_ctx;
 
-	struct dc_hdr_static_metadata hdr_static_metadata;
+	struct dc_info_packet hdr_static_metadata;
 	struct dc_transfer_func *out_transfer_func;
 	struct colorspace_transform gamut_remap_matrix;
-	struct csc_transform csc_color_matrix;
+	struct dc_csc_transform csc_color_matrix;
 
 	enum dc_color_space output_color_space;
 	enum dc_dither_option dither_option;
 
 	enum view_3d_format view_format;
-	enum color_transfer_func output_tf;
 
 	bool ignore_msa_timing_param;
+
+	unsigned long long periodic_fn_vsync_delta;
+
 	/* TODO: custom INFO packets */
 	/* TODO: ABM info (DMCU) */
 	/* PSR info */
@@ -110,9 +112,10 @@
 	struct rect src;
 	struct rect dst;
 	struct dc_transfer_func *out_transfer_func;
-	struct dc_hdr_static_metadata *hdr_static_metadata;
-	enum color_transfer_func color_output_tf;
+	struct dc_info_packet *hdr_static_metadata;
 	unsigned int *abm_level;
+
+	unsigned long long *periodic_fn_vsync_delta;
 };
 
 bool dc_is_stream_unchanged(
@@ -131,13 +134,6 @@
  *   This does not trigger a flip.  No surface address is programmed.
  */
 
-bool dc_commit_planes_to_stream(
-		struct dc *dc,
-		struct dc_plane_state **plane_states,
-		uint8_t new_plane_count,
-		struct dc_stream_state *dc_stream,
-		struct dc_state *state);
-
 void dc_commit_updates_for_stream(struct dc *dc,
 		struct dc_surface_update *srf_updates,
 		int surface_count,
@@ -209,14 +205,6 @@
 enum dc_status dc_validate_stream(struct dc *dc, struct dc_stream_state *stream);
 
 /*
- * This function takes a stream and checks if it is guaranteed to be supported.
- * Guaranteed means that MAX_COFUNC similar streams are supported.
- *
- * After this call:
- *   No hardware is programmed for call.  Only validation is done.
- */
-
-/*
  * Set up streams and links associated to drive sinks
  * The streams parameter is an absolute set of all active streams.
  *
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 9441305..76df253 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -25,7 +25,7 @@
 #ifndef DC_TYPES_H_
 #define DC_TYPES_H_
 
-#include "fixed32_32.h"
+#include "os_types.h"
 #include "fixed31_32.h"
 #include "irq_types.h"
 #include "dc_dp_types.h"
@@ -370,12 +370,6 @@
 	struct fixed31_32 hue;
 };
 
-enum {
-	MAX_LANES = 2,
-	MAX_COFUNC_PATH = 6,
-	LAYER_INDEX_PRIMARY = -1,
-};
-
 enum dpcd_downstream_port_max_bpc {
 	DOWN_STREAM_MAX_8BPC = 0,
 	DOWN_STREAM_MAX_10BPC,
@@ -530,6 +524,15 @@
 	uint32_t frame_counter;
 };
 
+struct dc_info_packet {
+	bool valid;
+	uint8_t hb0;
+	uint8_t hb1;
+	uint8_t hb2;
+	uint8_t hb3;
+	uint8_t sb[32];
+};
+
 #define DC_PLANE_UPDATE_TIMES_MAX 10
 
 struct dc_plane_flip_time {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
index fe92a12..29294db 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c
@@ -26,7 +26,7 @@
 #include "dce_abm.h"
 #include "dm_services.h"
 #include "reg_helper.h"
-#include "fixed32_32.h"
+#include "fixed31_32.h"
 #include "dc.h"
 
 #include "atom.h"
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
index 6d5cdcd..7f6d724 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_audio.c
@@ -33,8 +33,9 @@
 
 #define CTX \
 	aud->base.ctx
-#define DC_LOGGER \
-	aud->base.ctx->logger
+
+#define DC_LOGGER_INIT()
+
 #define REG(reg)\
 	(aud->regs->reg)
 
@@ -348,8 +349,8 @@
 
 void dce_aud_az_enable(struct audio *audio)
 {
-	struct dce_audio *aud = DCE_AUD(audio);
 	uint32_t value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL);
+	DC_LOGGER_INIT();
 
 	set_reg_field_value(value, 1,
 			    AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL,
@@ -371,7 +372,7 @@
 void dce_aud_az_disable(struct audio *audio)
 {
 	uint32_t value;
-	struct dce_audio *aud = DCE_AUD(audio);
+	DC_LOGGER_INIT();
 
 	value = AZ_REG_READ(AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL);
 	set_reg_field_value(value, 1,
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
index 0aa2cda..599c7ab 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
@@ -41,8 +41,9 @@
 
 #define CTX \
 	clk_src->base.ctx
-#define DC_LOGGER \
-	calc_pll_cs->ctx->logger
+
+#define DC_LOGGER_INIT()
+
 #undef FN
 #define FN(reg_name, field_name) \
 	clk_src->cs_shift->field_name, clk_src->cs_mask->field_name
@@ -467,7 +468,7 @@
 {
 	uint32_t field = 0;
 	uint32_t pll_calc_error = MAX_PLL_CALC_ERROR;
-	struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll;
+	DC_LOGGER_INIT();
 	/* Check if reference clock is external (not pcie/xtalin)
 	* HW Dce80 spec:
 	* 00 - PCIE_REFCLK, 01 - XTALIN,    02 - GENERICA,    03 - GENERICB
@@ -557,8 +558,8 @@
 		struct pll_settings *pll_settings)
 {
 	struct dce110_clk_src *clk_src = TO_DCE110_CLK_SRC(cs);
-	struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll;
 	uint32_t pll_calc_error = MAX_PLL_CALC_ERROR;
+	DC_LOGGER_INIT();
 
 	if (pix_clk_params == NULL || pll_settings == NULL
 			|| pix_clk_params->requested_pix_clk == 0) {
@@ -589,6 +590,7 @@
 			pll_settings, pix_clk_params);
 		break;
 	case DCE_VERSION_11_2:
+	case DCE_VERSION_11_22:
 	case DCE_VERSION_12_0:
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case DCN_VERSION_1_0:
@@ -655,12 +657,12 @@
 			return 0;
 		}
 
-		pix_rate = dal_fixed31_32_from_int(clk_src->ref_freq_khz);
-		pix_rate = dal_fixed31_32_mul_int(pix_rate, 1000);
-		pix_rate = dal_fixed31_32_mul_int(pix_rate, phase);
-		pix_rate = dal_fixed31_32_div_int(pix_rate, modulo);
+		pix_rate = dc_fixpt_from_int(clk_src->ref_freq_khz);
+		pix_rate = dc_fixpt_mul_int(pix_rate, 1000);
+		pix_rate = dc_fixpt_mul_int(pix_rate, phase);
+		pix_rate = dc_fixpt_div_int(pix_rate, modulo);
 
-		return dal_fixed31_32_round(pix_rate);
+		return dc_fixpt_round(pix_rate);
 	} else {
 		return dce110_get_dp_pixel_rate_from_combo_phy_pll(cs, pix_clk_params, pll_settings);
 	}
@@ -709,12 +711,12 @@
 		const struct spread_spectrum_data *ss_data,
 		struct delta_sigma_data *ds_data)
 {
-	struct fixed32_32 fb_div;
-	struct fixed32_32 ss_amount;
-	struct fixed32_32 ss_nslip_amount;
-	struct fixed32_32 ss_ds_frac_amount;
-	struct fixed32_32 ss_step_size;
-	struct fixed32_32 modulation_time;
+	struct fixed31_32 fb_div;
+	struct fixed31_32 ss_amount;
+	struct fixed31_32 ss_nslip_amount;
+	struct fixed31_32 ss_ds_frac_amount;
+	struct fixed31_32 ss_step_size;
+	struct fixed31_32 modulation_time;
 
 	if (ds_data == NULL)
 		return false;
@@ -729,42 +731,42 @@
 
 	/* compute SS_AMOUNT_FBDIV & SS_AMOUNT_NFRAC_SLIP & SS_AMOUNT_DSFRAC*/
 	/* 6 decimal point support in fractional feedback divider */
-	fb_div  = dal_fixed32_32_from_fraction(
+	fb_div  = dc_fixpt_from_fraction(
 		pll_settings->fract_feedback_divider, 1000000);
-	fb_div = dal_fixed32_32_add_int(fb_div, pll_settings->feedback_divider);
+	fb_div = dc_fixpt_add_int(fb_div, pll_settings->feedback_divider);
 
 	ds_data->ds_frac_amount = 0;
 	/* spreadSpectrumPercentage is in units of 0.01%,
 	 * so it has to be divided by 100 * 100 */
-	ss_amount = dal_fixed32_32_mul(
-		fb_div, dal_fixed32_32_from_fraction(ss_data->percentage,
+	ss_amount = dc_fixpt_mul(
+		fb_div, dc_fixpt_from_fraction(ss_data->percentage,
 					100 * ss_data->percentage_divider));
-	ds_data->feedback_amount = dal_fixed32_32_floor(ss_amount);
+	ds_data->feedback_amount = dc_fixpt_floor(ss_amount);
 
-	ss_nslip_amount = dal_fixed32_32_sub(ss_amount,
-		dal_fixed32_32_from_int(ds_data->feedback_amount));
-	ss_nslip_amount = dal_fixed32_32_mul_int(ss_nslip_amount, 10);
-	ds_data->nfrac_amount = dal_fixed32_32_floor(ss_nslip_amount);
+	ss_nslip_amount = dc_fixpt_sub(ss_amount,
+		dc_fixpt_from_int(ds_data->feedback_amount));
+	ss_nslip_amount = dc_fixpt_mul_int(ss_nslip_amount, 10);
+	ds_data->nfrac_amount = dc_fixpt_floor(ss_nslip_amount);
 
-	ss_ds_frac_amount = dal_fixed32_32_sub(ss_nslip_amount,
-		dal_fixed32_32_from_int(ds_data->nfrac_amount));
-	ss_ds_frac_amount = dal_fixed32_32_mul_int(ss_ds_frac_amount, 65536);
-	ds_data->ds_frac_amount = dal_fixed32_32_floor(ss_ds_frac_amount);
+	ss_ds_frac_amount = dc_fixpt_sub(ss_nslip_amount,
+		dc_fixpt_from_int(ds_data->nfrac_amount));
+	ss_ds_frac_amount = dc_fixpt_mul_int(ss_ds_frac_amount, 65536);
+	ds_data->ds_frac_amount = dc_fixpt_floor(ss_ds_frac_amount);
 
 	/* compute SS_STEP_SIZE_DSFRAC */
-	modulation_time = dal_fixed32_32_from_fraction(
+	modulation_time = dc_fixpt_from_fraction(
 		pll_settings->reference_freq * 1000,
 		pll_settings->reference_divider * ss_data->modulation_freq_hz);
 
 	if (ss_data->flags.CENTER_SPREAD)
-		modulation_time = dal_fixed32_32_div_int(modulation_time, 4);
+		modulation_time = dc_fixpt_div_int(modulation_time, 4);
 	else
-		modulation_time = dal_fixed32_32_div_int(modulation_time, 2);
+		modulation_time = dc_fixpt_div_int(modulation_time, 2);
 
-	ss_step_size = dal_fixed32_32_div(ss_amount, modulation_time);
+	ss_step_size = dc_fixpt_div(ss_amount, modulation_time);
 	/* SS_STEP_SIZE_DSFRAC_DEC = Int(SS_STEP_SIZE * 2 ^ 16 * 10)*/
-	ss_step_size = dal_fixed32_32_mul_int(ss_step_size, 65536 * 10);
-	ds_data->ds_frac_size =  dal_fixed32_32_floor(ss_step_size);
+	ss_step_size = dc_fixpt_mul_int(ss_step_size, 65536 * 10);
+	ds_data->ds_frac_size =  dc_fixpt_floor(ss_step_size);
 
 	return true;
 }
@@ -978,6 +980,7 @@
 
 		break;
 	case DCE_VERSION_11_2:
+	case DCE_VERSION_11_22:
 	case DCE_VERSION_12_0:
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case DCN_VERSION_1_0:
@@ -1054,7 +1057,7 @@
 	struct spread_spectrum_info *ss_info_cur;
 	struct spread_spectrum_data *ss_data_cur;
 	uint32_t i;
-	struct calc_pll_clock_source *calc_pll_cs = &clk_src->calc_pll;
+	DC_LOGGER_INIT();
 	if (ss_entries_num == NULL) {
 		DC_LOG_SYNC(
 			"Invalid entry !!!\n");
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
index 78e6beb..8a581c6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c
@@ -26,7 +26,7 @@
 #include "dce_clocks.h"
 #include "dm_services.h"
 #include "reg_helper.h"
-#include "fixed32_32.h"
+#include "fixed31_32.h"
 #include "bios_parser_interface.h"
 #include "dc.h"
 #include "dmcu.h"
@@ -35,7 +35,7 @@
 #endif
 #include "core_types.h"
 #include "dc_types.h"
-
+#include "dal_asic_id.h"
 
 #define TO_DCE_CLOCKS(clocks)\
 	container_of(clocks, struct dce_disp_clk, base)
@@ -228,19 +228,19 @@
 	 generated according to average value (case as with previous ASICs)
 	  */
 	if (clk_dce->ss_on_dprefclk && clk_dce->dprefclk_ss_divider != 0) {
-		struct fixed32_32 ss_percentage = dal_fixed32_32_div_int(
-				dal_fixed32_32_from_fraction(
+		struct fixed31_32 ss_percentage = dc_fixpt_div_int(
+				dc_fixpt_from_fraction(
 						clk_dce->dprefclk_ss_percentage,
 						clk_dce->dprefclk_ss_divider), 200);
-		struct fixed32_32 adj_dp_ref_clk_khz;
+		struct fixed31_32 adj_dp_ref_clk_khz;
 
-		ss_percentage = dal_fixed32_32_sub(dal_fixed32_32_one,
+		ss_percentage = dc_fixpt_sub(dc_fixpt_one,
 								ss_percentage);
 		adj_dp_ref_clk_khz =
-			dal_fixed32_32_mul_int(
+			dc_fixpt_mul_int(
 				ss_percentage,
 				dp_ref_clk_khz);
-		dp_ref_clk_khz = dal_fixed32_32_floor(adj_dp_ref_clk_khz);
+		dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz);
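+		/* e.g. (illustrative numbers): percentage 36 with divider 1000
+		 * gives 36 / 1000 / 200 = 0.00018, so a 600000 kHz reference
+		 * clock becomes floor(600000 * 0.99982) = 599892 kHz
+		 */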
 	}
 
 	return dp_ref_clk_khz;
@@ -256,19 +256,19 @@
 	int dp_ref_clk_khz = 600000;
 
 	if (clk_dce->ss_on_dprefclk && clk_dce->dprefclk_ss_divider != 0) {
-		struct fixed32_32 ss_percentage = dal_fixed32_32_div_int(
-				dal_fixed32_32_from_fraction(
+		struct fixed31_32 ss_percentage = dc_fixpt_div_int(
+				dc_fixpt_from_fraction(
 						clk_dce->dprefclk_ss_percentage,
 						clk_dce->dprefclk_ss_divider), 200);
-		struct fixed32_32 adj_dp_ref_clk_khz;
+		struct fixed31_32 adj_dp_ref_clk_khz;
 
-		ss_percentage = dal_fixed32_32_sub(dal_fixed32_32_one,
+		ss_percentage = dc_fixpt_sub(dc_fixpt_one,
 								ss_percentage);
 		adj_dp_ref_clk_khz =
-			dal_fixed32_32_mul_int(
+			dc_fixpt_mul_int(
 				ss_percentage,
 				dp_ref_clk_khz);
-		dp_ref_clk_khz = dal_fixed32_32_floor(adj_dp_ref_clk_khz);
+		dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz);
 	}
 
 	return dp_ref_clk_khz;
@@ -413,9 +413,12 @@
 	/*VBIOS will determine DPREFCLK frequency, so we don't set it*/
 	dce_clk_params.target_clock_frequency = 0;
 	dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK;
-	dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK =
+	if (!ASICREV_IS_VEGA20_P(clk->ctx->asic_id.hw_internal_rev))
+		dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK =
 			(dce_clk_params.pll_id ==
 					CLOCK_SOURCE_COMBO_DISPLAY_PLL0);
+	else
+		dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false;
 
 	bp->funcs->set_dce_clock(bp, &dce_clk_params);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index 2ee3d9b..a576b8b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -28,7 +28,7 @@
 #include "dce_dmcu.h"
 #include "dm_services.h"
 #include "reg_helper.h"
-#include "fixed32_32.h"
+#include "fixed31_32.h"
 #include "dc.h"
 
 #define TO_DCE_DMCU(dmcu)\
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
index 4877243..0275d6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.c
@@ -53,7 +53,8 @@
 	struct dce_hwseq *hws = dc->hwseq;
 
 	/* Do not lock the pipe while it is blanked */
-	if (lock && pipe->stream_res.tg->funcs->is_blanked(pipe->stream_res.tg))
+	if (lock && pipe->stream_res.tg->funcs->is_blanked &&
+	    pipe->stream_res.tg->funcs->is_blanked(pipe->stream_res.tg))
 		return;
 
 	val = REG_GET_4(BLND_V_UPDATE_LOCK[pipe->stream_res.tg->inst],
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
index d737e91..5d9506b 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_ipp.c
@@ -195,13 +195,13 @@
 
 	for (i = 0; i < gamma->num_entries; i++) {
 		REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR,
-				dal_fixed31_32_round(
+				dc_fixpt_round(
 					gamma->entries.red[i]));
 		REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR,
-				dal_fixed31_32_round(
+				dc_fixpt_round(
 					gamma->entries.green[i]));
 		REG_SET(DC_LUT_SEQ_COLOR, 0, DC_LUT_SEQ_COLOR,
-				dal_fixed31_32_round(
+				dc_fixpt_round(
 					gamma->entries.blue[i]));
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
index 8167cad..dbe3b26 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c
@@ -113,6 +113,7 @@
 	.connect_dig_be_to_fe = dce110_link_encoder_connect_dig_be_to_fe,
 	.enable_hpd = dce110_link_encoder_enable_hpd,
 	.disable_hpd = dce110_link_encoder_disable_hpd,
+	.is_dig_enabled = dce110_is_dig_enabled,
 	.destroy = dce110_link_encoder_destroy
 };
 
@@ -535,8 +536,9 @@
 		DP_SEC_GSP0_PRIORITY, 1);
 }
 
-static bool is_dig_enabled(const struct dce110_link_encoder *enc110)
+bool dce110_is_dig_enabled(struct link_encoder *enc)
 {
+	struct dce110_link_encoder *enc110 = TO_DCE110_LINK_ENC(enc);
 	uint32_t value;
 
 	REG_GET(DIG_BE_EN_CNTL, DIG_ENABLE, &value);
@@ -1031,7 +1033,7 @@
 	struct bp_transmitter_control cntl = { 0 };
 	enum bp_result result;
 
-	if (!is_dig_enabled(enc110)) {
+	if (!dce110_is_dig_enabled(enc)) {
 		/* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */
 		return;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
index 0ec3433..3470694 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.h
@@ -263,4 +263,6 @@
 void dce110_psr_program_secondary_packet(struct link_encoder *enc,
 			unsigned int sdp_transmit_line_num_deadline);
 
+bool dce110_is_dig_enabled(struct link_encoder *enc);
+
 #endif /* __DC_LINK_ENCODER__DCE110_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
index 0790f25..b235a75 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c
@@ -174,6 +174,25 @@
 		URGENCY_HIGH_WATERMARK, urgency_high_wm);
 }
 
+static void dce120_program_urgency_watermark(
+	struct dce_mem_input *dce_mi,
+	uint32_t wm_select,
+	uint32_t urgency_low_wm,
+	uint32_t urgency_high_wm)
+{
+	REG_UPDATE(DPG_WATERMARK_MASK_CONTROL,
+		URGENCY_WATERMARK_MASK, wm_select);
+
+	REG_SET_2(DPG_PIPE_URGENCY_CONTROL, 0,
+		URGENCY_LOW_WATERMARK, urgency_low_wm,
+		URGENCY_HIGH_WATERMARK, urgency_high_wm);
+
+	REG_SET_2(DPG_PIPE_URGENT_LEVEL_CONTROL, 0,
+		URGENT_LEVEL_LOW_WATERMARK, urgency_low_wm,
+		URGENT_LEVEL_HIGH_WATERMARK, urgency_high_wm);
+}
+
 static void program_nbp_watermark(
 	struct dce_mem_input *dce_mi,
 	uint32_t wm_select,
@@ -206,6 +225,25 @@
 	}
 }
 
+static void dce120_program_stutter_watermark(
+	struct dce_mem_input *dce_mi,
+	uint32_t wm_select,
+	uint32_t stutter_mark,
+	uint32_t stutter_entry)
+{
+	REG_UPDATE(DPG_WATERMARK_MASK_CONTROL,
+		STUTTER_EXIT_SELF_REFRESH_WATERMARK_MASK, wm_select);
+
+	if (REG(DPG_PIPE_STUTTER_CONTROL2))
+		REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL2,
+				STUTTER_EXIT_SELF_REFRESH_WATERMARK, stutter_mark,
+				STUTTER_ENTER_SELF_REFRESH_WATERMARK, stutter_entry);
+	else
+		REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL,
+				STUTTER_EXIT_SELF_REFRESH_WATERMARK, stutter_mark,
+				STUTTER_ENTER_SELF_REFRESH_WATERMARK, stutter_entry);
+}
+
 static void program_stutter_watermark(
 	struct dce_mem_input *dce_mi,
 	uint32_t wm_select,
@@ -225,7 +263,8 @@
 static void dce_mi_program_display_marks(
 	struct mem_input *mi,
 	struct dce_watermarks nbp,
-	struct dce_watermarks stutter,
+	struct dce_watermarks stutter_exit,
+	struct dce_watermarks stutter_enter,
 	struct dce_watermarks urgent,
 	uint32_t total_dest_line_time_ns)
 {
@@ -243,13 +282,14 @@
 	program_nbp_watermark(dce_mi, 2, nbp.a_mark); /* set a */
 	program_nbp_watermark(dce_mi, 1, nbp.d_mark); /* set d */
 
-	program_stutter_watermark(dce_mi, 2, stutter.a_mark); /* set a */
-	program_stutter_watermark(dce_mi, 1, stutter.d_mark); /* set d */
+	program_stutter_watermark(dce_mi, 2, stutter_exit.a_mark); /* set a */
+	program_stutter_watermark(dce_mi, 1, stutter_exit.d_mark); /* set d */
 }
 
-static void dce120_mi_program_display_marks(struct mem_input *mi,
+static void dce112_mi_program_display_marks(struct mem_input *mi,
 	struct dce_watermarks nbp,
-	struct dce_watermarks stutter,
+	struct dce_watermarks stutter_exit,
+	struct dce_watermarks stutter_entry,
 	struct dce_watermarks urgent,
 	uint32_t total_dest_line_time_ns)
 {
@@ -273,10 +313,43 @@
 	program_nbp_watermark(dce_mi, 2, nbp.c_mark); /* set c */
 	program_nbp_watermark(dce_mi, 3, nbp.d_mark); /* set d */
 
-	program_stutter_watermark(dce_mi, 0, stutter.a_mark); /* set a */
-	program_stutter_watermark(dce_mi, 1, stutter.b_mark); /* set b */
-	program_stutter_watermark(dce_mi, 2, stutter.c_mark); /* set c */
-	program_stutter_watermark(dce_mi, 3, stutter.d_mark); /* set d */
+	program_stutter_watermark(dce_mi, 0, stutter_exit.a_mark); /* set a */
+	program_stutter_watermark(dce_mi, 1, stutter_exit.b_mark); /* set b */
+	program_stutter_watermark(dce_mi, 2, stutter_exit.c_mark); /* set c */
+	program_stutter_watermark(dce_mi, 3, stutter_exit.d_mark); /* set d */
+}
+
+static void dce120_mi_program_display_marks(struct mem_input *mi,
+	struct dce_watermarks nbp,
+	struct dce_watermarks stutter_exit,
+	struct dce_watermarks stutter_entry,
+	struct dce_watermarks urgent,
+	uint32_t total_dest_line_time_ns)
+{
+	struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi);
+	uint32_t stutter_en = mi->ctx->dc->debug.disable_stutter ? 0 : 1;
+
+	dce120_program_urgency_watermark(dce_mi, 0, /* set a */
+			urgent.a_mark, total_dest_line_time_ns);
+	dce120_program_urgency_watermark(dce_mi, 1, /* set b */
+			urgent.b_mark, total_dest_line_time_ns);
+	dce120_program_urgency_watermark(dce_mi, 2, /* set c */
+			urgent.c_mark, total_dest_line_time_ns);
+	dce120_program_urgency_watermark(dce_mi, 3, /* set d */
+			urgent.d_mark, total_dest_line_time_ns);
+
+	REG_UPDATE_2(DPG_PIPE_STUTTER_CONTROL,
+		STUTTER_ENABLE, stutter_en,
+		STUTTER_IGNORE_FBC, 1);
+	program_nbp_watermark(dce_mi, 0, nbp.a_mark); /* set a */
+	program_nbp_watermark(dce_mi, 1, nbp.b_mark); /* set b */
+	program_nbp_watermark(dce_mi, 2, nbp.c_mark); /* set c */
+	program_nbp_watermark(dce_mi, 3, nbp.d_mark); /* set d */
+
+	dce120_program_stutter_watermark(dce_mi, 0, stutter_exit.a_mark, stutter_entry.a_mark); /* set a */
+	dce120_program_stutter_watermark(dce_mi, 1, stutter_exit.b_mark, stutter_entry.b_mark); /* set b */
+	dce120_program_stutter_watermark(dce_mi, 2, stutter_exit.c_mark, stutter_entry.c_mark); /* set c */
+	dce120_program_stutter_watermark(dce_mi, 3, stutter_exit.d_mark, stutter_entry.d_mark); /* set d */
 }
 
 static void program_tiling(
@@ -696,5 +769,17 @@
 	const struct dce_mem_input_mask *mi_mask)
 {
 	dce_mem_input_construct(dce_mi, ctx, inst, regs, mi_shift, mi_mask);
+	dce_mi->base.funcs->mem_input_program_display_marks = dce112_mi_program_display_marks;
+}
+
+void dce120_mem_input_construct(
+	struct dce_mem_input *dce_mi,
+	struct dc_context *ctx,
+	int inst,
+	const struct dce_mem_input_registers *regs,
+	const struct dce_mem_input_shift *mi_shift,
+	const struct dce_mem_input_mask *mi_mask)
+{
+	dce_mem_input_construct(dce_mi, ctx, inst, regs, mi_shift, mi_mask);
 	dce_mi->base.funcs->mem_input_program_display_marks = dce120_mi_program_display_marks;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
index 05d39c0..d15b0d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.h
@@ -106,6 +106,7 @@
 	uint32_t DPG_PIPE_ARBITRATION_CONTROL1;
 	uint32_t DPG_WATERMARK_MASK_CONTROL;
 	uint32_t DPG_PIPE_URGENCY_CONTROL;
+	uint32_t DPG_PIPE_URGENT_LEVEL_CONTROL;
 	uint32_t DPG_PIPE_NB_PSTATE_CHANGE_CONTROL;
 	uint32_t DPG_PIPE_LOW_POWER_CONTROL;
 	uint32_t DPG_PIPE_STUTTER_CONTROL;
@@ -213,6 +214,11 @@
 
 #define MI_DCE12_DMIF_PG_MASK_SH_LIST(mask_sh, blk)\
 	SFB(blk, DPG_PIPE_STUTTER_CONTROL2, STUTTER_EXIT_SELF_REFRESH_WATERMARK, mask_sh),\
+	SFB(blk, DPG_PIPE_STUTTER_CONTROL2, STUTTER_ENTER_SELF_REFRESH_WATERMARK, mask_sh),\
+	SFB(blk, DPG_PIPE_URGENT_LEVEL_CONTROL, URGENT_LEVEL_LOW_WATERMARK, mask_sh),\
+	SFB(blk, DPG_PIPE_URGENT_LEVEL_CONTROL, URGENT_LEVEL_HIGH_WATERMARK, mask_sh),\
+	SFB(blk, DPG_PIPE_URGENCY_CONTROL, URGENCY_LOW_WATERMARK, mask_sh),\
+	SFB(blk, DPG_PIPE_URGENCY_CONTROL, URGENCY_HIGH_WATERMARK, mask_sh),\
 	SFB(blk, DPG_WATERMARK_MASK_CONTROL, PSTATE_CHANGE_WATERMARK_MASK, mask_sh),\
 	SFB(blk, DPG_PIPE_LOW_POWER_CONTROL, PSTATE_CHANGE_ENABLE, mask_sh),\
 	SFB(blk, DPG_PIPE_LOW_POWER_CONTROL, PSTATE_CHANGE_URGENT_DURING_REQUEST, mask_sh),\
@@ -286,6 +292,8 @@
 	type STUTTER_EXIT_SELF_REFRESH_WATERMARK_MASK; \
 	type URGENCY_LOW_WATERMARK; \
 	type URGENCY_HIGH_WATERMARK; \
+	type URGENT_LEVEL_LOW_WATERMARK;\
+	type URGENT_LEVEL_HIGH_WATERMARK;\
 	type NB_PSTATE_CHANGE_ENABLE; \
 	type NB_PSTATE_CHANGE_URGENT_DURING_REQUEST; \
 	type NB_PSTATE_CHANGE_NOT_SELF_REFRESH_DURING_REQUEST; \
@@ -297,6 +305,7 @@
 	type STUTTER_ENABLE; \
 	type STUTTER_IGNORE_FBC; \
 	type STUTTER_EXIT_SELF_REFRESH_WATERMARK; \
+	type STUTTER_ENTER_SELF_REFRESH_WATERMARK; \
 	type DMIF_BUFFERS_ALLOCATED; \
 	type DMIF_BUFFERS_ALLOCATION_COMPLETED; \
 	type ENABLE; /* MC_HUB_RDREQ_DMIF_LIMIT */\
@@ -344,4 +353,12 @@
 	const struct dce_mem_input_shift *mi_shift,
 	const struct dce_mem_input_mask *mi_mask);
 
+void dce120_mem_input_construct(
+	struct dce_mem_input *dce_mi,
+	struct dc_context *ctx,
+	int inst,
+	const struct dce_mem_input_registers *regs,
+	const struct dce_mem_input_shift *mi_shift,
+	const struct dce_mem_input_mask *mi_mask);
+
 #endif /*__DCE_MEM_INPUT_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c b/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c
index 6243450..48862be 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_scl_filters.c
@@ -1014,11 +1014,11 @@
 
 const uint16_t *get_filter_3tap_16p(struct fixed31_32 ratio)
 {
-	if (ratio.value < dal_fixed31_32_one.value)
+	if (ratio.value < dc_fixpt_one.value)
 		return filter_3tap_16p_upscale;
-	else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
 		return filter_3tap_16p_117;
-	else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
 		return filter_3tap_16p_150;
 	else
 		return filter_3tap_16p_183;
@@ -1026,11 +1026,11 @@
 
 const uint16_t *get_filter_3tap_64p(struct fixed31_32 ratio)
 {
-	if (ratio.value < dal_fixed31_32_one.value)
+	if (ratio.value < dc_fixpt_one.value)
 		return filter_3tap_64p_upscale;
-	else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
 		return filter_3tap_64p_117;
-	else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
 		return filter_3tap_64p_150;
 	else
 		return filter_3tap_64p_183;
@@ -1038,11 +1038,11 @@
 
 const uint16_t *get_filter_4tap_16p(struct fixed31_32 ratio)
 {
-	if (ratio.value < dal_fixed31_32_one.value)
+	if (ratio.value < dc_fixpt_one.value)
 		return filter_4tap_16p_upscale;
-	else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
 		return filter_4tap_16p_117;
-	else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
 		return filter_4tap_16p_150;
 	else
 		return filter_4tap_16p_183;
@@ -1050,11 +1050,11 @@
 
 const uint16_t *get_filter_4tap_64p(struct fixed31_32 ratio)
 {
-	if (ratio.value < dal_fixed31_32_one.value)
+	if (ratio.value < dc_fixpt_one.value)
 		return filter_4tap_64p_upscale;
-	else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
 		return filter_4tap_64p_117;
-	else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
 		return filter_4tap_64p_150;
 	else
 		return filter_4tap_64p_183;
@@ -1062,11 +1062,11 @@
 
 const uint16_t *get_filter_5tap_64p(struct fixed31_32 ratio)
 {
-	if (ratio.value < dal_fixed31_32_one.value)
+	if (ratio.value < dc_fixpt_one.value)
 		return filter_5tap_64p_upscale;
-	else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
 		return filter_5tap_64p_117;
-	else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
 		return filter_5tap_64p_150;
 	else
 		return filter_5tap_64p_183;
@@ -1074,11 +1074,11 @@
 
 const uint16_t *get_filter_6tap_64p(struct fixed31_32 ratio)
 {
-	if (ratio.value < dal_fixed31_32_one.value)
+	if (ratio.value < dc_fixpt_one.value)
 		return filter_6tap_64p_upscale;
-	else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
 		return filter_6tap_64p_117;
-	else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
 		return filter_6tap_64p_150;
 	else
 		return filter_6tap_64p_183;
@@ -1086,11 +1086,11 @@
 
 const uint16_t *get_filter_7tap_64p(struct fixed31_32 ratio)
 {
-	if (ratio.value < dal_fixed31_32_one.value)
+	if (ratio.value < dc_fixpt_one.value)
 		return filter_7tap_64p_upscale;
-	else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
 		return filter_7tap_64p_117;
-	else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
 		return filter_7tap_64p_150;
 	else
 		return filter_7tap_64p_183;
@@ -1098,11 +1098,11 @@
 
 const uint16_t *get_filter_8tap_64p(struct fixed31_32 ratio)
 {
-	if (ratio.value < dal_fixed31_32_one.value)
+	if (ratio.value < dc_fixpt_one.value)
 		return filter_8tap_64p_upscale;
-	else if (ratio.value < dal_fixed31_32_from_fraction(4, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(4, 3).value)
 		return filter_8tap_64p_117;
-	else if (ratio.value < dal_fixed31_32_from_fraction(5, 3).value)
+	else if (ratio.value < dc_fixpt_from_fraction(5, 3).value)
 		return filter_8tap_64p_150;
 	else
 		return filter_8tap_64p_183;
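
For illustration, every get_filter_*tap_* helper above selects among four coefficient tables with the same ratio cutovers; a hedged standalone sketch of that pattern, with a hypothetical pick_filter name and caller-supplied tables:

#include <stdint.h>

const uint16_t *pick_filter(int64_t ratio_q32,
			    const uint16_t *up, const uint16_t *f117,
			    const uint16_t *f150, const uint16_t *f183)
{
	const int64_t one = 1LL << 32;	/* 1.0 in Q31.32 */

	if (ratio_q32 < one)
		return up;		/* ratio < 1.0: upscaling */
	if (ratio_q32 < one * 4 / 3)
		return f117;		/* 1.0 <= ratio < 4/3 */
	if (ratio_q32 < one * 5 / 3)
		return f150;		/* 4/3 <= ratio < 5/3 */
	return f183;			/* ratio >= 5/3 */
}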
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 162f6a6..0a6d483 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -26,27 +26,10 @@
 #include "dc_bios_types.h"
 #include "dce_stream_encoder.h"
 #include "reg_helper.h"
+#include "hw_shared.h"
+
 #define DC_LOGGER \
 		enc110->base.ctx->logger
-enum DP_PIXEL_ENCODING {
-DP_PIXEL_ENCODING_RGB444                 = 0x00000000,
-DP_PIXEL_ENCODING_YCBCR422               = 0x00000001,
-DP_PIXEL_ENCODING_YCBCR444               = 0x00000002,
-DP_PIXEL_ENCODING_RGB_WIDE_GAMUT         = 0x00000003,
-DP_PIXEL_ENCODING_Y_ONLY                 = 0x00000004,
-DP_PIXEL_ENCODING_YCBCR420               = 0x00000005,
-DP_PIXEL_ENCODING_RESERVED               = 0x00000006,
-};
-
-
-enum DP_COMPONENT_DEPTH {
-DP_COMPONENT_DEPTH_6BPC                  = 0x00000000,
-DP_COMPONENT_DEPTH_8BPC                  = 0x00000001,
-DP_COMPONENT_DEPTH_10BPC                 = 0x00000002,
-DP_COMPONENT_DEPTH_12BPC                 = 0x00000003,
-DP_COMPONENT_DEPTH_16BPC                 = 0x00000004,
-DP_COMPONENT_DEPTH_RESERVED              = 0x00000005,
-};
 
 
 #define REG(reg)\
@@ -80,7 +63,7 @@
 static void dce110_update_generic_info_packet(
 	struct dce110_stream_encoder *enc110,
 	uint32_t packet_index,
-	const struct encoder_info_packet *info_packet)
+	const struct dc_info_packet *info_packet)
 {
 	uint32_t regval;
 	/* TODOFPGA Figure out a proper number for max_retries polling for lock
@@ -196,7 +179,7 @@
 static void dce110_update_hdmi_info_packet(
 	struct dce110_stream_encoder *enc110,
 	uint32_t packet_index,
-	const struct encoder_info_packet *info_packet)
+	const struct dc_info_packet *info_packet)
 {
 	uint32_t cont, send, line;
 
@@ -314,11 +297,11 @@
 	switch (crtc_timing->pixel_encoding) {
 	case PIXEL_ENCODING_YCBCR422:
 		REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
-				DP_PIXEL_ENCODING_YCBCR422);
+				DP_PIXEL_ENCODING_TYPE_YCBCR422);
 		break;
 	case PIXEL_ENCODING_YCBCR444:
 		REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
-				DP_PIXEL_ENCODING_YCBCR444);
+				DP_PIXEL_ENCODING_TYPE_YCBCR444);
 
 		if (crtc_timing->flags.Y_ONLY)
 			if (crtc_timing->display_color_depth != COLOR_DEPTH_666)
@@ -326,7 +309,7 @@
 				 * Color depth of Y-only could be
 				 * 8, 10, 12, 16 bits */
 				REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
-						DP_PIXEL_ENCODING_Y_ONLY);
+						DP_PIXEL_ENCODING_TYPE_Y_ONLY);
 		/* Note: DP_MSA_MISC1 bit 7 is the indicator
 		 * of Y-only mode.
 		 * This bit is set in HW if register
@@ -334,7 +317,7 @@
 		break;
 	case PIXEL_ENCODING_YCBCR420:
 		REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
-				DP_PIXEL_ENCODING_YCBCR420);
+				DP_PIXEL_ENCODING_TYPE_YCBCR420);
 		if (enc110->se_mask->DP_VID_M_DOUBLE_VALUE_EN)
 			REG_UPDATE(DP_VID_TIMING, DP_VID_M_DOUBLE_VALUE_EN, 1);
 
@@ -345,7 +328,7 @@
 		break;
 	default:
 		REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
-				DP_PIXEL_ENCODING_RGB444);
+				DP_PIXEL_ENCODING_TYPE_RGB444);
 		break;
 	}
 
@@ -363,20 +346,20 @@
 		break;
 	case COLOR_DEPTH_888:
 		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
-				DP_COMPONENT_DEPTH_8BPC);
+				DP_COMPONENT_PIXEL_DEPTH_8BPC);
 		break;
 	case COLOR_DEPTH_101010:
 		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
-				DP_COMPONENT_DEPTH_10BPC);
+				DP_COMPONENT_PIXEL_DEPTH_10BPC);
 
 		break;
 	case COLOR_DEPTH_121212:
 		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
-				DP_COMPONENT_DEPTH_12BPC);
+				DP_COMPONENT_PIXEL_DEPTH_12BPC);
 		break;
 	default:
 		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
-				DP_COMPONENT_DEPTH_6BPC);
+				DP_COMPONENT_PIXEL_DEPTH_6BPC);
 		break;
 	}
 
@@ -700,11 +683,11 @@
 	struct fixed31_32 avg_time_slots_per_mtp)
 {
 	struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
-	uint32_t x = dal_fixed31_32_floor(
+	uint32_t x = dc_fixpt_floor(
 		avg_time_slots_per_mtp);
-	uint32_t y = dal_fixed31_32_ceil(
-		dal_fixed31_32_shl(
-			dal_fixed31_32_sub_int(
+	uint32_t y = dc_fixpt_ceil(
+		dc_fixpt_shl(
+			dc_fixpt_sub_int(
 				avg_time_slots_per_mtp,
 				x),
 			26));
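+	/* e.g. (illustrative): 3.5 time slots per MTP splits into x = 3 and
+	 * y = ceil(0.5 * 2^26) = 33554432
+	 */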
@@ -836,7 +819,7 @@
 	const struct encoder_info_frame *info_frame)
 {
 	struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
-	uint32_t value = REG_READ(DP_SEC_CNTL);
+	uint32_t value = 0;
 
 	if (info_frame->vsc.valid)
 		dce110_update_generic_info_packet(
@@ -870,6 +853,7 @@
 	* Therefore we need to enable master bit
 	* if at least one of the fields is not 0
 	*/
+	value = REG_READ(DP_SEC_CNTL);
 	if (value)
 		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
 }
@@ -879,7 +863,7 @@
 {
 	/* stop generic packets on DP */
 	struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
-	uint32_t value = REG_READ(DP_SEC_CNTL);
+	uint32_t value = 0;
 
 	if (enc110->se_mask->DP_SEC_AVI_ENABLE) {
 		REG_SET_7(DP_SEC_CNTL, 0,
@@ -892,25 +876,10 @@
 			DP_SEC_STREAM_ENABLE, 0);
 	}
 
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
-	if (enc110->se_mask->DP_SEC_GSP7_ENABLE) {
-		REG_SET_10(DP_SEC_CNTL, 0,
-			DP_SEC_GSP0_ENABLE, 0,
-			DP_SEC_GSP1_ENABLE, 0,
-			DP_SEC_GSP2_ENABLE, 0,
-			DP_SEC_GSP3_ENABLE, 0,
-			DP_SEC_GSP4_ENABLE, 0,
-			DP_SEC_GSP5_ENABLE, 0,
-			DP_SEC_GSP6_ENABLE, 0,
-			DP_SEC_GSP7_ENABLE, 0,
-			DP_SEC_MPG_ENABLE, 0,
-			DP_SEC_STREAM_ENABLE, 0);
-	}
-#endif
 	/* this register is shared with the audio info frame.
 	 * therefore we need to keep master enabled
 	 * if at least one of the fields is not 0 */
-
+	value = REG_READ(DP_SEC_CNTL);
 	if (value)
 		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
 
@@ -1513,7 +1482,7 @@
 	struct stream_encoder *enc)
 {
 	struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc);
-	uint32_t value = REG_READ(DP_SEC_CNTL);
+	uint32_t value = 0;
 
 	/* Disable Audio packets */
 	REG_UPDATE_5(DP_SEC_CNTL,
@@ -1525,6 +1494,7 @@
 
 	/* This register is shared with the encoder info frame. Therefore we need
 	to keep master enabled if at least one of the fields is not 0 */
+	value = REG_READ(DP_SEC_CNTL);
 	if (value != 0)
 		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
index 832c5da..a02e719 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c
@@ -41,7 +41,7 @@
 #define DC_LOGGER \
 	xfm_dce->base.ctx->logger
 
-#define IDENTITY_RATIO(ratio) (dal_fixed31_32_u2d19(ratio) == (1 << 19))
+#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19))
 #define GAMUT_MATRIX_SIZE 12
 #define SCL_PHASES 16
 
@@ -256,27 +256,27 @@
 	struct fixed31_32 v_init;
 
 	inits->h_int_scale_ratio =
-		dal_fixed31_32_u2d19(data->ratios.horz) << 5;
+		dc_fixpt_u2d19(data->ratios.horz) << 5;
 	inits->v_int_scale_ratio =
-		dal_fixed31_32_u2d19(data->ratios.vert) << 5;
+		dc_fixpt_u2d19(data->ratios.vert) << 5;
 
 	h_init =
-		dal_fixed31_32_div_int(
-			dal_fixed31_32_add(
+		dc_fixpt_div_int(
+			dc_fixpt_add(
 				data->ratios.horz,
-				dal_fixed31_32_from_int(data->taps.h_taps + 1)),
+				dc_fixpt_from_int(data->taps.h_taps + 1)),
 				2);
-	inits->h_init.integer = dal_fixed31_32_floor(h_init);
-	inits->h_init.fraction = dal_fixed31_32_u0d19(h_init) << 5;
+	inits->h_init.integer = dc_fixpt_floor(h_init);
+	inits->h_init.fraction = dc_fixpt_u0d19(h_init) << 5;
 
 	v_init =
-		dal_fixed31_32_div_int(
-			dal_fixed31_32_add(
+		dc_fixpt_div_int(
+			dc_fixpt_add(
 				data->ratios.vert,
-				dal_fixed31_32_from_int(data->taps.v_taps + 1)),
+				dc_fixpt_from_int(data->taps.v_taps + 1)),
 				2);
-	inits->v_init.integer = dal_fixed31_32_floor(v_init);
-	inits->v_init.fraction = dal_fixed31_32_u0d19(v_init) << 5;
+	inits->v_init.integer = dc_fixpt_floor(v_init);
+	inits->v_init.fraction = dc_fixpt_u0d19(v_init) << 5;
 }
 
 static void program_scl_ratios_inits(
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
index 3092f76..38ec0d6 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c
@@ -733,38 +733,6 @@
 	return result;
 }
 
-enum dc_status dce100_validate_guaranteed(
-		struct dc  *dc,
-		struct dc_stream_state *dc_stream,
-		struct dc_state *context)
-{
-	enum dc_status result = DC_ERROR_UNEXPECTED;
-
-	context->streams[0] = dc_stream;
-	dc_stream_retain(context->streams[0]);
-	context->stream_count++;
-
-	result = resource_map_pool_resources(dc, context, dc_stream);
-
-	if (result == DC_OK)
-		result = resource_map_clock_resources(dc, context, dc_stream);
-
-	if (result == DC_OK)
-		result = build_mapped_resource(dc, context, dc_stream);
-
-	if (result == DC_OK) {
-		validate_guaranteed_copy_streams(
-				context, dc->caps.max_streams);
-		result = resource_build_scaling_params_for_context(dc, context);
-	}
-
-	if (result == DC_OK)
-		if (!dce100_validate_bandwidth(dc, context))
-			result = DC_FAIL_BANDWIDTH_VALIDATE;
-
-	return result;
-}
-
 static void dce100_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
@@ -786,7 +754,6 @@
 static const struct resource_funcs dce100_res_pool_funcs = {
 	.destroy = dce100_destroy_resource_pool,
 	.link_enc_create = dce100_link_encoder_create,
-	.validate_guaranteed = dce100_validate_guaranteed,
 	.validate_bandwidth = dce100_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
 	.add_stream_to_ctx = dce100_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index d057599..a92fb0a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -70,8 +70,9 @@
 
 #define CTX \
 	hws->ctx
-#define DC_LOGGER \
-	ctx->logger
+
+#define DC_LOGGER_INIT()
+
 #define REG(reg)\
 	hws->regs->reg
 
@@ -279,7 +280,9 @@
 	build_prescale_params(&prescale_params, plane_state);
 	ipp->funcs->ipp_program_prescale(ipp, &prescale_params);
 
-	if (plane_state->gamma_correction && dce_use_lut(plane_state->format))
+	if (plane_state->gamma_correction &&
+			!plane_state->gamma_correction->is_identity &&
+			dce_use_lut(plane_state->format))
 		ipp->funcs->ipp_program_input_lut(ipp, plane_state->gamma_correction);
 
 	if (tf == NULL) {
@@ -506,19 +509,19 @@
 	rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
 	rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
 
-	arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
-					     dal_fixed31_32_from_int(region_start));
-	arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
-					     dal_fixed31_32_from_int(region_end));
+	arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+					     dc_fixpt_from_int(region_start));
+	arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+					     dc_fixpt_from_int(region_end));
 
 	y_r = rgb_resulted[0].red;
 	y_g = rgb_resulted[0].green;
 	y_b = rgb_resulted[0].blue;
 
-	y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
+	y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b));
 
 	arr_points[0].y = y1_min;
-	arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y,
+	arr_points[0].slope = dc_fixpt_div(arr_points[0].y,
 						 arr_points[0].x);
 
 	y_r = rgb_resulted[hw_points - 1].red;
@@ -528,21 +531,21 @@
 	/* see comment above, m_arrPoints[1].y should be the Y value for the
 	 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
 	 */
-	y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
+	y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b));
 
 	arr_points[1].y = y3_max;
 
-	arr_points[1].slope = dal_fixed31_32_zero;
+	arr_points[1].slope = dc_fixpt_zero;
 
 	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
 		/* for PQ, we want to have a straight line from last HW X point,
 		 * and the slope to be such that we hit 1.0 at 10000 nits.
 		 */
-		const struct fixed31_32 end_value = dal_fixed31_32_from_int(125);
+		const struct fixed31_32 end_value = dc_fixpt_from_int(125);
 
-		arr_points[1].slope = dal_fixed31_32_div(
-				dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
-				dal_fixed31_32_sub(end_value, arr_points[1].x));
+		arr_points[1].slope = dc_fixpt_div(
+				dc_fixpt_sub(dc_fixpt_one, arr_points[1].y),
+				dc_fixpt_sub(end_value, arr_points[1].x));
 	}
 
 	regamma_params->hw_points_num = hw_points;
@@ -566,16 +569,16 @@
 	i = 1;
 
 	while (i != hw_points + 1) {
-		if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
+		if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
 			rgb_plus_1->red = rgb->red;
-		if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
+		if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
 			rgb_plus_1->green = rgb->green;
-		if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
+		if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
 			rgb_plus_1->blue = rgb->blue;
 
-		rgb->delta_red = dal_fixed31_32_sub(rgb_plus_1->red, rgb->red);
-		rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green);
-		rgb->delta_blue = dal_fixed31_32_sub(rgb_plus_1->blue, rgb->blue);
+		rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
+		rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
+		rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
 
 		++rgb_plus_1;
 		++rgb;
@@ -851,6 +854,28 @@
 
 	if (power_up != is_panel_powered_on(hwseq)) {
 		/* Send VBIOS command to prompt eDP panel power */
+		if (power_up) {
+			unsigned long long current_ts = dm_get_timestamp(ctx);
+			unsigned long long duration_in_ms =
+					dm_get_elapse_time_in_ns(
+							ctx,
+							current_ts,
+							div64_u64(link->link_trace.time_stamp.edp_poweroff, 1000000));
+			unsigned long long wait_time_ms = 0;
+
+			/* max 500ms from LCDVDD off to on */
+			if (link->link_trace.time_stamp.edp_poweroff == 0)
+				wait_time_ms = 500;
+			else if (duration_in_ms < 500)
+				wait_time_ms = 500 - duration_in_ms;
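+			/* e.g. (illustrative): if the panel was switched off
+			 * 180 ms ago, sleep the remaining 320 ms; with no
+			 * recorded power-off this boot, wait the full 500 ms
+			 */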
+
+			if (wait_time_ms) {
+				msleep(wait_time_ms);
+				dm_output_to_console("%s: wait %lld ms to power on eDP.\n",
+						__func__, wait_time_ms);
+			}
+
+		}
 
 		DC_LOG_HW_RESUME_S3(
 				"%s: Panel Power action: %s\n",
@@ -864,9 +889,14 @@
 		cntl.coherent = false;
 		cntl.lanes_number = LANE_COUNT_FOUR;
 		cntl.hpd_sel = link->link_enc->hpd_source;
-
 		bp_result = link_transmitter_control(ctx->dc_bios, &cntl);
 
+		if (!power_up)
+			/*save driver power off time stamp*/
+			link->link_trace.time_stamp.edp_poweroff = dm_get_timestamp(ctx);
+		else
+			link->link_trace.time_stamp.edp_poweron = dm_get_timestamp(ctx);
+
 		if (bp_result != BP_RESULT_OK)
 			DC_LOG_ERROR(
 					"%s: Panel Power bp_result: %d\n",
@@ -1011,7 +1041,7 @@
 
 	if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
 		link->dc->hwss.edp_backlight_control(link, true);
-		stream->bl_pwm_level = 0;
+		stream->bl_pwm_level = EDP_BACKLIGHT_RAMP_DISABLE_LEVEL;
 	}
 }
 void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
@@ -1203,7 +1233,7 @@
 		&pipe_ctx->plane_res.scl_data);
 }
 
-static enum dc_status dce110_prog_pixclk_crtc_otg(
+static enum dc_status dce110_enable_stream_timing(
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context,
 		struct dc *dc)
@@ -1269,7 +1299,7 @@
 			pipe_ctx[pipe_ctx->pipe_idx];
 
 	/*  */
-	dc->hwss.prog_pixclk_crtc_otg(pipe_ctx, context, dc);
+	dc->hwss.enable_stream_timing(pipe_ctx, context, dc);
 
 	/* FPGA does not program backend */
 	if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) {
@@ -1441,6 +1471,17 @@
 	}
 }
 
+static struct dc_link *get_link_for_edp(struct dc *dc)
+{
+	int i;
+
+	for (i = 0; i < dc->link_count; i++) {
+		if (dc->links[i]->connector_signal == SIGNAL_TYPE_EDP)
+			return dc->links[i];
+	}
+	return NULL;
+}
+
 static struct dc_link *get_link_for_edp_not_in_use(
 		struct dc *dc,
 		struct dc_state *context)
@@ -1475,20 +1516,21 @@
  */
 void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
 {
-	struct dc_bios *dcb = dc->ctx->dc_bios;
-
-	/* vbios already light up eDP, so we can leverage vbios and skip eDP
-	 * programming
-	 */
-	bool can_eDP_fast_boot_optimize =
-			(dcb->funcs->get_vga_enabled_displays(dc->ctx->dc_bios) == ATOM_DISPLAY_LCD1_ACTIVE);
-
-	/* if OS doesn't light up eDP and eDP link is available, we want to disable */
 	struct dc_link *edp_link_to_turnoff = NULL;
+	struct dc_link *edp_link = get_link_for_edp(dc);
+	bool can_eDP_fast_boot_optimize = false;
+
+	if (edp_link) {
+		can_eDP_fast_boot_optimize =
+				edp_link->link_enc->funcs->is_dig_enabled(edp_link->link_enc);
+	}
 
 	if (can_eDP_fast_boot_optimize) {
 		edp_link_to_turnoff = get_link_for_edp_not_in_use(dc, context);
 
+		/* if the OS doesn't light up eDP and the eDP link is available, we
+		 * want to disable it; when resuming from S4/S5 the optimization
+		 * should still be applied.
+		 */
 		if (!edp_link_to_turnoff)
 			dc->apply_edp_fast_boot_optimization = true;
 	}
@@ -1544,6 +1586,7 @@
 			pipe_ctx->plane_res.mi,
 			context->bw.dce.nbp_state_change_wm_ns[num_pipes],
 			context->bw.dce.stutter_exit_wm_ns[num_pipes],
+			context->bw.dce.stutter_entry_wm_ns[num_pipes],
 			context->bw.dce.urgent_wm_ns[num_pipes],
 			total_dest_line_time_ns);
 		if (i == underlay_idx) {
@@ -1569,6 +1612,7 @@
 		MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK, MAX_WATERMARK };
 	struct dce_watermarks nbp_marks = {
 		SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK, SAFE_NBP_MARK };
+	struct dce_watermarks min_marks = { 0, 0, 0, 0};
 
 	for (i = 0; i < MAX_PIPES; i++) {
 		if (res_ctx->pipe_ctx[i].stream == NULL || res_ctx->pipe_ctx[i].plane_res.mi == NULL)
@@ -1578,6 +1622,7 @@
 				res_ctx->pipe_ctx[i].plane_res.mi,
 				nbp_marks,
 				max_marks,
+				min_marks,
 				max_marks,
 				MAX_WATERMARK);
 
@@ -1803,6 +1848,9 @@
 		}
 	}
 
+	/* Pipe context should be found */
+	ASSERT(pipe_ctx);
+
 	/* Only supports eDP */
 	if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP)
 		return false;
@@ -2221,74 +2269,6 @@
 
 	pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust);
 }
-
-/**
- * TODO REMOVE, USE UPDATE INSTEAD
- */
-static void set_plane_config(
-	const struct dc *dc,
-	struct pipe_ctx *pipe_ctx,
-	struct resource_context *res_ctx)
-{
-	struct mem_input *mi = pipe_ctx->plane_res.mi;
-	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
-	struct xfm_grph_csc_adjustment adjust;
-	struct out_csc_color_matrix tbl_entry;
-	unsigned int i;
-
-	memset(&adjust, 0, sizeof(adjust));
-	memset(&tbl_entry, 0, sizeof(tbl_entry));
-	adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-
-	dce_enable_fe_clock(dc->hwseq, mi->inst, true);
-
-	set_default_colors(pipe_ctx);
-	if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) {
-		tbl_entry.color_space =
-			pipe_ctx->stream->output_color_space;
-
-		for (i = 0; i < 12; i++)
-			tbl_entry.regval[i] =
-			pipe_ctx->stream->csc_color_matrix.matrix[i];
-
-		pipe_ctx->plane_res.xfm->funcs->opp_set_csc_adjustment
-				(pipe_ctx->plane_res.xfm, &tbl_entry);
-	}
-
-	if (pipe_ctx->stream->gamut_remap_matrix.enable_remap == true) {
-		adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-
-		for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
-			adjust.temperature_matrix[i] =
-				pipe_ctx->stream->gamut_remap_matrix.matrix[i];
-	}
-
-	pipe_ctx->plane_res.xfm->funcs->transform_set_gamut_remap(pipe_ctx->plane_res.xfm, &adjust);
-
-	pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != 0;
-	program_scaler(dc, pipe_ctx);
-
-	program_surface_visibility(dc, pipe_ctx);
-
-	mi->funcs->mem_input_program_surface_config(
-			mi,
-			plane_state->format,
-			&plane_state->tiling_info,
-			&plane_state->plane_size,
-			plane_state->rotation,
-			NULL,
-			false);
-	if (mi->funcs->set_blank)
-		mi->funcs->set_blank(mi, pipe_ctx->plane_state->visible);
-
-	if (dc->config.gpu_vm_support)
-		mi->funcs->mem_input_program_pte_vm(
-				pipe_ctx->plane_res.mi,
-				plane_state->format,
-				&plane_state->tiling_info,
-				plane_state->rotation);
-}
-
 static void update_plane_addr(const struct dc *dc,
 		struct pipe_ctx *pipe_ctx)
 {
@@ -2699,8 +2679,11 @@
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 	struct xfm_grph_csc_adjustment adjust;
 	struct out_csc_color_matrix tbl_entry;
+#if defined(CONFIG_DRM_AMD_DC_FBC)
+	unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
+#endif
 	unsigned int i;
-	struct dc_context *ctx = dc->ctx;
+	DC_LOGGER_INIT();
 	memset(&tbl_entry, 0, sizeof(tbl_entry));
 
 	if (dc->current_state)
@@ -2740,7 +2723,9 @@
 	program_scaler(dc, pipe_ctx);
 
 #if defined(CONFIG_DRM_AMD_DC_FBC)
-	if (dc->fbc_compressor && old_pipe->stream) {
+	/* fbc not applicable on Underlay pipe */
+	if (dc->fbc_compressor && old_pipe->stream &&
+	    pipe_ctx->pipe_idx != underlay_idx) {
 		if (plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL)
 			dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor);
 		else
@@ -2776,13 +2761,13 @@
 		dc->hwss.set_output_transfer_func(pipe_ctx, pipe_ctx->stream);
 
 	DC_LOG_SURFACE(
-			"Pipe:%d 0x%x: addr hi:0x%x, "
+			"Pipe:%d %p: addr hi:0x%x, "
 			"addr low:0x%x, "
 			"src: %d, %d, %d,"
 			" %d; dst: %d, %d, %d, %d;"
 			"clip: %d, %d, %d, %d\n",
 			pipe_ctx->pipe_idx,
-			pipe_ctx->plane_state,
+			(void *) pipe_ctx->plane_state,
 			pipe_ctx->plane_state->address.grph.addr.high_part,
 			pipe_ctx->plane_state->address.grph.addr.low_part,
 			pipe_ctx->plane_state->src_rect.x,
@@ -2970,7 +2955,6 @@
 	.init_hw = init_hw,
 	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
 	.apply_ctx_for_surface = dce110_apply_ctx_for_surface,
-	.set_plane_config = set_plane_config,
 	.update_plane_addr = update_plane_addr,
 	.update_pending_status = dce110_update_pending_status,
 	.set_input_transfer_func = dce110_set_input_transfer_func,
@@ -2993,7 +2977,7 @@
 	.get_position = get_position,
 	.set_static_screen_control = set_static_screen_control,
 	.reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap,
-	.prog_pixclk_crtc_otg = dce110_prog_pixclk_crtc_otg,
+	.enable_stream_timing = dce110_enable_stream_timing,
 	.setup_stereo = NULL,
 	.set_avmute = dce110_set_avmute,
 	.wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
index 7bab8c6..0564c8e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_mem_input_v.c
@@ -923,6 +923,7 @@
 	struct mem_input *mem_input,
 	struct dce_watermarks nbp,
 	struct dce_watermarks stutter,
+	struct dce_watermarks stutter_enter,
 	struct dce_watermarks urgent,
 	uint32_t total_dest_line_time_ns)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index b1f14be..ee33786 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -930,38 +930,6 @@
 	return result;
 }
 
-static enum dc_status dce110_validate_guaranteed(
-		struct dc *dc,
-		struct dc_stream_state *dc_stream,
-		struct dc_state *context)
-{
-	enum dc_status result = DC_ERROR_UNEXPECTED;
-
-	context->streams[0] = dc_stream;
-	dc_stream_retain(context->streams[0]);
-	context->stream_count++;
-
-	result = resource_map_pool_resources(dc, context, dc_stream);
-
-	if (result == DC_OK)
-		result = resource_map_clock_resources(dc, context, dc_stream);
-
-	if (result == DC_OK)
-		result = build_mapped_resource(dc, context, dc_stream);
-
-	if (result == DC_OK) {
-		validate_guaranteed_copy_streams(
-				context, dc->caps.max_streams);
-		result = resource_build_scaling_params_for_context(dc, context);
-	}
-
-	if (result == DC_OK)
-		if (!dce110_validate_bandwidth(dc, context))
-			result = DC_FAIL_BANDWIDTH_VALIDATE;
-
-	return result;
-}
-
 static struct pipe_ctx *dce110_acquire_underlay(
 		struct dc_state *context,
 		const struct resource_pool *pool,
@@ -1036,7 +1004,6 @@
 static const struct resource_funcs dce110_res_pool_funcs = {
 	.destroy = dce110_destroy_resource_pool,
 	.link_enc_create = dce110_link_encoder_create,
-	.validate_guaranteed = dce110_validate_guaranteed,
 	.validate_bandwidth = dce110_validate_bandwidth,
 	.validate_plane = dce110_validate_plane,
 	.acquire_idle_pipe_for_layer = dce110_acquire_underlay,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
index be71539..1b2fe0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator.c
@@ -431,14 +431,6 @@
 			0,
 			CRTC_V_TOTAL_CONTROL,
 			CRTC_SET_V_TOTAL_MIN_MASK);
-		set_reg_field_value(v_total_min,
-				0,
-				CRTC_V_TOTAL_MIN,
-				CRTC_V_TOTAL_MIN);
-		set_reg_field_value(v_total_max,
-				0,
-				CRTC_V_TOTAL_MAX,
-				CRTC_V_TOTAL_MAX);
 		set_reg_field_value(v_total_cntl,
 				0,
 				CRTC_V_TOTAL_CONTROL,
@@ -447,6 +439,14 @@
 				0,
 				CRTC_V_TOTAL_CONTROL,
 				CRTC_V_TOTAL_MAX_SEL);
+		set_reg_field_value(v_total_min,
+				0,
+				CRTC_V_TOTAL_MIN,
+				CRTC_V_TOTAL_MIN);
+		set_reg_field_value(v_total_max,
+				0,
+				CRTC_V_TOTAL_MAX,
+				CRTC_V_TOTAL_MAX);
 		set_reg_field_value(v_total_cntl,
 				0,
 				CRTC_V_TOTAL_CONTROL,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
index 8ad0481..a3cef60 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_timing_generator_v.c
@@ -648,12 +648,6 @@
 	return;
 }
 
-static bool dce110_tg_v_is_blanked(struct timing_generator *tg)
-{
-	/* Signal comes from the primary pipe, underlay is never blanked. */
-	return false;
-}
-
 /** ********************************************************************************************
  *
  * DCE11 Timing Generator Constructor / Destructor
@@ -670,7 +664,6 @@
 		.set_early_control = dce110_timing_generator_v_set_early_control,
 		.wait_for_state = dce110_timing_generator_v_wait_for_state,
 		.set_blank = dce110_timing_generator_v_set_blank,
-		.is_blanked = dce110_tg_v_is_blanked,
 		.set_colors = dce110_timing_generator_v_set_colors,
 		.set_overscan_blank_color =
 				dce110_timing_generator_v_set_overscan_color_black,
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
index 8ba3c12..a7dce06 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_transform_v.c
@@ -373,13 +373,13 @@
 	struct rect *chroma_viewport)
 {
 	inits->h_int_scale_ratio_luma =
-		dal_fixed31_32_u2d19(data->ratios.horz) << 5;
+		dc_fixpt_u2d19(data->ratios.horz) << 5;
 	inits->v_int_scale_ratio_luma =
-		dal_fixed31_32_u2d19(data->ratios.vert) << 5;
+		dc_fixpt_u2d19(data->ratios.vert) << 5;
 	inits->h_int_scale_ratio_chroma =
-		dal_fixed31_32_u2d19(data->ratios.horz_c) << 5;
+		dc_fixpt_u2d19(data->ratios.horz_c) << 5;
 	inits->v_int_scale_ratio_chroma =
-		dal_fixed31_32_u2d19(data->ratios.vert_c) << 5;
+		dc_fixpt_u2d19(data->ratios.vert_c) << 5;
 
 	inits->h_init_luma.integer = 1;
 	inits->v_init_luma.integer = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
index cd1e3f7..00c0a1e 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c
@@ -430,7 +430,7 @@
 
 	if (!enc110)
 		return NULL;
-	
+
 	dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id,
 					&stream_enc_regs[eng_id],
 					&se_shift, &se_mask);
@@ -867,38 +867,6 @@
 	return result;
 }
 
-enum dc_status dce112_validate_guaranteed(
-		struct dc *dc,
-		struct dc_stream_state *stream,
-		struct dc_state *context)
-{
-	enum dc_status result = DC_ERROR_UNEXPECTED;
-
-	context->streams[0] = stream;
-	dc_stream_retain(context->streams[0]);
-	context->stream_count++;
-
-	result = resource_map_pool_resources(dc, context, stream);
-
-	if (result == DC_OK)
-		result = resource_map_phy_clock_resources(dc, context, stream);
-
-	if (result == DC_OK)
-		result = build_mapped_resource(dc, context, stream);
-
-	if (result == DC_OK) {
-		validate_guaranteed_copy_streams(
-				context, dc->caps.max_streams);
-		result = resource_build_scaling_params_for_context(dc, context);
-	}
-
-	if (result == DC_OK)
-		if (!dce112_validate_bandwidth(dc, context))
-			result = DC_FAIL_BANDWIDTH_VALIDATE;
-
-	return result;
-}
-
 enum dc_status dce112_validate_global(
 		struct dc *dc,
 		struct dc_state *context)
@@ -921,7 +889,6 @@
 static const struct resource_funcs dce112_res_pool_funcs = {
 	.destroy = dce112_destroy_resource_pool,
 	.link_enc_create = dce112_link_encoder_create,
-	.validate_guaranteed = dce112_validate_guaranteed,
 	.validate_bandwidth = dce112_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
 	.add_stream_to_ctx = dce112_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
index d5c19d3..95a4033 100644
--- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.h
@@ -42,11 +42,6 @@
 		struct dc_state *context,
 		struct dc_state *old_context);
 
-enum dc_status dce112_validate_guaranteed(
-		struct dc *dc,
-		struct dc_stream_state *dc_stream,
-		struct dc_state *context);
-
 bool dce112_validate_bandwidth(
 	struct dc *dc,
 	struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 4659a4b..2d58dac 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -652,7 +652,7 @@
 		return NULL;
 	}
 
-	dce112_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks);
+	dce120_mem_input_construct(dce_mi, ctx, inst, &mi_regs[inst], &mi_shifts, &mi_masks);
 	return &dce_mi->base;
 }
 
@@ -684,7 +684,6 @@
 static const struct resource_funcs dce120_res_pool_funcs = {
 	.destroy = dce120_destroy_resource_pool,
 	.link_enc_create = dce120_link_encoder_create,
-	.validate_guaranteed = dce112_validate_guaranteed,
 	.validate_bandwidth = dce112_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
 	.add_stream_to_ctx = dce112_add_stream_to_ctx
@@ -815,14 +814,25 @@
 	dm_pp_notify_wm_clock_changes(dc->ctx, &clk_ranges);
 }
 
+static uint32_t read_pipe_fuses(struct dc_context *ctx)
+{
+	uint32_t value = dm_read_reg_soc15(ctx, mmCC_DC_PIPE_DIS, 0);
+	/* VG20 support max 6 pipes */
+	/* VG20 supports at most 6 pipes */
+	return value;
+}
+
 static bool construct(
 	uint8_t num_virtual_links,
 	struct dc *dc,
 	struct dce110_resource_pool *pool)
 {
 	unsigned int i;
+	int j;
 	struct dc_context *ctx = dc->ctx;
 	struct irq_service_init_data irq_init_data;
+	bool harvest_enabled = ASICREV_IS_VEGA20_P(ctx->asic_id.hw_internal_rev);
+	uint32_t pipe_fuses;
 
 	ctx->dc_bios->regs = &bios_regs;
 
@@ -916,28 +926,41 @@
 	if (!pool->base.irqs)
 		goto irqs_create_fail;
 
+	/* retrieve valid pipe fuses */
+	if (harvest_enabled)
+		pipe_fuses = read_pipe_fuses(ctx);
+
+	/* index to valid pipe resource */
+	j = 0;
 	for (i = 0; i < pool->base.pipe_count; i++) {
-		pool->base.timing_generators[i] =
+		if (harvest_enabled) {
+			if ((pipe_fuses & (1 << i)) != 0) {
+				dm_error("DC: skip invalid pipe %d!\n", i);
+				continue;
+			}
+		}
+
+		pool->base.timing_generators[j] =
 				dce120_timing_generator_create(
 					ctx,
 					i,
 					&dce120_tg_offsets[i]);
-		if (pool->base.timing_generators[i] == NULL) {
+		if (pool->base.timing_generators[j] == NULL) {
 			BREAK_TO_DEBUGGER();
 			dm_error("DC: failed to create tg!\n");
 			goto controller_create_fail;
 		}
 
-		pool->base.mis[i] = dce120_mem_input_create(ctx, i);
+		pool->base.mis[j] = dce120_mem_input_create(ctx, i);
 
-		if (pool->base.mis[i] == NULL) {
+		if (pool->base.mis[j] == NULL) {
 			BREAK_TO_DEBUGGER();
 			dm_error(
 				"DC: failed to create memory input!\n");
 			goto controller_create_fail;
 		}
 
-		pool->base.ipps[i] = dce120_ipp_create(ctx, i);
+		pool->base.ipps[j] = dce120_ipp_create(ctx, i);
-		if (pool->base.ipps[i] == NULL) {
+		if (pool->base.ipps[j] == NULL) {
 			BREAK_TO_DEBUGGER();
 			dm_error(
@@ -945,7 +968,7 @@
 			goto controller_create_fail;
 		}
 
-		pool->base.transforms[i] = dce120_transform_create(ctx, i);
+		pool->base.transforms[j] = dce120_transform_create(ctx, i);
-		if (pool->base.transforms[i] == NULL) {
+		if (pool->base.transforms[j] == NULL) {
 			BREAK_TO_DEBUGGER();
 			dm_error(
@@ -953,16 +976,23 @@
 			goto res_create_fail;
 		}
 
-		pool->base.opps[i] = dce120_opp_create(
+		pool->base.opps[j] = dce120_opp_create(
 			ctx,
 			i);
-		if (pool->base.opps[i] == NULL) {
+		if (pool->base.opps[j] == NULL) {
 			BREAK_TO_DEBUGGER();
 			dm_error(
 				"DC: failed to create output pixel processor!\n");
 		}
+
+		/* advance to the next valid pipe slot */
+		j++;
 	}
 
+	/* number of valid pipes */
+	pool->base.pipe_count = j;
+	pool->base.timing_generator_count = j;
+
 	if (!resource_construct(num_virtual_links, dc, &pool->base,
 			 &res_create_funcs))
 		goto res_create_fail;
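
For illustration, the harvesting logic added above skips fuse-disabled pipes and packs the usable ones into consecutive resource slots before shrinking pipe_count; a standalone sketch of the same compaction, with a hypothetical fuse value:

#include <stdio.h>

int main(void)
{
	unsigned int pipe_fuses = 0x28;	/* hypothetical: pipes 3 and 5 fused off */
	int slot_to_pipe[6], j = 0, i;

	for (i = 0; i < 6; i++) {
		if (pipe_fuses & (1u << i))
			continue;		/* skip harvested pipe */
		slot_to_pipe[j++] = i;		/* logical slot j uses physical pipe i */
	}

	/* pipe_count becomes j: here 4 slots, mapping pipes 0, 1, 2, 4 */
	for (i = 0; i < j; i++)
		printf("slot %d -> pipe %d\n", i, slot_to_pipe[i]);
	return 0;
}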
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
index 7bee781..2ea490f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_timing_generator.c
@@ -570,12 +570,6 @@
 				0x180);
 
 	} else {
-		CRTC_REG_UPDATE(
-				CRTC0_CRTC_V_TOTAL_MIN,
-				CRTC_V_TOTAL_MIN, 0);
-		CRTC_REG_UPDATE(
-				CRTC0_CRTC_V_TOTAL_MAX,
-				CRTC_V_TOTAL_MAX, 0);
 		CRTC_REG_SET_N(CRTC0_CRTC_V_TOTAL_CONTROL, 5,
 				FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_V_TOTAL_MIN_SEL), 0,
 				FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_V_TOTAL_MAX_SEL), 0,
@@ -583,6 +577,12 @@
 				FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_FORCE_LOCK_TO_MASTER_VSYNC), 0,
 				FD(CRTC0_CRTC_V_TOTAL_CONTROL__CRTC_SET_V_TOTAL_MIN_MASK), 0);
 		CRTC_REG_UPDATE(
+				CRTC0_CRTC_V_TOTAL_MIN,
+				CRTC_V_TOTAL_MIN, 0);
+		CRTC_REG_UPDATE(
+				CRTC0_CRTC_V_TOTAL_MAX,
+				CRTC_V_TOTAL_MAX, 0);
+		CRTC_REG_UPDATE(
 				CRTC0_CRTC_STATIC_SCREEN_CONTROL,
 				CRTC_STATIC_SCREEN_EVENT_MASK,
 				0);
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index 5d854a3..48a0689 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -691,23 +691,6 @@
 	}
 }
 
-static enum dc_status build_mapped_resource(
-		const struct dc *dc,
-		struct dc_state *context,
-		struct dc_stream_state *stream)
-{
-	struct pipe_ctx *pipe_ctx = resource_get_head_pipe_for_stream(&context->res_ctx, stream);
-
-	if (!pipe_ctx)
-		return DC_ERROR_UNEXPECTED;
-
-	dce110_resource_build_pipe_hw_param(pipe_ctx);
-
-	resource_build_info_frame(pipe_ctx);
-
-	return DC_OK;
-}
-
 bool dce80_validate_bandwidth(
 	struct dc *dc,
 	struct dc_state *context)
@@ -749,37 +732,6 @@
 	return DC_OK;
 }
 
-enum dc_status dce80_validate_guaranteed(
-		struct dc *dc,
-		struct dc_stream_state *dc_stream,
-		struct dc_state *context)
-{
-	enum dc_status result = DC_ERROR_UNEXPECTED;
-
-	context->streams[0] = dc_stream;
-	dc_stream_retain(context->streams[0]);
-	context->stream_count++;
-
-	result = resource_map_pool_resources(dc, context, dc_stream);
-
-	if (result == DC_OK)
-		result = resource_map_clock_resources(dc, context, dc_stream);
-
-	if (result == DC_OK)
-		result = build_mapped_resource(dc, context, dc_stream);
-
-	if (result == DC_OK) {
-		validate_guaranteed_copy_streams(
-				context, dc->caps.max_streams);
-		result = resource_build_scaling_params_for_context(dc, context);
-	}
-
-	if (result == DC_OK)
-		result = dce80_validate_bandwidth(dc, context);
-
-	return result;
-}
-
 static void dce80_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dce110_resource_pool *dce110_pool = TO_DCE110_RES_POOL(*pool);
@@ -792,7 +744,6 @@
 static const struct resource_funcs dce80_res_pool_funcs = {
 	.destroy = dce80_destroy_resource_pool,
 	.link_enc_create = dce80_link_encoder_create,
-	.validate_guaranteed = dce80_validate_guaranteed,
 	.validate_bandwidth = dce80_validate_bandwidth,
 	.validate_plane = dce100_validate_plane,
 	.add_stream_to_ctx = dce100_add_stream_to_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
index 5469bdf..84f52c6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile
@@ -26,7 +26,7 @@
 		dcn10_dpp.o dcn10_opp.o dcn10_optc.o \
 		dcn10_hubp.o dcn10_mpc.o \
 		dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \
-		dcn10_hubbub.o
+		dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o
 
 AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 881a1bf..5d95a99 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -169,7 +169,7 @@
 	}
 
 	if (fixpoint == true)
-		arr_points[1].custom_float_y = dal_fixed31_32_clamp_u0d14(arr_points[1].y);
+		arr_points[1].custom_float_y = dc_fixpt_clamp_u0d14(arr_points[1].y);
 	else if (!convert_to_custom_float_format(arr_points[1].y, &fmt,
 		&arr_points[1].custom_float_y)) {
 		BREAK_TO_DEBUGGER();
@@ -327,19 +327,19 @@
 	rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
 	rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
 
-	arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
-					     dal_fixed31_32_from_int(region_start));
-	arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
-					     dal_fixed31_32_from_int(region_end));
+	arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+					     dc_fixpt_from_int(region_start));
+	arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+					     dc_fixpt_from_int(region_end));
 
 	y_r = rgb_resulted[0].red;
 	y_g = rgb_resulted[0].green;
 	y_b = rgb_resulted[0].blue;
 
-	y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
+	y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b));
 
 	arr_points[0].y = y1_min;
-	arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x);
+	arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x);
 	y_r = rgb_resulted[hw_points - 1].red;
 	y_g = rgb_resulted[hw_points - 1].green;
 	y_b = rgb_resulted[hw_points - 1].blue;
@@ -347,35 +347,35 @@
 	/* see comment above, m_arrPoints[1].y should be the Y value for the
 	 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
 	 */
-	y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
+	y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b));
 
 	arr_points[1].y = y3_max;
 
-	arr_points[1].slope = dal_fixed31_32_zero;
+	arr_points[1].slope = dc_fixpt_zero;
 
 	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
 		/* for PQ, we want to have a straight line from last HW X point,
 		 * and the slope to be such that we hit 1.0 at 10000 nits.
 		 */
 		const struct fixed31_32 end_value =
-				dal_fixed31_32_from_int(125);
+				dc_fixpt_from_int(125);
 
-		arr_points[1].slope = dal_fixed31_32_div(
-			dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
-			dal_fixed31_32_sub(end_value, arr_points[1].x));
+		arr_points[1].slope = dc_fixpt_div(
+			dc_fixpt_sub(dc_fixpt_one, arr_points[1].y),
+			dc_fixpt_sub(end_value, arr_points[1].x));
 	}
 
 	lut_params->hw_points_num = hw_points;
 
-	i = 1;
-	for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) {
+	k = 0;
+	for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
 		if (seg_distr[k] != -1) {
 			lut_params->arr_curve_points[k].segments_num =
 					seg_distr[k];
 			lut_params->arr_curve_points[i].offset =
 					lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
 		}
-		i++;
+		k++;
 	}
 
 	if (seg_distr[k] != -1)
@@ -386,24 +386,24 @@
 
 	i = 1;
 	while (i != hw_points + 1) {
-		if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
+		if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
 			rgb_plus_1->red = rgb->red;
-		if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
+		if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
 			rgb_plus_1->green = rgb->green;
-		if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
+		if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
 			rgb_plus_1->blue = rgb->blue;
 
-		rgb->delta_red   = dal_fixed31_32_sub(rgb_plus_1->red,   rgb->red);
-		rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green);
-		rgb->delta_blue  = dal_fixed31_32_sub(rgb_plus_1->blue,  rgb->blue);
+		rgb->delta_red   = dc_fixpt_sub(rgb_plus_1->red,   rgb->red);
+		rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
+		rgb->delta_blue  = dc_fixpt_sub(rgb_plus_1->blue,  rgb->blue);
 
 		if (fixpoint == true) {
-			rgb->delta_red_reg   = dal_fixed31_32_clamp_u0d10(rgb->delta_red);
-			rgb->delta_green_reg = dal_fixed31_32_clamp_u0d10(rgb->delta_green);
-			rgb->delta_blue_reg  = dal_fixed31_32_clamp_u0d10(rgb->delta_blue);
-			rgb->red_reg         = dal_fixed31_32_clamp_u0d14(rgb->red);
-			rgb->green_reg       = dal_fixed31_32_clamp_u0d14(rgb->green);
-			rgb->blue_reg        = dal_fixed31_32_clamp_u0d14(rgb->blue);
+			rgb->delta_red_reg   = dc_fixpt_clamp_u0d10(rgb->delta_red);
+			rgb->delta_green_reg = dc_fixpt_clamp_u0d10(rgb->delta_green);
+			rgb->delta_blue_reg  = dc_fixpt_clamp_u0d10(rgb->delta_blue);
+			rgb->red_reg         = dc_fixpt_clamp_u0d14(rgb->red);
+			rgb->green_reg       = dc_fixpt_clamp_u0d14(rgb->green);
+			rgb->blue_reg        = dc_fixpt_clamp_u0d14(rgb->blue);
 		}
 
 		++rgb_plus_1;
@@ -489,19 +489,19 @@
 	rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
 	rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
 
-	arr_points[0].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
-					     dal_fixed31_32_from_int(region_start));
-	arr_points[1].x = dal_fixed31_32_pow(dal_fixed31_32_from_int(2),
-					     dal_fixed31_32_from_int(region_end));
+	arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+					     dc_fixpt_from_int(region_start));
+	arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2),
+					     dc_fixpt_from_int(region_end));
 
 	y_r = rgb_resulted[0].red;
 	y_g = rgb_resulted[0].green;
 	y_b = rgb_resulted[0].blue;
 
-	y1_min = dal_fixed31_32_min(y_r, dal_fixed31_32_min(y_g, y_b));
+	y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b));
 
 	arr_points[0].y = y1_min;
-	arr_points[0].slope = dal_fixed31_32_div(arr_points[0].y, arr_points[0].x);
+	arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x);
 	y_r = rgb_resulted[hw_points - 1].red;
 	y_g = rgb_resulted[hw_points - 1].green;
 	y_b = rgb_resulted[hw_points - 1].blue;
@@ -509,35 +509,35 @@
 	/* see comment above, m_arrPoints[1].y should be the Y value for the
 	 * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1)
 	 */
-	y3_max = dal_fixed31_32_max(y_r, dal_fixed31_32_max(y_g, y_b));
+	y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b));
 
 	arr_points[1].y = y3_max;
 
-	arr_points[1].slope = dal_fixed31_32_zero;
+	arr_points[1].slope = dc_fixpt_zero;
 
 	if (output_tf->tf == TRANSFER_FUNCTION_PQ) {
 		/* for PQ, we want to have a straight line from last HW X point,
 		 * and the slope to be such that we hit 1.0 at 10000 nits.
 		 */
 		const struct fixed31_32 end_value =
-				dal_fixed31_32_from_int(125);
+				dc_fixpt_from_int(125);
 
-		arr_points[1].slope = dal_fixed31_32_div(
-			dal_fixed31_32_sub(dal_fixed31_32_one, arr_points[1].y),
-			dal_fixed31_32_sub(end_value, arr_points[1].x));
+		arr_points[1].slope = dc_fixpt_div(
+			dc_fixpt_sub(dc_fixpt_one, arr_points[1].y),
+			dc_fixpt_sub(end_value, arr_points[1].x));
 	}
 
 	lut_params->hw_points_num = hw_points;
 
-	i = 1;
-	for (k = 0; k < MAX_REGIONS_NUMBER && i < MAX_REGIONS_NUMBER; k++) {
+	k = 0;
+	for (i = 1; i < MAX_REGIONS_NUMBER; i++) {
 		if (seg_distr[k] != -1) {
 			lut_params->arr_curve_points[k].segments_num =
 					seg_distr[k];
 			lut_params->arr_curve_points[i].offset =
 					lut_params->arr_curve_points[k].offset + (1 << seg_distr[k]);
 		}
-		i++;
+		k++;
 	}
 
 	if (seg_distr[k] != -1)
@@ -548,16 +548,16 @@
 
 	i = 1;
 	while (i != hw_points + 1) {
-		if (dal_fixed31_32_lt(rgb_plus_1->red, rgb->red))
+		if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
 			rgb_plus_1->red = rgb->red;
-		if (dal_fixed31_32_lt(rgb_plus_1->green, rgb->green))
+		if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
 			rgb_plus_1->green = rgb->green;
-		if (dal_fixed31_32_lt(rgb_plus_1->blue, rgb->blue))
+		if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
 			rgb_plus_1->blue = rgb->blue;
 
-		rgb->delta_red   = dal_fixed31_32_sub(rgb_plus_1->red,   rgb->red);
-		rgb->delta_green = dal_fixed31_32_sub(rgb_plus_1->green, rgb->green);
-		rgb->delta_blue  = dal_fixed31_32_sub(rgb_plus_1->blue,  rgb->blue);
+		rgb->delta_red   = dc_fixpt_sub(rgb_plus_1->red,   rgb->red);
+		rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
+		rgb->delta_blue  = dc_fixpt_sub(rgb_plus_1->blue,  rgb->blue);
 
 		++rgb_plus_1;
 		++rgb;
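
The hunks above are a mechanical rename of the fixed-point helpers (dal_fixed31_32_* to dc_fixpt_*); the underlying format is unchanged. As a rough userspace sketch of that format -- our own struct and helper names, and a truncating u2d19 for simplicity -- the 31.32 value keeps 32 integer and 32 fractional bits in a signed 64-bit word:

#include <stdint.h>
#include <stdio.h>

/* 32 integer bits and 32 fractional bits in a signed 64-bit value */
struct fixed31_32 {
	int64_t value;
};

static struct fixed31_32 fixpt_from_int(int32_t i)
{
	struct fixed31_32 r = { (int64_t)i << 32 };
	return r;
}

/* Truncate to an unsigned 2.19 format; exactly 1.0 maps to 1 << 19,
 * which is what the IDENTITY_RATIO check in the next file relies on */
static uint32_t fixpt_u2d19(struct fixed31_32 a)
{
	return (uint32_t)(a.value >> (32 - 19));
}

int main(void)
{
	struct fixed31_32 one = fixpt_from_int(1);

	printf("u2d19(1.0) = 0x%x, 1 << 19 = 0x%x\n",
	       fixpt_u2d19(one), 1u << 19);
	return 0;
}
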
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index e305c28..46a35c7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -98,6 +98,30 @@
 	GAMUT_REMAP_COMB_COEFF
 };
 
+void dpp_read_state(struct dpp *dpp_base,
+		struct dcn_dpp_state *s)
+{
+	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
+
+	REG_GET(CM_IGAM_CONTROL,
+			CM_IGAM_LUT_MODE, &s->igam_lut_mode);
+	REG_GET(CM_IGAM_CONTROL,
+			CM_IGAM_INPUT_FORMAT, &s->igam_input_format);
+	REG_GET(CM_DGAM_CONTROL,
+			CM_DGAM_LUT_MODE, &s->dgam_lut_mode);
+	REG_GET(CM_RGAM_CONTROL,
+			CM_RGAM_LUT_MODE, &s->rgam_lut_mode);
+	REG_GET(CM_GAMUT_REMAP_CONTROL,
+			CM_GAMUT_REMAP_MODE, &s->gamut_remap_mode);
+
+	s->gamut_remap_c11_c12 = REG_READ(CM_GAMUT_REMAP_C11_C12);
+	s->gamut_remap_c13_c14 = REG_READ(CM_GAMUT_REMAP_C13_C14);
+	s->gamut_remap_c21_c22 = REG_READ(CM_GAMUT_REMAP_C21_C22);
+	s->gamut_remap_c23_c24 = REG_READ(CM_GAMUT_REMAP_C23_C24);
+	s->gamut_remap_c31_c32 = REG_READ(CM_GAMUT_REMAP_C31_C32);
+	s->gamut_remap_c33_c34 = REG_READ(CM_GAMUT_REMAP_C33_C34);
+}
+
 /* Program gamut remap in bypass mode */
 void dpp_set_gamut_remap_bypass(struct dcn10_dpp *dpp)
 {
@@ -106,7 +130,7 @@
 	/* Gamut remap in bypass */
 }
 
-#define IDENTITY_RATIO(ratio) (dal_fixed31_32_u2d19(ratio) == (1 << 19))
+#define IDENTITY_RATIO(ratio) (dc_fixpt_u2d19(ratio) == (1 << 19))
 
 
 bool dpp_get_optimal_number_of_taps(
@@ -121,6 +145,18 @@
 	else
 		pixel_width = scl_data->viewport.width;
 
+	/* Some ASICs do not support FP16 scaling, so we reject modes that require it */
+	if (scl_data->viewport.width  != scl_data->h_active &&
+		scl_data->viewport.height != scl_data->v_active &&
+		dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT &&
+		scl_data->format == PIXEL_FORMAT_FP16)
+		return false;
+
+	if (scl_data->viewport.width > scl_data->h_active &&
+		dpp->ctx->dc->debug.max_downscale_src_width != 0 &&
+		scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width)
+		return false;
+
 	/* TODO: add lb check */
 
 	/* No support for programming ratio of 4, drop to 3.99999.. */
@@ -257,7 +293,7 @@
 		struct dpp *dpp_base,
 		enum surface_pixel_format format,
 		enum expansion_mode mode,
-		struct csc_transform input_csc_color_matrix,
+		struct dc_csc_transform input_csc_color_matrix,
 		enum dc_color_space input_color_space)
 {
 	uint32_t pixel_format;
@@ -416,7 +452,7 @@
 	if (src_x_offset >= (int)param->viewport_width)
 		cur_en = 0;  /* not visible beyond right edge*/
 
-	if (src_x_offset + (int)width < 0)
+	if (src_x_offset + (int)width <= 0)
 		cur_en = 0;  /* not visible beyond left edge*/
 
 	REG_UPDATE(CURSOR0_CONTROL,
@@ -443,6 +479,7 @@
 }
 
 static const struct dpp_funcs dcn10_dpp_funcs = {
+		.dpp_read_state = dpp_read_state,
 		.dpp_reset = dpp_reset,
 		.dpp_set_scaler = dpp1_dscl_set_scaler_manual_scale,
 		.dpp_get_optimal_number_of_taps = dpp_get_optimal_number_of_taps,
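
Both cursor hunks in this series change a `< 0` visibility test to `<= 0`: a cursor spanning [src_x_offset, src_x_offset + width) is already fully off-screen when its right edge lands exactly on x = 0. A standalone sketch of the boundary condition (illustrative names only, not the driver code):

#include <stdbool.h>
#include <stdio.h>

/* A cursor occupying [src_x_offset, src_x_offset + width) is invisible
 * once its right edge reaches x = 0, hence <= rather than < */
static bool cursor_visible(int src_x_offset, int width, int viewport_width)
{
	if (src_x_offset >= viewport_width)
		return false;	/* not visible beyond right edge */
	if (src_x_offset + width <= 0)
		return false;	/* not visible beyond left edge */
	return true;
}

int main(void)
{
	/* right edge exactly at x = 0: not visible */
	printf("%d\n", cursor_visible(-32, 32, 1920));	/* 0 */
	printf("%d\n", cursor_visible(-31, 32, 1920));	/* 1 */
	return 0;
}
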
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
index 17b062a..5944a3b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
@@ -44,6 +44,10 @@
 #define TF_REG_LIST_DCN(id) \
 	SRI(CM_GAMUT_REMAP_CONTROL, CM, id),\
 	SRI(CM_GAMUT_REMAP_C11_C12, CM, id),\
+	SRI(CM_GAMUT_REMAP_C13_C14, CM, id),\
+	SRI(CM_GAMUT_REMAP_C21_C22, CM, id),\
+	SRI(CM_GAMUT_REMAP_C23_C24, CM, id),\
+	SRI(CM_GAMUT_REMAP_C31_C32, CM, id),\
 	SRI(CM_GAMUT_REMAP_C33_C34, CM, id),\
 	SRI(DSCL_EXT_OVERSCAN_LEFT_RIGHT, DSCL, id), \
 	SRI(DSCL_EXT_OVERSCAN_TOP_BOTTOM, DSCL, id), \
@@ -108,6 +112,8 @@
 	SRI(CM_DGAM_LUT_DATA, CM, id), \
 	SRI(CM_CONTROL, CM, id), \
 	SRI(CM_DGAM_CONTROL, CM, id), \
+	SRI(CM_TEST_DEBUG_INDEX, CM, id), \
+	SRI(CM_TEST_DEBUG_DATA, CM, id), \
 	SRI(FORMAT_CONTROL, CNVC_CFG, id), \
 	SRI(CNVC_SURFACE_PIXEL_FORMAT, CNVC_CFG, id), \
 	SRI(CURSOR0_CONTROL, CNVC_CUR, id), \
@@ -175,6 +181,14 @@
 	TF_SF(CM0_CM_GAMUT_REMAP_CONTROL, CM_GAMUT_REMAP_MODE, mask_sh),\
 	TF_SF(CM0_CM_GAMUT_REMAP_C11_C12, CM_GAMUT_REMAP_C11, mask_sh),\
 	TF_SF(CM0_CM_GAMUT_REMAP_C11_C12, CM_GAMUT_REMAP_C12, mask_sh),\
+	TF_SF(CM0_CM_GAMUT_REMAP_C13_C14, CM_GAMUT_REMAP_C13, mask_sh),\
+	TF_SF(CM0_CM_GAMUT_REMAP_C13_C14, CM_GAMUT_REMAP_C14, mask_sh),\
+	TF_SF(CM0_CM_GAMUT_REMAP_C21_C22, CM_GAMUT_REMAP_C21, mask_sh),\
+	TF_SF(CM0_CM_GAMUT_REMAP_C21_C22, CM_GAMUT_REMAP_C22, mask_sh),\
+	TF_SF(CM0_CM_GAMUT_REMAP_C23_C24, CM_GAMUT_REMAP_C23, mask_sh),\
+	TF_SF(CM0_CM_GAMUT_REMAP_C23_C24, CM_GAMUT_REMAP_C24, mask_sh),\
+	TF_SF(CM0_CM_GAMUT_REMAP_C31_C32, CM_GAMUT_REMAP_C31, mask_sh),\
+	TF_SF(CM0_CM_GAMUT_REMAP_C31_C32, CM_GAMUT_REMAP_C32, mask_sh),\
 	TF_SF(CM0_CM_GAMUT_REMAP_C33_C34, CM_GAMUT_REMAP_C33, mask_sh),\
 	TF_SF(CM0_CM_GAMUT_REMAP_C33_C34, CM_GAMUT_REMAP_C34, mask_sh),\
 	TF_SF(DSCL0_DSCL_EXT_OVERSCAN_LEFT_RIGHT, EXT_OVERSCAN_LEFT, mask_sh),\
@@ -300,6 +314,7 @@
 	TF_SF(CM0_CM_DGAM_LUT_INDEX, CM_DGAM_LUT_INDEX, mask_sh), \
 	TF_SF(CM0_CM_DGAM_LUT_DATA, CM_DGAM_LUT_DATA, mask_sh), \
 	TF_SF(CM0_CM_DGAM_CONTROL, CM_DGAM_LUT_MODE, mask_sh), \
+	TF_SF(CM0_CM_TEST_DEBUG_INDEX, CM_TEST_DEBUG_INDEX, mask_sh), \
 	TF_SF(CNVC_CFG0_FORMAT_CONTROL, CNVC_BYPASS, mask_sh), \
 	TF2_SF(CNVC_CFG0, FORMAT_CONTROL__ALPHA_EN, mask_sh), \
 	TF_SF(CNVC_CFG0_FORMAT_CONTROL, FORMAT_EXPANSION_MODE, mask_sh), \
@@ -417,6 +432,41 @@
 	TF_SF(CURSOR0_CURSOR_CONTROL, CURSOR_ENABLE, mask_sh), \
 	TF_SF(DPP_TOP0_DPP_CONTROL, DPPCLK_RATE_CONTROL, mask_sh)
 
+/*
+ * DCN1 CM debug status register definition:
+ *
+	register :ID9_CM_STATUS do
+	implement_ref :cm
+	map to:  :cmdebugind, at: j
+	width 32
+	disclosure   NEVER
+
+		field :ID9_VUPDATE_CFG, [0], R
+		field :ID9_IGAM_LUT_MODE, [2..1], R
+		field :ID9_BNS_BYPASS, [3], R
+		field :ID9_ICSC_MODE, [5..4], R
+		field :ID9_DGAM_LUT_MODE, [8..6], R
+		field :ID9_HDR_BYPASS, [9], R
+		field :ID9_GAMUT_REMAP_MODE, [11..10], R
+		field :ID9_RGAM_LUT_MODE, [14..12], R
+		#1 free bit
+		field :ID9_OCSC_MODE, [18..16], R
+		field :ID9_DENORM_MODE, [21..19], R
+		field :ID9_ROUND_TRUNC_MODE, [25..22], R
+		field :ID9_DITHER_EN, [26], R
+		field :ID9_DITHER_MODE, [28..27], R
+	end
+*/
+
+#define TF_DEBUG_REG_LIST_SH_DCN10 \
+	.CM_TEST_DEBUG_DATA_ID9_ICSC_MODE = 4, \
+	.CM_TEST_DEBUG_DATA_ID9_OCSC_MODE = 16
+
+#define TF_DEBUG_REG_LIST_MASK_DCN10 \
+	.CM_TEST_DEBUG_DATA_ID9_ICSC_MODE = 0x30, \
+	.CM_TEST_DEBUG_DATA_ID9_OCSC_MODE = 0x70000
+
 #define TF_REG_FIELD_LIST(type) \
 	type EXT_OVERSCAN_LEFT; \
 	type EXT_OVERSCAN_RIGHT; \
@@ -486,6 +536,14 @@
 	type CM_GAMUT_REMAP_MODE; \
 	type CM_GAMUT_REMAP_C11; \
 	type CM_GAMUT_REMAP_C12; \
+	type CM_GAMUT_REMAP_C13; \
+	type CM_GAMUT_REMAP_C14; \
+	type CM_GAMUT_REMAP_C21; \
+	type CM_GAMUT_REMAP_C22; \
+	type CM_GAMUT_REMAP_C23; \
+	type CM_GAMUT_REMAP_C24; \
+	type CM_GAMUT_REMAP_C31; \
+	type CM_GAMUT_REMAP_C32; \
 	type CM_GAMUT_REMAP_C33; \
 	type CM_GAMUT_REMAP_C34; \
 	type CM_COMA_C11; \
@@ -1010,6 +1068,9 @@
 	type CUR0_EXPANSION_MODE; \
 	type CUR0_ENABLE; \
 	type CM_BYPASS; \
+	type CM_TEST_DEBUG_INDEX; \
+	type CM_TEST_DEBUG_DATA_ID9_ICSC_MODE; \
+	type CM_TEST_DEBUG_DATA_ID9_OCSC_MODE;\
 	type FORMAT_CONTROL__ALPHA_EN; \
 	type CUR0_COLOR0; \
 	type CUR0_COLOR1; \
@@ -1054,6 +1115,10 @@
 	uint32_t RECOUT_SIZE; \
 	uint32_t CM_GAMUT_REMAP_CONTROL; \
 	uint32_t CM_GAMUT_REMAP_C11_C12; \
+	uint32_t CM_GAMUT_REMAP_C13_C14; \
+	uint32_t CM_GAMUT_REMAP_C21_C22; \
+	uint32_t CM_GAMUT_REMAP_C23_C24; \
+	uint32_t CM_GAMUT_REMAP_C31_C32; \
 	uint32_t CM_GAMUT_REMAP_C33_C34; \
 	uint32_t CM_COMA_C11_C12; \
 	uint32_t CM_COMA_C33_C34; \
@@ -1255,6 +1320,8 @@
 	uint32_t CM_IGAM_LUT_RW_CONTROL; \
 	uint32_t CM_IGAM_LUT_RW_INDEX; \
 	uint32_t CM_IGAM_LUT_SEQ_COLOR; \
+	uint32_t CM_TEST_DEBUG_INDEX; \
+	uint32_t CM_TEST_DEBUG_DATA; \
 	uint32_t FORMAT_CONTROL; \
 	uint32_t CNVC_SURFACE_PIXEL_FORMAT; \
 	uint32_t CURSOR_CONTROL; \
@@ -1289,8 +1356,8 @@
 
 enum dcn10_input_csc_select {
 	INPUT_CSC_SELECT_BYPASS = 0,
-	INPUT_CSC_SELECT_ICSC,
-	INPUT_CSC_SELECT_COMA
+	INPUT_CSC_SELECT_ICSC = 1,
+	INPUT_CSC_SELECT_COMA = 2
 };
 
 void dpp1_set_cursor_attributes(
@@ -1364,6 +1431,9 @@
 		struct scaler_data *scl_data,
 		const struct scaling_taps *in_taps);
 
+void dpp_read_state(struct dpp *dpp_base,
+		struct dcn_dpp_state *s);
+
 void dpp_reset(struct dpp *dpp_base);
 
 void dpp1_cm_program_regamma_lut(
@@ -1408,7 +1478,7 @@
 		struct dpp *dpp_base,
 		enum surface_pixel_format format,
 		enum expansion_mode mode,
-		struct csc_transform input_csc_color_matrix,
+		struct dc_csc_transform input_csc_color_matrix,
 		enum dc_color_space input_color_space);
 
 void dpp1_full_bypass(struct dpp *dpp_base);
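
The new CM_TEST_DEBUG_INDEX/CM_TEST_DEBUG_DATA registers follow the usual index/data debug-bus pattern: write the word index, then read the multiplexed data word. A mock sketch using the ID9 OCSC mask from the list above (the MMIO helpers, offsets, and backing store here are stand-ins for illustration, not driver API):

#include <stdint.h>
#include <stdio.h>

/* Fake MMIO backing store standing in for the real registers; it just
 * returns whatever was seeded, and ignores the index selection */
static uint32_t fake_regs[2];
enum { CM_TEST_DEBUG_INDEX = 0, CM_TEST_DEBUG_DATA = 1 };

static void mmio_write(uint32_t reg, uint32_t val) { fake_regs[reg] = val; }
static uint32_t mmio_read(uint32_t reg) { return fake_regs[reg]; }

/* Select debug word 9 (CM status), then extract the live OCSC mode
 * from bits [18:16], per the mask 0x70000 in the list above */
static uint32_t cm_read_ocsc_mode(void)
{
	mmio_write(CM_TEST_DEBUG_INDEX, 9);
	return (mmio_read(CM_TEST_DEBUG_DATA) & 0x70000) >> 16;
}

int main(void)
{
	fake_regs[CM_TEST_DEBUG_DATA] = 4u << 16;	/* pretend OCSC mode 4 */
	printf("ocsc mode = %u\n", cm_read_ocsc_mode());
	return 0;
}
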
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
index fb32975e..116977e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
@@ -216,41 +216,55 @@
 		struct dcn10_dpp *dpp,
 		const uint16_t *regval)
 {
-	uint32_t mode;
+	uint32_t ocsc_mode;
+	uint32_t cur_mode;
 	struct color_matrices_reg gam_regs;
 
-	REG_GET(CM_OCSC_CONTROL, CM_OCSC_MODE, &mode);
-
 	if (regval == NULL) {
 		BREAK_TO_DEBUGGER();
 		return;
 	}
-	mode = 4;
+
+	/* Determine which CSC matrix (ocsc or comb) is currently in use,
+	 * then program the alternate set to double-buffer the CSC update
+	 * so it takes effect on a frame boundary.
+	 */
+	REG_SET(CM_TEST_DEBUG_INDEX, 0,
+			CM_TEST_DEBUG_INDEX, 9);
+
+	REG_GET(CM_TEST_DEBUG_DATA,
+			CM_TEST_DEBUG_DATA_ID9_OCSC_MODE, &cur_mode);
+
+	if (cur_mode != 4)
+		ocsc_mode = 4;
+	else
+		ocsc_mode = 5;
+
 	gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_OCSC_C11;
 	gam_regs.masks.csc_c11  = dpp->tf_mask->CM_OCSC_C11;
 	gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_OCSC_C12;
 	gam_regs.masks.csc_c12 = dpp->tf_mask->CM_OCSC_C12;
 
-	if (mode == 4) {
+	if (ocsc_mode == 4) {
 
 		gam_regs.csc_c11_c12 = REG(CM_OCSC_C11_C12);
 		gam_regs.csc_c33_c34 = REG(CM_OCSC_C33_C34);
 
-		cm_helper_program_color_matrices(
-				dpp->base.ctx,
-				regval,
-				&gam_regs);
-
 	} else {
 
 		gam_regs.csc_c11_c12 = REG(CM_COMB_C11_C12);
 		gam_regs.csc_c33_c34 = REG(CM_COMB_C33_C34);
 
-		cm_helper_program_color_matrices(
-				dpp->base.ctx,
-				regval,
-				&gam_regs);
 	}
+
+	cm_helper_program_color_matrices(
+			dpp->base.ctx,
+			regval,
+			&gam_regs);
+
+	REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
+
 }
 
 void dpp1_cm_set_output_csc_default(
@@ -260,15 +274,14 @@
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
 	const uint16_t *regval = NULL;
 	int arr_size;
-	uint32_t ocsc_mode = 4;
 
 	regval = find_color_matrix(colorspace, &arr_size);
 	if (regval == NULL) {
 		BREAK_TO_DEBUGGER();
 		return;
 	}
+
 	dpp1_cm_program_color_matrix(dpp, regval);
-	REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
 }
 
 static void dpp1_cm_get_reg_field(
@@ -329,9 +342,8 @@
 		const uint16_t *regval)
 {
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
-	uint32_t ocsc_mode = 4;
+
 	dpp1_cm_program_color_matrix(dpp, regval);
-	REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
 }
 
 void dpp1_cm_power_on_regamma_lut(struct dpp *dpp_base,
@@ -437,17 +449,18 @@
 void dpp1_program_input_csc(
 		struct dpp *dpp_base,
 		enum dc_color_space color_space,
-		enum dcn10_input_csc_select select,
+		enum dcn10_input_csc_select input_select,
 		const struct out_csc_color_matrix *tbl_entry)
 {
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
 	int i;
 	int arr_size = sizeof(dcn10_input_csc_matrix)/sizeof(struct dcn10_input_csc_matrix);
 	const uint16_t *regval = NULL;
-	uint32_t selection = 1;
+	uint32_t cur_select = 0;
+	enum dcn10_input_csc_select select;
 	struct color_matrices_reg gam_regs;
 
-	if (select == INPUT_CSC_SELECT_BYPASS) {
+	if (input_select == INPUT_CSC_SELECT_BYPASS) {
 		REG_SET(CM_ICSC_CONTROL, 0, CM_ICSC_MODE, 0);
 		return;
 	}
@@ -467,36 +480,45 @@
 		regval = tbl_entry->regval;
 	}
 
-	if (select == INPUT_CSC_SELECT_COMA)
-		selection = 2;
-	REG_SET(CM_ICSC_CONTROL, 0,
-			CM_ICSC_MODE, selection);
+	/* Determine which CSC matrix (icsc or coma) is currently in use,
+	 * then program the alternate set to double-buffer the CSC update
+	 * so it takes effect on a frame boundary.
+	 */
+	REG_SET(CM_TEST_DEBUG_INDEX, 0,
+			CM_TEST_DEBUG_INDEX, 9);
+
+	REG_GET(CM_TEST_DEBUG_DATA,
+			CM_TEST_DEBUG_DATA_ID9_ICSC_MODE, &cur_select);
+
+	if (cur_select != INPUT_CSC_SELECT_ICSC)
+		select = INPUT_CSC_SELECT_ICSC;
+	else
+		select = INPUT_CSC_SELECT_COMA;
 
 	gam_regs.shifts.csc_c11 = dpp->tf_shift->CM_ICSC_C11;
 	gam_regs.masks.csc_c11  = dpp->tf_mask->CM_ICSC_C11;
 	gam_regs.shifts.csc_c12 = dpp->tf_shift->CM_ICSC_C12;
 	gam_regs.masks.csc_c12 = dpp->tf_mask->CM_ICSC_C12;
 
-
 	if (select == INPUT_CSC_SELECT_ICSC) {
 
 		gam_regs.csc_c11_c12 = REG(CM_ICSC_C11_C12);
 		gam_regs.csc_c33_c34 = REG(CM_ICSC_C33_C34);
 
-		cm_helper_program_color_matrices(
-				dpp->base.ctx,
-				regval,
-				&gam_regs);
 	} else {
 
 		gam_regs.csc_c11_c12 = REG(CM_COMA_C11_C12);
 		gam_regs.csc_c33_c34 = REG(CM_COMA_C33_C34);
 
-		cm_helper_program_color_matrices(
-				dpp->base.ctx,
-				regval,
-				&gam_regs);
 	}
+
+	cm_helper_program_color_matrices(
+			dpp->base.ctx,
+			regval,
+			&gam_regs);
+
+	REG_SET(CM_ICSC_CONTROL, 0,
+				CM_ICSC_MODE, select);
 }
 
 //keep here for now, decide multi dce support later
@@ -789,13 +811,13 @@
 	REG_UPDATE(CM_IGAM_LUT_RW_INDEX, CM_IGAM_LUT_RW_INDEX, 0);
 	for (i = 0; i < gamma->num_entries; i++) {
 		REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR,
-				dal_fixed31_32_round(
+				dc_fixpt_round(
 					gamma->entries.red[i]));
 		REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR,
-				dal_fixed31_32_round(
+				dc_fixpt_round(
 					gamma->entries.green[i]));
 		REG_SET(CM_IGAM_LUT_SEQ_COLOR, 0, CM_IGAM_LUT_SEQ_COLOR,
-				dal_fixed31_32_round(
+				dc_fixpt_round(
 					gamma->entries.blue[i]));
 	}
 	// Power off LUT memory
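
The rework above double-buffers CSC programming: read back which matrix set is live via the debug bus, program the other set, then flip the mux so the update latches on a frame boundary. A minimal sketch of just the ping-pong selection, using the mode values 4 and 5 from the code above:

#include <stdio.h>

/* The two register sets behind the OCSC mux; mode 4 selects the first,
 * mode 5 the second */
enum ocsc_mode { OCSC_MODE_A = 4, OCSC_MODE_B = 5 };

/* Always program the set that is not currently live, then flip the
 * mux so the new matrix latches on the next frame boundary */
static enum ocsc_mode next_ocsc_mode(enum ocsc_mode cur)
{
	return cur == OCSC_MODE_A ? OCSC_MODE_B : OCSC_MODE_A;
}

int main(void)
{
	enum ocsc_mode cur = OCSC_MODE_A;

	cur = next_ocsc_mode(cur);	/* program set B while A is live */
	printf("now programming set for mode %d\n", cur);
	return 0;
}
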
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
index 3eb824d..4ddd627 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
@@ -169,7 +169,7 @@
 		const struct scaler_data *data,
 		bool dbg_always_scale)
 {
-	const long long one = dal_fixed31_32_one.value;
+	const long long one = dc_fixpt_one.value;
 
 	if (dpp_base->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) {
 		/* DSCL is processing data in fixed format */
@@ -464,8 +464,8 @@
 	int num_part_y, num_part_c;
 	int vtaps = scl_data->taps.v_taps;
 	int vtaps_c = scl_data->taps.v_taps_c;
-	int ceil_vratio = dal_fixed31_32_ceil(scl_data->ratios.vert);
-	int ceil_vratio_c = dal_fixed31_32_ceil(scl_data->ratios.vert_c);
+	int ceil_vratio = dc_fixpt_ceil(scl_data->ratios.vert);
+	int ceil_vratio_c = dc_fixpt_ceil(scl_data->ratios.vert_c);
 	enum lb_memory_config mem_cfg = LB_MEMORY_CONFIG_0;
 
 	if (dpp->base.ctx->dc->debug.use_max_lb)
@@ -565,52 +565,52 @@
 	uint32_t init_int = 0;
 
 	REG_SET(SCL_HORZ_FILTER_SCALE_RATIO, 0,
-			SCL_H_SCALE_RATIO, dal_fixed31_32_u2d19(data->ratios.horz) << 5);
+			SCL_H_SCALE_RATIO, dc_fixpt_u2d19(data->ratios.horz) << 5);
 
 	REG_SET(SCL_VERT_FILTER_SCALE_RATIO, 0,
-			SCL_V_SCALE_RATIO, dal_fixed31_32_u2d19(data->ratios.vert) << 5);
+			SCL_V_SCALE_RATIO, dc_fixpt_u2d19(data->ratios.vert) << 5);
 
 	REG_SET(SCL_HORZ_FILTER_SCALE_RATIO_C, 0,
-			SCL_H_SCALE_RATIO_C, dal_fixed31_32_u2d19(data->ratios.horz_c) << 5);
+			SCL_H_SCALE_RATIO_C, dc_fixpt_u2d19(data->ratios.horz_c) << 5);
 
 	REG_SET(SCL_VERT_FILTER_SCALE_RATIO_C, 0,
-			SCL_V_SCALE_RATIO_C, dal_fixed31_32_u2d19(data->ratios.vert_c) << 5);
+			SCL_V_SCALE_RATIO_C, dc_fixpt_u2d19(data->ratios.vert_c) << 5);
 
 	/*
 	 * 0.24 format for fraction, first five bits zeroed
 	 */
-	init_frac = dal_fixed31_32_u0d19(data->inits.h) << 5;
-	init_int = dal_fixed31_32_floor(data->inits.h);
+	init_frac = dc_fixpt_u0d19(data->inits.h) << 5;
+	init_int = dc_fixpt_floor(data->inits.h);
 	REG_SET_2(SCL_HORZ_FILTER_INIT, 0,
 		SCL_H_INIT_FRAC, init_frac,
 		SCL_H_INIT_INT, init_int);
 
-	init_frac = dal_fixed31_32_u0d19(data->inits.h_c) << 5;
-	init_int = dal_fixed31_32_floor(data->inits.h_c);
+	init_frac = dc_fixpt_u0d19(data->inits.h_c) << 5;
+	init_int = dc_fixpt_floor(data->inits.h_c);
 	REG_SET_2(SCL_HORZ_FILTER_INIT_C, 0,
 		SCL_H_INIT_FRAC_C, init_frac,
 		SCL_H_INIT_INT_C, init_int);
 
-	init_frac = dal_fixed31_32_u0d19(data->inits.v) << 5;
-	init_int = dal_fixed31_32_floor(data->inits.v);
+	init_frac = dc_fixpt_u0d19(data->inits.v) << 5;
+	init_int = dc_fixpt_floor(data->inits.v);
 	REG_SET_2(SCL_VERT_FILTER_INIT, 0,
 		SCL_V_INIT_FRAC, init_frac,
 		SCL_V_INIT_INT, init_int);
 
-	init_frac = dal_fixed31_32_u0d19(data->inits.v_bot) << 5;
-	init_int = dal_fixed31_32_floor(data->inits.v_bot);
+	init_frac = dc_fixpt_u0d19(data->inits.v_bot) << 5;
+	init_int = dc_fixpt_floor(data->inits.v_bot);
 	REG_SET_2(SCL_VERT_FILTER_INIT_BOT, 0,
 		SCL_V_INIT_FRAC_BOT, init_frac,
 		SCL_V_INIT_INT_BOT, init_int);
 
-	init_frac = dal_fixed31_32_u0d19(data->inits.v_c) << 5;
-	init_int = dal_fixed31_32_floor(data->inits.v_c);
+	init_frac = dc_fixpt_u0d19(data->inits.v_c) << 5;
+	init_int = dc_fixpt_floor(data->inits.v_c);
 	REG_SET_2(SCL_VERT_FILTER_INIT_C, 0,
 		SCL_V_INIT_FRAC_C, init_frac,
 		SCL_V_INIT_INT_C, init_int);
 
-	init_frac = dal_fixed31_32_u0d19(data->inits.v_c_bot) << 5;
-	init_int = dal_fixed31_32_floor(data->inits.v_c_bot);
+	init_frac = dc_fixpt_u0d19(data->inits.v_c_bot) << 5;
+	init_int = dc_fixpt_floor(data->inits.v_c_bot);
 	REG_SET_2(SCL_VERT_FILTER_INIT_BOT_C, 0,
 		SCL_V_INIT_FRAC_BOT_C, init_frac,
 		SCL_V_INIT_INT_BOT_C, init_int);
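
The init writes above pack each 31.32 value as an integer part plus a 0.24 fraction whose low five bits are zero (dc_fixpt_u0d19(x) << 5). A worked sketch with our own helper, assuming a non-negative input:

#include <stdint.h>
#include <stdio.h>

/* Keep the top 19 of the 32 fractional bits (unsigned 0.19 format) */
static uint32_t u0d19(int64_t fixpt)
{
	return (uint32_t)((fixpt & 0xffffffffLL) >> (32 - 19));
}

int main(void)
{
	int64_t v = (3LL << 32) | (1LL << 31);	/* 3.5 in 31.32 */
	uint32_t init_frac = u0d19(v) << 5;	/* 0x800000: 0.5 in 0.24 */
	uint32_t init_int = (uint32_t)(v >> 32);	/* floor: 3 */

	printf("frac=0x%06x int=%u\n", init_frac, init_int);
	return 0;
}
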
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 738f67f..943143e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -476,8 +476,235 @@
 			DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, watermark_change_req);
 }
 
+void hubbub1_soft_reset(struct hubbub *hubbub, bool reset)
+{
+	uint32_t reset_en = reset ? 1 : 0;
+
+	REG_UPDATE(DCHUBBUB_SOFT_RESET,
+			DCHUBBUB_GLOBAL_SOFT_RESET, reset_en);
+}
+
+static bool hubbub1_dcc_support_swizzle(
+		enum swizzle_mode_values swizzle,
+		unsigned int bytes_per_element,
+		enum segment_order *segment_order_horz,
+		enum segment_order *segment_order_vert)
+{
+	bool standard_swizzle = false;
+	bool display_swizzle = false;
+
+	switch (swizzle) {
+	case DC_SW_4KB_S:
+	case DC_SW_64KB_S:
+	case DC_SW_VAR_S:
+	case DC_SW_4KB_S_X:
+	case DC_SW_64KB_S_X:
+	case DC_SW_VAR_S_X:
+		standard_swizzle = true;
+		break;
+	case DC_SW_4KB_D:
+	case DC_SW_64KB_D:
+	case DC_SW_VAR_D:
+	case DC_SW_4KB_D_X:
+	case DC_SW_64KB_D_X:
+	case DC_SW_VAR_D_X:
+		display_swizzle = true;
+		break;
+	default:
+		break;
+	}
+
+	if (bytes_per_element == 1 && standard_swizzle) {
+		*segment_order_horz = segment_order__contiguous;
+		*segment_order_vert = segment_order__na;
+		return true;
+	}
+	if (bytes_per_element == 2 && standard_swizzle) {
+		*segment_order_horz = segment_order__non_contiguous;
+		*segment_order_vert = segment_order__contiguous;
+		return true;
+	}
+	if (bytes_per_element == 4 && standard_swizzle) {
+		*segment_order_horz = segment_order__non_contiguous;
+		*segment_order_vert = segment_order__contiguous;
+		return true;
+	}
+	if (bytes_per_element == 8 && standard_swizzle) {
+		*segment_order_horz = segment_order__na;
+		*segment_order_vert = segment_order__contiguous;
+		return true;
+	}
+	if (bytes_per_element == 8 && display_swizzle) {
+		*segment_order_horz = segment_order__contiguous;
+		*segment_order_vert = segment_order__non_contiguous;
+		return true;
+	}
+
+	return false;
+}
+
+static bool hubbub1_dcc_support_pixel_format(
+		enum surface_pixel_format format,
+		unsigned int *bytes_per_element)
+{
+	/* DML: get_bytes_per_element */
+	switch (format) {
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
+	case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
+		*bytes_per_element = 2;
+		return true;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
+		*bytes_per_element = 4;
+		return true;
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
+	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
+	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
+		*bytes_per_element = 8;
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void hubbub1_get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height,
+		unsigned int bytes_per_element)
+{
+	/* copied from DML; we might want to refactor this to call into DML directly */
+	/* DML : get_blk256_size */
+	if (bytes_per_element == 1) {
+		*blk256_width = 16;
+		*blk256_height = 16;
+	} else if (bytes_per_element == 2) {
+		*blk256_width = 16;
+		*blk256_height = 8;
+	} else if (bytes_per_element == 4) {
+		*blk256_width = 8;
+		*blk256_height = 8;
+	} else if (bytes_per_element == 8) {
+		*blk256_width = 8;
+		*blk256_height = 4;
+	}
+}
+
+static void hubbub1_det_request_size(
+		unsigned int height,
+		unsigned int width,
+		unsigned int bpe,
+		bool *req128_horz_wc,
+		bool *req128_vert_wc)
+{
+	unsigned int detile_buf_size = 164 * 1024;  /* 164KB for DCN1.0 */
+
+	unsigned int blk256_height = 0;
+	unsigned int blk256_width = 0;
+	unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc;
+
+	hubbub1_get_blk256_size(&blk256_width, &blk256_height, bpe);
+
+	swath_bytes_horz_wc = height * blk256_height * bpe;
+	swath_bytes_vert_wc = width * blk256_width * bpe;
+
+	*req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ?
+			false : /* full 256B request */
+			true; /* half 128b request */
+
+	*req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ?
+			false : /* full 256B request */
+			true; /* half 128b request */
+}
+
+static bool hubbub1_get_dcc_compression_cap(struct hubbub *hubbub,
+		const struct dc_dcc_surface_param *input,
+		struct dc_surface_dcc_cap *output)
+{
+	struct dc *dc = hubbub->ctx->dc;
+	/* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */
+	enum dcc_control dcc_control;
+	unsigned int bpe;
+	enum segment_order segment_order_horz, segment_order_vert;
+	bool req128_horz_wc, req128_vert_wc;
+
+	memset(output, 0, sizeof(*output));
+
+	if (dc->debug.disable_dcc == DCC_DISABLE)
+		return false;
+
+	if (!hubbub->funcs->dcc_support_pixel_format(input->format, &bpe))
+		return false;
+
+	if (!hubbub->funcs->dcc_support_swizzle(input->swizzle_mode, bpe,
+			&segment_order_horz, &segment_order_vert))
+		return false;
+
+	hubbub1_det_request_size(input->surface_size.height,  input->surface_size.width,
+			bpe, &req128_horz_wc, &req128_vert_wc);
+
+	if (!req128_horz_wc && !req128_vert_wc) {
+		dcc_control = dcc_control__256_256_xxx;
+	} else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
+		if (!req128_horz_wc)
+			dcc_control = dcc_control__256_256_xxx;
+		else if (segment_order_horz == segment_order__contiguous)
+			dcc_control = dcc_control__128_128_xxx;
+		else
+			dcc_control = dcc_control__256_64_64;
+	} else if (input->scan == SCAN_DIRECTION_VERTICAL) {
+		if (!req128_vert_wc)
+			dcc_control = dcc_control__256_256_xxx;
+		else if (segment_order_vert == segment_order__contiguous)
+			dcc_control = dcc_control__128_128_xxx;
+		else
+			dcc_control = dcc_control__256_64_64;
+	} else {
+		if ((req128_horz_wc &&
+			segment_order_horz == segment_order__non_contiguous) ||
+			(req128_vert_wc &&
+			segment_order_vert == segment_order__non_contiguous))
+			/* access_dir not known, must use most constraining */
+			dcc_control = dcc_control__256_64_64;
+		else
+			/* req128 is true for either horz or vert,
+			 * but segment_order is contiguous
+			 */
+			dcc_control = dcc_control__128_128_xxx;
+	}
+
+	if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE &&
+		dcc_control != dcc_control__256_256_xxx)
+		return false;
+
+	switch (dcc_control) {
+	case dcc_control__256_256_xxx:
+		output->grph.rgb.max_uncompressed_blk_size = 256;
+		output->grph.rgb.max_compressed_blk_size = 256;
+		output->grph.rgb.independent_64b_blks = false;
+		break;
+	case dcc_control__128_128_xxx:
+		output->grph.rgb.max_uncompressed_blk_size = 128;
+		output->grph.rgb.max_compressed_blk_size = 128;
+		output->grph.rgb.independent_64b_blks = false;
+		break;
+	case dcc_control__256_64_64:
+		output->grph.rgb.max_uncompressed_blk_size = 256;
+		output->grph.rgb.max_compressed_blk_size = 64;
+		output->grph.rgb.independent_64b_blks = true;
+		break;
+	}
+
+	output->capable = true;
+	output->const_color_support = false;
+
+	return true;
+}
+
 static const struct hubbub_funcs hubbub1_funcs = {
-	.update_dchub = hubbub1_update_dchub
+	.update_dchub = hubbub1_update_dchub,
+	.dcc_support_swizzle = hubbub1_dcc_support_swizzle,
+	.dcc_support_pixel_format = hubbub1_dcc_support_pixel_format,
+	.get_dcc_compression_cap = hubbub1_get_dcc_compression_cap,
 };
 
 void hubbub1_construct(struct hubbub *hubbub,
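
hubbub1_det_request_size() above drops from full 256B to half 128B requests once two swaths no longer fit the 164KB detile buffer. A self-contained sketch of that decision for the 4 bytes-per-element case (blk256 height 8, per hubbub1_get_blk256_size):

#include <stdbool.h>
#include <stdio.h>

#define DETILE_BUF_SIZE (164 * 1024)	/* DCN1.0, from the code above */

/* Fall back to half 128B requests when two swaths no longer fit the
 * detile buffer */
static bool use_128b_requests(unsigned int height, unsigned int bpe,
			      unsigned int blk256_height)
{
	unsigned int swath_bytes = height * blk256_height * bpe;

	return 2 * swath_bytes > DETILE_BUF_SIZE;
}

int main(void)
{
	/* 2160 lines, 4 bpe, blk256 height 8: 2 * 69120B fits in 164KB */
	printf("%d\n", use_128b_requests(2160, 4, 8));	/* 0: full 256B */
	printf("%d\n", use_128b_requests(4320, 4, 8));	/* 1: half 128B */
	return 0;
}
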
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index a16e908..6315a0e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -27,6 +27,7 @@
 #define __DC_HUBBUB_DCN10_H__
 
 #include "core_types.h"
+#include "dchubbub.h"
 
 #define HUBHUB_REG_LIST_DCN()\
 	SR(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A),\
@@ -47,7 +48,8 @@
 	SR(DCHUBBUB_ARB_DF_REQ_OUTSTAND),\
 	SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
 	SR(DCHUBBUB_TEST_DEBUG_INDEX), \
-	SR(DCHUBBUB_TEST_DEBUG_DATA)
+	SR(DCHUBBUB_TEST_DEBUG_DATA),\
+	SR(DCHUBBUB_SOFT_RESET)
 
 #define HUBBUB_SR_WATERMARK_REG_LIST()\
 	SR(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A),\
@@ -104,6 +106,7 @@
 	uint32_t DCHUBBUB_SDPIF_AGP_BOT;
 	uint32_t DCHUBBUB_SDPIF_AGP_TOP;
 	uint32_t DCHUBBUB_CRC_CTRL;
+	uint32_t DCHUBBUB_SOFT_RESET;
 };
 
 /* set field name */
@@ -113,6 +116,7 @@
 
 #define HUBBUB_MASK_SH_LIST_DCN(mask_sh)\
 		HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \
+		HUBBUB_SF(DCHUBBUB_SOFT_RESET, DCHUBBUB_GLOBAL_SOFT_RESET, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_REQUEST, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_ARB_WATERMARK_CHANGE_CNTL, DCHUBBUB_ARB_WATERMARK_CHANGE_DONE_INTERRUPT_DISABLE, mask_sh), \
 		HUBBUB_SF(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_VALUE, mask_sh), \
@@ -142,6 +146,7 @@
 		type DCHUBBUB_ARB_SAT_LEVEL;\
 		type DCHUBBUB_ARB_MIN_REQ_OUTSTAND;\
 		type DCHUBBUB_GLOBAL_TIMER_REFDIV;\
+		type DCHUBBUB_GLOBAL_SOFT_RESET; \
 		type SDPIF_FB_TOP;\
 		type SDPIF_FB_BASE;\
 		type SDPIF_FB_OFFSET;\
@@ -173,12 +178,6 @@
 	struct dcn_hubbub_wm_set sets[4];
 };
 
-struct hubbub_funcs {
-	void (*update_dchub)(
-			struct hubbub *hubbub,
-			struct dchub_init_data *dh_data);
-};
-
 struct hubbub {
 	const struct hubbub_funcs *funcs;
 	struct dc_context *ctx;
@@ -206,6 +205,7 @@
 void hubbub1_wm_read_state(struct hubbub *hubbub,
 		struct dcn_hubbub_wm *wm);
 
+void hubbub1_soft_reset(struct hubbub *hubbub, bool reset);
 void hubbub1_construct(struct hubbub *hubbub,
 	struct dc_context *ctx,
 	const struct dcn_hubbub_registers *hubbub_regs,
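
hubbub_funcs moves out of this header (into dchubbub.h) and gains the DCC callbacks wired up above; it is the driver's usual ops-table polymorphism. A generic sketch of the pattern -- the struct layout and names below are illustrative, not the real interface:

#include <stdio.h>

struct hubbub;	/* forward declaration */

/* An ops table lets each ASIC generation supply its own callbacks
 * behind one interface */
struct hubbub_ops {
	int (*get_cap)(struct hubbub *hubbub);
};

struct hubbub {
	const struct hubbub_ops *funcs;
	int generation;
};

static int dcn10_get_cap(struct hubbub *hubbub)
{
	return hubbub->generation;
}

static const struct hubbub_ops dcn10_ops = {
	.get_cap = dcn10_get_cap,
};

int main(void)
{
	struct hubbub hb = { .funcs = &dcn10_ops, .generation = 10 };

	printf("cap=%d\n", hb.funcs->get_cap(&hb));
	return 0;
}
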
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 39b72f6..d2ab78b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -78,6 +78,27 @@
 			CURSOR_ENABLE, 0);
 }
 
+static void hubp1_disable_control(struct hubp *hubp, bool disable_hubp)
+{
+	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+	uint32_t disable = disable_hubp ? 1 : 0;
+
+	REG_UPDATE(DCHUBP_CNTL,
+			HUBP_DISABLE, disable);
+}
+
+static unsigned int hubp1_get_underflow_status(struct hubp *hubp)
+{
+	uint32_t hubp_underflow = 0;
+	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+
+	REG_GET(DCHUBP_CNTL,
+		HUBP_UNDERFLOW_STATUS,
+		&hubp_underflow);
+
+	return hubp_underflow;
+}
+
 static void hubp1_set_hubp_blank_en(struct hubp *hubp, bool blank)
 {
 	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
@@ -146,6 +167,9 @@
 	 * 444 or 420 luma
 	 */
 	if (format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) {
+		ASSERT(plane_size->video.chroma_pitch != 0);
+		/* Chroma pitch zero can cause system hang! */
+
 		pitch = plane_size->video.luma_pitch - 1;
 		meta_pitch = dcc->video.meta_pitch_l - 1;
 		pitch_c = plane_size->video.chroma_pitch - 1;
@@ -535,11 +559,13 @@
 	REG_SET(VBLANK_PARAMETERS_3, 0,
 		REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l);
 
-	REG_SET(NOM_PARAMETERS_0, 0,
-		DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l);
+	if (REG(NOM_PARAMETERS_0))
+		REG_SET(NOM_PARAMETERS_0, 0,
+			DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l);
 
-	REG_SET(NOM_PARAMETERS_1, 0,
-		REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l);
+	if (REG(NOM_PARAMETERS_1))
+		REG_SET(NOM_PARAMETERS_1, 0,
+			REFCYC_PER_PTE_GROUP_NOM_L, dlg_attr->refcyc_per_pte_group_nom_l);
 
 	REG_SET(NOM_PARAMETERS_4, 0,
 		DST_Y_PER_META_ROW_NOM_L, dlg_attr->dst_y_per_meta_row_nom_l);
@@ -568,11 +594,13 @@
 	REG_SET(VBLANK_PARAMETERS_4, 0,
 		REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c);
 
-	REG_SET(NOM_PARAMETERS_2, 0,
-		DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c);
+	if (REG(NOM_PARAMETERS_2))
+		REG_SET(NOM_PARAMETERS_2, 0,
+			DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c);
 
-	REG_SET(NOM_PARAMETERS_3, 0,
-		REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c);
+	if (REG(NOM_PARAMETERS_3))
+		REG_SET(NOM_PARAMETERS_3, 0,
+			REFCYC_PER_PTE_GROUP_NOM_C, dlg_attr->refcyc_per_pte_group_nom_c);
 
 	REG_SET(NOM_PARAMETERS_6, 0,
 		DST_Y_PER_META_ROW_NOM_C, dlg_attr->dst_y_per_meta_row_nom_c);
@@ -609,6 +637,13 @@
 	REG_SET(DCN_SURF1_TTU_CNTL1, 0,
 		REFCYC_PER_REQ_DELIVERY_PRE,
 		ttu_attr->refcyc_per_req_delivery_pre_c);
+
+	REG_SET_3(DCN_CUR0_TTU_CNTL0, 0,
+		REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_cur0,
+		QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_cur0,
+		QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_cur0);
+	REG_SET(DCN_CUR0_TTU_CNTL1, 0,
+		REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0);
 }
 
 static void hubp1_setup(
@@ -752,9 +787,159 @@
 		  PRI_VIEWPORT_Y_START_C, viewport_c->y);
 }
 
-void hubp1_read_state(struct dcn10_hubp *hubp1,
-		struct dcn_hubp_state *s)
+void hubp1_read_state(struct hubp *hubp)
 {
+	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+	struct dcn_hubp_state *s = &hubp1->state;
+	struct _vcs_dpi_display_dlg_regs_st *dlg_attr = &s->dlg_attr;
+	struct _vcs_dpi_display_ttu_regs_st *ttu_attr = &s->ttu_attr;
+	struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs;
+
+	/* Requester */
+	REG_GET(HUBPRET_CONTROL,
+			DET_BUF_PLANE1_BASE_ADDRESS, &rq_regs->plane1_base_address);
+	REG_GET_4(DCN_EXPANSION_MODE,
+			DRQ_EXPANSION_MODE, &rq_regs->drq_expansion_mode,
+			PRQ_EXPANSION_MODE, &rq_regs->prq_expansion_mode,
+			MRQ_EXPANSION_MODE, &rq_regs->mrq_expansion_mode,
+			CRQ_EXPANSION_MODE, &rq_regs->crq_expansion_mode);
+	REG_GET_8(DCHUBP_REQ_SIZE_CONFIG,
+		CHUNK_SIZE, &rq_regs->rq_regs_l.chunk_size,
+		MIN_CHUNK_SIZE, &rq_regs->rq_regs_l.min_chunk_size,
+		META_CHUNK_SIZE, &rq_regs->rq_regs_l.meta_chunk_size,
+		MIN_META_CHUNK_SIZE, &rq_regs->rq_regs_l.min_meta_chunk_size,
+		DPTE_GROUP_SIZE, &rq_regs->rq_regs_l.dpte_group_size,
+		MPTE_GROUP_SIZE, &rq_regs->rq_regs_l.mpte_group_size,
+		SWATH_HEIGHT, &rq_regs->rq_regs_l.swath_height,
+		PTE_ROW_HEIGHT_LINEAR, &rq_regs->rq_regs_l.pte_row_height_linear);
+	REG_GET_8(DCHUBP_REQ_SIZE_CONFIG_C,
+		CHUNK_SIZE_C, &rq_regs->rq_regs_c.chunk_size,
+		MIN_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_chunk_size,
+		META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.meta_chunk_size,
+		MIN_META_CHUNK_SIZE_C, &rq_regs->rq_regs_c.min_meta_chunk_size,
+		DPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.dpte_group_size,
+		MPTE_GROUP_SIZE_C, &rq_regs->rq_regs_c.mpte_group_size,
+		SWATH_HEIGHT_C, &rq_regs->rq_regs_c.swath_height,
+		PTE_ROW_HEIGHT_LINEAR_C, &rq_regs->rq_regs_c.pte_row_height_linear);
+
+	/* DLG - Per hubp */
+	REG_GET_2(BLANK_OFFSET_0,
+		REFCYC_H_BLANK_END, &dlg_attr->refcyc_h_blank_end,
+		DLG_V_BLANK_END, &dlg_attr->dlg_vblank_end);
+
+	REG_GET(BLANK_OFFSET_1,
+		MIN_DST_Y_NEXT_START, &dlg_attr->min_dst_y_next_start);
+
+	REG_GET(DST_DIMENSIONS,
+		REFCYC_PER_HTOTAL, &dlg_attr->refcyc_per_htotal);
+
+	REG_GET_2(DST_AFTER_SCALER,
+		REFCYC_X_AFTER_SCALER, &dlg_attr->refcyc_x_after_scaler,
+		DST_Y_AFTER_SCALER, &dlg_attr->dst_y_after_scaler);
+
+	if (REG(PREFETCH_SETTINS))
+		REG_GET_2(PREFETCH_SETTINS,
+			DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch,
+			VRATIO_PREFETCH, &dlg_attr->vratio_prefetch);
+	else
+		REG_GET_2(PREFETCH_SETTINGS,
+			DST_Y_PREFETCH, &dlg_attr->dst_y_prefetch,
+			VRATIO_PREFETCH, &dlg_attr->vratio_prefetch);
+
+	REG_GET_2(VBLANK_PARAMETERS_0,
+		DST_Y_PER_VM_VBLANK, &dlg_attr->dst_y_per_vm_vblank,
+		DST_Y_PER_ROW_VBLANK, &dlg_attr->dst_y_per_row_vblank);
+
+	REG_GET(REF_FREQ_TO_PIX_FREQ,
+		REF_FREQ_TO_PIX_FREQ, &dlg_attr->ref_freq_to_pix_freq);
+
+	/* DLG - Per luma/chroma */
+	REG_GET(VBLANK_PARAMETERS_1,
+		REFCYC_PER_PTE_GROUP_VBLANK_L, &dlg_attr->refcyc_per_pte_group_vblank_l);
+
+	REG_GET(VBLANK_PARAMETERS_3,
+		REFCYC_PER_META_CHUNK_VBLANK_L, &dlg_attr->refcyc_per_meta_chunk_vblank_l);
+
+	if (REG(NOM_PARAMETERS_0))
+		REG_GET(NOM_PARAMETERS_0,
+			DST_Y_PER_PTE_ROW_NOM_L, &dlg_attr->dst_y_per_pte_row_nom_l);
+
+	if (REG(NOM_PARAMETERS_1))
+		REG_GET(NOM_PARAMETERS_1,
+			REFCYC_PER_PTE_GROUP_NOM_L, &dlg_attr->refcyc_per_pte_group_nom_l);
+
+	REG_GET(NOM_PARAMETERS_4,
+		DST_Y_PER_META_ROW_NOM_L, &dlg_attr->dst_y_per_meta_row_nom_l);
+
+	REG_GET(NOM_PARAMETERS_5,
+		REFCYC_PER_META_CHUNK_NOM_L, &dlg_attr->refcyc_per_meta_chunk_nom_l);
+
+	REG_GET_2(PER_LINE_DELIVERY_PRE,
+		REFCYC_PER_LINE_DELIVERY_PRE_L, &dlg_attr->refcyc_per_line_delivery_pre_l,
+		REFCYC_PER_LINE_DELIVERY_PRE_C, &dlg_attr->refcyc_per_line_delivery_pre_c);
+
+	REG_GET_2(PER_LINE_DELIVERY,
+		REFCYC_PER_LINE_DELIVERY_L, &dlg_attr->refcyc_per_line_delivery_l,
+		REFCYC_PER_LINE_DELIVERY_C, &dlg_attr->refcyc_per_line_delivery_c);
+
+	if (REG(PREFETCH_SETTINS_C))
+		REG_GET(PREFETCH_SETTINS_C,
+			VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c);
+	else
+		REG_GET(PREFETCH_SETTINGS_C,
+			VRATIO_PREFETCH_C, &dlg_attr->vratio_prefetch_c);
+
+	REG_GET(VBLANK_PARAMETERS_2,
+		REFCYC_PER_PTE_GROUP_VBLANK_C, &dlg_attr->refcyc_per_pte_group_vblank_c);
+
+	REG_GET(VBLANK_PARAMETERS_4,
+		REFCYC_PER_META_CHUNK_VBLANK_C, &dlg_attr->refcyc_per_meta_chunk_vblank_c);
+
+	if (REG(NOM_PARAMETERS_2))
+		REG_GET(NOM_PARAMETERS_2,
+			DST_Y_PER_PTE_ROW_NOM_C, &dlg_attr->dst_y_per_pte_row_nom_c);
+
+	if (REG(NOM_PARAMETERS_3))
+		REG_GET(NOM_PARAMETERS_3,
+			REFCYC_PER_PTE_GROUP_NOM_C, &dlg_attr->refcyc_per_pte_group_nom_c);
+
+	REG_GET(NOM_PARAMETERS_6,
+		DST_Y_PER_META_ROW_NOM_C, &dlg_attr->dst_y_per_meta_row_nom_c);
+
+	REG_GET(NOM_PARAMETERS_7,
+		REFCYC_PER_META_CHUNK_NOM_C, &dlg_attr->refcyc_per_meta_chunk_nom_c);
+
+	/* TTU - per hubp */
+	REG_GET_2(DCN_TTU_QOS_WM,
+		QoS_LEVEL_LOW_WM, &ttu_attr->qos_level_low_wm,
+		QoS_LEVEL_HIGH_WM, &ttu_attr->qos_level_high_wm);
+
+	REG_GET_2(DCN_GLOBAL_TTU_CNTL,
+		MIN_TTU_VBLANK, &ttu_attr->min_ttu_vblank,
+		QoS_LEVEL_FLIP, &ttu_attr->qos_level_flip);
+
+	/* TTU - per luma/chroma */
+	/* Assume surf0 is luma and surf1 is chroma */
+
+	REG_GET_3(DCN_SURF0_TTU_CNTL0,
+		REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_l,
+		QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_l,
+		QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_l);
+
+	REG_GET(DCN_SURF0_TTU_CNTL1,
+		REFCYC_PER_REQ_DELIVERY_PRE,
+		&ttu_attr->refcyc_per_req_delivery_pre_l);
+
+	REG_GET_3(DCN_SURF1_TTU_CNTL0,
+		REFCYC_PER_REQ_DELIVERY, &ttu_attr->refcyc_per_req_delivery_c,
+		QoS_LEVEL_FIXED, &ttu_attr->qos_level_fixed_c,
+		QoS_RAMP_DISABLE, &ttu_attr->qos_ramp_disable_c);
+
+	REG_GET(DCN_SURF1_TTU_CNTL1,
+		REFCYC_PER_REQ_DELIVERY_PRE,
+		&ttu_attr->refcyc_per_req_delivery_pre_c);
+
+	/* Rest of hubp */
 	REG_GET(DCSURF_SURFACE_CONFIG,
 			SURFACE_PIXEL_FORMAT, &s->pixel_format);
 
@@ -890,14 +1075,14 @@
 	ASSERT(param->h_scale_ratio.value);
 
 	if (param->h_scale_ratio.value)
-		dst_x_offset = dal_fixed31_32_floor(dal_fixed31_32_div(
-				dal_fixed31_32_from_int(dst_x_offset),
+		dst_x_offset = dc_fixpt_floor(dc_fixpt_div(
+				dc_fixpt_from_int(dst_x_offset),
 				param->h_scale_ratio));
 
 	if (src_x_offset >= (int)param->viewport_width)
 		cur_en = 0;  /* not visible beyond right edge*/
 
-	if (src_x_offset + (int)hubp->curs_attr.width < 0)
+	if (src_x_offset + (int)hubp->curs_attr.width <= 0)
 		cur_en = 0;  /* not visible beyond left edge*/
 
 	if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0)
@@ -952,6 +1137,10 @@
 	.hubp_disconnect = hubp1_disconnect,
 	.hubp_clk_cntl = hubp1_clk_cntl,
 	.hubp_vtg_sel = hubp1_vtg_sel,
+	.hubp_read_state = hubp1_read_state,
+	.hubp_disable_control = hubp1_disable_control,
+	.hubp_get_underflow_status = hubp1_get_underflow_status,
 };
 
 /*****************************************/
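
Several hunks above gate register access on `if (REG(...))`: registers that a reduced-functionality ASIC lacks are simply left at offset zero in the address struct. A sketch of that optional-register guard with a hypothetical register name:

#include <stdint.h>
#include <stdio.h>

/* Registers absent on a given ASIC variant keep a zero offset in the
 * address struct, so each access is gated on the offset */
struct hubp_regs {
	uint32_t NOM_PARAMETERS_0;	/* 0 when not wired up */
};

static void program_nom(const struct hubp_regs *regs, uint32_t val)
{
	if (!regs->NOM_PARAMETERS_0) {
		printf("register absent, skipping\n");
		return;
	}
	printf("write 0x%x to offset 0x%x\n", val, regs->NOM_PARAMETERS_0);
}

int main(void)
{
	struct hubp_regs with_vm = { .NOM_PARAMETERS_0 = 0x1234 };
	struct hubp_regs without_vm = { 0 };

	program_nom(&with_vm, 7);
	program_nom(&without_vm, 7);
	return 0;
}
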
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
index 4a3703e..af38403 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
@@ -30,6 +30,7 @@
 #define TO_DCN10_HUBP(hubp)\
 	container_of(hubp, struct dcn10_hubp, base)
 
+/* Register address initialization macro for all ASICs (including those with reduced functionality) */
 #define HUBP_REG_LIST_DCN(id)\
 	SRI(DCHUBP_CNTL, HUBP, id),\
 	SRI(HUBPREQ_DEBUG_DB, HUBP, id),\
@@ -78,16 +79,12 @@
 	SRI(REF_FREQ_TO_PIX_FREQ, HUBPREQ, id),\
 	SRI(VBLANK_PARAMETERS_1, HUBPREQ, id),\
 	SRI(VBLANK_PARAMETERS_3, HUBPREQ, id),\
-	SRI(NOM_PARAMETERS_0, HUBPREQ, id),\
-	SRI(NOM_PARAMETERS_1, HUBPREQ, id),\
 	SRI(NOM_PARAMETERS_4, HUBPREQ, id),\
 	SRI(NOM_PARAMETERS_5, HUBPREQ, id),\
 	SRI(PER_LINE_DELIVERY_PRE, HUBPREQ, id),\
 	SRI(PER_LINE_DELIVERY, HUBPREQ, id),\
 	SRI(VBLANK_PARAMETERS_2, HUBPREQ, id),\
 	SRI(VBLANK_PARAMETERS_4, HUBPREQ, id),\
-	SRI(NOM_PARAMETERS_2, HUBPREQ, id),\
-	SRI(NOM_PARAMETERS_3, HUBPREQ, id),\
 	SRI(NOM_PARAMETERS_6, HUBPREQ, id),\
 	SRI(NOM_PARAMETERS_7, HUBPREQ, id),\
 	SRI(DCN_TTU_QOS_WM, HUBPREQ, id),\
@@ -96,11 +93,21 @@
 	SRI(DCN_SURF0_TTU_CNTL1, HUBPREQ, id),\
 	SRI(DCN_SURF1_TTU_CNTL0, HUBPREQ, id),\
 	SRI(DCN_SURF1_TTU_CNTL1, HUBPREQ, id),\
-	SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id),\
+	SRI(DCN_CUR0_TTU_CNTL0, HUBPREQ, id),\
+	SRI(DCN_CUR0_TTU_CNTL1, HUBPREQ, id),\
 	SRI(HUBP_CLK_CNTL, HUBP, id)
 
+/* Register address initialization macro for ASICs with VM */
+#define HUBP_REG_LIST_DCN_VM(id)\
+	SRI(NOM_PARAMETERS_0, HUBPREQ, id),\
+	SRI(NOM_PARAMETERS_1, HUBPREQ, id),\
+	SRI(NOM_PARAMETERS_2, HUBPREQ, id),\
+	SRI(NOM_PARAMETERS_3, HUBPREQ, id),\
+	SRI(DCN_VM_MX_L1_TLB_CNTL, HUBPREQ, id)
+
 #define HUBP_REG_LIST_DCN10(id)\
 	HUBP_REG_LIST_DCN(id),\
+	HUBP_REG_LIST_DCN_VM(id),\
 	SRI(PREFETCH_SETTINS, HUBPREQ, id),\
 	SRI(PREFETCH_SETTINS_C, HUBPREQ, id),\
 	SRI(DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB, HUBPREQ, id),\
@@ -198,6 +205,8 @@
 	uint32_t DCN_SURF0_TTU_CNTL1; \
 	uint32_t DCN_SURF1_TTU_CNTL0; \
 	uint32_t DCN_SURF1_TTU_CNTL1; \
+	uint32_t DCN_CUR0_TTU_CNTL0; \
+	uint32_t DCN_CUR0_TTU_CNTL1; \
 	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_MSB; \
 	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LSB; \
 	uint32_t DCN_VM_CONTEXT0_PAGE_TABLE_START_ADDR_MSB; \
@@ -237,12 +246,14 @@
 #define HUBP_SF(reg_name, field_name, post_fix)\
 	.field_name = reg_name ## __ ## field_name ## post_fix
 
+/* Mask/shift struct generation macro for all ASICs (including those with reduced functionality) */
 #define HUBP_MASK_SH_LIST_DCN(mask_sh)\
 	HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_BLANK_EN, mask_sh),\
 	HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\
 	HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_STATUS, mask_sh),\
 	HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\
 	HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\
+	HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE, mask_sh),\
 	HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_PIPES, mask_sh),\
 	HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, NUM_BANKS, mask_sh),\
 	HUBP_SF(HUBP0_DCSURF_ADDR_CONFIG, PIPE_INTERLEAVE, mask_sh),\
@@ -335,8 +346,6 @@
 	HUBP_SF(HUBPREQ0_REF_FREQ_TO_PIX_FREQ, REF_FREQ_TO_PIX_FREQ, mask_sh),\
 	HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_1, REFCYC_PER_PTE_GROUP_VBLANK_L, mask_sh),\
 	HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_3, REFCYC_PER_META_CHUNK_VBLANK_L, mask_sh),\
-	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\
-	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_1, REFCYC_PER_PTE_GROUP_NOM_L, mask_sh),\
 	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_4, DST_Y_PER_META_ROW_NOM_L, mask_sh),\
 	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_5, REFCYC_PER_META_CHUNK_NOM_L, mask_sh),\
 	HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY_PRE, REFCYC_PER_LINE_DELIVERY_PRE_L, mask_sh),\
@@ -345,8 +354,6 @@
 	HUBP_SF(HUBPREQ0_PER_LINE_DELIVERY, REFCYC_PER_LINE_DELIVERY_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_2, REFCYC_PER_PTE_GROUP_VBLANK_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_VBLANK_PARAMETERS_4, REFCYC_PER_META_CHUNK_VBLANK_C, mask_sh),\
-	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_2, DST_Y_PER_PTE_ROW_NOM_C, mask_sh),\
-	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_3, REFCYC_PER_PTE_GROUP_NOM_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_6, DST_Y_PER_META_ROW_NOM_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_7, REFCYC_PER_META_CHUNK_NOM_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCN_TTU_QOS_WM, QoS_LEVEL_LOW_WM, mask_sh),\
@@ -357,12 +364,24 @@
 	HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCN_SURF0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh),\
+	HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh)
+
+/* Mask/shift struct generation macro for ASICs with VM */
+#define HUBP_MASK_SH_LIST_DCN_VM(mask_sh)\
+	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_0, DST_Y_PER_PTE_ROW_NOM_L, mask_sh),\
+	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_1, REFCYC_PER_PTE_GROUP_NOM_L, mask_sh),\
+	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_2, DST_Y_PER_PTE_ROW_NOM_C, mask_sh),\
+	HUBP_SF(HUBPREQ0_NOM_PARAMETERS_3, REFCYC_PER_PTE_GROUP_NOM_C, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, mask_sh),\
 	HUBP_SF(HUBPREQ0_DCN_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, mask_sh),\
-	HUBP_SF(HUBP0_HUBP_CLK_CNTL, HUBP_CLOCK_ENABLE, mask_sh)
+	HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, REFCYC_PER_REQ_DELIVERY, mask_sh),\
+	HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, QoS_LEVEL_FIXED, mask_sh),\
+	HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL0, QoS_RAMP_DISABLE, mask_sh),\
+	HUBP_SF(HUBPREQ0_DCN_CUR0_TTU_CNTL1, REFCYC_PER_REQ_DELIVERY_PRE, mask_sh)
 
 #define HUBP_MASK_SH_LIST_DCN10(mask_sh)\
 	HUBP_MASK_SH_LIST_DCN(mask_sh),\
+	HUBP_MASK_SH_LIST_DCN_VM(mask_sh),\
 	HUBP_SF(HUBPREQ0_PREFETCH_SETTINS, DST_Y_PREFETCH, mask_sh),\
 	HUBP_SF(HUBPREQ0_PREFETCH_SETTINS, VRATIO_PREFETCH, mask_sh),\
 	HUBP_SF(HUBPREQ0_PREFETCH_SETTINS_C, VRATIO_PREFETCH_C, mask_sh),\
@@ -403,6 +422,7 @@
 
 #define DCN_HUBP_REG_FIELD_LIST(type) \
 	type HUBP_BLANK_EN;\
+	type HUBP_DISABLE;\
 	type HUBP_TTU_DISABLE;\
 	type HUBP_NO_OUTSTANDING_REQ;\
 	type HUBP_VTG_SEL;\
@@ -601,8 +621,29 @@
 	DCN_HUBP_REG_FIELD_LIST(uint32_t);
 };
 
+struct dcn_hubp_state {
+	struct _vcs_dpi_display_dlg_regs_st dlg_attr;
+	struct _vcs_dpi_display_ttu_regs_st ttu_attr;
+	struct _vcs_dpi_display_rq_regs_st rq_regs;
+	uint32_t pixel_format;
+	uint32_t inuse_addr_hi;
+	uint32_t viewport_width;
+	uint32_t viewport_height;
+	uint32_t rotation_angle;
+	uint32_t h_mirror_en;
+	uint32_t sw_mode;
+	uint32_t dcc_en;
+	uint32_t blank_en;
+	uint32_t underflow_status;
+	uint32_t ttu_disable;
+	uint32_t min_ttu_vblank;
+	uint32_t qos_level_low_wm;
+	uint32_t qos_level_high_wm;
+};
+
 struct dcn10_hubp {
 	struct hubp base;
+	struct dcn_hubp_state state;
 	const struct dcn_mi_registers *hubp_regs;
 	const struct dcn_mi_shift *hubp_shift;
 	const struct dcn_mi_mask *hubp_mask;
@@ -680,26 +721,9 @@
 	const struct dcn_mi_shift *hubp_shift,
 	const struct dcn_mi_mask *hubp_mask);
 
-
-struct dcn_hubp_state {
-	uint32_t pixel_format;
-	uint32_t inuse_addr_hi;
-	uint32_t viewport_width;
-	uint32_t viewport_height;
-	uint32_t rotation_angle;
-	uint32_t h_mirror_en;
-	uint32_t sw_mode;
-	uint32_t dcc_en;
-	uint32_t blank_en;
-	uint32_t underflow_status;
-	uint32_t ttu_disable;
-	uint32_t min_ttu_vblank;
-	uint32_t qos_level_low_wm;
-	uint32_t qos_level_high_wm;
-};
-void hubp1_read_state(struct dcn10_hubp *hubp1,
-		struct dcn_hubp_state *s);
+void hubp1_read_state(struct hubp *hubp);
 
 enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch);
 
 #endif
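
TO_DCN10_HUBP at the top of this header is the standard container_of downcast: given a pointer to the embedded base member, recover the containing struct. A standalone illustration with toy types:

#include <stddef.h>
#include <stdio.h>

/* Recover the containing struct from a pointer to its embedded member */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct hubp {
	int inst;
};

struct dcn10_hubp {
	struct hubp base;
	int extra_state;
};

int main(void)
{
	struct dcn10_hubp h = { .base = { .inst = 2 }, .extra_state = 42 };
	struct hubp *base = &h.base;
	struct dcn10_hubp *h1 = container_of(base, struct dcn10_hubp, base);

	printf("inst=%d state=%d\n", h1->base.inst, h1->extra_state);
	return 0;
}
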
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 8b0f6b8..f8e0576 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -45,8 +45,8 @@
 #include "dcn10_hubbub.h"
 #include "dcn10_cm_common.h"
 
-#define DC_LOGGER \
-	ctx->logger
+#define DC_LOGGER_INIT(logger)
+
 #define CTX \
 	hws->ctx
 #define REG(reg)\
@@ -56,16 +56,17 @@
 #define FN(reg_name, field_name) \
 	hws->shifts->field_name, hws->masks->field_name
 
+/* each printed field is 17 wide; the first two characters are spaces */
 #define DTN_INFO_MICRO_SEC(ref_cycle) \
 	print_microsec(dc_ctx, ref_cycle)
 
 void print_microsec(struct dc_context *dc_ctx, uint32_t ref_cycle)
 {
-	static const uint32_t ref_clk_mhz = 48;
-	static const unsigned int frac = 10;
+	const uint32_t ref_clk_mhz = dc_ctx->dc->res_pool->ref_clock_inKhz / 1000;
+	static const unsigned int frac = 1000;
 	uint32_t us_x10 = (ref_cycle * frac) / ref_clk_mhz;
 
-	DTN_INFO("%d.%d \t ",
+	DTN_INFO("  %11d.%03d",
 			us_x10 / frac,
 			us_x10 % frac);
 }
@@ -92,14 +93,14 @@
 
 	hubbub1_wm_read_state(dc->res_pool->hubbub, &wm);
 
-	DTN_INFO("HUBBUB WM: \t data_urgent \t pte_meta_urgent \t "
-			"sr_enter \t sr_exit \t dram_clk_change \n");
+	DTN_INFO("HUBBUB WM:      data_urgent  pte_meta_urgent"
+			"         sr_enter          sr_exit  dram_clk_change\n");
 
 	for (i = 0; i < 4; i++) {
 		struct dcn_hubbub_wm_set *s;
 
 		s = &wm.sets[i];
-		DTN_INFO("WM_Set[%d]:\t ", s->wm_set);
+		DTN_INFO("WM_Set[%d]:", s->wm_set);
 		DTN_INFO_MICRO_SEC(s->data_urgent);
 		DTN_INFO_MICRO_SEC(s->pte_meta_urgent);
 		DTN_INFO_MICRO_SEC(s->sr_enter);
@@ -111,6 +112,121 @@
 	DTN_INFO("\n");
 }
 
+static void dcn10_log_hubp_states(struct dc *dc)
+{
+	struct dc_context *dc_ctx = dc->ctx;
+	struct resource_pool *pool = dc->res_pool;
+	int i;
+
+	DTN_INFO("HUBP:  format  addr_hi  width  height"
+			"  rot  mir  sw_mode  dcc_en  blank_en  ttu_dis  underflow"
+			"   min_ttu_vblank       qos_low_wm      qos_high_wm\n");
+	for (i = 0; i < pool->pipe_count; i++) {
+		struct hubp *hubp = pool->hubps[i];
+		struct dcn_hubp_state *s = &(TO_DCN10_HUBP(hubp)->state);
+
+		hubp->funcs->hubp_read_state(hubp);
+
+		if (!s->blank_en) {
+			DTN_INFO("[%2d]:  %5xh  %6xh  %5d  %6d  %2xh  %2xh  %6xh"
+					"  %6d  %8d  %7d  %8xh",
+					hubp->inst,
+					s->pixel_format,
+					s->inuse_addr_hi,
+					s->viewport_width,
+					s->viewport_height,
+					s->rotation_angle,
+					s->h_mirror_en,
+					s->sw_mode,
+					s->dcc_en,
+					s->blank_en,
+					s->ttu_disable,
+					s->underflow_status);
+			DTN_INFO_MICRO_SEC(s->min_ttu_vblank);
+			DTN_INFO_MICRO_SEC(s->qos_level_low_wm);
+			DTN_INFO_MICRO_SEC(s->qos_level_high_wm);
+			DTN_INFO("\n");
+		}
+	}
+
+	DTN_INFO("\n=========RQ========\n");
+	DTN_INFO("HUBP:  drq_exp_m  prq_exp_m  mrq_exp_m  crq_exp_m  plane1_ba  L:chunk_s  min_chu_s  meta_ch_s"
+		"  min_m_c_s  dpte_gr_s  mpte_gr_s  swath_hei  pte_row_h  C:chunk_s  min_chu_s  meta_ch_s"
+		"  min_m_c_s  dpte_gr_s  mpte_gr_s  swath_hei  pte_row_h\n");
+	for (i = 0; i < pool->pipe_count; i++) {
+		struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
+		struct _vcs_dpi_display_rq_regs_st *rq_regs = &s->rq_regs;
+
+		if (!s->blank_en)
+			DTN_INFO("[%2d]:  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh\n",
+				pool->hubps[i]->inst, rq_regs->drq_expansion_mode, rq_regs->prq_expansion_mode, rq_regs->mrq_expansion_mode,
+				rq_regs->crq_expansion_mode, rq_regs->plane1_base_address, rq_regs->rq_regs_l.chunk_size,
+				rq_regs->rq_regs_l.min_chunk_size, rq_regs->rq_regs_l.meta_chunk_size,
+				rq_regs->rq_regs_l.min_meta_chunk_size, rq_regs->rq_regs_l.dpte_group_size,
+				rq_regs->rq_regs_l.mpte_group_size, rq_regs->rq_regs_l.swath_height,
+				rq_regs->rq_regs_l.pte_row_height_linear, rq_regs->rq_regs_c.chunk_size, rq_regs->rq_regs_c.min_chunk_size,
+				rq_regs->rq_regs_c.meta_chunk_size, rq_regs->rq_regs_c.min_meta_chunk_size,
+				rq_regs->rq_regs_c.dpte_group_size, rq_regs->rq_regs_c.mpte_group_size,
+				rq_regs->rq_regs_c.swath_height, rq_regs->rq_regs_c.pte_row_height_linear);
+	}
+
+	DTN_INFO("========DLG========\n");
+	DTN_INFO("HUBP:  rc_hbe     dlg_vbe    min_d_y_n  rc_per_ht  rc_x_a_s "
+			"  dst_y_a_s  dst_y_pf   dst_y_vvb  dst_y_rvb  dst_y_vfl  dst_y_rfl  rf_pix_fq"
+			"  vratio_pf  vrat_pf_c  rc_pg_vbl  rc_pg_vbc  rc_mc_vbl  rc_mc_vbc  rc_pg_fll"
+			"  rc_pg_flc  rc_mc_fll  rc_mc_flc  pr_nom_l   pr_nom_c   rc_pg_nl   rc_pg_nc "
+			"  mr_nom_l   mr_nom_c   rc_mc_nl   rc_mc_nc   rc_ld_pl   rc_ld_pc   rc_ld_l  "
+			"  rc_ld_c    cha_cur0   ofst_cur1  cha_cur1   vr_af_vc0  ddrq_limt  x_rt_dlay"
+			"  x_rp_dlay  x_rr_sfl\n");
+	for (i = 0; i < pool->pipe_count; i++) {
+		struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
+		struct _vcs_dpi_display_dlg_regs_st *dlg_regs = &s->dlg_attr;
+
+		if (!s->blank_en)
+			DTN_INFO("[%2d]:  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh"
+				"  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh"
+				"  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh\n",
+				pool->hubps[i]->inst, dlg_regs->refcyc_h_blank_end, dlg_regs->dlg_vblank_end, dlg_regs->min_dst_y_next_start,
+				dlg_regs->refcyc_per_htotal, dlg_regs->refcyc_x_after_scaler, dlg_regs->dst_y_after_scaler,
+				dlg_regs->dst_y_prefetch, dlg_regs->dst_y_per_vm_vblank, dlg_regs->dst_y_per_row_vblank,
+				dlg_regs->dst_y_per_vm_flip, dlg_regs->dst_y_per_row_flip, dlg_regs->ref_freq_to_pix_freq,
+				dlg_regs->vratio_prefetch, dlg_regs->vratio_prefetch_c, dlg_regs->refcyc_per_pte_group_vblank_l,
+				dlg_regs->refcyc_per_pte_group_vblank_c, dlg_regs->refcyc_per_meta_chunk_vblank_l,
+				dlg_regs->refcyc_per_meta_chunk_vblank_c, dlg_regs->refcyc_per_pte_group_flip_l,
+				dlg_regs->refcyc_per_pte_group_flip_c, dlg_regs->refcyc_per_meta_chunk_flip_l,
+				dlg_regs->refcyc_per_meta_chunk_flip_c, dlg_regs->dst_y_per_pte_row_nom_l,
+				dlg_regs->dst_y_per_pte_row_nom_c, dlg_regs->refcyc_per_pte_group_nom_l,
+				dlg_regs->refcyc_per_pte_group_nom_c, dlg_regs->dst_y_per_meta_row_nom_l,
+				dlg_regs->dst_y_per_meta_row_nom_c, dlg_regs->refcyc_per_meta_chunk_nom_l,
+				dlg_regs->refcyc_per_meta_chunk_nom_c, dlg_regs->refcyc_per_line_delivery_pre_l,
+				dlg_regs->refcyc_per_line_delivery_pre_c, dlg_regs->refcyc_per_line_delivery_l,
+				dlg_regs->refcyc_per_line_delivery_c, dlg_regs->chunk_hdl_adjust_cur0, dlg_regs->dst_y_offset_cur1,
+				dlg_regs->chunk_hdl_adjust_cur1, dlg_regs->vready_after_vcount0, dlg_regs->dst_y_delta_drq_limit,
+				dlg_regs->xfc_reg_transfer_delay, dlg_regs->xfc_reg_precharge_delay,
+				dlg_regs->xfc_reg_remote_surface_flip_latency);
+	}
+
+	DTN_INFO("========TTU========\n");
+	DTN_INFO("HUBP:  qos_ll_wm  qos_lh_wm  mn_ttu_vb  qos_l_flp  rc_rd_p_l  rc_rd_l    rc_rd_p_c"
+			"  rc_rd_c    rc_rd_c0   rc_rd_pc0  rc_rd_c1   rc_rd_pc1  qos_lf_l   qos_rds_l"
+			"  qos_lf_c   qos_rds_c  qos_lf_c0  qos_rds_c0 qos_lf_c1  qos_rds_c1\n");
+	for (i = 0; i < pool->pipe_count; i++) {
+		struct dcn_hubp_state *s = &(TO_DCN10_HUBP(pool->hubps[i])->state);
+		struct _vcs_dpi_display_ttu_regs_st *ttu_regs = &s->ttu_attr;
+
+		if (!s->blank_en)
+			DTN_INFO("[%2d]:  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh  %8xh\n",
+				pool->hubps[i]->inst, ttu_regs->qos_level_low_wm, ttu_regs->qos_level_high_wm, ttu_regs->min_ttu_vblank,
+				ttu_regs->qos_level_flip, ttu_regs->refcyc_per_req_delivery_pre_l, ttu_regs->refcyc_per_req_delivery_l,
+				ttu_regs->refcyc_per_req_delivery_pre_c, ttu_regs->refcyc_per_req_delivery_c, ttu_regs->refcyc_per_req_delivery_cur0,
+				ttu_regs->refcyc_per_req_delivery_pre_cur0, ttu_regs->refcyc_per_req_delivery_cur1,
+				ttu_regs->refcyc_per_req_delivery_pre_cur1, ttu_regs->qos_level_fixed_l, ttu_regs->qos_ramp_disable_l,
+				ttu_regs->qos_level_fixed_c, ttu_regs->qos_ramp_disable_c, ttu_regs->qos_level_fixed_cur0,
+				ttu_regs->qos_ramp_disable_cur0, ttu_regs->qos_level_fixed_cur1, ttu_regs->qos_ramp_disable_cur1);
+	}
+	DTN_INFO("\n");
+}
+
 void dcn10_log_hw_state(struct dc *dc)
 {
 	struct dc_context *dc_ctx = dc->ctx;
@@ -121,41 +237,64 @@
 
 	dcn10_log_hubbub_state(dc);
 
-	DTN_INFO("HUBP:\t format \t addr_hi \t width \t height \t "
-			"rotation \t mirror \t  sw_mode \t "
-			"dcc_en \t blank_en \t ttu_dis \t underflow \t "
-			"min_ttu_vblank \t qos_low_wm \t qos_high_wm \n");
+	dcn10_log_hubp_states(dc);
 
+	DTN_INFO("DPP:    IGAM format  IGAM mode    DGAM mode    RGAM mode"
+			"  GAMUT mode  C11 C12   C13 C14   C21 C22   C23 C24   "
+			"C31 C32   C33 C34\n");
 	for (i = 0; i < pool->pipe_count; i++) {
-		struct hubp *hubp = pool->hubps[i];
-		struct dcn_hubp_state s;
+		struct dpp *dpp = pool->dpps[i];
+		struct dcn_dpp_state s;
 
-		hubp1_read_state(TO_DCN10_HUBP(hubp), &s);
+		dpp->funcs->dpp_read_state(dpp, &s);
 
-		DTN_INFO("[%d]:\t %xh \t %xh \t %d \t %d \t "
-				"%xh \t %xh \t %xh \t "
-				"%d \t %d \t %d \t %xh \t",
-				hubp->inst,
-				s.pixel_format,
-				s.inuse_addr_hi,
-				s.viewport_width,
-				s.viewport_height,
-				s.rotation_angle,
-				s.h_mirror_en,
-				s.sw_mode,
-				s.dcc_en,
-				s.blank_en,
-				s.ttu_disable,
-				s.underflow_status);
-		DTN_INFO_MICRO_SEC(s.min_ttu_vblank);
-		DTN_INFO_MICRO_SEC(s.qos_level_low_wm);
-		DTN_INFO_MICRO_SEC(s.qos_level_high_wm);
+		DTN_INFO("[%2d]:  %11xh  %-11s  %-11s  %-11s"
+				"%8x    %08xh %08xh %08xh %08xh %08xh %08xh",
+				dpp->inst,
+				s.igam_input_format,
+				(s.igam_lut_mode == 0) ? "BypassFixed" :
+					((s.igam_lut_mode == 1) ? "BypassFloat" :
+					((s.igam_lut_mode == 2) ? "RAM" :
+					((s.igam_lut_mode == 3) ? "RAM" :
+								 "Unknown"))),
+				(s.dgam_lut_mode == 0) ? "Bypass" :
+					((s.dgam_lut_mode == 1) ? "sRGB" :
+					((s.dgam_lut_mode == 2) ? "Ycc" :
+					((s.dgam_lut_mode == 3) ? "RAM" :
+					((s.dgam_lut_mode == 4) ? "RAM" :
+								 "Unknown")))),
+				(s.rgam_lut_mode == 0) ? "Bypass" :
+					((s.rgam_lut_mode == 1) ? "sRGB" :
+					((s.rgam_lut_mode == 2) ? "Ycc" :
+					((s.rgam_lut_mode == 3) ? "RAM" :
+					((s.rgam_lut_mode == 4) ? "RAM" :
+								 "Unknown")))),
+				s.gamut_remap_mode,
+				s.gamut_remap_c11_c12,
+				s.gamut_remap_c13_c14,
+				s.gamut_remap_c21_c22,
+				s.gamut_remap_c23_c24,
+				s.gamut_remap_c31_c32,
+				s.gamut_remap_c33_c34);
 		DTN_INFO("\n");
 	}
 	DTN_INFO("\n");
 
-	DTN_INFO("OTG:\t v_bs \t v_be \t v_ss \t v_se \t vpol \t vmax \t vmin \t "
-			"h_bs \t h_be \t h_ss \t h_se \t hpol \t htot \t vtot \t underflow\n");
+	DTN_INFO("MPCC:  OPP  DPP  MPCCBOT  MODE  ALPHA_MODE  PREMULT  OVERLAP_ONLY  IDLE\n");
+	for (i = 0; i < pool->pipe_count; i++) {
+		struct mpcc_state s = {0};
+
+		pool->mpc->funcs->read_mpcc_state(pool->mpc, i, &s);
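+		/* opp_id 0xf appears to mark an MPCC with no OPP attached; skip it */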
+		if (s.opp_id != 0xf)
+			DTN_INFO("[%2d]:  %2xh  %2xh  %6xh  %4d  %10d  %7d  %12d  %4d\n",
+				i, s.opp_id, s.dpp_id, s.bot_mpcc_id,
+				s.mode, s.alpha_mode, s.pre_multiplied_alpha, s.overlap_only,
+				s.idle);
+	}
+	DTN_INFO("\n");
+
+	DTN_INFO("OTG:  v_bs  v_be  v_ss  v_se  vpol  vmax  vmin  vmax_sel  vmin_sel"
+			"  h_bs  h_be  h_ss  h_se  hpol  htot  vtot  underflow\n");
 
 	for (i = 0; i < pool->timing_generator_count; i++) {
 		struct timing_generator *tg = pool->timing_generators[i];
@@ -167,9 +306,8 @@
 		if ((s.otg_enabled & 1) == 0)
 			continue;
 
-		DTN_INFO("[%d]:\t %d \t %d \t %d \t %d \t "
-				"%d \t %d \t %d \t %d \t %d \t %d \t "
-				"%d \t %d \t %d \t %d \t %d \t ",
+		DTN_INFO("[%d]: %5d %5d %5d %5d %5d %5d %5d %9d %9d %5d %5d %5d"
+				" %5d %5d %5d %5d  %9d\n",
 				tg->inst,
 				s.v_blank_start,
 				s.v_blank_end,
@@ -178,6 +316,8 @@
 				s.v_sync_a_pol,
 				s.v_total_max,
 				s.v_total_min,
+				s.v_total_max_sel,
+				s.v_total_min_sel,
 				s.h_blank_start,
 				s.h_blank_end,
 				s.h_sync_a_start,
@@ -186,10 +326,25 @@
 				s.h_total,
 				s.v_total,
 				s.underflow_occurred_status);
-		DTN_INFO("\n");
+
+		/* Clear underflow for debug purposes.
+		 * We want to keep the underflow sticky bit on for longevity
+		 * tests outside of the test environment. This function is
+		 * called only from Windows or Diags test environments, so it
+		 * is safe to clear it here without affecting the original
+		 * intent.
+		 */
+		tg->funcs->clear_optc_underflow(tg);
 	}
 	DTN_INFO("\n");
 
+	DTN_INFO("\nCALCULATED Clocks: dcfclk_khz:%d  dcfclk_deep_sleep_khz:%d  dispclk_khz:%d\n"
+		"dppclk_khz:%d  max_supported_dppclk_khz:%d  fclk_khz:%d  socclk_khz:%d\n\n",
+			dc->current_state->bw.dcn.calc_clk.dcfclk_khz,
+			dc->current_state->bw.dcn.calc_clk.dcfclk_deep_sleep_khz,
+			dc->current_state->bw.dcn.calc_clk.dispclk_khz,
+			dc->current_state->bw.dcn.calc_clk.dppclk_khz,
+			dc->current_state->bw.dcn.calc_clk.max_supported_dppclk_khz,
+			dc->current_state->bw.dcn.calc_clk.fclk_khz,
+			dc->current_state->bw.dcn.calc_clk.socclk_khz);
+
 	log_mpc_crc(dc);
 
 	DTN_INFO_END();
@@ -354,7 +509,7 @@
 	struct dce_hwseq *hws,
 	int plane_id)
 {
-	struct dc_context *ctx = hws->ctx;
+	DC_LOGGER_INIT(hws->ctx->logger);
 	if (REG(DC_IP_REQUEST_CNTL)) {
 		REG_SET(DC_IP_REQUEST_CNTL, 0,
 				IP_REQUEST_EN, 1);
@@ -461,7 +616,7 @@
 		tg->funcs->clear_optc_underflow(tg);
 }
 
-static enum dc_status dcn10_prog_pixclk_crtc_otg(
+static enum dc_status dcn10_enable_stream_timing(
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context,
 		struct dc *dc)
@@ -553,7 +708,7 @@
 		struct dc_state *context)
 {
 	int i;
-	struct dc_context *ctx = dc->ctx;
+	DC_LOGGER_INIT(dc->ctx->logger);
 	if (pipe_ctx->stream_res.stream_enc == NULL) {
 		pipe_ctx->stream = NULL;
 		return;
@@ -603,6 +758,90 @@
 					pipe_ctx->pipe_idx, pipe_ctx->stream_res.tg->inst);
 }
 
+static bool dcn10_hw_wa_force_recovery(struct dc *dc)
+{
+	struct hubp *hubp;
+	unsigned int i;
+	bool need_recover = false;
+
+	if (!dc->debug.recovery_enabled)
+		return false;
+
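+	/* scan all pipes: underflow on any HUBP triggers recovery of all of them */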
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe_ctx =
+			&dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx != NULL) {
+			hubp = pipe_ctx->plane_res.hubp;
+			if (hubp != NULL) {
+				if (hubp->funcs->hubp_get_underflow_status(hubp) != 0) {
+					/* one pipe underflowed; we will reset all the pipes */
+					need_recover = true;
+				}
+			}
+		}
+	}
+	if (!need_recover)
+		return false;
+	/*
+	 * DCHUBP_CNTL:HUBP_BLANK_EN=1
+	 * DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1
+	 * DCHUBP_CNTL:HUBP_DISABLE=1
+	 * DCHUBP_CNTL:HUBP_DISABLE=0
+	 * DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=0
+	 * DCSURF_PRIMARY_SURFACE_ADDRESS
+	 * DCHUBP_CNTL:HUBP_BLANK_EN=0
+	 */
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe_ctx =
+			&dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx != NULL) {
+			hubp = pipe_ctx->plane_res.hubp;
+			/*DCHUBP_CNTL:HUBP_BLANK_EN=1*/
+			if (hubp != NULL)
+				hubp->funcs->set_hubp_blank_en(hubp, true);
+		}
+	}
+	/*DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=1*/
+	hubbub1_soft_reset(dc->res_pool->hubbub, true);
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe_ctx =
+			&dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx != NULL) {
+			hubp = pipe_ctx->plane_res.hubp;
+			/*DCHUBP_CNTL:HUBP_DISABLE=1*/
+			if (hubp != NULL)
+				hubp->funcs->hubp_disable_control(hubp, true);
+		}
+	}
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe_ctx =
+			&dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx != NULL) {
+			hubp = pipe_ctx->plane_res.hubp;
+			/*DCHUBP_CNTL:HUBP_DISABLE=0*/
+			if (hubp != NULL)
+				hubp->funcs->hubp_disable_control(hubp, false);
+		}
+	}
+	/*DCHUBBUB_SOFT_RESET:DCHUBBUB_GLOBAL_SOFT_RESET=0*/
+	hubbub1_soft_reset(dc->res_pool->hubbub, false);
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe_ctx =
+			&dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe_ctx != NULL) {
+			hubp = pipe_ctx->plane_res.hubp;
+			/*DCHUBP_CNTL:HUBP_BLANK_EN=0*/
+			if (hubp != NULL)
+				hubp->funcs->set_hubp_blank_en(hubp, false);
+		}
+	}
+	return true;
+}
+
 static void dcn10_verify_allow_pstate_change_high(struct dc *dc)
 {
 	static bool should_log_hw_state; /* prevent hw state log by default */
@@ -611,13 +850,17 @@
 		if (should_log_hw_state) {
 			dcn10_log_hw_state(dc);
 		}
-
 		BREAK_TO_DEBUGGER();
+		if (dcn10_hw_wa_force_recovery(dc)) {
+			/* check again */
+			if (!hubbub1_verify_allow_pstate_change_high(dc->res_pool->hubbub))
+				BREAK_TO_DEBUGGER();
+		}
 	}
 }
 
 /* trigger HW to start disconnect plane from stream on the next vsync */
-static void plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
+void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
 	struct hubp *hubp = pipe_ctx->plane_res.hubp;
 	int dpp_id = pipe_ctx->plane_res.dpp->inst;
@@ -649,7 +892,7 @@
 {
 	struct dce_hwseq *hws = dc->hwseq;
 	struct dpp *dpp = pipe_ctx->plane_res.dpp;
-	struct dc_context *ctx = dc->ctx;
+	DC_LOGGER_INIT(dc->ctx->logger);
 
 	if (REG(DC_IP_REQUEST_CNTL)) {
 		REG_SET(DC_IP_REQUEST_CNTL, 0,
@@ -699,7 +942,7 @@
 
 static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
 {
-	struct dc_context *ctx = dc->ctx;
+	DC_LOGGER_INIT(dc->ctx->logger);
 
 	if (!pipe_ctx->plane_res.hubp || pipe_ctx->plane_res.hubp->power_gated)
 		return;
@@ -800,7 +1043,7 @@
 		dc->res_pool->opps[i]->mpcc_disconnect_pending[pipe_ctx->plane_res.mpcc_inst] = true;
 		pipe_ctx->stream_res.opp = dc->res_pool->opps[i];
 
-		plane_atomic_disconnect(dc, pipe_ctx);
+		hwss1_plane_atomic_disconnect(dc, pipe_ctx);
 	}
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
@@ -945,9 +1188,8 @@
 		tf = plane_state->in_transfer_func;
 
 	if (plane_state->gamma_correction &&
-		plane_state->gamma_correction->is_identity)
-		dpp_base->funcs->dpp_set_degamma(dpp_base, IPP_DEGAMMA_MODE_BYPASS);
-	else if (plane_state->gamma_correction && dce_use_lut(plane_state->format))
+		!plane_state->gamma_correction->is_identity
+			&& dce_use_lut(plane_state->format))
 		dpp_base->funcs->dpp_program_input_lut(dpp_base, plane_state->gamma_correction);
 
 	if (tf == NULL)
@@ -1433,7 +1675,7 @@
 	}
 }
 
-static void program_output_csc(struct dc *dc,
+static void dcn10_program_output_csc(struct dc *dc,
 		struct pipe_ctx *pipe_ctx,
 		enum dc_color_space colorspace,
 		uint16_t *matrix,
@@ -1542,22 +1784,22 @@
 
 	uint16_t result;
 
-	uint16_t d = (uint16_t)dal_fixed31_32_floor(
-		dal_fixed31_32_abs(
+	uint16_t d = (uint16_t)dc_fixpt_floor(
+		dc_fixpt_abs(
 			arg));
 
 	if (d <= (uint16_t)(1 << integer_bits) - (1 / (uint16_t)divisor))
-		numerator = (uint16_t)dal_fixed31_32_floor(
-			dal_fixed31_32_mul_int(
+		numerator = (uint16_t)dc_fixpt_floor(
+			dc_fixpt_mul_int(
 				arg,
 				divisor));
 	else {
-		numerator = dal_fixed31_32_floor(
-			dal_fixed31_32_sub(
-				dal_fixed31_32_from_int(
+		numerator = dc_fixpt_floor(
+			dc_fixpt_sub(
+				dc_fixpt_from_int(
 					1LL << integer_bits),
-				dal_fixed31_32_recip(
-					dal_fixed31_32_from_int(
+				dc_fixpt_recip(
+					dc_fixpt_from_int(
 						divisor))));
 	}
 
@@ -1567,8 +1809,8 @@
 		result = (uint16_t)(
 		(1 << (integer_bits + fractional_bits + 1)) + numerator);
 
-	if ((result != 0) && dal_fixed31_32_lt(
-		arg, dal_fixed31_32_zero))
+	if ((result != 0) && dc_fixpt_lt(
+		arg, dc_fixpt_zero))
 		result |= 1 << (integer_bits + fractional_bits);
 
 	return result;
@@ -1582,8 +1824,8 @@
 			&& plane_state->input_csc_color_matrix.enable_adjustment
 			&& plane_state->coeff_reduction_factor.value != 0) {
 		bias_and_scale->scale_blue = fixed_point_to_int_frac(
-			dal_fixed31_32_mul(plane_state->coeff_reduction_factor,
-					dal_fixed31_32_from_fraction(256, 255)),
+			dc_fixpt_mul(plane_state->coeff_reduction_factor,
+					dc_fixpt_from_fraction(256, 255)),
 				2,
 				13);
 		bias_and_scale->scale_red = bias_and_scale->scale_blue;
@@ -1623,6 +1865,8 @@
 	struct mpc *mpc = dc->res_pool->mpc;
 	struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params);
 
 	/* TODO: proper fix once fpga works */
 
 	if (dc->debug.surface_visual_confirm)
@@ -1649,6 +1893,7 @@
 			pipe_ctx->stream->output_color_space)
 					&& per_pixel_alpha;
 
 	/*
 	 * TODO: remove hack
 	 * Note: currently there is a bug in init_hw such that
@@ -1659,6 +1904,12 @@
 	 */
 	mpcc_id = hubp->inst;
 
+	/* If there is no full update, we don't need to touch the MPC tree */
+	if (!pipe_ctx->plane_state->update_flags.bits.full_update) {
+		mpc->funcs->update_blending(mpc, &blnd_cfg, mpcc_id);
+		return;
+	}
+
 	/* check if this MPCC is already being used */
 	new_mpcc = mpc->funcs->get_mpcc_for_dpp(mpc_tree_params, mpcc_id);
 	/* remove MPCC if being used */
@@ -1777,7 +2028,7 @@
 		/*gamut remap*/
 		program_gamut_remap(pipe_ctx);
 
-		program_output_csc(dc,
+		dc->hwss.program_output_csc(dc,
 				pipe_ctx,
 				pipe_ctx->stream->output_color_space,
 				pipe_ctx->stream->csc_color_matrix.matrix,
@@ -1810,9 +2061,9 @@
 		hubp->funcs->set_blank(hubp, false);
 }
 
-static void dcn10_otg_blank(
+static void dcn10_blank_pixel_data(
 		struct dc *dc,
-		struct stream_resource stream_res,
+		struct stream_resource *stream_res,
 		struct dc_stream_state *stream,
 		bool blank)
 {
@@ -1823,27 +2074,27 @@
 	color_space = stream->output_color_space;
 	color_space_to_black_color(dc, color_space, &black_color);
 
-	if (stream_res.tg->funcs->set_blank_color)
-		stream_res.tg->funcs->set_blank_color(
-				stream_res.tg,
+	if (stream_res->tg->funcs->set_blank_color)
+		stream_res->tg->funcs->set_blank_color(
+				stream_res->tg,
 				&black_color);
 
 	if (!blank) {
-		if (stream_res.tg->funcs->set_blank)
-			stream_res.tg->funcs->set_blank(stream_res.tg, blank);
-		if (stream_res.abm)
-			stream_res.abm->funcs->set_abm_level(stream_res.abm, stream->abm_level);
+		if (stream_res->tg->funcs->set_blank)
+			stream_res->tg->funcs->set_blank(stream_res->tg, blank);
+		if (stream_res->abm)
+			stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level);
 	} else if (blank) {
-		if (stream_res.abm)
-			stream_res.abm->funcs->set_abm_immediate_disable(stream_res.abm);
-		if (stream_res.tg->funcs->set_blank)
-			stream_res.tg->funcs->set_blank(stream_res.tg, blank);
+		if (stream_res->abm)
+			stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm);
+		if (stream_res->tg->funcs->set_blank)
+			stream_res->tg->funcs->set_blank(stream_res->tg, blank);
 	}
 }
 
 static void set_hdr_multiplier(struct pipe_ctx *pipe_ctx)
 {
-	struct fixed31_32 multiplier = dal_fixed31_32_from_fraction(
+	struct fixed31_32 multiplier = dc_fixpt_from_fraction(
 			pipe_ctx->plane_state->sdr_white_level, 80);
 	uint32_t hw_mult = 0x1f000; // 1.0 default multiplier
 	struct custom_float_format fmt;
@@ -1876,7 +2127,7 @@
 		pipe_ctx->stream_res.tg->funcs->program_global_sync(
 				pipe_ctx->stream_res.tg);
 
-		dcn10_otg_blank(dc, pipe_ctx->stream_res,
+		dc->hwss.blank_pixel_data(dc, &pipe_ctx->stream_res,
 				pipe_ctx->stream, blank);
 	}
 
@@ -1983,9 +2234,9 @@
 	bool removed_pipe[4] = { false };
 	unsigned int ref_clk_mhz = dc->res_pool->ref_clock_inKhz/1000;
 	bool program_water_mark = false;
-	struct dc_context *ctx = dc->ctx;
 	struct pipe_ctx *top_pipe_to_program =
 			find_top_pipe_for_stream(dc, context, stream);
+	DC_LOGGER_INIT(dc->ctx->logger);
 
 	if (!top_pipe_to_program)
 		return;
@@ -1996,7 +2247,7 @@
 
 	if (num_planes == 0) {
 		/* OTG blank before remove all front end */
-		dcn10_otg_blank(dc, top_pipe_to_program->stream_res, top_pipe_to_program->stream, true);
+		dc->hwss.blank_pixel_data(dc, &top_pipe_to_program->stream_res, top_pipe_to_program->stream, true);
 	}
 
 	/* Disconnect unused mpcc */
@@ -2027,7 +2278,7 @@
 			old_pipe_ctx->plane_state &&
 			old_pipe_ctx->stream_res.tg == tg) {
 
-			plane_atomic_disconnect(dc, old_pipe_ctx);
+			hwss1_plane_atomic_disconnect(dc, old_pipe_ctx);
 			removed_pipe[i] = true;
 
 			DC_LOG_DC(
@@ -2335,15 +2586,6 @@
 			set_static_screen_control(pipe_ctx[i]->stream_res.tg, value);
 }
 
-static void set_plane_config(
-	const struct dc *dc,
-	struct pipe_ctx *pipe_ctx,
-	struct resource_context *res_ctx)
-{
-	/* TODO */
-	program_gamut_remap(pipe_ctx);
-}
-
 static void dcn10_config_stereo_parameters(
 		struct dc_stream_state *stream, struct crtc_stereo_flags *flags)
 {
@@ -2521,12 +2763,12 @@
 	.init_hw = dcn10_init_hw,
 	.apply_ctx_to_hw = dce110_apply_ctx_to_hw,
 	.apply_ctx_for_surface = dcn10_apply_ctx_for_surface,
-	.set_plane_config = set_plane_config,
 	.update_plane_addr = dcn10_update_plane_addr,
 	.update_dchub = dcn10_update_dchub,
 	.update_pending_status = dcn10_update_pending_status,
 	.set_input_transfer_func = dcn10_set_input_transfer_func,
 	.set_output_transfer_func = dcn10_set_output_transfer_func,
+	.program_output_csc = dcn10_program_output_csc,
 	.power_down = dce110_power_down,
 	.enable_accelerated_mode = dce110_enable_accelerated_mode,
 	.enable_timing_synchronization = dcn10_enable_timing_synchronization,
@@ -2538,10 +2780,11 @@
 	.blank_stream = dce110_blank_stream,
 	.enable_display_power_gating = dcn10_dummy_display_power_gating,
 	.disable_plane = dcn10_disable_plane,
+	.blank_pixel_data = dcn10_blank_pixel_data,
 	.pipe_control_lock = dcn10_pipe_control_lock,
 	.set_bandwidth = dcn10_set_bandwidth,
 	.reset_hw_ctx_wrap = reset_hw_ctx_wrap,
-	.prog_pixclk_crtc_otg = dcn10_prog_pixclk_crtc_otg,
+	.enable_stream_timing = dcn10_enable_stream_timing,
 	.set_drr = set_drr,
 	.get_position = get_position,
 	.set_static_screen_control = set_static_screen_control,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
index 6c526b5..44f734b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h
@@ -37,4 +37,6 @@
 
 bool is_rgb_cspace(enum dc_color_space output_color_space);
 
+void hwss1_plane_atomic_disconnect(struct dc *dc, struct pipe_ctx *pipe_ctx);
+
 #endif /* __DC_HWSS_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
new file mode 100644
index 0000000..21fa40a
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
@@ -0,0 +1,1362 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+
+#include "core_types.h"
+#include "link_encoder.h"
+#include "dcn10_link_encoder.h"
+#include "stream_encoder.h"
+#include "i2caux_interface.h"
+#include "dc_bios_types.h"
+
+#include "gpio_service_interface.h"
+
+#define CTX \
+	enc10->base.ctx
+#define DC_LOGGER \
+	enc10->base.ctx->logger
+
+#define REG(reg)\
+	(enc10->link_regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+	enc10->link_shift->field_name, enc10->link_mask->field_name
+
+/*
+ * @brief
+ * Trigger Source Select
+ * ASIC-dependent, actual values for register programming
+ */
+#define DCN10_DIG_FE_SOURCE_SELECT_INVALID 0x0
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGA 0x1
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGB 0x2
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGC 0x4
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGD 0x08
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGE 0x10
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGF 0x20
+#define DCN10_DIG_FE_SOURCE_SELECT_DIGG 0x40
+
+enum {
+	DP_MST_UPDATE_MAX_RETRY = 50
+};
+
+static void aux_initialize(struct dcn10_link_encoder *enc10);
+
+static const struct link_encoder_funcs dcn10_lnk_enc_funcs = {
+	.validate_output_with_stream =
+		dcn10_link_encoder_validate_output_with_stream,
+	.hw_init = dcn10_link_encoder_hw_init,
+	.setup = dcn10_link_encoder_setup,
+	.enable_tmds_output = dcn10_link_encoder_enable_tmds_output,
+	.enable_dp_output = dcn10_link_encoder_enable_dp_output,
+	.enable_dp_mst_output = dcn10_link_encoder_enable_dp_mst_output,
+	.disable_output = dcn10_link_encoder_disable_output,
+	.dp_set_lane_settings = dcn10_link_encoder_dp_set_lane_settings,
+	.dp_set_phy_pattern = dcn10_link_encoder_dp_set_phy_pattern,
+	.update_mst_stream_allocation_table =
+		dcn10_link_encoder_update_mst_stream_allocation_table,
+	.psr_program_dp_dphy_fast_training =
+			dcn10_psr_program_dp_dphy_fast_training,
+	.psr_program_secondary_packet = dcn10_psr_program_secondary_packet,
+	.connect_dig_be_to_fe = dcn10_link_encoder_connect_dig_be_to_fe,
+	.enable_hpd = dcn10_link_encoder_enable_hpd,
+	.disable_hpd = dcn10_link_encoder_disable_hpd,
+	.is_dig_enabled = dcn10_is_dig_enabled,
+	.destroy = dcn10_link_encoder_destroy
+};
+
+static enum bp_result link_transmitter_control(
+	struct dcn10_link_encoder *enc10,
+	struct bp_transmitter_control *cntl)
+{
+	enum bp_result result;
+	struct dc_bios *bp = enc10->base.ctx->dc_bios;
+
+	result = bp->funcs->transmitter_control(bp, cntl);
+
+	return result;
+}
+
+static void enable_phy_bypass_mode(
+	struct dcn10_link_encoder *enc10,
+	bool enable)
+{
+	/* This register resides in DP back end block;
+	 * transmitter is used for the offset
+	 */
+	REG_UPDATE(DP_DPHY_CNTL, DPHY_BYPASS, enable);
+}
+
+static void disable_prbs_symbols(
+	struct dcn10_link_encoder *enc10,
+	bool disable)
+{
+	/* This register resides in DP back end block;
+	 * transmitter is used for the offset
+	 */
+	REG_UPDATE_4(DP_DPHY_CNTL,
+			DPHY_ATEST_SEL_LANE0, disable,
+			DPHY_ATEST_SEL_LANE1, disable,
+			DPHY_ATEST_SEL_LANE2, disable,
+			DPHY_ATEST_SEL_LANE3, disable);
+}
+
+static void disable_prbs_mode(
+	struct dcn10_link_encoder *enc10)
+{
+	REG_UPDATE(DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, 0);
+}
+
+static void program_pattern_symbols(
+	struct dcn10_link_encoder *enc10,
+	uint16_t pattern_symbols[8])
+{
+	/* This register resides in DP back end block;
+	 * transmitter is used for the offset
+	 */
+	REG_SET_3(DP_DPHY_SYM0, 0,
+			DPHY_SYM1, pattern_symbols[0],
+			DPHY_SYM2, pattern_symbols[1],
+			DPHY_SYM3, pattern_symbols[2]);
+
+	/* This register resides in DP back end block;
+	 * transmitter is used for the offset
+	 */
+	REG_SET_3(DP_DPHY_SYM1, 0,
+			DPHY_SYM4, pattern_symbols[3],
+			DPHY_SYM5, pattern_symbols[4],
+			DPHY_SYM6, pattern_symbols[5]);
+
+	/* This register resides in DP back end block;
+	 * transmitter is used for the offset
+	 */
+	REG_SET_2(DP_DPHY_SYM2, 0,
+			DPHY_SYM7, pattern_symbols[6],
+			DPHY_SYM8, pattern_symbols[7]);
+}
+
+static void set_dp_phy_pattern_d102(
+	struct dcn10_link_encoder *enc10)
+{
+	/* Disable PHY Bypass mode to setup the test pattern */
+	enable_phy_bypass_mode(enc10, false);
+
+	/* For 10-bit PRBS or debug symbols
+	 * please use the following sequence:
+	 *
+	 * Enable debug symbols on the lanes
+	 */
+	disable_prbs_symbols(enc10, true);
+
+	/* Disable PRBS mode */
+	disable_prbs_mode(enc10);
+
+	/* Program debug symbols to be output */
+	{
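+		/* 0x2AA = 0b1010101010, the D10.2 clock-pattern symbol */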
+		uint16_t pattern_symbols[8] = {
+			0x2AA, 0x2AA, 0x2AA, 0x2AA,
+			0x2AA, 0x2AA, 0x2AA, 0x2AA
+		};
+
+		program_pattern_symbols(enc10, pattern_symbols);
+	}
+
+	/* Enable phy bypass mode to enable the test pattern */
+
+	enable_phy_bypass_mode(enc10, true);
+}
+
+static void set_link_training_complete(
+	struct dcn10_link_encoder *enc10,
+	bool complete)
+{
+	/* This register resides in DP back end block;
+	 * transmitter is used for the offset
+	 */
+	REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, complete);
+}
+
+void dcn10_link_encoder_set_dp_phy_pattern_training_pattern(
+	struct link_encoder *enc,
+	uint32_t index)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	/* Write Training Pattern */
+
+	REG_WRITE(DP_DPHY_TRAINING_PATTERN_SEL, index);
+
+	/* Set HW Register Training Complete to false */
+
+	set_link_training_complete(enc10, false);
+
+	/* Disable PHY Bypass mode to output Training Pattern */
+
+	enable_phy_bypass_mode(enc10, false);
+
+	/* Disable PRBS mode */
+	disable_prbs_mode(enc10);
+}
+
+static void setup_panel_mode(
+	struct dcn10_link_encoder *enc10,
+	enum dp_panel_mode panel_mode)
+{
+	uint32_t value;
+
+	ASSERT(REG(DP_DPHY_INTERNAL_CTRL));
+	value = REG_READ(DP_DPHY_INTERNAL_CTRL);
+
+	switch (panel_mode) {
+	case DP_PANEL_MODE_EDP:
+		value = 0x1;
+		break;
+	case DP_PANEL_MODE_SPECIAL:
+		value = 0x11;
+		break;
+	default:
+		value = 0x0;
+		break;
+	}
+
+	REG_WRITE(DP_DPHY_INTERNAL_CTRL, value);
+}
+
+static void set_dp_phy_pattern_symbol_error(
+	struct dcn10_link_encoder *enc10)
+{
+	/* Disable PHY Bypass mode to setup the test pattern */
+	enable_phy_bypass_mode(enc10, false);
+
+	/* program correct panel mode */
+	setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT);
+
+	/* A PRBS23 pattern is used for most DP electrical measurements. */
+
+	/* Enable PRBS symbols on the lanes */
+	disable_prbs_symbols(enc10, false);
+
+	/* For PRBS23 Set bit DPHY_PRBS_SEL=1 and Set bit DPHY_PRBS_EN=1 */
+	REG_UPDATE_2(DP_DPHY_PRBS_CNTL,
+			DPHY_PRBS_SEL, 1,
+			DPHY_PRBS_EN, 1);
+
+	/* Enable phy bypass mode to enable the test pattern */
+	enable_phy_bypass_mode(enc10, true);
+}
+
+static void set_dp_phy_pattern_prbs7(
+	struct dcn10_link_encoder *enc10)
+{
+	/* Disable PHY Bypass mode to setup the test pattern */
+	enable_phy_bypass_mode(enc10, false);
+
+	/* A PRBS7 pattern is used for most DP electrical measurements. */
+
+	/* Enable PRBS symbols on the lanes */
+	disable_prbs_symbols(enc10, false);
+
+	/* For PRBS7 Set bit DPHY_PRBS_SEL=0 and Set bit DPHY_PRBS_EN=1 */
+	REG_UPDATE_2(DP_DPHY_PRBS_CNTL,
+			DPHY_PRBS_SEL, 0,
+			DPHY_PRBS_EN, 1);
+
+	/* Enable phy bypass mode to enable the test pattern */
+	enable_phy_bypass_mode(enc10, true);
+}
+
+static void set_dp_phy_pattern_80bit_custom(
+	struct dcn10_link_encoder *enc10,
+	const uint8_t *pattern)
+{
+	/* Disable PHY Bypass mode to setup the test pattern */
+	enable_phy_bypass_mode(enc10, false);
+
+	/* Enable debug symbols on the lanes */
+
+	disable_prbs_symbols(enc10, true);
+
+	/* Enable PHY bypass mode to enable the test pattern */
+	/* TODO: is it really needed? */
+
+	enable_phy_bypass_mode(enc10, true);
+
+	/* Program 80 bit custom pattern */
+	{
+		uint16_t pattern_symbols[8];
+
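+		/* Pack the 10-byte (80-bit) pattern into eight 10-bit symbols,
+		 * LSB first: symbol n carries pattern bits [10*n + 9 : 10*n].
+		 */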
+		pattern_symbols[0] =
+			((pattern[1] & 0x03) << 8) | pattern[0];
+		pattern_symbols[1] =
+			((pattern[2] & 0x0f) << 6) | ((pattern[1] >> 2) & 0x3f);
+		pattern_symbols[2] =
+			((pattern[3] & 0x3f) << 4) | ((pattern[2] >> 4) & 0x0f);
+		pattern_symbols[3] =
+			(pattern[4] << 2) | ((pattern[3] >> 6) & 0x03);
+		pattern_symbols[4] =
+			((pattern[6] & 0x03) << 8) | pattern[5];
+		pattern_symbols[5] =
+			((pattern[7] & 0x0f) << 6) | ((pattern[6] >> 2) & 0x3f);
+		pattern_symbols[6] =
+			((pattern[8] & 0x3f) << 4) | ((pattern[7] >> 4) & 0x0f);
+		pattern_symbols[7] =
+			(pattern[9] << 2) | ((pattern[8] >> 6) & 0x03);
+
+		program_pattern_symbols(enc10, pattern_symbols);
+	}
+
+	/* Enable phy bypass mode to enable the test pattern */
+
+	enable_phy_bypass_mode(enc10, true);
+}
+
+static void set_dp_phy_pattern_hbr2_compliance_cp2520_2(
+	struct dcn10_link_encoder *enc10,
+	unsigned int cp2520_pattern)
+{
+
+	/* Previously the DP_HBR2_EYE_PATTERN register was enabled
+	 * to generate this pattern, but that no longer works with
+	 * the latest spec change, so we program the following
+	 * registers manually.
+	 *
+	 * The following settings have been confirmed
+	 * by Nick Chorney and Sandra Liu.
+	 */
+
+	/* Disable PHY Bypass mode to setup the test pattern */
+
+	enable_phy_bypass_mode(enc10, false);
+
+	/* Setup DIG encoder in DP SST mode */
+	enc10->base.funcs->setup(&enc10->base, SIGNAL_TYPE_DISPLAY_PORT);
+
+	/* ensure normal panel mode. */
+	setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT);
+
+	/* no vbid after BS (SR)
+	 * DP_LINK_FRAMING_CNTL change history (Sandra Liu):
+	 * 11000260 / 11000104 / 110000FC
+	 */
+	REG_UPDATE_3(DP_LINK_FRAMING_CNTL,
+			DP_IDLE_BS_INTERVAL, 0xFC,
+			DP_VBID_DISABLE, 1,
+			DP_VID_ENHANCED_FRAME_MODE, 1);
+
+	/* swap every BS with SR */
+	REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, 0);
+
+	/* select cp2520 patterns */
+	if (REG(DP_DPHY_HBR2_PATTERN_CONTROL))
+		REG_UPDATE(DP_DPHY_HBR2_PATTERN_CONTROL,
+				DP_DPHY_HBR2_PATTERN_CONTROL, cp2520_pattern);
+	else
+		/* pre-DCE11 can only generate CP2520 pattern 2 */
+		ASSERT(cp2520_pattern == 2);
+
+	/* set link training complete */
+	set_link_training_complete(enc10, true);
+
+	/* disable video stream */
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0);
+
+	/* Disable PHY Bypass mode to setup the test pattern */
+	enable_phy_bypass_mode(enc10, false);
+}
+
+static void set_dp_phy_pattern_passthrough_mode(
+	struct dcn10_link_encoder *enc10,
+	enum dp_panel_mode panel_mode)
+{
+	/* program correct panel mode */
+	setup_panel_mode(enc10, panel_mode);
+
+	/* restore LINK_FRAMING_CNTL and DPHY_SCRAMBLER_BS_COUNT
+	 * in case we were doing HBR2 compliance pattern before
+	 */
+	REG_UPDATE_3(DP_LINK_FRAMING_CNTL,
+			DP_IDLE_BS_INTERVAL, 0x2000,
+			DP_VBID_DISABLE, 0,
+			DP_VID_ENHANCED_FRAME_MODE, 1);
+
+	REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, 0x1FF);
+
+	/* set link training complete */
+	set_link_training_complete(enc10, true);
+
+	/* Disable PHY Bypass mode to setup the test pattern */
+	enable_phy_bypass_mode(enc10, false);
+
+	/* Disable PRBS mode */
+	disable_prbs_mode(enc10);
+}
+
+/* return value is bit-vector */
+static uint8_t get_frontend_source(
+	enum engine_id engine)
+{
+	switch (engine) {
+	case ENGINE_ID_DIGA:
+		return DCN10_DIG_FE_SOURCE_SELECT_DIGA;
+	case ENGINE_ID_DIGB:
+		return DCN10_DIG_FE_SOURCE_SELECT_DIGB;
+	case ENGINE_ID_DIGC:
+		return DCN10_DIG_FE_SOURCE_SELECT_DIGC;
+	case ENGINE_ID_DIGD:
+		return DCN10_DIG_FE_SOURCE_SELECT_DIGD;
+	case ENGINE_ID_DIGE:
+		return DCN10_DIG_FE_SOURCE_SELECT_DIGE;
+	case ENGINE_ID_DIGF:
+		return DCN10_DIG_FE_SOURCE_SELECT_DIGF;
+	case ENGINE_ID_DIGG:
+		return DCN10_DIG_FE_SOURCE_SELECT_DIGG;
+	default:
+		ASSERT_CRITICAL(false);
+		return DCN10_DIG_FE_SOURCE_SELECT_INVALID;
+	}
+}
+
+static void configure_encoder(
+	struct dcn10_link_encoder *enc10,
+	const struct dc_link_settings *link_settings)
+{
+	/* set number of lanes */
+
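+	/* DP_UDI_LANES is a zero-based lane count, hence the LANE_COUNT_ONE offset */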
+	REG_SET(DP_CONFIG, 0,
+			DP_UDI_LANES, link_settings->lane_count - LANE_COUNT_ONE);
+
+	/* setup scrambler */
+	REG_UPDATE(DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, 1);
+}
+
+void dcn10_psr_program_dp_dphy_fast_training(struct link_encoder *enc,
+			bool exit_link_training_required)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	if (exit_link_training_required)
+		REG_UPDATE(DP_DPHY_FAST_TRAINING,
+				DPHY_RX_FAST_TRAINING_CAPABLE, 1);
+	else {
+		REG_UPDATE(DP_DPHY_FAST_TRAINING,
+				DPHY_RX_FAST_TRAINING_CAPABLE, 0);
+		/* In DCE 11, we are able to pre-program a Force SR register
+		 * to be able to trigger SR symbol after 5 idle patterns
+		 * transmitted. Upon PSR Exit, DMCU can trigger
+		 * DPHY_LOAD_BS_COUNT_START = 1. Upon writing 1 to
+		 * DPHY_LOAD_BS_COUNT_START and the internal counter
+		 * reaches DPHY_LOAD_BS_COUNT, the next BS symbol will be
+		 * replaced by SR symbol once.
+		 */
+
+		REG_UPDATE(DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, 0x5);
+	}
+}
+
+void dcn10_psr_program_secondary_packet(struct link_encoder *enc,
+			unsigned int sdp_transmit_line_num_deadline)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	REG_UPDATE_2(DP_SEC_CNTL1,
+		DP_SEC_GSP0_LINE_NUM, sdp_transmit_line_num_deadline,
+		DP_SEC_GSP0_PRIORITY, 1);
+}
+
+bool dcn10_is_dig_enabled(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t value;
+
+	REG_GET(DIG_BE_EN_CNTL, DIG_ENABLE, &value);
+	return value;
+}
+
+static void link_encoder_disable(struct dcn10_link_encoder *enc10)
+{
+	/* reset training pattern */
+	REG_SET(DP_DPHY_TRAINING_PATTERN_SEL, 0,
+			DPHY_TRAINING_PATTERN_SEL, 0);
+
+	/* reset training complete */
+	REG_UPDATE(DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, 0);
+
+	/* reset panel mode */
+	setup_panel_mode(enc10, DP_PANEL_MODE_DEFAULT);
+}
+
+static void hpd_initialize(
+	struct dcn10_link_encoder *enc10)
+{
+	/* Associate HPD with DIG_BE */
+	enum hpd_source_id hpd_source = enc10->base.hpd_source;
+
+	REG_UPDATE(DIG_BE_CNTL, DIG_HPD_SELECT, hpd_source);
+}
+
+bool dcn10_link_encoder_validate_dvi_output(
+	const struct dcn10_link_encoder *enc10,
+	enum signal_type connector_signal,
+	enum signal_type signal,
+	const struct dc_crtc_timing *crtc_timing)
+{
+	uint32_t max_pixel_clock = TMDS_MAX_PIXEL_CLOCK;
+
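+	/* dual-link DVI carries two pixels per TMDS clock, doubling the limit */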
+	if (signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+		max_pixel_clock *= 2;
+
+	/* This handles the case of HDMI downgrade to DVI: we don't want
+	 * to cap the pixel clock if the DDI is not DVI.
+	 */
+	if (connector_signal != SIGNAL_TYPE_DVI_DUAL_LINK &&
+			connector_signal != SIGNAL_TYPE_DVI_SINGLE_LINK)
+		max_pixel_clock = enc10->base.features.max_hdmi_pixel_clock;
+
+	/* DVI only support RGB pixel encoding */
+	if (crtc_timing->pixel_encoding != PIXEL_ENCODING_RGB)
+		return false;
+
+	/* connect DVI via adapter's HDMI connector */
+	if ((connector_signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
+		connector_signal == SIGNAL_TYPE_HDMI_TYPE_A) &&
+		signal != SIGNAL_TYPE_HDMI_TYPE_A &&
+		crtc_timing->pix_clk_khz > TMDS_MAX_PIXEL_CLOCK)
+		return false;
+	if (crtc_timing->pix_clk_khz < TMDS_MIN_PIXEL_CLOCK)
+		return false;
+
+	if (crtc_timing->pix_clk_khz > max_pixel_clock)
+		return false;
+
+	/* DVI supports 6/8bpp single-link and 10/16bpp dual-link */
+	switch (crtc_timing->display_color_depth) {
+	case COLOR_DEPTH_666:
+	case COLOR_DEPTH_888:
+	break;
+	case COLOR_DEPTH_101010:
+	case COLOR_DEPTH_161616:
+		if (signal != SIGNAL_TYPE_DVI_DUAL_LINK)
+			return false;
+	break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+static bool dcn10_link_encoder_validate_hdmi_output(
+	const struct dcn10_link_encoder *enc10,
+	const struct dc_crtc_timing *crtc_timing,
+	int adjusted_pix_clk_khz)
+{
+	enum dc_color_depth max_deep_color =
+			enc10->base.features.max_hdmi_deep_color;
+
+	if (max_deep_color < crtc_timing->display_color_depth)
+		return false;
+
+	if (crtc_timing->display_color_depth < COLOR_DEPTH_888)
+		return false;
+	if (adjusted_pix_clk_khz < TMDS_MIN_PIXEL_CLOCK)
+		return false;
+
+	if ((adjusted_pix_clk_khz == 0) ||
+		(adjusted_pix_clk_khz > enc10->base.features.max_hdmi_pixel_clock))
+		return false;
+
+	/* DCE11 HW does not support 420 */
+	if (!enc10->base.features.ycbcr420_supported &&
+			crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+		return false;
+
+	if (!enc10->base.features.flags.bits.HDMI_6GB_EN &&
+		adjusted_pix_clk_khz >= 300000)
+		return false;
+	return true;
+}
+
+bool dcn10_link_encoder_validate_dp_output(
+	const struct dcn10_link_encoder *enc10,
+	const struct dc_crtc_timing *crtc_timing)
+{
+	/* default RGB only */
+	if (crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB)
+		return true;
+
+	if (enc10->base.features.flags.bits.IS_YCBCR_CAPABLE)
+		return true;
+
+	/* For the DCE 8.x and later DP Y-only feature, we need the ASIC cap
+	 * plus FeatureSupportDPYonly; color depth 666 is not supported.
+	 */
+	if (crtc_timing->flags.Y_ONLY &&
+		enc10->base.features.flags.bits.IS_YCBCR_CAPABLE &&
+		crtc_timing->display_color_depth != COLOR_DEPTH_666)
+		return true;
+
+	return false;
+}
+
+void dcn10_link_encoder_construct(
+	struct dcn10_link_encoder *enc10,
+	const struct encoder_init_data *init_data,
+	const struct encoder_feature_support *enc_features,
+	const struct dcn10_link_enc_registers *link_regs,
+	const struct dcn10_link_enc_aux_registers *aux_regs,
+	const struct dcn10_link_enc_hpd_registers *hpd_regs,
+	const struct dcn10_link_enc_shift *link_shift,
+	const struct dcn10_link_enc_mask *link_mask)
+{
+	struct bp_encoder_cap_info bp_cap_info = {0};
+	const struct dc_vbios_funcs *bp_funcs = init_data->ctx->dc_bios->funcs;
+	enum bp_result result = BP_RESULT_OK;
+
+	enc10->base.funcs = &dcn10_lnk_enc_funcs;
+	enc10->base.ctx = init_data->ctx;
+	enc10->base.id = init_data->encoder;
+
+	enc10->base.hpd_source = init_data->hpd_source;
+	enc10->base.connector = init_data->connector;
+
+	enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+	enc10->base.features = *enc_features;
+
+	enc10->base.transmitter = init_data->transmitter;
+
+	/* set the flag to indicate whether driver poll the I2C data pin
+	 * while doing the DP sink detect
+	 */
+
+/*	if (dal_adapter_service_is_feature_supported(as,
+		FEATURE_DP_SINK_DETECT_POLL_DATA_PIN))
+		enc10->base.features.flags.bits.
+			DP_SINK_DETECT_POLL_DATA_PIN = true;*/
+
+	enc10->base.output_signals =
+		SIGNAL_TYPE_DVI_SINGLE_LINK |
+		SIGNAL_TYPE_DVI_DUAL_LINK |
+		SIGNAL_TYPE_LVDS |
+		SIGNAL_TYPE_DISPLAY_PORT |
+		SIGNAL_TYPE_DISPLAY_PORT_MST |
+		SIGNAL_TYPE_EDP |
+		SIGNAL_TYPE_HDMI_TYPE_A;
+
+	/* For DCE 8.0 and 8.1, by design, UNIPHY is hardwired to DIG_BE.
+	 * SW always assigns DIG_FE 1:1 mapped to DIG_BE for non-MST UNIPHY.
+	 * SW assigns DIG_FE to non-MST UNIPHY first and MST last, so the
+	 * preferred DIG is per UNIPHY and is used by SST DP, eDP, HDMI, DVI
+	 * and LVDS. The preferred DIG assignment is decided by board design.
+	 * DCE 8.0 has at most 6 UNIPHYs; we assume the board design and
+	 * VBIOS will filter out the seventh UNIPHY for DCE 8.0, so adding
+	 * DIGG should not hurt DCE 8.0. This lets DCE 8.1 share as much of
+	 * DCE 8.0 as possible.
+	 */
+
+	enc10->link_regs = link_regs;
+	enc10->aux_regs = aux_regs;
+	enc10->hpd_regs = hpd_regs;
+	enc10->link_shift = link_shift;
+	enc10->link_mask = link_mask;
+
+	switch (enc10->base.transmitter) {
+	case TRANSMITTER_UNIPHY_A:
+		enc10->base.preferred_engine = ENGINE_ID_DIGA;
+	break;
+	case TRANSMITTER_UNIPHY_B:
+		enc10->base.preferred_engine = ENGINE_ID_DIGB;
+	break;
+	case TRANSMITTER_UNIPHY_C:
+		enc10->base.preferred_engine = ENGINE_ID_DIGC;
+	break;
+	case TRANSMITTER_UNIPHY_D:
+		enc10->base.preferred_engine = ENGINE_ID_DIGD;
+	break;
+	case TRANSMITTER_UNIPHY_E:
+		enc10->base.preferred_engine = ENGINE_ID_DIGE;
+	break;
+	case TRANSMITTER_UNIPHY_F:
+		enc10->base.preferred_engine = ENGINE_ID_DIGF;
+	break;
+	case TRANSMITTER_UNIPHY_G:
+		enc10->base.preferred_engine = ENGINE_ID_DIGG;
+	break;
+	default:
+		ASSERT_CRITICAL(false);
+		enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+	}
+
+	/* default to one to mirror Windows behavior */
+	enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
+
+	result = bp_funcs->get_encoder_cap_info(enc10->base.ctx->dc_bios,
+						enc10->base.id, &bp_cap_info);
+
+	/* Override features with DCE-specific values */
+	if (result == BP_RESULT_OK) {
+		enc10->base.features.flags.bits.IS_HBR2_CAPABLE =
+				bp_cap_info.DP_HBR2_EN;
+		enc10->base.features.flags.bits.IS_HBR3_CAPABLE =
+				bp_cap_info.DP_HBR3_EN;
+		enc10->base.features.flags.bits.HDMI_6GB_EN = bp_cap_info.HDMI_6GB_EN;
+	} else {
+		DC_LOG_WARNING("%s: Failed to get encoder_cap_info from VBIOS with error code %d!\n",
+				__func__,
+				result);
+	}
+}
+
+bool dcn10_link_encoder_validate_output_with_stream(
+	struct link_encoder *enc,
+	const struct dc_stream_state *stream)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	bool is_valid;
+
+	switch (stream->signal) {
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		is_valid = dcn10_link_encoder_validate_dvi_output(
+			enc10,
+			stream->sink->link->connector_signal,
+			stream->signal,
+			&stream->timing);
+	break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		is_valid = dcn10_link_encoder_validate_hdmi_output(
+				enc10,
+				&stream->timing,
+				stream->phy_pix_clk);
+	break;
+	case SIGNAL_TYPE_DISPLAY_PORT:
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		is_valid = dcn10_link_encoder_validate_dp_output(
+					enc10, &stream->timing);
+	break;
+	case SIGNAL_TYPE_EDP:
+		is_valid = stream->timing.pixel_encoding == PIXEL_ENCODING_RGB;
+	break;
+	case SIGNAL_TYPE_VIRTUAL:
+		is_valid = true;
+		break;
+	default:
+		is_valid = false;
+	break;
+	}
+
+	return is_valid;
+}
+
+void dcn10_link_encoder_hw_init(
+	struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	struct bp_transmitter_control cntl = { 0 };
+	enum bp_result result;
+
+	cntl.action = TRANSMITTER_CONTROL_INIT;
+	cntl.engine_id = ENGINE_ID_UNKNOWN;
+	cntl.transmitter = enc10->base.transmitter;
+	cntl.connector_obj_id = enc10->base.connector;
+	cntl.lanes_number = LANE_COUNT_FOUR;
+	cntl.coherent = false;
+	cntl.hpd_sel = enc10->base.hpd_source;
+
+	if (enc10->base.connector.id == CONNECTOR_ID_EDP)
+		cntl.signal = SIGNAL_TYPE_EDP;
+
+	result = link_transmitter_control(enc10, &cntl);
+
+	if (result != BP_RESULT_OK) {
+		DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+			__func__);
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	if (enc10->base.connector.id == CONNECTOR_ID_LVDS) {
+		cntl.action = TRANSMITTER_CONTROL_BACKLIGHT_BRIGHTNESS;
+
+		result = link_transmitter_control(enc10, &cntl);
+
+		ASSERT(result == BP_RESULT_OK);
+	}
+
+	aux_initialize(enc10);
+
+	/* reinitialize HPD.
+	 * hpd_initialize() will pass DIG_FE id to HW context.
+	 * All other routine within HW context will use fe_engine_offset
+	 * as DIG_FE id even caller pass DIG_FE id.
+	 * So this routine must be called first.
+	 */
+	hpd_initialize(enc10);
+}
+
+void dcn10_link_encoder_destroy(struct link_encoder **enc)
+{
+	kfree(TO_DCN10_LINK_ENC(*enc));
+	*enc = NULL;
+}
+
+void dcn10_link_encoder_setup(
+	struct link_encoder *enc,
+	enum signal_type signal)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	switch (signal) {
+	case SIGNAL_TYPE_EDP:
+	case SIGNAL_TYPE_DISPLAY_PORT:
+		/* DP SST */
+		REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 0);
+		break;
+	case SIGNAL_TYPE_LVDS:
+		/* LVDS */
+		REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 1);
+		break;
+	case SIGNAL_TYPE_DVI_SINGLE_LINK:
+	case SIGNAL_TYPE_DVI_DUAL_LINK:
+		/* TMDS-DVI */
+		REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 2);
+		break;
+	case SIGNAL_TYPE_HDMI_TYPE_A:
+		/* TMDS-HDMI */
+		REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 3);
+		break;
+	case SIGNAL_TYPE_DISPLAY_PORT_MST:
+		/* DP MST */
+		REG_UPDATE(DIG_BE_CNTL, DIG_MODE, 5);
+		break;
+	default:
+		ASSERT_CRITICAL(false);
+		/* invalid mode! */
+		break;
+	}
+}
+
+/* TODO: still need depth or just pass in adjusted pixel clock? */
+void dcn10_link_encoder_enable_tmds_output(
+	struct link_encoder *enc,
+	enum clock_source_id clock_source,
+	enum dc_color_depth color_depth,
+	enum signal_type signal,
+	uint32_t pixel_clock)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	struct bp_transmitter_control cntl = { 0 };
+	enum bp_result result;
+
+	/* Enable the PHY */
+
+	cntl.action = TRANSMITTER_CONTROL_ENABLE;
+	cntl.engine_id = enc->preferred_engine;
+	cntl.transmitter = enc10->base.transmitter;
+	cntl.pll_id = clock_source;
+	cntl.signal = signal;
+	if (cntl.signal == SIGNAL_TYPE_DVI_DUAL_LINK)
+		cntl.lanes_number = 8;
+	else
+		cntl.lanes_number = 4;
+
+	cntl.hpd_sel = enc10->base.hpd_source;
+
+	cntl.pixel_clock = pixel_clock;
+	cntl.color_depth = color_depth;
+
+	result = link_transmitter_control(enc10, &cntl);
+
+	if (result != BP_RESULT_OK) {
+		DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+			__func__);
+		BREAK_TO_DEBUGGER();
+	}
+}
+
+/* enables DP PHY output */
+void dcn10_link_encoder_enable_dp_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	struct bp_transmitter_control cntl = { 0 };
+	enum bp_result result;
+
+	/* Enable the PHY */
+
+	/* number_of_lanes is used for pixel clock adjust,
+	 * but it's not passed to asic_control.
+	 * We need to set number of lanes manually.
+	 */
+	configure_encoder(enc10, link_settings);
+
+	cntl.action = TRANSMITTER_CONTROL_ENABLE;
+	cntl.engine_id = enc->preferred_engine;
+	cntl.transmitter = enc10->base.transmitter;
+	cntl.pll_id = clock_source;
+	cntl.signal = SIGNAL_TYPE_DISPLAY_PORT;
+	cntl.lanes_number = link_settings->lane_count;
+	cntl.hpd_sel = enc10->base.hpd_source;
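+	/* link_rate is in LINK_RATE_REF_FREQ_IN_KHZ units,
+	 * so this yields the link clock in kHz
+	 */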
+	cntl.pixel_clock = link_settings->link_rate
+						* LINK_RATE_REF_FREQ_IN_KHZ;
+	/* TODO: check if undefined works */
+	cntl.color_depth = COLOR_DEPTH_UNDEFINED;
+
+	result = link_transmitter_control(enc10, &cntl);
+
+	if (result != BP_RESULT_OK) {
+		DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+			__func__);
+		BREAK_TO_DEBUGGER();
+	}
+}
+
+/* enables DP PHY output in MST mode */
+void dcn10_link_encoder_enable_dp_mst_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	struct bp_transmitter_control cntl = { 0 };
+	enum bp_result result;
+
+	/* Enable the PHY */
+
+	/* number_of_lanes is used for pixel clock adjust,
+	 * but it's not passed to asic_control.
+	 * We need to set number of lanes manually.
+	 */
+	configure_encoder(enc10, link_settings);
+
+	cntl.action = TRANSMITTER_CONTROL_ENABLE;
+	cntl.engine_id = ENGINE_ID_UNKNOWN;
+	cntl.transmitter = enc10->base.transmitter;
+	cntl.pll_id = clock_source;
+	cntl.signal = SIGNAL_TYPE_DISPLAY_PORT_MST;
+	cntl.lanes_number = link_settings->lane_count;
+	cntl.hpd_sel = enc10->base.hpd_source;
+	cntl.pixel_clock = link_settings->link_rate
+						* LINK_RATE_REF_FREQ_IN_KHZ;
+	/* TODO: check if undefined works */
+	cntl.color_depth = COLOR_DEPTH_UNDEFINED;
+
+	result = link_transmitter_control(enc10, &cntl);
+
+	if (result != BP_RESULT_OK) {
+		DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+			__func__);
+		BREAK_TO_DEBUGGER();
+	}
+}
+
+/*
+ * @brief
+ * Disable transmitter and its encoder
+ */
+void dcn10_link_encoder_disable_output(
+	struct link_encoder *enc,
+	enum signal_type signal)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	struct bp_transmitter_control cntl = { 0 };
+	enum bp_result result;
+
+	if (!dcn10_is_dig_enabled(enc)) {
+		/* OF_SKIP_POWER_DOWN_INACTIVE_ENCODER */
+		return;
+	}
+	/* Power-down RX and disable GPU PHY should be paired.
+	 * Disabling PHY without powering down RX may cause
+	 * symbol lock loss, on which we will get DP Sink interrupt.
+	 */
+
+	/* There is a case for the DP active dongles
+	 * where we want to disable the PHY but keep RX powered,
+	 * for those we need to ignore DP Sink interrupt
+	 * by checking lane count that has been set
+	 * on the last do_enable_output().
+	 */
+
+	/* disable transmitter */
+	cntl.action = TRANSMITTER_CONTROL_DISABLE;
+	cntl.transmitter = enc10->base.transmitter;
+	cntl.hpd_sel = enc10->base.hpd_source;
+	cntl.signal = signal;
+	cntl.connector_obj_id = enc10->base.connector;
+
+	result = link_transmitter_control(enc10, &cntl);
+
+	if (result != BP_RESULT_OK) {
+		DC_LOG_ERROR("%s: Failed to execute VBIOS command table!\n",
+			__func__);
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	/* disable encoder */
+	if (dc_is_dp_signal(signal))
+		link_encoder_disable(enc10);
+}
+
+void dcn10_link_encoder_dp_set_lane_settings(
+	struct link_encoder *enc,
+	const struct link_training_settings *link_settings)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	union dpcd_training_lane_set training_lane_set = { { 0 } };
+	int32_t lane = 0;
+	struct bp_transmitter_control cntl = { 0 };
+
+	if (!link_settings) {
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
+	cntl.action = TRANSMITTER_CONTROL_SET_VOLTAGE_AND_PREEMPASIS;
+	cntl.transmitter = enc10->base.transmitter;
+	cntl.connector_obj_id = enc10->base.connector;
+	cntl.lanes_number = link_settings->link_settings.lane_count;
+	cntl.hpd_sel = enc10->base.hpd_source;
+	cntl.pixel_clock = link_settings->link_settings.link_rate *
+						LINK_RATE_REF_FREQ_IN_KHZ;
+
+	for (lane = 0; lane < link_settings->link_settings.lane_count; lane++) {
+		/* translate lane settings */
+
+		training_lane_set.bits.VOLTAGE_SWING_SET =
+			link_settings->lane_settings[lane].VOLTAGE_SWING;
+		training_lane_set.bits.PRE_EMPHASIS_SET =
+			link_settings->lane_settings[lane].PRE_EMPHASIS;
+
+		/* post cursor 2 setting only applies to HBR2 link rate */
+		if (link_settings->link_settings.link_rate == LINK_RATE_HIGH2) {
+			/* this is passed to VBIOS
+			 * to program post cursor 2 level
+			 */
+			training_lane_set.bits.POST_CURSOR2_SET =
+				link_settings->lane_settings[lane].POST_CURSOR2;
+		}
+
+		cntl.lane_select = lane;
+		cntl.lane_settings = training_lane_set.raw;
+
+		/* call VBIOS table to set voltage swing and pre-emphasis */
+		link_transmitter_control(enc10, &cntl);
+	}
+}
+
+/* set DP PHY test and training patterns */
+void dcn10_link_encoder_dp_set_phy_pattern(
+	struct link_encoder *enc,
+	const struct encoder_set_dp_phy_pattern_param *param)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	switch (param->dp_phy_pattern) {
+	case DP_TEST_PATTERN_TRAINING_PATTERN1:
+		dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 0);
+		break;
+	case DP_TEST_PATTERN_TRAINING_PATTERN2:
+		dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 1);
+		break;
+	case DP_TEST_PATTERN_TRAINING_PATTERN3:
+		dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 2);
+		break;
+	case DP_TEST_PATTERN_TRAINING_PATTERN4:
+		dcn10_link_encoder_set_dp_phy_pattern_training_pattern(enc, 3);
+		break;
+	case DP_TEST_PATTERN_D102:
+		set_dp_phy_pattern_d102(enc10);
+		break;
+	case DP_TEST_PATTERN_SYMBOL_ERROR:
+		set_dp_phy_pattern_symbol_error(enc10);
+		break;
+	case DP_TEST_PATTERN_PRBS7:
+		set_dp_phy_pattern_prbs7(enc10);
+		break;
+	case DP_TEST_PATTERN_80BIT_CUSTOM:
+		set_dp_phy_pattern_80bit_custom(
+			enc10, param->custom_pattern);
+		break;
+	case DP_TEST_PATTERN_CP2520_1:
+		set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 1);
+		break;
+	case DP_TEST_PATTERN_CP2520_2:
+		set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 2);
+		break;
+	case DP_TEST_PATTERN_CP2520_3:
+		set_dp_phy_pattern_hbr2_compliance_cp2520_2(enc10, 3);
+		break;
+	case DP_TEST_PATTERN_VIDEO_MODE: {
+		set_dp_phy_pattern_passthrough_mode(
+			enc10, param->dp_panel_mode);
+		break;
+	}
+
+	default:
+		/* invalid phy pattern */
+		ASSERT_CRITICAL(false);
+		break;
+	}
+}
+
+static void fill_stream_allocation_row_info(
+	const struct link_mst_stream_allocation *stream_allocation,
+	uint32_t *src,
+	uint32_t *slots)
+{
+	const struct stream_encoder *stream_enc = stream_allocation->stream_enc;
+
+	if (stream_enc) {
+		*src = stream_enc->id;
+		*slots = stream_allocation->slot_count;
+	} else {
+		*src = 0;
+		*slots = 0;
+	}
+}
+
+/* programs DP MST VC payload allocation */
+void dcn10_link_encoder_update_mst_stream_allocation_table(
+	struct link_encoder *enc,
+	const struct link_mst_stream_allocation_table *table)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t value0 = 0;
+	uint32_t value1 = 0;
+	uint32_t value2 = 0;
+	uint32_t slots = 0;
+	uint32_t src = 0;
+	uint32_t retries = 0;
+
+	/* For CZ, there are only 3 pipes, so there are at most 3 virtual channels. */
+
+	/* --- Set MSE Stream Attribute -
+	 * Setup VC Payload Table on Tx Side,
+	 * Issue allocation change trigger
+	 * to commit payload on both tx and rx side
+	 */
+
+	/* we should clean-up table each time */
+
+	if (table->stream_count >= 1) {
+		fill_stream_allocation_row_info(
+			&table->stream_allocations[0],
+			&src,
+			&slots);
+	} else {
+		src = 0;
+		slots = 0;
+	}
+
+	REG_UPDATE_2(DP_MSE_SAT0,
+			DP_MSE_SAT_SRC0, src,
+			DP_MSE_SAT_SLOT_COUNT0, slots);
+
+	if (table->stream_count >= 2) {
+		fill_stream_allocation_row_info(
+			&table->stream_allocations[1],
+			&src,
+			&slots);
+	} else {
+		src = 0;
+		slots = 0;
+	}
+
+	REG_UPDATE_2(DP_MSE_SAT0,
+			DP_MSE_SAT_SRC1, src,
+			DP_MSE_SAT_SLOT_COUNT1, slots);
+
+	if (table->stream_count >= 3) {
+		fill_stream_allocation_row_info(
+			&table->stream_allocations[2],
+			&src,
+			&slots);
+	} else {
+		src = 0;
+		slots = 0;
+	}
+
+	REG_UPDATE_2(DP_MSE_SAT1,
+			DP_MSE_SAT_SRC2, src,
+			DP_MSE_SAT_SLOT_COUNT2, slots);
+
+	if (table->stream_count >= 4) {
+		fill_stream_allocation_row_info(
+			&table->stream_allocations[3],
+			&src,
+			&slots);
+	} else {
+		src = 0;
+		slots = 0;
+	}
+
+	REG_UPDATE_2(DP_MSE_SAT1,
+			DP_MSE_SAT_SRC3, src,
+			DP_MSE_SAT_SLOT_COUNT3, slots);
+
+	/* --- wait for transaction finish */
+
+	/* Send the allocation change trigger (ACT):
+	 * this step first sends the ACT,
+	 * then double-buffers the SAT into the hardware,
+	 * making the new allocation active on the DP MST mode link.
+	 */
+
+	/* DP_MSE_SAT_UPDATE:
+	 * 0 - No Action
+	 * 1 - Update SAT with trigger
+	 * 2 - Update SAT without trigger
+	 */
+	REG_UPDATE(DP_MSE_SAT_UPDATE,
+			DP_MSE_SAT_UPDATE, 1);
+
+	/* wait for the update to complete
+	 * (i.e. the DP_MSE_SAT_UPDATE field is reset to 0),
+	 * then wait for the transmission
+	 * of at least 16 MTP headers on the immediate local link,
+	 * i.e. the DP_MSE_16_MTP_KEEPOUT field (read only) is reset to 0.
+	 * A value of 1 indicates that DP MST mode
+	 * is in the 16 MTP keepout region after a VC has been added;
+	 * MST stream bandwidth (VC rate) can be configured
+	 * only after this bit is cleared
+	 */
+	do {
+		udelay(10);
+
+		REG_GET(DP_MSE_SAT_UPDATE,
+				DP_MSE_SAT_UPDATE, &value1);
+
+		REG_GET(DP_MSE_SAT_UPDATE,
+				DP_MSE_16_MTP_KEEPOUT, &value2);
+
+		/* DP_MSE_SAT_UPDATE was set to 1 above; exit once HW
+		 * has cleared it and the 16 MTP keepout has ended
+		 */
+		if (!value1 && !value2)
+			break;
+		++retries;
+	} while (retries < DP_MST_UPDATE_MAX_RETRY);
+}
+
+void dcn10_link_encoder_connect_dig_be_to_fe(
+	struct link_encoder *enc,
+	enum engine_id engine,
+	bool connect)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+	uint32_t field;
+
+	if (engine != ENGINE_ID_UNKNOWN) {
+
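+		/* DIG_FE_SOURCE_SELECT is a bitmask of frontend engines;
+		 * set or clear the bit that corresponds to this engine
+		 */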
+		REG_GET(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, &field);
+
+		if (connect)
+			field |= get_frontend_source(engine);
+		else
+			field &= ~get_frontend_source(engine);
+
+		REG_UPDATE(DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, field);
+	}
+}
+
+
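+/* convenience accessors for this encoder instance's HPD block registers */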
+#define HPD_REG(reg)\
+	(enc10->hpd_regs->reg)
+
+#define HPD_REG_READ(reg_name) \
+		dm_read_reg(CTX, HPD_REG(reg_name))
+
+#define HPD_REG_UPDATE_N(reg_name, n, ...)	\
+		generic_reg_update_ex(CTX, \
+				HPD_REG(reg_name), \
+				HPD_REG_READ(reg_name), \
+				n, __VA_ARGS__)
+
+#define HPD_REG_UPDATE(reg_name, field, val)	\
+		HPD_REG_UPDATE_N(reg_name, 1, \
+				FN(reg_name, field), val)
+
+void dcn10_link_encoder_enable_hpd(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	HPD_REG_UPDATE(DC_HPD_CONTROL,
+			DC_HPD_EN, 1);
+}
+
+void dcn10_link_encoder_disable_hpd(struct link_encoder *enc)
+{
+	struct dcn10_link_encoder *enc10 = TO_DCN10_LINK_ENC(enc);
+
+	HPD_REG_UPDATE(DC_HPD_CONTROL,
+			DC_HPD_EN, 0);
+}
+
+
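+/* convenience accessors for this encoder instance's DP AUX block registers */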
+#define AUX_REG(reg)\
+	(enc10->aux_regs->reg)
+
+#define AUX_REG_READ(reg_name) \
+		dm_read_reg(CTX, AUX_REG(reg_name))
+
+#define AUX_REG_UPDATE_N(reg_name, n, ...)	\
+		generic_reg_update_ex(CTX, \
+				AUX_REG(reg_name), \
+				AUX_REG_READ(reg_name), \
+				n, __VA_ARGS__)
+
+#define AUX_REG_UPDATE(reg_name, field, val)	\
+		AUX_REG_UPDATE_N(reg_name, 1, \
+				FN(reg_name, field), val)
+
+#define AUX_REG_UPDATE_2(reg, f1, v1, f2, v2)	\
+		AUX_REG_UPDATE_N(reg, 2,\
+				FN(reg, f1), v1,\
+				FN(reg, f2), v2)
+
+static void aux_initialize(
+	struct dcn10_link_encoder *enc10)
+{
+	enum hpd_source_id hpd_source = enc10->base.hpd_source;
+
+	AUX_REG_UPDATE_2(AUX_CONTROL,
+			AUX_HPD_SEL, hpd_source,
+			AUX_LS_READ_EN, 0);
+
+	/* 1/4 window (the maximum allowed) */
+	AUX_REG_UPDATE(AUX_DPHY_RX_CONTROL0,
+			AUX_RX_RECEIVE_WINDOW, 1);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
new file mode 100644
index 0000000..2a97cdb
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_LINK_ENCODER__DCN10_H__
+#define __DC_LINK_ENCODER__DCN10_H__
+
+#include "link_encoder.h"
+
+#define TO_DCN10_LINK_ENC(link_encoder)\
+	container_of(link_encoder, struct dcn10_link_encoder, base)
+
+
+#define AUX_REG_LIST(id)\
+	SRI(AUX_CONTROL, DP_AUX, id), \
+	SRI(AUX_DPHY_RX_CONTROL0, DP_AUX, id)
+
+#define HPD_REG_LIST(id)\
+	SRI(DC_HPD_CONTROL, HPD, id)
+
+#define LE_DCN_COMMON_REG_LIST(id) \
+	SRI(DIG_BE_CNTL, DIG, id), \
+	SRI(DIG_BE_EN_CNTL, DIG, id), \
+	SRI(DP_CONFIG, DP, id), \
+	SRI(DP_DPHY_CNTL, DP, id), \
+	SRI(DP_DPHY_PRBS_CNTL, DP, id), \
+	SRI(DP_DPHY_SCRAM_CNTL, DP, id),\
+	SRI(DP_DPHY_SYM0, DP, id), \
+	SRI(DP_DPHY_SYM1, DP, id), \
+	SRI(DP_DPHY_SYM2, DP, id), \
+	SRI(DP_DPHY_TRAINING_PATTERN_SEL, DP, id), \
+	SRI(DP_LINK_CNTL, DP, id), \
+	SRI(DP_LINK_FRAMING_CNTL, DP, id), \
+	SRI(DP_MSE_SAT0, DP, id), \
+	SRI(DP_MSE_SAT1, DP, id), \
+	SRI(DP_MSE_SAT2, DP, id), \
+	SRI(DP_MSE_SAT_UPDATE, DP, id), \
+	SRI(DP_SEC_CNTL, DP, id), \
+	SRI(DP_VID_STREAM_CNTL, DP, id), \
+	SRI(DP_DPHY_FAST_TRAINING, DP, id), \
+	SRI(DP_SEC_CNTL1, DP, id), \
+	SRI(DP_DPHY_BS_SR_SWAP_CNTL, DP, id), \
+	SRI(DP_DPHY_INTERNAL_CTRL, DP, id), \
+	SRI(DP_DPHY_HBR2_PATTERN_CONTROL, DP, id)
+
+#define LE_DCN10_REG_LIST(id)\
+	LE_DCN_COMMON_REG_LIST(id)
+
+struct dcn10_link_enc_aux_registers {
+	uint32_t AUX_CONTROL;
+	uint32_t AUX_DPHY_RX_CONTROL0;
+};
+
+struct dcn10_link_enc_hpd_registers {
+	uint32_t DC_HPD_CONTROL;
+};
+
+struct dcn10_link_enc_registers {
+	uint32_t DIG_BE_CNTL;
+	uint32_t DIG_BE_EN_CNTL;
+	uint32_t DP_CONFIG;
+	uint32_t DP_DPHY_CNTL;
+	uint32_t DP_DPHY_INTERNAL_CTRL;
+	uint32_t DP_DPHY_PRBS_CNTL;
+	uint32_t DP_DPHY_SCRAM_CNTL;
+	uint32_t DP_DPHY_SYM0;
+	uint32_t DP_DPHY_SYM1;
+	uint32_t DP_DPHY_SYM2;
+	uint32_t DP_DPHY_TRAINING_PATTERN_SEL;
+	uint32_t DP_LINK_CNTL;
+	uint32_t DP_LINK_FRAMING_CNTL;
+	uint32_t DP_MSE_SAT0;
+	uint32_t DP_MSE_SAT1;
+	uint32_t DP_MSE_SAT2;
+	uint32_t DP_MSE_SAT_UPDATE;
+	uint32_t DP_SEC_CNTL;
+	uint32_t DP_VID_STREAM_CNTL;
+	uint32_t DP_DPHY_FAST_TRAINING;
+	uint32_t DP_DPHY_BS_SR_SWAP_CNTL;
+	uint32_t DP_DPHY_HBR2_PATTERN_CONTROL;
+	uint32_t DP_SEC_CNTL1;
+};
+
+#define LE_SF(reg_name, field_name, post_fix)\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
+#define LINK_ENCODER_MASK_SH_LIST_DCN10(mask_sh)\
+	LE_SF(DIG0_DIG_BE_EN_CNTL, DIG_ENABLE, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CNTL, DIG_HPD_SELECT, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CNTL, DIG_MODE, mask_sh),\
+	LE_SF(DIG0_DIG_BE_CNTL, DIG_FE_SOURCE_SELECT, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_BYPASS, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE0, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE1, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE2, mask_sh),\
+	LE_SF(DP0_DP_DPHY_CNTL, DPHY_ATEST_SEL_LANE3, mask_sh),\
+	LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_EN, mask_sh),\
+	LE_SF(DP0_DP_DPHY_PRBS_CNTL, DPHY_PRBS_SEL, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM1, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM2, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM0, DPHY_SYM3, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM4, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM5, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM1, DPHY_SYM6, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM7, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SYM2, DPHY_SYM8, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_BS_COUNT, mask_sh),\
+	LE_SF(DP0_DP_DPHY_SCRAM_CNTL, DPHY_SCRAMBLER_ADVANCE, mask_sh),\
+	LE_SF(DP0_DP_DPHY_FAST_TRAINING, DPHY_RX_FAST_TRAINING_CAPABLE, mask_sh),\
+	LE_SF(DP0_DP_DPHY_BS_SR_SWAP_CNTL, DPHY_LOAD_BS_COUNT, mask_sh),\
+	LE_SF(DP0_DP_DPHY_TRAINING_PATTERN_SEL, DPHY_TRAINING_PATTERN_SEL, mask_sh),\
+	LE_SF(DP0_DP_DPHY_HBR2_PATTERN_CONTROL, DP_DPHY_HBR2_PATTERN_CONTROL, mask_sh),\
+	LE_SF(DP0_DP_LINK_CNTL, DP_LINK_TRAINING_COMPLETE, mask_sh),\
+	LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_IDLE_BS_INTERVAL, mask_sh),\
+	LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VBID_DISABLE, mask_sh),\
+	LE_SF(DP0_DP_LINK_FRAMING_CNTL, DP_VID_ENHANCED_FRAME_MODE, mask_sh),\
+	LE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+	LE_SF(DP0_DP_CONFIG, DP_UDI_LANES, mask_sh),\
+	LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_LINE_NUM, mask_sh),\
+	LE_SF(DP0_DP_SEC_CNTL1, DP_SEC_GSP0_PRIORITY, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC0, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SRC1, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT0, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT0, DP_MSE_SAT_SLOT_COUNT1, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC2, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SRC3, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT2, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT1, DP_MSE_SAT_SLOT_COUNT3, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_SAT_UPDATE, mask_sh),\
+	LE_SF(DP0_DP_MSE_SAT_UPDATE, DP_MSE_16_MTP_KEEPOUT, mask_sh),\
+	LE_SF(DP_AUX0_AUX_CONTROL, AUX_HPD_SEL, mask_sh),\
+	LE_SF(DP_AUX0_AUX_CONTROL, AUX_LS_READ_EN, mask_sh),\
+	LE_SF(DP_AUX0_AUX_DPHY_RX_CONTROL0, AUX_RX_RECEIVE_WINDOW, mask_sh),\
+	LE_SF(HPD0_DC_HPD_CONTROL, DC_HPD_EN, mask_sh)
+
+#define DCN_LINK_ENCODER_REG_FIELD_LIST(type) \
+	type DIG_ENABLE;\
+	type DIG_HPD_SELECT;\
+	type DIG_MODE;\
+	type DIG_FE_SOURCE_SELECT;\
+	type DPHY_BYPASS;\
+	type DPHY_ATEST_SEL_LANE0;\
+	type DPHY_ATEST_SEL_LANE1;\
+	type DPHY_ATEST_SEL_LANE2;\
+	type DPHY_ATEST_SEL_LANE3;\
+	type DPHY_PRBS_EN;\
+	type DPHY_PRBS_SEL;\
+	type DPHY_SYM1;\
+	type DPHY_SYM2;\
+	type DPHY_SYM3;\
+	type DPHY_SYM4;\
+	type DPHY_SYM5;\
+	type DPHY_SYM6;\
+	type DPHY_SYM7;\
+	type DPHY_SYM8;\
+	type DPHY_SCRAMBLER_BS_COUNT;\
+	type DPHY_SCRAMBLER_ADVANCE;\
+	type DPHY_RX_FAST_TRAINING_CAPABLE;\
+	type DPHY_LOAD_BS_COUNT;\
+	type DPHY_TRAINING_PATTERN_SEL;\
+	type DP_DPHY_HBR2_PATTERN_CONTROL;\
+	type DP_LINK_TRAINING_COMPLETE;\
+	type DP_IDLE_BS_INTERVAL;\
+	type DP_VBID_DISABLE;\
+	type DP_VID_ENHANCED_FRAME_MODE;\
+	type DP_VID_STREAM_ENABLE;\
+	type DP_UDI_LANES;\
+	type DP_SEC_GSP0_LINE_NUM;\
+	type DP_SEC_GSP0_PRIORITY;\
+	type DP_MSE_SAT_SRC0;\
+	type DP_MSE_SAT_SRC1;\
+	type DP_MSE_SAT_SRC2;\
+	type DP_MSE_SAT_SRC3;\
+	type DP_MSE_SAT_SLOT_COUNT0;\
+	type DP_MSE_SAT_SLOT_COUNT1;\
+	type DP_MSE_SAT_SLOT_COUNT2;\
+	type DP_MSE_SAT_SLOT_COUNT3;\
+	type DP_MSE_SAT_UPDATE;\
+	type DP_MSE_16_MTP_KEEPOUT;\
+	type AUX_HPD_SEL;\
+	type AUX_LS_READ_EN;\
+	type AUX_RX_RECEIVE_WINDOW;\
+	type DC_HPD_EN
+
+struct dcn10_link_enc_shift {
+	DCN_LINK_ENCODER_REG_FIELD_LIST(uint8_t);
+};
+
+struct dcn10_link_enc_mask {
+	DCN_LINK_ENCODER_REG_FIELD_LIST(uint32_t);
+};
+
+struct dcn10_link_encoder {
+	struct link_encoder base;
+	const struct dcn10_link_enc_registers *link_regs;
+	const struct dcn10_link_enc_aux_registers *aux_regs;
+	const struct dcn10_link_enc_hpd_registers *hpd_regs;
+	const struct dcn10_link_enc_shift *link_shift;
+	const struct dcn10_link_enc_mask *link_mask;
+};
+
+
+void dcn10_link_encoder_construct(
+	struct dcn10_link_encoder *enc10,
+	const struct encoder_init_data *init_data,
+	const struct encoder_feature_support *enc_features,
+	const struct dcn10_link_enc_registers *link_regs,
+	const struct dcn10_link_enc_aux_registers *aux_regs,
+	const struct dcn10_link_enc_hpd_registers *hpd_regs,
+	const struct dcn10_link_enc_shift *link_shift,
+	const struct dcn10_link_enc_mask *link_mask);
+
+bool dcn10_link_encoder_validate_dvi_output(
+	const struct dcn10_link_encoder *enc10,
+	enum signal_type connector_signal,
+	enum signal_type signal,
+	const struct dc_crtc_timing *crtc_timing);
+
+bool dcn10_link_encoder_validate_rgb_output(
+	const struct dcn10_link_encoder *enc10,
+	const struct dc_crtc_timing *crtc_timing);
+
+bool dcn10_link_encoder_validate_dp_output(
+	const struct dcn10_link_encoder *enc10,
+	const struct dc_crtc_timing *crtc_timing);
+
+bool dcn10_link_encoder_validate_wireless_output(
+	const struct dcn10_link_encoder *enc10,
+	const struct dc_crtc_timing *crtc_timing);
+
+bool dcn10_link_encoder_validate_output_with_stream(
+	struct link_encoder *enc,
+	const struct dc_stream_state *stream);
+
+/****************** HW programming ************************/
+
+/* initialize HW */  /* why do we initialize aux in here? */
+void dcn10_link_encoder_hw_init(struct link_encoder *enc);
+
+void dcn10_link_encoder_destroy(struct link_encoder **enc);
+
+/* program DIG_MODE in DIG_BE */
+/* TODO can this be combined with enable_output? */
+void dcn10_link_encoder_setup(
+	struct link_encoder *enc,
+	enum signal_type signal);
+
+/* enables TMDS PHY output */
+/* TODO: still need depth or just pass in adjusted pixel clock? */
+void dcn10_link_encoder_enable_tmds_output(
+	struct link_encoder *enc,
+	enum clock_source_id clock_source,
+	enum dc_color_depth color_depth,
+	enum signal_type signal,
+	uint32_t pixel_clock);
+
+/* enables DP PHY output */
+void dcn10_link_encoder_enable_dp_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source);
+
+/* enables DP PHY output in MST mode */
+void dcn10_link_encoder_enable_dp_mst_output(
+	struct link_encoder *enc,
+	const struct dc_link_settings *link_settings,
+	enum clock_source_id clock_source);
+
+/* disable PHY output */
+void dcn10_link_encoder_disable_output(
+	struct link_encoder *enc,
+	enum signal_type signal);
+
+/* set DP lane settings */
+void dcn10_link_encoder_dp_set_lane_settings(
+	struct link_encoder *enc,
+	const struct link_training_settings *link_settings);
+
+void dcn10_link_encoder_dp_set_phy_pattern(
+	struct link_encoder *enc,
+	const struct encoder_set_dp_phy_pattern_param *param);
+
+/* programs DP MST VC payload allocation */
+void dcn10_link_encoder_update_mst_stream_allocation_table(
+	struct link_encoder *enc,
+	const struct link_mst_stream_allocation_table *table);
+
+void dcn10_link_encoder_connect_dig_be_to_fe(
+	struct link_encoder *enc,
+	enum engine_id engine,
+	bool connect);
+
+void dcn10_link_encoder_set_dp_phy_pattern_training_pattern(
+	struct link_encoder *enc,
+	uint32_t index);
+
+void dcn10_link_encoder_enable_hpd(struct link_encoder *enc);
+
+void dcn10_link_encoder_disable_hpd(struct link_encoder *enc);
+
+void dcn10_psr_program_dp_dphy_fast_training(struct link_encoder *enc,
+			bool exit_link_training_required);
+
+void dcn10_psr_program_secondary_packet(struct link_encoder *enc,
+			unsigned int sdp_transmit_line_num_deadline);
+
+bool dcn10_is_dig_enabled(struct link_encoder *enc);
+
+#endif /* __DC_LINK_ENCODER__DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
index 179890b..9ca51ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c
@@ -65,6 +65,7 @@
 	int mpcc_id)
 {
 	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+	struct mpcc *mpcc = mpc1_get_mpcc(mpc, mpcc_id);
 
 	REG_UPDATE_5(MPCC_CONTROL[mpcc_id],
 			MPCC_ALPHA_BLND_MODE,		blnd_cfg->alpha_mode,
@@ -74,6 +75,7 @@
 			MPCC_GLOBAL_GAIN,		blnd_cfg->global_gain);
 
 	mpc1_set_bg_color(mpc, &blnd_cfg->black_color, mpcc_id);
+	mpcc->blnd_cfg = *blnd_cfg;
 }
 
 void mpc1_update_stereo_mix(
@@ -235,8 +237,7 @@
 	}
 
 	/* update the blending configuration */
-	new_mpcc->blnd_cfg = *blnd_cfg;
-	mpc->funcs->update_blending(mpc, &new_mpcc->blnd_cfg, mpcc_id);
+	mpc->funcs->update_blending(mpc, blnd_cfg, mpcc_id);
 
 	/* update the stereo mix settings, if provided */
 	if (sm_cfg != NULL) {
@@ -409,7 +410,26 @@
 	}
 }
 
+void mpc1_read_mpcc_state(
+		struct mpc *mpc,
+		int mpcc_inst,
+		struct mpcc_state *s)
+{
+	struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc);
+
+	REG_GET(MPCC_OPP_ID[mpcc_inst], MPCC_OPP_ID, &s->opp_id);
+	REG_GET(MPCC_TOP_SEL[mpcc_inst], MPCC_TOP_SEL, &s->dpp_id);
+	REG_GET(MPCC_BOT_SEL[mpcc_inst], MPCC_BOT_SEL, &s->bot_mpcc_id);
+	REG_GET_4(MPCC_CONTROL[mpcc_inst], MPCC_MODE, &s->mode,
+			MPCC_ALPHA_BLND_MODE, &s->alpha_mode,
+			MPCC_ALPHA_MULTIPLIED_MODE, &s->pre_multiplied_alpha,
+			MPCC_BLND_ACTIVE_OVERLAP_ONLY, &s->overlap_only);
+	REG_GET_2(MPCC_STATUS[mpcc_inst], MPCC_IDLE, &s->idle,
+			MPCC_BUSY, &s->busy);
+}
+
 const struct mpc_funcs dcn10_mpc_funcs = {
+	.read_mpcc_state = mpc1_read_mpcc_state,
 	.insert_plane = mpc1_insert_plane,
 	.remove_mpcc = mpc1_remove_mpcc,
 	.mpc_init = mpc1_mpc_init,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
index 267a299..d3d16c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h
@@ -183,4 +183,9 @@
 	struct mpc_tree *tree,
 	int dpp_id);
 
+void mpc1_read_mpcc_state(
+		struct mpc *mpc,
+		int mpcc_inst,
+		struct mpcc_state *s);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 4bf64d1..f2fbce0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -93,6 +93,81 @@
 		OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0);
 }
 
+static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing)
+{
+	struct dc_crtc_timing patched_crtc_timing;
+	int vesa_sync_start;
+	int asic_blank_end;
+	int interlace_factor;
+	int vertical_line_start;
+
+	patched_crtc_timing = *dc_crtc_timing;
+	optc1_apply_front_porch_workaround(optc, &patched_crtc_timing);
+
+	vesa_sync_start = patched_crtc_timing.h_addressable +
+			patched_crtc_timing.h_border_right +
+			patched_crtc_timing.h_front_porch;
+
+	asic_blank_end = patched_crtc_timing.h_total -
+			vesa_sync_start -
+			patched_crtc_timing.h_border_left;
+
+	interlace_factor = patched_crtc_timing.flags.INTERLACE ? 2 : 1;
+
+	vesa_sync_start = patched_crtc_timing.v_addressable +
+			patched_crtc_timing.v_border_bottom +
+			patched_crtc_timing.v_front_porch;
+
+	asic_blank_end = (patched_crtc_timing.v_total -
+			vesa_sync_start -
+			patched_crtc_timing.v_border_top)
+			* interlace_factor;
+
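+	/* the interrupt should fire vstartup_start lines before the end of
+	 * vertical blank (asic_blank_end)
+	 */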
+	vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1;
+	if (vertical_line_start < 0) {
+		ASSERT(0);
+		vertical_line_start = 0;
+	}
+
+	return vertical_line_start;
+}
+
+void optc1_program_vline_interrupt(
+		struct timing_generator *optc,
+		const struct dc_crtc_timing *dc_crtc_timing,
+		unsigned long long vsync_delta)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
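+	/* vsync_delta appears to be in ns: rounding it up to tens of us and
+	 * the pixel clock up to hundreds of kHz makes their product a pixel
+	 * count, which divided by h_total (rounded up) gives whole lines
+	 */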
+	unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000);
+	unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_khz + 99), 100);
+	uint32_t req_delta_lines = (uint32_t) div64_u64(
+			(req_delta_tens_of_usec * pix_clk_hundreds_khz +
+					dc_crtc_timing->h_total - 1),
+			dc_crtc_timing->h_total);
+
+	uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing);
+	uint32_t start_line = 0;
+	uint32_t end_line = 0;
+
+	if (req_delta_lines != 0)
+		req_delta_lines--;
+
+	if (req_delta_lines > vsync_line)
+		start_line = dc_crtc_timing->v_total - (req_delta_lines - vsync_line) - 1;
+	else
+		start_line = vsync_line - req_delta_lines;
+
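+	/* the interrupt window is two lines wide; wrap the end back to
+	 * line 2 if it would run past v_total
+	 */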
+	end_line = start_line + 2;
+
+	if (end_line >= dc_crtc_timing->v_total)
+		end_line = 2;
+
+	REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0,
+			OTG_VERTICAL_INTERRUPT0_LINE_START, start_line,
+			OTG_VERTICAL_INTERRUPT0_LINE_END, end_line);
+}
+
 /**
  * program_timing_generator   used by mode timing set
  * Program CRTC Timing Registers - OTG_H_*, OTG_V_*, Pixel repetition.
@@ -285,7 +360,7 @@
 
 }
 
-static void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
+void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
@@ -780,17 +855,17 @@
 				OTG_SET_V_TOTAL_MIN_MASK_EN, 0,
 				OTG_SET_V_TOTAL_MIN_MASK, 0);
 	} else {
-		REG_SET(OTG_V_TOTAL_MIN, 0,
-			OTG_V_TOTAL_MIN, 0);
-
-		REG_SET(OTG_V_TOTAL_MAX, 0,
-			OTG_V_TOTAL_MAX, 0);
-
 		REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
 				OTG_SET_V_TOTAL_MIN_MASK, 0,
 				OTG_V_TOTAL_MIN_SEL, 0,
 				OTG_V_TOTAL_MAX_SEL, 0,
 				OTG_FORCE_LOCK_ON_EVENT, 0);
+
+		REG_SET(OTG_V_TOTAL_MIN, 0,
+			OTG_V_TOTAL_MIN, 0);
+
+		REG_SET(OTG_V_TOTAL_MAX, 0,
+			OTG_V_TOTAL_MAX, 0);
 	}
 }
 
@@ -1154,6 +1229,12 @@
 	REG_GET(OTG_V_TOTAL_MIN,
 			OTG_V_TOTAL_MIN, &s->v_total_min);
 
+	REG_GET(OTG_V_TOTAL_CONTROL,
+			OTG_V_TOTAL_MAX_SEL, &s->v_total_max_sel);
+
+	REG_GET(OTG_V_TOTAL_CONTROL,
+			OTG_V_TOTAL_MIN_SEL, &s->v_total_min_sel);
+
 	REG_GET_2(OTG_V_SYNC_A,
 			OTG_V_SYNC_A_START, &s->v_sync_a_start,
 			OTG_V_SYNC_A_END, &s->v_sync_a_end);
@@ -1176,20 +1257,20 @@
 			OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status);
 }
 
-static void optc1_clear_optc_underflow(struct timing_generator *optc)
+void optc1_clear_optc_underflow(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
 	REG_UPDATE(OPTC_INPUT_GLOBAL_CONTROL, OPTC_UNDERFLOW_CLEAR, 1);
 }
 
-static void optc1_tg_init(struct timing_generator *optc)
+void optc1_tg_init(struct timing_generator *optc)
 {
 	optc1_set_blank_data_double_buffer(optc, true);
 	optc1_clear_optc_underflow(optc);
 }
 
-static bool optc1_is_tg_enabled(struct timing_generator *optc)
+bool optc1_is_tg_enabled(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t otg_enabled = 0;
@@ -1200,7 +1281,7 @@
 
 }
 
-static bool optc1_is_optc_underflow_occurred(struct timing_generator *optc)
+bool optc1_is_optc_underflow_occurred(struct timing_generator *optc)
 {
 	struct optc *optc1 = DCN10TG_FROM_TG(optc);
 	uint32_t underflow_occurred = 0;
@@ -1215,6 +1296,7 @@
 static const struct timing_generator_funcs dcn10_tg_funcs = {
 		.validate_timing = optc1_validate_timing,
 		.program_timing = optc1_program_timing,
+		.program_vline_interrupt = optc1_program_vline_interrupt,
 		.program_global_sync = optc1_program_global_sync,
 		.enable_crtc = optc1_enable_crtc,
 		.disable_crtc = optc1_disable_crtc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
index d25e7bf..c62052f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h
@@ -65,6 +65,8 @@
 	SRI(OTG_NOM_VERT_POSITION, OTG, inst),\
 	SRI(OTG_BLACK_COLOR, OTG, inst),\
 	SRI(OTG_CLOCK_CONTROL, OTG, inst),\
+	SRI(OTG_VERTICAL_INTERRUPT0_CONTROL, OTG, inst),\
+	SRI(OTG_VERTICAL_INTERRUPT0_POSITION, OTG, inst),\
 	SRI(OTG_VERTICAL_INTERRUPT2_CONTROL, OTG, inst),\
 	SRI(OTG_VERTICAL_INTERRUPT2_POSITION, OTG, inst),\
 	SRI(OPTC_INPUT_CLOCK_CONTROL, ODM, inst),\
@@ -124,6 +126,8 @@
 	uint32_t OTG_TEST_PATTERN_CONTROL;
 	uint32_t OTG_TEST_PATTERN_COLOR;
 	uint32_t OTG_CLOCK_CONTROL;
+	uint32_t OTG_VERTICAL_INTERRUPT0_CONTROL;
+	uint32_t OTG_VERTICAL_INTERRUPT0_POSITION;
 	uint32_t OTG_VERTICAL_INTERRUPT2_CONTROL;
 	uint32_t OTG_VERTICAL_INTERRUPT2_POSITION;
 	uint32_t OPTC_INPUT_CLOCK_CONTROL;
@@ -206,6 +210,9 @@
 	SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_EN, mask_sh),\
 	SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_ON, mask_sh),\
 	SF(OTG0_OTG_CLOCK_CONTROL, OTG_CLOCK_GATE_DIS, mask_sh),\
+	SF(OTG0_OTG_VERTICAL_INTERRUPT0_CONTROL, OTG_VERTICAL_INTERRUPT0_INT_ENABLE, mask_sh),\
+	SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_START, mask_sh),\
+	SF(OTG0_OTG_VERTICAL_INTERRUPT0_POSITION, OTG_VERTICAL_INTERRUPT0_LINE_END, mask_sh),\
 	SF(OTG0_OTG_VERTICAL_INTERRUPT2_CONTROL, OTG_VERTICAL_INTERRUPT2_INT_ENABLE, mask_sh),\
 	SF(OTG0_OTG_VERTICAL_INTERRUPT2_POSITION, OTG_VERTICAL_INTERRUPT2_LINE_START, mask_sh),\
 	SF(ODM0_OPTC_INPUT_CLOCK_CONTROL, OPTC_INPUT_CLK_EN, mask_sh),\
@@ -323,6 +330,9 @@
 	type OTG_CLOCK_EN;\
 	type OTG_CLOCK_ON;\
 	type OTG_CLOCK_GATE_DIS;\
+	type OTG_VERTICAL_INTERRUPT0_INT_ENABLE;\
+	type OTG_VERTICAL_INTERRUPT0_LINE_START;\
+	type OTG_VERTICAL_INTERRUPT0_LINE_END;\
 	type OTG_VERTICAL_INTERRUPT2_INT_ENABLE;\
 	type OTG_VERTICAL_INTERRUPT2_LINE_START;\
 	type OPTC_INPUT_CLK_EN;\
@@ -396,6 +406,8 @@
 	uint32_t v_total;
 	uint32_t v_total_max;
 	uint32_t v_total_min;
+	uint32_t v_total_min_sel;
+	uint32_t v_total_max_sel;
 	uint32_t v_sync_a_start;
 	uint32_t v_sync_a_end;
 	uint32_t h_blank_start;
@@ -420,6 +432,10 @@
 	const struct dc_crtc_timing *dc_crtc_timing,
 	bool use_vbios);
 
+void optc1_program_vline_interrupt(struct timing_generator *optc,
+		const struct dc_crtc_timing *dc_crtc_timing,
+		unsigned long long vsync_delta);
+
 void optc1_program_global_sync(
 		struct timing_generator *optc);
 
@@ -481,4 +497,14 @@
 
 bool optc1_is_stereo_left_eye(struct timing_generator *optc);
 
+void optc1_clear_optc_underflow(struct timing_generator *optc);
+
+void optc1_tg_init(struct timing_generator *optc);
+
+bool optc1_is_tg_enabled(struct timing_generator *optc);
+
+bool optc1_is_optc_underflow_occurred(struct timing_generator *optc);
+
+void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable);
+
 #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 02bd664..df5cb2d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -38,8 +38,8 @@
 #include "dcn10/dcn10_hw_sequencer.h"
 #include "dce110/dce110_hw_sequencer.h"
 #include "dcn10/dcn10_opp.h"
-#include "dce/dce_link_encoder.h"
-#include "dce/dce_stream_encoder.h"
+#include "dcn10/dcn10_link_encoder.h"
+#include "dcn10/dcn10_stream_encoder.h"
 #include "dce/dce_clocks.h"
 #include "dce/dce_clock_source.h"
 #include "dce/dce_audio.h"
@@ -166,36 +166,22 @@
 
 #define stream_enc_regs(id)\
 [id] = {\
-	SE_DCN_REG_LIST(id),\
-	.TMDS_CNTL = 0,\
-	.AFMT_AVI_INFO0 = 0,\
-	.AFMT_AVI_INFO1 = 0,\
-	.AFMT_AVI_INFO2 = 0,\
-	.AFMT_AVI_INFO3 = 0,\
+	SE_DCN_REG_LIST(id)\
 }
 
-static const struct dce110_stream_enc_registers stream_enc_regs[] = {
+static const struct dcn10_stream_enc_registers stream_enc_regs[] = {
 	stream_enc_regs(0),
 	stream_enc_regs(1),
 	stream_enc_regs(2),
 	stream_enc_regs(3),
 };
 
-static const struct dce_stream_encoder_shift se_shift = {
+static const struct dcn10_stream_encoder_shift se_shift = {
 		SE_COMMON_MASK_SH_LIST_DCN10(__SHIFT)
 };
 
-static const struct dce_stream_encoder_mask se_mask = {
-		SE_COMMON_MASK_SH_LIST_DCN10(_MASK),
-		.AFMT_GENERIC0_UPDATE = 0,
-		.AFMT_GENERIC2_UPDATE = 0,
-		.DP_DYN_RANGE = 0,
-		.DP_YCBCR_RANGE = 0,
-		.HDMI_AVI_INFO_SEND = 0,
-		.HDMI_AVI_INFO_CONT = 0,
-		.HDMI_AVI_INFO_LINE = 0,
-		.DP_SEC_AVI_ENABLE = 0,
-		.AFMT_AVI_INFO_VERSION = 0
+static const struct dcn10_stream_encoder_mask se_mask = {
+		SE_COMMON_MASK_SH_LIST_DCN10(_MASK)
 };
 
 #define audio_regs(id)\
@@ -228,13 +214,11 @@
 	AUX_REG_LIST(id)\
 }
 
-static const struct dce110_link_enc_aux_registers link_enc_aux_regs[] = {
+static const struct dcn10_link_enc_aux_registers link_enc_aux_regs[] = {
 		aux_regs(0),
 		aux_regs(1),
 		aux_regs(2),
-		aux_regs(3),
-		aux_regs(4),
-		aux_regs(5)
+		aux_regs(3)
 };
 
 #define hpd_regs(id)\
@@ -242,13 +226,11 @@
 	HPD_REG_LIST(id)\
 }
 
-static const struct dce110_link_enc_hpd_registers link_enc_hpd_regs[] = {
+static const struct dcn10_link_enc_hpd_registers link_enc_hpd_regs[] = {
 		hpd_regs(0),
 		hpd_regs(1),
 		hpd_regs(2),
-		hpd_regs(3),
-		hpd_regs(4),
-		hpd_regs(5)
+		hpd_regs(3)
 };
 
 #define link_regs(id)\
@@ -257,14 +239,19 @@
 	SRI(DP_DPHY_INTERNAL_CTRL, DP, id) \
 }
 
-static const struct dce110_link_enc_registers link_enc_regs[] = {
+static const struct dcn10_link_enc_registers link_enc_regs[] = {
 	link_regs(0),
 	link_regs(1),
 	link_regs(2),
-	link_regs(3),
-	link_regs(4),
-	link_regs(5),
-	link_regs(6),
+	link_regs(3)
+};
+
+static const struct dcn10_link_enc_shift le_shift = {
+		LINK_ENCODER_MASK_SH_LIST_DCN10(__SHIFT)
+};
+
+static const struct dcn10_link_enc_mask le_mask = {
+		LINK_ENCODER_MASK_SH_LIST_DCN10(_MASK)
 };
 
 #define ipp_regs(id)\
@@ -320,11 +307,14 @@
 };
 
 static const struct dcn_dpp_shift tf_shift = {
-	TF_REG_LIST_SH_MASK_DCN10(__SHIFT)
+	TF_REG_LIST_SH_MASK_DCN10(__SHIFT),
+	TF_DEBUG_REG_LIST_SH_DCN10
 };
 
 static const struct dcn_dpp_mask tf_mask = {
 	TF_REG_LIST_SH_MASK_DCN10(_MASK),
+	TF_DEBUG_REG_LIST_MASK_DCN10
 };
 
 static const struct dcn_mpc_registers mpc_regs = {
@@ -457,6 +447,8 @@
 		.vsr_support = true,
 		.performance_trace = false,
 		.az_endpoint_mute_only = true,
+		.recovery_enabled = false, /* enable this by default after testing. */
+		.max_downscale_src_width = 3840,
 };
 
 static const struct dc_debug debug_defaults_diags = {
@@ -592,20 +584,22 @@
 struct link_encoder *dcn10_link_encoder_create(
 	const struct encoder_init_data *enc_init_data)
 {
-	struct dce110_link_encoder *enc110 =
-		kzalloc(sizeof(struct dce110_link_encoder), GFP_KERNEL);
+	struct dcn10_link_encoder *enc10 =
+		kzalloc(sizeof(struct dcn10_link_encoder), GFP_KERNEL);
 
-	if (!enc110)
+	if (!enc10)
 		return NULL;
 
-	dce110_link_encoder_construct(enc110,
+	dcn10_link_encoder_construct(enc10,
 				      enc_init_data,
 				      &link_enc_feature,
 				      &link_enc_regs[enc_init_data->transmitter],
 				      &link_enc_aux_regs[enc_init_data->channel - 1],
-				      &link_enc_hpd_regs[enc_init_data->hpd_source]);
+				      &link_enc_hpd_regs[enc_init_data->hpd_source],
+				      &le_shift,
+				      &le_mask);
 
-	return &enc110->base;
+	return &enc10->base;
 }
 
 struct clock_source *dcn10_clock_source_create(
@@ -650,16 +644,16 @@
 	enum engine_id eng_id,
 	struct dc_context *ctx)
 {
-	struct dce110_stream_encoder *enc110 =
-		kzalloc(sizeof(struct dce110_stream_encoder), GFP_KERNEL);
+	struct dcn10_stream_encoder *enc1 =
+		kzalloc(sizeof(struct dcn10_stream_encoder), GFP_KERNEL);
 
-	if (!enc110)
+	if (!enc1)
 		return NULL;
 
-	dce110_stream_encoder_construct(enc110, ctx, ctx->dc_bios, eng_id,
+	dcn10_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id,
 					&stream_enc_regs[eng_id],
 					&se_shift, &se_mask);
-	return &enc110->base;
+	return &enc1->base;
 }
 
 static const struct dce_hwseq_registers hwseq_reg = {
@@ -918,36 +912,6 @@
 	return result;
 }
 
-enum dc_status dcn10_validate_guaranteed(
-		struct dc *dc,
-		struct dc_stream_state *dc_stream,
-		struct dc_state *context)
-{
-	enum dc_status result = DC_ERROR_UNEXPECTED;
-
-	context->streams[0] = dc_stream;
-	dc_stream_retain(context->streams[0]);
-	context->stream_count++;
-
-	result = resource_map_pool_resources(dc, context, dc_stream);
-
-	if (result == DC_OK)
-		result = resource_map_phy_clock_resources(dc, context, dc_stream);
-
-	if (result == DC_OK)
-		result = build_mapped_resource(dc, context, dc_stream);
-
-	if (result == DC_OK) {
-		validate_guaranteed_copy_streams(
-				context, dc->caps.max_streams);
-		result = resource_build_scaling_params_for_context(dc, context);
-	}
-	if (result == DC_OK && !dcn_validate_bandwidth(dc, context))
-		return DC_FAIL_BANDWIDTH_VALIDATE;
-
-	return result;
-}
-
 static struct pipe_ctx *dcn10_acquire_idle_pipe_for_layer(
 		struct dc_state *context,
 		const struct resource_pool *pool,
@@ -978,235 +942,16 @@
 	return idle_pipe;
 }
 
-enum dcc_control {
-	dcc_control__256_256_xxx,
-	dcc_control__128_128_xxx,
-	dcc_control__256_64_64,
-};
-
-enum segment_order {
-	segment_order__na,
-	segment_order__contiguous,
-	segment_order__non_contiguous,
-};
-
-static bool dcc_support_pixel_format(
-		enum surface_pixel_format format,
-		unsigned int *bytes_per_element)
-{
-	/* DML: get_bytes_per_element */
-	switch (format) {
-	case SURFACE_PIXEL_FORMAT_GRPH_ARGB1555:
-	case SURFACE_PIXEL_FORMAT_GRPH_RGB565:
-		*bytes_per_element = 2;
-		return true;
-	case SURFACE_PIXEL_FORMAT_GRPH_ARGB8888:
-	case SURFACE_PIXEL_FORMAT_GRPH_ABGR8888:
-	case SURFACE_PIXEL_FORMAT_GRPH_ARGB2101010:
-	case SURFACE_PIXEL_FORMAT_GRPH_ABGR2101010:
-		*bytes_per_element = 4;
-		return true;
-	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
-	case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F:
-	case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
-		*bytes_per_element = 8;
-		return true;
-	default:
-		return false;
-	}
-}
-
-static bool dcc_support_swizzle(
-		enum swizzle_mode_values swizzle,
-		unsigned int bytes_per_element,
-		enum segment_order *segment_order_horz,
-		enum segment_order *segment_order_vert)
-{
-	bool standard_swizzle = false;
-	bool display_swizzle = false;
-
-	switch (swizzle) {
-	case DC_SW_4KB_S:
-	case DC_SW_64KB_S:
-	case DC_SW_VAR_S:
-	case DC_SW_4KB_S_X:
-	case DC_SW_64KB_S_X:
-	case DC_SW_VAR_S_X:
-		standard_swizzle = true;
-		break;
-	case DC_SW_4KB_D:
-	case DC_SW_64KB_D:
-	case DC_SW_VAR_D:
-	case DC_SW_4KB_D_X:
-	case DC_SW_64KB_D_X:
-	case DC_SW_VAR_D_X:
-		display_swizzle = true;
-		break;
-	default:
-		break;
-	}
-
-	if (bytes_per_element == 1 && standard_swizzle) {
-		*segment_order_horz = segment_order__contiguous;
-		*segment_order_vert = segment_order__na;
-		return true;
-	}
-	if (bytes_per_element == 2 && standard_swizzle) {
-		*segment_order_horz = segment_order__non_contiguous;
-		*segment_order_vert = segment_order__contiguous;
-		return true;
-	}
-	if (bytes_per_element == 4 && standard_swizzle) {
-		*segment_order_horz = segment_order__non_contiguous;
-		*segment_order_vert = segment_order__contiguous;
-		return true;
-	}
-	if (bytes_per_element == 8 && standard_swizzle) {
-		*segment_order_horz = segment_order__na;
-		*segment_order_vert = segment_order__contiguous;
-		return true;
-	}
-	if (bytes_per_element == 8 && display_swizzle) {
-		*segment_order_horz = segment_order__contiguous;
-		*segment_order_vert = segment_order__non_contiguous;
-		return true;
-	}
-
-	return false;
-}
-
-static void get_blk256_size(unsigned int *blk256_width, unsigned int *blk256_height,
-		unsigned int bytes_per_element)
-{
-	/* copied from DML.  might want to refactor DML to leverage from DML */
-	/* DML : get_blk256_size */
-	if (bytes_per_element == 1) {
-		*blk256_width = 16;
-		*blk256_height = 16;
-	} else if (bytes_per_element == 2) {
-		*blk256_width = 16;
-		*blk256_height = 8;
-	} else if (bytes_per_element == 4) {
-		*blk256_width = 8;
-		*blk256_height = 8;
-	} else if (bytes_per_element == 8) {
-		*blk256_width = 8;
-		*blk256_height = 4;
-	}
-}
-
-static void det_request_size(
-		unsigned int height,
-		unsigned int width,
-		unsigned int bpe,
-		bool *req128_horz_wc,
-		bool *req128_vert_wc)
-{
-	unsigned int detile_buf_size = 164 * 1024;  /* 164KB for DCN1.0 */
-
-	unsigned int blk256_height = 0;
-	unsigned int blk256_width = 0;
-	unsigned int swath_bytes_horz_wc, swath_bytes_vert_wc;
-
-	get_blk256_size(&blk256_width, &blk256_height, bpe);
-
-	swath_bytes_horz_wc = height * blk256_height * bpe;
-	swath_bytes_vert_wc = width * blk256_width * bpe;
-
-	*req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ?
-			false : /* full 256B request */
-			true; /* half 128b request */
-
-	*req128_vert_wc = (2 * swath_bytes_vert_wc <= detile_buf_size) ?
-			false : /* full 256B request */
-			true; /* half 128b request */
-}
-
-static bool get_dcc_compression_cap(const struct dc *dc,
+static bool dcn10_get_dcc_compression_cap(const struct dc *dc,
 		const struct dc_dcc_surface_param *input,
 		struct dc_surface_dcc_cap *output)
 {
-	/* implement section 1.6.2.1 of DCN1_Programming_Guide.docx */
-	enum dcc_control dcc_control;
-	unsigned int bpe;
-	enum segment_order segment_order_horz, segment_order_vert;
-	bool req128_horz_wc, req128_vert_wc;
-
-	memset(output, 0, sizeof(*output));
-
-	if (dc->debug.disable_dcc == DCC_DISABLE)
-		return false;
-
-	if (!dcc_support_pixel_format(input->format,
-			&bpe))
-		return false;
-
-	if (!dcc_support_swizzle(input->swizzle_mode, bpe,
-			&segment_order_horz, &segment_order_vert))
-		return false;
-
-	det_request_size(input->surface_size.height,  input->surface_size.width,
-			bpe, &req128_horz_wc, &req128_vert_wc);
-
-	if (!req128_horz_wc && !req128_vert_wc) {
-		dcc_control = dcc_control__256_256_xxx;
-	} else if (input->scan == SCAN_DIRECTION_HORIZONTAL) {
-		if (!req128_horz_wc)
-			dcc_control = dcc_control__256_256_xxx;
-		else if (segment_order_horz == segment_order__contiguous)
-			dcc_control = dcc_control__128_128_xxx;
-		else
-			dcc_control = dcc_control__256_64_64;
-	} else if (input->scan == SCAN_DIRECTION_VERTICAL) {
-		if (!req128_vert_wc)
-			dcc_control = dcc_control__256_256_xxx;
-		else if (segment_order_vert == segment_order__contiguous)
-			dcc_control = dcc_control__128_128_xxx;
-		else
-			dcc_control = dcc_control__256_64_64;
-	} else {
-		if ((req128_horz_wc &&
-			segment_order_horz == segment_order__non_contiguous) ||
-			(req128_vert_wc &&
-			segment_order_vert == segment_order__non_contiguous))
-			/* access_dir not known, must use most constraining */
-			dcc_control = dcc_control__256_64_64;
-		else
-			/* reg128 is true for either horz and vert
-			 * but segment_order is contiguous
-			 */
-			dcc_control = dcc_control__128_128_xxx;
-	}
-
-	if (dc->debug.disable_dcc == DCC_HALF_REQ_DISALBE &&
-		dcc_control != dcc_control__256_256_xxx)
-		return false;
-
-	switch (dcc_control) {
-	case dcc_control__256_256_xxx:
-		output->grph.rgb.max_uncompressed_blk_size = 256;
-		output->grph.rgb.max_compressed_blk_size = 256;
-		output->grph.rgb.independent_64b_blks = false;
-		break;
-	case dcc_control__128_128_xxx:
-		output->grph.rgb.max_uncompressed_blk_size = 128;
-		output->grph.rgb.max_compressed_blk_size = 128;
-		output->grph.rgb.independent_64b_blks = false;
-		break;
-	case dcc_control__256_64_64:
-		output->grph.rgb.max_uncompressed_blk_size = 256;
-		output->grph.rgb.max_compressed_blk_size = 64;
-		output->grph.rgb.independent_64b_blks = true;
-		break;
-	}
-
-	output->capable = true;
-	output->const_color_support = false;
-
-	return true;
+	return dc->res_pool->hubbub->funcs->get_dcc_compression_cap(
+			dc->res_pool->hubbub,
+			input,
+			output);
 }
 
-
 static void dcn10_destroy_resource_pool(struct resource_pool **pool)
 {
 	struct dcn10_resource_pool *dcn10_pool = TO_DCN10_RES_POOL(*pool);
@@ -1227,13 +972,12 @@
 }
 
 static struct dc_cap_funcs cap_funcs = {
-	.get_dcc_compression_cap = get_dcc_compression_cap
+	.get_dcc_compression_cap = dcn10_get_dcc_compression_cap
 };
 
 static struct resource_funcs dcn10_res_pool_funcs = {
 	.destroy = dcn10_destroy_resource_pool,
 	.link_enc_create = dcn10_link_encoder_create,
-	.validate_guaranteed = dcn10_validate_guaranteed,
 	.validate_bandwidth = dcn_validate_bandwidth,
 	.acquire_idle_pipe_for_layer = dcn10_acquire_idle_pipe_for_layer,
 	.validate_plane = dcn10_validate_plane,
@@ -1282,6 +1026,7 @@
 	dc->caps.max_cursor_size = 256;
 	dc->caps.max_slave_planes = 1;
 	dc->caps.is_apu = true;
+	dc->caps.post_blend_color_processing = false;
 
 	if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV)
 		dc->debug = debug_defaults_drv;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
new file mode 100644
index 0000000..653b7b2
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c
@@ -0,0 +1,1490 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+
+#include "dc_bios_types.h"
+#include "dcn10_stream_encoder.h"
+#include "reg_helper.h"
+#include "hw_shared.h"
+
+#define DC_LOGGER \
+		enc1->base.ctx->logger
+
+
+#define REG(reg)\
+	(enc1->regs->reg)
+
+#undef FN
+#define FN(reg_name, field_name) \
+	enc1->se_shift->field_name, enc1->se_mask->field_name
+
+#define VBI_LINE_0 0
+#define DP_BLANK_MAX_RETRY 20
+#define HDMI_CLOCK_CHANNEL_RATE_MORE_340M 340000
+
+
+enum {
+	DP_MST_UPDATE_MAX_RETRY = 50
+};
+
+#define CTX \
+	enc1->base.ctx
+
+void enc1_update_generic_info_packet(
+	struct dcn10_stream_encoder *enc1,
+	uint32_t packet_index,
+	const struct dc_info_packet *info_packet)
+{
+	/* TODOFPGA Figure out a proper number for max_retries polling for lock
+	 * use 50 for now.
+	 */
+	uint32_t max_retries = 50;
+
+	/* we need to turn on the clock before programming the AFMT block */
+	REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1);
+
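+	/* HW exposes only 8 generic info packet slots (GENERIC0-7) */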
+	if (packet_index >= 8)
+		ASSERT(0);
+
+	/* poll until dig_update_lock is not locked -> ASIC internal signal;
+	 * assume the OTG master lock will unlock it
+	 */
+/*		REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_LOCK_STATUS,
+			0, 10, max_retries);*/
+
+	/* check if HW reading GSP memory */
+	REG_WAIT(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT,
+			0, 10, max_retries);
+
+	/* if HW is still reading GSP memory after the wait above,
+	 * something is wrong: clear the GSP memory-access conflict
+	 * (and notify?) since SW is about to write to GSP memory
+	 */
+	REG_UPDATE(AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, 1);
+
+	/* choose which generic packet to use */
+	REG_UPDATE(AFMT_VBI_PACKET_CONTROL,
+			AFMT_GENERIC_INDEX, packet_index);
+
+	/* write generic packet header
+	 * (4th byte is for GENERIC0 only)
+	 */
+	REG_SET_4(AFMT_GENERIC_HDR, 0,
+			AFMT_GENERIC_HB0, info_packet->hb0,
+			AFMT_GENERIC_HB1, info_packet->hb1,
+			AFMT_GENERIC_HB2, info_packet->hb2,
+			AFMT_GENERIC_HB3, info_packet->hb3);
+
+	/* write generic packet contents
+	 * (we never use last 4 bytes)
+	 * there are 8 (0-7) mmDIG0_AFMT_GENERIC0_x registers
+	 */
+	{
+		const uint32_t *content =
+			(const uint32_t *) &info_packet->sb[0];
+
+		REG_WRITE(AFMT_GENERIC_0, *content++);
+		REG_WRITE(AFMT_GENERIC_1, *content++);
+		REG_WRITE(AFMT_GENERIC_2, *content++);
+		REG_WRITE(AFMT_GENERIC_3, *content++);
+		REG_WRITE(AFMT_GENERIC_4, *content++);
+		REG_WRITE(AFMT_GENERIC_5, *content++);
+		REG_WRITE(AFMT_GENERIC_6, *content++);
+		REG_WRITE(AFMT_GENERIC_7, *content);
+	}
+
+	switch (packet_index) {
+	case 0:
+		REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+				AFMT_GENERIC0_FRAME_UPDATE, 1);
+		break;
+	case 1:
+		REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+				AFMT_GENERIC1_FRAME_UPDATE, 1);
+		break;
+	case 2:
+		REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+				AFMT_GENERIC2_FRAME_UPDATE, 1);
+		break;
+	case 3:
+		REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+				AFMT_GENERIC3_FRAME_UPDATE, 1);
+		break;
+	case 4:
+		REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+				AFMT_GENERIC4_FRAME_UPDATE, 1);
+		break;
+	case 5:
+		REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+				AFMT_GENERIC5_FRAME_UPDATE, 1);
+		break;
+	case 6:
+		REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+				AFMT_GENERIC6_FRAME_UPDATE, 1);
+		break;
+	case 7:
+		REG_UPDATE(AFMT_VBI_PACKET_CONTROL1,
+				AFMT_GENERIC7_FRAME_UPDATE, 1);
+		break;
+	default:
+		break;
+	}
+}
+
+static void enc1_update_hdmi_info_packet(
+	struct dcn10_stream_encoder *enc1,
+	uint32_t packet_index,
+	const struct dc_info_packet *info_packet)
+{
+	uint32_t cont, send, line;
+
+	if (info_packet->valid) {
+		enc1_update_generic_info_packet(
+			enc1,
+			packet_index,
+			info_packet);
+
+		/* enable transmission of packet(s) -
+		 * packet transmission begins on the next frame
+		 */
+		cont = 1;
+		/* send packet(s) every frame */
+		send = 1;
+		/* select line number to send packets on */
+		line = 2;
+	} else {
+		cont = 0;
+		send = 0;
+		line = 0;
+	}
+
+	/* choose which generic packet control to use */
+	switch (packet_index) {
+	case 0:
+		REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL0,
+				HDMI_GENERIC0_CONT, cont,
+				HDMI_GENERIC0_SEND, send,
+				HDMI_GENERIC0_LINE, line);
+		break;
+	case 1:
+		REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL0,
+				HDMI_GENERIC1_CONT, cont,
+				HDMI_GENERIC1_SEND, send,
+				HDMI_GENERIC1_LINE, line);
+		break;
+	case 2:
+		REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL1,
+				HDMI_GENERIC0_CONT, cont,
+				HDMI_GENERIC0_SEND, send,
+				HDMI_GENERIC0_LINE, line);
+		break;
+	case 3:
+		REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL1,
+				HDMI_GENERIC1_CONT, cont,
+				HDMI_GENERIC1_SEND, send,
+				HDMI_GENERIC1_LINE, line);
+		break;
+	case 4:
+		REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2,
+				HDMI_GENERIC0_CONT, cont,
+				HDMI_GENERIC0_SEND, send,
+				HDMI_GENERIC0_LINE, line);
+		break;
+	case 5:
+		REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL2,
+				HDMI_GENERIC1_CONT, cont,
+				HDMI_GENERIC1_SEND, send,
+				HDMI_GENERIC1_LINE, line);
+		break;
+	case 6:
+		REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL3,
+				HDMI_GENERIC0_CONT, cont,
+				HDMI_GENERIC0_SEND, send,
+				HDMI_GENERIC0_LINE, line);
+		break;
+	case 7:
+		REG_UPDATE_3(HDMI_GENERIC_PACKET_CONTROL3,
+				HDMI_GENERIC1_CONT, cont,
+				HDMI_GENERIC1_SEND, send,
+				HDMI_GENERIC1_LINE, line);
+		break;
+	default:
+		/* invalid HW packet index */
+		DC_LOG_WARNING(
+			"Invalid HW packet index: %s()\n",
+			__func__);
+		return;
+	}
+}
+
+/* setup stream encoder in dp mode */
+void enc1_stream_encoder_dp_set_stream_attribute(
+	struct stream_encoder *enc,
+	struct dc_crtc_timing *crtc_timing,
+	enum dc_color_space output_color_space)
+{
+	uint32_t h_active_start;
+	uint32_t v_active_start;
+	uint32_t misc0 = 0;
+	uint32_t misc1 = 0;
+	uint32_t h_blank;
+	uint32_t h_back_porch;
+	uint8_t synchronous_clock = 0; /* asynchronous mode */
+	uint8_t colorimetry_bpc;
+	uint8_t dynamic_range_rgb = 0; /*full range*/
+	uint8_t dynamic_range_ycbcr = 1; /*bt709*/
+
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	REG_UPDATE(DP_DB_CNTL, DP_DB_DISABLE, 1);
+
+	/* set pixel encoding */
+	switch (crtc_timing->pixel_encoding) {
+	case PIXEL_ENCODING_YCBCR422:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+				DP_PIXEL_ENCODING_TYPE_YCBCR422);
+		break;
+	case PIXEL_ENCODING_YCBCR444:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+				DP_PIXEL_ENCODING_TYPE_YCBCR444);
+
+		if (crtc_timing->flags.Y_ONLY)
+			if (crtc_timing->display_color_depth != COLOR_DEPTH_666)
+				/* HW testing only, no use case yet.
+				 * Color depth of Y-only could be
+				 * 8, 10, 12, 16 bits
+				 */
+				REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+						DP_PIXEL_ENCODING_TYPE_Y_ONLY);
+		/* Note: DP_MSA_MISC1 bit 7 is the indicator
+		 * of Y-only mode.
+		 * This bit is set in HW if register
+		 * DP_PIXEL_ENCODING is programmed to 0x4
+		 */
+		break;
+	case PIXEL_ENCODING_YCBCR420:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+				DP_PIXEL_ENCODING_TYPE_YCBCR420);
+		REG_UPDATE(DP_VID_TIMING, DP_VID_N_MUL, 1);
+		break;
+	default:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING,
+				DP_PIXEL_ENCODING_TYPE_RGB444);
+		break;
+	}
+
+	misc1 = REG_READ(DP_MSA_MISC);
+
+	/* set color depth */
+
+	switch (crtc_timing->display_color_depth) {
+	case COLOR_DEPTH_666:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+				0);
+		break;
+	case COLOR_DEPTH_888:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+				DP_COMPONENT_PIXEL_DEPTH_8BPC);
+		break;
+	case COLOR_DEPTH_101010:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+				DP_COMPONENT_PIXEL_DEPTH_10BPC);
+
+		break;
+	case COLOR_DEPTH_121212:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+				DP_COMPONENT_PIXEL_DEPTH_12BPC);
+		break;
+	default:
+		REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH,
+				DP_COMPONENT_PIXEL_DEPTH_6BPC);
+		break;
+	}
+
+	/* set dynamic range and YCbCr range */
+
+	switch (crtc_timing->display_color_depth) {
+	case COLOR_DEPTH_666:
+		colorimetry_bpc = 0;
+		break;
+	case COLOR_DEPTH_888:
+		colorimetry_bpc = 1;
+		break;
+	case COLOR_DEPTH_101010:
+		colorimetry_bpc = 2;
+		break;
+	case COLOR_DEPTH_121212:
+		colorimetry_bpc = 3;
+		break;
+	default:
+		colorimetry_bpc = 0;
+		break;
+	}
+
+	misc0 = misc0 | synchronous_clock;
+	misc0 = misc0 | (colorimetry_bpc << 5);
+
+	switch (output_color_space) {
+	case COLOR_SPACE_SRGB:
+		misc0 = misc0 | 0x0;
+		misc1 = misc1 & ~0x80; /* bit7 = 0*/
+		dynamic_range_rgb = 0; /*full range*/
+		break;
+	case COLOR_SPACE_SRGB_LIMITED:
+		misc0 = misc0 | 0x8; /* bit3=1 */
+		misc1 = misc1 & ~0x80; /* bit7 = 0*/
+		dynamic_range_rgb = 1; /*limited range*/
+		break;
+	case COLOR_SPACE_YCBCR601:
+	case COLOR_SPACE_YCBCR601_LIMITED:
+		misc0 = misc0 | 0x8; /* bit3=1, bit4=0 */
+		misc1 = misc1 & ~0x80; /* bit7 = 0*/
+		dynamic_range_ycbcr = 0; /*bt601*/
+		if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+			misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
+		else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444)
+			misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
+		break;
+	case COLOR_SPACE_YCBCR709:
+	case COLOR_SPACE_YCBCR709_LIMITED:
+		misc0 = misc0 | 0x18; /* bit3=1, bit4=1 */
+		misc1 = misc1 & ~0x80; /* bit7 = 0*/
+		dynamic_range_ycbcr = 1; /*bt709*/
+		if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+			misc0 = misc0 | 0x2; /* bit2=0, bit1=1 */
+		else if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR444)
+			misc0 = misc0 | 0x4; /* bit2=1, bit1=0 */
+		break;
+	case COLOR_SPACE_2020_RGB_LIMITEDRANGE:
+		dynamic_range_rgb = 1; /*limited range*/
+		break;
+	case COLOR_SPACE_2020_RGB_FULLRANGE:
+	case COLOR_SPACE_2020_YCBCR:
+	case COLOR_SPACE_XR_RGB:
+	case COLOR_SPACE_MSREF_SCRGB:
+	case COLOR_SPACE_ADOBERGB:
+	case COLOR_SPACE_DCIP3:
+	case COLOR_SPACE_XV_YCC_709:
+	case COLOR_SPACE_XV_YCC_601:
+	case COLOR_SPACE_DISPLAYNATIVE:
+	case COLOR_SPACE_DOLBYVISION:
+	case COLOR_SPACE_APPCTRL:
+	case COLOR_SPACE_CUSTOMPOINTS:
+	case COLOR_SPACE_UNKNOWN:
+		/* do nothing */
+		break;
+	}
+
+	REG_SET(DP_MSA_COLORIMETRY, 0, DP_MSA_MISC0, misc0);
+	REG_WRITE(DP_MSA_MISC, misc1);   /* MSA_MISC1 */
+
+	/* DCN-new registers;
+	 * dc_crtc_timing is a VESA DMT struct with data from the EDID
+	 */
+	REG_SET_2(DP_MSA_TIMING_PARAM1, 0,
+			DP_MSA_HTOTAL, crtc_timing->h_total,
+			DP_MSA_VTOTAL, crtc_timing->v_total);
+
+	/* calculate from vesa timing parameters
+	 * h_active_start related to leading edge of sync
+	 */
+
+	h_blank = crtc_timing->h_total - crtc_timing->h_border_left -
+			crtc_timing->h_addressable - crtc_timing->h_border_right;
+
+	h_back_porch = h_blank - crtc_timing->h_front_porch -
+			crtc_timing->h_sync_width;
+
+	/* start at beginning of left border */
+	h_active_start = crtc_timing->h_sync_width + h_back_porch;
+
+
+	v_active_start = crtc_timing->v_total - crtc_timing->v_border_top -
+			crtc_timing->v_addressable - crtc_timing->v_border_bottom -
+			crtc_timing->v_front_porch;
+
+
+	/* start at beginning of left border */
+	REG_SET_2(DP_MSA_TIMING_PARAM2, 0,
+		DP_MSA_HSTART, h_active_start,
+		DP_MSA_VSTART, v_active_start);
+
+	REG_SET_4(DP_MSA_TIMING_PARAM3, 0,
+			DP_MSA_HSYNCWIDTH,
+			crtc_timing->h_sync_width,
+			DP_MSA_HSYNCPOLARITY,
+			!crtc_timing->flags.HSYNC_POSITIVE_POLARITY,
+			DP_MSA_VSYNCWIDTH,
+			crtc_timing->v_sync_width,
+			DP_MSA_VSYNCPOLARITY,
+			!crtc_timing->flags.VSYNC_POSITIVE_POLARITY);
+
+	/* HWIDTH includes border or overscan */
+	REG_SET_2(DP_MSA_TIMING_PARAM4, 0,
+		DP_MSA_HWIDTH, crtc_timing->h_border_left +
+		crtc_timing->h_addressable + crtc_timing->h_border_right,
+		DP_MSA_VHEIGHT, crtc_timing->v_border_top +
+		crtc_timing->v_addressable + crtc_timing->v_border_bottom);
+}
+
+static void enc1_stream_encoder_set_stream_attribute_helper(
+		struct dcn10_stream_encoder *enc1,
+		struct dc_crtc_timing *crtc_timing)
+{
+	switch (crtc_timing->pixel_encoding) {
+	case PIXEL_ENCODING_YCBCR422:
+		REG_UPDATE(DIG_FE_CNTL, TMDS_PIXEL_ENCODING, 1);
+		break;
+	default:
+		REG_UPDATE(DIG_FE_CNTL, TMDS_PIXEL_ENCODING, 0);
+		break;
+	}
+	REG_UPDATE(DIG_FE_CNTL, TMDS_COLOR_FORMAT, 0);
+}
+
+/* setup stream encoder in hdmi mode */
+void enc1_stream_encoder_hdmi_set_stream_attribute(
+	struct stream_encoder *enc,
+	struct dc_crtc_timing *crtc_timing,
+	int actual_pix_clk_khz,
+	bool enable_audio)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	struct bp_encoder_control cntl = {0};
+
+	cntl.action = ENCODER_CONTROL_SETUP;
+	cntl.engine_id = enc1->base.id;
+	cntl.signal = SIGNAL_TYPE_HDMI_TYPE_A;
+	cntl.enable_dp_audio = enable_audio;
+	cntl.pixel_clock = actual_pix_clk_khz;
+	cntl.lanes_number = LANE_COUNT_FOUR;
+
+	if (enc1->base.bp->funcs->encoder_control(
+			enc1->base.bp, &cntl) != BP_RESULT_OK)
+		return;
+
+	enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+
+	/* setup HDMI engine */
+	REG_UPDATE_5(HDMI_CONTROL,
+		HDMI_PACKET_GEN_VERSION, 1,
+		HDMI_KEEPOUT_MODE, 1,
+		HDMI_DEEP_COLOR_ENABLE, 0,
+		HDMI_DATA_SCRAMBLE_EN, 0,
+		HDMI_CLOCK_CHANNEL_RATE, 0);
+
+
+	switch (crtc_timing->display_color_depth) {
+	case COLOR_DEPTH_888:
+		REG_UPDATE(HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, 0);
+		break;
+	case COLOR_DEPTH_101010:
+		if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+			REG_UPDATE_2(HDMI_CONTROL,
+					HDMI_DEEP_COLOR_DEPTH, 1,
+					HDMI_DEEP_COLOR_ENABLE, 0);
+		} else {
+			REG_UPDATE_2(HDMI_CONTROL,
+					HDMI_DEEP_COLOR_DEPTH, 1,
+					HDMI_DEEP_COLOR_ENABLE, 1);
+		}
+		break;
+	case COLOR_DEPTH_121212:
+		if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) {
+			REG_UPDATE_2(HDMI_CONTROL,
+					HDMI_DEEP_COLOR_DEPTH, 2,
+					HDMI_DEEP_COLOR_ENABLE, 0);
+		} else {
+			REG_UPDATE_2(HDMI_CONTROL,
+					HDMI_DEEP_COLOR_DEPTH, 2,
+					HDMI_DEEP_COLOR_ENABLE, 1);
+		}
+		break;
+	case COLOR_DEPTH_161616:
+		REG_UPDATE_2(HDMI_CONTROL,
+				HDMI_DEEP_COLOR_DEPTH, 3,
+				HDMI_DEEP_COLOR_ENABLE, 1);
+		break;
+	default:
+		break;
+	}
+
+	if (actual_pix_clk_khz >= HDMI_CLOCK_CHANNEL_RATE_MORE_340M) {
+		/* enable HDMI data scrambler
+		 * HDMI_CLOCK_CHANNEL_RATE_MORE_340M
+		 * Clock channel frequency is 1/4 of character rate.
+		 */
+		REG_UPDATE_2(HDMI_CONTROL,
+			HDMI_DATA_SCRAMBLE_EN, 1,
+			HDMI_CLOCK_CHANNEL_RATE, 1);
+	} else if (crtc_timing->flags.LTE_340MCSC_SCRAMBLE) {
+
+		/* TODO: New feature for DCE11, still need to implement */
+
+		/* enable HDMI data scrambler
+		 * HDMI_CLOCK_CHANNEL_FREQ_EQUAL_TO_CHAR_RATE
+		 * Clock channel frequency is the same
+		 * as character rate
+		 */
+		REG_UPDATE_2(HDMI_CONTROL,
+			HDMI_DATA_SCRAMBLE_EN, 1,
+			HDMI_CLOCK_CHANNEL_RATE, 0);
+	}
+
+
+	REG_UPDATE_3(HDMI_VBI_PACKET_CONTROL,
+		HDMI_GC_CONT, 1,
+		HDMI_GC_SEND, 1,
+		HDMI_NULL_SEND, 1);
+
+	/* the following belongs to audio */
+	REG_UPDATE(HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1);
+
+	REG_UPDATE(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+
+	REG_UPDATE(HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE,
+				VBI_LINE_0 + 2);
+
+	REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, 0);
+}
+
+/* setup stream encoder in dvi mode */
+void enc1_stream_encoder_dvi_set_stream_attribute(
+	struct stream_encoder *enc,
+	struct dc_crtc_timing *crtc_timing,
+	bool is_dual_link)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	struct bp_encoder_control cntl = {0};
+
+	cntl.action = ENCODER_CONTROL_SETUP;
+	cntl.engine_id = enc1->base.id;
+	cntl.signal = is_dual_link ?
+			SIGNAL_TYPE_DVI_DUAL_LINK : SIGNAL_TYPE_DVI_SINGLE_LINK;
+	cntl.enable_dp_audio = false;
+	cntl.pixel_clock = crtc_timing->pix_clk_khz;
+	cntl.lanes_number = (is_dual_link) ? LANE_COUNT_EIGHT : LANE_COUNT_FOUR;
+
+	if (enc1->base.bp->funcs->encoder_control(
+			enc1->base.bp, &cntl) != BP_RESULT_OK)
+		return;
+
+	ASSERT(crtc_timing->pixel_encoding == PIXEL_ENCODING_RGB);
+	ASSERT(crtc_timing->display_color_depth == COLOR_DEPTH_888);
+	enc1_stream_encoder_set_stream_attribute_helper(enc1, crtc_timing);
+}
+
+void enc1_stream_encoder_set_mst_bandwidth(
+	struct stream_encoder *enc,
+	struct fixed31_32 avg_time_slots_per_mtp)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
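+	/* DP_MSE_RATE is an X.26 fixed-point value: X is the integer part
+	 * of avg_time_slots_per_mtp, Y the fractional part scaled by 2^26
+	 */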
+	uint32_t x = dc_fixpt_floor(
+		avg_time_slots_per_mtp);
+	uint32_t y = dc_fixpt_ceil(
+		dc_fixpt_shl(
+			dc_fixpt_sub_int(
+				avg_time_slots_per_mtp,
+				x),
+			26));
+
+	REG_SET_2(DP_MSE_RATE_CNTL, 0,
+		DP_MSE_RATE_X, x,
+		DP_MSE_RATE_Y, y);
+
+	/* wait for update to be completed on the link,
+	 * i.e. DP_MSE_RATE_UPDATE_PENDING field (read only)
+	 * is reset to 0 (not pending)
+	 */
+	REG_WAIT(DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING,
+			0,
+			10, DP_MST_UPDATE_MAX_RETRY);
+}
+
+static void enc1_stream_encoder_update_hdmi_info_packets(
+	struct stream_encoder *enc,
+	const struct encoder_info_frame *info_frame)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* for bring up, disable HDMI double buffering  TODO */
+	REG_UPDATE(HDMI_DB_CONTROL, HDMI_DB_DISABLE, 1);
+
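+	/* fixed packet slot assignment: 0 = AVI, 1 = vendor specific,
+	 * 2 = gamut metadata, 3 = SPD, 4 = HDR static metadata
+	 */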
+	enc1_update_hdmi_info_packet(enc1, 0, &info_frame->avi);
+	enc1_update_hdmi_info_packet(enc1, 1, &info_frame->vendor);
+	enc1_update_hdmi_info_packet(enc1, 2, &info_frame->gamut);
+	enc1_update_hdmi_info_packet(enc1, 3, &info_frame->spd);
+	enc1_update_hdmi_info_packet(enc1, 4, &info_frame->hdrsmd);
+}
+
+static void enc1_stream_encoder_stop_hdmi_info_packets(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* stop generic packets 0 & 1 on HDMI */
+	REG_SET_6(HDMI_GENERIC_PACKET_CONTROL0, 0,
+		HDMI_GENERIC1_CONT, 0,
+		HDMI_GENERIC1_LINE, 0,
+		HDMI_GENERIC1_SEND, 0,
+		HDMI_GENERIC0_CONT, 0,
+		HDMI_GENERIC0_LINE, 0,
+		HDMI_GENERIC0_SEND, 0);
+
+	/* stop generic packets 2 & 3 on HDMI */
+	REG_SET_6(HDMI_GENERIC_PACKET_CONTROL1, 0,
+		HDMI_GENERIC0_CONT, 0,
+		HDMI_GENERIC0_LINE, 0,
+		HDMI_GENERIC0_SEND, 0,
+		HDMI_GENERIC1_CONT, 0,
+		HDMI_GENERIC1_LINE, 0,
+		HDMI_GENERIC1_SEND, 0);
+
+	/* stop generic packets 4 & 5 on HDMI */
+	REG_SET_6(HDMI_GENERIC_PACKET_CONTROL2, 0,
+		HDMI_GENERIC0_CONT, 0,
+		HDMI_GENERIC0_LINE, 0,
+		HDMI_GENERIC0_SEND, 0,
+		HDMI_GENERIC1_CONT, 0,
+		HDMI_GENERIC1_LINE, 0,
+		HDMI_GENERIC1_SEND, 0);
+
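+	/* stop generic packets 6 & 7 on HDMI */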
+	REG_SET_6(HDMI_GENERIC_PACKET_CONTROL3, 0,
+		HDMI_GENERIC0_CONT, 0,
+		HDMI_GENERIC0_LINE, 0,
+		HDMI_GENERIC0_SEND, 0,
+		HDMI_GENERIC1_CONT, 0,
+		HDMI_GENERIC1_LINE, 0,
+		HDMI_GENERIC1_SEND, 0);
+}
+
+void enc1_stream_encoder_update_dp_info_packets(
+	struct stream_encoder *enc,
+	const struct encoder_info_frame *info_frame)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t value = 0;
+
+	if (info_frame->vsc.valid)
+		enc1_update_generic_info_packet(
+					enc1,
+					0,  /* packetIndex */
+					&info_frame->vsc);
+
+	if (info_frame->spd.valid)
+		enc1_update_generic_info_packet(
+				enc1,
+				2,  /* packetIndex */
+				&info_frame->spd);
+
+	if (info_frame->hdrsmd.valid)
+		enc1_update_generic_info_packet(
+				enc1,
+				3,  /* packetIndex */
+				&info_frame->hdrsmd);
+
+	/* enable/disable transmission of packet(s).
+	 * If enabled, packet transmission begins on the next frame
+	 */
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, info_frame->vsc.valid);
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, info_frame->spd.valid);
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, info_frame->hdrsmd.valid);
+
+	/* This bit is the master enable bit.
+	 * When enabling the secondary stream engine,
+	 * this master bit must also be set.
+	 * This register is shared with the audio info frame.
+	 * Therefore we need to keep the master bit enabled
+	 * if at least one of the fields is not 0.
+	 */
+	value = REG_READ(DP_SEC_CNTL);
+	if (value)
+		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_stream_encoder_stop_dp_info_packets(
+	struct stream_encoder *enc)
+{
+	/* stop generic packets on DP */
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t value = 0;
+
+	REG_SET_10(DP_SEC_CNTL, 0,
+		DP_SEC_GSP0_ENABLE, 0,
+		DP_SEC_GSP1_ENABLE, 0,
+		DP_SEC_GSP2_ENABLE, 0,
+		DP_SEC_GSP3_ENABLE, 0,
+		DP_SEC_GSP4_ENABLE, 0,
+		DP_SEC_GSP5_ENABLE, 0,
+		DP_SEC_GSP6_ENABLE, 0,
+		DP_SEC_GSP7_ENABLE, 0,
+		DP_SEC_MPG_ENABLE, 0,
+		DP_SEC_STREAM_ENABLE, 0);
+
+	/* this register is shared with the audio info frame.
+	 * therefore we need to keep the master bit enabled
+	 * if at least one of the fields is not 0
+	 */
+	value = REG_READ(DP_SEC_CNTL);
+	if (value)
+		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_stream_encoder_dp_blank(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t reg1 = 0;
+	uint32_t max_retries = DP_BLANK_MAX_RETRY * 10;
+
+	/* Note: For CZ, we are changing driver default to disable
+	 * stream deferred to next VBLANK. If results are positive, we
+	 * will make the same change to all DCE versions. There are a
+	 * handful of panels that cannot handle disable stream at
+	 * HBLANK and will result in a white line flash across the
+	 * screen on stream disable.
+	 */
+	REG_GET(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, &reg1);
+	if ((reg1 & 0x1) == 0)
+		/*stream not enabled*/
+		return;
+	/* Specify the video stream disable point
+	 * (2 = start of the next vertical blank)
+	 */
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2);
+	/* Larger delay to wait until VBLANK - use max retry of
+	 * 10us*3000=30ms. This covers 16.6ms of typical 60 Hz mode +
+	 * a little more because we may not trust delay accuracy.
+	 */
+	max_retries = DP_BLANK_MAX_RETRY * 150;
+
+	/* disable DP stream */
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0);
+
+	/* the encoder stops sending the video stream
+	 * at the start of the vertical blanking.
+	 * Poll for DP_VID_STREAM_STATUS == 0
+	 */
+
+	REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS,
+			0,
+			10, max_retries);
+
+	/* Tell the DP encoder to ignore timing from CRTC, must be done after
+	 * the polling. If we set DP_STEER_FIFO_RESET before DP stream blank is
+	 * complete, stream status will be stuck in video stream enabled state,
+	 * i.e. DP_VID_STREAM_STATUS stuck at 1.
+	 */
+
+	REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, true);
+}
+
+/* output video stream to link encoder */
+void enc1_stream_encoder_dp_unblank(
+	struct stream_encoder *enc,
+	const struct encoder_unblank_param *param)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (param->link_settings.link_rate != LINK_RATE_UNKNOWN) {
+		uint32_t n_vid = 0x8000;
+		uint32_t m_vid;
+
+		/* M / N = Fstream / Flink
+		 * m_vid / n_vid = pixel rate / link rate
+		 */
+
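+		/* use a 64-bit intermediate to avoid overflowing
+		 * n_vid * pixel_clk_khz
+		 */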
+		uint64_t m_vid_l = n_vid;
+
+		m_vid_l *= param->pixel_clk_khz;
+		m_vid_l = div_u64(m_vid_l,
+			param->link_settings.link_rate
+				* LINK_RATE_REF_FREQ_IN_KHZ);
+
+		m_vid = (uint32_t) m_vid_l;
+
+		/* enable auto measurement */
+
+		REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 0);
+
+		/* auto measurement need 1 full 0x8000 symbol cycle to kick in,
+		 * therefore program initial value for Mvid and Nvid
+		 */
+
+		REG_UPDATE(DP_VID_N, DP_VID_N, n_vid);
+
+		REG_UPDATE(DP_VID_M, DP_VID_M, m_vid);
+
+		REG_UPDATE(DP_VID_TIMING, DP_VID_M_N_GEN_EN, 1);
+	}
+
+	/* set DIG_START to 0x1 to resync FIFO */
+
+	REG_UPDATE(DIG_FE_CNTL, DIG_START, 1);
+
+	/* switch DP encoder to CRTC data */
+
+	REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
+
+	/* wait 100us for DIG/DP logic to prime
+	 * (i.e. a few video lines)
+	 */
+	udelay(100);
+
+	/* the hardware would start sending video at the start of the next DP
+	 * frame (i.e. rising edge of the vblank).
+	 * NOTE: We used to program DP_VID_STREAM_DIS_DEFER = 2 here, but this
+	 * register has no effect on enable transition! HW always guarantees
+	 * VID_STREAM enable at start of next frame, and this is not
+	 * programmable
+	 */
+
+	REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, true);
+}
+
+void enc1_stream_encoder_set_avmute(
+	struct stream_encoder *enc,
+	bool enable)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	unsigned int value = enable ? 1 : 0;
+
+	REG_UPDATE(HDMI_GC, HDMI_GC_AVMUTE, value);
+}
+
+
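+/* DP audio timestamp N value; 0x8000 (32768) is the DisplayPort spec
+ * default Naud for asynchronous clock mode
+ */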
+#define DP_SEC_AUD_N__DP_SEC_AUD_N__DEFAULT 0x8000
+#define DP_SEC_TIMESTAMP__DP_SEC_TIMESTAMP_MODE__AUTO_CALC 1
+
+#include "include/audio_types.h"
+
+/**
+ * speakers_to_channels - translate speaker flags to channel enable bits
+ *
+ *  FL  - Front Left
+ *  FR  - Front Right
+ *  RL  - Rear Left
+ *  RR  - Rear Right
+ *  RC  - Rear Center
+ *  FC  - Front Center
+ *  FLC - Front Left Center
+ *  FRC - Front Right Center
+ *  RLC - Rear Left Center
+ *  RRC - Rear Right Center
+ *  LFE - Low Frequency Effect
+ *
+ *               FC
+ *          FLC      FRC
+ *    FL                    FR
+ *
+ *                    LFE
+ *              ()
+ *
+ *    RL                    RR
+ *          RLC      RRC
+ *               RC
+ *
+ *             ch  8   7   6   5   4   3   2   1
+ * 0b00000011      -   -   -   -   -   -   FR  FL
+ * 0b00000111      -   -   -   -   -   LFE FR  FL
+ * 0b00001011      -   -   -   -   FC  -   FR  FL
+ * 0b00001111      -   -   -   -   FC  LFE FR  FL
+ * 0b00010011      -   -   -   RC  -   -   FR  FL
+ * 0b00010111      -   -   -   RC  -   LFE FR  FL
+ * 0b00011011      -   -   -   RC  FC  -   FR  FL
+ * 0b00011111      -   -   -   RC  FC  LFE FR  FL
+ * 0b00110011      -   -   RR  RL  -   -   FR  FL
+ * 0b00110111      -   -   RR  RL  -   LFE FR  FL
+ * 0b00111011      -   -   RR  RL  FC  -   FR  FL
+ * 0b00111111      -   -   RR  RL  FC  LFE FR  FL
+ * 0b01110011      -   RC  RR  RL  -   -   FR  FL
+ * 0b01110111      -   RC  RR  RL  -   LFE FR  FL
+ * 0b01111011      -   RC  RR  RL  FC  -   FR  FL
+ * 0b01111111      -   RC  RR  RL  FC  LFE FR  FL
+ * 0b11110011      RRC RLC RR  RL  -   -   FR  FL
+ * 0b11110111      RRC RLC RR  RL  -   LFE FR  FL
+ * 0b11111011      RRC RLC RR  RL  FC  -   FR  FL
+ * 0b11111111      RRC RLC RR  RL  FC  LFE FR  FL
+ * 0b11000011      FRC FLC -   -   -   -   FR  FL
+ * 0b11000111      FRC FLC -   -   -   LFE FR  FL
+ * 0b11001011      FRC FLC -   -   FC  -   FR  FL
+ * 0b11001111      FRC FLC -   -   FC  LFE FR  FL
+ * 0b11010011      FRC FLC -   RC  -   -   FR  FL
+ * 0b11010111      FRC FLC -   RC  -   LFE FR  FL
+ * 0b11011011      FRC FLC -   RC  FC  -   FR  FL
+ * 0b11011111      FRC FLC -   RC  FC  LFE FR  FL
+ * 0b11110011      FRC FLC RR  RL  -   -   FR  FL
+ * 0b11110111      FRC FLC RR  RL  -   LFE FR  FL
+ * 0b11111011      FRC FLC RR  RL  FC  -   FR  FL
+ * 0b11111111      FRC FLC RR  RL  FC  LFE FR  FL
+ *
+ * @speaker_flags: speaker information as it comes from the CEA audio block
+ */
+
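+/* one bit per audio channel slot 1..8, matching the table above */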
+union audio_cea_channels {
+	uint8_t all;
+	struct audio_cea_channels_bits {
+		uint32_t FL:1;
+		uint32_t FR:1;
+		uint32_t LFE:1;
+		uint32_t FC:1;
+		uint32_t RL_RC:1;
+		uint32_t RR:1;
+		uint32_t RC_RLC_FLC:1;
+		uint32_t RRC_FRC:1;
+	} channels;
+};
+
+struct audio_clock_info {
+	/* pixel clock frequency */
+	uint32_t pixel_clock_in_10khz;
+	/* N - 32 kHz audio */
+	uint32_t n_32khz;
+	/* CTS - 32 kHz audio */
+	uint32_t cts_32khz;
+	uint32_t n_44khz;
+	uint32_t cts_44khz;
+	uint32_t n_48khz;
+	uint32_t cts_48khz;
+};
+
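+/* HDMI Audio Clock Regeneration: the sink recovers the audio sample clock
+ * as 128*fs = f_TMDS * N / CTS, so each supported pixel clock needs its
+ * own N/CTS pair per audio sample rate; per-entry comments give the
+ * nominal pixel clock.
+ */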
+static const struct audio_clock_info audio_clock_info_table[16] = {
+	{2517, 4576, 28125, 7007, 31250, 6864, 28125},      /* 25.2MHz/1.001 */
+	{2518, 4576, 28125, 7007, 31250, 6864, 28125},      /* 25.2MHz/1.001 */
+	{2520, 4096, 25200, 6272, 28000, 6144, 25200},      /* 25.2MHz */
+	{2700, 4096, 27000, 6272, 30000, 6144, 27000},      /* 27MHz */
+	{2702, 4096, 27027, 6272, 30030, 6144, 27027},      /* 27MHz*1.001 */
+	{2703, 4096, 27027, 6272, 30030, 6144, 27027},      /* 27MHz*1.001 */
+	{5400, 4096, 54000, 6272, 60000, 6144, 54000},      /* 54MHz */
+	{5405, 4096, 54054, 6272, 60060, 6144, 54054},      /* 54MHz*1.001 */
+	{7417, 11648, 210937, 17836, 234375, 11648, 140625}, /* 74.25MHz/1.001 */
+	{7425, 4096, 74250, 6272, 82500, 6144, 74250},      /* 74.25MHz */
+	{14835, 11648, 421875, 8918, 234375, 5824, 140625},  /* 148.5MHz/1.001 */
+	{14850, 4096, 148500, 6272, 165000, 6144, 148500},   /* 148.5MHz */
+	{29670, 5824, 421875, 4459, 234375, 5824, 281250},   /* 297MHz/1.001 */
+	{29700, 3072, 222750, 4704, 247500, 5120, 247500},   /* 297MHz */
+	{59340, 5824, 843750, 8918, 937500, 5824, 562500},   /* 594MHz/1.001 */
+	{59400, 3072, 445500, 9408, 990000, 6144, 594000}    /* 594MHz */
+};
+
+static const struct audio_clock_info audio_clock_info_table_36bpc[14] = {
+	{2517,  9152,  84375,  7007,  48875,  9152,  56250},
+	{2518,  9152,  84375,  7007,  48875,  9152,  56250},
+	{2520,  4096,  37800,  6272,  42000,  6144,  37800},
+	{2700,  4096,  40500,  6272,  45000,  6144,  40500},
+	{2702,  8192,  81081,  6272,  45045,  8192,  54054},
+	{2703,  8192,  81081,  6272,  45045,  8192,  54054},
+	{5400,  4096,  81000,  6272,  90000,  6144,  81000},
+	{5405,  4096,  81081,  6272,  90090,  6144,  81081},
+	{7417, 11648, 316406, 17836, 351562, 11648, 210937},
+	{7425, 4096, 111375,  6272, 123750,  6144, 111375},
+	{14835, 11648, 632812, 17836, 703125, 11648, 421875},
+	{14850, 4096, 222750,  6272, 247500,  6144, 222750},
+	{29670, 5824, 632812,  8918, 703125,  5824, 421875},
+	{29700, 4096, 445500,  4704, 371250,  5120, 371250}
+};
+
+static const struct audio_clock_info audio_clock_info_table_48bpc[14] = {
+	{2517,  4576,  56250,  7007,  62500,  6864,  56250},
+	{2518,  4576,  56250,  7007,  62500,  6864,  56250},
+	{2520,  4096,  50400,  6272,  56000,  6144,  50400},
+	{2700,  4096,  54000,  6272,  60000,  6144,  54000},
+	{2702,  4096,  54054,  6267,  60060,  8192,  54054},
+	{2703,  4096,  54054,  6272,  60060,  8192,  54054},
+	{5400,  4096, 108000,  6272, 120000,  6144, 108000},
+	{5405,  4096, 108108,  6272, 120120,  6144, 108108},
+	{7417, 11648, 421875, 17836, 468750, 11648, 281250},
+	{7425,  4096, 148500,  6272, 165000,  6144, 148500},
+	{14835, 11648, 843750,  8918, 468750, 11648, 281250},
+	{14850, 4096, 297000,  6272, 330000,  6144, 297000},
+	{29670, 5824, 843750,  4459, 468750,  5824, 562500},
+	{29700, 3072, 445500,  4704, 495000,  5120, 495000}
+};
+
+static union audio_cea_channels speakers_to_channels(
+	struct audio_speaker_flags speaker_flags)
+{
+	union audio_cea_channels cea_channels = {0};
+
+	/* these are one to one */
+	cea_channels.channels.FL = speaker_flags.FL_FR;
+	cea_channels.channels.FR = speaker_flags.FL_FR;
+	cea_channels.channels.LFE = speaker_flags.LFE;
+	cea_channels.channels.FC = speaker_flags.FC;
+
+	/* if Rear Left and Right exist move RC speaker to channel 7
+	 * otherwise to channel 5
+	 */
+	if (speaker_flags.RL_RR) {
+		cea_channels.channels.RL_RC = speaker_flags.RL_RR;
+		cea_channels.channels.RR = speaker_flags.RL_RR;
+		cea_channels.channels.RC_RLC_FLC = speaker_flags.RC;
+	} else {
+		cea_channels.channels.RL_RC = speaker_flags.RC;
+	}
+
+	/* FRONT Left Right Center and REAR Left Right Center are exclusive */
+	if (speaker_flags.FLC_FRC) {
+		cea_channels.channels.RC_RLC_FLC = speaker_flags.FLC_FRC;
+		cea_channels.channels.RRC_FRC = speaker_flags.FLC_FRC;
+	} else {
+		cea_channels.channels.RC_RLC_FLC = speaker_flags.RLC_RRC;
+		cea_channels.channels.RRC_FRC = speaker_flags.RLC_RRC;
+	}
+
+	return cea_channels;
+}
+
+static uint32_t calc_max_audio_packets_per_line(
+	const struct audio_crtc_info *crtc_info)
+{
+	uint32_t max_packets_per_line;
+
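+	/* audio packets are carried in data islands during horizontal
+	 * blanking (h_total - h_active pixels)
+	 */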
+	max_packets_per_line =
+		crtc_info->h_total - crtc_info->h_active;
+
+	if (crtc_info->pixel_repetition)
+		max_packets_per_line *= crtc_info->pixel_repetition;
+
+	/* for other hdmi features */
+	max_packets_per_line -= 58;
+	/* for Control Period */
+	max_packets_per_line -= 16;
+	/* Number of Audio Packets per Line */
+	max_packets_per_line /= 32;
+
+	return max_packets_per_line;
+}
+
+static void get_audio_clock_info(
+	enum dc_color_depth color_depth,
+	uint32_t crtc_pixel_clock_in_khz,
+	uint32_t actual_pixel_clock_in_khz,
+	struct audio_clock_info *audio_clock_info)
+{
+	const struct audio_clock_info *clock_info;
+	uint32_t index;
+	uint32_t crtc_pixel_clock_in_10khz = crtc_pixel_clock_in_khz / 10;
+	uint32_t audio_array_size;
+
+	switch (color_depth) {
+	case COLOR_DEPTH_161616:
+		clock_info = audio_clock_info_table_48bpc;
+		audio_array_size = ARRAY_SIZE(
+				audio_clock_info_table_48bpc);
+		break;
+	case COLOR_DEPTH_121212:
+		clock_info = audio_clock_info_table_36bpc;
+		audio_array_size = ARRAY_SIZE(
+				audio_clock_info_table_36bpc);
+		break;
+	default:
+		clock_info = audio_clock_info_table;
+		audio_array_size = ARRAY_SIZE(
+				audio_clock_info_table);
+		break;
+	}
+
+	if (clock_info != NULL) {
+		/* search for exact pixel clock in table */
+		for (index = 0; index < audio_array_size; index++) {
+			if (clock_info[index].pixel_clock_in_10khz >
+				crtc_pixel_clock_in_10khz)
+				break;  /* no match: table is sorted by pixel clock */
+			else if (clock_info[index].pixel_clock_in_10khz ==
+					crtc_pixel_clock_in_10khz) {
+				/* match found */
+				*audio_clock_info = clock_info[index];
+				return;
+			}
+		}
+	}
+
+	/* not found */
+	if (actual_pixel_clock_in_khz == 0)
+		actual_pixel_clock_in_khz = crtc_pixel_clock_in_khz;
+
+	/* See the HDMI spec: the table entry under
+	 * pixel clock of "Other".
+	 */
+	audio_clock_info->pixel_clock_in_10khz =
+			actual_pixel_clock_in_khz / 10;
+	audio_clock_info->cts_32khz = actual_pixel_clock_in_khz;
+	audio_clock_info->cts_44khz = actual_pixel_clock_in_khz;
+	audio_clock_info->cts_48khz = actual_pixel_clock_in_khz;
+
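+	/* HDMI spec recommended N for the "Other" pixel clock row:
+	 * 32 kHz -> 4096, 44.1 kHz -> 6272, 48 kHz -> 6144
+	 */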
+	audio_clock_info->n_32khz = 4096;
+	audio_clock_info->n_44khz = 6272;
+	audio_clock_info->n_48khz = 6144;
+}
+
+static void enc1_se_audio_setup(
+	struct stream_encoder *enc,
+	unsigned int az_inst,
+	struct audio_info *audio_info)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	uint32_t speakers = 0;
+	uint32_t channels = 0;
+
+	ASSERT(audio_info);
+	if (audio_info == NULL)
+		/* This should not happen; bail out so we don't crash if it does */
+		return;
+
+	speakers = audio_info->flags.info.ALLSPEAKERS;
+	channels = speakers_to_channels(audio_info->flags.speaker_flags).all;
+
+	/* setup the audio stream source select (audio -> dig mapping) */
+	REG_SET(AFMT_AUDIO_SRC_CONTROL, 0, AFMT_AUDIO_SRC_SELECT, az_inst);
+
+	/* Channel allocation */
+	REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, channels);
+}
+
+static void enc1_se_setup_hdmi_audio(
+	struct stream_encoder *enc,
+	const struct audio_crtc_info *crtc_info)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	struct audio_clock_info audio_clock_info = {0};
+	uint32_t max_packets_per_line;
+
+	/* For now still do the calculation, although this field is ignored
+	 * when HDMI_PACKET_GEN_VERSION is set to 1 above
+	 */
+	max_packets_per_line = calc_max_audio_packets_per_line(crtc_info);
+
+	/* HDMI_AUDIO_PACKET_CONTROL */
+	REG_UPDATE_2(HDMI_AUDIO_PACKET_CONTROL,
+			HDMI_AUDIO_PACKETS_PER_LINE, max_packets_per_line,
+			HDMI_AUDIO_DELAY_EN, 1);
+
+	/* AFMT_AUDIO_PACKET_CONTROL */
+	REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+
+	/* AFMT_AUDIO_PACKET_CONTROL2 */
+	REG_UPDATE_2(AFMT_AUDIO_PACKET_CONTROL2,
+			AFMT_AUDIO_LAYOUT_OVRD, 0,
+			AFMT_60958_OSF_OVRD, 0);
+
+	/* HDMI_ACR_PACKET_CONTROL */
+	REG_UPDATE_3(HDMI_ACR_PACKET_CONTROL,
+			HDMI_ACR_AUTO_SEND, 1,
+			HDMI_ACR_SOURCE, 0,
+			HDMI_ACR_AUDIO_PRIORITY, 0);
+
+	/* Program audio clock sample/regeneration parameters */
+	get_audio_clock_info(crtc_info->color_depth,
+			     crtc_info->requested_pixel_clock,
+			     crtc_info->calculated_pixel_clock,
+			     &audio_clock_info);
+	DC_LOG_HW_AUDIO(
+			"\n%s:Input::requested_pixel_clock = %d, "
+			"calculated_pixel_clock = %d\n", __func__,
+			crtc_info->requested_pixel_clock,
+			crtc_info->calculated_pixel_clock);
+
+	/* HDMI_ACR_32_0__HDMI_ACR_CTS_32_MASK */
+	REG_UPDATE(HDMI_ACR_32_0, HDMI_ACR_CTS_32, audio_clock_info.cts_32khz);
+
+	/* HDMI_ACR_32_1__HDMI_ACR_N_32_MASK */
+	REG_UPDATE(HDMI_ACR_32_1, HDMI_ACR_N_32, audio_clock_info.n_32khz);
+
+	/* HDMI_ACR_44_0__HDMI_ACR_CTS_44_MASK */
+	REG_UPDATE(HDMI_ACR_44_0, HDMI_ACR_CTS_44, audio_clock_info.cts_44khz);
+
+	/* HDMI_ACR_44_1__HDMI_ACR_N_44_MASK */
+	REG_UPDATE(HDMI_ACR_44_1, HDMI_ACR_N_44, audio_clock_info.n_44khz);
+
+	/* HDMI_ACR_48_0__HDMI_ACR_CTS_48_MASK */
+	REG_UPDATE(HDMI_ACR_48_0, HDMI_ACR_CTS_48, audio_clock_info.cts_48khz);
+
+	/* HDMI_ACR_48_1__HDMI_ACR_N_48_MASK */
+	REG_UPDATE(HDMI_ACR_48_1, HDMI_ACR_N_48, audio_clock_info.n_48khz);
+
+	/* The video driver cannot know in advance which sample rate will
+	 * be used by the HD Audio driver.
+	 * The HDMI_ACR_PACKET_CONTROL__HDMI_ACR_N_MULTIPLE field is
+	 * programmed later, in the interrupt callback.
+	 */
+
+	/* AFMT_60958_0__AFMT_60958_CS_CHANNEL_NUMBER_L_MASK &
+	 * AFMT_60958_0__AFMT_60958_CS_CLOCK_ACCURACY_MASK
+	 */
+	REG_UPDATE_2(AFMT_60958_0,
+			AFMT_60958_CS_CHANNEL_NUMBER_L, 1,
+			AFMT_60958_CS_CLOCK_ACCURACY, 0);
+
+	/* AFMT_60958_1 AFMT_60958_CS_CHANNEL_NUMBER_R */
+	REG_UPDATE(AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2);
+
+	/* AFMT_60958_2 - keep these settings until
+	 * the programming guide comes out
+	 */
+	REG_UPDATE_6(AFMT_60958_2,
+			AFMT_60958_CS_CHANNEL_NUMBER_2, 3,
+			AFMT_60958_CS_CHANNEL_NUMBER_3, 4,
+			AFMT_60958_CS_CHANNEL_NUMBER_4, 5,
+			AFMT_60958_CS_CHANNEL_NUMBER_5, 6,
+			AFMT_60958_CS_CHANNEL_NUMBER_6, 7,
+			AFMT_60958_CS_CHANNEL_NUMBER_7, 8);
+}
+
+static void enc1_se_setup_dp_audio(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* --- DP Audio packet configurations --- */
+
+	/* ATP Configuration */
+	REG_SET(DP_SEC_AUD_N, 0,
+			DP_SEC_AUD_N, DP_SEC_AUD_N__DP_SEC_AUD_N__DEFAULT);
+
+	/* Async/auto-calc timestamp mode */
+	REG_SET(DP_SEC_TIMESTAMP, 0, DP_SEC_TIMESTAMP_MODE,
+			DP_SEC_TIMESTAMP__DP_SEC_TIMESTAMP_MODE__AUTO_CALC);
+
+	/* --- The following are the registers
+	 *  copied from the SetupHDMI ---
+	 */
+
+	/* AFMT_AUDIO_PACKET_CONTROL */
+	REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1);
+
+	/* AFMT_AUDIO_PACKET_CONTROL2 */
+	REG_UPDATE_2(AFMT_AUDIO_PACKET_CONTROL2,
+			AFMT_AUDIO_LAYOUT_OVRD, 0,
+			AFMT_60958_OSF_OVRD, 0);
+
+	/* AFMT_INFOFRAME_CONTROL0 */
+	REG_UPDATE(AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1);
+
+	/* AFMT_60958_0__AFMT_60958_CS_CLOCK_ACCURACY_MASK */
+	REG_UPDATE(AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, 0);
+}
+
+static void enc1_se_enable_audio_clock(
+	struct stream_encoder *enc,
+	bool enable)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	if (REG(AFMT_CNTL) == 0)
+		return;   /* DCE8/10 does not have this register */
+
+	REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, !!enable);
+
+	/* wait for AFMT clock to turn on,
+	 * expectation: this should complete in 1-2 reads
+	 *
+	 * REG_WAIT(AFMT_CNTL, AFMT_AUDIO_CLOCK_ON, !!enable, 1, 10);
+	 *
+	 * TODO: waiting for clock_on does not work well. May need a HW
+	 * programming sequence. But audio seems to work normally even
+	 * without waiting for the clock_on status change
+	 */
+}
+
+static void enc1_se_enable_dp_audio(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	/* Enable Audio packets */
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1);
+
+	/* Program the ATP and AIP next */
+	REG_UPDATE_2(DP_SEC_CNTL,
+			DP_SEC_ATP_ENABLE, 1,
+			DP_SEC_AIP_ENABLE, 1);
+
+	/* Program STREAM_ENABLE after all the other enables. */
+	REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+static void enc1_se_disable_dp_audio(
+	struct stream_encoder *enc)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t value = 0;
+
+	/* Disable Audio packets */
+	REG_UPDATE_5(DP_SEC_CNTL,
+			DP_SEC_ASP_ENABLE, 0,
+			DP_SEC_ATP_ENABLE, 0,
+			DP_SEC_AIP_ENABLE, 0,
+			DP_SEC_ACM_ENABLE, 0,
+			DP_SEC_STREAM_ENABLE, 0);
+
+	/* This register is shared with the encoder info frame. Therefore we
+	 * need to keep the master bit enabled if at least one of the fields
+	 * is not 0
+	 */
+	value = REG_READ(DP_SEC_CNTL);
+	if (value != 0)
+		REG_UPDATE(DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1);
+}
+
+void enc1_se_audio_mute_control(
+	struct stream_encoder *enc,
+	bool mute)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	REG_UPDATE(AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, !mute);
+}
+
+void enc1_se_dp_audio_setup(
+	struct stream_encoder *enc,
+	unsigned int az_inst,
+	struct audio_info *info)
+{
+	enc1_se_audio_setup(enc, az_inst, info);
+}
+
+void enc1_se_dp_audio_enable(
+	struct stream_encoder *enc)
+{
+	enc1_se_enable_audio_clock(enc, true);
+	enc1_se_setup_dp_audio(enc);
+	enc1_se_enable_dp_audio(enc);
+}
+
+void enc1_se_dp_audio_disable(
+	struct stream_encoder *enc)
+{
+	enc1_se_disable_dp_audio(enc);
+	enc1_se_enable_audio_clock(enc, false);
+}
+
+void enc1_se_hdmi_audio_setup(
+	struct stream_encoder *enc,
+	unsigned int az_inst,
+	struct audio_info *info,
+	struct audio_crtc_info *audio_crtc_info)
+{
+	enc1_se_enable_audio_clock(enc, true);
+	enc1_se_setup_hdmi_audio(enc, audio_crtc_info);
+	enc1_se_audio_setup(enc, az_inst, info);
+}
+
+void enc1_se_hdmi_audio_disable(
+	struct stream_encoder *enc)
+{
+	enc1_se_enable_audio_clock(enc, false);
+}
+
+void enc1_setup_stereo_sync(
+	struct stream_encoder *enc,
+	int tg_inst, bool enable)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+
+	REG_UPDATE_2(DIG_FE_CNTL,
+			DIG_STEREOSYNC_SELECT, tg_inst,
+			DIG_STEREOSYNC_GATE_EN, !enable);
+}
+
+static const struct stream_encoder_funcs dcn10_str_enc_funcs = {
+	.dp_set_stream_attribute =
+		enc1_stream_encoder_dp_set_stream_attribute,
+	.hdmi_set_stream_attribute =
+		enc1_stream_encoder_hdmi_set_stream_attribute,
+	.dvi_set_stream_attribute =
+		enc1_stream_encoder_dvi_set_stream_attribute,
+	.set_mst_bandwidth =
+		enc1_stream_encoder_set_mst_bandwidth,
+	.update_hdmi_info_packets =
+		enc1_stream_encoder_update_hdmi_info_packets,
+	.stop_hdmi_info_packets =
+		enc1_stream_encoder_stop_hdmi_info_packets,
+	.update_dp_info_packets =
+		enc1_stream_encoder_update_dp_info_packets,
+	.stop_dp_info_packets =
+		enc1_stream_encoder_stop_dp_info_packets,
+	.dp_blank =
+		enc1_stream_encoder_dp_blank,
+	.dp_unblank =
+		enc1_stream_encoder_dp_unblank,
+	.audio_mute_control = enc1_se_audio_mute_control,
+
+	.dp_audio_setup = enc1_se_dp_audio_setup,
+	.dp_audio_enable = enc1_se_dp_audio_enable,
+	.dp_audio_disable = enc1_se_dp_audio_disable,
+
+	.hdmi_audio_setup = enc1_se_hdmi_audio_setup,
+	.hdmi_audio_disable = enc1_se_hdmi_audio_disable,
+	.setup_stereo_sync  = enc1_setup_stereo_sync,
+	.set_avmute = enc1_stream_encoder_set_avmute,
+};
+
+void dcn10_stream_encoder_construct(
+	struct dcn10_stream_encoder *enc1,
+	struct dc_context *ctx,
+	struct dc_bios *bp,
+	enum engine_id eng_id,
+	const struct dcn10_stream_enc_registers *regs,
+	const struct dcn10_stream_encoder_shift *se_shift,
+	const struct dcn10_stream_encoder_mask *se_mask)
+{
+	enc1->base.funcs = &dcn10_str_enc_funcs;
+	enc1->base.ctx = ctx;
+	enc1->base.id = eng_id;
+	enc1->base.bp = bp;
+	enc1->regs = regs;
+	enc1->se_shift = se_shift;
+	enc1->se_mask = se_mask;
+}
+
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
new file mode 100644
index 0000000..6b3e4de
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h
@@ -0,0 +1,524 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_STREAM_ENCODER_DCN10_H__
+#define __DC_STREAM_ENCODER_DCN10_H__
+
+#include "stream_encoder.h"
+
+#define DCN10STRENC_FROM_STRENC(stream_encoder)\
+	container_of(stream_encoder, struct dcn10_stream_encoder, base)
+
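+/* SRI() resolves the instance-relative address of each register for
+ * DIG/DP instance `id`; the exact expansion comes from the asic's
+ * generated register headers
+ */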
+#define SE_COMMON_DCN_REG_LIST(id) \
+	SRI(AFMT_CNTL, DIG, id), \
+	SRI(AFMT_GENERIC_0, DIG, id), \
+	SRI(AFMT_GENERIC_1, DIG, id), \
+	SRI(AFMT_GENERIC_2, DIG, id), \
+	SRI(AFMT_GENERIC_3, DIG, id), \
+	SRI(AFMT_GENERIC_4, DIG, id), \
+	SRI(AFMT_GENERIC_5, DIG, id), \
+	SRI(AFMT_GENERIC_6, DIG, id), \
+	SRI(AFMT_GENERIC_7, DIG, id), \
+	SRI(AFMT_GENERIC_HDR, DIG, id), \
+	SRI(AFMT_INFOFRAME_CONTROL0, DIG, id), \
+	SRI(AFMT_VBI_PACKET_CONTROL, DIG, id), \
+	SRI(AFMT_VBI_PACKET_CONTROL1, DIG, id), \
+	SRI(AFMT_AUDIO_PACKET_CONTROL, DIG, id), \
+	SRI(AFMT_AUDIO_PACKET_CONTROL2, DIG, id), \
+	SRI(AFMT_AUDIO_SRC_CONTROL, DIG, id), \
+	SRI(AFMT_60958_0, DIG, id), \
+	SRI(AFMT_60958_1, DIG, id), \
+	SRI(AFMT_60958_2, DIG, id), \
+	SRI(DIG_FE_CNTL, DIG, id), \
+	SRI(HDMI_CONTROL, DIG, id), \
+	SRI(HDMI_DB_CONTROL, DIG, id), \
+	SRI(HDMI_GC, DIG, id), \
+	SRI(HDMI_GENERIC_PACKET_CONTROL0, DIG, id), \
+	SRI(HDMI_GENERIC_PACKET_CONTROL1, DIG, id), \
+	SRI(HDMI_GENERIC_PACKET_CONTROL2, DIG, id), \
+	SRI(HDMI_GENERIC_PACKET_CONTROL3, DIG, id), \
+	SRI(HDMI_INFOFRAME_CONTROL0, DIG, id), \
+	SRI(HDMI_INFOFRAME_CONTROL1, DIG, id), \
+	SRI(HDMI_VBI_PACKET_CONTROL, DIG, id), \
+	SRI(HDMI_AUDIO_PACKET_CONTROL, DIG, id),\
+	SRI(HDMI_ACR_PACKET_CONTROL, DIG, id),\
+	SRI(HDMI_ACR_32_0, DIG, id),\
+	SRI(HDMI_ACR_32_1, DIG, id),\
+	SRI(HDMI_ACR_44_0, DIG, id),\
+	SRI(HDMI_ACR_44_1, DIG, id),\
+	SRI(HDMI_ACR_48_0, DIG, id),\
+	SRI(HDMI_ACR_48_1, DIG, id),\
+	SRI(DP_DB_CNTL, DP, id), \
+	SRI(DP_MSA_MISC, DP, id), \
+	SRI(DP_MSA_COLORIMETRY, DP, id), \
+	SRI(DP_MSA_TIMING_PARAM1, DP, id), \
+	SRI(DP_MSA_TIMING_PARAM2, DP, id), \
+	SRI(DP_MSA_TIMING_PARAM3, DP, id), \
+	SRI(DP_MSA_TIMING_PARAM4, DP, id), \
+	SRI(DP_MSE_RATE_CNTL, DP, id), \
+	SRI(DP_MSE_RATE_UPDATE, DP, id), \
+	SRI(DP_PIXEL_FORMAT, DP, id), \
+	SRI(DP_SEC_CNTL, DP, id), \
+	SRI(DP_STEER_FIFO, DP, id), \
+	SRI(DP_VID_M, DP, id), \
+	SRI(DP_VID_N, DP, id), \
+	SRI(DP_VID_STREAM_CNTL, DP, id), \
+	SRI(DP_VID_TIMING, DP, id), \
+	SRI(DP_SEC_AUD_N, DP, id), \
+	SRI(DP_SEC_TIMESTAMP, DP, id)
+
+#define SE_DCN_REG_LIST(id)\
+	SE_COMMON_DCN_REG_LIST(id)
+
+struct dcn10_stream_enc_registers {
+	uint32_t AFMT_CNTL;
+	uint32_t AFMT_AVI_INFO0;
+	uint32_t AFMT_AVI_INFO1;
+	uint32_t AFMT_AVI_INFO2;
+	uint32_t AFMT_AVI_INFO3;
+	uint32_t AFMT_GENERIC_0;
+	uint32_t AFMT_GENERIC_1;
+	uint32_t AFMT_GENERIC_2;
+	uint32_t AFMT_GENERIC_3;
+	uint32_t AFMT_GENERIC_4;
+	uint32_t AFMT_GENERIC_5;
+	uint32_t AFMT_GENERIC_6;
+	uint32_t AFMT_GENERIC_7;
+	uint32_t AFMT_GENERIC_HDR;
+	uint32_t AFMT_INFOFRAME_CONTROL0;
+	uint32_t AFMT_VBI_PACKET_CONTROL;
+	uint32_t AFMT_VBI_PACKET_CONTROL1;
+	uint32_t AFMT_AUDIO_PACKET_CONTROL;
+	uint32_t AFMT_AUDIO_PACKET_CONTROL2;
+	uint32_t AFMT_AUDIO_SRC_CONTROL;
+	uint32_t AFMT_60958_0;
+	uint32_t AFMT_60958_1;
+	uint32_t AFMT_60958_2;
+	uint32_t DIG_FE_CNTL;
+	uint32_t DP_MSE_RATE_CNTL;
+	uint32_t DP_MSE_RATE_UPDATE;
+	uint32_t DP_PIXEL_FORMAT;
+	uint32_t DP_SEC_CNTL;
+	uint32_t DP_STEER_FIFO;
+	uint32_t DP_VID_M;
+	uint32_t DP_VID_N;
+	uint32_t DP_VID_STREAM_CNTL;
+	uint32_t DP_VID_TIMING;
+	uint32_t DP_SEC_AUD_N;
+	uint32_t DP_SEC_TIMESTAMP;
+	uint32_t HDMI_CONTROL;
+	uint32_t HDMI_GC;
+	uint32_t HDMI_GENERIC_PACKET_CONTROL0;
+	uint32_t HDMI_GENERIC_PACKET_CONTROL1;
+	uint32_t HDMI_GENERIC_PACKET_CONTROL2;
+	uint32_t HDMI_GENERIC_PACKET_CONTROL3;
+	uint32_t HDMI_GENERIC_PACKET_CONTROL4;
+	uint32_t HDMI_GENERIC_PACKET_CONTROL5;
+	uint32_t HDMI_INFOFRAME_CONTROL0;
+	uint32_t HDMI_INFOFRAME_CONTROL1;
+	uint32_t HDMI_VBI_PACKET_CONTROL;
+	uint32_t HDMI_AUDIO_PACKET_CONTROL;
+	uint32_t HDMI_ACR_PACKET_CONTROL;
+	uint32_t HDMI_ACR_32_0;
+	uint32_t HDMI_ACR_32_1;
+	uint32_t HDMI_ACR_44_0;
+	uint32_t HDMI_ACR_44_1;
+	uint32_t HDMI_ACR_48_0;
+	uint32_t HDMI_ACR_48_1;
+	uint32_t DP_DB_CNTL;
+	uint32_t DP_MSA_MISC;
+	uint32_t DP_MSA_COLORIMETRY;
+	uint32_t DP_MSA_TIMING_PARAM1;
+	uint32_t DP_MSA_TIMING_PARAM2;
+	uint32_t DP_MSA_TIMING_PARAM3;
+	uint32_t DP_MSA_TIMING_PARAM4;
+	uint32_t HDMI_DB_CONTROL;
+};
+
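+/* Build one designated initializer by token pasting against the
+ * generated register headers, e.g. SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE,
+ * __SHIFT) expands to .HDMI_GC_AVMUTE = DIG0_HDMI_GC__HDMI_GC_AVMUTE__SHIFT
+ */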
+#define SE_SF(reg_name, field_name, post_fix)\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
+#define SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_INDEX, mask_sh),\
+	SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB0, mask_sh),\
+	SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB1, mask_sh),\
+	SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB2, mask_sh),\
+	SE_SF(DIG0_AFMT_GENERIC_HDR, AFMT_GENERIC_HB3, mask_sh),\
+	SE_SF(DP0_DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, mask_sh),\
+	SE_SF(DP0_DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_PACKET_GEN_VERSION, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_KEEPOUT_MODE, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_ENABLE, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_DEEP_COLOR_DEPTH, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_DATA_SCRAMBLE_EN, mask_sh),\
+	SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, mask_sh),\
+	SE_SF(DIG0_AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, mask_sh),\
+	SE_SF(DIG0_HDMI_INFOFRAME_CONTROL1, HDMI_AUDIO_INFO_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GC, HDMI_GC_AVMUTE, mask_sh),\
+	SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_X, mask_sh),\
+	SE_SF(DP0_DP_MSE_RATE_CNTL, DP_MSE_RATE_Y, mask_sh),\
+	SE_SF(DP0_DP_MSE_RATE_UPDATE, DP_MSE_RATE_UPDATE_PENDING, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP0_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP1_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP2_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP3_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_MPG_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, mask_sh),\
+	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, mask_sh),\
+	SE_SF(DP0_DP_STEER_FIFO, DP_STEER_FIFO_RESET, mask_sh),\
+	SE_SF(DP0_DP_VID_TIMING, DP_VID_M_N_GEN_EN, mask_sh),\
+	SE_SF(DP0_DP_VID_N, DP_VID_N, mask_sh),\
+	SE_SF(DP0_DP_VID_M, DP_VID_M, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CNTL, DIG_START, mask_sh),\
+	SE_SF(DIG0_AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, mask_sh),\
+	SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, mask_sh),\
+	SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, mask_sh),\
+	SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_LAYOUT_OVRD, mask_sh),\
+	SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL2, AFMT_60958_OSF_OVRD, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUDIO_PRIORITY, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_32_0, HDMI_ACR_CTS_32, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_32_1, HDMI_ACR_N_32, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_44_0, HDMI_ACR_CTS_44, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_44_1, HDMI_ACR_N_44, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_48_0, HDMI_ACR_CTS_48, mask_sh),\
+	SE_SF(DIG0_HDMI_ACR_48_1, HDMI_ACR_N_48, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_0, AFMT_60958_CS_CLOCK_ACCURACY, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, mask_sh),\
+	SE_SF(DIG0_AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, mask_sh),\
+	SE_SF(DP0_DP_SEC_AUD_N, DP_SEC_AUD_N, mask_sh),\
+	SE_SF(DP0_DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ASP_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ATP_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_AIP_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_ACM_ENABLE, mask_sh),\
+	SE_SF(DIG0_AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, mask_sh),\
+	SE_SF(DIG0_AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, mask_sh),\
+	SE_SF(DIG0_HDMI_CONTROL, HDMI_CLOCK_CHANNEL_RATE, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CNTL, TMDS_PIXEL_ENCODING, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CNTL, TMDS_COLOR_FORMAT, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_SELECT, mask_sh),\
+	SE_SF(DIG0_DIG_FE_CNTL, DIG_STEREOSYNC_GATE_EN, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_LOCK_STATUS, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL, AFMT_GENERIC_CONFLICT_CLR, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE_PENDING, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC0_FRAME_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC1_FRAME_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC2_FRAME_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC3_FRAME_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC4_FRAME_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC5_FRAME_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC6_FRAME_UPDATE, mask_sh),\
+	SE_SF(DIG0_AFMT_VBI_PACKET_CONTROL1, AFMT_GENERIC7_FRAME_UPDATE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP4_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP5_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP6_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_SEC_CNTL, DP_SEC_GSP7_ENABLE, mask_sh),\
+	SE_SF(DP0_DP_DB_CNTL, DP_DB_DISABLE, mask_sh),\
+	SE_SF(DP0_DP_MSA_COLORIMETRY, DP_MSA_MISC0, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_HTOTAL, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM1, DP_MSA_VTOTAL, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_HSTART, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM2, DP_MSA_VSTART, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCWIDTH, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_HSYNCPOLARITY, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCWIDTH, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM3, DP_MSA_VSYNCPOLARITY, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_HWIDTH, mask_sh),\
+	SE_SF(DP0_DP_MSA_TIMING_PARAM4, DP_MSA_VHEIGHT, mask_sh),\
+	SE_SF(DIG0_HDMI_DB_CONTROL, HDMI_DB_DISABLE, mask_sh),\
+	SE_SF(DP0_DP_VID_TIMING, DP_VID_N_MUL, mask_sh)
+
+#define SE_COMMON_MASK_SH_LIST_SOC(mask_sh)\
+	SE_COMMON_MASK_SH_LIST_SOC_BASE(mask_sh)
+
+#define SE_COMMON_MASK_SH_LIST_DCN10(mask_sh)\
+	SE_COMMON_MASK_SH_LIST_SOC(mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC0_LINE, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_CONT, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_SEND, mask_sh),\
+	SE_SF(DIG0_HDMI_GENERIC_PACKET_CONTROL0, HDMI_GENERIC1_LINE, mask_sh)
+
+#define SE_REG_FIELD_LIST_DCN1_0(type) \
+	type AFMT_GENERIC_INDEX;\
+	type AFMT_GENERIC_HB0;\
+	type AFMT_GENERIC_HB1;\
+	type AFMT_GENERIC_HB2;\
+	type AFMT_GENERIC_HB3;\
+	type AFMT_GENERIC_LOCK_STATUS;\
+	type AFMT_GENERIC_CONFLICT;\
+	type AFMT_GENERIC_CONFLICT_CLR;\
+	type AFMT_GENERIC0_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC1_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC2_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC3_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC4_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC5_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC6_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC7_FRAME_UPDATE_PENDING;\
+	type AFMT_GENERIC0_FRAME_UPDATE;\
+	type AFMT_GENERIC1_FRAME_UPDATE;\
+	type AFMT_GENERIC2_FRAME_UPDATE;\
+	type AFMT_GENERIC3_FRAME_UPDATE;\
+	type AFMT_GENERIC4_FRAME_UPDATE;\
+	type AFMT_GENERIC5_FRAME_UPDATE;\
+	type AFMT_GENERIC6_FRAME_UPDATE;\
+	type AFMT_GENERIC7_FRAME_UPDATE;\
+	type HDMI_GENERIC0_CONT;\
+	type HDMI_GENERIC0_SEND;\
+	type HDMI_GENERIC0_LINE;\
+	type HDMI_GENERIC1_CONT;\
+	type HDMI_GENERIC1_SEND;\
+	type HDMI_GENERIC1_LINE;\
+	type HDMI_GENERIC2_CONT;\
+	type HDMI_GENERIC2_SEND;\
+	type HDMI_GENERIC2_LINE;\
+	type HDMI_GENERIC3_CONT;\
+	type HDMI_GENERIC3_SEND;\
+	type HDMI_GENERIC3_LINE;\
+	type HDMI_GENERIC4_CONT;\
+	type HDMI_GENERIC4_SEND;\
+	type HDMI_GENERIC4_LINE;\
+	type HDMI_GENERIC5_CONT;\
+	type HDMI_GENERIC5_SEND;\
+	type HDMI_GENERIC5_LINE;\
+	type HDMI_GENERIC6_CONT;\
+	type HDMI_GENERIC6_SEND;\
+	type HDMI_GENERIC6_LINE;\
+	type HDMI_GENERIC7_CONT;\
+	type HDMI_GENERIC7_SEND;\
+	type HDMI_GENERIC7_LINE;\
+	type DP_PIXEL_ENCODING;\
+	type DP_COMPONENT_DEPTH;\
+	type HDMI_PACKET_GEN_VERSION;\
+	type HDMI_KEEPOUT_MODE;\
+	type HDMI_DEEP_COLOR_ENABLE;\
+	type HDMI_CLOCK_CHANNEL_RATE;\
+	type HDMI_DEEP_COLOR_DEPTH;\
+	type HDMI_GC_CONT;\
+	type HDMI_GC_SEND;\
+	type HDMI_NULL_SEND;\
+	type HDMI_DATA_SCRAMBLE_EN;\
+	type HDMI_AUDIO_INFO_SEND;\
+	type AFMT_AUDIO_INFO_UPDATE;\
+	type HDMI_AUDIO_INFO_LINE;\
+	type HDMI_GC_AVMUTE;\
+	type DP_MSE_RATE_X;\
+	type DP_MSE_RATE_Y;\
+	type DP_MSE_RATE_UPDATE_PENDING;\
+	type DP_SEC_GSP0_ENABLE;\
+	type DP_SEC_STREAM_ENABLE;\
+	type DP_SEC_GSP1_ENABLE;\
+	type DP_SEC_GSP2_ENABLE;\
+	type DP_SEC_GSP3_ENABLE;\
+	type DP_SEC_GSP4_ENABLE;\
+	type DP_SEC_GSP5_ENABLE;\
+	type DP_SEC_GSP6_ENABLE;\
+	type DP_SEC_GSP7_ENABLE;\
+	type DP_SEC_MPG_ENABLE;\
+	type DP_VID_STREAM_DIS_DEFER;\
+	type DP_VID_STREAM_ENABLE;\
+	type DP_VID_STREAM_STATUS;\
+	type DP_STEER_FIFO_RESET;\
+	type DP_VID_M_N_GEN_EN;\
+	type DP_VID_N;\
+	type DP_VID_M;\
+	type DIG_START;\
+	type AFMT_AUDIO_SRC_SELECT;\
+	type AFMT_AUDIO_CHANNEL_ENABLE;\
+	type HDMI_AUDIO_PACKETS_PER_LINE;\
+	type HDMI_AUDIO_DELAY_EN;\
+	type AFMT_60958_CS_UPDATE;\
+	type AFMT_AUDIO_LAYOUT_OVRD;\
+	type AFMT_60958_OSF_OVRD;\
+	type HDMI_ACR_AUTO_SEND;\
+	type HDMI_ACR_SOURCE;\
+	type HDMI_ACR_AUDIO_PRIORITY;\
+	type HDMI_ACR_CTS_32;\
+	type HDMI_ACR_N_32;\
+	type HDMI_ACR_CTS_44;\
+	type HDMI_ACR_N_44;\
+	type HDMI_ACR_CTS_48;\
+	type HDMI_ACR_N_48;\
+	type AFMT_60958_CS_CHANNEL_NUMBER_L;\
+	type AFMT_60958_CS_CLOCK_ACCURACY;\
+	type AFMT_60958_CS_CHANNEL_NUMBER_R;\
+	type AFMT_60958_CS_CHANNEL_NUMBER_2;\
+	type AFMT_60958_CS_CHANNEL_NUMBER_3;\
+	type AFMT_60958_CS_CHANNEL_NUMBER_4;\
+	type AFMT_60958_CS_CHANNEL_NUMBER_5;\
+	type AFMT_60958_CS_CHANNEL_NUMBER_6;\
+	type AFMT_60958_CS_CHANNEL_NUMBER_7;\
+	type DP_SEC_AUD_N;\
+	type DP_SEC_TIMESTAMP_MODE;\
+	type DP_SEC_ASP_ENABLE;\
+	type DP_SEC_ATP_ENABLE;\
+	type DP_SEC_AIP_ENABLE;\
+	type DP_SEC_ACM_ENABLE;\
+	type AFMT_AUDIO_SAMPLE_SEND;\
+	type AFMT_AUDIO_CLOCK_EN;\
+	type TMDS_PIXEL_ENCODING;\
+	type TMDS_COLOR_FORMAT;\
+	type DIG_STEREOSYNC_SELECT;\
+	type DIG_STEREOSYNC_GATE_EN;\
+	type DP_DB_DISABLE;\
+	type DP_MSA_MISC0;\
+	type DP_MSA_HTOTAL;\
+	type DP_MSA_VTOTAL;\
+	type DP_MSA_HSTART;\
+	type DP_MSA_VSTART;\
+	type DP_MSA_HSYNCWIDTH;\
+	type DP_MSA_HSYNCPOLARITY;\
+	type DP_MSA_VSYNCWIDTH;\
+	type DP_MSA_VSYNCPOLARITY;\
+	type DP_MSA_HWIDTH;\
+	type DP_MSA_VHEIGHT;\
+	type HDMI_DB_DISABLE;\
+	type DP_VID_N_MUL;\
+	type DP_VID_M_DOUBLE_VALUE_EN
+
+struct dcn10_stream_encoder_shift {
+	SE_REG_FIELD_LIST_DCN1_0(uint8_t);
+};
+
+struct dcn10_stream_encoder_mask {
+	SE_REG_FIELD_LIST_DCN1_0(uint32_t);
+};
+
+struct dcn10_stream_encoder {
+	struct stream_encoder base;
+	const struct dcn10_stream_enc_registers *regs;
+	const struct dcn10_stream_encoder_shift *se_shift;
+	const struct dcn10_stream_encoder_mask *se_mask;
+};
+
+void dcn10_stream_encoder_construct(
+	struct dcn10_stream_encoder *enc1,
+	struct dc_context *ctx,
+	struct dc_bios *bp,
+	enum engine_id eng_id,
+	const struct dcn10_stream_enc_registers *regs,
+	const struct dcn10_stream_encoder_shift *se_shift,
+	const struct dcn10_stream_encoder_mask *se_mask);
+
+void enc1_update_generic_info_packet(
+	struct dcn10_stream_encoder *enc1,
+	uint32_t packet_index,
+	const struct dc_info_packet *info_packet);
+
+void enc1_stream_encoder_dp_set_stream_attribute(
+	struct stream_encoder *enc,
+	struct dc_crtc_timing *crtc_timing,
+	enum dc_color_space output_color_space);
+
+void enc1_stream_encoder_hdmi_set_stream_attribute(
+	struct stream_encoder *enc,
+	struct dc_crtc_timing *crtc_timing,
+	int actual_pix_clk_khz,
+	bool enable_audio);
+
+void enc1_stream_encoder_dvi_set_stream_attribute(
+	struct stream_encoder *enc,
+	struct dc_crtc_timing *crtc_timing,
+	bool is_dual_link);
+
+void enc1_stream_encoder_set_mst_bandwidth(
+	struct stream_encoder *enc,
+	struct fixed31_32 avg_time_slots_per_mtp);
+
+void enc1_stream_encoder_update_dp_info_packets(
+	struct stream_encoder *enc,
+	const struct encoder_info_frame *info_frame);
+
+void enc1_stream_encoder_stop_dp_info_packets(
+	struct stream_encoder *enc);
+
+void enc1_stream_encoder_dp_blank(
+	struct stream_encoder *enc);
+
+void enc1_stream_encoder_dp_unblank(
+	struct stream_encoder *enc,
+	const struct encoder_unblank_param *param);
+
+void enc1_setup_stereo_sync(
+	struct stream_encoder *enc,
+	int tg_inst, bool enable);
+
+void enc1_stream_encoder_set_avmute(
+	struct stream_encoder *enc,
+	bool enable);
+
+void enc1_se_audio_mute_control(
+	struct stream_encoder *enc,
+	bool mute);
+
+void enc1_se_dp_audio_setup(
+	struct stream_encoder *enc,
+	unsigned int az_inst,
+	struct audio_info *info);
+
+void enc1_se_dp_audio_enable(
+	struct stream_encoder *enc);
+
+void enc1_se_dp_audio_disable(
+	struct stream_encoder *enc);
+
+void enc1_se_hdmi_audio_setup(
+	struct stream_encoder *enc,
+	unsigned int az_inst,
+	struct audio_info *info,
+	struct audio_crtc_info *audio_crtc_info);
+
+void enc1_se_hdmi_audio_disable(
+	struct stream_encoder *enc);
+
+#endif /* __DC_STREAM_ENCODER_DCN10_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h
index 22e7ee7..4ff9b2b 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -341,6 +341,10 @@
 
 unsigned long long dm_get_timestamp(struct dc_context *ctx);
 
+unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx,
+		unsigned long long current_time_stamp,
+		unsigned long long last_time_stamp);
+
 /*
  * performance tracing
  */
@@ -351,10 +355,6 @@
 /*
  * Debug and verification hooks
  */
-bool dm_helpers_dc_conn_log(
-		struct dc_context *ctx,
-		struct log_entry *entry,
-		enum dc_log_type event);
 
 void dm_dtn_log_begin(struct dc_context *ctx);
 void dm_dtn_log_append_v(struct dc_context *ctx, const char *msg, ...);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index b1ad355..47c19f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -108,4 +108,17 @@
 	dm_std_uninitialized = 0, dm_std_cvtr2, dm_std_cvt
 };
 
+enum mpc_combine_affinity {
+	dm_mpc_always_when_possible,
+	dm_mpc_reduce_voltage,
+	dm_mpc_reduce_voltage_and_clocks
+};
+
+enum self_refresh_affinity {
+	dm_try_to_allow_self_refresh_and_mclk_switch,
+	dm_allow_self_refresh_and_mclk_switch,
+	dm_allow_self_refresh,
+	dm_neither_self_refresh_nor_mclk_switch
+};
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
index c109b2c..fd9d97a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_lib.c
@@ -26,75 +26,89 @@
 #include "display_mode_lib.h"
 #include "dc_features.h"
 
+static const struct _vcs_dpi_ip_params_st dcn1_0_ip = {
+	.rob_buffer_size_kbytes = 64,
+	.det_buffer_size_kbytes = 164,
+	.dpte_buffer_size_in_pte_reqs = 42,
+	.dpp_output_buffer_pixels = 2560,
+	.opp_output_buffer_lines = 1,
+	.pixel_chunk_size_kbytes = 8,
+	.pte_enable = 1,
+	.pte_chunk_size_kbytes = 2,
+	.meta_chunk_size_kbytes = 2,
+	.writeback_chunk_size_kbytes = 2,
+	.line_buffer_size_bits = 589824,
+	.max_line_buffer_lines = 12,
+	.IsLineBufferBppFixed = 0,
+	.LineBufferFixedBpp = -1,
+	.writeback_luma_buffer_size_kbytes = 12,
+	.writeback_chroma_buffer_size_kbytes = 8,
+	.max_num_dpp = 4,
+	.max_num_wb = 2,
+	.max_dchub_pscl_bw_pix_per_clk = 4,
+	.max_pscl_lb_bw_pix_per_clk = 2,
+	.max_lb_vscl_bw_pix_per_clk = 4,
+	.max_vscl_hscl_bw_pix_per_clk = 4,
+	.max_hscl_ratio = 4,
+	.max_vscl_ratio = 4,
+	.hscl_mults = 4,
+	.vscl_mults = 4,
+	.max_hscl_taps = 8,
+	.max_vscl_taps = 8,
+	.dispclk_ramp_margin_percent = 1,
+	.underscan_factor = 1.10,
+	.min_vblank_lines = 14,
+	.dppclk_delay_subtotal = 90,
+	.dispclk_delay_subtotal = 42,
+	.dcfclk_cstate_latency = 10,
+	.max_inter_dcn_tile_repeaters = 8,
+	.can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0,
+	.bug_forcing_LC_req_same_size_fixed = 0,
+};
+
+static const struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
+	.sr_exit_time_us = 9.0,
+	.sr_enter_plus_exit_time_us = 11.0,
+	.urgent_latency_us = 4.0,
+	.writeback_latency_us = 12.0,
+	.ideal_dram_bw_after_urgent_percent = 80.0,
+	.max_request_size_bytes = 256,
+	.downspread_percent = 0.5,
+	.dram_page_open_time_ns = 50.0,
+	.dram_rw_turnaround_time_ns = 17.5,
+	.dram_return_buffer_per_channel_bytes = 8192,
+	.round_trip_ping_latency_dcfclk_cycles = 128,
+	.urgent_out_of_order_return_per_channel_bytes = 256,
+	.channel_interleave_bytes = 256,
+	.num_banks = 8,
+	.num_chans = 2,
+	.vmm_page_size_bytes = 4096,
+	.dram_clock_change_latency_us = 17.0,
+	.writeback_dram_clock_change_latency_us = 23.0,
+	.return_bus_width_bytes = 64,
+};
+
 static void set_soc_bounding_box(struct _vcs_dpi_soc_bounding_box_st *soc, enum dml_project project)
 {
-	if (project == DML_PROJECT_RAVEN1) {
-		soc->sr_exit_time_us = 9.0;
-		soc->sr_enter_plus_exit_time_us = 11.0;
-		soc->urgent_latency_us = 4.0;
-		soc->writeback_latency_us = 12.0;
-		soc->ideal_dram_bw_after_urgent_percent = 80.0;
-		soc->max_request_size_bytes = 256;
-		soc->downspread_percent = 0.5;
-		soc->dram_page_open_time_ns = 50.0;
-		soc->dram_rw_turnaround_time_ns = 17.5;
-		soc->dram_return_buffer_per_channel_bytes = 8192;
-		soc->round_trip_ping_latency_dcfclk_cycles = 128;
-		soc->urgent_out_of_order_return_per_channel_bytes = 256;
-		soc->channel_interleave_bytes = 256;
-		soc->num_banks = 8;
-		soc->num_chans = 2;
-		soc->vmm_page_size_bytes = 4096;
-		soc->dram_clock_change_latency_us = 17.0;
-		soc->writeback_dram_clock_change_latency_us = 23.0;
-		soc->return_bus_width_bytes = 64;
-	} else {
-		BREAK_TO_DEBUGGER(); /* Invalid Project Specified */
+	switch (project) {
+	case DML_PROJECT_RAVEN1:
+		*soc = dcn1_0_soc;
+		break;
+	default:
+		ASSERT(0);
+		break;
 	}
 }
 
 static void set_ip_params(struct _vcs_dpi_ip_params_st *ip, enum dml_project project)
 {
-	if (project == DML_PROJECT_RAVEN1) {
-		ip->rob_buffer_size_kbytes = 64;
-		ip->det_buffer_size_kbytes = 164;
-		ip->dpte_buffer_size_in_pte_reqs = 42;
-		ip->dpp_output_buffer_pixels = 2560;
-		ip->opp_output_buffer_lines = 1;
-		ip->pixel_chunk_size_kbytes = 8;
-		ip->pte_enable = 1;
-		ip->pte_chunk_size_kbytes = 2;
-		ip->meta_chunk_size_kbytes = 2;
-		ip->writeback_chunk_size_kbytes = 2;
-		ip->line_buffer_size_bits = 589824;
-		ip->max_line_buffer_lines = 12;
-		ip->IsLineBufferBppFixed = 0;
-		ip->LineBufferFixedBpp = -1;
-		ip->writeback_luma_buffer_size_kbytes = 12;
-		ip->writeback_chroma_buffer_size_kbytes = 8;
-		ip->max_num_dpp = 4;
-		ip->max_num_wb = 2;
-		ip->max_dchub_pscl_bw_pix_per_clk = 4;
-		ip->max_pscl_lb_bw_pix_per_clk = 2;
-		ip->max_lb_vscl_bw_pix_per_clk = 4;
-		ip->max_vscl_hscl_bw_pix_per_clk = 4;
-		ip->max_hscl_ratio = 4;
-		ip->max_vscl_ratio = 4;
-		ip->hscl_mults = 4;
-		ip->vscl_mults = 4;
-		ip->max_hscl_taps = 8;
-		ip->max_vscl_taps = 8;
-		ip->dispclk_ramp_margin_percent = 1;
-		ip->underscan_factor = 1.10;
-		ip->min_vblank_lines = 14;
-		ip->dppclk_delay_subtotal = 90;
-		ip->dispclk_delay_subtotal = 42;
-		ip->dcfclk_cstate_latency = 10;
-		ip->max_inter_dcn_tile_repeaters = 8;
-		ip->can_vstartup_lines_exceed_vsync_plus_back_porch_lines_minus_one = 0;
-		ip->bug_forcing_LC_req_same_size_fixed = 0;
-	} else {
-		BREAK_TO_DEBUGGER(); /* Invalid Project Specified */
+	switch (project) {
+	case DML_PROJECT_RAVEN1:
+		*ip = dcn1_0_ip;
+		break;
+	default:
+		ASSERT(0);
+		break;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index 09affa1..7fa0375 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -25,39 +25,39 @@
 #ifndef __DISPLAY_MODE_STRUCTS_H__
 #define __DISPLAY_MODE_STRUCTS_H__
 
-typedef struct _vcs_dpi_voltage_scaling_st	voltage_scaling_st;
-typedef struct _vcs_dpi_soc_bounding_box_st	soc_bounding_box_st;
-typedef struct _vcs_dpi_ip_params_st	ip_params_st;
-typedef struct _vcs_dpi_display_pipe_source_params_st	display_pipe_source_params_st;
-typedef struct _vcs_dpi_display_output_params_st	display_output_params_st;
-typedef struct _vcs_dpi_display_bandwidth_st	display_bandwidth_st;
-typedef struct _vcs_dpi_scaler_ratio_depth_st	scaler_ratio_depth_st;
-typedef struct _vcs_dpi_scaler_taps_st	scaler_taps_st;
-typedef struct _vcs_dpi_display_pipe_dest_params_st	display_pipe_dest_params_st;
-typedef struct _vcs_dpi_display_pipe_params_st	display_pipe_params_st;
-typedef struct _vcs_dpi_display_clocks_and_cfg_st	display_clocks_and_cfg_st;
-typedef struct _vcs_dpi_display_e2e_pipe_params_st	display_e2e_pipe_params_st;
-typedef struct _vcs_dpi_dchub_buffer_sizing_st	dchub_buffer_sizing_st;
-typedef struct _vcs_dpi_watermarks_perf_st	watermarks_perf_st;
-typedef struct _vcs_dpi_cstate_pstate_watermarks_st	cstate_pstate_watermarks_st;
-typedef struct _vcs_dpi_wm_calc_pipe_params_st	wm_calc_pipe_params_st;
-typedef struct _vcs_dpi_vratio_pre_st	vratio_pre_st;
-typedef struct _vcs_dpi_display_data_rq_misc_params_st	display_data_rq_misc_params_st;
-typedef struct _vcs_dpi_display_data_rq_sizing_params_st	display_data_rq_sizing_params_st;
-typedef struct _vcs_dpi_display_data_rq_dlg_params_st	display_data_rq_dlg_params_st;
-typedef struct _vcs_dpi_display_cur_rq_dlg_params_st	display_cur_rq_dlg_params_st;
-typedef struct _vcs_dpi_display_rq_dlg_params_st	display_rq_dlg_params_st;
-typedef struct _vcs_dpi_display_rq_sizing_params_st	display_rq_sizing_params_st;
-typedef struct _vcs_dpi_display_rq_misc_params_st	display_rq_misc_params_st;
-typedef struct _vcs_dpi_display_rq_params_st	display_rq_params_st;
-typedef struct _vcs_dpi_display_dlg_regs_st	display_dlg_regs_st;
-typedef struct _vcs_dpi_display_ttu_regs_st	display_ttu_regs_st;
-typedef struct _vcs_dpi_display_data_rq_regs_st	display_data_rq_regs_st;
-typedef struct _vcs_dpi_display_rq_regs_st	display_rq_regs_st;
-typedef struct _vcs_dpi_display_dlg_sys_params_st	display_dlg_sys_params_st;
-typedef struct _vcs_dpi_display_dlg_prefetch_param_st	display_dlg_prefetch_param_st;
-typedef struct _vcs_dpi_display_pipe_clock_st	display_pipe_clock_st;
-typedef struct _vcs_dpi_display_arb_params_st	display_arb_params_st;
+typedef struct _vcs_dpi_voltage_scaling_st voltage_scaling_st;
+typedef struct _vcs_dpi_soc_bounding_box_st soc_bounding_box_st;
+typedef struct _vcs_dpi_ip_params_st ip_params_st;
+typedef struct _vcs_dpi_display_pipe_source_params_st display_pipe_source_params_st;
+typedef struct _vcs_dpi_display_output_params_st display_output_params_st;
+typedef struct _vcs_dpi_display_bandwidth_st display_bandwidth_st;
+typedef struct _vcs_dpi_scaler_ratio_depth_st scaler_ratio_depth_st;
+typedef struct _vcs_dpi_scaler_taps_st scaler_taps_st;
+typedef struct _vcs_dpi_display_pipe_dest_params_st display_pipe_dest_params_st;
+typedef struct _vcs_dpi_display_pipe_params_st display_pipe_params_st;
+typedef struct _vcs_dpi_display_clocks_and_cfg_st display_clocks_and_cfg_st;
+typedef struct _vcs_dpi_display_e2e_pipe_params_st display_e2e_pipe_params_st;
+typedef struct _vcs_dpi_dchub_buffer_sizing_st dchub_buffer_sizing_st;
+typedef struct _vcs_dpi_watermarks_perf_st watermarks_perf_st;
+typedef struct _vcs_dpi_cstate_pstate_watermarks_st cstate_pstate_watermarks_st;
+typedef struct _vcs_dpi_wm_calc_pipe_params_st wm_calc_pipe_params_st;
+typedef struct _vcs_dpi_vratio_pre_st vratio_pre_st;
+typedef struct _vcs_dpi_display_data_rq_misc_params_st display_data_rq_misc_params_st;
+typedef struct _vcs_dpi_display_data_rq_sizing_params_st display_data_rq_sizing_params_st;
+typedef struct _vcs_dpi_display_data_rq_dlg_params_st display_data_rq_dlg_params_st;
+typedef struct _vcs_dpi_display_cur_rq_dlg_params_st display_cur_rq_dlg_params_st;
+typedef struct _vcs_dpi_display_rq_dlg_params_st display_rq_dlg_params_st;
+typedef struct _vcs_dpi_display_rq_sizing_params_st display_rq_sizing_params_st;
+typedef struct _vcs_dpi_display_rq_misc_params_st display_rq_misc_params_st;
+typedef struct _vcs_dpi_display_rq_params_st display_rq_params_st;
+typedef struct _vcs_dpi_display_dlg_regs_st display_dlg_regs_st;
+typedef struct _vcs_dpi_display_ttu_regs_st display_ttu_regs_st;
+typedef struct _vcs_dpi_display_data_rq_regs_st display_data_rq_regs_st;
+typedef struct _vcs_dpi_display_rq_regs_st display_rq_regs_st;
+typedef struct _vcs_dpi_display_dlg_sys_params_st display_dlg_sys_params_st;
+typedef struct _vcs_dpi_display_dlg_prefetch_param_st display_dlg_prefetch_param_st;
+typedef struct _vcs_dpi_display_pipe_clock_st display_pipe_clock_st;
+typedef struct _vcs_dpi_display_arb_params_st display_arb_params_st;
 
 struct _vcs_dpi_voltage_scaling_st {
 	int state;
@@ -72,89 +72,107 @@
 	double dppclk_mhz;
 };
 
-struct	_vcs_dpi_soc_bounding_box_st	{
-	double	sr_exit_time_us;
-	double	sr_enter_plus_exit_time_us;
-	double	urgent_latency_us;
-	double	writeback_latency_us;
-	double	ideal_dram_bw_after_urgent_percent;
-	unsigned int	max_request_size_bytes;
-	double	downspread_percent;
-	double	dram_page_open_time_ns;
-	double	dram_rw_turnaround_time_ns;
-	double	dram_return_buffer_per_channel_bytes;
-	double	dram_channel_width_bytes;
+struct _vcs_dpi_soc_bounding_box_st {
+	double sr_exit_time_us;
+	double sr_enter_plus_exit_time_us;
+	double urgent_latency_us;
+	double urgent_latency_pixel_data_only_us;
+	double urgent_latency_pixel_mixed_with_vm_data_us;
+	double urgent_latency_vm_data_only_us;
+	double writeback_latency_us;
+	double ideal_dram_bw_after_urgent_percent;
+	double pct_ideal_dram_sdp_bw_after_urgent_pixel_only; // PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelDataOnly
+	double pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm;
+	double pct_ideal_dram_sdp_bw_after_urgent_vm_only;
+	double max_avg_sdp_bw_use_normal_percent;
+	double max_avg_dram_bw_use_normal_percent;
+	unsigned int max_request_size_bytes;
+	double downspread_percent;
+	double dram_page_open_time_ns;
+	double dram_rw_turnaround_time_ns;
+	double dram_return_buffer_per_channel_bytes;
+	double dram_channel_width_bytes;
 	double fabric_datapath_to_dcn_data_return_bytes;
 	double dcn_downspread_percent;
 	double dispclk_dppclk_vco_speed_mhz;
 	double dfs_vco_period_ps;
-	unsigned int	round_trip_ping_latency_dcfclk_cycles;
-	unsigned int	urgent_out_of_order_return_per_channel_bytes;
-	unsigned int	channel_interleave_bytes;
-	unsigned int	num_banks;
-	unsigned int	num_chans;
-	unsigned int	vmm_page_size_bytes;
-	double	dram_clock_change_latency_us;
-	double	writeback_dram_clock_change_latency_us;
-	unsigned int	return_bus_width_bytes;
-	unsigned int	voltage_override;
-	double	xfc_bus_transport_time_us;
-	double	xfc_xbuf_latency_tolerance_us;
+	unsigned int urgent_out_of_order_return_per_channel_pixel_only_bytes;
+	unsigned int urgent_out_of_order_return_per_channel_pixel_and_vm_bytes;
+	unsigned int urgent_out_of_order_return_per_channel_vm_only_bytes;
+	unsigned int round_trip_ping_latency_dcfclk_cycles;
+	unsigned int urgent_out_of_order_return_per_channel_bytes;
+	unsigned int channel_interleave_bytes;
+	unsigned int num_banks;
+	unsigned int num_chans;
+	unsigned int vmm_page_size_bytes;
+	double dram_clock_change_latency_us;
+	double writeback_dram_clock_change_latency_us;
+	unsigned int return_bus_width_bytes;
+	unsigned int voltage_override;
+	double xfc_bus_transport_time_us;
+	double xfc_xbuf_latency_tolerance_us;
+	int use_urgent_burst_bw;
 	struct _vcs_dpi_voltage_scaling_st clock_limits[7];
 };
 
-struct	_vcs_dpi_ip_params_st	{
-	unsigned int	max_inter_dcn_tile_repeaters;
-	unsigned int	num_dsc;
-	unsigned int	odm_capable;
-	unsigned int	rob_buffer_size_kbytes;
-	unsigned int	det_buffer_size_kbytes;
-	unsigned int	dpte_buffer_size_in_pte_reqs;
-	unsigned int	pde_proc_buffer_size_64k_reqs;
-	unsigned int	dpp_output_buffer_pixels;
-	unsigned int	opp_output_buffer_lines;
-	unsigned int	pixel_chunk_size_kbytes;
-	unsigned char	pte_enable;
-	unsigned int	pte_chunk_size_kbytes;
-	unsigned int	meta_chunk_size_kbytes;
-	unsigned int	writeback_chunk_size_kbytes;
-	unsigned int	line_buffer_size_bits;
-	unsigned int	max_line_buffer_lines;
-	unsigned int	writeback_luma_buffer_size_kbytes;
-	unsigned int	writeback_chroma_buffer_size_kbytes;
-	unsigned int	writeback_chroma_line_buffer_width_pixels;
-	unsigned int	max_page_table_levels;
-	unsigned int	max_num_dpp;
-	unsigned int	max_num_otg;
-	unsigned int	cursor_chunk_size;
-	unsigned int	cursor_buffer_size;
-	unsigned int	max_num_wb;
-	unsigned int	max_dchub_pscl_bw_pix_per_clk;
-	unsigned int	max_pscl_lb_bw_pix_per_clk;
-	unsigned int	max_lb_vscl_bw_pix_per_clk;
-	unsigned int	max_vscl_hscl_bw_pix_per_clk;
-	double	max_hscl_ratio;
-	double	max_vscl_ratio;
-	unsigned int	hscl_mults;
-	unsigned int	vscl_mults;
-	unsigned int	max_hscl_taps;
-	unsigned int	max_vscl_taps;
-	unsigned int	xfc_supported;
-	unsigned int	xfc_fill_constant_bytes;
-	double	dispclk_ramp_margin_percent;
-	double	xfc_fill_bw_overhead_percent;
-	double	underscan_factor;
-	unsigned int	min_vblank_lines;
-	unsigned int	dppclk_delay_subtotal;
-	unsigned int	dispclk_delay_subtotal;
-	unsigned int	dcfclk_cstate_latency;
-	unsigned int	dppclk_delay_scl;
-	unsigned int	dppclk_delay_scl_lb_only;
-	unsigned int	dppclk_delay_cnvc_formatter;
-	unsigned int	dppclk_delay_cnvc_cursor;
-	unsigned int	is_line_buffer_bpp_fixed;
-	unsigned int	line_buffer_fixed_bpp;
-	unsigned int	dcc_supported;
+struct _vcs_dpi_ip_params_st {
+	bool gpuvm_enable;
+	bool hostvm_enable;
+	unsigned int gpuvm_max_page_table_levels;
+	unsigned int hostvm_max_page_table_levels;
+	unsigned int hostvm_cached_page_table_levels;
+	unsigned int pte_group_size_bytes;
+	unsigned int max_inter_dcn_tile_repeaters;
+	unsigned int num_dsc;
+	unsigned int odm_capable;
+	unsigned int rob_buffer_size_kbytes;
+	unsigned int det_buffer_size_kbytes;
+	unsigned int dpte_buffer_size_in_pte_reqs;
+	unsigned int pde_proc_buffer_size_64k_reqs;
+	unsigned int dpp_output_buffer_pixels;
+	unsigned int opp_output_buffer_lines;
+	unsigned int pixel_chunk_size_kbytes;
+	unsigned char pte_enable;
+	unsigned int pte_chunk_size_kbytes;
+	unsigned int meta_chunk_size_kbytes;
+	unsigned int writeback_chunk_size_kbytes;
+	unsigned int line_buffer_size_bits;
+	unsigned int max_line_buffer_lines;
+	unsigned int writeback_luma_buffer_size_kbytes;
+	unsigned int writeback_chroma_buffer_size_kbytes;
+	unsigned int writeback_chroma_line_buffer_width_pixels;
+	unsigned int max_page_table_levels;
+	unsigned int max_num_dpp;
+	unsigned int max_num_otg;
+	unsigned int cursor_chunk_size;
+	unsigned int cursor_buffer_size;
+	unsigned int max_num_wb;
+	unsigned int max_dchub_pscl_bw_pix_per_clk;
+	unsigned int max_pscl_lb_bw_pix_per_clk;
+	unsigned int max_lb_vscl_bw_pix_per_clk;
+	unsigned int max_vscl_hscl_bw_pix_per_clk;
+	double max_hscl_ratio;
+	double max_vscl_ratio;
+	unsigned int hscl_mults;
+	unsigned int vscl_mults;
+	unsigned int max_hscl_taps;
+	unsigned int max_vscl_taps;
+	unsigned int xfc_supported;
+	unsigned int xfc_fill_constant_bytes;
+	double dispclk_ramp_margin_percent;
+	double xfc_fill_bw_overhead_percent;
+	double underscan_factor;
+	unsigned int min_vblank_lines;
+	unsigned int dppclk_delay_subtotal;
+	unsigned int dispclk_delay_subtotal;
+	unsigned int dcfclk_cstate_latency;
+	unsigned int dppclk_delay_scl;
+	unsigned int dppclk_delay_scl_lb_only;
+	unsigned int dppclk_delay_cnvc_formatter;
+	unsigned int dppclk_delay_cnvc_cursor;
+	unsigned int is_line_buffer_bpp_fixed;
+	unsigned int line_buffer_fixed_bpp;
+	unsigned int dcc_supported;
 
 	unsigned int IsLineBufferBppFixed;
 	unsigned int LineBufferFixedBpp;
@@ -169,41 +187,45 @@
 	int xfc_slv_chunk_size_bytes;
 };
 
-struct	_vcs_dpi_display_pipe_source_params_st	{
-	int	source_format;
-	unsigned char	dcc;
-	unsigned int	dcc_override;
-	unsigned int	dcc_rate;
-	unsigned char	dcc_use_global;
-	unsigned char	vm;
-	unsigned char	vm_levels_force_en;
-	unsigned int	vm_levels_force;
-	int	source_scan;
-	int	sw_mode;
-	int	macro_tile_size;
-	unsigned char	is_display_sw;
-	unsigned int	viewport_width;
-	unsigned int	viewport_height;
-	unsigned int	viewport_y_y;
-	unsigned int	viewport_y_c;
-	unsigned int	viewport_width_c;
-	unsigned int	viewport_height_c;
-	unsigned int	data_pitch;
-	unsigned int	data_pitch_c;
-	unsigned int	meta_pitch;
-	unsigned int	meta_pitch_c;
-	unsigned int	cur0_src_width;
-	int	cur0_bpp;
-	unsigned int	cur1_src_width;
-	int	cur1_bpp;
-	int	num_cursors;
-	unsigned char	is_hsplit;
-	unsigned char	dynamic_metadata_enable;
-	unsigned int	dynamic_metadata_lines_before_active;
-	unsigned int	dynamic_metadata_xmit_bytes;
-	unsigned int	hsplit_grp;
-	unsigned char	xfc_enable;
-	unsigned char	xfc_slave;
+struct _vcs_dpi_display_pipe_source_params_st {
+	int source_format;
+	unsigned char dcc;
+	unsigned int dcc_override;
+	unsigned int dcc_rate;
+	unsigned char dcc_use_global;
+	unsigned char vm;
+	bool gpuvm;    // gpuvm enabled
+	bool hostvm;    // hostvm enabled
+	bool gpuvm_levels_force_en;
+	unsigned int gpuvm_levels_force;
+	bool hostvm_levels_force_en;
+	unsigned int hostvm_levels_force;
+	int source_scan;
+	int sw_mode;
+	int macro_tile_size;
+	unsigned char is_display_sw;
+	unsigned int viewport_width;
+	unsigned int viewport_height;
+	unsigned int viewport_y_y;
+	unsigned int viewport_y_c;
+	unsigned int viewport_width_c;
+	unsigned int viewport_height_c;
+	unsigned int data_pitch;
+	unsigned int data_pitch_c;
+	unsigned int meta_pitch;
+	unsigned int meta_pitch_c;
+	unsigned int cur0_src_width;
+	int cur0_bpp;
+	unsigned int cur1_src_width;
+	int cur1_bpp;
+	int num_cursors;
+	unsigned char is_hsplit;
+	unsigned char dynamic_metadata_enable;
+	unsigned int dynamic_metadata_lines_before_active;
+	unsigned int dynamic_metadata_xmit_bytes;
+	unsigned int hsplit_grp;
+	unsigned char xfc_enable;
+	unsigned char xfc_slave;
 	struct _vcs_dpi_display_xfc_params_st xfc_params;
 };
 struct writeback_st {
@@ -215,338 +237,339 @@
 	int wb_vtaps_luma;
 	int wb_htaps_chroma;
 	int wb_vtaps_chroma;
-	int wb_hratio;
-	int wb_vratio;
+	double wb_hratio;
+	double wb_vratio;
 };
 
-struct	_vcs_dpi_display_output_params_st	{
-	int	dp_lanes;
-	int	output_bpp;
-	int	dsc_enable;
-	int	wb_enable;
-	int	opp_input_bpc;
-	int	output_type;
-	int	output_format;
-	int	output_standard;
-	int	dsc_slices;
+struct _vcs_dpi_display_output_params_st {
+	int dp_lanes;
+	int output_bpp;
+	int dsc_enable;
+	int wb_enable;
+	int num_active_wb;
+	int output_bpc;
+	int output_type;
+	int output_format;
+	int output_standard;
+	int dsc_slices;
 	struct writeback_st wb;
 };
 
-struct	_vcs_dpi_display_bandwidth_st	{
-	double	total_bw_consumed_gbps;
-	double	guaranteed_urgent_return_bw_gbps;
+struct _vcs_dpi_display_bandwidth_st {
+	double total_bw_consumed_gbps;
+	double guaranteed_urgent_return_bw_gbps;
 };
 
-struct	_vcs_dpi_scaler_ratio_depth_st	{
-	double	hscl_ratio;
-	double	vscl_ratio;
-	double	hscl_ratio_c;
-	double	vscl_ratio_c;
-	double	vinit;
-	double	vinit_c;
-	double	vinit_bot;
-	double	vinit_bot_c;
-	int	lb_depth;
-	int	scl_enable;
+struct _vcs_dpi_scaler_ratio_depth_st {
+	double hscl_ratio;
+	double vscl_ratio;
+	double hscl_ratio_c;
+	double vscl_ratio_c;
+	double vinit;
+	double vinit_c;
+	double vinit_bot;
+	double vinit_bot_c;
+	int lb_depth;
+	int scl_enable;
 };
 
-struct	_vcs_dpi_scaler_taps_st	{
-	unsigned int	htaps;
-	unsigned int	vtaps;
-	unsigned int	htaps_c;
-	unsigned int	vtaps_c;
+struct _vcs_dpi_scaler_taps_st {
+	unsigned int htaps;
+	unsigned int vtaps;
+	unsigned int htaps_c;
+	unsigned int vtaps_c;
 };
 
-struct	_vcs_dpi_display_pipe_dest_params_st	{
-	unsigned int	recout_width;
-	unsigned int	recout_height;
-	unsigned int	full_recout_width;
-	unsigned int	full_recout_height;
-	unsigned int	hblank_start;
-	unsigned int	hblank_end;
-	unsigned int	vblank_start;
-	unsigned int	vblank_end;
-	unsigned int	htotal;
-	unsigned int	vtotal;
-	unsigned int	vactive;
-	unsigned int	hactive;
-	unsigned int	vstartup_start;
-	unsigned int	vupdate_offset;
-	unsigned int	vupdate_width;
-	unsigned int	vready_offset;
-	unsigned char	interlaced;
-	unsigned char	underscan;
-	double	pixel_rate_mhz;
-	unsigned char	synchronized_vblank_all_planes;
-	unsigned char	otg_inst;
-	unsigned char	odm_split_cnt;
-	unsigned char	odm_combine;
+struct _vcs_dpi_display_pipe_dest_params_st {
+	unsigned int recout_width;
+	unsigned int recout_height;
+	unsigned int full_recout_width;
+	unsigned int full_recout_height;
+	unsigned int hblank_start;
+	unsigned int hblank_end;
+	unsigned int vblank_start;
+	unsigned int vblank_end;
+	unsigned int htotal;
+	unsigned int vtotal;
+	unsigned int vactive;
+	unsigned int hactive;
+	unsigned int vstartup_start;
+	unsigned int vupdate_offset;
+	unsigned int vupdate_width;
+	unsigned int vready_offset;
+	unsigned char interlaced;
+	unsigned char underscan;
+	double pixel_rate_mhz;
+	unsigned char synchronized_vblank_all_planes;
+	unsigned char otg_inst;
+	unsigned char odm_split_cnt;
+	unsigned char odm_combine;
 };
 
-struct	_vcs_dpi_display_pipe_params_st	{
-	display_pipe_source_params_st	src;
-	display_pipe_dest_params_st	dest;
-	scaler_ratio_depth_st	scale_ratio_depth;
-	scaler_taps_st	scale_taps;
+struct _vcs_dpi_display_pipe_params_st {
+	display_pipe_source_params_st src;
+	display_pipe_dest_params_st dest;
+	scaler_ratio_depth_st scale_ratio_depth;
+	scaler_taps_st scale_taps;
 };
 
-struct	_vcs_dpi_display_clocks_and_cfg_st	{
-	int	voltage;
-	double	dppclk_mhz;
-	double	refclk_mhz;
-	double	dispclk_mhz;
-	double	dcfclk_mhz;
-	double	socclk_mhz;
+struct _vcs_dpi_display_clocks_and_cfg_st {
+	int voltage;
+	double dppclk_mhz;
+	double refclk_mhz;
+	double dispclk_mhz;
+	double dcfclk_mhz;
+	double socclk_mhz;
 };
 
-struct	_vcs_dpi_display_e2e_pipe_params_st	{
-	display_pipe_params_st	pipe;
-	display_output_params_st	dout;
-	display_clocks_and_cfg_st	clks_cfg;
+struct _vcs_dpi_display_e2e_pipe_params_st {
+	display_pipe_params_st pipe;
+	display_output_params_st dout;
+	display_clocks_and_cfg_st clks_cfg;
 };
 
-struct	_vcs_dpi_dchub_buffer_sizing_st	{
-	unsigned int	swath_width_y;
-	unsigned int	swath_height_y;
-	unsigned int	swath_height_c;
-	unsigned int	detail_buffer_size_y;
+struct _vcs_dpi_dchub_buffer_sizing_st {
+	unsigned int swath_width_y;
+	unsigned int swath_height_y;
+	unsigned int swath_height_c;
+	unsigned int detail_buffer_size_y;
 };
 
-struct	_vcs_dpi_watermarks_perf_st	{
-	double	stutter_eff_in_active_region_percent;
-	double	urgent_latency_supported_us;
-	double	non_urgent_latency_supported_us;
-	double	dram_clock_change_margin_us;
-	double	dram_access_eff_percent;
+struct _vcs_dpi_watermarks_perf_st {
+	double stutter_eff_in_active_region_percent;
+	double urgent_latency_supported_us;
+	double non_urgent_latency_supported_us;
+	double dram_clock_change_margin_us;
+	double dram_access_eff_percent;
 };
 
-struct	_vcs_dpi_cstate_pstate_watermarks_st	{
-	double	cstate_exit_us;
-	double	cstate_enter_plus_exit_us;
-	double	pstate_change_us;
+struct _vcs_dpi_cstate_pstate_watermarks_st {
+	double cstate_exit_us;
+	double cstate_enter_plus_exit_us;
+	double pstate_change_us;
 };
 
-struct	_vcs_dpi_wm_calc_pipe_params_st	{
-	unsigned int	num_dpp;
-	int	voltage;
-	int	output_type;
-	double	dcfclk_mhz;
-	double	socclk_mhz;
-	double	dppclk_mhz;
-	double	pixclk_mhz;
-	unsigned char	interlace_en;
-	unsigned char	pte_enable;
-	unsigned char	dcc_enable;
-	double	dcc_rate;
-	double	bytes_per_pixel_c;
-	double	bytes_per_pixel_y;
-	unsigned int	swath_width_y;
-	unsigned int	swath_height_y;
-	unsigned int	swath_height_c;
-	unsigned int	det_buffer_size_y;
-	double	h_ratio;
-	double	v_ratio;
-	unsigned int	h_taps;
-	unsigned int	h_total;
-	unsigned int	v_total;
-	unsigned int	v_active;
-	unsigned int	e2e_index;
-	double	display_pipe_line_delivery_time;
-	double	read_bw;
-	unsigned int	lines_in_det_y;
-	unsigned int	lines_in_det_y_rounded_down_to_swath;
-	double	full_det_buffering_time;
-	double	dcfclk_deepsleep_mhz_per_plane;
+struct _vcs_dpi_wm_calc_pipe_params_st {
+	unsigned int num_dpp;
+	int voltage;
+	int output_type;
+	double dcfclk_mhz;
+	double socclk_mhz;
+	double dppclk_mhz;
+	double pixclk_mhz;
+	unsigned char interlace_en;
+	unsigned char pte_enable;
+	unsigned char dcc_enable;
+	double dcc_rate;
+	double bytes_per_pixel_c;
+	double bytes_per_pixel_y;
+	unsigned int swath_width_y;
+	unsigned int swath_height_y;
+	unsigned int swath_height_c;
+	unsigned int det_buffer_size_y;
+	double h_ratio;
+	double v_ratio;
+	unsigned int h_taps;
+	unsigned int h_total;
+	unsigned int v_total;
+	unsigned int v_active;
+	unsigned int e2e_index;
+	double display_pipe_line_delivery_time;
+	double read_bw;
+	unsigned int lines_in_det_y;
+	unsigned int lines_in_det_y_rounded_down_to_swath;
+	double full_det_buffering_time;
+	double dcfclk_deepsleep_mhz_per_plane;
 };
 
-struct	_vcs_dpi_vratio_pre_st	{
-	double	vratio_pre_l;
-	double	vratio_pre_c;
+struct _vcs_dpi_vratio_pre_st {
+	double vratio_pre_l;
+	double vratio_pre_c;
 };
 
-struct	_vcs_dpi_display_data_rq_misc_params_st	{
-	unsigned int	full_swath_bytes;
-	unsigned int	stored_swath_bytes;
-	unsigned int	blk256_height;
-	unsigned int	blk256_width;
-	unsigned int	req_height;
-	unsigned int	req_width;
+struct _vcs_dpi_display_data_rq_misc_params_st {
+	unsigned int full_swath_bytes;
+	unsigned int stored_swath_bytes;
+	unsigned int blk256_height;
+	unsigned int blk256_width;
+	unsigned int req_height;
+	unsigned int req_width;
 };
 
-struct	_vcs_dpi_display_data_rq_sizing_params_st	{
-	unsigned int	chunk_bytes;
-	unsigned int	min_chunk_bytes;
-	unsigned int	meta_chunk_bytes;
-	unsigned int	min_meta_chunk_bytes;
-	unsigned int	mpte_group_bytes;
-	unsigned int	dpte_group_bytes;
+struct _vcs_dpi_display_data_rq_sizing_params_st {
+	unsigned int chunk_bytes;
+	unsigned int min_chunk_bytes;
+	unsigned int meta_chunk_bytes;
+	unsigned int min_meta_chunk_bytes;
+	unsigned int mpte_group_bytes;
+	unsigned int dpte_group_bytes;
 };
 
-struct	_vcs_dpi_display_data_rq_dlg_params_st	{
-	unsigned int	swath_width_ub;
-	unsigned int	swath_height;
-	unsigned int	req_per_swath_ub;
-	unsigned int	meta_pte_bytes_per_frame_ub;
-	unsigned int	dpte_req_per_row_ub;
-	unsigned int	dpte_groups_per_row_ub;
-	unsigned int	dpte_row_height;
-	unsigned int	dpte_bytes_per_row_ub;
-	unsigned int	meta_chunks_per_row_ub;
-	unsigned int	meta_req_per_row_ub;
-	unsigned int	meta_row_height;
-	unsigned int	meta_bytes_per_row_ub;
+struct _vcs_dpi_display_data_rq_dlg_params_st {
+	unsigned int swath_width_ub;
+	unsigned int swath_height;
+	unsigned int req_per_swath_ub;
+	unsigned int meta_pte_bytes_per_frame_ub;
+	unsigned int dpte_req_per_row_ub;
+	unsigned int dpte_groups_per_row_ub;
+	unsigned int dpte_row_height;
+	unsigned int dpte_bytes_per_row_ub;
+	unsigned int meta_chunks_per_row_ub;
+	unsigned int meta_req_per_row_ub;
+	unsigned int meta_row_height;
+	unsigned int meta_bytes_per_row_ub;
 };
 
-struct	_vcs_dpi_display_cur_rq_dlg_params_st	{
-	unsigned char	enable;
-	unsigned int	swath_height;
-	unsigned int	req_per_line;
+struct _vcs_dpi_display_cur_rq_dlg_params_st {
+	unsigned char enable;
+	unsigned int swath_height;
+	unsigned int req_per_line;
 };
 
-struct	_vcs_dpi_display_rq_dlg_params_st	{
-	display_data_rq_dlg_params_st	rq_l;
-	display_data_rq_dlg_params_st	rq_c;
-	display_cur_rq_dlg_params_st	rq_cur0;
+struct _vcs_dpi_display_rq_dlg_params_st {
+	display_data_rq_dlg_params_st rq_l;
+	display_data_rq_dlg_params_st rq_c;
+	display_cur_rq_dlg_params_st rq_cur0;
 };
 
-struct	_vcs_dpi_display_rq_sizing_params_st	{
-	display_data_rq_sizing_params_st	rq_l;
-	display_data_rq_sizing_params_st	rq_c;
+struct _vcs_dpi_display_rq_sizing_params_st {
+	display_data_rq_sizing_params_st rq_l;
+	display_data_rq_sizing_params_st rq_c;
 };
 
-struct	_vcs_dpi_display_rq_misc_params_st	{
-	display_data_rq_misc_params_st	rq_l;
-	display_data_rq_misc_params_st	rq_c;
+struct _vcs_dpi_display_rq_misc_params_st {
+	display_data_rq_misc_params_st rq_l;
+	display_data_rq_misc_params_st rq_c;
 };
 
-struct	_vcs_dpi_display_rq_params_st	{
-	unsigned char	yuv420;
-	unsigned char	yuv420_10bpc;
-	display_rq_misc_params_st	misc;
-	display_rq_sizing_params_st	sizing;
-	display_rq_dlg_params_st	dlg;
+struct _vcs_dpi_display_rq_params_st {
+	unsigned char yuv420;
+	unsigned char yuv420_10bpc;
+	display_rq_misc_params_st misc;
+	display_rq_sizing_params_st sizing;
+	display_rq_dlg_params_st dlg;
 };
 
-struct	_vcs_dpi_display_dlg_regs_st	{
-	unsigned int	refcyc_h_blank_end;
-	unsigned int	dlg_vblank_end;
-	unsigned int	min_dst_y_next_start;
-	unsigned int	refcyc_per_htotal;
-	unsigned int	refcyc_x_after_scaler;
-	unsigned int	dst_y_after_scaler;
-	unsigned int	dst_y_prefetch;
-	unsigned int	dst_y_per_vm_vblank;
-	unsigned int	dst_y_per_row_vblank;
-	unsigned int	dst_y_per_vm_flip;
-	unsigned int	dst_y_per_row_flip;
-	unsigned int	ref_freq_to_pix_freq;
-	unsigned int	vratio_prefetch;
-	unsigned int	vratio_prefetch_c;
-	unsigned int	refcyc_per_pte_group_vblank_l;
-	unsigned int	refcyc_per_pte_group_vblank_c;
-	unsigned int	refcyc_per_meta_chunk_vblank_l;
-	unsigned int	refcyc_per_meta_chunk_vblank_c;
-	unsigned int	refcyc_per_pte_group_flip_l;
-	unsigned int	refcyc_per_pte_group_flip_c;
-	unsigned int	refcyc_per_meta_chunk_flip_l;
-	unsigned int	refcyc_per_meta_chunk_flip_c;
-	unsigned int	dst_y_per_pte_row_nom_l;
-	unsigned int	dst_y_per_pte_row_nom_c;
-	unsigned int	refcyc_per_pte_group_nom_l;
-	unsigned int	refcyc_per_pte_group_nom_c;
-	unsigned int	dst_y_per_meta_row_nom_l;
-	unsigned int	dst_y_per_meta_row_nom_c;
-	unsigned int	refcyc_per_meta_chunk_nom_l;
-	unsigned int	refcyc_per_meta_chunk_nom_c;
-	unsigned int	refcyc_per_line_delivery_pre_l;
-	unsigned int	refcyc_per_line_delivery_pre_c;
-	unsigned int	refcyc_per_line_delivery_l;
-	unsigned int	refcyc_per_line_delivery_c;
-	unsigned int	chunk_hdl_adjust_cur0;
-	unsigned int	chunk_hdl_adjust_cur1;
-	unsigned int	vready_after_vcount0;
-	unsigned int	dst_y_offset_cur0;
-	unsigned int	dst_y_offset_cur1;
-	unsigned int	xfc_reg_transfer_delay;
-	unsigned int	xfc_reg_precharge_delay;
-	unsigned int	xfc_reg_remote_surface_flip_latency;
-	unsigned int	xfc_reg_prefetch_margin;
-	unsigned int	dst_y_delta_drq_limit;
+struct _vcs_dpi_display_dlg_regs_st {
+	unsigned int refcyc_h_blank_end;
+	unsigned int dlg_vblank_end;
+	unsigned int min_dst_y_next_start;
+	unsigned int refcyc_per_htotal;
+	unsigned int refcyc_x_after_scaler;
+	unsigned int dst_y_after_scaler;
+	unsigned int dst_y_prefetch;
+	unsigned int dst_y_per_vm_vblank;
+	unsigned int dst_y_per_row_vblank;
+	unsigned int dst_y_per_vm_flip;
+	unsigned int dst_y_per_row_flip;
+	unsigned int ref_freq_to_pix_freq;
+	unsigned int vratio_prefetch;
+	unsigned int vratio_prefetch_c;
+	unsigned int refcyc_per_pte_group_vblank_l;
+	unsigned int refcyc_per_pte_group_vblank_c;
+	unsigned int refcyc_per_meta_chunk_vblank_l;
+	unsigned int refcyc_per_meta_chunk_vblank_c;
+	unsigned int refcyc_per_pte_group_flip_l;
+	unsigned int refcyc_per_pte_group_flip_c;
+	unsigned int refcyc_per_meta_chunk_flip_l;
+	unsigned int refcyc_per_meta_chunk_flip_c;
+	unsigned int dst_y_per_pte_row_nom_l;
+	unsigned int dst_y_per_pte_row_nom_c;
+	unsigned int refcyc_per_pte_group_nom_l;
+	unsigned int refcyc_per_pte_group_nom_c;
+	unsigned int dst_y_per_meta_row_nom_l;
+	unsigned int dst_y_per_meta_row_nom_c;
+	unsigned int refcyc_per_meta_chunk_nom_l;
+	unsigned int refcyc_per_meta_chunk_nom_c;
+	unsigned int refcyc_per_line_delivery_pre_l;
+	unsigned int refcyc_per_line_delivery_pre_c;
+	unsigned int refcyc_per_line_delivery_l;
+	unsigned int refcyc_per_line_delivery_c;
+	unsigned int chunk_hdl_adjust_cur0;
+	unsigned int chunk_hdl_adjust_cur1;
+	unsigned int vready_after_vcount0;
+	unsigned int dst_y_offset_cur0;
+	unsigned int dst_y_offset_cur1;
+	unsigned int xfc_reg_transfer_delay;
+	unsigned int xfc_reg_precharge_delay;
+	unsigned int xfc_reg_remote_surface_flip_latency;
+	unsigned int xfc_reg_prefetch_margin;
+	unsigned int dst_y_delta_drq_limit;
 };
 
-struct	_vcs_dpi_display_ttu_regs_st	{
-	unsigned int	qos_level_low_wm;
-	unsigned int	qos_level_high_wm;
-	unsigned int	min_ttu_vblank;
-	unsigned int	qos_level_flip;
-	unsigned int	refcyc_per_req_delivery_l;
-	unsigned int	refcyc_per_req_delivery_c;
-	unsigned int	refcyc_per_req_delivery_cur0;
-	unsigned int	refcyc_per_req_delivery_cur1;
-	unsigned int	refcyc_per_req_delivery_pre_l;
-	unsigned int	refcyc_per_req_delivery_pre_c;
-	unsigned int	refcyc_per_req_delivery_pre_cur0;
-	unsigned int	refcyc_per_req_delivery_pre_cur1;
-	unsigned int	qos_level_fixed_l;
-	unsigned int	qos_level_fixed_c;
-	unsigned int	qos_level_fixed_cur0;
-	unsigned int	qos_level_fixed_cur1;
-	unsigned int	qos_ramp_disable_l;
-	unsigned int	qos_ramp_disable_c;
-	unsigned int	qos_ramp_disable_cur0;
-	unsigned int	qos_ramp_disable_cur1;
+struct _vcs_dpi_display_ttu_regs_st {
+	unsigned int qos_level_low_wm;
+	unsigned int qos_level_high_wm;
+	unsigned int min_ttu_vblank;
+	unsigned int qos_level_flip;
+	unsigned int refcyc_per_req_delivery_l;
+	unsigned int refcyc_per_req_delivery_c;
+	unsigned int refcyc_per_req_delivery_cur0;
+	unsigned int refcyc_per_req_delivery_cur1;
+	unsigned int refcyc_per_req_delivery_pre_l;
+	unsigned int refcyc_per_req_delivery_pre_c;
+	unsigned int refcyc_per_req_delivery_pre_cur0;
+	unsigned int refcyc_per_req_delivery_pre_cur1;
+	unsigned int qos_level_fixed_l;
+	unsigned int qos_level_fixed_c;
+	unsigned int qos_level_fixed_cur0;
+	unsigned int qos_level_fixed_cur1;
+	unsigned int qos_ramp_disable_l;
+	unsigned int qos_ramp_disable_c;
+	unsigned int qos_ramp_disable_cur0;
+	unsigned int qos_ramp_disable_cur1;
 };
 
-struct	_vcs_dpi_display_data_rq_regs_st	{
-	unsigned int	chunk_size;
-	unsigned int	min_chunk_size;
-	unsigned int	meta_chunk_size;
-	unsigned int	min_meta_chunk_size;
-	unsigned int	dpte_group_size;
-	unsigned int	mpte_group_size;
-	unsigned int	swath_height;
-	unsigned int	pte_row_height_linear;
+struct _vcs_dpi_display_data_rq_regs_st {
+	unsigned int chunk_size;
+	unsigned int min_chunk_size;
+	unsigned int meta_chunk_size;
+	unsigned int min_meta_chunk_size;
+	unsigned int dpte_group_size;
+	unsigned int mpte_group_size;
+	unsigned int swath_height;
+	unsigned int pte_row_height_linear;
 };
 
-struct	_vcs_dpi_display_rq_regs_st	{
-	display_data_rq_regs_st	rq_regs_l;
-	display_data_rq_regs_st	rq_regs_c;
-	unsigned int	drq_expansion_mode;
-	unsigned int	prq_expansion_mode;
-	unsigned int	mrq_expansion_mode;
-	unsigned int	crq_expansion_mode;
-	unsigned int	plane1_base_address;
+struct _vcs_dpi_display_rq_regs_st {
+	display_data_rq_regs_st rq_regs_l;
+	display_data_rq_regs_st rq_regs_c;
+	unsigned int drq_expansion_mode;
+	unsigned int prq_expansion_mode;
+	unsigned int mrq_expansion_mode;
+	unsigned int crq_expansion_mode;
+	unsigned int plane1_base_address;
 };
 
-struct	_vcs_dpi_display_dlg_sys_params_st	{
-	double	t_mclk_wm_us;
-	double	t_urg_wm_us;
-	double	t_sr_wm_us;
-	double	t_extra_us;
-	double	mem_trip_us;
-	double	t_srx_delay_us;
-	double	deepsleep_dcfclk_mhz;
-	double	total_flip_bw;
-	unsigned int	total_flip_bytes;
+struct _vcs_dpi_display_dlg_sys_params_st {
+	double t_mclk_wm_us;
+	double t_urg_wm_us;
+	double t_sr_wm_us;
+	double t_extra_us;
+	double mem_trip_us;
+	double t_srx_delay_us;
+	double deepsleep_dcfclk_mhz;
+	double total_flip_bw;
+	unsigned int total_flip_bytes;
 };
 
-struct	_vcs_dpi_display_dlg_prefetch_param_st	{
-	double	prefetch_bw;
-	unsigned int	flip_bytes;
+struct _vcs_dpi_display_dlg_prefetch_param_st {
+	double prefetch_bw;
+	unsigned int flip_bytes;
 };
 
-struct	_vcs_dpi_display_pipe_clock_st	{
-	double	dcfclk_mhz;
-	double	dispclk_mhz;
-	double	socclk_mhz;
-	double	dscclk_mhz[6];
-	double	dppclk_mhz[6];
+struct _vcs_dpi_display_pipe_clock_st {
+	double dcfclk_mhz;
+	double dispclk_mhz;
+	double socclk_mhz;
+	double dscclk_mhz[6];
+	double dppclk_mhz[6];
 };
 
-struct	_vcs_dpi_display_arb_params_st	{
-	int	max_req_outstanding;
-	int	min_req_outstanding;
-	int	sat_level_us;
+struct _vcs_dpi_display_arb_params_st {
+	int max_req_outstanding;
+	int min_req_outstanding;
+	int sat_level_us;
 };
 
 #endif /*__DISPLAY_MODE_STRUCTS_H__*/
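
For orientation, a minimal sketch of how these renamed parameter structs nest
end to end — the values below are hypothetical, and the header above is
assumed to be in scope; this is not code from the patch:

  /* Sketch only: hypothetical values showing how the DML structs nest. */
  static void fill_example_e2e_pipe(display_e2e_pipe_params_st *e2e)
  {
  	e2e->pipe.src.viewport_width  = 1920;	/* plane source rect */
  	e2e->pipe.src.viewport_height = 1080;
  	e2e->pipe.dest.htotal         = 2200;	/* 1080p60 CEA timing */
  	e2e->pipe.dest.vtotal         = 1125;
  	e2e->pipe.dest.pixel_rate_mhz = 148.5;
  	e2e->pipe.scale_taps.htaps    = 4;	/* scaler taps */
  	e2e->pipe.scale_taps.vtaps    = 4;
  	e2e->dout.dp_lanes            = 4;	/* output side */
  	e2e->clks_cfg.dispclk_mhz     = 600.0;	/* clocks/config */
  }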
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
index f9cf083..e8ce085 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h
@@ -35,6 +35,16 @@
 	return (double) dcn_bw_min2(a, b);
 }
 
+static inline double dml_min3(double a, double b, double c)
+{
+	return dml_min(dml_min(a, b), c);
+}
+
+static inline double dml_min4(double a, double b, double c, double d)
+{
+	return dml_min(dml_min(a, b), dml_min(c, d));
+}
+
 static inline double dml_max(double a, double b)
 {
 	return (double) dcn_bw_max2(a, b);
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
index 87b580f..0caee35 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
@@ -75,6 +75,7 @@
 		return true;
 	case DCE_VERSION_11_0:
 	case DCE_VERSION_11_2:
+	case DCE_VERSION_11_22:
 		dal_hw_factory_dce110_init(factory);
 		return true;
 	case DCE_VERSION_12_0:
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
index 0ae8ace..55c7074 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
@@ -72,6 +72,7 @@
 	case DCE_VERSION_10_0:
 	case DCE_VERSION_11_0:
 	case DCE_VERSION_11_2:
+	case DCE_VERSION_11_22:
 		dal_hw_translate_dce110_init(translate);
 		return true;
 	case DCE_VERSION_12_0:
diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c
index abd0095..b7256f5 100644
--- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c
+++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/i2c_hw_engine_dce110.c
@@ -527,7 +527,7 @@
 	REG_GET(MICROSECOND_TIME_BASE_DIV, XTAL_REF_DIV, &xtal_ref_div);
 
 	if (xtal_ref_div == 0) {
-		DC_LOG_WARNING("Invalid base timer divider\n",
+		DC_LOG_WARNING("Invalid base timer divider [%s]\n",
 				__func__);
 		xtal_ref_div = 2;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
index 5cbf662..14dc8c9 100644
--- a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
+++ b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
@@ -83,6 +83,7 @@
 	case DCE_VERSION_8_3:
 		return dal_i2caux_dce80_create(ctx);
 	case DCE_VERSION_11_2:
+	case DCE_VERSION_11_22:
 		return dal_i2caux_dce112_create(ctx);
 	case DCE_VERSION_11_0:
 		return dal_i2caux_dce110_create(ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 8c51ad7..a94942d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -95,11 +95,6 @@
 	struct link_encoder *(*link_enc_create)(
 			const struct encoder_init_data *init);
 
-	enum dc_status (*validate_guaranteed)(
-					struct dc *dc,
-					struct dc_stream_state *stream,
-					struct dc_state *context);
-
 	bool (*validate_bandwidth)(
 					struct dc *dc,
 					struct dc_state *context);
@@ -250,6 +245,7 @@
 	bool all_displays_in_sync;
 	struct dce_watermarks urgent_wm_ns[MAX_PIPES];
 	struct dce_watermarks stutter_exit_wm_ns[MAX_PIPES];
+	struct dce_watermarks stutter_entry_wm_ns[MAX_PIPES];
 	struct dce_watermarks nbp_state_change_wm_ns[MAX_PIPES];
 	int sclk_khz;
 	int sclk_deep_sleep_khz;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h
index a9bfe9f..eece165 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dce_calcs.h
@@ -42,6 +42,8 @@
 	BW_CALCS_VERSION_CARRIZO,
 	BW_CALCS_VERSION_POLARIS10,
 	BW_CALCS_VERSION_POLARIS11,
+	BW_CALCS_VERSION_POLARIS12,
+	BW_CALCS_VERSION_VEGAM,
 	BW_CALCS_VERSION_STONEY,
 	BW_CALCS_VERSION_VEGA10
 };
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
new file mode 100644
index 0000000..02f757d
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2012-15 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DAL_DCHUBBUB_H__
+#define __DAL_DCHUBBUB_H__
+
+
+enum dcc_control {
+	dcc_control__256_256_xxx,
+	dcc_control__128_128_xxx,
+	dcc_control__256_64_64,
+};
+
+enum segment_order {
+	segment_order__na,
+	segment_order__contiguous,
+	segment_order__non_contiguous,
+};
+
+
+struct hubbub_funcs {
+	void (*update_dchub)(
+			struct hubbub *hubbub,
+			struct dchub_init_data *dh_data);
+
+	bool (*get_dcc_compression_cap)(struct hubbub *hubbub,
+			const struct dc_dcc_surface_param *input,
+			struct dc_surface_dcc_cap *output);
+
+	bool (*dcc_support_swizzle)(
+			enum swizzle_mode_values swizzle,
+			unsigned int bytes_per_element,
+			enum segment_order *segment_order_horz,
+			enum segment_order *segment_order_vert);
+
+	bool (*dcc_support_pixel_format)(
+			enum surface_pixel_format format,
+			unsigned int *bytes_per_element);
+};
+
+
+#endif
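
For context on the new header: callers are expected to go through the
hubbub_funcs vtable. A hedged sketch of such a caller follows — the 'funcs'
member on struct hubbub is an assumption here (it follows the convention of
the other DC hw interfaces and is not part of this hunk):

  /* Sketch only: assumes struct hubbub carries a 'funcs' vtable pointer. */
  static bool query_dcc_cap(struct hubbub *hubbub,
  		const struct dc_dcc_surface_param *input,
  		struct dc_surface_dcc_cap *output)
  {
  	if (hubbub->funcs && hubbub->funcs->get_dcc_compression_cap)
  		return hubbub->funcs->get_dcc_compression_cap(hubbub,
  				input, output);

  	return false;	/* treat a missing hook as "no DCC" */
  }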
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index 9999560..582458f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -44,7 +44,23 @@
 	enum graphics_gamut_adjust_type gamut_adjust_type;
 };
 
+struct dcn_dpp_state {
+	uint32_t igam_lut_mode;
+	uint32_t igam_input_format;
+	uint32_t dgam_lut_mode;
+	uint32_t rgam_lut_mode;
+	uint32_t gamut_remap_mode;
+	uint32_t gamut_remap_c11_c12;
+	uint32_t gamut_remap_c13_c14;
+	uint32_t gamut_remap_c21_c22;
+	uint32_t gamut_remap_c23_c24;
+	uint32_t gamut_remap_c31_c32;
+	uint32_t gamut_remap_c33_c34;
+};
+
 struct dpp_funcs {
+	void (*dpp_read_state)(struct dpp *dpp, struct dcn_dpp_state *s);
+
 	void (*dpp_reset)(struct dpp *dpp);
 
 	void (*dpp_set_scaler)(struct dpp *dpp,
@@ -117,7 +133,7 @@
 			struct dpp *dpp_base,
 			enum surface_pixel_format format,
 			enum expansion_mode mode,
-			struct csc_transform input_csc_color_matrix,
+			struct dc_csc_transform input_csc_color_matrix,
 			enum dc_color_space input_color_space);
 
 	void (*dpp_full_bypass)(struct dpp *dpp_base);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
index 9ced254..97df82c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
@@ -56,7 +56,6 @@
 	bool power_gated;
 };
 
-
 struct hubp_funcs {
 	void (*hubp_setup)(
 			struct hubp *hubp,
@@ -121,6 +120,9 @@
 
 	void (*hubp_clk_cntl)(struct hubp *hubp, bool enable);
 	void (*hubp_vtg_sel)(struct hubp *hubp, uint32_t otg_inst);
+	void (*hubp_read_state)(struct hubp *hubp);
+	void (*hubp_disable_control)(struct hubp *hubp, bool disable_hubp);
+	unsigned int (*hubp_get_underflow_status)(struct hubp *hubp);
 
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index b221581..cf7433e 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -140,11 +140,6 @@
 	OPP_REGAMMA_USER
 };
 
-struct csc_transform {
-	uint16_t matrix[12];
-	bool enable_adjustment;
-};
-
 struct dc_bias_and_scale {
 	uint16_t scale_red;
 	uint16_t bias_red;
@@ -191,4 +186,9 @@
 	CONTROLLER_DP_TEST_PATTERN_COLORSQUARES_CEA
 };
 
+enum dc_lut_mode {
+	LUT_BYPASS,
+	LUT_RAM_A,
+	LUT_RAM_B
+};
 #endif /* __DAL_HW_SHARED_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h
index 2109eac..b2fa4c4 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/ipp.h
@@ -87,7 +87,7 @@
 		struct input_pixel_processor *ipp,
 		enum surface_pixel_format format,
 		enum expansion_mode mode,
-		struct csc_transform input_csc_color_matrix,
+		struct dc_csc_transform input_csc_color_matrix,
 		enum dc_color_space input_color_space);
 
 	/* DCE function to setup IPP.  TODO: see if we can consolidate to setup */
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
index 54d8a13..cf6df2e 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h
@@ -149,6 +149,7 @@
 		bool connect);
 	void (*enable_hpd)(struct link_encoder *enc);
 	void (*disable_hpd)(struct link_encoder *enc);
+	bool (*is_dig_enabled)(struct link_encoder *enc);
 	void (*destroy)(struct link_encoder **enc);
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
index 3e1e7e6..47f1dc5 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h
@@ -104,6 +104,7 @@
 		struct mem_input *mem_input,
 		struct dce_watermarks nbp,
 		struct dce_watermarks stutter,
+		struct dce_watermarks stutter_enter,
 		struct dce_watermarks urgent,
 		uint32_t total_dest_line_time_ns);
 
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
index 23a8d5e..caf74e3 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h
@@ -105,7 +105,24 @@
 	struct mpcc mpcc_array[MAX_MPCC];
 };
 
+struct mpcc_state {
+	uint32_t opp_id;
+	uint32_t dpp_id;
+	uint32_t bot_mpcc_id;
+	uint32_t mode;
+	uint32_t alpha_mode;
+	uint32_t pre_multiplied_alpha;
+	uint32_t overlap_only;
+	uint32_t idle;
+	uint32_t busy;
+};
+
 struct mpc_funcs {
+	void (*read_mpcc_state)(
+			struct mpc *mpc,
+			int mpcc_inst,
+			struct mpcc_state *s);
+
 	/*
 	 * Insert DPP into MPC tree based on specified blending position.
 	 * Only used for planes that are part of blending chain for OPP output
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index b5db169..cfa7ec95 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -29,31 +29,40 @@
 #define STREAM_ENCODER_H_
 
 #include "audio_types.h"
+#include "hw_shared.h"
 
 struct dc_bios;
 struct dc_context;
 struct dc_crtc_timing;
 
-struct encoder_info_packet {
-	bool valid;
-	uint8_t hb0;
-	uint8_t hb1;
-	uint8_t hb2;
-	uint8_t hb3;
-	uint8_t sb[32];
+enum dp_pixel_encoding_type {
+	DP_PIXEL_ENCODING_TYPE_RGB444		= 0x00000000,
+	DP_PIXEL_ENCODING_TYPE_YCBCR422		= 0x00000001,
+	DP_PIXEL_ENCODING_TYPE_YCBCR444		= 0x00000002,
+	DP_PIXEL_ENCODING_TYPE_RGB_WIDE_GAMUT	= 0x00000003,
+	DP_PIXEL_ENCODING_TYPE_Y_ONLY		= 0x00000004,
+	DP_PIXEL_ENCODING_TYPE_YCBCR420		= 0x00000005
+};
+
+enum dp_component_depth {
+	DP_COMPONENT_PIXEL_DEPTH_6BPC		= 0x00000000,
+	DP_COMPONENT_PIXEL_DEPTH_8BPC		= 0x00000001,
+	DP_COMPONENT_PIXEL_DEPTH_10BPC		= 0x00000002,
+	DP_COMPONENT_PIXEL_DEPTH_12BPC		= 0x00000003,
+	DP_COMPONENT_PIXEL_DEPTH_16BPC		= 0x00000004
 };
 
 struct encoder_info_frame {
 	/* auxiliary video information */
-	struct encoder_info_packet avi;
-	struct encoder_info_packet gamut;
-	struct encoder_info_packet vendor;
+	struct dc_info_packet avi;
+	struct dc_info_packet gamut;
+	struct dc_info_packet vendor;
 	/* source product description */
-	struct encoder_info_packet spd;
+	struct dc_info_packet spd;
 	/* video stream configuration */
-	struct encoder_info_packet vsc;
+	struct dc_info_packet vsc;
 	/* HDR Static MetaData */
-	struct encoder_info_packet hdrsmd;
+	struct dc_info_packet hdrsmd;
 };
 
 struct encoder_unblank_param {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 3217b5b..69cb0a1 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -140,6 +140,9 @@
 	void (*program_timing)(struct timing_generator *tg,
 							const struct dc_crtc_timing *timing,
 							bool use_vbios);
+	void (*program_vline_interrupt)(struct timing_generator *optc,
+			const struct dc_crtc_timing *dc_crtc_timing,
+			unsigned long long vsync_delta);
 	bool (*enable_crtc)(struct timing_generator *tg);
 	bool (*disable_crtc)(struct timing_generator *tg);
 	bool (*is_counter_moving)(struct timing_generator *tg);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
index c5b3623..fecc80c 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
@@ -252,7 +252,7 @@
 			struct transform *xfm_base,
 			enum surface_pixel_format format,
 			enum expansion_mode mode,
-			struct csc_transform input_csc_color_matrix,
+			struct dc_csc_transform input_csc_color_matrix,
 			enum dc_color_space input_color_space);
 
 	void (*ipp_full_bypass)(struct transform *xfm_base);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index e764cba..63fc6c4 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -32,6 +32,8 @@
 #include "inc/hw/link_encoder.h"
 #include "core_status.h"
 
+#define EDP_BACKLIGHT_RAMP_DISABLE_LEVEL 0xFFFFFFFF
+
 enum pipe_gating_control {
 	PIPE_GATING_CONTROL_DISABLE = 0,
 	PIPE_GATING_CONTROL_ENABLE,
@@ -63,6 +65,7 @@
 struct dc_static_screen_events;
 struct resource_pool;
 struct resource_context;
+struct stream_resource;
 
 struct hw_sequencer_funcs {
 
@@ -80,11 +83,6 @@
 			int num_planes,
 			struct dc_state *context);
 
-	void (*set_plane_config)(
-			const struct dc *dc,
-			struct pipe_ctx *pipe_ctx,
-			struct resource_context *res_ctx);
-
 	void (*program_gamut_remap)(
 			struct pipe_ctx *pipe_ctx);
 
@@ -93,6 +91,12 @@
 			enum dc_color_space colorspace,
 			uint16_t *matrix);
 
+	void (*program_output_csc)(struct dc *dc,
+			struct pipe_ctx *pipe_ctx,
+			enum dc_color_space colorspace,
+			uint16_t *matrix,
+			int opp_id);
+
 	void (*update_plane_addr)(
 		const struct dc *dc,
 		struct pipe_ctx *pipe_ctx);
@@ -154,6 +158,11 @@
 				struct dc *dc,
 				struct pipe_ctx *pipe,
 				bool lock);
+	void (*blank_pixel_data)(
+			struct dc *dc,
+			struct stream_resource *stream_res,
+			struct dc_stream_state *stream,
+			bool blank);
 
 	void (*set_bandwidth)(
 			struct dc *dc,
@@ -169,7 +178,7 @@
 	void (*set_static_screen_control)(struct pipe_ctx **pipe_ctx,
 			int num_pipes, const struct dc_static_screen_events *events);
 
-	enum dc_status (*prog_pixclk_crtc_otg)(
+	enum dc_status (*enable_stream_timing)(
 			struct pipe_ctx *pipe_ctx,
 			struct dc_state *context,
 			struct dc *dc);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
index 77eb728..3306e7b 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/reg_helper.h
@@ -183,6 +183,36 @@
 				FN(reg_name, f4), v4, \
 				FN(reg_name, f5), v5)
 
+#define REG_GET_6(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6)	\
+		generic_reg_get6(CTX, REG(reg_name), \
+				FN(reg_name, f1), v1, \
+				FN(reg_name, f2), v2, \
+				FN(reg_name, f3), v3, \
+				FN(reg_name, f4), v4, \
+				FN(reg_name, f5), v5, \
+				FN(reg_name, f6), v6)
+
+#define REG_GET_7(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6, f7, v7)	\
+		generic_reg_get7(CTX, REG(reg_name), \
+				FN(reg_name, f1), v1, \
+				FN(reg_name, f2), v2, \
+				FN(reg_name, f3), v3, \
+				FN(reg_name, f4), v4, \
+				FN(reg_name, f5), v5, \
+				FN(reg_name, f6), v6, \
+				FN(reg_name, f7), v7)
+
+#define REG_GET_8(reg_name, f1, v1, f2, v2, f3, v3, f4, v4, f5, v5, f6, v6, f7, v7, f8, v8)	\
+		generic_reg_get8(CTX, REG(reg_name), \
+				FN(reg_name, f1), v1, \
+				FN(reg_name, f2), v2, \
+				FN(reg_name, f3), v3, \
+				FN(reg_name, f4), v4, \
+				FN(reg_name, f5), v5, \
+				FN(reg_name, f6), v6, \
+				FN(reg_name, f7), v7, \
+				FN(reg_name, f8), v8)
+
 /* macro to poll and wait for a register field to read back given value */
 
 #define REG_WAIT(reg_name, field, val, delay_between_poll_us, max_try)	\
@@ -389,4 +419,30 @@
 		uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
 		uint8_t shift5, uint32_t mask5, uint32_t *field_value5);
 
+uint32_t generic_reg_get6(const struct dc_context *ctx, uint32_t addr,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+		uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+		uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+		uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+		uint8_t shift6, uint32_t mask6, uint32_t *field_value6);
+
+uint32_t generic_reg_get7(const struct dc_context *ctx, uint32_t addr,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+		uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+		uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+		uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+		uint8_t shift6, uint32_t mask6, uint32_t *field_value6,
+		uint8_t shift7, uint32_t mask7, uint32_t *field_value7);
+
+uint32_t generic_reg_get8(const struct dc_context *ctx, uint32_t addr,
+		uint8_t shift1, uint32_t mask1, uint32_t *field_value1,
+		uint8_t shift2, uint32_t mask2, uint32_t *field_value2,
+		uint8_t shift3, uint32_t mask3, uint32_t *field_value3,
+		uint8_t shift4, uint32_t mask4, uint32_t *field_value4,
+		uint8_t shift5, uint32_t mask5, uint32_t *field_value5,
+		uint8_t shift6, uint32_t mask6, uint32_t *field_value6,
+		uint8_t shift7, uint32_t mask7, uint32_t *field_value7,
+		uint8_t shift8, uint32_t mask8, uint32_t *field_value8);
 #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_REG_HELPER_H_ */
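
The new REG_GET_6/7/8 macros extend the existing pattern: one register read,
then per-field extraction. Below is a standalone model of what each
(shift, mask, field_value) triple does — illustrative only, not the kernel's
generic_reg_getN implementation, which also performs the MMIO read through
the dc_context:

  #include <stdint.h>
  #include <stdio.h>

  /* Each triple extracts one field from the single register read:
   * mask first, then shift down. */
  static void extract_field(uint32_t reg_value, uint8_t shift,
  		uint32_t mask, uint32_t *field_value)
  {
  	*field_value = (reg_value & mask) >> shift;
  }

  int main(void)
  {
  	uint32_t reg = 0x00ABCDEF, f1, f2;

  	extract_field(reg, 0, 0x000000FF, &f1);	/* bits 7:0  -> 0xEF */
  	extract_field(reg, 8, 0x0000FF00, &f2);	/* bits 15:8 -> 0xCD */
  	printf("f1=0x%02X f2=0x%02X\n",
  			(unsigned int)f1, (unsigned int)f2);
  	return 0;
  }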
diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h
index 5467332f..640a647 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/resource.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h
@@ -139,10 +139,6 @@
 		struct dc_state *context,
 		const struct resource_pool *pool);
 
-void validate_guaranteed_copy_streams(
-		struct dc_state *context,
-		int max_streams);
-
 void resource_validate_ctx_update_pointer_after_copy(
 		const struct dc_state *src_ctx,
 		struct dc_state *dst_ctx);
diff --git a/drivers/gpu/drm/amd/display/dc/irq_types.h b/drivers/gpu/drm/amd/display/dc/irq_types.h
index a506c2e..0b5f3a2 100644
--- a/drivers/gpu/drm/amd/display/dc/irq_types.h
+++ b/drivers/gpu/drm/amd/display/dc/irq_types.h
@@ -26,6 +26,8 @@
 #ifndef __DAL_IRQ_TYPES_H__
 #define __DAL_IRQ_TYPES_H__
 
+#include "os_types.h"
+
 struct dc_context;
 
 typedef void (*interrupt_handler)(void *);
@@ -135,6 +137,13 @@
 	DC_IRQ_SOURCE_VBLANK5,
 	DC_IRQ_SOURCE_VBLANK6,
 
+	DC_IRQ_SOURCE_DC1_VLINE0,
+	DC_IRQ_SOURCE_DC2_VLINE0,
+	DC_IRQ_SOURCE_DC3_VLINE0,
+	DC_IRQ_SOURCE_DC4_VLINE0,
+	DC_IRQ_SOURCE_DC5_VLINE0,
+	DC_IRQ_SOURCE_DC6_VLINE0,
+
 	DAL_IRQ_SOURCES_NUMBER
 };
 
diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
index 9b0a04f..25029ed 100644
--- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h
+++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h
@@ -86,6 +86,7 @@
 #define VI_POLARIS10_P_A0 80
 #define VI_POLARIS11_M_A0 90
 #define VI_POLARIS12_V_A0 100
+#define VI_VEGAM_A0 110
 
 #define VI_UNKNOWN 0xFF
 
@@ -98,7 +99,9 @@
 		(eChipRev < VI_POLARIS11_M_A0))
 #define ASIC_REV_IS_POLARIS11_M(eChipRev) ((eChipRev >= VI_POLARIS11_M_A0) &&  \
 		(eChipRev < VI_POLARIS12_V_A0))
-#define ASIC_REV_IS_POLARIS12_V(eChipRev) (eChipRev >= VI_POLARIS12_V_A0)
+#define ASIC_REV_IS_POLARIS12_V(eChipRev) ((eChipRev >= VI_POLARIS12_V_A0) && \
+		(eChipRev < VI_VEGAM_A0))
+#define ASIC_REV_IS_VEGAM(eChipRev) (eChipRev >= VI_VEGAM_A0)
 
 /* DCE11 */
 #define CZ_CARRIZO_A0 0x01
@@ -110,17 +113,18 @@
 	((rev >= STONEY_A0) && (rev < CZ_UNKNOWN))
 
 /* DCE12 */
 
 #define AI_GREENLAND_P_A0 1
 #define AI_GREENLAND_P_A1 2
 #define AI_UNKNOWN 0xFF
 
 #define AI_VEGA12_P_A0 20
+#define AI_VEGA20_P_A0 40
 #define ASICREV_IS_GREENLAND_M(eChipRev)  (eChipRev < AI_VEGA12_P_A0)
 #define ASICREV_IS_GREENLAND_P(eChipRev)  (eChipRev < AI_VEGA12_P_A0)
 
-#define ASICREV_IS_VEGA12_P(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN))
-#define ASICREV_IS_VEGA12_p(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN))
+#define ASICREV_IS_VEGA12_P(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_VEGA20_P_A0))
+#define ASICREV_IS_VEGA20_P(eChipRev) ((eChipRev >= AI_VEGA20_P_A0) && (eChipRev < AI_UNKNOWN))
 
 /* DCN1_0 */
 #define INTERNAL_REV_RAVEN_A0             0x00    /* First spin of Raven */
diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h
index fa54396..840142b 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -40,6 +40,7 @@
 	DCE_VERSION_10_0,
 	DCE_VERSION_11_0,
 	DCE_VERSION_11_2,
+	DCE_VERSION_11_22,
 	DCE_VERSION_12_0,
 	DCE_VERSION_MAX,
 	DCN_VERSION_1_0,
diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h
index 0de2586..bb0d4eb 100644
--- a/drivers/gpu/drm/amd/display/include/fixed31_32.h
+++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h
@@ -26,9 +26,13 @@
 #ifndef __DAL_FIXED31_32_H__
 #define __DAL_FIXED31_32_H__
 
-#include "os_types.h"
-
 #define FIXED31_32_BITS_PER_FRACTIONAL_PART 32
+#ifndef LLONG_MAX
+#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL
+#endif
+#ifndef LLONG_MIN
+#define LLONG_MIN (-LLONG_MAX - 1LL)
+#endif
 
 /*
  * @brief
@@ -44,24 +48,24 @@
  */
 
 struct fixed31_32 {
-	int64_t value;
+	long long value;
 };
 
 /*
  * @brief
  * Useful constants
  */
 
-static const struct fixed31_32 dal_fixed31_32_zero = { 0 };
-static const struct fixed31_32 dal_fixed31_32_epsilon = { 1LL };
-static const struct fixed31_32 dal_fixed31_32_half = { 0x80000000LL };
-static const struct fixed31_32 dal_fixed31_32_one = { 0x100000000LL };
+static const struct fixed31_32 dc_fixpt_zero = { 0 };
+static const struct fixed31_32 dc_fixpt_epsilon = { 1LL };
+static const struct fixed31_32 dc_fixpt_half = { 0x80000000LL };
+static const struct fixed31_32 dc_fixpt_one = { 0x100000000LL };
 
-static const struct fixed31_32 dal_fixed31_32_pi = { 13493037705LL };
-static const struct fixed31_32 dal_fixed31_32_two_pi = { 26986075409LL };
-static const struct fixed31_32 dal_fixed31_32_e = { 11674931555LL };
-static const struct fixed31_32 dal_fixed31_32_ln2 = { 2977044471LL };
-static const struct fixed31_32 dal_fixed31_32_ln2_div_2 = { 1488522236LL };
+static const struct fixed31_32 dc_fixpt_pi = { 13493037705LL };
+static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL };
+static const struct fixed31_32 dc_fixpt_e = { 11674931555LL };
+static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL };
+static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL };
 
 /*
  * @brief
@@ -72,24 +77,19 @@
  * @brief
  * result = numerator / denominator
  */
-struct fixed31_32 dal_fixed31_32_from_fraction(
-	int64_t numerator,
-	int64_t denominator);
+struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator);
 
 /*
  * @brief
  * result = arg
  */
-struct fixed31_32 dal_fixed31_32_from_int_nonconst(int64_t arg);
-static inline struct fixed31_32 dal_fixed31_32_from_int(int64_t arg)
+static inline struct fixed31_32 dc_fixpt_from_int(int arg)
 {
-	if (__builtin_constant_p(arg)) {
-		struct fixed31_32 res;
-		BUILD_BUG_ON((LONG_MIN > arg) || (arg > LONG_MAX));
-		res.value = arg << FIXED31_32_BITS_PER_FRACTIONAL_PART;
-		return res;
-	} else
-		return dal_fixed31_32_from_int_nonconst(arg);
+	struct fixed31_32 res;
+
+	res.value = (long long) arg << FIXED31_32_BITS_PER_FRACTIONAL_PART;
+
+	return res;
 }
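
Reader's note on the representation: a real number x is stored in 'value' as
x * 2^32, which is why dc_fixpt_half is 0x80000000LL and dc_fixpt_one is
0x100000000LL above. A standalone sketch of the encoding (demo types, not the
kernel code):

  #include <stdio.h>

  struct fixed31_32_demo { long long value; };	/* raw = real * 2^32 */

  static struct fixed31_32_demo demo_from_int(int arg)
  {
  	struct fixed31_32_demo res = { (long long)arg << 32 };
  	return res;
  }

  static double demo_to_double(struct fixed31_32_demo arg)
  {
  	return (double)arg.value / 4294967296.0;	/* divide by 2^32 */
  }

  int main(void)
  {
  	struct fixed31_32_demo half = { 0x80000000LL };	/* dc_fixpt_half */

  	printf("%g %g\n", demo_to_double(half),
  			demo_to_double(demo_from_int(3)));	/* 0.5 3 */
  	return 0;
  }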
 
 /*
@@ -101,7 +101,7 @@
  * @brief
  * result = -arg
  */
-static inline struct fixed31_32 dal_fixed31_32_neg(struct fixed31_32 arg)
+static inline struct fixed31_32 dc_fixpt_neg(struct fixed31_32 arg)
 {
 	struct fixed31_32 res;
 
@@ -114,10 +114,10 @@
  * @brief
  * result = abs(arg) := (arg >= 0) ? arg : -arg
  */
-static inline struct fixed31_32 dal_fixed31_32_abs(struct fixed31_32 arg)
+static inline struct fixed31_32 dc_fixpt_abs(struct fixed31_32 arg)
 {
 	if (arg.value < 0)
-		return dal_fixed31_32_neg(arg);
+		return dc_fixpt_neg(arg);
 	else
 		return arg;
 }
@@ -131,8 +131,7 @@
  * @brief
  * result = arg1 < arg2
  */
-static inline bool dal_fixed31_32_lt(struct fixed31_32 arg1,
-				     struct fixed31_32 arg2)
+static inline bool dc_fixpt_lt(struct fixed31_32 arg1, struct fixed31_32 arg2)
 {
 	return arg1.value < arg2.value;
 }
@@ -141,8 +140,7 @@
  * @brief
  * result = arg1 <= arg2
  */
-static inline bool dal_fixed31_32_le(struct fixed31_32 arg1,
-				     struct fixed31_32 arg2)
+static inline bool dc_fixpt_le(struct fixed31_32 arg1, struct fixed31_32 arg2)
 {
 	return arg1.value <= arg2.value;
 }
@@ -151,8 +149,7 @@
  * @brief
  * result = arg1 == arg2
  */
-static inline bool dal_fixed31_32_eq(struct fixed31_32 arg1,
-				     struct fixed31_32 arg2)
+static inline bool dc_fixpt_eq(struct fixed31_32 arg1, struct fixed31_32 arg2)
 {
 	return arg1.value == arg2.value;
 }
@@ -161,8 +158,7 @@
  * @brief
  * result = min(arg1, arg2) := (arg1 <= arg2) ? arg1 : arg2
  */
-static inline struct fixed31_32 dal_fixed31_32_min(struct fixed31_32 arg1,
-						   struct fixed31_32 arg2)
+static inline struct fixed31_32 dc_fixpt_min(struct fixed31_32 arg1, struct fixed31_32 arg2)
 {
 	if (arg1.value <= arg2.value)
 		return arg1;
@@ -174,8 +170,7 @@
  * @brief
  * result = max(arg1, arg2) := (arg1 <= arg2) ? arg2 : arg1
  */
-static inline struct fixed31_32 dal_fixed31_32_max(struct fixed31_32 arg1,
-						   struct fixed31_32 arg2)
+static inline struct fixed31_32 dc_fixpt_max(struct fixed31_32 arg1, struct fixed31_32 arg2)
 {
 	if (arg1.value <= arg2.value)
 		return arg2;
@@ -189,14 +184,14 @@
  * result = | arg, when min_value < arg < max_value
  *          | max_value, when arg >= max_value
  */
-static inline struct fixed31_32 dal_fixed31_32_clamp(
+static inline struct fixed31_32 dc_fixpt_clamp(
 	struct fixed31_32 arg,
 	struct fixed31_32 min_value,
 	struct fixed31_32 max_value)
 {
-	if (dal_fixed31_32_le(arg, min_value))
+	if (dc_fixpt_le(arg, min_value))
 		return min_value;
-	else if (dal_fixed31_32_le(max_value, arg))
+	else if (dc_fixpt_le(max_value, arg))
 		return max_value;
 	else
 		return arg;
@@ -211,21 +206,30 @@
  * @brief
  * result = arg << shift
  */
-struct fixed31_32 dal_fixed31_32_shl(
-	struct fixed31_32 arg,
-	uint8_t shift);
+static inline struct fixed31_32 dc_fixpt_shl(struct fixed31_32 arg, unsigned char shift)
+{
+	ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) ||
+		((arg.value < 0) && (arg.value >= ~(LLONG_MAX >> shift))));
+
+	arg.value = arg.value << shift;
+
+	return arg;
+}
 
 /*
  * @brief
  * result = arg >> shift
  */
-static inline struct fixed31_32 dal_fixed31_32_shr(
-	struct fixed31_32 arg,
-	uint8_t shift)
+static inline struct fixed31_32 dc_fixpt_shr(struct fixed31_32 arg, unsigned char shift)
 {
-	struct fixed31_32 res;
-	res.value = arg.value >> shift;
-	return res;
+	bool negative = arg.value < 0;
+
+	if (negative)
+		arg.value = -arg.value;
+	arg.value = arg.value >> shift;
+	if (negative)
+		arg.value = -arg.value;
+	return arg;
 }
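/* Annotation (not part of the patch): by negating before and after the
 * shift, the new dc_fixpt_shr makes negative values round toward zero
 * rather than toward negative infinity; e.g. a raw value of -3 shifted
 * right by 1 yields -1 here, where a plain arithmetic shift gives -2.
 */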
 
 /*
@@ -237,38 +241,50 @@
  * @brief
  * result = arg1 + arg2
  */
-struct fixed31_32 dal_fixed31_32_add(
-	struct fixed31_32 arg1,
-	struct fixed31_32 arg2);
+static inline struct fixed31_32 dc_fixpt_add(struct fixed31_32 arg1, struct fixed31_32 arg2)
+{
+	struct fixed31_32 res;
+
+	ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) ||
+		((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value)));
+
+	res.value = arg1.value + arg2.value;
+
+	return res;
+}
 
 /*
  * @brief
  * result = arg1 + arg2
  */
-static inline struct fixed31_32 dal_fixed31_32_add_int(struct fixed31_32 arg1,
-						       int32_t arg2)
+static inline struct fixed31_32 dc_fixpt_add_int(struct fixed31_32 arg1, int arg2)
 {
-	return dal_fixed31_32_add(arg1,
-				  dal_fixed31_32_from_int(arg2));
+	return dc_fixpt_add(arg1, dc_fixpt_from_int(arg2));
 }
 
 /*
  * @brief
  * result = arg1 - arg2
  */
-struct fixed31_32 dal_fixed31_32_sub(
-	struct fixed31_32 arg1,
-	struct fixed31_32 arg2);
+static inline struct fixed31_32 dc_fixpt_sub(struct fixed31_32 arg1, struct fixed31_32 arg2)
+{
+	struct fixed31_32 res;
+
+	ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) ||
+		((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value)));
+
+	res.value = arg1.value - arg2.value;
+
+	return res;
+}
 
 /*
  * @brief
  * result = arg1 - arg2
  */
-static inline struct fixed31_32 dal_fixed31_32_sub_int(struct fixed31_32 arg1,
-						       int32_t arg2)
+static inline struct fixed31_32 dc_fixpt_sub_int(struct fixed31_32 arg1, int arg2)
 {
-	return dal_fixed31_32_sub(arg1,
-				  dal_fixed31_32_from_int(arg2));
+	return dc_fixpt_sub(arg1, dc_fixpt_from_int(arg2));
 }
 
 
@@ -281,49 +297,40 @@
  * @brief
  * result = arg1 * arg2
  */
-struct fixed31_32 dal_fixed31_32_mul(
-	struct fixed31_32 arg1,
-	struct fixed31_32 arg2);
+struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2);
 
 
 /*
  * @brief
  * result = arg1 * arg2
  */
-static inline struct fixed31_32 dal_fixed31_32_mul_int(struct fixed31_32 arg1,
-						       int32_t arg2)
+static inline struct fixed31_32 dc_fixpt_mul_int(struct fixed31_32 arg1, int arg2)
 {
-	return dal_fixed31_32_mul(arg1,
-				  dal_fixed31_32_from_int(arg2));
+	return dc_fixpt_mul(arg1, dc_fixpt_from_int(arg2));
 }
 
 /*
  * @brief
  * result = square(arg) := arg * arg
  */
-struct fixed31_32 dal_fixed31_32_sqr(
-	struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg);
 
 /*
  * @brief
  * result = arg1 / arg2
  */
-static inline struct fixed31_32 dal_fixed31_32_div_int(struct fixed31_32 arg1,
-						       int64_t arg2)
+static inline struct fixed31_32 dc_fixpt_div_int(struct fixed31_32 arg1, long long arg2)
 {
-	return dal_fixed31_32_from_fraction(arg1.value,
-					    dal_fixed31_32_from_int(arg2).value);
+	return dc_fixpt_from_fraction(arg1.value, dc_fixpt_from_int(arg2).value);
 }
 
 /*
  * @brief
  * result = arg1 / arg2
  */
-static inline struct fixed31_32 dal_fixed31_32_div(struct fixed31_32 arg1,
-						   struct fixed31_32 arg2)
+static inline struct fixed31_32 dc_fixpt_div(struct fixed31_32 arg1, struct fixed31_32 arg2)
 {
-	return dal_fixed31_32_from_fraction(arg1.value,
-					    arg2.value);
+	return dc_fixpt_from_fraction(arg1.value, arg2.value);
 }
 
 /*
@@ -338,8 +345,7 @@
  * @note
  * No special actions taken in case the argument is zero.
  */
-struct fixed31_32 dal_fixed31_32_recip(
-	struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg);
 
 /*
  * @brief
@@ -354,8 +360,7 @@
  * Argument specified in radians,
  * internally it's normalized to the [-2pi...2pi] range.
  */
-struct fixed31_32 dal_fixed31_32_sinc(
-	struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg);
 
 /*
  * @brief
@@ -365,8 +370,7 @@
  * Argument specified in radians,
  * internally it's normalized to the [-2pi...2pi] range.
  */
-struct fixed31_32 dal_fixed31_32_sin(
-	struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg);
 
 /*
  * @brief
@@ -378,8 +382,7 @@
  * passing arguments outside that range
  * will cause incorrect results!
  */
-struct fixed31_32 dal_fixed31_32_cos(
-	struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg);
 
 /*
  * @brief
@@ -393,8 +396,7 @@
  * @note
  * Currently, the function is verified for abs(arg) <= 1.
  */
-struct fixed31_32 dal_fixed31_32_exp(
-	struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg);
 
 /*
  * @brief
@@ -406,8 +408,7 @@
  * Currently, no special actions are taken
  * in case of invalid argument(s). Take care!
  */
-struct fixed31_32 dal_fixed31_32_log(
-	struct fixed31_32 arg);
+struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg);
 
 /*
  * @brief
@@ -421,9 +422,13 @@
  * @note
  * Currently, abs(arg1) should be less than 1. Take care!
  */
-struct fixed31_32 dal_fixed31_32_pow(
-	struct fixed31_32 arg1,
-	struct fixed31_32 arg2);
+static inline struct fixed31_32 dc_fixpt_pow(struct fixed31_32 arg1, struct fixed31_32 arg2)
+{
+	return dc_fixpt_exp(
+		dc_fixpt_mul(
+			dc_fixpt_log(arg1),
+			arg2));
+}
 
 /*
  * @brief
@@ -434,22 +439,56 @@
  * @brief
  * result = floor(arg) := greatest integer less than or equal to arg
  */
-int32_t dal_fixed31_32_floor(
-	struct fixed31_32 arg);
+static inline int dc_fixpt_floor(struct fixed31_32 arg)
+{
+	unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value;
+
+	if (arg.value >= 0)
+		return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+	else
+		return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+}
 
 /*
  * @brief
  * result = round(arg) := integer nearest to arg
  */
-int32_t dal_fixed31_32_round(
-	struct fixed31_32 arg);
+static inline int dc_fixpt_round(struct fixed31_32 arg)
+{
+	unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value;
+
+	const long long summand = dc_fixpt_half.value;
+
+	ASSERT(LLONG_MAX - (long long)arg_value >= summand);
+
+	arg_value += summand;
+
+	if (arg.value >= 0)
+		return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+	else
+		return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+}
 
 /*
  * @brief
  * result = ceil(arg) := lowest integer greater than or equal to arg
  */
-int32_t dal_fixed31_32_ceil(
-	struct fixed31_32 arg);
+static inline int dc_fixpt_ceil(struct fixed31_32 arg)
+{
+	unsigned long long arg_value = arg.value > 0 ? arg.value : -arg.value;
+
+	const long long summand = dc_fixpt_one.value -
+		dc_fixpt_epsilon.value;
+
+	ASSERT(LLONG_MAX - (long long)arg_value >= summand);
+
+	arg_value += summand;
+
+	if (arg.value >= 0)
+		return (int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+	else
+		return -(int)(arg_value >> FIXED31_32_BITS_PER_FRACTIONAL_PART);
+}
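/* Annotation (not part of the patch): dc_fixpt_floor, dc_fixpt_round and
 * dc_fixpt_ceil all operate on the magnitude and then re-apply the sign,
 * so they behave symmetrically about zero; e.g. dc_fixpt_floor of -1.5
 * returns -1 (truncation toward zero), not the mathematical floor of -2.
 */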
 
 /* the following two functions are used in scaler hw programming to convert fixed
  * point value to format 2 bits from integer part and 19 bits from fractional
@@ -457,20 +496,31 @@
  * fractional
  */
 
-uint32_t dal_fixed31_32_u2d19(
-	struct fixed31_32 arg);
+unsigned int dc_fixpt_u2d19(struct fixed31_32 arg);
 
-uint32_t dal_fixed31_32_u0d19(
-	struct fixed31_32 arg);
+unsigned int dc_fixpt_u0d19(struct fixed31_32 arg);
 
+unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg);
 
-uint32_t dal_fixed31_32_clamp_u0d14(
-	struct fixed31_32 arg);
+unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg);
 
-uint32_t dal_fixed31_32_clamp_u0d10(
-	struct fixed31_32 arg);
+int dc_fixpt_s4d19(struct fixed31_32 arg);
 
-int32_t dal_fixed31_32_s4d19(
-	struct fixed31_32 arg);
+static inline struct fixed31_32 dc_fixpt_truncate(struct fixed31_32 arg, unsigned int frac_bits)
+{
+	bool negative = arg.value < 0;
+
+	if (frac_bits >= FIXED31_32_BITS_PER_FRACTIONAL_PART) {
+		ASSERT(frac_bits == FIXED31_32_BITS_PER_FRACTIONAL_PART);
+		return arg;
+	}
+
+	if (negative)
+		arg.value = -arg.value;
+	arg.value &= (~0LL) << (FIXED31_32_BITS_PER_FRACTIONAL_PART - frac_bits);
+	if (negative)
+		arg.value = -arg.value;
+	return arg;
+}
 
 #endif
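As an aside on the dc_fixpt_* constants renamed above: fixed31_32 is a signed
Q31.32 format, i.e. 'value' holds the real number scaled by 2^32, so
dc_fixpt_half is 0x80000000 (2^31) and dc_fixpt_one is 0x100000000 (2^32). A
minimal user-space sketch (not kernel code; fx_from_int and fx_to_double are
hypothetical names) demonstrating the layout:

#include <stdio.h>

struct fixed31_32 { long long value; };
#define FRAC_BITS 32	/* stands in for FIXED31_32_BITS_PER_FRACTIONAL_PART */

static struct fixed31_32 fx_from_int(int arg)
{
	struct fixed31_32 res = { (long long)arg << FRAC_BITS };
	return res;
}

static double fx_to_double(struct fixed31_32 arg)
{
	return (double)arg.value / 4294967296.0;	/* 2^32 */
}

int main(void)
{
	struct fixed31_32 half = { 0x80000000LL };	/* raw value of dc_fixpt_half */
	struct fixed31_32 pi = { 13493037705LL };	/* raw value of dc_fixpt_pi */

	printf("half ~= %.9f\n", fx_to_double(half));		/* 0.500000000 */
	printf("pi   ~= %.9f\n", fx_to_double(pi));		/* 3.141592653 */
	printf("two  ~= %.9f\n", fx_to_double(fx_from_int(2)));	/* 2.000000000 */
	return 0;
}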
diff --git a/drivers/gpu/drm/amd/display/include/fixed32_32.h b/drivers/gpu/drm/amd/display/include/fixed32_32.h
deleted file mode 100644
index 9c70341..0000000
--- a/drivers/gpu/drm/amd/display/include/fixed32_32.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright 2012-15 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-
-
-#ifndef __DAL_FIXED32_32_H__
-#define __DAL_FIXED32_32_H__
-
-#include "os_types.h"
-
-struct fixed32_32 {
-	uint64_t value;
-};
-
-static const struct fixed32_32 dal_fixed32_32_zero = { 0 };
-static const struct fixed32_32 dal_fixed32_32_one = { 0x100000000LL };
-static const struct fixed32_32 dal_fixed32_32_half = { 0x80000000LL };
-
-struct fixed32_32 dal_fixed32_32_from_fraction(uint32_t n, uint32_t d);
-static inline struct fixed32_32 dal_fixed32_32_from_int(uint32_t value)
-{
-	struct fixed32_32 fx;
-
-	fx.value = (uint64_t)value<<32;
-	return fx;
-}
-
-struct fixed32_32 dal_fixed32_32_add(
-	struct fixed32_32 lhs,
-	struct fixed32_32 rhs);
-struct fixed32_32 dal_fixed32_32_add_int(
-	struct fixed32_32 lhs,
-	uint32_t rhs);
-struct fixed32_32 dal_fixed32_32_sub(
-	struct fixed32_32 lhs,
-	struct fixed32_32 rhs);
-struct fixed32_32 dal_fixed32_32_sub_int(
-	struct fixed32_32 lhs,
-	uint32_t rhs);
-struct fixed32_32 dal_fixed32_32_mul(
-	struct fixed32_32 lhs,
-	struct fixed32_32 rhs);
-struct fixed32_32 dal_fixed32_32_mul_int(
-	struct fixed32_32 lhs,
-	uint32_t rhs);
-struct fixed32_32 dal_fixed32_32_div(
-	struct fixed32_32 lhs,
-	struct fixed32_32 rhs);
-struct fixed32_32 dal_fixed32_32_div_int(
-	struct fixed32_32 lhs,
-	uint32_t rhs);
-
-static inline struct fixed32_32 dal_fixed32_32_min(struct fixed32_32 lhs,
-						   struct fixed32_32 rhs)
-{
-	return (lhs.value < rhs.value) ? lhs : rhs;
-}
-
-static inline struct fixed32_32 dal_fixed32_32_max(struct fixed32_32 lhs,
-						   struct fixed32_32 rhs)
-{
-	return (lhs.value > rhs.value) ? lhs : rhs;
-}
-
-static inline bool dal_fixed32_32_gt(struct fixed32_32 lhs, struct fixed32_32 rhs)
-{
-	return lhs.value > rhs.value;
-}
-
-static inline bool dal_fixed32_32_gt_int(struct fixed32_32 lhs, uint32_t rhs)
-{
-	return lhs.value > ((uint64_t)rhs<<32);
-}
-
-static inline bool dal_fixed32_32_lt(struct fixed32_32 lhs, struct fixed32_32 rhs)
-{
-	return lhs.value < rhs.value;
-}
-
-static inline bool dal_fixed32_32_lt_int(struct fixed32_32 lhs, uint32_t rhs)
-{
-	return lhs.value < ((uint64_t)rhs<<32);
-}
-
-static inline bool dal_fixed32_32_le(struct fixed32_32 lhs, struct fixed32_32 rhs)
-{
-	return lhs.value <= rhs.value;
-}
-
-static inline bool dal_fixed32_32_le_int(struct fixed32_32 lhs, uint32_t rhs)
-{
-	return lhs.value <= ((uint64_t)rhs<<32);
-}
-
-static inline bool dal_fixed32_32_eq(struct fixed32_32 lhs, struct fixed32_32 rhs)
-{
-	return lhs.value == rhs.value;
-}
-
-uint32_t dal_fixed32_32_ceil(struct fixed32_32 value);
-static inline uint32_t dal_fixed32_32_floor(struct fixed32_32 value)
-{
-	return value.value>>32;
-}
-
-uint32_t dal_fixed32_32_round(struct fixed32_32 value);
-
-#endif
diff --git a/drivers/gpu/drm/amd/display/include/logger_interface.h b/drivers/gpu/drm/amd/display/include/logger_interface.h
index 28dee96..dc98d6d 100644
--- a/drivers/gpu/drm/amd/display/include/logger_interface.h
+++ b/drivers/gpu/drm/amd/display/include/logger_interface.h
@@ -190,4 +190,13 @@
 	} \
 } while (0)
 
+#define DISPLAY_STATS_BEGIN(entry) \
+	dm_logger_open(dc->ctx->logger, &entry, LOG_DISPLAYSTATS)
+
+#define DISPLAY_STATS(msg, ...) \
+	dm_logger_append(&log_entry, msg, ##__VA_ARGS__)
+
+#define DISPLAY_STATS_END(entry) \
+	dm_logger_close(&entry)
+
 #endif /* __DAL_LOGGER_INTERFACE_H__ */
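One caveat worth flagging on the DISPLAY_STATS macros just added:
DISPLAY_STATS_BEGIN references a 'dc' pointer from the enclosing scope, and
DISPLAY_STATS hardcodes the identifier 'log_entry' rather than using its
argument. A caller sketch under those assumptions (the flip counter is
illustrative):

	struct log_entry log_entry;	/* name is fixed by the DISPLAY_STATS macro */

	DISPLAY_STATS_BEGIN(log_entry);	/* expects 'dc' to be in scope */
	DISPLAY_STATS("flips: %u\n", flip_count);
	DISPLAY_STATS_END(log_entry);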
diff --git a/drivers/gpu/drm/amd/display/include/logger_types.h b/drivers/gpu/drm/amd/display/include/logger_types.h
index 427796b..0a540b9 100644
--- a/drivers/gpu/drm/amd/display/include/logger_types.h
+++ b/drivers/gpu/drm/amd/display/include/logger_types.h
@@ -29,39 +29,39 @@
 #include "os_types.h"
 
 #define MAX_NAME_LEN 32
-#define DC_LOG_ERROR(a, ...) dm_logger_write(DC_LOGGER, LOG_ERROR, a, ## __VA_ARGS__)
-#define DC_LOG_WARNING(a, ...) dm_logger_write(DC_LOGGER, LOG_WARNING, a, ## __VA_ARGS__)
-#define DC_LOG_DEBUG(a, ...) dm_logger_write(DC_LOGGER, LOG_DEBUG, a, ## __VA_ARGS__)
-#define DC_LOG_DC(a, ...) dm_logger_write(DC_LOGGER, LOG_DC, a, ## __VA_ARGS__)
-#define DC_LOG_DTN(a, ...) dm_logger_write(DC_LOGGER, LOG_DTN, a, ## __VA_ARGS__)
-#define DC_LOG_SURFACE(a, ...) dm_logger_write(DC_LOGGER, LOG_SURFACE, a, ## __VA_ARGS__)
-#define DC_LOG_HW_HOTPLUG(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_HOTPLUG, a, ## __VA_ARGS__)
-#define DC_LOG_HW_LINK_TRAINING(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_LINK_TRAINING, a, ## __VA_ARGS__)
-#define DC_LOG_HW_SET_MODE(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_SET_MODE, a, ## __VA_ARGS__)
-#define DC_LOG_HW_RESUME_S3(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_RESUME_S3, a, ## __VA_ARGS__)
-#define DC_LOG_HW_AUDIO(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_AUDIO, a, ## __VA_ARGS__)
-#define DC_LOG_HW_HPD_IRQ(a, ...) dm_logger_write(DC_LOGGER, LOG_HW_HPD_IRQ, a, ## __VA_ARGS__)
-#define DC_LOG_MST(a, ...) dm_logger_write(DC_LOGGER, LOG_MST, a, ## __VA_ARGS__)
-#define DC_LOG_SCALER(a, ...) dm_logger_write(DC_LOGGER, LOG_SCALER, a, ## __VA_ARGS__)
-#define DC_LOG_BIOS(a, ...) dm_logger_write(DC_LOGGER, LOG_BIOS, a, ## __VA_ARGS__)
-#define DC_LOG_BANDWIDTH_CALCS(a, ...) dm_logger_write(DC_LOGGER, LOG_BANDWIDTH_CALCS, a, ## __VA_ARGS__)
-#define DC_LOG_BANDWIDTH_VALIDATION(a, ...) dm_logger_write(DC_LOGGER, LOG_BANDWIDTH_VALIDATION, a, ## __VA_ARGS__)
-#define DC_LOG_I2C_AUX(a, ...) dm_logger_write(DC_LOGGER, LOG_I2C_AUX, a, ## __VA_ARGS__)
-#define DC_LOG_SYNC(a, ...) dm_logger_write(DC_LOGGER, LOG_SYNC, a, ## __VA_ARGS__)
-#define DC_LOG_BACKLIGHT(a, ...) dm_logger_write(DC_LOGGER, LOG_BACKLIGHT, a, ## __VA_ARGS__)
-#define DC_LOG_FEATURE_OVERRIDE(a, ...) dm_logger_write(DC_LOGGER, LOG_FEATURE_OVERRIDE, a, ## __VA_ARGS__)
-#define DC_LOG_DETECTION_EDID_PARSER(a, ...) dm_logger_write(DC_LOGGER, LOG_DETECTION_EDID_PARSER, a, ## __VA_ARGS__)
-#define DC_LOG_DETECTION_DP_CAPS(a, ...) dm_logger_write(DC_LOGGER, LOG_DETECTION_DP_CAPS, a, ## __VA_ARGS__)
-#define DC_LOG_RESOURCE(a, ...) dm_logger_write(DC_LOGGER, LOG_RESOURCE, a, ## __VA_ARGS__)
-#define DC_LOG_DML(a, ...) dm_logger_write(DC_LOGGER, LOG_DML, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_MODE_SET(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_MODE_SET, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_DETECTION(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_DETECTION, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_LINK_TRAINING(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_LINK_TRAINING, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_LINK_LOSS(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_LINK_LOSS, a, ## __VA_ARGS__)
-#define DC_LOG_EVENT_UNDERFLOW(a, ...) dm_logger_write(DC_LOGGER, LOG_EVENT_UNDERFLOW, a, ## __VA_ARGS__)
-#define DC_LOG_IF_TRACE(a, ...) dm_logger_write(DC_LOGGER, LOG_IF_TRACE, a, ## __VA_ARGS__)
-#define DC_LOG_PERF_TRACE(a, ...) dm_logger_write(DC_LOGGER, LOG_PERF_TRACE, a, ## __VA_ARGS__)
 
+#define DC_LOG_ERROR(...) DRM_ERROR(__VA_ARGS__)
+#define DC_LOG_WARNING(...) DRM_WARN(__VA_ARGS__)
+#define DC_LOG_DEBUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DC(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DTN(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_SURFACE(...) pr_debug("[SURFACE]:"__VA_ARGS__)
+#define DC_LOG_HW_HOTPLUG(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_LINK_TRAINING(...) pr_debug("[HW_LINK_TRAINING]:"__VA_ARGS__)
+#define DC_LOG_HW_SET_MODE(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_RESUME_S3(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_HW_AUDIO(...) pr_debug("[HW_AUDIO]:"__VA_ARGS__)
+#define DC_LOG_HW_HPD_IRQ(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_MST(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_SCALER(...) pr_debug("[SCALER]:"__VA_ARGS__)
+#define DC_LOG_BIOS(...) pr_debug("[BIOS]:"__VA_ARGS__)
+#define DC_LOG_BANDWIDTH_CALCS(...) pr_debug("[BANDWIDTH_CALCS]:"__VA_ARGS__)
+#define DC_LOG_BANDWIDTH_VALIDATION(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_I2C_AUX(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_SYNC(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_BACKLIGHT(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_FEATURE_OVERRIDE(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DETECTION_EDID_PARSER(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DETECTION_DP_CAPS(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_RESOURCE(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_DML(...) pr_debug("[DML]:"__VA_ARGS__)
+#define DC_LOG_EVENT_MODE_SET(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_DETECTION(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_LINK_TRAINING(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_LINK_LOSS(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_EVENT_UNDERFLOW(...) DRM_DEBUG_KMS(__VA_ARGS__)
+#define DC_LOG_IF_TRACE(...) pr_debug("[IF_TRACE]:"__VA_ARGS__)
+#define DC_LOG_PERF_TRACE(...) DRM_DEBUG_KMS(__VA_ARGS__)
 
 struct dal_logger;
 
@@ -98,7 +98,7 @@
 	LOG_EVENT_UNDERFLOW,
 	LOG_IF_TRACE,
 	LOG_PERF_TRACE,
-	LOG_PROFILING,
+	LOG_DISPLAYSTATS,
 
 	LOG_SECTION_TOTAL_COUNT
 };
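A note on the pr_debug-based definitions above: the category prefix is joined
to the caller's format string by adjacent string-literal concatenation, so the
first argument must be a string literal. For example:

	DC_LOG_SURFACE("pitch=%d\n", pitch);
	/* expands to: pr_debug("[SURFACE]:" "pitch=%d\n", pitch);
	 * passing a runtime 'const char *fmt' here would not compile.
	 */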
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index b3747a0..0cd111d 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -43,7 +43,7 @@
 /* one-time setup of X points */
 void setup_x_points_distribution(void)
 {
-	struct fixed31_32 region_size = dal_fixed31_32_from_int(128);
+	struct fixed31_32 region_size = dc_fixpt_from_int(128);
 	int32_t segment;
 	uint32_t seg_offset;
 	uint32_t index;
@@ -53,8 +53,8 @@
 	coordinates_x[MAX_HW_POINTS + 1].x = region_size;
 
 	for (segment = 6; segment > (6 - NUM_REGIONS); segment--) {
-		region_size = dal_fixed31_32_div_int(region_size, 2);
-		increment = dal_fixed31_32_div_int(region_size,
+		region_size = dc_fixpt_div_int(region_size, 2);
+		increment = dc_fixpt_div_int(region_size,
 						NUM_PTS_IN_REGION);
 		seg_offset = (segment + (NUM_REGIONS - 7)) * NUM_PTS_IN_REGION;
 		coordinates_x[seg_offset].x = region_size;
@@ -62,7 +62,7 @@
 		for (index = seg_offset + 1;
 				index < seg_offset + NUM_PTS_IN_REGION;
 				index++) {
-			coordinates_x[index].x = dal_fixed31_32_add
+			coordinates_x[index].x = dc_fixpt_add
 					(coordinates_x[index-1].x, increment);
 		}
 	}
@@ -72,63 +72,63 @@
 {
 	/* consts for PQ gamma formula. */
 	const struct fixed31_32 m1 =
-		dal_fixed31_32_from_fraction(159301758, 1000000000);
+		dc_fixpt_from_fraction(159301758, 1000000000);
 	const struct fixed31_32 m2 =
-		dal_fixed31_32_from_fraction(7884375, 100000);
+		dc_fixpt_from_fraction(7884375, 100000);
 	const struct fixed31_32 c1 =
-		dal_fixed31_32_from_fraction(8359375, 10000000);
+		dc_fixpt_from_fraction(8359375, 10000000);
 	const struct fixed31_32 c2 =
-		dal_fixed31_32_from_fraction(188515625, 10000000);
+		dc_fixpt_from_fraction(188515625, 10000000);
 	const struct fixed31_32 c3 =
-		dal_fixed31_32_from_fraction(186875, 10000);
+		dc_fixpt_from_fraction(186875, 10000);
 
 	struct fixed31_32 l_pow_m1;
 	struct fixed31_32 base;
 
-	if (dal_fixed31_32_lt(in_x, dal_fixed31_32_zero))
-		in_x = dal_fixed31_32_zero;
+	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
+		in_x = dc_fixpt_zero;
 
-	l_pow_m1 = dal_fixed31_32_pow(in_x, m1);
-	base = dal_fixed31_32_div(
-			dal_fixed31_32_add(c1,
-					(dal_fixed31_32_mul(c2, l_pow_m1))),
-			dal_fixed31_32_add(dal_fixed31_32_one,
-					(dal_fixed31_32_mul(c3, l_pow_m1))));
-	*out_y = dal_fixed31_32_pow(base, m2);
+	l_pow_m1 = dc_fixpt_pow(in_x, m1);
+	base = dc_fixpt_div(
+			dc_fixpt_add(c1,
+					(dc_fixpt_mul(c2, l_pow_m1))),
+			dc_fixpt_add(dc_fixpt_one,
+					(dc_fixpt_mul(c3, l_pow_m1))));
+	*out_y = dc_fixpt_pow(base, m2);
 }
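/* Annotation (not part of the patch): compute_pq above is the SMPTE
 * ST 2084 (PQ) encoding curve in closed form,
 *   Y = ((c1 + c2 * L^m1) / (1 + c3 * L^m1))^m2,
 * with m1 = 0.159301758, m2 = 78.84375, c1 = 0.8359375, c2 = 18.8515625,
 * c3 = 18.6875, exactly the fractions used above. compute_de_pq below
 * applies the same constants in the inverse direction:
 *   L = (max(Y^(1/m2) - c1, 0) / (c2 - c3 * Y^(1/m2)))^(1/m1).
 */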
 
 static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y)
 {
 	/* consts for dePQ gamma formula. */
 	const struct fixed31_32 m1 =
-		dal_fixed31_32_from_fraction(159301758, 1000000000);
+		dc_fixpt_from_fraction(159301758, 1000000000);
 	const struct fixed31_32 m2 =
-		dal_fixed31_32_from_fraction(7884375, 100000);
+		dc_fixpt_from_fraction(7884375, 100000);
 	const struct fixed31_32 c1 =
-		dal_fixed31_32_from_fraction(8359375, 10000000);
+		dc_fixpt_from_fraction(8359375, 10000000);
 	const struct fixed31_32 c2 =
-		dal_fixed31_32_from_fraction(188515625, 10000000);
+		dc_fixpt_from_fraction(188515625, 10000000);
 	const struct fixed31_32 c3 =
-		dal_fixed31_32_from_fraction(186875, 10000);
+		dc_fixpt_from_fraction(186875, 10000);
 
 	struct fixed31_32 l_pow_m1;
 	struct fixed31_32 base, div;
 
 
-	if (dal_fixed31_32_lt(in_x, dal_fixed31_32_zero))
-		in_x = dal_fixed31_32_zero;
+	if (dc_fixpt_lt(in_x, dc_fixpt_zero))
+		in_x = dc_fixpt_zero;
 
-	l_pow_m1 = dal_fixed31_32_pow(in_x,
-			dal_fixed31_32_div(dal_fixed31_32_one, m2));
-	base = dal_fixed31_32_sub(l_pow_m1, c1);
+	l_pow_m1 = dc_fixpt_pow(in_x,
+			dc_fixpt_div(dc_fixpt_one, m2));
+	base = dc_fixpt_sub(l_pow_m1, c1);
 
-	if (dal_fixed31_32_lt(base, dal_fixed31_32_zero))
-		base = dal_fixed31_32_zero;
+	if (dc_fixpt_lt(base, dc_fixpt_zero))
+		base = dc_fixpt_zero;
 
-	div = dal_fixed31_32_sub(c2, dal_fixed31_32_mul(c3, l_pow_m1));
+	div = dc_fixpt_sub(c2, dc_fixpt_mul(c3, l_pow_m1));
 
-	*out_y = dal_fixed31_32_pow(dal_fixed31_32_div(base, div),
-			dal_fixed31_32_div(dal_fixed31_32_one, m1));
+	*out_y = dc_fixpt_pow(dc_fixpt_div(base, div),
+			dc_fixpt_div(dc_fixpt_one, m1));
 
 }
 /* one-time pre-compute PQ values - only for sdr_white_level 80 */
@@ -138,14 +138,14 @@
 	struct fixed31_32 x;
 	const struct hw_x_point *coord_x = coordinates_x + 32;
 	struct fixed31_32 scaling_factor =
-			dal_fixed31_32_from_fraction(80, 10000);
+			dc_fixpt_from_fraction(80, 10000);
 
 	/* the pow function has problems with arguments that are too small */
 	for (i = 0; i < 32; i++)
-		pq_table[i] = dal_fixed31_32_zero;
+		pq_table[i] = dc_fixpt_zero;
 
 	for (i = 32; i <= MAX_HW_POINTS; i++) {
-		x = dal_fixed31_32_mul(coord_x->x, scaling_factor);
+		x = dc_fixpt_mul(coord_x->x, scaling_factor);
 		compute_pq(x, &pq_table[i]);
 		++coord_x;
 	}
@@ -158,7 +158,7 @@
 	struct fixed31_32  y;
 	uint32_t begin_index, end_index;
 
-	struct fixed31_32 scaling_factor = dal_fixed31_32_from_int(125);
+	struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
 
 	/* X points are 2^-25 to 2^7
 	 * De-gamma X is 2^-12 to 2^0 – we are skipping the first -12-(-25) = 13 regions
@@ -167,11 +167,11 @@
 	end_index = begin_index + 12 * NUM_PTS_IN_REGION;
 
 	for (i = 0; i <= begin_index; i++)
-		de_pq_table[i] = dal_fixed31_32_zero;
+		de_pq_table[i] = dc_fixpt_zero;
 
 	for (; i <= end_index; i++) {
 		compute_de_pq(coordinates_x[i].x, &y);
-		de_pq_table[i] = dal_fixed31_32_mul(y, scaling_factor);
+		de_pq_table[i] = dc_fixpt_mul(y, scaling_factor);
 	}
 
 	for (; i <= MAX_HW_POINTS; i++)
@@ -185,25 +185,25 @@
 
 static void build_coefficients(struct gamma_coefficients *coefficients, bool is_2_4)
 {
-		static const int32_t numerator01[] = { 31308, 180000};
-		static const int32_t numerator02[] = { 12920, 4500};
-		static const int32_t numerator03[] = { 55, 99};
-		static const int32_t numerator04[] = { 55, 99};
-		static const int32_t numerator05[] = { 2400, 2200};
+	static const int32_t numerator01[] = { 31308, 180000};
+	static const int32_t numerator02[] = { 12920, 4500};
+	static const int32_t numerator03[] = { 55, 99};
+	static const int32_t numerator04[] = { 55, 99};
+	static const int32_t numerator05[] = { 2400, 2200};
 
-		uint32_t i = 0;
-		uint32_t index = is_2_4 == true ? 0:1;
+	uint32_t i = 0;
+	uint32_t index = is_2_4 == true ? 0:1;
 
 	do {
-		coefficients->a0[i] = dal_fixed31_32_from_fraction(
+		coefficients->a0[i] = dc_fixpt_from_fraction(
 			numerator01[index], 10000000);
-		coefficients->a1[i] = dal_fixed31_32_from_fraction(
+		coefficients->a1[i] = dc_fixpt_from_fraction(
 			numerator02[index], 1000);
-		coefficients->a2[i] = dal_fixed31_32_from_fraction(
+		coefficients->a2[i] = dc_fixpt_from_fraction(
 			numerator03[index], 1000);
-		coefficients->a3[i] = dal_fixed31_32_from_fraction(
+		coefficients->a3[i] = dc_fixpt_from_fraction(
 			numerator04[index], 1000);
-		coefficients->user_gamma[i] = dal_fixed31_32_from_fraction(
+		coefficients->user_gamma[i] = dc_fixpt_from_fraction(
 			numerator05[index], 1000);
 
 		++i;
@@ -218,33 +218,33 @@
 	struct fixed31_32 a3,
 	struct fixed31_32 gamma)
 {
-	const struct fixed31_32 one = dal_fixed31_32_from_int(1);
+	const struct fixed31_32 one = dc_fixpt_from_int(1);
 
-	if (dal_fixed31_32_lt(one, arg))
+	if (dc_fixpt_lt(one, arg))
 		return one;
 
-	if (dal_fixed31_32_le(arg, dal_fixed31_32_neg(a0)))
-		return dal_fixed31_32_sub(
+	if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
+		return dc_fixpt_sub(
 			a2,
-			dal_fixed31_32_mul(
-				dal_fixed31_32_add(
+			dc_fixpt_mul(
+				dc_fixpt_add(
 					one,
 					a3),
-				dal_fixed31_32_pow(
-					dal_fixed31_32_neg(arg),
-					dal_fixed31_32_recip(gamma))));
-	else if (dal_fixed31_32_le(a0, arg))
-		return dal_fixed31_32_sub(
-			dal_fixed31_32_mul(
-				dal_fixed31_32_add(
+				dc_fixpt_pow(
+					dc_fixpt_neg(arg),
+					dc_fixpt_recip(gamma))));
+	else if (dc_fixpt_le(a0, arg))
+		return dc_fixpt_sub(
+			dc_fixpt_mul(
+				dc_fixpt_add(
 					one,
 					a3),
-				dal_fixed31_32_pow(
+				dc_fixpt_pow(
 					arg,
-					dal_fixed31_32_recip(gamma))),
+					dc_fixpt_recip(gamma))),
 			a2);
 	else
-		return dal_fixed31_32_mul(
+		return dc_fixpt_mul(
 			arg,
 			a1);
 }
@@ -259,25 +259,25 @@
 {
 	struct fixed31_32 linear;
 
-	a0 = dal_fixed31_32_mul(a0, a1);
-	if (dal_fixed31_32_le(arg, dal_fixed31_32_neg(a0)))
+	a0 = dc_fixpt_mul(a0, a1);
+	if (dc_fixpt_le(arg, dc_fixpt_neg(a0)))
 
-		linear = dal_fixed31_32_neg(
-				 dal_fixed31_32_pow(
-				 dal_fixed31_32_div(
-				 dal_fixed31_32_sub(a2, arg),
-				 dal_fixed31_32_add(
-				 dal_fixed31_32_one, a3)), gamma));
+		linear = dc_fixpt_neg(
+				 dc_fixpt_pow(
+				 dc_fixpt_div(
+				 dc_fixpt_sub(a2, arg),
+				 dc_fixpt_add(
+				 dc_fixpt_one, a3)), gamma));
 
-	else if (dal_fixed31_32_le(dal_fixed31_32_neg(a0), arg) &&
-			 dal_fixed31_32_le(arg, a0))
-		linear = dal_fixed31_32_div(arg, a1);
+	else if (dc_fixpt_le(dc_fixpt_neg(a0), arg) &&
+			 dc_fixpt_le(arg, a0))
+		linear = dc_fixpt_div(arg, a1);
 	else
-		linear =  dal_fixed31_32_pow(
-					dal_fixed31_32_div(
-					dal_fixed31_32_add(a2, arg),
-					dal_fixed31_32_add(
-					dal_fixed31_32_one, a3)), gamma);
+		linear =  dc_fixpt_pow(
+					dc_fixpt_div(
+					dc_fixpt_add(a2, arg),
+					dc_fixpt_add(
+					dc_fixpt_one, a3)), gamma);
 
 	return linear;
 }
@@ -352,8 +352,8 @@
 				right = axis_x[max_number - 1].b;
 		}
 
-		if (dal_fixed31_32_le(left, hw_point) &&
-			dal_fixed31_32_le(hw_point, right)) {
+		if (dc_fixpt_le(left, hw_point) &&
+			dc_fixpt_le(hw_point, right)) {
 			*index_to_start = i;
 			*index_left = i;
 
@@ -366,7 +366,7 @@
 
 			return true;
 		} else if ((i == *index_to_start) &&
-			dal_fixed31_32_le(hw_point, left)) {
+			dc_fixpt_le(hw_point, left)) {
 			*index_to_start = i;
 			*index_left = i;
 			*index_right = i;
@@ -375,7 +375,7 @@
 
 			return true;
 		} else if ((i == max_number - 1) &&
-			dal_fixed31_32_le(right, hw_point)) {
+			dc_fixpt_le(right, hw_point)) {
 			*index_to_start = i;
 			*index_left = i;
 			*index_right = i;
@@ -457,17 +457,17 @@
 		}
 
 		if (hw_pos == HW_POINT_POSITION_MIDDLE)
-			point->coeff = dal_fixed31_32_div(
-				dal_fixed31_32_sub(
+			point->coeff = dc_fixpt_div(
+				dc_fixpt_sub(
 					coord_x,
 					left_pos),
-				dal_fixed31_32_sub(
+				dc_fixpt_sub(
 					right_pos,
 					left_pos));
 		else if (hw_pos == HW_POINT_POSITION_LEFT)
-			point->coeff = dal_fixed31_32_zero;
+			point->coeff = dc_fixpt_zero;
 		else if (hw_pos == HW_POINT_POSITION_RIGHT)
-			point->coeff = dal_fixed31_32_from_int(2);
+			point->coeff = dc_fixpt_from_int(2);
 		else {
 			BREAK_TO_DEBUGGER();
 			return false;
@@ -502,45 +502,45 @@
 
 	if ((point->left_index < 0) || (point->left_index > max_index)) {
 		BREAK_TO_DEBUGGER();
-		return dal_fixed31_32_zero;
+		return dc_fixpt_zero;
 	}
 
 	if ((point->right_index < 0) || (point->right_index > max_index)) {
 		BREAK_TO_DEBUGGER();
-		return dal_fixed31_32_zero;
+		return dc_fixpt_zero;
 	}
 
 	if (point->pos == HW_POINT_POSITION_MIDDLE)
 		if (channel == CHANNEL_NAME_RED)
-			result = dal_fixed31_32_add(
-				dal_fixed31_32_mul(
+			result = dc_fixpt_add(
+				dc_fixpt_mul(
 					point->coeff,
-					dal_fixed31_32_sub(
+					dc_fixpt_sub(
 						rgb[point->right_index].r,
 						rgb[point->left_index].r)),
 				rgb[point->left_index].r);
 		else if (channel == CHANNEL_NAME_GREEN)
-			result = dal_fixed31_32_add(
-				dal_fixed31_32_mul(
+			result = dc_fixpt_add(
+				dc_fixpt_mul(
 					point->coeff,
-					dal_fixed31_32_sub(
+					dc_fixpt_sub(
 						rgb[point->right_index].g,
 						rgb[point->left_index].g)),
 				rgb[point->left_index].g);
 		else
-			result = dal_fixed31_32_add(
-				dal_fixed31_32_mul(
+			result = dc_fixpt_add(
+				dc_fixpt_mul(
 					point->coeff,
-					dal_fixed31_32_sub(
+					dc_fixpt_sub(
 						rgb[point->right_index].b,
 						rgb[point->left_index].b)),
 				rgb[point->left_index].b);
 	else if (point->pos == HW_POINT_POSITION_LEFT) {
 		BREAK_TO_DEBUGGER();
-		result = dal_fixed31_32_zero;
+		result = dc_fixpt_zero;
 	} else {
 		BREAK_TO_DEBUGGER();
-		result = dal_fixed31_32_one;
+		result = dc_fixpt_one;
 	}
 
 	return result;
@@ -558,7 +558,7 @@
 	struct fixed31_32 x;
 	struct fixed31_32 output;
 	struct fixed31_32 scaling_factor =
-			dal_fixed31_32_from_fraction(sdr_white_level, 10000);
+			dc_fixpt_from_fraction(sdr_white_level, 10000);
 
 	if (!pq_initialized && sdr_white_level == 80) {
 		precompute_pq();
@@ -579,15 +579,15 @@
 		if (sdr_white_level == 80) {
 			output = pq_table[i];
 		} else {
-			x = dal_fixed31_32_mul(coord_x->x, scaling_factor);
+			x = dc_fixpt_mul(coord_x->x, scaling_factor);
 			compute_pq(x, &output);
 		}
 
 		/* should really not happen? */
-		if (dal_fixed31_32_lt(output, dal_fixed31_32_zero))
-			output = dal_fixed31_32_zero;
-		else if (dal_fixed31_32_lt(dal_fixed31_32_one, output))
-			output = dal_fixed31_32_one;
+		if (dc_fixpt_lt(output, dc_fixpt_zero))
+			output = dc_fixpt_zero;
+		else if (dc_fixpt_lt(dc_fixpt_one, output))
+			output = dc_fixpt_one;
 
 		rgb->r = output;
 		rgb->g = output;
@@ -605,7 +605,7 @@
 	uint32_t i;
 	struct fixed31_32 output;
 
-	struct fixed31_32 scaling_factor = dal_fixed31_32_from_int(125);
+	struct fixed31_32 scaling_factor = dc_fixpt_from_int(125);
 
 	if (!de_pq_initialized) {
 		precompute_de_pq();
@@ -616,9 +616,9 @@
 	for (i = 0; i <= hw_points_num; i++) {
 		output = de_pq_table[i];
 		/* should really not happen? */
-		if (dal_fixed31_32_lt(output, dal_fixed31_32_zero))
-			output = dal_fixed31_32_zero;
-		else if (dal_fixed31_32_lt(scaling_factor, output))
+		if (dc_fixpt_lt(output, dc_fixpt_zero))
+			output = dc_fixpt_zero;
+		else if (dc_fixpt_lt(scaling_factor, output))
 			output = scaling_factor;
 		de_pq[i].r = output;
 		de_pq[i].g = output;
@@ -670,9 +670,9 @@
 	end_index = begin_index + 12 * NUM_PTS_IN_REGION;
 
 	while (i != begin_index) {
-		curve[i].r = dal_fixed31_32_zero;
-		curve[i].g = dal_fixed31_32_zero;
-		curve[i].b = dal_fixed31_32_zero;
+		curve[i].r = dc_fixpt_zero;
+		curve[i].g = dc_fixpt_zero;
+		curve[i].b = dc_fixpt_zero;
 		i++;
 	}
 
@@ -684,19 +684,19 @@
 		i++;
 	}
 	while (i != hw_points_num + 1) {
-		curve[i].r = dal_fixed31_32_one;
-		curve[i].g = dal_fixed31_32_one;
-		curve[i].b = dal_fixed31_32_one;
+		curve[i].r = dc_fixpt_one;
+		curve[i].g = dc_fixpt_one;
+		curve[i].b = dc_fixpt_one;
 		i++;
 	}
 }
 
-static bool scale_gamma(struct pwl_float_data *pwl_rgb,
+static void scale_gamma(struct pwl_float_data *pwl_rgb,
 		const struct dc_gamma *ramp,
 		struct dividers dividers)
 {
-	const struct fixed31_32 max_driver = dal_fixed31_32_from_int(0xFFFF);
-	const struct fixed31_32 max_os = dal_fixed31_32_from_int(0xFF00);
+	const struct fixed31_32 max_driver = dc_fixpt_from_int(0xFFFF);
+	const struct fixed31_32 max_os = dc_fixpt_from_int(0xFF00);
 	struct fixed31_32 scaler = max_os;
 	uint32_t i;
 	struct pwl_float_data *rgb = pwl_rgb;
@@ -705,9 +705,9 @@
 	i = 0;
 
 	do {
-		if (dal_fixed31_32_lt(max_os, ramp->entries.red[i]) ||
-			dal_fixed31_32_lt(max_os, ramp->entries.green[i]) ||
-			dal_fixed31_32_lt(max_os, ramp->entries.blue[i])) {
+		if (dc_fixpt_lt(max_os, ramp->entries.red[i]) ||
+			dc_fixpt_lt(max_os, ramp->entries.green[i]) ||
+			dc_fixpt_lt(max_os, ramp->entries.blue[i])) {
 			scaler = max_driver;
 			break;
 		}
@@ -717,109 +717,170 @@
 	i = 0;
 
 	do {
-		rgb->r = dal_fixed31_32_div(
+		rgb->r = dc_fixpt_div(
 			ramp->entries.red[i], scaler);
-		rgb->g = dal_fixed31_32_div(
+		rgb->g = dc_fixpt_div(
 			ramp->entries.green[i], scaler);
-		rgb->b = dal_fixed31_32_div(
+		rgb->b = dc_fixpt_div(
 			ramp->entries.blue[i], scaler);
 
 		++rgb;
 		++i;
 	} while (i != ramp->num_entries);
 
-	rgb->r = dal_fixed31_32_mul(rgb_last->r,
+	rgb->r = dc_fixpt_mul(rgb_last->r,
 			dividers.divider1);
-	rgb->g = dal_fixed31_32_mul(rgb_last->g,
+	rgb->g = dc_fixpt_mul(rgb_last->g,
 			dividers.divider1);
-	rgb->b = dal_fixed31_32_mul(rgb_last->b,
+	rgb->b = dc_fixpt_mul(rgb_last->b,
 			dividers.divider1);
 
 	++rgb;
 
-	rgb->r = dal_fixed31_32_mul(rgb_last->r,
+	rgb->r = dc_fixpt_mul(rgb_last->r,
 			dividers.divider2);
-	rgb->g = dal_fixed31_32_mul(rgb_last->g,
+	rgb->g = dc_fixpt_mul(rgb_last->g,
 			dividers.divider2);
-	rgb->b = dal_fixed31_32_mul(rgb_last->b,
+	rgb->b = dc_fixpt_mul(rgb_last->b,
 			dividers.divider2);
 
 	++rgb;
 
-	rgb->r = dal_fixed31_32_mul(rgb_last->r,
+	rgb->r = dc_fixpt_mul(rgb_last->r,
 			dividers.divider3);
-	rgb->g = dal_fixed31_32_mul(rgb_last->g,
+	rgb->g = dc_fixpt_mul(rgb_last->g,
 			dividers.divider3);
-	rgb->b = dal_fixed31_32_mul(rgb_last->b,
+	rgb->b = dc_fixpt_mul(rgb_last->b,
 			dividers.divider3);
-
-	return true;
 }
 
-static bool scale_gamma_dx(struct pwl_float_data *pwl_rgb,
+static void scale_gamma_dx(struct pwl_float_data *pwl_rgb,
 		const struct dc_gamma *ramp,
 		struct dividers dividers)
 {
 	uint32_t i;
-	struct fixed31_32 min = dal_fixed31_32_zero;
-	struct fixed31_32 max = dal_fixed31_32_one;
+	struct fixed31_32 min = dc_fixpt_zero;
+	struct fixed31_32 max = dc_fixpt_one;
 
-	struct fixed31_32 delta = dal_fixed31_32_zero;
-	struct fixed31_32 offset = dal_fixed31_32_zero;
+	struct fixed31_32 delta = dc_fixpt_zero;
+	struct fixed31_32 offset = dc_fixpt_zero;
 
 	for (i = 0 ; i < ramp->num_entries; i++) {
-		if (dal_fixed31_32_lt(ramp->entries.red[i], min))
+		if (dc_fixpt_lt(ramp->entries.red[i], min))
 			min = ramp->entries.red[i];
 
-		if (dal_fixed31_32_lt(ramp->entries.green[i], min))
+		if (dc_fixpt_lt(ramp->entries.green[i], min))
 			min = ramp->entries.green[i];
 
-		if (dal_fixed31_32_lt(ramp->entries.blue[i], min))
+		if (dc_fixpt_lt(ramp->entries.blue[i], min))
 			min = ramp->entries.blue[i];
 
-		if (dal_fixed31_32_lt(max, ramp->entries.red[i]))
+		if (dc_fixpt_lt(max, ramp->entries.red[i]))
 			max = ramp->entries.red[i];
 
-		if (dal_fixed31_32_lt(max, ramp->entries.green[i]))
+		if (dc_fixpt_lt(max, ramp->entries.green[i]))
 			max = ramp->entries.green[i];
 
-		if (dal_fixed31_32_lt(max, ramp->entries.blue[i]))
+		if (dc_fixpt_lt(max, ramp->entries.blue[i]))
 			max = ramp->entries.blue[i];
 	}
 
-	if (dal_fixed31_32_lt(min, dal_fixed31_32_zero))
-		delta = dal_fixed31_32_neg(min);
+	if (dc_fixpt_lt(min, dc_fixpt_zero))
+		delta = dc_fixpt_neg(min);
 
-	offset = dal_fixed31_32_add(min, max);
+	offset = dc_fixpt_add(min, max);
 
 	for (i = 0 ; i < ramp->num_entries; i++) {
-		pwl_rgb[i].r = dal_fixed31_32_div(
-			dal_fixed31_32_add(
+		pwl_rgb[i].r = dc_fixpt_div(
+			dc_fixpt_add(
 				ramp->entries.red[i], delta), offset);
-		pwl_rgb[i].g = dal_fixed31_32_div(
-			dal_fixed31_32_add(
+		pwl_rgb[i].g = dc_fixpt_div(
+			dc_fixpt_add(
 				ramp->entries.green[i], delta), offset);
-		pwl_rgb[i].b = dal_fixed31_32_div(
-			dal_fixed31_32_add(
+		pwl_rgb[i].b = dc_fixpt_div(
+			dc_fixpt_add(
 				ramp->entries.blue[i], delta), offset);
 
 	}
 
-	pwl_rgb[i].r =  dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+	pwl_rgb[i].r =  dc_fixpt_sub(dc_fixpt_mul_int(
 				pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
-	pwl_rgb[i].g =  dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+	pwl_rgb[i].g =  dc_fixpt_sub(dc_fixpt_mul_int(
 				pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
-	pwl_rgb[i].b =  dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+	pwl_rgb[i].b =  dc_fixpt_sub(dc_fixpt_mul_int(
 				pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
 	++i;
-	pwl_rgb[i].r =  dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+	pwl_rgb[i].r =  dc_fixpt_sub(dc_fixpt_mul_int(
 				pwl_rgb[i-1].r, 2), pwl_rgb[i-2].r);
-	pwl_rgb[i].g =  dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+	pwl_rgb[i].g =  dc_fixpt_sub(dc_fixpt_mul_int(
 				pwl_rgb[i-1].g, 2), pwl_rgb[i-2].g);
-	pwl_rgb[i].b =  dal_fixed31_32_sub(dal_fixed31_32_mul_int(
+	pwl_rgb[i].b =  dc_fixpt_sub(dc_fixpt_mul_int(
 				pwl_rgb[i-1].b, 2), pwl_rgb[i-2].b);
+}
 
-	return true;
+/* todo: all these scale_gamma functions are inherently the same, but
+ * take different structures as params or a different format for the ramp
+ * values. We could probably implement them in a more generic fashion.
+ */
+static void scale_user_regamma_ramp(struct pwl_float_data *pwl_rgb,
+		const struct regamma_ramp *ramp,
+		struct dividers dividers)
+{
+	unsigned short max_driver = 0xFFFF;
+	unsigned short max_os = 0xFF00;
+	unsigned short scaler = max_os;
+	uint32_t i;
+	struct pwl_float_data *rgb = pwl_rgb;
+	struct pwl_float_data *rgb_last = rgb + GAMMA_RGB_256_ENTRIES - 1;
+
+	i = 0;
+	do {
+		if (ramp->gamma[i] > max_os ||
+				ramp->gamma[i + 256] > max_os ||
+				ramp->gamma[i + 512] > max_os) {
+			scaler = max_driver;
+			break;
+		}
+		i++;
+	} while (i != GAMMA_RGB_256_ENTRIES);
+
+	i = 0;
+	do {
+		rgb->r = dc_fixpt_from_fraction(
+				ramp->gamma[i], scaler);
+		rgb->g = dc_fixpt_from_fraction(
+				ramp->gamma[i + 256], scaler);
+		rgb->b = dc_fixpt_from_fraction(
+				ramp->gamma[i + 512], scaler);
+
+		++rgb;
+		++i;
+	} while (i != GAMMA_RGB_256_ENTRIES);
+
+	rgb->r = dc_fixpt_mul(rgb_last->r,
+			dividers.divider1);
+	rgb->g = dc_fixpt_mul(rgb_last->g,
+			dividers.divider1);
+	rgb->b = dc_fixpt_mul(rgb_last->b,
+			dividers.divider1);
+
+	++rgb;
+
+	rgb->r = dc_fixpt_mul(rgb_last->r,
+			dividers.divider2);
+	rgb->g = dc_fixpt_mul(rgb_last->g,
+			dividers.divider2);
+	rgb->b = dc_fixpt_mul(rgb_last->b,
+			dividers.divider2);
+
+	++rgb;
+
+	rgb->r = dc_fixpt_mul(rgb_last->r,
+			dividers.divider3);
+	rgb->g = dc_fixpt_mul(rgb_last->g,
+			dividers.divider3);
+	rgb->b = dc_fixpt_mul(rgb_last->b,
+			dividers.divider3);
 }
 
 /*
@@ -852,7 +913,7 @@
 	struct fixed31_32 lut2;
 	const int max_lut_index = 4095;
 	const struct fixed31_32 max_lut_index_f =
-			dal_fixed31_32_from_int_nonconst(max_lut_index);
+			dc_fixpt_from_int(max_lut_index);
 	int32_t index = 0, index_next = 0;
 	struct fixed31_32 index_f;
 	struct fixed31_32 delta_lut;
@@ -870,10 +931,10 @@
 			else
 				regamma_y = &tf_pts->blue[i];
 
-			norm_y = dal_fixed31_32_mul(max_lut_index_f,
+			norm_y = dc_fixpt_mul(max_lut_index_f,
 						   *regamma_y);
-			index = dal_fixed31_32_floor(norm_y);
-			index_f = dal_fixed31_32_from_int_nonconst(index);
+			index = dc_fixpt_floor(norm_y);
+			index_f = dc_fixpt_from_int(index);
 
 			if (index < 0 || index > max_lut_index)
 				continue;
@@ -892,11 +953,11 @@
 			}
 
 			// we have everything now, so interpolate
-			delta_lut = dal_fixed31_32_sub(lut2, lut1);
-			delta_index = dal_fixed31_32_sub(norm_y, index_f);
+			delta_lut = dc_fixpt_sub(lut2, lut1);
+			delta_index = dc_fixpt_sub(norm_y, index_f);
 
-			*regamma_y = dal_fixed31_32_add(lut1,
-				dal_fixed31_32_mul(delta_index, delta_lut));
+			*regamma_y = dc_fixpt_add(lut1,
+				dc_fixpt_mul(delta_index, delta_lut));
 		}
 	}
 }
@@ -912,7 +973,7 @@
 	uint32_t i = 0;
 
 	do {
-		struct fixed31_32 value = dal_fixed31_32_from_fraction(i,
+		struct fixed31_32 value = dc_fixpt_from_fraction(i,
 			numberof_points - 1);
 
 		p->r = value;
@@ -923,21 +984,21 @@
 		++i;
 	} while (i != numberof_points);
 
-	p->r = dal_fixed31_32_div(p_last->r, dividers.divider1);
-	p->g = dal_fixed31_32_div(p_last->g, dividers.divider1);
-	p->b = dal_fixed31_32_div(p_last->b, dividers.divider1);
+	p->r = dc_fixpt_div(p_last->r, dividers.divider1);
+	p->g = dc_fixpt_div(p_last->g, dividers.divider1);
+	p->b = dc_fixpt_div(p_last->b, dividers.divider1);
 
 	++p;
 
-	p->r = dal_fixed31_32_div(p_last->r, dividers.divider2);
-	p->g = dal_fixed31_32_div(p_last->g, dividers.divider2);
-	p->b = dal_fixed31_32_div(p_last->b, dividers.divider2);
+	p->r = dc_fixpt_div(p_last->r, dividers.divider2);
+	p->g = dc_fixpt_div(p_last->g, dividers.divider2);
+	p->b = dc_fixpt_div(p_last->b, dividers.divider2);
 
 	++p;
 
-	p->r = dal_fixed31_32_div(p_last->r, dividers.divider3);
-	p->g = dal_fixed31_32_div(p_last->g, dividers.divider3);
-	p->b = dal_fixed31_32_div(p_last->b, dividers.divider3);
+	p->r = dc_fixpt_div(p_last->r, dividers.divider3);
+	p->g = dc_fixpt_div(p_last->g, dividers.divider3);
+	p->b = dc_fixpt_div(p_last->b, dividers.divider3);
 }
 
 static inline void copy_rgb_regamma_to_coordinates_x(
@@ -949,7 +1010,7 @@
 	uint32_t i = 0;
 	const struct pwl_float_data_ex *rgb_regamma = rgb_ex;
 
-	while (i <= hw_points_num) {
+	while (i <= hw_points_num + 1) {
 		coords->regamma_y_red = rgb_regamma->r;
 		coords->regamma_y_green = rgb_regamma->g;
 		coords->regamma_y_blue = rgb_regamma->b;
@@ -1002,6 +1063,102 @@
 	return true;
 }
 
+/* The "old" interpolation uses a complicated scheme to build an array of
+ * coefficients while also using an array of 0-255 values normalized to 0-1.
+ * Then there's another loop using both of the above plus the new scaled user
+ * ramp, and we concatenate them. It also searches for points of interpolation
+ * and uses enums for positions.
+ *
+ * This function uses a different approach:
+ * the user ramp is always applied on X with 0/255, 1/255, 2/255, ..., 255/255.
+ * To find the index for hwX, we note the following:
+ * i/255 <= hwX < (i+1)/255  <=>  i <= 255*hwX < i+1
+ * See apply_lut_1d, which uses the same principle, but on a 4K-entry 1D LUT.
+ *
+ * Once the index is known, the combined Y is simply:
+ * user_ramp(index) + (hwX - index/255) * (user_ramp(index+1) - user_ramp(index))
+ *
+ * We should switch to this method in all cases; it's simpler and faster.
+ * ToDo one day - for now this only applies to ADL regamma, to avoid
+ * regressions in the regular use cases (sRGB and PQ).
+ * (A worked numeric example is annotated after the function body below.)
+ */
+static void interpolate_user_regamma(uint32_t hw_points_num,
+		struct pwl_float_data *rgb_user,
+		bool apply_degamma,
+		struct dc_transfer_func_distributed_points *tf_pts)
+{
+	uint32_t i;
+	uint32_t color = 0;
+	int32_t index;
+	int32_t index_next;
+	struct fixed31_32 *tf_point;
+	struct fixed31_32 hw_x;
+	struct fixed31_32 norm_factor =
+			dc_fixpt_from_int(255);
+	struct fixed31_32 norm_x;
+	struct fixed31_32 index_f;
+	struct fixed31_32 lut1;
+	struct fixed31_32 lut2;
+	struct fixed31_32 delta_lut;
+	struct fixed31_32 delta_index;
+
+	i = 0;
+	/* the fixed_pt library has problems handling values that are too small */
+	while (i != 32) {
+		tf_pts->red[i] = dc_fixpt_zero;
+		tf_pts->green[i] = dc_fixpt_zero;
+		tf_pts->blue[i] = dc_fixpt_zero;
+		++i;
+	}
+	while (i <= hw_points_num + 1) {
+		for (color = 0; color < 3; color++) {
+			if (color == 0)
+				tf_point = &tf_pts->red[i];
+			else if (color == 1)
+				tf_point = &tf_pts->green[i];
+			else
+				tf_point = &tf_pts->blue[i];
+
+			if (apply_degamma) {
+				if (color == 0)
+					hw_x = coordinates_x[i].regamma_y_red;
+				else if (color == 1)
+					hw_x = coordinates_x[i].regamma_y_green;
+				else
+					hw_x = coordinates_x[i].regamma_y_blue;
+			} else
+				hw_x = coordinates_x[i].x;
+
+			norm_x = dc_fixpt_mul(norm_factor, hw_x);
+			index = dc_fixpt_floor(norm_x);
+			if (index < 0 || index > 255)
+				continue;
+
+			index_f = dc_fixpt_from_int(index);
+			index_next = (index == 255) ? index : index + 1;
+
+			if (color == 0) {
+				lut1 = rgb_user[index].r;
+				lut2 = rgb_user[index_next].r;
+			} else if (color == 1) {
+				lut1 = rgb_user[index].g;
+				lut2 = rgb_user[index_next].g;
+			} else {
+				lut1 = rgb_user[index].b;
+				lut2 = rgb_user[index_next].b;
+			}
+
+			// we have everything now, so interpolate
+			delta_lut = dc_fixpt_sub(lut2, lut1);
+			delta_index = dc_fixpt_sub(norm_x, index_f);
+
+			*tf_point = dc_fixpt_add(lut1,
+				dc_fixpt_mul(delta_index, delta_lut));
+		}
+		++i;
+	}
+}
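/* Annotation (not part of the patch): a worked instance of the indexing
 * scheme described above the function. For hw_x = 0.3:
 *   norm_x = 255 * 0.3 = 76.5, so index = 76, index_next = 77,
 *   delta_index = 0.5, and the interpolated output is
 *   rgb_user[76] + 0.5 * (rgb_user[77] - rgb_user[76]).
 */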
+
 static void build_new_custom_resulted_curve(
 	uint32_t hw_points_num,
 	struct dc_transfer_func_distributed_points *tf_pts)
@@ -1011,20 +1168,43 @@
 	i = 0;
 
 	while (i != hw_points_num + 1) {
-		tf_pts->red[i] = dal_fixed31_32_clamp(
-			tf_pts->red[i], dal_fixed31_32_zero,
-			dal_fixed31_32_one);
-		tf_pts->green[i] = dal_fixed31_32_clamp(
-			tf_pts->green[i], dal_fixed31_32_zero,
-			dal_fixed31_32_one);
-		tf_pts->blue[i] = dal_fixed31_32_clamp(
-			tf_pts->blue[i], dal_fixed31_32_zero,
-			dal_fixed31_32_one);
+		tf_pts->red[i] = dc_fixpt_clamp(
+			tf_pts->red[i], dc_fixpt_zero,
+			dc_fixpt_one);
+		tf_pts->green[i] = dc_fixpt_clamp(
+			tf_pts->green[i], dc_fixpt_zero,
+			dc_fixpt_one);
+		tf_pts->blue[i] = dc_fixpt_clamp(
+			tf_pts->blue[i], dc_fixpt_zero,
+			dc_fixpt_one);
 
 		++i;
 	}
 }
 
+static void apply_degamma_for_user_regamma(struct pwl_float_data_ex *rgb_regamma,
+		uint32_t hw_points_num)
+{
+	uint32_t i;
+
+	struct gamma_coefficients coeff;
+	struct pwl_float_data_ex *rgb = rgb_regamma;
+	const struct hw_x_point *coord_x = coordinates_x;
+
+	build_coefficients(&coeff, true);
+
+	i = 0;
+	while (i != hw_points_num + 1) {
+		rgb->r = translate_from_linear_space_ex(
+				coord_x->x, &coeff, 0);
+		rgb->g = rgb->r;
+		rgb->b = rgb->r;
+		++coord_x;
+		++rgb;
+		++i;
+	}
+}
+
 static bool map_regamma_hw_to_x_user(
 	const struct dc_gamma *ramp,
 	struct pixel_gamma_point *coeff128,
@@ -1062,6 +1242,7 @@
 		}
 	}
 
+	/* this should be named differently; all it does is clamp to 0-1 */
 	build_new_custom_resulted_curve(hw_points_num, tf_pts);
 
 	return true;
@@ -1109,9 +1290,9 @@
 	if (!coeff)
 		goto coeff_alloc_fail;
 
-	dividers.divider1 = dal_fixed31_32_from_fraction(3, 2);
-	dividers.divider2 = dal_fixed31_32_from_int(2);
-	dividers.divider3 = dal_fixed31_32_from_fraction(5, 2);
+	dividers.divider1 = dc_fixpt_from_fraction(3, 2);
+	dividers.divider2 = dc_fixpt_from_int(2);
+	dividers.divider3 = dc_fixpt_from_fraction(5, 2);
 
 	tf = output_tf->tf;
 
@@ -1168,6 +1349,113 @@
 	return ret;
 }
 
+bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
+		const struct regamma_lut *regamma)
+{
+	struct gamma_coefficients coeff;
+	const struct hw_x_point *coord_x = coordinates_x;
+	uint32_t i = 0;
+
+	do {
+		coeff.a0[i] = dc_fixpt_from_fraction(
+				regamma->coeff.A0[i], 10000000);
+		coeff.a1[i] = dc_fixpt_from_fraction(
+				regamma->coeff.A1[i], 1000);
+		coeff.a2[i] = dc_fixpt_from_fraction(
+				regamma->coeff.A2[i], 1000);
+		coeff.a3[i] = dc_fixpt_from_fraction(
+				regamma->coeff.A3[i], 1000);
+		coeff.user_gamma[i] = dc_fixpt_from_fraction(
+				regamma->coeff.gamma[i], 1000);
+
+		++i;
+	} while (i != 3);
+
+	i = 0;
+	/* the fixed_pt library has problems handling values that are too small */
+	while (i != 32) {
+		output_tf->tf_pts.red[i] = dc_fixpt_zero;
+		output_tf->tf_pts.green[i] = dc_fixpt_zero;
+		output_tf->tf_pts.blue[i] = dc_fixpt_zero;
+		++coord_x;
+		++i;
+	}
+	while (i != MAX_HW_POINTS + 1) {
+		output_tf->tf_pts.red[i] = translate_from_linear_space_ex(
+				coord_x->x, &coeff, 0);
+		output_tf->tf_pts.green[i] = translate_from_linear_space_ex(
+				coord_x->x, &coeff, 1);
+		output_tf->tf_pts.blue[i] = translate_from_linear_space_ex(
+				coord_x->x, &coeff, 2);
+		++coord_x;
+		++i;
+	}
+
+	// this function just clamps output to 0-1
+	build_new_custom_resulted_curve(MAX_HW_POINTS, &output_tf->tf_pts);
+	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+
+	return true;
+}
+
+bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
+		const struct regamma_lut *regamma)
+{
+	struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts;
+	struct dividers dividers;
+
+	struct pwl_float_data *rgb_user = NULL;
+	struct pwl_float_data_ex *rgb_regamma = NULL;
+	bool ret = false;
+
+	if (regamma == NULL)
+		return false;
+
+	output_tf->type = TF_TYPE_DISTRIBUTED_POINTS;
+
+	rgb_user = kzalloc(sizeof(*rgb_user) * (GAMMA_RGB_256_ENTRIES + _EXTRA_POINTS),
+			GFP_KERNEL);
+	if (!rgb_user)
+		goto rgb_user_alloc_fail;
+
+	rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS),
+			GFP_KERNEL);
+	if (!rgb_regamma)
+		goto rgb_regamma_alloc_fail;
+
+	dividers.divider1 = dc_fixpt_from_fraction(3, 2);
+	dividers.divider2 = dc_fixpt_from_int(2);
+	dividers.divider3 = dc_fixpt_from_fraction(5, 2);
+
+	scale_user_regamma_ramp(rgb_user, &regamma->ramp, dividers);
+
+	if (regamma->flags.bits.applyDegamma == 1) {
+		apply_degamma_for_user_regamma(rgb_regamma, MAX_HW_POINTS);
+		copy_rgb_regamma_to_coordinates_x(coordinates_x,
+				MAX_HW_POINTS, rgb_regamma);
+	}
+
+	interpolate_user_regamma(MAX_HW_POINTS, rgb_user,
+			regamma->flags.bits.applyDegamma, tf_pts);
+
+	// no custom HDR curves!
+	tf_pts->end_exponent = 0;
+	tf_pts->x_point_at_y1_red = 1;
+	tf_pts->x_point_at_y1_green = 1;
+	tf_pts->x_point_at_y1_blue = 1;
+
+	// this function just clamps output to 0-1
+	build_new_custom_resulted_curve(MAX_HW_POINTS, tf_pts);
+
+	ret = true;
+
+	kfree(rgb_regamma);
+rgb_regamma_alloc_fail:
+	kvfree(rgb_user);
+rgb_user_alloc_fail:
+	return ret;
+}
+
 bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 		const struct dc_gamma *ramp, bool mapUserRamp)
 {
@@ -1208,9 +1496,9 @@
 	if (!coeff)
 		goto coeff_alloc_fail;
 
-	dividers.divider1 = dal_fixed31_32_from_fraction(3, 2);
-	dividers.divider2 = dal_fixed31_32_from_int(2);
-	dividers.divider3 = dal_fixed31_32_from_fraction(5, 2);
+	dividers.divider1 = dc_fixpt_from_fraction(3, 2);
+	dividers.divider2 = dc_fixpt_from_int(2);
+	dividers.divider3 = dc_fixpt_from_fraction(5, 2);
 
 	tf = input_tf->tf;
 
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
index b7f9bc2..b6404899 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h
@@ -32,6 +32,47 @@
 struct dc_rgb_fixed;
 enum dc_transfer_func_predefined;
 
+/* For SetRegamma ADL interface support
+ * Must match escape type
+ */
+union regamma_flags {
+	unsigned int raw;
+	struct {
+		unsigned int gammaRampArray       :1;    // RegammaRamp is in use
+		unsigned int gammaFromEdid        :1;    // gamma from EDID is in use
+		unsigned int gammaFromEdidEx      :1;    // gamma from EDID is in use, but only for Display ID 1.2
+		unsigned int gammaFromUser        :1;    // user custom gamma is used
+		unsigned int coeffFromUser        :1;    // coeff. A0-A3 from the user is in use
+		unsigned int coeffFromEdid        :1;    // coeff. A0-A3 from EDID is in use
+		unsigned int applyDegamma         :1;    // flag for additional degamma correction in the driver
+		unsigned int gammaPredefinedSRGB  :1;    // flag for sRGB gamma
+		unsigned int gammaPredefinedPQ    :1;    // flag for PQ gamma
+		unsigned int gammaPredefinedPQ2084Interim :1;    // flag for PQ gamma, lower max nits
+		unsigned int gammaPredefined36    :1;    // flag for 3.6 gamma
+		unsigned int gammaPredefinedReset :1;    // flag to return to the previous gamma
+	} bits;
+};
+
+struct regamma_ramp {
+	unsigned short gamma[256*3];  // gamma ramp packed in the same way as OS Windows: r, g & b
+};
+
+struct regamma_coeff {
+	int    gamma[3];
+	int    A0[3];
+	int    A1[3];
+	int    A2[3];
+	int    A3[3];
+};
+
+struct regamma_lut {
+	union regamma_flags flags;
+	union {
+		struct regamma_ramp ramp;
+		struct regamma_coeff coeff;
+	};
+};
+
 void setup_x_points_distribution(void);
 void precompute_pq(void);
 void precompute_de_pq(void);
@@ -45,9 +86,14 @@
 bool mod_color_calculate_curve(enum dc_transfer_func_predefined  trans,
 		struct dc_transfer_func_distributed_points *points);
 
-bool  mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
+bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans,
 				struct dc_transfer_func_distributed_points *points);
 
+bool calculate_user_regamma_coeff(struct dc_transfer_func *output_tf,
+		const struct regamma_lut *regamma);
+
+bool calculate_user_regamma_ramp(struct dc_transfer_func *output_tf,
+		const struct regamma_lut *regamma);
 
 
 #endif /* COLOR_MOD_COLOR_GAMMA_H_ */
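To make the new entry points concrete, here is a hypothetical caller sketch;
the identity-ramp values and the output_tf pointer are illustrative, not taken
from this patch:

	struct regamma_lut lut = { 0 };
	int i;

	lut.flags.bits.gammaRampArray = 1;	/* select the ramp member of the union */

	/* ramp.gamma is packed R then G then B, 256 entries each */
	for (i = 0; i < 256; i++) {
		unsigned short v = (unsigned short)(i * 257);	/* identity, 0..0xFFFF */

		lut.ramp.gamma[i]       = v;	/* R */
		lut.ramp.gamma[i + 256] = v;	/* G */
		lut.ramp.gamma[i + 512] = v;	/* B */
	}

	if (!calculate_user_regamma_ramp(output_tf, &lut))
		DRM_ERROR("user regamma ramp rejected\n");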
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
index 3230e2a..3812094 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_stats.h
@@ -46,6 +46,10 @@
 
 void mod_stats_reset_data(struct mod_stats *mod_stats);
 
+void mod_stats_update_event(struct mod_stats *mod_stats,
+		char *event_string,
+		unsigned int length);
+
 void mod_stats_update_flip(struct mod_stats *mod_stats,
 		unsigned long timestamp_in_ns);
 
diff --git a/drivers/gpu/drm/amd/display/modules/stats/stats.c b/drivers/gpu/drm/amd/display/modules/stats/stats.c
index 041f87b..3f7d47f 100644
--- a/drivers/gpu/drm/amd/display/modules/stats/stats.c
+++ b/drivers/gpu/drm/amd/display/modules/stats/stats.c
@@ -36,9 +36,14 @@
 #define DAL_STATS_ENTRIES_REGKEY_DEFAULT	0x00350000
 #define DAL_STATS_ENTRIES_REGKEY_MAX		0x01000000
 
+#define DAL_STATS_EVENT_ENTRIES_DEFAULT		0x00000100
+
 #define MOD_STATS_NUM_VSYNCS			5
+#define MOD_STATS_EVENT_STRING_MAX		512
 
 struct stats_time_cache {
+	unsigned int entry_id;
+
 	unsigned long flip_timestamp_in_ns;
 	unsigned long vupdate_timestamp_in_ns;
 
@@ -63,15 +68,26 @@
 	unsigned int flags;
 };
 
+struct stats_event_cache {
+	unsigned int entry_id;
+	char event_string[MOD_STATS_EVENT_STRING_MAX];
+};
+
 struct core_stats {
 	struct mod_stats public;
 	struct dc *dc;
 
+	bool enabled;
+	unsigned int entries;
+	unsigned int event_entries;
+	unsigned int entry_id;
+
 	struct stats_time_cache *time;
 	unsigned int index;
 
-	bool enabled;
-	unsigned int entries;
+	struct stats_event_cache *events;
+	unsigned int event_index;
+
 };
 
 #define MOD_STATS_TO_CORE(mod_stats)\
@@ -99,12 +115,12 @@
 	unsigned int reg_data;
 	int i = 0;
 
+	if (dc == NULL)
+		goto fail_construct;
+
 	core_stats = kzalloc(sizeof(struct core_stats), GFP_KERNEL);
 
 	if (core_stats == NULL)
-		goto fail_alloc_context;
-
-	if (dc == NULL)
 		goto fail_construct;
 
 	core_stats->dc = dc;
@@ -115,33 +131,55 @@
 			&reg_data, sizeof(unsigned int), &flag))
 		core_stats->enabled = reg_data;
 
-	core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT;
-	if (dm_read_persistent_data(dc->ctx, NULL, NULL,
-			DAL_STATS_ENTRIES_REGKEY,
-			&reg_data, sizeof(unsigned int), &flag)) {
-		if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX)
-			core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX;
-		else
-			core_stats->entries = reg_data;
+	if (core_stats->enabled) {
+		core_stats->entries = DAL_STATS_ENTRIES_REGKEY_DEFAULT;
+		if (dm_read_persistent_data(dc->ctx, NULL, NULL,
+				DAL_STATS_ENTRIES_REGKEY,
+				&reg_data, sizeof(unsigned int), &flag)) {
+			if (reg_data > DAL_STATS_ENTRIES_REGKEY_MAX)
+				core_stats->entries = DAL_STATS_ENTRIES_REGKEY_MAX;
+			else
+				core_stats->entries = reg_data;
+		}
+		core_stats->time = kzalloc(
+			sizeof(struct stats_time_cache) *
+				core_stats->entries,
+						GFP_KERNEL);
+
+		if (core_stats->time == NULL)
+			goto fail_construct_time;
+
+		core_stats->event_entries = DAL_STATS_EVENT_ENTRIES_DEFAULT;
+		core_stats->events = kzalloc(
+			sizeof(struct stats_event_cache) *
+				core_stats->event_entries,
+						GFP_KERNEL);
+
+		if (core_stats->events == NULL)
+			goto fail_construct_events;
+
+	} else {
+		core_stats->entries = 0;
 	}
 
-	core_stats->time = kzalloc(sizeof(struct stats_time_cache) * core_stats->entries,
-					GFP_KERNEL);
-
-	if (core_stats->time == NULL)
-		goto fail_construct;
-
 	/* Purposely leave index 0 unused so we don't need special logic to
 	 * handle calculation cases that depend on previous flip data.
 	 */
 	core_stats->index = 1;
+	core_stats->event_index = 0;
+
+	// Keeps track of ordering within the different stats structures
+	core_stats->entry_id = 0;
 
 	return &core_stats->public;
 
-fail_construct:
+fail_construct_events:
+	kfree(core_stats->time);
+
+fail_construct_time:
 	kfree(core_stats);
 
-fail_alloc_context:
+fail_construct:
 	return NULL;
 }
 
@@ -153,6 +191,9 @@
 		if (core_stats->time != NULL)
 			kfree(core_stats->time);
 
+		if (core_stats->events != NULL)
+			kfree(core_stats->events);
+
 		kfree(core_stats);
 	}
 }
@@ -163,7 +204,11 @@
 	struct dal_logger *logger = NULL;
 	struct core_stats *core_stats = NULL;
 	struct stats_time_cache *time = NULL;
+	struct stats_event_cache *events = NULL;
+	unsigned int time_index = 1;
+	unsigned int event_index = 0;
 	unsigned int index = 0;
+	struct log_entry log_entry;
 
 	if (mod_stats == NULL)
 		return;
@@ -172,45 +217,62 @@
 	dc = core_stats->dc;
 	logger = dc->ctx->logger;
 	time = core_stats->time;
+	events = core_stats->events;
 
-	//LogEntry* pLog = GetLog()->Open(LogMajor_ISR, LogMinor_ISR_FreeSyncSW);
+	DISPLAY_STATS_BEGIN(log_entry);
 
-	//if (!pLog->IsDummyEntry())
-	{
-		dm_logger_write(logger, LOG_PROFILING, "==Display Caps==\n");
-		dm_logger_write(logger, LOG_PROFILING, "\n");
-		dm_logger_write(logger, LOG_PROFILING, "\n");
+	DISPLAY_STATS("==Display Caps==\n");
 
-		dm_logger_write(logger, LOG_PROFILING, "==Stats==\n");
-		dm_logger_write(logger, LOG_PROFILING,
-			"render avgRender minWindow midPoint maxWindow vsyncToFlip flipToVsync #vsyncBetweenFlip #frame insertDuration vTotalMin vTotalMax eventTrigs vSyncTime1 vSyncTime2 vSyncTime3 vSyncTime4 vSyncTime5 flags\n");
+	DISPLAY_STATS("==Display Stats==\n");
 
-		for (int i = 0; i < core_stats->index && i < core_stats->entries; i++) {
-			dm_logger_write(logger, LOG_PROFILING,
-					"%u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u  %u\n",
-					time[i].render_time_in_us,
-					time[i].avg_render_time_in_us_last_ten,
-					time[i].min_window,
-					time[i].lfc_mid_point_in_us,
-					time[i].max_window,
-					time[i].vsync_to_flip_time_in_us,
-					time[i].flip_to_vsync_time_in_us,
-					time[i].num_vsync_between_flips,
-					time[i].num_frames_inserted,
-					time[i].inserted_duration_in_us,
-					time[i].v_total_min,
-					time[i].v_total_max,
-					time[i].event_triggers,
-					time[i].v_sync_time_in_us[0],
-					time[i].v_sync_time_in_us[1],
-					time[i].v_sync_time_in_us[2],
-					time[i].v_sync_time_in_us[3],
-					time[i].v_sync_time_in_us[4],
-					time[i].flags);
+	DISPLAY_STATS("%10s %10s %10s %10s %10s"
+			" %11s %11s %17s %10s %14s"
+			" %10s %10s %10s %10s %10s"
+			" %10s %10s %10s %10s\n",
+		"render", "avgRender",
+		"minWindow", "midPoint", "maxWindow",
+		"vsyncToFlip", "flipToVsync", "vsyncsBetweenFlip",
+		"numFrame", "insertDuration",
+		"vTotalMin", "vTotalMax", "eventTrigs",
+		"vSyncTime1", "vSyncTime2", "vSyncTime3",
+		"vSyncTime4", "vSyncTime5", "flags");
+
+	for (int i = 0; i < core_stats->entry_id; i++) {
+		if (event_index < core_stats->event_index &&
+				i == events[event_index].entry_id) {
+			DISPLAY_STATS("%s\n", events[event_index].event_string);
+			event_index++;
+		} else if (time_index < core_stats->index &&
+				i == time[time_index].entry_id) {
+			DISPLAY_STATS("%10u %10u %10u %10u %10u"
+					" %11u %11u %17u %10u %14u"
+					" %10u %10u %10u %10u %10u"
+					" %10u %10u %10u %10u\n",
+				time[time_index].render_time_in_us,
+				time[time_index].avg_render_time_in_us_last_ten,
+				time[time_index].min_window,
+				time[time_index].lfc_mid_point_in_us,
+				time[time_index].max_window,
+				time[time_index].vsync_to_flip_time_in_us,
+				time[time_index].flip_to_vsync_time_in_us,
+				time[time_index].num_vsync_between_flips,
+				time[time_index].num_frames_inserted,
+				time[time_index].inserted_duration_in_us,
+				time[time_index].v_total_min,
+				time[time_index].v_total_max,
+				time[time_index].event_triggers,
+				time[time_index].v_sync_time_in_us[0],
+				time[time_index].v_sync_time_in_us[1],
+				time[time_index].v_sync_time_in_us[2],
+				time[time_index].v_sync_time_in_us[3],
+				time[time_index].v_sync_time_in_us[4],
+				time[time_index].flags);
+
+			time_index++;
 		}
 	}
-	//GetLog()->Close(pLog);
-	//GetLog()->UnSetLogMask(LogMajor_ISR, LogMinor_ISR_FreeSyncSW);
+
+	DISPLAY_STATS_END(log_entry);
 }
 
 void mod_stats_reset_data(struct mod_stats *mod_stats)
@@ -227,7 +289,46 @@
 	memset(core_stats->time, 0,
 		sizeof(struct stats_time_cache) * core_stats->entries);
 
-	core_stats->index = 0;
+	memset(core_stats->events, 0,
+		sizeof(struct stats_event_cache) * core_stats->event_entries);
+
+	core_stats->index = 1;
+	core_stats->event_index = 0;
+
+	// Keeps track of ordering within the different stats structures
+	core_stats->entry_id = 0;
+}
+
+void mod_stats_update_event(struct mod_stats *mod_stats,
+		char *event_string,
+		unsigned int length)
+{
+	struct core_stats *core_stats = NULL;
+	struct stats_event_cache *events = NULL;
+	unsigned int index = 0;
+	unsigned int copy_length = 0;
+
+	if (mod_stats == NULL)
+		return;
+
+	core_stats = MOD_STATS_TO_CORE(mod_stats);
+
+	if (core_stats->event_index >= core_stats->event_entries)
+		return;
+
+	events = core_stats->events;
+	index = core_stats->event_index;
+
+	copy_length = length;
+	if (length > MOD_STATS_EVENT_STRING_MAX)
+		copy_length = MOD_STATS_EVENT_STRING_MAX;
+
+	memcpy(&events[index].event_string, event_string, copy_length);
+	events[index].event_string[copy_length - 1] = '\0';
+
+	events[index].entry_id = core_stats->entry_id;
+	core_stats->event_index++;
+	core_stats->entry_id++;
 }
 
 void mod_stats_update_flip(struct mod_stats *mod_stats,
@@ -250,7 +351,7 @@
 
 	time[index].flip_timestamp_in_ns = timestamp_in_ns;
 	time[index].render_time_in_us =
-		timestamp_in_ns - time[index - 1].flip_timestamp_in_ns;
+		(timestamp_in_ns - time[index - 1].flip_timestamp_in_ns) / 1000;
 
 	if (index >= 10) {
 		for (unsigned int i = 0; i < 10; i++)
@@ -261,12 +362,16 @@
 
 	if (time[index].num_vsync_between_flips > 0)
 		time[index].vsync_to_flip_time_in_us =
-			timestamp_in_ns - time[index].vupdate_timestamp_in_ns;
+			(timestamp_in_ns -
+				time[index].vupdate_timestamp_in_ns) / 1000;
 	else
 		time[index].vsync_to_flip_time_in_us =
-			timestamp_in_ns - time[index - 1].vupdate_timestamp_in_ns;
+			(timestamp_in_ns -
+				time[index - 1].vupdate_timestamp_in_ns) / 1000;
 
+	time[index].entry_id = core_stats->entry_id;
 	core_stats->index++;
+	core_stats->entry_id++;
 }
 
 void mod_stats_update_vupdate(struct mod_stats *mod_stats,
@@ -275,6 +380,8 @@
 	struct core_stats *core_stats = NULL;
 	struct stats_time_cache *time = NULL;
 	unsigned int index = 0;
+	unsigned int num_vsyncs = 0;
+	unsigned int prev_vsync_in_ns = 0;
 
 	if (mod_stats == NULL)
 		return;
@@ -286,14 +393,27 @@
 
 	time = core_stats->time;
 	index = core_stats->index;
+	num_vsyncs = time[index].num_vsync_between_flips;
+
+	if (num_vsyncs < MOD_STATS_NUM_VSYNCS) {
+		if (num_vsyncs == 0) {
+			prev_vsync_in_ns =
+				time[index - 1].vupdate_timestamp_in_ns;
+
+			time[index].flip_to_vsync_time_in_us =
+				(timestamp_in_ns -
+					time[index - 1].flip_timestamp_in_ns) /
+					1000;
+		} else {
+			prev_vsync_in_ns =
+				time[index].vupdate_timestamp_in_ns;
+		}
+
+		time[index].v_sync_time_in_us[num_vsyncs] =
+			(timestamp_in_ns - prev_vsync_in_ns) / 1000;
+	}
 
 	time[index].vupdate_timestamp_in_ns = timestamp_in_ns;
-	if (time[index].num_vsync_between_flips < MOD_STATS_NUM_VSYNCS)
-		time[index].v_sync_time_in_us[time[index].num_vsync_between_flips] =
-			timestamp_in_ns - time[index - 1].vupdate_timestamp_in_ns;
-	time[index].flip_to_vsync_time_in_us =
-		timestamp_in_ns - time[index - 1].flip_timestamp_in_ns;
-
 	time[index].num_vsync_between_flips++;
 }
 
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 9fa3aae..b178176 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -92,7 +92,7 @@
 #define AMD_CG_SUPPORT_GFX_3D_CGLS		(1 << 21)
 #define AMD_CG_SUPPORT_DRM_MGCG			(1 << 22)
 #define AMD_CG_SUPPORT_DF_MGCG			(1 << 23)
-
+#define AMD_CG_SUPPORT_VCN_MGCG			(1 << 24)
 /* PG flags */
 #define AMD_PG_SUPPORT_GFX_PG			(1 << 0)
 #define AMD_PG_SUPPORT_GFX_SMG			(1 << 1)
@@ -108,6 +108,27 @@
 #define AMD_PG_SUPPORT_GFX_QUICK_MG		(1 << 11)
 #define AMD_PG_SUPPORT_GFX_PIPELINE		(1 << 12)
 #define AMD_PG_SUPPORT_MMHUB			(1 << 13)
+#define AMD_PG_SUPPORT_VCN			(1 << 14)
+
+enum PP_FEATURE_MASK {
+	PP_SCLK_DPM_MASK = 0x1,
+	PP_MCLK_DPM_MASK = 0x2,
+	PP_PCIE_DPM_MASK = 0x4,
+	PP_SCLK_DEEP_SLEEP_MASK = 0x8,
+	PP_POWER_CONTAINMENT_MASK = 0x10,
+	PP_UVD_HANDSHAKE_MASK = 0x20,
+	PP_SMC_VOLTAGE_CONTROL_MASK = 0x40,
+	PP_VBI_TIME_SUPPORT_MASK = 0x80,
+	PP_ULV_MASK = 0x100,
+	PP_ENABLE_GFX_CG_THRU_SMU = 0x200,
+	PP_CLOCK_STRETCH_MASK = 0x400,
+	PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800,
+	PP_SOCCLK_DPM_MASK = 0x1000,
+	PP_DCEFCLK_DPM_MASK = 0x2000,
+	PP_OVERDRIVE_MASK = 0x4000,
+	PP_GFXOFF_MASK = 0x8000,
+	PP_ACG_MASK = 0x10000,
+};
 
 struct amd_ip_funcs {
 	/* Name of IP block */
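PP_FEATURE_MASK gives names to the bits of the powerplay feature mask; individual features are tested with plain bitwise AND. A hedged sketch, with feature_mask standing in for wherever the driver sources the value (for instance a module parameter):

  uint32_t feature_mask = PP_SCLK_DPM_MASK | PP_MCLK_DPM_MASK | PP_OVERDRIVE_MASK;

  if (feature_mask & PP_OVERDRIVE_MASK)
          enable_overdrive();   /* enable_overdrive() is hypothetical */
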
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
index f730d06..b6f74bf 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h
@@ -2095,6 +2095,18 @@
 #define mmDC_GPIO_AUX_CTRL_2_BASE_IDX                                                                  2
 #define mmDC_GPIO_RXEN                                                                                 0x212f
 #define mmDC_GPIO_RXEN_BASE_IDX                                                                        2
+#define mmDC_GPIO_AUX_CTRL_3                                                                           0x2130
+#define mmDC_GPIO_AUX_CTRL_3_BASE_IDX                                                                  2
+#define mmDC_GPIO_AUX_CTRL_4                                                                           0x2131
+#define mmDC_GPIO_AUX_CTRL_4_BASE_IDX                                                                  2
+#define mmDC_GPIO_AUX_CTRL_5                                                                           0x2132
+#define mmDC_GPIO_AUX_CTRL_5_BASE_IDX                                                                  2
+#define mmAUXI2C_PAD_ALL_PWR_OK                                                                        0x2133
+#define mmAUXI2C_PAD_ALL_PWR_OK_BASE_IDX                                                               2
+#define mmDC_GPIO_PULLUPEN                                                                             0x2134
+#define mmDC_GPIO_PULLUPEN_BASE_IDX                                                                    2
+#define mmDC_GPIO_AUX_CTRL_6                                                                           0x2135
+#define mmDC_GPIO_AUX_CTRL_6_BASE_IDX                                                                  2
 #define mmBPHYC_DAC_MACRO_CNTL                                                                         0x2136
 #define mmBPHYC_DAC_MACRO_CNTL_BASE_IDX                                                                2
 #define mmDAC_MACRO_CNTL_RESERVED0                                                                     0x2136
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
index 6d3162c..bcd190a 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_sh_mask.h
@@ -10971,6 +10971,158 @@
 #define DC_GPIO_RXEN__DC_GPIO_BLON_RXEN_MASK                                                                  0x00100000L
 #define DC_GPIO_RXEN__DC_GPIO_DIGON_RXEN_MASK                                                                 0x00200000L
 #define DC_GPIO_RXEN__DC_GPIO_ENA_BL_RXEN_MASK                                                                0x00400000L
+//DC_GPIO_AUX_CTRL_3
+#define DC_GPIO_AUX_CTRL_3__AUX1_NEN_RTERM__SHIFT                                                             0x0
+#define DC_GPIO_AUX_CTRL_3__AUX2_NEN_RTERM__SHIFT                                                             0x1
+#define DC_GPIO_AUX_CTRL_3__AUX3_NEN_RTERM__SHIFT                                                             0x2
+#define DC_GPIO_AUX_CTRL_3__AUX4_NEN_RTERM__SHIFT                                                             0x3
+#define DC_GPIO_AUX_CTRL_3__AUX5_NEN_RTERM__SHIFT                                                             0x4
+#define DC_GPIO_AUX_CTRL_3__AUX6_NEN_RTERM__SHIFT                                                             0x5
+#define DC_GPIO_AUX_CTRL_3__AUX1_DP_DN_SWAP__SHIFT                                                            0x8
+#define DC_GPIO_AUX_CTRL_3__AUX2_DP_DN_SWAP__SHIFT                                                            0x9
+#define DC_GPIO_AUX_CTRL_3__AUX3_DP_DN_SWAP__SHIFT                                                            0xa
+#define DC_GPIO_AUX_CTRL_3__AUX4_DP_DN_SWAP__SHIFT                                                            0xb
+#define DC_GPIO_AUX_CTRL_3__AUX5_DP_DN_SWAP__SHIFT                                                            0xc
+#define DC_GPIO_AUX_CTRL_3__AUX6_DP_DN_SWAP__SHIFT                                                            0xd
+#define DC_GPIO_AUX_CTRL_3__AUX1_HYS_TUNE__SHIFT                                                              0x10
+#define DC_GPIO_AUX_CTRL_3__AUX2_HYS_TUNE__SHIFT                                                              0x12
+#define DC_GPIO_AUX_CTRL_3__AUX3_HYS_TUNE__SHIFT                                                              0x14
+#define DC_GPIO_AUX_CTRL_3__AUX4_HYS_TUNE__SHIFT                                                              0x16
+#define DC_GPIO_AUX_CTRL_3__AUX5_HYS_TUNE__SHIFT                                                              0x18
+#define DC_GPIO_AUX_CTRL_3__AUX6_HYS_TUNE__SHIFT                                                              0x1a
+#define DC_GPIO_AUX_CTRL_3__AUX1_NEN_RTERM_MASK                                                               0x00000001L
+#define DC_GPIO_AUX_CTRL_3__AUX2_NEN_RTERM_MASK                                                               0x00000002L
+#define DC_GPIO_AUX_CTRL_3__AUX3_NEN_RTERM_MASK                                                               0x00000004L
+#define DC_GPIO_AUX_CTRL_3__AUX4_NEN_RTERM_MASK                                                               0x00000008L
+#define DC_GPIO_AUX_CTRL_3__AUX5_NEN_RTERM_MASK                                                               0x00000010L
+#define DC_GPIO_AUX_CTRL_3__AUX6_NEN_RTERM_MASK                                                               0x00000020L
+#define DC_GPIO_AUX_CTRL_3__AUX1_DP_DN_SWAP_MASK                                                              0x00000100L
+#define DC_GPIO_AUX_CTRL_3__AUX2_DP_DN_SWAP_MASK                                                              0x00000200L
+#define DC_GPIO_AUX_CTRL_3__AUX3_DP_DN_SWAP_MASK                                                              0x00000400L
+#define DC_GPIO_AUX_CTRL_3__AUX4_DP_DN_SWAP_MASK                                                              0x00000800L
+#define DC_GPIO_AUX_CTRL_3__AUX5_DP_DN_SWAP_MASK                                                              0x00001000L
+#define DC_GPIO_AUX_CTRL_3__AUX6_DP_DN_SWAP_MASK                                                              0x00002000L
+#define DC_GPIO_AUX_CTRL_3__AUX1_HYS_TUNE_MASK                                                                0x00030000L
+#define DC_GPIO_AUX_CTRL_3__AUX2_HYS_TUNE_MASK                                                                0x000C0000L
+#define DC_GPIO_AUX_CTRL_3__AUX3_HYS_TUNE_MASK                                                                0x00300000L
+#define DC_GPIO_AUX_CTRL_3__AUX4_HYS_TUNE_MASK                                                                0x00C00000L
+#define DC_GPIO_AUX_CTRL_3__AUX5_HYS_TUNE_MASK                                                                0x03000000L
+#define DC_GPIO_AUX_CTRL_3__AUX6_HYS_TUNE_MASK                                                                0x0C000000L
+//DC_GPIO_AUX_CTRL_4
+#define DC_GPIO_AUX_CTRL_4__AUX1_AUX_CTRL__SHIFT                                                              0x0
+#define DC_GPIO_AUX_CTRL_4__AUX2_AUX_CTRL__SHIFT                                                              0x4
+#define DC_GPIO_AUX_CTRL_4__AUX3_AUX_CTRL__SHIFT                                                              0x8
+#define DC_GPIO_AUX_CTRL_4__AUX4_AUX_CTRL__SHIFT                                                              0xc
+#define DC_GPIO_AUX_CTRL_4__AUX5_AUX_CTRL__SHIFT                                                              0x10
+#define DC_GPIO_AUX_CTRL_4__AUX6_AUX_CTRL__SHIFT                                                              0x14
+#define DC_GPIO_AUX_CTRL_4__AUX1_AUX_CTRL_MASK                                                                0x0000000FL
+#define DC_GPIO_AUX_CTRL_4__AUX2_AUX_CTRL_MASK                                                                0x000000F0L
+#define DC_GPIO_AUX_CTRL_4__AUX3_AUX_CTRL_MASK                                                                0x00000F00L
+#define DC_GPIO_AUX_CTRL_4__AUX4_AUX_CTRL_MASK                                                                0x0000F000L
+#define DC_GPIO_AUX_CTRL_4__AUX5_AUX_CTRL_MASK                                                                0x000F0000L
+#define DC_GPIO_AUX_CTRL_4__AUX6_AUX_CTRL_MASK                                                                0x00F00000L
+//DC_GPIO_AUX_CTRL_5
+#define DC_GPIO_AUX_CTRL_5__AUX1_VOD_TUNE__SHIFT                                                              0x0
+#define DC_GPIO_AUX_CTRL_5__AUX2_VOD_TUNE__SHIFT                                                              0x2
+#define DC_GPIO_AUX_CTRL_5__AUX3_VOD_TUNE__SHIFT                                                              0x4
+#define DC_GPIO_AUX_CTRL_5__AUX4_VOD_TUNE__SHIFT                                                              0x6
+#define DC_GPIO_AUX_CTRL_5__AUX5_VOD_TUNE__SHIFT                                                              0x8
+#define DC_GPIO_AUX_CTRL_5__AUX6_VOD_TUNE__SHIFT                                                              0xa
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD1_I2CMODE__SHIFT                                                           0xc
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD2_I2CMODE__SHIFT                                                           0xd
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD3_I2CMODE__SHIFT                                                           0xe
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD4_I2CMODE__SHIFT                                                           0xf
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD5_I2CMODE__SHIFT                                                           0x10
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD6_I2CMODE__SHIFT                                                           0x11
+#define DC_GPIO_AUX_CTRL_5__DDC1_I2C_VPH_1V2_EN__SHIFT                                                        0x12
+#define DC_GPIO_AUX_CTRL_5__DDC2_I2C_VPH_1V2_EN__SHIFT                                                        0x13
+#define DC_GPIO_AUX_CTRL_5__DDC3_I2C_VPH_1V2_EN__SHIFT                                                        0x14
+#define DC_GPIO_AUX_CTRL_5__DDC4_I2C_VPH_1V2_EN__SHIFT                                                        0x15
+#define DC_GPIO_AUX_CTRL_5__DDC5_I2C_VPH_1V2_EN__SHIFT                                                        0x16
+#define DC_GPIO_AUX_CTRL_5__DDC6_I2C_VPH_1V2_EN__SHIFT                                                        0x17
+#define DC_GPIO_AUX_CTRL_5__DDC1_PAD_I2C_CTRL__SHIFT                                                          0x18
+#define DC_GPIO_AUX_CTRL_5__DDC2_PAD_I2C_CTRL__SHIFT                                                          0x19
+#define DC_GPIO_AUX_CTRL_5__DDC3_PAD_I2C_CTRL__SHIFT                                                          0x1a
+#define DC_GPIO_AUX_CTRL_5__DDC4_PAD_I2C_CTRL__SHIFT                                                          0x1b
+#define DC_GPIO_AUX_CTRL_5__DDC5_PAD_I2C_CTRL__SHIFT                                                          0x1c
+#define DC_GPIO_AUX_CTRL_5__DDC6_PAD_I2C_CTRL__SHIFT                                                          0x1d
+#define DC_GPIO_AUX_CTRL_5__AUX1_VOD_TUNE_MASK                                                                0x00000003L
+#define DC_GPIO_AUX_CTRL_5__AUX2_VOD_TUNE_MASK                                                                0x0000000CL
+#define DC_GPIO_AUX_CTRL_5__AUX3_VOD_TUNE_MASK                                                                0x00000030L
+#define DC_GPIO_AUX_CTRL_5__AUX4_VOD_TUNE_MASK                                                                0x000000C0L
+#define DC_GPIO_AUX_CTRL_5__AUX5_VOD_TUNE_MASK                                                                0x00000300L
+#define DC_GPIO_AUX_CTRL_5__AUX6_VOD_TUNE_MASK                                                                0x00000C00L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD1_I2CMODE_MASK                                                             0x00001000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD2_I2CMODE_MASK                                                             0x00002000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD3_I2CMODE_MASK                                                             0x00004000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD4_I2CMODE_MASK                                                             0x00008000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD5_I2CMODE_MASK                                                             0x00010000L
+#define DC_GPIO_AUX_CTRL_5__DDC_PAD6_I2CMODE_MASK                                                             0x00020000L
+#define DC_GPIO_AUX_CTRL_5__DDC1_I2C_VPH_1V2_EN_MASK                                                          0x00040000L
+#define DC_GPIO_AUX_CTRL_5__DDC2_I2C_VPH_1V2_EN_MASK                                                          0x00080000L
+#define DC_GPIO_AUX_CTRL_5__DDC3_I2C_VPH_1V2_EN_MASK                                                          0x00100000L
+#define DC_GPIO_AUX_CTRL_5__DDC4_I2C_VPH_1V2_EN_MASK                                                          0x00200000L
+#define DC_GPIO_AUX_CTRL_5__DDC5_I2C_VPH_1V2_EN_MASK                                                          0x00400000L
+#define DC_GPIO_AUX_CTRL_5__DDC6_I2C_VPH_1V2_EN_MASK                                                          0x00800000L
+#define DC_GPIO_AUX_CTRL_5__DDC1_PAD_I2C_CTRL_MASK                                                            0x01000000L
+#define DC_GPIO_AUX_CTRL_5__DDC2_PAD_I2C_CTRL_MASK                                                            0x02000000L
+#define DC_GPIO_AUX_CTRL_5__DDC3_PAD_I2C_CTRL_MASK                                                            0x04000000L
+#define DC_GPIO_AUX_CTRL_5__DDC4_PAD_I2C_CTRL_MASK                                                            0x08000000L
+#define DC_GPIO_AUX_CTRL_5__DDC5_PAD_I2C_CTRL_MASK                                                            0x10000000L
+#define DC_GPIO_AUX_CTRL_5__DDC6_PAD_I2C_CTRL_MASK                                                            0x20000000L
+//AUXI2C_PAD_ALL_PWR_OK
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY1_ALL_PWR_OK__SHIFT                                                  0x0
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY2_ALL_PWR_OK__SHIFT                                                  0x1
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY3_ALL_PWR_OK__SHIFT                                                  0x2
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY4_ALL_PWR_OK__SHIFT                                                  0x3
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY5_ALL_PWR_OK__SHIFT                                                  0x4
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY6_ALL_PWR_OK__SHIFT                                                  0x5
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY1_ALL_PWR_OK_MASK                                                    0x00000001L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY2_ALL_PWR_OK_MASK                                                    0x00000002L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY3_ALL_PWR_OK_MASK                                                    0x00000004L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY4_ALL_PWR_OK_MASK                                                    0x00000008L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY5_ALL_PWR_OK_MASK                                                    0x00000010L
+#define AUXI2C_PAD_ALL_PWR_OK__AUXI2C_PHY6_ALL_PWR_OK_MASK                                                    0x00000020L
+//DC_GPIO_PULLUPEN
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICA_PU_EN__SHIFT                                                       0x0
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICB_PU_EN__SHIFT                                                       0x1
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICC_PU_EN__SHIFT                                                       0x2
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICD_PU_EN__SHIFT                                                       0x3
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICE_PU_EN__SHIFT                                                       0x4
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICF_PU_EN__SHIFT                                                       0x5
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICG_PU_EN__SHIFT                                                       0x6
+#define DC_GPIO_PULLUPEN__DC_GPIO_HSYNCA_PU_EN__SHIFT                                                         0x8
+#define DC_GPIO_PULLUPEN__DC_GPIO_VSYNCA_PU_EN__SHIFT                                                         0x9
+#define DC_GPIO_PULLUPEN__DC_GPIO_HPD1_PU_EN__SHIFT                                                           0xe
+#define DC_GPIO_PULLUPEN__DC_GPIO_BLON_PU_EN__SHIFT                                                           0x14
+#define DC_GPIO_PULLUPEN__DC_GPIO_DIGON_PU_EN__SHIFT                                                          0x15
+#define DC_GPIO_PULLUPEN__DC_GPIO_ENA_BL_PU_EN__SHIFT                                                         0x16
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICA_PU_EN_MASK                                                         0x00000001L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICB_PU_EN_MASK                                                         0x00000002L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICC_PU_EN_MASK                                                         0x00000004L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICD_PU_EN_MASK                                                         0x00000008L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICE_PU_EN_MASK                                                         0x00000010L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICF_PU_EN_MASK                                                         0x00000020L
+#define DC_GPIO_PULLUPEN__DC_GPIO_GENERICG_PU_EN_MASK                                                         0x00000040L
+#define DC_GPIO_PULLUPEN__DC_GPIO_HSYNCA_PU_EN_MASK                                                           0x00000100L
+#define DC_GPIO_PULLUPEN__DC_GPIO_VSYNCA_PU_EN_MASK                                                           0x00000200L
+#define DC_GPIO_PULLUPEN__DC_GPIO_HPD1_PU_EN_MASK                                                             0x00004000L
+#define DC_GPIO_PULLUPEN__DC_GPIO_BLON_PU_EN_MASK                                                             0x00100000L
+#define DC_GPIO_PULLUPEN__DC_GPIO_DIGON_PU_EN_MASK                                                            0x00200000L
+#define DC_GPIO_PULLUPEN__DC_GPIO_ENA_BL_PU_EN_MASK                                                           0x00400000L
+//DC_GPIO_AUX_CTRL_6
+#define DC_GPIO_AUX_CTRL_6__AUX1_PAD_RXSEL__SHIFT                                                             0x0
+#define DC_GPIO_AUX_CTRL_6__AUX2_PAD_RXSEL__SHIFT                                                             0x2
+#define DC_GPIO_AUX_CTRL_6__AUX3_PAD_RXSEL__SHIFT                                                             0x4
+#define DC_GPIO_AUX_CTRL_6__AUX4_PAD_RXSEL__SHIFT                                                             0x6
+#define DC_GPIO_AUX_CTRL_6__AUX5_PAD_RXSEL__SHIFT                                                             0x8
+#define DC_GPIO_AUX_CTRL_6__AUX6_PAD_RXSEL__SHIFT                                                             0xa
+#define DC_GPIO_AUX_CTRL_6__AUX1_PAD_RXSEL_MASK                                                               0x00000003L
+#define DC_GPIO_AUX_CTRL_6__AUX2_PAD_RXSEL_MASK                                                               0x0000000CL
+#define DC_GPIO_AUX_CTRL_6__AUX3_PAD_RXSEL_MASK                                                               0x00000030L
+#define DC_GPIO_AUX_CTRL_6__AUX4_PAD_RXSEL_MASK                                                               0x000000C0L
+#define DC_GPIO_AUX_CTRL_6__AUX5_PAD_RXSEL_MASK                                                               0x00000300L
+#define DC_GPIO_AUX_CTRL_6__AUX6_PAD_RXSEL_MASK                                                               0x00000C00L
 //BPHYC_DAC_MACRO_CNTL
 #define BPHYC_DAC_MACRO_CNTL__BPHYC_DAC_WHITE_LEVEL__SHIFT                                                    0x0
 #define BPHYC_DAC_MACRO_CNTL__BPHYC_DAC_WHITE_FINE_CONTROL__SHIFT                                             0x8
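These _MASK/__SHIFT pairs follow the usual register-field idiom: mask out the field, then shift it down. A hedged decode sketch; read_reg() is a stand-in for the driver's register read helper:

  uint32_t regval = read_reg(mmDC_GPIO_AUX_CTRL_3);
  uint32_t aux1_hys = (regval & DC_GPIO_AUX_CTRL_3__AUX1_HYS_TUNE_MASK) >>
                      DC_GPIO_AUX_CTRL_3__AUX1_HYS_TUNE__SHIFT;
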
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
index 4ccf968..721c611 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_offset.h
@@ -3895,6 +3895,10 @@
 #define mmCM0_CM_MEM_PWR_CTRL_BASE_IDX                                                                 2
 #define mmCM0_CM_MEM_PWR_STATUS                                                                        0x0d33
 #define mmCM0_CM_MEM_PWR_STATUS_BASE_IDX                                                               2
+#define mmCM0_CM_TEST_DEBUG_INDEX                                                                      0x0d35
+#define mmCM0_CM_TEST_DEBUG_INDEX_BASE_IDX                                                             2
+#define mmCM0_CM_TEST_DEBUG_DATA                                                                       0x0d36
+#define mmCM0_CM_TEST_DEBUG_DATA_BASE_IDX                                                              2
 
 
 // addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
@@ -4367,7 +4371,10 @@
 #define mmCM1_CM_MEM_PWR_CTRL_BASE_IDX                                                                 2
 #define mmCM1_CM_MEM_PWR_STATUS                                                                        0x0e4e
 #define mmCM1_CM_MEM_PWR_STATUS_BASE_IDX                                                               2
-
+#define mmCM1_CM_TEST_DEBUG_INDEX                                                                      0x0e50
+#define mmCM1_CM_TEST_DEBUG_INDEX_BASE_IDX                                                             2
+#define mmCM1_CM_TEST_DEBUG_DATA                                                                       0x0e51
+#define mmCM1_CM_TEST_DEBUG_DATA_BASE_IDX                                                              2
 
 // addressBlock: dce_dc_dpp1_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
 // base address: 0x399c
@@ -4839,7 +4846,10 @@
 #define mmCM2_CM_MEM_PWR_CTRL_BASE_IDX                                                                 2
 #define mmCM2_CM_MEM_PWR_STATUS                                                                        0x0f69
 #define mmCM2_CM_MEM_PWR_STATUS_BASE_IDX                                                               2
-
+#define mmCM2_CM_TEST_DEBUG_INDEX                                                                      0x0f6b
+#define mmCM2_CM_TEST_DEBUG_INDEX_BASE_IDX                                                             2
+#define mmCM2_CM_TEST_DEBUG_DATA                                                                       0x0f6c
+#define mmCM2_CM_TEST_DEBUG_DATA_BASE_IDX                                                              2
 
 // addressBlock: dce_dc_dpp2_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
 // base address: 0x3e08
@@ -5311,7 +5321,10 @@
 #define mmCM3_CM_MEM_PWR_CTRL_BASE_IDX                                                                 2
 #define mmCM3_CM_MEM_PWR_STATUS                                                                        0x1084
 #define mmCM3_CM_MEM_PWR_STATUS_BASE_IDX                                                               2
-
+#define mmCM3_CM_TEST_DEBUG_INDEX                                                                      0x1086
+#define mmCM3_CM_TEST_DEBUG_INDEX_BASE_IDX                                                             2
+#define mmCM3_CM_TEST_DEBUG_DATA                                                                       0x1087
+#define mmCM3_CM_TEST_DEBUG_DATA_BASE_IDX                                                              2
 
 // addressBlock: dce_dc_dpp3_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
 // base address: 0x4274
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
index e2a2f11..e7c0cad 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_1_0_sh_mask.h
@@ -14049,6 +14049,14 @@
 #define CM0_CM_MEM_PWR_STATUS__RGAM_MEM_PWR_STATE__SHIFT                                                      0x2
 #define CM0_CM_MEM_PWR_STATUS__SHARED_MEM_PWR_STATE_MASK                                                      0x00000003L
 #define CM0_CM_MEM_PWR_STATUS__RGAM_MEM_PWR_STATE_MASK                                                        0x0000000CL
+//CM0_CM_TEST_DEBUG_INDEX
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX__SHIFT                                                   0x0
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN__SHIFT                                                0x8
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_INDEX_MASK                                                     0x000000FFL
+#define CM0_CM_TEST_DEBUG_INDEX__CM_TEST_DEBUG_WRITE_EN_MASK                                                  0x00000100L
+//CM0_CM_TEST_DEBUG_DATA
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA__SHIFT                                                     0x0
+#define CM0_CM_TEST_DEBUG_DATA__CM_TEST_DEBUG_DATA_MASK                                                       0xFFFFFFFFL
 
 
 // addressBlock: dce_dc_dpp0_dispdec_dpp_dcperfmon_dc_perfmon_dispdec
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h
new file mode 100644
index 0000000..9e19e72
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_default.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_1_7_DEFAULT_HEADER
+#define _df_1_7_DEFAULT_HEADER
+
+#define mmFabricConfigAccessControl_DEFAULT						0x00000000
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h
new file mode 100644
index 0000000..e6044e2
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_offset.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_1_7_OFFSET_HEADER
+#define _df_1_7_OFFSET_HEADER
+
+#define mmFabricConfigAccessControl									0x0410
+#define mmFabricConfigAccessControl_BASE_IDX								0
+
+#define mmDF_PIE_AON0_DfGlobalClkGater									0x00fc
+#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX								0
+
+#define mmDF_CS_AON0_DramBaseAddress0									0x0044
+#define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX								0
+
+#define mmDF_CS_AON0_CoherentSlaveModeCtrlA0								0x0214
+#define mmDF_CS_AON0_CoherentSlaveModeCtrlA0_BASE_IDX							0
+
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h
new file mode 100644
index 0000000..a78c994
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_1_7_sh_mask.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_1_7_SH_MASK_HEADER
+#define _df_1_7_SH_MASK_HEADER
+
+/* FabricConfigAccessControl */
+#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT						0x0
+#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT						0x1
+#define FabricConfigAccessControl__CfgRegInstID__SHIFT							0x10
+#define FabricConfigAccessControl__CfgRegInstAccEn_MASK							0x00000001L
+#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK						0x00000002L
+#define FabricConfigAccessControl__CfgRegInstID_MASK							0x00FF0000L
+
+/* DF_PIE_AON0_DfGlobalClkGater */
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT							0x0
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK							0x0000000FL
+
+/* DF_CS_AON0_DramBaseAddress0 */
+#define DF_CS_AON0_DramBaseAddress0__AddrRngVal__SHIFT							0x0
+#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT						0x1
+#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT						0x4
+#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT						0x8
+#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT						0xc
+#define DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK							0x00000001L
+#define DF_CS_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK						0x00000002L
+#define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK							0x000000F0L
+#define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK							0x00000700L
+#define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK							0xFFFFF000L
+
+//DF_CS_AON0_CoherentSlaveModeCtrlA0
+#define DF_CS_AON0_CoherentSlaveModeCtrlA0__ForceParWrRMW__SHIFT					0x3
+#define DF_CS_AON0_CoherentSlaveModeCtrlA0__ForceParWrRMW_MASK						0x00000008L
+
+#endif
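A worked example of decoding DF_CS_AON0_DramBaseAddress0 with the masks above; the raw value is made up for illustration:

  uint32_t val = 0x00123001;  /* AddrRngVal = 1, DramBaseAddr = 0x123 */

  bool valid = val & DF_CS_AON0_DramBaseAddress0__AddrRngVal_MASK;
  uint32_t base = (val & DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK) >>
                  DF_CS_AON0_DramBaseAddress0__DramBaseAddr__SHIFT;
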
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h
new file mode 100644
index 0000000..e58c207
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_default.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_3_6_DEFAULT_HEADER
+#define _df_3_6_DEFAULT_HEADER
+
+#define mmFabricConfigAccessControl_DEFAULT						0x00000000
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
new file mode 100644
index 0000000..a9575db
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_offset.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_3_6_OFFSET_HEADER
+#define _df_3_6_OFFSET_HEADER
+
+#define mmFabricConfigAccessControl									0x0410
+#define mmFabricConfigAccessControl_BASE_IDX								0
+
+#define mmDF_PIE_AON0_DfGlobalClkGater									0x00fc
+#define mmDF_PIE_AON0_DfGlobalClkGater_BASE_IDX								0
+
+#define mmDF_CS_UMC_AON0_DramBaseAddress0								0x0044
+#define mmDF_CS_UMC_AON0_DramBaseAddress0_BASE_IDX							0
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
new file mode 100644
index 0000000..88f7c69
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/df/df_3_6_sh_mask.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _df_3_6_SH_MASK_HEADER
+#define _df_3_6_SH_MASK_HEADER
+
+/* FabricConfigAccessControl */
+#define FabricConfigAccessControl__CfgRegInstAccEn__SHIFT						0x0
+#define FabricConfigAccessControl__CfgRegInstAccRegLock__SHIFT						0x1
+#define FabricConfigAccessControl__CfgRegInstID__SHIFT							0x10
+#define FabricConfigAccessControl__CfgRegInstAccEn_MASK							0x00000001L
+#define FabricConfigAccessControl__CfgRegInstAccRegLock_MASK						0x00000002L
+#define FabricConfigAccessControl__CfgRegInstID_MASK							0x00FF0000L
+
+/* DF_PIE_AON0_DfGlobalClkGater */
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode__SHIFT							0x0
+#define DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK							0x0000000FL
+
+/* DF_CS_UMC_AON0_DramBaseAddress0 */
+#define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal__SHIFT						0x0
+#define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn__SHIFT						0x1
+#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT						0x4
+#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel__SHIFT						0x8
+#define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr__SHIFT						0xc
+#define DF_CS_UMC_AON0_DramBaseAddress0__AddrRngVal_MASK						0x00000001L
+#define DF_CS_UMC_AON0_DramBaseAddress0__LgcyMmioHoleEn_MASK						0x00000002L
+#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK						0x000000F0L
+#define DF_CS_UMC_AON0_DramBaseAddress0__IntLvAddrSel_MASK						0x00000700L
+#define DF_CS_UMC_AON0_DramBaseAddress0__DramBaseAddr_MASK						0xFFFFF000L
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index f696bbb..7931502 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -632,6 +632,13 @@
   ULONG ulReserved;
 }COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2;
 
+typedef struct _COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3
+{
+  COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 ulClock;
+  USHORT  usMclk_fcw_frac;                  //fractional part of fcw = usMclk_fcw_frac/65536
+  USHORT  usMclk_fcw_int;                   //integer part of fcw
+}COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3;
+
 //Input parameter of DynamicMemorySettingsTable
 //when ATOM_COMPUTE_CLOCK_FREQ.ulComputeClockFlag = COMPUTE_MEMORY_PLL_PARAM
 typedef struct _DYNAMICE_MEMORY_SETTINGS_PARAMETER
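The comments on the new V2_3 struct encode the arithmetic: the effective clock word is the integer field plus the fractional field scaled by 1/65536. Equivalently, as a hedged sketch where p is an assumed COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3 instance:

  /* usMclk_fcw_int + usMclk_fcw_frac/65536, held as 16.16 fixed point */
  uint32_t fcw_q16 = ((uint32_t)p.usMclk_fcw_int << 16) | p.usMclk_fcw_frac;
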
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index de177ce..c6c1666 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -1219,6 +1219,41 @@
   uint32_t rm21_sram_vmin_value;
 };
 
+struct  atom_gfx_info_v2_4 {
+  struct  atom_common_table_header  table_header;
+  uint8_t gfxip_min_ver;
+  uint8_t gfxip_max_ver;
+  uint8_t gc_num_se;
+  uint8_t max_tile_pipes;
+  uint8_t gc_num_cu_per_sh;
+  uint8_t gc_num_sh_per_se;
+  uint8_t gc_num_rb_per_se;
+  uint8_t gc_num_tccs;
+  uint32_t regaddr_cp_dma_src_addr;
+  uint32_t regaddr_cp_dma_src_addr_hi;
+  uint32_t regaddr_cp_dma_dst_addr;
+  uint32_t regaddr_cp_dma_dst_addr_hi;
+  uint32_t regaddr_cp_dma_command;
+  uint32_t regaddr_cp_status;
+  uint32_t regaddr_rlc_gpu_clock_32;
+  uint32_t rlc_gpu_timer_refclk;
+  uint8_t active_cu_per_sh;
+  uint8_t active_rb_per_se;
+  uint16_t gcgoldenoffset;
+  uint16_t gc_num_gprs;
+  uint16_t gc_gsprim_buff_depth;
+  uint16_t gc_parameter_cache_depth;
+  uint16_t gc_wave_size;
+  uint16_t gc_max_waves_per_simd;
+  uint16_t gc_lds_size;
+  uint8_t gc_num_max_gs_thds;
+  uint8_t gc_gs_table_depth;
+  uint8_t gc_double_offchip_lds_buffer;
+  uint8_t gc_max_scratch_slots_per_cu;
+  uint32_t sram_rm_fuses_val;
+  uint32_t sram_custom_rm_fuses_val;
+};
+
 /* 
   ***************************************************************************
     Data Table smu_info  structure
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index f2814ae..a69deb3 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -42,20 +42,6 @@
 	CGS_IND_REG__AUDIO_ENDPT
 };
 
-/**
- * enum cgs_engine - Engines that can be statically power-gated
- */
-enum cgs_engine {
-	CGS_ENGINE__UVD,
-	CGS_ENGINE__VCE,
-	CGS_ENGINE__VP8,
-	CGS_ENGINE__ACP_DMA,
-	CGS_ENGINE__ACP_DSP0,
-	CGS_ENGINE__ACP_DSP1,
-	CGS_ENGINE__ISP,
-	/* ... */
-};
-
 /*
  * enum cgs_ucode_id - Firmware types for different IPs
  */
@@ -76,17 +62,6 @@
 	CGS_UCODE_ID_MAXIMUM,
 };
 
-/*
- * enum cgs_resource_type - GPU resource type
- */
-enum cgs_resource_type {
-	CGS_RESOURCE_TYPE_MMIO = 0,
-	CGS_RESOURCE_TYPE_FB,
-	CGS_RESOURCE_TYPE_IO,
-	CGS_RESOURCE_TYPE_DOORBELL,
-	CGS_RESOURCE_TYPE_ROM,
-};
-
 /**
  * struct cgs_firmware_info - Firmware information
  */
@@ -104,17 +79,6 @@
 	bool			is_kicker;
 };
 
-struct cgs_mode_info {
-	uint32_t		refresh_rate;
-	uint32_t		vblank_time_us;
-};
-
-struct cgs_display_info {
-	uint32_t		display_count;
-	uint32_t		active_display_mask;
-	struct cgs_mode_info *mode_info;
-};
-
 typedef unsigned long cgs_handle_t;
 
 /**
@@ -170,119 +134,18 @@
 #define CGS_WREG32_FIELD_IND(device, space, reg, field, val)	\
 	cgs_write_ind_register(device, space, ix##reg, (cgs_read_ind_register(device, space, ix##reg) & ~CGS_REG_FIELD_MASK(reg, field)) | (val) << CGS_REG_FIELD_SHIFT(reg, field))
 
-/**
- * cgs_get_pci_resource() - provide access to a device resource (PCI BAR)
- * @cgs_device:	opaque device handle
- * @resource_type:	Type of Resource (MMIO, IO, ROM, FB, DOORBELL)
- * @size:	size of the region
- * @offset:	offset from the start of the region
- * @resource_base:	base address (not including offset) returned
- *
- * Return: 0 on success, -errno otherwise
- */
-typedef int (*cgs_get_pci_resource_t)(struct cgs_device *cgs_device,
-				      enum cgs_resource_type resource_type,
-				      uint64_t size,
-				      uint64_t offset,
-				      uint64_t *resource_base);
-
-/**
- * cgs_atom_get_data_table() - Get a pointer to an ATOM BIOS data table
- * @cgs_device:	opaque device handle
- * @table:	data table index
- * @size:	size of the table (output, may be NULL)
- * @frev:	table format revision (output, may be NULL)
- * @crev:	table content revision (output, may be NULL)
- *
- * Return: Pointer to start of the table, or NULL on failure
- */
-typedef const void *(*cgs_atom_get_data_table_t)(
-	struct cgs_device *cgs_device, unsigned table,
-	uint16_t *size, uint8_t *frev, uint8_t *crev);
-
-/**
- * cgs_atom_get_cmd_table_revs() - Get ATOM BIOS command table revisions
- * @cgs_device:	opaque device handle
- * @table:	data table index
- * @frev:	table format revision (output, may be NULL)
- * @crev:	table content revision (output, may be NULL)
- *
- * Return: 0 on success, -errno otherwise
- */
-typedef int (*cgs_atom_get_cmd_table_revs_t)(struct cgs_device *cgs_device, unsigned table,
-					     uint8_t *frev, uint8_t *crev);
-
-/**
- * cgs_atom_exec_cmd_table() - Execute an ATOM BIOS command table
- * @cgs_device: opaque device handle
- * @table:	command table index
- * @args:	arguments
- *
- * Return: 0 on success, -errno otherwise
- */
-typedef int (*cgs_atom_exec_cmd_table_t)(struct cgs_device *cgs_device,
-					 unsigned table, void *args);
-
-/**
- * cgs_get_firmware_info - Get the firmware information from core driver
- * @cgs_device: opaque device handle
- * @type: the firmware type
- * @info: returned firmware information
- *
- * Return: 0 on success, -errno otherwise
- */
 typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device,
 				     enum cgs_ucode_id type,
 				     struct cgs_firmware_info *info);
 
-typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device,
-					 enum cgs_ucode_id type);
-
-typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device,
-				  enum amd_ip_block_type block_type,
-				  enum amd_powergating_state state);
-
-typedef int(*cgs_set_clockgating_state)(struct cgs_device *cgs_device,
-				  enum amd_ip_block_type block_type,
-				  enum amd_clockgating_state state);
-
-typedef int(*cgs_get_active_displays_info)(
-					struct cgs_device *cgs_device,
-					struct cgs_display_info *info);
-
-typedef int (*cgs_notify_dpm_enabled)(struct cgs_device *cgs_device, bool enabled);
-
-typedef int (*cgs_is_virtualization_enabled_t)(void *cgs_device);
-
-typedef int (*cgs_enter_safe_mode)(struct cgs_device *cgs_device, bool en);
-
-typedef void (*cgs_lock_grbm_idx)(struct cgs_device *cgs_device, bool lock);
-
 struct cgs_ops {
 	/* MMIO access */
 	cgs_read_register_t read_register;
 	cgs_write_register_t write_register;
 	cgs_read_ind_register_t read_ind_register;
 	cgs_write_ind_register_t write_ind_register;
-	/* PCI resources */
-	cgs_get_pci_resource_t get_pci_resource;
-	/* ATOM BIOS */
-	cgs_atom_get_data_table_t atom_get_data_table;
-	cgs_atom_get_cmd_table_revs_t atom_get_cmd_table_revs;
-	cgs_atom_exec_cmd_table_t atom_exec_cmd_table;
 	/* Firmware Info */
 	cgs_get_firmware_info get_firmware_info;
-	cgs_rel_firmware rel_firmware;
-	/* cg pg interface*/
-	cgs_set_powergating_state set_powergating_state;
-	cgs_set_clockgating_state set_clockgating_state;
-	/* display manager */
-	cgs_get_active_displays_info get_active_displays_info;
-	/* notify dpm enabled */
-	cgs_notify_dpm_enabled notify_dpm_enabled;
-	cgs_is_virtualization_enabled_t is_virtualization_enabled;
-	cgs_enter_safe_mode enter_safe_mode;
-	cgs_lock_grbm_idx lock_grbm_idx;
 };
 
 struct cgs_os_ops; /* To be defined in OS-specific CGS header */
@@ -309,40 +172,7 @@
 #define cgs_write_ind_register(dev,space,index,value)		\
 	CGS_CALL(write_ind_register,dev,space,index,value)
 
-#define cgs_atom_get_data_table(dev,table,size,frev,crev)	\
-	CGS_CALL(atom_get_data_table,dev,table,size,frev,crev)
-#define cgs_atom_get_cmd_table_revs(dev,table,frev,crev)	\
-	CGS_CALL(atom_get_cmd_table_revs,dev,table,frev,crev)
-#define cgs_atom_exec_cmd_table(dev,table,args)		\
-	CGS_CALL(atom_exec_cmd_table,dev,table,args)
-
 #define cgs_get_firmware_info(dev, type, info)	\
 	CGS_CALL(get_firmware_info, dev, type, info)
-#define cgs_rel_firmware(dev, type)	\
-	CGS_CALL(rel_firmware, dev, type)
-#define cgs_set_powergating_state(dev, block_type, state)	\
-	CGS_CALL(set_powergating_state, dev, block_type, state)
-#define cgs_set_clockgating_state(dev, block_type, state)	\
-	CGS_CALL(set_clockgating_state, dev, block_type, state)
-#define cgs_notify_dpm_enabled(dev, enabled)	\
-	CGS_CALL(notify_dpm_enabled, dev, enabled)
-
-#define cgs_get_active_displays_info(dev, info)	\
-	CGS_CALL(get_active_displays_info, dev, info)
-
-#define cgs_get_pci_resource(cgs_device, resource_type, size, offset, \
-	resource_base) \
-	CGS_CALL(get_pci_resource, cgs_device, resource_type, size, offset, \
-	resource_base)
-
-#define cgs_is_virtualization_enabled(cgs_device) \
-		CGS_CALL(is_virtualization_enabled, cgs_device)
-
-#define cgs_enter_safe_mode(cgs_device, en) \
-		CGS_CALL(enter_safe_mode, cgs_device, en)
-
-#define cgs_lock_grbm_idx(cgs_device, lock) \
-		CGS_CALL(lock_grbm_idx, cgs_device, lock)
-
 
 #endif /* _CGS_COMMON_H */
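
With the ATOM, power/clock-gating and display callbacks removed, cgs_ops is reduced to
the MMIO accessors and get_firmware_info, all reached through the CGS_CALL wrapper
macros above. The pattern is a plain C function-pointer dispatch table; the following is
a minimal user-space sketch of the same shape (the names here are illustrative, not the
kernel's, and CGS_CALL's exact definition is assumed rather than shown in this hunk):

    #include <stdio.h>

    struct dev;                              /* opaque device handle */
    struct dev_ops {
            unsigned int (*read_register)(struct dev *d, unsigned int off);
    };
    struct dev { const struct dev_ops *ops; };

    /* Dispatch through the per-device ops table, as CGS_CALL is assumed to. */
    #define CALL(func, d, ...) ((d)->ops->func((d), ##__VA_ARGS__))

    static unsigned int fake_read(struct dev *d, unsigned int off)
    {
            (void)d;
            return 0xdead0000u | off;        /* stand-in for an MMIO read */
    }

    int main(void)
    {
            static const struct dev_ops ops = { .read_register = fake_read };
            struct dev d = { .ops = &ops };

            printf("0x%08x\n", CALL(read_register, &d, 0x20));
            return 0;
    }

Dispatching through a per-device ops table is what lets an OS-specific CGS layer supply
its own implementations without any caller changing.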
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 237289a..5733fbe 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -100,6 +100,21 @@
 	/* Bit n == 1 means Queue n is available for KFD */
 	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
 
+	/* Doorbell assignments (SOC15 and later chips only). Only
+	 * specific doorbells are routed to each SDMA engine. Others
+	 * are routed to IH and VCN. They are not usable by the CP.
+	 *
+	 * Any doorbell number D that satisfies the following condition
+	 * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
+	 *
+	 * KFD currently uses 1024 doorbells per process. If doorbells
+	 * 0x0f0-0x0f7 and 0x2f0-0x2f7 are reserved, that means the mask
+	 * would be set to 0x1f8 and the val to 0x0f0.
+	 */
+	unsigned int sdma_doorbell[2][2];
+	unsigned int reserved_doorbell_mask;
+	unsigned int reserved_doorbell_val;
+
 	/* Base address of doorbell aperture. */
 	phys_addr_t doorbell_physical_address;
 
@@ -173,8 +188,6 @@
  * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W. Only used
  * for no cp scheduling mode.
  *
- * @init_pipeline: Initializes the compute pipelines.
- *
  * @hqd_load: Loads the mqd structure to a H/W hqd slot. Used only for no cp
  * scheduling mode.
  *
@@ -274,9 +287,6 @@
 	int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid,
 					unsigned int vmid);
 
-	int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
-				uint32_t hpd_size, uint64_t hpd_gpu_addr);
-
 	int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
 
 	int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -382,6 +392,10 @@
  *
  * @resume: Notifies amdkfd about a resume action done to a kgd device
  *
+ * @quiesce_mm: Quiesce all user queue access to specified MM address space
+ *
+ * @resume_mm: Resume user queue access to specified MM address space
+ *
  * @schedule_evict_and_restore_process: Schedules work queue that will prepare
  * for safe eviction of KFD BOs that belong to the specified process.
  *
@@ -399,6 +413,8 @@
 	void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
 	void (*suspend)(struct kfd_dev *kfd);
 	int (*resume)(struct kfd_dev *kfd);
+	int (*quiesce_mm)(struct mm_struct *mm);
+	int (*resume_mm)(struct mm_struct *mm);
 	int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
 			struct dma_fence *fence);
 };
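
The reservation rule in the comment above is a single mask/value test, so the reserved
doorbells need not be contiguous. A stand-alone check (plain C, not kernel code) that
enumerates the reserved doorbells for the mask/val pair used in the comment:

    #include <stdio.h>

    int main(void)
    {
            const unsigned int mask = 0x1f8, val = 0x0f0;
            unsigned int d;

            for (d = 0; d < 1024; d++)       /* one process's doorbells */
                    if ((d & mask) == val)
                            printf("doorbell 0x%03x reserved\n", d);
            return 0;
    }

This prints exactly the two ranges named above, 0x0f0-0x0f7 and 0x2f0-0x2f7: bits 4-7
of a reserved doorbell must be set, bits 3 and 8 clear, and bits 0-2 and 9 are free.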
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 5c840c0..06f08f3 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -94,6 +94,7 @@
 	PP_PCIE,
 	OD_SCLK,
 	OD_MCLK,
+	OD_RANGE,
 };
 
 enum amd_pp_sensors {
@@ -149,13 +150,6 @@
 	uint32_t states[16];
 };
 
-struct pp_gpu_power {
-	uint32_t vddc_power;
-	uint32_t vddci_power;
-	uint32_t max_gpu_power;
-	uint32_t average_gpu_power;
-};
-
 #define PP_GROUP_MASK        0xF0000000
 #define PP_GROUP_SHIFT       28
 
@@ -246,11 +240,6 @@
 	int (*load_firmware)(void *handle);
 	int (*wait_for_fw_loading_complete)(void *handle);
 	int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id);
-	int (*notify_smu_memory_info)(void *handle, uint32_t virtual_addr_low,
-					uint32_t virtual_addr_hi,
-					uint32_t mc_addr_low,
-					uint32_t mc_addr_hi,
-					uint32_t size);
 	int (*set_power_limit)(void *handle, uint32_t n);
 	int (*get_power_limit)(void *handle, uint32_t *limit, bool default_limit);
 /* export to DC */
diff --git a/drivers/gpu/drm/amd/include/soc15_ih_clientid.h b/drivers/gpu/drm/amd/include/soc15_ih_clientid.h
index a12d4f2..12e196c 100644
--- a/drivers/gpu/drm/amd/include/soc15_ih_clientid.h
+++ b/drivers/gpu/drm/amd/include/soc15_ih_clientid.h
@@ -43,6 +43,7 @@
 	SOC15_IH_CLIENTID_SE2SH		= 0x0c,
 	SOC15_IH_CLIENTID_SE3SH		= 0x0d,
 	SOC15_IH_CLIENTID_SYSHUB	= 0x0e,
+	SOC15_IH_CLIENTID_UVD1		= 0x0e,
 	SOC15_IH_CLIENTID_THM		= 0x0f,
 	SOC15_IH_CLIENTID_UVD		= 0x10,
 	SOC15_IH_CLIENTID_VCE0		= 0x11,
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
index 2fb25ab..ceaf493 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -29,10 +29,10 @@
 	uint32_t sdmax_rlcx_rb_base;
 	uint32_t sdmax_rlcx_rb_base_hi;
 	uint32_t sdmax_rlcx_rb_rptr;
+	uint32_t sdmax_rlcx_rb_rptr_hi;
 	uint32_t sdmax_rlcx_rb_wptr;
+	uint32_t sdmax_rlcx_rb_wptr_hi;
 	uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
-	uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
-	uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
 	uint32_t sdmax_rlcx_rb_rptr_addr_hi;
 	uint32_t sdmax_rlcx_rb_rptr_addr_lo;
 	uint32_t sdmax_rlcx_ib_cntl;
@@ -44,29 +44,29 @@
 	uint32_t sdmax_rlcx_skip_cntl;
 	uint32_t sdmax_rlcx_context_status;
 	uint32_t sdmax_rlcx_doorbell;
-	uint32_t sdmax_rlcx_virtual_addr;
-	uint32_t sdmax_rlcx_ape1_cntl;
+	uint32_t sdmax_rlcx_status;
 	uint32_t sdmax_rlcx_doorbell_log;
-	uint32_t reserved_22;
-	uint32_t reserved_23;
-	uint32_t reserved_24;
-	uint32_t reserved_25;
-	uint32_t reserved_26;
-	uint32_t reserved_27;
-	uint32_t reserved_28;
-	uint32_t reserved_29;
-	uint32_t reserved_30;
-	uint32_t reserved_31;
-	uint32_t reserved_32;
-	uint32_t reserved_33;
-	uint32_t reserved_34;
-	uint32_t reserved_35;
-	uint32_t reserved_36;
-	uint32_t reserved_37;
-	uint32_t reserved_38;
-	uint32_t reserved_39;
-	uint32_t reserved_40;
-	uint32_t reserved_41;
+	uint32_t sdmax_rlcx_watermark;
+	uint32_t sdmax_rlcx_doorbell_offset;
+	uint32_t sdmax_rlcx_csa_addr_lo;
+	uint32_t sdmax_rlcx_csa_addr_hi;
+	uint32_t sdmax_rlcx_ib_sub_remain;
+	uint32_t sdmax_rlcx_preempt;
+	uint32_t sdmax_rlcx_dummy_reg;
+	uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
+	uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
+	uint32_t sdmax_rlcx_rb_aql_cntl;
+	uint32_t sdmax_rlcx_minor_ptr_update;
+	uint32_t sdmax_rlcx_midcmd_data0;
+	uint32_t sdmax_rlcx_midcmd_data1;
+	uint32_t sdmax_rlcx_midcmd_data2;
+	uint32_t sdmax_rlcx_midcmd_data3;
+	uint32_t sdmax_rlcx_midcmd_data4;
+	uint32_t sdmax_rlcx_midcmd_data5;
+	uint32_t sdmax_rlcx_midcmd_data6;
+	uint32_t sdmax_rlcx_midcmd_data7;
+	uint32_t sdmax_rlcx_midcmd_data8;
+	uint32_t sdmax_rlcx_midcmd_cntl;
 	uint32_t reserved_42;
 	uint32_t reserved_43;
 	uint32_t reserved_44;
diff --git a/drivers/gpu/drm/amd/include/vega20_ip_offset.h b/drivers/gpu/drm/amd/include/vega20_ip_offset.h
new file mode 100644
index 0000000..2a2a9cc
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/vega20_ip_offset.h
@@ -0,0 +1,1051 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _vega20_ip_offset_HEADER
+#define _vega20_ip_offset_HEADER
+
+#define MAX_INSTANCE                                       6
+#define MAX_SEGMENT                                        6
+
+
+struct IP_BASE_INSTANCE
+{
+    unsigned int segment[MAX_SEGMENT];
+};
+
+struct IP_BASE
+{
+    struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
+};
+
+
+static const struct IP_BASE ATHUB_BASE            ={ { { { 0x00000C20, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE CLK_BASE            ={ { { { 0x00016C00, 0x00016E00, 0x00017000, 0x00017200, 0x0001B000, 0x0001B200 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE DCE_BASE            ={ { { { 0x00000012, 0x000000C0, 0x000034C0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE DF_BASE            ={ { { { 0x00007000, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE FUSE_BASE            ={ { { { 0x00017400, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE GC_BASE            ={ { { { 0x00002000, 0x0000A000, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE HDP_BASE            ={ { { { 0x00000F20, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE MMHUB_BASE            ={ { { { 0x0001A000, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE MP0_BASE            ={ { { { 0x00016000, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE MP1_BASE            ={ { { { 0x00016000, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE NBIO_BASE            ={ { { { 0x00000000, 0x00000014, 0x00000D20, 0x00010400, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE OSSSYS_BASE            ={ { { { 0x000010A0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE SDMA0_BASE            ={ { { { 0x00001260, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE SDMA1_BASE            ={ { { { 0x00001860, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE SMUIO_BASE            ={ { { { 0x00016800, 0x00016A00, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE THM_BASE            ={ { { { 0x00016600, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE UMC_BASE            ={ { { { 0x00014000, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE UVD_BASE            ={ { { { 0x00007800, 0x00007E00, 0, 0, 0, 0 } },
+                                        { { 0, 0x00009000, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+/* Adjust VCE_BASE to make vce_4_1 use vce_4_0 offset header files */
+static const struct IP_BASE VCE_BASE            ={ { { { 0x00007E00/* 0x00008800 */, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE XDMA_BASE            ={ { { { 0x00003400, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+static const struct IP_BASE RSMU_BASE            ={ { { { 0x00012000, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } },
+                                        { { 0, 0, 0, 0, 0, 0 } } } };
+
+
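
Each IP's register space in this header is a set of up to MAX_INSTANCE instances, each
made of up to MAX_SEGMENT discontiguous segments; a register's absolute dword offset is
its segment's base plus the register's offset within that segment (the per-register
offset headers are assumed to select the segment index). A minimal sketch of that
lookup, reusing the structures above with values copied from SDMA1_BASE; reg_offset()
is illustrative, not a kernel API:

    #include <stdio.h>

    #define MAX_INSTANCE 6
    #define MAX_SEGMENT  6

    struct IP_BASE_INSTANCE { unsigned int segment[MAX_SEGMENT]; };
    struct IP_BASE { struct IP_BASE_INSTANCE instance[MAX_INSTANCE]; };

    /* Same shape as the tables above; instance 0, segment 0 of SDMA1. */
    static const struct IP_BASE SDMA1 = { { { { 0x00001860, 0, 0, 0, 0, 0 } } } };

    /* Segment base plus the register's offset within that segment. */
    static unsigned int reg_offset(const struct IP_BASE *ip, int inst,
                                   int seg, unsigned int reg)
    {
            return ip->instance[inst].segment[seg] + reg;
    }

    int main(void)
    {
            /* e.g. a register 0x25 dwords into SDMA1 instance 0, segment 0 */
            printf("0x%08x\n", reg_offset(&SDMA1, 0, 0, 0x25));
            return 0;
    }
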
+#define ATHUB_BASE__INST0_SEG0                     0x00000C20
+#define ATHUB_BASE__INST0_SEG1                     0
+#define ATHUB_BASE__INST0_SEG2                     0
+#define ATHUB_BASE__INST0_SEG3                     0
+#define ATHUB_BASE__INST0_SEG4                     0
+#define ATHUB_BASE__INST0_SEG5                     0
+
+#define ATHUB_BASE__INST1_SEG0                     0
+#define ATHUB_BASE__INST1_SEG1                     0
+#define ATHUB_BASE__INST1_SEG2                     0
+#define ATHUB_BASE__INST1_SEG3                     0
+#define ATHUB_BASE__INST1_SEG4                     0
+#define ATHUB_BASE__INST1_SEG5                     0
+
+#define ATHUB_BASE__INST2_SEG0                     0
+#define ATHUB_BASE__INST2_SEG1                     0
+#define ATHUB_BASE__INST2_SEG2                     0
+#define ATHUB_BASE__INST2_SEG3                     0
+#define ATHUB_BASE__INST2_SEG4                     0
+#define ATHUB_BASE__INST2_SEG5                     0
+
+#define ATHUB_BASE__INST3_SEG0                     0
+#define ATHUB_BASE__INST3_SEG1                     0
+#define ATHUB_BASE__INST3_SEG2                     0
+#define ATHUB_BASE__INST3_SEG3                     0
+#define ATHUB_BASE__INST3_SEG4                     0
+#define ATHUB_BASE__INST3_SEG5                     0
+
+#define ATHUB_BASE__INST4_SEG0                     0
+#define ATHUB_BASE__INST4_SEG1                     0
+#define ATHUB_BASE__INST4_SEG2                     0
+#define ATHUB_BASE__INST4_SEG3                     0
+#define ATHUB_BASE__INST4_SEG4                     0
+#define ATHUB_BASE__INST4_SEG5                     0
+
+#define ATHUB_BASE__INST5_SEG0                     0
+#define ATHUB_BASE__INST5_SEG1                     0
+#define ATHUB_BASE__INST5_SEG2                     0
+#define ATHUB_BASE__INST5_SEG3                     0
+#define ATHUB_BASE__INST5_SEG4                     0
+#define ATHUB_BASE__INST5_SEG5                     0
+
+#define CLK_BASE__INST0_SEG0                       0x00016C00
+#define CLK_BASE__INST0_SEG1                       0x00016E00
+#define CLK_BASE__INST0_SEG2                       0x00017000
+#define CLK_BASE__INST0_SEG3                       0x00017200
+#define CLK_BASE__INST0_SEG4                       0x0001B000
+#define CLK_BASE__INST0_SEG5                       0x0001B200
+
+#define CLK_BASE__INST1_SEG0                       0
+#define CLK_BASE__INST1_SEG1                       0
+#define CLK_BASE__INST1_SEG2                       0
+#define CLK_BASE__INST1_SEG3                       0
+#define CLK_BASE__INST1_SEG4                       0
+#define CLK_BASE__INST1_SEG5                       0
+
+#define CLK_BASE__INST2_SEG0                       0
+#define CLK_BASE__INST2_SEG1                       0
+#define CLK_BASE__INST2_SEG2                       0
+#define CLK_BASE__INST2_SEG3                       0
+#define CLK_BASE__INST2_SEG4                       0
+#define CLK_BASE__INST2_SEG5                       0
+
+#define CLK_BASE__INST3_SEG0                       0
+#define CLK_BASE__INST3_SEG1                       0
+#define CLK_BASE__INST3_SEG2                       0
+#define CLK_BASE__INST3_SEG3                       0
+#define CLK_BASE__INST3_SEG4                       0
+#define CLK_BASE__INST3_SEG5                       0
+
+#define CLK_BASE__INST4_SEG0                       0
+#define CLK_BASE__INST4_SEG1                       0
+#define CLK_BASE__INST4_SEG2                       0
+#define CLK_BASE__INST4_SEG3                       0
+#define CLK_BASE__INST4_SEG4                       0
+#define CLK_BASE__INST4_SEG5                       0
+
+#define CLK_BASE__INST5_SEG0                       0
+#define CLK_BASE__INST5_SEG1                       0
+#define CLK_BASE__INST5_SEG2                       0
+#define CLK_BASE__INST5_SEG3                       0
+#define CLK_BASE__INST5_SEG4                       0
+#define CLK_BASE__INST5_SEG5                       0
+
+#define DCE_BASE__INST0_SEG0                       0x00000012
+#define DCE_BASE__INST0_SEG1                       0x000000C0
+#define DCE_BASE__INST0_SEG2                       0x000034C0
+#define DCE_BASE__INST0_SEG3                       0
+#define DCE_BASE__INST0_SEG4                       0
+#define DCE_BASE__INST0_SEG5                       0
+
+#define DCE_BASE__INST1_SEG0                       0
+#define DCE_BASE__INST1_SEG1                       0
+#define DCE_BASE__INST1_SEG2                       0
+#define DCE_BASE__INST1_SEG3                       0
+#define DCE_BASE__INST1_SEG4                       0
+#define DCE_BASE__INST1_SEG5                       0
+
+#define DCE_BASE__INST2_SEG0                       0
+#define DCE_BASE__INST2_SEG1                       0
+#define DCE_BASE__INST2_SEG2                       0
+#define DCE_BASE__INST2_SEG3                       0
+#define DCE_BASE__INST2_SEG4                       0
+#define DCE_BASE__INST2_SEG5                       0
+
+#define DCE_BASE__INST3_SEG0                       0
+#define DCE_BASE__INST3_SEG1                       0
+#define DCE_BASE__INST3_SEG2                       0
+#define DCE_BASE__INST3_SEG3                       0
+#define DCE_BASE__INST3_SEG4                       0
+#define DCE_BASE__INST3_SEG5                       0
+
+#define DCE_BASE__INST4_SEG0                       0
+#define DCE_BASE__INST4_SEG1                       0
+#define DCE_BASE__INST4_SEG2                       0
+#define DCE_BASE__INST4_SEG3                       0
+#define DCE_BASE__INST4_SEG4                       0
+#define DCE_BASE__INST4_SEG5                       0
+
+#define DCE_BASE__INST5_SEG0                       0
+#define DCE_BASE__INST5_SEG1                       0
+#define DCE_BASE__INST5_SEG2                       0
+#define DCE_BASE__INST5_SEG3                       0
+#define DCE_BASE__INST5_SEG4                       0
+#define DCE_BASE__INST5_SEG5                       0
+
+#define DF_BASE__INST0_SEG0                        0x00007000
+#define DF_BASE__INST0_SEG1                        0
+#define DF_BASE__INST0_SEG2                        0
+#define DF_BASE__INST0_SEG3                        0
+#define DF_BASE__INST0_SEG4                        0
+#define DF_BASE__INST0_SEG5                        0
+
+#define DF_BASE__INST1_SEG0                        0
+#define DF_BASE__INST1_SEG1                        0
+#define DF_BASE__INST1_SEG2                        0
+#define DF_BASE__INST1_SEG3                        0
+#define DF_BASE__INST1_SEG4                        0
+#define DF_BASE__INST1_SEG5                        0
+
+#define DF_BASE__INST2_SEG0                        0
+#define DF_BASE__INST2_SEG1                        0
+#define DF_BASE__INST2_SEG2                        0
+#define DF_BASE__INST2_SEG3                        0
+#define DF_BASE__INST2_SEG4                        0
+#define DF_BASE__INST2_SEG5                        0
+
+#define DF_BASE__INST3_SEG0                        0
+#define DF_BASE__INST3_SEG1                        0
+#define DF_BASE__INST3_SEG2                        0
+#define DF_BASE__INST3_SEG3                        0
+#define DF_BASE__INST3_SEG4                        0
+#define DF_BASE__INST3_SEG5                        0
+
+#define DF_BASE__INST4_SEG0                        0
+#define DF_BASE__INST4_SEG1                        0
+#define DF_BASE__INST4_SEG2                        0
+#define DF_BASE__INST4_SEG3                        0
+#define DF_BASE__INST4_SEG4                        0
+#define DF_BASE__INST4_SEG5                        0
+
+#define DF_BASE__INST5_SEG0                        0
+#define DF_BASE__INST5_SEG1                        0
+#define DF_BASE__INST5_SEG2                        0
+#define DF_BASE__INST5_SEG3                        0
+#define DF_BASE__INST5_SEG4                        0
+#define DF_BASE__INST5_SEG5                        0
+
+#define FUSE_BASE__INST0_SEG0                      0x00017400
+#define FUSE_BASE__INST0_SEG1                      0
+#define FUSE_BASE__INST0_SEG2                      0
+#define FUSE_BASE__INST0_SEG3                      0
+#define FUSE_BASE__INST0_SEG4                      0
+#define FUSE_BASE__INST0_SEG5                      0
+
+#define FUSE_BASE__INST1_SEG0                      0
+#define FUSE_BASE__INST1_SEG1                      0
+#define FUSE_BASE__INST1_SEG2                      0
+#define FUSE_BASE__INST1_SEG3                      0
+#define FUSE_BASE__INST1_SEG4                      0
+#define FUSE_BASE__INST1_SEG5                      0
+
+#define FUSE_BASE__INST2_SEG0                      0
+#define FUSE_BASE__INST2_SEG1                      0
+#define FUSE_BASE__INST2_SEG2                      0
+#define FUSE_BASE__INST2_SEG3                      0
+#define FUSE_BASE__INST2_SEG4                      0
+#define FUSE_BASE__INST2_SEG5                      0
+
+#define FUSE_BASE__INST3_SEG0                      0
+#define FUSE_BASE__INST3_SEG1                      0
+#define FUSE_BASE__INST3_SEG2                      0
+#define FUSE_BASE__INST3_SEG3                      0
+#define FUSE_BASE__INST3_SEG4                      0
+#define FUSE_BASE__INST3_SEG5                      0
+
+#define FUSE_BASE__INST4_SEG0                      0
+#define FUSE_BASE__INST4_SEG1                      0
+#define FUSE_BASE__INST4_SEG2                      0
+#define FUSE_BASE__INST4_SEG3                      0
+#define FUSE_BASE__INST4_SEG4                      0
+#define FUSE_BASE__INST4_SEG5                      0
+
+#define FUSE_BASE__INST5_SEG0                      0
+#define FUSE_BASE__INST5_SEG1                      0
+#define FUSE_BASE__INST5_SEG2                      0
+#define FUSE_BASE__INST5_SEG3                      0
+#define FUSE_BASE__INST5_SEG4                      0
+#define FUSE_BASE__INST5_SEG5                      0
+
+#define GC_BASE__INST0_SEG0                        0x00002000
+#define GC_BASE__INST0_SEG1                        0x0000A000
+#define GC_BASE__INST0_SEG2                        0
+#define GC_BASE__INST0_SEG3                        0
+#define GC_BASE__INST0_SEG4                        0
+#define GC_BASE__INST0_SEG5                        0
+
+#define GC_BASE__INST1_SEG0                        0
+#define GC_BASE__INST1_SEG1                        0
+#define GC_BASE__INST1_SEG2                        0
+#define GC_BASE__INST1_SEG3                        0
+#define GC_BASE__INST1_SEG4                        0
+#define GC_BASE__INST1_SEG5                        0
+
+#define GC_BASE__INST2_SEG0                        0
+#define GC_BASE__INST2_SEG1                        0
+#define GC_BASE__INST2_SEG2                        0
+#define GC_BASE__INST2_SEG3                        0
+#define GC_BASE__INST2_SEG4                        0
+#define GC_BASE__INST2_SEG5                        0
+
+#define GC_BASE__INST3_SEG0                        0
+#define GC_BASE__INST3_SEG1                        0
+#define GC_BASE__INST3_SEG2                        0
+#define GC_BASE__INST3_SEG3                        0
+#define GC_BASE__INST3_SEG4                        0
+#define GC_BASE__INST3_SEG5                        0
+
+#define GC_BASE__INST4_SEG0                        0
+#define GC_BASE__INST4_SEG1                        0
+#define GC_BASE__INST4_SEG2                        0
+#define GC_BASE__INST4_SEG3                        0
+#define GC_BASE__INST4_SEG4                        0
+#define GC_BASE__INST4_SEG5                        0
+
+#define GC_BASE__INST5_SEG0                        0
+#define GC_BASE__INST5_SEG1                        0
+#define GC_BASE__INST5_SEG2                        0
+#define GC_BASE__INST5_SEG3                        0
+#define GC_BASE__INST5_SEG4                        0
+#define GC_BASE__INST5_SEG5                        0
+
+#define HDP_BASE__INST0_SEG0                       0x00000F20
+#define HDP_BASE__INST0_SEG1                       0
+#define HDP_BASE__INST0_SEG2                       0
+#define HDP_BASE__INST0_SEG3                       0
+#define HDP_BASE__INST0_SEG4                       0
+#define HDP_BASE__INST0_SEG5                       0
+
+#define HDP_BASE__INST1_SEG0                       0
+#define HDP_BASE__INST1_SEG1                       0
+#define HDP_BASE__INST1_SEG2                       0
+#define HDP_BASE__INST1_SEG3                       0
+#define HDP_BASE__INST1_SEG4                       0
+#define HDP_BASE__INST1_SEG5                       0
+
+#define HDP_BASE__INST2_SEG0                       0
+#define HDP_BASE__INST2_SEG1                       0
+#define HDP_BASE__INST2_SEG2                       0
+#define HDP_BASE__INST2_SEG3                       0
+#define HDP_BASE__INST2_SEG4                       0
+#define HDP_BASE__INST2_SEG5                       0
+
+#define HDP_BASE__INST3_SEG0                       0
+#define HDP_BASE__INST3_SEG1                       0
+#define HDP_BASE__INST3_SEG2                       0
+#define HDP_BASE__INST3_SEG3                       0
+#define HDP_BASE__INST3_SEG4                       0
+#define HDP_BASE__INST3_SEG5                       0
+
+#define HDP_BASE__INST4_SEG0                       0
+#define HDP_BASE__INST4_SEG1                       0
+#define HDP_BASE__INST4_SEG2                       0
+#define HDP_BASE__INST4_SEG3                       0
+#define HDP_BASE__INST4_SEG4                       0
+#define HDP_BASE__INST4_SEG5                       0
+
+#define HDP_BASE__INST5_SEG0                       0
+#define HDP_BASE__INST5_SEG1                       0
+#define HDP_BASE__INST5_SEG2                       0
+#define HDP_BASE__INST5_SEG3                       0
+#define HDP_BASE__INST5_SEG4                       0
+#define HDP_BASE__INST5_SEG5                       0
+
+#define MMHUB_BASE__INST0_SEG0                     0x0001A000
+#define MMHUB_BASE__INST0_SEG1                     0
+#define MMHUB_BASE__INST0_SEG2                     0
+#define MMHUB_BASE__INST0_SEG3                     0
+#define MMHUB_BASE__INST0_SEG4                     0
+#define MMHUB_BASE__INST0_SEG5                     0
+
+#define MMHUB_BASE__INST1_SEG0                     0
+#define MMHUB_BASE__INST1_SEG1                     0
+#define MMHUB_BASE__INST1_SEG2                     0
+#define MMHUB_BASE__INST1_SEG3                     0
+#define MMHUB_BASE__INST1_SEG4                     0
+#define MMHUB_BASE__INST1_SEG5                     0
+
+#define MMHUB_BASE__INST2_SEG0                     0
+#define MMHUB_BASE__INST2_SEG1                     0
+#define MMHUB_BASE__INST2_SEG2                     0
+#define MMHUB_BASE__INST2_SEG3                     0
+#define MMHUB_BASE__INST2_SEG4                     0
+#define MMHUB_BASE__INST2_SEG5                     0
+
+#define MMHUB_BASE__INST3_SEG0                     0
+#define MMHUB_BASE__INST3_SEG1                     0
+#define MMHUB_BASE__INST3_SEG2                     0
+#define MMHUB_BASE__INST3_SEG3                     0
+#define MMHUB_BASE__INST3_SEG4                     0
+#define MMHUB_BASE__INST3_SEG5                     0
+
+#define MMHUB_BASE__INST4_SEG0                     0
+#define MMHUB_BASE__INST4_SEG1                     0
+#define MMHUB_BASE__INST4_SEG2                     0
+#define MMHUB_BASE__INST4_SEG3                     0
+#define MMHUB_BASE__INST4_SEG4                     0
+#define MMHUB_BASE__INST4_SEG5                     0
+
+#define MMHUB_BASE__INST5_SEG0                     0
+#define MMHUB_BASE__INST5_SEG1                     0
+#define MMHUB_BASE__INST5_SEG2                     0
+#define MMHUB_BASE__INST5_SEG3                     0
+#define MMHUB_BASE__INST5_SEG4                     0
+#define MMHUB_BASE__INST5_SEG5                     0
+
+#define MP0_BASE__INST0_SEG0                       0x00016000
+#define MP0_BASE__INST0_SEG1                       0
+#define MP0_BASE__INST0_SEG2                       0
+#define MP0_BASE__INST0_SEG3                       0
+#define MP0_BASE__INST0_SEG4                       0
+#define MP0_BASE__INST0_SEG5                       0
+
+#define MP0_BASE__INST1_SEG0                       0
+#define MP0_BASE__INST1_SEG1                       0
+#define MP0_BASE__INST1_SEG2                       0
+#define MP0_BASE__INST1_SEG3                       0
+#define MP0_BASE__INST1_SEG4                       0
+#define MP0_BASE__INST1_SEG5                       0
+
+#define MP0_BASE__INST2_SEG0                       0
+#define MP0_BASE__INST2_SEG1                       0
+#define MP0_BASE__INST2_SEG2                       0
+#define MP0_BASE__INST2_SEG3                       0
+#define MP0_BASE__INST2_SEG4                       0
+#define MP0_BASE__INST2_SEG5                       0
+
+#define MP0_BASE__INST3_SEG0                       0
+#define MP0_BASE__INST3_SEG1                       0
+#define MP0_BASE__INST3_SEG2                       0
+#define MP0_BASE__INST3_SEG3                       0
+#define MP0_BASE__INST3_SEG4                       0
+#define MP0_BASE__INST3_SEG5                       0
+
+#define MP0_BASE__INST4_SEG0                       0
+#define MP0_BASE__INST4_SEG1                       0
+#define MP0_BASE__INST4_SEG2                       0
+#define MP0_BASE__INST4_SEG3                       0
+#define MP0_BASE__INST4_SEG4                       0
+#define MP0_BASE__INST4_SEG5                       0
+
+#define MP0_BASE__INST5_SEG0                       0
+#define MP0_BASE__INST5_SEG1                       0
+#define MP0_BASE__INST5_SEG2                       0
+#define MP0_BASE__INST5_SEG3                       0
+#define MP0_BASE__INST5_SEG4                       0
+#define MP0_BASE__INST5_SEG5                       0
+
+#define MP1_BASE__INST0_SEG0                       0x00016000
+#define MP1_BASE__INST0_SEG1                       0
+#define MP1_BASE__INST0_SEG2                       0
+#define MP1_BASE__INST0_SEG3                       0
+#define MP1_BASE__INST0_SEG4                       0
+#define MP1_BASE__INST0_SEG5                       0
+
+#define MP1_BASE__INST1_SEG0                       0
+#define MP1_BASE__INST1_SEG1                       0
+#define MP1_BASE__INST1_SEG2                       0
+#define MP1_BASE__INST1_SEG3                       0
+#define MP1_BASE__INST1_SEG4                       0
+#define MP1_BASE__INST1_SEG5                       0
+
+#define MP1_BASE__INST2_SEG0                       0
+#define MP1_BASE__INST2_SEG1                       0
+#define MP1_BASE__INST2_SEG2                       0
+#define MP1_BASE__INST2_SEG3                       0
+#define MP1_BASE__INST2_SEG4                       0
+#define MP1_BASE__INST2_SEG5                       0
+
+#define MP1_BASE__INST3_SEG0                       0
+#define MP1_BASE__INST3_SEG1                       0
+#define MP1_BASE__INST3_SEG2                       0
+#define MP1_BASE__INST3_SEG3                       0
+#define MP1_BASE__INST3_SEG4                       0
+#define MP1_BASE__INST3_SEG5                       0
+
+#define MP1_BASE__INST4_SEG0                       0
+#define MP1_BASE__INST4_SEG1                       0
+#define MP1_BASE__INST4_SEG2                       0
+#define MP1_BASE__INST4_SEG3                       0
+#define MP1_BASE__INST4_SEG4                       0
+#define MP1_BASE__INST4_SEG5                       0
+
+#define MP1_BASE__INST5_SEG0                       0
+#define MP1_BASE__INST5_SEG1                       0
+#define MP1_BASE__INST5_SEG2                       0
+#define MP1_BASE__INST5_SEG3                       0
+#define MP1_BASE__INST5_SEG4                       0
+#define MP1_BASE__INST5_SEG5                       0
+
+#define NBIO_BASE__INST0_SEG0                      0x00000000
+#define NBIO_BASE__INST0_SEG1                      0x00000014
+#define NBIO_BASE__INST0_SEG2                      0x00000D20
+#define NBIO_BASE__INST0_SEG3                      0x00010400
+#define NBIO_BASE__INST0_SEG4                      0
+#define NBIO_BASE__INST0_SEG5                      0
+
+#define NBIO_BASE__INST1_SEG0                      0
+#define NBIO_BASE__INST1_SEG1                      0
+#define NBIO_BASE__INST1_SEG2                      0
+#define NBIO_BASE__INST1_SEG3                      0
+#define NBIO_BASE__INST1_SEG4                      0
+#define NBIO_BASE__INST1_SEG5                      0
+
+#define NBIO_BASE__INST2_SEG0                      0
+#define NBIO_BASE__INST2_SEG1                      0
+#define NBIO_BASE__INST2_SEG2                      0
+#define NBIO_BASE__INST2_SEG3                      0
+#define NBIO_BASE__INST2_SEG4                      0
+#define NBIO_BASE__INST2_SEG5                      0
+
+#define NBIO_BASE__INST3_SEG0                      0
+#define NBIO_BASE__INST3_SEG1                      0
+#define NBIO_BASE__INST3_SEG2                      0
+#define NBIO_BASE__INST3_SEG3                      0
+#define NBIO_BASE__INST3_SEG4                      0
+#define NBIO_BASE__INST3_SEG5                      0
+
+#define NBIO_BASE__INST4_SEG0                      0
+#define NBIO_BASE__INST4_SEG1                      0
+#define NBIO_BASE__INST4_SEG2                      0
+#define NBIO_BASE__INST4_SEG3                      0
+#define NBIO_BASE__INST4_SEG4                      0
+#define NBIO_BASE__INST4_SEG5                      0
+
+#define NBIO_BASE__INST5_SEG0                      0
+#define NBIO_BASE__INST5_SEG1                      0
+#define NBIO_BASE__INST5_SEG2                      0
+#define NBIO_BASE__INST5_SEG3                      0
+#define NBIO_BASE__INST5_SEG4                      0
+#define NBIO_BASE__INST5_SEG5                      0
+
+#define OSSSYS_BASE__INST0_SEG0                    0x000010A0
+#define OSSSYS_BASE__INST0_SEG1                    0
+#define OSSSYS_BASE__INST0_SEG2                    0
+#define OSSSYS_BASE__INST0_SEG3                    0
+#define OSSSYS_BASE__INST0_SEG4                    0
+#define OSSSYS_BASE__INST0_SEG5                    0
+
+#define OSSSYS_BASE__INST1_SEG0                    0
+#define OSSSYS_BASE__INST1_SEG1                    0
+#define OSSSYS_BASE__INST1_SEG2                    0
+#define OSSSYS_BASE__INST1_SEG3                    0
+#define OSSSYS_BASE__INST1_SEG4                    0
+#define OSSSYS_BASE__INST1_SEG5                    0
+
+#define OSSSYS_BASE__INST2_SEG0                    0
+#define OSSSYS_BASE__INST2_SEG1                    0
+#define OSSSYS_BASE__INST2_SEG2                    0
+#define OSSSYS_BASE__INST2_SEG3                    0
+#define OSSSYS_BASE__INST2_SEG4                    0
+#define OSSSYS_BASE__INST2_SEG5                    0
+
+#define OSSSYS_BASE__INST3_SEG0                    0
+#define OSSSYS_BASE__INST3_SEG1                    0
+#define OSSSYS_BASE__INST3_SEG2                    0
+#define OSSSYS_BASE__INST3_SEG3                    0
+#define OSSSYS_BASE__INST3_SEG4                    0
+#define OSSSYS_BASE__INST3_SEG5                    0
+
+#define OSSSYS_BASE__INST4_SEG0                    0
+#define OSSSYS_BASE__INST4_SEG1                    0
+#define OSSSYS_BASE__INST4_SEG2                    0
+#define OSSSYS_BASE__INST4_SEG3                    0
+#define OSSSYS_BASE__INST4_SEG4                    0
+#define OSSSYS_BASE__INST4_SEG5                    0
+
+#define OSSSYS_BASE__INST5_SEG0                    0
+#define OSSSYS_BASE__INST5_SEG1                    0
+#define OSSSYS_BASE__INST5_SEG2                    0
+#define OSSSYS_BASE__INST5_SEG3                    0
+#define OSSSYS_BASE__INST5_SEG4                    0
+#define OSSSYS_BASE__INST5_SEG5                    0
+
+#define SDMA0_BASE__INST0_SEG0                     0x00001260
+#define SDMA0_BASE__INST0_SEG1                     0
+#define SDMA0_BASE__INST0_SEG2                     0
+#define SDMA0_BASE__INST0_SEG3                     0
+#define SDMA0_BASE__INST0_SEG4                     0
+#define SDMA0_BASE__INST0_SEG5                     0
+
+#define SDMA0_BASE__INST1_SEG0                     0
+#define SDMA0_BASE__INST1_SEG1                     0
+#define SDMA0_BASE__INST1_SEG2                     0
+#define SDMA0_BASE__INST1_SEG3                     0
+#define SDMA0_BASE__INST1_SEG4                     0
+#define SDMA0_BASE__INST1_SEG5                     0
+
+#define SDMA0_BASE__INST2_SEG0                     0
+#define SDMA0_BASE__INST2_SEG1                     0
+#define SDMA0_BASE__INST2_SEG2                     0
+#define SDMA0_BASE__INST2_SEG3                     0
+#define SDMA0_BASE__INST2_SEG4                     0
+#define SDMA0_BASE__INST2_SEG5                     0
+
+#define SDMA0_BASE__INST3_SEG0                     0
+#define SDMA0_BASE__INST3_SEG1                     0
+#define SDMA0_BASE__INST3_SEG2                     0
+#define SDMA0_BASE__INST3_SEG3                     0
+#define SDMA0_BASE__INST3_SEG4                     0
+#define SDMA0_BASE__INST3_SEG5                     0
+
+#define SDMA0_BASE__INST4_SEG0                     0
+#define SDMA0_BASE__INST4_SEG1                     0
+#define SDMA0_BASE__INST4_SEG2                     0
+#define SDMA0_BASE__INST4_SEG3                     0
+#define SDMA0_BASE__INST4_SEG4                     0
+#define SDMA0_BASE__INST4_SEG5                     0
+
+#define SDMA0_BASE__INST5_SEG0                     0
+#define SDMA0_BASE__INST5_SEG1                     0
+#define SDMA0_BASE__INST5_SEG2                     0
+#define SDMA0_BASE__INST5_SEG3                     0
+#define SDMA0_BASE__INST5_SEG4                     0
+#define SDMA0_BASE__INST5_SEG5                     0
+
+#define SDMA1_BASE__INST0_SEG0                     0x00001860
+#define SDMA1_BASE__INST0_SEG1                     0
+#define SDMA1_BASE__INST0_SEG2                     0
+#define SDMA1_BASE__INST0_SEG3                     0
+#define SDMA1_BASE__INST0_SEG4                     0
+#define SDMA1_BASE__INST0_SEG5                     0
+
+#define SDMA1_BASE__INST1_SEG0                     0
+#define SDMA1_BASE__INST1_SEG1                     0
+#define SDMA1_BASE__INST1_SEG2                     0
+#define SDMA1_BASE__INST1_SEG3                     0
+#define SDMA1_BASE__INST1_SEG4                     0
+#define SDMA1_BASE__INST1_SEG5                     0
+
+#define SDMA1_BASE__INST2_SEG0                     0
+#define SDMA1_BASE__INST2_SEG1                     0
+#define SDMA1_BASE__INST2_SEG2                     0
+#define SDMA1_BASE__INST2_SEG3                     0
+#define SDMA1_BASE__INST2_SEG4                     0
+#define SDMA1_BASE__INST2_SEG5                     0
+
+#define SDMA1_BASE__INST3_SEG0                     0
+#define SDMA1_BASE__INST3_SEG1                     0
+#define SDMA1_BASE__INST3_SEG2                     0
+#define SDMA1_BASE__INST3_SEG3                     0
+#define SDMA1_BASE__INST3_SEG4                     0
+#define SDMA1_BASE__INST3_SEG5                     0
+
+#define SDMA1_BASE__INST4_SEG0                     0
+#define SDMA1_BASE__INST4_SEG1                     0
+#define SDMA1_BASE__INST4_SEG2                     0
+#define SDMA1_BASE__INST4_SEG3                     0
+#define SDMA1_BASE__INST4_SEG4                     0
+#define SDMA1_BASE__INST4_SEG5                     0
+
+#define SDMA1_BASE__INST5_SEG0                     0
+#define SDMA1_BASE__INST5_SEG1                     0
+#define SDMA1_BASE__INST5_SEG2                     0
+#define SDMA1_BASE__INST5_SEG3                     0
+#define SDMA1_BASE__INST5_SEG4                     0
+#define SDMA1_BASE__INST5_SEG5                     0
+
+#define SMUIO_BASE__INST0_SEG0                     0x00016800
+#define SMUIO_BASE__INST0_SEG1                     0x00016A00
+#define SMUIO_BASE__INST0_SEG2                     0
+#define SMUIO_BASE__INST0_SEG3                     0
+#define SMUIO_BASE__INST0_SEG4                     0
+#define SMUIO_BASE__INST0_SEG5                     0
+
+#define SMUIO_BASE__INST1_SEG0                     0
+#define SMUIO_BASE__INST1_SEG1                     0
+#define SMUIO_BASE__INST1_SEG2                     0
+#define SMUIO_BASE__INST1_SEG3                     0
+#define SMUIO_BASE__INST1_SEG4                     0
+#define SMUIO_BASE__INST1_SEG5                     0
+
+#define SMUIO_BASE__INST2_SEG0                     0
+#define SMUIO_BASE__INST2_SEG1                     0
+#define SMUIO_BASE__INST2_SEG2                     0
+#define SMUIO_BASE__INST2_SEG3                     0
+#define SMUIO_BASE__INST2_SEG4                     0
+#define SMUIO_BASE__INST2_SEG5                     0
+
+#define SMUIO_BASE__INST3_SEG0                     0
+#define SMUIO_BASE__INST3_SEG1                     0
+#define SMUIO_BASE__INST3_SEG2                     0
+#define SMUIO_BASE__INST3_SEG3                     0
+#define SMUIO_BASE__INST3_SEG4                     0
+#define SMUIO_BASE__INST3_SEG5                     0
+
+#define SMUIO_BASE__INST4_SEG0                     0
+#define SMUIO_BASE__INST4_SEG1                     0
+#define SMUIO_BASE__INST4_SEG2                     0
+#define SMUIO_BASE__INST4_SEG3                     0
+#define SMUIO_BASE__INST4_SEG4                     0
+#define SMUIO_BASE__INST4_SEG5                     0
+
+#define SMUIO_BASE__INST5_SEG0                     0
+#define SMUIO_BASE__INST5_SEG1                     0
+#define SMUIO_BASE__INST5_SEG2                     0
+#define SMUIO_BASE__INST5_SEG3                     0
+#define SMUIO_BASE__INST5_SEG4                     0
+#define SMUIO_BASE__INST5_SEG5                     0
+
+#define THM_BASE__INST0_SEG0                       0x00016600
+#define THM_BASE__INST0_SEG1                       0
+#define THM_BASE__INST0_SEG2                       0
+#define THM_BASE__INST0_SEG3                       0
+#define THM_BASE__INST0_SEG4                       0
+#define THM_BASE__INST0_SEG5                       0
+
+#define THM_BASE__INST1_SEG0                       0
+#define THM_BASE__INST1_SEG1                       0
+#define THM_BASE__INST1_SEG2                       0
+#define THM_BASE__INST1_SEG3                       0
+#define THM_BASE__INST1_SEG4                       0
+#define THM_BASE__INST1_SEG5                       0
+
+#define THM_BASE__INST2_SEG0                       0
+#define THM_BASE__INST2_SEG1                       0
+#define THM_BASE__INST2_SEG2                       0
+#define THM_BASE__INST2_SEG3                       0
+#define THM_BASE__INST2_SEG4                       0
+#define THM_BASE__INST2_SEG5                       0
+
+#define THM_BASE__INST3_SEG0                       0
+#define THM_BASE__INST3_SEG1                       0
+#define THM_BASE__INST3_SEG2                       0
+#define THM_BASE__INST3_SEG3                       0
+#define THM_BASE__INST3_SEG4                       0
+#define THM_BASE__INST3_SEG5                       0
+
+#define THM_BASE__INST4_SEG0                       0
+#define THM_BASE__INST4_SEG1                       0
+#define THM_BASE__INST4_SEG2                       0
+#define THM_BASE__INST4_SEG3                       0
+#define THM_BASE__INST4_SEG4                       0
+#define THM_BASE__INST4_SEG5                       0
+
+#define THM_BASE__INST5_SEG0                       0
+#define THM_BASE__INST5_SEG1                       0
+#define THM_BASE__INST5_SEG2                       0
+#define THM_BASE__INST5_SEG3                       0
+#define THM_BASE__INST5_SEG4                       0
+#define THM_BASE__INST5_SEG5                       0
+
+#define UMC_BASE__INST0_SEG0                       0x00014000
+#define UMC_BASE__INST0_SEG1                       0
+#define UMC_BASE__INST0_SEG2                       0
+#define UMC_BASE__INST0_SEG3                       0
+#define UMC_BASE__INST0_SEG4                       0
+#define UMC_BASE__INST0_SEG5                       0
+
+#define UMC_BASE__INST1_SEG0                       0
+#define UMC_BASE__INST1_SEG1                       0
+#define UMC_BASE__INST1_SEG2                       0
+#define UMC_BASE__INST1_SEG3                       0
+#define UMC_BASE__INST1_SEG4                       0
+#define UMC_BASE__INST1_SEG5                       0
+
+#define UMC_BASE__INST2_SEG0                       0
+#define UMC_BASE__INST2_SEG1                       0
+#define UMC_BASE__INST2_SEG2                       0
+#define UMC_BASE__INST2_SEG3                       0
+#define UMC_BASE__INST2_SEG4                       0
+#define UMC_BASE__INST2_SEG5                       0
+
+#define UMC_BASE__INST3_SEG0                       0
+#define UMC_BASE__INST3_SEG1                       0
+#define UMC_BASE__INST3_SEG2                       0
+#define UMC_BASE__INST3_SEG3                       0
+#define UMC_BASE__INST3_SEG4                       0
+#define UMC_BASE__INST3_SEG5                       0
+
+#define UMC_BASE__INST4_SEG0                       0
+#define UMC_BASE__INST4_SEG1                       0
+#define UMC_BASE__INST4_SEG2                       0
+#define UMC_BASE__INST4_SEG3                       0
+#define UMC_BASE__INST4_SEG4                       0
+#define UMC_BASE__INST4_SEG5                       0
+
+#define UMC_BASE__INST5_SEG0                       0
+#define UMC_BASE__INST5_SEG1                       0
+#define UMC_BASE__INST5_SEG2                       0
+#define UMC_BASE__INST5_SEG3                       0
+#define UMC_BASE__INST5_SEG4                       0
+#define UMC_BASE__INST5_SEG5                       0
+
+#define UVD_BASE__INST0_SEG0                       0x00007800
+#define UVD_BASE__INST0_SEG1                       0x00007E00
+#define UVD_BASE__INST0_SEG2                       0
+#define UVD_BASE__INST0_SEG3                       0
+#define UVD_BASE__INST0_SEG4                       0
+#define UVD_BASE__INST0_SEG5                       0
+
+#define UVD_BASE__INST1_SEG0                       0
+#define UVD_BASE__INST1_SEG1                       0x00009000
+#define UVD_BASE__INST1_SEG2                       0
+#define UVD_BASE__INST1_SEG3                       0
+#define UVD_BASE__INST1_SEG4                       0
+#define UVD_BASE__INST1_SEG5                       0
+
+#define UVD_BASE__INST2_SEG0                       0
+#define UVD_BASE__INST2_SEG1                       0
+#define UVD_BASE__INST2_SEG2                       0
+#define UVD_BASE__INST2_SEG3                       0
+#define UVD_BASE__INST2_SEG4                       0
+#define UVD_BASE__INST2_SEG5                       0
+
+#define UVD_BASE__INST3_SEG0                       0
+#define UVD_BASE__INST3_SEG1                       0
+#define UVD_BASE__INST3_SEG2                       0
+#define UVD_BASE__INST3_SEG3                       0
+#define UVD_BASE__INST3_SEG4                       0
+#define UVD_BASE__INST3_SEG5                       0
+
+#define UVD_BASE__INST4_SEG0                       0
+#define UVD_BASE__INST4_SEG1                       0
+#define UVD_BASE__INST4_SEG2                       0
+#define UVD_BASE__INST4_SEG3                       0
+#define UVD_BASE__INST4_SEG4                       0
+#define UVD_BASE__INST4_SEG5                       0
+
+#define UVD_BASE__INST5_SEG0                       0
+#define UVD_BASE__INST5_SEG1                       0
+#define UVD_BASE__INST5_SEG2                       0
+#define UVD_BASE__INST5_SEG3                       0
+#define UVD_BASE__INST5_SEG4                       0
+#define UVD_BASE__INST5_SEG5                       0
+
+#define VCE_BASE__INST0_SEG0                       0x00008800
+#define VCE_BASE__INST0_SEG1                       0
+#define VCE_BASE__INST0_SEG2                       0
+#define VCE_BASE__INST0_SEG3                       0
+#define VCE_BASE__INST0_SEG4                       0
+#define VCE_BASE__INST0_SEG5                       0
+
+#define VCE_BASE__INST1_SEG0                       0
+#define VCE_BASE__INST1_SEG1                       0
+#define VCE_BASE__INST1_SEG2                       0
+#define VCE_BASE__INST1_SEG3                       0
+#define VCE_BASE__INST1_SEG4                       0
+#define VCE_BASE__INST1_SEG5                       0
+
+#define VCE_BASE__INST2_SEG0                       0
+#define VCE_BASE__INST2_SEG1                       0
+#define VCE_BASE__INST2_SEG2                       0
+#define VCE_BASE__INST2_SEG3                       0
+#define VCE_BASE__INST2_SEG4                       0
+#define VCE_BASE__INST2_SEG5                       0
+
+#define VCE_BASE__INST3_SEG0                       0
+#define VCE_BASE__INST3_SEG1                       0
+#define VCE_BASE__INST3_SEG2                       0
+#define VCE_BASE__INST3_SEG3                       0
+#define VCE_BASE__INST3_SEG4                       0
+#define VCE_BASE__INST3_SEG5                       0
+
+#define VCE_BASE__INST4_SEG0                       0
+#define VCE_BASE__INST4_SEG1                       0
+#define VCE_BASE__INST4_SEG2                       0
+#define VCE_BASE__INST4_SEG3                       0
+#define VCE_BASE__INST4_SEG4                       0
+#define VCE_BASE__INST4_SEG5                       0
+
+#define VCE_BASE__INST5_SEG0                       0
+#define VCE_BASE__INST5_SEG1                       0
+#define VCE_BASE__INST5_SEG2                       0
+#define VCE_BASE__INST5_SEG3                       0
+#define VCE_BASE__INST5_SEG4                       0
+#define VCE_BASE__INST5_SEG5                       0
+
+#define XDMA_BASE__INST0_SEG0                      0x00003400
+#define XDMA_BASE__INST0_SEG1                      0
+#define XDMA_BASE__INST0_SEG2                      0
+#define XDMA_BASE__INST0_SEG3                      0
+#define XDMA_BASE__INST0_SEG4                      0
+#define XDMA_BASE__INST0_SEG5                      0
+
+#define XDMA_BASE__INST1_SEG0                      0
+#define XDMA_BASE__INST1_SEG1                      0
+#define XDMA_BASE__INST1_SEG2                      0
+#define XDMA_BASE__INST1_SEG3                      0
+#define XDMA_BASE__INST1_SEG4                      0
+#define XDMA_BASE__INST1_SEG5                      0
+
+#define XDMA_BASE__INST2_SEG0                      0
+#define XDMA_BASE__INST2_SEG1                      0
+#define XDMA_BASE__INST2_SEG2                      0
+#define XDMA_BASE__INST2_SEG3                      0
+#define XDMA_BASE__INST2_SEG4                      0
+#define XDMA_BASE__INST2_SEG5                      0
+
+#define XDMA_BASE__INST3_SEG0                      0
+#define XDMA_BASE__INST3_SEG1                      0
+#define XDMA_BASE__INST3_SEG2                      0
+#define XDMA_BASE__INST3_SEG3                      0
+#define XDMA_BASE__INST3_SEG4                      0
+#define XDMA_BASE__INST3_SEG5                      0
+
+#define XDMA_BASE__INST4_SEG0                      0
+#define XDMA_BASE__INST4_SEG1                      0
+#define XDMA_BASE__INST4_SEG2                      0
+#define XDMA_BASE__INST4_SEG3                      0
+#define XDMA_BASE__INST4_SEG4                      0
+#define XDMA_BASE__INST4_SEG5                      0
+
+#define XDMA_BASE__INST5_SEG0                      0
+#define XDMA_BASE__INST5_SEG1                      0
+#define XDMA_BASE__INST5_SEG2                      0
+#define XDMA_BASE__INST5_SEG3                      0
+#define XDMA_BASE__INST5_SEG4                      0
+#define XDMA_BASE__INST5_SEG5                      0
+
+#define RSMU_BASE__INST0_SEG0                      0x00012000
+#define RSMU_BASE__INST0_SEG1                      0
+#define RSMU_BASE__INST0_SEG2                      0
+#define RSMU_BASE__INST0_SEG3                      0
+#define RSMU_BASE__INST0_SEG4                      0
+#define RSMU_BASE__INST0_SEG5                      0
+
+#define RSMU_BASE__INST1_SEG0                      0
+#define RSMU_BASE__INST1_SEG1                      0
+#define RSMU_BASE__INST1_SEG2                      0
+#define RSMU_BASE__INST1_SEG3                      0
+#define RSMU_BASE__INST1_SEG4                      0
+#define RSMU_BASE__INST1_SEG5                      0
+
+#define RSMU_BASE__INST2_SEG0                      0
+#define RSMU_BASE__INST2_SEG1                      0
+#define RSMU_BASE__INST2_SEG2                      0
+#define RSMU_BASE__INST2_SEG3                      0
+#define RSMU_BASE__INST2_SEG4                      0
+#define RSMU_BASE__INST2_SEG5                      0
+
+#define RSMU_BASE__INST3_SEG0                      0
+#define RSMU_BASE__INST3_SEG1                      0
+#define RSMU_BASE__INST3_SEG2                      0
+#define RSMU_BASE__INST3_SEG3                      0
+#define RSMU_BASE__INST3_SEG4                      0
+#define RSMU_BASE__INST3_SEG5                      0
+
+#define RSMU_BASE__INST4_SEG0                      0
+#define RSMU_BASE__INST4_SEG1                      0
+#define RSMU_BASE__INST4_SEG2                      0
+#define RSMU_BASE__INST4_SEG3                      0
+#define RSMU_BASE__INST4_SEG4                      0
+#define RSMU_BASE__INST4_SEG5                      0
+
+#define RSMU_BASE__INST5_SEG0                      0
+#define RSMU_BASE__INST5_SEG1                      0
+#define RSMU_BASE__INST5_SEG2                      0
+#define RSMU_BASE__INST5_SEG3                      0
+#define RSMU_BASE__INST5_SEG4                      0
+#define RSMU_BASE__INST5_SEG5                      0
+
+#endif
+
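The *_BASE__INSTn_SEGm tables above follow the SOC15 addressing scheme: an IP block instance's absolute register offset is the segment base plus the register's offset within that segment, and a zero entry means the instance or segment is unused on this ASIC. An illustrative composition follows (the macro name is hypothetical; real code uses a similar SOC15_REG_OFFSET() helper):

/* Illustrative only: compose an absolute register offset from the
 * segment base tables above. */
#define EXAMPLE_REG_OFFSET(ip, inst, seg, reg)	\
	(ip##_BASE__INST##inst##_SEG##seg + (reg))

/* e.g. EXAMPLE_REG_OFFSET(UVD, 0, 0, 0x10) evaluates to 0x00007810 */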
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 7e8ad30..b493369 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -25,30 +25,16 @@
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/firmware.h>
 #include "amd_shared.h"
 #include "amd_powerplay.h"
 #include "power_state.h"
 #include "amdgpu.h"
 #include "hwmgr.h"
 
-#define PP_DPM_DISABLED 0xCCCC
-
-static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_task task_id,
-		enum amd_pm_state_type *user_state);
 
 static const struct amd_pm_funcs pp_dpm_funcs;
 
-static inline int pp_check(struct pp_hwmgr *hwmgr)
-{
-	if (hwmgr == NULL || hwmgr->smumgr_funcs == NULL)
-		return -EINVAL;
-
-	if (hwmgr->pm_en == 0 || hwmgr->hwmgr_func == NULL)
-		return PP_DPM_DISABLED;
-
-	return 0;
-}
-
 static int amd_powerplay_create(struct amdgpu_device *adev)
 {
 	struct pp_hwmgr *hwmgr;
@@ -61,19 +47,21 @@
 		return -ENOMEM;
 
 	hwmgr->adev = adev;
-	hwmgr->pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
+	hwmgr->not_vf = !amdgpu_sriov_vf(adev);
+	hwmgr->pm_en = (amdgpu_dpm && hwmgr->not_vf) ? true : false;
 	hwmgr->device = amdgpu_cgs_create_device(adev);
 	mutex_init(&hwmgr->smu_lock);
 	hwmgr->chip_family = adev->family;
 	hwmgr->chip_id = adev->asic_type;
-	hwmgr->feature_mask = amdgpu_pp_feature_mask;
+	hwmgr->feature_mask = adev->powerplay.pp_feature;
+	hwmgr->display_config = &adev->pm.pm_display_cfg;
 	adev->powerplay.pp_handle = hwmgr;
 	adev->powerplay.pp_funcs = &pp_dpm_funcs;
 	return 0;
 }
 
 
-static int amd_powerplay_destroy(struct amdgpu_device *adev)
+static void amd_powerplay_destroy(struct amdgpu_device *adev)
 {
 	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
 
@@ -82,8 +70,6 @@
 
 	kfree(hwmgr);
 	hwmgr = NULL;
-
-	return 0;
 }
 
 static int pp_early_init(void *handle)
@@ -109,18 +95,9 @@
 	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
+	ret = hwmgr_sw_init(hwmgr);
 
-	if (ret >= 0) {
-		if (hwmgr->smumgr_funcs->smu_init == NULL)
-			return -EINVAL;
-
-		ret = hwmgr->smumgr_funcs->smu_init(hwmgr);
-
-		phm_register_irq_handlers(hwmgr);
-
-		pr_debug("amdgpu: powerplay sw initialized\n");
-	}
+	pr_debug("powerplay sw init %s\n", ret ? "failed" : "successfully");
 
 	return ret;
 }
@@ -129,16 +106,14 @@
 {
 	struct amdgpu_device *adev = handle;
 	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-	if (ret >= 0) {
-		if (hwmgr->smumgr_funcs->smu_fini != NULL)
-			hwmgr->smumgr_funcs->smu_fini(hwmgr);
-	}
+	hwmgr_sw_fini(hwmgr);
 
-	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
+		release_firmware(adev->pm.fw);
+		adev->pm.fw = NULL;
 		amdgpu_ucode_fini_bo(adev);
+	}
 
 	return 0;
 }
@@ -152,55 +127,76 @@
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
 		amdgpu_ucode_init_bo(adev);
 
-	ret = pp_check(hwmgr);
+	ret = hwmgr_hw_init(hwmgr);
 
-	if (ret >= 0) {
-		if (hwmgr->smumgr_funcs->start_smu == NULL)
-			return -EINVAL;
+	if (ret)
+		pr_err("powerplay hw init failed\n");
 
-		if (hwmgr->smumgr_funcs->start_smu(hwmgr)) {
-			pr_err("smc start failed\n");
-			hwmgr->smumgr_funcs->smu_fini(hwmgr);
-			return -EINVAL;
-		}
-		if (ret == PP_DPM_DISABLED)
-			goto exit;
-		ret = hwmgr_hw_init(hwmgr);
-		if (ret)
-			goto exit;
-	}
 	return ret;
-exit:
-	hwmgr->pm_en = 0;
-	cgs_notify_dpm_enabled(hwmgr->device, false);
-	return 0;
-
 }
 
 static int pp_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = handle;
 	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-	if (ret == 0)
-		hwmgr_hw_fini(hwmgr);
+	hwmgr_hw_fini(hwmgr);
 
 	return 0;
 }
 
+static void pp_reserve_vram_for_smu(struct amdgpu_device *adev)
+{
+	int r = -EINVAL;
+	void *cpu_ptr = NULL;
+	uint64_t gpu_addr;
+	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
+
+	if (amdgpu_bo_create_kernel(adev, adev->pm.smu_prv_buffer_size,
+						PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+						&adev->pm.smu_prv_buffer,
+						&gpu_addr,
+						&cpu_ptr)) {
+		DRM_ERROR("amdgpu: failed to create smu prv buffer\n");
+		return;
+	}
+
+	if (hwmgr->hwmgr_func->notify_cac_buffer_info)
+		r = hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr,
+					lower_32_bits((unsigned long)cpu_ptr),
+					upper_32_bits((unsigned long)cpu_ptr),
+					lower_32_bits(gpu_addr),
+					upper_32_bits(gpu_addr),
+					adev->pm.smu_prv_buffer_size);
+
+	if (r) {
+		amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL);
+		adev->pm.smu_prv_buffer = NULL;
+		DRM_ERROR("amdgpu: failed to notify SMU buffer address\n");
+	}
+}
+
 static int pp_late_init(void *handle)
 {
 	struct amdgpu_device *adev = handle;
 	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
-	int ret = 0;
+	int ret;
 
-	ret = pp_check(hwmgr);
-
-	if (ret == 0)
-		pp_dpm_dispatch_tasks(hwmgr,
+	if (hwmgr && hwmgr->pm_en) {
+		mutex_lock(&hwmgr->smu_lock);
+		hwmgr_handle_task(hwmgr,
 					AMD_PP_TASK_COMPLETE_INIT, NULL);
+		mutex_unlock(&hwmgr->smu_lock);
+	}
+	if (adev->pm.smu_prv_buffer_size != 0)
+		pp_reserve_vram_for_smu(adev);
+
+	if (hwmgr && hwmgr->hwmgr_func->gfx_off_control &&
+	    (hwmgr->feature_mask & PP_GFXOFF_MASK)) {
+		ret = hwmgr->hwmgr_func->gfx_off_control(hwmgr, true);
+		if (ret)
+			pr_err("gfx off enabling failed!\n");
+	}
 
 	return 0;
 }
@@ -209,6 +205,8 @@
 {
 	struct amdgpu_device *adev = handle;
 
+	if (adev->pm.smu_prv_buffer)
+		amdgpu_bo_free_kernel(&adev->pm.smu_prv_buffer, NULL, NULL);
 	amd_powerplay_destroy(adev);
 }
 
@@ -233,12 +231,18 @@
 {
 	struct amdgpu_device *adev = handle;
 	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
-	int ret = 0;
+	int ret;
 
-	ret = pp_check(hwmgr);
+	if (!hwmgr || !hwmgr->pm_en)
+		return 0;
 
-	if (ret)
-		return ret;
+	if (hwmgr->hwmgr_func->gfx_off_control) {
+		/* Enable/disable GFX off through SMU */
+		ret = hwmgr->hwmgr_func->gfx_off_control(hwmgr,
+							 state == AMD_PG_STATE_GATE);
+		if (ret)
+			pr_err("gfx off control failed!\n");
+	}
 
 	if (hwmgr->hwmgr_func->enable_per_cu_power_gating == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -254,38 +258,16 @@
 {
 	struct amdgpu_device *adev = handle;
 	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-	if (ret == 0)
-		hwmgr_hw_suspend(hwmgr);
-	return 0;
+	return hwmgr_suspend(hwmgr);
 }
 
 static int pp_resume(void *handle)
 {
 	struct amdgpu_device *adev = handle;
 	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
-	int ret;
 
-	ret = pp_check(hwmgr);
-
-	if (ret < 0)
-		return ret;
-
-	if (hwmgr->smumgr_funcs->start_smu == NULL)
-		return -EINVAL;
-
-	if (hwmgr->smumgr_funcs->start_smu(hwmgr)) {
-		pr_err("smc start failed\n");
-		hwmgr->smumgr_funcs->smu_fini(hwmgr);
-		return -EINVAL;
-	}
-
-	if (ret == PP_DPM_DISABLED)
-		return 0;
-
-	return hwmgr_hw_resume(hwmgr);
+	return hwmgr_resume(hwmgr);
 }
 
 static int pp_set_clockgating_state(void *handle,
@@ -334,12 +316,9 @@
 static int pp_set_clockgating_by_smu(void *handle, uint32_t msg_id)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->update_clock_gatings == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -362,10 +341,10 @@
 		if (*level & profile_mode_mask) {
 			hwmgr->saved_dpm_level = hwmgr->dpm_level;
 			hwmgr->en_umd_pstate = true;
-			cgs_set_clockgating_state(hwmgr->device,
+			amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_GFX,
 						AMD_CG_STATE_UNGATE);
-			cgs_set_powergating_state(hwmgr->device,
+			amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 					AMD_IP_BLOCK_TYPE_GFX,
 					AMD_PG_STATE_UNGATE);
 		}
@@ -375,10 +354,10 @@
 			if (*level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)
 				*level = hwmgr->saved_dpm_level;
 			hwmgr->en_umd_pstate = false;
-			cgs_set_clockgating_state(hwmgr->device,
+			amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 					AMD_IP_BLOCK_TYPE_GFX,
 					AMD_CG_STATE_GATE);
-			cgs_set_powergating_state(hwmgr->device,
+			amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 					AMD_IP_BLOCK_TYPE_GFX,
 					AMD_PG_STATE_GATE);
 		}
@@ -389,12 +368,9 @@
 					enum amd_dpm_forced_level level)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (level == hwmgr->dpm_level)
 		return 0;
@@ -412,13 +388,10 @@
 								void *handle)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 	enum amd_dpm_forced_level level;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
 	level = hwmgr->dpm_level;
@@ -429,13 +402,10 @@
 static uint32_t pp_dpm_get_sclk(void *handle, bool low)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 	uint32_t clk = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return 0;
 
 	if (hwmgr->hwmgr_func->get_sclk == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -450,13 +420,10 @@
 static uint32_t pp_dpm_get_mclk(void *handle, bool low)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 	uint32_t clk = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return 0;
 
 	if (hwmgr->hwmgr_func->get_mclk == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -471,11 +438,8 @@
 static void pp_dpm_powergate_vce(void *handle, bool gate)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
+	if (!hwmgr || !hwmgr->pm_en)
 		return;
 
 	if (hwmgr->hwmgr_func->powergate_vce == NULL) {
@@ -490,11 +454,8 @@
 static void pp_dpm_powergate_uvd(void *handle, bool gate)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
+	if (!hwmgr || !hwmgr->pm_en)
 		return;
 
 	if (hwmgr->hwmgr_func->powergate_uvd == NULL) {
@@ -512,10 +473,8 @@
 	int ret = 0;
 	struct pp_hwmgr *hwmgr = handle;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
 	ret = hwmgr_handle_task(hwmgr, task_id, user_state);
@@ -528,15 +487,9 @@
 {
 	struct pp_hwmgr *hwmgr = handle;
 	struct pp_power_state *state;
-	int ret = 0;
 	enum amd_pm_state_type pm_type;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
-
-	if (hwmgr->current_ps == NULL)
+	if (!hwmgr || !hwmgr->pm_en || !hwmgr->current_ps)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -568,11 +521,8 @@
 static void pp_dpm_set_fan_control_mode(void *handle, uint32_t mode)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
+	if (!hwmgr || !hwmgr->pm_en)
 		return;
 
 	if (hwmgr->hwmgr_func->set_fan_control_mode == NULL) {
@@ -587,13 +537,10 @@
 static uint32_t pp_dpm_get_fan_control_mode(void *handle)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 	uint32_t mode = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return 0;
 
 	if (hwmgr->hwmgr_func->get_fan_control_mode == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -610,10 +557,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->set_fan_speed_percent == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -630,10 +575,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->get_fan_speed_percent == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -651,10 +594,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->get_fan_speed_rpm == NULL)
 		return -EINVAL;
@@ -670,16 +611,10 @@
 {
 	struct pp_hwmgr *hwmgr = handle;
 	int i;
-	int ret = 0;
 
 	memset(data, 0, sizeof(*data));
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
-
-	if (hwmgr->ps == NULL)
+	if (!hwmgr || !hwmgr->pm_en || !hwmgr->ps)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -713,15 +648,9 @@
 static int pp_dpm_get_pp_table(void *handle, char **table)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 	int size = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
-
-	if (!hwmgr->soft_pp_table)
+	if (!hwmgr || !hwmgr->pm_en || !hwmgr->soft_pp_table)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -736,10 +665,6 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret;
 
-	ret = pp_check(hwmgr);
-	if (ret)
-		return ret;
-
 	ret = hwmgr_hw_fini(hwmgr);
 	if (ret)
 		return ret;
@@ -754,40 +679,38 @@
 static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
+	int ret = -ENOMEM;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
 	if (!hwmgr->hardcode_pp_table) {
 		hwmgr->hardcode_pp_table = kmemdup(hwmgr->soft_pp_table,
 						   hwmgr->soft_pp_table_size,
 						   GFP_KERNEL);
-		if (!hwmgr->hardcode_pp_table) {
-			mutex_unlock(&hwmgr->smu_lock);
-			return -ENOMEM;
-		}
+		if (!hwmgr->hardcode_pp_table)
+			goto err;
 	}
 
 	memcpy(hwmgr->hardcode_pp_table, buf, size);
 
 	hwmgr->soft_pp_table = hwmgr->hardcode_pp_table;
-	mutex_unlock(&hwmgr->smu_lock);
 
 	ret = amd_powerplay_reset(handle);
 	if (ret)
-		return ret;
+		goto err;
 
 	if (hwmgr->hwmgr_func->avfs_control) {
 		ret = hwmgr->hwmgr_func->avfs_control(hwmgr, false);
 		if (ret)
-			return ret;
+			goto err;
 	}
-
+	mutex_unlock(&hwmgr->smu_lock);
 	return 0;
+err:
+	mutex_unlock(&hwmgr->smu_lock);
+	return ret;
 }
 
 static int pp_dpm_force_clock_level(void *handle,
@@ -796,10 +719,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->force_clock_level == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -820,10 +741,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->print_clock_levels == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -840,10 +759,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->get_sclk_od == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -860,10 +777,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->set_sclk_od == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -881,10 +796,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->get_mclk_od == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -901,10 +814,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->set_mclk_od == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -922,11 +833,7 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-	if (ret)
-		return ret;
-
-	if (value == NULL)
+	if (!hwmgr || !hwmgr->pm_en || !value)
 		return -EINVAL;
 
 	switch (idx) {
@@ -948,14 +855,11 @@
 pp_dpm_get_vce_clock_state(void *handle, unsigned idx)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
+	if (!hwmgr || !hwmgr->pm_en)
 		return NULL;
 
-	if (hwmgr && idx < hwmgr->num_vce_state_tables)
+	if (idx < hwmgr->num_vce_state_tables)
 		return &hwmgr->vce_states[idx];
 	return NULL;
 }
@@ -964,7 +868,7 @@
 {
 	struct pp_hwmgr *hwmgr = handle;
 
-	if (!buf || pp_check(hwmgr))
+	if (!hwmgr || !hwmgr->pm_en || !buf)
 		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->get_power_profile_mode == NULL) {
@@ -980,12 +884,12 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = -EINVAL;
 
-	if (pp_check(hwmgr))
-		return -EINVAL;
+	if (!hwmgr || !hwmgr->pm_en)
+		return ret;
 
 	if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
-		return -EINVAL;
+		return ret;
 	}
 	mutex_lock(&hwmgr->smu_lock);
 	if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL)
@@ -998,7 +902,7 @@
 {
 	struct pp_hwmgr *hwmgr = handle;
 
-	if (pp_check(hwmgr))
+	if (!hwmgr || !hwmgr->pm_en)
 		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->odn_edit_dpm_table == NULL) {
@@ -1016,7 +920,7 @@
 	long workload;
 	uint32_t index;
 
-	if (pp_check(hwmgr))
+	if (!hwmgr || !hwmgr->pm_en)
 		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) {
@@ -1048,46 +952,12 @@
 	return 0;
 }
 
-static int pp_dpm_notify_smu_memory_info(void *handle,
-					uint32_t virtual_addr_low,
-					uint32_t virtual_addr_hi,
-					uint32_t mc_addr_low,
-					uint32_t mc_addr_hi,
-					uint32_t size)
-{
-	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
-
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
-
-	if (hwmgr->hwmgr_func->notify_cac_buffer_info == NULL) {
-		pr_info("%s was not implemented.\n", __func__);
-		return -EINVAL;
-	}
-
-	mutex_lock(&hwmgr->smu_lock);
-
-	ret = hwmgr->hwmgr_func->notify_cac_buffer_info(hwmgr, virtual_addr_low,
-					virtual_addr_hi, mc_addr_low, mc_addr_hi,
-					size);
-
-	mutex_unlock(&hwmgr->smu_lock);
-
-	return ret;
-}
-
 static int pp_set_power_limit(void *handle, uint32_t limit)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->set_power_limit == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -1104,20 +974,14 @@
 	hwmgr->hwmgr_func->set_power_limit(hwmgr, limit);
 	hwmgr->power_limit = limit;
 	mutex_unlock(&hwmgr->smu_lock);
-	return ret;
+	return 0;
 }
 
 static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
-
-	if (limit == NULL)
+	if (!hwmgr || !hwmgr->pm_en || !limit)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -1129,19 +993,16 @@
 
 	mutex_unlock(&hwmgr->smu_lock);
 
-	return ret;
+	return 0;
 }
 
 static int pp_display_configuration_change(void *handle,
 	const struct amd_pp_display_configuration *display_config)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
 	phm_store_dal_configuration_data(hwmgr, display_config);
@@ -1155,12 +1016,7 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
-
-	if (output == NULL)
+	if (!hwmgr || !hwmgr->pm_en || !output)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -1177,10 +1033,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
 
@@ -1225,10 +1079,8 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (clocks == NULL)
 		return -EINVAL;
@@ -1246,11 +1098,7 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-	if (ret)
-		return ret;
-
-	if (!clocks)
+	if (!hwmgr || !hwmgr->pm_en || !clocks)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -1266,11 +1114,7 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-	if (ret)
-		return ret;
-
-	if (!clocks)
+	if (!hwmgr || !hwmgr->pm_en || !clocks)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -1287,11 +1131,7 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-	if (ret)
-		return ret;
-
-	if (!wm_with_clock_ranges)
+	if (!hwmgr || !hwmgr->pm_en || !wm_with_clock_ranges)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -1308,11 +1148,7 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-	if (ret)
-		return ret;
-
-	if (!clock)
+	if (!hwmgr || !hwmgr->pm_en || !clock)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -1328,12 +1164,7 @@
 	struct pp_hwmgr *hwmgr = handle;
 	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
-
-	if (clocks == NULL)
+	if (!hwmgr || !hwmgr->pm_en || !clocks)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -1348,12 +1179,9 @@
 static int pp_set_mmhub_powergating_by_smu(void *handle)
 {
 	struct pp_hwmgr *hwmgr = handle;
-	int ret = 0;
 
-	ret = pp_check(hwmgr);
-
-	if (ret)
-		return ret;
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
 
 	if (hwmgr->hwmgr_func->set_mmhub_powergating_by_smu == NULL) {
 		pr_info("%s was not implemented.\n", __func__);
@@ -1390,7 +1218,6 @@
 	.get_vce_clock_state = pp_dpm_get_vce_clock_state,
 	.switch_power_profile = pp_dpm_switch_power_profile,
 	.set_clockgating_by_smu = pp_set_clockgating_by_smu,
-	.notify_smu_memory_info = pp_dpm_notify_smu_memory_info,
 	.get_power_profile_mode = pp_get_power_profile_mode,
 	.set_power_profile_mode = pp_set_power_profile_mode,
 	.odn_edit_dpm_table = pp_odn_edit_dpm_table,
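Across the amd_powerplay.c callbacks above, the pp_check() helper, which folded both "invalid handle" and "DPM disabled" into one return value via the PP_DPM_DISABLED sentinel, is replaced by explicit guards at each entry point. A minimal sketch of the resulting pattern (pp_example_op and example_op are hypothetical names):

/* Sketch of the guard pattern now used by the pp_dpm_* callbacks;
 * the callback and hook names are hypothetical. */
static int pp_example_op(void *handle)
{
	struct pp_hwmgr *hwmgr = handle;
	int ret;

	/* Invalid handle or power management disabled: fail fast
	 * instead of decoding pp_check()'s mixed return value. */
	if (!hwmgr || !hwmgr->pm_en)
		return -EINVAL;

	if (hwmgr->hwmgr_func->example_op == NULL) {
		pr_info("%s was not implemented.\n", __func__);
		return 0;
	}

	mutex_lock(&hwmgr->smu_lock);
	ret = hwmgr->hwmgr_func->example_op(hwmgr);
	mutex_unlock(&hwmgr->smu_lock);

	return ret;
}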
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index ae2e933..a0bb921 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -75,8 +75,7 @@
 
 int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 {
-	int ret = 1;
-	bool enabled;
+	int ret = -EINVAL;
 	PHM_FUNC_CHECK(hwmgr);
 
 	if (smum_is_dpm_running(hwmgr)) {
@@ -87,17 +86,12 @@
 	if (NULL != hwmgr->hwmgr_func->dynamic_state_management_enable)
 		ret = hwmgr->hwmgr_func->dynamic_state_management_enable(hwmgr);
 
-	enabled = ret == 0;
-
-	cgs_notify_dpm_enabled(hwmgr->device, enabled);
-
 	return ret;
 }
 
 int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 {
-	int ret = -1;
-	bool enabled;
+	int ret = -EINVAL;
 
 	PHM_FUNC_CHECK(hwmgr);
 
@@ -109,10 +103,6 @@
 	if (hwmgr->hwmgr_func->dynamic_state_management_disable)
 		ret = hwmgr->hwmgr_func->dynamic_state_management_disable(hwmgr);
 
-	enabled = ret == 0 ? false : true;
-
-	cgs_notify_dpm_enabled(hwmgr->device, enabled);
-
 	return ret;
 }
 
@@ -142,6 +132,15 @@
 	return 0;
 }
 
+int phm_apply_clock_adjust_rules(struct pp_hwmgr *hwmgr)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (hwmgr->hwmgr_func->apply_clocks_adjust_rules != NULL)
+		return hwmgr->hwmgr_func->apply_clocks_adjust_rules(hwmgr);
+	return 0;
+}
+
 int phm_powerdown_uvd(struct pp_hwmgr *hwmgr)
 {
 	PHM_FUNC_CHECK(hwmgr);
@@ -171,6 +170,16 @@
 	return 0;
 }
 
+int phm_pre_display_configuration_changed(struct pp_hwmgr *hwmgr)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (NULL != hwmgr->hwmgr_func->pre_display_config_changed)
+		hwmgr->hwmgr_func->pre_display_config_changed(hwmgr);
+
+	return 0;
+}
 
 int phm_display_configuration_changed(struct pp_hwmgr *hwmgr)
 {
@@ -275,13 +284,11 @@
 	if (display_config == NULL)
 		return -EINVAL;
 
-	hwmgr->display_config = *display_config;
-
 	if (NULL != hwmgr->hwmgr_func->set_deep_sleep_dcefclk)
-		hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, hwmgr->display_config.min_dcef_deep_sleep_set_clk);
+		hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk);
 
-	for (index = 0; index < hwmgr->display_config.num_path_including_non_display; index++) {
-		if (hwmgr->display_config.displays[index].controller_id != 0)
+	for (index = 0; index < display_config->num_path_including_non_display; index++) {
+		if (display_config->displays[index].controller_id != 0)
 			number_of_active_display++;
 	}
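The phm_pre_display_configuration_changed() and phm_apply_clock_adjust_rules() wrappers added above follow this file's existing dispatch convention: validate the hwmgr via PHM_FUNC_CHECK(), then invoke the backend hook only when the hwmgr implements it. A minimal sketch of that convention (example_hook is a hypothetical backend callback):

/* Sketch of the phm_* optional-hook convention. */
int phm_example_hook(struct pp_hwmgr *hwmgr)
{
	PHM_FUNC_CHECK(hwmgr);	/* returns -EINVAL on a bad hwmgr */

	if (hwmgr->hwmgr_func->example_hook != NULL)
		return hwmgr->hwmgr_func->example_hook(hwmgr);

	return 0;	/* an unimplemented hook is not an error */
}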
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index 4298205..e63bc47 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -40,6 +40,7 @@
 extern const struct pp_smumgr_func tonga_smu_funcs;
 extern const struct pp_smumgr_func fiji_smu_funcs;
 extern const struct pp_smumgr_func polaris10_smu_funcs;
+extern const struct pp_smumgr_func vegam_smu_funcs;
 extern const struct pp_smumgr_func vega10_smu_funcs;
 extern const struct pp_smumgr_func vega12_smu_funcs;
 extern const struct pp_smumgr_func smu10_smu_funcs;
@@ -76,7 +77,7 @@
 
 int hwmgr_early_init(struct pp_hwmgr *hwmgr)
 {
-	if (hwmgr == NULL)
+	if (!hwmgr)
 		return -EINVAL;
 
 	hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT;
@@ -95,7 +96,8 @@
 		hwmgr->smumgr_funcs = &ci_smu_funcs;
 		ci_set_asic_special_caps(hwmgr);
 		hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK |
-					PP_ENABLE_GFX_CG_THRU_SMU);
+					 PP_ENABLE_GFX_CG_THRU_SMU |
+					 PP_GFXOFF_MASK);
 		hwmgr->pp_table_version = PP_TABLE_V0;
 		hwmgr->od_enabled = false;
 		smu7_init_function_pointers(hwmgr);
@@ -103,9 +105,11 @@
 	case AMDGPU_FAMILY_CZ:
 		hwmgr->od_enabled = false;
 		hwmgr->smumgr_funcs = &smu8_smu_funcs;
+		hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
 		smu8_init_function_pointers(hwmgr);
 		break;
 	case AMDGPU_FAMILY_VI:
+		hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
 		switch (hwmgr->chip_id) {
 		case CHIP_TOPAZ:
 			hwmgr->smumgr_funcs = &iceland_smu_funcs;
@@ -133,14 +137,21 @@
 			polaris_set_asic_special_caps(hwmgr);
 			hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK);
 			break;
+		case CHIP_VEGAM:
+			hwmgr->smumgr_funcs = &vegam_smu_funcs;
+			polaris_set_asic_special_caps(hwmgr);
+			hwmgr->feature_mask &= ~(PP_UVD_HANDSHAKE_MASK);
+			break;
 		default:
 			return -EINVAL;
 		}
 		smu7_init_function_pointers(hwmgr);
 		break;
 	case AMDGPU_FAMILY_AI:
+		hwmgr->feature_mask &= ~PP_GFXOFF_MASK;
 		switch (hwmgr->chip_id) {
 		case CHIP_VEGA10:
+		case CHIP_VEGA20:
 			hwmgr->smumgr_funcs = &vega10_smu_funcs;
 			vega10_hwmgr_init(hwmgr);
 			break;
@@ -170,22 +181,58 @@
 	return 0;
 }
 
+int hwmgr_sw_init(struct pp_hwmgr *hwmgr)
+{
+	if (!hwmgr || !hwmgr->smumgr_funcs || !hwmgr->smumgr_funcs->smu_init)
+		return -EINVAL;
+
+	phm_register_irq_handlers(hwmgr);
+
+	return hwmgr->smumgr_funcs->smu_init(hwmgr);
+}
+
+
+int hwmgr_sw_fini(struct pp_hwmgr *hwmgr)
+{
+	if (hwmgr && hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->smu_fini)
+		hwmgr->smumgr_funcs->smu_fini(hwmgr);
+
+	return 0;
+}
+
 int hwmgr_hw_init(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
 
-	if (hwmgr == NULL)
+	if (!hwmgr || !hwmgr->smumgr_funcs)
 		return -EINVAL;
 
-	if (hwmgr->pptable_func == NULL ||
-	    hwmgr->pptable_func->pptable_init == NULL ||
-	    hwmgr->hwmgr_func->backend_init == NULL)
-		return -EINVAL;
+	if (hwmgr->smumgr_funcs->start_smu) {
+		ret = hwmgr->smumgr_funcs->start_smu(hwmgr);
+		if (ret) {
+			pr_err("smc start failed\n");
+			return -EINVAL;
+		}
+	}
+
+	if (!hwmgr->pm_en)
+		return 0;
+
+	if (!hwmgr->pptable_func ||
+	    !hwmgr->pptable_func->pptable_init ||
+	    !hwmgr->hwmgr_func->backend_init) {
+		hwmgr->pm_en = false;
+		pr_info("dpm not supported \n");
+		return 0;
+	}
 
 	ret = hwmgr->pptable_func->pptable_init(hwmgr);
 	if (ret)
 		goto err;
 
+	((struct amdgpu_device *)hwmgr->adev)->pm.no_fan =
+				hwmgr->thermal_controller.fanInfo.bNoFan;
+
 	ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 	if (ret)
 		goto err1;
@@ -206,6 +253,8 @@
 	if (ret)
 		goto err2;
 
+	((struct amdgpu_device *)hwmgr->adev)->pm.dpm_enabled = true;
+
 	return 0;
 err2:
 	if (hwmgr->hwmgr_func->backend_fini)
@@ -214,14 +263,13 @@
 	if (hwmgr->pptable_func->pptable_fini)
 		hwmgr->pptable_func->pptable_fini(hwmgr);
 err:
-	pr_err("amdgpu: powerplay initialization failed\n");
 	return ret;
 }
 
 int hwmgr_hw_fini(struct pp_hwmgr *hwmgr)
 {
-	if (hwmgr == NULL)
-		return -EINVAL;
+	if (!hwmgr || !hwmgr->pm_en)
+		return 0;
 
 	phm_stop_thermal_controller(hwmgr);
 	psm_set_boot_states(hwmgr);
@@ -236,12 +284,12 @@
 	return psm_fini_power_state_table(hwmgr);
 }
 
-int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr)
+int hwmgr_suspend(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
 
-	if (hwmgr == NULL)
-		return -EINVAL;
+	if (!hwmgr || !hwmgr->pm_en)
+		return 0;
 
 	phm_disable_smc_firmware_ctf(hwmgr);
 	ret = psm_set_boot_states(hwmgr);
@@ -255,13 +303,23 @@
 	return ret;
 }
 
-int hwmgr_hw_resume(struct pp_hwmgr *hwmgr)
+int hwmgr_resume(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
 
-	if (hwmgr == NULL)
+	if (!hwmgr)
 		return -EINVAL;
 
+	if (hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->start_smu) {
+		if (hwmgr->smumgr_funcs->start_smu(hwmgr)) {
+			pr_err("smc start failed\n");
+			return -EINVAL;
+		}
+	}
+
+	if (!hwmgr->pm_en)
+		return 0;
+
 	ret = phm_setup_asic(hwmgr);
 	if (ret)
 		return ret;
@@ -270,9 +328,6 @@
 	if (ret)
 		return ret;
 	ret = phm_start_thermal_controller(hwmgr);
-	if (ret)
-		return ret;
-
 	ret |= psm_set_performance_states(hwmgr);
 	if (ret)
 		return ret;
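With the hwmgr_sw_init()/hwmgr_sw_fini() split and the reworked init paths above, starting the SMU firmware is owned by hwmgr_hw_init() and hwmgr_resume(), while DPM-specific work is gated on pm_en, so an SR-IOV VF or a dpm=0 boot still gets a running SMU. A condensed, illustrative sketch of that contract (example_resume is not a real function):

/* Illustrative sketch of the new resume contract: always (re)start
 * the SMU, restore DPM state only when pm_en is set. */
int example_resume(struct pp_hwmgr *hwmgr)
{
	if (!hwmgr)
		return -EINVAL;

	if (hwmgr->smumgr_funcs && hwmgr->smumgr_funcs->start_smu &&
	    hwmgr->smumgr_funcs->start_smu(hwmgr)) {
		pr_err("smc start failed\n");
		return -EINVAL;
	}

	if (!hwmgr->pm_en)	/* SR-IOV VF or amdgpu.dpm=0 */
		return 0;

	return phm_setup_asic(hwmgr);	/* then re-enable DPM, thermal, ... */
}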
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
index 0f2851b..0af13c1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
@@ -46,7 +46,7 @@
 					  sizeof(struct pp_power_state);
 
 	if (table_entries == 0 || size == 0) {
-		pr_warn("Please check whether power state management is suppported on this asic\n");
+		pr_warn("Please check whether power state management is supported on this asic\n");
 		return 0;
 	}
 
@@ -265,6 +265,15 @@
 	if (skip)
 		return 0;
 
+	if (!hwmgr->ps)
+		/*
+		 * for vega12/vega20, which do not support the power state
+		 * manager, DAL clock limits should still be honoured
+		 */
+		phm_apply_clock_adjust_rules(hwmgr);
+
+	phm_pre_display_configuration_changed(hwmgr);
+
 	phm_display_configuration_changed(hwmgr);
 
 	if (hwmgr->ps)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
index c6febbf..7047e29 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
@@ -23,7 +23,8 @@
 #include "pp_debug.h"
 #include <linux/module.h>
 #include <linux/slab.h>
-
+#include <linux/delay.h>
+#include "atom.h"
 #include "ppatomctrl.h"
 #include "atombios.h"
 #include "cgs_common.h"
@@ -128,7 +129,6 @@
 	return 0;
 }
 
-
 int atomctrl_initialize_mc_reg_table(
 		struct pp_hwmgr *hwmgr,
 		uint8_t module_index,
@@ -141,7 +141,7 @@
 	u16 size;
 
 	vram_info = (ATOM_VRAM_INFO_HEADER_V2_1 *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
 				GetIndexIntoMasterTable(DATA, VRAM_Info), &size, &frev, &crev);
 
 	if (module_index >= vram_info->ucNumOfVRAMModule) {
@@ -174,6 +174,8 @@
 		uint32_t engine_clock,
 		uint32_t memory_clock)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
+
 	SET_ENGINE_CLOCK_PS_ALLOCATION engine_clock_parameters;
 
 	/* They are both in 10KHz Units. */
@@ -184,9 +186,10 @@
 	/* in 10 khz units.*/
 	engine_clock_parameters.sReserved.ulClock =
 		cpu_to_le32(memory_clock & SET_CLOCK_FREQ_MASK);
-	return cgs_atom_exec_cmd_table(hwmgr->device,
+
+	return amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings),
-			&engine_clock_parameters);
+			(uint32_t *)&engine_clock_parameters);
 }
 
 /**
@@ -203,7 +206,7 @@
 	union voltage_object_info *voltage_info;
 
 	voltage_info = (union voltage_object_info *)
-		cgs_atom_get_data_table(device, index,
+		smu_atom_get_data_table(device, index,
 			&size, &frev, &crev);
 
 	if (voltage_info != NULL)
@@ -247,16 +250,16 @@
 		pp_atomctrl_memory_clock_param *mpll_param,
 		bool strobe_mode)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 mpll_parameters;
 	int result;
 
 	mpll_parameters.ulClock = cpu_to_le32(clock_value);
 	mpll_parameters.ucInputFlag = (uint8_t)((strobe_mode) ? 1 : 0);
 
-	result = cgs_atom_exec_cmd_table
-		(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 		 GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam),
-		 &mpll_parameters);
+		(uint32_t *)&mpll_parameters);
 
 	if (0 == result) {
 		mpll_param->mpll_fb_divider.clk_frac =
@@ -295,14 +298,15 @@
 int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr,
 		uint32_t clock_value, pp_atomctrl_memory_clock_param *mpll_param)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2 mpll_parameters;
 	int result;
 
 	mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value);
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam),
-			&mpll_parameters);
+			(uint32_t *)&mpll_parameters);
 
 	if (!result)
 		mpll_param->mpll_post_divider =
@@ -311,19 +315,49 @@
 	return result;
 }
 
+int atomctrl_get_memory_pll_dividers_ai(struct pp_hwmgr *hwmgr,
+					uint32_t clock_value,
+					pp_atomctrl_memory_clock_param_ai *mpll_param)
+{
+	struct amdgpu_device *adev = hwmgr->adev;
+	COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_3 mpll_parameters = {{0}, 0, 0};
+	int result;
+
+	mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value);
+
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
+			GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam),
+			(uint32_t *)&mpll_parameters);
+
+	/* VEGAM's mpll takes some time to finish computing */
+	udelay(10);
+
+	if (!result) {
+		mpll_param->ulMclk_fcw_int =
+			le16_to_cpu(mpll_parameters.usMclk_fcw_int);
+		mpll_param->ulMclk_fcw_frac =
+			le16_to_cpu(mpll_parameters.usMclk_fcw_frac);
+		mpll_param->ulClock =
+			le32_to_cpu(mpll_parameters.ulClock.ulClock);
+		mpll_param->ulPostDiv = mpll_parameters.ulClock.ucPostDiv;
+	}
+
+	return result;
+}
+
 int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
 					  uint32_t clock_value,
 					  pp_atomctrl_clock_dividers_kong *dividers)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 pll_parameters;
 	int result;
 
 	pll_parameters.ulClock = cpu_to_le32(clock_value);
 
-	result = cgs_atom_exec_cmd_table
-		(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 		 GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL),
-		 &pll_parameters);
+		(uint32_t *)&pll_parameters);
 
 	if (0 == result) {
 		dividers->pll_post_divider = pll_parameters.ucPostDiv;
@@ -338,16 +372,16 @@
 		uint32_t clock_value,
 		pp_atomctrl_clock_dividers_vi *dividers)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters;
 	int result;
 
 	pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
 	pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK;
 
-	result = cgs_atom_exec_cmd_table
-		(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 		 GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL),
-		 &pll_patameters);
+		(uint32_t *)&pll_patameters);
 
 	if (0 == result) {
 		dividers->pll_post_divider =
@@ -375,16 +409,16 @@
 		uint32_t clock_value,
 		pp_atomctrl_clock_dividers_ai *dividers)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7 pll_patameters;
 	int result;
 
 	pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
 	pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK;
 
-	result = cgs_atom_exec_cmd_table
-		(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 		 GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL),
-		 &pll_patameters);
+		(uint32_t *)&pll_patameters);
 
 	if (0 == result) {
 		dividers->usSclk_fcw_frac     = le16_to_cpu(pll_patameters.usSclk_fcw_frac);
@@ -407,6 +441,7 @@
 		uint32_t clock_value,
 		pp_atomctrl_clock_dividers_vi *dividers)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters;
 	int result;
 
@@ -414,10 +449,9 @@
 	pll_patameters.ulClock.ucPostDiv =
 		COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK;
 
-	result = cgs_atom_exec_cmd_table
-		(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 		 GetIndexIntoMasterTable(COMMAND, ComputeMemoryEnginePLL),
-		 &pll_patameters);
+		(uint32_t *)&pll_patameters);
 
 	if (0 == result) {
 		dividers->pll_post_divider =
@@ -452,7 +486,7 @@
 	uint32_t clock;
 
 	fw_info = (ATOM_FIRMWARE_INFO *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
 			GetIndexIntoMasterTable(DATA, FirmwareInfo),
 			&size, &frev, &crev);
 
@@ -476,7 +510,7 @@
 		uint8_t voltage_mode)
 {
 	ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info =
-		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device);
+		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev);
 	bool ret;
 
 	PP_ASSERT_WITH_CODE((NULL != voltage_info),
@@ -495,7 +529,7 @@
 		pp_atomctrl_voltage_table *voltage_table)
 {
 	ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info =
-		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device);
+		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev);
 	const ATOM_VOLTAGE_OBJECT_V3 *voltage_object;
 	unsigned int i;
 
@@ -572,7 +606,7 @@
 	void *table_address;
 
 	table_address = (ATOM_GPIO_PIN_LUT *)
-		cgs_atom_get_data_table(device,
+		smu_atom_get_data_table(device,
 				GetIndexIntoMasterTable(DATA, GPIO_Pin_LUT),
 				&size, &frev, &crev);
 
@@ -592,7 +626,7 @@
 {
 	bool bRet = false;
 	ATOM_GPIO_PIN_LUT *gpio_lookup_table =
-		get_gpio_lookup_table(hwmgr->device);
+		get_gpio_lookup_table(hwmgr->adev);
 
 	PP_ASSERT_WITH_CODE((NULL != gpio_lookup_table),
 			"Could not find GPIO lookup Table in BIOS.", return false);
@@ -613,7 +647,7 @@
 		bool debug)
 {
 	ATOM_ASIC_PROFILING_INFO_V3_4 *getASICProfilingInfo;
-
+	struct amdgpu_device *adev = hwmgr->adev;
 	EFUSE_LINEAR_FUNC_PARAM sRO_fuse;
 	EFUSE_LINEAR_FUNC_PARAM sCACm_fuse;
 	EFUSE_LINEAR_FUNC_PARAM sCACb_fuse;
@@ -640,7 +674,7 @@
 	int result;
 
 	getASICProfilingInfo = (ATOM_ASIC_PROFILING_INFO_V3_4 *)
-			cgs_atom_get_data_table(hwmgr->device,
+			smu_atom_get_data_table(hwmgr->adev,
 					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
 					NULL, NULL, NULL);
 
@@ -706,9 +740,9 @@
 
 	sOutput_FuseValues.sEfuse = sInput_FuseValues;
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
-			&sOutput_FuseValues);
+			(uint32_t *)&sOutput_FuseValues);
 
 	if (result)
 		return result;
@@ -727,9 +761,9 @@
 
 	sOutput_FuseValues.sEfuse = sInput_FuseValues;
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
-			&sOutput_FuseValues);
+			(uint32_t *)&sOutput_FuseValues);
 
 	if (result)
 		return result;
@@ -747,9 +781,9 @@
 	sInput_FuseValues.ucBitLength = sCACb_fuse.ucEfuseLength;
 	sOutput_FuseValues.sEfuse = sInput_FuseValues;
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
-			&sOutput_FuseValues);
+			(uint32_t *)&sOutput_FuseValues);
 
 	if (result)
 		return result;
@@ -768,9 +802,9 @@
 
 	sOutput_FuseValues.sEfuse = sInput_FuseValues;
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
-			&sOutput_FuseValues);
+			(uint32_t *)&sOutput_FuseValues);
 
 	if (result)
 		return result;
@@ -790,9 +824,9 @@
 
 	sOutput_FuseValues.sEfuse = sInput_FuseValues;
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
-			&sOutput_FuseValues);
+			(uint32_t *)&sOutput_FuseValues);
 	if (result)
 		return result;
 
@@ -811,9 +845,9 @@
 	sInput_FuseValues.ucBitLength = sKv_b_fuse.ucEfuseLength;
 	sOutput_FuseValues.sEfuse = sInput_FuseValues;
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
-			&sOutput_FuseValues);
+			(uint32_t *)&sOutput_FuseValues);
 
 	if (result)
 		return result;
@@ -842,9 +876,9 @@
 
 	sOutput_FuseValues.sEfuse = sInput_FuseValues;
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
-			&sOutput_FuseValues);
+			(uint32_t *)&sOutput_FuseValues);
 
 	if (result)
 		return result;
@@ -1053,8 +1087,9 @@
 		uint32_t sclk, uint16_t virtual_voltage_Id,
 		uint16_t *voltage)
 {
-	int result;
+	struct amdgpu_device *adev = hwmgr->adev;
 	GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space;
+	int result;
 
 	get_voltage_info_param_space.ucVoltageType   =
 		voltage_type;
@@ -1065,14 +1100,12 @@
 	get_voltage_info_param_space.ulSCLKFreq      =
 		cpu_to_le32(sclk);
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
-			&get_voltage_info_param_space);
+			(uint32_t *)&get_voltage_info_param_space);
 
-	if (0 != result)
-		return result;
-
-	*voltage = le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
+	*voltage = result ? 0 :
+			le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
 				(&get_voltage_info_param_space))->usVoltageLevel);
 
 	return result;
@@ -1088,9 +1121,10 @@
 			     uint16_t virtual_voltage_id,
 			     uint16_t *voltage)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
+	GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space;
 	int result;
 	int entry_id;
-	GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space;
 
 	/* search for leakage voltage ID 0xff01 ~ 0xff08 and sckl */
 	for (entry_id = 0; entry_id < hwmgr->dyn_state.vddc_dependency_on_sclk->count; entry_id++) {
@@ -1111,9 +1145,9 @@
 	get_voltage_info_param_space.ulSCLKFreq =
 		cpu_to_le32(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[entry_id].clk);
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
-			&get_voltage_info_param_space);
+			(uint32_t *)&get_voltage_info_param_space);
 
 	if (0 != result)
 		return result;
@@ -1135,7 +1169,7 @@
 	u16 size;
 
 	fw_info = (ATOM_COMMON_TABLE_HEADER *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
 				GetIndexIntoMasterTable(DATA, FirmwareInfo),
 				&size, &frev, &crev);
 
@@ -1167,7 +1201,7 @@
 	u16 size;
 
 	table = (ATOM_ASIC_INTERNAL_SS_INFO *)
-		cgs_atom_get_data_table(device,
+		smu_atom_get_data_table(device,
 			GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info),
 			&size, &frev, &crev);
 
@@ -1188,7 +1222,7 @@
 
 	memset(ssEntry, 0x00, sizeof(pp_atomctrl_internal_ss_info));
 
-	table = asic_internal_ss_get_ss_table(hwmgr->device);
+	table = asic_internal_ss_get_ss_table(hwmgr->adev);
 
 	if (NULL == table)
 		return -1;
@@ -1260,9 +1294,10 @@
 			ASIC_INTERNAL_ENGINE_SS, engine_clock, ssInfo);
 }
 
-int atomctrl_read_efuse(void *device, uint16_t start_index,
+int atomctrl_read_efuse(struct pp_hwmgr *hwmgr, uint16_t start_index,
 		uint16_t end_index, uint32_t mask, uint32_t *efuse)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int result;
 	READ_EFUSE_VALUE_PARAMETER efuse_param;
 
@@ -1272,11 +1307,10 @@
 	efuse_param.sEfuse.ucBitLength  = (uint8_t)
 			((end_index - start_index) + 1);
 
-	result = cgs_atom_exec_cmd_table(device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
-			&efuse_param);
-	if (!result)
-		*efuse = le32_to_cpu(efuse_param.ulEfuseValue) & mask;
+			(uint32_t *)&efuse_param);
+	*efuse = result ? 0 : le32_to_cpu(efuse_param.ulEfuseValue) & mask;
 
 	return result;
 }
@@ -1284,6 +1318,7 @@
 int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
 			      uint8_t level)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1 memory_clock_parameters;
 	int result;
 
@@ -1293,10 +1328,9 @@
 		ADJUST_MC_SETTING_PARAM;
 	memory_clock_parameters.asDPMMCReg.ucMclkDPMState = level;
 
-	result = cgs_atom_exec_cmd_table
-		(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 		 GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings),
-		 &memory_clock_parameters);
+		(uint32_t *)&memory_clock_parameters);
 
 	return result;
 }
@@ -1304,7 +1338,7 @@
 int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
 				uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage)
 {
-
+	struct amdgpu_device *adev = hwmgr->adev;
 	int result;
 	GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_3 get_voltage_info_param_space;
 
@@ -1313,15 +1347,12 @@
 	get_voltage_info_param_space.usVoltageLevel = cpu_to_le16(virtual_voltage_Id);
 	get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk);
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
-			&get_voltage_info_param_space);
+			(uint32_t *)&get_voltage_info_param_space);
 
-	if (0 != result)
-		return result;
-
-	*voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)
-				(&get_voltage_info_param_space))->ulVoltageLevel);
+	*voltage = result ? 0 :
+		le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel);
 
 	return result;
 }
@@ -1334,7 +1365,7 @@
 	u16 size;
 
 	ATOM_SMU_INFO_V2_1 *psmu_info =
-		(ATOM_SMU_INFO_V2_1 *)cgs_atom_get_data_table(hwmgr->device,
+		(ATOM_SMU_INFO_V2_1 *)smu_atom_get_data_table(hwmgr->adev,
 			GetIndexIntoMasterTable(DATA, SMU_Info),
 			&size, &frev, &crev);
 
@@ -1362,7 +1393,7 @@
 		return -EINVAL;
 
 	profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *)
-			cgs_atom_get_data_table(hwmgr->device,
+			smu_atom_get_data_table(hwmgr->adev,
 					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
 					NULL, NULL, NULL);
 	if (!profile)
@@ -1402,7 +1433,7 @@
 				uint16_t *load_line)
 {
 	ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info =
-		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device);
+		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->adev);
 
 	const ATOM_VOLTAGE_OBJECT_V3 *voltage_object;
 
@@ -1421,16 +1452,17 @@
 
 int atomctrl_get_leakage_id_from_efuse(struct pp_hwmgr *hwmgr, uint16_t *virtual_voltage_id)
 {
-	int result;
+	struct amdgpu_device *adev = hwmgr->adev;
 	SET_VOLTAGE_PS_ALLOCATION allocation;
 	SET_VOLTAGE_PARAMETERS_V1_3 *voltage_parameters =
 			(SET_VOLTAGE_PARAMETERS_V1_3 *)&allocation.sASICSetVoltage;
+	int result;
 
 	voltage_parameters->ucVoltageMode = ATOM_GET_LEAKAGE_ID;
 
-	result = cgs_atom_exec_cmd_table(hwmgr->device,
+	result = amdgpu_atom_execute_table(adev->mode_info.atom_context,
 			GetIndexIntoMasterTable(COMMAND, SetVoltage),
-			voltage_parameters);
+			(uint32_t *)voltage_parameters);
 
 	*virtual_voltage_id = voltage_parameters->usVoltageLevel;
 
@@ -1453,7 +1485,7 @@
 	ix = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo);
 
 	profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
-			cgs_atom_get_data_table(hwmgr->device,
+			smu_atom_get_data_table(hwmgr->adev,
 					ix,
 					NULL, NULL, NULL);
 	if (!profile)
@@ -1498,3 +1530,33 @@
 
 	return 0;
 }
+
+void atomctrl_get_voltage_range(struct pp_hwmgr *hwmgr, uint32_t *max_vddc,
+							uint32_t *min_vddc)
+{
+	void *profile;
+
+	profile = smu_atom_get_data_table(hwmgr->adev,
+					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
+					NULL, NULL, NULL);
+
+	if (profile) {
+		switch (hwmgr->chip_id) {
+		case CHIP_TONGA:
+		case CHIP_FIJI:
+			*max_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_3 *)profile)->ulMaxVddc) / 4;
+			*min_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_3 *)profile)->ulMinVddc) / 4;
+			return;
+		case CHIP_POLARIS11:
+		case CHIP_POLARIS10:
+		case CHIP_POLARIS12:
+			*max_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_6 *)profile)->ulMaxVddc) / 100;
+			*min_vddc = le32_to_cpu(((ATOM_ASIC_PROFILING_INFO_V3_6 *)profile)->ulMinVddc) / 100;
+			return;
+		default:
+			break;
+		}
+	}
+	*max_vddc = 0;
+	*min_vddc = 0;
+}
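Every converted call site in ppatomctrl.c above swaps the CGS indirection (cgs_atom_exec_cmd_table()) for a direct call into the atom interpreter, which takes the command-table parameter block as a uint32_t array — hence the casts. A minimal sketch of the new calling convention (example_exec_atom_cmd is illustrative):

/* Illustrative sketch of the direct atom-BIOS execution pattern used
 * throughout this file after the conversion. */
static int example_exec_atom_cmd(struct pp_hwmgr *hwmgr, void *parameters)
{
	struct amdgpu_device *adev = hwmgr->adev;

	/* amdgpu_atom_execute_table() takes the parameter block as a
	 * uint32_t array, hence the cast at every converted call site. */
	return amdgpu_atom_execute_table(adev->mode_info.atom_context,
			GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings),
			(uint32_t *)parameters);
}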
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index c44a920..3ee54f1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -146,6 +146,14 @@
 };
 typedef struct pp_atomctrl_memory_clock_param pp_atomctrl_memory_clock_param;
 
+struct pp_atomctrl_memory_clock_param_ai {
+	uint32_t ulClock;
+	uint32_t ulPostDiv;
+	uint16_t ulMclk_fcw_frac;
+	uint16_t ulMclk_fcw_int;
+};
+typedef struct pp_atomctrl_memory_clock_param_ai pp_atomctrl_memory_clock_param_ai;
+
 struct pp_atomctrl_internal_ss_info {
 	uint32_t speed_spectrum_percentage;                      /* in 1/100 percentage */
 	uint32_t speed_spectrum_rate;                            /* in KHz */
@@ -295,10 +303,12 @@
 extern int atomctrl_get_voltage_table_v3(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint8_t voltage_mode, pp_atomctrl_voltage_table *voltage_table);
 extern int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr,
 		uint32_t clock_value, pp_atomctrl_memory_clock_param *mpll_param);
+extern int atomctrl_get_memory_pll_dividers_ai(struct pp_hwmgr *hwmgr,
+		uint32_t clock_value, pp_atomctrl_memory_clock_param_ai *mpll_param);
 extern int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
 						 uint32_t clock_value,
 						 pp_atomctrl_clock_dividers_kong *dividers);
-extern int atomctrl_read_efuse(void *device, uint16_t start_index,
+extern int atomctrl_read_efuse(struct pp_hwmgr *hwmgr, uint16_t start_index,
 		uint16_t end_index, uint32_t mask, uint32_t *efuse);
 extern int atomctrl_calculate_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
 		uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage, uint16_t dpm_level, bool debug);
@@ -320,5 +330,8 @@
 					uint16_t virtual_voltage_id,
 					uint16_t efuse_voltage_id);
 extern int atomctrl_get_leakage_id_from_efuse(struct pp_hwmgr *hwmgr, uint16_t *virtual_voltage_id);
+
+extern void atomctrl_get_voltage_range(struct pp_hwmgr *hwmgr, uint32_t *max_vddc,
+							uint32_t *min_vddc);
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index ad42caa..c97b0e5 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -23,9 +23,9 @@
 
 #include "ppatomfwctrl.h"
 #include "atomfirmware.h"
+#include "atom.h"
 #include "pp_debug.h"
 
-
 static const union atom_voltage_object_v4 *pp_atomfwctrl_lookup_voltage_type_v4(
 		const struct atom_voltage_objects_info_v4_1 *voltage_object_info_table,
 		uint8_t voltage_type, uint8_t voltage_mode)
@@ -38,35 +38,34 @@
 
 	while (offset < size) {
 		const union atom_voltage_object_v4 *voltage_object =
-				(const union atom_voltage_object_v4 *)(start + offset);
+			(const union atom_voltage_object_v4 *)(start + offset);
 
-        if (voltage_type == voltage_object->gpio_voltage_obj.header.voltage_type &&
-            voltage_mode == voltage_object->gpio_voltage_obj.header.voltage_mode)
-            return voltage_object;
+		if (voltage_type == voltage_object->gpio_voltage_obj.header.voltage_type &&
+		    voltage_mode == voltage_object->gpio_voltage_obj.header.voltage_mode)
+			return voltage_object;
 
-        offset += le16_to_cpu(voltage_object->gpio_voltage_obj.header.object_size);
+		offset += le16_to_cpu(voltage_object->gpio_voltage_obj.header.object_size);
 
-    }
+	}
 
-    return NULL;
+	return NULL;
 }
 
 static struct atom_voltage_objects_info_v4_1 *pp_atomfwctrl_get_voltage_info_table(
 		struct pp_hwmgr *hwmgr)
 {
-    const void *table_address;
-    uint16_t idx;
+	const void *table_address;
+	uint16_t idx;
 
-    idx = GetIndexIntoMasterDataTable(voltageobject_info);
-    table_address =	cgs_atom_get_data_table(hwmgr->device,
-    		idx, NULL, NULL, NULL);
+	idx = GetIndexIntoMasterDataTable(voltageobject_info);
+	table_address = smu_atom_get_data_table(hwmgr->adev,
+						idx, NULL, NULL, NULL);
 
-    PP_ASSERT_WITH_CODE( 
-        table_address,
-        "Error retrieving BIOS Table Address!",
-        return NULL);
+	PP_ASSERT_WITH_CODE(table_address,
+			"Error retrieving BIOS Table Address!",
+			return NULL);
 
-    return (struct atom_voltage_objects_info_v4_1 *)table_address;
+	return (struct atom_voltage_objects_info_v4_1 *)table_address;
 }
 
 /**
@@ -167,7 +166,7 @@
 	uint16_t idx;
 
 	idx = GetIndexIntoMasterDataTable(gpio_pin_lut);
-	table_address =	cgs_atom_get_data_table(hwmgr->device,
+	table_address =	smu_atom_get_data_table(hwmgr->adev,
 			idx, NULL, NULL, NULL);
 	PP_ASSERT_WITH_CODE(table_address,
 			"Error retrieving BIOS Table Address!",
@@ -248,28 +247,30 @@
 		uint32_t clock_type, uint32_t clock_value,
 		struct pp_atomfwctrl_clock_dividers_soc15 *dividers)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	struct compute_gpu_clock_input_parameter_v1_8 pll_parameters;
 	struct compute_gpu_clock_output_parameter_v1_8 *pll_output;
-	int result;
 	uint32_t idx;
 
 	pll_parameters.gpuclock_10khz = (uint32_t)clock_value;
 	pll_parameters.gpu_clock_type = clock_type;
 
 	idx = GetIndexIntoMasterCmdTable(computegpuclockparam);
-	result = cgs_atom_exec_cmd_table(hwmgr->device, idx, &pll_parameters);
 
-	if (!result) {
-		pll_output = (struct compute_gpu_clock_output_parameter_v1_8 *)
-				&pll_parameters;
-		dividers->ulClock = le32_to_cpu(pll_output->gpuclock_10khz);
-		dividers->ulDid = le32_to_cpu(pll_output->dfs_did);
-		dividers->ulPll_fb_mult = le32_to_cpu(pll_output->pll_fb_mult);
-		dividers->ulPll_ss_fbsmult = le32_to_cpu(pll_output->pll_ss_fbsmult);
-		dividers->usPll_ss_slew_frac = le16_to_cpu(pll_output->pll_ss_slew_frac);
-		dividers->ucPll_ss_enable = pll_output->pll_ss_enable;
-	}
-	return result;
+	if (amdgpu_atom_execute_table(
+		adev->mode_info.atom_context, idx, (uint32_t *)&pll_parameters))
+		return -EINVAL;
+
+	pll_output = (struct compute_gpu_clock_output_parameter_v1_8 *)
+			&pll_parameters;
+	dividers->ulClock = le32_to_cpu(pll_output->gpuclock_10khz);
+	dividers->ulDid = le32_to_cpu(pll_output->dfs_did);
+	dividers->ulPll_fb_mult = le32_to_cpu(pll_output->pll_fb_mult);
+	dividers->ulPll_ss_fbsmult = le32_to_cpu(pll_output->pll_ss_fbsmult);
+	dividers->usPll_ss_slew_frac = le16_to_cpu(pll_output->pll_ss_slew_frac);
+	dividers->ucPll_ss_enable = pll_output->pll_ss_enable;
+
+	return 0;
 }
 
 int pp_atomfwctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
@@ -283,7 +284,7 @@
 
 	idx = GetIndexIntoMasterDataTable(asic_profiling_info);
 	profile = (struct atom_asic_profiling_info_v4_1 *)
-			cgs_atom_get_data_table(hwmgr->device,
+			smu_atom_get_data_table(hwmgr->adev,
 					idx, NULL, NULL, NULL);
 
 	if (!profile)
@@ -467,7 +468,7 @@
 
 	idx = GetIndexIntoMasterDataTable(smu_info);
 	info = (struct atom_smu_info_v3_1 *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
 				idx, NULL, NULL, NULL);
 
 	if (!info) {
@@ -487,8 +488,9 @@
 	return 0;
 }
 
-int pp_atomfwctrl__get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency)
+int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKID id, uint32_t *frequency)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	struct atom_get_smu_clock_info_parameters_v3_1   parameters;
 	struct atom_get_smu_clock_info_output_parameters_v3_1 *output;
 	uint32_t ix;
@@ -497,13 +499,13 @@
 	parameters.command = GET_SMU_CLOCK_INFO_V3_1_GET_CLOCK_FREQ;
 
 	ix = GetIndexIntoMasterCmdTable(getsmuclockinfo);
-	if (!cgs_atom_exec_cmd_table(hwmgr->device, ix, &parameters)) {
-		output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&parameters;
-		*frequency = output->atom_smu_outputclkfreq.smu_clock_freq_hz / 10000;
-	} else {
-		pr_info("Error execute_table getsmuclockinfo!");
-		return -1;
-	}
+
+	if (amdgpu_atom_execute_table(
+		adev->mode_info.atom_context, ix, (uint32_t *)&parameters))
+		return -EINVAL;
+
+	output = (struct atom_get_smu_clock_info_output_parameters_v3_1 *)&parameters;
+	*frequency = output->atom_smu_outputclkfreq.smu_clock_freq_hz / 10000;
 
 	return 0;
 }
@@ -513,11 +515,10 @@
 {
 	struct atom_firmware_info_v3_1 *info = NULL;
 	uint16_t ix;
-	uint32_t frequency = 0;
 
 	ix = GetIndexIntoMasterDataTable(firmwareinfo);
 	info = (struct atom_firmware_info_v3_1 *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
 				ix, NULL, NULL, NULL);
 
 	if (!info) {
@@ -536,12 +537,6 @@
 	boot_values->ulSocClk   = 0;
 	boot_values->ulDCEFClk   = 0;
 
-	if (!pp_atomfwctrl__get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_SOCCLK_ID, &frequency))
-		boot_values->ulSocClk   = frequency;
-
-	if (!pp_atomfwctrl__get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCEFCLK_ID, &frequency))
-		boot_values->ulDCEFClk   = frequency;
-
 	return 0;
 }
 
@@ -553,7 +548,7 @@
 
 	ix = GetIndexIntoMasterDataTable(smc_dpm_info);
 	info = (struct atom_smc_dpm_info_v4_1 *)
-		cgs_atom_get_data_table(hwmgr->device,
+		smu_atom_get_data_table(hwmgr->adev,
 				ix, NULL, NULL, NULL);
 	if (!info) {
 		pr_info("Error retrieving BIOS Table Address!");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index 8df1e84f..fe10aa4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -230,6 +230,8 @@
 			struct pp_atomfwctrl_bios_boot_up_values *boot_values);
 int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,
 			struct pp_atomfwctrl_smc_dpm_parameters *param);
+int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr,
+					BIOS_CLKID id, uint32_t *frequency);
 
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
index c9eecce..f0d48b1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
@@ -141,7 +141,7 @@
 
 	if (!table_address) {
 		table_address = (ATOM_Tonga_POWERPLAYTABLE *)
-				cgs_atom_get_data_table(hwmgr->device,
+				smu_atom_get_data_table(hwmgr->adev,
 						index, &size, &frev, &crev);
 		hwmgr->soft_pp_table = table_address;	/*Cache the result in RAM.*/
 		hwmgr->soft_pp_table_size = size;
@@ -728,6 +728,32 @@
 	return 0;
 }
 
+static int get_gpio_table(struct pp_hwmgr *hwmgr,
+		struct phm_ppt_v1_gpio_table **pp_tonga_gpio_table,
+		const ATOM_Tonga_GPIO_Table *atom_gpio_table)
+{
+	uint32_t table_size;
+	struct phm_ppt_v1_gpio_table *pp_gpio_table;
+	struct phm_ppt_v1_information *pp_table_information =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+	table_size = sizeof(struct phm_ppt_v1_gpio_table);
+	pp_gpio_table = kzalloc(table_size, GFP_KERNEL);
+	if (!pp_gpio_table)
+		return -ENOMEM;
+
+	if (pp_table_information->vdd_dep_on_sclk->count <
+			atom_gpio_table->ucVRHotTriggeredSclkDpmIndex)
+		PP_ASSERT_WITH_CODE(false,
+				"SCLK DPM index for VRHot cannot exceed the total sclk level count!",);
+	else
+		pp_gpio_table->vrhot_triggered_sclk_dpm_index =
+				atom_gpio_table->ucVRHotTriggeredSclkDpmIndex;
+
+	*pp_tonga_gpio_table = pp_gpio_table;
+
+	return 0;
+}
+
 /**
  * Private Function used during initialization.
  * Initialize clock voltage dependency
@@ -761,11 +787,15 @@
 	const PPTable_Generic_SubTable_Header *pcie_table =
 		(const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usPCIETableOffset));
+	const ATOM_Tonga_GPIO_Table *gpio_table =
+		(const ATOM_Tonga_GPIO_Table *)(((unsigned long) powerplay_table) +
+		le16_to_cpu(powerplay_table->usGPIOTableOffset));
 
 	pp_table_information->vdd_dep_on_sclk = NULL;
 	pp_table_information->vdd_dep_on_mclk = NULL;
 	pp_table_information->mm_dep_table = NULL;
 	pp_table_information->pcie_table = NULL;
+	pp_table_information->gpio_table = NULL;
 
 	if (powerplay_table->usMMDependencyTableOffset != 0)
 		result = get_mm_clock_voltage_table(hwmgr,
@@ -810,6 +840,10 @@
 		result = get_valid_clk(hwmgr, &pp_table_information->valid_sclk_values,
 		pp_table_information->vdd_dep_on_sclk);
 
+	if (!result && gpio_table)
+		result = get_gpio_table(hwmgr, &pp_table_information->gpio_table,
+				gpio_table);
+
 	return result;
 }
 
@@ -1116,6 +1150,9 @@
 	kfree(pp_table_information->pcie_table);
 	pp_table_information->pcie_table = NULL;
 
+	kfree(pp_table_information->gpio_table);
+	pp_table_information->gpio_table = NULL;
+
 	kfree(hwmgr->pptable);
 	hwmgr->pptable = NULL;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index 36ca7c4..ce64dfa 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -837,7 +837,7 @@
 			hwmgr->soft_pp_table = &soft_dummy_pp_table[0];
 			hwmgr->soft_pp_table_size = sizeof(soft_dummy_pp_table);
 		} else {
-			table_addr = cgs_atom_get_data_table(hwmgr->device,
+			table_addr = smu_atom_get_data_table(hwmgr->adev,
 					GetIndexIntoMasterTable(DATA, PowerPlayInfo),
 					&size, &frev, &crev);
 			hwmgr->soft_pp_table = table_addr;
@@ -1058,7 +1058,7 @@
 		return 0;
 
 	/* We assume here that fw_info is unchanged if this call fails.*/
-	fw_info = cgs_atom_get_data_table(hwmgr->device,
+	fw_info = smu_atom_get_data_table(hwmgr->adev,
 			 GetIndexIntoMasterTable(DATA, FirmwareInfo),
 			 &size, &frev, &crev);
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 10253b8..85f84f4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -34,7 +34,7 @@
 #include "rv_ppsmc.h"
 #include "smu10_hwmgr.h"
 #include "power_state.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 
 #define SMU10_MAX_DEEPSLEEP_DIVIDER_ID     5
 #define SMU10_MINIMUM_ENGINE_CLOCK         800   /* 8Mhz, the low boundary of engine clock allowed on this chip */
@@ -42,6 +42,13 @@
 #define SMU10_DISPCLK_BYPASS_THRESHOLD     10000 /* 100Mhz */
 #define SMC_RAM_END                     0x40000
 
+#define mmPWR_MISC_CNTL_STATUS					0x0183
+#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
+#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
+#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
+
 static const unsigned long SMU10_Magic = (unsigned long) PHM_Rv_Magic;
 
 
@@ -74,11 +81,15 @@
 	smu10_data->thermal_auto_throttling_treshold = 0;
 	smu10_data->is_nb_dpm_enabled = 1;
 	smu10_data->dpm_flags = 1;
-	smu10_data->gfx_off_controled_by_driver = false;
 	smu10_data->need_min_deep_sleep_dcefclk = true;
 	smu10_data->num_active_display = 0;
 	smu10_data->deep_sleep_dcefclk = 0;
 
+	if (hwmgr->feature_mask & PP_GFXOFF_MASK)
+		smu10_data->gfx_off_controled_by_driver = true;
+	else
+		smu10_data->gfx_off_controled_by_driver = false;
+
 	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_SclkDeepSleep);
 
@@ -161,7 +172,7 @@
 	struct PP_Clocks clocks = {0};
 	struct pp_display_clock_request clock_req;
 
-	clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk;
+	clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
 	clock_req.clock_type = amd_pp_dcf_clock;
 	clock_req.clock_freq_in_khz = clocks.dcefClock * 10;
 
@@ -206,12 +217,18 @@
 static int smu10_init_power_gate_state(struct pp_hwmgr *hwmgr)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	smu10_data->vcn_power_gated = true;
 	smu10_data->isp_tileA_power_gated = true;
 	smu10_data->isp_tileB_power_gated = true;
 
-	return 0;
+	if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
+		return smum_send_msg_to_smc_with_parameter(hwmgr,
+							   PPSMC_MSG_SetGfxCGPG,
+							   true);
+	else
+		return 0;
 }
 
 
@@ -237,13 +254,31 @@
 	return smu10_reset_cc6_data(hwmgr);
 }
 
+static bool smu10_is_gfx_on(struct pp_hwmgr *hwmgr)
+{
+	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
+
+	reg = RREG32_SOC15(PWR, 0, mmPWR_MISC_CNTL_STATUS);
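+	/* a GFXOFF status field value of 2 is treated as "GFX fully on" here */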
+	if ((reg & PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK) ==
+	    (0x2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT))
+		return true;
+
+	return false;
+}
+
 static int smu10_disable_gfx_off(struct pp_hwmgr *hwmgr)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
 
-	if (smu10_data->gfx_off_controled_by_driver)
+	if (smu10_data->gfx_off_controled_by_driver) {
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableGfxOff);
 
+		/* confirm gfx is back to "on" state */
+		while (!smu10_is_gfx_on(hwmgr))
+			msleep(1);
+	}
+
 	return 0;
 }
 
@@ -267,6 +302,14 @@
 	return smu10_enable_gfx_off(hwmgr);
 }
 
+static int smu10_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable)
+{
+	if (enable)
+		return smu10_enable_gfx_off(hwmgr);
+	else
+		return smu10_disable_gfx_off(hwmgr);
+}
+
 static int smu10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				struct pp_power_state  *prequest_ps,
 			const struct pp_power_state *pcurrent_ps)
@@ -340,7 +383,7 @@
 
 static int smu10_populate_clock_table(struct pp_hwmgr *hwmgr)
 {
-	int result;
+	uint32_t result;
 
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
 	DpmClocks_t  *table = &(smu10_data->clock_table);
@@ -386,11 +429,11 @@
 
 	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency);
 	result = smum_get_argument(hwmgr);
-	smu10_data->gfx_min_freq_limit = result * 100;
+	smu10_data->gfx_min_freq_limit = result / 10 * 1000;
 
 	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency);
 	result = smum_get_argument(hwmgr);
-	smu10_data->gfx_max_freq_limit = result * 100;
+	smu10_data->gfx_max_freq_limit = result / 10 * 1000;
 
 	return 0;
 }
@@ -436,8 +479,8 @@
 
 	hwmgr->platform_descriptor.minimumClocksReductionPercentage = 50;
 
-	hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK;
-	hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK;
+	hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK * 100;
+	hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK * 100;
 
 	return result;
 }
@@ -472,6 +515,8 @@
 static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 				enum amd_dpm_forced_level level)
 {
+	struct smu10_hwmgr *data = hwmgr->backend;
+
 	if (hwmgr->smu_version < 0x1E3700) {
 		pr_info("smu firmware version too old, can not set dpm level\n");
 		return 0;
@@ -482,7 +527,7 @@
 	case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
-						SMU10_UMD_PSTATE_PEAK_GFXCLK);
+						data->gfx_max_freq_limit/100);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinFclkByFreq,
 						SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -495,7 +540,7 @@
 
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxGfxClk,
-						SMU10_UMD_PSTATE_PEAK_GFXCLK);
+						data->gfx_max_freq_limit/100);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxFclkByFreq,
 						SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -509,10 +554,10 @@
 	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
 		break;
 	case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
 		smum_send_msg_to_smc_with_parameter(hwmgr,
@@ -552,10 +597,13 @@
 	case AMD_DPM_FORCED_LEVEL_AUTO:
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
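+		/* with more than 3 active displays, hold FCLK at its peak,
+		 * presumably to guarantee display bandwidth */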
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinFclkByFreq,
+						hwmgr->display_config->num_display > 3 ?
+						SMU10_UMD_PSTATE_PEAK_FCLK :
 						SMU10_UMD_PSTATE_MIN_FCLK);
+
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinSocclkByFreq,
 						SMU10_UMD_PSTATE_MIN_SOCCLK);
@@ -565,7 +613,7 @@
 
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxGfxClk,
-						SMU10_UMD_PSTATE_PEAK_GFXCLK);
+						data->gfx_max_freq_limit/100);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxFclkByFreq,
 						SMU10_UMD_PSTATE_PEAK_FCLK);
@@ -579,10 +627,10 @@
 	case AMD_DPM_FORCED_LEVEL_LOW:
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxGfxClk,
-						SMU10_UMD_PSTATE_MIN_GFXCLK);
+						data->gfx_min_freq_limit/100);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinFclkByFreq,
 						SMU10_UMD_PSTATE_MIN_FCLK);
@@ -699,6 +747,16 @@
 static int smu10_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time,
 			bool cc6_disable, bool pstate_disable, bool pstate_switch_disable)
 {
+	struct smu10_hwmgr *data = (struct smu10_hwmgr *)(hwmgr->backend);
+
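+	/* cache the CC6 parameters and flag the settings as changed */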
+	if (separation_time != data->separation_time ||
+			cc6_disable != data->cc6_disable ||
+			pstate_disable != data->pstate_disable) {
+		data->separation_time = separation_time;
+		data->cc6_disable = cc6_disable;
+		data->pstate_disable = pstate_disable;
+		data->cc6_setting_changed = true;
+	}
 	return 0;
 }
 
@@ -711,6 +769,51 @@
 static int smu10_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
+	struct smu10_hwmgr *data = hwmgr->backend;
+	struct smu10_voltage_dependency_table *mclk_table =
+					data->clock_vol_info.vdd_dep_on_fclk;
+	uint32_t low, high;
+
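+	/* the mask encodes requested DPM levels; its lowest and highest
+	 * set bits bound the allowed clock range */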
+	low = mask ? (ffs(mask) - 1) : 0;
+	high = mask ? (fls(mask) - 1) : 0;
+
+	switch (type) {
+	case PP_SCLK:
+		if (low > 2 || high > 2) {
+			pr_info("Currently sclk only supports 3 levels on RV\n");
+			return -EINVAL;
+		}
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinGfxClk,
+						low == 2 ? data->gfx_max_freq_limit/100 :
+						low == 1 ? SMU10_UMD_PSTATE_GFXCLK :
+						data->gfx_min_freq_limit/100);
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxGfxClk,
+						high == 0 ? data->gfx_min_freq_limit/100 :
+						high == 1 ? SMU10_UMD_PSTATE_GFXCLK :
+						data->gfx_max_freq_limit/100);
+		break;
+
+	case PP_MCLK:
+		if (low > mclk_table->count - 1 || high > mclk_table->count - 1)
+			return -EINVAL;
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetHardMinFclkByFreq,
+						mclk_table->entries[low].clk/100);
+
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_SetSoftMaxFclkByFreq,
+						mclk_table->entries[high].clk/100);
+		break;
+
+	case PP_PCIE:
+	default:
+		break;
+	}
 	return 0;
 }
 
@@ -720,21 +823,30 @@
 	struct smu10_hwmgr *data = (struct smu10_hwmgr *)(hwmgr->backend);
 	struct smu10_voltage_dependency_table *mclk_table =
 			data->clock_vol_info.vdd_dep_on_fclk;
-	int i, now, size = 0;
+	uint32_t i, now, size = 0;
 
 	switch (type) {
 	case PP_SCLK:
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency);
 		now = smum_get_argument(hwmgr);
 
+	/* driver only knows min/max gfx_clk; add level 1 for all other gfx clks */
+		if (now == data->gfx_max_freq_limit/100)
+			i = 2;
+		else if (now == data->gfx_min_freq_limit/100)
+			i = 0;
+		else
+			i = 1;
+
 		size += sprintf(buf + size, "0: %uMhz %s\n",
-				data->gfx_min_freq_limit / 100,
-				((data->gfx_min_freq_limit / 100)
-				 == now) ? "*" : "");
+					data->gfx_min_freq_limit/100,
+					i == 0 ? "*" : "");
 		size += sprintf(buf + size, "1: %uMhz %s\n",
-				data->gfx_max_freq_limit / 100,
-				((data->gfx_max_freq_limit / 100)
-				 == now) ? "*" : "");
+					i == 1 ? now : SMU10_UMD_PSTATE_GFXCLK,
+					i == 1 ? "*" : "");
+		size += sprintf(buf + size, "2: %uMhz %s\n",
+					data->gfx_max_freq_limit/100,
+					i == 2 ? "*" : "");
 		break;
 	case PP_MCLK:
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency);
@@ -947,9 +1059,8 @@
 
 static int smu10_thermal_get_temperature(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg_offset = soc15_get_register_offset(THM_HWID, 0,
-			mmTHM_TCON_CUR_TMP_BASE_IDX, mmTHM_TCON_CUR_TMP);
-	uint32_t reg_value = cgs_read_register(hwmgr->device, reg_offset);
+	struct amdgpu_device *adev = hwmgr->adev;
+	uint32_t reg_value = RREG32_SOC15(THM, 0, mmTHM_TCON_CUR_TMP);
 	int cur_temp =
 		(reg_value & THM_TCON_CUR_TMP__CUR_TEMP_MASK) >> THM_TCON_CUR_TMP__CUR_TEMP__SHIFT;
 
@@ -993,11 +1104,47 @@
 	return ret;
 }
 
+static int smu10_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr,
+		struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges)
+{
+	struct smu10_hwmgr *data = hwmgr->backend;
+	Watermarks_t *table = &(data->water_marks_table);
+	int result = 0;
+
+	smu_set_watermarks_for_clocks_ranges(table, wm_with_clock_ranges);
+	smum_smc_table_manager(hwmgr, (uint8_t *)table, (uint16_t)SMU10_WMTABLE, false);
+	data->water_marks_exist = true;
+	return result;
+}
+
+static int smu10_smus_notify_pwe(struct pp_hwmgr *hwmgr)
+{
+	return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_SetRccPfcPmeRestoreRegister);
+}
+
 static int smu10_set_mmhub_powergating_by_smu(struct pp_hwmgr *hwmgr)
 {
 	return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PowerGateMmHub);
 }
 
+static void smu10_powergate_vcn(struct pp_hwmgr *hwmgr, bool bgate)
+{
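+	/* gate the VCN IP block before the SMU powers it down, and power
+	 * it back up before ungating */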
+	if (bgate) {
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
+						AMD_IP_BLOCK_TYPE_VCN,
+						AMD_PG_STATE_GATE);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_PowerDownVcn, 0);
+	} else {
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+						PPSMC_MSG_PowerUpVcn, 0);
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
+						AMD_IP_BLOCK_TYPE_VCN,
+						AMD_PG_STATE_UNGATE);
+	}
+}
+
 static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
 	.backend_init = smu10_hwmgr_backend_init,
 	.backend_fini = smu10_hwmgr_backend_fini,
@@ -1006,7 +1153,7 @@
 	.force_dpm_level = smu10_dpm_force_dpm_level,
 	.get_power_state_size = smu10_get_power_state_size,
 	.powerdown_uvd = NULL,
-	.powergate_uvd = NULL,
+	.powergate_uvd = smu10_powergate_vcn,
 	.powergate_vce = NULL,
 	.get_mclk = smu10_dpm_get_mclk,
 	.get_sclk = smu10_dpm_get_sclk,
@@ -1022,6 +1169,7 @@
 	.get_current_shallow_sleep_clocks = smu10_get_current_shallow_sleep_clocks,
 	.get_clock_by_type_with_latency = smu10_get_clock_by_type_with_latency,
 	.get_clock_by_type_with_voltage = smu10_get_clock_by_type_with_voltage,
+	.set_watermarks_for_clocks_ranges = smu10_set_watermarks_for_clocks_ranges,
 	.get_max_high_clocks = smu10_get_max_high_clocks,
 	.read_sensor = smu10_read_sensor,
 	.set_active_display_count = smu10_set_active_display_count,
@@ -1032,6 +1180,8 @@
 	.power_state_set = smu10_set_power_state_tasks,
 	.dynamic_state_management_disable = smu10_disable_dpm_tasks,
 	.set_mmhub_powergating_by_smu = smu10_set_mmhub_powergating_by_smu,
+	.smus_notify_pwe = smu10_smus_notify_pwe,
+	.gfx_off_control = smu10_gfx_off_control,
 };
 
 int smu10_init_function_pointers(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
index 175c3a59..1fb296a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.h
@@ -290,6 +290,7 @@
 	bool                           vcn_dpg_mode;
 
 	bool                           gfx_off_controled_by_driver;
+	bool                           water_marks_exist;
 	Watermarks_t                      water_marks_table;
 	struct smu10_clock_voltage_information   clock_vol_info;
 	DpmClocks_t                       clock_table;
@@ -310,11 +311,9 @@
 #define SMU10_UMD_PSTATE_FCLK                   933
 #define SMU10_UMD_PSTATE_VCE                    0x03C00320
 
-#define SMU10_UMD_PSTATE_PEAK_GFXCLK            1100
 #define SMU10_UMD_PSTATE_PEAK_SOCCLK            757
 #define SMU10_UMD_PSTATE_PEAK_FCLK              1200
 
-#define SMU10_UMD_PSTATE_MIN_GFXCLK             200
 #define SMU10_UMD_PSTATE_MIN_FCLK               400
 #define SMU10_UMD_PSTATE_MIN_SOCCLK             200
 #define SMU10_UMD_PSTATE_MIN_VCE                0x0190012C
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
index f4cbaee..6d72a56 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
@@ -147,20 +147,20 @@
 	data->uvd_power_gated = bgate;
 
 	if (bgate) {
-		cgs_set_powergating_state(hwmgr->device,
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_PG_STATE_GATE);
-		cgs_set_clockgating_state(hwmgr->device,
+		amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 				AMD_IP_BLOCK_TYPE_UVD,
 				AMD_CG_STATE_GATE);
 		smu7_update_uvd_dpm(hwmgr, true);
 		smu7_powerdown_uvd(hwmgr);
 	} else {
 		smu7_powerup_uvd(hwmgr);
-		cgs_set_clockgating_state(hwmgr->device,
+		amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 				AMD_IP_BLOCK_TYPE_UVD,
 				AMD_CG_STATE_UNGATE);
-		cgs_set_powergating_state(hwmgr->device,
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_PG_STATE_UNGATE);
 		smu7_update_uvd_dpm(hwmgr, false);
@@ -175,20 +175,20 @@
 	data->vce_power_gated = bgate;
 
 	if (bgate) {
-		cgs_set_powergating_state(hwmgr->device,
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_VCE,
 						AMD_PG_STATE_GATE);
-		cgs_set_clockgating_state(hwmgr->device,
+		amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 				AMD_IP_BLOCK_TYPE_VCE,
 				AMD_CG_STATE_GATE);
 		smu7_update_vce_dpm(hwmgr, true);
 		smu7_powerdown_vce(hwmgr);
 	} else {
 		smu7_powerup_vce(hwmgr);
-		cgs_set_clockgating_state(hwmgr->device,
+		amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 				AMD_IP_BLOCK_TYPE_VCE,
 				AMD_CG_STATE_UNGATE);
-		cgs_set_powergating_state(hwmgr->device,
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_VCE,
 						AMD_PG_STATE_UNGATE);
 		smu7_update_vce_dpm(hwmgr, false);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 18b5b2f..45e9b8c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -61,10 +61,6 @@
 #define SMC_CG_IND_START            0xc0030000
 #define SMC_CG_IND_END              0xc0040000
 
-#define VOLTAGE_SCALE               4
-#define VOLTAGE_VID_OFFSET_SCALE1   625
-#define VOLTAGE_VID_OFFSET_SCALE2   100
-
 #define MEM_FREQ_LOW_LATENCY        25000
 #define MEM_FREQ_HIGH_LATENCY       80000
 
@@ -88,6 +84,14 @@
 					 {0, 0, 0, 0, 0, 0, 0, 0},
 					};
 
+#define PPSMC_MSG_SetVBITimeout_VEGAM    ((uint16_t) 0x310)
+
+#define ixPWR_SVI2_PLANE1_LOAD                     0xC0200280
+#define PWR_SVI2_PLANE1_LOAD__PSI1_MASK                    0x00000020L
+#define PWR_SVI2_PLANE1_LOAD__PSI0_EN_MASK                 0x00000040L
+#define PWR_SVI2_PLANE1_LOAD__PSI1__SHIFT                  0x00000005
+#define PWR_SVI2_PLANE1_LOAD__PSI0_EN__SHIFT               0x00000006
+
 /** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */
 enum DPM_EVENT_SRC {
 	DPM_EVENT_SRC_ANALOG = 0,
@@ -169,6 +173,13 @@
 */
 static int smu7_enable_smc_voltage_controller(struct pp_hwmgr *hwmgr)
 {
+	if (hwmgr->chip_id == CHIP_VEGAM) {
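+		/* Vega M: clear the PSI0/PSI1 power-state bits on SVI2 plane 1
+		 * before enabling the voltage controller (assumed VR requirement) */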
+		PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device,
+				CGS_IND_REG__SMC, PWR_SVI2_PLANE1_LOAD, PSI1, 0);
+		PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device,
+				CGS_IND_REG__SMC, PWR_SVI2_PLANE1_LOAD, PSI0_EN, 0);
+	}
+
 	if (hwmgr->feature_mask & PP_SMC_VOLTAGE_CONTROL_MASK)
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_Voltage_Cntl_Enable);
 
@@ -798,32 +809,6 @@
 	return 0;
 }
 
-static int smu7_get_voltage_dependency_table(
-			const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table,
-			struct phm_ppt_v1_clock_voltage_dependency_table *dep_table)
-{
-	uint8_t i = 0;
-	PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count),
-				"Voltage Lookup Table empty",
-				return -EINVAL);
-
-	dep_table->count = allowed_dep_table->count;
-	for (i=0; i<dep_table->count; i++) {
-		dep_table->entries[i].clk = allowed_dep_table->entries[i].clk;
-		dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd;
-		dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset;
-		dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc;
-		dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx;
-		dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci;
-		dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd;
-		dep_table->entries[i].phases = allowed_dep_table->entries[i].phases;
-		dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable;
-		dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset;
-	}
-
-	return 0;
-}
-
 static int smu7_odn_initial_default_setting(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -851,7 +836,7 @@
 		entries[i].vddc = dep_sclk_table->entries[i].vddc;
 	}
 
-	smu7_get_voltage_dependency_table(dep_sclk_table,
+	smu_get_voltage_dependency_table_ppt_v1(dep_sclk_table,
 		(struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk));
 
 	odn_table->odn_memory_clock_dpm_levels.num_of_pl =
@@ -863,12 +848,40 @@
 		entries[i].vddc = dep_mclk_table->entries[i].vddc;
 	}
 
-	smu7_get_voltage_dependency_table(dep_mclk_table,
+	smu_get_voltage_dependency_table_ppt_v1(dep_mclk_table,
 		(struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_mclk));
 
 	return 0;
 }
 
+static void smu7_setup_voltage_range_from_vbios(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	uint32_t min_vddc = 0;
+	uint32_t max_vddc = 0;
+
+	if (!table_info)
+		return;
+
+	dep_sclk_table = table_info->vdd_dep_on_sclk;
+
+	atomctrl_get_voltage_range(hwmgr, &max_vddc, &min_vddc);
+
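+	/* fall back to the pptable sclk voltage bounds when the VBIOS
+	 * range is missing or implausible (> 2000 mV) */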
+	if (min_vddc == 0 || min_vddc > 2000
+		|| min_vddc > dep_sclk_table->entries[0].vddc)
+		min_vddc = dep_sclk_table->entries[0].vddc;
+
+	if (max_vddc == 0 || max_vddc > 2000
+		|| max_vddc < dep_sclk_table->entries[dep_sclk_table->count-1].vddc)
+		max_vddc = dep_sclk_table->entries[dep_sclk_table->count-1].vddc;
+
+	data->odn_dpm_table.min_vddc = min_vddc;
+	data->odn_dpm_table.max_vddc = max_vddc;
+}
+
 static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -887,8 +900,10 @@
 			sizeof(struct smu7_dpm_table));
 
 	/* initialize ODN table */
-	if (hwmgr->od_enabled)
+	if (hwmgr->od_enabled) {
+		smu7_setup_voltage_range_from_vbios(hwmgr);
 		smu7_odn_initial_default_setting(hwmgr);
+	}
 
 	return 0;
 }
@@ -966,6 +981,22 @@
 	return 0;
 }
 
+static int smu7_disable_sclk_vce_handshake(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t soft_register_value = 0;
+	uint32_t handshake_disables_offset = data->soft_regs_start
+				+ smum_get_offsetof(hwmgr,
+					SMU_SoftRegisters, HandshakeDisables);
+
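+	/* set the VCE/SCLK handshake-disable bit in the SMU soft-register area */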
+	soft_register_value = cgs_read_ind_register(hwmgr->device,
+				CGS_IND_REG__SMC, handshake_disables_offset);
+	soft_register_value |= SMU7_VCE_SCLK_HANDSHAKE_DISABLE;
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			handshake_disables_offset, soft_register_value);
+	return 0;
+}
+
 static int smu7_disable_handshake_uvd(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -988,23 +1019,29 @@
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
 	/* enable SCLK dpm */
-	if (!data->sclk_dpm_key_disabled)
+	if (!data->sclk_dpm_key_disabled) {
+		if (hwmgr->chip_id == CHIP_VEGAM)
+			smu7_disable_sclk_vce_handshake(hwmgr);
+
 		PP_ASSERT_WITH_CODE(
 		(0 == smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DPM_Enable)),
 		"Failed to enable SCLK DPM during DPM Start Function!",
 		return -EINVAL);
+	}
 
 	/* enable MCLK dpm */
 	if (0 == data->mclk_dpm_key_disabled) {
 		if (!(hwmgr->feature_mask & PP_UVD_HANDSHAKE_MASK))
 			smu7_disable_handshake_uvd(hwmgr);
+
 		PP_ASSERT_WITH_CODE(
 				(0 == smum_send_msg_to_smc(hwmgr,
 						PPSMC_MSG_MCLKDPM_Enable)),
 				"Failed to enable MCLK DPM during DPM Start Function!",
 				return -EINVAL);
 
-		PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
+		if (hwmgr->chip_id != CHIP_VEGAM)
+			PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
 
 
 		if (hwmgr->chip_family == AMDGPU_FAMILY_CI) {
@@ -1020,8 +1057,13 @@
 			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x5);
 			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_CPL_CNTL, 0x100005);
 			udelay(10);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400005);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400005);
+			if (hwmgr->chip_id == CHIP_VEGAM) {
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400009);
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400009);
+			} else {
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x400005);
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC1_CNTL, 0x400005);
+			}
 			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_CPL_CNTL, 0x500005);
 		}
 	}
@@ -1230,7 +1272,7 @@
 
 		tmp_result = smu7_construct_voltage_tables(hwmgr);
 		PP_ASSERT_WITH_CODE((0 == tmp_result),
-				"Failed to contruct voltage tables!",
+				"Failed to construct voltage tables!",
 				result = tmp_result);
 	}
 	smum_initialize_mc_reg_table(hwmgr);
@@ -1262,10 +1304,12 @@
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to process firmware header!", result = tmp_result);
 
-	tmp_result = smu7_initial_switch_from_arbf0_to_f1(hwmgr);
-	PP_ASSERT_WITH_CODE((0 == tmp_result),
-			"Failed to initialize switch from ArbF0 to F1!",
-			result = tmp_result);
+	if (hwmgr->chip_id != CHIP_VEGAM) {
+		tmp_result = smu7_initial_switch_from_arbf0_to_f1(hwmgr);
+		PP_ASSERT_WITH_CODE((0 == tmp_result),
+				"Failed to initialize switch from ArbF0 to F1!",
+				result = tmp_result);
+	}
 
 	result = smu7_setup_default_dpm_tables(hwmgr);
 	PP_ASSERT_WITH_CODE(0 == result,
@@ -2755,6 +2799,9 @@
 	case CHIP_POLARIS12:
 		switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
 		break;
+	case CHIP_VEGAM:
+		switch_limit_us = 30;
+		break;
 	default:
 		switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
 		break;
@@ -2778,8 +2825,6 @@
 	struct PP_Clocks minimum_clocks = {0};
 	bool disable_mclk_switching;
 	bool disable_mclk_switching_for_frame_lock;
-	struct cgs_display_info info = {0};
-	struct cgs_mode_info mode_info = {0};
 	const struct phm_clock_and_voltage_limits *max_limits;
 	uint32_t i;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -2788,7 +2833,6 @@
 	int32_t count;
 	int32_t stable_pstate_sclk = 0, stable_pstate_mclk = 0;
 
-	info.mode_info = &mode_info;
 	data->battery_state = (PP_StateUILabel_Battery ==
 			request_ps->classification.ui_label);
 
@@ -2810,10 +2854,8 @@
 		}
 	}
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-
-	minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
-	minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+	minimum_clocks.engineClock = hwmgr->display_config->min_core_set_clock;
+	minimum_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_StablePState)) {
@@ -2844,12 +2886,12 @@
 				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
 
-	if (info.display_count == 0)
+	if (hwmgr->display_config->num_display == 0)
 		disable_mclk_switching = false;
 	else
-		disable_mclk_switching = ((1 < info.display_count) ||
+		disable_mclk_switching = ((1 < hwmgr->display_config->num_display) ||
 					  disable_mclk_switching_for_frame_lock ||
-					  smu7_vblank_too_short(hwmgr, mode_info.vblank_time_us));
+					  smu7_vblank_too_short(hwmgr, hwmgr->display_config->min_vblank_time));
 
 	sclk = smu7_ps->performance_levels[0].engine_clock;
 	mclk = smu7_ps->performance_levels[0].memory_clock;
@@ -2958,8 +3000,7 @@
 	/* First retrieve the Boot clocks and VDDC from the firmware info table.
 	 * We assume here that fw_info is unchanged if this call fails.
 	 */
-	fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)cgs_atom_get_data_table(
-			hwmgr->device, index,
+	fw_info = (ATOM_FIRMWARE_INFO_V2_2 *)smu_atom_get_data_table(hwmgr->adev, index,
 			&size, &frev, &crev);
 	if (!fw_info)
 		/* During a test, there is no firmware info table. */
@@ -3367,34 +3408,35 @@
 	return 0;
 }
 
-static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr,
-		struct pp_gpu_power *query)
+static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query)
 {
-	PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
-			PPSMC_MSG_PmStatusLogStart),
-			"Failed to start pm status log!",
-			return -1);
+	int i;
+	u32 tmp = 0;
 
-	/* Sampling period from 50ms to 4sec */
-	msleep_interruptible(200);
+	if (!query)
+		return -EINVAL;
 
-	PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
-			PPSMC_MSG_PmStatusLogSample),
-			"Failed to sample pm status log!",
-			return -1);
+	smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0);
+	tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+	*query = tmp;
 
-	query->vddc_power = cgs_read_ind_register(hwmgr->device,
-			CGS_IND_REG__SMC,
-			ixSMU_PM_STATUS_40);
-	query->vddci_power = cgs_read_ind_register(hwmgr->device,
-			CGS_IND_REG__SMC,
-			ixSMU_PM_STATUS_49);
-	query->max_gpu_power = cgs_read_ind_register(hwmgr->device,
-			CGS_IND_REG__SMC,
-			ixSMU_PM_STATUS_94);
-	query->average_gpu_power = cgs_read_ind_register(hwmgr->device,
-			CGS_IND_REG__SMC,
-			ixSMU_PM_STATUS_95);
+	if (tmp != 0)
+		return 0;
+
+	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+							ixSMU_PM_STATUS_94, 0);
+
+	for (i = 0; i < 10; i++) {
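+	/* poll up to ~10 ms for the SMC to publish a nonzero power reading */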
+		mdelay(1);
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogSample);
+		tmp = cgs_read_ind_register(hwmgr->device,
+						CGS_IND_REG__SMC,
+						ixSMU_PM_STATUS_94);
+		if (tmp != 0)
+			break;
+	}
+	*query = tmp;
 
 	return 0;
 }
@@ -3447,10 +3489,7 @@
 		*size = 4;
 		return 0;
 	case AMDGPU_PP_SENSOR_GPU_POWER:
-		if (*size < sizeof(struct pp_gpu_power))
-			return -EINVAL;
-		*size = sizeof(struct pp_gpu_power);
-		return smu7_get_gpu_power(hwmgr, (struct pp_gpu_power *)value);
+		return smu7_get_gpu_power(hwmgr, (uint32_t *)value);
 	case AMDGPU_PP_SENSOR_VDDGFX:
 		if ((data->vr_config & 0xff) == 0x2)
 			val_vid = PHM_READ_INDIRECT_FIELD(hwmgr->device,
@@ -3481,7 +3520,6 @@
 			[smu7_ps->performance_level_count - 1].memory_clock;
 	struct PP_Clocks min_clocks = {0};
 	uint32_t i;
-	struct cgs_display_info info = {0};
 
 	for (i = 0; i < sclk_table->count; i++) {
 		if (sclk == sclk_table->dpm_levels[i].value)
@@ -3508,9 +3546,8 @@
 	if (i >= mclk_table->count)
 		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
 
-	if (data->display_timing.num_existing_displays != info.display_count)
+	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
 		data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK;
 
 	return 0;
@@ -3813,9 +3850,14 @@
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
-	if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK)
-		smum_send_msg_to_smc_with_parameter(hwmgr,
-			(PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+	if (hwmgr->feature_mask & PP_VBI_TIME_SUPPORT_MASK) {
+		if (hwmgr->chip_id == CHIP_VEGAM)
+			smum_send_msg_to_smc_with_parameter(hwmgr,
+					(PPSMC_Msg)PPSMC_MSG_SetVBITimeout_VEGAM, data->frame_time_x2);
+		else
+			smum_send_msg_to_smc_with_parameter(hwmgr,
+					(PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+	}
 	return (smum_send_msg_to_smc(hwmgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ?  0 : -EINVAL;
 }
 
@@ -3909,15 +3951,8 @@
 static int
 smu7_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr)
 {
-	uint32_t num_active_displays = 0;
-	struct cgs_display_info info = {0};
-
-	info.mode_info = NULL;
-	cgs_get_active_displays_info(hwmgr->device, &info);
-
-	num_active_displays = info.display_count;
-
-	if (num_active_displays > 1 && hwmgr->display_config.multi_monitor_in_sync != true)
+	if (hwmgr->display_config->num_display > 1 &&
+			!hwmgr->display_config->multi_monitor_in_sync)
 		smu7_notify_smc_display_change(hwmgr, false);
 
 	return 0;
@@ -3932,33 +3967,24 @@
 static int smu7_program_display_gap(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
-	uint32_t num_active_displays = 0;
 	uint32_t display_gap = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL);
 	uint32_t display_gap2;
 	uint32_t pre_vbi_time_in_us;
 	uint32_t frame_time_in_us;
-	uint32_t ref_clock;
-	uint32_t refresh_rate = 0;
-	struct cgs_display_info info = {0};
-	struct cgs_mode_info mode_info = {0};
+	uint32_t ref_clock, refresh_rate;
 
-	info.mode_info = &mode_info;
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	num_active_displays = info.display_count;
-
-	display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (num_active_displays > 0) ? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE);
+	display_gap = PHM_SET_FIELD(display_gap, CG_DISPLAY_GAP_CNTL, DISP_GAP, (hwmgr->display_config->num_display > 0) ? DISPLAY_GAP_VBLANK_OR_WM : DISPLAY_GAP_IGNORE);
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL, display_gap);
 
 	ref_clock =  amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
-
-	refresh_rate = mode_info.refresh_rate;
+	refresh_rate = hwmgr->display_config->vrefresh;
 
 	if (0 == refresh_rate)
 		refresh_rate = 60;
 
 	frame_time_in_us = 1000000 / refresh_rate;
 
-	pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us;
+	pre_vbi_time_in_us = frame_time_in_us - 200 - hwmgr->display_config->min_vblank_time;
 
 	data->frame_time_x2 = frame_time_in_us * 2 / 100;
 
@@ -4038,17 +4064,14 @@
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	bool is_update_required = false;
-	struct cgs_display_info info = {0, 0, NULL};
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-
-	if (data->display_timing.num_existing_displays != info.display_count)
+	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
 		is_update_required = true;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep)) {
-		if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr &&
+		if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr &&
 			(data->display_timing.min_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK ||
-			hwmgr->display_config.min_core_set_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK))
+			hwmgr->display_config->min_core_set_clock_in_sr >= SMU7_MINIMUM_ENGINE_CLOCK))
 			is_update_required = true;
 	}
 	return is_update_required;
@@ -4103,7 +4126,7 @@
 	return 0;
 }
 
-static int smu7_upload_mc_firmware(struct pp_hwmgr *hwmgr)
+static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
@@ -4182,13 +4205,9 @@
 static int smu7_get_memory_type(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
-	uint32_t temp;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	temp = cgs_read_register(hwmgr->device, mmMC_SEQ_MISC0);
-
-	data->is_memory_gddr5 = (MC_SEQ_MISC0_GDDR5_VALUE ==
-			((temp & MC_SEQ_MISC0_GDDR5_MASK) >>
-			 MC_SEQ_MISC0_GDDR5_SHIFT));
+	data->is_memory_gddr5 = (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5);
 
 	return 0;
 }
@@ -4236,7 +4255,7 @@
 {
 	int tmp_result, result = 0;
 
-	smu7_upload_mc_firmware(hwmgr);
+	smu7_check_mc_firmware(hwmgr);
 
 	tmp_result = smu7_read_clock_registers(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
@@ -4371,22 +4390,36 @@
 		break;
 	case OD_SCLK:
 		if (hwmgr->od_enabled) {
-			size = sprintf(buf, "%s: \n", "OD_SCLK");
+			size = sprintf(buf, "%s:\n", "OD_SCLK");
 			for (i = 0; i < odn_sclk_table->num_of_pl; i++)
-				size += sprintf(buf + size, "%d: %10uMhz %10u mV\n",
-					i, odn_sclk_table->entries[i].clock / 100,
+				size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+					i, odn_sclk_table->entries[i].clock/100,
 					odn_sclk_table->entries[i].vddc);
 		}
 		break;
 	case OD_MCLK:
 		if (hwmgr->od_enabled) {
-			size = sprintf(buf, "%s: \n", "OD_MCLK");
+			size = sprintf(buf, "%s:\n", "OD_MCLK");
 			for (i = 0; i < odn_mclk_table->num_of_pl; i++)
-				size += sprintf(buf + size, "%d: %10uMhz %10u mV\n",
-					i, odn_mclk_table->entries[i].clock / 100,
+				size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+					i, odn_mclk_table->entries[i].clock/100,
 					odn_mclk_table->entries[i].vddc);
 		}
 		break;
+	case OD_RANGE:
+		if (hwmgr->od_enabled) {
+			size = sprintf(buf, "%s:\n", "OD_RANGE");
+			size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
+				data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+			size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
+				data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+			size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
+				data->odn_dpm_table.min_vddc,
+				data->odn_dpm_table.max_vddc);
+		}
+		break;
 	default:
 		break;
 	}
@@ -4670,36 +4703,27 @@
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
-	struct phm_ppt_v1_information *table_info =
-			(struct phm_ppt_v1_information *)(hwmgr->pptable);
-	uint32_t min_vddc;
-	struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table;
-
-	if (table_info == NULL)
-		return false;
-
-	dep_sclk_table = table_info->vdd_dep_on_sclk;
-	min_vddc = dep_sclk_table->entries[0].vddc;
-
-	if (voltage < min_vddc || voltage > 2000) {
-		pr_info("OD voltage is out of range [%d - 2000] mV\n", min_vddc);
+	if (voltage < data->odn_dpm_table.min_vddc || voltage > data->odn_dpm_table.max_vddc) {
+		pr_info("OD voltage is out of range [%d - %d] mV\n",
+						data->odn_dpm_table.min_vddc,
+						data->odn_dpm_table.max_vddc);
 		return false;
 	}
 
 	if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
-		if (data->vbios_boot_state.sclk_bootup_value > clk ||
+		if (data->golden_dpm_table.sclk_table.dpm_levels[0].value > clk ||
 			hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) {
 			pr_info("OD engine clock is out of range [%d - %d] MHz\n",
-				data->vbios_boot_state.sclk_bootup_value,
-				hwmgr->platform_descriptor.overdriveLimit.engineClock / 100);
+				data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
 			return false;
 		}
 	} else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
-		if (data->vbios_boot_state.mclk_bootup_value > clk ||
+		if (data->golden_dpm_table.mclk_table.dpm_levels[0].value > clk ||
 			hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) {
 			pr_info("OD memory clock is out of range [%d - %d] MHz\n",
-				data->vbios_boot_state.mclk_bootup_value/100,
-				hwmgr->platform_descriptor.overdriveLimit.memoryClock / 100);
+				data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
 			return false;
 		}
 	} else {
@@ -4748,10 +4772,6 @@
 			return;
 		}
 	}
-	if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
-		data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
-		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-	}
 
 	dep_table = table_info->vdd_dep_on_sclk;
 	odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk);
@@ -4761,9 +4781,9 @@
 			return;
 		}
 	}
-	if (i == dep_table->count && data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
+	if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
 		data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
-		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
index b8d0bb3..c91e75d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
@@ -184,6 +184,8 @@
 	struct smu7_odn_clock_voltage_dependency_table	vdd_dependency_on_sclk;
 	struct smu7_odn_clock_voltage_dependency_table	vdd_dependency_on_mclk;
 	uint32_t					odn_mclk_min_limit;
+	uint32_t min_vddc;
+	uint32_t max_vddc;
 };
 
 struct profile_mode_setting {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
index d9e92e3..c952845 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
@@ -623,6 +623,190 @@
 	{   0xFFFFFFFF  }  /* End of list */
 };
 
+static const struct gpu_pt_config_reg GCCACConfig_VegaM[] =
+{
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+//      Offset                             Mask                                                Shift                                               Value       Type
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+    // DIDT_SQ
+    //
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00060013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00860013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01060013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01860013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02060013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02860013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03060013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03860013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x04060013, GPU_CONFIGREG_GC_CAC_IND },
+
+    // DIDT_TD
+    //
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x000E0013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x008E0013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x010E0013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x018E0013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x020E0013, GPU_CONFIGREG_GC_CAC_IND },
+
+    // DIDT_TCP
+    //
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00100013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00900013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01100013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01900013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02100013, GPU_CONFIGREG_GC_CAC_IND },
+    {   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02900013, GPU_CONFIGREG_GC_CAC_IND },
+
+    {   0xFFFFFFFF  }  // End of list
+};
+
+static const struct gpu_pt_config_reg DIDTConfig_VegaM[] =
+{
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+//      Offset                             Mask                                                Shift                                               Value       Type
+// ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+    // DIDT_SQ
+    //
+    {   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT,                  0x0073,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT,                  0x00ab,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0084,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT,                  0x005a,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0067,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT,                  0x0084,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0027,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0046,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT,                 0x00aa,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MIN_POWER_MASK,                      DIDT_SQ_CTRL1__MIN_POWER__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MAX_POWER_MASK,                      DIDT_SQ_CTRL1__MAX_POWER__SHIFT,                    0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__UNUSED_0_MASK,                    DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3853,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_0_MASK,                       DIDT_SQ_CTRL2__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x005a,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_1_MASK,                       DIDT_SQ_CTRL2__UNUSED_1__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_2_MASK,                       DIDT_SQ_CTRL2__UNUSED_2__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__UNUSED_0_MASK,                  DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x3853,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x3153,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__PHASE_OFFSET_MASK,                   DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__UNUSED_0_MASK,                       DIDT_SQ_CTRL0__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    // DIDT_TD
+    //
+    {   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT,                  0x000a,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT,                  0x0010,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0017,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT,                  0x002f,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0046,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT,                  0x005d,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MIN_POWER_MASK,                      DIDT_TD_CTRL1__MIN_POWER__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MAX_POWER_MASK,                      DIDT_TD_CTRL1__MAX_POWER__SHIFT,                    0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__UNUSED_0_MASK,                    DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0x00ff,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3fff,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_0_MASK,                       DIDT_TD_CTRL2__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x000f,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_1_MASK,                       DIDT_TD_CTRL2__UNUSED_1__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_2_MASK,                       DIDT_TD_CTRL2__UNUSED_2__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__UNUSED_0_MASK,                  DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x0dde,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x0dde,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0009,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0009,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__UNUSED_0_MASK,                       DIDT_TD_CTRL0__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    // DIDT_TCP
+    //
+    {   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT,                 0x0004,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT,                 0x0037,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT,                 0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT,                 0x00ff,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT,                 0x0054,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MIN_POWER_MASK,                     DIDT_TCP_CTRL1__MIN_POWER__SHIFT,                   0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MAX_POWER_MASK,                     DIDT_TCP_CTRL1__MAX_POWER__SHIFT,                   0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__UNUSED_0_MASK,                   DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK,              DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT,            0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK,               DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT,             0x3dde,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_0_MASK,                      DIDT_TCP_CTRL2__UNUSED_0__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,      DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,    0x0032,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_1_MASK,                      DIDT_TCP_CTRL2__UNUSED_1__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,      DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,    0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_2_MASK,                      DIDT_TCP_CTRL2__UNUSED_2__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,   DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,    0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,    0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,  DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT,0x01aa,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__UNUSED_0_MASK,                 DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,      DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,    0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,    0x3dde,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,    0x3dde,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK,                DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT,              0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK,                  DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT,                0x0001,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK,                 DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__PHASE_OFFSET_MASK,                  DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK,                 DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,          DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,        0x0000,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,    DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,  0x0010,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,    DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,  0x0010,     GPU_CONFIGREG_DIDT_IND },
+    {   ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__UNUSED_0_MASK,                      DIDT_TCP_CTRL0__UNUSED_0__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+    {   0xFFFFFFFF  }  // End of list
+};
 static int smu7_enable_didt(struct pp_hwmgr *hwmgr, const bool enable)
 {
 	uint32_t en = enable ? 1 : 0;
@@ -740,8 +924,8 @@
 	    PP_CAP(PHM_PlatformCaps_TDRamping) ||
 	    PP_CAP(PHM_PlatformCaps_TCPRamping)) {
 
-		cgs_enter_safe_mode(hwmgr->device, true);
-		cgs_lock_grbm_idx(hwmgr->device, true);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);
+		mutex_lock(&adev->grbm_idx_mutex);
 		value = 0;
 		value2 = cgs_read_register(hwmgr->device, mmGRBM_GFX_INDEX);
 		for (count = 0; count < num_se; count++) {
@@ -752,67 +936,80 @@
 
 			if (hwmgr->chip_id == CHIP_POLARIS10) {
 				result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris10);
-				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
 				result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris10);
-				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
 			} else if (hwmgr->chip_id == CHIP_POLARIS11) {
 				result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
-				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
 				if (hwmgr->is_kicker)
 					result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11_Kicker);
 				else
 					result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11);
-				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
 			} else if (hwmgr->chip_id == CHIP_POLARIS12) {
 				result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
-				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
 				result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris12);
-				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
+			} else if (hwmgr->chip_id == CHIP_VEGAM) {
+				result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_VegaM);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
+				result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_VegaM);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", goto error);
 			}
 		}
 		cgs_write_register(hwmgr->device, mmGRBM_GFX_INDEX, value2);
 
 		result = smu7_enable_didt(hwmgr, true);
-		PP_ASSERT_WITH_CODE((result == 0), "EnableDiDt failed.", return result);
+		PP_ASSERT_WITH_CODE((result == 0), "EnableDiDt failed.", goto error);
 
 		if (hwmgr->chip_id == CHIP_POLARIS11) {
 			result = smum_send_msg_to_smc(hwmgr,
 						(uint16_t)(PPSMC_MSG_EnableDpmDidt));
 			PP_ASSERT_WITH_CODE((0 == result),
-					"Failed to enable DPM DIDT.", return result);
+					"Failed to enable DPM DIDT.", goto error);
 		}
-		cgs_lock_grbm_idx(hwmgr->device, false);
-		cgs_enter_safe_mode(hwmgr->device, false);
+		mutex_unlock(&adev->grbm_idx_mutex);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}
 
 	return 0;
+error:
+	mutex_unlock(&adev->grbm_idx_mutex);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
+	return result;
 }
 
 int smu7_disable_didt_config(struct pp_hwmgr *hwmgr)
 {
 	int result;
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (PP_CAP(PHM_PlatformCaps_SQRamping) ||
 	    PP_CAP(PHM_PlatformCaps_DBRamping) ||
 	    PP_CAP(PHM_PlatformCaps_TDRamping) ||
 	    PP_CAP(PHM_PlatformCaps_TCPRamping)) {
 
-		cgs_enter_safe_mode(hwmgr->device, true);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 		result = smu7_enable_didt(hwmgr, false);
 		PP_ASSERT_WITH_CODE((result == 0),
 				"Post DIDT enable clock gating failed.",
-				return result);
+				goto error);
 		if (hwmgr->chip_id == CHIP_POLARIS11) {
 			result = smum_send_msg_to_smc(hwmgr,
 						(uint16_t)(PPSMC_MSG_DisableDpmDidt));
 			PP_ASSERT_WITH_CODE((0 == result),
-					"Failed to disable DPM DIDT.", return result);
+					"Failed to disable DPM DIDT.", goto error);
 		}
-		cgs_enter_safe_mode(hwmgr->device, false);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}
 
 	return 0;
+error:
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
+	return result;
 }
 
 int smu7_enable_smc_cac(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
index 7b26607..50690c7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
@@ -314,8 +314,7 @@
 	uint8_t frev, crev;
 	uint16_t size;
 
-	info = (ATOM_INTEGRATED_SYSTEM_INFO_V1_9 *) cgs_atom_get_data_table(
-			hwmgr->device,
+	info = (ATOM_INTEGRATED_SYSTEM_INFO_V1_9 *)smu_atom_get_data_table(hwmgr->adev,
 			GetIndexIntoMasterTable(DATA, IntegratedSystemInfo),
 			&size, &frev, &crev);
 
@@ -694,7 +693,7 @@
 	else
 		data->sclk_dpm.soft_max_clk  = table->entries[table->count - 1].clk;
 
-	clock = hwmgr->display_config.min_core_set_clock;
+	clock = hwmgr->display_config->min_core_set_clock;
 	if (clock == 0)
 		pr_debug("min_core_set_clock not set\n");
 
@@ -749,7 +748,7 @@
 {
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_SclkDeepSleep)) {
-		uint32_t clks = hwmgr->display_config.min_core_set_clock_in_sr;
+		uint32_t clks = hwmgr->display_config->min_core_set_clock_in_sr;
 		if (clks == 0)
 			clks = SMU8_MIN_DEEP_SLEEP_SCLK;
 
@@ -1041,25 +1040,21 @@
 	struct smu8_hwmgr *data = hwmgr->backend;
 	struct PP_Clocks clocks = {0, 0, 0, 0};
 	bool force_high;
-	uint32_t  num_of_active_displays = 0;
-	struct cgs_display_info info = {0};
 
 	smu8_ps->need_dfs_bypass = true;
 
 	data->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label);
 
-	clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ?
-				hwmgr->display_config.min_mem_set_clock :
+	clocks.memoryClock = hwmgr->display_config->min_mem_set_clock != 0 ?
+				hwmgr->display_config->min_mem_set_clock :
 				data->sys_info.nbp_memory_clock[1];
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	num_of_active_displays = info.display_count;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
 		clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk;
 
 	force_high = (clocks.memoryClock > data->sys_info.nbp_memory_clock[SMU8_NUM_NBPMEMORYCLOCK - 1])
-			|| (num_of_active_displays >= 3);
+			|| (hwmgr->display_config->num_display >= 3);
 
 	smu8_ps->action = smu8_current_ps->action;
 
@@ -1897,20 +1892,20 @@
 	data->uvd_power_gated = bgate;
 
 	if (bgate) {
-		cgs_set_powergating_state(hwmgr->device,
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_PG_STATE_GATE);
-		cgs_set_clockgating_state(hwmgr->device,
+		amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_CG_STATE_GATE);
 		smu8_dpm_update_uvd_dpm(hwmgr, true);
 		smu8_dpm_powerdown_uvd(hwmgr);
 	} else {
 		smu8_dpm_powerup_uvd(hwmgr);
-		cgs_set_clockgating_state(hwmgr->device,
+		amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_CG_STATE_UNGATE);
-		cgs_set_powergating_state(hwmgr->device,
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_PG_STATE_UNGATE);
 		smu8_dpm_update_uvd_dpm(hwmgr, false);
@@ -1923,12 +1918,10 @@
 	struct smu8_hwmgr *data = hwmgr->backend;
 
 	if (bgate) {
-		cgs_set_powergating_state(
-					hwmgr->device,
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 					AMD_IP_BLOCK_TYPE_VCE,
 					AMD_PG_STATE_GATE);
-		cgs_set_clockgating_state(
-					hwmgr->device,
+		amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 					AMD_IP_BLOCK_TYPE_VCE,
 					AMD_CG_STATE_GATE);
 		smu8_enable_disable_vce_dpm(hwmgr, false);
@@ -1937,12 +1930,10 @@
 	} else {
 		smu8_dpm_powerup_vce(hwmgr);
 		data->vce_power_gated = false;
-		cgs_set_clockgating_state(
-					hwmgr->device,
+		amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 					AMD_IP_BLOCK_TYPE_VCE,
 					AMD_CG_STATE_UNGATE);
-		cgs_set_powergating_state(
-					hwmgr->device,
+		amdgpu_device_ip_set_powergating_state(hwmgr->adev,
 					AMD_IP_BLOCK_TYPE_VCE,
 					AMD_PG_STATE_UNGATE);
 		smu8_dpm_update_vce_dpm(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
index 5981228..93a3d02 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
@@ -24,6 +24,7 @@
 #include "pp_debug.h"
 #include "ppatomctrl.h"
 #include "ppsmc.h"
+#include "atom.h"
 
 uint8_t convert_to_vid(uint16_t vddc)
 {
@@ -608,3 +609,100 @@
 
 	return 0;
 }
+
+void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size,
+						uint8_t *frev, uint8_t *crev)
+{
+	struct amdgpu_device *adev = dev;
+	uint16_t data_start;
+
+	if (amdgpu_atom_parse_data_header(
+		    adev->mode_info.atom_context, table, size,
+		    frev, crev, &data_start))
+		return (uint8_t *)adev->mode_info.atom_context->bios +
+			data_start;
+
+	return NULL;
+}
+
+int smu_get_voltage_dependency_table_ppt_v1(
+			const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table,
+			struct phm_ppt_v1_clock_voltage_dependency_table *dep_table)
+{
+	uint8_t i = 0;
+	PP_ASSERT_WITH_CODE((0 != allowed_dep_table->count),
+				"Voltage Lookup Table empty",
+				return -EINVAL);
+
+	dep_table->count = allowed_dep_table->count;
+	for (i=0; i<dep_table->count; i++) {
+		dep_table->entries[i].clk = allowed_dep_table->entries[i].clk;
+		dep_table->entries[i].vddInd = allowed_dep_table->entries[i].vddInd;
+		dep_table->entries[i].vdd_offset = allowed_dep_table->entries[i].vdd_offset;
+		dep_table->entries[i].vddc = allowed_dep_table->entries[i].vddc;
+		dep_table->entries[i].vddgfx = allowed_dep_table->entries[i].vddgfx;
+		dep_table->entries[i].vddci = allowed_dep_table->entries[i].vddci;
+		dep_table->entries[i].mvdd = allowed_dep_table->entries[i].mvdd;
+		dep_table->entries[i].phases = allowed_dep_table->entries[i].phases;
+		dep_table->entries[i].cks_enable = allowed_dep_table->entries[i].cks_enable;
+		dep_table->entries[i].cks_voffset = allowed_dep_table->entries[i].cks_voffset;
+	}
+
+	return 0;
+}
+
+int smu_set_watermarks_for_clocks_ranges(void *wt_table,
+		struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges)
+{
+	uint32_t i;
+	struct watermarks *table = wt_table;
+
+	if (!table || !wm_with_clock_ranges)
+		return -EINVAL;
+
+	if (wm_with_clock_ranges->num_wm_sets_dmif > 4 || wm_with_clock_ranges->num_wm_sets_mcif > 4)
+		return -EINVAL;
+
+	for (i = 0; i < wm_with_clock_ranges->num_wm_sets_dmif; i++) {
+		table->WatermarkRow[1][i].MinClock =
+			cpu_to_le16((uint16_t)
+			(wm_with_clock_ranges->wm_sets_dmif[i].wm_min_dcefclk_in_khz) /
+			100);
+		table->WatermarkRow[1][i].MaxClock =
+			cpu_to_le16((uint16_t)
+			(wm_with_clock_ranges->wm_sets_dmif[i].wm_max_dcefclk_in_khz) /
+			100);
+		table->WatermarkRow[1][i].MinUclk =
+			cpu_to_le16((uint16_t)
+			(wm_with_clock_ranges->wm_sets_dmif[i].wm_min_memclk_in_khz) /
+			100);
+		table->WatermarkRow[1][i].MaxUclk =
+			cpu_to_le16((uint16_t)
+			(wm_with_clock_ranges->wm_sets_dmif[i].wm_max_memclk_in_khz) /
+			100);
+		table->WatermarkRow[1][i].WmSetting = (uint8_t)
+				wm_with_clock_ranges->wm_sets_dmif[i].wm_set_id;
+	}
+
+	for (i = 0; i < wm_with_clock_ranges->num_wm_sets_mcif; i++) {
+		table->WatermarkRow[0][i].MinClock =
+			cpu_to_le16((uint16_t)
+			(wm_with_clock_ranges->wm_sets_mcif[i].wm_min_socclk_in_khz) /
+			100);
+		table->WatermarkRow[0][i].MaxClock =
+			cpu_to_le16((uint16_t)
+			(wm_with_clock_ranges->wm_sets_mcif[i].wm_max_socclk_in_khz) /
+			100);
+		table->WatermarkRow[0][i].MinUclk =
+			cpu_to_le16((uint16_t)
+			(wm_with_clock_ranges->wm_sets_mcif[i].wm_min_memclk_in_khz) /
+			100);
+		table->WatermarkRow[0][i].MaxUclk =
+			cpu_to_le16((uint16_t)
+			(wm_with_clock_ranges->wm_sets_mcif[i].wm_max_memclk_in_khz) /
+			100);
+		table->WatermarkRow[0][i].WmSetting = (uint8_t)
+				wm_with_clock_ranges->wm_sets_mcif[i].wm_set_id;
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h
index d37d16e..916cc01 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.h
@@ -26,10 +26,27 @@
 struct pp_atomctrl_voltage_table;
 struct pp_hwmgr;
 struct phm_ppt_v1_voltage_lookup_table;
+struct Watermarks_t;
+struct pp_wm_sets_with_clock_ranges_soc15;
 
 uint8_t convert_to_vid(uint16_t vddc);
 uint16_t convert_to_vddc(uint8_t vid);
 
+struct watermark_row_generic_t {
+	uint16_t MinClock;
+	uint16_t MaxClock;
+	uint16_t MinUclk;
+	uint16_t MaxUclk;
+
+	uint8_t  WmSetting;
+	uint8_t  Padding[3];
+};
+
+struct watermarks {
+	struct watermark_row_generic_t WatermarkRow[2][4];
+	uint32_t     padding[7];
+};
+
 extern int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr,
 					uint32_t index,
 					uint32_t value, uint32_t mask);
@@ -82,6 +99,16 @@
 
 int smu9_register_irq_handlers(struct pp_hwmgr *hwmgr);
 
+void *smu_atom_get_data_table(void *dev, uint32_t table, uint16_t *size,
+						uint8_t *frev, uint8_t *crev);
+
+int smu_get_voltage_dependency_table_ppt_v1(
+	const struct phm_ppt_v1_clock_voltage_dependency_table *allowed_dep_table,
+		struct phm_ppt_v1_clock_voltage_dependency_table *dep_table);
+
+int smu_set_watermarks_for_clocks_ranges(void *wt_table,
+		struct pp_wm_sets_with_clock_ranges_soc15 *wm_with_clock_ranges);
+
 #define PHM_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
 #define PHM_FIELD_MASK(reg, field) reg##__##field##_MASK
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 7cbb56b..d156b7b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -36,7 +36,7 @@
 #include "smu9.h"
 #include "smu9_driver_if.h"
 #include "vega10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 #include "pppcielanes.h"
 #include "vega10_hwmgr.h"
 #include "vega10_processpptables.h"
@@ -51,10 +51,6 @@
 #include "smuio/smuio_9_0_offset.h"
 #include "smuio/smuio_9_0_sh_mask.h"
 
-#define VOLTAGE_SCALE  4
-#define VOLTAGE_VID_OFFSET_SCALE1   625
-#define VOLTAGE_VID_OFFSET_SCALE2   100
-
 #define HBM_MEMORY_CHANNEL_WIDTH    128
 
 static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
@@ -79,8 +75,6 @@
 #define DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK                                                        0x000000F0L
 #define DF_CS_AON0_DramBaseAddress0__IntLvAddrSel_MASK                                                        0x00000700L
 #define DF_CS_AON0_DramBaseAddress0__DramBaseAddr_MASK                                                        0xFFFFF000L
-static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
-		enum pp_clock_type type, uint32_t mask);
 
 static const ULONG PhwVega10_Magic = (ULONG)(PHM_VIslands_Magic);
 
@@ -291,6 +285,48 @@
 	return 0;
 }
 
+static int vega10_odn_initial_default_setting(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+	struct phm_ppt_v2_information *table_info =
+			(struct phm_ppt_v2_information *)(hwmgr->pptable);
+	struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table);
+	struct vega10_odn_vddc_lookup_table *od_lookup_table;
+	struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table;
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_table[3];
+	struct phm_ppt_v1_clock_voltage_dependency_table *od_table[3];
+	uint32_t i;
+
+	od_lookup_table = &odn_table->vddc_lookup_table;
+	vddc_lookup_table = table_info->vddc_lookup_table;
+
+	for (i = 0; i < vddc_lookup_table->count; i++)
+		od_lookup_table->entries[i].us_vdd = vddc_lookup_table->entries[i].us_vdd;
+
+	od_lookup_table->count = vddc_lookup_table->count;
+
+	dep_table[0] = table_info->vdd_dep_on_sclk;
+	dep_table[1] = table_info->vdd_dep_on_mclk;
+	dep_table[2] = table_info->vdd_dep_on_socclk;
+	od_table[0] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_sclk;
+	od_table[1] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_mclk;
+	od_table[2] = (struct phm_ppt_v1_clock_voltage_dependency_table *)&odn_table->vdd_dep_on_socclk;
+
+	for (i = 0; i < 3; i++)
+		smu_get_voltage_dependency_table_ppt_v1(dep_table[i], od_table[i]);
+
+	if (odn_table->max_vddc == 0 || odn_table->max_vddc > 2000)
+		odn_table->max_vddc = dep_table[0]->entries[dep_table[0]->count - 1].vddc;
+	if (odn_table->min_vddc == 0 || odn_table->min_vddc > 2000)
+		odn_table->min_vddc = dep_table[0]->entries[0].vddc;
+
+	i = od_table[2]->count - 1;
+	od_table[2]->entries[i].clk = hwmgr->platform_descriptor.overdriveLimit.memoryClock;
+	od_table[2]->entries[i].vddc = odn_table->max_vddc;
+
+	return 0;
+}
+
 static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data = hwmgr->backend;
@@ -427,7 +463,6 @@
 		/* ACG firmware has major version 5 */
 	if ((hwmgr->smu_version & 0xff000000) == 0x5000000)
 		data->smu_features[GNLD_ACG].supported = true;
-
 	if (data->registry_data.didt_support)
 		data->smu_features[GNLD_DIDT].supported = true;
 
@@ -754,7 +789,6 @@
 	uint32_t config_telemetry = 0;
 	struct pp_atomfwctrl_voltage_table vol_table;
 	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
 
 	data = kzalloc(sizeof(struct vega10_hwmgr), GFP_KERNEL);
 	if (data == NULL)
@@ -860,10 +894,7 @@
 			advanceFanControlParameters.usFanPWMMinLimit *
 			hwmgr->thermal_controller.fanInfo.ulMaxRPM / 100;
 
-	reg = soc15_get_register_offset(DF_HWID, 0,
-			mmDF_CS_AON0_DramBaseAddress0_BASE_IDX,
-			mmDF_CS_AON0_DramBaseAddress0);
-	data->mem_channels = (cgs_read_register(hwmgr->device, reg) &
+	data->mem_channels = (RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0) &
 			DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK) >>
 			DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
 	PP_ASSERT_WITH_CODE(data->mem_channels < ARRAY_SIZE(channel_number),
@@ -1370,48 +1401,6 @@
 	memcpy(&(data->golden_dpm_table), &(data->dpm_table),
 			sizeof(struct vega10_dpm_table));
 
-	if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) ||
-	    PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) {
-		data->odn_dpm_table.odn_core_clock_dpm_levels.num_of_pl =
-						data->dpm_table.gfx_table.count;
-		for (i = 0; i < data->dpm_table.gfx_table.count; i++) {
-			data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].clock =
-					data->dpm_table.gfx_table.dpm_levels[i].value;
-			data->odn_dpm_table.odn_core_clock_dpm_levels.entries[i].enabled = true;
-		}
-
-		data->odn_dpm_table.vdd_dependency_on_sclk.count =
-				dep_gfx_table->count;
-		for (i = 0; i < dep_gfx_table->count; i++) {
-			data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].clk =
-					dep_gfx_table->entries[i].clk;
-			data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].vddInd =
-					dep_gfx_table->entries[i].vddInd;
-			data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].cks_enable =
-					dep_gfx_table->entries[i].cks_enable;
-			data->odn_dpm_table.vdd_dependency_on_sclk.entries[i].cks_voffset =
-					dep_gfx_table->entries[i].cks_voffset;
-		}
-
-		data->odn_dpm_table.odn_memory_clock_dpm_levels.num_of_pl =
-						data->dpm_table.mem_table.count;
-		for (i = 0; i < data->dpm_table.mem_table.count; i++) {
-			data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].clock =
-					data->dpm_table.mem_table.dpm_levels[i].value;
-			data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[i].enabled = true;
-		}
-
-		data->odn_dpm_table.vdd_dependency_on_mclk.count = dep_mclk_table->count;
-		for (i = 0; i < dep_mclk_table->count; i++) {
-			data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].clk =
-					dep_mclk_table->entries[i].clk;
-			data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].vddInd =
-					dep_mclk_table->entries[i].vddInd;
-			data->odn_dpm_table.vdd_dependency_on_mclk.entries[i].vddci =
-					dep_mclk_table->entries[i].vddci;
-		}
-	}
-
 	return 0;
 }
 
@@ -1514,18 +1503,18 @@
 {
 	struct phm_ppt_v2_information *table_info =
 			(struct phm_ppt_v2_information *)(hwmgr->pptable);
-	struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_sclk =
-			table_info->vdd_dep_on_sclk;
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_sclk;
 	struct vega10_hwmgr *data = hwmgr->backend;
 	struct pp_atomfwctrl_clock_dividers_soc15 dividers;
 	uint32_t gfx_max_clock =
 			hwmgr->platform_descriptor.overdriveLimit.engineClock;
 	uint32_t i = 0;
 
-	if (data->apply_overdrive_next_settings_mask &
-			DPMTABLE_OD_UPDATE_VDDC)
+	if (hwmgr->od_enabled)
 		dep_on_sclk = (struct phm_ppt_v1_clock_voltage_dependency_table *)
-						&(data->odn_dpm_table.vdd_dependency_on_sclk);
+						&(data->odn_dpm_table.vdd_dep_on_sclk);
+	else
+		dep_on_sclk = table_info->vdd_dep_on_sclk;
 
 	PP_ASSERT_WITH_CODE(dep_on_sclk,
 			"Invalid SOC_VDD-GFX_CLK Dependency Table!",
@@ -1577,23 +1566,32 @@
 		uint32_t soc_clock, uint8_t *current_soc_did,
 		uint8_t *current_vol_index)
 {
+	struct vega10_hwmgr *data = hwmgr->backend;
 	struct phm_ppt_v2_information *table_info =
 			(struct phm_ppt_v2_information *)(hwmgr->pptable);
-	struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_soc =
-			table_info->vdd_dep_on_socclk;
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_soc;
 	struct pp_atomfwctrl_clock_dividers_soc15 dividers;
 	uint32_t i;
 
-	PP_ASSERT_WITH_CODE(dep_on_soc,
-			"Invalid SOC_VDD-SOC_CLK Dependency Table!",
-			return -EINVAL);
-	for (i = 0; i < dep_on_soc->count; i++) {
-		if (dep_on_soc->entries[i].clk == soc_clock)
-			break;
+	if (hwmgr->od_enabled) {
+		dep_on_soc = (struct phm_ppt_v1_clock_voltage_dependency_table *)
+						&data->odn_dpm_table.vdd_dep_on_socclk;
+		for (i = 0; i < dep_on_soc->count; i++) {
+			if (dep_on_soc->entries[i].clk >= soc_clock)
+				break;
+		}
+	} else {
+		dep_on_soc = table_info->vdd_dep_on_socclk;
+		for (i = 0; i < dep_on_soc->count; i++) {
+			if (dep_on_soc->entries[i].clk == soc_clock)
+				break;
+		}
 	}
+
 	PP_ASSERT_WITH_CODE(dep_on_soc->count > i,
 			"Cannot find SOC_CLK in SOC_VDD-SOC_CLK Dependency Table",
 			return -EINVAL);
+
 	PP_ASSERT_WITH_CODE(!pp_atomfwctrl_get_gpu_pll_dividers_vega10(hwmgr,
 			COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
 			soc_clock, &dividers),
@@ -1602,22 +1600,6 @@
 
 	*current_soc_did = (uint8_t)dividers.ulDid;
 	*current_vol_index = (uint8_t)(dep_on_soc->entries[i].vddInd);
-
-	return 0;
-}
-
-uint16_t vega10_locate_vddc_given_clock(struct pp_hwmgr *hwmgr,
-		uint32_t clk,
-		struct phm_ppt_v1_clock_voltage_dependency_table *dep_table)
-{
-	uint16_t i;
-
-	for (i = 0; i < dep_table->count; i++) {
-		if (dep_table->entries[i].clk == clk)
-			return dep_table->entries[i].vddc;
-	}
-
-	pr_info("[LocateVddcGivenClock] Cannot locate SOC Vddc for this clock!");
 	return 0;
 }
 
@@ -1631,8 +1613,6 @@
 	struct vega10_hwmgr *data = hwmgr->backend;
 	struct phm_ppt_v2_information *table_info =
 			(struct phm_ppt_v2_information *)(hwmgr->pptable);
-	struct phm_ppt_v1_clock_voltage_dependency_table *dep_table =
-			table_info->vdd_dep_on_socclk;
 	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
 	struct vega10_single_dpm_table *dpm_table = &(data->dpm_table.gfx_table);
 	int result = 0;
@@ -1663,11 +1643,6 @@
 
 	dpm_table = &(data->dpm_table.soc_table);
 	for (i = 0; i < dpm_table->count; i++) {
-		pp_table->SocVid[i] =
-				(uint8_t)convert_to_vid(
-				vega10_locate_vddc_given_clock(hwmgr,
-						dpm_table->dpm_levels[i].value,
-						dep_table));
 		result = vega10_populate_single_soc_level(hwmgr,
 				dpm_table->dpm_levels[i].value,
 				&(pp_table->SocclkDid[i]),
@@ -1678,7 +1653,6 @@
 
 	j = i - 1;
 	while (i < NUM_SOCCLK_DPM_LEVELS) {
-		pp_table->SocVid[i] = pp_table->SocVid[j];
 		result = vega10_populate_single_soc_level(hwmgr,
 				dpm_table->dpm_levels[j].value,
 				&(pp_table->SocclkDid[i]),
@@ -1691,6 +1665,32 @@
 	return result;
 }
 
+static void vega10_populate_vddc_soc_levels(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+	PPTable_t *pp_table = &(data->smc_state_table.pp_table);
+	struct phm_ppt_v2_information *table_info = hwmgr->pptable;
+	struct phm_ppt_v1_voltage_lookup_table *vddc_lookup_table;
+
+	uint8_t soc_vid = 0;
+	uint32_t i, max_vddc_level;
+
+	if (hwmgr->od_enabled)
+		vddc_lookup_table = (struct phm_ppt_v1_voltage_lookup_table *)&data->odn_dpm_table.vddc_lookup_table;
+	else
+		vddc_lookup_table = table_info->vddc_lookup_table;
+
+	max_vddc_level = vddc_lookup_table->count;
+	for (i = 0; i < max_vddc_level; i++) {
+		soc_vid = (uint8_t)convert_to_vid(vddc_lookup_table->entries[i].us_vdd);
+		pp_table->SocVid[i] = soc_vid;
+	}
+	while (i < MAX_REGULAR_DPM_NUMBER) {
+		pp_table->SocVid[i] = soc_vid;
+		i++;
+	}
+}
+
 /**
  * @brief Populates single SMC GFXCLK structure using the provided clock.
  *
@@ -1705,25 +1705,25 @@
 	struct vega10_hwmgr *data = hwmgr->backend;
 	struct phm_ppt_v2_information *table_info =
 			(struct phm_ppt_v2_information *)(hwmgr->pptable);
-	struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_mclk =
-			table_info->vdd_dep_on_mclk;
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_on_mclk;
 	struct pp_atomfwctrl_clock_dividers_soc15 dividers;
 	uint32_t mem_max_clock =
 			hwmgr->platform_descriptor.overdriveLimit.memoryClock;
 	uint32_t i = 0;
 
-	if (data->apply_overdrive_next_settings_mask &
-			DPMTABLE_OD_UPDATE_VDDC)
+	if (hwmgr->od_enabled)
 		dep_on_mclk = (struct phm_ppt_v1_clock_voltage_dependency_table *)
-					&data->odn_dpm_table.vdd_dependency_on_mclk;
+					&data->odn_dpm_table.vdd_dep_on_mclk;
+	else
+		dep_on_mclk = table_info->vdd_dep_on_mclk;
 
 	PP_ASSERT_WITH_CODE(dep_on_mclk,
 			"Invalid SOC_VDD-UCLK Dependency Table!",
 			return -EINVAL);
 
-	if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK)
+	if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK) {
 		mem_clock = mem_clock > mem_max_clock ? mem_max_clock : mem_clock;
-	else {
+	} else {
 		for (i = 0; i < dep_on_mclk->count; i++) {
 			if (dep_on_mclk->entries[i].clk == mem_clock)
 				break;
@@ -2067,6 +2067,9 @@
 	if (data->smu_features[GNLD_AVFS].supported) {
 		result = pp_atomfwctrl_get_avfs_information(hwmgr, &avfs_params);
 		if (!result) {
+			data->odn_dpm_table.max_vddc = avfs_params.ulMaxVddc;
+			data->odn_dpm_table.min_vddc = avfs_params.ulMinVddc;
+
 			pp_table->MinVoltageVid = (uint8_t)
 					convert_to_vid((uint16_t)(avfs_params.ulMinVddc));
 			pp_table->MaxVoltageVid = (uint8_t)
@@ -2345,6 +2348,22 @@
 	return 0;
 }
 
+static int vega10_update_avfs(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+
+	if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
+		vega10_avfs_enable(hwmgr, false);
+	} else if (data->need_update_dpm_table) {
+		vega10_avfs_enable(hwmgr, false);
+		vega10_avfs_enable(hwmgr, true);
+	} else {
+		vega10_avfs_enable(hwmgr, true);
+	}
+
+	return 0;
+}
+
 static int vega10_populate_and_upload_avfs_fuse_override(struct pp_hwmgr *hwmgr)
 {
 	int result = 0;
@@ -2406,6 +2425,10 @@
 			"Failed to setup default DPM tables!",
 			return result);
 
+	/* initialize ODN table */
+	if (hwmgr->od_enabled)
+		vega10_odn_initial_default_setting(hwmgr);
+
 	pp_atomfwctrl_get_voltage_table_v4(hwmgr, VOLTAGE_TYPE_VDDC,
 			VOLTAGE_OBJ_SVID2,  &voltage_table);
 	pp_table->MaxVidStep = voltage_table.max_vid_step;
@@ -2452,6 +2475,8 @@
 			"Failed to initialize Memory Level!",
 			return result);
 
+	vega10_populate_vddc_soc_levels(hwmgr);
+
 	result = vega10_populate_all_display_clock_levels(hwmgr);
 	PP_ASSERT_WITH_CODE(!result,
 			"Failed to initialize Display Level!",
@@ -2481,6 +2506,12 @@
 		data->vbios_boot_state.mvddc    = boot_up_values.usMvddc;
 		data->vbios_boot_state.gfx_clock = boot_up_values.ulGfxClk;
 		data->vbios_boot_state.mem_clock = boot_up_values.ulUClk;
+		pp_atomfwctrl_get_clk_information_by_clkid(hwmgr,
+				SMU9_SYSPLL0_SOCCLK_ID, &boot_up_values.ulSocClk);
+
+		pp_atomfwctrl_get_clk_information_by_clkid(hwmgr,
+				SMU9_SYSPLL0_DCEFCLK_ID, &boot_up_values.ulDCEFClk);
+
 		data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk;
 		data->vbios_boot_state.dcef_clock = boot_up_values.ulDCEFClk;
 		if (0 != boot_up_values.usVddc) {
@@ -2829,7 +2860,7 @@
 
 	tmp_result = vega10_construct_voltage_tables(hwmgr);
 	PP_ASSERT_WITH_CODE(!tmp_result,
-			"Failed to contruct voltage tables!",
+			"Failed to construct voltage tables!",
 			result = tmp_result);
 
 	tmp_result = vega10_init_smc_table(hwmgr);
@@ -3028,7 +3059,6 @@
 	bool disable_mclk_switching_for_frame_lock;
 	bool disable_mclk_switching_for_vr;
 	bool force_mclk_high;
-	struct cgs_display_info info = {0};
 	const struct phm_clock_and_voltage_limits *max_limits;
 	uint32_t i;
 	struct vega10_hwmgr *data = hwmgr->backend;
@@ -3063,11 +3093,9 @@
 		}
 	}
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-
 	/* result = PHM_CheckVBlankTime(hwmgr, &vblankTooShort);*/
-	minimum_clocks.engineClock = hwmgr->display_config.min_core_set_clock;
-	minimum_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+	minimum_clocks.engineClock = hwmgr->display_config->min_core_set_clock;
+	minimum_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
 
 	if (PP_CAP(PHM_PlatformCaps_StablePState)) {
 		stable_pstate_sclk_dpm_percentage =
@@ -3107,10 +3135,10 @@
 		PP_CAP(PHM_PlatformCaps_DisableMclkSwitchForVR);
 	force_mclk_high = PP_CAP(PHM_PlatformCaps_ForceMclkHigh);
 
-	if (info.display_count == 0)
+	if (hwmgr->display_config->num_display == 0)
 		disable_mclk_switching = false;
 	else
-		disable_mclk_switching = (info.display_count > 1) ||
+		disable_mclk_switching = (hwmgr->display_config->num_display > 1) ||
 			disable_mclk_switching_for_frame_lock ||
 			disable_mclk_switching_for_vr ||
 			force_mclk_high;
@@ -3171,87 +3199,11 @@
 
 static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input)
 {
-	const struct phm_set_power_state_input *states =
-			(const struct phm_set_power_state_input *)input;
-	const struct vega10_power_state *vega10_ps =
-			cast_const_phw_vega10_power_state(states->pnew_state);
 	struct vega10_hwmgr *data = hwmgr->backend;
-	struct vega10_single_dpm_table *sclk_table =
-			&(data->dpm_table.gfx_table);
-	uint32_t sclk = vega10_ps->performance_levels
-			[vega10_ps->performance_level_count - 1].gfx_clock;
-	struct vega10_single_dpm_table *mclk_table =
-			&(data->dpm_table.mem_table);
-	uint32_t mclk = vega10_ps->performance_levels
-			[vega10_ps->performance_level_count - 1].mem_clock;
-	struct PP_Clocks min_clocks = {0};
-	uint32_t i;
-	struct cgs_display_info info = {0};
 
-	data->need_update_dpm_table = 0;
+	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
+		data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
 
-	if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) ||
-	    PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) {
-		for (i = 0; i < sclk_table->count; i++) {
-			if (sclk == sclk_table->dpm_levels[i].value)
-				break;
-		}
-
-		if (!(data->apply_overdrive_next_settings_mask &
-				DPMTABLE_OD_UPDATE_SCLK) && i >= sclk_table->count) {
-			/* Check SCLK in DAL's minimum clocks
-			 * in case DeepSleep divider update is required.
-			 */
-			if (data->display_timing.min_clock_in_sr !=
-					min_clocks.engineClockInSR &&
-				(min_clocks.engineClockInSR >=
-						VEGA10_MINIMUM_ENGINE_CLOCK ||
-					data->display_timing.min_clock_in_sr >=
-						VEGA10_MINIMUM_ENGINE_CLOCK))
-				data->need_update_dpm_table |= DPMTABLE_UPDATE_SCLK;
-		}
-
-		cgs_get_active_displays_info(hwmgr->device, &info);
-
-		if (data->display_timing.num_existing_displays !=
-				info.display_count)
-			data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
-	} else {
-		for (i = 0; i < sclk_table->count; i++) {
-			if (sclk == sclk_table->dpm_levels[i].value)
-				break;
-		}
-
-		if (i >= sclk_table->count)
-			data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
-		else {
-			/* Check SCLK in DAL's minimum clocks
-			 * in case DeepSleep divider update is required.
-			 */
-			if (data->display_timing.min_clock_in_sr !=
-					min_clocks.engineClockInSR &&
-				(min_clocks.engineClockInSR >=
-						VEGA10_MINIMUM_ENGINE_CLOCK ||
-					data->display_timing.min_clock_in_sr >=
-						VEGA10_MINIMUM_ENGINE_CLOCK))
-				data->need_update_dpm_table |= DPMTABLE_UPDATE_SCLK;
-		}
-
-		for (i = 0; i < mclk_table->count; i++) {
-			if (mclk == mclk_table->dpm_levels[i].value)
-				break;
-		}
-
-		cgs_get_active_displays_info(hwmgr->device, &info);
-
-		if (i >= mclk_table->count)
-			data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-
-		if (data->display_timing.num_existing_displays !=
-				info.display_count ||
-				i >= mclk_table->count)
-			data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
-	}
 	return 0;
 }
 
@@ -3259,194 +3211,29 @@
 		struct pp_hwmgr *hwmgr, const void *input)
 {
 	int result = 0;
-	const struct phm_set_power_state_input *states =
-			(const struct phm_set_power_state_input *)input;
-	const struct vega10_power_state *vega10_ps =
-			cast_const_phw_vega10_power_state(states->pnew_state);
 	struct vega10_hwmgr *data = hwmgr->backend;
-	uint32_t sclk = vega10_ps->performance_levels
-			[vega10_ps->performance_level_count - 1].gfx_clock;
-	uint32_t mclk = vega10_ps->performance_levels
-			[vega10_ps->performance_level_count - 1].mem_clock;
-	struct vega10_dpm_table *dpm_table = &data->dpm_table;
-	struct vega10_dpm_table *golden_dpm_table =
-			&data->golden_dpm_table;
-	uint32_t dpm_count, clock_percent;
-	uint32_t i;
 
-	if (PP_CAP(PHM_PlatformCaps_ODNinACSupport) ||
-	    PP_CAP(PHM_PlatformCaps_ODNinDCSupport)) {
+	if (!data->need_update_dpm_table)
+		return 0;
 
-		if (!data->need_update_dpm_table &&
-			!data->apply_optimized_settings &&
-			!data->apply_overdrive_next_settings_mask)
-			return 0;
-
-		if (data->apply_overdrive_next_settings_mask &
-				DPMTABLE_OD_UPDATE_SCLK) {
-			for (dpm_count = 0;
-					dpm_count < dpm_table->gfx_table.count;
-					dpm_count++) {
-				dpm_table->gfx_table.dpm_levels[dpm_count].enabled =
-					data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].enabled;
-				dpm_table->gfx_table.dpm_levels[dpm_count].value =
-					data->odn_dpm_table.odn_core_clock_dpm_levels.entries[dpm_count].clock;
-			}
-		}
-
-		if (data->apply_overdrive_next_settings_mask &
-				DPMTABLE_OD_UPDATE_MCLK) {
-			for (dpm_count = 0;
-					dpm_count < dpm_table->mem_table.count;
-					dpm_count++) {
-				dpm_table->mem_table.dpm_levels[dpm_count].enabled =
-					data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].enabled;
-				dpm_table->mem_table.dpm_levels[dpm_count].value =
-					data->odn_dpm_table.odn_memory_clock_dpm_levels.entries[dpm_count].clock;
-			}
-		}
-
-		if ((data->need_update_dpm_table & DPMTABLE_UPDATE_SCLK) ||
-			data->apply_optimized_settings ||
-			(data->apply_overdrive_next_settings_mask &
-					DPMTABLE_OD_UPDATE_SCLK)) {
-			result = vega10_populate_all_graphic_levels(hwmgr);
-			PP_ASSERT_WITH_CODE(!result,
-					"Failed to populate SCLK during PopulateNewDPMClocksStates Function!",
-					return result);
-		}
-
-		if ((data->need_update_dpm_table & DPMTABLE_UPDATE_MCLK) ||
-			(data->apply_overdrive_next_settings_mask &
-					DPMTABLE_OD_UPDATE_MCLK)){
-			result = vega10_populate_all_memory_levels(hwmgr);
-			PP_ASSERT_WITH_CODE(!result,
-					"Failed to populate MCLK during PopulateNewDPMClocksStates Function!",
-					return result);
-		}
-	} else {
-		if (!data->need_update_dpm_table &&
-				!data->apply_optimized_settings)
-			return 0;
-
-		if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_SCLK &&
-				data->smu_features[GNLD_DPM_GFXCLK].supported) {
-				dpm_table->
-				gfx_table.dpm_levels[dpm_table->gfx_table.count - 1].
-				value = sclk;
-				if (hwmgr->od_enabled) {
-					/* Need to do calculation based on the golden DPM table
-					 * as the Heatmap GPU Clock axis is also based on
-					 * the default values
-					 */
-					PP_ASSERT_WITH_CODE(
-							golden_dpm_table->gfx_table.dpm_levels
-							[golden_dpm_table->gfx_table.count - 1].value,
-							"Divide by 0!",
-							return -1);
-
-					dpm_count = dpm_table->gfx_table.count < 2 ?
-							0 : dpm_table->gfx_table.count - 2;
-					for (i = dpm_count; i > 1; i--) {
-						if (sclk > golden_dpm_table->gfx_table.dpm_levels
-							[golden_dpm_table->gfx_table.count - 1].value) {
-							clock_percent =
-								((sclk - golden_dpm_table->gfx_table.dpm_levels
-								[golden_dpm_table->gfx_table.count - 1].value) *
-								100) /
-								golden_dpm_table->gfx_table.dpm_levels
-								[golden_dpm_table->gfx_table.count - 1].value;
-
-							dpm_table->gfx_table.dpm_levels[i].value =
-								golden_dpm_table->gfx_table.dpm_levels[i].value +
-								(golden_dpm_table->gfx_table.dpm_levels[i].value *
-								clock_percent) / 100;
-						} else if (golden_dpm_table->
-								gfx_table.dpm_levels[dpm_table->gfx_table.count-1].value >
-								sclk) {
-							clock_percent =
-								((golden_dpm_table->gfx_table.dpm_levels
-								[golden_dpm_table->gfx_table.count - 1].value -
-								sclk) *	100) /
-								golden_dpm_table->gfx_table.dpm_levels
-								[golden_dpm_table->gfx_table.count-1].value;
-
-							dpm_table->gfx_table.dpm_levels[i].value =
-								golden_dpm_table->gfx_table.dpm_levels[i].value -
-								(golden_dpm_table->gfx_table.dpm_levels[i].value *
-								clock_percent) / 100;
-						} else
-							dpm_table->gfx_table.dpm_levels[i].value =
-								golden_dpm_table->gfx_table.dpm_levels[i].value;
-					}
-				}
-			}
-
-		if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_MCLK &&
-				data->smu_features[GNLD_DPM_UCLK].supported) {
-			dpm_table->
-			mem_table.dpm_levels[dpm_table->mem_table.count - 1].
-			value = mclk;
-
-			if (hwmgr->od_enabled) {
-				PP_ASSERT_WITH_CODE(
-					golden_dpm_table->mem_table.dpm_levels
-					[golden_dpm_table->mem_table.count - 1].value,
-					"Divide by 0!",
-					return -1);
-
-				dpm_count = dpm_table->mem_table.count < 2 ?
-						0 : dpm_table->mem_table.count - 2;
-				for (i = dpm_count; i > 1; i--) {
-					if (mclk > golden_dpm_table->mem_table.dpm_levels
-						[golden_dpm_table->mem_table.count-1].value) {
-						clock_percent = ((mclk -
-							golden_dpm_table->mem_table.dpm_levels
-							[golden_dpm_table->mem_table.count-1].value) *
-							100) /
-							golden_dpm_table->mem_table.dpm_levels
-							[golden_dpm_table->mem_table.count-1].value;
-
-						dpm_table->mem_table.dpm_levels[i].value =
-							golden_dpm_table->mem_table.dpm_levels[i].value +
-							(golden_dpm_table->mem_table.dpm_levels[i].value *
-							clock_percent) / 100;
-					} else if (golden_dpm_table->mem_table.dpm_levels
-							[dpm_table->mem_table.count-1].value > mclk) {
-						clock_percent = ((golden_dpm_table->mem_table.dpm_levels
-							[golden_dpm_table->mem_table.count-1].value - mclk) *
-							100) /
-							golden_dpm_table->mem_table.dpm_levels
-							[golden_dpm_table->mem_table.count-1].value;
-
-						dpm_table->mem_table.dpm_levels[i].value =
-							golden_dpm_table->mem_table.dpm_levels[i].value -
-							(golden_dpm_table->mem_table.dpm_levels[i].value *
-							clock_percent) / 100;
-					} else
-						dpm_table->mem_table.dpm_levels[i].value =
-							golden_dpm_table->mem_table.dpm_levels[i].value;
-				}
-			}
-		}
-
-		if ((data->need_update_dpm_table &
-			(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK)) ||
-			data->apply_optimized_settings) {
-			result = vega10_populate_all_graphic_levels(hwmgr);
-			PP_ASSERT_WITH_CODE(!result,
-					"Failed to populate SCLK during PopulateNewDPMClocksStates Function!",
-					return result);
-		}
-
-		if (data->need_update_dpm_table &
-				(DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) {
-			result = vega10_populate_all_memory_levels(hwmgr);
-			PP_ASSERT_WITH_CODE(!result,
-					"Failed to populate MCLK during PopulateNewDPMClocksStates Function!",
-					return result);
-		}
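+	/* repopulate graphics levels when any SCLK or SOCCLK update flag is set */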
+	if (data->need_update_dpm_table &
+			(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK + DPMTABLE_UPDATE_SOCCLK)) {
+		result = vega10_populate_all_graphic_levels(hwmgr);
+		PP_ASSERT_WITH_CODE(!result,
+				"Failed to populate SCLK during PopulateNewDPMClocksStates Function!",
+				return result);
 	}
+
+	if (data->need_update_dpm_table &
+			(DPMTABLE_OD_UPDATE_MCLK + DPMTABLE_UPDATE_MCLK)) {
+		result = vega10_populate_all_memory_levels(hwmgr);
+		PP_ASSERT_WITH_CODE(!result,
+				"Failed to populate MCLK during PopulateNewDPMClocksStates Function!",
+				return result);
+	}
+
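+	/* refresh the SOC voltage levels now that the clock tables are current */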
+	vega10_populate_vddc_soc_levels(hwmgr);
+
 	return result;
 }
 
@@ -3742,8 +3529,9 @@
 	PP_ASSERT_WITH_CODE(!result,
 			"Failed to upload PPtable!", return result);
 
-	data->apply_optimized_settings = false;
-	data->apply_overdrive_next_settings_mask = 0;
+	vega10_update_avfs(hwmgr);
+
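+	/* clear all serviced update flags but preserve a pending OD VDDC request */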
+	data->need_update_dpm_table &= DPMTABLE_OD_UPDATE_VDDC;
 
 	return 0;
 }
@@ -3793,16 +3581,18 @@
 }
 
 static int vega10_get_gpu_power(struct pp_hwmgr *hwmgr,
-		struct pp_gpu_power *query)
+		uint32_t *query)
 {
 	uint32_t value;
 
+	if (!query)
+		return -EINVAL;
+
 	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrPkgPwr);
 	value = smum_get_argument(hwmgr);
 
-	/* power value is an integer */
-	memset(query, 0, sizeof *query);
-	query->average_gpu_power = value << 8;
+	/* the SMC returns power in integer watts; shift left by 8 so the low 8 bits hold the fractional part, matching legacy ASICs */
+	*query = value << 8;
 
 	return 0;
 }
@@ -3810,22 +3600,18 @@
 static int vega10_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 			      void *value, int *size)
 {
-	uint32_t sclk_idx, mclk_idx, activity_percent = 0;
+	struct amdgpu_device *adev = hwmgr->adev;
+	uint32_t sclk_mhz, mclk_idx, activity_percent = 0;
 	struct vega10_hwmgr *data = hwmgr->backend;
 	struct vega10_dpm_table *dpm_table = &data->dpm_table;
 	int ret = 0;
-	uint32_t reg, val_vid;
+	uint32_t val_vid;
 
 	switch (idx) {
 	case AMDGPU_PP_SENSOR_GFX_SCLK:
-		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex);
-		sclk_idx = smum_get_argument(hwmgr);
-		if (sclk_idx <  dpm_table->gfx_table.count) {
-			*((uint32_t *)value) = dpm_table->gfx_table.dpm_levels[sclk_idx].value;
-			*size = 4;
-		} else {
-			ret = -EINVAL;
-		}
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetAverageGfxclkActualFrequency);
+		sclk_mhz = smum_get_argument(hwmgr);
+		*((uint32_t *)value) = sclk_mhz * 100;
 		break;
 	case AMDGPU_PP_SENSOR_GFX_MCLK:
 		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex);
@@ -3856,18 +3642,10 @@
 		*size = 4;
 		break;
 	case AMDGPU_PP_SENSOR_GPU_POWER:
-		if (*size < sizeof(struct pp_gpu_power))
-			ret = -EINVAL;
-		else {
-			*size = sizeof(struct pp_gpu_power);
-			ret = vega10_get_gpu_power(hwmgr, (struct pp_gpu_power *)value);
-		}
+		ret = vega10_get_gpu_power(hwmgr, (uint32_t *)value);
 		break;
 	case AMDGPU_PP_SENSOR_VDDGFX:
-		reg = soc15_get_register_offset(SMUIO_HWID, 0,
-			mmSMUSVI0_PLANE0_CURRENTVID_BASE_IDX,
-			mmSMUSVI0_PLANE0_CURRENTVID);
-		val_vid = (cgs_read_register(hwmgr->device, reg) &
+		val_vid = (RREG32_SOC15(SMUIO, 0, mmSMUSVI0_PLANE0_CURRENTVID) &
 			SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID_MASK) >>
 			SMUSVI0_PLANE0_CURRENTVID__CURRENT_SVI0_PLANE0_VID__SHIFT;
 		*((uint32_t *)value) = (uint32_t)convert_to_vddc((uint8_t)val_vid);
@@ -3956,26 +3734,18 @@
 			(struct phm_ppt_v2_information *)hwmgr->pptable;
 	struct phm_ppt_v1_clock_voltage_dependency_table *mclk_table = table_info->vdd_dep_on_mclk;
 	uint32_t idx;
-	uint32_t num_active_disps = 0;
-	struct cgs_display_info info = {0};
 	struct PP_Clocks min_clocks = {0};
 	uint32_t i;
 	struct pp_display_clock_request clock_req;
 
-	info.mode_info = NULL;
-
-	cgs_get_active_displays_info(hwmgr->device, &info);
-
-	num_active_disps = info.display_count;
-
-	if (num_active_disps > 1)
+	if (hwmgr->display_config->num_display > 1)
 		vega10_notify_smc_display_change(hwmgr, false);
 	else
 		vega10_notify_smc_display_change(hwmgr, true);
 
-	min_clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk;
-	min_clocks.dcefClockInSR = hwmgr->display_config.min_dcef_deep_sleep_set_clk;
-	min_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+	min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
+	min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk;
+	min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
 
 	for (i = 0; i < dpm_table->count; i++) {
 		if (dpm_table->dpm_levels[i].value == min_clocks.dcefClock)
@@ -4120,6 +3890,47 @@
 	}
 }
 
+static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
+		enum pp_clock_type type, uint32_t mask)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+
+	switch (type) {
+	case PP_SCLK:
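+		/* the lowest and highest set bits of the mask become the soft min and max levels */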
+		data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0;
+		data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0;
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
+			"Failed to upload boot level to lowest!",
+			return -EINVAL);
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
+			"Failed to upload dpm max level to highest!",
+			return -EINVAL);
+		break;
+
+	case PP_MCLK:
+		data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0;
+		data->smc_state_table.mem_max_level = mask ? (fls(mask) - 1) : 0;
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
+			"Failed to upload boot level to lowest!",
+			return -EINVAL);
+
+		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
+			"Failed to upload dpm max level to highest!",
+			return -EINVAL);
+
+		break;
+
+	case PP_PCIE:
+	default:
+		break;
+	}
+
+	return 0;
+}
+
 static int vega10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 				enum amd_dpm_forced_level level)
 {
@@ -4356,97 +4167,15 @@
 	struct vega10_hwmgr *data = hwmgr->backend;
 	Watermarks_t *table = &(data->smc_state_table.water_marks_table);
 	int result = 0;
-	uint32_t i;
 
 	if (!data->registry_data.disable_water_mark) {
-		for (i = 0; i < wm_with_clock_ranges->num_wm_sets_dmif; i++) {
-			table->WatermarkRow[WM_DCEFCLK][i].MinClock =
-				cpu_to_le16((uint16_t)
-				(wm_with_clock_ranges->wm_sets_dmif[i].wm_min_dcefclk_in_khz) /
-				100);
-			table->WatermarkRow[WM_DCEFCLK][i].MaxClock =
-				cpu_to_le16((uint16_t)
-				(wm_with_clock_ranges->wm_sets_dmif[i].wm_max_dcefclk_in_khz) /
-				100);
-			table->WatermarkRow[WM_DCEFCLK][i].MinUclk =
-				cpu_to_le16((uint16_t)
-				(wm_with_clock_ranges->wm_sets_dmif[i].wm_min_memclk_in_khz) /
-				100);
-			table->WatermarkRow[WM_DCEFCLK][i].MaxUclk =
-				cpu_to_le16((uint16_t)
-				(wm_with_clock_ranges->wm_sets_dmif[i].wm_max_memclk_in_khz) /
-				100);
-			table->WatermarkRow[WM_DCEFCLK][i].WmSetting = (uint8_t)
-					wm_with_clock_ranges->wm_sets_dmif[i].wm_set_id;
-		}
-
-		for (i = 0; i < wm_with_clock_ranges->num_wm_sets_mcif; i++) {
-			table->WatermarkRow[WM_SOCCLK][i].MinClock =
-				cpu_to_le16((uint16_t)
-				(wm_with_clock_ranges->wm_sets_mcif[i].wm_min_socclk_in_khz) /
-				100);
-			table->WatermarkRow[WM_SOCCLK][i].MaxClock =
-				cpu_to_le16((uint16_t)
-				(wm_with_clock_ranges->wm_sets_mcif[i].wm_max_socclk_in_khz) /
-				100);
-			table->WatermarkRow[WM_SOCCLK][i].MinUclk =
-				cpu_to_le16((uint16_t)
-				(wm_with_clock_ranges->wm_sets_mcif[i].wm_min_memclk_in_khz) /
-				100);
-			table->WatermarkRow[WM_SOCCLK][i].MaxUclk =
-				cpu_to_le16((uint16_t)
-				(wm_with_clock_ranges->wm_sets_mcif[i].wm_max_memclk_in_khz) /
-				100);
-			table->WatermarkRow[WM_SOCCLK][i].WmSetting = (uint8_t)
-					wm_with_clock_ranges->wm_sets_mcif[i].wm_set_id;
-		}
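+		/* common helper now fills the DCEFCLK and SOCCLK watermark rows */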
+		smu_set_watermarks_for_clocks_ranges(table, wm_with_clock_ranges);
 		data->water_marks_bitmap = WaterMarksExist;
 	}
 
 	return result;
 }
 
-static int vega10_force_clock_level(struct pp_hwmgr *hwmgr,
-		enum pp_clock_type type, uint32_t mask)
-{
-	struct vega10_hwmgr *data = hwmgr->backend;
-
-	switch (type) {
-	case PP_SCLK:
-		data->smc_state_table.gfx_boot_level = mask ? (ffs(mask) - 1) : 0;
-		data->smc_state_table.gfx_max_level = mask ? (fls(mask) - 1) : 0;
-
-		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
-			"Failed to upload boot level to lowest!",
-			return -EINVAL);
-
-		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
-			"Failed to upload dpm max level to highest!",
-			return -EINVAL);
-		break;
-
-	case PP_MCLK:
-		data->smc_state_table.mem_boot_level = mask ? (ffs(mask) - 1) : 0;
-		data->smc_state_table.mem_max_level = mask ? (fls(mask) - 1) : 0;
-
-		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_bootup_level(hwmgr),
-			"Failed to upload boot level to lowest!",
-			return -EINVAL);
-
-		PP_ASSERT_WITH_CODE(!vega10_upload_dpm_max_level(hwmgr),
-			"Failed to upload dpm max level to highest!",
-			return -EINVAL);
-
-		break;
-
-	case PP_PCIE:
-	default:
-		break;
-	}
-
-	return 0;
-}
-
 static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, char *buf)
 {
@@ -4454,6 +4183,8 @@
 	struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
 	struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
 	struct vega10_pcie_table *pcie_table = &(data->dpm_table.pcie_table);
+	struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL;
+
 	int i, now, size = 0;
 
 	switch (type) {
@@ -4492,6 +4223,40 @@
 					(pcie_table->pcie_gen[i] == 2) ? "8.0GT/s, x16" : "",
 					(i == now) ? "*" : "");
 		break;
+	case OD_SCLK:
+		if (hwmgr->od_enabled) {
+			size = sprintf(buf, "%s:\n", "OD_SCLK");
+			podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
+			for (i = 0; i < podn_vdd_dep->count; i++)
+				size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+					i, podn_vdd_dep->entries[i].clk / 100,
+						podn_vdd_dep->entries[i].vddc);
+		}
+		break;
+	case OD_MCLK:
+		if (hwmgr->od_enabled) {
+			size = sprintf(buf, "%s:\n", "OD_MCLK");
+			podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
+			for (i = 0; i < podn_vdd_dep->count; i++)
+				size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
+					i, podn_vdd_dep->entries[i].clk / 100,
+						podn_vdd_dep->entries[i].vddc);
+		}
+		break;
+	case OD_RANGE:
+		if (hwmgr->od_enabled) {
+			size = sprintf(buf, "%s:\n", "OD_RANGE");
+			size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
+				data->golden_dpm_table.gfx_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+			size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
+				data->golden_dpm_table.mem_table.dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+			size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
+				data->odn_dpm_table.min_vddc,
+				data->odn_dpm_table.max_vddc);
+		}
+		break;
 	default:
 		break;
 	}
@@ -4501,10 +4266,8 @@
 static int vega10_display_configuration_changed_task(struct pp_hwmgr *hwmgr)
 {
 	struct vega10_hwmgr *data = hwmgr->backend;
-	int result = 0;
-	uint32_t num_turned_on_displays = 1;
 	Watermarks_t *wm_table = &(data->smc_state_table.water_marks_table);
-	struct cgs_display_info info = {0};
+	int result = 0;
 
 	if ((data->water_marks_bitmap & WaterMarksExist) &&
 			!(data->water_marks_bitmap & WaterMarksLoaded)) {
@@ -4514,10 +4277,8 @@
 	}
 
 	if (data->water_marks_bitmap & WaterMarksLoaded) {
-		cgs_get_active_displays_info(hwmgr->device, &info);
-		num_turned_on_displays = info.display_count;
 		smum_send_msg_to_smc_with_parameter(hwmgr,
-			PPSMC_MSG_NumOfDisplays, num_turned_on_displays);
+			PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display);
 	}
 
 	return result;
@@ -4603,15 +4364,12 @@
 {
 	struct vega10_hwmgr *data = hwmgr->backend;
 	bool is_update_required = false;
-	struct cgs_display_info info = {0, 0, NULL};
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-
-	if (data->display_timing.num_existing_displays != info.display_count)
+	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
 		is_update_required = true;
 
 	if (PP_CAP(PHM_PlatformCaps_SclkDeepSleep)) {
-		if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr)
+		if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr)
 			is_update_required = true;
 	}
 
@@ -4886,6 +4644,200 @@
 	return 0;
 }
 
+
+static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr,
+					enum PP_OD_DPM_TABLE_COMMAND type,
+					uint32_t clk,
+					uint32_t voltage)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+	struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table);
+	struct vega10_single_dpm_table *golden_table;
+
+	if (voltage < odn_table->min_vddc || voltage > odn_table->max_vddc) {
+		pr_info("OD voltage is out of range [%d - %d] mV\n", odn_table->min_vddc, odn_table->max_vddc);
+		return false;
+	}
+
+	if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
+		golden_table = &(data->golden_dpm_table.gfx_table);
+		if (golden_table->dpm_levels[0].value > clk ||
+			hwmgr->platform_descriptor.overdriveLimit.engineClock < clk) {
+			pr_info("OD engine clock is out of range [%d - %d] MHz\n",
+				golden_table->dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
+			return false;
+		}
+	} else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
+		golden_table = &(data->golden_dpm_table.mem_table);
+		if (golden_table->dpm_levels[0].value > clk ||
+			hwmgr->platform_descriptor.overdriveLimit.memoryClock < clk) {
+			pr_info("OD memory clock is out of range [%d - %d] MHz\n",
+				golden_table->dpm_levels[0].value/100,
+				hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
+			return false;
+		}
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
+static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+	struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table);
+	struct phm_ppt_v2_information *table_info = hwmgr->pptable;
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_table;
+	struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table;
+	uint32_t i;
+
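+	/* a voltage differing from the pptable default means the user edited the OD tables */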
+	dep_table = table_info->vdd_dep_on_mclk;
+	odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_mclk);
+
+	for (i = 0; i < dep_table->count; i++) {
+		if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) {
+			data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK;
+			return;
+		}
+	}
+
+	dep_table = table_info->vdd_dep_on_sclk;
+	odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_sclk);
+	for (i = 0; i < dep_table->count; i++) {
+		if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) {
+			data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK;
+			return;
+		}
+	}
+
+	if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) {
+		data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC;
+		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK;
+	}
+}
+
+static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
+						enum PP_OD_DPM_TABLE_COMMAND type)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+	struct phm_ppt_v2_information *table_info = hwmgr->pptable;
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_table = table_info->vdd_dep_on_socclk;
+	struct vega10_single_dpm_table *dpm_table = &data->golden_dpm_table.soc_table;
+
+	struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_on_socclk =
+							&data->odn_dpm_table.vdd_dep_on_socclk;
+	struct vega10_odn_vddc_lookup_table *od_vddc_lookup_table = &data->odn_dpm_table.vddc_lookup_table;
+
+	struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep;
+	uint8_t i, j;
+
+	if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
+		podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
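+		/* mirror the OD sclk voltages into the vddc lookup table; raise the last entry only if it is too low */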
+		for (i = 0; i < podn_vdd_dep->count - 1; i++)
+			od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
+		if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc)
+			od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
+	} else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
+		podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
+		for (i = 0; i < dpm_table->count; i++) {
+			for (j = 0; j < od_vddc_lookup_table->count; j++) {
+				if (od_vddc_lookup_table->entries[j].us_vdd >
+					podn_vdd_dep->entries[i].vddc)
+					break;
+			}
+			if (j == od_vddc_lookup_table->count) {
+				od_vddc_lookup_table->entries[j-1].us_vdd =
+					podn_vdd_dep->entries[i].vddc;
+				data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC;
+			}
+			podn_vdd_dep->entries[i].vddInd = j;
+		}
+		dpm_table = &data->dpm_table.soc_table;
+		for (i = 0; i < dep_table->count; i++) {
+			if (dep_table->entries[i].vddInd == podn_vdd_dep->entries[dep_table->count-1].vddInd &&
+					dep_table->entries[i].clk < podn_vdd_dep->entries[dep_table->count-1].clk) {
+				data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
+				podn_vdd_dep_on_socclk->entries[i].clk = podn_vdd_dep->entries[dep_table->count-1].clk;
+				dpm_table->dpm_levels[i].value = podn_vdd_dep_on_socclk->entries[i].clk;
+			}
+		}
+		if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk <
+					podn_vdd_dep->entries[dep_table->count-1].clk) {
+			data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
+			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].clk = podn_vdd_dep->entries[dep_table->count-1].clk;
+			dpm_table->dpm_levels[podn_vdd_dep_on_socclk->count - 1].value = podn_vdd_dep->entries[dep_table->count-1].clk;
+		}
+		if (podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd <
+					podn_vdd_dep->entries[dep_table->count-1].vddInd) {
+			data->need_update_dpm_table |= DPMTABLE_UPDATE_SOCCLK;
+			podn_vdd_dep_on_socclk->entries[podn_vdd_dep_on_socclk->count - 1].vddInd = podn_vdd_dep->entries[dep_table->count-1].vddInd;
+		}
+	}
+}
+
+static int vega10_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
+					enum PP_OD_DPM_TABLE_COMMAND type,
+					long *input, uint32_t size)
+{
+	struct vega10_hwmgr *data = hwmgr->backend;
+	struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep_table;
+	struct vega10_single_dpm_table *dpm_table;
+
+	uint32_t input_clk;
+	uint32_t input_vol;
+	uint32_t input_level;
+	uint32_t i;
+
+	PP_ASSERT_WITH_CODE(input, "NULL user input for clock and voltage",
+				return -EINVAL);
+
+	if (!hwmgr->od_enabled) {
+		pr_info("OverDrive feature not enabled\n");
+		return -EINVAL;
+	}
+
+	if (PP_OD_EDIT_SCLK_VDDC_TABLE == type) {
+		dpm_table = &data->dpm_table.gfx_table;
+		podn_vdd_dep_table = &data->odn_dpm_table.vdd_dep_on_sclk;
+		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+	} else if (PP_OD_EDIT_MCLK_VDDC_TABLE == type) {
+		dpm_table = &data->dpm_table.mem_table;
+		podn_vdd_dep_table = &data->odn_dpm_table.vdd_dep_on_mclk;
+		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+	} else if (PP_OD_RESTORE_DEFAULT_TABLE == type) {
+		memcpy(&(data->dpm_table), &(data->golden_dpm_table), sizeof(struct vega10_dpm_table));
+		vega10_odn_initial_default_setting(hwmgr);
+		return 0;
+	} else if (PP_OD_COMMIT_DPM_TABLE == type) {
+		vega10_check_dpm_table_updated(hwmgr);
+		return 0;
+	} else {
+		return -EINVAL;
+	}
+
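+	/* user input arrives as (level, clock in MHz, voltage in mV) triples */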
+	for (i = 0; i < size; i += 3) {
+		if (i + 3 > size || input[i] >= podn_vdd_dep_table->count) {
+			pr_info("invalid clock voltage input\n");
+			return 0;
+		}
+		input_level = input[i];
+		input_clk = input[i+1] * 100;
+		input_vol = input[i+2];
+
+		if (vega10_check_clk_voltage_valid(hwmgr, type, input_clk, input_vol)) {
+			dpm_table->dpm_levels[input_level].value = input_clk;
+			podn_vdd_dep_table->entries[input_level].clk = input_clk;
+			podn_vdd_dep_table->entries[input_level].vddc = input_vol;
+		} else {
+			return -EINVAL;
+		}
+	}
+	vega10_odn_update_soc_table(hwmgr, type);
+	return 0;
+}
+
 static const struct pp_hwmgr_func vega10_hwmgr_funcs = {
 	.backend_init = vega10_hwmgr_backend_init,
 	.backend_fini = vega10_hwmgr_backend_fini,
@@ -4944,6 +4896,7 @@
 	.get_power_profile_mode = vega10_get_power_profile_mode,
 	.set_power_profile_mode = vega10_set_power_profile_mode,
 	.set_power_limit = vega10_set_power_limit,
+	.odn_edit_dpm_table = vega10_odn_edit_dpm_table,
 };
 
 int vega10_enable_smc_features(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
index 5339ea1..aadd6cb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h
@@ -282,15 +282,21 @@
 
 struct vega10_odn_clock_voltage_dependency_table {
 	uint32_t count;
-	struct phm_ppt_v1_clock_voltage_dependency_record
-		entries[MAX_REGULAR_DPM_NUMBER];
+	struct phm_ppt_v1_clock_voltage_dependency_record entries[MAX_REGULAR_DPM_NUMBER];
+};
+
+struct vega10_odn_vddc_lookup_table {
+	uint32_t count;
+	struct phm_ppt_v1_voltage_lookup_record entries[MAX_REGULAR_DPM_NUMBER];
 };
 
 struct vega10_odn_dpm_table {
-	struct phm_odn_clock_levels		odn_core_clock_dpm_levels;
-	struct phm_odn_clock_levels		odn_memory_clock_dpm_levels;
-	struct vega10_odn_clock_voltage_dependency_table		vdd_dependency_on_sclk;
-	struct vega10_odn_clock_voltage_dependency_table		vdd_dependency_on_mclk;
+	struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_sclk;
+	struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_mclk;
+	struct vega10_odn_clock_voltage_dependency_table vdd_dep_on_socclk;
+	struct vega10_odn_vddc_lookup_table vddc_lookup_table;
+	uint32_t max_vddc;
+	uint32_t min_vddc;
 };
 
 struct vega10_odn_fan_table {
@@ -301,8 +307,8 @@
 };
 
 struct vega10_hwmgr {
-	struct vega10_dpm_table			dpm_table;
-	struct vega10_dpm_table			golden_dpm_table;
+	struct vega10_dpm_table          dpm_table;
+	struct vega10_dpm_table          golden_dpm_table;
 	struct vega10_registry_data      registry_data;
 	struct vega10_vbios_boot_state   vbios_boot_state;
 	struct vega10_mclk_latency_table mclk_latency_table;
@@ -368,12 +374,8 @@
 	bool                           need_long_memory_training;
 
 	/* Internal settings to apply the application power optimization parameters */
-	bool                           apply_optimized_settings;
 	uint32_t                       disable_dpm_mask;
 
-	/* ---- Overdrive next setting ---- */
-	uint32_t                       apply_overdrive_next_settings_mask;
-
 	/* ---- SMU9 ---- */
 	struct smu_features            smu_features[GNLD_FEATURES_MAX];
 	struct vega10_smc_state_table  smc_state_table;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
index ba63fae..a9efd855 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
@@ -27,7 +27,7 @@
 #include "vega10_ppsmc.h"
 #include "vega10_inc.h"
 #include "pp_debug.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 
 static const struct vega10_didt_config_reg SEDiDtTuningCtrlConfig_Vega10[] =
 {
@@ -888,36 +888,36 @@
 	if (PP_CAP(PHM_PlatformCaps_DiDtEDCEnable)) {
 		if (PP_CAP(PHM_PlatformCaps_SQRamping)) {
 			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en);
+			data = REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en);
+			data = REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en);
 			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL, data);
 		}
 
 		if (PP_CAP(PHM_PlatformCaps_DBRamping)) {
 			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en);
+			data = REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en);
+			data = REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en);
 			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL, data);
 		}
 
 		if (PP_CAP(PHM_PlatformCaps_TDRamping)) {
 			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en);
+			data = REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en);
+			data = REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en);
 			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL, data);
 		}
 
 		if (PP_CAP(PHM_PlatformCaps_TCPRamping)) {
 			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en);
+			data = REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en);
+			data = REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en);
 			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL, data);
 		}
 
 		if (PP_CAP(PHM_PlatformCaps_DBRRamping)) {
 			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en);
+			data = REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en);
+			data = REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en);
 			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL, data);
 		}
 	}
@@ -930,20 +930,18 @@
 
 static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int result;
 	uint32_t num_se = 0, count, data;
-	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
 
 	num_se = adev->gfx.config.max_shader_engines;
 
-	cgs_enter_safe_mode(hwmgr->device, true);
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
+	mutex_lock(&adev->grbm_idx_mutex);
 	for (count = 0; count < num_se; count++) {
 		data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
-		cgs_write_register(hwmgr->device, reg, data);
+		WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 
 		result =  vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT);
 		result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT);
@@ -958,43 +956,43 @@
 		if (0 != result)
 			break;
 	}
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	vega10_didt_set_mask(hwmgr, true);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	return 0;
 }
 
 static int vega10_disable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr)
 {
-	cgs_enter_safe_mode(hwmgr->device, true);
+	struct amdgpu_device *adev = hwmgr->adev;
+
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 	vega10_didt_set_mask(hwmgr, false);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	return 0;
 }
 
 static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int result;
 	uint32_t num_se = 0, count, data;
-	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
 
 	num_se = adev->gfx.config.max_shader_engines;
 
-	cgs_enter_safe_mode(hwmgr->device, true);
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
+	mutex_lock(&adev->grbm_idx_mutex);
 	for (count = 0; count < num_se; count++) {
 		data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
-		cgs_write_register(hwmgr->device, reg, data);
+		WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 
 		result = vega10_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega10, VEGA10_CONFIGREG_DIDT);
 		result |= vega10_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega10, VEGA10_CONFIGREG_DIDT);
@@ -1003,12 +1001,12 @@
 		if (0 != result)
 			break;
 	}
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	vega10_didt_set_mask(hwmgr, true);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	vega10_program_gc_didt_config_registers(hwmgr, GCDiDtDroopCtrlConfig_vega10);
 	if (PP_CAP(PHM_PlatformCaps_GCEDC))
@@ -1022,13 +1020,14 @@
 
 static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t data;
 
-	cgs_enter_safe_mode(hwmgr->device, true);
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 	vega10_didt_set_mask(hwmgr, false);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	if (PP_CAP(PHM_PlatformCaps_GCEDC)) {
 		data = 0x00000000;
@@ -1043,20 +1042,18 @@
 
 static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int result;
 	uint32_t num_se = 0, count, data;
-	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
 
 	num_se = adev->gfx.config.max_shader_engines;
 
-	cgs_enter_safe_mode(hwmgr->device, true);
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
+	mutex_lock(&adev->grbm_idx_mutex);
 	for (count = 0; count < num_se; count++) {
 		data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
-		cgs_write_register(hwmgr->device, reg, data);
+		WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 		result = vega10_program_didt_config_registers(hwmgr, SEDiDtWeightConfig_Vega10, VEGA10_CONFIGREG_DIDT);
 		result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT);
 		result |= vega10_program_didt_config_registers(hwmgr, SEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT);
@@ -1067,46 +1064,46 @@
 		if (0 != result)
 			break;
 	}
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	vega10_didt_set_mask(hwmgr, true);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	return 0;
 }
 
 static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr)
 {
-	cgs_enter_safe_mode(hwmgr->device, true);
+	struct amdgpu_device *adev = hwmgr->adev;
+
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 	vega10_didt_set_mask(hwmgr, false);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	return 0;
 }
 
 static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int result;
 	uint32_t num_se = 0;
 	uint32_t count, data;
-	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
 
 	num_se = adev->gfx.config.max_shader_engines;
 
-	cgs_enter_safe_mode(hwmgr->device, true);
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 	vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10);
 
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
+	mutex_lock(&adev->grbm_idx_mutex);
 	for (count = 0; count < num_se; count++) {
 		data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
-		cgs_write_register(hwmgr->device, reg, data);
+		WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data);
 		result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT);
 		result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCStallDelayConfig_Vega10, VEGA10_CONFIGREG_DIDT);
 		result |= vega10_program_didt_config_registers(hwmgr, PSMSEEDCCtrlResetConfig_Vega10, VEGA10_CONFIGREG_DIDT);
@@ -1115,12 +1112,12 @@
 		if (0 != result)
 			break;
 	}
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	vega10_didt_set_mask(hwmgr, true);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCDroopCtrlConfig_vega10);
 
@@ -1137,13 +1134,14 @@
 
 static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t data;
 
-	cgs_enter_safe_mode(hwmgr->device, true);
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 	vega10_didt_set_mask(hwmgr, false);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	if (PP_CAP(PHM_PlatformCaps_GCEDC)) {
 		data = 0x00000000;
@@ -1158,15 +1156,14 @@
 
 static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 	int result;
 
-	cgs_enter_safe_mode(hwmgr->device, true);
+	adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
+	mutex_lock(&adev->grbm_idx_mutex);
+	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT);
 	result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT);
@@ -1175,7 +1172,7 @@
 
 	vega10_didt_set_mask(hwmgr, false);
 
-	cgs_enter_safe_mode(hwmgr->device, false);
+	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index c61d074..0768d25 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -52,7 +52,7 @@
 
 	if (!table_address) {
 		table_address = (ATOM_Vega10_POWERPLAYTABLE *)
-				cgs_atom_get_data_table(hwmgr->device, index,
+				smu_atom_get_data_table(hwmgr->adev, index,
 						&size, &frev, &crev);
 
 		hwmgr->soft_pp_table = table_address;	/*Cache the result in RAM.*/
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
index 9f18226..aa044c1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c
@@ -25,7 +25,7 @@
 #include "vega10_hwmgr.h"
 #include "vega10_ppsmc.h"
 #include "vega10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 #include "pp_debug.h"
 
 static int vega10_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm)
@@ -89,6 +89,7 @@
 
 int vega10_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	struct vega10_hwmgr *data = hwmgr->backend;
 	uint32_t tach_period;
 	uint32_t crystal_clock_freq;
@@ -100,10 +101,8 @@
 	if (data->smu_features[GNLD_FAN_CONTROL].supported) {
 		result = vega10_get_current_rpm(hwmgr, speed);
 	} else {
-		uint32_t reg = soc15_get_register_offset(THM_HWID, 0,
-				mmCG_TACH_STATUS_BASE_IDX, mmCG_TACH_STATUS);
 		tach_period =
-			CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg),
+			REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_STATUS),
 					  CG_TACH_STATUS,
 					  TACH_PERIOD);
 
@@ -127,26 +126,23 @@
 */
 int vega10_fan_ctrl_set_static_mode(struct pp_hwmgr *hwmgr, uint32_t mode)
 {
-	uint32_t reg;
-
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (hwmgr->fan_ctrl_is_in_default_mode) {
 		hwmgr->fan_ctrl_default_mode =
-			CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg),
+			REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 		hwmgr->tmin =
-			CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg),
+			REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
 				CG_FDO_CTRL2, TMIN);
 		hwmgr->fan_ctrl_is_in_default_mode = false;
 	}
 
-	cgs_write_register(hwmgr->device, reg,
-			CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+	WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+			REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
 				CG_FDO_CTRL2, TMIN, 0));
-	cgs_write_register(hwmgr->device, reg,
-			CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+	WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+			REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
 				CG_FDO_CTRL2, FDO_PWM_MODE, mode));
 
 	return 0;
@@ -159,18 +155,15 @@
 */
 int vega10_fan_ctrl_set_default_mode(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg;
-
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (!hwmgr->fan_ctrl_is_in_default_mode) {
-		cgs_write_register(hwmgr->device, reg,
-			CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+		WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+			REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
 				CG_FDO_CTRL2, FDO_PWM_MODE,
 				hwmgr->fan_ctrl_default_mode));
-		cgs_write_register(hwmgr->device, reg,
-			CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+		WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+			REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
 				CG_FDO_CTRL2, TMIN,
 				hwmgr->tmin << CG_FDO_CTRL2__TMIN__SHIFT));
 		hwmgr->fan_ctrl_is_in_default_mode = true;
@@ -257,10 +250,10 @@
 int vega10_fan_ctrl_set_fan_speed_percent(struct pp_hwmgr *hwmgr,
 		uint32_t speed)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t duty100;
 	uint32_t duty;
 	uint64_t tmp64;
-	uint32_t reg;
 
 	if (hwmgr->thermal_controller.fanInfo.bNoFan)
 		return 0;
@@ -271,10 +264,7 @@
 	if (PP_CAP(PHM_PlatformCaps_MicrocodeFanControl))
 		vega10_fan_ctrl_stop_smc_fan_control(hwmgr);
 
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_FDO_CTRL1_BASE_IDX, mmCG_FDO_CTRL1);
-
-	duty100 = CGS_REG_GET_FIELD(cgs_read_register(hwmgr->device, reg),
+	duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1),
 				    CG_FDO_CTRL1, FMAX_DUTY100);
 
 	if (duty100 == 0)
@@ -284,10 +274,8 @@
 	do_div(tmp64, 100);
 	duty = (uint32_t)tmp64;
 
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_FDO_CTRL0_BASE_IDX, mmCG_FDO_CTRL0);
-	cgs_write_register(hwmgr->device, reg,
-		CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+	WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0,
+		REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0),
 			CG_FDO_CTRL0, FDO_STATIC_DUTY, duty));
 
 	return vega10_fan_ctrl_set_static_mode(hwmgr, FDO_PWM_MODE_STATIC);
@@ -317,10 +305,10 @@
 */
 int vega10_fan_ctrl_set_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t speed)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t tach_period;
 	uint32_t crystal_clock_freq;
 	int result = 0;
-	uint32_t reg;
 
 	if (hwmgr->thermal_controller.fanInfo.bNoFan ||
 	    (speed < hwmgr->thermal_controller.fanInfo.ulMinRPM) ||
@@ -333,10 +321,8 @@
 	if (!result) {
 		crystal_clock_freq = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
 		tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed);
-		reg = soc15_get_register_offset(THM_HWID, 0,
-				mmCG_TACH_STATUS_BASE_IDX, mmCG_TACH_STATUS);
-		cgs_write_register(hwmgr->device, reg,
-				CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+		WREG32_SOC15(THM, 0, mmCG_TACH_STATUS,
+				REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_STATUS),
 					CG_TACH_STATUS, TACH_PERIOD,
 					tach_period));
 	}
@@ -350,13 +336,10 @@
 */
 int vega10_thermal_get_temperature(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int temp;
-	uint32_t reg;
 
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_MULT_THERMAL_STATUS_BASE_IDX,  mmCG_MULT_THERMAL_STATUS);
-
-	temp = cgs_read_register(hwmgr->device, reg);
+	temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
 
 	temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
 			CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
@@ -379,11 +362,12 @@
 static int vega10_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
 		struct PP_TemperatureRange *range)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int low = VEGA10_THERMAL_MINIMUM_ALERT_TEMP *
 			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	int high = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP *
 			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
-	uint32_t val, reg;
+	uint32_t val;
 
 	if (low < range->min)
 		low = range->min;
@@ -393,20 +377,17 @@
 	if (low > high)
 		return -EINVAL;
 
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL);
+	val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL);
 
-	val = cgs_read_register(hwmgr->device, reg);
-
-	val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
-	val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
-	val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
-	val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
+	val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
+	val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
+	val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
+	val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
 	val &= (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK) &
 			(~THM_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK) &
 			(~THM_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK);
 
-	cgs_write_register(hwmgr->device, reg, val);
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val);
 
 	return 0;
 }
@@ -418,21 +399,17 @@
 */
 static int vega10_thermal_initialize(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	if (hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution) {
-		reg = soc15_get_register_offset(THM_HWID, 0,
-				mmCG_TACH_CTRL_BASE_IDX, mmCG_TACH_CTRL);
-		cgs_write_register(hwmgr->device, reg,
-			CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+		WREG32_SOC15(THM, 0, mmCG_TACH_CTRL,
+			REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_TACH_CTRL),
 				CG_TACH_CTRL, EDGE_PER_REV,
 				hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution - 1));
 	}
 
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_FDO_CTRL2_BASE_IDX, mmCG_FDO_CTRL2);
-	cgs_write_register(hwmgr->device, reg,
-		CGS_REG_SET_FIELD(cgs_read_register(hwmgr->device, reg),
+	WREG32_SOC15(THM, 0, mmCG_FDO_CTRL2,
+		REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL2),
 			CG_FDO_CTRL2, TACH_PWM_RESP_RATE, 0x28));
 
 	return 0;
@@ -445,9 +422,9 @@
 */
 static int vega10_thermal_enable_alert(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	struct vega10_hwmgr *data = hwmgr->backend;
 	uint32_t val = 0;
-	uint32_t reg;
 
 	if (data->smu_features[GNLD_FW_CTF].supported) {
 		if (data->smu_features[GNLD_FW_CTF].enabled)
@@ -465,8 +442,7 @@
 	val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT);
 	val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT);
 
-	reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
-	cgs_write_register(hwmgr->device, reg, val);
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val);
 
 	return 0;
 }
@@ -477,8 +453,8 @@
 */
 int vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	struct vega10_hwmgr *data = hwmgr->backend;
-	uint32_t reg;
 
 	if (data->smu_features[GNLD_FW_CTF].supported) {
 		if (!data->smu_features[GNLD_FW_CTF].enabled)
@@ -493,8 +469,7 @@
 		data->smu_features[GNLD_FW_CTF].enabled = false;
 	}
 
-	reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
-	cgs_write_register(hwmgr->device, reg, 0);
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 200de46..782e209 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -34,7 +34,6 @@
 #include "atomfirmware.h"
 #include "cgs_common.h"
 #include "vega12_inc.h"
-#include "pp_soc15.h"
 #include "pppcielanes.h"
 #include "vega12_hwmgr.h"
 #include "vega12_processpptables.h"
@@ -546,6 +545,7 @@
 			return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
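+		/* mark each constructed level as enabled */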
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -565,6 +565,7 @@
 			return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -585,6 +586,7 @@
 			return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -605,6 +607,7 @@
 		return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -625,6 +628,7 @@
 			return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -645,6 +649,7 @@
 		return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -666,6 +671,7 @@
 			return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -686,6 +692,7 @@
 			return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -706,6 +713,7 @@
 			return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -726,6 +734,7 @@
 			return -EINVAL);
 
 		dpm_table->dpm_levels[i].value = clock;
+		dpm_table->dpm_levels[i].enabled = true;
 	}
 
 	vega12_init_dpm_state(&(dpm_table->dpm_state));
@@ -992,15 +1001,55 @@
 
 static int vega12_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 {
+	struct vega12_hwmgr *data = hwmgr->backend;
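+
+	/* only message the SMC when the soft minimum level actually changes */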
+	if (data->smc_state_table.gfx_boot_level !=
+			data->dpm_table.gfx_table.dpm_state.soft_min_level) {
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_SetSoftMinByFreq,
+			PPCLK_GFXCLK<<16 | data->dpm_table.gfx_table.dpm_levels[data->smc_state_table.gfx_boot_level].value);
+		data->dpm_table.gfx_table.dpm_state.soft_min_level =
+				data->smc_state_table.gfx_boot_level;
+	}
+
+	if (data->smc_state_table.mem_boot_level !=
+			data->dpm_table.mem_table.dpm_state.soft_min_level) {
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_SetSoftMinByFreq,
+			PPCLK_UCLK<<16 | data->dpm_table.mem_table.dpm_levels[data->smc_state_table.mem_boot_level].value);
+		data->dpm_table.mem_table.dpm_state.soft_min_level =
+				data->smc_state_table.mem_boot_level;
+	}
+
 	return 0;
+
 }
 
 static int vega12_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 {
+	struct vega12_hwmgr *data = hwmgr->backend;
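+
+	/* only message the SMC when the soft maximum level actually changes */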
+	if (data->smc_state_table.gfx_max_level !=
+		data->dpm_table.gfx_table.dpm_state.soft_max_level) {
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_SetSoftMaxByFreq,
+			/* bump the value by 1 to align the resolution */
+			PPCLK_GFXCLK<<16 | (data->dpm_table.gfx_table.dpm_levels[data->smc_state_table.gfx_max_level].value + 1));
+		data->dpm_table.gfx_table.dpm_state.soft_max_level =
+				data->smc_state_table.gfx_max_level;
+	}
+
+	if (data->smc_state_table.mem_max_level !=
+		data->dpm_table.mem_table.dpm_state.soft_max_level) {
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_SetSoftMaxByFreq,
+			/* bump the value by 1 to align the resolution */
+			PPCLK_UCLK<<16 | (data->dpm_table.mem_table.dpm_levels[data->smc_state_table.mem_max_level].value + 1));
+		data->dpm_table.mem_table.dpm_state.soft_max_level =
+				data->smc_state_table.mem_max_level;
+	}
+
 	return 0;
 }
 
-
 int vega12_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable)
 {
 	struct vega12_hwmgr *data =
@@ -1064,8 +1113,7 @@
 	return (mem_clk * 100);
 }
 
-static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr,
-		struct pp_gpu_power *query)
+static int vega12_get_gpu_power(struct pp_hwmgr *hwmgr, uint32_t *query)
 {
 #if 0
 	uint32_t value;
@@ -1077,7 +1125,7 @@
 
 	vega12_read_arg_from_smc(hwmgr, &value);
 	/* power value is an integer */
-	query->average_gpu_power = value << 8;
+	*query = value << 8;
 #endif
 	return 0;
 }
@@ -1186,12 +1234,8 @@
 		*size = 4;
 		break;
 	case AMDGPU_PP_SENSOR_GPU_POWER:
-		if (*size < sizeof(struct pp_gpu_power))
-			ret = -EINVAL;
-		else {
-			*size = sizeof(struct pp_gpu_power);
-			ret = vega12_get_gpu_power(hwmgr, (struct pp_gpu_power *)value);
-		}
+		ret = vega12_get_gpu_power(hwmgr, (uint32_t *)value);
+
 		break;
 	default:
 		ret = -EINVAL;
@@ -1260,23 +1304,18 @@
 {
 	struct vega12_hwmgr *data =
 			(struct vega12_hwmgr *)(hwmgr->backend);
-	uint32_t num_active_disps = 0;
-	struct cgs_display_info info = {0};
 	struct PP_Clocks min_clocks = {0};
 	struct pp_display_clock_request clock_req;
 	uint32_t clk_request;
 
-	info.mode_info = NULL;
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	num_active_disps = info.display_count;
-	if (num_active_disps > 1)
+	if (hwmgr->display_config->num_display > 1)
 		vega12_notify_smc_display_change(hwmgr, false);
 	else
 		vega12_notify_smc_display_change(hwmgr, true);
 
-	min_clocks.dcefClock = hwmgr->display_config.min_dcef_set_clk;
-	min_clocks.dcefClockInSR = hwmgr->display_config.min_dcef_deep_sleep_set_clk;
-	min_clocks.memoryClock = hwmgr->display_config.min_mem_set_clock;
+	min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
+	min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk;
+	min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
 
 	if (data->smu_features[GNLD_DPM_DCEFCLK].supported) {
 		clock_req.clock_type = amd_pp_dcef_clock;
@@ -1832,9 +1871,7 @@
 {
 	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
 	int result = 0;
-	uint32_t num_turned_on_displays = 1;
 	Watermarks_t *wm_table = &(data->smc_state_table.water_marks_table);
-	struct cgs_display_info info = {0};
 
 	if ((data->water_marks_bitmap & WaterMarksExist) &&
 			!(data->water_marks_bitmap & WaterMarksLoaded)) {
@@ -1846,12 +1883,9 @@
 
 	if ((data->water_marks_bitmap & WaterMarksExist) &&
 		data->smu_features[GNLD_DPM_DCEFCLK].supported &&
-		data->smu_features[GNLD_DPM_SOCCLK].supported) {
-		cgs_get_active_displays_info(hwmgr->device, &info);
-		num_turned_on_displays = info.display_count;
+		data->smu_features[GNLD_DPM_SOCCLK].supported)
 		smum_send_msg_to_smc_with_parameter(hwmgr,
-			PPSMC_MSG_NumOfDisplays, num_turned_on_displays);
-	}
+			PPSMC_MSG_NumOfDisplays, hwmgr->display_config->num_display);
 
 	return result;
 }
@@ -1894,15 +1928,12 @@
 {
 	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
 	bool is_update_required = false;
-	struct cgs_display_info info = {0, 0, NULL};
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-
-	if (data->display_timing.num_existing_displays != info.display_count)
+	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
 		is_update_required = true;
 
 	if (data->registry_data.gfx_clk_deep_sleep_support) {
-		if (data->display_timing.min_clock_in_sr != hwmgr->display_config.min_core_set_clock_in_sr)
+		if (data->display_timing.min_clock_in_sr != hwmgr->display_config->min_core_set_clock_in_sr)
 			is_update_required = true;
 	}
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
index bc98b1d..e81ded1 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
@@ -33,7 +33,7 @@
 #define WaterMarksExist  1
 #define WaterMarksLoaded 2
 
-#define VG12_PSUEDO_NUM_GFXCLK_DPM_LEVELS   8
+#define VG12_PSUEDO_NUM_GFXCLK_DPM_LEVELS   16
 #define VG12_PSUEDO_NUM_SOCCLK_DPM_LEVELS   8
 #define VG12_PSUEDO_NUM_DCEFCLK_DPM_LEVELS  8
 #define VG12_PSUEDO_NUM_UCLK_DPM_LEVELS     4
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
index b34113f..888ddca 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
@@ -51,7 +51,7 @@
 
 	if (!table_address) {
 		table_address = (ATOM_Vega12_POWERPLAYTABLE *)
-				cgs_atom_get_data_table(hwmgr->device, index,
+				smu_atom_get_data_table(hwmgr->adev, index,
 						&size, &frev, &crev);
 
 		hwmgr->soft_pp_table = table_address;	/*Cache the result in RAM.*/
@@ -224,6 +224,11 @@
 	ppsmc_pptable->AcgGfxclkSpreadPercent = smc_dpm_table.acggfxclkspreadpercent;
 	ppsmc_pptable->AcgGfxclkSpreadFreq = smc_dpm_table.acggfxclkspreadfreq;
 
+	/* 0xFFFF will disable the ACG feature */
+	if (!(hwmgr->feature_mask & PP_ACG_MASK)) {
+		ppsmc_pptable->AcgThresholdFreqHigh = 0xFFFF;
+		ppsmc_pptable->AcgThresholdFreqLow = 0xFFFF;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
index df0fa81..cfd9e6c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_thermal.c
@@ -26,7 +26,7 @@
 #include "vega12_smumgr.h"
 #include "vega12_ppsmc.h"
 #include "vega12_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 #include "pp_debug.h"
 
 static int vega12_get_current_rpm(struct pp_hwmgr *hwmgr, uint32_t *current_rpm)
@@ -147,13 +147,10 @@
 */
 int vega12_thermal_get_temperature(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int temp = 0;
-	uint32_t reg;
 
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmCG_MULT_THERMAL_STATUS_BASE_IDX,  mmCG_MULT_THERMAL_STATUS);
-
-	temp = cgs_read_register(hwmgr->device, reg);
+	temp = RREG32_SOC15(THM, 0, mmCG_MULT_THERMAL_STATUS);
 
 	temp = (temp & CG_MULT_THERMAL_STATUS__CTF_TEMP_MASK) >>
 			CG_MULT_THERMAL_STATUS__CTF_TEMP__SHIFT;
@@ -175,11 +172,12 @@
 static int vega12_thermal_set_temperature_range(struct pp_hwmgr *hwmgr,
 		struct PP_TemperatureRange *range)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	int low = VEGA12_THERMAL_MINIMUM_ALERT_TEMP *
 			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	int high = VEGA12_THERMAL_MAXIMUM_ALERT_TEMP *
 			PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
-	uint32_t val, reg;
+	uint32_t val;
 
 	if (low < range->min)
 		low = range->min;
@@ -189,18 +187,15 @@
 	if (low > high)
 		return -EINVAL;
 
-	reg = soc15_get_register_offset(THM_HWID, 0,
-			mmTHM_THERMAL_INT_CTRL_BASE_IDX, mmTHM_THERMAL_INT_CTRL);
+	val = RREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL);
 
-	val = cgs_read_register(hwmgr->device, reg);
-
-	val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
-	val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
-	val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
-	val = CGS_REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
+	val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, MAX_IH_CREDIT, 5);
+	val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, THERM_IH_HW_ENA, 1);
+	val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTH, (high / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
+	val = REG_SET_FIELD(val, THM_THERMAL_INT_CTRL, DIG_THERM_INTL, (low / PP_TEMPERATURE_UNITS_PER_CENTIGRADES));
 	val = val & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
 
-	cgs_write_register(hwmgr->device, reg, val);
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_CTRL, val);
 
 	return 0;
 }
@@ -212,15 +207,14 @@
 */
 static int vega12_thermal_enable_alert(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t val = 0;
-	uint32_t reg;
 
 	val |= (1 << THM_THERMAL_INT_ENA__THERM_INTH_CLR__SHIFT);
 	val |= (1 << THM_THERMAL_INT_ENA__THERM_INTL_CLR__SHIFT);
 	val |= (1 << THM_THERMAL_INT_ENA__THERM_TRIGGER_CLR__SHIFT);
 
-	reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
-	cgs_write_register(hwmgr->device, reg, val);
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, val);
 
 	return 0;
 }
@@ -231,10 +225,9 @@
 */
 int vega12_thermal_disable_alert(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(THM_HWID, 0, mmTHM_THERMAL_INT_ENA_BASE_IDX, mmTHM_THERMAL_INT_ENA);
-	cgs_write_register(hwmgr->device, reg, 0);
+	WREG32_SOC15(THM, 0, mmTHM_THERMAL_INT_ENA, 0);
 
 	return 0;
 }
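Note: the conversions above drop the cgs indirection (soc15_get_register_offset() plus cgs_read_register()/cgs_write_register()) in favor of the amdgpu SOC15 accessors, which is why each function now takes a local adev. Roughly the shape of those macros at this point in the tree (a sketch of soc15_common.h, not a verbatim copy):

	/* Sketch: look up the block's base via adev, then add the register. */
	#define RREG32_SOC15(ip, inst, reg) \
		RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg)
	#define WREG32_SOC15(ip, inst, reg, value) \
		WREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
		       value)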
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 8b78bbe..a202247 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -377,11 +377,7 @@
 #define DPMTABLE_UPDATE_SCLK        0x00000004
 #define DPMTABLE_UPDATE_MCLK        0x00000008
 #define DPMTABLE_OD_UPDATE_VDDC     0x00000010
-
-/* To determine if sclk and mclk are in overdrive state */
-#define SCLK_OVERDRIVE_ENABLED           0x00000001
-#define MCLK_OVERDRIVE_ENABLED           0x00000002
-#define VDDC_OVERDRIVE_ENABLED           0x00000010
+#define DPMTABLE_UPDATE_SOCCLK      0x00000020
 
 struct phm_odn_performance_level {
 	uint32_t clock;
@@ -414,7 +410,10 @@
 				   struct pp_power_state *adjusted_ps,
 			     const struct pp_power_state *current_ps);
 
+extern int phm_apply_clock_adjust_rules(struct pp_hwmgr *hwmgr);
+
 extern int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level);
+extern int phm_pre_display_configuration_changed(struct pp_hwmgr *hwmgr);
 extern int phm_display_configuration_changed(struct pp_hwmgr *hwmgr);
 extern int phm_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwmgr);
 extern int phm_register_irq_handlers(struct pp_hwmgr *hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 17f811d..b99fb8a 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -38,6 +38,8 @@
 struct pp_atomctrl_voltage_table;
 
 #define VOLTAGE_SCALE 4
+#define VOLTAGE_VID_OFFSET_SCALE1   625
+#define VOLTAGE_VID_OFFSET_SCALE2   100
 
 enum DISPLAY_GAP {
 	DISPLAY_GAP_VBLANK_OR_WM = 0,   /* Wait for vblank or MCHG watermark. */
@@ -64,24 +66,6 @@
 #define PCIE_PERF_REQ_GEN2         3
 #define PCIE_PERF_REQ_GEN3         4
 
-enum PP_FEATURE_MASK {
-	PP_SCLK_DPM_MASK = 0x1,
-	PP_MCLK_DPM_MASK = 0x2,
-	PP_PCIE_DPM_MASK = 0x4,
-	PP_SCLK_DEEP_SLEEP_MASK = 0x8,
-	PP_POWER_CONTAINMENT_MASK = 0x10,
-	PP_UVD_HANDSHAKE_MASK = 0x20,
-	PP_SMC_VOLTAGE_CONTROL_MASK = 0x40,
-	PP_VBI_TIME_SUPPORT_MASK = 0x80,
-	PP_ULV_MASK = 0x100,
-	PP_ENABLE_GFX_CG_THRU_SMU = 0x200,
-	PP_CLOCK_STRETCH_MASK = 0x400,
-	PP_OD_FUZZY_FAN_CONTROL_MASK = 0x800,
-	PP_SOCCLK_DPM_MASK = 0x1000,
-	PP_DCEFCLK_DPM_MASK = 0x2000,
-	PP_OVERDRIVE_MASK = 0x4000,
-};
-
 enum PHM_BackEnd_Magic {
 	PHM_Dummy_Magic       = 0xAA5555AA,
 	PHM_RV770_Magic       = 0xDCBAABCD,
@@ -245,6 +229,8 @@
 				struct pp_power_state  *prequest_ps,
 			const struct pp_power_state *pcurrent_ps);
 
+	int (*apply_clocks_adjust_rules)(struct pp_hwmgr *hwmgr);
+
 	int (*force_dpm_level)(struct pp_hwmgr *hw_mgr,
 					enum amd_dpm_forced_level level);
 
@@ -268,6 +254,7 @@
 						const void *state);
 	int (*enable_clock_power_gating)(struct pp_hwmgr *hwmgr);
 	int (*notify_smc_display_config_after_ps_adjustment)(struct pp_hwmgr *hwmgr);
+	int (*pre_display_config_changed)(struct pp_hwmgr *hwmgr);
 	int (*display_config_changed)(struct pp_hwmgr *hwmgr);
 	int (*disable_clock_power_gating)(struct pp_hwmgr *hwmgr);
 	int (*update_clock_gatings)(struct pp_hwmgr *hwmgr,
@@ -312,6 +299,7 @@
 	int (*display_clock_voltage_request)(struct pp_hwmgr *hwmgr,
 			struct pp_display_clock_request *clock);
 	int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
+	int (*gfx_off_control)(struct pp_hwmgr *hwmgr, bool enable);
 	int (*power_off_asic)(struct pp_hwmgr *hwmgr);
 	int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask);
 	int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
@@ -341,6 +329,7 @@
 					long *input, uint32_t size);
 	int (*set_power_limit)(struct pp_hwmgr *hwmgr, uint32_t n);
 	int (*set_mmhub_powergating_by_smu)(struct pp_hwmgr *hwmgr);
+	int (*smus_notify_pwe)(struct pp_hwmgr *hwmgr);
 };
 
 struct pp_table_func {
@@ -718,6 +707,7 @@
 	uint32_t chip_family;
 	uint32_t chip_id;
 	uint32_t smu_version;
+	bool not_vf;
 	bool pm_en;
 	struct mutex smu_lock;
 
@@ -764,7 +754,7 @@
 	struct pp_power_state    *request_ps;
 	struct pp_power_state    *boot_ps;
 	struct pp_power_state    *uvd_ps;
-	struct amd_pp_display_configuration display_config;
+	const struct amd_pp_display_configuration *display_config;
 	uint32_t feature_mask;
 	bool avfs_supported;
 	/* UMD Pstate */
@@ -782,10 +772,13 @@
 };
 
 int hwmgr_early_init(struct pp_hwmgr *hwmgr);
+int hwmgr_sw_init(struct pp_hwmgr *hwmgr);
+int hwmgr_sw_fini(struct pp_hwmgr *hwmgr);
 int hwmgr_hw_init(struct pp_hwmgr *hwmgr);
 int hwmgr_hw_fini(struct pp_hwmgr *hwmgr);
-int hwmgr_hw_suspend(struct pp_hwmgr *hwmgr);
-int hwmgr_hw_resume(struct pp_hwmgr *hwmgr);
+int hwmgr_suspend(struct pp_hwmgr *hwmgr);
+int hwmgr_resume(struct pp_hwmgr *hwmgr);
+
 int hwmgr_handle_task(struct pp_hwmgr *hwmgr,
 				enum amd_pp_task task_id,
 				enum amd_pm_state_type *user_state);
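Note: the VOLTAGE_VID_OFFSET_SCALE pair added above expresses the SVI2 step of 6.25 mV per VID code as an integer multiply/divide (625 / 100). A conversion sketch under that assumption:

	/* Sketch: VID code to millivolts, 6.25 mV per step, integer math. */
	static uint16_t vid_to_mv(uint8_t vid)
	{
		return (uint16_t)(((uint32_t)vid * VOLTAGE_VID_OFFSET_SCALE1) /
				  VOLTAGE_VID_OFFSET_SCALE2);
	}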
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h b/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h
deleted file mode 100644
index 214f370..0000000
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_soc15.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright 2016 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#ifndef PP_SOC15_H
-#define PP_SOC15_H
-
-#include "soc15_hw_ip.h"
-#include "vega10_ip_offset.h"
-
-inline static uint32_t soc15_get_register_offset(
-		uint32_t hw_id,
-		uint32_t inst,
-		uint32_t segment,
-		uint32_t offset)
-{
-	uint32_t reg = 0;
-
-	if (hw_id == THM_HWID)
-		reg = THM_BASE.instance[inst].segment[segment] + offset;
-	else if (hw_id == NBIF_HWID)
-		reg = NBIF_BASE.instance[inst].segment[segment] + offset;
-	else if (hw_id == MP1_HWID)
-		reg = MP1_BASE.instance[inst].segment[segment] + offset;
-	else if (hw_id == DF_HWID)
-		reg = DF_BASE.instance[inst].segment[segment] + offset;
-	else if (hw_id == GC_HWID)
-		reg = GC_BASE.instance[inst].segment[segment] + offset;
-	else if (hw_id == SMUIO_HWID)
-		reg = SMUIO_BASE.instance[inst].segment[segment] + offset;
-	return reg;
-}
-
-#endif
diff --git a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
index 426bff2..a2991fa 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/rv_ppsmc.h
@@ -75,13 +75,15 @@
 #define PPSMC_MSG_GetMinGfxclkFrequency         0x2C
 #define PPSMC_MSG_GetMaxGfxclkFrequency         0x2D
 #define PPSMC_MSG_SoftReset                     0x2E
+#define PPSMC_MSG_SetGfxCGPG			0x2F
 #define PPSMC_MSG_SetSoftMaxGfxClk              0x30
 #define PPSMC_MSG_SetHardMinGfxClk              0x31
 #define PPSMC_MSG_SetSoftMaxSocclkByFreq        0x32
 #define PPSMC_MSG_SetSoftMaxFclkByFreq          0x33
 #define PPSMC_MSG_SetSoftMaxVcn                 0x34
 #define PPSMC_MSG_PowerGateMmHub                0x35
-#define PPSMC_Message_Count                     0x36
+#define PPSMC_MSG_SetRccPfcPmeRestoreRegister   0x36
+#define PPSMC_Message_Count                     0x37
 
 
 typedef uint16_t PPSMC_Result;
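Note: PPSMC_Message_Count is kept one past the highest message ID, which is why adding PPSMC_MSG_SetRccPfcPmeRestoreRegister at 0x36 bumps it to 0x37. That keeps a bounds check against the mailbox trivial; a sketch:

	/* Sketch: reject message IDs outside the table. */
	static bool rv_msg_id_valid(uint16_t msg)
	{
		return msg < PPSMC_Message_Count;
	}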
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu75.h b/drivers/gpu/drm/amd/powerplay/inc/smu75.h
new file mode 100644
index 0000000..7715230
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu75.h
@@ -0,0 +1,760 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef SMU75_H
+#define SMU75_H
+
+#pragma pack(push, 1)
+
+typedef struct {
+	uint32_t high;
+	uint32_t low;
+} data_64_t;
+
+typedef struct {
+	data_64_t high;
+	data_64_t low;
+} data_128_t;
+
+#define SMU__DGPU_ONLY
+
+#define SMU__NUM_SCLK_DPM_STATE  8
+#define SMU__NUM_MCLK_DPM_LEVELS 4
+#define SMU__NUM_LCLK_DPM_LEVELS 8
+#define SMU__NUM_PCIE_DPM_LEVELS 8
+
+#define SMU7_CONTEXT_ID_SMC        1
+#define SMU7_CONTEXT_ID_VBIOS      2
+
+#define SMU75_MAX_LEVELS_VDDC            16
+#define SMU75_MAX_LEVELS_VDDGFX          16
+#define SMU75_MAX_LEVELS_VDDCI           8
+#define SMU75_MAX_LEVELS_MVDD            4
+
+#define SMU_MAX_SMIO_LEVELS              4
+
+#define SMU75_MAX_LEVELS_GRAPHICS        SMU__NUM_SCLK_DPM_STATE
+#define SMU75_MAX_LEVELS_MEMORY          SMU__NUM_MCLK_DPM_LEVELS
+#define SMU75_MAX_LEVELS_GIO             SMU__NUM_LCLK_DPM_LEVELS
+#define SMU75_MAX_LEVELS_LINK            SMU__NUM_PCIE_DPM_LEVELS
+#define SMU75_MAX_LEVELS_UVD             8
+#define SMU75_MAX_LEVELS_VCE             8
+#define SMU75_MAX_LEVELS_ACP             8
+#define SMU75_MAX_LEVELS_SAMU            8
+#define SMU75_MAX_ENTRIES_SMIO           32
+
+#define DPM_NO_LIMIT 0
+#define DPM_NO_UP 1
+#define DPM_GO_DOWN 2
+#define DPM_GO_UP 3
+
+#define SMU7_FIRST_DPM_GRAPHICS_LEVEL    0
+#define SMU7_FIRST_DPM_MEMORY_LEVEL      0
+
+#define GPIO_CLAMP_MODE_VRHOT      1
+#define GPIO_CLAMP_MODE_THERM      2
+#define GPIO_CLAMP_MODE_DC         4
+
+#define SCRATCH_B_TARG_PCIE_INDEX_SHIFT 0
+#define SCRATCH_B_TARG_PCIE_INDEX_MASK  (0x7<<SCRATCH_B_TARG_PCIE_INDEX_SHIFT)
+#define SCRATCH_B_CURR_PCIE_INDEX_SHIFT 3
+#define SCRATCH_B_CURR_PCIE_INDEX_MASK  (0x7<<SCRATCH_B_CURR_PCIE_INDEX_SHIFT)
+#define SCRATCH_B_TARG_UVD_INDEX_SHIFT  6
+#define SCRATCH_B_TARG_UVD_INDEX_MASK   (0x7<<SCRATCH_B_TARG_UVD_INDEX_SHIFT)
+#define SCRATCH_B_CURR_UVD_INDEX_SHIFT  9
+#define SCRATCH_B_CURR_UVD_INDEX_MASK   (0x7<<SCRATCH_B_CURR_UVD_INDEX_SHIFT)
+#define SCRATCH_B_TARG_VCE_INDEX_SHIFT  12
+#define SCRATCH_B_TARG_VCE_INDEX_MASK   (0x7<<SCRATCH_B_TARG_VCE_INDEX_SHIFT)
+#define SCRATCH_B_CURR_VCE_INDEX_SHIFT  15
+#define SCRATCH_B_CURR_VCE_INDEX_MASK   (0x7<<SCRATCH_B_CURR_VCE_INDEX_SHIFT)
+#define SCRATCH_B_TARG_ACP_INDEX_SHIFT  18
+#define SCRATCH_B_TARG_ACP_INDEX_MASK   (0x7<<SCRATCH_B_TARG_ACP_INDEX_SHIFT)
+#define SCRATCH_B_CURR_ACP_INDEX_SHIFT  21
+#define SCRATCH_B_CURR_ACP_INDEX_MASK   (0x7<<SCRATCH_B_CURR_ACP_INDEX_SHIFT)
+#define SCRATCH_B_TARG_SAMU_INDEX_SHIFT 24
+#define SCRATCH_B_TARG_SAMU_INDEX_MASK  (0x7<<SCRATCH_B_TARG_SAMU_INDEX_SHIFT)
+#define SCRATCH_B_CURR_SAMU_INDEX_SHIFT 27
+#define SCRATCH_B_CURR_SAMU_INDEX_MASK  (0x7<<SCRATCH_B_CURR_SAMU_INDEX_SHIFT)
+
+/* Virtualization Defines */
+#define CG_XDMA_MASK  0x1
+#define CG_XDMA_SHIFT 0
+#define CG_UVD_MASK   0x2
+#define CG_UVD_SHIFT  1
+#define CG_VCE_MASK   0x4
+#define CG_VCE_SHIFT  2
+#define CG_SAMU_MASK  0x8
+#define CG_SAMU_SHIFT 3
+#define CG_GFX_MASK   0x10
+#define CG_GFX_SHIFT  4
+#define CG_SDMA_MASK  0x20
+#define CG_SDMA_SHIFT 5
+#define CG_HDP_MASK   0x40
+#define CG_HDP_SHIFT  6
+#define CG_MC_MASK    0x80
+#define CG_MC_SHIFT   7
+#define CG_DRM_MASK   0x100
+#define CG_DRM_SHIFT  8
+#define CG_ROM_MASK   0x200
+#define CG_ROM_SHIFT  9
+#define CG_BIF_MASK   0x400
+#define CG_BIF_SHIFT  10
+
+#if defined SMU__DGPU_ONLY
+#define SMU75_DTE_ITERATIONS 5
+#define SMU75_DTE_SOURCES 3
+#define SMU75_DTE_SINKS 1
+#define SMU75_NUM_CPU_TES 0
+#define SMU75_NUM_GPU_TES 1
+#define SMU75_NUM_NON_TES 2
+#define SMU75_DTE_FAN_SCALAR_MIN 0x100
+#define SMU75_DTE_FAN_SCALAR_MAX 0x166
+#define SMU75_DTE_FAN_TEMP_MAX 93
+#define SMU75_DTE_FAN_TEMP_MIN 83
+#endif
+#define SMU75_THERMAL_INPUT_LOOP_COUNT 2
+#define SMU75_THERMAL_CLAMP_MODE_COUNT 2
+
+#define EXP_M1_1  93
+#define EXP_M2_1  195759
+#define EXP_B_1   111176531
+
+#define EXP_M1_2  67
+#define EXP_M2_2  153720
+#define EXP_B_2   94415767
+
+#define EXP_M1_3  48
+#define EXP_M2_3  119796
+#define EXP_B_3   79195279
+
+#define EXP_M1_4  550
+#define EXP_M2_4  1484190
+#define EXP_B_4   1051432828
+
+#define EXP_M1_5  394
+#define EXP_M2_5  1143049
+#define EXP_B_5   864288432
+
+struct SMU7_HystController_Data {
+	uint16_t waterfall_up;
+	uint16_t waterfall_down;
+	uint16_t waterfall_limit;
+	uint16_t release_cnt;
+	uint16_t release_limit;
+	uint16_t spare;
+};
+
+typedef struct SMU7_HystController_Data SMU7_HystController_Data;
+
+struct SMU75_PIDController {
+	uint32_t Ki;
+	int32_t LFWindupUpperLim;
+	int32_t LFWindupLowerLim;
+	uint32_t StatePrecision;
+	uint32_t LfPrecision;
+	uint32_t LfOffset;
+	uint32_t MaxState;
+	uint32_t MaxLfFraction;
+	uint32_t StateShift;
+};
+
+typedef struct SMU75_PIDController SMU75_PIDController;
+
+struct SMU7_LocalDpmScoreboard {
+	uint32_t PercentageBusy;
+
+	int32_t  PIDError;
+	int32_t  PIDIntegral;
+	int32_t  PIDOutput;
+
+	uint32_t SigmaDeltaAccum;
+	uint32_t SigmaDeltaOutput;
+	uint32_t SigmaDeltaLevel;
+
+	uint32_t UtilizationSetpoint;
+
+	uint8_t  TdpClampMode;
+	uint8_t  TdcClampMode;
+	uint8_t  ThermClampMode;
+	uint8_t  VoltageBusy;
+
+	int8_t   CurrLevel;
+	int8_t   TargLevel;
+	uint8_t  LevelChangeInProgress;
+	uint8_t  UpHyst;
+
+	uint8_t  DownHyst;
+	uint8_t  VoltageDownHyst;
+	uint8_t  DpmEnable;
+	uint8_t  DpmRunning;
+
+	uint8_t  DpmForce;
+	uint8_t  DpmForceLevel;
+	uint8_t  DisplayWatermark;
+	uint8_t  McArbIndex;
+
+	uint32_t MinimumPerfSclk;
+
+	uint8_t  AcpiReq;
+	uint8_t  AcpiAck;
+	uint8_t  GfxClkSlow;
+	uint8_t  GpioClampMode;
+
+	uint8_t  EnableModeSwitchRLCNotification;
+	uint8_t  EnabledLevelsChange;
+	uint8_t  DteClampMode;
+	uint8_t  FpsClampMode;
+
+	uint16_t LevelResidencyCounters [SMU75_MAX_LEVELS_GRAPHICS];
+	uint16_t LevelSwitchCounters [SMU75_MAX_LEVELS_GRAPHICS];
+
+	void     (*TargetStateCalculator)(uint8_t);
+	void     (*SavedTargetStateCalculator)(uint8_t);
+
+	uint16_t AutoDpmInterval;
+	uint16_t AutoDpmRange;
+
+	uint8_t  FpsEnabled;
+	uint8_t  MaxPerfLevel;
+	uint8_t  AllowLowClkInterruptToHost;
+	uint8_t  FpsRunning;
+
+	uint32_t MaxAllowedFrequency;
+
+	uint32_t FilteredSclkFrequency;
+	uint32_t LastSclkFrequency;
+	uint32_t FilteredSclkFrequencyCnt;
+
+	uint8_t MinPerfLevel;
+#ifdef SMU__FIRMWARE_SCKS_PRESENT__1
+	uint8_t ScksClampMode;
+	uint8_t padding[2];
+#else
+	uint8_t padding[3];
+#endif
+
+	uint16_t FpsAlpha;
+	uint16_t DeltaTime;
+	uint32_t CurrentFps;
+	uint32_t FilteredFps;
+	uint32_t FrameCount;
+	uint32_t FrameCountLast;
+	uint16_t FpsTargetScalar;
+	uint16_t FpsWaterfallLimitScalar;
+	uint16_t FpsAlphaScalar;
+	uint16_t spare8;
+	SMU7_HystController_Data HystControllerData;
+};
+
+typedef struct SMU7_LocalDpmScoreboard SMU7_LocalDpmScoreboard;
+
+#define SMU7_MAX_VOLTAGE_CLIENTS 12
+
+typedef uint8_t (*VoltageChangeHandler_t)(uint16_t, uint8_t);
+
+#define VDDC_MASK    0x00007FFF
+#define VDDC_SHIFT   0
+#define VDDCI_MASK   0x3FFF8000
+#define VDDCI_SHIFT  15
+#define PHASES_MASK  0xC0000000
+#define PHASES_SHIFT 30
+
+typedef uint32_t SMU_VoltageLevel;
+
+struct SMU7_VoltageScoreboard {
+	SMU_VoltageLevel TargetVoltage;
+	uint16_t MaxVid;
+	uint8_t  HighestVidOffset;
+	uint8_t  CurrentVidOffset;
+
+	uint16_t CurrentVddc;
+	uint16_t CurrentVddci;
+
+	uint8_t  ControllerBusy;
+	uint8_t  CurrentVid;
+	uint8_t  CurrentVddciVid;
+	uint8_t  padding;
+
+	SMU_VoltageLevel RequestedVoltage[SMU7_MAX_VOLTAGE_CLIENTS];
+	SMU_VoltageLevel TargetVoltageState;
+	uint8_t  EnabledRequest[SMU7_MAX_VOLTAGE_CLIENTS];
+
+	uint8_t  padding2;
+	uint8_t  padding3;
+	uint8_t  ControllerEnable;
+	uint8_t  ControllerRunning;
+	uint16_t CurrentStdVoltageHiSidd;
+	uint16_t CurrentStdVoltageLoSidd;
+	uint8_t  OverrideVoltage;
+	uint8_t  padding4;
+	uint8_t  padding5;
+	uint8_t  CurrentPhases;
+
+	VoltageChangeHandler_t ChangeVddc;
+	VoltageChangeHandler_t ChangeVddci;
+	VoltageChangeHandler_t ChangePhase;
+	VoltageChangeHandler_t ChangeMvdd;
+
+	VoltageChangeHandler_t functionLinks[6];
+
+	uint16_t * VddcFollower1;
+	int16_t  Driver_OD_RequestedVidOffset1;
+	int16_t  Driver_OD_RequestedVidOffset2;
+};
+
+typedef struct SMU7_VoltageScoreboard SMU7_VoltageScoreboard;
+
+#define SMU7_MAX_PCIE_LINK_SPEEDS 3
+
+struct SMU7_PCIeLinkSpeedScoreboard {
+	uint8_t     DpmEnable;
+	uint8_t     DpmRunning;
+	uint8_t     DpmForce;
+	uint8_t     DpmForceLevel;
+
+	uint8_t     CurrentLinkSpeed;
+	uint8_t     EnabledLevelsChange;
+	uint16_t    AutoDpmInterval;
+
+	uint16_t    AutoDpmRange;
+	uint16_t    AutoDpmCount;
+
+	uint8_t     DpmMode;
+	uint8_t     AcpiReq;
+	uint8_t     AcpiAck;
+	uint8_t     CurrentLinkLevel;
+};
+
+typedef struct SMU7_PCIeLinkSpeedScoreboard SMU7_PCIeLinkSpeedScoreboard;
+
+#define SMU7_LKGE_LUT_NUM_OF_TEMP_ENTRIES 16
+#define SMU7_LKGE_LUT_NUM_OF_VOLT_ENTRIES 16
+
+#define SMU7_SCALE_I  7
+#define SMU7_SCALE_R 12
+
+struct SMU7_PowerScoreboard {
+	uint32_t GpuPower;
+
+	uint32_t VddcPower;
+	uint32_t VddcVoltage;
+	uint32_t VddcCurrent;
+
+	uint32_t VddciPower;
+	uint32_t VddciVoltage;
+	uint32_t VddciCurrent;
+
+	uint32_t RocPower;
+
+	uint16_t Telemetry_1_slope;
+	uint16_t Telemetry_2_slope;
+	int32_t  Telemetry_1_offset;
+	int32_t  Telemetry_2_offset;
+
+	uint8_t MCLK_patch_flag;
+	uint8_t reserved[3];
+};
+
+typedef struct SMU7_PowerScoreboard SMU7_PowerScoreboard;
+
+#define SMU7_SCLK_DPM_CONFIG_MASK                        0x01
+#define SMU7_VOLTAGE_CONTROLLER_CONFIG_MASK              0x02
+#define SMU7_THERMAL_CONTROLLER_CONFIG_MASK              0x04
+#define SMU7_MCLK_DPM_CONFIG_MASK                        0x08
+#define SMU7_UVD_DPM_CONFIG_MASK                         0x10
+#define SMU7_VCE_DPM_CONFIG_MASK                         0x20
+#define SMU7_ACP_DPM_CONFIG_MASK                         0x40
+#define SMU7_SAMU_DPM_CONFIG_MASK                        0x80
+#define SMU7_PCIEGEN_DPM_CONFIG_MASK                    0x100
+
+#define SMU7_ACP_MCLK_HANDSHAKE_DISABLE                  0x00000001
+#define SMU7_ACP_SCLK_HANDSHAKE_DISABLE                  0x00000002
+#define SMU7_UVD_MCLK_HANDSHAKE_DISABLE                  0x00000100
+#define SMU7_UVD_SCLK_HANDSHAKE_DISABLE                  0x00000200
+#define SMU7_VCE_MCLK_HANDSHAKE_DISABLE                  0x00010000
+#define SMU7_VCE_SCLK_HANDSHAKE_DISABLE                  0x00020000
+
+struct SMU75_SoftRegisters {
+	uint32_t        RefClockFrequency;
+	uint32_t        PmTimerPeriod;
+	uint32_t        FeatureEnables;
+#if defined (SMU__DGPU_ONLY)
+	uint32_t        PreVBlankGap;
+	uint32_t        VBlankTimeout;
+	uint32_t        TrainTimeGap;
+	uint32_t        MvddSwitchTime;
+	uint32_t        LongestAcpiTrainTime;
+	uint32_t        AcpiDelay;
+	uint32_t        G5TrainTime;
+	uint32_t        DelayMpllPwron;
+	uint32_t        VoltageChangeTimeout;
+#endif
+	uint32_t        HandshakeDisables;
+
+	uint8_t         DisplayPhy1Config;
+	uint8_t         DisplayPhy2Config;
+	uint8_t         DisplayPhy3Config;
+	uint8_t         DisplayPhy4Config;
+
+	uint8_t         DisplayPhy5Config;
+	uint8_t         DisplayPhy6Config;
+	uint8_t         DisplayPhy7Config;
+	uint8_t         DisplayPhy8Config;
+
+	uint32_t        AverageGraphicsActivity;
+	uint32_t        AverageMemoryActivity;
+	uint32_t        AverageGioActivity;
+
+	uint8_t         SClkDpmEnabledLevels;
+	uint8_t         MClkDpmEnabledLevels;
+	uint8_t         LClkDpmEnabledLevels;
+	uint8_t         PCIeDpmEnabledLevels;
+
+	uint8_t         UVDDpmEnabledLevels;
+	uint8_t         SAMUDpmEnabledLevels;
+	uint8_t         ACPDpmEnabledLevels;
+	uint8_t         VCEDpmEnabledLevels;
+
+	uint32_t        DRAM_LOG_ADDR_H;
+	uint32_t        DRAM_LOG_ADDR_L;
+	uint32_t        DRAM_LOG_PHY_ADDR_H;
+	uint32_t        DRAM_LOG_PHY_ADDR_L;
+	uint32_t        DRAM_LOG_BUFF_SIZE;
+	uint32_t        UlvEnterCount;
+	uint32_t        UlvTime;
+	uint32_t        UcodeLoadStatus;
+	uint32_t        AllowMvddSwitch;
+	uint8_t         Activity_Weight;
+	uint8_t         Reserved8[3];
+};
+
+typedef struct SMU75_SoftRegisters SMU75_SoftRegisters;
+
+struct SMU75_Firmware_Header {
+	uint32_t Digest[5];
+	uint32_t Version;
+	uint32_t HeaderSize;
+	uint32_t Flags;
+	uint32_t EntryPoint;
+	uint32_t CodeSize;
+	uint32_t ImageSize;
+
+	uint32_t Rtos;
+	uint32_t SoftRegisters;
+	uint32_t DpmTable;
+	uint32_t FanTable;
+	uint32_t CacConfigTable;
+	uint32_t CacStatusTable;
+	uint32_t mcRegisterTable;
+	uint32_t mcArbDramTimingTable;
+	uint32_t PmFuseTable;
+	uint32_t Globals;
+	uint32_t ClockStretcherTable;
+	uint32_t VftTable;
+	uint32_t Reserved1;
+	uint32_t AvfsCksOff_AvfsGbvTable;
+	uint32_t AvfsCksOff_BtcGbvTable;
+	uint32_t MM_AvfsTable;
+	uint32_t PowerSharingTable;
+	uint32_t AvfsTable;
+	uint32_t AvfsCksOffGbvTable;
+	uint32_t AvfsMeanNSigma;
+	uint32_t AvfsSclkOffsetTable;
+	uint32_t Reserved[12];
+	uint32_t Signature;
+};
+
+typedef struct SMU75_Firmware_Header SMU75_Firmware_Header;
+
+#define SMU7_FIRMWARE_HEADER_LOCATION 0x20000
+
+enum  DisplayConfig {
+	PowerDown = 1,
+	DP54x4,
+	DP54x2,
+	DP54x1,
+	DP27x4,
+	DP27x2,
+	DP27x1,
+	HDMI297,
+	HDMI162,
+	LVDS,
+	DP324x4,
+	DP324x2,
+	DP324x1
+};
+
+#define MC_BLOCK_COUNT 1
+#define CPL_BLOCK_COUNT 5
+#define SE_BLOCK_COUNT 15
+#define GC_BLOCK_COUNT 24
+
+struct SMU7_Local_Cac {
+	uint8_t BlockId;
+	uint8_t SignalId;
+	uint8_t Threshold;
+	uint8_t Padding;
+};
+
+typedef struct SMU7_Local_Cac SMU7_Local_Cac;
+
+struct SMU7_Local_Cac_Table {
+	SMU7_Local_Cac CplLocalCac[CPL_BLOCK_COUNT];
+	SMU7_Local_Cac McLocalCac[MC_BLOCK_COUNT];
+	SMU7_Local_Cac SeLocalCac[SE_BLOCK_COUNT];
+	SMU7_Local_Cac GcLocalCac[GC_BLOCK_COUNT];
+};
+
+typedef struct SMU7_Local_Cac_Table SMU7_Local_Cac_Table;
+
+#pragma pack(pop)
+
+#define CG_SYS_BITMASK_FIRST_BIT      0
+#define CG_SYS_BITMASK_LAST_BIT       10
+#define CG_SYS_BIF_MGLS_SHIFT         0
+#define CG_SYS_ROM_SHIFT              1
+#define CG_SYS_MC_MGCG_SHIFT          2
+#define CG_SYS_MC_MGLS_SHIFT          3
+#define CG_SYS_SDMA_MGCG_SHIFT        4
+#define CG_SYS_SDMA_MGLS_SHIFT        5
+#define CG_SYS_DRM_MGCG_SHIFT         6
+#define CG_SYS_HDP_MGCG_SHIFT         7
+#define CG_SYS_HDP_MGLS_SHIFT         8
+#define CG_SYS_DRM_MGLS_SHIFT         9
+#define CG_SYS_BIF_MGCG_SHIFT         10
+
+#define CG_SYS_BIF_MGLS_MASK          0x1
+#define CG_SYS_ROM_MASK               0x2
+#define CG_SYS_MC_MGCG_MASK           0x4
+#define CG_SYS_MC_MGLS_MASK           0x8
+#define CG_SYS_SDMA_MGCG_MASK         0x10
+#define CG_SYS_SDMA_MGLS_MASK         0x20
+#define CG_SYS_DRM_MGCG_MASK          0x40
+#define CG_SYS_HDP_MGCG_MASK          0x80
+#define CG_SYS_HDP_MGLS_MASK          0x100
+#define CG_SYS_DRM_MGLS_MASK          0x200
+#define CG_SYS_BIF_MGCG_MASK          0x400
+
+#define CG_GFX_BITMASK_FIRST_BIT      16
+#define CG_GFX_BITMASK_LAST_BIT       24
+
+#define CG_GFX_CGCG_SHIFT             16
+#define CG_GFX_CGLS_SHIFT             17
+#define CG_CPF_MGCG_SHIFT             18
+#define CG_RLC_MGCG_SHIFT             19
+#define CG_GFX_OTHERS_MGCG_SHIFT      20
+#define CG_GFX_3DCG_SHIFT             21
+#define CG_GFX_3DLS_SHIFT             22
+#define CG_GFX_RLC_LS_SHIFT           23
+#define CG_GFX_CP_LS_SHIFT            24
+
+#define CG_GFX_CGCG_MASK              0x00010000
+#define CG_GFX_CGLS_MASK              0x00020000
+#define CG_CPF_MGCG_MASK              0x00040000
+#define CG_RLC_MGCG_MASK              0x00080000
+#define CG_GFX_OTHERS_MGCG_MASK       0x00100000
+#define CG_GFX_3DCG_MASK              0x00200000
+#define CG_GFX_3DLS_MASK              0x00400000
+#define CG_GFX_RLC_LS_MASK            0x00800000
+#define CG_GFX_CP_LS_MASK             0x01000000
+
+
+#define VRCONF_VDDC_MASK         0x000000FF
+#define VRCONF_VDDC_SHIFT        0
+#define VRCONF_VDDGFX_MASK       0x0000FF00
+#define VRCONF_VDDGFX_SHIFT      8
+#define VRCONF_VDDCI_MASK        0x00FF0000
+#define VRCONF_VDDCI_SHIFT       16
+#define VRCONF_MVDD_MASK         0xFF000000
+#define VRCONF_MVDD_SHIFT        24
+
+#define VR_MERGED_WITH_VDDC      0
+#define VR_SVI2_PLANE_1          1
+#define VR_SVI2_PLANE_2          2
+#define VR_SMIO_PATTERN_1        3
+#define VR_SMIO_PATTERN_2        4
+#define VR_STATIC_VOLTAGE        5
+
+#define CLOCK_STRETCHER_MAX_ENTRIES 0x4
+#define CKS_LOOKUPTable_MAX_ENTRIES 0x4
+
+#define CLOCK_STRETCHER_SETTING_DDT_MASK             0x01
+#define CLOCK_STRETCHER_SETTING_DDT_SHIFT            0x0
+#define CLOCK_STRETCHER_SETTING_STRETCH_AMOUNT_MASK  0x1E
+#define CLOCK_STRETCHER_SETTING_STRETCH_AMOUNT_SHIFT 0x1
+#define CLOCK_STRETCHER_SETTING_ENABLE_MASK          0x80
+#define CLOCK_STRETCHER_SETTING_ENABLE_SHIFT         0x7
+
+struct SMU_ClockStretcherDataTableEntry {
+	uint8_t minVID;
+	uint8_t maxVID;
+
+	uint16_t setting;
+};
+typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry;
+
+struct SMU_ClockStretcherDataTable {
+	SMU_ClockStretcherDataTableEntry ClockStretcherDataTableEntry[CLOCK_STRETCHER_MAX_ENTRIES];
+};
+typedef struct SMU_ClockStretcherDataTable SMU_ClockStretcherDataTable;
+
+struct SMU_CKS_LOOKUPTableEntry {
+	uint16_t minFreq;
+	uint16_t maxFreq;
+
+	uint8_t setting;
+	uint8_t padding[3];
+};
+typedef struct SMU_CKS_LOOKUPTableEntry SMU_CKS_LOOKUPTableEntry;
+
+struct SMU_CKS_LOOKUPTable {
+	SMU_CKS_LOOKUPTableEntry CKS_LOOKUPTableEntry[CKS_LOOKUPTable_MAX_ENTRIES];
+};
+typedef struct SMU_CKS_LOOKUPTable SMU_CKS_LOOKUPTable;
+
+struct AgmAvfsData_t {
+	uint16_t avgPsmCount[28];
+	uint16_t minPsmCount[28];
+};
+typedef struct AgmAvfsData_t AgmAvfsData_t;
+
+enum VFT_COLUMNS {
+	SCLK0,
+	SCLK1,
+	SCLK2,
+	SCLK3,
+	SCLK4,
+	SCLK5,
+	SCLK6,
+	SCLK7,
+
+	NUM_VFT_COLUMNS
+};
+enum {
+  SCS_FUSE_T0,
+  SCS_FUSE_T1,
+  NUM_SCS_FUSE_TEMPERATURE
+};
+enum {
+  SCKS_ON,
+  SCKS_OFF,
+  NUM_SCKS_STATE_TYPES
+};
+
+#define VFT_TABLE_DEFINED
+
+#define TEMP_RANGE_MAXSTEPS 12
+struct VFT_CELL_t {
+	uint16_t Voltage;
+};
+
+typedef struct VFT_CELL_t VFT_CELL_t;
+#ifdef SMU__FIRMWARE_SCKS_PRESENT__1
+struct SCS_CELL_t {
+	uint16_t PsmCnt[NUM_SCKS_STATE_TYPES];
+};
+typedef struct SCS_CELL_t SCS_CELL_t;
+#endif
+
+struct VFT_TABLE_t {
+	VFT_CELL_t    Cell[TEMP_RANGE_MAXSTEPS][NUM_VFT_COLUMNS];
+	uint16_t      AvfsGbv [NUM_VFT_COLUMNS];
+	uint16_t      BtcGbv  [NUM_VFT_COLUMNS];
+	int16_t       Temperature [TEMP_RANGE_MAXSTEPS];
+
+#ifdef SMU__FIRMWARE_SCKS_PRESENT__1
+	SCS_CELL_t    ScksCell[TEMP_RANGE_MAXSTEPS][NUM_VFT_COLUMNS];
+#endif
+
+	uint8_t       NumTemperatureSteps;
+	uint8_t       padding[3];
+};
+typedef struct VFT_TABLE_t VFT_TABLE_t;
+
+#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2
+#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2
+
+struct GB_VDROOP_TABLE_t {
+	int32_t a0;
+	int32_t a1;
+	int32_t a2;
+	uint32_t spare;
+};
+typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t;
+
+struct SMU_QuadraticCoeffs {
+	int32_t m1;
+	int32_t b;
+
+	int16_t m2;
+	uint8_t m1_shift;
+	uint8_t m2_shift;
+};
+typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
+
+struct AVFS_Margin_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_Margin_t AVFS_Margin_t;
+
+struct AVFS_CksOff_Gbv_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t;
+
+struct AVFS_CksOff_AvfsGbv_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_AvfsGbv_t AVFS_CksOff_AvfsGbv_t;
+
+struct AVFS_CksOff_BtcGbv_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_BtcGbv_t AVFS_CksOff_BtcGbv_t;
+
+struct AVFS_meanNsigma_t {
+	uint32_t Aconstant[3];
+	uint16_t DC_tol_sigma;
+	uint16_t Platform_mean;
+	uint16_t Platform_sigma;
+	uint16_t PSM_Age_CompFactor;
+	uint8_t  Static_Voltage_Offset[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t;
+
+struct AVFS_Sclk_Offset_t {
+	uint16_t Sclk_Offset[8];
+};
+typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t;
+
+struct Power_Sharing_t {
+	uint32_t EnergyCounter;
+	uint32_t EngeryThreshold;
+	uint64_t AM_SCLK_CNT;
+	uint64_t AM_0_BUSY_CNT;
+};
+typedef struct Power_Sharing_t  Power_Sharing_t;
+
+
+#endif
+
+
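Note: smu75.h packs a full voltage request into the 32-bit SMU_VoltageLevel: VDDC in bits 14:0, VDDCI in bits 29:15 and the phase count in the top two bits, per the VDDC_/VDDCI_/PHASES_ masks above. A pack/unpack sketch using only those definitions (callers must keep each field in range):

	/* Sketch: compose and decode an SMU_VoltageLevel. */
	static SMU_VoltageLevel pack_voltage(uint16_t vddc, uint16_t vddci,
					     uint8_t phases)
	{
		return ((uint32_t)vddc << VDDC_SHIFT) |
		       ((uint32_t)vddci << VDDCI_SHIFT) |
		       ((uint32_t)phases << PHASES_SHIFT);
	}

	static uint16_t unpack_vddc(SMU_VoltageLevel level)
	{
		return (level & VDDC_MASK) >> VDDC_SHIFT;
	}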
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h
new file mode 100644
index 0000000..b64e58a2
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu75_discrete.h
@@ -0,0 +1,886 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef SMU75_DISCRETE_H
+#define SMU75_DISCRETE_H
+
+#include "smu75.h"
+
+#pragma pack(push, 1)
+
+#define NUM_SCLK_RANGE 8
+
+#define VCO_3_6 1
+#define VCO_2_4 3
+
+#define POSTDIV_DIV_BY_1  0
+#define POSTDIV_DIV_BY_2  1
+#define POSTDIV_DIV_BY_4  2
+#define POSTDIV_DIV_BY_8  3
+#define POSTDIV_DIV_BY_16 4
+
+struct sclkFcwRange_t {
+	uint8_t  vco_setting; /* 1: 3-6GHz, 3: 2-4GHz */
+	uint8_t  postdiv;     /* divide by 2^n */
+	uint16_t fcw_pcc;
+	uint16_t fcw_trans_upper;
+	uint16_t fcw_trans_lower;
+};
+typedef struct sclkFcwRange_t sclkFcwRange_t;
+
+struct SMIO_Pattern {
+	uint16_t Voltage;
+	uint8_t  Smio;
+	uint8_t  padding;
+};
+
+typedef struct SMIO_Pattern SMIO_Pattern;
+
+struct SMIO_Table {
+	SMIO_Pattern Pattern[SMU_MAX_SMIO_LEVELS];
+};
+
+typedef struct SMIO_Table SMIO_Table;
+
+struct SMU_SclkSetting {
+	uint32_t    SclkFrequency;
+	uint16_t    Fcw_int;
+	uint16_t    Fcw_frac;
+	uint16_t    Pcc_fcw_int;
+	uint8_t     PllRange;
+	uint8_t     SSc_En;
+	uint16_t    Sclk_slew_rate;
+	uint16_t    Pcc_up_slew_rate;
+	uint16_t    Pcc_down_slew_rate;
+	uint16_t    Fcw1_int;
+	uint16_t    Fcw1_frac;
+	uint16_t    Sclk_ss_slew_rate;
+};
+typedef struct SMU_SclkSetting SMU_SclkSetting;
+
+struct SMU75_Discrete_GraphicsLevel {
+	SMU_VoltageLevel MinVoltage;
+
+	uint8_t     pcieDpmLevel;
+	uint8_t     DeepSleepDivId;
+	uint16_t    ActivityLevel;
+
+	uint32_t    CgSpllFuncCntl3;
+	uint32_t    CgSpllFuncCntl4;
+	uint32_t    CcPwrDynRm;
+	uint32_t    CcPwrDynRm1;
+
+	uint8_t     SclkDid;
+	uint8_t     padding;
+	uint8_t     EnabledForActivity;
+	uint8_t     EnabledForThrottle;
+	uint8_t     UpHyst;
+	uint8_t     DownHyst;
+	uint8_t     VoltageDownHyst;
+	uint8_t     PowerThrottle;
+
+	SMU_SclkSetting SclkSetting;
+
+	uint8_t  ScksStretchThreshVid[NUM_SCKS_STATE_TYPES];
+	uint16_t Padding;
+};
+
+typedef struct SMU75_Discrete_GraphicsLevel SMU75_Discrete_GraphicsLevel;
+
+struct SMU75_Discrete_ACPILevel {
+	uint32_t    Flags;
+	SMU_VoltageLevel MinVoltage;
+	uint32_t    SclkFrequency;
+	uint8_t     SclkDid;
+	uint8_t     DisplayWatermark;
+	uint8_t     DeepSleepDivId;
+	uint8_t     padding;
+	uint32_t    CcPwrDynRm;
+	uint32_t    CcPwrDynRm1;
+
+	SMU_SclkSetting SclkSetting;
+};
+
+typedef struct SMU75_Discrete_ACPILevel SMU75_Discrete_ACPILevel;
+
+struct SMU75_Discrete_Ulv {
+	uint32_t    CcPwrDynRm;
+	uint32_t    CcPwrDynRm1;
+	uint16_t    VddcOffset;
+	uint8_t     VddcOffsetVid;
+	uint8_t     VddcPhase;
+	uint16_t    BifSclkDfs;
+	uint16_t    Reserved;
+};
+
+typedef struct SMU75_Discrete_Ulv SMU75_Discrete_Ulv;
+
+struct SMU75_Discrete_MemoryLevel {
+	SMU_VoltageLevel MinVoltage;
+	uint32_t    MinMvdd;
+
+	uint32_t    MclkFrequency;
+
+	uint8_t     StutterEnable;
+	uint8_t     EnabledForThrottle;
+	uint8_t     EnabledForActivity;
+	uint8_t     padding_0;
+
+	uint8_t     UpHyst;
+	uint8_t     DownHyst;
+	uint8_t     VoltageDownHyst;
+	uint8_t     padding_1;
+
+	uint16_t    ActivityLevel;
+	uint8_t     DisplayWatermark;
+	uint8_t     padding_2;
+
+	uint16_t    Fcw_int;
+	uint16_t    Fcw_frac;
+	uint8_t     Postdiv;
+	uint8_t     padding_3[3];
+};
+
+typedef struct SMU75_Discrete_MemoryLevel SMU75_Discrete_MemoryLevel;
+
+struct SMU75_Discrete_LinkLevel {
+	uint8_t     PcieGenSpeed;
+	uint8_t     PcieLaneCount;
+	uint8_t     EnabledForActivity;
+	uint8_t     SPC;
+	uint32_t    DownThreshold;
+	uint32_t    UpThreshold;
+	uint16_t    BifSclkDfs;
+	uint16_t    Reserved;
+};
+
+typedef struct SMU75_Discrete_LinkLevel SMU75_Discrete_LinkLevel;
+
+
+/* MC ARB DRAM Timing registers. */
+struct SMU75_Discrete_MCArbDramTimingTableEntry {
+	uint32_t McArbDramTiming;
+	uint32_t McArbDramTiming2;
+	uint32_t McArbBurstTime;
+	uint32_t McArbRfshRate;
+	uint32_t McArbMisc3;
+};
+
+typedef struct SMU75_Discrete_MCArbDramTimingTableEntry SMU75_Discrete_MCArbDramTimingTableEntry;
+
+struct SMU75_Discrete_MCArbDramTimingTable {
+	SMU75_Discrete_MCArbDramTimingTableEntry entries[SMU__NUM_SCLK_DPM_STATE][SMU__NUM_MCLK_DPM_LEVELS];
+};
+
+typedef struct SMU75_Discrete_MCArbDramTimingTable SMU75_Discrete_MCArbDramTimingTable;
+
+/* UVD VCLK/DCLK state (level) definition. */
+struct SMU75_Discrete_UvdLevel {
+	uint32_t VclkFrequency;
+	uint32_t DclkFrequency;
+	SMU_VoltageLevel MinVoltage;
+	uint8_t  VclkDivider;
+	uint8_t  DclkDivider;
+	uint8_t  padding[2];
+};
+
+typedef struct SMU75_Discrete_UvdLevel SMU75_Discrete_UvdLevel;
+
+/* Clocks for other external blocks (VCE, ACP, SAMU). */
+struct SMU75_Discrete_ExtClkLevel {
+	uint32_t Frequency;
+	SMU_VoltageLevel MinVoltage;
+	uint8_t  Divider;
+	uint8_t  padding[3];
+};
+
+typedef struct SMU75_Discrete_ExtClkLevel SMU75_Discrete_ExtClkLevel;
+
+struct SMU75_Discrete_StateInfo {
+	uint32_t SclkFrequency;
+	uint32_t MclkFrequency;
+	uint32_t VclkFrequency;
+	uint32_t DclkFrequency;
+	uint32_t SamclkFrequency;
+	uint32_t AclkFrequency;
+	uint32_t EclkFrequency;
+	uint16_t MvddVoltage;
+	uint16_t padding16;
+	uint8_t  DisplayWatermark;
+	uint8_t  McArbIndex;
+	uint8_t  McRegIndex;
+	uint8_t  SeqIndex;
+	uint8_t  SclkDid;
+	int8_t   SclkIndex;
+	int8_t   MclkIndex;
+	uint8_t  PCIeGen;
+};
+
+typedef struct SMU75_Discrete_StateInfo SMU75_Discrete_StateInfo;
+
+struct SMU75_Discrete_DpmTable {
+	SMU75_PIDController                  GraphicsPIDController;
+	SMU75_PIDController                  MemoryPIDController;
+	SMU75_PIDController                  LinkPIDController;
+
+	uint32_t                            SystemFlags;
+
+	uint32_t                            VRConfig;
+	uint32_t                            SmioMask1;
+	uint32_t                            SmioMask2;
+	SMIO_Table                          SmioTable1;
+	SMIO_Table                          SmioTable2;
+
+	uint32_t                            MvddLevelCount;
+
+	uint8_t                             BapmVddcVidHiSidd        [SMU75_MAX_LEVELS_VDDC];
+	uint8_t                             BapmVddcVidLoSidd        [SMU75_MAX_LEVELS_VDDC];
+	uint8_t                             BapmVddcVidHiSidd2       [SMU75_MAX_LEVELS_VDDC];
+
+	uint8_t                             GraphicsDpmLevelCount;
+	uint8_t                             MemoryDpmLevelCount;
+	uint8_t                             LinkLevelCount;
+	uint8_t                             MasterDeepSleepControl;
+
+	uint8_t                             UvdLevelCount;
+	uint8_t                             VceLevelCount;
+	uint8_t                             AcpLevelCount;
+	uint8_t                             SamuLevelCount;
+
+	uint8_t                             ThermOutGpio;
+	uint8_t                             ThermOutPolarity;
+	uint8_t                             ThermOutMode;
+	uint8_t                             BootPhases;
+
+	uint8_t                             VRHotLevel;
+	uint8_t                             LdoRefSel;
+
+	uint8_t                             Reserved1[2];
+
+	uint16_t                            FanStartTemperature;
+	uint16_t                            FanStopTemperature;
+
+	uint16_t                            MaxVoltage;
+	uint16_t                            Reserved2;
+	uint32_t                            Reserved;
+
+	SMU75_Discrete_GraphicsLevel        GraphicsLevel           [SMU75_MAX_LEVELS_GRAPHICS];
+	SMU75_Discrete_MemoryLevel          MemoryACPILevel;
+	SMU75_Discrete_MemoryLevel          MemoryLevel             [SMU75_MAX_LEVELS_MEMORY];
+	SMU75_Discrete_LinkLevel            LinkLevel               [SMU75_MAX_LEVELS_LINK];
+	SMU75_Discrete_ACPILevel            ACPILevel;
+	SMU75_Discrete_UvdLevel             UvdLevel                [SMU75_MAX_LEVELS_UVD];
+	SMU75_Discrete_ExtClkLevel          VceLevel                [SMU75_MAX_LEVELS_VCE];
+	SMU75_Discrete_ExtClkLevel          AcpLevel                [SMU75_MAX_LEVELS_ACP];
+	SMU75_Discrete_ExtClkLevel          SamuLevel               [SMU75_MAX_LEVELS_SAMU];
+	SMU75_Discrete_Ulv                  Ulv;
+
+	uint8_t                             DisplayWatermark        [SMU75_MAX_LEVELS_MEMORY][SMU75_MAX_LEVELS_GRAPHICS];
+
+	uint32_t                            SclkStepSize;
+	uint32_t                            Smio                    [SMU75_MAX_ENTRIES_SMIO];
+
+	uint8_t                             UvdBootLevel;
+	uint8_t                             VceBootLevel;
+	uint8_t                             AcpBootLevel;
+	uint8_t                             SamuBootLevel;
+
+	uint8_t                             GraphicsBootLevel;
+	uint8_t                             GraphicsVoltageChangeEnable;
+	uint8_t                             GraphicsThermThrottleEnable;
+	uint8_t                             GraphicsInterval;
+
+	uint8_t                             VoltageInterval;
+	uint8_t                             ThermalInterval;
+	uint16_t                            TemperatureLimitHigh;
+
+	uint16_t                            TemperatureLimitLow;
+	uint8_t                             MemoryBootLevel;
+	uint8_t                             MemoryVoltageChangeEnable;
+
+	uint16_t                            BootMVdd;
+	uint8_t                             MemoryInterval;
+	uint8_t                             MemoryThermThrottleEnable;
+
+	uint16_t                            VoltageResponseTime;
+	uint16_t                            PhaseResponseTime;
+
+	uint8_t                             PCIeBootLinkLevel;
+	uint8_t                             PCIeGenInterval;
+	uint8_t                             DTEInterval;
+	uint8_t                             DTEMode;
+
+	uint8_t                             SVI2Enable;
+	uint8_t                             VRHotGpio;
+	uint8_t                             AcDcGpio;
+	uint8_t                             ThermGpio;
+
+	uint16_t                            PPM_PkgPwrLimit;
+	uint16_t                            PPM_TemperatureLimit;
+
+	uint16_t                            DefaultTdp;
+	uint16_t                            TargetTdp;
+
+	uint16_t                            FpsHighThreshold;
+	uint16_t                            FpsLowThreshold;
+
+	uint16_t                            BAPMTI_R  [SMU75_DTE_ITERATIONS][SMU75_DTE_SOURCES][SMU75_DTE_SINKS];
+	uint16_t                            BAPMTI_RC [SMU75_DTE_ITERATIONS][SMU75_DTE_SOURCES][SMU75_DTE_SINKS];
+
+	uint16_t                            TemperatureLimitEdge;
+	uint16_t                            TemperatureLimitHotspot;
+
+	uint16_t                            BootVddc;
+	uint16_t                            BootVddci;
+
+	uint16_t                            FanGainEdge;
+	uint16_t                            FanGainHotspot;
+
+	uint32_t                            LowSclkInterruptThreshold;
+	uint32_t                            VddGfxReChkWait;
+
+	uint8_t                             ClockStretcherAmount;
+	uint8_t                             Sclk_CKS_masterEn0_7;
+	uint8_t                             Sclk_CKS_masterEn8_15;
+	uint8_t                             DPMFreezeAndForced;
+
+	uint8_t                             Sclk_voltageOffset[8];
+
+	SMU_ClockStretcherDataTable         ClockStretcherDataTable;
+	SMU_CKS_LOOKUPTable                 CKS_LOOKUPTable;
+
+	uint32_t                            CurrSclkPllRange;
+	sclkFcwRange_t                      SclkFcwRangeTable[NUM_SCLK_RANGE];
+
+	GB_VDROOP_TABLE_t                   BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES];
+	SMU_QuadraticCoeffs                 AVFSGB_FUSE_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES];
+};
+
+typedef struct SMU75_Discrete_DpmTable SMU75_Discrete_DpmTable;
+
+struct SMU75_Discrete_FanTable {
+	uint16_t FdoMode;
+	int16_t  TempMin;
+	int16_t  TempMed;
+	int16_t  TempMax;
+	int16_t  Slope1;
+	int16_t  Slope2;
+	int16_t  FdoMin;
+	int16_t  HystUp;
+	int16_t  HystDown;
+	int16_t  HystSlope;
+	int16_t  TempRespLim;
+	int16_t  TempCurr;
+	int16_t  SlopeCurr;
+	int16_t  PwmCurr;
+	uint32_t RefreshPeriod;
+	int16_t  FdoMax;
+	uint8_t  TempSrc;
+	int8_t   Padding;
+};
+
+typedef struct SMU75_Discrete_FanTable SMU75_Discrete_FanTable;
+
+#define SMU7_DISCRETE_GPIO_SCLK_DEBUG             4
+#define SMU7_DISCRETE_GPIO_SCLK_DEBUG_BIT         (0x1 << SMU7_DISCRETE_GPIO_SCLK_DEBUG)
+
+
+
+struct SMU7_MclkDpmScoreboard {
+	uint32_t PercentageBusy;
+
+	int32_t  PIDError;
+	int32_t  PIDIntegral;
+	int32_t  PIDOutput;
+
+	uint32_t SigmaDeltaAccum;
+	uint32_t SigmaDeltaOutput;
+	uint32_t SigmaDeltaLevel;
+
+	uint32_t UtilizationSetpoint;
+
+	uint8_t  TdpClampMode;
+	uint8_t  TdcClampMode;
+	uint8_t  ThermClampMode;
+	uint8_t  VoltageBusy;
+
+	int8_t   CurrLevel;
+	int8_t   TargLevel;
+	uint8_t  LevelChangeInProgress;
+	uint8_t  UpHyst;
+
+	uint8_t  DownHyst;
+	uint8_t  VoltageDownHyst;
+	uint8_t  DpmEnable;
+	uint8_t  DpmRunning;
+
+	uint8_t  DpmForce;
+	uint8_t  DpmForceLevel;
+	uint8_t  padding2;
+	uint8_t  McArbIndex;
+
+	uint32_t MinimumPerfMclk;
+
+	uint8_t  AcpiReq;
+	uint8_t  AcpiAck;
+	uint8_t  MclkSwitchInProgress;
+	uint8_t  MclkSwitchCritical;
+
+	uint8_t  IgnoreVBlank;
+	uint8_t  TargetMclkIndex;
+	uint8_t  TargetMvddIndex;
+	uint8_t  MclkSwitchResult;
+
+	uint16_t VbiFailureCount;
+	uint8_t  VbiWaitCounter;
+	uint8_t  EnabledLevelsChange;
+
+	uint16_t LevelResidencyCounters [SMU75_MAX_LEVELS_MEMORY];
+	uint16_t LevelSwitchCounters [SMU75_MAX_LEVELS_MEMORY];
+
+	void     (*TargetStateCalculator)(uint8_t);
+	void     (*SavedTargetStateCalculator)(uint8_t);
+
+	uint16_t AutoDpmInterval;
+	uint16_t AutoDpmRange;
+
+	uint16_t VbiTimeoutCount;
+	uint16_t MclkSwitchingTime;
+
+	uint8_t  fastSwitch;
+	uint8_t  Save_PIC_VDDGFX_EXIT;
+	uint8_t  Save_PIC_VDDGFX_ENTER;
+	uint8_t  VbiTimeout;
+
+	uint32_t HbmTempRegBackup;
+};
+
+typedef struct SMU7_MclkDpmScoreboard SMU7_MclkDpmScoreboard;
+
+struct SMU7_UlvScoreboard {
+	uint8_t     EnterUlv;
+	uint8_t     ExitUlv;
+	uint8_t     UlvActive;
+	uint8_t     WaitingForUlv;
+	uint8_t     UlvEnable;
+	uint8_t     UlvRunning;
+	uint8_t     UlvMasterEnable;
+	uint8_t     padding;
+	uint32_t    UlvAbortedCount;
+	uint32_t    UlvTimeStamp;
+};
+
+typedef struct SMU7_UlvScoreboard SMU7_UlvScoreboard;
+
+struct VddgfxSavedRegisters {
+	uint32_t GPU_DBG[3];
+	uint32_t MEC_BaseAddress_Hi;
+	uint32_t MEC_BaseAddress_Lo;
+	uint32_t THM_TMON0_CTRL2__RDIR_PRESENT;
+	uint32_t THM_TMON1_CTRL2__RDIR_PRESENT;
+	uint32_t CP_INT_CNTL;
+};
+
+typedef struct VddgfxSavedRegisters VddgfxSavedRegisters;
+
+struct SMU7_VddGfxScoreboard {
+	uint8_t     VddGfxEnable;
+	uint8_t     VddGfxActive;
+	uint8_t     VPUResetOccured;
+	uint8_t     padding;
+
+	uint32_t    VddGfxEnteredCount;
+	uint32_t    VddGfxAbortedCount;
+
+	uint32_t    VddGfxVid;
+
+	VddgfxSavedRegisters SavedRegisters;
+};
+
+typedef struct SMU7_VddGfxScoreboard SMU7_VddGfxScoreboard;
+
+struct SMU7_TdcLimitScoreboard {
+	uint8_t  Enable;
+	uint8_t  Running;
+	uint16_t Alpha;
+	uint32_t FilteredIddc;
+	uint32_t IddcLimit;
+	uint32_t IddcHyst;
+	SMU7_HystController_Data HystControllerData;
+};
+
+typedef struct SMU7_TdcLimitScoreboard SMU7_TdcLimitScoreboard;
+
+struct SMU7_PkgPwrLimitScoreboard {
+	uint8_t  Enable;
+	uint8_t  Running;
+	uint16_t Alpha;
+	uint32_t FilteredPkgPwr;
+	uint32_t Limit;
+	uint32_t Hyst;
+	uint32_t LimitFromDriver;
+	uint8_t PowerSharingEnabled;
+	uint8_t PowerSharingCounter;
+	uint8_t PowerSharingINTEnabled;
+	uint8_t GFXActivityCounterEnabled;
+	uint32_t EnergyCount;
+	uint32_t PSACTCount;
+	uint8_t RollOverRequired;
+	uint8_t RollOverCount;
+	uint8_t padding[2];
+	SMU7_HystController_Data HystControllerData;
+};
+
+typedef struct SMU7_PkgPwrLimitScoreboard SMU7_PkgPwrLimitScoreboard;
+
+struct SMU7_BapmScoreboard {
+	uint32_t source_powers[SMU75_DTE_SOURCES];
+	uint32_t source_powers_last[SMU75_DTE_SOURCES];
+	int32_t entity_temperatures[SMU75_NUM_GPU_TES];
+	int32_t initial_entity_temperatures[SMU75_NUM_GPU_TES];
+	int32_t Limit;
+	int32_t Hyst;
+	int32_t therm_influence_coeff_table[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS * 2];
+	int32_t therm_node_table[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS];
+	uint16_t ConfigTDPPowerScalar;
+	uint16_t FanSpeedPowerScalar;
+	uint16_t OverDrivePowerScalar;
+	uint16_t OverDriveLimitScalar;
+	uint16_t FinalPowerScalar;
+	uint8_t VariantID;
+	uint8_t spare997;
+
+	SMU7_HystController_Data HystControllerData;
+
+	int32_t temperature_gradient_slope;
+	int32_t temperature_gradient;
+	uint32_t measured_temperature;
+};
+
+
+typedef struct SMU7_BapmScoreboard SMU7_BapmScoreboard;
+
+struct SMU7_AcpiScoreboard {
+	uint32_t SavedInterruptMask[2];
+	uint8_t LastACPIRequest;
+	uint8_t CgBifResp;
+	uint8_t RequestType;
+	uint8_t Padding;
+	SMU75_Discrete_ACPILevel D0Level;
+};
+
+typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard;
+
+struct SMU75_Discrete_PmFuses {
+	uint8_t BapmVddCVidHiSidd[8];
+
+	uint8_t BapmVddCVidLoSidd[8];
+
+	uint8_t VddCVid[8];
+
+	uint8_t SviLoadLineEn;
+	uint8_t SviLoadLineVddC;
+	uint8_t SviLoadLineTrimVddC;
+	uint8_t SviLoadLineOffsetVddC;
+
+	uint16_t TDC_VDDC_PkgLimit;
+	uint8_t TDC_VDDC_ThrottleReleaseLimitPerc;
+	uint8_t TDC_MAWt;
+
+	uint8_t TdcWaterfallCtl;
+	uint8_t LPMLTemperatureMin;
+	uint8_t LPMLTemperatureMax;
+	uint8_t Reserved;
+
+	uint8_t LPMLTemperatureScaler[16];
+
+	int16_t FuzzyFan_ErrorSetDelta;
+	int16_t FuzzyFan_ErrorRateSetDelta;
+	int16_t FuzzyFan_PwmSetDelta;
+	uint16_t Reserved6;
+
+	uint8_t GnbLPML[16];
+
+	uint8_t GnbLPMLMaxVid;
+	uint8_t GnbLPMLMinVid;
+	uint8_t Reserved1[2];
+
+	uint16_t BapmVddCBaseLeakageHiSidd;
+	uint16_t BapmVddCBaseLeakageLoSidd;
+
+	uint16_t  VFT_Temp[3];
+	uint8_t   Version;
+	uint8_t   padding;
+
+	SMU_QuadraticCoeffs VFT_ATE[3];
+
+	SMU_QuadraticCoeffs AVFS_GB;
+	SMU_QuadraticCoeffs ATE_ACBTC_GB;
+
+	SMU_QuadraticCoeffs P2V;
+
+	uint32_t PsmCharzFreq;
+
+	uint16_t InversionVoltage;
+	uint16_t PsmCharzTemp;
+
+	uint32_t EnabledAvfsModules;
+
+	SMU_QuadraticCoeffs BtcGbv_CksOff;
+};
+
+typedef struct SMU75_Discrete_PmFuses SMU75_Discrete_PmFuses;
+
+struct SMU7_Discrete_Log_Header_Table {
+	uint32_t    version;
+	uint32_t    asic_id;
+	uint16_t    flags;
+	uint16_t    entry_size;
+	uint32_t    total_size;
+	uint32_t    num_of_entries;
+	uint8_t     type;
+	uint8_t     mode;
+	uint8_t     filler_0[2];
+	uint32_t    filler_1[2];
+};
+
+typedef struct SMU7_Discrete_Log_Header_Table SMU7_Discrete_Log_Header_Table;
+
+struct SMU7_Discrete_Log_Cntl {
+	uint8_t             Enabled;
+	uint8_t             Type;
+	uint8_t             padding[2];
+	uint32_t            BufferSize;
+	uint32_t            SamplesLogged;
+	uint32_t            SampleSize;
+	uint32_t            AddrL;
+	uint32_t            AddrH;
+};
+
+typedef struct SMU7_Discrete_Log_Cntl SMU7_Discrete_Log_Cntl;
+
+#if defined SMU__DGPU_ONLY
+#define CAC_ACC_NW_NUM_OF_SIGNALS 87
+#endif
+
+
+struct SMU7_Discrete_Cac_Collection_Table {
+	uint32_t temperature;
+	uint32_t cac_acc_nw[CAC_ACC_NW_NUM_OF_SIGNALS];
+};
+
+typedef struct SMU7_Discrete_Cac_Collection_Table SMU7_Discrete_Cac_Collection_Table;
+
+struct SMU7_Discrete_Cac_Verification_Table {
+	uint32_t VddcTotalPower;
+	uint32_t VddcLeakagePower;
+	uint32_t VddcConstantPower;
+	uint32_t VddcGfxDynamicPower;
+	uint32_t VddcUvdDynamicPower;
+	uint32_t VddcVceDynamicPower;
+	uint32_t VddcAcpDynamicPower;
+	uint32_t VddcPcieDynamicPower;
+	uint32_t VddcDceDynamicPower;
+	uint32_t VddcCurrent;
+	uint32_t VddcVoltage;
+	uint32_t VddciTotalPower;
+	uint32_t VddciLeakagePower;
+	uint32_t VddciConstantPower;
+	uint32_t VddciDynamicPower;
+	uint32_t Vddr1TotalPower;
+	uint32_t Vddr1LeakagePower;
+	uint32_t Vddr1ConstantPower;
+	uint32_t Vddr1DynamicPower;
+	uint32_t spare[4];
+	uint32_t temperature;
+};
+
+typedef struct SMU7_Discrete_Cac_Verification_Table SMU7_Discrete_Cac_Verification_Table;
+
+struct SMU7_Discrete_Pm_Status_Table {
+	int32_t T_meas_max[SMU75_THERMAL_INPUT_LOOP_COUNT];
+	int32_t T_meas_acc[SMU75_THERMAL_INPUT_LOOP_COUNT];
+
+	uint32_t I_calc_max;
+	uint32_t I_calc_acc;
+	uint32_t P_meas_acc;
+	uint32_t V_meas_load_acc;
+	uint32_t I_meas_acc;
+	uint32_t P_meas_acc_vddci;
+	uint32_t V_meas_load_acc_vddci;
+	uint32_t I_meas_acc_vddci;
+
+	uint16_t Sclk_dpm_residency[8];
+	uint16_t Uvd_dpm_residency[8];
+	uint16_t Vce_dpm_residency[8];
+	uint16_t Mclk_dpm_residency[4];
+
+	uint32_t P_roc_acc;
+	uint32_t PkgPwr_max;
+	uint32_t PkgPwr_acc;
+	uint32_t MclkSwitchingTime_max;
+	uint32_t MclkSwitchingTime_acc;
+	uint32_t FanPwm_acc;
+	uint32_t FanRpm_acc;
+	uint32_t Gfx_busy_acc;
+	uint32_t Mc_busy_acc;
+	uint32_t Fps_acc;
+
+	uint32_t AccCnt;
+};
+
+typedef struct SMU7_Discrete_Pm_Status_Table SMU7_Discrete_Pm_Status_Table;
+
+struct SMU7_Discrete_AutoWattMan_Status_Table {
+	int32_t T_meas_acc[SMU75_THERMAL_INPUT_LOOP_COUNT];
+	uint16_t Sclk_dpm_residency[8];
+	uint16_t Mclk_dpm_residency[4];
+	uint32_t TgpPwr_acc;
+	uint32_t Gfx_busy_acc;
+	uint32_t Mc_busy_acc;
+	uint32_t AccCnt;
+};
+
+typedef struct SMU7_Discrete_AutoWattMan_Status_Table SMU7_Discrete_AutoWattMan_Status_Table;
+
+#define SMU7_MAX_GFX_CU_COUNT 24
+#define SMU7_MIN_GFX_CU_COUNT  8
+#define SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_SHIFT 0
+#define SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_MASK  (0xFFFF << SMU7_GFX_CU_PG_ENABLE_DC_MAX_CU_SHIFT)
+#define SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_SHIFT 16
+#define SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_MASK  (0xFFFF << SMU7_GFX_CU_PG_ENABLE_AC_MAX_CU_SHIFT)
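+/* DC and AC maximum-CU counts are packed into the low and high 16 bits, respectively */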
+
+struct SMU7_GfxCuPgScoreboard {
+	uint8_t Enabled;
+	uint8_t WaterfallUp;
+	uint8_t WaterfallDown;
+	uint8_t WaterfallLimit;
+	uint8_t CurrMaxCu;
+	uint8_t TargMaxCu;
+	uint8_t ClampMode;
+	uint8_t Active;
+	uint8_t MaxSupportedCu;
+	uint8_t MinSupportedCu;
+	uint8_t PendingGfxCuHostInterrupt;
+	uint8_t LastFilteredMaxCuInteger;
+	uint16_t FilteredMaxCu;
+	uint16_t FilteredMaxCuAlpha;
+	uint16_t FilterResetCount;
+	uint16_t FilterResetCountLimit;
+	uint8_t ForceCu;
+	uint8_t ForceCuCount;
+	uint8_t AcModeMaxCu;
+	uint8_t DcModeMaxCu;
+};
+
+typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard;
+
+#define SMU7_SCLK_CAC 0x561
+#define SMU7_MCLK_CAC 0xF9
+#define SMU7_VCLK_CAC 0x2DE
+#define SMU7_DCLK_CAC 0x2DE
+#define SMU7_ECLK_CAC 0x25E
+#define SMU7_ACLK_CAC 0x25E
+#define SMU7_SAMCLK_CAC 0x25E
+#define SMU7_DISPCLK_CAC 0x100
+#define SMU7_CAC_CONSTANT 0x2EE3430
+#define SMU7_CAC_CONSTANT_SHIFT 18
+
+#define SMU7_VDDCI_MCLK_CONST        1765
+#define SMU7_VDDCI_MCLK_CONST_SHIFT  16
+#define SMU7_VDDCI_VDDCI_CONST       50958
+#define SMU7_VDDCI_VDDCI_CONST_SHIFT 14
+#define SMU7_VDDCI_CONST             11781
+#define SMU7_VDDCI_STROBE_PWR        1331
+
+#define SMU7_VDDR1_CONST            693
+#define SMU7_VDDR1_CAC_WEIGHT       20
+#define SMU7_VDDR1_CAC_WEIGHT_SHIFT 19
+#define SMU7_VDDR1_STROBE_PWR       512
+
+#define SMU7_AREA_COEFF_UVD 0xA78
+#define SMU7_AREA_COEFF_VCE 0x190A
+#define SMU7_AREA_COEFF_ACP 0x22D1
+#define SMU7_AREA_COEFF_SAMU 0x534
+
+#define SMU7_THERM_OUT_MODE_DISABLE       0x0
+#define SMU7_THERM_OUT_MODE_THERM_ONLY    0x1
+#define SMU7_THERM_OUT_MODE_THERM_VRHOT   0x2
+
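+/* Per-block (SQ/TCP/TD/DB) Enable/IR/PCC/EDC control bits; each block owns one byte of the word */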
+#define SQ_Enable_MASK 0x1
+#define SQ_IR_MASK 0x2
+#define SQ_PCC_MASK 0x4
+#define SQ_EDC_MASK 0x8
+
+#define TCP_Enable_MASK 0x100
+#define TCP_IR_MASK 0x200
+#define TCP_PCC_MASK 0x400
+#define TCP_EDC_MASK 0x800
+
+#define TD_Enable_MASK 0x10000
+#define TD_IR_MASK 0x20000
+#define TD_PCC_MASK 0x40000
+#define TD_EDC_MASK 0x80000
+
+#define DB_Enable_MASK 0x1000000
+#define DB_IR_MASK 0x2000000
+#define DB_PCC_MASK 0x4000000
+#define DB_EDC_MASK 0x8000000
+
+#define SQ_Enable_SHIFT 0
+#define SQ_IR_SHIFT 1
+#define SQ_PCC_SHIFT 2
+#define SQ_EDC_SHIFT 3
+
+#define TCP_Enable_SHIFT 8
+#define TCP_IR_SHIFT 9
+#define TCP_PCC_SHIFT 10
+#define TCP_EDC_SHIFT 11
+
+#define TD_Enable_SHIFT 16
+#define TD_IR_SHIFT 17
+#define TD_PCC_SHIFT 18
+#define TD_EDC_SHIFT 19
+
+#define DB_Enable_SHIFT 24
+#define DB_IR_SHIFT 25
+#define DB_PCC_SHIFT 26
+#define DB_EDC_SHIFT 27
+
+#define PMFUSES_AVFSSIZE 104
+
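+/* The four Vdroop-enable flags are packed into bits 0-3 of one control word */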
+#define BTCGB0_Vdroop_Enable_MASK  0x1
+#define BTCGB1_Vdroop_Enable_MASK  0x2
+#define AVFSGB0_Vdroop_Enable_MASK 0x4
+#define AVFSGB1_Vdroop_Enable_MASK 0x8
+
+#define BTCGB0_Vdroop_Enable_SHIFT  0
+#define BTCGB1_Vdroop_Enable_SHIFT  1
+#define AVFSGB0_Vdroop_Enable_SHIFT 2
+#define AVFSGB1_Vdroop_Enable_SHIFT 3
+
+#pragma pack(pop)
+
+
+#endif
+
diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
index c3ed737..715b5a1 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/vega10_ppsmc.h
@@ -131,6 +131,7 @@
 #define PPSMC_MSG_RunAcgInOpenLoop               0x5E
 #define PPSMC_MSG_InitializeAcg                  0x5F
 #define PPSMC_MSG_GetCurrPkgPwr                  0x61
+#define PPSMC_MSG_GetAverageGfxclkActualFrequency 0x63
 #define PPSMC_MSG_SetPccThrottleLevel            0x67
 #define PPSMC_MSG_UpdatePkgPwrPidAlpha           0x68
 #define PPSMC_Message_Count                      0x69
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
index 9587550..0a20040 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/Makefile
@@ -26,7 +26,7 @@
 SMU_MGR = smumgr.o smu8_smumgr.o tonga_smumgr.o fiji_smumgr.o \
 	  polaris10_smumgr.o iceland_smumgr.o \
 	  smu7_smumgr.o vega10_smumgr.o smu10_smumgr.o ci_smumgr.o \
-	  vega12_smumgr.o
+	  vega12_smumgr.o vegam_smumgr.o
 
 AMD_PP_SMUMGR = $(addprefix $(AMD_PP_PATH)/smumgr/,$(SMU_MGR))
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
index 08d0001..2d4ec8a 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c
@@ -61,9 +61,6 @@
 
 #define SMC_RAM_END 0x40000
 
-#define VOLTAGE_SCALE               4
-#define VOLTAGE_VID_OFFSET_SCALE1    625
-#define VOLTAGE_VID_OFFSET_SCALE2    100
 #define CISLAND_MINIMUM_ENGINE_CLOCK 800
 #define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5
 
@@ -211,9 +208,7 @@
 {
 	int ret;
 
-	if (!ci_is_smc_ram_running(hwmgr))
-		return -EINVAL;
-
+	cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0);
 	cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg);
 
 	PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
@@ -1182,7 +1177,6 @@
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	int result = 0;
 	bool dll_state_on;
-	struct cgs_display_info info = {0};
 	uint32_t mclk_edc_wr_enable_threshold = 40000;
 	uint32_t mclk_edc_enable_threshold = 40000;
 	uint32_t mclk_strobe_mode_threshold = 40000;
@@ -1236,8 +1230,7 @@
 	/* default set to low watermark. Highest level will be set to high later.*/
 	memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	data->display_timing.num_existing_displays = info.display_count;
+	data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
 
 	/* stutter mode not supported on ci */
 
@@ -2784,7 +2777,6 @@
 {
 	kfree(hwmgr->smu_backend);
 	hwmgr->smu_backend = NULL;
-	cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index faef783..53df940 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -53,10 +53,7 @@
 
 #define FIJI_SMC_SIZE 0x20000
 
-#define VOLTAGE_SCALE 4
 #define POWERTUNE_DEFAULT_SET_MAX    1
-#define VOLTAGE_VID_OFFSET_SCALE1   625
-#define VOLTAGE_VID_OFFSET_SCALE2   100
 #define VDDC_VDDCI_DELTA            300
 #define MC_CG_ARB_FREQ_F1           0x0b
 
@@ -288,8 +285,7 @@
 	struct fiji_smumgr *priv = (struct fiji_smumgr *)(hwmgr->smu_backend);
 
 	/* Only start SMC if SMC RAM is not running */
-	if (!(smu7_is_smc_ram_running(hwmgr)
-		|| cgs_is_virtualization_enabled(hwmgr->device))) {
+	if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
 		/* Check if SMU is running in protected mode */
 		if (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,
 				CGS_IND_REG__SMC,
@@ -307,13 +303,13 @@
 	}
 
 	/* To initialize all clock gating before RLC loaded and running.*/
-	cgs_set_clockgating_state(hwmgr->device,
+	amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 			AMD_IP_BLOCK_TYPE_GFX, AMD_CG_STATE_GATE);
-	cgs_set_clockgating_state(hwmgr->device,
+	amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 			AMD_IP_BLOCK_TYPE_GMC, AMD_CG_STATE_GATE);
-	cgs_set_clockgating_state(hwmgr->device,
+	amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 			AMD_IP_BLOCK_TYPE_SDMA, AMD_CG_STATE_GATE);
-	cgs_set_clockgating_state(hwmgr->device,
+	amdgpu_device_ip_set_clockgating_state(hwmgr->adev,
 			AMD_IP_BLOCK_TYPE_COMMON, AMD_CG_STATE_GATE);
 
 	/* Setup SoftRegsStart here for register lookup in case
@@ -335,10 +331,10 @@
 	uint32_t efuse = 0;
 	uint32_t mask = (1 << ((AVFS_EN_MSB - AVFS_EN_LSB) + 1)) - 1;
 
-	if (cgs_is_virtualization_enabled(hwmgr->device))
-		return 0;
+	if (!hwmgr->not_vf)
+		return false;
 
-	if (!atomctrl_read_efuse(hwmgr->device, AVFS_EN_LSB, AVFS_EN_MSB,
+	if (!atomctrl_read_efuse(hwmgr, AVFS_EN_LSB, AVFS_EN_MSB,
 			mask, &efuse)) {
 		if (efuse)
 			return true;
@@ -989,11 +985,11 @@
 
 	threshold = clock * data->fast_watermark_threshold / 100;
 
-	data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr;
+	data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
 		level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock,
-								hwmgr->display_config.min_core_set_clock_in_sr);
+								hwmgr->display_config->min_core_set_clock_in_sr);
 
 
 	/* Default to slow, highest DPM level will be
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
index d4bb934..415f691 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c
@@ -60,10 +60,7 @@
 
 #define ICELAND_SMC_SIZE               0x20000
 
-#define VOLTAGE_SCALE 4
 #define POWERTUNE_DEFAULT_SET_MAX    1
-#define VOLTAGE_VID_OFFSET_SCALE1   625
-#define VOLTAGE_VID_OFFSET_SCALE2   100
 #define MC_CG_ARB_FREQ_F1           0x0b
 #define VDDC_VDDCI_DELTA            200
 
@@ -932,7 +929,7 @@
 	graphic_level->PowerThrottle = 0;
 
 	data->display_timing.min_clock_in_sr =
-			hwmgr->display_config.min_core_set_clock_in_sr;
+			hwmgr->display_config->min_core_set_clock_in_sr;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_SclkDeepSleep))
@@ -1236,7 +1233,6 @@
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	int result = 0;
 	bool dll_state_on;
-	struct cgs_display_info info = {0};
 	uint32_t mclk_edc_wr_enable_threshold = 40000;
 	uint32_t mclk_edc_enable_threshold = 40000;
 	uint32_t mclk_strobe_mode_threshold = 40000;
@@ -1283,8 +1279,7 @@
 	/* default set to low watermark. Highest level will be set to high later.*/
 	memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	data->display_timing.num_existing_displays = info.display_count;
+	data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
 
 	/* stutter mode not supported on iceland */
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 997a777..a8c6524 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -52,8 +52,6 @@
 #include "dce/dce_10_0_sh_mask.h"
 
 #define POLARIS10_SMC_SIZE 0x20000
-#define VOLTAGE_VID_OFFSET_SCALE1   625
-#define VOLTAGE_VID_OFFSET_SCALE2   100
 #define POWERTUNE_DEFAULT_SET_MAX    1
 #define VDDC_VDDCI_DELTA            200
 #define MC_CG_ARB_FREQ_F1           0x0b
@@ -295,25 +293,16 @@
 	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend);
 
 	/* Only start SMC if SMC RAM is not running */
-	if (!(smu7_is_smc_ram_running(hwmgr)
-		|| cgs_is_virtualization_enabled(hwmgr->device))) {
+	if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
 		smu_data->protected_mode = (uint8_t) (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE));
 		smu_data->smu7_data.security_hard_key = (uint8_t) (PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_SEL));
 
 		/* Check if SMU is running in protected mode */
-		if (smu_data->protected_mode == 0) {
+		if (smu_data->protected_mode == 0)
 			result = polaris10_start_smu_in_non_protection_mode(hwmgr);
-		} else {
+		else
 			result = polaris10_start_smu_in_protection_mode(hwmgr);
 
-			/* If failed, try with different security Key. */
-			if (result != 0) {
-				smu_data->smu7_data.security_hard_key ^= 1;
-				cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
-				result = polaris10_start_smu_in_protection_mode(hwmgr);
-			}
-		}
-
 		if (result != 0)
 			PP_ASSERT_WITH_CODE(0, "Failed to load SMU ucode.", return result);
 
@@ -951,11 +940,11 @@
 	level->DownHyst = data->current_profile_setting.sclk_down_hyst;
 	level->VoltageDownHyst = 0;
 	level->PowerThrottle = 0;
-	data->display_timing.min_clock_in_sr = hwmgr->display_config.min_core_set_clock_in_sr;
+	data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
 		level->DeepSleepDivId = smu7_get_sleep_divider_id_from_clock(clock,
-								hwmgr->display_config.min_core_set_clock_in_sr);
+								hwmgr->display_config->min_core_set_clock_in_sr);
 
 	/* Default to slow, highest DPM level will be
 	 * set to PPSMC_DISPLAY_WATERMARK_LOW later.
@@ -1085,11 +1074,9 @@
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	int result = 0;
-	struct cgs_display_info info = {0, 0, NULL};
 	uint32_t mclk_stutter_mode_threshold = 40000;
 	phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL;
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
 
 	if (hwmgr->od_enabled)
 		vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_mclk;
@@ -1115,7 +1102,7 @@
 	mem_level->StutterEnable = false;
 	mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
-	data->display_timing.num_existing_displays = info.display_count;
+	data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
 
 	if (mclk_stutter_mode_threshold &&
 		(clock <= mclk_stutter_mode_threshold) &&
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
index bc53f2b..0a563f6 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c
@@ -23,7 +23,7 @@
 
 #include "smumgr.h"
 #include "smu10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 #include "smu10_smumgr.h"
 #include "ppatomctrl.h"
 #include "rv_ppsmc.h"
@@ -33,8 +33,6 @@
 #include "pp_debug.h"
 
 
-#define VOLTAGE_SCALE 4
-
 #define BUFFER_SIZE                 80000
 #define MAX_STRING_SIZE             15
 #define BUFFER_SIZETWO              131072
@@ -49,48 +47,41 @@
 
 static uint32_t smu10_wait_for_response(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t reg;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
+	reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
 
 	phm_wait_for_register_unequal(hwmgr, reg,
 			0, MP1_C2PMSG_90__CONTENT_MASK);
 
-	return cgs_read_register(hwmgr->device, reg);
+	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
 }
 
 static int smu10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
 		uint16_t msg)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
-	cgs_write_register(hwmgr->device, reg, msg);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
 
 	return 0;
 }
 
 static int smu10_read_arg_from_smc(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
-
-	return cgs_read_register(hwmgr->device, reg);
+	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
 }
 
 static int smu10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	smu10_wait_for_response(hwmgr);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
-	cgs_write_register(hwmgr->device, reg, 0);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
 	smu10_send_msg_to_smc_without_waiting(hwmgr, msg);
 
@@ -104,17 +95,13 @@
 static int smu10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
 		uint16_t msg, uint32_t parameter)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	smu10_wait_for_response(hwmgr);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
-	cgs_write_register(hwmgr->device, reg, 0);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
-	cgs_write_register(hwmgr->device, reg, parameter);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter);
 
 	smu10_send_msg_to_smc_without_waiting(hwmgr, msg);
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
index 0399c10..d644a9b 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
@@ -167,24 +167,25 @@
 {
 	int ret;
 
-	if (!smu7_is_smc_ram_running(hwmgr))
-		return -EINVAL;
-
-
 	PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
 
 	ret = PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP);
 
-	if (ret != 1)
-		pr_info("\n failed to send pre message %x ret is %d \n",  msg, ret);
+	if (ret == 0xFE)
+		pr_debug("last message was not supported\n");
+	else if (ret != 1)
+		pr_info("\n last message was failed ret is %d\n", ret);
 
+	cgs_write_register(hwmgr->device, mmSMC_RESP_0, 0);
 	cgs_write_register(hwmgr->device, mmSMC_MESSAGE_0, msg);
 
 	PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
 
 	ret = PHM_READ_FIELD(hwmgr->device, SMC_RESP_0, SMC_RESP);
 
-	if (ret != 1)
+	if (ret == 0xFE)
+		pr_debug("message %x was not supported\n", msg);
+	else if (ret != 1)
 		pr_info("\n failed to send message %x ret is %d \n",  msg, ret);
 
 	return 0;
@@ -199,10 +200,6 @@
 
 int smu7_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter)
 {
-	if (!smu7_is_smc_ram_running(hwmgr)) {
-		return -EINVAL;
-	}
-
 	PHM_WAIT_FIELD_UNEQUAL(hwmgr, SMC_RESP_0, SMC_RESP, 0);
 
 	cgs_write_register(hwmgr->device, mmSMC_MSG_ARG_0, parameter);
@@ -231,16 +228,6 @@
 	return 0;
 }
 
-int smu7_wait_for_smc_inactive(struct pp_hwmgr *hwmgr)
-{
-	if (!smu7_is_smc_ram_running(hwmgr))
-		return -EINVAL;
-
-	PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, SMC_SYSCON_CLOCK_CNTL_0, cken, 0);
-	return 0;
-}
-
-
 enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type)
 {
 	enum cgs_ucode_id result = CGS_UCODE_ID_MAXIMUM;
@@ -296,11 +283,9 @@
 
 	result = smu7_set_smc_sram_address(hwmgr, smc_addr, limit);
 
-	if (result)
-		return result;
+	*value = result ? 0 : cgs_read_register(hwmgr->device, mmSMC_IND_DATA_11);
 
-	*value = cgs_read_register(hwmgr->device, mmSMC_IND_DATA_11);
-	return 0;
+	return result;
 }
 
 int smu7_write_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr, uint32_t value, uint32_t limit)
@@ -375,7 +360,7 @@
 		entry->meta_data_addr_low = 0;
 
 		/* digest need be excluded out */
-		if (cgs_is_virtualization_enabled(hwmgr->device))
+		if (!hwmgr->not_vf)
 			info.image_size -= 20;
 		entry->data_size_byte = info.image_size;
 		entry->num_register_entries = 0;
@@ -409,7 +394,7 @@
 					0x0);
 
 	if (hwmgr->chip_id > CHIP_TOPAZ) { /* add support for Topaz */
-		if (!cgs_is_virtualization_enabled(hwmgr->device)) {
+		if (hwmgr->not_vf) {
 			smu7_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SMU_DRAM_ADDR_HI,
 						upper_32_bits(smu_data->smu_buffer.mc_addr));
@@ -467,7 +452,7 @@
 	PP_ASSERT_WITH_CODE(0 == smu7_populate_single_firmware_entry(hwmgr,
 				UCODE_ID_SDMA1, &toc->entry[toc->num_entries++]),
 				"Failed to Get Firmware Entry.", return -EINVAL);
-	if (cgs_is_virtualization_enabled(hwmgr->device))
+	if (!hwmgr->not_vf)
 		PP_ASSERT_WITH_CODE(0 == smu7_populate_single_firmware_entry(hwmgr,
 				UCODE_ID_MEC_STORAGE, &toc->entry[toc->num_entries++]),
 				"Failed to Get Firmware Entry.", return -EINVAL);
@@ -608,7 +593,7 @@
 	smu_data->header = smu_data->header_buffer.kaddr;
 	smu_data->header_buffer.mc_addr = mc_addr;
 
-	if (cgs_is_virtualization_enabled(hwmgr->device))
+	if (!hwmgr->not_vf)
 		return 0;
 
 	smu_data->smu_buffer.data_size = 200*4096;
@@ -643,13 +628,12 @@
 					&smu_data->header_buffer.mc_addr,
 					&smu_data->header_buffer.kaddr);
 
-	if (!cgs_is_virtualization_enabled(hwmgr->device))
+	if (hwmgr->not_vf)
 		amdgpu_bo_free_kernel(&smu_data->smu_buffer.handle,
 					&smu_data->smu_buffer.mc_addr,
 					&smu_data->smu_buffer.kaddr);
 
 	kfree(hwmgr->smu_backend);
 	hwmgr->smu_backend = NULL;
-	cgs_rel_firmware(hwmgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
index 126d300..39c9bfd 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.h
@@ -67,7 +67,6 @@
 int smu7_send_msg_to_smc_with_parameter_without_waiting(struct pp_hwmgr *hwmgr,
 						uint16_t msg, uint32_t parameter);
 int smu7_send_msg_to_smc_offset(struct pp_hwmgr *hwmgr);
-int smu7_wait_for_smc_inactive(struct pp_hwmgr *hwmgr);
 
 enum cgs_ucode_id smu7_convert_fw_type_to_cgs(uint32_t fw_type);
 int smu7_read_smc_sram_dword(struct pp_hwmgr *hwmgr, uint32_t smc_addr,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index c28b60a..c983793 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -41,9 +41,11 @@
 MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_k_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_smc.bin");
+MODULE_FIRMWARE("amdgpu/vegam_smc.bin");
 MODULE_FIRMWARE("amdgpu/vega10_smc.bin");
 MODULE_FIRMWARE("amdgpu/vega10_acg_smc.bin");
 MODULE_FIRMWARE("amdgpu/vega12_smc.bin");
+MODULE_FIRMWARE("amdgpu/vega20_smc.bin");
 
 int smum_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
 {
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index b51d746..782b19f 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -55,11 +55,7 @@
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
-
-#define VOLTAGE_SCALE 4
 #define POWERTUNE_DEFAULT_SET_MAX    1
-#define VOLTAGE_VID_OFFSET_SCALE1   625
-#define VOLTAGE_VID_OFFSET_SCALE2   100
 #define MC_CG_ARB_FREQ_F1           0x0b
 #define VDDC_VDDCI_DELTA            200
 
@@ -199,8 +195,7 @@
 	int result;
 
 	/* Only start SMC if SMC RAM is not running */
-	if (!(smu7_is_smc_ram_running(hwmgr) ||
-		cgs_is_virtualization_enabled(hwmgr->device))) {
+	if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
 		/*Check if SMU is running in protected mode*/
 		if (0 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
 					SMU_FIRMWARE, SMU_MODE)) {
@@ -651,7 +646,7 @@
 	graphic_level->PowerThrottle = 0;
 
 	data->display_timing.min_clock_in_sr =
-			hwmgr->display_config.min_core_set_clock_in_sr;
+			hwmgr->display_config->min_core_set_clock_in_sr;
 
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_SclkDeepSleep))
@@ -957,18 +952,17 @@
 		SMU72_Discrete_MemoryLevel *memory_level
 		)
 {
-	uint32_t mvdd = 0;
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	struct phm_ppt_v1_information *pptable_info =
 			  (struct phm_ppt_v1_information *)(hwmgr->pptable);
-	int result = 0;
-	bool dll_state_on;
-	struct cgs_display_info info = {0};
 	uint32_t mclk_edc_wr_enable_threshold = 40000;
 	uint32_t mclk_stutter_mode_threshold = 30000;
 	uint32_t mclk_edc_enable_threshold = 40000;
 	uint32_t mclk_strobe_mode_threshold = 40000;
 	phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_table = NULL;
+	int result = 0;
+	bool dll_state_on;
+	uint32_t mvdd = 0;
 
 	if (hwmgr->od_enabled)
 		vdd_dep_table = (phm_ppt_v1_clock_voltage_dependency_table *)&data->odn_dpm_table.vdd_dependency_on_mclk;
@@ -1009,8 +1003,7 @@
 	/* default set to low watermark. Highest level will be set to high later.*/
 	memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
-	cgs_get_active_displays_info(hwmgr->device, &info);
-	data->display_timing.num_existing_displays = info.display_count;
+	data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
 
 	if ((mclk_stutter_mode_threshold != 0) &&
 	    (memory_clock <= mclk_stutter_mode_threshold) &&
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
index 4aafb04..e84669c 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega10_smumgr.c
@@ -23,7 +23,7 @@
 
 #include "smumgr.h"
 #include "vega10_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 #include "vega10_smumgr.h"
 #include "vega10_hwmgr.h"
 #include "vega10_ppsmc.h"
@@ -35,8 +35,6 @@
 #define AVFS_EN_MSB		1568
 #define AVFS_EN_LSB		1568
 
-#define VOLTAGE_SCALE	4
-
 /* Microcode file is stored in this buffer */
 #define BUFFER_SIZE                 80000
 #define MAX_STRING_SIZE             15
@@ -54,18 +52,13 @@
 
 static bool vega10_is_smc_ram_running(struct pp_hwmgr *hwmgr)
 {
-	uint32_t mp1_fw_flags, reg;
+	struct amdgpu_device *adev = hwmgr->adev;
+	uint32_t mp1_fw_flags;
 
-	reg = soc15_get_register_offset(NBIF_HWID, 0,
-			mmPCIE_INDEX2_BASE_IDX, mmPCIE_INDEX2);
-
-	cgs_write_register(hwmgr->device, reg,
+	WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2,
 			(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff)));
 
-	reg = soc15_get_register_offset(NBIF_HWID, 0,
-			mmPCIE_DATA2_BASE_IDX, mmPCIE_DATA2);
-
-	mp1_fw_flags = cgs_read_register(hwmgr->device, reg);
+	mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2);
 
 	if (mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK)
 		return true;
@@ -81,11 +74,11 @@
  */
 static uint32_t vega10_wait_for_response(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t reg;
 	uint32_t ret;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
+	reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
 
 	ret = phm_wait_for_register_unequal(hwmgr, reg,
 			0, MP1_C2PMSG_90__CONTENT_MASK);
@@ -93,7 +86,7 @@
 	if (ret)
 		pr_err("No response from smu\n");
 
-	return cgs_read_register(hwmgr->device, reg);
+	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
 }
 
 /*
@@ -105,11 +98,9 @@
 static int vega10_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
 		uint16_t msg)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
-	cgs_write_register(hwmgr->device, reg, msg);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
 
 	return 0;
 }
@@ -122,14 +113,12 @@
  */
 static int vega10_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t ret;
 
 	vega10_wait_for_response(hwmgr);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
-	cgs_write_register(hwmgr->device, reg, 0);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
 	vega10_send_msg_to_smc_without_waiting(hwmgr, msg);
 
@@ -150,18 +139,14 @@
 static int vega10_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
 		uint16_t msg, uint32_t parameter)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t ret;
 
 	vega10_wait_for_response(hwmgr);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
-	cgs_write_register(hwmgr->device, reg, 0);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
-	cgs_write_register(hwmgr->device, reg, parameter);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter);
 
 	vega10_send_msg_to_smc_without_waiting(hwmgr, msg);
 
@@ -174,12 +159,9 @@
 
 static int vega10_get_argument(struct pp_hwmgr *hwmgr)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
-
-	return cgs_read_register(hwmgr->device, reg);
+	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
 }
 
 static int vega10_copy_table_from_smc(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index 651a3f2..7d9b40e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -23,7 +23,7 @@
 
 #include "smumgr.h"
 #include "vega12_inc.h"
-#include "pp_soc15.h"
+#include "soc15_common.h"
 #include "vega12_smumgr.h"
 #include "vega12_ppsmc.h"
 #include "vega12/smu9_driver_if.h"
@@ -44,18 +44,13 @@
 
 static bool vega12_is_smc_ram_running(struct pp_hwmgr *hwmgr)
 {
-	uint32_t mp1_fw_flags, reg;
+	struct amdgpu_device *adev = hwmgr->adev;
+	uint32_t mp1_fw_flags;
 
-	reg = soc15_get_register_offset(NBIF_HWID, 0,
-			mmPCIE_INDEX2_BASE_IDX, mmPCIE_INDEX2);
-
-	cgs_write_register(hwmgr->device, reg,
+	WREG32_SOC15(NBIF, 0, mmPCIE_INDEX2,
 			(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff)));
 
-	reg = soc15_get_register_offset(NBIF_HWID, 0,
-			mmPCIE_DATA2_BASE_IDX, mmPCIE_DATA2);
-
-	mp1_fw_flags = cgs_read_register(hwmgr->device, reg);
+	mp1_fw_flags = RREG32_SOC15(NBIF, 0, mmPCIE_DATA2);
 
 	if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
 				MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
@@ -72,15 +67,15 @@
  */
 static uint32_t vega12_wait_for_response(struct pp_hwmgr *hwmgr)
 {
+	struct amdgpu_device *adev = hwmgr->adev;
 	uint32_t reg;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
+	reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
 
 	phm_wait_for_register_unequal(hwmgr, reg,
 			0, MP1_C2PMSG_90__CONTENT_MASK);
 
-	return cgs_read_register(hwmgr->device, reg);
+	return RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90);
 }
 
 /*
@@ -92,11 +87,9 @@
 int vega12_send_msg_to_smc_without_waiting(struct pp_hwmgr *hwmgr,
 		uint16_t msg)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
-	cgs_write_register(hwmgr->device, reg, msg);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg);
 
 	return 0;
 }
@@ -109,13 +102,11 @@
  */
 int vega12_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	vega12_wait_for_response(hwmgr);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
-	cgs_write_register(hwmgr->device, reg, 0);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
 	vega12_send_msg_to_smc_without_waiting(hwmgr, msg);
 
@@ -135,17 +126,13 @@
 int vega12_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr,
 		uint16_t msg, uint32_t parameter)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	vega12_wait_for_response(hwmgr);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_90_BASE_IDX, mmMP1_SMN_C2PMSG_90);
-	cgs_write_register(hwmgr->device, reg, 0);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0);
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
-	cgs_write_register(hwmgr->device, reg, parameter);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, parameter);
 
 	vega12_send_msg_to_smc_without_waiting(hwmgr, msg);
 
@@ -166,11 +153,9 @@
 int vega12_send_msg_to_smc_with_parameter_without_waiting(
 		struct pp_hwmgr *hwmgr, uint16_t msg, uint32_t parameter)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_66_BASE_IDX, mmMP1_SMN_C2PMSG_66);
-	cgs_write_register(hwmgr->device, reg, parameter);
+	WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, parameter);
 
 	return vega12_send_msg_to_smc_without_waiting(hwmgr, msg);
 }
@@ -183,12 +168,9 @@
  */
 int vega12_read_arg_from_smc(struct pp_hwmgr *hwmgr, uint32_t *arg)
 {
-	uint32_t reg;
+	struct amdgpu_device *adev = hwmgr->adev;
 
-	reg = soc15_get_register_offset(MP1_HWID, 0,
-			mmMP1_SMN_C2PMSG_82_BASE_IDX, mmMP1_SMN_C2PMSG_82);
-
-	*arg = cgs_read_register(hwmgr->device, reg);
+	*arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
new file mode 100644
index 0000000..2de4895
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
@@ -0,0 +1,2383 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "pp_debug.h"
+#include "smumgr.h"
+#include "smu_ucode_xfer_vi.h"
+#include "vegam_smumgr.h"
+#include "smu/smu_7_1_3_d.h"
+#include "smu/smu_7_1_3_sh_mask.h"
+#include "gmc/gmc_8_1_d.h"
+#include "gmc/gmc_8_1_sh_mask.h"
+#include "oss/oss_3_0_d.h"
+#include "gca/gfx_8_0_d.h"
+#include "bif/bif_5_0_d.h"
+#include "bif/bif_5_0_sh_mask.h"
+#include "ppatomctrl.h"
+#include "cgs_common.h"
+#include "smu7_ppsmc.h"
+
+#include "smu7_dyn_defaults.h"
+
+#include "smu7_hwmgr.h"
+#include "hardwaremanager.h"
+#include "ppatomctrl.h"
+#include "atombios.h"
+#include "pppcielanes.h"
+
+#include "dce/dce_11_2_d.h"
+#include "dce/dce_11_2_sh_mask.h"
+
+#define PPVEGAM_TARGETACTIVITY_DFLT                     50
+
+#define VOLTAGE_VID_OFFSET_SCALE1   625
+#define VOLTAGE_VID_OFFSET_SCALE2   100
+#define POWERTUNE_DEFAULT_SET_MAX    1
+#define VDDC_VDDCI_DELTA            200
+#define MC_CG_ARB_FREQ_F1           0x0b
+
+#define STRAP_ASIC_RO_LSB    2168
+#define STRAP_ASIC_RO_MSB    2175
+
+#define PPSMC_MSG_ApplyAvfsCksOffVoltage      ((uint16_t) 0x415)
+#define PPSMC_MSG_EnableModeSwitchRLCNotification  ((uint16_t) 0x305)
+
+static const struct vegam_pt_defaults
+vegam_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = {
+	/* SviLoadLineEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt,
+	 * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT */
+	{ 1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000,
+	{ 0x79, 0x253, 0x25D, 0xAE, 0x72, 0x80, 0x83, 0x86, 0x6F, 0xC8, 0xC9, 0xC9, 0x2F, 0x4D, 0x61},
+	{ 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 } },
+};
+
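+/* Fallback SCLK FCW ranges, used when the VBIOS range table cannot be read:
+ * { vco_setting, postdiv, fcw_pcc, fcw_trans_upper, fcw_trans_lower }
+ */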
+static const sclkFcwRange_t Range_Table[NUM_SCLK_RANGE] = {
+			{VCO_2_4, POSTDIV_DIV_BY_16,  75, 160, 112},
+			{VCO_3_6, POSTDIV_DIV_BY_16, 112, 224, 160},
+			{VCO_2_4, POSTDIV_DIV_BY_8,   75, 160, 112},
+			{VCO_3_6, POSTDIV_DIV_BY_8,  112, 224, 160},
+			{VCO_2_4, POSTDIV_DIV_BY_4,   75, 160, 112},
+			{VCO_3_6, POSTDIV_DIV_BY_4,  112, 216, 160},
+			{VCO_2_4, POSTDIV_DIV_BY_2,   75, 160, 108},
+			{VCO_3_6, POSTDIV_DIV_BY_2,  112, 216, 160} };
+
+static int vegam_smu_init(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data;
+
+	smu_data = kzalloc(sizeof(struct vegam_smumgr), GFP_KERNEL);
+	if (smu_data == NULL)
+		return -ENOMEM;
+
+	hwmgr->smu_backend = smu_data;
+
+	if (smu7_init(hwmgr)) {
+		kfree(smu_data);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int vegam_start_smu_in_protection_mode(struct pp_hwmgr *hwmgr)
+{
+	int result = 0;
+
+	/* Wait for smc boot up */
+	/* PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(smumgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0) */
+
+	/* Assert reset */
+	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+					SMC_SYSCON_RESET_CNTL, rst_reg, 1);
+
+	result = smu7_upload_smu_firmware_image(hwmgr);
+	if (result != 0)
+		return result;
+
+	/* Clear status */
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixSMU_STATUS, 0);
+
+	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+					SMC_SYSCON_CLOCK_CNTL_0, ck_disable, 0);
+
+	/* De-assert reset */
+	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+					SMC_SYSCON_RESET_CNTL, rst_reg, 0);
+
+
+	PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, RCU_UC_EVENTS, INTERRUPTS_ENABLED, 1);
+
+
+	/* Call Test SMU message with 0x20000 offset to trigger SMU start */
+	smu7_send_msg_to_smc_offset(hwmgr);
+
+	/* Wait done bit to be set */
+	/* Check pass/failed indicator */
+
+	PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, SMU_STATUS, SMU_DONE, 0);
+
+	if (1 != PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+						SMU_STATUS, SMU_PASS))
+		PP_ASSERT_WITH_CODE(false, "SMU Firmware start failed!", return -1);
+
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixFIRMWARE_FLAGS, 0);
+
+	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+					SMC_SYSCON_RESET_CNTL, rst_reg, 1);
+
+	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+					SMC_SYSCON_RESET_CNTL, rst_reg, 0);
+
+	/* Wait for firmware to initialize */
+	PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND, FIRMWARE_FLAGS, INTERRUPTS_ENABLED, 1);
+
+	return result;
+}
+
+static int vegam_start_smu_in_non_protection_mode(struct pp_hwmgr *hwmgr)
+{
+	int result = 0;
+
+	/* wait for smc boot up */
+	PHM_WAIT_VFPF_INDIRECT_FIELD_UNEQUAL(hwmgr, SMC_IND, RCU_UC_EVENTS, boot_seq_done, 0);
+
+	/* Clear firmware interrupt enable flag */
+	/* PHM_WRITE_VFPF_INDIRECT_FIELD(pSmuMgr, SMC_IND, SMC_SYSCON_MISC_CNTL, pre_fetcher_en, 1); */
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+				ixFIRMWARE_FLAGS, 0);
+
+	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+					SMC_SYSCON_RESET_CNTL,
+					rst_reg, 1);
+
+	result = smu7_upload_smu_firmware_image(hwmgr);
+	if (result != 0)
+		return result;
+
+	/* Set smc instruct start point at 0x0 */
+	smu7_program_jump_on_start(hwmgr);
+
+	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+					SMC_SYSCON_CLOCK_CNTL_0, ck_disable, 0);
+
+	PHM_WRITE_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+					SMC_SYSCON_RESET_CNTL, rst_reg, 0);
+
+	/* Wait for firmware to initialize */
+
+	PHM_WAIT_VFPF_INDIRECT_FIELD(hwmgr, SMC_IND,
+					FIRMWARE_FLAGS, INTERRUPTS_ENABLED, 1);
+
+	return result;
+}
+
+static int vegam_start_smu(struct pp_hwmgr *hwmgr)
+{
+	int result = 0;
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+	/* Only start SMC if SMC RAM is not running */
+	if (!smu7_is_smc_ram_running(hwmgr) && hwmgr->not_vf) {
+		smu_data->protected_mode = (uint8_t)(PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device,
+				CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_MODE));
+		smu_data->smu7_data.security_hard_key = (uint8_t)(PHM_READ_VFPF_INDIRECT_FIELD(
+				hwmgr->device, CGS_IND_REG__SMC, SMU_FIRMWARE, SMU_SEL));
+
+		/* Check if SMU is running in protected mode */
+		if (smu_data->protected_mode == 0)
+			result = vegam_start_smu_in_non_protection_mode(hwmgr);
+		else
+			result = vegam_start_smu_in_protection_mode(hwmgr);
+
+		if (result != 0)
+			PP_ASSERT_WITH_CODE(0, "Failed to load SMU ucode.", return result);
+	}
+
+	/* Setup SoftRegsStart here for register lookup in case
+	 * DummyBackEnd is used and ProcessFirmwareHeader is not executed.
+	 */
+	smu7_read_smc_sram_dword(hwmgr,
+			SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU75_Firmware_Header, SoftRegisters),
+			&(smu_data->smu7_data.soft_regs_start),
+			0x40000);
+
+	result = smu7_request_smu_load_fw(hwmgr);
+
+	return result;
+}
+
+static int vegam_process_firmware_header(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t tmp;
+	int result;
+	bool error = false;
+
+	result = smu7_read_smc_sram_dword(hwmgr,
+			SMU7_FIRMWARE_HEADER_LOCATION +
+			offsetof(SMU75_Firmware_Header, DpmTable),
+			&tmp, SMC_RAM_END);
+
+	if (0 == result)
+		smu_data->smu7_data.dpm_table_start = tmp;
+
+	error |= (0 != result);
+
+	result = smu7_read_smc_sram_dword(hwmgr,
+			SMU7_FIRMWARE_HEADER_LOCATION +
+			offsetof(SMU75_Firmware_Header, SoftRegisters),
+			&tmp, SMC_RAM_END);
+
+	if (!result) {
+		data->soft_regs_start = tmp;
+		smu_data->smu7_data.soft_regs_start = tmp;
+	}
+
+	error |= (0 != result);
+
+	result = smu7_read_smc_sram_dword(hwmgr,
+			SMU7_FIRMWARE_HEADER_LOCATION +
+			offsetof(SMU75_Firmware_Header, mcRegisterTable),
+			&tmp, SMC_RAM_END);
+
+	if (!result)
+		smu_data->smu7_data.mc_reg_table_start = tmp;
+
+	result = smu7_read_smc_sram_dword(hwmgr,
+			SMU7_FIRMWARE_HEADER_LOCATION +
+			offsetof(SMU75_Firmware_Header, FanTable),
+			&tmp, SMC_RAM_END);
+
+	if (!result)
+		smu_data->smu7_data.fan_table_start = tmp;
+
+	error |= (0 != result);
+
+	result = smu7_read_smc_sram_dword(hwmgr,
+			SMU7_FIRMWARE_HEADER_LOCATION +
+			offsetof(SMU75_Firmware_Header, mcArbDramTimingTable),
+			&tmp, SMC_RAM_END);
+
+	if (!result)
+		smu_data->smu7_data.arb_table_start = tmp;
+
+	error |= (0 != result);
+
+	result = smu7_read_smc_sram_dword(hwmgr,
+			SMU7_FIRMWARE_HEADER_LOCATION +
+			offsetof(SMU75_Firmware_Header, Version),
+			&tmp, SMC_RAM_END);
+
+	if (!result)
+		hwmgr->microcode_version_info.SMC = tmp;
+
+	error |= (0 != result);
+
+	return error ? -1 : 0;
+}
+
+static bool vegam_is_dpm_running(struct pp_hwmgr *hwmgr)
+{
+	return (1 == PHM_READ_INDIRECT_FIELD(hwmgr->device,
+			CGS_IND_REG__SMC, FEATURE_STATUS, VOLTAGE_CONTROLLER_ON))
+			? true : false;
+}
+
+static uint32_t vegam_get_mac_definition(uint32_t value)
+{
+	switch (value) {
+	case SMU_MAX_LEVELS_GRAPHICS:
+		return SMU75_MAX_LEVELS_GRAPHICS;
+	case SMU_MAX_LEVELS_MEMORY:
+		return SMU75_MAX_LEVELS_MEMORY;
+	case SMU_MAX_LEVELS_LINK:
+		return SMU75_MAX_LEVELS_LINK;
+	case SMU_MAX_ENTRIES_SMIO:
+		return SMU75_MAX_ENTRIES_SMIO;
+	case SMU_MAX_LEVELS_VDDC:
+		return SMU75_MAX_LEVELS_VDDC;
+	case SMU_MAX_LEVELS_VDDGFX:
+		return SMU75_MAX_LEVELS_VDDGFX;
+	case SMU_MAX_LEVELS_VDDCI:
+		return SMU75_MAX_LEVELS_VDDCI;
+	case SMU_MAX_LEVELS_MVDD:
+		return SMU75_MAX_LEVELS_MVDD;
+	case SMU_UVD_MCLK_HANDSHAKE_DISABLE:
+		return SMU7_UVD_MCLK_HANDSHAKE_DISABLE |
+				SMU7_VCE_MCLK_HANDSHAKE_DISABLE;
+	}
+
+	pr_warn("can't get the mac of %x\n", value);
+	return 0;
+}
+
+static int vegam_update_uvd_smc_table(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	uint32_t mm_boot_level_offset, mm_boot_level_value;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+	smu_data->smc_state_table.UvdBootLevel = 0;
+	if (table_info->mm_dep_table->count > 0)
+		smu_data->smc_state_table.UvdBootLevel =
+				(uint8_t) (table_info->mm_dep_table->count - 1);
+	mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + offsetof(SMU75_Discrete_DpmTable,
+						UvdBootLevel);
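+	/* Align the byte offset down to a dword boundary for the indirect register access below */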
+	mm_boot_level_offset /= 4;
+	mm_boot_level_offset *= 4;
+	mm_boot_level_value = cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, mm_boot_level_offset);
+	mm_boot_level_value &= 0x00FFFFFF;
+	mm_boot_level_value |= smu_data->smc_state_table.UvdBootLevel << 24;
+	cgs_write_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value);
+
+	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_UVDDPM) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_StablePState))
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+				PPSMC_MSG_UVDDPM_SetEnabledMask,
+				(uint32_t)(1 << smu_data->smc_state_table.UvdBootLevel));
+	return 0;
+}
+
+static int vegam_update_vce_smc_table(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	uint32_t mm_boot_level_offset, mm_boot_level_value;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+					PHM_PlatformCaps_StablePState))
+		smu_data->smc_state_table.VceBootLevel =
+			(uint8_t) (table_info->mm_dep_table->count - 1);
+	else
+		smu_data->smc_state_table.VceBootLevel = 0;
+
+	mm_boot_level_offset = smu_data->smu7_data.dpm_table_start +
+					offsetof(SMU75_Discrete_DpmTable, VceBootLevel);
+	mm_boot_level_offset /= 4;
+	mm_boot_level_offset *= 4;
+	mm_boot_level_value = cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, mm_boot_level_offset);
+	mm_boot_level_value &= 0xFF00FFFF;
+	mm_boot_level_value |= smu_data->smc_state_table.VceBootLevel << 16;
+	cgs_write_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+				PPSMC_MSG_VCEDPM_SetEnabledMask,
+				(uint32_t)1 << smu_data->smc_state_table.VceBootLevel);
+	return 0;
+}
+
+static int vegam_update_samu_smc_table(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	uint32_t mm_boot_level_offset, mm_boot_level_value;
+
+
+	smu_data->smc_state_table.SamuBootLevel = 0;
+	mm_boot_level_offset = smu_data->smu7_data.dpm_table_start +
+				offsetof(SMU75_Discrete_DpmTable, SamuBootLevel);
+
+	mm_boot_level_offset /= 4;
+	mm_boot_level_offset *= 4;
+	mm_boot_level_value = cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, mm_boot_level_offset);
+	mm_boot_level_value &= 0xFFFFFF00;
+	mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0;
+	cgs_write_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_StablePState))
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+				PPSMC_MSG_SAMUDPM_SetEnabledMask,
+				(uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel));
+	return 0;
+}
+
+
+static int vegam_update_bif_smc_table(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table;
+	int max_entry, i;
+
+	max_entry = (SMU75_MAX_LEVELS_LINK < pcie_table->count) ?
+						SMU75_MAX_LEVELS_LINK :
+						pcie_table->count;
+	/* Setup BIF_SCLK levels */
+	for (i = 0; i < max_entry; i++)
+		smu_data->bif_sclk_table[i] = pcie_table->entries[i].pcie_sclk;
+	return 0;
+}
+
+static int vegam_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type)
+{
+	switch (type) {
+	case SMU_UVD_TABLE:
+		vegam_update_uvd_smc_table(hwmgr);
+		break;
+	case SMU_VCE_TABLE:
+		vegam_update_vce_smc_table(hwmgr);
+		break;
+	case SMU_SAMU_TABLE:
+		vegam_update_samu_smc_table(hwmgr);
+		break;
+	case SMU_BIF_TABLE:
+		vegam_update_bif_smc_table(hwmgr);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static void vegam_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct  phm_ppt_v1_information *table_info =
+			(struct  phm_ppt_v1_information *)(hwmgr->pptable);
+
+	if (table_info &&
+			table_info->cac_dtp_table->usPowerTuneDataSetID <= POWERTUNE_DEFAULT_SET_MAX &&
+			table_info->cac_dtp_table->usPowerTuneDataSetID)
+		smu_data->power_tune_defaults =
+				&vegam_power_tune_data_set_array
+				[table_info->cac_dtp_table->usPowerTuneDataSetID - 1];
+	else
+		smu_data->power_tune_defaults = &vegam_power_tune_data_set_array[0];
+
+}
+
+static int vegam_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
+			SMU75_Discrete_DpmTable *table)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t count, level;
+
+	if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) {
+		count = data->mvdd_voltage_table.count;
+		if (count > SMU_MAX_SMIO_LEVELS)
+			count = SMU_MAX_SMIO_LEVELS;
+		for (level = 0; level < count; level++) {
+			table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US(
+					data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE);
+			/* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/
+			table->SmioTable2.Pattern[level].Smio =
+				(uint8_t) level;
+			table->Smio[level] |=
+				data->mvdd_voltage_table.entries[level].smio_low;
+		}
+		table->SmioMask2 = data->mvdd_voltage_table.mask_low;
+
+		table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count);
+	}
+
+	return 0;
+}
+
+static int vegam_populate_smc_vddci_table(struct pp_hwmgr *hwmgr,
+					struct SMU75_Discrete_DpmTable *table)
+{
+	uint32_t count, level;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+	count = data->vddci_voltage_table.count;
+
+	if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) {
+		if (count > SMU_MAX_SMIO_LEVELS)
+			count = SMU_MAX_SMIO_LEVELS;
+		for (level = 0; level < count; ++level) {
+			table->SmioTable1.Pattern[level].Voltage = PP_HOST_TO_SMC_US(
+					data->vddci_voltage_table.entries[level].value * VOLTAGE_SCALE);
+			table->SmioTable1.Pattern[level].Smio = (uint8_t) level;
+
+			table->Smio[level] |= data->vddci_voltage_table.entries[level].smio_low;
+		}
+	}
+
+	table->SmioMask1 = data->vddci_voltage_table.mask_low;
+
+	return 0;
+}
+
+static int vegam_populate_cac_table(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	uint32_t count;
+	uint8_t index;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_voltage_lookup_table *lookup_table =
+			table_info->vddc_lookup_table;
+	/* table is already swapped, so in order to use the value from it,
+	 * we need to swap it back.
+	 * We are populating vddc CAC data to BapmVddc table
+	 * in split and merged mode
+	 */
+	for (count = 0; count < lookup_table->count; count++) {
+		index = phm_get_voltage_index(lookup_table,
+				data->vddc_voltage_table.entries[count].value);
+		table->BapmVddcVidLoSidd[count] =
+				convert_to_vid(lookup_table->entries[index].us_cac_low);
+		table->BapmVddcVidHiSidd[count] =
+				convert_to_vid(lookup_table->entries[index].us_cac_mid);
+		table->BapmVddcVidHiSidd2[count] =
+				convert_to_vid(lookup_table->entries[index].us_cac_high);
+	}
+
+	return 0;
+}
+
+static int vegam_populate_smc_voltage_tables(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	vegam_populate_smc_vddci_table(hwmgr, table);
+	vegam_populate_smc_mvdd_table(hwmgr, table);
+	vegam_populate_cac_table(hwmgr, table);
+
+	return 0;
+}
+
+static int vegam_populate_ulv_level(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_Ulv *state)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+
+	state->CcPwrDynRm = 0;
+	state->CcPwrDynRm1 = 0;
+
+	state->VddcOffset = (uint16_t) table_info->us_ulv_voltage_offset;
+	state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset *
+			VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1);
+
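+	/* XOR with 0x3 inverts the two phase-shedding control bits */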
+	state->VddcPhase = data->vddc_phase_shed_control ^ 0x3;
+
+	CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm);
+	CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1);
+	CONVERT_FROM_HOST_TO_SMC_US(state->VddcOffset);
+
+	return 0;
+}
+
+static int vegam_populate_ulv_state(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	return vegam_populate_ulv_level(hwmgr, &table->Ulv);
+}
+
+static int vegam_populate_smc_link_level(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data =
+			(struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct smu7_dpm_table *dpm_table = &data->dpm_table;
+	int i;
+
+	/* Index (dpm_table->pcie_speed_table.count)
+	 * is reserved for PCIE boot level. */
+	for (i = 0; i <= dpm_table->pcie_speed_table.count; i++) {
+		table->LinkLevel[i].PcieGenSpeed  =
+				(uint8_t)dpm_table->pcie_speed_table.dpm_levels[i].value;
+		table->LinkLevel[i].PcieLaneCount = (uint8_t)encode_pcie_lane_width(
+				dpm_table->pcie_speed_table.dpm_levels[i].param1);
+		table->LinkLevel[i].EnabledForActivity = 1;
+		table->LinkLevel[i].SPC = (uint8_t)(data->pcie_spc_cap & 0xff);
+		table->LinkLevel[i].DownThreshold = PP_HOST_TO_SMC_UL(5);
+		table->LinkLevel[i].UpThreshold = PP_HOST_TO_SMC_UL(30);
+	}
+
+	smu_data->smc_state_table.LinkLevelCount =
+			(uint8_t)dpm_table->pcie_speed_table.count;
+
+/* To Do move to hwmgr */
+	data->dpm_level_enable_mask.pcie_dpm_enable_mask =
+			phm_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table);
+
+	return 0;
+}
+
+static int vegam_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
+		struct phm_ppt_v1_clock_voltage_dependency_table *dep_table,
+		uint32_t clock, SMU_VoltageLevel *voltage, uint32_t *mvdd)
+{
+	uint32_t i;
+	uint16_t vddci;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+	*voltage = *mvdd = 0;
+
+	/* the clock - voltage dependency table is empty */
+	if (dep_table->count == 0)
+		return -EINVAL;
+
+	for (i = 0; i < dep_table->count; i++) {
+		/* find first sclk bigger than request */
+		if (dep_table->entries[i].clk >= clock) {
+			*voltage |= (dep_table->entries[i].vddc *
+					VOLTAGE_SCALE) << VDDC_SHIFT;
+			if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control)
+				*voltage |= (data->vbios_boot_state.vddci_bootup_value *
+						VOLTAGE_SCALE) << VDDCI_SHIFT;
+			else if (dep_table->entries[i].vddci)
+				*voltage |= (dep_table->entries[i].vddci *
+						VOLTAGE_SCALE) << VDDCI_SHIFT;
+			else {
+				vddci = phm_find_closest_vddci(&(data->vddci_voltage_table),
+						(dep_table->entries[i].vddc -
+								(uint16_t)VDDC_VDDCI_DELTA));
+				*voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+			}
+
+			if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control)
+				*mvdd = data->vbios_boot_state.mvdd_bootup_value *
+					VOLTAGE_SCALE;
+			else if (dep_table->entries[i].mvdd)
+				*mvdd = (uint32_t) dep_table->entries[i].mvdd *
+					VOLTAGE_SCALE;
+
+			*voltage |= 1 << PHASES_SHIFT;
+			return 0;
+		}
+	}
+
+	/* sclk is bigger than max sclk in the dependence table */
+	*voltage |= (dep_table->entries[i - 1].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+	vddci = phm_find_closest_vddci(&(data->vddci_voltage_table),
+			(dep_table->entries[i - 1].vddc -
+					(uint16_t)VDDC_VDDCI_DELTA));
+
+	if (SMU7_VOLTAGE_CONTROL_NONE == data->vddci_control)
+		*voltage |= (data->vbios_boot_state.vddci_bootup_value *
+				VOLTAGE_SCALE) << VDDCI_SHIFT;
+	else if (dep_table->entries[i - 1].vddci)
+		*voltage |= (dep_table->entries[i - 1].vddci *
+				VOLTAGE_SCALE) << VDDCI_SHIFT;
+	else
+		*voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+	if (SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control)
+		*mvdd = data->vbios_boot_state.mvdd_bootup_value * VOLTAGE_SCALE;
+	else if (dep_table->entries[i - 1].mvdd)
+		*mvdd = (uint32_t) dep_table->entries[i - 1].mvdd * VOLTAGE_SCALE;
+
+	return 0;
+}
+
+static void vegam_get_sclk_range_table(struct pp_hwmgr *hwmgr,
+				   SMU75_Discrete_DpmTable  *table)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	uint32_t i, ref_clk;
+
+	struct pp_atom_ctrl_sclk_range_table range_table_from_vbios = { { {0} } };
+
+	ref_clk = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
+
+	if (0 == atomctrl_get_smc_sclk_range_table(hwmgr, &range_table_from_vbios)) {
+		for (i = 0; i < NUM_SCLK_RANGE; i++) {
+			table->SclkFcwRangeTable[i].vco_setting =
+					range_table_from_vbios.entry[i].ucVco_setting;
+			table->SclkFcwRangeTable[i].postdiv =
+					range_table_from_vbios.entry[i].ucPostdiv;
+			table->SclkFcwRangeTable[i].fcw_pcc =
+					range_table_from_vbios.entry[i].usFcw_pcc;
+
+			table->SclkFcwRangeTable[i].fcw_trans_upper =
+					range_table_from_vbios.entry[i].usFcw_trans_upper;
+			table->SclkFcwRangeTable[i].fcw_trans_lower =
+					range_table_from_vbios.entry[i].usRcw_trans_lower;
+
+			CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc);
+			CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper);
+			CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower);
+		}
+		return;
+	}
+
+	for (i = 0; i < NUM_SCLK_RANGE; i++) {
+		smu_data->range_table[i].trans_lower_frequency =
+				(ref_clk * Range_Table[i].fcw_trans_lower) >> Range_Table[i].postdiv;
+		smu_data->range_table[i].trans_upper_frequency =
+				(ref_clk * Range_Table[i].fcw_trans_upper) >> Range_Table[i].postdiv;
+
+		table->SclkFcwRangeTable[i].vco_setting = Range_Table[i].vco_setting;
+		table->SclkFcwRangeTable[i].postdiv = Range_Table[i].postdiv;
+		table->SclkFcwRangeTable[i].fcw_pcc = Range_Table[i].fcw_pcc;
+
+		table->SclkFcwRangeTable[i].fcw_trans_upper = Range_Table[i].fcw_trans_upper;
+		table->SclkFcwRangeTable[i].fcw_trans_lower = Range_Table[i].fcw_trans_lower;
+
+		CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_pcc);
+		CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_upper);
+		CONVERT_FROM_HOST_TO_SMC_US(table->SclkFcwRangeTable[i].fcw_trans_lower);
+	}
+}
+
+static int vegam_calculate_sclk_params(struct pp_hwmgr *hwmgr,
+		uint32_t clock, SMU_SclkSetting *sclk_setting)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	const SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table);
+	struct pp_atomctrl_clock_dividers_ai dividers;
+	uint32_t ref_clock;
+	uint32_t pcc_target_percent, pcc_target_freq, ss_target_percent, ss_target_freq;
+	uint8_t i;
+	int result;
+	uint64_t temp;
+
+	sclk_setting->SclkFrequency = clock;
+	/* get the engine clock dividers for this clock value */
+	result = atomctrl_get_engine_pll_dividers_ai(hwmgr, clock,  &dividers);
+	if (result == 0) {
+		sclk_setting->Fcw_int = dividers.usSclk_fcw_int;
+		sclk_setting->Fcw_frac = dividers.usSclk_fcw_frac;
+		sclk_setting->Pcc_fcw_int = dividers.usPcc_fcw_int;
+		sclk_setting->PllRange = dividers.ucSclkPllRange;
+		sclk_setting->Sclk_slew_rate = 0x400;
+		sclk_setting->Pcc_up_slew_rate = dividers.usPcc_fcw_slew_frac;
+		sclk_setting->Pcc_down_slew_rate = 0xffff;
+		sclk_setting->SSc_En = dividers.ucSscEnable;
+		sclk_setting->Fcw1_int = dividers.usSsc_fcw1_int;
+		sclk_setting->Fcw1_frac = dividers.usSsc_fcw1_frac;
+		sclk_setting->Sclk_ss_slew_rate = dividers.usSsc_fcw_slew_frac;
+		return result;
+	}
+
+	ref_clock = amdgpu_asic_get_xclk((struct amdgpu_device *)hwmgr->adev);
+
+	for (i = 0; i < NUM_SCLK_RANGE; i++) {
+		if (clock > smu_data->range_table[i].trans_lower_frequency
+		&& clock <= smu_data->range_table[i].trans_upper_frequency) {
+			sclk_setting->PllRange = i;
+			break;
+		}
+	}
+
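+	/* Express the post-divided clock as a 16.16 fixed-point multiple of
+	 * the reference clock: Fcw_int holds the integer part, Fcw_frac the
+	 * fraction (hence the << 0x10 before the division). */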
+	sclk_setting->Fcw_int = (uint16_t)
+			((clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) /
+					ref_clock);
+	temp = clock << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv;
+	temp <<= 0x10;
+	do_div(temp, ref_clock);
+	sclk_setting->Fcw_frac = temp & 0xffff;
+
+	pcc_target_percent = 10; /* Hardcode 10% for now. */
+	pcc_target_freq = clock - (clock * pcc_target_percent / 100);
+	sclk_setting->Pcc_fcw_int = (uint16_t)
+			((pcc_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) /
+					ref_clock);
+
+	ss_target_percent = 2; /* Hardcode 2% for now. */
+	sclk_setting->SSc_En = 0;
+	if (ss_target_percent) {
+		sclk_setting->SSc_En = 1;
+		ss_target_freq = clock - (clock * ss_target_percent / 100);
+		sclk_setting->Fcw1_int = (uint16_t)
+				((ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv) /
+						ref_clock);
+		temp = ss_target_freq << table->SclkFcwRangeTable[sclk_setting->PllRange].postdiv;
+		temp <<= 0x10;
+		do_div(temp, ref_clock);
+		sclk_setting->Fcw1_frac = temp & 0xffff;
+	}
+
+	return 0;
+}
+
+static uint8_t vegam_get_sleep_divider_id_from_clock(uint32_t clock,
+		uint32_t clock_insr)
+{
+	uint8_t i;
+	uint32_t temp;
+	uint32_t min = max(clock_insr, (uint32_t)SMU7_MINIMUM_ENGINE_CLOCK);
+
+	PP_ASSERT_WITH_CODE((clock >= min),
+			"Engine clock can't satisfy stutter requirement!",
+			return 0);
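+	/* Walk down from the largest divider (i + 1 == 32) and return the
+	 * first one that keeps clock / (i + 1) at or above the minimum. */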
+	for (i = 31; ; i--) {
+		temp = clock / (i + 1);
+
+		if (temp >= min || i == 0)
+			break;
+	}
+	return i;
+}
+
+static int vegam_populate_single_graphic_level(struct pp_hwmgr *hwmgr,
+		uint32_t clock, struct SMU75_Discrete_GraphicsLevel *level)
+{
+	int result;
+	/* PP_Clocks minClocks; */
+	uint32_t mvdd;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	SMU_SclkSetting curr_sclk_setting = { 0 };
+
+	result = vegam_calculate_sclk_params(hwmgr, clock, &curr_sclk_setting);
+
+	/* populate graphics levels */
+	result = vegam_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_sclk, clock,
+			&level->MinVoltage, &mvdd);
+
+	PP_ASSERT_WITH_CODE((0 == result),
+			"cannot find VDDC voltage value for "
+			"VDDC engine clock dependency table",
+			return result);
+	level->ActivityLevel = (uint16_t)(SclkDPMTuning_VEGAM >> DPMTuning_Activity_Shift);
+
+	level->CcPwrDynRm = 0;
+	level->CcPwrDynRm1 = 0;
+	level->EnabledForActivity = 0;
+	level->EnabledForThrottle = 1;
+	level->VoltageDownHyst = 0;
+	level->PowerThrottle = 0;
+	data->display_timing.min_clock_in_sr = hwmgr->display_config->min_core_set_clock_in_sr;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SclkDeepSleep))
+		level->DeepSleepDivId = vegam_get_sleep_divider_id_from_clock(clock,
+								hwmgr->display_config->min_core_set_clock_in_sr);
+
+	level->SclkSetting = curr_sclk_setting;
+
+	CONVERT_FROM_HOST_TO_SMC_UL(level->MinVoltage);
+	CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm);
+	CONVERT_FROM_HOST_TO_SMC_UL(level->CcPwrDynRm1);
+	CONVERT_FROM_HOST_TO_SMC_US(level->ActivityLevel);
+	CONVERT_FROM_HOST_TO_SMC_UL(level->SclkSetting.SclkFrequency);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_int);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw_frac);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_fcw_int);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_slew_rate);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_up_slew_rate);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Pcc_down_slew_rate);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_int);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Fcw1_frac);
+	CONVERT_FROM_HOST_TO_SMC_US(level->SclkSetting.Sclk_ss_slew_rate);
+	return 0;
+}
+
+static int vegam_populate_all_graphic_levels(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct smu7_dpm_table *dpm_table = &hw_data->dpm_table;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_pcie_table *pcie_table = table_info->pcie_table;
+	uint8_t pcie_entry_cnt = (uint8_t) hw_data->dpm_table.pcie_speed_table.count;
+	int result = 0;
+	uint32_t array = smu_data->smu7_data.dpm_table_start +
+			offsetof(SMU75_Discrete_DpmTable, GraphicsLevel);
+	uint32_t array_size = sizeof(struct SMU75_Discrete_GraphicsLevel) *
+			SMU75_MAX_LEVELS_GRAPHICS;
+	struct SMU75_Discrete_GraphicsLevel *levels =
+			smu_data->smc_state_table.GraphicsLevel;
+	uint32_t i, max_entry;
+	uint8_t highest_pcie_level_enabled = 0,
+		lowest_pcie_level_enabled = 0,
+		mid_pcie_level_enabled = 0,
+		count = 0;
+
+	vegam_get_sclk_range_table(hwmgr, &(smu_data->smc_state_table));
+
+	for (i = 0; i < dpm_table->sclk_table.count; i++) {
+
+		result = vegam_populate_single_graphic_level(hwmgr,
+				dpm_table->sclk_table.dpm_levels[i].value,
+				&(smu_data->smc_state_table.GraphicsLevel[i]));
+		if (result)
+			return result;
+
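+		/* SclkDPMTuning_VEGAM packs the activity target and the
+		 * up/down hysteresis into one word; shift out each field. */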
+		levels[i].UpHyst = (uint8_t)
+				(SclkDPMTuning_VEGAM >> DPMTuning_Uphyst_Shift);
+		levels[i].DownHyst = (uint8_t)
+				(SclkDPMTuning_VEGAM >> DPMTuning_Downhyst_Shift);
+		/* Make sure only DPM levels 0 and 1 have a Deep Sleep Div ID populated. */
+		if (i > 1)
+			levels[i].DeepSleepDivId = 0;
+	}
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+					PHM_PlatformCaps_SPLLShutdownSupport))
+		smu_data->smc_state_table.GraphicsLevel[0].SclkSetting.SSc_En = 0;
+
+	smu_data->smc_state_table.GraphicsDpmLevelCount =
+			(uint8_t)dpm_table->sclk_table.count;
+	hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask =
+			phm_get_dpm_level_enable_mask_value(&dpm_table->sclk_table);
+
+	for (i = 0; i < dpm_table->sclk_table.count; i++)
+		levels[i].EnabledForActivity =
+				(hw_data->dpm_level_enable_mask.sclk_dpm_enable_mask >> i) & 0x1;
+
+	if (pcie_table != NULL) {
+		PP_ASSERT_WITH_CODE((1 <= pcie_entry_cnt),
+				"There must be 1 or more PCIE levels defined in PPTable.",
+				return -EINVAL);
+		max_entry = pcie_entry_cnt - 1;
+		for (i = 0; i < dpm_table->sclk_table.count; i++)
+			levels[i].pcieDpmLevel =
+					(uint8_t) ((i < max_entry) ? i : max_entry);
+	} else {
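+		/* No PCIe table: derive the highest, lowest and a mid DPM
+		 * level from the set bits of pcie_dpm_enable_mask. */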
+		while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &&
+				((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &
+						(1 << (highest_pcie_level_enabled + 1))) != 0))
+			highest_pcie_level_enabled++;
+
+		while (hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &&
+				((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &
+						(1 << lowest_pcie_level_enabled)) == 0))
+			lowest_pcie_level_enabled++;
+
+		while ((count < highest_pcie_level_enabled) &&
+				((hw_data->dpm_level_enable_mask.pcie_dpm_enable_mask &
+						(1 << (lowest_pcie_level_enabled + 1 + count))) == 0))
+			count++;
+
+		mid_pcie_level_enabled = (lowest_pcie_level_enabled + 1 + count) <
+				highest_pcie_level_enabled ?
+						(lowest_pcie_level_enabled + 1 + count) :
+						highest_pcie_level_enabled;
+
+		/* set pcieDpmLevel to highest_pcie_level_enabled */
+		for (i = 2; i < dpm_table->sclk_table.count; i++)
+			levels[i].pcieDpmLevel = highest_pcie_level_enabled;
+
+		/* set pcieDpmLevel to lowest_pcie_level_enabled */
+		levels[0].pcieDpmLevel = lowest_pcie_level_enabled;
+
+		/* set pcieDpmLevel to mid_pcie_level_enabled */
+		levels[1].pcieDpmLevel = mid_pcie_level_enabled;
+	}
+	/* the level count is sent to the SMC once at init and never changes */
+	result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels,
+			(uint32_t)array_size, SMC_RAM_END);
+
+	return result;
+}
+
+static int vegam_calculate_mclk_params(struct pp_hwmgr *hwmgr,
+		uint32_t clock, struct SMU75_Discrete_MemoryLevel *mem_level)
+{
+	struct pp_atomctrl_memory_clock_param_ai mpll_param;
+
+	PP_ASSERT_WITH_CODE(!atomctrl_get_memory_pll_dividers_ai(hwmgr,
+			clock, &mpll_param),
+			"Failed to retrieve memory pll parameter.",
+			return -EINVAL);
+
+	mem_level->MclkFrequency = (uint32_t)mpll_param.ulClock;
+	mem_level->Fcw_int = (uint16_t)mpll_param.ulMclk_fcw_int;
+	mem_level->Fcw_frac = (uint16_t)mpll_param.ulMclk_fcw_frac;
+	mem_level->Postdiv = (uint8_t)mpll_param.ulPostDiv;
+
+	return 0;
+}
+
+static int vegam_populate_single_memory_level(struct pp_hwmgr *hwmgr,
+		uint32_t clock, struct SMU75_Discrete_MemoryLevel *mem_level)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	int result = 0;
+	uint32_t mclk_stutter_mode_threshold = 60000;
+
+	if (table_info->vdd_dep_on_mclk) {
+		result = vegam_get_dependency_volt_by_clk(hwmgr,
+				table_info->vdd_dep_on_mclk, clock,
+				&mem_level->MinVoltage, &mem_level->MinMvdd);
+		PP_ASSERT_WITH_CODE(!result,
+				"cannot find MinVddc voltage value from memory "
+				"VDDC voltage dependency table", return result);
+	}
+
+	result = vegam_calculate_mclk_params(hwmgr, clock, mem_level);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to calculate mclk params.",
+			return -EINVAL);
+
+	mem_level->EnabledForThrottle = 1;
+	mem_level->EnabledForActivity = 0;
+	mem_level->VoltageDownHyst = 0;
+	mem_level->ActivityLevel = (uint16_t)
+			(MemoryDPMTuning_VEGAM >> DPMTuning_Activity_Shift);
+	mem_level->StutterEnable = false;
+	mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
+
+	data->display_timing.num_existing_displays = hwmgr->display_config->num_display;
+
+	if (mclk_stutter_mode_threshold &&
+		(clock <= mclk_stutter_mode_threshold) &&
+		(PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL,
+				STUTTER_ENABLE) & 0x1))
+		mem_level->StutterEnable = true;
+
+	if (!result) {
+		CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinMvdd);
+		CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MclkFrequency);
+		CONVERT_FROM_HOST_TO_SMC_US(mem_level->Fcw_int);
+		CONVERT_FROM_HOST_TO_SMC_US(mem_level->Fcw_frac);
+		CONVERT_FROM_HOST_TO_SMC_US(mem_level->ActivityLevel);
+		CONVERT_FROM_HOST_TO_SMC_UL(mem_level->MinVoltage);
+	}
+
+	return result;
+}
+
+static int vegam_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct smu7_dpm_table *dpm_table = &hw_data->dpm_table;
+	int result;
+	/* populate MCLK dpm table to SMU7 */
+	uint32_t array = smu_data->smu7_data.dpm_table_start +
+			offsetof(SMU75_Discrete_DpmTable, MemoryLevel);
+	uint32_t array_size = sizeof(SMU75_Discrete_MemoryLevel) *
+			SMU75_MAX_LEVELS_MEMORY;
+	struct SMU75_Discrete_MemoryLevel *levels =
+			smu_data->smc_state_table.MemoryLevel;
+	uint32_t i;
+
+	for (i = 0; i < dpm_table->mclk_table.count; i++) {
+		PP_ASSERT_WITH_CODE((0 != dpm_table->mclk_table.dpm_levels[i].value),
+				"cannot populate memory level as memory clock is zero",
+				return -EINVAL);
+		result = vegam_populate_single_memory_level(hwmgr,
+				dpm_table->mclk_table.dpm_levels[i].value,
+				&levels[i]);
+
+		if (result)
+			return result;
+
+		levels[i].UpHyst = (uint8_t)
+				(MemoryDPMTuning_VEGAM >> DPMTuning_Uphyst_Shift);
+		levels[i].DownHyst = (uint8_t)
+				(MemoryDPMTuning_VEGAM >> DPMTuning_Downhyst_Shift);
+	}
+
+	smu_data->smc_state_table.MemoryDpmLevelCount =
+			(uint8_t)dpm_table->mclk_table.count;
+	hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask =
+			phm_get_dpm_level_enable_mask_value(&dpm_table->mclk_table);
+
+	for (i = 0; i < dpm_table->mclk_table.count; i++)
+		levels[i].EnabledForActivity =
+				(hw_data->dpm_level_enable_mask.mclk_dpm_enable_mask >> i) & 0x1;
+
+	levels[dpm_table->mclk_table.count - 1].DisplayWatermark =
+			PPSMC_DISPLAY_WATERMARK_HIGH;
+
+	/* the level count is sent to the SMC once at init and never changes */
+	result = smu7_copy_bytes_to_smc(hwmgr, array, (uint8_t *)levels,
+			(uint32_t)array_size, SMC_RAM_END);
+
+	return result;
+}
+
+static int vegam_populate_mvdd_value(struct pp_hwmgr *hwmgr,
+		uint32_t mclk, SMIO_Pattern *smio_pat)
+{
+	const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	uint32_t i = 0;
+
+	if (SMU7_VOLTAGE_CONTROL_NONE != data->mvdd_control) {
+		/* find the first mvdd entry whose clock is at least the request */
+		for (i = 0; i < table_info->vdd_dep_on_mclk->count; i++) {
+			if (mclk <= table_info->vdd_dep_on_mclk->entries[i].clk) {
+				smio_pat->Voltage = data->mvdd_voltage_table.entries[i].value;
+				break;
+			}
+		}
+		PP_ASSERT_WITH_CODE(i < table_info->vdd_dep_on_mclk->count,
+				"MVDD Voltage is outside the supported range.",
+				return -EINVAL);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int vegam_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
+		SMU75_Discrete_DpmTable *table)
+{
+	int result = 0;
+	uint32_t sclk_frequency;
+	const struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	SMIO_Pattern vol_level;
+	uint32_t mvdd;
+	uint16_t us_mvdd;
+
+	table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC;
+
+	/* Get MinVoltage and Frequency from DPM0,
+	 * already converted to SMC_UL */
+	sclk_frequency = data->vbios_boot_state.sclk_bootup_value;
+	result = vegam_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_sclk,
+			sclk_frequency,
+			&table->ACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE(!result,
+			"Cannot find ACPI VDDC voltage value "
+			"in Clock Dependency Table",
+			);
+
+	result = vegam_calculate_sclk_params(hwmgr, sclk_frequency,
+			&(table->ACPILevel.SclkSetting));
+	PP_ASSERT_WITH_CODE(!result,
+			"Error retrieving Engine Clock dividers from VBIOS.",
+			return result);
+
+	table->ACPILevel.DeepSleepDivId = 0;
+	table->ACPILevel.CcPwrDynRm = 0;
+	table->ACPILevel.CcPwrDynRm1 = 0;
+
+	CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.Flags);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.MinVoltage);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.CcPwrDynRm1);
+
+	CONVERT_FROM_HOST_TO_SMC_UL(table->ACPILevel.SclkSetting.SclkFrequency);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_int);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw_frac);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_fcw_int);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_slew_rate);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_up_slew_rate);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Pcc_down_slew_rate);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_int);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac);
+	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate);
+
+	/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
+	table->MemoryACPILevel.MclkFrequency = data->vbios_boot_state.mclk_bootup_value;
+	result = vegam_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_mclk,
+			table->MemoryACPILevel.MclkFrequency,
+			&table->MemoryACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDCI voltage value "
+			"in Clock Dependency Table",
+			);
+
+	us_mvdd = 0;
+	if ((SMU7_VOLTAGE_CONTROL_NONE == data->mvdd_control) ||
+			(data->mclk_dpm_key_disabled))
+		us_mvdd = data->vbios_boot_state.mvdd_bootup_value;
+	else {
+		if (!vegam_populate_mvdd_value(hwmgr,
+				data->dpm_table.mclk_table.dpm_levels[0].value,
+				&vol_level))
+			us_mvdd = vol_level.Voltage;
+	}
+
+	if (!vegam_populate_mvdd_value(hwmgr, 0, &vol_level))
+		table->MemoryACPILevel.MinMvdd = PP_HOST_TO_SMC_UL(vol_level.Voltage);
+	else
+		table->MemoryACPILevel.MinMvdd = 0;
+
+	table->MemoryACPILevel.StutterEnable = false;
+
+	table->MemoryACPILevel.EnabledForThrottle = 0;
+	table->MemoryACPILevel.EnabledForActivity = 0;
+	table->MemoryACPILevel.UpHyst = 0;
+	table->MemoryACPILevel.DownHyst = 100;
+	table->MemoryACPILevel.VoltageDownHyst = 0;
+	table->MemoryACPILevel.ActivityLevel =
+		PP_HOST_TO_SMC_US(data->current_profile_setting.mclk_activity);
+
+	CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MclkFrequency);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->MemoryACPILevel.MinVoltage);
+
+	return result;
+}
+
+static int vegam_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
+		SMU75_Discrete_DpmTable *table)
+{
+	int result = -EINVAL;
+	uint8_t count;
+	struct pp_atomctrl_clock_dividers_vi dividers;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
+			table_info->mm_dep_table;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
+
+	table->VceLevelCount = (uint8_t)(mm_table->count);
+	table->VceBootLevel = 0;
+
+	for (count = 0; count < table->VceLevelCount; count++) {
+		table->VceLevel[count].Frequency = mm_table->entries[count].eclk;
+		table->VceLevel[count].MinVoltage = 0;
+		table->VceLevel[count].MinVoltage |=
+				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->VceLevel[count].MinVoltage |=
+				(vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+		table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+		/* retrieve divider value from VBIOS */
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				table->VceLevel[count].Frequency, &dividers);
+		PP_ASSERT_WITH_CODE((0 == result),
+				"cannot find divider id for VCE engine clock",
+				return result);
+
+		table->VceLevel[count].Divider = (uint8_t)dividers.pll_post_divider;
+
+		CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].Frequency);
+		CONVERT_FROM_HOST_TO_SMC_UL(table->VceLevel[count].MinVoltage);
+	}
+	return result;
+}
+
+static int vegam_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
+		SMU75_Discrete_DpmTable *table)
+{
+	int result = -EINVAL;
+	uint8_t count;
+	struct pp_atomctrl_clock_dividers_vi dividers;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
+			table_info->mm_dep_table;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
+
+	table->SamuBootLevel = 0;
+	table->SamuLevelCount = (uint8_t)(mm_table->count);
+
+	for (count = 0; count < table->SamuLevelCount; count++) {
+		/* not sure whether we need evclk or not */
+		table->SamuLevel[count].MinVoltage = 0;
+		table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
+		table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
+				VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+		table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+		/* retrieve divider value from VBIOS */
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				table->SamuLevel[count].Frequency, &dividers);
+		PP_ASSERT_WITH_CODE((0 == result),
+				"cannot find divider id for samu clock", return result);
+
+		table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider;
+
+		CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency);
+		CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage);
+	}
+	return result;
+}
+
+static int vegam_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr,
+		int32_t eng_clock, int32_t mem_clock,
+		SMU75_Discrete_MCArbDramTimingTableEntry *arb_regs)
+{
+	uint32_t dram_timing;
+	uint32_t dram_timing2;
+	uint32_t burst_time;
+	uint32_t rfsh_rate;
+	uint32_t misc3;
+
+	int result;
+
+	result = atomctrl_set_engine_dram_timings_rv770(hwmgr,
+			eng_clock, mem_clock);
+	PP_ASSERT_WITH_CODE(result == 0,
+			"Error calling VBIOS to set DRAM_TIMING.",
+			return result);
+
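+	/* The VBIOS call above reprogrammed the MC arbiter for this
+	 * eng/mem clock pair; capture the resulting registers so they can
+	 * be stored in the SMC's per-level arb table. */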
+	dram_timing = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING);
+	dram_timing2 = cgs_read_register(hwmgr->device, mmMC_ARB_DRAM_TIMING2);
+	burst_time = cgs_read_register(hwmgr->device, mmMC_ARB_BURST_TIME);
+	rfsh_rate = cgs_read_register(hwmgr->device, mmMC_ARB_RFSH_RATE);
+	misc3 = cgs_read_register(hwmgr->device, mmMC_ARB_MISC3);
+
+	arb_regs->McArbDramTiming  = PP_HOST_TO_SMC_UL(dram_timing);
+	arb_regs->McArbDramTiming2 = PP_HOST_TO_SMC_UL(dram_timing2);
+	arb_regs->McArbBurstTime   = PP_HOST_TO_SMC_UL(burst_time);
+	arb_regs->McArbRfshRate = PP_HOST_TO_SMC_UL(rfsh_rate);
+	arb_regs->McArbMisc3 = PP_HOST_TO_SMC_UL(misc3);
+
+	return 0;
+}
+
+static int vegam_program_memory_timing_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct SMU75_Discrete_MCArbDramTimingTable arb_regs;
+	uint32_t i, j;
+	int result = 0;
+
+	memset(&arb_regs, 0, sizeof(SMU75_Discrete_MCArbDramTimingTable));
+
+	for (i = 0; i < hw_data->dpm_table.sclk_table.count; i++) {
+		for (j = 0; j < hw_data->dpm_table.mclk_table.count; j++) {
+			result = vegam_populate_memory_timing_parameters(hwmgr,
+					hw_data->dpm_table.sclk_table.dpm_levels[i].value,
+					hw_data->dpm_table.mclk_table.dpm_levels[j].value,
+					&arb_regs.entries[i][j]);
+			if (result)
+				return result;
+		}
+	}
+
+	result = smu7_copy_bytes_to_smc(
+			hwmgr,
+			smu_data->smu7_data.arb_table_start,
+			(uint8_t *)&arb_regs,
+			sizeof(SMU75_Discrete_MCArbDramTimingTable),
+			SMC_RAM_END);
+	return result;
+}
+
+static int vegam_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	int result = -EINVAL;
+	uint8_t count;
+	struct pp_atomctrl_clock_dividers_vi dividers;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
+			table_info->mm_dep_table;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
+
+	table->UvdLevelCount = (uint8_t)(mm_table->count);
+	table->UvdBootLevel = 0;
+
+	for (count = 0; count < table->UvdLevelCount; count++) {
+		table->UvdLevel[count].MinVoltage = 0;
+		table->UvdLevel[count].VclkFrequency = mm_table->entries[count].vclk;
+		table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
+		table->UvdLevel[count].MinVoltage |=
+				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
+		table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
+
+		/* retrieve divider value from VBIOS */
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				table->UvdLevel[count].VclkFrequency, &dividers);
+		PP_ASSERT_WITH_CODE((0 == result),
+				"cannot find divider id for Vclk clock", return result);
+
+		table->UvdLevel[count].VclkDivider = (uint8_t)dividers.pll_post_divider;
+
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				table->UvdLevel[count].DclkFrequency, &dividers);
+		PP_ASSERT_WITH_CODE((0 == result),
+				"cannot find divider id for Dclk clock", return result);
+
+		table->UvdLevel[count].DclkDivider = (uint8_t)dividers.pll_post_divider;
+
+		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency);
+		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency);
+		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage);
+	}
+
+	return result;
+}
+
+static int vegam_populate_smc_boot_level(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	int result = 0;
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+	table->GraphicsBootLevel = 0;
+	table->MemoryBootLevel = 0;
+
+	/* find boot level from dpm table */
+	result = phm_find_boot_level(&(data->dpm_table.sclk_table),
+			data->vbios_boot_state.sclk_bootup_value,
+			(uint32_t *)&(table->GraphicsBootLevel));
+
+	result = phm_find_boot_level(&(data->dpm_table.mclk_table),
+			data->vbios_boot_state.mclk_bootup_value,
+			(uint32_t *)&(table->MemoryBootLevel));
+
+	table->BootVddc  = data->vbios_boot_state.vddc_bootup_value *
+			VOLTAGE_SCALE;
+	table->BootVddci = data->vbios_boot_state.vddci_bootup_value *
+			VOLTAGE_SCALE;
+	table->BootMVdd  = data->vbios_boot_state.mvdd_bootup_value *
+			VOLTAGE_SCALE;
+
+	CONVERT_FROM_HOST_TO_SMC_US(table->BootVddc);
+	CONVERT_FROM_HOST_TO_SMC_US(table->BootVddci);
+	CONVERT_FROM_HOST_TO_SMC_US(table->BootMVdd);
+
+	return 0;
+}
+
+static int vegam_populate_smc_initial_state(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	uint8_t count, level;
+
+	count = (uint8_t)(table_info->vdd_dep_on_sclk->count);
+
+	for (level = 0; level < count; level++) {
+		if (table_info->vdd_dep_on_sclk->entries[level].clk >=
+				hw_data->vbios_boot_state.sclk_bootup_value) {
+			smu_data->smc_state_table.GraphicsBootLevel = level;
+			break;
+		}
+	}
+
+	count = (uint8_t)(table_info->vdd_dep_on_mclk->count);
+	for (level = 0; level < count; level++) {
+		if (table_info->vdd_dep_on_mclk->entries[level].clk >=
+				hw_data->vbios_boot_state.mclk_bootup_value) {
+			smu_data->smc_state_table.MemoryBootLevel = level;
+			break;
+		}
+	}
+
+	return 0;
+}
+
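+/* Convert a fan-gain percentage (0-100) into the 0-4096 fixed-point
+ * representation used by the SMC. */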
+static uint16_t scale_fan_gain_settings(uint16_t raw_setting)
+{
+	uint32_t tmp;
+
+	tmp = raw_setting * 4096 / 100;
+	return (uint16_t)tmp;
+}
+
+static int vegam_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+	const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults;
+	SMU75_Discrete_DpmTable  *table = &(smu_data->smc_state_table);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_cac_tdp_table *cac_dtp_table = table_info->cac_dtp_table;
+	struct pp_advance_fan_control_parameters *fan_table =
+			&hwmgr->thermal_controller.advanceFanControlParameters;
+	int i, j, k;
+	const uint16_t *pdef1;
+	const uint16_t *pdef2;
+
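+	/* TDP values are scaled by 128 and the temperature limits below by
+	 * 256; both are fixed-point encodings expected by the SMC. */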
+	table->DefaultTdp = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128));
+	table->TargetTdp  = PP_HOST_TO_SMC_US((uint16_t)(cac_dtp_table->usTDP * 128));
+
+	PP_ASSERT_WITH_CODE(cac_dtp_table->usTargetOperatingTemp <= 255,
+				"Target Operating Temp is out of Range!",
+				);
+
+	table->TemperatureLimitEdge = PP_HOST_TO_SMC_US(
+			cac_dtp_table->usTargetOperatingTemp * 256);
+	table->TemperatureLimitHotspot = PP_HOST_TO_SMC_US(
+			cac_dtp_table->usTemperatureLimitHotspot * 256);
+	table->FanGainEdge = PP_HOST_TO_SMC_US(
+			scale_fan_gain_settings(fan_table->usFanGainEdge));
+	table->FanGainHotspot = PP_HOST_TO_SMC_US(
+			scale_fan_gain_settings(fan_table->usFanGainHotspot));
+
+	pdef1 = defaults->BAPMTI_R;
+	pdef2 = defaults->BAPMTI_RC;
+
+	for (i = 0; i < SMU75_DTE_ITERATIONS; i++) {
+		for (j = 0; j < SMU75_DTE_SOURCES; j++) {
+			for (k = 0; k < SMU75_DTE_SINKS; k++) {
+				table->BAPMTI_R[i][j][k] = PP_HOST_TO_SMC_US(*pdef1);
+				table->BAPMTI_RC[i][j][k] = PP_HOST_TO_SMC_US(*pdef2);
+				pdef1++;
+				pdef2++;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int vegam_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
+{
+	uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min;
+	struct vegam_smumgr *smu_data =
+			(struct vegam_smumgr *)(hwmgr->smu_backend);
+
+	uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0;
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+			table_info->vdd_dep_on_sclk;
+	uint32_t mask = (1 << ((STRAP_ASIC_RO_MSB - STRAP_ASIC_RO_LSB) + 1)) - 1;
+
+	stretch_amount = (uint8_t)table_info->cac_dtp_table->usClockStretchAmount;
+
+	atomctrl_read_efuse(hwmgr, STRAP_ASIC_RO_LSB, STRAP_ASIC_RO_MSB,
+			mask, &efuse);
+
+	min = 1200;
+	max = 2500;
+
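+	/* Map the 8-bit RO fuse reading linearly onto the [min, max] range;
+	 * the voltage expressions below are fitted characterization curves. */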
+	ro = efuse * (max - min) / 255 + min;
+
+	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
+	for (i = 0; i < sclk_table->count; i++) {
+		smu_data->smc_state_table.Sclk_CKS_masterEn0_7 |=
+				sclk_table->entries[i].cks_enable << i;
+		volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) *
+				136418 - (ro - 70) * 1000000) /
+				(2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000));
+		volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 *
+				3232 - (ro - 65) * 1000000) /
+				(2522480 - sclk_table->entries[i].clk/100 * 115764/100));
+
+		if (volt_without_cks >= volt_with_cks)
+			volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
+					sclk_table->entries[i].cks_voffset) * 100 + 624) / 625);
+
+		smu_data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
+	}
+
+	smu_data->smc_state_table.LdoRefSel =
+			(table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ?
+			table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 5;
+	/* Populate CKS Lookup Table */
+	if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
+		stretch_amount2 = 0;
+	else if (stretch_amount == 3 || stretch_amount == 4)
+		stretch_amount2 = 1;
+	else {
+		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_ClockStretcher);
+		PP_ASSERT_WITH_CODE(false,
+				"Stretch Amount in PPTable not supported",
+				return -EINVAL);
+	}
+
+	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL);
+	value &= 0xFFFFFFFE;
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value);
+
+	return 0;
+}
+
+static bool vegam_is_hw_avfs_present(struct pp_hwmgr *hwmgr)
+{
+	uint32_t efuse;
+
+	efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixSMU_EFUSE_0 + (49 * 4));
+	efuse &= 0x00000001;
+
+	if (efuse)
+		return true;
+
+	return false;
+}
+
+static int vegam_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+	SMU75_Discrete_DpmTable  *table = &(smu_data->smc_state_table);
+	int result = 0;
+	struct pp_atom_ctrl__avfs_parameters avfs_params = {0};
+	AVFS_meanNsigma_t AVFS_meanNsigma = { {0} };
+	AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} };
+	uint32_t tmp, i;
+
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)hwmgr->pptable;
+	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+			table_info->vdd_dep_on_sclk;
+
+	if (!hwmgr->avfs_supported)
+		return 0;
+
+	result = atomctrl_get_avfs_information(hwmgr, &avfs_params);
+
+	if (0 == result) {
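+		/* Each table has two fits: index 0 for CKS-on and index 1
+		 * for CKS-off, converted to SMC byte order. */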
+		table->BTCGB_VDROOP_TABLE[0].a0 =
+				PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0);
+		table->BTCGB_VDROOP_TABLE[0].a1 =
+				PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1);
+		table->BTCGB_VDROOP_TABLE[0].a2 =
+				PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2);
+		table->BTCGB_VDROOP_TABLE[1].a0 =
+				PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0);
+		table->BTCGB_VDROOP_TABLE[1].a1 =
+				PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1);
+		table->BTCGB_VDROOP_TABLE[1].a2 =
+				PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2);
+		table->AVFSGB_FUSE_TABLE[0].m1 =
+				PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1);
+		table->AVFSGB_FUSE_TABLE[0].m2 =
+				PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2);
+		table->AVFSGB_FUSE_TABLE[0].b =
+				PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b);
+		table->AVFSGB_FUSE_TABLE[0].m1_shift = 24;
+		table->AVFSGB_FUSE_TABLE[0].m2_shift = 12;
+		table->AVFSGB_FUSE_TABLE[1].m1 =
+				PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+		table->AVFSGB_FUSE_TABLE[1].m2 =
+				PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+		table->AVFSGB_FUSE_TABLE[1].b =
+				PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+		table->AVFSGB_FUSE_TABLE[1].m1_shift = 24;
+		table->AVFSGB_FUSE_TABLE[1].m2_shift = 12;
+		table->MaxVoltage = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv);
+		AVFS_meanNsigma.Aconstant[0] =
+				PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0);
+		AVFS_meanNsigma.Aconstant[1] =
+				PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1);
+		AVFS_meanNsigma.Aconstant[2] =
+				PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2);
+		AVFS_meanNsigma.DC_tol_sigma =
+				PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma);
+		AVFS_meanNsigma.Platform_mean =
+				PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean);
+		AVFS_meanNsigma.PSM_Age_CompFactor =
+				PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor);
+		AVFS_meanNsigma.Platform_sigma =
+				PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma);
+
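+		/* Scale each level's CKS voltage offset down by 6.25
+		 * (x * 100 / 625) for the mean-and-sigma table. */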
+		for (i = 0; i < sclk_table->count; i++) {
+			AVFS_meanNsigma.Static_Voltage_Offset[i] =
+					(uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625);
+			AVFS_SclkOffset.Sclk_Offset[i] =
+					PP_HOST_TO_SMC_US((uint16_t)
+							(sclk_table->entries[i].sclk_offset) / 100);
+		}
+
+		result = smu7_read_smc_sram_dword(hwmgr,
+				SMU7_FIRMWARE_HEADER_LOCATION +
+				offsetof(SMU75_Firmware_Header, AvfsMeanNSigma),
+				&tmp, SMC_RAM_END);
+		smu7_copy_bytes_to_smc(hwmgr,
+					tmp,
+					(uint8_t *)&AVFS_meanNsigma,
+					sizeof(AVFS_meanNsigma_t),
+					SMC_RAM_END);
+
+		result = smu7_read_smc_sram_dword(hwmgr,
+				SMU7_FIRMWARE_HEADER_LOCATION +
+				offsetof(SMU75_Firmware_Header, AvfsSclkOffsetTable),
+				&tmp, SMC_RAM_END);
+		smu7_copy_bytes_to_smc(hwmgr,
+					tmp,
+					(uint8_t *)&AVFS_SclkOffset,
+					sizeof(AVFS_Sclk_Offset_t),
+					SMC_RAM_END);
+
+		data->avfs_vdroop_override_setting =
+				(avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) |
+				(avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) |
+				(avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) |
+				(avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT);
+		data->apply_avfs_cks_off_voltage =
+				avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1;
+	}
+	return result;
+}
+
+static int vegam_populate_vr_config(struct pp_hwmgr *hwmgr,
+		struct SMU75_Discrete_DpmTable *table)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data =
+			(struct vegam_smumgr *)(hwmgr->smu_backend);
+	uint16_t config;
+
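+	/* VRConfig packs one regulator selection per rail (VDDGFX, VDDC,
+	 * VDDCI, MVDD) into a single word using the VRCONF_*_SHIFT fields. */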
+	config = VR_MERGED_WITH_VDDC;
+	table->VRConfig |= (config << VRCONF_VDDGFX_SHIFT);
+
+	/* Set Vddc Voltage Controller */
+	if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->voltage_control) {
+		config = VR_SVI2_PLANE_1;
+		table->VRConfig |= config;
+	} else {
+		PP_ASSERT_WITH_CODE(false,
+				"VDDC should be on SVI2 control in merged mode!",
+				);
+	}
+	/* Set Vddci Voltage Controller */
+	if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) {
+		config = VR_SVI2_PLANE_2;  /* only in merged mode */
+		table->VRConfig |= (config << VRCONF_VDDCI_SHIFT);
+	} else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) {
+		config = VR_SMIO_PATTERN_1;
+		table->VRConfig |= (config << VRCONF_VDDCI_SHIFT);
+	} else {
+		config = VR_STATIC_VOLTAGE;
+		table->VRConfig |= (config << VRCONF_VDDCI_SHIFT);
+	}
+	/* Set Mvdd Voltage Controller */
+	if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) {
+		if (config != VR_SVI2_PLANE_2) {
+			config = VR_SVI2_PLANE_2;
+			table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+			cgs_write_ind_register(hwmgr->device,
+					CGS_IND_REG__SMC,
+					smu_data->smu7_data.soft_regs_start +
+					offsetof(SMU75_SoftRegisters, AllowMvddSwitch),
+					0x1);
+		} else {
+			PP_ASSERT_WITH_CODE(false,
+					"SVI2 Plane 2 is already taken, set MVDD as Static",);
+			config = VR_STATIC_VOLTAGE;
+			table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+		}
+	} else if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) {
+		config = VR_SMIO_PATTERN_2;
+		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+		cgs_write_ind_register(hwmgr->device,
+				CGS_IND_REG__SMC,
+				smu_data->smu7_data.soft_regs_start +
+				offsetof(SMU75_SoftRegisters, AllowMvddSwitch),
+				0x1);
+	} else {
+		config = VR_STATIC_VOLTAGE;
+		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+	}
+
+	return 0;
+}
+
+static int vegam_populate_svi_load_line(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults;
+
+	smu_data->power_tune_table.SviLoadLineEn = defaults->SviLoadLineEn;
+	smu_data->power_tune_table.SviLoadLineVddC = defaults->SviLoadLineVddC;
+	smu_data->power_tune_table.SviLoadLineTrimVddC = 3;
+	smu_data->power_tune_table.SviLoadLineOffsetVddC = 0;
+
+	return 0;
+}
+
+static int vegam_populate_tdc_limit(struct pp_hwmgr *hwmgr)
+{
+	uint16_t tdc_limit;
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults;
+
+	tdc_limit = (uint16_t)(table_info->cac_dtp_table->usTDC * 128);
+	smu_data->power_tune_table.TDC_VDDC_PkgLimit =
+			CONVERT_FROM_HOST_TO_SMC_US(tdc_limit);
+	smu_data->power_tune_table.TDC_VDDC_ThrottleReleaseLimitPerc =
+			defaults->TDC_VDDC_ThrottleReleaseLimitPerc;
+	smu_data->power_tune_table.TDC_MAWt = defaults->TDC_MAWt;
+
+	return 0;
+}
+
+static int vegam_populate_dw8(struct pp_hwmgr *hwmgr, uint32_t fuse_table_offset)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	const struct vegam_pt_defaults *defaults = smu_data->power_tune_defaults;
+	uint32_t temp;
+
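+	/* TdcWaterfallCtl shares a PmFuses dword with the LPML temperature
+	 * limits; read the whole word here and unpack the bytes below
+	 * (TdcWaterfallCtl itself comes from the per-ASIC defaults). */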
+	if (smu7_read_smc_sram_dword(hwmgr,
+			fuse_table_offset +
+			offsetof(SMU75_Discrete_PmFuses, TdcWaterfallCtl),
+			(uint32_t *)&temp, SMC_RAM_END))
+		PP_ASSERT_WITH_CODE(false,
+				"Attempt to read PmFuses.DW8 (TdcWaterfallCtl) from SMC Failed!",
+				return -EINVAL);
+	else {
+		smu_data->power_tune_table.TdcWaterfallCtl = defaults->TdcWaterfallCtl;
+		smu_data->power_tune_table.LPMLTemperatureMin =
+				(uint8_t)((temp >> 16) & 0xff);
+		smu_data->power_tune_table.LPMLTemperatureMax =
+				(uint8_t)((temp >> 8) & 0xff);
+		smu_data->power_tune_table.Reserved = (uint8_t)(temp & 0xff);
+	}
+	return 0;
+}
+
+static int vegam_populate_temperature_scaler(struct pp_hwmgr *hwmgr)
+{
+	int i;
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+	/* Currently not used. Set all to zero. */
+	for (i = 0; i < 16; i++)
+		smu_data->power_tune_table.LPMLTemperatureScaler[i] = 0;
+
+	return 0;
+}
+
+static int vegam_populate_fuzzy_fan(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+	/* TODO: move to hwmgr */
+	if ((hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity & (1 << 15))
+		|| 0 == hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity)
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity =
+			hwmgr->thermal_controller.advanceFanControlParameters.usDefaultFanOutputSensitivity;
+
+	smu_data->power_tune_table.FuzzyFan_PwmSetDelta = PP_HOST_TO_SMC_US(
+				hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity);
+	return 0;
+}
+
+static int vegam_populate_gnb_lpml(struct pp_hwmgr *hwmgr)
+{
+	int i;
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+	/* Currently not used. Set all to zero. */
+	for (i = 0; i < 16; i++)
+		smu_data->power_tune_table.GnbLPML[i] = 0;
+
+	return 0;
+}
+
+static int vegam_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	uint16_t hi_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd;
+	uint16_t lo_sidd = smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd;
+	struct phm_cac_tdp_table *cac_table = table_info->cac_dtp_table;
+
+	hi_sidd = (uint16_t)(cac_table->usHighCACLeakage / 100 * 256);
+	lo_sidd = (uint16_t)(cac_table->usLowCACLeakage / 100 * 256);
+
+	smu_data->power_tune_table.BapmVddCBaseLeakageHiSidd =
+			CONVERT_FROM_HOST_TO_SMC_US(hi_sidd);
+	smu_data->power_tune_table.BapmVddCBaseLeakageLoSidd =
+			CONVERT_FROM_HOST_TO_SMC_US(lo_sidd);
+
+	return 0;
+}
+
+static int vegam_populate_pm_fuses(struct pp_hwmgr *hwmgr)
+{
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+	uint32_t pm_fuse_table_offset;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment)) {
+		if (smu7_read_smc_sram_dword(hwmgr,
+				SMU7_FIRMWARE_HEADER_LOCATION +
+				offsetof(SMU75_Firmware_Header, PmFuseTable),
+				&pm_fuse_table_offset, SMC_RAM_END))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to get pm_fuse_table_offset Failed!",
+					return -EINVAL);
+
+		if (vegam_populate_svi_load_line(hwmgr))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to populate SviLoadLine Failed!",
+					return -EINVAL);
+
+		if (vegam_populate_tdc_limit(hwmgr))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to populate TDCLimit Failed!", return -EINVAL);
+
+		if (vegam_populate_dw8(hwmgr, pm_fuse_table_offset))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to populate TdcWaterfallCtl, "
+					"LPMLTemperature Min and Max Failed!",
+					return -EINVAL);
+
+		if (vegam_populate_temperature_scaler(hwmgr))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to populate LPMLTemperatureScaler Failed!",
+					return -EINVAL);
+
+		if (vegam_populate_fuzzy_fan(hwmgr))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to populate Fuzzy Fan Control parameters Failed!",
+					return -EINVAL);
+
+		if (vegam_populate_gnb_lpml(hwmgr))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to populate GnbLPML Failed!",
+					return -EINVAL);
+
+		if (vegam_populate_bapm_vddc_base_leakage_sidd(hwmgr))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to populate BapmVddCBaseLeakage Hi and Lo "
+					"Sidd Failed!", return -EINVAL);
+
+		if (smu7_copy_bytes_to_smc(hwmgr, pm_fuse_table_offset,
+				(uint8_t *)&smu_data->power_tune_table,
+				(sizeof(struct SMU75_Discrete_PmFuses) - PMFUSES_AVFSSIZE),
+				SMC_RAM_END))
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to download PmFuseTable Failed!",
+					return -EINVAL);
+	}
+	return 0;
+}
+
+static int vegam_enable_reconfig_cus(struct pp_hwmgr *hwmgr)
+{
+	struct amdgpu_device *adev = hwmgr->adev;
+
+	smum_send_msg_to_smc_with_parameter(hwmgr,
+					    PPSMC_MSG_EnableModeSwitchRLCNotification,
+					    adev->gfx.cu_info.number);
+
+	return 0;
+}
+
+static int vegam_init_smc_table(struct pp_hwmgr *hwmgr)
+{
+	int result;
+	struct smu7_hwmgr *hw_data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend);
+
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct SMU75_Discrete_DpmTable *table = &(smu_data->smc_state_table);
+	uint8_t i;
+	struct pp_atomctrl_gpio_pin_assignment gpio_pin;
+	struct phm_ppt_v1_gpio_table *gpio_table =
+			(struct phm_ppt_v1_gpio_table *)table_info->gpio_table;
+	pp_atomctrl_clock_dividers_vi dividers;
+
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_AutomaticDCTransition);
+
+	vegam_initialize_power_tune_defaults(hwmgr);
+
+	if (SMU7_VOLTAGE_CONTROL_NONE != hw_data->voltage_control)
+		vegam_populate_smc_voltage_tables(hwmgr, table);
+
+	table->SystemFlags = 0;
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_AutomaticDCTransition))
+		table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_StepVddc))
+		table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
+
+	if (hw_data->is_memory_gddr5)
+		table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
+
+	if (hw_data->ulv_supported && table_info->us_ulv_voltage_offset) {
+		result = vegam_populate_ulv_state(hwmgr, table);
+		PP_ASSERT_WITH_CODE(!result,
+				"Failed to initialize ULV state!", return result);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+				ixCG_ULV_PARAMETER, SMU7_CGULVPARAMETER_DFLT);
+	}
+
+	result = vegam_populate_smc_link_level(hwmgr, table);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize Link Level!", return result);
+
+	result = vegam_populate_all_graphic_levels(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize Graphics Level!", return result);
+
+	result = vegam_populate_all_memory_levels(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize Memory Level!", return result);
+
+	result = vegam_populate_smc_acpi_level(hwmgr, table);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize ACPI Level!", return result);
+
+	result = vegam_populate_smc_vce_level(hwmgr, table);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize VCE Level!", return result);
+
+	result = vegam_populate_smc_samu_level(hwmgr, table);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize SAMU Level!", return result);
+
+	/* Since only the initial state is completely set up at this point
+	 * (the other states are just copies of the boot state) we only
+	 * need to populate the ARB settings for the initial state.
+	 */
+	result = vegam_program_memory_timing_parameters(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to Write ARB settings for the initial state.", return result);
+
+	result = vegam_populate_smc_uvd_level(hwmgr, table);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize UVD Level!", return result);
+
+	result = vegam_populate_smc_boot_level(hwmgr, table);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize Boot Level!", return result);
+
+	result = vegam_populate_smc_initial_state(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to initialize Boot State!", return result);
+
+	result = vegam_populate_bapm_parameters_in_dpm_table(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to populate BAPM Parameters!", return result);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ClockStretcher)) {
+		result = vegam_populate_clock_stretcher_data_table(hwmgr);
+		PP_ASSERT_WITH_CODE(!result,
+				"Failed to populate Clock Stretcher Data Table!",
+				return result);
+	}
+
+	result = vegam_populate_avfs_parameters(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to populate AVFS Parameters!", return result);
+
+	table->CurrSclkPllRange = 0xff;
+	table->GraphicsVoltageChangeEnable  = 1;
+	table->GraphicsThermThrottleEnable  = 1;
+	table->GraphicsInterval = 1;
+	table->VoltageInterval  = 1;
+	table->ThermalInterval  = 1;
+	table->TemperatureLimitHigh =
+			table_info->cac_dtp_table->usTargetOperatingTemp *
+			SMU7_Q88_FORMAT_CONVERSION_UNIT;
+	table->TemperatureLimitLow  =
+			(table_info->cac_dtp_table->usTargetOperatingTemp - 1) *
+			SMU7_Q88_FORMAT_CONVERSION_UNIT;
+	table->MemoryVoltageChangeEnable = 1;
+	table->MemoryInterval = 1;
+	table->VoltageResponseTime = 0;
+	table->PhaseResponseTime = 0;
+	table->MemoryThermThrottleEnable = 1;
+
+	PP_ASSERT_WITH_CODE(hw_data->dpm_table.pcie_speed_table.count >= 1,
+			"There must be 1 or more PCIE levels defined in PPTable.",
+			return -EINVAL);
+	table->PCIeBootLinkLevel =
+			hw_data->dpm_table.pcie_speed_table.count;
+	table->PCIeGenInterval = 1;
+	table->VRConfig = 0;
+
+	result = vegam_populate_vr_config(hwmgr, table);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to populate VRConfig setting!", return result);
+
+	table->ThermGpio = 17;
+	table->SclkStepSize = 0x4000;
+
+	if (atomctrl_get_pp_assign_pin(hwmgr,
+			VDDC_VRHOT_GPIO_PINID, &gpio_pin)) {
+		table->VRHotGpio = gpio_pin.uc_gpio_pin_bit_shift;
+		if (gpio_table)
+			table->VRHotLevel =
+					table_info->gpio_table->vrhot_triggered_sclk_dpm_index;
+	} else {
+		table->VRHotGpio = SMU7_UNUSED_GPIO_PIN;
+		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_RegulatorHot);
+	}
+
+	if (atomctrl_get_pp_assign_pin(hwmgr,
+			PP_AC_DC_SWITCH_GPIO_PINID,	&gpio_pin)) {
+		table->AcDcGpio = gpio_pin.uc_gpio_pin_bit_shift;
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_AutomaticDCTransition) &&
+				!smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UseNewGPIOScheme))
+			phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+					PHM_PlatformCaps_SMCtoPPLIBAcdcGpioScheme);
+	} else {
+		table->AcDcGpio = SMU7_UNUSED_GPIO_PIN;
+		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_AutomaticDCTransition);
+	}
+
+	/* Thermal Output GPIO */
+	if (atomctrl_get_pp_assign_pin(hwmgr,
+			THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin)) {
+		table->ThermOutGpio = gpio_pin.uc_gpio_pin_bit_shift;
+
+		/* For polarity, read GPIOPAD_A with the assigned GPIO pin:
+		 * since the VBIOS programs this register to the 'inactive
+		 * state', the driver can derive the 'active state' from it
+		 * and program the SMU with the correct polarity.
+		 */
+		table->ThermOutPolarity =
+				(0 == (cgs_read_register(hwmgr->device, mmGPIOPAD_A) &
+				(1 << gpio_pin.uc_gpio_pin_bit_shift))) ? 1 : 0;
+		table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_ONLY;
+
+		/* if required, combine VRHot/PCC with thermal out GPIO */
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_RegulatorHot) &&
+			phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_CombinePCCWithThermalSignal))
+			table->ThermOutMode = SMU7_THERM_OUT_MODE_THERM_VRHOT;
+	} else {
+		table->ThermOutGpio = 17;
+		table->ThermOutPolarity = 1;
+		table->ThermOutMode = SMU7_THERM_OUT_MODE_DISABLE;
+	}
+
+	/* Populate BIF_SCLK levels into SMC DPM table */
+	for (i = 0; i <= hw_data->dpm_table.pcie_speed_table.count; i++) {
+		result = atomctrl_get_dfs_pll_dividers_vi(hwmgr,
+				smu_data->bif_sclk_table[i], &dividers);
+		PP_ASSERT_WITH_CODE(!result,
+				"Cannot find DFS divider id for Sclk",
+				return result);
+
+		if (i == 0)
+			table->Ulv.BifSclkDfs =
+					PP_HOST_TO_SMC_US((uint16_t)(dividers.pll_post_divider));
+		else
+			table->LinkLevel[i - 1].BifSclkDfs =
+					PP_HOST_TO_SMC_US((uint16_t)(dividers.pll_post_divider));
+	}
+
+	for (i = 0; i < SMU75_MAX_ENTRIES_SMIO; i++)
+		table->Smio[i] = PP_HOST_TO_SMC_UL(table->Smio[i]);
+
+	CONVERT_FROM_HOST_TO_SMC_UL(table->SystemFlags);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->VRConfig);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask1);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->SmioMask2);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->SclkStepSize);
+	CONVERT_FROM_HOST_TO_SMC_UL(table->CurrSclkPllRange);
+	CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitHigh);
+	CONVERT_FROM_HOST_TO_SMC_US(table->TemperatureLimitLow);
+	CONVERT_FROM_HOST_TO_SMC_US(table->VoltageResponseTime);
+	CONVERT_FROM_HOST_TO_SMC_US(table->PhaseResponseTime);
+
+	/* Upload all dpm data to SMC memory.(dpm level, dpm level count etc) */
+	result = smu7_copy_bytes_to_smc(hwmgr,
+			smu_data->smu7_data.dpm_table_start +
+			offsetof(SMU75_Discrete_DpmTable, SystemFlags),
+			(uint8_t *)&(table->SystemFlags),
+			sizeof(SMU75_Discrete_DpmTable) - 3 * sizeof(SMU75_PIDController),
+			SMC_RAM_END);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to upload dpm data to SMC memory!", return result);
+
+	result = vegam_populate_pm_fuses(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to populate PM fuses to SMC memory!", return result);
+
+	result = vegam_enable_reconfig_cus(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"Failed to enable reconfigurable CUs!", return result);
+
+	return 0;
+}
+
+static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member)
+{
+	switch (type) {
+	case SMU_SoftRegisters:
+		switch (member) {
+		case HandshakeDisables:
+			return offsetof(SMU75_SoftRegisters, HandshakeDisables);
+		case VoltageChangeTimeout:
+			return offsetof(SMU75_SoftRegisters, VoltageChangeTimeout);
+		case AverageGraphicsActivity:
+			return offsetof(SMU75_SoftRegisters, AverageGraphicsActivity);
+		case PreVBlankGap:
+			return offsetof(SMU75_SoftRegisters, PreVBlankGap);
+		case VBlankTimeout:
+			return offsetof(SMU75_SoftRegisters, VBlankTimeout);
+		case UcodeLoadStatus:
+			return offsetof(SMU75_SoftRegisters, UcodeLoadStatus);
+		case DRAM_LOG_ADDR_H:
+			return offsetof(SMU75_SoftRegisters, DRAM_LOG_ADDR_H);
+		case DRAM_LOG_ADDR_L:
+			return offsetof(SMU75_SoftRegisters, DRAM_LOG_ADDR_L);
+		case DRAM_LOG_PHY_ADDR_H:
+			return offsetof(SMU75_SoftRegisters, DRAM_LOG_PHY_ADDR_H);
+		case DRAM_LOG_PHY_ADDR_L:
+			return offsetof(SMU75_SoftRegisters, DRAM_LOG_PHY_ADDR_L);
+		case DRAM_LOG_BUFF_SIZE:
+			return offsetof(SMU75_SoftRegisters, DRAM_LOG_BUFF_SIZE);
+		}
+		break;
+	case SMU_Discrete_DpmTable:
+		switch (member) {
+		case UvdBootLevel:
+			return offsetof(SMU75_Discrete_DpmTable, UvdBootLevel);
+		case VceBootLevel:
+			return offsetof(SMU75_Discrete_DpmTable, VceBootLevel);
+		case SamuBootLevel:
+			return offsetof(SMU75_Discrete_DpmTable, SamuBootLevel);
+		case LowSclkInterruptThreshold:
+			return offsetof(SMU75_Discrete_DpmTable, LowSclkInterruptThreshold);
+		}
+	}
+	pr_warn("can't get the offset of type %x member %x\n", type, member);
+	return 0;
+}
+
+static int vegam_program_mem_timing_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+
+	if (data->need_update_smu7_dpm_table &
+		(DPMTABLE_OD_UPDATE_SCLK |
+		DPMTABLE_UPDATE_SCLK |
+		DPMTABLE_UPDATE_MCLK))
+		return vegam_program_memory_timing_parameters(hwmgr);
+
+	return 0;
+}
+
+static int vegam_update_sclk_threshold(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	struct vegam_smumgr *smu_data =
+			(struct vegam_smumgr *)(hwmgr->smu_backend);
+	int result = 0;
+	uint32_t low_sclk_interrupt_threshold = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_SclkThrottleLowNotification)
+	    && (data->low_sclk_interrupt_threshold != 0)) {
+		low_sclk_interrupt_threshold =
+				data->low_sclk_interrupt_threshold;
+
+		CONVERT_FROM_HOST_TO_SMC_UL(low_sclk_interrupt_threshold);
+
+		result = smu7_copy_bytes_to_smc(
+				hwmgr,
+				smu_data->smu7_data.dpm_table_start +
+				offsetof(SMU75_Discrete_DpmTable,
+					LowSclkInterruptThreshold),
+				(uint8_t *)&low_sclk_interrupt_threshold,
+				sizeof(uint32_t),
+				SMC_RAM_END);
+	}
+	PP_ASSERT_WITH_CODE((result == 0),
+			"Failed to update SCLK threshold!", return result);
+
+	result = vegam_program_mem_timing_parameters(hwmgr);
+	PP_ASSERT_WITH_CODE((result == 0),
+			"Failed to program memory timing parameters!",
+			);
+
+	return result;
+}
+
+int vegam_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
+{
+	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
+	int ret;
+
+	if (!hwmgr->avfs_supported)
+		return 0;
+
+	ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs);
+	if (!ret) {
+		if (data->apply_avfs_cks_off_voltage)
+			ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage);
+	}
+
+	return ret;
+}
+
+static int vegam_thermal_setup_fan_table(struct pp_hwmgr *hwmgr)
+{
+	PP_ASSERT_WITH_CODE(hwmgr->thermal_controller.fanInfo.bNoFan,
+			"VBIOS fan info is not correct!",
+			);
+	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_MicrocodeFanControl);
+	return 0;
+}
+
+const struct pp_smumgr_func vegam_smu_funcs = {
+	.smu_init = vegam_smu_init,
+	.smu_fini = smu7_smu_fini,
+	.start_smu = vegam_start_smu,
+	.check_fw_load_finish = smu7_check_fw_load_finish,
+	.request_smu_load_fw = smu7_reload_firmware,
+	.request_smu_load_specific_fw = NULL,
+	.send_msg_to_smc = smu7_send_msg_to_smc,
+	.send_msg_to_smc_with_parameter = smu7_send_msg_to_smc_with_parameter,
+	.process_firmware_header = vegam_process_firmware_header,
+	.is_dpm_running = vegam_is_dpm_running,
+	.get_mac_definition = vegam_get_mac_definition,
+	.update_smc_table = vegam_update_smc_table,
+	.init_smc_table = vegam_init_smc_table,
+	.get_offsetof = vegam_get_offsetof,
+	.populate_all_graphic_levels = vegam_populate_all_graphic_levels,
+	.populate_all_memory_levels = vegam_populate_all_memory_levels,
+	.update_sclk_threshold = vegam_update_sclk_threshold,
+	.is_hw_avfs_present = vegam_is_hw_avfs_present,
+	.thermal_avfs_enable = vegam_thermal_avfs_enable,
+	.thermal_setup_fan_table = vegam_thermal_setup_fan_table,
+};
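
The PP_ASSERT_WITH_CODE() calls used throughout this file log a message and then execute an arbitrary recovery statement when the condition fails. A minimal sketch of that pattern, assuming the real macro (defined in the powerplay debug headers) behaves roughly like this; the print helper and the some_upload() function are illustrative, not copied from the tree:

    /* Hedged sketch of the assert-with-recovery pattern; not the
     * verbatim powerplay definition. */
    #define EXAMPLE_ASSERT_WITH_CODE(cond, msg, code)	\
    	do {						\
    		if (!(cond)) {				\
    			pr_warn("%s\n", msg);		\
    			code;				\
    		}					\
    	} while (0)

    static int example_upload(struct pp_hwmgr *hwmgr)
    {
    	int result = some_upload(hwmgr);	/* hypothetical helper */

    	/* Usage mirroring the calls above: log and bail out on failure. */
    	EXAMPLE_ASSERT_WITH_CODE(!result,
    			"Failed to upload dpm data to SMC memory!",
    			return result);
    	return 0;
    }
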
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h
new file mode 100644
index 0000000..2b65582
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _VEGAM_SMUMANAGER_H
+#define _VEGAM_SMUMANAGER_H
+
+
+#include <pp_endian.h>
+#include "smu75_discrete.h"
+#include "smu7_smumgr.h"
+
+#define SMC_RAM_END 0x40000
+
+#define DPMTuning_Uphyst_Shift    0
+#define DPMTuning_Downhyst_Shift  8
+#define DPMTuning_Activity_Shift  16
+
+#define GraphicsDPMTuning_VEGAM    0x001e6400
+#define MemoryDPMTuning_VEGAM      0x000f3c0a
+#define SclkDPMTuning_VEGAM        0x002d000a
+#define MclkDPMTuning_VEGAM        0x001f100a
+
+
+struct vegam_pt_defaults {
+	uint8_t   SviLoadLineEn;
+	uint8_t   SviLoadLineVddC;
+	uint8_t   TDC_VDDC_ThrottleReleaseLimitPerc;
+	uint8_t   TDC_MAWt;
+	uint8_t   TdcWaterfallCtl;
+	uint8_t   DTEAmbientTempBase;
+
+	uint32_t  DisplayCac;
+	uint32_t  BAPM_TEMP_GRADIENT;
+	uint16_t  BAPMTI_R[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS];
+	uint16_t  BAPMTI_RC[SMU75_DTE_ITERATIONS * SMU75_DTE_SOURCES * SMU75_DTE_SINKS];
+};
+
+struct vegam_range_table {
+	uint32_t trans_lower_frequency; /* in 10khz */
+	uint32_t trans_upper_frequency;
+};
+
+struct vegam_smumgr {
+	struct smu7_smumgr smu7_data;
+	uint8_t protected_mode;
+	SMU75_Discrete_DpmTable              smc_state_table;
+	struct SMU75_Discrete_Ulv            ulv_setting;
+	struct SMU75_Discrete_PmFuses  power_tune_table;
+	struct vegam_range_table                range_table[NUM_SCLK_RANGE];
+	const struct vegam_pt_defaults       *power_tune_defaults;
+	uint32_t               bif_sclk_table[SMU75_MAX_LEVELS_LINK];
+};
+
+
+#endif
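
The DPMTuning_*_Shift macros above pack three tuning fields into one 32-bit word; per-IP constants such as GraphicsDPMTuning_VEGAM are pre-packed values. A hedged decode of that constant, assuming 8-bit fields at those shifts (the header does not state the field widths):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint32_t tuning = 0x001e6400;	/* GraphicsDPMTuning_VEGAM */

    	/* Assumed 8-bit fields at the shifts defined in the header. */
    	printf("uphyst=%u downhyst=%u activity=%u\n",
    	       (tuning >> 0) & 0xff,	/* 0   */
    	       (tuning >> 8) & 0xff,	/* 100 */
    	       (tuning >> 16) & 0xff);	/* 30  */
    	return 0;
    }
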
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 831b733..036dff8 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -799,7 +799,7 @@
 	return 0;
 }
 
-static int ast_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status ast_mode_valid(struct drm_connector *connector,
 			  struct drm_display_mode *mode)
 {
 	struct ast_private *ast = connector->dev->dev_private;
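
This hunk, like the bochs and adv7511 hunks further down, fixes only the return type: the DRM connector helper's ->mode_valid callback is declared to return enum drm_mode_status rather than int. A minimal sketch of a conforming callback, kernel context assumed; the 165 MHz limit is borrowed from the adv7511 hunk below:

    #include <drm/drm_connector.h>
    #include <drm/drm_modes.h>

    static enum drm_mode_status
    example_mode_valid(struct drm_connector *connector,
    		   struct drm_display_mode *mode)
    {
    	/* Reject pixel clocks the hardware cannot drive. */
    	if (mode->clock > 165000)
    		return MODE_CLOCK_HIGH;

    	return MODE_OK;
    }
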
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
index ab32d5b..60c937f 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h
@@ -299,7 +299,6 @@
 struct atmel_hlcdc_plane {
 	struct drm_plane base;
 	struct atmel_hlcdc_layer layer;
-	struct atmel_hlcdc_plane_properties *properties;
 };
 
 static inline struct atmel_hlcdc_plane *
@@ -346,18 +345,6 @@
 };
 
 /**
- * Atmel HLCDC Plane properties.
- *
- * This structure stores plane property definitions.
- *
- * @alpha: alpha blending (or transparency) property
- * @rotation: rotation property
- */
-struct atmel_hlcdc_plane_properties {
-	struct drm_property *alpha;
-};
-
-/**
  * Atmel HLCDC Display Controller.
  *
  * @desc: HLCDC Display Controller description
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index e18800e..73c875d 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -31,7 +31,6 @@
  * @src_y: y buffer position
  * @src_w: buffer width
  * @src_h: buffer height
- * @alpha: alpha blending of the plane
  * @disc_x: x discard position
  * @disc_y: y discard position
  * @disc_w: discard width
@@ -54,8 +53,6 @@
 	uint32_t src_w;
 	uint32_t src_h;
 
-	u8 alpha;
-
 	int disc_x;
 	int disc_y;
 	int disc_w;
@@ -385,7 +382,7 @@
 			cfg |= ATMEL_HLCDC_LAYER_LAEN;
 		else
 			cfg |= ATMEL_HLCDC_LAYER_GAEN |
-			       ATMEL_HLCDC_LAYER_GA(state->alpha);
+			       ATMEL_HLCDC_LAYER_GA(state->base.alpha >> 8);
 	}
 
 	if (state->disc_h && state->disc_w)
@@ -553,7 +550,7 @@
 
 		if (!ovl_s->fb ||
 		    ovl_s->fb->format->has_alpha ||
-		    ovl_state->alpha != 255)
+		    ovl_s->alpha != DRM_BLEND_ALPHA_OPAQUE)
 			continue;
 
 		/* TODO: implement a smarter hidden area detection */
@@ -829,51 +826,18 @@
 	drm_plane_cleanup(p);
 }
 
-static int atmel_hlcdc_plane_atomic_set_property(struct drm_plane *p,
-						 struct drm_plane_state *s,
-						 struct drm_property *property,
-						 uint64_t val)
-{
-	struct atmel_hlcdc_plane *plane = drm_plane_to_atmel_hlcdc_plane(p);
-	struct atmel_hlcdc_plane_properties *props = plane->properties;
-	struct atmel_hlcdc_plane_state *state =
-			drm_plane_state_to_atmel_hlcdc_plane_state(s);
-
-	if (property == props->alpha)
-		state->alpha = val;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static int atmel_hlcdc_plane_atomic_get_property(struct drm_plane *p,
-					const struct drm_plane_state *s,
-					struct drm_property *property,
-					uint64_t *val)
-{
-	struct atmel_hlcdc_plane *plane = drm_plane_to_atmel_hlcdc_plane(p);
-	struct atmel_hlcdc_plane_properties *props = plane->properties;
-	const struct atmel_hlcdc_plane_state *state =
-		container_of(s, const struct atmel_hlcdc_plane_state, base);
-
-	if (property == props->alpha)
-		*val = state->alpha;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane,
-				struct atmel_hlcdc_plane_properties *props)
+static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane)
 {
 	const struct atmel_hlcdc_layer_desc *desc = plane->layer.desc;
 
 	if (desc->type == ATMEL_HLCDC_OVERLAY_LAYER ||
-	    desc->type == ATMEL_HLCDC_CURSOR_LAYER)
-		drm_object_attach_property(&plane->base.base,
-					   props->alpha, 255);
+	    desc->type == ATMEL_HLCDC_CURSOR_LAYER) {
+		int ret;
+
+		ret = drm_plane_create_alpha_property(&plane->base);
+		if (ret)
+			return ret;
+	}
 
 	if (desc->layout.xstride && desc->layout.pstride) {
 		int ret;
@@ -988,8 +952,8 @@
 			return;
 		}
 
-		state->alpha = 255;
 		p->state = &state->base;
+		p->state->alpha = DRM_BLEND_ALPHA_OPAQUE;
 		p->state->plane = p;
 	}
 }
@@ -1042,13 +1006,10 @@
 	.reset = atmel_hlcdc_plane_reset,
 	.atomic_duplicate_state = atmel_hlcdc_plane_atomic_duplicate_state,
 	.atomic_destroy_state = atmel_hlcdc_plane_atomic_destroy_state,
-	.atomic_set_property = atmel_hlcdc_plane_atomic_set_property,
-	.atomic_get_property = atmel_hlcdc_plane_atomic_get_property,
 };
 
 static int atmel_hlcdc_plane_create(struct drm_device *dev,
-				    const struct atmel_hlcdc_layer_desc *desc,
-				    struct atmel_hlcdc_plane_properties *props)
+				    const struct atmel_hlcdc_layer_desc *desc)
 {
 	struct atmel_hlcdc_dc *dc = dev->dev_private;
 	struct atmel_hlcdc_plane *plane;
@@ -1060,7 +1021,6 @@
 		return -ENOMEM;
 
 	atmel_hlcdc_layer_init(&plane->layer, desc, dc->hlcdc->regmap);
-	plane->properties = props;
 
 	if (desc->type == ATMEL_HLCDC_BASE_LAYER)
 		type = DRM_PLANE_TYPE_PRIMARY;
@@ -1081,7 +1041,7 @@
 			     &atmel_hlcdc_layer_plane_helper_funcs);
 
 	/* Set default property values*/
-	ret = atmel_hlcdc_plane_init_properties(plane, props);
+	ret = atmel_hlcdc_plane_init_properties(plane);
 	if (ret)
 		return ret;
 
@@ -1090,34 +1050,13 @@
 	return 0;
 }
 
-static struct atmel_hlcdc_plane_properties *
-atmel_hlcdc_plane_create_properties(struct drm_device *dev)
-{
-	struct atmel_hlcdc_plane_properties *props;
-
-	props = devm_kzalloc(dev->dev, sizeof(*props), GFP_KERNEL);
-	if (!props)
-		return ERR_PTR(-ENOMEM);
-
-	props->alpha = drm_property_create_range(dev, 0, "alpha", 0, 255);
-	if (!props->alpha)
-		return ERR_PTR(-ENOMEM);
-
-	return props;
-}
-
 int atmel_hlcdc_create_planes(struct drm_device *dev)
 {
 	struct atmel_hlcdc_dc *dc = dev->dev_private;
-	struct atmel_hlcdc_plane_properties *props;
 	const struct atmel_hlcdc_layer_desc *descs = dc->desc->layers;
 	int nlayers = dc->desc->nlayers;
 	int i, ret;
 
-	props = atmel_hlcdc_plane_create_properties(dev);
-	if (IS_ERR(props))
-		return PTR_ERR(props);
-
 	dc->dscrpool = dmam_pool_create("atmel-hlcdc-dscr", dev->dev,
 				sizeof(struct atmel_hlcdc_dma_channel_dscr),
 				sizeof(u64), 0);
@@ -1130,7 +1069,7 @@
 		    descs[i].type != ATMEL_HLCDC_CURSOR_LAYER)
 			continue;
 
-		ret = atmel_hlcdc_plane_create(dev, &descs[i], props);
+		ret = atmel_hlcdc_plane_create(dev, &descs[i]);
 		if (ret)
 			return ret;
 	}
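
The atmel-hlcdc conversion above swaps the driver-private 8-bit "alpha" property for the generic per-plane alpha, which is 16 bits wide (DRM_BLEND_ALPHA_OPAQUE is the all-ones value 0xffff), hence the state->base.alpha >> 8 when programming the 8-bit GA field. A sketch of that scaling:

    #include <linux/types.h>

    /* Map the generic 16-bit plane alpha onto the 8-bit GA field, as
     * ATMEL_HLCDC_LAYER_GA(state->base.alpha >> 8) does above. */
    static inline u8 plane_alpha_to_ga(u16 alpha)
    {
    	return alpha >> 8;	/* DRM_BLEND_ALPHA_OPAQUE (0xffff) -> 0xff */
    }
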
diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c
index a24a18f..233980a 100644
--- a/drivers/gpu/drm/bochs/bochs_kms.c
+++ b/drivers/gpu/drm/bochs/bochs_kms.c
@@ -188,7 +188,7 @@
 	return count;
 }
 
-static int bochs_connector_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status bochs_connector_mode_valid(struct drm_connector *connector,
 				      struct drm_display_mode *mode)
 {
 	struct bochs_device *bochs =
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index 684ac62..fa2c799 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -25,6 +25,16 @@
 	  the HDMI output of an application processor to MyDP
 	  or DisplayPort.
 
+config DRM_CDNS_DSI
+	tristate "Cadence DPI/DSI bridge"
+	select DRM_KMS_HELPER
+	select DRM_MIPI_DSI
+	select DRM_PANEL_BRIDGE
+	depends on OF
+	help
+	  Support for the Cadence DPI to DSI bridge. This is an internal
+	  bridge and is meant to be directly embedded in a SoC.
+
 config DRM_DUMB_VGA_DAC
 	tristate "Dumb VGA DAC Bridge support"
 	depends on OF
@@ -94,6 +104,12 @@
 	  It is an I2C driver, that detects connection of MHL bridge
 	  and starts encapsulation of HDMI signal.
 
+config DRM_THINE_THC63LVD1024
+	tristate "Thine THC63LVD1024 LVDS decoder bridge"
+	depends on OF
+	help
+	  Thine THC63LVD1024 LVDS/parallel converter driver.
+
 config DRM_TOSHIBA_TC358767
 	tristate "Toshiba TC358767 eDP bridge"
 	depends on OF
diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile
index 373eb28..35f88d4 100644
--- a/drivers/gpu/drm/bridge/Makefile
+++ b/drivers/gpu/drm/bridge/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DRM_ANALOGIX_ANX78XX) += analogix-anx78xx.o
+obj-$(CONFIG_DRM_CDNS_DSI) += cdns-dsi.o
 obj-$(CONFIG_DRM_DUMB_VGA_DAC) += dumb-vga-dac.o
 obj-$(CONFIG_DRM_LVDS_ENCODER) += lvds-encoder.o
 obj-$(CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW) += megachips-stdpxxxx-ge-b850v3-fw.o
@@ -8,6 +9,7 @@
 obj-$(CONFIG_DRM_SIL_SII8620) += sil-sii8620.o
 obj-$(CONFIG_DRM_SII902X) += sii902x.o
 obj-$(CONFIG_DRM_SII9234) += sii9234.o
+obj-$(CONFIG_DRM_THINE_THC63LVD1024) += thc63lvd1024.o
 obj-$(CONFIG_DRM_TOSHIBA_TC358767) += tc358767.o
 obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix/
 obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511/
diff --git a/drivers/gpu/drm/bridge/adv7511/Kconfig b/drivers/gpu/drm/bridge/adv7511/Kconfig
index 592b9d2..944e440 100644
--- a/drivers/gpu/drm/bridge/adv7511/Kconfig
+++ b/drivers/gpu/drm/bridge/adv7511/Kconfig
@@ -1,5 +1,5 @@
 config DRM_I2C_ADV7511
-	tristate "AV7511 encoder"
+	tristate "ADV7511 encoder"
 	depends on OF
 	select DRM_KMS_HELPER
 	select REGMAP_I2C
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h
index d034b2c..73d8ccb 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511.h
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h
@@ -93,6 +93,11 @@
 #define ADV7511_REG_CHIP_ID_HIGH		0xf5
 #define ADV7511_REG_CHIP_ID_LOW			0xf6
 
+/* Hardware defined default addresses for I2C register maps */
+#define ADV7511_CEC_I2C_ADDR_DEFAULT		0x3c
+#define ADV7511_EDID_I2C_ADDR_DEFAULT		0x3f
+#define ADV7511_PACKET_I2C_ADDR_DEFAULT		0x38
+
 #define ADV7511_CSC_ENABLE			BIT(7)
 #define ADV7511_CSC_UPDATE_MODE			BIT(5)
 
@@ -321,6 +326,7 @@
 struct adv7511 {
 	struct i2c_client *i2c_main;
 	struct i2c_client *i2c_edid;
+	struct i2c_client *i2c_packet;
 	struct i2c_client *i2c_cec;
 
 	struct regmap *regmap;
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index efa29db..73021b3 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -586,7 +586,7 @@
 	/* Reading the EDID only works if the device is powered */
 	if (!adv7511->powered) {
 		unsigned int edid_i2c_addr =
-					(adv7511->i2c_main->addr << 1) + 4;
+					(adv7511->i2c_edid->addr << 1);
 
 		__adv7511_power_on(adv7511);
 
@@ -654,7 +654,7 @@
 	return status;
 }
 
-static int adv7511_mode_valid(struct adv7511 *adv7511,
+static enum drm_mode_status adv7511_mode_valid(struct adv7511 *adv7511,
 			      struct drm_display_mode *mode)
 {
 	if (mode->clock > 165000)
@@ -969,10 +969,10 @@
 {
 	int ret;
 
-	adv->i2c_cec = i2c_new_dummy(adv->i2c_main->adapter,
-				     adv->i2c_main->addr - 1);
+	adv->i2c_cec = i2c_new_secondary_device(adv->i2c_main, "cec",
+						ADV7511_CEC_I2C_ADDR_DEFAULT);
 	if (!adv->i2c_cec)
-		return -ENOMEM;
+		return -EINVAL;
 	i2c_set_clientdata(adv->i2c_cec, adv);
 
 	adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec,
@@ -1082,8 +1082,6 @@
 	struct adv7511_link_config link_config;
 	struct adv7511 *adv7511;
 	struct device *dev = &i2c->dev;
-	unsigned int main_i2c_addr = i2c->addr << 1;
-	unsigned int edid_i2c_addr = main_i2c_addr + 4;
 	unsigned int val;
 	int ret;
 
@@ -1129,7 +1127,7 @@
 	}
 
 	if (adv7511->gpio_pd) {
-		mdelay(5);
+		usleep_range(5000, 6000);
 		gpiod_set_value_cansleep(adv7511->gpio_pd, 0);
 	}
 
@@ -1153,23 +1151,34 @@
 	if (ret)
 		goto uninit_regulators;
 
-	regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR, edid_i2c_addr);
-	regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR,
-		     main_i2c_addr - 0xa);
-	regmap_write(adv7511->regmap, ADV7511_REG_CEC_I2C_ADDR,
-		     main_i2c_addr - 2);
-
 	adv7511_packet_disable(adv7511, 0xffff);
 
-	adv7511->i2c_edid = i2c_new_dummy(i2c->adapter, edid_i2c_addr >> 1);
+	adv7511->i2c_edid = i2c_new_secondary_device(i2c, "edid",
+					ADV7511_EDID_I2C_ADDR_DEFAULT);
 	if (!adv7511->i2c_edid) {
-		ret = -ENOMEM;
+		ret = -EINVAL;
 		goto uninit_regulators;
 	}
 
+	regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR,
+		     adv7511->i2c_edid->addr << 1);
+
+	adv7511->i2c_packet = i2c_new_secondary_device(i2c, "packet",
+					ADV7511_PACKET_I2C_ADDR_DEFAULT);
+	if (!adv7511->i2c_packet) {
+		ret = -EINVAL;
+		goto err_i2c_unregister_edid;
+	}
+
+	regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR,
+		     adv7511->i2c_packet->addr << 1);
+
 	ret = adv7511_init_cec_regmap(adv7511);
 	if (ret)
-		goto err_i2c_unregister_edid;
+		goto err_i2c_unregister_packet;
+
+	regmap_write(adv7511->regmap, ADV7511_REG_CEC_I2C_ADDR,
+		     adv7511->i2c_cec->addr << 1);
 
 	INIT_WORK(&adv7511->hpd_work, adv7511_hpd_work);
 
@@ -1207,6 +1216,8 @@
 	i2c_unregister_device(adv7511->i2c_cec);
 	if (adv7511->cec_clk)
 		clk_disable_unprepare(adv7511->cec_clk);
+err_i2c_unregister_packet:
+	i2c_unregister_device(adv7511->i2c_packet);
 err_i2c_unregister_edid:
 	i2c_unregister_device(adv7511->i2c_edid);
 uninit_regulators:
@@ -1233,6 +1244,7 @@
 
 	cec_unregister_adapter(adv7511->cec_adap);
 
+	i2c_unregister_device(adv7511->i2c_packet);
 	i2c_unregister_device(adv7511->i2c_edid);
 
 	return 0;
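
i2c_new_secondary_device() resolves a named secondary address ("cec", "edid", "packet") from the client's firmware node, falling back to the given hardware default, and returns NULL rather than an ERR_PTR on failure; that is why the error paths above now return -EINVAL instead of -ENOMEM. A sketch of the pattern, with the surrounding function invented for illustration:

    static int example_init_edid_client(struct adv7511 *adv7511,
    				    struct i2c_client *i2c)
    {
    	adv7511->i2c_edid = i2c_new_secondary_device(i2c, "edid",
    					ADV7511_EDID_I2C_ADDR_DEFAULT);
    	if (!adv7511->i2c_edid)
    		return -EINVAL;	/* NULL, not an ERR_PTR, on failure */

    	return 0;
    }
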
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index 5c52307..2bcbfad 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -43,8 +43,10 @@
 	struct device_node *node;
 };
 
-static void analogix_dp_init_dp(struct analogix_dp_device *dp)
+static int analogix_dp_init_dp(struct analogix_dp_device *dp)
 {
+	int ret;
+
 	analogix_dp_reset(dp);
 
 	analogix_dp_swreset(dp);
@@ -56,10 +58,13 @@
 	analogix_dp_enable_sw_function(dp);
 
 	analogix_dp_config_interrupt(dp);
-	analogix_dp_init_analog_func(dp);
+	ret = analogix_dp_init_analog_func(dp);
+	if (ret)
+		return ret;
 
 	analogix_dp_init_hpd(dp);
 	analogix_dp_init_aux(dp);
+	return 0;
 }
 
 static int analogix_dp_detect_hpd(struct analogix_dp_device *dp)
@@ -71,7 +76,7 @@
 			return 0;
 
 		timeout_loop++;
-		usleep_range(10, 11);
+		usleep_range(1000, 1100);
 	}
 
 	/*
@@ -148,87 +153,146 @@
 	psr_vsc.DB1 = 0;
 
 	ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, DP_SET_POWER_D0);
-	if (ret != 1)
+	if (ret != 1) {
 		dev_err(dp->dev, "Failed to set DP Power0 %d\n", ret);
+		return ret;
+	}
 
 	return analogix_dp_send_psr_spd(dp, &psr_vsc, false);
 }
 EXPORT_SYMBOL_GPL(analogix_dp_disable_psr);
 
-static bool analogix_dp_detect_sink_psr(struct analogix_dp_device *dp)
+static int analogix_dp_detect_sink_psr(struct analogix_dp_device *dp)
 {
 	unsigned char psr_version;
+	int ret;
 
-	drm_dp_dpcd_readb(&dp->aux, DP_PSR_SUPPORT, &psr_version);
+	ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_SUPPORT, &psr_version);
+	if (ret != 1) {
+		dev_err(dp->dev, "failed to get PSR version, disabling it\n");
+		return ret;
+	}
+
 	dev_dbg(dp->dev, "Panel PSR version : %x\n", psr_version);
 
-	return (psr_version & DP_PSR_IS_SUPPORTED) ? true : false;
+	dp->psr_enable = (psr_version & DP_PSR_IS_SUPPORTED) ? true : false;
+
+	return 0;
 }
 
-static void analogix_dp_enable_sink_psr(struct analogix_dp_device *dp)
+static int analogix_dp_enable_sink_psr(struct analogix_dp_device *dp)
 {
 	unsigned char psr_en;
+	int ret;
 
 	/* Disable psr function */
-	drm_dp_dpcd_readb(&dp->aux, DP_PSR_EN_CFG, &psr_en);
+	ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_EN_CFG, &psr_en);
+	if (ret != 1) {
+		dev_err(dp->dev, "failed to get psr config\n");
+		goto end;
+	}
+
 	psr_en &= ~DP_PSR_ENABLE;
-	drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en);
+	ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en);
+	if (ret != 1) {
+		dev_err(dp->dev, "failed to disable panel psr\n");
+		goto end;
+	}
 
 	/* Main-Link transmitter remains active during PSR active states */
 	psr_en = DP_PSR_MAIN_LINK_ACTIVE | DP_PSR_CRC_VERIFICATION;
-	drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en);
+	ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en);
+	if (ret != 1) {
+		dev_err(dp->dev, "failed to set panel psr\n");
+		goto end;
+	}
 
 	/* Enable psr function */
 	psr_en = DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE |
 		 DP_PSR_CRC_VERIFICATION;
-	drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en);
+	ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en);
+	if (ret != 1) {
+		dev_err(dp->dev, "failed to set panel psr\n");
+		goto end;
+	}
 
 	analogix_dp_enable_psr_crc(dp);
+
+	return 0;
+end:
+	dev_err(dp->dev, "failed to enable psr, forcing it off\n");
+	dp->psr_enable = false;
+
+	return ret;
 }
 
-static void
+static int
 analogix_dp_enable_rx_to_enhanced_mode(struct analogix_dp_device *dp,
 				       bool enable)
 {
 	u8 data;
+	int ret;
 
-	drm_dp_dpcd_readb(&dp->aux, DP_LANE_COUNT_SET, &data);
+	ret = drm_dp_dpcd_readb(&dp->aux, DP_LANE_COUNT_SET, &data);
+	if (ret != 1)
+		return ret;
 
 	if (enable)
-		drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET,
-				   DP_LANE_COUNT_ENHANCED_FRAME_EN |
-					DPCD_LANE_COUNT_SET(data));
+		ret = drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET,
+					 DP_LANE_COUNT_ENHANCED_FRAME_EN |
+					 DPCD_LANE_COUNT_SET(data));
 	else
-		drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET,
-				   DPCD_LANE_COUNT_SET(data));
+		ret = drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET,
+					 DPCD_LANE_COUNT_SET(data));
+
+	return ret < 0 ? ret : 0;
 }
 
-static int analogix_dp_is_enhanced_mode_available(struct analogix_dp_device *dp)
+static int analogix_dp_is_enhanced_mode_available(struct analogix_dp_device *dp,
+						  u8 *enhanced_mode_support)
 {
 	u8 data;
-	int retval;
+	int ret;
 
-	drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data);
-	retval = DPCD_ENHANCED_FRAME_CAP(data);
+	ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data);
+	if (ret != 1) {
+		*enhanced_mode_support = 0;
+		return ret;
+	}
 
-	return retval;
+	*enhanced_mode_support = DPCD_ENHANCED_FRAME_CAP(data);
+
+	return 0;
 }
 
-static void analogix_dp_set_enhanced_mode(struct analogix_dp_device *dp)
+static int analogix_dp_set_enhanced_mode(struct analogix_dp_device *dp)
 {
 	u8 data;
+	int ret;
 
-	data = analogix_dp_is_enhanced_mode_available(dp);
-	analogix_dp_enable_rx_to_enhanced_mode(dp, data);
+	ret = analogix_dp_is_enhanced_mode_available(dp, &data);
+	if (ret < 0)
+		return ret;
+
+	ret = analogix_dp_enable_rx_to_enhanced_mode(dp, data);
+	if (ret < 0)
+		return ret;
+
 	analogix_dp_enable_enhanced_mode(dp, data);
+
+	return 0;
 }
 
-static void analogix_dp_training_pattern_dis(struct analogix_dp_device *dp)
+static int analogix_dp_training_pattern_dis(struct analogix_dp_device *dp)
 {
+	int ret;
+
 	analogix_dp_set_training_pattern(dp, DP_NONE);
 
-	drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET,
-			   DP_TRAINING_PATTERN_DISABLE);
+	ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET,
+				 DP_TRAINING_PATTERN_DISABLE);
+
+	return ret < 0 ? ret : 0;
 }
 
 static void
@@ -276,6 +340,12 @@
 	retval = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, 2);
 	if (retval < 0)
 		return retval;
+	/* set enhanced mode if available */
+	retval = analogix_dp_set_enhanced_mode(dp);
+	if (retval < 0) {
+		dev_err(dp->dev, "failed to set enhanced mode\n");
+		return retval;
+	}
 
 	/* Set TX pre-emphasis to minimum */
 	for (lane = 0; lane < lane_count; lane++)
@@ -531,7 +601,7 @@
 {
 	int lane, lane_count, retval;
 	u32 reg;
-	u8 link_align, link_status[2], adjust_request[2], spread;
+	u8 link_align, link_status[2], adjust_request[2];
 
 	usleep_range(400, 401);
 
@@ -560,10 +630,11 @@
 
 	if (!analogix_dp_channel_eq_ok(link_status, link_align, lane_count)) {
 		/* training pattern set to normal */
-		analogix_dp_training_pattern_dis(dp);
+		retval = analogix_dp_training_pattern_dis(dp);
+		if (retval < 0)
+			return retval;
 
 		dev_info(dp->dev, "Link Training success!\n");
-
 		analogix_dp_get_link_bandwidth(dp, &reg);
 		dp->link_train.link_rate = reg;
 		dev_dbg(dp->dev, "final bandwidth = %.2x\n",
@@ -574,22 +645,6 @@
 		dev_dbg(dp->dev, "final lane count = %.2x\n",
 			dp->link_train.lane_count);
 
-		retval = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD,
-					   &spread);
-		if (retval != 1) {
-			dev_err(dp->dev, "failed to read downspread %d\n",
-				retval);
-			dp->fast_train_support = false;
-		} else {
-			dp->fast_train_support =
-				(spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING) ?
-					true : false;
-		}
-		dev_dbg(dp->dev, "fast link training %s\n",
-			dp->fast_train_support ? "supported" : "unsupported");
-
-		/* set enhanced mode if available */
-		analogix_dp_set_enhanced_mode(dp);
 		dp->link_train.lt_state = FINISHED;
 
 		return 0;
@@ -793,7 +848,7 @@
 
 static int analogix_dp_train_link(struct analogix_dp_device *dp)
 {
-	if (dp->fast_train_support)
+	if (dp->fast_train_enable)
 		return analogix_dp_fast_link_train(dp);
 
 	return analogix_dp_full_link_train(dp, dp->video_info.max_lane_count,
@@ -819,11 +874,10 @@
 		if (analogix_dp_is_slave_video_stream_clock_on(dp) == 0)
 			break;
 		if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) {
-			dev_err(dp->dev, "Timeout of video streamclk ok\n");
+			dev_err(dp->dev, "Timeout waiting for slave video streamclk\n");
 			return -ETIMEDOUT;
 		}
-
-		usleep_range(1, 2);
+		usleep_range(1000, 1001);
 	}
 
 	/* Set to use the register calculated M/N video */
@@ -838,6 +892,9 @@
 	/* Configure video slave mode */
 	analogix_dp_enable_video_master(dp, 0);
 
+	/* Enable video */
+	analogix_dp_start_video(dp);
+
 	timeout_loop = 0;
 
 	for (;;) {
@@ -850,8 +907,9 @@
 			done_count = 0;
 		}
 		if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) {
-			dev_err(dp->dev, "Timeout of video streamclk ok\n");
-			return -ETIMEDOUT;
+			dev_warn(dp->dev,
+				 "Ignoring video streamclk timeout\n");
+			break;
 		}
 
 		usleep_range(1000, 1001);
@@ -860,24 +918,32 @@
 	return 0;
 }
 
-static void analogix_dp_enable_scramble(struct analogix_dp_device *dp,
-					bool enable)
+static int analogix_dp_enable_scramble(struct analogix_dp_device *dp,
+				       bool enable)
 {
 	u8 data;
+	int ret;
 
 	if (enable) {
 		analogix_dp_enable_scrambling(dp);
 
-		drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, &data);
-		drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET,
+		ret = drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET,
+					&data);
+		if (ret != 1)
+			return ret;
+		ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET,
 				   (u8)(data & ~DP_LINK_SCRAMBLING_DISABLE));
 	} else {
 		analogix_dp_disable_scrambling(dp);
 
-		drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, &data);
-		drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET,
+		ret = drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET,
+					&data);
+		if (ret != 1)
+			return ret;
+		ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET,
 				   (u8)(data | DP_LINK_SCRAMBLING_DISABLE));
 	}
+	return ret < 0 ? ret : 0;
 }
 
 static irqreturn_t analogix_dp_hardirq(int irq, void *arg)
@@ -916,7 +982,23 @@
 	return IRQ_HANDLED;
 }
 
-static void analogix_dp_commit(struct analogix_dp_device *dp)
+static int analogix_dp_fast_link_train_detection(struct analogix_dp_device *dp)
+{
+	int ret;
+	u8 spread;
+
+	ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread);
+	if (ret != 1) {
+		dev_err(dp->dev, "failed to read downspread %d\n", ret);
+		return ret;
+	}
+	dp->fast_train_enable = !!(spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING);
+	dev_dbg(dp->dev, "fast link training %s\n",
+		dp->fast_train_enable ? "supported" : "unsupported");
+	return 0;
+}
+
+static int analogix_dp_commit(struct analogix_dp_device *dp)
 {
 	int ret;
 
@@ -926,34 +1008,50 @@
 			DRM_ERROR("failed to disable the panel\n");
 	}
 
-	ret = readx_poll_timeout(analogix_dp_train_link, dp, ret, !ret, 100,
-				 DP_TIMEOUT_TRAINING_US * 5);
+	ret = analogix_dp_train_link(dp);
 	if (ret) {
 		dev_err(dp->dev, "unable to do link train, ret=%d\n", ret);
-		return;
+		return ret;
 	}
 
-	analogix_dp_enable_scramble(dp, 1);
-	analogix_dp_enable_rx_to_enhanced_mode(dp, 1);
-	analogix_dp_enable_enhanced_mode(dp, 1);
+	ret = analogix_dp_enable_scramble(dp, 1);
+	if (ret < 0) {
+		dev_err(dp->dev, "can not enable scramble\n");
+		return ret;
+	}
 
 	analogix_dp_init_video(dp);
 	ret = analogix_dp_config_video(dp);
-	if (ret)
+	if (ret) {
 		dev_err(dp->dev, "unable to config video\n");
+		return ret;
+	}
 
 	/* Safe to enable the panel now */
 	if (dp->plat_data->panel) {
-		if (drm_panel_enable(dp->plat_data->panel))
+		ret = drm_panel_enable(dp->plat_data->panel);
+		if (ret) {
 			DRM_ERROR("failed to enable the panel\n");
+			return ret;
+		}
 	}
 
-	/* Enable video */
-	analogix_dp_start_video(dp);
+	ret = analogix_dp_detect_sink_psr(dp);
+	if (ret)
+		return ret;
 
-	dp->psr_enable = analogix_dp_detect_sink_psr(dp);
-	if (dp->psr_enable)
-		analogix_dp_enable_sink_psr(dp);
+	if (dp->psr_enable) {
+		ret = analogix_dp_enable_sink_psr(dp);
+		if (ret)
+			return ret;
+	}
+
+	/* Check whether panel supports fast training */
+	ret = analogix_dp_fast_link_train_detection(dp);
+	if (ret)
+		dp->psr_enable = false;
+
+	return ret;
 }
 
 /*
@@ -1150,24 +1248,80 @@
 		DRM_ERROR("failed to setup the panel ret = %d\n", ret);
 }
 
+static int analogix_dp_set_bridge(struct analogix_dp_device *dp)
+{
+	int ret;
+
+	pm_runtime_get_sync(dp->dev);
+
+	ret = clk_prepare_enable(dp->clock);
+	if (ret < 0) {
+		DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret);
+		goto out_dp_clk_pre;
+	}
+
+	if (dp->plat_data->power_on_start)
+		dp->plat_data->power_on_start(dp->plat_data);
+
+	phy_power_on(dp->phy);
+
+	ret = analogix_dp_init_dp(dp);
+	if (ret)
+		goto out_dp_init;
+
+	/*
+	 * According to DP spec v1.3 chap 3.5.1.2 Link Training,
+	 * We should first make sure the HPD signal is asserted high by device
+	 * when we want to establish a link with it.
+	 */
+	ret = analogix_dp_detect_hpd(dp);
+	if (ret) {
+		DRM_ERROR("failed to get hpd signal, ret = %d\n", ret);
+		goto out_dp_init;
+	}
+
+	ret = analogix_dp_commit(dp);
+	if (ret) {
+		DRM_ERROR("dp commit error, ret = %d\n", ret);
+		goto out_dp_init;
+	}
+
+	if (dp->plat_data->power_on_end)
+		dp->plat_data->power_on_end(dp->plat_data);
+
+	enable_irq(dp->irq);
+	return 0;
+
+out_dp_init:
+	phy_power_off(dp->phy);
+	if (dp->plat_data->power_off)
+		dp->plat_data->power_off(dp->plat_data);
+	clk_disable_unprepare(dp->clock);
+out_dp_clk_pre:
+	pm_runtime_put_sync(dp->dev);
+
+	return ret;
+}
+
 static void analogix_dp_bridge_enable(struct drm_bridge *bridge)
 {
 	struct analogix_dp_device *dp = bridge->driver_private;
+	int timeout_loop = 0;
 
 	if (dp->dpms_mode == DRM_MODE_DPMS_ON)
 		return;
 
-	pm_runtime_get_sync(dp->dev);
-
-	if (dp->plat_data->power_on)
-		dp->plat_data->power_on(dp->plat_data);
-
-	phy_power_on(dp->phy);
-	analogix_dp_init_dp(dp);
-	enable_irq(dp->irq);
-	analogix_dp_commit(dp);
-
-	dp->dpms_mode = DRM_MODE_DPMS_ON;
+	while (timeout_loop < MAX_PLL_LOCK_LOOP) {
+		if (analogix_dp_set_bridge(dp) == 0) {
+			dp->dpms_mode = DRM_MODE_DPMS_ON;
+			return;
+		}
+		dev_err(dp->dev, "failed to set bridge, retry: %d\n",
+			timeout_loop);
+		timeout_loop++;
+		usleep_range(10, 11);
+	}
+	dev_err(dp->dev, "too many bridge-setup retries, giving up\n");
 }
 
 static void analogix_dp_bridge_disable(struct drm_bridge *bridge)
@@ -1186,11 +1340,15 @@
 	}
 
 	disable_irq(dp->irq);
-	phy_power_off(dp->phy);
 
 	if (dp->plat_data->power_off)
 		dp->plat_data->power_off(dp->plat_data);
 
+	analogix_dp_set_analog_power_down(dp, POWER_ALL, 1);
+	phy_power_off(dp->phy);
+
+	clk_disable_unprepare(dp->clock);
+
 	pm_runtime_put_sync(dp->dev);
 
 	ret = analogix_dp_prepare_panel(dp, false, true);
@@ -1198,6 +1356,7 @@
 		DRM_ERROR("failed to setup the panel ret = %d\n", ret);
 
 	dp->psr_enable = false;
+	dp->fast_train_enable = false;
 	dp->dpms_mode = DRM_MODE_DPMS_OFF;
 }
 
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
index 6a96ef7..769255d 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
@@ -19,6 +19,7 @@
 #define DP_TIMEOUT_LOOP_COUNT 100
 #define MAX_CR_LOOP 5
 #define MAX_EQ_LOOP 5
+#define MAX_PLL_LOCK_LOOP 5
 
 /* Training takes 22ms if AUX channel comm fails. Use this as retry interval */
 #define DP_TIMEOUT_TRAINING_US			22000
@@ -173,7 +174,7 @@
 	int			hpd_gpio;
 	bool                    force_hpd;
 	bool			psr_enable;
-	bool			fast_train_support;
+	bool			fast_train_enable;
 
 	struct mutex		panel_lock;
 	bool			panel_is_modeset;
@@ -197,7 +198,7 @@
 void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp,
 				       enum analog_power_block block,
 				       bool enable);
-void analogix_dp_init_analog_func(struct analogix_dp_device *dp);
+int analogix_dp_init_analog_func(struct analogix_dp_device *dp);
 void analogix_dp_init_hpd(struct analogix_dp_device *dp);
 void analogix_dp_force_hpd(struct analogix_dp_device *dp);
 enum dp_irq_type analogix_dp_get_irq_type(struct analogix_dp_device *dp);
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
index 9df2f3e..a5f2763 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
@@ -126,9 +126,14 @@
 	analogix_dp_stop_video(dp);
 	analogix_dp_enable_video_mute(dp, 0);
 
-	reg = MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N |
-		AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N |
-		HDCP_FUNC_EN_N | SW_FUNC_EN_N;
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
+		reg = RK_VID_CAP_FUNC_EN_N | RK_VID_FIFO_FUNC_EN_N |
+			SW_FUNC_EN_N;
+	else
+		reg = MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N |
+			AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N |
+			HDCP_FUNC_EN_N | SW_FUNC_EN_N;
+
 	writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1);
 
 	reg = SSC_FUNC_EN_N | AUX_FUNC_EN_N |
@@ -230,16 +235,20 @@
 void analogix_dp_set_pll_power_down(struct analogix_dp_device *dp, bool enable)
 {
 	u32 reg;
+	u32 mask = DP_PLL_PD;
+	u32 pd_addr = ANALOGIX_DP_PLL_CTL;
 
-	if (enable) {
-		reg = readl(dp->reg_base + ANALOGIX_DP_PLL_CTL);
-		reg |= DP_PLL_PD;
-		writel(reg, dp->reg_base + ANALOGIX_DP_PLL_CTL);
-	} else {
-		reg = readl(dp->reg_base + ANALOGIX_DP_PLL_CTL);
-		reg &= ~DP_PLL_PD;
-		writel(reg, dp->reg_base + ANALOGIX_DP_PLL_CTL);
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) {
+		pd_addr = ANALOGIX_DP_PD;
+		mask = RK_PLL_PD;
 	}
+
+	reg = readl(dp->reg_base + pd_addr);
+	if (enable)
+		reg |= mask;
+	else
+		reg &= ~mask;
+	writel(reg, dp->reg_base + pd_addr);
 }
 
 void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp,
@@ -248,83 +257,98 @@
 {
 	u32 reg;
 	u32 phy_pd_addr = ANALOGIX_DP_PHY_PD;
+	u32 mask;
 
 	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
 		phy_pd_addr = ANALOGIX_DP_PD;
 
 	switch (block) {
 	case AUX_BLOCK:
-		if (enable) {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg |= AUX_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		} else {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg &= ~AUX_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		}
+		if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
+			mask = RK_AUX_PD;
+		else
+			mask = AUX_PD;
+
+		reg = readl(dp->reg_base + phy_pd_addr);
+		if (enable)
+			reg |= mask;
+		else
+			reg &= ~mask;
+		writel(reg, dp->reg_base + phy_pd_addr);
 		break;
 	case CH0_BLOCK:
-		if (enable) {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg |= CH0_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		} else {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg &= ~CH0_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		}
+		mask = CH0_PD;
+		reg = readl(dp->reg_base + phy_pd_addr);
+
+		if (enable)
+			reg |= mask;
+		else
+			reg &= ~mask;
+		writel(reg, dp->reg_base + phy_pd_addr);
 		break;
 	case CH1_BLOCK:
-		if (enable) {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg |= CH1_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		} else {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg &= ~CH1_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		}
+		mask = CH1_PD;
+		reg = readl(dp->reg_base + phy_pd_addr);
+
+		if (enable)
+			reg |= mask;
+		else
+			reg &= ~mask;
+		writel(reg, dp->reg_base + phy_pd_addr);
 		break;
 	case CH2_BLOCK:
-		if (enable) {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg |= CH2_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		} else {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg &= ~CH2_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		}
+		mask = CH2_PD;
+		reg = readl(dp->reg_base + phy_pd_addr);
+
+		if (enable)
+			reg |= mask;
+		else
+			reg &= ~mask;
+		writel(reg, dp->reg_base + phy_pd_addr);
 		break;
 	case CH3_BLOCK:
-		if (enable) {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg |= CH3_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		} else {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg &= ~CH3_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		}
+		mask = CH3_PD;
+		reg = readl(dp->reg_base + phy_pd_addr);
+
+		if (enable)
+			reg |= mask;
+		else
+			reg &= ~mask;
+		writel(reg, dp->reg_base + phy_pd_addr);
 		break;
 	case ANALOG_TOTAL:
-		if (enable) {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg |= DP_PHY_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		} else {
-			reg = readl(dp->reg_base + phy_pd_addr);
-			reg &= ~DP_PHY_PD;
-			writel(reg, dp->reg_base + phy_pd_addr);
-		}
+		/*
+		 * The Rockchip IP has no bit named DP_PHY_PD, so use
+		 * DP_INC_BG to power off everything instead.
+		 */
+		if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
+			mask = DP_INC_BG;
+		else
+			mask = DP_PHY_PD;
+
+		reg = readl(dp->reg_base + phy_pd_addr);
+		if (enable)
+			reg |= mask;
+		else
+			reg &= ~mask;
+
+		writel(reg, dp->reg_base + phy_pd_addr);
+		if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
+			usleep_range(10, 15);
 		break;
 	case POWER_ALL:
 		if (enable) {
-			reg = DP_PHY_PD | AUX_PD | CH3_PD | CH2_PD |
-				CH1_PD | CH0_PD;
+			reg = DP_ALL_PD;
 			writel(reg, dp->reg_base + phy_pd_addr);
 		} else {
+			reg = DP_ALL_PD;
+			writel(reg, dp->reg_base + phy_pd_addr);
+			usleep_range(10, 15);
+			reg &= ~DP_INC_BG;
+			writel(reg, dp->reg_base + phy_pd_addr);
+			usleep_range(10, 15);
+
 			writel(0x00, dp->reg_base + phy_pd_addr);
 		}
 		break;
@@ -333,7 +357,7 @@
 	}
 }
 
-void analogix_dp_init_analog_func(struct analogix_dp_device *dp)
+int analogix_dp_init_analog_func(struct analogix_dp_device *dp)
 {
 	u32 reg;
 	int timeout_loop = 0;
@@ -355,7 +379,7 @@
 			timeout_loop++;
 			if (DP_TIMEOUT_LOOP_COUNT < timeout_loop) {
 				dev_err(dp->dev, "failed to get pll lock status\n");
-				return;
+				return -ETIMEDOUT;
 			}
 			usleep_range(10, 20);
 		}
@@ -366,6 +390,7 @@
 	reg &= ~(SERDES_FIFO_FUNC_EN_N | LS_CLK_DOMAIN_FUNC_EN_N
 		| AUX_FUNC_EN_N);
 	writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2);
+	return 0;
 }
 
 void analogix_dp_clear_hotplug_interrupts(struct analogix_dp_device *dp)
@@ -450,17 +475,22 @@
 	reg = RPLY_RECEIV | AUX_ERR;
 	writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA);
 
+	analogix_dp_set_analog_power_down(dp, AUX_BLOCK, true);
+	usleep_range(10, 11);
+	analogix_dp_set_analog_power_down(dp, AUX_BLOCK, false);
+
 	analogix_dp_reset_aux(dp);
 
-	/* Disable AUX transaction H/W retry */
+	/* AUX_BIT_PERIOD_EXPECTED_DELAY doesn't apply to Rockchip IP */
 	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
-		reg = AUX_BIT_PERIOD_EXPECTED_DELAY(0) |
-		      AUX_HW_RETRY_COUNT_SEL(3) |
-		      AUX_HW_RETRY_INTERVAL_600_MICROSECONDS;
+		reg = 0;
 	else
-		reg = AUX_BIT_PERIOD_EXPECTED_DELAY(3) |
-		      AUX_HW_RETRY_COUNT_SEL(0) |
-		      AUX_HW_RETRY_INTERVAL_600_MICROSECONDS;
+		reg = AUX_BIT_PERIOD_EXPECTED_DELAY(3);
+
+	/* Disable AUX transaction H/W retry */
+	reg |= AUX_HW_RETRY_COUNT_SEL(0) |
+	       AUX_HW_RETRY_INTERVAL_600_MICROSECONDS;
+
 	writel(reg, dp->reg_base + ANALOGIX_DP_AUX_HW_RETRY_CTL);
 
 	/* Receive AUX Channel DEFER commands equal to DEFFER_COUNT*64 */
@@ -947,8 +977,12 @@
 	u32 reg;
 
 	reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_1);
-	reg &= ~(MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N);
-	reg |= MASTER_VID_FUNC_EN_N;
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) {
+		reg &= ~(RK_VID_CAP_FUNC_EN_N | RK_VID_FIFO_FUNC_EN_N);
+	} else {
+		reg &= ~(MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N);
+		reg |= MASTER_VID_FUNC_EN_N;
+	}
 	writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1);
 
 	reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10);
@@ -1072,10 +1106,11 @@
 			     struct drm_dp_aux_msg *msg)
 {
 	u32 reg;
+	u32 status_reg;
 	u8 *buffer = msg->buffer;
-	int timeout_loop = 0;
 	unsigned int i;
 	int num_transferred = 0;
+	int ret;
 
 	/* Buffer size of AUX CH is 16 bytes */
 	if (WARN_ON(msg->size > 16))
@@ -1139,17 +1174,20 @@
 
 	writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2);
 
-	/* Is AUX CH command reply received? */
+	ret = readx_poll_timeout(readl, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2,
+				 reg, !(reg & AUX_EN), 25, 500 * 1000);
+	if (ret) {
+		dev_err(dp->dev, "AUX CH enable timeout!\n");
+		goto aux_error;
+	}
+
 	/* TODO: Wait for an interrupt instead of looping? */
-	reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA);
-	while (!(reg & RPLY_RECEIV)) {
-		timeout_loop++;
-		if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) {
-			dev_err(dp->dev, "AUX CH command reply failed!\n");
-			return -ETIMEDOUT;
-		}
-		reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA);
-		usleep_range(10, 11);
+	/* Is AUX CH command reply received? */
+	ret = readx_poll_timeout(readl, dp->reg_base + ANALOGIX_DP_INT_STA,
+				 reg, reg & RPLY_RECEIV, 10, 20 * 1000);
+	if (ret) {
+		dev_err(dp->dev, "AUX CH cmd reply timeout!\n");
+		goto aux_error;
 	}
 
 	/* Clear interrupt source for AUX CH command reply */
@@ -1157,17 +1195,13 @@
 
 	/* Clear interrupt source for AUX CH access error */
 	reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA);
-	if (reg & AUX_ERR) {
+	status_reg = readl(dp->reg_base + ANALOGIX_DP_AUX_CH_STA);
+	if ((reg & AUX_ERR) || (status_reg & AUX_STATUS_MASK)) {
 		writel(AUX_ERR, dp->reg_base + ANALOGIX_DP_INT_STA);
-		return -EREMOTEIO;
-	}
 
-	/* Check AUX CH error access status */
-	reg = readl(dp->reg_base + ANALOGIX_DP_AUX_CH_STA);
-	if ((reg & AUX_STATUS_MASK)) {
-		dev_err(dp->dev, "AUX CH error happened: %d\n\n",
-			reg & AUX_STATUS_MASK);
-		return -EREMOTEIO;
+		dev_warn(dp->dev, "AUX CH error happened: %#x (%d)\n",
+			 status_reg & AUX_STATUS_MASK, !!(reg & AUX_ERR));
+		goto aux_error;
 	}
 
 	if (msg->request & DP_AUX_I2C_READ) {
@@ -1193,4 +1227,10 @@
 		msg->reply = DP_AUX_NATIVE_REPLY_ACK;
 
 	return num_transferred > 0 ? num_transferred : -EBUSY;
+
+aux_error:
+	/* if an aux error happened, reset the aux channel */
+	analogix_dp_init_aux(dp);
+
+	return -EREMOTEIO;
 }
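
The open-coded AUX polling loops are replaced with readx_poll_timeout() from <linux/iopoll.h>: it repeatedly evaluates op(addr) into val, sleeping between reads, until the condition holds or the timeout (in microseconds) expires, returning 0 on success and -ETIMEDOUT otherwise. The reply wait above, restated as a sketch:

    #include <linux/iopoll.h>

    /* Poll INT_STA until RPLY_RECEIV is set: ~10us per read, 20ms budget. */
    static int example_wait_aux_reply(struct analogix_dp_device *dp)
    {
    	u32 reg;

    	return readx_poll_timeout(readl,
    				  dp->reg_base + ANALOGIX_DP_INT_STA,
    				  reg, reg & RPLY_RECEIV, 10, 20 * 1000);
    }
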
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
index 40200c6..0cf27c7 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
@@ -127,7 +127,9 @@
 
 /* ANALOGIX_DP_FUNC_EN_1 */
 #define MASTER_VID_FUNC_EN_N			(0x1 << 7)
+#define RK_VID_CAP_FUNC_EN_N			(0x1 << 6)
 #define SLAVE_VID_FUNC_EN_N			(0x1 << 5)
+#define RK_VID_FIFO_FUNC_EN_N			(0x1 << 5)
 #define AUD_FIFO_FUNC_EN_N			(0x1 << 4)
 #define AUD_FUNC_EN_N				(0x1 << 3)
 #define HDCP_FUNC_EN_N				(0x1 << 2)
@@ -342,12 +344,17 @@
 #define DP_PLL_REF_BIT_1_2500V			(0x7 << 0)
 
 /* ANALOGIX_DP_PHY_PD */
+#define DP_INC_BG				(0x1 << 7)
+#define DP_EXP_BG				(0x1 << 6)
 #define DP_PHY_PD				(0x1 << 5)
+#define RK_AUX_PD				(0x1 << 5)
 #define AUX_PD					(0x1 << 4)
+#define RK_PLL_PD				(0x1 << 4)
 #define CH3_PD					(0x1 << 3)
 #define CH2_PD					(0x1 << 2)
 #define CH1_PD					(0x1 << 1)
 #define CH0_PD					(0x1 << 0)
+#define DP_ALL_PD				(0xff)
 
 /* ANALOGIX_DP_PHY_TEST */
 #define MACRO_RST				(0x1 << 5)
diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c
new file mode 100644
index 0000000..c255fc3
--- /dev/null
+++ b/drivers/gpu/drm/bridge/cdns-dsi.c
@@ -0,0 +1,1623 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright: 2017 Cadence Design Systems, Inc.
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_panel.h>
+#include <video/mipi_display.h>
+
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_graph.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#define IP_CONF				0x0
+#define SP_HS_FIFO_DEPTH(x)		(((x) & GENMASK(30, 26)) >> 26)
+#define SP_LP_FIFO_DEPTH(x)		(((x) & GENMASK(25, 21)) >> 21)
+#define VRS_FIFO_DEPTH(x)		(((x) & GENMASK(20, 16)) >> 16)
+#define DIRCMD_FIFO_DEPTH(x)		(((x) & GENMASK(15, 13)) >> 13)
+#define SDI_IFACE_32			BIT(12)
+#define INTERNAL_DATAPATH_32		(0 << 10)
+#define INTERNAL_DATAPATH_16		(1 << 10)
+#define INTERNAL_DATAPATH_8		(3 << 10)
+#define INTERNAL_DATAPATH_SIZE		((x) & GENMASK(11, 10))
+#define NUM_IFACE(x)			((((x) & GENMASK(9, 8)) >> 8) + 1)
+#define MAX_LANE_NB(x)			(((x) & GENMASK(7, 6)) >> 6)
+#define RX_FIFO_DEPTH(x)		((x) & GENMASK(5, 0))
+
+#define MCTL_MAIN_DATA_CTL		0x4
+#define TE_MIPI_POLLING_EN		BIT(25)
+#define TE_HW_POLLING_EN		BIT(24)
+#define DISP_EOT_GEN			BIT(18)
+#define HOST_EOT_GEN			BIT(17)
+#define DISP_GEN_CHECKSUM		BIT(16)
+#define DISP_GEN_ECC			BIT(15)
+#define BTA_EN				BIT(14)
+#define READ_EN				BIT(13)
+#define REG_TE_EN			BIT(12)
+#define IF_TE_EN(x)			BIT(8 + (x))
+#define TVG_SEL				BIT(6)
+#define VID_EN				BIT(5)
+#define IF_VID_SELECT(x)		((x) << 2)
+#define IF_VID_SELECT_MASK		GENMASK(3, 2)
+#define IF_VID_MODE			BIT(1)
+#define LINK_EN				BIT(0)
+
+#define MCTL_MAIN_PHY_CTL		0x8
+#define HS_INVERT_DAT(x)		BIT(19 + ((x) * 2))
+#define SWAP_PINS_DAT(x)		BIT(18 + ((x) * 2))
+#define HS_INVERT_CLK			BIT(17)
+#define SWAP_PINS_CLK			BIT(16)
+#define HS_SKEWCAL_EN			BIT(15)
+#define WAIT_BURST_TIME(x)		((x) << 10)
+#define DATA_ULPM_EN(x)			BIT(6 + (x))
+#define CLK_ULPM_EN			BIT(5)
+#define CLK_CONTINUOUS			BIT(4)
+#define DATA_LANE_EN(x)			BIT((x) - 1)
+
+#define MCTL_MAIN_EN			0xc
+#define DATA_FORCE_STOP			BIT(17)
+#define CLK_FORCE_STOP			BIT(16)
+#define IF_EN(x)			BIT(13 + (x))
+#define DATA_LANE_ULPM_REQ(l)		BIT(9 + (l))
+#define CLK_LANE_ULPM_REQ		BIT(8)
+#define DATA_LANE_START(x)		BIT(4 + (x))
+#define CLK_LANE_EN			BIT(3)
+#define PLL_START			BIT(0)
+
+#define MCTL_DPHY_CFG0			0x10
+#define DPHY_C_RSTB			BIT(20)
+#define DPHY_D_RSTB(x)			GENMASK(15 + (x), 16)
+#define DPHY_PLL_PDN			BIT(10)
+#define DPHY_CMN_PDN			BIT(9)
+#define DPHY_C_PDN			BIT(8)
+#define DPHY_D_PDN(x)			GENMASK(3 + (x), 4)
+#define DPHY_ALL_D_PDN			GENMASK(7, 4)
+#define DPHY_PLL_PSO			BIT(1)
+#define DPHY_CMN_PSO			BIT(0)
+
+#define MCTL_DPHY_TIMEOUT1		0x14
+#define HSTX_TIMEOUT(x)			((x) << 4)
+#define HSTX_TIMEOUT_MAX		GENMASK(17, 0)
+#define CLK_DIV(x)			(x)
+#define CLK_DIV_MAX			GENMASK(3, 0)
+
+#define MCTL_DPHY_TIMEOUT2		0x18
+#define LPRX_TIMEOUT(x)			(x)
+
+#define MCTL_ULPOUT_TIME		0x1c
+#define DATA_LANE_ULPOUT_TIME(x)	((x) << 9)
+#define CLK_LANE_ULPOUT_TIME(x)		(x)
+
+#define MCTL_3DVIDEO_CTL		0x20
+#define VID_VSYNC_3D_EN			BIT(7)
+#define VID_VSYNC_3D_LR			BIT(5)
+#define VID_VSYNC_3D_SECOND_EN		BIT(4)
+#define VID_VSYNC_3DFORMAT_LINE		(0 << 2)
+#define VID_VSYNC_3DFORMAT_FRAME	(1 << 2)
+#define VID_VSYNC_3DFORMAT_PIXEL	(2 << 2)
+#define VID_VSYNC_3DMODE_OFF		0
+#define VID_VSYNC_3DMODE_PORTRAIT	1
+#define VID_VSYNC_3DMODE_LANDSCAPE	2
+
+#define MCTL_MAIN_STS			0x24
+#define MCTL_MAIN_STS_CTL		0x130
+#define MCTL_MAIN_STS_CLR		0x150
+#define MCTL_MAIN_STS_FLAG		0x170
+#define HS_SKEWCAL_DONE			BIT(11)
+#define IF_UNTERM_PKT_ERR(x)		BIT(8 + (x))
+#define LPRX_TIMEOUT_ERR		BIT(7)
+#define HSTX_TIMEOUT_ERR		BIT(6)
+#define DATA_LANE_RDY(l)		BIT(2 + (l))
+#define CLK_LANE_RDY			BIT(1)
+#define PLL_LOCKED			BIT(0)
+
+#define MCTL_DPHY_ERR			0x28
+#define MCTL_DPHY_ERR_CTL1		0x148
+#define MCTL_DPHY_ERR_CLR		0x168
+#define MCTL_DPHY_ERR_FLAG		0x188
+#define ERR_CONT_LP(x, l)		BIT(18 + ((x) * 4) + (l))
+#define ERR_CONTROL(l)			BIT(14 + (l))
+#define ERR_SYNESC(l)			BIT(10 + (l))
+#define ERR_ESC(l)			BIT(6 + (l))
+
+#define MCTL_DPHY_ERR_CTL2		0x14c
+#define ERR_CONT_LP_EDGE(x, l)		BIT(12 + ((x) * 4) + (l))
+#define ERR_CONTROL_EDGE(l)		BIT(8 + (l))
+#define ERR_SYN_ESC_EDGE(l)		BIT(4 + (l))
+#define ERR_ESC_EDGE(l)			BIT(0 + (l))
+
+#define MCTL_LANE_STS			0x2c
+#define PPI_C_TX_READY_HS		BIT(18)
+#define DPHY_PLL_LOCK			BIT(17)
+#define PPI_D_RX_ULPS_ESC(x)		(((x) & GENMASK(15, 12)) >> 12)
+#define LANE_STATE_START		0
+#define LANE_STATE_IDLE			1
+#define LANE_STATE_WRITE		2
+#define LANE_STATE_ULPM			3
+#define LANE_STATE_READ			4
+#define DATA_LANE_STATE(l, val)		\
+	(((val) >> (2 + 2 * (l) + ((l) ? 1 : 0))) & GENMASK((l) ? 1 : 2, 0))
+#define CLK_LANE_STATE_HS		2
+#define CLK_LANE_STATE(val)		((val) & GENMASK(1, 0))
+
+#define DSC_MODE_CTL			0x30
+#define DSC_MODE_EN			BIT(0)
+
+#define DSC_CMD_SEND			0x34
+#define DSC_SEND_PPS			BIT(0)
+#define DSC_EXECUTE_QUEUE		BIT(1)
+
+#define DSC_PPS_WRDAT			0x38
+
+#define DSC_MODE_STS			0x3c
+#define DSC_PPS_DONE			BIT(1)
+#define DSC_EXEC_DONE			BIT(2)
+
+#define CMD_MODE_CTL			0x70
+#define IF_LP_EN(x)			BIT(9 + (x))
+#define IF_VCHAN_ID(x, c)		((c) << ((x) * 2))
+
+#define CMD_MODE_CTL2			0x74
+#define TE_TIMEOUT(x)			((x) << 11)
+#define FILL_VALUE(x)			((x) << 3)
+#define ARB_IF_WITH_HIGHEST_PRIORITY(x)	((x) << 1)
+#define ARB_ROUND_ROBIN_MODE		BIT(0)
+
+#define CMD_MODE_STS			0x78
+#define CMD_MODE_STS_CTL		0x134
+#define CMD_MODE_STS_CLR		0x154
+#define CMD_MODE_STS_FLAG		0x174
+#define ERR_IF_UNDERRUN(x)		BIT(4 + (x))
+#define ERR_UNWANTED_READ		BIT(3)
+#define ERR_TE_MISS			BIT(2)
+#define ERR_NO_TE			BIT(1)
+#define CSM_RUNNING			BIT(0)
+
+#define DIRECT_CMD_SEND			0x80
+
+#define DIRECT_CMD_MAIN_SETTINGS	0x84
+#define TRIGGER_VAL(x)			((x) << 25)
+#define CMD_LP_EN			BIT(24)
+#define CMD_SIZE(x)			((x) << 16)
+#define CMD_VCHAN_ID(x)			((x) << 14)
+#define CMD_DATATYPE(x)			((x) << 8)
+#define CMD_LONG			BIT(3)
+#define WRITE_CMD			0
+#define READ_CMD			1
+#define TE_REQ				4
+#define TRIGGER_REQ			5
+#define BTA_REQ				6
+
+#define DIRECT_CMD_STS			0x88
+#define DIRECT_CMD_STS_CTL		0x138
+#define DIRECT_CMD_STS_CLR		0x158
+#define DIRECT_CMD_STS_FLAG		0x178
+#define RCVD_ACK_VAL(val)		((val) >> 16)
+#define RCVD_TRIGGER_VAL(val)		(((val) & GENMASK(14, 11)) >> 11)
+#define READ_COMPLETED_WITH_ERR		BIT(10)
+#define BTA_FINISHED			BIT(9)
+#define BTA_COMPLETED			BIT(8)
+#define TE_RCVD				BIT(7)
+#define TRIGGER_RCVD			BIT(6)
+#define ACK_WITH_ERR_RCVD		BIT(5)
+#define ACK_RCVD			BIT(4)
+#define READ_COMPLETED			BIT(3)
+#define TRIGGER_COMPLETED		BIT(2)
+#define WRITE_COMPLETED			BIT(1)
+#define SENDING_CMD			BIT(0)
+
+#define DIRECT_CMD_STOP_READ		0x8c
+
+#define DIRECT_CMD_WRDATA		0x90
+
+#define DIRECT_CMD_FIFO_RST		0x94
+
+#define DIRECT_CMD_RDDATA		0xa0
+
+#define DIRECT_CMD_RD_PROPS		0xa4
+#define RD_DCS				BIT(18)
+#define RD_VCHAN_ID(val)		(((val) >> 16) & GENMASK(1, 0))
+#define RD_SIZE(val)			((val) & GENMASK(15, 0))
+
+#define DIRECT_CMD_RD_STS		0xa8
+#define DIRECT_CMD_RD_STS_CTL		0x13c
+#define DIRECT_CMD_RD_STS_CLR		0x15c
+#define DIRECT_CMD_RD_STS_FLAG		0x17c
+#define ERR_EOT_WITH_ERR		BIT(8)
+#define ERR_MISSING_EOT			BIT(7)
+#define ERR_WRONG_LENGTH		BIT(6)
+#define ERR_OVERSIZE			BIT(5)
+#define ERR_RECEIVE			BIT(4)
+#define ERR_UNDECODABLE			BIT(3)
+#define ERR_CHECKSUM			BIT(2)
+#define ERR_UNCORRECTABLE		BIT(1)
+#define ERR_FIXED			BIT(0)
+
+#define VID_MAIN_CTL			0xb0
+#define VID_IGNORE_MISS_VSYNC		BIT(31)
+#define VID_FIELD_SW			BIT(28)
+#define VID_INTERLACED_EN		BIT(27)
+#define RECOVERY_MODE(x)		((x) << 25)
+#define RECOVERY_MODE_NEXT_HSYNC	0
+#define RECOVERY_MODE_NEXT_STOP_POINT	2
+#define RECOVERY_MODE_NEXT_VSYNC	3
+#define REG_BLKEOL_MODE(x)		((x) << 23)
+#define REG_BLKLINE_MODE(x)		((x) << 21)
+#define REG_BLK_MODE_NULL_PKT		0
+#define REG_BLK_MODE_BLANKING_PKT	1
+#define REG_BLK_MODE_LP			2
+#define SYNC_PULSE_HORIZONTAL		BIT(20)
+#define SYNC_PULSE_ACTIVE		BIT(19)
+#define BURST_MODE			BIT(18)
+#define VID_PIXEL_MODE_MASK		GENMASK(17, 14)
+#define VID_PIXEL_MODE_RGB565		(0 << 14)
+#define VID_PIXEL_MODE_RGB666_PACKED	(1 << 14)
+#define VID_PIXEL_MODE_RGB666		(2 << 14)
+#define VID_PIXEL_MODE_RGB888		(3 << 14)
+#define VID_PIXEL_MODE_RGB101010	(4 << 14)
+#define VID_PIXEL_MODE_RGB121212	(5 << 14)
+#define VID_PIXEL_MODE_YUV420		(8 << 14)
+#define VID_PIXEL_MODE_YUV422_PACKED	(9 << 14)
+#define VID_PIXEL_MODE_YUV422		(10 << 14)
+#define VID_PIXEL_MODE_YUV422_24B	(11 << 14)
+#define VID_PIXEL_MODE_DSC_COMP		(12 << 14)
+#define VID_DATATYPE(x)			((x) << 8)
+#define VID_VIRTCHAN_ID(iface, x)	((x) << (4 + (iface) * 2))
+#define STOP_MODE(x)			((x) << 2)
+#define START_MODE(x)			(x)
+
+#define VID_VSIZE1			0xb4
+#define VFP_LEN(x)			((x) << 12)
+#define VBP_LEN(x)			((x) << 6)
+#define VSA_LEN(x)			(x)
+
+#define VID_VSIZE2			0xb8
+#define VACT_LEN(x)			(x)
+
+#define VID_HSIZE1			0xc0
+#define HBP_LEN(x)			((x) << 16)
+#define HSA_LEN(x)			(x)
+
+#define VID_HSIZE2			0xc4
+#define HFP_LEN(x)			((x) << 16)
+#define HACT_LEN(x)			(x)
+
+#define VID_BLKSIZE1			0xcc
+#define BLK_EOL_PKT_LEN(x)		((x) << 15)
+#define BLK_LINE_EVENT_PKT_LEN(x)	(x)
+
+#define VID_BLKSIZE2			0xd0
+#define BLK_LINE_PULSE_PKT_LEN(x)	(x)
+
+#define VID_PKT_TIME			0xd8
+#define BLK_EOL_DURATION(x)		(x)
+
+#define VID_DPHY_TIME			0xdc
+#define REG_WAKEUP_TIME(x)		((x) << 17)
+#define REG_LINE_DURATION(x)		(x)
+
+#define VID_ERR_COLOR1			0xe0
+#define COL_GREEN(x)			((x) << 12)
+#define COL_RED(x)			(x)
+
+#define VID_ERR_COLOR2			0xe4
+#define PAD_VAL(x)			((x) << 12)
+#define COL_BLUE(x)			(x)
+
+#define VID_VPOS			0xe8
+#define LINE_VAL(val)			(((val) & GENMASK(14, 2)) >> 2)
+#define LINE_POS(val)			((val) & GENMASK(1, 0))
+
+#define VID_HPOS			0xec
+#define HORIZ_VAL(val)			(((val) & GENMASK(17, 3)) >> 3)
+#define HORIZ_POS(val)			((val) & GENMASK(2, 0))
+
+#define VID_MODE_STS			0xf0
+#define VID_MODE_STS_CTL		0x140
+#define VID_MODE_STS_CLR		0x160
+#define VID_MODE_STS_FLAG		0x180
+#define VSG_RECOVERY			BIT(10)
+#define ERR_VRS_WRONG_LEN		BIT(9)
+#define ERR_LONG_READ			BIT(8)
+#define ERR_LINE_WRITE			BIT(7)
+#define ERR_BURST_WRITE			BIT(6)
+#define ERR_SMALL_HEIGHT		BIT(5)
+#define ERR_SMALL_LEN			BIT(4)
+#define ERR_MISSING_VSYNC		BIT(3)
+#define ERR_MISSING_HSYNC		BIT(2)
+#define ERR_MISSING_DATA		BIT(1)
+#define VSG_RUNNING			BIT(0)
+
+#define VID_VCA_SETTING1		0xf4
+#define BURST_LP			BIT(16)
+#define MAX_BURST_LIMIT(x)		(x)
+
+#define VID_VCA_SETTING2		0xf8
+#define MAX_LINE_LIMIT(x)		((x) << 16)
+#define EXACT_BURST_LIMIT(x)		(x)
+
+#define TVG_CTL				0xfc
+#define TVG_STRIPE_SIZE(x)		((x) << 5)
+#define TVG_MODE_MASK			GENMASK(4, 3)
+#define TVG_MODE_SINGLE_COLOR		(0 << 3)
+#define TVG_MODE_VSTRIPES		(2 << 3)
+#define TVG_MODE_HSTRIPES		(3 << 3)
+#define TVG_STOPMODE_MASK		GENMASK(2, 1)
+#define TVG_STOPMODE_EOF		(0 << 1)
+#define TVG_STOPMODE_EOL		(1 << 1)
+#define TVG_STOPMODE_NOW		(2 << 1)
+#define TVG_RUN				BIT(0)
+
+#define TVG_IMG_SIZE			0x100
+#define TVG_NBLINES(x)			((x) << 16)
+#define TVG_LINE_SIZE(x)		(x)
+
+#define TVG_COLOR1			0x104
+#define TVG_COL1_GREEN(x)		((x) << 12)
+#define TVG_COL1_RED(x)			(x)
+
+#define TVG_COLOR1_BIS			0x108
+#define TVG_COL1_BLUE(x)		(x)
+
+#define TVG_COLOR2			0x10c
+#define TVG_COL2_GREEN(x)		((x) << 12)
+#define TVG_COL2_RED(x)			(x)
+
+#define TVG_COLOR2_BIS			0x110
+#define TVG_COL2_BLUE(x)		(x)
+
+#define TVG_STS				0x114
+#define TVG_STS_CTL			0x144
+#define TVG_STS_CLR			0x164
+#define TVG_STS_FLAG			0x184
+#define TVG_STS_RUNNING			BIT(0)
+
+#define STS_CTL_EDGE(e)			((e) << 16)
+
+#define DPHY_LANES_MAP			0x198
+#define DAT_REMAP_CFG(b, l)		((l) << ((b) * 8))
+
+#define DPI_IRQ_EN			0x1a0
+#define DPI_IRQ_CLR			0x1a4
+#define DPI_IRQ_STS			0x1a8
+#define PIXEL_BUF_OVERFLOW		BIT(0)
+
+#define DPI_CFG				0x1ac
+#define DPI_CFG_FIFO_DEPTH(x)		((x) >> 16)
+#define DPI_CFG_FIFO_LEVEL(x)		((x) & GENMASK(15, 0))
+
+#define TEST_GENERIC			0x1f0
+#define TEST_STATUS(x)			((x) >> 16)
+#define TEST_CTRL(x)			(x)
+
+#define ID_REG				0x1fc
+#define REV_VENDOR_ID(x)		(((x) & GENMASK(31, 20)) >> 20)
+#define REV_PRODUCT_ID(x)		(((x) & GENMASK(19, 12)) >> 12)
+#define REV_HW(x)			(((x) & GENMASK(11, 8)) >> 8)
+#define REV_MAJOR(x)			(((x) & GENMASK(7, 4)) >> 4)
+#define REV_MINOR(x)			((x) & GENMASK(3, 0))
+
+#define DSI_OUTPUT_PORT			0
+#define DSI_INPUT_PORT(inputid)		(1 + (inputid))
+
+#define DSI_HBP_FRAME_OVERHEAD		12
+#define DSI_HSA_FRAME_OVERHEAD		14
+#define DSI_HFP_FRAME_OVERHEAD		6
+#define DSI_HSS_VSS_VSE_FRAME_OVERHEAD	4
+#define DSI_BLANKING_FRAME_OVERHEAD	6
+#define DSI_NULL_FRAME_OVERHEAD		6
+#define DSI_EOT_PKT_SIZE		4
+
+#define REG_WAKEUP_TIME_NS		800
+#define DPHY_PLL_RATE_HZ		108000000
+
+/* DPHY registers */
+#define DPHY_PMA_CMN(reg)		(reg)
+#define DPHY_PMA_LCLK(reg)		(0x100 + (reg))
+#define DPHY_PMA_LDATA(lane, reg)	(0x200 + ((lane) * 0x100) + (reg))
+#define DPHY_PMA_RCLK(reg)		(0x600 + (reg))
+#define DPHY_PMA_RDATA(lane, reg)	(0x700 + ((lane) * 0x100) + (reg))
+#define DPHY_PCS(reg)			(0xb00 + (reg))
+
+#define DPHY_CMN_SSM			DPHY_PMA_CMN(0x20)
+#define DPHY_CMN_SSM_EN			BIT(0)
+#define DPHY_CMN_TX_MODE_EN		BIT(9)
+
+#define DPHY_CMN_PWM			DPHY_PMA_CMN(0x40)
+#define DPHY_CMN_PWM_DIV(x)		((x) << 20)
+#define DPHY_CMN_PWM_LOW(x)		((x) << 10)
+#define DPHY_CMN_PWM_HIGH(x)		(x)
+
+#define DPHY_CMN_FBDIV			DPHY_PMA_CMN(0x4c)
+#define DPHY_CMN_FBDIV_VAL(low, high)	(((high) << 11) | ((low) << 22))
+#define DPHY_CMN_FBDIV_FROM_REG		(BIT(10) | BIT(21))
+
+#define DPHY_CMN_OPIPDIV		DPHY_PMA_CMN(0x50)
+#define DPHY_CMN_IPDIV_FROM_REG		BIT(0)
+#define DPHY_CMN_IPDIV(x)		((x) << 1)
+#define DPHY_CMN_OPDIV_FROM_REG		BIT(6)
+#define DPHY_CMN_OPDIV(x)		((x) << 7)
+
+#define DPHY_PSM_CFG			DPHY_PCS(0x4)
+#define DPHY_PSM_CFG_FROM_REG		BIT(0)
+#define DPHY_PSM_CLK_DIV(x)		((x) << 1)
+
+struct cdns_dsi_output {
+	struct mipi_dsi_device *dev;
+	struct drm_panel *panel;
+	struct drm_bridge *bridge;
+};
+
+enum cdns_dsi_input_id {
+	CDNS_SDI_INPUT,
+	CDNS_DPI_INPUT,
+	CDNS_DSC_INPUT,
+};
+
+struct cdns_dphy_cfg {
+	u8 pll_ipdiv;
+	u8 pll_opdiv;
+	u16 pll_fbdiv;
+	unsigned long lane_bps;
+	unsigned int nlanes;
+};
+
+struct cdns_dsi_cfg {
+	unsigned int hfp;
+	unsigned int hsa;
+	unsigned int hbp;
+	unsigned int hact;
+	unsigned int htotal;
+};
+
+struct cdns_dphy;
+
+enum cdns_dphy_clk_lane_cfg {
+	DPHY_CLK_CFG_LEFT_DRIVES_ALL = 0,
+	DPHY_CLK_CFG_LEFT_DRIVES_RIGHT = 1,
+	DPHY_CLK_CFG_LEFT_DRIVES_LEFT = 2,
+	DPHY_CLK_CFG_RIGHT_DRIVES_ALL = 3,
+};
+
+struct cdns_dphy_ops {
+	int (*probe)(struct cdns_dphy *dphy);
+	void (*remove)(struct cdns_dphy *dphy);
+	void (*set_psm_div)(struct cdns_dphy *dphy, u8 div);
+	void (*set_clk_lane_cfg)(struct cdns_dphy *dphy,
+				 enum cdns_dphy_clk_lane_cfg cfg);
+	void (*set_pll_cfg)(struct cdns_dphy *dphy,
+			    const struct cdns_dphy_cfg *cfg);
+	unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy);
+};
+
+struct cdns_dphy {
+	struct cdns_dphy_cfg cfg;
+	void __iomem *regs;
+	struct clk *psm_clk;
+	struct clk *pll_ref_clk;
+	const struct cdns_dphy_ops *ops;
+};
+
+struct cdns_dsi_input {
+	enum cdns_dsi_input_id id;
+	struct drm_bridge bridge;
+};
+
+struct cdns_dsi {
+	struct mipi_dsi_host base;
+	void __iomem *regs;
+	struct cdns_dsi_input input;
+	struct cdns_dsi_output output;
+	unsigned int direct_cmd_fifo_depth;
+	unsigned int rx_fifo_depth;
+	struct completion direct_cmd_comp;
+	struct clk *dsi_p_clk;
+	struct reset_control *dsi_p_rst;
+	struct clk *dsi_sys_clk;
+	bool link_initialized;
+	struct cdns_dphy *dphy;
+};
+
+static inline struct cdns_dsi *input_to_dsi(struct cdns_dsi_input *input)
+{
+	return container_of(input, struct cdns_dsi, input);
+}
+
+static inline struct cdns_dsi *to_cdns_dsi(struct mipi_dsi_host *host)
+{
+	return container_of(host, struct cdns_dsi, base);
+}
+
+static inline struct cdns_dsi_input *
+bridge_to_cdns_dsi_input(struct drm_bridge *bridge)
+{
+	return container_of(bridge, struct cdns_dsi_input, bridge);
+}
+
+static int cdns_dsi_get_dphy_pll_cfg(struct cdns_dphy *dphy,
+				     struct cdns_dphy_cfg *cfg,
+				     unsigned int dpi_htotal,
+				     unsigned int dpi_bpp,
+				     unsigned int dpi_hz,
+				     unsigned int dsi_htotal,
+				     unsigned int dsi_nlanes,
+				     unsigned int *dsi_hfp_ext)
+{
+	u64 dlane_bps, dlane_bps_max, fbdiv, fbdiv_max, adj_dsi_htotal;
+	unsigned long pll_ref_hz = clk_get_rate(dphy->pll_ref_clk);
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	cfg->nlanes = dsi_nlanes;
+
+	if (pll_ref_hz < 9600000 || pll_ref_hz >= 150000000)
+		return -EINVAL;
+	else if (pll_ref_hz < 19200000)
+		cfg->pll_ipdiv = 1;
+	else if (pll_ref_hz < 38400000)
+		cfg->pll_ipdiv = 2;
+	else if (pll_ref_hz < 76800000)
+		cfg->pll_ipdiv = 4;
+	else
+		cfg->pll_ipdiv = 8;
+
+	/*
+	 * Make sure DSI htotal is aligned on a lane boundary when calculating
+	 * the expected data rate. This is done by extending HFP in case of
+	 * misalignment.
+	 */
+	adj_dsi_htotal = dsi_htotal;
+	if (dsi_htotal % dsi_nlanes)
+		adj_dsi_htotal += dsi_nlanes - (dsi_htotal % dsi_nlanes);
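+	/*
+	 * Illustrative example (hypothetical numbers): with dsi_htotal = 1003
+	 * and dsi_nlanes = 4, 1003 % 4 = 3, so adj_dsi_htotal becomes
+	 * 1003 + (4 - 3) = 1004, a multiple of the lane count.
+	 */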
+
+	dlane_bps = (u64)dpi_hz * adj_dsi_htotal;
+
+	/* data rate in bytes/sec is not an integer, refuse the mode. */
+	if (do_div(dlane_bps, dsi_nlanes * dpi_htotal))
+		return -EINVAL;
+
+	/* data rate was in bytes/sec, convert to bits/sec. */
+	dlane_bps *= 8;
+
+	if (dlane_bps > 2500000000UL || dlane_bps < 160000000UL)
+		return -EINVAL;
+	else if (dlane_bps >= 1250000000)
+		cfg->pll_opdiv = 1;
+	else if (dlane_bps >= 630000000)
+		cfg->pll_opdiv = 2;
+	else if (dlane_bps >= 320000000)
+		cfg->pll_opdiv = 4;
+	else if (dlane_bps >= 160000000)
+		cfg->pll_opdiv = 8;
+
+	/*
+	 * Allow a deviation of 0.2% on the per-lane data rate to try to
+	 * recover a potential mismatch between DPI and PPI clks.
+	 */
+	dlane_bps_max = dlane_bps + DIV_ROUND_DOWN_ULL(dlane_bps, 500);
+	fbdiv_max = DIV_ROUND_DOWN_ULL(dlane_bps_max * 2 *
+				       cfg->pll_opdiv * cfg->pll_ipdiv,
+				       pll_ref_hz);
+	fbdiv = DIV_ROUND_UP_ULL(dlane_bps * 2 * cfg->pll_opdiv *
+				 cfg->pll_ipdiv,
+				 pll_ref_hz);
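+	/*
+	 * A worked example with hypothetical numbers: for pll_ref_hz = 25 MHz
+	 * (ipdiv = 2) and dlane_bps = 500 Mbps (opdiv = 4), the lower bound is
+	 * fbdiv = DIV_ROUND_UP(500e6 * 2 * 4 * 2, 25e6) = 320, and with the
+	 * 0.2% margin fbdiv_max = (501e6 * 16) / 25e6 = 320 as well, so only
+	 * fbdiv = 320 is tried by the loop below.
+	 */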
+
+	/*
+	 * Iterate over all acceptable fbdiv and try to find an adjusted DSI
+	 * htotal length providing an exact match.
+	 *
+	 * Note that we could do something even trickier by relying on the fact
+	 * that a new line is not necessarily aligned on a lane boundary, so,
+	 * by making adj_dsi_htotal non-aligned on dsi_nlanes, we can improve
+	 * the precision a bit. With this, the step would be
+	 *
+	 *	pll_ref_hz / (2 * opdiv * ipdiv * nlanes)
+	 *
+	 * instead of
+	 *
+	 *	pll_ref_hz / (2 * opdiv * ipdiv)
+	 *
+	 * The drawback of this approach is that we would need to make sure the
+	 * number of lines is a multiple of the realignment periodicity, which is
+	 * a function of the number of lanes and the original misalignment. For
+	 * example, for NLANES = 4 and HTOTAL % NLANES = 3, it takes 4 lines
+	 * to realign on a lane:
+	 * LINE 0: expected number of bytes, starts emitting first byte of
+	 *	   LINE 1 on LANE 3
+	 * LINE 1: expected number of bytes, starts emitting first 2 bytes of
+	 *	   LINE 2 on LANES 2 and 3
+	 * LINE 2: expected number of bytes, starts emitting first 3 bytes of
+	 *	   LINE 3 on LANES 1, 2 and 3
+	 * LINE 3: one byte less, now things are realigned on LANE 0 for LINE 4
+	 *
+	 * I figured this extra complexity was not worth the benefit, but if
+	 * someone really has unfixable mismatch, that would be something to
+	 * investigate.
+	 */
+	for (; fbdiv <= fbdiv_max; fbdiv++) {
+		u32 rem;
+
+		adj_dsi_htotal = (u64)fbdiv * pll_ref_hz * dsi_nlanes *
+				 dpi_htotal;
+
+		/*
+		 * Do the division in 2 steps to avoid an overflow on the
+		 * divider.
+		 */
+		rem = do_div(adj_dsi_htotal, dpi_hz);
+		if (rem)
+			continue;
+
+		rem = do_div(adj_dsi_htotal,
+			     cfg->pll_opdiv * cfg->pll_ipdiv * 2 * 8);
+		if (rem)
+			continue;
+
+		cfg->pll_fbdiv = fbdiv;
+		*dsi_hfp_ext = adj_dsi_htotal - dsi_htotal;
+		break;
+	}
+
+	/* No match, let's just reject the display mode. */
+	if (!cfg->pll_fbdiv)
+		return -EINVAL;
+
+	dlane_bps = DIV_ROUND_DOWN_ULL((u64)dpi_hz * adj_dsi_htotal * 8,
+				       dsi_nlanes * dpi_htotal);
+	cfg->lane_bps = dlane_bps;
+
+	return 0;
+}
+
+static int cdns_dphy_setup_psm(struct cdns_dphy *dphy)
+{
+	unsigned long psm_clk_hz = clk_get_rate(dphy->psm_clk);
+	unsigned long psm_div;
+
+	if (!psm_clk_hz || psm_clk_hz > 100000000)
+		return -EINVAL;
+
+	psm_div = DIV_ROUND_CLOSEST(psm_clk_hz, 1000000);
+	if (dphy->ops->set_psm_div)
+		dphy->ops->set_psm_div(dphy, psm_div);
+
+	return 0;
+}
+
+static void cdns_dphy_set_clk_lane_cfg(struct cdns_dphy *dphy,
+				       enum cdns_dphy_clk_lane_cfg cfg)
+{
+	if (dphy->ops->set_clk_lane_cfg)
+		dphy->ops->set_clk_lane_cfg(dphy, cfg);
+}
+
+static void cdns_dphy_set_pll_cfg(struct cdns_dphy *dphy,
+				  const struct cdns_dphy_cfg *cfg)
+{
+	if (dphy->ops->set_pll_cfg)
+		dphy->ops->set_pll_cfg(dphy, cfg);
+}
+
+static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy)
+{
+	return dphy->ops->get_wakeup_time_ns(dphy);
+}
+
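+/*
+ * Example with hypothetical numbers: a 40-pixel DPI HBP at 24 bpp converts
+ * to DIV_ROUND_UP(40 * 24, 8) = 120 DSI bytes; after subtracting the
+ * 12-byte DSI_HBP_FRAME_OVERHEAD, 108 bytes remain for the payload.
+ */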
+static unsigned int dpi_to_dsi_timing(unsigned int dpi_timing,
+				      unsigned int dpi_bpp,
+				      unsigned int dsi_pkt_overhead)
+{
+	unsigned int dsi_timing = DIV_ROUND_UP(dpi_timing * dpi_bpp, 8);
+
+	if (dsi_timing < dsi_pkt_overhead)
+		dsi_timing = 0;
+	else
+		dsi_timing -= dsi_pkt_overhead;
+
+	return dsi_timing;
+}
+
+static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi,
+			     const struct drm_display_mode *mode,
+			     struct cdns_dsi_cfg *dsi_cfg,
+			     struct cdns_dphy_cfg *dphy_cfg,
+			     bool mode_valid_check)
+{
+	unsigned long dsi_htotal = 0, dsi_hss_hsa_hse_hbp = 0;
+	struct cdns_dsi_output *output = &dsi->output;
+	unsigned int dsi_hfp_ext = 0, dpi_hfp, tmp;
+	bool sync_pulse = false;
+	int bpp, nlanes, ret;
+
+	memset(dsi_cfg, 0, sizeof(*dsi_cfg));
+
+	if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE)
+		sync_pulse = true;
+
+	bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format);
+	nlanes = output->dev->lanes;
+
+	if (mode_valid_check)
+		tmp = mode->htotal -
+		      (sync_pulse ? mode->hsync_end : mode->hsync_start);
+	else
+		tmp = mode->crtc_htotal -
+		      (sync_pulse ?
+		       mode->crtc_hsync_end : mode->crtc_hsync_start);
+
+	dsi_cfg->hbp = dpi_to_dsi_timing(tmp, bpp, DSI_HBP_FRAME_OVERHEAD);
+	dsi_htotal += dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD;
+	dsi_hss_hsa_hse_hbp += dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD;
+
+	if (sync_pulse) {
+		if (mode_valid_check)
+			tmp = mode->hsync_end - mode->hsync_start;
+		else
+			tmp = mode->crtc_hsync_end - mode->crtc_hsync_start;
+
+		dsi_cfg->hsa = dpi_to_dsi_timing(tmp, bpp,
+						 DSI_HSA_FRAME_OVERHEAD);
+		dsi_htotal += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD;
+		dsi_hss_hsa_hse_hbp += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD;
+	}
+
+	dsi_cfg->hact = dpi_to_dsi_timing(mode_valid_check ?
+					  mode->hdisplay : mode->crtc_hdisplay,
+					  bpp, 0);
+	dsi_htotal += dsi_cfg->hact;
+
+	if (mode_valid_check)
+		dpi_hfp = mode->hsync_start - mode->hdisplay;
+	else
+		dpi_hfp = mode->crtc_hsync_start - mode->crtc_hdisplay;
+
+	dsi_cfg->hfp = dpi_to_dsi_timing(dpi_hfp, bpp, DSI_HFP_FRAME_OVERHEAD);
+	dsi_htotal += dsi_cfg->hfp + DSI_HFP_FRAME_OVERHEAD;
+
+	if (mode_valid_check)
+		ret = cdns_dsi_get_dphy_pll_cfg(dsi->dphy, dphy_cfg,
+						mode->htotal, bpp,
+						mode->clock * 1000,
+						dsi_htotal, nlanes,
+						&dsi_hfp_ext);
+	else
+		ret = cdns_dsi_get_dphy_pll_cfg(dsi->dphy, dphy_cfg,
+						mode->crtc_htotal, bpp,
+						mode->crtc_clock * 1000,
+						dsi_htotal, nlanes,
+						&dsi_hfp_ext);
+
+	if (ret)
+		return ret;
+
+	dsi_cfg->hfp += dsi_hfp_ext;
+	dsi_htotal += dsi_hfp_ext;
+	dsi_cfg->htotal = dsi_htotal;
+
+	/*
+	 * Make sure DPI(HFP) > DSI(HSS+HSA+HSE+HBP) to guarantee that the FIFO
+	 * is empty before we start receiving a new line on the DPI
+	 * interface.
+	 */
+	if ((u64)dphy_cfg->lane_bps * dpi_hfp * nlanes <
+	    (u64)dsi_hss_hsa_hse_hbp *
+	    (mode_valid_check ? mode->clock : mode->crtc_clock) * 1000)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int cdns_dsi_bridge_attach(struct drm_bridge *bridge)
+{
+	struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge);
+	struct cdns_dsi *dsi = input_to_dsi(input);
+	struct cdns_dsi_output *output = &dsi->output;
+
+	if (!drm_core_check_feature(bridge->dev, DRIVER_ATOMIC)) {
+		dev_err(dsi->base.dev,
+			"cdns-dsi driver is only compatible with DRM devices supporting atomic updates");
+		return -ENOTSUPP;
+	}
+
+	return drm_bridge_attach(bridge->encoder, output->bridge, bridge);
+}
+
+static enum drm_mode_status
+cdns_dsi_bridge_mode_valid(struct drm_bridge *bridge,
+			   const struct drm_display_mode *mode)
+{
+	struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge);
+	struct cdns_dsi *dsi = input_to_dsi(input);
+	struct cdns_dsi_output *output = &dsi->output;
+	struct cdns_dphy_cfg dphy_cfg;
+	struct cdns_dsi_cfg dsi_cfg;
+	int bpp, nlanes, ret;
+
+	/*
+	 * VFP_DSI should be less than VFP_DPI and VFP_DSI should be at
+	 * least 1.
+	 */
+	if (mode->vtotal - mode->vsync_end < 2)
+		return MODE_V_ILLEGAL;
+
+	/* VSA_DSI = VSA_DPI and must be at least 2. */
+	if (mode->vsync_end - mode->vsync_start < 2)
+		return MODE_V_ILLEGAL;
+
+	/* HACT must be 32-bit aligned. */
+	bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format);
+	if ((mode->hdisplay * bpp) % 32)
+		return MODE_H_ILLEGAL;
+
+	nlanes = output->dev->lanes;
+
+	ret = cdns_dsi_mode2cfg(dsi, mode, &dsi_cfg, &dphy_cfg, true);
+	if (ret)
+		return MODE_CLOCK_RANGE;
+
+	return MODE_OK;
+}
+
+static void cdns_dsi_bridge_disable(struct drm_bridge *bridge)
+{
+	struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge);
+	struct cdns_dsi *dsi = input_to_dsi(input);
+	u32 val;
+
+	val = readl(dsi->regs + MCTL_MAIN_DATA_CTL);
+	val &= ~(IF_VID_SELECT_MASK | IF_VID_MODE | VID_EN | HOST_EOT_GEN |
+		 DISP_EOT_GEN);
+	writel(val, dsi->regs + MCTL_MAIN_DATA_CTL);
+
+	val = readl(dsi->regs + MCTL_MAIN_EN) & ~IF_EN(input->id);
+	writel(val, dsi->regs + MCTL_MAIN_EN);
+	pm_runtime_put(dsi->base.dev);
+}
+
+static void cdns_dsi_hs_init(struct cdns_dsi *dsi,
+			     const struct cdns_dphy_cfg *dphy_cfg)
+{
+	u32 status;
+
+	/*
+	 * Power all internal DPHY blocks down and keep their reset lines
+	 * asserted before changing the DPHY config.
+	 */
+	writel(DPHY_CMN_PSO | DPHY_PLL_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN |
+	       DPHY_CMN_PDN | DPHY_PLL_PDN,
+	       dsi->regs + MCTL_DPHY_CFG0);
+
+	/*
+	 * Configure the internal PSM clk divider so that the DPHY has a
+	 * 1MHz clk (or something close).
+	 */
+	WARN_ON_ONCE(cdns_dphy_setup_psm(dsi->dphy));
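+	/*
+	 * E.g. a hypothetical 27 MHz psm_clk gives
+	 * DIV_ROUND_CLOSEST(27000000, 1000000) = 27, i.e. 27 MHz / 27 = 1 MHz.
+	 */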
+
+	/*
+	 * Configure how clk lanes attach to data lanes: the DPHY has 2 clk
+	 * lanes and 8 data lanes, and each clk lane can be attached to a
+	 * different set of data lanes. The 2 groups are named 'left' and
+	 * 'right', so here we just say that we want the 'left' clk lane to
+	 * drive the 'left' data lanes.
+	 */
+	cdns_dphy_set_clk_lane_cfg(dsi->dphy, DPHY_CLK_CFG_LEFT_DRIVES_LEFT);
+
+	/*
+	 * Configure the DPHY PLL that will be used to generate the TX byte
+	 * clk.
+	 */
+	cdns_dphy_set_pll_cfg(dsi->dphy, dphy_cfg);
+
+	/* Start TX state machine. */
+	writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN,
+	       dsi->dphy->regs + DPHY_CMN_SSM);
+
+	/* Activate the PLL and wait until it's locked. */
+	writel(PLL_LOCKED, dsi->regs + MCTL_MAIN_STS_CLR);
+	writel(DPHY_CMN_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | DPHY_CMN_PDN,
+	       dsi->regs + MCTL_DPHY_CFG0);
+	WARN_ON_ONCE(readl_poll_timeout(dsi->regs + MCTL_MAIN_STS, status,
+					status & PLL_LOCKED, 100, 100));
+	/* De-assert data and clock reset lines. */
+	writel(DPHY_CMN_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | DPHY_CMN_PDN |
+	       DPHY_D_RSTB(dphy_cfg->nlanes) | DPHY_C_RSTB,
+	       dsi->regs + MCTL_DPHY_CFG0);
+}
+
+static void cdns_dsi_init_link(struct cdns_dsi *dsi)
+{
+	struct cdns_dsi_output *output = &dsi->output;
+	unsigned long sysclk_period, ulpout;
+	u32 val;
+	int i;
+
+	if (dsi->link_initialized)
+		return;
+
+	val = 0;
+	for (i = 1; i < output->dev->lanes; i++)
+		val |= DATA_LANE_EN(i);
+
+	if (!(output->dev->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS))
+		val |= CLK_CONTINUOUS;
+
+	writel(val, dsi->regs + MCTL_MAIN_PHY_CTL);
+
+	/* ULPOUT should be set to 1ms and is expressed in sysclk cycles. */
+	sysclk_period = NSEC_PER_SEC / clk_get_rate(dsi->dsi_sys_clk);
+	ulpout = DIV_ROUND_UP(NSEC_PER_MSEC, sysclk_period);
+	writel(CLK_LANE_ULPOUT_TIME(ulpout) | DATA_LANE_ULPOUT_TIME(ulpout),
+	       dsi->regs + MCTL_ULPOUT_TIME);
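+	/*
+	 * E.g. assuming a hypothetical 150 MHz dsi_sys_clk: sysclk_period =
+	 * 10^9 / 150000000 = 6 ns (integer division), so ulpout =
+	 * DIV_ROUND_UP(1000000, 6) = 166667 cycles, i.e. roughly 1 ms.
+	 */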
+
+	writel(LINK_EN, dsi->regs + MCTL_MAIN_DATA_CTL);
+
+	val = CLK_LANE_EN | PLL_START;
+	for (i = 0; i < output->dev->lanes; i++)
+		val |= DATA_LANE_START(i);
+
+	writel(val, dsi->regs + MCTL_MAIN_EN);
+
+	dsi->link_initialized = true;
+}
+
+static void cdns_dsi_bridge_enable(struct drm_bridge *bridge)
+{
+	struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge);
+	struct cdns_dsi *dsi = input_to_dsi(input);
+	struct cdns_dsi_output *output = &dsi->output;
+	struct drm_display_mode *mode;
+	struct cdns_dphy_cfg dphy_cfg;
+	unsigned long tx_byte_period;
+	struct cdns_dsi_cfg dsi_cfg;
+	u32 tmp, reg_wakeup, div;
+	int bpp, nlanes;
+
+	if (WARN_ON(pm_runtime_get_sync(dsi->base.dev) < 0))
+		return;
+
+	mode = &bridge->encoder->crtc->state->adjusted_mode;
+	bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format);
+	nlanes = output->dev->lanes;
+
+	WARN_ON_ONCE(cdns_dsi_mode2cfg(dsi, mode, &dsi_cfg, &dphy_cfg, false));
+
+	cdns_dsi_hs_init(dsi, &dphy_cfg);
+	cdns_dsi_init_link(dsi);
+
+	writel(HBP_LEN(dsi_cfg.hbp) | HSA_LEN(dsi_cfg.hsa),
+	       dsi->regs + VID_HSIZE1);
+	writel(HFP_LEN(dsi_cfg.hfp) | HACT_LEN(dsi_cfg.hact),
+	       dsi->regs + VID_HSIZE2);
+
+	writel(VBP_LEN(mode->crtc_vtotal - mode->crtc_vsync_end - 1) |
+	       VFP_LEN(mode->crtc_vsync_start - mode->crtc_vdisplay) |
+	       VSA_LEN(mode->crtc_vsync_end - mode->crtc_vsync_start + 1),
+	       dsi->regs + VID_VSIZE1);
+	writel(mode->crtc_vdisplay, dsi->regs + VID_VSIZE2);
+
+	tmp = dsi_cfg.htotal -
+	      (dsi_cfg.hsa + DSI_BLANKING_FRAME_OVERHEAD +
+	       DSI_HSA_FRAME_OVERHEAD);
+	writel(BLK_LINE_PULSE_PKT_LEN(tmp), dsi->regs + VID_BLKSIZE2);
+	if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE)
+		writel(MAX_LINE_LIMIT(tmp - DSI_NULL_FRAME_OVERHEAD),
+		       dsi->regs + VID_VCA_SETTING2);
+
+	tmp = dsi_cfg.htotal -
+	      (DSI_HSS_VSS_VSE_FRAME_OVERHEAD + DSI_BLANKING_FRAME_OVERHEAD);
+	writel(BLK_LINE_EVENT_PKT_LEN(tmp), dsi->regs + VID_BLKSIZE1);
+	if (!(output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE))
+		writel(MAX_LINE_LIMIT(tmp - DSI_NULL_FRAME_OVERHEAD),
+		       dsi->regs + VID_VCA_SETTING2);
+
+	tmp = DIV_ROUND_UP(dsi_cfg.htotal, nlanes) -
+	      DIV_ROUND_UP(dsi_cfg.hsa, nlanes);
+
+	if (!(output->dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET))
+		tmp -= DIV_ROUND_UP(DSI_EOT_PKT_SIZE, nlanes);
+
+	tx_byte_period = DIV_ROUND_DOWN_ULL((u64)NSEC_PER_SEC * 8,
+					    dphy_cfg.lane_bps);
+	reg_wakeup = cdns_dphy_get_wakeup_time_ns(dsi->dphy) /
+		     tx_byte_period;
+	writel(REG_WAKEUP_TIME(reg_wakeup) | REG_LINE_DURATION(tmp),
+	       dsi->regs + VID_DPHY_TIME);
+
+	/*
+	 * HSTX and LPRX timeouts are both expressed in TX byte clk cycles and
+	 * both should be set to at least the time it takes to transmit a
+	 * frame.
+	 */
+	tmp = NSEC_PER_SEC / drm_mode_vrefresh(mode);
+	tmp /= tx_byte_period;
+
+	for (div = 0; div <= CLK_DIV_MAX; div++) {
+		if (tmp <= HSTX_TIMEOUT_MAX)
+			break;
+
+		tmp >>= 1;
+	}
+
+	if (tmp > HSTX_TIMEOUT_MAX)
+		tmp = HSTX_TIMEOUT_MAX;
+
+	writel(CLK_DIV(div) | HSTX_TIMEOUT(tmp),
+	       dsi->regs + MCTL_DPHY_TIMEOUT1);
+
+	writel(LPRX_TIMEOUT(tmp), dsi->regs + MCTL_DPHY_TIMEOUT2);
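+	/*
+	 * Worked example (hypothetical numbers): for a 60 Hz mode with
+	 * tx_byte_period = 4 ns, tmp starts at 16666666 / 4 = 4166666 cycles;
+	 * if HSTX_TIMEOUT_MAX were 0x3ffff (262143), the loop above would
+	 * settle on div = 4, since 4166666 >> 4 = 260416 fits the field.
+	 */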
+
+	if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO) {
+		switch (output->dev->format) {
+		case MIPI_DSI_FMT_RGB888:
+			tmp = VID_PIXEL_MODE_RGB888 |
+			      VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_24);
+			break;
+
+		case MIPI_DSI_FMT_RGB666:
+			tmp = VID_PIXEL_MODE_RGB666 |
+			      VID_DATATYPE(MIPI_DSI_PIXEL_STREAM_3BYTE_18);
+			break;
+
+		case MIPI_DSI_FMT_RGB666_PACKED:
+			tmp = VID_PIXEL_MODE_RGB666_PACKED |
+			      VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_18);
+			break;
+
+		case MIPI_DSI_FMT_RGB565:
+			tmp = VID_PIXEL_MODE_RGB565 |
+			      VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_16);
+			break;
+
+		default:
+			dev_err(dsi->base.dev, "Unsupported DSI format\n");
+			return;
+		}
+
+		if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE)
+			tmp |= SYNC_PULSE_ACTIVE | SYNC_PULSE_HORIZONTAL;
+
+		tmp |= REG_BLKLINE_MODE(REG_BLK_MODE_BLANKING_PKT) |
+		       REG_BLKEOL_MODE(REG_BLK_MODE_BLANKING_PKT) |
+		       RECOVERY_MODE(RECOVERY_MODE_NEXT_HSYNC) |
+		       VID_IGNORE_MISS_VSYNC;
+
+		writel(tmp, dsi->regs + VID_MAIN_CTL);
+	}
+
+	tmp = readl(dsi->regs + MCTL_MAIN_DATA_CTL);
+	tmp &= ~(IF_VID_SELECT_MASK | HOST_EOT_GEN | IF_VID_MODE);
+
+	if (!(output->dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET))
+		tmp |= HOST_EOT_GEN;
+
+	if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO)
+		tmp |= IF_VID_MODE | IF_VID_SELECT(input->id) | VID_EN;
+
+	writel(tmp, dsi->regs + MCTL_MAIN_DATA_CTL);
+
+	tmp = readl(dsi->regs + MCTL_MAIN_EN) | IF_EN(input->id);
+	writel(tmp, dsi->regs + MCTL_MAIN_EN);
+}
+
+static const struct drm_bridge_funcs cdns_dsi_bridge_funcs = {
+	.attach = cdns_dsi_bridge_attach,
+	.mode_valid = cdns_dsi_bridge_mode_valid,
+	.disable = cdns_dsi_bridge_disable,
+	.enable = cdns_dsi_bridge_enable,
+};
+
+static int cdns_dsi_attach(struct mipi_dsi_host *host,
+			   struct mipi_dsi_device *dev)
+{
+	struct cdns_dsi *dsi = to_cdns_dsi(host);
+	struct cdns_dsi_output *output = &dsi->output;
+	struct cdns_dsi_input *input = &dsi->input;
+	struct drm_bridge *bridge;
+	struct drm_panel *panel;
+	struct device_node *np;
+	int ret;
+
+	/*
+	 * We currently do not support connecting several DSI devices to the
+	 * same host. In order to support that we'd need the DRM bridge
+	 * framework to allow dynamic reconfiguration of the bridge chain.
+	 */
+	if (output->dev)
+		return -EBUSY;
+
+	/* We do not support burst mode yet. */
+	if (dev->mode_flags & MIPI_DSI_MODE_VIDEO_BURST)
+		return -ENOTSUPP;
+
+	/*
+	 * The host <-> device link might be described using an OF-graph
+	 * representation; in this case we extract the device of_node from
+	 * that representation. Otherwise we use dev->dev.of_node, which
+	 * should have been filled by the core.
+	 */
+	np = of_graph_get_remote_node(dsi->base.dev->of_node, DSI_OUTPUT_PORT,
+				      dev->channel);
+	if (!np)
+		np = of_node_get(dev->dev.of_node);
+
+	panel = of_drm_find_panel(np);
+	if (panel) {
+		bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DSI);
+	} else {
+		bridge = of_drm_find_bridge(dev->dev.of_node);
+		if (!bridge)
+			bridge = ERR_PTR(-EINVAL);
+	}
+
+	of_node_put(np);
+
+	if (IS_ERR(bridge)) {
+		ret = PTR_ERR(bridge);
+		dev_err(host->dev, "failed to add DSI device %s (err = %d)",
+			dev->name, ret);
+		return ret;
+	}
+
+	output->dev = dev;
+	output->bridge = bridge;
+	output->panel = panel;
+
+	/*
+	 * The DSI output has been properly configured; we can now safely
+	 * register the input to the bridge framework so that it can take its
+	 * place in a display pipeline.
+	 */
+	drm_bridge_add(&input->bridge);
+
+	return 0;
+}
+
+static int cdns_dsi_detach(struct mipi_dsi_host *host,
+			   struct mipi_dsi_device *dev)
+{
+	struct cdns_dsi *dsi = to_cdns_dsi(host);
+	struct cdns_dsi_output *output = &dsi->output;
+	struct cdns_dsi_input *input = &dsi->input;
+
+	drm_bridge_remove(&input->bridge);
+	if (output->panel)
+		drm_panel_bridge_remove(output->bridge);
+
+	return 0;
+}
+
+static irqreturn_t cdns_dsi_interrupt(int irq, void *data)
+{
+	struct cdns_dsi *dsi = data;
+	irqreturn_t ret = IRQ_NONE;
+	u32 flag, ctl;
+
+	flag = readl(dsi->regs + DIRECT_CMD_STS_FLAG);
+	if (flag) {
+		ctl = readl(dsi->regs + DIRECT_CMD_STS_CTL);
+		ctl &= ~flag;
+		writel(ctl, dsi->regs + DIRECT_CMD_STS_CTL);
+		complete(&dsi->direct_cmd_comp);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static ssize_t cdns_dsi_transfer(struct mipi_dsi_host *host,
+				 const struct mipi_dsi_msg *msg)
+{
+	struct cdns_dsi *dsi = to_cdns_dsi(host);
+	u32 cmd, sts, val, wait = WRITE_COMPLETED, ctl = 0;
+	struct mipi_dsi_packet packet;
+	int ret, i, tx_len, rx_len;
+
+	ret = pm_runtime_get_sync(host->dev);
+	if (ret < 0)
+		return ret;
+
+	cdns_dsi_init_link(dsi);
+
+	ret = mipi_dsi_create_packet(&packet, msg);
+	if (ret)
+		goto out;
+
+	tx_len = msg->tx_buf ? msg->tx_len : 0;
+	rx_len = msg->rx_buf ? msg->rx_len : 0;
+
+	/* For read operations, the maximum TX len is 2. */
+	if (rx_len && tx_len > 2) {
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+	/* TX len is limited by the CMD FIFO depth. */
+	if (tx_len > dsi->direct_cmd_fifo_depth) {
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+	/* RX len is limited by the RX FIFO depth. */
+	if (rx_len > dsi->rx_fifo_depth) {
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+	cmd = CMD_SIZE(tx_len) | CMD_VCHAN_ID(msg->channel) |
+	      CMD_DATATYPE(msg->type);
+
+	if (msg->flags & MIPI_DSI_MSG_USE_LPM)
+		cmd |= CMD_LP_EN;
+
+	if (mipi_dsi_packet_format_is_long(msg->type))
+		cmd |= CMD_LONG;
+
+	if (rx_len) {
+		cmd |= READ_CMD;
+		wait = READ_COMPLETED_WITH_ERR | READ_COMPLETED;
+		ctl = READ_EN | BTA_EN;
+	} else if (msg->flags & MIPI_DSI_MSG_REQ_ACK) {
+		cmd |= BTA_REQ;
+		wait = ACK_WITH_ERR_RCVD | ACK_RCVD;
+		ctl = BTA_EN;
+	}
+
+	writel(readl(dsi->regs + MCTL_MAIN_DATA_CTL) | ctl,
+	       dsi->regs + MCTL_MAIN_DATA_CTL);
+
+	writel(cmd, dsi->regs + DIRECT_CMD_MAIN_SETTINGS);
+
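+	/*
+	 * Pack the TX payload into 32-bit little-endian words: e.g. the bytes
+	 * { 0x01, 0x02, 0x03, 0x04 } are written as the single word
+	 * 0x04030201.
+	 */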
+	for (i = 0; i < tx_len; i += 4) {
+		const u8 *buf = msg->tx_buf;
+		int j;
+
+		val = 0;
+		for (j = 0; j < 4 && j + i < tx_len; j++)
+			val |= (u32)buf[i + j] << (8 * j);
+
+		writel(val, dsi->regs + DIRECT_CMD_WRDATA);
+	}
+
+	/* Clear status flags before sending the command. */
+	writel(wait, dsi->regs + DIRECT_CMD_STS_CLR);
+	writel(wait, dsi->regs + DIRECT_CMD_STS_CTL);
+	reinit_completion(&dsi->direct_cmd_comp);
+	writel(0, dsi->regs + DIRECT_CMD_SEND);
+
+	wait_for_completion_timeout(&dsi->direct_cmd_comp,
+				    msecs_to_jiffies(1000));
+
+	sts = readl(dsi->regs + DIRECT_CMD_STS);
+	writel(wait, dsi->regs + DIRECT_CMD_STS_CLR);
+	writel(0, dsi->regs + DIRECT_CMD_STS_CTL);
+
+	writel(readl(dsi->regs + MCTL_MAIN_DATA_CTL) & ~ctl,
+	       dsi->regs + MCTL_MAIN_DATA_CTL);
+
+	/* We did not receive the events we were waiting for. */
+	if (!(sts & wait)) {
+		ret = -ETIMEDOUT;
+		goto out;
+	}
+
+	/* 'READ' or 'WRITE with ACK' failed. */
+	if (sts & (READ_COMPLETED_WITH_ERR | ACK_WITH_ERR_RCVD)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	for (i = 0; i < rx_len; i += 4) {
+		u8 *buf = msg->rx_buf;
+		int j;
+
+		val = readl(dsi->regs + DIRECT_CMD_RDDATA);
+		for (j = 0; j < 4 && j + i < rx_len; j++)
+			buf[i + j] = val >> (8 * j);
+	}
+
+out:
+	pm_runtime_put(host->dev);
+	return ret;
+}
+
+static const struct mipi_dsi_host_ops cdns_dsi_ops = {
+	.attach = cdns_dsi_attach,
+	.detach = cdns_dsi_detach,
+	.transfer = cdns_dsi_transfer,
+};
+
+static int cdns_dsi_resume(struct device *dev)
+{
+	struct cdns_dsi *dsi = dev_get_drvdata(dev);
+
+	reset_control_deassert(dsi->dsi_p_rst);
+	clk_prepare_enable(dsi->dsi_p_clk);
+	clk_prepare_enable(dsi->dsi_sys_clk);
+	clk_prepare_enable(dsi->dphy->psm_clk);
+	clk_prepare_enable(dsi->dphy->pll_ref_clk);
+
+	return 0;
+}
+
+static int cdns_dsi_suspend(struct device *dev)
+{
+	struct cdns_dsi *dsi = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(dsi->dphy->pll_ref_clk);
+	clk_disable_unprepare(dsi->dphy->psm_clk);
+	clk_disable_unprepare(dsi->dsi_sys_clk);
+	clk_disable_unprepare(dsi->dsi_p_clk);
+	reset_control_assert(dsi->dsi_p_rst);
+	dsi->link_initialized = false;
+	return 0;
+}
+
+static UNIVERSAL_DEV_PM_OPS(cdns_dsi_pm_ops, cdns_dsi_suspend, cdns_dsi_resume,
+			    NULL);
+
+static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy)
+{
+	/* Default wakeup time is 800 ns (in a simulated environment). */
+	return 800;
+}
+
+static void cdns_dphy_ref_set_pll_cfg(struct cdns_dphy *dphy,
+				      const struct cdns_dphy_cfg *cfg)
+{
+	u32 fbdiv_low, fbdiv_high;
+
+	fbdiv_low = (cfg->pll_fbdiv / 4) - 2;
+	fbdiv_high = cfg->pll_fbdiv - fbdiv_low - 2;
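+	/*
+	 * E.g. with the hypothetical pll_fbdiv = 320 used earlier:
+	 * fbdiv_low = (320 / 4) - 2 = 78 and fbdiv_high = 320 - 78 - 2 = 240,
+	 * so fbdiv_low + fbdiv_high = pll_fbdiv - 2.
+	 */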
+
+	writel(DPHY_CMN_IPDIV_FROM_REG | DPHY_CMN_OPDIV_FROM_REG |
+	       DPHY_CMN_IPDIV(cfg->pll_ipdiv) |
+	       DPHY_CMN_OPDIV(cfg->pll_opdiv),
+	       dphy->regs + DPHY_CMN_OPIPDIV);
+	writel(DPHY_CMN_FBDIV_FROM_REG |
+	       DPHY_CMN_FBDIV_VAL(fbdiv_low, fbdiv_high),
+	       dphy->regs + DPHY_CMN_FBDIV);
+	writel(DPHY_CMN_PWM_HIGH(6) | DPHY_CMN_PWM_LOW(0x101) |
+	       DPHY_CMN_PWM_DIV(0x8),
+	       dphy->regs + DPHY_CMN_PWM);
+}
+
+static void cdns_dphy_ref_set_psm_div(struct cdns_dphy *dphy, u8 div)
+{
+	writel(DPHY_PSM_CFG_FROM_REG | DPHY_PSM_CLK_DIV(div),
+	       dphy->regs + DPHY_PSM_CFG);
+}
+
+/*
+ * This is the reference implementation of DPHY hooks. Specific integrations
+ * of this IP may have to re-implement some of them depending on how things
+ * were wired in the SoC.
+ */
+static const struct cdns_dphy_ops ref_dphy_ops = {
+	.get_wakeup_time_ns = cdns_dphy_ref_get_wakeup_time_ns,
+	.set_pll_cfg = cdns_dphy_ref_set_pll_cfg,
+	.set_psm_div = cdns_dphy_ref_set_psm_div,
+};
+
+static const struct of_device_id cdns_dphy_of_match[] = {
+	{ .compatible = "cdns,dphy", .data = &ref_dphy_ops },
+	{ /* sentinel */ },
+};
+
+static struct cdns_dphy *cdns_dphy_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct cdns_dphy *dphy;
+	struct of_phandle_args args;
+	struct resource res;
+	int ret;
+
+	ret = of_parse_phandle_with_args(pdev->dev.of_node, "phys",
+					 "#phy-cells", 0, &args);
+	if (ret)
+		return ERR_PTR(-ENOENT);
+
+	match = of_match_node(cdns_dphy_of_match, args.np);
+	if (!match || !match->data)
+		return ERR_PTR(-EINVAL);
+
+	dphy = devm_kzalloc(&pdev->dev, sizeof(*dphy), GFP_KERNEL);
+	if (!dphy)
+		return ERR_PTR(-ENOMEM);
+
+	dphy->ops = match->data;
+
+	ret = of_address_to_resource(args.np, 0, &res);
+	if (ret)
+		return ERR_PTR(ret);
+
+	dphy->regs = devm_ioremap_resource(&pdev->dev, &res);
+	if (IS_ERR(dphy->regs))
+		return ERR_CAST(dphy->regs);
+
+	dphy->psm_clk = of_clk_get_by_name(args.np, "psm");
+	if (IS_ERR(dphy->psm_clk))
+		return ERR_CAST(dphy->psm_clk);
+
+	dphy->pll_ref_clk = of_clk_get_by_name(args.np, "pll_ref");
+	if (IS_ERR(dphy->pll_ref_clk)) {
+		ret = PTR_ERR(dphy->pll_ref_clk);
+		goto err_put_psm_clk;
+	}
+
+	if (dphy->ops->probe) {
+		ret = dphy->ops->probe(dphy);
+		if (ret)
+			goto err_put_pll_ref_clk;
+	}
+
+	return dphy;
+
+err_put_pll_ref_clk:
+	clk_put(dphy->pll_ref_clk);
+
+err_put_psm_clk:
+	clk_put(dphy->psm_clk);
+
+	return ERR_PTR(ret);
+}
+
+static void cdns_dphy_remove(struct cdns_dphy *dphy)
+{
+	if (dphy->ops->remove)
+		dphy->ops->remove(dphy);
+
+	clk_put(dphy->pll_ref_clk);
+	clk_put(dphy->psm_clk);
+}
+
+static int cdns_dsi_drm_probe(struct platform_device *pdev)
+{
+	struct cdns_dsi *dsi;
+	struct cdns_dsi_input *input;
+	struct resource *res;
+	int ret, irq;
+	u32 val;
+
+	dsi = devm_kzalloc(&pdev->dev, sizeof(*dsi), GFP_KERNEL);
+	if (!dsi)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, dsi);
+
+	input = &dsi->input;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dsi->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dsi->regs))
+		return PTR_ERR(dsi->regs);
+
+	dsi->dsi_p_clk = devm_clk_get(&pdev->dev, "dsi_p_clk");
+	if (IS_ERR(dsi->dsi_p_clk))
+		return PTR_ERR(dsi->dsi_p_clk);
+
+	dsi->dsi_p_rst = devm_reset_control_get_optional_exclusive(&pdev->dev,
+								"dsi_p_rst");
+	if (IS_ERR(dsi->dsi_p_rst))
+		return PTR_ERR(dsi->dsi_p_rst);
+
+	dsi->dsi_sys_clk = devm_clk_get(&pdev->dev, "dsi_sys_clk");
+	if (IS_ERR(dsi->dsi_sys_clk))
+		return PTR_ERR(dsi->dsi_sys_clk);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	dsi->dphy = cdns_dphy_probe(pdev);
+	if (IS_ERR(dsi->dphy))
+		return PTR_ERR(dsi->dphy);
+
+	ret = clk_prepare_enable(dsi->dsi_p_clk);
+	if (ret)
+		goto err_remove_dphy;
+
+	val = readl(dsi->regs + ID_REG);
+	if (REV_VENDOR_ID(val) != 0xcad) {
+		dev_err(&pdev->dev, "invalid vendor id\n");
+		ret = -EINVAL;
+		goto err_disable_pclk;
+	}
+
+	val = readl(dsi->regs + IP_CONF);
+	dsi->direct_cmd_fifo_depth = 1 << (DIRCMD_FIFO_DEPTH(val) + 2);
+	dsi->rx_fifo_depth = RX_FIFO_DEPTH(val);
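+	/*
+	 * E.g. a hypothetical DIRCMD_FIFO_DEPTH field value of 2 decodes to a
+	 * depth of 1 << (2 + 2) = 16.
+	 */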
+	init_completion(&dsi->direct_cmd_comp);
+
+	writel(0, dsi->regs + MCTL_MAIN_DATA_CTL);
+	writel(0, dsi->regs + MCTL_MAIN_EN);
+	writel(0, dsi->regs + MCTL_MAIN_PHY_CTL);
+
+	/*
+	 * We only support the DPI input, so force input->id to
+	 * CDNS_DPI_INPUT.
+	 */
+	input->id = CDNS_DPI_INPUT;
+	input->bridge.funcs = &cdns_dsi_bridge_funcs;
+	input->bridge.of_node = pdev->dev.of_node;
+
+	/* Mask all interrupts before registering the IRQ handler. */
+	writel(0, dsi->regs + MCTL_MAIN_STS_CTL);
+	writel(0, dsi->regs + MCTL_DPHY_ERR_CTL1);
+	writel(0, dsi->regs + CMD_MODE_STS_CTL);
+	writel(0, dsi->regs + DIRECT_CMD_STS_CTL);
+	writel(0, dsi->regs + DIRECT_CMD_RD_STS_CTL);
+	writel(0, dsi->regs + VID_MODE_STS_CTL);
+	writel(0, dsi->regs + TVG_STS_CTL);
+	writel(0, dsi->regs + DPI_IRQ_EN);
+	ret = devm_request_irq(&pdev->dev, irq, cdns_dsi_interrupt, 0,
+			       dev_name(&pdev->dev), dsi);
+	if (ret)
+		goto err_disable_pclk;
+
+	pm_runtime_enable(&pdev->dev);
+	dsi->base.dev = &pdev->dev;
+	dsi->base.ops = &cdns_dsi_ops;
+
+	ret = mipi_dsi_host_register(&dsi->base);
+	if (ret)
+		goto err_disable_runtime_pm;
+
+	clk_disable_unprepare(dsi->dsi_p_clk);
+
+	return 0;
+
+err_disable_runtime_pm:
+	pm_runtime_disable(&pdev->dev);
+
+err_disable_pclk:
+	clk_disable_unprepare(dsi->dsi_p_clk);
+
+err_remove_dphy:
+	cdns_dphy_remove(dsi->dphy);
+
+	return ret;
+}
+
+static int cdns_dsi_drm_remove(struct platform_device *pdev)
+{
+	struct cdns_dsi *dsi = platform_get_drvdata(pdev);
+
+	mipi_dsi_host_unregister(&dsi->base);
+	pm_runtime_disable(&pdev->dev);
+	cdns_dphy_remove(dsi->dphy);
+
+	return 0;
+}
+
+static const struct of_device_id cdns_dsi_of_match[] = {
+	{ .compatible = "cdns,dsi" },
+	{ },
+};
+
+static struct platform_driver cdns_dsi_platform_driver = {
+	.probe  = cdns_dsi_drm_probe,
+	.remove = cdns_dsi_drm_remove,
+	.driver = {
+		.name   = "cdns-dsi",
+		.of_match_table = cdns_dsi_of_match,
+		.pm = &cdns_dsi_pm_ops,
+	},
+};
+module_platform_driver(cdns_dsi_platform_driver);
+
+MODULE_AUTHOR("Boris Brezillon <boris.brezillon@bootlin.com>");
+MODULE_DESCRIPTION("Cadence DSI driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:cdns-dsi");
+
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
index 3b7e5c5..8f9c8a6 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
@@ -152,7 +152,6 @@
 	.remove	= snd_dw_hdmi_remove,
 	.driver	= {
 		.name = DRIVER_NAME,
-		.owner = THIS_MODULE,
 	},
 };
 module_platform_driver(snd_dw_hdmi_driver);
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
index 226171a..fd79996 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd
  * Copyright (C) STMicroelectronics SA 2017
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
  * Modified by Philippe Cornu <philippe.cornu@st.com>
  * This generic Synopsys DesignWare MIPI DSI host driver is based on the
  * Rockchip version from rockchip/dw-mipi-dsi.c with phy & bridge APIs.
@@ -775,20 +771,20 @@
 
 	clk_prepare_enable(dsi->pclk);
 
-	ret = phy_ops->get_lane_mbps(priv_data, mode, dsi->mode_flags,
+	ret = phy_ops->get_lane_mbps(priv_data, adjusted_mode, dsi->mode_flags,
 				     dsi->lanes, dsi->format, &dsi->lane_mbps);
 	if (ret)
 		DRM_DEBUG_DRIVER("Phy get_lane_mbps() failed\n");
 
 	pm_runtime_get_sync(dsi->dev);
 	dw_mipi_dsi_init(dsi);
-	dw_mipi_dsi_dpi_config(dsi, mode);
+	dw_mipi_dsi_dpi_config(dsi, adjusted_mode);
 	dw_mipi_dsi_packet_handler_config(dsi);
 	dw_mipi_dsi_video_mode_config(dsi);
-	dw_mipi_dsi_video_packet_config(dsi, mode);
+	dw_mipi_dsi_video_packet_config(dsi, adjusted_mode);
 	dw_mipi_dsi_command_mode_config(dsi);
-	dw_mipi_dsi_line_timer_config(dsi, mode);
-	dw_mipi_dsi_vertical_timing_config(dsi, mode);
+	dw_mipi_dsi_line_timer_config(dsi, adjusted_mode);
+	dw_mipi_dsi_vertical_timing_config(dsi, adjusted_mode);
 
 	dw_mipi_dsi_dphy_init(dsi);
 	dw_mipi_dsi_dphy_timing_config(dsi);
@@ -802,7 +798,7 @@
 
 	dw_mipi_dsi_dphy_enable(dsi);
 
-	dw_mipi_dsi_wait_for_two_frames(mode);
+	dw_mipi_dsi_wait_for_two_frames(adjusted_mode);
 
 	/* Switch to cmd mode for panel-bridge pre_enable & panel prepare */
 	dw_mipi_dsi_set_mode(dsi, 0);
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 08ab7d6a..0fd9cf2 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -1102,7 +1102,7 @@
 	return true;
 }
 
-static int tc_connector_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status tc_connector_mode_valid(struct drm_connector *connector,
 				   struct drm_display_mode *mode)
 {
 	/* DPI interface clock limitation: upto 154 MHz */
diff --git a/drivers/gpu/drm/bridge/thc63lvd1024.c b/drivers/gpu/drm/bridge/thc63lvd1024.c
new file mode 100644
index 0000000..c8b9edd
--- /dev/null
+++ b/drivers/gpu/drm/bridge/thc63lvd1024.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * THC63LVD1024 LVDS to parallel data DRM bridge driver.
+ *
+ * Copyright (C) 2018 Jacopo Mondi <jacopo+renesas@jmondi.org>
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_panel.h>
+
+#include <linux/gpio/consumer.h>
+#include <linux/of_graph.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+enum thc63_ports {
+	THC63_LVDS_IN0,
+	THC63_LVDS_IN1,
+	THC63_RGB_OUT0,
+	THC63_RGB_OUT1,
+};
+
+struct thc63_dev {
+	struct device *dev;
+
+	struct regulator *vcc;
+
+	struct gpio_desc *pdwn;
+	struct gpio_desc *oe;
+
+	struct drm_bridge bridge;
+	struct drm_bridge *next;
+};
+
+static inline struct thc63_dev *to_thc63(struct drm_bridge *bridge)
+{
+	return container_of(bridge, struct thc63_dev, bridge);
+}
+
+static int thc63_attach(struct drm_bridge *bridge)
+{
+	struct thc63_dev *thc63 = to_thc63(bridge);
+
+	return drm_bridge_attach(bridge->encoder, thc63->next, bridge);
+}
+
+static void thc63_enable(struct drm_bridge *bridge)
+{
+	struct thc63_dev *thc63 = to_thc63(bridge);
+	int ret;
+
+	ret = regulator_enable(thc63->vcc);
+	if (ret) {
+		dev_err(thc63->dev,
+			"Failed to enable regulator \"vcc\": %d\n", ret);
+		return;
+	}
+
+	gpiod_set_value(thc63->pdwn, 0);
+	gpiod_set_value(thc63->oe, 1);
+}
+
+static void thc63_disable(struct drm_bridge *bridge)
+{
+	struct thc63_dev *thc63 = to_thc63(bridge);
+	int ret;
+
+	gpiod_set_value(thc63->oe, 0);
+	gpiod_set_value(thc63->pdwn, 1);
+
+	ret = regulator_disable(thc63->vcc);
+	if (ret)
+		dev_err(thc63->dev,
+			"Failed to disable regulator \"vcc\": %d\n", ret);
+}
+
+static const struct drm_bridge_funcs thc63_bridge_func = {
+	.attach	= thc63_attach,
+	.enable = thc63_enable,
+	.disable = thc63_disable,
+};
+
+static int thc63_parse_dt(struct thc63_dev *thc63)
+{
+	struct device_node *thc63_out;
+	struct device_node *remote;
+
+	thc63_out = of_graph_get_endpoint_by_regs(thc63->dev->of_node,
+						  THC63_RGB_OUT0, -1);
+	if (!thc63_out) {
+		dev_err(thc63->dev, "Missing endpoint in port@%u\n",
+			THC63_RGB_OUT0);
+		return -ENODEV;
+	}
+
+	remote = of_graph_get_remote_port_parent(thc63_out);
+	of_node_put(thc63_out);
+	if (!remote) {
+		dev_err(thc63->dev, "Endpoint in port@%u unconnected\n",
+			THC63_RGB_OUT0);
+		return -ENODEV;
+	}
+
+	if (!of_device_is_available(remote)) {
+		dev_err(thc63->dev, "port@%u remote endpoint is disabled\n",
+			THC63_RGB_OUT0);
+		of_node_put(remote);
+		return -ENODEV;
+	}
+
+	thc63->next = of_drm_find_bridge(remote);
+	of_node_put(remote);
+	if (!thc63->next)
+		return -EPROBE_DEFER;
+
+	return 0;
+}
+
+static int thc63_gpio_init(struct thc63_dev *thc63)
+{
+	thc63->oe = devm_gpiod_get_optional(thc63->dev, "oe", GPIOD_OUT_LOW);
+	if (IS_ERR(thc63->oe)) {
+		dev_err(thc63->dev, "Unable to get \"oe-gpios\": %ld\n",
+			PTR_ERR(thc63->oe));
+		return PTR_ERR(thc63->oe);
+	}
+
+	thc63->pdwn = devm_gpiod_get_optional(thc63->dev, "powerdown",
+					      GPIOD_OUT_HIGH);
+	if (IS_ERR(thc63->pdwn)) {
+		dev_err(thc63->dev, "Unable to get \"powerdown-gpios\": %ld\n",
+			PTR_ERR(thc63->pdwn));
+		return PTR_ERR(thc63->pdwn);
+	}
+
+	return 0;
+}
+
+static int thc63_probe(struct platform_device *pdev)
+{
+	struct thc63_dev *thc63;
+	int ret;
+
+	thc63 = devm_kzalloc(&pdev->dev, sizeof(*thc63), GFP_KERNEL);
+	if (!thc63)
+		return -ENOMEM;
+
+	thc63->dev = &pdev->dev;
+	platform_set_drvdata(pdev, thc63);
+
+	thc63->vcc = devm_regulator_get_optional(thc63->dev, "vcc");
+	if (IS_ERR(thc63->vcc)) {
+		if (PTR_ERR(thc63->vcc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		dev_err(thc63->dev, "Unable to get \"vcc\" supply: %ld\n",
+			PTR_ERR(thc63->vcc));
+		return PTR_ERR(thc63->vcc);
+	}
+
+	ret = thc63_gpio_init(thc63);
+	if (ret)
+		return ret;
+
+	ret = thc63_parse_dt(thc63);
+	if (ret)
+		return ret;
+
+	thc63->bridge.driver_private = thc63;
+	thc63->bridge.of_node = pdev->dev.of_node;
+	thc63->bridge.funcs = &thc63_bridge_func;
+
+	drm_bridge_add(&thc63->bridge);
+
+	return 0;
+}
+
+static int thc63_remove(struct platform_device *pdev)
+{
+	struct thc63_dev *thc63 = platform_get_drvdata(pdev);
+
+	drm_bridge_remove(&thc63->bridge);
+
+	return 0;
+}
+
+static const struct of_device_id thc63_match[] = {
+	{ .compatible = "thine,thc63lvd1024", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, thc63_match);
+
+static struct platform_driver thc63_driver = {
+	.probe	= thc63_probe,
+	.remove	= thc63_remove,
+	.driver	= {
+		.name		= "thc63lvd1024",
+		.of_match_table	= thc63_match,
+	},
+};
+module_platform_driver(thc63_driver);
+
+MODULE_AUTHOR("Jacopo Mondi <jacopo@jmondi.org>");
+MODULE_DESCRIPTION("Thine THC63LVD1024 LVDS decoder DRM bridge driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c825c76..895741e 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -791,6 +791,8 @@
 		state->src_w = val;
 	} else if (property == config->prop_src_h) {
 		state->src_h = val;
+	} else if (property == plane->alpha_property) {
+		state->alpha = val;
 	} else if (property == plane->rotation_property) {
 		if (!is_power_of_2(val & DRM_MODE_ROTATE_MASK))
 			return -EINVAL;
@@ -856,6 +858,8 @@
 		*val = state->src_w;
 	} else if (property == config->prop_src_h) {
 		*val = state->src_h;
+	} else if (property == plane->alpha_property) {
+		*val = state->alpha;
 	} else if (property == plane->rotation_property) {
 		*val = state->rotation;
 	} else if (property == plane->zpos_property) {
@@ -1429,7 +1433,9 @@
 {
 	struct drm_plane *plane = plane_state->plane;
 	struct drm_crtc_state *crtc_state;
-
+	/* Nothing to do for the same CRTC. */
+	if (plane_state->crtc == crtc)
+		return 0;
 	if (plane_state->crtc) {
 		crtc_state = drm_atomic_get_crtc_state(plane_state->state,
 						       plane_state->crtc);
@@ -1500,6 +1506,14 @@
  * Otherwise, if &drm_plane_state.fence is not set this function we just set it
  * with the received implicit fence. In both cases this function consumes a
  * reference for @fence.
+ *
+ * This way explicit fencing can be used to overrule implicit fencing, which is
+ * important to make explicit fencing use-cases work: One example is using one
+ * buffer for 2 screens with different refresh rates. Implicit fencing will
+ * clamp rendering to the refresh rate of the slower screen, whereas an
+ * explicit fence allows 2 independent render and display loops on a single
+ * buffer. If a driver obeys both implicit and explicit fences for plane
+ * updates, then
+ * it will break all the benefits of explicit fencing.
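+ *
+ * An illustrative sketch (fence_fd here is hypothetical) of a driver
+ * installing an explicit fence taken from a user-supplied sync_file fd:
+ *
+ *	fence = sync_file_get_fence(fence_fd);
+ *	if (!fence)
+ *		return -EINVAL;
+ *	drm_atomic_set_fence_for_plane(plane_state, fence);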
  */
 void
 drm_atomic_set_fence_for_plane(struct drm_plane_state *plane_state,
@@ -1710,11 +1724,15 @@
 		}
 	}
 
-	if (config->funcs->atomic_check)
+	if (config->funcs->atomic_check) {
 		ret = config->funcs->atomic_check(state->dev, state);
 
-	if (ret)
-		return ret;
+		if (ret) {
+			DRM_DEBUG_ATOMIC("atomic driver check for %p failed: %d\n",
+					 state, ret);
+			return ret;
+		}
+	}
 
 	if (!state->allow_modeset) {
 		for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index c356545..130da51 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -766,7 +766,7 @@
 	if (crtc_state->enable)
 		drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2);
 
-	plane_state->visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale);
+	plane_state->visible = drm_rect_clip_scaled(src, dst, &clip);
 
 	drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation);
 
@@ -875,6 +875,11 @@
  * functions depend upon an updated adjusted_mode.clock to e.g. properly compute
  * watermarks.
  *
+ * Note that zpos normalization will add all enabled planes to the state,
+ * which might not be desired for some drivers.
+ * For example, enabling/disabling a cursor plane which has a fixed zpos
+ * value would force all other enabled planes into the state change.
+ *
  * RETURNS:
  * Zero for success or -errno
  */
@@ -887,6 +892,12 @@
 	if (ret)
 		return ret;
 
+	if (dev->mode_config.normalize_zpos) {
+		ret = drm_atomic_normalize_zpos(dev, state);
+		if (ret)
+			return ret;
+	}
+
 	ret = drm_atomic_helper_check_planes(dev, state);
 	if (ret)
 		return ret;
@@ -1561,6 +1572,17 @@
 	for_each_new_plane_in_state(state, plane, plane_state, i) {
 		funcs = plane->helper_private;
 		funcs->atomic_async_update(plane, plane_state);
+
+		/*
+		 * ->atomic_async_update() is supposed to update the
+		 * plane->state in-place; make sure at least the common
+		 * properties have been properly updated.
+		 */
+		WARN_ON_ONCE(plane->state->fb != plane_state->fb);
+		WARN_ON_ONCE(plane->state->crtc_x != plane_state->crtc_x);
+		WARN_ON_ONCE(plane->state->crtc_y != plane_state->crtc_y);
+		WARN_ON_ONCE(plane->state->src_x != plane_state->src_x);
+		WARN_ON_ONCE(plane->state->src_y != plane_state->src_y);
 	}
 }
 EXPORT_SYMBOL(drm_atomic_helper_async_commit);
@@ -2659,7 +2681,7 @@
 		goto fail;
 	}
 
-	if (plane_state->crtc && (plane == plane->crtc->cursor))
+	if (plane_state->crtc && plane_state->crtc->cursor == plane)
 		plane_state->state->legacy_cursor_update = true;
 
 	ret = __drm_atomic_helper_disable_plane(plane, plane_state);
@@ -2881,31 +2903,9 @@
 	return 0;
 }
 
-/**
- * drm_atomic_helper_disable_all - disable all currently active outputs
- * @dev: DRM device
- * @ctx: lock acquisition context
- *
- * Loops through all connectors, finding those that aren't turned off and then
- * turns them off by setting their DPMS mode to OFF and deactivating the CRTC
- * that they are connected to.
- *
- * This is used for example in suspend/resume to disable all currently active
- * functions when suspending. If you just want to shut down everything at e.g.
- * driver unload, look at drm_atomic_helper_shutdown().
- *
- * Note that if callers haven't already acquired all modeset locks this might
- * return -EDEADLK, which must be handled by calling drm_modeset_backoff().
- *
- * Returns:
- * 0 on success or a negative error code on failure.
- *
- * See also:
- * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and
- * drm_atomic_helper_shutdown().
- */
-int drm_atomic_helper_disable_all(struct drm_device *dev,
-				  struct drm_modeset_acquire_ctx *ctx)
+static int __drm_atomic_helper_disable_all(struct drm_device *dev,
+					   struct drm_modeset_acquire_ctx *ctx,
+					   bool clean_old_fbs)
 {
 	struct drm_atomic_state *state;
 	struct drm_connector_state *conn_state;
@@ -2957,8 +2957,11 @@
 			goto free;
 
 		drm_atomic_set_fb_for_plane(plane_state, NULL);
-		plane_mask |= BIT(drm_plane_index(plane));
-		plane->old_fb = plane->fb;
+
+		if (clean_old_fbs) {
+			plane->old_fb = plane->fb;
+			plane_mask |= BIT(drm_plane_index(plane));
+		}
 	}
 
 	ret = drm_atomic_commit(state);
@@ -2969,6 +2972,34 @@
 	return ret;
 }
 
+/**
+ * drm_atomic_helper_disable_all - disable all currently active outputs
+ * @dev: DRM device
+ * @ctx: lock acquisition context
+ *
+ * Loops through all connectors, finding those that aren't turned off and then
+ * turns them off by setting their DPMS mode to OFF and deactivating the CRTC
+ * that they are connected to.
+ *
+ * This is used for example in suspend/resume to disable all currently active
+ * functions when suspending. If you just want to shut down everything at e.g.
+ * driver unload, look at drm_atomic_helper_shutdown().
+ *
+ * Note that if callers haven't already acquired all modeset locks this might
+ * return -EDEADLK, which must be handled by calling drm_modeset_backoff().
+ *
+ * Returns:
+ * 0 on success or a negative error code on failure.
+ *
+ * See also:
+ * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and
+ * drm_atomic_helper_shutdown().
+ */
+int drm_atomic_helper_disable_all(struct drm_device *dev,
+				  struct drm_modeset_acquire_ctx *ctx)
+{
+	return __drm_atomic_helper_disable_all(dev, ctx, false);
+}
 EXPORT_SYMBOL(drm_atomic_helper_disable_all);
 
 /**
@@ -2991,7 +3022,7 @@
 	while (1) {
 		ret = drm_modeset_lock_all_ctx(dev, &ctx);
 		if (!ret)
-			ret = drm_atomic_helper_disable_all(dev, &ctx);
+			ret = __drm_atomic_helper_disable_all(dev, &ctx, true);
 
 		if (ret != -EDEADLK)
 			break;
@@ -3095,14 +3126,14 @@
 	struct drm_connector_state *new_conn_state;
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *new_crtc_state;
-	unsigned plane_mask = 0;
-	struct drm_device *dev = state->dev;
-	int ret;
 
 	state->acquire_ctx = ctx;
 
 	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
-		plane_mask |= BIT(drm_plane_index(plane));
+		WARN_ON(plane->crtc != new_plane_state->crtc);
+		WARN_ON(plane->fb != new_plane_state->fb);
+		WARN_ON(plane->old_fb);
+
 		state->planes[i].old_state = plane->state;
 	}
 
@@ -3112,11 +3143,7 @@
 	for_each_new_connector_in_state(state, connector, new_conn_state, i)
 		state->connectors[i].old_state = connector->state;
 
-	ret = drm_atomic_commit(state);
-	if (plane_mask)
-		drm_atomic_clean_old_fb(dev, plane_mask, ret);
-
-	return ret;
+	return drm_atomic_commit(state);
 }
 EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state);
 
@@ -3484,6 +3511,10 @@
 	if (plane->state) {
 		plane->state->plane = plane;
 		plane->state->rotation = DRM_MODE_ROTATE_0;
+
+		/* Reset the alpha value to fully opaque if it matters */
+		if (plane->alpha_property)
+			plane->state->alpha = plane->alpha_property->values[1];
 	}
 }
 EXPORT_SYMBOL(drm_atomic_helper_plane_reset);
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index 5a81e1b..a16a74d 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -88,6 +88,13 @@
  * On top of this basic transformation additional properties can be exposed by
  * the driver:
  *
+ * alpha:
+ *	Alpha is set up with drm_plane_create_alpha_property(). It controls
+ *	the plane-wide opacity, from transparent (0) to opaque (0xffff). It
+ *	can be combined with pixel alpha. The pixel values in the
+ *	framebuffers are expected to not be pre-multiplied by the global
+ *	alpha associated with the plane.
+ *
  * rotation:
  *	Rotation is set up with drm_plane_create_rotation_property(). It adds a
  *	rotation and reflection step between the source and destination rectangles.
@@ -106,6 +113,38 @@
  */
 
 /**
+ * drm_plane_create_alpha_property - create a new alpha property
+ * @plane: drm plane
+ *
+ * This function creates a generic, mutable, alpha property and enables support
+ * for it in the DRM core. It is attached to @plane.
+ *
+ * The alpha property will be allowed to be within the bounds of 0
+ * (transparent) to 0xffff (opaque).
+ *
+ * Returns:
+ * 0 on success, negative error code on failure.
+ */
+int drm_plane_create_alpha_property(struct drm_plane *plane)
+{
+	struct drm_property *prop;
+
+	prop = drm_property_create_range(plane->dev, 0, "alpha",
+					 0, DRM_BLEND_ALPHA_OPAQUE);
+	if (!prop)
+		return -ENOMEM;
+
+	drm_object_attach_property(&plane->base, prop, DRM_BLEND_ALPHA_OPAQUE);
+	plane->alpha_property = prop;
+
+	if (plane->state)
+		plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_plane_create_alpha_property);
+
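+/*
+ * Illustrative use from a driver's plane init path (a sketch, not any
+ * specific driver's code):
+ *
+ *	ret = drm_plane_create_alpha_property(plane);
+ *	if (ret)
+ *		return ret;
+ *
+ * The current value is then read from drm_plane_state.alpha in the driver's
+ * atomic update hook.
+ */
+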
+/**
  * drm_plane_create_rotation_property - create a new rotation property
  * @plane: drm plane
  * @rotation: initial value of the rotation property
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index b3cde89..9b9ba5d 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1069,7 +1069,7 @@
 		goto nomem;
 
 	for (i = 0; i < num_modes; i++)
-		drm_property_add_enum(dev->mode_config.tv_mode_property, i,
+		drm_property_add_enum(dev->mode_config.tv_mode_property,
 				      i, modes[i]);
 
 	dev->mode_config.tv_brightness_property =
@@ -1156,7 +1156,7 @@
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_property *scaling_mode_property;
-	int i, j = 0;
+	int i;
 	const unsigned valid_scaling_mode_mask =
 		(1U << ARRAY_SIZE(drm_scaling_mode_enum_list)) - 1;
 
@@ -1177,7 +1177,7 @@
 		if (!(BIT(i) & scaling_mode_mask))
 			continue;
 
-		ret = drm_property_add_enum(scaling_mode_property, j++,
+		ret = drm_property_add_enum(scaling_mode_property,
 					    drm_scaling_mode_enum_list[i].type,
 					    drm_scaling_mode_enum_list[i].name);
 
@@ -1531,8 +1531,10 @@
 	return connector->encoder;
 }
 
-static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode,
-					 const struct drm_file *file_priv)
+static bool
+drm_mode_expose_to_userspace(const struct drm_display_mode *mode,
+			     const struct list_head *export_list,
+			     const struct drm_file *file_priv)
 {
 	/*
 	 * If user-space hasn't configured the driver to expose the stereo 3D
@@ -1540,6 +1542,23 @@
 	 */
 	if (!file_priv->stereo_allowed && drm_mode_is_stereo(mode))
 		return false;
+	/*
+	 * If user-space hasn't configured the driver to expose the modes
+	 * with aspect-ratio, don't expose them. However, if such a mode
+	 * is unique, let it be exposed, but reset the aspect-ratio flags
+	 * while preparing the list of user-modes.
+	 */
+	if (!file_priv->aspect_ratio_allowed) {
+		struct drm_display_mode *mode_itr;
+
+		list_for_each_entry(mode_itr, export_list, export_head)
+			if (drm_mode_match(mode_itr, mode,
+					   DRM_MODE_MATCH_TIMINGS |
+					   DRM_MODE_MATCH_CLOCK |
+					   DRM_MODE_MATCH_FLAGS |
+					   DRM_MODE_MATCH_3D_FLAGS))
+				return false;
+	}
 
 	return true;
 }
@@ -1559,6 +1578,7 @@
 	struct drm_mode_modeinfo u_mode;
 	struct drm_mode_modeinfo __user *mode_ptr;
 	uint32_t __user *encoder_ptr;
+	LIST_HEAD(export_list);
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
@@ -1607,21 +1627,31 @@
 
 	/* delayed so we get modes regardless of pre-fill_modes state */
 	list_for_each_entry(mode, &connector->modes, head)
-		if (drm_mode_expose_to_userspace(mode, file_priv))
+		if (drm_mode_expose_to_userspace(mode, &export_list,
+						 file_priv)) {
+			list_add_tail(&mode->export_head, &export_list);
 			mode_count++;
+		}
 
 	/*
 	 * This ioctl is called twice, once to determine how much space is
 	 * needed, and the 2nd time to fill it.
+	 * The modes that need to be exposed to the user are maintained in
+	 * 'export_list'. When the ioctl is called the first time to determine
+	 * the space required, export_list is filled to find the number of
+	 * modes. The second time around, the user modes are copied out, one
+	 * by one, from export_list.
 	 */
 	if ((out_resp->count_modes >= mode_count) && mode_count) {
 		copied = 0;
 		mode_ptr = (struct drm_mode_modeinfo __user *)(unsigned long)out_resp->modes_ptr;
-		list_for_each_entry(mode, &connector->modes, head) {
-			if (!drm_mode_expose_to_userspace(mode, file_priv))
-				continue;
-
+		list_for_each_entry(mode, &export_list, export_head) {
 			drm_mode_convert_to_umode(&u_mode, mode);
+			/*
+			 * Reset aspect ratio flags of user-mode, if modes with
+			 * aspect-ratio are not supported.
+			 */
+			if (!file_priv->aspect_ratio_allowed)
+				u_mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK;
 			if (copy_to_user(mode_ptr + copied,
 					 &u_mode, sizeof(u_mode))) {
 				ret = -EFAULT;
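
The two-pass contract is easiest to see from the user-space side; a minimal
sketch using the raw ioctl (fd and id assumed, error handling and the
encoder/property arrays trimmed):

	struct drm_mode_get_connector conn = { .connector_id = id };

	/* Pass 1: only the counts are filled in. */
	ioctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn);

	struct drm_mode_modeinfo *modes = calloc(conn.count_modes, sizeof(*modes));
	conn.modes_ptr = (__u64)(uintptr_t)modes;

	/* Pass 2: the kernel copies out exactly the modes it exported. */
	ioctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn);

With the change above, both passes walk the same export_list, so the count
from pass 1 and the modes copied in pass 2 can no longer disagree about
aspect-ratio-only duplicates.
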
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 0358388..98a36e6 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -402,6 +402,7 @@
 {
 	struct drm_mode_crtc *crtc_resp = data;
 	struct drm_crtc *crtc;
+	struct drm_plane *plane;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
@@ -410,34 +411,36 @@
 	if (!crtc)
 		return -ENOENT;
 
+	plane = crtc->primary;
+
 	crtc_resp->gamma_size = crtc->gamma_size;
 
-	drm_modeset_lock(&crtc->primary->mutex, NULL);
-	if (crtc->primary->state && crtc->primary->state->fb)
-		crtc_resp->fb_id = crtc->primary->state->fb->base.id;
-	else if (!crtc->primary->state && crtc->primary->fb)
-		crtc_resp->fb_id = crtc->primary->fb->base.id;
+	drm_modeset_lock(&plane->mutex, NULL);
+	if (plane->state && plane->state->fb)
+		crtc_resp->fb_id = plane->state->fb->base.id;
+	else if (!plane->state && plane->fb)
+		crtc_resp->fb_id = plane->fb->base.id;
 	else
 		crtc_resp->fb_id = 0;
 
-	if (crtc->primary->state) {
-		crtc_resp->x = crtc->primary->state->src_x >> 16;
-		crtc_resp->y = crtc->primary->state->src_y >> 16;
+	if (plane->state) {
+		crtc_resp->x = plane->state->src_x >> 16;
+		crtc_resp->y = plane->state->src_y >> 16;
 	}
-	drm_modeset_unlock(&crtc->primary->mutex);
+	drm_modeset_unlock(&plane->mutex);
 
 	drm_modeset_lock(&crtc->mutex, NULL);
 	if (crtc->state) {
 		if (crtc->state->enable) {
 			drm_mode_convert_to_umode(&crtc_resp->mode, &crtc->state->mode);
 			crtc_resp->mode_valid = 1;
-
 		} else {
 			crtc_resp->mode_valid = 0;
 		}
 	} else {
 		crtc_resp->x = crtc->x;
 		crtc_resp->y = crtc->y;
+
 		if (crtc->enabled) {
 			drm_mode_convert_to_umode(&crtc_resp->mode, &crtc->mode);
 			crtc_resp->mode_valid = 1;
@@ -446,6 +449,8 @@
 			crtc_resp->mode_valid = 0;
 		}
 	}
+	if (!file_priv->aspect_ratio_allowed)
+		crtc_resp->mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK;
 	drm_modeset_unlock(&crtc->mutex);
 
 	return 0;
@@ -471,7 +476,7 @@
 
 	ret = crtc->funcs->set_config(set, ctx);
 	if (ret == 0) {
-		crtc->primary->crtc = crtc;
+		crtc->primary->crtc = fb ? crtc : NULL;
 		crtc->primary->fb = fb;
 	}
 
@@ -554,6 +559,7 @@
 	struct drm_mode_config *config = &dev->mode_config;
 	struct drm_mode_crtc *crtc_req = data;
 	struct drm_crtc *crtc;
+	struct drm_plane *plane;
 	struct drm_connector **connector_set = NULL, *connector;
 	struct drm_framebuffer *fb = NULL;
 	struct drm_display_mode *mode = NULL;
@@ -580,22 +586,33 @@
 	}
 	DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name);
 
+	plane = crtc->primary;
+
 	mutex_lock(&crtc->dev->mode_config.mutex);
 	drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
 retry:
 	ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx);
 	if (ret)
 		goto out;
+
 	if (crtc_req->mode_valid) {
 		/* If we have a mode we need a framebuffer. */
 		/* If we pass -1, set the mode with the currently bound fb */
 		if (crtc_req->fb_id == -1) {
-			if (!crtc->primary->fb) {
+			struct drm_framebuffer *old_fb;
+
+			if (plane->state)
+				old_fb = plane->state->fb;
+			else
+				old_fb = plane->fb;
+
+			if (!old_fb) {
 				DRM_DEBUG_KMS("CRTC doesn't have current FB\n");
 				ret = -EINVAL;
 				goto out;
 			}
-			fb = crtc->primary->fb;
+
+			fb = old_fb;
 			/* Make refcounting symmetric with the lookup path. */
 			drm_framebuffer_get(fb);
 		} else {
@@ -613,6 +630,13 @@
 			ret = -ENOMEM;
 			goto out;
 		}
+		if (!file_priv->aspect_ratio_allowed &&
+		    (crtc_req->mode.flags & DRM_MODE_FLAG_PIC_AR_MASK) != DRM_MODE_FLAG_PIC_AR_NONE) {
+			DRM_DEBUG_KMS("Unexpected aspect-ratio flag bits\n");
+			ret = -EINVAL;
+			goto out;
+		}
+
 
 		ret = drm_mode_convert_umode(dev, mode, &crtc_req->mode);
 		if (ret) {
@@ -627,8 +651,8 @@
 		 * match real hardware capabilities. Skip the check in that
 		 * case.
 		 */
-		if (!crtc->primary->format_default) {
-			ret = drm_plane_check_pixel_format(crtc->primary,
+		if (!plane->format_default) {
+			ret = drm_plane_check_pixel_format(plane,
 							   fb->format->format,
 							   fb->modifier);
 			if (ret) {
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 3c2b828..5d307b2 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -220,3 +220,5 @@
 
 /* drm_edid.c */
 void drm_mode_fixup_1366x768(struct drm_display_mode *mode);
+void drm_reset_display_info(struct drm_connector *connector);
+u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid);
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 70ae1f2..a7ba602 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -119,18 +119,32 @@
 EXPORT_SYMBOL(drm_dp_get_adjust_request_pre_emphasis);
 
 void drm_dp_link_train_clock_recovery_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) {
-	if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0)
+	int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+			  DP_TRAINING_AUX_RD_MASK;
+
+	if (rd_interval > 4)
+		DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n",
+			      rd_interval);
+
+	if (rd_interval == 0 || dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14)
 		udelay(100);
 	else
-		mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4);
+		mdelay(rd_interval * 4);
 }
 EXPORT_SYMBOL(drm_dp_link_train_clock_recovery_delay);
 
 void drm_dp_link_train_channel_eq_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) {
-	if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0)
+	int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+			  DP_TRAINING_AUX_RD_MASK;
+
+	if (rd_interval > 4)
+		DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n",
+			      rd_interval);
+
+	if (rd_interval == 0)
 		udelay(400);
 	else
-		mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4);
+		mdelay(rd_interval * 4);
 }
 EXPORT_SYMBOL(drm_dp_link_train_channel_eq_delay);
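
The mask matters because bit 7 of DP_TRAINING_AUX_RD_INTERVAL does not encode
time at all; a sketch with an illustrative DPCD value:

	u8 dpcd[DP_RECEIVER_CAP_SIZE];

	drm_dp_dpcd_read(aux, DP_DPCD_REV, dpcd, sizeof(dpcd));

	/*
	 * Suppose dpcd[DP_TRAINING_AUX_RD_INTERVAL] reads back as 0x82
	 * (illustrative): bit 7 only flags the extended receiver capability
	 * field, bits 6:0 give the interval, here 2, i.e. 8 ms.  Without
	 * DP_TRAINING_AUX_RD_MASK this would have been 0x82 * 4 = 520 ms.
	 */
	drm_dp_link_train_clock_recovery_delay(dpcd);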
 
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 6fac412..6588306 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2941,12 +2941,14 @@
 	}
 }
 
+#define DP_PAYLOAD_TABLE_SIZE		64
+
 static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr,
 				  char *buf)
 {
 	int i;
 
-	for (i = 0; i < 64; i += 16) {
+	for (i = 0; i < DP_PAYLOAD_TABLE_SIZE; i += 16) {
 		if (drm_dp_dpcd_read(mgr->aux,
 				     DP_PAYLOAD_TABLE_UPDATE_STATUS + i,
 				     &buf[i], 16) != 16)
@@ -3015,7 +3017,7 @@
 
 	mutex_lock(&mgr->lock);
 	if (mgr->mst_primary) {
-		u8 buf[64];
+		u8 buf[DP_PAYLOAD_TABLE_SIZE];
 		int ret;
 
 		ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, buf, DP_RECEIVER_CAP_SIZE);
@@ -3033,8 +3035,7 @@
 		seq_printf(m, " revision: hw: %x.%x sw: %x.%x\n",
 			   buf[0x9] >> 4, buf[0x9] & 0xf, buf[0xa], buf[0xb]);
 		if (dump_dp_payload_table(mgr, buf))
-			seq_printf(m, "payload table: %*ph\n", 63, buf);
-
+			seq_printf(m, "payload table: %*ph\n", DP_PAYLOAD_TABLE_SIZE, buf);
 	}
 
 	mutex_unlock(&mgr->lock);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index c2c21d8..b553a6f 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -32,6 +32,7 @@
 #include <linux/moduleparam.h>
 #include <linux/mount.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 
 #include <drm/drm_drv.h>
 #include <drm/drmP.h>
@@ -75,6 +76,8 @@
 
 static struct dentry *drm_debugfs_root;
 
+DEFINE_STATIC_SRCU(drm_unplug_srcu);
+
 /*
  * DRM Minors
  * A DRM device can provide several char-dev interfaces on the DRM-Major. Each
@@ -96,8 +99,6 @@
 		return &dev->primary;
 	case DRM_MINOR_RENDER:
 		return &dev->render;
-	case DRM_MINOR_CONTROL:
-		return &dev->control;
 	default:
 		BUG();
 	}
@@ -318,18 +319,51 @@
 }
 EXPORT_SYMBOL(drm_put_dev);
 
-static void drm_device_set_unplugged(struct drm_device *dev)
+/**
+ * drm_dev_enter - Enter device critical section
+ * @dev: DRM device
+ * @idx: Pointer to index that will be passed to the matching drm_dev_exit()
+ *
+ * This function marks and protects the beginning of a section that should not
+ * be entered after the device has been unplugged. The section end is marked
+ * with drm_dev_exit(). Calls to this function can be nested.
+ *
+ * Returns:
+ * True if it is OK to enter the section, false otherwise.
+ */
+bool drm_dev_enter(struct drm_device *dev, int *idx)
 {
-	smp_wmb();
-	atomic_set(&dev->unplugged, 1);
+	*idx = srcu_read_lock(&drm_unplug_srcu);
+
+	if (dev->unplugged) {
+		srcu_read_unlock(&drm_unplug_srcu, *idx);
+		return false;
+	}
+
+	return true;
 }
+EXPORT_SYMBOL(drm_dev_enter);
+
+/**
+ * drm_dev_exit - Exit device critical section
+ * @idx: index returned from drm_dev_enter()
+ *
+ * This function marks the end of a section that should not be entered after
+ * the device has been unplugged.
+ */
+void drm_dev_exit(int idx)
+{
+	srcu_read_unlock(&drm_unplug_srcu, idx);
+}
+EXPORT_SYMBOL(drm_dev_exit);
 
 /**
  * drm_dev_unplug - unplug a DRM device
  * @dev: DRM device
  *
  * This unplugs a hotpluggable DRM device, which makes it inaccessible to
- * userspace operations. Entry-points can use drm_dev_is_unplugged(). This
+ * userspace operations. Entry-points can use drm_dev_enter() and
+ * drm_dev_exit() to protect device resources in a race-free manner. This
  * essentially unregisters the device like drm_dev_unregister(), but can be
  * called while there are still open users of @dev.
  */
@@ -338,10 +372,18 @@
 	drm_dev_unregister(dev);
 
 	mutex_lock(&drm_global_mutex);
-	drm_device_set_unplugged(dev);
 	if (dev->open_count == 0)
 		drm_dev_put(dev);
 	mutex_unlock(&drm_global_mutex);
+
+	/*
+	 * After synchronizing, any critical read section is guaranteed to see
+	 * the new value of ->unplugged, and any critical section which might
+	 * still have seen the old value of ->unplugged is guaranteed to have
+	 * finished.
+	 */
+	dev->unplugged = true;
+	synchronize_srcu(&drm_unplug_srcu);
 }
 EXPORT_SYMBOL(drm_dev_unplug);
 
@@ -523,7 +565,6 @@
 err_minors:
 	drm_minor_free(dev, DRM_MINOR_PRIMARY);
 	drm_minor_free(dev, DRM_MINOR_RENDER);
-	drm_minor_free(dev, DRM_MINOR_CONTROL);
 	drm_fs_inode_free(dev->anon_inode);
 err_free:
 	mutex_destroy(&dev->master_mutex);
@@ -559,7 +600,6 @@
 
 	drm_minor_free(dev, DRM_MINOR_PRIMARY);
 	drm_minor_free(dev, DRM_MINOR_RENDER);
-	drm_minor_free(dev, DRM_MINOR_CONTROL);
 
 	mutex_destroy(&dev->master_mutex);
 	mutex_destroy(&dev->ctxlist_mutex);
@@ -752,10 +792,6 @@
 
 	mutex_lock(&drm_global_mutex);
 
-	ret = drm_minor_register(dev, DRM_MINOR_CONTROL);
-	if (ret)
-		goto err_minors;
-
 	ret = drm_minor_register(dev, DRM_MINOR_RENDER);
 	if (ret)
 		goto err_minors;
@@ -793,7 +829,6 @@
 	remove_compat_control_link(dev);
 	drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
 	drm_minor_unregister(dev, DRM_MINOR_RENDER);
-	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
 out_unlock:
 	mutex_unlock(&drm_global_mutex);
 	return ret;
@@ -838,7 +873,6 @@
 	remove_compat_control_link(dev);
 	drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
 	drm_minor_unregister(dev, DRM_MINOR_RENDER);
-	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
 }
 EXPORT_SYMBOL(drm_dev_unregister);
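
A minimal sketch of the calling pattern the new SRCU section is meant for
(foo_ioctl and foo_do_hw_access are hypothetical):

	static int foo_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file)
	{
		int idx, ret;

		if (!drm_dev_enter(dev, &idx))
			return -ENODEV;	/* device is already gone */

		ret = foo_do_hw_access(dev, data);

		drm_dev_exit(idx);
		return ret;
	}

Because readers sit in an SRCU read-side section, drm_dev_unplug() can block
in synchronize_srcu() until every such section has drained, instead of the
barrier-and-atomic dance the old unplugged flag needed.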
 
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 39f1db4a..40e1e24 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -2930,11 +2930,15 @@
 static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_match,
 					     unsigned int clock_tolerance)
 {
+	unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS;
 	u8 vic;
 
 	if (!to_match->clock)
 		return 0;
 
+	if (to_match->picture_aspect_ratio)
+		match_flags |= DRM_MODE_MATCH_ASPECT_RATIO;
+
 	for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) {
 		struct drm_display_mode cea_mode = edid_cea_modes[vic];
 		unsigned int clock1, clock2;
@@ -2948,7 +2952,7 @@
 			continue;
 
 		do {
-			if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode))
+			if (drm_mode_match(to_match, &cea_mode, match_flags))
 				return vic;
 		} while (cea_mode_alternate_timings(vic, &cea_mode));
 	}
@@ -2965,11 +2969,15 @@
  */
 u8 drm_match_cea_mode(const struct drm_display_mode *to_match)
 {
+	unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS;
 	u8 vic;
 
 	if (!to_match->clock)
 		return 0;
 
+	if (to_match->picture_aspect_ratio)
+		match_flags |= DRM_MODE_MATCH_ASPECT_RATIO;
+
 	for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) {
 		struct drm_display_mode cea_mode = edid_cea_modes[vic];
 		unsigned int clock1, clock2;
@@ -2983,7 +2991,7 @@
 			continue;
 
 		do {
-			if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode))
+			if (drm_mode_match(to_match, &cea_mode, match_flags))
 				return vic;
 		} while (cea_mode_alternate_timings(vic, &cea_mode));
 	}
@@ -3030,6 +3038,7 @@
 static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_match,
 					      unsigned int clock_tolerance)
 {
+	unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS;
 	u8 vic;
 
 	if (!to_match->clock)
@@ -3047,7 +3056,7 @@
 		    abs(to_match->clock - clock2) > clock_tolerance)
 			continue;
 
-		if (drm_mode_equal_no_clocks(to_match, hdmi_mode))
+		if (drm_mode_match(to_match, hdmi_mode, match_flags))
 			return vic;
 	}
 
@@ -3064,6 +3073,7 @@
  */
 static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match)
 {
+	unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS;
 	u8 vic;
 
 	if (!to_match->clock)
@@ -3079,7 +3089,7 @@
 
 		if ((KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock1) ||
 		     KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock2)) &&
-		    drm_mode_equal_no_clocks_no_stereo(to_match, hdmi_mode))
+		    drm_mode_match(to_match, hdmi_mode, match_flags))
 			return vic;
 	}
 	return 0;
@@ -4455,7 +4465,6 @@
 
 	info->non_desktop = 0;
 }
-EXPORT_SYMBOL_GPL(drm_reset_display_info);
 
 u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid)
 {
@@ -4533,7 +4542,6 @@
 		info->color_formats |= DRM_COLOR_FORMAT_YCRCB422;
 	return quirks;
 }
-EXPORT_SYMBOL_GPL(drm_add_display_info);
 
 static int validate_displayid(u8 *displayid, int length, int idx)
 {
@@ -4825,6 +4833,7 @@
 					 const struct drm_display_mode *mode,
 					 bool is_hdmi2_sink)
 {
+	enum hdmi_picture_aspect picture_aspect;
 	int err;
 
 	if (!frame || !mode)
@@ -4867,13 +4876,23 @@
 	 * Populate picture aspect ratio from either
 	 * user input (if specified) or from the CEA mode list.
 	 */
-	if (mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_4_3 ||
-		mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_16_9)
-		frame->picture_aspect = mode->picture_aspect_ratio;
-	else if (frame->video_code > 0)
-		frame->picture_aspect = drm_get_cea_aspect_ratio(
-						frame->video_code);
+	picture_aspect = mode->picture_aspect_ratio;
+	if (picture_aspect == HDMI_PICTURE_ASPECT_NONE)
+		picture_aspect = drm_get_cea_aspect_ratio(frame->video_code);
 
+	/*
+	 * The infoframe can't convey anything but none, 4:3
+	 * and 16:9, so if the user has asked for anything else
+	 * we can only satisfy it by specifying the right VIC.
+	 */
+	if (picture_aspect > HDMI_PICTURE_ASPECT_16_9) {
+		if (picture_aspect !=
+		    drm_get_cea_aspect_ratio(frame->video_code))
+			return -EINVAL;
+		picture_aspect = HDMI_PICTURE_ASPECT_NONE;
+	}
+
+	frame->picture_aspect = picture_aspect;
 	frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
 	frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN;
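
For callers, the practical effect shows up at infoframe construction time
(sketch; mode and frame are whatever the driver already has at hand):

	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false);

	/*
	 * For a 64:27 or 256:135 mode this now succeeds only when
	 * mode->picture_aspect_ratio agrees with the aspect ratio implied by
	 * the CEA VIC, and fails with -EINVAL otherwise, since the AVI
	 * infoframe itself can only say "none", 4:3 or 16:9.
	 */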
 
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 0646b10..2ee1eaa 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2183,7 +2183,11 @@
 		for (j = 0; j < i; j++) {
 			if (!enabled[j])
 				continue;
-			if (!drm_mode_equal(modes[j], modes[i]))
+			if (!drm_mode_match(modes[j], modes[i],
+					    DRM_MODE_MATCH_TIMINGS |
+					    DRM_MODE_MATCH_CLOCK |
+					    DRM_MODE_MATCH_FLAGS |
+					    DRM_MODE_MATCH_3D_FLAGS))
 				can_clone = false;
 		}
 	}
@@ -2203,7 +2207,11 @@
 
 		fb_helper_conn = fb_helper->connector_info[i];
 		list_for_each_entry(mode, &fb_helper_conn->connector->modes, head) {
-			if (drm_mode_equal(mode, dmt_mode))
+			if (drm_mode_match(mode, dmt_mode,
+					   DRM_MODE_MATCH_TIMINGS |
+					   DRM_MODE_MATCH_CLOCK |
+					   DRM_MODE_MATCH_FLAGS |
+					   DRM_MODE_MATCH_3D_FLAGS))
 				modes[i] = mode;
 		}
 		if (!modes[i])
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index ad67203..bfedcef 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -468,29 +468,30 @@
 		goto out;
 	}
 
+	if (!fb->funcs->create_handle) {
+		ret = -ENODEV;
+		goto out;
+	}
+
 	r->height = fb->height;
 	r->width = fb->width;
 	r->depth = fb->format->depth;
 	r->bpp = fb->format->cpp[0] * 8;
 	r->pitch = fb->pitches[0];
-	if (fb->funcs->create_handle) {
-		if (drm_is_current_master(file_priv) || capable(CAP_SYS_ADMIN) ||
-		    drm_is_control_client(file_priv)) {
-			ret = fb->funcs->create_handle(fb, file_priv,
-						       &r->handle);
-		} else {
-			/* GET_FB() is an unprivileged ioctl so we must not
-			 * return a buffer-handle to non-master processes! For
-			 * backwards-compatibility reasons, we cannot make
-			 * GET_FB() privileged, so just return an invalid handle
-			 * for non-masters. */
-			r->handle = 0;
-			ret = 0;
-		}
-	} else {
-		ret = -ENODEV;
+
+	/* GET_FB() is an unprivileged ioctl so we must not return a
+	 * buffer-handle to non-master processes! For
+	 * backwards-compatibility reasons, we cannot make GET_FB() privileged,
+	 * so just return an invalid handle for non-masters.
+	 */
+	if (!drm_is_current_master(file_priv) && !capable(CAP_SYS_ADMIN)) {
+		r->handle = 0;
+		ret = 0;
+		goto out;
 	}
 
+	ret = fb->funcs->create_handle(fb, file_priv, &r->handle);
+
 out:
 	drm_framebuffer_put(fb);
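
The user-visible behaviour is unchanged by the restructuring: a non-master,
non-root caller still gets a successful GETFB with an invalid handle (sketch;
fd and fb_id assumed):

	struct drm_mode_fb_cmd info = { .fb_id = fb_id };

	if (ioctl(fd, DRM_IOCTL_MODE_GETFB, &info) == 0 && info.handle == 0) {
		/* width/height/pitch are valid, but no GEM handle for us */
	}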
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 4975ba9..4a16d7b 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -436,9 +436,12 @@
  * @obj: object to register
  * @handlep: pointer to return the created handle to the caller
  *
- * Create a handle for this object. This adds a handle reference
- * to the object, which includes a regular reference count. Callers
- * will likely want to dereference the object afterwards.
+ * Create a handle for this object. This adds a handle reference to the object,
+ * which includes a regular reference count. Callers will likely want to
+ * dereference the object afterwards.
+ *
+ * Since this publishes @obj to userspace it must be fully set up by this point;
+ * drivers must call this last in their buffer object creation callbacks.
  */
 int drm_gem_handle_create(struct drm_file *file_priv,
 			  struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index 4d682a6..acfbc06 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -22,6 +22,7 @@
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_modeset_helper.h>
+#include <drm/drm_simple_kms_helper.h>
 
 /**
  * DOC: overview
@@ -266,6 +267,24 @@
 EXPORT_SYMBOL_GPL(drm_gem_fb_prepare_fb);
 
 /**
+ * drm_gem_fb_simple_display_pipe_prepare_fb - prepare_fb helper for
+ *     &drm_simple_display_pipe
+ * @pipe: Simple display pipe
+ * @plane_state: Plane state
+ *
+ * This function uses drm_gem_fb_prepare_fb() to check if the plane FB has a
+ * &dma_buf attached, extracts the exclusive fence and attaches it to plane
+ * state for the atomic helper to wait on. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs.prepare_fb callback.
+ */
+int drm_gem_fb_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
+					      struct drm_plane_state *plane_state)
+{
+	return drm_gem_fb_prepare_fb(&pipe->plane, plane_state);
+}
+EXPORT_SYMBOL(drm_gem_fb_simple_display_pipe_prepare_fb);
+
+/**
  * drm_gem_fbdev_fb_create - Create a GEM backed &drm_framebuffer for fbdev
  *                           emulation
  * @dev: DRM device
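
Drivers built on the simple display pipe can now delegate the fence plumbing
in one line (sketch; the other foo_* hooks are whatever the driver already
has):

	static const struct drm_simple_display_pipe_funcs foo_pipe_funcs = {
		.enable		= foo_pipe_enable,
		.disable	= foo_pipe_disable,
		.update		= foo_pipe_update,
		.prepare_fb	= drm_gem_fb_simple_display_pipe_prepare_fb,
	};
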
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index f8e96e6..67b1fca 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -105,7 +105,7 @@
 		.desc = compat_ptr(v32.desc),
 	};
 	err = drm_ioctl_kernel(file, drm_version, &v,
-			DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW);
+			       DRM_UNLOCKED|DRM_RENDER_ALLOW);
 	if (err)
 		return err;
 
@@ -885,7 +885,7 @@
 		return -EFAULT;
 
 	err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64,
-				DRM_CONTROL_ALLOW|DRM_UNLOCKED);
+			       DRM_UNLOCKED);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index af78291..0d4cfb2 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -324,6 +324,15 @@
 			return -EINVAL;
 		file_priv->atomic = req->value;
 		file_priv->universal_planes = req->value;
+		/*
+		 * No known atomic user-space blows up on the aspect-ratio
+		 * mode bits, so the atomic cap implies this one as well.
+		 */
+		file_priv->aspect_ratio_allowed = req->value;
+		break;
+	case DRM_CLIENT_CAP_ASPECT_RATIO:
+		if (req->value > 1)
+			return -EINVAL;
+		file_priv->aspect_ratio_allowed = req->value;
 		break;
 	default:
 		return -EINVAL;
@@ -510,13 +519,7 @@
 
 	/* MASTER is only for master or control clients */
 	if (unlikely((flags & DRM_MASTER) &&
-		     !drm_is_current_master(file_priv) &&
-		     !drm_is_control_client(file_priv)))
-		return -EACCES;
-
-	/* Control clients must be explicitly allowed */
-	if (unlikely(!(flags & DRM_CONTROL_ALLOW) &&
-		     drm_is_control_client(file_priv)))
+		     !drm_is_current_master(file_priv)))
 		return -EACCES;
 
 	/* Render clients must be explicitly allowed */
@@ -539,7 +542,7 @@
 /* Ioctl table */
 static const struct drm_ioctl_desc drm_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version,
-		      DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW),
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY),
@@ -613,41 +616,41 @@
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_UNLOCKED),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_UNLOCKED),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
@@ -665,10 +668,10 @@
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
 };
 
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
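
User-space opts in through the existing SET_CLIENT_CAP ioctl; libdrm's
drmSetClientCap() wraps the same call (sketch):

	struct drm_set_client_cap cap = {
		.capability	= DRM_CLIENT_CAP_ASPECT_RATIO,
		.value		= 1,
	};

	ioctl(fd, DRM_IOCTL_SET_CLIENT_CAP, &cap);

From then on GETCONNECTOR reports modes with their DRM_MODE_FLAG_PIC_AR_*
bits intact and SETCRTC accepts them, while legacy clients keep seeing the
flags cleared.
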
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index d345563..50c73c0 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -340,7 +340,7 @@
 				break;
 
 			/* Over */
-			master = list_entry(master->lessee_list.next, struct drm_master, lessee_list);
+			master = list_next_entry(master, lessee_list);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index e82b61e..c78ca0e 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -939,6 +939,99 @@
 }
 EXPORT_SYMBOL(drm_mode_duplicate);
 
+static bool drm_mode_match_timings(const struct drm_display_mode *mode1,
+				   const struct drm_display_mode *mode2)
+{
+	return mode1->hdisplay == mode2->hdisplay &&
+		mode1->hsync_start == mode2->hsync_start &&
+		mode1->hsync_end == mode2->hsync_end &&
+		mode1->htotal == mode2->htotal &&
+		mode1->hskew == mode2->hskew &&
+		mode1->vdisplay == mode2->vdisplay &&
+		mode1->vsync_start == mode2->vsync_start &&
+		mode1->vsync_end == mode2->vsync_end &&
+		mode1->vtotal == mode2->vtotal &&
+		mode1->vscan == mode2->vscan;
+}
+
+static bool drm_mode_match_clock(const struct drm_display_mode *mode1,
+				  const struct drm_display_mode *mode2)
+{
+	/*
+	 * For the clock check, convert both clocks to picoseconds
+	 * so fb modes get matched the same way.
+	 */
+	if (mode1->clock && mode2->clock)
+		return KHZ2PICOS(mode1->clock) == KHZ2PICOS(mode2->clock);
+	else
+		return mode1->clock == mode2->clock;
+}
+
+static bool drm_mode_match_flags(const struct drm_display_mode *mode1,
+				 const struct drm_display_mode *mode2)
+{
+	return (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) ==
+		(mode2->flags & ~DRM_MODE_FLAG_3D_MASK);
+}
+
+static bool drm_mode_match_3d_flags(const struct drm_display_mode *mode1,
+				    const struct drm_display_mode *mode2)
+{
+	return (mode1->flags & DRM_MODE_FLAG_3D_MASK) ==
+		(mode2->flags & DRM_MODE_FLAG_3D_MASK);
+}
+
+static bool drm_mode_match_aspect_ratio(const struct drm_display_mode *mode1,
+					const struct drm_display_mode *mode2)
+{
+	return mode1->picture_aspect_ratio == mode2->picture_aspect_ratio;
+}
+
+/**
+ * drm_mode_match - test modes for (partial) equality
+ * @mode1: first mode
+ * @mode2: second mode
+ * @match_flags: which parts need to match (DRM_MODE_MATCH_*)
+ *
+ * Check to see if @mode1 and @mode2 are equivalent.
+ *
+ * Returns:
+ * True if the modes are (partially) equal, false otherwise.
+ */
+bool drm_mode_match(const struct drm_display_mode *mode1,
+		    const struct drm_display_mode *mode2,
+		    unsigned int match_flags)
+{
+	if (!mode1 && !mode2)
+		return true;
+
+	if (!mode1 || !mode2)
+		return false;
+
+	if (match_flags & DRM_MODE_MATCH_TIMINGS &&
+	    !drm_mode_match_timings(mode1, mode2))
+		return false;
+
+	if (match_flags & DRM_MODE_MATCH_CLOCK &&
+	    !drm_mode_match_clock(mode1, mode2))
+		return false;
+
+	if (match_flags & DRM_MODE_MATCH_FLAGS &&
+	    !drm_mode_match_flags(mode1, mode2))
+		return false;
+
+	if (match_flags & DRM_MODE_MATCH_3D_FLAGS &&
+	    !drm_mode_match_3d_flags(mode1, mode2))
+		return false;
+
+	if (match_flags & DRM_MODE_MATCH_ASPECT_RATIO &&
+	    !drm_mode_match_aspect_ratio(mode1, mode2))
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL(drm_mode_match);
+
 /**
  * drm_mode_equal - test modes for equality
  * @mode1: first mode
@@ -949,23 +1042,15 @@
  * Returns:
  * True if the modes are equal, false otherwise.
  */
-bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2)
+bool drm_mode_equal(const struct drm_display_mode *mode1,
+		    const struct drm_display_mode *mode2)
 {
-	if (!mode1 && !mode2)
-		return true;
-
-	if (!mode1 || !mode2)
-		return false;
-
-	/* do clock check convert to PICOS so fb modes get matched
-	 * the same */
-	if (mode1->clock && mode2->clock) {
-		if (KHZ2PICOS(mode1->clock) != KHZ2PICOS(mode2->clock))
-			return false;
-	} else if (mode1->clock != mode2->clock)
-		return false;
-
-	return drm_mode_equal_no_clocks(mode1, mode2);
+	return drm_mode_match(mode1, mode2,
+			      DRM_MODE_MATCH_TIMINGS |
+			      DRM_MODE_MATCH_CLOCK |
+			      DRM_MODE_MATCH_FLAGS |
+			      DRM_MODE_MATCH_3D_FLAGS |
+			      DRM_MODE_MATCH_ASPECT_RATIO);
 }
 EXPORT_SYMBOL(drm_mode_equal);
 
@@ -980,13 +1065,13 @@
  * Returns:
  * True if the modes are equal, false otherwise.
  */
-bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2)
+bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1,
+			      const struct drm_display_mode *mode2)
 {
-	if ((mode1->flags & DRM_MODE_FLAG_3D_MASK) !=
-	    (mode2->flags & DRM_MODE_FLAG_3D_MASK))
-		return false;
-
-	return drm_mode_equal_no_clocks_no_stereo(mode1, mode2);
+	return drm_mode_match(mode1, mode2,
+			      DRM_MODE_MATCH_TIMINGS |
+			      DRM_MODE_MATCH_FLAGS |
+			      DRM_MODE_MATCH_3D_FLAGS);
 }
 EXPORT_SYMBOL(drm_mode_equal_no_clocks);
 
@@ -1004,21 +1089,9 @@
 bool drm_mode_equal_no_clocks_no_stereo(const struct drm_display_mode *mode1,
 					const struct drm_display_mode *mode2)
 {
-	if (mode1->hdisplay == mode2->hdisplay &&
-	    mode1->hsync_start == mode2->hsync_start &&
-	    mode1->hsync_end == mode2->hsync_end &&
-	    mode1->htotal == mode2->htotal &&
-	    mode1->hskew == mode2->hskew &&
-	    mode1->vdisplay == mode2->vdisplay &&
-	    mode1->vsync_start == mode2->vsync_start &&
-	    mode1->vsync_end == mode2->vsync_end &&
-	    mode1->vtotal == mode2->vtotal &&
-	    mode1->vscan == mode2->vscan &&
-	    (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) ==
-	     (mode2->flags & ~DRM_MODE_FLAG_3D_MASK))
-		return true;
-
-	return false;
+	return drm_mode_match(mode1, mode2,
+			      DRM_MODE_MATCH_TIMINGS |
+			      DRM_MODE_MATCH_FLAGS);
 }
 EXPORT_SYMBOL(drm_mode_equal_no_clocks_no_stereo);
 
@@ -1575,6 +1648,26 @@
 	out->vrefresh = in->vrefresh;
 	out->flags = in->flags;
 	out->type = in->type;
+
+	switch (in->picture_aspect_ratio) {
+	case HDMI_PICTURE_ASPECT_4_3:
+		out->flags |= DRM_MODE_FLAG_PIC_AR_4_3;
+		break;
+	case HDMI_PICTURE_ASPECT_16_9:
+		out->flags |= DRM_MODE_FLAG_PIC_AR_16_9;
+		break;
+	case HDMI_PICTURE_ASPECT_64_27:
+		out->flags |= DRM_MODE_FLAG_PIC_AR_64_27;
+		break;
+	case HDMI_PICTURE_ASPECT_256_135:
+		out->flags |= DRM_MODE_FLAG_PIC_AR_256_135;
+		break;
+	case HDMI_PICTURE_ASPECT_RESERVED:
+	default:
+		out->flags |= DRM_MODE_FLAG_PIC_AR_NONE;
+		break;
+	}
+
 	strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
 	out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
 }
@@ -1621,6 +1714,30 @@
 	strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
 	out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
 
+	/* Clear the picture aspect ratio bits from out->flags: for a
+	 * kernel mode the aspect-ratio information lives in
+	 * picture_aspect_ratio, not in flags.
+	 */
+	out->flags &= ~DRM_MODE_FLAG_PIC_AR_MASK;
+
+	switch (in->flags & DRM_MODE_FLAG_PIC_AR_MASK) {
+	case DRM_MODE_FLAG_PIC_AR_4_3:
+		out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_4_3;
+		break;
+	case DRM_MODE_FLAG_PIC_AR_16_9:
+		out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_16_9;
+		break;
+	case DRM_MODE_FLAG_PIC_AR_64_27:
+		out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_64_27;
+		break;
+	case DRM_MODE_FLAG_PIC_AR_256_135:
+		out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_256_135;
+		break;
+	default:
+		out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
+		break;
+	}
+
 	out->status = drm_mode_validate_driver(dev, out);
 	if (out->status != MODE_OK)
 		return -EINVAL;
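
The old equality helpers become thin wrappers, and callers that need a custom
comparison can now say exactly what they mean (sketch):

	/* same timings and stereo layout; ignore clock and aspect ratio */
	bool close_enough = drm_mode_match(mode1, mode2,
					   DRM_MODE_MATCH_TIMINGS |
					   DRM_MODE_MATCH_FLAGS |
					   DRM_MODE_MATCH_3D_FLAGS);
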
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 902cc1a..fe9c6c7 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -60,7 +60,7 @@
 	.orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
 };
 
-static const struct drm_dmi_panel_orientation_data vios_lth17 = {
+static const struct drm_dmi_panel_orientation_data lcd800x1280_rightside_up = {
 	.width = 800,
 	.height = 1280,
 	.orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
@@ -102,12 +102,30 @@
 		  DMI_EXACT_MATCH(DMI_BOARD_NAME, "TW891"),
 		},
 		.driver_data = (void *)&itworks_tw891,
+	}, {	/*
+		 * Lenovo Ideapad Miix 310 laptop; only some production batches
+		 * have a portrait screen, and the resolution check makes the
+		 * quirk apply only to those batches.
+		 */
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80SG"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"),
+		},
+		.driver_data = (void *)&lcd800x1280_rightside_up,
+	}, {	/* Lenovo Ideapad Miix 320 */
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80XF"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"),
+		},
+		.driver_data = (void *)&lcd800x1280_rightside_up,
 	}, {	/* VIOS LTH17 */
 		.matches = {
 		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"),
 		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "LTH17"),
 		},
-		.driver_data = (void *)&vios_lth17,
+		.driver_data = (void *)&lcd800x1280_rightside_up,
 	},
 	{}
 };
@@ -154,10 +172,9 @@
 		if (!bios_date)
 			continue;
 
-		for (i = 0; data->bios_dates[i]; i++) {
-			if (!strcmp(data->bios_dates[i], bios_date))
-				return data->orientation;
-		}
+		i = match_string(data->bios_dates, -1, bios_date);
+		if (i >= 0)
+			return data->orientation;
 	}
 
 	return DRM_MODE_PANEL_ORIENTATION_UNKNOWN;
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 6d2a6e4..0350544 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -756,6 +756,7 @@
 				     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->dev;
+	struct drm_plane *plane = crtc->cursor;
 	struct drm_framebuffer *fb = NULL;
 	struct drm_mode_fb_cmd2 fbreq = {
 		.width = req->width,
@@ -769,8 +770,8 @@
 	uint32_t src_w = 0, src_h = 0;
 	int ret = 0;
 
-	BUG_ON(!crtc->cursor);
-	WARN_ON(crtc->cursor->crtc != crtc && crtc->cursor->crtc != NULL);
+	BUG_ON(!plane);
+	WARN_ON(plane->crtc != crtc && plane->crtc != NULL);
 
 	/*
 	 * Obtain fb we'll be using (either new or existing) and take an extra
@@ -784,13 +785,18 @@
 				DRM_DEBUG_KMS("failed to wrap cursor buffer in drm framebuffer\n");
 				return PTR_ERR(fb);
 			}
+
 			fb->hot_x = req->hot_x;
 			fb->hot_y = req->hot_y;
 		} else {
 			fb = NULL;
 		}
 	} else {
-		fb = crtc->cursor->fb;
+		if (plane->state)
+			fb = plane->state->fb;
+		else
+			fb = plane->fb;
+
 		if (fb)
 			drm_framebuffer_get(fb);
 	}
@@ -810,7 +816,7 @@
 		src_h = fb->height << 16;
 	}
 
-	ret = __setplane_internal(crtc->cursor, crtc, fb,
+	ret = __setplane_internal(plane, crtc, fb,
 				  crtc_x, crtc_y, crtc_w, crtc_h,
 				  0, 0, src_w, src_h, ctx);
 
@@ -931,7 +937,8 @@
 {
 	struct drm_mode_crtc_page_flip_target *page_flip = data;
 	struct drm_crtc *crtc;
-	struct drm_framebuffer *fb = NULL;
+	struct drm_plane *plane;
+	struct drm_framebuffer *fb = NULL, *old_fb;
 	struct drm_pending_vblank_event *e = NULL;
 	u32 target_vblank = page_flip->sequence;
 	struct drm_modeset_acquire_ctx ctx;
@@ -959,6 +966,8 @@
 	if (!crtc)
 		return -ENOENT;
 
+	plane = crtc->primary;
+
 	if (crtc->funcs->page_flip_target) {
 		u32 current_vblank;
 		int r;
@@ -1003,11 +1012,16 @@
 	ret = drm_modeset_lock(&crtc->mutex, &ctx);
 	if (ret)
 		goto out;
-	ret = drm_modeset_lock(&crtc->primary->mutex, &ctx);
+	ret = drm_modeset_lock(&plane->mutex, &ctx);
 	if (ret)
 		goto out;
 
-	if (crtc->primary->fb == NULL) {
+	if (plane->state)
+		old_fb = plane->state->fb;
+	else
+		old_fb = plane->fb;
+
+	if (old_fb == NULL) {
 		/* The framebuffer is currently unbound, presumably
 		 * due to a hotplug event that userspace has not
 		 * yet discovered.
@@ -1022,8 +1036,8 @@
 		goto out;
 	}
 
-	if (crtc->state) {
-		const struct drm_plane_state *state = crtc->primary->state;
+	if (plane->state) {
+		const struct drm_plane_state *state = plane->state;
 
 		ret = drm_framebuffer_check_src_coords(state->src_x,
 						       state->src_y,
@@ -1031,12 +1045,13 @@
 						       state->src_h,
 						       fb);
 	} else {
-		ret = drm_crtc_check_viewport(crtc, crtc->x, crtc->y, &crtc->mode, fb);
+		ret = drm_crtc_check_viewport(crtc, crtc->x, crtc->y,
+					      &crtc->mode, fb);
 	}
 	if (ret)
 		goto out;
 
-	if (crtc->primary->fb->format != fb->format) {
+	if (old_fb->format != fb->format) {
 		DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer format.\n");
 		ret = -EINVAL;
 		goto out;
@@ -1048,10 +1063,12 @@
 			ret = -ENOMEM;
 			goto out;
 		}
+
 		e->event.base.type = DRM_EVENT_FLIP_COMPLETE;
 		e->event.base.length = sizeof(e->event);
 		e->event.vbl.user_data = page_flip->user_data;
 		e->event.vbl.crtc_id = crtc->base.id;
+
 		ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base);
 		if (ret) {
 			kfree(e);
@@ -1060,7 +1077,7 @@
 		}
 	}
 
-	crtc->primary->old_fb = crtc->primary->fb;
+	plane->old_fb = plane->fb;
 	if (crtc->funcs->page_flip_target)
 		ret = crtc->funcs->page_flip_target(crtc, fb, e,
 						    page_flip->flags,
@@ -1073,19 +1090,18 @@
 		if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT)
 			drm_event_cancel_free(dev, &e->base);
 		/* Keep the old fb, don't unref it. */
-		crtc->primary->old_fb = NULL;
+		plane->old_fb = NULL;
 	} else {
-		crtc->primary->fb = fb;
-		/* Unref only the old framebuffer. */
-		fb = NULL;
+		plane->fb = fb;
+		drm_framebuffer_get(fb);
 	}
 
 out:
 	if (fb)
 		drm_framebuffer_put(fb);
-	if (crtc->primary->old_fb)
-		drm_framebuffer_put(crtc->primary->old_fb);
-	crtc->primary->old_fb = NULL;
+	if (plane->old_fb)
+		drm_framebuffer_put(plane->old_fb);
+	plane->old_fb = NULL;
 
 	if (ret == -EDEADLK) {
 		ret = drm_modeset_backoff(&ctx);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 7856a9b..397b46b 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -331,6 +331,9 @@
 
 /**
  * drm_gem_unmap_dma_buf - unmap_dma_buf implementation for GEM
+ * @attach: attachment to unmap buffer from
+ * @sgt: scatterlist info of the buffer to unmap
+ * @dir: direction of DMA transfer
  *
  * Not implemented. The unmap is done at drm_gem_map_detach().  This can be
  * used as the &dma_buf_ops.unmap_dma_buf callback.
@@ -406,7 +409,10 @@
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct drm_device *dev = obj->dev;
 
-	return dev->driver->gem_prime_vmap(obj);
+	if (dev->driver->gem_prime_vmap)
+		return dev->driver->gem_prime_vmap(obj);
+	else
+		return NULL;
 }
 EXPORT_SYMBOL(drm_gem_dmabuf_vmap);
 
@@ -423,12 +429,15 @@
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct drm_device *dev = obj->dev;
 
-	dev->driver->gem_prime_vunmap(obj, vaddr);
+	if (dev->driver->gem_prime_vunmap)
+		dev->driver->gem_prime_vunmap(obj, vaddr);
 }
 EXPORT_SYMBOL(drm_gem_dmabuf_vunmap);
 
 /**
  * drm_gem_dmabuf_kmap_atomic - map_atomic implementation for GEM
+ * @dma_buf: buffer to be mapped
+ * @page_num: page number within the buffer
  *
  * Not implemented. This can be used as the &dma_buf_ops.map_atomic callback.
  */
@@ -441,6 +450,9 @@
 
 /**
  * drm_gem_dmabuf_kunmap_atomic - unmap_atomic implementation for GEM
+ * @dma_buf: buffer to be unmapped
+ * @page_num: page number within the buffer
+ * @addr: virtual address of the buffer
  *
  * Not implemented. This can be used as the &dma_buf_ops.unmap_atomic callback.
  */
@@ -453,6 +465,8 @@
 
 /**
  * drm_gem_dmabuf_kmap - map implementation for GEM
+ * @dma_buf: buffer to be mapped
+ * @page_num: page number within the buffer
  *
  * Not implemented. This can be used as the &dma_buf_ops.map callback.
  */
@@ -464,6 +478,9 @@
 
 /**
  * drm_gem_dmabuf_kunmap - unmap implementation for GEM
+ * @dma_buf: buffer to be unmapped
+ * @page_num: page number within the buffer
+ * @addr: virtual address of the buffer
  *
  * Not implemented. This can be used as the &dma_buf_ops.unmap callback.
  */
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index 8f4672d..1f8031e 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -169,9 +169,9 @@
 		return NULL;
 
 	for (i = 0; i < num_values; i++) {
-		ret = drm_property_add_enum(property, i,
-				      props[i].type,
-				      props[i].name);
+		ret = drm_property_add_enum(property,
+					    props[i].type,
+					    props[i].name);
 		if (ret) {
 			drm_property_destroy(dev, property);
 			return NULL;
@@ -209,7 +209,7 @@
 						 uint64_t supported_bits)
 {
 	struct drm_property *property;
-	int i, ret, index = 0;
+	int i, ret;
 	int num_values = hweight64(supported_bits);
 
 	flags |= DRM_MODE_PROP_BITMASK;
@@ -221,14 +221,9 @@
 		if (!(supported_bits & (1ULL << props[i].type)))
 			continue;
 
-		if (WARN_ON(index >= num_values)) {
-			drm_property_destroy(dev, property);
-			return NULL;
-		}
-
-		ret = drm_property_add_enum(property, index++,
-				      props[i].type,
-				      props[i].name);
+		ret = drm_property_add_enum(property,
+					    props[i].type,
+					    props[i].name);
 		if (ret) {
 			drm_property_destroy(dev, property);
 			return NULL;
@@ -376,7 +371,6 @@
 /**
  * drm_property_add_enum - add a possible value to an enumeration property
  * @property: enumeration property to change
- * @index: index of the new enumeration
  * @value: value of the new enumeration
  * @name: symbolic name of the new enumeration
  *
@@ -388,10 +382,11 @@
  * Returns:
  * Zero on success, error code on failure.
  */
-int drm_property_add_enum(struct drm_property *property, int index,
+int drm_property_add_enum(struct drm_property *property,
 			  uint64_t value, const char *name)
 {
 	struct drm_property_enum *prop_enum;
+	int index = 0;
 
 	if (WARN_ON(strlen(name) >= DRM_PROP_NAME_LEN))
 		return -EINVAL;
@@ -411,8 +406,12 @@
 	list_for_each_entry(prop_enum, &property->enum_list, head) {
 		if (WARN_ON(prop_enum->value == value))
 			return -EINVAL;
+		index++;
 	}
 
+	if (WARN_ON(index >= property->num_values))
+		return -EINVAL;
+
 	prop_enum = kzalloc(sizeof(struct drm_property_enum), GFP_KERNEL);
 	if (!prop_enum)
 		return -ENOMEM;
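
With the index derived internally, building an enum property by hand looks
like this (sketch; "panel mode" and its values are made up, and
drm_property_create_enum() remains the usual shortcut doing the same loop):

	prop = drm_property_create(dev, DRM_MODE_PROP_ENUM, "panel mode", 2);
	if (!prop)
		return -ENOMEM;

	drm_property_add_enum(prop, 0, "normal");	/* value, name */
	drm_property_add_enum(prop, 1, "inverted");
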
diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c
index 9817c14..8c05782 100644
--- a/drivers/gpu/drm/drm_rect.c
+++ b/drivers/gpu/drm/drm_rect.c
@@ -50,13 +50,25 @@
 }
 EXPORT_SYMBOL(drm_rect_intersect);
 
+static u32 clip_scaled(u32 src, u32 dst, u32 clip)
+{
+	u64 tmp = mul_u32_u32(src, dst - clip);
+
+	/*
+	 * Round toward 1.0 when clipping so that we don't accidentally
+	 * change upscaling to downscaling or vice versa.
+	 */
+	if (src < (dst << 16))
+		return DIV_ROUND_UP_ULL(tmp, dst);
+	else
+		return DIV_ROUND_DOWN_ULL(tmp, dst);
+}
+
 /**
  * drm_rect_clip_scaled - perform a scaled clip operation
  * @src: source window rectangle
  * @dst: destination window rectangle
  * @clip: clip rectangle
- * @hscale: horizontal scaling factor
- * @vscale: vertical scaling factor
  *
  * Clip rectangle @dst by rectangle @clip. Clip rectangle @src by the
  * same amounts scaled by the horizontal and vertical scaling factors.
@@ -66,33 +78,44 @@
  * %false otherwise
  */
 bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst,
-			  const struct drm_rect *clip,
-			  int hscale, int vscale)
+			  const struct drm_rect *clip)
 {
 	int diff;
 
 	diff = clip->x1 - dst->x1;
 	if (diff > 0) {
-		int64_t tmp = src->x1 + (int64_t) diff * hscale;
-		src->x1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX);
+		u32 new_src_w = clip_scaled(drm_rect_width(src),
+					    drm_rect_width(dst), diff);
+
+		src->x1 = clamp_t(int64_t, src->x2 - new_src_w, INT_MIN, INT_MAX);
+		dst->x1 = clip->x1;
 	}
 	diff = clip->y1 - dst->y1;
 	if (diff > 0) {
-		int64_t tmp = src->y1 + (int64_t) diff * vscale;
-		src->y1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX);
+		u32 new_src_h = clip_scaled(drm_rect_height(src),
+					    drm_rect_height(dst), diff);
+
+		src->y1 = clamp_t(int64_t, src->y2 - new_src_h, INT_MIN, INT_MAX);
+		dst->y1 = clip->y1;
 	}
 	diff = dst->x2 - clip->x2;
 	if (diff > 0) {
-		int64_t tmp = src->x2 - (int64_t) diff * hscale;
-		src->x2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX);
+		u32 new_src_w = clip_scaled(drm_rect_width(src),
+					    drm_rect_width(dst), diff);
+
+		src->x2 = clamp_t(int64_t, src->x1 + new_src_w, INT_MIN, INT_MAX);
+		dst->x2 = clip->x2;
 	}
 	diff = dst->y2 - clip->y2;
 	if (diff > 0) {
-		int64_t tmp = src->y2 - (int64_t) diff * vscale;
-		src->y2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX);
+		u32 new_src_h = clip_scaled(drm_rect_height(src),
+					    drm_rect_height(dst), diff);
+
+		src->y2 = clamp_t(int64_t, src->y1 + new_src_h, INT_MIN, INT_MAX);
+		dst->y2 = clip->y2;
 	}
 
-	return drm_rect_intersect(dst, clip);
+	return drm_rect_visible(dst);
 }
 EXPORT_SYMBOL(drm_rect_clip_scaled);
 
@@ -106,7 +129,10 @@
 	if (dst == 0)
 		return 0;
 
-	scale = src / dst;
+	if (src > (dst << 16))
+		return DIV_ROUND_UP(src, dst);
+	else
+		scale = src / dst;
 
 	return scale;
 }
@@ -121,6 +147,10 @@
  * Calculate the horizontal scaling factor as
  * (@src width) / (@dst width).
  *
+ * If the scale is below 1 << 16, round down. If the scale is above
+ * 1 << 16, round up. This makes the limit checks use the most
+ * pessimistic scale value.
+ *
  * RETURNS:
  * The horizontal scaling factor, or errno of out of limits.
  */
@@ -152,6 +182,10 @@
  * Calculate the vertical scaling factor as
  * (@src height) / (@dst height).
  *
+ * If the scale is below 1 << 16, round down. If the scale is above
+ * 1 << 16, round up. This makes the limit checks use the most
+ * pessimistic scale value.
+ *
  * RETURNS:
  * The vertical scaling factor, or errno of out of limits.
  */
@@ -189,6 +223,10 @@
  * If the calculated scaling factor is above @max_vscale,
  * decrease the height of rectangle @src to compensate.
  *
+ * If the scale is below 1 << 16, round down. If the scale is above
+ * 1 << 16, round up. This makes the limit checks use the most
+ * pessimistic scale value.
+ *
  * RETURNS:
  * The horizontal scaling factor.
  */
@@ -239,6 +277,10 @@
  * If the calculated scaling factor is above @max_vscale,
  * decrease the height of rectangle @src to compensate.
  *
+ * If the scale is below 1 << 16, round down. If the scale is above
+ * 1 << 16, round up. This makes the limit checks use the most
+ * pessimistic scale value.
+ *
  * RETURNS:
  * The vertical scaling factor.
  */
@@ -373,8 +415,8 @@
  * them when doing a rotation and its inverse.
  * That is, if you do ::
  *
- *     DRM_MODE_PROP_ROTATE(&r, width, height, rotation);
- *     DRM_MODE_ROTATE_inv(&r, width, height, rotation);
+ *     drm_rect_rotate(&r, width, height, rotation);
+ *     drm_rect_rotate_inv(&r, width, height, rotation);
  *
  * you will always get back the original rectangle.
  */
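
A worked example of the pessimistic rounding, with the source in 16.16 fixed
point and the destination in whole pixels as elsewhere in drm_rect: a
3.0-wide source scanned out into a 2-pixel-wide destination gives

	scale = DIV_ROUND_UP(3 << 16, 2);	/* 0x18000, i.e. 1.5 */

so a plane whose hardware tops out at max_hscale = 1 << 16 is reliably
rejected; with truncating division a fractionally-over-limit scale could
previously round down to exactly the limit and slip through.
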
diff --git a/drivers/gpu/drm/drm_scdc_helper.c b/drivers/gpu/drm/drm_scdc_helper.c
index 657ea5a..870e25f 100644
--- a/drivers/gpu/drm/drm_scdc_helper.c
+++ b/drivers/gpu/drm/drm_scdc_helper.c
@@ -141,7 +141,7 @@
 
 	ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, &status);
 	if (ret < 0) {
-		DRM_ERROR("Failed to read scrambling status: %d\n", ret);
+		DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret);
 		return false;
 	}
 
@@ -168,7 +168,7 @@
 
 	ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
 	if (ret < 0) {
-		DRM_ERROR("Failed to read TMDS config: %d\n", ret);
+		DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret);
 		return false;
 	}
 
@@ -179,7 +179,7 @@
 
 	ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config);
 	if (ret < 0) {
-		DRM_ERROR("Failed to enable scrambling: %d\n", ret);
+		DRM_DEBUG_KMS("Failed to enable scrambling: %d\n", ret);
 		return false;
 	}
 
@@ -223,7 +223,7 @@
 
 	ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
 	if (ret < 0) {
-		DRM_ERROR("Failed to read TMDS config: %d\n", ret);
+		DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret);
 		return false;
 	}
 
@@ -234,7 +234,7 @@
 
 	ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config);
 	if (ret < 0) {
-		DRM_ERROR("Failed to set TMDS clock ratio: %d\n", ret);
+		DRM_DEBUG_KMS("Failed to set TMDS clock ratio: %d\n", ret);
 		return false;
 	}
 
diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c
index 987a353..7a00455 100644
--- a/drivers/gpu/drm/drm_simple_kms_helper.c
+++ b/drivers/gpu/drm/drm_simple_kms_helper.c
@@ -64,13 +64,15 @@
 static void drm_simple_kms_crtc_enable(struct drm_crtc *crtc,
 				       struct drm_crtc_state *old_state)
 {
+	struct drm_plane *plane;
 	struct drm_simple_display_pipe *pipe;
 
 	pipe = container_of(crtc, struct drm_simple_display_pipe, crtc);
 	if (!pipe->funcs || !pipe->funcs->enable)
 		return;
 
-	pipe->funcs->enable(pipe, crtc->state);
+	plane = &pipe->plane;
+	pipe->funcs->enable(pipe, crtc->state, plane->state);
 }
 
 static void drm_simple_kms_crtc_disable(struct drm_crtc *crtc,
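
The simple-KMS enable() hook now also receives the plane state, so a driver
can program its initial framebuffer in the same step. A hedged sketch of a
driver callback matching the new three-argument signature (my_pipe_enable()
is an illustrative name):

#include <drm/drm_simple_kms_helper.h>

static void my_pipe_enable(struct drm_simple_display_pipe *pipe,
			   struct drm_crtc_state *crtc_state,
			   struct drm_plane_state *plane_state)
{
	/* bring up the display from crtc_state and start scanning out
	 * plane_state->fb */
}
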
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 1c5b5ce..b3c1daa 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -331,9 +331,7 @@
 	struct device *kdev;
 	int r;
 
-	if (minor->type == DRM_MINOR_CONTROL)
-		minor_str = "controlD%d";
-	else if (minor->type == DRM_MINOR_RENDER)
+	if (minor->type == DRM_MINOR_RENDER)
 		minor_str = "renderD%d";
 	else
 		minor_str = "card%d";
diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig
index e5bfeca..041a77e 100644
--- a/drivers/gpu/drm/etnaviv/Kconfig
+++ b/drivers/gpu/drm/etnaviv/Kconfig
@@ -22,11 +22,3 @@
 	help
 	  Compile in support for thermal throttling.
 	  Say Y unless you want to risk burning your SoC.
-
-config DRM_ETNAVIV_REGISTER_LOGGING
-	bool "enable ETNAVIV register logging"
-	depends on DRM_ETNAVIV
-	help
-	  Compile in support for logging register reads/writes in a format
-	  that can be parsed by envytools demsm tool.  If enabled, register
-	  logging can be switched on via etnaviv.reglog=y module param.
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index bfc6d4a..7fea748 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2014 Etnaviv Project
- * Author: Christian Gmeiner <christian.gmeiner@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2014-2018 Etnaviv Project
  */
 
 #include "etnaviv_cmdbuf.h"
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
index 68e6d37..b106e8b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
index 3746827..a3c44f1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2017 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2017-2018 Etnaviv Project
  */
 
 #include <drm/drm_mm.h>
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
index ddc3f7e..acb68c6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2017 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __ETNAVIV_CMDBUF_H__
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index ab50090..e5013a9 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #include <linux/component.h>
@@ -25,57 +14,6 @@
 #include "etnaviv_mmu.h"
 #include "etnaviv_perfmon.h"
 
-#ifdef CONFIG_DRM_ETNAVIV_REGISTER_LOGGING
-static bool reglog;
-MODULE_PARM_DESC(reglog, "Enable register read/write logging");
-module_param(reglog, bool, 0600);
-#else
-#define reglog 0
-#endif
-
-void __iomem *etnaviv_ioremap(struct platform_device *pdev, const char *name,
-		const char *dbgname)
-{
-	struct resource *res;
-	void __iomem *ptr;
-
-	if (name)
-		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
-	else
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	ptr = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(ptr)) {
-		dev_err(&pdev->dev, "failed to ioremap %s: %ld\n", name,
-			PTR_ERR(ptr));
-		return ptr;
-	}
-
-	if (reglog)
-		dev_printk(KERN_DEBUG, &pdev->dev, "IO:region %s 0x%p %08zx\n",
-			   dbgname, ptr, (size_t)resource_size(res));
-
-	return ptr;
-}
-
-void etnaviv_writel(u32 data, void __iomem *addr)
-{
-	if (reglog)
-		printk(KERN_DEBUG "IO:W %p %08x\n", addr, data);
-
-	writel(data, addr);
-}
-
-u32 etnaviv_readl(const void __iomem *addr)
-{
-	u32 val = readl(addr);
-
-	if (reglog)
-		printk(KERN_DEBUG "IO:R %p %08x\n", addr, val);
-
-	return val;
-}
-
 /*
  * DRM operations:
  */
@@ -116,7 +54,7 @@
 			drm_sched_entity_init(&gpu->sched,
 				&ctx->sched_entity[i],
 				&gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
-				32, NULL);
+				NULL);
 			}
 	}
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index ddb17ee5..d36c7bb 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #ifndef __ETNAVIV_DRV_H__
@@ -26,6 +15,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/list.h>
+#include <linux/time64.h>
 #include <linux/types.h>
 #include <linux/sizes.h>
 
@@ -101,11 +91,6 @@
 	struct seq_file *m);
 #endif
 
-void __iomem *etnaviv_ioremap(struct platform_device *pdev, const char *name,
-		const char *dbgname);
-void etnaviv_writel(u32 data, void __iomem *addr);
-u32 etnaviv_readl(const void __iomem *addr);
-
 #define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
 #define VERB(fmt, ...) if (0) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
 
@@ -132,19 +117,27 @@
 	return (s32)(a - b) >= 0;
 }
 
+/*
+ * Etnaviv timeouts are specified wrt CLOCK_MONOTONIC, not jiffies.
+ * We need to calculate the timeout in terms of number of jiffies
+ * between the specified timeout and the current CLOCK_MONOTONIC time.
+ */
 static inline unsigned long etnaviv_timeout_to_jiffies(
 	const struct timespec *timeout)
 {
-	unsigned long timeout_jiffies = timespec_to_jiffies(timeout);
-	unsigned long start_jiffies = jiffies;
-	unsigned long remaining_jiffies;
+	struct timespec64 ts, to;
 
-	if (time_after(start_jiffies, timeout_jiffies))
-		remaining_jiffies = 0;
-	else
-		remaining_jiffies = timeout_jiffies - start_jiffies;
+	to = timespec_to_timespec64(*timeout);
 
-	return remaining_jiffies;
+	ktime_get_ts64(&ts);
+
+	/* timeouts before "now" have already expired */
+	if (timespec64_compare(&to, &ts) <= 0)
+		return 0;
+
+	ts = timespec64_sub(to, ts);
+
+	return timespec64_to_jiffies(&ts);
 }
 
 #endif /* __ETNAVIV_DRV_H__ */
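
The rewritten helper treats the caller's timespec as an absolute
CLOCK_MONOTONIC deadline and converts only the remaining interval to
jiffies, rather than comparing a CLOCK_MONOTONIC value against the jiffies
clock. A userspace-flavoured sketch of the same arithmetic (remaining_ms()
is illustrative, not a kernel helper):

#include <time.h>

/* milliseconds left until an absolute CLOCK_MONOTONIC deadline */
static long remaining_ms(const struct timespec *deadline)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);

	/* deadlines at or before "now" have already expired */
	if (deadline->tv_sec < now.tv_sec ||
	    (deadline->tv_sec == now.tv_sec &&
	     deadline->tv_nsec <= now.tv_nsec))
		return 0;

	return (deadline->tv_sec - now.tv_sec) * 1000L +
	       (deadline->tv_nsec - now.tv_nsec) / 1000000L;
}
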
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 48aef6c..9146e30 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #include <linux/devcoredump.h>
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.h b/drivers/gpu/drm/etnaviv/etnaviv_dump.h
index 97f2f8d..2d916c2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * Etnaviv devcoredump file definitions
  */
+
 #ifndef ETNAVIV_DUMP_H
 #define ETNAVIV_DUMP_H
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index fcc969f..209ef12 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #include <linux/spinlock.h>
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index 93e696f..76079c22 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #ifndef __ETNAVIV_GEM_H__
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index 5704305..0566171 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2013 Red Hat
- * Author: Rob Clark <robdclark@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2014-2018 Etnaviv Project
  */
 
 #include <linux/dma-buf.h>
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 8a88799..686f655 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #include <linux/component.h>
@@ -1735,6 +1724,7 @@
 {
 	struct device *dev = &pdev->dev;
 	struct etnaviv_gpu *gpu;
+	struct resource *res;
 	int err;
 
 	gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL);
@@ -1746,7 +1736,8 @@
 	mutex_init(&gpu->fence_idr_lock);
 
 	/* Map registers: */
-	gpu->mmio = etnaviv_ioremap(pdev, NULL, dev_name(gpu->dev));
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	gpu->mmio = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(gpu->mmio))
 		return PTR_ERR(gpu->mmio);
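
With the reglog wrappers gone, register mapping in the probe path uses the
stock platform/devm helpers directly, exactly as in the hunk above. A
minimal standalone sketch (map_mmio() is an illustrative name):

#include <linux/io.h>
#include <linux/platform_device.h>

static void __iomem *map_mmio(struct platform_device *pdev)
{
	struct resource *res;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	/* returns an ERR_PTR on failure, so check with IS_ERR() */
	return devm_ioremap_resource(&pdev->dev, res);
}
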
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 3c30055..dd430f0 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #ifndef __ETNAVIV_GPU_H__
@@ -161,12 +150,12 @@
 
 static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data)
 {
-	etnaviv_writel(data, gpu->mmio + reg);
+	writel(data, gpu->mmio + reg);
 }
 
 static inline u32 gpu_read(struct etnaviv_gpu *gpu, u32 reg)
 {
-	return etnaviv_readl(gpu->mmio + reg);
+	return readl(gpu->mmio + reg);
 }
 
 static inline bool fence_completed(struct etnaviv_gpu *gpu, u32 fence)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
index ea08bb3..39b463d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2018 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "etnaviv_gpu.h"
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 4b9b11c..b163bdb 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2014-2018 Etnaviv Project
  */
 
 #include <linux/platform_device.h>
@@ -47,11 +36,10 @@
 	u32 *p;
 	int i;
 
-	etnaviv_domain->base.bad_page_cpu = dma_alloc_coherent(
-						etnaviv_domain->base.dev,
-						SZ_4K,
-						&etnaviv_domain->base.bad_page_dma,
-						GFP_KERNEL);
+	etnaviv_domain->base.bad_page_cpu =
+			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
+				     &etnaviv_domain->base.bad_page_dma,
+				     GFP_KERNEL);
 	if (!etnaviv_domain->base.bad_page_cpu)
 		return -ENOMEM;
 
@@ -59,14 +47,14 @@
 	for (i = 0; i < SZ_4K / 4; i++)
 		*p++ = 0xdead55aa;
 
-	etnaviv_domain->pgtable_cpu =
-			dma_alloc_coherent(etnaviv_domain->base.dev, PT_SIZE,
-					   &etnaviv_domain->pgtable_dma,
-					   GFP_KERNEL);
+	etnaviv_domain->pgtable_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
+						   PT_SIZE,
+						   &etnaviv_domain->pgtable_dma,
+						   GFP_KERNEL);
 	if (!etnaviv_domain->pgtable_cpu) {
-		dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-				  etnaviv_domain->base.bad_page_cpu,
-				  etnaviv_domain->base.bad_page_dma);
+		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+			    etnaviv_domain->base.bad_page_cpu,
+			    etnaviv_domain->base.bad_page_dma);
 		return -ENOMEM;
 	}
 
@@ -81,13 +69,12 @@
 	struct etnaviv_iommuv1_domain *etnaviv_domain =
 			to_etnaviv_domain(domain);
 
-	dma_free_coherent(etnaviv_domain->base.dev, PT_SIZE,
-			  etnaviv_domain->pgtable_cpu,
-			  etnaviv_domain->pgtable_dma);
+	dma_free_wc(etnaviv_domain->base.dev, PT_SIZE,
+		    etnaviv_domain->pgtable_cpu, etnaviv_domain->pgtable_dma);
 
-	dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-			  etnaviv_domain->base.bad_page_cpu,
-			  etnaviv_domain->base.bad_page_dma);
+	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+		    etnaviv_domain->base.bad_page_cpu,
+		    etnaviv_domain->base.bad_page_dma);
 
 	kfree(etnaviv_domain);
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
index 01d59bf..b279404 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (C) 2014 Christian Gmeiner <christian.gmeiner@gmail.com>
-  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2014-2018 Etnaviv Project
  */
 
 #ifndef __ETNAVIV_IOMMU_H__
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
index 9752dbd..71fbc1f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2016 Etnaviv Project
-  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2016-2018 Etnaviv Project
  */
 
 #include <linux/platform_device.h>
@@ -47,8 +36,8 @@
 	u32 *mtlb_cpu;
 	dma_addr_t mtlb_dma;
 	/* S(lave) TLB aka second level pagetable */
-	u32 *stlb_cpu[1024];
-	dma_addr_t stlb_dma[1024];
+	u32 *stlb_cpu[MMUv2_MAX_STLB_ENTRIES];
+	dma_addr_t stlb_dma[MMUv2_MAX_STLB_ENTRIES];
 };
 
 static struct etnaviv_iommuv2_domain *
@@ -57,24 +46,54 @@
 	return container_of(domain, struct etnaviv_iommuv2_domain, base);
 }
 
+static int
+etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_domain *etnaviv_domain,
+			    int stlb)
+{
+	if (etnaviv_domain->stlb_cpu[stlb])
+		return 0;
+
+	etnaviv_domain->stlb_cpu[stlb] =
+			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
+				     &etnaviv_domain->stlb_dma[stlb],
+				     GFP_KERNEL);
+
+	if (!etnaviv_domain->stlb_cpu[stlb])
+		return -ENOMEM;
+
+	memset32(etnaviv_domain->stlb_cpu[stlb], MMUv2_PTE_EXCEPTION,
+		 SZ_4K / sizeof(u32));
+
+	etnaviv_domain->mtlb_cpu[stlb] = etnaviv_domain->stlb_dma[stlb] |
+						      MMUv2_PTE_PRESENT;
+	return 0;
+}
+
 static int etnaviv_iommuv2_map(struct etnaviv_iommu_domain *domain,
 			       unsigned long iova, phys_addr_t paddr,
 			       size_t size, int prot)
 {
 	struct etnaviv_iommuv2_domain *etnaviv_domain =
 			to_etnaviv_domain(domain);
-	int mtlb_entry, stlb_entry;
-	u32 entry = (u32)paddr | MMUv2_PTE_PRESENT;
+	int mtlb_entry, stlb_entry, ret;
+	u32 entry = lower_32_bits(paddr) | MMUv2_PTE_PRESENT;
 
 	if (size != SZ_4K)
 		return -EINVAL;
 
+	if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+		entry |= (upper_32_bits(paddr) & 0xff) << 4;
+
 	if (prot & ETNAVIV_PROT_WRITE)
 		entry |= MMUv2_PTE_WRITEABLE;
 
 	mtlb_entry = (iova & MMUv2_MTLB_MASK) >> MMUv2_MTLB_SHIFT;
 	stlb_entry = (iova & MMUv2_STLB_MASK) >> MMUv2_STLB_SHIFT;
 
+	ret = etnaviv_iommuv2_ensure_stlb(etnaviv_domain, mtlb_entry);
+	if (ret)
+		return ret;
+
 	etnaviv_domain->stlb_cpu[mtlb_entry][stlb_entry] = entry;
 
 	return 0;
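
On CONFIG_PHYS_ADDR_T_64BIT kernels the map path above packs a 40-bit
physical address into a 32-bit PTE: bits 31:0 are stored as-is (the low 12
are zero for a 4K-aligned page), and bits 39:32 go into PTE bits 11:4. A
sketch of that packing, reusing the driver-local MMUv2_PTE_* flags
(pack_pte() itself is illustrative):

#include <linux/kernel.h>	/* lower_32_bits(), upper_32_bits() */

static u32 pack_pte(u64 paddr, bool writeable)
{
	/* paddr is 4K aligned, so bits 11:0 of the low word are zero */
	u32 entry = lower_32_bits(paddr) | MMUv2_PTE_PRESENT;

	/* physical address bits 39:32 are stored in PTE bits 11:4 */
	entry |= (upper_32_bits(paddr) & 0xff) << 4;

	if (writeable)
		entry |= MMUv2_PTE_WRITEABLE;

	return entry;
}
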
@@ -101,14 +120,13 @@
 static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain)
 {
 	u32 *p;
-	int ret, i, j;
+	int ret, i;
 
 	/* allocate scratch page */
-	etnaviv_domain->base.bad_page_cpu = dma_alloc_coherent(
-						etnaviv_domain->base.dev,
-						SZ_4K,
-						&etnaviv_domain->base.bad_page_dma,
-						GFP_KERNEL);
+	etnaviv_domain->base.bad_page_cpu =
+			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
+				     &etnaviv_domain->base.bad_page_dma,
+				     GFP_KERNEL);
 	if (!etnaviv_domain->base.bad_page_cpu) {
 		ret = -ENOMEM;
 		goto fail_mem;
@@ -117,67 +135,40 @@
 	for (i = 0; i < SZ_4K / 4; i++)
 		*p++ = 0xdead55aa;
 
-	etnaviv_domain->pta_cpu = dma_alloc_coherent(etnaviv_domain->base.dev,
-						     SZ_4K,
-						     &etnaviv_domain->pta_dma,
-						     GFP_KERNEL);
+	etnaviv_domain->pta_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
+					       SZ_4K, &etnaviv_domain->pta_dma,
+					       GFP_KERNEL);
 	if (!etnaviv_domain->pta_cpu) {
 		ret = -ENOMEM;
 		goto fail_mem;
 	}
 
-	etnaviv_domain->mtlb_cpu = dma_alloc_coherent(etnaviv_domain->base.dev,
-						  SZ_4K,
-						  &etnaviv_domain->mtlb_dma,
-						  GFP_KERNEL);
+	etnaviv_domain->mtlb_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
+						SZ_4K, &etnaviv_domain->mtlb_dma,
+						GFP_KERNEL);
 	if (!etnaviv_domain->mtlb_cpu) {
 		ret = -ENOMEM;
 		goto fail_mem;
 	}
 
-	/* pre-populate STLB pages (may want to switch to on-demand later) */
-	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
-		etnaviv_domain->stlb_cpu[i] =
-				dma_alloc_coherent(etnaviv_domain->base.dev,
-						   SZ_4K,
-						   &etnaviv_domain->stlb_dma[i],
-						   GFP_KERNEL);
-		if (!etnaviv_domain->stlb_cpu[i]) {
-			ret = -ENOMEM;
-			goto fail_mem;
-		}
-		p = etnaviv_domain->stlb_cpu[i];
-		for (j = 0; j < SZ_4K / 4; j++)
-			*p++ = MMUv2_PTE_EXCEPTION;
-
-		etnaviv_domain->mtlb_cpu[i] = etnaviv_domain->stlb_dma[i] |
-					      MMUv2_PTE_PRESENT;
-	}
+	memset32(etnaviv_domain->mtlb_cpu, MMUv2_PTE_EXCEPTION,
+		 MMUv2_MAX_STLB_ENTRIES);
 
 	return 0;
 
 fail_mem:
 	if (etnaviv_domain->base.bad_page_cpu)
-		dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-				  etnaviv_domain->base.bad_page_cpu,
-				  etnaviv_domain->base.bad_page_dma);
+		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+			    etnaviv_domain->base.bad_page_cpu,
+			    etnaviv_domain->base.bad_page_dma);
 
 	if (etnaviv_domain->pta_cpu)
-		dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-				  etnaviv_domain->pta_cpu,
-				  etnaviv_domain->pta_dma);
+		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+			    etnaviv_domain->pta_cpu, etnaviv_domain->pta_dma);
 
 	if (etnaviv_domain->mtlb_cpu)
-		dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-				  etnaviv_domain->mtlb_cpu,
-				  etnaviv_domain->mtlb_dma);
-
-	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
-		if (etnaviv_domain->stlb_cpu[i])
-			dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-					  etnaviv_domain->stlb_cpu[i],
-					  etnaviv_domain->stlb_dma[i]);
-	}
+		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+			    etnaviv_domain->mtlb_cpu, etnaviv_domain->mtlb_dma);
 
 	return ret;
 }
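
memset32() takes its count in 32-bit elements, not bytes, which is why the
MTLB fill above passes MMUv2_MAX_STLB_ENTRIES and the removed per-STLB
loops shrink to one line each. A usage sketch for a 4K second-level table
(the function name is illustrative; MMUv2_PTE_EXCEPTION is the driver-local
fault marker):

#include <linux/string.h>	/* memset32() */
#include <linux/sizes.h>

static void stlb_fill_exceptions(u32 *stlb_cpu)
{
	/* count is in u32 elements: 4096 / 4 = 1024 entries */
	memset32(stlb_cpu, MMUv2_PTE_EXCEPTION, SZ_4K / sizeof(u32));
}
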
@@ -188,23 +179,21 @@
 			to_etnaviv_domain(domain);
 	int i;
 
-	dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-			  etnaviv_domain->base.bad_page_cpu,
-			  etnaviv_domain->base.bad_page_dma);
+	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+		    etnaviv_domain->base.bad_page_cpu,
+		    etnaviv_domain->base.bad_page_dma);
 
-	dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-			  etnaviv_domain->pta_cpu,
-			  etnaviv_domain->pta_dma);
+	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+		    etnaviv_domain->pta_cpu, etnaviv_domain->pta_dma);
 
-	dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-			  etnaviv_domain->mtlb_cpu,
-			  etnaviv_domain->mtlb_dma);
+	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+		    etnaviv_domain->mtlb_cpu, etnaviv_domain->mtlb_dma);
 
 	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
 		if (etnaviv_domain->stlb_cpu[i])
-			dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
-					  etnaviv_domain->stlb_cpu[i],
-					  etnaviv_domain->stlb_dma[i]);
+			dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
+				    etnaviv_domain->stlb_cpu[i],
+				    etnaviv_domain->stlb_dma[i]);
 	}
 
 	vfree(etnaviv_domain);
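
Every coherent allocation in the IOMMU code moves to the write-combined DMA
helpers, and the allocation and free sides must stay paired. A hedged
sketch of that pairing (function names illustrative):

#include <linux/dma-mapping.h>
#include <linux/sizes.h>

static int alloc_wc_page(struct device *dev, void **cpu, dma_addr_t *dma)
{
	*cpu = dma_alloc_wc(dev, SZ_4K, dma, GFP_KERNEL);
	return *cpu ? 0 : -ENOMEM;
}

static void free_wc_page(struct device *dev, void *cpu, dma_addr_t dma)
{
	/* must match the write-combined allocator, not dma_free_coherent() */
	dma_free_wc(dev, SZ_4K, cpu, dma);
}
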
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index 49e0497..8069f9f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #include "common.xml.h"
@@ -162,22 +151,10 @@
 		bool found;
 
 		ret = drm_mm_insert_node_in_range(&mmu->mm, node,
-						  size, 0, 0,
-						  mmu->last_iova, U64_MAX,
-						  mode);
+						  size, 0, 0, 0, U64_MAX, mode);
 		if (ret != -ENOSPC)
 			break;
 
-		/*
-		 * If we did not search from the start of the MMU region,
-		 * try again in case there are free slots.
-		 */
-		if (mmu->last_iova) {
-			mmu->last_iova = 0;
-			mmu->need_flush = true;
-			continue;
-		}
-
 		/* Try to retire some entries */
 		drm_mm_scan_init(&scan, &mmu->mm, size, 0, 0, mode);
 
@@ -274,7 +251,6 @@
 	if (ret < 0)
 		goto unlock;
 
-	mmu->last_iova = node->start + etnaviv_obj->base.size;
 	mapping->iova = node->start;
 	ret = etnaviv_iommu_map(mmu, node->start, sgt, etnaviv_obj->base.size,
 				ETNAVIV_PROT_READ | ETNAVIV_PROT_WRITE);
@@ -381,7 +357,6 @@
 			mutex_unlock(&mmu->lock);
 			return ret;
 		}
-		mmu->last_iova = vram_node->start + size;
 		gpu->mmu->need_flush = true;
 		mutex_unlock(&mmu->lock);
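
With the cached last_iova gone, every allocation simply searches the full
range and falls back to the eviction scan on -ENOSPC, as in the hunk above.
A sketch of the simplified insertion call (alloc_iova() is illustrative,
and the insert mode is whatever the caller selected):

#include <drm/drm_mm.h>

static int alloc_iova(struct drm_mm *mm, struct drm_mm_node *node, u64 size)
{
	/* no alignment or color constraints; search the whole range */
	return drm_mm_insert_node_in_range(mm, node, size, 0, 0,
					   0, U64_MAX, DRM_MM_INSERT_LOW);
}
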
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
index ab603f5..a0db17f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (C) 2015 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (C) 2015-2018 Etnaviv Project
  */
 
 #ifndef __ETNAVIV_MMU_H__
@@ -59,7 +48,6 @@
 	struct mutex lock;
 	struct list_head mappings;
 	struct drm_mm mm;
-	u32 last_iova;
 	bool need_flush;
 };
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
index 26dddfc..9980d81 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 Etnaviv Project
  * Copyright (C) 2017 Zodiac Inflight Innovations
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include "etnaviv_gpu.h"
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
index c1653c6..4a9d508 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.h
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2017 Etnaviv Project
  * Copyright (C) 2017 Zodiac Inflight Innovations
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __ETNAVIV_PERFMON_H__
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 6cf0775..a74eb57 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -1,17 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/kthread.h>
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.h b/drivers/gpu/drm/etnaviv/etnaviv_sched.h
index 097635f..c0a6796 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.h
@@ -1,17 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2017 Etnaviv Project
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __ETNAVIV_SCHED_H__
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 735ce47..208bc27 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -1,6 +1,6 @@
 config DRM_EXYNOS
 	tristate "DRM Support for Samsung SoC EXYNOS Series"
-	depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
+	depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
 	select DRM_KMS_HELPER
 	select VIDEOMODE_HELPERS
 	select SND_SOC_HDMI_CODEC if SND_SOC
@@ -95,21 +95,31 @@
 	help
 	  Choose this option if you want to use Exynos G2D for DRM.
 
+config DRM_EXYNOS_IPP
+	bool
+
 config DRM_EXYNOS_FIMC
 	bool "FIMC"
-	depends on BROKEN && MFD_SYSCON
+	select DRM_EXYNOS_IPP
 	help
 	  Choose this option if you want to use Exynos FIMC for DRM.
 
 config DRM_EXYNOS_ROTATOR
 	bool "Rotator"
-	depends on BROKEN
+	select DRM_EXYNOS_IPP
 	help
 	  Choose this option if you want to use Exynos Rotator for DRM.
 
+config DRM_EXYNOS_SCALER
+	bool "Scaler"
+	select DRM_EXYNOS_IPP
+	help
+	  Choose this option if you want to use Exynos Scaler for DRM.
+
 config DRM_EXYNOS_GSC
 	bool "GScaler"
-	depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n
+	depends on VIDEO_SAMSUNG_EXYNOS_GSC=n
+	select DRM_EXYNOS_IPP
 	help
 	  Choose this option if you want to use Exynos GSC for DRM.
 
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
index a51c545..3b323f1 100644
--- a/drivers/gpu/drm/exynos/Makefile
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -18,8 +18,10 @@
 exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI)	+= exynos_hdmi.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI)	+= exynos_drm_vidi.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_G2D)	+= exynos_drm_g2d.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_IPP)	+= exynos_drm_ipp.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC)	+= exynos_drm_fimc.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR)	+= exynos_drm_rotator.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_SCALER)	+= exynos_drm_scaler.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_GSC)	+= exynos_drm_gsc.o
 exynosdrm-$(CONFIG_DRM_EXYNOS_MIC)     += exynos_drm_mic.o
 
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 1c330f2..82c95c3 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -31,7 +31,10 @@
 #define DSD_CFG_MUX 0x1004
 #define DSD_CFG_MUX_TE_UNMASK_GLOBAL BIT(13)
 
-#define WINDOWS_NR	3
+#define WINDOWS_NR	5
+#define PRIMARY_WIN	2
+#define CURSON_WIN	4
+
 #define MIN_FB_WIDTH_FOR_16WORD_BURST	128
 
 #define I80_HW_TRG	(1 << 0)
@@ -43,6 +46,9 @@
 	"aclk_smmu_decon0x",
 	"aclk_xiu_decon0x",
 	"pclk_smmu_decon0x",
+	"aclk_smmu_decon1x",
+	"aclk_xiu_decon1x",
+	"pclk_smmu_decon1x",
 	"sclk_decon_vclk",
 	"sclk_decon_eclk",
 };
@@ -74,9 +80,8 @@
 };
 
 static const enum drm_plane_type decon_win_types[WINDOWS_NR] = {
-	DRM_PLANE_TYPE_PRIMARY,
-	DRM_PLANE_TYPE_OVERLAY,
-	DRM_PLANE_TYPE_CURSOR,
+	[PRIMARY_WIN] = DRM_PLANE_TYPE_PRIMARY,
+	[CURSON_WIN] = DRM_PLANE_TYPE_CURSOR,
 };
 
 static inline void decon_set_bits(struct decon_context *ctx, u32 reg, u32 mask,
@@ -552,12 +557,10 @@
 	drm_dev->max_vblank_count = 0xffffffff;
 
 	for (win = ctx->first_win; win < WINDOWS_NR; win++) {
-		int tmp = (win == ctx->first_win) ? 0 : win;
-
 		ctx->configs[win].pixel_formats = decon_formats;
 		ctx->configs[win].num_pixel_formats = ARRAY_SIZE(decon_formats);
-		ctx->configs[win].zpos = win;
-		ctx->configs[win].type = decon_win_types[tmp];
+		ctx->configs[win].zpos = win - ctx->first_win;
+		ctx->configs[win].type = decon_win_types[win];
 
 		ret = exynos_plane_init(drm_dev, &ctx->planes[win], win,
 					&ctx->configs[win]);
@@ -565,7 +568,7 @@
 			return ret;
 	}
 
-	exynos_plane = &ctx->planes[ctx->first_win];
+	exynos_plane = &ctx->planes[PRIMARY_WIN];
 	out_type = (ctx->out_type & IFTYPE_HDMI) ? EXYNOS_DISPLAY_TYPE_HDMI
 						  : EXYNOS_DISPLAY_TYPE_LCD;
 	ctx->crtc = exynos_drm_crtc_create(drm_dev, &exynos_plane->base,
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 964831d..86330f3 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -162,7 +162,7 @@
 	dp->drm_dev = drm_dev;
 
 	dp->plat_data.dev_type = EXYNOS_DP;
-	dp->plat_data.power_on = exynos_dp_poweron;
+	dp->plat_data.power_on_start = exynos_dp_poweron;
 	dp->plat_data.power_off = exynos_dp_poweroff;
 	dp->plat_data.attach = exynos_dp_bridge_attach;
 	dp->plat_data.get_modes = exynos_dp_get_modes;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index dc01342..eea9025 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -228,7 +228,7 @@
 		if (to_exynos_crtc(crtc)->type == out_type)
 			return to_exynos_crtc(crtc);
 
-	return ERR_PTR(-EPERM);
+	return ERR_PTR(-ENODEV);
 }
 
 int exynos_drm_set_possible_crtcs(struct drm_encoder *encoder,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index a518e9c..a81b4a5 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -27,35 +27,23 @@
 #include "exynos_drm_fb.h"
 #include "exynos_drm_gem.h"
 #include "exynos_drm_plane.h"
+#include "exynos_drm_ipp.h"
 #include "exynos_drm_vidi.h"
 #include "exynos_drm_g2d.h"
 #include "exynos_drm_iommu.h"
 
 #define DRIVER_NAME	"exynos"
 #define DRIVER_DESC	"Samsung SoC DRM"
-#define DRIVER_DATE	"20110530"
+#define DRIVER_DATE	"20180330"
+
+/*
+ * Interface history:
+ *
+ * 1.0 - Original version
+ * 1.1 - Upgrade IPP driver to version 2.0
+ */
 #define DRIVER_MAJOR	1
-#define DRIVER_MINOR	0
-
-int exynos_atomic_check(struct drm_device *dev,
-			struct drm_atomic_state *state)
-{
-	int ret;
-
-	ret = drm_atomic_helper_check_modeset(dev, state);
-	if (ret)
-		return ret;
-
-	ret = drm_atomic_normalize_zpos(dev, state);
-	if (ret)
-		return ret;
-
-	ret = drm_atomic_helper_check_planes(dev, state);
-	if (ret)
-		return ret;
-
-	return ret;
-}
+#define DRIVER_MINOR	1
 
 static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
 {
@@ -108,6 +96,16 @@
 			DRM_AUTH | DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl,
 			DRM_AUTH | DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_RESOURCES,
+			exynos_drm_ipp_get_res_ioctl,
+			DRM_AUTH | DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_CAPS, exynos_drm_ipp_get_caps_ioctl,
+			DRM_AUTH | DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_LIMITS,
+			exynos_drm_ipp_get_limits_ioctl,
+			DRM_AUTH | DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(EXYNOS_IPP_COMMIT, exynos_drm_ipp_commit_ioctl,
+			DRM_AUTH | DRM_RENDER_ALLOW),
 };
 
 static const struct file_operations exynos_drm_driver_fops = {
@@ -204,6 +202,7 @@
 #define DRM_COMPONENT_DRIVER	BIT(0)	/* supports component framework */
 #define DRM_VIRTUAL_DEVICE	BIT(1)	/* create virtual platform device */
 #define DRM_DMA_DEVICE		BIT(2)	/* can be used for dma allocations */
+#define DRM_FIMC_DEVICE		BIT(3)	/* devices shared with V4L2 subsystem */
 
 #define DRV_PTR(drv, cond) (IS_ENABLED(cond) ? &drv : NULL)
 
@@ -243,10 +242,16 @@
 		DRV_PTR(g2d_driver, CONFIG_DRM_EXYNOS_G2D),
 	}, {
 		DRV_PTR(fimc_driver, CONFIG_DRM_EXYNOS_FIMC),
+		DRM_COMPONENT_DRIVER | DRM_FIMC_DEVICE,
 	}, {
 		DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR),
+		DRM_COMPONENT_DRIVER
+	}, {
+		DRV_PTR(scaler_driver, CONFIG_DRM_EXYNOS_SCALER),
+		DRM_COMPONENT_DRIVER
 	}, {
 		DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC),
+		DRM_COMPONENT_DRIVER
 	}, {
 		&exynos_drm_platform_driver,
 		DRM_VIRTUAL_DEVICE
@@ -274,7 +279,11 @@
 					    &info->driver->driver,
 					    (void *)platform_bus_type.match))) {
 			put_device(p);
-			component_match_add(dev, &match, compare_dev, d);
+
+			if (!(info->flags & DRM_FIMC_DEVICE) ||
+			    exynos_drm_check_fimc_device(d) == 0)
+				component_match_add(dev, &match,
+						    compare_dev, d);
 			p = d;
 		}
 		put_device(p);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index df2262f..0f6d079 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -273,9 +273,17 @@
 }
 #endif
 
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+int exynos_drm_check_fimc_device(struct device *dev);
+#else
+static inline int exynos_drm_check_fimc_device(struct device *dev)
+{
+	return 0;
+}
+#endif
+
 int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state,
 			 bool nonblock);
-int exynos_atomic_check(struct drm_device *dev, struct drm_atomic_state *state);
 
 
 extern struct platform_driver fimd_driver;
@@ -289,6 +297,7 @@
 extern struct platform_driver g2d_driver;
 extern struct platform_driver fimc_driver;
 extern struct platform_driver rotator_driver;
+extern struct platform_driver scaler_driver;
 extern struct platform_driver gsc_driver;
 extern struct platform_driver mic_driver;
 #endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 7904ffa..7c3030b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -270,7 +270,6 @@
 	u32 lanes;
 	u32 mode_flags;
 	u32 format;
-	struct videomode vm;
 
 	int state;
 	struct drm_property *brightness;
@@ -881,30 +880,30 @@
 
 static void exynos_dsi_set_display_mode(struct exynos_dsi *dsi)
 {
-	struct videomode *vm = &dsi->vm;
+	struct drm_display_mode *m = &dsi->encoder.crtc->state->adjusted_mode;
 	unsigned int num_bits_resol = dsi->driver_data->num_bits_resol;
 	u32 reg;
 
 	if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
 		reg = DSIM_CMD_ALLOW(0xf)
-			| DSIM_STABLE_VFP(vm->vfront_porch)
-			| DSIM_MAIN_VBP(vm->vback_porch);
+			| DSIM_STABLE_VFP(m->vsync_start - m->vdisplay)
+			| DSIM_MAIN_VBP(m->vtotal - m->vsync_end);
 		exynos_dsi_write(dsi, DSIM_MVPORCH_REG, reg);
 
-		reg = DSIM_MAIN_HFP(vm->hfront_porch)
-			| DSIM_MAIN_HBP(vm->hback_porch);
+		reg = DSIM_MAIN_HFP(m->hsync_start - m->hdisplay)
+			| DSIM_MAIN_HBP(m->htotal - m->hsync_end);
 		exynos_dsi_write(dsi, DSIM_MHPORCH_REG, reg);
 
-		reg = DSIM_MAIN_VSA(vm->vsync_len)
-			| DSIM_MAIN_HSA(vm->hsync_len);
+		reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start)
+			| DSIM_MAIN_HSA(m->hsync_end - m->hsync_start);
 		exynos_dsi_write(dsi, DSIM_MSYNC_REG, reg);
 	}
-	reg =  DSIM_MAIN_HRESOL(vm->hactive, num_bits_resol) |
-		DSIM_MAIN_VRESOL(vm->vactive, num_bits_resol);
+	reg =  DSIM_MAIN_HRESOL(m->hdisplay, num_bits_resol) |
+		DSIM_MAIN_VRESOL(m->vdisplay, num_bits_resol);
 
 	exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg);
 
-	dev_dbg(dsi->dev, "LCD size = %dx%d\n", vm->hactive, vm->vactive);
+	dev_dbg(dsi->dev, "LCD size = %dx%d\n", m->hdisplay, m->vdisplay);
 }
 
 static void exynos_dsi_set_display_enable(struct exynos_dsi *dsi, bool enable)
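
The DSI timing registers are now derived directly from the adjusted
drm_display_mode instead of a cached struct videomode. The porch and
sync-length arithmetic used above, spelled out (struct porches and
mode_to_porches() are illustrative):

#include <drm/drm_modes.h>

struct porches {
	unsigned int hfp, hsl, hbp;	/* horizontal front porch, sync, back porch */
	unsigned int vfp, vsl, vbp;	/* vertical counterparts */
};

static struct porches mode_to_porches(const struct drm_display_mode *m)
{
	return (struct porches){
		.hfp = m->hsync_start - m->hdisplay,
		.hsl = m->hsync_end - m->hsync_start,
		.hbp = m->htotal - m->hsync_end,
		.vfp = m->vsync_start - m->vdisplay,
		.vsl = m->vsync_end - m->vsync_start,
		.vbp = m->vtotal - m->vsync_end,
	};
}
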
@@ -1265,15 +1264,15 @@
 
 	if (status & DSIM_INT_SW_RST_RELEASE) {
 		u32 mask = ~(DSIM_INT_RX_DONE | DSIM_INT_SFR_FIFO_EMPTY |
-			DSIM_INT_SFR_HDR_FIFO_EMPTY | DSIM_INT_FRAME_DONE |
-			DSIM_INT_RX_ECC_ERR | DSIM_INT_SW_RST_RELEASE);
+			DSIM_INT_SFR_HDR_FIFO_EMPTY | DSIM_INT_RX_ECC_ERR |
+			DSIM_INT_SW_RST_RELEASE);
 		exynos_dsi_write(dsi, DSIM_INTMSK_REG, mask);
 		complete(&dsi->completed);
 		return IRQ_HANDLED;
 	}
 
 	if (!(status & (DSIM_INT_RX_DONE | DSIM_INT_SFR_FIFO_EMPTY |
-			DSIM_INT_FRAME_DONE | DSIM_INT_PLL_STABLE)))
+			DSIM_INT_PLL_STABLE)))
 		return IRQ_HANDLED;
 
 	if (exynos_dsi_transfer_finish(dsi))
@@ -1485,26 +1484,7 @@
 	return 0;
 }
 
-static void exynos_dsi_mode_set(struct drm_encoder *encoder,
-				struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode)
-{
-	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
-	struct videomode *vm = &dsi->vm;
-	struct drm_display_mode *m = adjusted_mode;
-
-	vm->hactive = m->hdisplay;
-	vm->vactive = m->vdisplay;
-	vm->vfront_porch = m->vsync_start - m->vdisplay;
-	vm->vback_porch = m->vtotal - m->vsync_end;
-	vm->vsync_len = m->vsync_end - m->vsync_start;
-	vm->hfront_porch = m->hsync_start - m->hdisplay;
-	vm->hback_porch = m->htotal - m->hsync_end;
-	vm->hsync_len = m->hsync_end - m->hsync_start;
-}
-
 static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = {
-	.mode_set = exynos_dsi_mode_set,
 	.enable = exynos_dsi_enable,
 	.disable = exynos_dsi_disable,
 };
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index f0e7917..7fcc1a7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -161,7 +161,7 @@
 static const struct drm_mode_config_funcs exynos_drm_mode_config_funcs = {
 	.fb_create = exynos_user_fb_create,
 	.output_poll_changed = drm_fb_helper_output_poll_changed,
-	.atomic_check = exynos_atomic_check,
+	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
 
@@ -182,4 +182,6 @@
 	dev->mode_config.helper_private = &exynos_drm_mode_config_helpers;
 
 	dev->mode_config.allow_fb_modifiers = true;
+
+	dev->mode_config.normalize_zpos = true;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 5b18b5c..5ce8402 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -12,6 +12,7 @@
  *
  */
 #include <linux/kernel.h>
+#include <linux/component.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
@@ -24,8 +25,8 @@
 #include <drm/exynos_drm.h>
 #include "regs-fimc.h"
 #include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
 #include "exynos_drm_ipp.h"
-#include "exynos_drm_fimc.h"
 
 /*
  * FIMC stands for Fully Interactive Mobile Camera and
@@ -33,23 +34,6 @@
  * input DMA reads image data from the memory.
  * output DMA writes image data to memory.
  * FIMC supports image rotation and image effect functions.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> FIMC H/W ----> Memory.
- * Writeback operation : supports cloned screen with FIMD.
- * FIMD ----> FIMC H/W ----> Memory.
- * Output operation : supports direct display using local path.
- * Memory ----> FIMC H/W ----> FIMD.
- */
-
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. added check_prepare api for right register.
- * 5. need to add supported list in prop_list.
- * 6. check prescaler/scaler optimization.
  */
 
 #define FIMC_MAX_DEVS	4
@@ -59,29 +43,19 @@
 #define FIMC_BUF_STOP	1
 #define FIMC_BUF_START	2
 #define FIMC_WIDTH_ITU_709	1280
-#define FIMC_REFRESH_MAX	60
-#define FIMC_REFRESH_MIN	12
-#define FIMC_CROP_MAX	8192
-#define FIMC_CROP_MIN	32
-#define FIMC_SCALE_MAX	4224
-#define FIMC_SCALE_MIN	32
+#define FIMC_AUTOSUSPEND_DELAY	2000
+
+static unsigned int fimc_mask = 0xc;
+module_param_named(fimc_devs, fimc_mask, uint, 0644);
+MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM");
 
 #define get_fimc_context(dev)	platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv)	container_of(ippdrv,\
-					struct fimc_context, ippdrv);
-enum fimc_wb {
-	FIMC_WB_NONE,
-	FIMC_WB_A,
-	FIMC_WB_B,
-};
 
 enum {
 	FIMC_CLK_LCLK,
 	FIMC_CLK_GATE,
 	FIMC_CLK_WB_A,
 	FIMC_CLK_WB_B,
-	FIMC_CLK_MUX,
-	FIMC_CLK_PARENT,
 	FIMC_CLKS_MAX
 };
 
@@ -90,12 +64,8 @@
 	[FIMC_CLK_GATE]   = "fimc",
 	[FIMC_CLK_WB_A]   = "pxl_async0",
 	[FIMC_CLK_WB_B]   = "pxl_async1",
-	[FIMC_CLK_MUX]    = "mux",
-	[FIMC_CLK_PARENT] = "parent",
 };
 
-#define FIMC_DEFAULT_LCLK_FREQUENCY 133000000UL
-
 /*
  * A structure of scaler.
  *
@@ -107,7 +77,7 @@
  * @vratio: vertical ratio.
  */
 struct fimc_scaler {
-	bool	range;
+	bool range;
 	bool bypass;
 	bool up_h;
 	bool up_v;
@@ -116,56 +86,32 @@
 };
 
 /*
- * A structure of scaler capability.
- *
- * find user manual table 43-1.
- * @in_hori: scaler input horizontal size.
- * @bypass: scaler bypass mode.
- * @dst_h_wo_rot: target horizontal size without output rotation.
- * @dst_h_rot: target horizontal size with output rotation.
- * @rl_w_wo_rot: real width without input rotation.
- * @rl_h_rot: real height without output rotation.
- */
-struct fimc_capability {
-	/* scaler */
-	u32	in_hori;
-	u32	bypass;
-	/* output rotator */
-	u32	dst_h_wo_rot;
-	u32	dst_h_rot;
-	/* input rotator */
-	u32	rl_w_wo_rot;
-	u32	rl_h_rot;
-};
-
-/*
  * A structure of fimc context.
  *
- * @ippdrv: prepare initialization using ippdrv.
  * @regs_res: register resources.
  * @regs: memory mapped io registers.
  * @lock: locking of operations.
  * @clocks: fimc clocks.
- * @clk_frequency: LCLK clock frequency.
- * @sysreg: handle to SYSREG block regmap.
  * @sc: scaler information.
  * @pol: polarity of writeback.
  * @id: fimc id.
  * @irq: irq number.
- * @suspended: qos operations.
  */
 struct fimc_context {
-	struct exynos_drm_ippdrv	ippdrv;
+	struct exynos_drm_ipp ipp;
+	struct drm_device *drm_dev;
+	struct device	*dev;
+	struct exynos_drm_ipp_task	*task;
+	struct exynos_drm_ipp_formats	*formats;
+	unsigned int			num_formats;
+
 	struct resource	*regs_res;
 	void __iomem	*regs;
 	spinlock_t	lock;
 	struct clk	*clocks[FIMC_CLKS_MAX];
-	u32		clk_frequency;
-	struct regmap	*sysreg;
 	struct fimc_scaler	sc;
 	int	id;
 	int	irq;
-	bool	suspended;
 };
 
 static u32 fimc_read(struct fimc_context *ctx, u32 reg)
@@ -217,19 +163,10 @@
 	fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ);
 }
 
-static int fimc_set_camblk_fimd0_wb(struct fimc_context *ctx)
-{
-	return regmap_update_bits(ctx->sysreg, SYSREG_CAMERA_BLK,
-				  SYSREG_FIMD0WB_DEST_MASK,
-				  ctx->id << SYSREG_FIMD0WB_DEST_SHIFT);
-}
-
-static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb)
+static void fimc_set_type_ctrl(struct fimc_context *ctx)
 {
 	u32 cfg;
 
-	DRM_DEBUG_KMS("wb[%d]\n", wb);
-
 	cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
 	cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK |
 		EXYNOS_CIGCTRL_SELCAM_ITU_MASK |
@@ -238,23 +175,10 @@
 		EXYNOS_CIGCTRL_SELWB_CAMIF_MASK |
 		EXYNOS_CIGCTRL_SELWRITEBACK_MASK);
 
-	switch (wb) {
-	case FIMC_WB_A:
-		cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_A |
-			EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK);
-		break;
-	case FIMC_WB_B:
-		cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_B |
-			EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK);
-		break;
-	case FIMC_WB_NONE:
-	default:
-		cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A |
-			EXYNOS_CIGCTRL_SELWRITEBACK_A |
-			EXYNOS_CIGCTRL_SELCAM_MIPI_A |
-			EXYNOS_CIGCTRL_SELCAM_FIMC_ITU);
-		break;
-	}
+	cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A |
+		EXYNOS_CIGCTRL_SELWRITEBACK_A |
+		EXYNOS_CIGCTRL_SELCAM_MIPI_A |
+		EXYNOS_CIGCTRL_SELCAM_FIMC_ITU);
 
 	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
 }
@@ -296,7 +220,6 @@
 
 static bool fimc_check_ovf(struct fimc_context *ctx)
 {
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 status, flag;
 
 	status = fimc_read(ctx, EXYNOS_CISTATUS);
@@ -310,7 +233,7 @@
 			EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
 			EXYNOS_CIWDOFST_CLROVFICR);
 
-		dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n",
+		dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n",
 			ctx->id, status);
 		return true;
 	}
@@ -376,10 +299,8 @@
 	fimc_write(ctx, cfg, EXYNOS_CIOCTRL);
 }
 
-
-static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
+static void fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
 {
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg;
 
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -392,12 +313,12 @@
 	case DRM_FORMAT_RGB565:
 		cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565;
 		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
-		return 0;
+		return;
 	case DRM_FORMAT_RGB888:
 	case DRM_FORMAT_XRGB8888:
 		cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888;
 		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
-		return 0;
+		return;
 	default:
 		/* bypass */
 		break;
@@ -438,20 +359,13 @@
 		cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CBCR |
 			EXYNOS_MSCTRL_C_INT_IN_2PLANE);
 		break;
-	default:
-		dev_err(ippdrv->dev, "invalid source yuv order 0x%x.\n", fmt);
-		return -EINVAL;
 	}
 
 	fimc_write(ctx, cfg, EXYNOS_MSCTRL);
-
-	return 0;
 }
 
-static int fimc_src_set_fmt(struct device *dev, u32 fmt)
+static void fimc_src_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg;
 
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -485,9 +399,6 @@
 	case DRM_FORMAT_NV21:
 		cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420;
 		break;
-	default:
-		dev_err(ippdrv->dev, "invalid source format 0x%x.\n", fmt);
-		return -EINVAL;
 	}
 
 	fimc_write(ctx, cfg, EXYNOS_MSCTRL);
@@ -495,22 +406,22 @@
 	cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM);
 	cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK;
 
-	cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR;
+	if (tiled)
+		cfg |= EXYNOS_CIDMAPARAM_R_MODE_64X32;
+	else
+		cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR;
 
 	fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM);
 
-	return fimc_src_set_fmt_order(ctx, fmt);
+	fimc_src_set_fmt_order(ctx, fmt);
 }
 
-static int fimc_src_set_transf(struct device *dev,
-		enum drm_exynos_degree degree,
-		enum drm_exynos_flip flip, bool *swap)
+static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
 	u32 cfg1, cfg2;
 
-	DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
+	DRM_DEBUG_KMS("rotation[%x]\n", rotation);
 
 	cfg1 = fimc_read(ctx, EXYNOS_MSCTRL);
 	cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR |
@@ -520,61 +431,56 @@
 	cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
 
 	switch (degree) {
-	case EXYNOS_DRM_DEGREE_0:
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+	case DRM_MODE_ROTATE_0:
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR;
 		break;
-	case EXYNOS_DRM_DEGREE_90:
+	case DRM_MODE_ROTATE_90:
 		cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR;
 		break;
-	case EXYNOS_DRM_DEGREE_180:
+	case DRM_MODE_ROTATE_180:
 		cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR |
 			EXYNOS_MSCTRL_FLIP_Y_MIRROR);
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR;
 		break;
-	case EXYNOS_DRM_DEGREE_270:
+	case DRM_MODE_ROTATE_270:
 		cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR |
 			EXYNOS_MSCTRL_FLIP_Y_MIRROR);
 		cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR;
 		break;
-	default:
-		dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
-		return -EINVAL;
 	}
 
 	fimc_write(ctx, cfg1, EXYNOS_MSCTRL);
 	fimc_write(ctx, cfg2, EXYNOS_CITRGFMT);
-	*swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0;
-
-	return 0;
 }
 
-static int fimc_set_window(struct fimc_context *ctx,
-		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_set_window(struct fimc_context *ctx,
+			    struct exynos_drm_ipp_buffer *buf)
 {
 	u32 cfg, h1, h2, v1, v2;
 
 	/* cropped image */
-	h1 = pos->x;
-	h2 = sz->hsize - pos->w - pos->x;
-	v1 = pos->y;
-	v2 = sz->vsize - pos->h - pos->y;
+	h1 = buf->rect.x;
+	h2 = buf->buf.width - buf->rect.w - buf->rect.x;
+	v1 = buf->rect.y;
+	v2 = buf->buf.height - buf->rect.h - buf->rect.y;
 
 	DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n",
-		pos->x, pos->y, pos->w, pos->h, sz->hsize, sz->vsize);
+		buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h,
+		buf->buf.width, buf->buf.height);
 	DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2);
 
 	/*
@@ -592,42 +498,30 @@
 	cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) |
 		EXYNOS_CIWDOFST2_WINVEROFST2(v2));
 	fimc_write(ctx, cfg, EXYNOS_CIWDOFST2);
-
-	return 0;
 }
 
-static int fimc_src_set_size(struct device *dev, int swap,
-		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_src_set_size(struct fimc_context *ctx,
+			      struct exynos_drm_ipp_buffer *buf)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct drm_exynos_pos img_pos = *pos;
-	struct drm_exynos_sz img_sz = *sz;
 	u32 cfg;
 
-	DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n",
-		swap, sz->hsize, sz->vsize);
+	DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height);
 
 	/* original size */
-	cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) |
-		EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize));
+	cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) |
+		EXYNOS_ORGISIZE_VERTICAL(buf->buf.height));
 
 	fimc_write(ctx, cfg, EXYNOS_ORGISIZE);
 
-	DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h);
-
-	if (swap) {
-		img_pos.w = pos->h;
-		img_pos.h = pos->w;
-		img_sz.hsize = sz->vsize;
-		img_sz.vsize = sz->hsize;
-	}
+	DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y,
+		buf->rect.w, buf->rect.h);
 
 	/* set input DMA image size */
 	cfg = fimc_read(ctx, EXYNOS_CIREAL_ISIZE);
 	cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK |
 		EXYNOS_CIREAL_ISIZE_WIDTH_MASK);
-	cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) |
-		EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h));
+	cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(buf->rect.w) |
+		EXYNOS_CIREAL_ISIZE_HEIGHT(buf->rect.h));
 	fimc_write(ctx, cfg, EXYNOS_CIREAL_ISIZE);
 
 	/*
@@ -635,91 +529,34 @@
 	 * for now, we support only ITU601 8 bit mode
 	 */
 	cfg = (EXYNOS_CISRCFMT_ITU601_8BIT |
-		EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) |
-		EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize));
+		EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) |
+		EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height));
 	fimc_write(ctx, cfg, EXYNOS_CISRCFMT);
 
 	/* offset Y(RGB), Cb, Cr */
-	cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) |
-		EXYNOS_CIIYOFF_VERTICAL(img_pos.y));
+	cfg = (EXYNOS_CIIYOFF_HORIZONTAL(buf->rect.x) |
+		EXYNOS_CIIYOFF_VERTICAL(buf->rect.y));
 	fimc_write(ctx, cfg, EXYNOS_CIIYOFF);
-	cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) |
-		EXYNOS_CIICBOFF_VERTICAL(img_pos.y));
+	cfg = (EXYNOS_CIICBOFF_HORIZONTAL(buf->rect.x) |
+		EXYNOS_CIICBOFF_VERTICAL(buf->rect.y));
 	fimc_write(ctx, cfg, EXYNOS_CIICBOFF);
-	cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) |
-		EXYNOS_CIICROFF_VERTICAL(img_pos.y));
+	cfg = (EXYNOS_CIICROFF_HORIZONTAL(buf->rect.x) |
+		EXYNOS_CIICROFF_VERTICAL(buf->rect.y));
 	fimc_write(ctx, cfg, EXYNOS_CIICROFF);
 
-	return fimc_set_window(ctx, &img_pos, &img_sz);
+	fimc_set_window(ctx, buf);
 }
 
-static int fimc_src_set_addr(struct device *dev,
-		struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
-		enum drm_exynos_ipp_buf_type buf_type)
+static void fimc_src_set_addr(struct fimc_context *ctx,
+			      struct exynos_drm_ipp_buffer *buf)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_property *property;
-	struct drm_exynos_ipp_config *config;
-
-	if (!c_node) {
-		DRM_ERROR("failed to get c_node.\n");
-		return -EINVAL;
-	}
-
-	property = &c_node->property;
-
-	DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
-		property->prop_id, buf_id, buf_type);
-
-	if (buf_id > FIMC_MAX_SRC) {
-		dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
-		return -ENOMEM;
-	}
-
-	/* address register set */
-	switch (buf_type) {
-	case IPP_BUF_ENQUEUE:
-		config = &property->config[EXYNOS_DRM_OPS_SRC];
-		fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y],
-			EXYNOS_CIIYSA0);
-
-		if (config->fmt == DRM_FORMAT_YVU420) {
-			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
-				EXYNOS_CIICBSA0);
-			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
-				EXYNOS_CIICRSA0);
-		} else {
-			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
-				EXYNOS_CIICBSA0);
-			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
-				EXYNOS_CIICRSA0);
-		}
-		break;
-	case IPP_BUF_DEQUEUE:
-		fimc_write(ctx, 0x0, EXYNOS_CIIYSA0);
-		fimc_write(ctx, 0x0, EXYNOS_CIICBSA0);
-		fimc_write(ctx, 0x0, EXYNOS_CIICRSA0);
-		break;
-	default:
-		/* bypass */
-		break;
-	}
-
-	return 0;
+	fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIIYSA(0));
+	fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIICBSA(0));
+	fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIICRSA(0));
 }
 
-static struct exynos_drm_ipp_ops fimc_src_ops = {
-	.set_fmt = fimc_src_set_fmt,
-	.set_transf = fimc_src_set_transf,
-	.set_size = fimc_src_set_size,
-	.set_addr = fimc_src_set_addr,
-};
-
-static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
+static void fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
 {
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg;
 
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -732,11 +569,11 @@
 	case DRM_FORMAT_RGB565:
 		cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565;
 		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
-		return 0;
+		return;
 	case DRM_FORMAT_RGB888:
 		cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888;
 		fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
-		return 0;
+		return;
 	case DRM_FORMAT_XRGB8888:
 		cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 |
 			EXYNOS_CISCCTRL_EXTRGB_EXTENSION);
@@ -784,20 +621,13 @@
 		cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR;
 		cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE;
 		break;
-	default:
-		dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
-		return -EINVAL;
 	}
 
 	fimc_write(ctx, cfg, EXYNOS_CIOCTRL);
-
-	return 0;
 }
 
-static int fimc_dst_set_fmt(struct device *dev, u32 fmt)
+static void fimc_dst_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg;
 
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -837,10 +667,6 @@
 		case DRM_FORMAT_NV21:
 			cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420;
 			break;
-		default:
-			dev_err(ippdrv->dev, "invalid target format 0x%x.\n",
-				fmt);
-			return -EINVAL;
 		}
 
 		fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
@@ -849,73 +675,67 @@
 	cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM);
 	cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK;
 
-	cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR;
+	if (tiled)
+		cfg |= EXYNOS_CIDMAPARAM_W_MODE_64X32;
+	else
+		cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR;
 
 	fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM);
 
-	return fimc_dst_set_fmt_order(ctx, fmt);
+	fimc_dst_set_fmt_order(ctx, fmt);
 }
 
-static int fimc_dst_set_transf(struct device *dev,
-		enum drm_exynos_degree degree,
-		enum drm_exynos_flip flip, bool *swap)
+static void fimc_dst_set_transf(struct fimc_context *ctx, unsigned int rotation)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
 	u32 cfg;
 
-	DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
+	DRM_DEBUG_KMS("rotation[0x%x]\n", rotation);
 
 	cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
 	cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK;
 	cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
 
 	switch (degree) {
-	case EXYNOS_DRM_DEGREE_0:
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+	case DRM_MODE_ROTATE_0:
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
 		break;
-	case EXYNOS_DRM_DEGREE_90:
+	case DRM_MODE_ROTATE_90:
 		cfg |= EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
 		break;
-	case EXYNOS_DRM_DEGREE_180:
+	case DRM_MODE_ROTATE_180:
 		cfg |= (EXYNOS_CITRGFMT_FLIP_X_MIRROR |
 			EXYNOS_CITRGFMT_FLIP_Y_MIRROR);
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
 		break;
-	case EXYNOS_DRM_DEGREE_270:
+	case DRM_MODE_ROTATE_270:
 		cfg |= (EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE |
 			EXYNOS_CITRGFMT_FLIP_X_MIRROR |
 			EXYNOS_CITRGFMT_FLIP_Y_MIRROR);
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
 		break;
-	default:
-		dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
-		return -EINVAL;
 	}
 
 	fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
-	*swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 1 : 0;
-
-	return 0;
 }
 
 static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc,
-		struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
+			      struct drm_exynos_ipp_task_rect *src,
+			      struct drm_exynos_ipp_task_rect *dst)
 {
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg, cfg_ext, shfactor;
 	u32 pre_dst_width, pre_dst_height;
 	u32 hfactor, vfactor;
@@ -942,13 +762,13 @@
	/* the IPP core's size checks assure that dividers are not null */
 	hfactor = fls(src_w / dst_w / 2);
 	if (hfactor > FIMC_SHFACTOR / 2) {
-		dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
+		dev_err(ctx->dev, "failed to get horizontal ratio.\n");
 		return -EINVAL;
 	}
 
 	vfactor = fls(src_h / dst_h / 2);
 	if (vfactor > FIMC_SHFACTOR / 2) {
-		dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
+		dev_err(ctx->dev, "failed to get vertical ratio.\n");
 		return -EINVAL;
 	}
 
@@ -1019,83 +839,77 @@
 	fimc_write(ctx, cfg_ext, EXYNOS_CIEXTEN);
 }
 
-static int fimc_dst_set_size(struct device *dev, int swap,
-		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_dst_set_size(struct fimc_context *ctx,
+			     struct exynos_drm_ipp_buffer *buf)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct drm_exynos_pos img_pos = *pos;
-	struct drm_exynos_sz img_sz = *sz;
-	u32 cfg;
+	u32 cfg, cfg_ext;
 
-	DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n",
-		swap, sz->hsize, sz->vsize);
+	DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height);
 
 	/* original size */
-	cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) |
-		EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize));
+	cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) |
+		EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height));
 
 	fimc_write(ctx, cfg, EXYNOS_ORGOSIZE);
 
-	DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h);
+	DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y,
+		buf->rect.w, buf->rect.h);
 
 	/* CSC ITU */
 	cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
 	cfg &= ~EXYNOS_CIGCTRL_CSC_MASK;
 
-	if (sz->hsize >= FIMC_WIDTH_ITU_709)
+	if (buf->buf.width >= FIMC_WIDTH_ITU_709)
 		cfg |= EXYNOS_CIGCTRL_CSC_ITU709;
 	else
 		cfg |= EXYNOS_CIGCTRL_CSC_ITU601;
 
 	fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
 
-	if (swap) {
-		img_pos.w = pos->h;
-		img_pos.h = pos->w;
-		img_sz.hsize = sz->vsize;
-		img_sz.vsize = sz->hsize;
-	}
+	cfg_ext = fimc_read(ctx, EXYNOS_CITRGFMT);
 
 	/* target image size */
 	cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
 	cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK |
 		EXYNOS_CITRGFMT_TARGETV_MASK);
-	cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) |
-		EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h));
+	if (cfg_ext & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE)
+		cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.h) |
+			EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.w));
+	else
+		cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.w) |
+			EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.h));
 	fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
 
 	/* target area */
-	cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h);
+	cfg = EXYNOS_CITAREA_TARGET_AREA(buf->rect.w * buf->rect.h);
 	fimc_write(ctx, cfg, EXYNOS_CITAREA);
 
 	/* offset Y(RGB), Cb, Cr */
-	cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) |
-		EXYNOS_CIOYOFF_VERTICAL(img_pos.y));
+	cfg = (EXYNOS_CIOYOFF_HORIZONTAL(buf->rect.x) |
+		EXYNOS_CIOYOFF_VERTICAL(buf->rect.y));
 	fimc_write(ctx, cfg, EXYNOS_CIOYOFF);
-	cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) |
-		EXYNOS_CIOCBOFF_VERTICAL(img_pos.y));
+	cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(buf->rect.x) |
+		EXYNOS_CIOCBOFF_VERTICAL(buf->rect.y));
 	fimc_write(ctx, cfg, EXYNOS_CIOCBOFF);
-	cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) |
-		EXYNOS_CIOCROFF_VERTICAL(img_pos.y));
+	cfg = (EXYNOS_CIOCROFF_HORIZONTAL(buf->rect.x) |
+		EXYNOS_CIOCROFF_VERTICAL(buf->rect.y));
 	fimc_write(ctx, cfg, EXYNOS_CIOCROFF);
-
-	return 0;
 }
 
 static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id,
-		enum drm_exynos_ipp_buf_type buf_type)
+		bool enqueue)
 {
 	unsigned long flags;
 	u32 buf_num;
 	u32 cfg;
 
-	DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
+	DRM_DEBUG_KMS("buf_id[%d]enqueue[%d]\n", buf_id, enqueue);
 
 	spin_lock_irqsave(&ctx->lock, flags);
 
 	cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ);
 
-	if (buf_type == IPP_BUF_ENQUEUE)
+	if (enqueue)
 		cfg |= (1 << buf_id);
 	else
 		cfg &= ~(1 << buf_id);
@@ -1104,88 +918,29 @@
 
 	buf_num = hweight32(cfg);
 
-	if (buf_type == IPP_BUF_ENQUEUE && buf_num >= FIMC_BUF_START)
+	if (enqueue && buf_num >= FIMC_BUF_START)
 		fimc_mask_irq(ctx, true);
-	else if (buf_type == IPP_BUF_DEQUEUE && buf_num <= FIMC_BUF_STOP)
+	else if (!enqueue && buf_num <= FIMC_BUF_STOP)
 		fimc_mask_irq(ctx, false);
 
 	spin_unlock_irqrestore(&ctx->lock, flags);
 }
 
-static int fimc_dst_set_addr(struct device *dev,
-		struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
-		enum drm_exynos_ipp_buf_type buf_type)
+static void fimc_dst_set_addr(struct fimc_context *ctx,
+			     struct exynos_drm_ipp_buffer *buf)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_property *property;
-	struct drm_exynos_ipp_config *config;
+	fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIOYSA(0));
+	fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIOCBSA(0));
+	fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIOCRSA(0));
 
-	if (!c_node) {
-		DRM_ERROR("failed to get c_node.\n");
-		return -EINVAL;
-	}
-
-	property = &c_node->property;
-
-	DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
-		property->prop_id, buf_id, buf_type);
-
-	if (buf_id > FIMC_MAX_DST) {
-		dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
-		return -ENOMEM;
-	}
-
-	/* address register set */
-	switch (buf_type) {
-	case IPP_BUF_ENQUEUE:
-		config = &property->config[EXYNOS_DRM_OPS_DST];
-
-		fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y],
-			EXYNOS_CIOYSA(buf_id));
-
-		if (config->fmt == DRM_FORMAT_YVU420) {
-			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
-				EXYNOS_CIOCBSA(buf_id));
-			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
-				EXYNOS_CIOCRSA(buf_id));
-		} else {
-			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
-				EXYNOS_CIOCBSA(buf_id));
-			fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
-				EXYNOS_CIOCRSA(buf_id));
-		}
-		break;
-	case IPP_BUF_DEQUEUE:
-		fimc_write(ctx, 0x0, EXYNOS_CIOYSA(buf_id));
-		fimc_write(ctx, 0x0, EXYNOS_CIOCBSA(buf_id));
-		fimc_write(ctx, 0x0, EXYNOS_CIOCRSA(buf_id));
-		break;
-	default:
-		/* bypass */
-		break;
-	}
-
-	fimc_dst_set_buf_seq(ctx, buf_id, buf_type);
-
-	return 0;
+	fimc_dst_set_buf_seq(ctx, 0, true);
 }
 
-static struct exynos_drm_ipp_ops fimc_dst_ops = {
-	.set_fmt = fimc_dst_set_fmt,
-	.set_transf = fimc_dst_set_transf,
-	.set_size = fimc_dst_set_size,
-	.set_addr = fimc_dst_set_addr,
-};
+static void fimc_stop(struct fimc_context *ctx);
 
 static irqreturn_t fimc_irq_handler(int irq, void *dev_id)
 {
 	struct fimc_context *ctx = dev_id;
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_event_work *event_work =
-		c_node->event_work;
 	int buf_id;
 
 	DRM_DEBUG_KMS("fimc id[%d]\n", ctx->id);
@@ -1203,172 +958,21 @@
 
 	DRM_DEBUG_KMS("buf_id[%d]\n", buf_id);
 
-	fimc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
+	if (ctx->task) {
+		struct exynos_drm_ipp_task *task = ctx->task;
 
-	event_work->ippdrv = ippdrv;
-	event_work->buf_id[EXYNOS_DRM_OPS_DST] = buf_id;
-	queue_work(ippdrv->event_workq, &event_work->work);
+		ctx->task = NULL;
+		pm_runtime_mark_last_busy(ctx->dev);
+		pm_runtime_put_autosuspend(ctx->dev);
+		exynos_drm_ipp_task_done(task, 0);
+	}
+
+	fimc_dst_set_buf_seq(ctx, buf_id, false);
+	fimc_stop(ctx);
 
 	return IRQ_HANDLED;
 }
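
For context, the completion path above pairs with the pm_runtime_get_sync() call
made in fimc_commit() further down: every committed task holds one runtime-PM
reference until its end-of-frame interrupt fires (or fimc_abort() runs). A
minimal sketch of that pattern, assuming only a generic struct device
(illustrative only, not part of this patch):

	/* requires <linux/pm_runtime.h>; illustrates the pairing only */
	static void example_commit(struct device *dev)
	{
		pm_runtime_get_sync(dev);	/* keep the device powered while a task runs */
		/* ... program the hardware and enable the interrupt ... */
	}

	static void example_task_done(struct device *dev)
	{
		pm_runtime_mark_last_busy(dev);	 /* restart the autosuspend timer */
		pm_runtime_put_autosuspend(dev); /* drop the reference taken at commit */
	}
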
 
-static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
-{
-	struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
-	prop_list->version = 1;
-	prop_list->writeback = 1;
-	prop_list->refresh_min = FIMC_REFRESH_MIN;
-	prop_list->refresh_max = FIMC_REFRESH_MAX;
-	prop_list->flip = (1 << EXYNOS_DRM_FLIP_NONE) |
-				(1 << EXYNOS_DRM_FLIP_VERTICAL) |
-				(1 << EXYNOS_DRM_FLIP_HORIZONTAL);
-	prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
-				(1 << EXYNOS_DRM_DEGREE_90) |
-				(1 << EXYNOS_DRM_DEGREE_180) |
-				(1 << EXYNOS_DRM_DEGREE_270);
-	prop_list->csc = 1;
-	prop_list->crop = 1;
-	prop_list->crop_max.hsize = FIMC_CROP_MAX;
-	prop_list->crop_max.vsize = FIMC_CROP_MAX;
-	prop_list->crop_min.hsize = FIMC_CROP_MIN;
-	prop_list->crop_min.vsize = FIMC_CROP_MIN;
-	prop_list->scale = 1;
-	prop_list->scale_max.hsize = FIMC_SCALE_MAX;
-	prop_list->scale_max.vsize = FIMC_SCALE_MAX;
-	prop_list->scale_min.hsize = FIMC_SCALE_MIN;
-	prop_list->scale_min.vsize = FIMC_SCALE_MIN;
-
-	return 0;
-}
-
-static inline bool fimc_check_drm_flip(enum drm_exynos_flip flip)
-{
-	switch (flip) {
-	case EXYNOS_DRM_FLIP_NONE:
-	case EXYNOS_DRM_FLIP_VERTICAL:
-	case EXYNOS_DRM_FLIP_HORIZONTAL:
-	case EXYNOS_DRM_FLIP_BOTH:
-		return true;
-	default:
-		DRM_DEBUG_KMS("invalid flip\n");
-		return false;
-	}
-}
-
-static int fimc_ippdrv_check_property(struct device *dev,
-		struct drm_exynos_ipp_property *property)
-{
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list;
-	struct drm_exynos_ipp_config *config;
-	struct drm_exynos_pos *pos;
-	struct drm_exynos_sz *sz;
-	bool swap;
-	int i;
-
-	for_each_ipp_ops(i) {
-		if ((i == EXYNOS_DRM_OPS_SRC) &&
-			(property->cmd == IPP_CMD_WB))
-			continue;
-
-		config = &property->config[i];
-		pos = &config->pos;
-		sz = &config->sz;
-
-		/* check for flip */
-		if (!fimc_check_drm_flip(config->flip)) {
-			DRM_ERROR("invalid flip.\n");
-			goto err_property;
-		}
-
-		/* check for degree */
-		switch (config->degree) {
-		case EXYNOS_DRM_DEGREE_90:
-		case EXYNOS_DRM_DEGREE_270:
-			swap = true;
-			break;
-		case EXYNOS_DRM_DEGREE_0:
-		case EXYNOS_DRM_DEGREE_180:
-			swap = false;
-			break;
-		default:
-			DRM_ERROR("invalid degree.\n");
-			goto err_property;
-		}
-
-		/* check for buffer bound */
-		if ((pos->x + pos->w > sz->hsize) ||
-			(pos->y + pos->h > sz->vsize)) {
-			DRM_ERROR("out of buf bound.\n");
-			goto err_property;
-		}
-
-		/* check for crop */
-		if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) {
-			if (swap) {
-				if ((pos->h < pp->crop_min.hsize) ||
-					(sz->vsize > pp->crop_max.hsize) ||
-					(pos->w < pp->crop_min.vsize) ||
-					(sz->hsize > pp->crop_max.vsize)) {
-					DRM_ERROR("out of crop size.\n");
-					goto err_property;
-				}
-			} else {
-				if ((pos->w < pp->crop_min.hsize) ||
-					(sz->hsize > pp->crop_max.hsize) ||
-					(pos->h < pp->crop_min.vsize) ||
-					(sz->vsize > pp->crop_max.vsize)) {
-					DRM_ERROR("out of crop size.\n");
-					goto err_property;
-				}
-			}
-		}
-
-		/* check for scale */
-		if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) {
-			if (swap) {
-				if ((pos->h < pp->scale_min.hsize) ||
-					(sz->vsize > pp->scale_max.hsize) ||
-					(pos->w < pp->scale_min.vsize) ||
-					(sz->hsize > pp->scale_max.vsize)) {
-					DRM_ERROR("out of scale size.\n");
-					goto err_property;
-				}
-			} else {
-				if ((pos->w < pp->scale_min.hsize) ||
-					(sz->hsize > pp->scale_max.hsize) ||
-					(pos->h < pp->scale_min.vsize) ||
-					(sz->vsize > pp->scale_max.vsize)) {
-					DRM_ERROR("out of scale size.\n");
-					goto err_property;
-				}
-			}
-		}
-	}
-
-	return 0;
-
-err_property:
-	for_each_ipp_ops(i) {
-		if ((i == EXYNOS_DRM_OPS_SRC) &&
-			(property->cmd == IPP_CMD_WB))
-			continue;
-
-		config = &property->config[i];
-		pos = &config->pos;
-		sz = &config->sz;
-
-		DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n",
-			i ? "dst" : "src", config->flip, config->degree,
-			pos->x, pos->y, pos->w, pos->h,
-			sz->hsize, sz->vsize);
-	}
-
-	return -EINVAL;
-}
-
 static void fimc_clear_addr(struct fimc_context *ctx)
 {
 	int i;
@@ -1386,10 +990,8 @@
 	}
 }
 
-static int fimc_ippdrv_reset(struct device *dev)
+static void fimc_reset(struct fimc_context *ctx)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-
 	/* reset h/w block */
 	fimc_sw_reset(ctx);
 
@@ -1397,82 +999,26 @@
 	memset(&ctx->sc, 0x0, sizeof(ctx->sc));
 
 	fimc_clear_addr(ctx);
-
-	return 0;
 }
 
-static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void fimc_start(struct fimc_context *ctx)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_property *property;
-	struct drm_exynos_ipp_config *config;
-	struct drm_exynos_pos	img_pos[EXYNOS_DRM_OPS_MAX];
-	struct drm_exynos_ipp_set_wb set_wb;
-	int ret, i;
 	u32 cfg0, cfg1;
 
-	DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
-	if (!c_node) {
-		DRM_ERROR("failed to get c_node.\n");
-		return -EINVAL;
-	}
-
-	property = &c_node->property;
-
 	fimc_mask_irq(ctx, true);
 
-	for_each_ipp_ops(i) {
-		config = &property->config[i];
-		img_pos[i] = config->pos;
-	}
-
-	ret = fimc_set_prescaler(ctx, &ctx->sc,
-		&img_pos[EXYNOS_DRM_OPS_SRC],
-		&img_pos[EXYNOS_DRM_OPS_DST]);
-	if (ret) {
-		dev_err(dev, "failed to set prescaler.\n");
-		return ret;
-	}
-
-	/* If set ture, we can save jpeg about screen */
+	/* If set to true, the screen can be saved as JPEG */
 	fimc_handle_jpeg(ctx, false);
 	fimc_set_scaler(ctx, &ctx->sc);
 
-	switch (cmd) {
-	case IPP_CMD_M2M:
-		fimc_set_type_ctrl(ctx, FIMC_WB_NONE);
-		fimc_handle_lastend(ctx, false);
+	fimc_set_type_ctrl(ctx);
+	fimc_handle_lastend(ctx, false);
 
-		/* setup dma */
-		cfg0 = fimc_read(ctx, EXYNOS_MSCTRL);
-		cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK;
-		cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY;
-		fimc_write(ctx, cfg0, EXYNOS_MSCTRL);
-		break;
-	case IPP_CMD_WB:
-		fimc_set_type_ctrl(ctx, FIMC_WB_A);
-		fimc_handle_lastend(ctx, true);
-
-		/* setup FIMD */
-		ret = fimc_set_camblk_fimd0_wb(ctx);
-		if (ret < 0) {
-			dev_err(dev, "camblk setup failed.\n");
-			return ret;
-		}
-
-		set_wb.enable = 1;
-		set_wb.refresh = property->refresh_rate;
-		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
-		break;
-	case IPP_CMD_OUTPUT:
-	default:
-		ret = -EINVAL;
-		dev_err(dev, "invalid operations.\n");
-		return ret;
-	}
+	/* setup dma */
+	cfg0 = fimc_read(ctx, EXYNOS_MSCTRL);
+	cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK;
+	cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY;
+	fimc_write(ctx, cfg0, EXYNOS_MSCTRL);
 
 	/* Reset status */
 	fimc_write(ctx, 0x0, EXYNOS_CISTATUS);
@@ -1498,36 +1044,18 @@
 
 	fimc_clear_bits(ctx, EXYNOS_CIOCTRL, EXYNOS_CIOCTRL_WEAVE_MASK);
 
-	if (cmd == IPP_CMD_M2M)
-		fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
-
-	return 0;
+	fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
 }
 
-static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void fimc_stop(struct fimc_context *ctx)
 {
-	struct fimc_context *ctx = get_fimc_context(dev);
-	struct drm_exynos_ipp_set_wb set_wb = {0, 0};
 	u32 cfg;
 
-	DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
-	switch (cmd) {
-	case IPP_CMD_M2M:
-		/* Source clear */
-		cfg = fimc_read(ctx, EXYNOS_MSCTRL);
-		cfg &= ~EXYNOS_MSCTRL_INPUT_MASK;
-		cfg &= ~EXYNOS_MSCTRL_ENVID;
-		fimc_write(ctx, cfg, EXYNOS_MSCTRL);
-		break;
-	case IPP_CMD_WB:
-		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
-		break;
-	case IPP_CMD_OUTPUT:
-	default:
-		dev_err(dev, "invalid operations.\n");
-		break;
-	}
+	/* Source clear */
+	cfg = fimc_read(ctx, EXYNOS_MSCTRL);
+	cfg &= ~EXYNOS_MSCTRL_INPUT_MASK;
+	cfg &= ~EXYNOS_MSCTRL_ENVID;
+	fimc_write(ctx, cfg, EXYNOS_MSCTRL);
 
 	fimc_mask_irq(ctx, false);
 
@@ -1545,6 +1073,87 @@
 	fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE);
 }
 
+static int fimc_commit(struct exynos_drm_ipp *ipp,
+			  struct exynos_drm_ipp_task *task)
+{
+	struct fimc_context *ctx =
+			container_of(ipp, struct fimc_context, ipp);
+
+	pm_runtime_get_sync(ctx->dev);
+	ctx->task = task;
+
+	fimc_src_set_fmt(ctx, task->src.buf.fourcc, task->src.buf.modifier);
+	fimc_src_set_size(ctx, &task->src);
+	fimc_src_set_transf(ctx, DRM_MODE_ROTATE_0);
+	fimc_src_set_addr(ctx, &task->src);
+	fimc_dst_set_fmt(ctx, task->dst.buf.fourcc, task->dst.buf.modifier);
+	fimc_dst_set_transf(ctx, task->transform.rotation);
+	fimc_dst_set_size(ctx, &task->dst);
+	fimc_dst_set_addr(ctx, &task->dst);
+	fimc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect);
+	fimc_start(ctx);
+
+	return 0;
+}
+
+static void fimc_abort(struct exynos_drm_ipp *ipp,
+			  struct exynos_drm_ipp_task *task)
+{
+	struct fimc_context *ctx =
+			container_of(ipp, struct fimc_context, ipp);
+
+	fimc_reset(ctx);
+
+	if (ctx->task) {
+		struct exynos_drm_ipp_task *task = ctx->task;
+
+		ctx->task = NULL;
+		pm_runtime_mark_last_busy(ctx->dev);
+		pm_runtime_put_autosuspend(ctx->dev);
+		exynos_drm_ipp_task_done(task, -EIO);
+	}
+}
+
+static struct exynos_drm_ipp_funcs ipp_funcs = {
+	.commit = fimc_commit,
+	.abort = fimc_abort,
+};
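
These two callbacks are the driver's whole contract with the new IPP core:
commit() programs the hardware and starts a task, abort() resets the block and
completes whatever task was in flight. A rough sketch of how the core side might
drive them, assuming the registered funcs table is reachable as ipp->funcs
(hypothetical helper for illustration; the real dispatch logic lives in
exynos_drm_ipp.c):

	/* hypothetical core-side dispatch, illustration only */
	static int ipp_dispatch_task(struct exynos_drm_ipp *ipp,
				     struct exynos_drm_ipp_task *task)
	{
		int ret;

		ret = ipp->funcs->commit(ipp, task);	/* fimc_commit() here */
		if (ret)
			ipp->funcs->abort(ipp, task);	/* fimc_abort() completes
							 * the task with -EIO */
		return ret;
	}
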
+
+static int fimc_bind(struct device *dev, struct device *master, void *data)
+{
+	struct fimc_context *ctx = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+	ctx->drm_dev = drm_dev;
+	drm_iommu_attach_device(drm_dev, dev);
+
+	exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+			DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+			DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+			ctx->formats, ctx->num_formats, "fimc");
+
+	dev_info(dev, "The exynos fimc has been probed successfully\n");
+
+	return 0;
+}
+
+static void fimc_unbind(struct device *dev, struct device *master,
+			void *data)
+{
+	struct fimc_context *ctx = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+	exynos_drm_ipp_unregister(drm_dev, ipp);
+	drm_iommu_detach_device(drm_dev, dev);
+}
+
+static const struct component_ops fimc_component_ops = {
+	.bind	= fimc_bind,
+	.unbind = fimc_unbind,
+};
+
 static void fimc_put_clocks(struct fimc_context *ctx)
 {
 	int i;
@@ -1559,7 +1168,7 @@
 
 static int fimc_setup_clocks(struct fimc_context *ctx)
 {
-	struct device *fimc_dev = ctx->ippdrv.dev;
+	struct device *fimc_dev = ctx->dev;
 	struct device *dev;
 	int ret, i;
 
@@ -1574,8 +1183,6 @@
 
 		ctx->clocks[i] = clk_get(dev, fimc_clock_names[i]);
 		if (IS_ERR(ctx->clocks[i])) {
-			if (i >= FIMC_CLK_MUX)
-				break;
 			ret = PTR_ERR(ctx->clocks[i]);
 			dev_err(fimc_dev, "failed to get clock: %s\n",
 						fimc_clock_names[i]);
@@ -1583,20 +1190,6 @@
 		}
 	}
 
-	/* Optional FIMC LCLK parent clock setting */
-	if (!IS_ERR(ctx->clocks[FIMC_CLK_PARENT])) {
-		ret = clk_set_parent(ctx->clocks[FIMC_CLK_MUX],
-				     ctx->clocks[FIMC_CLK_PARENT]);
-		if (ret < 0) {
-			dev_err(fimc_dev, "failed to set parent.\n");
-			goto e_clk_free;
-		}
-	}
-
-	ret = clk_set_rate(ctx->clocks[FIMC_CLK_LCLK], ctx->clk_frequency);
-	if (ret < 0)
-		goto e_clk_free;
-
 	ret = clk_prepare_enable(ctx->clocks[FIMC_CLK_LCLK]);
 	if (!ret)
 		return ret;
@@ -1605,57 +1198,118 @@
 	return ret;
 }
 
-static int fimc_parse_dt(struct fimc_context *ctx)
+int exynos_drm_check_fimc_device(struct device *dev)
 {
-	struct device_node *node = ctx->ippdrv.dev->of_node;
+	int id = of_alias_get_id(dev->of_node, "fimc");
 
-	/* Handle only devices that support the LCD Writeback data path */
-	if (!of_property_read_bool(node, "samsung,lcd-wb"))
-		return -ENODEV;
-
-	if (of_property_read_u32(node, "clock-frequency",
-					&ctx->clk_frequency))
-		ctx->clk_frequency = FIMC_DEFAULT_LCLK_FREQUENCY;
-
-	ctx->id = of_alias_get_id(node, "fimc");
-
-	if (ctx->id < 0) {
-		dev_err(ctx->ippdrv.dev, "failed to get node alias id.\n");
-		return -EINVAL;
-	}
-
-	return 0;
+	if (id >= 0 && (BIT(id) & fimc_mask))
+		return 0;
+	return -ENODEV;
 }
 
+static const unsigned int fimc_formats[] = {
+	DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565,
+	DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61,
+	DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU,
+	DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422,
+	DRM_FORMAT_YUV444,
+};
+
+static const unsigned int fimc_tiled_formats[] = {
+	DRM_FORMAT_NV12, DRM_FORMAT_NV21,
+};
+
+static const struct drm_exynos_ipp_limit fimc_4210_limits_v1[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 4224, 2 }, .v = { 16, 0, 2 }) },
+	{ IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1920 }, .v = { 128, 0 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+			  .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
+
+static const struct drm_exynos_ipp_limit fimc_4210_limits_v2[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 1920, 2 }, .v = { 16, 0, 2 }) },
+	{ IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1366 }, .v = { 128, 0 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+			  .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
+
+static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v1[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 128, 1920, 2 }, .v = { 128, 0, 2 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+			  .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
+
+static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v2[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 128, 1366, 2 }, .v = { 128, 0, 2 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+			  .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
+
 static int fimc_probe(struct platform_device *pdev)
 {
+	const struct drm_exynos_ipp_limit *limits;
+	struct exynos_drm_ipp_formats *formats;
 	struct device *dev = &pdev->dev;
 	struct fimc_context *ctx;
 	struct resource *res;
-	struct exynos_drm_ippdrv *ippdrv;
 	int ret;
+	int i, j, num_limits, num_formats;
 
-	if (!dev->of_node) {
-		dev_err(dev, "device tree node not found.\n");
+	if (exynos_drm_check_fimc_device(dev) != 0)
 		return -ENODEV;
-	}
 
 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	ctx->ippdrv.dev = dev;
+	ctx->dev = dev;
+	ctx->id = of_alias_get_id(dev->of_node, "fimc");
 
-	ret = fimc_parse_dt(ctx);
-	if (ret < 0)
-		return ret;
+	/* construct formats/limits array */
+	num_formats = ARRAY_SIZE(fimc_formats) + ARRAY_SIZE(fimc_tiled_formats);
+	formats = devm_kzalloc(dev, sizeof(*formats) * num_formats, GFP_KERNEL);
+	if (!formats)
+		return -ENOMEM;
 
-	ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
-						"samsung,sysreg");
-	if (IS_ERR(ctx->sysreg)) {
-		dev_err(dev, "syscon regmap lookup failed.\n");
-		return PTR_ERR(ctx->sysreg);
+	/* linear formats */
+	if (ctx->id < 3) {
+		limits = fimc_4210_limits_v1;
+		num_limits = ARRAY_SIZE(fimc_4210_limits_v1);
+	} else {
+		limits = fimc_4210_limits_v2;
+		num_limits = ARRAY_SIZE(fimc_4210_limits_v2);
 	}
+	for (i = 0; i < ARRAY_SIZE(fimc_formats); i++) {
+		formats[i].fourcc = fimc_formats[i];
+		formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+				  DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+		formats[i].limits = limits;
+		formats[i].num_limits = num_limits;
+	}
+
+	/* tiled formats */
+	if (ctx->id < 3) {
+		limits = fimc_4210_limits_tiled_v1;
+		num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v1);
+	} else {
+		limits = fimc_4210_limits_tiled_v2;
+		num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v2);
+	}
+	for (j = i, i = 0; i < ARRAY_SIZE(fimc_tiled_formats); j++, i++) {
+		formats[j].fourcc = fimc_tiled_formats[i];
+		formats[j].modifier = DRM_FORMAT_MOD_SAMSUNG_64_32_TILE;
+		formats[j].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+				  DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+		formats[j].limits = limits;
+		formats[j].num_limits = num_limits;
+	}
+
+	ctx->formats = formats;
+	ctx->num_formats = num_formats;
 
 	/* resource memory */
 	ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1670,9 +1324,8 @@
 		return -ENOENT;
 	}
 
-	ctx->irq = res->start;
-	ret = devm_request_threaded_irq(dev, ctx->irq, NULL, fimc_irq_handler,
-		IRQF_ONESHOT, "drm_fimc", ctx);
+	ret = devm_request_irq(dev, res->start, fimc_irq_handler,
+		0, dev_name(dev), ctx);
 	if (ret < 0) {
 		dev_err(dev, "failed to request irq.\n");
 		return ret;
@@ -1682,39 +1335,24 @@
 	if (ret < 0)
 		return ret;
 
-	ippdrv = &ctx->ippdrv;
-	ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &fimc_src_ops;
-	ippdrv->ops[EXYNOS_DRM_OPS_DST] = &fimc_dst_ops;
-	ippdrv->check_property = fimc_ippdrv_check_property;
-	ippdrv->reset = fimc_ippdrv_reset;
-	ippdrv->start = fimc_ippdrv_start;
-	ippdrv->stop = fimc_ippdrv_stop;
-	ret = fimc_init_prop_list(ippdrv);
-	if (ret < 0) {
-		dev_err(dev, "failed to init property list.\n");
-		goto err_put_clk;
-	}
-
-	DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
-
 	spin_lock_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
 
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, FIMC_AUTOSUSPEND_DELAY);
 	pm_runtime_enable(dev);
 
-	ret = exynos_drm_ippdrv_register(ippdrv);
-	if (ret < 0) {
-		dev_err(dev, "failed to register drm fimc device.\n");
+	ret = component_add(dev, &fimc_component_ops);
+	if (ret)
 		goto err_pm_dis;
-	}
 
 	dev_info(dev, "drm fimc registered successfully.\n");
 
 	return 0;
 
 err_pm_dis:
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
-err_put_clk:
 	fimc_put_clocks(ctx);
 
 	return ret;
@@ -1724,42 +1362,24 @@
 {
 	struct device *dev = &pdev->dev;
 	struct fimc_context *ctx = get_fimc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 
-	exynos_drm_ippdrv_unregister(ippdrv);
+	component_del(dev, &fimc_component_ops);
+	pm_runtime_dont_use_autosuspend(dev);
+	pm_runtime_disable(dev);
 
 	fimc_put_clocks(ctx);
-	pm_runtime_set_suspended(dev);
-	pm_runtime_disable(dev);
 
 	return 0;
 }
 
 #ifdef CONFIG_PM
-static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable)
-{
-	DRM_DEBUG_KMS("enable[%d]\n", enable);
-
-	if (enable) {
-		clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]);
-		clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]);
-		ctx->suspended = false;
-	} else {
-		clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]);
-		clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]);
-		ctx->suspended = true;
-	}
-
-	return 0;
-}
-
 static int fimc_runtime_suspend(struct device *dev)
 {
 	struct fimc_context *ctx = get_fimc_context(dev);
 
 	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-	return  fimc_clk_ctrl(ctx, false);
+	clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]);
+	return 0;
 }
 
 static int fimc_runtime_resume(struct device *dev)
@@ -1767,8 +1387,7 @@
 	struct fimc_context *ctx = get_fimc_context(dev);
 
 	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-	return  fimc_clk_ctrl(ctx, true);
+	return clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]);
 }
 #endif
 
@@ -1795,4 +1414,3 @@
 		.pm	= &fimc_pm_ops,
 	},
 };
-
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.h b/drivers/gpu/drm/exynos/exynos_drm_fimc.h
deleted file mode 100644
index 127a424..0000000
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- *	Eunchul Kim <chulspro.kim@samsung.com>
- *	Jinyoung Jeon <jy0.jeon@samsung.com>
- *	Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_FIMC_H_
-#define _EXYNOS_DRM_FIMC_H_
-
-/*
- * TODO
- * FIMD output interface notifier callback.
- */
-
-#endif /* _EXYNOS_DRM_FIMC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index d42ae2b..01b1570 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -121,6 +121,12 @@
 	.has_limited_fmt = 1,
 };
 
+static struct fimd_driver_data s5pv210_fimd_driver_data = {
+	.timing_base = 0x0,
+	.has_shadowcon = 1,
+	.has_clksel = 1,
+};
+
 static struct fimd_driver_data exynos3_fimd_driver_data = {
 	.timing_base = 0x20000,
 	.lcdblk_offset = 0x210,
@@ -193,6 +199,8 @@
 static const struct of_device_id fimd_driver_dt_match[] = {
 	{ .compatible = "samsung,s3c6400-fimd",
 	  .data = &s3c64xx_fimd_driver_data },
+	{ .compatible = "samsung,s5pv210-fimd",
+	  .data = &s5pv210_fimd_driver_data },
 	{ .compatible = "samsung,exynos3250-fimd",
 	  .data = &exynos3_fimd_driver_data },
 	{ .compatible = "samsung,exynos4210-fimd",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 11cc01b..6e1494f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -431,37 +431,24 @@
 	return 0;
 }
 
-int exynos_drm_gem_fault(struct vm_fault *vmf)
+vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
 	unsigned long pfn;
 	pgoff_t page_offset;
-	int ret;
 
 	page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
 	if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) {
 		DRM_ERROR("invalid page offset\n");
-		ret = -EINVAL;
-		goto out;
+		return VM_FAULT_SIGBUS;
 	}
 
 	pfn = page_to_pfn(exynos_gem->pages[page_offset]);
-	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
-
-out:
-	switch (ret) {
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	default:
-		return VM_FAULT_SIGBUS;
-	}
+	return vmf_insert_mixed(vma, vmf->address,
+			__pfn_to_pfn_t(pfn, PFN_DEV));
 }
 
 static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 5a4c7de..9057d7f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -13,6 +13,7 @@
 #define _EXYNOS_DRM_GEM_H_
 
 #include <drm/drm_gem.h>
+#include <linux/mm_types.h>
 
 #define to_exynos_gem(x)	container_of(x, struct exynos_drm_gem, base)
 
@@ -111,7 +112,7 @@
 			       struct drm_mode_create_dumb *args);
 
 /* page fault handler and mmap fault address(virtual) to physical memory. */
-int exynos_drm_gem_fault(struct vm_fault *vmf);
+vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf);
 
 /* set vm_flags and we can change the vm attribute to other one at here. */
 int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 0506b2b..e99dd1e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -12,18 +12,20 @@
  *
  */
 #include <linux/kernel.h>
+#include <linux/component.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
 #include <linux/regmap.h>
 
 #include <drm/drmP.h>
 #include <drm/exynos_drm.h>
 #include "regs-gsc.h"
 #include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
 #include "exynos_drm_ipp.h"
-#include "exynos_drm_gsc.h"
 
 /*
  * GSC stands for General SCaler and
@@ -31,26 +33,10 @@
  * input DMA reads image data from the memory.
  * output DMA writes image data to memory.
  * GSC supports image rotation and image effect functions.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> GSC H/W ----> Memory.
- * Writeback operation : supports cloned screen with FIMD.
- * FIMD ----> GSC H/W ----> Memory.
- * Output operation : supports direct display using local path.
- * Memory ----> GSC H/W ----> FIMD, Mixer.
  */
 
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. added check_prepare api for right register.
- * 5. need to add supported list in prop_list.
- * 6. check prescaler/scaler optimization.
- */
 
-#define GSC_MAX_DEVS	4
+#define GSC_MAX_CLOCKS	8
 #define GSC_MAX_SRC		4
 #define GSC_MAX_DST		16
 #define GSC_RESET_TIMEOUT	50
@@ -65,8 +51,6 @@
 #define GSC_SC_DOWN_RATIO_4_8		131072
 #define GSC_SC_DOWN_RATIO_3_8		174762
 #define GSC_SC_DOWN_RATIO_2_8		262144
-#define GSC_REFRESH_MIN	12
-#define GSC_REFRESH_MAX	60
 #define GSC_CROP_MAX	8192
 #define GSC_CROP_MIN	32
 #define GSC_SCALE_MAX	4224
@@ -77,10 +61,9 @@
 #define GSC_COEF_H_8T	8
 #define GSC_COEF_V_4T	4
 #define GSC_COEF_DEPTH	3
+#define GSC_AUTOSUSPEND_DELAY		2000
 
 #define get_gsc_context(dev)	platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv)	container_of(ippdrv,\
-					struct gsc_context, ippdrv);
 #define gsc_read(offset)		readl(ctx->regs + (offset))
 #define gsc_write(cfg, offset)	writel(cfg, ctx->regs + (offset))
 
@@ -104,50 +87,47 @@
 };
 
 /*
- * A structure of scaler capability.
- *
- * find user manual 49.2 features.
- * @tile_w: tile mode or rotation width.
- * @tile_h: tile mode or rotation height.
- * @w: other cases width.
- * @h: other cases height.
- */
-struct gsc_capability {
-	/* tile or rotation */
-	u32	tile_w;
-	u32	tile_h;
-	/* other cases */
-	u32	w;
-	u32	h;
-};
-
-/*
  * A structure of gsc context.
  *
- * @ippdrv: prepare initialization using ippdrv.
  * @regs_res: register resources.
  * @regs: memory mapped io registers.
- * @sysreg: handle to SYSREG block regmap.
- * @lock: locking of operations.
  * @gsc_clk: gsc gate clock.
 * @sc: scaler information.
  * @id: gsc id.
  * @irq: irq number.
  * @rotation: supports rotation of src.
- * @suspended: qos operations.
  */
 struct gsc_context {
-	struct exynos_drm_ippdrv	ippdrv;
+	struct exynos_drm_ipp ipp;
+	struct drm_device *drm_dev;
+	struct device	*dev;
+	struct exynos_drm_ipp_task	*task;
+	struct exynos_drm_ipp_formats	*formats;
+	unsigned int			num_formats;
+
 	struct resource	*regs_res;
 	void __iomem	*regs;
-	struct regmap	*sysreg;
-	struct mutex	lock;
-	struct clk	*gsc_clk;
+	const char	**clk_names;
+	struct clk	*clocks[GSC_MAX_CLOCKS];
+	int		num_clocks;
 	struct gsc_scaler	sc;
 	int	id;
 	int	irq;
 	bool	rotation;
-	bool	suspended;
+};
+
+/**
+ * struct gsc_driverdata - per device type driver data for init time.
+ *
+ * @limits: picture size limits array
+ * @num_limits: the number of elements in the @limits array
+ * @clk_names: names of clocks needed by this variant
+ * @num_clocks: the number of clocks needed by this variant
+ */
+struct gsc_driverdata {
+	const struct drm_exynos_ipp_limit *limits;
+	int		num_limits;
+	const char	*clk_names[GSC_MAX_CLOCKS];
+	int		num_clocks;
 };
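
To make the shape of this table concrete, a hypothetical variant entry could
look like the following (names and limit values are purely illustrative; the
real per-SoC tables are defined further down in this file):

	/* illustrative only -- not one of the real per-SoC tables */
	static const struct drm_exynos_ipp_limit gsc_example_limits[] = {
		{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8192, 16 }, .v = { 16, 8192, 8 }) },
	};

	static const struct gsc_driverdata gsc_example_drvdata = {
		.limits		= gsc_example_limits,
		.num_limits	= ARRAY_SIZE(gsc_example_limits),
		.clk_names	= { "gscl" },
		.num_clocks	= 1,
	};
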
 
 /* 8-tap Filter Coefficient */
@@ -438,25 +418,6 @@
 	return 0;
 }
 
-static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable)
-{
-	unsigned int gscblk_cfg;
-
-	if (!ctx->sysreg)
-		return;
-
-	regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg);
-
-	if (enable)
-		gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) |
-				GSC_BLK_GSCL_WB_IN_SRC_SEL(ctx->id) |
-				GSC_BLK_SW_RESET_WB_DEST(ctx->id);
-	else
-		gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id);
-
-	regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg);
-}
-
 static void gsc_handle_irq(struct gsc_context *ctx, bool enable,
 		bool overflow, bool done)
 {
@@ -487,10 +448,8 @@
 }
 
 
-static int gsc_src_set_fmt(struct device *dev, u32 fmt)
+static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg;
 
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -506,6 +465,7 @@
 		cfg |= GSC_IN_RGB565;
 		break;
 	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_ARGB8888:
 		cfg |= GSC_IN_XRGB8888;
 		break;
 	case DRM_FORMAT_BGRX8888:
@@ -548,115 +508,84 @@
 		cfg |= (GSC_IN_CHROMA_ORDER_CBCR |
 			GSC_IN_YUV420_2P);
 		break;
-	default:
-		dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
-		return -EINVAL;
 	}
 
 	gsc_write(cfg, GSC_IN_CON);
-
-	return 0;
 }
 
-static int gsc_src_set_transf(struct device *dev,
-		enum drm_exynos_degree degree,
-		enum drm_exynos_flip flip, bool *swap)
+static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+	unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
 	u32 cfg;
 
-	DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
-
 	cfg = gsc_read(GSC_IN_CON);
 	cfg &= ~GSC_IN_ROT_MASK;
 
 	switch (degree) {
-	case EXYNOS_DRM_DEGREE_0:
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+	case DRM_MODE_ROTATE_0:
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg |= GSC_IN_ROT_XFLIP;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg |= GSC_IN_ROT_YFLIP;
 		break;
-	case EXYNOS_DRM_DEGREE_90:
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
-			cfg |= GSC_IN_ROT_90_XFLIP;
-		else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
-			cfg |= GSC_IN_ROT_90_YFLIP;
-		else
-			cfg |= GSC_IN_ROT_90;
+	case DRM_MODE_ROTATE_90:
+		cfg |= GSC_IN_ROT_90;
+		if (rotation & DRM_MODE_REFLECT_Y)
+			cfg |= GSC_IN_ROT_XFLIP;
+		if (rotation & DRM_MODE_REFLECT_X)
+			cfg |= GSC_IN_ROT_YFLIP;
 		break;
-	case EXYNOS_DRM_DEGREE_180:
+	case DRM_MODE_ROTATE_180:
 		cfg |= GSC_IN_ROT_180;
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg &= ~GSC_IN_ROT_XFLIP;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg &= ~GSC_IN_ROT_YFLIP;
 		break;
-	case EXYNOS_DRM_DEGREE_270:
+	case DRM_MODE_ROTATE_270:
 		cfg |= GSC_IN_ROT_270;
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg &= ~GSC_IN_ROT_XFLIP;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+		if (rotation & DRM_MODE_REFLECT_X)
 			cfg &= ~GSC_IN_ROT_YFLIP;
 		break;
-	default:
-		dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
-		return -EINVAL;
 	}
 
 	gsc_write(cfg, GSC_IN_CON);
 
 	ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0;
-	*swap = ctx->rotation;
-
-	return 0;
 }
 
-static int gsc_src_set_size(struct device *dev, int swap,
-		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void gsc_src_set_size(struct gsc_context *ctx,
+			     struct exynos_drm_ipp_buffer *buf)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct drm_exynos_pos img_pos = *pos;
 	struct gsc_scaler *sc = &ctx->sc;
 	u32 cfg;
 
-	DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n",
-		swap, pos->x, pos->y, pos->w, pos->h);
-
-	if (swap) {
-		img_pos.w = pos->h;
-		img_pos.h = pos->w;
-	}
-
 	/* pixel offset */
-	cfg = (GSC_SRCIMG_OFFSET_X(img_pos.x) |
-		GSC_SRCIMG_OFFSET_Y(img_pos.y));
+	cfg = (GSC_SRCIMG_OFFSET_X(buf->rect.x) |
+		GSC_SRCIMG_OFFSET_Y(buf->rect.y));
 	gsc_write(cfg, GSC_SRCIMG_OFFSET);
 
 	/* cropped size */
-	cfg = (GSC_CROPPED_WIDTH(img_pos.w) |
-		GSC_CROPPED_HEIGHT(img_pos.h));
+	cfg = (GSC_CROPPED_WIDTH(buf->rect.w) |
+		GSC_CROPPED_HEIGHT(buf->rect.h));
 	gsc_write(cfg, GSC_CROPPED_SIZE);
 
-	DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize);
-
 	/* original size */
 	cfg = gsc_read(GSC_SRCIMG_SIZE);
 	cfg &= ~(GSC_SRCIMG_HEIGHT_MASK |
 		GSC_SRCIMG_WIDTH_MASK);
 
-	cfg |= (GSC_SRCIMG_WIDTH(sz->hsize) |
-		GSC_SRCIMG_HEIGHT(sz->vsize));
+	cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) |
+		GSC_SRCIMG_HEIGHT(buf->buf.height));
 
 	gsc_write(cfg, GSC_SRCIMG_SIZE);
 
 	cfg = gsc_read(GSC_IN_CON);
 	cfg &= ~GSC_IN_RGB_TYPE_MASK;
 
-	DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range);
-
-	if (pos->w >= GSC_WIDTH_ITU_709)
+	if (buf->rect.w >= GSC_WIDTH_ITU_709)
 		if (sc->range)
 			cfg |= GSC_IN_RGB_HD_WIDE;
 		else
@@ -668,103 +597,39 @@
 			cfg |= GSC_IN_RGB_SD_NARROW;
 
 	gsc_write(cfg, GSC_IN_CON);
-
-	return 0;
 }
 
-static int gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
-		enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
+			       bool enqueue)
 {
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	bool masked;
+	bool masked = !enqueue;
 	u32 cfg;
 	u32 mask = 0x00000001 << buf_id;
 
-	DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
-
 	/* mask register set */
 	cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK);
 
-	switch (buf_type) {
-	case IPP_BUF_ENQUEUE:
-		masked = false;
-		break;
-	case IPP_BUF_DEQUEUE:
-		masked = true;
-		break;
-	default:
-		dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
-		return -EINVAL;
-	}
-
 	/* sequence id */
 	cfg &= ~mask;
 	cfg |= masked << buf_id;
 	gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK);
 	gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK);
 	gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK);
-
-	return 0;
 }
 
-static int gsc_src_set_addr(struct device *dev,
-		struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
-		enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_src_set_addr(struct gsc_context *ctx, u32 buf_id,
+			    struct exynos_drm_ipp_buffer *buf)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_property *property;
-
-	if (!c_node) {
-		DRM_ERROR("failed to get c_node.\n");
-		return -EFAULT;
-	}
-
-	property = &c_node->property;
-
-	DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
-		property->prop_id, buf_id, buf_type);
-
-	if (buf_id > GSC_MAX_SRC) {
-		dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
-		return -EINVAL;
-	}
-
 	/* address register set */
-	switch (buf_type) {
-	case IPP_BUF_ENQUEUE:
-		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
-			GSC_IN_BASE_ADDR_Y(buf_id));
-		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
-			GSC_IN_BASE_ADDR_CB(buf_id));
-		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
-			GSC_IN_BASE_ADDR_CR(buf_id));
-		break;
-	case IPP_BUF_DEQUEUE:
-		gsc_write(0x0, GSC_IN_BASE_ADDR_Y(buf_id));
-		gsc_write(0x0, GSC_IN_BASE_ADDR_CB(buf_id));
-		gsc_write(0x0, GSC_IN_BASE_ADDR_CR(buf_id));
-		break;
-	default:
-		/* bypass */
-		break;
-	}
+	gsc_write(buf->dma_addr[0], GSC_IN_BASE_ADDR_Y(buf_id));
+	gsc_write(buf->dma_addr[1], GSC_IN_BASE_ADDR_CB(buf_id));
+	gsc_write(buf->dma_addr[2], GSC_IN_BASE_ADDR_CR(buf_id));
 
-	return gsc_src_set_buf_seq(ctx, buf_id, buf_type);
+	gsc_src_set_buf_seq(ctx, buf_id, true);
 }
 
-static struct exynos_drm_ipp_ops gsc_src_ops = {
-	.set_fmt = gsc_src_set_fmt,
-	.set_transf = gsc_src_set_transf,
-	.set_size = gsc_src_set_size,
-	.set_addr = gsc_src_set_addr,
-};
-
-static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
+static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg;
 
 	DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -779,8 +644,9 @@
 	case DRM_FORMAT_RGB565:
 		cfg |= GSC_OUT_RGB565;
 		break;
+	case DRM_FORMAT_ARGB8888:
 	case DRM_FORMAT_XRGB8888:
-		cfg |= GSC_OUT_XRGB8888;
+		cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_GLOBAL_ALPHA(0xff));
 		break;
 	case DRM_FORMAT_BGRX8888:
 		cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_RB_SWAP);
@@ -819,69 +685,9 @@
 		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR |
 			GSC_OUT_YUV420_2P);
 		break;
-	default:
-		dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
-		return -EINVAL;
 	}
 
 	gsc_write(cfg, GSC_OUT_CON);
-
-	return 0;
-}
-
-static int gsc_dst_set_transf(struct device *dev,
-		enum drm_exynos_degree degree,
-		enum drm_exynos_flip flip, bool *swap)
-{
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	u32 cfg;
-
-	DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
-
-	cfg = gsc_read(GSC_IN_CON);
-	cfg &= ~GSC_IN_ROT_MASK;
-
-	switch (degree) {
-	case EXYNOS_DRM_DEGREE_0:
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
-			cfg |= GSC_IN_ROT_XFLIP;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
-			cfg |= GSC_IN_ROT_YFLIP;
-		break;
-	case EXYNOS_DRM_DEGREE_90:
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
-			cfg |= GSC_IN_ROT_90_XFLIP;
-		else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
-			cfg |= GSC_IN_ROT_90_YFLIP;
-		else
-			cfg |= GSC_IN_ROT_90;
-		break;
-	case EXYNOS_DRM_DEGREE_180:
-		cfg |= GSC_IN_ROT_180;
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
-			cfg &= ~GSC_IN_ROT_XFLIP;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
-			cfg &= ~GSC_IN_ROT_YFLIP;
-		break;
-	case EXYNOS_DRM_DEGREE_270:
-		cfg |= GSC_IN_ROT_270;
-		if (flip & EXYNOS_DRM_FLIP_VERTICAL)
-			cfg &= ~GSC_IN_ROT_XFLIP;
-		if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
-			cfg &= ~GSC_IN_ROT_YFLIP;
-		break;
-	default:
-		dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
-		return -EINVAL;
-	}
-
-	gsc_write(cfg, GSC_IN_CON);
-
-	ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0;
-	*swap = ctx->rotation;
-
-	return 0;
 }
 
 static int gsc_get_ratio_shift(u32 src, u32 dst, u32 *ratio)
@@ -919,9 +725,9 @@
 }
 
 static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc,
-		struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
+			     struct drm_exynos_ipp_task_rect *src,
+			     struct drm_exynos_ipp_task_rect *dst)
 {
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 	u32 cfg;
 	u32 src_w, src_h, dst_w, dst_h;
 	int ret = 0;
@@ -939,13 +745,13 @@
 
 	ret = gsc_get_ratio_shift(src_w, dst_w, &sc->pre_hratio);
 	if (ret) {
-		dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
+		dev_err(ctx->dev, "failed to get ratio horizontal.\n");
 		return ret;
 	}
 
 	ret = gsc_get_ratio_shift(src_h, dst_h, &sc->pre_vratio);
 	if (ret) {
-		dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
+		dev_err(ctx->dev, "failed to get ratio vertical.\n");
 		return ret;
 	}
 
@@ -1039,47 +845,37 @@
 	gsc_write(cfg, GSC_MAIN_V_RATIO);
 }
 
-static int gsc_dst_set_size(struct device *dev, int swap,
-		struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void gsc_dst_set_size(struct gsc_context *ctx,
+			     struct exynos_drm_ipp_buffer *buf)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct drm_exynos_pos img_pos = *pos;
 	struct gsc_scaler *sc = &ctx->sc;
 	u32 cfg;
 
-	DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n",
-		swap, pos->x, pos->y, pos->w, pos->h);
-
-	if (swap) {
-		img_pos.w = pos->h;
-		img_pos.h = pos->w;
-	}
-
 	/* pixel offset */
-	cfg = (GSC_DSTIMG_OFFSET_X(pos->x) |
-		GSC_DSTIMG_OFFSET_Y(pos->y));
+	cfg = (GSC_DSTIMG_OFFSET_X(buf->rect.x) |
+		GSC_DSTIMG_OFFSET_Y(buf->rect.y));
 	gsc_write(cfg, GSC_DSTIMG_OFFSET);
 
 	/* scaled size */
-	cfg = (GSC_SCALED_WIDTH(img_pos.w) | GSC_SCALED_HEIGHT(img_pos.h));
+	if (ctx->rotation)
+		cfg = (GSC_SCALED_WIDTH(buf->rect.h) |
+		       GSC_SCALED_HEIGHT(buf->rect.w));
+	else
+		cfg = (GSC_SCALED_WIDTH(buf->rect.w) |
+		       GSC_SCALED_HEIGHT(buf->rect.h));
 	gsc_write(cfg, GSC_SCALED_SIZE);
 
-	DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize);
-
 	/* original size */
 	cfg = gsc_read(GSC_DSTIMG_SIZE);
-	cfg &= ~(GSC_DSTIMG_HEIGHT_MASK |
-		GSC_DSTIMG_WIDTH_MASK);
-	cfg |= (GSC_DSTIMG_WIDTH(sz->hsize) |
-		GSC_DSTIMG_HEIGHT(sz->vsize));
+	cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK);
+	cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) |
+	       GSC_DSTIMG_HEIGHT(buf->buf.height);
 	gsc_write(cfg, GSC_DSTIMG_SIZE);
 
 	cfg = gsc_read(GSC_OUT_CON);
 	cfg &= ~GSC_OUT_RGB_TYPE_MASK;
 
-	DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range);
-
-	if (pos->w >= GSC_WIDTH_ITU_709)
+	if (buf->rect.w >= GSC_WIDTH_ITU_709)
 		if (sc->range)
 			cfg |= GSC_OUT_RGB_HD_WIDE;
 		else
@@ -1091,8 +887,6 @@
 			cfg |= GSC_OUT_RGB_SD_NARROW;
 
 	gsc_write(cfg, GSC_OUT_CON);
-
-	return 0;
 }
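
The explicit ctx->rotation test above replaces the old swap parameter; a worked example, assuming a 90-degree rotated task:

	/*
	 * A dst rect of 480x640 with ctx->rotation set programs
	 * GSC_SCALED_SIZE as width = rect.h (640), height = rect.w
	 * (480) - presumably the geometry the scaler produces before
	 * the output is transposed by the rotation stage.
	 */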
 
 static int gsc_dst_get_buf_seq(struct gsc_context *ctx)
@@ -1111,35 +905,16 @@
 	return buf_num;
 }
 
-static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
-		enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
+				bool enqueue)
 {
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	bool masked;
+	bool masked = !enqueue;
 	u32 cfg;
 	u32 mask = 0x00000001 << buf_id;
-	int ret = 0;
-
-	DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
-
-	mutex_lock(&ctx->lock);
 
 	/* mask register set */
 	cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK);
 
-	switch (buf_type) {
-	case IPP_BUF_ENQUEUE:
-		masked = false;
-		break;
-	case IPP_BUF_DEQUEUE:
-		masked = true;
-		break;
-	default:
-		dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
-		ret =  -EINVAL;
-		goto err_unlock;
-	}
-
 	/* sequence id */
 	cfg &= ~mask;
 	cfg |= masked << buf_id;
@@ -1148,94 +923,29 @@
 	gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK);
 
 	/* interrupt enable */
-	if (buf_type == IPP_BUF_ENQUEUE &&
-	    gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START)
+	if (enqueue && gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START)
 		gsc_handle_irq(ctx, true, false, true);
 
 	/* interrupt disable */
-	if (buf_type == IPP_BUF_DEQUEUE &&
-	    gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP)
+	if (!enqueue && gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP)
 		gsc_handle_irq(ctx, false, false, true);
-
-err_unlock:
-	mutex_unlock(&ctx->lock);
-	return ret;
 }
 
-static int gsc_dst_set_addr(struct device *dev,
-		struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
-		enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_dst_set_addr(struct gsc_context *ctx,
+			     u32 buf_id, struct exynos_drm_ipp_buffer *buf)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_property *property;
-
-	if (!c_node) {
-		DRM_ERROR("failed to get c_node.\n");
-		return -EFAULT;
-	}
-
-	property = &c_node->property;
-
-	DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
-		property->prop_id, buf_id, buf_type);
-
-	if (buf_id > GSC_MAX_DST) {
-		dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
-		return -EINVAL;
-	}
-
 	/* address register set */
-	switch (buf_type) {
-	case IPP_BUF_ENQUEUE:
-		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
-			GSC_OUT_BASE_ADDR_Y(buf_id));
-		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
-			GSC_OUT_BASE_ADDR_CB(buf_id));
-		gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
-			GSC_OUT_BASE_ADDR_CR(buf_id));
-		break;
-	case IPP_BUF_DEQUEUE:
-		gsc_write(0x0, GSC_OUT_BASE_ADDR_Y(buf_id));
-		gsc_write(0x0, GSC_OUT_BASE_ADDR_CB(buf_id));
-		gsc_write(0x0, GSC_OUT_BASE_ADDR_CR(buf_id));
-		break;
-	default:
-		/* bypass */
-		break;
-	}
+	gsc_write(buf->dma_addr[0], GSC_OUT_BASE_ADDR_Y(buf_id));
+	gsc_write(buf->dma_addr[1], GSC_OUT_BASE_ADDR_CB(buf_id));
+	gsc_write(buf->dma_addr[2], GSC_OUT_BASE_ADDR_CR(buf_id));
 
-	return gsc_dst_set_buf_seq(ctx, buf_id, buf_type);
-}
-
-static struct exynos_drm_ipp_ops gsc_dst_ops = {
-	.set_fmt = gsc_dst_set_fmt,
-	.set_transf = gsc_dst_set_transf,
-	.set_size = gsc_dst_set_size,
-	.set_addr = gsc_dst_set_addr,
-};
-
-static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable)
-{
-	DRM_DEBUG_KMS("enable[%d]\n", enable);
-
-	if (enable) {
-		clk_prepare_enable(ctx->gsc_clk);
-		ctx->suspended = false;
-	} else {
-		clk_disable_unprepare(ctx->gsc_clk);
-		ctx->suspended = true;
-	}
-
-	return 0;
+	gsc_dst_set_buf_seq(ctx, buf_id, true);
 }
 
 static int gsc_get_src_buf_index(struct gsc_context *ctx)
 {
 	u32 cfg, curr_index, i;
 	u32 buf_id = GSC_MAX_SRC;
-	int ret;
 
 	DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
 
@@ -1249,19 +959,15 @@
 		}
 	}
 
+	DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
+		curr_index, buf_id);
+
 	if (buf_id == GSC_MAX_SRC) {
 		DRM_ERROR("failed to get in buffer index.\n");
 		return -EINVAL;
 	}
 
-	ret = gsc_src_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
-	if (ret < 0) {
-		DRM_ERROR("failed to dequeue.\n");
-		return ret;
-	}
-
-	DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
-		curr_index, buf_id);
+	gsc_src_set_buf_seq(ctx, buf_id, false);
 
 	return buf_id;
 }
@@ -1270,7 +976,6 @@
 {
 	u32 cfg, curr_index, i;
 	u32 buf_id = GSC_MAX_DST;
-	int ret;
 
 	DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
 
@@ -1289,11 +994,7 @@
 		return -EINVAL;
 	}
 
-	ret = gsc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
-	if (ret < 0) {
-		DRM_ERROR("failed to dequeue.\n");
-		return ret;
-	}
+	gsc_dst_set_buf_seq(ctx, buf_id, false);
 
 	DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
 		curr_index, buf_id);
@@ -1304,215 +1005,55 @@
 static irqreturn_t gsc_irq_handler(int irq, void *dev_id)
 {
 	struct gsc_context *ctx = dev_id;
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_event_work *event_work =
-		c_node->event_work;
 	u32 status;
-	int buf_id[EXYNOS_DRM_OPS_MAX];
+	int err = 0;
 
 	DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
 
 	status = gsc_read(GSC_IRQ);
 	if (status & GSC_IRQ_STATUS_OR_IRQ) {
-		dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n",
+		dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n",
 			ctx->id, status);
-		return IRQ_NONE;
+		err = -EINVAL;
 	}
 
 	if (status & GSC_IRQ_STATUS_OR_FRM_DONE) {
-		dev_dbg(ippdrv->dev, "occurred frame done at %d, status 0x%x.\n",
+		int src_buf_id, dst_buf_id;
+
+		dev_dbg(ctx->dev, "occurred frame done at %d, status 0x%x.\n",
 			ctx->id, status);
 
-		buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx);
-		if (buf_id[EXYNOS_DRM_OPS_SRC] < 0)
-			return IRQ_HANDLED;
+		src_buf_id = gsc_get_src_buf_index(ctx);
+		dst_buf_id = gsc_get_dst_buf_index(ctx);
 
-		buf_id[EXYNOS_DRM_OPS_DST] = gsc_get_dst_buf_index(ctx);
-		if (buf_id[EXYNOS_DRM_OPS_DST] < 0)
-			return IRQ_HANDLED;
+		DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n",	src_buf_id,
+			      dst_buf_id);
 
-		DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n",
-			buf_id[EXYNOS_DRM_OPS_SRC], buf_id[EXYNOS_DRM_OPS_DST]);
+		if (src_buf_id < 0 || dst_buf_id < 0)
+			err = -EINVAL;
+	}
 
-		event_work->ippdrv = ippdrv;
-		event_work->buf_id[EXYNOS_DRM_OPS_SRC] =
-			buf_id[EXYNOS_DRM_OPS_SRC];
-		event_work->buf_id[EXYNOS_DRM_OPS_DST] =
-			buf_id[EXYNOS_DRM_OPS_DST];
-		queue_work(ippdrv->event_workq, &event_work->work);
+	if (ctx->task) {
+		struct exynos_drm_ipp_task *task = ctx->task;
+
+		ctx->task = NULL;
+		pm_runtime_mark_last_busy(ctx->dev);
+		pm_runtime_put_autosuspend(ctx->dev);
+		exynos_drm_ipp_task_done(task, err);
 	}
 
 	return IRQ_HANDLED;
 }
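
With the IPP v2 core, the interrupt handler above is the single completion point; a sketch of the flow, using names defined later in this series:

	/*
	 * gsc_commit()                 programs the hw, sets ctx->task
	 * gsc_irq_handler()            picks up ctx->task on FRM_DONE
	 *                              (or overflow) and drops the
	 *                              runtime-PM reference
	 * exynos_drm_ipp_task_done()   wakes a blocking committer or,
	 *                              for nonblocking tasks, schedules
	 *                              the cleanup worker
	 */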
 
-static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
+static int gsc_reset(struct gsc_context *ctx)
 {
-	struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
-	prop_list->version = 1;
-	prop_list->writeback = 1;
-	prop_list->refresh_min = GSC_REFRESH_MIN;
-	prop_list->refresh_max = GSC_REFRESH_MAX;
-	prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
-				(1 << EXYNOS_DRM_FLIP_HORIZONTAL);
-	prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
-				(1 << EXYNOS_DRM_DEGREE_90) |
-				(1 << EXYNOS_DRM_DEGREE_180) |
-				(1 << EXYNOS_DRM_DEGREE_270);
-	prop_list->csc = 1;
-	prop_list->crop = 1;
-	prop_list->crop_max.hsize = GSC_CROP_MAX;
-	prop_list->crop_max.vsize = GSC_CROP_MAX;
-	prop_list->crop_min.hsize = GSC_CROP_MIN;
-	prop_list->crop_min.vsize = GSC_CROP_MIN;
-	prop_list->scale = 1;
-	prop_list->scale_max.hsize = GSC_SCALE_MAX;
-	prop_list->scale_max.vsize = GSC_SCALE_MAX;
-	prop_list->scale_min.hsize = GSC_SCALE_MIN;
-	prop_list->scale_min.vsize = GSC_SCALE_MIN;
-
-	return 0;
-}
-
-static inline bool gsc_check_drm_flip(enum drm_exynos_flip flip)
-{
-	switch (flip) {
-	case EXYNOS_DRM_FLIP_NONE:
-	case EXYNOS_DRM_FLIP_VERTICAL:
-	case EXYNOS_DRM_FLIP_HORIZONTAL:
-	case EXYNOS_DRM_FLIP_BOTH:
-		return true;
-	default:
-		DRM_DEBUG_KMS("invalid flip\n");
-		return false;
-	}
-}
-
-static int gsc_ippdrv_check_property(struct device *dev,
-		struct drm_exynos_ipp_property *property)
-{
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list;
-	struct drm_exynos_ipp_config *config;
-	struct drm_exynos_pos *pos;
-	struct drm_exynos_sz *sz;
-	bool swap;
-	int i;
-
-	for_each_ipp_ops(i) {
-		if ((i == EXYNOS_DRM_OPS_SRC) &&
-			(property->cmd == IPP_CMD_WB))
-			continue;
-
-		config = &property->config[i];
-		pos = &config->pos;
-		sz = &config->sz;
-
-		/* check for flip */
-		if (!gsc_check_drm_flip(config->flip)) {
-			DRM_ERROR("invalid flip.\n");
-			goto err_property;
-		}
-
-		/* check for degree */
-		switch (config->degree) {
-		case EXYNOS_DRM_DEGREE_90:
-		case EXYNOS_DRM_DEGREE_270:
-			swap = true;
-			break;
-		case EXYNOS_DRM_DEGREE_0:
-		case EXYNOS_DRM_DEGREE_180:
-			swap = false;
-			break;
-		default:
-			DRM_ERROR("invalid degree.\n");
-			goto err_property;
-		}
-
-		/* check for buffer bound */
-		if ((pos->x + pos->w > sz->hsize) ||
-			(pos->y + pos->h > sz->vsize)) {
-			DRM_ERROR("out of buf bound.\n");
-			goto err_property;
-		}
-
-		/* check for crop */
-		if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) {
-			if (swap) {
-				if ((pos->h < pp->crop_min.hsize) ||
-					(sz->vsize > pp->crop_max.hsize) ||
-					(pos->w < pp->crop_min.vsize) ||
-					(sz->hsize > pp->crop_max.vsize)) {
-					DRM_ERROR("out of crop size.\n");
-					goto err_property;
-				}
-			} else {
-				if ((pos->w < pp->crop_min.hsize) ||
-					(sz->hsize > pp->crop_max.hsize) ||
-					(pos->h < pp->crop_min.vsize) ||
-					(sz->vsize > pp->crop_max.vsize)) {
-					DRM_ERROR("out of crop size.\n");
-					goto err_property;
-				}
-			}
-		}
-
-		/* check for scale */
-		if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) {
-			if (swap) {
-				if ((pos->h < pp->scale_min.hsize) ||
-					(sz->vsize > pp->scale_max.hsize) ||
-					(pos->w < pp->scale_min.vsize) ||
-					(sz->hsize > pp->scale_max.vsize)) {
-					DRM_ERROR("out of scale size.\n");
-					goto err_property;
-				}
-			} else {
-				if ((pos->w < pp->scale_min.hsize) ||
-					(sz->hsize > pp->scale_max.hsize) ||
-					(pos->h < pp->scale_min.vsize) ||
-					(sz->vsize > pp->scale_max.vsize)) {
-					DRM_ERROR("out of scale size.\n");
-					goto err_property;
-				}
-			}
-		}
-	}
-
-	return 0;
-
-err_property:
-	for_each_ipp_ops(i) {
-		if ((i == EXYNOS_DRM_OPS_SRC) &&
-			(property->cmd == IPP_CMD_WB))
-			continue;
-
-		config = &property->config[i];
-		pos = &config->pos;
-		sz = &config->sz;
-
-		DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n",
-			i ? "dst" : "src", config->flip, config->degree,
-			pos->x, pos->y, pos->w, pos->h,
-			sz->hsize, sz->vsize);
-	}
-
-	return -EINVAL;
-}
-
-
-static int gsc_ippdrv_reset(struct device *dev)
-{
-	struct gsc_context *ctx = get_gsc_context(dev);
 	struct gsc_scaler *sc = &ctx->sc;
 	int ret;
 
 	/* reset h/w block */
 	ret = gsc_sw_reset(ctx);
 	if (ret < 0) {
-		dev_err(dev, "failed to reset hardware.\n");
+		dev_err(ctx->dev, "failed to reset hardware.\n");
 		return ret;
 	}
 
@@ -1523,166 +1064,172 @@
 	return 0;
 }
 
-static int gsc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void gsc_start(struct gsc_context *ctx)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_property *property;
-	struct drm_exynos_ipp_config *config;
-	struct drm_exynos_pos	img_pos[EXYNOS_DRM_OPS_MAX];
-	struct drm_exynos_ipp_set_wb set_wb;
 	u32 cfg;
-	int ret, i;
-
-	DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
-	if (!c_node) {
-		DRM_ERROR("failed to get c_node.\n");
-		return -EINVAL;
-	}
-
-	property = &c_node->property;
 
 	gsc_handle_irq(ctx, true, false, true);
 
-	for_each_ipp_ops(i) {
-		config = &property->config[i];
-		img_pos[i] = config->pos;
-	}
+	/* enable one shot */
+	cfg = gsc_read(GSC_ENABLE);
+	cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK |
+		GSC_ENABLE_CLK_GATE_MODE_MASK);
+	cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT;
+	gsc_write(cfg, GSC_ENABLE);
 
-	switch (cmd) {
-	case IPP_CMD_M2M:
-		/* enable one shot */
-		cfg = gsc_read(GSC_ENABLE);
-		cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK |
-			GSC_ENABLE_CLK_GATE_MODE_MASK);
-		cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT;
-		gsc_write(cfg, GSC_ENABLE);
+	/* src dma memory */
+	cfg = gsc_read(GSC_IN_CON);
+	cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
+	cfg |= GSC_IN_PATH_MEMORY;
+	gsc_write(cfg, GSC_IN_CON);
 
-		/* src dma memory */
-		cfg = gsc_read(GSC_IN_CON);
-		cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
-		cfg |= GSC_IN_PATH_MEMORY;
-		gsc_write(cfg, GSC_IN_CON);
-
-		/* dst dma memory */
-		cfg = gsc_read(GSC_OUT_CON);
-		cfg |= GSC_OUT_PATH_MEMORY;
-		gsc_write(cfg, GSC_OUT_CON);
-		break;
-	case IPP_CMD_WB:
-		set_wb.enable = 1;
-		set_wb.refresh = property->refresh_rate;
-		gsc_set_gscblk_fimd_wb(ctx, set_wb.enable);
-		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
-
-		/* src local path */
-		cfg = gsc_read(GSC_IN_CON);
-		cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
-		cfg |= (GSC_IN_PATH_LOCAL | GSC_IN_LOCAL_FIMD_WB);
-		gsc_write(cfg, GSC_IN_CON);
-
-		/* dst dma memory */
-		cfg = gsc_read(GSC_OUT_CON);
-		cfg |= GSC_OUT_PATH_MEMORY;
-		gsc_write(cfg, GSC_OUT_CON);
-		break;
-	case IPP_CMD_OUTPUT:
-		/* src dma memory */
-		cfg = gsc_read(GSC_IN_CON);
-		cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
-		cfg |= GSC_IN_PATH_MEMORY;
-		gsc_write(cfg, GSC_IN_CON);
-
-		/* dst local path */
-		cfg = gsc_read(GSC_OUT_CON);
-		cfg |= GSC_OUT_PATH_MEMORY;
-		gsc_write(cfg, GSC_OUT_CON);
-		break;
-	default:
-		ret = -EINVAL;
-		dev_err(dev, "invalid operations.\n");
-		return ret;
-	}
-
-	ret = gsc_set_prescaler(ctx, &ctx->sc,
-		&img_pos[EXYNOS_DRM_OPS_SRC],
-		&img_pos[EXYNOS_DRM_OPS_DST]);
-	if (ret) {
-		dev_err(dev, "failed to set prescaler.\n");
-		return ret;
-	}
+	/* dst dma memory */
+	cfg = gsc_read(GSC_OUT_CON);
+	cfg |= GSC_OUT_PATH_MEMORY;
+	gsc_write(cfg, GSC_OUT_CON);
 
 	gsc_set_scaler(ctx, &ctx->sc);
 
 	cfg = gsc_read(GSC_ENABLE);
 	cfg |= GSC_ENABLE_ON;
 	gsc_write(cfg, GSC_ENABLE);
+}
+
+static int gsc_commit(struct exynos_drm_ipp *ipp,
+			  struct exynos_drm_ipp_task *task)
+{
+	struct gsc_context *ctx = container_of(ipp, struct gsc_context, ipp);
+	int ret;
+
+	pm_runtime_get_sync(ctx->dev);
+	ctx->task = task;
+
+	ret = gsc_reset(ctx);
+	if (ret) {
+		pm_runtime_put_autosuspend(ctx->dev);
+		ctx->task = NULL;
+		return ret;
+	}
+
+	gsc_src_set_fmt(ctx, task->src.buf.fourcc);
+	gsc_src_set_transf(ctx, task->transform.rotation);
+	gsc_src_set_size(ctx, &task->src);
+	gsc_src_set_addr(ctx, 0, &task->src);
+	gsc_dst_set_fmt(ctx, task->dst.buf.fourcc);
+	gsc_dst_set_size(ctx, &task->dst);
+	gsc_dst_set_addr(ctx, 0, &task->dst);
+	gsc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect);
+	gsc_start(ctx);
 
 	return 0;
 }
 
-static void gsc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void gsc_abort(struct exynos_drm_ipp *ipp,
+			  struct exynos_drm_ipp_task *task)
 {
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct drm_exynos_ipp_set_wb set_wb = {0, 0};
-	u32 cfg;
+	struct gsc_context *ctx =
+			container_of(ipp, struct gsc_context, ipp);
 
-	DRM_DEBUG_KMS("cmd[%d]\n", cmd);
+	gsc_reset(ctx);
+	if (ctx->task) {
+		struct exynos_drm_ipp_task *task = ctx->task;
 
-	switch (cmd) {
-	case IPP_CMD_M2M:
-		/* bypass */
-		break;
-	case IPP_CMD_WB:
-		gsc_set_gscblk_fimd_wb(ctx, set_wb.enable);
-		exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
-		break;
-	case IPP_CMD_OUTPUT:
-	default:
-		dev_err(dev, "invalid operations.\n");
-		break;
+		ctx->task = NULL;
+		pm_runtime_mark_last_busy(ctx->dev);
+		pm_runtime_put_autosuspend(ctx->dev);
+		exynos_drm_ipp_task_done(task, -EIO);
 	}
-
-	gsc_handle_irq(ctx, false, false, true);
-
-	/* reset sequence */
-	gsc_write(0xff, GSC_OUT_BASE_ADDR_Y_MASK);
-	gsc_write(0xff, GSC_OUT_BASE_ADDR_CB_MASK);
-	gsc_write(0xff, GSC_OUT_BASE_ADDR_CR_MASK);
-
-	cfg = gsc_read(GSC_ENABLE);
-	cfg &= ~GSC_ENABLE_ON;
-	gsc_write(cfg, GSC_ENABLE);
 }
 
+static struct exynos_drm_ipp_funcs ipp_funcs = {
+	.commit = gsc_commit,
+	.abort = gsc_abort,
+};
+
+static int gsc_bind(struct device *dev, struct device *master, void *data)
+{
+	struct gsc_context *ctx = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+	ctx->drm_dev = drm_dev;
+	drm_iommu_attach_device(drm_dev, dev);
+
+	exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+			DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+			DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+			ctx->formats, ctx->num_formats, "gsc");
+
+	dev_info(dev, "The exynos gscaler has been probed successfully\n");
+
+	return 0;
+}
+
+static void gsc_unbind(struct device *dev, struct device *master,
+			void *data)
+{
+	struct gsc_context *ctx = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+	exynos_drm_ipp_unregister(drm_dev, ipp);
+	drm_iommu_detach_device(drm_dev, dev);
+}
+
+static const struct component_ops gsc_component_ops = {
+	.bind	= gsc_bind,
+	.unbind = gsc_unbind,
+};
+
+static const unsigned int gsc_formats[] = {
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, DRM_FORMAT_BGRX8888,
+	DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61,
+	DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU,
+	DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422,
+};
+
 static int gsc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct gsc_driverdata *driver_data;
+	struct exynos_drm_ipp_formats *formats;
 	struct gsc_context *ctx;
 	struct resource *res;
-	struct exynos_drm_ippdrv *ippdrv;
-	int ret;
+	int ret, i;
 
 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	if (dev->of_node) {
-		ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
-							"samsung,sysreg");
-		if (IS_ERR(ctx->sysreg)) {
-			dev_warn(dev, "failed to get system register.\n");
-			ctx->sysreg = NULL;
-		}
+	formats = devm_kzalloc(dev, sizeof(*formats) *
+			       (ARRAY_SIZE(gsc_formats)), GFP_KERNEL);
+	if (!formats)
+		return -ENOMEM;
+
+	driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev);
+	ctx->dev = dev;
+	ctx->num_clocks = driver_data->num_clocks;
+	ctx->clk_names = driver_data->clk_names;
+
+	for (i = 0; i < ARRAY_SIZE(gsc_formats); i++) {
+		formats[i].fourcc = gsc_formats[i];
+		formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+				  DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+		formats[i].limits = driver_data->limits;
+		formats[i].num_limits = driver_data->num_limits;
 	}
+	ctx->formats = formats;
+	ctx->num_formats = ARRAY_SIZE(gsc_formats);
 
 	/* clock control */
-	ctx->gsc_clk = devm_clk_get(dev, "gscl");
-	if (IS_ERR(ctx->gsc_clk)) {
-		dev_err(dev, "failed to get gsc clock.\n");
-		return PTR_ERR(ctx->gsc_clk);
+	for (i = 0; i < ctx->num_clocks; i++) {
+		ctx->clocks[i] = devm_clk_get(dev, ctx->clk_names[i]);
+		if (IS_ERR(ctx->clocks[i])) {
+			dev_err(dev, "failed to get clock: %s\n",
+				ctx->clk_names[i]);
+			return PTR_ERR(ctx->clocks[i]);
+		}
 	}
 
 	/* resource memory */
@@ -1699,8 +1246,8 @@
 	}
 
 	ctx->irq = res->start;
-	ret = devm_request_threaded_irq(dev, ctx->irq, NULL, gsc_irq_handler,
-		IRQF_ONESHOT, "drm_gsc", ctx);
+	ret = devm_request_irq(dev, ctx->irq, gsc_irq_handler, 0,
+			       dev_name(dev), ctx);
 	if (ret < 0) {
 		dev_err(dev, "failed to request irq.\n");
 		return ret;
@@ -1709,38 +1256,22 @@
 	/* context initialization */
 	ctx->id = pdev->id;
 
-	ippdrv = &ctx->ippdrv;
-	ippdrv->dev = dev;
-	ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &gsc_src_ops;
-	ippdrv->ops[EXYNOS_DRM_OPS_DST] = &gsc_dst_ops;
-	ippdrv->check_property = gsc_ippdrv_check_property;
-	ippdrv->reset = gsc_ippdrv_reset;
-	ippdrv->start = gsc_ippdrv_start;
-	ippdrv->stop = gsc_ippdrv_stop;
-	ret = gsc_init_prop_list(ippdrv);
-	if (ret < 0) {
-		dev_err(dev, "failed to init property list.\n");
-		return ret;
-	}
-
-	DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
-
-	mutex_init(&ctx->lock);
 	platform_set_drvdata(pdev, ctx);
 
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, GSC_AUTOSUSPEND_DELAY);
 	pm_runtime_enable(dev);
 
-	ret = exynos_drm_ippdrv_register(ippdrv);
-	if (ret < 0) {
-		dev_err(dev, "failed to register drm gsc device.\n");
-		goto err_ippdrv_register;
-	}
+	ret = component_add(dev, &gsc_component_ops);
+	if (ret)
+		goto err_pm_dis;
 
 	dev_info(dev, "drm gsc registered successfully.\n");
 
 	return 0;
 
-err_ippdrv_register:
+err_pm_dis:
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 	return ret;
 }
@@ -1748,13 +1279,8 @@
 static int gsc_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct gsc_context *ctx = get_gsc_context(dev);
-	struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
 
-	exynos_drm_ippdrv_unregister(ippdrv);
-	mutex_destroy(&ctx->lock);
-
-	pm_runtime_set_suspended(dev);
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 
 	return 0;
@@ -1763,19 +1289,32 @@
 static int __maybe_unused gsc_runtime_suspend(struct device *dev)
 {
 	struct gsc_context *ctx = get_gsc_context(dev);
+	int i;
 
 	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
 
-	return  gsc_clk_ctrl(ctx, false);
+	for (i = ctx->num_clocks - 1; i >= 0; i--)
+		clk_disable_unprepare(ctx->clocks[i]);
+
+	return 0;
 }
 
 static int __maybe_unused gsc_runtime_resume(struct device *dev)
 {
 	struct gsc_context *ctx = get_gsc_context(dev);
+	int i, ret;
 
 	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
 
-	return  gsc_clk_ctrl(ctx, true);
+	for (i = 0; i < ctx->num_clocks; i++) {
+		ret = clk_prepare_enable(ctx->clocks[i]);
+		if (ret) {
+			while (--i >= 0)
+				clk_disable_unprepare(ctx->clocks[i]);
+			return ret;
+		}
+	}
+	return 0;
 }
 
 static const struct dev_pm_ops gsc_pm_ops = {
@@ -1784,9 +1323,66 @@
 	SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
 };
 
+static const struct drm_exynos_ipp_limit gsc_5250_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) },
+	{ IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2048 }, .v = { 16, 2048 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+			  .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static const struct drm_exynos_ipp_limit gsc_5420_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) },
+	{ IPP_SIZE_LIMIT(ROTATED, .h = { 16, 2016 }, .v = { 8, 2016 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+			  .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static const struct drm_exynos_ipp_limit gsc_5433_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) },
+	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) },
+	{ IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) },
+	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+			  .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static struct gsc_driverdata gsc_exynos5250_drvdata = {
+	.clk_names = {"gscl"},
+	.num_clocks = 1,
+	.limits = gsc_5250_limits,
+	.num_limits = ARRAY_SIZE(gsc_5250_limits),
+};
+
+static struct gsc_driverdata gsc_exynos5420_drvdata = {
+	.clk_names = {"gscl"},
+	.num_clocks = 1,
+	.limits = gsc_5420_limits,
+	.num_limits = ARRAY_SIZE(gsc_5420_limits),
+};
+
+static struct gsc_driverdata gsc_exynos5433_drvdata = {
+	.clk_names = {"pclk", "aclk", "aclk_xiu", "aclk_gsclbend"},
+	.num_clocks = 4,
+	.limits = gsc_5433_limits,
+	.num_limits = ARRAY_SIZE(gsc_5433_limits),
+};
+
 static const struct of_device_id exynos_drm_gsc_of_match[] = {
-	{ .compatible = "samsung,exynos5-gsc" },
-	{ },
+	{
+		.compatible = "samsung,exynos5-gsc",
+		.data = &gsc_exynos5250_drvdata,
+	}, {
+		.compatible = "samsung,exynos5250-gsc",
+		.data = &gsc_exynos5250_drvdata,
+	}, {
+		.compatible = "samsung,exynos5420-gsc",
+		.data = &gsc_exynos5420_drvdata,
+	}, {
+		.compatible = "samsung,exynos5433-gsc",
+		.data = &gsc_exynos5433_drvdata,
+	}, {
+	},
 };
 MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match);
 
@@ -1800,4 +1396,3 @@
 		.of_match_table = of_match_ptr(exynos_drm_gsc_of_match),
 	},
 };
-
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.h b/drivers/gpu/drm/exynos/exynos_drm_gsc.h
deleted file mode 100644
index 29ec1c5..0000000
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- *	Eunchul Kim <chulspro.kim@samsung.com>
- *	Jinyoung Jeon <jy0.jeon@samsung.com>
- *	Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_GSC_H_
-#define _EXYNOS_DRM_GSC_H_
-
-/*
- * TODO
- * FIMD output interface notifier callback.
- * Mixer output interface notifier callback.
- */
-
-#endif /* _EXYNOS_DRM_GSC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
new file mode 100644
index 0000000..26374e5
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (C) 2017 Samsung Electronics Co.Ltd
+ * Authors:
+ *	Marek Szyprowski <m.szyprowski@samsung.com>
+ *
+ * Exynos DRM Image Post Processing (IPP) related functions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ */
+
+
+#include <drm/drmP.h>
+#include <drm/drm_mode.h>
+#include <uapi/drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_gem.h"
+#include "exynos_drm_ipp.h"
+
+static int num_ipp;
+static LIST_HEAD(ipp_list);
+
+/**
+ * exynos_drm_ipp_register - Register a new picture processor hardware module
+ * @dev: DRM device
+ * @ipp: ipp module to init
+ * @funcs: callbacks for the new ipp object
+ * @caps: bitmask of ipp capabilities (%DRM_EXYNOS_IPP_CAP_*)
+ * @formats: array of supported formats
+ * @num_formats: size of the supported formats array
+ * @name: name (for debugging purposes)
+ *
+ * Initializes an ipp module.
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp,
+		const struct exynos_drm_ipp_funcs *funcs, unsigned int caps,
+		const struct exynos_drm_ipp_formats *formats,
+		unsigned int num_formats, const char *name)
+{
+	WARN_ON(!ipp);
+	WARN_ON(!funcs);
+	WARN_ON(!formats);
+	WARN_ON(!num_formats);
+
+	spin_lock_init(&ipp->lock);
+	INIT_LIST_HEAD(&ipp->todo_list);
+	init_waitqueue_head(&ipp->done_wq);
+	ipp->dev = dev;
+	ipp->funcs = funcs;
+	ipp->capabilities = caps;
+	ipp->name = name;
+	ipp->formats = formats;
+	ipp->num_formats = num_formats;
+
+	/* ipp_list modification is serialized by component framework */
+	list_add_tail(&ipp->head, &ipp_list);
+	ipp->id = num_ipp++;
+
+	DRM_DEBUG_DRIVER("Registered ipp %d\n", ipp->id);
+
+	return 0;
+}
+
+/**
+ * exynos_drm_ipp_unregister - Unregister the picture processor module
+ * @dev: DRM device
+ * @ipp: ipp module
+ */
+void exynos_drm_ipp_unregister(struct drm_device *dev,
+			       struct exynos_drm_ipp *ipp)
+{
+	WARN_ON(ipp->task);
+	WARN_ON(!list_empty(&ipp->todo_list));
+	list_del(&ipp->head);
+}
+
+/**
+ * exynos_drm_ipp_get_res_ioctl - enumerate all ipp modules
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a list of ipp ids.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv)
+{
+	struct drm_exynos_ioctl_ipp_get_res *resp = data;
+	struct exynos_drm_ipp *ipp;
+	uint32_t __user *ipp_ptr = (uint32_t __user *)
+						(unsigned long)resp->ipp_id_ptr;
+	unsigned int count = num_ipp, copied = 0;
+
+	/*
+	 * This ioctl is called twice, once to determine how much space is
+	 * needed, and the 2nd time to fill it.
+	 */
+	if (count && resp->count_ipps >= count) {
+		list_for_each_entry(ipp, &ipp_list, head) {
+			if (put_user(ipp->id, ipp_ptr + copied))
+				return -EFAULT;
+			copied++;
+		}
+	}
+	resp->count_ipps = count;
+
+	return 0;
+}
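
A minimal userspace sketch of this two-pass protocol. DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES is assumed to be the request number this handler is wired to in the uapi header; if the ipp count grows between the two calls, the second pass simply reports the new count without copying:

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/exynos_drm.h>	/* struct drm_exynos_ioctl_ipp_get_res */

static int query_ipp_ids(int drm_fd, uint32_t **ids, uint32_t *count)
{
	struct drm_exynos_ioctl_ipp_get_res res = { 0 };

	/* pass 1: count_ipps == 0, the kernel only reports the count */
	if (ioctl(drm_fd, DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES, &res) < 0)
		return -1;
	if (!res.count_ipps) {
		*ids = NULL;
		*count = 0;
		return 0;
	}

	*ids = calloc(res.count_ipps, sizeof(**ids));
	if (!*ids)
		return -1;

	/* pass 2: enough space provided, the kernel copies the ids */
	res.ipp_id_ptr = (uintptr_t)*ids;
	if (ioctl(drm_fd, DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES, &res) < 0) {
		free(*ids);
		return -1;
	}
	*count = res.count_ipps;
	return 0;
}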
+
+static inline struct exynos_drm_ipp *__ipp_get(uint32_t id)
+{
+	struct exynos_drm_ipp *ipp;
+
+	list_for_each_entry(ipp, &ipp_list, head)
+		if (ipp->id == id)
+			return ipp;
+	return NULL;
+}
+
+/**
+ * exynos_drm_ipp_get_caps_ioctl - get ipp module capabilities and formats
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a structure describing ipp module capabilities.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv)
+{
+	struct drm_exynos_ioctl_ipp_get_caps *resp = data;
+	void __user *ptr = (void __user *)(unsigned long)resp->formats_ptr;
+	struct exynos_drm_ipp *ipp;
+	int i;
+
+	ipp = __ipp_get(resp->ipp_id);
+	if (!ipp)
+		return -ENOENT;
+
+	resp->ipp_id = ipp->id;
+	resp->capabilities = ipp->capabilities;
+
+	/*
+	 * This ioctl is called twice, once to determine how much space is
+	 * needed, and the 2nd time to fill it.
+	 */
+	if (resp->formats_count >= ipp->num_formats) {
+		for (i = 0; i < ipp->num_formats; i++) {
+			struct drm_exynos_ipp_format tmp = {
+				.fourcc = ipp->formats[i].fourcc,
+				.type = ipp->formats[i].type,
+				.modifier = ipp->formats[i].modifier,
+			};
+
+			if (copy_to_user(ptr, &tmp, sizeof(tmp)))
+				return -EFAULT;
+			ptr += sizeof(tmp);
+		}
+	}
+	resp->formats_count = ipp->num_formats;
+
+	return 0;
+}
+
+static inline const struct exynos_drm_ipp_formats *__ipp_format_get(
+				struct exynos_drm_ipp *ipp, uint32_t fourcc,
+				uint64_t mod, unsigned int type)
+{
+	int i;
+
+	for (i = 0; i < ipp->num_formats; i++) {
+		if ((ipp->formats[i].type & type) &&
+		    ipp->formats[i].fourcc == fourcc &&
+		    ipp->formats[i].modifier == mod)
+			return &ipp->formats[i];
+	}
+	return NULL;
+}
+
+/**
+ * exynos_drm_ipp_get_limits_ioctl - get ipp module limits
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a structure describing ipp module limitations for the provided
+ * picture format.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv)
+{
+	struct drm_exynos_ioctl_ipp_get_limits *resp = data;
+	void __user *ptr = (void __user *)(unsigned long)resp->limits_ptr;
+	const struct exynos_drm_ipp_formats *format;
+	struct exynos_drm_ipp *ipp;
+
+	if (resp->type != DRM_EXYNOS_IPP_FORMAT_SOURCE &&
+	    resp->type != DRM_EXYNOS_IPP_FORMAT_DESTINATION)
+		return -EINVAL;
+
+	ipp = __ipp_get(resp->ipp_id);
+	if (!ipp)
+		return -ENOENT;
+
+	format = __ipp_format_get(ipp, resp->fourcc, resp->modifier,
+				  resp->type);
+	if (!format)
+		return -EINVAL;
+
+	/*
+	 * This ioctl is called twice, once to determine how much space is
+	 * needed, and the 2nd time to fill it.
+	 */
+	if (format->num_limits && resp->limits_count >= format->num_limits)
+		if (copy_to_user((void __user *)ptr, format->limits,
+				 sizeof(*format->limits) * format->num_limits))
+			return -EFAULT;
+	resp->limits_count = format->num_limits;
+
+	return 0;
+}
+
+struct drm_pending_exynos_ipp_event {
+	struct drm_pending_event base;
+	struct drm_exynos_ipp_event event;
+};
+
+static inline struct exynos_drm_ipp_task *
+			exynos_drm_ipp_task_alloc(struct exynos_drm_ipp *ipp)
+{
+	struct exynos_drm_ipp_task *task;
+
+	task = kzalloc(sizeof(*task), GFP_KERNEL);
+	if (!task)
+		return NULL;
+
+	task->dev = ipp->dev;
+	task->ipp = ipp;
+
+	/* some defaults */
+	task->src.rect.w = task->dst.rect.w = UINT_MAX;
+	task->src.rect.h = task->dst.rect.h = UINT_MAX;
+	task->transform.rotation = DRM_MODE_ROTATE_0;
+
+	DRM_DEBUG_DRIVER("Allocated task %pK\n", task);
+
+	return task;
+}
+
+static const struct exynos_drm_param_map {
+	unsigned int id;
+	unsigned int size;
+	unsigned int offset;
+} exynos_drm_ipp_params_maps[] = {
+	{
+		DRM_EXYNOS_IPP_TASK_BUFFER | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
+		sizeof(struct drm_exynos_ipp_task_buffer),
+		offsetof(struct exynos_drm_ipp_task, src.buf),
+	}, {
+		DRM_EXYNOS_IPP_TASK_BUFFER |
+			DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION,
+		sizeof(struct drm_exynos_ipp_task_buffer),
+		offsetof(struct exynos_drm_ipp_task, dst.buf),
+	}, {
+		DRM_EXYNOS_IPP_TASK_RECTANGLE | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
+		sizeof(struct drm_exynos_ipp_task_rect),
+		offsetof(struct exynos_drm_ipp_task, src.rect),
+	}, {
+		DRM_EXYNOS_IPP_TASK_RECTANGLE |
+			DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION,
+		sizeof(struct drm_exynos_ipp_task_rect),
+		offsetof(struct exynos_drm_ipp_task, dst.rect),
+	}, {
+		DRM_EXYNOS_IPP_TASK_TRANSFORM,
+		sizeof(struct drm_exynos_ipp_task_transform),
+		offsetof(struct exynos_drm_ipp_task, transform),
+	}, {
+		DRM_EXYNOS_IPP_TASK_ALPHA,
+		sizeof(struct drm_exynos_ipp_task_alpha),
+		offsetof(struct exynos_drm_ipp_task, alpha),
+	},
+};
+
+static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task,
+				   struct drm_exynos_ioctl_ipp_commit *arg)
+{
+	const struct exynos_drm_param_map *map = exynos_drm_ipp_params_maps;
+	void __user *params = (void __user *)(unsigned long)arg->params_ptr;
+	unsigned int size = arg->params_size;
+	uint32_t id;
+	int i;
+
+	while (size) {
+		if (get_user(id, (uint32_t __user *)params))
+			return -EFAULT;
+
+		for (i = 0; i < ARRAY_SIZE(exynos_drm_ipp_params_maps); i++)
+			if (map[i].id == id)
+				break;
+		if (i == ARRAY_SIZE(exynos_drm_ipp_params_maps) ||
+		    map[i].size > size)
+			return -EINVAL;
+
+		if (copy_from_user((void *)task + map[i].offset, params,
+				   map[i].size))
+			return -EFAULT;
+
+		params += map[i].size;
+		size -= map[i].size;
+	}
+
+	DRM_DEBUG_DRIVER("Got task %pK configuration from userspace\n", task);
+	return 0;
+}
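
The parser above walks a packed blob of tagged structs; a hedged sketch of the userspace side, assuming each uapi param struct begins with the 32-bit id that get_user() reads here:

#include <stdint.h>
#include <string.h>
#include <drm/exynos_drm.h>

/*
 * Concatenate tagged param structs back to back; the returned length
 * goes into drm_exynos_ioctl_ipp_commit.params_size.
 */
static size_t pack_params(char *blob,
			  const struct drm_exynos_ipp_task_buffer *src_buf,
			  const struct drm_exynos_ipp_task_rect *src_rect)
{
	size_t off = 0;

	memcpy(blob + off, src_buf, sizeof(*src_buf));
	off += sizeof(*src_buf);	/* id = TASK_BUFFER | TYPE_SOURCE */
	memcpy(blob + off, src_rect, sizeof(*src_rect));
	off += sizeof(*src_rect);	/* id = TASK_RECTANGLE | TYPE_SOURCE */
	return off;
}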
+
+static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf,
+					    struct drm_file *filp)
+{
+	int ret = 0;
+	int i;
+
+	/* basic checks */
+	if (buf->buf.width == 0 || buf->buf.height == 0)
+		return -EINVAL;
+	buf->format = drm_format_info(buf->buf.fourcc);
+	for (i = 0; i < buf->format->num_planes; i++) {
+		unsigned int width = (i == 0) ? buf->buf.width :
+			     DIV_ROUND_UP(buf->buf.width, buf->format->hsub);
+
+		if (buf->buf.pitch[i] == 0)
+			buf->buf.pitch[i] = width * buf->format->cpp[i];
+		if (buf->buf.pitch[i] < width * buf->format->cpp[i])
+			return -EINVAL;
+		if (!buf->buf.gem_id[i])
+			return -ENOENT;
+	}
+
+	/* pitch for additional planes must match */
+	if (buf->format->num_planes > 2 &&
+	    buf->buf.pitch[1] != buf->buf.pitch[2])
+		return -EINVAL;
+
+	/* get GEM buffers and check their size */
+	for (i = 0; i < buf->format->num_planes; i++) {
+		unsigned int height = (i == 0) ? buf->buf.height :
+			     DIV_ROUND_UP(buf->buf.height, buf->format->vsub);
+		unsigned long size = height * buf->buf.pitch[i];
+		struct drm_gem_object *obj = drm_gem_object_lookup(filp,
+							    buf->buf.gem_id[i]);
+		if (!obj) {
+			ret = -ENOENT;
+			goto gem_free;
+		}
+		buf->exynos_gem[i] = to_exynos_gem(obj);
+
+		if (size + buf->buf.offset[i] > buf->exynos_gem[i]->size) {
+			i++;
+			ret = -EINVAL;
+			goto gem_free;
+		}
+		buf->dma_addr[i] = buf->exynos_gem[i]->dma_addr +
+				   buf->buf.offset[i];
+	}
+
+	return 0;
+gem_free:
+	while (i--) {
+		drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base);
+		buf->exynos_gem[i] = NULL;
+	}
+	return ret;
+}
+
+static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf)
+{
+	int i;
+
+	if (!buf->exynos_gem[0])
+		return;
+	for (i = 0; i < buf->format->num_planes; i++)
+		drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base);
+}
+
+static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp,
+				 struct exynos_drm_ipp_task *task)
+{
+	DRM_DEBUG_DRIVER("Freeing task %pK\n", task);
+
+	exynos_drm_ipp_task_release_buf(&task->src);
+	exynos_drm_ipp_task_release_buf(&task->dst);
+	if (task->event)
+		drm_event_cancel_free(ipp->dev, &task->event->base);
+	kfree(task);
+}
+
+struct drm_ipp_limit {
+	struct drm_exynos_ipp_limit_val h;
+	struct drm_exynos_ipp_limit_val v;
+};
+
+enum drm_ipp_size_id {
+	IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX
+};
+
+static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = {
+	[IPP_LIMIT_BUFFER]  = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+	[IPP_LIMIT_AREA]    = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA,
+				DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+	[IPP_LIMIT_ROTATED] = { DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED,
+				DRM_EXYNOS_IPP_LIMIT_SIZE_AREA,
+				DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+};
+
+static inline void __limit_set_val(unsigned int *ptr, unsigned int val)
+{
+	if (!*ptr)
+		*ptr = val;
+}
+
+static void __get_size_limit(const struct drm_exynos_ipp_limit *limits,
+			     unsigned int num_limits, enum drm_ipp_size_id id,
+			     struct drm_ipp_limit *res)
+{
+	const struct drm_exynos_ipp_limit *l = limits;
+	int i = 0;
+
+	memset(res, 0, sizeof(*res));
+	for (i = 0; limit_id_fallback[id][i]; i++)
+		for (l = limits; l - limits < num_limits; l++) {
+			if (((l->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) !=
+			      DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE) ||
+			    ((l->type & DRM_EXYNOS_IPP_LIMIT_SIZE_MASK) !=
+						     limit_id_fallback[id][i]))
+				continue;
+			__limit_set_val(&res->h.min, l->h.min);
+			__limit_set_val(&res->h.max, l->h.max);
+			__limit_set_val(&res->h.align, l->h.align);
+			__limit_set_val(&res->v.min, l->v.min);
+			__limit_set_val(&res->v.max, l->v.max);
+			__limit_set_val(&res->v.align, l->v.align);
+		}
+}
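
The fallback table plus __limit_set_val() give "most specific wins" semantics, since only still-zero fields are overwritten on later, more generic passes:

	/*
	 * Example: for id == IPP_LIMIT_ROTATED the walk order is
	 * SIZE_ROTATED, SIZE_AREA, SIZE_BUFFER. A driver that only
	 * sets h.max in its SIZE_ROTATED entry still inherits min and
	 * align from its SIZE_AREA/SIZE_BUFFER entries, because those
	 * fields are still zero when the generic entries are visited.
	 */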
+
+static inline bool __align_check(unsigned int val, unsigned int align)
+{
+	if (align && (val & (align - 1))) {
+		DRM_DEBUG_DRIVER("Value %d exceeds HW limits (align %d)\n",
+				 val, align);
+		return false;
+	}
+	return true;
+}
+
+static inline bool __size_limit_check(unsigned int val,
+				 struct drm_exynos_ipp_limit_val *l)
+{
+	if ((l->min && val < l->min) || (l->max && val > l->max)) {
+		DRM_DEBUG_DRIVER("Value %d exceeds HW limits (min %d, max %d)\n",
+				 val, l->min, l->max);
+		return false;
+	}
+	return __align_check(val, l->align);
+}
+
+static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf,
+	const struct drm_exynos_ipp_limit *limits, unsigned int num_limits,
+	bool rotate, bool swap)
+{
+	enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA;
+	struct drm_ipp_limit l;
+	struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v;
+
+	if (!limits)
+		return 0;
+
+	__get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l);
+	if (!__size_limit_check(buf->buf.width, &l.h) ||
+	    !__size_limit_check(buf->buf.height, &l.v))
+		return -EINVAL;
+
+	if (swap) {
+		lv = &l.h;
+		lh = &l.v;
+	}
+	__get_size_limit(limits, num_limits, id, &l);
+	if (!__size_limit_check(buf->rect.w, lh) ||
+	    !__align_check(buf->rect.x, lh->align) ||
+	    !__size_limit_check(buf->rect.h, lv) ||
+	    !__align_check(buf->rect.y, lv->align))
+		return -EINVAL;
+
+	return 0;
+}
+
+static inline bool __scale_limit_check(unsigned int src, unsigned int dst,
+				       unsigned int min, unsigned int max)
+{
+	if ((max && (dst << 16) > src * max) ||
+	    (min && (dst << 16) < src * min)) {
+		DRM_DEBUG_DRIVER("Scale from %d to %d exceeds HW limits (ratio min %d.%05d, max %d.%05d)\n",
+			 src, dst,
+			 min >> 16, 100000 * (min & 0xffff) / (1 << 16),
+			 max >> 16, 100000 * (max & 0xffff) / (1 << 16));
+		return false;
+	}
+	return true;
+}
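
The ratios are 16.16 fixed point; a worked example using the limits declared in the gsc tables above, min = (1 << 16) / 16 and max = (1 << 16) * 8:

	/*
	 * For src = 100:
	 *   dst = 800 -> (800 << 16) == 100 * max -> accepted (exactly 8x)
	 *   dst = 801 -> (801 << 16) >  100 * max -> rejected (> 8x)
	 *   dst = 6   -> (6 << 16)   <  100 * min -> rejected (< 1/16x)
	 */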
+
+static int exynos_drm_ipp_check_scale_limits(
+				struct drm_exynos_ipp_task_rect *src,
+				struct drm_exynos_ipp_task_rect *dst,
+				const struct drm_exynos_ipp_limit *limits,
+				unsigned int num_limits, bool swap)
+{
+	const struct drm_exynos_ipp_limit_val *lh, *lv;
+	int dw, dh;
+
+	for (; num_limits; limits++, num_limits--)
+		if ((limits->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) ==
+		    DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE)
+			break;
+	if (!num_limits)
+		return 0;
+
+	lh = (!swap) ? &limits->h : &limits->v;
+	lv = (!swap) ? &limits->v : &limits->h;
+	dw = (!swap) ? dst->w : dst->h;
+	dh = (!swap) ? dst->h : dst->w;
+
+	if (!__scale_limit_check(src->w, dw, lh->min, lh->max) ||
+	    !__scale_limit_check(src->h, dh, lv->min, lv->max))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
+{
+	struct exynos_drm_ipp *ipp = task->ipp;
+	const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt;
+	struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst;
+	unsigned int rotation = task->transform.rotation;
+	int ret = 0;
+	bool swap = drm_rotation_90_or_270(rotation);
+	bool rotate = (rotation != DRM_MODE_ROTATE_0);
+	bool scale = false;
+
+	DRM_DEBUG_DRIVER("Checking task %pK\n", task);
+
+	if (src->rect.w == UINT_MAX)
+		src->rect.w = src->buf.width;
+	if (src->rect.h == UINT_MAX)
+		src->rect.h = src->buf.height;
+	if (dst->rect.w == UINT_MAX)
+		dst->rect.w = dst->buf.width;
+	if (dst->rect.h == UINT_MAX)
+		dst->rect.h = dst->buf.height;
+
+	if (src->rect.x + src->rect.w > (src->buf.width) ||
+	    src->rect.y + src->rect.h > (src->buf.height) ||
+	    dst->rect.x + dst->rect.w > (dst->buf.width) ||
+	    dst->rect.y + dst->rect.h > (dst->buf.height)) {
+		DRM_DEBUG_DRIVER("Task %pK: defined area is outside provided buffers\n",
+				 task);
+		return -EINVAL;
+	}
+
+	if ((!swap && (src->rect.w != dst->rect.w ||
+		       src->rect.h != dst->rect.h)) ||
+	    (swap && (src->rect.w != dst->rect.h ||
+		      src->rect.h != dst->rect.w)))
+		scale = true;
+
+	if ((!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CROP) &&
+	     (src->rect.x || src->rect.y || dst->rect.x || dst->rect.y)) ||
+	    (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_ROTATE) && rotate) ||
+	    (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) ||
+	    (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) &&
+	     src->buf.fourcc != dst->buf.fourcc)) {
+		DRM_DEBUG_DRIVER("Task %pK: hw capabilities exceeded\n", task);
+		return -EINVAL;
+	}
+
+	src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier,
+				   DRM_EXYNOS_IPP_FORMAT_SOURCE);
+	if (!src_fmt) {
+		DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task);
+		return -EINVAL;
+	}
+	ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits,
+					       src_fmt->num_limits,
+					       rotate, false);
+	if (ret)
+		return ret;
+	ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
+						src_fmt->limits,
+						src_fmt->num_limits, swap);
+	if (ret)
+		return ret;
+
+	dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier,
+				   DRM_EXYNOS_IPP_FORMAT_DESTINATION);
+	if (!dst_fmt) {
+		DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task);
+		return -EINVAL;
+	}
+	ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits,
+					       dst_fmt->num_limits,
+					       false, swap);
+	if (ret)
+		return ret;
+	ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
+						dst_fmt->limits,
+						dst_fmt->num_limits, swap);
+	if (ret)
+		return ret;
+
+	DRM_DEBUG_DRIVER("Task %pK: all checks done.\n", task);
+
+	return ret;
+}
+
+static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task,
+				     struct drm_file *filp)
+{
+	struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst;
+	int ret = 0;
+
+	DRM_DEBUG_DRIVER("Setting buffer for task %pK\n", task);
+
+	ret = exynos_drm_ipp_task_setup_buffer(src, filp);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Task %pK: src buffer setup failed\n", task);
+		return ret;
+	}
+	ret = exynos_drm_ipp_task_setup_buffer(dst, filp);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Task %pK: dst buffer setup failed\n", task);
+		return ret;
+	}
+
+	DRM_DEBUG_DRIVER("Task %pK: buffers prepared.\n", task);
+
+	return ret;
+}
+
+
+static int exynos_drm_ipp_event_create(struct exynos_drm_ipp_task *task,
+				 struct drm_file *file_priv, uint64_t user_data)
+{
+	struct drm_pending_exynos_ipp_event *e = NULL;
+	int ret;
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+
+	e->event.base.type = DRM_EXYNOS_IPP_EVENT;
+	e->event.base.length = sizeof(e->event);
+	e->event.user_data = user_data;
+
+	ret = drm_event_reserve_init(task->dev, file_priv, &e->base,
+				     &e->event.base);
+	if (ret)
+		goto free;
+
+	task->event = e;
+	return 0;
+free:
+	kfree(e);
+	return ret;
+}
+
+static void exynos_drm_ipp_event_send(struct exynos_drm_ipp_task *task)
+{
+	struct timespec64 now;
+
+	ktime_get_ts64(&now);
+	task->event->event.tv_sec = now.tv_sec;
+	task->event->event.tv_usec = now.tv_nsec / NSEC_PER_USEC;
+	task->event->event.sequence = atomic_inc_return(&task->ipp->sequence);
+
+	drm_send_event(task->dev, &task->event->base);
+}
+
+static int exynos_drm_ipp_task_cleanup(struct exynos_drm_ipp_task *task)
+{
+	int ret = task->ret;
+
+	if (ret == 0 && task->event) {
+		exynos_drm_ipp_event_send(task);
+		/* ensure event won't be canceled on task free */
+		task->event = NULL;
+	}
+
+	exynos_drm_ipp_task_free(task->ipp, task);
+	return ret;
+}
+
+static void exynos_drm_ipp_cleanup_work(struct work_struct *work)
+{
+	struct exynos_drm_ipp_task *task = container_of(work,
+				      struct exynos_drm_ipp_task, cleanup_work);
+
+	exynos_drm_ipp_task_cleanup(task);
+}
+
+static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp);
+
+/**
+ * exynos_drm_ipp_task_done - finish given task and set return code
+ * @task: ipp task to finish
+ * @ret: error code or 0 if operation has been performed successfully
+ */
+void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret)
+{
+	struct exynos_drm_ipp *ipp = task->ipp;
+	unsigned long flags;
+
+	DRM_DEBUG_DRIVER("ipp: %d, task %pK done: %d\n", ipp->id, task, ret);
+
+	spin_lock_irqsave(&ipp->lock, flags);
+	if (ipp->task == task)
+		ipp->task = NULL;
+	task->flags |= DRM_EXYNOS_IPP_TASK_DONE;
+	task->ret = ret;
+	spin_unlock_irqrestore(&ipp->lock, flags);
+
+	exynos_drm_ipp_next_task(ipp);
+	wake_up(&ipp->done_wq);
+
+	if (task->flags & DRM_EXYNOS_IPP_TASK_ASYNC) {
+		INIT_WORK(&task->cleanup_work, exynos_drm_ipp_cleanup_work);
+		schedule_work(&task->cleanup_work);
+	}
+}
+
+static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp)
+{
+	struct exynos_drm_ipp_task *task;
+	unsigned long flags;
+	int ret;
+
+	DRM_DEBUG_DRIVER("ipp: %d, try to run new task\n", ipp->id);
+
+	spin_lock_irqsave(&ipp->lock, flags);
+
+	if (ipp->task || list_empty(&ipp->todo_list)) {
+		spin_unlock_irqrestore(&ipp->lock, flags);
+		return;
+	}
+
+	task = list_first_entry(&ipp->todo_list, struct exynos_drm_ipp_task,
+				head);
+	list_del_init(&task->head);
+	ipp->task = task;
+
+	spin_unlock_irqrestore(&ipp->lock, flags);
+
+	DRM_DEBUG_DRIVER("ipp: %d, selected task %pK to run\n", ipp->id, task);
+
+	ret = ipp->funcs->commit(ipp, task);
+	if (ret)
+		exynos_drm_ipp_task_done(task, ret);
+}
+
+static void exynos_drm_ipp_schedule_task(struct exynos_drm_ipp *ipp,
+					 struct exynos_drm_ipp_task *task)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipp->lock, flags);
+	list_add(&task->head, &ipp->todo_list);
+	spin_unlock_irqrestore(&ipp->lock, flags);
+
+	exynos_drm_ipp_next_task(ipp);
+}
+
+static void exynos_drm_ipp_task_abort(struct exynos_drm_ipp *ipp,
+				      struct exynos_drm_ipp_task *task)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipp->lock, flags);
+	if (task->flags & DRM_EXYNOS_IPP_TASK_DONE) {
+		/* already completed task */
+		exynos_drm_ipp_task_cleanup(task);
+	} else if (ipp->task != task) {
+		/* task has not been scheduled for execution yet */
+		list_del_init(&task->head);
+		exynos_drm_ipp_task_cleanup(task);
+	} else {
+		/*
+		 * currently processed task, call abort() and perform
+		 * cleanup with async worker
+		 */
+		task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC;
+		spin_unlock_irqrestore(&ipp->lock, flags);
+		if (ipp->funcs->abort)
+			ipp->funcs->abort(ipp, task);
+		return;
+	}
+	spin_unlock_irqrestore(&ipp->lock, flags);
+}
+
+/**
+ * exynos_drm_ipp_commit_ioctl - perform image processing operation
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct an ipp task from the set of properties provided by the user
+ * and try to schedule it to framebuffer processor hardware.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_priv)
+{
+	struct drm_exynos_ioctl_ipp_commit *arg = data;
+	struct exynos_drm_ipp *ipp;
+	struct exynos_drm_ipp_task *task;
+	int ret = 0;
+
+	if ((arg->flags & ~DRM_EXYNOS_IPP_FLAGS) || arg->reserved)
+		return -EINVAL;
+
+	/* can't test and expect an event at the same time */
+	if ((arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) &&
+			(arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT))
+		return -EINVAL;
+
+	ipp = __ipp_get(arg->ipp_id);
+	if (!ipp)
+		return -ENOENT;
+
+	task = exynos_drm_ipp_task_alloc(ipp);
+	if (!task)
+		return -ENOMEM;
+
+	ret = exynos_drm_ipp_task_set(task, arg);
+	if (ret)
+		goto free;
+
+	ret = exynos_drm_ipp_task_check(task);
+	if (ret)
+		goto free;
+
+	ret = exynos_drm_ipp_task_setup_buffers(task, file_priv);
+	if (ret || arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY)
+		goto free;
+
+	if (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT) {
+		ret = exynos_drm_ipp_event_create(task, file_priv,
+						 arg->user_data);
+		if (ret)
+			goto free;
+	}
+
+	/*
+	 * Queue the task for processing on the hardware. The task object
+	 * will then be freed after exynos_drm_ipp_task_done()
+	 */
+	if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) {
+		DRM_DEBUG_DRIVER("ipp: %d, nonblocking processing task %pK\n",
+				 ipp->id, task);
+
+		task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC;
+		exynos_drm_ipp_schedule_task(task->ipp, task);
+		ret = 0;
+	} else {
+		DRM_DEBUG_DRIVER("ipp: %d, processing task %pK\n", ipp->id,
+				 task);
+		exynos_drm_ipp_schedule_task(ipp, task);
+		ret = wait_event_interruptible(ipp->done_wq,
+					task->flags & DRM_EXYNOS_IPP_TASK_DONE);
+		if (ret)
+			exynos_drm_ipp_task_abort(ipp, task);
+		else
+			ret = exynos_drm_ipp_task_cleanup(task);
+	}
+	return ret;
+free:
+	exynos_drm_ipp_task_free(ipp, task);
+
+	return ret;
+}
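
A hedged end-to-end sketch of a blocking commit from userspace; the DRM_IOCTL_EXYNOS_IPP_COMMIT request number is assumed from the matching uapi header, and params would be packed as in the earlier pack_params() sketch:

#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/exynos_drm.h>

/*
 * Synchronous one-shot operation: without DRM_EXYNOS_IPP_FLAG_NONBLOCK
 * the ioctl waits on ipp->done_wq until the task completes (or aborts).
 */
static int ipp_commit_sync(int drm_fd, uint32_t ipp_id,
			   const void *params, uint32_t params_size)
{
	struct drm_exynos_ioctl_ipp_commit arg = {
		.ipp_id = ipp_id,
		.params_ptr = (uintptr_t)params,
		.params_size = params_size,
	};

	return ioctl(drm_fd, DRM_IOCTL_EXYNOS_IPP_COMMIT, &arg);
}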
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
new file mode 100644
index 0000000..0b27d4a
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef _EXYNOS_DRM_IPP_H_
+#define _EXYNOS_DRM_IPP_H_
+
+#include <drm/drmP.h>
+
+struct exynos_drm_ipp;
+struct exynos_drm_ipp_task;
+
+/**
+ * struct exynos_drm_ipp_funcs - exynos_drm_ipp control functions
+ */
+struct exynos_drm_ipp_funcs {
+	/**
+	 * @commit:
+	 *
+	 * This is the main entry point to start framebuffer processing
+	 * in the hardware. The exynos_drm_ipp_task has already been validated.
+	 * This function must not wait until the device finishes processing.
+	 * When the driver finishes processing, it has to call the
+	 * exynos_drm_ipp_task_done() function.
+	 *
+	 * RETURNS:
+	 *
+	 * 0 on success or negative error codes in case of failure.
+	 */
+	int (*commit)(struct exynos_drm_ipp *ipp,
+		      struct exynos_drm_ipp_task *task);
+
+	/**
+	 * @abort:
+	 *
+	 * Informs the driver that it has to abort the currently running
+	 * task as soon as possible (i.e. as soon as it can stop the device
+	 * safely), even if the task would not have been finished by then.
+	 * After the driver performs the necessary steps, it has to call
+	 * exynos_drm_ipp_task_done() (as if the task ended normally).
+	 * This function does not have to (and usually will not) wait
+	 * until the device enters a state in which it can be stopped.
+	 */
+	void (*abort)(struct exynos_drm_ipp *ipp,
+		      struct exynos_drm_ipp_task *task);
+};
+
+/**
+ * struct exynos_drm_ipp - central picture processor module structure
+ */
+struct exynos_drm_ipp {
+	struct drm_device *dev;
+	struct list_head head;
+	unsigned int id;
+
+	const char *name;
+	const struct exynos_drm_ipp_funcs *funcs;
+	unsigned int capabilities;
+	const struct exynos_drm_ipp_formats *formats;
+	unsigned int num_formats;
+	atomic_t sequence;
+
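+	/* protects @task and @todo_list below */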
+	spinlock_t lock;
+	struct exynos_drm_ipp_task *task;
+	struct list_head todo_list;
+	wait_queue_head_t done_wq;
+};
+
+struct exynos_drm_ipp_buffer {
+	struct drm_exynos_ipp_task_buffer buf;
+	struct drm_exynos_ipp_task_rect rect;
+
+	struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER];
+	const struct drm_format_info *format;
+	dma_addr_t dma_addr[MAX_FB_BUFFER];
+};
+
+/**
+ * struct exynos_drm_ipp_task - a structure describing a transformation that
+ * has to be performed by the picture processor hardware module
+ */
+struct exynos_drm_ipp_task {
+	struct drm_device *dev;
+	struct exynos_drm_ipp *ipp;
+	struct list_head head;
+
+	struct exynos_drm_ipp_buffer src;
+	struct exynos_drm_ipp_buffer dst;
+
+	struct drm_exynos_ipp_task_transform transform;
+	struct drm_exynos_ipp_task_alpha alpha;
+
+	struct work_struct cleanup_work;
+	unsigned int flags;
+	int ret;
+
+	struct drm_pending_exynos_ipp_event *event;
+};
+
+#define DRM_EXYNOS_IPP_TASK_DONE	(1 << 0)
+#define DRM_EXYNOS_IPP_TASK_ASYNC	(1 << 1)
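+/* DONE: processing has finished; ASYNC: cleanup is left to the async worker */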
+
+struct exynos_drm_ipp_formats {
+	uint32_t fourcc;
+	uint32_t type;
+	uint64_t modifier;
+	const struct drm_exynos_ipp_limit *limits;
+	unsigned int num_limits;
+};
+
+/* helper macros to set the exynos_drm_ipp_formats structure and limits */
+#define IPP_SRCDST_MFORMAT(f, m, l) \
+	.fourcc = DRM_FORMAT_##f, .modifier = m, .limits = l, \
+	.num_limits = ARRAY_SIZE(l), \
+	.type = (DRM_EXYNOS_IPP_FORMAT_SOURCE | \
+		 DRM_EXYNOS_IPP_FORMAT_DESTINATION)
+
+#define IPP_SRCDST_FORMAT(f, l) IPP_SRCDST_MFORMAT(f, 0, l)
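+/* e.g. { IPP_SRCDST_FORMAT(NV12, limits) } declares NV12 for both directions */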
+
+#define IPP_SIZE_LIMIT(l, val...)	\
+	.type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE | \
+		 DRM_EXYNOS_IPP_LIMIT_SIZE_##l), val
+
+#define IPP_SCALE_LIMIT(val...)		\
+	.type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE), val
+
+int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp,
+		const struct exynos_drm_ipp_funcs *funcs, unsigned int caps,
+		const struct exynos_drm_ipp_formats *formats,
+		unsigned int num_formats, const char *name);
+void exynos_drm_ipp_unregister(struct drm_device *dev,
+			       struct exynos_drm_ipp *ipp);
+
+void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret);
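+
+/*
+ * Typical driver flow (sketch): call exynos_drm_ipp_register() at bind time,
+ * program the hardware from the funcs->commit() callback and report
+ * completion from the irq handler via exynos_drm_ipp_task_done().
+ */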
+
+#ifdef CONFIG_DRM_EXYNOS_IPP
+int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv);
+int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_priv);
+int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv);
+int exynos_drm_ipp_commit_ioctl(struct drm_device *dev,
+				void *data, struct drm_file *file_priv);
+#else
+static inline int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev,
+	 void *data, struct drm_file *file_priv)
+{
+	struct drm_exynos_ioctl_ipp_get_res *resp = data;
+
+	resp->count_ipps = 0;
+	return 0;
+}
+static inline int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev,
+	 void *data, struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+static inline int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev,
+	 void *data, struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+static inline int exynos_drm_ipp_commit_ioctl(struct drm_device *dev,
+	 void *data, struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c
index d2a90da..38a2a7f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_plane.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c
@@ -289,13 +289,12 @@
 };
 
 static void exynos_plane_attach_zpos_property(struct drm_plane *plane,
-					      bool immutable)
+					      int zpos, bool immutable)
 {
-	/* FIXME */
 	if (immutable)
-		drm_plane_create_zpos_immutable_property(plane, 0);
+		drm_plane_create_zpos_immutable_property(plane, zpos);
 	else
-		drm_plane_create_zpos_property(plane, 0, 0, MAX_PLANE - 1);
+		drm_plane_create_zpos_property(plane, zpos, 0, MAX_PLANE - 1);
 }
 
 int exynos_plane_init(struct drm_device *dev,
@@ -320,7 +319,7 @@
 	exynos_plane->index = index;
 	exynos_plane->config = config;
 
-	exynos_plane_attach_zpos_property(&exynos_plane->base,
+	exynos_plane_attach_zpos_property(&exynos_plane->base, config->zpos,
 			   !(config->capabilities & EXYNOS_DRM_PLANE_CAP_ZPOS));
 
 	return 0;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 79282a8..1a76dd3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/component.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -22,29 +23,18 @@
 #include <drm/exynos_drm.h>
 #include "regs-rotator.h"
 #include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
 #include "exynos_drm_ipp.h"
 
 /*
  * Rotator supports image crop/rotator and input/output DMA operations.
  * input DMA reads image data from the memory.
  * output DMA writes image data to memory.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> Rotator H/W ----> Memory.
  */
 
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. need to add supported list in prop_list.
- */
+#define ROTATOR_AUTOSUSPEND_DELAY	2000
 
-#define get_rot_context(dev)	platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv)	container_of(ippdrv,\
-					struct rot_context, ippdrv);
-#define rot_read(offset)		readl(rot->regs + (offset))
+#define rot_read(offset)	readl(rot->regs + (offset))
 #define rot_write(cfg, offset)	writel(cfg, rot->regs + (offset))
 
 enum rot_irq_status {
@@ -52,54 +42,28 @@
 	ROT_IRQ_STATUS_ILLEGAL	= 9,
 };
 
-/*
- * A structure of limitation.
- *
- * @min_w: minimum width.
- * @min_h: minimum height.
- * @max_w: maximum width.
- * @max_h: maximum height.
- * @align: align size.
- */
-struct rot_limit {
-	u32	min_w;
-	u32	min_h;
-	u32	max_w;
-	u32	max_h;
-	u32	align;
-};
-
-/*
- * A structure of limitation table.
- *
- * @ycbcr420_2p: case of YUV.
- * @rgb888: case of RGB.
- */
-struct rot_limit_table {
-	struct rot_limit	ycbcr420_2p;
-	struct rot_limit	rgb888;
+struct rot_variant {
+	const struct exynos_drm_ipp_formats *formats;
+	unsigned int	num_formats;
 };
 
 /*
  * A structure of rotator context.
- * @ippdrv: prepare initialization using ippdrv.
- * @regs_res: register resources.
+ * @ipp: the IPP module instance registered for this device.
  * @regs: memory mapped io registers.
  * @clock: rotator gate clock.
- * @limit_tbl: limitation of rotator.
- * @irq: irq number.
- * @cur_buf_id: current operation buffer id.
- * @suspended: suspended state.
+ * @formats: supported image formats and their hardware limits.
+ * @task: currently processed transformation task.
  */
 struct rot_context {
-	struct exynos_drm_ippdrv	ippdrv;
-	struct resource	*regs_res;
+	struct exynos_drm_ipp ipp;
+	struct drm_device *drm_dev;
+	struct device	*dev;
 	void __iomem	*regs;
 	struct clk	*clock;
-	struct rot_limit_table	*limit_tbl;
-	int	irq;
-	int	cur_buf_id[EXYNOS_DRM_OPS_MAX];
-	bool	suspended;
+	const struct exynos_drm_ipp_formats *formats;
+	unsigned int	num_formats;
+	struct exynos_drm_ipp_task	*task;
 };
 
 static void rotator_reg_set_irq(struct rot_context *rot, bool enable)
@@ -114,15 +78,6 @@
 	rot_write(val, ROT_CONFIG);
 }
 
-static u32 rotator_reg_get_fmt(struct rot_context *rot)
-{
-	u32 val = rot_read(ROT_CONTROL);
-
-	val &= ROT_CONTROL_FMT_MASK;
-
-	return val;
-}
-
 static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot)
 {
 	u32 val = rot_read(ROT_STATUS);
@@ -138,9 +93,6 @@
 static irqreturn_t rotator_irq_handler(int irq, void *arg)
 {
 	struct rot_context *rot = arg;
-	struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
-	struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
-	struct drm_exynos_ipp_event_work *event_work = c_node->event_work;
 	enum rot_irq_status irq_status;
 	u32 val;
 
@@ -152,56 +104,21 @@
 	val |= ROT_STATUS_IRQ_PENDING((u32)irq_status);
 	rot_write(val, ROT_STATUS);
 
-	if (irq_status == ROT_IRQ_STATUS_COMPLETE) {
-		event_work->ippdrv = ippdrv;
-		event_work->buf_id[EXYNOS_DRM_OPS_DST] =
-			rot->cur_buf_id[EXYNOS_DRM_OPS_DST];
-		queue_work(ippdrv->event_workq, &event_work->work);
-	} else {
-		DRM_ERROR("the SFR is set illegally\n");
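+	/* complete the current task and let autosuspend power the block down */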
+	if (rot->task) {
+		struct exynos_drm_ipp_task *task = rot->task;
+
+		rot->task = NULL;
+		pm_runtime_mark_last_busy(rot->dev);
+		pm_runtime_put_autosuspend(rot->dev);
+		exynos_drm_ipp_task_done(task,
+			irq_status == ROT_IRQ_STATUS_COMPLETE ? 0 : -EINVAL);
 	}
 
 	return IRQ_HANDLED;
 }
 
-static void rotator_align_size(struct rot_context *rot, u32 fmt, u32 *hsize,
-		u32 *vsize)
+static void rotator_src_set_fmt(struct rot_context *rot, u32 fmt)
 {
-	struct rot_limit_table *limit_tbl = rot->limit_tbl;
-	struct rot_limit *limit;
-	u32 mask, val;
-
-	/* Get size limit */
-	if (fmt == ROT_CONTROL_FMT_RGB888)
-		limit = &limit_tbl->rgb888;
-	else
-		limit = &limit_tbl->ycbcr420_2p;
-
-	/* Get mask for rounding to nearest aligned val */
-	mask = ~((1 << limit->align) - 1);
-
-	/* Set aligned width */
-	val = ROT_ALIGN(*hsize, limit->align, mask);
-	if (val < limit->min_w)
-		*hsize = ROT_MIN(limit->min_w, mask);
-	else if (val > limit->max_w)
-		*hsize = ROT_MAX(limit->max_w, mask);
-	else
-		*hsize = val;
-
-	/* Set aligned height */
-	val = ROT_ALIGN(*vsize, limit->align, mask);
-	if (val < limit->min_h)
-		*vsize = ROT_MIN(limit->min_h, mask);
-	else if (val > limit->max_h)
-		*vsize = ROT_MAX(limit->max_h, mask);
-	else
-		*vsize = val;
-}
-
-static int rotator_src_set_fmt(struct device *dev, u32 fmt)
-{
-	struct rot_context *rot = dev_get_drvdata(dev);
 	u32 val;
 
 	val = rot_read(ROT_CONTROL);
@@ -214,515 +131,176 @@
 	case DRM_FORMAT_XRGB8888:
 		val |= ROT_CONTROL_FMT_RGB888;
 		break;
-	default:
-		DRM_ERROR("invalid image format\n");
-		return -EINVAL;
 	}
 
 	rot_write(val, ROT_CONTROL);
-
-	return 0;
 }
 
-static inline bool rotator_check_reg_fmt(u32 fmt)
+static void rotator_src_set_buf(struct rot_context *rot,
+				struct exynos_drm_ipp_buffer *buf)
 {
-	if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) ||
-	    (fmt == ROT_CONTROL_FMT_RGB888))
-		return true;
-
-	return false;
-}
-
-static int rotator_src_set_size(struct device *dev, int swap,
-		struct drm_exynos_pos *pos,
-		struct drm_exynos_sz *sz)
-{
-	struct rot_context *rot = dev_get_drvdata(dev);
-	u32 fmt, hsize, vsize;
 	u32 val;
 
-	/* Get format */
-	fmt = rotator_reg_get_fmt(rot);
-	if (!rotator_check_reg_fmt(fmt)) {
-		DRM_ERROR("invalid format.\n");
-		return -EINVAL;
-	}
-
-	/* Align buffer size */
-	hsize = sz->hsize;
-	vsize = sz->vsize;
-	rotator_align_size(rot, fmt, &hsize, &vsize);
-
 	/* Set buffer size configuration */
-	val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize);
+	val = ROT_SET_BUF_SIZE_H(buf->buf.height) |
+	      ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]);
 	rot_write(val, ROT_SRC_BUF_SIZE);
 
 	/* Set crop image position configuration */
-	val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x);
+	val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x);
 	rot_write(val, ROT_SRC_CROP_POS);
-	val = ROT_SRC_CROP_SIZE_H(pos->h) | ROT_SRC_CROP_SIZE_W(pos->w);
+	val = ROT_SRC_CROP_SIZE_H(buf->rect.h) |
+	      ROT_SRC_CROP_SIZE_W(buf->rect.w);
 	rot_write(val, ROT_SRC_CROP_SIZE);
 
-	return 0;
+	/* Set buffer DMA address */
+	rot_write(buf->dma_addr[0], ROT_SRC_BUF_ADDR(0));
+	rot_write(buf->dma_addr[1], ROT_SRC_BUF_ADDR(1));
 }
 
-static int rotator_src_set_addr(struct device *dev,
-		struct drm_exynos_ipp_buf_info *buf_info,
-		u32 buf_id, enum drm_exynos_ipp_buf_type buf_type)
+static void rotator_dst_set_transf(struct rot_context *rot,
+				   unsigned int rotation)
 {
-	struct rot_context *rot = dev_get_drvdata(dev);
-	dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX];
-	u32 val, fmt, hsize, vsize;
-	int i;
-
-	/* Set current buf_id */
-	rot->cur_buf_id[EXYNOS_DRM_OPS_SRC] = buf_id;
-
-	switch (buf_type) {
-	case IPP_BUF_ENQUEUE:
-		/* Set address configuration */
-		for_each_ipp_planar(i)
-			addr[i] = buf_info->base[i];
-
-		/* Get format */
-		fmt = rotator_reg_get_fmt(rot);
-		if (!rotator_check_reg_fmt(fmt)) {
-			DRM_ERROR("invalid format.\n");
-			return -EINVAL;
-		}
-
-		/* Re-set cb planar for NV12 format */
-		if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) &&
-		    !addr[EXYNOS_DRM_PLANAR_CB]) {
-
-			val = rot_read(ROT_SRC_BUF_SIZE);
-			hsize = ROT_GET_BUF_SIZE_W(val);
-			vsize = ROT_GET_BUF_SIZE_H(val);
-
-			/* Set cb planar */
-			addr[EXYNOS_DRM_PLANAR_CB] =
-				addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize;
-		}
-
-		for_each_ipp_planar(i)
-			rot_write(addr[i], ROT_SRC_BUF_ADDR(i));
-		break;
-	case IPP_BUF_DEQUEUE:
-		for_each_ipp_planar(i)
-			rot_write(0x0, ROT_SRC_BUF_ADDR(i));
-		break;
-	default:
-		/* Nothing to do */
-		break;
-	}
-
-	return 0;
-}
-
-static int rotator_dst_set_transf(struct device *dev,
-		enum drm_exynos_degree degree,
-		enum drm_exynos_flip flip, bool *swap)
-{
-	struct rot_context *rot = dev_get_drvdata(dev);
 	u32 val;
 
 	/* Set transform configuration */
 	val = rot_read(ROT_CONTROL);
 	val &= ~ROT_CONTROL_FLIP_MASK;
 
-	switch (flip) {
-	case EXYNOS_DRM_FLIP_VERTICAL:
-		val |= ROT_CONTROL_FLIP_VERTICAL;
-		break;
-	case EXYNOS_DRM_FLIP_HORIZONTAL:
+	if (rotation & DRM_MODE_REFLECT_X)
 		val |= ROT_CONTROL_FLIP_HORIZONTAL;
-		break;
-	default:
-		/* Flip None */
-		break;
-	}
+	if (rotation & DRM_MODE_REFLECT_Y)
+		val |= ROT_CONTROL_FLIP_VERTICAL;
 
 	val &= ~ROT_CONTROL_ROT_MASK;
 
-	switch (degree) {
-	case EXYNOS_DRM_DEGREE_90:
+	if (rotation & DRM_MODE_ROTATE_90)
 		val |= ROT_CONTROL_ROT_90;
-		break;
-	case EXYNOS_DRM_DEGREE_180:
+	else if (rotation & DRM_MODE_ROTATE_180)
 		val |= ROT_CONTROL_ROT_180;
-		break;
-	case EXYNOS_DRM_DEGREE_270:
+	else if (rotation & DRM_MODE_ROTATE_270)
 		val |= ROT_CONTROL_ROT_270;
-		break;
-	default:
-		/* Rotation 0 Degree */
-		break;
-	}
 
 	rot_write(val, ROT_CONTROL);
-
-	/* Check degree for setting buffer size swap */
-	if ((degree == EXYNOS_DRM_DEGREE_90) ||
-	    (degree == EXYNOS_DRM_DEGREE_270))
-		*swap = true;
-	else
-		*swap = false;
-
-	return 0;
 }
 
-static int rotator_dst_set_size(struct device *dev, int swap,
-		struct drm_exynos_pos *pos,
-		struct drm_exynos_sz *sz)
+static void rotator_dst_set_buf(struct rot_context *rot,
+				struct exynos_drm_ipp_buffer *buf)
 {
-	struct rot_context *rot = dev_get_drvdata(dev);
-	u32 val, fmt, hsize, vsize;
-
-	/* Get format */
-	fmt = rotator_reg_get_fmt(rot);
-	if (!rotator_check_reg_fmt(fmt)) {
-		DRM_ERROR("invalid format.\n");
-		return -EINVAL;
-	}
-
-	/* Align buffer size */
-	hsize = sz->hsize;
-	vsize = sz->vsize;
-	rotator_align_size(rot, fmt, &hsize, &vsize);
+	u32 val;
 
 	/* Set buffer size configuration */
-	val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize);
+	val = ROT_SET_BUF_SIZE_H(buf->buf.height) |
+	      ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]);
 	rot_write(val, ROT_DST_BUF_SIZE);
 
 	/* Set crop image position configuration */
-	val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x);
+	val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x);
 	rot_write(val, ROT_DST_CROP_POS);
 
-	return 0;
+	/* Set buffer DMA address */
+	rot_write(buf->dma_addr[0], ROT_DST_BUF_ADDR(0));
+	rot_write(buf->dma_addr[1], ROT_DST_BUF_ADDR(1));
 }
 
-static int rotator_dst_set_addr(struct device *dev,
-		struct drm_exynos_ipp_buf_info *buf_info,
-		u32 buf_id, enum drm_exynos_ipp_buf_type buf_type)
+static void rotator_start(struct rot_context *rot)
 {
-	struct rot_context *rot = dev_get_drvdata(dev);
-	dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX];
-	u32 val, fmt, hsize, vsize;
-	int i;
-
-	/* Set current buf_id */
-	rot->cur_buf_id[EXYNOS_DRM_OPS_DST] = buf_id;
-
-	switch (buf_type) {
-	case IPP_BUF_ENQUEUE:
-		/* Set address configuration */
-		for_each_ipp_planar(i)
-			addr[i] = buf_info->base[i];
-
-		/* Get format */
-		fmt = rotator_reg_get_fmt(rot);
-		if (!rotator_check_reg_fmt(fmt)) {
-			DRM_ERROR("invalid format.\n");
-			return -EINVAL;
-		}
-
-		/* Re-set cb planar for NV12 format */
-		if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) &&
-		    !addr[EXYNOS_DRM_PLANAR_CB]) {
-			/* Get buf size */
-			val = rot_read(ROT_DST_BUF_SIZE);
-
-			hsize = ROT_GET_BUF_SIZE_W(val);
-			vsize = ROT_GET_BUF_SIZE_H(val);
-
-			/* Set cb planar */
-			addr[EXYNOS_DRM_PLANAR_CB] =
-				addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize;
-		}
-
-		for_each_ipp_planar(i)
-			rot_write(addr[i], ROT_DST_BUF_ADDR(i));
-		break;
-	case IPP_BUF_DEQUEUE:
-		for_each_ipp_planar(i)
-			rot_write(0x0, ROT_DST_BUF_ADDR(i));
-		break;
-	default:
-		/* Nothing to do */
-		break;
-	}
-
-	return 0;
-}
-
-static struct exynos_drm_ipp_ops rot_src_ops = {
-	.set_fmt	=	rotator_src_set_fmt,
-	.set_size	=	rotator_src_set_size,
-	.set_addr	=	rotator_src_set_addr,
-};
-
-static struct exynos_drm_ipp_ops rot_dst_ops = {
-	.set_transf	=	rotator_dst_set_transf,
-	.set_size	=	rotator_dst_set_size,
-	.set_addr	=	rotator_dst_set_addr,
-};
-
-static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
-{
-	struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
-	prop_list->version = 1;
-	prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
-				(1 << EXYNOS_DRM_FLIP_HORIZONTAL);
-	prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
-				(1 << EXYNOS_DRM_DEGREE_90) |
-				(1 << EXYNOS_DRM_DEGREE_180) |
-				(1 << EXYNOS_DRM_DEGREE_270);
-	prop_list->csc = 0;
-	prop_list->crop = 0;
-	prop_list->scale = 0;
-
-	return 0;
-}
-
-static inline bool rotator_check_drm_fmt(u32 fmt)
-{
-	switch (fmt) {
-	case DRM_FORMAT_XRGB8888:
-	case DRM_FORMAT_NV12:
-		return true;
-	default:
-		DRM_DEBUG_KMS("not support format\n");
-		return false;
-	}
-}
-
-static inline bool rotator_check_drm_flip(enum drm_exynos_flip flip)
-{
-	switch (flip) {
-	case EXYNOS_DRM_FLIP_NONE:
-	case EXYNOS_DRM_FLIP_VERTICAL:
-	case EXYNOS_DRM_FLIP_HORIZONTAL:
-	case EXYNOS_DRM_FLIP_BOTH:
-		return true;
-	default:
-		DRM_DEBUG_KMS("invalid flip\n");
-		return false;
-	}
-}
-
-static int rotator_ippdrv_check_property(struct device *dev,
-		struct drm_exynos_ipp_property *property)
-{
-	struct drm_exynos_ipp_config *src_config =
-					&property->config[EXYNOS_DRM_OPS_SRC];
-	struct drm_exynos_ipp_config *dst_config =
-					&property->config[EXYNOS_DRM_OPS_DST];
-	struct drm_exynos_pos *src_pos = &src_config->pos;
-	struct drm_exynos_pos *dst_pos = &dst_config->pos;
-	struct drm_exynos_sz *src_sz = &src_config->sz;
-	struct drm_exynos_sz *dst_sz = &dst_config->sz;
-	bool swap = false;
-
-	/* Check format configuration */
-	if (src_config->fmt != dst_config->fmt) {
-		DRM_DEBUG_KMS("not support csc feature\n");
-		return -EINVAL;
-	}
-
-	if (!rotator_check_drm_fmt(dst_config->fmt)) {
-		DRM_DEBUG_KMS("invalid format\n");
-		return -EINVAL;
-	}
-
-	/* Check transform configuration */
-	if (src_config->degree != EXYNOS_DRM_DEGREE_0) {
-		DRM_DEBUG_KMS("not support source-side rotation\n");
-		return -EINVAL;
-	}
-
-	switch (dst_config->degree) {
-	case EXYNOS_DRM_DEGREE_90:
-	case EXYNOS_DRM_DEGREE_270:
-		swap = true;
-	case EXYNOS_DRM_DEGREE_0:
-	case EXYNOS_DRM_DEGREE_180:
-		/* No problem */
-		break;
-	default:
-		DRM_DEBUG_KMS("invalid degree\n");
-		return -EINVAL;
-	}
-
-	if (src_config->flip != EXYNOS_DRM_FLIP_NONE) {
-		DRM_DEBUG_KMS("not support source-side flip\n");
-		return -EINVAL;
-	}
-
-	if (!rotator_check_drm_flip(dst_config->flip)) {
-		DRM_DEBUG_KMS("invalid flip\n");
-		return -EINVAL;
-	}
-
-	/* Check size configuration */
-	if ((src_pos->x + src_pos->w > src_sz->hsize) ||
-		(src_pos->y + src_pos->h > src_sz->vsize)) {
-		DRM_DEBUG_KMS("out of source buffer bound\n");
-		return -EINVAL;
-	}
-
-	if (swap) {
-		if ((dst_pos->x + dst_pos->h > dst_sz->vsize) ||
-			(dst_pos->y + dst_pos->w > dst_sz->hsize)) {
-			DRM_DEBUG_KMS("out of destination buffer bound\n");
-			return -EINVAL;
-		}
-
-		if ((src_pos->w != dst_pos->h) || (src_pos->h != dst_pos->w)) {
-			DRM_DEBUG_KMS("not support scale feature\n");
-			return -EINVAL;
-		}
-	} else {
-		if ((dst_pos->x + dst_pos->w > dst_sz->hsize) ||
-			(dst_pos->y + dst_pos->h > dst_sz->vsize)) {
-			DRM_DEBUG_KMS("out of destination buffer bound\n");
-			return -EINVAL;
-		}
-
-		if ((src_pos->w != dst_pos->w) || (src_pos->h != dst_pos->h)) {
-			DRM_DEBUG_KMS("not support scale feature\n");
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-
-static int rotator_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
-{
-	struct rot_context *rot = dev_get_drvdata(dev);
 	u32 val;
 
-	if (rot->suspended) {
-		DRM_ERROR("suspended state\n");
-		return -EPERM;
-	}
-
-	if (cmd != IPP_CMD_M2M) {
-		DRM_ERROR("not support cmd: %d\n", cmd);
-		return -EINVAL;
-	}
-
 	/* Set interrupt enable */
 	rotator_reg_set_irq(rot, true);
 
 	val = rot_read(ROT_CONTROL);
 	val |= ROT_CONTROL_START;
-
 	rot_write(val, ROT_CONTROL);
+}
+
+static int rotator_commit(struct exynos_drm_ipp *ipp,
+			  struct exynos_drm_ipp_task *task)
+{
+	struct rot_context *rot =
+			container_of(ipp, struct rot_context, ipp);
+
+	pm_runtime_get_sync(rot->dev);
+	rot->task = task;
+
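+	/* program the whole pipeline; rotator_irq_handler() completes the task */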
+	rotator_src_set_fmt(rot, task->src.buf.fourcc);
+	rotator_src_set_buf(rot, &task->src);
+	rotator_dst_set_transf(rot, task->transform.rotation);
+	rotator_dst_set_buf(rot, &task->dst);
+	rotator_start(rot);
 
 	return 0;
 }
 
-static struct rot_limit_table rot_limit_tbl_4210 = {
-	.ycbcr420_2p = {
-		.min_w = 32,
-		.min_h = 32,
-		.max_w = SZ_64K,
-		.max_h = SZ_64K,
-		.align = 3,
-	},
-	.rgb888 = {
-		.min_w = 8,
-		.min_h = 8,
-		.max_w = SZ_16K,
-		.max_h = SZ_16K,
-		.align = 2,
-	},
+static const struct exynos_drm_ipp_funcs ipp_funcs = {
+	.commit = rotator_commit,
 };
 
-static struct rot_limit_table rot_limit_tbl_4x12 = {
-	.ycbcr420_2p = {
-		.min_w = 32,
-		.min_h = 32,
-		.max_w = SZ_32K,
-		.max_h = SZ_32K,
-		.align = 3,
-	},
-	.rgb888 = {
-		.min_w = 8,
-		.min_h = 8,
-		.max_w = SZ_8K,
-		.max_h = SZ_8K,
-		.align = 2,
-	},
-};
+static int rotator_bind(struct device *dev, struct device *master, void *data)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	struct exynos_drm_ipp *ipp = &rot->ipp;
 
-static struct rot_limit_table rot_limit_tbl_5250 = {
-	.ycbcr420_2p = {
-		.min_w = 32,
-		.min_h = 32,
-		.max_w = SZ_32K,
-		.max_h = SZ_32K,
-		.align = 3,
-	},
-	.rgb888 = {
-		.min_w = 8,
-		.min_h = 8,
-		.max_w = SZ_8K,
-		.max_h = SZ_8K,
-		.align = 1,
-	},
-};
+	rot->drm_dev = drm_dev;
+	drm_iommu_attach_device(drm_dev, dev);
 
-static const struct of_device_id exynos_rotator_match[] = {
-	{
-		.compatible = "samsung,exynos4210-rotator",
-		.data = &rot_limit_tbl_4210,
-	},
-	{
-		.compatible = "samsung,exynos4212-rotator",
-		.data = &rot_limit_tbl_4x12,
-	},
-	{
-		.compatible = "samsung,exynos5250-rotator",
-		.data = &rot_limit_tbl_5250,
-	},
-	{},
+	exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+			   DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE,
+			   rot->formats, rot->num_formats, "rotator");
+
+	dev_info(dev, "The exynos rotator has been probed successfully\n");
+
+	return 0;
+}
+
+static void rotator_unbind(struct device *dev, struct device *master,
+			void *data)
+{
+	struct rot_context *rot = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	struct exynos_drm_ipp *ipp = &rot->ipp;
+
+	exynos_drm_ipp_unregister(drm_dev, ipp);
+	drm_iommu_detach_device(rot->drm_dev, rot->dev);
+}
+
+static const struct component_ops rotator_component_ops = {
+	.bind	= rotator_bind,
+	.unbind = rotator_unbind,
 };
-MODULE_DEVICE_TABLE(of, exynos_rotator_match);
 
 static int rotator_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct resource	*regs_res;
 	struct rot_context *rot;
-	struct exynos_drm_ippdrv *ippdrv;
+	const struct rot_variant *variant;
+	int irq;
 	int ret;
 
-	if (!dev->of_node) {
-		dev_err(dev, "cannot find of_node.\n");
-		return -ENODEV;
-	}
-
 	rot = devm_kzalloc(dev, sizeof(*rot), GFP_KERNEL);
 	if (!rot)
 		return -ENOMEM;
 
-	rot->limit_tbl = (struct rot_limit_table *)
-				of_device_get_match_data(dev);
-	rot->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rot->regs = devm_ioremap_resource(dev, rot->regs_res);
+	variant = of_device_get_match_data(dev);
+	rot->formats = variant->formats;
+	rot->num_formats = variant->num_formats;
+	rot->dev = dev;
+	regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	rot->regs = devm_ioremap_resource(dev, regs_res);
 	if (IS_ERR(rot->regs))
 		return PTR_ERR(rot->regs);
 
-	rot->irq = platform_get_irq(pdev, 0);
-	if (rot->irq < 0) {
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
 		dev_err(dev, "failed to get irq\n");
-		return rot->irq;
+		return irq;
 	}
 
-	ret = devm_request_threaded_irq(dev, rot->irq, NULL,
-			rotator_irq_handler, IRQF_ONESHOT, "drm_rotator", rot);
+	ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev),
+			       rot);
 	if (ret < 0) {
 		dev_err(dev, "failed to request irq\n");
 		return ret;
@@ -734,35 +312,19 @@
 		return PTR_ERR(rot->clock);
 	}
 
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, ROTATOR_AUTOSUSPEND_DELAY);
 	pm_runtime_enable(dev);
-
-	ippdrv = &rot->ippdrv;
-	ippdrv->dev = dev;
-	ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &rot_src_ops;
-	ippdrv->ops[EXYNOS_DRM_OPS_DST] = &rot_dst_ops;
-	ippdrv->check_property = rotator_ippdrv_check_property;
-	ippdrv->start = rotator_ippdrv_start;
-	ret = rotator_init_prop_list(ippdrv);
-	if (ret < 0) {
-		dev_err(dev, "failed to init property list.\n");
-		goto err_ippdrv_register;
-	}
-
-	DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv);
-
 	platform_set_drvdata(pdev, rot);
 
-	ret = exynos_drm_ippdrv_register(ippdrv);
-	if (ret < 0) {
-		dev_err(dev, "failed to register drm rotator device\n");
-		goto err_ippdrv_register;
-	}
-
-	dev_info(dev, "The exynos rotator is probed successfully\n");
+	ret = component_add(dev, &rotator_component_ops);
+	if (ret)
+		goto err_component;
 
 	return 0;
 
-err_ippdrv_register:
+err_component:
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 	return ret;
 }
@@ -770,45 +332,101 @@
 static int rotator_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct rot_context *rot = dev_get_drvdata(dev);
-	struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
 
-	exynos_drm_ippdrv_unregister(ippdrv);
-
+	component_del(dev, &rotator_component_ops);
+	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 
 	return 0;
 }
 
 #ifdef CONFIG_PM
-static int rotator_clk_crtl(struct rot_context *rot, bool enable)
-{
-	if (enable) {
-		clk_prepare_enable(rot->clock);
-		rot->suspended = false;
-	} else {
-		clk_disable_unprepare(rot->clock);
-		rot->suspended = true;
-	}
-
-	return 0;
-}
-
 static int rotator_runtime_suspend(struct device *dev)
 {
 	struct rot_context *rot = dev_get_drvdata(dev);
 
-	return  rotator_clk_crtl(rot, false);
+	clk_disable_unprepare(rot->clock);
+	return 0;
 }
 
 static int rotator_runtime_resume(struct device *dev)
 {
 	struct rot_context *rot = dev_get_drvdata(dev);
 
-	return  rotator_clk_crtl(rot, true);
+	return clk_prepare_enable(rot->clock);
 }
 #endif
 
+static const struct drm_exynos_ipp_limit rotator_4210_rgb888_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) },
+	{ IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4412_rgb888_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) },
+	{ IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_5250_rgb888_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) },
+	{ IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4210_yuv_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) },
+	{ IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4412_yuv_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_32K }, .v = { 32, SZ_32K }) },
+	{ IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_4210_formats[] = {
+	{ IPP_SRCDST_FORMAT(XRGB8888, rotator_4210_rgb888_limits) },
+	{ IPP_SRCDST_FORMAT(NV12, rotator_4210_yuv_limits) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_4412_formats[] = {
+	{ IPP_SRCDST_FORMAT(XRGB8888, rotator_4412_rgb888_limits) },
+	{ IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_5250_formats[] = {
+	{ IPP_SRCDST_FORMAT(XRGB8888, rotator_5250_rgb888_limits) },
+	{ IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) },
+};
+
+static const struct rot_variant rotator_4210_data = {
+	.formats = rotator_4210_formats,
+	.num_formats = ARRAY_SIZE(rotator_4210_formats),
+};
+
+static const struct rot_variant rotator_4412_data = {
+	.formats = rotator_4412_formats,
+	.num_formats = ARRAY_SIZE(rotator_4412_formats),
+};
+
+static const struct rot_variant rotator_5250_data = {
+	.formats = rotator_5250_formats,
+	.num_formats = ARRAY_SIZE(rotator_5250_formats),
+};
+
+static const struct of_device_id exynos_rotator_match[] = {
+	{
+		.compatible = "samsung,exynos4210-rotator",
+		.data = &rotator_4210_data,
+	}, {
+		.compatible = "samsung,exynos4212-rotator",
+		.data = &rotator_4412_data,
+	}, {
+		.compatible = "samsung,exynos5250-rotator",
+		.data = &rotator_5250_data,
+	}, {
+	},
+};
+MODULE_DEVICE_TABLE(of, exynos_rotator_match);
+
 static const struct dev_pm_ops rotator_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
 				pm_runtime_force_resume)
@@ -820,7 +438,7 @@
 	.probe		= rotator_probe,
 	.remove		= rotator_remove,
 	.driver		= {
-		.name	= "exynos-rot",
+		.name	= "exynos-rotator",
 		.owner	= THIS_MODULE,
 		.pm	= &rotator_pm_ops,
 		.of_match_table = exynos_rotator_match,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
new file mode 100644
index 0000000..91d4382
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 2017 Samsung Electronics Co., Ltd.
+ * Author:
+ *	Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/component.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drmP.h>
+#include <drm/exynos_drm.h>
+#include "regs-scaler.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
+#include "exynos_drm_ipp.h"
+
+#define scaler_read(offset)		readl(scaler->regs + (offset))
+#define scaler_write(cfg, offset)	writel(cfg, scaler->regs + (offset))
+#define SCALER_MAX_CLK			4
+#define SCALER_AUTOSUSPEND_DELAY	2000
+
+struct scaler_data {
+	const char	*clk_name[SCALER_MAX_CLK];
+	unsigned int	num_clk;
+	const struct exynos_drm_ipp_formats *formats;
+	unsigned int	num_formats;
+};
+
+struct scaler_context {
+	struct exynos_drm_ipp		ipp;
+	struct drm_device		*drm_dev;
+	struct device			*dev;
+	void __iomem			*regs;
+	struct clk			*clock[SCALER_MAX_CLK];
+	struct exynos_drm_ipp_task	*task;
+	const struct scaler_data	*scaler_data;
+};
+
+static u32 scaler_get_format(u32 drm_fmt)
+{
+	switch (drm_fmt) {
+	case DRM_FORMAT_NV21:
+		return SCALER_YUV420_2P_UV;
+	case DRM_FORMAT_NV12:
+		return SCALER_YUV420_2P_VU;
+	case DRM_FORMAT_YUV420:
+		return SCALER_YUV420_3P;
+	case DRM_FORMAT_YUYV:
+		return SCALER_YUV422_1P_YUYV;
+	case DRM_FORMAT_UYVY:
+		return SCALER_YUV422_1P_UYVY;
+	case DRM_FORMAT_YVYU:
+		return SCALER_YUV422_1P_YVYU;
+	case DRM_FORMAT_NV61:
+		return SCALER_YUV422_2P_UV;
+	case DRM_FORMAT_NV16:
+		return SCALER_YUV422_2P_VU;
+	case DRM_FORMAT_YUV422:
+		return SCALER_YUV422_3P;
+	case DRM_FORMAT_NV42:
+		return SCALER_YUV444_2P_UV;
+	case DRM_FORMAT_NV24:
+		return SCALER_YUV444_2P_VU;
+	case DRM_FORMAT_YUV444:
+		return SCALER_YUV444_3P;
+	case DRM_FORMAT_RGB565:
+		return SCALER_RGB_565;
+	case DRM_FORMAT_XRGB1555:
+		return SCALER_ARGB1555;
+	case DRM_FORMAT_ARGB1555:
+		return SCALER_ARGB1555;
+	case DRM_FORMAT_XRGB4444:
+		return SCALER_ARGB4444;
+	case DRM_FORMAT_ARGB4444:
+		return SCALER_ARGB4444;
+	case DRM_FORMAT_XRGB8888:
+		return SCALER_ARGB8888;
+	case DRM_FORMAT_ARGB8888:
+		return SCALER_ARGB8888;
+	case DRM_FORMAT_RGBX8888:
+		return SCALER_RGBA8888;
+	case DRM_FORMAT_RGBA8888:
+		return SCALER_RGBA8888;
+	default:
+		break;
+	}
+
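+	/* note: 0 also encodes SCALER_YUV420_2P_UV; callers validate formats first */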
+	return 0;
+}
+
+static inline void scaler_enable_int(struct scaler_context *scaler)
+{
+	u32 val;
+
+	val = SCALER_INT_EN_TIMEOUT |
+		SCALER_INT_EN_ILLEGAL_BLEND |
+		SCALER_INT_EN_ILLEGAL_RATIO |
+		SCALER_INT_EN_ILLEGAL_DST_HEIGHT |
+		SCALER_INT_EN_ILLEGAL_DST_WIDTH |
+		SCALER_INT_EN_ILLEGAL_DST_V_POS |
+		SCALER_INT_EN_ILLEGAL_DST_H_POS |
+		SCALER_INT_EN_ILLEGAL_DST_C_SPAN |
+		SCALER_INT_EN_ILLEGAL_DST_Y_SPAN |
+		SCALER_INT_EN_ILLEGAL_DST_CR_BASE |
+		SCALER_INT_EN_ILLEGAL_DST_CB_BASE |
+		SCALER_INT_EN_ILLEGAL_DST_Y_BASE |
+		SCALER_INT_EN_ILLEGAL_DST_COLOR |
+		SCALER_INT_EN_ILLEGAL_SRC_HEIGHT |
+		SCALER_INT_EN_ILLEGAL_SRC_WIDTH |
+		SCALER_INT_EN_ILLEGAL_SRC_CV_POS |
+		SCALER_INT_EN_ILLEGAL_SRC_CH_POS |
+		SCALER_INT_EN_ILLEGAL_SRC_YV_POS |
+		SCALER_INT_EN_ILLEGAL_SRC_YH_POS |
+		SCALER_INT_EN_ILLEGAL_DST_SPAN |
+		SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN |
+		SCALER_INT_EN_ILLEGAL_SRC_CR_BASE |
+		SCALER_INT_EN_ILLEGAL_SRC_CB_BASE |
+		SCALER_INT_EN_ILLEGAL_SRC_Y_BASE |
+		SCALER_INT_EN_ILLEGAL_SRC_COLOR |
+		SCALER_INT_EN_FRAME_END;
+	scaler_write(val, SCALER_INT_EN);
+}
+
+static inline void scaler_set_src_fmt(struct scaler_context *scaler,
+	u32 src_fmt)
+{
+	u32 val;
+
+	val = SCALER_SRC_CFG_SET_COLOR_FORMAT(src_fmt);
+	scaler_write(val, SCALER_SRC_CFG);
+}
+
+static inline void scaler_set_src_base(struct scaler_context *scaler,
+	struct exynos_drm_ipp_buffer *src_buf)
+{
+	static unsigned int bases[] = {
+		SCALER_SRC_Y_BASE,
+		SCALER_SRC_CB_BASE,
+		SCALER_SRC_CR_BASE,
+	};
+	int i;
+
+	for (i = 0; i < src_buf->format->num_planes; ++i)
+		scaler_write(src_buf->dma_addr[i], bases[i]);
+}
+
+static inline void scaler_set_src_span(struct scaler_context *scaler,
+	struct exynos_drm_ipp_buffer *src_buf)
+{
+	u32 val;
+
+	val = SCALER_SRC_SPAN_SET_Y_SPAN(src_buf->buf.pitch[0] /
+		src_buf->format->cpp[0]);
+
+	if (src_buf->format->num_planes > 1)
+		val |= SCALER_SRC_SPAN_SET_C_SPAN(src_buf->buf.pitch[1]);
+
+	scaler_write(val, SCALER_SRC_SPAN);
+}
+
+static inline void scaler_set_src_luma_pos(struct scaler_context *scaler,
+	struct drm_exynos_ipp_task_rect *src_pos)
+{
+	u32 val;
+
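+	/* positions appear to be programmed in 1/4-pixel units (two fractional bits) */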
+	val = SCALER_SRC_Y_POS_SET_YH_POS(src_pos->x << 2);
+	val |=  SCALER_SRC_Y_POS_SET_YV_POS(src_pos->y << 2);
+	scaler_write(val, SCALER_SRC_Y_POS);
+	scaler_write(val, SCALER_SRC_C_POS); /* note: chroma pos reuses luma value */
+}
+
+static inline void scaler_set_src_wh(struct scaler_context *scaler,
+	struct drm_exynos_ipp_task_rect *src_pos)
+{
+	u32 val;
+
+	val = SCALER_SRC_WH_SET_WIDTH(src_pos->w);
+	val |= SCALER_SRC_WH_SET_HEIGHT(src_pos->h);
+	scaler_write(val, SCALER_SRC_WH);
+}
+
+static inline void scaler_set_dst_fmt(struct scaler_context *scaler,
+	u32 dst_fmt)
+{
+	u32 val;
+
+	val = SCALER_DST_CFG_SET_COLOR_FORMAT(dst_fmt);
+	scaler_write(val, SCALER_DST_CFG);
+}
+
+static inline void scaler_set_dst_base(struct scaler_context *scaler,
+	struct exynos_drm_ipp_buffer *dst_buf)
+{
+	static unsigned int bases[] = {
+		SCALER_DST_Y_BASE,
+		SCALER_DST_CB_BASE,
+		SCALER_DST_CR_BASE,
+	};
+	int i;
+
+	for (i = 0; i < dst_buf->format->num_planes; ++i)
+		scaler_write(dst_buf->dma_addr[i], bases[i]);
+}
+
+static inline void scaler_set_dst_span(struct scaler_context *scaler,
+	struct exynos_drm_ipp_buffer *dst_buf)
+{
+	u32 val;
+
+	val = SCALER_DST_SPAN_SET_Y_SPAN(dst_buf->buf.pitch[0] /
+		dst_buf->format->cpp[0]);
+
+	if (dst_buf->format->num_planes > 1)
+		val |= SCALER_DST_SPAN_SET_C_SPAN(dst_buf->buf.pitch[1]);
+
+	scaler_write(val, SCALER_DST_SPAN);
+}
+
+static inline void scaler_set_dst_luma_pos(struct scaler_context *scaler,
+	struct drm_exynos_ipp_task_rect *dst_pos)
+{
+	u32 val;
+
+	val = SCALER_DST_POS_SET_H_POS(dst_pos->x);
+	val |= SCALER_DST_POS_SET_V_POS(dst_pos->y);
+	scaler_write(val, SCALER_DST_POS);
+}
+
+static inline void scaler_set_dst_wh(struct scaler_context *scaler,
+	struct drm_exynos_ipp_task_rect *dst_pos)
+{
+	u32 val;
+
+	val = SCALER_DST_WH_SET_WIDTH(dst_pos->w);
+	val |= SCALER_DST_WH_SET_HEIGHT(dst_pos->h);
+	scaler_write(val, SCALER_DST_WH);
+}
+
+static inline void scaler_set_hv_ratio(struct scaler_context *scaler,
+	unsigned int rotation,
+	struct drm_exynos_ipp_task_rect *src_pos,
+	struct drm_exynos_ipp_task_rect *dst_pos)
+{
+	u32 val, h_ratio, v_ratio;
+
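+	/* 16.16 fixed-point src/dst ratio; axes are swapped for 90/270 rotation */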
+	if (drm_rotation_90_or_270(rotation)) {
+		h_ratio = (src_pos->h << 16) / dst_pos->w;
+		v_ratio = (src_pos->w << 16) / dst_pos->h;
+	} else {
+		h_ratio = (src_pos->w << 16) / dst_pos->w;
+		v_ratio = (src_pos->h << 16) / dst_pos->h;
+	}
+
+	val = SCALER_H_RATIO_SET(h_ratio);
+	scaler_write(val, SCALER_H_RATIO);
+
+	val = SCALER_V_RATIO_SET(v_ratio);
+	scaler_write(val, SCALER_V_RATIO);
+}
+
+static inline void scaler_set_rotation(struct scaler_context *scaler,
+	unsigned int rotation)
+{
+	u32 val = 0;
+
+	if (rotation & DRM_MODE_ROTATE_90)
+		val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_90);
+	else if (rotation & DRM_MODE_ROTATE_180)
+		val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_180);
+	else if (rotation & DRM_MODE_ROTATE_270)
+		val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_270);
+	if (rotation & DRM_MODE_REFLECT_X)
+		val |= SCALER_ROT_CFG_FLIP_X_EN;
+	if (rotation & DRM_MODE_REFLECT_Y)
+		val |= SCALER_ROT_CFG_FLIP_Y_EN;
+	scaler_write(val, SCALER_ROT_CFG);
+}
+
+static inline void scaler_set_csc(struct scaler_context *scaler,
+	const struct drm_format_info *fmt)
+{
+	static const u32 csc_mtx[2][3][3] = {
+		{ /* YCbCr to RGB */
+			{0x254, 0x000, 0x331},
+			{0x254, 0xf38, 0xe60},
+			{0x254, 0x409, 0x000},
+		},
+		{ /* RGB to YCbCr */
+			{0x084, 0x102, 0x032},
+			{0xfb4, 0xf6b, 0x0e1},
+			{0x0e1, 0xf44, 0xfdc},
+		},
+	};
+	int i, j, dir;
+
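+	/* RGB sources select the RGB->YCbCr matrix, all others YCbCr->RGB */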
+	switch (fmt->format) {
+	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_XRGB1555:
+	case DRM_FORMAT_ARGB1555:
+	case DRM_FORMAT_XRGB4444:
+	case DRM_FORMAT_ARGB4444:
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_RGBX8888:
+	case DRM_FORMAT_RGBA8888:
+		dir = 1;
+		break;
+	default:
+		dir = 0;
+	}
+
+	for (i = 0; i < 3; i++)
+		for (j = 0; j < 3; j++)
+			scaler_write(csc_mtx[dir][i][j], SCALER_CSC_COEF(j, i));
+}
+
+static inline void scaler_set_timer(struct scaler_context *scaler,
+	unsigned int timer, unsigned int divider)
+{
+	u32 val;
+
+	val = SCALER_TIMEOUT_CTRL_TIMER_ENABLE;
+	val |= SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(timer);
+	val |= SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(divider);
+	scaler_write(val, SCALER_TIMEOUT_CTRL);
+}
+
+static inline void scaler_start_hw(struct scaler_context *scaler)
+{
+	scaler_write(SCALER_CFG_START_CMD, SCALER_CFG);
+}
+
+static int scaler_commit(struct exynos_drm_ipp *ipp,
+			  struct exynos_drm_ipp_task *task)
+{
+	struct scaler_context *scaler =
+			container_of(ipp, struct scaler_context, ipp);
+
+	u32 src_fmt = scaler_get_format(task->src.buf.fourcc);
+	struct drm_exynos_ipp_task_rect *src_pos = &task->src.rect;
+
+	u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc);
+	struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect;
+
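+	/* remember the task; scaler_irq_handler() completes it on FRAME_END */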
+	scaler->task = task;
+
+	pm_runtime_get_sync(scaler->dev);
+
+	scaler_set_src_fmt(scaler, src_fmt);
+	scaler_set_src_base(scaler, &task->src);
+	scaler_set_src_span(scaler, &task->src);
+	scaler_set_src_luma_pos(scaler, src_pos);
+	scaler_set_src_wh(scaler, src_pos);
+
+	scaler_set_dst_fmt(scaler, dst_fmt);
+	scaler_set_dst_base(scaler, &task->dst);
+	scaler_set_dst_span(scaler, &task->dst);
+	scaler_set_dst_luma_pos(scaler, dst_pos);
+	scaler_set_dst_wh(scaler, dst_pos);
+
+	scaler_set_hv_ratio(scaler, task->transform.rotation, src_pos, dst_pos);
+	scaler_set_rotation(scaler, task->transform.rotation);
+
+	scaler_set_csc(scaler, task->src.format);
+
+	scaler_set_timer(scaler, 0xffff, 0xf);
+
+	scaler_enable_int(scaler);
+	scaler_start_hw(scaler);
+
+	return 0;
+}
+
+static const struct exynos_drm_ipp_funcs ipp_funcs = {
+	.commit = scaler_commit,
+};
+
+static inline void scaler_disable_int(struct scaler_context *scaler)
+{
+	scaler_write(0, SCALER_INT_EN);
+}
+
+static inline u32 scaler_get_int_status(struct scaler_context *scaler)
+{
+	return scaler_read(SCALER_INT_STATUS);
+}
+
+static inline int scaler_task_done(u32 val)
+{
+	return val & SCALER_INT_STATUS_FRAME_END ? 0 : -EINVAL;
+}
+
+static irqreturn_t scaler_irq_handler(int irq, void *arg)
+{
+	struct scaler_context *scaler = arg;
+
+	u32 val = scaler_get_int_status(scaler);
+
+	scaler_disable_int(scaler);
+
+	if (scaler->task) {
+		struct exynos_drm_ipp_task *task = scaler->task;
+
+		scaler->task = NULL;
+		pm_runtime_mark_last_busy(scaler->dev);
+		pm_runtime_put_autosuspend(scaler->dev);
+		exynos_drm_ipp_task_done(task, scaler_task_done(val));
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int scaler_bind(struct device *dev, struct device *master, void *data)
+{
+	struct scaler_context *scaler = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	struct exynos_drm_ipp *ipp = &scaler->ipp;
+
+	scaler->drm_dev = drm_dev;
+	drm_iommu_attach_device(drm_dev, dev);
+
+	exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+			DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+			DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+			scaler->scaler_data->formats,
+			scaler->scaler_data->num_formats, "scaler");
+
+	dev_info(dev, "The exynos scaler has been probed successfully\n");
+
+	return 0;
+}
+
+static void scaler_unbind(struct device *dev, struct device *master,
+			void *data)
+{
+	struct scaler_context *scaler = dev_get_drvdata(dev);
+	struct drm_device *drm_dev = data;
+	struct exynos_drm_ipp *ipp = &scaler->ipp;
+
+	exynos_drm_ipp_unregister(drm_dev, ipp);
+	drm_iommu_detach_device(scaler->drm_dev, scaler->dev);
+}
+
+static const struct component_ops scaler_component_ops = {
+	.bind	= scaler_bind,
+	.unbind = scaler_unbind,
+};
+
+static int scaler_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource	*regs_res;
+	struct scaler_context *scaler;
+	int irq;
+	int ret, i;
+
+	scaler = devm_kzalloc(dev, sizeof(*scaler), GFP_KERNEL);
+	if (!scaler)
+		return -ENOMEM;
+
+	scaler->scaler_data = of_device_get_match_data(dev);
+
+	scaler->dev = dev;
+	regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	scaler->regs = devm_ioremap_resource(dev, regs_res);
+	if (IS_ERR(scaler->regs))
+		return PTR_ERR(scaler->regs);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "failed to get irq\n");
+		return irq;
+	}
+
+	ret = devm_request_threaded_irq(dev, irq, NULL,	scaler_irq_handler,
+					IRQF_ONESHOT, "drm_scaler", scaler);
+	if (ret < 0) {
+		dev_err(dev, "failed to request irq\n");
+		return ret;
+	}
+
+	for (i = 0; i < scaler->scaler_data->num_clk; ++i) {
+		scaler->clock[i] = devm_clk_get(dev,
+					      scaler->scaler_data->clk_name[i]);
+		if (IS_ERR(scaler->clock[i])) {
+			dev_err(dev, "failed to get clock\n");
+			return PTR_ERR(scaler->clock[i]);
+		}
+	}
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, SCALER_AUTOSUSPEND_DELAY);
+	pm_runtime_enable(dev);
+	platform_set_drvdata(pdev, scaler);
+
+	ret = component_add(dev, &scaler_component_ops);
+	if (ret)
+		goto err_ippdrv_register;
+
+	return 0;
+
+err_ippdrv_register:
+	pm_runtime_dont_use_autosuspend(dev);
+	pm_runtime_disable(dev);
+	return ret;
+}
+
+static int scaler_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	component_del(dev, &scaler_component_ops);
+	pm_runtime_dont_use_autosuspend(dev);
+	pm_runtime_disable(dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+
+static int clk_disable_unprepare_wrapper(struct clk *clk)
+{
+	clk_disable_unprepare(clk);
+
+	return 0;
+}
+
+static int scaler_clk_ctrl(struct scaler_context *scaler, bool enable)
+{
+	int (*clk_fun)(struct clk *clk);
+	int i;
+
+	clk_fun = enable ? clk_prepare_enable : clk_disable_unprepare_wrapper;
+
+	for (i = 0; i < scaler->scaler_data->num_clk; ++i)
+		clk_fun(scaler->clock[i]);
+
+	return 0;
+}
+
+static int scaler_runtime_suspend(struct device *dev)
+{
+	struct scaler_context *scaler = dev_get_drvdata(dev);
+
+	return scaler_clk_ctrl(scaler, false);
+}
+
+static int scaler_runtime_resume(struct device *dev)
+{
+	struct scaler_context *scaler = dev_get_drvdata(dev);
+
+	return scaler_clk_ctrl(scaler, true);
+}
+#endif
+
+static const struct dev_pm_ops scaler_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+	SET_RUNTIME_PM_OPS(scaler_runtime_suspend, scaler_runtime_resume, NULL)
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_hv_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+	{ IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) },
+	{ IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+			  .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_h_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+	{ IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 1) },
+	{ IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+			  .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_one_pixel_limits[] = {
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+	{ IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+			  .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct exynos_drm_ipp_formats exynos5420_formats[] = {
+	/* SCALER_YUV420_2P_UV */
+	{ IPP_SRCDST_FORMAT(NV21, scaler_5420_two_pixel_hv_limits) },
+
+	/* SCALER_YUV420_2P_VU */
+	{ IPP_SRCDST_FORMAT(NV12, scaler_5420_two_pixel_hv_limits) },
+
+	/* SCALER_YUV420_3P */
+	{ IPP_SRCDST_FORMAT(YUV420, scaler_5420_two_pixel_hv_limits) },
+
+	/* SCALER_YUV422_1P_YUYV */
+	{ IPP_SRCDST_FORMAT(YUYV, scaler_5420_two_pixel_h_limits) },
+
+	/* SCALER_YUV422_1P_UYVY */
+	{ IPP_SRCDST_FORMAT(UYVY, scaler_5420_two_pixel_h_limits) },
+
+	/* SCALER_YUV422_1P_YVYU */
+	{ IPP_SRCDST_FORMAT(YVYU, scaler_5420_two_pixel_h_limits) },
+
+	/* SCALER_YUV422_2P_UV */
+	{ IPP_SRCDST_FORMAT(NV61, scaler_5420_two_pixel_h_limits) },
+
+	/* SCALER_YUV422_2P_VU */
+	{ IPP_SRCDST_FORMAT(NV16, scaler_5420_two_pixel_h_limits) },
+
+	/* SCALER_YUV422_3P */
+	{ IPP_SRCDST_FORMAT(YUV422, scaler_5420_two_pixel_h_limits) },
+
+	/* SCALER_YUV444_2P_UV */
+	{ IPP_SRCDST_FORMAT(NV42, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_YUV444_2P_VU */
+	{ IPP_SRCDST_FORMAT(NV24, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_YUV444_3P */
+	{ IPP_SRCDST_FORMAT(YUV444, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_RGB_565 */
+	{ IPP_SRCDST_FORMAT(RGB565, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_ARGB1555 */
+	{ IPP_SRCDST_FORMAT(XRGB1555, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_ARGB1555 */
+	{ IPP_SRCDST_FORMAT(ARGB1555, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_ARGB4444 */
+	{ IPP_SRCDST_FORMAT(XRGB4444, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_ARGB4444 */
+	{ IPP_SRCDST_FORMAT(ARGB4444, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_ARGB8888 */
+	{ IPP_SRCDST_FORMAT(XRGB8888, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_ARGB8888 */
+	{ IPP_SRCDST_FORMAT(ARGB8888, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_RGBA8888 */
+	{ IPP_SRCDST_FORMAT(RGBX8888, scaler_5420_one_pixel_limits) },
+
+	/* SCALER_RGBA8888 */
+	{ IPP_SRCDST_FORMAT(RGBA8888, scaler_5420_one_pixel_limits) },
+};
+
+static const struct scaler_data exynos5420_data = {
+	.clk_name	= {"mscl"},
+	.num_clk	= 1,
+	.formats	= exynos5420_formats,
+	.num_formats	= ARRAY_SIZE(exynos5420_formats),
+};
+
+static const struct scaler_data exynos5433_data = {
+	.clk_name	= {"pclk", "aclk", "aclk_xiu"},
+	.num_clk	= 3,
+	.formats	= exynos5420_formats, /* intentional */
+	.num_formats	= ARRAY_SIZE(exynos5420_formats),
+};
+
+static const struct of_device_id exynos_scaler_match[] = {
+	{
+		.compatible = "samsung,exynos5420-scaler",
+		.data = &exynos5420_data,
+	}, {
+		.compatible = "samsung,exynos5433-scaler",
+		.data = &exynos5433_data,
+	}, {
+	},
+};
+MODULE_DEVICE_TABLE(of, exynos_scaler_match);
+
+struct platform_driver scaler_driver = {
+	.probe		= scaler_probe,
+	.remove		= scaler_remove,
+	.driver		= {
+		.name	= "exynos-scaler",
+		.owner	= THIS_MODULE,
+		.pm	= &scaler_pm_ops,
+		.of_match_table = exynos_scaler_match,
+	},
+};
diff --git a/drivers/gpu/drm/exynos/regs-scaler.h b/drivers/gpu/drm/exynos/regs-scaler.h
new file mode 100644
index 0000000..fc7ccad
--- /dev/null
+++ b/drivers/gpu/drm/exynos/regs-scaler.h
@@ -0,0 +1,426 @@
+/* drivers/gpu/drm/exynos/regs-scaler.h
+ *
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com/
+ * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ *
+ * Register definition file for Samsung scaler driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EXYNOS_REGS_SCALER_H
+#define EXYNOS_REGS_SCALER_H
+
+/* Register part */
+
+/* Global setting */
+#define SCALER_STATUS			0x0	/* no shadow */
+#define SCALER_CFG			0x4
+
+/* Interrupt */
+#define SCALER_INT_EN			0x8	/* no shadow */
+#define SCALER_INT_STATUS		0xc	/* no shadow */
+
+/* SRC */
+#define SCALER_SRC_CFG			0x10
+#define SCALER_SRC_Y_BASE		0x14
+#define SCALER_SRC_CB_BASE		0x18
+#define SCALER_SRC_CR_BASE		0x294
+#define SCALER_SRC_SPAN			0x1c
+#define SCALER_SRC_Y_POS		0x20
+#define SCALER_SRC_WH			0x24
+#define SCALER_SRC_C_POS		0x28
+
+/* DST */
+#define SCALER_DST_CFG			0x30
+#define SCALER_DST_Y_BASE		0x34
+#define SCALER_DST_CB_BASE		0x38
+#define SCALER_DST_CR_BASE		0x298
+#define SCALER_DST_SPAN			0x3c
+#define SCALER_DST_WH			0x40
+#define SCALER_DST_POS			0x44
+
+/* Ratio */
+#define SCALER_H_RATIO			0x50
+#define SCALER_V_RATIO			0x54
+
+/* Rotation */
+#define SCALER_ROT_CFG			0x58
+
+/* Coefficient */
+/*
+ * YHCOEF_{x}{A|B|C|D}			CHCOEF_{x}{A|B|C|D}
+ *
+ *	A	B	C	D	A	B	C	D
+ * 0	60	64	68	6c	140	144	148	14c
+ * 1	70	74	78	7c	150	154	158	15c
+ * 2	80	84	88	8c	160	164	168	16c
+ * 3	90	94	98	9c	170	174	178	17c
+ * 4	a0	a4	a8	ac	180	184	188	18c
+ * 5	b0	b4	b8	bc	190	194	198	19c
+ * 6	c0	c4	c8	cc	1a0	1a4	1a8	1ac
+ * 7	d0	d4	d8	dc	1b0	1b4	1b8	1bc
+ * 8	e0	e4	e8	ec	1c0	1c4	1c8	1cc
+ *
+ *
+ * YVCOEF_{x}{A|B}			CVCOEF_{x}{A|B}
+ *
+ *	A	B			A	B
+ * 0	f0	f4			1d0	1d4
+ * 1	f8	fc			1d8	1dc
+ * 2	100	104			1e0	1e4
+ * 3	108	10c			1e8	1ec
+ * 4	110	114			1f0	1f4
+ * 5	118	11c			1f8	1fc
+ * 6	120	124			200	204
+ * 7	128	12c			208	20c
+ * 8	130	134			210	214
+ */
+#define _SCALER_HCOEF_DELTA(r, c)	((r) * 0x10 + (c) * 0x4)
+#define _SCALER_VCOEF_DELTA(r, c)	((r) * 0x8 + (c) * 0x4)
+
+#define SCALER_YHCOEF(r, c)		(0x60 + _SCALER_HCOEF_DELTA((r), (c)))
+#define SCALER_YVCOEF(r, c)		(0xf0 + _SCALER_VCOEF_DELTA((r), (c)))
+#define SCALER_CHCOEF(r, c)		(0x140 + _SCALER_HCOEF_DELTA((r), (c)))
+#define SCALER_CVCOEF(r, c)		(0x1d0 + _SCALER_VCOEF_DELTA((r), (c)))
+
+
+/* Color Space Conversion */
+#define SCALER_CSC_COEF(x, y)		(0x220 + (y) * 0xc + (x) * 0x4)
+
+/* Dithering */
+#define SCALER_DITH_CFG			0x250
+
+/* Version Number */
+#define SCALER_VER			0x260	/* no shadow */
+
+/* Cycle count and Timeout */
+#define SCALER_CYCLE_COUNT		0x278	/* no shadow */
+#define SCALER_TIMEOUT_CTRL		0x2c0	/* no shadow */
+#define SCALER_TIMEOUT_CNT		0x2c4	/* no shadow */
+
+/* Blending */
+#define SCALER_SRC_BLEND_COLOR		0x280
+#define SCALER_SRC_BLEND_ALPHA		0x284
+#define SCALER_DST_BLEND_COLOR		0x288
+#define SCALER_DST_BLEND_ALPHA		0x28c
+
+/* Color Fill */
+#define SCALER_FILL_COLOR		0x290
+
+/* Multiple Command Queue */
+#define SCALER_ADDR_Q_CONFIG		0x2a0	/* no shadow */
+#define SCALER_SRC_ADDR_Q_STATUS	0x2a4	/* no shadow */
+#define SCALER_SRC_ADDR_Q		0x2a8	/* no shadow */
+
+/* CRC */
+#define SCALER_CRC_COLOR00_10		0x2b0	/* no shadow */
+#define SCALER_CRC_COLOR20_30		0x2b4	/* no shadow */
+#define SCALER_CRC_COLOR01_11		0x2b8	/* no shadow */
+#define SCALER_CRC_COLOR21_31		0x2bc	/* no shadow */
+
+/* Shadow Registers */
+#define SCALER_SHADOW_OFFSET		0x1000
+
+
+/* Bit definition part */
+#define SCALER_MASK(hi_b, lo_b)		((1 << ((hi_b) - (lo_b) + 1)) - 1)
+#define SCALER_GET(reg, hi_b, lo_b)	\
+	(((reg) >> (lo_b)) & SCALER_MASK(hi_b, lo_b))
+#define SCALER_SET(val, hi_b, lo_b) \
+	(((val) & SCALER_MASK(hi_b, lo_b)) << (lo_b))
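+/* e.g. SCALER_SRC_WH_SET_WIDTH(w) == SCALER_SET(w, 29, 16): w in bits 29..16 */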
+
+/* SCALER_STATUS */
+#define SCALER_STATUS_SCALER_RUNNING		(1 << 1)
+#define SCALER_STATUS_SCALER_READY_CLK_DOWN	(1 << 0)
+
+/* SCALER_CFG */
+#define SCALER_CFG_FILL_EN			(1 << 24)
+#define SCALER_CFG_BLEND_COLOR_DIVIDE_ALPHA_EN	(1 << 17)
+#define SCALER_CFG_BLEND_EN			(1 << 16)
+#define SCALER_CFG_CSC_Y_OFFSET_SRC_EN		(1 << 10)
+#define SCALER_CFG_CSC_Y_OFFSET_DST_EN		(1 << 9)
+#define SCALER_CFG_16_BURST_MODE		(1 << 8)
+#define SCALER_CFG_SOFT_RESET			(1 << 1)
+#define SCALER_CFG_START_CMD			(1 << 0)
+
+/* SCALER_INT_EN */
+#define SCALER_INT_EN_TIMEOUT			(1 << 31)
+#define SCALER_INT_EN_ILLEGAL_BLEND		(1 << 24)
+#define SCALER_INT_EN_ILLEGAL_RATIO		(1 << 23)
+#define SCALER_INT_EN_ILLEGAL_DST_HEIGHT	(1 << 22)
+#define SCALER_INT_EN_ILLEGAL_DST_WIDTH		(1 << 21)
+#define SCALER_INT_EN_ILLEGAL_DST_V_POS		(1 << 20)
+#define SCALER_INT_EN_ILLEGAL_DST_H_POS		(1 << 19)
+#define SCALER_INT_EN_ILLEGAL_DST_C_SPAN	(1 << 18)
+#define SCALER_INT_EN_ILLEGAL_DST_Y_SPAN	(1 << 17)
+#define SCALER_INT_EN_ILLEGAL_DST_CR_BASE	(1 << 16)
+#define SCALER_INT_EN_ILLEGAL_DST_CB_BASE	(1 << 15)
+#define SCALER_INT_EN_ILLEGAL_DST_Y_BASE	(1 << 14)
+#define SCALER_INT_EN_ILLEGAL_DST_COLOR		(1 << 13)
+#define SCALER_INT_EN_ILLEGAL_SRC_HEIGHT	(1 << 12)
+#define SCALER_INT_EN_ILLEGAL_SRC_WIDTH		(1 << 11)
+#define SCALER_INT_EN_ILLEGAL_SRC_CV_POS	(1 << 10)
+#define SCALER_INT_EN_ILLEGAL_SRC_CH_POS	(1 << 9)
+#define SCALER_INT_EN_ILLEGAL_SRC_YV_POS	(1 << 8)
+#define SCALER_INT_EN_ILLEGAL_SRC_YH_POS	(1 << 7)
+#define SCALER_INT_EN_ILLEGAL_DST_SPAN		(1 << 6)
+#define SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN	(1 << 5)
+#define SCALER_INT_EN_ILLEGAL_SRC_CR_BASE	(1 << 4)
+#define SCALER_INT_EN_ILLEGAL_SRC_CB_BASE	(1 << 3)
+#define SCALER_INT_EN_ILLEGAL_SRC_Y_BASE	(1 << 2)
+#define SCALER_INT_EN_ILLEGAL_SRC_COLOR		(1 << 1)
+#define SCALER_INT_EN_FRAME_END			(1 << 0)
+
+/* SCALER_INT_STATUS */
+#define SCALER_INT_STATUS_TIMEOUT		(1 << 31)
+#define SCALER_INT_STATUS_ILLEGAL_BLEND		(1 << 24)
+#define SCALER_INT_STATUS_ILLEGAL_RATIO		(1 << 23)
+#define SCALER_INT_STATUS_ILLEGAL_DST_HEIGHT	(1 << 22)
+#define SCALER_INT_STATUS_ILLEGAL_DST_WIDTH	(1 << 21)
+#define SCALER_INT_STATUS_ILLEGAL_DST_V_POS	(1 << 20)
+#define SCALER_INT_STATUS_ILLEGAL_DST_H_POS	(1 << 19)
+#define SCALER_INT_STATUS_ILLEGAL_DST_C_SPAN	(1 << 18)
+#define SCALER_INT_STATUS_ILLEGAL_DST_Y_SPAN	(1 << 17)
+#define SCALER_INT_STATUS_ILLEGAL_DST_CR_BASE	(1 << 16)
+#define SCALER_INT_STATUS_ILLEGAL_DST_CB_BASE	(1 << 15)
+#define SCALER_INT_STATUS_ILLEGAL_DST_Y_BASE	(1 << 14)
+#define SCALER_INT_STATUS_ILLEGAL_DST_COLOR	(1 << 13)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_HEIGHT	(1 << 12)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_WIDTH	(1 << 11)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CV_POS	(1 << 10)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CH_POS	(1 << 9)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_YV_POS	(1 << 8)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_YH_POS	(1 << 7)
+#define SCALER_INT_STATUS_ILLEGAL_DST_SPAN	(1 << 6)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_SPAN	(1 << 5)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CR_BASE	(1 << 4)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CB_BASE	(1 << 3)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_BASE	(1 << 2)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_COLOR	(1 << 1)
+#define SCALER_INT_STATUS_FRAME_END		(1 << 0)
+
+/* SCALER_SRC_CFG */
+#define SCALER_SRC_CFG_TILE_EN			(1 << 10)
+#define SCALER_SRC_CFG_GET_BYTE_SWAP(r)		SCALER_GET(r, 6, 5)
+#define SCALER_SRC_CFG_SET_BYTE_SWAP(v)		SCALER_SET(v, 6, 5)
+#define SCALER_SRC_CFG_GET_COLOR_FORMAT(r)	SCALER_GET(r, 4, 0)
+#define SCALER_SRC_CFG_SET_COLOR_FORMAT(v)	SCALER_SET(v, 4, 0)
+#define SCALER_YUV420_2P_UV			0
+#define SCALER_YUV422_2P_UV			2
+#define SCALER_YUV444_2P_UV			3
+#define SCALER_RGB_565				4
+#define SCALER_ARGB1555				5
+#define SCALER_ARGB8888				6
+#define SCALER_ARGB8888_PRE			7
+#define SCALER_YUV422_1P_YVYU			9
+#define SCALER_YUV422_1P_YUYV			10
+#define SCALER_YUV422_1P_UYVY			11
+#define SCALER_ARGB4444				12
+#define SCALER_L8A8				13
+#define SCALER_RGBA8888				14
+#define SCALER_L8				15
+#define SCALER_YUV420_2P_VU			16
+#define SCALER_YUV422_2P_VU			18
+#define SCALER_YUV444_2P_VU			19
+#define SCALER_YUV420_3P			20
+#define SCALER_YUV422_3P			22
+#define SCALER_YUV444_3P			23
+
+/* SCALER_SRC_SPAN */
+#define SCALER_SRC_SPAN_GET_C_SPAN(r)		SCALER_GET(r, 29, 16)
+#define SCALER_SRC_SPAN_SET_C_SPAN(v)		SCALER_SET(v, 29, 16)
+#define SCALER_SRC_SPAN_GET_Y_SPAN(r)		SCALER_GET(r, 13, 0)
+#define SCALER_SRC_SPAN_SET_Y_SPAN(v)		SCALER_SET(v, 13, 0)
+
+/* SCALER_SRC_Y_POS */
+#define SCALER_SRC_Y_POS_GET_YH_POS(r)		SCALER_GET(r, 31, 16)
+#define SCALER_SRC_Y_POS_SET_YH_POS(v)		SCALER_SET(v, 31, 16)
+#define SCALER_SRC_Y_POS_GET_YV_POS(r)		SCALER_GET(r, 15, 0)
+#define SCALER_SRC_Y_POS_SET_YV_POS(v)		SCALER_SET(v, 15, 0)
+
+/* SCALER_SRC_WH */
+#define SCALER_SRC_WH_GET_WIDTH(r)		SCALER_GET(r, 29, 16)
+#define SCALER_SRC_WH_SET_WIDTH(v)		SCALER_SET(v, 29, 16)
+#define SCALER_SRC_WH_GET_HEIGHT(r)		SCALER_GET(r, 13, 0)
+#define SCALER_SRC_WH_SET_HEIGHT(v)		SCALER_SET(v, 13, 0)
+
+/* SCALER_SRC_C_POS */
+#define SCALER_SRC_C_POS_GET_CH_POS(r)		SCALER_GET(r, 31, 16)
+#define SCALER_SRC_C_POS_SET_CH_POS(v)		SCALER_SET(v, 31, 16)
+#define SCALER_SRC_C_POS_GET_CV_POS(r)		SCALER_GET(r, 15, 0)
+#define SCALER_SRC_C_POS_SET_CV_POS(v)		SCALER_SET(v, 15, 0)
+
+/* SCALER_DST_CFG */
+#define SCALER_DST_CFG_GET_BYTE_SWAP(r)		SCALER_GET(r, 6, 5)
+#define SCALER_DST_CFG_SET_BYTE_SWAP(v)		SCALER_SET(v, 6, 5)
+#define SCALER_DST_CFG_GET_COLOR_FORMAT(r)	SCALER_GET(r, 4, 0)
+#define SCALER_DST_CFG_SET_COLOR_FORMAT(v)	SCALER_SET(v, 4, 0)
+
+/* SCALER_DST_SPAN */
+#define SCALER_DST_SPAN_GET_C_SPAN(r)		SCALER_GET(r, 29, 16)
+#define SCALER_DST_SPAN_SET_C_SPAN(v)		SCALER_SET(v, 29, 16)
+#define SCALER_DST_SPAN_GET_Y_SPAN(r)		SCALER_GET(r, 13, 0)
+#define SCALER_DST_SPAN_SET_Y_SPAN(v)		SCALER_SET(v, 13, 0)
+
+/* SCALER_DST_WH */
+#define SCALER_DST_WH_GET_WIDTH(r)		SCALER_GET(r, 29, 16)
+#define SCALER_DST_WH_SET_WIDTH(v)		SCALER_SET(v, 29, 16)
+#define SCALER_DST_WH_GET_HEIGHT(r)		SCALER_GET(r, 13, 0)
+#define SCALER_DST_WH_SET_HEIGHT(v)		SCALER_SET(v, 13, 0)
+
+/* SCALER_DST_POS */
+#define SCALER_DST_POS_GET_H_POS(r)		SCALER_GET(r, 29, 16)
+#define SCALER_DST_POS_SET_H_POS(v)		SCALER_SET(v, 29, 16)
+#define SCALER_DST_POS_GET_V_POS(r)		SCALER_GET(r, 13, 0)
+#define SCALER_DST_POS_SET_V_POS(v)		SCALER_SET(v, 13, 0)
+
+/* SCALER_H_RATIO */
+#define SCALER_H_RATIO_GET(r)			SCALER_GET(r, 18, 0)
+#define SCALER_H_RATIO_SET(v)			SCALER_SET(v, 18, 0)
+
+/* SCALER_V_RATIO */
+#define SCALER_V_RATIO_GET(r)			SCALER_GET(r, 18, 0)
+#define SCALER_V_RATIO_SET(v)			SCALER_SET(v, 18, 0)
+
+/* SCALER_ROT_CFG */
+#define SCALER_ROT_CFG_FLIP_X_EN		(1 << 3)
+#define SCALER_ROT_CFG_FLIP_Y_EN		(1 << 2)
+#define SCALER_ROT_CFG_GET_ROTMODE(r)		SCALER_GET(r, 1, 0)
+#define SCALER_ROT_CFG_SET_ROTMODE(v)		SCALER_SET(v, 1, 0)
+#define SCALER_ROT_MODE_90			1
+#define SCALER_ROT_MODE_180			2
+#define SCALER_ROT_MODE_270			3
+
+/* SCALER_HCOEF, SCALER_VCOEF */
+#define SCALER_COEF_SHIFT(i)			(16 * (1 - (i) % 2))
+#define SCALER_COEF_GET(r, i)			\
+				(((r) >> SCALER_COEF_SHIFT(i)) & 0x1ff)
+#define SCALER_COEF_SET(v, i)			\
+				(((v) & 0x1ff) << SCALER_COEF_SHIFT(i))
+
+/* SCALER_CSC_COEFxy */
+#define SCALER_CSC_COEF_GET(r)			SCALER_GET(r, 11, 0)
+#define SCALER_CSC_COEF_SET(v)			SCALER_SET(v, 11, 0)
+
+/* SCALER_DITH_CFG */
+#define SCALER_DITH_CFG_GET_R_TYPE(r)		SCALER_GET(r, 8, 6)
+#define SCALER_DITH_CFG_SET_R_TYPE(v)		SCALER_SET(v, 8, 6)
+#define SCALER_DITH_CFG_GET_G_TYPE(r)		SCALER_GET(r, 5, 3)
+#define SCALER_DITH_CFG_SET_G_TYPE(v)		SCALER_SET(v, 5, 3)
+#define SCALER_DITH_CFG_GET_B_TYPE(r)		SCALER_GET(r, 2, 0)
+#define SCALER_DITH_CFG_SET_B_TYPE(v)		SCALER_SET(v, 2, 0)
+
+/* SCALER_TIMEOUT_CTRL */
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_VALUE(r)	SCALER_GET(r, 31, 16)
+#define SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(v)	SCALER_SET(v, 31, 16)
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_DIV(r)	SCALER_GET(r, 7, 4)
+#define SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(v)	SCALER_SET(v, 7, 4)
+#define SCALER_TIMEOUT_CTRL_TIMER_ENABLE	(1 << 0)
+
+/* SCALER_TIMEOUT_CNT */
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_COUNT(r)	SCALER_GET(r, 31, 16)
+
+/* SCALER_SRC_BLEND_COLOR */
+#define SCALER_SRC_BLEND_COLOR_SEL_INV		(1 << 31)
+#define SCALER_SRC_BLEND_COLOR_GET_SEL(r)	SCALER_GET(r, 30, 29)
+#define SCALER_SRC_BLEND_COLOR_SET_SEL(v)	SCALER_SET(v, 30, 29)
+#define SCALER_SRC_BLEND_COLOR_OP_SEL_INV	(1 << 28)
+#define SCALER_SRC_BLEND_COLOR_GET_OP_SEL(r)	SCALER_GET(r, 27, 24)
+#define SCALER_SRC_BLEND_COLOR_SET_OP_SEL(v)	SCALER_SET(v, 27, 24)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR0(r)	SCALER_GET(r, 23, 16)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR0(v)	SCALER_SET(v, 23, 16)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR1(r)	SCALER_GET(r, 15, 8)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR1(v)	SCALER_SET(v, 15, 8)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR2(r)	SCALER_GET(r, 7, 0)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR2(v)	SCALER_SET(v, 7, 0)
+
+/* SCALER_SRC_BLEND_ALPHA */
+#define SCALER_SRC_BLEND_ALPHA_SEL_INV		(1 << 31)
+#define SCALER_SRC_BLEND_ALPHA_GET_SEL(r)	SCALER_GET(r, 30, 29)
+#define SCALER_SRC_BLEND_ALPHA_SET_SEL(v)	SCALER_SET(v, 30, 29)
+#define SCALER_SRC_BLEND_ALPHA_OP_SEL_INV	(1 << 28)
+#define SCALER_SRC_BLEND_ALPHA_GET_OP_SEL(r)	SCALER_GET(r, 27, 24)
+#define SCALER_SRC_BLEND_ALPHA_SET_OP_SEL(v)	SCALER_SET(v, 27, 24)
+#define SCALER_SRC_BLEND_ALPHA_GET_ALPHA(r)	SCALER_GET(r, 7, 0)
+#define SCALER_SRC_BLEND_ALPHA_SET_ALPHA(v)	SCALER_SET(v, 7, 0)
+
+/* SCALER_DST_BLEND_COLOR */
+#define SCALER_DST_BLEND_COLOR_SEL_INV		(1 << 31)
+#define SCALER_DST_BLEND_COLOR_GET_SEL(r)	SCALER_GET(r, 30, 29)
+#define SCALER_DST_BLEND_COLOR_SET_SEL(v)	SCALER_SET(v, 30, 29)
+#define SCALER_DST_BLEND_COLOR_OP_SEL_INV	(1 << 28)
+#define SCALER_DST_BLEND_COLOR_GET_OP_SEL(r)	SCALER_GET(r, 27, 24)
+#define SCALER_DST_BLEND_COLOR_SET_OP_SEL(v)	SCALER_SET(v, 27, 24)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR0(r)	SCALER_GET(r, 23, 16)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR0(v)	SCALER_SET(v, 23, 16)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR1(r)	SCALER_GET(r, 15, 8)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR1(v)	SCALER_SET(v, 15, 8)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR2(r)	SCALER_GET(r, 7, 0)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR2(v)	SCALER_SET(v, 7, 0)
+
+/* SCALER_DST_BLEND_ALPHA */
+#define SCALER_DST_BLEND_ALPHA_SEL_INV		(1 << 31)
+#define SCALER_DST_BLEND_ALPHA_GET_SEL(r)	SCALER_GET(r, 30, 29)
+#define SCALER_DST_BLEND_ALPHA_SET_SEL(v)	SCALER_SET(v, 30, 29)
+#define SCALER_DST_BLEND_ALPHA_OP_SEL_INV	(1 << 28)
+#define SCALER_DST_BLEND_ALPHA_GET_OP_SEL(r)	SCALER_GET(r, 27, 24)
+#define SCALER_DST_BLEND_ALPHA_SET_OP_SEL(v)	SCALER_SET(v, 27, 24)
+#define SCALER_DST_BLEND_ALPHA_GET_ALPHA(r)	SCALER_GET(r, 7, 0)
+#define SCALER_DST_BLEND_ALPHA_SET_ALPHA(v)	SCALER_SET(v, 7, 0)
+
+/* SCALER_FILL_COLOR */
+#define SCALER_FILL_COLOR_GET_ALPHA(r)		SCALER_GET(r, 31, 24)
+#define SCALER_FILL_COLOR_SET_ALPHA(v)		SCALER_SET(v, 31, 24)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR0(r)	SCALER_GET(r, 23, 16)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR0(v)	SCALER_SET(v, 23, 16)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR1(r)	SCALER_GET(r, 15, 8)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR1(v)	SCALER_SET(v, 15, 8)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR2(r)	SCALER_GET(r, 7, 0)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR2(v)	SCALER_SET(v, 7, 0)
+
+/* SCALER_ADDR_Q_CONFIG */
+#define SCALER_ADDR_Q_CONFIG_RST		(1 << 0)
+
+/* SCALER_SRC_ADDR_Q_STATUS */
+#define SCALER_SRC_ADDR_Q_STATUS_Y_FULL		(1 << 23)
+#define SCALER_SRC_ADDR_Q_STATUS_Y_EMPTY	(1 << 22)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_Y_WR_IDX(r)	SCALER_GET(r, 21, 16)
+#define SCALER_SRC_ADDR_Q_STATUS_CB_FULL	(1 << 15)
+#define SCALER_SRC_ADDR_Q_STATUS_CB_EMPTY	(1 << 14)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_CB_WR_IDX(r)	SCALER_GET(r, 13, 8)
+#define SCALER_SRC_ADDR_Q_STATUS_CR_FULL	(1 << 7)
+#define SCALER_SRC_ADDR_Q_STATUS_CR_EMPTY	(1 << 6)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_CR_WR_IDX(r)	SCALER_GET(r, 5, 0)
+
+/* SCALER_DST_ADDR_Q_STATUS */
+#define SCALER_DST_ADDR_Q_STATUS_Y_FULL		(1 << 23)
+#define SCALER_DST_ADDR_Q_STATUS_Y_EMPTY	(1 << 22)
+#define SCALER_DST_ADDR_Q_STATUS_GET_Y_WR_IDX(r)	SCALER_GET(r, 21, 16)
+#define SCALER_DST_ADDR_Q_STATUS_CB_FULL	(1 << 15)
+#define SCALER_DST_ADDR_Q_STATUS_CB_EMPTY	(1 << 14)
+#define SCALER_DST_ADDR_Q_STATUS_GET_CB_WR_IDX(r)	SCALER_GET(r, 13, 8)
+#define SCALER_DST_ADDR_Q_STATUS_CR_FULL	(1 << 7)
+#define SCALER_DST_ADDR_Q_STATUS_CR_EMPTY	(1 << 6)
+#define SCALER_DST_ADDR_Q_STATUS_GET_CR_WR_IDX(r)	SCALER_GET(r, 5, 0)
+
+/* SCALER_CRC_COLOR00_10 */
+#define SCALER_CRC_COLOR00_10_GET_00(r)		SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR00_10_GET_10(r)		SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR20_30 */
+#define SCALER_CRC_COLOR20_30_GET_20(r)		SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR20_30_GET_30(r)		SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR01_11 */
+#define SCALER_CRC_COLOR01_11_GET_01(r)		SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR01_11_GET_11(r)		SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR21_31 */
+#define SCALER_CRC_COLOR21_31_GET_21(r)		SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR21_31_GET_31(r)		SCALER_GET(r, 15, 0)
+
+#endif /* EXYNOS_REGS_SCALER_H */
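For reference, the GET/SET helpers above pair up as field accessors over the
32-bit registers. A minimal usage sketch (hypothetical, not part of the patch:
it assumes an ioremapped register base `regs` and the SCALER_SRC_WH register
offset defined earlier in this header):

	/* Pack a 1920x1080 source size: the width lands in bits [29:16]
	 * and the height in bits [13:0], per the definitions above. */
	u32 wh = SCALER_SRC_WH_SET_WIDTH(1920) |
		 SCALER_SRC_WH_SET_HEIGHT(1080);
	writel(wh, regs + SCALER_SRC_WH);

	/* Read back and unpack the width field: */
	u32 w = SCALER_SRC_WH_GET_WIDTH(readl(regs + SCALER_SRC_WH));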
diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index 3a3bf75..34b8576 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -485,7 +485,7 @@
 			return;
 
 		for (i = 0; i < ARRAY_SIZE(force_audio_names); i++)
-			drm_property_add_enum(prop, i, i-1, force_audio_names[i]);
+			drm_property_add_enum(prop, i-1, force_audio_names[i]);
 
 		dev_priv->force_audio_property = prop;
 	}
@@ -514,7 +514,7 @@
 			return;
 
 		for (i = 0; i < ARRAY_SIZE(broadcast_rgb_names); i++)
-			drm_property_add_enum(prop, i, i, broadcast_rgb_names[i]);
+			drm_property_add_enum(prop, i, broadcast_rgb_names[i]);
 
 		dev_priv->broadcast_rgb_property = prop;
 	}
diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c
index b837e7a..cb5a14b 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_crt.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c
@@ -64,7 +64,7 @@
 	REG_WRITE(reg, temp);
 }
 
-static int cdv_intel_crt_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status cdv_intel_crt_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index a4bb89b..5ea785f 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -505,7 +505,7 @@
 	msleep(intel_dp->backlight_off_delay);
 }
 
-static int
+static enum drm_mode_status
 cdv_intel_dp_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 563f193..f087899 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -223,7 +223,7 @@
 	return ret;
 }
 
-static int cdv_hdmi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status cdv_hdmi_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
 	if (mode->clock > 165000)
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index e64960d..de9531c 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -244,7 +244,7 @@
 {
 }
 
-static int cdv_intel_lvds_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status cdv_intel_lvds_mode_valid(struct drm_connector *connector,
 			      struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
index acb3848e..fe02092 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
@@ -346,7 +346,7 @@
 	return 0;
 }
 
-static int mdfld_dsi_connector_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status mdfld_dsi_connector_mode_valid(struct drm_connector *connector,
 						struct drm_display_mode *mode)
 {
 	struct mdfld_dsi_connector *dsi_connector =
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index 8b2eb32..78566a8 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -509,7 +509,7 @@
 	HDMI_WRITE(HDMI_VIDEO_REG, temp);
 }
 
-static int oaktrail_hdmi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status oaktrail_hdmi_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
 	if (mode->clock > 165000)
diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h
index e8e4ea1..e05e539 100644
--- a/drivers/gpu/drm/gma500/psb_intel_drv.h
+++ b/drivers/gpu/drm/gma500/psb_intel_drv.h
@@ -255,7 +255,7 @@
 extern bool psb_intel_lvds_mode_fixup(struct drm_encoder *encoder,
 				      const struct drm_display_mode *mode,
 				      struct drm_display_mode *adjusted_mode);
-extern int psb_intel_lvds_mode_valid(struct drm_connector *connector,
+extern enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector,
 				     struct drm_display_mode *mode);
 extern int psb_intel_lvds_set_property(struct drm_connector *connector,
 					struct drm_property *property,
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index be3eefe..8baf632 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -343,7 +343,7 @@
 	}
 }
 
-int psb_intel_lvds_mode_valid(struct drm_connector *connector,
+enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
 	struct drm_psb_private *dev_priv = connector->dev->dev_private;
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index 8450791..f2ee6aa 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -1157,7 +1157,7 @@
 	return;
 }
 
-static int psb_intel_sdvo_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status psb_intel_sdvo_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
 	struct psb_intel_sdvo *psb_intel_sdvo = intel_attached_sdvo(connector);
@@ -2281,7 +2281,7 @@
 
 	for (i = 0; i < psb_intel_sdvo_connector->format_supported_num; i++)
 		drm_property_add_enum(
-				psb_intel_sdvo_connector->tv_format, i,
+				psb_intel_sdvo_connector->tv_format,
 				i, tv_format_names[psb_intel_sdvo_connector->tv_format_supported[i]]);
 
 	psb_intel_sdvo->tv_format_index = psb_intel_sdvo_connector->tv_format_supported[0];
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c
index f4eba87..d2f4749 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c
@@ -27,7 +27,7 @@
 	return drm_add_modes_noedid(connector, 800, 600);
 }
 
-static int hibmc_connector_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status hibmc_connector_mode_valid(struct drm_connector *connector,
 				      struct drm_display_mode *mode)
 {
 	return MODE_OK;
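The recurring change across these gma500 and hibmc hunks is the same one:
the connector .mode_valid callback now returns enum drm_mode_status rather
than int. A minimal sketch of the corrected callback shape (hypothetical name
example_mode_valid; the 165 MHz dotclock limit mirrors the HDMI checks in
this series):

	static enum drm_mode_status
	example_mode_valid(struct drm_connector *connector,
			   struct drm_display_mode *mode)
	{
		if (mode->clock > 165000)	/* assumed dotclock limit */
			return MODE_CLOCK_HIGH;
		return MODE_OK;
	}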
diff --git a/drivers/gpu/drm/i2c/Kconfig b/drivers/gpu/drm/i2c/Kconfig
index a6c92be..65d3acb 100644
--- a/drivers/gpu/drm/i2c/Kconfig
+++ b/drivers/gpu/drm/i2c/Kconfig
@@ -22,8 +22,14 @@
 config DRM_I2C_NXP_TDA998X
 	tristate "NXP Semiconductors TDA998X HDMI encoder"
 	default m if DRM_TILCDC
+	select CEC_CORE if CEC_NOTIFIER
 	select SND_SOC_HDMI_CODEC if SND_SOC
 	help
 	  Support for NXP Semiconductors TDA998X HDMI encoders.
 
+config DRM_I2C_NXP_TDA9950
+	tristate "NXP Semiconductors TDA9950/TDA998X HDMI CEC"
+	select CEC_NOTIFIER
+	select CEC_CORE
+
 endmenu
diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile
index b20100c..a962f6f 100644
--- a/drivers/gpu/drm/i2c/Makefile
+++ b/drivers/gpu/drm/i2c/Makefile
@@ -7,3 +7,4 @@
 
 tda998x-y := tda998x_drv.o
 obj-$(CONFIG_DRM_I2C_NXP_TDA998X) += tda998x.o
+obj-$(CONFIG_DRM_I2C_NXP_TDA9950) += tda9950.o
diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c
new file mode 100644
index 0000000..3f7396ca
--- /dev/null
+++ b/drivers/gpu/drm/i2c/tda9950.c
@@ -0,0 +1,509 @@
+/*
+ *  TDA9950 Consumer Electronics Control driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * The NXP TDA9950 implements the HDMI Consumer Electronics Control
+ * interface.  The host interface is similar to a mailbox: the data
+ * registers starting at REG_CDR0 are written to send a command to the
+ * internal CPU, and replies are read from these registers.
+ *
+ * As the data registers represent a mailbox, they must be accessed
+ * as a single I2C transaction.  See the TDA9950 data sheet for details.
+ */
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_data/tda9950.h>
+#include <linux/slab.h>
+#include <drm/drm_edid.h>
+#include <media/cec.h>
+#include <media/cec-notifier.h>
+
+enum {
+	REG_CSR = 0x00,
+	CSR_BUSY = BIT(7),
+	CSR_INT  = BIT(6),
+	CSR_ERR  = BIT(5),
+
+	REG_CER = 0x01,
+
+	REG_CVR = 0x02,
+
+	REG_CCR = 0x03,
+	CCR_RESET = BIT(7),
+	CCR_ON    = BIT(6),
+
+	REG_ACKH = 0x04,
+	REG_ACKL = 0x05,
+
+	REG_CCONR = 0x06,
+	CCONR_ENABLE_ERROR = BIT(4),
+	CCONR_RETRY_MASK = 7,
+
+	REG_CDR0 = 0x07,
+
+	CDR1_REQ = 0x00,
+	CDR1_CNF = 0x01,
+	CDR1_IND = 0x81,
+	CDR1_ERR = 0x82,
+	CDR1_IER = 0x83,
+
+	CDR2_CNF_SUCCESS    = 0x00,
+	CDR2_CNF_OFF_STATE  = 0x80,
+	CDR2_CNF_BAD_REQ    = 0x81,
+	CDR2_CNF_CEC_ACCESS = 0x82,
+	CDR2_CNF_ARB_ERROR  = 0x83,
+	CDR2_CNF_BAD_TIMING = 0x84,
+	CDR2_CNF_NACK_ADDR  = 0x85,
+	CDR2_CNF_NACK_DATA  = 0x86,
+};
+
+struct tda9950_priv {
+	struct i2c_client *client;
+	struct device *hdmi;
+	struct cec_adapter *adap;
+	struct tda9950_glue *glue;
+	u16 addresses;
+	struct cec_msg rx_msg;
+	struct cec_notifier *notify;
+	bool open;
+};
+
+static int tda9950_write_range(struct i2c_client *client, u8 addr, u8 *p, int cnt)
+{
+	struct i2c_msg msg;
+	u8 buf[CEC_MAX_MSG_SIZE + 3];	/* addr + 2-byte header + message */
+	int ret;
+
+	if (WARN_ON(cnt > sizeof(buf) - 1))
+		return -EINVAL;
+
+	buf[0] = addr;
+	memcpy(buf + 1, p, cnt);
+
+	msg.addr = client->addr;
+	msg.flags = 0;
+	msg.len = cnt + 1;
+	msg.buf = buf;
+
+	dev_dbg(&client->dev, "wr 0x%02x: %*ph\n", addr, cnt, p);
+
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret < 0)
+		dev_err(&client->dev, "Error %d writing to cec:0x%x\n", ret, addr);
+	return ret < 0 ? ret : 0;
+}
+
+static void tda9950_write(struct i2c_client *client, u8 addr, u8 val)
+{
+	tda9950_write_range(client, addr, &val, 1);
+}
+
+static int tda9950_read_range(struct i2c_client *client, u8 addr, u8 *p, int cnt)
+{
+	struct i2c_msg msg[2];
+	int ret;
+
+	msg[0].addr = client->addr;
+	msg[0].flags = 0;
+	msg[0].len = 1;
+	msg[0].buf = &addr;
+	msg[1].addr = client->addr;
+	msg[1].flags = I2C_M_RD;
+	msg[1].len = cnt;
+	msg[1].buf = p;
+
+	ret = i2c_transfer(client->adapter, msg, 2);
+	if (ret < 0)
+		dev_err(&client->dev, "Error %d reading from cec:0x%x\n", ret, addr);
+
+	dev_dbg(&client->dev, "rd 0x%02x: %*ph\n", addr, cnt, p);
+
+	return ret;
+}
+
+static u8 tda9950_read(struct i2c_client *client, u8 addr)
+{
+	int ret;
+	u8 val;
+
+	ret = tda9950_read_range(client, addr, &val, 1);
+	if (ret < 0)
+		val = 0;
+
+	return val;
+}
+
+static irqreturn_t tda9950_irq(int irq, void *data)
+{
+	struct tda9950_priv *priv = data;
+	unsigned int tx_status;
+	u8 csr, cconr, buf[19];
+	u8 arb_lost_cnt, nack_cnt, err_cnt;
+
+	if (!priv->open)
+		return IRQ_NONE;
+
+	csr = tda9950_read(priv->client, REG_CSR);
+	if (!(csr & CSR_INT))
+		return IRQ_NONE;
+
+	cconr = tda9950_read(priv->client, REG_CCONR) & CCONR_RETRY_MASK;
+
+	tda9950_read_range(priv->client, REG_CDR0, buf, sizeof(buf));
+
+	/*
+	 * This should never happen: the data sheet says that there will
+	 * always be a valid message if the interrupt line is asserted.
+	 */
+	if (buf[0] == 0) {
+		dev_warn(&priv->client->dev, "interrupt pending, but no message?\n");
+		return IRQ_NONE;
+	}
+
+	switch (buf[1]) {
+	case CDR1_CNF: /* transmit result */
+		arb_lost_cnt = nack_cnt = err_cnt = 0;
+		switch (buf[2]) {
+		case CDR2_CNF_SUCCESS:
+			tx_status = CEC_TX_STATUS_OK;
+			break;
+
+		case CDR2_CNF_ARB_ERROR:
+			tx_status = CEC_TX_STATUS_ARB_LOST;
+			arb_lost_cnt = cconr;
+			break;
+
+		case CDR2_CNF_NACK_ADDR:
+			tx_status = CEC_TX_STATUS_NACK;
+			nack_cnt = cconr;
+			break;
+
+		default: /* some other error, refer to TDA9950 docs */
+			dev_err(&priv->client->dev, "CNF reply error 0x%02x\n",
+				buf[2]);
+			tx_status = CEC_TX_STATUS_ERROR;
+			err_cnt = cconr;
+			break;
+		}
+		/* TDA9950 executes all retries for us */
+		tx_status |= CEC_TX_STATUS_MAX_RETRIES;
+		cec_transmit_done(priv->adap, tx_status, arb_lost_cnt,
+				  nack_cnt, 0, err_cnt);
+		break;
+
+	case CDR1_IND:
+		priv->rx_msg.len = buf[0] - 2;
+		if (priv->rx_msg.len > CEC_MAX_MSG_SIZE)
+			priv->rx_msg.len = CEC_MAX_MSG_SIZE;
+
+		memcpy(priv->rx_msg.msg, buf + 2, priv->rx_msg.len);
+		cec_received_msg(priv->adap, &priv->rx_msg);
+		break;
+
+	default: /* unknown */
+		dev_err(&priv->client->dev, "unknown service id 0x%02x\n",
+			buf[1]);
+		break;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int tda9950_cec_transmit(struct cec_adapter *adap, u8 attempts,
+				u32 signal_free_time, struct cec_msg *msg)
+{
+	struct tda9950_priv *priv = adap->priv;
+	u8 buf[CEC_MAX_MSG_SIZE + 2];
+
+	buf[0] = 2 + msg->len;
+	buf[1] = CDR1_REQ;
+	memcpy(buf + 2, msg->msg, msg->len);
+
+	if (attempts > 5)
+		attempts = 5;
+
+	tda9950_write(priv->client, REG_CCONR, attempts);
+
+	return tda9950_write_range(priv->client, REG_CDR0, buf, 2 + msg->len);
+}
+
+static int tda9950_cec_adap_log_addr(struct cec_adapter *adap, u8 addr)
+{
+	struct tda9950_priv *priv = adap->priv;
+	u16 addresses;
+	u8 buf[2];
+
+	if (addr == CEC_LOG_ADDR_INVALID)
+		addresses = priv->addresses = 0;
+	else
+		addresses = priv->addresses |= BIT(addr);
+
+	/* TDA9950 doesn't want address 15 set */
+	addresses &= 0x7fff;
+	buf[0] = addresses >> 8;
+	buf[1] = addresses;
+
+	return tda9950_write_range(priv->client, REG_ACKH, buf, 2);
+}
+
+/*
+ * When operating as part of the TDA998x, we need additional handling
+ * to initialise and shut down the TDA9950 part of the device.  These
+ * two hooks are provided to allow the TDA998x code to perform those
+ * activities.
+ */
+static int tda9950_glue_open(struct tda9950_priv *priv)
+{
+	int ret = 0;
+
+	if (priv->glue && priv->glue->open)
+		ret = priv->glue->open(priv->glue->data);
+
+	priv->open = true;
+
+	return ret;
+}
+
+static void tda9950_glue_release(struct tda9950_priv *priv)
+{
+	priv->open = false;
+
+	if (priv->glue && priv->glue->release)
+		priv->glue->release(priv->glue->data);
+}
+
+static int tda9950_open(struct tda9950_priv *priv)
+{
+	struct i2c_client *client = priv->client;
+	int ret;
+
+	ret = tda9950_glue_open(priv);
+	if (ret)
+		return ret;
+
+	/* Reset the TDA9950, and wait 250ms for it to recover */
+	tda9950_write(client, REG_CCR, CCR_RESET);
+	msleep(250);
+
+	tda9950_cec_adap_log_addr(priv->adap, CEC_LOG_ADDR_INVALID);
+
+	/* Start the command processor */
+	tda9950_write(client, REG_CCR, CCR_ON);
+
+	return 0;
+}
+
+static void tda9950_release(struct tda9950_priv *priv)
+{
+	struct i2c_client *client = priv->client;
+	int timeout = 50;
+	u8 csr;
+
+	/* Stop the command processor */
+	tda9950_write(client, REG_CCR, 0);
+
+	/* Wait up to 0.5s for it to signal non-busy */
+	do {
+		csr = tda9950_read(client, REG_CSR);
+		if (!(csr & CSR_BUSY) || !--timeout)
+			break;
+		msleep(10);
+	} while (1);
+
+	/* Warn the user that their IRQ may die if it's shared. */
+	if (csr & CSR_BUSY)
+		dev_warn(&client->dev, "command processor failed to stop, irq%d may die (csr=0x%02x)\n",
+			 client->irq, csr);
+
+	tda9950_glue_release(priv);
+}
+
+static int tda9950_cec_adap_enable(struct cec_adapter *adap, bool enable)
+{
+	struct tda9950_priv *priv = adap->priv;
+
+	if (!enable) {
+		tda9950_release(priv);
+		return 0;
+	} else {
+		return tda9950_open(priv);
+	}
+}
+
+static const struct cec_adap_ops tda9950_cec_ops = {
+	.adap_enable = tda9950_cec_adap_enable,
+	.adap_log_addr = tda9950_cec_adap_log_addr,
+	.adap_transmit = tda9950_cec_transmit,
+};
+
+/*
+ * When operating as part of the TDA998x, we need to claim additional
+ * resources.  These two hooks permit the management of those resources.
+ */
+static void tda9950_devm_glue_exit(void *data)
+{
+	struct tda9950_glue *glue = data;
+
+	if (glue && glue->exit)
+		glue->exit(glue->data);
+}
+
+static int tda9950_devm_glue_init(struct device *dev, struct tda9950_glue *glue)
+{
+	int ret;
+
+	if (glue && glue->init) {
+		ret = glue->init(glue->data);
+		if (ret)
+			return ret;
+	}
+
+	ret = devm_add_action(dev, tda9950_devm_glue_exit, glue);
+	if (ret)
+		tda9950_devm_glue_exit(glue);
+
+	return ret;
+}
+
+static void tda9950_cec_del(void *data)
+{
+	struct tda9950_priv *priv = data;
+
+	cec_delete_adapter(priv->adap);
+}
+
+static int tda9950_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+{
+	struct tda9950_glue *glue = client->dev.platform_data;
+	struct device *dev = &client->dev;
+	struct tda9950_priv *priv;
+	unsigned long irqflags;
+	int ret;
+	u8 cvr;
+
+	/*
+	 * We must have I2C functionality: our multi-byte accesses
+	 * must be performed as a single contiguous transaction.
+	 */
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		dev_err(&client->dev,
+			"adapter does not support I2C functionality\n");
+		return -ENXIO;
+	}
+
+	/* We must have an interrupt to be functional. */
+	if (client->irq <= 0) {
+		dev_err(&client->dev, "driver requires an interrupt\n");
+		return -ENXIO;
+	}
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->client = client;
+	priv->glue = glue;
+
+	i2c_set_clientdata(client, priv);
+
+	/*
+	 * If we're part of a TDA998x, we want the class devices to be
+	 * associated with the HDMI Tx so we have a tight relationship
+	 * between the HDMI interface and the CEC interface.
+	 */
+	priv->hdmi = dev;
+	if (glue && glue->parent)
+		priv->hdmi = glue->parent;
+
+	priv->adap = cec_allocate_adapter(&tda9950_cec_ops, priv, "tda9950",
+					  CEC_CAP_DEFAULTS,
+					  CEC_MAX_LOG_ADDRS);
+	if (IS_ERR(priv->adap))
+		return PTR_ERR(priv->adap);
+
+	ret = devm_add_action(dev, tda9950_cec_del, priv);
+	if (ret) {
+		cec_delete_adapter(priv->adap);
+		return ret;
+	}
+
+	ret = tda9950_devm_glue_init(dev, glue);
+	if (ret)
+		return ret;
+
+	ret = tda9950_glue_open(priv);
+	if (ret)
+		return ret;
+
+	cvr = tda9950_read(client, REG_CVR);
+
+	dev_info(&client->dev,
+		 "TDA9950 CEC interface, hardware version %u.%u\n",
+		 cvr >> 4, cvr & 15);
+
+	tda9950_glue_release(priv);
+
+	irqflags = IRQF_TRIGGER_FALLING;
+	if (glue)
+		irqflags = glue->irq_flags;
+
+	ret = devm_request_threaded_irq(dev, client->irq, NULL, tda9950_irq,
+					irqflags | IRQF_SHARED | IRQF_ONESHOT,
+					dev_name(&client->dev), priv);
+	if (ret < 0)
+		return ret;
+
+	priv->notify = cec_notifier_get(priv->hdmi);
+	if (!priv->notify)
+		return -ENOMEM;
+
+	ret = cec_register_adapter(priv->adap, priv->hdmi);
+	if (ret < 0) {
+		cec_notifier_put(priv->notify);
+		return ret;
+	}
+
+	/*
+	 * CEC documentation says we must not call cec_delete_adapter
+	 * after a successful call to cec_register_adapter().
+	 */
+	devm_remove_action(dev, tda9950_cec_del, priv);
+
+	cec_register_cec_notifier(priv->adap, priv->notify);
+
+	return 0;
+}
+
+static int tda9950_remove(struct i2c_client *client)
+{
+	struct tda9950_priv *priv = i2c_get_clientdata(client);
+
+	cec_unregister_adapter(priv->adap);
+	cec_notifier_put(priv->notify);
+
+	return 0;
+}
+
+static const struct i2c_device_id tda9950_ids[] = {
+	{ "tda9950", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, tda9950_ids);
+
+static struct i2c_driver tda9950_driver = {
+	.probe = tda9950_probe,
+	.remove = tda9950_remove,
+	.driver = {
+		.name = "tda9950",
+	},
+	.id_table = tda9950_ids,
+};
+
+module_i2c_driver(tda9950_driver);
+
+MODULE_AUTHOR("Russell King <rmk+kernel@armlinux.org.uk>");
+MODULE_DESCRIPTION("TDA9950/TDA998x Consumer Electronics Control Driver");
+MODULE_LICENSE("GPL v2");
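To make the CDR mailbox framing in tda9950_cec_transmit() concrete, here is a
worked sketch of the buffer for a hypothetical two-byte <Image View On>
message from initiator 4 to destination 0; per the code above, byte 0 carries
the total length including the two header bytes:

	buf[0] = 4;		/* 2 header bytes + 2 message bytes */
	buf[1] = CDR1_REQ;	/* service id: transmit request */
	buf[2] = 0x40;		/* CEC header: initiator 4, destination 0 */
	buf[3] = 0x04;		/* CEC opcode <Image View On> */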
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index 9e67a7b..6ebd884 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -16,8 +16,10 @@
  */
 
 #include <linux/component.h>
+#include <linux/gpio/consumer.h>
 #include <linux/hdmi.h>
 #include <linux/module.h>
+#include <linux/platform_data/tda9950.h>
 #include <linux/irq.h>
 #include <sound/asoundef.h>
 #include <sound/hdmi-codec.h>
@@ -29,6 +31,8 @@
 #include <drm/drm_of.h>
 #include <drm/i2c/tda998x.h>
 
+#include <media/cec-notifier.h>
+
 #define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
 
 struct tda998x_audio_port {
@@ -55,6 +59,7 @@
 	struct platform_device *audio_pdev;
 	struct mutex audio_mutex;
 
+	struct mutex edid_mutex;
 	wait_queue_head_t wq_edid;
 	volatile int wq_edid_wait;
 
@@ -67,6 +72,9 @@
 	struct drm_connector connector;
 
 	struct tda998x_audio_port audio_port[2];
+	struct tda9950_glue cec_glue;
+	struct gpio_desc *calib;
+	struct cec_notifier *cec_notify;
 };
 
 #define conn_to_tda998x_priv(x) \
@@ -345,6 +353,12 @@
 #define REG_CEC_INTSTATUS	  0xee		      /* read */
 # define CEC_INTSTATUS_CEC	  (1 << 0)
 # define CEC_INTSTATUS_HDMI	  (1 << 1)
+#define REG_CEC_CAL_XOSC_CTRL1    0xf2
+# define CEC_CAL_XOSC_CTRL1_ENA_CAL	BIT(0)
+#define REG_CEC_DES_FREQ2         0xf5
+# define CEC_DES_FREQ2_DIS_AUTOCAL BIT(7)
+#define REG_CEC_CLK               0xf6
+# define CEC_CLK_FRO              0x11
 #define REG_CEC_FRO_IM_CLK_CTRL   0xfb                /* read/write */
 # define CEC_FRO_IM_CLK_CTRL_GHOST_DIS (1 << 7)
 # define CEC_FRO_IM_CLK_CTRL_ENA_OTP   (1 << 6)
@@ -359,6 +373,7 @@
 # define CEC_RXSHPDLEV_HPD        (1 << 1)
 
 #define REG_CEC_ENAMODS           0xff                /* read/write */
+# define CEC_ENAMODS_EN_CEC_CLK   (1 << 7)
 # define CEC_ENAMODS_DIS_FRO      (1 << 6)
 # define CEC_ENAMODS_DIS_CCLK     (1 << 5)
 # define CEC_ENAMODS_EN_RXSENS    (1 << 2)
@@ -417,6 +432,114 @@
 	return val;
 }
 
+static void cec_enamods(struct tda998x_priv *priv, u8 mods, bool enable)
+{
+	int val = cec_read(priv, REG_CEC_ENAMODS);
+
+	if (val < 0)
+		return;
+
+	if (enable)
+		val |= mods;
+	else
+		val &= ~mods;
+
+	cec_write(priv, REG_CEC_ENAMODS, val);
+}
+
+static void tda998x_cec_set_calibration(struct tda998x_priv *priv, bool enable)
+{
+	if (enable) {
+		u8 val;
+
+		cec_write(priv, 0xf3, 0xc0);
+		cec_write(priv, 0xf4, 0xd4);
+
+		/* Enable automatic calibration mode */
+		val = cec_read(priv, REG_CEC_DES_FREQ2);
+		val &= ~CEC_DES_FREQ2_DIS_AUTOCAL;
+		cec_write(priv, REG_CEC_DES_FREQ2, val);
+
+		/* Enable free running oscillator */
+		cec_write(priv, REG_CEC_CLK, CEC_CLK_FRO);
+		cec_enamods(priv, CEC_ENAMODS_DIS_FRO, false);
+
+		cec_write(priv, REG_CEC_CAL_XOSC_CTRL1,
+			  CEC_CAL_XOSC_CTRL1_ENA_CAL);
+	} else {
+		cec_write(priv, REG_CEC_CAL_XOSC_CTRL1, 0);
+	}
+}
+
+/*
+ * Calibration for the internal oscillator: we need to set calibration mode,
+ * and then pulse the IRQ line low for a 10ms ± 1% period.
+ */
+static void tda998x_cec_calibration(struct tda998x_priv *priv)
+{
+	struct gpio_desc *calib = priv->calib;
+
+	mutex_lock(&priv->edid_mutex);
+	if (priv->hdmi->irq > 0)
+		disable_irq(priv->hdmi->irq);
+	gpiod_direction_output(calib, 1);
+	tda998x_cec_set_calibration(priv, true);
+
+	local_irq_disable();
+	gpiod_set_value(calib, 0);
+	mdelay(10);
+	gpiod_set_value(calib, 1);
+	local_irq_enable();
+
+	tda998x_cec_set_calibration(priv, false);
+	gpiod_direction_input(calib);
+	if (priv->hdmi->irq > 0)
+		enable_irq(priv->hdmi->irq);
+	mutex_unlock(&priv->edid_mutex);
+}
+
+static int tda998x_cec_hook_init(void *data)
+{
+	struct tda998x_priv *priv = data;
+	struct gpio_desc *calib;
+
+	calib = gpiod_get(&priv->hdmi->dev, "nxp,calib", GPIOD_ASIS);
+	if (IS_ERR(calib)) {
+		dev_warn(&priv->hdmi->dev, "failed to get calibration gpio: %ld\n",
+			 PTR_ERR(calib));
+		return PTR_ERR(calib);
+	}
+
+	priv->calib = calib;
+
+	return 0;
+}
+
+static void tda998x_cec_hook_exit(void *data)
+{
+	struct tda998x_priv *priv = data;
+
+	gpiod_put(priv->calib);
+	priv->calib = NULL;
+}
+
+static int tda998x_cec_hook_open(void *data)
+{
+	struct tda998x_priv *priv = data;
+
+	cec_enamods(priv, CEC_ENAMODS_EN_CEC_CLK | CEC_ENAMODS_EN_CEC, true);
+	tda998x_cec_calibration(priv);
+
+	return 0;
+}
+
+static void tda998x_cec_hook_release(void *data)
+{
+	struct tda998x_priv *priv = data;
+
+	cec_enamods(priv, CEC_ENAMODS_EN_CEC_CLK | CEC_ENAMODS_EN_CEC, false);
+}
+
 static int
 set_page(struct tda998x_priv *priv, u16 reg)
 {
@@ -657,10 +780,13 @@
 			sta, cec, lvl, flag0, flag1, flag2);
 
 		if (cec & CEC_RXSHPDINT_HPD) {
-			if (lvl & CEC_RXSHPDLEV_HPD)
+			if (lvl & CEC_RXSHPDLEV_HPD) {
 				tda998x_edid_delay_start(priv);
-			else
+			} else {
 				schedule_work(&priv->detect_work);
+				cec_notifier_set_phys_addr(priv->cec_notify,
+						   CEC_PHYS_ADDR_INVALID);
+			}
 
 			handled = true;
 		}
@@ -981,6 +1107,8 @@
 	if (connector->edid_blob_ptr) {
 		struct edid *edid = (void *)connector->edid_blob_ptr->data;
 
+		cec_notifier_set_phys_addr_from_edid(priv->cec_notify, edid);
+
 		priv->sink_has_audio = drm_detect_monitor_audio(edid);
 	} else {
 		priv->sink_has_audio = false;
@@ -1024,6 +1152,8 @@
 	offset = (blk & 1) ? 128 : 0;
 	segptr = blk / 2;
 
+	mutex_lock(&priv->edid_mutex);
+
 	reg_write(priv, REG_DDC_ADDR, 0xa0);
 	reg_write(priv, REG_DDC_OFFS, offset);
 	reg_write(priv, REG_DDC_SEGM_ADDR, 0x60);
@@ -1043,14 +1173,15 @@
 					msecs_to_jiffies(100));
 		if (i < 0) {
 			dev_err(&priv->hdmi->dev, "read edid wait err %d\n", i);
-			return i;
+			ret = i;
+			goto failed;
 		}
 	} else {
 		for (i = 100; i > 0; i--) {
 			msleep(1);
 			ret = reg_read(priv, REG_INT_FLAGS_2);
 			if (ret < 0)
-				return ret;
+				goto failed;
 			if (ret & INT_FLAGS_2_EDID_BLK_RD)
 				break;
 		}
@@ -1058,17 +1189,22 @@
 
 	if (i == 0) {
 		dev_err(&priv->hdmi->dev, "read edid timeout\n");
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
+		goto failed;
 	}
 
 	ret = reg_read_range(priv, REG_EDID_DATA_0, buf, length);
 	if (ret != length) {
 		dev_err(&priv->hdmi->dev, "failed to read edid block %d: %d\n",
 			blk, ret);
-		return ret;
+		goto failed;
 	}
 
-	return 0;
+	ret = 0;
+
+ failed:
+	mutex_unlock(&priv->edid_mutex);
+	return ret;
 }
 
 static int tda998x_connector_get_modes(struct drm_connector *connector)
@@ -1106,7 +1242,7 @@
 	return n;
 }
 
-static int tda998x_connector_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status tda998x_connector_mode_valid(struct drm_connector *connector,
 					struct drm_display_mode *mode)
 {
 	/* TDA19988 dotclock can go up to 165MHz */
@@ -1423,6 +1559,9 @@
 	cancel_work_sync(&priv->detect_work);
 
 	i2c_unregister_device(priv->cec);
+
+	if (priv->cec_notify)
+		cec_notifier_put(priv->cec_notify);
 }
 
 /* I2C driver functions */
@@ -1472,10 +1611,16 @@
 static int tda998x_create(struct i2c_client *client, struct tda998x_priv *priv)
 {
 	struct device_node *np = client->dev.of_node;
+	struct i2c_board_info cec_info;
 	u32 video;
 	int rev_lo, rev_hi, ret;
 
-	mutex_init(&priv->audio_mutex); /* Protect access from audio thread */
+	mutex_init(&priv->mutex);	/* protect the page access */
+	mutex_init(&priv->audio_mutex); /* protect access from audio thread */
+	mutex_init(&priv->edid_mutex);
+	init_waitqueue_head(&priv->edid_delay_waitq);
+	timer_setup(&priv->edid_delay_timer, tda998x_edid_delay_done, 0);
+	INIT_WORK(&priv->detect_work, tda998x_detect_work);
 
 	priv->vip_cntrl_0 = VIP_CNTRL_0_SWAP_A(2) | VIP_CNTRL_0_SWAP_B(3);
 	priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1);
@@ -1485,14 +1630,6 @@
 	priv->cec_addr = 0x34 + (client->addr & 0x03);
 	priv->current_page = 0xff;
 	priv->hdmi = client;
-	priv->cec = i2c_new_dummy(client->adapter, priv->cec_addr);
-	if (!priv->cec)
-		return -ENODEV;
-
-	mutex_init(&priv->mutex);	/* protect the page access */
-	init_waitqueue_head(&priv->edid_delay_waitq);
-	timer_setup(&priv->edid_delay_timer, tda998x_edid_delay_done, 0);
-	INIT_WORK(&priv->detect_work, tda998x_detect_work);
 
 	/* wake up the device: */
 	cec_write(priv, REG_CEC_ENAMODS,
@@ -1502,10 +1639,15 @@
 
 	/* read version: */
 	rev_lo = reg_read(priv, REG_VERSION_LSB);
+	if (rev_lo < 0) {
+		dev_err(&client->dev, "failed to read version: %d\n", rev_lo);
+		return rev_lo;
+	}
+
 	rev_hi = reg_read(priv, REG_VERSION_MSB);
-	if (rev_lo < 0 || rev_hi < 0) {
-		ret = rev_lo < 0 ? rev_lo : rev_hi;
-		goto fail;
+	if (rev_hi < 0) {
+		dev_err(&client->dev, "failed to read version: %d\n", rev_hi);
+		return rev_hi;
 	}
 
 	priv->rev = rev_lo | rev_hi << 8;
@@ -1529,7 +1671,7 @@
 	default:
 		dev_err(&client->dev, "found unsupported device: %04x\n",
 			priv->rev);
-		goto fail;
+		return -ENXIO;
 	}
 
 	/* after reset, enable DDC: */
@@ -1545,6 +1687,15 @@
 	cec_write(priv, REG_CEC_FRO_IM_CLK_CTRL,
 			CEC_FRO_IM_CLK_CTRL_GHOST_DIS | CEC_FRO_IM_CLK_CTRL_IMCLK_SEL);
 
+	/* ensure interrupts are disabled */
+	cec_write(priv, REG_CEC_RXSHPDINTENA, 0);
+
+	/* clear pending interrupts */
+	cec_read(priv, REG_CEC_RXSHPDINT);
+	reg_read(priv, REG_INT_FLAGS_0);
+	reg_read(priv, REG_INT_FLAGS_1);
+	reg_read(priv, REG_INT_FLAGS_2);
+
 	/* initialize the optional IRQ */
 	if (client->irq) {
 		unsigned long irq_flags;
@@ -1552,13 +1703,11 @@
 		/* init read EDID waitqueue and HDP work */
 		init_waitqueue_head(&priv->wq_edid);
 
-		/* clear pending interrupts */
-		reg_read(priv, REG_INT_FLAGS_0);
-		reg_read(priv, REG_INT_FLAGS_1);
-		reg_read(priv, REG_INT_FLAGS_2);
-
 		irq_flags =
 			irqd_get_trigger_type(irq_get_irq_data(client->irq));
+
+		priv->cec_glue.irq_flags = irq_flags;
+
 		irq_flags |= IRQF_SHARED | IRQF_ONESHOT;
 		ret = request_threaded_irq(client->irq, NULL,
 					   tda998x_irq_thread, irq_flags,
@@ -1567,13 +1716,46 @@
 			dev_err(&client->dev,
 				"failed to request IRQ#%u: %d\n",
 				client->irq, ret);
-			goto fail;
+			goto err_irq;
 		}
 
 		/* enable HPD irq */
 		cec_write(priv, REG_CEC_RXSHPDINTENA, CEC_RXSHPDLEV_HPD);
 	}
 
+	priv->cec_notify = cec_notifier_get(&client->dev);
+	if (!priv->cec_notify) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	priv->cec_glue.parent = &client->dev;
+	priv->cec_glue.data = priv;
+	priv->cec_glue.init = tda998x_cec_hook_init;
+	priv->cec_glue.exit = tda998x_cec_hook_exit;
+	priv->cec_glue.open = tda998x_cec_hook_open;
+	priv->cec_glue.release = tda998x_cec_hook_release;
+
+	/*
+	 * Some TDA998x are actually two I2C devices merged onto one piece
+	 * of silicon: TDA9989 and TDA19989 combine the HDMI transmitter
+	 * with a slightly modified TDA9950 CEC device.  The CEC device
+	 * is at the TDA9950 address, with the address pins strapped across
+	 * to the TDA998x address pins.  Hence, it always has the same
+	 * offset.
+	 */
+	memset(&cec_info, 0, sizeof(cec_info));
+	strlcpy(cec_info.type, "tda9950", sizeof(cec_info.type));
+	cec_info.addr = priv->cec_addr;
+	cec_info.platform_data = &priv->cec_glue;
+	cec_info.irq = client->irq;
+
+	priv->cec = i2c_new_device(client->adapter, &cec_info);
+	if (!priv->cec) {
+		ret = -ENODEV;
+		goto fail;
+	}
+
 	/* enable EDID read irq: */
 	reg_set(priv, REG_INT_FLAGS_2, INT_FLAGS_2_EDID_BLK_RD);
 
@@ -1596,12 +1778,18 @@
 		tda998x_audio_codec_init(priv, &client->dev);
 
 	return 0;
+
 fail:
 	/* if encoder_init fails, the encoder slave is never registered,
 	 * so cleanup here:
 	 */
 	i2c_unregister_device(priv->cec);
-	return -ENXIO;
+	if (priv->cec_notify)
+		cec_notifier_put(priv->cec_notify);
+	if (client->irq)
+		free_irq(client->irq, priv);
+err_irq:
+	return ret;
 }
 
 static void tda998x_encoder_prepare(struct drm_encoder *encoder)
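The TDA998x hands its CEC block to the tda9950 driver purely through platform
data, so any encoder embedding the same CEC core could in principle reuse it.
A hedged sketch of that wiring (hypothetical names hdmi, my_priv, my_cec_open
and my_cec_release; note the glue must outlive the CEC device, which the
tda998x guarantees by embedding it in its priv structure):

	struct tda9950_glue glue = {
		.parent  = &hdmi->dev,	/* CEC class device follows HDMI */
		.data    = my_priv,
		.open    = my_cec_open,
		.release = my_cec_release,
	};
	struct i2c_board_info info = {
		.type = "tda9950",
		.addr = 0x34,		/* assumed CEC slave address */
		.platform_data = &glue,
		.irq = hdmi->irq,	/* shared with the HDMI interrupt */
	};

	i2c_new_device(hdmi->adapter, &info);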
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 108d21f..9de8b1c 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -25,7 +25,8 @@
         select X86_MSR # used by igt/pm_rpm
         select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
         select DRM_DEBUG_MM if DRM=y
-	select DRM_DEBUG_MM_SELFTEST
+        select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
+	select DRM_DEBUG_SELFTEST
 	select SW_SYNC # signaling validation framework (igt/syncobj*)
 	select DRM_I915_SW_FENCE_DEBUG_OBJECTS
 	select DRM_I915_SELFTEST
@@ -89,6 +90,18 @@
 
           If in doubt, say "N".
 
+config DRM_I915_DEBUG_GUC
+        bool "Enable additional driver debugging for GuC"
+        depends on DRM_I915
+        default n
+        help
+          Choose this option to turn on extra driver debugging that may affect
+          performance but will help resolve GuC related issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
 config DRM_I915_SELFTEST
 	bool "Enable selftests upon driver load"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 4eee91a..4c6adae 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -12,12 +12,16 @@
 # Note the danger in using -Wall -Wextra is that when CI updates gcc we
 # will most likely get a sudden build breakage... Hopefully we will fix
 # new warnings before CI updates!
-subdir-ccflags-y := -Wall -Wextra
+subdir-ccflags-y := -Wall -Wextra -Wvla
 subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
 subdir-ccflags-y += $(call cc-disable-warning, type-limits)
 subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers)
 subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough)
 subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
+# clang warnings
+subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
+subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
+subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
 subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
 
 # Fine grained warnings disable
@@ -43,7 +47,8 @@
 	  intel_csr.o \
 	  intel_device_info.o \
 	  intel_pm.o \
-	  intel_runtime_pm.o
+	  intel_runtime_pm.o \
+	  intel_workarounds.o
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
@@ -66,11 +71,11 @@
 	  i915_gem_shrinker.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
-	  i915_gem_timeline.o \
 	  i915_gem_userptr.o \
 	  i915_gemfs.o \
 	  i915_query.o \
 	  i915_request.o \
+	  i915_timeline.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
 	  intel_breadcrumbs.o \
@@ -79,7 +84,8 @@
 	  intel_lrc.o \
 	  intel_mocs.o \
 	  intel_ringbuffer.o \
-	  intel_uncore.o
+	  intel_uncore.o \
+	  intel_wopcm.o
 
 # general-purpose microcontroller (GuC) support
 i915-y += intel_uc.o \
@@ -152,7 +158,8 @@
 i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
 i915-$(CONFIG_DRM_I915_SELFTEST) += \
 	selftests/i915_random.o \
-	selftests/i915_selftest.o
+	selftests/i915_selftest.o \
+	selftests/igt_flush_test.o
 
 # virtual gpu code
 i915-y += i915_vgpu.o
@@ -171,7 +178,8 @@
 	  i915_oa_glk.o \
 	  i915_oa_cflgt2.o \
 	  i915_oa_cflgt3.o \
-	  i915_oa_cnl.o
+	  i915_oa_cnl.o \
+	  i915_oa_icl.o
 
 ifeq ($(CONFIG_DRM_I915_GVT),y)
 i915-y += intel_gvt.o
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index d85939b..718ca08 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -813,15 +813,31 @@
 }
 
 static int force_nonpriv_reg_handler(struct parser_exec_state *s,
-				     unsigned int offset, unsigned int index)
+		unsigned int offset, unsigned int index, char *cmd)
 {
 	struct intel_gvt *gvt = s->vgpu->gvt;
-	unsigned int data = cmd_val(s, index + 1);
+	unsigned int data;
+	u32 ring_base;
+	u32 nopid;
+	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
 
-	if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) {
+	if (!strcmp(cmd, "lri")) {
+		data = cmd_val(s, index + 1);
+	} else {
+		gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n",
+			offset, cmd);
+		return -EINVAL;
+	}
+
+	ring_base = dev_priv->engine[s->ring_id]->mmio_base;
+	nopid = i915_mmio_reg_offset(RING_NOPID(ring_base));
+
+	if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) &&
+			data != nopid) {
 		gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n",
 			offset, data);
-		return -EPERM;
+		patch_value(s, cmd_ptr(s, index), nopid);
+		return 0;
 	}
 	return 0;
 }
@@ -869,7 +885,7 @@
 		return -EINVAL;
 
 	if (is_force_nonpriv_mmio(offset) &&
-		force_nonpriv_reg_handler(s, offset, index))
+		force_nonpriv_reg_handler(s, offset, index, cmd))
 		return -EPERM;
 
 	if (offset == i915_mmio_reg_offset(DERRMR) ||
@@ -1604,7 +1620,8 @@
 	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
 		|| IS_KABYLAKE(gvt->dev_priv)) {
 		/* BDW decides privilege based on address space */
-		if (cmd_val(s, 0) & (1 << 8))
+		if (cmd_val(s, 0) & (1 << 8) &&
+			!(s->vgpu->scan_nonprivbb & (1 << s->ring_id)))
 			return 0;
 	}
 	return 1;
@@ -1618,6 +1635,8 @@
 	bool bb_end = false;
 	struct intel_vgpu *vgpu = s->vgpu;
 	u32 cmd;
+	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
+		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
 
 	*bb_size = 0;
 
@@ -1629,18 +1648,22 @@
 	cmd = cmd_val(s, 0);
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
-		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
-				cmd, get_opcode(cmd, s->ring_id));
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
+				cmd, get_opcode(cmd, s->ring_id),
+				(s->buf_addr_type == PPGTT_BUFFER) ?
+				"ppgtt" : "ggtt", s->ring_id, s->workload);
 		return -EBADRQC;
 	}
 	do {
-		if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
+		if (copy_gma_to_hva(s->vgpu, mm,
 				gma, gma + 4, &cmd) < 0)
 			return -EFAULT;
 		info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 		if (info == NULL) {
-			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
-				cmd, get_opcode(cmd, s->ring_id));
+			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
+				cmd, get_opcode(cmd, s->ring_id),
+				(s->buf_addr_type == PPGTT_BUFFER) ?
+				"ppgtt" : "ggtt", s->ring_id, s->workload);
 			return -EBADRQC;
 		}
 
@@ -1666,6 +1689,9 @@
 	unsigned long gma = 0;
 	unsigned long bb_size;
 	int ret = 0;
+	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
+		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
+	unsigned long gma_start_offset = 0;
 
 	/* get the start gm address of the batch buffer */
 	gma = get_gma_bb_from_cmd(s, 1);
@@ -1680,8 +1706,24 @@
 	if (!bb)
 		return -ENOMEM;
 
+	bb->ppgtt = (s->buf_addr_type != GTT_BUFFER);
+
+	/* gma_start_offset stores the batch buffer's start gma offset
+	 * relative to the page boundary, so for a non-privileged batch
+	 * buffer the shadowed gem object holds exactly the same page
+	 * layout as the original gem object. This makes it convenient
+	 * to replace the whole non-privileged batch buffer page with
+	 * the shadowed one in the PPGTT at the same gma address. (That
+	 * replacement is not implemented yet, but may become necessary
+	 * in the future.)
+	 * For a privileged batch buffer, we simply change the start gma
+	 * address to that of the shadowed page.
+	 */
+	if (bb->ppgtt)
+		gma_start_offset = gma & ~I915_GTT_PAGE_MASK;
+
 	bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv,
-					 roundup(bb_size, PAGE_SIZE));
+			 roundup(bb_size + gma_start_offset, PAGE_SIZE));
 	if (IS_ERR(bb->obj)) {
 		ret = PTR_ERR(bb->obj);
 		goto err_free_bb;
@@ -1702,9 +1744,9 @@
 		bb->clflush &= ~CLFLUSH_BEFORE;
 	}
 
-	ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
+	ret = copy_gma_to_hva(s->vgpu, mm,
 			      gma, gma + bb_size,
-			      bb->va);
+			      bb->va + gma_start_offset);
 	if (ret < 0) {
 		gvt_vgpu_err("fail to copy guest ring buffer\n");
 		ret = -EFAULT;
@@ -1730,7 +1772,7 @@
 	 * buffer's gma in pair. After all, we don't want to pin the shadow
 	 * buffer here (too early).
 	 */
-	s->ip_va = bb->va;
+	s->ip_va = bb->va + gma_start_offset;
 	s->ip_gma = gma;
 	return 0;
 err_unmap:
@@ -2469,15 +2511,18 @@
 
 	info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id);
 	if (info == NULL) {
-		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n",
-				cmd, get_opcode(cmd, s->ring_id));
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n",
+				cmd, get_opcode(cmd, s->ring_id),
+				(s->buf_addr_type == PPGTT_BUFFER) ?
+				"ppgtt" : "ggtt", s->ring_id, s->workload);
 		return -EBADRQC;
 	}
 
 	s->info = info;
 
 	trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
-			  cmd_length(s), s->buf_type);
+			  cmd_length(s), s->buf_type, s->buf_addr_type,
+			  s->workload, info->name);
 
 	if (info->handler) {
 		ret = info->handler(s);
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
index 32a66df..2ec89bc 100644
--- a/drivers/gpu/drm/i915/gvt/debugfs.c
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -122,18 +122,69 @@
 	seq_printf(s, "Total: %d, Diff: %d\n", param.total, param.diff);
 	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(vgpu_mmio_diff);
 
-static int vgpu_mmio_diff_open(struct inode *inode, struct file *file)
+static int
+vgpu_scan_nonprivbb_get(void *data, u64 *val)
 {
-	return single_open(file, vgpu_mmio_diff_show, inode->i_private);
+	struct intel_vgpu *vgpu = (struct intel_vgpu *)data;
+	*val = vgpu->scan_nonprivbb;
+	return 0;
 }
 
-static const struct file_operations vgpu_mmio_diff_fops = {
-	.open		= vgpu_mmio_diff_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+/*
+ * Set or clear bit engine_id of vgpu->scan_nonprivbb to turn scanning
+ * of non-privileged batch buffers on or off for that engine. For
+ * example, vgpu->scan_nonprivbb = 3 enables scanning of non-privileged
+ * batch buffers on engines 0 and 1.
+ */
+static int
+vgpu_scan_nonprivbb_set(void *data, u64 val)
+{
+	struct intel_vgpu *vgpu = (struct intel_vgpu *)data;
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	enum intel_engine_id id;
+	char buf[128], *s;
+	int len;
+
+	val &= (1 << I915_NUM_ENGINES) - 1;
+
+	if (vgpu->scan_nonprivbb == val)
+		return 0;
+
+	if (!val)
+		goto done;
+
+	len = sprintf(buf,
+		"gvt: vgpu %d turns on non-privileged batch buffer scanning on engines:",
+		vgpu->id);
+
+	s = buf + len;
+
+	for (id = 0; id < I915_NUM_ENGINES; id++) {
+		struct intel_engine_cs *engine;
+
+		engine = dev_priv->engine[id];
+		if (engine && (val & (1 << id))) {
+			len = snprintf(s, 4, "%d, ", engine->id);
+			s += len;
+		} else {
+			val &= ~(1 << id);
+		}
+	}
+
+	if (val)
+		sprintf(s, "low performance expected.");
+
+	pr_warn("%s\n", buf);
+
+done:
+	vgpu->scan_nonprivbb = val;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
+			vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set,
+			"0x%llx\n");
 
 /**
  * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU
@@ -162,6 +213,11 @@
 	if (!ent)
 		return -ENOMEM;
 
+	ent = debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs,
+				 vgpu, &vgpu_scan_nonprivbb_fops);
+	if (!ent)
+		return -ENOMEM;
+
 	return 0;
 }
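As a usage note for the new debugfs knob (the exact path is hedged; it
depends on where the per-vGPU debugfs directory is created):

	/*
	 * Hypothetical invocation: enable non-privileged batch buffer
	 * scanning on engines 0 and 1 of a vGPU, accepting the
	 * performance cost the warning above describes:
	 *
	 *   echo 0x3 > /sys/kernel/debug/gvt/vgpu1/scan_nonprivbb
	 */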
 
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index efacd8a..05d15a0 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -99,7 +99,6 @@
 struct intel_vgpu_mmio {
 	void *vreg;
 	void *sreg;
-	bool disable_warn_untrack;
 };
 
 #define INTEL_GVT_MAX_BAR_NUM 4
@@ -226,6 +225,7 @@
 
 	struct completion vblank_done;
 
+	u32 scan_nonprivbb;
 };
 
 /* validating GM healthy status*/
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index a33c1c3e..4b6532f 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -191,6 +191,8 @@
 	unsigned int max_fence = vgpu_fence_sz(vgpu);
 
 	if (fence_num >= max_fence) {
+		gvt_vgpu_err("access oob fence reg %d/%d\n",
+			     fence_num, max_fence);
 
 		/* When guest access oob fence regs without access
 		 * pv_info first, we treat guest not supporting GVT,
@@ -200,11 +202,6 @@
 			enter_failsafe_mode(vgpu,
 					GVT_FAILSAFE_UNSUPPORTED_GUEST);
 
-		if (!vgpu->mmio.disable_warn_untrack) {
-			gvt_vgpu_err("found oob fence register access\n");
-			gvt_vgpu_err("total fence %d, access fence %d\n",
-				     max_fence, fence_num);
-		}
 		memset(p_data, 0, bytes);
 		return -EINVAL;
 	}
@@ -477,22 +474,28 @@
 	unsigned int offset, void *p_data, unsigned int bytes)
 {
 	u32 reg_nonpriv = *(u32 *)p_data;
+	int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset);
+	u32 ring_base;
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	int ret = -EINVAL;
 
-	if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) {
-		gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n",
-			vgpu->id, offset, bytes);
+	if ((bytes != 4) || ((offset & (bytes - 1)) != 0) || ring_id < 0) {
+		gvt_err("vgpu(%d) ring %d Invalid FORCE_NONPRIV offset %x(%dB)\n",
+			vgpu->id, ring_id, offset, bytes);
 		return ret;
 	}
 
-	if (in_whitelist(reg_nonpriv)) {
+	ring_base = dev_priv->engine[ring_id]->mmio_base;
+
+	if (in_whitelist(reg_nonpriv) ||
+		reg_nonpriv == i915_mmio_reg_offset(RING_NOPID(ring_base))) {
 		ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data,
 			bytes);
-	} else {
-		gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n",
-			vgpu->id, reg_nonpriv);
-	}
-	return ret;
+	} else {
+		gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n",
+			vgpu->id, reg_nonpriv, offset);
+	}
+
+	return 0;
 }
 
 static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
@@ -3092,9 +3095,7 @@
 	 */
 	mmio_info = find_mmio_info(gvt, offset);
 	if (!mmio_info) {
-		if (!vgpu->mmio.disable_warn_untrack)
-			gvt_vgpu_err("untracked MMIO %08x len %d\n",
-				     offset, bytes);
+		gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes);
 		goto default_rw;
 	}
 
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index 11b71b3..e4960af 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -244,8 +244,6 @@
 
 		/* set the bit 0:2(Core C-State ) to C0 */
 		vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0;
-
-		vgpu->mmio.disable_warn_untrack = false;
 	} else {
 #define GVT_GEN8_MMIO_RESET_OFFSET		(0x44200)
 		/* only reset the engine related, so starting with 0x44200
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index a5bac83..0f94955 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -448,7 +448,7 @@
 
 bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id)
 {
-	u32 *reg_state = ctx->engine[ring_id].lrc_reg_state;
+	u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state;
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 75b7bc7..d053cbe 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -53,7 +53,6 @@
 	bool active;
 
 	ktime_t sched_in_time;
-	ktime_t sched_out_time;
 	ktime_t sched_time;
 	ktime_t left_ts;
 	ktime_t allocated_ts;
@@ -66,17 +65,22 @@
 	struct hrtimer timer;
 	unsigned long period;
 	struct list_head lru_runq_head;
+	ktime_t expire_time;
 };
 
-static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu)
+static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time)
 {
 	ktime_t delta_ts;
-	struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data;
+	struct vgpu_sched_data *vgpu_data;
 
-	delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time;
+	if (!vgpu || vgpu == vgpu->gvt->idle_vgpu)
+		return;
 
-	vgpu_data->sched_time += delta_ts;
-	vgpu_data->left_ts -= delta_ts;
+	vgpu_data = vgpu->sched_data;
+	delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time);
+	vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts);
+	vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts);
+	vgpu_data->sched_in_time = cur_time;
 }
 
 #define GVT_TS_BALANCE_PERIOD_MS 100
@@ -150,11 +154,7 @@
 	}
 
 	cur_time = ktime_get();
-	if (scheduler->current_vgpu) {
-		vgpu_data = scheduler->current_vgpu->sched_data;
-		vgpu_data->sched_out_time = cur_time;
-		vgpu_update_timeslice(scheduler->current_vgpu);
-	}
+	vgpu_update_timeslice(scheduler->current_vgpu, cur_time);
 	vgpu_data = scheduler->next_vgpu->sched_data;
 	vgpu_data->sched_in_time = cur_time;
 
@@ -226,17 +226,22 @@
 void intel_gvt_schedule(struct intel_gvt *gvt)
 {
 	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
-	static uint64_t timer_check;
+	ktime_t cur_time;
 
 	mutex_lock(&gvt->lock);
+	cur_time = ktime_get();
 
 	if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
 				(void *)&gvt->service_request)) {
-		if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
+		if (cur_time >= sched_data->expire_time) {
 			gvt_balance_timeslice(sched_data);
+			sched_data->expire_time = ktime_add_ms(
+				cur_time, GVT_TS_BALANCE_PERIOD_MS);
+		}
 	}
 	clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);
 
+	vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time);
 	tbs_sched_func(sched_data);
 
 	mutex_unlock(&gvt->lock);
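
The sched_policy.c change above stops sampling sched_out_time at switch-out and instead charges the running vGPU on every service tick: the elapsed slice is folded into sched_time, subtracted from the remaining budget, and the slice is restarted from the current timestamp. Reduced to the fields involved, the accounting looks roughly like this (a sketch; the struct is trimmed to what the helper touches):

  #include <linux/ktime.h>

  struct demo_sched_data {
  	ktime_t sched_in_time;	/* when the current slice started */
  	ktime_t sched_time;	/* total time charged so far */
  	ktime_t left_ts;	/* budget left in this balance period */
  };

  static void demo_account_timeslice(struct demo_sched_data *d, ktime_t now)
  {
  	ktime_t delta = ktime_sub(now, d->sched_in_time);

  	d->sched_time = ktime_add(d->sched_time, delta);
  	d->left_ts = ktime_sub(d->left_ts, delta);
  	d->sched_in_time = now;	/* the next call charges from here */
  }
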
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 638abe8..c2d183b 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -58,7 +58,7 @@
 	int ring_id = workload->ring_id;
 	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->engine[ring_id].state->obj;
+		shadow_ctx->__engine[ring_id].state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 
@@ -97,7 +97,7 @@
 		i915_mmio_reg_offset(EU_PERF_CNTL6),
 	};
 
-	if (!workload || !reg_state || workload->ring_id != RCS)
+	if (workload->ring_id != RCS)
 		return;
 
 	if (save) {
@@ -130,7 +130,7 @@
 	int ring_id = workload->ring_id;
 	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->engine[ring_id].state->obj;
+		shadow_ctx->__engine[ring_id].state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *dst;
@@ -283,7 +283,7 @@
 static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
 		struct intel_engine_cs *engine)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 	u64 desc = 0;
 
 	desc = ce->lrc_desc;
@@ -389,7 +389,7 @@
 	 * shadow_ctx pages invalid. So GVT needs to pin itself. After updating
 	 * the guest context, GVT can unpin the shadow_ctx safely.
 	 */
-	ring = engine->context_pin(engine, shadow_ctx);
+	ring = intel_context_pin(shadow_ctx, engine);
 	if (IS_ERR(ring)) {
 		ret = PTR_ERR(ring);
 		gvt_vgpu_err("fail to pin shadow context\n");
@@ -403,7 +403,7 @@
 	return 0;
 
 err_unpin:
-	engine->context_unpin(engine, shadow_ctx);
+	intel_context_unpin(shadow_ctx, engine);
 err_shadow:
 	release_shadow_wa_ctx(&workload->wa_ctx);
 err_scan:
@@ -437,7 +437,7 @@
 	return 0;
 
 err_unpin:
-	engine->context_unpin(engine, shadow_ctx);
+	intel_context_unpin(shadow_ctx, engine);
 	release_shadow_wa_ctx(&workload->wa_ctx);
 	return ret;
 }
@@ -452,12 +452,6 @@
 	int ret;
 
 	list_for_each_entry(bb, &workload->shadow_bb, list) {
-		bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0);
-		if (IS_ERR(bb->vma)) {
-			ret = PTR_ERR(bb->vma);
-			goto err;
-		}
-
 		/* For a privileged batch buffer that is not wa_ctx, bb_start_cmd_va
 		 * is only updated into ring_scan_buffer, not the real ring address
 		 * allocated in the later copy_workload_to_ring_buffer. Please note
@@ -469,25 +463,53 @@
 			bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
 				+ bb->bb_offset;
 
-		/* relocate shadow batch buffer */
-		bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
-		if (gmadr_bytes == 8)
-			bb->bb_start_cmd_va[2] = 0;
+		if (bb->ppgtt) {
+			/* For a non-privileged bb, scan & shadow exist
+			 * only for debugging, so the content of the shadow
+			 * bb is identical to the original bb. Rather than
+			 * switching to the shadow bb's gma address, we keep
+			 * the original batch buffer's gma address and
+			 * submit the original bb to the hardware
+			 * directly.
+			 */
+			if (bb->clflush & CLFLUSH_AFTER) {
+				drm_clflush_virt_range(bb->va,
+						bb->obj->base.size);
+				bb->clflush &= ~CLFLUSH_AFTER;
+			}
+			i915_gem_obj_finish_shmem_access(bb->obj);
+			bb->accessing = false;
 
-		/* No one is going to touch shadow bb from now on. */
-		if (bb->clflush & CLFLUSH_AFTER) {
-			drm_clflush_virt_range(bb->va, bb->obj->base.size);
-			bb->clflush &= ~CLFLUSH_AFTER;
+		} else {
+			bb->vma = i915_gem_object_ggtt_pin(bb->obj,
+					NULL, 0, 0, 0);
+			if (IS_ERR(bb->vma)) {
+				ret = PTR_ERR(bb->vma);
+				goto err;
+			}
+
+			/* relocate shadow batch buffer */
+			bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
+			if (gmadr_bytes == 8)
+				bb->bb_start_cmd_va[2] = 0;
+
+			/* No one is going to touch shadow bb from now on. */
+			if (bb->clflush & CLFLUSH_AFTER) {
+				drm_clflush_virt_range(bb->va,
+						bb->obj->base.size);
+				bb->clflush &= ~CLFLUSH_AFTER;
+			}
+
+			ret = i915_gem_object_set_to_gtt_domain(bb->obj,
+					false);
+			if (ret)
+				goto err;
+
+			i915_gem_obj_finish_shmem_access(bb->obj);
+			bb->accessing = false;
+
+			i915_vma_move_to_active(bb->vma, workload->req, 0);
 		}
-
-		ret = i915_gem_object_set_to_gtt_domain(bb->obj, false);
-		if (ret)
-			goto err;
-
-		i915_gem_obj_finish_shmem_access(bb->obj);
-		bb->accessing = false;
-
-		i915_vma_move_to_active(bb->vma, workload->req, 0);
 	}
 	return 0;
 err:
@@ -504,7 +526,7 @@
 	struct intel_vgpu_submission *s = &workload->vgpu->submission;
 	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->engine[ring_id].state->obj;
+		shadow_ctx->__engine[ring_id].state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 
@@ -666,7 +688,7 @@
 
 	ret = prepare_workload(workload);
 	if (ret) {
-		engine->context_unpin(engine, shadow_ctx);
+		intel_context_unpin(shadow_ctx, engine);
 		goto out;
 	}
 
@@ -749,7 +771,7 @@
 	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
 	int ring_id = workload->ring_id;
 	struct drm_i915_gem_object *ctx_obj =
-		shadow_ctx->engine[ring_id].state->obj;
+		shadow_ctx->__engine[ring_id].state->obj;
 	struct execlist_ring_context *shadow_ring_context;
 	struct page *page;
 	void *src;
@@ -876,7 +898,7 @@
 		}
 		mutex_lock(&dev_priv->drm.struct_mutex);
 		/* unpin shadow ctx as the shadow_ctx update is done */
-		engine->context_unpin(engine, s->shadow_ctx);
+		intel_context_unpin(s->shadow_ctx, engine);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
 
@@ -1134,9 +1156,6 @@
 	if (IS_ERR(s->shadow_ctx))
 		return PTR_ERR(s->shadow_ctx);
 
-	if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv))
-		s->shadow_ctx->priority = INT_MAX;
-
 	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
 
 	s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
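
The scheduler.c hunks above convert to the new intel_context_pin()/intel_context_unpin() API, which follows the kernel's pointer-or-ERR_PTR return convention: a failing pin returns ERR_PTR(-errno), and callers branch with IS_ERR()/PTR_ERR() before unwinding. The generic shape of that convention, sketched with illustrative demo_* names:

  #include <linux/err.h>
  #include <linux/slab.h>

  struct demo_ring { unsigned int size; };
  struct demo_ctx  { struct demo_ring *ring; };

  static struct demo_ring *demo_pin(struct demo_ctx *ctx)
  {
  	struct demo_ring *ring = kzalloc(sizeof(*ring), GFP_KERNEL);

  	if (!ring)
  		return ERR_PTR(-ENOMEM);	/* errno encoded in the pointer */
  	ctx->ring = ring;
  	return ring;
  }

  static int demo_prepare(struct demo_ctx *ctx)
  {
  	struct demo_ring *ring = demo_pin(ctx);

  	if (IS_ERR(ring))
  		return PTR_ERR(ring);	/* decode back to -ENOMEM etc. */
  	/* ... use ring; later failures unwind with a demo_unpin() ... */
  	return 0;
  }
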
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 486ed57..6c64478 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -125,6 +125,7 @@
 	unsigned int clflush;
 	bool accessing;
 	unsigned long bb_offset;
+	bool ppgtt;
 };
 
 #define workload_q_head(vgpu, ring_id) \
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
index 82093f1..1fd6420 100644
--- a/drivers/gpu/drm/i915/gvt/trace.h
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -224,19 +224,25 @@
 	TP_printk("%s", __entry->buf)
 );
 
+#define GVT_CMD_STR_LEN 40
 TRACE_EVENT(gvt_command,
-	TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len,
-		 u32 buf_type),
+	TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va,
+		u32 cmd_len,  u32 buf_type, u32 buf_addr_type,
+		void *workload, char *cmd_name),
 
-	TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type),
+	TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type,
+		buf_addr_type, workload, cmd_name),
 
 	TP_STRUCT__entry(
 		__field(u8, vgpu_id)
 		__field(u8, ring_id)
 		__field(u32, ip_gma)
 		__field(u32, buf_type)
+		__field(u32, buf_addr_type)
 		__field(u32, cmd_len)
+		__field(void*, workload)
 		__dynamic_array(u32, raw_cmd, cmd_len)
+		__array(char, cmd_name, GVT_CMD_STR_LEN)
 	),
 
 	TP_fast_assign(
@@ -244,17 +250,25 @@
 		__entry->ring_id = ring_id;
 		__entry->ip_gma = ip_gma;
 		__entry->buf_type = buf_type;
+		__entry->buf_addr_type = buf_addr_type;
 		__entry->cmd_len = cmd_len;
+		__entry->workload = workload;
+		snprintf(__entry->cmd_name, GVT_CMD_STR_LEN, "%s", cmd_name);
 		memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va));
 	),
 
 
-	TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s",
+	TP_printk("vgpu%d ring %d: address_type %u, buf_type %u, ip_gma %08x,cmd (name=%s,len=%u,raw cmd=%s), workload=%p\n",
 		__entry->vgpu_id,
 		__entry->ring_id,
+		__entry->buf_addr_type,
 		__entry->buf_type,
 		__entry->ip_gma,
-		__print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4))
+		__entry->cmd_name,
+		__entry->cmd_len,
+		__print_array(__get_dynamic_array(raw_cmd),
+			__entry->cmd_len, 4),
+		__entry->workload)
 );
 
 #define GVT_TEMP_STR_LEN 10
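
The widened gvt_command tracepoint stores a fixed-size command name with __array() plus snprintf() in TP_fast_assign(), next to the per-event-sized raw words kept in __dynamic_array(). The shape of that idiom in a standalone tracepoint (illustrative; the usual TRACE_SYSTEM/#include/CREATE_TRACE_POINTS header boilerplate is omitted):

  TRACE_EVENT(demo_cmd,
  	TP_PROTO(u8 ring_id, u32 cmd_len, u32 *cmd_va, const char *cmd_name),
  	TP_ARGS(ring_id, cmd_len, cmd_va, cmd_name),

  	TP_STRUCT__entry(
  		__field(u8, ring_id)
  		__field(u32, cmd_len)
  		__array(char, cmd_name, 16)		/* fixed-size copy */
  		__dynamic_array(u32, raw_cmd, cmd_len)	/* sized per event */
  	),

  	TP_fast_assign(
  		__entry->ring_id = ring_id;
  		__entry->cmd_len = cmd_len;
  		snprintf(__entry->cmd_name, 16, "%s", cmd_name);
  		memcpy(__get_dynamic_array(raw_cmd), cmd_va,
  		       cmd_len * sizeof(*cmd_va));
  	),

  	TP_printk("ring %u cmd %s len=%u raw=%s",
  		  __entry->ring_id, __entry->cmd_name, __entry->cmd_len,
  		  __print_array(__get_dynamic_array(raw_cmd),
  				__entry->cmd_len, 4))
  );
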
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 89f7ff2..13e7b9e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -377,16 +377,19 @@
 	print_file_stats(m, "[k]batch pool", stats);
 }
 
-static int per_file_ctx_stats(int id, void *ptr, void *data)
+static int per_file_ctx_stats(int idx, void *ptr, void *data)
 {
 	struct i915_gem_context *ctx = ptr;
-	int n;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 
-	for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) {
-		if (ctx->engine[n].state)
-			per_file_stats(0, ctx->engine[n].state->obj, data);
-		if (ctx->engine[n].ring)
-			per_file_stats(0, ctx->engine[n].ring->vma->obj, data);
+	for_each_engine(engine, ctx->i915, id) {
+		struct intel_context *ce = to_intel_context(ctx, engine);
+
+		if (ce->state)
+			per_file_stats(0, ce->state->obj, data);
+		if (ce->ring)
+			per_file_stats(0, ce->ring->vma->obj, data);
 	}
 
 	return 0;
@@ -1215,20 +1218,20 @@
 		max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 :
 			    rp_state_cap >> 16) & 0xff;
 		max_freq *= (IS_GEN9_BC(dev_priv) ||
-			     IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1);
+			     INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 
 		max_freq = (rp_state_cap & 0xff00) >> 8;
 		max_freq *= (IS_GEN9_BC(dev_priv) ||
-			     IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1);
+			     INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 
 		max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 :
 			    rp_state_cap >> 0) & 0xff;
 		max_freq *= (IS_GEN9_BC(dev_priv) ||
-			     IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1);
+			     INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1);
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   intel_gpu_freq(dev_priv, max_freq));
 		seq_printf(m, "Max overclocked frequency: %dMHz\n",
@@ -1340,10 +1343,9 @@
 		struct rb_node *rb;
 
 		seq_printf(m, "%s:\n", engine->name);
-		seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n",
+		seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
 			   engine->hangcheck.seqno, seqno[id],
-			   intel_engine_last_submit(engine),
-			   engine->timeline->inflight_seqnos);
+			   intel_engine_last_submit(engine));
 		seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n",
 			   yesno(intel_engine_has_waiter(engine)),
 			   yesno(test_bit(engine->id,
@@ -1796,9 +1798,9 @@
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int ret = 0;
-	int gpu_freq, ia_freq;
 	unsigned int max_gpu_freq, min_gpu_freq;
+	int gpu_freq, ia_freq;
+	int ret;
 
 	if (!HAS_LLC(dev_priv))
 		return -ENODEV;
@@ -1809,13 +1811,12 @@
 	if (ret)
 		goto out;
 
-	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+	min_gpu_freq = rps->min_freq;
+	max_gpu_freq = rps->max_freq;
+	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
 		/* Convert GT frequency to 50 HZ units */
-		min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER;
-		max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER;
-	} else {
-		min_gpu_freq = rps->min_freq_softlimit;
-		max_gpu_freq = rps->max_freq_softlimit;
+		min_gpu_freq /= GEN9_FREQ_SCALER;
+		max_gpu_freq /= GEN9_FREQ_SCALER;
 	}
 
 	seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
@@ -1828,7 +1829,7 @@
 		seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
 			   intel_gpu_freq(dev_priv, (gpu_freq *
 						     (IS_GEN9_BC(dev_priv) ||
-						      IS_CANNONLAKE(dev_priv) ?
+						      INTEL_GEN(dev_priv) >= 10 ?
 						      GEN9_FREQ_SCALER : 1))),
 			   ((ia_freq >> 0) & 0xff) * 100,
 			   ((ia_freq >> 8) & 0xff) * 100);
@@ -1923,8 +1924,8 @@
 
 static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 {
-	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)",
-		   ring->space, ring->head, ring->tail);
+	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, emit: %u)",
+		   ring->space, ring->head, ring->tail, ring->emit);
 }
 
 static int i915_context_status(struct seq_file *m, void *unused)
@@ -1961,7 +1962,8 @@
 		seq_putc(m, '\n');
 
 		for_each_engine(engine, dev_priv, id) {
-			struct intel_context *ce = &ctx->engine[engine->id];
+			struct intel_context *ce =
+				to_intel_context(ctx, engine);
 
 			seq_printf(m, "%s: ", engine->name);
 			if (ce->state)
@@ -2326,30 +2328,45 @@
 	return 0;
 }
 
+static const char *
+stringify_guc_log_type(enum guc_log_buffer_type type)
+{
+	switch (type) {
+	case GUC_ISR_LOG_BUFFER:
+		return "ISR";
+	case GUC_DPC_LOG_BUFFER:
+		return "DPC";
+	case GUC_CRASH_DUMP_LOG_BUFFER:
+		return "CRASH";
+	default:
+		MISSING_CASE(type);
+	}
+
+	return "";
+}
+
 static void i915_guc_log_info(struct seq_file *m,
 			      struct drm_i915_private *dev_priv)
 {
-	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_guc_log *log = &dev_priv->guc.log;
+	enum guc_log_buffer_type type;
 
-	seq_puts(m, "\nGuC logging stats:\n");
+	if (!intel_guc_log_relay_enabled(log)) {
+		seq_puts(m, "GuC log relay disabled\n");
+		return;
+	}
 
-	seq_printf(m, "\tISR:   flush count %10u, overflow count %10u\n",
-		   guc->log.flush_count[GUC_ISR_LOG_BUFFER],
-		   guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]);
+	seq_puts(m, "GuC logging stats:\n");
 
-	seq_printf(m, "\tDPC:   flush count %10u, overflow count %10u\n",
-		   guc->log.flush_count[GUC_DPC_LOG_BUFFER],
-		   guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]);
+	seq_printf(m, "\tRelay full count: %u\n",
+		   log->relay.full_count);
 
-	seq_printf(m, "\tCRASH: flush count %10u, overflow count %10u\n",
-		   guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER],
-		   guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]);
-
-	seq_printf(m, "\tTotal flush interrupt count: %u\n",
-		   guc->log.flush_interrupt_count);
-
-	seq_printf(m, "\tCapture miss count: %u\n",
-		   guc->log.capture_miss_count);
+	for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
+		seq_printf(m, "\t%s:\tflush count %10u, overflow count %10u\n",
+			   stringify_guc_log_type(type),
+			   log->stats[type].flush,
+			   log->stats[type].sampled_overflow);
+	}
 }
 
 static void i915_guc_client_info(struct seq_file *m,
@@ -2379,14 +2396,19 @@
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	const struct intel_guc *guc = &dev_priv->guc;
 
-	if (!USES_GUC_SUBMISSION(dev_priv))
+	if (!USES_GUC(dev_priv))
 		return -ENODEV;
 
+	i915_guc_log_info(m, dev_priv);
+
+	if (!USES_GUC_SUBMISSION(dev_priv))
+		return 0;
+
 	GEM_BUG_ON(!guc->execbuf_client);
 
-	seq_printf(m, "Doorbell map:\n");
+	seq_printf(m, "\nDoorbell map:\n");
 	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
-	seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
+	seq_printf(m, "Doorbell next cacheline: 0x%x\n", guc->db_cacheline);
 
 	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client);
 	i915_guc_client_info(m, dev_priv, guc->execbuf_client);
@@ -2396,8 +2418,6 @@
 		i915_guc_client_info(m, dev_priv, guc->preempt_client);
 	}
 
-	i915_guc_log_info(m, dev_priv);
-
 	/* Add more as required ... */
 
 	return 0;
@@ -2496,35 +2516,73 @@
 	return 0;
 }
 
-static int i915_guc_log_control_get(void *data, u64 *val)
+static int i915_guc_log_level_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
 
-	if (!HAS_GUC(dev_priv))
+	if (!USES_GUC(dev_priv))
 		return -ENODEV;
 
-	if (!dev_priv->guc.log.vma)
-		return -EINVAL;
-
-	*val = i915_modparams.guc_log_level;
+	*val = intel_guc_log_level_get(&dev_priv->guc.log);
 
 	return 0;
 }
 
-static int i915_guc_log_control_set(void *data, u64 val)
+static int i915_guc_log_level_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
 
-	if (!HAS_GUC(dev_priv))
+	if (!USES_GUC(dev_priv))
 		return -ENODEV;
 
-	return intel_guc_log_control(&dev_priv->guc, val);
+	return intel_guc_log_level_set(&dev_priv->guc.log, val);
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops,
-			i915_guc_log_control_get, i915_guc_log_control_set,
+DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_level_fops,
+			i915_guc_log_level_get, i915_guc_log_level_set,
 			"%lld\n");
 
+static int i915_guc_log_relay_open(struct inode *inode, struct file *file)
+{
+	struct drm_i915_private *dev_priv = inode->i_private;
+
+	if (!USES_GUC(dev_priv))
+		return -ENODEV;
+
+	file->private_data = &dev_priv->guc.log;
+
+	return intel_guc_log_relay_open(&dev_priv->guc.log);
+}
+
+static ssize_t
+i915_guc_log_relay_write(struct file *filp,
+			 const char __user *ubuf,
+			 size_t cnt,
+			 loff_t *ppos)
+{
+	struct intel_guc_log *log = filp->private_data;
+
+	intel_guc_log_relay_flush(log);
+
+	return cnt;
+}
+
+static int i915_guc_log_relay_release(struct inode *inode, struct file *file)
+{
+	struct drm_i915_private *dev_priv = inode->i_private;
+
+	intel_guc_log_relay_close(&dev_priv->guc.log);
+
+	return 0;
+}
+
+static const struct file_operations i915_guc_log_relay_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_guc_log_relay_open,
+	.write = i915_guc_log_relay_write,
+	.release = i915_guc_log_relay_release,
+};
+
 static const char *psr2_live_status(u32 val)
 {
 	static const char * const live_status[] = {
@@ -2548,6 +2606,26 @@
 	return "unknown";
 }
 
+static const char *psr_sink_status(u8 val)
+{
+	static const char * const sink_status[] = {
+		"inactive",
+		"transition to active, capture and display",
+		"active, display from RFB",
+		"active, capture and display on sink device timings",
+		"transition to inactive, capture and display, timing re-sync",
+		"reserved",
+		"reserved",
+		"sink internal error"
+	};
+
+	val &= DP_PSR_SINK_STATE_MASK;
+	if (val < ARRAY_SIZE(sink_status))
+		return sink_status[val];
+
+	return "unknown";
+}
+
 static int i915_edp_psr_status(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -2569,14 +2647,13 @@
 
 	mutex_lock(&dev_priv->psr.lock);
 	seq_printf(m, "Enabled: %s\n", yesno((bool)dev_priv->psr.enabled));
-	seq_printf(m, "Active: %s\n", yesno(dev_priv->psr.active));
 	seq_printf(m, "Busy frontbuffer bits: 0x%03x\n",
 		   dev_priv->psr.busy_frontbuffer_bits);
 	seq_printf(m, "Re-enable work scheduled: %s\n",
 		   yesno(work_busy(&dev_priv->psr.work.work)));
 
 	if (HAS_DDI(dev_priv)) {
-		if (dev_priv->psr.psr2_support)
+		if (dev_priv->psr.psr2_enabled)
 			enabled = I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE;
 		else
 			enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
@@ -2624,18 +2701,67 @@
 
 		seq_printf(m, "Performance_Counter: %u\n", psrperf);
 	}
-	if (dev_priv->psr.psr2_support) {
+	if (dev_priv->psr.psr2_enabled) {
 		u32 psr2 = I915_READ(EDP_PSR2_STATUS);
 
 		seq_printf(m, "EDP_PSR2_STATUS: %x [%s]\n",
 			   psr2, psr2_live_status(psr2));
 	}
+
+	if (dev_priv->psr.enabled) {
+		struct drm_dp_aux *aux = &dev_priv->psr.enabled->aux;
+		u8 val;
+
+		if (drm_dp_dpcd_readb(aux, DP_PSR_STATUS, &val) == 1)
+			seq_printf(m, "Sink PSR status: 0x%x [%s]\n", val,
+				   psr_sink_status(val));
+	}
 	mutex_unlock(&dev_priv->psr.lock);
 
+	if (READ_ONCE(dev_priv->psr.debug)) {
+		seq_printf(m, "Last attempted entry at: %lld\n",
+			   dev_priv->psr.last_entry_attempt);
+		seq_printf(m, "Last exit at: %lld\n",
+			   dev_priv->psr.last_exit);
+	}
+
 	intel_runtime_pm_put(dev_priv);
 	return 0;
 }
 
+static int
+i915_edp_psr_debug_set(void *data, u64 val)
+{
+	struct drm_i915_private *dev_priv = data;
+
+	if (!CAN_PSR(dev_priv))
+		return -ENODEV;
+
+	DRM_DEBUG_KMS("PSR debug %s\n", enableddisabled(val));
+
+	intel_runtime_pm_get(dev_priv);
+	intel_psr_irq_control(dev_priv, !!val);
+	intel_runtime_pm_put(dev_priv);
+
+	return 0;
+}
+
+static int
+i915_edp_psr_debug_get(void *data, u64 *val)
+{
+	struct drm_i915_private *dev_priv = data;
+
+	if (!CAN_PSR(dev_priv))
+		return -ENODEV;
+
+	*val = READ_ONCE(dev_priv->psr.debug);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops,
+			i915_edp_psr_debug_get, i915_edp_psr_debug_set,
+			"%llu\n");
+
 static int i915_sink_crc(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
@@ -3231,7 +3357,8 @@
 	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
 		struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
 
-		seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->name, pll->id);
+		seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->info->name,
+			   pll->info->id);
 		seq_printf(m, " crtc_mask: 0x%08x, active: 0x%x, on: %s\n",
 			   pll->state.crtc_mask, pll->active_mask, yesno(pll->on));
 		seq_printf(m, " tracked hardware state:\n");
@@ -3241,6 +3368,28 @@
 		seq_printf(m, " fp0:     0x%08x\n", pll->state.hw_state.fp0);
 		seq_printf(m, " fp1:     0x%08x\n", pll->state.hw_state.fp1);
 		seq_printf(m, " wrpll:   0x%08x\n", pll->state.hw_state.wrpll);
+		seq_printf(m, " cfgcr0:  0x%08x\n", pll->state.hw_state.cfgcr0);
+		seq_printf(m, " cfgcr1:  0x%08x\n", pll->state.hw_state.cfgcr1);
+		seq_printf(m, " mg_refclkin_ctl:        0x%08x\n",
+			   pll->state.hw_state.mg_refclkin_ctl);
+		seq_printf(m, " mg_clktop2_coreclkctl1: 0x%08x\n",
+			   pll->state.hw_state.mg_clktop2_coreclkctl1);
+		seq_printf(m, " mg_clktop2_hsclkctl:    0x%08x\n",
+			   pll->state.hw_state.mg_clktop2_hsclkctl);
+		seq_printf(m, " mg_pll_div0:  0x%08x\n",
+			   pll->state.hw_state.mg_pll_div0);
+		seq_printf(m, " mg_pll_div1:  0x%08x\n",
+			   pll->state.hw_state.mg_pll_div1);
+		seq_printf(m, " mg_pll_lf:    0x%08x\n",
+			   pll->state.hw_state.mg_pll_lf);
+		seq_printf(m, " mg_pll_frac_lock: 0x%08x\n",
+			   pll->state.hw_state.mg_pll_frac_lock);
+		seq_printf(m, " mg_pll_ssc:   0x%08x\n",
+			   pll->state.hw_state.mg_pll_ssc);
+		seq_printf(m, " mg_pll_bias:  0x%08x\n",
+			   pll->state.hw_state.mg_pll_bias);
+		seq_printf(m, " mg_pll_tdc_coldst_bias: 0x%08x\n",
+			   pll->state.hw_state.mg_pll_tdc_coldst_bias);
 	}
 	drm_modeset_unlock_all(dev);
 
@@ -3249,24 +3398,13 @@
 
 static int i915_wa_registers(struct seq_file *m, void *unused)
 {
-	int i;
-	int ret;
-	struct intel_engine_cs *engine;
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_device *dev = &dev_priv->drm;
 	struct i915_workarounds *workarounds = &dev_priv->workarounds;
-	enum intel_engine_id id;
-
-	ret = mutex_lock_interruptible(&dev->struct_mutex);
-	if (ret)
-		return ret;
+	int i;
 
 	intel_runtime_pm_get(dev_priv);
 
 	seq_printf(m, "Workarounds applied: %d\n", workarounds->count);
-	for_each_engine(engine, dev_priv, id)
-		seq_printf(m, "HW whitelist count for %s: %d\n",
-			   engine->name, workarounds->hw_whitelist_count[id]);
 	for (i = 0; i < workarounds->count; ++i) {
 		i915_reg_t addr;
 		u32 mask, value, read;
@@ -3282,7 +3420,6 @@
 	}
 
 	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
@@ -3567,7 +3704,8 @@
 
 static int i915_displayport_test_active_show(struct seq_file *m, void *data)
 {
-	struct drm_device *dev = m->private;
+	struct drm_i915_private *dev_priv = m->private;
+	struct drm_device *dev = &dev_priv->drm;
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	struct intel_dp *intel_dp;
@@ -3601,10 +3739,8 @@
 static int i915_displayport_test_active_open(struct inode *inode,
 					     struct file *file)
 {
-	struct drm_i915_private *dev_priv = inode->i_private;
-
 	return single_open(file, i915_displayport_test_active_show,
-			   &dev_priv->drm);
+			   inode->i_private);
 }
 
 static const struct file_operations i915_displayport_test_active_fops = {
@@ -3618,7 +3754,8 @@
 
 static int i915_displayport_test_data_show(struct seq_file *m, void *data)
 {
-	struct drm_device *dev = m->private;
+	struct drm_i915_private *dev_priv = m->private;
+	struct drm_device *dev = &dev_priv->drm;
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	struct intel_dp *intel_dp;
@@ -3657,26 +3794,12 @@
 
 	return 0;
 }
-static int i915_displayport_test_data_open(struct inode *inode,
-					   struct file *file)
-{
-	struct drm_i915_private *dev_priv = inode->i_private;
-
-	return single_open(file, i915_displayport_test_data_show,
-			   &dev_priv->drm);
-}
-
-static const struct file_operations i915_displayport_test_data_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_displayport_test_data_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release
-};
+DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_data);
 
 static int i915_displayport_test_type_show(struct seq_file *m, void *data)
 {
-	struct drm_device *dev = m->private;
+	struct drm_i915_private *dev_priv = m->private;
+	struct drm_device *dev = &dev_priv->drm;
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	struct intel_dp *intel_dp;
@@ -3703,23 +3826,7 @@
 
 	return 0;
 }
-
-static int i915_displayport_test_type_open(struct inode *inode,
-				       struct file *file)
-{
-	struct drm_i915_private *dev_priv = inode->i_private;
-
-	return single_open(file, i915_displayport_test_type_show,
-			   &dev_priv->drm);
-}
-
-static const struct file_operations i915_displayport_test_type_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_displayport_test_type_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release
-};
+DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_type);
 
 static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
 {
@@ -3987,8 +4094,8 @@
 		engine->hangcheck.stalled = true;
 	}
 
-	i915_handle_error(i915, val, "Manually set wedged engine mask = %llx",
-			  val);
+	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
+			  "Manually set wedged engine mask = %llx", val);
 
 	wait_on_bit(&i915->gpu_error.flags,
 		    I915_RESET_HANDOFF,
@@ -4152,119 +4259,6 @@
 			"0x%08llx\n");
 
 static int
-i915_max_freq_get(void *data, u64 *val)
-{
-	struct drm_i915_private *dev_priv = data;
-
-	if (INTEL_GEN(dev_priv) < 6)
-		return -ENODEV;
-
-	*val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit);
-	return 0;
-}
-
-static int
-i915_max_freq_set(void *data, u64 val)
-{
-	struct drm_i915_private *dev_priv = data;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 hw_max, hw_min;
-	int ret;
-
-	if (INTEL_GEN(dev_priv) < 6)
-		return -ENODEV;
-
-	DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val);
-
-	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
-	if (ret)
-		return ret;
-
-	/*
-	 * Turbo will still be enabled, but won't go above the set value.
-	 */
-	val = intel_freq_opcode(dev_priv, val);
-
-	hw_max = rps->max_freq;
-	hw_min = rps->min_freq;
-
-	if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		return -EINVAL;
-	}
-
-	rps->max_freq_softlimit = val;
-
-	if (intel_set_rps(dev_priv, val))
-		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
-
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops,
-			i915_max_freq_get, i915_max_freq_set,
-			"%llu\n");
-
-static int
-i915_min_freq_get(void *data, u64 *val)
-{
-	struct drm_i915_private *dev_priv = data;
-
-	if (INTEL_GEN(dev_priv) < 6)
-		return -ENODEV;
-
-	*val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit);
-	return 0;
-}
-
-static int
-i915_min_freq_set(void *data, u64 val)
-{
-	struct drm_i915_private *dev_priv = data;
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 hw_max, hw_min;
-	int ret;
-
-	if (INTEL_GEN(dev_priv) < 6)
-		return -ENODEV;
-
-	DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val);
-
-	ret = mutex_lock_interruptible(&dev_priv->pcu_lock);
-	if (ret)
-		return ret;
-
-	/*
-	 * Turbo will still be enabled, but won't go below the set value.
-	 */
-	val = intel_freq_opcode(dev_priv, val);
-
-	hw_max = rps->max_freq;
-	hw_min = rps->min_freq;
-
-	if (val < hw_min ||
-	    val > hw_max || val > rps->max_freq_softlimit) {
-		mutex_unlock(&dev_priv->pcu_lock);
-		return -EINVAL;
-	}
-
-	rps->min_freq_softlimit = val;
-
-	if (intel_set_rps(dev_priv, val))
-		DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n");
-
-	mutex_unlock(&dev_priv->pcu_lock);
-
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops,
-			i915_min_freq_get, i915_min_freq_set,
-			"%llu\n");
-
-static int
 i915_cache_sharing_get(void *data, u64 *val)
 {
 	struct drm_i915_private *dev_priv = data;
@@ -4316,9 +4310,10 @@
 static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 					  struct sseu_dev_info *sseu)
 {
-	int ss_max = 2;
+#define SS_MAX 2
+	const int ss_max = SS_MAX;
+	u32 sig1[SS_MAX], sig2[SS_MAX];
 	int ss;
-	u32 sig1[ss_max], sig2[ss_max];
 
 	sig1[0] = I915_READ(CHV_POWER_SS0_SIG1);
 	sig1[1] = I915_READ(CHV_POWER_SS1_SIG1);
@@ -4342,15 +4337,16 @@
 		sseu->eu_per_subslice = max_t(unsigned int,
 					      sseu->eu_per_subslice, eu_cnt);
 	}
+#undef SS_MAX
 }
 
 static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 				     struct sseu_dev_info *sseu)
 {
+#define SS_MAX 6
 	const struct intel_device_info *info = INTEL_INFO(dev_priv);
+	u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
 	int s, ss;
-	u32 s_reg[info->sseu.max_slices];
-	u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2];
 
 	for (s = 0; s < info->sseu.max_slices; s++) {
 		/*
@@ -4397,15 +4393,16 @@
 						      eu_cnt);
 		}
 	}
+#undef SS_MAX
 }
 
 static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 				    struct sseu_dev_info *sseu)
 {
+#define SS_MAX 3
 	const struct intel_device_info *info = INTEL_INFO(dev_priv);
+	u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2];
 	int s, ss;
-	u32 s_reg[info->sseu.max_slices];
-	u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2];
 
 	for (s = 0; s < info->sseu.max_slices; s++) {
 		s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s));
@@ -4452,6 +4449,7 @@
 						      eu_cnt);
 		}
 	}
+#undef SS_MAX
 }
 
 static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
@@ -4703,6 +4701,67 @@
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_drrs_ctl_fops, NULL, i915_drrs_ctl_set, "%llu\n");
 
+static ssize_t
+i915_fifo_underrun_reset_write(struct file *filp,
+			       const char __user *ubuf,
+			       size_t cnt, loff_t *ppos)
+{
+	struct drm_i915_private *dev_priv = filp->private_data;
+	struct intel_crtc *intel_crtc;
+	struct drm_device *dev = &dev_priv->drm;
+	int ret;
+	bool reset;
+
+	ret = kstrtobool_from_user(ubuf, cnt, &reset);
+	if (ret)
+		return ret;
+
+	if (!reset)
+		return cnt;
+
+	for_each_intel_crtc(dev, intel_crtc) {
+		struct drm_crtc_commit *commit;
+		struct intel_crtc_state *crtc_state;
+
+		ret = drm_modeset_lock_single_interruptible(&intel_crtc->base.mutex);
+		if (ret)
+			return ret;
+
+		crtc_state = to_intel_crtc_state(intel_crtc->base.state);
+		commit = crtc_state->base.commit;
+		if (commit) {
+			ret = wait_for_completion_interruptible(&commit->hw_done);
+			if (!ret)
+				ret = wait_for_completion_interruptible(&commit->flip_done);
+		}
+
+		if (!ret && crtc_state->base.active) {
+			DRM_DEBUG_KMS("Re-arming FIFO underruns on pipe %c\n",
+				      pipe_name(intel_crtc->pipe));
+
+			intel_crtc_arm_fifo_underrun(intel_crtc, crtc_state);
+		}
+
+		drm_modeset_unlock(&intel_crtc->base.mutex);
+
+		if (ret)
+			return ret;
+	}
+
+	ret = intel_fbc_reset_underrun(dev_priv);
+	if (ret)
+		return ret;
+
+	return cnt;
+}
+
+static const struct file_operations i915_fifo_underrun_reset_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = i915_fifo_underrun_reset_write,
+	.llseek = default_llseek,
+};
+
 static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
@@ -4760,8 +4819,6 @@
 	const struct file_operations *fops;
 } i915_debugfs_files[] = {
 	{"i915_wedged", &i915_wedged_fops},
-	{"i915_max_freq", &i915_max_freq_fops},
-	{"i915_min_freq", &i915_min_freq_fops},
 	{"i915_cache_sharing", &i915_cache_sharing_fops},
 	{"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
 	{"i915_ring_test_irq", &i915_ring_test_irq_fops},
@@ -4770,6 +4827,7 @@
 	{"i915_error_state", &i915_error_state_fops},
 	{"i915_gpu_info", &i915_gpu_info_fops},
 #endif
+	{"i915_fifo_underrun_reset", &i915_fifo_underrun_reset_ops},
 	{"i915_next_seqno", &i915_next_seqno_fops},
 	{"i915_display_crc_ctl", &i915_display_crc_ctl_fops},
 	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
@@ -4779,10 +4837,12 @@
 	{"i915_dp_test_data", &i915_displayport_test_data_fops},
 	{"i915_dp_test_type", &i915_displayport_test_type_fops},
 	{"i915_dp_test_active", &i915_displayport_test_active_fops},
-	{"i915_guc_log_control", &i915_guc_log_control_fops},
+	{"i915_guc_log_level", &i915_guc_log_level_fops},
+	{"i915_guc_log_relay", &i915_guc_log_relay_fops},
 	{"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops},
 	{"i915_ipc_status", &i915_ipc_status_fops},
-	{"i915_drrs_ctl", &i915_drrs_ctl_fops}
+	{"i915_drrs_ctl", &i915_drrs_ctl_fops},
+	{"i915_edp_psr_debug", &i915_edp_psr_debug_fops}
 };
 
 int i915_debugfs_register(struct drm_i915_private *dev_priv)
@@ -4876,19 +4936,7 @@
 
 	return 0;
 }
-
-static int i915_dpcd_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, i915_dpcd_show, inode->i_private);
-}
-
-static const struct file_operations i915_dpcd_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_dpcd_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(i915_dpcd);
 
 static int i915_panel_show(struct seq_file *m, void *data)
 {
@@ -4910,19 +4958,7 @@
 
 	return 0;
 }
-
-static int i915_panel_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, i915_panel_show, inode->i_private);
-}
-
-static const struct file_operations i915_panel_fops = {
-	.owner = THIS_MODULE,
-	.open = i915_panel_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(i915_panel);
 
 /**
  * i915_debugfs_connector_add - add i915 specific connector debugfs files
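
Several of the conversions above (i915_dpcd, i915_panel, the DP test files) lean on DEFINE_SHOW_ATTRIBUTE() from <linux/seq_file.h>, which generates the single_open()/seq_read()/single_release() file_operations from a <name>_show() function. The entire pattern, sketched with an illustrative demo name:

  #include <linux/debugfs.h>
  #include <linux/seq_file.h>

  /* must be named <name>_show for DEFINE_SHOW_ATTRIBUTE(<name>) */
  static int demo_show(struct seq_file *m, void *unused)
  {
  	/* m->private carries the inode->i_private pointer given at
  	 * debugfs_create_file() time */
  	seq_puts(m, "demo state\n");
  	return 0;
  }
  DEFINE_SHOW_ATTRIBUTE(demo);	/* emits demo_open() and demo_fops */

  /* registration then becomes a one-liner:
   *	debugfs_create_file("demo", 0444, parent, priv, &demo_fops);
   */
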
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 3b4daaf..9c449b8 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -101,7 +101,13 @@
 		   __builtin_return_address(0), &vaf);
 
 	if (is_error && !shown_bug_once) {
-		dev_notice(kdev, "%s", FDO_BUG_MSG);
+		/*
+		 * Ask the user to file a bug report for the error, except
+		 * if they may have caused the bug by fiddling with unsafe
+		 * module parameters.
+		 */
+		if (!test_taint(TAINT_USER))
+			dev_notice(kdev, "%s", FDO_BUG_MSG);
 		shown_bug_once = true;
 	}
 
@@ -377,9 +383,9 @@
 		value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool;
 		break;
 	case I915_PARAM_HUC_STATUS:
-		intel_runtime_pm_get(dev_priv);
-		value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
-		intel_runtime_pm_put(dev_priv);
+		value = intel_huc_check_status(&dev_priv->huc);
+		if (value < 0)
+			return value;
 		break;
 	case I915_PARAM_MMAP_GTT_VERSION:
 		/* Though we've started our numbering from 1, and so class all
@@ -695,11 +701,9 @@
 	if (ret)
 		goto cleanup_irq;
 
-	intel_uc_init_fw(dev_priv);
-
 	ret = i915_gem_init(dev_priv);
 	if (ret)
-		goto cleanup_uc;
+		goto cleanup_irq;
 
 	intel_setup_overlay(dev_priv);
 
@@ -719,8 +723,6 @@
 	if (i915_gem_suspend(dev_priv))
 		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
 	i915_gem_fini(dev_priv);
-cleanup_uc:
-	intel_uc_fini_fw(dev_priv);
 cleanup_irq:
 	drm_irq_uninstall(dev);
 	intel_teardown_gmbus(dev_priv);
@@ -922,16 +924,21 @@
 	mutex_init(&dev_priv->wm.wm_mutex);
 	mutex_init(&dev_priv->pps_mutex);
 
-	intel_uc_init_early(dev_priv);
 	i915_memcpy_init_early(dev_priv);
 
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
 		goto err_engines;
 
+	ret = i915_gem_init_early(dev_priv);
+	if (ret < 0)
+		goto err_workqueues;
+
 	/* This must be called before any calls to HAS_PCH_* */
 	intel_detect_pch(dev_priv);
 
+	intel_wopcm_init_early(&dev_priv->wopcm);
+	intel_uc_init_early(dev_priv);
 	intel_pm_setup(dev_priv);
 	intel_init_dpio(dev_priv);
 	intel_power_domains_init(dev_priv);
@@ -940,18 +947,13 @@
 	intel_init_display_hooks(dev_priv);
 	intel_init_clock_gating_hooks(dev_priv);
 	intel_init_audio_hooks(dev_priv);
-	ret = i915_gem_load_init(dev_priv);
-	if (ret < 0)
-		goto err_irq;
-
 	intel_display_crc_init(dev_priv);
 
 	intel_detect_preproduction_hw(dev_priv);
 
 	return 0;
 
-err_irq:
-	intel_irq_fini(dev_priv);
+err_workqueues:
 	i915_workqueues_cleanup(dev_priv);
 err_engines:
 	i915_engines_cleanup(dev_priv);
@@ -964,8 +966,9 @@
  */
 static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 {
-	i915_gem_load_cleanup(dev_priv);
 	intel_irq_fini(dev_priv);
+	intel_uc_cleanup_early(dev_priv);
+	i915_gem_cleanup_early(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 	i915_engines_cleanup(dev_priv);
 }
@@ -1035,6 +1038,10 @@
 
 	intel_uncore_init(dev_priv);
 
+	intel_device_info_init_mmio(dev_priv);
+
+	intel_uncore_prune(dev_priv);
+
 	intel_uc_init_mmio(dev_priv);
 
 	ret = intel_engines_init_mmio(dev_priv);
@@ -1077,8 +1084,6 @@
 					    i915_modparams.enable_ppgtt);
 	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915_modparams.enable_ppgtt);
 
-	intel_uc_sanitize_options(dev_priv);
-
 	intel_gvt_sanitize_options(dev_priv);
 }
 
@@ -1244,7 +1249,6 @@
 	/* Reveal our presence to userspace */
 	if (drm_dev_register(dev, 0) == 0) {
 		i915_debugfs_register(dev_priv);
-		i915_guc_log_register(dev_priv);
 		i915_setup_sysfs(dev_priv);
 
 		/* Depends on sysfs having been initialized */
@@ -1304,7 +1308,6 @@
 	i915_pmu_unregister(dev_priv);
 
 	i915_teardown_sysfs(dev_priv);
-	i915_guc_log_unregister(dev_priv);
 	drm_dev_unregister(&dev_priv->drm);
 
 	i915_gem_shrinker_unregister(dev_priv);
@@ -1463,7 +1466,6 @@
 	i915_reset_error_state(dev_priv);
 
 	i915_gem_fini(dev_priv);
-	intel_uc_fini_fw(dev_priv);
 	intel_fbc_cleanup_cfb(dev_priv);
 
 	intel_power_domains_fini(dev_priv);
@@ -1876,7 +1878,8 @@
 /**
  * i915_reset - reset chip after a hang
  * @i915: #drm_i915_private to reset
- * @flags: Instructions
+ * @stalled_mask: mask of the stalled engines with the guilty requests
+ * @reason: user error message for why we are resetting
  *
  * Reset the chip.  Useful if a hang is detected. Marks the device as wedged
  * on failure.
@@ -1891,12 +1894,16 @@
  *   - re-init interrupt state
  *   - re-init display
  */
-void i915_reset(struct drm_i915_private *i915, unsigned int flags)
+void i915_reset(struct drm_i915_private *i915,
+		unsigned int stalled_mask,
+		const char *reason)
 {
 	struct i915_gpu_error *error = &i915->gpu_error;
 	int ret;
 	int i;
 
+	GEM_TRACE("flags=%lx\n", error->flags);
+
 	might_sleep();
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
@@ -1908,8 +1915,8 @@
 	if (!i915_gem_unset_wedged(i915))
 		goto wakeup;
 
-	if (!(flags & I915_RESET_QUIET))
-		dev_notice(i915->drm.dev, "Resetting chip after gpu hang\n");
+	if (reason)
+		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
 	error->reset_count++;
 
 	disable_irq(i915->drm.irq);
@@ -1952,7 +1959,7 @@
 		goto error;
 	}
 
-	i915_gem_reset(i915);
+	i915_gem_reset(i915, stalled_mask);
 	intel_overlay_reset(i915);
 
 	/*
@@ -1998,7 +2005,6 @@
 error:
 	i915_gem_set_wedged(i915);
 	i915_retire_requests(i915);
-	intel_gpu_reset(i915, ALL_ENGINES);
 	goto finish;
 }
 
@@ -2011,7 +2017,7 @@
 /**
  * i915_reset_engine - reset GPU engine to recover from a hang
  * @engine: engine to reset
- * @flags: options
+ * @msg: reason for GPU reset; or NULL for no dev_notice()
  *
  * Reset a specific GPU engine. Useful if a hang is detected.
  * Returns zero on successful reset or otherwise an error code.
@@ -2021,12 +2027,13 @@
  *  - reset engine (which will force the engine to idle)
  *  - re-init/configure engine
  */
-int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)
+int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 {
 	struct i915_gpu_error *error = &engine->i915->gpu_error;
 	struct i915_request *active_request;
 	int ret;
 
+	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
 	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 
 	active_request = i915_gem_reset_prepare_engine(engine);
@@ -2036,10 +2043,9 @@
 		goto out;
 	}
 
-	if (!(flags & I915_RESET_QUIET)) {
+	if (msg)
 		dev_notice(engine->i915->drm.dev,
-			   "Resetting %s after gpu hang\n", engine->name);
-	}
+			   "Resetting %s for %s\n", engine->name, msg);
 	error->reset_engine_count[engine->id]++;
 
 	if (!engine->i915->guc.execbuf_client)
@@ -2059,7 +2065,7 @@
 	 * active request and can drop it, adjust head to skip the offending
 	 * request to resume executing remaining requests in the queue.
 	 */
-	i915_gem_reset_engine(engine, active_request);
+	i915_gem_reset_engine(engine, active_request, true);
 
 	/*
 	 * The engine and its registers (and workarounds in case of render)
@@ -2468,10 +2474,13 @@
 	/*
 	 * RC6 transitioning can be delayed up to 2 msec (see
 	 * valleyview_enable_rps), use 3 msec for safety.
+	 *
+	 * This can fail to turn off the rc6 if the GPU is stuck after a failed
+	 * reset and we are trying to force the machine to sleep.
 	 */
 	if (vlv_wait_for_pw_status(dev_priv, mask, val))
-		DRM_ERROR("timeout waiting for GT wells to go %s\n",
-			  onoff(wait_for_on));
+		DRM_DEBUG_DRIVER("timeout waiting for GT wells to go %s\n",
+				 onoff(wait_for_on));
 }
 
 static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv)
@@ -2822,10 +2831,10 @@
 	DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER),
+	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER),
+	DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER),
+	DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
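
The i915_drv.c hunk above gates the "file a bug report" notice on test_taint(TAINT_USER), since a kernel tainted by forced module parameters makes such reports unreliable. The same guard in isolation (helper name and message are illustrative, not from this series):

  #include <linux/device.h>
  #include <linux/kernel.h>	/* test_taint(), TAINT_USER */

  static bool bug_msg_shown;

  static void maybe_request_bug_report(struct device *dev)
  {
  	if (bug_msg_shown)
  		return;
  	/* TAINT_USER is set when, e.g., unsafe module parameters are
  	 * forced on; such configurations aren't worth an upstream report. */
  	if (!test_taint(TAINT_USER))
  		dev_notice(dev, "please file a bug report\n");
  	bug_msg_shown = true;
  }
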
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ce18b6c..34c125e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -64,6 +64,7 @@
 #include "intel_opregion.h"
 #include "intel_ringbuffer.h"
 #include "intel_uncore.h"
+#include "intel_wopcm.h"
 #include "intel_uc.h"
 
 #include "i915_gem.h"
@@ -71,9 +72,10 @@
 #include "i915_gem_fence_reg.h"
 #include "i915_gem_object.h"
 #include "i915_gem_gtt.h"
-#include "i915_gem_timeline.h"
-
+#include "i915_gpu_error.h"
 #include "i915_request.h"
+#include "i915_scheduler.h"
+#include "i915_timeline.h"
 #include "i915_vma.h"
 
 #include "intel_gvt.h"
@@ -83,8 +85,8 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20180308"
-#define DRIVER_TIMESTAMP	1520513379
+#define DRIVER_DATE		"20180514"
+#define DRIVER_TIMESTAMP	1526300884
 
 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
  * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -261,6 +263,7 @@
 	HPD_PORT_C,
 	HPD_PORT_D,
 	HPD_PORT_E,
+	HPD_PORT_F,
 	HPD_NUM_PINS
 };
 
@@ -453,172 +456,6 @@
 	uint32_t allowed_dc_mask;
 };
 
-struct intel_display_error_state;
-
-struct i915_gpu_state {
-	struct kref ref;
-	ktime_t time;
-	ktime_t boottime;
-	ktime_t uptime;
-
-	struct drm_i915_private *i915;
-
-	char error_msg[128];
-	bool simulated;
-	bool awake;
-	bool wakelock;
-	bool suspended;
-	int iommu;
-	u32 reset_count;
-	u32 suspend_count;
-	struct intel_device_info device_info;
-	struct intel_driver_caps driver_caps;
-	struct i915_params params;
-
-	struct i915_error_uc {
-		struct intel_uc_fw guc_fw;
-		struct intel_uc_fw huc_fw;
-		struct drm_i915_error_object *guc_log;
-	} uc;
-
-	/* Generic register state */
-	u32 eir;
-	u32 pgtbl_er;
-	u32 ier;
-	u32 gtier[4], ngtier;
-	u32 ccid;
-	u32 derrmr;
-	u32 forcewake;
-	u32 error; /* gen6+ */
-	u32 err_int; /* gen7 */
-	u32 fault_data0; /* gen8, gen9 */
-	u32 fault_data1; /* gen8, gen9 */
-	u32 done_reg;
-	u32 gac_eco;
-	u32 gam_ecochk;
-	u32 gab_ctl;
-	u32 gfx_mode;
-
-	u32 nfence;
-	u64 fence[I915_MAX_NUM_FENCES];
-	struct intel_overlay_error_state *overlay;
-	struct intel_display_error_state *display;
-
-	struct drm_i915_error_engine {
-		int engine_id;
-		/* Software tracked state */
-		bool idle;
-		bool waiting;
-		int num_waiters;
-		unsigned long hangcheck_timestamp;
-		bool hangcheck_stalled;
-		enum intel_engine_hangcheck_action hangcheck_action;
-		struct i915_address_space *vm;
-		int num_requests;
-		u32 reset_count;
-
-		/* position of active request inside the ring */
-		u32 rq_head, rq_post, rq_tail;
-
-		/* our own tracking of ring head and tail */
-		u32 cpu_ring_head;
-		u32 cpu_ring_tail;
-
-		u32 last_seqno;
-
-		/* Register state */
-		u32 start;
-		u32 tail;
-		u32 head;
-		u32 ctl;
-		u32 mode;
-		u32 hws;
-		u32 ipeir;
-		u32 ipehr;
-		u32 bbstate;
-		u32 instpm;
-		u32 instps;
-		u32 seqno;
-		u64 bbaddr;
-		u64 acthd;
-		u32 fault_reg;
-		u64 faddr;
-		u32 rc_psmi; /* sleep state */
-		u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
-		struct intel_instdone instdone;
-
-		struct drm_i915_error_context {
-			char comm[TASK_COMM_LEN];
-			pid_t pid;
-			u32 handle;
-			u32 hw_id;
-			int priority;
-			int ban_score;
-			int active;
-			int guilty;
-			bool bannable;
-		} context;
-
-		struct drm_i915_error_object {
-			u64 gtt_offset;
-			u64 gtt_size;
-			int page_count;
-			int unused;
-			u32 *pages[0];
-		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
-
-		struct drm_i915_error_object **user_bo;
-		long user_bo_count;
-
-		struct drm_i915_error_object *wa_ctx;
-		struct drm_i915_error_object *default_state;
-
-		struct drm_i915_error_request {
-			long jiffies;
-			pid_t pid;
-			u32 context;
-			int priority;
-			int ban_score;
-			u32 seqno;
-			u32 head;
-			u32 tail;
-		} *requests, execlist[EXECLIST_MAX_PORTS];
-		unsigned int num_ports;
-
-		struct drm_i915_error_waiter {
-			char comm[TASK_COMM_LEN];
-			pid_t pid;
-			u32 seqno;
-		} *waiters;
-
-		struct {
-			u32 gfx_mode;
-			union {
-				u64 pdp[4];
-				u32 pp_dir_base;
-			};
-		} vm_info;
-	} engine[I915_NUM_ENGINES];
-
-	struct drm_i915_error_buffer {
-		u32 size;
-		u32 name;
-		u32 rseqno[I915_NUM_ENGINES], wseqno;
-		u64 gtt_offset;
-		u32 read_domains;
-		u32 write_domain;
-		s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
-		u32 tiling:2;
-		u32 dirty:1;
-		u32 purgeable:1;
-		u32 userptr:1;
-		s32 engine:4;
-		u32 cache_level:3;
-	} *active_bo[I915_NUM_ENGINES], *pinned_bo;
-	u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
-	struct i915_address_space *active_vm[I915_NUM_ENGINES];
-};
-
 enum i915_cache_level {
 	I915_CACHE_NONE = 0,
 	I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
@@ -766,12 +603,16 @@
 	bool active;
 	struct delayed_work work;
 	unsigned busy_frontbuffer_bits;
-	bool psr2_support;
-	bool aux_frame_sync;
+	bool sink_psr2_support;
 	bool link_standby;
-	bool y_cord_support;
 	bool colorimetry_support;
 	bool alpm;
+	bool has_hw_tracking;
+	bool psr2_enabled;
+	u8 sink_sync_latency;
+	bool debug;
+	ktime_t last_entry_attempt;
+	ktime_t last_exit;
 
 	void (*enable_source)(struct intel_dp *,
 			      const struct intel_crtc_state *);
@@ -1146,16 +987,6 @@
 	u32 object_count;
 };
 
-struct drm_i915_error_state_buf {
-	struct drm_i915_private *i915;
-	unsigned bytes;
-	unsigned size;
-	int err;
-	u8 *buf;
-	loff_t start;
-	loff_t pos;
-};
-
 #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */
 
 #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */
@@ -1164,102 +995,6 @@
 #define I915_ENGINE_DEAD_TIMEOUT  (4 * HZ)  /* Seqno, head and subunits dead */
 #define I915_SEQNO_DEAD_TIMEOUT   (12 * HZ) /* Seqno dead with active head */
 
-struct i915_gpu_error {
-	/* For hangcheck timer */
-#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
-#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
-
-	struct delayed_work hangcheck_work;
-
-	/* For reset and error_state handling. */
-	spinlock_t lock;
-	/* Protected by the above dev->gpu_error.lock. */
-	struct i915_gpu_state *first_error;
-
-	atomic_t pending_fb_pin;
-
-	unsigned long missed_irq_rings;
-
-	/**
-	 * State variable controlling the reset flow and count
-	 *
-	 * This is a counter which gets incremented when reset is triggered,
-	 *
-	 * Before the reset commences, the I915_RESET_BACKOFF bit is set
-	 * meaning that any waiters holding onto the struct_mutex should
-	 * relinquish the lock immediately in order for the reset to start.
-	 *
-	 * If reset is not completed succesfully, the I915_WEDGE bit is
-	 * set meaning that hardware is terminally sour and there is no
-	 * recovery. All waiters on the reset_queue will be woken when
-	 * that happens.
-	 *
-	 * This counter is used by the wait_seqno code to notice that reset
-	 * event happened and it needs to restart the entire ioctl (since most
-	 * likely the seqno it waited for won't ever signal anytime soon).
-	 *
-	 * This is important for lock-free wait paths, where no contended lock
-	 * naturally enforces the correct ordering between the bail-out of the
-	 * waiter and the gpu reset work code.
-	 */
-	unsigned long reset_count;
-
-	/**
-	 * flags: Control various stages of the GPU reset
-	 *
-	 * #I915_RESET_BACKOFF - When we start a reset, we want to stop any
-	 * other users acquiring the struct_mutex. To do this we set the
-	 * #I915_RESET_BACKOFF bit in the error flags when we detect a reset
-	 * and then check for that bit before acquiring the struct_mutex (in
-	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
-	 * secondary role in preventing two concurrent global reset attempts.
-	 *
-	 * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
-	 * struct_mutex. We try to acquire the struct_mutex in the reset worker,
-	 * but it may be held by some long running waiter (that we cannot
-	 * interrupt without causing trouble). Once we are ready to do the GPU
-	 * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
-	 * they already hold the struct_mutex and want to participate they can
-	 * inspect the bit and do the reset directly, otherwise the worker
-	 * waits for the struct_mutex.
-	 *
-	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
-	 * acquire the struct_mutex to reset an engine, we need an explicit
-	 * flag to prevent two concurrent reset attempts in the same engine.
-	 * As the number of engines continues to grow, allocate the flags from
-	 * the most significant bits.
-	 *
-	 * #I915_WEDGED - If reset fails and we can no longer use the GPU,
-	 * we set the #I915_WEDGED bit. Prior to command submission, e.g.
-	 * i915_request_alloc(), this bit is checked and the sequence
-	 * aborted (with -EIO reported to userspace) if set.
-	 */
-	unsigned long flags;
-#define I915_RESET_BACKOFF	0
-#define I915_RESET_HANDOFF	1
-#define I915_RESET_MODESET	2
-#define I915_WEDGED		(BITS_PER_LONG - 1)
-#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
-
-	/** Number of times an engine has been reset */
-	u32 reset_engine_count[I915_NUM_ENGINES];
-
-	/**
-	 * Waitqueue to signal when a hang is detected. Used to for waiters
-	 * to release the struct_mutex for the reset to procede.
-	 */
-	wait_queue_head_t wait_queue;
-
-	/**
-	 * Waitqueue to signal when the reset has completed. Used by clients
-	 * that wait for dev_priv->mm.wedged to settle.
-	 */
-	wait_queue_head_t reset_queue;
-
-	/* For missed irq/seqno simulation. */
-	unsigned long test_irq_rings;
-};
-
 enum modeset_restore {
 	MODESET_ON_LID_OPEN,
 	MODESET_DONE,
@@ -1338,6 +1073,7 @@
 	} edp;
 
 	struct {
+		bool enable;
 		bool full_link;
 		bool require_aux_wakeup;
 		int idle_frames;
@@ -1451,11 +1187,13 @@
 }
 
 struct skl_ddb_allocation {
-	struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* packed/uv */
-	struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES];
+	/* packed/y */
+	struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES];
+	struct skl_ddb_entry uv_plane[I915_MAX_PIPES][I915_MAX_PLANES];
+	u8 enabled_slices; /* GEN11 has 2 configurable slices */
 };
 
-struct skl_wm_values {
+struct skl_ddb_values {
 	unsigned dirty_pipes;
 	struct skl_ddb_allocation ddb;
 };
@@ -1470,6 +1208,7 @@
 struct skl_wm_params {
 	bool x_tiled, y_tiled;
 	bool rc_surface;
+	bool is_planar;
 	uint32_t width;
 	uint8_t cpp;
 	uint32_t plane_pixel_rate;
@@ -1564,7 +1303,6 @@
 struct i915_workarounds {
 	struct i915_wa_reg reg[I915_MAX_WA_REGS];
 	u32 count;
-	u32 hw_whitelist_count[I915_NUM_ENGINES];
 };
 
 struct i915_virtual_gpu {
@@ -1860,6 +1598,8 @@
 
 	struct intel_gvt *gvt;
 
+	struct intel_wopcm wopcm;
+
 	struct intel_huc huc;
 	struct intel_guc guc;
 
@@ -2152,7 +1892,7 @@
 		/* current hardware state */
 		union {
 			struct ilk_wm_values hw;
-			struct skl_wm_values skl_hw;
+			struct skl_ddb_values skl_hw;
 			struct vlv_wm_values vlv;
 			struct g4x_wm_values g4x;
 		};
@@ -2321,8 +2061,11 @@
 		void (*cleanup_engine)(struct intel_engine_cs *engine);
 
 		struct list_head timelines;
-		struct i915_gem_timeline global_timeline;
+
+		struct list_head active_rings;
+		struct list_head closed_vma;
 		u32 active_requests;
+		u32 request_serial;
 
 		/**
 		 * Is the GPU currently considered idle, or busy executing
@@ -2392,6 +2135,11 @@
 	return to_i915(dev_get_drvdata(kdev));
 }
 
+static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm)
+{
+	return container_of(wopcm, struct drm_i915_private, wopcm);
+}
+
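
wopcm_to_i915() is the same container_of() back-pointer idiom as guc_to_i915() just below: given a pointer to a member embedded in drm_i915_private, recover the enclosing structure by subtracting the member's offset. A self-contained sketch of the idiom (the struct and field names here are made up):

#include <stddef.h>
#include <stdio.h>

/* Userspace restatement of the kernel macro, for illustration only. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct outer {
	int other_state;
	struct { int fw_size; } wopcm;	/* embedded member, like dev_priv->wopcm */
};

int main(void)
{
	struct outer o = { .other_state = 42 };
	/* Given only a pointer to the embedded member... */
	void *member = &o.wopcm;
	/* ...recover the enclosing structure. */
	struct outer *back = container_of(member, struct outer, wopcm);

	printf("%d\n", back->other_state);	/* prints 42 */
	return 0;
}
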
 static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
 {
 	return container_of(guc, struct drm_i915_private, guc);
@@ -2411,8 +2159,10 @@
 
 /* Iterator over subset of engines selected by mask */
 #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
-	for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask;	\
-	     tmp__ ? (engine__ = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; )
+	for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->ring_mask; \
+	     (tmp__) ? \
+	     ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \
+	     0;)
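
The rewritten iterator simply walks the set bits of mask__, consuming one bit per pass via __mask_next_bit(). A standalone analogue of that helper, assuming it returns and clears the lowest set bit (which is what the loop structure suggests; __builtin_ctz is a GCC/Clang builtin):

#include <stdio.h>

/* Plausible stand-in for the kernel's __mask_next_bit(): return the
 * index of the lowest set bit and clear it from the mask. */
static inline unsigned int mask_next_bit(unsigned int *mask)
{
	unsigned int bit = __builtin_ctz(*mask);

	*mask &= *mask - 1;	/* clear the lowest set bit */
	return bit;
}

int main(void)
{
	unsigned int tmp = 0x0b;	/* engines 0, 1 and 3 selected */

	while (tmp)
		printf("visit engine %u\n", mask_next_bit(&tmp));
	return 0;
}
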
 
 enum hdmi_force_audio {
 	HDMI_AUDIO_OFF_DVI = -2,	/* no aux data for HDMI-DVI converter */
@@ -2720,6 +2470,15 @@
 #define IS_CNL_REVID(p, since, until) \
 	(IS_CANNONLAKE(p) && IS_REVID(p, since, until))
 
+#define ICL_REVID_A0		0x0
+#define ICL_REVID_A2		0x1
+#define ICL_REVID_B0		0x3
+#define ICL_REVID_B2		0x4
+#define ICL_REVID_C0		0x5
+
+#define IS_ICL_REVID(p, since, until) \
+	(IS_ICELAKE(p) && IS_REVID(p, since, until))
+
 /*
  * The genX designation typically refers to the render engine, so render
  * capability related checks should use IS_GEN, while display and other checks
@@ -2963,10 +2722,11 @@
 extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
 extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
 
-#define I915_RESET_QUIET BIT(0)
-extern void i915_reset(struct drm_i915_private *i915, unsigned int flags);
+extern void i915_reset(struct drm_i915_private *i915,
+		       unsigned int stalled_mask,
+		       const char *reason);
 extern int i915_reset_engine(struct intel_engine_cs *engine,
-			     unsigned int flags);
+			     const char *reason);
 
 extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
 extern int intel_reset_guc(struct drm_i915_private *dev_priv);
@@ -3014,10 +2774,12 @@
 			   &dev_priv->gpu_error.hangcheck_work, delay);
 }
 
-__printf(3, 4)
+__printf(4, 5)
 void i915_handle_error(struct drm_i915_private *dev_priv,
 		       u32 engine_mask,
+		       unsigned long flags,
 		       const char *fmt, ...);
+#define I915_ERROR_CAPTURE BIT(0)
 
 extern void intel_irq_init(struct drm_i915_private *dev_priv);
 extern void intel_irq_fini(struct drm_i915_private *dev_priv);
@@ -3132,8 +2894,8 @@
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 void i915_gem_sanitize(struct drm_i915_private *i915);
-int i915_gem_load_init(struct drm_i915_private *dev_priv);
-void i915_gem_load_cleanup(struct drm_i915_private *dev_priv);
+int i915_gem_init_early(struct drm_i915_private *dev_priv);
+void i915_gem_cleanup_early(struct drm_i915_private *dev_priv);
 void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
 int i915_gem_freeze(struct drm_i915_private *dev_priv);
 int i915_gem_freeze_late(struct drm_i915_private *dev_priv);
@@ -3388,13 +3150,15 @@
 struct i915_request *
 i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
-void i915_gem_reset(struct drm_i915_private *dev_priv);
+void i915_gem_reset(struct drm_i915_private *dev_priv,
+		    unsigned int stalled_mask);
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request);
+			   struct i915_request *request,
+			   bool stalled);
 
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
@@ -3412,7 +3176,7 @@
 			 struct intel_rps_client *rps);
 int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 				  unsigned int flags,
-				  int priority);
+				  const struct i915_sched_attr *attr);
 #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX
 
 int __must_check
@@ -3481,16 +3245,6 @@
 	return ctx;
 }
 
-static inline struct intel_timeline *
-i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
-				 struct intel_engine_cs *engine)
-{
-	struct i915_address_space *vm;
-
-	vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
-	return &vm->timeline.engine[engine->id];
-}
-
 int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file);
 int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
@@ -3589,64 +3343,6 @@
 static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {}
 #endif
 
-/* i915_gpu_error.c */
-#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
-
-__printf(2, 3)
-void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
-int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
-			    const struct i915_gpu_state *gpu);
-int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
-			      struct drm_i915_private *i915,
-			      size_t count, loff_t pos);
-static inline void i915_error_state_buf_release(
-	struct drm_i915_error_state_buf *eb)
-{
-	kfree(eb->buf);
-}
-
-struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
-void i915_capture_error_state(struct drm_i915_private *dev_priv,
-			      u32 engine_mask,
-			      const char *error_msg);
-
-static inline struct i915_gpu_state *
-i915_gpu_state_get(struct i915_gpu_state *gpu)
-{
-	kref_get(&gpu->ref);
-	return gpu;
-}
-
-void __i915_gpu_state_free(struct kref *kref);
-static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
-{
-	if (gpu)
-		kref_put(&gpu->ref, __i915_gpu_state_free);
-}
-
-struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
-void i915_reset_error_state(struct drm_i915_private *i915);
-
-#else
-
-static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
-					    u32 engine_mask,
-					    const char *error_msg)
-{
-}
-
-static inline struct i915_gpu_state *
-i915_first_error_state(struct drm_i915_private *i915)
-{
-	return NULL;
-}
-
-static inline void i915_reset_error_state(struct drm_i915_private *i915)
-{
-}
-
-#endif
-
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
 /* i915_cmd_parser.c */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7b5a9d7..0a20701 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -35,6 +35,7 @@
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
 #include "intel_mocs.h"
+#include "intel_workarounds.h"
 #include "i915_gemfs.h"
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
@@ -136,6 +137,102 @@
 	return 0;
 }
 
+static u32 __i915_gem_park(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(i915->gt.active_requests);
+	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
+
+	if (!i915->gt.awake)
+		return I915_EPOCH_INVALID;
+
+	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
+
+	/*
+	 * Be paranoid and flush a concurrent interrupt to make sure
+	 * we don't reactivate any irq tasklets after parking.
+	 *
+	 * FIXME: Note that even though we have waited for execlists to be idle,
+	 * there may still be an in-flight interrupt even though the CSB
+	 * is now empty. synchronize_irq() makes sure that a residual interrupt
+	 * is completed before we continue, but it doesn't prevent the HW from
+	 * raising a spurious interrupt later. To complete the shield we should
+	 * coordinate disabling the CS irq with flushing the interrupts.
+	 */
+	synchronize_irq(i915->drm.irq);
+
+	intel_engines_park(i915);
+	i915_timelines_park(i915);
+
+	i915_pmu_gt_parked(i915);
+	i915_vma_parked(i915);
+
+	i915->gt.awake = false;
+
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_idle(i915);
+
+	intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ);
+
+	intel_runtime_pm_put(i915);
+
+	return i915->gt.epoch;
+}
+
+void i915_gem_park(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(i915->gt.active_requests);
+
+	if (!i915->gt.awake)
+		return;
+
+	/* Defer the actual call to __i915_gem_park() to prevent ping-pongs */
+	mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100));
+}
+
+void i915_gem_unpark(struct drm_i915_private *i915)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(!i915->gt.active_requests);
+
+	if (i915->gt.awake)
+		return;
+
+	intel_runtime_pm_get_noresume(i915);
+
+	/*
+	 * It seems that the DMC likes to transition between the DC states a lot
+	 * when there are no connected displays (no active power domains) during
+	 * command submission.
+	 *
+	 * This activity has a negative impact on the performance of the chip,
+	 * with huge latencies observed in the interrupt handler and elsewhere.
+	 *
+	 * Work around it by grabbing a GT IRQ power domain whilst there is any
+	 * GT activity, preventing any DC state transitions.
+	 */
+	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+
+	i915->gt.awake = true;
+	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
+		i915->gt.epoch = 1;
+
+	intel_enable_gt_powersave(i915);
+	i915_update_gfx_val(i915);
+	if (INTEL_GEN(i915) >= 6)
+		gen6_rps_busy(i915);
+	i915_pmu_gt_unparked(i915);
+
+	intel_engines_unpark(i915);
+
+	i915_queue_hangcheck(i915);
+
+	queue_delayed_work(i915->wq,
+			   &i915->gt.retire_work,
+			   round_jiffies_up_relative(HZ));
+}
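
The park/unpark pair above implements a simple busy/idle state machine: the first request unparks the GT and bumps a wrap-safe epoch counter (zero is reserved as invalid), while parking is deferred through mod_delayed_work() so a bursty workload does not ping-pong power state. A toy model of just the epoch bookkeeping, with the deferral elided; none of this is the driver code:

#include <stdio.h>

#define EPOCH_INVALID 0

struct gt_state {
	int awake;
	unsigned int epoch;
};

static void unpark(struct gt_state *gt)
{
	if (gt->awake)
		return;
	gt->awake = 1;
	if (++gt->epoch == EPOCH_INVALID)	/* wrapped: keep 0 as invalid */
		gt->epoch = 1;
}

static void park(struct gt_state *gt)
{
	gt->awake = 0;	/* the real code defers this via mod_delayed_work() */
}

int main(void)
{
	struct gt_state gt = { .awake = 0, .epoch = EPOCH_INVALID };

	unpark(&gt);	/* epoch becomes 1 */
	park(&gt);
	unpark(&gt);	/* epoch becomes 2: a new busy span */
	printf("awake=%d epoch=%u\n", gt.awake, gt.epoch);
	return 0;
}
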
+
 int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file)
@@ -469,7 +566,8 @@
 	return timeout;
 }
 
-static void __fence_set_priority(struct dma_fence *fence, int prio)
+static void __fence_set_priority(struct dma_fence *fence,
+				 const struct i915_sched_attr *attr)
 {
 	struct i915_request *rq;
 	struct intel_engine_cs *engine;
@@ -480,13 +578,16 @@
 	rq = to_request(fence);
 	engine = rq->engine;
 
-	rcu_read_lock();
+	local_bh_disable();
+	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
 	if (engine->schedule)
-		engine->schedule(rq, prio);
+		engine->schedule(rq, attr);
 	rcu_read_unlock();
+	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
 }
 
-static void fence_set_priority(struct dma_fence *fence, int prio)
+static void fence_set_priority(struct dma_fence *fence,
+			       const struct i915_sched_attr *attr)
 {
 	/* Recurse once into a fence-array */
 	if (dma_fence_is_array(fence)) {
@@ -494,16 +595,16 @@
 		int i;
 
 		for (i = 0; i < array->num_fences; i++)
-			__fence_set_priority(array->fences[i], prio);
+			__fence_set_priority(array->fences[i], attr);
 	} else {
-		__fence_set_priority(fence, prio);
+		__fence_set_priority(fence, attr);
 	}
 }
 
 int
 i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 			      unsigned int flags,
-			      int prio)
+			      const struct i915_sched_attr *attr)
 {
 	struct dma_fence *excl;
 
@@ -518,7 +619,7 @@
 			return ret;
 
 		for (i = 0; i < count; i++) {
-			fence_set_priority(shared[i], prio);
+			fence_set_priority(shared[i], attr);
 			dma_fence_put(shared[i]);
 		}
 
@@ -528,7 +629,7 @@
 	}
 
 	if (excl) {
-		fence_set_priority(excl, prio);
+		fence_set_priority(excl, attr);
 		dma_fence_put(excl);
 	}
 	return 0;
@@ -2879,8 +2980,8 @@
 	 * extra delay for a recent interrupt is pointless. Hence, we do
 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
 	 */
-	spin_lock_irqsave(&engine->timeline->lock, flags);
-	list_for_each_entry(request, &engine->timeline->requests, link) {
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	list_for_each_entry(request, &engine->timeline.requests, link) {
 		if (__i915_request_completed(request, request->global_seqno))
 			continue;
 
@@ -2891,25 +2992,11 @@
 		active = request;
 		break;
 	}
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
 	return active;
 }
 
-static bool engine_stalled(struct intel_engine_cs *engine)
-{
-	if (!engine->hangcheck.stalled)
-		return false;
-
-	/* Check for possible seqno movement after hang declaration */
-	if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) {
-		DRM_DEBUG_DRIVER("%s pardoned\n", engine->name);
-		return false;
-	}
-
-	return true;
-}
-
 /*
 * Ensure the irq handler finishes, and is not run again.
  * Also return the active request so that we only search for it once.
@@ -2998,6 +3085,7 @@
 	}
 
 	i915_gem_revoke_fences(dev_priv);
+	intel_uc_sanitize(dev_priv);
 
 	return err;
 }
@@ -3025,15 +3113,15 @@
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct i915_gem_context *hung_ctx = request->ctx;
-	struct intel_timeline *timeline;
+	struct i915_timeline *timeline = request->timeline;
 	unsigned long flags;
 
-	timeline = i915_gem_context_lookup_timeline(hung_ctx, engine);
+	GEM_BUG_ON(timeline == &engine->timeline);
 
-	spin_lock_irqsave(&engine->timeline->lock, flags);
-	spin_lock(&timeline->lock);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING);
 
-	list_for_each_entry_continue(request, &engine->timeline->requests, link)
+	list_for_each_entry_continue(request, &engine->timeline.requests, link)
 		if (request->ctx == hung_ctx)
 			skip_request(request);
 
@@ -3041,13 +3129,14 @@
 		skip_request(request);
 
 	spin_unlock(&timeline->lock);
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 /* Returns the request if it was guilty of the hang */
 static struct i915_request *
 i915_gem_reset_request(struct intel_engine_cs *engine,
-		       struct i915_request *request)
+		       struct i915_request *request,
+		       bool stalled)
 {
 	/* The guilty request will get skipped on a hung engine.
 	 *
@@ -3070,7 +3159,15 @@
 	 * subsequent hangs.
 	 */
 
-	if (engine_stalled(engine)) {
+	if (i915_request_completed(request)) {
+		GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n",
+			  engine->name, request->global_seqno,
+			  request->fence.context, request->fence.seqno,
+			  intel_engine_get_seqno(engine));
+		stalled = false;
+	}
+
+	if (stalled) {
 		i915_gem_context_mark_guilty(request->ctx);
 		skip_request(request);
 
@@ -3089,11 +3186,11 @@
 			dma_fence_set_error(&request->fence, -EAGAIN);
 
 			/* Rewind the engine to replay the incomplete rq */
-			spin_lock_irq(&engine->timeline->lock);
+			spin_lock_irq(&engine->timeline.lock);
 			request = list_prev_entry(request, link);
-			if (&request->link == &engine->timeline->requests)
+			if (&request->link == &engine->timeline.requests)
 				request = NULL;
-			spin_unlock_irq(&engine->timeline->lock);
+			spin_unlock_irq(&engine->timeline.lock);
 		}
 	}
 
@@ -3101,7 +3198,8 @@
 }
 
 void i915_gem_reset_engine(struct intel_engine_cs *engine,
-			   struct i915_request *request)
+			   struct i915_request *request,
+			   bool stalled)
 {
 	/*
 	 * Make sure this write is visible before we re-enable the interrupt
@@ -3111,7 +3209,7 @@
 	smp_store_mb(engine->irq_posted, 0);
 
 	if (request)
-		request = i915_gem_reset_request(engine, request);
+		request = i915_gem_reset_request(engine, request, stalled);
 
 	if (request) {
 		DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
@@ -3122,7 +3220,8 @@
 	engine->reset_hw(engine, request);
 }
 
-void i915_gem_reset(struct drm_i915_private *dev_priv)
+void i915_gem_reset(struct drm_i915_private *dev_priv,
+		    unsigned int stalled_mask)
 {
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
@@ -3134,10 +3233,12 @@
 	for_each_engine(engine, dev_priv, id) {
 		struct i915_gem_context *ctx;
 
-		i915_gem_reset_engine(engine, engine->hangcheck.active_request);
+		i915_gem_reset_engine(engine,
+				      engine->hangcheck.active_request,
+				      stalled_mask & ENGINE_MASK(id));
 		ctx = fetch_and_zero(&engine->last_retired_context);
 		if (ctx)
-			engine->context_unpin(engine, ctx);
+			intel_context_unpin(ctx, engine);
 
 		/*
 		 * Ostensibly, we always want a context loaded for powersaving,
@@ -3160,13 +3261,6 @@
 	}
 
 	i915_gem_restore_fences(dev_priv);
-
-	if (dev_priv->gt.awake) {
-		intel_sanitize_gt_powersave(dev_priv);
-		intel_enable_gt_powersave(dev_priv);
-		if (INTEL_GEN(dev_priv) >= 6)
-			gen6_rps_busy(dev_priv);
-	}
 }
 
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
@@ -3192,6 +3286,9 @@
 
 static void nop_submit_request(struct i915_request *request)
 {
+	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
+		  request->engine->name,
+		  request->fence.context, request->fence.seqno);
 	dma_fence_set_error(&request->fence, -EIO);
 
 	i915_request_submit(request);
@@ -3201,12 +3298,15 @@
 {
 	unsigned long flags;
 
+	GEM_TRACE("%s fence %llx:%d -> -EIO\n",
+		  request->engine->name,
+		  request->fence.context, request->fence.seqno);
 	dma_fence_set_error(&request->fence, -EIO);
 
-	spin_lock_irqsave(&request->engine->timeline->lock, flags);
+	spin_lock_irqsave(&request->engine->timeline.lock, flags);
 	__i915_request_submit(request);
 	intel_engine_init_global_seqno(request->engine, request->global_seqno);
-	spin_unlock_irqrestore(&request->engine->timeline->lock, flags);
+	spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
 }
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
@@ -3214,7 +3314,9 @@
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	if (drm_debug & DRM_UT_DRIVER) {
+	GEM_TRACE("start\n");
+
+	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
 
 		for_each_engine(engine, i915, id)
@@ -3237,6 +3339,9 @@
 	}
 	i915->caps.scheduler = 0;
 
+	/* Even if the GPU reset fails, it should still stop the engines */
+	intel_gpu_reset(i915, ALL_ENGINES);
+
 	/*
 	 * Make sure no one is running the old callback before we proceed with
 	 * cancelling requests and resetting the completion tracking. Otherwise
@@ -3270,27 +3375,31 @@
 		 * (lockless) lookup doesn't try and wait upon the request as we
 		 * reset it.
 		 */
-		spin_lock_irqsave(&engine->timeline->lock, flags);
+		spin_lock_irqsave(&engine->timeline.lock, flags);
 		intel_engine_init_global_seqno(engine,
 					       intel_engine_last_submit(engine));
-		spin_unlock_irqrestore(&engine->timeline->lock, flags);
+		spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
 		i915_gem_reset_finish_engine(engine);
 	}
 
+	GEM_TRACE("end\n");
+
 	wake_up_all(&i915->gpu_error.reset_queue);
 }
 
 bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 {
-	struct i915_gem_timeline *tl;
-	int i;
+	struct i915_timeline *tl;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
 		return true;
 
-	/* Before unwedging, make sure that all pending operations
+	GEM_TRACE("start\n");
+
+	/*
+	 * Before unwedging, make sure that all pending operations
 	 * are flushed and errored out - we may have requests waiting upon
 	 * third party fences. We marked all inflight requests as EIO, and
 	 * every execbuf since has returned EIO; for consistency we want all
@@ -3300,31 +3409,33 @@
 	 * No more can be submitted until we reset the wedged bit.
 	 */
 	list_for_each_entry(tl, &i915->gt.timelines, link) {
-		for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
-			struct i915_request *rq;
+		struct i915_request *rq;
 
-			rq = i915_gem_active_peek(&tl->engine[i].last_request,
-						  &i915->drm.struct_mutex);
-			if (!rq)
-				continue;
+		rq = i915_gem_active_peek(&tl->last_request,
+					  &i915->drm.struct_mutex);
+		if (!rq)
+			continue;
 
-			/* We can't use our normal waiter as we want to
-			 * avoid recursively trying to handle the current
-			 * reset. The basic dma_fence_default_wait() installs
-			 * a callback for dma_fence_signal(), which is
-			 * triggered by our nop handler (indirectly, the
-			 * callback enables the signaler thread which is
-			 * woken by the nop_submit_request() advancing the seqno
-			 * and when the seqno passes the fence, the signaler
-			 * then signals the fence waking us up).
-			 */
-			if (dma_fence_default_wait(&rq->fence, true,
-						   MAX_SCHEDULE_TIMEOUT) < 0)
-				return false;
-		}
+		/*
+		 * We can't use our normal waiter as we want to
+		 * avoid recursively trying to handle the current
+		 * reset. The basic dma_fence_default_wait() installs
+		 * a callback for dma_fence_signal(), which is
+		 * triggered by our nop handler (indirectly, the
+		 * callback enables the signaler thread which is
+		 * woken by the nop_submit_request() advancing the seqno
+		 * and when the seqno passes the fence, the signaler
+		 * then signals the fence waking us up).
+		 */
+		if (dma_fence_default_wait(&rq->fence, true,
+					   MAX_SCHEDULE_TIMEOUT) < 0)
+			return false;
 	}
+	i915_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests);
 
-	/* Undo nop_submit_request. We prevent all new i915 requests from
+	/*
+	 * Undo nop_submit_request. We prevent all new i915 requests from
 	 * being queued (by disallowing execbuf whilst wedged) so having
 	 * waited for all active requests above, we know the system is idle
 	 * and do not have to worry about a thread being inside
@@ -3335,6 +3446,8 @@
 	intel_engines_reset_default_submission(i915);
 	i915_gem_contexts_lost(i915);
 
+	GEM_TRACE("end\n");
+
 	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
 	clear_bit(I915_WEDGED, &i915->gpu_error.flags);
 
@@ -3473,36 +3586,9 @@
 	if (new_requests_since_last_retire(dev_priv))
 		goto out_unlock;
 
-	/*
-	 * Be paranoid and flush a concurrent interrupt to make sure
-	 * we don't reactivate any irq tasklets after parking.
-	 *
-	 * FIXME: Note that even though we have waited for execlists to be idle,
-	 * there may still be an in-flight interrupt even though the CSB
-	 * is now empty. synchronize_irq() makes sure that a residual interrupt
-	 * is completed before we continue, but it doesn't prevent the HW from
-	 * raising a spurious interrupt later. To complete the shield we should
-	 * coordinate disabling the CS irq with flushing the interrupts.
-	 */
-	synchronize_irq(dev_priv->drm.irq);
+	epoch = __i915_gem_park(dev_priv);
 
-	intel_engines_park(dev_priv);
-	i915_gem_timelines_park(dev_priv);
-
-	i915_pmu_gt_parked(dev_priv);
-
-	GEM_BUG_ON(!dev_priv->gt.awake);
-	dev_priv->gt.awake = false;
-	epoch = dev_priv->gt.epoch;
-	GEM_BUG_ON(epoch == I915_EPOCH_INVALID);
 	rearm_hangcheck = false;
-
-	if (INTEL_GEN(dev_priv) >= 6)
-		gen6_rps_idle(dev_priv);
-
-	intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
-
-	intel_runtime_pm_put(dev_priv);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
@@ -3648,17 +3734,9 @@
 	return ret;
 }
 
-static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
+static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags)
 {
-	int ret, i;
-
-	for (i = 0; i < ARRAY_SIZE(tl->engine); i++) {
-		ret = i915_gem_active_wait(&tl->engine[i].last_request, flags);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
+	return i915_gem_active_wait(&tl->last_request, flags);
 }
 
 static int wait_for_engines(struct drm_i915_private *i915)
@@ -3666,16 +3744,7 @@
 	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
 		dev_err(i915->drm.dev,
 			"Failed to idle engines, declaring wedged!\n");
-		if (drm_debug & DRM_UT_DRIVER) {
-			struct drm_printer p = drm_debug_printer(__func__);
-			struct intel_engine_cs *engine;
-			enum intel_engine_id id;
-
-			for_each_engine(engine, i915, id)
-				intel_engine_dump(engine, &p,
-						  "%s\n", engine->name);
-		}
-
+		GEM_TRACE_DUMP();
 		i915_gem_set_wedged(i915);
 		return -EIO;
 	}
@@ -3685,30 +3754,37 @@
 
 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
-	int ret;
-
 	/* If the device is asleep, we have no requests outstanding */
 	if (!READ_ONCE(i915->gt.awake))
 		return 0;
 
 	if (flags & I915_WAIT_LOCKED) {
-		struct i915_gem_timeline *tl;
+		struct i915_timeline *tl;
+		int err;
 
 		lockdep_assert_held(&i915->drm.struct_mutex);
 
 		list_for_each_entry(tl, &i915->gt.timelines, link) {
-			ret = wait_for_timeline(tl, flags);
-			if (ret)
-				return ret;
+			err = wait_for_timeline(tl, flags);
+			if (err)
+				return err;
 		}
 		i915_retire_requests(i915);
 
-		ret = wait_for_engines(i915);
+		return wait_for_engines(i915);
 	} else {
-		ret = wait_for_timeline(&i915->gt.global_timeline, flags);
-	}
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+		int err;
 
-	return ret;
+		for_each_engine(engine, i915, id) {
+			err = wait_for_timeline(&engine->timeline, flags);
+			if (err)
+				return err;
+		}
+
+		return 0;
+	}
 }
 
 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
@@ -4088,9 +4164,10 @@
 }
 
 /*
- * Prepare buffer for display plane (scanout, cursors, etc).
- * Can be called from an uninterruptible phase (modesetting) and allows
- * any flushes to be pipelined (for pageflips).
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display; the callers are responsible for the frontbuffer flush.
  */
 struct i915_vma *
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
@@ -4146,9 +4223,7 @@
 
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
-	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
 	__i915_gem_object_flush_for_display(obj);
-	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
 
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
@@ -4723,7 +4798,7 @@
 					 &obj->vma_list, obj_link) {
 			GEM_BUG_ON(i915_vma_is_active(vma));
 			vma->flags &= ~I915_VMA_PIN_MASK;
-			i915_vma_close(vma);
+			i915_vma_destroy(vma);
 		}
 		GEM_BUG_ON(!list_empty(&obj->vma_list));
 		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
@@ -4878,7 +4953,7 @@
 	enum intel_engine_id id;
 
 	for_each_engine(engine, i915, id) {
-		GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request));
+		GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request));
 		GEM_BUG_ON(engine->last_retired_context != kernel_context);
 	}
 }
@@ -4973,6 +5048,7 @@
 	 * machines is a good idea, we don't - just in case it leaves the
 	 * machine in an unusable condition.
 	 */
+	intel_uc_sanitize(dev_priv);
 	i915_gem_sanitize(dev_priv);
 
 	intel_runtime_pm_put(dev_priv);
@@ -5118,6 +5194,8 @@
 		}
 	}
 
+	intel_gt_workarounds_apply(dev_priv);
+
 	i915_gem_init_swizzling(dev_priv);
 
 	/*
@@ -5140,6 +5218,12 @@
 		goto out;
 	}
 
+	ret = intel_wopcm_init_hw(&dev_priv->wopcm);
+	if (ret) {
+		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
+		goto out;
+	}
+
 	/* We can't enable contexts until all firmware is loaded */
 	ret = intel_uc_init_hw(dev_priv);
 	if (ret) {
@@ -5207,7 +5291,7 @@
 	for_each_engine(engine, i915, id) {
 		struct i915_vma *state;
 
-		state = ctx->engine[id].state;
+		state = to_intel_context(ctx, engine)->state;
 		if (!state)
 			continue;
 
@@ -5297,6 +5381,10 @@
 	if (ret)
 		return ret;
 
+	ret = intel_wopcm_init(&dev_priv->wopcm);
+	if (ret)
+		return ret;
+
 	ret = intel_uc_init_misc(dev_priv);
 	if (ret)
 		return ret;
@@ -5478,8 +5566,7 @@
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
 }
 
-int
-i915_gem_load_init(struct drm_i915_private *dev_priv)
+int i915_gem_init_early(struct drm_i915_private *dev_priv)
 {
 	int err = -ENOMEM;
 
@@ -5512,12 +5599,9 @@
 	if (!dev_priv->priorities)
 		goto err_dependencies;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
-	err = i915_gem_timeline_init__global(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-	if (err)
-		goto err_priorities;
+	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
+	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
 
 	i915_gem_init__mm(dev_priv);
 
@@ -5538,8 +5622,6 @@
 
 	return 0;
 
-err_priorities:
-	kmem_cache_destroy(dev_priv->priorities);
 err_dependencies:
 	kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
@@ -5554,17 +5636,13 @@
 	return err;
 }
 
-void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
+void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
 {
 	i915_gem_drain_freed_objects(dev_priv);
 	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
 	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
 	WARN_ON(dev_priv->mm.object_count);
-
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_gem_timeline_fini(&dev_priv->gt.global_timeline);
 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index f54c4ff..5259204 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -27,7 +27,12 @@
 
 #include <linux/bug.h>
 
+struct drm_i915_private;
+
 #ifdef CONFIG_DRM_I915_DEBUG_GEM
+
+#define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER)
+
 #define GEM_BUG_ON(condition) do { if (unlikely((condition))) {	\
 		pr_err("%s:%d GEM_BUG_ON(%s)\n", \
 		       __func__, __LINE__, __stringify(condition)); \
@@ -43,6 +48,9 @@
 #define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr)
 
 #else
+
+#define GEM_SHOW_DEBUG() (0)
+
 #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
 #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0)
 
@@ -53,10 +61,15 @@
 
 #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM)
 #define GEM_TRACE(...) trace_printk(__VA_ARGS__)
+#define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL)
 #else
 #define GEM_TRACE(...) do { } while (0)
+#define GEM_TRACE_DUMP() do { } while (0)
 #endif
 
 #define I915_NUM_ENGINES 8
 
+void i915_gem_park(struct drm_i915_private *i915);
+void i915_gem_unpark(struct drm_i915_private *i915);
+
 #endif /* __I915_GEM_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index d3cbe84..f3890b6 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -1,29 +1,11 @@
 /*
- * Copyright © 2014 Intel Corporation
+ * SPDX-License-Identifier: MIT
  *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2014-2018 Intel Corporation
  */
 
-#include "i915_drv.h"
 #include "i915_gem_batch_pool.h"
+#include "i915_drv.h"
 
 /**
  * DOC: batch pool
@@ -41,11 +23,11 @@
 
 /**
  * i915_gem_batch_pool_init() - initialize a batch buffer pool
- * @engine: the associated request submission engine
  * @pool: the batch buffer pool
+ * @engine: the associated request submission engine
  */
-void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
-			      struct i915_gem_batch_pool *pool)
+void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
+			      struct intel_engine_cs *engine)
 {
 	int n;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 10d5ac4..56947da 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -1,31 +1,13 @@
 /*
- * Copyright © 2014 Intel Corporation
+ * SPDX-License-Identifier: MIT
  *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2014-2018 Intel Corporation
  */
 
 #ifndef I915_GEM_BATCH_POOL_H
 #define I915_GEM_BATCH_POOL_H
 
-#include "i915_drv.h"
+#include <linux/types.h>
 
 struct intel_engine_cs;
 
@@ -34,9 +16,8 @@
 	struct list_head cache_list[4];
 };
 
-/* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
-			      struct i915_gem_batch_pool *pool);
+void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool,
+			      struct intel_engine_cs *engine);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
 struct drm_i915_gem_object*
 i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index f2cbea7..33f8a4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_workarounds.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
@@ -116,15 +117,15 @@
 
 static void i915_gem_context_free(struct i915_gem_context *ctx)
 {
-	int i;
+	unsigned int n;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
 
 	i915_ppgtt_put(ctx->ppgtt);
 
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct intel_context *ce = &ctx->engine[i];
+	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
+		struct intel_context *ce = &ctx->__engine[n];
 
 		if (!ce->state)
 			continue;
@@ -280,7 +281,7 @@
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
-	ctx->priority = I915_PRIORITY_NORMAL;
+	ctx->sched.priority = I915_PRIORITY_NORMAL;
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -318,12 +319,13 @@
 	ctx->desc_template =
 		default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
 
-	/* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not
+	/*
+	 * GuC requires the ring to be placed in Non-WOPCM memory. If GuC is not
 	 * present or not in use, we still need a small bias as ring wraparound
 	 * at offset 0 sometimes hangs. No idea why.
 	 */
 	if (USES_GUC(dev_priv))
-		ctx->ggtt_offset_bias = GUC_WOPCM_TOP;
+		ctx->ggtt_offset_bias = dev_priv->guc.ggtt_pin_bias;
 	else
 		ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;
 
@@ -429,7 +431,7 @@
 		return ctx;
 
 	i915_gem_context_clear_bannable(ctx);
-	ctx->priority = prio;
+	ctx->sched.priority = prio;
 	ctx->ring_size = PAGE_SIZE;
 
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
@@ -458,11 +460,16 @@
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 {
 	struct i915_gem_context *ctx;
+	int ret;
 
 	/* Reassure ourselves we are only called once */
 	GEM_BUG_ON(dev_priv->kernel_context);
 	GEM_BUG_ON(dev_priv->preempt_context);
 
+	ret = intel_ctx_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
+
 	INIT_LIST_HEAD(&dev_priv->contexts.list);
 	INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
 	init_llist_head(&dev_priv->contexts.free_list);
@@ -514,7 +521,7 @@
 		if (!engine->last_retired_context)
 			continue;
 
-		engine->context_unpin(engine, engine->last_retired_context);
+		intel_context_unpin(engine->last_retired_context, engine);
 		engine->last_retired_context = NULL;
 	}
 }
@@ -570,19 +577,29 @@
 	idr_destroy(&file_priv->context_idr);
 }
 
+static struct i915_request *
+last_request_on_engine(struct i915_timeline *timeline,
+		       struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	if (timeline == &engine->timeline)
+		return NULL;
+
+	rq = i915_gem_active_raw(&timeline->last_request,
+				 &engine->i915->drm.struct_mutex);
+	if (rq && rq->engine == engine)
+		return rq;
+
+	return NULL;
+}
+
 static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine)
 {
-	struct i915_gem_timeline *timeline;
+	struct i915_timeline *timeline;
 
 	list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
-		struct intel_timeline *tl;
-
-		if (timeline == &engine->i915->gt.global_timeline)
-			continue;
-
-		tl = &timeline->engine[engine->id];
-		if (i915_gem_active_peek(&tl->last_request,
-					 &engine->i915->drm.struct_mutex))
+		if (last_request_on_engine(timeline, engine))
 			return false;
 	}
 
@@ -592,7 +609,7 @@
 int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 {
 	struct intel_engine_cs *engine;
-	struct i915_gem_timeline *timeline;
+	struct i915_timeline *timeline;
 	enum intel_engine_id id;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -612,11 +629,8 @@
 		/* Queue this switch after all other activity */
 		list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
 			struct i915_request *prev;
-			struct intel_timeline *tl;
 
-			tl = &timeline->engine[engine->id];
-			prev = i915_gem_active_raw(&tl->last_request,
-						   &dev_priv->drm.struct_mutex);
+			prev = last_request_on_engine(timeline, engine);
 			if (prev)
 				i915_sw_fence_await_sw_fence_gfp(&rq->submit,
 								 &prev->submit,
@@ -746,7 +760,7 @@
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
-		args->value = ctx->priority;
+		args->value = ctx->sched.priority;
 		break;
 	default:
 		ret = -EINVAL;
@@ -819,7 +833,7 @@
 				 !capable(CAP_SYS_NICE))
 				ret = -EPERM;
 			else
-				ctx->priority = priority;
+				ctx->sched.priority = priority;
 		}
 		break;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 7854262..ace3b12 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -137,18 +137,7 @@
 	 */
 	u32 user_handle;
 
-	/**
-	 * @priority: execution and service priority
-	 *
-	 * All clients are equal, but some are more equal than others!
-	 *
-	 * Requests from a context with a greater (more positive) value of
-	 * @priority will be executed before those with a lower @priority
-	 * value, forming a simple QoS.
-	 *
-	 * The &drm_i915_private.kernel_context is assigned the lowest priority.
-	 */
-	int priority;
+	struct i915_sched_attr sched;
 
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
@@ -160,7 +149,7 @@
 		u32 *lrc_reg_state;
 		u64 lrc_desc;
 		int pin_count;
-	} engine[I915_NUM_ENGINES];
+	} __engine[I915_NUM_ENGINES];
 
 	/** ring_size: size for allocating the per-engine ring buffer */
 	u32 ring_size;
@@ -267,6 +256,34 @@
 	return !ctx->file_priv;
 }
 
+static inline struct intel_context *
+to_intel_context(struct i915_gem_context *ctx,
+		 const struct intel_engine_cs *engine)
+{
+	return &ctx->__engine[engine->id];
+}
+
+static inline struct intel_ring *
+intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+	return engine->context_pin(engine, ctx);
+}
+
+static inline void __intel_context_pin(struct i915_gem_context *ctx,
+				       const struct intel_engine_cs *engine)
+{
+	struct intel_context *ce = to_intel_context(ctx, engine);
+
+	GEM_BUG_ON(!ce->pin_count);
+	ce->pin_count++;
+}
+
+static inline void intel_context_unpin(struct i915_gem_context *ctx,
+				       struct intel_engine_cs *engine)
+{
+	engine->context_unpin(engine, ctx);
+}
+
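
The pin/unpin wrappers above hide engine->context_pin()/context_unpin() behind a reference-count discipline: __intel_context_pin() may only bump the count of an already-pinned context (hence the GEM_BUG_ON), and every pin must eventually be balanced by an unpin. A toy model of that discipline, not the driver code:

#include <stdio.h>

struct toy_context { int pin_count; };

static void context_pin(struct toy_context *ce)   { ce->pin_count++; }
static void context_unpin(struct toy_context *ce) { ce->pin_count--; }

int main(void)
{
	struct toy_context ce = { 0 };

	context_pin(&ce);	/* like intel_context_pin(): first user */
	context_pin(&ce);	/* like __intel_context_pin(): nested user */
	context_unpin(&ce);
	context_unpin(&ce);	/* every pin balanced by an unpin */
	printf("pin_count=%d\n", ce.pin_count);	/* 0: safe to release */
	return 0;
}
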
 /* i915_gem_context.c */
 int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv);
 void i915_gem_contexts_lost(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0414228..f627a8c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -81,6 +81,35 @@
  * but this remains just a hint as the kernel may choose a new location for
  * any object in the future.
  *
+ * At the level of talking to the hardware, submitting a batchbuffer for the
+ * GPU to execute amounts to adding commands to a buffer from which the HW
+ * command streamer reads (a simplified sketch follows this hunk).
+ *
+ * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
+ *    Execlists, this command is not placed on the same buffer as the
+ *    remaining items.
+ *
+ * 2. Add a command to invalidate caches to the buffer.
+ *
+ * 3. Add a batchbuffer start command to the buffer; the start command is
+ *    essentially a token together with the GPU address of the batchbuffer
+ *    to be executed.
+ *
+ * 4. Add a pipeline flush to the buffer.
+ *
+ * 5. Add a memory write command to the buffer to record when the GPU
+ *    is done executing the batchbuffer. The memory write writes the
+ *    global sequence number of the request, ``i915_request::global_seqno``;
+ *    the i915 driver uses the current value in the register to determine
+ *    if the GPU has completed the batchbuffer.
+ *
+ * 6. Add a user interrupt command to the buffer. This command instructs
+ *    the GPU to issue an interrupt when the command, pipeline flush and
+ *    memory write are completed.
+ *
+ * 7. Inform the hardware of the additional commands added to the buffer
+ *    (by updating the tail pointer).
+ *
  * Processing an execbuf ioctl is conceptually split up into a few phases.
  *
  * 1. Validation - Ensure all the pointers, handles and flags are valid.
@@ -733,7 +762,8 @@
 		}
 
 		/* transfer ref to ctx */
-		vma->open_count++;
+		if (!vma->open_count++)
+			i915_vma_reopen(vma);
 		list_add(&lut->obj_link, &obj->lut_list);
 		list_add(&lut->ctx_link, &eb->ctx->handles_list);
 		lut->ctx = eb->ctx;
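
The seven submission steps documented in the comment added above map naturally onto a sequence of dword writes into a ring followed by a tail update. A simplified, self-contained sketch; every opcode and helper name here is invented, the real commands being per-generation MI_*/PIPE_CONTROL instructions:

#include <stdint.h>
#include <stdio.h>

struct ring { uint32_t cmd[64]; unsigned int tail; };

static void emit(struct ring *r, uint32_t dw)
{
	r->cmd[r->tail++ & 63] = dw;
}

static void emit_request(struct ring *r, uint32_t batch_addr, uint32_t seqno)
{
	emit(r, 0x11);		/* 1: load the HW context */
	emit(r, 0x22);		/* 2: invalidate caches */
	emit(r, 0x33);		/* 3: batchbuffer start token... */
	emit(r, batch_addr);	/*    ...plus the GPU address of the batch */
	emit(r, 0x44);		/* 4: pipeline flush */
	emit(r, 0x55);		/* 5: post-sync write of... */
	emit(r, seqno);		/*    ...the request's global seqno */
	emit(r, 0x66);		/* 6: user interrupt */
	/* 7: publish the new tail so the HW command streamer sees it */
	printf("tail moved to %u\n", r->tail);
}

int main(void)
{
	struct ring r = { .tail = 0 };

	emit_request(&r, 0xdead1000, 42);
	return 0;
}
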
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 21d72f6..996ab2a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -110,7 +110,8 @@
 
 static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
 {
-	/* Note that as an uncached mmio write, this should flush the
+	/*
+	 * Note that as an uncached mmio write, this will flush the
 	 * WCB of the writes into the GGTT before it triggers the invalidate.
 	 */
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
@@ -1161,6 +1162,27 @@
 			vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
 			kunmap_atomic(vaddr);
 			page_size = I915_GTT_PAGE_SIZE_64K;
+
+			/*
+			 * We write all 4K page entries, even when using 64K
+			 * pages. In order to verify that the HW isn't cheating
+			 * by using the 4K PTE instead of the 64K PTE, we want
+			 * to remove all the surplus entries. If the HW skipped
+			 * the 64K PTE, it will read/write into the scratch page
+			 * instead - which we detect as missing results during
+			 * selftests.
+			 */
+			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
+				u16 i;
+
+				encode = pte_encode | vma->vm->scratch_page.daddr;
+				vaddr = kmap_atomic_px(pd->page_table[idx.pde]);
+
+				for (i = 1; i < index; i += 16)
+					memset64(vaddr + i, encode, 15);
+
+				kunmap_atomic(vaddr);
+			}
 		}
 
 		vma->page_sizes.gtt |= page_size;
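
The selftest scrub above relies on the 64K/4K layout: each 64K page is backed by 16 consecutive 4K PTEs, and only the first entry of each group must stay valid, so entries 1..15 of every group are redirected to scratch. A quick check of which indices the "for (i = 1; i < index; i += 16)" loop touches, for illustration only:

#include <stdio.h>

int main(void)
{
	/* Three 64K pages, i.e. 48 4K PTEs: group 0 keeps PTE 0,
	 * group 1 keeps PTE 16, group 2 keeps PTE 32. */
	unsigned int index = 48;

	for (unsigned int i = 1; i < index; i += 16)
		printf("group %u: scrub PTEs %u..%u\n", i / 16, i, i + 14);
	return 0;
}
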
@@ -2111,8 +2133,6 @@
 				    struct drm_i915_private *dev_priv,
 				    const char *name)
 {
-	i915_gem_timeline_init(dev_priv, &vm->timeline, name);
-
 	drm_mm_init(&vm->mm, 0, vm->total);
 	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
@@ -2129,7 +2149,6 @@
 	if (pagevec_count(&vm->free_pages))
 		vm_free_pages_release(vm, true);
 
-	i915_gem_timeline_fini(&vm->timeline);
 	drm_mm_takedown(&vm->mm);
 	list_del(&vm->global_link);
 }
@@ -2140,15 +2159,15 @@
 	 * called on driver load and after a GPU reset, so you can place
 	 * workarounds here even if they get overwritten by GPU reset.
 	 */
-	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */
+	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
 	if (IS_BROADWELL(dev_priv))
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
 	else if (IS_CHERRYVIEW(dev_priv))
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
-	else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv))
-		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
 	else if (IS_GEN9_LP(dev_priv))
 		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+	else if (INTEL_GEN(dev_priv) >= 9)
+		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
 
 	/*
 	 * To support 64K PTEs we need to first enable the use of the
@@ -2222,6 +2241,12 @@
 
 void i915_ppgtt_close(struct i915_address_space *vm)
 {
+	GEM_BUG_ON(vm->closed);
+	vm->closed = true;
+}
+
+static void ppgtt_destroy_vma(struct i915_address_space *vm)
+{
 	struct list_head *phases[] = {
 		&vm->active_list,
 		&vm->inactive_list,
@@ -2229,15 +2254,12 @@
 		NULL,
 	}, **phase;
 
-	GEM_BUG_ON(vm->closed);
 	vm->closed = true;
-
 	for (phase = phases; *phase; phase++) {
 		struct i915_vma *vma, *vn;
 
 		list_for_each_entry_safe(vma, vn, *phase, vm_link)
-			if (!i915_vma_is_closed(vma))
-				i915_vma_close(vma);
+			i915_vma_destroy(vma);
 	}
 }
 
@@ -2248,7 +2270,8 @@
 
 	trace_i915_ppgtt_release(&ppgtt->base);
 
-	/* vmas should already be unbound and destroyed */
+	ppgtt_destroy_vma(&ppgtt->base);
+
 	GEM_BUG_ON(!list_empty(&ppgtt->base.active_list));
 	GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list));
 	GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list));
@@ -2417,11 +2440,9 @@
 	for_each_sgt_dma(addr, sgt_iter, vma->pages)
 		gen8_set_pte(gtt_entries++, pte_encode | addr);
 
-	wmb();
-
-	/* This next bit makes the above posting read even more important. We
-	 * want to flush the TLBs only after we're certain all the PTE updates
-	 * have finished.
+	/*
+	 * We want to flush the TLBs only after we're certain all the PTE
+	 * updates have finished.
 	 */
 	ggtt->invalidate(vm->i915);
 }
@@ -2459,11 +2480,10 @@
 	dma_addr_t addr;
 	for_each_sgt_dma(addr, iter, vma->pages)
 		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
-	wmb();
 
-	/* This next bit makes the above posting read even more important. We
-	 * want to flush the TLBs only after we're certain all the PTE updates
-	 * have finished.
+	/*
+	 * We want to flush the TLBs only after we're certain all the PTE
+	 * updates have finished.
 	 */
 	ggtt->invalidate(vm->i915);
 }
@@ -3325,14 +3345,10 @@
 		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
 
 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-
-	if (INTEL_GEN(dev_priv) >= 9) {
-		size = gen8_get_total_gtt_size(snb_gmch_ctl);
-	} else if (IS_CHERRYVIEW(dev_priv)) {
+	if (IS_CHERRYVIEW(dev_priv))
 		size = chv_get_total_gtt_size(snb_gmch_ctl);
-	} else {
+	else
 		size = gen8_get_total_gtt_size(snb_gmch_ctl);
-	}
 
 	ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
 	ggtt->base.cleanup = gen6_gmch_remove;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 6efc017..aec4f73 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -38,10 +38,9 @@
 #include <linux/mm.h>
 #include <linux/pagevec.h>
 
-#include "i915_gem_timeline.h"
-
 #include "i915_request.h"
 #include "i915_selftest.h"
+#include "i915_timeline.h"
 
 #define I915_GTT_PAGE_SIZE_4K BIT(12)
 #define I915_GTT_PAGE_SIZE_64K BIT(16)
@@ -257,7 +256,6 @@
 
 struct i915_address_space {
 	struct drm_mm mm;
-	struct i915_gem_timeline timeline;
 	struct drm_i915_private *i915;
 	struct device *dma;
 	/* Every address space belongs to a struct file - except for the global
@@ -344,6 +342,7 @@
 	void (*clear_pages)(struct i915_vma *vma);
 
 	I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
+	I915_SELFTEST_DECLARE(bool scrub_64K);
 };
 
 #define i915_is_ggtt(V) (!(V)->file)
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 62aa679..ad949cc 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -51,6 +51,10 @@
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return -ENODEV;
 
+	/* WaSkipStolenMemoryFirstPage:bdw+ */
+	if (INTEL_GEN(dev_priv) >= 8 && start < 4096)
+		start = 4096;
+
 	mutex_lock(&dev_priv->mm.stolen_lock);
 	ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
 					  size, alignment, 0,
@@ -121,8 +125,8 @@
 
 		if (stolen[0].start != stolen[1].start ||
 		    stolen[0].end != stolen[1].end) {
-			DRM_DEBUG_KMS("GTT within stolen memory at %pR\n", &ggtt_res);
-			DRM_DEBUG_KMS("Stolen memory adjusted to %pR\n", dsm);
+			DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res);
+			DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm);
 		}
 	}
 
@@ -174,18 +178,19 @@
 }
 
 static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				    resource_size_t *base, resource_size_t *size)
+				    resource_size_t *base,
+				    resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ?
-				     CTG_STOLEN_RESERVED :
-				     ELK_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(IS_GM45(dev_priv) ?
+				CTG_STOLEN_RESERVED :
+				ELK_STOLEN_RESERVED);
 	resource_size_t stolen_top = dev_priv->dsm.end + 1;
 
-	if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n",
+			 IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val);
+
+	if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0)
 		return;
-	}
 
 	/*
 	 * Whether ILK really reuses the ELK register for this is unclear.
@@ -193,30 +198,25 @@
 	 */
 	WARN(IS_GEN5(dev_priv), "ILK stolen reserved found? 0x%08x\n", reg_val);
 
-	*base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16;
+	if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK))
+		return;
 
+	*base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16;
 	WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base);
 
-	/* On these platforms, the register doesn't have a size field, so the
-	 * size is the distance between the base and the top of the stolen
-	 * memory. We also have the genuine case where base is zero and there's
-	 * nothing reserved. */
-	if (*base == 0)
-		*size = 0;
-	else
-		*size = stolen_top - *base;
+	*size = stolen_top - *base;
 }
 
 static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				     resource_size_t *base, resource_size_t *size)
+				     resource_size_t *base,
+				     resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
 
-	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
 		return;
-	}
 
 	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
 
@@ -239,17 +239,44 @@
 	}
 }
 
-static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				     resource_size_t *base, resource_size_t *size)
+static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				    resource_size_t *base,
+				    resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	resource_size_t stolen_top = dev_priv->dsm.end + 1;
 
-	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
 		return;
+
+	switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) {
+	default:
+		MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK);
+	case GEN7_STOLEN_RESERVED_1M:
+		*size = 1024 * 1024;
+		break;
 	}
 
+	/*
+	 * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the
+	 * reserved location as (top - size).
+	 */
+	*base = stolen_top - *size;
+}
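
Unlike the other gen7 platforms, vlv leaves the address field zero and the reserved window is implied to sit at the very top of stolen, so the base is computed as top minus size. A one-shot check of that arithmetic with made-up numbers:

#include <stdio.h>

int main(void)
{
	/* Hypothetical values: stolen ends at 0x8000000 and the HW has
	 * placed a 1M reservation at the top (base = top - size). */
	unsigned long long stolen_top = 0x8000000ULL;
	unsigned long long size = 1024 * 1024;
	unsigned long long base = stolen_top - size;

	printf("reserved: [0x%llx, 0x%llx)\n", base, stolen_top);
	return 0;
}
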
+
+static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv,
+				     resource_size_t *base,
+				     resource_size_t *size)
+{
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+		return;
+
 	*base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK;
 
 	switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) {
@@ -266,15 +293,15 @@
 }
 
 static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				    resource_size_t *base, resource_size_t *size)
+				    resource_size_t *base,
+				    resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
 
-	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
 		return;
-	}
 
 	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
 
@@ -298,36 +325,28 @@
 }
 
 static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv,
-				    resource_size_t *base, resource_size_t *size)
+				    resource_size_t *base,
+				    resource_size_t *size)
 {
-	uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED);
-	resource_size_t stolen_top;
+	u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED);
+	resource_size_t stolen_top = dev_priv->dsm.end + 1;
 
-	if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) {
-		*base = 0;
-		*size = 0;
+	DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
 		return;
-	}
 
-	stolen_top = dev_priv->dsm.end + 1;
+	if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK))
+		return;
 
 	*base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
-
-	/* On these platforms, the register doesn't have a size field, so the
-	 * size is the distance between the base and the top of the stolen
-	 * memory. We also have the genuine case where base is zero and there's
-	 * nothing reserved. */
-	if (*base == 0)
-		*size = 0;
-	else
-		*size = stolen_top - *base;
+	*size = stolen_top - *base;
 }
 
 int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 {
 	resource_size_t reserved_base, stolen_top;
 	resource_size_t reserved_total, reserved_size;
-	resource_size_t stolen_usable_start;
 
 	mutex_init(&dev_priv->mm.stolen_lock);
 
@@ -353,7 +372,7 @@
 	GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start);
 
 	stolen_top = dev_priv->dsm.end + 1;
-	reserved_base = 0;
+	reserved_base = stolen_top;
 	reserved_size = 0;
 
 	switch (INTEL_GEN(dev_priv)) {
@@ -373,8 +392,12 @@
 					 &reserved_base, &reserved_size);
 		break;
 	case 7:
-		gen7_get_stolen_reserved(dev_priv,
-					 &reserved_base, &reserved_size);
+		if (IS_VALLEYVIEW(dev_priv))
+			vlv_get_stolen_reserved(dev_priv,
+						&reserved_base, &reserved_size);
+		else
+			gen7_get_stolen_reserved(dev_priv,
+						 &reserved_base, &reserved_size);
 		break;
 	default:
 		if (IS_LP(dev_priv))
@@ -386,11 +409,16 @@
 		break;
 	}
 
-	/* It is possible for the reserved base to be zero, but the register
-	 * field for size doesn't have a zero option. */
-	if (reserved_base == 0) {
-		reserved_size = 0;
+	/*
+	 * Our expectation is that the reserved space is at the top of the
+	 * stolen region and *never* at the bottom. If we see !reserved_base,
+	 * it likely means we failed to read the registers correctly.
+	 */
+	if (!reserved_base) {
+		DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n",
+			  &reserved_base, &reserved_size);
 		reserved_base = stolen_top;
+		reserved_size = 0;
 	}
 
 	dev_priv->dsm_reserved =
@@ -406,21 +434,15 @@
 	 * memory, so just consider the start. */
 	reserved_total = stolen_top - reserved_base;
 
-	DRM_DEBUG_KMS("Memory reserved for graphics device: %lluK, usable: %lluK\n",
-		      (u64)resource_size(&dev_priv->dsm) >> 10,
-		      ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10);
-
-	stolen_usable_start = 0;
-	/* WaSkipStolenMemoryFirstPage:bdw+ */
-	if (INTEL_GEN(dev_priv) >= 8)
-		stolen_usable_start = 4096;
+	DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n",
+			 (u64)resource_size(&dev_priv->dsm) >> 10,
+			 ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10);
 
 	dev_priv->stolen_usable_size =
-		resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start;
+		resource_size(&dev_priv->dsm) - reserved_total;
 
 	/* Basic memrange allocator for stolen space. */
-	drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start,
-		    dev_priv->stolen_usable_size);
+	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size);
 
 	return 0;
 }
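
To make the bookkeeping above concrete, a hedged sketch with invented sizes: the range handed to drm_mm is simply stolen minus whatever is reserved at the top.

	/* Illustration only; sizes are made up. */
	resource_size_t stolen_size    = 64 << 20; /* resource_size(&dev_priv->dsm) */
	resource_size_t reserved_total = 8 << 20;  /* stolen_top - reserved_base */
	resource_size_t usable = stolen_size - reserved_total; /* 56M */

	/* drm_mm then manages offsets [0, usable) within stolen. */
	drm_mm_init(&dev_priv->mm.stolen, 0, usable);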
@@ -580,8 +602,8 @@
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
-			&stolen_offset, &gtt_offset, &size);
+	DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
+			 &stolen_offset, &gtt_offset, &size);
 
 	/* KISS and expect everything to be page-aligned */
 	if (WARN_ON(size == 0) ||
@@ -599,14 +621,14 @@
 	ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen);
 	mutex_unlock(&dev_priv->mm.stolen_lock);
 	if (ret) {
-		DRM_DEBUG_KMS("failed to allocate stolen space\n");
+		DRM_DEBUG_DRIVER("failed to allocate stolen space\n");
 		kfree(stolen);
 		return NULL;
 	}
 
 	obj = _i915_gem_object_create_stolen(dev_priv, stolen);
 	if (obj == NULL) {
-		DRM_DEBUG_KMS("failed to allocate stolen object\n");
+		DRM_DEBUG_DRIVER("failed to allocate stolen object\n");
 		i915_gem_stolen_remove_node(dev_priv, stolen);
 		kfree(stolen);
 		return NULL;
@@ -635,7 +657,7 @@
 				   size, gtt_offset, obj->cache_level,
 				   0);
 	if (ret) {
-		DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
+		DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n");
 		goto err_pages;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
deleted file mode 100644
index e9fd876..0000000
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "i915_drv.h"
-#include "i915_syncmap.h"
-
-static void __intel_timeline_init(struct intel_timeline *tl,
-				  struct i915_gem_timeline *parent,
-				  u64 context,
-				  struct lock_class_key *lockclass,
-				  const char *lockname)
-{
-	tl->fence_context = context;
-	tl->common = parent;
-	spin_lock_init(&tl->lock);
-	lockdep_set_class_and_name(&tl->lock, lockclass, lockname);
-	init_request_active(&tl->last_request, NULL);
-	INIT_LIST_HEAD(&tl->requests);
-	i915_syncmap_init(&tl->sync);
-}
-
-static void __intel_timeline_fini(struct intel_timeline *tl)
-{
-	GEM_BUG_ON(!list_empty(&tl->requests));
-
-	i915_syncmap_free(&tl->sync);
-}
-
-static int __i915_gem_timeline_init(struct drm_i915_private *i915,
-				    struct i915_gem_timeline *timeline,
-				    const char *name,
-				    struct lock_class_key *lockclass,
-				    const char *lockname)
-{
-	unsigned int i;
-	u64 fences;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	/*
-	 * Ideally we want a set of engines on a single leaf as we expect
-	 * to mostly be tracking synchronisation between engines. It is not
-	 * a huge issue if this is not the case, but we may want to mitigate
-	 * any page crossing penalties if they become an issue.
-	 */
-	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
-
-	timeline->i915 = i915;
-	timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL);
-	if (!timeline->name)
-		return -ENOMEM;
-
-	list_add(&timeline->link, &i915->gt.timelines);
-
-	/* Called during early_init before we know how many engines there are */
-	fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine));
-	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
-		__intel_timeline_init(&timeline->engine[i],
-				      timeline, fences++,
-				      lockclass, lockname);
-
-	return 0;
-}
-
-int i915_gem_timeline_init(struct drm_i915_private *i915,
-			   struct i915_gem_timeline *timeline,
-			   const char *name)
-{
-	static struct lock_class_key class;
-
-	return __i915_gem_timeline_init(i915, timeline, name,
-					&class, "&timeline->lock");
-}
-
-int i915_gem_timeline_init__global(struct drm_i915_private *i915)
-{
-	static struct lock_class_key class;
-
-	return __i915_gem_timeline_init(i915,
-					&i915->gt.global_timeline,
-					"[execution]",
-					&class, "&global_timeline->lock");
-}
-
-/**
- * i915_gem_timelines_park - called when the driver idles
- * @i915: the drm_i915_private device
- *
- * When the driver is completely idle, we know that all of our sync points
- * have been signaled and our tracking is then entirely redundant. Any request
- * to wait upon an older sync point will be completed instantly as we know
- * the fence is signaled and therefore we will not even look them up in the
- * sync point map.
- */
-void i915_gem_timelines_park(struct drm_i915_private *i915)
-{
-	struct i915_gem_timeline *timeline;
-	int i;
-
-	lockdep_assert_held(&i915->drm.struct_mutex);
-
-	list_for_each_entry(timeline, &i915->gt.timelines, link) {
-		for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-			struct intel_timeline *tl = &timeline->engine[i];
-
-			/*
-			 * All known fences are completed so we can scrap
-			 * the current sync point tracking and start afresh,
-			 * any attempt to wait upon a previous sync point
-			 * will be skipped as the fence was signaled.
-			 */
-			i915_syncmap_free(&tl->sync);
-		}
-	}
-}
-
-void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
-{
-	int i;
-
-	lockdep_assert_held(&timeline->i915->drm.struct_mutex);
-
-	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
-		__intel_timeline_fini(&timeline->engine[i]);
-
-	list_del(&timeline->link);
-	kfree(timeline->name);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/mock_timeline.c"
-#include "selftests/i915_gem_timeline.c"
-#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
deleted file mode 100644
index 33e01bf..0000000
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#ifndef I915_GEM_TIMELINE_H
-#define I915_GEM_TIMELINE_H
-
-#include <linux/list.h>
-
-#include "i915_request.h"
-#include "i915_syncmap.h"
-#include "i915_utils.h"
-
-struct i915_gem_timeline;
-
-struct intel_timeline {
-	u64 fence_context;
-	u32 seqno;
-
-	/**
-	 * Count of outstanding requests, from the time they are constructed
-	 * to the moment they are retired. Loosely coupled to hardware.
-	 */
-	u32 inflight_seqnos;
-
-	spinlock_t lock;
-
-	/**
-	 * List of breadcrumbs associated with GPU requests currently
-	 * outstanding.
-	 */
-	struct list_head requests;
-
-	/* Contains an RCU guarded pointer to the last request. No reference is
-	 * held to the request, users must carefully acquire a reference to
-	 * the request using i915_gem_active_get_request_rcu(), or hold the
-	 * struct_mutex.
-	 */
-	struct i915_gem_active last_request;
-
-	/**
-	 * We track the most recent seqno that we wait on in every context so
-	 * that we only have to emit a new await and dependency on a more
-	 * recent sync point. As the contexts may be executed out-of-order, we
-	 * have to track each individually and can not rely on an absolute
-	 * global_seqno. When we know that all tracked fences are completed
-	 * (i.e. when the driver is idle), we know that the syncmap is
-	 * redundant and we can discard it without loss of generality.
-	 */
-	struct i915_syncmap *sync;
-	/**
-	 * Separately to the inter-context seqno map above, we track the last
-	 * barrier (e.g. semaphore wait) to the global engine timelines. Note
-	 * that this tracks global_seqno rather than the context.seqno, and
-	 * so it is subject to the limitations of hw wraparound and that we
-	 * may need to revoke global_seqno (on pre-emption).
-	 */
-	u32 global_sync[I915_NUM_ENGINES];
-
-	struct i915_gem_timeline *common;
-};
-
-struct i915_gem_timeline {
-	struct list_head link;
-
-	struct drm_i915_private *i915;
-	const char *name;
-
-	struct intel_timeline engine[I915_NUM_ENGINES];
-};
-
-int i915_gem_timeline_init(struct drm_i915_private *i915,
-			   struct i915_gem_timeline *tl,
-			   const char *name);
-int i915_gem_timeline_init__global(struct drm_i915_private *i915);
-void i915_gem_timelines_park(struct drm_i915_private *i915);
-void i915_gem_timeline_fini(struct i915_gem_timeline *tl);
-
-static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
-					    u64 context, u32 seqno)
-{
-	return i915_syncmap_set(&tl->sync, context, seqno);
-}
-
-static inline int intel_timeline_sync_set(struct intel_timeline *tl,
-					  const struct dma_fence *fence)
-{
-	return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
-}
-
-static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
-						  u64 context, u32 seqno)
-{
-	return i915_syncmap_is_later(&tl->sync, context, seqno);
-}
-
-static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
-						const struct dma_fence *fence)
-{
-	return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
-}
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f89ac7a8..df234dc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -32,6 +32,7 @@
 #include <linux/zlib.h>
 #include <drm/drm_print.h>
 
+#include "i915_gpu_error.h"
 #include "i915_drv.h"
 
 static inline const struct intel_engine_cs *
@@ -403,16 +404,17 @@
 
 static void error_print_request(struct drm_i915_error_state_buf *m,
 				const char *prefix,
-				const struct drm_i915_error_request *erq)
+				const struct drm_i915_error_request *erq,
+				const unsigned long epoch)
 {
 	if (!erq->seqno)
 		return;
 
-	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n",
+	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
 		   prefix, erq->pid, erq->ban_score,
-		   erq->context, erq->seqno, erq->priority,
-		   jiffies_to_msecs(jiffies - erq->jiffies),
-		   erq->head, erq->tail);
+		   erq->context, erq->seqno, erq->sched_attr.priority,
+		   jiffies_to_msecs(erq->jiffies - epoch),
+		   erq->start, erq->head, erq->tail);
 }
 
 static void error_print_context(struct drm_i915_error_state_buf *m,
@@ -421,12 +423,13 @@
 {
 	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
 		   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
-		   ctx->priority, ctx->ban_score, bannable(ctx),
+		   ctx->sched_attr.priority, ctx->ban_score, bannable(ctx),
 		   ctx->guilty, ctx->active);
 }
 
 static void error_print_engine(struct drm_i915_error_state_buf *m,
-			       const struct drm_i915_error_engine *ee)
+			       const struct drm_i915_error_engine *ee,
+			       const unsigned long epoch)
 {
 	int n;
 
@@ -496,14 +499,15 @@
 	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
 	err_printf(m, "  hangcheck action: %s\n",
 		   hangcheck_action_to_str(ee->hangcheck_action));
-	err_printf(m, "  hangcheck action timestamp: %lu, %u ms ago\n",
+	err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n",
+		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
 		   ee->hangcheck_timestamp,
-		   jiffies_to_msecs(jiffies - ee->hangcheck_timestamp));
+		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
 	err_printf(m, "  engine reset count: %u\n", ee->reset_count);
 
 	for (n = 0; n < ee->num_ports; n++) {
 		err_printf(m, "  ELSP[%d]:", n);
-		error_print_request(m, " ", &ee->execlist[n]);
+		error_print_request(m, " ", &ee->execlist[n], epoch);
 	}
 
 	error_print_context(m, "  Active context: ", &ee->context);
@@ -649,6 +653,11 @@
 	ts = ktime_to_timespec64(error->uptime);
 	err_printf(m, "Uptime: %lld s %ld us\n",
 		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
+	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
+	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
+		   error->capture,
+		   jiffies_to_msecs(jiffies - error->capture),
+		   jiffies_to_msecs(error->capture - error->epoch));
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].hangcheck_stalled &&
@@ -709,7 +718,7 @@
 
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		if (error->engine[i].engine_id != -1)
-			error_print_engine(m, &error->engine[i]);
+			error_print_engine(m, &error->engine[i], error->epoch);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
@@ -768,7 +777,9 @@
 				   dev_priv->engine[i]->name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++)
-				error_print_request(m, " ", &ee->requests[j]);
+				error_print_request(m, " ",
+						    &ee->requests[j],
+						    error->epoch);
 		}
 
 		if (IS_ERR(ee->waiters)) {
@@ -1277,10 +1288,11 @@
 			   struct drm_i915_error_request *erq)
 {
 	erq->context = request->ctx->hw_id;
-	erq->priority = request->priotree.priority;
+	erq->sched_attr = request->sched.attr;
 	erq->ban_score = atomic_read(&request->ctx->ban_score);
 	erq->seqno = request->global_seqno;
 	erq->jiffies = request->emitted_jiffies;
+	erq->start = i915_ggtt_offset(request->ring->vma);
 	erq->head = request->head;
 	erq->tail = request->tail;
 
@@ -1298,7 +1310,7 @@
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline->requests, link)
+	list_for_each_entry_from(request, &engine->timeline.requests, link)
 		count++;
 	if (!count)
 		return;
@@ -1311,7 +1323,7 @@
 
 	count = 0;
 	request = first;
-	list_for_each_entry_from(request, &engine->timeline->requests, link) {
+	list_for_each_entry_from(request, &engine->timeline.requests, link) {
 		if (count >= ee->num_requests) {
 			/*
 			 * If the ring request list was changed in
@@ -1371,7 +1383,7 @@
 
 	e->handle = ctx->user_handle;
 	e->hw_id = ctx->hw_id;
-	e->priority = ctx->priority;
+	e->sched_attr = ctx->sched;
 	e->ban_score = atomic_read(&ctx->ban_score);
 	e->bannable = i915_gem_context_is_bannable(ctx);
 	e->guilty = atomic_read(&ctx->guilty_count);
@@ -1471,7 +1483,8 @@
 
 			ee->ctx =
 				i915_error_object_create(i915,
-							 request->ctx->engine[i].state);
+							 to_intel_context(request->ctx,
+									  engine)->state);
 
 			error->simulated |=
 				i915_gem_context_no_error_capture(request->ctx);
@@ -1734,6 +1747,22 @@
 #undef DUP
 }
 
+static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
+{
+	unsigned long epoch = error->capture;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
+		const struct drm_i915_error_engine *ee = &error->engine[i];
+
+		if (ee->hangcheck_stalled &&
+		    time_before(ee->hangcheck_timestamp, epoch))
+			epoch = ee->hangcheck_timestamp;
+	}
+
+	return epoch;
+}
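
The epoch chosen here is the earliest stalled-engine hangcheck timestamp (falling back to the capture time when nothing stalled); the error_print_* helpers above then report every jiffies value as an offset from it. A small sketch with invented values:

	/* Illustration only: a request emitted 250ms after the epoch. */
	unsigned long emitted = error->epoch + msecs_to_jiffies(250);
	u32 ms = jiffies_to_msecs(emitted - error->epoch); /* prints "emitted 250ms" */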
+
 static int capture(void *data)
 {
 	struct i915_gpu_state *error = data;
@@ -1742,6 +1771,7 @@
 	error->boottime = ktime_get_boottime();
 	error->uptime = ktime_sub(ktime_get(),
 				  error->i915->gt.last_init_time);
+	error->capture = jiffies;
 
 	capture_params(error);
 	capture_gen_state(error);
@@ -1755,6 +1785,8 @@
 	error->overlay = intel_overlay_capture_error_state(error->i915);
 	error->display = intel_display_capture_error_state(error->i915);
 
+	error->epoch = capture_find_epoch(error);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
new file mode 100644
index 0000000..dac0f8c
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -0,0 +1,366 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2018 Intel Corporation
+ */
+
+#ifndef _I915_GPU_ERROR_H_
+#define _I915_GPU_ERROR_H_
+
+#include <linux/kref.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
+
+#include <drm/drm_mm.h>
+
+#include "intel_device_info.h"
+#include "intel_ringbuffer.h"
+#include "intel_uc_fw.h"
+
+#include "i915_gem.h"
+#include "i915_gem_gtt.h"
+#include "i915_params.h"
+#include "i915_scheduler.h"
+
+struct drm_i915_private;
+struct intel_overlay_error_state;
+struct intel_display_error_state;
+
+struct i915_gpu_state {
+	struct kref ref;
+	ktime_t time;
+	ktime_t boottime;
+	ktime_t uptime;
+	unsigned long capture;
+	unsigned long epoch;
+
+	struct drm_i915_private *i915;
+
+	char error_msg[128];
+	bool simulated;
+	bool awake;
+	bool wakelock;
+	bool suspended;
+	int iommu;
+	u32 reset_count;
+	u32 suspend_count;
+	struct intel_device_info device_info;
+	struct intel_driver_caps driver_caps;
+	struct i915_params params;
+
+	struct i915_error_uc {
+		struct intel_uc_fw guc_fw;
+		struct intel_uc_fw huc_fw;
+		struct drm_i915_error_object *guc_log;
+	} uc;
+
+	/* Generic register state */
+	u32 eir;
+	u32 pgtbl_er;
+	u32 ier;
+	u32 gtier[4], ngtier;
+	u32 ccid;
+	u32 derrmr;
+	u32 forcewake;
+	u32 error; /* gen6+ */
+	u32 err_int; /* gen7 */
+	u32 fault_data0; /* gen8, gen9 */
+	u32 fault_data1; /* gen8, gen9 */
+	u32 done_reg;
+	u32 gac_eco;
+	u32 gam_ecochk;
+	u32 gab_ctl;
+	u32 gfx_mode;
+
+	u32 nfence;
+	u64 fence[I915_MAX_NUM_FENCES];
+	struct intel_overlay_error_state *overlay;
+	struct intel_display_error_state *display;
+
+	struct drm_i915_error_engine {
+		int engine_id;
+		/* Software tracked state */
+		bool idle;
+		bool waiting;
+		int num_waiters;
+		unsigned long hangcheck_timestamp;
+		bool hangcheck_stalled;
+		enum intel_engine_hangcheck_action hangcheck_action;
+		struct i915_address_space *vm;
+		int num_requests;
+		u32 reset_count;
+
+		/* position of active request inside the ring */
+		u32 rq_head, rq_post, rq_tail;
+
+		/* our own tracking of ring head and tail */
+		u32 cpu_ring_head;
+		u32 cpu_ring_tail;
+
+		u32 last_seqno;
+
+		/* Register state */
+		u32 start;
+		u32 tail;
+		u32 head;
+		u32 ctl;
+		u32 mode;
+		u32 hws;
+		u32 ipeir;
+		u32 ipehr;
+		u32 bbstate;
+		u32 instpm;
+		u32 instps;
+		u32 seqno;
+		u64 bbaddr;
+		u64 acthd;
+		u32 fault_reg;
+		u64 faddr;
+		u32 rc_psmi; /* sleep state */
+		u32 semaphore_mboxes[I915_NUM_ENGINES - 1];
+		struct intel_instdone instdone;
+
+		struct drm_i915_error_context {
+			char comm[TASK_COMM_LEN];
+			pid_t pid;
+			u32 handle;
+			u32 hw_id;
+			int ban_score;
+			int active;
+			int guilty;
+			bool bannable;
+			struct i915_sched_attr sched_attr;
+		} context;
+
+		struct drm_i915_error_object {
+			u64 gtt_offset;
+			u64 gtt_size;
+			int page_count;
+			int unused;
+			u32 *pages[0];
+		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
+
+		struct drm_i915_error_object **user_bo;
+		long user_bo_count;
+
+		struct drm_i915_error_object *wa_ctx;
+		struct drm_i915_error_object *default_state;
+
+		struct drm_i915_error_request {
+			long jiffies;
+			pid_t pid;
+			u32 context;
+			int ban_score;
+			u32 seqno;
+			u32 start;
+			u32 head;
+			u32 tail;
+			struct i915_sched_attr sched_attr;
+		} *requests, execlist[EXECLIST_MAX_PORTS];
+		unsigned int num_ports;
+
+		struct drm_i915_error_waiter {
+			char comm[TASK_COMM_LEN];
+			pid_t pid;
+			u32 seqno;
+		} *waiters;
+
+		struct {
+			u32 gfx_mode;
+			union {
+				u64 pdp[4];
+				u32 pp_dir_base;
+			};
+		} vm_info;
+	} engine[I915_NUM_ENGINES];
+
+	struct drm_i915_error_buffer {
+		u32 size;
+		u32 name;
+		u32 rseqno[I915_NUM_ENGINES], wseqno;
+		u64 gtt_offset;
+		u32 read_domains;
+		u32 write_domain;
+		s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
+		u32 tiling:2;
+		u32 dirty:1;
+		u32 purgeable:1;
+		u32 userptr:1;
+		s32 engine:4;
+		u32 cache_level:3;
+	} *active_bo[I915_NUM_ENGINES], *pinned_bo;
+	u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
+	struct i915_address_space *active_vm[I915_NUM_ENGINES];
+};
+
+struct i915_gpu_error {
+	/* For hangcheck timer */
+#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
+#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD)
+
+	struct delayed_work hangcheck_work;
+
+	/* For reset and error_state handling. */
+	spinlock_t lock;
+	/* Protected by the above dev->gpu_error.lock. */
+	struct i915_gpu_state *first_error;
+
+	atomic_t pending_fb_pin;
+
+	unsigned long missed_irq_rings;
+
+	/**
+	 * State variable controlling the reset flow and count
+	 *
+	 * This is a counter which gets incremented when reset is triggered.
+	 *
+	 * Before the reset commences, the I915_RESET_BACKOFF bit is set
+	 * meaning that any waiters holding onto the struct_mutex should
+	 * relinquish the lock immediately in order for the reset to start.
+	 *
+	 * If reset is not completed successfully, the I915_WEDGE bit is
+	 * set meaning that hardware is terminally sour and there is no
+	 * recovery. All waiters on the reset_queue will be woken when
+	 * that happens.
+	 *
+	 * This counter is used by the wait_seqno code to notice that a reset
+	 * event happened and that it needs to restart the entire ioctl (since most
+	 * likely the seqno it waited for won't ever signal anytime soon).
+	 *
+	 * This is important for lock-free wait paths, where no contended lock
+	 * naturally enforces the correct ordering between the bail-out of the
+	 * waiter and the gpu reset work code.
+	 */
+	unsigned long reset_count;
+
+	/**
+	 * flags: Control various stages of the GPU reset
+	 *
+	 * #I915_RESET_BACKOFF - When we start a reset, we want to stop any
+	 * other users acquiring the struct_mutex. To do this we set the
+	 * #I915_RESET_BACKOFF bit in the error flags when we detect a reset
+	 * and then check for that bit before acquiring the struct_mutex (in
+	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
+	 * secondary role in preventing two concurrent global reset attempts.
+	 *
+	 * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
+	 * struct_mutex. We try to acquire the struct_mutex in the reset worker,
+	 * but it may be held by some long running waiter (that we cannot
+	 * interrupt without causing trouble). Once we are ready to do the GPU
+	 * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
+	 * they already hold the struct_mutex and want to participate they can
+	 * inspect the bit and do the reset directly, otherwise the worker
+	 * waits for the struct_mutex.
+	 *
+	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
+	 * acquire the struct_mutex to reset an engine, we need an explicit
+	 * flag to prevent two concurrent reset attempts in the same engine.
+	 * As the number of engines continues to grow, allocate the flags from
+	 * the most significant bits.
+	 *
+	 * #I915_WEDGED - If reset fails and we can no longer use the GPU,
+	 * we set the #I915_WEDGED bit. Prior to command submission, e.g.
+	 * i915_request_alloc(), this bit is checked and the sequence
+	 * aborted (with -EIO reported to userspace) if set.
+	 */
+	unsigned long flags;
+#define I915_RESET_BACKOFF	0
+#define I915_RESET_HANDOFF	1
+#define I915_RESET_MODESET	2
+#define I915_WEDGED		(BITS_PER_LONG - 1)
+#define I915_RESET_ENGINE	(I915_WEDGED - I915_NUM_ENGINES)
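
A minimal sketch of how the per-engine bits are meant to be claimed, mirroring the i915_handle_error() hunk later in this diff (error handling elided):

	/* Each engine owns one flag bit, counted down from I915_WEDGED. */
	if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, &error->flags)) {
		/* ... reset just this engine ... */
		clear_bit(I915_RESET_ENGINE + engine->id, &error->flags);
	}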
+
+	/** Number of times an engine has been reset */
+	u32 reset_engine_count[I915_NUM_ENGINES];
+
+	/** Set of stalled engines with guilty requests, in the current reset */
+	u32 stalled_mask;
+
+	/** Reason for the current *global* reset */
+	const char *reason;
+
+	/**
+	 * Waitqueue to signal when a hang is detected. Used for waiters
+	 * to release the struct_mutex for the reset to proceed.
+	 */
+	wait_queue_head_t wait_queue;
+
+	/**
+	 * Waitqueue to signal when the reset has completed. Used by clients
+	 * that wait for dev_priv->mm.wedged to settle.
+	 */
+	wait_queue_head_t reset_queue;
+
+	/* For missed irq/seqno simulation. */
+	unsigned long test_irq_rings;
+};
+
+struct drm_i915_error_state_buf {
+	struct drm_i915_private *i915;
+	unsigned int bytes;
+	unsigned int size;
+	int err;
+	u8 *buf;
+	loff_t start;
+	loff_t pos;
+};
+
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
+__printf(2, 3)
+void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
+int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
+			    const struct i915_gpu_state *gpu);
+int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
+			      struct drm_i915_private *i915,
+			      size_t count, loff_t pos);
+
+static inline void
+i915_error_state_buf_release(struct drm_i915_error_state_buf *eb)
+{
+	kfree(eb->buf);
+}
+
+struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
+void i915_capture_error_state(struct drm_i915_private *dev_priv,
+			      u32 engine_mask,
+			      const char *error_msg);
+
+static inline struct i915_gpu_state *
+i915_gpu_state_get(struct i915_gpu_state *gpu)
+{
+	kref_get(&gpu->ref);
+	return gpu;
+}
+
+void __i915_gpu_state_free(struct kref *kref);
+static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
+{
+	if (gpu)
+		kref_put(&gpu->ref, __i915_gpu_state_free);
+}
+
+struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
+void i915_reset_error_state(struct drm_i915_private *i915);
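
A hedged sketch of consuming this capture API from driver code (buffer size invented, error paths elided):

	struct i915_gpu_state *gpu = i915_capture_gpu_state(i915);
	struct drm_i915_error_state_buf buf;

	if (!i915_error_state_buf_init(&buf, i915, SZ_4K, 0)) {
		i915_error_state_to_str(&buf, gpu); /* formats into buf.buf */
		i915_error_state_buf_release(&buf);
	}
	i915_gpu_state_put(gpu);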
+
+#else
+
+static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
+					    u32 engine_mask,
+					    const char *error_msg)
+{
+}
+
+static inline struct i915_gpu_state *
+i915_first_error_state(struct drm_i915_private *i915)
+{
+	return NULL;
+}
+
+static inline void i915_reset_error_state(struct drm_i915_private *i915)
+{
+}
+
+#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
+
+#endif /* _I915_GPU_ERROR_H_ */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 633c187..f9bc3aa 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -243,6 +243,41 @@
 	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
+static u32
+gen11_gt_engine_identity(struct drm_i915_private * const i915,
+			 const unsigned int bank, const unsigned int bit);
+
+bool gen11_reset_one_iir(struct drm_i915_private * const i915,
+			 const unsigned int bank,
+			 const unsigned int bit)
+{
+	void __iomem * const regs = i915->regs;
+	u32 dw;
+
+	lockdep_assert_held(&i915->irq_lock);
+
+	dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank));
+	if (dw & BIT(bit)) {
+		/*
+		 * According to the BSpec, DW_IIR bits cannot be cleared without
+		 * first servicing the Selector & Shared IIR registers.
+		 */
+		gen11_gt_engine_identity(i915, bank, bit);
+
+		/*
+		 * We locked GT INT DW by reading it. If we want to (try
+		 * to) recover from this successfully, we need to clear
+		 * our bit, otherwise we are locking the register for
+		 * everybody.
+		 */
+		raw_reg_write(regs, GEN11_GT_INTR_DW(bank), BIT(bit));
+
+		return true;
+	}
+
+	return false;
+}
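
Callers are expected to hold i915->irq_lock and may need to loop until the bit stays clear, as the gen11 RPS reset path added later in this diff does:

	spin_lock_irq(&dev_priv->irq_lock);
	while (gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM))
		;
	spin_unlock_irq(&dev_priv->irq_lock);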
+
 /**
  * ilk_update_display_irq - update DEIMR
  * @dev_priv: driver private
@@ -308,17 +343,29 @@
 
 static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv)
 {
+	WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11);
+
 	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
 }
 
 static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv)
 {
-	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR;
+	if (INTEL_GEN(dev_priv) >= 11)
+		return GEN11_GPM_WGBOXPERF_INTR_MASK;
+	else if (INTEL_GEN(dev_priv) >= 8)
+		return GEN8_GT_IMR(2);
+	else
+		return GEN6_PMIMR;
 }
 
 static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv)
 {
-	return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER;
+	if (INTEL_GEN(dev_priv) >= 11)
+		return GEN11_GPM_WGBOXPERF_INTR_ENABLE;
+	else if (INTEL_GEN(dev_priv) >= 8)
+		return GEN8_GT_IER(2);
+	else
+		return GEN6_PMIER;
 }
 
 /**
@@ -400,6 +447,18 @@
 	/* though a barrier is missing here, we don't really need one */
 }
 
+void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv)
+{
+	spin_lock_irq(&dev_priv->irq_lock);
+
+	while (gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM))
+		;
+
+	dev_priv->gt_pm.rps.pm_iir = 0;
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
 void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv)
 {
 	spin_lock_irq(&dev_priv->irq_lock);
@@ -415,12 +474,14 @@
 	if (READ_ONCE(rps->interrupts_enabled))
 		return;
 
-	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
-		return;
-
 	spin_lock_irq(&dev_priv->irq_lock);
 	WARN_ON_ONCE(rps->pm_iir);
-	WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
+
+	if (INTEL_GEN(dev_priv) >= 11)
+		WARN_ON_ONCE(gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM));
+	else
+		WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
+
 	rps->interrupts_enabled = true;
 	gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
 
@@ -434,9 +495,6 @@
 	if (!READ_ONCE(rps->interrupts_enabled))
 		return;
 
-	if (WARN_ON_ONCE(IS_GEN11(dev_priv)))
-		return;
-
 	spin_lock_irq(&dev_priv->irq_lock);
 	rps->interrupts_enabled = false;
 
@@ -453,7 +511,10 @@
 	 * state of the worker can be discarded.
 	 */
 	cancel_work_sync(&rps->work);
-	gen6_reset_rps_interrupts(dev_priv);
+	if (INTEL_GEN(dev_priv) >= 11)
+		gen11_reset_rps_interrupts(dev_priv);
+	else
+		gen6_reset_rps_interrupts(dev_priv);
 }
 
 void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv)
@@ -1399,19 +1460,18 @@
 }
 
 static void
-gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
+gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	bool tasklet = false;
 
-	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		if (READ_ONCE(engine->execlists.active)) {
-			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-			tasklet = true;
-		}
+	if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
+		if (READ_ONCE(engine->execlists.active))
+			tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
+						    &engine->irq_posted);
 	}
 
-	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
+	if (iir & GT_RENDER_USER_INTERRUPT) {
 		notify_ring(engine);
 		tasklet |= USES_GUC_SUBMISSION(engine->i915);
 	}
@@ -1466,21 +1526,21 @@
 {
 	if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
 		gen8_cs_irq_handler(i915->engine[RCS],
-				    gt_iir[0], GEN8_RCS_IRQ_SHIFT);
+				    gt_iir[0] >> GEN8_RCS_IRQ_SHIFT);
 		gen8_cs_irq_handler(i915->engine[BCS],
-				    gt_iir[0], GEN8_BCS_IRQ_SHIFT);
+				    gt_iir[0] >> GEN8_BCS_IRQ_SHIFT);
 	}
 
 	if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
 		gen8_cs_irq_handler(i915->engine[VCS],
-				    gt_iir[1], GEN8_VCS1_IRQ_SHIFT);
+				    gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT);
 		gen8_cs_irq_handler(i915->engine[VCS2],
-				    gt_iir[1], GEN8_VCS2_IRQ_SHIFT);
+				    gt_iir[1] >> GEN8_VCS2_IRQ_SHIFT);
 	}
 
 	if (master_ctl & GEN8_GT_VECS_IRQ) {
 		gen8_cs_irq_handler(i915->engine[VECS],
-				    gt_iir[3], GEN8_VECS_IRQ_SHIFT);
+				    gt_iir[3] >> GEN8_VECS_IRQ_SHIFT);
 	}
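
The refactor above moves the shift out of the handler and into the callers: rather than testing gt_iir against masks shifted up by test_shift, each caller shifts the IIR value down first, so the two predicates are equivalent. For example (illustrative):

	u32 iir = gt_iir[0] >> GEN8_RCS_IRQ_SHIFT;
	/* iir & GT_RENDER_USER_INTERRUPT
	 * == gt_iir[0] & (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT) */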
 
 	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
@@ -1627,7 +1687,7 @@
 	int head, tail;
 
 	spin_lock(&pipe_crc->lock);
-	if (pipe_crc->source) {
+	if (pipe_crc->source && !crtc->base.crc.opened) {
 		if (!pipe_crc->entries) {
 			spin_unlock(&pipe_crc->lock);
 			DRM_DEBUG_KMS("spurious interrupt\n");
@@ -1667,7 +1727,7 @@
 		 * On GEN8+ sometimes the second CRC is bonkers as well, so
 		 * don't trust that one either.
 		 */
-		if (pipe_crc->skipped == 0 ||
+		if (pipe_crc->skipped <= 0 ||
 		    (INTEL_GEN(dev_priv) >= 8 && pipe_crc->skipped == 1)) {
 			pipe_crc->skipped++;
 			spin_unlock(&pipe_crc->lock);
@@ -1766,37 +1826,8 @@
 
 static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
 {
-	if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) {
-		/* Sample the log buffer flush related bits & clear them out now
-		 * itself from the message identity register to minimize the
-		 * probability of losing a flush interrupt, when there are back
-		 * to back flush interrupts.
-		 * There can be a new flush interrupt, for different log buffer
-		 * type (like for ISR), whilst Host is handling one (for DPC).
-		 * Since same bit is used in message register for ISR & DPC, it
-		 * could happen that GuC sets the bit for 2nd interrupt but Host
-		 * clears out the bit on handling the 1st interrupt.
-		 */
-		u32 msg, flush;
-
-		msg = I915_READ(SOFT_SCRATCH(15));
-		flush = msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED |
-			       INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER);
-		if (flush) {
-			/* Clear the message bits that are handled */
-			I915_WRITE(SOFT_SCRATCH(15), msg & ~flush);
-
-			/* Handle flush interrupt in bottom half */
-			queue_work(dev_priv->guc.log.runtime.flush_wq,
-				   &dev_priv->guc.log.runtime.flush_work);
-
-			dev_priv->guc.log.flush_interrupt_count++;
-		} else {
-			/* Not clearing of unhandled event bits won't result in
-			 * re-triggering of the interrupt.
-			 */
-		}
-	}
+	if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT)
+		intel_guc_to_host_event_handler(&dev_priv->guc);
 }
 
 static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv)
@@ -2433,6 +2464,13 @@
 	if (de_iir & DE_ERR_INT_IVB)
 		ivb_err_int_handler(dev_priv);
 
+	if (de_iir & DE_EDP_PSR_INT_HSW) {
+		u32 psr_iir = I915_READ(EDP_PSR_IIR);
+
+		intel_psr_irq_handler(dev_priv, psr_iir);
+		I915_WRITE(EDP_PSR_IIR, psr_iir);
+	}
+
 	if (de_iir & DE_AUX_CHANNEL_A_IVB)
 		dp_aux_irq_handler(dev_priv);
 
@@ -2562,11 +2600,25 @@
 	if (master_ctl & GEN8_DE_MISC_IRQ) {
 		iir = I915_READ(GEN8_DE_MISC_IIR);
 		if (iir) {
+			bool found = false;
+
 			I915_WRITE(GEN8_DE_MISC_IIR, iir);
 			ret = IRQ_HANDLED;
-			if (iir & GEN8_DE_MISC_GSE)
+
+			if (iir & GEN8_DE_MISC_GSE) {
 				intel_opregion_asle_intr(dev_priv);
-			else
+				found = true;
+			}
+
+			if (iir & GEN8_DE_EDP_PSR) {
+				u32 psr_iir = I915_READ(EDP_PSR_IIR);
+
+				intel_psr_irq_handler(dev_priv, psr_iir);
+				I915_WRITE(EDP_PSR_IIR, psr_iir);
+				found = true;
+			}
+
+			if (!found)
 				DRM_ERROR("Unexpected DE Misc interrupt\n");
 		}
 		else
@@ -2762,58 +2814,16 @@
 	     (W)->i915;							\
 	     __fini_wedge((W)))
 
-static __always_inline void
-gen11_cs_irq_handler(struct intel_engine_cs * const engine, const u32 iir)
-{
-	gen8_cs_irq_handler(engine, iir, 0);
-}
-
-static void
-gen11_gt_engine_irq_handler(struct drm_i915_private * const i915,
-			    const unsigned int bank,
-			    const unsigned int engine_n,
-			    const u16 iir)
-{
-	struct intel_engine_cs ** const engine = i915->engine;
-
-	switch (bank) {
-	case 0:
-		switch (engine_n) {
-
-		case GEN11_RCS0:
-			return gen11_cs_irq_handler(engine[RCS], iir);
-
-		case GEN11_BCS:
-			return gen11_cs_irq_handler(engine[BCS], iir);
-		}
-	case 1:
-		switch (engine_n) {
-
-		case GEN11_VCS(0):
-			return gen11_cs_irq_handler(engine[_VCS(0)], iir);
-		case GEN11_VCS(1):
-			return gen11_cs_irq_handler(engine[_VCS(1)], iir);
-		case GEN11_VCS(2):
-			return gen11_cs_irq_handler(engine[_VCS(2)], iir);
-		case GEN11_VCS(3):
-			return gen11_cs_irq_handler(engine[_VCS(3)], iir);
-
-		case GEN11_VECS(0):
-			return gen11_cs_irq_handler(engine[_VECS(0)], iir);
-		case GEN11_VECS(1):
-			return gen11_cs_irq_handler(engine[_VECS(1)], iir);
-		}
-	}
-}
-
 static u32
-gen11_gt_engine_intr(struct drm_i915_private * const i915,
-		     const unsigned int bank, const unsigned int bit)
+gen11_gt_engine_identity(struct drm_i915_private * const i915,
+			 const unsigned int bank, const unsigned int bit)
 {
 	void __iomem * const regs = i915->regs;
 	u32 timeout_ts;
 	u32 ident;
 
+	lockdep_assert_held(&i915->irq_lock);
+
 	raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit));
 
 	/*
@@ -2835,42 +2845,101 @@
 	raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank),
 		      GEN11_INTR_DATA_VALID);
 
-	return ident & GEN11_INTR_ENGINE_MASK;
+	return ident;
+}
+
+static void
+gen11_other_irq_handler(struct drm_i915_private * const i915,
+			const u8 instance, const u16 iir)
+{
+	if (instance == OTHER_GTPM_INSTANCE)
+		return gen6_rps_irq_handler(i915, iir);
+
+	WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
+		  instance, iir);
+}
+
+static void
+gen11_engine_irq_handler(struct drm_i915_private * const i915,
+			 const u8 class, const u8 instance, const u16 iir)
+{
+	struct intel_engine_cs *engine;
+
+	if (instance <= MAX_ENGINE_INSTANCE)
+		engine = i915->engine_class[class][instance];
+	else
+		engine = NULL;
+
+	if (likely(engine))
+		return gen8_cs_irq_handler(engine, iir);
+
+	WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
+		  class, instance);
+}
+
+static void
+gen11_gt_identity_handler(struct drm_i915_private * const i915,
+			  const u32 identity)
+{
+	const u8 class = GEN11_INTR_ENGINE_CLASS(identity);
+	const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity);
+	const u16 intr = GEN11_INTR_ENGINE_INTR(identity);
+
+	if (unlikely(!intr))
+		return;
+
+	if (class <= COPY_ENGINE_CLASS)
+		return gen11_engine_irq_handler(i915, class, instance, intr);
+
+	if (class == OTHER_CLASS)
+		return gen11_other_irq_handler(i915, instance, intr);
+
+	WARN_ONCE(1, "unknown interrupt class=0x%x, instance=0x%x, intr=0x%x\n",
+		  class, instance, intr);
+}
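
The identity register packs intr/class/instance into one dword; a hedged decode sketch follows, where the field positions are assumptions for illustration and the real definitions live in i915_reg.h:

	/* Assumed layout: bits 15:0 intr, 18:16 class, 25:20 instance. */
	u16 intr     = identity & 0xffff;
	u8  class    = (identity >> 16) & 0x7;
	u8  instance = (identity >> 20) & 0x3f;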
+
+static void
+gen11_gt_bank_handler(struct drm_i915_private * const i915,
+		      const unsigned int bank)
+{
+	void __iomem * const regs = i915->regs;
+	unsigned long intr_dw;
+	unsigned int bit;
+
+	lockdep_assert_held(&i915->irq_lock);
+
+	intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank));
+
+	if (unlikely(!intr_dw)) {
+		DRM_ERROR("GT_INTR_DW%u blank!\n", bank);
+		return;
+	}
+
+	for_each_set_bit(bit, &intr_dw, 32) {
+		const u32 ident = gen11_gt_engine_identity(i915,
+							   bank, bit);
+
+		gen11_gt_identity_handler(i915, ident);
+	}
+
+	/* Clear must be after shared has been served for engine */
+	raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw);
 }
 
 static void
 gen11_gt_irq_handler(struct drm_i915_private * const i915,
 		     const u32 master_ctl)
 {
-	void __iomem * const regs = i915->regs;
 	unsigned int bank;
 
+	spin_lock(&i915->irq_lock);
+
 	for (bank = 0; bank < 2; bank++) {
-		unsigned long intr_dw;
-		unsigned int bit;
-
-		if (!(master_ctl & GEN11_GT_DW_IRQ(bank)))
-			continue;
-
-		intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank));
-
-		if (unlikely(!intr_dw)) {
-			DRM_ERROR("GT_INTR_DW%u blank!\n", bank);
-			continue;
-		}
-
-		for_each_set_bit(bit, &intr_dw, 32) {
-			const u16 iir = gen11_gt_engine_intr(i915, bank, bit);
-
-			if (unlikely(!iir))
-				continue;
-
-			gen11_gt_engine_irq_handler(i915, bank, bit, iir);
-		}
-
-		/* Clear must be after shared has been served for engine */
-		raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw);
+		if (master_ctl & GEN11_GT_DW_IRQ(bank))
+			gen11_gt_bank_handler(i915, bank);
 	}
+
+	spin_unlock(&i915->irq_lock);
 }
 
 static irqreturn_t gen11_irq_handler(int irq, void *arg)
@@ -2912,15 +2981,11 @@
 	return IRQ_HANDLED;
 }
 
-/**
- * i915_reset_device - do process context error handling work
- * @dev_priv: i915 device private
- *
- * Fire an error uevent so userspace can see that a hang or error
- * was detected.
- */
-static void i915_reset_device(struct drm_i915_private *dev_priv)
+static void i915_reset_device(struct drm_i915_private *dev_priv,
+			      u32 engine_mask,
+			      const char *reason)
 {
+	struct i915_gpu_error *error = &dev_priv->gpu_error;
 	struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
 	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
 	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
@@ -2936,29 +3001,35 @@
 	i915_wedge_on_timeout(&w, dev_priv, 5*HZ) {
 		intel_prepare_reset(dev_priv);
 
+		error->reason = reason;
+		error->stalled_mask = engine_mask;
+
 		/* Signal that locked waiters should reset the GPU */
-		set_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags);
-		wake_up_all(&dev_priv->gpu_error.wait_queue);
+		smp_mb__before_atomic();
+		set_bit(I915_RESET_HANDOFF, &error->flags);
+		wake_up_all(&error->wait_queue);
 
 		/* Wait for anyone holding the lock to wakeup, without
 		 * blocking indefinitely on struct_mutex.
 		 */
 		do {
 			if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-				i915_reset(dev_priv, 0);
+				i915_reset(dev_priv, engine_mask, reason);
 				mutex_unlock(&dev_priv->drm.struct_mutex);
 			}
-		} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+		} while (wait_on_bit_timeout(&error->flags,
 					     I915_RESET_HANDOFF,
 					     TASK_UNINTERRUPTIBLE,
 					     1));
 
+		error->stalled_mask = 0;
+		error->reason = NULL;
+
 		intel_finish_reset(dev_priv);
 	}
 
-	if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
-		kobject_uevent_env(kobj,
-				   KOBJ_CHANGE, reset_done_event);
+	if (!test_bit(I915_WEDGED, &error->flags))
+		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
 }
 
 static void i915_clear_error_registers(struct drm_i915_private *dev_priv)
@@ -2990,6 +3061,7 @@
  * i915_handle_error - handle a gpu error
  * @dev_priv: i915 device private
  * @engine_mask: mask representing engines that are hung
+ * @flags: control flags
  * @fmt: Error message format string
  *
  * Do some basic checking of register state at error time and
@@ -3000,16 +3072,23 @@
  */
 void i915_handle_error(struct drm_i915_private *dev_priv,
 		       u32 engine_mask,
+		       unsigned long flags,
 		       const char *fmt, ...)
 {
 	struct intel_engine_cs *engine;
 	unsigned int tmp;
-	va_list args;
 	char error_msg[80];
+	char *msg = NULL;
 
-	va_start(args, fmt);
-	vscnprintf(error_msg, sizeof(error_msg), fmt, args);
-	va_end(args);
+	if (fmt) {
+		va_list args;
+
+		va_start(args, fmt);
+		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
+		va_end(args);
+
+		msg = error_msg;
+	}
 
 	/*
 	 * In most cases it's guaranteed that we get here with an RPM
@@ -3020,8 +3099,12 @@
 	 */
 	intel_runtime_pm_get(dev_priv);
 
-	i915_capture_error_state(dev_priv, engine_mask, error_msg);
-	i915_clear_error_registers(dev_priv);
+	engine_mask &= INTEL_INFO(dev_priv)->ring_mask;
+
+	if (flags & I915_ERROR_CAPTURE) {
+		i915_capture_error_state(dev_priv, engine_mask, msg);
+		i915_clear_error_registers(dev_priv);
+	}
 
 	/*
 	 * Try engine reset when available. We fall back to full reset if
@@ -3034,7 +3117,7 @@
 					     &dev_priv->gpu_error.flags))
 				continue;
 
-			if (i915_reset_engine(engine, 0) == 0)
+			if (i915_reset_engine(engine, msg) == 0)
 				engine_mask &= ~intel_engine_flag(engine);
 
 			clear_bit(I915_RESET_ENGINE + engine->id,
@@ -3064,7 +3147,7 @@
 				    TASK_UNINTERRUPTIBLE);
 	}
 
-	i915_reset_device(dev_priv);
+	i915_reset_device(dev_priv, engine_mask, msg);
 
 	for_each_engine(engine, dev_priv, tmp) {
 		clear_bit(I915_RESET_ENGINE + engine->id,
@@ -3286,6 +3369,11 @@
 	if (IS_GEN7(dev_priv))
 		I915_WRITE(GEN7_ERR_INT, 0xffffffff);
 
+	if (IS_HASWELL(dev_priv)) {
+		I915_WRITE(EDP_PSR_IMR, 0xffffffff);
+		I915_WRITE(EDP_PSR_IIR, 0xffffffff);
+	}
+
 	gen5_gt_irq_reset(dev_priv);
 
 	ibx_irq_reset(dev_priv);
@@ -3324,6 +3412,9 @@
 
 	gen8_gt_irq_reset(dev_priv);
 
+	I915_WRITE(EDP_PSR_IMR, 0xffffffff);
+	I915_WRITE(EDP_PSR_IIR, 0xffffffff);
+
 	for_each_pipe(dev_priv, pipe)
 		if (intel_display_power_is_enabled(dev_priv,
 						   POWER_DOMAIN_PIPE(pipe)))
@@ -3349,6 +3440,9 @@
 	I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK,	~0);
 	I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK,	~0);
 	I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK,	~0);
+
+	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
+	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK,  ~0);
 }
 
 static void gen11_irq_reset(struct drm_device *dev)
@@ -3697,6 +3791,12 @@
 			      DE_DP_A_HOTPLUG);
 	}
 
+	if (IS_HASWELL(dev_priv)) {
+		gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR);
+		intel_psr_irq_control(dev_priv, dev_priv->psr.debug);
+		display_mask |= DE_EDP_PSR_INT_HSW;
+	}
+
 	dev_priv->irq_mask = ~display_mask;
 
 	ibx_irq_pre_postinstall(dev);
@@ -3807,7 +3907,7 @@
 	uint32_t de_pipe_enables;
 	u32 de_port_masked = GEN8_AUX_CHANNEL_A;
 	u32 de_port_enables;
-	u32 de_misc_masked = GEN8_DE_MISC_GSE;
+	u32 de_misc_masked = GEN8_DE_MISC_GSE | GEN8_DE_EDP_PSR;
 	enum pipe pipe;
 
 	if (INTEL_GEN(dev_priv) >= 9) {
@@ -3832,6 +3932,9 @@
 	else if (IS_BROADWELL(dev_priv))
 		de_port_enables |= GEN8_PORT_DP_A_HOTPLUG;
 
+	gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR);
+	intel_psr_irq_control(dev_priv, dev_priv->psr.debug);
+
 	for_each_pipe(dev_priv, pipe) {
 		dev_priv->de_irq_mask[pipe] = ~de_pipe_masked;
 
@@ -3887,7 +3990,14 @@
 	I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK,	~(irqs | irqs << 16));
 	I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK,	~(irqs | irqs << 16));
 
-	dev_priv->pm_imr = 0xffffffff; /* TODO */
+	/*
+	 * RPS interrupts will get enabled/disabled on demand when RPS itself
+	 * is enabled/disabled.
+	 */
+	dev_priv->pm_ier = 0x0;
+	dev_priv->pm_imr = ~dev_priv->pm_ier;
+	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
+	I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK,  ~0);
 }
 
 static int gen11_irq_postinstall(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_oa_icl.c b/drivers/gpu/drm/i915/i915_oa_icl.c
new file mode 100644
index 0000000..a566792
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_oa_icl.c
@@ -0,0 +1,118 @@
+/*
+ * Autogenerated file by GPU Top : https://github.com/rib/gputop
+ * DO NOT EDIT manually!
+ *
+ *
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/sysfs.h>
+
+#include "i915_drv.h"
+#include "i915_oa_icl.h"
+
+static const struct i915_oa_reg b_counter_config_test_oa[] = {
+	{ _MMIO(0x2740), 0x00000000 },
+	{ _MMIO(0x2710), 0x00000000 },
+	{ _MMIO(0x2714), 0xf0800000 },
+	{ _MMIO(0x2720), 0x00000000 },
+	{ _MMIO(0x2724), 0xf0800000 },
+	{ _MMIO(0x2770), 0x00000004 },
+	{ _MMIO(0x2774), 0x0000ffff },
+	{ _MMIO(0x2778), 0x00000003 },
+	{ _MMIO(0x277c), 0x0000ffff },
+	{ _MMIO(0x2780), 0x00000007 },
+	{ _MMIO(0x2784), 0x0000ffff },
+	{ _MMIO(0x2788), 0x00100002 },
+	{ _MMIO(0x278c), 0x0000fff7 },
+	{ _MMIO(0x2790), 0x00100002 },
+	{ _MMIO(0x2794), 0x0000ffcf },
+	{ _MMIO(0x2798), 0x00100082 },
+	{ _MMIO(0x279c), 0x0000ffef },
+	{ _MMIO(0x27a0), 0x001000c2 },
+	{ _MMIO(0x27a4), 0x0000ffe7 },
+	{ _MMIO(0x27a8), 0x00100001 },
+	{ _MMIO(0x27ac), 0x0000ffe7 },
+};
+
+static const struct i915_oa_reg flex_eu_config_test_oa[] = {
+};
+
+static const struct i915_oa_reg mux_config_test_oa[] = {
+	{ _MMIO(0xd04), 0x00000200 },
+	{ _MMIO(0x9840), 0x00000000 },
+	{ _MMIO(0x9884), 0x00000000 },
+	{ _MMIO(0x9888), 0x10060000 },
+	{ _MMIO(0x9888), 0x22060000 },
+	{ _MMIO(0x9888), 0x16060000 },
+	{ _MMIO(0x9888), 0x24060000 },
+	{ _MMIO(0x9888), 0x18060000 },
+	{ _MMIO(0x9888), 0x1a060000 },
+	{ _MMIO(0x9888), 0x12060000 },
+	{ _MMIO(0x9888), 0x14060000 },
+	{ _MMIO(0x9888), 0x10060000 },
+	{ _MMIO(0x9888), 0x22060000 },
+	{ _MMIO(0x9884), 0x00000003 },
+	{ _MMIO(0x9888), 0x16130000 },
+	{ _MMIO(0x9888), 0x24000001 },
+	{ _MMIO(0x9888), 0x0e130056 },
+	{ _MMIO(0x9888), 0x10130000 },
+	{ _MMIO(0x9888), 0x1a130000 },
+	{ _MMIO(0x9888), 0x541f0001 },
+	{ _MMIO(0x9888), 0x181f0000 },
+	{ _MMIO(0x9888), 0x4c1f0000 },
+	{ _MMIO(0x9888), 0x301f0000 },
+};
+
+static ssize_t
+show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "1\n");
+}
+
+void
+i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv)
+{
+	strlcpy(dev_priv->perf.oa.test_config.uuid,
+		"a291665e-244b-4b76-9b9a-01de9d3c8068",
+		sizeof(dev_priv->perf.oa.test_config.uuid));
+	dev_priv->perf.oa.test_config.id = 1;
+
+	dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa;
+	dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa);
+
+	dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa;
+	dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa);
+
+	dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa;
+	dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa);
+
+	dev_priv->perf.oa.test_config.sysfs_metric.name = "a291665e-244b-4b76-9b9a-01de9d3c8068";
+	dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs;
+
+	dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr;
+
+	dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id";
+	dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444;
+	dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id;
+}
diff --git a/drivers/gpu/drm/i915/i915_oa_icl.h b/drivers/gpu/drm/i915/i915_oa_icl.h
new file mode 100644
index 0000000..ae1c24a
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_oa_icl.h
@@ -0,0 +1,34 @@
+/*
+ * Autogenerated file by GPU Top : https://github.com/rib/gputop
+ * DO NOT EDIT manually!
+ *
+ *
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_OA_ICL_H__
+#define __I915_OA_ICL_H__
+
+extern void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 08108ce..66ea355 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -164,6 +164,9 @@
 i915_param_named_unsafe(huc_firmware_path, charp, 0400,
 	"HuC firmware path to use instead of the default one");
 
+i915_param_named_unsafe(dmc_firmware_path, charp, 0400,
+	"DMC firmware path to use instead of the default one");
+
 i915_param_named_unsafe(enable_dp_mst, bool, 0600,
 	"Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)");
 
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 430f5f9..6684025 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -48,9 +48,10 @@
 	param(int, enable_ips, 1) \
 	param(int, invert_brightness, 0) \
 	param(int, enable_guc, 0) \
-	param(int, guc_log_level, 0) \
+	param(int, guc_log_level, -1) \
 	param(char *, guc_firmware_path, NULL) \
 	param(char *, huc_firmware_path, NULL) \
+	param(char *, dmc_firmware_path, NULL) \
 	param(int, mmio_debug, 0) \
 	param(int, edp_vswing, 0) \
 	param(int, reset, 2) \
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 062e91b..4364922 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -602,6 +602,7 @@
 	PLATFORM(INTEL_ICELAKE),
 	.is_alpha_support = 1,
 	.has_resource_streamer = 0,
+	.ring_mask = RENDER_RING | BLT_RING | VEBOX_RING | BSD_RING | BSD3_RING,
 };
 
 #undef GEN
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index abaca6e..019bd2d 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -209,6 +209,7 @@
 #include "i915_oa_cflgt2.h"
 #include "i915_oa_cflgt3.h"
 #include "i915_oa_cnl.h"
+#include "i915_oa_icl.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
@@ -1042,7 +1043,7 @@
 
 		I915_WRITE(GEN7_OASTATUS2,
 			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
-			    OA_MEM_SELECT_GGTT));
+			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
 		dev_priv->perf.oa.oa_buffer.head = head;
 
 		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
@@ -1233,7 +1234,7 @@
 		 *
 		 * NB: implied RCS engine...
 		 */
-		ring = engine->context_pin(engine, stream->ctx);
+		ring = intel_context_pin(stream->ctx, engine);
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 		if (IS_ERR(ring))
 			return PTR_ERR(ring);
@@ -1245,7 +1246,7 @@
 		 * with gen8+ and execlists
 		 */
 		dev_priv->perf.oa.specific_ctx_id =
-			i915_ggtt_offset(stream->ctx->engine[engine->id].state);
+			i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state);
 	}
 
 	return 0;
@@ -1270,7 +1271,7 @@
 		mutex_lock(&dev_priv->drm.struct_mutex);
 
 		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
-		engine->context_unpin(engine, stream->ctx);
+		intel_context_unpin(stream->ctx, engine);
 
 		mutex_unlock(&dev_priv->drm.struct_mutex);
 	}
@@ -1332,7 +1333,8 @@
 	/* Pre-DevBDW: OABUFFER must be set with counters off,
 	 * before OASTATUS1, but after OASTATUS2
 	 */
-	I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
+	I915_WRITE(GEN7_OASTATUS2,
+		   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
 	dev_priv->perf.oa.oa_buffer.head = gtt_offset;
 
 	I915_WRITE(GEN7_OABUFFER, gtt_offset);
@@ -1392,7 +1394,7 @@
 	 *  bit."
 	 */
 	I915_WRITE(GEN8_OABUFFER, gtt_offset |
-		   OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT);
+		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
 	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
 
 	/* Mark that we need updated tail pointers to read from... */
@@ -1693,7 +1695,7 @@
 						 const struct i915_oa_config *oa_config)
 {
 	struct intel_engine_cs *engine = dev_priv->engine[RCS];
-	struct i915_gem_timeline *timeline;
+	struct i915_timeline *timeline;
 	struct i915_request *rq;
 	int ret;
 
@@ -1714,15 +1716,11 @@
 	/* Queue this switch after all other activity */
 	list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
 		struct i915_request *prev;
-		struct intel_timeline *tl;
 
-		tl = &timeline->engine[engine->id];
-		prev = i915_gem_active_raw(&tl->last_request,
+		prev = i915_gem_active_raw(&timeline->last_request,
 					   &dev_priv->drm.struct_mutex);
 		if (prev)
-			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
-							 &prev->submit,
-							 GFP_KERNEL);
+			i915_request_await_dma_fence(rq, &prev->fence);
 	}
 
 	i915_request_add(rq);
@@ -1757,6 +1755,7 @@
 static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 				       const struct i915_oa_config *oa_config)
 {
+	struct intel_engine_cs *engine = dev_priv->engine[RCS];
 	struct i915_gem_context *ctx;
 	int ret;
 	unsigned int wait_flags = I915_WAIT_LOCKED;
@@ -1787,7 +1786,7 @@
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
-		struct intel_context *ce = &ctx->engine[RCS];
+		struct intel_context *ce = to_intel_context(ctx, engine);
 		u32 *regs;
 
 		/* OA settings will be set upon first use */
@@ -1840,7 +1839,7 @@
 	 * be read back from automatically triggered reports, as part of the
 	 * RPT_ID field.
 	 */
-	if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) {
+	if (IS_GEN(dev_priv, 9, 11)) {
 		I915_WRITE(GEN8_OA_DEBUG,
 			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
 					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
@@ -1870,7 +1869,6 @@
 
 	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
 				      ~GT_NOA_ENABLE));
-
 }
 
 static void gen10_disable_metric_set(struct drm_i915_private *dev_priv)
@@ -1885,6 +1883,13 @@
 
 static void gen7_oa_enable(struct drm_i915_private *dev_priv)
 {
+	struct i915_gem_context *ctx =
+			dev_priv->perf.oa.exclusive_stream->ctx;
+	u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
+	bool periodic = dev_priv->perf.oa.periodic;
+	u32 period_exponent = dev_priv->perf.oa.period_exponent;
+	u32 report_format = dev_priv->perf.oa.oa_buffer.format;
+
 	/*
 	 * Reset buf pointers so we don't forward reports from before now.
 	 *
@@ -1896,25 +1901,14 @@
 	 */
 	gen7_init_oa_buffer(dev_priv);
 
-	if (dev_priv->perf.oa.exclusive_stream->enabled) {
-		struct i915_gem_context *ctx =
-			dev_priv->perf.oa.exclusive_stream->ctx;
-		u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
-
-		bool periodic = dev_priv->perf.oa.periodic;
-		u32 period_exponent = dev_priv->perf.oa.period_exponent;
-		u32 report_format = dev_priv->perf.oa.oa_buffer.format;
-
-		I915_WRITE(GEN7_OACONTROL,
-			   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
-			   (period_exponent <<
-			    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
-			   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
-			   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
-			   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
-			   GEN7_OACONTROL_ENABLE);
-	} else
-		I915_WRITE(GEN7_OACONTROL, 0);
+	I915_WRITE(GEN7_OACONTROL,
+		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
+		   (period_exponent <<
+		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
+		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
+		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
+		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
+		   GEN7_OACONTROL_ENABLE);
 }
 
 static void gen8_oa_enable(struct drm_i915_private *dev_priv)
@@ -1966,11 +1960,19 @@
 static void gen7_oa_disable(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN7_OACONTROL, 0);
+	if (intel_wait_for_register(dev_priv,
+				    GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
+				    50))
+		DRM_ERROR("wait for OA to be disabled timed out\n");
 }
 
 static void gen8_oa_disable(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN8_OACONTROL, 0);
+	if (intel_wait_for_register(dev_priv,
+				    GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
+				    50))
+		DRM_ERROR("wait for OA to be disabled timed out\n");
 }
 
 /**
@@ -2099,13 +2101,17 @@
 
 	if (stream->ctx) {
 		ret = oa_get_render_ctx_id(stream);
-		if (ret)
+		if (ret) {
+			DRM_DEBUG("Invalid context id to filter with\n");
 			return ret;
+		}
 	}
 
 	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
-	if (ret)
+	if (ret) {
+		DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
 		goto err_config;
+	}
 
 	/* PRM - observability performance counters:
 	 *
@@ -2132,8 +2138,10 @@
 
 	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
 						      stream->oa_config);
-	if (ret)
+	if (ret) {
+		DRM_DEBUG("Unable to enable metric set\n");
 		goto err_enable;
+	}
 
 	stream->ops = &i915_oa_stream_ops;
 
@@ -2745,7 +2753,8 @@
 			props->ctx_handle = value;
 			break;
 		case DRM_I915_PERF_PROP_SAMPLE_OA:
-			props->sample_flags |= SAMPLE_OA_REPORT;
+			if (value)
+				props->sample_flags |= SAMPLE_OA_REPORT;
 			break;
 		case DRM_I915_PERF_PROP_OA_METRICS_SET:
 			if (value == 0) {
@@ -2935,6 +2944,8 @@
 			i915_perf_load_test_config_cflgt3(dev_priv);
 	} else if (IS_CANNONLAKE(dev_priv)) {
 		i915_perf_load_test_config_cnl(dev_priv);
+	} else if (IS_ICELAKE(dev_priv)) {
+		i915_perf_load_test_config_icl(dev_priv);
 	}
 
 	if (dev_priv->perf.oa.test_config.id == 0)
@@ -3292,6 +3303,8 @@
 
 	mutex_unlock(&dev_priv->perf.metrics_lock);
 
+	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+
 	return oa_config->id;
 
 sysfs_err:
@@ -3348,6 +3361,9 @@
 			   &oa_config->sysfs_metric);
 
 	idr_remove(&dev_priv->perf.metrics_idr, *arg);
+
+	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);
+
 	put_oa_config(dev_priv, oa_config);
 
 config_err:
@@ -3467,7 +3483,7 @@
 
 				dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
 			}
-		} else if (IS_GEN10(dev_priv)) {
+		} else if (IS_GEN(dev_priv, 10, 11)) {
 			dev_priv->perf.oa.ops.is_valid_b_counter_reg =
 				gen7_is_valid_b_counter_addr;
 			dev_priv->perf.oa.ops.is_valid_mux_reg =
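
The gen7/gen8 OA disable paths above now confirm the disable with intel_wait_for_register() rather than assuming the write landed. The underlying idiom is a bounded poll; sketched generically here, where read_reg()/write_reg() are hypothetical stand-ins for I915_READ/I915_WRITE:

#include <linux/types.h>
#include <linux/ktime.h>
#include <linux/delay.h>
#include <linux/errno.h>

/* hypothetical MMIO accessors, not i915 API */
u32 read_reg(u32 reg);
void write_reg(u32 reg, u32 val);

static int disable_and_wait(u32 reg, u32 enable_bit, unsigned int timeout_ms)
{
	ktime_t end = ktime_add_ms(ktime_get_raw(), timeout_ms);

	write_reg(reg, 0);			/* request the disable */
	do {
		if (!(read_reg(reg) & enable_bit))
			return 0;		/* hardware acknowledged */
		usleep_range(10, 20);
	} while (ktime_before(ktime_get_raw(), end));

	return -ETIMEDOUT;
}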
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index f0519e3..dc87797 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -1,33 +1,12 @@
 /*
- * Copyright © 2017 Intel Corporation
+ * SPDX-License-Identifier: MIT
  *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2017-2018 Intel Corporation
  */
 
-#include <linux/perf_event.h>
-#include <linux/pm_runtime.h>
-
-#include "i915_drv.h"
 #include "i915_pmu.h"
 #include "intel_ringbuffer.h"
+#include "i915_drv.h"
 
 /* Frequency for the sampling timer for events which need it. */
 #define FREQUENCY 200
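
The FREQUENCY 200 constant feeds the PMU's sampling timer, which follows the usual self-rearming hrtimer pattern: fire, sample, push the expiry forward one period. A minimal sketch of that pattern, with hypothetical names:

#include <linux/hrtimer.h>
#include <linux/ktime.h>

#define SAMPLE_HZ	200
#define PERIOD_NS	(NSEC_PER_SEC / SAMPLE_HZ)

static struct hrtimer sample_timer;

static enum hrtimer_restart sample_fn(struct hrtimer *t)
{
	/* sample the counters here */
	hrtimer_forward_now(t, ns_to_ktime(PERIOD_NS));
	return HRTIMER_RESTART;
}

static void start_sampling(void)
{
	hrtimer_init(&sample_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	sample_timer.function = sample_fn;
	hrtimer_start(&sample_timer, ns_to_ktime(PERIOD_NS), HRTIMER_MODE_REL);
}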
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index aa1b1a9..2ba7352 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -1,29 +1,19 @@
 /*
- * Copyright © 2017 Intel Corporation
+ * SPDX-License-Identifier: MIT
  *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2017-2018 Intel Corporation
  */
+
 #ifndef __I915_PMU_H__
 #define __I915_PMU_H__
 
+#include <linux/hrtimer.h>
+#include <linux/perf_event.h>
+#include <linux/spinlock_types.h>
+#include <drm/i915_drm.h>
+
+struct drm_i915_private;
+
 enum {
 	__I915_SAMPLE_FREQ_ACT = 0,
 	__I915_SAMPLE_FREQ_REQ,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8a69a92..f11bb21 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -153,9 +153,6 @@
 #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c))
 #define _PLL(pll, a, b) ((a) + (pll)*((b)-(a)))
 #define _MMIO_PLL(pll, a, b) _MMIO(_PLL(pll, a, b))
-#define _MMIO_PORT6(port, a, b, c, d, e, f) _MMIO(_PICK(port, a, b, c, d, e, f))
-#define _MMIO_PORT6_LN(port, ln, a0, a1, b, c, d, e, f)			\
-	_MMIO(_PICK(port, a0, b, c, d, e, f) + (ln * (a1 - a0)))
 #define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__)
 #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c))
 
@@ -191,6 +188,7 @@
 #define OTHER_CLASS		4
 #define MAX_ENGINE_CLASS	4
 
+#define OTHER_GTPM_INSTANCE	1
 #define MAX_ENGINE_INSTANCE    3
 
 /* PCI config space */
@@ -304,6 +302,17 @@
 #define  GEN6_GRDOM_VECS		(1 << 4)
 #define  GEN9_GRDOM_GUC			(1 << 5)
 #define  GEN8_GRDOM_MEDIA2		(1 << 7)
+/* GEN11 changed all bit defs except for FULL & RENDER */
+#define  GEN11_GRDOM_FULL		GEN6_GRDOM_FULL
+#define  GEN11_GRDOM_RENDER		GEN6_GRDOM_RENDER
+#define  GEN11_GRDOM_BLT		(1 << 2)
+#define  GEN11_GRDOM_GUC		(1 << 3)
+#define  GEN11_GRDOM_MEDIA		(1 << 5)
+#define  GEN11_GRDOM_MEDIA2		(1 << 6)
+#define  GEN11_GRDOM_MEDIA3		(1 << 7)
+#define  GEN11_GRDOM_MEDIA4		(1 << 8)
+#define  GEN11_GRDOM_VECS		(1 << 13)
+#define  GEN11_GRDOM_VECS2		(1 << 14)
 
 #define RING_PP_DIR_BASE(engine)	_MMIO((engine)->mmio_base+0x228)
 #define RING_PP_DIR_BASE_READ(engine)	_MMIO((engine)->mmio_base+0x518)
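
Note the comment above: only FULL and RENDER kept their Gen6 bit positions, so reset code has to pick the per-generation definitions rather than reuse the Gen6 ones. Were a combined mask wanted, it would be composed from these bits, e.g. this hypothetical helper:

/* hypothetical convenience mask built from the GEN11 bits above */
#define GEN11_GRDOM_ALL_MEDIA	(GEN11_GRDOM_MEDIA  | GEN11_GRDOM_MEDIA2 | \
				 GEN11_GRDOM_MEDIA3 | GEN11_GRDOM_MEDIA4)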
@@ -430,145 +439,6 @@
 #define VGA_CR_INDEX_CGA 0x3d4
 #define VGA_CR_DATA_CGA 0x3d5
 
-/*
- * Instruction field definitions used by the command parser
- */
-#define INSTR_CLIENT_SHIFT      29
-#define   INSTR_MI_CLIENT       0x0
-#define   INSTR_BC_CLIENT       0x2
-#define   INSTR_RC_CLIENT       0x3
-#define INSTR_SUBCLIENT_SHIFT   27
-#define INSTR_SUBCLIENT_MASK    0x18000000
-#define   INSTR_MEDIA_SUBCLIENT 0x2
-#define INSTR_26_TO_24_MASK	0x7000000
-#define   INSTR_26_TO_24_SHIFT	24
-
-/*
- * Memory interface instructions used by the kernel
- */
-#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
-/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */
-#define  MI_GLOBAL_GTT    (1<<22)
-
-#define MI_NOOP			MI_INSTR(0, 0)
-#define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
-#define MI_WAIT_FOR_EVENT       MI_INSTR(0x03, 0)
-#define   MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
-#define   MI_WAIT_FOR_PLANE_B_FLIP      (1<<6)
-#define   MI_WAIT_FOR_PLANE_A_FLIP      (1<<2)
-#define   MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1)
-#define MI_FLUSH		MI_INSTR(0x04, 0)
-#define   MI_READ_FLUSH		(1 << 0)
-#define   MI_EXE_FLUSH		(1 << 1)
-#define   MI_NO_WRITE_FLUSH	(1 << 2)
-#define   MI_SCENE_COUNT	(1 << 3) /* just increment scene count */
-#define   MI_END_SCENE		(1 << 4) /* flush binner and incr scene count */
-#define   MI_INVALIDATE_ISP	(1 << 5) /* invalidate indirect state pointers */
-#define MI_REPORT_HEAD		MI_INSTR(0x07, 0)
-#define MI_ARB_ON_OFF		MI_INSTR(0x08, 0)
-#define   MI_ARB_ENABLE			(1<<0)
-#define   MI_ARB_DISABLE		(0<<0)
-#define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
-#define MI_SUSPEND_FLUSH	MI_INSTR(0x0b, 0)
-#define   MI_SUSPEND_FLUSH_EN	(1<<0)
-#define MI_SET_APPID		MI_INSTR(0x0e, 0)
-#define MI_OVERLAY_FLIP		MI_INSTR(0x11, 0)
-#define   MI_OVERLAY_CONTINUE	(0x0<<21)
-#define   MI_OVERLAY_ON		(0x1<<21)
-#define   MI_OVERLAY_OFF	(0x2<<21)
-#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0)
-#define MI_DISPLAY_FLIP		MI_INSTR(0x14, 2)
-#define MI_DISPLAY_FLIP_I915	MI_INSTR(0x14, 1)
-#define   MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
-/* IVB has funny definitions for which plane to flip. */
-#define   MI_DISPLAY_FLIP_IVB_PLANE_A  (0 << 19)
-#define   MI_DISPLAY_FLIP_IVB_PLANE_B  (1 << 19)
-#define   MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19)
-#define   MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
-#define   MI_DISPLAY_FLIP_IVB_PLANE_C  (4 << 19)
-#define   MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
-/* SKL ones */
-#define   MI_DISPLAY_FLIP_SKL_PLANE_1_A	(0 << 8)
-#define   MI_DISPLAY_FLIP_SKL_PLANE_1_B	(1 << 8)
-#define   MI_DISPLAY_FLIP_SKL_PLANE_1_C	(2 << 8)
-#define   MI_DISPLAY_FLIP_SKL_PLANE_2_A	(4 << 8)
-#define   MI_DISPLAY_FLIP_SKL_PLANE_2_B	(5 << 8)
-#define   MI_DISPLAY_FLIP_SKL_PLANE_2_C	(6 << 8)
-#define   MI_DISPLAY_FLIP_SKL_PLANE_3_A	(7 << 8)
-#define   MI_DISPLAY_FLIP_SKL_PLANE_3_B	(8 << 8)
-#define   MI_DISPLAY_FLIP_SKL_PLANE_3_C	(9 << 8)
-#define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6, gen7 */
-#define   MI_SEMAPHORE_GLOBAL_GTT    (1<<22)
-#define   MI_SEMAPHORE_UPDATE	    (1<<21)
-#define   MI_SEMAPHORE_COMPARE	    (1<<20)
-#define   MI_SEMAPHORE_REGISTER	    (1<<18)
-#define   MI_SEMAPHORE_SYNC_VR	    (0<<16) /* RCS  wait for VCS  (RVSYNC) */
-#define   MI_SEMAPHORE_SYNC_VER	    (1<<16) /* RCS  wait for VECS (RVESYNC) */
-#define   MI_SEMAPHORE_SYNC_BR	    (2<<16) /* RCS  wait for BCS  (RBSYNC) */
-#define   MI_SEMAPHORE_SYNC_BV	    (0<<16) /* VCS  wait for BCS  (VBSYNC) */
-#define   MI_SEMAPHORE_SYNC_VEV	    (1<<16) /* VCS  wait for VECS (VVESYNC) */
-#define   MI_SEMAPHORE_SYNC_RV	    (2<<16) /* VCS  wait for RCS  (VRSYNC) */
-#define   MI_SEMAPHORE_SYNC_RB	    (0<<16) /* BCS  wait for RCS  (BRSYNC) */
-#define   MI_SEMAPHORE_SYNC_VEB	    (1<<16) /* BCS  wait for VECS (BVESYNC) */
-#define   MI_SEMAPHORE_SYNC_VB	    (2<<16) /* BCS  wait for VCS  (BVSYNC) */
-#define   MI_SEMAPHORE_SYNC_BVE	    (0<<16) /* VECS wait for BCS  (VEBSYNC) */
-#define   MI_SEMAPHORE_SYNC_VVE	    (1<<16) /* VECS wait for VCS  (VEVSYNC) */
-#define   MI_SEMAPHORE_SYNC_RVE	    (2<<16) /* VECS wait for RCS  (VERSYNC) */
-#define   MI_SEMAPHORE_SYNC_INVALID (3<<16)
-#define   MI_SEMAPHORE_SYNC_MASK    (3<<16)
-#define MI_SET_CONTEXT		MI_INSTR(0x18, 0)
-#define   MI_MM_SPACE_GTT		(1<<8)
-#define   MI_MM_SPACE_PHYSICAL		(0<<8)
-#define   MI_SAVE_EXT_STATE_EN		(1<<3)
-#define   MI_RESTORE_EXT_STATE_EN	(1<<2)
-#define   MI_FORCE_RESTORE		(1<<1)
-#define   MI_RESTORE_INHIBIT		(1<<0)
-#define   HSW_MI_RS_SAVE_STATE_EN       (1<<3)
-#define   HSW_MI_RS_RESTORE_STATE_EN    (1<<2)
-#define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
-#define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
-#define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
-#define   MI_SEMAPHORE_POLL		(1<<15)
-#define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
-#define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
-#define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
-#define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */
-#define   MI_USE_GGTT		(1 << 22) /* g4x+ */
-#define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
-#define   MI_STORE_DWORD_INDEX_SHIFT 2
-/* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
- * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
- *   simply ignores the register load under certain conditions.
- * - One can actually load arbitrary many arbitrary registers: Simply issue x
- *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
- */
-#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
-#define   MI_LRI_FORCE_POSTED		(1<<12)
-#define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
-#define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
-#define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
-#define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
-#define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
-#define   MI_INVALIDATE_TLB		(1<<18)
-#define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
-#define   MI_FLUSH_DW_OP_MASK		(3<<14)
-#define   MI_FLUSH_DW_NOTIFY		(1<<8)
-#define   MI_INVALIDATE_BSD		(1<<7)
-#define   MI_FLUSH_DW_USE_GTT		(1<<2)
-#define   MI_FLUSH_DW_USE_PPGTT		(0<<2)
-#define MI_LOAD_REGISTER_MEM	   MI_INSTR(0x29, 1)
-#define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)
-#define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
-#define   MI_BATCH_NON_SECURE		(1)
-/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
-#define   MI_BATCH_NON_SECURE_I965	(1<<8)
-#define   MI_BATCH_PPGTT_HSW		(1<<8)
-#define   MI_BATCH_NON_SECURE_HSW	(1<<13)
-#define MI_BATCH_BUFFER_START	MI_INSTR(0x31, 0)
-#define   MI_BATCH_GTT		    (2<<6) /* aliased with (1<<7) on gen4 */
-#define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
-
 #define MI_PREDICATE_SRC0	_MMIO(0x2400)
 #define MI_PREDICATE_SRC0_UDW	_MMIO(0x2400 + 4)
 #define MI_PREDICATE_SRC1	_MMIO(0x2408)
@@ -579,130 +449,6 @@
 #define  LOWER_SLICE_DISABLED	(0<<0)
 
 /*
- * 3D instructions used by the kernel
- */
-#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags))
-
-#define GEN9_MEDIA_POOL_STATE     ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4)
-#define   GEN9_MEDIA_POOL_ENABLE  (1 << 31)
-#define GFX_OP_RASTER_RULES    ((0x3<<29)|(0x7<<24))
-#define GFX_OP_SCISSOR         ((0x3<<29)|(0x1c<<24)|(0x10<<19))
-#define   SC_UPDATE_SCISSOR       (0x1<<1)
-#define   SC_ENABLE_MASK          (0x1<<0)
-#define   SC_ENABLE               (0x1<<0)
-#define GFX_OP_LOAD_INDIRECT   ((0x3<<29)|(0x1d<<24)|(0x7<<16))
-#define GFX_OP_SCISSOR_INFO    ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1))
-#define   SCI_YMIN_MASK      (0xffff<<16)
-#define   SCI_XMIN_MASK      (0xffff<<0)
-#define   SCI_YMAX_MASK      (0xffff<<16)
-#define   SCI_XMAX_MASK      (0xffff<<0)
-#define GFX_OP_SCISSOR_ENABLE	 ((0x3<<29)|(0x1c<<24)|(0x10<<19))
-#define GFX_OP_SCISSOR_RECT	 ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1)
-#define GFX_OP_COLOR_FACTOR      ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0)
-#define GFX_OP_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16))
-#define GFX_OP_MAP_INFO          ((0x3<<29)|(0x1d<<24)|0x4)
-#define GFX_OP_DESTBUFFER_VARS   ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0)
-#define GFX_OP_DESTBUFFER_INFO	 ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
-#define GFX_OP_DRAWRECT_INFO     ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
-#define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
-
-#define COLOR_BLT_CMD			(2<<29 | 0x40<<22 | (5-2))
-#define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|4)
-#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
-#define XY_MONO_SRC_COPY_IMM_BLT	((2<<29)|(0x71<<22)|5)
-#define   BLT_WRITE_A			(2<<20)
-#define   BLT_WRITE_RGB			(1<<20)
-#define   BLT_WRITE_RGBA		(BLT_WRITE_RGB | BLT_WRITE_A)
-#define   BLT_DEPTH_8			(0<<24)
-#define   BLT_DEPTH_16_565		(1<<24)
-#define   BLT_DEPTH_16_1555		(2<<24)
-#define   BLT_DEPTH_32			(3<<24)
-#define   BLT_ROP_SRC_COPY		(0xcc<<16)
-#define   BLT_ROP_COLOR_COPY		(0xf0<<16)
-#define XY_SRC_COPY_BLT_SRC_TILED	(1<<15) /* 965+ only */
-#define XY_SRC_COPY_BLT_DST_TILED	(1<<11) /* 965+ only */
-#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2)
-#define   ASYNC_FLIP                (1<<22)
-#define   DISPLAY_PLANE_A           (0<<20)
-#define   DISPLAY_PLANE_B           (1<<20)
-#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
-#define   PIPE_CONTROL_FLUSH_L3				(1<<27)
-#define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
-#define   PIPE_CONTROL_MMIO_WRITE			(1<<23)
-#define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
-#define   PIPE_CONTROL_CS_STALL				(1<<20)
-#define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
-#define   PIPE_CONTROL_MEDIA_STATE_CLEAR		(1<<16)
-#define   PIPE_CONTROL_QW_WRITE				(1<<14)
-#define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
-#define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
-#define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
-#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
-#define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11) /* MBZ on Ironlake */
-#define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10) /* GM45+ only */
-#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
-#define   PIPE_CONTROL_NOTIFY				(1<<8)
-#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7) /* gen7+ */
-#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
-#define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
-#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
-#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
-#define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
-#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
-#define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
-
-/*
- * Commands used only by the command parser
- */
-#define MI_SET_PREDICATE        MI_INSTR(0x01, 0)
-#define MI_ARB_CHECK            MI_INSTR(0x05, 0)
-#define MI_RS_CONTROL           MI_INSTR(0x06, 0)
-#define MI_URB_ATOMIC_ALLOC     MI_INSTR(0x09, 0)
-#define MI_PREDICATE            MI_INSTR(0x0C, 0)
-#define MI_RS_CONTEXT           MI_INSTR(0x0F, 0)
-#define MI_TOPOLOGY_FILTER      MI_INSTR(0x0D, 0)
-#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0)
-#define MI_URB_CLEAR            MI_INSTR(0x19, 0)
-#define MI_UPDATE_GTT           MI_INSTR(0x23, 0)
-#define MI_CLFLUSH              MI_INSTR(0x27, 0)
-#define MI_REPORT_PERF_COUNT    MI_INSTR(0x28, 0)
-#define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 0)
-#define MI_RS_STORE_DATA_IMM    MI_INSTR(0x2B, 0)
-#define MI_LOAD_URB_MEM         MI_INSTR(0x2C, 0)
-#define MI_STORE_URB_MEM        MI_INSTR(0x2D, 0)
-#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
-
-#define PIPELINE_SELECT                ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16))
-#define GFX_OP_3DSTATE_VF_STATISTICS   ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16))
-#define MEDIA_VFE_STATE                ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16))
-#define  MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
-#define GPGPU_OBJECT                   ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
-#define GPGPU_WALKER                   ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
-#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
-	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16))
-#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \
-	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16))
-#define GFX_OP_3DSTATE_SO_DECL_LIST \
-	((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16))
-
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \
-	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16))
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \
-	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16))
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \
-	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16))
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \
-	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16))
-#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
-	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
-
-#define MFX_WAIT  ((0x3<<29)|(0x1<<27)|(0x0<<16))
-
-#define COLOR_BLT     ((0x2<<29)|(0x40<<22))
-#define SRC_COPY_BLT  ((0x2<<29)|(0x43<<22))
-
-/*
  * Registers used only by the command parser
  */
 #define BCS_SWCTRL _MMIO(0x22200)
@@ -802,6 +548,7 @@
 
 #define GEN8_OABUFFER_UDW _MMIO(0x23b4)
 #define GEN8_OABUFFER _MMIO(0x2b14)
+#define  GEN8_OABUFFER_MEM_SELECT_GGTT      (1 << 0)  /* 0: PPGTT, 1: GGTT */
 
 #define GEN7_OASTATUS1 _MMIO(0x2364)
 #define  GEN7_OASTATUS1_TAIL_MASK	    0xffffffc0
@@ -810,7 +557,8 @@
 #define  GEN7_OASTATUS1_REPORT_LOST	    (1<<0)
 
 #define GEN7_OASTATUS2 _MMIO(0x2368)
-#define GEN7_OASTATUS2_HEAD_MASK    0xffffffc0
+#define  GEN7_OASTATUS2_HEAD_MASK           0xffffffc0
+#define  GEN7_OASTATUS2_MEM_SELECT_GGTT     (1 << 0) /* 0: PPGTT, 1: GGTT */
 
 #define GEN8_OASTATUS _MMIO(0x2b08)
 #define  GEN8_OASTATUS_OVERRUN_STATUS	    (1<<3)
@@ -832,8 +580,6 @@
 #define OABUFFER_SIZE_8M    (6<<3)
 #define OABUFFER_SIZE_16M   (7<<3)
 
-#define OA_MEM_SELECT_GGTT  (1<<0)
-
 /*
  * Flexible, Aggregate EU Counter Registers.
  * Note: these aren't contiguous
@@ -1127,6 +873,12 @@
 #define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
 #define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
 #define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
+#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
+#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
+#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	0
+#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
+#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
+#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ	3
 #define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
 #define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
 
@@ -1948,79 +1700,100 @@
 #define _CNL_PORT_PCS_DW1_LN0_C		0x162C04
 #define _CNL_PORT_PCS_DW1_LN0_D		0x162E04
 #define _CNL_PORT_PCS_DW1_LN0_F		0x162804
-#define CNL_PORT_PCS_DW1_GRP(port)	_MMIO_PORT6(port, \
+#define CNL_PORT_PCS_DW1_GRP(port)	_MMIO(_PICK(port, \
 						    _CNL_PORT_PCS_DW1_GRP_AE, \
 						    _CNL_PORT_PCS_DW1_GRP_B, \
 						    _CNL_PORT_PCS_DW1_GRP_C, \
 						    _CNL_PORT_PCS_DW1_GRP_D, \
 						    _CNL_PORT_PCS_DW1_GRP_AE, \
-						    _CNL_PORT_PCS_DW1_GRP_F)
-#define CNL_PORT_PCS_DW1_LN0(port)	_MMIO_PORT6(port, \
+						    _CNL_PORT_PCS_DW1_GRP_F))
+
+#define CNL_PORT_PCS_DW1_LN0(port)	_MMIO(_PICK(port, \
 						    _CNL_PORT_PCS_DW1_LN0_AE, \
 						    _CNL_PORT_PCS_DW1_LN0_B, \
 						    _CNL_PORT_PCS_DW1_LN0_C, \
 						    _CNL_PORT_PCS_DW1_LN0_D, \
 						    _CNL_PORT_PCS_DW1_LN0_AE, \
-						    _CNL_PORT_PCS_DW1_LN0_F)
+						    _CNL_PORT_PCS_DW1_LN0_F))
+#define _ICL_PORT_PCS_DW1_GRP_A		0x162604
+#define _ICL_PORT_PCS_DW1_GRP_B		0x6C604
+#define _ICL_PORT_PCS_DW1_LN0_A		0x162804
+#define _ICL_PORT_PCS_DW1_LN0_B		0x6C804
+#define ICL_PORT_PCS_DW1_GRP(port)	_MMIO_PORT(port,\
+						   _ICL_PORT_PCS_DW1_GRP_A, \
+						   _ICL_PORT_PCS_DW1_GRP_B)
+#define ICL_PORT_PCS_DW1_LN0(port)	_MMIO_PORT(port, \
+						   _ICL_PORT_PCS_DW1_LN0_A, \
+						   _ICL_PORT_PCS_DW1_LN0_B)
 #define   COMMON_KEEPER_EN		(1 << 26)
 
-#define _CNL_PORT_TX_DW2_GRP_AE		0x162348
-#define _CNL_PORT_TX_DW2_GRP_B		0x1623C8
-#define _CNL_PORT_TX_DW2_GRP_C		0x162B48
-#define _CNL_PORT_TX_DW2_GRP_D		0x162BC8
-#define _CNL_PORT_TX_DW2_GRP_F		0x162A48
-#define _CNL_PORT_TX_DW2_LN0_AE		0x162448
-#define _CNL_PORT_TX_DW2_LN0_B		0x162648
-#define _CNL_PORT_TX_DW2_LN0_C		0x162C48
-#define _CNL_PORT_TX_DW2_LN0_D		0x162E48
-#define _CNL_PORT_TX_DW2_LN0_F		0x162848
-#define CNL_PORT_TX_DW2_GRP(port)	_MMIO_PORT6(port, \
-						    _CNL_PORT_TX_DW2_GRP_AE, \
-						    _CNL_PORT_TX_DW2_GRP_B, \
-						    _CNL_PORT_TX_DW2_GRP_C, \
-						    _CNL_PORT_TX_DW2_GRP_D, \
-						    _CNL_PORT_TX_DW2_GRP_AE, \
-						    _CNL_PORT_TX_DW2_GRP_F)
-#define CNL_PORT_TX_DW2_LN0(port)	_MMIO_PORT6(port, \
-						    _CNL_PORT_TX_DW2_LN0_AE, \
-						    _CNL_PORT_TX_DW2_LN0_B, \
-						    _CNL_PORT_TX_DW2_LN0_C, \
-						    _CNL_PORT_TX_DW2_LN0_D, \
-						    _CNL_PORT_TX_DW2_LN0_AE, \
-						    _CNL_PORT_TX_DW2_LN0_F)
-#define   SWING_SEL_UPPER(x)		((x >> 3) << 15)
+/* CNL Port TX registers */
+#define _CNL_PORT_TX_AE_GRP_OFFSET		0x162340
+#define _CNL_PORT_TX_B_GRP_OFFSET		0x1623C0
+#define _CNL_PORT_TX_C_GRP_OFFSET		0x162B40
+#define _CNL_PORT_TX_D_GRP_OFFSET		0x162BC0
+#define _CNL_PORT_TX_F_GRP_OFFSET		0x162A40
+#define _CNL_PORT_TX_AE_LN0_OFFSET		0x162440
+#define _CNL_PORT_TX_B_LN0_OFFSET		0x162640
+#define _CNL_PORT_TX_C_LN0_OFFSET		0x162C40
+#define _CNL_PORT_TX_D_LN0_OFFSET		0x162E40
+#define _CNL_PORT_TX_F_LN0_OFFSET		0x162840
+#define _CNL_PORT_TX_DW_GRP(port, dw)	(_PICK((port), \
+					       _CNL_PORT_TX_AE_GRP_OFFSET, \
+					       _CNL_PORT_TX_B_GRP_OFFSET, \
+					       _CNL_PORT_TX_C_GRP_OFFSET, \
+					       _CNL_PORT_TX_D_GRP_OFFSET, \
+					       _CNL_PORT_TX_AE_GRP_OFFSET, \
+					       _CNL_PORT_TX_F_GRP_OFFSET) + \
+					       4*(dw))
+#define _CNL_PORT_TX_DW_LN0(port, dw)	(_PICK((port), \
+					       _CNL_PORT_TX_AE_LN0_OFFSET, \
+					       _CNL_PORT_TX_B_LN0_OFFSET, \
+					       _CNL_PORT_TX_C_LN0_OFFSET, \
+					       _CNL_PORT_TX_D_LN0_OFFSET, \
+					       _CNL_PORT_TX_AE_LN0_OFFSET, \
+					       _CNL_PORT_TX_F_LN0_OFFSET) + \
+					       4*(dw))
+
+#define CNL_PORT_TX_DW2_GRP(port)	_MMIO(_CNL_PORT_TX_DW_GRP((port), 2))
+#define CNL_PORT_TX_DW2_LN0(port)	_MMIO(_CNL_PORT_TX_DW_LN0((port), 2))
+#define _ICL_PORT_TX_DW2_GRP_A		0x162688
+#define _ICL_PORT_TX_DW2_GRP_B		0x6C688
+#define _ICL_PORT_TX_DW2_LN0_A		0x162888
+#define _ICL_PORT_TX_DW2_LN0_B		0x6C888
+#define ICL_PORT_TX_DW2_GRP(port)	_MMIO_PORT(port, \
+						   _ICL_PORT_TX_DW2_GRP_A, \
+						   _ICL_PORT_TX_DW2_GRP_B)
+#define ICL_PORT_TX_DW2_LN0(port)	_MMIO_PORT(port, \
+						   _ICL_PORT_TX_DW2_LN0_A, \
+						   _ICL_PORT_TX_DW2_LN0_B)
+#define   SWING_SEL_UPPER(x)		(((x) >> 3) << 15)
 #define   SWING_SEL_UPPER_MASK		(1 << 15)
-#define   SWING_SEL_LOWER(x)		((x & 0x7) << 11)
+#define   SWING_SEL_LOWER(x)		(((x) & 0x7) << 11)
 #define   SWING_SEL_LOWER_MASK		(0x7 << 11)
 #define   RCOMP_SCALAR(x)		((x) << 0)
 #define   RCOMP_SCALAR_MASK		(0xFF << 0)
 
-#define _CNL_PORT_TX_DW4_GRP_AE		0x162350
-#define _CNL_PORT_TX_DW4_GRP_B		0x1623D0
-#define _CNL_PORT_TX_DW4_GRP_C		0x162B50
-#define _CNL_PORT_TX_DW4_GRP_D		0x162BD0
-#define _CNL_PORT_TX_DW4_GRP_F		0x162A50
 #define _CNL_PORT_TX_DW4_LN0_AE		0x162450
 #define _CNL_PORT_TX_DW4_LN1_AE		0x1624D0
-#define _CNL_PORT_TX_DW4_LN0_B		0x162650
-#define _CNL_PORT_TX_DW4_LN0_C		0x162C50
-#define _CNL_PORT_TX_DW4_LN0_D		0x162E50
-#define _CNL_PORT_TX_DW4_LN0_F		0x162850
-#define CNL_PORT_TX_DW4_GRP(port)       _MMIO_PORT6(port, \
-						    _CNL_PORT_TX_DW4_GRP_AE, \
-						    _CNL_PORT_TX_DW4_GRP_B, \
-						    _CNL_PORT_TX_DW4_GRP_C, \
-						    _CNL_PORT_TX_DW4_GRP_D, \
-						    _CNL_PORT_TX_DW4_GRP_AE, \
-						    _CNL_PORT_TX_DW4_GRP_F)
-#define CNL_PORT_TX_DW4_LN(port, ln)       _MMIO_PORT6_LN(port, ln,	\
-						    _CNL_PORT_TX_DW4_LN0_AE, \
-						    _CNL_PORT_TX_DW4_LN1_AE, \
-						    _CNL_PORT_TX_DW4_LN0_B, \
-						    _CNL_PORT_TX_DW4_LN0_C, \
-						    _CNL_PORT_TX_DW4_LN0_D, \
-						    _CNL_PORT_TX_DW4_LN0_AE, \
-						    _CNL_PORT_TX_DW4_LN0_F)
+#define CNL_PORT_TX_DW4_GRP(port)	_MMIO(_CNL_PORT_TX_DW_GRP((port), 4))
+#define CNL_PORT_TX_DW4_LN0(port)	_MMIO(_CNL_PORT_TX_DW_LN0((port), 4))
+#define CNL_PORT_TX_DW4_LN(port, ln)   _MMIO(_CNL_PORT_TX_DW_LN0((port), 4) + \
+					     (ln * (_CNL_PORT_TX_DW4_LN1_AE - \
+						    _CNL_PORT_TX_DW4_LN0_AE)))
+#define _ICL_PORT_TX_DW4_GRP_A		0x162690
+#define _ICL_PORT_TX_DW4_GRP_B		0x6C690
+#define _ICL_PORT_TX_DW4_LN0_A		0x162890
+#define _ICL_PORT_TX_DW4_LN1_A		0x162990
+#define _ICL_PORT_TX_DW4_LN0_B		0x6C890
+#define ICL_PORT_TX_DW4_GRP(port)	_MMIO_PORT(port, \
+						   _ICL_PORT_TX_DW4_GRP_A, \
+						   _ICL_PORT_TX_DW4_GRP_B)
+#define ICL_PORT_TX_DW4_LN(port, ln)	_MMIO(_PORT(port, \
+						   _ICL_PORT_TX_DW4_LN0_A, \
+						   _ICL_PORT_TX_DW4_LN0_B) + \
+					      (ln * (_ICL_PORT_TX_DW4_LN1_A - \
+						     _ICL_PORT_TX_DW4_LN0_A)))
 #define   LOADGEN_SELECT		(1 << 31)
 #define   POST_CURSOR_1(x)		((x) << 12)
 #define   POST_CURSOR_1_MASK		(0x3F << 12)
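
The rewrite above replaces ten per-dword macros with one _CNL_PORT_TX_DW_GRP/_LN0 pair: _PICK() selects the per-port base (roughly a compound-literal array lookup in i915) and 4*(dw) steps to the dword, since the DW registers sit 4 bytes apart. A standalone model using the group bases from this hunk:

#include <stdio.h>
#include <stdint.h>

/* group bases from the hunk, in enum port order A(E), B, C, D, E(AE), F */
static const uint32_t grp_base[] = {
	0x162340, 0x1623C0, 0x162B40, 0x162BC0, 0x162340, 0x162A40,
};

static uint32_t cnl_port_tx_dw_grp(int port, int dw)
{
	return grp_base[port] + 4 * dw;	/* DW registers are 4 bytes apart */
}

int main(void)
{
	printf("DW2 GRP, port B: 0x%06X\n", cnl_port_tx_dw_grp(1, 2));
	return 0;
}

As a cross-check, cnl_port_tx_dw_grp(1, 2) yields 0x1623C8, matching the removed _CNL_PORT_TX_DW2_GRP_B.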
@@ -2029,64 +1802,147 @@
 #define   CURSOR_COEFF(x)		((x) << 0)
 #define   CURSOR_COEFF_MASK		(0x3F << 0)
 
-#define _CNL_PORT_TX_DW5_GRP_AE		0x162354
-#define _CNL_PORT_TX_DW5_GRP_B		0x1623D4
-#define _CNL_PORT_TX_DW5_GRP_C		0x162B54
-#define _CNL_PORT_TX_DW5_GRP_D		0x162BD4
-#define _CNL_PORT_TX_DW5_GRP_F		0x162A54
-#define _CNL_PORT_TX_DW5_LN0_AE		0x162454
-#define _CNL_PORT_TX_DW5_LN0_B		0x162654
-#define _CNL_PORT_TX_DW5_LN0_C		0x162C54
-#define _CNL_PORT_TX_DW5_LN0_D		0x162E54
-#define _CNL_PORT_TX_DW5_LN0_F		0x162854
-#define CNL_PORT_TX_DW5_GRP(port)	_MMIO_PORT6(port, \
-						    _CNL_PORT_TX_DW5_GRP_AE, \
-						    _CNL_PORT_TX_DW5_GRP_B, \
-						    _CNL_PORT_TX_DW5_GRP_C, \
-						    _CNL_PORT_TX_DW5_GRP_D, \
-						    _CNL_PORT_TX_DW5_GRP_AE, \
-						    _CNL_PORT_TX_DW5_GRP_F)
-#define CNL_PORT_TX_DW5_LN0(port)	_MMIO_PORT6(port, \
-						    _CNL_PORT_TX_DW5_LN0_AE, \
-						    _CNL_PORT_TX_DW5_LN0_B, \
-						    _CNL_PORT_TX_DW5_LN0_C, \
-						    _CNL_PORT_TX_DW5_LN0_D, \
-						    _CNL_PORT_TX_DW5_LN0_AE, \
-						    _CNL_PORT_TX_DW5_LN0_F)
+#define CNL_PORT_TX_DW5_GRP(port)	_MMIO(_CNL_PORT_TX_DW_GRP((port), 5))
+#define CNL_PORT_TX_DW5_LN0(port)	_MMIO(_CNL_PORT_TX_DW_LN0((port), 5))
+#define _ICL_PORT_TX_DW5_GRP_A		0x162694
+#define _ICL_PORT_TX_DW5_GRP_B		0x6C694
+#define _ICL_PORT_TX_DW5_LN0_A		0x162894
+#define _ICL_PORT_TX_DW5_LN0_B		0x6C894
+#define ICL_PORT_TX_DW5_GRP(port)	_MMIO_PORT(port, \
+						   _ICL_PORT_TX_DW5_GRP_A, \
+						   _ICL_PORT_TX_DW5_GRP_B)
+#define ICL_PORT_TX_DW5_LN0(port)	_MMIO_PORT(port, \
+						   _ICL_PORT_TX_DW5_LN0_A, \
+						   _ICL_PORT_TX_DW5_LN0_B)
 #define   TX_TRAINING_EN		(1 << 31)
+#define   TAP2_DISABLE			(1 << 30)
 #define   TAP3_DISABLE			(1 << 29)
 #define   SCALING_MODE_SEL(x)		((x) << 18)
 #define   SCALING_MODE_SEL_MASK		(0x7 << 18)
 #define   RTERM_SELECT(x)		((x) << 3)
 #define   RTERM_SELECT_MASK		(0x7 << 3)
 
-#define _CNL_PORT_TX_DW7_GRP_AE		0x16235C
-#define _CNL_PORT_TX_DW7_GRP_B		0x1623DC
-#define _CNL_PORT_TX_DW7_GRP_C		0x162B5C
-#define _CNL_PORT_TX_DW7_GRP_D		0x162BDC
-#define _CNL_PORT_TX_DW7_GRP_F		0x162A5C
-#define _CNL_PORT_TX_DW7_LN0_AE		0x16245C
-#define _CNL_PORT_TX_DW7_LN0_B		0x16265C
-#define _CNL_PORT_TX_DW7_LN0_C		0x162C5C
-#define _CNL_PORT_TX_DW7_LN0_D		0x162E5C
-#define _CNL_PORT_TX_DW7_LN0_F		0x16285C
-#define CNL_PORT_TX_DW7_GRP(port)	_MMIO_PORT6(port, \
-						    _CNL_PORT_TX_DW7_GRP_AE, \
-						    _CNL_PORT_TX_DW7_GRP_B, \
-						    _CNL_PORT_TX_DW7_GRP_C, \
-						    _CNL_PORT_TX_DW7_GRP_D, \
-						    _CNL_PORT_TX_DW7_GRP_AE, \
-						    _CNL_PORT_TX_DW7_GRP_F)
-#define CNL_PORT_TX_DW7_LN0(port)	_MMIO_PORT6(port, \
-						    _CNL_PORT_TX_DW7_LN0_AE, \
-						    _CNL_PORT_TX_DW7_LN0_B, \
-						    _CNL_PORT_TX_DW7_LN0_C, \
-						    _CNL_PORT_TX_DW7_LN0_D, \
-						    _CNL_PORT_TX_DW7_LN0_AE, \
-						    _CNL_PORT_TX_DW7_LN0_F)
+#define CNL_PORT_TX_DW7_GRP(port)	_MMIO(_CNL_PORT_TX_DW_GRP((port), 7))
+#define CNL_PORT_TX_DW7_LN0(port)	_MMIO(_CNL_PORT_TX_DW_LN0((port), 7))
 #define   N_SCALAR(x)			((x) << 24)
 #define   N_SCALAR_MASK			(0x7F << 24)
 
+#define _ICL_MG_PHY_PORT_LN(port, ln, ln0p1, ln0p2, ln1p1) \
+	_MMIO(_PORT((port) - PORT_C, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1)))
+
+#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT1		0x16812C
+#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT1		0x16852C
+#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT2		0x16912C
+#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT2		0x16952C
+#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT3		0x16A12C
+#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT3		0x16A52C
+#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT4		0x16B12C
+#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT4		0x16B52C
+#define ICL_PORT_MG_TX1_LINK_PARAMS(port, ln) \
+	_ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT1, \
+				      _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT2, \
+				      _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT1)
+
+#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT1		0x1680AC
+#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT1		0x1684AC
+#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT2		0x1690AC
+#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT2		0x1694AC
+#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT3		0x16A0AC
+#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT3		0x16A4AC
+#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT4		0x16B0AC
+#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT4		0x16B4AC
+#define ICL_PORT_MG_TX2_LINK_PARAMS(port, ln) \
+	_ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT1, \
+				      _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT2, \
+				      _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT1)
+#define CRI_USE_FS32			(1 << 5)
+
+#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT1		0x16814C
+#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT1		0x16854C
+#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT2		0x16914C
+#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT2		0x16954C
+#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT3		0x16A14C
+#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT3		0x16A54C
+#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT4		0x16B14C
+#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT4		0x16B54C
+#define ICL_PORT_MG_TX1_PISO_READLOAD(port, ln) \
+	_ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT1, \
+				      _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT2, \
+				      _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT1)
+
+#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT1		0x1680CC
+#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT1		0x1684CC
+#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT2		0x1690CC
+#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT2		0x1694CC
+#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT3		0x16A0CC
+#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT3		0x16A4CC
+#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT4		0x16B0CC
+#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT4		0x16B4CC
+#define ICL_PORT_MG_TX2_PISO_READLOAD(port, ln) \
+	_ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT1, \
+				      _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT2, \
+				      _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT1)
+#define CRI_CALCINIT					(1 << 1)
+
+#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT1		0x168148
+#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT1		0x168548
+#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT2		0x169148
+#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT2		0x169548
+#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT3		0x16A148
+#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT3		0x16A548
+#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT4		0x16B148
+#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT4		0x16B548
+#define ICL_PORT_MG_TX1_SWINGCTRL(port, ln) \
+	_ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT1, \
+				      _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT2, \
+				      _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT1)
+
+#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT1		0x1680C8
+#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT1		0x1684C8
+#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT2		0x1690C8
+#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT2		0x1694C8
+#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT3		0x16A0C8
+#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT3		0x16A4C8
+#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT4		0x16B0C8
+#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT4		0x16B4C8
+#define ICL_PORT_MG_TX2_SWINGCTRL(port, ln) \
+	_ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT1, \
+				      _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT2, \
+				      _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT1)
+#define CRI_TXDEEMPH_OVERRIDE_17_12(x)			((x) << 0)
+#define CRI_TXDEEMPH_OVERRIDE_17_12_MASK		(0x3F << 0)
+
+#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT1			0x168144
+#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT1			0x168544
+#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT2			0x169144
+#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT2			0x169544
+#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT3			0x16A144
+#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT3			0x16A544
+#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT4			0x16B144
+#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT4			0x16B544
+#define ICL_PORT_MG_TX1_DRVCTRL(port, ln) \
+	_ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_DRVCTRL_TX1LN0_PORT1, \
+				      _ICL_MG_TX_DRVCTRL_TX1LN0_PORT2, \
+				      _ICL_MG_TX_DRVCTRL_TX1LN1_PORT1)
+
+#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT1			0x1680C4
+#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT1			0x1684C4
+#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT2			0x1690C4
+#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT2			0x1694C4
+#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT3			0x16A0C4
+#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT3			0x16A4C4
+#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT4			0x16B0C4
+#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT4			0x16B4C4
+#define ICL_PORT_MG_TX2_DRVCTRL(port, ln) \
+	_ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_DRVCTRL_TX2LN0_PORT1, \
+				      _ICL_MG_TX_DRVCTRL_TX2LN0_PORT2, \
+				      _ICL_MG_TX_DRVCTRL_TX2LN1_PORT1)
+#define CRI_TXDEEMPH_OVERRIDE_11_6(x)			((x) << 24)
+#define CRI_TXDEEMPH_OVERRIDE_11_6_MASK			(0x3F << 24)
+#define CRI_TXDEEMPH_OVERRIDE_EN			(1 << 22)
+#define CRI_TXDEEMPH_OVERRIDE_5_0(x)			((x) << 16)
+#define CRI_TXDEEMPH_OVERRIDE_5_0_MASK			(0x3F << 16)
+
 /* The spec defines this only for BXT PHY0, but lets assume that this
  * would exist for PHY1 too if it had a second channel.
  */
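
_ICL_MG_PHY_PORT_LN above leans on _PORT()'s linear form, base + index * stride, so the four TC ports only need the port 1 and port 2 anchors, and the lane stride falls out of the LN1 minus LN0 difference. Modeled standalone (PORT_C's value here is a stand-in for i915's enum):

#include <stdio.h>
#include <stdint.h>

#define PORT_C 2	/* stand-in for i915's enum port value */

static uint32_t icl_mg_phy_port_ln(int port, int ln, uint32_t ln0p1,
				   uint32_t ln0p2, uint32_t ln1p1)
{
	/* _PORT() is linear: first + index * (second - first) */
	uint32_t base = ln0p1 + (port - PORT_C) * (ln0p2 - ln0p1);

	return base + ln * (ln1p1 - ln0p1);
}

int main(void)
{
	/* TX1 link params, third TC port, lane 1: expect 0x16A52C */
	printf("0x%06X\n", icl_mg_phy_port_ln(PORT_C + 2, 1,
					      0x16812C, 0x16912C, 0x16852C));
	return 0;
}

The result matches _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT3 above, which is why the PORT3/PORT4 definitions never appear in the macro itself.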
@@ -2473,6 +2329,10 @@
 #define   GEN8_MCR_SLICE_MASK		GEN8_MCR_SLICE(3)
 #define   GEN8_MCR_SUBSLICE(subslice)	(((subslice) & 3) << 24)
 #define   GEN8_MCR_SUBSLICE_MASK	GEN8_MCR_SUBSLICE(3)
+#define   GEN11_MCR_SLICE(slice)	(((slice) & 0xf) << 27)
+#define   GEN11_MCR_SLICE_MASK		GEN11_MCR_SLICE(0xf)
+#define   GEN11_MCR_SUBSLICE(subslice)	(((subslice) & 0x7) << 24)
+#define   GEN11_MCR_SUBSLICE_MASK	GEN11_MCR_SUBSLICE(0x7)
 #define RING_IPEIR(base)	_MMIO((base)+0x64)
 #define RING_IPEHR(base)	_MMIO((base)+0x68)
 /*
@@ -2867,6 +2727,19 @@
 #define GEN10_EU_DISABLE3		_MMIO(0x9140)
 #define   GEN10_EU_DIS_SS_MASK		0xff
 
+#define GEN11_GT_VEBOX_VDBOX_DISABLE	_MMIO(0x9140)
+#define   GEN11_GT_VDBOX_DISABLE_MASK	0xff
+#define   GEN11_GT_VEBOX_DISABLE_SHIFT	16
+#define   GEN11_GT_VEBOX_DISABLE_MASK	(0xff << GEN11_GT_VEBOX_DISABLE_SHIFT)
+
+#define GEN11_EU_DISABLE _MMIO(0x9134)
+#define GEN11_EU_DIS_MASK 0xFF
+
+#define GEN11_GT_SLICE_ENABLE _MMIO(0x9138)
+#define GEN11_GT_S_ENA_MASK 0xFF
+
+#define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
+
 #define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
 #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
@@ -3951,6 +3824,9 @@
 #define _CLKGATE_DIS_PSL_A		0x46520
 #define _CLKGATE_DIS_PSL_B		0x46524
 #define _CLKGATE_DIS_PSL_C		0x46528
+#define   DUPS1_GATING_DIS		(1 << 15)
+#define   DUPS2_GATING_DIS		(1 << 19)
+#define   DUPS3_GATING_DIS		(1 << 23)
 #define   DPF_GATING_DIS		(1 << 10)
 #define   DPF_RAM_GATING_DIS		(1 << 9)
 #define   DPFR_GATING_DIS		(1 << 8)
@@ -3964,6 +3840,7 @@
 #define SLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x94d4)
 #define  SARBUNIT_CLKGATE_DIS		(1 << 5)
 #define  RCCUNIT_CLKGATE_DIS		(1 << 7)
+#define  MSCUNIT_CLKGATE_DIS		(1 << 10)
 
 #define SUBSLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x9524)
 #define  GWUNIT_CLKGATE_DIS		(1 << 16)
@@ -3971,6 +3848,9 @@
 #define UNSLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x9434)
 #define  VFUNIT_CLKGATE_DIS		(1 << 20)
 
+#define INF_UNIT_LEVEL_CLKGATE		_MMIO(0x9560)
+#define   CGPSF_CLKGATE_DIS		(1 << 3)
+
 /*
  * Display engine regs
  */
@@ -4150,7 +4030,20 @@
 #define   EDP_PSR_TP1_TIME_0us			(3<<4)
 #define   EDP_PSR_IDLE_FRAME_SHIFT		0
 
+/* Bspec claims those aren't shifted but stay at 0x64800 */
+#define EDP_PSR_IMR				_MMIO(0x64834)
+#define EDP_PSR_IIR				_MMIO(0x64838)
+#define   EDP_PSR_ERROR(trans)			(1 << (((trans) * 8 + 10) & 31))
+#define   EDP_PSR_POST_EXIT(trans)		(1 << (((trans) * 8 + 9) & 31))
+#define   EDP_PSR_PRE_ENTRY(trans)		(1 << (((trans) * 8 + 8) & 31))
+
 #define EDP_PSR_AUX_CTL				_MMIO(dev_priv->psr_mmio_base + 0x10)
+#define   EDP_PSR_AUX_CTL_TIME_OUT_MASK		(3 << 26)
+#define   EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK	(0x1f << 20)
+#define   EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK	(0xf << 16)
+#define   EDP_PSR_AUX_CTL_ERROR_INTERRUPT	(1 << 11)
+#define   EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK	(0x7ff)
+
 #define EDP_PSR_AUX_DATA(i)			_MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */
 
 #define EDP_PSR_STATUS				_MMIO(dev_priv->psr_mmio_base + 0x40)
@@ -4180,17 +4073,19 @@
 #define EDP_PSR_PERF_CNT		_MMIO(dev_priv->psr_mmio_base + 0x44)
 #define   EDP_PSR_PERF_CNT_MASK		0xffffff
 
-#define EDP_PSR_DEBUG				_MMIO(dev_priv->psr_mmio_base + 0x60)
+#define EDP_PSR_DEBUG				_MMIO(dev_priv->psr_mmio_base + 0x60) /* PSR_MASK on SKL+ */
 #define   EDP_PSR_DEBUG_MASK_MAX_SLEEP         (1<<28)
 #define   EDP_PSR_DEBUG_MASK_LPSP              (1<<27)
 #define   EDP_PSR_DEBUG_MASK_MEMUP             (1<<26)
 #define   EDP_PSR_DEBUG_MASK_HPD               (1<<25)
 #define   EDP_PSR_DEBUG_MASK_DISP_REG_WRITE    (1<<16)
-#define   EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1<<15)
+#define   EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1<<15) /* SKL+ */
 
 #define EDP_PSR2_CTL			_MMIO(0x6f900)
 #define   EDP_PSR2_ENABLE		(1<<31)
 #define   EDP_SU_TRACK_ENABLE		(1<<30)
+#define   EDP_Y_COORDINATE_VALID	(1<<26) /* GLK and CNL+ */
+#define   EDP_Y_COORDINATE_ENABLE	(1<<25) /* GLK and CNL+ */
 #define   EDP_MAX_SU_DISABLE_TIME(t)	((t)<<20)
 #define   EDP_MAX_SU_DISABLE_TIME_MASK	(0x1f<<20)
 #define   EDP_PSR2_TP2_TIME_500		(0<<8)
@@ -4200,8 +4095,32 @@
 #define   EDP_PSR2_TP2_TIME_MASK	(3<<8)
 #define   EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4
 #define   EDP_PSR2_FRAME_BEFORE_SU_MASK	(0xf<<4)
-#define   EDP_PSR2_IDLE_MASK		0xf
 #define   EDP_PSR2_FRAME_BEFORE_SU(a)	((a)<<4)
+#define   EDP_PSR2_IDLE_FRAME_MASK	0xf
+#define   EDP_PSR2_IDLE_FRAME_SHIFT	0
+
+#define _PSR_EVENT_TRANS_A			0x60848
+#define _PSR_EVENT_TRANS_B			0x61848
+#define _PSR_EVENT_TRANS_C			0x62848
+#define _PSR_EVENT_TRANS_D			0x63848
+#define _PSR_EVENT_TRANS_EDP			0x6F848
+#define PSR_EVENT(trans)			_MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A)
+#define  PSR_EVENT_PSR2_WD_TIMER_EXPIRE		(1 << 17)
+#define  PSR_EVENT_PSR2_DISABLED		(1 << 16)
+#define  PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN	(1 << 15)
+#define  PSR_EVENT_SU_CRC_FIFO_UNDERRUN		(1 << 14)
+#define  PSR_EVENT_GRAPHICS_RESET		(1 << 12)
+#define  PSR_EVENT_PCH_INTERRUPT		(1 << 11)
+#define  PSR_EVENT_MEMORY_UP			(1 << 10)
+#define  PSR_EVENT_FRONT_BUFFER_MODIFY		(1 << 9)
+#define  PSR_EVENT_WD_TIMER_EXPIRE		(1 << 8)
+#define  PSR_EVENT_PIPE_REGISTERS_UPDATE	(1 << 6)
+#define  PSR_EVENT_REGISTER_UPDATE		(1 << 5)
+#define  PSR_EVENT_HDCP_ENABLE			(1 << 4)
+#define  PSR_EVENT_KVMR_SESSION_ENABLE		(1 << 3)
+#define  PSR_EVENT_VBI_ENABLE			(1 << 2)
+#define  PSR_EVENT_LPSP_MODE_EXIT		(1 << 1)
+#define  PSR_EVENT_PSR_DISABLE			(1 << 0)
 
 #define EDP_PSR2_STATUS			_MMIO(0x6f940)
 #define EDP_PSR2_STATUS_STATE_MASK     (0xf<<28)
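
The new PSR interrupt bits give each transcoder an 8-bit field at trans * 8, with error/post-exit/pre-entry at offsets 10/9/8 and an & 31 wrap to keep the result inside the 32-bit register. A quick standalone check of where the bits land:

#include <stdio.h>

#define PSR_ERROR_BIT(trans)		(((trans) * 8 + 10) & 31)
#define PSR_POST_EXIT_BIT(trans)	(((trans) * 8 + 9) & 31)
#define PSR_PRE_ENTRY_BIT(trans)	(((trans) * 8 + 8) & 31)

int main(void)
{
	int trans;

	for (trans = 0; trans < 4; trans++)
		printf("trans %d: error=%d post_exit=%d pre_entry=%d\n",
		       trans, PSR_ERROR_BIT(trans),
		       PSR_POST_EXIT_BIT(trans), PSR_PRE_ENTRY_BIT(trans));
	return 0;
}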
@@ -5265,8 +5184,6 @@
 #define   DP_LINK_TRAIN_OFF		(3 << 28)
 #define   DP_LINK_TRAIN_MASK		(3 << 28)
 #define   DP_LINK_TRAIN_SHIFT		28
-#define   DP_LINK_TRAIN_PAT_3_CHV	(1 << 14)
-#define   DP_LINK_TRAIN_MASK_CHV	((3 << 28)|(1<<14))
 
 /* CPT Link training mode */
 #define   DP_LINK_TRAIN_PAT_1_CPT	(0 << 8)
@@ -6009,6 +5926,7 @@
 #define CURSIZE			_MMIO(0x700a0) /* 845/865 */
 #define _CUR_FBC_CTL_A		0x700a0 /* ivb+ */
 #define   CUR_FBC_CTL_EN	(1 << 31)
+#define _CURASURFLIVE		0x700ac /* g4x+ */
 #define _CURBCNTR		0x700c0
 #define _CURBBASE		0x700c4
 #define _CURBPOS		0x700c8
@@ -6025,6 +5943,7 @@
 #define CURBASE(pipe) _CURSOR2(pipe, _CURABASE)
 #define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS)
 #define CUR_FBC_CTL(pipe) _CURSOR2(pipe, _CUR_FBC_CTL_A)
+#define CURSURFLIVE(pipe) _CURSOR2(pipe, _CURASURFLIVE)
 
 #define CURSOR_A_OFFSET 0x70080
 #define CURSOR_B_OFFSET 0x700c0
@@ -6492,9 +6411,9 @@
 #define _PLANE_COLOR_CTL_1_A			0x701CC /* GLK+ */
 #define _PLANE_COLOR_CTL_2_A			0x702CC /* GLK+ */
 #define _PLANE_COLOR_CTL_3_A			0x703CC /* GLK+ */
-#define   PLANE_COLOR_PIPE_GAMMA_ENABLE		(1 << 30)
+#define   PLANE_COLOR_PIPE_GAMMA_ENABLE		(1 << 30) /* Pre-ICL */
 #define   PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE	(1 << 28)
-#define   PLANE_COLOR_PIPE_CSC_ENABLE		(1 << 23)
+#define   PLANE_COLOR_PIPE_CSC_ENABLE		(1 << 23) /* Pre-ICL */
 #define   PLANE_COLOR_CSC_MODE_BYPASS			(0 << 17)
 #define   PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709		(1 << 17)
 #define   PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709		(2 << 17)
@@ -6589,6 +6508,9 @@
 
 #define _PLANE_BUF_CFG_1_B			0x7127c
 #define _PLANE_BUF_CFG_2_B			0x7137c
+#define  SKL_DDB_ENTRY_MASK			0x3FF
+#define  ICL_DDB_ENTRY_MASK			0x7FF
+#define  DDB_ENTRY_END_SHIFT			16
 #define _PLANE_BUF_CFG_1(pipe)	\
 	_PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B)
 #define _PLANE_BUF_CFG_2(pipe)	\
@@ -6779,6 +6701,8 @@
 #define PS_SCALER_MODE_MASK (3 << 28)
 #define PS_SCALER_MODE_DYN  (0 << 28)
 #define PS_SCALER_MODE_HQ  (1 << 28)
+#define SKL_PS_SCALER_MODE_NV12 (2 << 28)
+#define PS_SCALER_MODE_PLANAR (1 << 29)
 #define PS_PLANE_SEL_MASK  (7 << 25)
 #define PS_PLANE_SEL(plane) (((plane) + 1) << 25)
 #define PS_FILTER_MASK         (3 << 23)
@@ -6950,6 +6874,7 @@
 #define DE_PCH_EVENT_IVB		(1<<28)
 #define DE_DP_A_HOTPLUG_IVB		(1<<27)
 #define DE_AUX_CHANNEL_A_IVB		(1<<26)
+#define DE_EDP_PSR_INT_HSW		(1<<19)
 #define DE_SPRITEC_FLIP_DONE_IVB	(1<<14)
 #define DE_PLANEC_FLIP_DONE_IVB		(1<<13)
 #define DE_PIPEC_VBLANK_IVB		(1<<10)
@@ -7074,6 +6999,7 @@
 #define GEN8_DE_MISC_IIR _MMIO(0x44468)
 #define GEN8_DE_MISC_IER _MMIO(0x4446c)
 #define  GEN8_DE_MISC_GSE		(1 << 27)
+#define  GEN8_DE_EDP_PSR		(1 << 19)
 
 #define GEN8_PCU_ISR _MMIO(0x444e0)
 #define GEN8_PCU_IMR _MMIO(0x444e4)
@@ -7117,7 +7043,9 @@
 #define GEN11_INTR_IDENTITY_REG0	_MMIO(0x190060)
 #define GEN11_INTR_IDENTITY_REG1	_MMIO(0x190064)
 #define  GEN11_INTR_DATA_VALID		(1 << 31)
-#define  GEN11_INTR_ENGINE_MASK		(0xffff)
+#define  GEN11_INTR_ENGINE_CLASS(x)	(((x) & GENMASK(18, 16)) >> 16)
+#define  GEN11_INTR_ENGINE_INSTANCE(x)	(((x) & GENMASK(25, 20)) >> 20)
+#define  GEN11_INTR_ENGINE_INTR(x)	((x) & 0xffff)
 
 #define GEN11_INTR_IDENTITY_REG(x)	_MMIO(0x190060 + (x * 4))
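
The identity-register decode now splits out engine class (bits 18:16) and instance (bits 25:20) instead of treating the whole low half as one engine mask. A userspace model of the three accessors, with GENMASK expanded by hand:

#include <stdio.h>
#include <stdint.h>

static unsigned int intr_engine_class(uint32_t x)
{
	return (x & 0x70000) >> 16;	/* GENMASK(18, 16) */
}

static unsigned int intr_engine_instance(uint32_t x)
{
	return (x & 0x3f00000) >> 20;	/* GENMASK(25, 20) */
}

int main(void)
{
	uint32_t ident = 0x80120001;	/* valid, class 2, instance 1, intr 0x0001 */

	printf("class=%u instance=%u intr=0x%04x\n",
	       intr_engine_class(ident), intr_engine_instance(ident),
	       ident & 0xffff);
	return 0;
}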
 
@@ -7197,6 +7125,7 @@
 #define CHICKEN_TRANS_A         0x420c0
 #define CHICKEN_TRANS_B         0x420c4
 #define CHICKEN_TRANS(trans) _MMIO_TRANS(trans, CHICKEN_TRANS_A, CHICKEN_TRANS_B)
+#define  VSC_DATA_SEL_SOFTWARE_CONTROL	(1<<25) /* GLK and CNL+ */
 #define  DDI_TRAINING_OVERRIDE_ENABLE	(1<<19)
 #define  DDI_TRAINING_OVERRIDE_VALUE	(1<<18)
 #define  DDIE_TRAINING_OVERRIDE_ENABLE	(1<<17) /* CHICKEN_TRANS_A only */
@@ -7301,18 +7230,22 @@
 #define GEN7_L3CNTLREG3				_MMIO(0xB024)
 
 #define GEN7_L3_CHICKEN_MODE_REGISTER		_MMIO(0xB030)
-#define  GEN7_WA_L3_CHICKEN_MODE				0x20000000
+#define   GEN7_WA_L3_CHICKEN_MODE		0x20000000
+#define GEN10_L3_CHICKEN_MODE_REGISTER		_MMIO(0xB114)
+#define   GEN11_I2M_WRITE_DISABLE		(1 << 28)
 
 #define GEN7_L3SQCREG4				_MMIO(0xb034)
 #define  L3SQ_URB_READ_CAM_MATCH_DISABLE	(1<<27)
 
 #define GEN8_L3SQCREG4				_MMIO(0xb118)
-#define  GEN8_LQSC_RO_PERF_DIS			(1<<27)
-#define  GEN8_LQSC_FLUSH_COHERENT_LINES		(1<<21)
+#define  GEN11_LQSC_CLEAN_EVICT_DISABLE		(1 << 6)
+#define  GEN8_LQSC_RO_PERF_DIS			(1 << 27)
+#define  GEN8_LQSC_FLUSH_COHERENT_LINES		(1 << 21)
 
 /* GEN8 chicken */
 #define HDC_CHICKEN0				_MMIO(0x7300)
 #define CNL_HDC_CHICKEN0			_MMIO(0xE5F0)
+#define ICL_HDC_MODE				_MMIO(0xE5F4)
 #define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1<<15)
 #define  HDC_FENCE_DEST_SLM_DISABLE		(1<<14)
 #define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1<<11)
@@ -8327,8 +8260,30 @@
 #define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE	(1<<4)
 #define   GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE     (1<<6)
 
-#define GEN8_GARBCNTL                   _MMIO(0xB004)
-#define   GEN9_GAPS_TSV_CREDIT_DISABLE  (1<<7)
+#define GEN8_GARBCNTL				_MMIO(0xB004)
+#define   GEN9_GAPS_TSV_CREDIT_DISABLE		(1 << 7)
+#define   GEN11_ARBITRATION_PRIO_ORDER_MASK	(0x3f << 22)
+#define   GEN11_HASH_CTRL_EXCL_MASK		(0x7f << 0)
+#define   GEN11_HASH_CTRL_EXCL_BIT0		(1 << 0)
+
+#define GEN11_GLBLINVL				_MMIO(0xB404)
+#define   GEN11_BANK_HASH_ADDR_EXCL_MASK	(0x7f << 5)
+#define   GEN11_BANK_HASH_ADDR_EXCL_BIT0	(1 << 5)
+
+#define GEN10_DFR_RATIO_EN_AND_CHICKEN	_MMIO(0x9550)
+#define   DFR_DISABLE			(1 << 9)
+
+#define GEN11_GACB_PERF_CTRL			_MMIO(0x4B80)
+#define   GEN11_HASH_CTRL_MASK			(0x3 << 12 | 0xf << 0)
+#define   GEN11_HASH_CTRL_BIT0			(1 << 0)
+#define   GEN11_HASH_CTRL_BIT4			(1 << 12)
+
+#define GEN11_LSN_UNSLCVC				_MMIO(0xB43C)
+#define   GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC	(1 << 9)
+#define   GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC	(1 << 7)
+
+#define GAMW_ECO_DEV_RW_IA_REG			_MMIO(0x4080)
+#define   GAMW_ECO_DEV_CTX_RELOAD_DISABLE	(1 << 7)
 
 /* IVYBRIDGE DPF */
 #define GEN7_L3CDERRST1(slice)		_MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */
@@ -8837,6 +8792,12 @@
 #define  PORT_CLK_SEL_NONE		(7<<29)
 #define  PORT_CLK_SEL_MASK		(7<<29)
 
+/* On ICL+ this is the same as PORT_CLK_SEL, but all bits change. */
+#define DDI_CLK_SEL(port)		PORT_CLK_SEL(port)
+#define  DDI_CLK_SEL_NONE		(0x0 << 28)
+#define  DDI_CLK_SEL_MG			(0x8 << 28)
+#define  DDI_CLK_SEL_MASK		(0xF << 28)
+
 /* Transcoder clock selection */
 #define _TRANS_CLK_SEL_A		0x46140
 #define _TRANS_CLK_SEL_B		0x46144
@@ -8967,6 +8928,7 @@
  * CNL Clocks
  */
 #define DPCLKA_CFGCR0				_MMIO(0x6C200)
+#define DPCLKA_CFGCR0_ICL			_MMIO(0x164280)
 #define  DPCLKA_CFGCR0_DDI_CLK_OFF(port)	(1 << ((port) ==  PORT_F ? 23 : \
 						      (port)+10))
 #define  DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port)	((port) == PORT_F ? 21 : \
@@ -8983,10 +8945,141 @@
 #define  PLL_POWER_STATE	(1 << 26)
 #define CNL_DPLL_ENABLE(pll)	_MMIO_PLL(pll, DPLL0_ENABLE, DPLL1_ENABLE)
 
+#define _MG_PLL1_ENABLE		0x46030
+#define _MG_PLL2_ENABLE		0x46034
+#define _MG_PLL3_ENABLE		0x46038
+#define _MG_PLL4_ENABLE		0x4603C
+/* Bits are the same as DPLL0_ENABLE */
+#define MG_PLL_ENABLE(port)	_MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \
+					   _MG_PLL2_ENABLE)
+
+#define _MG_REFCLKIN_CTL_PORT1				0x16892C
+#define _MG_REFCLKIN_CTL_PORT2				0x16992C
+#define _MG_REFCLKIN_CTL_PORT3				0x16A92C
+#define _MG_REFCLKIN_CTL_PORT4				0x16B92C
+#define   MG_REFCLKIN_CTL_OD_2_MUX(x)			((x) << 8)
+#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \
+					 _MG_REFCLKIN_CTL_PORT1, \
+					 _MG_REFCLKIN_CTL_PORT2)
+
+#define _MG_CLKTOP2_CORECLKCTL1_PORT1			0x1688D8
+#define _MG_CLKTOP2_CORECLKCTL1_PORT2			0x1698D8
+#define _MG_CLKTOP2_CORECLKCTL1_PORT3			0x16A8D8
+#define _MG_CLKTOP2_CORECLKCTL1_PORT4			0x16B8D8
+#define   MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO(x)		((x) << 16)
+#define   MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x)		((x) << 8)
+#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \
+						_MG_CLKTOP2_CORECLKCTL1_PORT1, \
+						_MG_CLKTOP2_CORECLKCTL1_PORT2)
+
+#define _MG_CLKTOP2_HSCLKCTL_PORT1			0x1688D4
+#define _MG_CLKTOP2_HSCLKCTL_PORT2			0x1698D4
+#define _MG_CLKTOP2_HSCLKCTL_PORT3			0x16A8D4
+#define _MG_CLKTOP2_HSCLKCTL_PORT4			0x16B8D4
+#define   MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(x)		((x) << 16)
+#define   MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(x)	((x) << 14)
+#define   MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(x)		((x) << 12)
+#define   MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x)		((x) << 8)
+#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \
+					     _MG_CLKTOP2_HSCLKCTL_PORT1, \
+					     _MG_CLKTOP2_HSCLKCTL_PORT2)
+
+#define _MG_PLL_DIV0_PORT1				0x168A00
+#define _MG_PLL_DIV0_PORT2				0x169A00
+#define _MG_PLL_DIV0_PORT3				0x16AA00
+#define _MG_PLL_DIV0_PORT4				0x16BA00
+#define   MG_PLL_DIV0_FRACNEN_H				(1 << 30)
+#define   MG_PLL_DIV0_FBDIV_FRAC(x)			((x) << 8)
+#define   MG_PLL_DIV0_FBDIV_INT(x)			((x) << 0)
+#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \
+				     _MG_PLL_DIV0_PORT2)
+
+#define _MG_PLL_DIV1_PORT1				0x168A04
+#define _MG_PLL_DIV1_PORT2				0x169A04
+#define _MG_PLL_DIV1_PORT3				0x16AA04
+#define _MG_PLL_DIV1_PORT4				0x16BA04
+#define   MG_PLL_DIV1_IREF_NDIVRATIO(x)			((x) << 16)
+#define   MG_PLL_DIV1_DITHER_DIV_1			(0 << 12)
+#define   MG_PLL_DIV1_DITHER_DIV_2			(1 << 12)
+#define   MG_PLL_DIV1_DITHER_DIV_4			(2 << 12)
+#define   MG_PLL_DIV1_DITHER_DIV_8			(3 << 12)
+#define   MG_PLL_DIV1_NDIVRATIO(x)			((x) << 4)
+#define   MG_PLL_DIV1_FBPREDIV(x)			((x) << 0)
+#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \
+				     _MG_PLL_DIV1_PORT2)
+
+#define _MG_PLL_LF_PORT1				0x168A08
+#define _MG_PLL_LF_PORT2				0x169A08
+#define _MG_PLL_LF_PORT3				0x16AA08
+#define _MG_PLL_LF_PORT4				0x16BA08
+#define   MG_PLL_LF_TDCTARGETCNT(x)			((x) << 24)
+#define   MG_PLL_LF_AFCCNTSEL_256			(0 << 20)
+#define   MG_PLL_LF_AFCCNTSEL_512			(1 << 20)
+#define   MG_PLL_LF_GAINCTRL(x)				((x) << 16)
+#define   MG_PLL_LF_INT_COEFF(x)			((x) << 8)
+#define   MG_PLL_LF_PROP_COEFF(x)			((x) << 0)
+#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \
+				   _MG_PLL_LF_PORT2)
+
+#define _MG_PLL_FRAC_LOCK_PORT1				0x168A0C
+#define _MG_PLL_FRAC_LOCK_PORT2				0x169A0C
+#define _MG_PLL_FRAC_LOCK_PORT3				0x16AA0C
+#define _MG_PLL_FRAC_LOCK_PORT4				0x16BA0C
+#define   MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32		(1 << 18)
+#define   MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32		(1 << 16)
+#define   MG_PLL_FRAC_LOCK_LOCKTHRESH(x)		((x) << 11)
+#define   MG_PLL_FRAC_LOCK_DCODITHEREN			(1 << 10)
+#define   MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN		(1 << 8)
+#define   MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x)		((x) << 0)
+#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \
+					  _MG_PLL_FRAC_LOCK_PORT1, \
+					  _MG_PLL_FRAC_LOCK_PORT2)
+
+#define _MG_PLL_SSC_PORT1				0x168A10
+#define _MG_PLL_SSC_PORT2				0x169A10
+#define _MG_PLL_SSC_PORT3				0x16AA10
+#define _MG_PLL_SSC_PORT4				0x16BA10
+#define   MG_PLL_SSC_EN					(1 << 28)
+#define   MG_PLL_SSC_TYPE(x)				((x) << 26)
+#define   MG_PLL_SSC_STEPLENGTH(x)			((x) << 16)
+#define   MG_PLL_SSC_STEPNUM(x)				((x) << 10)
+#define   MG_PLL_SSC_FLLEN				(1 << 9)
+#define   MG_PLL_SSC_STEPSIZE(x)			((x) << 0)
+#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \
+				    _MG_PLL_SSC_PORT2)
+
+#define _MG_PLL_BIAS_PORT1				0x168A14
+#define _MG_PLL_BIAS_PORT2				0x169A14
+#define _MG_PLL_BIAS_PORT3				0x16AA14
+#define _MG_PLL_BIAS_PORT4				0x16BA14
+#define   MG_PLL_BIAS_BIAS_GB_SEL(x)			((x) << 30)
+#define   MG_PLL_BIAS_INIT_DCOAMP(x)			((x) << 24)
+#define   MG_PLL_BIAS_BIAS_BONUS(x)			((x) << 16)
+#define   MG_PLL_BIAS_BIASCAL_EN			(1 << 15)
+#define   MG_PLL_BIAS_CTRIM(x)				((x) << 8)
+#define   MG_PLL_BIAS_VREF_RDAC(x)			((x) << 5)
+#define   MG_PLL_BIAS_IREFTRIM(x)			((x) << 0)
+#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \
+				     _MG_PLL_BIAS_PORT2)
+
+#define _MG_PLL_TDC_COLDST_BIAS_PORT1			0x168A18
+#define _MG_PLL_TDC_COLDST_BIAS_PORT2			0x169A18
+#define _MG_PLL_TDC_COLDST_BIAS_PORT3			0x16AA18
+#define _MG_PLL_TDC_COLDST_BIAS_PORT4			0x16BA18
+#define   MG_PLL_TDC_COLDST_IREFINT_EN			(1 << 27)
+#define   MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(x)	((x) << 17)
+#define   MG_PLL_TDC_COLDST_COLDSTART			(1 << 16)
+#define   MG_PLL_TDC_TDCOVCCORR_EN			(1 << 2)
+#define   MG_PLL_TDC_TDCSEL(x)				((x) << 0)
+#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \
+						_MG_PLL_TDC_COLDST_BIAS_PORT1, \
+						_MG_PLL_TDC_COLDST_BIAS_PORT2)
+
 #define _CNL_DPLL0_CFGCR0		0x6C000
 #define _CNL_DPLL1_CFGCR0		0x6C080
 #define  DPLL_CFGCR0_HDMI_MODE		(1 << 30)
 #define  DPLL_CFGCR0_SSC_ENABLE		(1 << 29)
+#define  DPLL_CFGCR0_SSC_ENABLE_ICL	(1 << 25)
 #define  DPLL_CFGCR0_LINK_RATE_MASK	(0xf << 25)
 #define  DPLL_CFGCR0_LINK_RATE_2700	(0 << 25)
 #define  DPLL_CFGCR0_LINK_RATE_1350	(1 << 25)
@@ -9020,8 +9113,19 @@
 #define  DPLL_CFGCR1_PDIV_5		(4 << 2)
 #define  DPLL_CFGCR1_PDIV_7		(8 << 2)
 #define  DPLL_CFGCR1_CENTRAL_FREQ	(3 << 0)
+#define  DPLL_CFGCR1_CENTRAL_FREQ_8400	(3 << 0)
 #define CNL_DPLL_CFGCR1(pll)		_MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1)
 
+#define _ICL_DPLL0_CFGCR0		0x164000
+#define _ICL_DPLL1_CFGCR0		0x164080
+#define ICL_DPLL_CFGCR0(pll)		_MMIO_PLL(pll, _ICL_DPLL0_CFGCR0, \
+						  _ICL_DPLL1_CFGCR0)
+
+#define _ICL_DPLL0_CFGCR1		0x164004
+#define _ICL_DPLL1_CFGCR1		0x164084
+#define ICL_DPLL_CFGCR1(pll)		_MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \
+						  _ICL_DPLL1_CFGCR1)
+
 /* BXT display engine PLL */
 #define BXT_DE_PLL_CTL			_MMIO(0x6d000)
 #define   BXT_DE_PLL_RATIO(x)		(x)	/* {60,65,100} * 19.2MHz */
@@ -9793,6 +9897,13 @@
 #define GEN9_MFX1_MOCS(i)	_MMIO(0xca00 + (i) * 4)	/* Media 1 MOCS registers */
 #define GEN9_VEBOX_MOCS(i)	_MMIO(0xcb00 + (i) * 4)	/* Video MOCS registers */
 #define GEN9_BLT_MOCS(i)	_MMIO(0xcc00 + (i) * 4)	/* Blitter MOCS registers */
+/* Media decoder 2 MOCS registers */
+#define GEN11_MFX2_MOCS(i)	_MMIO(0x10000 + (i) * 4)
+
+#define GEN10_SCRATCH_LNCF2		_MMIO(0xb0a0)
+#define   PMFLUSHDONE_LNICRSDROP	(1 << 20)
+#define   PMFLUSH_GAPL3UNBLOCK		(1 << 21)
+#define   PMFLUSHDONE_LNEBLK		(1 << 22)
 
 /* gamt regs */
 #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
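
The MG PLL registers above come in four per-port instances spaced 0x1000 apart, while the MG_*(port) macros name only the PORT1/PORT2 pair. A standalone sketch of the address arithmetic involved (assuming _MMIO_PORT extrapolates linearly from the first two addresses, and PORT_C == 2 as in the i915 port enum) — illustrative only, not part of the patch:

/*
 * Illustrative sketch only -- not part of the patch. It mimics how a
 * macro like MG_REFCLKIN_CTL(port) can resolve per-port addresses by
 * linear extrapolation from the first two instances, matching the
 * 0x1000 stride visible in the _MG_*_PORT1..PORT4 definitions above.
 */
#include <stdint.h>
#include <stdio.h>

#define PORT_C 2	/* assumed enum value, for illustration */

static uint32_t mmio_port(unsigned int index, uint32_t a, uint32_t b)
{
	return a + index * (b - a);	/* index'th evenly spaced instance */
}

int main(void)
{
	unsigned int port;

	for (port = PORT_C; port <= PORT_C + 3; port++)
		printf("MG_REFCLKIN_CTL(port %u) -> 0x%06X\n", port,
		       (unsigned int)mmio_port(port - PORT_C,
						0x16892C, 0x16992C));
	return 0;	/* prints 0x16892C, 0x16992C, 0x16A92C, 0x16B92C */
}
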
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 282f576..8928894 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -49,7 +49,7 @@
 	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
 		return "signaled";
 
-	return to_request(fence)->timeline->common->name;
+	return to_request(fence)->timeline->name;
 }
 
 static bool i915_fence_signaled(struct dma_fence *fence)
@@ -59,11 +59,7 @@
 
 static bool i915_fence_enable_signaling(struct dma_fence *fence)
 {
-	if (i915_fence_signaled(fence))
-		return false;
-
-	intel_engine_enable_signaling(to_request(fence), true);
-	return !i915_fence_signaled(fence);
+	return intel_engine_enable_signaling(to_request(fence), true);
 }
 
 static signed long i915_fence_wait(struct dma_fence *fence,
@@ -129,22 +125,22 @@
 }
 
 static void
-__i915_priotree_add_dependency(struct i915_priotree *pt,
-			       struct i915_priotree *signal,
-			       struct i915_dependency *dep,
-			       unsigned long flags)
+__i915_sched_node_add_dependency(struct i915_sched_node *node,
+				 struct i915_sched_node *signal,
+				 struct i915_dependency *dep,
+				 unsigned long flags)
 {
 	INIT_LIST_HEAD(&dep->dfs_link);
 	list_add(&dep->wait_link, &signal->waiters_list);
-	list_add(&dep->signal_link, &pt->signalers_list);
+	list_add(&dep->signal_link, &node->signalers_list);
 	dep->signaler = signal;
 	dep->flags = flags;
 }
 
 static int
-i915_priotree_add_dependency(struct drm_i915_private *i915,
-			     struct i915_priotree *pt,
-			     struct i915_priotree *signal)
+i915_sched_node_add_dependency(struct drm_i915_private *i915,
+			       struct i915_sched_node *node,
+			       struct i915_sched_node *signal)
 {
 	struct i915_dependency *dep;
 
@@ -152,16 +148,18 @@
 	if (!dep)
 		return -ENOMEM;
 
-	__i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC);
+	__i915_sched_node_add_dependency(node, signal, dep,
+					 I915_DEPENDENCY_ALLOC);
 	return 0;
 }
 
 static void
-i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
+i915_sched_node_fini(struct drm_i915_private *i915,
+		     struct i915_sched_node *node)
 {
-	struct i915_dependency *dep, *next;
+	struct i915_dependency *dep, *tmp;
 
-	GEM_BUG_ON(!list_empty(&pt->link));
+	GEM_BUG_ON(!list_empty(&node->link));
 
 	/*
 	 * Everyone we depended upon (the fences we wait to be signaled)
@@ -169,8 +167,8 @@
 	 * However, retirement is run independently on each timeline and
 	 * so we may be called out-of-order.
 	 */
-	list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) {
-		GEM_BUG_ON(!i915_priotree_signaled(dep->signaler));
+	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
+		GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler));
 		GEM_BUG_ON(!list_empty(&dep->dfs_link));
 
 		list_del(&dep->wait_link);
@@ -179,8 +177,8 @@
 	}
 
 	/* Remove ourselves from everyone who depends upon us */
-	list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) {
-		GEM_BUG_ON(dep->signaler != pt);
+	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
+		GEM_BUG_ON(dep->signaler != node);
 		GEM_BUG_ON(!list_empty(&dep->dfs_link));
 
 		list_del(&dep->signal_link);
@@ -190,17 +188,18 @@
 }
 
 static void
-i915_priotree_init(struct i915_priotree *pt)
+i915_sched_node_init(struct i915_sched_node *node)
 {
-	INIT_LIST_HEAD(&pt->signalers_list);
-	INIT_LIST_HEAD(&pt->waiters_list);
-	INIT_LIST_HEAD(&pt->link);
-	pt->priority = I915_PRIORITY_INVALID;
+	INIT_LIST_HEAD(&node->signalers_list);
+	INIT_LIST_HEAD(&node->waiters_list);
+	INIT_LIST_HEAD(&node->link);
+	node->attr.priority = I915_PRIORITY_INVALID;
 }
 
 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
 	struct intel_engine_cs *engine;
+	struct i915_timeline *timeline;
 	enum intel_engine_id id;
 	int ret;
 
@@ -211,30 +210,37 @@
 	if (ret)
 		return ret;
 
+	GEM_BUG_ON(i915->gt.active_requests);
+
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
 	for_each_engine(engine, i915, id) {
-		struct i915_gem_timeline *timeline;
-		struct intel_timeline *tl = engine->timeline;
+		GEM_TRACE("%s seqno %d (current %d) -> %d\n",
+			  engine->name,
+			  engine->timeline.seqno,
+			  intel_engine_get_seqno(engine),
+			  seqno);
 
-		if (!i915_seqno_passed(seqno, tl->seqno)) {
+		if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
 			/* Flush any waiters before we reuse the seqno */
 			intel_engine_disarm_breadcrumbs(engine);
+			intel_engine_init_hangcheck(engine);
 			GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
 		}
 
 		/* Check we are idle before we fiddle with hw state! */
 		GEM_BUG_ON(!intel_engine_is_idle(engine));
-		GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
+		GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));
 
 		/* Finally reset hw state */
 		intel_engine_init_global_seqno(engine, seqno);
-		tl->seqno = seqno;
-
-		list_for_each_entry(timeline, &i915->gt.timelines, link)
-			memset(timeline->engine[id].global_sync, 0,
-			       sizeof(timeline->engine[id].global_sync));
+		engine->timeline.seqno = seqno;
 	}
 
+	list_for_each_entry(timeline, &i915->gt.timelines, link)
+		memset(timeline->global_sync, 0, sizeof(timeline->global_sync));
+
+	i915->gt.request_serial = seqno;
+
 	return 0;
 }
 
@@ -251,83 +257,37 @@
 	return reset_all_global_seqno(i915, seqno - 1);
 }
 
-static void mark_busy(struct drm_i915_private *i915)
+static int reserve_gt(struct drm_i915_private *i915)
 {
-	if (i915->gt.awake)
-		return;
-
-	GEM_BUG_ON(!i915->gt.active_requests);
-
-	intel_runtime_pm_get_noresume(i915);
-
-	/*
-	 * It seems that the DMC likes to transition between the DC states a lot
-	 * when there are no connected displays (no active power domains) during
-	 * command submission.
-	 *
-	 * This activity has negative impact on the performance of the chip with
-	 * huge latencies observed in the interrupt handler and elsewhere.
-	 *
-	 * Work around it by grabbing a GT IRQ power domain whilst there is any
-	 * GT activity, preventing any DC state transitions.
-	 */
-	intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
-
-	i915->gt.awake = true;
-	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
-		i915->gt.epoch = 1;
-
-	intel_enable_gt_powersave(i915);
-	i915_update_gfx_val(i915);
-	if (INTEL_GEN(i915) >= 6)
-		gen6_rps_busy(i915);
-	i915_pmu_gt_unparked(i915);
-
-	intel_engines_unpark(i915);
-
-	i915_queue_hangcheck(i915);
-
-	queue_delayed_work(i915->wq,
-			   &i915->gt.retire_work,
-			   round_jiffies_up_relative(HZ));
-}
-
-static int reserve_engine(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *i915 = engine->i915;
-	u32 active = ++engine->timeline->inflight_seqnos;
-	u32 seqno = engine->timeline->seqno;
 	int ret;
 
-	/* Reservation is fine until we need to wrap around */
-	if (unlikely(add_overflows(seqno, active))) {
+	/*
+	 * Reservation is fine until we may need to wrap around
+	 *
+	 * By incrementing the serial for every request, we know that no
+	 * individual engine may exceed that serial (as each is reset to 0
+	 * on any wrap). This protects even the most pessimistic of migrations
+	 * of every request from all engines onto just one.
+	 */
+	while (unlikely(++i915->gt.request_serial == 0)) {
 		ret = reset_all_global_seqno(i915, 0);
 		if (ret) {
-			engine->timeline->inflight_seqnos--;
+			i915->gt.request_serial--;
 			return ret;
 		}
 	}
 
 	if (!i915->gt.active_requests++)
-		mark_busy(i915);
+		i915_gem_unpark(i915);
 
 	return 0;
 }
 
-static void unreserve_engine(struct intel_engine_cs *engine)
+static void unreserve_gt(struct drm_i915_private *i915)
 {
-	struct drm_i915_private *i915 = engine->i915;
-
-	if (!--i915->gt.active_requests) {
-		/* Cancel the mark_busy() from our reserve_engine() */
-		GEM_BUG_ON(!i915->gt.awake);
-		mod_delayed_work(i915->wq,
-				 &i915->gt.idle_work,
-				 msecs_to_jiffies(100));
-	}
-
-	GEM_BUG_ON(!engine->timeline->inflight_seqnos);
-	engine->timeline->inflight_seqnos--;
+	GEM_BUG_ON(!i915->gt.active_requests);
+	if (!--i915->gt.active_requests)
+		i915_gem_park(i915);
 }
 
 void i915_gem_retire_noop(struct i915_gem_active *active,
@@ -338,6 +298,7 @@
 
 static void advance_ring(struct i915_request *request)
 {
+	struct intel_ring *ring = request->ring;
 	unsigned int tail;
 
 	/*
@@ -349,7 +310,8 @@
 	 * Note this requires that we are always called in request
 	 * completion order.
 	 */
-	if (list_is_last(&request->ring_link, &request->ring->request_list)) {
+	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
+	if (list_is_last(&request->ring_link, &ring->request_list)) {
 		/*
 		 * We may race here with execlists resubmitting this request
 		 * as we retire it. The resubmission will move the ring->tail
@@ -358,13 +320,14 @@
 		 * is just about to be. Either works, if we miss the last two
 		 * noops - they are safe to be replayed on a reset.
 		 */
-		tail = READ_ONCE(request->ring->tail);
+		tail = READ_ONCE(request->tail);
+		list_del(&ring->active_link);
 	} else {
 		tail = request->postfix;
 	}
-	list_del(&request->ring_link);
+	list_del_init(&request->ring_link);
 
-	request->ring->head = tail;
+	ring->head = tail;
 }
 
 static void free_capture_list(struct i915_request *request)
@@ -380,25 +343,84 @@
 	}
 }
 
+static void __retire_engine_request(struct intel_engine_cs *engine,
+				    struct i915_request *rq)
+{
+	GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n",
+		  __func__, engine->name,
+		  rq->fence.context, rq->fence.seqno,
+		  rq->global_seqno,
+		  intel_engine_get_seqno(engine));
+
+	GEM_BUG_ON(!i915_request_completed(rq));
+
+	local_irq_disable();
+
+	spin_lock(&engine->timeline.lock);
+	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
+	list_del_init(&rq->link);
+	spin_unlock(&engine->timeline.lock);
+
+	spin_lock(&rq->lock);
+	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
+		dma_fence_signal_locked(&rq->fence);
+	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
+		intel_engine_cancel_signaling(rq);
+	if (rq->waitboost) {
+		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
+		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
+	}
+	spin_unlock(&rq->lock);
+
+	local_irq_enable();
+
+	/*
+	 * The backing object for the context is done after switching to the
+	 * *next* context. Therefore we cannot retire the previous context until
+	 * the next context has already started running. However, since we
+	 * cannot take the required locks at i915_request_submit() we
+	 * defer the unpinning of the active context to now, retirement of
+	 * the subsequent request.
+	 */
+	if (engine->last_retired_context)
+		intel_context_unpin(engine->last_retired_context, engine);
+	engine->last_retired_context = rq->ctx;
+}
+
+static void __retire_engine_upto(struct intel_engine_cs *engine,
+				 struct i915_request *rq)
+{
+	struct i915_request *tmp;
+
+	if (list_empty(&rq->link))
+		return;
+
+	do {
+		tmp = list_first_entry(&engine->timeline.requests,
+				       typeof(*tmp), link);
+
+		GEM_BUG_ON(tmp->engine != engine);
+		__retire_engine_request(engine, tmp);
+	} while (tmp != rq);
+}
+
 static void i915_request_retire(struct i915_request *request)
 {
-	struct intel_engine_cs *engine = request->engine;
 	struct i915_gem_active *active, *next;
 
+	GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
+		  request->engine->name,
+		  request->fence.context, request->fence.seqno,
+		  request->global_seqno,
+		  intel_engine_get_seqno(request->engine));
+
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
 	GEM_BUG_ON(!i915_request_completed(request));
-	GEM_BUG_ON(!request->i915->gt.active_requests);
 
 	trace_i915_request_retire(request);
 
-	spin_lock_irq(&engine->timeline->lock);
-	list_del_init(&request->link);
-	spin_unlock_irq(&engine->timeline->lock);
-
-	unreserve_engine(request->engine);
 	advance_ring(request);
-
 	free_capture_list(request);
 
 	/*
@@ -434,73 +456,74 @@
 
 	/* Retirement decays the ban score as it is a sign of ctx progress */
 	atomic_dec_if_positive(&request->ctx->ban_score);
+	intel_context_unpin(request->ctx, request->engine);
 
-	/*
-	 * The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. However, since we
-	 * cannot take the required locks at i915_request_submit() we
-	 * defer the unpinning of the active context to now, retirement of
-	 * the subsequent request.
-	 */
-	if (engine->last_retired_context)
-		engine->context_unpin(engine, engine->last_retired_context);
-	engine->last_retired_context = request->ctx;
+	__retire_engine_upto(request->engine, request);
 
-	spin_lock_irq(&request->lock);
-	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
-		dma_fence_signal_locked(&request->fence);
-	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-		intel_engine_cancel_signaling(request);
-	if (request->waitboost) {
-		GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&request->i915->gt_pm.rps.num_waiters);
-	}
-	spin_unlock_irq(&request->lock);
+	unreserve_gt(request->i915);
 
-	i915_priotree_fini(request->i915, &request->priotree);
+	i915_sched_node_fini(request->i915, &request->sched);
 	i915_request_put(request);
 }
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = rq->engine;
+	struct intel_ring *ring = rq->ring;
 	struct i915_request *tmp;
 
+	GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
+		  rq->engine->name,
+		  rq->fence.context, rq->fence.seqno,
+		  rq->global_seqno,
+		  intel_engine_get_seqno(rq->engine));
+
 	lockdep_assert_held(&rq->i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
-	if (list_empty(&rq->link))
+	if (list_empty(&rq->ring_link))
 		return;
 
 	do {
-		tmp = list_first_entry(&engine->timeline->requests,
-				       typeof(*tmp), link);
+		tmp = list_first_entry(&ring->request_list,
+				       typeof(*tmp), ring_link);
 
 		i915_request_retire(tmp);
 	} while (tmp != rq);
 }
 
-static u32 timeline_get_seqno(struct intel_timeline *tl)
+static u32 timeline_get_seqno(struct i915_timeline *tl)
 {
 	return ++tl->seqno;
 }
 
+static void move_to_timeline(struct i915_request *request,
+			     struct i915_timeline *timeline)
+{
+	GEM_BUG_ON(request->timeline == &request->engine->timeline);
+	lockdep_assert_held(&request->engine->timeline.lock);
+
+	spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
+	list_move_tail(&request->link, &timeline->requests);
+	spin_unlock(&request->timeline->lock);
+}
+
 void __i915_request_submit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct intel_timeline *timeline;
 	u32 seqno;
 
-	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline->lock);
+	GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n",
+		  engine->name,
+		  request->fence.context, request->fence.seqno,
+		  engine->timeline.seqno + 1,
+		  intel_engine_get_seqno(engine));
 
-	/* Transfer from per-context onto the global per-engine timeline */
-	timeline = engine->timeline;
-	GEM_BUG_ON(timeline == request->timeline);
+	GEM_BUG_ON(!irqs_disabled());
+	lockdep_assert_held(&engine->timeline.lock);
+
 	GEM_BUG_ON(request->global_seqno);
 
-	seqno = timeline_get_seqno(timeline);
+	seqno = timeline_get_seqno(&engine->timeline);
 	GEM_BUG_ON(!seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
@@ -514,9 +537,8 @@
 	engine->emit_breadcrumb(request,
 				request->ring->vaddr + request->postfix);
 
-	spin_lock(&request->timeline->lock);
-	list_move_tail(&request->link, &timeline->requests);
-	spin_unlock(&request->timeline->lock);
+	/* Transfer from per-context onto the global per-engine timeline */
+	move_to_timeline(request, &engine->timeline);
 
 	trace_i915_request_execute(request);
 
@@ -529,30 +551,35 @@
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	__i915_request_submit(request);
 
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 void __i915_request_unsubmit(struct i915_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
-	struct intel_timeline *timeline;
+
+	GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n",
+		  engine->name,
+		  request->fence.context, request->fence.seqno,
+		  request->global_seqno,
+		  intel_engine_get_seqno(engine));
 
 	GEM_BUG_ON(!irqs_disabled());
-	lockdep_assert_held(&engine->timeline->lock);
+	lockdep_assert_held(&engine->timeline.lock);
 
 	/*
 	 * Only unwind in reverse order, required so that the per-context list
 	 * is kept in seqno/ring order.
 	 */
 	GEM_BUG_ON(!request->global_seqno);
-	GEM_BUG_ON(request->global_seqno != engine->timeline->seqno);
+	GEM_BUG_ON(request->global_seqno != engine->timeline.seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine),
 				     request->global_seqno));
-	engine->timeline->seqno--;
+	engine->timeline.seqno--;
 
 	/* We may be recursing from the signal callback of another i915 fence */
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
@@ -562,12 +589,7 @@
 	spin_unlock(&request->lock);
 
 	/* Transfer back from the global per-engine timeline to per-context */
-	timeline = request->timeline;
-	GEM_BUG_ON(timeline == engine->timeline);
-
-	spin_lock(&timeline->lock);
-	list_move(&request->link, &timeline->requests);
-	spin_unlock(&timeline->lock);
+	move_to_timeline(request, request->timeline);
 
 	/*
 	 * We don't need to wake_up any waiters on request->execute, they
@@ -584,11 +606,11 @@
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	__i915_request_unsubmit(request);
 
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static int __i915_sw_fence_call
@@ -659,12 +681,12 @@
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ring = engine->context_pin(engine, ctx);
+	ring = intel_context_pin(ctx, engine);
 	if (IS_ERR(ring))
 		return ERR_CAST(ring);
 	GEM_BUG_ON(!ring);
 
-	ret = reserve_engine(engine);
+	ret = reserve_gt(i915);
 	if (ret)
 		goto err_unpin;
 
@@ -672,10 +694,10 @@
 	if (ret)
 		goto err_unreserve;
 
-	/* Move the oldest request to the slab-cache (if not in use!) */
-	rq = list_first_entry_or_null(&engine->timeline->requests,
-				      typeof(*rq), link);
-	if (rq && i915_request_completed(rq))
+	/* Move our oldest request to the slab-cache (if not in use!) */
+	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
+	if (!list_is_last(&rq->ring_link, &ring->request_list) &&
+	    i915_request_completed(rq))
 		i915_request_retire(rq);
 
 	/*
@@ -735,8 +757,13 @@
 		}
 	}
 
-	rq->timeline = i915_gem_context_lookup_timeline(ctx, engine);
-	GEM_BUG_ON(rq->timeline == engine->timeline);
+	INIT_LIST_HEAD(&rq->active_list);
+	rq->i915 = i915;
+	rq->engine = engine;
+	rq->ctx = ctx;
+	rq->ring = ring;
+	rq->timeline = ring->timeline;
+	GEM_BUG_ON(rq->timeline == &engine->timeline);
 
 	spin_lock_init(&rq->lock);
 	dma_fence_init(&rq->fence,
@@ -749,13 +776,7 @@
 	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
 	init_waitqueue_head(&rq->execute);
 
-	i915_priotree_init(&rq->priotree);
-
-	INIT_LIST_HEAD(&rq->active_list);
-	rq->i915 = i915;
-	rq->engine = engine;
-	rq->ctx = ctx;
-	rq->ring = ring;
+	i915_sched_node_init(&rq->sched);
 
 	/* No zalloc, must clear what we need by hand */
 	rq->global_seqno = 0;
@@ -792,6 +813,9 @@
 	if (ret)
 		goto err_unwind;
 
+	/* Keep a second pin for the dual retirement along engine and ring */
+	__intel_context_pin(rq->ctx, engine);
+
 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
 	return rq;
@@ -801,14 +825,14 @@
 
 	/* Make sure we didn't add ourselves to external state before freeing */
 	GEM_BUG_ON(!list_empty(&rq->active_list));
-	GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list));
-	GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list));
+	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
+	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
 	kmem_cache_free(i915->requests, rq);
 err_unreserve:
-	unreserve_engine(engine);
+	unreserve_gt(i915);
 err_unpin:
-	engine->context_unpin(engine, ctx);
+	intel_context_unpin(ctx, engine);
 	return ERR_PTR(ret);
 }
 
@@ -824,9 +848,9 @@
 		return 0;
 
 	if (to->engine->schedule) {
-		ret = i915_priotree_add_dependency(to->i915,
-						   &to->priotree,
-						   &from->priotree);
+		ret = i915_sched_node_add_dependency(to->i915,
+						     &to->sched,
+						     &from->sched);
 		if (ret < 0)
 			return ret;
 	}
@@ -904,7 +928,7 @@
 
 		/* Squash repeated waits to the same timelines */
 		if (fence->context != rq->i915->mm.unordered_timeline &&
-		    intel_timeline_sync_is_later(rq->timeline, fence))
+		    i915_timeline_sync_is_later(rq->timeline, fence))
 			continue;
 
 		if (dma_fence_is_i915(fence))
@@ -918,7 +942,7 @@
 
 		/* Record the latest fence used against each timeline */
 		if (fence->context != rq->i915->mm.unordered_timeline)
-			intel_timeline_sync_set(rq->timeline, fence);
+			i915_timeline_sync_set(rq->timeline, fence);
 	} while (--nchild);
 
 	return 0;
@@ -995,11 +1019,14 @@
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_ring *ring = request->ring;
-	struct intel_timeline *timeline = request->timeline;
+	struct i915_timeline *timeline = request->timeline;
 	struct i915_request *prev;
 	u32 *cs;
 	int err;
 
+	GEM_TRACE("%s fence %llx:%d\n",
+		  engine->name, request->fence.context, request->fence.seqno);
+
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 	trace_i915_request_add(request);
 
@@ -1054,10 +1081,10 @@
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 		if (engine->schedule)
-			__i915_priotree_add_dependency(&request->priotree,
-						       &prev->priotree,
-						       &request->dep,
-						       0);
+			__i915_sched_node_add_dependency(&request->sched,
+							 &prev->sched,
+							 &request->dep,
+							 0);
 	}
 
 	spin_lock_irq(&timeline->lock);
@@ -1068,6 +1095,8 @@
 	i915_gem_active_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
+	if (list_is_first(&request->ring_link, &ring->request_list))
+		list_add(&ring->active_link, &request->i915->gt.active_rings);
 	request->emitted_jiffies = jiffies;
 
 	/*
@@ -1081,12 +1110,11 @@
 	 * decide whether to preempt the entire chain so that it is ready to
 	 * run at the earliest possible convenience.
 	 */
-	rcu_read_lock();
-	if (engine->schedule)
-		engine->schedule(request, request->ctx->priority);
-	rcu_read_unlock();
-
 	local_bh_disable();
+	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
+	if (engine->schedule)
+		engine->schedule(request, &request->ctx->sched);
+	rcu_read_unlock();
 	i915_sw_fence_commit(&request->submit);
 	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
 
@@ -1206,11 +1234,13 @@
 
 static bool __i915_wait_request_check_and_reset(struct i915_request *request)
 {
-	if (likely(!i915_reset_handoff(&request->i915->gpu_error)))
+	struct i915_gpu_error *error = &request->i915->gpu_error;
+
+	if (likely(!i915_reset_handoff(error)))
 		return false;
 
 	__set_current_state(TASK_RUNNING);
-	i915_reset(request->i915, 0);
+	i915_reset(request->i915, error->stalled_mask, error->reason);
 	return true;
 }
 
@@ -1373,38 +1403,30 @@
 	return timeout;
 }
 
-static void engine_retire_requests(struct intel_engine_cs *engine)
+static void ring_retire_requests(struct intel_ring *ring)
 {
 	struct i915_request *request, *next;
-	u32 seqno = intel_engine_get_seqno(engine);
-	LIST_HEAD(retire);
 
-	spin_lock_irq(&engine->timeline->lock);
 	list_for_each_entry_safe(request, next,
-				 &engine->timeline->requests, link) {
-		if (!i915_seqno_passed(seqno, request->global_seqno))
+				 &ring->request_list, ring_link) {
+		if (!i915_request_completed(request))
 			break;
 
-		list_move_tail(&request->link, &retire);
-	}
-	spin_unlock_irq(&engine->timeline->lock);
-
-	list_for_each_entry_safe(request, next, &retire, link)
 		i915_request_retire(request);
+	}
 }
 
 void i915_retire_requests(struct drm_i915_private *i915)
 {
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
+	struct intel_ring *ring, *tmp;
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
 	if (!i915->gt.active_requests)
 		return;
 
-	for_each_engine(engine, i915, id)
-		engine_retire_requests(engine);
+	list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link)
+		ring_retire_requests(ring);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
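
reserve_gt() above replaces the per-engine inflight accounting with a single GT-wide request serial and resets all global seqnos whenever that serial wraps to zero. A minimal userspace sketch of the wrap rule follows, with reset_all_seqno() as a stub for reset_all_global_seqno() — a sketch, not the driver code:

/*
 * Standalone sketch of the reserve_gt() wrap rule -- not part of the
 * patch. A single GT-wide serial is bumped per request; hitting zero
 * (wraparound) forces a global seqno reset before the request may
 * proceed.
 */
#include <stdint.h>

struct gt { uint32_t request_serial; };

static int reset_all_seqno(struct gt *gt)
{
	gt->request_serial = 0;	/* every engine timeline restarts at 0 */
	return 0;
}

static int reserve_gt(struct gt *gt)
{
	while (++gt->request_serial == 0) {	/* 0 is kept as invalid */
		int ret = reset_all_seqno(gt);

		if (ret) {
			gt->request_serial--;	/* undo on failure */
			return ret;
		}
	}
	return 0;
}

int main(void)
{
	struct gt gt = { .request_serial = ~0u };	/* about to wrap */

	return reserve_gt(&gt);	/* resets, then hands out serial 1 */
}
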
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 7d6eb82..eddbd424 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,13 +28,16 @@
 #include <linux/dma-fence.h>
 
 #include "i915_gem.h"
+#include "i915_scheduler.h"
 #include "i915_sw_fence.h"
+#include "i915_scheduler.h"
 
 #include <uapi/drm/i915_drm.h>
 
 struct drm_file;
 struct drm_i915_gem_object;
 struct i915_request;
+struct i915_timeline;
 
 struct intel_wait {
 	struct rb_node node;
@@ -48,44 +51,6 @@
 	struct list_head link;
 };
 
-struct i915_dependency {
-	struct i915_priotree *signaler;
-	struct list_head signal_link;
-	struct list_head wait_link;
-	struct list_head dfs_link;
-	unsigned long flags;
-#define I915_DEPENDENCY_ALLOC BIT(0)
-};
-
-/*
- * "People assume that time is a strict progression of cause to effect, but
- * actually, from a nonlinear, non-subjective viewpoint, it's more like a big
- * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015
- *
- * Requests exist in a complex web of interdependencies. Each request
- * has to wait for some other request to complete before it is ready to be run
- * (e.g. we have to wait until the pixels have been rendering into a texture
- * before we can copy from it). We track the readiness of a request in terms
- * of fences, but we also need to keep the dependency tree for the lifetime
- * of the request (beyond the life of an individual fence). We use the tree
- * at various points to reorder the requests whilst keeping the requests
- * in order with respect to their various dependencies.
- */
-struct i915_priotree {
-	struct list_head signalers_list; /* those before us, we depend upon */
-	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct list_head link;
-	int priority;
-};
-
-enum {
-	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
-	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
-	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
-
-	I915_PRIORITY_INVALID = INT_MIN
-};
-
 struct i915_capture_list {
 	struct i915_capture_list *next;
 	struct i915_vma *vma;
@@ -131,7 +96,7 @@
 	struct i915_gem_context *ctx;
 	struct intel_engine_cs *engine;
 	struct intel_ring *ring;
-	struct intel_timeline *timeline;
+	struct i915_timeline *timeline;
 	struct intel_signal_node signaling;
 
 	/*
@@ -154,7 +119,7 @@
 	 * to retirement), i.e. bidirectional dependency information for the
 	 * request not tied to individual fences.
 	 */
-	struct i915_priotree priotree;
+	struct i915_sched_node sched;
 	struct i915_dependency dep;
 
 	/**
@@ -343,10 +308,10 @@
 				 seqno - 1);
 }
 
-static inline bool i915_priotree_signaled(const struct i915_priotree *pt)
+static inline bool i915_sched_node_signaled(const struct i915_sched_node *node)
 {
 	const struct i915_request *rq =
-		container_of(pt, const struct i915_request, priotree);
+		container_of(node, const struct i915_request, sched);
 
 	return i915_request_completed(rq);
 }
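
i915_sched_node_signaled() relies on the scheduler node being embedded in the request, so the outer structure can be recovered with container_of(). A self-contained illustration of that pattern (hypothetical structure names, plain C, not part of the patch):

/*
 * Sketch of the container_of() pattern behind i915_sched_node_signaled().
 * The node is embedded in the request, so the outer struct is recovered
 * by subtracting the member offset.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct sched_node { int priority; };
struct request { unsigned int seqno; struct sched_node sched; };

int main(void)
{
	struct request rq = { .seqno = 42 };
	struct sched_node *node = &rq.sched;
	struct request *back = container_of(node, struct request, sched);

	printf("seqno=%u\n", back->seqno);	/* prints seqno=42 */
	return 0;
}
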
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
new file mode 100644
index 0000000..70a4222
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -0,0 +1,72 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef _I915_SCHEDULER_H_
+#define _I915_SCHEDULER_H_
+
+#include <linux/bitops.h>
+
+#include <uapi/drm/i915_drm.h>
+
+enum {
+	I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1,
+	I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY,
+	I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1,
+
+	I915_PRIORITY_INVALID = INT_MIN
+};
+
+struct i915_sched_attr {
+	/**
+	 * @priority: execution and service priority
+	 *
+	 * All clients are equal, but some are more equal than others!
+	 *
+	 * Requests from a context with a greater (more positive) value of
+	 * @priority will be executed before those with a lower @priority
+	 * value, forming a simple QoS.
+	 *
+	 * The &drm_i915_private.kernel_context is assigned the lowest priority.
+	 */
+	int priority;
+};
+
+/*
+ * "People assume that time is a strict progression of cause to effect, but
+ * actually, from a nonlinear, non-subjective viewpoint, it's more like a big
+ * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015
+ *
+ * Requests exist in a complex web of interdependencies. Each request
+ * has to wait for some other request to complete before it is ready to be run
+ * (e.g. we have to wait until the pixels have been rendered into a texture
+ * before we can copy from it). We track the readiness of a request in terms
+ * of fences, but we also need to keep the dependency tree for the lifetime
+ * of the request (beyond the life of an individual fence). We use the tree
+ * at various points to reorder the requests whilst keeping the requests
+ * in order with respect to their various dependencies.
+ *
+ * There is no active component to the "scheduler". As we know the dependency
+ * DAG of each request, we are able to insert it into a sorted queue when it
+ * is ready, and are able to reorder its portion of the graph to accommodate
+ * dynamic priority changes.
+ */
+struct i915_sched_node {
+	struct list_head signalers_list; /* those before us, we depend upon */
+	struct list_head waiters_list; /* those after us, they depend upon us */
+	struct list_head link;
+	struct i915_sched_attr attr;
+};
+
+struct i915_dependency {
+	struct i915_sched_node *signaler;
+	struct list_head signal_link;
+	struct list_head wait_link;
+	struct list_head dfs_link;
+	unsigned long flags;
+#define I915_DEPENDENCY_ALLOC BIT(0)
+};
+
+#endif /* _I915_SCHEDULER_H_ */
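
The two lists declared in i915_sched_node pair up with i915_dependency as shown in __i915_sched_node_add_dependency() earlier in this diff. A userspace sketch of that bookkeeping, with minimal stand-ins for the <linux/list.h> helpers — illustrative only:

/*
 * One dependency links the waiter onto the signaler's waiters_list and
 * the signaler onto the waiter's signalers_list, mirroring
 * __i915_sched_node_add_dependency(). Not part of the patch.
 */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *n, struct list_head *h)
{
	n->next = h->next;
	n->prev = h;
	h->next->prev = n;
	h->next = n;
}

struct sched_node { struct list_head signalers_list, waiters_list; };

struct dependency {
	struct sched_node *signaler;
	struct list_head signal_link, wait_link;
};

static void add_dependency(struct sched_node *node,
			   struct sched_node *signal,
			   struct dependency *dep)
{
	list_add(&dep->wait_link, &signal->waiters_list);   /* node waits on signal */
	list_add(&dep->signal_link, &node->signalers_list); /* signal precedes node */
	dep->signaler = signal;
}

int main(void)
{
	struct sched_node a, b;
	struct dependency dep;

	list_init(&a.signalers_list); list_init(&a.waiters_list);
	list_init(&b.signalers_list); list_init(&b.waiters_list);

	add_dependency(&b, &a, &dep);	/* b depends on a */
	printf("b's signaler is a? %d\n", dep.signaler == &a);	/* prints 1 */
	return 0;
}
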
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
new file mode 100644
index 0000000..4667cc0
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -0,0 +1,105 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016-2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+#include "i915_timeline.h"
+#include "i915_syncmap.h"
+
+void i915_timeline_init(struct drm_i915_private *i915,
+			struct i915_timeline *timeline,
+			const char *name)
+{
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	/*
+	 * Ideally we want a set of engines on a single leaf as we expect
+	 * to mostly be tracking synchronisation between engines. It is not
+	 * a huge issue if this is not the case, but we may want to mitigate
+	 * any page crossing penalties if they become an issue.
+	 */
+	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
+
+	timeline->name = name;
+
+	list_add(&timeline->link, &i915->gt.timelines);
+
+	/* Called during early_init before we know how many engines there are */
+
+	timeline->fence_context = dma_fence_context_alloc(1);
+
+	spin_lock_init(&timeline->lock);
+
+	init_request_active(&timeline->last_request, NULL);
+	INIT_LIST_HEAD(&timeline->requests);
+
+	i915_syncmap_init(&timeline->sync);
+}
+
+/**
+ * i915_timelines_park - called when the driver idles
+ * @i915: the drm_i915_private device
+ *
+ * When the driver is completely idle, we know that all of our sync points
+ * have been signaled and our tracking is then entirely redundant. Any request
+ * to wait upon an older sync point will be completed instantly as we know
+ * the fence is signaled and therefore we will not even look them up in the
+ * sync point map.
+ */
+void i915_timelines_park(struct drm_i915_private *i915)
+{
+	struct i915_timeline *timeline;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	list_for_each_entry(timeline, &i915->gt.timelines, link) {
+		/*
+		 * All known fences are completed so we can scrap
+		 * the current sync point tracking and start afresh,
+		 * any attempt to wait upon a previous sync point
+		 * will be skipped as the fence was signaled.
+		 */
+		i915_syncmap_free(&timeline->sync);
+	}
+}
+
+void i915_timeline_fini(struct i915_timeline *timeline)
+{
+	GEM_BUG_ON(!list_empty(&timeline->requests));
+
+	i915_syncmap_free(&timeline->sync);
+
+	list_del(&timeline->link);
+}
+
+struct i915_timeline *
+i915_timeline_create(struct drm_i915_private *i915, const char *name)
+{
+	struct i915_timeline *timeline;
+
+	timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
+	if (!timeline)
+		return ERR_PTR(-ENOMEM);
+
+	i915_timeline_init(i915, timeline, name);
+	kref_init(&timeline->kref);
+
+	return timeline;
+}
+
+void __i915_timeline_free(struct kref *kref)
+{
+	struct i915_timeline *timeline =
+		container_of(kref, typeof(*timeline), kref);
+
+	i915_timeline_fini(timeline);
+	kfree(timeline);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_timeline.c"
+#include "selftests/i915_timeline.c"
+#endif
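
i915_timeline_create() hands out a kref-counted timeline that is released through __i915_timeline_free() on the final i915_timeline_put(). A minimal sketch of that lifecycle, with a plain counter standing in for struct kref — not part of the patch:

/*
 * create initialises the count to 1, get bumps it, put frees on the
 * final drop (the role played by __i915_timeline_free() as the kref
 * release callback above).
 */
#include <stdio.h>
#include <stdlib.h>

struct timeline { const char *name; unsigned int refcount; };

static struct timeline *timeline_create(const char *name)
{
	struct timeline *tl = calloc(1, sizeof(*tl));

	if (!tl)
		return NULL;
	tl->name = name;
	tl->refcount = 1;	/* kref_init() */
	return tl;
}

static struct timeline *timeline_get(struct timeline *tl)
{
	tl->refcount++;		/* kref_get() */
	return tl;
}

static void timeline_put(struct timeline *tl)
{
	if (--tl->refcount == 0) {	/* kref_put() release path */
		printf("freeing timeline %s\n", tl->name);
		free(tl);
	}
}

int main(void)
{
	struct timeline *tl = timeline_create("demo");

	timeline_get(tl);	/* second owner */
	timeline_put(tl);	/* still alive */
	timeline_put(tl);	/* final ref: prints and frees */
	return 0;
}
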
diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
new file mode 100644
index 0000000..dc2a463
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_timeline.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_TIMELINE_H
+#define I915_TIMELINE_H
+
+#include <linux/list.h>
+#include <linux/kref.h>
+
+#include "i915_request.h"
+#include "i915_syncmap.h"
+#include "i915_utils.h"
+
+struct i915_timeline {
+	u64 fence_context;
+	u32 seqno;
+
+	spinlock_t lock;
+
+	/**
+	 * List of breadcrumbs associated with GPU requests currently
+	 * outstanding.
+	 */
+	struct list_head requests;
+
+	/* Contains an RCU guarded pointer to the last request. No reference is
+	 * held to the request, users must carefully acquire a reference to
+	 * the request using i915_gem_active_get_request_rcu(), or hold the
+	 * struct_mutex.
+	 */
+	struct i915_gem_active last_request;
+
+	/**
+	 * We track the most recent seqno that we wait on in every context so
+	 * that we only have to emit a new await and dependency on a more
+	 * recent sync point. As the contexts may be executed out-of-order, we
+	 * have to track each individually and can not rely on an absolute
+	 * global_seqno. When we know that all tracked fences are completed
+	 * (i.e. when the driver is idle), we know that the syncmap is
+	 * redundant and we can discard it without loss of generality.
+	 */
+	struct i915_syncmap *sync;
+	/**
+	 * Separately to the inter-context seqno map above, we track the last
+	 * barrier (e.g. semaphore wait) to the global engine timelines. Note
+	 * that this tracks global_seqno rather than the context.seqno, and
+	 * so it is subject to the limitations of hw wraparound and that we
+	 * may need to revoke global_seqno (on pre-emption).
+	 */
+	u32 global_sync[I915_NUM_ENGINES];
+
+	struct list_head link;
+	const char *name;
+
+	struct kref kref;
+};
+
+void i915_timeline_init(struct drm_i915_private *i915,
+			struct i915_timeline *tl,
+			const char *name);
+void i915_timeline_fini(struct i915_timeline *tl);
+
+struct i915_timeline *
+i915_timeline_create(struct drm_i915_private *i915, const char *name);
+
+static inline struct i915_timeline *
+i915_timeline_get(struct i915_timeline *timeline)
+{
+	kref_get(&timeline->kref);
+	return timeline;
+}
+
+void __i915_timeline_free(struct kref *kref);
+static inline void i915_timeline_put(struct i915_timeline *timeline)
+{
+	kref_put(&timeline->kref, __i915_timeline_free);
+}
+
+static inline int __i915_timeline_sync_set(struct i915_timeline *tl,
+					   u64 context, u32 seqno)
+{
+	return i915_syncmap_set(&tl->sync, context, seqno);
+}
+
+static inline int i915_timeline_sync_set(struct i915_timeline *tl,
+					 const struct dma_fence *fence)
+{
+	return __i915_timeline_sync_set(tl, fence->context, fence->seqno);
+}
+
+static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl,
+						 u64 context, u32 seqno)
+{
+	return i915_syncmap_is_later(&tl->sync, context, seqno);
+}
+
+static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl,
+					       const struct dma_fence *fence)
+{
+	return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno);
+}
+
+void i915_timelines_park(struct drm_i915_private *i915);
+
+#endif
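
The sync_set()/sync_is_later() pair above is what lets the request code squash repeated waits on the same fence context. A sketch of the idea, using a fixed array as a stand-in for the real syncmap and a wrap-safe comparison in the style of i915_seqno_passed() — an assumption-laden illustration, not the driver implementation:

/*
 * Remember the newest seqno awaited per fence context; skip any await
 * that is not later than what was already recorded. Bounds checking is
 * omitted for brevity.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_CTX 8

static uint32_t last_seqno[MAX_CTX];	/* per-context high-water mark */

static bool sync_is_later(unsigned int ctx, uint32_t seqno)
{
	return (int32_t)(last_seqno[ctx] - seqno) >= 0;	/* wrap-safe */
}

static void sync_set(unsigned int ctx, uint32_t seqno)
{
	last_seqno[ctx] = seqno;
}

int main(void)
{
	sync_set(3, 100);
	printf("await seqno 90 needed? %d\n", !sync_is_later(3, 90));	/* 0 */
	printf("await seqno 101 needed? %d\n", !sync_is_later(3, 101));	/* 1 */
	return 0;
}
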
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 408827b..8cc3a25 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -679,45 +679,68 @@
 	     TP_ARGS(rq)
 );
 
-DECLARE_EVENT_CLASS(i915_request_hw,
-		    TP_PROTO(struct i915_request *rq, unsigned int port),
-		    TP_ARGS(rq, port),
+TRACE_EVENT(i915_request_in,
+	    TP_PROTO(struct i915_request *rq, unsigned int port),
+	    TP_ARGS(rq, port),
 
-		    TP_STRUCT__entry(
-				     __field(u32, dev)
-				     __field(u32, hw_id)
-				     __field(u32, ring)
-				     __field(u32, ctx)
-				     __field(u32, seqno)
-				     __field(u32, global_seqno)
-				     __field(u32, port)
-				    ),
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, hw_id)
+			     __field(u32, ring)
+			     __field(u32, ctx)
+			     __field(u32, seqno)
+			     __field(u32, global_seqno)
+			     __field(u32, port)
+			     __field(u32, prio)
+			    ),
 
-		    TP_fast_assign(
-				   __entry->dev = rq->i915->drm.primary->index;
-				   __entry->hw_id = rq->ctx->hw_id;
-				   __entry->ring = rq->engine->id;
-				   __entry->ctx = rq->fence.context;
-				   __entry->seqno = rq->fence.seqno;
-				   __entry->global_seqno = rq->global_seqno;
-				   __entry->port = port;
-				  ),
+	    TP_fast_assign(
+			   __entry->dev = rq->i915->drm.primary->index;
+			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->ring = rq->engine->id;
+			   __entry->ctx = rq->fence.context;
+			   __entry->seqno = rq->fence.seqno;
+			   __entry->global_seqno = rq->global_seqno;
+			   __entry->prio = rq->sched.attr.priority;
+			   __entry->port = port;
+			   ),
 
-		    TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u",
+	    TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, prio=%u, global=%u, port=%u",
+		      __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx,
+		      __entry->seqno, __entry->prio, __entry->global_seqno,
+		      __entry->port)
+);
+
+TRACE_EVENT(i915_request_out,
+	    TP_PROTO(struct i915_request *rq),
+	    TP_ARGS(rq),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, hw_id)
+			     __field(u32, ring)
+			     __field(u32, ctx)
+			     __field(u32, seqno)
+			     __field(u32, global_seqno)
+			     __field(u32, completed)
+			    ),
+
+	    TP_fast_assign(
+			   __entry->dev = rq->i915->drm.primary->index;
+			   __entry->hw_id = rq->ctx->hw_id;
+			   __entry->ring = rq->engine->id;
+			   __entry->ctx = rq->fence.context;
+			   __entry->seqno = rq->fence.seqno;
+			   __entry->global_seqno = rq->global_seqno;
+			   __entry->completed = i915_request_completed(rq);
+			   ),
+
+		    TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, completed?=%u",
 			      __entry->dev, __entry->hw_id, __entry->ring,
 			      __entry->ctx, __entry->seqno,
-			      __entry->global_seqno, __entry->port)
+			      __entry->global_seqno, __entry->completed)
 );
 
-DEFINE_EVENT(i915_request_hw, i915_request_in,
-	     TP_PROTO(struct i915_request *rq, unsigned int port),
-	     TP_ARGS(rq, port)
-);
-
-DEFINE_EVENT(i915_request, i915_request_out,
-	     TP_PROTO(struct i915_request *rq),
-	     TP_ARGS(rq)
-);
 #else
 #if !defined(TRACE_HEADER_MULTI_READ)
 static inline void
@@ -811,42 +834,6 @@
 	    TP_ARGS(rq)
 );
 
-TRACE_EVENT(i915_flip_request,
-	    TP_PROTO(int plane, struct drm_i915_gem_object *obj),
-
-	    TP_ARGS(plane, obj),
-
-	    TP_STRUCT__entry(
-		    __field(int, plane)
-		    __field(struct drm_i915_gem_object *, obj)
-		    ),
-
-	    TP_fast_assign(
-		    __entry->plane = plane;
-		    __entry->obj = obj;
-		    ),
-
-	    TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj)
-);
-
-TRACE_EVENT(i915_flip_complete,
-	    TP_PROTO(int plane, struct drm_i915_gem_object *obj),
-
-	    TP_ARGS(plane, obj),
-
-	    TP_STRUCT__entry(
-		    __field(int, plane)
-		    __field(struct drm_i915_gem_object *, obj)
-		    ),
-
-	    TP_fast_assign(
-		    __entry->plane = plane;
-		    __entry->obj = obj;
-		    ),
-
-	    TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj)
-);
-
 TRACE_EVENT_CONDITION(i915_reg_rw,
 	TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace),
 
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 51dbfe5..00165ad 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -40,8 +40,8 @@
 #undef WARN_ON_ONCE
 #define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
 
-#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \
-			     (long)(x), __func__)
+#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
+			     __stringify(x), (long)(x))
 
 #if GCC_VERSION >= 70000
 #define add_overflows(A, B) \
@@ -120,6 +120,12 @@
 
 #include <linux/list.h>
 
+static inline int list_is_first(const struct list_head *list,
+				const struct list_head *head)
+{
+	return head->next == list;
+}
+
 static inline void __list_del_many(struct list_head *head,
 				   struct list_head *first)
 {
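
list_is_first() is the mirror of the existing list_is_last(): an entry is first exactly when the head's next pointer names it. A tiny standalone demonstration (not part of the patch):

/* Two-entry circular list: head -> a -> b -> head. */
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static int list_is_first(const struct list_head *list,
			 const struct list_head *head)
{
	return head->next == list;
}

int main(void)
{
	struct list_head head, a, b;

	head.next = &a; a.prev = &head;
	a.next = &b;    b.prev = &a;
	b.next = &head; head.prev = &b;

	printf("a first? %d  b first? %d\n",
	       list_is_first(&a, &head), list_is_first(&b, &head)); /* 1 0 */
	return 0;
}
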
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4bda3bd..9324d47 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -46,8 +46,6 @@
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
-	if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
-		WARN_ON(i915_vma_unbind(vma));
 
 	GEM_BUG_ON(!i915_gem_object_is_active(obj));
 	if (--obj->active_count)
@@ -232,7 +230,6 @@
 	if (!vma)
 		vma = vma_create(obj, vm, view);
 
-	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma));
 	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
 	GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);
 	return vma;
@@ -684,13 +681,43 @@
 	return ret;
 }
 
-static void i915_vma_destroy(struct i915_vma *vma)
+void i915_vma_close(struct i915_vma *vma)
+{
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+	GEM_BUG_ON(i915_vma_is_closed(vma));
+	vma->flags |= I915_VMA_CLOSED;
+
+	/*
+	 * We defer actually closing, unbinding and destroying the VMA until
+	 * the next idle point, or if the object is freed in the meantime. By
+	 * postponing the unbind, we allow for it to be resurrected by the
+	 * client, avoiding the work required to rebind the VMA. This is
+	 * advantageous for DRI, where the client/server pass objects
+	 * between themselves, temporarily opening a local VMA to the
+	 * object, and then closing it again. The same object is then reused
+	 * on the next frame (or two, depending on the depth of the swap queue)
+	 * causing us to rebind the VMA once more. This ends up being a lot
+	 * of wasted work for the steady state.
+	 */
+	list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma);
+}
+
+void i915_vma_reopen(struct i915_vma *vma)
+{
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
+
+	if (vma->flags & I915_VMA_CLOSED) {
+		vma->flags &= ~I915_VMA_CLOSED;
+		list_del(&vma->closed_link);
+	}
+}
+
+static void __i915_vma_destroy(struct i915_vma *vma)
 {
 	int i;
 
 	GEM_BUG_ON(vma->node.allocated);
-	GEM_BUG_ON(i915_vma_is_active(vma));
-	GEM_BUG_ON(!i915_vma_is_closed(vma));
 	GEM_BUG_ON(vma->fence);
 
 	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
@@ -699,6 +726,7 @@
 
 	list_del(&vma->obj_link);
 	list_del(&vma->vm_link);
+	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
 
 	if (!i915_vma_is_ggtt(vma))
 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
@@ -706,15 +734,30 @@
 	kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma);
 }
 
-void i915_vma_close(struct i915_vma *vma)
+void i915_vma_destroy(struct i915_vma *vma)
 {
-	GEM_BUG_ON(i915_vma_is_closed(vma));
-	vma->flags |= I915_VMA_CLOSED;
+	lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
 
-	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
+	GEM_BUG_ON(i915_vma_is_active(vma));
+	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
-	if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma))
-		WARN_ON(i915_vma_unbind(vma));
+	if (i915_vma_is_closed(vma))
+		list_del(&vma->closed_link);
+
+	WARN_ON(i915_vma_unbind(vma));
+	__i915_vma_destroy(vma);
+}
+
+void i915_vma_parked(struct drm_i915_private *i915)
+{
+	struct i915_vma *vma, *next;
+
+	list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
+		GEM_BUG_ON(!i915_vma_is_closed(vma));
+		i915_vma_destroy(vma);
+	}
+
+	GEM_BUG_ON(!list_empty(&i915->gt.closed_vma));
 }
 
 static void __i915_vma_iounmap(struct i915_vma *vma)
@@ -804,7 +847,7 @@
 		return -EBUSY;
 
 	if (!drm_mm_node_allocated(&vma->node))
-		goto destroy;
+		return 0;
 
 	GEM_BUG_ON(obj->bind_count == 0);
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
@@ -841,10 +884,6 @@
 
 	i915_vma_remove(vma);
 
-destroy:
-	if (unlikely(i915_vma_is_closed(vma)))
-		i915_vma_destroy(vma);
-
 	return 0;
 }
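
The rework above defers VMA teardown: i915_vma_close() only queues the VMA on gt.closed_vma, i915_vma_reopen() lets a client resurrect it cheaply, and i915_vma_parked() destroys whatever is still queued once the GPU idles. A userspace sketch of that scheme (singly-linked list for brevity; not part of the patch):

/*
 * Closing only queues; reopening unlinks; parking (idle) frees what is
 * still queued. Stands in for i915_vma_close()/reopen()/parked().
 */
#include <stdio.h>
#include <stdlib.h>

struct vma {
	int closed;
	struct vma *closed_next;	/* link on the closed list */
};

static struct vma *closed_vma;		/* gt.closed_vma analogue */

static void vma_close(struct vma *vma)
{
	vma->closed = 1;
	vma->closed_next = closed_vma;	/* defer destruction until idle */
	closed_vma = vma;
}

static void vma_reopen(struct vma *vma)
{
	struct vma **p = &closed_vma;

	if (!vma->closed)
		return;
	while (*p != vma)
		p = &(*p)->closed_next;
	*p = vma->closed_next;		/* client resurrected it */
	vma->closed = 0;
}

static void vma_parked(void)		/* called when the GPU idles */
{
	while (closed_vma) {
		struct vma *vma = closed_vma;

		closed_vma = vma->closed_next;
		printf("destroying closed vma %p\n", (void *)vma);
		free(vma);
	}
}

int main(void)
{
	struct vma *a = calloc(1, sizeof(*a));
	struct vma *b = calloc(1, sizeof(*b));

	vma_close(a);
	vma_close(b);
	vma_reopen(b);	/* b survives the next idle point */
	vma_parked();	/* frees a only */
	free(b);
	return 0;
}
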
 
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 8c50220..fc4294c 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -119,6 +119,8 @@
 	/** This vma's place in the eviction list */
 	struct list_head evict_link;
 
+	struct list_head closed_link;
+
 	/**
 	 * Used for performing relocations during execbuffer insertion.
 	 */
@@ -285,6 +287,8 @@
 int __must_check i915_vma_unbind(struct i915_vma *vma);
 void i915_vma_unlink_ctx(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
+void i915_vma_reopen(struct i915_vma *vma);
+void i915_vma_destroy(struct i915_vma *vma);
 
 int __i915_vma_do_pin(struct i915_vma *vma,
 		      u64 size, u64 alignment, u64 flags);
@@ -408,6 +412,8 @@
 		__i915_vma_unpin_fence(vma);
 }
 
+void i915_vma_parked(struct drm_i915_private *i915);
+
 #define for_each_until(cond) if (cond) break; else
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c
index e9fb6920..40285d1 100644
--- a/drivers/gpu/drm/i915/intel_atomic.c
+++ b/drivers/gpu/drm/i915/intel_atomic.c
@@ -227,6 +227,7 @@
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
 	struct drm_atomic_state *drm_state = crtc_state->base.state;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(drm_state);
 	int num_scalers_need;
 	int i, j;
 
@@ -304,8 +305,8 @@
 				continue;
 			}
 
-			plane_state = intel_atomic_get_existing_plane_state(drm_state,
-									    intel_plane);
+			plane_state = intel_atomic_get_new_plane_state(intel_state,
+								       intel_plane);
 			scaler_id = &plane_state->scaler_id;
 		}
 
@@ -328,8 +329,18 @@
 		}
 
 		/* set scaler mode */
-		if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) {
-			scaler_state->scalers[*scaler_id].mode = 0;
+		if ((INTEL_GEN(dev_priv) >= 9) &&
+		    plane_state && plane_state->base.fb &&
+		    plane_state->base.fb->format->format ==
+		    DRM_FORMAT_NV12) {
+			if (INTEL_GEN(dev_priv) == 9 &&
+			    !IS_GEMINILAKE(dev_priv) &&
+			    !IS_SKYLAKE(dev_priv))
+				scaler_state->scalers[*scaler_id].mode =
+					SKL_PS_SCALER_MODE_NV12;
+			else
+				scaler_state->scalers[*scaler_id].mode =
+					PS_SCALER_MODE_PLANAR;
 		} else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) {
 			/*
 			 * when only 1 scaler is in use on either pipe A or B,
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index 7481ce8..6d06878 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -183,11 +183,16 @@
 	}
 
 	/* FIXME pre-g4x don't work like this */
-	if (intel_state->base.visible)
+	if (state->visible)
 		crtc_state->active_planes |= BIT(intel_plane->id);
 	else
 		crtc_state->active_planes &= ~BIT(intel_plane->id);
 
+	if (state->visible && state->fb->format->format == DRM_FORMAT_NV12)
+		crtc_state->nv12_planes |= BIT(intel_plane->id);
+	else
+		crtc_state->nv12_planes &= ~BIT(intel_plane->id);
+
 	return intel_plane_atomic_calc_changes(old_crtc_state,
 					       &crtc_state->base,
 					       old_plane_state,
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 447b721..54270bd 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -530,6 +530,7 @@
 	 */
 	if (!driver->drrs_enabled)
 		dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED;
+	dev_priv->vbt.psr.enable = driver->psr_enabled;
 }
 
 static void
@@ -1215,10 +1216,8 @@
 {
 	struct child_device_config *it, *child = NULL;
 	struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
-	uint8_t hdmi_level_shift;
 	int i, j;
 	bool is_dvi, is_hdmi, is_dp, is_edp, is_crt;
-	uint8_t aux_channel, ddc_pin;
 	/* Each DDI port can have more than one value on the "DVO Port" field,
 	 * so look for all the possible values for each port.
 	 */
@@ -1255,8 +1254,6 @@
 	if (!child)
 		return;
 
-	aux_channel = child->aux_channel;
-
 	is_dvi = child->device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING;
 	is_dp = child->device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT;
 	is_crt = child->device_type & DEVICE_TYPE_ANALOG_OUTPUT;
@@ -1270,13 +1267,6 @@
 		is_hdmi = false;
 	}
 
-	if (port == PORT_A && is_dvi) {
-		DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n",
-			      is_hdmi ? "/HDMI" : "");
-		is_dvi = false;
-		is_hdmi = false;
-	}
-
 	info->supports_dvi = is_dvi;
 	info->supports_hdmi = is_hdmi;
 	info->supports_dp = is_dp;
@@ -1302,6 +1292,8 @@
 		DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port));
 
 	if (is_dvi) {
+		u8 ddc_pin;
+
 		ddc_pin = map_ddc_pin(dev_priv, child->ddc_pin);
 		if (intel_gmbus_is_valid_pin(dev_priv, ddc_pin)) {
 			info->alternate_ddc_pin = ddc_pin;
@@ -1314,14 +1306,14 @@
 	}
 
 	if (is_dp) {
-		info->alternate_aux_channel = aux_channel;
+		info->alternate_aux_channel = child->aux_channel;
 
 		sanitize_aux_ch(dev_priv, port);
 	}
 
 	if (bdb_version >= 158) {
 		/* The VBT HDMI level shift values match the table we have. */
-		hdmi_level_shift = child->hdmi_level_shifter_value;
+		u8 hdmi_level_shift = child->hdmi_level_shifter_value;
 		DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n",
 			      port_name(port),
 			      hdmi_level_shift);
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 1f79e7a..18e643d 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -82,7 +82,7 @@
 
 static noinline void missed_breadcrumb(struct intel_engine_cs *engine)
 {
-	if (drm_debug & DRM_UT_DRIVER) {
+	if (GEM_SHOW_DEBUG()) {
 		struct drm_printer p = drm_debug_printer(__func__);
 
 		intel_engine_dump(engine, &p,
@@ -130,11 +130,12 @@
 
 static void intel_breadcrumbs_fake_irq(struct timer_list *t)
 {
-	struct intel_engine_cs *engine = from_timer(engine, t,
-						    breadcrumbs.fake_irq);
+	struct intel_engine_cs *engine =
+		from_timer(engine, t, breadcrumbs.fake_irq);
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	/* The timer persists in case we cannot enable interrupts,
+	/*
+	 * The timer persists in case we cannot enable interrupts,
 	 * or if we have previously seen seqno/interrupt incoherency
 	 * ("missed interrupt" syndrome, better known as a "missed breadcrumb").
 	 * Here the worker will wake up every jiffie in order to kick the
@@ -148,6 +149,12 @@
 	if (!b->irq_armed)
 		return;
 
+	/* If the user has disabled the fake-irq, restore the hangchecking */
+	if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) {
+		mod_timer(&b->hangcheck, wait_timeout());
+		return;
+	}
+
 	mod_timer(&b->fake_irq, jiffies + 1);
 }
 
@@ -730,10 +737,11 @@
 	list_add(&request->signaling.link, &iter->signaling.link);
 }
 
-void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
+bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	struct intel_wait *wait = &request->signaling.wait;
 	u32 seqno;
 
 	/*
@@ -750,12 +758,12 @@
 
 	seqno = i915_request_global_seqno(request);
 	if (!seqno) /* will be enabled later upon execution */
-		return;
+		return true;
 
-	GEM_BUG_ON(request->signaling.wait.seqno);
-	request->signaling.wait.tsk = b->signaler;
-	request->signaling.wait.request = request;
-	request->signaling.wait.seqno = seqno;
+	GEM_BUG_ON(wait->seqno);
+	wait->tsk = b->signaler;
+	wait->request = request;
+	wait->seqno = seqno;
 
 	/*
 	 * Add ourselves into the list of waiters, but registering our
@@ -768,11 +776,15 @@
 	 */
 	spin_lock(&b->rb_lock);
 	insert_signal(b, request, seqno);
-	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
+	wakeup &= __intel_engine_add_wait(engine, wait);
 	spin_unlock(&b->rb_lock);
 
-	if (wakeup)
+	if (wakeup) {
 		wake_up_process(b->signaler);
+		return !intel_wait_complete(wait);
+	}
+
+	return true;
 }
 
 void intel_engine_cancel_signaling(struct i915_request *request)
@@ -826,8 +838,8 @@
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
+	del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
 	del_timer_sync(&b->hangcheck);
-	del_timer_sync(&b->fake_irq);
 	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 }
 
@@ -835,15 +847,22 @@
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-	cancel_fake_irq(engine);
 	spin_lock_irq(&b->irq_lock);
 
+	/*
+	 * Leave the fake_irq timer enabled (if it is running), but clear the
+	 * bit so that it turns itself off on its next wake up and goes back
+	 * to the long hangcheck interval if still required.
+	 */
+	clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+
 	if (b->irq_enabled)
 		irq_enable(engine);
 	else
 		irq_disable(engine);
 
-	/* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
+	/*
+	 * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
 	 * GPU is active and may have already executed the MI_USER_INTERRUPT
 	 * before the CPU is ready to receive. However, the engine is currently
 	 * idle (we haven't started it yet), there is no possibility for a
@@ -852,9 +871,6 @@
 	 */
 	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
-	if (b->irq_armed)
-		enable_fake_irq(b);
-
 	spin_unlock_irq(&b->irq_lock);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index c0a8805..de0e223 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -748,6 +748,11 @@
 		      connector->base.id, connector->name,
 		      force);
 
+	if (i915_modparams.load_detect_test) {
+		intel_display_power_get(dev_priv, intel_encoder->power_domain);
+		goto load_detect;
+	}
+
 	/* Skip machines without VGA that falsely report hotplug events */
 	if (dmi_check_system(intel_spurious_crt_detect))
 		return connector_status_disconnected;
@@ -776,11 +781,12 @@
 	 * broken monitor (without edid) to work behind a broken kvm (that fails
 	 * to have the right resistors for HP detection) needs to fix this up.
 	 * For now just bail out. */
-	if (I915_HAS_HOTPLUG(dev_priv) && !i915_modparams.load_detect_test) {
+	if (I915_HAS_HOTPLUG(dev_priv)) {
 		status = connector_status_disconnected;
 		goto out;
 	}
 
+load_detect:
 	if (!force) {
 		status = connector->status;
 		goto out;
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index f9550ea..cf9b600 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -298,7 +298,10 @@
 
 	csr->version = css_header->version;
 
-	if (IS_CANNONLAKE(dev_priv)) {
+	if (csr->fw_path == i915_modparams.dmc_firmware_path) {
+		/* Bypass version check for firmware override. */
+		required_version = csr->version;
+	} else if (IS_CANNONLAKE(dev_priv)) {
 		required_version = CNL_CSR_VERSION_REQUIRED;
 	} else if (IS_GEMINILAKE(dev_priv)) {
 		required_version = GLK_CSR_VERSION_REQUIRED;
@@ -453,7 +456,9 @@
 	if (!HAS_CSR(dev_priv))
 		return;
 
-	if (IS_CANNONLAKE(dev_priv))
+	if (i915_modparams.dmc_firmware_path)
+		csr->fw_path = i915_modparams.dmc_firmware_path;
+	else if (IS_CANNONLAKE(dev_priv))
 		csr->fw_path = I915_CSR_CNL;
 	else if (IS_GEMINILAKE(dev_priv))
 		csr->fw_path = I915_CSR_GLK;
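
[Editor's note] The CSR change above adds a dmc_firmware_path module parameter that takes precedence over the per-platform default and, when used, bypasses the version check (the user explicitly asked for that blob). The selection reduces to a simple precedence chain, sketched here with illustrative firmware file names:

	/* pick the DMC firmware: a user override always wins */
	static const char *pick_dmc_path(const char *override,
					 bool is_cnl, bool is_glk)
	{
		if (override)
			return override;		/* version check bypassed later */
		if (is_cnl)
			return "i915/cnl_dmc.bin";	/* illustrative name */
		if (is_glk)
			return "i915/glk_dmc.bin";	/* illustrative name */
		return NULL;				/* no CSR firmware known */
	}
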
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 8c2d778..b98ac054 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -493,6 +493,125 @@
 	{ 0x2, 0x7F, 0x3F, 0x00, 0x00 },	/* 400   400      0.0   */
 };
 
+struct icl_combo_phy_ddi_buf_trans {
+	u32 dw2_swing_select;
+	u32 dw2_swing_scalar;
+	u32 dw4_scaling;
+};
+
+/* Voltage Swing Programming for VccIO 0.85V for DP */
+static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_85V[] = {
+				/* Voltage mV  dB    */
+	{ 0x2, 0x98, 0x0018 },	/* 400         0.0   */
+	{ 0x2, 0x98, 0x3015 },	/* 400         3.5   */
+	{ 0x2, 0x98, 0x6012 },	/* 400         6.0   */
+	{ 0x2, 0x98, 0x900F },	/* 400         9.5   */
+	{ 0xB, 0x70, 0x0018 },	/* 600         0.0   */
+	{ 0xB, 0x70, 0x3015 },	/* 600         3.5   */
+	{ 0xB, 0x70, 0x6012 },	/* 600         6.0   */
+	{ 0x5, 0x00, 0x0018 },	/* 800         0.0   */
+	{ 0x5, 0x00, 0x3015 },	/* 800         3.5   */
+	{ 0x6, 0x98, 0x0018 },	/* 1200        0.0   */
+};
+
+/* FIXME - After table is updated in Bspec */
+/* Voltage Swing Programming for VccIO 0.85V for eDP */
+static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_85V[] = {
+				/* Voltage mV  dB    */
+	{ 0x0, 0x00, 0x00 },	/* 200         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 200         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 200         4.0   */
+	{ 0x0, 0x00, 0x00 },	/* 200         6.0   */
+	{ 0x0, 0x00, 0x00 },	/* 250         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 250         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 250         4.0   */
+	{ 0x0, 0x00, 0x00 },	/* 300         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 300         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 350         0.0   */
+};
+
+/* Voltage Swing Programming for VccIO 0.95V for DP */
+static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_95V[] = {
+				/* Voltage mV  dB    */
+	{ 0x2, 0x98, 0x0018 },	/* 400         0.0   */
+	{ 0x2, 0x98, 0x3015 },	/* 400         3.5   */
+	{ 0x2, 0x98, 0x6012 },	/* 400         6.0   */
+	{ 0x2, 0x98, 0x900F },	/* 400         9.5   */
+	{ 0x4, 0x98, 0x0018 },	/* 600         0.0   */
+	{ 0x4, 0x98, 0x3015 },	/* 600         3.5   */
+	{ 0x4, 0x98, 0x6012 },	/* 600         6.0   */
+	{ 0x5, 0x76, 0x0018 },	/* 800         0.0   */
+	{ 0x5, 0x76, 0x3015 },	/* 800         3.5   */
+	{ 0x6, 0x98, 0x0018 },	/* 1200        0.0   */
+};
+
+/* FIXME - After table is updated in Bspec */
+/* Voltage Swing Programming for VccIO 0.95V for eDP */
+static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_95V[] = {
+				/* Voltage mV  dB    */
+	{ 0x0, 0x00, 0x00 },	/* 200         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 200         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 200         4.0   */
+	{ 0x0, 0x00, 0x00 },	/* 200         6.0   */
+	{ 0x0, 0x00, 0x00 },	/* 250         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 250         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 250         4.0   */
+	{ 0x0, 0x00, 0x00 },	/* 300         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 300         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 350         0.0   */
+};
+
+/* Voltage Swing Programming for VccIO 1.05V for DP */
+static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_1_05V[] = {
+				/* Voltage mV  dB    */
+	{ 0x2, 0x98, 0x0018 },	/* 400         0.0   */
+	{ 0x2, 0x98, 0x3015 },	/* 400         3.5   */
+	{ 0x2, 0x98, 0x6012 },	/* 400         6.0   */
+	{ 0x2, 0x98, 0x900F },	/* 400         9.5   */
+	{ 0x4, 0x98, 0x0018 },	/* 600         0.0   */
+	{ 0x4, 0x98, 0x3015 },	/* 600         3.5   */
+	{ 0x4, 0x98, 0x6012 },	/* 600         6.0   */
+	{ 0x5, 0x71, 0x0018 },	/* 800         0.0   */
+	{ 0x5, 0x71, 0x3015 },	/* 800         3.5   */
+	{ 0x6, 0x98, 0x0018 },	/* 1200        0.0   */
+};
+
+/* FIXME - After table is updated in Bspec */
+/* Voltage Swing Programming for VccIO 1.05V for eDP */
+static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_1_05V[] = {
+				/* Voltage mV  dB    */
+	{ 0x0, 0x00, 0x00 },	/* 200         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 200         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 200         4.0   */
+	{ 0x0, 0x00, 0x00 },	/* 200         6.0   */
+	{ 0x0, 0x00, 0x00 },	/* 250         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 250         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 250         4.0   */
+	{ 0x0, 0x00, 0x00 },	/* 300         0.0   */
+	{ 0x0, 0x00, 0x00 },	/* 300         1.5   */
+	{ 0x0, 0x00, 0x00 },	/* 350         0.0   */
+};
+
+struct icl_mg_phy_ddi_buf_trans {
+	u32 cri_txdeemph_override_5_0;
+	u32 cri_txdeemph_override_11_6;
+	u32 cri_txdeemph_override_17_12;
+};
+
+static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations[] = {
+				/* Voltage swing  pre-emphasis */
+	{ 0x0, 0x1B, 0x00 },	/* 0              0   */
+	{ 0x0, 0x23, 0x08 },	/* 0              1   */
+	{ 0x0, 0x2D, 0x12 },	/* 0              2   */
+	{ 0x0, 0x00, 0x00 },	/* 0              3   */
+	{ 0x0, 0x23, 0x00 },	/* 1              0   */
+	{ 0x0, 0x2B, 0x09 },	/* 1              1   */
+	{ 0x0, 0x2E, 0x11 },	/* 1              2   */
+	{ 0x0, 0x2F, 0x00 },	/* 2              0   */
+	{ 0x0, 0x33, 0x0C },	/* 2              1   */
+	{ 0x0, 0x00, 0x00 },	/* 3              0   */
+};
+
 static const struct ddi_buf_trans *
 bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
 {
@@ -751,6 +870,45 @@
 	}
 }
 
+static const struct icl_combo_phy_ddi_buf_trans *
+icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port,
+			int type, int *n_entries)
+{
+	u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK;
+
+	if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) {
+		switch (voltage) {
+		case VOLTAGE_INFO_0_85V:
+			*n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V);
+			return icl_combo_phy_ddi_translations_edp_0_85V;
+		case VOLTAGE_INFO_0_95V:
+			*n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V);
+			return icl_combo_phy_ddi_translations_edp_0_95V;
+		case VOLTAGE_INFO_1_05V:
+			*n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V);
+			return icl_combo_phy_ddi_translations_edp_1_05V;
+		default:
+			MISSING_CASE(voltage);
+			return NULL;
+		}
+	} else {
+		switch (voltage) {
+		case VOLTAGE_INFO_0_85V:
+			*n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V);
+			return icl_combo_phy_ddi_translations_dp_hdmi_0_85V;
+		case VOLTAGE_INFO_0_95V:
+			*n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V);
+			return icl_combo_phy_ddi_translations_dp_hdmi_0_95V;
+		case VOLTAGE_INFO_1_05V:
+			*n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V);
+			return icl_combo_phy_ddi_translations_dp_hdmi_1_05V;
+		default:
+			MISSING_CASE(voltage);
+			return NULL;
+		}
+	}
+}
+
 static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
 {
 	int n_entries, level, default_entry;
@@ -875,7 +1033,7 @@
 
 static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll)
 {
-	switch (pll->id) {
+	switch (pll->info->id) {
 	case DPLL_ID_WRPLL1:
 		return PORT_CLK_SEL_WRPLL1;
 	case DPLL_ID_WRPLL2:
@@ -889,11 +1047,30 @@
 	case DPLL_ID_LCPLL_2700:
 		return PORT_CLK_SEL_LCPLL_2700;
 	default:
-		MISSING_CASE(pll->id);
+		MISSING_CASE(pll->info->id);
 		return PORT_CLK_SEL_NONE;
 	}
 }
 
+static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder,
+				       const struct intel_shared_dpll *pll)
+{
+	const enum intel_dpll_id id = pll->info->id;
+
+	switch (id) {
+	default:
+		MISSING_CASE(id);
+	case DPLL_ID_ICL_DPLL0:
+	case DPLL_ID_ICL_DPLL1:
+		return DDI_CLK_SEL_NONE;
+	case DPLL_ID_ICL_MGPLL1:
+	case DPLL_ID_ICL_MGPLL2:
+	case DPLL_ID_ICL_MGPLL3:
+	case DPLL_ID_ICL_MGPLL4:
+		return DDI_CLK_SEL_MG;
+	}
+}
+
 /* Starting with Haswell, different DDI ports can work in FDI mode for
  * connection to the PCH-located connectors. For this, it is necessary to train
  * both the DDI port and PCH receiver for the desired DDI buffer settings.
@@ -1906,7 +2083,13 @@
 	enum port port = encoder->port;
 	int n_entries;
 
-	if (IS_CANNONLAKE(dev_priv)) {
+	if (IS_ICELAKE(dev_priv)) {
+		if (port == PORT_A || port == PORT_B)
+			icl_get_combo_buf_trans(dev_priv, port, encoder->type,
+						&n_entries);
+		else
+			n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations);
+	} else if (IS_CANNONLAKE(dev_priv)) {
 		if (encoder->type == INTEL_OUTPUT_EDP)
 			cnl_get_buf_trans_edp(dev_priv, &n_entries);
 		else
@@ -2063,6 +2246,146 @@
 	I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val);
 }
 
+static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv,
+					 u32 level, enum port port, int type)
+{
+	const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL;
+	u32 n_entries, val;
+	int ln;
+
+	ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type,
+						   &n_entries);
+	if (!ddi_translations)
+		return;
+
+	if (level >= n_entries) {
+		DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1);
+		level = n_entries - 1;
+	}
+
+	/* Set PORT_TX_DW5 Rterm Sel to 110b. */
+	val = I915_READ(ICL_PORT_TX_DW5_LN0(port));
+	val &= ~RTERM_SELECT_MASK;
+	val |= RTERM_SELECT(0x6);
+	I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val);
+
+	/* Program PORT_TX_DW5 */
+	val = I915_READ(ICL_PORT_TX_DW5_LN0(port));
+	/* Set DisableTap2 and DisableTap3 if MIPI DSI
+	 * Clear DisableTap2 and DisableTap3 for all other Ports
+	 */
+	if (type == INTEL_OUTPUT_DSI) {
+		val |= TAP2_DISABLE;
+		val |= TAP3_DISABLE;
+	} else {
+		val &= ~TAP2_DISABLE;
+		val &= ~TAP3_DISABLE;
+	}
+	I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val);
+
+	/* Program PORT_TX_DW2 */
+	val = I915_READ(ICL_PORT_TX_DW2_LN0(port));
+	val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK |
+		 RCOMP_SCALAR_MASK);
+	val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select);
+	val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select);
+	/* Program Rcomp scalar for every table entry */
+	val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar);
+	I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val);
+
+	/* Program PORT_TX_DW4 */
+	/* We cannot write to GRP. It would overwrite individual loadgen. */
+	for (ln = 0; ln <= 3; ln++) {
+		val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln));
+		val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK |
+			 CURSOR_COEFF_MASK);
+		val |= ddi_translations[level].dw4_scaling;
+		I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val);
+	}
+}
+
+static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder,
+					      u32 level,
+					      enum intel_output_type type)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	enum port port = encoder->port;
+	int width = 0;
+	int rate = 0;
+	u32 val;
+	int ln = 0;
+
+	if (type == INTEL_OUTPUT_HDMI) {
+		width = 4;
+		/* Rate is always < 6 GHz for HDMI */
+	} else {
+		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+
+		width = intel_dp->lane_count;
+		rate = intel_dp->link_rate;
+	}
+
+	/*
+	 * 1. If port type is eDP or DP,
+	 * set PORT_PCS_DW1 cmnkeeper_enable to 1b,
+	 * else clear to 0b.
+	 */
+	val = I915_READ(ICL_PORT_PCS_DW1_LN0(port));
+	if (type == INTEL_OUTPUT_HDMI)
+		val &= ~COMMON_KEEPER_EN;
+	else
+		val |= COMMON_KEEPER_EN;
+	I915_WRITE(ICL_PORT_PCS_DW1_GRP(port), val);
+
+	/* 2. Program loadgen select */
+	/*
+	 * Program PORT_TX_DW4_LN depending on Bit rate and used lanes
+	 * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1)
+	 * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0)
+	 * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0)
+	 */
+	for (ln = 0; ln <= 3; ln++) {
+		val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln));
+		val &= ~LOADGEN_SELECT;
+
+		if ((rate <= 600000 && width == 4 && ln >= 1) ||
+		    (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) {
+			val |= LOADGEN_SELECT;
+		}
+		I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val);
+	}
+
+	/* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */
+	val = I915_READ(ICL_PORT_CL_DW5(port));
+	val |= SUS_CLOCK_CONFIG;
+	I915_WRITE(ICL_PORT_CL_DW5(port), val);
+
+	/* 4. Clear training enable to change swing values */
+	val = I915_READ(ICL_PORT_TX_DW5_LN0(port));
+	val &= ~TX_TRAINING_EN;
+	I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val);
+
+	/* 5. Program swing and de-emphasis */
+	icl_ddi_combo_vswing_program(dev_priv, level, port, type);
+
+	/* 6. Set training enable to trigger update */
+	val = I915_READ(ICL_PORT_TX_DW5_LN0(port));
+	val |= TX_TRAINING_EN;
+	I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val);
+}
+
+static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level,
+				    enum intel_output_type type)
+{
+	enum port port = encoder->port;
+
+	if (port == PORT_A || port == PORT_B)
+		icl_combo_phy_ddi_vswing_sequence(encoder, level, type);
+	else
+		/* Not Implemented Yet */
+		WARN_ON(1);
+}
+
 static uint32_t translate_signal_level(int signal_levels)
 {
 	int i;
@@ -2094,7 +2417,9 @@
 	struct intel_encoder *encoder = &dport->base;
 	int level = intel_ddi_dp_level(intel_dp);
 
-	if (IS_CANNONLAKE(dev_priv))
+	if (IS_ICELAKE(dev_priv))
+		icl_ddi_vswing_sequence(encoder, level, encoder->type);
+	else if (IS_CANNONLAKE(dev_priv))
 		cnl_ddi_vswing_sequence(encoder, level, encoder->type);
 	else
 		bxt_ddi_vswing_sequence(encoder, level, encoder->type);
@@ -2115,6 +2440,69 @@
 	return DDI_BUF_TRANS_SELECT(level);
 }
 
+void icl_map_plls_to_ports(struct drm_crtc *crtc,
+			   struct intel_crtc_state *crtc_state,
+			   struct drm_atomic_state *old_state)
+{
+	struct intel_shared_dpll *pll = crtc_state->shared_dpll;
+	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct drm_connector_state *conn_state;
+	struct drm_connector *conn;
+	int i;
+
+	for_each_new_connector_in_state(old_state, conn, conn_state, i) {
+		struct intel_encoder *encoder =
+			to_intel_encoder(conn_state->best_encoder);
+		enum port port = encoder->port;
+		uint32_t val;
+
+		if (conn_state->crtc != crtc)
+			continue;
+
+		mutex_lock(&dev_priv->dpll_lock);
+
+		val = I915_READ(DPCLKA_CFGCR0_ICL);
+		WARN_ON((val & DPCLKA_CFGCR0_DDI_CLK_OFF(port)) == 0);
+
+		if (port == PORT_A || port == PORT_B) {
+			val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port);
+			val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port);
+			I915_WRITE(DPCLKA_CFGCR0_ICL, val);
+			POSTING_READ(DPCLKA_CFGCR0_ICL);
+		}
+
+		val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port);
+		I915_WRITE(DPCLKA_CFGCR0_ICL, val);
+
+		mutex_unlock(&dev_priv->dpll_lock);
+	}
+}
+
+void icl_unmap_plls_to_ports(struct drm_crtc *crtc,
+			     struct intel_crtc_state *crtc_state,
+			     struct drm_atomic_state *old_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct drm_connector_state *old_conn_state;
+	struct drm_connector *conn;
+	int i;
+
+	for_each_old_connector_in_state(old_state, conn, old_conn_state, i) {
+		struct intel_encoder *encoder =
+			to_intel_encoder(old_conn_state->best_encoder);
+		enum port port = encoder->port;
+
+		if (old_conn_state->crtc != crtc)
+			continue;
+
+		mutex_lock(&dev_priv->dpll_lock);
+		I915_WRITE(DPCLKA_CFGCR0_ICL,
+			   I915_READ(DPCLKA_CFGCR0_ICL) |
+			   DPCLKA_CFGCR0_DDI_CLK_OFF(port));
+		mutex_unlock(&dev_priv->dpll_lock);
+	}
+}
+
 static void intel_ddi_clk_select(struct intel_encoder *encoder,
 				 const struct intel_shared_dpll *pll)
 {
@@ -2127,11 +2515,15 @@
 
 	mutex_lock(&dev_priv->dpll_lock);
 
-	if (IS_CANNONLAKE(dev_priv)) {
+	if (IS_ICELAKE(dev_priv)) {
+		if (port >= PORT_C)
+			I915_WRITE(DDI_CLK_SEL(port),
+				   icl_pll_to_ddi_pll_sel(encoder, pll));
+	} else if (IS_CANNONLAKE(dev_priv)) {
 		/* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
 		val = I915_READ(DPCLKA_CFGCR0);
 		val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port);
-		val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->id, port);
+		val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port);
 		I915_WRITE(DPCLKA_CFGCR0, val);
 
 		/*
@@ -2148,7 +2540,7 @@
 
 		val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) |
 			 DPLL_CTRL2_DDI_CLK_SEL_MASK(port));
-		val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->id, port) |
+		val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->info->id, port) |
 			DPLL_CTRL2_DDI_SEL_OVERRIDE(port));
 
 		I915_WRITE(DPLL_CTRL2, val);
@@ -2165,14 +2557,18 @@
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 
-	if (IS_CANNONLAKE(dev_priv))
+	if (IS_ICELAKE(dev_priv)) {
+		if (port >= PORT_C)
+			I915_WRITE(DDI_CLK_SEL(port), DDI_CLK_SEL_NONE);
+	} else if (IS_CANNONLAKE(dev_priv)) {
 		I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) |
 			   DPCLKA_CFGCR0_DDI_CLK_OFF(port));
-	else if (IS_GEN9_BC(dev_priv))
+	} else if (IS_GEN9_BC(dev_priv)) {
 		I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) |
 			   DPLL_CTRL2_DDI_CLK_OFF(port));
-	else if (INTEL_GEN(dev_priv) < 9)
+	} else if (INTEL_GEN(dev_priv) < 9) {
 		I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE);
+	}
 }
 
 static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
@@ -2197,7 +2593,9 @@
 
 	intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
 
-	if (IS_CANNONLAKE(dev_priv))
+	if (IS_ICELAKE(dev_priv))
+		icl_ddi_vswing_sequence(encoder, level, encoder->type);
+	else if (IS_CANNONLAKE(dev_priv))
 		cnl_ddi_vswing_sequence(encoder, level, encoder->type);
 	else if (IS_GEN9_LP(dev_priv))
 		bxt_ddi_vswing_sequence(encoder, level, encoder->type);
@@ -2205,7 +2603,8 @@
 		intel_prepare_dp_ddi_buffers(encoder, crtc_state);
 
 	intel_ddi_init_dp_buf_reg(encoder);
-	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+	if (!is_mst)
+		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
 	intel_dp_start_link_train(intel_dp);
 	if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
 		intel_dp_stop_link_train(intel_dp);
@@ -2227,7 +2626,9 @@
 
 	intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain);
 
-	if (IS_CANNONLAKE(dev_priv))
+	if (IS_ICELAKE(dev_priv))
+		icl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI);
+	else if (IS_CANNONLAKE(dev_priv))
 		cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI);
 	else if (IS_GEN9_LP(dev_priv))
 		bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI);
@@ -2303,12 +2704,15 @@
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
 	struct intel_dp *intel_dp = &dig_port->dp;
+	bool is_mst = intel_crtc_has_type(old_crtc_state,
+					  INTEL_OUTPUT_DP_MST);
 
 	/*
 	 * Power down sink before disabling the port, otherwise we end
 	 * up getting interrupts from the sink on detecting link loss.
 	 */
-	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+	if (!is_mst)
+		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
 
 	intel_disable_ddi_buf(encoder);
 
@@ -2424,12 +2828,14 @@
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base);
+	struct drm_connector *connector = conn_state->connector;
 	enum port port = encoder->port;
 
-	intel_hdmi_handle_sink_scrambling(encoder,
-					  conn_state->connector,
-					  crtc_state->hdmi_high_tmds_clock_ratio,
-					  crtc_state->hdmi_scrambling);
+	if (!intel_hdmi_handle_sink_scrambling(encoder, connector,
+					       crtc_state->hdmi_high_tmds_clock_ratio,
+					       crtc_state->hdmi_scrambling))
+		DRM_ERROR("[CONNECTOR:%d:%s] Failed to configure sink scrambling/TMDS bit clock ratio\n",
+			  connector->base.id, connector->name);
 
 	/* Display WA #1143: skl,kbl,cfl */
 	if (IS_GEN9_BC(dev_priv)) {
@@ -2520,13 +2926,16 @@
 				   const struct intel_crtc_state *old_crtc_state,
 				   const struct drm_connector_state *old_conn_state)
 {
+	struct drm_connector *connector = old_conn_state->connector;
+
 	if (old_crtc_state->has_audio)
 		intel_audio_codec_disable(encoder,
 					  old_crtc_state, old_conn_state);
 
-	intel_hdmi_handle_sink_scrambling(encoder,
-					  old_conn_state->connector,
-					  false, false);
+	if (!intel_hdmi_handle_sink_scrambling(encoder, connector,
+					       false, false))
+		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Failed to reset sink scrambling/TMDS bit clock ratio\n",
+			      connector->base.id, connector->name);
 }
 
 static void intel_disable_ddi(struct intel_encoder *encoder,
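
[Editor's note] A recurring pattern in the new ICL DDI code above is the voltage-keyed translation table: icl_get_combo_buf_trans() switches on the sensed VccIO voltage, returns the matching table, and reports its length through *n_entries so callers can clamp the requested level, as icl_ddi_combo_vswing_program() does. A condensed sketch with placeholder tables and case values (the real code keys on VOLTAGE_INFO_* register bits):

	#include <linux/kernel.h>	/* ARRAY_SIZE */
	#include <linux/types.h>

	struct buf_trans {
		u32 swing_select;
		u32 swing_scalar;
		u32 scaling;
	};

	static const struct buf_trans trans_0_85v[] = {
		{ 0x2, 0x98, 0x0018 },	/* placeholder entries */
		{ 0x2, 0x98, 0x3015 },
	};

	static const struct buf_trans trans_0_95v[] = {
		{ 0x2, 0x98, 0x0018 },
		{ 0x4, 0x98, 0x3015 },
	};

	static const struct buf_trans *pick_trans(int voltage, int *n_entries)
	{
		switch (voltage) {
		case 0:	/* stands in for VOLTAGE_INFO_0_85V */
			*n_entries = ARRAY_SIZE(trans_0_85v);
			return trans_0_85v;
		case 1:	/* stands in for VOLTAGE_INFO_0_95V */
			*n_entries = ARRAY_SIZE(trans_0_95v);
			return trans_0_95v;
		default:
			return NULL;	/* caller reports MISSING_CASE */
		}
	}

	static u32 scaling_for_level(int voltage, int level)
	{
		int n;
		const struct buf_trans *t = pick_trans(voltage, &n);

		if (!t)
			return 0;
		if (level >= n)
			level = n - 1;	/* clamp to the last table entry */
		return t[level].scaling;
	}
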
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 3dd350f..0fd13df 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -83,11 +83,11 @@
 {
 	int s;
 
-	drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
-	drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
+	drm_printf(p, "slice total: %u, mask=%04x\n",
+		   hweight8(sseu->slice_mask), sseu->slice_mask);
 	drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
-	for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
-		drm_printf(p, "slice%d %u subslices mask=%04x\n",
+	for (s = 0; s < sseu->max_slices; s++) {
+		drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
 			   s, hweight8(sseu->subslice_mask[s]),
 			   sseu->subslice_mask[s]);
 	}
@@ -158,6 +158,45 @@
 	return total;
 }
 
+static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
+{
+	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
+	u8 s_en;
+	u32 ss_en, ss_en_mask;
+	u8 eu_en;
+	int s;
+
+	sseu->max_slices = 1;
+	sseu->max_subslices = 8;
+	sseu->max_eus_per_subslice = 8;
+
+	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
+	ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
+	ss_en_mask = BIT(sseu->max_subslices) - 1;
+	eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		if (s_en & BIT(s)) {
+			int ss_idx = sseu->max_subslices * s;
+			int ss;
+
+			sseu->slice_mask |= BIT(s);
+			sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask;
+			for (ss = 0; ss < sseu->max_subslices; ss++) {
+				if (sseu->subslice_mask[s] & BIT(ss))
+					sseu_set_eus(sseu, s, ss, eu_en);
+			}
+		}
+	}
+	sseu->eu_per_subslice = hweight8(eu_en);
+	sseu->eu_total = compute_eu_total(sseu);
+
+	/* ICL has no power gating restrictions. */
+	sseu->has_slice_pg = 1;
+	sseu->has_subslice_pg = 1;
+	sseu->has_eu_pg = 1;
+}
+
 static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
@@ -557,6 +596,52 @@
 	return base_freq + frac_freq;
 }
 
+static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv,
+					u32 rpm_config_reg)
+{
+	u32 f19_2_mhz = 19200;
+	u32 f24_mhz = 24000;
+	u32 crystal_clock = (rpm_config_reg &
+			     GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
+			    GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+
+	switch (crystal_clock) {
+	case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
+		return f19_2_mhz;
+	case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
+		return f24_mhz;
+	default:
+		MISSING_CASE(crystal_clock);
+		return 0;
+	}
+}
+
+static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv,
+					u32 rpm_config_reg)
+{
+	u32 f19_2_mhz = 19200;
+	u32 f24_mhz = 24000;
+	u32 f25_mhz = 25000;
+	u32 f38_4_mhz = 38400;
+	u32 crystal_clock = (rpm_config_reg &
+			     GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
+			    GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+
+	switch (crystal_clock) {
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
+		return f24_mhz;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
+		return f19_2_mhz;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
+		return f38_4_mhz;
+	case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
+		return f25_mhz;
+	default:
+		MISSING_CASE(crystal_clock);
+		return 0;
+	}
+}
+
 static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv)
 {
 	u32 f12_5_mhz = 12500;
@@ -597,10 +682,9 @@
 		}
 
 		return freq;
-	} else if (INTEL_GEN(dev_priv) <= 10) {
+	} else if (INTEL_GEN(dev_priv) <= 11) {
 		u32 ctc_reg = I915_READ(CTC_MODE);
 		u32 freq = 0;
-		u32 rpm_config_reg = 0;
 
 		/* First figure out the reference frequency. There are 2 ways
 		 * we can compute the frequency, either through the
@@ -610,20 +694,14 @@
 		if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
 			freq = read_reference_ts_freq(dev_priv);
 		} else {
-			u32 crystal_clock;
+			u32 rpm_config_reg = I915_READ(RPM_CONFIG0);
 
-			rpm_config_reg = I915_READ(RPM_CONFIG0);
-			crystal_clock = (rpm_config_reg &
-					 GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
-				GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
-			switch (crystal_clock) {
-			case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
-				freq = f19_2_mhz;
-				break;
-			case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
-				freq = f24_mhz;
-				break;
-			}
+			if (INTEL_GEN(dev_priv) <= 10)
+				freq = gen10_get_crystal_clock_freq(dev_priv,
+								rpm_config_reg);
+			else
+				freq = gen11_get_crystal_clock_freq(dev_priv,
+								rpm_config_reg);
 
 			/* Now figure out how the command stream's timestamp
 			 * register increments from this frequency (it might
@@ -768,8 +846,10 @@
 		broadwell_sseu_info_init(dev_priv);
 	else if (INTEL_GEN(dev_priv) == 9)
 		gen9_sseu_info_init(dev_priv);
-	else if (INTEL_GEN(dev_priv) >= 10)
+	else if (INTEL_GEN(dev_priv) == 10)
 		gen10_sseu_info_init(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 11)
+		gen11_sseu_info_init(dev_priv);
 
 	/* Initialize command stream timestamp frequency */
 	info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
@@ -780,3 +860,50 @@
 {
 	drm_printf(p, "scheduler: %x\n", caps->scheduler);
 }
+
+/*
+ * Determine which engines are fused off in our particular hardware. Since the
+ * fuse register is in the blitter powerwell, we need forcewake to be ready at
+ * this point (but later we need to prune the forcewake domains for engines that
+ * are indeed fused off).
+ */
+void intel_device_info_init_mmio(struct drm_i915_private *dev_priv)
+{
+	struct intel_device_info *info = mkwrite_device_info(dev_priv);
+	u8 vdbox_disable, vebox_disable;
+	u32 media_fuse;
+	int i;
+
+	if (INTEL_GEN(dev_priv) < 11)
+		return;
+
+	media_fuse = I915_READ(GEN11_GT_VEBOX_VDBOX_DISABLE);
+
+	vdbox_disable = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK;
+	vebox_disable = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >>
+			GEN11_GT_VEBOX_DISABLE_SHIFT;
+
+	DRM_DEBUG_DRIVER("vdbox disable: %04x\n", vdbox_disable);
+	for (i = 0; i < I915_MAX_VCS; i++) {
+		if (!HAS_ENGINE(dev_priv, _VCS(i)))
+			continue;
+
+		if (!(BIT(i) & vdbox_disable))
+			continue;
+
+		info->ring_mask &= ~ENGINE_MASK(_VCS(i));
+		DRM_DEBUG_DRIVER("vcs%u fused off\n", i);
+	}
+
+	DRM_DEBUG_DRIVER("vebox disable: %04x\n", vebox_disable);
+	for (i = 0; i < I915_MAX_VECS; i++) {
+		if (!HAS_ENGINE(dev_priv, _VECS(i)))
+			continue;
+
+		if (!(BIT(i) & vebox_disable))
+			continue;
+
+		info->ring_mask &= ~ENGINE_MASK(_VECS(i));
+		DRM_DEBUG_DRIVER("vecs%u fused off\n", i);
+	}
+}
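
[Editor's note] gen11_sseu_info_init() and intel_device_info_init_mmio() above both decode hardware *disable* fuses: invert the register, mask to the architectural width, then iterate or count the set bits with hweight. A minimal sketch of the decode step, with a made-up register value as input:

	#include <linux/bitops.h>
	#include <linux/printk.h>

	static u32 decode_enable_mask(u32 disable_reg, int max_units)
	{
		u32 en_mask = BIT(max_units) - 1;	/* architectural width */
		u32 enabled = ~disable_reg & en_mask;	/* fuses report disables */

		pr_info("%u of %d units enabled (mask=%04x)\n",
			hweight32(enabled), max_units, enabled);
		return enabled;
	}
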
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 0835752..933e316 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -114,7 +114,7 @@
 	func(has_ipc);
 
 #define GEN_MAX_SLICES		(6) /* CNL upper bound */
-#define GEN_MAX_SUBSLICES	(7)
+#define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
 
 struct sseu_dev_info {
 	u8 slice_mask;
@@ -247,6 +247,8 @@
 void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
 				     struct drm_printer *p);
 
+void intel_device_info_init_mmio(struct drm_i915_private *dev_priv);
+
 void intel_driver_caps_print(const struct intel_driver_caps *caps,
 			     struct drm_printer *p);
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 56004ff..ad588d5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -88,6 +88,22 @@
 	DRM_FORMAT_VYUY,
 };
 
+static const uint32_t skl_pri_planar_formats[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YVYU,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_VYUY,
+	DRM_FORMAT_NV12,
+};
+
 static const uint64_t skl_format_modifiers_noccs[] = {
 	I915_FORMAT_MOD_Yf_TILED,
 	I915_FORMAT_MOD_Y_TILED,
@@ -488,6 +504,33 @@
 	.p2 = { .p2_slow = 1, .p2_fast = 20 },
 };
 
+static void
+skl_wa_528(struct drm_i915_private *dev_priv, int pipe, bool enable)
+{
+	if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))
+		return;
+
+	if (enable)
+		I915_WRITE(CHICKEN_PIPESL_1(pipe), HSW_FBCQ_DIS);
+	else
+		I915_WRITE(CHICKEN_PIPESL_1(pipe), 0);
+}
+
+static void
+skl_wa_clkgate(struct drm_i915_private *dev_priv, int pipe, bool enable)
+{
+	if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))
+		return;
+
+	if (enable)
+		I915_WRITE(CLKGATE_DIS_PSL(pipe),
+			   DUPS1_GATING_DIS | DUPS2_GATING_DIS);
+	else
+		I915_WRITE(CLKGATE_DIS_PSL(pipe),
+			   I915_READ(CLKGATE_DIS_PSL(pipe)) &
+			   ~(DUPS1_GATING_DIS | DUPS2_GATING_DIS));
+}
+
 static bool
 needs_modeset(const struct drm_crtc_state *state)
 {
@@ -2657,11 +2700,13 @@
 	}
 }
 
-static int skl_format_to_fourcc(int format, bool rgb_order, bool alpha)
+int skl_format_to_fourcc(int format, bool rgb_order, bool alpha)
 {
 	switch (format) {
 	case PLANE_CTL_FORMAT_RGB_565:
 		return DRM_FORMAT_RGB565;
+	case PLANE_CTL_FORMAT_NV12:
+		return DRM_FORMAT_NV12;
 	default:
 	case PLANE_CTL_FORMAT_XRGB_8888:
 		if (rgb_order) {
@@ -2824,7 +2869,7 @@
 			continue;
 
 		if (intel_plane_ggtt_offset(state) == plane_config->base) {
-			fb = c->primary->fb;
+			fb = state->base.fb;
 			drm_framebuffer_get(fb);
 			goto valid_fb;
 		}
@@ -2858,6 +2903,9 @@
 		return;
 	}
 
+	obj = intel_fb_obj(fb);
+	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
+
 	plane_state->src_x = 0;
 	plane_state->src_y = 0;
 	plane_state->src_w = fb->width << 16;
@@ -2871,7 +2919,6 @@
 	intel_state->base.src = drm_plane_state_src(plane_state);
 	intel_state->base.dst = drm_plane_state_dest(plane_state);
 
-	obj = intel_fb_obj(fb);
 	if (i915_gem_object_is_tiled(obj))
 		dev_priv->preserve_bios_swizzle = true;
 
@@ -3071,6 +3118,29 @@
 	return 0;
 }
 
+static int
+skl_check_nv12_surface(const struct intel_crtc_state *crtc_state,
+		       struct intel_plane_state *plane_state)
+{
+	/* Display WA #1106 */
+	if (plane_state->base.rotation !=
+	    (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90) &&
+	    plane_state->base.rotation != DRM_MODE_ROTATE_270)
+		return 0;
+
+	/*
+	 * src coordinates are rotated here.
+	 * We check height but report it as width
+	 */
+	if (((drm_rect_height(&plane_state->base.src) >> 16) % 4) != 0) {
+		DRM_DEBUG_KMS("src width must be a multiple "
+			      "of 4 for rotated NV12\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state)
 {
 	const struct drm_framebuffer *fb = plane_state->base.fb;
@@ -3154,6 +3224,9 @@
 	 * the main surface setup depends on it.
 	 */
 	if (fb->format->format == DRM_FORMAT_NV12) {
+		ret = skl_check_nv12_surface(crtc_state, plane_state);
+		if (ret)
+			return ret;
 		ret = skl_check_nv12_aux_surface(plane_state);
 		if (ret)
 			return ret;
@@ -3464,6 +3537,8 @@
 		return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY;
 	case DRM_FORMAT_VYUY:
 		return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY;
+	case DRM_FORMAT_NV12:
+		return PLANE_CTL_FORMAT_NV12;
 	default:
 		MISSING_CASE(pixel_format);
 	}
@@ -3602,15 +3677,24 @@
 u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state)
 {
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
 	const struct drm_framebuffer *fb = plane_state->base.fb;
 	u32 plane_color_ctl = 0;
 
-	plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE;
-	plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE;
+	if (INTEL_GEN(dev_priv) < 11) {
+		plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE;
+		plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE;
+	}
 	plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE;
 	plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format);
 
 	if (intel_format_is_yuv(fb->format->format)) {
+		if (fb->format->format == DRM_FORMAT_NV12) {
+			plane_color_ctl |=
+				PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709;
+			goto out;
+		}
 		if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709)
 			plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709;
 		else
@@ -3619,7 +3703,7 @@
 		if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE)
 			plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE;
 	}
-
+out:
 	return plane_color_ctl;
 }
 
@@ -3675,7 +3759,6 @@
 	struct drm_atomic_state *state;
 	int ret;
 
-
 	/* reset doesn't touch the display */
 	if (!i915_modparams.force_reset_modeset_test &&
 	    !gpu_reset_clobbers_display(dev_priv))
@@ -3729,19 +3812,17 @@
 {
 	struct drm_device *dev = &dev_priv->drm;
 	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
-	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
+	struct drm_atomic_state *state;
 	int ret;
 
 	/* reset doesn't touch the display */
-	if (!i915_modparams.force_reset_modeset_test &&
-	    !gpu_reset_clobbers_display(dev_priv))
+	if (!test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags))
 		return;
 
+	state = fetch_and_zero(&dev_priv->modeset_restore_state);
 	if (!state)
 		goto unlock;
 
-	dev_priv->modeset_restore_state = NULL;
-
 	/* reset doesn't touch the display */
 	if (!gpu_reset_clobbers_display(dev_priv)) {
 		/* for testing only restore the display */
@@ -4703,7 +4784,9 @@
 static int
 skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach,
 		  unsigned int scaler_user, int *scaler_id,
-		  int src_w, int src_h, int dst_w, int dst_h)
+		  int src_w, int src_h, int dst_w, int dst_h,
+		  bool plane_scaler_check,
+		  uint32_t pixel_format)
 {
 	struct intel_crtc_scaler_state *scaler_state =
 		&crtc_state->scaler_state;
@@ -4721,6 +4804,10 @@
 	 */
 	need_scaling = src_w != dst_w || src_h != dst_h;
 
+	if (plane_scaler_check)
+		if (pixel_format == DRM_FORMAT_NV12)
+			need_scaling = true;
+
 	if (crtc_state->ycbcr420 && scaler_user == SKL_CRTC_INDEX)
 		need_scaling = true;
 
@@ -4760,12 +4847,21 @@
 		return 0;
 	}
 
+	if (plane_scaler_check && pixel_format == DRM_FORMAT_NV12 &&
+	    (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) {
+		DRM_DEBUG_KMS("NV12: src dimensions not met\n");
+		return -EINVAL;
+	}
+
 	/* range checks */
 	if (src_w < SKL_MIN_SRC_W || src_h < SKL_MIN_SRC_H ||
-		dst_w < SKL_MIN_DST_W || dst_h < SKL_MIN_DST_H ||
-
-		src_w > SKL_MAX_SRC_W || src_h > SKL_MAX_SRC_H ||
-		dst_w > SKL_MAX_DST_W || dst_h > SKL_MAX_DST_H) {
+	    dst_w < SKL_MIN_DST_W || dst_h < SKL_MIN_DST_H ||
+	    (IS_GEN11(dev_priv) &&
+	     (src_w > ICL_MAX_SRC_W || src_h > ICL_MAX_SRC_H ||
+	      dst_w > ICL_MAX_DST_W || dst_h > ICL_MAX_DST_H)) ||
+	    (!IS_GEN11(dev_priv) &&
+	     (src_w > SKL_MAX_SRC_W || src_h > SKL_MAX_SRC_H ||
+	      dst_w > SKL_MAX_DST_W || dst_h > SKL_MAX_DST_H)))	{
 		DRM_DEBUG_KMS("scaler_user index %u.%u: src %ux%u dst %ux%u "
 			"size is out of scaler range\n",
 			intel_crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h);
@@ -4796,9 +4892,10 @@
 	const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode;
 
 	return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
-		&state->scaler_state.scaler_id,
-		state->pipe_src_w, state->pipe_src_h,
-		adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
+				 &state->scaler_state.scaler_id,
+				 state->pipe_src_w, state->pipe_src_h,
+				 adjusted_mode->crtc_hdisplay,
+				 adjusted_mode->crtc_vdisplay, false, 0);
 }
 
 /**
@@ -4827,7 +4924,8 @@
 				drm_rect_width(&plane_state->base.src) >> 16,
 				drm_rect_height(&plane_state->base.src) >> 16,
 				drm_rect_width(&plane_state->base.dst),
-				drm_rect_height(&plane_state->base.dst));
+				drm_rect_height(&plane_state->base.dst),
+				fb ? true : false, fb ? fb->format->format : 0);
 
 	if (ret || plane_state->scaler_id < 0)
 		return ret;
@@ -4853,6 +4951,7 @@
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_UYVY:
 	case DRM_FORMAT_VYUY:
+	case DRM_FORMAT_NV12:
 		break;
 	default:
 		DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n",
@@ -5096,16 +5195,34 @@
 	return !old_crtc_state->ips_enabled;
 }
 
+static bool needs_nv12_wa(struct drm_i915_private *dev_priv,
+			  const struct intel_crtc_state *crtc_state)
+{
+	if (!crtc_state->nv12_planes)
+		return false;
+
+	if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))
+		return false;
+
+	if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) ||
+	    IS_CANNONLAKE(dev_priv))
+		return true;
+
+	return false;
+}
+
 static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc);
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_atomic_state *old_state = old_crtc_state->base.state;
 	struct intel_crtc_state *pipe_config =
 		intel_atomic_get_new_crtc_state(to_intel_atomic_state(old_state),
 						crtc);
 	struct drm_plane *primary = crtc->base.primary;
-	struct drm_plane_state *old_pri_state =
-		drm_atomic_get_existing_plane_state(old_state, primary);
+	struct drm_plane_state *old_primary_state =
+		drm_atomic_get_old_plane_state(old_state, primary);
 
 	intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits);
 
@@ -5115,20 +5232,24 @@
 	if (hsw_post_update_enable_ips(old_crtc_state, pipe_config))
 		hsw_enable_ips(pipe_config);
 
-	if (old_pri_state) {
-		struct intel_plane_state *primary_state =
-			intel_atomic_get_new_plane_state(to_intel_atomic_state(old_state),
-							 to_intel_plane(primary));
-		struct intel_plane_state *old_primary_state =
-			to_intel_plane_state(old_pri_state);
+	if (old_primary_state) {
+		struct drm_plane_state *new_primary_state =
+			drm_atomic_get_new_plane_state(old_state, primary);
 
 		intel_fbc_post_update(crtc);
 
-		if (primary_state->base.visible &&
+		if (new_primary_state->visible &&
 		    (needs_modeset(&pipe_config->base) ||
-		     !old_primary_state->base.visible))
+		     !old_primary_state->visible))
 			intel_post_enable_primary(&crtc->base, pipe_config);
 	}
+
+	/* Display WA 827 */
+	if (needs_nv12_wa(dev_priv, old_crtc_state) &&
+	    !needs_nv12_wa(dev_priv, pipe_config)) {
+		skl_wa_clkgate(dev_priv, crtc->pipe, false);
+		skl_wa_528(dev_priv, crtc->pipe, false);
+	}
 }
 
 static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state,
@@ -5139,8 +5260,8 @@
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_atomic_state *old_state = old_crtc_state->base.state;
 	struct drm_plane *primary = crtc->base.primary;
-	struct drm_plane_state *old_pri_state =
-		drm_atomic_get_existing_plane_state(old_state, primary);
+	struct drm_plane_state *old_primary_state =
+		drm_atomic_get_old_plane_state(old_state, primary);
 	bool modeset = needs_modeset(&pipe_config->base);
 	struct intel_atomic_state *old_intel_state =
 		to_intel_atomic_state(old_state);
@@ -5148,23 +5269,28 @@
 	if (hsw_pre_update_disable_ips(old_crtc_state, pipe_config))
 		hsw_disable_ips(old_crtc_state);
 
-	if (old_pri_state) {
-		struct intel_plane_state *primary_state =
+	if (old_primary_state) {
+		struct intel_plane_state *new_primary_state =
 			intel_atomic_get_new_plane_state(old_intel_state,
 							 to_intel_plane(primary));
-		struct intel_plane_state *old_primary_state =
-			to_intel_plane_state(old_pri_state);
 
-		intel_fbc_pre_update(crtc, pipe_config, primary_state);
+		intel_fbc_pre_update(crtc, pipe_config, new_primary_state);
 		/*
 		 * Gen2 reports pipe underruns whenever all planes are disabled.
 		 * So disable underrun reporting before all the planes get disabled.
 		 */
-		if (IS_GEN2(dev_priv) && old_primary_state->base.visible &&
-		    (modeset || !primary_state->base.visible))
+		if (IS_GEN2(dev_priv) && old_primary_state->visible &&
+		    (modeset || !new_primary_state->base.visible))
 			intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false);
 	}
 
+	/* Display WA 827 */
+	if (!needs_nv12_wa(dev_priv, old_crtc_state) &&
+	    needs_nv12_wa(dev_priv, pipe_config)) {
+		skl_wa_clkgate(dev_priv, crtc->pipe, true);
+		skl_wa_528(dev_priv, crtc->pipe, true);
+	}
+
 	/*
 	 * Vblank time updates from the shadow to live plane control register
 	 * are blocked if the memory self-refresh mode is active at that
@@ -5499,6 +5625,9 @@
 	if (intel_crtc->config->shared_dpll)
 		intel_enable_shared_dpll(intel_crtc);
 
+	if (INTEL_GEN(dev_priv) >= 11)
+		icl_map_plls_to_ports(crtc, pipe_config, old_state);
+
 	if (intel_crtc_has_dp_encoder(intel_crtc->config))
 		intel_dp_set_m_n(intel_crtc, M1_N1);
 
@@ -5696,6 +5825,9 @@
 		intel_ddi_disable_pipe_clock(intel_crtc->config);
 
 	intel_encoders_post_disable(crtc, old_crtc_state, old_state);
+
+	if (INTEL_GEN(dev_priv) >= 11)
+		icl_unmap_plls_to_ports(crtc, old_crtc_state, old_state);
 }
 
 static void i9xx_pfit_enable(struct intel_crtc *crtc)
@@ -8766,8 +8898,8 @@
 			intel_get_shared_dpll_by_id(dev_priv, pll_id);
 		pll = pipe_config->shared_dpll;
 
-		WARN_ON(!pll->funcs.get_hw_state(dev_priv, pll,
-						 &pipe_config->dpll_hw_state));
+		WARN_ON(!pll->info->funcs->get_hw_state(dev_priv, pll,
+						&pipe_config->dpll_hw_state));
 
 		tmp = pipe_config->dpll_hw_state.dpll;
 		pipe_config->pixel_multiplier =
@@ -9243,8 +9375,8 @@
 
 	pll = pipe_config->shared_dpll;
 	if (pll) {
-		WARN_ON(!pll->funcs.get_hw_state(dev_priv, pll,
-						 &pipe_config->dpll_hw_state));
+		WARN_ON(!pll->info->funcs->get_hw_state(dev_priv, pll,
+						&pipe_config->dpll_hw_state));
 	}
 
 	/*
@@ -9974,6 +10106,8 @@
 	ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector));
 	if (!ret)
 		ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc));
+	if (!ret)
+		ret = drm_atomic_add_affected_planes(restore_state, crtc);
 	if (ret) {
 		DRM_DEBUG_KMS("Failed to create a copy of old state to restore: %i\n", ret);
 		goto fail;
@@ -10773,7 +10907,7 @@
 		struct drm_connector_state *connector_state;
 		struct intel_encoder *encoder;
 
-		connector_state = drm_atomic_get_existing_connector_state(state, connector);
+		connector_state = drm_atomic_get_new_connector_state(state, connector);
 		if (!connector_state)
 			connector_state = connector->state;
 
@@ -11085,39 +11219,42 @@
 		(current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) &&
 		!(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED);
 
-#define PIPE_CONF_CHECK_X(name)	\
+#define PIPE_CONF_CHECK_X(name) do { \
 	if (current_config->name != pipe_config->name) { \
 		pipe_config_err(adjust, __stringify(name), \
 			  "(expected 0x%08x, found 0x%08x)\n", \
 			  current_config->name, \
 			  pipe_config->name); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
-#define PIPE_CONF_CHECK_I(name)	\
+#define PIPE_CONF_CHECK_I(name) do { \
 	if (current_config->name != pipe_config->name) { \
 		pipe_config_err(adjust, __stringify(name), \
 			  "(expected %i, found %i)\n", \
 			  current_config->name, \
 			  pipe_config->name); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
-#define PIPE_CONF_CHECK_BOOL(name)	\
+#define PIPE_CONF_CHECK_BOOL(name) do { \
 	if (current_config->name != pipe_config->name) { \
 		pipe_config_err(adjust, __stringify(name), \
 			  "(expected %s, found %s)\n", \
 			  yesno(current_config->name), \
 			  yesno(pipe_config->name)); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
 /*
  * Checks state where we only read out the enabling, but not the entire
  * state itself (like full infoframes or ELD for audio). These states
  * require a full modeset on bootup to fix up.
  */
-#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) \
+#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) do { \
 	if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \
 		PIPE_CONF_CHECK_BOOL(name); \
 	} else { \
@@ -11126,18 +11263,20 @@
 			  yesno(current_config->name), \
 			  yesno(pipe_config->name)); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
-#define PIPE_CONF_CHECK_P(name)	\
+#define PIPE_CONF_CHECK_P(name) do { \
 	if (current_config->name != pipe_config->name) { \
 		pipe_config_err(adjust, __stringify(name), \
 			  "(expected %p, found %p)\n", \
 			  current_config->name, \
 			  pipe_config->name); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
-#define PIPE_CONF_CHECK_M_N(name) \
+#define PIPE_CONF_CHECK_M_N(name) do { \
 	if (!intel_compare_link_m_n(&current_config->name, \
 				    &pipe_config->name,\
 				    adjust)) { \
@@ -11155,14 +11294,15 @@
 			  pipe_config->name.link_m, \
 			  pipe_config->name.link_n); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
 /* This is required for BDW+ where there is only one set of registers for
  * switching between high and low RR.
  * This macro can be used whenever a comparison has to be made between one
  * hw state and multiple sw state variables.
  */
-#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) \
+#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) do { \
 	if (!intel_compare_link_m_n(&current_config->name, \
 				    &pipe_config->name, adjust) && \
 	    !intel_compare_link_m_n(&current_config->alt_name, \
@@ -11187,9 +11327,10 @@
 			  pipe_config->name.link_m, \
 			  pipe_config->name.link_n); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
-#define PIPE_CONF_CHECK_FLAGS(name, mask)	\
+#define PIPE_CONF_CHECK_FLAGS(name, mask) do { \
 	if ((current_config->name ^ pipe_config->name) & (mask)) { \
 		pipe_config_err(adjust, __stringify(name), \
 			  "(%x) (expected %i, found %i)\n", \
@@ -11197,16 +11338,18 @@
 			  current_config->name & (mask), \
 			  pipe_config->name & (mask)); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
-#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) \
+#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \
 	if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \
 		pipe_config_err(adjust, __stringify(name), \
 			  "(expected %i, found %i)\n", \
 			  current_config->name, \
 			  pipe_config->name); \
 		ret = false; \
-	}
+	} \
+} while (0)
 
 #define PIPE_CONF_QUIRK(quirk)	\
 	((current_config->quirks | pipe_config->quirks) & (quirk))
@@ -11315,6 +11458,16 @@
 	PIPE_CONF_CHECK_X(dpll_hw_state.pll9);
 	PIPE_CONF_CHECK_X(dpll_hw_state.pll10);
 	PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_refclkin_ctl);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_coreclkctl1);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_hsclkctl);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div0);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div1);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_lf);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_frac_lock);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_ssc);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_bias);
+	PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_tdc_coldst_bias);
 
 	PIPE_CONF_CHECK_X(dsi_pll.ctrl);
 	PIPE_CONF_CHECK_X(dsi_pll.div);
@@ -11378,6 +11531,11 @@
 	skl_ddb_get_hw_state(dev_priv, &hw_ddb);
 	sw_ddb = &dev_priv->wm.skl_hw.ddb;
 
+	if (INTEL_GEN(dev_priv) >= 11)
+		if (hw_ddb.enabled_slices != sw_ddb->enabled_slices)
+			DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n",
+				  sw_ddb->enabled_slices,
+				  hw_ddb.enabled_slices);
 	/* planes */
 	for_each_universal_plane(dev_priv, pipe, plane) {
 		hw_plane_wm = &hw_wm.planes[plane];
@@ -11643,11 +11801,11 @@
 
 	memset(&dpll_hw_state, 0, sizeof(dpll_hw_state));
 
-	DRM_DEBUG_KMS("%s\n", pll->name);
+	DRM_DEBUG_KMS("%s\n", pll->info->name);
 
-	active = pll->funcs.get_hw_state(dev_priv, pll, &dpll_hw_state);
+	active = pll->info->funcs->get_hw_state(dev_priv, pll, &dpll_hw_state);
 
-	if (!(pll->flags & INTEL_DPLL_ALWAYS_ON)) {
+	if (!(pll->info->flags & INTEL_DPLL_ALWAYS_ON)) {
 		I915_STATE_WARN(!pll->on && pll->active_mask,
 		     "pll in active use but not on in sw tracking\n");
 		I915_STATE_WARN(pll->on && !pll->active_mask,
@@ -12136,20 +12294,23 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_crtc_state *pipe_config = to_intel_crtc_state(new_crtc_state);
 	bool modeset = needs_modeset(new_crtc_state);
+	struct intel_plane_state *new_plane_state =
+		intel_atomic_get_new_plane_state(to_intel_atomic_state(state),
+						 to_intel_plane(crtc->primary));
 
 	if (modeset) {
 		update_scanline_offset(intel_crtc);
 		dev_priv->display.crtc_enable(pipe_config, state);
+
+		/* vblanks work again, re-enable pipe CRC. */
+		intel_crtc_enable_pipe_crc(intel_crtc);
 	} else {
 		intel_pre_plane_update(to_intel_crtc_state(old_crtc_state),
 				       pipe_config);
 	}
 
-	if (drm_atomic_get_existing_plane_state(state, crtc->primary)) {
-		intel_fbc_enable(
-		    intel_crtc, pipe_config,
-		    to_intel_plane_state(crtc->primary->state));
-	}
+	if (new_plane_state)
+		intel_fbc_enable(intel_crtc, pipe_config, new_plane_state);
 
 	drm_atomic_helper_commit_planes_on_crtc(old_crtc_state);
 }
@@ -12181,6 +12342,8 @@
 	bool progress;
 	enum pipe pipe;
 	int i;
+	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
+	u8 required_slices = intel_state->wm_results.ddb.enabled_slices;
 
 	const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {};
 
@@ -12189,6 +12352,10 @@
 		if (new_crtc_state->active)
 			entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb;
 
+	/* If the 2nd DBuf slice is required, enable it here */
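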
+	if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices)
+		icl_dbuf_slices_update(dev_priv, required_slices);
+
 	/*
 	 * Whenever the number of active pipes changes, we need to make sure we
 	 * update the pipes in the right order so that their ddb allocations
@@ -12239,6 +12406,10 @@
 			progress = true;
 		}
 	} while (progress);
+
+	/* If the 2nd DBuf slice is no longer required, disable it */
+	if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices)
+		icl_dbuf_slices_update(dev_priv, required_slices);
 }
 
 static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv)
@@ -12320,6 +12491,13 @@
 
 		if (old_crtc_state->active) {
 			intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask);
+
+			/*
+			 * We need to disable pipe CRC before disabling the pipe,
+			 * or we race against vblank off.
+			 */
+			intel_crtc_disable_pipe_crc(intel_crtc);
+
 			dev_priv->display.crtc_disable(to_intel_crtc_state(old_crtc_state), state);
 			intel_crtc->active = false;
 			intel_fbc_disable(intel_crtc);
@@ -12695,6 +12873,15 @@
 		intel_unpin_fb_vma(vma, old_plane_state->flags);
 }
 
+static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
+{
+	struct i915_sched_attr attr = {
+		.priority = I915_PRIORITY_DISPLAY,
+	};
+
+	i915_gem_object_wait_priority(obj, 0, &attr);
+}
+
 /**
  * intel_prepare_plane_fb - Prepare fb for usage on plane
  * @plane: drm plane to prepare for
@@ -12723,8 +12910,8 @@
 
 	if (old_obj) {
 		struct drm_crtc_state *crtc_state =
-			drm_atomic_get_existing_crtc_state(new_state->state,
-							   plane->state->crtc);
+			drm_atomic_get_new_crtc_state(new_state->state,
+						      plane->state->crtc);
 
 		/* Big Hammer, we also need to ensure that any pending
 		 * MI_WAIT_FOR_EVENT inside a user batch buffer on the
@@ -12771,13 +12958,15 @@
 
 	ret = intel_plane_pin_fb(to_intel_plane_state(new_state));
 
-	i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY);
+	fb_obj_bump_render_priority(obj);
 
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	i915_gem_object_unpin_pages(obj);
 	if (ret)
 		return ret;
 
+	intel_fb_obj_flush(obj, ORIGIN_DIRTYFB);
+
 	if (!new_state->fence) { /* implicit fencing */
 		struct dma_fence *fence;
 
@@ -12822,11 +13011,13 @@
 }
 
 int
-skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state)
+skl_max_scale(struct intel_crtc *intel_crtc,
+	      struct intel_crtc_state *crtc_state,
+	      uint32_t pixel_format)
 {
 	struct drm_i915_private *dev_priv;
-	int max_scale;
-	int crtc_clock, max_dotclk;
+	int max_scale, mult;
+	int crtc_clock, max_dotclk, tmpclk1, tmpclk2;
 
 	if (!intel_crtc || !crtc_state->base.enable)
 		return DRM_PLANE_HELPER_NO_SCALING;
@@ -12848,8 +13039,10 @@
 	 *            or
 	 *    cdclk/crtc_clock
 	 */
-	max_scale = min((1 << 16) * 3 - 1,
-			(1 << 8) * ((max_dotclk << 8) / crtc_clock));
+	mult = pixel_format == DRM_FORMAT_NV12 ? 2 : 3;
+	tmpclk1 = (1 << 16) * mult - 1;
+	tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock);
+	max_scale = min(tmpclk1, tmpclk2);
 
 	return max_scale;
 }
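 
 /*
  * Worked example of the clamp above, in 16.16 fixed point: for NV12 the
  * multiplier is 2, so the first min() argument caps the scaling ratio just
  * below 2.0 ((1 << 16) * 2 - 1), while every other format keeps the
  * previous cap of just below 3.0; the second argument still bounds the
  * ratio by max_dotclk / crtc_clock.
  */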
@@ -12865,12 +13058,16 @@
 	int max_scale = DRM_PLANE_HELPER_NO_SCALING;
 	bool can_position = false;
 	int ret;
+	uint32_t pixel_format = 0;
 
 	if (INTEL_GEN(dev_priv) >= 9) {
 		/* use scaler when colorkey is not required */
 		if (!state->ckey.flags) {
 			min_scale = 1;
-			max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state);
+			if (state->base.fb)
+				pixel_format = state->base.fb->format->format;
+			max_scale = skl_max_scale(to_intel_crtc(crtc),
+						  crtc_state, pixel_format);
 		}
 		can_position = true;
 	}
@@ -12943,10 +13140,25 @@
 							   intel_cstate);
 }
 
+void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
+				  struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+	if (!IS_GEN2(dev_priv))
+		intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true);
+
+	if (crtc_state->has_pch_encoder) {
+		enum pipe pch_transcoder =
+			intel_crtc_pch_transcoder(crtc);
+
+		intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true);
+	}
+}
+
 static void intel_finish_crtc_commit(struct drm_crtc *crtc,
 				     struct drm_crtc_state *old_crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_atomic_state *old_intel_state =
 		to_intel_atomic_state(old_crtc_state->state);
@@ -12957,17 +13169,8 @@
 
 	if (new_crtc_state->update_pipe &&
 	    !needs_modeset(&new_crtc_state->base) &&
-	    old_crtc_state->mode.private_flags & I915_MODE_FLAG_INHERITED) {
-		if (!IS_GEN2(dev_priv))
-			intel_set_cpu_fifo_underrun_reporting(dev_priv, intel_crtc->pipe, true);
-
-		if (new_crtc_state->has_pch_encoder) {
-			enum pipe pch_transcoder =
-				intel_crtc_pch_transcoder(intel_crtc);
-
-			intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true);
-		}
-	}
+	    old_crtc_state->mode.private_flags & I915_MODE_FLAG_INHERITED)
+		intel_crtc_arm_fifo_underrun(intel_crtc, new_crtc_state);
 }
 
 /**
@@ -13031,6 +13234,7 @@
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_UYVY:
 	case DRM_FORMAT_VYUY:
+	case DRM_FORMAT_NV12:
 		if (modifier == I915_FORMAT_MOD_Yf_TILED)
 			return true;
 		/* fall through */
@@ -13165,8 +13369,9 @@
 	if (ret)
 		goto out_unlock;
 
-	old_fb = old_plane_state->fb;
+	intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_FLIP);
 
+	old_fb = old_plane_state->fb;
 	i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(fb),
 			  intel_plane->frontbuffer_bit);
 
@@ -13237,6 +13442,30 @@
 	return pipe == PIPE_A && plane_id == PLANE_PRIMARY;
 }
 
+bool skl_plane_has_planar(struct drm_i915_private *dev_priv,
+			  enum pipe pipe, enum plane_id plane_id)
+{
+	if (plane_id == PLANE_PRIMARY) {
+		if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))
+			return false;
+		else if ((INTEL_GEN(dev_priv) == 9 && pipe == PIPE_C) &&
+			 !IS_GEMINILAKE(dev_priv))
+			return false;
+	} else if (plane_id >= PLANE_SPRITE0) {
+		if (plane_id == PLANE_CURSOR)
+			return false;
+		if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) == 10) {
+			if (plane_id != PLANE_SPRITE0)
+				return false;
+		} else {
+			if (plane_id != PLANE_SPRITE0 || pipe == PIPE_C ||
+			    IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))
+				return false;
+		}
+	}
+	return true;
+}
+
 static struct intel_plane *
 intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 {
@@ -13297,8 +13526,13 @@
 	primary->check_plane = intel_check_primary_plane;
 
 	if (INTEL_GEN(dev_priv) >= 9) {
-		intel_primary_formats = skl_primary_formats;
-		num_formats = ARRAY_SIZE(skl_primary_formats);
+		if (skl_plane_has_planar(dev_priv, pipe, PLANE_PRIMARY)) {
+			intel_primary_formats = skl_pri_planar_formats;
+			num_formats = ARRAY_SIZE(skl_pri_planar_formats);
+		} else {
+			intel_primary_formats = skl_primary_formats;
+			num_formats = ARRAY_SIZE(skl_primary_formats);
+		}
 
 		if (skl_plane_has_ccs(dev_priv, pipe, PLANE_PRIMARY))
 			modifiers = skl_format_modifiers_ccs;
@@ -13553,10 +13787,17 @@
 	/* initialize shared scalers */
 	intel_crtc_init_scalers(intel_crtc, crtc_state);
 
-	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
-	       dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] != NULL);
-	dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] = intel_crtc;
-	dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = intel_crtc;
+	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->pipe_to_crtc_mapping) ||
+	       dev_priv->pipe_to_crtc_mapping[pipe] != NULL);
+	dev_priv->pipe_to_crtc_mapping[pipe] = intel_crtc;
+
+	if (INTEL_GEN(dev_priv) < 9) {
+		enum i9xx_plane_id i9xx_plane = primary->i9xx_plane;
+
+		BUG_ON(i9xx_plane >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
+		       dev_priv->plane_to_crtc_mapping[i9xx_plane] != NULL);
+		dev_priv->plane_to_crtc_mapping[i9xx_plane] = intel_crtc;
+	}
 
 	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
 
@@ -14112,6 +14353,20 @@
 			goto err;
 		}
 		break;
+	case DRM_FORMAT_NV12:
+		if (mode_cmd->modifier[0] == I915_FORMAT_MOD_Y_TILED_CCS ||
+		    mode_cmd->modifier[0] == I915_FORMAT_MOD_Yf_TILED_CCS) {
+			DRM_DEBUG_KMS("RC not to be enabled with NV12\n");
+			goto err;
+		}
+		if (INTEL_GEN(dev_priv) < 9 || IS_SKYLAKE(dev_priv) ||
+		    IS_BROXTON(dev_priv)) {
+			DRM_DEBUG_KMS("unsupported pixel format: %s\n",
+				      drm_get_format_name(mode_cmd->pixel_format,
+							  &format_name));
+			goto err;
+		}
+		break;
 	default:
 		DRM_DEBUG_KMS("unsupported pixel format: %s\n",
 			      drm_get_format_name(mode_cmd->pixel_format, &format_name));
@@ -14124,6 +14379,14 @@
 
 	drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd);
 
+	if (fb->format->format == DRM_FORMAT_NV12 &&
+	    (fb->width < SKL_MIN_YUV_420_SRC_W ||
+	     fb->height < SKL_MIN_YUV_420_SRC_H ||
+	     (fb->width % 4) != 0 || (fb->height % 4) != 0)) {
+		DRM_DEBUG_KMS("src dimensions not correct for NV12\n");
+		return -EINVAL;
+	}
+
 	for (i = 0; i < fb->format->num_planes; i++) {
 		u32 stride_alignment;
 
@@ -15101,8 +15364,8 @@
 	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
 		struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
 
-		pll->on = pll->funcs.get_hw_state(dev_priv, pll,
-						  &pll->state.hw_state);
+		pll->on = pll->info->funcs->get_hw_state(dev_priv, pll,
+							&pll->state.hw_state);
 		pll->state.crtc_mask = 0;
 		for_each_intel_crtc(dev, crtc) {
 			struct intel_crtc_state *crtc_state =
@@ -15115,7 +15378,7 @@
 		pll->active_mask = pll->state.crtc_mask;
 
 		DRM_DEBUG_KMS("%s hw state readout: crtc_mask 0x%08x, on %i\n",
-			      pll->name, pll->state.crtc_mask, pll->on);
+			      pll->info->name, pll->state.crtc_mask, pll->on);
 	}
 
 	for_each_intel_encoder(dev, encoder) {
@@ -15291,9 +15554,10 @@
 		if (!pll->on || pll->active_mask)
 			continue;
 
-		DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name);
+		DRM_DEBUG_KMS("%s enabled but not in use, disabling\n",
+			      pll->info->name);
 
-		pll->funcs.disable(dev_priv, pll);
+		pll->info->funcs->disable(dev_priv, pll);
 		pll->on = false;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h
index 4e7418b..2ef3161 100644
--- a/drivers/gpu/drm/i915/intel_display.h
+++ b/drivers/gpu/drm/i915/intel_display.h
@@ -218,6 +218,10 @@
 	for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \
 		for_each_if((__mask) & BIT(__p))
 
+#define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \
+	for ((__t) = 0; (__t) < I915_MAX_TRANSCODERS; (__t)++)	\
+		for_each_if ((__mask) & (1 << (__t)))
+
 #define for_each_universal_plane(__dev_priv, __pipe, __p)		\
 	for ((__p) = 0;							\
 	     (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1;	\
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index b7b4cfd..dde92e4 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -43,7 +43,6 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
-#define DP_LINK_CHECK_TIMEOUT	(10 * 1000)
 #define DP_DPRX_ESI_LEN 14
 
 /* Compliance test status bits  */
@@ -92,8 +91,6 @@
 		{ .p1 = 4, .p2 = 2, .n = 1, .m1 = 2, .m2 = 0x819999a } },
 	{ 270000,	/* m2_int = 27, m2_fraction = 0 */
 		{ .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } },
-	{ 540000,	/* m2_int = 27, m2_fraction = 0 */
-		{ .p1 = 2, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } }
 };
 
 /**
@@ -1650,9 +1647,17 @@
 	}
 }
 
+struct link_config_limits {
+	int min_clock, max_clock;
+	int min_lane_count, max_lane_count;
+	int min_bpp, max_bpp;
+};
+
 static int intel_dp_compute_bpp(struct intel_dp *intel_dp,
 				struct intel_crtc_state *pipe_config)
 {
+	struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
+	struct intel_connector *intel_connector = intel_dp->attached_connector;
 	int bpp, bpc;
 
 	bpp = pipe_config->pipe_bpp;
@@ -1661,13 +1666,16 @@
 	if (bpc > 0)
 		bpp = min(bpp, 3*bpc);
 
-	/* For DP Compliance we override the computed bpp for the pipe */
-	if (intel_dp->compliance.test_data.bpc != 0) {
-		pipe_config->pipe_bpp =	3*intel_dp->compliance.test_data.bpc;
-		pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3;
-		DRM_DEBUG_KMS("Setting pipe_bpp to %d\n",
-			      pipe_config->pipe_bpp);
+	if (intel_dp_is_edp(intel_dp)) {
+		/* Get bpp from VBT only for panels that don't have bpp in EDID */
+		if (intel_connector->base.display_info.bpc == 0 &&
+		    dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp) {
+			DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n",
+				      dev_priv->vbt.edp.bpp);
+			bpp = dev_priv->vbt.edp.bpp;
+		}
 	}
+
 	return bpp;
 }
 
@@ -1688,6 +1696,142 @@
 	return bres;
 }
 
+/* Adjust link config limits based on compliance test requests. */
+static void
+intel_dp_adjust_compliance_config(struct intel_dp *intel_dp,
+				  struct intel_crtc_state *pipe_config,
+				  struct link_config_limits *limits)
+{
+	/* For DP Compliance we override the computed bpp for the pipe */
+	if (intel_dp->compliance.test_data.bpc != 0) {
+		int bpp = 3 * intel_dp->compliance.test_data.bpc;
+
+		limits->min_bpp = limits->max_bpp = bpp;
+		pipe_config->dither_force_disable = bpp == 6 * 3;
+
+		DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", bpp);
+	}
+
+	/* Use values requested by Compliance Test Request */
+	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
+		int index;
+
+		/* Validate the compliance test data since max values
+		 * might have changed due to link train fallback.
+		 */
+		if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate,
+					       intel_dp->compliance.test_lane_count)) {
+			index = intel_dp_rate_index(intel_dp->common_rates,
+						    intel_dp->num_common_rates,
+						    intel_dp->compliance.test_link_rate);
+			if (index >= 0)
+				limits->min_clock = limits->max_clock = index;
+			limits->min_lane_count = limits->max_lane_count =
+				intel_dp->compliance.test_lane_count;
+		}
+	}
+}
+
+/* Optimize link config in order: max bpp, min clock, min lanes */
+static bool
+intel_dp_compute_link_config_wide(struct intel_dp *intel_dp,
+				  struct intel_crtc_state *pipe_config,
+				  const struct link_config_limits *limits)
+{
+	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	int bpp, clock, lane_count;
+	int mode_rate, link_clock, link_avail;
+
+	for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) {
+		mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock,
+						   bpp);
+
+		for (clock = limits->min_clock; clock <= limits->max_clock; clock++) {
+			for (lane_count = limits->min_lane_count;
+			     lane_count <= limits->max_lane_count;
+			     lane_count <<= 1) {
+				link_clock = intel_dp->common_rates[clock];
+				link_avail = intel_dp_max_data_rate(link_clock,
+								    lane_count);
+
+				if (mode_rate <= link_avail) {
+					pipe_config->lane_count = lane_count;
+					pipe_config->pipe_bpp = bpp;
+					pipe_config->port_clock = link_clock;
+
+					return true;
+				}
+			}
+		}
+	}
+
+	return false;
+}
+
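+/*
+ * To make the search order above concrete: bpp is walked from the top down,
+ * and at the highest bpp that can carry the mode the clock loop settles on
+ * the lowest workable link rate, with the innermost loop then picking the
+ * fewest lanes at that rate; this is the "slow and wide" policy named in
+ * intel_dp_compute_link_config() below.
+ */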
+static bool
+intel_dp_compute_link_config(struct intel_encoder *encoder,
+			     struct intel_crtc_state *pipe_config)
+{
+	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct link_config_limits limits;
+	int common_len;
+
+	common_len = intel_dp_common_len_rate_limit(intel_dp,
+						    intel_dp->max_link_rate);
+
+	/* No common link rates between source and sink */
+	WARN_ON(common_len <= 0);
+
+	limits.min_clock = 0;
+	limits.max_clock = common_len - 1;
+
+	limits.min_lane_count = 1;
+	limits.max_lane_count = intel_dp_max_lane_count(intel_dp);
+
+	limits.min_bpp = 6 * 3;
+	limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config);
+
+	if (intel_dp_is_edp(intel_dp)) {
+		/*
+		 * Use the maximum clock and number of lanes the eDP panel
+		 * advertises being capable of. The panels are generally
+		 * designed to support only a single clock and lane
+		 * configuration, and typically these values correspond to the
+		 * native resolution of the panel.
+		 */
+		limits.min_lane_count = limits.max_lane_count;
+		limits.min_clock = limits.max_clock;
+	}
+
+	intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits);
+
+	DRM_DEBUG_KMS("DP link computation with max lane count %i "
+		      "max rate %d max bpp %d pixel clock %iKHz\n",
+		      limits.max_lane_count,
+		      intel_dp->common_rates[limits.max_clock],
+		      limits.max_bpp, adjusted_mode->crtc_clock);
+
+	/*
+	 * Optimize for slow and wide. This is the place to add alternative
+	 * optimization policy.
+	 */
+	if (!intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits))
+		return false;
+
+	DRM_DEBUG_KMS("DP lane count %d clock %d bpp %d\n",
+		      pipe_config->lane_count, pipe_config->port_clock,
+		      pipe_config->pipe_bpp);
+
+	DRM_DEBUG_KMS("DP link rate required %i available %i\n",
+		      intel_dp_link_required(adjusted_mode->crtc_clock,
+					     pipe_config->pipe_bpp),
+		      intel_dp_max_data_rate(pipe_config->port_clock,
+					     pipe_config->lane_count));
+
+	return true;
+}
+
 bool
 intel_dp_compute_config(struct intel_encoder *encoder,
 			struct intel_crtc_state *pipe_config,
@@ -1701,27 +1845,9 @@
 	struct intel_connector *intel_connector = intel_dp->attached_connector;
 	struct intel_digital_connector_state *intel_conn_state =
 		to_intel_digital_connector_state(conn_state);
-	int lane_count, clock;
-	int min_lane_count = 1;
-	int max_lane_count = intel_dp_max_lane_count(intel_dp);
-	/* Conveniently, the link BW constants become indices with a shift...*/
-	int min_clock = 0;
-	int max_clock;
-	int bpp, mode_rate;
-	int link_avail, link_clock;
-	int common_len;
-	uint8_t link_bw, rate_select;
 	bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc,
 					   DP_DPCD_QUIRK_LIMITED_M_N);
 
-	common_len = intel_dp_common_len_rate_limit(intel_dp,
-						    intel_dp->max_link_rate);
-
-	/* No common link rates between source and sink */
-	WARN_ON(common_len <= 0);
-
-	max_clock = common_len - 1;
-
 	if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A)
 		pipe_config->has_pch_encoder = true;
 
@@ -1747,6 +1873,7 @@
 
 		if (INTEL_GEN(dev_priv) >= 9) {
 			int ret;
+
 			ret = skl_update_scaler_crtc(pipe_config);
 			if (ret)
 				return ret;
@@ -1767,75 +1894,9 @@
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
 		return false;
 
-	/* Use values requested by Compliance Test Request */
-	if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) {
-		int index;
+	if (!intel_dp_compute_link_config(encoder, pipe_config))
+		return false;
 
-		/* Validate the compliance test data since max values
-		 * might have changed due to link train fallback.
-		 */
-		if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate,
-					       intel_dp->compliance.test_lane_count)) {
-			index = intel_dp_rate_index(intel_dp->common_rates,
-						    intel_dp->num_common_rates,
-						    intel_dp->compliance.test_link_rate);
-			if (index >= 0)
-				min_clock = max_clock = index;
-			min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count;
-		}
-	}
-	DRM_DEBUG_KMS("DP link computation with max lane count %i "
-		      "max bw %d pixel clock %iKHz\n",
-		      max_lane_count, intel_dp->common_rates[max_clock],
-		      adjusted_mode->crtc_clock);
-
-	/* Walk through all bpp values. Luckily they're all nicely spaced with 2
-	 * bpc in between. */
-	bpp = intel_dp_compute_bpp(intel_dp, pipe_config);
-	if (intel_dp_is_edp(intel_dp)) {
-
-		/* Get bpp from vbt only for panels that dont have bpp in edid */
-		if (intel_connector->base.display_info.bpc == 0 &&
-			(dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp)) {
-			DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n",
-				      dev_priv->vbt.edp.bpp);
-			bpp = dev_priv->vbt.edp.bpp;
-		}
-
-		/*
-		 * Use the maximum clock and number of lanes the eDP panel
-		 * advertizes being capable of. The panels are generally
-		 * designed to support only a single clock and lane
-		 * configuration, and typically these values correspond to the
-		 * native resolution of the panel.
-		 */
-		min_lane_count = max_lane_count;
-		min_clock = max_clock;
-	}
-
-	for (; bpp >= 6*3; bpp -= 2*3) {
-		mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock,
-						   bpp);
-
-		for (clock = min_clock; clock <= max_clock; clock++) {
-			for (lane_count = min_lane_count;
-				lane_count <= max_lane_count;
-				lane_count <<= 1) {
-
-				link_clock = intel_dp->common_rates[clock];
-				link_avail = intel_dp_max_data_rate(link_clock,
-								    lane_count);
-
-				if (mode_rate <= link_avail) {
-					goto found;
-				}
-			}
-		}
-	}
-
-	return false;
-
-found:
 	if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) {
 		/*
 		 * See:
@@ -1843,7 +1904,7 @@
 		 * VESA DisplayPort Ver.1.2a - 5.1.1.1 Video Colorimetry
 		 */
 		pipe_config->limited_color_range =
-			bpp != 18 &&
+			pipe_config->pipe_bpp != 18 &&
 			drm_default_rgb_quant_range(adjusted_mode) ==
 			HDMI_QUANTIZATION_RANGE_LIMITED;
 	} else {
@@ -1851,21 +1912,7 @@
 			intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED;
 	}
 
-	pipe_config->lane_count = lane_count;
-
-	pipe_config->pipe_bpp = bpp;
-	pipe_config->port_clock = intel_dp->common_rates[clock];
-
-	intel_dp_compute_rate(intel_dp, pipe_config->port_clock,
-			      &link_bw, &rate_select);
-
-	DRM_DEBUG_KMS("DP link bw %02x rate select %02x lane count %d clock %d bpp %d\n",
-		      link_bw, rate_select, pipe_config->lane_count,
-		      pipe_config->port_clock, bpp);
-	DRM_DEBUG_KMS("DP link bw required %i available %i\n",
-		      mode_rate, link_avail);
-
-	intel_link_compute_m_n(bpp, lane_count,
+	intel_link_compute_m_n(pipe_config->pipe_bpp, pipe_config->lane_count,
 			       adjusted_mode->crtc_clock,
 			       pipe_config->port_clock,
 			       &pipe_config->dp_m_n,
@@ -1874,11 +1921,12 @@
 	if (intel_connector->panel.downclock_mode != NULL &&
 		dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) {
 			pipe_config->has_drrs = true;
-			intel_link_compute_m_n(bpp, lane_count,
-				intel_connector->panel.downclock_mode->clock,
-				pipe_config->port_clock,
-				&pipe_config->dp_m2_n2,
-				reduce_m_n);
+			intel_link_compute_m_n(pipe_config->pipe_bpp,
+					       pipe_config->lane_count,
+					       intel_connector->panel.downclock_mode->clock,
+					       pipe_config->port_clock,
+					       &pipe_config->dp_m2_n2,
+					       reduce_m_n);
 	}
 
 	if (!HAS_DDI(dev_priv))
@@ -2881,10 +2929,7 @@
 		}
 
 	} else {
-		if (IS_CHERRYVIEW(dev_priv))
-			*DP &= ~DP_LINK_TRAIN_MASK_CHV;
-		else
-			*DP &= ~DP_LINK_TRAIN_MASK;
+		*DP &= ~DP_LINK_TRAIN_MASK;
 
 		switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
 		case DP_TRAINING_PATTERN_DISABLE:
@@ -2897,12 +2942,8 @@
 			*DP |= DP_LINK_TRAIN_PAT_2;
 			break;
 		case DP_TRAINING_PATTERN_3:
-			if (IS_CHERRYVIEW(dev_priv)) {
-				*DP |= DP_LINK_TRAIN_PAT_3_CHV;
-			} else {
-				DRM_DEBUG_KMS("TPS3 not supported, using TPS2 instead\n");
-				*DP |= DP_LINK_TRAIN_PAT_2;
-			}
+			DRM_DEBUG_KMS("TPS3 not supported, using TPS2 instead\n");
+			*DP |= DP_LINK_TRAIN_PAT_2;
 			break;
 		}
 	}
@@ -3641,10 +3682,7 @@
 		DP &= ~DP_LINK_TRAIN_MASK_CPT;
 		DP |= DP_LINK_TRAIN_PAT_IDLE_CPT;
 	} else {
-		if (IS_CHERRYVIEW(dev_priv))
-			DP &= ~DP_LINK_TRAIN_MASK_CHV;
-		else
-			DP &= ~DP_LINK_TRAIN_MASK;
+		DP &= ~DP_LINK_TRAIN_MASK;
 		DP |= DP_LINK_TRAIN_PAT_IDLE;
 	}
 	I915_WRITE(intel_dp->output_reg, DP);
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
index f59b59b..3fcaa98 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -139,6 +139,11 @@
 	intel_dp_compute_rate(intel_dp, intel_dp->link_rate,
 			      &link_bw, &rate_select);
 
+	if (link_bw)
+		DRM_DEBUG_KMS("Using LINK_BW_SET value %02x\n", link_bw);
+	else
+		DRM_DEBUG_KMS("Using LINK_RATE_SET value %02x\n", rate_select);
+
 	/* Write the link configuration data */
 	link_config[0] = link_bw;
 	link_config[1] = intel_dp->lane_count;
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index c3de091..9e6956c 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -180,9 +180,11 @@
 	intel_dp->active_mst_links--;
 
 	intel_mst->connector = NULL;
-	if (intel_dp->active_mst_links == 0)
+	if (intel_dp->active_mst_links == 0) {
+		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
 		intel_dig_port->base.post_disable(&intel_dig_port->base,
 						  old_crtc_state, NULL);
+	}
 
 	DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
 }
@@ -223,7 +225,11 @@
 
 	DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links);
 
+	if (intel_dp->active_mst_links == 0)
+		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+
 	drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true);
+
 	if (intel_dp->active_mst_links == 0)
 		intel_dig_port->base.pre_enable(&intel_dig_port->base,
 						pipe_config, NULL);
diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c
index c8e9e44..00b3ab6 100644
--- a/drivers/gpu/drm/i915/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/intel_dpio_phy.c
@@ -380,13 +380,14 @@
 	 * all 1s.  Eventually they become accessible as they power up, then
 	 * the reserved bit will give the default 0.  Poll on the reserved bit
 	 * becoming 0 to find when the PHY is accessible.
-	 * HW team confirmed that the time to reach phypowergood status is
-	 * anywhere between 50 us and 100us.
+	 * The flag should get set in 100us according to the HW team, but
+	 * use 1ms due to occasional timeouts observed with that.
 	 */
-	if (wait_for_us(((I915_READ(BXT_PORT_CL1CM_DW0(phy)) &
-		(PHY_RESERVED | PHY_POWER_GOOD)) == PHY_POWER_GOOD), 100)) {
+	if (intel_wait_for_register_fw(dev_priv, BXT_PORT_CL1CM_DW0(phy),
+				       PHY_RESERVED | PHY_POWER_GOOD,
+				       PHY_POWER_GOOD,
+				       1))
 		DRM_ERROR("timeout during PHY%d power on\n", phy);
-	}
 
 	/* Program PLL Rcomp code offset */
 	val = I915_READ(BXT_PORT_CL1CM_DW9(phy));
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index 51c5ae4..383fbc1 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -118,10 +118,10 @@
 	if (WARN(!pll, "asserting DPLL %s with no DPLL\n", onoff(state)))
 		return;
 
-	cur_state = pll->funcs.get_hw_state(dev_priv, pll, &hw_state);
+	cur_state = pll->info->funcs->get_hw_state(dev_priv, pll, &hw_state);
 	I915_STATE_WARN(cur_state != state,
 	     "%s assertion failure (expected %s, current %s)\n",
-			pll->name, onoff(state), onoff(cur_state));
+			pll->info->name, onoff(state), onoff(cur_state));
 }
 
 /**
@@ -143,11 +143,11 @@
 	mutex_lock(&dev_priv->dpll_lock);
 	WARN_ON(!pll->state.crtc_mask);
 	if (!pll->active_mask) {
-		DRM_DEBUG_DRIVER("setting up %s\n", pll->name);
+		DRM_DEBUG_DRIVER("setting up %s\n", pll->info->name);
 		WARN_ON(pll->on);
 		assert_shared_dpll_disabled(dev_priv, pll);
 
-		pll->funcs.prepare(dev_priv, pll);
+		pll->info->funcs->prepare(dev_priv, pll);
 	}
 	mutex_unlock(&dev_priv->dpll_lock);
 }
@@ -179,7 +179,7 @@
 	pll->active_mask |= crtc_mask;
 
 	DRM_DEBUG_KMS("enable %s (active %x, on? %d) for crtc %d\n",
-		      pll->name, pll->active_mask, pll->on,
+		      pll->info->name, pll->active_mask, pll->on,
 		      crtc->base.base.id);
 
 	if (old_mask) {
@@ -189,8 +189,8 @@
 	}
 	WARN_ON(pll->on);
 
-	DRM_DEBUG_KMS("enabling %s\n", pll->name);
-	pll->funcs.enable(dev_priv, pll);
+	DRM_DEBUG_KMS("enabling %s\n", pll->info->name);
+	pll->info->funcs->enable(dev_priv, pll);
 	pll->on = true;
 
 out:
@@ -221,7 +221,7 @@
 		goto out;
 
 	DRM_DEBUG_KMS("disable %s (active %x, on? %d) for crtc %d\n",
-		      pll->name, pll->active_mask, pll->on,
+		      pll->info->name, pll->active_mask, pll->on,
 		      crtc->base.base.id);
 
 	assert_shared_dpll_enabled(dev_priv, pll);
@@ -231,8 +231,8 @@
 	if (pll->active_mask)
 		goto out;
 
-	DRM_DEBUG_KMS("disabling %s\n", pll->name);
-	pll->funcs.disable(dev_priv, pll);
+	DRM_DEBUG_KMS("disabling %s\n", pll->info->name);
+	pll->info->funcs->disable(dev_priv, pll);
 	pll->on = false;
 
 out:
@@ -263,7 +263,8 @@
 			   &shared_dpll[i].hw_state,
 			   sizeof(crtc_state->dpll_hw_state)) == 0) {
 			DRM_DEBUG_KMS("[CRTC:%d:%s] sharing existing %s (crtc mask 0x%08x, active %x)\n",
-				      crtc->base.base.id, crtc->base.name, pll->name,
+				      crtc->base.base.id, crtc->base.name,
+				      pll->info->name,
 				      shared_dpll[i].crtc_mask,
 				      pll->active_mask);
 			return pll;
@@ -275,7 +276,8 @@
 		pll = &dev_priv->shared_dplls[i];
 		if (shared_dpll[i].crtc_mask == 0) {
 			DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n",
-				      crtc->base.base.id, crtc->base.name, pll->name);
+				      crtc->base.base.id, crtc->base.name,
+				      pll->info->name);
 			return pll;
 		}
 	}
@@ -289,19 +291,19 @@
 {
 	struct intel_shared_dpll_state *shared_dpll;
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
-	enum intel_dpll_id i = pll->id;
+	const enum intel_dpll_id id = pll->info->id;
 
 	shared_dpll = intel_atomic_get_shared_dpll_state(crtc_state->base.state);
 
-	if (shared_dpll[i].crtc_mask == 0)
-		shared_dpll[i].hw_state =
+	if (shared_dpll[id].crtc_mask == 0)
+		shared_dpll[id].hw_state =
 			crtc_state->dpll_hw_state;
 
 	crtc_state->shared_dpll = pll;
-	DRM_DEBUG_DRIVER("using %s for pipe %c\n", pll->name,
+	DRM_DEBUG_DRIVER("using %s for pipe %c\n", pll->info->name,
 			 pipe_name(crtc->pipe));
 
-	shared_dpll[pll->id].crtc_mask |= 1 << crtc->pipe;
+	shared_dpll[id].crtc_mask |= 1 << crtc->pipe;
 }
 
 /**
@@ -341,15 +343,16 @@
 				      struct intel_shared_dpll *pll,
 				      struct intel_dpll_hw_state *hw_state)
 {
+	const enum intel_dpll_id id = pll->info->id;
 	uint32_t val;
 
 	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
-	val = I915_READ(PCH_DPLL(pll->id));
+	val = I915_READ(PCH_DPLL(id));
 	hw_state->dpll = val;
-	hw_state->fp0 = I915_READ(PCH_FP0(pll->id));
-	hw_state->fp1 = I915_READ(PCH_FP1(pll->id));
+	hw_state->fp0 = I915_READ(PCH_FP0(id));
+	hw_state->fp1 = I915_READ(PCH_FP1(id));
 
 	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
 
@@ -359,8 +362,10 @@
 static void ibx_pch_dpll_prepare(struct drm_i915_private *dev_priv,
 				 struct intel_shared_dpll *pll)
 {
-	I915_WRITE(PCH_FP0(pll->id), pll->state.hw_state.fp0);
-	I915_WRITE(PCH_FP1(pll->id), pll->state.hw_state.fp1);
+	const enum intel_dpll_id id = pll->info->id;
+
+	I915_WRITE(PCH_FP0(id), pll->state.hw_state.fp0);
+	I915_WRITE(PCH_FP1(id), pll->state.hw_state.fp1);
 }
 
 static void ibx_assert_pch_refclk_enabled(struct drm_i915_private *dev_priv)
@@ -379,13 +384,15 @@
 static void ibx_pch_dpll_enable(struct drm_i915_private *dev_priv,
 				struct intel_shared_dpll *pll)
 {
+	const enum intel_dpll_id id = pll->info->id;
+
 	/* PCH refclock must be enabled first */
 	ibx_assert_pch_refclk_enabled(dev_priv);
 
-	I915_WRITE(PCH_DPLL(pll->id), pll->state.hw_state.dpll);
+	I915_WRITE(PCH_DPLL(id), pll->state.hw_state.dpll);
 
 	/* Wait for the clocks to stabilize. */
-	POSTING_READ(PCH_DPLL(pll->id));
+	POSTING_READ(PCH_DPLL(id));
 	udelay(150);
 
 	/* The pixel multiplier can only be updated once the
@@ -393,14 +400,15 @@
 	 *
 	 * So write it again.
 	 */
-	I915_WRITE(PCH_DPLL(pll->id), pll->state.hw_state.dpll);
-	POSTING_READ(PCH_DPLL(pll->id));
+	I915_WRITE(PCH_DPLL(id), pll->state.hw_state.dpll);
+	POSTING_READ(PCH_DPLL(id));
 	udelay(200);
 }
 
 static void ibx_pch_dpll_disable(struct drm_i915_private *dev_priv,
 				 struct intel_shared_dpll *pll)
 {
+	const enum intel_dpll_id id = pll->info->id;
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_crtc *crtc;
 
@@ -410,8 +418,8 @@
 			assert_pch_transcoder_disabled(dev_priv, crtc->pipe);
 	}
 
-	I915_WRITE(PCH_DPLL(pll->id), 0);
-	POSTING_READ(PCH_DPLL(pll->id));
+	I915_WRITE(PCH_DPLL(id), 0);
+	POSTING_READ(PCH_DPLL(id));
 	udelay(200);
 }
 
@@ -429,7 +437,8 @@
 		pll = &dev_priv->shared_dplls[i];
 
 		DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n",
-			      crtc->base.base.id, crtc->base.name, pll->name);
+			      crtc->base.base.id, crtc->base.name,
+			      pll->info->name);
 	} else {
 		pll = intel_find_shared_dpll(crtc, crtc_state,
 					     DPLL_ID_PCH_PLL_A,
@@ -466,8 +475,10 @@
 static void hsw_ddi_wrpll_enable(struct drm_i915_private *dev_priv,
 			       struct intel_shared_dpll *pll)
 {
-	I915_WRITE(WRPLL_CTL(pll->id), pll->state.hw_state.wrpll);
-	POSTING_READ(WRPLL_CTL(pll->id));
+	const enum intel_dpll_id id = pll->info->id;
+
+	I915_WRITE(WRPLL_CTL(id), pll->state.hw_state.wrpll);
+	POSTING_READ(WRPLL_CTL(id));
 	udelay(20);
 }
 
@@ -482,11 +493,12 @@
 static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv,
 				  struct intel_shared_dpll *pll)
 {
+	const enum intel_dpll_id id = pll->info->id;
 	uint32_t val;
 
-	val = I915_READ(WRPLL_CTL(pll->id));
-	I915_WRITE(WRPLL_CTL(pll->id), val & ~WRPLL_PLL_ENABLE);
-	POSTING_READ(WRPLL_CTL(pll->id));
+	val = I915_READ(WRPLL_CTL(id));
+	I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE);
+	POSTING_READ(WRPLL_CTL(id));
 }
 
 static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv,
@@ -503,12 +515,13 @@
 				       struct intel_shared_dpll *pll,
 				       struct intel_dpll_hw_state *hw_state)
 {
+	const enum intel_dpll_id id = pll->info->id;
 	uint32_t val;
 
 	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
 		return false;
 
-	val = I915_READ(WRPLL_CTL(pll->id));
+	val = I915_READ(WRPLL_CTL(id));
 	hw_state->wrpll = val;
 
 	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
@@ -914,13 +927,15 @@
 static void skl_ddi_pll_write_ctrl1(struct drm_i915_private *dev_priv,
 				    struct intel_shared_dpll *pll)
 {
+	const enum intel_dpll_id id = pll->info->id;
 	uint32_t val;
 
 	val = I915_READ(DPLL_CTRL1);
 
-	val &= ~(DPLL_CTRL1_HDMI_MODE(pll->id) | DPLL_CTRL1_SSC(pll->id) |
-		 DPLL_CTRL1_LINK_RATE_MASK(pll->id));
-	val |= pll->state.hw_state.ctrl1 << (pll->id * 6);
+	val &= ~(DPLL_CTRL1_HDMI_MODE(id) |
+		 DPLL_CTRL1_SSC(id) |
+		 DPLL_CTRL1_LINK_RATE_MASK(id));
+	val |= pll->state.hw_state.ctrl1 << (id * 6);
 
 	I915_WRITE(DPLL_CTRL1, val);
 	POSTING_READ(DPLL_CTRL1);
@@ -930,24 +945,25 @@
 			       struct intel_shared_dpll *pll)
 {
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
+	const enum intel_dpll_id id = pll->info->id;
 
 	skl_ddi_pll_write_ctrl1(dev_priv, pll);
 
-	I915_WRITE(regs[pll->id].cfgcr1, pll->state.hw_state.cfgcr1);
-	I915_WRITE(regs[pll->id].cfgcr2, pll->state.hw_state.cfgcr2);
-	POSTING_READ(regs[pll->id].cfgcr1);
-	POSTING_READ(regs[pll->id].cfgcr2);
+	I915_WRITE(regs[id].cfgcr1, pll->state.hw_state.cfgcr1);
+	I915_WRITE(regs[id].cfgcr2, pll->state.hw_state.cfgcr2);
+	POSTING_READ(regs[id].cfgcr1);
+	POSTING_READ(regs[id].cfgcr2);
 
 	/* the enable bit is always bit 31 */
-	I915_WRITE(regs[pll->id].ctl,
-		   I915_READ(regs[pll->id].ctl) | LCPLL_PLL_ENABLE);
+	I915_WRITE(regs[id].ctl,
+		   I915_READ(regs[id].ctl) | LCPLL_PLL_ENABLE);
 
 	if (intel_wait_for_register(dev_priv,
 				    DPLL_STATUS,
-				    DPLL_LOCK(pll->id),
-				    DPLL_LOCK(pll->id),
+				    DPLL_LOCK(id),
+				    DPLL_LOCK(id),
 				    5))
-		DRM_ERROR("DPLL %d not locked\n", pll->id);
+		DRM_ERROR("DPLL %d not locked\n", id);
 }
 
 static void skl_ddi_dpll0_enable(struct drm_i915_private *dev_priv,
@@ -960,11 +976,12 @@
 				struct intel_shared_dpll *pll)
 {
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
+	const enum intel_dpll_id id = pll->info->id;
 
 	/* the enable bit is always bit 31 */
-	I915_WRITE(regs[pll->id].ctl,
-		   I915_READ(regs[pll->id].ctl) & ~LCPLL_PLL_ENABLE);
-	POSTING_READ(regs[pll->id].ctl);
+	I915_WRITE(regs[id].ctl,
+		   I915_READ(regs[id].ctl) & ~LCPLL_PLL_ENABLE);
+	POSTING_READ(regs[id].ctl);
 }
 
 static void skl_ddi_dpll0_disable(struct drm_i915_private *dev_priv,
@@ -978,6 +995,7 @@
 {
 	uint32_t val;
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
+	const enum intel_dpll_id id = pll->info->id;
 	bool ret;
 
 	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
@@ -985,17 +1003,17 @@
 
 	ret = false;
 
-	val = I915_READ(regs[pll->id].ctl);
+	val = I915_READ(regs[id].ctl);
 	if (!(val & LCPLL_PLL_ENABLE))
 		goto out;
 
 	val = I915_READ(DPLL_CTRL1);
-	hw_state->ctrl1 = (val >> (pll->id * 6)) & 0x3f;
+	hw_state->ctrl1 = (val >> (id * 6)) & 0x3f;
 
 	/* avoid reading back stale values if HDMI mode is not enabled */
-	if (val & DPLL_CTRL1_HDMI_MODE(pll->id)) {
-		hw_state->cfgcr1 = I915_READ(regs[pll->id].cfgcr1);
-		hw_state->cfgcr2 = I915_READ(regs[pll->id].cfgcr2);
+	if (val & DPLL_CTRL1_HDMI_MODE(id)) {
+		hw_state->cfgcr1 = I915_READ(regs[id].cfgcr1);
+		hw_state->cfgcr2 = I915_READ(regs[id].cfgcr2);
 	}
 	ret = true;
 
@@ -1011,6 +1029,7 @@
 {
 	uint32_t val;
 	const struct skl_dpll_regs *regs = skl_dpll_regs;
+	const enum intel_dpll_id id = pll->info->id;
 	bool ret;
 
 	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
@@ -1019,12 +1038,12 @@
 	ret = false;
 
 	/* DPLL0 is always enabled since it drives CDCLK */
-	val = I915_READ(regs[pll->id].ctl);
+	val = I915_READ(regs[id].ctl);
 	if (WARN_ON(!(val & LCPLL_PLL_ENABLE)))
 		goto out;
 
 	val = I915_READ(DPLL_CTRL1);
-	hw_state->ctrl1 = (val >> (pll->id * 6)) & 0x3f;
+	hw_state->ctrl1 = (val >> (id * 6)) & 0x3f;
 
 	ret = true;
 
@@ -1424,7 +1443,7 @@
 				struct intel_shared_dpll *pll)
 {
 	uint32_t temp;
-	enum port port = (enum port)pll->id;	/* 1:1 port->PLL mapping */
+	enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */
 	enum dpio_phy phy;
 	enum dpio_channel ch;
 
@@ -1543,7 +1562,7 @@
 static void bxt_ddi_pll_disable(struct drm_i915_private *dev_priv,
 					struct intel_shared_dpll *pll)
 {
-	enum port port = (enum port)pll->id;	/* 1:1 port->PLL mapping */
+	enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */
 	uint32_t temp;
 
 	temp = I915_READ(BXT_PORT_PLL_ENABLE(port));
@@ -1566,7 +1585,7 @@
 					struct intel_shared_dpll *pll,
 					struct intel_dpll_hw_state *hw_state)
 {
-	enum port port = (enum port)pll->id;	/* 1:1 port->PLL mapping */
+	enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */
 	uint32_t val;
 	bool ret;
 	enum dpio_phy phy;
@@ -1824,7 +1843,7 @@
 	pll = intel_get_shared_dpll_by_id(dev_priv, i);
 
 	DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n",
-		      crtc->base.base.id, crtc->base.name, pll->name);
+		      crtc->base.base.id, crtc->base.name, pll->info->name);
 
 	intel_reference_shared_dpll(pll, crtc_state);
 
@@ -1877,13 +1896,6 @@
 	}
 }
 
-struct dpll_info {
-	const char *name;
-	const int id;
-	const struct intel_shared_dpll_funcs *funcs;
-	uint32_t flags;
-};
-
 struct intel_dpll_mgr {
 	const struct dpll_info *dpll_info;
 
@@ -1896,9 +1908,9 @@
 };
 
 static const struct dpll_info pch_plls[] = {
-	{ "PCH DPLL A", DPLL_ID_PCH_PLL_A, &ibx_pch_dpll_funcs, 0 },
-	{ "PCH DPLL B", DPLL_ID_PCH_PLL_B, &ibx_pch_dpll_funcs, 0 },
-	{ NULL, -1, NULL, 0 },
+	{ "PCH DPLL A", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_A, 0 },
+	{ "PCH DPLL B", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_B, 0 },
+	{ },
 };
 
 static const struct intel_dpll_mgr pch_pll_mgr = {
@@ -1908,13 +1920,13 @@
 };
 
 static const struct dpll_info hsw_plls[] = {
-	{ "WRPLL 1",    DPLL_ID_WRPLL1,     &hsw_ddi_wrpll_funcs, 0 },
-	{ "WRPLL 2",    DPLL_ID_WRPLL2,     &hsw_ddi_wrpll_funcs, 0 },
-	{ "SPLL",       DPLL_ID_SPLL,       &hsw_ddi_spll_funcs,  0 },
-	{ "LCPLL 810",  DPLL_ID_LCPLL_810,  &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON },
-	{ "LCPLL 1350", DPLL_ID_LCPLL_1350, &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON },
-	{ "LCPLL 2700", DPLL_ID_LCPLL_2700, &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON },
-	{ NULL, -1, NULL, },
+	{ "WRPLL 1",    &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL1,     0 },
+	{ "WRPLL 2",    &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL2,     0 },
+	{ "SPLL",       &hsw_ddi_spll_funcs,  DPLL_ID_SPLL,       0 },
+	{ "LCPLL 810",  &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_810,  INTEL_DPLL_ALWAYS_ON },
+	{ "LCPLL 1350", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_1350, INTEL_DPLL_ALWAYS_ON },
+	{ "LCPLL 2700", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_2700, INTEL_DPLL_ALWAYS_ON },
+	{ },
 };
 
 static const struct intel_dpll_mgr hsw_pll_mgr = {
@@ -1924,11 +1936,11 @@
 };
 
 static const struct dpll_info skl_plls[] = {
-	{ "DPLL 0", DPLL_ID_SKL_DPLL0, &skl_ddi_dpll0_funcs, INTEL_DPLL_ALWAYS_ON },
-	{ "DPLL 1", DPLL_ID_SKL_DPLL1, &skl_ddi_pll_funcs,   0 },
-	{ "DPLL 2", DPLL_ID_SKL_DPLL2, &skl_ddi_pll_funcs,   0 },
-	{ "DPLL 3", DPLL_ID_SKL_DPLL3, &skl_ddi_pll_funcs,   0 },
-	{ NULL, -1, NULL, },
+	{ "DPLL 0", &skl_ddi_dpll0_funcs, DPLL_ID_SKL_DPLL0, INTEL_DPLL_ALWAYS_ON },
+	{ "DPLL 1", &skl_ddi_pll_funcs,   DPLL_ID_SKL_DPLL1, 0 },
+	{ "DPLL 2", &skl_ddi_pll_funcs,   DPLL_ID_SKL_DPLL2, 0 },
+	{ "DPLL 3", &skl_ddi_pll_funcs,   DPLL_ID_SKL_DPLL3, 0 },
+	{ },
 };
 
 static const struct intel_dpll_mgr skl_pll_mgr = {
@@ -1938,10 +1950,10 @@
 };
 
 static const struct dpll_info bxt_plls[] = {
-	{ "PORT PLL A", DPLL_ID_SKL_DPLL0, &bxt_ddi_pll_funcs, 0 },
-	{ "PORT PLL B", DPLL_ID_SKL_DPLL1, &bxt_ddi_pll_funcs, 0 },
-	{ "PORT PLL C", DPLL_ID_SKL_DPLL2, &bxt_ddi_pll_funcs, 0 },
-	{ NULL, -1, NULL, },
+	{ "PORT PLL A", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 },
+	{ "PORT PLL B", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 },
+	{ "PORT PLL C", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 },
+	{ },
 };
 
 static const struct intel_dpll_mgr bxt_pll_mgr = {
@@ -1953,38 +1965,39 @@
 static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv,
 			       struct intel_shared_dpll *pll)
 {
+	const enum intel_dpll_id id = pll->info->id;
 	uint32_t val;
 
 	/* 1. Enable DPLL power in DPLL_ENABLE. */
-	val = I915_READ(CNL_DPLL_ENABLE(pll->id));
+	val = I915_READ(CNL_DPLL_ENABLE(id));
 	val |= PLL_POWER_ENABLE;
-	I915_WRITE(CNL_DPLL_ENABLE(pll->id), val);
+	I915_WRITE(CNL_DPLL_ENABLE(id), val);
 
 	/* 2. Wait for DPLL power state enabled in DPLL_ENABLE. */
 	if (intel_wait_for_register(dev_priv,
-				    CNL_DPLL_ENABLE(pll->id),
+				    CNL_DPLL_ENABLE(id),
 				    PLL_POWER_STATE,
 				    PLL_POWER_STATE,
 				    5))
-		DRM_ERROR("PLL %d Power not enabled\n", pll->id);
+		DRM_ERROR("PLL %d Power not enabled\n", id);
 
 	/*
 	 * 3. Configure DPLL_CFGCR0 to set SSC enable/disable,
 	 * select DP mode, and set DP link rate.
 	 */
 	val = pll->state.hw_state.cfgcr0;
-	I915_WRITE(CNL_DPLL_CFGCR0(pll->id), val);
+	I915_WRITE(CNL_DPLL_CFGCR0(id), val);
 
 	/* 4. Read back to ensure writes completed */
-	POSTING_READ(CNL_DPLL_CFGCR0(pll->id));
+	POSTING_READ(CNL_DPLL_CFGCR0(id));
 
 	/* 3. Configure DPLL_CFGCR1 */
 	/* Avoid touching CFGCR1 if HDMI mode is not enabled */
 	if (pll->state.hw_state.cfgcr0 & DPLL_CFGCR0_HDMI_MODE) {
 		val = pll->state.hw_state.cfgcr1;
-		I915_WRITE(CNL_DPLL_CFGCR1(pll->id), val);
+		I915_WRITE(CNL_DPLL_CFGCR1(id), val);
 		/* 4. Read back to ensure writes completed */
-		POSTING_READ(CNL_DPLL_CFGCR1(pll->id));
+		POSTING_READ(CNL_DPLL_CFGCR1(id));
 	}
 
 	/*
@@ -1997,17 +2010,17 @@
 	 */
 
 	/* 6. Enable DPLL in DPLL_ENABLE. */
-	val = I915_READ(CNL_DPLL_ENABLE(pll->id));
+	val = I915_READ(CNL_DPLL_ENABLE(id));
 	val |= PLL_ENABLE;
-	I915_WRITE(CNL_DPLL_ENABLE(pll->id), val);
+	I915_WRITE(CNL_DPLL_ENABLE(id), val);
 
 	/* 7. Wait for PLL lock status in DPLL_ENABLE. */
 	if (intel_wait_for_register(dev_priv,
-				    CNL_DPLL_ENABLE(pll->id),
+				    CNL_DPLL_ENABLE(id),
 				    PLL_LOCK,
 				    PLL_LOCK,
 				    5))
-		DRM_ERROR("PLL %d not locked\n", pll->id);
+		DRM_ERROR("PLL %d not locked\n", id);
 
 	/*
 	 * 8. If the frequency will result in a change to the voltage
@@ -2027,6 +2040,7 @@
 static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv,
 				struct intel_shared_dpll *pll)
 {
+	const enum intel_dpll_id id = pll->info->id;
 	uint32_t val;
 
 	/*
@@ -2044,17 +2058,17 @@
 	 */
 
 	/* 3. Disable DPLL through DPLL_ENABLE. */
-	val = I915_READ(CNL_DPLL_ENABLE(pll->id));
+	val = I915_READ(CNL_DPLL_ENABLE(id));
 	val &= ~PLL_ENABLE;
-	I915_WRITE(CNL_DPLL_ENABLE(pll->id), val);
+	I915_WRITE(CNL_DPLL_ENABLE(id), val);
 
 	/* 4. Wait for PLL not locked status in DPLL_ENABLE. */
 	if (intel_wait_for_register(dev_priv,
-				    CNL_DPLL_ENABLE(pll->id),
+				    CNL_DPLL_ENABLE(id),
 				    PLL_LOCK,
 				    0,
 				    5))
-		DRM_ERROR("PLL %d locked\n", pll->id);
+		DRM_ERROR("PLL %d locked\n", id);
 
 	/*
 	 * 5. If the frequency will result in a change to the voltage
@@ -2066,23 +2080,24 @@
 	 */
 
 	/* 6. Disable DPLL power in DPLL_ENABLE. */
-	val = I915_READ(CNL_DPLL_ENABLE(pll->id));
+	val = I915_READ(CNL_DPLL_ENABLE(id));
 	val &= ~PLL_POWER_ENABLE;
-	I915_WRITE(CNL_DPLL_ENABLE(pll->id), val);
+	I915_WRITE(CNL_DPLL_ENABLE(id), val);
 
 	/* 7. Wait for DPLL power state disabled in DPLL_ENABLE. */
 	if (intel_wait_for_register(dev_priv,
-				    CNL_DPLL_ENABLE(pll->id),
+				    CNL_DPLL_ENABLE(id),
 				    PLL_POWER_STATE,
 				    0,
 				    5))
-		DRM_ERROR("PLL %d Power not disabled\n", pll->id);
+		DRM_ERROR("PLL %d Power not disabled\n", id);
 }
 
 static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
 				     struct intel_shared_dpll *pll,
 				     struct intel_dpll_hw_state *hw_state)
 {
+	const enum intel_dpll_id id = pll->info->id;
 	uint32_t val;
 	bool ret;
 
@@ -2091,16 +2106,16 @@
 
 	ret = false;
 
-	val = I915_READ(CNL_DPLL_ENABLE(pll->id));
+	val = I915_READ(CNL_DPLL_ENABLE(id));
 	if (!(val & PLL_ENABLE))
 		goto out;
 
-	val = I915_READ(CNL_DPLL_CFGCR0(pll->id));
+	val = I915_READ(CNL_DPLL_CFGCR0(id));
 	hw_state->cfgcr0 = val;
 
 	/* avoid reading back stale values if HDMI mode is not enabled */
 	if (val & DPLL_CFGCR0_HDMI_MODE) {
-		hw_state->cfgcr1 = I915_READ(CNL_DPLL_CFGCR1(pll->id));
+		hw_state->cfgcr1 = I915_READ(CNL_DPLL_CFGCR1(id));
 	}
 	ret = true;
 
@@ -2203,6 +2218,7 @@
 			struct skl_wrpll_params *wrpll_params)
 {
 	u32 afe_clock = clock * 5;
+	uint32_t ref_clock;
 	u32 dco_min = 7998000;
 	u32 dco_max = 10000000;
 	u32 dco_mid = (dco_min + dco_max) / 2;
@@ -2235,8 +2251,17 @@
 
 	cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv);
 
-	cnl_wrpll_params_populate(wrpll_params, best_dco,
-				  dev_priv->cdclk.hw.ref, pdiv, qdiv, kdiv);
+	ref_clock = dev_priv->cdclk.hw.ref;
+
+	/*
+	 * For ICL, the spec states: if reference frequency is 38.4, use 19.2
+	 * because the DPLL automatically divides that by 2.
+	 */
+	if (IS_ICELAKE(dev_priv) && ref_clock == 38400)
+		ref_clock = 19200;
+
+	cnl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, pdiv, qdiv,
+				  kdiv);
 
 	return true;
 }
@@ -2372,10 +2397,10 @@
 };
 
 static const struct dpll_info cnl_plls[] = {
-	{ "DPLL 0", DPLL_ID_SKL_DPLL0, &cnl_ddi_pll_funcs, 0 },
-	{ "DPLL 1", DPLL_ID_SKL_DPLL1, &cnl_ddi_pll_funcs, 0 },
-	{ "DPLL 2", DPLL_ID_SKL_DPLL2, &cnl_ddi_pll_funcs, 0 },
-	{ NULL, -1, NULL, },
+	{ "DPLL 0", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 },
+	{ "DPLL 1", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 },
+	{ "DPLL 2", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 },
+	{ },
 };
 
 static const struct intel_dpll_mgr cnl_pll_mgr = {
@@ -2384,6 +2409,644 @@
 	.dump_hw_state = cnl_dump_hw_state,
 };
 
+/*
+ * These values are already adjusted: they're the bits we write to the
+ * registers, not the logical values.
+ */
+static const struct skl_wrpll_params icl_dp_combo_pll_24MHz_values[] = {
+	{ .dco_integer = 0x151, .dco_fraction = 0x4000,		/* [0]: 5.4 */
+	  .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x151, .dco_fraction = 0x4000,		/* [1]: 2.7 */
+	  .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x151, .dco_fraction = 0x4000,		/* [2]: 1.62 */
+	  .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x151, .dco_fraction = 0x4000,		/* [3]: 3.24 */
+	  .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x168, .dco_fraction = 0x0000,		/* [4]: 2.16 */
+	  .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2},
+	{ .dco_integer = 0x168, .dco_fraction = 0x0000,		/* [5]: 4.32 */
+	  .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x195, .dco_fraction = 0x0000,		/* [6]: 6.48 */
+	  .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x151, .dco_fraction = 0x4000,		/* [7]: 8.1 */
+	  .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0},
+};
+
+/* Also used for 38.4 MHz values. */
+static const struct skl_wrpll_params icl_dp_combo_pll_19_2MHz_values[] = {
+	{ .dco_integer = 0x1A5, .dco_fraction = 0x7000,		/* [0]: 5.4 */
+	  .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x1A5, .dco_fraction = 0x7000,		/* [1]: 2.7 */
+	  .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x1A5, .dco_fraction = 0x7000,		/* [2]: 1.62 */
+	  .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x1A5, .dco_fraction = 0x7000,		/* [3]: 3.24 */
+	  .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x1C2, .dco_fraction = 0x0000,		/* [4]: 2.16 */
+	  .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2},
+	{ .dco_integer = 0x1C2, .dco_fraction = 0x0000,		/* [5]: 4.32 */
+	  .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x1FA, .dco_fraction = 0x2000,		/* [6]: 6.48 */
+	  .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0},
+	{ .dco_integer = 0x1A5, .dco_fraction = 0x7000,		/* [7]: 8.1 */
+	  .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0},
+};
+
+static bool icl_calc_dp_combo_pll(struct drm_i915_private *dev_priv, int clock,
+				  struct skl_wrpll_params *pll_params)
+{
+	const struct skl_wrpll_params *params;
+
+	params = dev_priv->cdclk.hw.ref == 24000 ?
+			icl_dp_combo_pll_24MHz_values :
+			icl_dp_combo_pll_19_2MHz_values;
+
+	switch (clock) {
+	case 540000:
+		*pll_params = params[0];
+		break;
+	case 270000:
+		*pll_params = params[1];
+		break;
+	case 162000:
+		*pll_params = params[2];
+		break;
+	case 324000:
+		*pll_params = params[3];
+		break;
+	case 216000:
+		*pll_params = params[4];
+		break;
+	case 432000:
+		*pll_params = params[5];
+		break;
+	case 648000:
+		*pll_params = params[6];
+		break;
+	case 810000:
+		*pll_params = params[7];
+		break;
+	default:
+		MISSING_CASE(clock);
+		return false;
+	}
+
+	return true;
+}
+
+static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state,
+				struct intel_encoder *encoder, int clock,
+				struct intel_dpll_hw_state *pll_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	uint32_t cfgcr0, cfgcr1;
+	struct skl_wrpll_params pll_params = { 0 };
+	bool ret;
+
+	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI))
+		ret = cnl_ddi_calculate_wrpll(clock, dev_priv, &pll_params);
+	else
+		ret = icl_calc_dp_combo_pll(dev_priv, clock, &pll_params);
+
+	if (!ret)
+		return false;
+
+	cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params.dco_fraction) |
+		 pll_params.dco_integer;
+
+	cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params.qdiv_ratio) |
+		 DPLL_CFGCR1_QDIV_MODE(pll_params.qdiv_mode) |
+		 DPLL_CFGCR1_KDIV(pll_params.kdiv) |
+		 DPLL_CFGCR1_PDIV(pll_params.pdiv) |
+		 DPLL_CFGCR1_CENTRAL_FREQ_8400;
+
+	pll_state->cfgcr0 = cfgcr0;
+	pll_state->cfgcr1 = cfgcr1;
+	return true;
+}
+
+static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id)
+{
+	return id - DPLL_ID_ICL_MGPLL1 + PORT_C;
+}
+
+static enum intel_dpll_id icl_port_to_mg_pll_id(enum port port)
+{
+	return port - PORT_C + DPLL_ID_ICL_MGPLL1;
+}
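+
+/*
+ * The two helpers above assume PORT_C.. and DPLL_ID_ICL_MGPLL1.. are
+ * contiguous enum ranges, so the mapping is plain offset arithmetic:
+ * PORT_C <-> DPLL_ID_ICL_MGPLL1, PORT_D <-> the next MG PLL id, and so on.
+ */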
+
+static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc,
+				     uint32_t *target_dco_khz,
+				     struct intel_dpll_hw_state *state)
+{
+	uint32_t dco_min_freq, dco_max_freq;
+	int div1_vals[] = {7, 5, 3, 2};
+	unsigned int i;
+	int div2;
+
+	dco_min_freq = is_dp ? 8100000 : use_ssc ? 8000000 : 7992000;
+	dco_max_freq = is_dp ? 8100000 : 10000000;
+
+	for (i = 0; i < ARRAY_SIZE(div1_vals); i++) {
+		int div1 = div1_vals[i];
+
+		for (div2 = 10; div2 > 0; div2--) {
+			int dco = div1 * div2 * clock_khz * 5;
+			int a_divratio, tlinedrv, inputsel, hsdiv;
+
+			if (dco < dco_min_freq || dco > dco_max_freq)
+				continue;
+
+			if (div2 >= 2) {
+				a_divratio = is_dp ? 10 : 5;
+				tlinedrv = 2;
+			} else {
+				a_divratio = 5;
+				tlinedrv = 0;
+			}
+			inputsel = is_dp ? 0 : 1;
+
+			switch (div1) {
+			default:
+				MISSING_CASE(div1);
+			case 2:
+				hsdiv = 0;
+				break;
+			case 3:
+				hsdiv = 1;
+				break;
+			case 5:
+				hsdiv = 2;
+				break;
+			case 7:
+				hsdiv = 3;
+				break;
+			}
+
+			*target_dco_khz = dco;
+
+			state->mg_refclkin_ctl = MG_REFCLKIN_CTL_OD_2_MUX(1);
+
+			state->mg_clktop2_coreclkctl1 =
+				MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(a_divratio);
+
+			state->mg_clktop2_hsclkctl =
+				MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(tlinedrv) |
+				MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(inputsel) |
+				MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(hsdiv) |
+				MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(div2);
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
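+/*
+ * Worked example for the divisor search above: a 2.7 GHz DP link has
+ * clock_khz = 270000 and must hit dco = 8100000 kHz exactly, so
+ * div1 * div2 * 270000 * 5 = 8100000 requires div1 * div2 = 6; the loops
+ * try div1 = 7 and 5 without a fit and settle on div1 = 3, div2 = 2.
+ */
+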
+/*
+ * The specification for this function uses real numbers, so the math had to be
+ * adapted to an integer-only calculation; that's why it looks so different.
+ */
+static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state,
+				  struct intel_encoder *encoder, int clock,
+				  struct intel_dpll_hw_state *pll_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	int refclk_khz = dev_priv->cdclk.hw.ref;
+	uint32_t dco_khz, m1div, m2div_int, m2div_rem, m2div_frac;
+	uint32_t iref_ndiv, iref_trim, iref_pulse_w;
+	uint32_t prop_coeff, int_coeff;
+	uint32_t tdc_targetcnt, feedfwgain;
+	uint64_t ssc_stepsize, ssc_steplen, ssc_steplog;
+	uint64_t tmp;
+	bool use_ssc = false;
+	bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI);
+
+	if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz,
+				      pll_state)) {
+		DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock);
+		return false;
+	}
+
+	m1div = 2;
+	m2div_int = dco_khz / (refclk_khz * m1div);
+	if (m2div_int > 255) {
+		m1div = 4;
+		m2div_int = dco_khz / (refclk_khz * m1div);
+		if (m2div_int > 255) {
+			DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n",
+				      clock);
+			return false;
+		}
+	}
+	m2div_rem = dco_khz % (refclk_khz * m1div);
+
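+	/* Express the remainder as a 22-bit binary fraction (Q22 format). */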
+	tmp = (uint64_t)m2div_rem * (1 << 22);
+	do_div(tmp, refclk_khz * m1div);
+	m2div_frac = tmp;
+
+	switch (refclk_khz) {
+	case 19200:
+		iref_ndiv = 1;
+		iref_trim = 28;
+		iref_pulse_w = 1;
+		break;
+	case 24000:
+		iref_ndiv = 1;
+		iref_trim = 25;
+		iref_pulse_w = 2;
+		break;
+	case 38400:
+		iref_ndiv = 2;
+		iref_trim = 28;
+		iref_pulse_w = 1;
+		break;
+	default:
+		MISSING_CASE(refclk_khz);
+		return false;
+	}
+
+	/*
+	 * tdc_res = 0.000003
+	 * tdc_targetcnt = int(2 / (tdc_res * 8 * 50 * 1.1) / refclk_mhz + 0.5)
+	 *
+	 * The multiplication by 1000 is due to refclk MHz to KHz conversion. It
+	 * was supposed to be a division, but we rearranged the operations of
+	 * the formula to avoid early divisions so we don't multiply the
+	 * rounding errors.
+	 *
+	 * 0.000003 * 8 * 50 * 1.1 = 0.00132, also known as 132 / 100000, which
+	 * we also rearrange to work with integers.
+	 *
+	 * The 0.5 transformed to 5 results in a multiplication by 10 and the
+	 * last division by 10.
+	 */
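+	/*
+	 * Worked example, assuming refclk_khz = 24000: the exact formula
+	 * gives 2 / 0.00132 / 24 + 0.5 = 63.63, truncated to 63; the integer
+	 * form below computes (2000000000 / 3168000 + 5) / 10 = 63 as well.
+	 */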
+	tdc_targetcnt = (2 * 1000 * 100000 * 10 / (132 * refclk_khz) + 5) / 10;
+
+	/*
+	 * Here we divide dco_khz by 10 in order to allow the dividend to fit in
+	 * 32 bits. That's not a problem since we round the division down
+	 * anyway.
+	 */
+	feedfwgain = (use_ssc || m2div_rem > 0) ?
+		m1div * 1000000 * 100 / (dco_khz * 3 / 10) : 0;
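+	/*
+	 * For example, m1div == 2 and dco_khz == 8100000 give
+	 * 200000000 / 2430000 = 82.
+	 */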
+
+	if (dco_khz >= 9000000) {
+		prop_coeff = 5;
+		int_coeff = 10;
+	} else {
+		prop_coeff = 4;
+		int_coeff = 8;
+	}
+
+	if (use_ssc) {
+		tmp = (uint64_t)dco_khz * 47 * 32;
+		do_div(tmp, refclk_khz * m1div * 10000);
+		ssc_stepsize = tmp;
+
+		tmp = (uint64_t)dco_khz * 1000;
+		ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32);
+	} else {
+		ssc_stepsize = 0;
+		ssc_steplen = 0;
+	}
+	ssc_steplog = 4;
+
+	pll_state->mg_pll_div0 = (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) |
+				  MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) |
+				  MG_PLL_DIV0_FBDIV_INT(m2div_int);
+
+	pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) |
+				 MG_PLL_DIV1_DITHER_DIV_2 |
+				 MG_PLL_DIV1_NDIVRATIO(1) |
+				 MG_PLL_DIV1_FBPREDIV(m1div);
+
+	pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) |
+			       MG_PLL_LF_AFCCNTSEL_512 |
+			       MG_PLL_LF_GAINCTRL(1) |
+			       MG_PLL_LF_INT_COEFF(int_coeff) |
+			       MG_PLL_LF_PROP_COEFF(prop_coeff);
+
+	pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 |
+				      MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 |
+				      MG_PLL_FRAC_LOCK_LOCKTHRESH(10) |
+				      MG_PLL_FRAC_LOCK_DCODITHEREN |
+				      MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain);
+	if (use_ssc || m2div_rem > 0)
+		pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN;
+
+	pll_state->mg_pll_ssc = (use_ssc ? MG_PLL_SSC_EN : 0) |
+				MG_PLL_SSC_TYPE(2) |
+				MG_PLL_SSC_STEPLENGTH(ssc_steplen) |
+				MG_PLL_SSC_STEPNUM(ssc_steplog) |
+				MG_PLL_SSC_FLLEN |
+				MG_PLL_SSC_STEPSIZE(ssc_stepsize);
+
+	pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART;
+
+	if (refclk_khz != 38400) {
+		pll_state->mg_pll_tdc_coldst_bias |=
+			MG_PLL_TDC_COLDST_IREFINT_EN |
+			MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) |
+			MG_PLL_TDC_COLDST_COLDSTART |
+			MG_PLL_TDC_TDCOVCCORR_EN |
+			MG_PLL_TDC_TDCSEL(3);
+
+		pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) |
+					 MG_PLL_BIAS_INIT_DCOAMP(0x3F) |
+					 MG_PLL_BIAS_BIAS_BONUS(10) |
+					 MG_PLL_BIAS_BIASCAL_EN |
+					 MG_PLL_BIAS_CTRIM(12) |
+					 MG_PLL_BIAS_VREF_RDAC(4) |
+					 MG_PLL_BIAS_IREFTRIM(iref_trim);
+	}
+
+	return true;
+}
+
+static struct intel_shared_dpll *
+icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state,
+	     struct intel_encoder *encoder)
+{
+	struct intel_shared_dpll *pll;
+	struct intel_dpll_hw_state pll_state = {};
+	enum port port = encoder->port;
+	enum intel_dpll_id min, max;
+	int clock = crtc_state->port_clock;
+	bool ret;
+
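+	/*
+	 * Combo PHY ports (A, B) choose between the shared DPLL0 and DPLL1;
+	 * each MG PHY port (C-F) has its own dedicated PLL, hence min == max.
+	 */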
+	switch (port) {
+	case PORT_A:
+	case PORT_B:
+		min = DPLL_ID_ICL_DPLL0;
+		max = DPLL_ID_ICL_DPLL1;
+		ret = icl_calc_dpll_state(crtc_state, encoder, clock,
+					  &pll_state);
+		break;
+	case PORT_C:
+	case PORT_D:
+	case PORT_E:
+	case PORT_F:
+		min = icl_port_to_mg_pll_id(port);
+		max = min;
+		ret = icl_calc_mg_pll_state(crtc_state, encoder, clock,
+					    &pll_state);
+		break;
+	default:
+		MISSING_CASE(port);
+		return NULL;
+	}
+
+	if (!ret) {
+		DRM_DEBUG_KMS("Could not calculate PLL state.\n");
+		return NULL;
+	}
+
+	crtc_state->dpll_hw_state = pll_state;
+
+	pll = intel_find_shared_dpll(crtc, crtc_state, min, max);
+	if (!pll) {
+		DRM_DEBUG_KMS("No PLL selected\n");
+		return NULL;
+	}
+
+	intel_reference_shared_dpll(pll, crtc_state);
+
+	return pll;
+}
+
+static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id)
+{
+	switch (id) {
+	default:
+		MISSING_CASE(id);
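+		/* fall through */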
+	case DPLL_ID_ICL_DPLL0:
+	case DPLL_ID_ICL_DPLL1:
+		return CNL_DPLL_ENABLE(id);
+	case DPLL_ID_ICL_MGPLL1:
+	case DPLL_ID_ICL_MGPLL2:
+	case DPLL_ID_ICL_MGPLL3:
+	case DPLL_ID_ICL_MGPLL4:
+		return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id));
+	}
+}
+
+static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
+				 struct intel_shared_dpll *pll,
+				 struct intel_dpll_hw_state *hw_state)
+{
+	const enum intel_dpll_id id = pll->info->id;
+	uint32_t val;
+	enum port port;
+	bool ret = false;
+
+	if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
+		return false;
+
+	val = I915_READ(icl_pll_id_to_enable_reg(id));
+	if (!(val & PLL_ENABLE))
+		goto out;
+
+	switch (id) {
+	case DPLL_ID_ICL_DPLL0:
+	case DPLL_ID_ICL_DPLL1:
+		hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id));
+		hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id));
+		break;
+	case DPLL_ID_ICL_MGPLL1:
+	case DPLL_ID_ICL_MGPLL2:
+	case DPLL_ID_ICL_MGPLL3:
+	case DPLL_ID_ICL_MGPLL4:
+		port = icl_mg_pll_id_to_port(id);
+		hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port));
+		hw_state->mg_clktop2_coreclkctl1 =
+			I915_READ(MG_CLKTOP2_CORECLKCTL1(port));
+		hw_state->mg_clktop2_hsclkctl =
+			I915_READ(MG_CLKTOP2_HSCLKCTL(port));
+		hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port));
+		hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port));
+		hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port));
+		hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port));
+		hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port));
+		hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port));
+		hw_state->mg_pll_tdc_coldst_bias =
+			I915_READ(MG_PLL_TDC_COLDST_BIAS(port));
+		break;
+	default:
+		MISSING_CASE(id);
+	}
+
+	ret = true;
+out:
+	intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+	return ret;
+}
+
+static void icl_dpll_write(struct drm_i915_private *dev_priv,
+			   struct intel_shared_dpll *pll)
+{
+	struct intel_dpll_hw_state *hw_state = &pll->state.hw_state;
+	const enum intel_dpll_id id = pll->info->id;
+
+	I915_WRITE(ICL_DPLL_CFGCR0(id), hw_state->cfgcr0);
+	I915_WRITE(ICL_DPLL_CFGCR1(id), hw_state->cfgcr1);
+	POSTING_READ(ICL_DPLL_CFGCR1(id));
+}
+
+static void icl_mg_pll_write(struct drm_i915_private *dev_priv,
+			     struct intel_shared_dpll *pll)
+{
+	struct intel_dpll_hw_state *hw_state = &pll->state.hw_state;
+	enum port port = icl_mg_pll_id_to_port(pll->info->id);
+
+	I915_WRITE(MG_REFCLKIN_CTL(port), hw_state->mg_refclkin_ctl);
+	I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port),
+		   hw_state->mg_clktop2_coreclkctl1);
+	I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), hw_state->mg_clktop2_hsclkctl);
+	I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0);
+	I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1);
+	I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf);
+	I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock);
+	I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc);
+	I915_WRITE(MG_PLL_BIAS(port), hw_state->mg_pll_bias);
+	I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port),
+		   hw_state->mg_pll_tdc_coldst_bias);
+	POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port));
+}
+
+static void icl_pll_enable(struct drm_i915_private *dev_priv,
+			   struct intel_shared_dpll *pll)
+{
+	const enum intel_dpll_id id = pll->info->id;
+	i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id);
+	uint32_t val;
+
+	val = I915_READ(enable_reg);
+	val |= PLL_POWER_ENABLE;
+	I915_WRITE(enable_reg, val);
+
+	/*
+	 * The spec says we need to "wait" but it also says it should be
+	 * immediate.
+	 */
+	if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE,
+				    PLL_POWER_STATE, 1))
+		DRM_ERROR("PLL %d Power not enabled\n", id);
+
+	switch (id) {
+	case DPLL_ID_ICL_DPLL0:
+	case DPLL_ID_ICL_DPLL1:
+		icl_dpll_write(dev_priv, pll);
+		break;
+	case DPLL_ID_ICL_MGPLL1:
+	case DPLL_ID_ICL_MGPLL2:
+	case DPLL_ID_ICL_MGPLL3:
+	case DPLL_ID_ICL_MGPLL4:
+		icl_mg_pll_write(dev_priv, pll);
+		break;
+	default:
+		MISSING_CASE(id);
+	}
+
+	/*
+	 * DVFS pre sequence would be here, but in our driver the cdclk code
+	 * paths should already be setting the appropriate voltage, hence we do
+	 * nothing here.
+	 */
+
+	val = I915_READ(enable_reg);
+	val |= PLL_ENABLE;
+	I915_WRITE(enable_reg, val);
+
+	if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, PLL_LOCK,
+				    1)) /* 600us actually. */
+		DRM_ERROR("PLL %d not locked\n", id);
+
+	/* DVFS post sequence would be here. See the comment above. */
+}
+
+static void icl_pll_disable(struct drm_i915_private *dev_priv,
+			    struct intel_shared_dpll *pll)
+{
+	const enum intel_dpll_id id = pll->info->id;
+	i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id);
+	uint32_t val;
+
+	/* The first steps are done by intel_ddi_post_disable(). */
+
+	/*
+	 * DVFS pre sequence would be here, but in our driver the cdclk code
+	 * paths should already be setting the appropriate voltage, hence we do
+	 * nothing here.
+	 */
+
+	val = I915_READ(enable_reg);
+	val &= ~PLL_ENABLE;
+	I915_WRITE(enable_reg, val);
+
+	/* Timeout is actually 1us. */
+	if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, 0, 1))
+		DRM_ERROR("PLL %d locked\n", id);
+
+	/* DVFS post sequence would be here. See the comment above. */
+
+	val = I915_READ(enable_reg);
+	val &= ~PLL_POWER_ENABLE;
+	I915_WRITE(enable_reg, val);
+
+	/*
+	 * The spec says we need to "wait" but it also says it should be
+	 * immediate.
+	 */
+	if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, 0,
+				    1))
+		DRM_ERROR("PLL %d Power not disabled\n", id);
+}
+
+static void icl_dump_hw_state(struct drm_i915_private *dev_priv,
+			      struct intel_dpll_hw_state *hw_state)
+{
+	DRM_DEBUG_KMS("dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, "
+		      "mg_refclkin_ctl: 0x%x, hg_clktop2_coreclkctl1: 0x%x, "
+		      "mg_clktop2_hsclkctl: 0x%x, mg_pll_div0: 0x%x, "
+		      "mg_pll_div2: 0x%x, mg_pll_lf: 0x%x, "
+		      "mg_pll_frac_lock: 0x%x, mg_pll_ssc: 0x%x, "
+		      "mg_pll_bias: 0x%x, mg_pll_tdc_coldst_bias: 0x%x\n",
+		      hw_state->cfgcr0, hw_state->cfgcr1,
+		      hw_state->mg_refclkin_ctl,
+		      hw_state->mg_clktop2_coreclkctl1,
+		      hw_state->mg_clktop2_hsclkctl,
+		      hw_state->mg_pll_div0,
+		      hw_state->mg_pll_div1,
+		      hw_state->mg_pll_lf,
+		      hw_state->mg_pll_frac_lock,
+		      hw_state->mg_pll_ssc,
+		      hw_state->mg_pll_bias,
+		      hw_state->mg_pll_tdc_coldst_bias);
+}
+
+static const struct intel_shared_dpll_funcs icl_pll_funcs = {
+	.enable = icl_pll_enable,
+	.disable = icl_pll_disable,
+	.get_hw_state = icl_pll_get_hw_state,
+};
+
+static const struct dpll_info icl_plls[] = {
+	{ "DPLL 0",   &icl_pll_funcs, DPLL_ID_ICL_DPLL0,  0 },
+	{ "DPLL 1",   &icl_pll_funcs, DPLL_ID_ICL_DPLL1,  0 },
+	{ "MG PLL 1", &icl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 },
+	{ "MG PLL 2", &icl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 },
+	{ "MG PLL 3", &icl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 },
+	{ "MG PLL 4", &icl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 },
+	{ },
+};
+
+static const struct intel_dpll_mgr icl_pll_mgr = {
+	.dpll_info = icl_plls,
+	.get_dpll = icl_get_dpll,
+	.dump_hw_state = icl_dump_hw_state,
+};
+
 /**
  * intel_shared_dpll_init - Initialize shared DPLLs
  * @dev: drm device
@@ -2397,7 +3060,9 @@
 	const struct dpll_info *dpll_info;
 	int i;
 
-	if (IS_CANNONLAKE(dev_priv))
+	if (IS_ICELAKE(dev_priv))
+		dpll_mgr = &icl_pll_mgr;
+	else if (IS_CANNONLAKE(dev_priv))
 		dpll_mgr = &cnl_pll_mgr;
 	else if (IS_GEN9_BC(dev_priv))
 		dpll_mgr = &skl_pll_mgr;
@@ -2415,13 +3080,9 @@
 
 	dpll_info = dpll_mgr->dpll_info;
 
-	for (i = 0; dpll_info[i].id >= 0; i++) {
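+	/* dpll_info tables are terminated by an empty (NULL name) sentinel */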
+	for (i = 0; dpll_info[i].name; i++) {
 		WARN_ON(i != dpll_info[i].id);
-
-		dev_priv->shared_dplls[i].id = dpll_info[i].id;
-		dev_priv->shared_dplls[i].name = dpll_info[i].name;
-		dev_priv->shared_dplls[i].funcs = *dpll_info[i].funcs;
-		dev_priv->shared_dplls[i].flags = dpll_info[i].flags;
+		dev_priv->shared_dplls[i].info = &dpll_info[i];
 	}
 
 	dev_priv->dpll_mgr = dpll_mgr;
@@ -2481,7 +3142,7 @@
 	struct intel_shared_dpll_state *shared_dpll_state;
 
 	shared_dpll_state = intel_atomic_get_shared_dpll_state(state);
-	shared_dpll_state[dpll->id].crtc_mask &= ~(1 << crtc->pipe);
+	shared_dpll_state[dpll->info->id].crtc_mask &= ~(1 << crtc->pipe);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h
index f24ccf4..7a0cd56 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.h
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h
@@ -103,6 +103,32 @@
 	 * @DPLL_ID_SKL_DPLL3: SKL and later DPLL3
 	 */
 	DPLL_ID_SKL_DPLL3 = 3,
+
+
+	/**
+	 * @DPLL_ID_ICL_DPLL0: ICL combo PHY DPLL0
+	 */
+	DPLL_ID_ICL_DPLL0 = 0,
+	/**
+	 * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1
+	 */
+	DPLL_ID_ICL_DPLL1 = 1,
+	/**
+	 * @DPLL_ID_ICL_MGPLL1: ICL MG PLL 1 port 1 (C)
+	 */
+	DPLL_ID_ICL_MGPLL1 = 2,
+	/**
+	 * @DPLL_ID_ICL_MGPLL2: ICL MG PLL 2 port 2 (D)
+	 */
+	DPLL_ID_ICL_MGPLL2 = 3,
+	/**
+	 * @DPLL_ID_ICL_MGPLL3: ICL MG PLL 3 port 3 (E)
+	 */
+	DPLL_ID_ICL_MGPLL3 = 4,
+	/**
+	 * @DPLL_ID_ICL_MGPLL4: ICL MG PLL 4 port 4 (F)
+	 */
+	DPLL_ID_ICL_MGPLL4 = 5,
 };
 #define I915_NUM_PLLS 6
 
@@ -135,6 +161,21 @@
 	/* bxt */
 	uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10,
 		 pcsdw12;
+
+	/*
+	 * ICL uses the following, already defined:
+	 * uint32_t cfgcr0, cfgcr1;
+	 */
+	uint32_t mg_refclkin_ctl;
+	uint32_t mg_clktop2_coreclkctl1;
+	uint32_t mg_clktop2_hsclkctl;
+	uint32_t mg_pll_div0;
+	uint32_t mg_pll_div1;
+	uint32_t mg_pll_lf;
+	uint32_t mg_pll_frac_lock;
+	uint32_t mg_pll_ssc;
+	uint32_t mg_pll_bias;
+	uint32_t mg_pll_tdc_coldst_bias;
 };
 
 /**
@@ -206,6 +247,37 @@
 };
 
 /**
+ * struct dpll_info - display PLL platform specific info
+ */
+struct dpll_info {
+	/**
+	 * @name: DPLL name; used for logging
+	 */
+	const char *name;
+
+	/**
+	 * @funcs: platform specific hooks
+	 */
+	const struct intel_shared_dpll_funcs *funcs;
+
+	/**
+	 * @id: unique identifier for this DPLL; should match the index in the
+	 * dev_priv->shared_dplls array
+	 */
+	enum intel_dpll_id id;
+
+#define INTEL_DPLL_ALWAYS_ON	(1 << 0)
+	/**
+	 * @flags:
+	 *
+	 * INTEL_DPLL_ALWAYS_ON
+	 *     Inform the state checker that the DPLL is kept enabled even if
+	 *     not in use by any CRTC.
+	 */
+	uint32_t flags;
+};
+
+/**
  * struct intel_shared_dpll - display PLL with tracked state and users
  */
 struct intel_shared_dpll {
@@ -228,30 +300,9 @@
 	bool on;
 
 	/**
-	 * @name: DPLL name; used for logging
+	 * @info: platform specific info
 	 */
-	const char *name;
-
-	/**
-	 * @id: unique indentifier for this DPLL; should match the index in the
-	 * dev_priv->shared_dplls array
-	 */
-	enum intel_dpll_id id;
-
-	/**
-	 * @funcs: platform specific hooks
-	 */
-	struct intel_shared_dpll_funcs funcs;
-
-#define INTEL_DPLL_ALWAYS_ON	(1 << 0)
-	/**
-	 * @flags:
-	 *
-	 * INTEL_DPLL_ALWAYS_ON
-	 *     Inform the state checker that the DPLL is kept enabled even if
-	 *     not in use by any CRTC.
-	 */
-	uint32_t flags;
+	const struct dpll_info *info;
 };
 
 #define SKL_DPLL0 0
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a80fbad..d7dbca1 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -56,6 +56,8 @@
 	for (;;) {							\
 		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
 		OP;							\
+		/* Guarantee COND check prior to timeout */		\
+		barrier();						\
 		if (COND) {						\
 			ret__ = 0;					\
 			break;						\
@@ -96,6 +98,8 @@
 		u64 now = local_clock(); \
 		if (!(ATOMIC)) \
 			preempt_enable(); \
+		/* Guarantee COND check prior to timeout */ \
+		barrier(); \
 		if (COND) { \
 			ret = 0; \
 			break; \
@@ -140,6 +144,10 @@
 #define KHz(x) (1000 * (x))
 #define MHz(x) KHz(1000 * (x))
 
+#define KBps(x) (1000 * (x))
+#define MBps(x) KBps(1000 * (x))
+#define GBps(x) ((u64)1000 * MBps((x)))
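+/*
+ * Decimal (SI) multipliers, e.g. KBps(8) == 8000 and
+ * GBps(8) == 8000000000 (evaluated as a u64).
+ */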
+
 /*
  * Display related stuff
  */
@@ -482,7 +490,7 @@
 	bool skip_intermediate_wm;
 
 	/* Gen9+ only */
-	struct skl_wm_values wm_results;
+	struct skl_ddb_values wm_results;
 
 	struct i915_sw_fence commit_ready;
 
@@ -548,6 +556,12 @@
 #define SKL_MAX_DST_W 4096
 #define SKL_MIN_DST_H 8
 #define SKL_MAX_DST_H 4096
+#define ICL_MAX_SRC_W 5120
+#define ICL_MAX_SRC_H 4096
+#define ICL_MAX_DST_W 5120
+#define ICL_MAX_DST_H 4096
+#define SKL_MIN_YUV_420_SRC_W 16
+#define SKL_MIN_YUV_420_SRC_H 16
 
 struct intel_scaler {
 	int in_use;
@@ -598,7 +612,9 @@
 
 struct skl_plane_wm {
 	struct skl_wm_level wm[8];
+	struct skl_wm_level uv_wm[8];
 	struct skl_wm_level trans_wm;
+	bool is_planar;
 };
 
 struct skl_pipe_wm {
@@ -874,6 +890,7 @@
 
 	/* bitmask of visible planes (enum plane_id) */
 	u8 active_planes;
+	u8 nv12_planes;
 
 	/* HDMI scrambling status */
 	bool hdmi_scrambling;
@@ -1321,10 +1338,14 @@
 void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv);
 
 /* i915_irq.c */
+bool gen11_reset_one_iir(struct drm_i915_private * const i915,
+			 const unsigned int bank,
+			 const unsigned int bit);
 void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
 void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
 void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask);
+void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv);
 void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv);
 void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv);
 void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv);
@@ -1389,6 +1410,12 @@
 u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder);
 int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder,
 				     bool enable);
+void icl_map_plls_to_ports(struct drm_crtc *crtc,
+			   struct intel_crtc_state *crtc_state,
+			   struct drm_atomic_state *old_state);
+void icl_unmap_plls_to_ports(struct drm_crtc *crtc,
+			     struct intel_crtc_state *crtc_state,
+			     struct drm_atomic_state *old_state);
 
 unsigned int intel_fb_align_height(const struct drm_framebuffer *fb,
 				   int plane, unsigned int height);
@@ -1571,8 +1598,6 @@
 void bxt_disable_dc9(struct drm_i915_private *dev_priv);
 void gen9_enable_dc5(struct drm_i915_private *dev_priv);
 unsigned int skl_cdclk_get_vco(unsigned int freq);
-void skl_enable_dc6(struct drm_i915_private *dev_priv);
-void skl_disable_dc6(struct drm_i915_private *dev_priv);
 void intel_dp_get_m_n(struct intel_crtc *crtc,
 		      struct intel_crtc_state *pipe_config);
 void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n);
@@ -1588,9 +1613,12 @@
 enum intel_display_power_domain intel_port_to_power_domain(enum port port);
 void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 				 struct intel_crtc_state *pipe_config);
+void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
+				  struct intel_crtc_state *crtc_state);
 
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
-int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state);
+int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state,
+		  uint32_t pixel_format);
 
 static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state)
 {
@@ -1607,6 +1635,7 @@
 int skl_check_plane_surface(const struct intel_crtc_state *crtc_state,
 			    struct intel_plane_state *plane_state);
 int i9xx_check_plane_surface(struct intel_plane_state *plane_state);
+int skl_format_to_fourcc(int format, bool rgb_order, bool alpha);
 
 /* intel_csr.c */
 void intel_csr_ucode_init(struct drm_i915_private *);
@@ -1773,6 +1802,7 @@
 		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
 void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
 void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *dev_priv);
+int intel_fbc_reset_underrun(struct drm_i915_private *dev_priv);
 
 /* intel_hdmi.c */
 void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg,
@@ -1783,7 +1813,7 @@
 bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 			       struct intel_crtc_state *pipe_config,
 			       struct drm_connector_state *conn_state);
-void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder,
+bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
 				       struct drm_connector *connector,
 				       bool high_tmds_clock_ratio,
 				       bool scrambling);
@@ -1877,7 +1907,8 @@
 void intel_psr_disable(struct intel_dp *intel_dp,
 		      const struct intel_crtc_state *old_crtc_state);
 void intel_psr_invalidate(struct drm_i915_private *dev_priv,
-			  unsigned frontbuffer_bits);
+			  unsigned frontbuffer_bits,
+			  enum fb_op_origin origin);
 void intel_psr_flush(struct drm_i915_private *dev_priv,
 		     unsigned frontbuffer_bits,
 		     enum fb_op_origin origin);
@@ -1886,6 +1917,8 @@
 				   unsigned frontbuffer_bits);
 void intel_psr_compute_config(struct intel_dp *intel_dp,
 			      struct intel_crtc_state *crtc_state);
+void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug);
+void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir);
 
 /* intel_runtime_pm.c */
 int intel_power_domains_init(struct drm_i915_private *);
@@ -1909,6 +1942,8 @@
 					enum intel_display_power_domain domain);
 void intel_display_power_put(struct drm_i915_private *dev_priv,
 			     enum intel_display_power_domain domain);
+void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
+			    u8 req_slices);
 
 static inline void
 assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
@@ -2046,6 +2081,9 @@
 bool skl_plane_get_hw_state(struct intel_plane *plane);
 bool skl_plane_has_ccs(struct drm_i915_private *dev_priv,
 		       enum pipe pipe, enum plane_id plane_id);
+bool intel_format_is_yuv(uint32_t format);
+bool skl_plane_has_planar(struct drm_i915_private *dev_priv,
+			  enum pipe pipe, enum plane_id plane_id);
 
 /* intel_tv.c */
 void intel_tv_init(struct drm_i915_private *dev_priv);
@@ -2082,31 +2120,6 @@
 	return to_intel_crtc_state(crtc_state);
 }
 
-static inline struct intel_crtc_state *
-intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state,
-				     struct intel_crtc *crtc)
-{
-	struct drm_crtc_state *crtc_state;
-
-	crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base);
-
-	if (crtc_state)
-		return to_intel_crtc_state(crtc_state);
-	else
-		return NULL;
-}
-
-static inline struct intel_plane_state *
-intel_atomic_get_existing_plane_state(struct drm_atomic_state *state,
-				      struct intel_plane *plane)
-{
-	struct drm_plane_state *plane_state;
-
-	plane_state = drm_atomic_get_existing_plane_state(state, &plane->base);
-
-	return to_intel_plane_state(plane_state);
-}
-
 int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv,
 			       struct intel_crtc *intel_crtc,
 			       struct intel_crtc_state *crtc_state);
@@ -2138,8 +2151,17 @@
 #ifdef CONFIG_DEBUG_FS
 int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name,
 			      size_t *values_cnt);
+void intel_crtc_disable_pipe_crc(struct intel_crtc *crtc);
+void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc);
 #else
 #define intel_crtc_set_crc_source NULL
+static inline void intel_crtc_disable_pipe_crc(struct intel_crtc *crtc)
+{
+}
+
+static inline void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc)
+{
+}
 #endif
 extern const struct file_operations i915_display_crc_ctl_fops;
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c
index 91c07b0..4d6ffa7 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
@@ -647,6 +647,11 @@
 	/* prepare count */
 	prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul);
 
+	if (prepare_cnt > PREPARE_CNT_MAX) {
+		DRM_DEBUG_KMS("prepare count too high %u\n", prepare_cnt);
+		prepare_cnt = PREPARE_CNT_MAX;
+	}
+
 	/* exit zero count */
 	exit_zero_cnt = DIV_ROUND_UP(
 				(ths_prepare_hszero - ths_prepare_ns) * ui_den,
@@ -662,32 +667,29 @@
 	if (exit_zero_cnt < (55 * ui_den / ui_num) && (55 * ui_den) % ui_num)
 		exit_zero_cnt += 1;
 
+	if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) {
+		DRM_DEBUG_KMS("exit zero count too high %u\n", exit_zero_cnt);
+		exit_zero_cnt = EXIT_ZERO_CNT_MAX;
+	}
+
 	/* clk zero count */
 	clk_zero_cnt = DIV_ROUND_UP(
 				(tclk_prepare_clkzero -	ths_prepare_ns)
 				* ui_den, ui_num * mul);
 
+	if (clk_zero_cnt > CLK_ZERO_CNT_MAX) {
+		DRM_DEBUG_KMS("clock zero count too high %u\n", clk_zero_cnt);
+		clk_zero_cnt = CLK_ZERO_CNT_MAX;
+	}
+
 	/* trail count */
 	tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail);
 	trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul);
 
-	if (prepare_cnt > PREPARE_CNT_MAX ||
-		exit_zero_cnt > EXIT_ZERO_CNT_MAX ||
-		clk_zero_cnt > CLK_ZERO_CNT_MAX ||
-		trail_cnt > TRAIL_CNT_MAX)
-		DRM_DEBUG_DRIVER("Values crossing maximum limits, restricting to max values\n");
-
-	if (prepare_cnt > PREPARE_CNT_MAX)
-		prepare_cnt = PREPARE_CNT_MAX;
-
-	if (exit_zero_cnt > EXIT_ZERO_CNT_MAX)
-		exit_zero_cnt = EXIT_ZERO_CNT_MAX;
-
-	if (clk_zero_cnt > CLK_ZERO_CNT_MAX)
-		clk_zero_cnt = CLK_ZERO_CNT_MAX;
-
-	if (trail_cnt > TRAIL_CNT_MAX)
+	if (trail_cnt > TRAIL_CNT_MAX) {
+		DRM_DEBUG_KMS("trail count too high %u\n", trail_cnt);
 		trail_cnt = TRAIL_CNT_MAX;
+	}
 
 	/* B080 */
 	intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 |
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f7c2582..6bfd7e3 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -81,13 +81,17 @@
 	},
 };
 
+#define MAX_MMIO_BASES 3
 struct engine_info {
 	unsigned int hw_id;
 	unsigned int uabi_id;
 	u8 class;
 	u8 instance;
-	u32 mmio_base;
-	unsigned irq_shift;
+	/* mmio bases table *must* be sorted in reverse gen order */
+	struct engine_mmio_base {
+		u32 gen : 8;
+		u32 base : 24;
+	} mmio_bases[MAX_MMIO_BASES];
 };
 
 static const struct engine_info intel_engines[] = {
@@ -96,64 +100,76 @@
 		.uabi_id = I915_EXEC_RENDER,
 		.class = RENDER_CLASS,
 		.instance = 0,
-		.mmio_base = RENDER_RING_BASE,
-		.irq_shift = GEN8_RCS_IRQ_SHIFT,
+		.mmio_bases = {
+			{ .gen = 1, .base = RENDER_RING_BASE }
+		},
 	},
 	[BCS] = {
 		.hw_id = BCS_HW,
 		.uabi_id = I915_EXEC_BLT,
 		.class = COPY_ENGINE_CLASS,
 		.instance = 0,
-		.mmio_base = BLT_RING_BASE,
-		.irq_shift = GEN8_BCS_IRQ_SHIFT,
+		.mmio_bases = {
+			{ .gen = 6, .base = BLT_RING_BASE }
+		},
 	},
 	[VCS] = {
 		.hw_id = VCS_HW,
 		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 0,
-		.mmio_base = GEN6_BSD_RING_BASE,
-		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
+		.mmio_bases = {
+			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
+			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
+			{ .gen = 4, .base = BSD_RING_BASE }
+		},
 	},
 	[VCS2] = {
 		.hw_id = VCS2_HW,
 		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 1,
-		.mmio_base = GEN8_BSD2_RING_BASE,
-		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
+		.mmio_bases = {
+			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
+			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
+		},
 	},
 	[VCS3] = {
 		.hw_id = VCS3_HW,
 		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 2,
-		.mmio_base = GEN11_BSD3_RING_BASE,
-		.irq_shift = 0, /* not used */
+		.mmio_bases = {
+			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
+		},
 	},
 	[VCS4] = {
 		.hw_id = VCS4_HW,
 		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 3,
-		.mmio_base = GEN11_BSD4_RING_BASE,
-		.irq_shift = 0, /* not used */
+		.mmio_bases = {
+			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
+		},
 	},
 	[VECS] = {
 		.hw_id = VECS_HW,
 		.uabi_id = I915_EXEC_VEBOX,
 		.class = VIDEO_ENHANCEMENT_CLASS,
 		.instance = 0,
-		.mmio_base = VEBOX_RING_BASE,
-		.irq_shift = GEN8_VECS_IRQ_SHIFT,
+		.mmio_bases = {
+			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
+			{ .gen = 7, .base = VEBOX_RING_BASE }
+		},
 	},
 	[VECS2] = {
 		.hw_id = VECS2_HW,
 		.uabi_id = I915_EXEC_VEBOX,
 		.class = VIDEO_ENHANCEMENT_CLASS,
 		.instance = 1,
-		.mmio_base = GEN11_VEBOX2_RING_BASE,
-		.irq_shift = 0, /* not used */
+		.mmio_bases = {
+			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
+		},
 	},
 };
 
@@ -223,16 +239,36 @@
 	}
 }
 
+static u32 __engine_mmio_base(struct drm_i915_private *i915,
+			      const struct engine_mmio_base *bases)
+{
+	int i;
+
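+	/*
+	 * bases[] is sorted by gen in descending order (see engine_info), so
+	 * the first entry not newer than this device is the right one; e.g.
+	 * VCS on gen9 skips the gen11 entry and picks GEN6_BSD_RING_BASE.
+	 */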
+	for (i = 0; i < MAX_MMIO_BASES; i++)
+		if (INTEL_GEN(i915) >= bases[i].gen)
+			break;
+
+	GEM_BUG_ON(i == MAX_MMIO_BASES);
+	GEM_BUG_ON(!bases[i].base);
+
+	return bases[i].base;
+}
+
+static void __sprint_engine_name(char *name, const struct engine_info *info)
+{
+	WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
+			 intel_engine_classes[info->class].name,
+			 info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
+}
+
 static int
 intel_engine_setup(struct drm_i915_private *dev_priv,
 		   enum intel_engine_id id)
 {
 	const struct engine_info *info = &intel_engines[id];
-	const struct engine_class_info *class_info;
 	struct intel_engine_cs *engine;
 
 	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
-	class_info = &intel_engine_classes[info->class];
 
 	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
 	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
@@ -253,35 +289,14 @@
 
 	engine->id = id;
 	engine->i915 = dev_priv;
-	WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u",
-			 class_info->name, info->instance) >=
-		sizeof(engine->name));
+	__sprint_engine_name(engine->name, info);
 	engine->hw_id = engine->guc_id = info->hw_id;
-	if (INTEL_GEN(dev_priv) >= 11) {
-		switch (engine->id) {
-		case VCS:
-			engine->mmio_base = GEN11_BSD_RING_BASE;
-			break;
-		case VCS2:
-			engine->mmio_base = GEN11_BSD2_RING_BASE;
-			break;
-		case VECS:
-			engine->mmio_base = GEN11_VEBOX_RING_BASE;
-			break;
-		default:
-			/* take the original value for all other engines  */
-			engine->mmio_base = info->mmio_base;
-			break;
-		}
-	} else {
-		engine->mmio_base = info->mmio_base;
-	}
-	engine->irq_shift = info->irq_shift;
+	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
 	engine->class = info->class;
 	engine->instance = info->instance;
 
 	engine->uabi_id = info->uabi_id;
-	engine->uabi_class = class_info->uabi_class;
+	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
 
 	engine->context_size = __intel_engine_context_size(dev_priv,
 							   engine->class);
@@ -291,7 +306,7 @@
 	/* Nothing to do here, execute in order of dependencies */
 	engine->schedule = NULL;
 
-	spin_lock_init(&engine->stats.lock);
+	seqlock_init(&engine->stats.lock);
 
 	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
 
@@ -436,21 +451,13 @@
 	GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
 }
 
-static void intel_engine_init_timeline(struct intel_engine_cs *engine)
+static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
 {
-	engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id];
+	i915_gem_batch_pool_init(&engine->batch_pool, engine);
 }
 
 static bool csb_force_mmio(struct drm_i915_private *i915)
 {
-	/*
-	 * IOMMU adds unpredictable latency causing the CSB write (from the
-	 * GPU into the HWSP) to only be visible some time after the interrupt
-	 * (missed breadcrumb syndrome).
-	 */
-	if (intel_vtd_active())
-		return true;
-
 	/* Older GVT emulation depends upon intercepting CSB mmio */
 	if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915))
 		return true;
@@ -484,12 +491,11 @@
  */
 void intel_engine_setup_common(struct intel_engine_cs *engine)
 {
+	i915_timeline_init(engine->i915, &engine->timeline, engine->name);
+
 	intel_engine_init_execlist(engine);
-
-	intel_engine_init_timeline(engine);
 	intel_engine_init_hangcheck(engine);
-	i915_gem_batch_pool_init(engine, &engine->batch_pool);
-
+	intel_engine_init_batch_pool(engine);
 	intel_engine_init_cmd_parser(engine);
 }
 
@@ -520,8 +526,6 @@
 		goto err_unref;
 
 	engine->scratch = vma;
-	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
-			 engine->name, i915_ggtt_offset(vma));
 	return 0;
 
 err_unref:
@@ -615,9 +619,6 @@
 	engine->status_page.vma = vma;
 	engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
 	engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
-
-	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
-			 engine->name, i915_ggtt_offset(vma));
 	return 0;
 
 err_unpin:
@@ -669,7 +670,7 @@
 	 * be available. To avoid this we always pin the default
 	 * context.
 	 */
-	ring = engine->context_pin(engine, engine->i915->kernel_context);
+	ring = intel_context_pin(engine->i915->kernel_context, engine);
 	if (IS_ERR(ring))
 		return PTR_ERR(ring);
 
@@ -678,8 +679,7 @@
 	 * we can interrupt the engine at any time.
 	 */
 	if (engine->i915->preempt_context) {
-		ring = engine->context_pin(engine,
-					   engine->i915->preempt_context);
+		ring = intel_context_pin(engine->i915->preempt_context, engine);
 		if (IS_ERR(ring)) {
 			ret = PTR_ERR(ring);
 			goto err_unpin_kernel;
@@ -703,9 +703,9 @@
 	intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
 	if (engine->i915->preempt_context)
-		engine->context_unpin(engine, engine->i915->preempt_context);
+		intel_context_unpin(engine->i915->preempt_context, engine);
 err_unpin_kernel:
-	engine->context_unpin(engine, engine->i915->kernel_context);
+	intel_context_unpin(engine->i915->kernel_context, engine);
 	return ret;
 }
 
@@ -733,8 +733,10 @@
 		i915_gem_object_put(engine->default_state);
 
 	if (engine->i915->preempt_context)
-		engine->context_unpin(engine, engine->i915->preempt_context);
-	engine->context_unpin(engine, engine->i915->kernel_context);
+		intel_context_unpin(engine->i915->preempt_context, engine);
+	intel_context_unpin(engine->i915->kernel_context, engine);
+
+	i915_timeline_fini(&engine->timeline);
 }
 
 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
@@ -782,10 +784,24 @@
 read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
 		  int subslice, i915_reg_t reg)
 {
+	uint32_t mcr_slice_subslice_mask;
+	uint32_t mcr_slice_subslice_select;
 	uint32_t mcr;
 	uint32_t ret;
 	enum forcewake_domains fw_domains;
 
+	if (INTEL_GEN(dev_priv) >= 11) {
+		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
+					  GEN11_MCR_SUBSLICE_MASK;
+		mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
+					    GEN11_MCR_SUBSLICE(subslice);
+	} else {
+		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
+					  GEN8_MCR_SUBSLICE_MASK;
+		mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) |
+					    GEN8_MCR_SUBSLICE(subslice);
+	}
+
 	fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg,
 						    FW_REG_READ);
 	fw_domains |= intel_uncore_forcewake_for_reg(dev_priv,
@@ -800,14 +816,14 @@
 	 * The HW expects the slice and subslice selectors to be reset to 0
 	 * after reading out the registers.
 	 */
-	WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK));
-	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
-	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+	WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+	mcr &= ~mcr_slice_subslice_mask;
+	mcr |= mcr_slice_subslice_select;
 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 
 	ret = I915_READ_FW(reg);
 
-	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+	mcr &= ~mcr_slice_subslice_mask;
 	I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 
 	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
@@ -871,644 +887,6 @@
 	}
 }
 
-static int wa_add(struct drm_i915_private *dev_priv,
-		  i915_reg_t addr,
-		  const u32 mask, const u32 val)
-{
-	const u32 idx = dev_priv->workarounds.count;
-
-	if (WARN_ON(idx >= I915_MAX_WA_REGS))
-		return -ENOSPC;
-
-	dev_priv->workarounds.reg[idx].addr = addr;
-	dev_priv->workarounds.reg[idx].value = val;
-	dev_priv->workarounds.reg[idx].mask = mask;
-
-	dev_priv->workarounds.count++;
-
-	return 0;
-}
-
-#define WA_REG(addr, mask, val) do { \
-		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
-		if (r) \
-			return r; \
-	} while (0)
-
-#define WA_SET_BIT_MASKED(addr, mask) \
-	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
-
-#define WA_CLR_BIT_MASKED(addr, mask) \
-	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
-
-#define WA_SET_FIELD_MASKED(addr, mask, value) \
-	WA_REG(addr, mask, _MASKED_FIELD(mask, value))
-
-static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
-				 i915_reg_t reg)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	struct i915_workarounds *wa = &dev_priv->workarounds;
-	const uint32_t index = wa->hw_whitelist_count[engine->id];
-
-	if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
-		return -EINVAL;
-
-	I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
-		   i915_mmio_reg_offset(reg));
-	wa->hw_whitelist_count[engine->id]++;
-
-	return 0;
-}
-
-static int gen8_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
-
-	/* WaDisableAsyncFlipPerfMode:bdw,chv */
-	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
-
-	/* WaDisablePartialInstShootdown:bdw,chv */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
-	/* Use Force Non-Coherent whenever executing a 3D context. This is a
-	 * workaround for for a possible hang in the unlikely event a TLB
-	 * invalidation occurs during a PSD flush.
-	 */
-	/* WaForceEnableNonCoherent:bdw,chv */
-	/* WaHdcDisableFetchWhenMasked:bdw,chv */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
-			  HDC_FORCE_NON_COHERENT);
-
-	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
-	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
-	 *  polygons in the same 8x4 pixel/sample area to be processed without
-	 *  stalling waiting for the earlier ones to write to Hierarchical Z
-	 *  buffer."
-	 *
-	 * This optimization is off by default for BDW and CHV; turn it on.
-	 */
-	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
-
-	/* Wa4x4STCOptimizationDisable:bdw,chv */
-	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
-
-	/*
-	 * BSpec recommends 8x4 when MSAA is used,
-	 * however in practice 16x4 seems fastest.
-	 *
-	 * Note that PS/WM thread counts depend on the WIZ hashing
-	 * disable bit, which we don't touch here, but it's good
-	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
-	 */
-	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-			    GEN6_WIZ_HASHING_MASK,
-			    GEN6_WIZ_HASHING_16x4);
-
-	return 0;
-}
-
-static int bdw_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen8_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
-	/* WaDisableDopClockGating:bdw
-	 *
-	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
-	 * to disable EUTC clock gating.
-	 */
-	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
-			  DOP_CLOCK_GATING_DISABLE);
-
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-			  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  /* WaForceContextSaveRestoreNonCoherent:bdw */
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
-			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
-
-	return 0;
-}
-
-static int chv_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen8_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaDisableThreadStallDopClockGating:chv */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
-	/* Improve HiZ throughput on CHV. */
-	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
-
-	return 0;
-}
-
-static int gen9_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	/* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
-
-	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
-	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
-		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
-
-	/* WaDisableKillLogic:bxt,skl,kbl */
-	if (!IS_COFFEELAKE(dev_priv))
-		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-			   ECOCHK_DIS_TLB);
-
-	if (HAS_LLC(dev_priv)) {
-		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
-		 *
-		 * Must match Display Engine. See
-		 * WaCompressedResourceDisplayNewHashMode.
-		 */
-		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
-		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
-				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
-
-		I915_WRITE(MMCD_MISC_CTRL,
-			   I915_READ(MMCD_MISC_CTRL) |
-			   MMCD_PCLA |
-			   MMCD_HOTSPOT_EN);
-	}
-
-	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
-	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  FLOW_CONTROL_ENABLE |
-			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
-	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
-	if (!IS_COFFEELAKE(dev_priv))
-		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
-
-	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
-	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
-	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
-			  GEN9_ENABLE_YV12_BUGFIX |
-			  GEN9_ENABLE_GPGPU_PREEMPTION);
-
-	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
-	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
-	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
-					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
-
-	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
-	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
-			  GEN9_CCS_TLB_PREFETCH_ENABLE);
-
-	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
-
-	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
-	 * both tied to WaForceContextSaveRestoreNonCoherent
-	 * in some hsds for skl. We keep the tie for all gen9. The
-	 * documentation is a bit hazy and so we want to get common behaviour,
-	 * even though there is no clear evidence we would need both on kbl/bxt.
-	 * This area has been source of system hangs so we play it safe
-	 * and mimic the skl regardless of what bspec says.
-	 *
-	 * Use Force Non-Coherent whenever executing a 3D context. This
-	 * is a workaround for a possible hang in the unlikely event
-	 * a TLB invalidation occurs during a PSD flush.
-	 */
-
-	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
-	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-			  HDC_FORCE_NON_COHERENT);
-
-	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
-	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-		   BDW_DISABLE_HDC_INVALIDATION);
-
-	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
-	if (IS_SKYLAKE(dev_priv) ||
-	    IS_KABYLAKE(dev_priv) ||
-	    IS_COFFEELAKE(dev_priv))
-		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-				  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
-	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
-
-	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
-	if (IS_GEN9_LP(dev_priv)) {
-		u32 val = I915_READ(GEN8_L3SQCREG1);
-
-		val &= ~L3_PRIO_CREDITS_MASK;
-		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
-		I915_WRITE(GEN8_L3SQCREG1, val);
-	}
-
-	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
-	I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
-				    GEN8_LQSC_FLUSH_COHERENT_LINES));
-
-	/*
-	 * Supporting preemption with fine-granularity requires changes in the
-	 * batch buffer programming. Since we can't break old userspace, we
-	 * need to set our default preemption level to safe value. Userspace is
-	 * still able to use more fine-grained preemption levels, since in
-	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
-	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
-	 * not real HW workarounds, but merely a way to start using preemption
-	 * while maintaining old contract with userspace.
-	 */
-
-	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
-	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
-	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
-	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
-			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
-	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
-	if (IS_GEN9_LP(dev_priv))
-		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
-
-	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
-	ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
-	if (ret)
-		return ret;
-
-	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-	ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
-	if (ret)
-		return ret;
-
-	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	u8 vals[3] = { 0, 0, 0 };
-	unsigned int i;
-
-	for (i = 0; i < 3; i++) {
-		u8 ss;
-
-		/*
-		 * Only consider slices where one, and only one, subslice has 7
-		 * EUs
-		 */
-		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
-			continue;
-
-		/*
-		 * subslice_7eu[i] != 0 (because of the check above) and
-		 * ss_max == 4 (maximum number of subslices possible per slice)
-		 *
-		 * ->    0 <= ss <= 3;
-		 */
-		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
-		vals[i] = 3 - ss;
-	}
-
-	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
-		return 0;
-
-	/* Tune IZ hashing. See intel_device_info_runtime_init() */
-	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-			    GEN9_IZ_HASHING_MASK(2) |
-			    GEN9_IZ_HASHING_MASK(1) |
-			    GEN9_IZ_HASHING_MASK(0),
-			    GEN9_IZ_HASHING(2, vals[2]) |
-			    GEN9_IZ_HASHING(1, vals[1]) |
-			    GEN9_IZ_HASHING(0, vals[0]));
-
-	return 0;
-}
-
-static int skl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaEnableGapsTsvCreditFix:skl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-	/* WaDisableGafsUnitClkGating:skl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-	/* WaInPlaceDecompressionHang:skl */
-	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
-		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	/* WaDisableLSQCROPERFforOCL:skl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-	if (ret)
-		return ret;
-
-	return skl_tune_iz_hashing(engine);
-}
-
-static int bxt_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaDisableThreadStallDopClockGating:bxt */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-			  STALL_DOP_GATING_DISABLE);
-
-	/* WaDisablePooledEuLoadBalancingFix:bxt */
-	I915_WRITE(FF_SLICE_CS_CHICKEN2,
-		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
-
-	/* WaToEnableHwFixForPushConstHWBug:bxt */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaInPlaceDecompressionHang:bxt */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	return 0;
-}
-
-static int cnl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
-		I915_WRITE(GAMT_CHKN_BIT_REG,
-			   (I915_READ(GAMT_CHKN_BIT_REG) |
-			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
-
-	/* WaForceContextSaveRestoreNonCoherent:cnl */
-	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
-			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
-
-	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
-	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
-		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
-
-	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
-	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
-		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
-
-	/* WaInPlaceDecompressionHang:cnl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	/* WaPushConstantDereferenceHoldDisable:cnl */
-	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
-
-	/* FtrEnableFastAnisoL1BankingFix: cnl */
-	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
-
-	/* WaDisable3DMidCmdPreemption:cnl */
-	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
-
-	/* WaDisableGPGPUMidCmdPreemption:cnl */
-	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
-			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
-
-	/* WaEnablePreemptionGranularityControlByUMD:cnl */
-	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
-		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
-	ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
-	if (ret)
-		return ret;
-
-	/* WaDisableEarlyEOT:cnl */
-	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
-
-	return 0;
-}
-
-static int kbl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaEnableGapsTsvCreditFix:kbl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-	/* WaDisableDynamicCreditSharing:kbl */
-	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
-		I915_WRITE(GAMT_CHKN_BIT_REG,
-			   (I915_READ(GAMT_CHKN_BIT_REG) |
-			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));
-
-	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
-	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
-		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-				  HDC_FENCE_DEST_SLM_DISABLE);
-
-	/* WaToEnableHwFixForPushConstHWBug:kbl */
-	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
-		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaDisableGafsUnitClkGating:kbl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-	/* WaDisableSbeCacheDispatchPortSharing:kbl */
-	WA_SET_BIT_MASKED(
-		GEN7_HALF_SLICE_CHICKEN1,
-		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
-	/* WaInPlaceDecompressionHang:kbl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	/* WaDisableLSQCROPERFforOCL:kbl */
-	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-static int glk_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
-	ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
-	if (ret)
-		return ret;
-
-	/* WaToEnableHwFixForPushConstHWBug:glk */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	return 0;
-}
-
-static int cfl_init_workarounds(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int ret;
-
-	ret = gen9_init_workarounds(engine);
-	if (ret)
-		return ret;
-
-	/* WaEnableGapsTsvCreditFix:cfl */
-	I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-				   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
-	/* WaToEnableHwFixForPushConstHWBug:cfl */
-	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
-			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
-
-	/* WaDisableGafsUnitClkGating:cfl */
-	I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
-				  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
-
-	/* WaDisableSbeCacheDispatchPortSharing:cfl */
-	WA_SET_BIT_MASKED(
-		GEN7_HALF_SLICE_CHICKEN1,
-		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
-	/* WaInPlaceDecompressionHang:cfl */
-	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
-		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
-		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
-
-	return 0;
-}
-
-int init_workarounds_ring(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int err;
-
-	if (GEM_WARN_ON(engine->id != RCS))
-		return -EINVAL;
-
-	dev_priv->workarounds.count = 0;
-	dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
-
-	if (IS_BROADWELL(dev_priv))
-		err = bdw_init_workarounds(engine);
-	else if (IS_CHERRYVIEW(dev_priv))
-		err = chv_init_workarounds(engine);
-	else if (IS_SKYLAKE(dev_priv))
-		err =  skl_init_workarounds(engine);
-	else if (IS_BROXTON(dev_priv))
-		err = bxt_init_workarounds(engine);
-	else if (IS_KABYLAKE(dev_priv))
-		err = kbl_init_workarounds(engine);
-	else if (IS_GEMINILAKE(dev_priv))
-		err =  glk_init_workarounds(engine);
-	else if (IS_COFFEELAKE(dev_priv))
-		err = cfl_init_workarounds(engine);
-	else if (IS_CANNONLAKE(dev_priv))
-		err = cnl_init_workarounds(engine);
-	else
-		err = 0;
-	if (err)
-		return err;
-
-	DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
-			 engine->name, dev_priv->workarounds.count);
-	return 0;
-}
-
-int intel_ring_workarounds_emit(struct i915_request *rq)
-{
-	struct i915_workarounds *w = &rq->i915->workarounds;
-	u32 *cs;
-	int ret, i;
-
-	if (w->count == 0)
-		return 0;
-
-	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
-	if (ret)
-		return ret;
-
-	cs = intel_ring_begin(rq, w->count * 2 + 2);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
-	for (i = 0; i < w->count; i++) {
-		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
-		*cs++ = w->reg[i].value;
-	}
-	*cs++ = MI_NOOP;
-
-	intel_ring_advance(rq, cs);
-
-	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static bool ring_is_idle(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1611,7 +989,7 @@
 	 * the last request that remains in the timeline. When idle, it is
 	 * the last executed context as tracked by retirement.
 	 */
-	rq = __i915_gem_active_peek(&engine->timeline->last_request);
+	rq = __i915_gem_active_peek(&engine->timeline.last_request);
 	if (rq)
 		return rq->ctx == kernel_context;
 	else
@@ -1659,6 +1037,9 @@
 			intel_engine_dump(engine, &p, NULL);
 		}
 
+		/* Must be reset upon idling, or we may miss the busy wakeup. */
+		GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN);
+
 		if (engine->park)
 			engine->park(engine);
 
@@ -1681,6 +1062,8 @@
 	for_each_engine(engine, i915, id) {
 		if (engine->unpark)
 			engine->unpark(engine);
+
+		intel_engine_init_hangcheck(engine);
 	}
 }
 
@@ -1713,17 +1096,37 @@
 	return which;
 }
 
+static int print_sched_attr(struct drm_i915_private *i915,
+			    const struct i915_sched_attr *attr,
+			    char *buf, int x, int len)
+{
+	if (attr->priority == I915_PRIORITY_INVALID)
+		return x;
+
+	x += snprintf(buf + x, len - x,
+		      " prio=%d", attr->priority);
+
+	return x;
+}
+
 static void print_request(struct drm_printer *m,
 			  struct i915_request *rq,
 			  const char *prefix)
 {
-	drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix,
+	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
+	char buf[80];
+	int x = 0;
+
+	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
+
+	drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n",
+		   prefix,
 		   rq->global_seqno,
 		   i915_request_completed(rq) ? "!" : "",
 		   rq->fence.context, rq->fence.seqno,
-		   rq->priotree.priority,
+		   buf,
 		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
-		   rq->timeline->common->name);
+		   name);
 }
 
 static void hexdump(struct drm_printer *m, const void *buf, size_t len)
@@ -1829,12 +1232,15 @@
 		ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 		read = GEN8_CSB_READ_PTR(ptr);
 		write = GEN8_CSB_WRITE_PTR(ptr);
-		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n",
+		drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
 			   read, execlists->csb_head,
 			   write,
 			   intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
 			   yesno(test_bit(ENGINE_IRQ_EXECLIST,
-					  &engine->irq_posted)));
+					  &engine->irq_posted)),
+			   yesno(test_bit(TASKLET_STATE_SCHED,
+					  &engine->execlists.tasklet.state)),
+			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
 		if (read >= GEN8_CSB_ENTRIES)
 			read = 0;
 		if (write >= GEN8_CSB_ENTRIES)
@@ -1861,8 +1267,9 @@
 				char hdr[80];
 
 				snprintf(hdr, sizeof(hdr),
-					 "\t\tELSP[%d] count=%d, rq: ",
-					 idx, count);
+					 "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ",
+					 idx, count,
+					 i915_ggtt_offset(rq->ring->vma));
 				print_request(m, rq, hdr);
 			} else {
 				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
@@ -1884,11 +1291,13 @@
 		       struct drm_printer *m,
 		       const char *header, ...)
 {
+	const int MAX_REQUESTS_TO_SHOW = 8;
 	struct intel_breadcrumbs * const b = &engine->breadcrumbs;
 	const struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_gpu_error * const error = &engine->i915->gpu_error;
-	struct i915_request *rq;
+	struct i915_request *rq, *last;
 	struct rb_node *rb;
+	int count;
 
 	if (header) {
 		va_list ap;
@@ -1901,12 +1310,11 @@
 	if (i915_terminally_wedged(&engine->i915->gpu_error))
 		drm_printf(m, "*** WEDGED ***\n");
 
-	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n",
+	drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
 		   intel_engine_get_seqno(engine),
 		   intel_engine_last_submit(engine),
 		   engine->hangcheck.seqno,
-		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp),
-		   engine->timeline->inflight_seqnos);
+		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
 	drm_printf(m, "\tReset count: %d (global %d)\n",
 		   i915_reset_engine_count(error, engine),
 		   i915_reset_count(error));
@@ -1915,14 +1323,14 @@
 
 	drm_printf(m, "\tRequests:\n");
 
-	rq = list_first_entry(&engine->timeline->requests,
+	rq = list_first_entry(&engine->timeline.requests,
 			      struct i915_request, link);
-	if (&rq->link != &engine->timeline->requests)
+	if (&rq->link != &engine->timeline.requests)
 		print_request(m, rq, "\t\tfirst  ");
 
-	rq = list_last_entry(&engine->timeline->requests,
+	rq = list_last_entry(&engine->timeline.requests,
 			     struct i915_request, link);
-	if (&rq->link != &engine->timeline->requests)
+	if (&rq->link != &engine->timeline.requests)
 		print_request(m, rq, "\t\tlast   ");
 
 	rq = i915_gem_find_active_request(engine);
@@ -1933,12 +1341,16 @@
 			   rq->head, rq->postfix, rq->tail,
 			   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
 			   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
-		drm_printf(m, "\t\tring->start: 0x%08x\n",
+		drm_printf(m, "\t\tring->start:  0x%08x\n",
 			   i915_ggtt_offset(rq->ring->vma));
-		drm_printf(m, "\t\tring->head:  0x%08x\n",
+		drm_printf(m, "\t\tring->head:   0x%08x\n",
 			   rq->ring->head);
-		drm_printf(m, "\t\tring->tail:  0x%08x\n",
+		drm_printf(m, "\t\tring->tail:   0x%08x\n",
 			   rq->ring->tail);
+		drm_printf(m, "\t\tring->emit:   0x%08x\n",
+			   rq->ring->emit);
+		drm_printf(m, "\t\tring->space:  0x%08x\n",
+			   rq->ring->space);
 	}
 
 	rcu_read_unlock();
@@ -1950,18 +1362,49 @@
 		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
 	}
 
-	spin_lock_irq(&engine->timeline->lock);
-	list_for_each_entry(rq, &engine->timeline->requests, link)
-		print_request(m, rq, "\t\tE ");
+	spin_lock_irq(&engine->timeline.lock);
+
+	last = NULL;
+	count = 0;
+	list_for_each_entry(rq, &engine->timeline.requests, link) {
+		if (count++ < MAX_REQUESTS_TO_SHOW - 1)
+			print_request(m, rq, "\t\tE ");
+		else
+			last = rq;
+	}
+	if (last) {
+		if (count > MAX_REQUESTS_TO_SHOW) {
+			drm_printf(m,
+				   "\t\t...skipping %d executing requests...\n",
+				   count - MAX_REQUESTS_TO_SHOW);
+		}
+		print_request(m, last, "\t\tE ");
+	}
+
+	last = NULL;
+	count = 0;
 	drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
 	for (rb = execlists->first; rb; rb = rb_next(rb)) {
 		struct i915_priolist *p =
 			rb_entry(rb, typeof(*p), node);
 
-		list_for_each_entry(rq, &p->requests, priotree.link)
-			print_request(m, rq, "\t\tQ ");
+		list_for_each_entry(rq, &p->requests, sched.link) {
+			if (count++ < MAX_REQUESTS_TO_SHOW - 1)
+				print_request(m, rq, "\t\tQ ");
+			else
+				last = rq;
+		}
 	}
-	spin_unlock_irq(&engine->timeline->lock);
+	if (last) {
+		if (count > MAX_REQUESTS_TO_SHOW) {
+			drm_printf(m,
+				   "\t\t...skipping %d queued requests...\n",
+				   count - MAX_REQUESTS_TO_SHOW);
+		}
+		print_request(m, last, "\t\tQ ");
+	}
+
+	spin_unlock_irq(&engine->timeline.lock);
 
 	spin_lock_irq(&b->rb_lock);
 	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -2026,7 +1469,7 @@
 		return -ENODEV;
 
 	tasklet_disable(&execlists->tasklet);
-	spin_lock_irqsave(&engine->stats.lock, flags);
+	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (unlikely(engine->stats.enabled == ~0)) {
 		err = -EBUSY;
@@ -2050,7 +1493,7 @@
 	}
 
 unlock:
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 	tasklet_enable(&execlists->tasklet);
 
 	return err;
@@ -2079,12 +1522,13 @@
  */
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
 {
+	unsigned int seq;
 	ktime_t total;
-	unsigned long flags;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
-	total = __intel_engine_get_busy_time(engine);
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	do {
+		seq = read_seqbegin(&engine->stats.lock);
+		total = __intel_engine_get_busy_time(engine);
+	} while (read_seqretry(&engine->stats.lock, seq));
 
 	return total;
 }
@@ -2102,15 +1546,16 @@
 	if (!intel_engine_supports_stats(engine))
 		return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+	write_seqlock_irqsave(&engine->stats.lock, flags);
 	WARN_ON_ONCE(engine->stats.enabled == 0);
 	if (--engine->stats.enabled == 0) {
 		engine->stats.total = __intel_engine_get_busy_time(engine);
 		engine->stats.active = 0;
 	}
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_engine.c"
+#include "selftests/intel_engine_cs.c"
 #endif
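
The engine-stats hunks above convert engine->stats.lock from a spinlock to a seqlock: writers (the enable/disable paths) still serialize with IRQs saved, while intel_engine_get_busy_time() now retries a lockless snapshot instead of blocking. A minimal stand-alone sketch of that pattern, assuming a hypothetical struct engine_stats rather than the driver's real type:

	#include <linux/seqlock.h>
	#include <linux/ktime.h>

	struct engine_stats {
		seqlock_t lock;		/* init with seqlock_init() */
		ktime_t total;		/* accumulated busy time */
	};

	/* Reader: never blocks; retries if a writer raced the snapshot. */
	static ktime_t stats_read_busy_time(struct engine_stats *stats)
	{
		unsigned int seq;
		ktime_t total;

		do {
			seq = read_seqbegin(&stats->lock);
			total = stats->total;
		} while (read_seqretry(&stats->lock, seq));

		return total;
	}

	/* Writer: exclusive and IRQ-safe, mirroring the
	 * write_seqlock_irqsave() calls in the diff above. */
	static void stats_add_busy_time(struct engine_stats *stats, ktime_t dt)
	{
		unsigned long flags;

		write_seqlock_irqsave(&stats->lock, flags);
		stats->total = ktime_add(stats->total, dt);
		write_sequnlock_irqrestore(&stats->lock, flags);
	}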
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 707d49c..b431b67 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -1272,6 +1272,34 @@
 	mutex_unlock(&fbc->lock);
 }
 
+/*
+ * intel_fbc_reset_underrun - reset FBC fifo underrun status.
+ * @dev_priv: i915 device instance
+ *
+ * See intel_fbc_handle_fifo_underrun_irq(). For automated testing we
+ * want to re-enable FBC after an underrun to increase test coverage.
+ */
+int intel_fbc_reset_underrun(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	cancel_work_sync(&dev_priv->fbc.underrun_work);
+
+	ret = mutex_lock_interruptible(&dev_priv->fbc.lock);
+	if (ret)
+		return ret;
+
+	if (dev_priv->fbc.underrun_detected) {
+		DRM_DEBUG_KMS("Re-allowing FBC after fifo underrun\n");
+		dev_priv->fbc.no_fbc_reason = "FIFO underrun cleared";
+	}
+
+	dev_priv->fbc.underrun_detected = false;
+	mutex_unlock(&dev_priv->fbc.lock);
+
+	return 0;
+}
+
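
Note the ordering in intel_fbc_reset_underrun() above: the underrun worker takes fbc->lock itself, so the work is cancelled before the mutex is acquired; the reverse order could deadlock waiting for a worker blocked on the same lock. A distilled sketch of that cancel-before-lock pattern, with hypothetical names:

	#include <linux/workqueue.h>
	#include <linux/mutex.h>

	/* "worker" may itself take "lock", so cancel it first. */
	static int reset_guarded_flag(struct work_struct *worker,
				      struct mutex *lock, bool *flag)
	{
		int ret;

		cancel_work_sync(worker);	/* safe: lock not held here */

		ret = mutex_lock_interruptible(lock);
		if (ret)
			return ret;		/* interrupted by a signal */

		*flag = false;
		mutex_unlock(lock);
		return 0;
	}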
 /**
  * intel_fbc_handle_fifo_underrun_irq - disable FBC when we get a FIFO underrun
  * @dev_priv: i915 device instance
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 6467a5c..e9e02b5 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -221,6 +221,9 @@
 		goto out_unlock;
 	}
 
+	fb = &ifbdev->fb->base;
+	intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_DIRTYFB);
+
 	info = drm_fb_helper_alloc_fbi(helper);
 	if (IS_ERR(info)) {
 		DRM_ERROR("Failed to allocate fb_info\n");
@@ -230,8 +233,6 @@
 
 	info->par = helper;
 
-	fb = &ifbdev->fb->base;
-
 	ifbdev->helper.fb = fb;
 
 	strcpy(info->fix.id, "inteldrmfb");
@@ -640,7 +641,7 @@
 		if (!crtc->state->active)
 			continue;
 
-		WARN(!crtc->primary->fb,
+		WARN(!crtc->primary->state->fb,
 		     "re-used BIOS config but lost an fb on crtc %d\n",
 		     crtc->base.id);
 	}
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index 3a8d3d0..7fff0a0 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -80,7 +80,7 @@
 	}
 
 	might_sleep();
-	intel_psr_invalidate(dev_priv, frontbuffer_bits);
+	intel_psr_invalidate(dev_priv, frontbuffer_bits, origin);
 	intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits);
 	intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin);
 }
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
new file mode 100644
index 0000000..105e2a9
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -0,0 +1,274 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2003-2018 Intel Corporation
+ */
+
+#ifndef _INTEL_GPU_COMMANDS_H_
+#define _INTEL_GPU_COMMANDS_H_
+
+/*
+ * Instruction field definitions used by the command parser
+ */
+#define INSTR_CLIENT_SHIFT      29
+#define   INSTR_MI_CLIENT       0x0
+#define   INSTR_BC_CLIENT       0x2
+#define   INSTR_RC_CLIENT       0x3
+#define INSTR_SUBCLIENT_SHIFT   27
+#define INSTR_SUBCLIENT_MASK    0x18000000
+#define   INSTR_MEDIA_SUBCLIENT 0x2
+#define INSTR_26_TO_24_MASK	0x7000000
+#define   INSTR_26_TO_24_SHIFT	24
+
+/*
+ * Memory interface instructions used by the kernel
+ */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */
+#define  MI_GLOBAL_GTT    (1<<22)
+
+#define MI_NOOP			MI_INSTR(0, 0)
+#define MI_USER_INTERRUPT	MI_INSTR(0x02, 0)
+#define MI_WAIT_FOR_EVENT       MI_INSTR(0x03, 0)
+#define   MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
+#define   MI_WAIT_FOR_PLANE_B_FLIP      (1<<6)
+#define   MI_WAIT_FOR_PLANE_A_FLIP      (1<<2)
+#define   MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1)
+#define MI_FLUSH		MI_INSTR(0x04, 0)
+#define   MI_READ_FLUSH		(1 << 0)
+#define   MI_EXE_FLUSH		(1 << 1)
+#define   MI_NO_WRITE_FLUSH	(1 << 2)
+#define   MI_SCENE_COUNT	(1 << 3) /* just increment scene count */
+#define   MI_END_SCENE		(1 << 4) /* flush binner and incr scene count */
+#define   MI_INVALIDATE_ISP	(1 << 5) /* invalidate indirect state pointers */
+#define MI_REPORT_HEAD		MI_INSTR(0x07, 0)
+#define MI_ARB_ON_OFF		MI_INSTR(0x08, 0)
+#define   MI_ARB_ENABLE			(1<<0)
+#define   MI_ARB_DISABLE		(0<<0)
+#define MI_BATCH_BUFFER_END	MI_INSTR(0x0a, 0)
+#define MI_SUSPEND_FLUSH	MI_INSTR(0x0b, 0)
+#define   MI_SUSPEND_FLUSH_EN	(1<<0)
+#define MI_SET_APPID		MI_INSTR(0x0e, 0)
+#define MI_OVERLAY_FLIP		MI_INSTR(0x11, 0)
+#define   MI_OVERLAY_CONTINUE	(0x0<<21)
+#define   MI_OVERLAY_ON		(0x1<<21)
+#define   MI_OVERLAY_OFF	(0x2<<21)
+#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0)
+#define MI_DISPLAY_FLIP		MI_INSTR(0x14, 2)
+#define MI_DISPLAY_FLIP_I915	MI_INSTR(0x14, 1)
+#define   MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
+/* IVB has funny definitions for which plane to flip. */
+#define   MI_DISPLAY_FLIP_IVB_PLANE_A  (0 << 19)
+#define   MI_DISPLAY_FLIP_IVB_PLANE_B  (1 << 19)
+#define   MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19)
+#define   MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19)
+#define   MI_DISPLAY_FLIP_IVB_PLANE_C  (4 << 19)
+#define   MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19)
+/* SKL ones */
+#define   MI_DISPLAY_FLIP_SKL_PLANE_1_A	(0 << 8)
+#define   MI_DISPLAY_FLIP_SKL_PLANE_1_B	(1 << 8)
+#define   MI_DISPLAY_FLIP_SKL_PLANE_1_C	(2 << 8)
+#define   MI_DISPLAY_FLIP_SKL_PLANE_2_A	(4 << 8)
+#define   MI_DISPLAY_FLIP_SKL_PLANE_2_B	(5 << 8)
+#define   MI_DISPLAY_FLIP_SKL_PLANE_2_C	(6 << 8)
+#define   MI_DISPLAY_FLIP_SKL_PLANE_3_A	(7 << 8)
+#define   MI_DISPLAY_FLIP_SKL_PLANE_3_B	(8 << 8)
+#define   MI_DISPLAY_FLIP_SKL_PLANE_3_C	(9 << 8)
+#define MI_SEMAPHORE_MBOX	MI_INSTR(0x16, 1) /* gen6, gen7 */
+#define   MI_SEMAPHORE_GLOBAL_GTT    (1<<22)
+#define   MI_SEMAPHORE_UPDATE	    (1<<21)
+#define   MI_SEMAPHORE_COMPARE	    (1<<20)
+#define   MI_SEMAPHORE_REGISTER	    (1<<18)
+#define   MI_SEMAPHORE_SYNC_VR	    (0<<16) /* RCS  wait for VCS  (RVSYNC) */
+#define   MI_SEMAPHORE_SYNC_VER	    (1<<16) /* RCS  wait for VECS (RVESYNC) */
+#define   MI_SEMAPHORE_SYNC_BR	    (2<<16) /* RCS  wait for BCS  (RBSYNC) */
+#define   MI_SEMAPHORE_SYNC_BV	    (0<<16) /* VCS  wait for BCS  (VBSYNC) */
+#define   MI_SEMAPHORE_SYNC_VEV	    (1<<16) /* VCS  wait for VECS (VVESYNC) */
+#define   MI_SEMAPHORE_SYNC_RV	    (2<<16) /* VCS  wait for RCS  (VRSYNC) */
+#define   MI_SEMAPHORE_SYNC_RB	    (0<<16) /* BCS  wait for RCS  (BRSYNC) */
+#define   MI_SEMAPHORE_SYNC_VEB	    (1<<16) /* BCS  wait for VECS (BVESYNC) */
+#define   MI_SEMAPHORE_SYNC_VB	    (2<<16) /* BCS  wait for VCS  (BVSYNC) */
+#define   MI_SEMAPHORE_SYNC_BVE	    (0<<16) /* VECS wait for BCS  (VEBSYNC) */
+#define   MI_SEMAPHORE_SYNC_VVE	    (1<<16) /* VECS wait for VCS  (VEVSYNC) */
+#define   MI_SEMAPHORE_SYNC_RVE	    (2<<16) /* VECS wait for RCS  (VERSYNC) */
+#define   MI_SEMAPHORE_SYNC_INVALID (3<<16)
+#define   MI_SEMAPHORE_SYNC_MASK    (3<<16)
+#define MI_SET_CONTEXT		MI_INSTR(0x18, 0)
+#define   MI_MM_SPACE_GTT		(1<<8)
+#define   MI_MM_SPACE_PHYSICAL		(0<<8)
+#define   MI_SAVE_EXT_STATE_EN		(1<<3)
+#define   MI_RESTORE_EXT_STATE_EN	(1<<2)
+#define   MI_FORCE_RESTORE		(1<<1)
+#define   MI_RESTORE_INHIBIT		(1<<0)
+#define   HSW_MI_RS_SAVE_STATE_EN       (1<<3)
+#define   HSW_MI_RS_RESTORE_STATE_EN    (1<<2)
+#define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
+#define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
+#define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
+#define   MI_SEMAPHORE_POLL		(1<<15)
+#define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
+#define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
+#define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
+#define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */
+#define   MI_USE_GGTT		(1 << 22) /* g4x+ */
+#define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
+#define   MI_STORE_DWORD_INDEX_SHIFT 2
+/*
+ * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
+ * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
+ *   simply ignores the register load under certain conditions.
+ * - One can actually load arbitrarily many arbitrary registers: simply issue x
+ *   address/value pairs. Don't overdo it, though, x <= 2^4 must hold!
+ */
+#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
+#define   MI_LRI_FORCE_POSTED		(1<<12)
+#define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
+#define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
+#define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
+#define MI_FLUSH_DW		MI_INSTR(0x26, 1) /* for GEN6 */
+#define   MI_FLUSH_DW_STORE_INDEX	(1<<21)
+#define   MI_INVALIDATE_TLB		(1<<18)
+#define   MI_FLUSH_DW_OP_STOREDW	(1<<14)
+#define   MI_FLUSH_DW_OP_MASK		(3<<14)
+#define   MI_FLUSH_DW_NOTIFY		(1<<8)
+#define   MI_INVALIDATE_BSD		(1<<7)
+#define   MI_FLUSH_DW_USE_GTT		(1<<2)
+#define   MI_FLUSH_DW_USE_PPGTT		(0<<2)
+#define MI_LOAD_REGISTER_MEM	   MI_INSTR(0x29, 1)
+#define MI_LOAD_REGISTER_MEM_GEN8  MI_INSTR(0x29, 2)
+#define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
+#define   MI_BATCH_NON_SECURE		(1)
+/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
+#define   MI_BATCH_NON_SECURE_I965	(1<<8)
+#define   MI_BATCH_PPGTT_HSW		(1<<8)
+#define   MI_BATCH_NON_SECURE_HSW	(1<<13)
+#define MI_BATCH_BUFFER_START	MI_INSTR(0x31, 0)
+#define   MI_BATCH_GTT		    (2<<6) /* aliased with (1<<7) on gen4 */
+#define MI_BATCH_BUFFER_START_GEN8	MI_INSTR(0x31, 1)
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+
+/*
+ * 3D instructions used by the kernel
+ */
+#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags))
+
+#define GEN9_MEDIA_POOL_STATE     ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4)
+#define   GEN9_MEDIA_POOL_ENABLE  (1 << 31)
+#define GFX_OP_RASTER_RULES    ((0x3<<29)|(0x7<<24))
+#define GFX_OP_SCISSOR         ((0x3<<29)|(0x1c<<24)|(0x10<<19))
+#define   SC_UPDATE_SCISSOR       (0x1<<1)
+#define   SC_ENABLE_MASK          (0x1<<0)
+#define   SC_ENABLE               (0x1<<0)
+#define GFX_OP_LOAD_INDIRECT   ((0x3<<29)|(0x1d<<24)|(0x7<<16))
+#define GFX_OP_SCISSOR_INFO    ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1))
+#define   SCI_YMIN_MASK      (0xffff<<16)
+#define   SCI_XMIN_MASK      (0xffff<<0)
+#define   SCI_YMAX_MASK      (0xffff<<16)
+#define   SCI_XMAX_MASK      (0xffff<<0)
+#define GFX_OP_SCISSOR_ENABLE	 ((0x3<<29)|(0x1c<<24)|(0x10<<19))
+#define GFX_OP_SCISSOR_RECT	 ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1)
+#define GFX_OP_COLOR_FACTOR      ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0)
+#define GFX_OP_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define GFX_OP_MAP_INFO          ((0x3<<29)|(0x1d<<24)|0x4)
+#define GFX_OP_DESTBUFFER_VARS   ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0)
+#define GFX_OP_DESTBUFFER_INFO	 ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1)
+#define GFX_OP_DRAWRECT_INFO     ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
+#define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
+
+#define COLOR_BLT_CMD			(2<<29 | 0x40<<22 | (5-2))
+#define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|4)
+#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
+#define XY_MONO_SRC_COPY_IMM_BLT	((2<<29)|(0x71<<22)|5)
+#define   BLT_WRITE_A			(2<<20)
+#define   BLT_WRITE_RGB			(1<<20)
+#define   BLT_WRITE_RGBA		(BLT_WRITE_RGB | BLT_WRITE_A)
+#define   BLT_DEPTH_8			(0<<24)
+#define   BLT_DEPTH_16_565		(1<<24)
+#define   BLT_DEPTH_16_1555		(2<<24)
+#define   BLT_DEPTH_32			(3<<24)
+#define   BLT_ROP_SRC_COPY		(0xcc<<16)
+#define   BLT_ROP_COLOR_COPY		(0xf0<<16)
+#define XY_SRC_COPY_BLT_SRC_TILED	(1<<15) /* 965+ only */
+#define XY_SRC_COPY_BLT_DST_TILED	(1<<11) /* 965+ only */
+#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2)
+#define   ASYNC_FLIP                (1<<22)
+#define   DISPLAY_PLANE_A           (0<<20)
+#define   DISPLAY_PLANE_B           (1<<20)
+#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define   PIPE_CONTROL_FLUSH_L3				(1<<27)
+#define   PIPE_CONTROL_GLOBAL_GTT_IVB			(1<<24) /* gen7+ */
+#define   PIPE_CONTROL_MMIO_WRITE			(1<<23)
+#define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
+#define   PIPE_CONTROL_CS_STALL				(1<<20)
+#define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
+#define   PIPE_CONTROL_MEDIA_STATE_CLEAR		(1<<16)
+#define   PIPE_CONTROL_QW_WRITE				(1<<14)
+#define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
+#define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
+#define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
+#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
+#define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11) /* MBZ on ILK */
+#define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10) /* GM45+ only */
+#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
+#define   PIPE_CONTROL_NOTIFY				(1<<8)
+#define   PIPE_CONTROL_FLUSH_ENABLE			(1<<7) /* gen7+ */
+#define   PIPE_CONTROL_DC_FLUSH_ENABLE			(1<<5)
+#define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
+#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
+#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
+#define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+
+/*
+ * Commands used only by the command parser
+ */
+#define MI_SET_PREDICATE        MI_INSTR(0x01, 0)
+#define MI_ARB_CHECK            MI_INSTR(0x05, 0)
+#define MI_RS_CONTROL           MI_INSTR(0x06, 0)
+#define MI_URB_ATOMIC_ALLOC     MI_INSTR(0x09, 0)
+#define MI_PREDICATE            MI_INSTR(0x0C, 0)
+#define MI_RS_CONTEXT           MI_INSTR(0x0F, 0)
+#define MI_TOPOLOGY_FILTER      MI_INSTR(0x0D, 0)
+#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0)
+#define MI_URB_CLEAR            MI_INSTR(0x19, 0)
+#define MI_UPDATE_GTT           MI_INSTR(0x23, 0)
+#define MI_CLFLUSH              MI_INSTR(0x27, 0)
+#define MI_REPORT_PERF_COUNT    MI_INSTR(0x28, 0)
+#define   MI_REPORT_PERF_COUNT_GGTT (1<<0)
+#define MI_LOAD_REGISTER_REG    MI_INSTR(0x2A, 0)
+#define MI_RS_STORE_DATA_IMM    MI_INSTR(0x2B, 0)
+#define MI_LOAD_URB_MEM         MI_INSTR(0x2C, 0)
+#define MI_STORE_URB_MEM        MI_INSTR(0x2D, 0)
+#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0)
+
+#define PIPELINE_SELECT                ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16))
+#define GFX_OP_3DSTATE_VF_STATISTICS   ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16))
+#define MEDIA_VFE_STATE                ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16))
+#define  MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18)
+#define GPGPU_OBJECT                   ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16))
+#define GPGPU_WALKER                   ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16))
+#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16))
+#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16))
+#define GFX_OP_3DSTATE_SO_DECL_LIST \
+	((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16))
+
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16))
+#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \
+	((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16))
+
+#define MFX_WAIT  ((0x3<<29)|(0x1<<27)|(0x0<<16))
+
+#define COLOR_BLT     ((0x2<<29)|(0x40<<22))
+#define SRC_COPY_BLT  ((0x2<<29)|(0x43<<22))
+
+#endif /* _INTEL_GPU_COMMANDS_H_ */
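
The MI_LOAD_REGISTER_IMM comment in the new header spells out two hard rules: a leading MI_NOOP and at most 2^4 address/value pairs. A minimal sketch of an emitter obeying both, assuming a hypothetical struct reg_write and that "cs" already points at ring space reserved for 2 * count + 2 dwords:

	struct reg_write {
		u32 offset;	/* MMIO register offset */
		u32 value;	/* immediate to load into it */
	};

	static u32 *emit_lri(u32 *cs, const struct reg_write *w, int count)
	{
		int i;

		GEM_BUG_ON(count > 16);		/* x <= 2^4 must hold */

		*cs++ = MI_NOOP;		/* required before the LRI */
		*cs++ = MI_LOAD_REGISTER_IMM(count);
		for (i = 0; i < count; i++) {
			*cs++ = w[i].offset;
			*cs++ = w[i].value;
		}

		return cs;
	}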
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index ff08ea0..116f4cc 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -64,10 +64,12 @@
 {
 	intel_guc_fw_init_early(guc);
 	intel_guc_ct_init_early(&guc->ct);
-	intel_guc_log_init_early(guc);
+	intel_guc_log_init_early(&guc->log);
 
 	mutex_init(&guc->send_mutex);
+	spin_lock_init(&guc->irq_lock);
 	guc->send = intel_guc_send_nop;
+	guc->handler = intel_guc_to_host_event_handler_nop;
 	guc->notify = gen8_guc_raise_irq;
 }
 
@@ -86,9 +88,10 @@
 	 * or scheduled later on resume. This way the handling of work
 	 * item can be kept same between system suspend & rpm suspend.
 	 */
-	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
-						WQ_HIGHPRI | WQ_FREEZABLE);
-	if (!guc->log.runtime.flush_wq) {
+	guc->log.relay.flush_wq =
+		alloc_ordered_workqueue("i915-guc_log",
+					WQ_HIGHPRI | WQ_FREEZABLE);
+	if (!guc->log.relay.flush_wq) {
 		DRM_ERROR("Couldn't allocate workqueue for GuC log\n");
 		return -ENOMEM;
 	}
@@ -111,7 +114,7 @@
 		guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt",
 							  WQ_HIGHPRI);
 		if (!guc->preempt_wq) {
-			destroy_workqueue(guc->log.runtime.flush_wq);
+			destroy_workqueue(guc->log.relay.flush_wq);
 			DRM_ERROR("Couldn't allocate workqueue for GuC "
 				  "preemption\n");
 			return -ENOMEM;
@@ -129,7 +132,7 @@
 	    USES_GUC_SUBMISSION(dev_priv))
 		destroy_workqueue(guc->preempt_wq);
 
-	destroy_workqueue(guc->log.runtime.flush_wq);
+	destroy_workqueue(guc->log.relay.flush_wq);
 }
 
 static int guc_shared_data_create(struct intel_guc *guc)
@@ -169,7 +172,7 @@
 		return ret;
 	GEM_BUG_ON(!guc->shared_data);
 
-	ret = intel_guc_log_create(guc);
+	ret = intel_guc_log_create(&guc->log);
 	if (ret)
 		goto err_shared;
 
@@ -184,7 +187,7 @@
 	return 0;
 
 err_log:
-	intel_guc_log_destroy(guc);
+	intel_guc_log_destroy(&guc->log);
 err_shared:
 	guc_shared_data_destroy(guc);
 	return ret;
@@ -196,41 +199,27 @@
 
 	i915_ggtt_disable_guc(dev_priv);
 	intel_guc_ads_destroy(guc);
-	intel_guc_log_destroy(guc);
+	intel_guc_log_destroy(&guc->log);
 	guc_shared_data_destroy(guc);
 }
 
-static u32 get_gt_type(struct drm_i915_private *dev_priv)
+static u32 get_log_control_flags(void)
 {
-	/* XXX: GT type based on PCI device ID? field seems unused by fw */
-	return 0;
-}
+	u32 level = i915_modparams.guc_log_level;
+	u32 flags = 0;
 
-static u32 get_core_family(struct drm_i915_private *dev_priv)
-{
-	u32 gen = INTEL_GEN(dev_priv);
+	GEM_BUG_ON(level < 0);
 
-	switch (gen) {
-	case 9:
-		return GUC_CORE_FAMILY_GEN9;
+	if (!GUC_LOG_LEVEL_IS_ENABLED(level))
+		flags |= GUC_LOG_DEFAULT_DISABLED;
 
-	default:
-		MISSING_CASE(gen);
-		return GUC_CORE_FAMILY_UNKNOWN;
-	}
-}
+	if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
+		flags |= GUC_LOG_DISABLED;
+	else
+		flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
+			 GUC_LOG_VERBOSITY_SHIFT;
 
-static u32 get_log_verbosity_flags(void)
-{
-	if (i915_modparams.guc_log_level > 0) {
-		u32 verbosity = i915_modparams.guc_log_level - 1;
-
-		GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX);
-		return verbosity << GUC_LOG_VERBOSITY_SHIFT;
-	}
-
-	GEM_BUG_ON(i915_modparams.enable_guc < 0);
-	return GUC_LOG_DISABLED;
+	return flags;
 }
 
 /*
@@ -246,10 +235,6 @@
 
 	memset(params, 0, sizeof(params));
 
-	params[GUC_CTL_DEVICE_INFO] |=
-		(get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) |
-		(get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT);
-
 	/*
 	 * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one
 	 * second. This ARAT is calculated by:
@@ -265,12 +250,13 @@
 
 	params[GUC_CTL_LOG_PARAMS] = guc->log.flags;
 
-	params[GUC_CTL_DEBUG] = get_log_verbosity_flags();
+	params[GUC_CTL_DEBUG] = get_log_control_flags();
 
 	/* If GuC submission is enabled, set up additional parameters here */
 	if (USES_GUC_SUBMISSION(dev_priv)) {
-		u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
-		u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool);
+		u32 ads = intel_guc_ggtt_offset(guc,
+						guc->ads_vma) >> PAGE_SHIFT;
+		u32 pgs = intel_guc_ggtt_offset(guc, guc->stage_desc_pool);
 		u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16;
 
 		params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
@@ -301,16 +287,23 @@
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER);
 }
 
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len)
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len,
+		       u32 *response_buf, u32 response_buf_size)
 {
 	WARN(1, "Unexpected send: action=%#x\n", *action);
 	return -ENODEV;
 }
 
+void intel_guc_to_host_event_handler_nop(struct intel_guc *guc)
+{
+	WARN(1, "Unexpected event: no suitable handler\n");
+}
+
 /*
  * This function implements the MMIO based host to GuC interface.
  */
-int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
+			u32 *response_buf, u32 response_buf_size)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	u32 status;
@@ -320,6 +313,9 @@
 	GEM_BUG_ON(!len);
 	GEM_BUG_ON(len > guc->send_regs.count);
 
+	/* We expect only action code */
+	GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK);
+
 	/* If CT is available, we expect to use MMIO only during init/fini */
 	GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
 		*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
@@ -341,29 +337,74 @@
 	 */
 	ret = __intel_wait_for_register_fw(dev_priv,
 					   guc_send_reg(guc, 0),
-					   INTEL_GUC_RECV_MASK,
-					   INTEL_GUC_RECV_MASK,
+					   INTEL_GUC_MSG_TYPE_MASK,
+					   INTEL_GUC_MSG_TYPE_RESPONSE <<
+					   INTEL_GUC_MSG_TYPE_SHIFT,
 					   10, 10, &status);
-	if (status != INTEL_GUC_STATUS_SUCCESS) {
-		/*
-		 * Either the GuC explicitly returned an error (which
-		 * we convert to -EIO here) or no response at all was
-		 * received within the timeout limit (-ETIMEDOUT)
-		 */
-		if (ret != -ETIMEDOUT)
-			ret = -EIO;
+	/* If GuC explicitly returned an error, convert it to -EIO */
+	if (!ret && !INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(status))
+		ret = -EIO;
 
-		DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;"
-			 " ret=%d status=0x%08X response=0x%08X\n",
-			 action[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
+	if (ret) {
+		DRM_DEBUG_DRIVER("INTEL_GUC_SEND: Action 0x%X failed;"
+				 " ret=%d status=0x%08X response=0x%08X\n",
+				 action[0], ret, status,
+				 I915_READ(SOFT_SCRATCH(15)));
+		goto out;
 	}
 
+	if (response_buf) {
+		int count = min(response_buf_size, guc->send_regs.count - 1);
+
+		for (i = 0; i < count; i++)
+			response_buf[i] = I915_READ(guc_send_reg(guc, i + 1));
+	}
+
+	/* Use data from the GuC response as our return value */
+	ret = INTEL_GUC_MSG_TO_DATA(status);
+
+out:
 	intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains);
 	mutex_unlock(&guc->send_mutex);
 
 	return ret;
 }
 
+void intel_guc_to_host_event_handler_mmio(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	u32 msg, val;
+
+	/*
+	 * Sample the log buffer flush related bits and clear them out
+	 * immediately from the message identity register, to minimize the
+	 * probability of losing a flush interrupt when there are
+	 * back-to-back flush interrupts.
+	 * A new flush interrupt, for a different log buffer type (like
+	 * ISR), can arrive whilst the Host is handling one (for DPC).
+	 * Since the same bit is used in the message register for ISR and
+	 * DPC, the GuC could set the bit for the 2nd interrupt while the
+	 * Host is still clearing it for the 1st interrupt.
+	 */
+	spin_lock(&guc->irq_lock);
+	val = I915_READ(SOFT_SCRATCH(15));
+	msg = val & guc->msg_enabled_mask;
+	I915_WRITE(SOFT_SCRATCH(15), val & ~msg);
+	spin_unlock(&guc->irq_lock);
+
+	intel_guc_to_host_process_recv_msg(guc, msg);
+}
+
+void intel_guc_to_host_process_recv_msg(struct intel_guc *guc, u32 msg)
+{
+	/* Make sure to handle only enabled messages */
+	msg &= guc->msg_enabled_mask;
+
+	if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
+		   INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED))
+		intel_guc_log_handle_flush_event(&guc->log);
+}
+
 int intel_guc_sample_forcewake(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
@@ -410,7 +451,7 @@
 	u32 data[] = {
 		INTEL_GUC_ACTION_ENTER_S_STATE,
 		GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */
-		guc_ggtt_offset(guc->shared_data)
+		intel_guc_ggtt_offset(guc, guc->shared_data)
 	};
 
 	return intel_guc_send(guc, data, ARRAY_SIZE(data));
@@ -434,7 +475,7 @@
 	data[3] = 0;
 	data[4] = 0;
 	data[5] = guc->execbuf_client->stage_id;
-	data[6] = guc_ggtt_offset(guc->shared_data);
+	data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
 
 	return intel_guc_send(guc, data, ARRAY_SIZE(data));
 }
@@ -448,13 +489,66 @@
 	u32 data[] = {
 		INTEL_GUC_ACTION_EXIT_S_STATE,
 		GUC_POWER_D0,
-		guc_ggtt_offset(guc->shared_data)
+		intel_guc_ggtt_offset(guc, guc->shared_data)
 	};
 
 	return intel_guc_send(guc, data, ARRAY_SIZE(data));
 }
 
 /**
+ * DOC: GuC Address Space
+ *
+ * The layout of GuC address space is shown below:
+ *
+ * ::
+ *
+ *     +==============> +====================+ <== GUC_GGTT_TOP
+ *     ^                |                    |
+ *     |                |                    |
+ *     |                |        DRAM        |
+ *     |                |       Memory       |
+ *     |                |                    |
+ *    GuC               |                    |
+ *  Address  +========> +====================+ <== WOPCM Top
+ *   Space   ^          |   HW contexts RSVD |
+ *     |     |          |        WOPCM       |
+ *     |     |     +==> +--------------------+ <== GuC WOPCM Top
+ *     |    GuC    ^    |                    |
+ *     |    GGTT   |    |                    |
+ *     |    Pin   GuC   |        GuC         |
+ *     |    Bias WOPCM  |       WOPCM        |
+ *     |     |    Size  |                    |
+ *     |     |     |    |                    |
+ *     v     v     v    |                    |
+ *     +=====+=====+==> +====================+ <== GuC WOPCM Base
+ *                      |   Non-GuC WOPCM    |
+ *                      |   (HuC/Reserved)   |
+ *                      +====================+ <== WOPCM Base
+ *
+ * The lower part of the GuC Address Space [0, ggtt_pin_bias) is mapped to
+ * WOPCM, while the upper part [ggtt_pin_bias, GUC_GGTT_TOP) is mapped to
+ * DRAM. The value of the GuC ggtt_pin_bias is determined by the overall
+ * WOPCM size and the actual GuC WOPCM size.
+ */
+
+/**
+ * intel_guc_init_ggtt_pin_bias() - Initialize the GuC ggtt_pin_bias value.
+ * @guc: intel_guc structure.
+ *
+ * This function will calculate and initialize the ggtt_pin_bias value based on
+ * overall WOPCM size and GuC WOPCM size.
+ */
+void intel_guc_init_ggtt_pin_bias(struct intel_guc *guc)
+{
+	struct drm_i915_private *i915 = guc_to_i915(guc);
+
+	GEM_BUG_ON(!i915->wopcm.size);
+	GEM_BUG_ON(i915->wopcm.size < i915->wopcm.guc.base);
+
+	guc->ggtt_pin_bias = i915->wopcm.size - i915->wopcm.guc.base;
+}
+
+/**
  * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
  * @guc:	the guc
  * @size:	size of area to allocate (both virtual space and memory)
@@ -462,7 +556,7 @@
  * This is a wrapper to create an object for use with the GuC. In order to
  * use it inside the GuC, an object needs to be pinned lifetime, so we allocate
  * both some backing storage and a range inside the Global GTT. We must pin
- * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that
+ * it in the GGTT somewhere other than [0, GUC ggtt_pin_bias) because that
  * range is reserved inside GuC.
  *
  * Return:	A i915_vma if successful, otherwise an ERR_PTR.
@@ -483,7 +577,7 @@
 		goto err;
 
 	ret = i915_vma_pin(vma, 0, PAGE_SIZE,
-			   PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+			   PIN_GLOBAL | PIN_OFFSET_BIAS | guc->ggtt_pin_bias);
 	if (ret) {
 		vma = ERR_PTR(ret);
 		goto err;
@@ -495,14 +589,3 @@
 	i915_gem_object_put(obj);
 	return vma;
 }
-
-u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv)
-{
-	u32 wopcm_size = GUC_WOPCM_TOP;
-
-	/* On BXT, the top of WOPCM is reserved for RC6 context */
-	if (IS_GEN9_LP(dev_priv))
-		wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED;
-
-	return wopcm_size;
-}
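
The reworked intel_guc_send_mmio() above no longer polls a dedicated RECV bit; it waits until the TYPE field of the first send register reads back as RESPONSE, then decodes the DATA field as the return value. The predicate it waits on, extracted as a sketch (the INTEL_GUC_MSG_* names are the ones the diff uses; the helper itself is illustrative):

	static bool guc_mmio_response_arrived(u32 status)
	{
		/* GuC rewrites the register; TYPE flips once a reply lands */
		return (status & INTEL_GUC_MSG_TYPE_MASK) ==
		       (INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT);
	}

On success the caller returns INTEL_GUC_MSG_TO_DATA(status), so a zero or positive value now carries data from the GuC rather than just meaning "no error".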
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index b9424ac..f1265e1 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -49,11 +49,16 @@
 	struct intel_guc_log log;
 	struct intel_guc_ct ct;
 
+	/* Offset where Non-WOPCM memory starts. */
+	u32 ggtt_pin_bias;
+
 	/* Log snapshot if GuC errors during load */
 	struct drm_i915_gem_object *load_err_log;
 
 	/* intel_guc_recv interrupt related state */
+	spinlock_t irq_lock;
 	bool interrupts_enabled;
+	unsigned int msg_enabled_mask;
 
 	struct i915_vma *ads_vma;
 	struct i915_vma *stage_desc_pool;
@@ -83,7 +88,11 @@
 	struct mutex send_mutex;
 
 	/* GuC's FW specific send function */
-	int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
+	int (*send)(struct intel_guc *guc, const u32 *data, u32 len,
+		    u32 *response_buf, u32 response_buf_size);
+
+	/* GuC's FW specific event handler function */
+	void (*handler)(struct intel_guc *guc);
 
 	/* GuC's FW specific notify function */
 	void (*notify)(struct intel_guc *guc);
@@ -92,7 +101,14 @@
 static
 inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
 {
-	return guc->send(guc, action, len);
+	return guc->send(guc, action, len, NULL, 0);
+}
+
+static inline int
+intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
+			   u32 *response_buf, u32 response_buf_size)
+{
+	return guc->send(guc, action, len, response_buf, response_buf_size);
 }
 
 static inline void intel_guc_notify(struct intel_guc *guc)
@@ -100,17 +116,33 @@
 	guc->notify(guc);
 }
 
-/*
- * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP),
- * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is
- * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects
- * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM.
+static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
+{
+	guc->handler(guc);
+}
+
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP	0xFEE00000
+
+/**
+ * intel_guc_ggtt_offset() - Get and validate the GGTT offset of @vma
+ * @guc: intel_guc structure.
+ * @vma: i915 graphics virtual memory area.
+ *
+ * GuC does not allow any gfx GGTT address that falls into the range
+ * [0, GuC ggtt_pin_bias), which is reserved for Boot ROM, SRAM and WOPCM.
+ * Currently, in order to exclude the [0, GuC ggtt_pin_bias) address space
+ * from the GGTT, all gfx objects used by GuC are allocated with
+ * intel_guc_allocate_vma() and pinned with PIN_OFFSET_BIAS set to the
+ * value of GuC ggtt_pin_bias.
+ *
+ * Return: GGTT offset of the @vma.
  */
-static inline u32 guc_ggtt_offset(struct i915_vma *vma)
+static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc,
+					struct i915_vma *vma)
 {
 	u32 offset = i915_ggtt_offset(vma);
 
-	GEM_BUG_ON(offset < GUC_WOPCM_TOP);
+	GEM_BUG_ON(offset < guc->ggtt_pin_bias);
 	GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP));
 
 	return offset;
@@ -119,17 +151,43 @@
 void intel_guc_init_early(struct intel_guc *guc);
 void intel_guc_init_send_regs(struct intel_guc *guc);
 void intel_guc_init_params(struct intel_guc *guc);
+void intel_guc_init_ggtt_pin_bias(struct intel_guc *guc);
 int intel_guc_init_wq(struct intel_guc *guc);
 void intel_guc_fini_wq(struct intel_guc *guc);
 int intel_guc_init(struct intel_guc *guc);
 void intel_guc_fini(struct intel_guc *guc);
-int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len);
-int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
+int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len,
+		       u32 *response_buf, u32 response_buf_size);
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
+			u32 *response_buf, u32 response_buf_size);
+void intel_guc_to_host_event_handler(struct intel_guc *guc);
+void intel_guc_to_host_event_handler_nop(struct intel_guc *guc);
+void intel_guc_to_host_event_handler_mmio(struct intel_guc *guc);
+void intel_guc_to_host_process_recv_msg(struct intel_guc *guc, u32 msg);
 int intel_guc_sample_forcewake(struct intel_guc *guc);
 int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
 int intel_guc_suspend(struct intel_guc *guc);
 int intel_guc_resume(struct intel_guc *guc);
 struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
-u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
+
+static inline int intel_guc_sanitize(struct intel_guc *guc)
+{
+	intel_uc_fw_sanitize(&guc->fw);
+	return 0;
+}
+
+static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask)
+{
+	spin_lock_irq(&guc->irq_lock);
+	guc->msg_enabled_mask |= mask;
+	spin_unlock_irq(&guc->irq_lock);
+}
+
+static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
+{
+	spin_lock_irq(&guc->irq_lock);
+	guc->msg_enabled_mask &= ~mask;
+	spin_unlock_irq(&guc->irq_lock);
+}
 
 #endif
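
With the send vfunc now carrying an optional response buffer, a caller that expects payload back goes through intel_guc_send_and_receive() instead of intel_guc_send(). A hedged usage sketch; the action code and response layout are hypothetical, only the wrapper above is from this series:

	static int guc_query_example(struct intel_guc *guc)
	{
		u32 action[] = { 0x5500 };	/* hypothetical action code */
		u32 response[2];		/* hypothetical response payload */
		int ret;

		ret = intel_guc_send_and_receive(guc, action, ARRAY_SIZE(action),
						 response, ARRAY_SIZE(response));
		if (ret < 0)
			return ret;	/* send failed or GuC reported an error */

		/*
		 * ret >= 0 carries data decoded from the status dword (MMIO)
		 * or the actual response length in dwords (CTB).
		 */
		return 0;
	}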
diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c
index ac62753..dcaa3fb7 100644
--- a/drivers/gpu/drm/i915/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/intel_guc_ads.c
@@ -75,7 +75,7 @@
 int intel_guc_ads_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct i915_vma *vma;
+	struct i915_vma *vma, *kernel_ctx_vma;
 	struct page *page;
 	/* The ads obj includes the struct itself and buffers passed to GuC */
 	struct {
@@ -121,9 +121,10 @@
 	 * to find it. Note that we have to skip our header (1 page),
 	 * because our GuC shared data is there.
 	 */
+	kernel_ctx_vma = to_intel_context(dev_priv->kernel_context,
+					  dev_priv->engine[RCS])->state;
 	blob->ads.golden_context_lrca =
-		guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) +
-		skipped_offset;
+		intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset;
 
 	/*
 	 * The GuC expects us to exclude the portion of the context image that
@@ -135,7 +136,7 @@
 		blob->ads.eng_state_size[engine->guc_id] =
 			engine->context_size - skipped_size;
 
-	base = guc_ggtt_offset(vma);
+	base = intel_guc_ggtt_offset(guc, vma);
 	blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
 	blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer);
 	blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state);
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
index 24ad557..371b600 100644
--- a/drivers/gpu/drm/i915/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/intel_guc_ct.c
@@ -24,14 +24,49 @@
 #include "i915_drv.h"
 #include "intel_guc_ct.h"
 
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+#define CT_DEBUG_DRIVER(...)	DRM_DEBUG_DRIVER(__VA_ARGS__)
+#else
+#define CT_DEBUG_DRIVER(...)	do { } while (0)
+#endif
+
+struct ct_request {
+	struct list_head link;
+	u32 fence;
+	u32 status;
+	u32 response_len;
+	u32 *response_buf;
+};
+
+struct ct_incoming_request {
+	struct list_head link;
+	u32 msg[];
+};
+
 enum { CTB_SEND = 0, CTB_RECV = 1 };
 
 enum { CTB_OWNER_HOST = 0 };
 
+static void ct_incoming_request_worker_func(struct work_struct *w);
+
+/**
+ * intel_guc_ct_init_early - Initialize CT state without requiring device access
+ * @ct: pointer to CT struct
+ */
 void intel_guc_ct_init_early(struct intel_guc_ct *ct)
 {
 	/* we're using static channel owners */
 	ct->host_channel.owner = CTB_OWNER_HOST;
+
+	spin_lock_init(&ct->lock);
+	INIT_LIST_HEAD(&ct->pending_requests);
+	INIT_LIST_HEAD(&ct->incoming_requests);
+	INIT_WORK(&ct->worker, ct_incoming_request_worker_func);
+}
+
+static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
+{
+	return container_of(ct, struct intel_guc, ct);
 }
 
 static inline const char *guc_ct_buffer_type_to_str(u32 type)
@@ -49,8 +84,8 @@
 static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
 				    u32 cmds_addr, u32 size, u32 owner)
 {
-	DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
-			 desc, cmds_addr, size, owner);
+	CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
+			desc, cmds_addr, size, owner);
 	memset(desc, 0, sizeof(*desc));
 	desc->addr = cmds_addr;
 	desc->size = size;
@@ -59,8 +94,8 @@
 
 static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
 {
-	DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
-			 desc, desc->head, desc->tail);
+	CT_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
+			desc, desc->head, desc->tail);
 	desc->head = 0;
 	desc->tail = 0;
 	desc->is_in_error = 0;
@@ -79,7 +114,7 @@
 	int err;
 
 	/* Can't use generic send(), CT registration must go over MMIO */
-	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
 	if (err)
 		DRM_ERROR("CT: register %s buffer failed; err=%d\n",
 			  guc_ct_buffer_type_to_str(type), err);
@@ -98,7 +133,7 @@
 	int err;
 
 	/* Can't use generic send(), CT deregistration must go over MMIO */
-	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
 	if (err)
 		DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n",
 			  guc_ct_buffer_type_to_str(type), owner, err);
@@ -156,7 +191,8 @@
 		err = PTR_ERR(blob);
 		goto err_vma;
 	}
-	DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma));
+	CT_DEBUG_DRIVER("CT: vma base=%#x\n",
+			intel_guc_ggtt_offset(guc, ctch->vma));
 
 	/* store pointers to desc and cmds */
 	for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
@@ -170,8 +206,8 @@
 err_vma:
 	i915_vma_unpin_and_release(&ctch->vma);
 err_out:
-	DRM_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n",
-			 ctch->owner, err);
+	CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n",
+			ctch->owner, err);
 	return err;
 }
 
@@ -191,8 +227,8 @@
 	int err;
 	int i;
 
-	DRM_DEBUG_DRIVER("CT: channel %d reopen=%s\n",
-			 ctch->owner, yesno(ctch_is_open(ctch)));
+	CT_DEBUG_DRIVER("CT: channel %d reopen=%s\n",
+			ctch->owner, yesno(ctch_is_open(ctch)));
 
 	if (!ctch->vma) {
 		err = ctch_init(guc, ctch);
@@ -202,7 +238,7 @@
 	}
 
 	/* vma should be already allocated and map'ed */
-	base = guc_ggtt_offset(ctch->vma);
+	base = intel_guc_ggtt_offset(guc, ctch->vma);
 
 	/* (re)initialize descriptors
 	 * cmds buffers are in the second half of the blob page
@@ -263,10 +299,29 @@
 	return ++ctch->next_fence;
 }
 
+/**
+ * DOC: CTB Host to GuC request
+ *
+ * Format of the CTB Host to GuC request message is as follows::
+ *
+ *      +------------+---------+---------+---------+---------+
+ *      |   msg[0]   |   [1]   |   [2]   |   ...   |  [n-1]  |
+ *      +------------+---------+---------+---------+---------+
+ *      |   MESSAGE  |       MESSAGE PAYLOAD                 |
+ *      +   HEADER   +---------+---------+---------+---------+
+ *      |            |    0    |    1    |   ...   |    n    |
+ *      +============+=========+=========+=========+=========+
+ *      |  len >= 1  |  FENCE  |     request specific data   |
+ *      +------+-----+---------+---------+---------+---------+
+ *
+ *                   ^-----------------len-------------------^
+ */
+
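
Given that layout, the header dword that ctb_write() below assembles can be computed directly. A worked sketch; the helper is illustrative, but the GUC_CT_MSG_* fields are exactly the ones the diff uses:

	static u32 ct_request_header(u32 action, u32 len, bool want_response)
	{
		/* len covers the fence plus request specific data, not the header */
		return (len << GUC_CT_MSG_LEN_SHIFT) |
		       GUC_CT_MSG_WRITE_FENCE_TO_DESC |
		       (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
		       (action << GUC_CT_MSG_ACTION_SHIFT);
	}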
 static int ctb_write(struct intel_guc_ct_buffer *ctb,
 		     const u32 *action,
 		     u32 len /* in dwords */,
-		     u32 fence)
+		     u32 fence,
+		     bool want_response)
 {
 	struct guc_ct_buffer_desc *desc = ctb->desc;
 	u32 head = desc->head / 4;	/* in dwords */
@@ -295,15 +350,21 @@
 	if (unlikely(used + len + 1 >= size))
 		return -ENOSPC;
 
-	/* Write the message. The format is the following:
+	/*
+	 * Write the message. The format is the following:
 	 * DW0: header (including action code)
 	 * DW1: fence
 	 * DW2+: action data
 	 */
 	header = (len << GUC_CT_MSG_LEN_SHIFT) |
 		 (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
+		 (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
 		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
 
+	CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n",
+			4, &header, 4, &fence,
+			4 * (len - 1), &action[1]);
+
 	cmds[tail] = header;
 	tail = (tail + 1) % size;
 
@@ -322,16 +383,25 @@
 	return 0;
 }
 
-/* Wait for the response from the GuC.
+/**
+ * wait_for_ctb_desc_update - Wait for the CT buffer descriptor update.
+ * @desc:	buffer descriptor
  * @fence:	response fence
  * @status:	placeholder for status
- * return:	0 response received (status is valid)
- *		-ETIMEDOUT no response within hardcoded timeout
- *		-EPROTO no response, ct buffer was in error
+ *
+ * GuC will update the CT buffer descriptor with a new fence and
+ * status after processing the command identified by the fence. Wait
+ * for the specified fence and then read the command's status from
+ * the descriptor.
+ *
+ * Return:
+ * *	0 response received (status is valid)
+ * *	-ETIMEDOUT no response within hardcoded timeout
+ * *	-EPROTO no response, CT buffer is in error
  */
-static int wait_for_response(struct guc_ct_buffer_desc *desc,
-			     u32 fence,
-			     u32 *status)
+static int wait_for_ctb_desc_update(struct guc_ct_buffer_desc *desc,
+				    u32 fence,
+				    u32 *status)
 {
 	int err;
 
@@ -363,71 +433,440 @@
 	return err;
 }
 
-static int ctch_send(struct intel_guc *guc,
+/**
+ * wait_for_ct_request_update - Wait for CT request state update.
+ * @req:	pointer to pending request
+ * @status:	placeholder for status
+ *
+ * For each request sent, the GuC shall send back a CT response message.
+ * Our message handler will update the status of the tracked request once
+ * a response message with the given fence is received. Wait here and
+ * check for a valid response status value.
+ *
+ * Return:
+ * *	0 response received (status is valid)
+ * *	-ETIMEDOUT no response within hardcoded timeout
+ */
+static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
+{
+	int err;
+
+	/*
+	 * Fast commands should complete in less than 10us, so sample quickly
+	 * up to that length of time, then switch to a slower sleep-wait loop.
+	 * No GuC command should ever take longer than 10ms.
+	 */
+#define done INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status))
+	err = wait_for_us(done, 10);
+	if (err)
+		err = wait_for(done, 10);
+#undef done
+
+	if (unlikely(err))
+		DRM_ERROR("CT: fence %u err %d\n", req->fence, err);
+
+	*status = req->status;
+	return err;
+}
+
+static int ctch_send(struct intel_guc_ct *ct,
 		     struct intel_guc_ct_channel *ctch,
 		     const u32 *action,
 		     u32 len,
+		     u32 *response_buf,
+		     u32 response_buf_size,
 		     u32 *status)
 {
 	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND];
 	struct guc_ct_buffer_desc *desc = ctb->desc;
+	struct ct_request request;
+	unsigned long flags;
 	u32 fence;
 	int err;
 
 	GEM_BUG_ON(!ctch_is_open(ctch));
 	GEM_BUG_ON(!len);
 	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+	GEM_BUG_ON(!response_buf && response_buf_size);
 
 	fence = ctch_get_next_fence(ctch);
-	err = ctb_write(ctb, action, len, fence);
-	if (unlikely(err))
-		return err;
+	request.fence = fence;
+	request.status = 0;
+	request.response_len = response_buf_size;
+	request.response_buf = response_buf;
 
-	intel_guc_notify(guc);
+	spin_lock_irqsave(&ct->lock, flags);
+	list_add_tail(&request.link, &ct->pending_requests);
+	spin_unlock_irqrestore(&ct->lock, flags);
 
-	err = wait_for_response(desc, fence, status);
+	err = ctb_write(ctb, action, len, fence, !!response_buf);
 	if (unlikely(err))
-		return err;
-	if (*status != INTEL_GUC_STATUS_SUCCESS)
-		return -EIO;
-	return 0;
+		goto unlink;
+
+	intel_guc_notify(ct_to_guc(ct));
+
+	if (response_buf)
+		err = wait_for_ct_request_update(&request, status);
+	else
+		err = wait_for_ctb_desc_update(desc, fence, status);
+	if (unlikely(err))
+		goto unlink;
+
+	if (!INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(*status)) {
+		err = -EIO;
+		goto unlink;
+	}
+
+	if (response_buf) {
+		/* There shall be no data in the status */
+		WARN_ON(INTEL_GUC_MSG_TO_DATA(request.status));
+		/* Return actual response len */
+		err = request.response_len;
+	} else {
+		/* There shall be no response payload */
+		WARN_ON(request.response_len);
+		/* Return data decoded from the status dword */
+		err = INTEL_GUC_MSG_TO_DATA(*status);
+	}
+
+unlink:
+	spin_lock_irqsave(&ct->lock, flags);
+	list_del(&request.link);
+	spin_unlock_irqrestore(&ct->lock, flags);
+
+	return err;
 }
 
 /*
  * Command Transport (CT) buffer based GuC send function.
  */
-static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
+static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len,
+			     u32 *response_buf, u32 response_buf_size)
 {
-	struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
+	struct intel_guc_ct *ct = &guc->ct;
+	struct intel_guc_ct_channel *ctch = &ct->host_channel;
 	u32 status = ~0; /* undefined */
-	int err;
+	int ret;
 
 	mutex_lock(&guc->send_mutex);
 
-	err = ctch_send(guc, ctch, action, len, &status);
-	if (unlikely(err)) {
+	ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size,
+			&status);
+	if (unlikely(ret < 0)) {
 		DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
-			  action[0], err, status);
+			  action[0], ret, status);
+	} else if (unlikely(ret)) {
+		CT_DEBUG_DRIVER("CT: send action %#x returned %d (%#x)\n",
+				action[0], ret, ret);
 	}
 
 	mutex_unlock(&guc->send_mutex);
-	return err;
+	return ret;
+}
+
+static inline unsigned int ct_header_get_len(u32 header)
+{
+	return (header >> GUC_CT_MSG_LEN_SHIFT) & GUC_CT_MSG_LEN_MASK;
+}
+
+static inline unsigned int ct_header_get_action(u32 header)
+{
+	return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK;
+}
+
+static inline bool ct_header_is_response(u32 header)
+{
+	return ct_header_get_action(header) == INTEL_GUC_ACTION_DEFAULT;
+}
+
+static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
+{
+	struct guc_ct_buffer_desc *desc = ctb->desc;
+	u32 head = desc->head / 4;	/* in dwords */
+	u32 tail = desc->tail / 4;	/* in dwords */
+	u32 size = desc->size / 4;	/* in dwords */
+	u32 *cmds = ctb->cmds;
+	s32 available;			/* in dwords */
+	unsigned int len;
+	unsigned int i;
+
+	GEM_BUG_ON(desc->size % 4);
+	GEM_BUG_ON(desc->head % 4);
+	GEM_BUG_ON(desc->tail % 4);
+	GEM_BUG_ON(tail >= size);
+	GEM_BUG_ON(head >= size);
+
+	/* tail == head condition indicates empty */
+	available = tail - head;
+	if (unlikely(available == 0))
+		return -ENODATA;
+
+	/* beware of buffer wrap case */
+	if (unlikely(available < 0))
+		available += size;
+	CT_DEBUG_DRIVER("CT: available %d (%u:%u)\n", available, head, tail);
+	GEM_BUG_ON(available < 0);
+
+	data[0] = cmds[head];
+	head = (head + 1) % size;
+
+	/* message len with header */
+	len = ct_header_get_len(data[0]) + 1;
+	if (unlikely(len > (u32)available)) {
+		DRM_ERROR("CT: incomplete message %*ph %*ph %*ph\n",
+			  4, data,
+			  4 * (head + available - 1 > size ?
+			       size - head : available - 1), &cmds[head],
+			  4 * (head + available - 1 > size ?
+			       available - 1 - size + head : 0), &cmds[0]);
+		return -EPROTO;
+	}
+
+	for (i = 1; i < len; i++) {
+		data[i] = cmds[head];
+		head = (head + 1) % size;
+	}
+	CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data);
+
+	desc->head = head * 4;
+	return 0;
 }
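
Both ctb_write() and ctb_read() lean on the same circular-buffer occupancy math: head == tail means empty, and a negative tail - head means the tail has wrapped past the end. Distilled into a stand-alone sketch (all quantities in dwords, as in the diff):

	static s32 ctb_used_dwords(u32 head, u32 tail, u32 size)
	{
		s32 used = tail - head;

		if (used < 0)		/* tail wrapped around the buffer end */
			used += size;

		return used;		/* 0 == empty; never exceeds size - 1 */
	}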
 
 /**
- * Enable buffer based command transport
- * Shall only be called for platforms with HAS_GUC_CT.
- * @guc:	the guc
- * return:	0 on success
- *		non-zero on failure
+ * DOC: CTB GuC to Host response
+ *
+ * Format of the CTB GuC to Host response message is as follows::
+ *
+ *      +------------+---------+---------+---------+---------+---------+
+ *      |   msg[0]   |   [1]   |   [2]   |   [3]   |   ...   |  [n-1]  |
+ *      +------------+---------+---------+---------+---------+---------+
+ *      |   MESSAGE  |       MESSAGE PAYLOAD                           |
+ *      +   HEADER   +---------+---------+---------+---------+---------+
+ *      |            |    0    |    1    |    2    |   ...   |    n    |
+ *      +============+=========+=========+=========+=========+=========+
+ *      |  len >= 2  |  FENCE  |  STATUS |   response specific data    |
+ *      +------+-----+---------+---------+---------+---------+---------+
+ *
+ *                   ^-----------------------len-----------------------^
  */
-int intel_guc_enable_ct(struct intel_guc *guc)
+
+static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
+	u32 header = msg[0];
+	u32 len = ct_header_get_len(header);
+	u32 msglen = len + 1; /* total message length including header */
+	u32 fence;
+	u32 status;
+	u32 datalen;
+	struct ct_request *req;
+	bool found = false;
+
+	GEM_BUG_ON(!ct_header_is_response(header));
+	GEM_BUG_ON(!in_irq());
+
+	/* Response payload shall at least include fence and status */
+	if (unlikely(len < 2)) {
+		DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg);
+		return -EPROTO;
+	}
+
+	fence = msg[1];
+	status = msg[2];
+	datalen = len - 2;
+
+	/* Format of the status follows RESPONSE message */
+	if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) {
+		DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg);
+		return -EPROTO;
+	}
+
+	CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
+
+	spin_lock(&ct->lock);
+	list_for_each_entry(req, &ct->pending_requests, link) {
+		if (unlikely(fence != req->fence)) {
+			CT_DEBUG_DRIVER("CT: request %u awaits response\n",
+					req->fence);
+			continue;
+		}
+		if (unlikely(datalen > req->response_len)) {
+			DRM_ERROR("CT: response %u too long %*ph\n",
+				  req->fence, 4 * msglen, msg);
+			datalen = 0;
+		}
+		if (datalen)
+			memcpy(req->response_buf, msg + 3, 4 * datalen);
+		req->response_len = datalen;
+		WRITE_ONCE(req->status, status);
+		found = true;
+		break;
+	}
+	spin_unlock(&ct->lock);
+
+	if (!found)
+		DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
+	return 0;
+}
+
+static void ct_process_request(struct intel_guc_ct *ct,
+			       u32 action, u32 len, const u32 *payload)
+{
+	struct intel_guc *guc = ct_to_guc(ct);
+
+	CT_DEBUG_DRIVER("CT: request %x %*ph\n", action, 4 * len, payload);
+
+	switch (action) {
+	case INTEL_GUC_ACTION_DEFAULT:
+		if (unlikely(len < 1))
+			goto fail_unexpected;
+		intel_guc_to_host_process_recv_msg(guc, *payload);
+		break;
+
+	default:
+fail_unexpected:
+		DRM_ERROR("CT: unexpected request %x %*ph\n",
+			  action, 4 * len, payload);
+		break;
+	}
+}
+
+static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
+{
+	unsigned long flags;
+	struct ct_incoming_request *request;
+	u32 header;
+	u32 *payload;
+	bool done;
+
+	spin_lock_irqsave(&ct->lock, flags);
+	request = list_first_entry_or_null(&ct->incoming_requests,
+					   struct ct_incoming_request, link);
+	if (request)
+		list_del(&request->link);
+	done = !!list_empty(&ct->incoming_requests);
+	spin_unlock_irqrestore(&ct->lock, flags);
+
+	if (!request)
+		return true;
+
+	header = request->msg[0];
+	payload = &request->msg[1];
+	ct_process_request(ct,
+			   ct_header_get_action(header),
+			   ct_header_get_len(header),
+			   payload);
+
+	kfree(request);
+	return done;
+}
+
+static void ct_incoming_request_worker_func(struct work_struct *w)
+{
+	struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker);
+	bool done;
+
+	done = ct_process_incoming_requests(ct);
+	if (!done)
+		queue_work(system_unbound_wq, &ct->worker);
+}
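
The worker above intentionally drains a single request per pass: done is
sampled under the lock and, if the list was not yet empty, the work re-queues
itself, keeping lock hold times short. A generic standalone sketch of that
pattern (all names illustrative, not driver types):

	struct example_item {
		struct list_head link;
	};

	static DEFINE_SPINLOCK(example_lock);
	static LIST_HEAD(example_items);

	static void example_worker(struct work_struct *w)
	{
		struct example_item *it;
		bool done;

		spin_lock(&example_lock);
		it = list_first_entry_or_null(&example_items,
					      struct example_item, link);
		if (it)
			list_del(&it->link);
		done = list_empty(&example_items);
		spin_unlock(&example_lock);

		if (!it)
			return;

		/* process 'it' outside the lock, then release it */
		kfree(it);

		if (!done)	/* items remained: run again */
			queue_work(system_unbound_wq, w);
	}

An item added after the list_empty() check is not lost: its producer issues
its own queue_work(), which re-queues the currently running work item.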
+
+/**
+ * DOC: CTB GuC to Host request
+ *
+ * Format of the CTB GuC to Host request message is as follows::
+ *
+ *      +------------+---------+---------+---------+---------+---------+
+ *      |   msg[0]   |   [1]   |   [2]   |   [3]   |   ...   |  [n-1]  |
+ *      +------------+---------+---------+---------+---------+---------+
+ *      |   MESSAGE  |       MESSAGE PAYLOAD                           |
+ *      +   HEADER   +---------+---------+---------+---------+---------+
+ *      |            |    0    |    1    |    2    |   ...   |    n    |
+ *      +============+=========+=========+=========+=========+=========+
+ *      |     len    |            request specific data                |
+ *      +------+-----+---------+---------+---------+---------+---------+
+ *
+ *                   ^-----------------------len-----------------------^
+ */
+
+static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
+{
+	u32 header = msg[0];
+	u32 len = ct_header_get_len(header);
+	u32 msglen = len + 1; /* total message length including header */
+	struct ct_incoming_request *request;
+	unsigned long flags;
+
+	GEM_BUG_ON(ct_header_is_response(header));
+
+	request = kmalloc(sizeof(*request) + 4 * msglen, GFP_ATOMIC);
+	if (unlikely(!request)) {
+		DRM_ERROR("CT: dropping request %*ph\n", 4 * msglen, msg);
+		return 0; /* XXX: -ENOMEM ? */
+	}
+	memcpy(request->msg, msg, 4 * msglen);
+
+	spin_lock_irqsave(&ct->lock, flags);
+	list_add_tail(&request->link, &ct->incoming_requests);
+	spin_unlock_irqrestore(&ct->lock, flags);
+
+	queue_work(system_unbound_wq, &ct->worker);
+	return 0;
+}
+
+static void ct_process_host_channel(struct intel_guc_ct *ct)
+{
+	struct intel_guc_ct_channel *ctch = &ct->host_channel;
+	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV];
+	u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */
+	int err = 0;
+
+	if (!ctch_is_open(ctch))
+		return;
+
+	do {
+		err = ctb_read(ctb, msg);
+		if (err)
+			break;
+
+		if (ct_header_is_response(msg[0]))
+			err = ct_handle_response(ct, msg);
+		else
+			err = ct_handle_request(ct, msg);
+	} while (!err);
+
+	if (GEM_WARN_ON(err == -EPROTO)) {
+		DRM_ERROR("CT: corrupted message detected!\n");
+		ctb->desc->is_in_error = 1;
+	}
+}
+
+/*
+ * When we're communicating with the GuC over CT, GuC uses events
+ * to notify us about new messages being posted on the RECV buffer.
+ */
+static void intel_guc_to_host_event_handler_ct(struct intel_guc *guc)
+{
+	struct intel_guc_ct *ct = &guc->ct;
+
+	ct_process_host_channel(ct);
+}
+
+/**
+ * intel_guc_ct_enable - Enable buffer based command transport.
+ * @ct: pointer to CT struct
+ *
+ * Shall only be called for platforms with HAS_GUC_CT.
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_enable(struct intel_guc_ct *ct)
+{
+	struct intel_guc *guc = ct_to_guc(ct);
+	struct drm_i915_private *i915 = guc_to_i915(guc);
+	struct intel_guc_ct_channel *ctch = &ct->host_channel;
 	int err;
 
-	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
+	GEM_BUG_ON(!HAS_GUC_CT(i915));
 
 	err = ctch_open(guc, ctch);
 	if (unlikely(err))
@@ -435,21 +874,24 @@
 
 	/* Switch into cmd transport buffer based send() */
 	guc->send = intel_guc_send_ct;
+	guc->handler = intel_guc_to_host_event_handler_ct;
 	DRM_INFO("CT: %s\n", enableddisabled(true));
 	return 0;
 }
 
 /**
- * Disable buffer based command transport.
+ * intel_guc_ct_disable - Disable buffer based command transport.
+ * @ct: pointer to CT struct
+ *
  * Shall only be called for platforms with HAS_GUC_CT.
- * @guc: the guc
  */
-void intel_guc_disable_ct(struct intel_guc *guc)
+void intel_guc_ct_disable(struct intel_guc_ct *ct)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct intel_guc_ct_channel *ctch = &guc->ct.host_channel;
+	struct intel_guc *guc = ct_to_guc(ct);
+	struct drm_i915_private *i915 = guc_to_i915(guc);
+	struct intel_guc_ct_channel *ctch = &ct->host_channel;
 
-	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
+	GEM_BUG_ON(!HAS_GUC_CT(i915));
 
 	if (!ctch_is_open(ctch))
 		return;
@@ -458,5 +900,6 @@
 
 	/* Disable send */
 	guc->send = intel_guc_send_nop;
+	guc->handler = intel_guc_to_host_event_handler_nop;
 	DRM_INFO("CT: %s\n", enableddisabled(false));
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
index 6d97f36..d774895a 100644
--- a/drivers/gpu/drm/i915/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/intel_guc_ct.h
@@ -75,12 +75,22 @@
 struct intel_guc_ct {
 	struct intel_guc_ct_channel host_channel;
 	/* other channels are tbd */
+
+	/** @lock: protects pending requests list */
+	spinlock_t lock;
+
+	/** @pending_requests: list of requests waiting for response */
+	struct list_head pending_requests;
+
+	/** @incoming_requests: list of incoming requests */
+	struct list_head incoming_requests;
+
+	/** @worker: worker for handling incoming requests */
+	struct work_struct worker;
 };
 
 void intel_guc_ct_init_early(struct intel_guc_ct *ct);
-
-/* XXX: move to intel_uc.h ? don't fit there either */
-int intel_guc_enable_ct(struct intel_guc *guc);
-void intel_guc_disable_ct(struct intel_guc *guc);
+int intel_guc_ct_enable(struct intel_guc_ct *ct);
+void intel_guc_ct_disable(struct intel_guc_ct *ct);
 
 #endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c
index d07f2b9..a9e6fcc 100644
--- a/drivers/gpu/drm/i915/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/intel_guc_fw.c
@@ -165,7 +165,7 @@
 	I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size);
 
 	/* Set the source address for the new blob */
-	offset = guc_ggtt_offset(vma) + guc_fw->header_offset;
+	offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset;
 	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
 	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
 
@@ -275,9 +275,8 @@
  * Called from intel_uc_init_hw() during driver load, resume from sleep and
  * after a GPU reset.
  *
- * The firmware image should have already been fetched into memory by the
- * earlier call to intel_uc_init_fw(), so here we need to only check that
- * fetch succeeded, and then transfer the image to the h/w.
+ * The firmware image should have already been fetched into memory, so here
+ * we need only check that the fetch succeeded, and then transfer the image
+ * to the h/w.
  *
  * Return:	non-zero code on error
  */
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 6a10aa6..0867ba7 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -23,9 +23,6 @@
 #ifndef _INTEL_GUC_FWIF_H
 #define _INTEL_GUC_FWIF_H
 
-#define GUC_CORE_FAMILY_GEN9		12
-#define GUC_CORE_FAMILY_UNKNOWN		0x7fffffff
-
 #define GUC_CLIENT_PRIORITY_KMD_HIGH	0
 #define GUC_CLIENT_PRIORITY_HIGH	1
 #define GUC_CLIENT_PRIORITY_KMD_NORMAL	2
@@ -82,8 +79,6 @@
 #define GUC_CTL_ARAT_LOW		2
 
 #define GUC_CTL_DEVICE_INFO		3
-#define   GUC_CTL_GT_TYPE_SHIFT		0
-#define   GUC_CTL_CORE_FAMILY_SHIFT	7
 
 #define GUC_CTL_LOG_PARAMS		4
 #define   GUC_LOG_VALID			(1 << 0)
@@ -127,7 +122,7 @@
 #define   GUC_PROFILE_ENABLED		(1 << 7)
 #define   GUC_WQ_TRACK_ENABLED		(1 << 8)
 #define   GUC_ADS_ENABLED		(1 << 9)
-#define   GUC_DEBUG_RESERVED		(1 << 10)
+#define   GUC_LOG_DEFAULT_DISABLED	(1 << 10)
 #define   GUC_ADS_ADDR_SHIFT		11
 #define   GUC_ADS_ADDR_MASK		0xfffff800
 
@@ -327,6 +322,58 @@
 	u64 desc_private;
 } __packed;
 
+/**
+ * DOC: CTB based communication
+ *
+ * The CTB (command transport buffer) communication between Host and GuC
+ * is based on a u32 data stream written to a shared buffer. One buffer can
+ * be used to transmit data only in one direction (one-directional channel).
+ *
+ * The current status of each buffer is stored in the buffer descriptor.
+ * The buffer descriptor holds tail and head fields that represent the
+ * active data stream. The tail field is updated by the data producer
+ * (sender), and the head field is updated by the data consumer (receiver)::
+ *
+ *      +------------+
+ *      | DESCRIPTOR |          +=================+============+========+
+ *      +============+          |                 | MESSAGE(s) |        |
+ *      | address    |--------->+=================+============+========+
+ *      +------------+
+ *      | head       |          ^-----head--------^
+ *      +------------+
+ *      | tail       |          ^---------tail-----------------^
+ *      +------------+
+ *      | size       |          ^---------------size--------------------^
+ *      +------------+
+ *
+ * Each message in the data stream starts with a single u32 treated as a
+ * header, followed by an optional set of u32 data that makes up the
+ * message specific payload::
+ *
+ *      +------------+---------+---------+---------+
+ *      |         MESSAGE                          |
+ *      +------------+---------+---------+---------+
+ *      |   msg[0]   |   [1]   |   ...   |  [n-1]  |
+ *      +------------+---------+---------+---------+
+ *      |   MESSAGE  |       MESSAGE PAYLOAD       |
+ *      +   HEADER   +---------+---------+---------+
+ *      |            |    0    |   ...   |    n    |
+ *      +======+=====+=========+=========+=========+
+ *      | 31:16| code|         |         |         |
+ *      +------+-----+         |         |         |
+ *      |  15:5|flags|         |         |         |
+ *      +------+-----+         |         |         |
+ *      |   4:0|  len|         |         |         |
+ *      +------+-----+---------+---------+---------+
+ *
+ *                   ^-------------len-------------^
+ *
+ * The message header consists of:
+ *
+ * - **len**, indicates the length of the message payload (in u32)
+ * - **code**, indicates the message code
+ * - **flags**, holds various bits to control message handling
+ */
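
Two standalone sketches implied by the diagrams above (helper names are
illustrative, not driver API). The first assumes the writer keeps one dword
of headroom, as is conventional for such rings, so that head == tail
unambiguously means "empty"; the second splits the header per the bit layout
shown (4:0 len, 15:5 flags, 31:16 code):

	/* Free dwords a producer may write, given the descriptor fields. */
	static u32 ring_free(u32 head, u32 tail, u32 size)
	{
		u32 used = (tail >= head) ? tail - head : size - head + tail;

		return size - used - 1;	/* one dword headroom: full != empty */
	}

	/* Header field accessors per the diagram above. */
	static u32 example_header_len(u32 header)   { return header & 0x1f; }
	static u32 example_header_flags(u32 header) { return (header >> 5) & 0x7ff; }
	static u32 example_header_code(u32 header)  { return header >> 16; }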
+
 /*
  * Describes single command transport buffer.
  * Used by both guc-master and clients.
@@ -534,16 +581,6 @@
 	u32 version;
 } __packed;
 
-union guc_log_control {
-	struct {
-		u32 logging_enabled:1;
-		u32 reserved1:3;
-		u32 verbosity:4;
-		u32 reserved2:24;
-	};
-	u32 value;
-} __packed;
-
 struct guc_ctx_report {
 	u32 report_return_status;
 	u32 reserved1[64];
@@ -570,7 +607,68 @@
 	struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM];
 } __packed;
 
-/* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */
+/**
+ * DOC: MMIO based communication
+ *
+ * The MMIO based communication between Host and GuC uses software scratch
+ * registers, where the first register holds data treated as the message
+ * header, and the remaining registers hold the message payload.
+ *
+ * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8::
+ *
+ *      +-----------+---------+---------+---------+
+ *      |  MMIO[0]  | MMIO[1] |   ...   | MMIO[n] |
+ *      +-----------+---------+---------+---------+
+ *      | header    |      optional payload       |
+ *      +======+====+=========+=========+=========+
+ *      | 31:28|type|         |         |         |
+ *      +------+----+         |         |         |
+ *      | 27:16|data|         |         |         |
+ *      +------+----+         |         |         |
+ *      |  15:0|code|         |         |         |
+ *      +------+----+---------+---------+---------+
+ *
+ * The message header consists of:
+ *
+ * - **type**, indicates the message type
+ * - **code**, indicates the message code, specific to the **type**
+ * - **data**, indicates the message data, optional, depends on the **code**
+ *
+ * The following message **types** are supported:
+ *
+ * - **REQUEST**, indicates a Host-to-GuC request; the requested GuC action
+ *   code must be provided in the **code** field. Optional action specific
+ *   parameters can be provided in the remaining payload registers or the
+ *   **data** field.
+ *
+ * - **RESPONSE**, indicates a GuC-to-Host response to an earlier GuC
+ *   request; the action response status will be provided in the **code**
+ *   field. Optional response data can be returned in the remaining payload
+ *   registers or the **data** field.
+ */
+
+#define INTEL_GUC_MSG_TYPE_SHIFT	28
+#define INTEL_GUC_MSG_TYPE_MASK		(0xF << INTEL_GUC_MSG_TYPE_SHIFT)
+#define INTEL_GUC_MSG_DATA_SHIFT	16
+#define INTEL_GUC_MSG_DATA_MASK		(0xFFF << INTEL_GUC_MSG_DATA_SHIFT)
+#define INTEL_GUC_MSG_CODE_SHIFT	0
+#define INTEL_GUC_MSG_CODE_MASK		(0xFFFF << INTEL_GUC_MSG_CODE_SHIFT)
+
+#define __INTEL_GUC_MSG_GET(T, m) \
+	(((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT)
+#define INTEL_GUC_MSG_TO_TYPE(m)	__INTEL_GUC_MSG_GET(TYPE, m)
+#define INTEL_GUC_MSG_TO_DATA(m)	__INTEL_GUC_MSG_GET(DATA, m)
+#define INTEL_GUC_MSG_TO_CODE(m)	__INTEL_GUC_MSG_GET(CODE, m)
+
+enum intel_guc_msg_type {
+	INTEL_GUC_MSG_TYPE_REQUEST = 0x0,
+	INTEL_GUC_MSG_TYPE_RESPONSE = 0xF,
+};
+
+#define __INTEL_GUC_MSG_TYPE_IS(T, m) \
+	(INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T)
+#define INTEL_GUC_MSG_IS_REQUEST(m)	__INTEL_GUC_MSG_TYPE_IS(REQUEST, m)
+#define INTEL_GUC_MSG_IS_RESPONSE(m)	__INTEL_GUC_MSG_TYPE_IS(RESPONSE, m)
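
For illustration, the compose direction of the masks above (the patch itself
only defines extraction; this helper name is hypothetical):

	static inline u32 example_guc_msg_header(u32 type, u32 data, u32 code)
	{
		return ((type << INTEL_GUC_MSG_TYPE_SHIFT) & INTEL_GUC_MSG_TYPE_MASK) |
		       ((data << INTEL_GUC_MSG_DATA_SHIFT) & INTEL_GUC_MSG_DATA_MASK) |
		       ((code << INTEL_GUC_MSG_CODE_SHIFT) & INTEL_GUC_MSG_CODE_MASK);
	}

For in-range values this round-trips with the extractors, e.g.
INTEL_GUC_MSG_TO_TYPE(example_guc_msg_header(t, d, c)) == t.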
+
 enum intel_guc_action {
 	INTEL_GUC_ACTION_DEFAULT = 0x0,
 	INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
@@ -602,24 +700,22 @@
 	INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4,
 };
 
-/*
- * The GuC sends its response to a command by overwriting the
- * command in SS0. The response is distinguishable from a command
- * by the fact that all the MASK bits are set. The remaining bits
- * give more detail.
- */
-#define	INTEL_GUC_RECV_MASK	((u32)0xF0000000)
-#define	INTEL_GUC_RECV_IS_RESPONSE(x)	((u32)(x) >= INTEL_GUC_RECV_MASK)
-#define	INTEL_GUC_RECV_STATUS(x)	(INTEL_GUC_RECV_MASK | (x))
+#define GUC_LOG_CONTROL_LOGGING_ENABLED	(1 << 0)
+#define GUC_LOG_CONTROL_VERBOSITY_SHIFT	4
+#define GUC_LOG_CONTROL_VERBOSITY_MASK	(0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
+#define GUC_LOG_CONTROL_DEFAULT_LOGGING	(1 << 8)
 
-/* GUC will return status back to SOFT_SCRATCH_O_REG */
-enum intel_guc_status {
-	INTEL_GUC_STATUS_SUCCESS = INTEL_GUC_RECV_STATUS(0x0),
-	INTEL_GUC_STATUS_ALLOCATE_DOORBELL_FAIL = INTEL_GUC_RECV_STATUS(0x10),
-	INTEL_GUC_STATUS_DEALLOCATE_DOORBELL_FAIL = INTEL_GUC_RECV_STATUS(0x20),
-	INTEL_GUC_STATUS_GENERIC_FAIL = INTEL_GUC_RECV_STATUS(0x0000F000)
+enum intel_guc_response_status {
+	INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+	INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
 };
 
+#define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \
+	 (typecheck(u32, (m)) && \
+	  ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \
+	  ((INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT) | \
+	   (INTEL_GUC_RESPONSE_STATUS_SUCCESS << INTEL_GUC_MSG_CODE_SHIFT)))
+
 /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
 enum intel_guc_recv_message {
 	INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index c0c2e7d..401e170 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -23,12 +23,11 @@
  */
 
 #include <linux/debugfs.h>
-#include <linux/relay.h>
 
 #include "intel_guc_log.h"
 #include "i915_drv.h"
 
-static void guc_log_capture_logs(struct intel_guc *guc);
+static void guc_log_capture_logs(struct intel_guc_log *log);
 
 /**
  * DOC: GuC firmware log
@@ -39,7 +38,7 @@
  * registers value.
  */
 
-static int guc_log_flush_complete(struct intel_guc *guc)
+static int guc_action_flush_log_complete(struct intel_guc *guc)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE
@@ -48,7 +47,7 @@
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-static int guc_log_flush(struct intel_guc *guc)
+static int guc_action_flush_log(struct intel_guc *guc)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH,
@@ -58,22 +57,40 @@
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-static int guc_log_control(struct intel_guc *guc, bool enable, u32 verbosity)
+static int guc_action_control_log(struct intel_guc *guc, bool enable,
+				  bool default_logging, u32 verbosity)
 {
-	union guc_log_control control_val = {
-		{
-			.logging_enabled = enable,
-			.verbosity = verbosity,
-		},
-	};
 	u32 action[] = {
 		INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING,
-		control_val.value
+		(enable ? GUC_LOG_CONTROL_LOGGING_ENABLED : 0) |
+		(verbosity << GUC_LOG_CONTROL_VERBOSITY_SHIFT) |
+		(default_logging ? GUC_LOG_CONTROL_DEFAULT_LOGGING : 0)
 	};
 
+	GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX);
+
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
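
As a worked example of the encoding above (illustrative macro name, not part
of the patch), "logging enabled, verbosity 2, default logging on" becomes:

	#define EXAMPLE_LOG_CONTROL_VALUE \
		(GUC_LOG_CONTROL_LOGGING_ENABLED |		/* bit 0    */ \
		 (2 << GUC_LOG_CONTROL_VERBOSITY_SHIFT) |	/* bits 7:4 */ \
		 GUC_LOG_CONTROL_DEFAULT_LOGGING)		/* bit 8    */
		/* 0x1 | 0x20 | 0x100 == 0x121 */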
 
+static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
+{
+	return container_of(log, struct intel_guc, log);
+}
+
+static void guc_log_enable_flush_events(struct intel_guc_log *log)
+{
+	intel_guc_enable_msg(log_to_guc(log),
+			     INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
+			     INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
+}
+
+static void guc_log_disable_flush_events(struct intel_guc_log *log)
+{
+	intel_guc_disable_msg(log_to_guc(log),
+			      INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
+			      INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
+}
+
 /*
  * Sub buffer switch callback. Called whenever relay has to switch to a new
  * sub buffer, relay stays on the same sub buffer if 0 is returned.
@@ -121,14 +138,7 @@
 	if (!parent)
 		return NULL;
 
-	/*
-	 * Not using the channel filename passed as an argument, since for each
-	 * channel relay appends the corresponding CPU number to the filename
-	 * passed in relay_open(). This should be fine as relay just needs a
-	 * dentry of the file associated with the channel buffer and that file's
-	 * name need not be same as the filename passed as an argument.
-	 */
-	buf_file = debugfs_create_file("guc_log", mode,
+	buf_file = debugfs_create_file(filename, mode,
 				       parent, buf, &relay_file_operations);
 	return buf_file;
 }
@@ -149,59 +159,7 @@
 	.remove_buf_file = remove_buf_file_callback,
 };
 
-static int guc_log_relay_file_create(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct dentry *log_dir;
-	int ret;
-
-	if (!i915_modparams.guc_log_level)
-		return 0;
-
-	mutex_lock(&guc->log.runtime.relay_lock);
-
-	/* For now create the log file in /sys/kernel/debug/dri/0 dir */
-	log_dir = dev_priv->drm.primary->debugfs_root;
-
-	/*
-	 * If /sys/kernel/debug/dri/0 location do not exist, then debugfs is
-	 * not mounted and so can't create the relay file.
-	 * The relay API seems to fit well with debugfs only, for availing relay
-	 * there are 3 requirements which can be met for debugfs file only in a
-	 * straightforward/clean manner :-
-	 * i)   Need the associated dentry pointer of the file, while opening the
-	 *      relay channel.
-	 * ii)  Should be able to use 'relay_file_operations' fops for the file.
-	 * iii) Set the 'i_private' field of file's inode to the pointer of
-	 *	relay channel buffer.
-	 */
-	if (!log_dir) {
-		DRM_ERROR("Debugfs dir not available yet for GuC log file\n");
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir);
-	if (ret < 0 && ret != -EEXIST) {
-		DRM_ERROR("Couldn't associate relay chan with file %d\n", ret);
-		goto out_unlock;
-	}
-
-	ret = 0;
-
-out_unlock:
-	mutex_unlock(&guc->log.runtime.relay_lock);
-	return ret;
-}
-
-static bool guc_log_has_relay(struct intel_guc *guc)
-{
-	lockdep_assert_held(&guc->log.runtime.relay_lock);
-
-	return guc->log.runtime.relay_chan != NULL;
-}
-
-static void guc_move_to_next_buf(struct intel_guc *guc)
+static void guc_move_to_next_buf(struct intel_guc_log *log)
 {
 	/*
 	 * Make sure the updates made in the sub buffer are visible when
@@ -209,21 +167,15 @@
 	 */
 	smp_wmb();
 
-	if (!guc_log_has_relay(guc))
-		return;
-
 	/* All data has been written, so now move the offset of sub buffer. */
-	relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size);
+	relay_reserve(log->relay.channel, log->vma->obj->base.size);
 
 	/* Switch to the next sub buffer */
-	relay_flush(guc->log.runtime.relay_chan);
+	relay_flush(log->relay.channel);
 }
 
-static void *guc_get_write_buffer(struct intel_guc *guc)
+static void *guc_get_write_buffer(struct intel_guc_log *log)
 {
-	if (!guc_log_has_relay(guc))
-		return NULL;
-
 	/*
 	 * Just get the base address of a new sub buffer and copy data into it
 	 * ourselves. NULL will be returned in no-overwrite mode, if all sub
@@ -233,25 +185,25 @@
 	 * done without using relay_reserve() along with relay_write(). So its
 	 * better to use relay_reserve() alone.
 	 */
-	return relay_reserve(guc->log.runtime.relay_chan, 0);
+	return relay_reserve(log->relay.channel, 0);
 }
 
-static bool guc_check_log_buf_overflow(struct intel_guc *guc,
+static bool guc_check_log_buf_overflow(struct intel_guc_log *log,
 				       enum guc_log_buffer_type type,
 				       unsigned int full_cnt)
 {
-	unsigned int prev_full_cnt = guc->log.prev_overflow_count[type];
+	unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
 	bool overflow = false;
 
 	if (full_cnt != prev_full_cnt) {
 		overflow = true;
 
-		guc->log.prev_overflow_count[type] = full_cnt;
-		guc->log.total_overflow_count[type] += full_cnt - prev_full_cnt;
+		log->stats[type].overflow = full_cnt;
+		log->stats[type].sampled_overflow += full_cnt - prev_full_cnt;
 
 		if (full_cnt < prev_full_cnt) {
 			/* buffer_full_cnt is a 4 bit counter */
-			guc->log.total_overflow_count[type] += 16;
+			log->stats[type].sampled_overflow += 16;
 		}
 		DRM_ERROR_RATELIMITED("GuC log buffer overflow\n");
 	}
@@ -275,7 +227,7 @@
 	return 0;
 }
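
guc_check_log_buf_overflow() above compensates for buffer_full_cnt being a
4-bit hardware counter: when a sample wraps past 15, one full counter period
(16) is added back. A standalone sketch of the corrected delta (illustrative
helper; unsigned wrap-around makes the two adjustments compose correctly):

	static unsigned int example_full_cnt_delta(unsigned int prev,
						   unsigned int cur)
	{
		unsigned int delta = cur - prev;	/* modular arithmetic */

		if (cur < prev)		/* 4-bit counter wrapped */
			delta += 16;
		return delta;		/* e.g. prev 14, cur 2 -> 4 */
	}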
 
-static void guc_read_update_log_buffer(struct intel_guc *guc)
+static void guc_read_update_log_buffer(struct intel_guc_log *log)
 {
 	unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt;
 	struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state;
@@ -284,16 +236,16 @@
 	void *src_data, *dst_data;
 	bool new_overflow;
 
-	if (WARN_ON(!guc->log.runtime.buf_addr))
-		return;
+	mutex_lock(&log->relay.lock);
+
+	if (WARN_ON(!intel_guc_log_relay_enabled(log)))
+		goto out_unlock;
 
 	/* Get the pointer to shared GuC log buffer */
-	log_buf_state = src_data = guc->log.runtime.buf_addr;
-
-	mutex_lock(&guc->log.runtime.relay_lock);
+	log_buf_state = src_data = log->relay.buf_addr;
 
 	/* Get the pointer to local buffer to store the logs */
-	log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);
+	log_buf_snapshot_state = dst_data = guc_get_write_buffer(log);
 
 	if (unlikely(!log_buf_snapshot_state)) {
 		/*
@@ -301,10 +253,9 @@
 		 * getting consumed by User at a slow rate.
 		 */
 		DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
-		guc->log.capture_miss_count++;
-		mutex_unlock(&guc->log.runtime.relay_lock);
+		log->relay.full_count++;
 
-		return;
+		goto out_unlock;
 	}
 
 	/* Actual logs are present from the 2nd page */
@@ -325,8 +276,8 @@
 		full_cnt = log_buf_state_local.buffer_full_cnt;
 
 		/* Bookkeeping stuff */
-		guc->log.flush_count[type] += log_buf_state_local.flush_to_file;
-		new_overflow = guc_check_log_buf_overflow(guc, type, full_cnt);
+		log->stats[type].flush += log_buf_state_local.flush_to_file;
+		new_overflow = guc_check_log_buf_overflow(log, type, full_cnt);
 
 		/* Update the state of shared log buffer */
 		log_buf_state->read_ptr = write_offset;
@@ -373,38 +324,35 @@
 		dst_data += buffer_size;
 	}
 
-	guc_move_to_next_buf(guc);
+	guc_move_to_next_buf(log);
 
-	mutex_unlock(&guc->log.runtime.relay_lock);
+out_unlock:
+	mutex_unlock(&log->relay.lock);
 }
 
 static void capture_logs_work(struct work_struct *work)
 {
-	struct intel_guc *guc =
-		container_of(work, struct intel_guc, log.runtime.flush_work);
+	struct intel_guc_log *log =
+		container_of(work, struct intel_guc_log, relay.flush_work);
 
-	guc_log_capture_logs(guc);
+	guc_log_capture_logs(log);
 }
 
-static bool guc_log_has_runtime(struct intel_guc *guc)
+static int guc_log_map(struct intel_guc_log *log)
 {
-	return guc->log.runtime.buf_addr != NULL;
-}
-
-static int guc_log_runtime_create(struct intel_guc *guc)
-{
+	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	void *vaddr;
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	lockdep_assert_held(&log->relay.lock);
 
-	if (!guc->log.vma)
+	if (!log->vma)
 		return -ENODEV;
 
-	GEM_BUG_ON(guc_log_has_runtime(guc));
-
-	ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true);
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	ret = i915_gem_object_set_to_wc_domain(log->vma->obj, true);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (ret)
 		return ret;
 
@@ -413,49 +361,40 @@
 	 * buffer pages, so that we can directly get the data
 	 * (up-to-date) from memory.
 	 */
-	vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC);
+	vaddr = i915_gem_object_pin_map(log->vma->obj, I915_MAP_WC);
 	if (IS_ERR(vaddr)) {
 		DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
 		return PTR_ERR(vaddr);
 	}
 
-	guc->log.runtime.buf_addr = vaddr;
+	log->relay.buf_addr = vaddr;
 
 	return 0;
 }
 
-static void guc_log_runtime_destroy(struct intel_guc *guc)
+static void guc_log_unmap(struct intel_guc_log *log)
 {
-	/*
-	 * It's possible that the runtime stuff was never allocated because
-	 * GuC log was disabled at the boot time.
-	 */
-	if (!guc_log_has_runtime(guc))
-		return;
+	lockdep_assert_held(&log->relay.lock);
 
-	i915_gem_object_unpin_map(guc->log.vma->obj);
-	guc->log.runtime.buf_addr = NULL;
+	i915_gem_object_unpin_map(log->vma->obj);
+	log->relay.buf_addr = NULL;
 }
 
-void intel_guc_log_init_early(struct intel_guc *guc)
+void intel_guc_log_init_early(struct intel_guc_log *log)
 {
-	mutex_init(&guc->log.runtime.relay_lock);
-	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
+	mutex_init(&log->relay.lock);
+	INIT_WORK(&log->relay.flush_work, capture_logs_work);
 }
 
-int intel_guc_log_relay_create(struct intel_guc *guc)
+static int guc_log_relay_create(struct intel_guc_log *log)
 {
+	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct rchan *guc_log_relay_chan;
 	size_t n_subbufs, subbuf_size;
 	int ret;
 
-	if (!i915_modparams.guc_log_level)
-		return 0;
-
-	mutex_lock(&guc->log.runtime.relay_lock);
-
-	GEM_BUG_ON(guc_log_has_relay(guc));
+	lockdep_assert_held(&log->relay.lock);
 
 	 /* Keep the size of sub buffers same as shared log buffer */
 	subbuf_size = GUC_LOG_SIZE;
@@ -468,157 +407,56 @@
 	 */
 	n_subbufs = 8;
 
-	/*
-	 * Create a relay channel, so that we have buffers for storing
-	 * the GuC firmware logs, the channel will be linked with a file
-	 * later on when debugfs is registered.
-	 */
-	guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size,
-					n_subbufs, &relay_callbacks, dev_priv);
+	guc_log_relay_chan = relay_open("guc_log",
+					dev_priv->drm.primary->debugfs_root,
+					subbuf_size, n_subbufs,
+					&relay_callbacks, dev_priv);
 	if (!guc_log_relay_chan) {
 		DRM_ERROR("Couldn't create relay chan for GuC logging\n");
 
 		ret = -ENOMEM;
-		goto err;
+		return ret;
 	}
 
 	GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
-	guc->log.runtime.relay_chan = guc_log_relay_chan;
-
-	mutex_unlock(&guc->log.runtime.relay_lock);
+	log->relay.channel = guc_log_relay_chan;
 
 	return 0;
-
-err:
-	mutex_unlock(&guc->log.runtime.relay_lock);
-	/* logging will be off */
-	i915_modparams.guc_log_level = 0;
-	return ret;
 }
 
-void intel_guc_log_relay_destroy(struct intel_guc *guc)
+static void guc_log_relay_destroy(struct intel_guc_log *log)
 {
-	mutex_lock(&guc->log.runtime.relay_lock);
+	lockdep_assert_held(&log->relay.lock);
 
-	/*
-	 * It's possible that the relay was never allocated because
-	 * GuC log was disabled at the boot time.
-	 */
-	if (!guc_log_has_relay(guc))
-		goto out_unlock;
-
-	relay_close(guc->log.runtime.relay_chan);
-	guc->log.runtime.relay_chan = NULL;
-
-out_unlock:
-	mutex_unlock(&guc->log.runtime.relay_lock);
+	relay_close(log->relay.channel);
+	log->relay.channel = NULL;
 }
 
-static int guc_log_late_setup(struct intel_guc *guc)
+static void guc_log_capture_logs(struct intel_guc_log *log)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	int ret;
-
-	if (!guc_log_has_runtime(guc)) {
-		/*
-		 * If log was disabled at boot time, then setup needed to handle
-		 * log buffer flush interrupts would not have been done yet, so
-		 * do that now.
-		 */
-		ret = intel_guc_log_relay_create(guc);
-		if (ret)
-			goto err;
-
-		mutex_lock(&dev_priv->drm.struct_mutex);
-		intel_runtime_pm_get(dev_priv);
-		ret = guc_log_runtime_create(guc);
-		intel_runtime_pm_put(dev_priv);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
-
-		if (ret)
-			goto err_relay;
-	}
-
-	ret = guc_log_relay_file_create(guc);
-	if (ret)
-		goto err_runtime;
-
-	return 0;
-
-err_runtime:
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	guc_log_runtime_destroy(guc);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-err_relay:
-	intel_guc_log_relay_destroy(guc);
-err:
-	/* logging will remain off */
-	i915_modparams.guc_log_level = 0;
-	return ret;
-}
-
-static void guc_log_capture_logs(struct intel_guc *guc)
-{
+	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 
-	guc_read_update_log_buffer(guc);
+	guc_read_update_log_buffer(log);
 
 	/*
 	 * Generally device is expected to be active only at this
 	 * time, so get/put should be really quick.
 	 */
 	intel_runtime_pm_get(dev_priv);
-	guc_log_flush_complete(guc);
+	guc_action_flush_log_complete(guc);
 	intel_runtime_pm_put(dev_priv);
 }
 
-static void guc_flush_logs(struct intel_guc *guc)
+int intel_guc_log_create(struct intel_guc_log *log)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-
-	if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level)
-		return;
-
-	/* First disable the interrupts, will be renabled afterwards */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	intel_runtime_pm_get(dev_priv);
-	gen9_disable_guc_interrupts(dev_priv);
-	intel_runtime_pm_put(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-	/*
-	 * Before initiating the forceful flush, wait for any pending/ongoing
-	 * flush to complete otherwise forceful flush may not actually happen.
-	 */
-	flush_work(&guc->log.runtime.flush_work);
-
-	/* Ask GuC to update the log buffer state */
-	intel_runtime_pm_get(dev_priv);
-	guc_log_flush(guc);
-	intel_runtime_pm_put(dev_priv);
-
-	/* GuC would have updated log buffer by now, so capture it */
-	guc_log_capture_logs(guc);
-}
-
-int intel_guc_log_create(struct intel_guc *guc)
-{
+	struct intel_guc *guc = log_to_guc(log);
 	struct i915_vma *vma;
 	unsigned long offset;
 	u32 flags;
 	int ret;
 
-	GEM_BUG_ON(guc->log.vma);
-
-	/*
-	 * We require SSE 4.1 for fast reads from the GuC log buffer and
-	 * it should be present on the chipsets supporting GuC based
-	 * submisssions.
-	 */
-	if (WARN_ON(!i915_has_memcpy_from_wc())) {
-		ret = -EINVAL;
-		goto err;
-	}
+	GEM_BUG_ON(log->vma);
 
 	vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
 	if (IS_ERR(vma)) {
@@ -626,13 +464,7 @@
 		goto err;
 	}
 
-	guc->log.vma = vma;
-
-	if (i915_modparams.guc_log_level) {
-		ret = guc_log_runtime_create(guc);
-		if (ret < 0)
-			goto err_vma;
-	}
+	log->vma = vma;
 
 	/* each allocated unit is a page */
 	flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL |
@@ -640,117 +472,159 @@
 		(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
 		(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
 
-	offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */
-	guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
+	offset = intel_guc_ggtt_offset(guc, vma) >> PAGE_SHIFT;
+	log->flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
 
 	return 0;
 
-err_vma:
-	i915_vma_unpin_and_release(&guc->log.vma);
 err:
 	/* logging will be off */
 	i915_modparams.guc_log_level = 0;
 	return ret;
 }
 
-void intel_guc_log_destroy(struct intel_guc *guc)
+void intel_guc_log_destroy(struct intel_guc_log *log)
 {
-	guc_log_runtime_destroy(guc);
-	i915_vma_unpin_and_release(&guc->log.vma);
+	i915_vma_unpin_and_release(&log->vma);
 }
 
-int intel_guc_log_control(struct intel_guc *guc, u64 control_val)
+int intel_guc_log_level_get(struct intel_guc_log *log)
 {
+	GEM_BUG_ON(!log->vma);
+	GEM_BUG_ON(i915_modparams.guc_log_level < 0);
+
+	return i915_modparams.guc_log_level;
+}
+
+int intel_guc_log_level_set(struct intel_guc_log *log, u64 val)
+{
+	struct intel_guc *guc = log_to_guc(log);
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	bool enable_logging = control_val > 0;
-	u32 verbosity;
 	int ret;
 
-	if (!guc->log.vma)
-		return -ENODEV;
+	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
+	GEM_BUG_ON(!log->vma);
+	GEM_BUG_ON(i915_modparams.guc_log_level < 0);
 
-	BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN);
-	if (control_val > 1 + GUC_LOG_VERBOSITY_MAX)
+	/*
+	 * GuC recognizes log levels from 0 to max; we use 0 as an
+	 * indication that logging should be disabled.
+	 */
+	if (val < GUC_LOG_LEVEL_DISABLED || val > GUC_LOG_LEVEL_MAX)
 		return -EINVAL;
 
-	/* This combination doesn't make sense & won't have any effect */
-	if (!enable_logging && !i915_modparams.guc_log_level)
-		return 0;
+	mutex_lock(&dev_priv->drm.struct_mutex);
 
-	verbosity = enable_logging ? control_val - 1 : 0;
+	if (i915_modparams.guc_log_level == val) {
+		ret = 0;
+		goto out_unlock;
+	}
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret)
-		return ret;
 	intel_runtime_pm_get(dev_priv);
-	ret = guc_log_control(guc, enable_logging, verbosity);
+	ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(val),
+				     GUC_LOG_LEVEL_IS_ENABLED(val),
+				     GUC_LOG_LEVEL_TO_VERBOSITY(val));
 	intel_runtime_pm_put(dev_priv);
+	if (ret) {
+		DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
+		goto out_unlock;
+	}
+
+	i915_modparams.guc_log_level = val;
+
+out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	if (ret < 0) {
-		DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
-		return ret;
-	}
-
-	if (enable_logging) {
-		i915_modparams.guc_log_level = 1 + verbosity;
-
-		/*
-		 * If log was disabled at boot time, then the relay channel file
-		 * wouldn't have been created by now and interrupts also would
-		 * not have been enabled. Try again now, just in case.
-		 */
-		ret = guc_log_late_setup(guc);
-		if (ret < 0) {
-			DRM_DEBUG_DRIVER("GuC log late setup failed %d\n", ret);
-			return ret;
-		}
-
-		/* GuC logging is currently the only user of Guc2Host interrupts */
-		mutex_lock(&dev_priv->drm.struct_mutex);
-		intel_runtime_pm_get(dev_priv);
-		gen9_enable_guc_interrupts(dev_priv);
-		intel_runtime_pm_put(dev_priv);
-		mutex_unlock(&dev_priv->drm.struct_mutex);
-	} else {
-		/*
-		 * Once logging is disabled, GuC won't generate logs & send an
-		 * interrupt. But there could be some data in the log buffer
-		 * which is yet to be captured. So request GuC to update the log
-		 * buffer state and then collect the left over logs.
-		 */
-		guc_flush_logs(guc);
-
-		/* As logging is disabled, update log level to reflect that */
-		i915_modparams.guc_log_level = 0;
-	}
-
 	return ret;
 }
 
-void i915_guc_log_register(struct drm_i915_private *dev_priv)
+bool intel_guc_log_relay_enabled(const struct intel_guc_log *log)
 {
-	if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level)
-		return;
-
-	guc_log_late_setup(&dev_priv->guc);
+	return log->relay.buf_addr;
 }
 
-void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
+int intel_guc_log_relay_open(struct intel_guc_log *log)
 {
-	struct intel_guc *guc = &dev_priv->guc;
+	int ret;
 
-	if (!USES_GUC_SUBMISSION(dev_priv))
-		return;
+	mutex_lock(&log->relay.lock);
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	/* GuC logging is currently the only user of Guc2Host interrupts */
-	intel_runtime_pm_get(dev_priv);
-	gen9_disable_guc_interrupts(dev_priv);
-	intel_runtime_pm_put(dev_priv);
+	if (intel_guc_log_relay_enabled(log)) {
+		ret = -EEXIST;
+		goto out_unlock;
+	}
 
-	guc_log_runtime_destroy(guc);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	/*
+	 * We require SSE 4.1 for fast reads from the GuC log buffer and
+	 * it should be present on the chipsets supporting GuC based
+	 * submissions.
+	 */
+	if (!i915_has_memcpy_from_wc()) {
+		ret = -ENXIO;
+		goto out_unlock;
+	}
 
-	intel_guc_log_relay_destroy(guc);
+	ret = guc_log_relay_create(log);
+	if (ret)
+		goto out_unlock;
+
+	ret = guc_log_map(log);
+	if (ret)
+		goto out_relay;
+
+	mutex_unlock(&log->relay.lock);
+
+	guc_log_enable_flush_events(log);
+
+	/*
+	 * When GuC is logging without us relaying to userspace, we're ignoring
+	 * the flush notification. This means that we need to unconditionally
+	 * flush on relay enabling, since GuC only notifies us once.
+	 */
+	queue_work(log->relay.flush_wq, &log->relay.flush_work);
+
+	return 0;
+
+out_relay:
+	guc_log_relay_destroy(log);
+out_unlock:
+	mutex_unlock(&log->relay.lock);
+
+	return ret;
+}
+
+void intel_guc_log_relay_flush(struct intel_guc_log *log)
+{
+	struct intel_guc *guc = log_to_guc(log);
+	struct drm_i915_private *i915 = guc_to_i915(guc);
+
+	/*
+	 * Before initiating the forceful flush, wait for any pending/ongoing
+	 * flush to complete otherwise forceful flush may not actually happen.
+	 */
+	flush_work(&log->relay.flush_work);
+
+	intel_runtime_pm_get(i915);
+	guc_action_flush_log(guc);
+	intel_runtime_pm_put(i915);
+
+	/* GuC would have updated log buffer by now, so capture it */
+	guc_log_capture_logs(log);
+}
+
+void intel_guc_log_relay_close(struct intel_guc_log *log)
+{
+	guc_log_disable_flush_events(log);
+	flush_work(&log->relay.flush_work);
+
+	mutex_lock(&log->relay.lock);
+	GEM_BUG_ON(!intel_guc_log_relay_enabled(log));
+	guc_log_unmap(log);
+	guc_log_relay_destroy(log);
+	mutex_unlock(&log->relay.lock);
+}
+
+void intel_guc_log_handle_flush_event(struct intel_guc_log *log)
+{
+	queue_work(log->relay.flush_wq, &log->relay.flush_work);
 }
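
A hypothetical caller-side sequence for the relay API introduced above (the
wrapper name is illustrative; error handling abbreviated):

	static int example_drain_guc_log(struct intel_guc_log *log)
	{
		int err;

		err = intel_guc_log_relay_open(log);	/* create channel, map buffer */
		if (err)
			return err;

		intel_guc_log_relay_flush(log);	/* force GuC flush, then capture */
		intel_guc_log_relay_close(log);	/* unmap buffer, destroy channel */
		return 0;
	}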
diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h
index dab0e94..fa80535 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/intel_guc_log.h
@@ -25,11 +25,12 @@
 #ifndef _INTEL_GUC_LOG_H_
 #define _INTEL_GUC_LOG_H_
 
+#include <linux/mutex.h>
+#include <linux/relay.h>
 #include <linux/workqueue.h>
 
 #include "intel_guc_fwif.h"
 
-struct drm_i915_private;
 struct intel_guc;
 
 /*
@@ -39,33 +40,53 @@
 #define GUC_LOG_SIZE	((1 + GUC_LOG_DPC_PAGES + 1 + GUC_LOG_ISR_PAGES + \
 			  1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT)
 
+/*
+ * While we're using a plain log level in i915, the GuC controls are much
+ * more... "elaborate"? We have a couple of bits for verbosity, a separate
+ * bit for actual log enabling, and a separate bit for default logging -
+ * which "conveniently" ignores the enable bit.
+ */
+#define GUC_LOG_LEVEL_DISABLED		0
+#define GUC_LOG_LEVEL_NON_VERBOSE	1
+#define GUC_LOG_LEVEL_IS_ENABLED(x)	((x) > GUC_LOG_LEVEL_DISABLED)
+#define GUC_LOG_LEVEL_IS_VERBOSE(x)	((x) > GUC_LOG_LEVEL_NON_VERBOSE)
+#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({		\
+	typeof(x) _x = (x);				\
+	GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0;	\
+})
+#define GUC_VERBOSITY_TO_LOG_LEVEL(x)	((x) + 2)
+#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
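
To make the mapping concrete: level 0 disables logging, level 1 enables
non-verbose logging, and level N >= 2 enables verbose logging with GuC
verbosity N - 2. A minimal sketch mirroring GUC_LOG_LEVEL_TO_VERBOSITY
(illustrative helper name):

	static inline u32 example_level_to_verbosity(u32 level)
	{
		/* levels 0 and 1 carry no verbosity; level N >= 2 maps to N - 2 */
		return level > GUC_LOG_LEVEL_NON_VERBOSE ? level - 2 : 0;
	}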
+
 struct intel_guc_log {
 	u32 flags;
 	struct i915_vma *vma;
-	/* The runtime stuff gets created only when GuC logging gets enabled */
 	struct {
 		void *buf_addr;
 		struct workqueue_struct *flush_wq;
 		struct work_struct flush_work;
-		struct rchan *relay_chan;
-		/* To serialize the access to relay_chan */
-		struct mutex relay_lock;
-	} runtime;
+		struct rchan *channel;
+		struct mutex lock;
+		u32 full_count;
+	} relay;
 	/* logging related stats */
-	u32 capture_miss_count;
-	u32 flush_interrupt_count;
-	u32 prev_overflow_count[GUC_MAX_LOG_BUFFER];
-	u32 total_overflow_count[GUC_MAX_LOG_BUFFER];
-	u32 flush_count[GUC_MAX_LOG_BUFFER];
+	struct {
+		u32 sampled_overflow;
+		u32 overflow;
+		u32 flush;
+	} stats[GUC_MAX_LOG_BUFFER];
 };
 
-int intel_guc_log_create(struct intel_guc *guc);
-void intel_guc_log_destroy(struct intel_guc *guc);
-void intel_guc_log_init_early(struct intel_guc *guc);
-int intel_guc_log_relay_create(struct intel_guc *guc);
-void intel_guc_log_relay_destroy(struct intel_guc *guc);
-int intel_guc_log_control(struct intel_guc *guc, u64 control_val);
-void i915_guc_log_register(struct drm_i915_private *dev_priv);
-void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
+void intel_guc_log_init_early(struct intel_guc_log *log);
+int intel_guc_log_create(struct intel_guc_log *log);
+void intel_guc_log_destroy(struct intel_guc_log *log);
+
+int intel_guc_log_level_get(struct intel_guc_log *log);
+int intel_guc_log_level_set(struct intel_guc_log *log, u64 control_val);
+bool intel_guc_log_relay_enabled(const struct intel_guc_log *log);
+int intel_guc_log_relay_open(struct intel_guc_log *log);
+void intel_guc_log_relay_flush(struct intel_guc_log *log);
+void intel_guc_log_relay_close(struct intel_guc_log *log);
+
+void intel_guc_log_handle_flush_event(struct intel_guc_log *log);
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_guc_reg.h b/drivers/gpu/drm/i915/intel_guc_reg.h
index 19a9247..d860847 100644
--- a/drivers/gpu/drm/i915/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/intel_guc_reg.h
@@ -66,22 +66,20 @@
 #define   UOS_MOVE			  (1<<4)
 #define   START_DMA			  (1<<0)
 #define DMA_GUC_WOPCM_OFFSET		_MMIO(0xc340)
+#define   GUC_WOPCM_OFFSET_VALID	  (1<<0)
 #define   HUC_LOADING_AGENT_VCR		  (0<<1)
 #define   HUC_LOADING_AGENT_GUC		  (1<<1)
-#define   GUC_WOPCM_OFFSET_VALUE	  0x80000	/* 512KB */
+#define   GUC_WOPCM_OFFSET_SHIFT	14
+#define   GUC_WOPCM_OFFSET_MASK		  (0x3ffff << GUC_WOPCM_OFFSET_SHIFT)
 #define GUC_MAX_IDLE_COUNT		_MMIO(0xC3E4)
 
 #define HUC_STATUS2             _MMIO(0xD3B0)
 #define   HUC_FW_VERIFIED       (1<<7)
 
-/* Defines WOPCM space available to GuC firmware */
 #define GUC_WOPCM_SIZE			_MMIO(0xc050)
-/* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */
-#define   GUC_WOPCM_TOP			  (0x80 << 12)	/* 512KB */
-#define   BXT_GUC_WOPCM_RC6_RESERVED	  (0x10 << 12)	/* 64KB  */
-
-/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
-#define GUC_GGTT_TOP			0xFEE00000
+#define   GUC_WOPCM_SIZE_LOCKED		  (1<<0)
+#define   GUC_WOPCM_SIZE_SHIFT		12
+#define   GUC_WOPCM_SIZE_MASK		  (0xfffff << GUC_WOPCM_SIZE_SHIFT)
 
 #define GEN8_GT_PM_CONFIG		_MMIO(0x138140)
 #define GEN9LP_GT_PM_CONFIG		_MMIO(0x138140)
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 8a8ad2f..2feb650 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -124,9 +124,17 @@
 	return 0;
 }
 
+static bool has_doorbell(struct intel_guc_client *client)
+{
+	if (client->doorbell_id == GUC_DOORBELL_INVALID)
+		return false;
+
+	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
+}
+
 static void unreserve_doorbell(struct intel_guc_client *client)
 {
-	GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
+	GEM_BUG_ON(!has_doorbell(client));
 
 	__clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
 	client->doorbell_id = GUC_DOORBELL_INVALID;
@@ -184,14 +192,6 @@
 	return client->vaddr + client->doorbell_offset;
 }
 
-static bool has_doorbell(struct intel_guc_client *client)
-{
-	if (client->doorbell_id == GUC_DOORBELL_INVALID)
-		return false;
-
-	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
-}
-
 static void __create_doorbell(struct intel_guc_client *client)
 {
 	struct guc_doorbell_info *doorbell;
@@ -207,7 +207,6 @@
 	struct guc_doorbell_info *doorbell;
 	u16 db_id = client->doorbell_id;
 
-
 	doorbell = __get_doorbell(client);
 	doorbell->db_status = GUC_DOORBELL_DISABLED;
 	doorbell->cookie = 0;
@@ -224,6 +223,9 @@
 {
 	int ret;
 
+	if (WARN_ON(!has_doorbell(client)))
+		return -ENODEV; /* internal setup error, should never happen */
+
 	__update_doorbell_desc(client, client->doorbell_id);
 	__create_doorbell(client);
 
@@ -231,8 +233,8 @@
 	if (ret) {
 		__destroy_doorbell(client);
 		__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
-		DRM_ERROR("Couldn't create client %u doorbell: %d\n",
-			  client->stage_id, ret);
+		DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n",
+				 client->stage_id, ret);
 		return ret;
 	}
 
@@ -362,7 +364,7 @@
 	desc->db_id = client->doorbell_id;
 
 	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
-		struct intel_context *ce = &ctx->engine[engine->id];
+		struct intel_context *ce = to_intel_context(ctx, engine);
 		u32 guc_engine_id = engine->guc_id;
 		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
 
@@ -386,8 +388,8 @@
 		lrc->context_desc = lower_32_bits(ce->lrc_desc);
 
 		/* The state page is after PPHWSP */
-		lrc->ring_lrca =
-			guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
+		lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) +
+				 LRC_STATE_PN * PAGE_SIZE;
 
 		/* XXX: In direct submission, the GuC wants the HW context id
 		 * here. In proxy submission, it wants the stage id
@@ -395,7 +397,7 @@
 		lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
 				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
 
-		lrc->ring_begin = guc_ggtt_offset(ce->ring->vma);
+		lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma);
 		lrc->ring_end = lrc->ring_begin + ce->ring->size - 1;
 		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;
@@ -411,7 +413,7 @@
 	 * The doorbell, process descriptor, and workqueue are all parts
 	 * of the client object, which the GuC will reference via the GGTT
 	 */
-	gfx_addr = guc_ggtt_offset(client->vma);
+	gfx_addr = intel_guc_ggtt_offset(guc, client->vma);
 	desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
 				client->doorbell_offset;
 	desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client));
@@ -584,7 +586,7 @@
 	data[3] = engine->guc_id;
 	data[4] = guc->execbuf_client->priority;
 	data[5] = guc->execbuf_client->stage_id;
-	data[6] = guc_ggtt_offset(guc->shared_data);
+	data[6] = intel_guc_ggtt_offset(guc, guc->shared_data);
 
 	if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) {
 		execlists_clear_active(&engine->execlists,
@@ -657,7 +659,17 @@
 	port_set(port, i915_request_get(rq));
 }
 
-static void guc_dequeue(struct intel_engine_cs *engine)
+static inline int rq_prio(const struct i915_request *rq)
+{
+	return rq->sched.attr.priority;
+}
+
+static inline int port_prio(const struct execlist_port *port)
+{
+	return rq_prio(port_request(port));
+}
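
The dequeue path below only preempts when the queued priority beats the
running port's priority clamped to no less than zero. A sketch of that test,
under the assumption that __execlists_need_preempt() implements this
comparison (the helper here is illustrative):

	static inline bool example_need_preempt(int queue_prio, int running_prio)
	{
		/* never preempt unless we beat both the running request and idle (0) */
		return queue_prio > (running_prio > 0 ? running_prio : 0);
	}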
+
+static bool __guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -667,28 +679,29 @@
 	bool submit = false;
 	struct rb_node *rb;
 
-	spin_lock_irq(&engine->timeline->lock);
+	lockdep_assert_held(&engine->timeline.lock);
+
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
 
 	if (port_isset(port)) {
-		if (engine->i915->preempt_context) {
+		if (intel_engine_has_preemption(engine)) {
 			struct guc_preempt_work *preempt_work =
 				&engine->i915->guc.preempt_work[engine->id];
+			int prio = execlists->queue_priority;
 
-			if (execlists->queue_priority >
-			    max(port_request(port)->priotree.priority, 0)) {
+			if (__execlists_need_preempt(prio, port_prio(port))) {
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_PREEMPT);
 				queue_work(engine->i915->guc.preempt_wq,
 					   &preempt_work->work);
-				goto unlock;
+				return false;
 			}
 		}
 
 		port++;
 		if (port_isset(port))
-			goto unlock;
+			return false;
 	}
 	GEM_BUG_ON(port_isset(port));
 
@@ -696,11 +709,11 @@
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
 			if (last && rq->ctx != last->ctx) {
 				if (port == last_port) {
 					__list_del_many(&p->requests,
-							&rq->priotree.link);
+							&rq->sched.link);
 					goto done;
 				}
 
@@ -709,7 +722,7 @@
 				port++;
 			}
 
-			INIT_LIST_HEAD(&rq->priotree.link);
+			INIT_LIST_HEAD(&rq->sched.link);
 
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
@@ -726,19 +739,34 @@
 done:
 	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
 	execlists->first = rb;
-	if (submit) {
+	if (submit)
 		port_assign(port, last);
-		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
-		guc_submit(engine);
-	}
+	if (last)
+		execlists_user_begin(execlists, execlists->port);
 
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(port_isset(execlists->port) &&
 		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
 
-unlock:
-	spin_unlock_irq(&engine->timeline->lock);
+	return submit;
+}
+
+static void guc_dequeue(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+	bool submit;
+
+	local_irq_save(flags);
+
+	spin_lock(&engine->timeline.lock);
+	submit = __guc_dequeue(engine);
+	spin_unlock(&engine->timeline.lock);
+
+	if (submit)
+		guc_submit(engine);
+
+	local_irq_restore(flags);
 }
 
 static void guc_submission_tasklet(unsigned long data)
@@ -748,17 +776,20 @@
 	struct execlist_port *port = execlists->port;
 	struct i915_request *rq;
 
-	rq = port_request(&port[0]);
+	rq = port_request(port);
 	while (rq && i915_request_completed(rq)) {
 		trace_i915_request_out(rq);
 		i915_request_put(rq);
 
-		execlists_port_complete(execlists, port);
-
-		rq = port_request(&port[0]);
+		port = execlists_port_complete(execlists, port);
+		if (port_isset(port)) {
+			execlists_user_begin(execlists, port);
+			rq = port_request(port);
+		} else {
+			execlists_user_end(execlists);
+			rq = NULL;
+		}
 	}
-	if (!rq)
-		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 
 	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
 	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
@@ -977,7 +1008,8 @@
 	enum intel_engine_id id;
 
 	for_each_engine(engine, dev_priv, id) {
-		struct intel_context *ce = &client->owner->engine[id];
+		struct intel_context *ce =
+			to_intel_context(client->owner, engine);
 		u32 addr = intel_hws_preempt_done_address(engine);
 		u32 *cs;
 
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index c8ea510..d47e346 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -246,9 +246,8 @@
 	 */
 	tmp = I915_READ_CTL(engine);
 	if (tmp & RING_WAIT) {
-		i915_handle_error(dev_priv, BIT(engine->id),
-				  "Kicking stuck wait on %s",
-				  engine->name);
+		i915_handle_error(dev_priv, BIT(engine->id), 0,
+				  "stuck wait on %s", engine->name);
 		I915_WRITE_CTL(engine, tmp);
 		return ENGINE_WAIT_KICK;
 	}
@@ -258,8 +257,8 @@
 		default:
 			return ENGINE_DEAD;
 		case 1:
-			i915_handle_error(dev_priv, ALL_ENGINES,
-					  "Kicking stuck semaphore on %s",
+			i915_handle_error(dev_priv, ALL_ENGINES, 0,
+					  "stuck semaphore on %s",
 					  engine->name);
 			I915_WRITE_CTL(engine, tmp);
 			return ENGINE_WAIT_KICK;
@@ -357,7 +356,7 @@
 		break;
 
 	case ENGINE_DEAD:
-		if (drm_debug & DRM_UT_DRIVER) {
+		if (GEM_SHOW_DEBUG()) {
 			struct drm_printer p = drm_debug_printer("hangcheck");
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 		}
@@ -386,13 +385,13 @@
 	if (stuck != hung)
 		hung &= ~stuck;
 	len = scnprintf(msg, sizeof(msg),
-			"%s on ", stuck == hung ? "No progress" : "Hang");
+			"%s on ", stuck == hung ? "no progress" : "hang");
 	for_each_engine_masked(engine, i915, hung, tmp)
 		len += scnprintf(msg + len, sizeof(msg) - len,
 				 "%s, ", engine->name);
 	msg[len-2] = '\0';
 
-	return i915_handle_error(i915, hung, "%s", msg);
+	return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg);
 }
 
 /*
@@ -453,6 +452,7 @@
 void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
 {
 	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
+	engine->hangcheck.action_timestamp = jiffies;
 }
 
 void intel_hangcheck_init(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c
index 14ca5d3..2db5da5 100644
--- a/drivers/gpu/drm/i915/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/intel_hdcp.c
@@ -37,6 +37,43 @@
 	return 0;
 }
 
+static bool hdcp_key_loadable(struct drm_i915_private *dev_priv)
+{
+	struct i915_power_domains *power_domains = &dev_priv->power_domains;
+	struct i915_power_well *power_well;
+	enum i915_power_well_id id;
+	bool enabled = false;
+
+	/*
+	 * On HSW and BDW, the display HW loads the key as soon as the display
+	 * resumes. On all BXT+, SW can load the keys only when PW#1 is turned
+	 * on.
+	 */
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		id = HSW_DISP_PW_GLOBAL;
+	else
+		id = SKL_DISP_PW_1;
+
+	mutex_lock(&power_domains->lock);
+
+	/* PG1 (power well #1) needs to be enabled */
+	for_each_power_well(dev_priv, power_well) {
+		if (power_well->id == id) {
+			enabled = power_well->ops->is_enabled(dev_priv,
+							      power_well);
+			break;
+		}
+	}
+	mutex_unlock(&power_domains->lock);
+
+	/*
+	 * Another requirement for HDCP key loadability is that the PLL for
+	 * cdclk is enabled. Without an active CRTC we won't land here, so we
+	 * assume that cdclk is already on.
+	 */
+
+	return enabled;
+}
+
 static void intel_hdcp_clear_keys(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(HDCP_KEY_CONF, HDCP_CLEAR_KEYS_TRIGGER);
@@ -142,53 +179,17 @@
 	return true;
 }
 
-/* Implements Part 2 of the HDCP authorization procedure */
 static
-int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port,
-			       const struct intel_hdcp_shim *shim)
+int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port,
+				const struct intel_hdcp_shim *shim,
+				u8 *ksv_fifo, u8 num_downstream, u8 *bstatus)
 {
 	struct drm_i915_private *dev_priv;
 	u32 vprime, sha_text, sha_leftovers, rep_ctl;
-	u8 bstatus[2], num_downstream, *ksv_fifo;
 	int ret, i, j, sha_idx;
 
 	dev_priv = intel_dig_port->base.base.dev->dev_private;
 
-	ret = intel_hdcp_poll_ksv_fifo(intel_dig_port, shim);
-	if (ret) {
-		DRM_ERROR("KSV list failed to become ready (%d)\n", ret);
-		return ret;
-	}
-
-	ret = shim->read_bstatus(intel_dig_port, bstatus);
-	if (ret)
-		return ret;
-
-	if (DRM_HDCP_MAX_DEVICE_EXCEEDED(bstatus[0]) ||
-	    DRM_HDCP_MAX_CASCADE_EXCEEDED(bstatus[1])) {
-		DRM_ERROR("Max Topology Limit Exceeded\n");
-		return -EPERM;
-	}
-
-	/*
-	 * When repeater reports 0 device count, HDCP1.4 spec allows disabling
-	 * the HDCP encryption. That implies that repeater can't have its own
-	 * display. As there is no consumption of encrypted content in the
-	 * repeater with 0 downstream devices, we are failing the
-	 * authentication.
-	 */
-	num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]);
-	if (num_downstream == 0)
-		return -EINVAL;
-
-	ksv_fifo = kzalloc(num_downstream * DRM_HDCP_KSV_LEN, GFP_KERNEL);
-	if (!ksv_fifo)
-		return -ENOMEM;
-
-	ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo);
-	if (ret)
-		return ret;
-
 	/* Process V' values from the receiver */
 	for (i = 0; i < DRM_HDCP_V_PRIME_NUM_PARTS; i++) {
 		ret = shim->read_v_prime_part(intel_dig_port, i, &vprime);
@@ -353,7 +354,8 @@
 			return ret;
 		sha_idx += sizeof(sha_text);
 	} else {
-		DRM_ERROR("Invalid number of leftovers %d\n", sha_leftovers);
+		DRM_DEBUG_KMS("Invalid number of leftovers %d\n",
+			      sha_leftovers);
 		return -EINVAL;
 	}
 
@@ -381,17 +383,83 @@
 	if (intel_wait_for_register(dev_priv, HDCP_REP_CTL,
 				    HDCP_SHA1_COMPLETE,
 				    HDCP_SHA1_COMPLETE, 1)) {
-		DRM_ERROR("Timed out waiting for SHA1 complete\n");
+		DRM_DEBUG_KMS("Timed out waiting for SHA1 complete\n");
 		return -ETIMEDOUT;
 	}
 	if (!(I915_READ(HDCP_REP_CTL) & HDCP_SHA1_V_MATCH)) {
-		DRM_ERROR("SHA-1 mismatch, HDCP failed\n");
+		DRM_DEBUG_KMS("SHA-1 mismatch, HDCP failed\n");
 		return -ENXIO;
 	}
 
+	return 0;
+}
+
+/* Implements Part 2 of the HDCP authorization procedure */
+static
+int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port,
+			       const struct intel_hdcp_shim *shim)
+{
+	u8 bstatus[2], num_downstream, *ksv_fifo;
+	int ret, i, tries = 3;
+
+	ret = intel_hdcp_poll_ksv_fifo(intel_dig_port, shim);
+	if (ret) {
+		DRM_ERROR("KSV list failed to become ready (%d)\n", ret);
+		return ret;
+	}
+
+	ret = shim->read_bstatus(intel_dig_port, bstatus);
+	if (ret)
+		return ret;
+
+	if (DRM_HDCP_MAX_DEVICE_EXCEEDED(bstatus[0]) ||
+	    DRM_HDCP_MAX_CASCADE_EXCEEDED(bstatus[1])) {
+		DRM_ERROR("Max Topology Limit Exceeded\n");
+		return -EPERM;
+	}
+
+	/*
+	 * When the repeater reports a device count of 0, the HDCP 1.4 spec
+	 * allows disabling HDCP encryption, which implies the repeater can't
+	 * have its own display. As no encrypted content is consumed in a
+	 * repeater with 0 downstream devices, we fail the authentication.
+	 */
+	num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]);
+	if (num_downstream == 0)
+		return -EINVAL;
+
+	ksv_fifo = kzalloc(num_downstream * DRM_HDCP_KSV_LEN, GFP_KERNEL);
+	if (!ksv_fifo)
+		return -ENOMEM;
+
+	ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo);
+	if (ret)
+		goto err;
+
+	/*
+	 * When V' mismatches, the DP spec mandates re-reading
+	 * V' at least twice.
+	 */
+	for (i = 0; i < tries; i++) {
+		ret = intel_hdcp_validate_v_prime(intel_dig_port, shim,
+						  ksv_fifo, num_downstream,
+						  bstatus);
+		if (!ret)
+			break;
+	}
+
+	if (i == tries) {
+		DRM_ERROR("V Prime validation failed.(%d)\n", ret);
+		goto err;
+	}
+
 	DRM_DEBUG_KMS("HDCP is enabled (%d downstream devices)\n",
 		      num_downstream);
-	return 0;
+	ret = 0;
+err:
+	kfree(ksv_fifo);
+	return ret;
 }
 
 /* Implements Part 1 of the HDCP authorization procedure */
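Factoring V' validation into intel_hdcp_validate_v_prime() lets intel_hdcp_auth_downstream() above retry it in a bounded loop. A minimal sketch of that retry shape, with stand-in names rather than the driver's types:

/*
 * Hedged sketch of the bounded-retry pattern used for V' above;
 * validate() stands in for intel_hdcp_validate_v_prime().
 */
static int retry_validate(int (*validate)(void *ctx), void *ctx, int tries)
{
	int ret = -EINVAL;
	int i;

	for (i = 0; i < tries; i++) {
		ret = validate(ctx);
		if (!ret)
			break;	/* success, stop retrying */
	}

	/* i == tries means every attempt failed; ret holds the last error */
	return i == tries ? ret : 0;
}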
@@ -506,15 +574,26 @@
 	 */
 	wait_remaining_ms_from_jiffies(r0_prime_gen_start, 300);
 
-	ri.reg = 0;
-	ret = shim->read_ri_prime(intel_dig_port, ri.shim);
-	if (ret)
-		return ret;
-	I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg);
+	tries = 3;
 
-	/* Wait for Ri prime match */
-	if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) &
-		     (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) {
+	/*
+	 * The DP HDCP spec mandates two more attempts to read R0' in case
+	 * of an R0 mismatch.
+	 */
+	for (i = 0; i < tries; i++) {
+		ri.reg = 0;
+		ret = shim->read_ri_prime(intel_dig_port, ri.shim);
+		if (ret)
+			return ret;
+		I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg);
+
+		/* Wait for Ri prime match */
+		if (!wait_for(I915_READ(PORT_HDCP_STATUS(port)) &
+		    (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1))
+			break;
+	}
+
+	if (i == tries) {
 		DRM_ERROR("Timed out waiting for Ri prime match (%x)\n",
 			  I915_READ(PORT_HDCP_STATUS(port)));
 		return -ETIMEDOUT;
@@ -580,8 +659,8 @@
 	DRM_DEBUG_KMS("[%s:%d] HDCP is being enabled...\n",
 		      connector->base.name, connector->base.base.id);
 
-	if (!(I915_READ(SKL_FUSE_STATUS) & SKL_FUSE_PG_DIST_STATUS(1))) {
-		DRM_ERROR("PG1 is disabled, cannot load keys\n");
+	if (!hdcp_key_loadable(dev_priv)) {
+		DRM_ERROR("HDCP key Load is not possible\n");
 		return -ENXIO;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 1baef4a..ee929f3 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -2082,41 +2082,33 @@
  * it enables scrambling. This should be called before enabling the HDMI
  * 2.0 port, as the sink can choose to disable the scrambling if it doesn't
  * detect a scrambled clock within 100 ms.
+ *
+ * Returns:
+ * True on success, false on failure.
  */
-void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
+bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
 				       struct drm_connector *connector,
 				       bool high_tmds_clock_ratio,
 				       bool scrambling)
 {
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
-	struct drm_i915_private *dev_priv = connector->dev->dev_private;
 	struct drm_scrambling *sink_scrambling =
-				&connector->display_info.hdmi.scdc.scrambling;
-	struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv,
-							   intel_hdmi->ddc_bus);
-	bool ret;
+		&connector->display_info.hdmi.scdc.scrambling;
+	struct i2c_adapter *adapter =
+		intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
 
 	if (!sink_scrambling->supported)
-		return;
+		return true;
 
-	DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n",
-		      encoder->base.name, connector->name);
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] scrambling=%s, TMDS bit clock ratio=1/%d\n",
+		      connector->base.id, connector->name,
+		      yesno(scrambling), high_tmds_clock_ratio ? 40 : 10);
 
-	/* Set TMDS bit clock ratio to 1/40 or 1/10 */
-	ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio);
-	if (!ret) {
-		DRM_ERROR("Set TMDS ratio failed\n");
-		return;
-	}
-
-	/* Enable/disable sink scrambling */
-	ret = drm_scdc_set_scrambling(adptr, scrambling);
-	if (!ret) {
-		DRM_ERROR("Set sink scrambling failed\n");
-		return;
-	}
-
-	DRM_DEBUG_KMS("sink scrambling handled\n");
+	/* Set TMDS bit clock ratio to 1/40 or 1/10, and enable/disable scrambling */
+	return drm_scdc_set_high_tmds_clock_ratio(adapter,
+						  high_tmds_clock_ratio) &&
+		drm_scdc_set_scrambling(adapter, scrambling);
 }
 
 static u8 chv_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port)
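Since intel_hdmi_handle_sink_scrambling() now returns a bool, callers can propagate SCDC failures instead of having them logged and swallowed here. A hypothetical call site (the surrounding code and log message are assumptions, not part of this patch):

/* Hedged sketch of a caller checking the new return value. */
if (!intel_hdmi_handle_sink_scrambling(encoder, connector,
				       high_tmds_clock_ratio,
				       scrambling))
	DRM_DEBUG_KMS("SCDC configuration failed, sink may not sync\n");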
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index 0e3d3e8..43aa92b 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -100,6 +100,8 @@
 		if (IS_CNL_WITH_PORT_F(dev_priv))
 			return PORT_F;
 		return PORT_E;
+	case HPD_PORT_F:
+		return PORT_F;
 	default:
 		return PORT_NONE; /* no port for this pin */
 	}
@@ -132,6 +134,7 @@
 	case PORT_F:
 		if (IS_CNL_WITH_PORT_F(dev_priv))
 			return HPD_PORT_E;
+		return HPD_PORT_F;
 	default:
 		MISSING_CASE(port);
 		return HPD_NONE;
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index 65e2afb..2912852 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -55,7 +55,7 @@
 		return -ENOEXEC;
 
 	vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0,
-				PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+				       PIN_OFFSET_BIAS | guc->ggtt_pin_bias);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret);
@@ -63,7 +63,8 @@
 	}
 
 	ret = intel_guc_auth_huc(guc,
-				 guc_ggtt_offset(vma) + huc->fw.rsa_offset);
+				 intel_guc_ggtt_offset(guc, vma) +
+				 huc->fw.rsa_offset);
 	if (ret) {
 		DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
 		goto fail_unpin;
@@ -91,3 +92,28 @@
 	DRM_ERROR("HuC: Authentication failed %d\n", ret);
 	return ret;
 }
+
+/**
+ * intel_huc_check_status() - check HuC status
+ * @huc: intel_huc structure
+ *
+ * This function reads the status register to verify whether HuC
+ * firmware was successfully loaded.
+ *
+ * Returns a positive value if HuC firmware is loaded and verified,
+ * 0 if it is not, and -ENODEV if HuC is not present.
+ */
+int intel_huc_check_status(struct intel_huc *huc)
+{
+	struct drm_i915_private *dev_priv = huc_to_i915(huc);
+	u32 status;
+
+	if (!HAS_HUC(dev_priv))
+		return -ENODEV;
+
+	intel_runtime_pm_get(dev_priv);
+	status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED;
+	intel_runtime_pm_put(dev_priv);
+
+	return status;
+}
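A plausible consumer of the new intel_huc_check_status() is a userspace status query. A hedged sketch (the query_huc_status() wrapper is hypothetical; only intel_huc_check_status() comes from this patch):

/* Hedged sketch: surface the HuC load status to a caller. */
static int query_huc_status(struct drm_i915_private *dev_priv, int *value)
{
	int status = intel_huc_check_status(&dev_priv->huc);

	if (status < 0)
		return status;	/* -ENODEV when HuC is absent */

	*value = status;	/* non-zero iff the firmware is verified */
	return 0;
}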
diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h
index 5d6e804..aa85490 100644
--- a/drivers/gpu/drm/i915/intel_huc.h
+++ b/drivers/gpu/drm/i915/intel_huc.h
@@ -37,5 +37,12 @@
 
 void intel_huc_init_early(struct intel_huc *huc);
 int intel_huc_auth(struct intel_huc *huc);
+int intel_huc_check_status(struct intel_huc *huc);
+
+static inline int intel_huc_sanitize(struct intel_huc *huc)
+{
+	intel_uc_fw_sanitize(&huc->fw);
+	return 0;
+}
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c
index c66afa9..f93d238 100644
--- a/drivers/gpu/drm/i915/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/intel_huc_fw.c
@@ -118,7 +118,8 @@
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
 	/* Set the source address for the uCode */
-	offset = guc_ggtt_offset(vma) + huc_fw->header_offset;
+	offset = intel_guc_ggtt_offset(&dev_priv->guc, vma) +
+		 huc_fw->header_offset;
 	I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset));
 	I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF);
 
@@ -154,9 +155,8 @@
  * Called from intel_uc_init_hw() during driver load, resume from sleep and
  * after a GPU reset. Note that HuC must be loaded before GuC.
  *
- * The firmware image should have already been fetched into memory by the
- * earlier call to intel_uc_init_fw(), so here we need to only check that
- * fetch succeeded, and then transfer the image to the h/w.
+ * The firmware image should have already been fetched into memory, so only
+ * check that fetch succeeded, and then transfer the image to the h/w.
  *
  * Return:	non-zero code on error
  */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8704f7f..15434ca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -139,6 +139,7 @@
 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
+#include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
 #define RING_EXECLIST1_VALID		(1 << 0x3)
@@ -176,14 +177,16 @@
 
 static inline int rq_prio(const struct i915_request *rq)
 {
-	return rq->priotree.priority;
+	return rq->sched.attr.priority;
 }
 
 static inline bool need_preempt(const struct intel_engine_cs *engine,
 				const struct i915_request *last,
 				int prio)
 {
-	return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+	return (intel_engine_has_preemption(engine) &&
+		__execlists_need_preempt(prio, rq_prio(last)) &&
+		!i915_request_completed(last));
 }
 
 /**
@@ -221,7 +224,7 @@
 intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
 				   struct intel_engine_cs *engine)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 	u64 desc;
 
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
@@ -255,9 +258,7 @@
 }
 
 static struct i915_priolist *
-lookup_priolist(struct intel_engine_cs *engine,
-		struct i915_priotree *pt,
-		int prio)
+lookup_priolist(struct intel_engine_cs *engine, int prio)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct i915_priolist *p;
@@ -328,10 +329,10 @@
 	struct i915_priolist *uninitialized_var(p);
 	int last_prio = I915_PRIORITY_INVALID;
 
-	lockdep_assert_held(&engine->timeline->lock);
+	lockdep_assert_held(&engine->timeline.lock);
 
 	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->timeline->requests,
+					 &engine->timeline.requests,
 					 link) {
 		if (i915_request_completed(rq))
 			return;
@@ -342,10 +343,11 @@
 		GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
 		if (rq_prio(rq) != last_prio) {
 			last_prio = rq_prio(rq);
-			p = lookup_priolist(engine, &rq->priotree, last_prio);
+			p = lookup_priolist(engine, last_prio);
 		}
 
-		list_add(&rq->priotree.link, &p->requests);
+		GEM_BUG_ON(p->priority != rq_prio(rq));
+		list_add(&rq->sched.link, &p->requests);
 	}
 }
 
@@ -354,10 +356,13 @@
 {
 	struct intel_engine_cs *engine =
 		container_of(execlists, typeof(*engine), execlists);
+	unsigned long flags;
 
-	spin_lock_irq(&engine->timeline->lock);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+
 	__unwind_incomplete_requests(engine);
-	spin_unlock_irq(&engine->timeline->lock);
+
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static inline void
@@ -374,6 +379,19 @@
 				   status, rq);
 }
 
+inline void
+execlists_user_begin(struct intel_engine_execlists *execlists,
+		     const struct execlist_port *port)
+{
+	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+}
+
+inline void
+execlists_user_end(struct intel_engine_execlists *execlists)
+{
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+}
+
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
@@ -382,10 +400,11 @@
 }
 
 static inline void
-execlists_context_schedule_out(struct i915_request *rq)
+execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
 {
 	intel_engine_context_out(rq->engine);
-	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+	execlists_context_status_change(rq, status);
+	trace_i915_request_out(rq);
 }
 
 static void
@@ -399,7 +418,7 @@
 
 static u64 execlists_update_context(struct i915_request *rq)
 {
-	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
+	struct intel_context *ce = to_intel_context(rq->ctx, rq->engine);
 	struct i915_hw_ppgtt *ppgtt =
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
@@ -454,10 +473,12 @@
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, seqno=%x, prio=%d\n",
+			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
 				  engine->name, n,
 				  port[n].context_id, count,
 				  rq->global_seqno,
+				  rq->fence.context, rq->fence.seqno,
+				  intel_engine_get_seqno(engine),
 				  rq_prio(rq));
 		} else {
 			GEM_BUG_ON(!n);
@@ -506,7 +527,7 @@
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
 	struct intel_context *ce =
-		&engine->i915->preempt_context->engine[engine->id];
+		to_intel_context(engine->i915->preempt_context, engine);
 	unsigned int n;
 
 	GEM_BUG_ON(execlists->preempt_complete_status !=
@@ -535,7 +556,7 @@
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
-static void execlists_dequeue(struct intel_engine_cs *engine)
+static bool __execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
 	struct execlist_port *port = execlists->port;
@@ -545,6 +566,8 @@
 	struct rb_node *rb;
 	bool submit = false;
 
+	lockdep_assert_held(&engine->timeline.lock);
+
 	/* Hardware submission is through 2 ports. Conceptually each port
 	 * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
 	 * static for a context, and unique to each, so we only execute
@@ -566,7 +589,6 @@
 	 * and context switches) submission.
 	 */
 
-	spin_lock_irq(&engine->timeline->lock);
 	rb = execlists->first;
 	GEM_BUG_ON(rb_first(&execlists->queue) != rb);
 
@@ -581,7 +603,7 @@
 						EXECLISTS_ACTIVE_USER));
 		GEM_BUG_ON(!port_count(&port[0]));
 		if (port_count(&port[0]) > 1)
-			goto unlock;
+			return false;
 
 		/*
 		 * If we write to ELSP a second time before the HW has had
@@ -591,11 +613,11 @@
 		 * the HW to indicate that it has had a chance to respond.
 		 */
 		if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
-			goto unlock;
+			return false;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
 			inject_preempt_context(engine);
-			goto unlock;
+			return false;
 		}
 
 		/*
@@ -620,7 +642,7 @@
 		 * priorities of the ports haven't been switched.
 		 */
 		if (port_count(&port[1]))
-			goto unlock;
+			return false;
 
 		/*
 		 * WaIdleLiteRestore:bdw,skl
@@ -637,7 +659,7 @@
 		struct i915_priolist *p = to_priolist(rb);
 		struct i915_request *rq, *rn;
 
-		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -657,7 +679,7 @@
 				 */
 				if (port == last_port) {
 					__list_del_many(&p->requests,
-							&rq->priotree.link);
+							&rq->sched.link);
 					goto done;
 				}
 
@@ -671,7 +693,7 @@
 				if (ctx_single_port_submission(last->ctx) ||
 				    ctx_single_port_submission(rq->ctx)) {
 					__list_del_many(&p->requests,
-							&rq->priotree.link);
+							&rq->sched.link);
 					goto done;
 				}
 
@@ -684,7 +706,7 @@
 				GEM_BUG_ON(port_isset(port));
 			}
 
-			INIT_LIST_HEAD(&rq->priotree.link);
+			INIT_LIST_HEAD(&rq->sched.link);
 			__i915_request_submit(rq);
 			trace_i915_request_in(rq, port_index(port, execlists));
 			last = rq;
@@ -697,8 +719,27 @@
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
+
 done:
-	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
+	/*
+	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
+	 *
+	 * We choose queue_priority such that if we add a request of greater
+	 * priority than this, we kick the submission tasklet to decide on
+	 * the right order of submitting the requests to hardware. We must
+	 * also be prepared to reorder requests as they are in-flight on the
+	 * HW. We derive the queue_priority then as the first "hole" in
+	 * the HW submission ports and if there are no available slots,
+	 * the priority of the lowest executing request, i.e. last.
+	 *
+	 * When we do receive a higher priority request ready to run from the
+	 * user, see queue_request(), the queue_priority is bumped to that
+	 * request triggering preemption on the next dequeue (or subsequent
+	 * interrupt for secondary ports).
+	 */
+	execlists->queue_priority =
+		port != execlists->port ? rq_prio(last) : INT_MIN;
+
 	execlists->first = rb;
 	if (submit)
 		port_assign(port, last);
@@ -706,13 +747,25 @@
 	/* We must always keep the beast fed if we have work piled up */
 	GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
 
-unlock:
-	spin_unlock_irq(&engine->timeline->lock);
+	/* Re-evaluate the executing context setup after each preemptive kick */
+	if (last)
+		execlists_user_begin(execlists, execlists->port);
 
-	if (submit) {
-		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
+	return submit;
+}
+
+static void execlists_dequeue(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists * const execlists = &engine->execlists;
+	unsigned long flags;
+	bool submit;
+
+	spin_lock_irqsave(&engine->timeline.lock, flags);
+	submit = __execlists_dequeue(engine);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+	if (submit)
 		execlists_submit_ports(engine);
-	}
 
 	GEM_BUG_ON(port_isset(execlists->port) &&
 		   !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
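The dequeue path is now split so that all queue manipulation happens in __execlists_dequeue() under the timeline lock, while the ELSP write happens outside it. The pattern in isolation (types and helpers are stand-ins, not driver code):

/*
 * Hedged sketch of the lock-split introduced above: decide what to
 * submit under the spinlock, act on the decision outside it.
 */
struct toy_engine {
	spinlock_t lock;
};

static bool toy_compute(struct toy_engine *e);	/* __execlists_dequeue() analogue */
static void toy_submit(struct toy_engine *e);	/* execlists_submit_ports() analogue */

static void toy_dequeue(struct toy_engine *e)
{
	unsigned long flags;
	bool submit;

	spin_lock_irqsave(&e->lock, flags);
	submit = toy_compute(e);	/* all queue manipulation under the lock */
	spin_unlock_irqrestore(&e->lock, flags);

	if (submit)
		toy_submit(e);		/* hardware write happens unlocked */
}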
@@ -727,13 +780,18 @@
 	while (num_ports-- && port_isset(port)) {
 		struct i915_request *rq = port_request(port);
 
-		GEM_BUG_ON(!execlists->active);
-		intel_engine_context_out(rq->engine);
+		GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n",
+			  rq->engine->name,
+			  (unsigned int)(port - execlists->port),
+			  rq->global_seqno,
+			  rq->fence.context, rq->fence.seqno,
+			  intel_engine_get_seqno(rq->engine));
 
-		execlists_context_status_change(rq,
-						i915_request_completed(rq) ?
-						INTEL_CONTEXT_SCHEDULE_OUT :
-						INTEL_CONTEXT_SCHEDULE_PREEMPTED);
+		GEM_BUG_ON(!execlists->active);
+		execlists_context_schedule_out(rq,
+					       i915_request_completed(rq) ?
+					       INTEL_CONTEXT_SCHEDULE_OUT :
+					       INTEL_CONTEXT_SCHEDULE_PREEMPTED);
 
 		i915_request_put(rq);
 
@@ -741,7 +799,82 @@
 		port++;
 	}
 
-	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+	execlists_user_end(execlists);
+}
+
+static void clear_gtiir(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	int i;
+
+	/*
+	 * Clear any pending interrupt state.
+	 *
+	 * We do it twice out of paranoia that some of the IIR are
+	 * double buffered, and so if we only reset it once there may
+	 * still be an interrupt pending.
+	 */
+	if (INTEL_GEN(dev_priv) >= 11) {
+		static const struct {
+			u8 bank;
+			u8 bit;
+		} gen11_gtiir[] = {
+			[RCS] = {0, GEN11_RCS0},
+			[BCS] = {0, GEN11_BCS},
+			[_VCS(0)] = {1, GEN11_VCS(0)},
+			[_VCS(1)] = {1, GEN11_VCS(1)},
+			[_VCS(2)] = {1, GEN11_VCS(2)},
+			[_VCS(3)] = {1, GEN11_VCS(3)},
+			[_VECS(0)] = {1, GEN11_VECS(0)},
+			[_VECS(1)] = {1, GEN11_VECS(1)},
+		};
+		unsigned long irqflags;
+
+		GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir));
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+		for (i = 0; i < 2; i++) {
+			gen11_reset_one_iir(dev_priv,
+					    gen11_gtiir[engine->id].bank,
+					    gen11_gtiir[engine->id].bit);
+		}
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+	} else {
+		static const u8 gtiir[] = {
+			[RCS]  = 0,
+			[BCS]  = 0,
+			[VCS]  = 1,
+			[VCS2] = 1,
+			[VECS] = 3,
+		};
+
+		GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+		for (i = 0; i < 2; i++) {
+			I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+				   engine->irq_keep_mask);
+			POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+		}
+		GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
+			   engine->irq_keep_mask);
+	}
+}
+
+static void reset_irq(struct intel_engine_cs *engine)
+{
+	/* Mark all CS interrupts as complete */
+	smp_store_mb(engine->execlists.active, 0);
+	synchronize_hardirq(engine->i915->drm.irq);
+
+	clear_gtiir(engine);
+
+	/*
+	 * The port is checked prior to scheduling a tasklet, but
+	 * just in case we have suspended the tasklet to do the
+	 * wedging, make sure that when it wakes it decides there
+	 * is no work to do by clearing the irq_posted bit.
+	 */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
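reset_irq() now centralizes the teardown ordering that both reset and cancellation need: mark all CS interrupts complete, wait out any handler still running via synchronize_hardirq(), clear the (possibly double-buffered) GTIIR bits, and finally clear irq_posted so a suspended tasklet finds no stale work when it wakes. Both execlists_cancel_requests() below and reset_common_ring() later in this file call it right after execlists_cancel_port_requests().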
@@ -751,7 +884,8 @@
 	struct rb_node *rb;
 	unsigned long flags;
 
-	GEM_TRACE("%s\n", engine->name);
+	GEM_TRACE("%s current %d\n",
+		  engine->name, intel_engine_get_seqno(engine));
 
 	/*
 	 * Before we call engine->cancel_requests(), we should have exclusive
@@ -771,11 +905,12 @@
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
+	reset_irq(engine);
 
-	spin_lock(&engine->timeline->lock);
+	spin_lock(&engine->timeline.lock);
 
 	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline->requests, link) {
+	list_for_each_entry(rq, &engine->timeline.requests, link) {
 		GEM_BUG_ON(!rq->global_seqno);
 		if (!i915_request_completed(rq))
 			dma_fence_set_error(&rq->fence, -EIO);
@@ -786,8 +921,8 @@
 	while (rb) {
 		struct i915_priolist *p = to_priolist(rb);
 
-		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
-			INIT_LIST_HEAD(&rq->priotree.link);
+		list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+			INIT_LIST_HEAD(&rq->sched.link);
 
 			dma_fence_set_error(&rq->fence, -EIO);
 			__i915_request_submit(rq);
@@ -807,18 +942,7 @@
 	execlists->first = NULL;
 	GEM_BUG_ON(port_isset(execlists->port));
 
-	spin_unlock(&engine->timeline->lock);
-
-	/*
-	 * The port is checked prior to scheduling a tasklet, but
-	 * just in case we have suspended the tasklet to do the
-	 * wedging make sure that when it wakes, it decides there
-	 * is no work to do by clearing the irq_posted bit.
-	 */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
-	/* Mark all CS interrupts as complete */
-	execlists->active = 0;
+	spin_unlock(&engine->timeline.lock);
 
 	local_irq_restore(flags);
 }
@@ -831,7 +955,7 @@
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port * const port = execlists->port;
+	struct execlist_port *port = execlists->port;
 	struct drm_i915_private *dev_priv = engine->i915;
 	bool fw = false;
 
@@ -959,10 +1083,13 @@
 							EXECLISTS_ACTIVE_USER));
 
 			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+			GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
 				  engine->name,
 				  port->context_id, count,
 				  rq ? rq->global_seqno : 0,
+				  rq ? rq->fence.context : 0,
+				  rq ? rq->fence.seqno : 0,
+				  intel_engine_get_seqno(engine),
 				  rq ? rq_prio(rq) : 0);
 
 			/* Check the context/desc id for this event matches */
@@ -970,28 +1097,43 @@
 
 			GEM_BUG_ON(count == 0);
 			if (--count == 0) {
+				/*
+				 * On the final event corresponding to the
+				 * submission of this context, we expect either
+				 * an element-switch event or a completion
+				 * event (and on completion, the active-idle
+				 * marker). No more preemptions, lite-restore
+				 * or otherwise.
+				 */
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
 				GEM_BUG_ON(port_isset(&port[1]) &&
 					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+				GEM_BUG_ON(!port_isset(&port[1]) &&
+					   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+
+				/*
+				 * We rely on the hardware being strongly
+				 * ordered, so that the breadcrumb write is
+				 * coherent (visible from the CPU) before the
+				 * user interrupt and CSB are processed.
+				 */
 				GEM_BUG_ON(!i915_request_completed(rq));
-				execlists_context_schedule_out(rq);
-				trace_i915_request_out(rq);
+
+				execlists_context_schedule_out(rq,
+							       INTEL_CONTEXT_SCHEDULE_OUT);
 				i915_request_put(rq);
 
 				GEM_TRACE("%s completed ctx=%d\n",
 					  engine->name, port->context_id);
 
-				execlists_port_complete(execlists, port);
+				port = execlists_port_complete(execlists, port);
+				if (port_isset(port))
+					execlists_user_begin(execlists, port);
+				else
+					execlists_user_end(execlists);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
-
-			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
-				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-			if (port_count(port) == 0)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_USER);
 		}
 
 		if (head != execlists->csb_head) {
@@ -1014,18 +1156,23 @@
 }
 
 static void queue_request(struct intel_engine_cs *engine,
-			  struct i915_priotree *pt,
+			  struct i915_sched_node *node,
 			  int prio)
 {
-	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
+	list_add_tail(&node->link,
+		      &lookup_priolist(engine, prio)->requests);
+}
+
+static void __submit_queue(struct intel_engine_cs *engine, int prio)
+{
+	engine->execlists.queue_priority = prio;
+	tasklet_hi_schedule(&engine->execlists.tasklet);
 }
 
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-	if (prio > engine->execlists.queue_priority) {
-		engine->execlists.queue_priority = prio;
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-	}
+	if (prio > engine->execlists.queue_priority)
+		__submit_queue(engine, prio);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1034,42 +1181,45 @@
 	unsigned long flags;
 
 	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
-	queue_request(engine, &request->priotree, rq_prio(request));
+	queue_request(engine, &request->sched, rq_prio(request));
 	submit_queue(engine, rq_prio(request));
 
 	GEM_BUG_ON(!engine->execlists.first);
-	GEM_BUG_ON(list_empty(&request->priotree.link));
+	GEM_BUG_ON(list_empty(&request->sched.link));
 
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static struct i915_request *pt_to_request(struct i915_priotree *pt)
+static struct i915_request *sched_to_request(struct i915_sched_node *node)
 {
-	return container_of(pt, struct i915_request, priotree);
+	return container_of(node, struct i915_request, sched);
 }
 
 static struct intel_engine_cs *
-pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
+sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
 {
-	struct intel_engine_cs *engine = pt_to_request(pt)->engine;
+	struct intel_engine_cs *engine = sched_to_request(node)->engine;
 
 	GEM_BUG_ON(!locked);
 
 	if (engine != locked) {
-		spin_unlock(&locked->timeline->lock);
-		spin_lock(&engine->timeline->lock);
+		spin_unlock(&locked->timeline.lock);
+		spin_lock(&engine->timeline.lock);
 	}
 
 	return engine;
 }
 
-static void execlists_schedule(struct i915_request *request, int prio)
+static void execlists_schedule(struct i915_request *request,
+			       const struct i915_sched_attr *attr)
 {
-	struct intel_engine_cs *engine;
+	struct i915_priolist *uninitialized_var(pl);
+	struct intel_engine_cs *engine, *last;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
+	const int prio = attr->priority;
 	LIST_HEAD(dfs);
 
 	GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
@@ -1077,23 +1227,23 @@
 	if (i915_request_completed(request))
 		return;
 
-	if (prio <= READ_ONCE(request->priotree.priority))
+	if (prio <= READ_ONCE(request->sched.attr.priority))
 		return;
 
 	/* Need BKL in order to use the temporary link inside i915_dependency */
 	lockdep_assert_held(&request->i915->drm.struct_mutex);
 
-	stack.signaler = &request->priotree;
+	stack.signaler = &request->sched;
 	list_add(&stack.dfs_link, &dfs);
 
 	/*
 	 * Recursively bump all dependent priorities to match the new request.
 	 *
 	 * A naive approach would be to use recursion:
-	 * static void update_priorities(struct i915_priotree *pt, prio) {
-	 *	list_for_each_entry(dep, &pt->signalers_list, signal_link)
+	 * static void update_priorities(struct i915_sched_node *node, prio) {
+	 *	list_for_each_entry(dep, &node->signalers_list, signal_link)
 	 *		update_priorities(dep->signal, prio)
-	 *	queue_request(pt);
+	 *	queue_request(node);
 	 * }
 	 * but that may have unlimited recursion depth and so runs a very
 	 * real risk of overunning the kernel stack. Instead, we build
@@ -1105,7 +1255,7 @@
 	 * last element in the list is the request we must execute first.
 	 */
 	list_for_each_entry(dep, &dfs, dfs_link) {
-		struct i915_priotree *pt = dep->signaler;
+		struct i915_sched_node *node = dep->signaler;
 
 		/*
 		 * Within an engine, there can be no cycle, but we may
@@ -1113,14 +1263,14 @@
 		 * (redundant dependencies are not eliminated) and across
 		 * engines.
 		 */
-		list_for_each_entry(p, &pt->signalers_list, signal_link) {
+		list_for_each_entry(p, &node->signalers_list, signal_link) {
 			GEM_BUG_ON(p == dep); /* no cycles! */
 
-			if (i915_priotree_signaled(p->signaler))
+			if (i915_sched_node_signaled(p->signaler))
 				continue;
 
-			GEM_BUG_ON(p->signaler->priority < pt->priority);
-			if (prio > READ_ONCE(p->signaler->priority))
+			GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority);
+			if (prio > READ_ONCE(p->signaler->attr.priority))
 				list_move_tail(&p->dfs_link, &dfs);
 		}
 	}
@@ -1131,37 +1281,45 @@
 	 * execlists_submit_request()), we can set our own priority and skip
 	 * acquiring the engine locks.
 	 */
-	if (request->priotree.priority == I915_PRIORITY_INVALID) {
-		GEM_BUG_ON(!list_empty(&request->priotree.link));
-		request->priotree.priority = prio;
+	if (request->sched.attr.priority == I915_PRIORITY_INVALID) {
+		GEM_BUG_ON(!list_empty(&request->sched.link));
+		request->sched.attr = *attr;
 		if (stack.dfs_link.next == stack.dfs_link.prev)
 			return;
 		__list_del_entry(&stack.dfs_link);
 	}
 
+	last = NULL;
 	engine = request->engine;
-	spin_lock_irq(&engine->timeline->lock);
+	spin_lock_irq(&engine->timeline.lock);
 
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
-		struct i915_priotree *pt = dep->signaler;
+		struct i915_sched_node *node = dep->signaler;
 
 		INIT_LIST_HEAD(&dep->dfs_link);
 
-		engine = pt_lock_engine(pt, engine);
+		engine = sched_lock_engine(node, engine);
 
-		if (prio <= pt->priority)
+		if (prio <= node->attr.priority)
 			continue;
 
-		pt->priority = prio;
-		if (!list_empty(&pt->link)) {
-			__list_del_entry(&pt->link);
-			queue_request(engine, pt, prio);
+		node->attr.priority = prio;
+		if (!list_empty(&node->link)) {
+			if (last != engine) {
+				pl = lookup_priolist(engine, prio);
+				last = engine;
+			}
+			GEM_BUG_ON(pl->priority != prio);
+			list_move_tail(&node->link, &pl->requests);
 		}
-		submit_queue(engine, prio);
+
+		if (prio > engine->execlists.queue_priority &&
+		    i915_sw_fence_done(&sched_to_request(node)->submit))
+			__submit_queue(engine, prio);
 	}
 
-	spin_unlock_irq(&engine->timeline->lock);
+	spin_unlock_irq(&engine->timeline.lock);
 }
 
 static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
@@ -1191,7 +1349,7 @@
 execlists_context_pin(struct intel_engine_cs *engine,
 		      struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 	void *vaddr;
 	int ret;
 
@@ -1225,6 +1383,7 @@
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
 		i915_ggtt_offset(ce->ring->vma);
+	ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head;
 
 	ce->state->obj->pin_global++;
 	i915_gem_context_get(ctx);
@@ -1243,7 +1402,7 @@
 static void execlists_context_unpin(struct intel_engine_cs *engine,
 				    struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(ce->pin_count == 0);
@@ -1262,8 +1421,8 @@
 
 static int execlists_request_alloc(struct i915_request *request)
 {
-	struct intel_engine_cs *engine = request->engine;
-	struct intel_context *ce = &request->ctx->engine[engine->id];
+	struct intel_context *ce =
+		to_intel_context(request->ctx, request->engine);
 	int ret;
 
 	GEM_BUG_ON(!ce->pin_count);
@@ -1523,6 +1682,8 @@
 		return -EINVAL;
 
 	switch (INTEL_GEN(engine->i915)) {
+	case 11:
+		return 0;
 	case 10:
 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
 		wa_bb_fn[1] = NULL;
@@ -1575,14 +1736,6 @@
 	return ret;
 }
 
-static u8 gtiir[] = {
-	[RCS] = 0,
-	[BCS] = 0,
-	[VCS] = 1,
-	[VCS2] = 1,
-	[VECS] = 3,
-};
-
 static void enable_execlists(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1642,6 +1795,8 @@
 	if (ret)
 		return ret;
 
+	intel_whitelist_workarounds_apply(engine);
+
 	/* We need to disable the AsyncFlip performance optimisations in order
 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
 	 * programmed to '1' on all products.
@@ -1652,7 +1807,7 @@
 
 	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-	return init_workarounds_ring(engine);
+	return 0;
 }
 
 static int gen9_init_render_ring(struct intel_engine_cs *engine)
@@ -1663,49 +1818,25 @@
 	if (ret)
 		return ret;
 
-	return init_workarounds_ring(engine);
-}
+	intel_whitelist_workarounds_apply(engine);
 
-static void reset_irq(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int i;
-
-	GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
-
-	/*
-	 * Clear any pending interrupt state.
-	 *
-	 * We do it twice out of paranoia that some of the IIR are double
-	 * buffered, and if we only reset it once there may still be
-	 * an interrupt pending.
-	 */
-	for (i = 0; i < 2; i++) {
-		I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
-			   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
-		POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
-	}
-	GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
-		   (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift));
-
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	return 0;
 }
 
 static void reset_common_ring(struct intel_engine_cs *engine,
 			      struct i915_request *request)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct intel_context *ce;
 	unsigned long flags;
+	u32 *regs;
 
-	GEM_TRACE("%s seqno=%x\n",
-		  engine->name, request ? request->global_seqno : 0);
+	GEM_TRACE("%s request global=%x, current=%d\n",
+		  engine->name, request ? request->global_seqno : 0,
+		  intel_engine_get_seqno(engine));
 
 	/* See execlists_cancel_requests() for the irq/spinlock split. */
 	local_irq_save(flags);
 
-	reset_irq(engine);
-
 	/*
 	 * Catch up with any missed context-switch interrupts.
 	 *
@@ -1716,14 +1847,12 @@
 	 * requests were completed.
 	 */
 	execlists_cancel_port_requests(execlists);
+	reset_irq(engine);
 
 	/* Push back any incomplete requests for replay after the reset. */
-	spin_lock(&engine->timeline->lock);
+	spin_lock(&engine->timeline.lock);
 	__unwind_incomplete_requests(engine);
-	spin_unlock(&engine->timeline->lock);
-
-	/* Mark all CS interrupts as complete */
-	execlists->active = 0;
+	spin_unlock(&engine->timeline.lock);
 
 	local_irq_restore(flags);
 
@@ -1749,14 +1878,24 @@
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
-	ce = &request->ctx->engine[engine->id];
-	execlists_init_reg_state(ce->lrc_reg_state,
-				 request->ctx, engine, ce->ring);
+	regs = to_intel_context(request->ctx, engine)->lrc_reg_state;
+	if (engine->default_state) {
+		void *defaults;
+
+		defaults = i915_gem_object_pin_map(engine->default_state,
+						   I915_MAP_WB);
+		if (!IS_ERR(defaults)) {
+			memcpy(regs, /* skip restoring the vanilla PPHWSP */
+			       defaults + LRC_STATE_PN * PAGE_SIZE,
+			       engine->context_size - PAGE_SIZE);
+			i915_gem_object_unpin_map(engine->default_state);
+		}
+	}
+	execlists_init_reg_state(regs, request->ctx, engine, request->ring);
 
 	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
-		i915_ggtt_offset(ce->ring->vma);
-	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
+	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
+	regs[CTX_RING_HEAD + 1] = request->postfix;
 
 	request->ring->head = request->postfix;
 	intel_ring_update_space(request->ring);
@@ -1817,7 +1956,7 @@
 		rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
 	}
 
-	cs = intel_ring_begin(rq, 4);
+	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
@@ -1846,6 +1985,9 @@
 		(flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0);
 	*cs++ = lower_32_bits(offset);
 	*cs++ = upper_32_bits(offset);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_NOOP;
 	intel_ring_advance(rq, cs);
 
 	return 0;
@@ -1988,7 +2130,7 @@
 	cs = gen8_emit_ggtt_write(cs, request->global_seqno,
 				  intel_hws_seqno_address(request->engine));
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
@@ -2004,7 +2146,7 @@
 	cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno,
 				      intel_hws_seqno_address(request->engine));
 	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	request->tail = intel_ring_offset(request, cs);
 	assert_ring_tail_valid(request->ring, request->tail);
 
@@ -2016,7 +2158,7 @@
 {
 	int ret;
 
-	ret = intel_ring_workarounds_emit(rq);
+	ret = intel_ctx_workarounds_emit(rq);
 	if (ret)
 		return ret;
 
@@ -2076,11 +2218,13 @@
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+	if (engine->i915->preempt_context)
+		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
 		I915_SCHEDULER_CAP_ENABLED |
 		I915_SCHEDULER_CAP_PRIORITY;
-	if (engine->i915->preempt_context)
+	if (intel_engine_has_preemption(engine))
 		engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
 }
 
@@ -2119,7 +2263,20 @@
 static inline void
 logical_ring_default_irqs(struct intel_engine_cs *engine)
 {
-	unsigned shift = engine->irq_shift;
+	unsigned int shift = 0;
+
+	if (INTEL_GEN(engine->i915) < 11) {
+		const u8 irq_shifts[] = {
+			[RCS]  = GEN8_RCS_IRQ_SHIFT,
+			[BCS]  = GEN8_BCS_IRQ_SHIFT,
+			[VCS]  = GEN8_VCS1_IRQ_SHIFT,
+			[VCS2] = GEN8_VCS2_IRQ_SHIFT,
+			[VECS] = GEN8_VECS_IRQ_SHIFT,
+		};
+
+		shift = irq_shifts[engine->id];
+	}
+
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
@@ -2175,9 +2332,13 @@
 	}
 
 	engine->execlists.preempt_complete_status = ~0u;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context) {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+
 		engine->execlists.preempt_complete_status =
-			upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc);
+			upper_32_bits(ce->lrc_desc);
+	}
 
 	return 0;
 
@@ -2431,8 +2592,10 @@
 
 		defaults = i915_gem_object_pin_map(engine->default_state,
 						   I915_MAP_WB);
-		if (IS_ERR(defaults))
-			return PTR_ERR(defaults);
+		if (IS_ERR(defaults)) {
+			ret = PTR_ERR(defaults);
+			goto err_unpin_ctx;
+		}
 
 		memcpy(vaddr + start, defaults + start, engine->context_size);
 		i915_gem_object_unpin_map(engine->default_state);
@@ -2450,19 +2613,20 @@
 			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
 					   CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
 
+err_unpin_ctx:
 	i915_gem_object_unpin_map(ctx_obj);
-
-	return 0;
+	return ret;
 }
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *ctx_obj;
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 	struct i915_vma *vma;
 	uint32_t context_size;
 	struct intel_ring *ring;
+	struct i915_timeline *timeline;
 	int ret;
 
 	if (ce->state)
@@ -2478,8 +2642,8 @@
 
 	ctx_obj = i915_gem_object_create(ctx->i915, context_size);
 	if (IS_ERR(ctx_obj)) {
-		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
-		return PTR_ERR(ctx_obj);
+		ret = PTR_ERR(ctx_obj);
+		goto error_deref_obj;
 	}
 
 	vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
@@ -2488,7 +2652,14 @@
 		goto error_deref_obj;
 	}
 
-	ring = intel_engine_create_ring(engine, ctx->ring_size);
+	timeline = i915_timeline_create(ctx->i915, ctx->name);
+	if (IS_ERR(timeline)) {
+		ret = PTR_ERR(timeline);
+		goto error_deref_obj;
+	}
+
+	ring = intel_engine_create_ring(engine, timeline, ctx->ring_size);
+	i915_timeline_put(timeline);
 	if (IS_ERR(ring)) {
 		ret = PTR_ERR(ring);
 		goto error_deref_obj;
@@ -2530,7 +2701,8 @@
 	 */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
 		for_each_engine(engine, dev_priv, id) {
-			struct intel_context *ce = &ctx->engine[engine->id];
+			struct intel_context *ce =
+				to_intel_context(ctx, engine);
 			u32 *reg;
 
 			if (!ce->state)
@@ -2552,3 +2724,7 @@
 		}
 	}
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_lrc.c"
+#endif
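Including the selftest translation unit at the bottom of the file, guarded by CONFIG_DRM_I915_SELFTEST, is the usual i915 idiom for letting the tests exercise this file's static functions without exporting them.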
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 59d7b86..4ec7d8d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -108,7 +108,7 @@
 intel_lr_context_descriptor(struct i915_gem_context *ctx,
 			    struct intel_engine_cs *engine)
 {
-	return ctx->engine[engine->id].lrc_desc;
+	return to_intel_context(ctx, engine)->lrc_desc;
 }
 
 #endif /* _INTEL_LRC_H_ */
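to_intel_context(ctx, engine) replaces the open-coded &ctx->engine[engine->id] indexing throughout this series. Its definition is not part of these hunks; consistent with the expressions it replaces, it is presumably a thin accessor along these lines:

/*
 * Hedged sketch: a plausible shape for the accessor, inferred from the
 * open-coded indexing it replaces. The real definition lives outside
 * this diff.
 */
static inline struct intel_context *
to_intel_context(struct i915_gem_context *ctx,
		 const struct intel_engine_cs *engine)
{
	return &ctx->engine[engine->id];
}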
diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index c0b34b7..9f0bd6a4 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -178,7 +178,8 @@
 {
 	bool result = false;
 
-	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) ||
+	    IS_ICELAKE(dev_priv)) {
 		table->size  = ARRAY_SIZE(skylake_mocs_table);
 		table->table = skylake_mocs_table;
 		result = true;
@@ -217,6 +218,8 @@
 		return GEN9_VEBOX_MOCS(index);
 	case VCS2:
 		return GEN9_MFX1_MOCS(index);
+	case VCS3:
+		return GEN11_MFX2_MOCS(index);
 	default:
 		MISSING_CASE(engine_id);
 		return INVALID_MMIO_REG;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 36671a9..c2f10d8 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -807,6 +807,7 @@
 		ret = PTR_ERR(vma);
 		goto out_pin_section;
 	}
+	intel_fb_obj_flush(new_bo, ORIGIN_DIRTYFB);
 
 	ret = i915_vma_put_fence(vma);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index 1f5cd57..39a4e4e 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -569,7 +569,8 @@
 static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv,
 				enum pipe pipe,
 				enum intel_pipe_crc_source *source,
-				uint32_t *val)
+				uint32_t *val,
+				bool set_wa)
 {
 	if (*source == INTEL_PIPE_CRC_SOURCE_AUTO)
 		*source = INTEL_PIPE_CRC_SOURCE_PF;
@@ -582,7 +583,7 @@
 		*val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_SPRITE_IVB;
 		break;
 	case INTEL_PIPE_CRC_SOURCE_PF:
-		if ((IS_HASWELL(dev_priv) ||
+		if (set_wa && (IS_HASWELL(dev_priv) ||
 		     IS_BROADWELL(dev_priv)) && pipe == PIPE_A)
 			hsw_pipe_A_crc_wa(dev_priv, true);
 
@@ -600,7 +601,8 @@
 
 static int get_new_crc_ctl_reg(struct drm_i915_private *dev_priv,
 			       enum pipe pipe,
-			       enum intel_pipe_crc_source *source, u32 *val)
+			       enum intel_pipe_crc_source *source, u32 *val,
+			       bool set_wa)
 {
 	if (IS_GEN2(dev_priv))
 		return i8xx_pipe_crc_ctl_reg(source, val);
@@ -611,7 +613,7 @@
 	else if (IS_GEN5(dev_priv) || IS_GEN6(dev_priv))
 		return ilk_pipe_crc_ctl_reg(source, val);
 	else
-		return ivb_pipe_crc_ctl_reg(dev_priv, pipe, source, val);
+		return ivb_pipe_crc_ctl_reg(dev_priv, pipe, source, val, set_wa);
 }
 
 static int pipe_crc_set_source(struct drm_i915_private *dev_priv,
@@ -636,7 +638,7 @@
 		return -EIO;
 	}
 
-	ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val);
+	ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val, true);
 	if (ret != 0)
 		goto out;
 
@@ -764,13 +766,12 @@
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(pipe_crc_objects); i++)
-		if (!strcmp(buf, pipe_crc_objects[i])) {
-			*o = i;
-			return 0;
-		}
+	i = match_string(pipe_crc_objects, ARRAY_SIZE(pipe_crc_objects), buf);
+	if (i < 0)
+		return i;
 
-	return -EINVAL;
+	*o = i;
+	return 0;
 }
 
 static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv,
@@ -796,13 +797,12 @@
 		return 0;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(pipe_crc_sources); i++)
-		if (!strcmp(buf, pipe_crc_sources[i])) {
-			*s = i;
-			return 0;
-		}
+	i = match_string(pipe_crc_sources, ARRAY_SIZE(pipe_crc_sources), buf);
+	if (i < 0)
+		return i;
 
-	return -EINVAL;
+	*s = i;
+	return 0;
 }
 
 static int display_crc_ctl_parse(struct drm_i915_private *dev_priv,
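match_string() from <linux/string.h> collapses both open-coded strcmp() loops above; it returns the index of the matching entry or -EINVAL. A minimal hedged usage sketch (array contents are illustrative):

static const char * const names[] = { "none", "plane1", "plane2", "pf" };

static int parse_name(const char *buf, int *out)
{
	int i = match_string(names, ARRAY_SIZE(names), buf);

	if (i < 0)
		return i;	/* -EINVAL: no entry matched */

	*out = i;
	return 0;
}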
@@ -916,7 +916,7 @@
 int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name,
 			      size_t *values_cnt)
 {
-	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
 	struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index];
 	enum intel_display_power_domain power_domain;
 	enum intel_pipe_crc_source source;
@@ -934,10 +934,11 @@
 		return -EIO;
 	}
 
-	ret = get_new_crc_ctl_reg(dev_priv, crtc->index, &source, &val);
+	ret = get_new_crc_ctl_reg(dev_priv, crtc->index, &source, &val, true);
 	if (ret != 0)
 		goto out;
 
+	pipe_crc->source = source;
 	I915_WRITE(PIPE_CRC_CTL(crtc->index), val);
 	POSTING_READ(PIPE_CRC_CTL(crtc->index));
 
@@ -959,3 +960,39 @@
 
 	return ret;
 }
+
+void intel_crtc_enable_pipe_crc(struct intel_crtc *intel_crtc)
+{
+	struct drm_crtc *crtc = &intel_crtc->base;
+	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index];
+	u32 val = 0;
+
+	if (!crtc->crc.opened)
+		return;
+
+	if (get_new_crc_ctl_reg(dev_priv, crtc->index, &pipe_crc->source, &val, false) < 0)
+		return;
+
+	/* Don't need pipe_crc->lock here, IRQs are not generated. */
+	pipe_crc->skipped = 0;
+
+	I915_WRITE(PIPE_CRC_CTL(crtc->index), val);
+	POSTING_READ(PIPE_CRC_CTL(crtc->index));
+}
+
+void intel_crtc_disable_pipe_crc(struct intel_crtc *intel_crtc)
+{
+	struct drm_crtc *crtc = &intel_crtc->base;
+	struct drm_i915_private *dev_priv = to_i915(crtc->dev);
+	struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index];
+
+	/* Swallow CRCs until we stop generating them. */
+	spin_lock_irq(&pipe_crc->lock);
+	pipe_crc->skipped = INT_MIN;
+	spin_unlock_irq(&pipe_crc->lock);
+
+	I915_WRITE(PIPE_CRC_CTL(crtc->index), 0);
+	POSTING_READ(PIPE_CRC_CTL(crtc->index));
+	synchronize_irq(dev_priv->drm.irq);
+}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index b8da4dc..b85229e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3567,6 +3567,23 @@
 	return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL);
 }
 
+static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv)
+{
+	u8 enabled_slices;
+
+	/* Slice 1 will always be enabled */
+	enabled_slices = 1;
+
+	/* Gens prior to GEN11 have only one DBuf slice */
+	if (INTEL_GEN(dev_priv) < 11)
+		return enabled_slices;
+
+	if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)
+		enabled_slices++;
+
+	return enabled_slices;
+}
+
 /*
  * FIXME: We still don't have the proper code to detect if we need to apply the WA,
  * so assume we'll always need it in order to avoid underruns.
@@ -3754,9 +3771,42 @@
 	return true;
 }
 
+static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv,
+				       const struct intel_crtc_state *cstate,
+				       const unsigned int total_data_rate,
+				       const int num_active,
+				       struct skl_ddb_allocation *ddb)
+{
+	const struct drm_display_mode *adjusted_mode;
+	u64 total_data_bw;
+	u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size;
+
+	WARN_ON(ddb_size == 0);
+
+	if (INTEL_GEN(dev_priv) < 11)
+		return ddb_size - 4; /* 4 blocks for bypass path allocation */
+
+	adjusted_mode = &cstate->base.adjusted_mode;
+	total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode);
+
+	/*
+	 * 12 GB/s is the maximum BW supported by a single DBuf slice.
+	 */
+	if (total_data_bw >= GBps(12) || num_active > 1) {
+		ddb->enabled_slices = 2;
+	} else {
+		ddb->enabled_slices = 1;
+		ddb_size /= 2;
+	}
+
+	return ddb_size;
+}
+
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
 				   const struct intel_crtc_state *cstate,
+				   const unsigned int total_data_rate,
+				   struct skl_ddb_allocation *ddb,
 				   struct skl_ddb_entry *alloc, /* out */
 				   int *num_active /* out */)
 {
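For scale (illustrative numbers, not from the patch): a single 3840x2160 ARGB plane refreshed at 60 Hz contributes roughly 3840 * 2160 * 4 B * 60/s, about 2.0 GB/s, to the total_data_bw computed above. That is far below the 12 GB/s single-slice ceiling, so a lone 4K plane keeps one slice (and half the DDB); only higher aggregate bandwidth or num_active > 1 enables the second slice.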
@@ -3779,11 +3829,8 @@
 	else
 		*num_active = hweight32(dev_priv->active_crtcs);
 
-	ddb_size = INTEL_INFO(dev_priv)->ddb_size;
-	WARN_ON(ddb_size == 0);
-
-	if (INTEL_GEN(dev_priv) < 11)
-		ddb_size -= 4; /* 4 blocks for bypass path allocation */
+	ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate,
+				      *num_active, ddb);
 
 	/*
 	 * If the state doesn't change the active CRTC's, then there's
@@ -3817,14 +3864,64 @@
 	return 8;
 }
 
-static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
+static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
+				       struct skl_ddb_entry *entry, u32 reg)
 {
-	entry->start = reg & 0x3ff;
-	entry->end = (reg >> 16) & 0x3ff;
+	u16 mask;
+
+	if (INTEL_GEN(dev_priv) >= 11)
+		mask = ICL_DDB_ENTRY_MASK;
+	else
+		mask = SKL_DDB_ENTRY_MASK;
+	entry->start = reg & mask;
+	entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask;
+
 	if (entry->end)
 		entry->end += 1;
 }
 
+static void
+skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
+			   const enum pipe pipe,
+			   const enum plane_id plane_id,
+			   struct skl_ddb_allocation *ddb /* out */)
+{
+	u32 val, val2 = 0;
+	int fourcc, pixel_format;
+
+	/* Cursor doesn't support NV12/planar, so no extra calculation needed */
+	if (plane_id == PLANE_CURSOR) {
+		val = I915_READ(CUR_BUF_CFG(pipe));
+		skl_ddb_entry_init_from_hw(dev_priv,
+					   &ddb->plane[pipe][plane_id], val);
+		return;
+	}
+
+	val = I915_READ(PLANE_CTL(pipe, plane_id));
+
+	/* No DDB allocated for disabled planes */
+	if (!(val & PLANE_CTL_ENABLE))
+		return;
+
+	pixel_format = val & PLANE_CTL_FORMAT_MASK;
+	fourcc = skl_format_to_fourcc(pixel_format,
+				      val & PLANE_CTL_ORDER_RGBX,
+				      val & PLANE_CTL_ALPHA_MASK);
+
+	val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
+	val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id));
+
+	if (fourcc == DRM_FORMAT_NV12) {
+		skl_ddb_entry_init_from_hw(dev_priv,
+					   &ddb->plane[pipe][plane_id], val2);
+		skl_ddb_entry_init_from_hw(dev_priv,
+					   &ddb->uv_plane[pipe][plane_id], val);
+	} else {
+		skl_ddb_entry_init_from_hw(dev_priv,
+					   &ddb->plane[pipe][plane_id], val);
+	}
+}
+
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 			  struct skl_ddb_allocation *ddb /* out */)
 {
@@ -3832,6 +3929,8 @@
 
 	memset(ddb, 0, sizeof(*ddb));
 
+	ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv);
+
 	for_each_intel_crtc(&dev_priv->drm, crtc) {
 		enum intel_display_power_domain power_domain;
 		enum plane_id plane_id;
@@ -3841,16 +3940,9 @@
 		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 			continue;
 
-		for_each_plane_id_on_crtc(crtc, plane_id) {
-			u32 val;
-
-			if (plane_id != PLANE_CURSOR)
-				val = I915_READ(PLANE_BUF_CFG(pipe, plane_id));
-			else
-				val = I915_READ(CUR_BUF_CFG(pipe));
-
-			skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val);
-		}
+		for_each_plane_id_on_crtc(crtc, plane_id)
+			skl_ddb_get_hw_plane_state(dev_priv, pipe,
+						   plane_id, ddb);
 
 		intel_display_power_put(dev_priv, power_domain);
 	}
@@ -4009,9 +4101,9 @@
 static unsigned int
 skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 			     const struct drm_plane_state *pstate,
-			     int y)
+			     const int plane)
 {
-	struct intel_plane *plane = to_intel_plane(pstate->plane);
+	struct intel_plane *intel_plane = to_intel_plane(pstate->plane);
 	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
 	uint32_t data_rate;
 	uint32_t width = 0, height = 0;
@@ -4025,9 +4117,9 @@
 	fb = pstate->fb;
 	format = fb->format->format;
 
-	if (plane->id == PLANE_CURSOR)
+	if (intel_plane->id == PLANE_CURSOR)
 		return 0;
-	if (y && format != DRM_FORMAT_NV12)
+	if (plane == 1 && format != DRM_FORMAT_NV12)
 		return 0;
 
 	/*
@@ -4038,19 +4130,14 @@
 	width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	height = drm_rect_height(&intel_pstate->base.src) >> 16;
 
-	/* for planar format */
-	if (format == DRM_FORMAT_NV12) {
-		if (y)  /* y-plane data rate */
-			data_rate = width * height *
-				fb->format->cpp[0];
-		else    /* uv-plane data rate */
-			data_rate = (width / 2) * (height / 2) *
-				fb->format->cpp[1];
-	} else {
-		/* for packed formats */
-		data_rate = width * height * fb->format->cpp[0];
+	/* The NV12 UV plane is subsampled by 2 in each dimension */
+	if (plane == 1 && format == DRM_FORMAT_NV12) {
+		width /= 2;
+		height /= 2;
 	}
 
+	data_rate = width * height * fb->format->cpp[plane];
+
 	down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
 
 	return mul_round_up_u32_fixed16(data_rate, down_scale_amount);
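As a concrete check of the subsampling math (an illustrative 1920x1080 NV12 frame, cpp[0] = 1 and cpp[1] = 2): plane 0 (Y) contributes 1920 * 1080 * 1, about 2.07 MB per frame, while plane 1 (UV, halved in both dimensions) contributes 960 * 540 * 2, about 1.04 MB, exactly half the Y rate before the downscale adjustment.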
@@ -4063,8 +4150,8 @@
  */
 static unsigned int
 skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate,
-				 unsigned *plane_data_rate,
-				 unsigned *plane_y_data_rate)
+				 unsigned int *plane_data_rate,
+				 unsigned int *uv_plane_data_rate)
 {
 	struct drm_crtc_state *cstate = &intel_cstate->base;
 	struct drm_atomic_state *state = cstate->state;
@@ -4080,17 +4167,17 @@
 		enum plane_id plane_id = to_intel_plane(plane)->id;
 		unsigned int rate;
 
-		/* packed/uv */
+		/* packed/y */
 		rate = skl_plane_relative_data_rate(intel_cstate,
 						    pstate, 0);
 		plane_data_rate[plane_id] = rate;
 
 		total_data_rate += rate;
 
-		/* y-plane */
+		/* uv-plane */
 		rate = skl_plane_relative_data_rate(intel_cstate,
 						    pstate, 1);
-		plane_y_data_rate[plane_id] = rate;
+		uv_plane_data_rate[plane_id] = rate;
 
 		total_data_rate += rate;
 	}
@@ -4099,8 +4186,7 @@
 }
 
 static uint16_t
-skl_ddb_min_alloc(const struct drm_plane_state *pstate,
-		  const int y)
+skl_ddb_min_alloc(const struct drm_plane_state *pstate, const int plane)
 {
 	struct drm_framebuffer *fb = pstate->fb;
 	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
@@ -4111,8 +4197,8 @@
 	if (WARN_ON(!fb))
 		return 0;
 
-	/* For packed formats, no y-plane, return 0 */
-	if (y && fb->format->format != DRM_FORMAT_NV12)
+	/* Packed formats have no UV plane; return 0 for it */
+	if (plane == 1 && fb->format->format != DRM_FORMAT_NV12)
 		return 0;
 
 	/* For Non Y-tile return 8-blocks */
@@ -4131,15 +4217,12 @@
 	src_h = drm_rect_height(&intel_pstate->base.src) >> 16;
 
 	/* Halve UV plane width and height for NV12 */
-	if (fb->format->format == DRM_FORMAT_NV12 && !y) {
+	if (plane == 1) {
 		src_w /= 2;
 		src_h /= 2;
 	}
 
-	if (fb->format->format == DRM_FORMAT_NV12 && !y)
-		plane_bpp = fb->format->cpp[1];
-	else
-		plane_bpp = fb->format->cpp[0];
+	plane_bpp = fb->format->cpp[plane];
 
 	if (drm_rotation_90_or_270(pstate->rotation)) {
 		switch (plane_bpp) {
@@ -4167,7 +4250,7 @@
 
 static void
 skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active,
-		 uint16_t *minimum, uint16_t *y_minimum)
+		 uint16_t *minimum, uint16_t *uv_minimum)
 {
 	const struct drm_plane_state *pstate;
 	struct drm_plane *plane;
@@ -4182,7 +4265,7 @@
 			continue;
 
 		minimum[plane_id] = skl_ddb_min_alloc(pstate, 0);
-		y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
+		uv_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1);
 	}
 
 	minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active);
@@ -4200,17 +4283,17 @@
 	struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb;
 	uint16_t alloc_size, start;
 	uint16_t minimum[I915_MAX_PLANES] = {};
-	uint16_t y_minimum[I915_MAX_PLANES] = {};
+	uint16_t uv_minimum[I915_MAX_PLANES] = {};
 	unsigned int total_data_rate;
 	enum plane_id plane_id;
 	int num_active;
-	unsigned plane_data_rate[I915_MAX_PLANES] = {};
-	unsigned plane_y_data_rate[I915_MAX_PLANES] = {};
+	unsigned int plane_data_rate[I915_MAX_PLANES] = {};
+	unsigned int uv_plane_data_rate[I915_MAX_PLANES] = {};
 	uint16_t total_min_blocks = 0;
 
 	/* Clear the partitioning for disabled planes. */
 	memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
-	memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
+	memset(ddb->uv_plane[pipe], 0, sizeof(ddb->uv_plane[pipe]));
 
 	if (WARN_ON(!state))
 		return 0;
@@ -4220,12 +4303,16 @@
 		return 0;
 	}
 
-	skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active);
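+	/* The pipe's allocation limits now depend on the total data rate */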
+	total_data_rate = skl_get_total_relative_data_rate(cstate,
+							   plane_data_rate,
+							   uv_plane_data_rate);
+	skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb,
+					   alloc, &num_active);
 	alloc_size = skl_ddb_entry_size(alloc);
 	if (alloc_size == 0)
 		return 0;
 
-	skl_ddb_calc_min(cstate, num_active, minimum, y_minimum);
+	skl_ddb_calc_min(cstate, num_active, minimum, uv_minimum);
 
 	/*
 	 * 1. Allocate the minimum required blocks for each active plane
@@ -4235,7 +4322,7 @@
 
 	for_each_plane_id_on_crtc(intel_crtc, plane_id) {
 		total_min_blocks += minimum[plane_id];
-		total_min_blocks += y_minimum[plane_id];
+		total_min_blocks += uv_minimum[plane_id];
 	}
 
 	if (total_min_blocks > alloc_size) {
@@ -4255,16 +4342,13 @@
 	 *
 	 * FIXME: we may not allocate every single block here.
 	 */
-	total_data_rate = skl_get_total_relative_data_rate(cstate,
-							   plane_data_rate,
-							   plane_y_data_rate);
 	if (total_data_rate == 0)
 		return 0;
 
 	start = alloc->start;
 	for_each_plane_id_on_crtc(intel_crtc, plane_id) {
-		unsigned int data_rate, y_data_rate;
-		uint16_t plane_blocks, y_plane_blocks = 0;
+		unsigned int data_rate, uv_data_rate;
+		uint16_t plane_blocks, uv_plane_blocks;
 
 		if (plane_id == PLANE_CURSOR)
 			continue;
@@ -4288,21 +4372,20 @@
 
 		start += plane_blocks;
 
-		/*
-		 * allocation for y_plane part of planar format:
-		 */
-		y_data_rate = plane_y_data_rate[plane_id];
+		/* Allocate DDB for the UV plane of planar/NV12 formats */
+		uv_data_rate = uv_plane_data_rate[plane_id];
 
-		y_plane_blocks = y_minimum[plane_id];
-		y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
-					total_data_rate);
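+		/* Minimum blocks plus a share proportional to the UV data rate */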
+		uv_plane_blocks = uv_minimum[plane_id];
+		uv_plane_blocks += div_u64((uint64_t)alloc_size * uv_data_rate,
+					   total_data_rate);
 
-		if (y_data_rate) {
-			ddb->y_plane[pipe][plane_id].start = start;
-			ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks;
+		if (uv_data_rate) {
+			ddb->uv_plane[pipe][plane_id].start = start;
+			ddb->uv_plane[pipe][plane_id].end =
+				start + uv_plane_blocks;
 		}
 
-		start += y_plane_blocks;
+		start += uv_plane_blocks;
 	}
 
 	return 0;
@@ -4398,7 +4481,7 @@
 skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv,
 			    struct intel_crtc_state *cstate,
 			    const struct intel_plane_state *intel_pstate,
-			    struct skl_wm_params *wp)
+			    struct skl_wm_params *wp, int plane_id)
 {
 	struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
 	const struct drm_plane_state *pstate = &intel_pstate->base;
@@ -4411,6 +4494,12 @@
 	if (!intel_wm_plane_visible(cstate, intel_pstate))
 		return 0;
 
+	/* only NV12 format has two planes */
+	if (plane_id == 1 && fb->format->format != DRM_FORMAT_NV12) {
+		DRM_DEBUG_KMS("Non-NV12 formats have a single plane\n");
+		return -EINVAL;
+	}
+
 	wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
 		      fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
 		      fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
@@ -4418,6 +4507,7 @@
 	wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
 	wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
 			 fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
+	wp->is_planar = fb->format->format == DRM_FORMAT_NV12;
 
 	if (plane->id == PLANE_CURSOR) {
 		wp->width = intel_pstate->base.crtc_w;
@@ -4430,8 +4520,10 @@
 		wp->width = drm_rect_width(&intel_pstate->base.src) >> 16;
 	}
 
-	wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] :
-							    fb->format->cpp[0];
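+	/* NV12 chroma is 2x2 sub-sampled; the UV plane is half as wide */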
+	if (plane_id == 1 && wp->is_planar)
+		wp->width /= 2;
+
+	wp->cpp = fb->format->cpp[plane_id];
 	wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate,
 							     intel_pstate);
 
@@ -4499,9 +4591,8 @@
 				uint16_t ddb_allocation,
 				int level,
 				const struct skl_wm_params *wp,
-				uint16_t *out_blocks, /* out */
-				uint8_t *out_lines, /* out */
-				bool *enabled /* out */)
+				const struct skl_wm_level *result_prev,
+				struct skl_wm_level *result /* out */)
 {
 	const struct drm_plane_state *pstate = &intel_pstate->base;
 	uint32_t latency = dev_priv->wm.skl_latency[level];
@@ -4515,7 +4606,7 @@
 
 	if (latency == 0 ||
 	    !intel_wm_plane_visible(cstate, intel_pstate)) {
-		*enabled = false;
+		result->plane_en = false;
 		return 0;
 	}
 
@@ -4568,6 +4659,15 @@
 		} else {
 			res_blocks++;
 		}
+
+		/*
+		 * Make sure result blocks for higher latency levels are at
+		 * least as high as the level below the current one. This is
+		 * an assumption in the DDB algorithm optimization for
+		 * special cases.
+		 * Also covers Display WA #1125 for RC.
+		 */
+		if (result_prev->plane_res_b > res_blocks)
+			res_blocks = result_prev->plane_res_b;
 	}
 
 	if (INTEL_GEN(dev_priv) >= 11) {
@@ -4596,7 +4696,7 @@
 	if ((level > 0 && res_lines > 31) ||
 	    res_blocks >= ddb_allocation ||
 	    min_disp_buf_needed >= ddb_allocation) {
-		*enabled = false;
+		result->plane_en = false;
 
 		/*
 		 * If there are no valid level 0 watermarks, then we can't
@@ -4615,10 +4715,21 @@
 		}
 	}
 
+	/*
+	 * Display WA #826 (SKL:ALL, BXT:ALL) & #1059 (CNL:A)
+	 * disable wm levels 1-7 on NV12 planes
+	 */
+	if (wp->is_planar && level >= 1 &&
+	    (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) ||
+	     IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))) {
+		result->plane_en = false;
+		return 0;
+	}
+
 	/* The number of lines are ignored for the level 0 watermark. */
-	*out_lines = level ? res_lines : 0;
-	*out_blocks = res_blocks;
-	*enabled = true;
+	result->plane_res_b = res_blocks;
+	result->plane_res_l = res_lines;
+	result->plane_en = true;
 
 	return 0;
 }
@@ -4629,7 +4740,8 @@
 		      struct intel_crtc_state *cstate,
 		      const struct intel_plane_state *intel_pstate,
 		      const struct skl_wm_params *wm_params,
-		      struct skl_plane_wm *wm)
+		      struct skl_plane_wm *wm,
+		      int plane_id)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
 	struct drm_plane *plane = intel_pstate->base.plane;
@@ -4637,15 +4749,26 @@
 	uint16_t ddb_blocks;
 	enum pipe pipe = intel_crtc->pipe;
 	int level, max_level = ilk_wm_max_level(dev_priv);
+	enum plane_id intel_plane_id = intel_plane->id;
 	int ret;
 
 	if (WARN_ON(!intel_pstate->base.fb))
 		return -EINVAL;
 
-	ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]);
+	ddb_blocks = plane_id ?
+		     skl_ddb_entry_size(&ddb->uv_plane[pipe][intel_plane_id]) :
+		     skl_ddb_entry_size(&ddb->plane[pipe][intel_plane_id]);
 
 	for (level = 0; level <= max_level; level++) {
-		struct skl_wm_level *result = &wm->wm[level];
+		struct skl_wm_level *result = plane_id ? &wm->uv_wm[level] :
+							  &wm->wm[level];
+		struct skl_wm_level *result_prev;
+
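+		/* Level 0 has no previous level and clamps against itself */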
+		if (level)
+			result_prev = plane_id ? &wm->uv_wm[level - 1] :
+						  &wm->wm[level - 1];
+		else
+			result_prev = plane_id ? &wm->uv_wm[0] : &wm->wm[0];
 
 		ret = skl_compute_plane_wm(dev_priv,
 					   cstate,
@@ -4653,13 +4776,15 @@
 					   ddb_blocks,
 					   level,
 					   wm_params,
-					   &result->plane_res_b,
-					   &result->plane_res_l,
-					   &result->plane_en);
+					   result_prev,
+					   result);
 		if (ret)
 			return ret;
 	}
 
+	if (intel_pstate->base.fb->format->format == DRM_FORMAT_NV12)
+		wm->is_planar = true;
+
 	return 0;
 }
 
@@ -4769,20 +4894,39 @@
 
 		wm = &pipe_wm->planes[plane_id];
 		ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]);
-		memset(&wm_params, 0, sizeof(struct skl_wm_params));
 
 		ret = skl_compute_plane_wm_params(dev_priv, cstate,
-						  intel_pstate, &wm_params);
+						  intel_pstate, &wm_params, 0);
 		if (ret)
 			return ret;
 
 		ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
-					    intel_pstate, &wm_params, wm);
+					    intel_pstate, &wm_params, wm, 0);
 		if (ret)
 			return ret;
+
 		skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0],
 					  ddb_blocks, &wm->trans_wm);
+
+		/* uv plane watermarks must also be validated for NV12/Planar */
+		if (wm_params.is_planar) {
+			memset(&wm_params, 0, sizeof(struct skl_wm_params));
+			wm->is_planar = true;
+
+			ret = skl_compute_plane_wm_params(dev_priv, cstate,
+							  intel_pstate,
+							  &wm_params, 1);
+			if (ret)
+				return ret;
+
+			ret = skl_compute_wm_levels(dev_priv, ddb, cstate,
+						    intel_pstate, &wm_params,
+						    wm, 1);
+			if (ret)
+				return ret;
+		}
 	}
+
 	pipe_wm->linetime = skl_compute_linetime_wm(cstate);
 
 	return 0;
@@ -4833,10 +4977,21 @@
 
 	skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
 			    &ddb->plane[pipe][plane_id]);
-	if (INTEL_GEN(dev_priv) < 11)
+	if (INTEL_GEN(dev_priv) >= 11)
+		return skl_ddb_entry_write(dev_priv,
+					   PLANE_BUF_CFG(pipe, plane_id),
+					   &ddb->plane[pipe][plane_id]);
+	if (wm->is_planar) {
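+		/*
+		 * For planar formats PLANE_BUF_CFG takes the UV DDB and
+		 * PLANE_NV12_BUF_CFG the Y DDB.
+		 */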
+		skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
+				    &ddb->uv_plane[pipe][plane_id]);
 		skl_ddb_entry_write(dev_priv,
 				    PLANE_NV12_BUF_CFG(pipe, plane_id),
-				    &ddb->y_plane[pipe][plane_id]);
+				    &ddb->plane[pipe][plane_id]);
+	} else {
+		skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id),
+				    &ddb->plane[pipe][plane_id]);
+		I915_WRITE(PLANE_NV12_BUF_CFG(pipe, plane_id), 0x0);
+	}
 }
 
 static void skl_write_cursor_wm(struct intel_crtc *intel_crtc,
@@ -4944,15 +5099,13 @@
 	struct drm_plane *plane;
 	enum pipe pipe = intel_crtc->pipe;
 
-	WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc));
-
 	drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
 		enum plane_id plane_id = to_intel_plane(plane)->id;
 
 		if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id],
 					&new_ddb->plane[pipe][plane_id]) &&
-		    skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id],
-					&new_ddb->y_plane[pipe][plane_id]))
+		    skl_ddb_entry_equal(&cur_ddb->uv_plane[pipe][plane_id],
+					&new_ddb->uv_plane[pipe][plane_id]))
 			continue;
 
 		plane_state = drm_atomic_get_plane_state(state, plane);
@@ -4966,13 +5119,109 @@
 static int
 skl_compute_ddb(struct drm_atomic_state *state)
 {
-	struct drm_device *dev = state->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	const struct drm_i915_private *dev_priv = to_i915(state->dev);
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct intel_crtc *intel_crtc;
 	struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb;
+	struct intel_crtc *crtc;
+	struct intel_crtc_state *cstate;
+	int ret, i;
+
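+	/* Start from the current hw state; untouched pipes keep their DDB */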
+	memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
+
+	for_each_new_intel_crtc_in_state(intel_state, crtc, cstate, i) {
+		ret = skl_allocate_pipe_ddb(cstate, ddb);
+		if (ret)
+			return ret;
+
+		ret = skl_ddb_add_affected_planes(cstate);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void
+skl_copy_ddb_for_pipe(struct skl_ddb_values *dst,
+		      struct skl_ddb_values *src,
+		      enum pipe pipe)
+{
+	memcpy(dst->ddb.uv_plane[pipe], src->ddb.uv_plane[pipe],
+	       sizeof(dst->ddb.uv_plane[pipe]));
+	memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
+	       sizeof(dst->ddb.plane[pipe]));
+	dst->ddb.enabled_slices = src->ddb.enabled_slices;
+}
+
+static void
+skl_print_wm_changes(const struct drm_atomic_state *state)
+{
+	const struct drm_device *dev = state->dev;
+	const struct drm_i915_private *dev_priv = to_i915(dev);
+	const struct intel_atomic_state *intel_state =
+		to_intel_atomic_state(state);
+	const struct drm_crtc *crtc;
+	const struct drm_crtc_state *cstate;
+	const struct intel_plane *intel_plane;
+	const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
+	const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
+	int i;
+
+	for_each_new_crtc_in_state(state, crtc, cstate, i) {
+		const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+		enum pipe pipe = intel_crtc->pipe;
+
+		for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
+			enum plane_id plane_id = intel_plane->id;
+			const struct skl_ddb_entry *old, *new;
+
+			old = &old_ddb->plane[pipe][plane_id];
+			new = &new_ddb->plane[pipe][plane_id];
+
+			if (skl_ddb_entry_equal(old, new))
+				continue;
+
+			DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
+					 intel_plane->base.base.id,
+					 intel_plane->base.name,
+					 old->start, old->end,
+					 new->start, new->end);
+		}
+	}
+}
+
+static int
+skl_ddb_add_affected_pipes(struct drm_atomic_state *state, bool *changed)
+{
+	struct drm_device *dev = state->dev;
+	const struct drm_i915_private *dev_priv = to_i915(dev);
+	const struct drm_crtc *crtc;
+	const struct drm_crtc_state *cstate;
+	struct intel_crtc *intel_crtc;
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
 	uint32_t realloc_pipes = pipes_modified(state);
-	int ret;
+	int ret, i;
+
+	/*
+	 * When we distrust bios wm we always need to recompute to set the
+	 * expected DDB allocations for each CRTC.
+	 */
+	if (dev_priv->wm.distrust_bios_wm)
+		(*changed) = true;
+
+	/*
+	 * If this transaction isn't actually touching any CRTCs, don't
+	 * bother with watermark calculation.  Note that if we pass this
+	 * test, we're guaranteed to hold at least one CRTC state mutex,
+	 * which means we can safely use values like dev_priv->active_crtcs
+	 * since any racing commits that want to update them would need to
+	 * hold _all_ CRTC state mutexes.
+	 */
+	for_each_new_crtc_in_state(state, crtc, cstate, i)
+		(*changed) = true;
+
+	if (!*changed)
+		return 0;
 
 	/*
 	 * If this is our first atomic update following hardware readout,
@@ -5020,111 +5269,35 @@
 	 * We're not recomputing for the pipes not included in the commit, so
 	 * make sure we start with the current state.
 	 */
-	memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
-
 	for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
 		struct intel_crtc_state *cstate;
 
 		cstate = intel_atomic_get_crtc_state(state, intel_crtc);
 		if (IS_ERR(cstate))
 			return PTR_ERR(cstate);
-
-		ret = skl_allocate_pipe_ddb(cstate, ddb);
-		if (ret)
-			return ret;
-
-		ret = skl_ddb_add_affected_planes(cstate);
-		if (ret)
-			return ret;
 	}
 
 	return 0;
 }
 
-static void
-skl_copy_wm_for_pipe(struct skl_wm_values *dst,
-		     struct skl_wm_values *src,
-		     enum pipe pipe)
-{
-	memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe],
-	       sizeof(dst->ddb.y_plane[pipe]));
-	memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
-	       sizeof(dst->ddb.plane[pipe]));
-}
-
-static void
-skl_print_wm_changes(const struct drm_atomic_state *state)
-{
-	const struct drm_device *dev = state->dev;
-	const struct drm_i915_private *dev_priv = to_i915(dev);
-	const struct intel_atomic_state *intel_state =
-		to_intel_atomic_state(state);
-	const struct drm_crtc *crtc;
-	const struct drm_crtc_state *cstate;
-	const struct intel_plane *intel_plane;
-	const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb;
-	const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb;
-	int i;
-
-	for_each_new_crtc_in_state(state, crtc, cstate, i) {
-		const struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-		enum pipe pipe = intel_crtc->pipe;
-
-		for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) {
-			enum plane_id plane_id = intel_plane->id;
-			const struct skl_ddb_entry *old, *new;
-
-			old = &old_ddb->plane[pipe][plane_id];
-			new = &new_ddb->plane[pipe][plane_id];
-
-			if (skl_ddb_entry_equal(old, new))
-				continue;
-
-			DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n",
-					 intel_plane->base.base.id,
-					 intel_plane->base.name,
-					 old->start, old->end,
-					 new->start, new->end);
-		}
-	}
-}
-
 static int
 skl_compute_wm(struct drm_atomic_state *state)
 {
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *cstate;
 	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
-	struct skl_wm_values *results = &intel_state->wm_results;
-	struct drm_device *dev = state->dev;
+	struct skl_ddb_values *results = &intel_state->wm_results;
 	struct skl_pipe_wm *pipe_wm;
 	bool changed = false;
 	int ret, i;
 
-	/*
-	 * When we distrust bios wm we always need to recompute to set the
-	 * expected DDB allocations for each CRTC.
-	 */
-	if (to_i915(dev)->wm.distrust_bios_wm)
-		changed = true;
-
-	/*
-	 * If this transaction isn't actually touching any CRTC's, don't
-	 * bother with watermark calculation.  Note that if we pass this
-	 * test, we're guaranteed to hold at least one CRTC state mutex,
-	 * which means we can safely use values like dev_priv->active_crtcs
-	 * since any racing commits that want to update them would need to
-	 * hold _all_ CRTC state mutexes.
-	 */
-	for_each_new_crtc_in_state(state, crtc, cstate, i)
-		changed = true;
-
-	if (!changed)
-		return 0;
-
 	/* Clear all dirty flags */
 	results->dirty_pipes = 0;
 
+	ret = skl_ddb_add_affected_pipes(state, &changed);
+	if (ret || !changed)
+		return ret;
+
 	ret = skl_compute_ddb(state);
 	if (ret)
 		return ret;
@@ -5197,8 +5370,8 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc);
 	struct drm_device *dev = intel_crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct skl_wm_values *results = &state->wm_results;
-	struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
+	struct skl_ddb_values *results = &state->wm_results;
+	struct skl_ddb_values *hw_vals = &dev_priv->wm.skl_hw;
 	enum pipe pipe = intel_crtc->pipe;
 
 	if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0)
@@ -5209,7 +5382,7 @@
 	if (cstate->base.active_changed)
 		skl_atomic_update_crtc_wm(state, cstate);
 
-	skl_copy_wm_for_pipe(hw_vals, results, pipe);
+	skl_copy_ddb_for_pipe(hw_vals, results, pipe);
 
 	mutex_unlock(&dev_priv->wm.wm_mutex);
 }
@@ -5341,7 +5514,7 @@
 void skl_wm_get_hw_state(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct skl_wm_values *hw = &dev_priv->wm.skl_hw;
+	struct skl_ddb_values *hw = &dev_priv->wm.skl_hw;
 	struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb;
 	struct drm_crtc *crtc;
 	struct intel_crtc *intel_crtc;
@@ -5362,8 +5535,12 @@
 		/* Fully recompute DDB on first atomic commit */
 		dev_priv->wm.distrust_bios_wm = true;
 	} else {
-		/* Easy/common case; just sanitize DDB now if everything off */
-		memset(ddb, 0, sizeof(*ddb));
+		/*
+		 * Easy/common case; just sanitize the DDB now if everything is
+		 * off, but keep the dbuf slice info intact.
+		 */
+		memset(ddb->plane, 0, sizeof(ddb->plane));
+		memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane));
 	}
 }
 
@@ -6572,7 +6749,7 @@
 
 	rps->efficient_freq = rps->rp1_freq;
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
-	    IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+	    IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
 		u32 ddcc_status = 0;
 
 		if (sandybridge_pcode_read(dev_priv,
@@ -6585,7 +6762,7 @@
 					rps->max_freq);
 	}
 
-	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
 		/* Store the frequency values in 16.66 MHZ units, which is
 		 * the natural hardware unit for SKL
 		 */
@@ -6890,15 +7067,18 @@
 static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int min_freq = 15;
+	const int min_freq = 15;
+	const int scaling_factor = 180;
 	unsigned int gpu_freq;
 	unsigned int max_ia_freq, min_ring_freq;
 	unsigned int max_gpu_freq, min_gpu_freq;
-	int scaling_factor = 180;
 	struct cpufreq_policy *policy;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock));
 
+	if (rps->max_freq <= rps->min_freq)
+		return;
+
 	policy = cpufreq_cpu_get(0);
 	if (policy) {
 		max_ia_freq = policy->cpuinfo.max_freq;
@@ -6918,13 +7098,12 @@
 	/* convert DDR frequency from units of 266.6MHz to bandwidth */
 	min_ring_freq = mult_frac(min_ring_freq, 8, 3);
 
-	if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+	min_gpu_freq = rps->min_freq;
+	max_gpu_freq = rps->max_freq;
+	if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
 		/* Convert GT frequency to 50 HZ units */
-		min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER;
-		max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER;
-	} else {
-		min_gpu_freq = rps->min_freq;
-		max_gpu_freq = rps->max_freq;
+		min_gpu_freq /= GEN9_FREQ_SCALER;
+		max_gpu_freq /= GEN9_FREQ_SCALER;
 	}
 
 	/*
@@ -6933,10 +7112,10 @@
 	 * the PCU should use as a reference to determine the ring frequency.
 	 */
 	for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) {
-		int diff = max_gpu_freq - gpu_freq;
+		const int diff = max_gpu_freq - gpu_freq;
 		unsigned int ia_freq = 0, ring_freq = 0;
 
-		if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
+		if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) {
 			/*
 			 * ring_freq = 2 * GT. ring_freq is in 100MHz units
 			 * No floor required for ring frequency on SKL.
@@ -8026,10 +8205,10 @@
 	dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */
 	intel_disable_gt_powersave(dev_priv);
 
-	if (INTEL_GEN(dev_priv) < 11)
-		gen6_reset_rps_interrupts(dev_priv);
+	if (INTEL_GEN(dev_priv) >= 11)
+		gen11_reset_rps_interrupts(dev_priv);
 	else
-		WARN_ON_ONCE(1);
+		gen6_reset_rps_interrupts(dev_priv);
 }
 
 static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
@@ -8142,8 +8321,6 @@
 		cherryview_enable_rps(dev_priv);
 	} else if (IS_VALLEYVIEW(dev_priv)) {
 		valleyview_enable_rps(dev_priv);
-	} else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) {
-		/* TODO */
 	} else if (INTEL_GEN(dev_priv) >= 9) {
 		gen9_enable_rps(dev_priv);
 	} else if (IS_BROADWELL(dev_priv)) {
@@ -8487,6 +8664,13 @@
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 }
 
+static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
+{
+	/* This is not a Wa. Enable it to reduce Sampler power */
+	I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN,
+		   I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE);
+}
+
 static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
 {
 	if (!HAS_PCH_CNP(dev_priv))
@@ -9013,7 +9197,9 @@
  */
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
 {
-	if (IS_CANNONLAKE(dev_priv))
+	if (IS_ICELAKE(dev_priv))
+		dev_priv->display.init_clock_gating = icl_init_clock_gating;
+	else if (IS_CANNONLAKE(dev_priv))
 		dev_priv->display.init_clock_gating = cnl_init_clock_gating;
 	else if (IS_COFFEELAKE(dev_priv))
 		dev_priv->display.init_clock_gating = cfl_init_clock_gating;
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 23175c5..db27f2f 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -93,7 +93,115 @@
 	intel_display_power_put(dev_priv, psr_aux_domain(intel_dp));
 }
 
-static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp)
+void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug)
+{
+	u32 debug_mask, mask;
+
+	/* No PSR interrupts on VLV/CHV */
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		return;
+
+	mask = EDP_PSR_ERROR(TRANSCODER_EDP);
+	debug_mask = EDP_PSR_POST_EXIT(TRANSCODER_EDP) |
+		     EDP_PSR_PRE_ENTRY(TRANSCODER_EDP);
+
+	if (INTEL_GEN(dev_priv) >= 8) {
+		mask |= EDP_PSR_ERROR(TRANSCODER_A) |
+			EDP_PSR_ERROR(TRANSCODER_B) |
+			EDP_PSR_ERROR(TRANSCODER_C);
+
+		debug_mask |= EDP_PSR_POST_EXIT(TRANSCODER_A) |
+			      EDP_PSR_PRE_ENTRY(TRANSCODER_A) |
+			      EDP_PSR_POST_EXIT(TRANSCODER_B) |
+			      EDP_PSR_PRE_ENTRY(TRANSCODER_B) |
+			      EDP_PSR_POST_EXIT(TRANSCODER_C) |
+			      EDP_PSR_PRE_ENTRY(TRANSCODER_C);
+	}
+
+	if (debug)
+		mask |= debug_mask;
+
+	WRITE_ONCE(dev_priv->psr.debug, debug);
+	I915_WRITE(EDP_PSR_IMR, ~mask);
+}
+
+static void psr_event_print(u32 val, bool psr2_enabled)
+{
+	DRM_DEBUG_KMS("PSR exit events: 0x%x\n", val);
+	if (val & PSR_EVENT_PSR2_WD_TIMER_EXPIRE)
+		DRM_DEBUG_KMS("\tPSR2 watchdog timer expired\n");
+	if ((val & PSR_EVENT_PSR2_DISABLED) && psr2_enabled)
+		DRM_DEBUG_KMS("\tPSR2 disabled\n");
+	if (val & PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN)
+		DRM_DEBUG_KMS("\tSU dirty FIFO underrun\n");
+	if (val & PSR_EVENT_SU_CRC_FIFO_UNDERRUN)
+		DRM_DEBUG_KMS("\tSU CRC FIFO underrun\n");
+	if (val & PSR_EVENT_GRAPHICS_RESET)
+		DRM_DEBUG_KMS("\tGraphics reset\n");
+	if (val & PSR_EVENT_PCH_INTERRUPT)
+		DRM_DEBUG_KMS("\tPCH interrupt\n");
+	if (val & PSR_EVENT_MEMORY_UP)
+		DRM_DEBUG_KMS("\tMemory up\n");
+	if (val & PSR_EVENT_FRONT_BUFFER_MODIFY)
+		DRM_DEBUG_KMS("\tFront buffer modification\n");
+	if (val & PSR_EVENT_WD_TIMER_EXPIRE)
+		DRM_DEBUG_KMS("\tPSR watchdog timer expired\n");
+	if (val & PSR_EVENT_PIPE_REGISTERS_UPDATE)
+		DRM_DEBUG_KMS("\tPIPE registers updated\n");
+	if (val & PSR_EVENT_REGISTER_UPDATE)
+		DRM_DEBUG_KMS("\tRegister updated\n");
+	if (val & PSR_EVENT_HDCP_ENABLE)
+		DRM_DEBUG_KMS("\tHDCP enabled\n");
+	if (val & PSR_EVENT_KVMR_SESSION_ENABLE)
+		DRM_DEBUG_KMS("\tKVMR session enabled\n");
+	if (val & PSR_EVENT_VBI_ENABLE)
+		DRM_DEBUG_KMS("\tVBI enabled\n");
+	if (val & PSR_EVENT_LPSP_MODE_EXIT)
+		DRM_DEBUG_KMS("\tLPSP mode exited\n");
+	if ((val & PSR_EVENT_PSR_DISABLE) && !psr2_enabled)
+		DRM_DEBUG_KMS("\tPSR disabled\n");
+}
+
+void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir)
+{
+	u32 transcoders = BIT(TRANSCODER_EDP);
+	enum transcoder cpu_transcoder;
+	ktime_t time_ns = ktime_get();
+
+	if (INTEL_GEN(dev_priv) >= 8)
+		transcoders |= BIT(TRANSCODER_A) |
+			       BIT(TRANSCODER_B) |
+			       BIT(TRANSCODER_C);
+
+	for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) {
+		/* FIXME: Exit PSR and link train manually when this happens. */
+		if (psr_iir & EDP_PSR_ERROR(cpu_transcoder))
+			DRM_DEBUG_KMS("[transcoder %s] PSR aux error\n",
+				      transcoder_name(cpu_transcoder));
+
+		if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) {
+			dev_priv->psr.last_entry_attempt = time_ns;
+			DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n",
+				      transcoder_name(cpu_transcoder));
+		}
+
+		if (psr_iir & EDP_PSR_POST_EXIT(cpu_transcoder)) {
+			dev_priv->psr.last_exit = time_ns;
+			DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n",
+				      transcoder_name(cpu_transcoder));
+
+			if (INTEL_GEN(dev_priv) >= 9) {
+				u32 val = I915_READ(PSR_EVENT(cpu_transcoder));
+				bool psr2_enabled = dev_priv->psr.psr2_enabled;
+
+				I915_WRITE(PSR_EVENT(cpu_transcoder), val);
+				psr_event_print(val, psr2_enabled);
+			}
+		}
+	}
+}
+
+static bool intel_dp_get_y_coord_required(struct intel_dp *intel_dp)
 {
 	uint8_t psr_caps = 0;
 
@@ -122,6 +230,18 @@
 	return alpm_caps & DP_ALPM_CAP;
 }
 
+static u8 intel_dp_get_sink_sync_latency(struct intel_dp *intel_dp)
+{
+	u8 val = 0;
+
+	if (drm_dp_dpcd_readb(&intel_dp->aux,
+			      DP_SYNCHRONIZATION_LATENCY_IN_SINK, &val) == 1)
+		val &= DP_MAX_RESYNC_FRAME_COUNT_MASK;
+	else
+		DRM_ERROR("Unable to get sink synchronization latency\n");
+	return val;
+}
+
 void intel_psr_init_dpcd(struct intel_dp *intel_dp)
 {
 	struct drm_i915_private *dev_priv =
@@ -130,33 +250,36 @@
 	drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd,
 			 sizeof(intel_dp->psr_dpcd));
 
-	if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) {
+	if (intel_dp->psr_dpcd[0]) {
 		dev_priv->psr.sink_support = true;
 		DRM_DEBUG_KMS("Detected EDP PSR Panel.\n");
 	}
 
 	if (INTEL_GEN(dev_priv) >= 9 &&
-	    (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) {
-		uint8_t frame_sync_cap;
+	    (intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_IS_SUPPORTED)) {
+		/*
+		 * All panels that support PSR version 03h (PSR2 +
+		 * Y-coordinate) can handle Y-coordinates in VSC, but we are
+		 * only sure that it is going to be used when required by the
+		 * panel. This way the panel can do selective updates without
+		 * an aux frame sync.
+		 *
+		 * To support PSR version 02h panels, and version 03h panels
+		 * without the Y-coordinate requirement, we would need to
+		 * enable GTC first.
+		 */
+		dev_priv->psr.sink_psr2_support =
+				intel_dp_get_y_coord_required(intel_dp);
+		DRM_DEBUG_KMS("PSR2 %s on sink", dev_priv->psr.sink_psr2_support
+			      ? "supported" : "not supported");
 
-		dev_priv->psr.sink_support = true;
-		if (drm_dp_dpcd_readb(&intel_dp->aux,
-				      DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP,
-				      &frame_sync_cap) != 1)
-			frame_sync_cap = 0;
-		dev_priv->psr.aux_frame_sync = frame_sync_cap & DP_AUX_FRAME_SYNC_CAP;
-		/* PSR2 needs frame sync as well */
-		dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync;
-		DRM_DEBUG_KMS("PSR2 %s on sink",
-			      dev_priv->psr.psr2_support ? "supported" : "not supported");
-
-		if (dev_priv->psr.psr2_support) {
-			dev_priv->psr.y_cord_support =
-				intel_dp_get_y_cord_status(intel_dp);
+		if (dev_priv->psr.sink_psr2_support) {
 			dev_priv->psr.colorimetry_support =
 				intel_dp_get_colorimetry_status(intel_dp);
 			dev_priv->psr.alpm =
 				intel_dp_get_alpm_status(intel_dp);
+			dev_priv->psr.sink_sync_latency =
+				intel_dp_get_sink_sync_latency(intel_dp);
 		}
 	}
 }
@@ -193,21 +316,17 @@
 	struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
 	struct edp_vsc_psr psr_vsc;
 
-	if (dev_priv->psr.psr2_support) {
+	if (dev_priv->psr.psr2_enabled) {
 		/* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */
 		memset(&psr_vsc, 0, sizeof(psr_vsc));
 		psr_vsc.sdp_header.HB0 = 0;
 		psr_vsc.sdp_header.HB1 = 0x7;
-		if (dev_priv->psr.colorimetry_support &&
-		    dev_priv->psr.y_cord_support) {
+		if (dev_priv->psr.colorimetry_support) {
 			psr_vsc.sdp_header.HB2 = 0x5;
 			psr_vsc.sdp_header.HB3 = 0x13;
-		} else if (dev_priv->psr.y_cord_support) {
+		} else {
 			psr_vsc.sdp_header.HB2 = 0x4;
 			psr_vsc.sdp_header.HB3 = 0xe;
-		} else {
-			psr_vsc.sdp_header.HB2 = 0x3;
-			psr_vsc.sdp_header.HB3 = 0xc;
 		}
 	} else {
 		/* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */
@@ -228,31 +347,12 @@
 			   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);
 }
 
-static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv,
-				       enum port port)
-{
-	if (INTEL_GEN(dev_priv) >= 9)
-		return DP_AUX_CH_CTL(port);
-	else
-		return EDP_PSR_AUX_CTL;
-}
-
-static i915_reg_t psr_aux_data_reg(struct drm_i915_private *dev_priv,
-					enum port port, int index)
-{
-	if (INTEL_GEN(dev_priv) >= 9)
-		return DP_AUX_CH_DATA(port, index);
-	else
-		return EDP_PSR_AUX_DATA(index);
-}
-
-static void hsw_psr_enable_sink(struct intel_dp *intel_dp)
+static void hsw_psr_setup_aux(struct intel_dp *intel_dp)
 {
 	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct drm_device *dev = dig_port->base.base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	uint32_t aux_clock_divider;
-	i915_reg_t aux_ctl_reg;
+	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+	u32 aux_clock_divider, aux_ctl;
+	int i;
 	static const uint8_t aux_msg[] = {
 		[0] = DP_AUX_NATIVE_WRITE << 4,
 		[1] = DP_SET_POWER >> 8,
@@ -260,41 +360,47 @@
 		[3] = 1 - 1,
 		[4] = DP_SET_POWER_D0,
 	};
-	enum port port = dig_port->base.port;
-	u32 aux_ctl;
-	int i;
+	u32 psr_aux_mask = EDP_PSR_AUX_CTL_TIME_OUT_MASK |
+			   EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK |
+			   EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK |
+			   EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK;
 
 	BUILD_BUG_ON(sizeof(aux_msg) > 20);
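+	/* Write the DP_SET_POWER D0 message into the PSR AUX data registers */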
+	for (i = 0; i < sizeof(aux_msg); i += 4)
+		I915_WRITE(EDP_PSR_AUX_DATA(i >> 2),
+			   intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i));
 
 	aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0);
 
-	/* Enable AUX frame sync at sink */
-	if (dev_priv->psr.aux_frame_sync)
-		drm_dp_dpcd_writeb(&intel_dp->aux,
-				DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF,
-				DP_AUX_FRAME_SYNC_ENABLE);
+	/* Start with bits set for DDI_AUX_CTL register */
+	aux_ctl = intel_dp->get_aux_send_ctl(intel_dp, 0, sizeof(aux_msg),
+					     aux_clock_divider);
+
+	/* Select only valid bits for SRD_AUX_CTL */
+	aux_ctl &= psr_aux_mask;
+	I915_WRITE(EDP_PSR_AUX_CTL, aux_ctl);
+}
+
+static void hsw_psr_enable_sink(struct intel_dp *intel_dp)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	u8 dpcd_val = DP_PSR_ENABLE;
+
 	/* Enable ALPM at sink for psr2 */
-	if (dev_priv->psr.psr2_support && dev_priv->psr.alpm)
+	if (dev_priv->psr.psr2_enabled && dev_priv->psr.alpm)
 		drm_dp_dpcd_writeb(&intel_dp->aux,
 				DP_RECEIVER_ALPM_CONFIG,
 				DP_ALPM_ENABLE);
+
+	if (dev_priv->psr.psr2_enabled)
+		dpcd_val |= DP_PSR_ENABLE_PSR2;
 	if (dev_priv->psr.link_standby)
-		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
-				   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);
-	else
-		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
-				   DP_PSR_ENABLE);
+		dpcd_val |= DP_PSR_MAIN_LINK_ACTIVE;
+	drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, dpcd_val);
 
-	aux_ctl_reg = psr_aux_ctl_reg(dev_priv, port);
-
-	/* Setup AUX registers */
-	for (i = 0; i < sizeof(aux_msg); i += 4)
-		I915_WRITE(psr_aux_data_reg(dev_priv, port, i >> 2),
-			   intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i));
-
-	aux_ctl = intel_dp->get_aux_send_ctl(intel_dp, 0, sizeof(aux_msg),
-					     aux_clock_divider);
-	I915_WRITE(aux_ctl_reg, aux_ctl);
+	drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D0);
 }
 
 static void vlv_psr_enable_source(struct intel_dp *intel_dp,
@@ -396,25 +502,16 @@
 	 * with the 5 or 6 idle patterns.
 	 */
 	uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames);
-	uint32_t val;
-	uint8_t sink_latency;
-
-	val = idle_frames << EDP_PSR_IDLE_FRAME_SHIFT;
+	u32 val = idle_frames << EDP_PSR2_IDLE_FRAME_SHIFT;
 
 	/* FIXME: selective update is probably totally broken because it doesn't
 	 * mesh at all with our frontbuffer tracking. And the hw alone isn't
 	 * good enough. */
-	val |= EDP_PSR2_ENABLE |
-		EDP_SU_TRACK_ENABLE;
+	val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE;
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
+		val |= EDP_Y_COORDINATE_ENABLE;
 
-	if (drm_dp_dpcd_readb(&intel_dp->aux,
-				DP_SYNCHRONIZATION_LATENCY_IN_SINK,
-				&sink_latency) == 1) {
-		sink_latency &= DP_MAX_RESYNC_FRAME_COUNT_MASK;
-	} else {
-		sink_latency = 0;
-	}
-	val |= EDP_PSR2_FRAME_BEFORE_SU(sink_latency + 1);
+	val |= EDP_PSR2_FRAME_BEFORE_SU(dev_priv->psr.sink_sync_latency + 1);
 
 	if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 5)
 		val |= EDP_PSR2_TP2_TIME_2500;
@@ -440,7 +537,7 @@
 	 */
 
 	/* psr1 and psr2 are mutually exclusive.*/
-	if (dev_priv->psr.psr2_support)
+	if (dev_priv->psr.psr2_enabled)
 		hsw_activate_psr2(intel_dp);
 	else
 		hsw_activate_psr1(intel_dp);
@@ -460,7 +557,7 @@
 	 * dynamically during PSR enable, and extracted from sink
 	 * caps during eDP detection.
 	 */
-	if (!dev_priv->psr.psr2_support)
+	if (!dev_priv->psr.sink_psr2_support)
 		return false;
 
 	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
@@ -478,15 +575,6 @@
 		return false;
 	}
 
-	/*
-	 * FIXME:enable psr2 only for y-cordinate psr2 panels
-	 * After gtc implementation , remove this restriction.
-	 */
-	if (!dev_priv->psr.y_cord_support) {
-		DRM_DEBUG_KMS("PSR2 not enabled, panel does not support Y coordinate\n");
-		return false;
-	}
-
 	return true;
 }
 
@@ -568,7 +656,7 @@
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	if (dev_priv->psr.psr2_support)
+	if (dev_priv->psr.psr2_enabled)
 		WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE);
 	else
 		WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
@@ -586,14 +674,24 @@
 	struct drm_device *dev = dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
-	u32 chicken;
 
 	psr_aux_io_power_get(intel_dp);
 
-	if (dev_priv->psr.psr2_support) {
-		chicken = PSR2_VSC_ENABLE_PROG_HEADER;
-		if (dev_priv->psr.y_cord_support)
-			chicken |= PSR2_ADD_VERTICAL_LINE_COUNT;
+	/* Only HSW and BDW have PSR AUX registers that need to be set up.
+	 * SKL+ use hardcoded values for PSR AUX transactions.
+	 */
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		hsw_psr_setup_aux(intel_dp);
+
+	if (dev_priv->psr.psr2_enabled) {
+		u32 chicken = I915_READ(CHICKEN_TRANS(cpu_transcoder));
+
+		if (INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv))
+			chicken |= (PSR2_VSC_ENABLE_PROG_HEADER
+				   | PSR2_ADD_VERTICAL_LINE_COUNT);
+		else
+			chicken &= ~VSC_DATA_SEL_SOFTWARE_CONTROL;
 		I915_WRITE(CHICKEN_TRANS(cpu_transcoder), chicken);
 
 		I915_WRITE(EDP_PSR_DEBUG,
@@ -613,7 +711,8 @@
 		I915_WRITE(EDP_PSR_DEBUG,
 			   EDP_PSR_DEBUG_MASK_MEMUP |
 			   EDP_PSR_DEBUG_MASK_HPD |
-			   EDP_PSR_DEBUG_MASK_LPSP);
+			   EDP_PSR_DEBUG_MASK_LPSP |
+			   EDP_PSR_DEBUG_MASK_DISP_REG_WRITE);
 	}
 }
 
@@ -644,7 +743,7 @@
 		goto unlock;
 	}
 
-	dev_priv->psr.psr2_support = crtc_state->has_psr2;
+	dev_priv->psr.psr2_enabled = crtc_state->has_psr2;
 	dev_priv->psr.busy_frontbuffer_bits = 0;
 
 	dev_priv->psr.setup_vsc(intel_dp, crtc_state);
@@ -714,12 +813,7 @@
 		i915_reg_t psr_status;
 		u32 psr_status_mask;
 
-		if (dev_priv->psr.aux_frame_sync)
-			drm_dp_dpcd_writeb(&intel_dp->aux,
-					DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF,
-					0);
-
-		if (dev_priv->psr.psr2_support) {
+		if (dev_priv->psr.psr2_enabled) {
 			psr_status = EDP_PSR2_STATUS;
 			psr_status_mask = EDP_PSR2_STATUS_STATE_MASK;
 
@@ -743,7 +837,7 @@
 
 		dev_priv->psr.active = false;
 	} else {
-		if (dev_priv->psr.psr2_support)
+		if (dev_priv->psr.psr2_enabled)
 			WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE);
 		else
 			WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE);
@@ -789,53 +883,59 @@
 	cancel_delayed_work_sync(&dev_priv->psr.work);
 }
 
+static bool psr_wait_for_idle(struct drm_i915_private *dev_priv)
+{
+	struct intel_dp *intel_dp;
+	i915_reg_t reg;
+	u32 mask;
+	int err;
+
+	intel_dp = dev_priv->psr.enabled;
+	if (!intel_dp)
+		return false;
+
+	if (HAS_DDI(dev_priv)) {
+		if (dev_priv->psr.psr2_enabled) {
+			reg = EDP_PSR2_STATUS;
+			mask = EDP_PSR2_STATUS_STATE_MASK;
+		} else {
+			reg = EDP_PSR_STATUS;
+			mask = EDP_PSR_STATUS_STATE_MASK;
+		}
+	} else {
+		struct drm_crtc *crtc =
+			dp_to_dig_port(intel_dp)->base.base.crtc;
+		enum pipe pipe = to_intel_crtc(crtc)->pipe;
+
+		reg = VLV_PSRSTAT(pipe);
+		mask = VLV_EDP_PSR_IN_TRANS;
+	}
+
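+	/* Wait unlocked; the poll below may take up to 50 ms */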
+	mutex_unlock(&dev_priv->psr.lock);
+
+	err = intel_wait_for_register(dev_priv, reg, mask, 0, 50);
+	if (err)
+		DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n");
+
+	/* After the unlocked wait, verify that PSR is still wanted! */
+	mutex_lock(&dev_priv->psr.lock);
+	return err == 0 && dev_priv->psr.enabled;
+}
+
 static void intel_psr_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), psr.work.work);
-	struct intel_dp *intel_dp = dev_priv->psr.enabled;
-	struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc;
-	enum pipe pipe = to_intel_crtc(crtc)->pipe;
 
-	/* We have to make sure PSR is ready for re-enable
+	mutex_lock(&dev_priv->psr.lock);
+
+	/*
+	 * We have to make sure PSR is ready for re-enable
 	 * otherwise it stays disabled until the next full enable/disable cycle.
 	 * PSR might take some time to get fully disabled
 	 * and be ready for re-enable.
 	 */
-	if (HAS_DDI(dev_priv)) {
-		if (dev_priv->psr.psr2_support) {
-			if (intel_wait_for_register(dev_priv,
-						    EDP_PSR2_STATUS,
-						    EDP_PSR2_STATUS_STATE_MASK,
-						    0,
-						    50)) {
-				DRM_ERROR("Timed out waiting for PSR2 Idle for re-enable\n");
-				return;
-			}
-		} else {
-			if (intel_wait_for_register(dev_priv,
-						    EDP_PSR_STATUS,
-						    EDP_PSR_STATUS_STATE_MASK,
-						    0,
-						    50)) {
-				DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n");
-				return;
-			}
-		}
-	} else {
-		if (intel_wait_for_register(dev_priv,
-					    VLV_PSRSTAT(pipe),
-					    VLV_EDP_PSR_IN_TRANS,
-					    0,
-					    1)) {
-			DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n");
-			return;
-		}
-	}
-	mutex_lock(&dev_priv->psr.lock);
-	intel_dp = dev_priv->psr.enabled;
-
-	if (!intel_dp)
+	if (!psr_wait_for_idle(dev_priv))
 		goto unlock;
 
 	/*
@@ -846,7 +946,7 @@
 	if (dev_priv->psr.busy_frontbuffer_bits)
 		goto unlock;
 
-	intel_psr_activate(intel_dp);
+	intel_psr_activate(dev_priv->psr.enabled);
 unlock:
 	mutex_unlock(&dev_priv->psr.lock);
 }
@@ -862,11 +962,7 @@
 		return;
 
 	if (HAS_DDI(dev_priv)) {
-		if (dev_priv->psr.aux_frame_sync)
-			drm_dp_dpcd_writeb(&intel_dp->aux,
-					DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF,
-					0);
-		if (dev_priv->psr.psr2_support) {
+		if (dev_priv->psr.psr2_enabled) {
 			val = I915_READ(EDP_PSR2_CTL);
 			WARN_ON(!(val & EDP_PSR2_ENABLE));
 			I915_WRITE(EDP_PSR2_CTL, val & ~EDP_PSR2_ENABLE);
@@ -957,6 +1053,7 @@
  * intel_psr_invalidate - Invalidate PSR
  * @dev_priv: i915 device
  * @frontbuffer_bits: frontbuffer plane tracking bits
+ * @origin: which operation caused the invalidate
  *
  * Since the hardware frontbuffer tracking has gaps we need to integrate
  * with the software frontbuffer tracking. This function gets called every
@@ -966,7 +1063,7 @@
  * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits.
  */
 void intel_psr_invalidate(struct drm_i915_private *dev_priv,
-			  unsigned frontbuffer_bits)
+			  unsigned frontbuffer_bits, enum fb_op_origin origin)
 {
 	struct drm_crtc *crtc;
 	enum pipe pipe;
@@ -974,6 +1071,9 @@
 	if (!CAN_PSR(dev_priv))
 		return;
 
+	if (dev_priv->psr.has_hw_tracking && origin == ORIGIN_FLIP)
+		return;
+
 	mutex_lock(&dev_priv->psr.lock);
 	if (!dev_priv->psr.enabled) {
 		mutex_unlock(&dev_priv->psr.lock);
@@ -1014,6 +1114,9 @@
 	if (!CAN_PSR(dev_priv))
 		return;
 
+	if (dev_priv->psr.has_hw_tracking && origin == ORIGIN_FLIP)
+		return;
+
 	mutex_lock(&dev_priv->psr.lock);
 	if (!dev_priv->psr.enabled) {
 		mutex_unlock(&dev_priv->psr.lock);
@@ -1027,8 +1130,23 @@
 	dev_priv->psr.busy_frontbuffer_bits &= ~frontbuffer_bits;
 
 	/* By definition flush = invalidate + flush */
-	if (frontbuffer_bits)
-		intel_psr_exit(dev_priv);
+	if (frontbuffer_bits) {
+		if (dev_priv->psr.psr2_enabled ||
+		    IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+			intel_psr_exit(dev_priv);
+		} else {
+			/*
+			 * Display WA #0884: all
+			 * This documented WA for bxt can be safely applied
+			 * broadly, so we can force HW tracking to exit PSR
+			 * instead of disabling and re-enabling.
+			 * The workaround tells us to write 0 to CUR_SURFLIVE_A,
+			 * but it makes more sense to write to the currently
+			 * active pipe.
+			 */
+			I915_WRITE(CURSURFLIVE(pipe), 0);
+		}
+	}
 
 	if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits)
 		if (!work_busy(&dev_priv->psr.work.work))
@@ -1055,9 +1173,12 @@
 	if (!dev_priv->psr.sink_support)
 		return;
 
-	/* Per platform default: all disabled. */
-	if (i915_modparams.enable_psr == -1)
+	if (i915_modparams.enable_psr == -1) {
+		i915_modparams.enable_psr = dev_priv->vbt.psr.enable;
+
+		/* Per platform default: all disabled. */
 		i915_modparams.enable_psr = 0;
+	}
 
 	/* Set link_standby x link_off defaults */
 	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
@@ -1090,6 +1211,7 @@
 		dev_priv->psr.activate = vlv_psr_activate;
 		dev_priv->psr.setup_vsc = vlv_psr_setup_vsc;
 	} else {
+		dev_priv->psr.has_hw_tracking = true;
 		dev_priv->psr.enable_source = hsw_psr_enable_source;
 		dev_priv->psr.disable_source = hsw_psr_disable;
 		dev_priv->psr.enable_sink = hsw_psr_enable_sink;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1d59952..8f19349 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -36,6 +36,7 @@
 #include "i915_gem_render_state.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_workarounds.h"
 
 /* Rough estimate of the typical request size, performing a flush,
  * set-context and then emitting the batch.
@@ -557,7 +558,8 @@
 	 */
 	if (request) {
 		struct drm_i915_private *dev_priv = request->i915;
-		struct intel_context *ce = &request->ctx->engine[engine->id];
+		struct intel_context *ce = to_intel_context(request->ctx,
+							    engine);
 		struct i915_hw_ppgtt *ppgtt;
 
 		if (ce->state) {
@@ -599,7 +601,7 @@
 {
 	int ret;
 
-	ret = intel_ring_workarounds_emit(rq);
+	ret = intel_ctx_workarounds_emit(rq);
 	if (ret != 0)
 		return ret;
 
@@ -617,6 +619,8 @@
 	if (ret)
 		return ret;
 
+	intel_whitelist_workarounds_apply(engine);
+
 	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
 	if (IS_GEN(dev_priv, 4, 6))
 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
@@ -658,7 +662,7 @@
 	if (INTEL_GEN(dev_priv) >= 6)
 		I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
 
-	return init_workarounds_ring(engine);
+	return 0;
 }
 
 static u32 *gen6_signal(struct i915_request *rq, u32 *cs)
@@ -693,17 +697,17 @@
 	struct i915_request *request;
 	unsigned long flags;
 
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock_irqsave(&engine->timeline.lock, flags);
 
 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline->requests, link) {
+	list_for_each_entry(request, &engine->timeline.requests, link) {
 		GEM_BUG_ON(!request->global_seqno);
 		if (!i915_request_completed(request))
 			dma_fence_set_error(&request->fence, -EIO);
 	}
 	/* Remaining _unready_ requests will be nop'ed when submitted */
 
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static void i9xx_submit_request(struct i915_request *request)
@@ -1062,7 +1066,6 @@
 
 void intel_ring_reset(struct intel_ring *ring, u32 tail)
 {
-	GEM_BUG_ON(!list_empty(&ring->request_list));
 	ring->tail = tail;
 	ring->head = tail;
 	ring->emit = tail;
@@ -1114,19 +1117,24 @@
 }
 
 struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+intel_engine_create_ring(struct intel_engine_cs *engine,
+			 struct i915_timeline *timeline,
+			 int size)
 {
 	struct intel_ring *ring;
 	struct i915_vma *vma;
 
 	GEM_BUG_ON(!is_power_of_2(size));
 	GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+	GEM_BUG_ON(timeline == &engine->timeline);
+	lockdep_assert_held(&engine->i915->drm.struct_mutex);
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
 	if (!ring)
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&ring->request_list);
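+	/* The ring keeps its own reference to the timeline */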
+	ring->timeline = i915_timeline_get(timeline);
 
 	ring->size = size;
 	/* Workaround an erratum on the i830 which causes a hang if
@@ -1157,12 +1165,13 @@
 	i915_vma_close(ring->vma);
 	__i915_gem_object_release_unless_active(obj);
 
+	i915_timeline_put(ring->timeline);
 	kfree(ring);
 }
 
-static int context_pin(struct i915_gem_context *ctx)
+static int context_pin(struct intel_context *ce)
 {
-	struct i915_vma *vma = ctx->engine[RCS].state;
+	struct i915_vma *vma = ce->state;
 	int ret;
 
 	/*
@@ -1253,7 +1262,7 @@
 intel_ring_context_pin(struct intel_engine_cs *engine,
 		       struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 	int ret;
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
@@ -1275,7 +1284,7 @@
 	}
 
 	if (ce->state) {
-		ret = context_pin(ctx);
+		ret = context_pin(ce);
 		if (ret)
 			goto err;
 
@@ -1296,7 +1305,7 @@
 static void intel_ring_context_unpin(struct intel_engine_cs *engine,
 				     struct i915_gem_context *ctx)
 {
-	struct intel_context *ce = &ctx->engine[engine->id];
+	struct intel_context *ce = to_intel_context(ctx, engine);
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
 	GEM_BUG_ON(ce->pin_count == 0);
@@ -1315,6 +1324,7 @@
 static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 {
 	struct intel_ring *ring;
+	struct i915_timeline *timeline;
 	int err;
 
 	intel_engine_setup_common(engine);
@@ -1323,7 +1333,14 @@
 	if (err)
 		goto err;
 
-	ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
+	timeline = i915_timeline_create(engine->i915, engine->name);
+	if (IS_ERR(timeline)) {
+		err = PTR_ERR(timeline);
+		goto err;
+	}
+
+	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
+	i915_timeline_put(timeline);
 	if (IS_ERR(ring)) {
 		err = PTR_ERR(ring);
 		goto err;
@@ -1424,7 +1441,7 @@
 
 	*cs++ = MI_NOOP;
 	*cs++ = MI_SET_CONTEXT;
-	*cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags;
+	*cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags;
 	/*
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
@@ -1515,7 +1532,7 @@
 		hw_flags = MI_FORCE_RESTORE;
 	}
 
-	if (to_ctx->engine[engine->id].state &&
+	if (to_intel_context(to_ctx, engine)->state &&
 	    (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) {
 		GEM_BUG_ON(engine->id != RCS);
 
@@ -1563,7 +1580,7 @@
 {
 	int ret;
 
-	GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);
+	GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count);
 
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
@@ -1593,6 +1610,7 @@
 	if (intel_ring_update_space(ring) >= bytes)
 		return 0;
 
+	GEM_BUG_ON(list_empty(&ring->request_list));
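+	/* The ring is full, so there must be requests we can wait on */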
 	list_for_each_entry(target, &ring->request_list, ring_link) {
 		/* Would completion of this request free enough space? */
 		if (bytes <= __intel_ring_space(target->postfix,
@@ -1692,17 +1710,18 @@
 		need_wrap &= ~1;
 		GEM_BUG_ON(need_wrap > ring->space);
 		GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+		GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
 
 		/* Fill the tail with MI_NOOP */
-		memset(ring->vaddr + ring->emit, 0, need_wrap);
-		ring->emit = 0;
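+		/* MI_NOOP is 0, so a zeroing memset64 emits the NOOPs */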
+		memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
 		ring->space -= need_wrap;
+		ring->emit = 0;
 	}
 
 	GEM_BUG_ON(ring->emit > ring->size - bytes);
 	GEM_BUG_ON(ring->space < bytes);
 	cs = ring->vaddr + ring->emit;
-	GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes));
+	GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
 	ring->emit += bytes;
 	ring->space -= bytes;
 
@@ -1712,22 +1731,24 @@
 /* Align the ring tail to a cacheline boundary */
 int intel_ring_cacheline_align(struct i915_request *rq)
 {
-	int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
-	u32 *cs;
+	int num_dwords;
+	void *cs;
 
+	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
 	if (num_dwords == 0)
 		return 0;
 
-	num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords;
+	num_dwords = CACHELINE_DWORDS - num_dwords;
+	GEM_BUG_ON(num_dwords & 1);
+
 	cs = intel_ring_begin(rq, num_dwords);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	while (num_dwords--)
-		*cs++ = MI_NOOP;
-
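+	/* num_dwords is even, so the NOOPs can be emitted as qword pairs */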
+	memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
 	intel_ring_advance(rq, cs);
 
+	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
 	return 0;
 }
 
@@ -1943,8 +1964,6 @@
 static void intel_ring_init_irq(struct drm_i915_private *dev_priv,
 				struct intel_engine_cs *engine)
 {
-	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift;
-
 	if (INTEL_GEN(dev_priv) >= 6) {
 		engine->irq_enable = gen6_irq_enable;
 		engine->irq_disable = gen6_irq_disable;
@@ -2029,6 +2048,8 @@
 	if (HAS_L3_DPF(dev_priv))
 		engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
+	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
+
 	if (INTEL_GEN(dev_priv) >= 6) {
 		engine->init_context = intel_rcs_ctx_init;
 		engine->emit_flush = gen7_render_ring_flush;
@@ -2079,7 +2100,6 @@
 		engine->emit_flush = gen6_bsd_ring_flush;
 		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 	} else {
-		engine->mmio_base = BSD_RING_BASE;
 		engine->emit_flush = bsd_ring_flush;
 		if (IS_GEN5(dev_priv))
 			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0320c2c..010750e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -3,15 +3,19 @@
 #define _INTEL_RINGBUFFER_H_
 
 #include <linux/hashtable.h>
+#include <linux/seqlock.h>
 
 #include "i915_gem_batch_pool.h"
-#include "i915_gem_timeline.h"
 
+#include "i915_reg.h"
 #include "i915_pmu.h"
 #include "i915_request.h"
 #include "i915_selftest.h"
+#include "i915_timeline.h"
+#include "intel_gpu_commands.h"
 
 struct drm_printer;
+struct i915_sched_attr;
 
 #define I915_CMD_HASH_ORDER 9
 
@@ -84,7 +88,7 @@
 }
 
 #define I915_MAX_SLICES	3
-#define I915_MAX_SUBSLICES 3
+#define I915_MAX_SUBSLICES 8
 
 #define instdone_slice_mask(dev_priv__) \
 	(INTEL_GEN(dev_priv__) == 7 ? \
@@ -125,7 +129,9 @@
 	struct i915_vma *vma;
 	void *vaddr;
 
+	struct i915_timeline *timeline;
 	struct list_head request_list;
+	struct list_head active_link;
 
 	u32 head;
 	u32 tail;
@@ -330,10 +336,10 @@
 	u8 instance;
 	u32 context_size;
 	u32 mmio_base;
-	unsigned int irq_shift;
 
 	struct intel_ring *buffer;
-	struct intel_timeline *timeline;
+
+	struct i915_timeline timeline;
 
 	struct drm_i915_gem_object *default_state;
 
@@ -459,7 +465,8 @@
 	 *
 	 * Called under the struct_mutex.
 	 */
-	void		(*schedule)(struct i915_request *request, int priority);
+	void		(*schedule)(struct i915_request *request,
+				    const struct i915_sched_attr *attr);
 
 	/*
 	 * Cancel all requests on the hardware, or queued for execution.
@@ -561,6 +568,7 @@
 
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
+#define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 	unsigned int flags;
 
 	/*
@@ -591,7 +599,7 @@
 		/**
 		 * @lock: Lock protecting the below fields.
 		 */
-		spinlock_t lock;
+		seqlock_t lock;
 		/**
 		 * @enabled: Reference count indicating number of listeners.
 		 */
@@ -620,16 +628,29 @@
 	} stats;
 };
 
-static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+static inline bool
+intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
 {
 	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
 }
 
-static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
+static inline bool
+intel_engine_supports_stats(const struct intel_engine_cs *engine)
 {
 	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
 }
 
+static inline bool
+intel_engine_has_preemption(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
+}
+
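+/* Preempt only when the new priority exceeds both the last and default (0) */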
+static inline bool __execlists_need_preempt(int prio, int last)
+{
+	return prio > max(0, last);
+}
+
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
@@ -637,6 +658,13 @@
 	__set_bit(bit, (unsigned long *)&execlists->active);
 }
 
+static inline bool
+execlists_set_active_once(struct intel_engine_execlists *execlists,
+			  unsigned int bit)
+{
+	return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
+}
+
 static inline void
 execlists_clear_active(struct intel_engine_execlists *execlists,
 		       unsigned int bit)
@@ -651,6 +679,10 @@
 	return test_bit(bit, (unsigned long *)&execlists->active);
 }
 
+void execlists_user_begin(struct intel_engine_execlists *execlists,
+			  const struct execlist_port *port);
+void execlists_user_end(struct intel_engine_execlists *execlists);
+
 void
 execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
 
@@ -663,7 +695,7 @@
 	return execlists->port_mask + 1;
 }
 
-static inline void
+static inline struct execlist_port *
 execlists_port_complete(struct intel_engine_execlists * const execlists,
 			struct execlist_port * const port)
 {
@@ -674,6 +706,8 @@
 
 	memmove(port, port + 1, m * sizeof(struct execlist_port));
 	memset(port + m, 0, sizeof(struct execlist_port));
+
+	return port;
 }
 
 static inline unsigned int
@@ -736,7 +770,9 @@
 #define CNL_HWS_CSB_WRITE_INDEX		0x2f
 
 struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+intel_engine_create_ring(struct intel_engine_cs *engine,
+			 struct i915_timeline *timeline,
+			 int size);
 int intel_ring_pin(struct intel_ring *ring,
 		   struct drm_i915_private *i915,
 		   unsigned int offset_bias);
@@ -854,12 +890,9 @@
 	 * with serialising this hint with anything, so document it as
 	 * a hint and nothing more.
 	 */
-	return READ_ONCE(engine->timeline->seqno);
+	return READ_ONCE(engine->timeline.seqno);
 }
 
-int init_workarounds_ring(struct intel_engine_cs *engine);
-int intel_ring_workarounds_emit(struct i915_request *rq);
-
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
 			       struct intel_instdone *instdone);
 
@@ -939,7 +972,7 @@
 			   struct intel_wait *wait);
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 			      struct intel_wait *wait);
-void intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
+bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
 void intel_engine_cancel_signaling(struct i915_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
@@ -1037,7 +1070,7 @@
 	if (READ_ONCE(engine->stats.enabled) == 0)
 		return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (engine->stats.enabled > 0) {
 		if (engine->stats.active++ == 0)
@@ -1045,7 +1078,7 @@
 		GEM_BUG_ON(engine->stats.active == 0);
 	}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }
 
 static inline void intel_engine_context_out(struct intel_engine_cs *engine)
@@ -1055,7 +1088,7 @@
 	if (READ_ONCE(engine->stats.enabled) == 0)
 		return;
 
-	spin_lock_irqsave(&engine->stats.lock, flags);
+	write_seqlock_irqsave(&engine->stats.lock, flags);
 
 	if (engine->stats.enabled > 0) {
 		ktime_t last;
@@ -1082,7 +1115,7 @@
 		}
 	}
 
-	spin_unlock_irqrestore(&engine->stats.lock, flags);
+	write_sequnlock_irqrestore(&engine->stats.lock, flags);
 }
 
 int intel_enable_engine_stats(struct intel_engine_cs *engine);
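
The stats.lock conversion above (spinlock_t to seqlock_t) lets samplers read
the busy-time accounting without ever blocking the context-switch path that
calls intel_engine_context_in()/out(). A minimal read-side sketch, assuming
the accumulated busy time lives in an engine->stats.total field (the exact
aggregation i915 performs may differ):

    /* Hedged sketch: read the busy-time accumulator consistently against
     * the write_seqlock_irqsave() writers shown above. */
    static ktime_t example_engine_read_busy(struct intel_engine_cs *engine)
    {
    	ktime_t total;
    	unsigned int seq;

    	do {
    		seq = read_seqbegin(&engine->stats.lock);
    		total = engine->stats.total;	/* assumed field name */
    	} while (read_seqretry(&engine->stats.lock, seq));

    	return total;
    }
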
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 66de4b2..53a6eaa 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -542,6 +542,29 @@
 	dev_priv->csr.dc_state = val;
 }
 
+/**
+ * gen9_set_dc_state - set target display C power state
+ * @dev_priv: i915 device instance
+ * @state: target DC power state
+ * - DC_STATE_DISABLE
+ * - DC_STATE_EN_UPTO_DC5
+ * - DC_STATE_EN_UPTO_DC6
+ * - DC_STATE_EN_DC9
+ *
+ * Signal to DMC firmware/HW the target DC power state passed in @state.
+ * DMC/HW can turn off individual display clocks and power rails when entering
+ * a deeper DC power state (higher in number) and turns these back on when
+ * exiting that state to a shallower power state (lower in number). The HW
+ * will decide when to actually enter a given state on an on-demand basis,
+ * for instance depending on the active state of display pipes. The state of
+ * display registers backed by affected power rails is saved/restored as
+ * needed.
+ *
+ * Based on the above, entering a deeper DC power state is asynchronous wrt.
+ * requesting it. Disabling a deeper power state is synchronous: for instance
+ * setting %DC_STATE_DISABLE won't complete until all HW resources are turned
+ * back on and register state is restored. This is guaranteed by the MMIO write
+ * to DC_STATE_EN blocking until the state is restored.
+ */
 static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
 {
 	uint32_t val;
@@ -635,7 +658,7 @@
 	assert_csr_loaded(dev_priv);
 }
 
-void skl_enable_dc6(struct drm_i915_private *dev_priv)
+static void skl_enable_dc6(struct drm_i915_private *dev_priv)
 {
 	assert_can_enable_dc6(dev_priv);
 
@@ -649,13 +672,6 @@
 	gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6);
 }
 
-void skl_disable_dc6(struct drm_i915_private *dev_priv)
-{
-	DRM_DEBUG_KMS("Disabling DC6\n");
-
-	gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
-}
-
 static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv,
 				   struct i915_power_well *power_well)
 {
@@ -2626,32 +2642,69 @@
 	mutex_unlock(&power_domains->lock);
 }
 
-static void gen9_dbuf_enable(struct drm_i915_private *dev_priv)
+static inline
+bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv,
+			  i915_reg_t reg, bool enable)
 {
-	I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST);
-	POSTING_READ(DBUF_CTL);
+	u32 val, status;
 
+	val = I915_READ(reg);
+	val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST);
+	I915_WRITE(reg, val);
+	POSTING_READ(reg);
 	udelay(10);
 
-	if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE))
-		DRM_ERROR("DBuf power enable timeout\n");
+	status = I915_READ(reg) & DBUF_POWER_STATE;
+	if ((enable && !status) || (!enable && status)) {
+		DRM_ERROR("DBus power %s timeout!\n",
+			  enable ? "enable" : "disable");
+		return false;
+	}
+	return true;
+}
+
+static void gen9_dbuf_enable(struct drm_i915_private *dev_priv)
+{
+	intel_dbuf_slice_set(dev_priv, DBUF_CTL, true);
 }
 
 static void gen9_dbuf_disable(struct drm_i915_private *dev_priv)
 {
-	I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST);
-	POSTING_READ(DBUF_CTL);
-
-	udelay(10);
-
-	if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
-		DRM_ERROR("DBuf power disable timeout!\n");
+	intel_dbuf_slice_set(dev_priv, DBUF_CTL, false);
 }
 
-/*
- * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when
- * needed and keep it disabled as much as possible.
- */
+static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) < 11)
+		return 1;
+	return 2;
+}
+
+void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
+			    u8 req_slices)
+{
+	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
+	u32 val;
+	bool ret;
+
+	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
+		DRM_ERROR("Invalid number of dbuf slices requested\n");
+		return;
+	}
+
+	if (req_slices == hw_enabled_slices || req_slices == 0)
+		return;
+
+	val = I915_READ(DBUF_CTL_S2);
+	if (req_slices > hw_enabled_slices)
+		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
+	else
+		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false);
+
+	if (ret)
+		dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices;
+}
+
 static void icl_dbuf_enable(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST);
@@ -2663,6 +2716,8 @@
 	if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
 	    !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
 		DRM_ERROR("DBuf power enable timeout\n");
+	else
+		dev_priv->wm.skl_hw.ddb.enabled_slices = 2;
 }
 
 static void icl_dbuf_disable(struct drm_i915_private *dev_priv)
@@ -2676,6 +2731,8 @@
 	if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) ||
 	    (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE))
 		DRM_ERROR("DBuf power disable timeout!\n");
+	else
+		dev_priv->wm.skl_hw.ddb.enabled_slices = 0;
 }
 
 static void icl_mbus_init(struct drm_i915_private *dev_priv)
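
intel_dbuf_slice_set() above factors out the request/ack handshake shared by
the DBuf power wells: flip DBUF_POWER_REQUEST, post the write, allow ~10us,
then verify DBUF_POWER_STATE. A hedged sketch of a caller driving the gen11
second slice via icl_dbuf_slices_update(); the fallback policy here is
illustrative, not something this patch implements:

    static void example_request_dbuf_slices(struct drm_i915_private *dev_priv)
    {
    	/* Ask for both slices; on ack failure enabled_slices is unchanged. */
    	icl_dbuf_slices_update(dev_priv, 2);

    	/* Illustrative fallback: settle for a single slice. */
    	if (dev_priv->wm.skl_hw.ddb.enabled_slices < 2)
    		icl_dbuf_slices_update(dev_priv, 1);
    }
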
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 96e213e..2500502 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2779,9 +2779,8 @@
 		return false;
 
 	for (i = 0; i < intel_sdvo_connector->format_supported_num; i++)
-		drm_property_add_enum(
-				intel_sdvo_connector->tv_format, i,
-				i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]);
+		drm_property_add_enum(intel_sdvo_connector->tv_format, i,
+				      tv_format_names[intel_sdvo_connector->tv_format_supported[i]]);
 
 	intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0];
 	drm_object_attach_property(&intel_sdvo_connector->base.base.base,
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index dbdcf85..ee23613f 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -48,6 +48,7 @@
 	case DRM_FORMAT_UYVY:
 	case DRM_FORMAT_VYUY:
 	case DRM_FORMAT_YVYU:
+	case DRM_FORMAT_NV12:
 		return true;
 	default:
 		return false;
@@ -130,7 +131,7 @@
 		if (scanline < min || scanline > max)
 			break;
 
-		if (timeout <= 0) {
+		if (!timeout) {
 			DRM_ERROR("Potential atomic update failure on pipe %c\n",
 				  pipe_name(crtc->pipe));
 			break;
@@ -935,20 +936,11 @@
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
 	struct drm_framebuffer *fb = state->base.fb;
-	int crtc_x, crtc_y;
-	unsigned int crtc_w, crtc_h;
-	uint32_t src_x, src_y, src_w, src_h;
-	struct drm_rect *src = &state->base.src;
-	struct drm_rect *dst = &state->base.dst;
-	struct drm_rect clip = {};
 	int max_stride = INTEL_GEN(dev_priv) >= 9 ? 32768 : 16384;
-	int hscale, vscale;
 	int max_scale, min_scale;
 	bool can_scale;
 	int ret;
-
-	*src = drm_plane_state_src(&state->base);
-	*dst = drm_plane_state_dest(&state->base);
+	uint32_t pixel_format = 0;
 
 	if (!fb) {
 		state->base.visible = false;
@@ -969,11 +961,14 @@
 
 	/* setup can_scale, min_scale, max_scale */
 	if (INTEL_GEN(dev_priv) >= 9) {
+		if (state->base.fb)
+			pixel_format = state->base.fb->format->format;
 		/* use scaler when colorkey is not required */
 		if (!state->ckey.flags) {
 			can_scale = 1;
 			min_scale = 1;
-			max_scale = skl_max_scale(crtc, crtc_state);
+			max_scale =
+				skl_max_scale(crtc, crtc_state, pixel_format);
 		} else {
 			can_scale = 0;
 			min_scale = DRM_PLANE_HELPER_NO_SCALING;
@@ -985,64 +980,19 @@
 		min_scale = plane->can_scale ? 1 : (1 << 16);
 	}
 
-	/*
-	 * FIXME the following code does a bunch of fuzzy adjustments to the
-	 * coordinates and sizes. We probably need some way to decide whether
-	 * more strict checking should be done instead.
-	 */
-	drm_rect_rotate(src, fb->width << 16, fb->height << 16,
-			state->base.rotation);
-
-	hscale = drm_rect_calc_hscale_relaxed(src, dst, min_scale, max_scale);
-	BUG_ON(hscale < 0);
-
-	vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale);
-	BUG_ON(vscale < 0);
-
-	if (crtc_state->base.enable)
-		drm_mode_get_hv_timing(&crtc_state->base.mode,
-				       &clip.x2, &clip.y2);
-
-	state->base.visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale);
-
-	crtc_x = dst->x1;
-	crtc_y = dst->y1;
-	crtc_w = drm_rect_width(dst);
-	crtc_h = drm_rect_height(dst);
+	ret = drm_atomic_helper_check_plane_state(&state->base,
+						  &crtc_state->base,
+						  min_scale, max_scale,
+						  true, true);
+	if (ret)
+		return ret;
 
 	if (state->base.visible) {
-		/* check again in case clipping clamped the results */
-		hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
-		if (hscale < 0) {
-			DRM_DEBUG_KMS("Horizontal scaling factor out of limits\n");
-			drm_rect_debug_print("src: ", src, true);
-			drm_rect_debug_print("dst: ", dst, false);
-
-			return hscale;
-		}
-
-		vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
-		if (vscale < 0) {
-			DRM_DEBUG_KMS("Vertical scaling factor out of limits\n");
-			drm_rect_debug_print("src: ", src, true);
-			drm_rect_debug_print("dst: ", dst, false);
-
-			return vscale;
-		}
-
-		/* Make the source viewport size an exact multiple of the scaling factors. */
-		drm_rect_adjust_size(src,
-				     drm_rect_width(dst) * hscale - drm_rect_width(src),
-				     drm_rect_height(dst) * vscale - drm_rect_height(src));
-
-		drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16,
-				    state->base.rotation);
-
-		/* sanity check to make sure the src viewport wasn't enlarged */
-		WARN_ON(src->x1 < (int) state->base.src_x ||
-			src->y1 < (int) state->base.src_y ||
-			src->x2 > (int) state->base.src_x + state->base.src_w ||
-			src->y2 > (int) state->base.src_y + state->base.src_h);
+		struct drm_rect *src = &state->base.src;
+		struct drm_rect *dst = &state->base.dst;
+		unsigned int crtc_w = drm_rect_width(dst);
+		unsigned int crtc_h = drm_rect_height(dst);
+		uint32_t src_x, src_y, src_w, src_h;
 
 		/*
 		 * Hardware doesn't handle subpixel coordinates.
@@ -1055,57 +1005,39 @@
 		src_y = src->y1 >> 16;
 		src_h = drm_rect_height(src) >> 16;
 
-		if (intel_format_is_yuv(fb->format->format)) {
-			src_x &= ~1;
-			src_w &= ~1;
-
-			/*
-			 * Must keep src and dst the
-			 * same if we can't scale.
-			 */
-			if (!can_scale)
-				crtc_w &= ~1;
-
-			if (crtc_w == 0)
-				state->base.visible = false;
-		}
-	}
-
-	/* Check size restrictions when scaling */
-	if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) {
-		unsigned int width_bytes;
-		int cpp = fb->format->cpp[0];
-
-		WARN_ON(!can_scale);
-
-		/* FIXME interlacing min height is 6 */
-
-		if (crtc_w < 3 || crtc_h < 3)
-			state->base.visible = false;
-
-		if (src_w < 3 || src_h < 3)
-			state->base.visible = false;
-
-		width_bytes = ((src_x * cpp) & 63) + src_w * cpp;
-
-		if (INTEL_GEN(dev_priv) < 9 && (src_w > 2048 || src_h > 2048 ||
-		    width_bytes > 4096 || fb->pitches[0] > 4096)) {
-			DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n");
-			return -EINVAL;
-		}
-	}
-
-	if (state->base.visible) {
 		src->x1 = src_x << 16;
 		src->x2 = (src_x + src_w) << 16;
 		src->y1 = src_y << 16;
 		src->y2 = (src_y + src_h) << 16;
-	}
 
-	dst->x1 = crtc_x;
-	dst->x2 = crtc_x + crtc_w;
-	dst->y1 = crtc_y;
-	dst->y2 = crtc_y + crtc_h;
+		if (intel_format_is_yuv(fb->format->format) &&
+		    fb->format->format != DRM_FORMAT_NV12 &&
+		    (src_x % 2 || src_w % 2)) {
+			DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n",
+				      src_x, src_w);
+			return -EINVAL;
+		}
+
+		/* Check size restrictions when scaling */
+		if (src_w != crtc_w || src_h != crtc_h) {
+			unsigned int width_bytes;
+			int cpp = fb->format->cpp[0];
+
+			WARN_ON(!can_scale);
+
+			width_bytes = ((src_x * cpp) & 63) + src_w * cpp;
+
+			/* FIXME interlacing min height is 6 */
+			if (INTEL_GEN(dev_priv) < 9 && (
+			     src_w < 3 || src_h < 3 ||
+			     src_w > 2048 || src_h > 2048 ||
+			     crtc_w < 3 || crtc_h < 3 ||
+			     width_bytes > 4096 || fb->pitches[0] > 4096)) {
+				DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n");
+				return -EINVAL;
+			}
+		}
+	}
 
 	if (INTEL_GEN(dev_priv) >= 9) {
 		ret = skl_check_plane_surface(crtc_state, state);
@@ -1248,6 +1180,19 @@
 	DRM_FORMAT_VYUY,
 };
 
+static uint32_t skl_planar_formats[] = {
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_YVYU,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_VYUY,
+	DRM_FORMAT_NV12,
+};
+
 static const uint64_t skl_plane_format_modifiers_noccs[] = {
 	I915_FORMAT_MOD_Yf_TILED,
 	I915_FORMAT_MOD_Y_TILED,
@@ -1342,6 +1287,7 @@
 	case DRM_FORMAT_YVYU:
 	case DRM_FORMAT_UYVY:
 	case DRM_FORMAT_VYUY:
+	case DRM_FORMAT_NV12:
 		if (modifier == I915_FORMAT_MOD_Yf_TILED)
 			return true;
 		/* fall through */
@@ -1441,8 +1387,14 @@
 		intel_plane->disable_plane = skl_disable_plane;
 		intel_plane->get_hw_state = skl_plane_get_hw_state;
 
-		plane_formats = skl_plane_formats;
-		num_plane_formats = ARRAY_SIZE(skl_plane_formats);
+		if (skl_plane_has_planar(dev_priv, pipe,
+					 PLANE_SPRITE0 + plane)) {
+			plane_formats = skl_planar_formats;
+			num_plane_formats = ARRAY_SIZE(skl_planar_formats);
+		} else {
+			plane_formats = skl_plane_formats;
+			num_plane_formats = ARRAY_SIZE(skl_plane_formats);
+		}
 
 		if (skl_plane_has_ccs(dev_priv, pipe, PLANE_SPRITE0 + plane))
 			modifiers = skl_plane_format_modifiers_ccs;
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index e5bf0d3..1cffaf7 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -69,13 +69,15 @@
 
 static int __get_default_guc_log_level(struct drm_i915_private *dev_priv)
 {
-	int guc_log_level = 0; /* disabled */
+	int guc_log_level;
 
-	/* Enable if we're running on platform with GuC and debug config */
-	if (HAS_GUC(dev_priv) && intel_uc_is_using_guc() &&
-	    (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
-	     IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)))
-		guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX;
+	if (!HAS_GUC(dev_priv) || !intel_uc_is_using_guc())
+		guc_log_level = GUC_LOG_LEVEL_DISABLED;
+	else if (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
+		 IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+		guc_log_level = GUC_LOG_LEVEL_MAX;
+	else
+		guc_log_level = GUC_LOG_LEVEL_NON_VERBOSE;
 
 	/* Any platform specific fine-tuning can be done here */
 
@@ -83,7 +85,7 @@
 }
 
 /**
- * intel_uc_sanitize_options - sanitize uC related modparam options
+ * sanitize_options_early - sanitize uC related modparam options
  * @dev_priv: device private
  *
  * In case of "enable_guc" option this function will attempt to modify
@@ -99,7 +101,7 @@
  * unless GuC is enabled on given platform and the driver is compiled with
  * debug config when this modparam will default to "enable(1..4)".
  */
-void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)
+static void sanitize_options_early(struct drm_i915_private *dev_priv)
 {
 	struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
 	struct intel_uc_fw *huc_fw = &dev_priv->huc.fw;
@@ -142,51 +144,53 @@
 		i915_modparams.guc_log_level = 0;
 	}
 
-	if (i915_modparams.guc_log_level > 1 + GUC_LOG_VERBOSITY_MAX) {
+	if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) {
 		DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
 			 "guc_log_level", i915_modparams.guc_log_level,
 			 "verbosity too high");
-		i915_modparams.guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX;
+		i915_modparams.guc_log_level = GUC_LOG_LEVEL_MAX;
 	}
 
-	DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s verbosity:%d)\n",
+	DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s, verbose:%s, verbosity:%d)\n",
 			 i915_modparams.guc_log_level,
 			 yesno(i915_modparams.guc_log_level),
-			 i915_modparams.guc_log_level - 1);
+			 yesno(GUC_LOG_LEVEL_IS_VERBOSE(i915_modparams.guc_log_level)),
+			 GUC_LOG_LEVEL_TO_VERBOSITY(i915_modparams.guc_log_level));
 
 	/* Make sure that sanitization was done */
 	GEM_BUG_ON(i915_modparams.enable_guc < 0);
 	GEM_BUG_ON(i915_modparams.guc_log_level < 0);
 }
 
-void intel_uc_init_early(struct drm_i915_private *dev_priv)
+void intel_uc_init_early(struct drm_i915_private *i915)
 {
-	intel_guc_init_early(&dev_priv->guc);
-	intel_huc_init_early(&dev_priv->huc);
+	struct intel_guc *guc = &i915->guc;
+	struct intel_huc *huc = &i915->huc;
+
+	intel_guc_init_early(guc);
+	intel_huc_init_early(huc);
+
+	sanitize_options_early(i915);
+
+	if (USES_GUC(i915))
+		intel_uc_fw_fetch(i915, &guc->fw);
+
+	if (USES_HUC(i915))
+		intel_uc_fw_fetch(i915, &huc->fw);
 }
 
-void intel_uc_init_fw(struct drm_i915_private *dev_priv)
+void intel_uc_cleanup_early(struct drm_i915_private *i915)
 {
-	if (!USES_GUC(dev_priv))
-		return;
+	struct intel_guc *guc = &i915->guc;
+	struct intel_huc *huc = &i915->huc;
 
-	if (USES_HUC(dev_priv))
-		intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw);
+	if (USES_HUC(i915))
+		intel_uc_fw_fini(&huc->fw);
 
-	intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw);
-}
+	if (USES_GUC(i915))
+		intel_uc_fw_fini(&guc->fw);
 
-void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
-{
-	if (!USES_GUC(dev_priv))
-		return;
-
-	intel_uc_fw_fini(&dev_priv->guc.fw);
-
-	if (USES_HUC(dev_priv))
-		intel_uc_fw_fini(&dev_priv->huc.fw);
-
-	guc_free_load_err_log(&dev_priv->guc);
+	guc_free_load_err_log(guc);
 }
 
 /**
@@ -223,10 +227,13 @@
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 
+	gen9_enable_guc_interrupts(dev_priv);
+
 	if (HAS_GUC_CT(dev_priv))
-		return intel_guc_enable_ct(guc);
+		return intel_guc_ct_enable(&guc->ct);
 
 	guc->send = intel_guc_send_mmio;
+	guc->handler = intel_guc_to_host_event_handler_mmio;
 	return 0;
 }
 
@@ -235,9 +242,12 @@
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 
 	if (HAS_GUC_CT(dev_priv))
-		intel_guc_disable_ct(guc);
+		intel_guc_ct_disable(&guc->ct);
+
+	gen9_disable_guc_interrupts(dev_priv);
 
 	guc->send = intel_guc_send_nop;
+	guc->handler = intel_guc_to_host_event_handler_nop;
 }
 
 int intel_uc_init_misc(struct drm_i915_private *dev_priv)
@@ -248,24 +258,13 @@
 	if (!USES_GUC(dev_priv))
 		return 0;
 
-	ret = intel_guc_init_wq(guc);
-	if (ret) {
-		DRM_ERROR("Couldn't allocate workqueues for GuC\n");
-		goto err;
-	}
+	intel_guc_init_ggtt_pin_bias(guc);
 
-	ret = intel_guc_log_relay_create(guc);
-	if (ret) {
-		DRM_ERROR("Couldn't allocate relay for GuC log\n");
-		goto err_relay;
-	}
+	ret = intel_guc_init_wq(guc);
+	if (ret)
+		return ret;
 
 	return 0;
-
-err_relay:
-	intel_guc_fini_wq(guc);
-err:
-	return ret;
 }
 
 void intel_uc_fini_misc(struct drm_i915_private *dev_priv)
@@ -276,8 +275,6 @@
 		return;
 
 	intel_guc_fini_wq(guc);
-
-	intel_guc_log_relay_destroy(guc);
 }
 
 int intel_uc_init(struct drm_i915_private *dev_priv)
@@ -325,6 +322,24 @@
 	intel_guc_fini(guc);
 }
 
+void intel_uc_sanitize(struct drm_i915_private *i915)
+{
+	struct intel_guc *guc = &i915->guc;
+	struct intel_huc *huc = &i915->huc;
+
+	if (!USES_GUC(i915))
+		return;
+
+	GEM_BUG_ON(!HAS_GUC(i915));
+
+	guc_disable_communication(guc);
+
+	intel_huc_sanitize(huc);
+	intel_guc_sanitize(guc);
+
+	__intel_uc_reset_hw(i915);
+}
+
 int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
@@ -336,14 +351,8 @@
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 
-	guc_disable_communication(guc);
 	gen9_reset_guc_interrupts(dev_priv);
 
-	/* init WOPCM */
-	I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv));
-	I915_WRITE(DMA_GUC_WOPCM_OFFSET,
-		   GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC);
-
 	/* WaEnableuKernelHeaderValidFix:skl */
 	/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
 	if (IS_GEN9(dev_priv))
@@ -390,12 +399,9 @@
 	}
 
 	if (USES_GUC_SUBMISSION(dev_priv)) {
-		if (i915_modparams.guc_log_level)
-			gen9_enable_guc_interrupts(dev_priv);
-
 		ret = intel_guc_submission_enable(guc);
 		if (ret)
-			goto err_interrupts;
+			goto err_communication;
 	}
 
 	dev_info(dev_priv->drm.dev, "GuC firmware version %u.%u\n",
@@ -410,8 +416,6 @@
 	/*
 	 * We've failed to load the firmware :(
 	 */
-err_interrupts:
-	gen9_disable_guc_interrupts(dev_priv);
 err_communication:
 	guc_disable_communication(guc);
 err_log_capture:
@@ -441,9 +445,6 @@
 		intel_guc_submission_disable(guc);
 
 	guc_disable_communication(guc);
-
-	if (USES_GUC_SUBMISSION(dev_priv))
-		gen9_disable_guc_interrupts(dev_priv);
 }
 
 int intel_uc_suspend(struct drm_i915_private *i915)
@@ -479,8 +480,7 @@
 	if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS)
 		return 0;
 
-	if (i915_modparams.guc_log_level)
-		gen9_enable_guc_interrupts(i915);
+	gen9_enable_guc_interrupts(i915);
 
 	err = intel_guc_resume(guc);
 	if (err) {
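
The guc_log_level rework above replaces the old "1 + GUC_LOG_VERBOSITY_MAX"
arithmetic with named levels. The macro definitions are not part of this
hunk; judging from the call sites the encoding appears to be 0 = disabled,
1 = enabled but non-verbose, 2+ = verbose with verbosity (level - 2). A
hedged sketch of that assumed encoding:

    /* Assumed encoding; the real macros live in the GuC log header and
     * may differ in detail. */
    #define EXAMPLE_GUC_LOG_LEVEL_DISABLED		0
    #define EXAMPLE_GUC_LOG_LEVEL_NON_VERBOSE	1
    #define EXAMPLE_GUC_LOG_LEVEL_IS_VERBOSE(x)	\
    	((x) > EXAMPLE_GUC_LOG_LEVEL_NON_VERBOSE)
    #define EXAMPLE_GUC_LOG_LEVEL_TO_VERBOSITY(x)	\
    	(EXAMPLE_GUC_LOG_LEVEL_IS_VERBOSE(x) ? (x) - 2 : 0)
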
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index f76d51d..25d73ad 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -28,13 +28,12 @@
 #include "intel_huc.h"
 #include "i915_params.h"
 
-void intel_uc_sanitize_options(struct drm_i915_private *dev_priv);
 void intel_uc_init_early(struct drm_i915_private *dev_priv);
+void intel_uc_cleanup_early(struct drm_i915_private *dev_priv);
 void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
-void intel_uc_init_fw(struct drm_i915_private *dev_priv);
-void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
 int intel_uc_init_misc(struct drm_i915_private *dev_priv);
 void intel_uc_fini_misc(struct drm_i915_private *dev_priv);
+void intel_uc_sanitize(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_uc_init(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c
index 3ec0ce5..6e8e0b5 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/intel_uc_fw.c
@@ -95,15 +95,6 @@
 	uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size;
 	uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
 
-	/* Header and uCode will be loaded to WOPCM */
-	size = uc_fw->header_size + uc_fw->ucode_size;
-	if (size > intel_guc_wopcm_size(dev_priv)) {
-		DRM_WARN("%s: Firmware is too large to fit in WOPCM\n",
-			 intel_uc_fw_type_repr(uc_fw->type));
-		err = -E2BIG;
-		goto fail;
-	}
-
 	/* now RSA */
 	if (css->key_size_dw != UOS_RSA_SCRATCH_COUNT) {
 		DRM_WARN("%s: Mismatched firmware RSA key size (%u)\n",
@@ -209,6 +200,7 @@
 				   struct i915_vma *vma))
 {
 	struct i915_vma *vma;
+	u32 ggtt_pin_bias;
 	int err;
 
 	DRM_DEBUG_DRIVER("%s fw load %s\n",
@@ -230,8 +222,9 @@
 		goto fail;
 	}
 
+	ggtt_pin_bias = to_i915(uc_fw->obj->base.dev)->guc.ggtt_pin_bias;
 	vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0,
-				       PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+				       PIN_OFFSET_BIAS | ggtt_pin_bias);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n",
diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h
index d5fd460..87910aa 100644
--- a/drivers/gpu/drm/i915/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/intel_uc_fw.h
@@ -30,7 +30,7 @@
 struct i915_vma;
 
 /* Home of GuC, HuC and DMC firmwares */
-#define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware"
+#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915"
 
 enum intel_uc_fw_status {
 	INTEL_UC_FIRMWARE_FAIL = -1,
@@ -115,6 +115,28 @@
 	return uc_fw->path != NULL;
 }
 
+static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw)
+{
+	if (uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS)
+		uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING;
+}
+
+/**
+ * intel_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded.
+ * @uc_fw: uC firmware.
+ *
+ * Get the size of the firmware and header that will be uploaded to WOPCM.
+ *
+ * Return: Upload firmware size, or zero on firmware fetch failure.
+ */
+static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
+{
+	if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS)
+		return 0;
+
+	return uc_fw->header_size + uc_fw->ucode_size;
+}
+
 void intel_uc_fw_fetch(struct drm_i915_private *dev_priv,
 		       struct intel_uc_fw *uc_fw);
 int intel_uc_fw_upload(struct intel_uc_fw *uc_fw,
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 4df7c2ef..448293e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -62,6 +62,11 @@
 fw_domain_reset(struct drm_i915_private *i915,
 		const struct intel_uncore_forcewake_domain *d)
 {
+	/*
+	 * We don't really know if the powerwell for the forcewake domain we are
+	 * trying to reset here exists at this point (engines could be fused
+	 * off in ICL+), so don't wait for acks.
+	 */
 	__raw_i915_write32(i915, d->reg_set, i915->uncore.fw_reset);
 }
 
@@ -134,7 +139,9 @@
 	 * in the hope that the original ack will be delivered along with
 	 * the fallback ack.
 	 *
-	 * This workaround is described in HSDES #1604254524
+	 * This workaround is described in HSDES #1604254524 and it's known as:
+	 * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl
+	 * although the name is a bit misleading.
 	 */
 
 	pass = 1;
@@ -1353,6 +1360,23 @@
 	fw_domain_reset(dev_priv, d);
 }
 
+static void fw_domain_fini(struct drm_i915_private *dev_priv,
+			   enum forcewake_domain_id domain_id)
+{
+	struct intel_uncore_forcewake_domain *d;
+
+	if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT))
+		return;
+
+	d = &dev_priv->uncore.fw_domain[domain_id];
+
+	WARN_ON(d->wake_count);
+	WARN_ON(hrtimer_cancel(&d->timer));
+	memset(d, 0, sizeof(*d));
+
+	dev_priv->uncore.fw_domains &= ~BIT(domain_id);
+}
+
 static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 {
 	if (INTEL_GEN(dev_priv) <= 5 || intel_vgpu_active(dev_priv))
@@ -1372,7 +1396,8 @@
 	if (INTEL_GEN(dev_priv) >= 11) {
 		int i;
 
-		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
+		dev_priv->uncore.funcs.force_wake_get =
+			fw_domains_get_with_fallback;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_RENDER_GEN9,
@@ -1565,6 +1590,40 @@
 		&dev_priv->uncore.pmic_bus_access_nb);
 }
 
+/*
+ * We might have detected that some engines are fused off after we initialized
+ * the forcewake domains. Prune them, to make sure they only reference existing
+ * engines.
+ */
+void intel_uncore_prune(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) >= 11) {
+		enum forcewake_domains fw_domains = dev_priv->uncore.fw_domains;
+		enum forcewake_domain_id domain_id;
+		int i;
+
+		for (i = 0; i < I915_MAX_VCS; i++) {
+			domain_id = FW_DOMAIN_ID_MEDIA_VDBOX0 + i;
+
+			if (HAS_ENGINE(dev_priv, _VCS(i)))
+				continue;
+
+			if (fw_domains & BIT(domain_id))
+				fw_domain_fini(dev_priv, domain_id);
+		}
+
+		for (i = 0; i < I915_MAX_VECS; i++) {
+			domain_id = FW_DOMAIN_ID_MEDIA_VEBOX0 + i;
+
+			if (HAS_ENGINE(dev_priv, _VECS(i)))
+				continue;
+
+			if (fw_domains & BIT(domain_id))
+				fw_domain_fini(dev_priv, domain_id);
+		}
+	}
+}
+
 void intel_uncore_fini(struct drm_i915_private *dev_priv)
 {
 	/* Paranoia: make sure we have disabled everything before we exit. */
@@ -1646,11 +1705,10 @@
 	const i915_reg_t mode = RING_MI_MODE(base);
 
 	I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING));
-	if (intel_wait_for_register_fw(dev_priv,
-				       mode,
-				       MODE_IDLE,
-				       MODE_IDLE,
-				       500))
+	if (__intel_wait_for_register_fw(dev_priv,
+					 mode, MODE_IDLE, MODE_IDLE,
+					 500, 0,
+					 NULL))
 		DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n",
 				 engine->name);
 
@@ -1804,9 +1862,10 @@
 	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
 
 	/* Wait for the device to ack the reset requests */
-	err = intel_wait_for_register_fw(dev_priv,
-					  GEN6_GDRST, hw_domain_mask, 0,
-					  500);
+	err = __intel_wait_for_register_fw(dev_priv,
+					   GEN6_GDRST, hw_domain_mask, 0,
+					   500, 0,
+					   NULL);
 	if (err)
 		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
 				 hw_domain_mask);
@@ -1854,6 +1913,50 @@
 }
 
 /**
+ * gen11_reset_engines - reset individual engines
+ * @dev_priv: i915 device
+ * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset
+ *
+ * This function will reset the individual engines that are set in engine_mask.
+ * If you provide ALL_ENGINES as mask, a full global domain reset will be issued.
+ *
+ * Note: It is the responsibility of the caller to handle the difference between
+ * asking for a full domain reset versus a reset of all available individual
+ * engines.
+ *
+ * Returns 0 on success, nonzero on error.
+ */
+static int gen11_reset_engines(struct drm_i915_private *dev_priv,
+			       unsigned engine_mask)
+{
+	struct intel_engine_cs *engine;
+	const u32 hw_engine_mask[I915_NUM_ENGINES] = {
+		[RCS] = GEN11_GRDOM_RENDER,
+		[BCS] = GEN11_GRDOM_BLT,
+		[VCS] = GEN11_GRDOM_MEDIA,
+		[VCS2] = GEN11_GRDOM_MEDIA2,
+		[VCS3] = GEN11_GRDOM_MEDIA3,
+		[VCS4] = GEN11_GRDOM_MEDIA4,
+		[VECS] = GEN11_GRDOM_VECS,
+		[VECS2] = GEN11_GRDOM_VECS2,
+	};
+	u32 hw_mask;
+
+	BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES);
+
+	if (engine_mask == ALL_ENGINES) {
+		hw_mask = GEN11_GRDOM_FULL;
+	} else {
+		unsigned int tmp;
+
+		hw_mask = 0;
+		for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
+			hw_mask |= hw_engine_mask[engine->id];
+	}
+
+	return gen6_hw_domain_reset(dev_priv, hw_mask);
+}
+
+/**
  * __intel_wait_for_register_fw - wait until register matches expected state
  * @dev_priv: the i915 device
  * @reg: the register to read
@@ -1940,7 +2043,7 @@
 	u32 reg_value;
 	int ret;
 
-	might_sleep();
+	might_sleep_if(slow_timeout_ms);
 
 	spin_lock_irq(&dev_priv->uncore.lock);
 	intel_uncore_forcewake_get__locked(dev_priv, fw);
@@ -1952,7 +2055,7 @@
 	intel_uncore_forcewake_put__locked(dev_priv, fw);
 	spin_unlock_irq(&dev_priv->uncore.lock);
 
-	if (ret)
+	if (ret && slow_timeout_ms)
 		ret = __wait_for(reg_value = I915_READ_NOTRACE(reg),
 				 (reg_value & mask) == value,
 				 slow_timeout_ms * 1000, 10, 1000);
@@ -1971,11 +2074,12 @@
 	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
 		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
 
-	ret = intel_wait_for_register_fw(dev_priv,
-					 RING_RESET_CTL(engine->mmio_base),
-					 RESET_CTL_READY_TO_RESET,
-					 RESET_CTL_READY_TO_RESET,
-					 700);
+	ret = __intel_wait_for_register_fw(dev_priv,
+					   RING_RESET_CTL(engine->mmio_base),
+					   RESET_CTL_READY_TO_RESET,
+					   RESET_CTL_READY_TO_RESET,
+					   700, 0,
+					   NULL);
 	if (ret)
 		DRM_ERROR("%s: reset request timeout\n", engine->name);
 
@@ -2000,7 +2104,10 @@
 		if (gen8_reset_engine_start(engine))
 			goto not_ready;
 
-	return gen6_reset_engines(dev_priv, engine_mask);
+	if (INTEL_GEN(dev_priv) >= 11)
+		return gen11_reset_engines(dev_priv, engine_mask);
+	else
+		return gen6_reset_engines(dev_priv, engine_mask);
 
 not_ready:
 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
@@ -2038,15 +2145,31 @@
 	int retry;
 	int ret;
 
-	might_sleep();
+	/*
+	 * We want to perform per-engine reset from atomic context (e.g.
+	 * softirq), which imposes the constraint that we cannot sleep.
+	 * However, experience suggests that spending a bit of time waiting
+	 * for a reset helps in various cases, so for a full-device reset
+	 * we apply the opposite rule and wait if we want to. As we should
+	 * always follow up a failed per-engine reset with a full device reset,
+	 * being a little faster, stricter and more error prone for the
+	 * atomic case seems an acceptable compromise.
+	 *
+	 * Unfortunately this leads to a bimodal routine, when the goal was
+	 * to have a single reset function that worked for resetting any
+	 * number of engines simultaneously.
+	 */
+	might_sleep_if(engine_mask == ALL_ENGINES);
 
-	/* If the power well sleeps during the reset, the reset
+	/*
+	 * If the power well sleeps during the reset, the reset
 	 * request may be dropped and never completes (causing -EIO).
 	 */
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 	for (retry = 0; retry < 3; retry++) {
 
-		/* We stop engines, otherwise we might get failed reset and a
+		/*
+		 * We stop engines, otherwise we might get failed reset and a
 		 * dead gpu (on elk). Also as modern gpu as kbl can suffer
 		 * from system hang if batchbuffer is progressing when
 		 * the reset is issued, regardless of READY_TO_RESET ack.
@@ -2060,9 +2183,11 @@
 		i915_stop_engines(dev_priv, engine_mask);
 
 		ret = -ENODEV;
-		if (reset)
+		if (reset) {
+			GEM_TRACE("engine_mask=%x\n", engine_mask);
 			ret = reset(dev_priv, engine_mask);
-		if (ret != -ETIMEDOUT)
+		}
+		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
 			break;
 
 		cond_resched();
@@ -2085,12 +2210,14 @@
 
 int intel_reset_guc(struct drm_i915_private *dev_priv)
 {
+	u32 guc_domain = INTEL_GEN(dev_priv) >= 11 ? GEN11_GRDOM_GUC :
+						     GEN9_GRDOM_GUC;
 	int ret;
 
 	GEM_BUG_ON(!HAS_GUC(dev_priv));
 
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC);
+	ret = gen6_hw_domain_reset(dev_priv, guc_domain);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
 	return ret;
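
The might_sleep_if(engine_mask == ALL_ENGINES) change above encodes the
bimodal contract directly in the API. A hedged usage sketch of
intel_gpu_reset(), the function being modified; the calling contexts and the
BIT(RCS) mask are illustrative (real callers derive the mask from the engine):

    static void example_reset_usage(struct drm_i915_private *dev_priv)
    {
    	/* Per-engine reset, e.g. from softirq: must not sleep. */
    	intel_gpu_reset(dev_priv, BIT(RCS));

    	/* Full-device reset from process context: may sleep and retry. */
    	intel_gpu_reset(dev_priv, ALL_ENGINES);
    }
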
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index dfdf444..47478d6 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -140,6 +140,7 @@
 
 void intel_uncore_sanitize(struct drm_i915_private *dev_priv);
 void intel_uncore_init(struct drm_i915_private *dev_priv);
+void intel_uncore_prune(struct drm_i915_private *dev_priv);
 bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv);
 bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv);
 void intel_uncore_fini(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c
new file mode 100644
index 0000000..74bf76f
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_wopcm.c
@@ -0,0 +1,275 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#include "intel_wopcm.h"
+#include "i915_drv.h"
+
+/**
+ * DOC: WOPCM Layout
+ *
+ * The layout of the WOPCM is fixed after writing the GuC WOPCM size and
+ * offset registers, whose values are calculated from the HuC/GuC firmware
+ * sizes and a set of hardware requirements/restrictions, as shown below:
+ *
+ * ::
+ *
+ *    +=========> +====================+ <== WOPCM Top
+ *    ^           |  HW contexts RSVD  |
+ *    |     +===> +====================+ <== GuC WOPCM Top
+ *    |     ^     |                    |
+ *    |     |     |                    |
+ *    |     |     |                    |
+ *    |    GuC    |                    |
+ *    |   WOPCM   |                    |
+ *    |    Size   +--------------------+
+ *  WOPCM   |     |    GuC FW RSVD     |
+ *    |     |     +--------------------+
+ *    |     |     |   GuC Stack RSVD   |
+ *    |     |     +--------------------+
+ *    |     v     |   GuC WOPCM RSVD   |
+ *    |     +===> +====================+ <== GuC WOPCM base
+ *    |           |     WOPCM RSVD     |
+ *    |           +------------------- + <== HuC Firmware Top
+ *    v           |      HuC FW        |
+ *    +=========> +====================+ <== WOPCM Base
+ *
+ * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top.
+ * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6
+ * context).
+ */
+
+/* Default WOPCM size 1MB. */
+#define GEN9_WOPCM_SIZE			(1024 * 1024)
+/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
+#define WOPCM_RESERVED_SIZE		(16 * 1024)
+
+/* 16KB reserved at the beginning of GuC WOPCM. */
+#define GUC_WOPCM_RESERVED		(16 * 1024)
+/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */
+#define GUC_WOPCM_STACK_RESERVED	(8 * 1024)
+
+/* GuC WOPCM Offset value needs to be aligned to 16KB. */
+#define GUC_WOPCM_OFFSET_ALIGNMENT	(1UL << GUC_WOPCM_OFFSET_SHIFT)
+
+/* 24KB at the end of WOPCM is reserved for RC6 CTX on BXT. */
+#define BXT_WOPCM_RC6_CTX_RESERVED	(24 * 1024)
+/* 36KB WOPCM reserved at the end of WOPCM on CNL. */
+#define CNL_WOPCM_HW_CTX_RESERVED	(36 * 1024)
+
+/* 128KB from GUC_WOPCM_RESERVED is reserved for FW on Gen9. */
+#define GEN9_GUC_FW_RESERVED	(128 * 1024)
+#define GEN9_GUC_WOPCM_OFFSET	(GUC_WOPCM_RESERVED + GEN9_GUC_FW_RESERVED)
+
+/**
+ * intel_wopcm_init_early() - Early initialization of the WOPCM.
+ * @wopcm: pointer to intel_wopcm.
+ *
+ * Setup the size of the WOPCM, which will be used later for WOPCM partitioning.
+ */
+void intel_wopcm_init_early(struct intel_wopcm *wopcm)
+{
+	wopcm->size = GEN9_WOPCM_SIZE;
+
+	DRM_DEBUG_DRIVER("WOPCM size: %uKiB\n", wopcm->size / 1024);
+}
+
+static inline u32 context_reserved_size(struct drm_i915_private *i915)
+{
+	if (IS_GEN9_LP(i915))
+		return BXT_WOPCM_RC6_CTX_RESERVED;
+	else if (INTEL_GEN(i915) >= 10)
+		return CNL_WOPCM_HW_CTX_RESERVED;
+	else
+		return 0;
+}
+
+static inline int gen9_check_dword_gap(u32 guc_wopcm_base, u32 guc_wopcm_size)
+{
+	u32 offset;
+
+	/*
+	 * GuC WOPCM size shall be at least a dword larger than the offset from
+	 * WOPCM base (GuC WOPCM offset from WOPCM base + GEN9_GUC_WOPCM_OFFSET)
+	 * due to a hardware limitation on Gen9.
+	 */
+	offset = guc_wopcm_base + GEN9_GUC_WOPCM_OFFSET;
+	if (offset > guc_wopcm_size ||
+	    (guc_wopcm_size - offset) < sizeof(u32)) {
+		DRM_ERROR("GuC WOPCM size %uKiB is too small. %uKiB needed.\n",
+			  guc_wopcm_size / 1024,
+			  (u32)(offset + sizeof(u32)) / 1024);
+		return -E2BIG;
+	}
+
+	return 0;
+}
+
+static inline int gen9_check_huc_fw_fits(u32 guc_wopcm_size, u32 huc_fw_size)
+{
+	/*
+	 * On Gen9 & CNL A0, hardware requires the total available GuC WOPCM
+	 * size to be larger than or equal to HuC firmware size. Otherwise,
+	 * firmware uploading would fail.
+	 */
+	if (huc_fw_size > guc_wopcm_size - GUC_WOPCM_RESERVED) {
+		DRM_ERROR("HuC FW (%uKiB) won't fit in GuC WOPCM (%uKiB).\n",
+			  huc_fw_size / 1024,
+			  (guc_wopcm_size - GUC_WOPCM_RESERVED) / 1024);
+		return -E2BIG;
+	}
+
+	return 0;
+}
+
+static inline int check_hw_restriction(struct drm_i915_private *i915,
+				       u32 guc_wopcm_base, u32 guc_wopcm_size,
+				       u32 huc_fw_size)
+{
+	int err = 0;
+
+	if (IS_GEN9(i915))
+		err = gen9_check_dword_gap(guc_wopcm_base, guc_wopcm_size);
+
+	if (!err &&
+	    (IS_GEN9(i915) || IS_CNL_REVID(i915, CNL_REVID_A0, CNL_REVID_A0)))
+		err = gen9_check_huc_fw_fits(guc_wopcm_size, huc_fw_size);
+
+	return err;
+}
+
+/**
+ * intel_wopcm_init() - Initialize the WOPCM structure.
+ * @wopcm: pointer to intel_wopcm.
+ *
+ * This function will partition WOPCM space based on GuC and HuC firmware sizes
+ * and will allocate the maximum remaining space for use by GuC. It will also
+ * enforce platform dependent hardware restrictions on GuC WOPCM offset and
+ * size. It will fail the WOPCM init if any of these checks fail, so that
+ * the subsequent GuC firmware upload is aborted.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_wopcm_init(struct intel_wopcm *wopcm)
+{
+	struct drm_i915_private *i915 = wopcm_to_i915(wopcm);
+	u32 guc_fw_size = intel_uc_fw_get_upload_size(&i915->guc.fw);
+	u32 huc_fw_size = intel_uc_fw_get_upload_size(&i915->huc.fw);
+	u32 ctx_rsvd = context_reserved_size(i915);
+	u32 guc_wopcm_base;
+	u32 guc_wopcm_size;
+	u32 guc_wopcm_rsvd;
+	int err;
+
+	GEM_BUG_ON(!wopcm->size);
+
+	if (guc_fw_size >= wopcm->size) {
+		DRM_ERROR("GuC FW (%uKiB) is too big to fit in WOPCM.",
+			  guc_fw_size / 1024);
+		return -E2BIG;
+	}
+
+	if (huc_fw_size >= wopcm->size) {
+		DRM_ERROR("HuC FW (%uKiB) is too big to fit in WOPCM.",
+			  huc_fw_size / 1024);
+		return -E2BIG;
+	}
+
+	guc_wopcm_base = ALIGN(huc_fw_size + WOPCM_RESERVED_SIZE,
+			       GUC_WOPCM_OFFSET_ALIGNMENT);
+	if ((guc_wopcm_base + ctx_rsvd) >= wopcm->size) {
+		DRM_ERROR("GuC WOPCM base (%uKiB) is too big.\n",
+			  guc_wopcm_base / 1024);
+		return -E2BIG;
+	}
+
+	guc_wopcm_size = wopcm->size - guc_wopcm_base - ctx_rsvd;
+	guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
+
+	DRM_DEBUG_DRIVER("Calculated GuC WOPCM Region: [%uKiB, %uKiB)\n",
+			 guc_wopcm_base / 1024, guc_wopcm_size / 1024);
+
+	guc_wopcm_rsvd = GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED;
+	if ((guc_fw_size + guc_wopcm_rsvd) > guc_wopcm_size) {
+		DRM_ERROR("Need %uKiB WOPCM for GuC, %uKiB available.\n",
+			  (guc_fw_size + guc_wopcm_rsvd) / 1024,
+			  guc_wopcm_size / 1024);
+		return -E2BIG;
+	}
+
+	err = check_hw_restriction(i915, guc_wopcm_base, guc_wopcm_size,
+				   huc_fw_size);
+	if (err)
+		return err;
+
+	wopcm->guc.base = guc_wopcm_base;
+	wopcm->guc.size = guc_wopcm_size;
+
+	return 0;
+}
+
+static inline int write_and_verify(struct drm_i915_private *dev_priv,
+				   i915_reg_t reg, u32 val, u32 mask,
+				   u32 locked_bit)
+{
+	u32 reg_val;
+
+	GEM_BUG_ON(val & ~mask);
+
+	I915_WRITE(reg, val);
+
+	reg_val = I915_READ(reg);
+
+	return (reg_val & mask) != (val | locked_bit) ? -EIO : 0;
+}
+
+/**
+ * intel_wopcm_init_hw() - Setup GuC WOPCM registers.
+ * @wopcm: pointer to intel_wopcm.
+ *
+ * Program the GuC WOPCM size and offset registers with the calculated values,
+ * then read them back to verify that they are locked with the expected
+ * values.
+ *
+ * Return: 0 on success. -EIO if registers were locked with incorrect values.
+ */
+int intel_wopcm_init_hw(struct intel_wopcm *wopcm)
+{
+	struct drm_i915_private *dev_priv = wopcm_to_i915(wopcm);
+	u32 huc_agent;
+	u32 mask;
+	int err;
+
+	if (!USES_GUC(dev_priv))
+		return 0;
+
+	GEM_BUG_ON(!HAS_GUC(dev_priv));
+	GEM_BUG_ON(!wopcm->guc.size);
+	GEM_BUG_ON(!wopcm->guc.base);
+
+	err = write_and_verify(dev_priv, GUC_WOPCM_SIZE, wopcm->guc.size,
+			       GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED,
+			       GUC_WOPCM_SIZE_LOCKED);
+	if (err)
+		goto err_out;
+
+	huc_agent = USES_HUC(dev_priv) ? HUC_LOADING_AGENT_GUC : 0;
+	mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
+	err = write_and_verify(dev_priv, DMA_GUC_WOPCM_OFFSET,
+			       wopcm->guc.base | huc_agent, mask,
+			       GUC_WOPCM_OFFSET_VALID);
+	if (err)
+		goto err_out;
+
+	return 0;
+
+err_out:
+	DRM_ERROR("Failed to init WOPCM registers:\n");
+	DRM_ERROR("DMA_GUC_WOPCM_OFFSET=%#x\n",
+		  I915_READ(DMA_GUC_WOPCM_OFFSET));
+	DRM_ERROR("GUC_WOPCM_SIZE=%#x\n", I915_READ(GUC_WOPCM_SIZE));
+
+	return err;
+}
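
A worked partitioning example with illustrative sizes (not taken from any
real firmware blob): on a BXT part with the default 1024KiB WOPCM and a
200KiB HuC image, intel_wopcm_init() above computes:

    guc_wopcm_base = ALIGN(200KiB + 16KiB, 16KiB)            = 224KiB
    guc_wopcm_size = 1024KiB - 224KiB - 24KiB (BXT RC6 ctx)  = 776KiB
    room for the GuC image = 776KiB - (16KiB + 8KiB rsvd)    = 752KiB

so any GuC image up to 752KiB passes the guc_fw_size + guc_wopcm_rsvd check.
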
diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h
new file mode 100644
index 0000000..6298910
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_wopcm.h
@@ -0,0 +1,31 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#ifndef _INTEL_WOPCM_H_
+#define _INTEL_WOPCM_H_
+
+#include <linux/types.h>
+
+/**
+ * struct intel_wopcm - Overall WOPCM info and WOPCM regions.
+ * @size: Size of overall WOPCM.
+ * @guc: GuC WOPCM Region info.
+ * @guc.base: GuC WOPCM base which is offset from WOPCM base.
+ * @guc.size: Size of the GuC WOPCM region.
+ */
+struct intel_wopcm {
+	u32 size;
+	struct {
+		u32 base;
+		u32 size;
+	} guc;
+};
+
+void intel_wopcm_init_early(struct intel_wopcm *wopcm);
+int intel_wopcm_init(struct intel_wopcm *wopcm);
+int intel_wopcm_init_hw(struct intel_wopcm *wopcm);
+
+#endif
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
new file mode 100644
index 0000000..2df3538
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -0,0 +1,949 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_workarounds.h"
+
+/**
+ * DOC: Hardware workarounds
+ *
+ * This file is intended as a central place to implement most [1]_ of the
+ * required workarounds for hardware to work as originally intended. They fall
+ * in five basic categories depending on how/when they are applied:
+ *
+ * - Workarounds that touch registers that are saved/restored to/from the HW
+ *   context image. The list is emitted (via Load Register Immediate commands)
+ *   every time a new context is created.
+ * - GT workarounds. The list of these WAs is applied whenever these registers
+ *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
+ * - Display workarounds. The list is applied during display clock-gating
+ *   initialization.
+ * - Workarounds that whitelist a privileged register, so that UMDs can manage
+ *   them directly. This is just a special case of an MMIO workaround (as we
+ *   write the list of these to/be-whitelisted registers to some special HW
+ *   registers).
+ * - Workaround batchbuffers, that get executed automatically by the hardware
+ *   on every HW context restore.
+ *
+ * .. [1] Please notice that there are other WAs that, due to their nature,
+ *    cannot be applied from a central place. Those are peppered around the rest
+ *    of the code, as needed.
+ *
+ * .. [2] Technically, some registers are power-context saved & restored, so they
+ *    survive a suspend/resume. In practice, writing them again is not too
+ *    costly and simplifies things. We can revisit this in the future.
+ *
+ * Layout
+ * ''''''
+ *
+ * Keep things in this file ordered by WA type, as per the above (context, GT,
+ * display, register whitelist, batchbuffer). Then, inside each type, keep the
+ * following order:
+ *
+ * - Infrastructure functions and macros
+ * - WAs per platform in standard gen/chrono order
+ * - Public functions to init or apply the given workaround type.
+ */
+
+static int wa_add(struct drm_i915_private *dev_priv,
+		  i915_reg_t addr,
+		  const u32 mask, const u32 val)
+{
+	const unsigned int idx = dev_priv->workarounds.count;
+
+	if (WARN_ON(idx >= I915_MAX_WA_REGS))
+		return -ENOSPC;
+
+	dev_priv->workarounds.reg[idx].addr = addr;
+	dev_priv->workarounds.reg[idx].value = val;
+	dev_priv->workarounds.reg[idx].mask = mask;
+
+	dev_priv->workarounds.count++;
+
+	return 0;
+}
+
+#define WA_REG(addr, mask, val) do { \
+		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
+		if (r) \
+			return r; \
+	} while (0)
+
+#define WA_SET_BIT_MASKED(addr, mask) \
+	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
+
+#define WA_CLR_BIT_MASKED(addr, mask) \
+	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
+
+#define WA_SET_FIELD_MASKED(addr, mask, value) \
+	WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
+
+static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+
+	/* WaDisableAsyncFlipPerfMode:bdw,chv */
+	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+
+	/* WaDisablePartialInstShootdown:bdw,chv */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+	/* Use Force Non-Coherent whenever executing a 3D context. This is a
+	 * workaround for a possible hang in the unlikely event a TLB
+	 * invalidation occurs during a PSD flush.
+	 */
+	/* WaForceEnableNonCoherent:bdw,chv */
+	/* WaHdcDisableFetchWhenMasked:bdw,chv */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
+			  HDC_FORCE_NON_COHERENT);
+
+	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
+	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
+	 *  polygons in the same 8x4 pixel/sample area to be processed without
+	 *  stalling waiting for the earlier ones to write to Hierarchical Z
+	 *  buffer."
+	 *
+	 * This optimization is off by default for BDW and CHV; turn it on.
+	 */
+	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+
+	/* Wa4x4STCOptimizationDisable:bdw,chv */
+	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
+
+	/*
+	 * BSpec recommends 8x4 when MSAA is used,
+	 * however in practice 16x4 seems fastest.
+	 *
+	 * Note that PS/WM thread counts depend on the WIZ hashing
+	 * disable bit, which we don't touch here, but it's good
+	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+	 */
+	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+			    GEN6_WIZ_HASHING_MASK,
+			    GEN6_WIZ_HASHING_16x4);
+
+	return 0;
+}
+
+static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	ret = gen8_ctx_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
+
+	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+	/* WaDisableDopClockGating:bdw
+	 *
+	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+	 * to disable EUTC clock gating.
+	 */
+	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+			  DOP_CLOCK_GATING_DISABLE);
+
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+			  GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  /* WaForceContextSaveRestoreNonCoherent:bdw */
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
+			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
+
+	return 0;
+}
+
+static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	ret = gen8_ctx_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
+
+	/* WaDisableThreadStallDopClockGating:chv */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+
+	/* Improve HiZ throughput on CHV. */
+	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
+
+	return 0;
+}
+
+static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	if (HAS_LLC(dev_priv)) {
+		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+		 *
+		 * Must match Display Engine. See
+		 * WaCompressedResourceDisplayNewHashMode.
+		 */
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
+		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+	}
+
+	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
+	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  FLOW_CONTROL_ENABLE |
+			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+
+	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
+	if (!IS_COFFEELAKE(dev_priv))
+		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
+
+	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
+	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
+	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+			  GEN9_ENABLE_YV12_BUGFIX |
+			  GEN9_ENABLE_GPGPU_PREEMPTION);
+
+	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
+	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
+	WA_SET_BIT_MASKED(CACHE_MODE_1,
+			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
+			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
+
+	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
+	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
+			  GEN9_CCS_TLB_PREFETCH_ENABLE);
+
+	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
+			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
+
+	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
+	 * both tied to WaForceContextSaveRestoreNonCoherent
+	 * in some hsds for skl. We keep the tie for all gen9. The
+	 * documentation is a bit hazy and so we want to get common behaviour,
+	 * even though there is no clear evidence we would need both on kbl/bxt.
+	 * This area has been a source of system hangs so we play it safe
+	 * and mimic the skl regardless of what bspec says.
+	 *
+	 * Use Force Non-Coherent whenever executing a 3D context. This
+	 * is a workaround for a possible hang in the unlikely event
+	 * a TLB invalidation occurs during a PSD flush.
+	 */
+
+	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
+	WA_SET_BIT_MASKED(HDC_CHICKEN0,
+			  HDC_FORCE_NON_COHERENT);
+
+	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
+	if (IS_SKYLAKE(dev_priv) ||
+	    IS_KABYLAKE(dev_priv) ||
+	    IS_COFFEELAKE(dev_priv))
+		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
+				  GEN8_SAMPLER_POWER_BYPASS_DIS);
+
+	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+
+	/*
+	 * Supporting preemption with fine-granularity requires changes in the
+	 * batch buffer programming. Since we can't break old userspace, we
+	 * need to set our default preemption level to safe value. Userspace is
+	 * still able to use more fine-grained preemption levels, since in
+	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
+	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
+	 * not real HW workarounds, but merely a way to start using preemption
+	 * while maintaining old contract with userspace.
+	 */
+
+	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
+	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
+	if (IS_GEN9_LP(dev_priv))
+		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
+
+	return 0;
+}
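+
+/*
+ * A note on the WA_* helpers used by the ctx workaround functions here:
+ * they target "masked" registers, whose top 16 bits select which of the
+ * bottom 16 bits a write actually touches. Roughly (a sketch, not the
+ * exact macro definitions):
+ *
+ *   WA_SET_BIT_MASKED(reg, b)  records the value (b << 16) | b
+ *   WA_CLR_BIT_MASKED(reg, b)  records the value (b << 16), clearing b
+ *
+ * The recorded (reg, value) pairs are not written here; they are emitted
+ * into each context by intel_ctx_workarounds_emit() below.
+ */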
+
+static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
+{
+	u8 vals[3] = { 0, 0, 0 };
+	unsigned int i;
+
+	for (i = 0; i < 3; i++) {
+		u8 ss;
+
+		/*
+		 * Only consider slices where one, and only one, subslice has 7
+		 * EUs
+		 */
+		if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
+			continue;
+
+		/*
+		 * subslice_7eu[i] != 0 (because of the check above) and
+		 * ss_max == 4 (maximum number of subslices possible per slice)
+		 *
+		 * ->    0 <= ss <= 3;
+		 */
+		ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
+		vals[i] = 3 - ss;
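+		/* e.g. subslice_7eu[i] == 0x4 -> ss == 2 -> vals[i] == 1 */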
+	}
+
+	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
+		return 0;
+
+	/* Tune IZ hashing. See intel_device_info_runtime_init() */
+	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
+			    GEN9_IZ_HASHING_MASK(2) |
+			    GEN9_IZ_HASHING_MASK(1) |
+			    GEN9_IZ_HASHING_MASK(0),
+			    GEN9_IZ_HASHING(2, vals[2]) |
+			    GEN9_IZ_HASHING(1, vals[1]) |
+			    GEN9_IZ_HASHING(0, vals[0]));
+
+	return 0;
+}
+
+static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	ret = gen9_ctx_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
+
+	return skl_tune_iz_hashing(dev_priv);
+}
+
+static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	ret = gen9_ctx_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
+
+	/* WaDisableThreadStallDopClockGating:bxt */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
+			  STALL_DOP_GATING_DISABLE);
+
+	/* WaToEnableHwFixForPushConstHWBug:bxt */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	return 0;
+}
+
+static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	ret = gen9_ctx_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
+
+	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
+		WA_SET_BIT_MASKED(HDC_CHICKEN0,
+				  HDC_FENCE_DEST_SLM_DISABLE);
+
+	/* WaToEnableHwFixForPushConstHWBug:kbl */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableSbeCacheDispatchPortSharing:kbl */
+	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+	return 0;
+}
+
+static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	ret = gen9_ctx_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
+
+	/* WaToEnableHwFixForPushConstHWBug:glk */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	return 0;
+}
+
+static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	ret = gen9_ctx_workarounds_init(dev_priv);
+	if (ret)
+		return ret;
+
+	/* WaToEnableHwFixForPushConstHWBug:cfl */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableSbeCacheDispatchPortSharing:cfl */
+	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
+			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+
+	return 0;
+}
+
+static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	/* WaForceContextSaveRestoreNonCoherent:cnl */
+	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
+			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
+
+	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
+	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
+		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
+
+	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
+	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
+
+	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
+		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
+
+	/* WaPushConstantDereferenceHoldDisable:cnl */
+	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
+
+	/* FtrEnableFastAnisoL1BankingFix:cnl */
+	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
+
+	/* WaDisable3DMidCmdPreemption:cnl */
+	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
+
+	/* WaDisableGPGPUMidCmdPreemption:cnl */
+	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
+			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
+
+	/* WaDisableEarlyEOT:cnl */
+	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
+
+	return 0;
+}
+
+static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	/* Wa_1604370585:icl (pre-prod)
+	 * Formerly known as WaPushConstantDereferenceHoldDisable
+	 */
+	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
+		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
+				  PUSH_CONSTANT_DEREF_DISABLE);
+
+	/* WaForceEnableNonCoherent:icl
+	 * This is not the same workaround as in early Gen9 platforms, where
+	 * lacking this could cause system hangs, but coherency performance
+	 * overhead is high and only a few compute workloads really need it
+	 * (the register is whitelisted in hardware now, so UMDs can opt in
+	 * for coherency if they have a good reason).
+	 */
+	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
+
+	return 0;
+}
+
+int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
+{
+	int err = 0;
+
+	dev_priv->workarounds.count = 0;
+
+	if (INTEL_GEN(dev_priv) < 8)
+		err = 0;
+	else if (IS_BROADWELL(dev_priv))
+		err = bdw_ctx_workarounds_init(dev_priv);
+	else if (IS_CHERRYVIEW(dev_priv))
+		err = chv_ctx_workarounds_init(dev_priv);
+	else if (IS_SKYLAKE(dev_priv))
+		err = skl_ctx_workarounds_init(dev_priv);
+	else if (IS_BROXTON(dev_priv))
+		err = bxt_ctx_workarounds_init(dev_priv);
+	else if (IS_KABYLAKE(dev_priv))
+		err = kbl_ctx_workarounds_init(dev_priv);
+	else if (IS_GEMINILAKE(dev_priv))
+		err = glk_ctx_workarounds_init(dev_priv);
+	else if (IS_COFFEELAKE(dev_priv))
+		err = cfl_ctx_workarounds_init(dev_priv);
+	else if (IS_CANNONLAKE(dev_priv))
+		err = cnl_ctx_workarounds_init(dev_priv);
+	else if (IS_ICELAKE(dev_priv))
+		err = icl_ctx_workarounds_init(dev_priv);
+	else
+		MISSING_CASE(INTEL_GEN(dev_priv));
+	if (err)
+		return err;
+
+	DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
+			 dev_priv->workarounds.count);
+	return 0;
+}
+
+int intel_ctx_workarounds_emit(struct i915_request *rq)
+{
+	struct i915_workarounds *w = &rq->i915->workarounds;
+	u32 *cs;
+	int ret, i;
+
+	if (w->count == 0)
+		return 0;
+
+	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
+	if (ret)
+		return ret;
+
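+	/* One dword for the LRI header, two per register, one for MI_NOOP */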
+	cs = intel_ring_begin(rq, (w->count * 2 + 2));
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
+	for (i = 0; i < w->count; i++) {
+		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
+		*cs++ = w->reg[i].value;
+	}
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
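+/*
+ * Unlike the ctx workarounds above, which are saved into each context
+ * image via MI_LOAD_REGISTER_IMM, the GT workarounds below are plain
+ * mmio writes to global registers.
+ */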
+static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+}
+
+static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+}
+
+static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
+	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
+		   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
+
+	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
+	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
+		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+
+	/* WaDisableKillLogic:bxt,skl,kbl */
+	if (!IS_COFFEELAKE(dev_priv))
+		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+			   ECOCHK_DIS_TLB);
+
+	if (HAS_LLC(dev_priv)) {
+		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+		 *
+		 * Must match Display Engine. See
+		 * WaCompressedResourceDisplayNewHashMode.
+		 */
+		I915_WRITE(MMCD_MISC_CTRL,
+			   I915_READ(MMCD_MISC_CTRL) |
+			   MMCD_PCLA |
+			   MMCD_HOTSPOT_EN);
+	}
+
+	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
+	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
+		   BDW_DISABLE_HDC_INVALIDATION);
+
+	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
+	if (IS_GEN9_LP(dev_priv)) {
+		u32 val = I915_READ(GEN8_L3SQCREG1);
+
+		val &= ~L3_PRIO_CREDITS_MASK;
+		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
+		I915_WRITE(GEN8_L3SQCREG1, val);
+	}
+
+	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
+	I915_WRITE(GEN8_L3SQCREG4,
+		   I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+}
+
+static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	gen9_gt_workarounds_apply(dev_priv);
+
+	/* WaEnableGapsTsvCreditFix:skl */
+	I915_WRITE(GEN8_GARBCNTL,
+		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+	/* WaDisableGafsUnitClkGating:skl */
+	I915_WRITE(GEN7_UCGCTL4,
+		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaInPlaceDecompressionHang:skl */
+	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
+		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+			   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	gen9_gt_workarounds_apply(dev_priv);
+
+	/* WaDisablePooledEuLoadBalancingFix:bxt */
+	I915_WRITE(FF_SLICE_CS_CHICKEN2,
+		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
+
+	/* WaInPlaceDecompressionHang:bxt */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	gen9_gt_workarounds_apply(dev_priv);
+
+	/* WaEnableGapsTsvCreditFix:kbl */
+	I915_WRITE(GEN8_GARBCNTL,
+		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+	/* WaDisableDynamicCreditSharing:kbl */
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+		I915_WRITE(GAMT_CHKN_BIT_REG,
+			   I915_READ(GAMT_CHKN_BIT_REG) |
+			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
+
+	/* WaDisableGafsUnitClkGating:kbl */
+	I915_WRITE(GEN7_UCGCTL4,
+		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaInPlaceDecompressionHang:kbl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	gen9_gt_workarounds_apply(dev_priv);
+}
+
+static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	gen9_gt_workarounds_apply(dev_priv);
+
+	/* WaEnableGapsTsvCreditFix:cfl */
+	I915_WRITE(GEN8_GARBCNTL,
+		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
+
+	/* WaDisableGafsUnitClkGating:cfl */
+	I915_WRITE(GEN7_UCGCTL4,
+		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
+
+	/* WaInPlaceDecompressionHang:cfl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+}
+
+static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
+	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
+		I915_WRITE(GAMT_CHKN_BIT_REG,
+			   I915_READ(GAMT_CHKN_BIT_REG) |
+			   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
+
+	/* WaInPlaceDecompressionHang:cnl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	/* WaEnablePreemptionGranularityControlByUMD:cnl */
+	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
+		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
+}
+
+static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	/* This is not a Wa. Enable for better image quality */
+	I915_WRITE(_3D_CHICKEN3,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
+
+	/* WaInPlaceDecompressionHang:icl */
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+					    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+
+	/* WaPipelineFlushCoherentLines:icl */
+	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
+				   GEN8_LQSC_FLUSH_COHERENT_LINES);
+
+	/* Wa_1405543622:icl
+	 * Formerly known as WaGAPZPriorityScheme
+	 */
+	I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) |
+				  GEN11_ARBITRATION_PRIO_ORDER_MASK);
+
+	/* Wa_1604223664:icl
+	 * Formerly known as WaL3BankAddressHashing
+	 */
+	I915_WRITE(GEN8_GARBCNTL,
+		   (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) |
+		   GEN11_HASH_CTRL_EXCL_BIT0);
+	I915_WRITE(GEN11_GLBLINVL,
+		   (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) |
+		   GEN11_BANK_HASH_ADDR_EXCL_BIT0);
+
+	/* WaModifyGamTlbPartitioning:icl */
+	I915_WRITE(GEN11_GACB_PERF_CTRL,
+		   (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) |
+		   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
+
+	/* Wa_1405733216:icl
+	 * Formerly known as WaDisableCleanEvicts
+	 */
+	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
+				   GEN11_LQSC_CLEAN_EVICT_DISABLE);
+
+	/* Wa_1405766107:icl
+	 * Formerly known as WaCL2SFHalfMaxAlloc
+	 */
+	I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) |
+				      GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
+				      GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
+
+	/* Wa_220166154:icl
+	 * Formerly known as WaDisCtxReload
+	 */
+	I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) |
+					   GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
+
+	/* Wa_1405779004:icl (pre-prod) */
+	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
+		I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
+			   I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
+			   MSCUNIT_CLKGATE_DIS);
+
+	/* Wa_1406680159:icl */
+	I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE,
+		   I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) |
+		   GWUNIT_CLKGATE_DIS);
+
+	/* Wa_1604302699:icl */
+	I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER,
+		   I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) |
+		   GEN11_I2M_WRITE_DISABLE);
+
+	/* Wa_1406838659:icl (pre-prod) */
+	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
+		I915_WRITE(INF_UNIT_LEVEL_CLKGATE,
+			   I915_READ(INF_UNIT_LEVEL_CLKGATE) |
+			   CGPSF_CLKGATE_DIS);
+
+	/* WaForwardProgressSoftReset:icl */
+	I915_WRITE(GEN10_SCRATCH_LNCF2,
+		   I915_READ(GEN10_SCRATCH_LNCF2) |
+		   PMFLUSHDONE_LNICRSDROP |
+		   PMFLUSH_GAPL3UNBLOCK |
+		   PMFLUSHDONE_LNEBLK);
+}
+
+void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
+{
+	if (INTEL_GEN(dev_priv) < 8)
+		return;
+	else if (IS_BROADWELL(dev_priv))
+		bdw_gt_workarounds_apply(dev_priv);
+	else if (IS_CHERRYVIEW(dev_priv))
+		chv_gt_workarounds_apply(dev_priv);
+	else if (IS_SKYLAKE(dev_priv))
+		skl_gt_workarounds_apply(dev_priv);
+	else if (IS_BROXTON(dev_priv))
+		bxt_gt_workarounds_apply(dev_priv);
+	else if (IS_KABYLAKE(dev_priv))
+		kbl_gt_workarounds_apply(dev_priv);
+	else if (IS_GEMINILAKE(dev_priv))
+		glk_gt_workarounds_apply(dev_priv);
+	else if (IS_COFFEELAKE(dev_priv))
+		cfl_gt_workarounds_apply(dev_priv);
+	else if (IS_CANNONLAKE(dev_priv))
+		cnl_gt_workarounds_apply(dev_priv);
+	else if (IS_ICELAKE(dev_priv))
+		icl_gt_workarounds_apply(dev_priv);
+	else
+		MISSING_CASE(INTEL_GEN(dev_priv));
+}
+
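+/*
+ * Each engine exposes RING_MAX_NONPRIV_SLOTS RING_FORCE_TO_NONPRIV slots;
+ * a privileged register named in a slot becomes writable from userspace
+ * batches. We collect the registers to expose in a whitelist and point
+ * every unused slot at the harmless RING_NOPID register.
+ */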
+struct whitelist {
+	i915_reg_t reg[RING_MAX_NONPRIV_SLOTS];
+	unsigned int count;
+	u32 nopid;
+};
+
+static void whitelist_reg(struct whitelist *w, i915_reg_t reg)
+{
+	if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS))
+		return;
+
+	w->reg[w->count++] = reg;
+}
+
+static void bdw_whitelist_build(struct whitelist *w)
+{
+}
+
+static void chv_whitelist_build(struct whitelist *w)
+{
+}
+
+static void gen9_whitelist_build(struct whitelist *w)
+{
+	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
+	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
+
+	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
+	whitelist_reg(w, GEN8_CS_CHICKEN1);
+
+	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
+	whitelist_reg(w, GEN8_HDC_CHICKEN1);
+}
+
+static void skl_whitelist_build(struct whitelist *w)
+{
+	gen9_whitelist_build(w);
+
+	/* WaDisableLSQCROPERFforOCL:skl */
+	whitelist_reg(w, GEN8_L3SQCREG4);
+}
+
+static void bxt_whitelist_build(struct whitelist *w)
+{
+	gen9_whitelist_build(w);
+}
+
+static void kbl_whitelist_build(struct whitelist *w)
+{
+	gen9_whitelist_build(w);
+
+	/* WaDisableLSQCROPERFforOCL:kbl */
+	whitelist_reg(w, GEN8_L3SQCREG4);
+}
+
+static void glk_whitelist_build(struct whitelist *w)
+{
+	gen9_whitelist_build(w);
+
+	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+}
+
+static void cfl_whitelist_build(struct whitelist *w)
+{
+	gen9_whitelist_build(w);
+}
+
+static void cnl_whitelist_build(struct whitelist *w)
+{
+	/* WaEnablePreemptionGranularityControlByUMD:cnl */
+	whitelist_reg(w, GEN8_CS_CHICKEN1);
+}
+
+static void icl_whitelist_build(struct whitelist *w)
+{
+}
+
+static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
+					 struct whitelist *w)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	GEM_BUG_ON(engine->id != RCS);
+
+	w->count = 0;
+	w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base));
+
+	if (INTEL_GEN(i915) < 8)
+		return NULL;
+	else if (IS_BROADWELL(i915))
+		bdw_whitelist_build(w);
+	else if (IS_CHERRYVIEW(i915))
+		chv_whitelist_build(w);
+	else if (IS_SKYLAKE(i915))
+		skl_whitelist_build(w);
+	else if (IS_BROXTON(i915))
+		bxt_whitelist_build(w);
+	else if (IS_KABYLAKE(i915))
+		kbl_whitelist_build(w);
+	else if (IS_GEMINILAKE(i915))
+		glk_whitelist_build(w);
+	else if (IS_COFFEELAKE(i915))
+		cfl_whitelist_build(w);
+	else if (IS_CANNONLAKE(i915))
+		cnl_whitelist_build(w);
+	else if (IS_ICELAKE(i915))
+		icl_whitelist_build(w);
+	else
+		MISSING_CASE(INTEL_GEN(i915));
+
+	return w;
+}
+
+static void whitelist_apply(struct intel_engine_cs *engine,
+			    const struct whitelist *w)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	const u32 base = engine->mmio_base;
+	unsigned int i;
+
+	if (!w)
+		return;
+
+	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
+
+	for (i = 0; i < w->count; i++)
+		I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i),
+			      i915_mmio_reg_offset(w->reg[i]));
+
+	/* And clear the rest just in case of garbage */
+	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
+		I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid);
+
+	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
+}
+
+void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
+{
+	struct whitelist w;
+
+	whitelist_apply(engine, whitelist_build(engine, &w));
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_workarounds.c"
+#endif
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h
new file mode 100644
index 0000000..b11d0623
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_workarounds.h
@@ -0,0 +1,17 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef _I915_WORKAROUNDS_H_
+#define _I915_WORKAROUNDS_H_
+
+int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv);
+int intel_ctx_workarounds_emit(struct i915_request *rq);
+
+void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv);
+
+void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine);
+
+#endif
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 05bbef3..91c7291 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1091,7 +1091,7 @@
 out_vma_unpin:
 	i915_vma_unpin(vma);
 out_vma_close:
-	i915_vma_close(vma);
+	i915_vma_destroy(vma);
 
 	return err;
 }
@@ -1757,6 +1757,9 @@
 		goto out_unlock;
 	}
 
+	if (ctx->ppgtt)
+		ctx->ppgtt->base.scrub_64K = true;
+
 	err = i915_subtests(tests, ctx);
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 7ecaed5..ddb03f0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -23,6 +23,7 @@
  */
 
 #include "../i915_selftest.h"
+#include "igt_flush_test.h"
 
 #include "mock_drm.h"
 #include "huge_gem_object.h"
@@ -411,6 +412,8 @@
 	}
 
 out_unlock:
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	mock_file_free(i915, file);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
deleted file mode 100644
index 3000e6a..0000000
--- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "../i915_selftest.h"
-#include "i915_random.h"
-
-#include "mock_gem_device.h"
-#include "mock_timeline.h"
-
-struct __igt_sync {
-	const char *name;
-	u32 seqno;
-	bool expected;
-	bool set;
-};
-
-static int __igt_sync(struct intel_timeline *tl,
-		      u64 ctx,
-		      const struct __igt_sync *p,
-		      const char *name)
-{
-	int ret;
-
-	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
-		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
-		       name, p->name, ctx, p->seqno, yesno(p->expected));
-		return -EINVAL;
-	}
-
-	if (p->set) {
-		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int igt_sync(void *arg)
-{
-	const struct __igt_sync pass[] = {
-		{ "unset", 0, false, false },
-		{ "new", 0, false, true },
-		{ "0a", 0, true, true },
-		{ "1a", 1, false, true },
-		{ "1b", 1, true, true },
-		{ "0b", 0, true, false },
-		{ "2a", 2, false, true },
-		{ "4", 4, false, true },
-		{ "INT_MAX", INT_MAX, false, true },
-		{ "INT_MAX-1", INT_MAX-1, true, false },
-		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
-		{ "INT_MAX", INT_MAX, true, false },
-		{ "UINT_MAX", UINT_MAX, false, true },
-		{ "wrap", 0, false, true },
-		{ "unwrap", UINT_MAX, true, false },
-		{},
-	}, *p;
-	struct intel_timeline *tl;
-	int order, offset;
-	int ret = -ENODEV;
-
-	tl = mock_timeline(0);
-	if (!tl)
-		return -ENOMEM;
-
-	for (p = pass; p->name; p++) {
-		for (order = 1; order < 64; order++) {
-			for (offset = -1; offset <= (order > 1); offset++) {
-				u64 ctx = BIT_ULL(order) + offset;
-
-				ret = __igt_sync(tl, ctx, p, "1");
-				if (ret)
-					goto out;
-			}
-		}
-	}
-	mock_timeline_destroy(tl);
-
-	tl = mock_timeline(0);
-	if (!tl)
-		return -ENOMEM;
-
-	for (order = 1; order < 64; order++) {
-		for (offset = -1; offset <= (order > 1); offset++) {
-			u64 ctx = BIT_ULL(order) + offset;
-
-			for (p = pass; p->name; p++) {
-				ret = __igt_sync(tl, ctx, p, "2");
-				if (ret)
-					goto out;
-			}
-		}
-	}
-
-out:
-	mock_timeline_destroy(tl);
-	return ret;
-}
-
-static unsigned int random_engine(struct rnd_state *rnd)
-{
-	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
-}
-
-static int bench_sync(void *arg)
-{
-	struct rnd_state prng;
-	struct intel_timeline *tl;
-	unsigned long end_time, count;
-	u64 prng32_1M;
-	ktime_t kt;
-	int order, last_order;
-
-	tl = mock_timeline(0);
-	if (!tl)
-		return -ENOMEM;
-
-	/* Lookups from cache are very fast and so the random number generation
-	 * and the loop itself becomes a significant factor in the per-iteration
-	 * timings. We try to compensate the results by measuring the overhead
-	 * of the prng and subtract it from the reported results.
-	 */
-	prandom_seed_state(&prng, i915_selftest.random_seed);
-	count = 0;
-	kt = ktime_get();
-	end_time = jiffies + HZ/10;
-	do {
-		u32 x;
-
-		/* Make sure the compiler doesn't optimise away the prng call */
-		WRITE_ONCE(x, prandom_u32_state(&prng));
-
-		count++;
-	} while (!time_after(jiffies, end_time));
-	kt = ktime_sub(ktime_get(), kt);
-	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
-		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
-
-	/* Benchmark (only) setting random context ids */
-	prandom_seed_state(&prng, i915_selftest.random_seed);
-	count = 0;
-	kt = ktime_get();
-	end_time = jiffies + HZ/10;
-	do {
-		u64 id = i915_prandom_u64_state(&prng);
-
-		__intel_timeline_sync_set(tl, id, 0);
-		count++;
-	} while (!time_after(jiffies, end_time));
-	kt = ktime_sub(ktime_get(), kt);
-	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
-	pr_info("%s: %lu random insertions, %lluns/insert\n",
-		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-
-	/* Benchmark looking up the exact same context ids as we just set */
-	prandom_seed_state(&prng, i915_selftest.random_seed);
-	end_time = count;
-	kt = ktime_get();
-	while (end_time--) {
-		u64 id = i915_prandom_u64_state(&prng);
-
-		if (!__intel_timeline_sync_is_later(tl, id, 0)) {
-			mock_timeline_destroy(tl);
-			pr_err("Lookup of %llu failed\n", id);
-			return -EINVAL;
-		}
-	}
-	kt = ktime_sub(ktime_get(), kt);
-	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
-	pr_info("%s: %lu random lookups, %lluns/lookup\n",
-		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-
-	mock_timeline_destroy(tl);
-	cond_resched();
-
-	tl = mock_timeline(0);
-	if (!tl)
-		return -ENOMEM;
-
-	/* Benchmark setting the first N (in order) contexts */
-	count = 0;
-	kt = ktime_get();
-	end_time = jiffies + HZ/10;
-	do {
-		__intel_timeline_sync_set(tl, count++, 0);
-	} while (!time_after(jiffies, end_time));
-	kt = ktime_sub(ktime_get(), kt);
-	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
-		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-
-	/* Benchmark looking up the exact same context ids as we just set */
-	end_time = count;
-	kt = ktime_get();
-	while (end_time--) {
-		if (!__intel_timeline_sync_is_later(tl, end_time, 0)) {
-			pr_err("Lookup of %lu failed\n", end_time);
-			mock_timeline_destroy(tl);
-			return -EINVAL;
-		}
-	}
-	kt = ktime_sub(ktime_get(), kt);
-	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
-		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-
-	mock_timeline_destroy(tl);
-	cond_resched();
-
-	tl = mock_timeline(0);
-	if (!tl)
-		return -ENOMEM;
-
-	/* Benchmark searching for a random context id and maybe changing it */
-	prandom_seed_state(&prng, i915_selftest.random_seed);
-	count = 0;
-	kt = ktime_get();
-	end_time = jiffies + HZ/10;
-	do {
-		u32 id = random_engine(&prng);
-		u32 seqno = prandom_u32_state(&prng);
-
-		if (!__intel_timeline_sync_is_later(tl, id, seqno))
-			__intel_timeline_sync_set(tl, id, seqno);
-
-		count++;
-	} while (!time_after(jiffies, end_time));
-	kt = ktime_sub(ktime_get(), kt);
-	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
-	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
-		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
-	mock_timeline_destroy(tl);
-	cond_resched();
-
-	/* Benchmark searching for a known context id and changing the seqno */
-	for (last_order = 1, order = 1; order < 32;
-	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
-		unsigned int mask = BIT(order) - 1;
-
-		tl = mock_timeline(0);
-		if (!tl)
-			return -ENOMEM;
-
-		count = 0;
-		kt = ktime_get();
-		end_time = jiffies + HZ/10;
-		do {
-			/* Without assuming too many details of the underlying
-			 * implementation, try to identify its phase-changes
-			 * (if any)!
-			 */
-			u64 id = (u64)(count & mask) << order;
-
-			__intel_timeline_sync_is_later(tl, id, 0);
-			__intel_timeline_sync_set(tl, id, 0);
-
-			count++;
-		} while (!time_after(jiffies, end_time));
-		kt = ktime_sub(ktime_get(), kt);
-		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
-			__func__, count, order,
-			(long long)div64_ul(ktime_to_ns(kt), count));
-		mock_timeline_destroy(tl);
-		cond_resched();
-	}
-
-	return 0;
-}
-
-int i915_gem_timeline_mock_selftests(void)
-{
-	static const struct i915_subtest tests[] = {
-		SUBTEST(igt_sync),
-		SUBTEST(bench_sync),
-	};
-
-	return i915_subtests(tests, NULL);
-}
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 9c76f03..a00e2bd 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -11,6 +11,7 @@
  */
 selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
 selftest(uncore, intel_uncore_live_selftests)
+selftest(workarounds, intel_workarounds_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
 selftest(dmabuf, i915_gem_dmabuf_live_selftests)
@@ -20,4 +21,5 @@
 selftest(hugepages, i915_gem_huge_page_live_selftests)
 selftest(contexts, i915_gem_context_live_selftests)
 selftest(hangcheck, intel_hangcheck_live_selftests)
+selftest(execlists, intel_execlists_live_selftests)
 selftest(guc, intel_guc_live_selftest)
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 9a48aa44..d16d741 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -14,6 +14,7 @@
 selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
+selftest(engine, intel_engine_cs_mock_selftests)
 selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
 selftest(timelines, i915_gem_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c
new file mode 100644
index 0000000..19f1c6a
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c
@@ -0,0 +1,267 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+#include "mock_gem_device.h"
+#include "mock_timeline.h"
+
+struct __igt_sync {
+	const char *name;
+	u32 seqno;
+	bool expected;
+	bool set;
+};
+
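+/*
+ * One table-driven step: check whether (ctx, seqno) is already considered
+ * signaled on the timeline (must match "expected"), then optionally
+ * record it as the most recent value when "set" is true.
+ */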
+static int __igt_sync(struct i915_timeline *tl,
+		      u64 ctx,
+		      const struct __igt_sync *p,
+		      const char *name)
+{
+	int ret;
+
+	if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
+		pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
+		       name, p->name, ctx, p->seqno, yesno(p->expected));
+		return -EINVAL;
+	}
+
+	if (p->set) {
+		ret = __i915_timeline_sync_set(tl, ctx, p->seqno);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int igt_sync(void *arg)
+{
+	const struct __igt_sync pass[] = {
+		{ "unset", 0, false, false },
+		{ "new", 0, false, true },
+		{ "0a", 0, true, true },
+		{ "1a", 1, false, true },
+		{ "1b", 1, true, true },
+		{ "0b", 0, true, false },
+		{ "2a", 2, false, true },
+		{ "4", 4, false, true },
+		{ "INT_MAX", INT_MAX, false, true },
+		{ "INT_MAX-1", INT_MAX-1, true, false },
+		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
+		{ "INT_MAX", INT_MAX, true, false },
+		{ "UINT_MAX", UINT_MAX, false, true },
+		{ "wrap", 0, false, true },
+		{ "unwrap", UINT_MAX, true, false },
+		{},
+	}, *p;
+	struct i915_timeline tl;
+	int order, offset;
+	int ret = -ENODEV;
+
+	mock_timeline_init(&tl, 0);
+	for (p = pass; p->name; p++) {
+		for (order = 1; order < 64; order++) {
+			for (offset = -1; offset <= (order > 1); offset++) {
+				u64 ctx = BIT_ULL(order) + offset;
+
+				ret = __igt_sync(&tl, ctx, p, "1");
+				if (ret)
+					goto out;
+			}
+		}
+	}
+	mock_timeline_fini(&tl);
+
+	mock_timeline_init(&tl, 0);
+	for (order = 1; order < 64; order++) {
+		for (offset = -1; offset <= (order > 1); offset++) {
+			u64 ctx = BIT_ULL(order) + offset;
+
+			for (p = pass; p->name; p++) {
+				ret = __igt_sync(&tl, ctx, p, "2");
+				if (ret)
+					goto out;
+			}
+		}
+	}
+
+out:
+	mock_timeline_fini(&tl);
+	return ret;
+}
+
+static unsigned int random_engine(struct rnd_state *rnd)
+{
+	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
+}
+
+static int bench_sync(void *arg)
+{
+	struct rnd_state prng;
+	struct i915_timeline tl;
+	unsigned long end_time, count;
+	u64 prng32_1M;
+	ktime_t kt;
+	int order, last_order;
+
+	mock_timeline_init(&tl, 0);
+
+	/* Lookups from cache are very fast and so the random number generation
+	 * and the loop itself become a significant factor in the per-iteration
+	 * timings. We compensate by measuring the overhead of the prng and
+	 * subtracting it from the reported results.
+	 */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+	count = 0;
+	kt = ktime_get();
+	end_time = jiffies + HZ/10;
+	do {
+		u32 x;
+
+		/* Make sure the compiler doesn't optimise away the prng call */
+		WRITE_ONCE(x, prandom_u32_state(&prng));
+
+		count++;
+	} while (!time_after(jiffies, end_time));
+	kt = ktime_sub(ktime_get(), kt);
+	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
+		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
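+	/*
+	 * prng32_1M holds the cost of 2^20 u32 draws in ns, so subtracting
+	 * (count * prng32_1M * 2) >> 20 below removes the overhead of count
+	 * 64b ids, each of which consumes two u32 draws.
+	 */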
+
+	/* Benchmark (only) setting random context ids */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+	count = 0;
+	kt = ktime_get();
+	end_time = jiffies + HZ/10;
+	do {
+		u64 id = i915_prandom_u64_state(&prng);
+
+		__i915_timeline_sync_set(&tl, id, 0);
+		count++;
+	} while (!time_after(jiffies, end_time));
+	kt = ktime_sub(ktime_get(), kt);
+	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+	pr_info("%s: %lu random insertions, %lluns/insert\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+	/* Benchmark looking up the exact same context ids as we just set */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+	end_time = count;
+	kt = ktime_get();
+	while (end_time--) {
+		u64 id = i915_prandom_u64_state(&prng);
+
+		if (!__i915_timeline_sync_is_later(&tl, id, 0)) {
+			mock_timeline_fini(&tl);
+			pr_err("Lookup of %llu failed\n", id);
+			return -EINVAL;
+		}
+	}
+	kt = ktime_sub(ktime_get(), kt);
+	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+	pr_info("%s: %lu random lookups, %lluns/lookup\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+	mock_timeline_fini(&tl);
+	cond_resched();
+
+	mock_timeline_init(&tl, 0);
+
+	/* Benchmark setting the first N (in order) contexts */
+	count = 0;
+	kt = ktime_get();
+	end_time = jiffies + HZ/10;
+	do {
+		__i915_timeline_sync_set(&tl, count++, 0);
+	} while (!time_after(jiffies, end_time));
+	kt = ktime_sub(ktime_get(), kt);
+	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+	/* Benchmark looking up the exact same context ids as we just set */
+	end_time = count;
+	kt = ktime_get();
+	while (end_time--) {
+		if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) {
+			pr_err("Lookup of %lu failed\n", end_time);
+			mock_timeline_fini(&tl);
+			return -EINVAL;
+		}
+	}
+	kt = ktime_sub(ktime_get(), kt);
+	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+	mock_timeline_fini(&tl);
+	cond_resched();
+
+	mock_timeline_init(&tl, 0);
+
+	/* Benchmark searching for a random context id and maybe changing it */
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+	count = 0;
+	kt = ktime_get();
+	end_time = jiffies + HZ/10;
+	do {
+		u32 id = random_engine(&prng);
+		u32 seqno = prandom_u32_state(&prng);
+
+		if (!__i915_timeline_sync_is_later(&tl, id, seqno))
+			__i915_timeline_sync_set(&tl, id, seqno);
+
+		count++;
+	} while (!time_after(jiffies, end_time));
+	kt = ktime_sub(ktime_get(), kt);
+	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
+		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+	mock_timeline_fini(&tl);
+	cond_resched();
+
+	/* Benchmark searching for a known context id and changing the seqno */
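+	/* order advances through the Fibonacci sequence to vary the id stride */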
+	for (last_order = 1, order = 1; order < 32;
+	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
+		unsigned int mask = BIT(order) - 1;
+
+		mock_timeline_init(&tl, 0);
+
+		count = 0;
+		kt = ktime_get();
+		end_time = jiffies + HZ/10;
+		do {
+			/* Without assuming too many details of the underlying
+			 * implementation, try to identify its phase-changes
+			 * (if any)!
+			 */
+			u64 id = (u64)(count & mask) << order;
+
+			__i915_timeline_sync_is_later(&tl, id, 0);
+			__i915_timeline_sync_set(&tl, id, 0);
+
+			count++;
+		} while (!time_after(jiffies, end_time));
+		kt = ktime_sub(ktime_get(), kt);
+		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
+			__func__, count, order,
+			(long long)div64_ul(ktime_to_ns(kt), count));
+		mock_timeline_fini(&tl);
+		cond_resched();
+	}
+
+	return 0;
+}
+
+int i915_gem_timeline_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_sync),
+		SUBTEST(bench_sync),
+	};
+
+	return i915_subtests(tests, NULL);
+}
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index eb89e30..e90f972 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -81,7 +81,7 @@
 	}
 
 	if (i915_vma_compare(vma, vm, view)) {
-		pr_err("i915_vma_compare failed with create parmaters!\n");
+		pr_err("i915_vma_compare failed with create parameters!\n");
 		return ERR_PTR(-EINVAL);
 	}
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
new file mode 100644
index 0000000..0d06f55
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -0,0 +1,70 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_drv.h"
+
+#include "../i915_selftest.h"
+#include "igt_flush_test.h"
+
+struct wedge_me {
+	struct delayed_work work;
+	struct drm_i915_private *i915;
+	const void *symbol;
+};
+
+static void wedge_me(struct work_struct *work)
+{
+	struct wedge_me *w = container_of(work, typeof(*w), work.work);
+
+	pr_err("%pS timed out, cancelling all further testing.\n", w->symbol);
+
+	GEM_TRACE("%pS timed out.\n", w->symbol);
+	GEM_TRACE_DUMP();
+
+	i915_gem_set_wedged(w->i915);
+}
+
+static void __init_wedge(struct wedge_me *w,
+			 struct drm_i915_private *i915,
+			 long timeout,
+			 const void *symbol)
+{
+	w->i915 = i915;
+	w->symbol = symbol;
+
+	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
+	schedule_delayed_work(&w->work, timeout);
+}
+
+static void __fini_wedge(struct wedge_me *w)
+{
+	cancel_delayed_work_sync(&w->work);
+	destroy_delayed_work_on_stack(&w->work);
+	w->i915 = NULL;
+}
+
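+/*
+ * Run the statement following the macro exactly once with a watchdog
+ * armed: __init_wedge() schedules wedge_me() to fire after TIMEOUT, the
+ * body executes, and __fini_wedge() cancels the worker. If the body
+ * stalls past TIMEOUT, the worker wedges the GPU so stuck waits abort.
+ */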
+#define wedge_on_timeout(W, DEV, TIMEOUT)				\
+	for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \
+	     (W)->i915;							\
+	     __fini_wedge((W)))
+
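+/*
+ * Flush outstanding work and wait for the GPU to idle, wedging the device
+ * if that takes longer than a second so a single stuck test fails fast
+ * instead of stalling the whole run.
+ */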
+int igt_flush_test(struct drm_i915_private *i915, unsigned int flags)
+{
+	struct wedge_me w;
+
+	cond_resched();
+
+	if (flags & I915_WAIT_LOCKED &&
+	    i915_gem_switch_to_kernel_context(i915)) {
+		pr_err("Failed to switch back to kernel context; declaring wedged\n");
+		i915_gem_set_wedged(i915);
+	}
+
+	wedge_on_timeout(&w, i915, HZ)
+		i915_gem_wait_for_idle(i915, flags);
+
+	return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
+}
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
new file mode 100644
index 0000000..63e0099
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef IGT_FLUSH_TEST_H
+#define IGT_FLUSH_TEST_H
+
+struct drm_i915_private;
+
+int igt_flush_test(struct drm_i915_private *i915, unsigned int flags);
+
+#endif /* IGT_FLUSH_TEST_H */
diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
index 4658002..d6926e7 100644
--- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
@@ -412,10 +412,11 @@
 		 * that they are ready for the next test. We wait until all
 		 * threads are complete and waiting for us (i.e. not a seqno).
 		 */
-		err = wait_var_event_timeout(&done, !atomic_read(&done), 10 * HZ);
-		if (err) {
+		if (!wait_var_event_timeout(&done,
+					    !atomic_read(&done), 10 * HZ)) {
 			pr_err("Timed out waiting for %d remaining waiters\n",
 			       atomic_read(&done));
+			err = -ETIMEDOUT;
 			break;
 		}
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/selftests/intel_engine_cs.c
new file mode 100644
index 0000000..cfaa6b29
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_engine_cs.c
@@ -0,0 +1,58 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+
+static int intel_mmio_bases_check(void *arg)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
+		const struct engine_info *info = &intel_engines[i];
+		char name[INTEL_ENGINE_CS_MAX_NAME];
+		u8 prev = U8_MAX;
+
+		__sprint_engine_name(name, info);
+
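+		/*
+		 * mmio_bases[] must be sorted by gen in descending order,
+		 * terminated by a gen == 0 sentinel, and every listed base
+		 * must be non-zero.
+		 */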
+		for (j = 0; j < MAX_MMIO_BASES; j++) {
+			u8 gen = info->mmio_bases[j].gen;
+			u32 base = info->mmio_bases[j].base;
+
+			if (gen >= prev) {
+				pr_err("%s: %s: mmio base for gen %x "
+					"is before the one for gen %x\n",
+				       __func__, name, prev, gen);
+				return -EINVAL;
+			}
+
+			if (gen == 0)
+				break;
+
+			if (!base) {
+				pr_err("%s: %s: invalid mmio base (%x) "
+					"for gen %x at entry %u\n",
+				       __func__, name, base, gen, j);
+				return -EINVAL;
+			}
+
+			prev = gen;
+		}
+
+		pr_info("%s: min gen supported for %s = %d\n",
+			__func__, name, prev);
+	}
+
+	return 0;
+}
+
+int intel_engine_cs_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(intel_mmio_bases_check),
+	};
+
+	return i915_subtests(tests, NULL);
+}
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index df7898c..438e0b0 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -25,10 +25,14 @@
 #include <linux/kthread.h>
 
 #include "../i915_selftest.h"
+#include "i915_random.h"
+#include "igt_flush_test.h"
 
 #include "mock_context.h"
 #include "mock_drm.h"
 
+#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
+
 struct hang {
 	struct drm_i915_private *i915;
 	struct drm_i915_gem_object *hws;
@@ -250,58 +254,6 @@
 	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
 }
 
-struct wedge_me {
-	struct delayed_work work;
-	struct drm_i915_private *i915;
-	const void *symbol;
-};
-
-static void wedge_me(struct work_struct *work)
-{
-	struct wedge_me *w = container_of(work, typeof(*w), work.work);
-
-	pr_err("%pS timed out, cancelling all further testing.\n",
-	       w->symbol);
-	i915_gem_set_wedged(w->i915);
-}
-
-static void __init_wedge(struct wedge_me *w,
-			 struct drm_i915_private *i915,
-			 long timeout,
-			 const void *symbol)
-{
-	w->i915 = i915;
-	w->symbol = symbol;
-
-	INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me);
-	schedule_delayed_work(&w->work, timeout);
-}
-
-static void __fini_wedge(struct wedge_me *w)
-{
-	cancel_delayed_work_sync(&w->work);
-	destroy_delayed_work_on_stack(&w->work);
-	w->i915 = NULL;
-}
-
-#define wedge_on_timeout(W, DEV, TIMEOUT)				\
-	for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \
-	     (W)->i915;							\
-	     __fini_wedge((W)))
-
-static noinline int
-flush_test(struct drm_i915_private *i915, unsigned int flags)
-{
-	struct wedge_me w;
-
-	cond_resched();
-
-	wedge_on_timeout(&w, i915, HZ)
-		i915_gem_wait_for_idle(i915, flags);
-
-	return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
-}
-
 static void hang_fini(struct hang *h)
 {
 	*h->batch = MI_BATCH_BUFFER_END;
@@ -315,10 +267,10 @@
 
 	kernel_context_close(h->ctx);
 
-	flush_test(h->i915, I915_WAIT_LOCKED);
+	igt_flush_test(h->i915, I915_WAIT_LOCKED);
 }
 
-static bool wait_for_hang(struct hang *h, struct i915_request *rq)
+static bool wait_until_running(struct hang *h, struct i915_request *rq)
 {
 	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
 					       rq->fence.seqno),
@@ -433,7 +385,7 @@
 	mutex_lock(&i915->drm.struct_mutex);
 	reset_count = i915_reset_count(&i915->gpu_error);
 
-	i915_reset(i915, I915_RESET_QUIET);
+	i915_reset(i915, ALL_ENGINES, NULL);
 
 	if (i915_reset_count(&i915->gpu_error) == reset_count) {
 		pr_err("No GPU reset recorded!\n");
@@ -450,6 +402,11 @@
 	return err;
 }
 
+static bool wait_for_idle(struct intel_engine_cs *engine)
+{
+	return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
+}
+
 static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 {
 	struct intel_engine_cs *engine;
@@ -477,12 +434,21 @@
 		if (active && !intel_engine_can_store_dword(engine))
 			continue;
 
+		if (!wait_for_idle(engine)) {
+			pr_err("%s failed to idle before reset\n",
+			       engine->name);
+			err = -EIO;
+			break;
+		}
+
 		reset_count = i915_reset_count(&i915->gpu_error);
 		reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
 							     engine);
 
 		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		do {
+			u32 seqno = intel_engine_get_seqno(engine);
+
 			if (active) {
 				struct i915_request *rq;
 
@@ -498,7 +464,7 @@
 				__i915_request_add(rq, true);
 				mutex_unlock(&i915->drm.struct_mutex);
 
-				if (!wait_for_hang(&h, rq)) {
+				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(i915->drm.dev);
 
 					pr_err("%s: Failed to start request %x, at %x\n",
@@ -511,14 +477,12 @@
 					break;
 				}
 
+				GEM_BUG_ON(!rq->global_seqno);
+				seqno = rq->global_seqno - 1;
 				i915_request_put(rq);
 			}
 
-			engine->hangcheck.stalled = true;
-			engine->hangcheck.seqno =
-				intel_engine_get_seqno(engine);
-
-			err = i915_reset_engine(engine, I915_RESET_QUIET);
+			err = i915_reset_engine(engine, NULL);
 			if (err) {
 				pr_err("i915_reset_engine failed\n");
 				break;
@@ -539,14 +503,25 @@
 				break;
 			}
 
-			engine->hangcheck.stalled = false;
+			if (!wait_for_idle(engine)) {
+				struct drm_printer p =
+					drm_info_printer(i915->drm.dev);
+
+				pr_err("%s failed to idle after reset\n",
+				       engine->name);
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+				err = -EIO;
+				break;
+			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 
 		if (err)
 			break;
 
-		err = flush_test(i915, 0);
+		err = igt_flush_test(i915, 0);
 		if (err)
 			break;
 	}
@@ -573,11 +548,25 @@
 	return __igt_reset_engine(arg, true);
 }
 
+struct active_engine {
+	struct task_struct *task;
+	struct intel_engine_cs *engine;
+	unsigned long resets;
+	unsigned int flags;
+};
+
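+/* Flags selecting the background load applied while an engine is reset */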
+#define TEST_ACTIVE	BIT(0)
+#define TEST_OTHERS	BIT(1)
+#define TEST_SELF	BIT(2)
+#define TEST_PRIORITY	BIT(3)
+
 static int active_engine(void *data)
 {
-	struct intel_engine_cs *engine = data;
-	struct i915_request *rq[2] = {};
-	struct i915_gem_context *ctx[2];
+	I915_RND_STATE(prng);
+	struct active_engine *arg = data;
+	struct intel_engine_cs *engine = arg->engine;
+	struct i915_request *rq[8] = {};
+	struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
 	struct drm_file *file;
 	unsigned long count = 0;
 	int err = 0;
@@ -586,25 +575,20 @@
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	mutex_lock(&engine->i915->drm.struct_mutex);
-	ctx[0] = live_context(engine->i915, file);
-	mutex_unlock(&engine->i915->drm.struct_mutex);
-	if (IS_ERR(ctx[0])) {
-		err = PTR_ERR(ctx[0]);
-		goto err_file;
-	}
-
-	mutex_lock(&engine->i915->drm.struct_mutex);
-	ctx[1] = live_context(engine->i915, file);
-	mutex_unlock(&engine->i915->drm.struct_mutex);
-	if (IS_ERR(ctx[1])) {
-		err = PTR_ERR(ctx[1]);
-		i915_gem_context_put(ctx[0]);
-		goto err_file;
+	for (count = 0; count < ARRAY_SIZE(ctx); count++) {
+		mutex_lock(&engine->i915->drm.struct_mutex);
+		ctx[count] = live_context(engine->i915, file);
+		mutex_unlock(&engine->i915->drm.struct_mutex);
+		if (IS_ERR(ctx[count])) {
+			err = PTR_ERR(ctx[count]);
+			while (count--)
+				i915_gem_context_put(ctx[count]);
+			goto err_file;
+		}
 	}
 
 	while (!kthread_should_stop()) {
-		unsigned int idx = count++ & 1;
+		unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
 		struct i915_request *old = rq[idx];
 		struct i915_request *new;
 
@@ -616,14 +600,28 @@
 			break;
 		}
 
+		if (arg->flags & TEST_PRIORITY)
+			ctx[idx]->sched.priority =
+				i915_prandom_u32_max_state(512, &prng);
+
 		rq[idx] = i915_request_get(new);
 		i915_request_add(new);
 		mutex_unlock(&engine->i915->drm.struct_mutex);
 
 		if (old) {
-			i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT);
+			if (i915_request_wait(old, 0, HZ) < 0) {
+				GEM_TRACE("%s timed out.\n", engine->name);
+				GEM_TRACE_DUMP();
+
+				i915_gem_set_wedged(engine->i915);
+				i915_request_put(old);
+				err = -EIO;
+				break;
+			}
 			i915_request_put(old);
 		}
+
+		cond_resched();
 	}
 
 	for (count = 0; count < ARRAY_SIZE(rq); count++)
@@ -634,8 +632,9 @@
 	return err;
 }
 
-static int __igt_reset_engine_others(struct drm_i915_private *i915,
-				     bool active)
+static int __igt_reset_engines(struct drm_i915_private *i915,
+			       const char *test_name,
+			       unsigned int flags)
 {
 	struct intel_engine_cs *engine, *other;
 	enum intel_engine_id id, tmp;
@@ -649,50 +648,68 @@
 	if (!intel_has_reset_engine(i915))
 		return 0;
 
-	if (active) {
+	if (flags & TEST_ACTIVE) {
 		mutex_lock(&i915->drm.struct_mutex);
 		err = hang_init(&h, i915);
 		mutex_unlock(&i915->drm.struct_mutex);
 		if (err)
 			return err;
+
+		if (flags & TEST_PRIORITY)
+			h.ctx->sched.priority = 1024;
 	}
 
 	for_each_engine(engine, i915, id) {
-		struct task_struct *threads[I915_NUM_ENGINES] = {};
-		unsigned long resets[I915_NUM_ENGINES];
+		struct active_engine threads[I915_NUM_ENGINES] = {};
 		unsigned long global = i915_reset_count(&i915->gpu_error);
-		unsigned long count = 0;
+		unsigned long count = 0, reported;
 		IGT_TIMEOUT(end_time);
 
-		if (active && !intel_engine_can_store_dword(engine))
+		if (flags & TEST_ACTIVE &&
+		    !intel_engine_can_store_dword(engine))
 			continue;
 
+		if (!wait_for_idle(engine)) {
+			pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n",
+			       engine->name, test_name);
+			err = -EIO;
+			break;
+		}
+
 		memset(threads, 0, sizeof(threads));
 		for_each_engine(other, i915, tmp) {
 			struct task_struct *tsk;
 
-			resets[tmp] = i915_reset_engine_count(&i915->gpu_error,
-							      other);
+			threads[tmp].resets =
+				i915_reset_engine_count(&i915->gpu_error,
+							other);
 
-			if (other == engine)
+			if (!(flags & TEST_OTHERS))
 				continue;
 
-			tsk = kthread_run(active_engine, other,
+			if (other == engine && !(flags & TEST_SELF))
+				continue;
+
+			threads[tmp].engine = other;
+			threads[tmp].flags = flags;
+
+			tsk = kthread_run(active_engine, &threads[tmp],
 					  "igt/%s", other->name);
 			if (IS_ERR(tsk)) {
 				err = PTR_ERR(tsk);
 				goto unwind;
 			}
 
-			threads[tmp] = tsk;
+			threads[tmp].task = tsk;
 			get_task_struct(tsk);
 		}
 
 		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		do {
-			if (active) {
-				struct i915_request *rq;
+			u32 seqno = intel_engine_get_seqno(engine);
+			struct i915_request *rq = NULL;
 
+			if (flags & TEST_ACTIVE) {
 				mutex_lock(&i915->drm.struct_mutex);
 				rq = hang_create_request(&h, engine);
 				if (IS_ERR(rq)) {
@@ -705,7 +722,7 @@
 				__i915_request_add(rq, true);
 				mutex_unlock(&i915->drm.struct_mutex);
 
-				if (!wait_for_hang(&h, rq)) {
+				if (!wait_until_running(&h, rq)) {
 					struct drm_printer p = drm_info_printer(i915->drm.dev);
 
 					pr_err("%s: Failed to start request %x, at %x\n",
@@ -718,33 +735,48 @@
 					break;
 				}
 
-				i915_request_put(rq);
+				GEM_BUG_ON(!rq->global_seqno);
+				seqno = rq->global_seqno - 1;
 			}
 
-			engine->hangcheck.stalled = true;
-			engine->hangcheck.seqno =
-				intel_engine_get_seqno(engine);
-
-			err = i915_reset_engine(engine, I915_RESET_QUIET);
+			err = i915_reset_engine(engine, NULL);
 			if (err) {
-				pr_err("i915_reset_engine(%s:%s) failed, err=%d\n",
-				       engine->name, active ? "active" : "idle", err);
+				pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
+				       engine->name, test_name, err);
 				break;
 			}
 
-			engine->hangcheck.stalled = false;
 			count++;
+
+			if (rq) {
+				i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+				i915_request_put(rq);
+			}
+
+			if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
+				struct drm_printer p =
+					drm_info_printer(i915->drm.dev);
+
+				pr_err("i915_reset_engine(%s:%s):"
+				       " failed to idle after reset\n",
+				       engine->name, test_name);
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+				err = -EIO;
+				break;
+			}
 		} while (time_before(jiffies, end_time));
 		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
 		pr_info("i915_reset_engine(%s:%s): %lu resets\n",
-			engine->name, active ? "active" : "idle", count);
+			engine->name, test_name, count);
 
-		if (i915_reset_engine_count(&i915->gpu_error, engine) -
-		    resets[engine->id] != (active ? count : 0)) {
-			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
-			       engine->name, active ? "active" : "idle", count,
-			       i915_reset_engine_count(&i915->gpu_error,
-						       engine) - resets[engine->id]);
+		reported = i915_reset_engine_count(&i915->gpu_error, engine);
+		reported -= threads[engine->id].resets;
+		if (reported != (flags & TEST_ACTIVE ? count : 0)) {
+			pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n",
+			       engine->name, test_name, count, reported,
+			       (flags & TEST_ACTIVE ? count : 0));
 			if (!err)
 				err = -EINVAL;
 		}
@@ -753,24 +785,26 @@
 		for_each_engine(other, i915, tmp) {
 			int ret;
 
-			if (!threads[tmp])
+			if (!threads[tmp].task)
 				continue;
 
-			ret = kthread_stop(threads[tmp]);
+			ret = kthread_stop(threads[tmp].task);
 			if (ret) {
 				pr_err("kthread for other engine %s failed, err=%d\n",
 				       other->name, ret);
 				if (!err)
 					err = ret;
 			}
-			put_task_struct(threads[tmp]);
+			put_task_struct(threads[tmp].task);
 
-			if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error,
-								   other)) {
+			if (other != engine &&
+			    threads[tmp].resets !=
+			    i915_reset_engine_count(&i915->gpu_error, other)) {
 				pr_err("Innocent engine %s was reset (count=%ld)\n",
 				       other->name,
 				       i915_reset_engine_count(&i915->gpu_error,
-							       other) - resets[tmp]);
+							       other) -
+				       threads[tmp].resets);
 				if (!err)
 					err = -EINVAL;
 			}
@@ -786,7 +820,7 @@
 		if (err)
 			break;
 
-		err = flush_test(i915, 0);
+		err = igt_flush_test(i915, 0);
 		if (err)
 			break;
 	}
@@ -794,7 +828,7 @@
 	if (i915_terminally_wedged(&i915->gpu_error))
 		err = -EIO;
 
-	if (active) {
+	if (flags & TEST_ACTIVE) {
 		mutex_lock(&i915->drm.struct_mutex);
 		hang_fini(&h);
 		mutex_unlock(&i915->drm.struct_mutex);
@@ -803,27 +837,56 @@
 	return err;
 }
 
-static int igt_reset_idle_engine_others(void *arg)
+static int igt_reset_engines(void *arg)
 {
-	return __igt_reset_engine_others(arg, false);
+	static const struct {
+		const char *name;
+		unsigned int flags;
+	} phases[] = {
+		{ "idle", 0 },
+		{ "active", TEST_ACTIVE },
+		{ "others-idle", TEST_OTHERS },
+		{ "others-active", TEST_OTHERS | TEST_ACTIVE },
+		{
+			"others-priority",
+			TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY
+		},
+		{
+			"self-priority",
+			TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF,
+		},
+		{ }
+	};
+	struct drm_i915_private *i915 = arg;
+	typeof(*phases) *p;
+	int err;
+
+	for (p = phases; p->name; p++) {
+		if (p->flags & TEST_PRIORITY) {
+			if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+				continue;
+		}
+
+		err = __igt_reset_engines(arg, p->name, p->flags);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
-static int igt_reset_active_engine_others(void *arg)
+static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
 {
-	return __igt_reset_engine_others(arg, true);
-}
+	struct i915_gpu_error *error = &rq->i915->gpu_error;
+	u32 reset_count = i915_reset_count(error);
 
-static u32 fake_hangcheck(struct i915_request *rq)
-{
-	u32 reset_count;
+	error->stalled_mask = mask;
 
-	rq->engine->hangcheck.stalled = true;
-	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);
+	/* set_bit() must be after we have set up the backchannel (mask) */
+	smp_mb__before_atomic();
+	set_bit(I915_RESET_HANDOFF, &error->flags);
 
-	reset_count = i915_reset_count(&rq->i915->gpu_error);
-
-	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
-	wake_up_all(&rq->i915->gpu_error.wait_queue);
+	wake_up_all(&error->wait_queue);
 
 	return reset_count;
 }
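
The reworked fake_hangcheck() above must make the stalled-engine mask
visible before raising I915_RESET_HANDOFF, hence the
smp_mb__before_atomic() between the store and the set_bit().  A minimal
sketch of this publish-then-signal pattern, with illustrative names
rather than the i915 ones:

	#include <linux/bitops.h>
	#include <linux/wait.h>

	static unsigned long payload;	/* the "backchannel" data */
	static unsigned long flags;
	static DECLARE_WAIT_QUEUE_HEAD(waitq);

	static void publish(unsigned long mask)
	{
		payload = mask;			/* set up the backchannel */
		smp_mb__before_atomic();	/* order it before the flag */
		set_bit(0, &flags);		/* raise the flag... */
		wake_up_all(&waitq);		/* ...and kick any waiters */
	}

	static unsigned long consume(void)
	{
		wait_event(waitq, test_bit(0, &flags));
		smp_rmb();		/* pair with the writer's barrier */
		return payload;
	}
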
@@ -858,21 +921,20 @@
 	i915_request_get(rq);
 	__i915_request_add(rq, true);
 
-	if (!wait_for_hang(&h, rq)) {
+	if (!wait_until_running(&h, rq)) {
 		struct drm_printer p = drm_info_printer(i915->drm.dev);
 
 		pr_err("%s: Failed to start request %x, at %x\n",
 		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
 		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
 
-		i915_reset(i915, 0);
 		i915_gem_set_wedged(i915);
 
 		err = -EIO;
 		goto out_rq;
 	}
 
-	reset_count = fake_hangcheck(rq);
+	reset_count = fake_hangcheck(rq, ALL_ENGINES);
 
 	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
 	if (timeout < 0) {
@@ -903,6 +965,23 @@
 	return err;
 }
 
+static int wait_for_others(struct drm_i915_private *i915,
+			   struct intel_engine_cs *exclude)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		if (engine == exclude)
+			continue;
+
+		if (!wait_for_idle(engine))
+			return -EIO;
+	}
+
+	return 0;
+}
+
 static int igt_reset_queue(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -951,27 +1030,49 @@
 			i915_request_get(rq);
 			__i915_request_add(rq, true);
 
-			if (!wait_for_hang(&h, prev)) {
+			/*
+			 * XXX We don't handle resetting the kernel context
+			 * very well. If we trigger a device reset twice in
+			 * quick succession while the kernel context is
+			 * executing, we may end up skipping the breadcrumb.
+			 * This is really only a problem for the selftest as
+			 * normally there is a large interlude between resets
+			 * (hangcheck), or we focus on resetting just one
+			 * engine and so avoid repeatedly resetting innocents.
+			 */
+			err = wait_for_others(i915, engine);
+			if (err) {
+				pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
+				       __func__, engine->name);
+				i915_request_put(rq);
+				i915_request_put(prev);
+
+				GEM_TRACE_DUMP();
+				i915_gem_set_wedged(i915);
+				goto fini;
+			}
+
+			if (!wait_until_running(&h, prev)) {
 				struct drm_printer p = drm_info_printer(i915->drm.dev);
 
-				pr_err("%s: Failed to start request %x, at %x\n",
-				       __func__, prev->fence.seqno, hws_seqno(&h, prev));
-				intel_engine_dump(prev->engine, &p,
-						  "%s\n", prev->engine->name);
+				pr_err("%s(%s): Failed to start request %x, at %x\n",
+				       __func__, engine->name,
+				       prev->fence.seqno, hws_seqno(&h, prev));
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
 
 				i915_request_put(rq);
 				i915_request_put(prev);
 
-				i915_reset(i915, 0);
 				i915_gem_set_wedged(i915);
 
 				err = -EIO;
 				goto fini;
 			}
 
-			reset_count = fake_hangcheck(prev);
+			reset_count = fake_hangcheck(prev, ENGINE_MASK(id));
 
-			i915_reset(i915, I915_RESET_QUIET);
+			i915_reset(i915, ENGINE_MASK(id), NULL);
 
 			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
 					    &i915->gpu_error.flags));
@@ -1013,7 +1114,7 @@
 
 		i915_request_put(prev);
 
-		err = flush_test(i915, I915_WAIT_LOCKED);
+		err = igt_flush_test(i915, I915_WAIT_LOCKED);
 		if (err)
 			break;
 	}
@@ -1044,7 +1145,7 @@
 	if (!intel_has_reset_engine(i915))
 		return 0;
 
-	if (!intel_engine_can_store_dword(i915->engine[RCS]))
+	if (!engine || !intel_engine_can_store_dword(engine))
 		return 0;
 
 	mutex_lock(&i915->drm.struct_mutex);
@@ -1062,14 +1163,13 @@
 	i915_request_get(rq);
 	__i915_request_add(rq, true);
 
-	if (!wait_for_hang(&h, rq)) {
+	if (!wait_until_running(&h, rq)) {
 		struct drm_printer p = drm_info_printer(i915->drm.dev);
 
 		pr_err("%s: Failed to start request %x, at %x\n",
 		       __func__, rq->fence.seqno, hws_seqno(&h, rq));
 		intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
 
-		i915_reset(i915, 0);
 		i915_gem_set_wedged(i915);
 
 		err = -EIO;
@@ -1081,10 +1181,7 @@
 	/* Temporarily disable error capture */
 	error = xchg(&i915->gpu_error.first_error, (void *)-1);
 
-	engine->hangcheck.stalled = true;
-	engine->hangcheck.seqno = intel_engine_get_seqno(engine);
-
-	i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__);
+	i915_handle_error(i915, ENGINE_MASK(engine->id), 0, NULL);
 
 	xchg(&i915->gpu_error.first_error, error);
 
@@ -1112,8 +1209,7 @@
 		SUBTEST(igt_hang_sanitycheck),
 		SUBTEST(igt_reset_idle_engine),
 		SUBTEST(igt_reset_active_engine),
-		SUBTEST(igt_reset_idle_engine_others),
-		SUBTEST(igt_reset_active_engine_others),
+		SUBTEST(igt_reset_engines),
 		SUBTEST(igt_wait_reset),
 		SUBTEST(igt_reset_queue),
 		SUBTEST(igt_handle_error),
@@ -1129,6 +1225,10 @@
 
 	err = i915_subtests(tests, i915);
 
+	mutex_lock(&i915->drm.struct_mutex);
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	i915_modparams.enable_hangcheck = saved_hangcheck;
 	intel_runtime_pm_put(i915);
 
diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
new file mode 100644
index 0000000..1b8a071
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
@@ -0,0 +1,459 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+#include "igt_flush_test.h"
+
+#include "mock_context.h"
+
+struct spinner {
+	struct drm_i915_private *i915;
+	struct drm_i915_gem_object *hws;
+	struct drm_i915_gem_object *obj;
+	u32 *batch;
+	void *seqno;
+};
+
+static int spinner_init(struct spinner *spin, struct drm_i915_private *i915)
+{
+	unsigned int mode;
+	void *vaddr;
+	int err;
+
+	GEM_BUG_ON(INTEL_GEN(i915) < 8);
+
+	memset(spin, 0, sizeof(*spin));
+	spin->i915 = i915;
+
+	spin->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(spin->hws)) {
+		err = PTR_ERR(spin->hws);
+		goto err;
+	}
+
+	spin->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	if (IS_ERR(spin->obj)) {
+		err = PTR_ERR(spin->obj);
+		goto err_hws;
+	}
+
+	i915_gem_object_set_cache_level(spin->hws, I915_CACHE_LLC);
+	vaddr = i915_gem_object_pin_map(spin->hws, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto err_obj;
+	}
+	spin->seqno = memset(vaddr, 0xff, PAGE_SIZE);
+
+	mode = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
+	vaddr = i915_gem_object_pin_map(spin->obj, mode);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto err_unpin_hws;
+	}
+	spin->batch = vaddr;
+
+	return 0;
+
+err_unpin_hws:
+	i915_gem_object_unpin_map(spin->hws);
+err_obj:
+	i915_gem_object_put(spin->obj);
+err_hws:
+	i915_gem_object_put(spin->hws);
+err:
+	return err;
+}
+
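+/*
+ * Each fence context is assigned its own u32 slot in the HWS page
+ * (modulo the page size), so concurrent spinners on different
+ * timelines write their breadcrumbs to distinct offsets.
+ */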
+static unsigned int seqno_offset(u64 fence)
+{
+	return offset_in_page(sizeof(u32) * fence);
+}
+
+static u64 hws_address(const struct i915_vma *hws,
+		       const struct i915_request *rq)
+{
+	return hws->node.start + seqno_offset(rq->fence.context);
+}
+
+static int emit_recurse_batch(struct spinner *spin,
+			      struct i915_request *rq,
+			      u32 arbitration_command)
+{
+	struct i915_address_space *vm = &rq->ctx->ppgtt->base;
+	struct i915_vma *hws, *vma;
+	u32 *batch;
+	int err;
+
+	vma = i915_vma_instance(spin->obj, vm, NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	hws = i915_vma_instance(spin->hws, vm, NULL);
+	if (IS_ERR(hws))
+		return PTR_ERR(hws);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		return err;
+
+	err = i915_vma_pin(hws, 0, 0, PIN_USER);
+	if (err)
+		goto unpin_vma;
+
+	i915_vma_move_to_active(vma, rq, 0);
+	if (!i915_gem_object_has_active_reference(vma->obj)) {
+		i915_gem_object_get(vma->obj);
+		i915_gem_object_set_active_reference(vma->obj);
+	}
+
+	i915_vma_move_to_active(hws, rq, 0);
+	if (!i915_gem_object_has_active_reference(hws->obj)) {
+		i915_gem_object_get(hws->obj);
+		i915_gem_object_set_active_reference(hws->obj);
+	}
+
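+	/*
+	 * The batch stores the request's seqno to the HWS, optionally
+	 * enables arbitration (MI_ARB_CHECK vs MI_NOOP), then branches
+	 * back to its own start: it spins until spinner_end() overwrites
+	 * the first dword with MI_BATCH_BUFFER_END.
+	 */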
+	batch = spin->batch;
+
+	*batch++ = MI_STORE_DWORD_IMM_GEN4;
+	*batch++ = lower_32_bits(hws_address(hws, rq));
+	*batch++ = upper_32_bits(hws_address(hws, rq));
+	*batch++ = rq->fence.seqno;
+
+	*batch++ = arbitration_command;
+
+	*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+	*batch++ = lower_32_bits(vma->node.start);
+	*batch++ = upper_32_bits(vma->node.start);
+	*batch++ = MI_BATCH_BUFFER_END; /* not reached */
+
+	i915_gem_chipset_flush(spin->i915);
+
+	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
+
+	i915_vma_unpin(hws);
+unpin_vma:
+	i915_vma_unpin(vma);
+	return err;
+}
+
+static struct i915_request *
+spinner_create_request(struct spinner *spin,
+		       struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       u32 arbitration_command)
+{
+	struct i915_request *rq;
+	int err;
+
+	rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq))
+		return rq;
+
+	err = emit_recurse_batch(spin, rq, arbitration_command);
+	if (err) {
+		__i915_request_add(rq, false);
+		return ERR_PTR(err);
+	}
+
+	return rq;
+}
+
+static u32 hws_seqno(const struct spinner *spin, const struct i915_request *rq)
+{
+	u32 *seqno = spin->seqno + seqno_offset(rq->fence.context);
+
+	return READ_ONCE(*seqno);
+}
+
+static void spinner_end(struct spinner *spin)
+{
+	*spin->batch = MI_BATCH_BUFFER_END;
+	i915_gem_chipset_flush(spin->i915);
+}
+
+static void spinner_fini(struct spinner *spin)
+{
+	spinner_end(spin);
+
+	i915_gem_object_unpin_map(spin->obj);
+	i915_gem_object_put(spin->obj);
+
+	i915_gem_object_unpin_map(spin->hws);
+	i915_gem_object_put(spin->hws);
+}
+
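+/*
+ * A spinner counts as running once it has been assigned a global seqno
+ * and its seqno store has landed in the HWS: poll quickly for 10us,
+ * then fall back to a slow 1s timeout.
+ */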
+static bool wait_for_spinner(struct spinner *spin, struct i915_request *rq)
+{
+	if (!wait_event_timeout(rq->execute,
+				READ_ONCE(rq->global_seqno),
+				msecs_to_jiffies(10)))
+		return false;
+
+	return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq),
+					       rq->fence.seqno),
+			     10) &&
+		 wait_for(i915_seqno_passed(hws_seqno(spin, rq),
+					    rq->fence.seqno),
+			  1000));
+}
+
+static int live_sanitycheck(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine;
+	struct i915_gem_context *ctx;
+	enum intel_engine_id id;
+	struct spinner spin;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin, i915))
+		goto err_unlock;
+
+	ctx = kernel_context(i915);
+	if (!ctx)
+		goto err_spin;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = spinner_create_request(&spin, ctx, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin, rq)) {
+			GEM_TRACE("spinner failed to start\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx;
+		}
+
+		spinner_end(&spin);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto err_ctx;
+		}
+	}
+
+	err = 0;
+err_ctx:
+	kernel_context_close(ctx);
+err_spin:
+	spinner_fini(&spin);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int live_preempt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx_hi, *ctx_lo;
+	struct spinner spin_hi, spin_lo;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin_hi, i915))
+		goto err_unlock;
+
+	if (spinner_init(&spin_lo, i915))
+		goto err_spin_hi;
+
+	ctx_hi = kernel_context(i915);
+	if (!ctx_hi)
+		goto err_spin_lo;
+	ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
+
+	ctx_lo = kernel_context(i915);
+	if (!ctx_lo)
+		goto err_ctx_hi;
+	ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+					    MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin_lo, rq)) {
+			GEM_TRACE("lo spinner failed to start\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
+					    MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			spinner_end(&spin_lo);
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin_hi, rq)) {
+			GEM_TRACE("hi spinner failed to start\n");
+			GEM_TRACE_DUMP();
+			i915_gem_set_wedged(i915);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		spinner_end(&spin_hi);
+		spinner_end(&spin_lo);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+	}
+
+	err = 0;
+err_ctx_lo:
+	kernel_context_close(ctx_lo);
+err_ctx_hi:
+	kernel_context_close(ctx_hi);
+err_spin_lo:
+	spinner_fini(&spin_lo);
+err_spin_hi:
+	spinner_fini(&spin_hi);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int live_late_preempt(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx_hi, *ctx_lo;
+	struct spinner spin_hi, spin_lo;
+	struct intel_engine_cs *engine;
+	struct i915_sched_attr attr = {};
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+		return 0;
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	if (spinner_init(&spin_hi, i915))
+		goto err_unlock;
+
+	if (spinner_init(&spin_lo, i915))
+		goto err_spin_hi;
+
+	ctx_hi = kernel_context(i915);
+	if (!ctx_hi)
+		goto err_spin_lo;
+
+	ctx_lo = kernel_context(i915);
+	if (!ctx_lo)
+		goto err_ctx_hi;
+
+	for_each_engine(engine, i915, id) {
+		struct i915_request *rq;
+
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+					    MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!wait_for_spinner(&spin_lo, rq)) {
+			pr_err("First context failed to start\n");
+			goto err_wedged;
+		}
+
+		rq = spinner_create_request(&spin_hi, ctx_hi, engine, MI_NOOP);
+		if (IS_ERR(rq)) {
+			spinner_end(&spin_lo);
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (wait_for_spinner(&spin_hi, rq)) {
+			pr_err("Second context overtook first?\n");
+			goto err_wedged;
+		}
+
+		attr.priority = I915_PRIORITY_MAX;
+		engine->schedule(rq, &attr);
+
+		if (!wait_for_spinner(&spin_hi, rq)) {
+			pr_err("High priority context failed to preempt the low priority context\n");
+			GEM_TRACE_DUMP();
+			goto err_wedged;
+		}
+
+		spinner_end(&spin_hi);
+		spinner_end(&spin_lo);
+		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+	}
+
+	err = 0;
+err_ctx_lo:
+	kernel_context_close(ctx_lo);
+err_ctx_hi:
+	kernel_context_close(ctx_hi);
+err_spin_lo:
+	spinner_fini(&spin_lo);
+err_spin_hi:
+	spinner_fini(&spin_hi);
+err_unlock:
+	igt_flush_test(i915, I915_WAIT_LOCKED);
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+
+err_wedged:
+	spinner_end(&spin_hi);
+	spinner_end(&spin_lo);
+	i915_gem_set_wedged(i915);
+	err = -EIO;
+	goto err_ctx_lo;
+}
+
+int intel_execlists_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_sanitycheck),
+		SUBTEST(live_preempt),
+		SUBTEST(live_late_preempt),
+	};
+
+	if (!HAS_EXECLISTS(i915))
+		return 0;
+
+	return i915_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
new file mode 100644
index 0000000..17444a3
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
@@ -0,0 +1,291 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+
+#include "mock_context.h"
+
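+/*
+ * Build a request that uses MI_STORE_REGISTER_MEM to copy every
+ * RING_FORCE_TO_NONPRIV slot into a results buffer, which the caller
+ * can then compare against the expected whitelist.
+ */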
+static struct drm_i915_gem_object *
+read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_object *result;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	const u32 base = engine->mmio_base;
+	u32 srm, *cs;
+	int err;
+	int i;
+
+	result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+	if (IS_ERR(result))
+		return result;
+
+	i915_gem_object_set_cache_level(result, I915_CACHE_LLC);
+
+	cs = i915_gem_object_pin_map(result, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_obj;
+	}
+	memset(cs, 0xc5, PAGE_SIZE);
+	i915_gem_object_unpin_map(result);
+
+	vma = i915_vma_instance(result, &engine->i915->ggtt.base, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err)
+		goto err_obj;
+
+	rq = i915_request_alloc(engine, ctx);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_pin;
+	}
+
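+	/* Gen8+ SRM carries a 64-bit address: bump the command length by one */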
+	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+	if (INTEL_GEN(ctx->i915) >= 8)
+		srm++;
+
+	cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_req;
+	}
+
+	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
+		*cs++ = srm;
+		*cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i));
+		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
+		*cs++ = 0;
+	}
+	intel_ring_advance(rq, cs);
+
+	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	reservation_object_lock(vma->resv, NULL);
+	reservation_object_add_excl_fence(vma->resv, &rq->fence);
+	reservation_object_unlock(vma->resv);
+
+	i915_gem_object_get(result);
+	i915_gem_object_set_active_reference(result);
+
+	__i915_request_add(rq, true);
+	i915_vma_unpin(vma);
+
+	return result;
+
+err_req:
+	i915_request_add(rq);
+err_pin:
+	i915_vma_unpin(vma);
+err_obj:
+	i915_gem_object_put(result);
+	return ERR_PTR(err);
+}
+
+static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i)
+{
+	return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid;
+}
+
+static void print_results(const struct whitelist *w, const u32 *results)
+{
+	unsigned int i;
+
+	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
+		u32 expected = get_whitelist_reg(w, i);
+		u32 actual = results[i];
+
+		pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n",
+			i, expected, actual);
+	}
+}
+
+static int check_whitelist(const struct whitelist *w,
+			   struct i915_gem_context *ctx,
+			   struct intel_engine_cs *engine)
+{
+	struct drm_i915_gem_object *results;
+	u32 *vaddr;
+	int err;
+	int i;
+
+	results = read_nonprivs(ctx, engine);
+	if (IS_ERR(results))
+		return PTR_ERR(results);
+
+	err = i915_gem_object_set_to_cpu_domain(results, false);
+	if (err)
+		goto out_put;
+
+	vaddr = i915_gem_object_pin_map(results, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto out_put;
+	}
+
+	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) {
+		u32 expected = get_whitelist_reg(w, i);
+		u32 actual = vaddr[i];
+
+		if (expected != actual) {
+			print_results(w, vaddr);
+			pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n",
+			       i, expected, actual);
+
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	i915_gem_object_unpin_map(results);
+out_put:
+	i915_gem_object_put(results);
+	return err;
+}
+
+static int do_device_reset(struct intel_engine_cs *engine)
+{
+	i915_reset(engine->i915, ENGINE_MASK(engine->id), NULL);
+	return 0;
+}
+
+static int do_engine_reset(struct intel_engine_cs *engine)
+{
+	return i915_reset_engine(engine, NULL);
+}
+
+static int switch_to_scratch_context(struct intel_engine_cs *engine)
+{
+	struct i915_gem_context *ctx;
+	struct i915_request *rq;
+
+	ctx = kernel_context(engine->i915);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	rq = i915_request_alloc(engine, ctx);
+	kernel_context_close(ctx);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_add(rq);
+
+	return 0;
+}
+
+static int check_whitelist_across_reset(struct intel_engine_cs *engine,
+					int (*reset)(struct intel_engine_cs *),
+					const struct whitelist *w,
+					const char *name)
+{
+	struct i915_gem_context *ctx;
+	int err;
+
+	ctx = kernel_context(engine->i915);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	err = check_whitelist(w, ctx, engine);
+	if (err) {
+		pr_err("Invalid whitelist *before* %s reset!\n", name);
+		goto out;
+	}
+
+	err = switch_to_scratch_context(engine);
+	if (err)
+		goto out;
+
+	err = reset(engine);
+	if (err) {
+		pr_err("%s reset failed\n", name);
+		goto out;
+	}
+
+	err = check_whitelist(w, ctx, engine);
+	if (err) {
+		pr_err("Whitelist not preserved in context across %s reset!\n",
+		       name);
+		goto out;
+	}
+
+	kernel_context_close(ctx);
+
+	ctx = kernel_context(engine->i915);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	err = check_whitelist(w, ctx, engine);
+	if (err) {
+		pr_err("Invalid whitelist *after* %s reset in fresh context!\n",
+		       name);
+		goto out;
+	}
+
+out:
+	kernel_context_close(ctx);
+	return err;
+}
+
+static int live_reset_whitelist(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_engine_cs *engine = i915->engine[RCS];
+	struct i915_gpu_error *error = &i915->gpu_error;
+	struct whitelist w;
+	int err = 0;
+
+	/* If we reset the gpu, we should not lose the RING_NONPRIV */
+
+	if (!engine)
+		return 0;
+
+	if (!whitelist_build(engine, &w))
+		return 0;
+
+	pr_info("Checking %d whitelisted registers (RING_NONPRIV)\n", w.count);
+
+	set_bit(I915_RESET_BACKOFF, &error->flags);
+	set_bit(I915_RESET_ENGINE + engine->id, &error->flags);
+
+	if (intel_has_reset_engine(i915)) {
+		err = check_whitelist_across_reset(engine,
+						   do_engine_reset, &w,
+						   "engine");
+		if (err)
+			goto out;
+	}
+
+	if (intel_has_gpu_reset(i915)) {
+		err = check_whitelist_across_reset(engine,
+						   do_device_reset, &w,
+						   "device");
+		if (err)
+			goto out;
+	}
+
+out:
+	clear_bit(I915_RESET_ENGINE + engine->id, &error->flags);
+	clear_bit(I915_RESET_BACKOFF, &error->flags);
+	return err;
+}
+
+int intel_workarounds_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_reset_whitelist),
+	};
+	int err;
+
+	mutex_lock(&i915->drm.struct_mutex);
+	err = i915_subtests(tests, i915);
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 78a89ef..26bf29d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -25,6 +25,11 @@
 #include "mock_engine.h"
 #include "mock_request.h"
 
+struct mock_ring {
+	struct intel_ring base;
+	struct i915_timeline timeline;
+};
+
 static struct mock_request *first_request(struct mock_engine *engine)
 {
 	return list_first_entry_or_null(&engine->hw_queue,
@@ -71,14 +76,21 @@
 mock_context_pin(struct intel_engine_cs *engine,
 		 struct i915_gem_context *ctx)
 {
-	i915_gem_context_get(ctx);
+	struct intel_context *ce = to_intel_context(ctx, engine);
+
+	if (!ce->pin_count++)
+		i915_gem_context_get(ctx);
+
 	return engine->buffer;
 }
 
 static void mock_context_unpin(struct intel_engine_cs *engine,
 			       struct i915_gem_context *ctx)
 {
-	i915_gem_context_put(ctx);
+	struct intel_context *ce = to_intel_context(ctx, engine);
+
+	if (!--ce->pin_count)
+		i915_gem_context_put(ctx);
 }
 
 static int mock_request_alloc(struct i915_request *request)
@@ -125,7 +137,7 @@
 static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 {
 	const unsigned long sz = PAGE_SIZE / 2;
-	struct intel_ring *ring;
+	struct mock_ring *ring;
 
 	BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz);
 
@@ -133,14 +145,25 @@
 	if (!ring)
 		return NULL;
 
-	ring->size = sz;
-	ring->effective_size = sz;
-	ring->vaddr = (void *)(ring + 1);
+	i915_timeline_init(engine->i915, &ring->timeline, engine->name);
 
-	INIT_LIST_HEAD(&ring->request_list);
-	intel_ring_update_space(ring);
+	ring->base.size = sz;
+	ring->base.effective_size = sz;
+	ring->base.vaddr = (void *)(ring + 1);
+	ring->base.timeline = &ring->timeline;
 
-	return ring;
+	INIT_LIST_HEAD(&ring->base.request_list);
+	intel_ring_update_space(&ring->base);
+
+	return &ring->base;
+}
+
+static void mock_ring_free(struct intel_ring *base)
+{
+	struct mock_ring *ring = container_of(base, typeof(*ring), base);
+
+	i915_timeline_fini(&ring->timeline);
+	kfree(ring);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -155,12 +178,6 @@
 	if (!engine)
 		return NULL;
 
-	engine->base.buffer = mock_ring(&engine->base);
-	if (!engine->base.buffer) {
-		kfree(engine);
-		return NULL;
-	}
-
 	/* minimal engine setup for requests */
 	engine->base.i915 = i915;
 	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
@@ -174,9 +191,7 @@
 	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
 	engine->base.submit_request = mock_submit_request;
 
-	engine->base.timeline =
-		&i915->gt.global_timeline.engine[engine->base.id];
-
+	i915_timeline_init(i915, &engine->base.timeline, engine->base.name);
 	intel_engine_init_breadcrumbs(&engine->base);
 	engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */
 
@@ -185,7 +200,17 @@
 	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
 	INIT_LIST_HEAD(&engine->hw_queue);
 
+	engine->base.buffer = mock_ring(&engine->base);
+	if (!engine->base.buffer)
+		goto err_breadcrumbs;
+
 	return &engine->base;
+
+err_breadcrumbs:
+	intel_engine_fini_breadcrumbs(&engine->base);
+	i915_timeline_fini(&engine->base.timeline);
+	kfree(engine);
+	return NULL;
 }
 
 void mock_engine_flush(struct intel_engine_cs *engine)
@@ -217,10 +242,12 @@
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
 	if (engine->last_retired_context)
-		engine->context_unpin(engine, engine->last_retired_context);
+		intel_context_unpin(engine->last_retired_context, engine);
+
+	mock_ring_free(engine->buffer);
 
 	intel_engine_fini_breadcrumbs(engine);
+	i915_timeline_fini(&engine->timeline);
 
-	kfree(engine->buffer);
 	kfree(engine);
 }
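
mock_ring() above hands back a pointer to the embedded struct intel_ring
while mock_ring_free() recovers the enclosing struct mock_ring with
container_of(), so the private timeline travels with the ring.  A
minimal sketch of this embed-and-recover idiom, with illustrative types
rather than the i915 ones:

	#include <linux/kernel.h>
	#include <linux/slab.h>

	struct base {
		unsigned int size;
	};

	struct wrapper {
		struct base base;	/* recoverable via container_of() */
		unsigned long private_state;
	};

	static struct base *wrapper_create(void)
	{
		struct wrapper *w = kzalloc(sizeof(*w), GFP_KERNEL);

		if (!w)
			return NULL;
		return &w->base;	/* callers only ever see the base */
	}

	static void wrapper_free(struct base *b)
	{
		struct wrapper *w = container_of(b, typeof(*w), base);

		kfree(w);	/* frees base and private state together */
	}
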
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index e6d4b88..94baedf 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -44,6 +44,7 @@
 		mock_engine_flush(engine);
 
 	i915_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests);
 }
 
 static void mock_device_release(struct drm_device *dev)
@@ -72,8 +73,8 @@
 
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_fini_ggtt(i915);
-	i915_gem_timeline_fini(&i915->gt.global_timeline);
 	mutex_unlock(&i915->drm.struct_mutex);
+	WARN_ON(!list_empty(&i915->gt.timelines));
 
 	destroy_workqueue(i915->wq);
 
@@ -223,26 +224,25 @@
 	if (!i915->priorities)
 		goto err_dependencies;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	INIT_LIST_HEAD(&i915->gt.timelines);
-	err = i915_gem_timeline_init__global(i915);
-	if (err) {
-		mutex_unlock(&i915->drm.struct_mutex);
-		goto err_priorities;
-	}
+	INIT_LIST_HEAD(&i915->gt.active_rings);
+	INIT_LIST_HEAD(&i915->gt.closed_vma);
+
+	mutex_lock(&i915->drm.struct_mutex);
 
 	mock_init_ggtt(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	mkwrite_device_info(i915)->ring_mask = BIT(0);
 	i915->engine[RCS] = mock_engine(i915, "mock", RCS);
 	if (!i915->engine[RCS])
-		goto err_priorities;
+		goto err_unlock;
 
 	i915->kernel_context = mock_context(i915, NULL);
 	if (!i915->kernel_context)
 		goto err_engine;
 
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	WARN_ON(i915_gemfs_init(i915));
 
 	return i915;
@@ -250,7 +250,8 @@
 err_engine:
 	for_each_engine(engine, i915, id)
 		mock_engine_free(engine);
-err_priorities:
+err_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
 	kmem_cache_destroy(i915->priorities);
 err_dependencies:
 	kmem_cache_destroy(i915->dependencies);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index e96873f..36c1120 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -76,7 +76,6 @@
 
 	INIT_LIST_HEAD(&ppgtt->base.global_link);
 	drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total);
-	i915_gem_timeline_init(i915, &ppgtt->base.timeline, name);
 
 	ppgtt->base.clear_range = nop_clear_range;
 	ppgtt->base.insert_page = mock_insert_page;
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index 47b1f47..dcf3b16 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -1,45 +1,28 @@
 /*
- * Copyright © 2017 Intel Corporation
+ * SPDX-License-Identifier: MIT
  *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2017-2018 Intel Corporation
  */
 
+#include "../i915_timeline.h"
+
 #include "mock_timeline.h"
 
-struct intel_timeline *mock_timeline(u64 context)
+void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 {
-	static struct lock_class_key class;
-	struct intel_timeline *tl;
+	timeline->fence_context = context;
 
-	tl = kzalloc(sizeof(*tl), GFP_KERNEL);
-	if (!tl)
-		return NULL;
+	spin_lock_init(&timeline->lock);
 
-	__intel_timeline_init(tl, NULL, context, &class, "mock");
+	init_request_active(&timeline->last_request, NULL);
+	INIT_LIST_HEAD(&timeline->requests);
 
-	return tl;
+	i915_syncmap_init(&timeline->sync);
+
+	INIT_LIST_HEAD(&timeline->link);
 }
 
-void mock_timeline_destroy(struct intel_timeline *tl)
+void mock_timeline_fini(struct i915_timeline *timeline)
 {
-	__intel_timeline_fini(tl);
-	kfree(tl);
+	i915_timeline_fini(timeline);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h
index c27ff46..b6deaa6 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.h
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h
@@ -1,33 +1,15 @@
 /*
- * Copyright © 2017 Intel Corporation
+ * SPDX-License-Identifier: MIT
  *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
+ * Copyright © 2017-2018 Intel Corporation
  */
 
 #ifndef __MOCK_TIMELINE__
 #define __MOCK_TIMELINE__
 
-#include "../i915_gem_timeline.h"
+struct i915_timeline;
 
-struct intel_timeline *mock_timeline(u64 context);
-void mock_timeline_destroy(struct intel_timeline *tl);
+void mock_timeline_init(struct i915_timeline *timeline, u64 context);
+void mock_timeline_fini(struct i915_timeline *timeline);
 
 #endif /* !__MOCK_TIMELINE__ */
diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig
index 294de45..119ec0a 100644
--- a/drivers/gpu/drm/mediatek/Kconfig
+++ b/drivers/gpu/drm/mediatek/Kconfig
@@ -11,6 +11,7 @@
 	select DRM_PANEL
 	select MEMORY
 	select MTK_SMI
+	select VIDEOMODE_HELPERS
 	help
 	  Choose this option if you have a Mediatek SoC.
 	  The module will be called mediatek-drm.
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index e80a603..6c0ea39 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/types.h>
 #include <linux/clk.h>
+#include <video/videomode.h>
 
 #include "mtk_dpi_regs.h"
 #include "mtk_drm_ddp_comp.h"
@@ -429,34 +430,35 @@
 	struct mtk_dpi_sync_param vsync_leven = { 0 };
 	struct mtk_dpi_sync_param vsync_rodd = { 0 };
 	struct mtk_dpi_sync_param vsync_reven = { 0 };
-	unsigned long pix_rate;
+	struct videomode vm = { 0 };
 	unsigned long pll_rate;
 	unsigned int factor;
 
 	/* keep pll_rate within the valid range of tvdpll (1G~2GHz) */
-	pix_rate = 1000UL * mode->clock;
+
 	if (mode->clock <= 27000)
-		factor = 16 * 3;
+		factor = 3 << 4;
 	else if (mode->clock <= 84000)
-		factor = 8 * 3;
+		factor = 3 << 3;
 	else if (mode->clock <= 167000)
-		factor = 4 * 3;
+		factor = 3 << 2;
 	else
-		factor = 2 * 3;
-	pll_rate = pix_rate * factor;
+		factor = 3 << 1;
+	drm_display_mode_to_videomode(mode, &vm);
+	pll_rate = vm.pixelclock * factor;
 
 	dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n",
-		pll_rate, pix_rate);
+		pll_rate, vm.pixelclock);
 
 	clk_set_rate(dpi->tvd_clk, pll_rate);
 	pll_rate = clk_get_rate(dpi->tvd_clk);
 
-	pix_rate = pll_rate / factor;
-	clk_set_rate(dpi->pixel_clk, pix_rate);
-	pix_rate = clk_get_rate(dpi->pixel_clk);
+	vm.pixelclock = pll_rate / factor;
+	clk_set_rate(dpi->pixel_clk, vm.pixelclock);
+	vm.pixelclock = clk_get_rate(dpi->pixel_clk);
 
 	dev_dbg(dpi->dev, "Got  PLL %lu Hz, pixel clock %lu Hz\n",
-		pll_rate, pix_rate);
+		pll_rate, vm.pixelclock);
 
 	limit.c_bottom = 0x0010;
 	limit.c_top = 0x0FE0;
@@ -465,33 +467,31 @@
 
 	dpi_pol.ck_pol = MTK_DPI_POLARITY_FALLING;
 	dpi_pol.de_pol = MTK_DPI_POLARITY_RISING;
-	dpi_pol.hsync_pol = mode->flags & DRM_MODE_FLAG_PHSYNC ?
+	dpi_pol.hsync_pol = vm.flags & DISPLAY_FLAGS_HSYNC_HIGH ?
 			    MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING;
-	dpi_pol.vsync_pol = mode->flags & DRM_MODE_FLAG_PVSYNC ?
+	dpi_pol.vsync_pol = vm.flags & DISPLAY_FLAGS_VSYNC_HIGH ?
 			    MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING;
-
-	hsync.sync_width = mode->hsync_end - mode->hsync_start;
-	hsync.back_porch = mode->htotal - mode->hsync_end;
-	hsync.front_porch = mode->hsync_start - mode->hdisplay;
+	hsync.sync_width = vm.hsync_len;
+	hsync.back_porch = vm.hback_porch;
+	hsync.front_porch = vm.hfront_porch;
 	hsync.shift_half_line = false;
-
-	vsync_lodd.sync_width = mode->vsync_end - mode->vsync_start;
-	vsync_lodd.back_porch = mode->vtotal - mode->vsync_end;
-	vsync_lodd.front_porch = mode->vsync_start - mode->vdisplay;
+	vsync_lodd.sync_width = vm.vsync_len;
+	vsync_lodd.back_porch = vm.vback_porch;
+	vsync_lodd.front_porch = vm.vfront_porch;
 	vsync_lodd.shift_half_line = false;
 
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE &&
+	if (vm.flags & DISPLAY_FLAGS_INTERLACED &&
 	    mode->flags & DRM_MODE_FLAG_3D_MASK) {
 		vsync_leven = vsync_lodd;
 		vsync_rodd = vsync_lodd;
 		vsync_reven = vsync_lodd;
 		vsync_leven.shift_half_line = true;
 		vsync_reven.shift_half_line = true;
-	} else if (mode->flags & DRM_MODE_FLAG_INTERLACE &&
+	} else if (vm.flags & DISPLAY_FLAGS_INTERLACED &&
 		   !(mode->flags & DRM_MODE_FLAG_3D_MASK)) {
 		vsync_leven = vsync_lodd;
 		vsync_leven.shift_half_line = true;
-	} else if (!(mode->flags & DRM_MODE_FLAG_INTERLACE) &&
+	} else if (!(vm.flags & DISPLAY_FLAGS_INTERLACED) &&
 		   mode->flags & DRM_MODE_FLAG_3D_MASK) {
 		vsync_rodd = vsync_lodd;
 	}
@@ -505,12 +505,12 @@
 	mtk_dpi_config_vsync_reven(dpi, &vsync_reven);
 
 	mtk_dpi_config_3d(dpi, !!(mode->flags & DRM_MODE_FLAG_3D_MASK));
-	mtk_dpi_config_interface(dpi, !!(mode->flags &
-					 DRM_MODE_FLAG_INTERLACE));
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-		mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay / 2);
+	mtk_dpi_config_interface(dpi, !!(vm.flags &
+					 DISPLAY_FLAGS_INTERLACED));
+	if (vm.flags & DISPLAY_FLAGS_INTERLACED)
+		mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive >> 1);
 	else
-		mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay);
+		mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive);
 
 	mtk_dpi_config_channel_limit(dpi, &limit);
 	mtk_dpi_config_bit_num(dpi, dpi->bit_num);
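
As a worked example of the factor selection above: a 1080p mode has
mode->clock == 148500 (kHz), which drm_display_mode_to_videomode()
converts to vm.pixelclock == 148500000 Hz.  148500 falls in the
(84000, 167000] branch, so factor = 3 << 2 = 12 and
pll_rate = 148500000 * 12 = 1.782 GHz, inside the 1G~2GHz tvdpll range.
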
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index f595ac8..259b7b0 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -220,7 +220,7 @@
 	mtk_gem = mtk_drm_gem_init(dev, attach->dmabuf->size);
 
 	if (IS_ERR(mtk_gem))
-		return ERR_PTR(PTR_ERR(mtk_gem));
+		return ERR_CAST(mtk_gem);
 
 	expected = sg_dma_address(sg->sgl);
 	for_each_sg(sg->sgl, s, sg->nents, i) {
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 7e5e24c..aa0943e 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -551,13 +551,12 @@
 	}
 
 	/**
-	 * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000
 	 * htotal_time = htotal * byte_per_pixel / num_lanes
 	 * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit
 	 * mipi_ratio = (htotal_time + overhead_time) / htotal_time
 	 * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes;
 	 */
-	pixel_clock = dsi->vm.pixelclock * 1000;
+	pixel_clock = dsi->vm.pixelclock;
 	htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch +
 			dsi->vm.hsync_len;
 	htotal_bits = htotal * bit_per_pixel;
@@ -725,16 +724,7 @@
 {
 	struct mtk_dsi *dsi = encoder_to_dsi(encoder);
 
-	dsi->vm.pixelclock = adjusted->clock;
-	dsi->vm.hactive = adjusted->hdisplay;
-	dsi->vm.hback_porch = adjusted->htotal - adjusted->hsync_end;
-	dsi->vm.hfront_porch = adjusted->hsync_start - adjusted->hdisplay;
-	dsi->vm.hsync_len = adjusted->hsync_end - adjusted->hsync_start;
-
-	dsi->vm.vactive = adjusted->vdisplay;
-	dsi->vm.vback_porch = adjusted->vtotal - adjusted->vsync_end;
-	dsi->vm.vfront_porch = adjusted->vsync_start - adjusted->vdisplay;
-	dsi->vm.vsync_len = adjusted->vsync_end - adjusted->vsync_start;
+	drm_display_mode_to_videomode(adjusted, &dsi->vm);
 }
 
 static void mtk_dsi_encoder_disable(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index fb50a9d..8918539 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1586,7 +1586,7 @@
 
 #define MODE_BANDWIDTH	MODE_BAD
 
-static int mga_vga_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status mga_vga_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 8e0cb16..0ae5ace 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -168,7 +168,6 @@
 	if (gpu->funcs->debugfs_init) {
 		gpu->funcs->debugfs_init(gpu, dev->primary);
 		gpu->funcs->debugfs_init(gpu, dev->render);
-		gpu->funcs->debugfs_init(gpu, dev->control);
 	}
 #endif
 
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index ba74cb4..1ff3fda 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -140,9 +140,6 @@
 	if (ret)
 		return ret;
 	ret = late_init_minor(dev->render);
-	if (ret)
-		return ret;
-	ret = late_init_minor(dev->control);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
index 5cae8db..ffe5137 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
@@ -99,7 +99,8 @@
 };
 
 static void mxsfb_pipe_enable(struct drm_simple_display_pipe *pipe,
-			      struct drm_crtc_state *crtc_state)
+			      struct drm_crtc_state *crtc_state,
+			      struct drm_plane_state *plane_state)
 {
 	struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe);
 
@@ -125,12 +126,6 @@
 	mxsfb_plane_atomic_update(mxsfb, plane_state);
 }
 
-static int mxsfb_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
-				 struct drm_plane_state *plane_state)
-{
-	return drm_gem_fb_prepare_fb(&pipe->plane, plane_state);
-}
-
 static int mxsfb_pipe_enable_vblank(struct drm_simple_display_pipe *pipe)
 {
 	struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe);
@@ -159,7 +154,7 @@
 	.enable		= mxsfb_pipe_enable,
 	.disable	= mxsfb_pipe_disable,
 	.update		= mxsfb_pipe_update,
-	.prepare_fb	= mxsfb_pipe_prepare_fb,
+	.prepare_fb	= drm_gem_fb_simple_display_pipe_prepare_fb,
 	.enable_vblank	= mxsfb_pipe_enable_vblank,
 	.disable_vblank	= mxsfb_pipe_disable_vblank,
 };
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 9c0c650..b17843d 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -38,14 +38,16 @@
 
 # DRM - modesetting
 nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o
+nouveau-y += nouveau_bios.o
 nouveau-y += nouveau_connector.o
 nouveau-y += nouveau_display.o
-nouveau-y += nv50_display.o
 nouveau-y += nouveau_dp.o
 nouveau-y += nouveau_fbcon.o
 nouveau-y += nv04_fbcon.o
 nouveau-y += nv50_fbcon.o
 nouveau-y += nvc0_fbcon.o
+include $(src)/dispnv04/Kbuild
+include $(src)/dispnv50/Kbuild
 
 # DRM - command submission
 nouveau-y += nouveau_abi16.o
@@ -59,8 +61,4 @@
 nouveau-y += nv84_fence.o
 nouveau-y += nvc0_fence.o
 
-# DRM - prehistoric modesetting (NV04-G7x)
-nouveau-y += nouveau_bios.o
-include $(src)/dispnv04/Kbuild
-
 obj-$(CONFIG_DRM_NOUVEAU) += nouveau.o
diff --git a/drivers/gpu/drm/nouveau/dispnv50/Kbuild b/drivers/gpu/drm/nouveau/dispnv50/Kbuild
new file mode 100644
index 0000000..849b0f4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/Kbuild
@@ -0,0 +1,51 @@
+nouveau-y += dispnv50/disp.o
+nouveau-y += dispnv50/lut.o
+
+nouveau-y += dispnv50/core.o
+nouveau-y += dispnv50/core507d.o
+nouveau-y += dispnv50/core827d.o
+nouveau-y += dispnv50/core907d.o
+nouveau-y += dispnv50/core917d.o
+nouveau-y += dispnv50/corec37d.o
+
+nouveau-y += dispnv50/dac507d.o
+nouveau-y += dispnv50/dac907d.o
+
+nouveau-y += dispnv50/pior507d.o
+
+nouveau-y += dispnv50/sor507d.o
+nouveau-y += dispnv50/sor907d.o
+nouveau-y += dispnv50/sorc37d.o
+
+nouveau-y += dispnv50/head.o
+nouveau-y += dispnv50/head507d.o
+nouveau-y += dispnv50/head827d.o
+nouveau-y += dispnv50/head907d.o
+nouveau-y += dispnv50/head917d.o
+nouveau-y += dispnv50/headc37d.o
+
+nouveau-y += dispnv50/wimm.o
+nouveau-y += dispnv50/wimmc37b.o
+
+nouveau-y += dispnv50/wndw.o
+nouveau-y += dispnv50/wndwc37e.o
+
+nouveau-y += dispnv50/base.o
+nouveau-y += dispnv50/base507c.o
+nouveau-y += dispnv50/base827c.o
+nouveau-y += dispnv50/base907c.o
+nouveau-y += dispnv50/base917c.o
+
+nouveau-y += dispnv50/curs.o
+nouveau-y += dispnv50/curs507a.o
+nouveau-y += dispnv50/curs907a.o
+nouveau-y += dispnv50/cursc37a.o
+
+nouveau-y += dispnv50/oimm.o
+nouveau-y += dispnv50/oimm507b.o
+
+nouveau-y += dispnv50/ovly.o
+nouveau-y += dispnv50/ovly507e.o
+nouveau-y += dispnv50/ovly827e.o
+nouveau-y += dispnv50/ovly907e.o
+nouveau-y += dispnv50/ovly917e.o
diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h
new file mode 100644
index 0000000..908feb1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h
@@ -0,0 +1,222 @@
+#ifndef __NV50_KMS_ATOM_H__
+#define __NV50_KMS_ATOM_H__
+#define nv50_atom(p) container_of((p), struct nv50_atom, state)
+#include <drm/drm_atomic.h>
+
+struct nv50_atom {
+	struct drm_atomic_state state;
+
+	struct list_head outp;
+	bool lock_core;
+	bool flush_disable;
+};
+
+#define nv50_head_atom(p) container_of((p), struct nv50_head_atom, state)
+
+struct nv50_head_atom {
+	struct drm_crtc_state state;
+
+	struct {
+		u32 mask;
+		u32 olut;
+	} wndw;
+
+	struct {
+		u16 iW;
+		u16 iH;
+		u16 oW;
+		u16 oH;
+	} view;
+
+	struct nv50_head_mode {
+		bool interlace;
+		u32 clock;
+		struct {
+			u16 active;
+			u16 synce;
+			u16 blanke;
+			u16 blanks;
+		} h;
+		struct {
+			u32 active;
+			u16 synce;
+			u16 blanke;
+			u16 blanks;
+			u16 blank2s;
+			u16 blank2e;
+			u16 blankus;
+		} v;
+	} mode;
+
+	struct {
+		bool visible;
+		u32 handle;
+		u64 offset:40;
+		u8 buffer:1;
+		u8 mode:4;
+		u8 size:2;
+		u8 range:2;
+		u8 output_mode:2;
+	} olut;
+
+	struct {
+		bool visible;
+		u32 handle;
+		u64 offset:40;
+		u8  format;
+		u8  kind:7;
+		u8  layout:1;
+		u8  blockh:4;
+		u16 blocks:12;
+		u32 pitch:20;
+		u16 x;
+		u16 y;
+		u16 w;
+		u16 h;
+	} core;
+
+	struct {
+		bool visible;
+		u32 handle;
+		u64 offset:40;
+		u8  layout:2;
+		u8  format:8;
+	} curs;
+
+	struct {
+		u8  depth;
+		u8  cpp;
+		u16 x;
+		u16 y;
+		u16 w;
+		u16 h;
+	} base;
+
+	struct {
+		u8 cpp;
+	} ovly;
+
+	struct {
+		bool enable:1;
+		u8 bits:2;
+		u8 mode:4;
+	} dither;
+
+	struct {
+		struct {
+			u16 cos:12;
+			u16 sin:12;
+		} sat;
+	} procamp;
+
+	struct {
+		u8 nhsync:1;
+		u8 nvsync:1;
+		u8 depth:4;
+	} or;
+
+	union nv50_head_atom_mask {
+		struct {
+			bool olut:1;
+			bool core:1;
+			bool curs:1;
+			bool view:1;
+			bool mode:1;
+			bool base:1;
+			bool ovly:1;
+			bool dither:1;
+			bool procamp:1;
+			bool or:1;
+		};
+		u16 mask;
+	} set, clr;
+};
+
+static inline struct nv50_head_atom *
+nv50_head_atom_get(struct drm_atomic_state *state, struct drm_crtc *crtc)
+{
+	struct drm_crtc_state *statec = drm_atomic_get_crtc_state(state, crtc);
+	if (IS_ERR(statec))
+		return (void *)statec;
+	return nv50_head_atom(statec);
+}
+
+#define nv50_wndw_atom(p) container_of((p), struct nv50_wndw_atom, state)
+
+struct nv50_wndw_atom {
+	struct drm_plane_state state;
+
+	struct drm_property_blob *ilut;
+	bool visible;
+
+	struct {
+		u32  handle;
+		u16  offset:12;
+		bool awaken:1;
+	} ntfy;
+
+	struct {
+		u32 handle;
+		u16 offset:12;
+		u32 acquire;
+		u32 release;
+	} sema;
+
+	struct {
+		u32 handle;
+		struct {
+			u64 offset:40;
+			u8  buffer:1;
+			u8  enable:2;
+			u8  mode:4;
+			u8  size:2;
+			u8  range:2;
+			u8  output_mode:2;
+		} i;
+	} xlut;
+
+	struct {
+		u8  mode:2;
+		u8  interval:4;
+
+		u8  colorspace:2;
+		u8  format;
+		u8  kind:7;
+		u8  layout:1;
+		u8  blockh:4;
+		u16 blocks[3];
+		u32 pitch[3];
+		u16 w;
+		u16 h;
+
+		u32 handle[6];
+		u64 offset[6];
+	} image;
+
+	struct {
+		u16 sx;
+		u16 sy;
+		u16 sw;
+		u16 sh;
+		u16 dw;
+		u16 dh;
+	} scale;
+
+	struct {
+		u16 x;
+		u16 y;
+	} point;
+
+	union nv50_wndw_atom_mask {
+		struct {
+			bool ntfy:1;
+			bool sema:1;
+			bool xlut:1;
+			bool image:1;
+			bool scale:1;
+			bool point:1;
+		};
+		u8 mask;
+	} set, clr;
+};
+#endif
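
The set/clr members above union an anonymous bitfield struct with a
scalar, so individual fields can be dirtied by name while the whole
state is tested or cleared in a single access.  A minimal sketch of the
idiom with illustrative fields (the exact bit layout is
compiler-defined, which is fine here since the mask is only read by the
same code that sets it):

	#include <linux/types.h>

	union dirty_mask {
		struct {
			bool mode:1;
			bool base:1;
			bool curs:1;
		};
		u8 mask;
	};

	static void mark_mode_dirty(union dirty_mask *d)
	{
		d->mode = true;		/* set one flag by name */
	}

	static bool flush_if_dirty(union dirty_mask *d)
	{
		if (!d->mask)		/* test every flag in one access */
			return false;
		d->mask = 0;		/* clear every flag in one access */
		return true;
	}
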
diff --git a/drivers/gpu/drm/nouveau/dispnv50/base.c b/drivers/gpu/drm/nouveau/dispnv50/base.c
new file mode 100644
index 0000000..7c752ac
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/base.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "base.h"
+
+#include <nvif/class.h>
+
+int
+nv50_base_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw)
+{
+	struct {
+		s32 oclass;
+		int version;
+		int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+	} bases[] = {
+		{ GK110_DISP_BASE_CHANNEL_DMA, 0, base917c_new },
+		{ GK104_DISP_BASE_CHANNEL_DMA, 0, base917c_new },
+		{ GF110_DISP_BASE_CHANNEL_DMA, 0, base907c_new },
+		{ GT214_DISP_BASE_CHANNEL_DMA, 0, base827c_new },
+		{ GT200_DISP_BASE_CHANNEL_DMA, 0, base827c_new },
+		{   G82_DISP_BASE_CHANNEL_DMA, 0, base827c_new },
+		{  NV50_DISP_BASE_CHANNEL_DMA, 0, base507c_new },
+		{}
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int cid;
+
+	cid = nvif_mclass(&disp->disp->object, bases);
+	if (cid < 0) {
+		NV_ERROR(drm, "No supported base class\n");
+		return cid;
+	}
+
+	return bases[cid].new(drm, head, bases[cid].oclass, pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/base.h b/drivers/gpu/drm/nouveau/dispnv50/base.h
new file mode 100644
index 0000000..e7f14f2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/base.h
@@ -0,0 +1,31 @@
+#ifndef __NV50_KMS_BASE_H__
+#define __NV50_KMS_BASE_H__
+#include "wndw.h"
+
+int base507c_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+int base507c_new_(const struct nv50_wndw_func *, const u32 *format,
+		  struct nouveau_drm *, int head, s32 oclass,
+		  u32 interlock_data, struct nv50_wndw **);
+extern const u32 base507c_format[];
+int base507c_acquire(struct nv50_wndw *, struct nv50_wndw_atom *,
+		     struct nv50_head_atom *);
+void base507c_release(struct nv50_wndw *, struct nv50_wndw_atom *,
+		      struct nv50_head_atom *);
+void base507c_sema_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+void base507c_sema_clr(struct nv50_wndw *);
+void base507c_ntfy_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+void base507c_ntfy_clr(struct nv50_wndw *);
+void base507c_xlut_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+void base507c_xlut_clr(struct nv50_wndw *);
+void base507c_image_clr(struct nv50_wndw *);
+void base507c_update(struct nv50_wndw *, u32 *);
+
+int base827c_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+
+int base907c_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+extern const struct nv50_wndw_func base907c;
+
+int base917c_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+
+int nv50_base_new(struct nouveau_drm *, int head, struct nv50_wndw **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/base507c.c b/drivers/gpu/drm/nouveau/dispnv50/base507c.c
new file mode 100644
index 0000000..d5e295c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/base507c.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "base.h"
+
+#include <nvif/cl507c.h>
+#include <nvif/event.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include "nouveau_bo.h"
+
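+/*
+ * Every channel method below follows the same EVO push-buffer pattern:
+ * evo_wait() reserves space for a number of dwords, evo_mthd() emits a
+ * method header, evo_data() a payload dword, and evo_kick() submits
+ * the pushes to the channel.
+ */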
+void
+base507c_update(struct nv50_wndw *wndw, u32 *interlock)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x0080, 1);
+		evo_data(push, interlock[NV50_DISP_INTERLOCK_CORE]);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+base507c_image_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 4))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+base507c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 10))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, asyw->image.mode << 8 |
+			       asyw->image.interval << 4);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, asyw->image.handle[0]);
+		evo_mthd(push, 0x0800, 5);
+		evo_data(push, asyw->image.offset[0] >> 8);
+		evo_data(push, 0x00000000);
+		evo_data(push, asyw->image.h << 16 | asyw->image.w);
+		evo_data(push, asyw->image.layout << 20 |
+			       (asyw->image.pitch[0] >> 8) << 8 |
+			       asyw->image.blocks[0] << 8 |
+			       asyw->image.blockh);
+		evo_data(push, asyw->image.kind << 16 |
+			       asyw->image.format << 8);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+base507c_xlut_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x00e0, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+base507c_xlut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x00e0, 1);
+		evo_data(push, 0x40000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+int
+base507c_ntfy_wait_begun(struct nouveau_bo *bo, u32 offset,
+			 struct nvif_device *device)
+{
+	s64 time = nvif_msec(device, 2000ULL,
+		u32 data = nouveau_bo_rd32(bo, offset / 4);
+		if ((data & 0xc0000000) == 0x40000000)
+			break;
+		usleep_range(1, 2);
+	);
+	return time < 0 ? time : 0;
+}
+
+void
+base507c_ntfy_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x00a4, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+base507c_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 3))) {
+		evo_mthd(push, 0x00a0, 2);
+		evo_data(push, asyw->ntfy.awaken << 30 | asyw->ntfy.offset);
+		evo_data(push, asyw->ntfy.handle);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+base507c_ntfy_reset(struct nouveau_bo *bo, u32 offset)
+{
+	nouveau_bo_wr32(bo, offset / 4, 0x00000000);
+}
+
+void
+base507c_sema_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x0094, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+base507c_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 5))) {
+		evo_mthd(push, 0x0088, 4);
+		evo_data(push, asyw->sema.offset);
+		evo_data(push, asyw->sema.acquire);
+		evo_data(push, asyw->sema.release);
+		evo_data(push, asyw->sema.handle);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+base507c_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		 struct nv50_head_atom *asyh)
+{
+	asyh->base.cpp = 0;
+}
+
+int
+base507c_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		 struct nv50_head_atom *asyh)
+{
+	const struct drm_framebuffer *fb = asyw->state.fb;
+	int ret;
+
+	if (!fb->format->depth)
+		return -EINVAL;
+
+	ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  false, true);
+	if (ret)
+		return ret;
+
+	if (!wndw->func->ilut) {
+		if ((asyh->base.cpp != 1) ^ (fb->format->cpp[0] != 1))
+			asyh->state.color_mgmt_changed = true;
+	}
+
+	asyh->base.depth = fb->format->depth;
+	asyh->base.cpp = fb->format->cpp[0];
+	asyh->base.x = asyw->state.src.x1 >> 16;
+	asyh->base.y = asyw->state.src.y1 >> 16;
+	asyh->base.w = asyw->state.fb->width;
+	asyh->base.h = asyw->state.fb->height;
+	return 0;
+}
+
+const u32
+base507c_format[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ABGR2101010,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR8888,
+	0
+};
+
+static const struct nv50_wndw_func
+base507c = {
+	.acquire = base507c_acquire,
+	.release = base507c_release,
+	.sema_set = base507c_sema_set,
+	.sema_clr = base507c_sema_clr,
+	.ntfy_reset = base507c_ntfy_reset,
+	.ntfy_set = base507c_ntfy_set,
+	.ntfy_clr = base507c_ntfy_clr,
+	.ntfy_wait_begun = base507c_ntfy_wait_begun,
+	.olut_core = 1,
+	.xlut_set = base507c_xlut_set,
+	.xlut_clr = base507c_xlut_clr,
+	.image_set = base507c_image_set,
+	.image_clr = base507c_image_clr,
+	.update = base507c_update,
+};
+
+int
+base507c_new_(const struct nv50_wndw_func *func, const u32 *format,
+	      struct nouveau_drm *drm, int head, s32 oclass, u32 interlock_data,
+	      struct nv50_wndw **pwndw)
+{
+	struct nv50_disp_base_channel_dma_v0 args = {
+		.head = head,
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_wndw *wndw;
+	int ret;
+
+	ret = nv50_wndw_new_(func, drm->dev, DRM_PLANE_TYPE_PRIMARY,
+			     "base", head, format, BIT(head),
+			     NV50_DISP_INTERLOCK_BASE, interlock_data, &wndw);
+	if (*pwndw = wndw, ret)
+		return ret;
+
+	ret = nv50_dmac_create(&drm->client.device, &disp->disp->object,
+			       &oclass, head, &args, sizeof(args),
+			       disp->sync->bo.offset, &wndw->wndw);
+	if (ret) {
+		NV_ERROR(drm, "base%04x allocation failed: %d\n", oclass, ret);
+		return ret;
+	}
+
+	ret = nvif_notify_init(&wndw->wndw.base.user, wndw->notify.func,
+			       false, NV50_DISP_BASE_CHANNEL_DMA_V0_NTFY_UEVENT,
+			       &(struct nvif_notify_uevent_req) {},
+			       sizeof(struct nvif_notify_uevent_req),
+			       sizeof(struct nvif_notify_uevent_rep),
+			       &wndw->notify);
+	if (ret)
+		return ret;
+
+	wndw->ntfy = NV50_DISP_BASE_NTFY(wndw->id);
+	wndw->sema = NV50_DISP_BASE_SEM0(wndw->id);
+	wndw->data = 0x00000000;
+	return 0;
+}
+
+int
+base507c_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return base507c_new_(&base507c, base507c_format, drm, head, oclass,
+			     0x00000002 << (head * 8), pwndw);
+}
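
Every window implementation above speaks the same EVO method stream: evo_wait() reserves space in the channel's push buffer, evo_mthd() emits a header word naming a method and a payload count, evo_data() emits the payload, and evo_kick() publishes the new PUT pointer (evo_wait()/evo_kick() are defined in disp.c further down in this patch). The two emit helpers live in a header outside this diff; a sketch of what they amount to, where the count-at-bit-18 header layout is an assumption about the EVO stream format:

	/* Sketch only; the real macros are in a nouveau header not shown
	 * here, and the bit-18 count position is assumed. */
	#define evo_mthd(p, m, c) (*((p)++) = ((c) << 18) | (m))  /* header  */
	#define evo_data(p, d)    (*((p)++) = (d))                /* payload */

The word budgets line up under that model: base507c_image_set() reserves 10 words and emits exactly three headers plus seven payload words.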
diff --git a/drivers/gpu/drm/nouveau/dispnv50/base827c.c b/drivers/gpu/drm/nouveau/dispnv50/base827c.c
new file mode 100644
index 0000000..7364681
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/base827c.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "base.h"
+
+static void
+base827c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 10))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, asyw->image.mode << 8 |
+			       asyw->image.interval << 4);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, asyw->image.handle[0]);
+		evo_mthd(push, 0x0800, 5);
+		evo_data(push, asyw->image.offset[0] >> 8);
+		evo_data(push, 0x00000000);
+		evo_data(push, asyw->image.h << 16 | asyw->image.w);
+		evo_data(push, asyw->image.layout << 20 |
+			       (asyw->image.pitch[0] >> 8) << 8 |
+			       asyw->image.blocks[0] << 8 |
+			       asyw->image.blockh);
+		evo_data(push, asyw->image.format << 8);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static const struct nv50_wndw_func
+base827c = {
+	.acquire = base507c_acquire,
+	.release = base507c_release,
+	.sema_set = base507c_sema_set,
+	.sema_clr = base507c_sema_clr,
+	.ntfy_reset = base507c_ntfy_reset,
+	.ntfy_set = base507c_ntfy_set,
+	.ntfy_clr = base507c_ntfy_clr,
+	.ntfy_wait_begun = base507c_ntfy_wait_begun,
+	.olut_core = 1,
+	.xlut_set = base507c_xlut_set,
+	.xlut_clr = base507c_xlut_clr,
+	.image_set = base827c_image_set,
+	.image_clr = base507c_image_clr,
+	.update = base507c_update,
+};
+
+int
+base827c_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return base507c_new_(&base827c, base507c_format, drm, head, oclass,
+			     0x00000002 << (head * 8), pwndw);
+}
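
Relative to base507c, the only override here is image_set(), and the only behavioral difference is the final payload word: the memory-kind field is gone, suggesting G82 takes the kind from the ctxdma rather than from this method. Side by side, copied from the two hunks above:

	/* base507c: */ evo_data(push, asyw->image.kind << 16 | asyw->image.format << 8);
	/* base827c: */ evo_data(push, asyw->image.format << 8);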
diff --git a/drivers/gpu/drm/nouveau/dispnv50/base907c.c b/drivers/gpu/drm/nouveau/dispnv50/base907c.c
new file mode 100644
index 0000000..a562fc9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/base907c.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "base.h"
+
+static void
+base907c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 10))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, asyw->image.mode << 8 |
+			       asyw->image.interval << 4);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, asyw->image.handle[0]);
+		evo_mthd(push, 0x0400, 5);
+		evo_data(push, asyw->image.offset[0] >> 8);
+		evo_data(push, 0x00000000);
+		evo_data(push, asyw->image.h << 16 | asyw->image.w);
+		evo_data(push, asyw->image.layout << 24 |
+			       (asyw->image.pitch[0] >> 8) << 8 |
+			       asyw->image.blocks[0] << 8 |
+			       asyw->image.blockh);
+		evo_data(push, asyw->image.format << 8);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+base907c_xlut_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 6))) {
+		evo_mthd(push, 0x00e0, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x00e8, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x00fc, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+base907c_xlut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 6))) {
+		evo_mthd(push, 0x00e0, 3);
+		evo_data(push, asyw->xlut.i.enable << 30 |
+			       asyw->xlut.i.mode << 24);
+		evo_data(push, asyw->xlut.i.offset >> 8);
+		evo_data(push, 0x40000000);
+		evo_mthd(push, 0x00fc, 1);
+		evo_data(push, asyw->xlut.handle);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+base907c_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	asyw->xlut.i.mode = 7;
+	asyw->xlut.i.enable = 2;
+}
+
+const struct nv50_wndw_func
+base907c = {
+	.acquire = base507c_acquire,
+	.release = base507c_release,
+	.sema_set = base507c_sema_set,
+	.sema_clr = base507c_sema_clr,
+	.ntfy_reset = base507c_ntfy_reset,
+	.ntfy_set = base507c_ntfy_set,
+	.ntfy_clr = base507c_ntfy_clr,
+	.ntfy_wait_begun = base507c_ntfy_wait_begun,
+	.ilut = base907c_ilut,
+	.olut_core = true,
+	.xlut_set = base907c_xlut_set,
+	.xlut_clr = base907c_xlut_clr,
+	.image_set = base907c_image_set,
+	.image_clr = base507c_image_clr,
+	.update = base507c_update,
+};
+
+int
+base907c_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return base507c_new_(&base907c, base507c_format, drm, head, oclass,
+			     0x00000002 << (head * 4), pwndw);
+}
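
base907c is the first of these windows with an input LUT hook: ilut() flags the atom for mode 7 / enable 2, and xlut_set() grows from base507c's single write into a four-word sequence plus a ctxdma handle. Assuming the usual auto-increment method numbering (consecutive payload words land on consecutive methods), the writes decode as below; calling 0x40000000 a load/enable bit is an inference from base507c's xlut_set() pushing the same value:

	/* 0x00e0: enable << 30 | mode << 24     LUT control               */
	/* 0x00e4: offset >> 8                   LUT surface offset        */
	/* 0x00e8: 0x40000000                    load/enable bit, inferred */
	/* 0x00fc: asyw->xlut.handle             ctxdma for the LUT        */

image_set() also moved: the surface group sits at 0x0400 rather than 0x0800, and the layout field at bit 24 rather than 20.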
diff --git a/drivers/gpu/drm/nouveau/dispnv50/base917c.c b/drivers/gpu/drm/nouveau/dispnv50/base917c.c
new file mode 100644
index 0000000..54d705b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/base917c.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "base.h"
+#include "atom.h"
+
+const u32
+base917c_format[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ABGR2101010,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_ARGB2101010,
+	0
+};
+
+int
+base917c_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return base507c_new_(&base907c, base917c_format, drm, head, oclass,
+			     0x00000002 << (head * 4), pwndw);
+}
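
base917c adds no channel code at all: it reuses base907c's function table wholesale and only widens the format list with the XRGB2101010/ARGB2101010 entries. The format arrays are zero-terminated; nv50_wndw_new_() is outside this excerpt, but a consumer would presumably size them along these lines:

	/* Hypothetical helper, shown only to document the sentinel
	 * convention used by base507c_format[] and base917c_format[]. */
	static int
	format_count(const u32 *format)
	{
		int nr = 0;
		while (format[nr])
			nr++;
		return nr;
	}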
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c
new file mode 100644
index 0000000..f3c49ad
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/core.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+
+#include <nvif/class.h>
+
+void
+nv50_core_del(struct nv50_core **pcore)
+{
+	struct nv50_core *core = *pcore;
+	if (core) {
+		nv50_dmac_destroy(&core->chan);
+		kfree(*pcore);
+		*pcore = NULL;
+	}
+}
+
+int
+nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore)
+{
+	struct {
+		s32 oclass;
+		int version;
+		int (*new)(struct nouveau_drm *, s32, struct nv50_core **);
+	} cores[] = {
+		{ GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new },
+		{ GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
+		{ GP100_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
+		{ GM200_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
+		{ GM107_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
+		{ GK110_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
+		{ GK104_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
+		{ GF110_DISP_CORE_CHANNEL_DMA, 0, core907d_new },
+		{ GT214_DISP_CORE_CHANNEL_DMA, 0, core827d_new },
+		{ GT206_DISP_CORE_CHANNEL_DMA, 0, core827d_new },
+		{ GT200_DISP_CORE_CHANNEL_DMA, 0, core827d_new },
+		{   G82_DISP_CORE_CHANNEL_DMA, 0, core827d_new },
+		{  NV50_DISP_CORE_CHANNEL_DMA, 0, core507d_new },
+		{}
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int cid;
+
+	cid = nvif_mclass(&disp->disp->object, cores);
+	if (cid < 0) {
+		NV_ERROR(drm, "No supported core channel class\n");
+		return cid;
+	}
+
+	return cores[cid].new(drm, cores[cid].oclass, pcore);
+}
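
The cores[] table is ordered newest hardware first, and nvif_mclass() returns the index of the first entry whose class the display object actually implements (negative if none match). nvif_mclass() itself is a macro in the nvif headers, outside this diff; expressed in terms of the two sclass calls that nv50_chan_create() uses later in this patch, it amounts to roughly:

	/* Rough illustration only; the real nvif_mclass() also carries a
	 * version per entry.  'struct class_probe' is hypothetical. */
	struct class_probe { s32 oclass; };

	static int
	pick_first_supported(struct nvif_object *object,
			     const struct class_probe *m)
	{
		struct nvif_sclass *sclass;
		int i, j, n = nvif_object_sclass_get(object, &sclass);
		if (n < 0)
			return n;
		for (i = 0; m[i].oclass; i++) {       /* newest first */
			for (j = 0; j < n; j++) {
				if (sclass[j].oclass != m[i].oclass)
					continue;
				nvif_object_sclass_put(&sclass);
				return i;
			}
		}
		nvif_object_sclass_put(&sclass);
		return -ENOSYS;
	}

curs.c below repeats the same pattern for the cursor classes.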
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.h b/drivers/gpu/drm/nouveau/dispnv50/core.h
new file mode 100644
index 0000000..8470df9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/core.h
@@ -0,0 +1,50 @@
+#ifndef __NV50_KMS_CORE_H__
+#define __NV50_KMS_CORE_H__
+#include "disp.h"
+#include "atom.h"
+
+struct nv50_core {
+	const struct nv50_core_func *func;
+	struct nv50_dmac chan;
+};
+
+int nv50_core_new(struct nouveau_drm *, struct nv50_core **);
+void nv50_core_del(struct nv50_core **);
+
+struct nv50_core_func {
+	void (*init)(struct nv50_core *);
+	void (*ntfy_init)(struct nouveau_bo *, u32 offset);
+	int (*ntfy_wait_done)(struct nouveau_bo *, u32 offset,
+			      struct nvif_device *);
+	void (*update)(struct nv50_core *, u32 *interlock, bool ntfy);
+
+	const struct nv50_head_func *head;
+	const struct nv50_outp_func {
+		void (*ctrl)(struct nv50_core *, int or, u32 ctrl,
+			     struct nv50_head_atom *);
+	} *dac, *pior, *sor;
+};
+
+int core507d_new(struct nouveau_drm *, s32, struct nv50_core **);
+int core507d_new_(const struct nv50_core_func *, struct nouveau_drm *, s32,
+		  struct nv50_core **);
+void core507d_init(struct nv50_core *);
+void core507d_ntfy_init(struct nouveau_bo *, u32);
+int core507d_ntfy_wait_done(struct nouveau_bo *, u32, struct nvif_device *);
+void core507d_update(struct nv50_core *, u32 *, bool);
+
+extern const struct nv50_outp_func dac507d;
+extern const struct nv50_outp_func sor507d;
+extern const struct nv50_outp_func pior507d;
+
+int core827d_new(struct nouveau_drm *, s32, struct nv50_core **);
+
+int core907d_new(struct nouveau_drm *, s32, struct nv50_core **);
+extern const struct nv50_outp_func dac907d;
+extern const struct nv50_outp_func sor907d;
+
+int core917d_new(struct nouveau_drm *, s32, struct nv50_core **);
+
+int corec37d_new(struct nouveau_drm *, s32, struct nv50_core **);
+extern const struct nv50_outp_func sorc37d;
+#endif
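
nv50_core_func is the per-generation vtable: channel housekeeping up top, then one head table and one ctrl table per output type. The .c files in this patch fill it in unevenly, which is already visible in the externs above (corec37d exports only sorc37d; the core907d era drops pior):

	/* Dispatch shape, as wired up by the core*.c files:
	 *
	 *	core->func->update(core, interlock, ntfy);
	 *	core->func->dac->ctrl(core, or, ctrl, asyh);
	 *	core->func->sor->ctrl(core, or, ctrl, asyh);
	 *
	 * Sub-tables a generation lacks stay NULL; presumably encoders of
	 * those types are never created there, so the NULLs go unchased.
	 */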
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core507d.c b/drivers/gpu/drm/nouveau/dispnv50/core507d.c
new file mode 100644
index 0000000..e7fcfa6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/core507d.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+#include "head.h"
+
+#include <nvif/cl507d.h>
+
+#include "nouveau_bo.h"
+
+void
+core507d_update(struct nv50_core *core, u32 *interlock, bool ntfy)
+{
+	u32 *push;
+	if ((push = evo_wait(&core->chan, 5))) {
+		if (ntfy) {
+			evo_mthd(push, 0x0084, 1);
+			evo_data(push, 0x80000000 | NV50_DISP_CORE_NTFY);
+		}
+		evo_mthd(push, 0x0080, 2);
+		evo_data(push, interlock[NV50_DISP_INTERLOCK_BASE] |
+			       interlock[NV50_DISP_INTERLOCK_OVLY]);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &core->chan);
+	}
+}
+
+int
+core507d_ntfy_wait_done(struct nouveau_bo *bo, u32 offset,
+			struct nvif_device *device)
+{
+	s64 time = nvif_msec(device, 2000ULL,
+		if (nouveau_bo_rd32(bo, offset / 4))
+			break;
+		usleep_range(1, 2);
+	);
+	return time < 0 ? time : 0;
+}
+
+void
+core507d_ntfy_init(struct nouveau_bo *bo, u32 offset)
+{
+	nouveau_bo_wr32(bo, offset / 4, 0x00000000);
+}
+
+void
+core507d_init(struct nv50_core *core)
+{
+	u32 *push;
+	if ((push = evo_wait(&core->chan, 2))) {
+		evo_mthd(push, 0x0088, 1);
+		evo_data(push, core->chan.sync.handle);
+		evo_kick(push, &core->chan);
+	}
+}
+
+static const struct nv50_core_func
+core507d = {
+	.init = core507d_init,
+	.ntfy_init = core507d_ntfy_init,
+	.ntfy_wait_done = core507d_ntfy_wait_done,
+	.update = core507d_update,
+	.head = &head507d,
+	.dac = &dac507d,
+	.sor = &sor507d,
+	.pior = &pior507d,
+};
+
+int
+core507d_new_(const struct nv50_core_func *func, struct nouveau_drm *drm,
+	      s32 oclass, struct nv50_core **pcore)
+{
+	struct nv50_disp_core_channel_dma_v0 args = {};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_core *core;
+	int ret;
+
+	if (!(core = *pcore = kzalloc(sizeof(*core), GFP_KERNEL)))
+		return -ENOMEM;
+	core->func = func;
+
+	ret = nv50_dmac_create(&drm->client.device, &disp->disp->object,
+			       &oclass, 0, &args, sizeof(args),
+			       disp->sync->bo.offset, &core->chan);
+	if (ret) {
+		NV_ERROR(drm, "core%04x allocation failed: %d\n", oclass, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int
+core507d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore)
+{
+	return core507d_new_(&core507d, drm, oclass, pcore);
+}
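
core507d_update() can arm a completion write before kicking (method 0x0084, value 0x80000000 | NV50_DISP_CORE_NTFY), and ntfy_wait_done() busy-polls that dword in the shared sync buffer with nvif_msec() bounding the wait at two seconds. The commit path tying these together lives in disp.c; the assumed sequence looks like:

	/* Assumed caller sequence; the real one is in disp.c's atomic
	 * commit and is not reproduced in this excerpt. */
	core->func->ntfy_init(disp->sync, NV50_DISP_CORE_NTFY);  /* clear    */
	core->func->update(core, interlock, true);               /* arm+kick */
	if (core->func->ntfy_wait_done(disp->sync, NV50_DISP_CORE_NTFY,
				       &drm->client.device))     /* <0: timeout */
		NV_ERROR(drm, "core notifier timeout\n");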
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core827d.c b/drivers/gpu/drm/nouveau/dispnv50/core827d.c
new file mode 100644
index 0000000..6123a06
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/core827d.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+#include "head.h"
+
+static const struct nv50_core_func
+core827d = {
+	.init = core507d_init,
+	.ntfy_init = core507d_ntfy_init,
+	.ntfy_wait_done = core507d_ntfy_wait_done,
+	.update = core507d_update,
+	.head = &head827d,
+	.dac = &dac507d,
+	.sor = &sor507d,
+	.pior = &pior507d,
+};
+
+int
+core827d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore)
+{
+	return core507d_new_(&core827d, drm, oclass, pcore);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core907d.c b/drivers/gpu/drm/nouveau/dispnv50/core907d.c
new file mode 100644
index 0000000..ef822f8
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/core907d.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+#include "head.h"
+
+static const struct nv50_core_func
+core907d = {
+	.init = core507d_init,
+	.ntfy_init = core507d_ntfy_init,
+	.ntfy_wait_done = core507d_ntfy_wait_done,
+	.update = core507d_update,
+	.head = &head907d,
+	.dac = &dac907d,
+	.sor = &sor907d,
+};
+
+int
+core907d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore)
+{
+	return core507d_new_(&core907d, drm, oclass, pcore);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core917d.c b/drivers/gpu/drm/nouveau/dispnv50/core917d.c
new file mode 100644
index 0000000..392338d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/core917d.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+#include "head.h"
+
+static const struct nv50_core_func
+core917d = {
+	.init = core507d_init,
+	.ntfy_init = core507d_ntfy_init,
+	.ntfy_wait_done = core507d_ntfy_wait_done,
+	.update = core507d_update,
+	.head = &head917d,
+	.dac = &dac907d,
+	.sor = &sor907d,
+};
+
+int
+core917d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore)
+{
+	return core507d_new_(&core917d, drm, oclass, pcore);
+}
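
core827d, core907d and core917d add no channel code; each just re-points the sub-tables while the init/ntfy/update entries stay core507d's:

	/* core827d: .head = &head827d, .dac = &dac507d, .sor = &sor507d, .pior = &pior507d
	 * core907d: .head = &head907d, .dac = &dac907d, .sor = &sor907d   (.pior dropped)
	 * core917d: .head = &head917d, .dac = &dac907d, .sor = &sor907d
	 */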
diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c
new file mode 100644
index 0000000..b5c17c9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+#include "head.h"
+
+#include <nouveau_bo.h>
+
+static void
+corec37d_update(struct nv50_core *core, u32 *interlock, bool ntfy)
+{
+	u32 *push;
+	if ((push = evo_wait(&core->chan, 9))) {
+		if (ntfy) {
+			evo_mthd(push, 0x020c, 1);
+			evo_data(push, 0x00001000 | NV50_DISP_CORE_NTFY);
+		}
+
+		evo_mthd(push, 0x0218, 2);
+		evo_data(push, interlock[NV50_DISP_INTERLOCK_CURS]);
+		evo_data(push, interlock[NV50_DISP_INTERLOCK_WNDW]);
+		evo_mthd(push, 0x0200, 1);
+		evo_data(push, 0x00000001);
+
+		if (ntfy) {
+			evo_mthd(push, 0x020c, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, &core->chan);
+	}
+}
+
+int
+corec37d_ntfy_wait_done(struct nouveau_bo *bo, u32 offset,
+			struct nvif_device *device)
+{
+	u32 data;
+	s64 time = nvif_msec(device, 2000ULL,
+		data = nouveau_bo_rd32(bo, offset / 4 + 0);
+		if ((data & 0xc0000000) == 0x80000000)
+			break;
+		usleep_range(1, 2);
+	);
+	return time < 0 ? time : 0;
+}
+
+void
+corec37d_ntfy_init(struct nouveau_bo *bo, u32 offset)
+{
+	nouveau_bo_wr32(bo, offset / 4 + 0, 0x00000000);
+	nouveau_bo_wr32(bo, offset / 4 + 1, 0x00000000);
+	nouveau_bo_wr32(bo, offset / 4 + 2, 0x00000000);
+	nouveau_bo_wr32(bo, offset / 4 + 3, 0x00000000);
+}
+
+void
+corec37d_init(struct nv50_core *core)
+{
+	const u32 windows = 8; /*XXX*/
+	u32 *push, i;
+	if ((push = evo_wait(&core->chan, 2 + 6 * windows + 2))) {
+		evo_mthd(push, 0x0208, 1);
+		evo_data(push, core->chan.sync.handle);
+		for (i = 0; i < windows; i++) {
+			evo_mthd(push, 0x1000 + (i * 0x080), 3);
+			evo_data(push, i >> 1);
+			evo_data(push, 0x00000017);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x1010 + (i * 0x080), 1);
+			evo_data(push, 0x00127fff);
+		}
+		evo_mthd(push, 0x0200, 1);
+		evo_data(push, 0x00000001);
+		evo_kick(push, &core->chan);
+	}
+}
+
+static const struct nv50_core_func
+corec37d = {
+	.init = corec37d_init,
+	.ntfy_init = corec37d_ntfy_init,
+	.ntfy_wait_done = corec37d_ntfy_wait_done,
+	.update = corec37d_update,
+	.head = &headc37d,
+	.sor = &sorc37d,
+};
+
+int
+corec37d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore)
+{
+	return core507d_new_(&corec37d, drm, oclass, pcore);
+}
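
Three things change for the GV100 core: the notifier grows to four dwords with completion signalled by 0x80000000 under a 0xc0000000 mask (507d just waits for the dword to go non-zero), update() interlocks against the cursor and window channels instead of base/overlay, and init() must assign windows to heads. The method meanings below are guesses, since the GV100 class headers are not part of this excerpt:

	/* Per-window init writes, i = 0..7 (count hardcoded, hence the XXX):
	 *   0x1000 + i*0x80: i >> 1       window->head, two per head (guess)
	 *   0x1004 + i*0x80: 0x00000017   meaning not recoverable here
	 *   0x1008 + i*0x80: 0x00000000
	 *   0x1010 + i*0x80: 0x00127fff   usage bounds/format mask (guess)
	 */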
diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c
new file mode 100644
index 0000000..f592087
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "curs.h"
+
+#include <nvif/class.h>
+
+int
+nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw)
+{
+	struct {
+		s32 oclass;
+		int version;
+		int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+	} curses[] = {
+		{ GV100_DISP_CURSOR, 0, cursc37a_new },
+		{ GK104_DISP_CURSOR, 0, curs907a_new },
+		{ GF110_DISP_CURSOR, 0, curs907a_new },
+		{ GT214_DISP_CURSOR, 0, curs507a_new },
+		{   G82_DISP_CURSOR, 0, curs507a_new },
+		{  NV50_DISP_CURSOR, 0, curs507a_new },
+		{}
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int cid;
+
+	cid = nvif_mclass(&disp->disp->object, curses);
+	if (cid < 0) {
+		NV_ERROR(drm, "No supported cursor immediate class\n");
+		return cid;
+	}
+
+	return curses[cid].new(drm, head, curses[cid].oclass, pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.h b/drivers/gpu/drm/nouveau/dispnv50/curs.h
new file mode 100644
index 0000000..23aff5f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs.h
@@ -0,0 +1,14 @@
+#ifndef __NV50_KMS_CURS_H__
+#define __NV50_KMS_CURS_H__
+#include "wndw.h"
+
+int curs507a_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+int curs507a_new_(const struct nv50_wimm_func *, struct nouveau_drm *,
+		  int head, s32 oclass, u32 interlock_data,
+		  struct nv50_wndw **);
+
+int curs907a_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+int cursc37a_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+
+int nv50_curs_new(struct nouveau_drm *, int head, struct nv50_wndw **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
new file mode 100644
index 0000000..291c081
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "curs.h"
+#include "core.h"
+#include "head.h"
+
+#include <nvif/cl507a.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+
+static void
+curs507a_update(struct nv50_wndw *wndw, u32 *interlock)
+{
+	nvif_wr32(&wndw->wimm.base.user, 0x0080, 0x00000000);
+}
+
+static void
+curs507a_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	nvif_wr32(&wndw->wimm.base.user, 0x0084, asyw->point.y << 16 |
+						 asyw->point.x);
+}
+
+const struct nv50_wimm_func
+curs507a = {
+	.point = curs507a_point,
+	.update = curs507a_update,
+};
+
+static void
+curs507a_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh,
+		 struct nv50_wndw_atom *asyw)
+{
+	u32 handle = nv50_disp(wndw->plane.dev)->core->chan.vram.handle;
+	u32 offset = asyw->image.offset[0];
+	if (asyh->curs.handle != handle || asyh->curs.offset != offset) {
+		asyh->curs.handle = handle;
+		asyh->curs.offset = offset;
+		asyh->set.curs = asyh->curs.visible;
+	}
+}
+
+static void
+curs507a_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		 struct nv50_head_atom *asyh)
+{
+	asyh->curs.visible = false;
+}
+
+static int
+curs507a_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		 struct nv50_head_atom *asyh)
+{
+	struct nv50_head *head = nv50_head(asyw->state.crtc);
+	int ret;
+
+	ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  true, true);
+	asyh->curs.visible = asyw->state.visible;
+	if (ret || !asyh->curs.visible)
+		return ret;
+
+	if (asyw->image.w != asyw->image.h)
+		return -EINVAL;
+
+	ret = head->func->curs_layout(head, asyw, asyh);
+	if (ret)
+		return ret;
+
+	return head->func->curs_format(head, asyw, asyh);
+}
+
+static const u32
+curs507a_format[] = {
+	DRM_FORMAT_ARGB8888,
+	0
+};
+
+static const struct nv50_wndw_func
+curs507a_wndw = {
+	.acquire = curs507a_acquire,
+	.release = curs507a_release,
+	.prepare = curs507a_prepare,
+};
+
+int
+curs507a_new_(const struct nv50_wimm_func *func, struct nouveau_drm *drm,
+	      int head, s32 oclass, u32 interlock_data,
+	      struct nv50_wndw **pwndw)
+{
+	struct nv50_disp_cursor_v0 args = {
+		.head = head,
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_wndw *wndw;
+	int ret;
+
+	ret = nv50_wndw_new_(&curs507a_wndw, drm->dev, DRM_PLANE_TYPE_CURSOR,
+			     "curs", head, curs507a_format, BIT(head),
+			     NV50_DISP_INTERLOCK_CURS, interlock_data, &wndw);
+	if (*pwndw = wndw, ret)
+		return ret;
+
+	ret = nvif_object_init(&disp->disp->object, 0, oclass, &args,
+			       sizeof(args), &wndw->wimm.base.user);
+	if (ret) {
+		NV_ERROR(drm, "curs%04x allocation failed: %d\n", oclass, ret);
+		return ret;
+	}
+
+	nvif_object_map(&wndw->wimm.base.user, NULL, 0);
+	wndw->immd = func;
+	wndw->ctxdma.parent = &disp->core->chan.base.user;
+	return 0;
+}
+
+int
+curs507a_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return curs507a_new_(&curs507a, drm, head, oclass,
+			     0x00000001 << (head * 8), pwndw);
+}
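
Cursors take the other channel flavour: curs507a_new_() creates a plain nvif object and maps it, with no DMA push buffer behind it, so the wimm hooks are direct register writes and prepare() merely stashes the surface handle/offset into the head atom for the core channel to latch. Moving the cursor therefore costs two MMIO writes:

	/* Hypothetical helper; offsets 0x0084/0x0080 are the ones the
	 * curs507a functions above use. */
	static void
	curs507a_move(struct nv50_wndw *wndw, int x, int y)
	{
		nvif_wr32(&wndw->wimm.base.user, 0x0084, (y << 16) | x);
		nvif_wr32(&wndw->wimm.base.user, 0x0080, 0x00000000);
	}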
diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs907a.c b/drivers/gpu/drm/nouveau/dispnv50/curs907a.c
new file mode 100644
index 0000000..d742362
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs907a.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "curs.h"
+
+int
+curs907a_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return curs507a_new_(&curs507a, drm, head, oclass,
+			     0x00000001 << (head * 4), pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c b/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c
new file mode 100644
index 0000000..23fb29d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/cursc37a.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "curs.h"
+#include "atom.h"
+
+static void
+cursc37a_update(struct nv50_wndw *wndw, u32 *interlock)
+{
+	nvif_wr32(&wndw->wimm.base.user, 0x0200, 0x00000001);
+}
+
+static void
+cursc37a_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	nvif_wr32(&wndw->wimm.base.user, 0x0208, asyw->point.y << 16 |
+						 asyw->point.x);
+}
+
+static const struct nv50_wimm_func
+cursc37a = {
+	.point = cursc37a_point,
+	.update = cursc37a_update,
+};
+
+int
+cursc37a_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return curs507a_new_(&cursc37a, drm, head, oclass,
+			     0x00000001 << head, pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/dac507d.c b/drivers/gpu/drm/nouveau/dispnv50/dac507d.c
new file mode 100644
index 0000000..2a10ef7
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/dac507d.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+
+static void
+dac507d_ctrl(struct nv50_core *core, int or, u32 ctrl,
+	     struct nv50_head_atom *asyh)
+{
+	u32 *push, sync = 0;
+	if ((push = evo_wait(&core->chan, 3))) {
+		if (asyh) {
+			sync |= asyh->or.nvsync << 1;
+			sync |= asyh->or.nhsync;
+		}
+		evo_mthd(push, 0x0400 + (or * 0x080), 2);
+		evo_data(push, ctrl);
+		evo_data(push, sync);
+		evo_kick(push, &core->chan);
+	}
+}
+
+const struct nv50_outp_func
+dac507d = {
+	.ctrl = dac507d_ctrl,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/dac907d.c b/drivers/gpu/drm/nouveau/dispnv50/dac907d.c
new file mode 100644
index 0000000..11e87fa
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/dac907d.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+
+static void
+dac907d_ctrl(struct nv50_core *core, int or, u32 ctrl,
+	     struct nv50_head_atom *asyh)
+{
+	u32 *push;
+	if ((push = evo_wait(&core->chan, 2))) {
+		evo_mthd(push, 0x0180 + (or * 0x020), 1);
+		evo_data(push, ctrl);
+		evo_kick(push, &core->chan);
+	}
+}
+
+const struct nv50_outp_func
+dac907d = {
+	.ctrl = dac907d_ctrl,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
new file mode 100644
index 0000000..b83465a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -0,0 +1,2238 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "disp.h"
+#include "atom.h"
+#include "core.h"
+#include "head.h"
+#include "wndw.h"
+
+#include <linux/dma-mapping.h>
+#include <linux/hdmi.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_edid.h>
+
+#include <nvif/class.h>
+#include <nvif/cl0002.h>
+#include <nvif/cl5070.h>
+#include <nvif/cl507d.h>
+#include <nvif/event.h>
+
+#include "nouveau_drv.h"
+#include "nouveau_dma.h"
+#include "nouveau_gem.h"
+#include "nouveau_connector.h"
+#include "nouveau_encoder.h"
+#include "nouveau_fence.h"
+#include "nouveau_fbcon.h"
+
+#include <subdev/bios/dp.h>
+
+/******************************************************************************
+ * Atomic state
+ *****************************************************************************/
+
+struct nv50_outp_atom {
+	struct list_head head;
+
+	struct drm_encoder *encoder;
+	bool flush_disable;
+
+	union nv50_outp_atom_mask {
+		struct {
+			bool ctrl:1;
+		};
+		u8 mask;
+	} set, clr;
+};
+
+/******************************************************************************
+ * EVO channel
+ *****************************************************************************/
+
+static int
+nv50_chan_create(struct nvif_device *device, struct nvif_object *disp,
+		 const s32 *oclass, u8 head, void *data, u32 size,
+		 struct nv50_chan *chan)
+{
+	struct nvif_sclass *sclass;
+	int ret, i, n;
+
+	chan->device = device;
+
+	ret = n = nvif_object_sclass_get(disp, &sclass);
+	if (ret < 0)
+		return ret;
+
+	while (oclass[0]) {
+		for (i = 0; i < n; i++) {
+			if (sclass[i].oclass == oclass[0]) {
+				ret = nvif_object_init(disp, 0, oclass[0],
+						       data, size, &chan->user);
+				if (ret == 0)
+					nvif_object_map(&chan->user, NULL, 0);
+				nvif_object_sclass_put(&sclass);
+				return ret;
+			}
+		}
+		oclass++;
+	}
+
+	nvif_object_sclass_put(&sclass);
+	return -ENOSYS;
+}
+
+static void
+nv50_chan_destroy(struct nv50_chan *chan)
+{
+	nvif_object_fini(&chan->user);
+}
+
+/******************************************************************************
+ * DMA EVO channel
+ *****************************************************************************/
+
+void
+nv50_dmac_destroy(struct nv50_dmac *dmac)
+{
+	nvif_object_fini(&dmac->vram);
+	nvif_object_fini(&dmac->sync);
+
+	nv50_chan_destroy(&dmac->base);
+
+	nvif_mem_fini(&dmac->push);
+}
+
+int
+nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
+		 const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf,
+		 struct nv50_dmac *dmac)
+{
+	struct nouveau_cli *cli = (void *)device->object.client;
+	struct nv50_disp_core_channel_dma_v0 *args = data;
+	int ret;
+
+	mutex_init(&dmac->lock);
+
+	ret = nvif_mem_init_map(&cli->mmu, NVIF_MEM_COHERENT, 0x1000,
+				&dmac->push);
+	if (ret)
+		return ret;
+
+	dmac->ptr = dmac->push.object.map.ptr;
+
+	args->pushbuf = nvif_handle(&dmac->push.object);
+
+	ret = nv50_chan_create(device, disp, oclass, head, data, size,
+			       &dmac->base);
+	if (ret)
+		return ret;
+
+	if (!syncbuf)
+		return 0;
+
+	ret = nvif_object_init(&dmac->base.user, 0xf0000000, NV_DMA_IN_MEMORY,
+			       &(struct nv_dma_v0) {
+					.target = NV_DMA_V0_TARGET_VRAM,
+					.access = NV_DMA_V0_ACCESS_RDWR,
+					.start = syncbuf + 0x0000,
+					.limit = syncbuf + 0x0fff,
+			       }, sizeof(struct nv_dma_v0),
+			       &dmac->sync);
+	if (ret)
+		return ret;
+
+	ret = nvif_object_init(&dmac->base.user, 0xf0000001, NV_DMA_IN_MEMORY,
+			       &(struct nv_dma_v0) {
+					.target = NV_DMA_V0_TARGET_VRAM,
+					.access = NV_DMA_V0_ACCESS_RDWR,
+					.start = 0,
+					.limit = device->info.ram_user - 1,
+			       }, sizeof(struct nv_dma_v0),
+			       &dmac->vram);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
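Two details of nv50_dmac_create() above are easy to miss. First, it casts whichever channel args it is handed to nv50_disp_core_channel_dma_v0 purely to poke the pushbuf handle; that works because (per the nvif class headers, which are outside this diff) every EVO DMA channel args struct places a 64-bit pushbuf at the same offset:

	/* Layout assumption behind the cast, field order per the nvif
	 * class headers (not shown in this patch): */
	struct nv50_disp_core_channel_dma_v0 { __u8 version; __u8 pad01[7]; __u64 pushbuf; };
	struct nv50_disp_base_channel_dma_v0 { __u8 version; __u8 head; __u8 pad02[6]; __u64 pushbuf; };

Second, it installs two fixed-handle ctxdmas the method streams reference by name: 0xf0000000 for the 4KiB sync page and 0xf0000001 spanning all of VRAM; core507d_init() pushing chan.sync.handle at method 0x0088 is one consumer of the former. Channels created with syncbuf == 0 skip both.
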
+/******************************************************************************
+ * EVO channel helpers
+ *****************************************************************************/
+u32 *
+evo_wait(struct nv50_dmac *evoc, int nr)
+{
+	struct nv50_dmac *dmac = evoc;
+	struct nvif_device *device = dmac->base.device;
+	u32 put = nvif_rd32(&dmac->base.user, 0x0000) / 4;
+
+	mutex_lock(&dmac->lock);
+	if (put + nr >= (PAGE_SIZE / 4) - 8) {
+		dmac->ptr[put] = 0x20000000;
+
+		nvif_wr32(&dmac->base.user, 0x0000, 0x00000000);
+		if (nvif_msec(device, 2000,
+			if (!nvif_rd32(&dmac->base.user, 0x0004))
+				break;
+		) < 0) {
+			mutex_unlock(&dmac->lock);
+			pr_err("nouveau: evo channel stalled\n");
+			return NULL;
+		}
+
+		put = 0;
+	}
+
+	return dmac->ptr + put;
+}
+
+void
+evo_kick(u32 *push, struct nv50_dmac *evoc)
+{
+	struct nv50_dmac *dmac = evoc;
+	nvif_wr32(&dmac->base.user, 0x0000, (push - dmac->ptr) << 2);
+	mutex_unlock(&dmac->lock);
+}
+
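evo_wait()/evo_kick() implement a small ring protocol over two channel registers; decoding the constants used above:

	/* reg 0x0000: PUT, in bytes (evo_kick writes (push - ptr) << 2)
	 * reg 0x0004: GET, polled until it returns to 0 after a wrap
	 * 0x20000000: jump-to-start command stored at the old PUT slot
	 * The '- 8' keeps slack for that jump word.  Note the lock: it is
	 * taken in evo_wait() and released in evo_kick() (or on the stall
	 * path), so every successful evo_wait() must be paired with a kick.
	 */
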
+/******************************************************************************
+ * Output path helpers
+ *****************************************************************************/
+static void
+nv50_outp_release(struct nouveau_encoder *nv_encoder)
+{
+	struct nv50_disp *disp = nv50_disp(nv_encoder->base.base.dev);
+	struct {
+		struct nv50_disp_mthd_v1 base;
+	} args = {
+		.base.version = 1,
+		.base.method = NV50_DISP_MTHD_V1_RELEASE,
+		.base.hasht  = nv_encoder->dcb->hasht,
+		.base.hashm  = nv_encoder->dcb->hashm,
+	};
+
+	nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
+	nv_encoder->or = -1;
+	nv_encoder->link = 0;
+}
+
+static int
+nv50_outp_acquire(struct nouveau_encoder *nv_encoder)
+{
+	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_acquire_v0 info;
+	} args = {
+		.base.version = 1,
+		.base.method = NV50_DISP_MTHD_V1_ACQUIRE,
+		.base.hasht  = nv_encoder->dcb->hasht,
+		.base.hashm  = nv_encoder->dcb->hashm,
+	};
+	int ret;
+
+	ret = nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
+	if (ret) {
+		NV_ERROR(drm, "error acquiring output path: %d\n", ret);
+		return ret;
+	}
+
+	nv_encoder->or = args.info.or;
+	nv_encoder->link = args.info.link;
+	return 0;
+}
+
+static int
+nv50_outp_atomic_check_view(struct drm_encoder *encoder,
+			    struct drm_crtc_state *crtc_state,
+			    struct drm_connector_state *conn_state,
+			    struct drm_display_mode *native_mode)
+{
+	struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
+	struct drm_display_mode *mode = &crtc_state->mode;
+	struct drm_connector *connector = conn_state->connector;
+	struct nouveau_conn_atom *asyc = nouveau_conn_atom(conn_state);
+	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
+
+	NV_ATOMIC(drm, "%s atomic_check\n", encoder->name);
+	asyc->scaler.full = false;
+	if (!native_mode)
+		return 0;
+
+	if (asyc->scaler.mode == DRM_MODE_SCALE_NONE) {
+		switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_LVDS:
+		case DRM_MODE_CONNECTOR_eDP:
+			/* Force use of scaler for non-EDID modes. */
+			if (adjusted_mode->type & DRM_MODE_TYPE_DRIVER)
+				break;
+			mode = native_mode;
+			asyc->scaler.full = true;
+			break;
+		default:
+			break;
+		}
+	} else {
+		mode = native_mode;
+	}
+
+	if (!drm_mode_equal(adjusted_mode, mode)) {
+		drm_mode_copy(adjusted_mode, mode);
+		crtc_state->mode_changed = true;
+	}
+
+	return 0;
+}
+
+static int
+nv50_outp_atomic_check(struct drm_encoder *encoder,
+		       struct drm_crtc_state *crtc_state,
+		       struct drm_connector_state *conn_state)
+{
+	struct nouveau_connector *nv_connector =
+		nouveau_connector(conn_state->connector);
+	return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
+					   nv_connector->native_mode);
+}
+
+/******************************************************************************
+ * DAC
+ *****************************************************************************/
+static void
+nv50_dac_disable(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_core *core = nv50_disp(encoder->dev)->core;
+	if (nv_encoder->crtc)
+		core->func->dac->ctrl(core, nv_encoder->or, 0x00000000, NULL);
+	nv_encoder->crtc = NULL;
+	nv50_outp_release(nv_encoder);
+}
+
+static void
+nv50_dac_enable(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nv50_head_atom *asyh = nv50_head_atom(nv_crtc->base.state);
+	struct nv50_core *core = nv50_disp(encoder->dev)->core;
+
+	nv50_outp_acquire(nv_encoder);
+
+	core->func->dac->ctrl(core, nv_encoder->or, 1 << nv_crtc->index, asyh);
+	asyh->or.depth = 0;
+
+	nv_encoder->crtc = encoder->crtc;
+}
+
+static enum drm_connector_status
+nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_dac_load_v0 load;
+	} args = {
+		.base.version = 1,
+		.base.method = NV50_DISP_MTHD_V1_DAC_LOAD,
+		.base.hasht  = nv_encoder->dcb->hasht,
+		.base.hashm  = nv_encoder->dcb->hashm,
+	};
+	int ret;
+
+	args.load.data = nouveau_drm(encoder->dev)->vbios.dactestval;
+	if (args.load.data == 0)
+		args.load.data = 340;
+
+	ret = nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
+	if (ret || !args.load.load)
+		return connector_status_disconnected;
+
+	return connector_status_connected;
+}
+
+static const struct drm_encoder_helper_funcs
+nv50_dac_help = {
+	.atomic_check = nv50_outp_atomic_check,
+	.enable = nv50_dac_enable,
+	.disable = nv50_dac_disable,
+	.detect = nv50_dac_detect
+};
+
+static void
+nv50_dac_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_funcs
+nv50_dac_func = {
+	.destroy = nv50_dac_destroy,
+};
+
+static int
+nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
+{
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
+	struct nvkm_i2c_bus *bus;
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+	int type = DRM_MODE_ENCODER_DAC;
+
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+
+	bus = nvkm_i2c_bus_find(i2c, dcbe->i2c_index);
+	if (bus)
+		nv_encoder->i2c = &bus->i2c;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type,
+			 "dac-%04x-%04x", dcbe->hasht, dcbe->hashm);
+	drm_encoder_helper_add(encoder, &nv50_dac_help);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	return 0;
+}
+
+/******************************************************************************
+ * Audio
+ *****************************************************************************/
+static void
+nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_sor_hda_eld_v0 eld;
+	} args = {
+		.base.version = 1,
+		.base.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
+		.base.hasht   = nv_encoder->dcb->hasht,
+		.base.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
+				(0x0100 << nv_crtc->index),
+	};
+
+	nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
+}
+
+static void
+nv50_audio_enable(struct drm_encoder *encoder, struct drm_display_mode *mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector;
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct __packed {
+		struct {
+			struct nv50_disp_mthd_v1 mthd;
+			struct nv50_disp_sor_hda_eld_v0 eld;
+		} base;
+		u8 data[sizeof(nv_connector->base.eld)];
+	} args = {
+		.base.mthd.version = 1,
+		.base.mthd.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
+		.base.mthd.hasht   = nv_encoder->dcb->hasht,
+		.base.mthd.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
+				     (0x0100 << nv_crtc->index),
+	};
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (!drm_detect_monitor_audio(nv_connector->edid))
+		return;
+
+	memcpy(args.data, nv_connector->base.eld, sizeof(args.data));
+
+	nvif_mthd(&disp->disp->object, 0, &args,
+		  sizeof(args.base) + drm_eld_size(args.data));
+}
+
+/******************************************************************************
+ * HDMI
+ *****************************************************************************/
+static void
+nv50_hdmi_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_sor_hdmi_pwr_v0 pwr;
+	} args = {
+		.base.version = 1,
+		.base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR,
+		.base.hasht  = nv_encoder->dcb->hasht,
+		.base.hashm  = (0xf0ff & nv_encoder->dcb->hashm) |
+			       (0x0100 << nv_crtc->index),
+	};
+
+	nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
+}
+
+static void
+nv50_hdmi_enable(struct drm_encoder *encoder, struct drm_display_mode *mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_sor_hdmi_pwr_v0 pwr;
+		u8 infoframes[2 * 17]; /* two frames, up to 17 bytes each */
+	} args = {
+		.base.version = 1,
+		.base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR,
+		.base.hasht  = nv_encoder->dcb->hasht,
+		.base.hashm  = (0xf0ff & nv_encoder->dcb->hashm) |
+			       (0x0100 << nv_crtc->index),
+		.pwr.state = 1,
+		.pwr.rekey = 56, /* constant used by both the binary driver and tegra */
+	};
+	struct nouveau_connector *nv_connector;
+	u32 max_ac_packet;
+	union hdmi_infoframe avi_frame;
+	union hdmi_infoframe vendor_frame;
+	int ret;
+	int size;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (!drm_detect_hdmi_monitor(nv_connector->edid))
+		return;
+
+	ret = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame.avi, mode,
+						       false);
+	if (!ret) {
+		/* We have an AVI InfoFrame, pack it for sending to the display. */
+		args.pwr.avi_infoframe_length
+			= hdmi_infoframe_pack(&avi_frame, args.infoframes, 17);
+	}
+
+	ret = drm_hdmi_vendor_infoframe_from_display_mode(&vendor_frame.vendor.hdmi,
+							  &nv_connector->base, mode);
+	if (!ret) {
+		/* We have a Vendor InfoFrame, pack it for sending to the display. */
+		args.pwr.vendor_infoframe_length
+			= hdmi_infoframe_pack(&vendor_frame,
+					      args.infoframes
+					      + args.pwr.avi_infoframe_length,
+					      17);
+	}
+
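+	/* Audio/control packet budget: what's left of the hblank period
+	 * after the rekey window and a fixed overhead, programmed in
+	 * units of 32 (the 18 below mirrors the tegra driver).
+	 */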
+	max_ac_packet  = mode->htotal - mode->hdisplay;
+	max_ac_packet -= args.pwr.rekey;
+	max_ac_packet -= 18; /* constant from tegra */
+	args.pwr.max_ac_packet = max_ac_packet / 32;
+
+	size = sizeof(args.base)
+		+ sizeof(args.pwr)
+		+ args.pwr.avi_infoframe_length
+		+ args.pwr.vendor_infoframe_length;
+	nvif_mthd(&disp->disp->object, 0, &args, size);
+	nv50_audio_enable(encoder, mode);
+}
+
+/******************************************************************************
+ * MST
+ *****************************************************************************/
+#define nv50_mstm(p) container_of((p), struct nv50_mstm, mgr)
+#define nv50_mstc(p) container_of((p), struct nv50_mstc, connector)
+#define nv50_msto(p) container_of((p), struct nv50_msto, encoder)
+
+struct nv50_mstm {
+	struct nouveau_encoder *outp;
+
+	struct drm_dp_mst_topology_mgr mgr;
+	struct nv50_msto *msto[4];
+
+	bool modified;
+	bool disabled;
+	int links;
+};
+
+struct nv50_mstc {
+	struct nv50_mstm *mstm;
+	struct drm_dp_mst_port *port;
+	struct drm_connector connector;
+
+	struct drm_display_mode *native;
+	struct edid *edid;
+
+	int pbn;
+};
+
+struct nv50_msto {
+	struct drm_encoder encoder;
+
+	struct nv50_head *head;
+	struct nv50_mstc *mstc;
+	bool disabled;
+};
+
+static struct drm_dp_payload *
+nv50_msto_payload(struct nv50_msto *msto)
+{
+	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
+	struct nv50_mstc *mstc = msto->mstc;
+	struct nv50_mstm *mstm = mstc->mstm;
+	int vcpi = mstc->port->vcpi.vcpi, i;
+
+	NV_ATOMIC(drm, "%s: vcpi %d\n", msto->encoder.name, vcpi);
+	for (i = 0; i < mstm->mgr.max_payloads; i++) {
+		struct drm_dp_payload *payload = &mstm->mgr.payloads[i];
+		NV_ATOMIC(drm, "%s: %d: vcpi %d start 0x%02x slots 0x%02x\n",
+			  mstm->outp->base.base.name, i, payload->vcpi,
+			  payload->start_slot, payload->num_slots);
+	}
+
+	for (i = 0; i < mstm->mgr.max_payloads; i++) {
+		struct drm_dp_payload *payload = &mstm->mgr.payloads[i];
+		if (payload->vcpi == vcpi)
+			return payload;
+	}
+
+	return NULL;
+}
+
+static void
+nv50_msto_cleanup(struct nv50_msto *msto)
+{
+	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
+	struct nv50_mstc *mstc = msto->mstc;
+	struct nv50_mstm *mstm = mstc->mstm;
+
+	NV_ATOMIC(drm, "%s: msto cleanup\n", msto->encoder.name);
+	if (mstc->port && mstc->port->vcpi.vcpi > 0 && !nv50_msto_payload(msto))
+		drm_dp_mst_deallocate_vcpi(&mstm->mgr, mstc->port);
+	if (msto->disabled) {
+		msto->mstc = NULL;
+		msto->head = NULL;
+		msto->disabled = false;
+	}
+}
+
+static void
+nv50_msto_prepare(struct nv50_msto *msto)
+{
+	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
+	struct nv50_mstc *mstc = msto->mstc;
+	struct nv50_mstm *mstm = mstc->mstm;
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_sor_dp_mst_vcpi_v0 vcpi;
+	} args = {
+		.base.version = 1,
+		.base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI,
+		.base.hasht  = mstm->outp->dcb->hasht,
+		.base.hashm  = (0xf0ff & mstm->outp->dcb->hashm) |
+			       (0x0100 << msto->head->base.index),
+	};
+
+	NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name);
+	if (mstc->port && mstc->port->vcpi.vcpi > 0) {
+		struct drm_dp_payload *payload = nv50_msto_payload(msto);
+		if (payload) {
+			args.vcpi.start_slot = payload->start_slot;
+			args.vcpi.num_slots = payload->num_slots;
+			args.vcpi.pbn = mstc->port->vcpi.pbn;
+			args.vcpi.aligned_pbn = mstc->port->vcpi.aligned_pbn;
+		}
+	}
+
+	NV_ATOMIC(drm, "%s: %s: %02x %02x %04x %04x\n",
+		  msto->encoder.name, msto->head->base.base.name,
+		  args.vcpi.start_slot, args.vcpi.num_slots,
+		  args.vcpi.pbn, args.vcpi.aligned_pbn);
+	nvif_mthd(&drm->display->disp.object, 0, &args, sizeof(args));
+}
+
+static int
+nv50_msto_atomic_check(struct drm_encoder *encoder,
+		       struct drm_crtc_state *crtc_state,
+		       struct drm_connector_state *conn_state)
+{
+	struct nv50_mstc *mstc = nv50_mstc(conn_state->connector);
+	struct nv50_mstm *mstm = mstc->mstm;
+	int bpp = conn_state->connector->display_info.bpc * 3;
+	int slots;
+
+	mstc->pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, bpp);
+
+	slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn);
+	if (slots < 0)
+		return slots;
+
+	return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
+					   mstc->native);
+}
+
+static void
+nv50_msto_enable(struct drm_encoder *encoder)
+{
+	struct nv50_head *head = nv50_head(encoder->crtc);
+	struct nv50_msto *msto = nv50_msto(encoder);
+	struct nv50_mstc *mstc = NULL;
+	struct nv50_mstm *mstm = NULL;
+	struct drm_connector *connector;
+	struct drm_connector_list_iter conn_iter;
+	u8 proto, depth;
+	int slots;
+	bool r;
+
+	drm_connector_list_iter_begin(encoder->dev, &conn_iter);
+	drm_for_each_connector_iter(connector, &conn_iter) {
+		if (connector->state->best_encoder == &msto->encoder) {
+			mstc = nv50_mstc(connector);
+			mstm = mstc->mstm;
+			break;
+		}
+	}
+	drm_connector_list_iter_end(&conn_iter);
+
+	if (WARN_ON(!mstc))
+		return;
+
+	slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn);
+	r = drm_dp_mst_allocate_vcpi(&mstm->mgr, mstc->port, mstc->pbn, slots);
+	WARN_ON(!r);
+
+	if (!mstm->links++)
+		nv50_outp_acquire(mstm->outp);
+
+	if (mstm->outp->link & 1)
+		proto = 0x8;
+	else
+		proto = 0x9;
+
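+	/* Translate connector bpc into the hardware OR depth code; this
+	 * matches the DP case in nv50_sor_enable().
+	 */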
+	switch (mstc->connector.display_info.bpc) {
+	case  6: depth = 0x2; break;
+	case  8: depth = 0x5; break;
+	case 10:
+	default: depth = 0x6; break;
+	}
+
+	mstm->outp->update(mstm->outp, head->base.index,
+			   nv50_head_atom(head->base.base.state), proto, depth);
+
+	msto->head = head;
+	msto->mstc = mstc;
+	mstm->modified = true;
+}
+
+static void
+nv50_msto_disable(struct drm_encoder *encoder)
+{
+	struct nv50_msto *msto = nv50_msto(encoder);
+	struct nv50_mstc *mstc = msto->mstc;
+	struct nv50_mstm *mstm = mstc->mstm;
+
+	if (mstc->port)
+		drm_dp_mst_reset_vcpi_slots(&mstm->mgr, mstc->port);
+
+	mstm->outp->update(mstm->outp, msto->head->base.index, NULL, 0, 0);
+	mstm->modified = true;
+	if (!--mstm->links)
+		mstm->disabled = true;
+	msto->disabled = true;
+}
+
+static const struct drm_encoder_helper_funcs
+nv50_msto_help = {
+	.disable = nv50_msto_disable,
+	.enable = nv50_msto_enable,
+	.atomic_check = nv50_msto_atomic_check,
+};
+
+static void
+nv50_msto_destroy(struct drm_encoder *encoder)
+{
+	struct nv50_msto *msto = nv50_msto(encoder);
+	drm_encoder_cleanup(&msto->encoder);
+	kfree(msto);
+}
+
+static const struct drm_encoder_funcs
+nv50_msto = {
+	.destroy = nv50_msto_destroy,
+};
+
+static int
+nv50_msto_new(struct drm_device *dev, u32 heads, const char *name, int id,
+	      struct nv50_msto **pmsto)
+{
+	struct nv50_msto *msto;
+	int ret;
+
+	if (!(msto = *pmsto = kzalloc(sizeof(*msto), GFP_KERNEL)))
+		return -ENOMEM;
+
+	ret = drm_encoder_init(dev, &msto->encoder, &nv50_msto,
+			       DRM_MODE_ENCODER_DPMST, "%s-mst-%d", name, id);
+	if (ret) {
+		kfree(*pmsto);
+		*pmsto = NULL;
+		return ret;
+	}
+
+	drm_encoder_helper_add(&msto->encoder, &nv50_msto_help);
+	msto->encoder.possible_crtcs = heads;
+	return 0;
+}
+
+static struct drm_encoder *
+nv50_mstc_atomic_best_encoder(struct drm_connector *connector,
+			      struct drm_connector_state *connector_state)
+{
+	struct nv50_head *head = nv50_head(connector_state->crtc);
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	if (mstc->port) {
+		struct nv50_mstm *mstm = mstc->mstm;
+		return &mstm->msto[head->base.index]->encoder;
+	}
+	return NULL;
+}
+
+static struct drm_encoder *
+nv50_mstc_best_encoder(struct drm_connector *connector)
+{
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	if (mstc->port) {
+		struct nv50_mstm *mstm = mstc->mstm;
+		return &mstm->msto[0]->encoder;
+	}
+	return NULL;
+}
+
+static enum drm_mode_status
+nv50_mstc_mode_valid(struct drm_connector *connector,
+		     struct drm_display_mode *mode)
+{
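+	/* Accept everything here; available MST bandwidth is validated
+	 * later by the PBN/VCPI checks in nv50_msto_atomic_check().
+	 */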
+	return MODE_OK;
+}
+
+static int
+nv50_mstc_get_modes(struct drm_connector *connector)
+{
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	int ret = 0;
+
+	mstc->edid = drm_dp_mst_get_edid(&mstc->connector, mstc->port->mgr, mstc->port);
+	drm_mode_connector_update_edid_property(&mstc->connector, mstc->edid);
+	if (mstc->edid)
+		ret = drm_add_edid_modes(&mstc->connector, mstc->edid);
+
+	if (!mstc->connector.display_info.bpc)
+		mstc->connector.display_info.bpc = 8;
+
+	if (mstc->native)
+		drm_mode_destroy(mstc->connector.dev, mstc->native);
+	mstc->native = nouveau_conn_native_mode(&mstc->connector);
+	return ret;
+}
+
+static const struct drm_connector_helper_funcs
+nv50_mstc_help = {
+	.get_modes = nv50_mstc_get_modes,
+	.mode_valid = nv50_mstc_mode_valid,
+	.best_encoder = nv50_mstc_best_encoder,
+	.atomic_best_encoder = nv50_mstc_atomic_best_encoder,
+};
+
+static enum drm_connector_status
+nv50_mstc_detect(struct drm_connector *connector, bool force)
+{
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	if (!mstc->port)
+		return connector_status_disconnected;
+	return drm_dp_mst_detect_port(connector, mstc->port->mgr, mstc->port);
+}
+
+static void
+nv50_mstc_destroy(struct drm_connector *connector)
+{
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	drm_connector_cleanup(&mstc->connector);
+	kfree(mstc);
+}
+
+static const struct drm_connector_funcs
+nv50_mstc = {
+	.reset = nouveau_conn_reset,
+	.detect = nv50_mstc_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = nv50_mstc_destroy,
+	.atomic_duplicate_state = nouveau_conn_atomic_duplicate_state,
+	.atomic_destroy_state = nouveau_conn_atomic_destroy_state,
+	.atomic_set_property = nouveau_conn_atomic_set_property,
+	.atomic_get_property = nouveau_conn_atomic_get_property,
+};
+
+static int
+nv50_mstc_new(struct nv50_mstm *mstm, struct drm_dp_mst_port *port,
+	      const char *path, struct nv50_mstc **pmstc)
+{
+	struct drm_device *dev = mstm->outp->base.base.dev;
+	struct nv50_mstc *mstc;
+	int ret, i;
+
+	if (!(mstc = *pmstc = kzalloc(sizeof(*mstc), GFP_KERNEL)))
+		return -ENOMEM;
+	mstc->mstm = mstm;
+	mstc->port = port;
+
+	ret = drm_connector_init(dev, &mstc->connector, &nv50_mstc,
+				 DRM_MODE_CONNECTOR_DisplayPort);
+	if (ret) {
+		kfree(*pmstc);
+		*pmstc = NULL;
+		return ret;
+	}
+
+	drm_connector_helper_add(&mstc->connector, &nv50_mstc_help);
+
+	mstc->connector.funcs->reset(&mstc->connector);
+	nouveau_conn_attach_properties(&mstc->connector);
+
+	for (i = 0; i < ARRAY_SIZE(mstm->msto) && mstm->msto[i]; i++)
+		drm_mode_connector_attach_encoder(&mstc->connector, &mstm->msto[i]->encoder);
+
+	drm_object_attach_property(&mstc->connector.base, dev->mode_config.path_property, 0);
+	drm_object_attach_property(&mstc->connector.base, dev->mode_config.tile_property, 0);
+	drm_mode_connector_set_path_property(&mstc->connector, path);
+	return 0;
+}
+
+static void
+nv50_mstm_cleanup(struct nv50_mstm *mstm)
+{
+	struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev);
+	struct drm_encoder *encoder;
+	int ret;
+
+	NV_ATOMIC(drm, "%s: mstm cleanup\n", mstm->outp->base.base.name);
+	ret = drm_dp_check_act_status(&mstm->mgr);
+
+	ret = drm_dp_update_payload_part2(&mstm->mgr);
+
+	drm_for_each_encoder(encoder, mstm->outp->base.base.dev) {
+		if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
+			struct nv50_msto *msto = nv50_msto(encoder);
+			struct nv50_mstc *mstc = msto->mstc;
+			if (mstc && mstc->mstm == mstm)
+				nv50_msto_cleanup(msto);
+		}
+	}
+
+	mstm->modified = false;
+}
+
+static void
+nv50_mstm_prepare(struct nv50_mstm *mstm)
+{
+	struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev);
+	struct drm_encoder *encoder;
+	int ret;
+
+	NV_ATOMIC(drm, "%s: mstm prepare\n", mstm->outp->base.base.name);
+	ret = drm_dp_update_payload_part1(&mstm->mgr);
+
+	drm_for_each_encoder(encoder, mstm->outp->base.base.dev) {
+		if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
+			struct nv50_msto *msto = nv50_msto(encoder);
+			struct nv50_mstc *mstc = msto->mstc;
+			if (mstc && mstc->mstm == mstm)
+				nv50_msto_prepare(msto);
+		}
+	}
+
+	if (mstm->disabled) {
+		if (!mstm->links)
+			nv50_outp_release(mstm->outp);
+		mstm->disabled = false;
+	}
+}
+
+static void
+nv50_mstm_hotplug(struct drm_dp_mst_topology_mgr *mgr)
+{
+	struct nv50_mstm *mstm = nv50_mstm(mgr);
+	drm_kms_helper_hotplug_event(mstm->outp->base.base.dev);
+}
+
+static void
+nv50_mstm_destroy_connector(struct drm_dp_mst_topology_mgr *mgr,
+			    struct drm_connector *connector)
+{
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+
+	drm_connector_unregister(&mstc->connector);
+
+	drm_fb_helper_remove_one_connector(&drm->fbcon->helper, &mstc->connector);
+
+	drm_modeset_lock(&drm->dev->mode_config.connection_mutex, NULL);
+	mstc->port = NULL;
+	drm_modeset_unlock(&drm->dev->mode_config.connection_mutex);
+
+	drm_connector_unreference(&mstc->connector);
+}
+
+static void
+nv50_mstm_register_connector(struct drm_connector *connector)
+{
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+
+	drm_fb_helper_add_one_connector(&drm->fbcon->helper, connector);
+
+	drm_connector_register(connector);
+}
+
+static struct drm_connector *
+nv50_mstm_add_connector(struct drm_dp_mst_topology_mgr *mgr,
+			struct drm_dp_mst_port *port, const char *path)
+{
+	struct nv50_mstm *mstm = nv50_mstm(mgr);
+	struct nv50_mstc *mstc;
+	int ret;
+
+	ret = nv50_mstc_new(mstm, port, path, &mstc);
+	if (ret) {
+		if (mstc)
+			mstc->connector.funcs->destroy(&mstc->connector);
+		return NULL;
+	}
+
+	return &mstc->connector;
+}
+
+static const struct drm_dp_mst_topology_cbs
+nv50_mstm = {
+	.add_connector = nv50_mstm_add_connector,
+	.register_connector = nv50_mstm_register_connector,
+	.destroy_connector = nv50_mstm_destroy_connector,
+	.hotplug = nv50_mstm_hotplug,
+};
+
+void
+nv50_mstm_service(struct nv50_mstm *mstm)
+{
+	struct drm_dp_aux *aux = mstm ? mstm->mgr.aux : NULL;
+	bool handled = true;
+	int ret;
+	u8 esi[8] = {};
+
+	if (!aux)
+		return;
+
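+	/* Standard MST IRQ servicing: read the ESI registers, let the
+	 * topology manager process any events, ack what was handled, and
+	 * repeat until nothing is left pending.
+	 */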
+	while (handled) {
+		ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT_ESI, esi, 8);
+		if (ret != 8) {
+			drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
+			return;
+		}
+
+		drm_dp_mst_hpd_irq(&mstm->mgr, esi, &handled);
+		if (!handled)
+			break;
+
+		drm_dp_dpcd_write(aux, DP_SINK_COUNT_ESI + 1, &esi[1], 3);
+	}
+}
+
+void
+nv50_mstm_remove(struct nv50_mstm *mstm)
+{
+	if (mstm)
+		drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
+}
+
+static int
+nv50_mstm_enable(struct nv50_mstm *mstm, u8 dpcd, int state)
+{
+	struct nouveau_encoder *outp = mstm->outp;
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_sor_dp_mst_link_v0 mst;
+	} args = {
+		.base.version = 1,
+		.base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_LINK,
+		.base.hasht = outp->dcb->hasht,
+		.base.hashm = outp->dcb->hashm,
+		.mst.state = state,
+	};
+	struct nouveau_drm *drm = nouveau_drm(outp->base.base.dev);
+	struct nvif_object *disp = &drm->display->disp.object;
+	int ret;
+
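+	/* DP_MSTM_CTRL only exists on DPCD 1.2+ sinks; older sinks just
+	 * get the SOR-side MST link state update below.
+	 */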
+	if (dpcd >= 0x12) {
+		ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CTRL, &dpcd);
+		if (ret < 0)
+			return ret;
+
+		dpcd &= ~DP_MST_EN;
+		if (state)
+			dpcd |= DP_MST_EN;
+
+		ret = drm_dp_dpcd_writeb(mstm->mgr.aux, DP_MSTM_CTRL, dpcd);
+		if (ret < 0)
+			return ret;
+	}
+
+	return nvif_mthd(disp, 0, &args, sizeof(args));
+}
+
+int
+nv50_mstm_detect(struct nv50_mstm *mstm, u8 dpcd[8], int allow)
+{
+	int ret, state = 0;
+
+	if (!mstm)
+		return 0;
+
+	if (dpcd[0] >= 0x12) {
+		ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CAP, &dpcd[1]);
+		if (ret < 0)
+			return ret;
+
+		if (!(dpcd[1] & DP_MST_CAP))
+			dpcd[0] = 0x11;
+		else
+			state = allow;
+	}
+
+	ret = nv50_mstm_enable(mstm, dpcd[0], state);
+	if (ret)
+		return ret;
+
+	ret = drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, state);
+	if (ret)
+		return nv50_mstm_enable(mstm, dpcd[0], 0);
+
+	return mstm->mgr.mst_state;
+}
+
+static void
+nv50_mstm_fini(struct nv50_mstm *mstm)
+{
+	if (mstm && mstm->mgr.mst_state)
+		drm_dp_mst_topology_mgr_suspend(&mstm->mgr);
+}
+
+static void
+nv50_mstm_init(struct nv50_mstm *mstm)
+{
+	if (mstm && mstm->mgr.mst_state)
+		drm_dp_mst_topology_mgr_resume(&mstm->mgr);
+}
+
+static void
+nv50_mstm_del(struct nv50_mstm **pmstm)
+{
+	struct nv50_mstm *mstm = *pmstm;
+	if (mstm) {
+		kfree(*pmstm);
+		*pmstm = NULL;
+	}
+}
+
+static int
+nv50_mstm_new(struct nouveau_encoder *outp, struct drm_dp_aux *aux, int aux_max,
+	      int conn_base_id, struct nv50_mstm **pmstm)
+{
+	const int max_payloads = hweight8(outp->dcb->heads);
+	struct drm_device *dev = outp->base.base.dev;
+	struct nv50_mstm *mstm;
+	int ret, i;
+	u8 dpcd;
+
+	/* This is a workaround for some monitors not functioning
+	 * correctly in MST mode on initial module load.  I think
+	 * some bad interaction with the VBIOS may be responsible.
+	 *
+	 * A good ol' off and on again seems to work here ;)
+	 */
+	ret = drm_dp_dpcd_readb(aux, DP_DPCD_REV, &dpcd);
+	if (ret >= 0 && dpcd >= 0x12)
+		drm_dp_dpcd_writeb(aux, DP_MSTM_CTRL, 0);
+
+	if (!(mstm = *pmstm = kzalloc(sizeof(*mstm), GFP_KERNEL)))
+		return -ENOMEM;
+	mstm->outp = outp;
+	mstm->mgr.cbs = &nv50_mstm;
+
+	ret = drm_dp_mst_topology_mgr_init(&mstm->mgr, dev, aux, aux_max,
+					   max_payloads, conn_base_id);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < max_payloads; i++) {
+		ret = nv50_msto_new(dev, outp->dcb->heads, outp->base.base.name,
+				    i, &mstm->msto[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/******************************************************************************
+ * SOR
+ *****************************************************************************/
+static void
+nv50_sor_update(struct nouveau_encoder *nv_encoder, u8 head,
+		struct nv50_head_atom *asyh, u8 proto, u8 depth)
+{
+	struct nv50_disp *disp = nv50_disp(nv_encoder->base.base.dev);
+	struct nv50_core *core = disp->core;
+
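+	/* nv_encoder->ctrl caches the SOR control word: the owned-head
+	 * mask lives in the low bits, the protocol in bits 8 and up.
+	 */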
+	if (!asyh) {
+		nv_encoder->ctrl &= ~BIT(head);
+		if (!(nv_encoder->ctrl & 0x0000000f))
+			nv_encoder->ctrl = 0;
+	} else {
+		nv_encoder->ctrl |= proto << 8;
+		nv_encoder->ctrl |= BIT(head);
+		asyh->or.depth = depth;
+	}
+
+	core->func->sor->ctrl(core, nv_encoder->or, nv_encoder->ctrl, asyh);
+}
+
+static void
+nv50_sor_disable(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc);
+
+	nv_encoder->crtc = NULL;
+
+	if (nv_crtc) {
+		struct nvkm_i2c_aux *aux = nv_encoder->aux;
+		u8 pwr;
+
+		if (aux) {
+			int ret = nvkm_rdaux(aux, DP_SET_POWER, &pwr, 1);
+			if (ret == 0) {
+				pwr &= ~DP_SET_POWER_MASK;
+				pwr |=  DP_SET_POWER_D3;
+				nvkm_wraux(aux, DP_SET_POWER, &pwr, 1);
+			}
+		}
+
+		nv_encoder->update(nv_encoder, nv_crtc->index, NULL, 0, 0);
+		nv50_audio_disable(encoder, nv_crtc);
+		nv50_hdmi_disable(&nv_encoder->base.base, nv_crtc);
+		nv50_outp_release(nv_encoder);
+	}
+}
+
+static void
+nv50_sor_enable(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nv50_head_atom *asyh = nv50_head_atom(nv_crtc->base.state);
+	struct drm_display_mode *mode = &asyh->state.adjusted_mode;
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_sor_lvds_script_v0 lvds;
+	} lvds = {
+		.base.version = 1,
+		.base.method  = NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT,
+		.base.hasht   = nv_encoder->dcb->hasht,
+		.base.hashm   = nv_encoder->dcb->hashm,
+	};
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct drm_device *dev = encoder->dev;
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_connector *nv_connector;
+	struct nvbios *bios = &drm->vbios;
+	u8 proto = 0xf;
+	u8 depth = 0x0;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	nv_encoder->crtc = encoder->crtc;
+	nv50_outp_acquire(nv_encoder);
+
+	switch (nv_encoder->dcb->type) {
+	case DCB_OUTPUT_TMDS:
+		if (nv_encoder->link & 1) {
+			proto = 0x1;
+			/* Only enable dual-link if:
+			 *  - Need to (i.e. rate > 165MHz)
+			 *  - DCB says we can
+			 *  - Not an HDMI monitor, since there's no dual-link
+			 *    on HDMI.
+			 */
+			if (mode->clock >= 165000 &&
+			    nv_encoder->dcb->duallink_possible &&
+			    !drm_detect_hdmi_monitor(nv_connector->edid))
+				proto |= 0x4;
+		} else {
+			proto = 0x2;
+		}
+
+		nv50_hdmi_enable(&nv_encoder->base.base, mode);
+		break;
+	case DCB_OUTPUT_LVDS:
+		proto = 0x0;
+
+		if (bios->fp_no_ddc) {
+			if (bios->fp.dual_link)
+				lvds.lvds.script |= 0x0100;
+			if (bios->fp.if_is_24bit)
+				lvds.lvds.script |= 0x0200;
+		} else {
+			if (nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) {
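+				/* SPWG panels report dual-link (value 2)
+				 * in EDID byte 121.
+				 */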
+				if (((u8 *)nv_connector->edid)[121] == 2)
+					lvds.lvds.script |= 0x0100;
+			} else
+			if (mode->clock >= bios->fp.duallink_transition_clk) {
+				lvds.lvds.script |= 0x0100;
+			}
+
+			if (lvds.lvds.script & 0x0100) {
+				if (bios->fp.strapless_is_24bit & 2)
+					lvds.lvds.script |= 0x0200;
+			} else {
+				if (bios->fp.strapless_is_24bit & 1)
+					lvds.lvds.script |= 0x0200;
+			}
+
+			if (nv_connector->base.display_info.bpc == 8)
+				lvds.lvds.script |= 0x0200;
+		}
+
+		nvif_mthd(&disp->disp->object, 0, &lvds, sizeof(lvds));
+		break;
+	case DCB_OUTPUT_DP:
+		if (nv_connector->base.display_info.bpc == 6)
+			depth = 0x2;
+		else
+		if (nv_connector->base.display_info.bpc == 8)
+			depth = 0x5;
+		else
+			depth = 0x6;
+
+		if (nv_encoder->link & 1)
+			proto = 0x8;
+		else
+			proto = 0x9;
+
+		nv50_audio_enable(encoder, mode);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	nv_encoder->update(nv_encoder, nv_crtc->index, asyh, proto, depth);
+}
+
+static const struct drm_encoder_helper_funcs
+nv50_sor_help = {
+	.atomic_check = nv50_outp_atomic_check,
+	.enable = nv50_sor_enable,
+	.disable = nv50_sor_disable,
+};
+
+static void
+nv50_sor_destroy(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	nv50_mstm_del(&nv_encoder->dp.mstm);
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_funcs
+nv50_sor_func = {
+	.destroy = nv50_sor_destroy,
+};
+
+static int
+nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
+{
+	struct nouveau_connector *nv_connector = nouveau_connector(connector);
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+	u8 ver, hdr, cnt, len;
+	u32 data;
+	int type, ret;
+
+	switch (dcbe->type) {
+	case DCB_OUTPUT_LVDS: type = DRM_MODE_ENCODER_LVDS; break;
+	case DCB_OUTPUT_TMDS:
+	case DCB_OUTPUT_DP:
+	default:
+		type = DRM_MODE_ENCODER_TMDS;
+		break;
+	}
+
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+	nv_encoder->update = nv50_sor_update;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type,
+			 "sor-%04x-%04x", dcbe->hasht, dcbe->hashm);
+	drm_encoder_helper_add(encoder, &nv50_sor_help);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+
+	if (dcbe->type == DCB_OUTPUT_DP) {
+		struct nv50_disp *disp = nv50_disp(encoder->dev);
+		struct nvkm_i2c_aux *aux =
+			nvkm_i2c_aux_find(i2c, dcbe->i2c_index);
+		if (aux) {
+			if (disp->disp->object.oclass < GF110_DISP) {
+				/* HW has no support for address-only
+				 * transactions, so we're required to
+				 * use custom I2C-over-AUX code.
+				 */
+				nv_encoder->i2c = &aux->i2c;
+			} else {
+				nv_encoder->i2c = &nv_connector->aux.ddc;
+			}
+			nv_encoder->aux = aux;
+		}
+
+		if ((data = nvbios_dp_table(bios, &ver, &hdr, &cnt, &len)) &&
+		    ver >= 0x40 && (nvbios_rd08(bios, data + 0x08) & 0x04)) {
+			ret = nv50_mstm_new(nv_encoder, &nv_connector->aux, 16,
+					    nv_connector->base.base.id,
+					    &nv_encoder->dp.mstm);
+			if (ret)
+				return ret;
+		}
+	} else {
+		struct nvkm_i2c_bus *bus =
+			nvkm_i2c_bus_find(i2c, dcbe->i2c_index);
+		if (bus)
+			nv_encoder->i2c = &bus->i2c;
+	}
+
+	return 0;
+}
+
+/******************************************************************************
+ * PIOR
+ *****************************************************************************/
+static int
+nv50_pior_atomic_check(struct drm_encoder *encoder,
+		       struct drm_crtc_state *crtc_state,
+		       struct drm_connector_state *conn_state)
+{
+	int ret = nv50_outp_atomic_check(encoder, crtc_state, conn_state);
+	if (ret)
+		return ret;
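+	/* PIOR (external encoder) pixel clocks appear to run at twice
+	 * the mode clock, so double it here.
+	 */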
+	crtc_state->adjusted_mode.clock *= 2;
+	return 0;
+}
+
+static void
+nv50_pior_disable(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_core *core = nv50_disp(encoder->dev)->core;
+	if (nv_encoder->crtc)
+		core->func->pior->ctrl(core, nv_encoder->or, 0x00000000, NULL);
+	nv_encoder->crtc = NULL;
+	nv50_outp_release(nv_encoder);
+}
+
+static void
+nv50_pior_enable(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector;
+	struct nv50_head_atom *asyh = nv50_head_atom(nv_crtc->base.state);
+	struct nv50_core *core = nv50_disp(encoder->dev)->core;
+	u8 owner = 1 << nv_crtc->index;
+	u8 proto;
+
+	nv50_outp_acquire(nv_encoder);
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	switch (nv_connector->base.display_info.bpc) {
+	case 10: asyh->or.depth = 0x6; break;
+	case  8: asyh->or.depth = 0x5; break;
+	case  6: asyh->or.depth = 0x2; break;
+	default: asyh->or.depth = 0x0; break;
+	}
+
+	switch (nv_encoder->dcb->type) {
+	case DCB_OUTPUT_TMDS:
+	case DCB_OUTPUT_DP:
+		proto = 0x0;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	core->func->pior->ctrl(core, nv_encoder->or, (proto << 8) | owner, asyh);
+	nv_encoder->crtc = encoder->crtc;
+}
+
+static const struct drm_encoder_helper_funcs
+nv50_pior_help = {
+	.atomic_check = nv50_pior_atomic_check,
+	.enable = nv50_pior_enable,
+	.disable = nv50_pior_disable,
+};
+
+static void
+nv50_pior_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_funcs
+nv50_pior_func = {
+	.destroy = nv50_pior_destroy,
+};
+
+static int
+nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
+{
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
+	struct nvkm_i2c_bus *bus = NULL;
+	struct nvkm_i2c_aux *aux = NULL;
+	struct i2c_adapter *ddc;
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+	int type;
+
+	switch (dcbe->type) {
+	case DCB_OUTPUT_TMDS:
+		bus  = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_EXT(dcbe->extdev));
+		ddc  = bus ? &bus->i2c : NULL;
+		type = DRM_MODE_ENCODER_TMDS;
+		break;
+	case DCB_OUTPUT_DP:
+		aux  = nvkm_i2c_aux_find(i2c, NVKM_I2C_AUX_EXT(dcbe->extdev));
+		ddc  = aux ? &aux->i2c : NULL;
+		type = DRM_MODE_ENCODER_TMDS;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+	nv_encoder->i2c = ddc;
+	nv_encoder->aux = aux;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type,
+			 "pior-%04x-%04x", dcbe->hasht, dcbe->hashm);
+	drm_encoder_helper_add(encoder, &nv50_pior_help);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	return 0;
+}
+
+/******************************************************************************
+ * Atomic
+ *****************************************************************************/
+
+static void
+nv50_disp_atomic_commit_core(struct nouveau_drm *drm, u32 *interlock)
+{
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_core *core = disp->core;
+	struct nv50_mstm *mstm;
+	struct drm_encoder *encoder;
+
+	NV_ATOMIC(drm, "commit core %08x\n", interlock[NV50_DISP_INTERLOCK_BASE]);
+
+	drm_for_each_encoder(encoder, drm->dev) {
+		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
+			mstm = nouveau_encoder(encoder)->dp.mstm;
+			if (mstm && mstm->modified)
+				nv50_mstm_prepare(mstm);
+		}
+	}
+
+	core->func->ntfy_init(disp->sync, NV50_DISP_CORE_NTFY);
+	core->func->update(core, interlock, true);
+	if (core->func->ntfy_wait_done(disp->sync, NV50_DISP_CORE_NTFY,
+				       disp->core->chan.base.device))
+		NV_ERROR(drm, "core notifier timeout\n");
+
+	drm_for_each_encoder(encoder, drm->dev) {
+		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
+			mstm = nouveau_encoder(encoder)->dp.mstm;
+			if (mstm && mstm->modified)
+				nv50_mstm_cleanup(mstm);
+		}
+	}
+}
+
+static void
+nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_crtc_state *new_crtc_state, *old_crtc_state;
+	struct drm_crtc *crtc;
+	struct drm_plane_state *new_plane_state;
+	struct drm_plane *plane;
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nv50_disp *disp = nv50_disp(dev);
+	struct nv50_atom *atom = nv50_atom(state);
+	struct nv50_outp_atom *outp, *outt;
+	u32 interlock[NV50_DISP_INTERLOCK__SIZE] = {};
+	int i;
+
+	NV_ATOMIC(drm, "commit %d %d\n", atom->lock_core, atom->flush_disable);
+	drm_atomic_helper_wait_for_fences(dev, state, false);
+	drm_atomic_helper_wait_for_dependencies(state);
+	drm_atomic_helper_update_legacy_modeset_state(dev, state);
+
+	if (atom->lock_core)
+		mutex_lock(&disp->mutex);
+
+	/* Disable head(s). */
+	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+		struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state);
+		struct nv50_head *head = nv50_head(crtc);
+
+		NV_ATOMIC(drm, "%s: clr %04x (set %04x)\n", crtc->name,
+			  asyh->clr.mask, asyh->set.mask);
+		if (old_crtc_state->active && !new_crtc_state->active)
+			drm_crtc_vblank_off(crtc);
+
+		if (asyh->clr.mask) {
+			nv50_head_flush_clr(head, asyh, atom->flush_disable);
+			interlock[NV50_DISP_INTERLOCK_CORE] |= 1;
+		}
+	}
+
+	/* Disable plane(s). */
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+
+		NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", plane->name,
+			  asyw->clr.mask, asyw->set.mask);
+		if (!asyw->clr.mask)
+			continue;
+
+		nv50_wndw_flush_clr(wndw, interlock, atom->flush_disable, asyw);
+	}
+
+	/* Disable output path(s). */
+	list_for_each_entry(outp, &atom->outp, head) {
+		const struct drm_encoder_helper_funcs *help;
+		struct drm_encoder *encoder;
+
+		encoder = outp->encoder;
+		help = encoder->helper_private;
+
+		NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", encoder->name,
+			  outp->clr.mask, outp->set.mask);
+
+		if (outp->clr.mask) {
+			help->disable(encoder);
+			interlock[NV50_DISP_INTERLOCK_CORE] |= 1;
+			if (outp->flush_disable) {
+				nv50_disp_atomic_commit_core(drm, interlock);
+				memset(interlock, 0x00, sizeof(interlock));
+			}
+		}
+	}
+
+	/* Flush disable. */
+	if (interlock[NV50_DISP_INTERLOCK_CORE]) {
+		if (atom->flush_disable) {
+			for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+				struct nv50_wndw *wndw = nv50_wndw(plane);
+				if (interlock[wndw->interlock.type] & wndw->interlock.data) {
+					if (wndw->func->update)
+						wndw->func->update(wndw, interlock);
+				}
+			}
+
+			nv50_disp_atomic_commit_core(drm, interlock);
+			memset(interlock, 0x00, sizeof(interlock));
+		}
+	}
+
+	/* Update output path(s). */
+	list_for_each_entry_safe(outp, outt, &atom->outp, head) {
+		const struct drm_encoder_helper_funcs *help;
+		struct drm_encoder *encoder;
+
+		encoder = outp->encoder;
+		help = encoder->helper_private;
+
+		NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", encoder->name,
+			  outp->set.mask, outp->clr.mask);
+
+		if (outp->set.mask) {
+			help->enable(encoder);
+			interlock[NV50_DISP_INTERLOCK_CORE] = 1;
+		}
+
+		list_del(&outp->head);
+		kfree(outp);
+	}
+
+	/* Update head(s). */
+	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+		struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state);
+		struct nv50_head *head = nv50_head(crtc);
+
+		NV_ATOMIC(drm, "%s: set %04x (clr %04x)\n", crtc->name,
+			  asyh->set.mask, asyh->clr.mask);
+
+		if (asyh->set.mask) {
+			nv50_head_flush_set(head, asyh);
+			interlock[NV50_DISP_INTERLOCK_CORE] = 1;
+		}
+
+		if (new_crtc_state->active) {
+			if (!old_crtc_state->active)
+				drm_crtc_vblank_on(crtc);
+			if (new_crtc_state->event)
+				drm_crtc_vblank_get(crtc);
+		}
+	}
+
+	/* Update plane(s). */
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+
+		NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", plane->name,
+			  asyw->set.mask, asyw->clr.mask);
+		if ( !asyw->set.mask &&
+		    (!asyw->clr.mask || atom->flush_disable))
+			continue;
+
+		nv50_wndw_flush_set(wndw, interlock, asyw);
+	}
+
+	/* Flush update. */
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+		if (interlock[wndw->interlock.type] & wndw->interlock.data) {
+			if (wndw->func->update)
+				wndw->func->update(wndw, interlock);
+		}
+	}
+
+	if (interlock[NV50_DISP_INTERLOCK_CORE]) {
+		if (interlock[NV50_DISP_INTERLOCK_BASE] ||
+		    !atom->state.legacy_cursor_update)
+			nv50_disp_atomic_commit_core(drm, interlock);
+		else
+			disp->core->func->update(disp->core, interlock, false);
+	}
+
+	if (atom->lock_core)
+		mutex_unlock(&disp->mutex);
+
+	/* Wait for HW to signal completion. */
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+		int ret = nv50_wndw_wait_armed(wndw, asyw);
+		if (ret)
+			NV_ERROR(drm, "%s: timeout\n", plane->name);
+	}
+
+	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+		if (new_crtc_state->event) {
+			unsigned long flags;
+			/* Get correct count/ts if racing with vblank irq */
+			if (new_crtc_state->active)
+				drm_crtc_accurate_vblank_count(crtc);
+			spin_lock_irqsave(&crtc->dev->event_lock, flags);
+			drm_crtc_send_vblank_event(crtc, new_crtc_state->event);
+			spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+			new_crtc_state->event = NULL;
+			if (new_crtc_state->active)
+				drm_crtc_vblank_put(crtc);
+		}
+	}
+
+	drm_atomic_helper_commit_hw_done(state);
+	drm_atomic_helper_cleanup_planes(dev, state);
+	drm_atomic_helper_commit_cleanup_done(state);
+	drm_atomic_state_put(state);
+}
+
+static void
+nv50_disp_atomic_commit_work(struct work_struct *work)
+{
+	struct drm_atomic_state *state =
+		container_of(work, typeof(*state), commit_work);
+	nv50_disp_atomic_commit_tail(state);
+}
+
+static int
+nv50_disp_atomic_commit(struct drm_device *dev,
+			struct drm_atomic_state *state, bool nonblock)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct drm_plane_state *new_plane_state;
+	struct drm_plane *plane;
+	struct drm_crtc *crtc;
+	bool active = false;
+	int ret, i;
+
+	ret = pm_runtime_get_sync(dev->dev);
+	if (ret < 0 && ret != -EACCES)
+		return ret;
+
+	ret = drm_atomic_helper_setup_commit(state, nonblock);
+	if (ret)
+		goto done;
+
+	INIT_WORK(&state->commit_work, nv50_disp_atomic_commit_work);
+
+	ret = drm_atomic_helper_prepare_planes(dev, state);
+	if (ret)
+		goto done;
+
+	if (!nonblock) {
+		ret = drm_atomic_helper_wait_for_fences(dev, state, true);
+		if (ret)
+			goto err_cleanup;
+	}
+
+	ret = drm_atomic_helper_swap_state(state, true);
+	if (ret)
+		goto err_cleanup;
+
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+
+		if (asyw->set.image)
+			nv50_wndw_ntfy_enable(wndw, asyw);
+	}
+
+	drm_atomic_state_get(state);
+
+	if (nonblock)
+		queue_work(system_unbound_wq, &state->commit_work);
+	else
+		nv50_disp_atomic_commit_tail(state);
+
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->state->enable) {
+			if (!drm->have_disp_power_ref) {
+				drm->have_disp_power_ref = true;
+				return 0;
+			}
+			active = true;
+			break;
+		}
+	}
+
+	if (!active && drm->have_disp_power_ref) {
+		pm_runtime_put_autosuspend(dev->dev);
+		drm->have_disp_power_ref = false;
+	}
+
+err_cleanup:
+	if (ret)
+		drm_atomic_helper_cleanup_planes(dev, state);
+done:
+	pm_runtime_put_autosuspend(dev->dev);
+	return ret;
+}
+
+static struct nv50_outp_atom *
+nv50_disp_outp_atomic_add(struct nv50_atom *atom, struct drm_encoder *encoder)
+{
+	struct nv50_outp_atom *outp;
+
+	list_for_each_entry(outp, &atom->outp, head) {
+		if (outp->encoder == encoder)
+			return outp;
+	}
+
+	outp = kzalloc(sizeof(*outp), GFP_KERNEL);
+	if (!outp)
+		return ERR_PTR(-ENOMEM);
+
+	list_add(&outp->head, &atom->outp);
+	outp->encoder = encoder;
+	return outp;
+}
+
+static int
+nv50_disp_outp_atomic_check_clr(struct nv50_atom *atom,
+				struct drm_connector_state *old_connector_state)
+{
+	struct drm_encoder *encoder = old_connector_state->best_encoder;
+	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+	struct drm_crtc *crtc;
+	struct nv50_outp_atom *outp;
+
+	if (!(crtc = old_connector_state->crtc))
+		return 0;
+
+	old_crtc_state = drm_atomic_get_old_crtc_state(&atom->state, crtc);
+	new_crtc_state = drm_atomic_get_new_crtc_state(&atom->state, crtc);
+	if (old_crtc_state->active && drm_atomic_crtc_needs_modeset(new_crtc_state)) {
+		outp = nv50_disp_outp_atomic_add(atom, encoder);
+		if (IS_ERR(outp))
+			return PTR_ERR(outp);
+
+		if (outp->encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
+			outp->flush_disable = true;
+			atom->flush_disable = true;
+		}
+		outp->clr.ctrl = true;
+		atom->lock_core = true;
+	}
+
+	return 0;
+}
+
+static int
+nv50_disp_outp_atomic_check_set(struct nv50_atom *atom,
+				struct drm_connector_state *connector_state)
+{
+	struct drm_encoder *encoder = connector_state->best_encoder;
+	struct drm_crtc_state *new_crtc_state;
+	struct drm_crtc *crtc;
+	struct nv50_outp_atom *outp;
+
+	if (!(crtc = connector_state->crtc))
+		return 0;
+
+	new_crtc_state = drm_atomic_get_new_crtc_state(&atom->state, crtc);
+	if (new_crtc_state->active && drm_atomic_crtc_needs_modeset(new_crtc_state)) {
+		outp = nv50_disp_outp_atomic_add(atom, encoder);
+		if (IS_ERR(outp))
+			return PTR_ERR(outp);
+
+		outp->set.ctrl = true;
+		atom->lock_core = true;
+	}
+
+	return 0;
+}
+
+static int
+nv50_disp_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
+{
+	struct nv50_atom *atom = nv50_atom(state);
+	struct drm_connector_state *old_connector_state, *new_connector_state;
+	struct drm_connector *connector;
+	struct drm_crtc_state *new_crtc_state;
+	struct drm_crtc *crtc;
+	int ret, i;
+
+	/* We need to handle colour management on a per-plane basis. */
+	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
+		if (new_crtc_state->color_mgmt_changed) {
+			ret = drm_atomic_add_affected_planes(state, crtc);
+			if (ret)
+				return ret;
+		}
+	}
+
+	ret = drm_atomic_helper_check(dev, state);
+	if (ret)
+		return ret;
+
+	for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) {
+		ret = nv50_disp_outp_atomic_check_clr(atom, old_connector_state);
+		if (ret)
+			return ret;
+
+		ret = nv50_disp_outp_atomic_check_set(atom, new_connector_state);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void
+nv50_disp_atomic_state_clear(struct drm_atomic_state *state)
+{
+	struct nv50_atom *atom = nv50_atom(state);
+	struct nv50_outp_atom *outp, *outt;
+
+	list_for_each_entry_safe(outp, outt, &atom->outp, head) {
+		list_del(&outp->head);
+		kfree(outp);
+	}
+
+	drm_atomic_state_default_clear(state);
+}
+
+static void
+nv50_disp_atomic_state_free(struct drm_atomic_state *state)
+{
+	struct nv50_atom *atom = nv50_atom(state);
+	drm_atomic_state_default_release(&atom->state);
+	kfree(atom);
+}
+
+static struct drm_atomic_state *
+nv50_disp_atomic_state_alloc(struct drm_device *dev)
+{
+	struct nv50_atom *atom;
+	if (!(atom = kzalloc(sizeof(*atom), GFP_KERNEL)) ||
+	    drm_atomic_state_init(dev, &atom->state) < 0) {
+		kfree(atom);
+		return NULL;
+	}
+	INIT_LIST_HEAD(&atom->outp);
+	return &atom->state;
+}
+
+static const struct drm_mode_config_funcs
+nv50_disp_func = {
+	.fb_create = nouveau_user_framebuffer_create,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
+	.atomic_check = nv50_disp_atomic_check,
+	.atomic_commit = nv50_disp_atomic_commit,
+	.atomic_state_alloc = nv50_disp_atomic_state_alloc,
+	.atomic_state_clear = nv50_disp_atomic_state_clear,
+	.atomic_state_free = nv50_disp_atomic_state_free,
+};
+
+/******************************************************************************
+ * Init
+ *****************************************************************************/
+
+void
+nv50_display_fini(struct drm_device *dev)
+{
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+	struct drm_plane *plane;
+
+	drm_for_each_plane(plane, dev) {
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+		if (plane->funcs != &nv50_wndw)
+			continue;
+		nv50_wndw_fini(wndw);
+	}
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
+			nv_encoder = nouveau_encoder(encoder);
+			nv50_mstm_fini(nv_encoder->dp.mstm);
+		}
+	}
+}
+
+int
+nv50_display_init(struct drm_device *dev)
+{
+	struct nv50_core *core = nv50_disp(dev)->core;
+	struct drm_encoder *encoder;
+	struct drm_plane *plane;
+
+	core->func->init(core);
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
+			struct nouveau_encoder *nv_encoder =
+				nouveau_encoder(encoder);
+			nv50_mstm_init(nv_encoder->dp.mstm);
+		}
+	}
+
+	drm_for_each_plane(plane, dev) {
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+		if (plane->funcs != &nv50_wndw)
+			continue;
+		nv50_wndw_init(wndw);
+	}
+
+	return 0;
+}
+
+void
+nv50_display_destroy(struct drm_device *dev)
+{
+	struct nv50_disp *disp = nv50_disp(dev);
+
+	nv50_core_del(&disp->core);
+
+	nouveau_bo_unmap(disp->sync);
+	if (disp->sync)
+		nouveau_bo_unpin(disp->sync);
+	nouveau_bo_ref(NULL, &disp->sync);
+
+	nouveau_display(dev)->priv = NULL;
+	kfree(disp);
+}
+
+MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)");
+static int nouveau_atomic = 0;
+module_param_named(atomic, nouveau_atomic, int, 0400);
+
+int
+nv50_display_create(struct drm_device *dev)
+{
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct dcb_table *dcb = &drm->vbios.dcb;
+	struct drm_connector *connector, *tmp;
+	struct nv50_disp *disp;
+	struct dcb_output *dcbe;
+	int crtcs, ret, i;
+
+	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
+	if (!disp)
+		return -ENOMEM;
+
+	mutex_init(&disp->mutex);
+
+	nouveau_display(dev)->priv = disp;
+	nouveau_display(dev)->dtor = nv50_display_destroy;
+	nouveau_display(dev)->init = nv50_display_init;
+	nouveau_display(dev)->fini = nv50_display_fini;
+	disp->disp = &nouveau_display(dev)->disp;
+	dev->mode_config.funcs = &nv50_disp_func;
+	dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP;
+	if (nouveau_atomic)
+		dev->driver->driver_features |= DRIVER_ATOMIC;
+
+	/* small shared memory area we use for notifiers and semaphores */
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+			     0, 0x0000, NULL, NULL, &disp->sync);
+	if (!ret) {
+		ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM, true);
+		if (!ret) {
+			ret = nouveau_bo_map(disp->sync);
+			if (ret)
+				nouveau_bo_unpin(disp->sync);
+		}
+		if (ret)
+			nouveau_bo_ref(NULL, &disp->sync);
+	}
+
+	if (ret)
+		goto out;
+
+	/* allocate master evo channel */
+	ret = nv50_core_new(drm, &disp->core);
+	if (ret)
+		goto out;
+
+	/* create crtc objects to represent the hw heads */
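+	/* The head mask is read from the display engine on GF110 and
+	 * newer; earlier chips always expose two heads.
+	 */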
+	if (disp->disp->object.oclass >= GV100_DISP)
+		crtcs = nvif_rd32(&device->object, 0x610060) & 0xff;
+	else
+	if (disp->disp->object.oclass >= GF110_DISP)
+		crtcs = nvif_rd32(&device->object, 0x612004) & 0xf;
+	else
+		crtcs = 0x3;
+
+	for (i = 0; i < fls(crtcs); i++) {
+		if (!(crtcs & (1 << i)))
+			continue;
+		ret = nv50_head_create(dev, i);
+		if (ret)
+			goto out;
+	}
+
+	/* create encoder/connector objects based on VBIOS DCB table */
+	for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) {
+		connector = nouveau_connector_create(dev, dcbe->connector);
+		if (IS_ERR(connector))
+			continue;
+
+		if (dcbe->location == DCB_LOC_ON_CHIP) {
+			switch (dcbe->type) {
+			case DCB_OUTPUT_TMDS:
+			case DCB_OUTPUT_LVDS:
+			case DCB_OUTPUT_DP:
+				ret = nv50_sor_create(connector, dcbe);
+				break;
+			case DCB_OUTPUT_ANALOG:
+				ret = nv50_dac_create(connector, dcbe);
+				break;
+			default:
+				ret = -ENODEV;
+				break;
+			}
+		} else {
+			ret = nv50_pior_create(connector, dcbe);
+		}
+
+		if (ret) {
+			NV_WARN(drm, "failed to create encoder %d/%d/%d: %d\n",
+				     dcbe->location, dcbe->type,
+				     ffs(dcbe->or) - 1, ret);
+			ret = 0;
+		}
+	}
+
+	/* cull any connectors we created that don't have an encoder */
+	list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) {
+		if (connector->encoder_ids[0])
+			continue;
+
+		NV_WARN(drm, "%s has no encoders, removing\n",
+			connector->name);
+		connector->funcs->destroy(connector);
+	}
+
+out:
+	if (ret)
+		nv50_display_destroy(dev);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h
new file mode 100644
index 0000000..e48c5eb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h
@@ -0,0 +1,89 @@
+#ifndef __NV50_KMS_H__
+#define __NV50_KMS_H__
+#include <nvif/mem.h>
+
+#include "nouveau_display.h"
+
+struct nv50_disp {
+	struct nvif_disp *disp;
+	struct nv50_core *core;
+
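+/* Layout of the shared sync buffer: one 0x40-byte slot per channel,
+ * with slot 0 holding the core channel notifier and the following
+ * slots holding per-window semaphores and notifiers.
+ */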
+#define NV50_DISP_SYNC(c, o)                                ((c) * 0x040 + (o))
+#define NV50_DISP_CORE_NTFY                       NV50_DISP_SYNC(0      , 0x00)
+#define NV50_DISP_WNDW_SEM0(c)                    NV50_DISP_SYNC(1 + (c), 0x00)
+#define NV50_DISP_WNDW_SEM1(c)                    NV50_DISP_SYNC(1 + (c), 0x10)
+#define NV50_DISP_WNDW_NTFY(c)                    NV50_DISP_SYNC(1 + (c), 0x20)
+#define NV50_DISP_BASE_SEM0(c)                    NV50_DISP_WNDW_SEM0(0 + (c))
+#define NV50_DISP_BASE_SEM1(c)                    NV50_DISP_WNDW_SEM1(0 + (c))
+#define NV50_DISP_BASE_NTFY(c)                    NV50_DISP_WNDW_NTFY(0 + (c))
+#define NV50_DISP_OVLY_SEM0(c)                    NV50_DISP_WNDW_SEM0(4 + (c))
+#define NV50_DISP_OVLY_SEM1(c)                    NV50_DISP_WNDW_SEM1(4 + (c))
+#define NV50_DISP_OVLY_NTFY(c)                    NV50_DISP_WNDW_NTFY(4 + (c))
+	struct nouveau_bo *sync;
+
+	struct mutex mutex;
+};
+
+static inline struct nv50_disp *
+nv50_disp(struct drm_device *dev)
+{
+	return nouveau_display(dev)->priv;
+}
+
+struct nv50_disp_interlock {
+	enum nv50_disp_interlock_type {
+		NV50_DISP_INTERLOCK_CORE = 0,
+		NV50_DISP_INTERLOCK_CURS,
+		NV50_DISP_INTERLOCK_BASE,
+		NV50_DISP_INTERLOCK_OVLY,
+		NV50_DISP_INTERLOCK_WNDW,
+		NV50_DISP_INTERLOCK_WIMM,
+		NV50_DISP_INTERLOCK__SIZE
+	} type;
+	u32 data;
+};
+
+void corec37d_ntfy_init(struct nouveau_bo *, u32);
+
+struct nv50_chan {
+	struct nvif_object user;
+	struct nvif_device *device;
+};
+
+struct nv50_dmac {
+	struct nv50_chan base;
+
+	struct nvif_mem push;
+	u32 *ptr;
+
+	struct nvif_object sync;
+	struct nvif_object vram;
+
+	/* Protects against concurrent pushbuf access to this channel; the
+	 * lock is taken by evo_wait (if the pushbuf reservation succeeds)
+	 * and dropped again by evo_kick. */
+	struct mutex lock;
+};
+
+int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
+		     const s32 *oclass, u8 head, void *data, u32 size,
+		     u64 syncbuf, struct nv50_dmac *dmac);
+void nv50_dmac_destroy(struct nv50_dmac *);
+
+u32 *evo_wait(struct nv50_dmac *, int nr);
+void evo_kick(u32 *, struct nv50_dmac *);
+
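+/* EVO push-buffer emission helpers: evo_mthd() writes a method header
+ * (payload size shifted into the upper bits, method offset in the low
+ * bits) and evo_data() writes the payload words that follow it.
+ */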
+#define evo_mthd(p, m, s) do {						\
+	const u32 _m = (m), _s = (s);					\
+	if (drm_debug & DRM_UT_KMS)					\
+		pr_err("%04x %d %s\n", _m, _s, __func__);		\
+	*((p)++) = ((_s << 18) | _m);					\
+} while (0)
+
+#define evo_data(p, d) do {						\
+	const u32 _d = (d);						\
+	if (drm_debug & DRM_UT_KMS)					\
+		pr_err("\t%08x\n", _d);					\
+	*((p)++) = _d;							\
+} while (0)
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c
new file mode 100644
index 0000000..4f57e53
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.c
@@ -0,0 +1,511 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "head.h"
+#include "base.h"
+#include "core.h"
+#include "curs.h"
+#include "ovly.h"
+
+#include <nvif/class.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include "nouveau_connector.h"
+void
+nv50_head_flush_clr(struct nv50_head *head,
+		    struct nv50_head_atom *asyh, bool flush)
+{
+	union nv50_head_atom_mask clr = {
+		.mask = asyh->clr.mask & ~(flush ? 0 : asyh->set.mask),
+	};
+	if (clr.olut) head->func->olut_clr(head);
+	if (clr.core) head->func->core_clr(head);
+	if (clr.curs) head->func->curs_clr(head);
+}
+
+void
+nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	if (asyh->set.view   ) head->func->view    (head, asyh);
+	if (asyh->set.mode   ) head->func->mode    (head, asyh);
+	if (asyh->set.core   ) head->func->core_set(head, asyh);
+	if (asyh->set.olut   ) {
+		asyh->olut.offset = nv50_lut_load(&head->olut,
+						  asyh->olut.mode <= 1,
+						  asyh->olut.buffer,
+						  asyh->state.gamma_lut);
+		head->func->olut_set(head, asyh);
+	}
+	if (asyh->set.curs   ) head->func->curs_set(head, asyh);
+	if (asyh->set.base   ) head->func->base    (head, asyh);
+	if (asyh->set.ovly   ) head->func->ovly    (head, asyh);
+	if (asyh->set.dither ) head->func->dither  (head, asyh);
+	if (asyh->set.procamp) head->func->procamp (head, asyh);
+	if (asyh->set.or     ) head->func->or      (head, asyh);
+}
+
+static void
+nv50_head_atomic_check_procamp(struct nv50_head_atom *armh,
+			       struct nv50_head_atom *asyh,
+			       struct nouveau_conn_atom *asyc)
+{
+	const int vib = asyc->procamp.color_vibrance - 100;
+	const int hue = asyc->procamp.vibrant_hue - 90;
+	const int adj = (vib > 0) ? 50 : 0;
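+	/* The hardware takes saturation/hue as 12-bit sin/cos
+	 * coefficients; scale the percentage-based connector properties
+	 * into that range, rounding the positive-vibrance case.
+	 */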
+	asyh->procamp.sat.cos = ((vib * 2047 + adj) / 100) & 0xfff;
+	asyh->procamp.sat.sin = ((hue * 2047) / 100) & 0xfff;
+	asyh->set.procamp = true;
+}
+
+static void
+nv50_head_atomic_check_dither(struct nv50_head_atom *armh,
+			      struct nv50_head_atom *asyh,
+			      struct nouveau_conn_atom *asyc)
+{
+	struct drm_connector *connector = asyc->state.connector;
+	u32 mode = 0x00;
+
+	if (asyc->dither.mode == DITHERING_MODE_AUTO) {
+		if (asyh->base.depth > connector->display_info.bpc * 3)
+			mode = DITHERING_MODE_DYNAMIC2X2;
+	} else {
+		mode = asyc->dither.mode;
+	}
+
+	if (asyc->dither.depth == DITHERING_DEPTH_AUTO) {
+		if (connector->display_info.bpc >= 8)
+			mode |= DITHERING_DEPTH_8BPC;
+	} else {
+		mode |= asyc->dither.depth;
+	}
+
+	asyh->dither.enable = mode;
+	asyh->dither.bits = mode >> 1;
+	asyh->dither.mode = mode >> 3;
+	asyh->set.dither = true;
+}
+
+static void
+nv50_head_atomic_check_view(struct nv50_head_atom *armh,
+			    struct nv50_head_atom *asyh,
+			    struct nouveau_conn_atom *asyc)
+{
+	struct drm_connector *connector = asyc->state.connector;
+	struct drm_display_mode *omode = &asyh->state.adjusted_mode;
+	struct drm_display_mode *umode = &asyh->state.mode;
+	int mode = asyc->scaler.mode;
+	struct edid *edid;
+	int umode_vdisplay, omode_hdisplay, omode_vdisplay;
+
+	if (connector->edid_blob_ptr)
+		edid = (struct edid *)connector->edid_blob_ptr->data;
+	else
+		edid = NULL;
+
+	if (!asyc->scaler.full) {
+		if (mode == DRM_MODE_SCALE_NONE)
+			omode = umode;
+	} else {
+		/* Non-EDID LVDS/eDP mode. */
+		mode = DRM_MODE_SCALE_FULLSCREEN;
+	}
+
+	/* For the user-specified mode, we must ignore doublescan and
+	 * the like, but honor frame packing.
+	 */
+	umode_vdisplay = umode->vdisplay;
+	if ((umode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
+		umode_vdisplay += umode->vtotal;
+	asyh->view.iW = umode->hdisplay;
+	asyh->view.iH = umode_vdisplay;
+	/* For the output mode, we can just use the stock helper. */
+	drm_mode_get_hv_timing(omode, &omode_hdisplay, &omode_vdisplay);
+	asyh->view.oW = omode_hdisplay;
+	asyh->view.oH = omode_vdisplay;
+
+	/* Add overscan compensation if necessary, will keep the aspect
+	 * ratio the same as the backend mode unless overridden by the
+	 * user setting both hborder and vborder properties.
+	 */
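+	/* "r" is the output aspect ratio as unsigned .19 fixed-point, e.g.
+	 * a 1920x1080 backend mode gives (1080 << 19) / 1920 = 294912
+	 * (0.5625).
+	 */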
+	if ((asyc->scaler.underscan.mode == UNDERSCAN_ON ||
+	    (asyc->scaler.underscan.mode == UNDERSCAN_AUTO &&
+	     drm_detect_hdmi_monitor(edid)))) {
+		u32 bX = asyc->scaler.underscan.hborder;
+		u32 bY = asyc->scaler.underscan.vborder;
+		u32 r = (asyh->view.oH << 19) / asyh->view.oW;
+
+		if (bX) {
+			asyh->view.oW -= (bX * 2);
+			if (bY) asyh->view.oH -= (bY * 2);
+			else    asyh->view.oH  = ((asyh->view.oW * r) + (r / 2)) >> 19;
+		} else {
+			asyh->view.oW -= (asyh->view.oW >> 4) + 32;
+			if (bY) asyh->view.oH -= (bY * 2);
+			else    asyh->view.oH  = ((asyh->view.oW * r) + (r / 2)) >> 19;
+		}
+	}
+
+	/* Handle CENTER/ASPECT scaling, taking into account the areas
+	 * removed already for overscan compensation.
+	 */
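+	/* e.g. ASPECT-scaling a 1280x1024 (5:4) source onto a 1920x1080
+	 * output pillarboxes it to 1350x1080: oW = 1080 * (1280 / 1024).
+	 */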
+	switch (mode) {
+	case DRM_MODE_SCALE_CENTER:
+		asyh->view.oW = min((u16)umode->hdisplay, asyh->view.oW);
+		asyh->view.oH = min((u16)umode_vdisplay, asyh->view.oH);
+		/* fall-through */
+	case DRM_MODE_SCALE_ASPECT:
+		if (asyh->view.oH < asyh->view.oW) {
+			u32 r = (asyh->view.iW << 19) / asyh->view.iH;
+			asyh->view.oW = ((asyh->view.oH * r) + (r / 2)) >> 19;
+		} else {
+			u32 r = (asyh->view.iH << 19) / asyh->view.iW;
+			asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19;
+		}
+		break;
+	default:
+		break;
+	}
+
+	asyh->set.view = true;
+}
+
+static int
+nv50_head_atomic_check_lut(struct nv50_head *head,
+			   struct nv50_head_atom *asyh)
+{
+	struct nv50_disp *disp = nv50_disp(head->base.base.dev);
+	struct drm_property_blob *olut = asyh->state.gamma_lut;
+
+	/* Determine whether core output LUT should be enabled. */
+	if (olut) {
+		/* Check if any window(s) have stolen the core output LUT
+		 * to use as an input LUT for legacy gamma + I8 colour format.
+		 */
+		if (asyh->wndw.olut) {
+			/* If any window has stolen the core output LUT,
+			 * all of them must.
+			 */
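+			/* e.g. with windows 0 and 1 in use (wndw.mask 0x3),
+			 * wndw.olut 0x1 (window 0 only) is rejected.
+			 */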
+			if (asyh->wndw.olut != asyh->wndw.mask)
+				return -EINVAL;
+			olut = NULL;
+		}
+	}
+
+	if (!olut) {
+		asyh->olut.handle = 0;
+		return 0;
+	}
+
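+	/* The LUT is double-buffered in VRAM (nv50_lut has two buffers),
+	 * so flip to the other buffer before flush_set() loads the new
+	 * entries into it.
+	 */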
+	asyh->olut.handle = disp->core->chan.vram.handle;
+	asyh->olut.buffer = !asyh->olut.buffer;
+	head->func->olut(head, asyh);
+	return 0;
+}
+
+static void
+nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct drm_display_mode *mode = &asyh->state.adjusted_mode;
+	struct nv50_head_mode *m = &asyh->mode;
+	u32 blankus;
+
+	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V | CRTC_STEREO_DOUBLE);
+
+	/*
+	 * DRM modes are defined in terms of a repeating interval
+	 * starting with the active display area.  The hardware modes
+	 * are defined in terms of a repeating interval starting one
+	 * unit (pixel or line) into the sync pulse.  So, add bias.
+	 */
+
+	m->h.active = mode->crtc_htotal;
+	m->h.synce  = mode->crtc_hsync_end - mode->crtc_hsync_start - 1;
+	m->h.blanke = mode->crtc_hblank_end - mode->crtc_hsync_start - 1;
+	m->h.blanks = m->h.blanke + mode->crtc_hdisplay;
+
+	m->v.active = mode->crtc_vtotal;
+	m->v.synce  = mode->crtc_vsync_end - mode->crtc_vsync_start - 1;
+	m->v.blanke = mode->crtc_vblank_end - mode->crtc_vsync_start - 1;
+	m->v.blanks = m->v.blanke + mode->crtc_vdisplay;
+
+	/*XXX: Safe underestimate, even "0" works */
+	blankus = (m->v.active - mode->crtc_vdisplay - 2) * m->h.active;
+	blankus *= 1000;
+	blankus /= mode->crtc_clock;
+	m->v.blankus = blankus;
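+	/* Worked example for 1920x1080@60 (148.5MHz, htotal 2200, vtotal
+	 * 1125, hsync 2008-2052): h.active 2200, h.synce 43, h.blanke 191,
+	 * h.blanks 2111; blankus = 43 lines * 2200 * 1000 / 148500 ~= 637us.
+	 */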
+
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+		m->v.blank2e =  m->v.active + m->v.blanke;
+		m->v.blank2s =  m->v.blank2e + mode->crtc_vdisplay;
+		m->v.active  = (m->v.active * 2) + 1;
+		m->interlace = true;
+	} else {
+		m->v.blank2e = 0;
+		m->v.blank2s = 1;
+		m->interlace = false;
+	}
+	m->clock = mode->crtc_clock;
+
+	asyh->or.nhsync = !!(mode->flags & DRM_MODE_FLAG_NHSYNC);
+	asyh->or.nvsync = !!(mode->flags & DRM_MODE_FLAG_NVSYNC);
+	asyh->set.or = head->func->or != NULL;
+	asyh->set.mode = true;
+}
+
+static int
+nv50_head_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state)
+{
+	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
+	struct nv50_head *head = nv50_head(crtc);
+	struct nv50_head_atom *armh = nv50_head_atom(crtc->state);
+	struct nv50_head_atom *asyh = nv50_head_atom(state);
+	struct nouveau_conn_atom *asyc = NULL;
+	struct drm_connector_state *conns;
+	struct drm_connector *conn;
+	int i;
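+	/* armh is the state currently armed in hardware (crtc->state);
+	 * asyh is the new state being checked.
+	 */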
+
+	NV_ATOMIC(drm, "%s atomic_check %d\n", crtc->name, asyh->state.active);
+	if (asyh->state.active) {
+		for_each_new_connector_in_state(asyh->state.state, conn, conns, i) {
+			if (conns->crtc == crtc) {
+				asyc = nouveau_conn_atom(conns);
+				break;
+			}
+		}
+
+		if (armh->state.active) {
+			if (asyc) {
+				if (asyh->state.mode_changed)
+					asyc->set.scaler = true;
+				if (armh->base.depth != asyh->base.depth)
+					asyc->set.dither = true;
+			}
+		} else {
+			if (asyc)
+				asyc->set.mask = ~0;
+			asyh->set.mask = ~0;
+			asyh->set.or = head->func->or != NULL;
+		}
+
+		if (asyh->state.mode_changed)
+			nv50_head_atomic_check_mode(head, asyh);
+
+		if (asyh->state.color_mgmt_changed ||
+		    memcmp(&armh->wndw, &asyh->wndw, sizeof(asyh->wndw))) {
+			int ret = nv50_head_atomic_check_lut(head, asyh);
+			if (ret)
+				return ret;
+
+			asyh->olut.visible = asyh->olut.handle != 0;
+		}
+
+		if (asyc) {
+			if (asyc->set.scaler)
+				nv50_head_atomic_check_view(armh, asyh, asyc);
+			if (asyc->set.dither)
+				nv50_head_atomic_check_dither(armh, asyh, asyc);
+			if (asyc->set.procamp)
+				nv50_head_atomic_check_procamp(armh, asyh, asyc);
+		}
+
+		if (head->func->core_calc) {
+			head->func->core_calc(head, asyh);
+			if (!asyh->core.visible)
+				asyh->olut.visible = false;
+		}
+
+		asyh->set.base = armh->base.cpp != asyh->base.cpp;
+		asyh->set.ovly = armh->ovly.cpp != asyh->ovly.cpp;
+	} else {
+		asyh->olut.visible = false;
+		asyh->core.visible = false;
+		asyh->curs.visible = false;
+		asyh->base.cpp = 0;
+		asyh->ovly.cpp = 0;
+	}
+
+	if (!drm_atomic_crtc_needs_modeset(&asyh->state)) {
+		if (asyh->core.visible) {
+			if (memcmp(&armh->core, &asyh->core, sizeof(asyh->core)))
+				asyh->set.core = true;
+		} else
+		if (armh->core.visible) {
+			asyh->clr.core = true;
+		}
+
+		if (asyh->curs.visible) {
+			if (memcmp(&armh->curs, &asyh->curs, sizeof(asyh->curs)))
+				asyh->set.curs = true;
+		} else
+		if (armh->curs.visible) {
+			asyh->clr.curs = true;
+		}
+
+		if (asyh->olut.visible) {
+			if (memcmp(&armh->olut, &asyh->olut, sizeof(asyh->olut)))
+				asyh->set.olut = true;
+		} else
+		if (armh->olut.visible) {
+			asyh->clr.olut = true;
+		}
+	} else {
+		asyh->clr.olut = armh->olut.visible;
+		asyh->clr.core = armh->core.visible;
+		asyh->clr.curs = armh->curs.visible;
+		asyh->set.olut = asyh->olut.visible;
+		asyh->set.core = asyh->core.visible;
+		asyh->set.curs = asyh->curs.visible;
+	}
+
+	if (asyh->clr.mask || asyh->set.mask)
+		nv50_atom(asyh->state.state)->lock_core = true;
+	return 0;
+}
+
+static const struct drm_crtc_helper_funcs
+nv50_head_help = {
+	.atomic_check = nv50_head_atomic_check,
+};
+
+static void
+nv50_head_atomic_destroy_state(struct drm_crtc *crtc,
+			       struct drm_crtc_state *state)
+{
+	struct nv50_head_atom *asyh = nv50_head_atom(state);
+	__drm_atomic_helper_crtc_destroy_state(&asyh->state);
+	kfree(asyh);
+}
+
+static struct drm_crtc_state *
+nv50_head_atomic_duplicate_state(struct drm_crtc *crtc)
+{
+	struct nv50_head_atom *armh = nv50_head_atom(crtc->state);
+	struct nv50_head_atom *asyh;
+	if (!(asyh = kmalloc(sizeof(*asyh), GFP_KERNEL)))
+		return NULL;
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &asyh->state);
+	asyh->wndw = armh->wndw;
+	asyh->view = armh->view;
+	asyh->mode = armh->mode;
+	asyh->olut = armh->olut;
+	asyh->core = armh->core;
+	asyh->curs = armh->curs;
+	asyh->base = armh->base;
+	asyh->ovly = armh->ovly;
+	asyh->dither = armh->dither;
+	asyh->procamp = armh->procamp;
+	asyh->clr.mask = 0;
+	asyh->set.mask = 0;
+	return &asyh->state;
+}
+
+static void
+__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
+			       struct drm_crtc_state *state)
+{
+	if (crtc->state)
+		crtc->funcs->atomic_destroy_state(crtc, crtc->state);
+	crtc->state = state;
+	crtc->state->crtc = crtc;
+}
+
+static void
+nv50_head_reset(struct drm_crtc *crtc)
+{
+	struct nv50_head_atom *asyh;
+
+	if (WARN_ON(!(asyh = kzalloc(sizeof(*asyh), GFP_KERNEL))))
+		return;
+
+	__drm_atomic_helper_crtc_reset(crtc, &asyh->state);
+}
+
+static void
+nv50_head_destroy(struct drm_crtc *crtc)
+{
+	struct nv50_head *head = nv50_head(crtc);
+	nv50_lut_fini(&head->olut);
+	drm_crtc_cleanup(crtc);
+	kfree(head);
+}
+
+static const struct drm_crtc_funcs
+nv50_head_func = {
+	.reset = nv50_head_reset,
+	.gamma_set = drm_atomic_helper_legacy_gamma_set,
+	.destroy = nv50_head_destroy,
+	.set_config = drm_atomic_helper_set_config,
+	.page_flip = drm_atomic_helper_page_flip,
+	.atomic_duplicate_state = nv50_head_atomic_duplicate_state,
+	.atomic_destroy_state = nv50_head_atomic_destroy_state,
+};
+
+int
+nv50_head_create(struct drm_device *dev, int index)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nv50_disp *disp = nv50_disp(dev);
+	struct nv50_head *head;
+	struct nv50_wndw *curs, *wndw;
+	struct drm_crtc *crtc;
+	int ret;
+
+	head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (!head)
+		return -ENOMEM;
+
+	head->func = disp->core->func->head;
+	head->base.index = index;
+
+	if (disp->disp->object.oclass < GV100_DISP) {
+		ret = nv50_ovly_new(drm, head->base.index, &wndw);
+		if (ret == 0)
+			ret = nv50_base_new(drm, head->base.index, &wndw);
+	} else {
+		ret = nv50_wndw_new(drm, DRM_PLANE_TYPE_OVERLAY,
+				    head->base.index * 2 + 1, &wndw);
+		if (ret == 0)
+			ret = nv50_wndw_new(drm, DRM_PLANE_TYPE_PRIMARY,
+					    head->base.index * 2 + 0, &wndw);
+	}
+	if (ret == 0)
+		ret = nv50_curs_new(drm, head->base.index, &curs);
+	if (ret) {
+		kfree(head);
+		return ret;
+	}
+
+	crtc = &head->base.base;
+	drm_crtc_init_with_planes(dev, crtc, &wndw->plane, &curs->plane,
+				  &nv50_head_func, "head-%d", head->base.index);
+	drm_crtc_helper_add(crtc, &nv50_head_help);
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
+	if (head->func->olut_set) {
+		ret = nv50_lut_init(disp, &drm->client.mmu, &head->olut);
+		if (ret) {
+			nv50_head_destroy(crtc);
+			return ret;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.h b/drivers/gpu/drm/nouveau/dispnv50/head.h
new file mode 100644
index 0000000..37b3248
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.h
@@ -0,0 +1,78 @@
+#ifndef __NV50_KMS_HEAD_H__
+#define __NV50_KMS_HEAD_H__
+#define nv50_head(c) container_of((c), struct nv50_head, base.base)
+#include "disp.h"
+#include "atom.h"
+#include "lut.h"
+
+#include "nouveau_crtc.h"
+
+struct nv50_head {
+	const struct nv50_head_func *func;
+	struct nouveau_crtc base;
+	struct nv50_lut olut;
+};
+
+int nv50_head_create(struct drm_device *, int index);
+void nv50_head_flush_set(struct nv50_head *, struct nv50_head_atom *);
+void nv50_head_flush_clr(struct nv50_head *, struct nv50_head_atom *, bool flush);
+
+struct nv50_head_func {
+	void (*view)(struct nv50_head *, struct nv50_head_atom *);
+	void (*mode)(struct nv50_head *, struct nv50_head_atom *);
+	void (*olut)(struct nv50_head *, struct nv50_head_atom *);
+	void (*olut_set)(struct nv50_head *, struct nv50_head_atom *);
+	void (*olut_clr)(struct nv50_head *);
+	void (*core_calc)(struct nv50_head *, struct nv50_head_atom *);
+	void (*core_set)(struct nv50_head *, struct nv50_head_atom *);
+	void (*core_clr)(struct nv50_head *);
+	int (*curs_layout)(struct nv50_head *, struct nv50_wndw_atom *,
+			   struct nv50_head_atom *);
+	int (*curs_format)(struct nv50_head *, struct nv50_wndw_atom *,
+			   struct nv50_head_atom *);
+	void (*curs_set)(struct nv50_head *, struct nv50_head_atom *);
+	void (*curs_clr)(struct nv50_head *);
+	void (*base)(struct nv50_head *, struct nv50_head_atom *);
+	void (*ovly)(struct nv50_head *, struct nv50_head_atom *);
+	void (*dither)(struct nv50_head *, struct nv50_head_atom *);
+	void (*procamp)(struct nv50_head *, struct nv50_head_atom *);
+	void (*or)(struct nv50_head *, struct nv50_head_atom *);
+};
+
+extern const struct nv50_head_func head507d;
+void head507d_view(struct nv50_head *, struct nv50_head_atom *);
+void head507d_mode(struct nv50_head *, struct nv50_head_atom *);
+void head507d_olut(struct nv50_head *, struct nv50_head_atom *);
+void head507d_core_calc(struct nv50_head *, struct nv50_head_atom *);
+void head507d_core_clr(struct nv50_head *);
+int head507d_curs_layout(struct nv50_head *, struct nv50_wndw_atom *,
+			 struct nv50_head_atom *);
+int head507d_curs_format(struct nv50_head *, struct nv50_wndw_atom *,
+			 struct nv50_head_atom *);
+void head507d_base(struct nv50_head *, struct nv50_head_atom *);
+void head507d_ovly(struct nv50_head *, struct nv50_head_atom *);
+void head507d_dither(struct nv50_head *, struct nv50_head_atom *);
+void head507d_procamp(struct nv50_head *, struct nv50_head_atom *);
+
+extern const struct nv50_head_func head827d;
+
+extern const struct nv50_head_func head907d;
+void head907d_view(struct nv50_head *, struct nv50_head_atom *);
+void head907d_mode(struct nv50_head *, struct nv50_head_atom *);
+void head907d_olut(struct nv50_head *, struct nv50_head_atom *);
+void head907d_olut_set(struct nv50_head *, struct nv50_head_atom *);
+void head907d_olut_clr(struct nv50_head *);
+void head907d_core_set(struct nv50_head *, struct nv50_head_atom *);
+void head907d_core_clr(struct nv50_head *);
+void head907d_curs_set(struct nv50_head *, struct nv50_head_atom *);
+void head907d_curs_clr(struct nv50_head *);
+void head907d_ovly(struct nv50_head *, struct nv50_head_atom *);
+void head907d_procamp(struct nv50_head *, struct nv50_head_atom *);
+void head907d_or(struct nv50_head *, struct nv50_head_atom *);
+
+extern const struct nv50_head_func head917d;
+int head917d_curs_layout(struct nv50_head *, struct nv50_wndw_atom *,
+			 struct nv50_head_atom *);
+
+extern const struct nv50_head_func headc37d;
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head507d.c b/drivers/gpu/drm/nouveau/dispnv50/head507d.c
new file mode 100644
index 0000000..51bc599
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/head507d.c
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "head.h"
+#include "core.h"
+
+void
+head507d_procamp(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x08a8 + (head->base.index * 0x400), 1);
+		evo_data(push, asyh->procamp.sat.sin << 20 |
+			       asyh->procamp.sat.cos << 8);
+		evo_kick(push, core);
+	}
+}
+
+void
+head507d_dither(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x08a0 + (head->base.index * 0x0400), 1);
+		evo_data(push, asyh->dither.mode << 3 |
+			       asyh->dither.bits << 1 |
+			       asyh->dither.enable);
+		evo_kick(push, core);
+	}
+}
+
+void
+head507d_ovly(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 bounds = 0;
+	u32 *push;
+
+	if (asyh->ovly.cpp) {
+		switch (asyh->ovly.cpp) {
+		case 4: bounds |= 0x00000300; break;
+		case 2: bounds |= 0x00000100; break;
+		default:
+			WARN_ON(1);
+			break;
+		}
+		bounds |= 0x00000001;
+	} else {
+		bounds |= 0x00000100;
+	}
+
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x0904 + head->base.index * 0x400, 1);
+		evo_data(push, bounds);
+		evo_kick(push, core);
+	}
+}
+
+void
+head507d_base(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 bounds = 0;
+	u32 *push;
+
+	if (asyh->base.cpp) {
+		switch (asyh->base.cpp) {
+		case 8: bounds |= 0x00000500; break;
+		case 4: bounds |= 0x00000300; break;
+		case 2: bounds |= 0x00000100; break;
+		case 1: bounds |= 0x00000000; break;
+		default:
+			WARN_ON(1);
+			break;
+		}
+		bounds |= 0x00000001;
+	}
+
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x0900 + head->base.index * 0x400, 1);
+		evo_data(push, bounds);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head507d_curs_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x0880 + head->base.index * 0x400, 1);
+		evo_data(push, 0x05000000);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head507d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 3))) {
+		evo_mthd(push, 0x0880 + head->base.index * 0x400, 2);
+		evo_data(push, 0x80000000 | asyh->curs.layout << 26 |
+					    asyh->curs.format << 24);
+		evo_data(push, asyh->curs.offset >> 8);
+		evo_kick(push, core);
+	}
+}
+
+int
+head507d_curs_format(struct nv50_head *head, struct nv50_wndw_atom *asyw,
+		     struct nv50_head_atom *asyh)
+{
+	switch (asyw->image.format) {
+	case 0xcf: asyh->curs.format = 1; break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int
+head507d_curs_layout(struct nv50_head *head, struct nv50_wndw_atom *asyw,
+		     struct nv50_head_atom *asyh)
+{
+	switch (asyw->image.w) {
+	case 32: asyh->curs.layout = 0; break;
+	case 64: asyh->curs.layout = 1; break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void
+head507d_core_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x0874 + head->base.index * 0x400, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head507d_core_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 9))) {
+		evo_mthd(push, 0x0860 + head->base.index * 0x400, 1);
+		evo_data(push, asyh->core.offset >> 8);
+		evo_mthd(push, 0x0868 + head->base.index * 0x400, 4);
+		evo_data(push, asyh->core.h << 16 | asyh->core.w);
+		evo_data(push, asyh->core.layout << 20 |
+			       (asyh->core.pitch >> 8) << 8 |
+			       asyh->core.blocks << 8 |
+			       asyh->core.blockh);
+		evo_data(push, asyh->core.kind << 16 |
+			       asyh->core.format << 8);
+		evo_data(push, asyh->core.handle);
+		evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1);
+		evo_data(push, asyh->core.y << 16 | asyh->core.x);
+		evo_kick(push, core);
+
+		/* EVO will complain with INVALID_STATE if we have an
+		 * active cursor and (re)specify HeadSetContextDmaIso
+		 * without also updating HeadSetOffsetCursor.
+		 */
+		asyh->set.curs = asyh->curs.visible;
+		asyh->set.olut = asyh->olut.handle != 0;
+	}
+}
+
+void
+head507d_core_calc(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_disp *disp = nv50_disp(head->base.base.dev);
+	if ((asyh->core.visible = (asyh->base.cpp != 0))) {
+		asyh->core.x = asyh->base.x;
+		asyh->core.y = asyh->base.y;
+		asyh->core.w = asyh->base.w;
+		asyh->core.h = asyh->base.h;
+	} else
+	if ((asyh->core.visible = (asyh->ovly.cpp != 0)) ||
+	    (asyh->core.visible = asyh->curs.visible)) {
+		/*XXX: We need to either find some way of having the
+		 *     primary base layer appear black, while still
+		 *     being able to display the other layers, or we
+		 *     need to allocate a dummy black surface here.
+		 */
+		asyh->core.x = 0;
+		asyh->core.y = 0;
+		asyh->core.w = asyh->state.mode.hdisplay;
+		asyh->core.h = asyh->state.mode.vdisplay;
+	}
+	asyh->core.handle = disp->core->chan.vram.handle;
+	asyh->core.offset = 0;
+	asyh->core.format = 0xcf;
+	asyh->core.kind = 0;
+	asyh->core.layout = 1;
+	asyh->core.blockh = 0;
+	asyh->core.blocks = 0;
+	asyh->core.pitch = ALIGN(asyh->core.w, 64) * 4;
+}
+
+static void
+head507d_olut_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head507d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 3))) {
+		evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2);
+		evo_data(push, 0x80000000 | asyh->olut.mode << 30);
+		evo_data(push, asyh->olut.offset >> 8);
+		evo_kick(push, core);
+	}
+}
+
+void
+head507d_olut(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	if (asyh->base.cpp == 1)
+		asyh->olut.mode = 0;
+	else
+		asyh->olut.mode = 1;
+}
+
+void
+head507d_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	struct nv50_head_mode *m = &asyh->mode;
+	u32 *push;
+	if ((push = evo_wait(core, 13))) {
+		evo_mthd(push, 0x0804 + (head->base.index * 0x400), 2);
+		evo_data(push, 0x00800000 | m->clock);
+		evo_data(push, m->interlace ? 0x00000002 : 0x00000000);
+		evo_mthd(push, 0x0810 + (head->base.index * 0x400), 7);
+		evo_data(push, 0x00000000);
+		evo_data(push, m->v.active  << 16 | m->h.active );
+		evo_data(push, m->v.synce   << 16 | m->h.synce  );
+		evo_data(push, m->v.blanke  << 16 | m->h.blanke );
+		evo_data(push, m->v.blanks  << 16 | m->h.blanks );
+		evo_data(push, m->v.blank2e << 16 | m->v.blank2s);
+		evo_data(push, asyh->mode.v.blankus);
+		evo_mthd(push, 0x082c + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+void
+head507d_view(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 7))) {
+		evo_mthd(push, 0x08a4 + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x08c8 + (head->base.index * 0x400), 1);
+		evo_data(push, asyh->view.iH << 16 | asyh->view.iW);
+		evo_mthd(push, 0x08d8 + (head->base.index * 0x400), 2);
+		evo_data(push, asyh->view.oH << 16 | asyh->view.oW);
+		evo_data(push, asyh->view.oH << 16 | asyh->view.oW);
+		evo_kick(push, core);
+	}
+}
+
+const struct nv50_head_func
+head507d = {
+	.view = head507d_view,
+	.mode = head507d_mode,
+	.olut = head507d_olut,
+	.olut_set = head507d_olut_set,
+	.olut_clr = head507d_olut_clr,
+	.core_calc = head507d_core_calc,
+	.core_set = head507d_core_set,
+	.core_clr = head507d_core_clr,
+	.curs_layout = head507d_curs_layout,
+	.curs_format = head507d_curs_format,
+	.curs_set = head507d_curs_set,
+	.curs_clr = head507d_curs_clr,
+	.base = head507d_base,
+	.ovly = head507d_ovly,
+	.dither = head507d_dither,
+	.procamp = head507d_procamp,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head827d.c b/drivers/gpu/drm/nouveau/dispnv50/head827d.c
new file mode 100644
index 0000000..af5e7bd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/head827d.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "head.h"
+#include "core.h"
+
+static void
+head827d_curs_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		evo_mthd(push, 0x0880 + head->base.index * 0x400, 1);
+		evo_data(push, 0x05000000);
+		evo_mthd(push, 0x089c + head->base.index * 0x400, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head827d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 5))) {
+		evo_mthd(push, 0x0880 + head->base.index * 0x400, 2);
+		evo_data(push, 0x80000000 | asyh->curs.layout << 26 |
+					    asyh->curs.format << 24);
+		evo_data(push, asyh->curs.offset >> 8);
+		evo_mthd(push, 0x089c + head->base.index * 0x400, 1);
+		evo_data(push, asyh->curs.handle);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head827d_core_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 9))) {
+		evo_mthd(push, 0x0860 + head->base.index * 0x400, 1);
+		evo_data(push, asyh->core.offset >> 8);
+		evo_mthd(push, 0x0868 + head->base.index * 0x400, 4);
+		evo_data(push, asyh->core.h << 16 | asyh->core.w);
+		evo_data(push, asyh->core.layout << 20 |
+			       (asyh->core.pitch >> 8) << 8 |
+			       asyh->core.blocks << 8 |
+			       asyh->core.blockh);
+		evo_data(push, asyh->core.format << 8);
+		evo_data(push, asyh->core.handle);
+		evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1);
+		evo_data(push, asyh->core.y << 16 | asyh->core.x);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head827d_olut_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x085c + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head827d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 5))) {
+		evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2);
+		evo_data(push, 0x80000000 | asyh->olut.mode << 30);
+		evo_data(push, asyh->olut.offset >> 8);
+		evo_mthd(push, 0x085c + (head->base.index * 0x400), 1);
+		evo_data(push, asyh->olut.handle);
+		evo_kick(push, core);
+	}
+}
+
+const struct nv50_head_func
+head827d = {
+	.view = head507d_view,
+	.mode = head507d_mode,
+	.olut = head507d_olut,
+	.olut_set = head827d_olut_set,
+	.olut_clr = head827d_olut_clr,
+	.core_calc = head507d_core_calc,
+	.core_set = head827d_core_set,
+	.core_clr = head507d_core_clr,
+	.curs_layout = head507d_curs_layout,
+	.curs_format = head507d_curs_format,
+	.curs_set = head827d_curs_set,
+	.curs_clr = head827d_curs_clr,
+	.base = head507d_base,
+	.ovly = head507d_ovly,
+	.dither = head507d_dither,
+	.procamp = head507d_procamp,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head907d.c b/drivers/gpu/drm/nouveau/dispnv50/head907d.c
new file mode 100644
index 0000000..6339071
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/head907d.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "head.h"
+#include "core.h"
+
+void
+head907d_or(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 3))) {
+		evo_mthd(push, 0x0404 + (head->base.index * 0x300), 2);
+		evo_data(push, 0x00000001 | asyh->or.depth  << 6 |
+					    asyh->or.nvsync << 4 |
+					    asyh->or.nhsync << 3);
+		evo_data(push, 0x31ec6000 | head->base.index << 25 |
+					    asyh->mode.interlace);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_procamp(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x0498 + (head->base.index * 0x300), 1);
+		evo_data(push, asyh->procamp.sat.sin << 20 |
+			       asyh->procamp.sat.cos << 8);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head907d_dither(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x0490 + (head->base.index * 0x0300), 1);
+		evo_data(push, asyh->dither.mode << 3 |
+			       asyh->dither.bits << 1 |
+			       asyh->dither.enable);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_ovly(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 bounds = 0;
+	u32 *push;
+
+	if (asyh->ovly.cpp) {
+		switch (asyh->ovly.cpp) {
+		case 8: bounds |= 0x00000500; break;
+		case 4: bounds |= 0x00000300; break;
+		case 2: bounds |= 0x00000100; break;
+		default:
+			WARN_ON(1);
+			break;
+		}
+		bounds |= 0x00000001;
+	} else {
+		bounds |= 0x00000100;
+	}
+
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x04d4 + head->base.index * 0x300, 1);
+		evo_data(push, bounds);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head907d_base(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 bounds = 0;
+	u32 *push;
+
+	if (asyh->base.cpp) {
+		switch (asyh->base.cpp) {
+		case 8: bounds |= 0x00000500; break;
+		case 4: bounds |= 0x00000300; break;
+		case 2: bounds |= 0x00000100; break;
+		case 1: bounds |= 0x00000000; break;
+		default:
+			WARN_ON(1);
+			break;
+		}
+		bounds |= 0x00000001;
+	}
+
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x04d0 + head->base.index * 0x300, 1);
+		evo_data(push, bounds);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_curs_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		evo_mthd(push, 0x0480 + head->base.index * 0x300, 1);
+		evo_data(push, 0x05000000);
+		evo_mthd(push, 0x048c + head->base.index * 0x300, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 5))) {
+		evo_mthd(push, 0x0480 + head->base.index * 0x300, 2);
+		evo_data(push, 0x80000000 | asyh->curs.layout << 26 |
+					    asyh->curs.format << 24);
+		evo_data(push, asyh->curs.offset >> 8);
+		evo_mthd(push, 0x048c + head->base.index * 0x300, 1);
+		evo_data(push, asyh->curs.handle);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_core_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x0474 + head->base.index * 0x300, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_core_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 9))) {
+		evo_mthd(push, 0x0460 + head->base.index * 0x300, 1);
+		evo_data(push, asyh->core.offset >> 8);
+		evo_mthd(push, 0x0468 + head->base.index * 0x300, 4);
+		evo_data(push, asyh->core.h << 16 | asyh->core.w);
+		evo_data(push, asyh->core.layout << 24 |
+			       (asyh->core.pitch >> 8) << 8 |
+			       asyh->core.blocks << 8 |
+			       asyh->core.blockh);
+		evo_data(push, asyh->core.format << 8);
+		evo_data(push, asyh->core.handle);
+		evo_mthd(push, 0x04b0 + head->base.index * 0x300, 1);
+		evo_data(push, asyh->core.y << 16 | asyh->core.x);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_olut_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		evo_mthd(push, 0x0448 + (head->base.index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x045c + (head->base.index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 5))) {
+		evo_mthd(push, 0x0448 + (head->base.index * 0x300), 2);
+		evo_data(push, 0x80000000 | asyh->olut.mode << 24);
+		evo_data(push, asyh->olut.offset >> 8);
+		evo_mthd(push, 0x045c + (head->base.index * 0x300), 1);
+		evo_data(push, asyh->olut.handle);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_olut(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	asyh->olut.mode = 7;
+}
+
+void
+head907d_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	struct nv50_head_mode *m = &asyh->mode;
+	u32 *push;
+	if ((push = evo_wait(core, 14))) {
+		evo_mthd(push, 0x0410 + (head->base.index * 0x300), 6);
+		evo_data(push, 0x00000000);
+		evo_data(push, m->v.active  << 16 | m->h.active );
+		evo_data(push, m->v.synce   << 16 | m->h.synce  );
+		evo_data(push, m->v.blanke  << 16 | m->h.blanke );
+		evo_data(push, m->v.blanks  << 16 | m->h.blanks );
+		evo_data(push, m->v.blank2e << 16 | m->v.blank2s);
+		evo_mthd(push, 0x042c + (head->base.index * 0x300), 2);
+		evo_data(push, 0x00000000); /* ??? */
+		evo_data(push, 0xffffff00);
+		evo_mthd(push, 0x0450 + (head->base.index * 0x300), 3);
+		evo_data(push, m->clock * 1000);
+		evo_data(push, 0x00200000); /* ??? */
+		evo_data(push, m->clock * 1000);
+		evo_kick(push, core);
+	}
+}
+
+void
+head907d_view(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 8))) {
+		evo_mthd(push, 0x0494 + (head->base.index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x04b8 + (head->base.index * 0x300), 1);
+		evo_data(push, asyh->view.iH << 16 | asyh->view.iW);
+		evo_mthd(push, 0x04c0 + (head->base.index * 0x300), 3);
+		evo_data(push, asyh->view.oH << 16 | asyh->view.oW);
+		evo_data(push, asyh->view.oH << 16 | asyh->view.oW);
+		evo_data(push, asyh->view.oH << 16 | asyh->view.oW);
+		evo_kick(push, core);
+	}
+}
+
+const struct nv50_head_func
+head907d = {
+	.view = head907d_view,
+	.mode = head907d_mode,
+	.olut = head907d_olut,
+	.olut_set = head907d_olut_set,
+	.olut_clr = head907d_olut_clr,
+	.core_calc = head507d_core_calc,
+	.core_set = head907d_core_set,
+	.core_clr = head907d_core_clr,
+	.curs_layout = head507d_curs_layout,
+	.curs_format = head507d_curs_format,
+	.curs_set = head907d_curs_set,
+	.curs_clr = head907d_curs_clr,
+	.base = head907d_base,
+	.ovly = head907d_ovly,
+	.dither = head907d_dither,
+	.procamp = head907d_procamp,
+	.or = head907d_or,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head917d.c b/drivers/gpu/drm/nouveau/dispnv50/head917d.c
new file mode 100644
index 0000000..303df84
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/head917d.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "head.h"
+#include "core.h"
+
+static void
+head917d_dither(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x04a0 + (head->base.index * 0x0300), 1);
+		evo_data(push, asyh->dither.mode << 3 |
+			       asyh->dither.bits << 1 |
+			       asyh->dither.enable);
+		evo_kick(push, core);
+	}
+}
+
+static void
+head917d_base(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 bounds = 0;
+	u32 *push;
+
+	if (asyh->base.cpp) {
+		switch (asyh->base.cpp) {
+		case 8: bounds |= 0x00000500; break;
+		case 4: bounds |= 0x00000300; break;
+		case 2: bounds |= 0x00000100; break;
+		case 1: bounds |= 0x00000000; break;
+		default:
+			WARN_ON(1);
+			break;
+		}
+		bounds |= 0x00020001;
+	}
+
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x04d0 + head->base.index * 0x300, 1);
+		evo_data(push, bounds);
+		evo_kick(push, core);
+	}
+}
+
+int
+head917d_curs_layout(struct nv50_head *head, struct nv50_wndw_atom *asyw,
+		     struct nv50_head_atom *asyh)
+{
+	switch (asyw->state.fb->width) {
+	case  32: asyh->curs.layout = 0; break;
+	case  64: asyh->curs.layout = 1; break;
+	case 128: asyh->curs.layout = 2; break;
+	case 256: asyh->curs.layout = 3; break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+const struct nv50_head_func
+head917d = {
+	.view = head907d_view,
+	.mode = head907d_mode,
+	.olut = head907d_olut,
+	.olut_set = head907d_olut_set,
+	.olut_clr = head907d_olut_clr,
+	.core_calc = head507d_core_calc,
+	.core_set = head907d_core_set,
+	.core_clr = head907d_core_clr,
+	.curs_layout = head917d_curs_layout,
+	.curs_format = head507d_curs_format,
+	.curs_set = head907d_curs_set,
+	.curs_clr = head907d_curs_clr,
+	.base = head917d_base,
+	.ovly = head907d_ovly,
+	.dither = head917d_dither,
+	.procamp = head907d_procamp,
+	.or = head907d_or,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/headc37d.c b/drivers/gpu/drm/nouveau/dispnv50/headc37d.c
new file mode 100644
index 0000000..989c140
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/headc37d.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "head.h"
+#include "atom.h"
+#include "core.h"
+
+static void
+headc37d_or(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		/*XXX: This is a dirty hack until OR depth handling is
+		 *     improved later for deep colour etc.
+		 */
+		switch (asyh->or.depth) {
+		case 6: asyh->or.depth = 5; break;
+		case 5: asyh->or.depth = 4; break;
+		case 2: asyh->or.depth = 1; break;
+		case 0: asyh->or.depth = 4; break;
+		default:
+			WARN_ON(1);
+			break;
+		}
+
+		evo_mthd(push, 0x2004 + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000001 |
+			       asyh->or.depth << 4 |
+			       asyh->or.nvsync << 3 |
+			       asyh->or.nhsync << 2);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc37d_procamp(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x2000 + (head->base.index * 0x400), 1);
+		evo_data(push, 0x80000000 |
+			       asyh->procamp.sat.sin << 16 |
+			       asyh->procamp.sat.cos << 4);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc37d_dither(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x2018 + (head->base.index * 0x0400), 1);
+		evo_data(push, asyh->dither.mode << 8 |
+			       asyh->dither.bits << 4 |
+			       asyh->dither.enable);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc37d_curs_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		evo_mthd(push, 0x209c + head->base.index * 0x400, 1);
+		evo_data(push, 0x000000cf);
+		evo_mthd(push, 0x2088 + head->base.index * 0x400, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc37d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 7))) {
+		evo_mthd(push, 0x209c + head->base.index * 0x400, 2);
+		evo_data(push, 0x80000000 |
+			       asyh->curs.layout << 8 |
+			       asyh->curs.format << 0);
+		evo_data(push, 0x000072ff);
+		evo_mthd(push, 0x2088 + head->base.index * 0x400, 1);
+		evo_data(push, asyh->curs.handle);
+		evo_mthd(push, 0x2090 + head->base.index * 0x400, 1);
+		evo_data(push, asyh->curs.offset >> 8);
+		evo_kick(push, core);
+	}
+}
+
+static int
+headc37d_curs_format(struct nv50_head *head, struct nv50_wndw_atom *asyw,
+		     struct nv50_head_atom *asyh)
+{
+	asyh->curs.format = asyw->image.format;
+	return 0;
+}
+
+static void
+headc37d_olut_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x20ac + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc37d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		evo_mthd(push, 0x20a4 + (head->base.index * 0x400), 3);
+		evo_data(push, asyh->olut.output_mode << 8 |
+			       asyh->olut.range << 4 |
+			       asyh->olut.size);
+		evo_data(push, asyh->olut.offset >> 8);
+		evo_data(push, asyh->olut.handle);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc37d_olut(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	asyh->olut.mode = 2;
+	asyh->olut.size = 0;
+	asyh->olut.range = 0;
+	asyh->olut.output_mode = 1;
+}
+
+static void
+headc37d_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	struct nv50_head_mode *m = &asyh->mode;
+	u32 *push;
+	if ((push = evo_wait(core, 12))) {
+		evo_mthd(push, 0x2064 + (head->base.index * 0x400), 5);
+		evo_data(push, (m->v.active  << 16) | m->h.active );
+		evo_data(push, (m->v.synce   << 16) | m->h.synce  );
+		evo_data(push, (m->v.blanke  << 16) | m->h.blanke );
+		evo_data(push, (m->v.blanks  << 16) | m->h.blanks );
+		evo_data(push, (m->v.blank2e << 16) | m->v.blank2s);
+		evo_mthd(push, 0x200c + (head->base.index * 0x400), 1);
+		evo_data(push, m->clock * 1000);
+		evo_mthd(push, 0x2028 + (head->base.index * 0x400), 1);
+		evo_data(push, m->clock * 1000);
+		/*XXX: HEAD_USAGE_BOUNDS, doesn't belong here. */
+		evo_mthd(push, 0x2030 + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000124);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc37d_view(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		evo_mthd(push, 0x204c + (head->base.index * 0x400), 1);
+		evo_data(push, (asyh->view.iH << 16) | asyh->view.iW);
+		evo_mthd(push, 0x2058 + (head->base.index * 0x400), 1);
+		evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
+		evo_kick(push, core);
+	}
+}
+
+const struct nv50_head_func
+headc37d = {
+	.view = headc37d_view,
+	.mode = headc37d_mode,
+	.olut = headc37d_olut,
+	.olut_set = headc37d_olut_set,
+	.olut_clr = headc37d_olut_clr,
+	.curs_layout = head917d_curs_layout,
+	.curs_format = headc37d_curs_format,
+	.curs_set = headc37d_curs_set,
+	.curs_clr = headc37d_curs_clr,
+	.dither = headc37d_dither,
+	.procamp = headc37d_procamp,
+	.or = headc37d_or,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/lut.c b/drivers/gpu/drm/nouveau/dispnv50/lut.c
new file mode 100644
index 0000000..a6b96ae2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/lut.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "lut.h"
+#include "disp.h"
+
+#include <drm/drm_color_mgmt.h>
+#include <drm/drm_mode.h>
+#include <drm/drm_property.h>
+
+#include <nvif/class.h>
+
+u32
+nv50_lut_load(struct nv50_lut *lut, bool legacy, int buffer,
+	      struct drm_property_blob *blob)
+{
+	struct drm_color_lut *in = (struct drm_color_lut *)blob->data;
+	void __iomem *mem = lut->mem[buffer].object.map.ptr;
+	const int size = blob->length / sizeof(*in);
+	int bits, shift, i;
+	u16 zero, r, g, b;
+	u32 addr = lut->mem[buffer].addr;
+
+	/* This can't happen, but it shuts the compiler up. */
+	if (WARN_ON(size != 256))
+		return 0;
+
+	if (legacy) {
+		bits = 11;
+		shift = 3;
+		zero = 0x0000;
+	} else {
+		bits = 14;
+		shift = 0;
+		zero = 0x6000;
+	}
+
+	for (i = 0; i < size; i++) {
+		r = (drm_color_lut_extract(in[i].  red, bits) + zero) << shift;
+		g = (drm_color_lut_extract(in[i].green, bits) + zero) << shift;
+		b = (drm_color_lut_extract(in[i]. blue, bits) + zero) << shift;
+		writew(r, mem + (i * 0x08) + 0);
+		writew(g, mem + (i * 0x08) + 2);
+		writew(b, mem + (i * 0x08) + 4);
+	}
+
+	/* INTERPOLATE modes require a "next" entry to interpolate with,
+	 * so we replicate the last entry to deal with this for now.
+	 */
+	writew(r, mem + (i * 0x08) + 0);
+	writew(g, mem + (i * 0x08) + 2);
+	writew(b, mem + (i * 0x08) + 4);
+	return addr;
+}
+
+void
+nv50_lut_fini(struct nv50_lut *lut)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(lut->mem); i++)
+		nvif_mem_fini(&lut->mem[i]);
+}
+
+int
+nv50_lut_init(struct nv50_disp *disp, struct nvif_mmu *mmu,
+	      struct nv50_lut *lut)
+{
+	const u32 size = disp->disp->object.oclass < GF110_DISP ? 257 : 1025;
+	int i;
+	for (i = 0; i < ARRAY_SIZE(lut->mem); i++) {
+		int ret = nvif_mem_init_map(mmu, NVIF_MEM_VRAM, size * 8,
+					    &lut->mem[i]);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/lut.h b/drivers/gpu/drm/nouveau/dispnv50/lut.h
new file mode 100644
index 0000000..6d7b835
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/lut.h
@@ -0,0 +1,15 @@
+#ifndef __NV50_KMS_LUT_H__
+#define __NV50_KMS_LUT_H__
+#include <nvif/mem.h>
+struct drm_property_blob;
+struct nv50_disp;
+
+struct nv50_lut {
+	struct nvif_mem mem[2];
+};
+
+int nv50_lut_init(struct nv50_disp *, struct nvif_mmu *, struct nv50_lut *);
+void nv50_lut_fini(struct nv50_lut *);
+u32 nv50_lut_load(struct nv50_lut *, bool legacy, int buffer,
+		  struct drm_property_blob *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/oimm.c b/drivers/gpu/drm/nouveau/dispnv50/oimm.c
new file mode 100644
index 0000000..2a2841d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/oimm.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "oimm.h"
+
+#include <nvif/class.h>
+
+int
+nv50_oimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw)
+{
+	static const struct {
+		s32 oclass;
+		int version;
+		int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *);
+	} oimms[] = {
+		{ GK104_DISP_OVERLAY, 0, oimm507b_init },
+		{ GF110_DISP_OVERLAY, 0, oimm507b_init },
+		{ GT214_DISP_OVERLAY, 0, oimm507b_init },
+		{   G82_DISP_OVERLAY, 0, oimm507b_init },
+		{  NV50_DISP_OVERLAY, 0, oimm507b_init },
+		{}
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int cid;
+
+	cid = nvif_mclass(&disp->disp->object, oimms);
+	if (cid < 0) {
+		NV_ERROR(drm, "No supported overlay immediate class\n");
+		return cid;
+	}
+
+	return oimms[cid].init(drm, oimms[cid].oclass, wndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/oimm.h b/drivers/gpu/drm/nouveau/dispnv50/oimm.h
new file mode 100644
index 0000000..6fa51f1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/oimm.h
@@ -0,0 +1,8 @@
+#ifndef __NV50_KMS_OIMM_H__
+#define __NV50_KMS_OIMM_H__
+#include "wndw.h"
+
+int oimm507b_init(struct nouveau_drm *, s32, struct nv50_wndw *);
+
+int nv50_oimm_init(struct nouveau_drm *, struct nv50_wndw *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/oimm507b.c b/drivers/gpu/drm/nouveau/dispnv50/oimm507b.c
new file mode 100644
index 0000000..2ee404b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/oimm507b.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "oimm.h"
+
+#include <nvif/cl507b.h>
+
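+/* On these classes the overlay-immediate channel exposes the same
+ * point/update interface as the cursor channel, so the curs507a
+ * functions are reused for it. */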
+static int
+oimm507b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm,
+	       s32 oclass, struct nv50_wndw *wndw)
+{
+	struct nv50_disp_overlay_v0 args = {
+		.head = wndw->id,
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int ret;
+
+	ret = nvif_object_init(&disp->disp->object, 0, oclass, &args,
+			       sizeof(args), &wndw->wimm.base.user);
+	if (ret) {
+		NV_ERROR(drm, "oimm%04x allocation failed: %d\n", oclass, ret);
+		return ret;
+	}
+
+	nvif_object_map(&wndw->wimm.base.user, NULL, 0);
+	wndw->immd = func;
+	return 0;
+}
+
+int
+oimm507b_init(struct nouveau_drm *drm, s32 oclass, struct nv50_wndw *wndw)
+{
+	return oimm507b_init_(&curs507a, drm, oclass, wndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly.c b/drivers/gpu/drm/nouveau/dispnv50/ovly.c
new file mode 100644
index 0000000..90c246d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/ovly.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ovly.h"
+#include "oimm.h"
+
+#include <nvif/class.h>
+
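+/* Create an overlay channel using the newest class the display
+ * supports, then attach the matching overlay-immediate channel
+ * to the resulting window. */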
+int
+nv50_ovly_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw)
+{
+	static const struct {
+		s32 oclass;
+		int version;
+		int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+	} ovlys[] = {
+		{ GK104_DISP_OVERLAY_CONTROL_DMA, 0, ovly917e_new },
+		{ GF110_DISP_OVERLAY_CONTROL_DMA, 0, ovly907e_new },
+		{ GT214_DISP_OVERLAY_CHANNEL_DMA, 0, ovly827e_new },
+		{ GT200_DISP_OVERLAY_CHANNEL_DMA, 0, ovly827e_new },
+		{   G82_DISP_OVERLAY_CHANNEL_DMA, 0, ovly827e_new },
+		{  NV50_DISP_OVERLAY_CHANNEL_DMA, 0, ovly507e_new },
+		{}
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int cid, ret;
+
+	cid = nvif_mclass(&disp->disp->object, ovlys);
+	if (cid < 0) {
+		NV_ERROR(drm, "No supported overlay class\n");
+		return cid;
+	}
+
+	ret = ovlys[cid].new(drm, head, ovlys[cid].oclass, pwndw);
+	if (ret)
+		return ret;
+
+	return nv50_oimm_init(drm, *pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly.h b/drivers/gpu/drm/nouveau/dispnv50/ovly.h
new file mode 100644
index 0000000..4869d52
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/ovly.h
@@ -0,0 +1,30 @@
+#ifndef __NV50_KMS_OVLY_H__
+#define __NV50_KMS_OVLY_H__
+#include "wndw.h"
+
+int ovly507e_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+int ovly507e_new_(const struct nv50_wndw_func *, const u32 *format,
+		  struct nouveau_drm *, int head, s32 oclass,
+		  u32 interlock_data, struct nv50_wndw **);
+int ovly507e_acquire(struct nv50_wndw *, struct nv50_wndw_atom *,
+		     struct nv50_head_atom *);
+void ovly507e_release(struct nv50_wndw *, struct nv50_wndw_atom *,
+		      struct nv50_head_atom *);
+void ovly507e_ntfy_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+void ovly507e_ntfy_clr(struct nv50_wndw *);
+void ovly507e_image_clr(struct nv50_wndw *);
+void ovly507e_scale_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+void ovly507e_update(struct nv50_wndw *, u32 *);
+
+extern const u32 ovly827e_format[];
+void ovly827e_ntfy_reset(struct nouveau_bo *, u32);
+int ovly827e_ntfy_wait_begun(struct nouveau_bo *, u32, struct nvif_device *);
+
+extern const struct nv50_wndw_func ovly907e;
+
+int ovly827e_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+int ovly907e_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+int ovly917e_new(struct nouveau_drm *, int, s32, struct nv50_wndw **);
+
+int nv50_ovly_new(struct nouveau_drm *, int head, struct nv50_wndw **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly507e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly507e.c
new file mode 100644
index 0000000..cc41766
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/ovly507e.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ovly.h"
+#include "atom.h"
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+
+#include <nvif/cl507e.h>
+#include <nvif/event.h>
+
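+/* The methods below all follow the same pattern: evo_wait() reserves
+ * space in the channel's push buffer, evo_mthd()/evo_data() encode a
+ * method header and its data words, and evo_kick() submits the push
+ * buffer to the hardware. */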
+void
+ovly507e_update(struct nv50_wndw *wndw, u32 *interlock)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x0080, 1);
+		evo_data(push, interlock[NV50_DISP_INTERLOCK_CORE]);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+ovly507e_scale_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 4))) {
+		evo_mthd(push, 0x00e0, 3);
+		evo_data(push, asyw->scale.sy << 16 | asyw->scale.sx);
+		evo_data(push, asyw->scale.sh << 16 | asyw->scale.sw);
+		evo_data(push, asyw->scale.dw);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+ovly507e_image_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 4))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+ovly507e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 12))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, asyw->image.interval << 4);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, asyw->image.handle[0]);
+		evo_mthd(push, 0x0100, 1);
+		evo_data(push, 0x00000002);
+		evo_mthd(push, 0x0800, 1);
+		evo_data(push, asyw->image.offset[0] >> 8);
+		evo_mthd(push, 0x0808, 3);
+		evo_data(push, asyw->image.h << 16 | asyw->image.w);
+		evo_data(push, asyw->image.layout << 20 |
+			       (asyw->image.pitch[0] >> 8) << 8 |
+			       asyw->image.blocks[0] << 8 |
+			       asyw->image.blockh);
+		evo_data(push, asyw->image.kind << 16 |
+			       asyw->image.format << 8 |
+			       asyw->image.colorspace);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+ovly507e_ntfy_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x00a4, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+ovly507e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 3))) {
+		evo_mthd(push, 0x00a0, 2);
+		evo_data(push, asyw->ntfy.awaken << 30 | asyw->ntfy.offset);
+		evo_data(push, asyw->ntfy.handle);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+void
+ovly507e_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		 struct nv50_head_atom *asyh)
+{
+	asyh->ovly.cpp = 0;
+}
+
+int
+ovly507e_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		 struct nv50_head_atom *asyh)
+{
+	const struct drm_framebuffer *fb = asyw->state.fb;
+	int ret;
+
+	ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  true, true);
+	if (ret)
+		return ret;
+
+	asyh->ovly.cpp = fb->format->cpp[0];
+	return 0;
+}
+
+#include "nouveau_bo.h"
+
+static const struct nv50_wndw_func
+ovly507e = {
+	.acquire = ovly507e_acquire,
+	.release = ovly507e_release,
+	.ntfy_set = ovly507e_ntfy_set,
+	.ntfy_clr = ovly507e_ntfy_clr,
+	.ntfy_reset = base507c_ntfy_reset,
+	.ntfy_wait_begun = base507c_ntfy_wait_begun,
+	.image_set = ovly507e_image_set,
+	.image_clr = ovly507e_image_clr,
+	.scale_set = ovly507e_scale_set,
+	.update = ovly507e_update,
+};
+
+static const u32
+ovly507e_format[] = {
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_ARGB1555,
+	0
+};
+
+int
+ovly507e_new_(const struct nv50_wndw_func *func, const u32 *format,
+	      struct nouveau_drm *drm, int head, s32 oclass, u32 interlock_data,
+	      struct nv50_wndw **pwndw)
+{
+	struct nv50_disp_overlay_channel_dma_v0 args = {
+		.head = head,
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_wndw *wndw;
+	int ret;
+
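+	/* Publish the window to the caller before checking for failure,
+	 * so that the caller's error path can tear down a partially
+	 * constructed window.
+	 */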
+	ret = nv50_wndw_new_(func, drm->dev, DRM_PLANE_TYPE_OVERLAY,
+			     "ovly", head, format, BIT(head),
+			     NV50_DISP_INTERLOCK_OVLY, interlock_data,
+			     &wndw);
+	if (*pwndw = wndw, ret)
+		return ret;
+
+	ret = nv50_dmac_create(&drm->client.device, &disp->disp->object,
+			       &oclass, 0, &args, sizeof(args),
+			       disp->sync->bo.offset, &wndw->wndw);
+	if (ret) {
+		NV_ERROR(drm, "ovly%04x allocation failed: %d\n", oclass, ret);
+		return ret;
+	}
+
+	ret = nvif_notify_init(&wndw->wndw.base.user, wndw->notify.func, false,
+			       NV50_DISP_OVERLAY_CHANNEL_DMA_V0_NTFY_UEVENT,
+			       &(struct nvif_notify_uevent_req) {},
+			       sizeof(struct nvif_notify_uevent_req),
+			       sizeof(struct nvif_notify_uevent_rep),
+			       &wndw->notify);
+	if (ret)
+		return ret;
+
+	wndw->ntfy = NV50_DISP_OVLY_NTFY(wndw->id);
+	wndw->sema = NV50_DISP_OVLY_SEM0(wndw->id);
+	wndw->data = 0x00000000;
+	return 0;
+}
+
+int
+ovly507e_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return ovly507e_new_(&ovly507e, ovly507e_format, drm, head, oclass,
+			     0x00000004 << (head * 8), pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c
new file mode 100644
index 0000000..aaa9fe5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/ovly827e.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ovly.h"
+#include "atom.h"
+
+#include <nouveau_bo.h>
+
+static void
+ovly827e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 12))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, asyw->image.interval << 4);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, asyw->image.handle[0]);
+		evo_mthd(push, 0x0100, 1);
+		evo_data(push, 0x00000002);
+		evo_mthd(push, 0x0800, 1);
+		evo_data(push, asyw->image.offset[0] >> 8);
+		evo_mthd(push, 0x0808, 3);
+		evo_data(push, asyw->image.h << 16 | asyw->image.w);
+		evo_data(push, asyw->image.layout << 20 |
+			       (asyw->image.pitch[0] >> 8) << 8 |
+			       asyw->image.blocks[0] << 8 |
+			       asyw->image.blockh);
+		evo_data(push, asyw->image.format << 8 |
+			       asyw->image.colorspace);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
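+/* ntfy_reset() arms the notifier by writing 0x80000000 into its status
+ * word; ntfy_wait_begun() then polls for up to two seconds until the
+ * hardware rewrites the top half to 0xffff, indicating the flip has
+ * begun. */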
+int
+ovly827e_ntfy_wait_begun(struct nouveau_bo *bo, u32 offset,
+			 struct nvif_device *device)
+{
+	s64 time = nvif_msec(device, 2000ULL,
+		u32 data = nouveau_bo_rd32(bo, offset / 4 + 3);
+		if ((data & 0xffff0000) == 0xffff0000)
+			break;
+		usleep_range(1, 2);
+	);
+	return time < 0 ? time : 0;
+}
+
+void
+ovly827e_ntfy_reset(struct nouveau_bo *bo, u32 offset)
+{
+	nouveau_bo_wr32(bo, offset / 4 + 0, 0x00000000);
+	nouveau_bo_wr32(bo, offset / 4 + 1, 0x00000000);
+	nouveau_bo_wr32(bo, offset / 4 + 2, 0x00000000);
+	nouveau_bo_wr32(bo, offset / 4 + 3, 0x80000000);
+}
+
+static const struct nv50_wndw_func
+ovly827e = {
+	.acquire = ovly507e_acquire,
+	.release = ovly507e_release,
+	.ntfy_set = ovly507e_ntfy_set,
+	.ntfy_clr = ovly507e_ntfy_clr,
+	.ntfy_reset = ovly827e_ntfy_reset,
+	.ntfy_wait_begun = ovly827e_ntfy_wait_begun,
+	.image_set = ovly827e_image_set,
+	.image_clr = ovly507e_image_clr,
+	.scale_set = ovly507e_scale_set,
+	.update = ovly507e_update,
+};
+
+const u32
+ovly827e_format[] = {
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ABGR2101010,
+	0
+};
+
+int
+ovly827e_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return ovly507e_new_(&ovly827e, ovly827e_format, drm, head, oclass,
+			     0x00000004 << (head * 8), pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly907e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly907e.c
new file mode 100644
index 0000000..a3ce530
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/ovly907e.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ovly.h"
+#include "atom.h"
+
+static void
+ovly907e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 12))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, asyw->image.interval << 4);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, asyw->image.handle[0]);
+		evo_mthd(push, 0x0100, 1);
+		evo_data(push, 0x00000002);
+		evo_mthd(push, 0x0400, 1);
+		evo_data(push, asyw->image.offset[0] >> 8);
+		evo_mthd(push, 0x0408, 3);
+		evo_data(push, asyw->image.h << 16 | asyw->image.w);
+		evo_data(push, asyw->image.layout << 24 |
+			       (asyw->image.pitch[0] >> 8) << 8 |
+			       asyw->image.blocks[0] << 8 |
+			       asyw->image.blockh);
+		evo_data(push, asyw->image.format << 8 |
+			       asyw->image.colorspace);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+const struct nv50_wndw_func
+ovly907e = {
+	.acquire = ovly507e_acquire,
+	.release = ovly507e_release,
+	.ntfy_set = ovly507e_ntfy_set,
+	.ntfy_clr = ovly507e_ntfy_clr,
+	.ntfy_reset = ovly827e_ntfy_reset,
+	.ntfy_wait_begun = ovly827e_ntfy_wait_begun,
+	.image_set = ovly907e_image_set,
+	.image_clr = ovly507e_image_clr,
+	.scale_set = ovly507e_scale_set,
+	.update = ovly507e_update,
+};
+
+int
+ovly907e_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return ovly507e_new_(&ovly907e, ovly827e_format, drm, head, oclass,
+			     0x00000004 << (head * 4), pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/ovly917e.c b/drivers/gpu/drm/nouveau/dispnv50/ovly917e.c
new file mode 100644
index 0000000..505fa7e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/ovly917e.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ovly.h"
+
+static const u32
+ovly917e_format[] = {
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ABGR2101010,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_ARGB2101010,
+	0
+};
+
+int
+ovly917e_new(struct nouveau_drm *drm, int head, s32 oclass,
+	     struct nv50_wndw **pwndw)
+{
+	return ovly507e_new_(&ovly907e, ovly917e_format, drm, head, oclass,
+			     0x00000004 << (head * 4), pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/pior507d.c b/drivers/gpu/drm/nouveau/dispnv50/pior507d.c
new file mode 100644
index 0000000..d2bac6a3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/pior507d.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+
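+/* Program the PIOR control method; when a head state is supplied, the
+ * OR depth and sync polarities are merged into the control word before
+ * it is written. */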
+static void
+pior507d_ctrl(struct nv50_core *core, int or, u32 ctrl,
+	      struct nv50_head_atom *asyh)
+{
+	u32 *push;
+	if ((push = evo_wait(&core->chan, 2))) {
+		if (asyh) {
+			ctrl |= asyh->or.depth  << 16;
+			ctrl |= asyh->or.nvsync << 13;
+			ctrl |= asyh->or.nhsync << 12;
+		}
+		evo_mthd(push, 0x0700 + (or * 0x040), 1);
+		evo_data(push, ctrl);
+		evo_kick(push, &core->chan);
+	}
+}
+
+const struct nv50_outp_func
+pior507d = {
+	.ctrl = pior507d_ctrl,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/sor507d.c b/drivers/gpu/drm/nouveau/dispnv50/sor507d.c
new file mode 100644
index 0000000..5222fe6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/sor507d.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+
+static void
+sor507d_ctrl(struct nv50_core *core, int or, u32 ctrl,
+	     struct nv50_head_atom *asyh)
+{
+	u32 *push;
+	if ((push = evo_wait(&core->chan, 2))) {
+		if (asyh) {
+			ctrl |= asyh->or.depth  << 16;
+			ctrl |= asyh->or.nvsync << 13;
+			ctrl |= asyh->or.nhsync << 12;
+		}
+		evo_mthd(push, 0x0600 + (or * 0x40), 1);
+		evo_data(push, ctrl);
+		evo_kick(push, &core->chan);
+	}
+}
+
+const struct nv50_outp_func
+sor507d = {
+	.ctrl = sor507d_ctrl,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/sor907d.c b/drivers/gpu/drm/nouveau/dispnv50/sor907d.c
new file mode 100644
index 0000000..b0314ec1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/sor907d.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+
+#include <nvif/class.h>
+
+static void
+sor907d_ctrl(struct nv50_core *core, int or, u32 ctrl,
+	     struct nv50_head_atom *asyh)
+{
+	u32 *push;
+	if ((push = evo_wait(&core->chan, 2))) {
+		evo_mthd(push, 0x0200 + (or * 0x20), 1);
+		evo_data(push, ctrl);
+		evo_kick(push, &core->chan);
+	}
+}
+
+const struct nv50_outp_func
+sor907d = {
+	.ctrl = sor907d_ctrl,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c b/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c
new file mode 100644
index 0000000..dff0592
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/sorc37d.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+
+static void
+sorc37d_ctrl(struct nv50_core *core, int or, u32 ctrl,
+	     struct nv50_head_atom *asyh)
+{
+	u32 *push;
+	if ((push = evo_wait(&core->chan, 2))) {
+		evo_mthd(push, 0x0300 + (or * 0x20), 1);
+		evo_data(push, ctrl);
+		evo_kick(push, &core->chan);
+	}
+}
+
+const struct nv50_outp_func
+sorc37d = {
+	.ctrl = sorc37d_ctrl,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c
new file mode 100644
index 0000000..fc36e06
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "wimm.h"
+
+#include <nvif/class.h>
+
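+/* Only the GV100-style window-immediate class is probed here; on
+ * earlier hardware, immediate updates go through the cursor and
+ * overlay immediate channels instead. */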
+int
+nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw)
+{
+	struct {
+		s32 oclass;
+		int version;
+		int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *);
+	} wimms[] = {
+		{ GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init },
+		{}
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int cid;
+
+	cid = nvif_mclass(&disp->disp->object, wimms);
+	if (cid < 0) {
+		NV_ERROR(drm, "No supported window immediate class\n");
+		return cid;
+	}
+
+	return wimms[cid].init(drm, wimms[cid].oclass, wndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.h b/drivers/gpu/drm/nouveau/dispnv50/wimm.h
new file mode 100644
index 0000000..3630523
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.h
@@ -0,0 +1,8 @@
+#ifndef __NV50_KMS_WIMM_H__
+#define __NV50_KMS_WIMM_H__
+#include "wndw.h"
+
+int nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *);
+
+int wimmc37b_init(struct nouveau_drm *, s32, struct nv50_wndw *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c
new file mode 100644
index 0000000..9103b84
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "wimm.h"
+#include "atom.h"
+#include "wndw.h"
+
+#include <nvif/clc37b.h>
+
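+/* Method 0x0200 requests an update; setting bit 1 as well interlocks
+ * the immediate update with the corresponding window channel, so that
+ * both take effect together.  Method 0x0208 carries the packed y/x
+ * window position. */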
+static void
+wimmc37b_update(struct nv50_wndw *wndw, u32 *interlock)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wimm, 2))) {
+		evo_mthd(push, 0x0200, 1);
+		if (interlock[NV50_DISP_INTERLOCK_WNDW] & wndw->interlock.data)
+			evo_data(push, 0x00000003);
+		else
+			evo_data(push, 0x00000001);
+		evo_kick(push, &wndw->wimm);
+	}
+}
+
+static void
+wimmc37b_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wimm, 2))) {
+		evo_mthd(push, 0x0208, 1);
+		evo_data(push, asyw->point.y << 16 | asyw->point.x);
+		evo_kick(push, &wndw->wimm);
+	}
+}
+
+static const struct nv50_wimm_func
+wimmc37b = {
+	.point = wimmc37b_point,
+	.update = wimmc37b_update,
+};
+
+static int
+wimmc37b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm,
+	       s32 oclass, struct nv50_wndw *wndw)
+{
+	struct nvc37b_window_imm_channel_dma_v0 args = {
+		.pushbuf = 0xb0007b00 | wndw->id,
+		.index = wndw->id,
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int ret;
+
+	ret = nv50_dmac_create(&drm->client.device, &disp->disp->object,
+			       &oclass, 0, &args, sizeof(args), 0,
+			       &wndw->wimm);
+	if (ret) {
+		NV_ERROR(drm, "wimm%04x allocation failed: %d\n", oclass, ret);
+		return ret;
+	}
+
+	wndw->immd = func;
+	return 0;
+}
+
+int
+wimmc37b_init(struct nouveau_drm *drm, s32 oclass, struct nv50_wndw *wndw)
+{
+	return wimmc37b_init_(&wimmc37b, drm, oclass, wndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
new file mode 100644
index 0000000..224963b5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -0,0 +1,656 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "wndw.h"
+#include "wimm.h"
+
+#include <nvif/class.h>
+#include <nvif/cl0002.h>
+
+#include <drm/drm_atomic_helper.h>
+#include "nouveau_bo.h"
+
+static void
+nv50_wndw_ctxdma_del(struct nv50_wndw_ctxdma *ctxdma)
+{
+	nvif_object_fini(&ctxdma->object);
+	list_del(&ctxdma->head);
+	kfree(ctxdma);
+}
+
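+/* Context DMAs are cached on the window's list, keyed by the buffer's
+ * memory kind (which is encoded into the object handle), so flipping
+ * between buffers of the same kind reuses an existing object instead
+ * of allocating a new one. */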
+static struct nv50_wndw_ctxdma *
+nv50_wndw_ctxdma_new(struct nv50_wndw *wndw, struct nouveau_framebuffer *fb)
+{
+	struct nouveau_drm *drm = nouveau_drm(fb->base.dev);
+	struct nv50_wndw_ctxdma *ctxdma;
+	const u8    kind = fb->nvbo->kind;
+	const u32 handle = 0xfb000000 | kind;
+	struct {
+		struct nv_dma_v0 base;
+		union {
+			struct nv50_dma_v0 nv50;
+			struct gf100_dma_v0 gf100;
+			struct gf119_dma_v0 gf119;
+		};
+	} args = {};
+	u32 argc = sizeof(args.base);
+	int ret;
+
+	list_for_each_entry(ctxdma, &wndw->ctxdma.list, head) {
+		if (ctxdma->object.handle == handle)
+			return ctxdma;
+	}
+
+	if (!(ctxdma = kzalloc(sizeof(*ctxdma), GFP_KERNEL)))
+		return ERR_PTR(-ENOMEM);
+	list_add(&ctxdma->head, &wndw->ctxdma.list);
+
+	args.base.target = NV_DMA_V0_TARGET_VRAM;
+	args.base.access = NV_DMA_V0_ACCESS_RDWR;
+	args.base.start  = 0;
+	args.base.limit  = drm->client.device.info.ram_user - 1;
+
+	if (drm->client.device.info.chipset < 0x80) {
+		args.nv50.part = NV50_DMA_V0_PART_256;
+		argc += sizeof(args.nv50);
+	} else
+	if (drm->client.device.info.chipset < 0xc0) {
+		args.nv50.part = NV50_DMA_V0_PART_256;
+		args.nv50.kind = kind;
+		argc += sizeof(args.nv50);
+	} else
+	if (drm->client.device.info.chipset < 0xd0) {
+		args.gf100.kind = kind;
+		argc += sizeof(args.gf100);
+	} else {
+		args.gf119.page = GF119_DMA_V0_PAGE_LP;
+		args.gf119.kind = kind;
+		argc += sizeof(args.gf119);
+	}
+
+	ret = nvif_object_init(wndw->ctxdma.parent, handle, NV_DMA_IN_MEMORY,
+			       &args, argc, &ctxdma->object);
+	if (ret) {
+		nv50_wndw_ctxdma_del(ctxdma);
+		return ERR_PTR(ret);
+	}
+
+	return ctxdma;
+}
+
+int
+nv50_wndw_wait_armed(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	struct nv50_disp *disp = nv50_disp(wndw->plane.dev);
+	if (asyw->set.ntfy) {
+		return wndw->func->ntfy_wait_begun(disp->sync,
+						   asyw->ntfy.offset,
+						   wndw->wndw.base.device);
+	}
+	return 0;
+}
+
+void
+nv50_wndw_flush_clr(struct nv50_wndw *wndw, u32 *interlock, bool flush,
+		    struct nv50_wndw_atom *asyw)
+{
+	union nv50_wndw_atom_mask clr = {
+		.mask = asyw->clr.mask & ~(flush ? 0 : asyw->set.mask),
+	};
+	if (clr.sema ) wndw->func-> sema_clr(wndw);
+	if (clr.ntfy ) wndw->func-> ntfy_clr(wndw);
+	if (clr.xlut ) wndw->func-> xlut_clr(wndw);
+	if (clr.image) wndw->func->image_clr(wndw);
+
+	interlock[wndw->interlock.type] |= wndw->interlock.data;
+}
+
+void
+nv50_wndw_flush_set(struct nv50_wndw *wndw, u32 *interlock,
+		    struct nv50_wndw_atom *asyw)
+{
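+	/* If this update will be interlocked with a core channel update,
+	 * force a non-tearing flip so that both commits take effect in
+	 * the same vblank. */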
+	if (interlock[NV50_DISP_INTERLOCK_CORE]) {
+		asyw->image.mode = 0;
+		asyw->image.interval = 1;
+	}
+
+	if (asyw->set.sema ) wndw->func->sema_set (wndw, asyw);
+	if (asyw->set.ntfy ) wndw->func->ntfy_set (wndw, asyw);
+	if (asyw->set.image) wndw->func->image_set(wndw, asyw);
+
+	if (asyw->set.xlut ) {
+		if (asyw->ilut) {
+			asyw->xlut.i.offset =
+				nv50_lut_load(&wndw->ilut,
+					      asyw->xlut.i.mode <= 1,
+					      asyw->xlut.i.buffer,
+					      asyw->ilut);
+		}
+		wndw->func->xlut_set(wndw, asyw);
+	}
+
+	if (asyw->set.scale) wndw->func->scale_set(wndw, asyw);
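+	/* Window position moves through the immediate channel.  The
+	 * comma expression clears the point flag first, then tests
+	 * whether any other state still needs to be flushed through
+	 * the main window channel before the immediate update. */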
+	if (asyw->set.point) {
+		if (asyw->set.point = false, asyw->set.mask)
+			interlock[wndw->interlock.type] |= wndw->interlock.data;
+		interlock[NV50_DISP_INTERLOCK_WIMM] |= wndw->interlock.data;
+
+		wndw->immd->point(wndw, asyw);
+		wndw->immd->update(wndw, interlock);
+	} else {
+		interlock[wndw->interlock.type] |= wndw->interlock.data;
+	}
+}
+
+void
+nv50_wndw_ntfy_enable(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	struct nv50_disp *disp = nv50_disp(wndw->plane.dev);
+
+	asyw->ntfy.handle = wndw->wndw.sync.handle;
+	asyw->ntfy.offset = wndw->ntfy;
+	asyw->ntfy.awaken = false;
+	asyw->set.ntfy = true;
+
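+	/* Reset the notifier in the current slot, then flip to the
+	 * other of the two slots ready for the next update. */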
+	wndw->func->ntfy_reset(disp->sync, wndw->ntfy);
+	wndw->ntfy ^= 0x10;
+}
+
+static void
+nv50_wndw_atomic_check_release(struct nv50_wndw *wndw,
+			       struct nv50_wndw_atom *asyw,
+			       struct nv50_head_atom *asyh)
+{
+	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
+	NV_ATOMIC(drm, "%s release\n", wndw->plane.name);
+	wndw->func->release(wndw, asyw, asyh);
+	asyw->ntfy.handle = 0;
+	asyw->sema.handle = 0;
+}
+
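+/* The image.format values below are hardware surface-format codes,
+ * which appear to be common to all of the display classes driven
+ * by this code. */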
+static int
+nv50_wndw_atomic_check_acquire_yuv(struct nv50_wndw_atom *asyw)
+{
+	switch (asyw->state.fb->format->format) {
+	case DRM_FORMAT_YUYV: asyw->image.format = 0x28; break;
+	case DRM_FORMAT_UYVY: asyw->image.format = 0x29; break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+	asyw->image.colorspace = 1;
+	return 0;
+}
+
+static int
+nv50_wndw_atomic_check_acquire_rgb(struct nv50_wndw_atom *asyw)
+{
+	switch (asyw->state.fb->format->format) {
+	case DRM_FORMAT_C8         : asyw->image.format = 0x1e; break;
+	case DRM_FORMAT_XRGB8888   :
+	case DRM_FORMAT_ARGB8888   : asyw->image.format = 0xcf; break;
+	case DRM_FORMAT_RGB565     : asyw->image.format = 0xe8; break;
+	case DRM_FORMAT_XRGB1555   :
+	case DRM_FORMAT_ARGB1555   : asyw->image.format = 0xe9; break;
+	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ABGR2101010: asyw->image.format = 0xd1; break;
+	case DRM_FORMAT_XBGR8888   :
+	case DRM_FORMAT_ABGR8888   : asyw->image.format = 0xd5; break;
+	case DRM_FORMAT_XRGB2101010:
+	case DRM_FORMAT_ARGB2101010: asyw->image.format = 0xdf; break;
+	default:
+		return -EINVAL;
+	}
+	asyw->image.colorspace = 0;
+	return 0;
+}
+
+static int
+nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset,
+			       struct nv50_wndw_atom *armw,
+			       struct nv50_wndw_atom *asyw,
+			       struct nv50_head_atom *asyh)
+{
+	struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb);
+	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
+	int ret;
+
+	NV_ATOMIC(drm, "%s acquire\n", wndw->plane.name);
+
+	if (asyw->state.fb != armw->state.fb || !armw->visible || modeset) {
+		asyw->image.w = fb->base.width;
+		asyw->image.h = fb->base.height;
+		asyw->image.kind = fb->nvbo->kind;
+
+		ret = nv50_wndw_atomic_check_acquire_rgb(asyw);
+		if (ret) {
+			ret = nv50_wndw_atomic_check_acquire_yuv(asyw);
+			if (ret)
+				return ret;
+		}
+
+		if (asyw->image.kind) {
+			asyw->image.layout = 0;
+			if (drm->client.device.info.chipset >= 0xc0)
+				asyw->image.blockh = fb->nvbo->mode >> 4;
+			else
+				asyw->image.blockh = fb->nvbo->mode;
+			asyw->image.blocks[0] = fb->base.pitches[0] / 64;
+			asyw->image.pitch[0] = 0;
+		} else {
+			asyw->image.layout = 1;
+			asyw->image.blockh = 0;
+			asyw->image.blocks[0] = 0;
+			asyw->image.pitch[0] = fb->base.pitches[0];
+		}
+
+		if (!(asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC))
+			asyw->image.interval = 1;
+		else
+			asyw->image.interval = 0;
+		asyw->image.mode = asyw->image.interval ? 0 : 1;
+		asyw->set.image = wndw->func->image_set != NULL;
+	}
+
+	if (wndw->func->scale_set) {
+		asyw->scale.sx = asyw->state.src_x >> 16;
+		asyw->scale.sy = asyw->state.src_y >> 16;
+		asyw->scale.sw = asyw->state.src_w >> 16;
+		asyw->scale.sh = asyw->state.src_h >> 16;
+		asyw->scale.dw = asyw->state.crtc_w;
+		asyw->scale.dh = asyw->state.crtc_h;
+		if (memcmp(&armw->scale, &asyw->scale, sizeof(asyw->scale)))
+			asyw->set.scale = true;
+	}
+
+	if (wndw->immd) {
+		asyw->point.x = asyw->state.crtc_x;
+		asyw->point.y = asyw->state.crtc_y;
+		if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point)))
+			asyw->set.point = true;
+	}
+
+	return wndw->func->acquire(wndw, asyw, asyh);
+}
+
+static void
+nv50_wndw_atomic_check_lut(struct nv50_wndw *wndw,
+			   struct nv50_wndw_atom *armw,
+			   struct nv50_wndw_atom *asyw,
+			   struct nv50_head_atom *asyh)
+{
+	struct drm_property_blob *ilut = asyh->state.degamma_lut;
+
+	/* I8 format without an input LUT makes no sense, and the
+	 * HW error-checks for this.
+	 *
+	 * In order to handle legacy gamma, when there's no input
+	 * LUT we need to steal the output LUT and use it instead.
+	 */
+	if (!ilut && asyw->state.fb->format->format == DRM_FORMAT_C8) {
+		/* This should be an error, but there are legacy clients
+		 * that do a modeset before providing a gamma table.
+		 *
+		 * We keep the window disabled to avoid angering HW.
+		 */
+		if (!(ilut = asyh->state.gamma_lut)) {
+			asyw->visible = false;
+			return;
+		}
+
+		if (wndw->func->ilut)
+			asyh->wndw.olut |= BIT(wndw->id);
+	} else {
+		asyh->wndw.olut &= ~BIT(wndw->id);
+	}
+
+	/* Recalculate LUT state. */
+	memset(&asyw->xlut, 0x00, sizeof(asyw->xlut));
+	if ((asyw->ilut = wndw->func->ilut ? ilut : NULL)) {
+		wndw->func->ilut(wndw, asyw);
+		asyw->xlut.handle = wndw->wndw.vram.handle;
+		asyw->xlut.i.buffer = !asyw->xlut.i.buffer;
+		asyw->set.xlut = true;
+	}
+
+	/* Handle setting base SET_OUTPUT_LUT_LO_ENABLE_USE_CORE_LUT. */
+	if (wndw->func->olut_core &&
+	    (!armw->visible || (armw->xlut.handle && !asyw->xlut.handle)))
+		asyw->set.xlut = true;
+
+	/* Can't do an immediate flip while changing the LUT. */
+	asyh->state.pageflip_flags &= ~DRM_MODE_PAGE_FLIP_ASYNC;
+}
+
+static int
+nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
+{
+	struct nouveau_drm *drm = nouveau_drm(plane->dev);
+	struct nv50_wndw *wndw = nv50_wndw(plane);
+	struct nv50_wndw_atom *armw = nv50_wndw_atom(wndw->plane.state);
+	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
+	struct nv50_head_atom *harm = NULL, *asyh = NULL;
+	bool modeset = false;
+	int ret;
+
+	NV_ATOMIC(drm, "%s atomic_check\n", plane->name);
+
+	/* Fetch the assembly state for the head the window will belong to,
+	 * and determine whether the window will be visible.
+	 */
+	if (asyw->state.crtc) {
+		asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc);
+		if (IS_ERR(asyh))
+			return PTR_ERR(asyh);
+		modeset = drm_atomic_crtc_needs_modeset(&asyh->state);
+		asyw->visible = asyh->state.active;
+	} else {
+		asyw->visible = false;
+	}
+
+	/* Fetch assembly state for the head the window used to belong to. */
+	if (armw->state.crtc) {
+		harm = nv50_head_atom_get(asyw->state.state, armw->state.crtc);
+		if (IS_ERR(harm))
+			return PTR_ERR(harm);
+	}
+
+	/* LUT configuration can potentially cause the window to be disabled. */
+	if (asyw->visible && wndw->func->xlut_set &&
+	    (!armw->visible ||
+	     asyh->state.color_mgmt_changed ||
+	     asyw->state.fb->format->format !=
+	     armw->state.fb->format->format))
+		nv50_wndw_atomic_check_lut(wndw, armw, asyw, asyh);
+
+	/* Calculate new window state. */
+	if (asyw->visible) {
+		ret = nv50_wndw_atomic_check_acquire(wndw, modeset,
+						     armw, asyw, asyh);
+		if (ret)
+			return ret;
+
+		asyh->wndw.mask |= BIT(wndw->id);
+	} else
+	if (armw->visible) {
+		nv50_wndw_atomic_check_release(wndw, asyw, harm);
+		harm->wndw.mask &= ~BIT(wndw->id);
+	} else {
+		return 0;
+	}
+
+	/* Aside from the obvious case where the window is actively being
+	 * disabled, we might also need to temporarily disable the window
+	 * when performing certain modeset operations.
+	 */
+	if (!asyw->visible || modeset) {
+		asyw->clr.ntfy = armw->ntfy.handle != 0;
+		asyw->clr.sema = armw->sema.handle != 0;
+		asyw->clr.xlut = armw->xlut.handle != 0;
+		if (wndw->func->image_clr)
+			asyw->clr.image = armw->image.handle[0] != 0;
+	}
+
+	return 0;
+}
+
+static void
+nv50_wndw_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state)
+{
+	struct nouveau_framebuffer *fb = nouveau_framebuffer(old_state->fb);
+	struct nouveau_drm *drm = nouveau_drm(plane->dev);
+
+	NV_ATOMIC(drm, "%s cleanup: %p\n", plane->name, old_state->fb);
+	if (!old_state->fb)
+		return;
+
+	nouveau_bo_unpin(fb->nvbo);
+}
+
+static int
+nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
+{
+	struct nouveau_framebuffer *fb = nouveau_framebuffer(state->fb);
+	struct nouveau_drm *drm = nouveau_drm(plane->dev);
+	struct nv50_wndw *wndw = nv50_wndw(plane);
+	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
+	struct nv50_head_atom *asyh;
+	struct nv50_wndw_ctxdma *ctxdma;
+	int ret;
+
+	NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, state->fb);
+	if (!asyw->state.fb)
+		return 0;
+
+	ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM, true);
+	if (ret)
+		return ret;
+
+	ctxdma = nv50_wndw_ctxdma_new(wndw, fb);
+	if (IS_ERR(ctxdma)) {
+		nouveau_bo_unpin(fb->nvbo);
+		return PTR_ERR(ctxdma);
+	}
+
+	asyw->state.fence = reservation_object_get_excl_rcu(fb->nvbo->bo.resv);
+	asyw->image.handle[0] = ctxdma->object.handle;
+	asyw->image.offset[0] = fb->nvbo->bo.offset;
+
+	if (wndw->func->prepare) {
+		asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc);
+		if (IS_ERR(asyh))
+			return PTR_ERR(asyh);
+
+		wndw->func->prepare(wndw, asyh, asyw);
+	}
+
+	return 0;
+}
+
+static const struct drm_plane_helper_funcs
+nv50_wndw_helper = {
+	.prepare_fb = nv50_wndw_prepare_fb,
+	.cleanup_fb = nv50_wndw_cleanup_fb,
+	.atomic_check = nv50_wndw_atomic_check,
+};
+
+static void
+nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
+			       struct drm_plane_state *state)
+{
+	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
+	__drm_atomic_helper_plane_destroy_state(&asyw->state);
+	kfree(asyw);
+}
+
+static struct drm_plane_state *
+nv50_wndw_atomic_duplicate_state(struct drm_plane *plane)
+{
+	struct nv50_wndw_atom *armw = nv50_wndw_atom(plane->state);
+	struct nv50_wndw_atom *asyw;
+	if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL)))
+		return NULL;
+	__drm_atomic_helper_plane_duplicate_state(plane, &asyw->state);
+	asyw->sema = armw->sema;
+	asyw->ntfy = armw->ntfy;
+	asyw->ilut = NULL;
+	asyw->xlut = armw->xlut;
+	asyw->image = armw->image;
+	asyw->point = armw->point;
+	asyw->clr.mask = 0;
+	asyw->set.mask = 0;
+	return &asyw->state;
+}
+
+static void
+nv50_wndw_reset(struct drm_plane *plane)
+{
+	struct nv50_wndw_atom *asyw;
+
+	if (WARN_ON(!(asyw = kzalloc(sizeof(*asyw), GFP_KERNEL))))
+		return;
+
+	if (plane->state)
+		plane->funcs->atomic_destroy_state(plane, plane->state);
+	plane->state = &asyw->state;
+	plane->state->plane = plane;
+	plane->state->rotation = DRM_MODE_ROTATE_0;
+}
+
+static void
+nv50_wndw_destroy(struct drm_plane *plane)
+{
+	struct nv50_wndw *wndw = nv50_wndw(plane);
+	struct nv50_wndw_ctxdma *ctxdma, *ctxtmp;
+
+	list_for_each_entry_safe(ctxdma, ctxtmp, &wndw->ctxdma.list, head) {
+		nv50_wndw_ctxdma_del(ctxdma);
+	}
+
+	nvif_notify_fini(&wndw->notify);
+	nv50_dmac_destroy(&wndw->wimm);
+	nv50_dmac_destroy(&wndw->wndw);
+
+	nv50_lut_fini(&wndw->ilut);
+
+	drm_plane_cleanup(&wndw->plane);
+	kfree(wndw);
+}
+
+const struct drm_plane_funcs
+nv50_wndw = {
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.destroy = nv50_wndw_destroy,
+	.reset = nv50_wndw_reset,
+	.atomic_duplicate_state = nv50_wndw_atomic_duplicate_state,
+	.atomic_destroy_state = nv50_wndw_atomic_destroy_state,
+};
+
+static int
+nv50_wndw_notify(struct nvif_notify *notify)
+{
+	return NVIF_NOTIFY_KEEP;
+}
+
+void
+nv50_wndw_fini(struct nv50_wndw *wndw)
+{
+	nvif_notify_put(&wndw->notify);
+}
+
+void
+nv50_wndw_init(struct nv50_wndw *wndw)
+{
+	nvif_notify_get(&wndw->notify);
+}
+
+int
+nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev,
+	       enum drm_plane_type type, const char *name, int index,
+	       const u32 *format, u32 heads,
+	       enum nv50_disp_interlock_type interlock_type, u32 interlock_data,
+	       struct nv50_wndw **pwndw)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvif_mmu *mmu = &drm->client.mmu;
+	struct nv50_disp *disp = nv50_disp(dev);
+	struct nv50_wndw *wndw;
+	int nformat;
+	int ret;
+
+	if (!(wndw = *pwndw = kzalloc(sizeof(*wndw), GFP_KERNEL)))
+		return -ENOMEM;
+	wndw->func = func;
+	wndw->id = index;
+	wndw->interlock.type = interlock_type;
+	wndw->interlock.data = interlock_data;
+
+	wndw->ctxdma.parent = &wndw->wndw.base.user;
+	INIT_LIST_HEAD(&wndw->ctxdma.list);
+
+	for (nformat = 0; format[nformat]; nformat++);
+
+	ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw,
+				       format, nformat, NULL,
+				       type, "%s-%d", name, index);
+	if (ret) {
+		kfree(*pwndw);
+		*pwndw = NULL;
+		return ret;
+	}
+
+	drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
+
+	if (wndw->func->ilut) {
+		ret = nv50_lut_init(disp, mmu, &wndw->ilut);
+		if (ret)
+			return ret;
+	}
+
+	wndw->notify.func = nv50_wndw_notify;
+	return 0;
+}
+
+int
+nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index,
+	      struct nv50_wndw **pwndw)
+{
+	struct {
+		s32 oclass;
+		int version;
+		int (*new)(struct nouveau_drm *, enum drm_plane_type,
+			   int, s32, struct nv50_wndw **);
+	} wndws[] = {
+		{ GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new },
+		{}
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	int cid, ret;
+
+	cid = nvif_mclass(&disp->disp->object, wndws);
+	if (cid < 0) {
+		NV_ERROR(drm, "No supported window class\n");
+		return cid;
+	}
+
+	ret = wndws[cid].new(drm, type, index, wndws[cid].oclass, pwndw);
+	if (ret)
+		return ret;
+
+	return nv50_wimm_init(drm, *pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
new file mode 100644
index 0000000..b0b6428
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
@@ -0,0 +1,97 @@
+#ifndef __NV50_KMS_WNDW_H__
+#define __NV50_KMS_WNDW_H__
+#define nv50_wndw(p) container_of((p), struct nv50_wndw, plane)
+#include "disp.h"
+#include "atom.h"
+#include "lut.h"
+
+#include <nvif/notify.h>
+
+struct nv50_wndw_ctxdma {
+	struct list_head head;
+	struct nvif_object object;
+};
+
+struct nv50_wndw {
+	const struct nv50_wndw_func *func;
+	const struct nv50_wimm_func *immd;
+	int id;
+	struct nv50_disp_interlock interlock;
+
+	struct {
+		struct nvif_object *parent;
+		struct list_head list;
+	} ctxdma;
+
+	struct drm_plane plane;
+
+	struct nv50_lut ilut;
+
+	struct nv50_dmac wndw;
+	struct nv50_dmac wimm;
+
+	struct nvif_notify notify;
+	u16 ntfy;
+	u16 sema;
+	u32 data;
+};
+
+int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *,
+		   enum drm_plane_type, const char *name, int index,
+		   const u32 *format, u32 heads,
+		   enum nv50_disp_interlock_type, u32 interlock_data,
+		   struct nv50_wndw **);
+void nv50_wndw_init(struct nv50_wndw *);
+void nv50_wndw_fini(struct nv50_wndw *);
+void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock,
+			 struct nv50_wndw_atom *);
+void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush,
+			 struct nv50_wndw_atom *);
+void nv50_wndw_ntfy_enable(struct nv50_wndw *, struct nv50_wndw_atom *);
+int nv50_wndw_wait_armed(struct nv50_wndw *, struct nv50_wndw_atom *);
+
+struct nv50_wndw_func {
+	int (*acquire)(struct nv50_wndw *, struct nv50_wndw_atom *asyw,
+		       struct nv50_head_atom *asyh);
+	void (*release)(struct nv50_wndw *, struct nv50_wndw_atom *asyw,
+			struct nv50_head_atom *asyh);
+	void (*prepare)(struct nv50_wndw *, struct nv50_head_atom *asyh,
+			struct nv50_wndw_atom *asyw);
+
+	void (*sema_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*sema_clr)(struct nv50_wndw *);
+	void (*ntfy_reset)(struct nouveau_bo *, u32 offset);
+	void (*ntfy_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*ntfy_clr)(struct nv50_wndw *);
+	int (*ntfy_wait_begun)(struct nouveau_bo *, u32 offset,
+			       struct nvif_device *);
+	void (*ilut)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	bool olut_core;
+	void (*xlut_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*xlut_clr)(struct nv50_wndw *);
+	void (*image_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*image_clr)(struct nv50_wndw *);
+	void (*scale_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
+
+	void (*update)(struct nv50_wndw *, u32 *interlock);
+};
+
+extern const struct drm_plane_funcs nv50_wndw;
+
+void base507c_ntfy_reset(struct nouveau_bo *, u32);
+int base507c_ntfy_wait_begun(struct nouveau_bo *, u32, struct nvif_device *);
+
+struct nv50_wimm_func {
+	void (*point)(struct nv50_wndw *, struct nv50_wndw_atom *);
+
+	void (*update)(struct nv50_wndw *, u32 *interlock);
+};
+
+extern const struct nv50_wimm_func curs507a;
+
+int wndwc37e_new(struct nouveau_drm *, enum drm_plane_type, int, s32,
+		 struct nv50_wndw **);
+
+int nv50_wndw_new(struct nouveau_drm *, enum drm_plane_type, int index,
+		  struct nv50_wndw **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c
new file mode 100644
index 0000000..44afb0f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "wndw.h"
+#include "atom.h"
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <nouveau_bo.h>
+
+#include <nvif/clc37e.h>
+
+static void
+wndwc37e_ilut_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x02b8, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc37e_ilut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 4))) {
+		evo_mthd(push, 0x02b0, 3);
+		evo_data(push, asyw->xlut.i.output_mode << 8 |
+			       asyw->xlut.i.range << 4 |
+			       asyw->xlut.i.size);
+		evo_data(push, asyw->xlut.i.offset >> 8);
+		evo_data(push, asyw->xlut.handle);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc37e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	asyw->xlut.i.mode = 2;
+	asyw->xlut.i.size = 0;
+	asyw->xlut.i.range = 0;
+	asyw->xlut.i.output_mode = 1;
+}
+
+static void
+wndwc37e_image_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 4))) {
+		evo_mthd(push, 0x0308, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x0240, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc37e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+
+	if (!(push = evo_wait(&wndw->wndw, 25)))
+		return;
+
+	evo_mthd(push, 0x0308, 1);
+	evo_data(push, asyw->image.mode << 4 | asyw->image.interval);
+	evo_mthd(push, 0x0224, 4);
+	evo_data(push, asyw->image.h << 16 | asyw->image.w);
+	evo_data(push, asyw->image.layout << 4 | asyw->image.blockh);
+	evo_data(push, asyw->image.colorspace << 8 | asyw->image.format);
+	evo_data(push, asyw->image.blocks[0] | (asyw->image.pitch[0] >> 6));
+	evo_mthd(push, 0x0240, 1);
+	evo_data(push, asyw->image.handle[0]);
+	evo_mthd(push, 0x0260, 1);
+	evo_data(push, asyw->image.offset[0] >> 8);
+	evo_mthd(push, 0x0290, 1);
+	evo_data(push, (asyw->state.src_y >> 16) << 16 |
+		       (asyw->state.src_x >> 16));
+	evo_mthd(push, 0x0298, 1);
+	evo_data(push, (asyw->state.src_h >> 16) << 16 |
+		       (asyw->state.src_w >> 16));
+	evo_mthd(push, 0x02a4, 1);
+	evo_data(push, asyw->state.crtc_h << 16 |
+		       asyw->state.crtc_w);
+
+	/*XXX: Composition-related stuff.  Need to implement properly. */
+	evo_mthd(push, 0x02ec, 1);
+	evo_data(push, (2 - (wndw->id & 1)) << 4);
+	evo_mthd(push, 0x02f4, 5);
+	evo_data(push, 0x00000011);
+	evo_data(push, 0xffff0000);
+	evo_data(push, 0xffff0000);
+	evo_data(push, 0xffff0000);
+	evo_data(push, 0xffff0000);
+	evo_kick(push, &wndw->wndw);
+}
+
+static void
+wndwc37e_ntfy_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x021c, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc37e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 3))) {
+		evo_mthd(push, 0x021c, 2);
+		evo_data(push, asyw->ntfy.handle);
+		evo_data(push, asyw->ntfy.offset | asyw->ntfy.awaken);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc37e_sema_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x0218, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc37e_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 5))) {
+		evo_mthd(push, 0x020c, 4);
+		evo_data(push, asyw->sema.offset);
+		evo_data(push, asyw->sema.acquire);
+		evo_data(push, asyw->sema.release);
+		evo_data(push, asyw->sema.handle);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc37e_update(struct nv50_wndw *wndw, u32 *interlock)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 5))) {
+		evo_mthd(push, 0x0370, 2);
+		evo_data(push, interlock[NV50_DISP_INTERLOCK_CURS] << 1 |
+			       interlock[NV50_DISP_INTERLOCK_CORE]);
+		evo_data(push, interlock[NV50_DISP_INTERLOCK_WNDW]);
+		evo_mthd(push, 0x0200, 1);
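+		/* Bit 12 additionally interlocks the update with this
+		 * window's wimm channel.
+		 */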
+		if (interlock[NV50_DISP_INTERLOCK_WIMM] & wndw->interlock.data)
+			evo_data(push, 0x00001001);
+		else
+			evo_data(push, 0x00000001);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc37e_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		 struct nv50_head_atom *asyh)
+{
+}
+
+static int
+wndwc37e_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		 struct nv50_head_atom *asyh)
+{
+	return drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
+						   DRM_PLANE_HELPER_NO_SCALING,
+						   DRM_PLANE_HELPER_NO_SCALING,
+						   true, true);
+}
+
+static const u32
+wndwc37e_format[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_YUYV,
+	DRM_FORMAT_UYVY,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ABGR2101010,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR8888,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_ARGB2101010,
+	0
+};
+
+static const struct nv50_wndw_func
+wndwc37e = {
+	.acquire = wndwc37e_acquire,
+	.release = wndwc37e_release,
+	.sema_set = wndwc37e_sema_set,
+	.sema_clr = wndwc37e_sema_clr,
+	.ntfy_set = wndwc37e_ntfy_set,
+	.ntfy_clr = wndwc37e_ntfy_clr,
+	.ntfy_reset = corec37d_ntfy_init,
+	.ntfy_wait_begun = base507c_ntfy_wait_begun,
+	.ilut = wndwc37e_ilut,
+	.xlut_set = wndwc37e_ilut_set,
+	.xlut_clr = wndwc37e_ilut_clr,
+	.image_set = wndwc37e_image_set,
+	.image_clr = wndwc37e_image_clr,
+	.update = wndwc37e_update,
+};
+
+static int
+wndwc37e_new_(const struct nv50_wndw_func *func, struct nouveau_drm *drm,
+	      enum drm_plane_type type, int index, s32 oclass, u32 heads,
+	      struct nv50_wndw **pwndw)
+{
+	struct nvc37e_window_channel_dma_v0 args = {
+		.pushbuf = 0xb0007e00 | index,
+		.index = index,
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_wndw *wndw;
+	int ret;
+
+	ret = nv50_wndw_new_(func, drm->dev, type, "wndw", index,
+			     wndwc37e_format, heads, NV50_DISP_INTERLOCK_WNDW,
+			     BIT(index), &wndw);
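+	/* *pwndw is assigned even on failure, so the caller can tear down a
+	 * partially-constructed window.
+	 */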
+	if (*pwndw = wndw, ret)
+		return ret;
+
+	ret = nv50_dmac_create(&drm->client.device, &disp->disp->object,
+			       &oclass, 0, &args, sizeof(args),
+			       disp->sync->bo.offset, &wndw->wndw);
+	if (ret) {
+		NV_ERROR(drm, "qndw%04x allocation failed: %d\n", oclass, ret);
+		return ret;
+	}
+
+	wndw->ntfy = NV50_DISP_WNDW_NTFY(wndw->id);
+	wndw->sema = NV50_DISP_WNDW_SEM0(wndw->id);
+	wndw->data = 0x00000000;
+	return 0;
+}
+
+int
+wndwc37e_new(struct nouveau_drm *drm, enum drm_plane_type type, int index,
+	     s32 oclass, struct nv50_wndw **pwndw)
+{
+	return wndwc37e_new_(&wndwc37e, drm, type, index, oclass,
+			     BIT(index >> 1), pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
index 2740278..4f52331 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
@@ -31,6 +31,7 @@
 #define NV_DEVICE_INFO_V0_KEPLER                                           0x08
 #define NV_DEVICE_INFO_V0_MAXWELL                                          0x09
 #define NV_DEVICE_INFO_V0_PASCAL                                           0x0a
+#define NV_DEVICE_INFO_V0_VOLTA                                            0x0b
 	__u8  family;
 	__u8  pad06[2];
 	__u64 ram_size;
@@ -39,9 +40,55 @@
 	char  name[64];
 };
 
+struct nv_device_info_v1 {
+	__u8  version;
+	__u8  count;
+	__u8  pad02[6];
+	struct nv_device_info_v1_data {
+		__u64 mthd; /* NV_DEVICE_INFO_* (see below). */
+		__u64 data;
+	} data[];
+};
+
 struct nv_device_time_v0 {
 	__u8  version;
 	__u8  pad01[7];
 	__u64 time;
 };
+
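+/* Device queries are u64 method identifiers: the upper 32 bits select the
+ * unit (INFO or FIFO), the lower 32 bits the query within that unit.
+ */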
+#define NV_DEVICE_INFO_UNIT                               (0xffffffffULL << 32)
+#define NV_DEVICE_INFO(n)                          ((n) | (0x00000000ULL << 32))
+#define NV_DEVICE_FIFO(n)                          ((n) | (0x00000001ULL << 32))
+
+/* This will be returned for unsupported queries. */
+#define NV_DEVICE_INFO_INVALID                                           ~0ULL
+
+/* These return a mask of available engines of particular type. */
+#define NV_DEVICE_INFO_ENGINE_SW                     NV_DEVICE_INFO(0x00000000)
+#define NV_DEVICE_INFO_ENGINE_GR                     NV_DEVICE_INFO(0x00000001)
+#define NV_DEVICE_INFO_ENGINE_MPEG                   NV_DEVICE_INFO(0x00000002)
+#define NV_DEVICE_INFO_ENGINE_ME                     NV_DEVICE_INFO(0x00000003)
+#define NV_DEVICE_INFO_ENGINE_CIPHER                 NV_DEVICE_INFO(0x00000004)
+#define NV_DEVICE_INFO_ENGINE_BSP                    NV_DEVICE_INFO(0x00000005)
+#define NV_DEVICE_INFO_ENGINE_VP                     NV_DEVICE_INFO(0x00000006)
+#define NV_DEVICE_INFO_ENGINE_CE                     NV_DEVICE_INFO(0x00000007)
+#define NV_DEVICE_INFO_ENGINE_SEC                    NV_DEVICE_INFO(0x00000008)
+#define NV_DEVICE_INFO_ENGINE_MSVLD                  NV_DEVICE_INFO(0x00000009)
+#define NV_DEVICE_INFO_ENGINE_MSPDEC                 NV_DEVICE_INFO(0x0000000a)
+#define NV_DEVICE_INFO_ENGINE_MSPPP                  NV_DEVICE_INFO(0x0000000b)
+#define NV_DEVICE_INFO_ENGINE_MSENC                  NV_DEVICE_INFO(0x0000000c)
+#define NV_DEVICE_INFO_ENGINE_VIC                    NV_DEVICE_INFO(0x0000000d)
+#define NV_DEVICE_INFO_ENGINE_SEC2                   NV_DEVICE_INFO(0x0000000e)
+#define NV_DEVICE_INFO_ENGINE_NVDEC                  NV_DEVICE_INFO(0x0000000f)
+#define NV_DEVICE_INFO_ENGINE_NVENC                  NV_DEVICE_INFO(0x00000010)
+
+/* Returns the number of available channels. */
+#define NV_DEVICE_FIFO_CHANNELS                      NV_DEVICE_FIFO(0x00000000)
+
+/* Returns a mask of available runlists. */
+#define NV_DEVICE_FIFO_RUNLISTS                      NV_DEVICE_FIFO(0x00000001)
+
+/* These return a mask of engines available on a particular runlist. */
+#define NV_DEVICE_FIFO_RUNLIST_ENGINES(n)     ((n) + NV_DEVICE_FIFO(0x00000010))
+#define NV_DEVICE_FIFO_RUNLIST_ENGINES__SIZE                                64
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
index 56f5bd8..fbfcffc 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
@@ -4,25 +4,11 @@
 
 struct kepler_channel_gpfifo_a_v0 {
 	__u8  version;
-	__u8  pad01[5];
+	__u8  pad01[1];
 	__u16 chid;
-#define NVA06F_V0_ENGINE_SW                                          0x00000001
-#define NVA06F_V0_ENGINE_GR                                          0x00000002
-#define NVA06F_V0_ENGINE_SEC                                         0x00000004
-#define NVA06F_V0_ENGINE_MSVLD                                       0x00000010
-#define NVA06F_V0_ENGINE_MSPDEC                                      0x00000020
-#define NVA06F_V0_ENGINE_MSPPP                                       0x00000040
-#define NVA06F_V0_ENGINE_MSENC                                       0x00000080
-#define NVA06F_V0_ENGINE_VIC                                         0x00000100
-#define NVA06F_V0_ENGINE_NVDEC                                       0x00000200
-#define NVA06F_V0_ENGINE_NVENC0                                      0x00000400
-#define NVA06F_V0_ENGINE_NVENC1                                      0x00000800
-#define NVA06F_V0_ENGINE_CE0                                         0x00010000
-#define NVA06F_V0_ENGINE_CE1                                         0x00020000
-#define NVA06F_V0_ENGINE_CE2                                         0x00040000
-	__u32 engines;
 	__u32 ilength;
 	__u64 ioffset;
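+	/* mask of runlists the channel may be assigned to,
+	 * as returned by nvif_fifo_runlist()
+	 */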
+	__u64 runlist;
 	__u64 vmm;
 };
 
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index a7c5bf5..6db56bd 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -52,6 +52,8 @@
 
 #define NV04_DISP                                     /* cl0046.h */ 0x00000046
 
+#define VOLTA_USERMODE_A                                             0x0000c361
+
 #define NV03_CHANNEL_DMA                              /* cl506b.h */ 0x0000006b
 #define NV10_CHANNEL_DMA                              /* cl506b.h */ 0x0000006e
 #define NV17_CHANNEL_DMA                              /* cl506b.h */ 0x0000176e
@@ -66,6 +68,7 @@
 #define KEPLER_CHANNEL_GPFIFO_B                       /* cla06f.h */ 0x0000a16f
 #define MAXWELL_CHANNEL_GPFIFO_A                      /* cla06f.h */ 0x0000b06f
 #define PASCAL_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000c06f
+#define VOLTA_CHANNEL_GPFIFO_A                        /* cla06f.h */ 0x0000c36f
 
 #define NV50_DISP                                     /* cl5070.h */ 0x00005070
 #define G82_DISP                                      /* cl5070.h */ 0x00008270
@@ -79,6 +82,7 @@
 #define GM200_DISP                                    /* cl5070.h */ 0x00009570
 #define GP100_DISP                                    /* cl5070.h */ 0x00009770
 #define GP102_DISP                                    /* cl5070.h */ 0x00009870
+#define GV100_DISP                                    /* cl5070.h */ 0x0000c370
 
 #define NV31_MPEG                                                    0x00003174
 #define G82_MPEG                                                     0x00008274
@@ -90,6 +94,7 @@
 #define GT214_DISP_CURSOR                             /* cl507a.h */ 0x0000857a
 #define GF110_DISP_CURSOR                             /* cl507a.h */ 0x0000907a
 #define GK104_DISP_CURSOR                             /* cl507a.h */ 0x0000917a
+#define GV100_DISP_CURSOR                             /* cl507a.h */ 0x0000c37a
 
 #define NV50_DISP_OVERLAY                             /* cl507b.h */ 0x0000507b
 #define G82_DISP_OVERLAY                              /* cl507b.h */ 0x0000827b
@@ -97,6 +102,8 @@
 #define GF110_DISP_OVERLAY                            /* cl507b.h */ 0x0000907b
 #define GK104_DISP_OVERLAY                            /* cl507b.h */ 0x0000917b
 
+#define GV100_DISP_WINDOW_IMM_CHANNEL_DMA             /* clc37b.h */ 0x0000c37b
+
 #define NV50_DISP_BASE_CHANNEL_DMA                    /* cl507c.h */ 0x0000507c
 #define G82_DISP_BASE_CHANNEL_DMA                     /* cl507c.h */ 0x0000827c
 #define GT200_DISP_BASE_CHANNEL_DMA                   /* cl507c.h */ 0x0000837c
@@ -117,6 +124,7 @@
 #define GM200_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000957d
 #define GP100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000977d
 #define GP102_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000987d
+#define GV100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000c37d
 
 #define NV50_DISP_OVERLAY_CHANNEL_DMA                 /* cl507e.h */ 0x0000507e
 #define G82_DISP_OVERLAY_CHANNEL_DMA                  /* cl507e.h */ 0x0000827e
@@ -125,6 +133,8 @@
 #define GF110_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000907e
 #define GK104_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000917e
 
+#define GV100_DISP_WINDOW_CHANNEL_DMA                 /* clc37e.h */ 0x0000c37e
+
 #define NV50_TESLA                                                   0x00005097
 #define G82_TESLA                                                    0x00008297
 #define GT200_TESLA                                                  0x00008397
@@ -145,6 +155,8 @@
 #define PASCAL_A                                      /* cl9097.h */ 0x0000c097
 #define PASCAL_B                                      /* cl9097.h */ 0x0000c197
 
+#define VOLTA_A                                       /* cl9097.h */ 0x0000c397
+
 #define NV74_BSP                                                     0x000074b0
 
 #define GT212_MSVLD                                                  0x000085b1
@@ -170,6 +182,7 @@
 #define MAXWELL_DMA_COPY_A                                           0x0000b0b5
 #define PASCAL_DMA_COPY_A                                            0x0000c0b5
 #define PASCAL_DMA_COPY_B                                            0x0000c1b5
+#define VOLTA_DMA_COPY_A                                             0x0000c3b5
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
@@ -183,6 +196,7 @@
 #define MAXWELL_COMPUTE_B                                            0x0000b1c0
 #define PASCAL_COMPUTE_A                                             0x0000c0c0
 #define PASCAL_COMPUTE_B                                             0x0000c1c0
+#define VOLTA_COMPUTE_A                                              0x0000c3c0
 
 #define NV74_CIPHER                                                  0x000074c1
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/clc37b.h b/drivers/gpu/drm/nouveau/include/nvif/clc37b.h
new file mode 100644
index 0000000..89b1818
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/clc37b.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NVIF_CLC37B_H__
+#define __NVIF_CLC37B_H__
+
+struct nvc37b_window_imm_channel_dma_v0 {
+	__u8  version;
+	__u8  index;
+	__u8  pad02[6];
+	__u64 pushbuf;
+};
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/clc37e.h b/drivers/gpu/drm/nouveau/include/nvif/clc37e.h
new file mode 100644
index 0000000..899db9e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/clc37e.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NVIF_CLC37E_H__
+#define __NVIF_CLC37E_H__
+
+struct nvc37e_window_channel_dma_v0 {
+	__u8  version;
+	__u8  index;
+	__u8  pad02[6];
+	__u64 pushbuf;
+};
+
+#define NVC37E_WINDOW_CHANNEL_DMA_V0_NTFY_UEVENT                           0x00
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/device.h b/drivers/gpu/drm/nouveau/include/nvif/device.h
index 6edb626..ef839bd 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/device.h
@@ -4,10 +4,18 @@
 
 #include <nvif/object.h>
 #include <nvif/cl0080.h>
+#include <nvif/user.h>
 
 struct nvif_device {
 	struct nvif_object object;
 	struct nv_device_info_v0 info;
+
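+	/* Per-runlist engine masks, queried from the device
+	 * (see nvif_fifo_runlist()).
+	 */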
+	struct nvif_fifo_runlist {
+		u64 engines;
+	} *runlist;
+	int runlists;
+
+	struct nvif_user user;
 };
 
 int  nvif_device_init(struct nvif_object *, u32 handle, s32 oclass, void *, u32,
@@ -67,6 +75,5 @@
 #include <engine/fifo.h>
 #include <engine/gr.h>
 
-#define nvxx_fifo(a) nvxx_device(a)->fifo
 #define nvxx_gr(a) nvxx_device(a)->gr
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/disp.h b/drivers/gpu/drm/nouveau/include/nvif/disp.h
new file mode 100644
index 0000000..7c0eda3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/disp.h
@@ -0,0 +1,12 @@
+#ifndef __NVIF_DISP_H__
+#define __NVIF_DISP_H__
+#include <nvif/object.h>
+struct nvif_device;
+
+struct nvif_disp {
+	struct nvif_object object;
+};
+
+int nvif_disp_ctor(struct nvif_device *, s32 oclass, struct nvif_disp *);
+void nvif_disp_dtor(struct nvif_disp *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/fifo.h b/drivers/gpu/drm/nouveau/include/nvif/fifo.h
new file mode 100644
index 0000000..e9468c9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/fifo.h
@@ -0,0 +1,18 @@
+#ifndef __NVIF_FIFO_H__
+#define __NVIF_FIFO_H__
+#include <nvif/device.h>
+
+/* Returns a mask of runlists that support an NV_DEVICE_INFO_ENGINE_* type. */
+u64 nvif_fifo_runlist(struct nvif_device *, u64 engine);
+
+/* CE-supporting runlists (excluding GRCE, if others exist). */
+static inline u64
+nvif_fifo_runlist_ce(struct nvif_device *device)
+{
+	u64 runmgr = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR);
+	u64 runmce = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_CE);
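+	/* Prefer runlists that expose CE but not GR (async copy engines);
+	 * if every copy engine shares a runlist with GR, fall back to the
+	 * GR set.
+	 */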
+	if (runmce && !(runmce &= ~runmgr))
+		runmce = runmgr;
+	return runmce;
+}
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/mem.h b/drivers/gpu/drm/nouveau/include/nvif/mem.h
index b542fe3..80ee4ab 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/mem.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/mem.h
@@ -15,4 +15,6 @@
 int nvif_mem_init(struct nvif_mmu *mmu, s32 oclass, u8 type, u8 page,
 		  u64 size, void *argv, u32 argc, struct nvif_mem *);
 void nvif_mem_fini(struct nvif_mem *);
+
+int nvif_mem_init_map(struct nvif_mmu *, u8 type, u64 size, struct nvif_mem *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/mmu.h b/drivers/gpu/drm/nouveau/include/nvif/mmu.h
index c8cd5b5..747ecf6 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/mmu.h
@@ -8,6 +8,7 @@
 	u8  heap_nr;
 	u8  type_nr;
 	u16 kind_nr;
+	s32 mem;
 
 	struct {
 		u64 size;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/object.h b/drivers/gpu/drm/nouveau/include/nvif/object.h
index a2d5244..20754d9 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/object.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/object.h
@@ -99,6 +99,22 @@
 	ret;                                                                   \
 })
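+
+/* Pick a class index: if the caller forces a specific oclass (u != 0), it is
+ * looked up in the supported-class list (m); otherwise nvif_mclass() selects
+ * the first class in the list that the device supports.
+ */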
 
+#define nvif_sclass(o,m,u) ({                                                  \
+	const typeof(m[0]) *_mclass = (m);                                     \
+	s32 _oclass = (u);                                                     \
+	int _cid;                                                              \
+	if (_oclass) {                                                         \
+		for (_cid = 0; _mclass[_cid].oclass; _cid++) {                 \
+			if (_mclass[_cid].oclass == _oclass)                   \
+				break;                                         \
+		}                                                              \
+		_cid = _mclass[_cid].oclass ? _cid : -ENOSYS;                  \
+	} else {                                                               \
+		_cid = nvif_mclass((o), _mclass);                              \
+	}                                                                      \
+	_cid;                                                                  \
+})
+
 /*XXX*/
 #include <core/object.h>
 #define nvxx_object(a) ({                                                      \
diff --git a/drivers/gpu/drm/nouveau/include/nvif/user.h b/drivers/gpu/drm/nouveau/include/nvif/user.h
new file mode 100644
index 0000000..03c1182
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/user.h
@@ -0,0 +1,19 @@
+#ifndef __NVIF_USER_H__
+#define __NVIF_USER_H__
+#include <nvif/object.h>
+struct nvif_device;
+
+struct nvif_user {
+	const struct nvif_user_func *func;
+	struct nvif_object object;
+};
+
+struct nvif_user_func {
+	void (*doorbell)(struct nvif_user *, u32 token);
+};
+
+int nvif_user_init(struct nvif_device *);
+void nvif_user_fini(struct nvif_device *);
+
+extern const struct nvif_user_func nvif_userc361;
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 560265b..d83d834 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -22,6 +22,7 @@
 	NVKM_SUBDEV_LTC,
 	NVKM_SUBDEV_MMU,
 	NVKM_SUBDEV_BAR,
+	NVKM_SUBDEV_FAULT,
 	NVKM_SUBDEV_PMU,
 	NVKM_SUBDEV_VOLT,
 	NVKM_SUBDEV_ICCSENSE,
@@ -37,7 +38,10 @@
 	NVKM_ENGINE_CE3,
 	NVKM_ENGINE_CE4,
 	NVKM_ENGINE_CE5,
-	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE5,
+	NVKM_ENGINE_CE6,
+	NVKM_ENGINE_CE7,
+	NVKM_ENGINE_CE8,
+	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE8,
 
 	NVKM_ENGINE_CIPHER,
 	NVKM_ENGINE_DISP,
@@ -109,6 +113,7 @@
 		NV_E0    = 0xe0,
 		GM100    = 0x110,
 		GP100    = 0x130,
+		GV100    = 0x140,
 	} card_type;
 	u32 chipset;
 	u8  chiprev;
@@ -123,6 +128,7 @@
 	struct nvkm_bus *bus;
 	struct nvkm_clk *clk;
 	struct nvkm_devinit *devinit;
+	struct nvkm_fault *fault;
 	struct nvkm_fb *fb;
 	struct nvkm_fuse *fuse;
 	struct nvkm_gpio *gpio;
@@ -143,7 +149,7 @@
 	struct nvkm_volt *volt;
 
 	struct nvkm_engine *bsp;
-	struct nvkm_engine *ce[6];
+	struct nvkm_engine *ce[9];
 	struct nvkm_engine *cipher;
 	struct nvkm_disp *disp;
 	struct nvkm_dma *dma;
@@ -194,6 +200,7 @@
 	int (*bus     )(struct nvkm_device *, int idx, struct nvkm_bus **);
 	int (*clk     )(struct nvkm_device *, int idx, struct nvkm_clk **);
 	int (*devinit )(struct nvkm_device *, int idx, struct nvkm_devinit **);
+	int (*fault   )(struct nvkm_device *, int idx, struct nvkm_fault **);
 	int (*fb      )(struct nvkm_device *, int idx, struct nvkm_fb **);
 	int (*fuse    )(struct nvkm_device *, int idx, struct nvkm_fuse **);
 	int (*gpio    )(struct nvkm_device *, int idx, struct nvkm_gpio **);
@@ -214,7 +221,7 @@
 	int (*volt    )(struct nvkm_device *, int idx, struct nvkm_volt **);
 
 	int (*bsp     )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*ce[6]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*ce[9]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*cipher  )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*disp    )(struct nvkm_device *, int idx, struct nvkm_disp **);
 	int (*dma     )(struct nvkm_device *, int idx, struct nvkm_dma **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
index ebf8473..8a2be5b 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
@@ -18,6 +18,7 @@
 	void *(*dtor)(struct nvkm_engine *);
 	void (*preinit)(struct nvkm_engine *);
 	int (*oneinit)(struct nvkm_engine *);
+	int (*info)(struct nvkm_engine *, u64 mthd, u64 *data);
 	int (*init)(struct nvkm_engine *);
 	int (*fini)(struct nvkm_engine *, bool suspend);
 	void (*intr)(struct nvkm_engine *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index 63df229..85a0777 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -17,6 +17,7 @@
 	void *(*dtor)(struct nvkm_subdev *);
 	int (*preinit)(struct nvkm_subdev *);
 	int (*oneinit)(struct nvkm_subdev *);
+	int (*info)(struct nvkm_subdev *, u64 mthd, u64 *data);
 	int (*init)(struct nvkm_subdev *);
 	int (*fini)(struct nvkm_subdev *, bool suspend);
 	void (*intr)(struct nvkm_subdev *);
@@ -29,6 +30,7 @@
 int  nvkm_subdev_preinit(struct nvkm_subdev *);
 int  nvkm_subdev_init(struct nvkm_subdev *);
 int  nvkm_subdev_fini(struct nvkm_subdev *, bool suspend);
+int  nvkm_subdev_info(struct nvkm_subdev *, u64, u64 *);
 void nvkm_subdev_intr(struct nvkm_subdev *);
 
 /* subdev logging */
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
index 5532459..fc295e1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
@@ -10,4 +10,5 @@
 int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gp102_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gv100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index e83193d..ef7dc08 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -35,4 +35,5 @@
 int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
index 0f9c1c7..f0c1b2c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
@@ -27,4 +27,5 @@
 int nv50_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
 int gf100_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
 int gf119_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
+int gv100_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index c17b3a9..7e39fbe 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -4,6 +4,7 @@
 #include <core/engine.h>
 #include <core/object.h>
 #include <core/event.h>
+struct nvkm_fault_data;
 
 #define NVKM_FIFO_CHID_NR 4096
 
@@ -45,6 +46,7 @@
 	struct nvkm_event kevent; /* channel killed */
 };
 
+void nvkm_fifo_fault(struct nvkm_fifo *, struct nvkm_fault_data *);
 void nvkm_fifo_pause(struct nvkm_fifo *, unsigned long *);
 void nvkm_fifo_start(struct nvkm_fifo *, unsigned long *);
 
@@ -71,4 +73,5 @@
 int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gp100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gp10b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
+int gv100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
index fb18f10..ba1518f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
@@ -45,6 +45,8 @@
 int gm20b_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gp100_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gp102_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
+int gp104_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gp107_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gp10b_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
+int gv100_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h
index df34b41..512e25a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/dp.h
@@ -1,6 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __NVBIOS_DP_H__
 #define __NVBIOS_DP_H__
+
+u16
+nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+
 struct nvbios_dpout {
 	u16 type;
 	u16 mask;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
index 4055806..486e763 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
@@ -30,4 +30,5 @@
 int gf100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
+int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h
new file mode 100644
index 0000000..5a77498
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h
@@ -0,0 +1,33 @@
+#ifndef __NVKM_FAULT_H__
+#define __NVKM_FAULT_H__
+#include <core/subdev.h>
+#include <core/notify.h>
+
+struct nvkm_fault {
+	const struct nvkm_fault_func *func;
+	struct nvkm_subdev subdev;
+
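+	/* HW fault buffers (e.g. replayable/non-replayable on Volta). */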
+	struct nvkm_fault_buffer *buffer[2];
+	int buffer_nr;
+
+	struct nvkm_event event;
+
+	struct nvkm_notify nrpfb;
+};
+
+struct nvkm_fault_data {
+	u64  addr;
+	u64  inst;
+	u64  time;
+	u8 engine;
+	u8  valid;
+	u8    gpc;
+	u8    hub;
+	u8 access;
+	u8 client;
+	u8 reason;
+};
+
+int gp100_fault_new(struct nvkm_device *, int, struct nvkm_fault **);
+int gv100_fault_new(struct nvkm_device *, int, struct nvkm_fault **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 92be0e5..96ccc62 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -83,6 +83,7 @@
 int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gp102_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gp10b_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gv100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 
 #include <subdev/bios.h>
 #include <subdev/bios/ramcfg.h>
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
index 95b6115..9db5f82 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
@@ -21,12 +21,14 @@
 	int zbc_max;
 	u32 zbc_color[NVKM_LTC_MAX_ZBC_CNT][4];
 	u32 zbc_depth[NVKM_LTC_MAX_ZBC_CNT];
+	u32 zbc_stencil[NVKM_LTC_MAX_ZBC_CNT];
 };
 
 void nvkm_ltc_tags_clear(struct nvkm_device *, u32 first, u32 count);
 
 int nvkm_ltc_zbc_color_get(struct nvkm_ltc *, int index, const u32[4]);
 int nvkm_ltc_zbc_depth_get(struct nvkm_ltc *, int index, const u32);
+int nvkm_ltc_zbc_stencil_get(struct nvkm_ltc *, int index, const u32);
 
 void nvkm_ltc_invalidate(struct nvkm_ltc *);
 void nvkm_ltc_flush(struct nvkm_ltc *);
@@ -37,4 +39,5 @@
 int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+int gp102_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index baab933..6885955 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -129,4 +129,5 @@
 int gm20b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int gp100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int gp10b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
+int gv100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index ece650a..e2211bb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -23,6 +23,7 @@
 
 #include <nvif/client.h>
 #include <nvif/driver.h>
+#include <nvif/fifo.h>
 #include <nvif/ioctl.h>
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
@@ -102,6 +103,7 @@
 	case NV_DEVICE_INFO_V0_KEPLER:
 	case NV_DEVICE_INFO_V0_MAXWELL:
 	case NV_DEVICE_INFO_V0_PASCAL:
+	case NV_DEVICE_INFO_V0_VOLTA:
 		return NVIF_CLASS_SW_GF100;
 	}
 
@@ -256,6 +258,7 @@
 	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 	struct nouveau_abi16_chan *chan;
 	struct nvif_device *device;
+	u64 engine;
 	int ret;
 
 	if (unlikely(!abi16))
@@ -268,25 +271,26 @@
 
 	/* hack to allow channel engine type specification on kepler */
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
-		if (init->fb_ctxdma_handle != ~0)
-			init->fb_ctxdma_handle = NVA06F_V0_ENGINE_GR;
-		else {
-			init->fb_ctxdma_handle = 0;
-#define _(A,B) if (init->tt_ctxdma_handle & (A)) init->fb_ctxdma_handle |= (B)
-			_(0x01, NVA06F_V0_ENGINE_GR);
-			_(0x02, NVA06F_V0_ENGINE_MSPDEC);
-			_(0x04, NVA06F_V0_ENGINE_MSPPP);
-			_(0x08, NVA06F_V0_ENGINE_MSVLD);
-			_(0x10, NVA06F_V0_ENGINE_CE0);
-			_(0x20, NVA06F_V0_ENGINE_CE1);
-			_(0x40, NVA06F_V0_ENGINE_MSENC);
-#undef _
+		if (init->fb_ctxdma_handle == ~0) {
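+			/* Map the legacy single-engine selectors onto engine
+			 * types; 0x30 covers the old CE0|CE1 pair.
+			 */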
+			switch (init->tt_ctxdma_handle) {
+			case 0x01: engine = NV_DEVICE_INFO_ENGINE_GR    ; break;
+			case 0x02: engine = NV_DEVICE_INFO_ENGINE_MSPDEC; break;
+			case 0x04: engine = NV_DEVICE_INFO_ENGINE_MSPPP ; break;
+			case 0x08: engine = NV_DEVICE_INFO_ENGINE_MSVLD ; break;
+			case 0x30: engine = NV_DEVICE_INFO_ENGINE_CE    ; break;
+			default:
+				return nouveau_abi16_put(abi16, -ENOSYS);
+			}
+		} else {
+			engine = NV_DEVICE_INFO_ENGINE_GR;
 		}
 
-		/* allow flips to be executed if this is a graphics channel */
+		if (engine != NV_DEVICE_INFO_ENGINE_CE)
+			engine = nvif_fifo_runlist(device, engine);
+		else
+			engine = nvif_fifo_runlist_ce(device);
+		init->fb_ctxdma_handle = engine;
 		init->tt_ctxdma_handle = 0;
-		if (init->fb_ctxdma_handle == NVA06F_V0_ENGINE_GR)
-			init->tt_ctxdma_handle = 1;
 	}
 
 	if (init->fb_ctxdma_handle == ~0 || init->tt_ctxdma_handle == ~0)
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 5ffcb66..ffb1958 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -193,7 +193,7 @@
 	return nouveau_dsm_set_discrete_state(nouveau_dsm_priv.dhandle, state);
 }
 
-static int nouveau_dsm_get_client_id(struct pci_dev *pdev)
+static enum vga_switcheroo_client_id nouveau_dsm_get_client_id(struct pci_dev *pdev)
 {
 	/* easy option one - intel vendor ID means Integrated */
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index ab61c03..7214022 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1141,6 +1141,8 @@
 			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
+		{  "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc3b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xc1b5, nve0_bo_move_copy, nve0_bo_move_init },
 		{  "GRCE", 0, 0xc1b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xc0b5, nve0_bo_move_copy, nve0_bo_move_init },
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index af11166..92d3115 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -163,12 +163,15 @@
 			return ret;
 		}
 
+		chan->push.addr = chan->push.vma->addr;
+
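+		/* Fermi and newer address the pushbuf through its VMA
+		 * directly; the NV_DMA object set up below is only
+		 * required on older chips.
+		 */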
+		if (device->info.family >= NV_DEVICE_INFO_V0_FERMI)
+			return 0;
+
 		args.target = NV_DMA_V0_TARGET_VM;
 		args.access = NV_DMA_V0_ACCESS_VM;
 		args.start = 0;
 		args.limit = cli->vmm.vmm.limit - 1;
-
-		chan->push.addr = chan->push.vma->addr;
 	} else
 	if (chan->push.buffer->bo.mem.mem_type == TTM_PL_VRAM) {
 		if (device->info.family == NV_DEVICE_INFO_V0_TNT) {
@@ -214,10 +217,11 @@
 
 static int
 nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
-		    u32 engine, struct nouveau_channel **pchan)
+		    u64 runlist, struct nouveau_channel **pchan)
 {
 	struct nouveau_cli *cli = (void *)device->object.client;
-	static const u16 oclasses[] = { PASCAL_CHANNEL_GPFIFO_A,
+	static const u16 oclasses[] = { VOLTA_CHANNEL_GPFIFO_A,
+					PASCAL_CHANNEL_GPFIFO_A,
 					MAXWELL_CHANNEL_GPFIFO_A,
 					KEPLER_CHANNEL_GPFIFO_B,
 					KEPLER_CHANNEL_GPFIFO_A,
@@ -245,9 +249,9 @@
 	do {
 		if (oclass[0] >= KEPLER_CHANNEL_GPFIFO_A) {
 			args.kepler.version = 0;
-			args.kepler.engines = engine;
 			args.kepler.ilength = 0x02000;
 			args.kepler.ioffset = 0x10000 + chan->push.addr;
+			args.kepler.runlist = runlist;
 			args.kepler.vmm = nvif_handle(&cli->vmm.vmm.object);
 			size = sizeof(args.kepler);
 		} else
@@ -474,3 +478,28 @@
 	cli->base.super = super;
 	return ret;
 }
+
+int
+nouveau_channels_init(struct nouveau_drm *drm)
+{
+	struct {
+		struct nv_device_info_v1 m;
+		struct {
+			struct nv_device_info_v1_data channels;
+		} v;
+	} args = {
+		.m.version = 1,
+		.m.count = sizeof(args.v) / sizeof(args.v.channels),
+		.v.channels.mthd = NV_DEVICE_FIFO_CHANNELS,
+	};
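+	/* Each element of the query names a method in .mthd; the kernel
+	 * writes the result to .data, or sets .mthd to
+	 * NV_DEVICE_INFO_INVALID if the query is unsupported.
+	 */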
+	struct nvif_object *device = &drm->client.device.object;
+	int ret;
+
+	ret = nvif_object_mthd(device, NV_DEVICE_V0_INFO, &args, sizeof(args));
+	if (ret || args.v.channels.mthd == NV_DEVICE_INFO_INVALID)
+		return -ENODEV;
+
+	drm->chan.nr = args.v.channels.data;
+	drm->chan.context_base = dma_fence_context_alloc(drm->chan.nr);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index 14607c1..64454c2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -45,6 +45,7 @@
 	atomic_t killed;
 };
 
+int nouveau_channels_init(struct nouveau_drm *);
 
 int  nouveau_channel_new(struct nouveau_drm *, struct nvif_device *,
 			 u32 arg0, u32 arg1, struct nouveau_channel **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 6ed9cb0..7b557c3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -151,7 +151,7 @@
 				/* ... except prior to G80, where the code
 				 * doesn't support such things.
 				 */
-				if (disp->disp.oclass < NV50_DISP)
+				if (disp->disp.object.oclass < NV50_DISP)
 					return -EINVAL;
 				break;
 			default:
@@ -260,7 +260,7 @@
 	asyc->procamp.color_vibrance = 150;
 	asyc->procamp.vibrant_hue = 90;
 
-	if (nouveau_display(connector->dev)->disp.oclass < NV50_DISP) {
+	if (nouveau_display(connector->dev)->disp.object.oclass < NV50_DISP) {
 		switch (connector->connector_type) {
 		case DRM_MODE_CONNECTOR_LVDS:
 			/* See note in nouveau_conn_atomic_set_property(). */
@@ -314,7 +314,7 @@
 	case DRM_MODE_CONNECTOR_TV:
 		break;
 	case DRM_MODE_CONNECTOR_VGA:
-		if (disp->disp.oclass < NV50_DISP)
+		if (disp->disp.object.oclass < NV50_DISP)
 			break; /* Can only scale on DFPs. */
 		/* Fall-through. */
 	default:
@@ -1005,7 +1005,7 @@
 		return 112000;
 }
 
-static int
+static enum drm_mode_status
 nouveau_connector_mode_valid(struct drm_connector *connector,
 			     struct drm_display_mode *mode)
 {
@@ -1321,7 +1321,7 @@
 	}
 
 	/* HDMI 3D support */
-	if ((disp->disp.oclass >= G82_DISP)
+	if ((disp->disp.object.oclass >= G82_DISP)
 	    && ((type == DRM_MODE_CONNECTOR_DisplayPort)
 		|| (type == DRM_MODE_CONNECTOR_eDP)
 		|| (type == DRM_MODE_CONNECTOR_HDMIA)))
@@ -1343,7 +1343,7 @@
 	case DCB_CONNECTOR_LVDS_SPWG:
 	case DCB_CONNECTOR_eDP:
 		/* see note in nouveau_connector_set_property() */
-		if (disp->disp.oclass < NV50_DISP) {
+		if (disp->disp.object.oclass < NV50_DISP) {
 			nv_connector->scaling_mode = DRM_MODE_SCALE_FULLSCREEN;
 			break;
 		}
@@ -1366,8 +1366,8 @@
 		break;
 	}
 
-	ret = nvif_notify_init(&disp->disp, nouveau_connector_hotplug, true,
-			       NV04_DISP_NTFY_CONN,
+	ret = nvif_notify_init(&disp->disp.object, nouveau_connector_hotplug,
+			       true, NV04_DISP_NTFY_CONN,
 			       &(struct nvif_notify_conn_req_v0) {
 				.mask = NVIF_NOTIFY_CONN_V0_ANY,
 				.conn = index,
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 0097134..774b429 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -116,7 +116,7 @@
 	bool ret = false;
 
 	do {
-		ret = nvif_mthd(&disp->disp, 0, &args, sizeof(args));
+		ret = nvif_mthd(&disp->disp.object, 0, &args, sizeof(args));
 		if (ret != 0)
 			return false;
 
@@ -175,7 +175,7 @@
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-		ret = nvif_notify_init(&disp->disp,
+		ret = nvif_notify_init(&disp->disp.object,
 				       nouveau_display_vblank_handler, false,
 				       NV04_DISP_NTFY_VBLANK,
 				       &(struct nvif_notify_head_req_v0) {
@@ -338,11 +338,9 @@
 	if (c) {                                                               \
 		p = drm_property_create(dev, DRM_MODE_PROP_ENUM, n, c);        \
 		l = (list);                                                    \
-		c = 0;                                                         \
 		while (p && l->gen_mask) {                                     \
 			if (l->gen_mask & (1 << (gen))) {                      \
-				drm_property_add_enum(p, c, l->type, l->name); \
-				c++;                                           \
+				drm_property_add_enum(p, l->type, l->name);    \
 			}                                                      \
 			l++;                                                   \
 		}                                                              \
@@ -456,10 +454,10 @@
 	struct nouveau_display *disp = nouveau_display(dev);
 	int gen;
 
-	if (disp->disp.oclass < NV50_DISP)
+	if (disp->disp.object.oclass < NV50_DISP)
 		gen = 0;
 	else
-	if (disp->disp.oclass < GF110_DISP)
+	if (disp->disp.object.oclass < GF110_DISP)
 		gen = 1;
 	else
 		gen = 2;
@@ -535,31 +533,10 @@
 	drm_kms_helper_poll_disable(dev);
 
 	if (nouveau_modeset != 2 && drm->vbios.dcb.entries) {
-		static const u16 oclass[] = {
-			GP102_DISP,
-			GP100_DISP,
-			GM200_DISP,
-			GM107_DISP,
-			GK110_DISP,
-			GK104_DISP,
-			GF110_DISP,
-			GT214_DISP,
-			GT206_DISP,
-			GT200_DISP,
-			G82_DISP,
-			NV50_DISP,
-			NV04_DISP,
-		};
-		int i;
-
-		for (i = 0, ret = -ENODEV; ret && i < ARRAY_SIZE(oclass); i++) {
-			ret = nvif_object_init(&drm->client.device.object, 0,
-					       oclass[i], NULL, 0, &disp->disp);
-		}
-
+		ret = nvif_disp_ctor(&drm->client.device, 0, &disp->disp);
 		if (ret == 0) {
 			nouveau_display_create_properties(dev);
-			if (disp->disp.oclass < NV50_DISP)
+			if (disp->disp.object.oclass < NV50_DISP)
 				ret = nv04_display_create(dev);
 			else
 				ret = nv50_display_create(dev);
@@ -613,7 +590,7 @@
 	if (disp->dtor)
 		disp->dtor(dev);
 
-	nvif_object_fini(&disp->disp);
+	nvif_disp_dtor(&disp->disp);
 
 	nouveau_drm(dev)->display = NULL;
 	kfree(disp);
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h
index 270ba56..54aa7c3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.h
+++ b/drivers/gpu/drm/nouveau/nouveau_display.h
@@ -2,6 +2,7 @@
 #ifndef __NOUVEAU_DISPLAY_H__
 #define __NOUVEAU_DISPLAY_H__
 #include "nouveau_drv.h"
+#include <nvif/disp.h>
 
 struct nouveau_framebuffer {
 	struct drm_framebuffer base;
@@ -38,7 +39,7 @@
 	int  (*init)(struct drm_device *);
 	void (*fini)(struct drm_device *);
 
-	struct nvif_object disp;
+	struct nvif_disp disp;
 
 	struct drm_property *dithering_mode;
 	struct drm_property *dithering_depth;
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 10e84f6..945afd3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -28,6 +28,8 @@
 #include "nouveau_dma.h"
 #include "nouveau_vmm.h"
 
+#include <nvif/user.h>
+
 void
 OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
 {
@@ -80,18 +82,11 @@
 }
 
 void
-nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo,
-	      int delta, int length)
+nv50_dma_push(struct nouveau_channel *chan, u64 offset, int length)
 {
-	struct nouveau_cli *cli = (void *)chan->user.client;
+	struct nvif_user *user = &chan->drm->client.device.user;
 	struct nouveau_bo *pb = chan->push.buffer;
-	struct nouveau_vma *vma;
 	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
-	u64 offset;
-
-	vma = nouveau_vma_find(bo, &cli->vmm);
-	BUG_ON(!vma);
-	offset = vma->addr + delta;
 
 	BUG_ON(chan->dma.ib_free < 1);
 
@@ -105,6 +100,8 @@
 	nouveau_bo_rd32(pb, 0);
 
 	nvif_wr32(&chan->user, 0x8c, chan->dma.ib_put);
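+	/* On Volta and newer, ring the usermode doorbell so the host
+	 * notices the new work.
+	 */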
+	if (user->func && user->func->doorbell)
+		user->func->doorbell(user, chan->chid);
 	chan->dma.ib_free--;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index 74e10b1..fc5e3f4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -31,8 +31,7 @@
 #include "nouveau_chan.h"
 
 int nouveau_dma_wait(struct nouveau_channel *, int slots, int size);
-void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *,
-		   int delta, int length);
+void nv50_dma_push(struct nouveau_channel *, u64 addr, int length);
 
 /*
  * There's a hw race condition where you can't jump to your PUT offset,
@@ -55,7 +54,6 @@
 
 	NvSub2D		= 3, /* DO NOT CHANGE - hardcoded for kepler gr fifo */
 	NvSubCopy	= 4, /* DO NOT CHANGE - hardcoded for kepler gr fifo */
-	FermiSw		= 5, /* DO NOT CHANGE (well.. 6/7 will work...) */
 };
 
 /* Object handles - for stuff that doesn't use handle == oclass. */
@@ -151,7 +149,7 @@
 	chan->accel_done = true;
 
 	if (chan->dma.ib_max) {
-		nv50_dma_push(chan, chan->push.buffer, chan->dma.put << 2,
+		nv50_dma_push(chan, chan->push.addr + (chan->dma.put << 2),
 			      (chan->dma.cur - chan->dma.put) << 2);
 	} else {
 		WRITE_PUT(chan->dma.cur);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index bbbf353..775443c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -38,6 +38,8 @@
 #include <core/tegra.h>
 
 #include <nvif/driver.h>
+#include <nvif/fifo.h>
+#include <nvif/user.h>
 
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
@@ -112,24 +114,22 @@
 }
 
 static inline bool
-nouveau_cli_work_ready(struct dma_fence *fence, bool wait)
+nouveau_cli_work_ready(struct dma_fence *fence)
 {
-	if (!dma_fence_is_signaled(fence)) {
-		if (!wait)
-			return false;
-		WARN_ON(dma_fence_wait_timeout(fence, false, 2 * HZ) <= 0);
-	}
+	if (!dma_fence_is_signaled(fence))
+		return false;
 	dma_fence_put(fence);
 	return true;
 }
 
 static void
-nouveau_cli_work_flush(struct nouveau_cli *cli, bool wait)
+nouveau_cli_work(struct work_struct *w)
 {
+	struct nouveau_cli *cli = container_of(w, typeof(*cli), work);
 	struct nouveau_cli_work *work, *wtmp;
 	mutex_lock(&cli->lock);
 	list_for_each_entry_safe(work, wtmp, &cli->worker, head) {
-		if (!work->fence || nouveau_cli_work_ready(work->fence, wait)) {
+		if (!work->fence || nouveau_cli_work_ready(work->fence)) {
 			list_del(&work->head);
 			work->func(work);
 		}
@@ -158,16 +158,16 @@
 }
 
 static void
-nouveau_cli_work(struct work_struct *w)
-{
-	struct nouveau_cli *cli = container_of(w, typeof(*cli), work);
-	nouveau_cli_work_flush(cli, false);
-}
-
-static void
 nouveau_cli_fini(struct nouveau_cli *cli)
 {
-	nouveau_cli_work_flush(cli, true);
+	/* All our channels are dead now, which means all the fences they
+	 * own are signalled, and all callback functions have been called.
+	 *
+	 * So, after flushing the workqueue, there should be nothing left.
+	 */
+	flush_work(&cli->work);
+	WARN_ON(!list_empty(&cli->worker));
+
 	usif_client_fini(cli);
 	nouveau_vmm_fini(&cli->vmm);
 	nvif_mmu_fini(&cli->mmu);
@@ -307,6 +307,16 @@
 	if (nouveau_noaccel)
 		return;
 
+	ret = nouveau_channels_init(drm);
+	if (ret)
+		return;
+
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_VOLTA) {
+		ret = nvif_user_init(device);
+		if (ret)
+			return;
+	}
+
 	/* initialise synchronisation routines */
 	/*XXX: this is crap, but the fence/channel stuff is a little
 	 *     backwards in some places.  this will be fixed.
@@ -338,6 +348,7 @@
 		case KEPLER_CHANNEL_GPFIFO_B:
 		case MAXWELL_CHANNEL_GPFIFO_A:
 		case PASCAL_CHANNEL_GPFIFO_A:
+		case VOLTA_CHANNEL_GPFIFO_A:
 			ret = nvc0_fence_create(drm);
 			break;
 		default:
@@ -354,13 +365,12 @@
 
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
 		ret = nouveau_channel_new(drm, &drm->client.device,
-					  NVA06F_V0_ENGINE_CE0 |
-					  NVA06F_V0_ENGINE_CE1,
-					  0, &drm->cechan);
+					  nvif_fifo_runlist_ce(device), 0,
+					  &drm->cechan);
 		if (ret)
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
 
-		arg0 = NVA06F_V0_ENGINE_GR;
+		arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR);
 		arg1 = 1;
 	} else
 	if (device->info.chipset >= 0xa3 &&
@@ -386,38 +396,36 @@
 		return;
 	}
 
-	ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW,
-			       nouveau_abi16_swclass(drm), NULL, 0, &drm->nvsw);
-	if (ret == 0) {
-		ret = RING_SPACE(drm->channel, 2);
+	if (device->info.family < NV_DEVICE_INFO_V0_TESLA) {
+		ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW,
+				       nouveau_abi16_swclass(drm), NULL, 0,
+				       &drm->nvsw);
 		if (ret == 0) {
-			if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
+			ret = RING_SPACE(drm->channel, 2);
+			if (ret == 0) {
 				BEGIN_NV04(drm->channel, NvSubSw, 0, 1);
-				OUT_RING  (drm->channel, NVDRM_NVSW);
-			} else
-			if (device->info.family < NV_DEVICE_INFO_V0_KEPLER) {
-				BEGIN_NVC0(drm->channel, FermiSw, 0, 1);
-				OUT_RING  (drm->channel, 0x001f0000);
+				OUT_RING  (drm->channel, drm->nvsw.handle);
+			}
+
+			ret = nvif_notify_init(&drm->nvsw,
+					       nouveau_flip_complete,
+					       false, NV04_NVSW_NTFY_UEVENT,
+					       NULL, 0, 0, &drm->flip);
+			if (ret == 0)
+				ret = nvif_notify_get(&drm->flip);
+			if (ret) {
+				nouveau_accel_fini(drm);
+				return;
 			}
 		}
 
-		ret = nvif_notify_init(&drm->nvsw, nouveau_flip_complete,
-				       false, NV04_NVSW_NTFY_UEVENT,
-				       NULL, 0, 0, &drm->flip);
-		if (ret == 0)
-			ret = nvif_notify_get(&drm->flip);
 		if (ret) {
+			NV_ERROR(drm, "failed to allocate sw class, %d\n", ret);
 			nouveau_accel_fini(drm);
 			return;
 		}
 	}
 
-	if (ret) {
-		NV_ERROR(drm, "failed to allocate software object, %d\n", ret);
-		nouveau_accel_fini(drm);
-		return;
-	}
-
 	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
 		ret = nvkm_gpuobj_new(nvxx_device(&drm->client.device), 32, 0,
 				      false, NULL, &drm->notify);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 881b44b..6e1acae 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -170,6 +170,12 @@
 	/* synchronisation */
 	void *fence;
 
+	/* Global channel management. */
+	struct {
+		int nr;
+		u64 context_base;
+	} chan;
+
 	/* context for accelerated drm-internal operations */
 	struct nouveau_channel *cechan;
 	struct nouveau_channel *channel;
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index e28d966..3517f92 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -32,6 +32,7 @@
 #include <drm/drm_encoder_slave.h>
 #include <drm/drm_dp_mst_helper.h>
 #include "dispnv04/disp.h"
+struct nv50_head_atom;
 
 #define NV_DPMS_CLEARED 0x80
 
@@ -68,7 +69,7 @@
 	void (*enc_save)(struct drm_encoder *encoder);
 	void (*enc_restore)(struct drm_encoder *encoder);
 	void (*update)(struct nouveau_encoder *, u8 head,
-		       struct drm_display_mode *, u8 proto, u8 depth);
+		       struct nv50_head_atom *, u8 proto, u8 depth);
 };
 
 struct nouveau_encoder *
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 503fa94..412d49b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -74,15 +74,14 @@
 }
 
 static struct nouveau_fence *
-nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm) {
-	struct nouveau_fence_priv *priv = (void*)drm->fence;
-
+nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm)
+{
 	if (fence->ops != &nouveau_fence_ops_legacy &&
 	    fence->ops != &nouveau_fence_ops_uevent)
 		return NULL;
 
-	if (fence->context < priv->context_base ||
-	    fence->context >= priv->context_base + priv->contexts)
+	if (fence->context < drm->chan.context_base ||
+	    fence->context >= drm->chan.context_base + drm->chan.nr)
 		return NULL;
 
 	return from_fence(fence);
@@ -176,7 +175,7 @@
 	INIT_LIST_HEAD(&fctx->flip);
 	INIT_LIST_HEAD(&fctx->pending);
 	spin_lock_init(&fctx->lock);
-	fctx->context = priv->context_base + chan->chid;
+	fctx->context = chan->drm->chan.context_base + chan->chid;
 
 	if (chan == chan->drm->cechan)
 		strcpy(fctx->name, "copy engine channel");
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 5bd8d30..b999e60 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -55,8 +55,6 @@
 	int  (*context_new)(struct nouveau_channel *);
 	void (*context_del)(struct nouveau_channel *);
 
-	u32 contexts;
-	u64 context_base;
 	bool uevent;
 };
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index e72a7e3..300daee 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -99,6 +99,7 @@
 static void
 nouveau_gem_object_delete(struct nouveau_vma *vma)
 {
+	nouveau_fence_unref(&vma->fence);
 	nouveau_vma_del(&vma);
 }
 
@@ -114,25 +115,12 @@
 static void
 nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
 {
-	const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM;
-	struct reservation_object *resv = nvbo->bo.resv;
-	struct reservation_object_list *fobj;
+	struct dma_fence *fence = vma->fence ? &vma->fence->base : NULL;
 	struct nouveau_gem_object_unmap *work;
-	struct dma_fence *fence = NULL;
-
-	fobj = reservation_object_get_list(resv);
 
 	list_del_init(&vma->head);
 
-	if (fobj && fobj->shared_count > 1)
-		ttm_bo_wait(&nvbo->bo, false, false);
-	else if (fobj && fobj->shared_count == 1)
-		fence = rcu_dereference_protected(fobj->shared[0],
-						reservation_object_held(resv));
-	else
-		fence = reservation_object_get_excl(nvbo->bo.resv);
-
-	if (!fence || !mapped) {
+	if (!fence) {
 		nouveau_gem_object_delete(vma);
 		return;
 	}
@@ -344,9 +332,20 @@
 		nvbo = list_entry(op->list.next, struct nouveau_bo, entry);
 		b = &pbbo[nvbo->pbbo_index];
 
-		if (likely(fence))
+		if (likely(fence)) {
+			struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
+			struct nouveau_vma *vma;
+
 			nouveau_bo_fence(nvbo, fence, !!b->write_domains);
 
+			if (drm->client.vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) {
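+				/* Stash the most recent fence on the VMA so
+				 * unmap can wait for the GPU to finish with
+				 * the mapping (see nouveau_gem_object_unmap()).
+				 */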
+				vma = (void *)(unsigned long)b->user_priv;
+				nouveau_fence_unref(&vma->fence);
+				dma_fence_get(&fence->base);
+				vma->fence = fence;
+			}
+		}
+
 		if (unlikely(nvbo->validate_mapped)) {
 			ttm_bo_kunmap(&nvbo->kmap);
 			nvbo->validate_mapped = false;
@@ -432,7 +431,20 @@
 			}
 		}
 
-		b->user_priv = (uint64_t)(unsigned long)nvbo;
+		if (cli->vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) {
+			struct nouveau_vmm *vmm = &cli->vmm;
+			struct nouveau_vma *vma = nouveau_vma_find(nvbo, vmm);
+			if (!vma) {
+				NV_PRINTK(err, cli, "vma not found!\n");
+				ret = -EINVAL;
+				break;
+			}
+
+			b->user_priv = (uint64_t)(unsigned long)vma;
+		} else {
+			b->user_priv = (uint64_t)(unsigned long)nvbo;
+		}
+
 		nvbo->reserved_by = file_priv;
 		nvbo->pbbo_index = i;
 		if ((b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM) &&
@@ -763,10 +775,10 @@
 		}
 
 		for (i = 0; i < req->nr_push; i++) {
-			struct nouveau_bo *nvbo = (void *)(unsigned long)
+			struct nouveau_vma *vma = (void *)(unsigned long)
 				bo[push[i].bo_index].user_priv;
 
-			nv50_dma_push(chan, nvbo, push[i].offset,
+			nv50_dma_push(chan, vma->addr + push[i].offset,
 				      push[i].length);
 		}
 	} else
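
[Editor's note, not part of the patch: the nouveau_gem.c hunks above replace the reservation-object scan on unmap with a fence cached on the VMA at pushbuf submission time, so unmap only has to wait for that one fence (or delete immediately when none is cached). A sketch of the attach/detach reference pattern, using a generic dma_fence in place of struct nouveau_fence; helper names here are hypothetical.]

#include <linux/dma-fence.h>

/* vma_fence_attach() drops any previously cached fence and takes a
 * reference on the new one, as validate_fini() does after each
 * submission; vma_fence_detach() is the unmap/delete side.
 * dma_fence_put() is NULL-safe, so no explicit checks are needed. */
struct vma_like {
	struct dma_fence *fence;	/* last fence to touch this mapping */
};

static void vma_fence_attach(struct vma_like *vma, struct dma_fence *fence)
{
	dma_fence_put(vma->fence);
	vma->fence = dma_fence_get(fence);
}

static void vma_fence_detach(struct vma_like *vma)
{
	dma_fence_put(vma->fence);
	vma->fence = NULL;
}
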
diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
index 7c96564..44178b4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
@@ -327,7 +327,7 @@
 	struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
 	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
-	if (therm && therm->attr_get && nvkm_therm_temp_get(therm) < 0)
+	if (!therm || !therm->attr_get || nvkm_therm_temp_get(therm) < 0)
 		return 0;
 
 	switch (attr) {
@@ -351,8 +351,8 @@
 	struct nouveau_drm *drm = nouveau_drm((struct drm_device *)data);
 	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
-	if (therm && therm->attr_get && therm->fan_get &&
-				therm->fan_get(therm) < 0)
+	if (!therm || !therm->attr_get || !therm->fan_get ||
+	    therm->fan_get(therm) < 0)
 		return 0;
 
 	switch (attr) {
@@ -707,13 +707,20 @@
 {
 #if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
 	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 	const struct attribute_group *special_groups[N_ATTR_GROUPS];
 	struct nouveau_hwmon *hwmon;
 	struct device *hwmon_dev;
 	int ret = 0;
 	int i = 0;
 
+	if (!iccsense && !therm && !volt) {
+		NV_DEBUG(drm, "Skipping hwmon registration\n");
+		return 0;
+	}
+
 	hwmon = drm->hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
 	if (!hwmon)
 		return -ENOMEM;
@@ -749,6 +756,9 @@
 #if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct nouveau_hwmon *hwmon = nouveau_hwmon(dev);
 
+	if (!hwmon)
+		return;
+
 	if (hwmon->hwmon)
 		hwmon_device_unregister(hwmon->hwmon);
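
[Editor's note, not part of the patch: the hwmon hunks above invert the old sensor-presence tests, which only bailed out when the subdev existed *and* the probe read failed, and so fell through to attribute reads on NULL subdevs. They also skip hwmon registration entirely when no sensor source exists and guard the fini path against the resulting NULL. A self-contained sketch of the corrected guard shape; the struct and callbacks are hypothetical stand-ins for the nvkm API.]

#include <stdbool.h>
#include <stddef.h>

/* Old form:  if (therm && therm->attr_get && temp_get(therm) < 0) return 0;
 * Fixed form: every term is inverted, so any missing prerequisite
 * bails out early and only a fully usable sensor proceeds. */
struct therm_like {
	int (*attr_get)(struct therm_like *);
	int (*temp_get)(struct therm_like *);
};

static bool temp_usable(struct therm_like *therm)
{
	if (!therm || !therm->attr_get || therm->temp_get(therm) < 0)
		return false;
	return true;
}
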
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index f5371d9..2032c3e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -92,6 +92,7 @@
 	vma->refs = 1;
 	vma->addr = ~0ULL;
 	vma->mem = NULL;
+	vma->fence = NULL;
 	list_add_tail(&vma->head, &nvbo->vma_list);
 
 	if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM &&
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.h b/drivers/gpu/drm/nouveau/nouveau_vmm.h
index 5c31f43..7e3b118 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.h
@@ -11,6 +11,8 @@
 	u64 addr;
 
 	struct nouveau_mem *mem;
+
+	struct nouveau_fence *fence;
 };
 
 struct nouveau_vma *nouveau_vma_find(struct nouveau_bo *, struct nouveau_vmm *);
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
index fa8f237..c41e82b 100644
--- a/drivers/gpu/drm/nouveau/nv04_fence.c
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -109,7 +109,5 @@
 	priv->base.dtor = nv04_fence_destroy;
 	priv->base.context_new = nv04_fence_context_new;
 	priv->base.context_del = nv04_fence_context_del;
-	priv->base.contexts = 15;
-	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c
index 2998bde..4476b71 100644
--- a/drivers/gpu/drm/nouveau/nv10_fence.c
+++ b/drivers/gpu/drm/nouveau/nv10_fence.c
@@ -103,8 +103,6 @@
 	priv->base.dtor = nv10_fence_destroy;
 	priv->base.context_new = nv10_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
-	priv->base.contexts = 31;
-	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c
index 6477b70..5d613d4 100644
--- a/drivers/gpu/drm/nouveau/nv17_fence.c
+++ b/drivers/gpu/drm/nouveau/nv17_fence.c
@@ -125,8 +125,6 @@
 	priv->base.resume = nv17_fence_resume;
 	priv->base.context_new = nv17_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
-	priv->base.contexts = 31;
-	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
 	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
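
[Editor's note, not part of the patch: with `contexts`/`context_base` deleted from the nv04/nv10/nv17 fence backends above, the `dma_fence_context_alloc()` call is presumably made once in common channel setup rather than per backend; that side of the change is not shown in these hunks. A hedged sketch of what the centralized allocation would look like, with illustrative names.]

#include <linux/dma-fence.h>
#include <linux/types.h>

/* Hypothetical common-code shape: reserve one contiguous range of
 * dma_fence contexts for all channels up front, instead of each fence
 * backend allocating its own range. dma_fence_context_alloc() hands
 * back the first context of a block of chan_nr contexts. */
struct chan_bookkeeping {
	u64 context_base;
	u32 nr;
};

static void fence_contexts_init(struct chan_bookkeeping *chan, u32 chan_nr)
{
	chan->nr = chan_nr;
	chan->context_base = dma_fence_context_alloc(chan_nr);
}
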
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
deleted file mode 100644
index 2b3ccd8..0000000
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ /dev/null
@@ -1,4558 +0,0 @@
-/*
- * Copyright 2011 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/hdmi.h>
-
-#include <drm/drmP.h>
-#include <drm/drm_atomic.h>
-#include <drm/drm_atomic_helper.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_dp_helper.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_plane_helper.h>
-#include <drm/drm_edid.h>
-
-#include <nvif/class.h>
-#include <nvif/cl0002.h>
-#include <nvif/cl5070.h>
-#include <nvif/cl507a.h>
-#include <nvif/cl507b.h>
-#include <nvif/cl507c.h>
-#include <nvif/cl507d.h>
-#include <nvif/cl507e.h>
-#include <nvif/event.h>
-
-#include "nouveau_drv.h"
-#include "nouveau_dma.h"
-#include "nouveau_gem.h"
-#include "nouveau_connector.h"
-#include "nouveau_encoder.h"
-#include "nouveau_crtc.h"
-#include "nouveau_fence.h"
-#include "nouveau_fbcon.h"
-#include "nv50_display.h"
-
-#define EVO_DMA_NR 9
-
-#define EVO_MASTER  (0x00)
-#define EVO_FLIP(c) (0x01 + (c))
-#define EVO_OVLY(c) (0x05 + (c))
-#define EVO_OIMM(c) (0x09 + (c))
-#define EVO_CURS(c) (0x0d + (c))
-
-/* offsets in shared sync bo of various structures */
-#define EVO_SYNC(c, o) ((c) * 0x0100 + (o))
-#define EVO_MAST_NTFY     EVO_SYNC(      0, 0x00)
-#define EVO_FLIP_SEM0(c)  EVO_SYNC((c) + 1, 0x00)
-#define EVO_FLIP_SEM1(c)  EVO_SYNC((c) + 1, 0x10)
-#define EVO_FLIP_NTFY0(c) EVO_SYNC((c) + 1, 0x20)
-#define EVO_FLIP_NTFY1(c) EVO_SYNC((c) + 1, 0x30)
-
-/******************************************************************************
- * Atomic state
- *****************************************************************************/
-#define nv50_atom(p) container_of((p), struct nv50_atom, state)
-
-struct nv50_atom {
-	struct drm_atomic_state state;
-
-	struct list_head outp;
-	bool lock_core;
-	bool flush_disable;
-};
-
-struct nv50_outp_atom {
-	struct list_head head;
-
-	struct drm_encoder *encoder;
-	bool flush_disable;
-
-	union {
-		struct {
-			bool ctrl:1;
-		};
-		u8 mask;
-	} clr;
-
-	union {
-		struct {
-			bool ctrl:1;
-		};
-		u8 mask;
-	} set;
-};
-
-#define nv50_head_atom(p) container_of((p), struct nv50_head_atom, state)
-
-struct nv50_head_atom {
-	struct drm_crtc_state state;
-
-	struct {
-		u16 iW;
-		u16 iH;
-		u16 oW;
-		u16 oH;
-	} view;
-
-	struct nv50_head_mode {
-		bool interlace;
-		u32 clock;
-		struct {
-			u16 active;
-			u16 synce;
-			u16 blanke;
-			u16 blanks;
-		} h;
-		struct {
-			u32 active;
-			u16 synce;
-			u16 blanke;
-			u16 blanks;
-			u16 blank2s;
-			u16 blank2e;
-			u16 blankus;
-		} v;
-	} mode;
-
-	struct {
-		bool visible;
-		u32 handle;
-		u64 offset:40;
-		u8  mode:4;
-	} lut;
-
-	struct {
-		bool visible;
-		u32 handle;
-		u64 offset:40;
-		u8  format;
-		u8  kind:7;
-		u8  layout:1;
-		u8  block:4;
-		u32 pitch:20;
-		u16 x;
-		u16 y;
-		u16 w;
-		u16 h;
-	} core;
-
-	struct {
-		bool visible;
-		u32 handle;
-		u64 offset:40;
-		u8  layout:1;
-		u8  format:1;
-	} curs;
-
-	struct {
-		u8  depth;
-		u8  cpp;
-		u16 x;
-		u16 y;
-		u16 w;
-		u16 h;
-	} base;
-
-	struct {
-		u8 cpp;
-	} ovly;
-
-	struct {
-		bool enable:1;
-		u8 bits:2;
-		u8 mode:4;
-	} dither;
-
-	struct {
-		struct {
-			u16 cos:12;
-			u16 sin:12;
-		} sat;
-	} procamp;
-
-	union {
-		struct {
-			bool ilut:1;
-			bool core:1;
-			bool curs:1;
-		};
-		u8 mask;
-	} clr;
-
-	union {
-		struct {
-			bool ilut:1;
-			bool core:1;
-			bool curs:1;
-			bool view:1;
-			bool mode:1;
-			bool base:1;
-			bool ovly:1;
-			bool dither:1;
-			bool procamp:1;
-		};
-		u16 mask;
-	} set;
-};
-
-static inline struct nv50_head_atom *
-nv50_head_atom_get(struct drm_atomic_state *state, struct drm_crtc *crtc)
-{
-	struct drm_crtc_state *statec = drm_atomic_get_crtc_state(state, crtc);
-	if (IS_ERR(statec))
-		return (void *)statec;
-	return nv50_head_atom(statec);
-}
-
-#define nv50_wndw_atom(p) container_of((p), struct nv50_wndw_atom, state)
-
-struct nv50_wndw_atom {
-	struct drm_plane_state state;
-	u8 interval;
-
-	struct {
-		u32  handle;
-		u16  offset:12;
-		bool awaken:1;
-	} ntfy;
-
-	struct {
-		u32 handle;
-		u16 offset:12;
-		u32 acquire;
-		u32 release;
-	} sema;
-
-	struct {
-		u8 enable:2;
-	} lut;
-
-	struct {
-		u8  mode:2;
-		u8  interval:4;
-
-		u8  format;
-		u8  kind:7;
-		u8  layout:1;
-		u8  block:4;
-		u32 pitch:20;
-		u16 w;
-		u16 h;
-
-		u32 handle;
-		u64 offset;
-	} image;
-
-	struct {
-		u16 x;
-		u16 y;
-	} point;
-
-	union {
-		struct {
-			bool ntfy:1;
-			bool sema:1;
-			bool image:1;
-		};
-		u8 mask;
-	} clr;
-
-	union {
-		struct {
-			bool ntfy:1;
-			bool sema:1;
-			bool image:1;
-			bool lut:1;
-			bool point:1;
-		};
-		u8 mask;
-	} set;
-};
-
-/******************************************************************************
- * EVO channel
- *****************************************************************************/
-
-struct nv50_chan {
-	struct nvif_object user;
-	struct nvif_device *device;
-};
-
-static int
-nv50_chan_create(struct nvif_device *device, struct nvif_object *disp,
-		 const s32 *oclass, u8 head, void *data, u32 size,
-		 struct nv50_chan *chan)
-{
-	struct nvif_sclass *sclass;
-	int ret, i, n;
-
-	chan->device = device;
-
-	ret = n = nvif_object_sclass_get(disp, &sclass);
-	if (ret < 0)
-		return ret;
-
-	while (oclass[0]) {
-		for (i = 0; i < n; i++) {
-			if (sclass[i].oclass == oclass[0]) {
-				ret = nvif_object_init(disp, 0, oclass[0],
-						       data, size, &chan->user);
-				if (ret == 0)
-					nvif_object_map(&chan->user, NULL, 0);
-				nvif_object_sclass_put(&sclass);
-				return ret;
-			}
-		}
-		oclass++;
-	}
-
-	nvif_object_sclass_put(&sclass);
-	return -ENOSYS;
-}
-
-static void
-nv50_chan_destroy(struct nv50_chan *chan)
-{
-	nvif_object_fini(&chan->user);
-}
-
-/******************************************************************************
- * PIO EVO channel
- *****************************************************************************/
-
-struct nv50_pioc {
-	struct nv50_chan base;
-};
-
-static void
-nv50_pioc_destroy(struct nv50_pioc *pioc)
-{
-	nv50_chan_destroy(&pioc->base);
-}
-
-static int
-nv50_pioc_create(struct nvif_device *device, struct nvif_object *disp,
-		 const s32 *oclass, u8 head, void *data, u32 size,
-		 struct nv50_pioc *pioc)
-{
-	return nv50_chan_create(device, disp, oclass, head, data, size,
-				&pioc->base);
-}
-
-/******************************************************************************
- * Overlay Immediate
- *****************************************************************************/
-
-struct nv50_oimm {
-	struct nv50_pioc base;
-};
-
-static int
-nv50_oimm_create(struct nvif_device *device, struct nvif_object *disp,
-		 int head, struct nv50_oimm *oimm)
-{
-	struct nv50_disp_cursor_v0 args = {
-		.head = head,
-	};
-	static const s32 oclass[] = {
-		GK104_DISP_OVERLAY,
-		GF110_DISP_OVERLAY,
-		GT214_DISP_OVERLAY,
-		G82_DISP_OVERLAY,
-		NV50_DISP_OVERLAY,
-		0
-	};
-
-	return nv50_pioc_create(device, disp, oclass, head, &args, sizeof(args),
-				&oimm->base);
-}
-
-/******************************************************************************
- * DMA EVO channel
- *****************************************************************************/
-
-struct nv50_dmac_ctxdma {
-	struct list_head head;
-	struct nvif_object object;
-};
-
-struct nv50_dmac {
-	struct nv50_chan base;
-	dma_addr_t handle;
-	u32 *ptr;
-
-	struct nvif_object sync;
-	struct nvif_object vram;
-	struct list_head ctxdma;
-
-	/* Protects against concurrent pushbuf access to this channel, lock is
-	 * grabbed by evo_wait (if the pushbuf reservation is successful) and
-	 * dropped again by evo_kick. */
-	struct mutex lock;
-};
-
-static void
-nv50_dmac_ctxdma_del(struct nv50_dmac_ctxdma *ctxdma)
-{
-	nvif_object_fini(&ctxdma->object);
-	list_del(&ctxdma->head);
-	kfree(ctxdma);
-}
-
-static struct nv50_dmac_ctxdma *
-nv50_dmac_ctxdma_new(struct nv50_dmac *dmac, struct nouveau_framebuffer *fb)
-{
-	struct nouveau_drm *drm = nouveau_drm(fb->base.dev);
-	struct nv50_dmac_ctxdma *ctxdma;
-	const u8    kind = fb->nvbo->kind;
-	const u32 handle = 0xfb000000 | kind;
-	struct {
-		struct nv_dma_v0 base;
-		union {
-			struct nv50_dma_v0 nv50;
-			struct gf100_dma_v0 gf100;
-			struct gf119_dma_v0 gf119;
-		};
-	} args = {};
-	u32 argc = sizeof(args.base);
-	int ret;
-
-	list_for_each_entry(ctxdma, &dmac->ctxdma, head) {
-		if (ctxdma->object.handle == handle)
-			return ctxdma;
-	}
-
-	if (!(ctxdma = kzalloc(sizeof(*ctxdma), GFP_KERNEL)))
-		return ERR_PTR(-ENOMEM);
-	list_add(&ctxdma->head, &dmac->ctxdma);
-
-	args.base.target = NV_DMA_V0_TARGET_VRAM;
-	args.base.access = NV_DMA_V0_ACCESS_RDWR;
-	args.base.start  = 0;
-	args.base.limit  = drm->client.device.info.ram_user - 1;
-
-	if (drm->client.device.info.chipset < 0x80) {
-		args.nv50.part = NV50_DMA_V0_PART_256;
-		argc += sizeof(args.nv50);
-	} else
-	if (drm->client.device.info.chipset < 0xc0) {
-		args.nv50.part = NV50_DMA_V0_PART_256;
-		args.nv50.kind = kind;
-		argc += sizeof(args.nv50);
-	} else
-	if (drm->client.device.info.chipset < 0xd0) {
-		args.gf100.kind = kind;
-		argc += sizeof(args.gf100);
-	} else {
-		args.gf119.page = GF119_DMA_V0_PAGE_LP;
-		args.gf119.kind = kind;
-		argc += sizeof(args.gf119);
-	}
-
-	ret = nvif_object_init(&dmac->base.user, handle, NV_DMA_IN_MEMORY,
-			       &args, argc, &ctxdma->object);
-	if (ret) {
-		nv50_dmac_ctxdma_del(ctxdma);
-		return ERR_PTR(ret);
-	}
-
-	return ctxdma;
-}
-
-static void
-nv50_dmac_destroy(struct nv50_dmac *dmac, struct nvif_object *disp)
-{
-	struct nvif_device *device = dmac->base.device;
-	struct nv50_dmac_ctxdma *ctxdma, *ctxtmp;
-
-	list_for_each_entry_safe(ctxdma, ctxtmp, &dmac->ctxdma, head) {
-		nv50_dmac_ctxdma_del(ctxdma);
-	}
-
-	nvif_object_fini(&dmac->vram);
-	nvif_object_fini(&dmac->sync);
-
-	nv50_chan_destroy(&dmac->base);
-
-	if (dmac->ptr) {
-		struct device *dev = nvxx_device(device)->dev;
-		dma_free_coherent(dev, PAGE_SIZE, dmac->ptr, dmac->handle);
-	}
-}
-
-static int
-nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
-		 const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf,
-		 struct nv50_dmac *dmac)
-{
-	struct nv50_disp_core_channel_dma_v0 *args = data;
-	struct nvif_object pushbuf;
-	int ret;
-
-	mutex_init(&dmac->lock);
-	INIT_LIST_HEAD(&dmac->ctxdma);
-
-	dmac->ptr = dma_alloc_coherent(nvxx_device(device)->dev, PAGE_SIZE,
-				       &dmac->handle, GFP_KERNEL);
-	if (!dmac->ptr)
-		return -ENOMEM;
-
-	ret = nvif_object_init(&device->object, 0, NV_DMA_FROM_MEMORY,
-			       &(struct nv_dma_v0) {
-					.target = NV_DMA_V0_TARGET_PCI_US,
-					.access = NV_DMA_V0_ACCESS_RD,
-					.start = dmac->handle + 0x0000,
-					.limit = dmac->handle + 0x0fff,
-			       }, sizeof(struct nv_dma_v0), &pushbuf);
-	if (ret)
-		return ret;
-
-	args->pushbuf = nvif_handle(&pushbuf);
-
-	ret = nv50_chan_create(device, disp, oclass, head, data, size,
-			       &dmac->base);
-	nvif_object_fini(&pushbuf);
-	if (ret)
-		return ret;
-
-	ret = nvif_object_init(&dmac->base.user, 0xf0000000, NV_DMA_IN_MEMORY,
-			       &(struct nv_dma_v0) {
-					.target = NV_DMA_V0_TARGET_VRAM,
-					.access = NV_DMA_V0_ACCESS_RDWR,
-					.start = syncbuf + 0x0000,
-					.limit = syncbuf + 0x0fff,
-			       }, sizeof(struct nv_dma_v0),
-			       &dmac->sync);
-	if (ret)
-		return ret;
-
-	ret = nvif_object_init(&dmac->base.user, 0xf0000001, NV_DMA_IN_MEMORY,
-			       &(struct nv_dma_v0) {
-					.target = NV_DMA_V0_TARGET_VRAM,
-					.access = NV_DMA_V0_ACCESS_RDWR,
-					.start = 0,
-					.limit = device->info.ram_user - 1,
-			       }, sizeof(struct nv_dma_v0),
-			       &dmac->vram);
-	if (ret)
-		return ret;
-
-	return ret;
-}
-
-/******************************************************************************
- * Core
- *****************************************************************************/
-
-struct nv50_mast {
-	struct nv50_dmac base;
-};
-
-static int
-nv50_core_create(struct nvif_device *device, struct nvif_object *disp,
-		 u64 syncbuf, struct nv50_mast *core)
-{
-	struct nv50_disp_core_channel_dma_v0 args = {
-		.pushbuf = 0xb0007d00,
-	};
-	static const s32 oclass[] = {
-		GP102_DISP_CORE_CHANNEL_DMA,
-		GP100_DISP_CORE_CHANNEL_DMA,
-		GM200_DISP_CORE_CHANNEL_DMA,
-		GM107_DISP_CORE_CHANNEL_DMA,
-		GK110_DISP_CORE_CHANNEL_DMA,
-		GK104_DISP_CORE_CHANNEL_DMA,
-		GF110_DISP_CORE_CHANNEL_DMA,
-		GT214_DISP_CORE_CHANNEL_DMA,
-		GT206_DISP_CORE_CHANNEL_DMA,
-		GT200_DISP_CORE_CHANNEL_DMA,
-		G82_DISP_CORE_CHANNEL_DMA,
-		NV50_DISP_CORE_CHANNEL_DMA,
-		0
-	};
-
-	return nv50_dmac_create(device, disp, oclass, 0, &args, sizeof(args),
-				syncbuf, &core->base);
-}
-
-/******************************************************************************
- * Base
- *****************************************************************************/
-
-struct nv50_sync {
-	struct nv50_dmac base;
-	u32 addr;
-	u32 data;
-};
-
-static int
-nv50_base_create(struct nvif_device *device, struct nvif_object *disp,
-		 int head, u64 syncbuf, struct nv50_sync *base)
-{
-	struct nv50_disp_base_channel_dma_v0 args = {
-		.pushbuf = 0xb0007c00 | head,
-		.head = head,
-	};
-	static const s32 oclass[] = {
-		GK110_DISP_BASE_CHANNEL_DMA,
-		GK104_DISP_BASE_CHANNEL_DMA,
-		GF110_DISP_BASE_CHANNEL_DMA,
-		GT214_DISP_BASE_CHANNEL_DMA,
-		GT200_DISP_BASE_CHANNEL_DMA,
-		G82_DISP_BASE_CHANNEL_DMA,
-		NV50_DISP_BASE_CHANNEL_DMA,
-		0
-	};
-
-	return nv50_dmac_create(device, disp, oclass, head, &args, sizeof(args),
-				syncbuf, &base->base);
-}
-
-/******************************************************************************
- * Overlay
- *****************************************************************************/
-
-struct nv50_ovly {
-	struct nv50_dmac base;
-};
-
-static int
-nv50_ovly_create(struct nvif_device *device, struct nvif_object *disp,
-		 int head, u64 syncbuf, struct nv50_ovly *ovly)
-{
-	struct nv50_disp_overlay_channel_dma_v0 args = {
-		.pushbuf = 0xb0007e00 | head,
-		.head = head,
-	};
-	static const s32 oclass[] = {
-		GK104_DISP_OVERLAY_CONTROL_DMA,
-		GF110_DISP_OVERLAY_CONTROL_DMA,
-		GT214_DISP_OVERLAY_CHANNEL_DMA,
-		GT200_DISP_OVERLAY_CHANNEL_DMA,
-		G82_DISP_OVERLAY_CHANNEL_DMA,
-		NV50_DISP_OVERLAY_CHANNEL_DMA,
-		0
-	};
-
-	return nv50_dmac_create(device, disp, oclass, head, &args, sizeof(args),
-				syncbuf, &ovly->base);
-}
-
-struct nv50_head {
-	struct nouveau_crtc base;
-	struct {
-		struct nouveau_bo *nvbo[2];
-		int next;
-	} lut;
-	struct nv50_ovly ovly;
-	struct nv50_oimm oimm;
-};
-
-#define nv50_head(c) ((struct nv50_head *)nouveau_crtc(c))
-#define nv50_ovly(c) (&nv50_head(c)->ovly)
-#define nv50_oimm(c) (&nv50_head(c)->oimm)
-#define nv50_chan(c) (&(c)->base.base)
-#define nv50_vers(c) nv50_chan(c)->user.oclass
-
-struct nv50_disp {
-	struct nvif_object *disp;
-	struct nv50_mast mast;
-
-	struct nouveau_bo *sync;
-
-	struct mutex mutex;
-};
-
-static struct nv50_disp *
-nv50_disp(struct drm_device *dev)
-{
-	return nouveau_display(dev)->priv;
-}
-
-#define nv50_mast(d) (&nv50_disp(d)->mast)
-
-/******************************************************************************
- * EVO channel helpers
- *****************************************************************************/
-static u32 *
-evo_wait(void *evoc, int nr)
-{
-	struct nv50_dmac *dmac = evoc;
-	struct nvif_device *device = dmac->base.device;
-	u32 put = nvif_rd32(&dmac->base.user, 0x0000) / 4;
-
-	mutex_lock(&dmac->lock);
-	if (put + nr >= (PAGE_SIZE / 4) - 8) {
-		dmac->ptr[put] = 0x20000000;
-
-		nvif_wr32(&dmac->base.user, 0x0000, 0x00000000);
-		if (nvif_msec(device, 2000,
-			if (!nvif_rd32(&dmac->base.user, 0x0004))
-				break;
-		) < 0) {
-			mutex_unlock(&dmac->lock);
-			pr_err("nouveau: evo channel stalled\n");
-			return NULL;
-		}
-
-		put = 0;
-	}
-
-	return dmac->ptr + put;
-}
-
-static void
-evo_kick(u32 *push, void *evoc)
-{
-	struct nv50_dmac *dmac = evoc;
-	nvif_wr32(&dmac->base.user, 0x0000, (push - dmac->ptr) << 2);
-	mutex_unlock(&dmac->lock);
-}
-
-#define evo_mthd(p, m, s) do {						\
-	const u32 _m = (m), _s = (s);					\
-	if (drm_debug & DRM_UT_KMS)					\
-		pr_err("%04x %d %s\n", _m, _s, __func__);		\
-	*((p)++) = ((_s << 18) | _m);					\
-} while(0)
-
-#define evo_data(p, d) do {						\
-	const u32 _d = (d);						\
-	if (drm_debug & DRM_UT_KMS)					\
-		pr_err("\t%08x\n", _d);					\
-	*((p)++) = _d;							\
-} while(0)
-
-/******************************************************************************
- * Plane
- *****************************************************************************/
-#define nv50_wndw(p) container_of((p), struct nv50_wndw, plane)
-
-struct nv50_wndw {
-	const struct nv50_wndw_func *func;
-	struct nv50_dmac *dmac;
-
-	struct drm_plane plane;
-
-	struct nvif_notify notify;
-	u16 ntfy;
-	u16 sema;
-	u32 data;
-};
-
-struct nv50_wndw_func {
-	void *(*dtor)(struct nv50_wndw *);
-	int (*acquire)(struct nv50_wndw *, struct nv50_wndw_atom *asyw,
-		       struct nv50_head_atom *asyh);
-	void (*release)(struct nv50_wndw *, struct nv50_wndw_atom *asyw,
-			struct nv50_head_atom *asyh);
-	void (*prepare)(struct nv50_wndw *, struct nv50_head_atom *asyh,
-			struct nv50_wndw_atom *asyw);
-
-	void (*sema_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
-	void (*sema_clr)(struct nv50_wndw *);
-	void (*ntfy_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
-	void (*ntfy_clr)(struct nv50_wndw *);
-	int (*ntfy_wait_begun)(struct nv50_wndw *, struct nv50_wndw_atom *);
-	void (*image_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
-	void (*image_clr)(struct nv50_wndw *);
-	void (*lut)(struct nv50_wndw *, struct nv50_wndw_atom *);
-	void (*point)(struct nv50_wndw *, struct nv50_wndw_atom *);
-
-	u32 (*update)(struct nv50_wndw *, u32 interlock);
-};
-
-static int
-nv50_wndw_wait_armed(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
-{
-	if (asyw->set.ntfy)
-		return wndw->func->ntfy_wait_begun(wndw, asyw);
-	return 0;
-}
-
-static u32
-nv50_wndw_flush_clr(struct nv50_wndw *wndw, u32 interlock, bool flush,
-		    struct nv50_wndw_atom *asyw)
-{
-	if (asyw->clr.sema && (!asyw->set.sema || flush))
-		wndw->func->sema_clr(wndw);
-	if (asyw->clr.ntfy && (!asyw->set.ntfy || flush))
-		wndw->func->ntfy_clr(wndw);
-	if (asyw->clr.image && (!asyw->set.image || flush))
-		wndw->func->image_clr(wndw);
-
-	return flush ? wndw->func->update(wndw, interlock) : 0;
-}
-
-static u32
-nv50_wndw_flush_set(struct nv50_wndw *wndw, u32 interlock,
-		    struct nv50_wndw_atom *asyw)
-{
-	if (interlock) {
-		asyw->image.mode = 0;
-		asyw->image.interval = 1;
-	}
-
-	if (asyw->set.sema ) wndw->func->sema_set (wndw, asyw);
-	if (asyw->set.ntfy ) wndw->func->ntfy_set (wndw, asyw);
-	if (asyw->set.image) wndw->func->image_set(wndw, asyw);
-	if (asyw->set.lut  ) wndw->func->lut      (wndw, asyw);
-	if (asyw->set.point) wndw->func->point    (wndw, asyw);
-
-	return wndw->func->update(wndw, interlock);
-}
-
-static void
-nv50_wndw_atomic_check_release(struct nv50_wndw *wndw,
-			       struct nv50_wndw_atom *asyw,
-			       struct nv50_head_atom *asyh)
-{
-	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
-	NV_ATOMIC(drm, "%s release\n", wndw->plane.name);
-	wndw->func->release(wndw, asyw, asyh);
-	asyw->ntfy.handle = 0;
-	asyw->sema.handle = 0;
-}
-
-static int
-nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw,
-			       struct nv50_wndw_atom *asyw,
-			       struct nv50_head_atom *asyh)
-{
-	struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb);
-	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
-	int ret;
-
-	NV_ATOMIC(drm, "%s acquire\n", wndw->plane.name);
-
-	asyw->image.w = fb->base.width;
-	asyw->image.h = fb->base.height;
-	asyw->image.kind = fb->nvbo->kind;
-
-	if (asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)
-		asyw->interval = 0;
-	else
-		asyw->interval = 1;
-
-	if (asyw->image.kind) {
-		asyw->image.layout = 0;
-		if (drm->client.device.info.chipset >= 0xc0)
-			asyw->image.block = fb->nvbo->mode >> 4;
-		else
-			asyw->image.block = fb->nvbo->mode;
-		asyw->image.pitch = (fb->base.pitches[0] / 4) << 4;
-	} else {
-		asyw->image.layout = 1;
-		asyw->image.block  = 0;
-		asyw->image.pitch  = fb->base.pitches[0];
-	}
-
-	ret = wndw->func->acquire(wndw, asyw, asyh);
-	if (ret)
-		return ret;
-
-	if (asyw->set.image) {
-		if (!(asyw->image.mode = asyw->interval ? 0 : 1))
-			asyw->image.interval = asyw->interval;
-		else
-			asyw->image.interval = 0;
-	}
-
-	return 0;
-}
-
-static int
-nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
-{
-	struct nouveau_drm *drm = nouveau_drm(plane->dev);
-	struct nv50_wndw *wndw = nv50_wndw(plane);
-	struct nv50_wndw_atom *armw = nv50_wndw_atom(wndw->plane.state);
-	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
-	struct nv50_head_atom *harm = NULL, *asyh = NULL;
-	bool varm = false, asyv = false, asym = false;
-	int ret;
-
-	NV_ATOMIC(drm, "%s atomic_check\n", plane->name);
-	if (asyw->state.crtc) {
-		asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc);
-		if (IS_ERR(asyh))
-			return PTR_ERR(asyh);
-		asym = drm_atomic_crtc_needs_modeset(&asyh->state);
-		asyv = asyh->state.active;
-	}
-
-	if (armw->state.crtc) {
-		harm = nv50_head_atom_get(asyw->state.state, armw->state.crtc);
-		if (IS_ERR(harm))
-			return PTR_ERR(harm);
-		varm = harm->state.crtc->state->active;
-	}
-
-	if (asyv) {
-		asyw->point.x = asyw->state.crtc_x;
-		asyw->point.y = asyw->state.crtc_y;
-		if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point)))
-			asyw->set.point = true;
-
-		ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh);
-		if (ret)
-			return ret;
-	} else
-	if (varm) {
-		nv50_wndw_atomic_check_release(wndw, asyw, harm);
-	} else {
-		return 0;
-	}
-
-	if (!asyv || asym) {
-		asyw->clr.ntfy = armw->ntfy.handle != 0;
-		asyw->clr.sema = armw->sema.handle != 0;
-		if (wndw->func->image_clr)
-			asyw->clr.image = armw->image.handle != 0;
-		asyw->set.lut = wndw->func->lut && asyv;
-	}
-
-	return 0;
-}
-
-static void
-nv50_wndw_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state)
-{
-	struct nouveau_framebuffer *fb = nouveau_framebuffer(old_state->fb);
-	struct nouveau_drm *drm = nouveau_drm(plane->dev);
-
-	NV_ATOMIC(drm, "%s cleanup: %p\n", plane->name, old_state->fb);
-	if (!old_state->fb)
-		return;
-
-	nouveau_bo_unpin(fb->nvbo);
-}
-
-static int
-nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
-{
-	struct nouveau_framebuffer *fb = nouveau_framebuffer(state->fb);
-	struct nouveau_drm *drm = nouveau_drm(plane->dev);
-	struct nv50_wndw *wndw = nv50_wndw(plane);
-	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
-	struct nv50_head_atom *asyh;
-	struct nv50_dmac_ctxdma *ctxdma;
-	int ret;
-
-	NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, state->fb);
-	if (!asyw->state.fb)
-		return 0;
-
-	ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM, true);
-	if (ret)
-		return ret;
-
-	ctxdma = nv50_dmac_ctxdma_new(wndw->dmac, fb);
-	if (IS_ERR(ctxdma)) {
-		nouveau_bo_unpin(fb->nvbo);
-		return PTR_ERR(ctxdma);
-	}
-
-	asyw->state.fence = reservation_object_get_excl_rcu(fb->nvbo->bo.resv);
-	asyw->image.handle = ctxdma->object.handle;
-	asyw->image.offset = fb->nvbo->bo.offset;
-
-	if (wndw->func->prepare) {
-		asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc);
-		if (IS_ERR(asyh))
-			return PTR_ERR(asyh);
-
-		wndw->func->prepare(wndw, asyh, asyw);
-	}
-
-	return 0;
-}
-
-static const struct drm_plane_helper_funcs
-nv50_wndw_helper = {
-	.prepare_fb = nv50_wndw_prepare_fb,
-	.cleanup_fb = nv50_wndw_cleanup_fb,
-	.atomic_check = nv50_wndw_atomic_check,
-};
-
-static void
-nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
-			       struct drm_plane_state *state)
-{
-	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
-	__drm_atomic_helper_plane_destroy_state(&asyw->state);
-	kfree(asyw);
-}
-
-static struct drm_plane_state *
-nv50_wndw_atomic_duplicate_state(struct drm_plane *plane)
-{
-	struct nv50_wndw_atom *armw = nv50_wndw_atom(plane->state);
-	struct nv50_wndw_atom *asyw;
-	if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL)))
-		return NULL;
-	__drm_atomic_helper_plane_duplicate_state(plane, &asyw->state);
-	asyw->interval = 1;
-	asyw->sema = armw->sema;
-	asyw->ntfy = armw->ntfy;
-	asyw->image = armw->image;
-	asyw->point = armw->point;
-	asyw->lut = armw->lut;
-	asyw->clr.mask = 0;
-	asyw->set.mask = 0;
-	return &asyw->state;
-}
-
-static void
-nv50_wndw_reset(struct drm_plane *plane)
-{
-	struct nv50_wndw_atom *asyw;
-
-	if (WARN_ON(!(asyw = kzalloc(sizeof(*asyw), GFP_KERNEL))))
-		return;
-
-	if (plane->state)
-		plane->funcs->atomic_destroy_state(plane, plane->state);
-	plane->state = &asyw->state;
-	plane->state->plane = plane;
-	plane->state->rotation = DRM_MODE_ROTATE_0;
-}
-
-static void
-nv50_wndw_destroy(struct drm_plane *plane)
-{
-	struct nv50_wndw *wndw = nv50_wndw(plane);
-	void *data;
-	nvif_notify_fini(&wndw->notify);
-	data = wndw->func->dtor(wndw);
-	drm_plane_cleanup(&wndw->plane);
-	kfree(data);
-}
-
-static const struct drm_plane_funcs
-nv50_wndw = {
-	.update_plane = drm_atomic_helper_update_plane,
-	.disable_plane = drm_atomic_helper_disable_plane,
-	.destroy = nv50_wndw_destroy,
-	.reset = nv50_wndw_reset,
-	.atomic_duplicate_state = nv50_wndw_atomic_duplicate_state,
-	.atomic_destroy_state = nv50_wndw_atomic_destroy_state,
-};
-
-static void
-nv50_wndw_fini(struct nv50_wndw *wndw)
-{
-	nvif_notify_put(&wndw->notify);
-}
-
-static void
-nv50_wndw_init(struct nv50_wndw *wndw)
-{
-	nvif_notify_get(&wndw->notify);
-}
-
-static int
-nv50_wndw_ctor(const struct nv50_wndw_func *func, struct drm_device *dev,
-	       enum drm_plane_type type, const char *name, int index,
-	       struct nv50_dmac *dmac, const u32 *format, int nformat,
-	       struct nv50_wndw *wndw)
-{
-	int ret;
-
-	wndw->func = func;
-	wndw->dmac = dmac;
-
-	ret = drm_universal_plane_init(dev, &wndw->plane, 0, &nv50_wndw,
-				       format, nformat, NULL,
-				       type, "%s-%d", name, index);
-	if (ret)
-		return ret;
-
-	drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
-	return 0;
-}
-
-/******************************************************************************
- * Cursor plane
- *****************************************************************************/
-#define nv50_curs(p) container_of((p), struct nv50_curs, wndw)
-
-struct nv50_curs {
-	struct nv50_wndw wndw;
-	struct nvif_object chan;
-};
-
-static u32
-nv50_curs_update(struct nv50_wndw *wndw, u32 interlock)
-{
-	struct nv50_curs *curs = nv50_curs(wndw);
-	nvif_wr32(&curs->chan, 0x0080, 0x00000000);
-	return 0;
-}
-
-static void
-nv50_curs_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
-{
-	struct nv50_curs *curs = nv50_curs(wndw);
-	nvif_wr32(&curs->chan, 0x0084, (asyw->point.y << 16) | asyw->point.x);
-}
-
-static void
-nv50_curs_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh,
-		  struct nv50_wndw_atom *asyw)
-{
-	u32 handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle;
-	u32 offset = asyw->image.offset;
-	if (asyh->curs.handle != handle || asyh->curs.offset != offset) {
-		asyh->curs.handle = handle;
-		asyh->curs.offset = offset;
-		asyh->set.curs = asyh->curs.visible;
-	}
-}
-
-static void
-nv50_curs_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
-		  struct nv50_head_atom *asyh)
-{
-	asyh->curs.visible = false;
-}
-
-static int
-nv50_curs_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
-		  struct nv50_head_atom *asyh)
-{
-	int ret;
-
-	ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
-						  DRM_PLANE_HELPER_NO_SCALING,
-						  DRM_PLANE_HELPER_NO_SCALING,
-						  true, true);
-	asyh->curs.visible = asyw->state.visible;
-	if (ret || !asyh->curs.visible)
-		return ret;
-
-	switch (asyw->state.fb->width) {
-	case 32: asyh->curs.layout = 0; break;
-	case 64: asyh->curs.layout = 1; break;
-	default:
-		return -EINVAL;
-	}
-
-	if (asyw->state.fb->width != asyw->state.fb->height)
-		return -EINVAL;
-
-	switch (asyw->state.fb->format->format) {
-	case DRM_FORMAT_ARGB8888: asyh->curs.format = 1; break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static void *
-nv50_curs_dtor(struct nv50_wndw *wndw)
-{
-	struct nv50_curs *curs = nv50_curs(wndw);
-	nvif_object_fini(&curs->chan);
-	return curs;
-}
-
-static const u32
-nv50_curs_format[] = {
-	DRM_FORMAT_ARGB8888,
-};
-
-static const struct nv50_wndw_func
-nv50_curs = {
-	.dtor = nv50_curs_dtor,
-	.acquire = nv50_curs_acquire,
-	.release = nv50_curs_release,
-	.prepare = nv50_curs_prepare,
-	.point = nv50_curs_point,
-	.update = nv50_curs_update,
-};
-
-static int
-nv50_curs_new(struct nouveau_drm *drm, struct nv50_head *head,
-	      struct nv50_curs **pcurs)
-{
-	static const struct nvif_mclass curses[] = {
-		{ GK104_DISP_CURSOR, 0 },
-		{ GF110_DISP_CURSOR, 0 },
-		{ GT214_DISP_CURSOR, 0 },
-		{   G82_DISP_CURSOR, 0 },
-		{  NV50_DISP_CURSOR, 0 },
-		{}
-	};
-	struct nv50_disp_cursor_v0 args = {
-		.head = head->base.index,
-	};
-	struct nv50_disp *disp = nv50_disp(drm->dev);
-	struct nv50_curs *curs;
-	int cid, ret;
-
-	cid = nvif_mclass(disp->disp, curses);
-	if (cid < 0) {
-		NV_ERROR(drm, "No supported cursor immediate class\n");
-		return cid;
-	}
-
-	if (!(curs = *pcurs = kzalloc(sizeof(*curs), GFP_KERNEL)))
-		return -ENOMEM;
-
-	ret = nv50_wndw_ctor(&nv50_curs, drm->dev, DRM_PLANE_TYPE_CURSOR,
-			     "curs", head->base.index, &disp->mast.base,
-			     nv50_curs_format, ARRAY_SIZE(nv50_curs_format),
-			     &curs->wndw);
-	if (ret) {
-		kfree(curs);
-		return ret;
-	}
-
-	ret = nvif_object_init(disp->disp, 0, curses[cid].oclass, &args,
-			       sizeof(args), &curs->chan);
-	if (ret) {
-		NV_ERROR(drm, "curs%04x allocation failed: %d\n",
-			 curses[cid].oclass, ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-/******************************************************************************
- * Primary plane
- *****************************************************************************/
-#define nv50_base(p) container_of((p), struct nv50_base, wndw)
-
-struct nv50_base {
-	struct nv50_wndw wndw;
-	struct nv50_sync chan;
-	int id;
-};
-
-static int
-nv50_base_notify(struct nvif_notify *notify)
-{
-	return NVIF_NOTIFY_KEEP;
-}
-
-static void
-nv50_base_lut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
-{
-	struct nv50_base *base = nv50_base(wndw);
-	u32 *push;
-	if ((push = evo_wait(&base->chan, 2))) {
-		evo_mthd(push, 0x00e0, 1);
-		evo_data(push, asyw->lut.enable << 30);
-		evo_kick(push, &base->chan);
-	}
-}
-
-static void
-nv50_base_image_clr(struct nv50_wndw *wndw)
-{
-	struct nv50_base *base = nv50_base(wndw);
-	u32 *push;
-	if ((push = evo_wait(&base->chan, 4))) {
-		evo_mthd(push, 0x0084, 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x00c0, 1);
-		evo_data(push, 0x00000000);
-		evo_kick(push, &base->chan);
-	}
-}
-
-static void
-nv50_base_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
-{
-	struct nv50_base *base = nv50_base(wndw);
-	const s32 oclass = base->chan.base.base.user.oclass;
-	u32 *push;
-	if ((push = evo_wait(&base->chan, 10))) {
-		evo_mthd(push, 0x0084, 1);
-		evo_data(push, (asyw->image.mode << 8) |
-			       (asyw->image.interval << 4));
-		evo_mthd(push, 0x00c0, 1);
-		evo_data(push, asyw->image.handle);
-		if (oclass < G82_DISP_BASE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0800, 5);
-			evo_data(push, asyw->image.offset >> 8);
-			evo_data(push, 0x00000000);
-			evo_data(push, (asyw->image.h << 16) | asyw->image.w);
-			evo_data(push, (asyw->image.layout << 20) |
-					asyw->image.pitch |
-					asyw->image.block);
-			evo_data(push, (asyw->image.kind << 16) |
-				       (asyw->image.format << 8));
-		} else
-		if (oclass < GF110_DISP_BASE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0800, 5);
-			evo_data(push, asyw->image.offset >> 8);
-			evo_data(push, 0x00000000);
-			evo_data(push, (asyw->image.h << 16) | asyw->image.w);
-			evo_data(push, (asyw->image.layout << 20) |
-					asyw->image.pitch |
-					asyw->image.block);
-			evo_data(push, asyw->image.format << 8);
-		} else {
-			evo_mthd(push, 0x0400, 5);
-			evo_data(push, asyw->image.offset >> 8);
-			evo_data(push, 0x00000000);
-			evo_data(push, (asyw->image.h << 16) | asyw->image.w);
-			evo_data(push, (asyw->image.layout << 24) |
-					asyw->image.pitch |
-					asyw->image.block);
-			evo_data(push, asyw->image.format << 8);
-		}
-		evo_kick(push, &base->chan);
-	}
-}
-
-static void
-nv50_base_ntfy_clr(struct nv50_wndw *wndw)
-{
-	struct nv50_base *base = nv50_base(wndw);
-	u32 *push;
-	if ((push = evo_wait(&base->chan, 2))) {
-		evo_mthd(push, 0x00a4, 1);
-		evo_data(push, 0x00000000);
-		evo_kick(push, &base->chan);
-	}
-}
-
-static void
-nv50_base_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
-{
-	struct nv50_base *base = nv50_base(wndw);
-	u32 *push;
-	if ((push = evo_wait(&base->chan, 3))) {
-		evo_mthd(push, 0x00a0, 2);
-		evo_data(push, (asyw->ntfy.awaken << 30) | asyw->ntfy.offset);
-		evo_data(push, asyw->ntfy.handle);
-		evo_kick(push, &base->chan);
-	}
-}
-
-static void
-nv50_base_sema_clr(struct nv50_wndw *wndw)
-{
-	struct nv50_base *base = nv50_base(wndw);
-	u32 *push;
-	if ((push = evo_wait(&base->chan, 2))) {
-		evo_mthd(push, 0x0094, 1);
-		evo_data(push, 0x00000000);
-		evo_kick(push, &base->chan);
-	}
-}
-
-static void
-nv50_base_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
-{
-	struct nv50_base *base = nv50_base(wndw);
-	u32 *push;
-	if ((push = evo_wait(&base->chan, 5))) {
-		evo_mthd(push, 0x0088, 4);
-		evo_data(push, asyw->sema.offset);
-		evo_data(push, asyw->sema.acquire);
-		evo_data(push, asyw->sema.release);
-		evo_data(push, asyw->sema.handle);
-		evo_kick(push, &base->chan);
-	}
-}
-
-static u32
-nv50_base_update(struct nv50_wndw *wndw, u32 interlock)
-{
-	struct nv50_base *base = nv50_base(wndw);
-	u32 *push;
-
-	if (!(push = evo_wait(&base->chan, 2)))
-		return 0;
-	evo_mthd(push, 0x0080, 1);
-	evo_data(push, interlock);
-	evo_kick(push, &base->chan);
-
-	if (base->chan.base.base.user.oclass < GF110_DISP_BASE_CHANNEL_DMA)
-		return interlock ? 2 << (base->id * 8) : 0;
-	return interlock ? 2 << (base->id * 4) : 0;
-}
-
-static int
-nv50_base_ntfy_wait_begun(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
-{
-	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
-	struct nv50_disp *disp = nv50_disp(wndw->plane.dev);
-	if (nvif_msec(&drm->client.device, 2000ULL,
-		u32 data = nouveau_bo_rd32(disp->sync, asyw->ntfy.offset / 4);
-		if ((data & 0xc0000000) == 0x40000000)
-			break;
-		usleep_range(1, 2);
-	) < 0)
-		return -ETIMEDOUT;
-	return 0;
-}
-
-static void
-nv50_base_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
-		  struct nv50_head_atom *asyh)
-{
-	asyh->base.cpp = 0;
-}
-
-static int
-nv50_base_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
-		  struct nv50_head_atom *asyh)
-{
-	const struct drm_framebuffer *fb = asyw->state.fb;
-	int ret;
-
-	if (!fb->format->depth)
-		return -EINVAL;
-
-	ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
-						  DRM_PLANE_HELPER_NO_SCALING,
-						  DRM_PLANE_HELPER_NO_SCALING,
-						  false, true);
-	if (ret)
-		return ret;
-
-	asyh->base.depth = fb->format->depth;
-	asyh->base.cpp = fb->format->cpp[0];
-	asyh->base.x = asyw->state.src.x1 >> 16;
-	asyh->base.y = asyw->state.src.y1 >> 16;
-	asyh->base.w = asyw->state.fb->width;
-	asyh->base.h = asyw->state.fb->height;
-
-	switch (fb->format->format) {
-	case DRM_FORMAT_C8         : asyw->image.format = 0x1e; break;
-	case DRM_FORMAT_RGB565     : asyw->image.format = 0xe8; break;
-	case DRM_FORMAT_XRGB1555   :
-	case DRM_FORMAT_ARGB1555   : asyw->image.format = 0xe9; break;
-	case DRM_FORMAT_XRGB8888   :
-	case DRM_FORMAT_ARGB8888   : asyw->image.format = 0xcf; break;
-	case DRM_FORMAT_XBGR2101010:
-	case DRM_FORMAT_ABGR2101010: asyw->image.format = 0xd1; break;
-	case DRM_FORMAT_XBGR8888   :
-	case DRM_FORMAT_ABGR8888   : asyw->image.format = 0xd5; break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	asyw->lut.enable = 1;
-	asyw->set.image = true;
-	return 0;
-}
-
-static void *
-nv50_base_dtor(struct nv50_wndw *wndw)
-{
-	struct nv50_disp *disp = nv50_disp(wndw->plane.dev);
-	struct nv50_base *base = nv50_base(wndw);
-	nv50_dmac_destroy(&base->chan.base, disp->disp);
-	return base;
-}
-
-static const u32
-nv50_base_format[] = {
-	DRM_FORMAT_C8,
-	DRM_FORMAT_RGB565,
-	DRM_FORMAT_XRGB1555,
-	DRM_FORMAT_ARGB1555,
-	DRM_FORMAT_XRGB8888,
-	DRM_FORMAT_ARGB8888,
-	DRM_FORMAT_XBGR2101010,
-	DRM_FORMAT_ABGR2101010,
-	DRM_FORMAT_XBGR8888,
-	DRM_FORMAT_ABGR8888,
-};
-
-static const struct nv50_wndw_func
-nv50_base = {
-	.dtor = nv50_base_dtor,
-	.acquire = nv50_base_acquire,
-	.release = nv50_base_release,
-	.sema_set = nv50_base_sema_set,
-	.sema_clr = nv50_base_sema_clr,
-	.ntfy_set = nv50_base_ntfy_set,
-	.ntfy_clr = nv50_base_ntfy_clr,
-	.ntfy_wait_begun = nv50_base_ntfy_wait_begun,
-	.image_set = nv50_base_image_set,
-	.image_clr = nv50_base_image_clr,
-	.lut = nv50_base_lut,
-	.update = nv50_base_update,
-};
-
-static int
-nv50_base_new(struct nouveau_drm *drm, struct nv50_head *head,
-	      struct nv50_base **pbase)
-{
-	struct nv50_disp *disp = nv50_disp(drm->dev);
-	struct nv50_base *base;
-	int ret;
-
-	if (!(base = *pbase = kzalloc(sizeof(*base), GFP_KERNEL)))
-		return -ENOMEM;
-	base->id = head->base.index;
-	base->wndw.ntfy = EVO_FLIP_NTFY0(base->id);
-	base->wndw.sema = EVO_FLIP_SEM0(base->id);
-	base->wndw.data = 0x00000000;
-
-	ret = nv50_wndw_ctor(&nv50_base, drm->dev, DRM_PLANE_TYPE_PRIMARY,
-			     "base", base->id, &base->chan.base,
-			     nv50_base_format, ARRAY_SIZE(nv50_base_format),
-			     &base->wndw);
-	if (ret) {
-		kfree(base);
-		return ret;
-	}
-
-	ret = nv50_base_create(&drm->client.device, disp->disp, base->id,
-			       disp->sync->bo.offset, &base->chan);
-	if (ret)
-		return ret;
-
-	return nvif_notify_init(&base->chan.base.base.user, nv50_base_notify,
-				false,
-				NV50_DISP_BASE_CHANNEL_DMA_V0_NTFY_UEVENT,
-				&(struct nvif_notify_uevent_req) {},
-				sizeof(struct nvif_notify_uevent_req),
-				sizeof(struct nvif_notify_uevent_rep),
-				&base->wndw.notify);
-}
-
-/******************************************************************************
- * Head
- *****************************************************************************/
-static void
-nv50_head_procamp(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 2))) {
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
-			evo_mthd(push, 0x08a8 + (head->base.index * 0x400), 1);
-		else
-			evo_mthd(push, 0x0498 + (head->base.index * 0x300), 1);
-		evo_data(push, (asyh->procamp.sat.sin << 20) |
-			       (asyh->procamp.sat.cos << 8));
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_dither(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 2))) {
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
-			evo_mthd(push, 0x08a0 + (head->base.index * 0x0400), 1);
-		else
-		if (core->base.user.oclass < GK104_DISP_CORE_CHANNEL_DMA)
-			evo_mthd(push, 0x0490 + (head->base.index * 0x0300), 1);
-		else
-			evo_mthd(push, 0x04a0 + (head->base.index * 0x0300), 1);
-		evo_data(push, (asyh->dither.mode << 3) |
-			       (asyh->dither.bits << 1) |
-			        asyh->dither.enable);
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_ovly(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 bounds = 0;
-	u32 *push;
-
-	if (asyh->base.cpp) {
-		switch (asyh->base.cpp) {
-		case 8: bounds |= 0x00000500; break;
-		case 4: bounds |= 0x00000300; break;
-		case 2: bounds |= 0x00000100; break;
-		default:
-			WARN_ON(1);
-			break;
-		}
-		bounds |= 0x00000001;
-	}
-
-	if ((push = evo_wait(core, 2))) {
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
-			evo_mthd(push, 0x0904 + head->base.index * 0x400, 1);
-		else
-			evo_mthd(push, 0x04d4 + head->base.index * 0x300, 1);
-		evo_data(push, bounds);
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_base(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 bounds = 0;
-	u32 *push;
-
-	if (asyh->base.cpp) {
-		switch (asyh->base.cpp) {
-		case 8: bounds |= 0x00000500; break;
-		case 4: bounds |= 0x00000300; break;
-		case 2: bounds |= 0x00000100; break;
-		case 1: bounds |= 0x00000000; break;
-		default:
-			WARN_ON(1);
-			break;
-		}
-		bounds |= 0x00000001;
-	}
-
-	if ((push = evo_wait(core, 2))) {
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
-			evo_mthd(push, 0x0900 + head->base.index * 0x400, 1);
-		else
-			evo_mthd(push, 0x04d0 + head->base.index * 0x300, 1);
-		evo_data(push, bounds);
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_curs_clr(struct nv50_head *head)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 4))) {
-		if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0880 + head->base.index * 0x400, 1);
-			evo_data(push, 0x05000000);
-		} else
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0880 + head->base.index * 0x400, 1);
-			evo_data(push, 0x05000000);
-			evo_mthd(push, 0x089c + head->base.index * 0x400, 1);
-			evo_data(push, 0x00000000);
-		} else {
-			evo_mthd(push, 0x0480 + head->base.index * 0x300, 1);
-			evo_data(push, 0x05000000);
-			evo_mthd(push, 0x048c + head->base.index * 0x300, 1);
-			evo_data(push, 0x00000000);
-		}
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 5))) {
-		if (core->base.user.oclass < G82_DISP_BASE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0880 + head->base.index * 0x400, 2);
-			evo_data(push, 0x80000000 | (asyh->curs.layout << 26) |
-						    (asyh->curs.format << 24));
-			evo_data(push, asyh->curs.offset >> 8);
-		} else
-		if (core->base.user.oclass < GF110_DISP_BASE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0880 + head->base.index * 0x400, 2);
-			evo_data(push, 0x80000000 | (asyh->curs.layout << 26) |
-						    (asyh->curs.format << 24));
-			evo_data(push, asyh->curs.offset >> 8);
-			evo_mthd(push, 0x089c + head->base.index * 0x400, 1);
-			evo_data(push, asyh->curs.handle);
-		} else {
-			evo_mthd(push, 0x0480 + head->base.index * 0x300, 2);
-			evo_data(push, 0x80000000 | (asyh->curs.layout << 26) |
-						    (asyh->curs.format << 24));
-			evo_data(push, asyh->curs.offset >> 8);
-			evo_mthd(push, 0x048c + head->base.index * 0x300, 1);
-			evo_data(push, asyh->curs.handle);
-		}
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_core_clr(struct nv50_head *head)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 2))) {
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
-			evo_mthd(push, 0x0874 + head->base.index * 0x400, 1);
-		else
-			evo_mthd(push, 0x0474 + head->base.index * 0x300, 1);
-		evo_data(push, 0x00000000);
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_core_set(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 9))) {
-		if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0860 + head->base.index * 0x400, 1);
-			evo_data(push, asyh->core.offset >> 8);
-			evo_mthd(push, 0x0868 + head->base.index * 0x400, 4);
-			evo_data(push, (asyh->core.h << 16) | asyh->core.w);
-			evo_data(push, asyh->core.layout << 20 |
-				       (asyh->core.pitch >> 8) << 8 |
-				       asyh->core.block);
-			evo_data(push, asyh->core.kind << 16 |
-				       asyh->core.format << 8);
-			evo_data(push, asyh->core.handle);
-			evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1);
-			evo_data(push, (asyh->core.y << 16) | asyh->core.x);
-			/* EVO will complain with INVALID_STATE if we have an
-			 * active cursor and (re)specify HeadSetContextDmaIso
-			 * without also updating HeadSetOffsetCursor.
-			 */
-			asyh->set.curs = asyh->curs.visible;
-		} else
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0860 + head->base.index * 0x400, 1);
-			evo_data(push, asyh->core.offset >> 8);
-			evo_mthd(push, 0x0868 + head->base.index * 0x400, 4);
-			evo_data(push, (asyh->core.h << 16) | asyh->core.w);
-			evo_data(push, asyh->core.layout << 20 |
-				       (asyh->core.pitch >> 8) << 8 |
-				       asyh->core.block);
-			evo_data(push, asyh->core.format << 8);
-			evo_data(push, asyh->core.handle);
-			evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1);
-			evo_data(push, (asyh->core.y << 16) | asyh->core.x);
-		} else {
-			evo_mthd(push, 0x0460 + head->base.index * 0x300, 1);
-			evo_data(push, asyh->core.offset >> 8);
-			evo_mthd(push, 0x0468 + head->base.index * 0x300, 4);
-			evo_data(push, (asyh->core.h << 16) | asyh->core.w);
-			evo_data(push, asyh->core.layout << 24 |
-				       (asyh->core.pitch >> 8) << 8 |
-				       asyh->core.block);
-			evo_data(push, asyh->core.format << 8);
-			evo_data(push, asyh->core.handle);
-			evo_mthd(push, 0x04b0 + head->base.index * 0x300, 1);
-			evo_data(push, (asyh->core.y << 16) | asyh->core.x);
-		}
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_lut_clr(struct nv50_head *head)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 4))) {
-		if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1);
-			evo_data(push, 0x40000000);
-		} else
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1);
-			evo_data(push, 0x40000000);
-			evo_mthd(push, 0x085c + (head->base.index * 0x400), 1);
-			evo_data(push, 0x00000000);
-		} else {
-			evo_mthd(push, 0x0440 + (head->base.index * 0x300), 1);
-			evo_data(push, 0x03000000);
-			evo_mthd(push, 0x045c + (head->base.index * 0x300), 1);
-			evo_data(push, 0x00000000);
-		}
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_lut_load(struct drm_property_blob *blob, int mode,
-		   struct nouveau_bo *nvbo)
-{
-	struct drm_color_lut *in = (struct drm_color_lut *)blob->data;
-	void __iomem *lut = (u8 *)nvbo_kmap_obj_iovirtual(nvbo);
-	const int size = blob->length / sizeof(*in);
-	int bits, shift, i;
-	u16 zero, r, g, b;
-
-	/* This can't happen.. But it shuts the compiler up. */
-	if (WARN_ON(size != 256))
-		return;
-
-	switch (mode) {
-	case 0: /* LORES. */
-	case 1: /* HIRES. */
-		bits = 11;
-		shift = 3;
-		zero = 0x0000;
-		break;
-	case 7: /* INTERPOLATE_257_UNITY_RANGE. */
-		bits = 14;
-		shift = 0;
-		zero = 0x6000;
-		break;
-	default:
-		WARN_ON(1);
-		return;
-	}
-
-	for (i = 0; i < size; i++) {
-		r = (drm_color_lut_extract(in[i].  red, bits) + zero) << shift;
-		g = (drm_color_lut_extract(in[i].green, bits) + zero) << shift;
-		b = (drm_color_lut_extract(in[i]. blue, bits) + zero) << shift;
-		writew(r, lut + (i * 0x08) + 0);
-		writew(g, lut + (i * 0x08) + 2);
-		writew(b, lut + (i * 0x08) + 4);
-	}
-
-	/* INTERPOLATE modes require a "next" entry to interpolate with,
-	 * so we replicate the last entry to deal with this for now.
-	 */
-	writew(r, lut + (i * 0x08) + 0);
-	writew(g, lut + (i * 0x08) + 2);
-	writew(b, lut + (i * 0x08) + 4);
-}
-
-static void
-nv50_head_lut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 7))) {
-		if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2);
-			evo_data(push, 0x80000000 | asyh->lut.mode << 30);
-			evo_data(push, asyh->lut.offset >> 8);
-		} else
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2);
-			evo_data(push, 0x80000000 | asyh->lut.mode << 30);
-			evo_data(push, asyh->lut.offset >> 8);
-			evo_mthd(push, 0x085c + (head->base.index * 0x400), 1);
-			evo_data(push, asyh->lut.handle);
-		} else {
-			evo_mthd(push, 0x0440 + (head->base.index * 0x300), 4);
-			evo_data(push, 0x80000000 | asyh->lut.mode << 24);
-			evo_data(push, asyh->lut.offset >> 8);
-			evo_data(push, 0x00000000);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x045c + (head->base.index * 0x300), 1);
-			evo_data(push, asyh->lut.handle);
-		}
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	struct nv50_head_mode *m = &asyh->mode;
-	u32 *push;
-	if ((push = evo_wait(core, 14))) {
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0804 + (head->base.index * 0x400), 2);
-			evo_data(push, 0x00800000 | m->clock);
-			evo_data(push, m->interlace ? 0x00000002 : 0x00000000);
-			evo_mthd(push, 0x0810 + (head->base.index * 0x400), 7);
-			evo_data(push, 0x00000000);
-			evo_data(push, (m->v.active  << 16) | m->h.active );
-			evo_data(push, (m->v.synce   << 16) | m->h.synce  );
-			evo_data(push, (m->v.blanke  << 16) | m->h.blanke );
-			evo_data(push, (m->v.blanks  << 16) | m->h.blanks );
-			evo_data(push, (m->v.blank2e << 16) | m->v.blank2s);
-			evo_data(push, asyh->mode.v.blankus);
-			evo_mthd(push, 0x082c + (head->base.index * 0x400), 1);
-			evo_data(push, 0x00000000);
-		} else {
-			evo_mthd(push, 0x0410 + (head->base.index * 0x300), 6);
-			evo_data(push, 0x00000000);
-			evo_data(push, (m->v.active  << 16) | m->h.active );
-			evo_data(push, (m->v.synce   << 16) | m->h.synce  );
-			evo_data(push, (m->v.blanke  << 16) | m->h.blanke );
-			evo_data(push, (m->v.blanks  << 16) | m->h.blanks );
-			evo_data(push, (m->v.blank2e << 16) | m->v.blank2s);
-			evo_mthd(push, 0x042c + (head->base.index * 0x300), 2);
-			evo_data(push, 0x00000000); /* ??? */
-			evo_data(push, 0xffffff00);
-			evo_mthd(push, 0x0450 + (head->base.index * 0x300), 3);
-			evo_data(push, m->clock * 1000);
-			evo_data(push, 0x00200000); /* ??? */
-			evo_data(push, m->clock * 1000);
-		}
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_view(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
-	u32 *push;
-	if ((push = evo_wait(core, 10))) {
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x08a4 + (head->base.index * 0x400), 1);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x08c8 + (head->base.index * 0x400), 1);
-			evo_data(push, (asyh->view.iH << 16) | asyh->view.iW);
-			evo_mthd(push, 0x08d8 + (head->base.index * 0x400), 2);
-			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
-			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
-		} else {
-			evo_mthd(push, 0x0494 + (head->base.index * 0x300), 1);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x04b8 + (head->base.index * 0x300), 1);
-			evo_data(push, (asyh->view.iH << 16) | asyh->view.iW);
-			evo_mthd(push, 0x04c0 + (head->base.index * 0x300), 3);
-			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
-			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
-			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
-		}
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_head_flush_clr(struct nv50_head *head, struct nv50_head_atom *asyh, bool y)
-{
-	if (asyh->clr.ilut && (!asyh->set.ilut || y))
-		nv50_head_lut_clr(head);
-	if (asyh->clr.core && (!asyh->set.core || y))
-		nv50_head_core_clr(head);
-	if (asyh->clr.curs && (!asyh->set.curs || y))
-		nv50_head_curs_clr(head);
-}
-
-static void
-nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	if (asyh->set.view   ) nv50_head_view    (head, asyh);
-	if (asyh->set.mode   ) nv50_head_mode    (head, asyh);
-	if (asyh->set.ilut   ) {
-		struct nouveau_bo *nvbo = head->lut.nvbo[head->lut.next];
-		struct drm_property_blob *blob = asyh->state.gamma_lut;
-		if (blob)
-			nv50_head_lut_load(blob, asyh->lut.mode, nvbo);
-		asyh->lut.offset = nvbo->bo.offset;
-		head->lut.next ^= 1;
-		nv50_head_lut_set(head, asyh);
-	}
-	if (asyh->set.core   ) nv50_head_core_set(head, asyh);
-	if (asyh->set.curs   ) nv50_head_curs_set(head, asyh);
-	if (asyh->set.base   ) nv50_head_base    (head, asyh);
-	if (asyh->set.ovly   ) nv50_head_ovly    (head, asyh);
-	if (asyh->set.dither ) nv50_head_dither  (head, asyh);
-	if (asyh->set.procamp) nv50_head_procamp (head, asyh);
-}
-
-static void
-nv50_head_atomic_check_procamp(struct nv50_head_atom *armh,
-			       struct nv50_head_atom *asyh,
-			       struct nouveau_conn_atom *asyc)
-{
-	const int vib = asyc->procamp.color_vibrance - 100;
-	const int hue = asyc->procamp.vibrant_hue - 90;
-	const int adj = (vib > 0) ? 50 : 0;
-	asyh->procamp.sat.cos = ((vib * 2047 + adj) / 100) & 0xfff;
-	asyh->procamp.sat.sin = ((hue * 2047) / 100) & 0xfff;
-	asyh->set.procamp = true;
-}
-
-static void
-nv50_head_atomic_check_dither(struct nv50_head_atom *armh,
-			      struct nv50_head_atom *asyh,
-			      struct nouveau_conn_atom *asyc)
-{
-	struct drm_connector *connector = asyc->state.connector;
-	u32 mode = 0x00;
-
-	if (asyc->dither.mode == DITHERING_MODE_AUTO) {
-		if (asyh->base.depth > connector->display_info.bpc * 3)
-			mode = DITHERING_MODE_DYNAMIC2X2;
-	} else {
-		mode = asyc->dither.mode;
-	}
-
-	if (asyc->dither.depth == DITHERING_DEPTH_AUTO) {
-		if (connector->display_info.bpc >= 8)
-			mode |= DITHERING_DEPTH_8BPC;
-	} else {
-		mode |= asyc->dither.depth;
-	}
-
-	asyh->dither.enable = mode;
-	asyh->dither.bits = mode >> 1;
-	asyh->dither.mode = mode >> 3;
-	asyh->set.dither = true;
-}
-
-static void
-nv50_head_atomic_check_view(struct nv50_head_atom *armh,
-			    struct nv50_head_atom *asyh,
-			    struct nouveau_conn_atom *asyc)
-{
-	struct drm_connector *connector = asyc->state.connector;
-	struct drm_display_mode *omode = &asyh->state.adjusted_mode;
-	struct drm_display_mode *umode = &asyh->state.mode;
-	int mode = asyc->scaler.mode;
-	struct edid *edid;
-	int umode_vdisplay, omode_hdisplay, omode_vdisplay;
-
-	if (connector->edid_blob_ptr)
-		edid = (struct edid *)connector->edid_blob_ptr->data;
-	else
-		edid = NULL;
-
-	if (!asyc->scaler.full) {
-		if (mode == DRM_MODE_SCALE_NONE)
-			omode = umode;
-	} else {
-		/* Non-EDID LVDS/eDP mode. */
-		mode = DRM_MODE_SCALE_FULLSCREEN;
-	}
-
-	/* For the user-specified mode, we must ignore doublescan and
-	 * the like, but honor frame packing.
-	 */
-	umode_vdisplay = umode->vdisplay;
-	if ((umode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
-		umode_vdisplay += umode->vtotal;
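-	/* e.g. a frame-packed 1920x1080 mode (vtotal 1125) yields an input
-	 * height of 1080 + 1125 = 2205 active lines.
-	 */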
-	asyh->view.iW = umode->hdisplay;
-	asyh->view.iH = umode_vdisplay;
-	/* For the output mode, we can just use the stock helper. */
-	drm_mode_get_hv_timing(omode, &omode_hdisplay, &omode_vdisplay);
-	asyh->view.oW = omode_hdisplay;
-	asyh->view.oH = omode_vdisplay;
-
-	/* Add overscan compensation if necessary; this keeps the aspect
-	 * ratio the same as the backend mode unless overridden by the
-	 * user setting both the hborder and vborder properties.
-	 */
-	if ((asyc->scaler.underscan.mode == UNDERSCAN_ON ||
-	    (asyc->scaler.underscan.mode == UNDERSCAN_AUTO &&
-	     drm_detect_hdmi_monitor(edid)))) {
-		u32 bX = asyc->scaler.underscan.hborder;
-		u32 bY = asyc->scaler.underscan.vborder;
-		u32 r = (asyh->view.oH << 19) / asyh->view.oW;
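-		/* r holds the oH/oW ratio in unsigned 0.19 fixed point
-		 * (i.e. scaled by 2^19).
-		 */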
-
-		if (bX) {
-			asyh->view.oW -= (bX * 2);
-			if (bY) asyh->view.oH -= (bY * 2);
-			else    asyh->view.oH  = ((asyh->view.oW * r) + (r / 2)) >> 19;
-		} else {
-			asyh->view.oW -= (asyh->view.oW >> 4) + 32;
-			if (bY) asyh->view.oH -= (bY * 2);
-			else    asyh->view.oH  = ((asyh->view.oW * r) + (r / 2)) >> 19;
-		}
-	}
-
-	/* Handle CENTER/ASPECT scaling, taking into account the areas
-	 * removed already for overscan compensation.
-	 */
-	switch (mode) {
-	case DRM_MODE_SCALE_CENTER:
-		asyh->view.oW = min((u16)umode->hdisplay, asyh->view.oW);
-		asyh->view.oH = min((u16)umode_vdisplay, asyh->view.oH);
-		/* fall-through */
-	case DRM_MODE_SCALE_ASPECT:
-		if (asyh->view.oH < asyh->view.oW) {
-			u32 r = (asyh->view.iW << 19) / asyh->view.iH;
-			asyh->view.oW = ((asyh->view.oH * r) + (r / 2)) >> 19;
-		} else {
-			u32 r = (asyh->view.iH << 19) / asyh->view.iW;
-			asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19;
-		}
-		break;
-	default:
-		break;
-	}
-
-	asyh->set.view = true;
-}
-
-static void
-nv50_head_atomic_check_lut(struct nv50_head *head,
-			   struct nv50_head_atom *armh,
-			   struct nv50_head_atom *asyh)
-{
-	struct nv50_disp *disp = nv50_disp(head->base.base.dev);
-
-	/* An I8 surface without an input LUT makes no sense, and
-	 * EVO will throw an error if you try.
-	 *
-	 * Legacy clients actually cause this due to the order in
-	 * which they call ioctls, so we will enable the LUT with
-	 * whatever contents the buffer already contains to avoid
-	 * triggering the error check.
-	 */
-	if (!asyh->state.gamma_lut && asyh->base.cpp != 1) {
-		asyh->lut.handle = 0;
-		asyh->clr.ilut = armh->lut.visible;
-		return;
-	}
-
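-	/* LUT handling differs by generation: pre-GF110 cores pick one of
-	 * two fixed modes based on surface depth, while newer cores use
-	 * mode 7 and only reload when colour management state changes.
-	 */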
-	if (disp->disp->oclass < GF110_DISP) {
-		asyh->lut.mode = (asyh->base.cpp == 1) ? 0 : 1;
-		asyh->set.ilut = true;
-	} else {
-		asyh->lut.mode = 7;
-		asyh->set.ilut = asyh->state.color_mgmt_changed;
-	}
-	asyh->lut.handle = disp->mast.base.vram.handle;
-}
-
-static void
-nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
-{
-	struct drm_display_mode *mode = &asyh->state.adjusted_mode;
-	struct nv50_head_mode *m = &asyh->mode;
-	u32 blankus;
-
-	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V | CRTC_STEREO_DOUBLE);
-
-	/*
-	 * DRM modes are defined in terms of a repeating interval
-	 * starting with the active display area.  The hardware modes
-	 * are defined in terms of a repeating interval starting one
-	 * unit (pixel or line) into the sync pulse.  So, add bias.
-	 */
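-	/* Hypothetical example: crtc_hsync_start = 1928, crtc_hsync_end =
-	 * 1960 and crtc_hblank_end = 2008 give synce = 1960 - 1928 - 1 = 31
-	 * and blanke = 2008 - 1928 - 1 = 79.
-	 */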
-
-	m->h.active = mode->crtc_htotal;
-	m->h.synce  = mode->crtc_hsync_end - mode->crtc_hsync_start - 1;
-	m->h.blanke = mode->crtc_hblank_end - mode->crtc_hsync_start - 1;
-	m->h.blanks = m->h.blanke + mode->crtc_hdisplay;
-
-	m->v.active = mode->crtc_vtotal;
-	m->v.synce  = mode->crtc_vsync_end - mode->crtc_vsync_start - 1;
-	m->v.blanke = mode->crtc_vblank_end - mode->crtc_vsync_start - 1;
-	m->v.blanks = m->v.blanke + mode->crtc_vdisplay;
-
-	/*XXX: Safe underestimate, even "0" works */
-	blankus = (m->v.active - mode->crtc_vdisplay - 2) * m->h.active;
-	blankus *= 1000;
-	blankus /= mode->crtc_clock;
-	m->v.blankus = blankus;
-
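-	/* Interlaced modes additionally need the second field's blanking
-	 * window (blank2s/blank2e) programmed.
-	 */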
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
-		m->v.blank2e =  m->v.active + m->v.blanke;
-		m->v.blank2s =  m->v.blank2e + mode->crtc_vdisplay;
-		m->v.active  = (m->v.active * 2) + 1;
-		m->interlace = true;
-	} else {
-		m->v.blank2e = 0;
-		m->v.blank2s = 1;
-		m->interlace = false;
-	}
-	m->clock = mode->crtc_clock;
-
-	asyh->set.mode = true;
-}
-
-static int
-nv50_head_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state)
-{
-	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
-	struct nv50_disp *disp = nv50_disp(crtc->dev);
-	struct nv50_head *head = nv50_head(crtc);
-	struct nv50_head_atom *armh = nv50_head_atom(crtc->state);
-	struct nv50_head_atom *asyh = nv50_head_atom(state);
-	struct nouveau_conn_atom *asyc = NULL;
-	struct drm_connector_state *conns;
-	struct drm_connector *conn;
-	int i;
-
-	NV_ATOMIC(drm, "%s atomic_check %d\n", crtc->name, asyh->state.active);
-	if (asyh->state.active) {
-		for_each_new_connector_in_state(asyh->state.state, conn, conns, i) {
-			if (conns->crtc == crtc) {
-				asyc = nouveau_conn_atom(conns);
-				break;
-			}
-		}
-
-		if (armh->state.active) {
-			if (asyc) {
-				if (asyh->state.mode_changed)
-					asyc->set.scaler = true;
-				if (armh->base.depth != asyh->base.depth)
-					asyc->set.dither = true;
-			}
-		} else {
-			if (asyc)
-				asyc->set.mask = ~0;
-			asyh->set.mask = ~0;
-		}
-
-		if (asyh->state.mode_changed)
-			nv50_head_atomic_check_mode(head, asyh);
-
-		if (asyh->state.color_mgmt_changed ||
-		    asyh->base.cpp != armh->base.cpp)
-			nv50_head_atomic_check_lut(head, armh, asyh);
-		asyh->lut.visible = asyh->lut.handle != 0;
-
-		if (asyc) {
-			if (asyc->set.scaler)
-				nv50_head_atomic_check_view(armh, asyh, asyc);
-			if (asyc->set.dither)
-				nv50_head_atomic_check_dither(armh, asyh, asyc);
-			if (asyc->set.procamp)
-				nv50_head_atomic_check_procamp(armh, asyh, asyc);
-		}
-
-		if ((asyh->core.visible = (asyh->base.cpp != 0))) {
-			asyh->core.x = asyh->base.x;
-			asyh->core.y = asyh->base.y;
-			asyh->core.w = asyh->base.w;
-			asyh->core.h = asyh->base.h;
-		} else
-		if ((asyh->core.visible = asyh->curs.visible) ||
-		    (asyh->core.visible = asyh->lut.visible)) {
-			/*XXX: We need to either find some way of having the
-			 *     primary base layer appear black, while still
-			 *     being able to display the other layers, or we
-			 *     need to allocate a dummy black surface here.
-			 */
-			asyh->core.x = 0;
-			asyh->core.y = 0;
-			asyh->core.w = asyh->state.mode.hdisplay;
-			asyh->core.h = asyh->state.mode.vdisplay;
-		}
-		asyh->core.handle = disp->mast.base.vram.handle;
-		asyh->core.offset = 0;
-		asyh->core.format = 0xcf;
-		asyh->core.kind = 0;
-		asyh->core.layout = 1;
-		asyh->core.block = 0;
-		asyh->core.pitch = ALIGN(asyh->core.w, 64) * 4;
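-		/* 0xcf is the 32-bit ARGB surface format, matching the four
-		 * bytes per pixel assumed by the pitch calculation above.
-		 */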
-		asyh->set.base = armh->base.cpp != asyh->base.cpp;
-		asyh->set.ovly = armh->ovly.cpp != asyh->ovly.cpp;
-	} else {
-		asyh->lut.visible = false;
-		asyh->core.visible = false;
-		asyh->curs.visible = false;
-		asyh->base.cpp = 0;
-		asyh->ovly.cpp = 0;
-	}
-
-	if (!drm_atomic_crtc_needs_modeset(&asyh->state)) {
-		if (asyh->core.visible) {
-			if (memcmp(&armh->core, &asyh->core, sizeof(asyh->core)))
-				asyh->set.core = true;
-		} else
-		if (armh->core.visible) {
-			asyh->clr.core = true;
-		}
-
-		if (asyh->curs.visible) {
-			if (memcmp(&armh->curs, &asyh->curs, sizeof(asyh->curs)))
-				asyh->set.curs = true;
-		} else
-		if (armh->curs.visible) {
-			asyh->clr.curs = true;
-		}
-	} else {
-		asyh->clr.ilut = armh->lut.visible;
-		asyh->clr.core = armh->core.visible;
-		asyh->clr.curs = armh->curs.visible;
-		asyh->set.ilut = asyh->lut.visible;
-		asyh->set.core = asyh->core.visible;
-		asyh->set.curs = asyh->curs.visible;
-	}
-
-	if (asyh->clr.mask || asyh->set.mask)
-		nv50_atom(asyh->state.state)->lock_core = true;
-	return 0;
-}
-
-static const struct drm_crtc_helper_funcs
-nv50_head_help = {
-	.atomic_check = nv50_head_atomic_check,
-};
-
-static void
-nv50_head_atomic_destroy_state(struct drm_crtc *crtc,
-			       struct drm_crtc_state *state)
-{
-	struct nv50_head_atom *asyh = nv50_head_atom(state);
-	__drm_atomic_helper_crtc_destroy_state(&asyh->state);
-	kfree(asyh);
-}
-
-static struct drm_crtc_state *
-nv50_head_atomic_duplicate_state(struct drm_crtc *crtc)
-{
-	struct nv50_head_atom *armh = nv50_head_atom(crtc->state);
-	struct nv50_head_atom *asyh;
-	if (!(asyh = kmalloc(sizeof(*asyh), GFP_KERNEL)))
-		return NULL;
-	__drm_atomic_helper_crtc_duplicate_state(crtc, &asyh->state);
-	asyh->view = armh->view;
-	asyh->mode = armh->mode;
-	asyh->lut  = armh->lut;
-	asyh->core = armh->core;
-	asyh->curs = armh->curs;
-	asyh->base = armh->base;
-	asyh->ovly = armh->ovly;
-	asyh->dither = armh->dither;
-	asyh->procamp = armh->procamp;
-	asyh->clr.mask = 0;
-	asyh->set.mask = 0;
-	return &asyh->state;
-}
-
-static void
-__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
-			       struct drm_crtc_state *state)
-{
-	if (crtc->state)
-		crtc->funcs->atomic_destroy_state(crtc, crtc->state);
-	crtc->state = state;
-	crtc->state->crtc = crtc;
-}
-
-static void
-nv50_head_reset(struct drm_crtc *crtc)
-{
-	struct nv50_head_atom *asyh;
-
-	if (WARN_ON(!(asyh = kzalloc(sizeof(*asyh), GFP_KERNEL))))
-		return;
-
-	__drm_atomic_helper_crtc_reset(crtc, &asyh->state);
-}
-
-static void
-nv50_head_destroy(struct drm_crtc *crtc)
-{
-	struct nv50_disp *disp = nv50_disp(crtc->dev);
-	struct nv50_head *head = nv50_head(crtc);
-	int i;
-
-	nv50_dmac_destroy(&head->ovly.base, disp->disp);
-	nv50_pioc_destroy(&head->oimm.base);
-
-	for (i = 0; i < ARRAY_SIZE(head->lut.nvbo); i++)
-		nouveau_bo_unmap_unpin_unref(&head->lut.nvbo[i]);
-
-	drm_crtc_cleanup(crtc);
-	kfree(crtc);
-}
-
-static const struct drm_crtc_funcs
-nv50_head_func = {
-	.reset = nv50_head_reset,
-	.gamma_set = drm_atomic_helper_legacy_gamma_set,
-	.destroy = nv50_head_destroy,
-	.set_config = drm_atomic_helper_set_config,
-	.page_flip = drm_atomic_helper_page_flip,
-	.atomic_duplicate_state = nv50_head_atomic_duplicate_state,
-	.atomic_destroy_state = nv50_head_atomic_destroy_state,
-};
-
-static int
-nv50_head_create(struct drm_device *dev, int index)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->client.device;
-	struct nv50_disp *disp = nv50_disp(dev);
-	struct nv50_head *head;
-	struct nv50_base *base;
-	struct nv50_curs *curs;
-	struct drm_crtc *crtc;
-	int ret, i;
-
-	head = kzalloc(sizeof(*head), GFP_KERNEL);
-	if (!head)
-		return -ENOMEM;
-
-	head->base.index = index;
-	ret = nv50_base_new(drm, head, &base);
-	if (ret == 0)
-		ret = nv50_curs_new(drm, head, &curs);
-	if (ret) {
-		kfree(head);
-		return ret;
-	}
-
-	crtc = &head->base.base;
-	drm_crtc_init_with_planes(dev, crtc, &base->wndw.plane,
-				  &curs->wndw.plane, &nv50_head_func,
-				  "head-%d", head->base.index);
-	drm_crtc_helper_add(crtc, &nv50_head_help);
-	drm_mode_crtc_set_gamma_size(crtc, 256);
-
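-	/* Each LUT buffer holds 1024 8-byte entries plus one extra entry,
-	 * replicated by nv50_head_lut_load() for INTERPOLATE modes.
-	 */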
-	for (i = 0; i < ARRAY_SIZE(head->lut.nvbo); i++) {
-		ret = nouveau_bo_new_pin_map(&drm->client, 1025 * 8, 0x100,
-					     TTM_PL_FLAG_VRAM,
-					     &head->lut.nvbo[i]);
-		if (ret)
-			goto out;
-	}
-
-	/* allocate overlay resources */
-	ret = nv50_oimm_create(device, disp->disp, index, &head->oimm);
-	if (ret)
-		goto out;
-
-	ret = nv50_ovly_create(device, disp->disp, index, disp->sync->bo.offset,
-			       &head->ovly);
-	if (ret)
-		goto out;
-
-out:
-	if (ret)
-		nv50_head_destroy(crtc);
-	return ret;
-}
-
-/******************************************************************************
- * Output path helpers
- *****************************************************************************/
-static void
-nv50_outp_release(struct nouveau_encoder *nv_encoder)
-{
-	struct nv50_disp *disp = nv50_disp(nv_encoder->base.base.dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_RELEASE,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = nv_encoder->dcb->hashm,
-	};
-
-	nvif_mthd(disp->disp, 0, &args, sizeof(args));
-	nv_encoder->or = -1;
-	nv_encoder->link = 0;
-}
-
-static int
-nv50_outp_acquire(struct nouveau_encoder *nv_encoder)
-{
-	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nv50_disp *disp = nv50_disp(drm->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_acquire_v0 info;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_ACQUIRE,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = nv_encoder->dcb->hashm,
-	};
-	int ret;
-
-	ret = nvif_mthd(disp->disp, 0, &args, sizeof(args));
-	if (ret) {
-		NV_ERROR(drm, "error acquiring output path: %d\n", ret);
-		return ret;
-	}
-
-	nv_encoder->or = args.info.or;
-	nv_encoder->link = args.info.link;
-	return 0;
-}
-
-static int
-nv50_outp_atomic_check_view(struct drm_encoder *encoder,
-			    struct drm_crtc_state *crtc_state,
-			    struct drm_connector_state *conn_state,
-			    struct drm_display_mode *native_mode)
-{
-	struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
-	struct drm_display_mode *mode = &crtc_state->mode;
-	struct drm_connector *connector = conn_state->connector;
-	struct nouveau_conn_atom *asyc = nouveau_conn_atom(conn_state);
-	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
-
-	NV_ATOMIC(drm, "%s atomic_check\n", encoder->name);
-	asyc->scaler.full = false;
-	if (!native_mode)
-		return 0;
-
-	if (asyc->scaler.mode == DRM_MODE_SCALE_NONE) {
-		switch (connector->connector_type) {
-		case DRM_MODE_CONNECTOR_LVDS:
-		case DRM_MODE_CONNECTOR_eDP:
-			/* Force use of scaler for non-EDID modes. */
-			if (adjusted_mode->type & DRM_MODE_TYPE_DRIVER)
-				break;
-			mode = native_mode;
-			asyc->scaler.full = true;
-			break;
-		default:
-			break;
-		}
-	} else {
-		mode = native_mode;
-	}
-
-	if (!drm_mode_equal(adjusted_mode, mode)) {
-		drm_mode_copy(adjusted_mode, mode);
-		crtc_state->mode_changed = true;
-	}
-
-	return 0;
-}
-
-static int
-nv50_outp_atomic_check(struct drm_encoder *encoder,
-		       struct drm_crtc_state *crtc_state,
-		       struct drm_connector_state *conn_state)
-{
-	struct nouveau_connector *nv_connector =
-		nouveau_connector(conn_state->connector);
-	return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
-					   nv_connector->native_mode);
-}
-
-/******************************************************************************
- * DAC
- *****************************************************************************/
-static void
-nv50_dac_disable(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_mast *mast = nv50_mast(encoder->dev);
-	const int or = nv_encoder->or;
-	u32 *push;
-
-	if (nv_encoder->crtc) {
-		push = evo_wait(mast, 4);
-		if (push) {
-			if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-				evo_mthd(push, 0x0400 + (or * 0x080), 1);
-				evo_data(push, 0x00000000);
-			} else {
-				evo_mthd(push, 0x0180 + (or * 0x020), 1);
-				evo_data(push, 0x00000000);
-			}
-			evo_kick(push, mast);
-		}
-	}
-
-	nv_encoder->crtc = NULL;
-	nv50_outp_release(nv_encoder);
-}
-
-static void
-nv50_dac_enable(struct drm_encoder *encoder)
-{
-	struct nv50_mast *mast = nv50_mast(encoder->dev);
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
-	struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode;
-	u32 *push;
-
-	nv50_outp_acquire(nv_encoder);
-
-	push = evo_wait(mast, 8);
-	if (push) {
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			u32 syncs = 0x00000000;
-
-			if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-				syncs |= 0x00000001;
-			if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-				syncs |= 0x00000002;
-
-			evo_mthd(push, 0x0400 + (nv_encoder->or * 0x080), 2);
-			evo_data(push, 1 << nv_crtc->index);
-			evo_data(push, syncs);
-		} else {
-			u32 magic = 0x31ec6000 | (nv_crtc->index << 25);
-			u32 syncs = 0x00000001;
-
-			if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-				syncs |= 0x00000008;
-			if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-				syncs |= 0x00000010;
-
-			if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-				magic |= 0x00000001;
-
-			evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 2);
-			evo_data(push, syncs);
-			evo_data(push, magic);
-			evo_mthd(push, 0x0180 + (nv_encoder->or * 0x020), 1);
-			evo_data(push, 1 << nv_crtc->index);
-		}
-
-		evo_kick(push, mast);
-	}
-
-	nv_encoder->crtc = encoder->crtc;
-}
-
-static enum drm_connector_status
-nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_dac_load_v0 load;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_DAC_LOAD,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = nv_encoder->dcb->hashm,
-	};
-	int ret;
-
-	args.load.data = nouveau_drm(encoder->dev)->vbios.dactestval;
-	if (args.load.data == 0)
-		args.load.data = 340;
-
-	ret = nvif_mthd(disp->disp, 0, &args, sizeof(args));
-	if (ret || !args.load.load)
-		return connector_status_disconnected;
-
-	return connector_status_connected;
-}
-
-static const struct drm_encoder_helper_funcs
-nv50_dac_help = {
-	.atomic_check = nv50_outp_atomic_check,
-	.enable = nv50_dac_enable,
-	.disable = nv50_dac_disable,
-	.detect = nv50_dac_detect
-};
-
-static void
-nv50_dac_destroy(struct drm_encoder *encoder)
-{
-	drm_encoder_cleanup(encoder);
-	kfree(encoder);
-}
-
-static const struct drm_encoder_funcs
-nv50_dac_func = {
-	.destroy = nv50_dac_destroy,
-};
-
-static int
-nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
-{
-	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
-	struct nvkm_i2c_bus *bus;
-	struct nouveau_encoder *nv_encoder;
-	struct drm_encoder *encoder;
-	int type = DRM_MODE_ENCODER_DAC;
-
-	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
-	if (!nv_encoder)
-		return -ENOMEM;
-	nv_encoder->dcb = dcbe;
-
-	bus = nvkm_i2c_bus_find(i2c, dcbe->i2c_index);
-	if (bus)
-		nv_encoder->i2c = &bus->i2c;
-
-	encoder = to_drm_encoder(nv_encoder);
-	encoder->possible_crtcs = dcbe->heads;
-	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type,
-			 "dac-%04x-%04x", dcbe->hasht, dcbe->hashm);
-	drm_encoder_helper_add(encoder, &nv50_dac_help);
-
-	drm_mode_connector_attach_encoder(connector, encoder);
-	return 0;
-}
-
-/******************************************************************************
- * Audio
- *****************************************************************************/
-static void
-nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hda_eld_v0 eld;
-	} args = {
-		.base.version = 1,
-		.base.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
-		.base.hasht   = nv_encoder->dcb->hasht,
-		.base.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
-				(0x0100 << nv_crtc->index),
-	};
-
-	nvif_mthd(disp->disp, 0, &args, sizeof(args));
-}
-
-static void
-nv50_audio_enable(struct drm_encoder *encoder, struct drm_display_mode *mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
-	struct nouveau_connector *nv_connector;
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct __packed {
-		struct {
-			struct nv50_disp_mthd_v1 mthd;
-			struct nv50_disp_sor_hda_eld_v0 eld;
-		} base;
-		u8 data[sizeof(nv_connector->base.eld)];
-	} args = {
-		.base.mthd.version = 1,
-		.base.mthd.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
-		.base.mthd.hasht   = nv_encoder->dcb->hasht,
-		.base.mthd.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
-				     (0x0100 << nv_crtc->index),
-	};
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!drm_detect_monitor_audio(nv_connector->edid))
-		return;
-
-	memcpy(args.data, nv_connector->base.eld, sizeof(args.data));
-
-	nvif_mthd(disp->disp, 0, &args,
-		  sizeof(args.base) + drm_eld_size(args.data));
-}
-
-/******************************************************************************
- * HDMI
- *****************************************************************************/
-static void
-nv50_hdmi_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hdmi_pwr_v0 pwr;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = (0xf0ff & nv_encoder->dcb->hashm) |
-			       (0x0100 << nv_crtc->index),
-	};
-
-	nvif_mthd(disp->disp, 0, &args, sizeof(args));
-}
-
-static void
-nv50_hdmi_enable(struct drm_encoder *encoder, struct drm_display_mode *mode)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hdmi_pwr_v0 pwr;
-		u8 infoframes[2 * 17]; /* two frames, up to 17 bytes each */
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = (0xf0ff & nv_encoder->dcb->hashm) |
-			       (0x0100 << nv_crtc->index),
-		.pwr.state = 1,
-		.pwr.rekey = 56, /* constant used by both the binary driver and tegra */
-	};
-	struct nouveau_connector *nv_connector;
-	u32 max_ac_packet;
-	union hdmi_infoframe avi_frame;
-	union hdmi_infoframe vendor_frame;
-	int ret;
-	int size;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!drm_detect_hdmi_monitor(nv_connector->edid))
-		return;
-
-	ret = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame.avi, mode,
-						       false);
-	if (!ret) {
-		/* We have an AVI InfoFrame; pack it for the display. */
-		args.pwr.avi_infoframe_length
-			= hdmi_infoframe_pack(&avi_frame, args.infoframes, 17);
-	}
-
-	ret = drm_hdmi_vendor_infoframe_from_display_mode(&vendor_frame.vendor.hdmi,
-							  &nv_connector->base, mode);
-	if (!ret) {
-		/* We have a Vendor InfoFrame; pack it for the display. */
-		args.pwr.vendor_infoframe_length
-			= hdmi_infoframe_pack(&vendor_frame,
-					      args.infoframes
-					      + args.pwr.avi_infoframe_length,
-					      17);
-	}
-
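-	/* e.g. a hypothetical 1920x1080 mode with htotal 2200 gives
-	 * max_ac_packet = ((2200 - 1920) - 56 - 18) / 32 = 6.
-	 */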
-	max_ac_packet  = mode->htotal - mode->hdisplay;
-	max_ac_packet -= args.pwr.rekey;
-	max_ac_packet -= 18; /* constant from tegra */
-	args.pwr.max_ac_packet = max_ac_packet / 32;
-
-	size = sizeof(args.base)
-		+ sizeof(args.pwr)
-		+ args.pwr.avi_infoframe_length
-		+ args.pwr.vendor_infoframe_length;
-	nvif_mthd(disp->disp, 0, &args, size);
-	nv50_audio_enable(encoder, mode);
-}
-
-/******************************************************************************
- * MST
- *****************************************************************************/
-#define nv50_mstm(p) container_of((p), struct nv50_mstm, mgr)
-#define nv50_mstc(p) container_of((p), struct nv50_mstc, connector)
-#define nv50_msto(p) container_of((p), struct nv50_msto, encoder)
-
-struct nv50_mstm {
-	struct nouveau_encoder *outp;
-
-	struct drm_dp_mst_topology_mgr mgr;
-	struct nv50_msto *msto[4];
-
-	bool modified;
-	bool disabled;
-	int links;
-};
-
-struct nv50_mstc {
-	struct nv50_mstm *mstm;
-	struct drm_dp_mst_port *port;
-	struct drm_connector connector;
-
-	struct drm_display_mode *native;
-	struct edid *edid;
-
-	int pbn;
-};
-
-struct nv50_msto {
-	struct drm_encoder encoder;
-
-	struct nv50_head *head;
-	struct nv50_mstc *mstc;
-	bool disabled;
-};
-
-static struct drm_dp_payload *
-nv50_msto_payload(struct nv50_msto *msto)
-{
-	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
-	struct nv50_mstc *mstc = msto->mstc;
-	struct nv50_mstm *mstm = mstc->mstm;
-	int vcpi = mstc->port->vcpi.vcpi, i;
-
-	NV_ATOMIC(drm, "%s: vcpi %d\n", msto->encoder.name, vcpi);
-	for (i = 0; i < mstm->mgr.max_payloads; i++) {
-		struct drm_dp_payload *payload = &mstm->mgr.payloads[i];
-		NV_ATOMIC(drm, "%s: %d: vcpi %d start 0x%02x slots 0x%02x\n",
-			  mstm->outp->base.base.name, i, payload->vcpi,
-			  payload->start_slot, payload->num_slots);
-	}
-
-	for (i = 0; i < mstm->mgr.max_payloads; i++) {
-		struct drm_dp_payload *payload = &mstm->mgr.payloads[i];
-		if (payload->vcpi == vcpi)
-			return payload;
-	}
-
-	return NULL;
-}
-
-static void
-nv50_msto_cleanup(struct nv50_msto *msto)
-{
-	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
-	struct nv50_mstc *mstc = msto->mstc;
-	struct nv50_mstm *mstm = mstc->mstm;
-
-	NV_ATOMIC(drm, "%s: msto cleanup\n", msto->encoder.name);
-	if (mstc->port && mstc->port->vcpi.vcpi > 0 && !nv50_msto_payload(msto))
-		drm_dp_mst_deallocate_vcpi(&mstm->mgr, mstc->port);
-	if (msto->disabled) {
-		msto->mstc = NULL;
-		msto->head = NULL;
-		msto->disabled = false;
-	}
-}
-
-static void
-nv50_msto_prepare(struct nv50_msto *msto)
-{
-	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
-	struct nv50_mstc *mstc = msto->mstc;
-	struct nv50_mstm *mstm = mstc->mstm;
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_dp_mst_vcpi_v0 vcpi;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI,
-		.base.hasht  = mstm->outp->dcb->hasht,
-		.base.hashm  = (0xf0ff & mstm->outp->dcb->hashm) |
-			       (0x0100 << msto->head->base.index),
-	};
-
-	NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name);
-	if (mstc->port && mstc->port->vcpi.vcpi > 0) {
-		struct drm_dp_payload *payload = nv50_msto_payload(msto);
-		if (payload) {
-			args.vcpi.start_slot = payload->start_slot;
-			args.vcpi.num_slots = payload->num_slots;
-			args.vcpi.pbn = mstc->port->vcpi.pbn;
-			args.vcpi.aligned_pbn = mstc->port->vcpi.aligned_pbn;
-		}
-	}
-
-	NV_ATOMIC(drm, "%s: %s: %02x %02x %04x %04x\n",
-		  msto->encoder.name, msto->head->base.base.name,
-		  args.vcpi.start_slot, args.vcpi.num_slots,
-		  args.vcpi.pbn, args.vcpi.aligned_pbn);
-	nvif_mthd(&drm->display->disp, 0, &args, sizeof(args));
-}
-
-static int
-nv50_msto_atomic_check(struct drm_encoder *encoder,
-		       struct drm_crtc_state *crtc_state,
-		       struct drm_connector_state *conn_state)
-{
-	struct nv50_mstc *mstc = nv50_mstc(conn_state->connector);
-	struct nv50_mstm *mstm = mstc->mstm;
-	int bpp = conn_state->connector->display_info.bpc * 3;
-	int slots;
-
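-	/* Convert the mode's bandwidth requirement into DP MST Payload
-	 * Bandwidth Number (PBN) units.
-	 */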
-	mstc->pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, bpp);
-
-	slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn);
-	if (slots < 0)
-		return slots;
-
-	return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
-					   mstc->native);
-}
-
-static void
-nv50_msto_enable(struct drm_encoder *encoder)
-{
-	struct nv50_head *head = nv50_head(encoder->crtc);
-	struct nv50_msto *msto = nv50_msto(encoder);
-	struct nv50_mstc *mstc = NULL;
-	struct nv50_mstm *mstm = NULL;
-	struct drm_connector *connector;
-	struct drm_connector_list_iter conn_iter;
-	u8 proto, depth;
-	int slots;
-	bool r;
-
-	drm_connector_list_iter_begin(encoder->dev, &conn_iter);
-	drm_for_each_connector_iter(connector, &conn_iter) {
-		if (connector->state->best_encoder == &msto->encoder) {
-			mstc = nv50_mstc(connector);
-			mstm = mstc->mstm;
-			break;
-		}
-	}
-	drm_connector_list_iter_end(&conn_iter);
-
-	if (WARN_ON(!mstc))
-		return;
-
-	slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn);
-	r = drm_dp_mst_allocate_vcpi(&mstm->mgr, mstc->port, mstc->pbn, slots);
-	WARN_ON(!r);
-
-	if (!mstm->links++)
-		nv50_outp_acquire(mstm->outp);
-
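-	/* Protocol 0x8 drives DP on sublink A, 0x9 on sublink B. */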
-	if (mstm->outp->link & 1)
-		proto = 0x8;
-	else
-		proto = 0x9;
-
-	switch (mstc->connector.display_info.bpc) {
-	case  6: depth = 0x2; break;
-	case  8: depth = 0x5; break;
-	case 10:
-	default: depth = 0x6; break;
-	}
-
-	mstm->outp->update(mstm->outp, head->base.index,
-			   &head->base.base.state->adjusted_mode, proto, depth);
-
-	msto->head = head;
-	msto->mstc = mstc;
-	mstm->modified = true;
-}
-
-static void
-nv50_msto_disable(struct drm_encoder *encoder)
-{
-	struct nv50_msto *msto = nv50_msto(encoder);
-	struct nv50_mstc *mstc = msto->mstc;
-	struct nv50_mstm *mstm = mstc->mstm;
-
-	if (mstc->port)
-		drm_dp_mst_reset_vcpi_slots(&mstm->mgr, mstc->port);
-
-	mstm->outp->update(mstm->outp, msto->head->base.index, NULL, 0, 0);
-	mstm->modified = true;
-	if (!--mstm->links)
-		mstm->disabled = true;
-	msto->disabled = true;
-}
-
-static const struct drm_encoder_helper_funcs
-nv50_msto_help = {
-	.disable = nv50_msto_disable,
-	.enable = nv50_msto_enable,
-	.atomic_check = nv50_msto_atomic_check,
-};
-
-static void
-nv50_msto_destroy(struct drm_encoder *encoder)
-{
-	struct nv50_msto *msto = nv50_msto(encoder);
-	drm_encoder_cleanup(&msto->encoder);
-	kfree(msto);
-}
-
-static const struct drm_encoder_funcs
-nv50_msto = {
-	.destroy = nv50_msto_destroy,
-};
-
-static int
-nv50_msto_new(struct drm_device *dev, u32 heads, const char *name, int id,
-	      struct nv50_msto **pmsto)
-{
-	struct nv50_msto *msto;
-	int ret;
-
-	if (!(msto = *pmsto = kzalloc(sizeof(*msto), GFP_KERNEL)))
-		return -ENOMEM;
-
-	ret = drm_encoder_init(dev, &msto->encoder, &nv50_msto,
-			       DRM_MODE_ENCODER_DPMST, "%s-mst-%d", name, id);
-	if (ret) {
-		kfree(*pmsto);
-		*pmsto = NULL;
-		return ret;
-	}
-
-	drm_encoder_helper_add(&msto->encoder, &nv50_msto_help);
-	msto->encoder.possible_crtcs = heads;
-	return 0;
-}
-
-static struct drm_encoder *
-nv50_mstc_atomic_best_encoder(struct drm_connector *connector,
-			      struct drm_connector_state *connector_state)
-{
-	struct nv50_head *head = nv50_head(connector_state->crtc);
-	struct nv50_mstc *mstc = nv50_mstc(connector);
-	if (mstc->port) {
-		struct nv50_mstm *mstm = mstc->mstm;
-		return &mstm->msto[head->base.index]->encoder;
-	}
-	return NULL;
-}
-
-static struct drm_encoder *
-nv50_mstc_best_encoder(struct drm_connector *connector)
-{
-	struct nv50_mstc *mstc = nv50_mstc(connector);
-	if (mstc->port) {
-		struct nv50_mstm *mstm = mstc->mstm;
-		return &mstm->msto[0]->encoder;
-	}
-	return NULL;
-}
-
-static enum drm_mode_status
-nv50_mstc_mode_valid(struct drm_connector *connector,
-		     struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
-static int
-nv50_mstc_get_modes(struct drm_connector *connector)
-{
-	struct nv50_mstc *mstc = nv50_mstc(connector);
-	int ret = 0;
-
-	mstc->edid = drm_dp_mst_get_edid(&mstc->connector, mstc->port->mgr, mstc->port);
-	drm_mode_connector_update_edid_property(&mstc->connector, mstc->edid);
-	if (mstc->edid)
-		ret = drm_add_edid_modes(&mstc->connector, mstc->edid);
-
-	if (!mstc->connector.display_info.bpc)
-		mstc->connector.display_info.bpc = 8;
-
-	if (mstc->native)
-		drm_mode_destroy(mstc->connector.dev, mstc->native);
-	mstc->native = nouveau_conn_native_mode(&mstc->connector);
-	return ret;
-}
-
-static const struct drm_connector_helper_funcs
-nv50_mstc_help = {
-	.get_modes = nv50_mstc_get_modes,
-	.mode_valid = nv50_mstc_mode_valid,
-	.best_encoder = nv50_mstc_best_encoder,
-	.atomic_best_encoder = nv50_mstc_atomic_best_encoder,
-};
-
-static enum drm_connector_status
-nv50_mstc_detect(struct drm_connector *connector, bool force)
-{
-	struct nv50_mstc *mstc = nv50_mstc(connector);
-	if (!mstc->port)
-		return connector_status_disconnected;
-	return drm_dp_mst_detect_port(connector, mstc->port->mgr, mstc->port);
-}
-
-static void
-nv50_mstc_destroy(struct drm_connector *connector)
-{
-	struct nv50_mstc *mstc = nv50_mstc(connector);
-	drm_connector_cleanup(&mstc->connector);
-	kfree(mstc);
-}
-
-static const struct drm_connector_funcs
-nv50_mstc = {
-	.reset = nouveau_conn_reset,
-	.detect = nv50_mstc_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = nv50_mstc_destroy,
-	.atomic_duplicate_state = nouveau_conn_atomic_duplicate_state,
-	.atomic_destroy_state = nouveau_conn_atomic_destroy_state,
-	.atomic_set_property = nouveau_conn_atomic_set_property,
-	.atomic_get_property = nouveau_conn_atomic_get_property,
-};
-
-static int
-nv50_mstc_new(struct nv50_mstm *mstm, struct drm_dp_mst_port *port,
-	      const char *path, struct nv50_mstc **pmstc)
-{
-	struct drm_device *dev = mstm->outp->base.base.dev;
-	struct nv50_mstc *mstc;
-	int ret, i;
-
-	if (!(mstc = *pmstc = kzalloc(sizeof(*mstc), GFP_KERNEL)))
-		return -ENOMEM;
-	mstc->mstm = mstm;
-	mstc->port = port;
-
-	ret = drm_connector_init(dev, &mstc->connector, &nv50_mstc,
-				 DRM_MODE_CONNECTOR_DisplayPort);
-	if (ret) {
-		kfree(*pmstc);
-		*pmstc = NULL;
-		return ret;
-	}
-
-	drm_connector_helper_add(&mstc->connector, &nv50_mstc_help);
-
-	mstc->connector.funcs->reset(&mstc->connector);
-	nouveau_conn_attach_properties(&mstc->connector);
-
-	for (i = 0; i < ARRAY_SIZE(mstm->msto) && mstm->msto[i]; i++)
-		drm_mode_connector_attach_encoder(&mstc->connector, &mstm->msto[i]->encoder);
-
-	drm_object_attach_property(&mstc->connector.base, dev->mode_config.path_property, 0);
-	drm_object_attach_property(&mstc->connector.base, dev->mode_config.tile_property, 0);
-	drm_mode_connector_set_path_property(&mstc->connector, path);
-	return 0;
-}
-
-static void
-nv50_mstm_cleanup(struct nv50_mstm *mstm)
-{
-	struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev);
-	struct drm_encoder *encoder;
-	int ret;
-
-	NV_ATOMIC(drm, "%s: mstm cleanup\n", mstm->outp->base.base.name);
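-	/* The return values below go unused; there's little that could be
-	 * done on failure at this point anyway.
-	 */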
-	ret = drm_dp_check_act_status(&mstm->mgr);
-
-	ret = drm_dp_update_payload_part2(&mstm->mgr);
-
-	drm_for_each_encoder(encoder, mstm->outp->base.base.dev) {
-		if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
-			struct nv50_msto *msto = nv50_msto(encoder);
-			struct nv50_mstc *mstc = msto->mstc;
-			if (mstc && mstc->mstm == mstm)
-				nv50_msto_cleanup(msto);
-		}
-	}
-
-	mstm->modified = false;
-}
-
-static void
-nv50_mstm_prepare(struct nv50_mstm *mstm)
-{
-	struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev);
-	struct drm_encoder *encoder;
-	int ret;
-
-	NV_ATOMIC(drm, "%s: mstm prepare\n", mstm->outp->base.base.name);
-	ret = drm_dp_update_payload_part1(&mstm->mgr);
-
-	drm_for_each_encoder(encoder, mstm->outp->base.base.dev) {
-		if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
-			struct nv50_msto *msto = nv50_msto(encoder);
-			struct nv50_mstc *mstc = msto->mstc;
-			if (mstc && mstc->mstm == mstm)
-				nv50_msto_prepare(msto);
-		}
-	}
-
-	if (mstm->disabled) {
-		if (!mstm->links)
-			nv50_outp_release(mstm->outp);
-		mstm->disabled = false;
-	}
-}
-
-static void
-nv50_mstm_hotplug(struct drm_dp_mst_topology_mgr *mgr)
-{
-	struct nv50_mstm *mstm = nv50_mstm(mgr);
-	drm_kms_helper_hotplug_event(mstm->outp->base.base.dev);
-}
-
-static void
-nv50_mstm_destroy_connector(struct drm_dp_mst_topology_mgr *mgr,
-			    struct drm_connector *connector)
-{
-	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nv50_mstc *mstc = nv50_mstc(connector);
-
-	drm_connector_unregister(&mstc->connector);
-
-	drm_fb_helper_remove_one_connector(&drm->fbcon->helper, &mstc->connector);
-
-	drm_modeset_lock(&drm->dev->mode_config.connection_mutex, NULL);
-	mstc->port = NULL;
-	drm_modeset_unlock(&drm->dev->mode_config.connection_mutex);
-
-	drm_connector_unreference(&mstc->connector);
-}
-
-static void
-nv50_mstm_register_connector(struct drm_connector *connector)
-{
-	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-
-	drm_fb_helper_add_one_connector(&drm->fbcon->helper, connector);
-
-	drm_connector_register(connector);
-}
-
-static struct drm_connector *
-nv50_mstm_add_connector(struct drm_dp_mst_topology_mgr *mgr,
-			struct drm_dp_mst_port *port, const char *path)
-{
-	struct nv50_mstm *mstm = nv50_mstm(mgr);
-	struct nv50_mstc *mstc;
-	int ret;
-
-	ret = nv50_mstc_new(mstm, port, path, &mstc);
-	if (ret) {
-		if (mstc)
-			mstc->connector.funcs->destroy(&mstc->connector);
-		return NULL;
-	}
-
-	return &mstc->connector;
-}
-
-static const struct drm_dp_mst_topology_cbs
-nv50_mstm = {
-	.add_connector = nv50_mstm_add_connector,
-	.register_connector = nv50_mstm_register_connector,
-	.destroy_connector = nv50_mstm_destroy_connector,
-	.hotplug = nv50_mstm_hotplug,
-};
-
-void
-nv50_mstm_service(struct nv50_mstm *mstm)
-{
-	struct drm_dp_aux *aux = mstm ? mstm->mgr.aux : NULL;
-	bool handled = true;
-	int ret;
-	u8 esi[8] = {};
-
-	if (!aux)
-		return;
-
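-	/* Keep servicing the sink's Event Status Indicator until no more
-	 * events are pending, acking each batch before polling again.
-	 */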
-	while (handled) {
-		ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT_ESI, esi, 8);
-		if (ret != 8) {
-			drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
-			return;
-		}
-
-		drm_dp_mst_hpd_irq(&mstm->mgr, esi, &handled);
-		if (!handled)
-			break;
-
-		drm_dp_dpcd_write(aux, DP_SINK_COUNT_ESI + 1, &esi[1], 3);
-	}
-}
-
-void
-nv50_mstm_remove(struct nv50_mstm *mstm)
-{
-	if (mstm)
-		drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
-}
-
-static int
-nv50_mstm_enable(struct nv50_mstm *mstm, u8 dpcd, int state)
-{
-	struct nouveau_encoder *outp = mstm->outp;
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_dp_mst_link_v0 mst;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_LINK,
-		.base.hasht = outp->dcb->hasht,
-		.base.hashm = outp->dcb->hashm,
-		.mst.state = state,
-	};
-	struct nouveau_drm *drm = nouveau_drm(outp->base.base.dev);
-	struct nvif_object *disp = &drm->display->disp;
-	int ret;
-
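-	/* MST control is only defined for DPCD revision 1.2 and newer. */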
-	if (dpcd >= 0x12) {
-		ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CTRL, &dpcd);
-		if (ret < 0)
-			return ret;
-
-		dpcd &= ~DP_MST_EN;
-		if (state)
-			dpcd |= DP_MST_EN;
-
-		ret = drm_dp_dpcd_writeb(mstm->mgr.aux, DP_MSTM_CTRL, dpcd);
-		if (ret < 0)
-			return ret;
-	}
-
-	return nvif_mthd(disp, 0, &args, sizeof(args));
-}
-
-int
-nv50_mstm_detect(struct nv50_mstm *mstm, u8 dpcd[8], int allow)
-{
-	int ret, state = 0;
-
-	if (!mstm)
-		return 0;
-
-	if (dpcd[0] >= 0x12) {
-		ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CAP, &dpcd[1]);
-		if (ret < 0)
-			return ret;
-
-		if (!(dpcd[1] & DP_MST_CAP))
-			dpcd[0] = 0x11;
-		else
-			state = allow;
-	}
-
-	ret = nv50_mstm_enable(mstm, dpcd[0], state);
-	if (ret)
-		return ret;
-
-	ret = drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, state);
-	if (ret)
-		return nv50_mstm_enable(mstm, dpcd[0], 0);
-
-	return mstm->mgr.mst_state;
-}
-
-static void
-nv50_mstm_fini(struct nv50_mstm *mstm)
-{
-	if (mstm && mstm->mgr.mst_state)
-		drm_dp_mst_topology_mgr_suspend(&mstm->mgr);
-}
-
-static void
-nv50_mstm_init(struct nv50_mstm *mstm)
-{
-	if (mstm && mstm->mgr.mst_state)
-		drm_dp_mst_topology_mgr_resume(&mstm->mgr);
-}
-
-static void
-nv50_mstm_del(struct nv50_mstm **pmstm)
-{
-	struct nv50_mstm *mstm = *pmstm;
-	if (mstm) {
-		kfree(*pmstm);
-		*pmstm = NULL;
-	}
-}
-
-static int
-nv50_mstm_new(struct nouveau_encoder *outp, struct drm_dp_aux *aux, int aux_max,
-	      int conn_base_id, struct nv50_mstm **pmstm)
-{
-	const int max_payloads = hweight8(outp->dcb->heads);
-	struct drm_device *dev = outp->base.base.dev;
-	struct nv50_mstm *mstm;
-	int ret, i;
-	u8 dpcd;
-
-	/* This is a workaround for some monitors not functioning
-	 * correctly in MST mode on initial module load.  I think
-	 * some bad interaction with the VBIOS may be responsible.
-	 *
-	 * A good ol' off and on again seems to work here ;)
-	 */
-	ret = drm_dp_dpcd_readb(aux, DP_DPCD_REV, &dpcd);
-	if (ret >= 0 && dpcd >= 0x12)
-		drm_dp_dpcd_writeb(aux, DP_MSTM_CTRL, 0);
-
-	if (!(mstm = *pmstm = kzalloc(sizeof(*mstm), GFP_KERNEL)))
-		return -ENOMEM;
-	mstm->outp = outp;
-	mstm->mgr.cbs = &nv50_mstm;
-
-	ret = drm_dp_mst_topology_mgr_init(&mstm->mgr, dev, aux, aux_max,
-					   max_payloads, conn_base_id);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < max_payloads; i++) {
-		ret = nv50_msto_new(dev, outp->dcb->heads, outp->base.base.name,
-				    i, &mstm->msto[i]);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-/******************************************************************************
- * SOR
- *****************************************************************************/
-static void
-nv50_sor_update(struct nouveau_encoder *nv_encoder, u8 head,
-		struct drm_display_mode *mode, u8 proto, u8 depth)
-{
-	struct nv50_dmac *core = &nv50_mast(nv_encoder->base.base.dev)->base;
-	u32 *push;
-
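-	/* ctrl packs the active head mask in bits 3:0 and the protocol in
-	 * bits 11:8; once no heads remain the whole word is cleared to
-	 * fully shut the SOR down.
-	 */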
-	if (!mode) {
-		nv_encoder->ctrl &= ~BIT(head);
-		if (!(nv_encoder->ctrl & 0x0000000f))
-			nv_encoder->ctrl = 0;
-	} else {
-		nv_encoder->ctrl |= proto << 8;
-		nv_encoder->ctrl |= BIT(head);
-	}
-
-	if ((push = evo_wait(core, 6))) {
-		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
-			if (mode) {
-				if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-					nv_encoder->ctrl |= 0x00001000;
-				if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-					nv_encoder->ctrl |= 0x00002000;
-				nv_encoder->ctrl |= depth << 16;
-			}
-			evo_mthd(push, 0x0600 + (nv_encoder->or * 0x40), 1);
-		} else {
-			if (mode) {
-				u32 magic = 0x31ec6000 | (head << 25);
-				u32 syncs = 0x00000001;
-				if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-					syncs |= 0x00000008;
-				if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-					syncs |= 0x00000010;
-				if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-					magic |= 0x00000001;
-
-				evo_mthd(push, 0x0404 + (head * 0x300), 2);
-				evo_data(push, syncs | (depth << 6));
-				evo_data(push, magic);
-			}
-			evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 1);
-		}
-		evo_data(push, nv_encoder->ctrl);
-		evo_kick(push, core);
-	}
-}
-
-static void
-nv50_sor_disable(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc);
-
-	nv_encoder->crtc = NULL;
-
-	if (nv_crtc) {
-		struct nvkm_i2c_aux *aux = nv_encoder->aux;
-		u8 pwr;
-
-		if (aux) {
-			int ret = nvkm_rdaux(aux, DP_SET_POWER, &pwr, 1);
-			if (ret == 0) {
-				pwr &= ~DP_SET_POWER_MASK;
-				pwr |=  DP_SET_POWER_D3;
-				nvkm_wraux(aux, DP_SET_POWER, &pwr, 1);
-			}
-		}
-
-		nv_encoder->update(nv_encoder, nv_crtc->index, NULL, 0, 0);
-		nv50_audio_disable(encoder, nv_crtc);
-		nv50_hdmi_disable(&nv_encoder->base.base, nv_crtc);
-		nv50_outp_release(nv_encoder);
-	}
-}
-
-static void
-nv50_sor_enable(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
-	struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode;
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_lvds_script_v0 lvds;
-	} lvds = {
-		.base.version = 1,
-		.base.method  = NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT,
-		.base.hasht   = nv_encoder->dcb->hasht,
-		.base.hashm   = nv_encoder->dcb->hashm,
-	};
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_connector *nv_connector;
-	struct nvbios *bios = &drm->vbios;
-	u8 proto = 0xf;
-	u8 depth = 0x0;
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	nv_encoder->crtc = encoder->crtc;
-	nv50_outp_acquire(nv_encoder);
-
-	switch (nv_encoder->dcb->type) {
-	case DCB_OUTPUT_TMDS:
-		if (nv_encoder->link & 1) {
-			proto = 0x1;
-			/* Only enable dual-link if:
-			 *  - Need to (i.e. rate > 165MHz)
-			 *  - DCB says we can
-			 *  - Not an HDMI monitor, since there's no dual-link
-			 *    on HDMI.
-			 */
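-			/* (165MHz being the single-link TMDS pixel clock
-			 * limit.)
-			 */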
-			if (mode->clock >= 165000 &&
-			    nv_encoder->dcb->duallink_possible &&
-			    !drm_detect_hdmi_monitor(nv_connector->edid))
-				proto |= 0x4;
-		} else {
-			proto = 0x2;
-		}
-
-		nv50_hdmi_enable(&nv_encoder->base.base, mode);
-		break;
-	case DCB_OUTPUT_LVDS:
-		proto = 0x0;
-
-		if (bios->fp_no_ddc) {
-			if (bios->fp.dual_link)
-				lvds.lvds.script |= 0x0100;
-			if (bios->fp.if_is_24bit)
-				lvds.lvds.script |= 0x0200;
-		} else {
-			if (nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) {
-				if (((u8 *)nv_connector->edid)[121] == 2)
-					lvds.lvds.script |= 0x0100;
-			} else
-			if (mode->clock >= bios->fp.duallink_transition_clk) {
-				lvds.lvds.script |= 0x0100;
-			}
-
-			if (lvds.lvds.script & 0x0100) {
-				if (bios->fp.strapless_is_24bit & 2)
-					lvds.lvds.script |= 0x0200;
-			} else {
-				if (bios->fp.strapless_is_24bit & 1)
-					lvds.lvds.script |= 0x0200;
-			}
-
-			if (nv_connector->base.display_info.bpc == 8)
-				lvds.lvds.script |= 0x0200;
-		}
-
-		nvif_mthd(disp->disp, 0, &lvds, sizeof(lvds));
-		break;
-	case DCB_OUTPUT_DP:
-		if (nv_connector->base.display_info.bpc == 6)
-			depth = 0x2;
-		else
-		if (nv_connector->base.display_info.bpc == 8)
-			depth = 0x5;
-		else
-			depth = 0x6;
-
-		if (nv_encoder->link & 1)
-			proto = 0x8;
-		else
-			proto = 0x9;
-
-		nv50_audio_enable(encoder, mode);
-		break;
-	default:
-		BUG();
-		break;
-	}
-
-	nv_encoder->update(nv_encoder, nv_crtc->index, mode, proto, depth);
-}
-
-static const struct drm_encoder_helper_funcs
-nv50_sor_help = {
-	.atomic_check = nv50_outp_atomic_check,
-	.enable = nv50_sor_enable,
-	.disable = nv50_sor_disable,
-};
-
-static void
-nv50_sor_destroy(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	nv50_mstm_del(&nv_encoder->dp.mstm);
-	drm_encoder_cleanup(encoder);
-	kfree(encoder);
-}
-
-static const struct drm_encoder_funcs
-nv50_sor_func = {
-	.destroy = nv50_sor_destroy,
-};
-
-static int
-nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
-{
-	struct nouveau_connector *nv_connector = nouveau_connector(connector);
-	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
-	struct nouveau_encoder *nv_encoder;
-	struct drm_encoder *encoder;
-	int type, ret;
-
-	switch (dcbe->type) {
-	case DCB_OUTPUT_LVDS: type = DRM_MODE_ENCODER_LVDS; break;
-	case DCB_OUTPUT_TMDS:
-	case DCB_OUTPUT_DP:
-	default:
-		type = DRM_MODE_ENCODER_TMDS;
-		break;
-	}
-
-	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
-	if (!nv_encoder)
-		return -ENOMEM;
-	nv_encoder->dcb = dcbe;
-	nv_encoder->update = nv50_sor_update;
-
-	encoder = to_drm_encoder(nv_encoder);
-	encoder->possible_crtcs = dcbe->heads;
-	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type,
-			 "sor-%04x-%04x", dcbe->hasht, dcbe->hashm);
-	drm_encoder_helper_add(encoder, &nv50_sor_help);
-
-	drm_mode_connector_attach_encoder(connector, encoder);
-
-	if (dcbe->type == DCB_OUTPUT_DP) {
-		struct nv50_disp *disp = nv50_disp(encoder->dev);
-		struct nvkm_i2c_aux *aux =
-			nvkm_i2c_aux_find(i2c, dcbe->i2c_index);
-		if (aux) {
-			if (disp->disp->oclass < GF110_DISP) {
-				/* HW has no support for address-only
-				 * transactions, so we're required to
-				 * use custom I2C-over-AUX code.
-				 */
-				nv_encoder->i2c = &aux->i2c;
-			} else {
-				nv_encoder->i2c = &nv_connector->aux.ddc;
-			}
-			nv_encoder->aux = aux;
-		}
-
-		/*TODO: Use DP Info Table to check for support. */
-		if (disp->disp->oclass >= GF110_DISP) {
-			ret = nv50_mstm_new(nv_encoder, &nv_connector->aux, 16,
-					    nv_connector->base.base.id,
-					    &nv_encoder->dp.mstm);
-			if (ret)
-				return ret;
-		}
-	} else {
-		struct nvkm_i2c_bus *bus =
-			nvkm_i2c_bus_find(i2c, dcbe->i2c_index);
-		if (bus)
-			nv_encoder->i2c = &bus->i2c;
-	}
-
-	return 0;
-}
-
-/******************************************************************************
- * PIOR
- *****************************************************************************/
-static int
-nv50_pior_atomic_check(struct drm_encoder *encoder,
-		       struct drm_crtc_state *crtc_state,
-		       struct drm_connector_state *conn_state)
-{
-	int ret = nv50_outp_atomic_check(encoder, crtc_state, conn_state);
-	if (ret)
-		return ret;
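-	/* The external encoder link appears to run at twice the pixel
-	 * clock, hence the doubling here.
-	 */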
-	crtc_state->adjusted_mode.clock *= 2;
-	return 0;
-}
-
-static void
-nv50_pior_disable(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_mast *mast = nv50_mast(encoder->dev);
-	const int or = nv_encoder->or;
-	u32 *push;
-
-	if (nv_encoder->crtc) {
-		push = evo_wait(mast, 4);
-		if (push) {
-			if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-				evo_mthd(push, 0x0700 + (or * 0x040), 1);
-				evo_data(push, 0x00000000);
-			}
-			evo_kick(push, mast);
-		}
-	}
-
-	nv_encoder->crtc = NULL;
-	nv50_outp_release(nv_encoder);
-}
-
-static void
-nv50_pior_enable(struct drm_encoder *encoder)
-{
-	struct nv50_mast *mast = nv50_mast(encoder->dev);
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
-	struct nouveau_connector *nv_connector;
-	struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode;
-	u8 owner = 1 << nv_crtc->index;
-	u8 proto, depth;
-	u32 *push;
-
-	nv50_outp_acquire(nv_encoder);
-
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	switch (nv_connector->base.display_info.bpc) {
-	case 10: depth = 0x6; break;
-	case  8: depth = 0x5; break;
-	case  6: depth = 0x2; break;
-	default: depth = 0x0; break;
-	}
-
-	switch (nv_encoder->dcb->type) {
-	case DCB_OUTPUT_TMDS:
-	case DCB_OUTPUT_DP:
-		proto = 0x0;
-		break;
-	default:
-		BUG();
-		break;
-	}
-
-	push = evo_wait(mast, 8);
-	if (push) {
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			u32 ctrl = (depth << 16) | (proto << 8) | owner;
-			if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-				ctrl |= 0x00001000;
-			if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-				ctrl |= 0x00002000;
-			evo_mthd(push, 0x0700 + (nv_encoder->or * 0x040), 1);
-			evo_data(push, ctrl);
-		}
-
-		evo_kick(push, mast);
-	}
-
-	nv_encoder->crtc = encoder->crtc;
-}
-
-static const struct drm_encoder_helper_funcs
-nv50_pior_help = {
-	.atomic_check = nv50_pior_atomic_check,
-	.enable = nv50_pior_enable,
-	.disable = nv50_pior_disable,
-};
-
-static void
-nv50_pior_destroy(struct drm_encoder *encoder)
-{
-	drm_encoder_cleanup(encoder);
-	kfree(encoder);
-}
-
-static const struct drm_encoder_funcs
-nv50_pior_func = {
-	.destroy = nv50_pior_destroy,
-};
-
-static int
-nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
-{
-	struct nouveau_connector *nv_connector = nouveau_connector(connector);
-	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
-	struct nvkm_i2c_bus *bus = NULL;
-	struct nvkm_i2c_aux *aux = NULL;
-	struct i2c_adapter *ddc;
-	struct nouveau_encoder *nv_encoder;
-	struct drm_encoder *encoder;
-	int type;
-
-	switch (dcbe->type) {
-	case DCB_OUTPUT_TMDS:
-		bus  = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_EXT(dcbe->extdev));
-		ddc  = bus ? &bus->i2c : NULL;
-		type = DRM_MODE_ENCODER_TMDS;
-		break;
-	case DCB_OUTPUT_DP:
-		aux  = nvkm_i2c_aux_find(i2c, NVKM_I2C_AUX_EXT(dcbe->extdev));
-		ddc  = aux ? &nv_connector->aux.ddc : NULL;
-		type = DRM_MODE_ENCODER_TMDS;
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
-	if (!nv_encoder)
-		return -ENOMEM;
-	nv_encoder->dcb = dcbe;
-	nv_encoder->i2c = ddc;
-	nv_encoder->aux = aux;
-
-	encoder = to_drm_encoder(nv_encoder);
-	encoder->possible_crtcs = dcbe->heads;
-	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type,
-			 "pior-%04x-%04x", dcbe->hasht, dcbe->hashm);
-	drm_encoder_helper_add(encoder, &nv50_pior_help);
-
-	drm_mode_connector_attach_encoder(connector, encoder);
-	return 0;
-}
-
-/******************************************************************************
- * Atomic
- *****************************************************************************/
-
-static void
-nv50_disp_atomic_commit_core(struct nouveau_drm *drm, u32 interlock)
-{
-	struct nv50_disp *disp = nv50_disp(drm->dev);
-	struct nv50_dmac *core = &disp->mast.base;
-	struct nv50_mstm *mstm;
-	struct drm_encoder *encoder;
-	u32 *push;
-
-	NV_ATOMIC(drm, "commit core %08x\n", interlock);
-
-	drm_for_each_encoder(encoder, drm->dev) {
-		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
-			mstm = nouveau_encoder(encoder)->dp.mstm;
-			if (mstm && mstm->modified)
-				nv50_mstm_prepare(mstm);
-		}
-	}
-
-	if ((push = evo_wait(core, 5))) {
-		evo_mthd(push, 0x0084, 1);
-		evo_data(push, 0x80000000);
-		evo_mthd(push, 0x0080, 2);
-		evo_data(push, interlock);
-		evo_data(push, 0x00000000);
-		nouveau_bo_wr32(disp->sync, 0, 0x00000000);
-		evo_kick(push, core);
-		if (nvif_msec(&drm->client.device, 2000ULL,
-			if (nouveau_bo_rd32(disp->sync, 0))
-				break;
-			usleep_range(1, 2);
-		) < 0)
-			NV_ERROR(drm, "EVO timeout\n");
-	}
-
-	drm_for_each_encoder(encoder, drm->dev) {
-		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
-			mstm = nouveau_encoder(encoder)->dp.mstm;
-			if (mstm && mstm->modified)
-				nv50_mstm_cleanup(mstm);
-		}
-	}
-}
-
-static void
-nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
-{
-	struct drm_device *dev = state->dev;
-	struct drm_crtc_state *new_crtc_state, *old_crtc_state;
-	struct drm_crtc *crtc;
-	struct drm_plane_state *new_plane_state;
-	struct drm_plane *plane;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nv50_disp *disp = nv50_disp(dev);
-	struct nv50_atom *atom = nv50_atom(state);
-	struct nv50_outp_atom *outp, *outt;
-	u32 interlock_core = 0;
-	u32 interlock_chan = 0;
-	int i;
-
-	NV_ATOMIC(drm, "commit %d %d\n", atom->lock_core, atom->flush_disable);
-	drm_atomic_helper_wait_for_fences(dev, state, false);
-	drm_atomic_helper_wait_for_dependencies(state);
-	drm_atomic_helper_update_legacy_modeset_state(dev, state);
-
-	if (atom->lock_core)
-		mutex_lock(&disp->mutex);
-
-	/* Disable head(s). */
-	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state);
-		struct nv50_head *head = nv50_head(crtc);
-
-		NV_ATOMIC(drm, "%s: clr %04x (set %04x)\n", crtc->name,
-			  asyh->clr.mask, asyh->set.mask);
-		if (old_crtc_state->active && !new_crtc_state->active)
-			drm_crtc_vblank_off(crtc);
-
-		if (asyh->clr.mask) {
-			nv50_head_flush_clr(head, asyh, atom->flush_disable);
-			interlock_core |= 1;
-		}
-	}
-
-	/* Disable plane(s). */
-	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
-		struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
-		struct nv50_wndw *wndw = nv50_wndw(plane);
-
-		NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", plane->name,
-			  asyw->clr.mask, asyw->set.mask);
-		if (!asyw->clr.mask)
-			continue;
-
-		interlock_chan |= nv50_wndw_flush_clr(wndw, interlock_core,
-						      atom->flush_disable,
-						      asyw);
-	}
-
-	/* Disable output path(s). */
-	list_for_each_entry(outp, &atom->outp, head) {
-		const struct drm_encoder_helper_funcs *help;
-		struct drm_encoder *encoder;
-
-		encoder = outp->encoder;
-		help = encoder->helper_private;
-
-		NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", encoder->name,
-			  outp->clr.mask, outp->set.mask);
-
-		if (outp->clr.mask) {
-			help->disable(encoder);
-			interlock_core |= 1;
-			if (outp->flush_disable) {
-				nv50_disp_atomic_commit_core(drm, interlock_chan);
-				interlock_core = 0;
-				interlock_chan = 0;
-			}
-		}
-	}
-
-	/* Flush disable. */
-	if (interlock_core) {
-		if (atom->flush_disable) {
-			nv50_disp_atomic_commit_core(drm, interlock_chan);
-			interlock_core = 0;
-			interlock_chan = 0;
-		}
-	}
-
-	/* Update output path(s). */
-	list_for_each_entry_safe(outp, outt, &atom->outp, head) {
-		const struct drm_encoder_helper_funcs *help;
-		struct drm_encoder *encoder;
-
-		encoder = outp->encoder;
-		help = encoder->helper_private;
-
-		NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", encoder->name,
-			  outp->set.mask, outp->clr.mask);
-
-		if (outp->set.mask) {
-			help->enable(encoder);
-			interlock_core = 1;
-		}
-
-		list_del(&outp->head);
-		kfree(outp);
-	}
-
-	/* Update head(s). */
-	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state);
-		struct nv50_head *head = nv50_head(crtc);
-
-		NV_ATOMIC(drm, "%s: set %04x (clr %04x)\n", crtc->name,
-			  asyh->set.mask, asyh->clr.mask);
-
-		if (asyh->set.mask) {
-			nv50_head_flush_set(head, asyh);
-			interlock_core = 1;
-		}
-
-		if (new_crtc_state->active) {
-			if (!old_crtc_state->active)
-				drm_crtc_vblank_on(crtc);
-			if (new_crtc_state->event)
-				drm_crtc_vblank_get(crtc);
-		}
-	}
-
-	/* Update plane(s). */
-	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
-		struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
-		struct nv50_wndw *wndw = nv50_wndw(plane);
-
-		NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", plane->name,
-			  asyw->set.mask, asyw->clr.mask);
-		if ( !asyw->set.mask &&
-		    (!asyw->clr.mask || atom->flush_disable))
-			continue;
-
-		interlock_chan |= nv50_wndw_flush_set(wndw, interlock_core, asyw);
-	}
-
-	/* Flush update. */
-	if (interlock_core) {
-		if (!interlock_chan && atom->state.legacy_cursor_update) {
-			u32 *push = evo_wait(&disp->mast, 2);
-			if (push) {
-				evo_mthd(push, 0x0080, 1);
-				evo_data(push, 0x00000000);
-				evo_kick(push, &disp->mast);
-			}
-		} else {
-			nv50_disp_atomic_commit_core(drm, interlock_chan);
-		}
-	}
-
-	if (atom->lock_core)
-		mutex_unlock(&disp->mutex);
-
-	/* Wait for HW to signal completion. */
-	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
-		struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
-		struct nv50_wndw *wndw = nv50_wndw(plane);
-		int ret = nv50_wndw_wait_armed(wndw, asyw);
-		if (ret)
-			NV_ERROR(drm, "%s: timeout\n", plane->name);
-	}
-
-	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
-		if (new_crtc_state->event) {
-			unsigned long flags;
-			/* Get correct count/ts if racing with vblank irq */
-			if (new_crtc_state->active)
-				drm_crtc_accurate_vblank_count(crtc);
-			spin_lock_irqsave(&crtc->dev->event_lock, flags);
-			drm_crtc_send_vblank_event(crtc, new_crtc_state->event);
-			spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-
-			new_crtc_state->event = NULL;
-			if (new_crtc_state->active)
-				drm_crtc_vblank_put(crtc);
-		}
-	}
-
-	drm_atomic_helper_commit_hw_done(state);
-	drm_atomic_helper_cleanup_planes(dev, state);
-	drm_atomic_helper_commit_cleanup_done(state);
-	drm_atomic_state_put(state);
-}
-
-static void
-nv50_disp_atomic_commit_work(struct work_struct *work)
-{
-	struct drm_atomic_state *state =
-		container_of(work, typeof(*state), commit_work);
-	nv50_disp_atomic_commit_tail(state);
-}
-
-static int
-nv50_disp_atomic_commit(struct drm_device *dev,
-			struct drm_atomic_state *state, bool nonblock)
-{
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nv50_disp *disp = nv50_disp(dev);
-	struct drm_plane_state *new_plane_state;
-	struct drm_plane *plane;
-	struct drm_crtc *crtc;
-	bool active = false;
-	int ret, i;
-
-	ret = pm_runtime_get_sync(dev->dev);
-	if (ret < 0 && ret != -EACCES)
-		return ret;
-
-	ret = drm_atomic_helper_setup_commit(state, nonblock);
-	if (ret)
-		goto done;
-
-	INIT_WORK(&state->commit_work, nv50_disp_atomic_commit_work);
-
-	ret = drm_atomic_helper_prepare_planes(dev, state);
-	if (ret)
-		goto done;
-
-	if (!nonblock) {
-		ret = drm_atomic_helper_wait_for_fences(dev, state, true);
-		if (ret)
-			goto err_cleanup;
-	}
-
-	ret = drm_atomic_helper_swap_state(state, true);
-	if (ret)
-		goto err_cleanup;
-
-	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
-		struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state);
-		struct nv50_wndw *wndw = nv50_wndw(plane);
-
-		if (asyw->set.image) {
-			asyw->ntfy.handle = wndw->dmac->sync.handle;
-			asyw->ntfy.offset = wndw->ntfy;
-			asyw->ntfy.awaken = false;
-			asyw->set.ntfy = true;
-			nouveau_bo_wr32(disp->sync, wndw->ntfy / 4, 0x00000000);
-			wndw->ntfy ^= 0x10;
-		}
-	}
-
-	drm_atomic_state_get(state);
-
-	if (nonblock)
-		queue_work(system_unbound_wq, &state->commit_work);
-	else
-		nv50_disp_atomic_commit_tail(state);
-
-	drm_for_each_crtc(crtc, dev) {
-		if (crtc->state->enable) {
-			if (!drm->have_disp_power_ref) {
-				drm->have_disp_power_ref = true;
-				return 0;
-			}
-			active = true;
-			break;
-		}
-	}
-
-	if (!active && drm->have_disp_power_ref) {
-		pm_runtime_put_autosuspend(dev->dev);
-		drm->have_disp_power_ref = false;
-	}
-
-err_cleanup:
-	if (ret)
-		drm_atomic_helper_cleanup_planes(dev, state);
-done:
-	pm_runtime_put_autosuspend(dev->dev);
-	return ret;
-}
-
-static struct nv50_outp_atom *
-nv50_disp_outp_atomic_add(struct nv50_atom *atom, struct drm_encoder *encoder)
-{
-	struct nv50_outp_atom *outp;
-
-	list_for_each_entry(outp, &atom->outp, head) {
-		if (outp->encoder == encoder)
-			return outp;
-	}
-
-	outp = kzalloc(sizeof(*outp), GFP_KERNEL);
-	if (!outp)
-		return ERR_PTR(-ENOMEM);
-
-	list_add(&outp->head, &atom->outp);
-	outp->encoder = encoder;
-	return outp;
-}
-
-static int
-nv50_disp_outp_atomic_check_clr(struct nv50_atom *atom,
-				struct drm_connector_state *old_connector_state)
-{
-	struct drm_encoder *encoder = old_connector_state->best_encoder;
-	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
-	struct drm_crtc *crtc;
-	struct nv50_outp_atom *outp;
-
-	if (!(crtc = old_connector_state->crtc))
-		return 0;
-
-	old_crtc_state = drm_atomic_get_old_crtc_state(&atom->state, crtc);
-	new_crtc_state = drm_atomic_get_new_crtc_state(&atom->state, crtc);
-	if (old_crtc_state->active && drm_atomic_crtc_needs_modeset(new_crtc_state)) {
-		outp = nv50_disp_outp_atomic_add(atom, encoder);
-		if (IS_ERR(outp))
-			return PTR_ERR(outp);
-
-		if (outp->encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
-			outp->flush_disable = true;
-			atom->flush_disable = true;
-		}
-		outp->clr.ctrl = true;
-		atom->lock_core = true;
-	}
-
-	return 0;
-}
-
-static int
-nv50_disp_outp_atomic_check_set(struct nv50_atom *atom,
-				struct drm_connector_state *connector_state)
-{
-	struct drm_encoder *encoder = connector_state->best_encoder;
-	struct drm_crtc_state *new_crtc_state;
-	struct drm_crtc *crtc;
-	struct nv50_outp_atom *outp;
-
-	if (!(crtc = connector_state->crtc))
-		return 0;
-
-	new_crtc_state = drm_atomic_get_new_crtc_state(&atom->state, crtc);
-	if (new_crtc_state->active && drm_atomic_crtc_needs_modeset(new_crtc_state)) {
-		outp = nv50_disp_outp_atomic_add(atom, encoder);
-		if (IS_ERR(outp))
-			return PTR_ERR(outp);
-
-		outp->set.ctrl = true;
-		atom->lock_core = true;
-	}
-
-	return 0;
-}
-
-static int
-nv50_disp_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
-{
-	struct nv50_atom *atom = nv50_atom(state);
-	struct drm_connector_state *old_connector_state, *new_connector_state;
-	struct drm_connector *connector;
-	int ret, i;
-
-	ret = drm_atomic_helper_check(dev, state);
-	if (ret)
-		return ret;
-
-	for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) {
-		ret = nv50_disp_outp_atomic_check_clr(atom, old_connector_state);
-		if (ret)
-			return ret;
-
-		ret = nv50_disp_outp_atomic_check_set(atom, new_connector_state);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static void
-nv50_disp_atomic_state_clear(struct drm_atomic_state *state)
-{
-	struct nv50_atom *atom = nv50_atom(state);
-	struct nv50_outp_atom *outp, *outt;
-
-	list_for_each_entry_safe(outp, outt, &atom->outp, head) {
-		list_del(&outp->head);
-		kfree(outp);
-	}
-
-	drm_atomic_state_default_clear(state);
-}
-
-static void
-nv50_disp_atomic_state_free(struct drm_atomic_state *state)
-{
-	struct nv50_atom *atom = nv50_atom(state);
-	drm_atomic_state_default_release(&atom->state);
-	kfree(atom);
-}
-
-static struct drm_atomic_state *
-nv50_disp_atomic_state_alloc(struct drm_device *dev)
-{
-	struct nv50_atom *atom;
-	if (!(atom = kzalloc(sizeof(*atom), GFP_KERNEL)) ||
-	    drm_atomic_state_init(dev, &atom->state) < 0) {
-		kfree(atom);
-		return NULL;
-	}
-	INIT_LIST_HEAD(&atom->outp);
-	return &atom->state;
-}
-
-static const struct drm_mode_config_funcs
-nv50_disp_func = {
-	.fb_create = nouveau_user_framebuffer_create,
-	.output_poll_changed = drm_fb_helper_output_poll_changed,
-	.atomic_check = nv50_disp_atomic_check,
-	.atomic_commit = nv50_disp_atomic_commit,
-	.atomic_state_alloc = nv50_disp_atomic_state_alloc,
-	.atomic_state_clear = nv50_disp_atomic_state_clear,
-	.atomic_state_free = nv50_disp_atomic_state_free,
-};
-
-/******************************************************************************
- * Init
- *****************************************************************************/
-
-void
-nv50_display_fini(struct drm_device *dev)
-{
-	struct nouveau_encoder *nv_encoder;
-	struct drm_encoder *encoder;
-	struct drm_plane *plane;
-
-	drm_for_each_plane(plane, dev) {
-		struct nv50_wndw *wndw = nv50_wndw(plane);
-		if (plane->funcs != &nv50_wndw)
-			continue;
-		nv50_wndw_fini(wndw);
-	}
-
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
-			nv_encoder = nouveau_encoder(encoder);
-			nv50_mstm_fini(nv_encoder->dp.mstm);
-		}
-	}
-}
-
-int
-nv50_display_init(struct drm_device *dev)
-{
-	struct drm_encoder *encoder;
-	struct drm_plane *plane;
-	u32 *push;
-
-	push = evo_wait(nv50_mast(dev), 32);
-	if (!push)
-		return -EBUSY;
-
-	evo_mthd(push, 0x0088, 1);
-	evo_data(push, nv50_mast(dev)->base.sync.handle);
-	evo_kick(push, nv50_mast(dev));
-
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
-			struct nouveau_encoder *nv_encoder =
-				nouveau_encoder(encoder);
-			nv50_mstm_init(nv_encoder->dp.mstm);
-		}
-	}
-
-	drm_for_each_plane(plane, dev) {
-		struct nv50_wndw *wndw = nv50_wndw(plane);
-		if (plane->funcs != &nv50_wndw)
-			continue;
-		nv50_wndw_init(wndw);
-	}
-
-	return 0;
-}
-
-void
-nv50_display_destroy(struct drm_device *dev)
-{
-	struct nv50_disp *disp = nv50_disp(dev);
-
-	nv50_dmac_destroy(&disp->mast.base, disp->disp);
-
-	nouveau_bo_unmap(disp->sync);
-	if (disp->sync)
-		nouveau_bo_unpin(disp->sync);
-	nouveau_bo_ref(NULL, &disp->sync);
-
-	nouveau_display(dev)->priv = NULL;
-	kfree(disp);
-}
-
-MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)");
-static int nouveau_atomic = 0;
-module_param_named(atomic, nouveau_atomic, int, 0400);
-
-int
-nv50_display_create(struct drm_device *dev)
-{
-	struct nvif_device *device = &nouveau_drm(dev)->client.device;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct dcb_table *dcb = &drm->vbios.dcb;
-	struct drm_connector *connector, *tmp;
-	struct nv50_disp *disp;
-	struct dcb_output *dcbe;
-	int crtcs, ret, i;
-
-	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
-	if (!disp)
-		return -ENOMEM;
-
-	mutex_init(&disp->mutex);
-
-	nouveau_display(dev)->priv = disp;
-	nouveau_display(dev)->dtor = nv50_display_destroy;
-	nouveau_display(dev)->init = nv50_display_init;
-	nouveau_display(dev)->fini = nv50_display_fini;
-	disp->disp = &nouveau_display(dev)->disp;
-	dev->mode_config.funcs = &nv50_disp_func;
-	dev->driver->driver_features |= DRIVER_PREFER_XBGR_30BPP;
-	if (nouveau_atomic)
-		dev->driver->driver_features |= DRIVER_ATOMIC;
-
-	/* small shared memory area we use for notifiers and semaphores */
-	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, NULL, &disp->sync);
-	if (!ret) {
-		ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM, true);
-		if (!ret) {
-			ret = nouveau_bo_map(disp->sync);
-			if (ret)
-				nouveau_bo_unpin(disp->sync);
-		}
-		if (ret)
-			nouveau_bo_ref(NULL, &disp->sync);
-	}
-
-	if (ret)
-		goto out;
-
-	/* allocate master evo channel */
-	ret = nv50_core_create(device, disp->disp, disp->sync->bo.offset,
-			      &disp->mast);
-	if (ret)
-		goto out;
-
-	/* create crtc objects to represent the hw heads */
-	if (disp->disp->oclass >= GF110_DISP)
-		crtcs = nvif_rd32(&device->object, 0x612004) & 0xf;
-	else
-		crtcs = 0x3;
-
-	for (i = 0; i < fls(crtcs); i++) {
-		if (!(crtcs & (1 << i)))
-			continue;
-		ret = nv50_head_create(dev, i);
-		if (ret)
-			goto out;
-	}
-
-	/* create encoder/connector objects based on VBIOS DCB table */
-	for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) {
-		connector = nouveau_connector_create(dev, dcbe->connector);
-		if (IS_ERR(connector))
-			continue;
-
-		if (dcbe->location == DCB_LOC_ON_CHIP) {
-			switch (dcbe->type) {
-			case DCB_OUTPUT_TMDS:
-			case DCB_OUTPUT_LVDS:
-			case DCB_OUTPUT_DP:
-				ret = nv50_sor_create(connector, dcbe);
-				break;
-			case DCB_OUTPUT_ANALOG:
-				ret = nv50_dac_create(connector, dcbe);
-				break;
-			default:
-				ret = -ENODEV;
-				break;
-			}
-		} else {
-			ret = nv50_pior_create(connector, dcbe);
-		}
-
-		if (ret) {
-			NV_WARN(drm, "failed to create encoder %d/%d/%d: %d\n",
-				     dcbe->location, dcbe->type,
-				     ffs(dcbe->or) - 1, ret);
-			ret = 0;
-		}
-	}
-
-	/* cull any connectors we created that don't have an encoder */
-	list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) {
-		if (connector->encoder_ids[0])
-			continue;
-
-		NV_WARN(drm, "%s has no encoders, removing\n",
-			connector->name);
-		connector->funcs->destroy(connector);
-	}
-
-out:
-	if (ret)
-		nv50_display_destroy(dev);
-	return ret;
-}
diff --git a/drivers/gpu/drm/nouveau/nv50_display.h b/drivers/gpu/drm/nouveau/nv50_display.h
index 918187c..fbd3b15 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.h
+++ b/drivers/gpu/drm/nouveau/nv50_display.h
@@ -28,7 +28,6 @@
 #define __NV50_DISPLAY_H__
 
 #include "nouveau_display.h"
-#include "nouveau_crtc.h"
 #include "nouveau_reg.h"
 
 int  nv50_display_create(struct drm_device *);
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index a369d97..a00ecc3de 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -78,8 +78,6 @@
 	priv->base.resume = nv17_fence_resume;
 	priv->base.context_new = nv50_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
-	priv->base.contexts = 127;
-	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
 	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 5f0c0c2..0906648 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -141,9 +141,9 @@
 	struct nv84_fence_priv *priv = drm->fence;
 	int i;
 
-	priv->suspend = vmalloc(priv->base.contexts * sizeof(u32));
+	priv->suspend = vmalloc(drm->chan.nr * sizeof(u32));
 	if (priv->suspend) {
-		for (i = 0; i < priv->base.contexts; i++)
+		for (i = 0; i < drm->chan.nr; i++)
 			priv->suspend[i] = nouveau_bo_rd32(priv->bo, i*4);
 	}
 
@@ -157,7 +157,7 @@
 	int i;
 
 	if (priv->suspend) {
-		for (i = 0; i < priv->base.contexts; i++)
+		for (i = 0; i < drm->chan.nr; i++)
 			nouveau_bo_wr32(priv->bo, i*4, priv->suspend[i]);
 		vfree(priv->suspend);
 		priv->suspend = NULL;
@@ -179,7 +179,6 @@
 int
 nv84_fence_create(struct nouveau_drm *drm)
 {
-	struct nvkm_fifo *fifo = nvxx_fifo(&drm->client.device);
 	struct nv84_fence_priv *priv;
 	u32 domain;
 	int ret;
@@ -194,8 +193,6 @@
 	priv->base.context_new = nv84_fence_context_new;
 	priv->base.context_del = nv84_fence_context_del;
 
-	priv->base.contexts = fifo->nr;
-	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	priv->base.uevent = true;
 
 	mutex_init(&priv->mutex);
@@ -207,7 +204,7 @@
 			  * will lose CPU/GPU coherency!
 			  */
 			 TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED;
-	ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0,
+	ret = nouveau_bo_new(&drm->client, 16 * drm->chan.nr, 0,
 			     domain, 0, 0, NULL, NULL, &priv->bo);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(priv->bo, domain, false);
diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild
index f1675a4..42e8c85 100644
--- a/drivers/gpu/drm/nouveau/nvif/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvif/Kbuild
@@ -1,8 +1,14 @@
 nvif-y := nvif/object.o
 nvif-y += nvif/client.o
 nvif-y += nvif/device.o
+nvif-y += nvif/disp.o
 nvif-y += nvif/driver.o
+nvif-y += nvif/fifo.o
 nvif-y += nvif/mem.o
 nvif-y += nvif/mmu.o
 nvif-y += nvif/notify.o
 nvif-y += nvif/vmm.o
+
+# Usermode classes
+nvif-y += nvif/user.o
+nvif-y += nvif/userc361.o
diff --git a/drivers/gpu/drm/nouveau/nvif/device.c b/drivers/gpu/drm/nouveau/nvif/device.c
index 252d8c3..1ec101b 100644
--- a/drivers/gpu/drm/nouveau/nvif/device.c
+++ b/drivers/gpu/drm/nouveau/nvif/device.c
@@ -37,6 +37,9 @@
 void
 nvif_device_fini(struct nvif_device *device)
 {
+	nvif_user_fini(device);
+	kfree(device->runlist);
+	device->runlist = NULL;
 	nvif_object_fini(&device->object);
 }
 
@@ -46,6 +49,8 @@
 {
 	int ret = nvif_object_init(parent, handle, oclass, data, size,
 				   &device->object);
+	device->runlist = NULL;
+	device->user.func = NULL;
 	if (ret == 0) {
 		device->info.version = 0;
 		ret = nvif_object_mthd(&device->object, NV_DEVICE_V0_INFO,
diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c
new file mode 100644
index 0000000..18c7d06
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/disp.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <nvif/disp.h>
+#include <nvif/device.h>
+
+#include <nvif/class.h>
+
+void
+nvif_disp_dtor(struct nvif_disp *disp)
+{
+	nvif_object_fini(&disp->object);
+}
+
+int
+nvif_disp_ctor(struct nvif_device *device, s32 oclass, struct nvif_disp *disp)
+{
+	static const struct nvif_mclass disps[] = {
+		{ GV100_DISP, -1 },
+		{ GP102_DISP, -1 },
+		{ GP100_DISP, -1 },
+		{ GM200_DISP, -1 },
+		{ GM107_DISP, -1 },
+		{ GK110_DISP, -1 },
+		{ GK104_DISP, -1 },
+		{ GF110_DISP, -1 },
+		{ GT214_DISP, -1 },
+		{ GT206_DISP, -1 },
+		{ GT200_DISP, -1 },
+		{   G82_DISP, -1 },
+		{  NV50_DISP, -1 },
+		{  NV04_DISP, -1 },
+		{}
+	};
+	int cid = nvif_sclass(&device->object, disps, oclass);
+	disp->object.client = NULL;
+	if (cid < 0)
+		return cid;
+
+	return nvif_object_init(&device->object, 0, disps[cid].oclass,
+				NULL, 0, &disp->object);
+}
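
The ctor above leans on nvif_sclass() to walk the class table from newest
(GV100_DISP) to oldest (NV04_DISP) and bind the first class the device
actually exposes, so a single call site covers every supported chipset. A
minimal usage sketch — the call site and error handling here are
illustrative, not part of this series:

	struct nvif_disp disp;
	int ret = nvif_disp_ctor(device, 0, &disp);
	if (ret == 0) {
		/* disp.object is bound to the newest DISP class the
		 * device supports; release it with the dtor when done.
		 */
		nvif_disp_dtor(&disp);
	}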
diff --git a/drivers/gpu/drm/nouveau/nvif/fifo.c b/drivers/gpu/drm/nouveau/nvif/fifo.c
new file mode 100644
index 0000000..99d4fd1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/fifo.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <nvif/fifo.h>
+
+static int
+nvif_fifo_runlists(struct nvif_device *device)
+{
+	struct nvif_object *object = &device->object;
+	struct {
+		struct nv_device_info_v1 m;
+		struct {
+			struct nv_device_info_v1_data runlists;
+			struct nv_device_info_v1_data runlist[64];
+		} v;
+	} *a;
+	int ret, i;
+
+	if (device->runlist)
+		return 0;
+
+	if (!(a = kmalloc(sizeof(*a), GFP_KERNEL)))
+		return -ENOMEM;
+	a->m.version = 1;
+	a->m.count = sizeof(a->v) / sizeof(a->v.runlists);
+	a->v.runlists.mthd = NV_DEVICE_FIFO_RUNLISTS;
+	for (i = 0; i < ARRAY_SIZE(a->v.runlist); i++)
+		a->v.runlist[i].mthd = NV_DEVICE_FIFO_RUNLIST_ENGINES(i);
+
+	ret = nvif_object_mthd(object, NV_DEVICE_V0_INFO, a, sizeof(*a));
+	if (ret)
+		goto done;
+
+	device->runlists = fls64(a->v.runlists.data);
+	device->runlist = kzalloc(sizeof(*device->runlist) *
+				  device->runlists, GFP_KERNEL);
+	if (!device->runlist) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	for (i = 0; i < device->runlists; i++) {
+		if (a->v.runlists.data & BIT_ULL(i))
+			device->runlist[i].engines = a->v.runlist[i].data;
+	}
+
+done:
+	kfree(a);
+	return ret;
+}
+
+u64
+nvif_fifo_runlist(struct nvif_device *device, u64 engine)
+{
+	struct nvif_object *object = &device->object;
+	struct {
+		struct nv_device_info_v1 m;
+		struct {
+			struct nv_device_info_v1_data engine;
+		} v;
+	} a = {
+		.m.version = 1,
+		.m.count = sizeof(a.v) / sizeof(a.v.engine),
+		.v.engine.mthd = engine,
+	};
+	u64 runm = 0;
+	int ret, i;
+
+	if ((ret = nvif_fifo_runlists(device)))
+		return runm;
+
+	ret = nvif_object_mthd(object, NV_DEVICE_V0_INFO, &a, sizeof(a));
+	if (ret == 0) {
+		for (i = 0; i < device->runlists; i++) {
+			if (device->runlist[i].engines & a.v.engine.data)
+				runm |= BIT_ULL(i);
+		}
+	}
+
+	return runm;
+}
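
nvif_fifo_runlist() folds the per-runlist engine masks cached by
nvif_fifo_runlists() into a bitmask of runlists hosting a given engine
type. A hedged caller sketch — the CE selector matches the
NV_DEVICE_INFO_ENGINE_* methods used elsewhere in this series, but the
surrounding code is illustrative:

	/* One bit set per runlist that contains a copy engine. */
	u64 runm = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_CE);
	if (!runm) {
		/* No CE on this device, or the info query failed. */
	}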
diff --git a/drivers/gpu/drm/nouveau/nvif/mem.c b/drivers/gpu/drm/nouveau/nvif/mem.c
index 0f9382c..b6ebb3b 100644
--- a/drivers/gpu/drm/nouveau/nvif/mem.c
+++ b/drivers/gpu/drm/nouveau/nvif/mem.c
@@ -24,6 +24,19 @@
 
 #include <nvif/if000a.h>
 
+int
+nvif_mem_init_map(struct nvif_mmu *mmu, u8 type, u64 size, struct nvif_mem *mem)
+{
+	int ret = nvif_mem_init(mmu, mmu->mem, NVIF_MEM_MAPPABLE | type, 0,
+				size, NULL, 0, mem);
+	if (ret == 0) {
+		ret = nvif_object_map(&mem->object, NULL, 0);
+		if (ret)
+			nvif_mem_fini(mem);
+	}
+	return ret;
+}
+
 void
 nvif_mem_fini(struct nvif_mem *mem)
 {
diff --git a/drivers/gpu/drm/nouveau/nvif/mmu.c b/drivers/gpu/drm/nouveau/nvif/mmu.c
index 15d0dcb..358ac4f 100644
--- a/drivers/gpu/drm/nouveau/nvif/mmu.c
+++ b/drivers/gpu/drm/nouveau/nvif/mmu.c
@@ -36,6 +36,12 @@
 int
 nvif_mmu_init(struct nvif_object *parent, s32 oclass, struct nvif_mmu *mmu)
 {
+	static const struct nvif_mclass mems[] = {
+		{ NVIF_CLASS_MEM_GF100, -1 },
+		{ NVIF_CLASS_MEM_NV50 , -1 },
+		{ NVIF_CLASS_MEM_NV04 , -1 },
+		{}
+	};
 	struct nvif_mmu_v0 args;
 	int ret, i;
 
@@ -54,6 +60,11 @@
 	mmu->type_nr = args.type_nr;
 	mmu->kind_nr = args.kind_nr;
 
+	ret = nvif_mclass(&mmu->object, mems);
+	if (ret < 0)
+		goto done;
+	mmu->mem = mems[ret].oclass;
+
 	mmu->heap = kmalloc(sizeof(*mmu->heap) * mmu->heap_nr, GFP_KERNEL);
 	mmu->type = kmalloc(sizeof(*mmu->type) * mmu->type_nr, GFP_KERNEL);
 	if (ret = -ENOMEM, !mmu->heap || !mmu->type)
diff --git a/drivers/gpu/drm/nouveau/nvif/user.c b/drivers/gpu/drm/nouveau/nvif/user.c
new file mode 100644
index 0000000..10da3cd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/user.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <nvif/user.h>
+#include <nvif/device.h>
+
+#include <nvif/class.h>
+
+void
+nvif_user_fini(struct nvif_device *device)
+{
+	if (device->user.func) {
+		nvif_object_fini(&device->user.object);
+		device->user.func = NULL;
+	}
+}
+
+int
+nvif_user_init(struct nvif_device *device)
+{
+	struct {
+		s32 oclass;
+		int version;
+		const struct nvif_user_func *func;
+	} users[] = {
+		{ VOLTA_USERMODE_A, -1, &nvif_userc361 },
+		{}
+	};
+	int cid, ret;
+
+	if (device->user.func)
+		return 0;
+
+	cid = nvif_mclass(&device->object, users);
+	if (cid < 0)
+		return cid;
+
+	ret = nvif_object_init(&device->object, 0, users[cid].oclass, NULL, 0,
+			       &device->user.object);
+	if (ret)
+		return ret;
+
+	nvif_object_map(&device->user.object, NULL, 0);
+	device->user.func = users[cid].func;
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvif/userc361.c b/drivers/gpu/drm/nouveau/nvif/userc361.c
new file mode 100644
index 0000000..19f9958
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/userc361.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <nvif/user.h>
+
+static void
+nvif_userc361_doorbell(struct nvif_user *user, u32 token)
+{
+	nvif_wr32(&user->object, 0x90, token);
+}
+
+const struct nvif_user_func
+nvif_userc361 = {
+	.doorbell = nvif_userc361_doorbell,
+};
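
With VOLTA_USERMODE_A mapped by nvif_user_init(), ringing a channel's
doorbell is a single MMIO write of its submission token to offset 0x90 of
the shared usermode region. A sketch of a submit path — the token variable
is assumed here; its source is not defined by this series:

	/* 'token' identifies the channel to the host scheduler (assumed). */
	if (device->user.func && device->user.func->doorbell)
		device->user.func->doorbell(&device->user, token);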
diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c
index 31cdb2d..191832b 100644
--- a/drivers/gpu/drm/nouveau/nvif/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvif/vmm.c
@@ -37,7 +37,7 @@
 	     struct nvif_mem *mem, u64 offset)
 {
 	struct nvif_vmm_map_v0 *args;
-	u8 stack[16];
+	u8 stack[48];
 	int ret;
 
 	if (sizeof(*args) + argc > sizeof(stack)) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
index 657231c..d0322ce 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
@@ -83,6 +83,20 @@
 }
 
 static int
+nvkm_engine_info(struct nvkm_subdev *subdev, u64 mthd, u64 *data)
+{
+	struct nvkm_engine *engine = nvkm_engine(subdev);
+	if (engine->func->info) {
+		if ((engine = nvkm_engine_ref(engine))) {
+			int ret = engine->func->info(engine, mthd, data);
+			nvkm_engine_unref(&engine);
+			return ret;
+		}
+	}
+	return -ENOSYS;
+}
+
+static int
 nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend)
 {
 	struct nvkm_engine *engine = nvkm_engine(subdev);
@@ -150,6 +164,7 @@
 	.preinit = nvkm_engine_preinit,
 	.init = nvkm_engine_init,
 	.fini = nvkm_engine_fini,
+	.info = nvkm_engine_info,
 	.intr = nvkm_engine_intr,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
index ccba4ae..8162e3d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
@@ -144,8 +144,7 @@
 	struct nvkm_ramht *ramht;
 	int ret, i;
 
-	if (!(ramht = *pramht = vzalloc(sizeof(*ramht) +
-					(size >> 3) * sizeof(*ramht->data))))
+	if (!(ramht = *pramht = vzalloc(struct_size(ramht, data, (size >> 3)))))
 		return -ENOMEM;
 
 	ramht->device = device;
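
The struct_size() conversion is behavior-preserving but overflow-safe:
struct_size(p, member, n) evaluates to sizeof(*p) + n * sizeof(*p->member),
saturating to SIZE_MAX if the arithmetic would wrap, so the allocator fails
cleanly instead of returning an undersized buffer. Side by side:

	/* Open-coded form: wraps silently for a huge (size >> 3). */
	vzalloc(sizeof(*ramht) + (size >> 3) * sizeof(*ramht->data));

	/* struct_size(): saturates on overflow, so vzalloc() returns NULL. */
	vzalloc(struct_size(ramht, data, size >> 3));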
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index a134d225..03f676c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -35,6 +35,7 @@
 	[NVKM_SUBDEV_BUS     ] = "bus",
 	[NVKM_SUBDEV_CLK     ] = "clk",
 	[NVKM_SUBDEV_DEVINIT ] = "devinit",
+	[NVKM_SUBDEV_FAULT   ] = "fault",
 	[NVKM_SUBDEV_FB      ] = "fb",
 	[NVKM_SUBDEV_FUSE    ] = "fuse",
 	[NVKM_SUBDEV_GPIO    ] = "gpio",
@@ -60,6 +61,9 @@
 	[NVKM_ENGINE_CE3     ] = "ce3",
 	[NVKM_ENGINE_CE4     ] = "ce4",
 	[NVKM_ENGINE_CE5     ] = "ce5",
+	[NVKM_ENGINE_CE6     ] = "ce6",
+	[NVKM_ENGINE_CE7     ] = "ce7",
+	[NVKM_ENGINE_CE8     ] = "ce8",
 	[NVKM_ENGINE_CIPHER  ] = "cipher",
 	[NVKM_ENGINE_DISP    ] = "disp",
 	[NVKM_ENGINE_DMAOBJ  ] = "dma",
@@ -92,6 +96,14 @@
 }
 
 int
+nvkm_subdev_info(struct nvkm_subdev *subdev, u64 mthd, u64 *data)
+{
+	if (subdev->func->info)
+		return subdev->func->info(subdev, mthd, data);
+	return -ENOSYS;
+}
+
+int
 nvkm_subdev_fini(struct nvkm_subdev *subdev, bool suspend)
 {
 	struct nvkm_device *device = subdev->device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index 255d81c..80d7844 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -5,3 +5,4 @@
 nvkm-y += nvkm/engine/ce/gm200.o
 nvkm-y += nvkm/engine/ce/gp100.o
 nvkm-y += nvkm/engine/ce/gp102.o
+nvkm-y += nvkm/engine/ce/gv100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gv100.c
new file mode 100644
index 0000000..fcda3de
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gv100.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <nvif/class.h>
+
+static const struct nvkm_engine_func
+gv100_ce = {
+	.intr = gp100_ce_intr,
+	.sclass = {
+		{ -1, -1, VOLTA_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+gv100_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&gv100_ce, device, index, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 05cd674..e294013 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2161,6 +2161,7 @@
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
 	.devinit = gm200_devinit_new,
+	.fault = gp100_fault_new,
 	.fb = gp100_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
@@ -2196,13 +2197,14 @@
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
 	.devinit = gm200_devinit_new,
+	.fault = gp100_fault_new,
 	.fb = gp102_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
 	.i2c = gm200_i2c_new,
 	.ibus = gm200_ibus_new,
 	.imem = nv50_instmem_new,
-	.ltc = gp100_ltc_new,
+	.ltc = gp102_ltc_new,
 	.mc = gp100_mc_new,
 	.mmu = gp100_mmu_new,
 	.therm = gp100_therm_new,
@@ -2231,13 +2233,14 @@
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
 	.devinit = gm200_devinit_new,
+	.fault = gp100_fault_new,
 	.fb = gp102_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
 	.i2c = gm200_i2c_new,
 	.ibus = gm200_ibus_new,
 	.imem = nv50_instmem_new,
-	.ltc = gp100_ltc_new,
+	.ltc = gp102_ltc_new,
 	.mc = gp100_mc_new,
 	.mmu = gp100_mmu_new,
 	.therm = gp100_therm_new,
@@ -2253,7 +2256,7 @@
 	.disp = gp102_disp_new,
 	.dma = gf119_dma_new,
 	.fifo = gp100_fifo_new,
-	.gr = gp102_gr_new,
+	.gr = gp104_gr_new,
 	.nvdec = gp102_nvdec_new,
 	.sec2 = gp102_sec2_new,
 	.sw = gf100_sw_new,
@@ -2266,13 +2269,14 @@
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
 	.devinit = gm200_devinit_new,
+	.fault = gp100_fault_new,
 	.fb = gp102_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
 	.i2c = gm200_i2c_new,
 	.ibus = gm200_ibus_new,
 	.imem = nv50_instmem_new,
-	.ltc = gp100_ltc_new,
+	.ltc = gp102_ltc_new,
 	.mc = gp100_mc_new,
 	.mmu = gp100_mmu_new,
 	.therm = gp100_therm_new,
@@ -2288,7 +2292,7 @@
 	.disp = gp102_disp_new,
 	.dma = gf119_dma_new,
 	.fifo = gp100_fifo_new,
-	.gr = gp102_gr_new,
+	.gr = gp104_gr_new,
 	.nvdec = gp102_nvdec_new,
 	.sec2 = gp102_sec2_new,
 	.sw = gf100_sw_new,
@@ -2301,13 +2305,14 @@
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
 	.devinit = gm200_devinit_new,
+	.fault = gp100_fault_new,
 	.fb = gp102_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
 	.i2c = gm200_i2c_new,
 	.ibus = gm200_ibus_new,
 	.imem = nv50_instmem_new,
-	.ltc = gp100_ltc_new,
+	.ltc = gp102_ltc_new,
 	.mc = gp100_mc_new,
 	.mmu = gp100_mmu_new,
 	.therm = gp100_therm_new,
@@ -2336,13 +2341,14 @@
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
 	.devinit = gm200_devinit_new,
+	.fault = gp100_fault_new,
 	.fb = gp102_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
 	.i2c = gm200_i2c_new,
 	.ibus = gm200_ibus_new,
 	.imem = nv50_instmem_new,
-	.ltc = gp100_ltc_new,
+	.ltc = gp102_ltc_new,
 	.mc = gp100_mc_new,
 	.mmu = gp100_mmu_new,
 	.therm = gp100_therm_new,
@@ -2369,11 +2375,12 @@
 	.name = "GP10B",
 	.bar = gm20b_bar_new,
 	.bus = gf100_bus_new,
+	.fault = gp100_fault_new,
 	.fb = gp10b_fb_new,
 	.fuse = gm107_fuse_new,
 	.ibus = gp10b_ibus_new,
 	.imem = gk20a_instmem_new,
-	.ltc = gp100_ltc_new,
+	.ltc = gp102_ltc_new,
 	.mc = gp10b_mc_new,
 	.mmu = gp10b_mmu_new,
 	.secboot = gp10b_secboot_new,
@@ -2387,6 +2394,46 @@
 	.sw = gf100_sw_new,
 };
 
+static const struct nvkm_device_chip
+nv140_chipset = {
+	.name = "GV100",
+	.bar = gm107_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gv100_devinit_new,
+	.fault = gv100_fault_new,
+	.fb = gv100_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp102_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gv100_mmu_new,
+	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
+	.secboot = gp108_secboot_new,
+	.therm = gp100_therm_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.disp = gv100_disp_new,
+	.ce[0] = gv100_ce_new,
+	.ce[1] = gv100_ce_new,
+	.ce[2] = gv100_ce_new,
+	.ce[3] = gv100_ce_new,
+	.ce[4] = gv100_ce_new,
+	.ce[5] = gv100_ce_new,
+	.ce[6] = gv100_ce_new,
+	.ce[7] = gv100_ce_new,
+	.ce[8] = gv100_ce_new,
+	.dma = gv100_dma_new,
+	.fifo = gv100_fifo_new,
+	.gr = gv100_gr_new,
+	.nvdec = gp102_nvdec_new,
+	.sec2 = gp102_sec2_new,
+};
+
 static int
 nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
 		       struct nvkm_notify *notify)
@@ -2420,6 +2467,7 @@
 	_(BUS     , device->bus     , &device->bus->subdev);
 	_(CLK     , device->clk     , &device->clk->subdev);
 	_(DEVINIT , device->devinit , &device->devinit->subdev);
+	_(FAULT   , device->fault   , &device->fault->subdev);
 	_(FB      , device->fb      , &device->fb->subdev);
 	_(FUSE    , device->fuse    , &device->fuse->subdev);
 	_(GPIO    , device->gpio    , &device->gpio->subdev);
@@ -2463,6 +2511,9 @@
 	_(CE3    , device->ce[3]   ,  device->ce[3]);
 	_(CE4    , device->ce[4]   ,  device->ce[4]);
 	_(CE5    , device->ce[5]   ,  device->ce[5]);
+	_(CE6    , device->ce[6]   ,  device->ce[6]);
+	_(CE7    , device->ce[7]   ,  device->ce[7]);
+	_(CE8    , device->ce[8]   ,  device->ce[8]);
 	_(CIPHER , device->cipher  ,  device->cipher);
 	_(DISP   , device->disp    , &device->disp->engine);
 	_(DMAOBJ , device->dma     , &device->dma->engine);
@@ -2739,6 +2790,7 @@
 			case 0x110:
 			case 0x120: device->card_type = GM100; break;
 			case 0x130: device->card_type = GP100; break;
+			case 0x140: device->card_type = GV100; break;
 			default:
 				break;
 			}
@@ -2830,6 +2882,7 @@
 		case 0x137: device->chip = &nv137_chipset; break;
 		case 0x138: device->chip = &nv138_chipset; break;
 		case 0x13b: device->chip = &nv13b_chipset; break;
+		case 0x140: device->chip = &nv140_chipset; break;
 		default:
 			nvdev_error(device, "unknown chipset (%08x)\n", boot0);
 			goto done;
@@ -2891,6 +2944,7 @@
 		_(NVKM_SUBDEV_BUS     ,      bus);
 		_(NVKM_SUBDEV_CLK     ,      clk);
 		_(NVKM_SUBDEV_DEVINIT ,  devinit);
+		_(NVKM_SUBDEV_FAULT   ,    fault);
 		_(NVKM_SUBDEV_FB      ,       fb);
 		_(NVKM_SUBDEV_FUSE    ,     fuse);
 		_(NVKM_SUBDEV_GPIO    ,     gpio);
@@ -2916,6 +2970,9 @@
 		_(NVKM_ENGINE_CE3     ,    ce[3]);
 		_(NVKM_ENGINE_CE4     ,    ce[4]);
 		_(NVKM_ENGINE_CE5     ,    ce[5]);
+		_(NVKM_ENGINE_CE6     ,    ce[6]);
+		_(NVKM_ENGINE_CE7     ,    ce[7]);
+		_(NVKM_ENGINE_CE8     ,    ce[8]);
 		_(NVKM_ENGINE_CIPHER  ,   cipher);
 		_(NVKM_ENGINE_DISP    ,     disp);
 		_(NVKM_ENGINE_DMAOBJ  ,      dma);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
index 08d0bf6..253ab91 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
@@ -8,6 +8,7 @@
 #include <subdev/bus.h>
 #include <subdev/clk.h>
 #include <subdev/devinit.h>
+#include <subdev/fault.h>
 #include <subdev/fb.h>
 #include <subdev/fuse.h>
 #include <subdev/gpio.h>
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index 17adcb4..dde6bba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -40,6 +40,66 @@
 };
 
 static int
+nvkm_udevice_info_subdev(struct nvkm_device *device, u64 mthd, u64 *data)
+{
+	struct nvkm_subdev *subdev;
+	enum nvkm_devidx subidx;
+
+	switch (mthd & NV_DEVICE_INFO_UNIT) {
+	case NV_DEVICE_FIFO(0): subidx = NVKM_ENGINE_FIFO; break;
+	default:
+		return -EINVAL;
+	}
+
+	subdev = nvkm_device_subdev(device, subidx);
+	if (subdev)
+		return nvkm_subdev_info(subdev, mthd, data);
+	return -ENODEV;
+}
+
+static void
+nvkm_udevice_info_v1(struct nvkm_device *device,
+		     struct nv_device_info_v1_data *args)
+{
+	if (args->mthd & NV_DEVICE_INFO_UNIT) {
+		if (nvkm_udevice_info_subdev(device, args->mthd, &args->data))
+			args->mthd = NV_DEVICE_INFO_INVALID;
+		return;
+	}
+
+	switch (args->mthd) {
+#define ENGINE__(A,B,C) NV_DEVICE_INFO_ENGINE_##A: { int _i;                   \
+	for (_i = (B), args->data = 0ULL; _i <= (C); _i++) {                   \
+		if (nvkm_device_engine(device, _i))                            \
+			args->data |= BIT_ULL(_i);                             \
+	}                                                                      \
+}
+#define ENGINE_A(A) ENGINE__(A, NVKM_ENGINE_##A   , NVKM_ENGINE_##A)
+#define ENGINE_B(A) ENGINE__(A, NVKM_ENGINE_##A##0, NVKM_ENGINE_##A##_LAST)
+	case ENGINE_A(SW    ); break;
+	case ENGINE_A(GR    ); break;
+	case ENGINE_A(MPEG  ); break;
+	case ENGINE_A(ME    ); break;
+	case ENGINE_A(CIPHER); break;
+	case ENGINE_A(BSP   ); break;
+	case ENGINE_A(VP    ); break;
+	case ENGINE_B(CE    ); break;
+	case ENGINE_A(SEC   ); break;
+	case ENGINE_A(MSVLD ); break;
+	case ENGINE_A(MSPDEC); break;
+	case ENGINE_A(MSPPP ); break;
+	case ENGINE_A(MSENC ); break;
+	case ENGINE_A(VIC   ); break;
+	case ENGINE_A(SEC2  ); break;
+	case ENGINE_A(NVDEC ); break;
+	case ENGINE_B(NVENC ); break;
+	default:
+		args->mthd = NV_DEVICE_INFO_INVALID;
+		break;
+	}
+}
+
+static int
 nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size)
 {
 	struct nvkm_object *object = &udev->object;
@@ -48,10 +108,21 @@
 	struct nvkm_instmem *imem = device->imem;
 	union {
 		struct nv_device_info_v0 v0;
+		struct nv_device_info_v1 v1;
 	} *args = data;
-	int ret = -ENOSYS;
+	int ret = -ENOSYS, i;
 
 	nvif_ioctl(object, "device info size %d\n", size);
+	if (!(ret = nvif_unpack(ret, &data, &size, args->v1, 1, 1, true))) {
+		nvif_ioctl(object, "device info vers %d count %d\n",
+			   args->v1.version, args->v1.count);
+		if (args->v1.count * sizeof(args->v1.data[0]) == size) {
+			for (i = 0; i < args->v1.count; i++)
+				nvkm_udevice_info_v1(device, &args->v1.data[i]);
+			return 0;
+		}
+		return -EINVAL;
+	} else
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
 		nvif_ioctl(object, "device info vers %d\n", args->v0.version);
 	} else
@@ -103,6 +174,7 @@
 	case NV_E0: args->v0.family = NV_DEVICE_INFO_V0_KEPLER; break;
 	case GM100: args->v0.family = NV_DEVICE_INFO_V0_MAXWELL; break;
 	case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break;
+	case GV100: args->v0.family = NV_DEVICE_INFO_V0_VOLTA; break;
 	default:
 		args->v0.family = 0;
 		break;
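
The v1 info path added above is batch-oriented: userspace sends an
nv_device_info_v1 header followed by count nv_device_info_v1_data entries,
where each entry's mthd selects a query and data carries the answer back;
an unrecognised method is marked NV_DEVICE_INFO_INVALID in its own slot
rather than failing the whole ioctl. The nvif/fifo.c addition earlier in
this diff builds its request exactly this way; the shape, reduced to a
sketch:

	struct {
		struct nv_device_info_v1 m;              /* .version = 1, .count = 2 */
		struct nv_device_info_v1_data data[2];   /* .mthd in, .data out */
	} args;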
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index 48ce669..3d485db 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -14,12 +14,14 @@
 nvkm-y += nvkm/engine/disp/gm200.o
 nvkm-y += nvkm/engine/disp/gp100.o
 nvkm-y += nvkm/engine/disp/gp102.o
+nvkm-y += nvkm/engine/disp/gv100.o
 nvkm-y += nvkm/engine/disp/vga.o
 
 nvkm-y += nvkm/engine/disp/head.o
 nvkm-y += nvkm/engine/disp/headnv04.o
 nvkm-y += nvkm/engine/disp/headnv50.o
 nvkm-y += nvkm/engine/disp/headgf119.o
+nvkm-y += nvkm/engine/disp/headgv100.o
 
 nvkm-y += nvkm/engine/disp/ior.o
 nvkm-y += nvkm/engine/disp/dacnv50.o
@@ -35,6 +37,7 @@
 nvkm-y += nvkm/engine/disp/sorgk104.o
 nvkm-y += nvkm/engine/disp/sorgm107.o
 nvkm-y += nvkm/engine/disp/sorgm200.o
+nvkm-y += nvkm/engine/disp/sorgv100.o
 
 nvkm-y += nvkm/engine/disp/outp.o
 nvkm-y += nvkm/engine/disp/dp.o
@@ -47,6 +50,7 @@
 nvkm-y += nvkm/engine/disp/hdmigt215.o
 nvkm-y += nvkm/engine/disp/hdmigf119.o
 nvkm-y += nvkm/engine/disp/hdmigk104.o
+nvkm-y += nvkm/engine/disp/hdmigv100.o
 
 nvkm-y += nvkm/engine/disp/conn.o
 
@@ -63,57 +67,49 @@
 nvkm-y += nvkm/engine/disp/rootgm200.o
 nvkm-y += nvkm/engine/disp/rootgp100.o
 nvkm-y += nvkm/engine/disp/rootgp102.o
+nvkm-y += nvkm/engine/disp/rootgv100.o
 
 nvkm-y += nvkm/engine/disp/channv50.o
 nvkm-y += nvkm/engine/disp/changf119.o
+nvkm-y += nvkm/engine/disp/changv100.o
 
 nvkm-y += nvkm/engine/disp/dmacnv50.o
 nvkm-y += nvkm/engine/disp/dmacgf119.o
 nvkm-y += nvkm/engine/disp/dmacgp102.o
+nvkm-y += nvkm/engine/disp/dmacgv100.o
 
 nvkm-y += nvkm/engine/disp/basenv50.o
 nvkm-y += nvkm/engine/disp/baseg84.o
-nvkm-y += nvkm/engine/disp/basegt200.o
-nvkm-y += nvkm/engine/disp/basegt215.o
 nvkm-y += nvkm/engine/disp/basegf119.o
-nvkm-y += nvkm/engine/disp/basegk104.o
-nvkm-y += nvkm/engine/disp/basegk110.o
 nvkm-y += nvkm/engine/disp/basegp102.o
 
 nvkm-y += nvkm/engine/disp/corenv50.o
 nvkm-y += nvkm/engine/disp/coreg84.o
 nvkm-y += nvkm/engine/disp/coreg94.o
-nvkm-y += nvkm/engine/disp/coregt200.o
-nvkm-y += nvkm/engine/disp/coregt215.o
 nvkm-y += nvkm/engine/disp/coregf119.o
 nvkm-y += nvkm/engine/disp/coregk104.o
-nvkm-y += nvkm/engine/disp/coregk110.o
-nvkm-y += nvkm/engine/disp/coregm107.o
-nvkm-y += nvkm/engine/disp/coregm200.o
-nvkm-y += nvkm/engine/disp/coregp100.o
 nvkm-y += nvkm/engine/disp/coregp102.o
+nvkm-y += nvkm/engine/disp/coregv100.o
 
 nvkm-y += nvkm/engine/disp/ovlynv50.o
 nvkm-y += nvkm/engine/disp/ovlyg84.o
 nvkm-y += nvkm/engine/disp/ovlygt200.o
-nvkm-y += nvkm/engine/disp/ovlygt215.o
 nvkm-y += nvkm/engine/disp/ovlygf119.o
 nvkm-y += nvkm/engine/disp/ovlygk104.o
 nvkm-y += nvkm/engine/disp/ovlygp102.o
 
+nvkm-y += nvkm/engine/disp/wimmgv100.o
+
+nvkm-y += nvkm/engine/disp/wndwgv100.o
+
 nvkm-y += nvkm/engine/disp/piocnv50.o
 nvkm-y += nvkm/engine/disp/piocgf119.o
 
 nvkm-y += nvkm/engine/disp/cursnv50.o
-nvkm-y += nvkm/engine/disp/cursg84.o
-nvkm-y += nvkm/engine/disp/cursgt215.o
 nvkm-y += nvkm/engine/disp/cursgf119.o
-nvkm-y += nvkm/engine/disp/cursgk104.o
 nvkm-y += nvkm/engine/disp/cursgp102.o
+nvkm-y += nvkm/engine/disp/cursgv100.o
 
 nvkm-y += nvkm/engine/disp/oimmnv50.o
-nvkm-y += nvkm/engine/disp/oimmg84.o
-nvkm-y += nvkm/engine/disp/oimmgt215.o
 nvkm-y += nvkm/engine/disp/oimmgf119.o
-nvkm-y += nvkm/engine/disp/oimmgk104.o
 nvkm-y += nvkm/engine/disp/oimmgp102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
index 93a75e5..32fa94a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
@@ -220,6 +220,9 @@
 	struct nvkm_conn *conn;
 	struct nvkm_outp *outp;
 
+	if (disp->func->fini)
+		disp->func->fini(disp);
+
 	list_for_each_entry(outp, &disp->outp, head) {
 		nvkm_outp_fini(outp);
 	}
@@ -237,6 +240,7 @@
 	struct nvkm_disp *disp = nvkm_disp(engine);
 	struct nvkm_conn *conn;
 	struct nvkm_outp *outp;
+	struct nvkm_ior *ior;
 
 	list_for_each_entry(conn, &disp->conn, head) {
 		nvkm_conn_init(conn);
@@ -246,6 +250,19 @@
 		nvkm_outp_init(outp);
 	}
 
+	if (disp->func->init) {
+		int ret = disp->func->init(disp);
+		if (ret)
+			return ret;
+	}
+
+	/* Set 'normal' (ie. when it's attached to a head) state for
+	 * each output resource to 'fully enabled'.
+	 */
+	list_for_each_entry(ior, &disp->ior, head) {
+		ior->func->power(ior, true, true, true, true, true);
+	}
+
 	return 0;
 }
 
@@ -376,6 +393,12 @@
 	if (ret)
 		return ret;
 
+	if (disp->func->oneinit) {
+		ret = disp->func->oneinit(disp);
+		if (ret)
+			return ret;
+	}
+
 	i = 0;
 	list_for_each_entry(head, &disp->head, head)
 		i = max(i, head->id + 1);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c
index 6d17630..01253f4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/baseg84.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 static const struct nv50_disp_mthd_list
 g84_disp_base_mthd_base = {
@@ -56,8 +53,8 @@
 	}
 };
 
-const struct nv50_disp_chan_mthd
-g84_disp_base_chan_mthd = {
+static const struct nv50_disp_chan_mthd
+g84_disp_base_mthd = {
 	.name = "Base",
 	.addr = 0x000540,
 	.prev = 0x000004,
@@ -68,13 +65,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-g84_disp_base_oclass = {
-	.base.oclass = G82_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &nv50_disp_dmac_func,
-	.mthd = &g84_disp_base_chan_mthd,
-	.chid = 1,
-};
+int
+g84_disp_base_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		  struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_base_new_(&nv50_disp_dmac_func, &g84_disp_base_mthd,
+				   disp, 1, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c
index ebcb925..389e19d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegf119.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 static const struct nv50_disp_mthd_list
 gf119_disp_base_mthd_base = {
@@ -91,7 +88,7 @@
 };
 
 const struct nv50_disp_chan_mthd
-gf119_disp_base_chan_mthd = {
+gf119_disp_base_mthd = {
 	.name = "Base",
 	.addr = 0x001000,
 	.prev = -0x020000,
@@ -102,13 +99,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-gf119_disp_base_oclass = {
-	.base.oclass = GF110_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &gf119_disp_dmac_func,
-	.mthd = &gf119_disp_base_chan_mthd,
-	.chid = 1,
-};
+int
+gf119_disp_base_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_base_new_(&gf119_disp_dmac_func, &gf119_disp_base_mthd,
+				   disp, 1, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk104.c
deleted file mode 100644
index 780a1d9..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk104.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gk104_disp_base_oclass = {
-	.base.oclass = GK104_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &gf119_disp_dmac_func,
-	.mthd = &gf119_disp_base_chan_mthd,
-	.chid = 1,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk110.c
deleted file mode 100644
index d8bdd24..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegk110.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gk110_disp_base_oclass = {
-	.base.oclass = GK110_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &gf119_disp_dmac_func,
-	.mthd = &gf119_disp_base_chan_mthd,
-	.chid = 1,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c
index 8a3cdeef..0cb23d6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c
@@ -21,18 +21,12 @@
  *
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
+#include "channv50.h"
 
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gp102_disp_base_oclass = {
-	.base.oclass = GK110_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &gp102_disp_dmac_func,
-	.mthd = &gf119_disp_base_chan_mthd,
-	.chid = 1,
-};
+int
+gp102_disp_base_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_base_new_(&gp102_disp_dmac_func, &gf119_disp_base_mthd,
+				   disp, 1, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt200.c
deleted file mode 100644
index 93451e4..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt200.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gt200_disp_base_oclass = {
-	.base.oclass = GT200_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &nv50_disp_dmac_func,
-	.mthd = &g84_disp_base_chan_mthd,
-	.chid = 1,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt215.c
deleted file mode 100644
index 08e2b1f..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegt215.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gt215_disp_base_oclass = {
-	.base.oclass = GT214_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &nv50_disp_dmac_func,
-	.mthd = &g84_disp_base_chan_mthd,
-	.chid = 1,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c
index f1d6b82..19eb7dd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basenv50.c
@@ -21,33 +21,30 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
+#include "channv50.h"
 #include "head.h"
-#include "rootnv50.h"
 
 #include <core/client.h>
 
-#include <nvif/class.h>
 #include <nvif/cl507c.h>
 #include <nvif/unpack.h>
 
 int
-nv50_disp_base_new(const struct nv50_disp_dmac_func *func,
-		   const struct nv50_disp_chan_mthd *mthd,
-		   struct nv50_disp_root *root, int chid,
-		   const struct nvkm_oclass *oclass, void *data, u32 size,
-		   struct nvkm_object **pobject)
+nv50_disp_base_new_(const struct nv50_disp_chan_func *func,
+		    const struct nv50_disp_chan_mthd *mthd,
+		    struct nv50_disp *disp, int chid,
+		    const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nvkm_object **pobject)
 {
 	union {
 		struct nv50_disp_base_channel_dma_v0 v0;
-	} *args = data;
+	} *args = argv;
 	struct nvkm_object *parent = oclass->parent;
-	struct nv50_disp *disp = root->disp;
 	int head, ret = -ENOSYS;
 	u64 push;
 
-	nvif_ioctl(parent, "create disp base channel dma size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+	nvif_ioctl(parent, "create disp base channel dma size %d\n", argc);
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
 		nvif_ioctl(parent, "create disp base channel dma vers %d "
 				   "pushbuf %016llx head %d\n",
 			   args->v0.version, args->v0.pushbuf, args->v0.head);
@@ -58,7 +55,7 @@
 	} else
 		return ret;
 
-	return nv50_disp_dmac_new_(func, mthd, root, chid + head,
+	return nv50_disp_dmac_new_(func, mthd, disp, chid + head,
 				   head, push, oclass, pobject);
 }
 
@@ -102,7 +99,7 @@
 };
 
 static const struct nv50_disp_chan_mthd
-nv50_disp_base_chan_mthd = {
+nv50_disp_base_mthd = {
 	.name = "Base",
 	.addr = 0x000540,
 	.prev = 0x000004,
@@ -113,13 +110,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-nv50_disp_base_oclass = {
-	.base.oclass = NV50_DISP_BASE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_base_new,
-	.func = &nv50_disp_dmac_func,
-	.mthd = &nv50_disp_base_chan_mthd,
-	.chid = 1,
-};
+int
+nv50_disp_base_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		   struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_base_new_(&nv50_disp_dmac_func, &nv50_disp_base_mthd,
+				   disp, 1, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c
index 17a3d83..29e6dd5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/changf119.c
@@ -47,3 +47,16 @@
 	.init = gf119_disp_chan_uevent_init,
 	.fini = gf119_disp_chan_uevent_fini,
 };
+
+void
+gf119_disp_chan_intr(struct nv50_disp_chan *chan, bool en)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u64 mask = 0x00000001 << chan->chid.user;
+	if (!en) {
+		nvkm_mask(device, 0x610090, mask, 0x00000000);
+		nvkm_mask(device, 0x6100a0, mask, 0x00000000);
+	} else {
+		nvkm_mask(device, 0x6100a0, mask, mask);
+	}
+}
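
[Note: the new per-channel .intr hook (see the nv50_disp_chan_func changes in
channv50.h below) folds the error-reporting and completion-notification
enables that used to be open-coded in each init/fini into one callback.
nvkm_mask() is the driver's read-modify-write register helper; a standalone
sketch of the same pattern, with a plain array standing in for MMIO space:

#include <stdint.h>

static uint32_t regs[16];	/* toy stand-in for the real MMIO aperture */

static uint32_t rd32(uint32_t i) { return regs[i]; }
static void wr32(uint32_t i, uint32_t v) { regs[i] = v; }

/* Read-modify-write: clear the masked bits, then set the requested
 * ones, as nvkm_mask(device, addr, mask, data) does against hardware.
 */
static uint32_t
mask32(uint32_t i, uint32_t mask, uint32_t data)
{
	uint32_t old = rd32(i);
	wr32(i, (old & ~mask) | (data & mask));
	return old;
}

/* e.g. gf119_disp_chan_intr(chan, true) amounts to a single
 * mask32(<0x6100a0>, 1u << chid, 1u << chid) on the real registers. */
]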
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/changv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/changv100.c
new file mode 100644
index 0000000..75247c9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/changv100.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "channv50.h"
+
+const struct nvkm_event_func
+gv100_disp_chan_uevent = {
+	.ctor = nv50_disp_chan_uevent_ctor,
+};
+
+u64
+gv100_disp_chan_user(struct nv50_disp_chan *chan, u64 *psize)
+{
+	*psize = 0x1000;
+	return 0x690000 + ((chan->chid.user - 1) * 0x1000);
+}
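
[Note: the .user callback lets the rd32/wr32/map paths (reworked in channv50.c
below) compute the channel control area instead of hard-coding it: nv50 puts
user areas at 0x640000 + chid * 0x1000, while gv100 uses 0x690000 with the
channel id biased by one. The gv100 arithmetic in isolation:

#include <stdint.h>

/* Mirrors gv100_disp_chan_user() above; standalone for illustration. */
static uint64_t
gv100_user_addr(int user_chid, uint64_t *psize)
{
	*psize = 0x1000;	/* each control area is one 4KiB page */
	return 0x690000 + ((uint64_t)(user_chid - 1) * 0x1000);
}

/* user channel 1 -> 0x690000, 2 -> 0x691000, ...; the map() path then
 * adds the BAR base obtained from resource_addr(device, 0). */
]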
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
index 723dcbd..57719f6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
@@ -26,6 +26,7 @@
 
 #include <core/client.h>
 #include <core/notify.h>
+#include <core/oproxy.h>
 #include <core/ramht.h>
 #include <engine/dma.h>
 
@@ -65,7 +66,7 @@
 void
 nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug)
 {
-	struct nv50_disp *disp = chan->root->disp;
+	struct nv50_disp *disp = chan->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	const struct nv50_disp_chan_mthd *mthd = chan->mthd;
 	const struct nv50_disp_mthd_list *list;
@@ -154,13 +155,29 @@
 	.fini = nv50_disp_chan_uevent_fini,
 };
 
+u64
+nv50_disp_chan_user(struct nv50_disp_chan *chan, u64 *psize)
+{
+	*psize = 0x1000;
+	return 0x640000 + (chan->chid.user * 0x1000);
+}
+
+void
+nv50_disp_chan_intr(struct nv50_disp_chan *chan, bool en)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u64 mask = 0x00010001 << chan->chid.user;
+	const u64 data = en ? 0x00010000 : 0x00000000;
+	nvkm_mask(device, 0x610028, mask, data);
+}
+
 static int
 nv50_disp_chan_rd32(struct nvkm_object *object, u64 addr, u32 *data)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
-	struct nv50_disp *disp = chan->root->disp;
-	struct nvkm_device *device = disp->base.engine.subdev.device;
-	*data = nvkm_rd32(device, 0x640000 + (chan->chid.user * 0x1000) + addr);
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	u64 size, base = chan->func->user(chan, &size);
+	*data = nvkm_rd32(device, base + addr);
 	return 0;
 }
 
@@ -168,9 +185,9 @@
 nv50_disp_chan_wr32(struct nvkm_object *object, u64 addr, u32 data)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
-	struct nv50_disp *disp = chan->root->disp;
-	struct nvkm_device *device = disp->base.engine.subdev.device;
-	nvkm_wr32(device, 0x640000 + (chan->chid.user * 0x1000) + addr, data);
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	u64 size, base = chan->func->user(chan, &size);
+	nvkm_wr32(device, base + addr, data);
 	return 0;
 }
 
@@ -179,7 +196,7 @@
 		    struct nvkm_event **pevent)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
-	struct nv50_disp *disp = chan->root->disp;
+	struct nv50_disp *disp = chan->disp;
 	switch (type) {
 	case NV50_DISP_CORE_CHANNEL_DMA_V0_NTFY_UEVENT:
 		*pevent = &disp->uevent;
@@ -195,34 +212,83 @@
 		   enum nvkm_object_map *type, u64 *addr, u64 *size)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
-	struct nv50_disp *disp = chan->root->disp;
-	struct nvkm_device *device = disp->base.engine.subdev.device;
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u64 base = device->func->resource_addr(device, 0);
 	*type = NVKM_OBJECT_MAP_IO;
-	*addr = device->func->resource_addr(device, 0) +
-		0x640000 + (chan->chid.user * 0x1000);
-	*size = 0x001000;
+	*addr = base + chan->func->user(chan, size);
+	return 0;
+}
+
+struct nv50_disp_chan_object {
+	struct nvkm_oproxy oproxy;
+	struct nv50_disp *disp;
+	int hash;
+};
+
+static void
+nv50_disp_chan_child_del_(struct nvkm_oproxy *base)
+{
+	struct nv50_disp_chan_object *object =
+		container_of(base, typeof(*object), oproxy);
+	nvkm_ramht_remove(object->disp->ramht, object->hash);
+}
+
+static const struct nvkm_oproxy_func
+nv50_disp_chan_child_func_ = {
+	.dtor[0] = nv50_disp_chan_child_del_,
+};
+
+static int
+nv50_disp_chan_child_new(const struct nvkm_oclass *oclass,
+			 void *argv, u32 argc, struct nvkm_object **pobject)
+{
+	struct nv50_disp_chan *chan = nv50_disp_chan(oclass->parent);
+	struct nv50_disp *disp = chan->disp;
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const struct nvkm_device_oclass *sclass = oclass->priv;
+	struct nv50_disp_chan_object *object;
+	int ret;
+
+	if (!(object = kzalloc(sizeof(*object), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_oproxy_ctor(&nv50_disp_chan_child_func_, oclass, &object->oproxy);
+	object->disp = disp;
+	*pobject = &object->oproxy.base;
+
+	ret = sclass->ctor(device, oclass, argv, argc, &object->oproxy.object);
+	if (ret)
+		return ret;
+
+	object->hash = chan->func->bind(chan, object->oproxy.object,
+					      oclass->handle);
+	if (object->hash < 0)
+		return object->hash;
+
 	return 0;
 }
 
 static int
-nv50_disp_chan_child_new(const struct nvkm_oclass *oclass,
-			 void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nv50_disp_chan *chan = nv50_disp_chan(oclass->parent);
-	return chan->func->child_new(chan, oclass, data, size, pobject);
-}
-
-static int
 nv50_disp_chan_child_get(struct nvkm_object *object, int index,
-			 struct nvkm_oclass *oclass)
+			 struct nvkm_oclass *sclass)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
-	if (chan->func->child_get) {
-		int ret = chan->func->child_get(chan, index, oclass);
-		if (ret == 0)
-			oclass->ctor = nv50_disp_chan_child_new;
-		return ret;
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const struct nvkm_device_oclass *oclass = NULL;
+
+	if (chan->func->bind)
+		sclass->engine = nvkm_device_engine(device, NVKM_ENGINE_DMAOBJ);
+	else
+		sclass->engine = NULL;
+
+	if (sclass->engine && sclass->engine->func->base.sclass) {
+		sclass->engine->func->base.sclass(sclass, index, &oclass);
+		if (oclass) {
+			sclass->ctor = nv50_disp_chan_child_new;
+			sclass->priv = oclass;
+			return 0;
+		}
 	}
+
 	return -EINVAL;
 }
 
@@ -231,6 +297,7 @@
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
 	chan->func->fini(chan);
+	chan->func->intr(chan, false);
 	return 0;
 }
 
@@ -238,6 +305,7 @@
 nv50_disp_chan_init(struct nvkm_object *object)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
+	chan->func->intr(chan, true);
 	return chan->func->init(chan);
 }
 
@@ -245,10 +313,11 @@
 nv50_disp_chan_dtor(struct nvkm_object *object)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
-	struct nv50_disp *disp = chan->root->disp;
+	struct nv50_disp *disp = chan->disp;
 	if (chan->chid.user >= 0)
 		disp->chan[chan->chid.user] = NULL;
-	return chan->func->dtor ? chan->func->dtor(chan) : chan;
+	nvkm_memory_unref(&chan->memory);
+	return chan;
 }
 
 static const struct nvkm_object_func
@@ -264,18 +333,22 @@
 };
 
 int
-nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func,
+nv50_disp_chan_new_(const struct nv50_disp_chan_func *func,
 		    const struct nv50_disp_chan_mthd *mthd,
-		    struct nv50_disp_root *root, int ctrl, int user, int head,
+		    struct nv50_disp *disp, int ctrl, int user, int head,
 		    const struct nvkm_oclass *oclass,
-		    struct nv50_disp_chan *chan)
+		    struct nvkm_object **pobject)
 {
-	struct nv50_disp *disp = root->disp;
+	struct nv50_disp_chan *chan;
+
+	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
+		return -ENOMEM;
+	*pobject = &chan->object;
 
 	nvkm_object_ctor(&nv50_disp_chan, oclass, &chan->object);
 	chan->func = func;
 	chan->mthd = mthd;
-	chan->root = root;
+	chan->disp = disp;
 	chan->chid.ctrl = ctrl;
 	chan->chid.user = user;
 	chan->head = head;
@@ -287,20 +360,3 @@
 	disp->chan[chan->chid.user] = chan;
 	return 0;
 }
-
-int
-nv50_disp_chan_new_(const struct nv50_disp_chan_func *func,
-		    const struct nv50_disp_chan_mthd *mthd,
-		    struct nv50_disp_root *root, int ctrl, int user, int head,
-		    const struct nvkm_oclass *oclass,
-		    struct nvkm_object **pobject)
-{
-	struct nv50_disp_chan *chan;
-
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->object;
-
-	return nv50_disp_chan_ctor(func, mthd, root, ctrl, user,
-				   head, oclass, chan);
-}
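
[Note: child DMA objects are now wrapped in an nvkm_oproxy so the RAMHT
binding created at construction is released when the proxy is destroyed
(nv50_disp_chan_child_del_() above). A minimal sketch of that wrap-and-clean-up
idea, using illustrative types rather than the real nvkm ones:

/* Sketch only: proxy an object and undo its table binding on teardown. */
struct chan_child {
	void *object;			/* the wrapped DMA object */
	int hash;			/* RAMHT slot, or < 0 if unbound */
	void (*unbind)(int hash);	/* stands in for nvkm_ramht_remove() */
};

static void
chan_child_dtor(struct chan_child *c)
{
	if (c->hash >= 0)
		c->unbind(c->hash);
}
]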
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
index 40681db..adc9d76 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
@@ -4,11 +4,12 @@
 #define nv50_disp_chan(p) container_of((p), struct nv50_disp_chan, object)
 #include <core/object.h>
 #include "nv50.h"
+struct nv50_disp_root;
 
 struct nv50_disp_chan {
 	const struct nv50_disp_chan_func *func;
 	const struct nv50_disp_chan_mthd *mthd;
-	struct nv50_disp_root *root;
+	struct nv50_disp *disp;
 
 	struct {
 		int ctrl;
@@ -17,36 +18,133 @@
 	int head;
 
 	struct nvkm_object object;
+
+	struct nvkm_memory *memory;
+	u64 push;
 };
 
 struct nv50_disp_chan_func {
-	void *(*dtor)(struct nv50_disp_chan *);
 	int (*init)(struct nv50_disp_chan *);
 	void (*fini)(struct nv50_disp_chan *);
-	int (*child_get)(struct nv50_disp_chan *, int index,
-			 struct nvkm_oclass *);
-	int (*child_new)(struct nv50_disp_chan *, const struct nvkm_oclass *,
-			 void *data, u32 size, struct nvkm_object **);
+	void (*intr)(struct nv50_disp_chan *, bool en);
+	u64 (*user)(struct nv50_disp_chan *, u64 *size);
+	int (*bind)(struct nv50_disp_chan *, struct nvkm_object *, u32 handle);
 };
 
-int nv50_disp_chan_ctor(const struct nv50_disp_chan_func *,
-			const struct nv50_disp_chan_mthd *,
-			struct nv50_disp_root *, int ctrl, int user, int head,
-			const struct nvkm_oclass *, struct nv50_disp_chan *);
 int nv50_disp_chan_new_(const struct nv50_disp_chan_func *,
 			const struct nv50_disp_chan_mthd *,
-			struct nv50_disp_root *, int ctrl, int user, int head,
+			struct nv50_disp *, int ctrl, int user, int head,
+			const struct nvkm_oclass *, struct nvkm_object **);
+int nv50_disp_dmac_new_(const struct nv50_disp_chan_func *,
+			const struct nv50_disp_chan_mthd *,
+			struct nv50_disp *, int chid, int head, u64 push,
 			const struct nvkm_oclass *, struct nvkm_object **);
 
+void nv50_disp_chan_intr(struct nv50_disp_chan *, bool);
+u64 nv50_disp_chan_user(struct nv50_disp_chan *, u64 *);
 extern const struct nv50_disp_chan_func nv50_disp_pioc_func;
+extern const struct nv50_disp_chan_func nv50_disp_dmac_func;
+int nv50_disp_dmac_bind(struct nv50_disp_chan *, struct nvkm_object *, u32);
+extern const struct nv50_disp_chan_func nv50_disp_core_func;
+
+void gf119_disp_chan_intr(struct nv50_disp_chan *, bool);
 extern const struct nv50_disp_chan_func gf119_disp_pioc_func;
+extern const struct nv50_disp_chan_func gf119_disp_dmac_func;
+void gf119_disp_dmac_fini(struct nv50_disp_chan *);
+int gf119_disp_dmac_bind(struct nv50_disp_chan *, struct nvkm_object *, u32);
+extern const struct nv50_disp_chan_func gf119_disp_core_func;
+void gf119_disp_core_fini(struct nv50_disp_chan *);
 
-extern const struct nvkm_event_func nv50_disp_chan_uevent;
-int  nv50_disp_chan_uevent_ctor(struct nvkm_object *, void *, u32,
-				struct nvkm_notify *);
-void nv50_disp_chan_uevent_send(struct nv50_disp *, int);
+extern const struct nv50_disp_chan_func gp102_disp_dmac_func;
 
-extern const struct nvkm_event_func gf119_disp_chan_uevent;
+u64 gv100_disp_chan_user(struct nv50_disp_chan *, u64 *);
+int gv100_disp_dmac_init(struct nv50_disp_chan *);
+void gv100_disp_dmac_fini(struct nv50_disp_chan *);
+int gv100_disp_dmac_bind(struct nv50_disp_chan *, struct nvkm_object *, u32);
+
+int nv50_disp_curs_new_(const struct nv50_disp_chan_func *,
+			struct nv50_disp *, int ctrl, int user,
+			const struct nvkm_oclass *, void *argv, u32 argc,
+			struct nvkm_object **);
+int nv50_disp_oimm_new_(const struct nv50_disp_chan_func *,
+			struct nv50_disp *, int ctrl, int user,
+			const struct nvkm_oclass *, void *argv, u32 argc,
+			struct nvkm_object **);
+int nv50_disp_base_new_(const struct nv50_disp_chan_func *,
+			const struct nv50_disp_chan_mthd *,
+			struct nv50_disp *, int chid,
+			const struct nvkm_oclass *, void *argv, u32 argc,
+			struct nvkm_object **);
+int nv50_disp_core_new_(const struct nv50_disp_chan_func *,
+			const struct nv50_disp_chan_mthd *,
+			struct nv50_disp *, int chid,
+			const struct nvkm_oclass *oclass, void *argv, u32 argc,
+			struct nvkm_object **);
+int nv50_disp_ovly_new_(const struct nv50_disp_chan_func *,
+			const struct nv50_disp_chan_mthd *,
+			struct nv50_disp *, int chid,
+			const struct nvkm_oclass *, void *argv, u32 argc,
+			struct nvkm_object **);
+
+int nv50_disp_curs_new(const struct nvkm_oclass *, void *, u32,
+		       struct nv50_disp *, struct nvkm_object **);
+int nv50_disp_oimm_new(const struct nvkm_oclass *, void *, u32,
+		       struct nv50_disp *, struct nvkm_object **);
+int nv50_disp_base_new(const struct nvkm_oclass *, void *, u32,
+		       struct nv50_disp *, struct nvkm_object **);
+int nv50_disp_core_new(const struct nvkm_oclass *, void *, u32,
+		       struct nv50_disp *, struct nvkm_object **);
+int nv50_disp_ovly_new(const struct nvkm_oclass *, void *, u32,
+		       struct nv50_disp *, struct nvkm_object **);
+
+int g84_disp_base_new(const struct nvkm_oclass *, void *, u32,
+		      struct nv50_disp *, struct nvkm_object **);
+int g84_disp_core_new(const struct nvkm_oclass *, void *, u32,
+		      struct nv50_disp *, struct nvkm_object **);
+int g84_disp_ovly_new(const struct nvkm_oclass *, void *, u32,
+		      struct nv50_disp *, struct nvkm_object **);
+
+int g94_disp_core_new(const struct nvkm_oclass *, void *, u32,
+		      struct nv50_disp *, struct nvkm_object **);
+
+int gt200_disp_ovly_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+
+int gf119_disp_curs_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gf119_disp_oimm_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gf119_disp_base_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gf119_disp_core_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gf119_disp_ovly_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+
+int gk104_disp_core_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gk104_disp_ovly_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+
+int gp102_disp_curs_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gp102_disp_oimm_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gp102_disp_base_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gp102_disp_core_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gp102_disp_ovly_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+
+int gv100_disp_curs_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gv100_disp_wimm_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gv100_disp_core_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
+int gv100_disp_wndw_new(const struct nvkm_oclass *, void *, u32,
+			struct nv50_disp *, struct nvkm_object **);
 
 struct nv50_disp_mthd_list {
 	u32 mthd;
@@ -76,64 +174,18 @@
 extern const struct nv50_disp_mthd_list nv50_disp_core_mthd_pior;
 extern const struct nv50_disp_mthd_list nv50_disp_base_mthd_image;
 
-extern const struct nv50_disp_chan_mthd g84_disp_core_chan_mthd;
+extern const struct nv50_disp_chan_mthd g84_disp_core_mthd;
 extern const struct nv50_disp_mthd_list g84_disp_core_mthd_dac;
 extern const struct nv50_disp_mthd_list g84_disp_core_mthd_head;
-extern const struct nv50_disp_chan_mthd g84_disp_base_chan_mthd;
-extern const struct nv50_disp_chan_mthd g84_disp_ovly_chan_mthd;
 
-extern const struct nv50_disp_chan_mthd g94_disp_core_chan_mthd;
+extern const struct nv50_disp_chan_mthd g94_disp_core_mthd;
 
 extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_base;
 extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_dac;
 extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_sor;
 extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_pior;
-extern const struct nv50_disp_chan_mthd gf119_disp_base_chan_mthd;
+extern const struct nv50_disp_chan_mthd gf119_disp_base_mthd;
 
-extern const struct nv50_disp_chan_mthd gk104_disp_core_chan_mthd;
-extern const struct nv50_disp_chan_mthd gk104_disp_ovly_chan_mthd;
-
-struct nv50_disp_pioc_oclass {
-	int (*ctor)(const struct nv50_disp_chan_func *,
-		    const struct nv50_disp_chan_mthd *,
-		    struct nv50_disp_root *, int ctrl, int user,
-		    const struct nvkm_oclass *, void *data, u32 size,
-		    struct nvkm_object **);
-	struct nvkm_sclass base;
-	const struct nv50_disp_chan_func *func;
-	const struct nv50_disp_chan_mthd *mthd;
-	struct {
-		int ctrl;
-		int user;
-	} chid;
-};
-
-extern const struct nv50_disp_pioc_oclass nv50_disp_oimm_oclass;
-extern const struct nv50_disp_pioc_oclass nv50_disp_curs_oclass;
-
-extern const struct nv50_disp_pioc_oclass g84_disp_oimm_oclass;
-extern const struct nv50_disp_pioc_oclass g84_disp_curs_oclass;
-
-extern const struct nv50_disp_pioc_oclass gt215_disp_oimm_oclass;
-extern const struct nv50_disp_pioc_oclass gt215_disp_curs_oclass;
-
-extern const struct nv50_disp_pioc_oclass gf119_disp_oimm_oclass;
-extern const struct nv50_disp_pioc_oclass gf119_disp_curs_oclass;
-
-extern const struct nv50_disp_pioc_oclass gk104_disp_oimm_oclass;
-extern const struct nv50_disp_pioc_oclass gk104_disp_curs_oclass;
-
-extern const struct nv50_disp_pioc_oclass gp102_disp_oimm_oclass;
-extern const struct nv50_disp_pioc_oclass gp102_disp_curs_oclass;
-
-int nv50_disp_curs_new(const struct nv50_disp_chan_func *,
-		       const struct nv50_disp_chan_mthd *,
-		       struct nv50_disp_root *, int ctrl, int user,
-		       const struct nvkm_oclass *, void *data, u32 size,
-		       struct nvkm_object **);
-int nv50_disp_oimm_new(const struct nv50_disp_chan_func *,
-		       const struct nv50_disp_chan_mthd *,
-		       struct nv50_disp_root *, int ctrl, int user,
-		       const struct nvkm_oclass *, void *data, u32 size,
-		       struct nvkm_object **);
+extern const struct nv50_disp_chan_mthd gk104_disp_core_mthd;
+extern const struct nv50_disp_chan_mthd gk104_disp_ovly_mthd;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c
index 1baa5c3..cfc54aa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg84.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 const struct nv50_disp_mthd_list
 g84_disp_core_mthd_dac = {
@@ -91,7 +88,7 @@
 };
 
 const struct nv50_disp_chan_mthd
-g84_disp_core_chan_mthd = {
+g84_disp_core_mthd = {
 	.name = "Core",
 	.addr = 0x000000,
 	.prev = 0x000004,
@@ -105,13 +102,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-g84_disp_core_oclass = {
-	.base.oclass = G82_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &nv50_disp_core_func,
-	.mthd = &g84_disp_core_chan_mthd,
-	.chid = 0,
-};
+int
+g84_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		  struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_core_new_(&nv50_disp_core_func, &g84_disp_core_mthd,
+				   disp, 0, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c
index c65c9f3..e911925 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 static const struct nv50_disp_mthd_list
 g94_disp_core_mthd_sor = {
@@ -37,7 +34,7 @@
 };
 
 const struct nv50_disp_chan_mthd
-g94_disp_core_chan_mthd = {
+g94_disp_core_mthd = {
 	.name = "Core",
 	.addr = 0x000000,
 	.prev = 0x000004,
@@ -51,13 +48,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-g94_disp_core_oclass = {
-	.base.oclass = GT206_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &nv50_disp_core_func,
-	.mthd = &g94_disp_core_chan_mthd,
-	.chid = 0,
-};
+int
+g94_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		  struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_core_new_(&nv50_disp_core_func, &g94_disp_core_mthd,
+				   disp, 0, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
index 21fbf89..d162b9cf4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
@@ -21,15 +21,10 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
+#include "channv50.h"
 
-#include <core/client.h>
 #include <subdev/timer.h>
 
-#include <nvif/class.h>
-#include <nvif/unpack.h>
-
 const struct nv50_disp_mthd_list
 gf119_disp_core_mthd_base = {
 	.mthd = 0x0000,
@@ -157,7 +152,7 @@
 };
 
 static const struct nv50_disp_chan_mthd
-gf119_disp_core_chan_mthd = {
+gf119_disp_core_mthd = {
 	.name = "Core",
 	.addr = 0x000000,
 	.prev = -0x020000,
@@ -172,10 +167,9 @@
 };
 
 void
-gf119_disp_core_fini(struct nv50_disp_dmac *chan)
+gf119_disp_core_fini(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 
 	/* deactivate channel */
@@ -188,22 +182,14 @@
 		nvkm_error(subdev, "core fini: %08x\n",
 			   nvkm_rd32(device, 0x610490));
 	}
-
-	/* disable error reporting and completion notification */
-	nvkm_mask(device, 0x610090, 0x00000001, 0x00000000);
-	nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000000);
 }
 
 static int
-gf119_disp_core_init(struct nv50_disp_dmac *chan)
+gf119_disp_core_init(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 
-	/* enable error reporting */
-	nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000001);
-
 	/* initialise channel for dma command submission */
 	nvkm_wr32(device, 0x610494, chan->push);
 	nvkm_wr32(device, 0x610498, 0x00010000);
@@ -225,20 +211,19 @@
 	return 0;
 }
 
-const struct nv50_disp_dmac_func
+const struct nv50_disp_chan_func
 gf119_disp_core_func = {
 	.init = gf119_disp_core_init,
 	.fini = gf119_disp_core_fini,
+	.intr = gf119_disp_chan_intr,
+	.user = nv50_disp_chan_user,
 	.bind = gf119_disp_dmac_bind,
 };
 
-const struct nv50_disp_dmac_oclass
-gf119_disp_core_oclass = {
-	.base.oclass = GF110_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &gf119_disp_core_func,
-	.mthd = &gf119_disp_core_chan_mthd,
-	.chid = 0,
-};
+int
+gf119_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_core_new_(&gf119_disp_core_func, &gf119_disp_core_mthd,
+				   disp, 0, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c
index 088ab22..5c80017 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk104.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 static const struct nv50_disp_mthd_list
 gk104_disp_core_mthd_head = {
@@ -106,7 +103,7 @@
 };
 
 const struct nv50_disp_chan_mthd
-gk104_disp_core_chan_mthd = {
+gk104_disp_core_mthd = {
 	.name = "Core",
 	.addr = 0x000000,
 	.prev = -0x020000,
@@ -120,13 +117,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-gk104_disp_core_oclass = {
-	.base.oclass = GK104_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &gf119_disp_core_func,
-	.mthd = &gk104_disp_core_chan_mthd,
-	.chid = 0,
-};
+int
+gk104_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_core_new_(&gf119_disp_core_func, &gk104_disp_core_mthd,
+				   disp, 0, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk110.c
deleted file mode 100644
index df0f45c..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregk110.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gk110_disp_core_oclass = {
-	.base.oclass = GK110_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &gf119_disp_core_func,
-	.mthd = &gk104_disp_core_chan_mthd,
-	.chid = 0,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm107.c
deleted file mode 100644
index 9e27f8f..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm107.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gm107_disp_core_oclass = {
-	.base.oclass = GM107_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &gf119_disp_core_func,
-	.mthd = &gk104_disp_core_chan_mthd,
-	.chid = 0,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm200.c
deleted file mode 100644
index bb23a86..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregm200.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gm200_disp_core_oclass = {
-	.base.oclass = GM200_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &gf119_disp_core_func,
-	.mthd = &gk104_disp_core_chan_mthd,
-	.chid = 0,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c
deleted file mode 100644
index d5dff66..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gp100_disp_core_oclass = {
-	.base.oclass = GP100_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &gf119_disp_core_func,
-	.mthd = &gk104_disp_core_chan_mthd,
-	.chid = 0,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c
index b0df4b7..5b7f993 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c
@@ -21,23 +21,16 @@
  *
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
+#include "channv50.h"
 
 #include <subdev/timer.h>
 
-#include <nvif/class.h>
-
 static int
-gp102_disp_core_init(struct nv50_disp_dmac *chan)
+gp102_disp_core_init(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 
-	/* enable error reporting */
-	nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000001);
-
 	/* initialise channel for dma command submission */
 	nvkm_wr32(device, 0x611494, chan->push);
 	nvkm_wr32(device, 0x611498, 0x00010000);
@@ -59,20 +52,19 @@
 	return 0;
 }
 
-static const struct nv50_disp_dmac_func
+static const struct nv50_disp_chan_func
 gp102_disp_core_func = {
 	.init = gp102_disp_core_init,
 	.fini = gf119_disp_core_fini,
+	.intr = gf119_disp_chan_intr,
+	.user = nv50_disp_chan_user,
 	.bind = gf119_disp_dmac_bind,
 };
 
-const struct nv50_disp_dmac_oclass
-gp102_disp_core_oclass = {
-	.base.oclass = GP102_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &gp102_disp_core_func,
-	.mthd = &gk104_disp_core_chan_mthd,
-	.chid = 0,
-};
+int
+gp102_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_core_new_(&gp102_disp_core_func, &gk104_disp_core_mthd,
+				   disp, 0, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt200.c
deleted file mode 100644
index b234547..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt200.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gt200_disp_core_oclass = {
-	.base.oclass = GT200_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &nv50_disp_core_func,
-	.mthd = &g84_disp_core_chan_mthd,
-	.chid = 0,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt215.c
deleted file mode 100644
index 8f5ba20..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregt215.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gt215_disp_core_oclass = {
-	.base.oclass = GT214_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &nv50_disp_core_func,
-	.mthd = &g94_disp_core_chan_mthd,
-	.chid = 0,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregv100.c
new file mode 100644
index 0000000..4592d0e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregv100.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "channv50.h"
+
+#include <subdev/timer.h>
+
+const struct nv50_disp_mthd_list
+gv100_disp_core_mthd_base = {
+	.mthd = 0x0000,
+	.addr = 0x000000,
+	.data = {
+		{ 0x0200, 0x680200 },
+		{ 0x0208, 0x680208 },
+		{ 0x020c, 0x68020c },
+		{ 0x0210, 0x680210 },
+		{ 0x0214, 0x680214 },
+		{ 0x0218, 0x680218 },
+		{ 0x021c, 0x68021c },
+		{}
+	}
+};
+
+const struct nv50_disp_mthd_list
+gv100_disp_core_mthd_sor = {
+	.mthd = 0x0020,
+	.addr = 0x000020,
+	.data = {
+		{ 0x0300, 0x680300 },
+		{ 0x0304, 0x680304 },
+		{ 0x0308, 0x680308 },
+		{ 0x030c, 0x68030c },
+		{}
+	}
+};
+
+static const struct nv50_disp_mthd_list
+gv100_disp_core_mthd_wndw = {
+	.mthd = 0x0080,
+	.addr = 0x000080,
+	.data = {
+		{ 0x1000, 0x681000 },
+		{ 0x1004, 0x681004 },
+		{ 0x1008, 0x681008 },
+		{ 0x100c, 0x68100c },
+		{ 0x1010, 0x681010 },
+		{}
+	}
+};
+
+static const struct nv50_disp_mthd_list
+gv100_disp_core_mthd_head = {
+	.mthd = 0x0400,
+	.addr = 0x000400,
+	.data = {
+		{ 0x2000, 0x682000 },
+		{ 0x2004, 0x682004 },
+		{ 0x2008, 0x682008 },
+		{ 0x200c, 0x68200c },
+		{ 0x2014, 0x682014 },
+		{ 0x2018, 0x682018 },
+		{ 0x201c, 0x68201c },
+		{ 0x2020, 0x682020 },
+		{ 0x2028, 0x682028 },
+		{ 0x202c, 0x68202c },
+		{ 0x2030, 0x682030 },
+		{ 0x2038, 0x682038 },
+		{ 0x203c, 0x68203c },
+		{ 0x2048, 0x682048 },
+		{ 0x204c, 0x68204c },
+		{ 0x2050, 0x682050 },
+		{ 0x2054, 0x682054 },
+		{ 0x2058, 0x682058 },
+		{ 0x205c, 0x68205c },
+		{ 0x2060, 0x682060 },
+		{ 0x2064, 0x682064 },
+		{ 0x2068, 0x682068 },
+		{ 0x206c, 0x68206c },
+		{ 0x2070, 0x682070 },
+		{ 0x2074, 0x682074 },
+		{ 0x2078, 0x682078 },
+		{ 0x207c, 0x68207c },
+		{ 0x2080, 0x682080 },
+		{ 0x2088, 0x682088 },
+		{ 0x2090, 0x682090 },
+		{ 0x209c, 0x68209c },
+		{ 0x20a0, 0x6820a0 },
+		{ 0x20a4, 0x6820a4 },
+		{ 0x20a8, 0x6820a8 },
+		{ 0x20ac, 0x6820ac },
+		{ 0x218c, 0x68218c },
+		{ 0x2194, 0x682194 },
+		{ 0x2198, 0x682198 },
+		{ 0x219c, 0x68219c },
+		{ 0x21a0, 0x6821a0 },
+		{ 0x21a4, 0x6821a4 },
+		{ 0x2214, 0x682214 },
+		{ 0x2218, 0x682218 },
+		{}
+	}
+};
+
+static const struct nv50_disp_chan_mthd
+gv100_disp_core_mthd = {
+	.name = "Core",
+	.addr = 0x000000,
+	.prev = 0x008000,
+	.data = {
+		{ "Global", 1, &gv100_disp_core_mthd_base },
+		{    "SOR", 4, &gv100_disp_core_mthd_sor  },
+		{ "WINDOW", 8, &gv100_disp_core_mthd_wndw },
+		{   "HEAD", 4, &gv100_disp_core_mthd_head },
+		{}
+	}
+};
+
+static int
+gv100_disp_core_idle(struct nv50_disp_chan *chan)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	nvkm_msec(device, 2000,
+		u32 stat = nvkm_rd32(device, 0x610630);
+		if ((stat & 0x001f0000) == 0x000b0000)
+			return 0;
+	);
+	return -EBUSY;
+}
+
+static u64
+gv100_disp_core_user(struct nv50_disp_chan *chan, u64 *psize)
+{
+	*psize = 0x10000;
+	return 0x680000;
+}
+
+static void
+gv100_disp_core_intr(struct nv50_disp_chan *chan, bool en)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u32 mask = 0x00000001;
+	const u32 data = en ? mask : 0;
+	nvkm_mask(device, 0x611dac, mask, data);
+}
+
+static void
+gv100_disp_core_fini(struct nv50_disp_chan *chan)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	nvkm_mask(device, 0x6104e0, 0x00000010, 0x00000000);
+	gv100_disp_core_idle(chan);
+	nvkm_mask(device, 0x6104e0, 0x00000002, 0x00000000);
+}
+
+static int
+gv100_disp_core_init(struct nv50_disp_chan *chan)
+{
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+
+	nvkm_wr32(device, 0x610b24, lower_32_bits(chan->push));
+	nvkm_wr32(device, 0x610b20, upper_32_bits(chan->push));
+	nvkm_wr32(device, 0x610b28, 0x00000001);
+	nvkm_wr32(device, 0x610b2c, 0x00000040);
+
+	nvkm_mask(device, 0x6104e0, 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x680000, 0x00000000);
+	nvkm_wr32(device, 0x6104e0, 0x00000013);
+	return gv100_disp_core_idle(chan);
+}
+
+static const struct nv50_disp_chan_func
+gv100_disp_core = {
+	.init = gv100_disp_core_init,
+	.fini = gv100_disp_core_fini,
+	.intr = gv100_disp_core_intr,
+	.user = gv100_disp_core_user,
+	.bind = gv100_disp_dmac_bind,
+};
+
+int
+gv100_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_core_new_(&gv100_disp_core, &gv100_disp_core_mthd,
+				   disp, 0, oclass, argv, argc, pobject);
+}
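
[Note: gv100_disp_core_idle() above is a bounded poll: nvkm_msec(device, 2000,
...) retries its body for up to 2000ms, succeeding once bits 20:16 of register
0x610630 read 0x0b. The same pattern written out long-hand, with stand-in
helpers (read_status()/now_ns() are illustrative, not nvkm API):

#include <stdint.h>
#include <time.h>

/* Stand-in: the real code reads the core channel status at 0x610630. */
static uint32_t read_status(void) { return 0x000b0000; }

static uint64_t
now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static int
poll_core_idle(void)
{
	const uint64_t deadline = now_ns() + 2000ull * 1000000; /* 2000ms budget */
	do {
		if ((read_status() & 0x001f0000) == 0x000b0000)
			return 0;	/* core channel reports idle */
	} while (now_ns() < deadline);
	return -16;			/* i.e. -EBUSY */
}
]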
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c
index b547c8b..55db9a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/corenv50.c
@@ -21,32 +21,30 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
+#include "channv50.h"
 
 #include <core/client.h>
 #include <subdev/timer.h>
 
-#include <nvif/class.h>
 #include <nvif/cl507d.h>
 #include <nvif/unpack.h>
 
 int
-nv50_disp_core_new(const struct nv50_disp_dmac_func *func,
-		   const struct nv50_disp_chan_mthd *mthd,
-		   struct nv50_disp_root *root, int chid,
-		   const struct nvkm_oclass *oclass, void *data, u32 size,
-		   struct nvkm_object **pobject)
+nv50_disp_core_new_(const struct nv50_disp_chan_func *func,
+		    const struct nv50_disp_chan_mthd *mthd,
+		    struct nv50_disp *disp, int chid,
+		    const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nvkm_object **pobject)
 {
 	union {
 		struct nv50_disp_core_channel_dma_v0 v0;
-	} *args = data;
+	} *args = argv;
 	struct nvkm_object *parent = oclass->parent;
 	u64 push;
 	int ret = -ENOSYS;
 
-	nvif_ioctl(parent, "create disp core channel dma size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+	nvif_ioctl(parent, "create disp core channel dma size %d\n", argc);
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
 		nvif_ioctl(parent, "create disp core channel dma vers %d "
 				   "pushbuf %016llx\n",
 			   args->v0.version, args->v0.pushbuf);
@@ -54,7 +52,7 @@
 	} else
 		return ret;
 
-	return nv50_disp_dmac_new_(func, mthd, root, chid, 0,
+	return nv50_disp_dmac_new_(func, mthd, disp, chid, 0,
 				   push, oclass, pobject);
 }
 
@@ -151,7 +149,7 @@
 };
 
 static const struct nv50_disp_chan_mthd
-nv50_disp_core_chan_mthd = {
+nv50_disp_core_mthd = {
 	.name = "Core",
 	.addr = 0x000000,
 	.prev = 0x000004,
@@ -166,10 +164,9 @@
 };
 
 static void
-nv50_disp_core_fini(struct nv50_disp_dmac *chan)
+nv50_disp_core_fini(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 
 	/* deactivate channel */
@@ -182,21 +179,14 @@
 		nvkm_error(subdev, "core fini: %08x\n",
 			   nvkm_rd32(device, 0x610200));
 	}
-
-	/* disable error reporting and completion notifications */
-	nvkm_mask(device, 0x610028, 0x00010001, 0x00000000);
 }
 
 static int
-nv50_disp_core_init(struct nv50_disp_dmac *chan)
+nv50_disp_core_init(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 
-	/* enable error reporting */
-	nvkm_mask(device, 0x610028, 0x00010000, 0x00010000);
-
 	/* attempt to unstick channel from some unknown state */
 	if ((nvkm_rd32(device, 0x610200) & 0x009f0000) == 0x00020000)
 		nvkm_mask(device, 0x610200, 0x00800000, 0x00800000);
@@ -224,20 +214,19 @@
 	return 0;
 }
 
-const struct nv50_disp_dmac_func
+const struct nv50_disp_chan_func
 nv50_disp_core_func = {
 	.init = nv50_disp_core_init,
 	.fini = nv50_disp_core_fini,
+	.intr = nv50_disp_chan_intr,
+	.user = nv50_disp_chan_user,
 	.bind = nv50_disp_dmac_bind,
 };
 
-const struct nv50_disp_dmac_oclass
-nv50_disp_core_oclass = {
-	.base.oclass = NV50_DISP_CORE_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_core_new,
-	.func = &nv50_disp_core_func,
-	.mthd = &nv50_disp_core_chan_mthd,
-	.chid = 0,
-};
+int
+nv50_disp_core_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		   struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_core_new_(&nv50_disp_core_func, &nv50_disp_core_mthd,
+				   disp, 0, oclass, argv, argc, pobject);
+}
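
An aside on the argv/argc renaming in the constructor above: the nouveau ioctl path hands the channel constructor a raw argument blob whose leading bytes carry a structure version, and nvif_unpack() checks size and version before the union member is touched. Below is a minimal userspace sketch of that convention only; the struct layout and function name are hypothetical, and the real macro additionally handles trailing data and version ranges, which this ignores.

#include <errno.h>

/* Hypothetical v0 argument layout; not nouveau's actual definition. */
struct chan_args_v0 {
	unsigned char version;		/* must be 0 for this layout */
	unsigned char pad[7];
	unsigned long long pushbuf;
};

static int
my_chan_ctor(void *argv, unsigned int argc)
{
	union {
		struct chan_args_v0 v0;
	} *args = argv;

	if (argc < sizeof(args->v0) || args->v0.version != 0)
		return -ENOSYS;		/* unknown structure revision */
	/* args->v0.pushbuf is now validated and safe to read */
	return 0;
}
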
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c
deleted file mode 100644
index fa781b5..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "channv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-g84_disp_curs_oclass = {
-	.base.oclass = G82_DISP_CURSOR,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_curs_new,
-	.func = &nv50_disp_pioc_func,
-	.chid = { 7, 7 },
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c
index 2be6fb0..cdda365 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c
@@ -22,16 +22,11 @@
  * Authors: Ben Skeggs
  */
 #include "channv50.h"
-#include "rootnv50.h"
 
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-gf119_disp_curs_oclass = {
-	.base.oclass = GF110_DISP_CURSOR,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_curs_new,
-	.func = &gf119_disp_pioc_func,
-	.chid = { 13, 13 },
-};
+int
+gf119_disp_curs_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_curs_new_(&gf119_disp_pioc_func, disp, 13, 13,
+				   oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c
deleted file mode 100644
index 2a99db4..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-gk104_disp_curs_oclass = {
-	.base.oclass = GK104_DISP_CURSOR,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_curs_new,
-	.func = &gf119_disp_pioc_func,
-	.chid = { 13, 13 },
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c
index e958210..1a4601f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c
@@ -22,16 +22,11 @@
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 #include "channv50.h"
-#include "rootnv50.h"
 
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-gp102_disp_curs_oclass = {
-	.base.oclass = GK104_DISP_CURSOR,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_curs_new,
-	.func = &gf119_disp_pioc_func,
-	.chid = { 13, 17 },
-};
+int
+gp102_disp_curs_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_curs_new_(&gf119_disp_pioc_func, disp, 13, 17,
+				   oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c
deleted file mode 100644
index 00a7f35..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "channv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-gt215_disp_curs_oclass = {
-	.base.oclass = GT214_DISP_CURSOR,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_curs_new,
-	.func = &nv50_disp_pioc_func,
-	.chid = { 7, 7 },
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgv100.c
new file mode 100644
index 0000000..a3e4f69
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgv100.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "channv50.h"
+
+#include <subdev/timer.h>
+
+static int
+gv100_disp_curs_idle(struct nv50_disp_chan *chan)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u32 soff = (chan->chid.ctrl - 1) * 0x04;
+	nvkm_msec(device, 2000,
+		u32 stat = nvkm_rd32(device, 0x610664 + soff);
+		if ((stat & 0x00070000) == 0x00040000)
+			return 0;
+	);
+	return -EBUSY;
+}
+
+static void
+gv100_disp_curs_intr(struct nv50_disp_chan *chan, bool en)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u32 mask = 0x00010000 << chan->head;
+	const u32 data = en ? mask : 0;
+	nvkm_mask(device, 0x611dac, mask, data);
+}
+
+static void
+gv100_disp_curs_fini(struct nv50_disp_chan *chan)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u32 hoff = chan->chid.ctrl * 4;
+	nvkm_mask(device, 0x6104e0 + hoff, 0x00000010, 0x00000010);
+	gv100_disp_curs_idle(chan);
+	nvkm_mask(device, 0x6104e0 + hoff, 0x00000001, 0x00000000);
+}
+
+static int
+gv100_disp_curs_init(struct nv50_disp_chan *chan)
+{
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	nvkm_wr32(device, 0x6104e0 + chan->chid.ctrl * 4, 0x00000001);
+	return gv100_disp_curs_idle(chan);
+}
+
+static const struct nv50_disp_chan_func
+gv100_disp_curs = {
+	.init = gv100_disp_curs_init,
+	.fini = gv100_disp_curs_fini,
+	.intr = gv100_disp_curs_intr,
+	.user = gv100_disp_chan_user,
+};
+
+int
+gv100_disp_curs_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_curs_new_(&gv100_disp_curs, disp, 73, 73,
+				   oclass, argv, argc, pobject);
+}
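
gv100_disp_curs_idle() above leans on nvkm_msec(), which re-evaluates its body until it succeeds or the millisecond budget runs out. The following is a rough userspace model of that poll-with-timeout shape, assuming a read_status() register accessor that does not exist in the source; it makes no claim to match the real macro's exact semantics (the nanosecond arithmetic here is deliberately simplified).

#include <stdint.h>
#include <time.h>

extern uint32_t read_status(void);	/* assumed register accessor */

/* Sketch: poll a status word until it reports idle or ~2000ms elapse. */
static int wait_idle(void)
{
	struct timespec start, now;
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		if ((read_status() & 0x00070000) == 0x00040000)
			return 0;	/* channel reports idle */
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while ((now.tv_sec - start.tv_sec) * 1000 +
		 (now.tv_nsec - start.tv_nsec) / 1000000 < 2000);
	return -1;			/* -EBUSY in the kernel version */
}
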
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c
index ab51121..d297585 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c
@@ -23,30 +23,26 @@
  */
 #include "channv50.h"
 #include "head.h"
-#include "rootnv50.h"
 
 #include <core/client.h>
 
-#include <nvif/class.h>
 #include <nvif/cl507a.h>
 #include <nvif/unpack.h>
 
 int
-nv50_disp_curs_new(const struct nv50_disp_chan_func *func,
-		   const struct nv50_disp_chan_mthd *mthd,
-		   struct nv50_disp_root *root, int ctrl, int user,
-		   const struct nvkm_oclass *oclass, void *data, u32 size,
-		   struct nvkm_object **pobject)
+nv50_disp_curs_new_(const struct nv50_disp_chan_func *func,
+		    struct nv50_disp *disp, int ctrl, int user,
+		    const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nvkm_object **pobject)
 {
 	union {
 		struct nv50_disp_cursor_v0 v0;
-	} *args = data;
+	} *args = argv;
 	struct nvkm_object *parent = oclass->parent;
-	struct nv50_disp *disp = root->disp;
 	int head, ret = -ENOSYS;
 
-	nvif_ioctl(parent, "create disp cursor size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+	nvif_ioctl(parent, "create disp cursor size %d\n", argc);
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
 		nvif_ioctl(parent, "create disp cursor vers %d head %d\n",
 			   args->v0.version, args->v0.head);
 		if (!nvkm_head_find(&disp->base, args->v0.head))
@@ -55,16 +51,14 @@
 	} else
 		return ret;
 
-	return nv50_disp_chan_new_(func, mthd, root, ctrl + head, user + head,
+	return nv50_disp_chan_new_(func, NULL, disp, ctrl + head, user + head,
 				   head, oclass, pobject);
 }
 
-const struct nv50_disp_pioc_oclass
-nv50_disp_curs_oclass = {
-	.base.oclass = NV50_DISP_CURSOR,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_curs_new,
-	.func = &nv50_disp_pioc_func,
-	.chid = { 7, 7 },
-};
+int
+nv50_disp_curs_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		   struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_curs_new_(&nv50_disp_pioc_func, disp, 7, 7,
+				   oclass, argv, argc, pobject);
+}
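
The ctrl/user pairs passed to nv50_disp_curs_new_() are base channel indices to which the head number is added (ctrl + head, user + head above): the cursor channel for head h is 7 + h on nv50, 13 + h on gf119/gk104, 73 + h on gv100, and gp102 splits them into ctrl 13 + h with user 17 + h. A tiny illustration of that arithmetic, purely for orientation:

#include <stdio.h>

int main(void)
{
	const int ctrl_base = 13, user_base = 17;	/* gp102 values */
	for (int head = 0; head < 4; head++)
		printf("head %d: ctrl %d user %d\n",
		       head, ctrl_base + head, user_base + head);
	return 0;
}
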
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c
index dbd032e..71a9477 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacgf119.c
@@ -58,8 +58,13 @@
 int
 gf119_dac_new(struct nvkm_disp *disp, int id)
 {
-	struct nvkm_device *device = disp->engine.subdev.device;
-	if (!(nvkm_rd32(device, 0x612004) & (0x00000010 << id)))
-		return 0;
 	return nvkm_ior_new_(&gf119_dac, disp, DAC, id);
 }
+
+int
+gf119_dac_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = (nvkm_rd32(device, 0x612004) & 0x000000f0) >> 4;
+	return 4;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c
index 85e692b..558012d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dacnv50.c
@@ -109,8 +109,13 @@
 int
 nv50_dac_new(struct nvkm_disp *disp, int id)
 {
-	struct nvkm_device *device = disp->engine.subdev.device;
-	if (!(nvkm_rd32(device, 0x610184) & (0x00100000 << id)))
-		return 0;
 	return nvkm_ior_new_(&nv50_dac, disp, DAC, id);
 }
+
+int
+nv50_dac_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = (nvkm_rd32(device, 0x610184) & 0x00700000) >> 20;
+	return 3;
+}
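
Both DAC diffs replace a per-unit existence check at .new() time with a .cnt() hook that reports a presence bitmask through *pmask and returns the maximum unit count; presumably the common code then walks the mask (with the kernel's for_each_set_bit()) instead of probing every index. A self-contained userspace sketch of that consumption, with the fake dac_cnt() standing in for the register read:

#include <stdio.h>

/* Model of a cnt() hook: mask of present units via *pmask, max count back. */
static int dac_cnt(unsigned long *pmask)
{
	*pmask = 0x7;	/* pretend register read: DACs 0..2 present */
	return 3;	/* maximum number of DACs on this chipset */
}

int main(void)
{
	unsigned long mask;
	int nr = dac_cnt(&mask);
	for (int i = 0; i < nr; i++)
		if (mask & (1ul << i))
			printf("create DAC %d\n", i);
	return 0;
}
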
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
index ce7cd74..edf7dd0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
@@ -21,29 +21,27 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
+#include "channv50.h"
 
 #include <core/ramht.h>
 #include <subdev/timer.h>
 
 int
-gf119_disp_dmac_bind(struct nv50_disp_dmac *chan,
+gf119_disp_dmac_bind(struct nv50_disp_chan *chan,
 		     struct nvkm_object *object, u32 handle)
 {
-	return nvkm_ramht_insert(chan->base.root->ramht, object,
-				 chan->base.chid.user, -9, handle,
-				 chan->base.chid.user << 27 | 0x00000001);
+	return nvkm_ramht_insert(chan->disp->ramht, object,
+				 chan->chid.user, -9, handle,
+				 chan->chid.user << 27 | 0x00000001);
 }
 
 void
-gf119_disp_dmac_fini(struct nv50_disp_dmac *chan)
+gf119_disp_dmac_fini(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int ctrl = chan->base.chid.ctrl;
-	int user = chan->base.chid.user;
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
 	/* deactivate channel */
 	nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00001010, 0x00001000);
@@ -55,23 +53,15 @@
 		nvkm_error(subdev, "ch %d fini: %08x\n", user,
 			   nvkm_rd32(device, 0x610490 + (ctrl * 0x10)));
 	}
-
-	/* disable error reporting and completion notification */
-	nvkm_mask(device, 0x610090, 0x00000001 << user, 0x00000000);
-	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000000);
 }
 
 static int
-gf119_disp_dmac_init(struct nv50_disp_dmac *chan)
+gf119_disp_dmac_init(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int ctrl = chan->base.chid.ctrl;
-	int user = chan->base.chid.user;
-
-	/* enable error reporting */
-	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user);
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
 	/* initialise channel for dma command submission */
 	nvkm_wr32(device, 0x610494 + (ctrl * 0x0010), chan->push);
@@ -94,9 +84,11 @@
 	return 0;
 }
 
-const struct nv50_disp_dmac_func
+const struct nv50_disp_chan_func
 gf119_disp_dmac_func = {
 	.init = gf119_disp_dmac_init,
 	.fini = gf119_disp_dmac_fini,
+	.intr = gf119_disp_chan_intr,
+	.user = nv50_disp_chan_user,
 	.bind = gf119_disp_dmac_bind,
 };
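
The final argument to nvkm_ramht_insert() in gf119_disp_dmac_bind() is the context word the hardware reads back from the hash table: gf119 packs the user channel ID into the top bits with a low "valid" bit, so channel 1 binds with 1 << 27 | 1 = 0x08000001 (the gv100 variant later in this series uses chid << 25 | 0x40 instead). A hedged sketch of just the packing, independent of the real RAMHT helper; the field layout is inferred from the shifts in the diff, not from documentation:

#include <stdint.h>
#include <stdio.h>

static uint32_t gf119_ctx(uint32_t chid)
{
	return chid << 27 | 0x00000001;	/* chid field plus "valid" bit */
}

int main(void)
{
	printf("chid 1 -> %08x\n", gf119_ctx(1));	/* prints 08000001 */
	return 0;
}
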
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c
index cdead95..f21a433 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c
@@ -21,22 +21,17 @@
  *
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
+#include "channv50.h"
 
 #include <subdev/timer.h>
 
 static int
-gp102_disp_dmac_init(struct nv50_disp_dmac *chan)
+gp102_disp_dmac_init(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int ctrl = chan->base.chid.ctrl;
-	int user = chan->base.chid.user;
-
-	/* enable error reporting */
-	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user);
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
 	/* initialise channel for dma command submission */
 	nvkm_wr32(device, 0x611494 + (ctrl * 0x0010), chan->push);
@@ -59,9 +54,11 @@
 	return 0;
 }
 
-const struct nv50_disp_dmac_func
+const struct nv50_disp_chan_func
 gp102_disp_dmac_func = {
 	.init = gp102_disp_dmac_init,
 	.fini = gf119_disp_dmac_fini,
+	.intr = gf119_disp_chan_intr,
+	.user = nv50_disp_chan_user,
 	.bind = gf119_disp_dmac_bind,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgv100.c
new file mode 100644
index 0000000..eac0e42
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgv100.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "channv50.h"
+
+#include <core/ramht.h>
+#include <subdev/timer.h>
+
+static int
+gv100_disp_dmac_idle(struct nv50_disp_chan *chan)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u32 soff = (chan->chid.ctrl - 1) * 0x04;
+	nvkm_msec(device, 2000,
+		u32 stat = nvkm_rd32(device, 0x610664 + soff);
+		if ((stat & 0x000f0000) == 0x00040000)
+			return 0;
+	);
+	return -EBUSY;
+}
+
+int
+gv100_disp_dmac_bind(struct nv50_disp_chan *chan,
+		     struct nvkm_object *object, u32 handle)
+{
+	return nvkm_ramht_insert(chan->disp->ramht, object,
+				 chan->chid.user, -9, handle,
+				 chan->chid.user << 25 | 0x00000040);
+}
+
+void
+gv100_disp_dmac_fini(struct nv50_disp_chan *chan)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u32 coff = chan->chid.ctrl * 0x04;
+	nvkm_mask(device, 0x6104e0 + coff, 0x00000010, 0x00000000);
+	gv100_disp_dmac_idle(chan);
+	nvkm_mask(device, 0x6104e0 + coff, 0x00000002, 0x00000000);
+}
+
+int
+gv100_disp_dmac_init(struct nv50_disp_chan *chan)
+{
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 uoff = (chan->chid.ctrl - 1) * 0x1000;
+	const u32 poff = chan->chid.ctrl * 0x10;
+	const u32 coff = chan->chid.ctrl * 0x04;
+
+	nvkm_wr32(device, 0x610b24 + poff, lower_32_bits(chan->push));
+	nvkm_wr32(device, 0x610b20 + poff, upper_32_bits(chan->push));
+	nvkm_wr32(device, 0x610b28 + poff, 0x00000001);
+	nvkm_wr32(device, 0x610b2c + poff, 0x00000040);
+
+	nvkm_mask(device, 0x6104e0 + coff, 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x690000 + uoff, 0x00000000);
+	nvkm_wr32(device, 0x6104e0 + coff, 0x00000013);
+	return gv100_disp_dmac_idle(chan);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
index 070ec5e..9e8a9d7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
@@ -21,176 +21,68 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
+#include "channv50.h"
 
 #include <core/client.h>
-#include <core/oproxy.h>
 #include <core/ramht.h>
 #include <subdev/fb.h>
+#include <subdev/mmu.h>
 #include <subdev/timer.h>
 #include <engine/dma.h>
 
-struct nv50_disp_dmac_object {
-	struct nvkm_oproxy oproxy;
-	struct nv50_disp_root *root;
-	int hash;
-};
-
-static void
-nv50_disp_dmac_child_del_(struct nvkm_oproxy *base)
-{
-	struct nv50_disp_dmac_object *object =
-		container_of(base, typeof(*object), oproxy);
-	nvkm_ramht_remove(object->root->ramht, object->hash);
-}
-
-static const struct nvkm_oproxy_func
-nv50_disp_dmac_child_func_ = {
-	.dtor[0] = nv50_disp_dmac_child_del_,
-};
-
-static int
-nv50_disp_dmac_child_new_(struct nv50_disp_chan *base,
-			  const struct nvkm_oclass *oclass,
-			  void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nv50_disp_dmac *chan = nv50_disp_dmac(base);
-	struct nv50_disp_root *root = chan->base.root;
-	struct nvkm_device *device = root->disp->base.engine.subdev.device;
-	const struct nvkm_device_oclass *sclass = oclass->priv;
-	struct nv50_disp_dmac_object *object;
-	int ret;
-
-	if (!(object = kzalloc(sizeof(*object), GFP_KERNEL)))
-		return -ENOMEM;
-	nvkm_oproxy_ctor(&nv50_disp_dmac_child_func_, oclass, &object->oproxy);
-	object->root = root;
-	*pobject = &object->oproxy.base;
-
-	ret = sclass->ctor(device, oclass, data, size, &object->oproxy.object);
-	if (ret)
-		return ret;
-
-	object->hash = chan->func->bind(chan, object->oproxy.object,
-					      oclass->handle);
-	if (object->hash < 0)
-		return object->hash;
-
-	return 0;
-}
-
-static int
-nv50_disp_dmac_child_get_(struct nv50_disp_chan *base, int index,
-			  struct nvkm_oclass *sclass)
-{
-	struct nv50_disp_dmac *chan = nv50_disp_dmac(base);
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_device *device = disp->base.engine.subdev.device;
-	const struct nvkm_device_oclass *oclass = NULL;
-
-	sclass->engine = nvkm_device_engine(device, NVKM_ENGINE_DMAOBJ);
-	if (sclass->engine && sclass->engine->func->base.sclass) {
-		sclass->engine->func->base.sclass(sclass, index, &oclass);
-		if (oclass) {
-			sclass->priv = oclass;
-			return 0;
-		}
-	}
-
-	return -EINVAL;
-}
-
-static void
-nv50_disp_dmac_fini_(struct nv50_disp_chan *base)
-{
-	struct nv50_disp_dmac *chan = nv50_disp_dmac(base);
-	chan->func->fini(chan);
-}
-
-static int
-nv50_disp_dmac_init_(struct nv50_disp_chan *base)
-{
-	struct nv50_disp_dmac *chan = nv50_disp_dmac(base);
-	return chan->func->init(chan);
-}
-
-static void *
-nv50_disp_dmac_dtor_(struct nv50_disp_chan *base)
-{
-	return nv50_disp_dmac(base);
-}
-
-static const struct nv50_disp_chan_func
-nv50_disp_dmac_func_ = {
-	.dtor = nv50_disp_dmac_dtor_,
-	.init = nv50_disp_dmac_init_,
-	.fini = nv50_disp_dmac_fini_,
-	.child_get = nv50_disp_dmac_child_get_,
-	.child_new = nv50_disp_dmac_child_new_,
-};
-
 int
-nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
+nv50_disp_dmac_new_(const struct nv50_disp_chan_func *func,
 		    const struct nv50_disp_chan_mthd *mthd,
-		    struct nv50_disp_root *root, int chid, int head, u64 push,
+		    struct nv50_disp *disp, int chid, int head, u64 push,
 		    const struct nvkm_oclass *oclass,
 		    struct nvkm_object **pobject)
 {
 	struct nvkm_client *client = oclass->client;
-	struct nvkm_dmaobj *dmaobj;
-	struct nv50_disp_dmac *chan;
+	struct nv50_disp_chan *chan;
 	int ret;
 
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-	chan->func = func;
-
-	ret = nv50_disp_chan_ctor(&nv50_disp_dmac_func_, mthd, root,
-				  chid, chid, head, oclass, &chan->base);
+	ret = nv50_disp_chan_new_(func, mthd, disp, chid, chid, head, oclass,
+				  pobject);
+	chan = nv50_disp_chan(*pobject);
 	if (ret)
 		return ret;
 
-	dmaobj = nvkm_dmaobj_search(client, push);
-	if (IS_ERR(dmaobj))
-		return PTR_ERR(dmaobj);
+	chan->memory = nvkm_umem_search(client, push);
+	if (IS_ERR(chan->memory))
+		return PTR_ERR(chan->memory);
 
-	if (dmaobj->limit - dmaobj->start != 0xfff)
+	if (nvkm_memory_size(chan->memory) < 0x1000)
 		return -EINVAL;
 
-	switch (dmaobj->target) {
-	case NV_MEM_TARGET_VRAM:
-		chan->push = 0x00000001 | dmaobj->start >> 8;
-		break;
-	case NV_MEM_TARGET_PCI_NOSNOOP:
-		chan->push = 0x00000003 | dmaobj->start >> 8;
-		break;
+	switch (nvkm_memory_target(chan->memory)) {
+	case NVKM_MEM_TARGET_VRAM: chan->push = 0x00000001; break;
+	case NVKM_MEM_TARGET_NCOH: chan->push = 0x00000002; break;
+	case NVKM_MEM_TARGET_HOST: chan->push = 0x00000003; break;
 	default:
 		return -EINVAL;
 	}
 
+	chan->push |= nvkm_memory_addr(chan->memory) >> 8;
 	return 0;
 }
 
 int
-nv50_disp_dmac_bind(struct nv50_disp_dmac *chan,
+nv50_disp_dmac_bind(struct nv50_disp_chan *chan,
 		    struct nvkm_object *object, u32 handle)
 {
-	return nvkm_ramht_insert(chan->base.root->ramht, object,
-				 chan->base.chid.user, -10, handle,
-				 chan->base.chid.user << 28 |
-				 chan->base.chid.user);
+	return nvkm_ramht_insert(chan->disp->ramht, object,
+				 chan->chid.user, -10, handle,
+				 chan->chid.user << 28 |
+				 chan->chid.user);
 }
 
 static void
-nv50_disp_dmac_fini(struct nv50_disp_dmac *chan)
+nv50_disp_dmac_fini(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int ctrl = chan->base.chid.ctrl;
-	int user = chan->base.chid.user;
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
 	/* deactivate channel */
 	nvkm_mask(device, 0x610200 + (ctrl * 0x0010), 0x00001010, 0x00001000);
@@ -202,22 +94,15 @@
 		nvkm_error(subdev, "ch %d fini timeout, %08x\n", user,
 			   nvkm_rd32(device, 0x610200 + (ctrl * 0x10)));
 	}
-
-	/* disable error reporting and completion notifications */
-	nvkm_mask(device, 0x610028, 0x00010001 << user, 0x00000000 << user);
 }
 
 static int
-nv50_disp_dmac_init(struct nv50_disp_dmac *chan)
+nv50_disp_dmac_init(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->base.root->disp;
-	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_subdev *subdev = &chan->disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int ctrl = chan->base.chid.ctrl;
-	int user = chan->base.chid.user;
-
-	/* enable error reporting */
-	nvkm_mask(device, 0x610028, 0x00010000 << user, 0x00010000 << user);
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
 	/* initialise channel for dma command submission */
 	nvkm_wr32(device, 0x610204 + (ctrl * 0x0010), chan->push);
@@ -240,9 +125,11 @@
 	return 0;
 }
 
-const struct nv50_disp_dmac_func
+const struct nv50_disp_chan_func
 nv50_disp_dmac_func = {
 	.init = nv50_disp_dmac_init,
 	.fini = nv50_disp_dmac_fini,
+	.intr = nv50_disp_chan_intr,
+	.user = nv50_disp_chan_user,
 	.bind = nv50_disp_dmac_bind,
 };
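
The rewritten nv50_disp_dmac_new_() above swaps the dmaobj lookup for nvkm_umem_search(), relaxes the exact 4KiB check (limit - start == 0xfff) to a minimum-size check, and derives the 32-bit push-buffer descriptor from the memory target OR'd with the address shifted right by 8. As a worked example, a VRAM pushbuf at 0x200000 yields 0x00000001 | (0x200000 >> 8) = 0x00002001. A standalone sketch of that encoding (the enum names here are illustrative, not nouveau's):

#include <stdint.h>
#include <stdio.h>

enum target { T_VRAM = 1, T_NCOH = 2, T_HOST = 3 };	/* illustrative */

/* Sketch: disp pushbuf descriptor = target code | (addr >> 8). */
static uint32_t push_word(enum target t, uint64_t addr)
{
	return (uint32_t)t | (uint32_t)(addr >> 8);
}

int main(void)
{
	printf("%08x\n", push_word(T_VRAM, 0x200000));	/* prints 00002001 */
	return 0;
}
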
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
deleted file mode 100644
index f9b9821..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NV50_DISP_DMAC_H__
-#define __NV50_DISP_DMAC_H__
-#define nv50_disp_dmac(p) container_of((p), struct nv50_disp_dmac, base)
-#include "channv50.h"
-
-struct nv50_disp_dmac {
-	const struct nv50_disp_dmac_func *func;
-	struct nv50_disp_chan base;
-	u32 push;
-};
-
-struct nv50_disp_dmac_func {
-	int  (*init)(struct nv50_disp_dmac *);
-	void (*fini)(struct nv50_disp_dmac *);
-	int  (*bind)(struct nv50_disp_dmac *, struct nvkm_object *, u32 handle);
-};
-
-int nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *,
-			const struct nv50_disp_chan_mthd *,
-			struct nv50_disp_root *, int chid, int head, u64 push,
-			const struct nvkm_oclass *, struct nvkm_object **);
-
-extern const struct nv50_disp_dmac_func nv50_disp_dmac_func;
-int nv50_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
-extern const struct nv50_disp_dmac_func nv50_disp_core_func;
-
-extern const struct nv50_disp_dmac_func gf119_disp_dmac_func;
-void gf119_disp_dmac_fini(struct nv50_disp_dmac *);
-int gf119_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
-extern const struct nv50_disp_dmac_func gf119_disp_core_func;
-void gf119_disp_core_fini(struct nv50_disp_dmac *);
-
-extern const struct nv50_disp_dmac_func gp102_disp_dmac_func;
-
-struct nv50_disp_dmac_oclass {
-	int (*ctor)(const struct nv50_disp_dmac_func *,
-		    const struct nv50_disp_chan_mthd *,
-		    struct nv50_disp_root *, int chid,
-		    const struct nvkm_oclass *, void *data, u32 size,
-		    struct nvkm_object **);
-	struct nvkm_sclass base;
-	const struct nv50_disp_dmac_func *func;
-	const struct nv50_disp_chan_mthd *mthd;
-	int chid;
-};
-
-int nv50_disp_core_new(const struct nv50_disp_dmac_func *,
-		       const struct nv50_disp_chan_mthd *,
-		       struct nv50_disp_root *, int chid,
-		       const struct nvkm_oclass *oclass, void *data, u32 size,
-		       struct nvkm_object **);
-int nv50_disp_base_new(const struct nv50_disp_dmac_func *,
-		       const struct nv50_disp_chan_mthd *,
-		       struct nv50_disp_root *, int chid,
-		       const struct nvkm_oclass *oclass, void *data, u32 size,
-		       struct nvkm_object **);
-int nv50_disp_ovly_new(const struct nv50_disp_dmac_func *,
-		       const struct nv50_disp_chan_mthd *,
-		       struct nv50_disp_root *, int chid,
-		       const struct nvkm_oclass *oclass, void *data, u32 size,
-		       struct nvkm_object **);
-
-extern const struct nv50_disp_dmac_oclass nv50_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass nv50_disp_base_oclass;
-extern const struct nv50_disp_dmac_oclass nv50_disp_ovly_oclass;
-
-extern const struct nv50_disp_dmac_oclass g84_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass g84_disp_base_oclass;
-extern const struct nv50_disp_dmac_oclass g84_disp_ovly_oclass;
-
-extern const struct nv50_disp_dmac_oclass g94_disp_core_oclass;
-
-extern const struct nv50_disp_dmac_oclass gt200_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass gt200_disp_base_oclass;
-extern const struct nv50_disp_dmac_oclass gt200_disp_ovly_oclass;
-
-extern const struct nv50_disp_dmac_oclass gt215_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass gt215_disp_base_oclass;
-extern const struct nv50_disp_dmac_oclass gt215_disp_ovly_oclass;
-
-extern const struct nv50_disp_dmac_oclass gf119_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass gf119_disp_base_oclass;
-extern const struct nv50_disp_dmac_oclass gf119_disp_ovly_oclass;
-
-extern const struct nv50_disp_dmac_oclass gk104_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass gk104_disp_base_oclass;
-extern const struct nv50_disp_dmac_oclass gk104_disp_ovly_oclass;
-
-extern const struct nv50_disp_dmac_oclass gk110_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass gk110_disp_base_oclass;
-
-extern const struct nv50_disp_dmac_oclass gm107_disp_core_oclass;
-
-extern const struct nv50_disp_dmac_oclass gm200_disp_core_oclass;
-
-extern const struct nv50_disp_dmac_oclass gp100_disp_core_oclass;
-
-extern const struct nv50_disp_dmac_oclass gp102_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass gp102_disp_base_oclass;
-extern const struct nv50_disp_dmac_oclass gp102_disp_ovly_oclass;
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c
index 842e1b7..731f188 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c
@@ -28,18 +28,20 @@
 
 static const struct nv50_disp_func
 g84_disp = {
+	.init = nv50_disp_init,
+	.fini = nv50_disp_fini,
 	.intr = nv50_disp_intr,
 	.uevent = &nv50_disp_chan_uevent,
 	.super = nv50_disp_super,
 	.root = &g84_disp_root_oclass,
-	.head.new = nv50_head_new,
-	.dac = { .nr = 3, .new = nv50_dac_new },
-	.sor = { .nr = 2, .new = g84_sor_new },
-	.pior = { .nr = 3, .new = nv50_pior_new },
+	.head = { .cnt = nv50_head_cnt, .new = nv50_head_new },
+	.dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new },
+	.sor = { .cnt = nv50_sor_cnt, .new = g84_sor_new },
+	.pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new },
 };
 
 int
 g84_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return nv50_disp_new_(&g84_disp, device, index, 2, pdisp);
+	return nv50_disp_new_(&g84_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
index d184e6a..def54fe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
@@ -28,18 +28,20 @@
 
 static const struct nv50_disp_func
 g94_disp = {
+	.init = nv50_disp_init,
+	.fini = nv50_disp_fini,
 	.intr = nv50_disp_intr,
 	.uevent = &nv50_disp_chan_uevent,
 	.super = nv50_disp_super,
 	.root = &g94_disp_root_oclass,
-	.head.new = nv50_head_new,
-	.dac = { .nr = 3, .new = nv50_dac_new },
-	.sor = { .nr = 4, .new = g94_sor_new },
-	.pior = { .nr = 3, .new = nv50_pior_new },
+	.head = { .cnt = nv50_head_cnt, .new = nv50_head_new },
+	.dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new },
+	.sor = { .cnt = g94_sor_cnt, .new = g94_sor_new },
+	.pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new },
 };
 
 int
 g94_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return nv50_disp_new_(&g94_disp, device, index, 2, pdisp);
+	return nv50_disp_new_(&g94_disp, device, index, pdisp);
 }
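
With init/fini and per-type {cnt,new} pairs folded into nv50_disp_func, the extra head-count argument to nv50_disp_new_() disappears and each chipset file reduces to table data, as the g84/g94 diffs show. A minimal model of dispatching through such a table; the structure and names below are illustrative only and do not mirror the real nv50_disp_func layout:

#include <stdio.h>

struct disp;
struct disp_func {
	int (*init)(struct disp *);
	struct { int (*cnt)(unsigned long *); } head, dac, sor;
};

static int model_init(struct disp *d) { (void)d; return 0; }
static int model_sor_cnt(unsigned long *m) { *m = 0xf; return 4; }

static const struct disp_func model = {
	.init = model_init,
	.sor  = { .cnt = model_sor_cnt },
};

int main(void)
{
	unsigned long mask;
	int nr = model.sor.cnt(&mask);	/* chipset-specific hook */
	printf("sors: %d (mask %lx)\n", nr, mask);
	return 0;
}
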
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
index d8765b5..794e909 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -24,8 +24,12 @@
 #include "nv50.h"
 #include "head.h"
 #include "ior.h"
+#include "channv50.h"
 #include "rootnv50.h"
 
+#include <core/ramht.h>
+#include <subdev/timer.h>
+
 void
 gf119_disp_super(struct work_struct *work)
 {
@@ -164,28 +168,99 @@
 	}
 }
 
-int
-gf119_disp_new_(const struct nv50_disp_func *func, struct nvkm_device *device,
-		int index, struct nvkm_disp **pdisp)
+void
+gf119_disp_fini(struct nv50_disp *disp)
 {
-	u32 heads = nvkm_rd32(device, 0x022448);
-	return nv50_disp_new_(func, device, index, heads, pdisp);
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	/* disable all interrupts */
+	nvkm_wr32(device, 0x6100b0, 0x00000000);
+}
+
+int
+gf119_disp_init(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	struct nvkm_head *head;
+	u32 tmp;
+	int i;
+
+	/* The code segments below, which copy values from one register to
+	 * another, appear to inform EVO of the display capabilities or
+	 * something similar.
+	 */
+
+	/* ... CRTC caps */
+	list_for_each_entry(head, &disp->base.head, head) {
+		const u32 hoff = head->id * 0x800;
+		tmp = nvkm_rd32(device, 0x616104 + hoff);
+		nvkm_wr32(device, 0x6101b4 + hoff, tmp);
+		tmp = nvkm_rd32(device, 0x616108 + hoff);
+		nvkm_wr32(device, 0x6101b8 + hoff, tmp);
+		tmp = nvkm_rd32(device, 0x61610c + hoff);
+		nvkm_wr32(device, 0x6101bc + hoff, tmp);
+	}
+
+	/* ... DAC caps */
+	for (i = 0; i < disp->dac.nr; i++) {
+		tmp = nvkm_rd32(device, 0x61a000 + (i * 0x800));
+		nvkm_wr32(device, 0x6101c0 + (i * 0x800), tmp);
+	}
+
+	/* ... SOR caps */
+	for (i = 0; i < disp->sor.nr; i++) {
+		tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800));
+		nvkm_wr32(device, 0x6301c4 + (i * 0x800), tmp);
+	}
+
+	/* steal display away from vbios, or something like that */
+	if (nvkm_rd32(device, 0x6100ac) & 0x00000100) {
+		nvkm_wr32(device, 0x6100ac, 0x00000100);
+		nvkm_mask(device, 0x6194e8, 0x00000001, 0x00000000);
+		if (nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x6194e8) & 0x00000002))
+				break;
+		) < 0)
+			return -EBUSY;
+	}
+
+	/* point at display engine memory area (hash table, objects) */
+	nvkm_wr32(device, 0x610010, (disp->inst->addr >> 8) | 9);
+
+	/* enable supervisor interrupts, disable everything else */
+	nvkm_wr32(device, 0x610090, 0x00000000);
+	nvkm_wr32(device, 0x6100a0, 0x00000000);
+	nvkm_wr32(device, 0x6100b0, 0x00000307);
+
+	/* disable underflow reporting, preventing an intermittent issue
+	 * on some gk104 boards where the production vbios left this
+	 * setting enabled by default.
+	 *
+	 * ftp://download.nvidia.com/open-gpu-doc/gk104-disable-underflow-reporting/1/gk104-disable-underflow-reporting.txt
+	 */
+	list_for_each_entry(head, &disp->base.head, head) {
+		const u32 hoff = head->id * 0x800;
+		nvkm_mask(device, 0x616308 + hoff, 0x00000111, 0x00000010);
+	}
+
+	return 0;
 }
 
 static const struct nv50_disp_func
 gf119_disp = {
+	.init = gf119_disp_init,
+	.fini = gf119_disp_fini,
 	.intr = gf119_disp_intr,
 	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_super,
 	.root = &gf119_disp_root_oclass,
-	.head.new = gf119_head_new,
-	.dac = { .nr = 3, .new = gf119_dac_new },
-	.sor = { .nr = 4, .new = gf119_sor_new },
+	.head = { .cnt = gf119_head_cnt, .new = gf119_head_new },
+	.dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new },
+	.sor = { .cnt = gf119_sor_cnt, .new = gf119_sor_new },
 };
 
 int
 gf119_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gf119_disp, device, index, pdisp);
+	return nv50_disp_new_(&gf119_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
index e8fe9f3..4c3439b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
@@ -28,18 +28,20 @@
 
 static const struct nv50_disp_func
 gk104_disp = {
+	.init = gf119_disp_init,
+	.fini = gf119_disp_fini,
 	.intr = gf119_disp_intr,
 	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_super,
 	.root = &gk104_disp_root_oclass,
-	.head.new = gf119_head_new,
-	.dac = { .nr = 3, .new = gf119_dac_new },
-	.sor = { .nr = 4, .new = gk104_sor_new },
+	.head = { .cnt = gf119_head_cnt, .new = gf119_head_new },
+	.dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new },
+	.sor = { .cnt = gf119_sor_cnt, .new = gk104_sor_new },
 };
 
 int
 gk104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gk104_disp, device, index, pdisp);
+	return nv50_disp_new_(&gk104_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
index 7696875..bc6f475 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
@@ -28,18 +28,20 @@
 
 static const struct nv50_disp_func
 gk110_disp = {
+	.init = gf119_disp_init,
+	.fini = gf119_disp_fini,
 	.intr = gf119_disp_intr,
 	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_super,
 	.root = &gk110_disp_root_oclass,
-	.head.new = gf119_head_new,
-	.dac = { .nr = 3, .new = gf119_dac_new },
-	.sor = { .nr = 4, .new = gk104_sor_new },
+	.head = { .cnt = gf119_head_cnt, .new = gf119_head_new },
+	.dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new },
+	.sor = { .cnt = gf119_sor_cnt, .new = gk104_sor_new },
 };
 
 int
 gk110_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gk110_disp, device, index, pdisp);
+	return nv50_disp_new_(&gk110_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
index ede70e5..031cf6b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
@@ -28,18 +28,20 @@
 
 static const struct nv50_disp_func
 gm107_disp = {
+	.init = gf119_disp_init,
+	.fini = gf119_disp_fini,
 	.intr = gf119_disp_intr,
 	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_super,
 	.root = &gm107_disp_root_oclass,
-	.head.new = gf119_head_new,
-	.dac = { .nr = 3, .new = gf119_dac_new },
-	.sor = { .nr = 4, .new = gm107_sor_new },
+	.head = { .cnt = gf119_head_cnt, .new = gf119_head_new },
+	.dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new },
+	.sor = { .cnt = gf119_sor_cnt, .new = gm107_sor_new },
 };
 
 int
 gm107_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gm107_disp, device, index, pdisp);
+	return nv50_disp_new_(&gm107_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
index 292d3b5..ec9c33a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
@@ -28,18 +28,20 @@
 
 static const struct nv50_disp_func
 gm200_disp = {
+	.init = gf119_disp_init,
+	.fini = gf119_disp_fini,
 	.intr = gf119_disp_intr,
 	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_super,
 	.root = &gm200_disp_root_oclass,
-	.head.new = gf119_head_new,
-	.dac = { .nr = 3, .new = gf119_dac_new },
-	.sor = { .nr = 4, .new = gm200_sor_new },
+	.head = { .cnt = gf119_head_cnt, .new = gf119_head_new },
+	.dac = { .cnt = gf119_dac_cnt, .new = gf119_dac_new },
+	.sor = { .cnt = gf119_sor_cnt, .new = gm200_sor_new },
 };
 
 int
 gm200_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gm200_disp, device, index, pdisp);
+	return nv50_disp_new_(&gm200_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
index 39eb98b..fd62166 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
@@ -28,17 +28,19 @@
 
 static const struct nv50_disp_func
 gp100_disp = {
+	.init = gf119_disp_init,
+	.fini = gf119_disp_fini,
 	.intr = gf119_disp_intr,
 	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_super,
 	.root = &gp100_disp_root_oclass,
-	.head.new = gf119_head_new,
-	.sor = { .nr = 4, .new = gm200_sor_new },
+	.head = { .cnt = gf119_head_cnt, .new = gf119_head_new },
+	.sor = { .cnt = gf119_sor_cnt, .new = gm200_sor_new },
 };
 
 int
 gp100_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gp100_disp, device, index, pdisp);
+	return nv50_disp_new_(&gp100_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c
index 91d70fe1..3468dde 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c
@@ -24,6 +24,7 @@
 #include "nv50.h"
 #include "head.h"
 #include "ior.h"
+#include "channv50.h"
 #include "rootnv50.h"
 
 static void
@@ -54,17 +55,19 @@
 
 static const struct nv50_disp_func
 gp102_disp = {
+	.init = gf119_disp_init,
+	.fini = gf119_disp_fini,
 	.intr = gf119_disp_intr,
 	.intr_error = gp102_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_super,
 	.root = &gp102_disp_root_oclass,
-	.head.new = gf119_head_new,
-	.sor = { .nr = 4, .new = gm200_sor_new },
+	.head = { .cnt = gf119_head_cnt, .new = gf119_head_new },
+	.sor = { .cnt = gf119_sor_cnt, .new = gm200_sor_new },
 };
 
 int
 gp102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gp102_disp, device, index, pdisp);
+	return nv50_disp_new_(&gp102_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c
index bf00c4e..f801837 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt200.c
@@ -28,18 +28,20 @@
 
 static const struct nv50_disp_func
 gt200_disp = {
+	.init = nv50_disp_init,
+	.fini = nv50_disp_fini,
 	.intr = nv50_disp_intr,
 	.uevent = &nv50_disp_chan_uevent,
 	.super = nv50_disp_super,
 	.root = &gt200_disp_root_oclass,
-	.head.new = nv50_head_new,
-	.dac = { .nr = 3, .new = nv50_dac_new },
-	.sor = { .nr = 2, .new = g84_sor_new },
-	.pior = { .nr = 3, .new = nv50_pior_new },
+	.head = { .cnt = nv50_head_cnt, .new = nv50_head_new },
+	.dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new },
+	.sor = { .cnt = nv50_sor_cnt, .new = g84_sor_new },
+	.pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new },
 };
 
 int
 gt200_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return nv50_disp_new_(&gt200_disp, device, index, 2, pdisp);
+	return nv50_disp_new_(&gt200_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
index 2cdd4d7..7581efc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
@@ -28,18 +28,20 @@
 
 static const struct nv50_disp_func
 gt215_disp = {
+	.init = nv50_disp_init,
+	.fini = nv50_disp_fini,
 	.intr = nv50_disp_intr,
 	.uevent = &nv50_disp_chan_uevent,
 	.super = nv50_disp_super,
 	.root = &gt215_disp_root_oclass,
-	.head.new = nv50_head_new,
-	.dac = { .nr = 3, .new = nv50_dac_new },
-	.sor = { .nr = 4, .new = gt215_sor_new },
-	.pior = { .nr = 3, .new = nv50_pior_new },
+	.head = { .cnt = nv50_head_cnt, .new = nv50_head_new },
+	.dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new },
+	.sor = { .cnt = g94_sor_cnt, .new = gt215_sor_new },
+	.pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new },
 };
 
 int
 gt215_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return nv50_disp_new_(&gt215_disp, device, index, 2, pdisp);
+	return nv50_disp_new_(&gt215_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
new file mode 100644
index 0000000..d0a7e34
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "nv50.h"
+#include "head.h"
+#include "ior.h"
+#include "channv50.h"
+#include "rootnv50.h"
+
+#include <core/gpuobj.h>
+#include <subdev/timer.h>
+
+static int
+gv100_disp_wndw_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = nvkm_rd32(device, 0x610064);
+	return (nvkm_rd32(device, 0x610074) & 0x03f00000) >> 20;
+}
+
+static void
+gv100_disp_super(struct work_struct *work)
+{
+	struct nv50_disp *disp =
+		container_of(work, struct nv50_disp, supervisor);
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_head *head;
+	u32 stat = nvkm_rd32(device, 0x6107a8);
+	u32 mask[4];
+
+	nvkm_debug(subdev, "supervisor %d: %08x\n", ffs(disp->super), stat);
+	list_for_each_entry(head, &disp->base.head, head) {
+		mask[head->id] = nvkm_rd32(device, 0x6107ac + (head->id * 4));
+		HEAD_DBG(head, "%08x", mask[head->id]);
+	}
+
+	if (disp->super & 0x00000001) {
+		nv50_disp_chan_mthd(disp->chan[0], NV_DBG_DEBUG);
+		nv50_disp_super_1(disp);
+		list_for_each_entry(head, &disp->base.head, head) {
+			if (!(mask[head->id] & 0x00001000))
+				continue;
+			nv50_disp_super_1_0(disp, head);
+		}
+	} else
+	if (disp->super & 0x00000002) {
+		list_for_each_entry(head, &disp->base.head, head) {
+			if (!(mask[head->id] & 0x00001000))
+				continue;
+			nv50_disp_super_2_0(disp, head);
+		}
+		nvkm_outp_route(&disp->base);
+		list_for_each_entry(head, &disp->base.head, head) {
+			if (!(mask[head->id] & 0x00010000))
+				continue;
+			nv50_disp_super_2_1(disp, head);
+		}
+		list_for_each_entry(head, &disp->base.head, head) {
+			if (!(mask[head->id] & 0x00001000))
+				continue;
+			nv50_disp_super_2_2(disp, head);
+		}
+	} else
+	if (disp->super & 0x00000004) {
+		list_for_each_entry(head, &disp->base.head, head) {
+			if (!(mask[head->id] & 0x00001000))
+				continue;
+			nv50_disp_super_3_0(disp, head);
+		}
+	}
+
+	list_for_each_entry(head, &disp->base.head, head)
+		nvkm_wr32(device, 0x6107ac + (head->id * 4), 0x00000000);
+	nvkm_wr32(device, 0x6107a8, 0x80000000);
+}
+
+static void
+gv100_disp_exception(struct nv50_disp *disp, int chid)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x611020 + (chid * 12));
+	u32 type = (stat & 0x00007000) >> 12;
+	u32 mthd = (stat & 0x00000fff) << 2;
+	u32 data = nvkm_rd32(device, 0x611024 + (chid * 12));
+	u32 code = nvkm_rd32(device, 0x611028 + (chid * 12));
+
+	nvkm_error(subdev, "chid %d %08x [type %d mthd %04x] "
+			   "data %08x code %08x\n",
+		   chid, stat, type, mthd, data, code);
+
+	if (chid < ARRAY_SIZE(disp->chan) && disp->chan[chid]) {
+		switch (mthd) {
+		case 0x0200:
+			nv50_disp_chan_mthd(disp->chan[chid], NV_DBG_ERROR);
+			break;
+		default:
+			break;
+		}
+	}
+
+	nvkm_wr32(device, 0x611020 + (chid * 12), 0x90000000);
+}
+
+static void
+gv100_disp_intr_ctrl_disp(struct nv50_disp *disp)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x611c30);
+
+	if (stat & 0x00000007) {
+		disp->super = (stat & 0x00000007);
+		queue_work(disp->wq, &disp->supervisor);
+		nvkm_wr32(device, 0x611860, disp->super);
+		stat &= ~0x00000007;
+	}
+
+	/*TODO: this is likely VBIOS_RELEASE; however, it's unclear how to
+	 *      ACK it, nor does RM appear to bother.
+	 */
+	if (stat & 0x00000008)
+		stat &= ~0x00000008;
+
+	if (stat & 0x00000100) {
+		unsigned long wndws = nvkm_rd32(device, 0x611858);
+		unsigned long other = nvkm_rd32(device, 0x61185c);
+		int wndw;
+
+		nvkm_wr32(device, 0x611858, wndws);
+		nvkm_wr32(device, 0x61185c, other);
+
+		/* AWAKEN_OTHER_CORE. */
+		if (other & 0x00000001)
+			nv50_disp_chan_uevent_send(disp, 0);
+
+		/* AWAKEN_WIN_CH(n). */
+		for_each_set_bit(wndw, &wndws, disp->wndw.nr) {
+			nv50_disp_chan_uevent_send(disp, 1 + wndw);
+		}
+	}
+
+	if (stat)
+		nvkm_warn(subdev, "ctrl %08x\n", stat);
+}
+
+static void
+gv100_disp_intr_exc_other(struct nv50_disp *disp)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x611854);
+	unsigned long mask;
+	int head;
+
+	if (stat & 0x00000001) {
+		nvkm_wr32(device, 0x611854, 0x00000001);
+		gv100_disp_exception(disp, 0);
+		stat &= ~0x00000001;
+	}
+
+	if ((mask = (stat & 0x00ff0000) >> 16)) {
+		for_each_set_bit(head, &mask, disp->wndw.nr) {
+			nvkm_wr32(device, 0x611854, 0x00010000 << head);
+			gv100_disp_exception(disp, 73 + head);
+			stat &= ~(0x00010000 << head);
+		}
+	}
+
+	if (stat) {
+		nvkm_warn(subdev, "exception %08x\n", stat);
+		nvkm_wr32(device, 0x611854, stat);
+	}
+}
+
+static void
+gv100_disp_intr_exc_winim(struct nv50_disp *disp)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	unsigned long stat = nvkm_rd32(device, 0x611850);
+	int wndw;
+
+	for_each_set_bit(wndw, &stat, disp->wndw.nr) {
+		nvkm_wr32(device, 0x611850, BIT(wndw));
+		gv100_disp_exception(disp, 33 + wndw);
+		stat &= ~BIT(wndw);
+	}
+
+	if (stat) {
+		nvkm_warn(subdev, "wimm %08x\n", (u32)stat);
+		nvkm_wr32(device, 0x611850, stat);
+	}
+}
+
+static void
+gv100_disp_intr_exc_win(struct nv50_disp *disp)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	unsigned long stat = nvkm_rd32(device, 0x61184c);
+	int wndw;
+
+	for_each_set_bit(wndw, &stat, disp->wndw.nr) {
+		nvkm_wr32(device, 0x61184c, BIT(wndw));
+		gv100_disp_exception(disp, 1 + wndw);
+		stat &= ~BIT(wndw);
+	}
+
+	if (stat) {
+		nvkm_warn(subdev, "wndw %08x\n", (u32)stat);
+		nvkm_wr32(device, 0x61184c, stat);
+	}
+}
+
+static void
+gv100_disp_intr_head_timing(struct nv50_disp *disp, int head)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x611800 + (head * 0x04));
+
+	/* LAST_DATA, LOADV. */
+	if (stat & 0x00000003) {
+		nvkm_wr32(device, 0x611800 + (head * 0x04), stat & 0x00000003);
+		stat &= ~0x00000003;
+	}
+
+	if (stat & 0x00000004) {
+		nvkm_disp_vblank(&disp->base, head);
+		nvkm_wr32(device, 0x611800 + (head * 0x04), 0x00000004);
+		stat &= ~0x00000004;
+	}
+
+	if (stat) {
+		nvkm_warn(subdev, "head %08x\n", stat);
+		nvkm_wr32(device, 0x611800 + (head * 0x04), stat);
+	}
+}
+
+static void
+gv100_disp_intr(struct nv50_disp *disp)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x611ec0);
+	unsigned long mask;
+	int head;
+
+	if ((mask = (stat & 0x000000ff))) {
+		for_each_set_bit(head, &mask, 8) {
+			gv100_disp_intr_head_timing(disp, head);
+			stat &= ~BIT(head);
+		}
+	}
+
+	if (stat & 0x00000200) {
+		gv100_disp_intr_exc_win(disp);
+		stat &= ~0x00000200;
+	}
+
+	if (stat & 0x00000400) {
+		gv100_disp_intr_exc_winim(disp);
+		stat &= ~0x00000400;
+	}
+
+	if (stat & 0x00000800) {
+		gv100_disp_intr_exc_other(disp);
+		stat &= ~0x00000800;
+	}
+
+	if (stat & 0x00001000) {
+		gv100_disp_intr_ctrl_disp(disp);
+		stat &= ~0x00001000;
+	}
+
+	if (stat)
+		nvkm_warn(subdev, "intr %08x\n", stat);
+}
+
+static void
+gv100_disp_fini(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	nvkm_wr32(device, 0x611db0, 0x00000000);
+}
+
+static int
+gv100_disp_init(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	struct nvkm_head *head;
+	int i, j;
+	u32 tmp;
+
+	/* Claim ownership of display. */
+	if (nvkm_rd32(device, 0x6254e8) & 0x00000002) {
+		nvkm_mask(device, 0x6254e8, 0x00000001, 0x00000000);
+		if (nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x6254e8) & 0x00000002))
+				break;
+		) < 0)
+			return -EBUSY;
+	}
+
+	/* Lock pin capabilities. */
+	tmp = nvkm_rd32(device, 0x610068);
+	nvkm_wr32(device, 0x640008, tmp);
+
+	/* SOR capabilities. */
+	for (i = 0; i < disp->sor.nr; i++) {
+		tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800));
+		nvkm_mask(device, 0x640000, 0x00000100 << i, 0x00000100 << i);
+		nvkm_wr32(device, 0x640144 + (i * 0x08), tmp);
+	}
+
+	/* Head capabilities. */
+	list_for_each_entry(head, &disp->base.head, head) {
+		const int id = head->id;
+
+		/* RG. */
+		tmp = nvkm_rd32(device, 0x616300 + (id * 0x800));
+		nvkm_wr32(device, 0x640048 + (id * 0x020), tmp);
+
+		/* POSTCOMP. */
+		for (j = 0; j < 6 * 4; j += 4) {
+			tmp = nvkm_rd32(device, 0x616100 + (id * 0x800) + j);
+			nvkm_wr32(device, 0x640030 + (id * 0x20) + j, tmp);
+		}
+	}
+
+	/* Window capabilities. */
+	for (i = 0; i < disp->wndw.nr; i++) {
+		nvkm_mask(device, 0x640004, 1 << i, 1 << i);
+		for (j = 0; j < 6 * 4; j += 4) {
+			tmp = nvkm_rd32(device, 0x630050 + (i * 0x800) + j);
+			nvkm_wr32(device, 0x6401e4 + (i * 0x20) + j, tmp);
+		}
+	}
+
+	/* IHUB capabilities. */
+	for (i = 0; i < 4; i++) {
+		tmp = nvkm_rd32(device, 0x62e000 + (i * 0x04));
+		nvkm_wr32(device, 0x640010 + (i * 0x04), tmp);
+	}
+
+	nvkm_mask(device, 0x610078, 0x00000001, 0x00000001);
+
+	/* Setup instance memory. */
+	switch (nvkm_memory_target(disp->inst->memory)) {
+	case NVKM_MEM_TARGET_VRAM: tmp = 0x00000001; break;
+	case NVKM_MEM_TARGET_NCOH: tmp = 0x00000002; break;
+	case NVKM_MEM_TARGET_HOST: tmp = 0x00000003; break;
+	default:
+		/* XXX: falls through with tmp still holding the last IHUB
+		 * read above; an unknown target likely deserves an error
+		 * instead of a stale value. */
+		break;
+	}
+	nvkm_wr32(device, 0x610010, 0x00000008 | tmp);
+	nvkm_wr32(device, 0x610014, disp->inst->addr >> 16);
+
+	/* CTRL_DISP: AWAKEN, ERROR, SUPERVISOR[1-3]. */
+	nvkm_wr32(device, 0x611cf0, 0x00000187); /* MSK. */
+	nvkm_wr32(device, 0x611db0, 0x00000187); /* EN. */
+
+	/* EXC_OTHER: CURSn, CORE. */
+	nvkm_wr32(device, 0x611cec, disp->head.mask << 16 |
+				    0x00000001); /* MSK. */
+	nvkm_wr32(device, 0x611dac, 0x00000000); /* EN. */
+
+	/* EXC_WINIM. */
+	nvkm_wr32(device, 0x611ce8, disp->wndw.mask); /* MSK. */
+	nvkm_wr32(device, 0x611da8, 0x00000000); /* EN. */
+
+	/* EXC_WIN. */
+	nvkm_wr32(device, 0x611ce4, disp->wndw.mask); /* MSK. */
+	nvkm_wr32(device, 0x611da4, 0x00000000); /* EN. */
+
+	/* HEAD_TIMING(n): VBLANK. */
+	list_for_each_entry(head, &disp->base.head, head) {
+		const u32 hoff = head->id * 4;
+		nvkm_wr32(device, 0x611cc0 + hoff, 0x00000004); /* MSK. */
+		nvkm_wr32(device, 0x611d80 + hoff, 0x00000000); /* EN. */
+	}
+
+	/* OR. */
+	nvkm_wr32(device, 0x611cf4, 0x00000000); /* MSK. */
+	nvkm_wr32(device, 0x611db4, 0x00000000); /* EN. */
+	return 0;
+}
+
+static const struct nv50_disp_func
+gv100_disp = {
+	.init = gv100_disp_init,
+	.fini = gv100_disp_fini,
+	.intr = gv100_disp_intr,
+	.uevent = &gv100_disp_chan_uevent,
+	.super = gv100_disp_super,
+	.root = &gv100_disp_root_oclass,
+	.wndw = { .cnt = gv100_disp_wndw_cnt },
+	.head = { .cnt = gv100_head_cnt, .new = gv100_head_new },
+	.sor = { .cnt = gv100_sor_cnt, .new = gv100_sor_new },
+	.ramht_size = 0x2000,
+};
+
+int
+gv100_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return nv50_disp_new_(&gv100_disp, device, index, pdisp);
+}
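
gv100_disp_intr() above is a straight dispatch tree: the top-level status
word at 0x611ec0 is read once, each recognised bit is fanned out to a
per-source handler, and anything left over is logged rather than silently
dropped.  The per-head and per-window handlers then walk their own masks
with for_each_set_bit() from <linux/bitops.h>.  A minimal standalone
sketch of the same pattern, assuming nothing beyond plain C (the handler
name and the __builtin_ctzl() loop are stand-ins for the kernel helpers):

    #include <stdio.h>

    static void handle_head(int head)
    {
        printf("head %d timing intr\n", head);
    }

    static void
    dispatch(unsigned long stat)
    {
        unsigned long mask = stat & 0x000000ff; /* per-head bits 7:0 */

        while (mask) {
            int head = __builtin_ctzl(mask);    /* lowest set bit */
            handle_head(head);                  /* per-bit handler */
            mask &= mask - 1;                   /* clear that bit */
            stat &= ~(1ul << head);
        }

        if (stat)   /* leftovers are reported, never silently eaten */
            fprintf(stderr, "intr %08lx\n", stat);
    }

    int main(void)
    {
        dispatch(0x00001005);  /* heads 0 and 2 pending, plus stray bit 12 */
        return 0;
    }

Clearing each serviced bit out of stat is what makes the final
"intr %08x" warning meaningful: it only ever shows bits no handler
claimed.
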
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c
new file mode 100644
index 0000000..6e3c450
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "hdmi.h"
+
+void
+gv100_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
+		u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	const u32 ctrl = 0x40000000 * enable |
+			 max_ac_packet << 16 |
+			 rekey;
+	const u32 hoff = head * 0x800;
+	const u32 hdmi = head * 0x400;
+	struct packed_hdmi_infoframe avi_infoframe;
+	struct packed_hdmi_infoframe vendor_infoframe;
+
+	pack_hdmi_infoframe(&avi_infoframe, avi, avi_size);
+	pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size);
+
+	if (!(ctrl & 0x40000000)) {
+		nvkm_mask(device, 0x6165c0 + hoff, 0x40000000, 0x00000000);
+		nvkm_mask(device, 0x6f0100 + hdmi, 0x00000001, 0x00000000);
+		nvkm_mask(device, 0x6f00c0 + hdmi, 0x00000001, 0x00000000);
+		nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000000);
+		return;
+	}
+
+	/* AVI InfoFrame (AVI). */
+	nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000000);
+	if (avi_size) {
+		nvkm_wr32(device, 0x6f0008 + hdmi, avi_infoframe.header);
+		nvkm_wr32(device, 0x6f000c + hdmi, avi_infoframe.subpack0_low);
+		nvkm_wr32(device, 0x6f0010 + hdmi, avi_infoframe.subpack0_high);
+		nvkm_wr32(device, 0x6f0014 + hdmi, avi_infoframe.subpack1_low);
+		nvkm_wr32(device, 0x6f0018 + hdmi, avi_infoframe.subpack1_high);
+		nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000001);
+	}
+
+	/* Vendor-specific InfoFrame (VSI). */
+	nvkm_mask(device, 0x6f0100 + hdmi, 0x00010001, 0x00000000);
+	if (vendor_size) {
+		nvkm_wr32(device, 0x6f0108 + hdmi, vendor_infoframe.header);
+		nvkm_wr32(device, 0x6f010c + hdmi, vendor_infoframe.subpack0_low);
+		nvkm_wr32(device, 0x6f0110 + hdmi, vendor_infoframe.subpack0_high);
+		/* XXX: this second write to 0x6f0110 clobbers the
+		 * subpack0_high value written just above; the zero-fill
+		 * was presumably meant to start at 0x6f0114. */
+		nvkm_wr32(device, 0x6f0110 + hdmi, 0x00000000);
+		nvkm_wr32(device, 0x6f0114 + hdmi, 0x00000000);
+		nvkm_wr32(device, 0x6f0118 + hdmi, 0x00000000);
+		nvkm_wr32(device, 0x6f011c + hdmi, 0x00000000);
+		nvkm_wr32(device, 0x6f0120 + hdmi, 0x00000000);
+		nvkm_wr32(device, 0x6f0124 + hdmi, 0x00000000);
+		nvkm_mask(device, 0x6f0100 + hdmi, 0x00000001, 0x00000001);
+	}
+
+	/* General Control (GCP). */
+	nvkm_mask(device, 0x6f00c0 + hdmi, 0x00000001, 0x00000000);
+	nvkm_wr32(device, 0x6f00cc + hdmi, 0x00000010);
+	nvkm_mask(device, 0x6f00c0 + hdmi, 0x00000001, 0x00000001);
+
+	/* Audio Clock Regeneration (ACR). */
+	nvkm_wr32(device, 0x6f0080 + hdmi, 0x82000000);
+
+	/* NV_PDISP_SF_HDMI_CTRL. */
+	nvkm_mask(device, 0x6165c0 + hoff, 0x401f007f, ctrl);
+}
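
gv100_hdmi_ctrl() consumes the infoframes as pre-packed 32-bit words, a
header word plus two subpacks, written to consecutive 0x6f00xx registers.
pack_hdmi_infoframe() itself lives in hdmi.h and is not part of this
diff; the sketch below assumes a little-endian layout of 3 header bytes
followed by two 7-byte subpacks, inferred only from the five register
writes above:

    #include <stdint.h>
    #include <stddef.h>

    struct packed_hdmi_infoframe {
        uint32_t header;
        uint32_t subpack0_low;
        uint32_t subpack0_high;
        uint32_t subpack1_low;
        uint32_t subpack1_high;
    };

    /* Fold up to nbytes of the raw frame, starting at byte 'first',
     * into one little-endian word; bytes past 'size' read as zero. */
    static uint32_t
    pack_le(const uint8_t *raw, size_t size, size_t first, size_t nbytes)
    {
        uint32_t word = 0;
        size_t i;

        for (i = 0; i < nbytes && first + i < size; i++)
            word |= (uint32_t)raw[first + i] << (8 * i);
        return word;
    }

    static void
    pack_sketch(struct packed_hdmi_infoframe *p,
                const uint8_t *raw, size_t size)
    {
        p->header        = pack_le(raw, size,  0, 3); /* type/vers/len */
        p->subpack0_low  = pack_le(raw, size,  3, 4);
        p->subpack0_high = pack_le(raw, size,  7, 3);
        p->subpack1_low  = pack_le(raw, size, 10, 4);
        p->subpack1_high = pack_le(raw, size, 14, 3);
    }

    int main(void)
    {
        /* AVI infoframe: type 0x82, version 2, length 13. */
        const uint8_t avi[17] = { 0x82, 0x02, 0x0d /* , payload... */ };
        struct packed_hdmi_infoframe p;

        pack_sketch(&p, avi, sizeof(avi));
        return p.header == 0x000d0282 ? 0 : 1;
    }

If the hardware expects a different subpack split, only the offsets in
pack_sketch() change; the register writes above fix the word order.
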
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h
index 57030b3..7d55faf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h
@@ -52,6 +52,14 @@
 #define HEAD_DBG(h,f,a...) HEAD_MSG((h), debug, f, ##a)
 
 int nv04_head_new(struct nvkm_disp *, int id);
+
+int nv50_head_cnt(struct nvkm_disp *, unsigned long *);
 int nv50_head_new(struct nvkm_disp *, int id);
+
+int gf119_head_cnt(struct nvkm_disp *, unsigned long *);
 int gf119_head_new(struct nvkm_disp *, int id);
+void gf119_head_rgclk(struct nvkm_head *, int);
+
+int gv100_head_cnt(struct nvkm_disp *, unsigned long *);
+int gv100_head_new(struct nvkm_disp *, int id);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c
index 9fd7ae3..e86298b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgf119.c
@@ -39,7 +39,7 @@
 	nvkm_mask(device, 0x6100c0 + hoff, 0x00000001, 0x00000001);
 }
 
-static void
+void
 gf119_head_rgclk(struct nvkm_head *head, int div)
 {
 	struct nvkm_device *device = head->disp->engine.subdev.device;
@@ -92,8 +92,13 @@
 int
 gf119_head_new(struct nvkm_disp *disp, int id)
 {
-	struct nvkm_device *device = disp->engine.subdev.device;
-	if (!(nvkm_rd32(device, 0x612004) & (0x00000001 << id)))
-		return 0;
 	return nvkm_head_new_(&gf119_head, disp, id);
 }
+
+int
+gf119_head_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = nvkm_rd32(device, 0x612004) & 0x0000000f;
+	return nvkm_rd32(device, 0x022448);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.c
new file mode 100644
index 0000000..1a061b4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headgv100.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "head.h"
+
+static void
+gv100_head_vblank_put(struct nvkm_head *head)
+{
+	struct nvkm_device *device = head->disp->engine.subdev.device;
+	nvkm_mask(device, 0x611d80 + (head->id * 4), 0x00000004, 0x00000000);
+}
+
+static void
+gv100_head_vblank_get(struct nvkm_head *head)
+{
+	struct nvkm_device *device = head->disp->engine.subdev.device;
+	nvkm_mask(device, 0x611d80 + (head->id * 4), 0x00000004, 0x00000004);
+}
+
+static void
+gv100_head_rgpos(struct nvkm_head *head, u16 *hline, u16 *vline)
+{
+	struct nvkm_device *device = head->disp->engine.subdev.device;
+	const u32 hoff = head->id * 0x800;
+	/* vline read locks hline. */
+	*vline = nvkm_rd32(device, 0x616330 + hoff) & 0x0000ffff;
+	*hline = nvkm_rd32(device, 0x616334 + hoff) & 0x0000ffff;
+}
+
+static void
+gv100_head_state(struct nvkm_head *head, struct nvkm_head_state *state)
+{
+	struct nvkm_device *device = head->disp->engine.subdev.device;
+	const u32 hoff = (state == &head->arm) * 0x8000 + head->id * 0x400;
+	u32 data;
+
+	data = nvkm_rd32(device, 0x682064 + hoff);
+	state->vtotal = (data & 0xffff0000) >> 16;
+	state->htotal = (data & 0x0000ffff);
+	data = nvkm_rd32(device, 0x682068 + hoff);
+	state->vsynce = (data & 0xffff0000) >> 16;
+	state->hsynce = (data & 0x0000ffff);
+	data = nvkm_rd32(device, 0x68206c + hoff);
+	state->vblanke = (data & 0xffff0000) >> 16;
+	state->hblanke = (data & 0x0000ffff);
+	data = nvkm_rd32(device, 0x682070 + hoff);
+	state->vblanks = (data & 0xffff0000) >> 16;
+	state->hblanks = (data & 0x0000ffff);
+	state->hz = nvkm_rd32(device, 0x68200c + hoff);
+
+	data = nvkm_rd32(device, 0x682004 + hoff);
+	switch ((data & 0x000000f0) >> 4) {
+	case 5: state->or.depth = 30; break;
+	case 4: state->or.depth = 24; break;
+	case 1: state->or.depth = 18; break;
+	default:
+		state->or.depth = 18;
+		WARN_ON(1);
+		break;
+	}
+}
+
+static const struct nvkm_head_func
+gv100_head = {
+	.state = gv100_head_state,
+	.rgpos = gv100_head_rgpos,
+	.rgclk = gf119_head_rgclk,
+	.vblank_get = gv100_head_vblank_get,
+	.vblank_put = gv100_head_vblank_put,
+};
+
+int
+gv100_head_new(struct nvkm_disp *disp, int id)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	if (!(nvkm_rd32(device, 0x610060) & (0x00000001 << id)))
+		return 0;
+	return nvkm_head_new_(&gv100_head, disp, id);
+}
+
+int
+gv100_head_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = nvkm_rd32(device, 0x610060) & 0x000000ff;
+	return nvkm_rd32(device, 0x610074) & 0x0000000f;
+}
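
gv100_head_state() splits each packed timing register into two 16-bit
halves, the vertical value in the high half and the horizontal one in
the low half.  A worked example of that extraction with a hypothetical
raw value:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* 0x04650898 splits into vtotal 0x0465 (1125) and htotal
         * 0x0898 (2200), i.e. classic 1080p total timings. */
        uint32_t data   = 0x04650898;
        uint16_t vtotal = (data & 0xffff0000) >> 16;
        uint16_t htotal = (data & 0x0000ffff);

        assert(vtotal == 1125 && htotal == 2200);
        return 0;
    }
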
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c
index c80d06d..e7d5c39 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/headnv50.c
@@ -90,3 +90,10 @@
 {
 	return nvkm_head_new_(&nv50_head, disp, id);
 }
+
+int
+nv50_head_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	*pmask = 3;
+	return 2;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
index 4548c03..e0b4e0c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
@@ -30,7 +30,7 @@
 			UNKNOWN
 		} proto:3;
 		unsigned link:2;
-		unsigned head:4;
+		unsigned head:8;
 	} arm, asy;
 
 	/* Armed DP state. */
@@ -106,7 +106,6 @@
 	return nv50_ior_base(ior) + ((ior->asy.link == 2) * 0x80);
 }
 
-int nv50_sor_new_(const struct nvkm_ior_func *, struct nvkm_disp *, int id);
 void nv50_sor_state(struct nvkm_ior *, struct nvkm_ior_state *);
 void nv50_sor_power(struct nvkm_ior *, bool, bool, bool, bool, bool);
 void nv50_sor_clock(struct nvkm_ior *);
@@ -122,7 +121,6 @@
 
 void gt215_sor_dp_audio(struct nvkm_ior *, int, bool);
 
-int gf119_sor_new_(const struct nvkm_ior_func *, struct nvkm_disp *, int id);
 void gf119_sor_state(struct nvkm_ior *, struct nvkm_ior_state *);
 void gf119_sor_clock(struct nvkm_ior *);
 int gf119_sor_dp_links(struct nvkm_ior *, struct nvkm_i2c_aux *);
@@ -135,10 +133,15 @@
 
 void gm107_sor_dp_pattern(struct nvkm_ior *, int);
 
+void gm200_sor_route_set(struct nvkm_outp *, struct nvkm_ior *);
+int gm200_sor_route_get(struct nvkm_outp *, int *);
+void gm200_sor_dp_drive(struct nvkm_ior *, int, int, int, int, int);
+
 void g84_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8, u8 *, u8);
 void gt215_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8, u8 *, u8);
 void gf119_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8, u8 *, u8);
 void gk104_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8, u8 *, u8);
+void gv100_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8, u8 *, u8);
 
 void gt215_hda_hpd(struct nvkm_ior *, int, bool);
 void gt215_hda_eld(struct nvkm_ior *, u8 *, u8);
@@ -153,19 +156,34 @@
 #define IOR_WARN(i,f,a...) IOR_MSG((i), warn, f, ##a)
 #define IOR_DBG(i,f,a...) IOR_MSG((i), debug, f, ##a)
 
+int nv50_dac_cnt(struct nvkm_disp *, unsigned long *);
 int nv50_dac_new(struct nvkm_disp *, int);
+
+int gf119_dac_cnt(struct nvkm_disp *, unsigned long *);
 int gf119_dac_new(struct nvkm_disp *, int);
 
+int nv50_pior_cnt(struct nvkm_disp *, unsigned long *);
 int nv50_pior_new(struct nvkm_disp *, int);
 
+int nv50_sor_cnt(struct nvkm_disp *, unsigned long *);
 int nv50_sor_new(struct nvkm_disp *, int);
+
 int g84_sor_new(struct nvkm_disp *, int);
+
+int g94_sor_cnt(struct nvkm_disp *, unsigned long *);
 int g94_sor_new(struct nvkm_disp *, int);
+
 int mcp77_sor_new(struct nvkm_disp *, int);
 int gt215_sor_new(struct nvkm_disp *, int);
 int mcp89_sor_new(struct nvkm_disp *, int);
+
+int gf119_sor_cnt(struct nvkm_disp *, unsigned long *);
 int gf119_sor_new(struct nvkm_disp *, int);
+
 int gk104_sor_new(struct nvkm_disp *, int);
 int gm107_sor_new(struct nvkm_disp *, int);
 int gm200_sor_new(struct nvkm_disp *, int);
+
+int gv100_sor_cnt(struct nvkm_disp *, unsigned long *);
+int gv100_sor_new(struct nvkm_disp *, int);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
index d7e0fbb..cfdce23 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
@@ -26,18 +26,20 @@
 
 static const struct nv50_disp_func
 mcp77_disp = {
+	.init = nv50_disp_init,
+	.fini = nv50_disp_fini,
 	.intr = nv50_disp_intr,
 	.uevent = &nv50_disp_chan_uevent,
 	.super = nv50_disp_super,
 	.root = &g94_disp_root_oclass,
-	.head.new = nv50_head_new,
-	.dac = { .nr = 3, .new = nv50_dac_new },
-	.sor = { .nr = 4, .new = mcp77_sor_new },
-	.pior = { .nr = 3, .new = nv50_pior_new },
+	.head = { .cnt = nv50_head_cnt, .new = nv50_head_new },
+	.dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new },
+	.sor = { .cnt = g94_sor_cnt, .new = mcp77_sor_new },
+	.pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new },
 };
 
 int
 mcp77_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return nv50_disp_new_(&mcp77_disp, device, index, 2, pdisp);
+	return nv50_disp_new_(&mcp77_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
index 7b75c57..85d9329 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
@@ -26,18 +26,20 @@
 
 static const struct nv50_disp_func
 mcp89_disp = {
+	.init = nv50_disp_init,
+	.fini = nv50_disp_fini,
 	.intr = nv50_disp_intr,
 	.uevent = &nv50_disp_chan_uevent,
 	.super = nv50_disp_super,
 	.root = &gt215_disp_root_oclass,
-	.head.new = nv50_head_new,
-	.dac = { .nr = 3, .new = nv50_dac_new },
-	.sor = { .nr = 4, .new = mcp89_sor_new },
-	.pior = { .nr = 3, .new = nv50_pior_new },
+	.head = { .cnt = nv50_head_cnt, .new = nv50_head_new },
+	.dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new },
+	.sor = { .cnt = g94_sor_cnt, .new = mcp89_sor_new },
+	.pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new },
 };
 
 int
 mcp89_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return nv50_disp_new_(&mcp89_disp, device, index, 2, pdisp);
+	return nv50_disp_new_(&mcp89_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index 0c570db..f89c7b9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -24,11 +24,12 @@
 #include "nv50.h"
 #include "head.h"
 #include "ior.h"
+#include "channv50.h"
 #include "rootnv50.h"
 
 #include <core/client.h>
 #include <core/enum.h>
-#include <core/gpuobj.h>
+#include <core/ramht.h>
 #include <subdev/bios.h>
 #include <subdev/bios/disp.h>
 #include <subdev/bios/init.h>
@@ -49,29 +50,115 @@
 	disp->func->intr(disp);
 }
 
+static void
+nv50_disp_fini_(struct nvkm_disp *base)
+{
+	struct nv50_disp *disp = nv50_disp(base);
+	disp->func->fini(disp);
+}
+
+static int
+nv50_disp_init_(struct nvkm_disp *base)
+{
+	struct nv50_disp *disp = nv50_disp(base);
+	return disp->func->init(disp);
+}
+
 static void *
 nv50_disp_dtor_(struct nvkm_disp *base)
 {
 	struct nv50_disp *disp = nv50_disp(base);
+
+	nvkm_ramht_del(&disp->ramht);
+	nvkm_gpuobj_del(&disp->inst);
+
 	nvkm_event_fini(&disp->uevent);
 	if (disp->wq)
 		destroy_workqueue(disp->wq);
+
 	return disp;
 }
 
+static int
+nv50_disp_oneinit_(struct nvkm_disp *base)
+{
+	struct nv50_disp *disp = nv50_disp(base);
+	const struct nv50_disp_func *func = disp->func;
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	int ret, i;
+
+	if (func->wndw.cnt) {
+		disp->wndw.nr = func->wndw.cnt(&disp->base, &disp->wndw.mask);
+		nvkm_debug(subdev, "Window(s): %d (%08lx)\n",
+			   disp->wndw.nr, disp->wndw.mask);
+	}
+
+	disp->head.nr = func->head.cnt(&disp->base, &disp->head.mask);
+	nvkm_debug(subdev, "  Head(s): %d (%02lx)\n",
+		   disp->head.nr, disp->head.mask);
+	for_each_set_bit(i, &disp->head.mask, disp->head.nr) {
+		ret = func->head.new(&disp->base, i);
+		if (ret)
+			return ret;
+	}
+
+	if (func->dac.cnt) {
+		disp->dac.nr = func->dac.cnt(&disp->base, &disp->dac.mask);
+		nvkm_debug(subdev, "   DAC(s): %d (%02lx)\n",
+			   disp->dac.nr, disp->dac.mask);
+		for_each_set_bit(i, &disp->dac.mask, disp->dac.nr) {
+			ret = func->dac.new(&disp->base, i);
+			if (ret)
+				return ret;
+		}
+	}
+
+	if (func->pior.cnt) {
+		disp->pior.nr = func->pior.cnt(&disp->base, &disp->pior.mask);
+		nvkm_debug(subdev, "  PIOR(s): %d (%02lx)\n",
+			   disp->pior.nr, disp->pior.mask);
+		for_each_set_bit(i, &disp->pior.mask, disp->pior.nr) {
+			ret = func->pior.new(&disp->base, i);
+			if (ret)
+				return ret;
+		}
+	}
+
+	disp->sor.nr = func->sor.cnt(&disp->base, &disp->sor.mask);
+	nvkm_debug(subdev, "   SOR(s): %d (%02lx)\n",
+		   disp->sor.nr, disp->sor.mask);
+	for_each_set_bit(i, &disp->sor.mask, disp->sor.nr) {
+		ret = func->sor.new(&disp->base, i);
+		if (ret)
+			return ret;
+	}
+
+	ret = nvkm_gpuobj_new(device, 0x10000, 0x10000, false, NULL,
+			      &disp->inst);
+	if (ret)
+		return ret;
+
+	return nvkm_ramht_new(device, func->ramht_size ? func->ramht_size :
+			      0x1000, 0, disp->inst, &disp->ramht);
+}
+
 static const struct nvkm_disp_func
 nv50_disp_ = {
 	.dtor = nv50_disp_dtor_,
+	.oneinit = nv50_disp_oneinit_,
+	.init = nv50_disp_init_,
+	.fini = nv50_disp_fini_,
 	.intr = nv50_disp_intr_,
 	.root = nv50_disp_root_,
 };
 
 int
 nv50_disp_new_(const struct nv50_disp_func *func, struct nvkm_device *device,
-	       int index, int heads, struct nvkm_disp **pdisp)
+	       int index, struct nvkm_disp **pdisp)
 {
 	struct nv50_disp *disp;
-	int ret, i;
+	int ret;
 
 	if (!(disp = kzalloc(sizeof(*disp), GFP_KERNEL)))
 		return -ENOMEM;
@@ -85,33 +172,11 @@
 	disp->wq = create_singlethread_workqueue("nvkm-disp");
 	if (!disp->wq)
 		return -ENOMEM;
+
 	INIT_WORK(&disp->supervisor, func->super);
 
-	for (i = 0; func->head.new && i < heads; i++) {
-		ret = func->head.new(&disp->base, i);
-		if (ret)
-			return ret;
-	}
-
-	for (i = 0; func->dac.new && i < func->dac.nr; i++) {
-		ret = func->dac.new(&disp->base, i);
-		if (ret)
-			return ret;
-	}
-
-	for (i = 0; func->pior.new && i < func->pior.nr; i++) {
-		ret = func->pior.new(&disp->base, i);
-		if (ret)
-			return ret;
-	}
-
-	for (i = 0; func->sor.new && i < func->sor.nr; i++) {
-		ret = func->sor.new(&disp->base, i);
-		if (ret)
-			return ret;
-	}
-
-	return nvkm_event_init(func->uevent, 1, 1 + (heads * 4), &disp->uevent);
+	return nvkm_event_init(func->uevent, 1, ARRAY_SIZE(disp->chan),
+			       &disp->uevent);
 }
 
 static u32
@@ -613,20 +678,96 @@
 	}
 }
 
+void
+nv50_disp_fini(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	/* disable all interrupts */
+	nvkm_wr32(device, 0x610024, 0x00000000);
+	nvkm_wr32(device, 0x610020, 0x00000000);
+}
+
+int
+nv50_disp_init(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	struct nvkm_head *head;
+	u32 tmp;
+	int i;
+
+	/* The below segments of code copying values from one register to
+	 * another appear to inform EVO of the display capabilities or
+	 * something similar.  It is not clear what the 0x614004 caps are for.
+	 */
+	tmp = nvkm_rd32(device, 0x614004);
+	nvkm_wr32(device, 0x610184, tmp);
+
+	/* ... CRTC caps */
+	list_for_each_entry(head, &disp->base.head, head) {
+		tmp = nvkm_rd32(device, 0x616100 + (head->id * 0x800));
+		nvkm_wr32(device, 0x610190 + (head->id * 0x10), tmp);
+		tmp = nvkm_rd32(device, 0x616104 + (head->id * 0x800));
+		nvkm_wr32(device, 0x610194 + (head->id * 0x10), tmp);
+		tmp = nvkm_rd32(device, 0x616108 + (head->id * 0x800));
+		nvkm_wr32(device, 0x610198 + (head->id * 0x10), tmp);
+		tmp = nvkm_rd32(device, 0x61610c + (head->id * 0x800));
+		nvkm_wr32(device, 0x61019c + (head->id * 0x10), tmp);
+	}
+
+	/* ... DAC caps */
+	for (i = 0; i < disp->dac.nr; i++) {
+		tmp = nvkm_rd32(device, 0x61a000 + (i * 0x800));
+		nvkm_wr32(device, 0x6101d0 + (i * 0x04), tmp);
+	}
+
+	/* ... SOR caps */
+	for (i = 0; i < disp->sor.nr; i++) {
+		tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800));
+		nvkm_wr32(device, 0x6101e0 + (i * 0x04), tmp);
+	}
+
+	/* ... PIOR caps */
+	for (i = 0; i < disp->pior.nr; i++) {
+		tmp = nvkm_rd32(device, 0x61e000 + (i * 0x800));
+		nvkm_wr32(device, 0x6101f0 + (i * 0x04), tmp);
+	}
+
+	/* steal display away from vbios, or something like that */
+	if (nvkm_rd32(device, 0x610024) & 0x00000100) {
+		nvkm_wr32(device, 0x610024, 0x00000100);
+		nvkm_mask(device, 0x6194e8, 0x00000001, 0x00000000);
+		if (nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x6194e8) & 0x00000002))
+				break;
+		) < 0)
+			return -EBUSY;
+	}
+
+	/* point at display engine memory area (hash table, objects) */
+	nvkm_wr32(device, 0x610010, (disp->inst->addr >> 8) | 9);
+
+	/* enable supervisor interrupts, disable everything else */
+	nvkm_wr32(device, 0x61002c, 0x00000370);
+	nvkm_wr32(device, 0x610028, 0x00000000);
+	return 0;
+}
+
 static const struct nv50_disp_func
 nv50_disp = {
+	.init = nv50_disp_init,
+	.fini = nv50_disp_fini,
 	.intr = nv50_disp_intr,
 	.uevent = &nv50_disp_chan_uevent,
 	.super = nv50_disp_super,
 	.root = &nv50_disp_root_oclass,
-	.head.new = nv50_head_new,
-	.dac = { .nr = 3, .new = nv50_dac_new },
-	.sor = { .nr = 2, .new = nv50_sor_new },
-	.pior = { .nr = 3, .new = nv50_pior_new },
+	.head = { .cnt = nv50_head_cnt, .new = nv50_head_new },
+	.dac = { .cnt = nv50_dac_cnt, .new = nv50_dac_new },
+	.sor = { .cnt = nv50_sor_cnt, .new = nv50_sor_new },
+	.pior = { .cnt = nv50_pior_cnt, .new = nv50_pior_new },
 };
 
 int
 nv50_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return nv50_disp_new_(&nv50_disp, device, index, 2, pdisp);
+	return nv50_disp_new_(&nv50_disp, device, index, pdisp);
 }
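
With this rework, nv50_disp_new_() drops the hardcoded head count:
nv50_disp_oneinit_() asks each output type's .cnt callback for the unit
count plus a presence bitmask, then runs the .new constructor once per
set bit.  A standalone sketch of that contract under the same split
(names are illustrative; the kernel loop uses for_each_set_bit()):

    #include <stdio.h>

    struct unit_func {
        int (*cnt)(unsigned long *mask);
        int (*new_)(int id);
    };

    static int fake_sor_cnt(unsigned long *mask)
    {
        *mask = 0x5;  /* SORs 0 and 2 present, SOR 1 fused off */
        return 4;     /* hardware supports up to four */
    }

    static int fake_sor_new(int id)
    {
        printf("constructing SOR %d\n", id);
        return 0;
    }

    static int
    enumerate(const struct unit_func *func)
    {
        unsigned long mask;
        int nr = func->cnt(&mask);
        int i, ret;

        for (i = 0; i < nr; i++) {
            if (!(mask & (1ul << i)))
                continue;
            ret = func->new_(i);
            if (ret)
                return ret;
        }
        return 0;
    }

    int main(void)
    {
        const struct unit_func sor = { fake_sor_cnt, fake_sor_new };
        return enumerate(&sor);  /* constructs SOR 0 and SOR 2 only */
    }

Keeping the count and the mask separate is what lets fused-off units
leave holes: the bound says how far to scan, the mask says which ids
actually exist.
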
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
index eb0b8ac..8580382 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
@@ -16,14 +16,26 @@
 	struct nvkm_event uevent;
 
 	struct {
+		unsigned long mask;
+		int nr;
+	} wndw, head, dac;
+
+	struct {
+		unsigned long mask;
+		int nr;
 		u32 lvdsconf;
 	} sor;
 
 	struct {
+		unsigned long mask;
+		int nr;
 		u8 type[3];
 	} pior;
 
-	struct nv50_disp_chan *chan[21];
+	struct nvkm_gpuobj *inst;
+	struct nvkm_ramht *ramht;
+
+	struct nv50_disp_chan *chan[81];
 };
 
 void nv50_disp_super_1(struct nv50_disp *);
@@ -34,11 +46,11 @@
 void nv50_disp_super_3_0(struct nv50_disp *, struct nvkm_head *);
 
 int nv50_disp_new_(const struct nv50_disp_func *, struct nvkm_device *,
-		   int index, int heads, struct nvkm_disp **);
-int gf119_disp_new_(const struct nv50_disp_func *, struct nvkm_device *,
-		    int index, struct nvkm_disp **);
+		   int index, struct nvkm_disp **);
 
 struct nv50_disp_func {
+	int (*init)(struct nv50_disp *);
+	void (*fini)(struct nv50_disp *);
 	void (*intr)(struct nv50_disp *);
 	void (*intr_error)(struct nv50_disp *, int chid);
 
@@ -48,28 +60,20 @@
 	const struct nvkm_disp_oclass *root;
 
 	struct {
+		int (*cnt)(struct nvkm_disp *, unsigned long *mask);
 		int (*new)(struct nvkm_disp *, int id);
-	} head;
+	} wndw, head, dac, sor, pior;
 
-	struct {
-		int nr;
-		int (*new)(struct nvkm_disp *, int id);
-	} dac;
-
-	struct {
-		int nr;
-		int (*new)(struct nvkm_disp *, int id);
-	} sor;
-
-	struct {
-		int nr;
-		int (*new)(struct nvkm_disp *, int id);
-	} pior;
+	u16 ramht_size;
 };
 
+int nv50_disp_init(struct nv50_disp *);
+void nv50_disp_fini(struct nv50_disp *);
 void nv50_disp_intr(struct nv50_disp *);
 void nv50_disp_super(struct work_struct *);
 
+int gf119_disp_init(struct nv50_disp *);
+void gf119_disp_fini(struct nv50_disp *);
 void gf119_disp_intr(struct nv50_disp *);
 void gf119_disp_super(struct work_struct *);
 void gf119_disp_intr_error(struct nv50_disp *, int);
@@ -77,4 +81,12 @@
 void nv50_disp_dptmds_war_2(struct nv50_disp *, struct dcb_output *);
 void nv50_disp_dptmds_war_3(struct nv50_disp *, struct dcb_output *);
 void nv50_disp_update_sppll1(struct nv50_disp *);
+
+extern const struct nvkm_event_func nv50_disp_chan_uevent;
+int  nv50_disp_chan_uevent_ctor(struct nvkm_object *, void *, u32,
+				struct nvkm_notify *);
+void nv50_disp_chan_uevent_send(struct nv50_disp *, int);
+
+extern const struct nvkm_event_func gf119_disp_chan_uevent;
+extern const struct nvkm_event_func gv100_disp_chan_uevent;
 #endif
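
The chan[] array grows from 21 to 81 slots to cover the channel-ID space
the gv100 exception handlers above index into: 0 for core, 1 + wndw for
up to 32 window channels, 33 + wndw for window-immediate, and 73 + head
for cursors, leaving 65..72 unused.  A small classifier, inferred purely
from those handlers rather than from documentation:

    #include <stdio.h>

    static const char *
    gv100_chan_name(int chid)
    {
        if (chid == 0)
            return "core";
        if (chid >= 1 && chid <= 32)
            return "wndw";  /* window, 1 + wndw */
        if (chid >= 33 && chid <= 64)
            return "wimm";  /* window immediate, 33 + wndw */
        if (chid >= 73 && chid <= 80)
            return "curs";  /* cursor, 73 + head */
        return "unused";
    }

    int main(void)
    {
        printf("%s %s %s\n",
               gv100_chan_name(0),   /* core */
               gv100_chan_name(40),  /* wimm */
               gv100_chan_name(80)); /* curs, the 81st slot */
        return 0;
    }

The 65..72 gap falls out of the arithmetic above; nothing in this diff
assigns those ids.
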
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c
deleted file mode 100644
index 5ad5d0f..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-g84_disp_oimm_oclass = {
-	.base.oclass = G82_DISP_OVERLAY,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_oimm_new,
-	.func = &nv50_disp_pioc_func,
-	.chid = { 5, 5 },
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c
index 1f9fd34..1ae0bcf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c
@@ -22,16 +22,11 @@
  * Authors: Ben Skeggs
  */
 #include "channv50.h"
-#include "rootnv50.h"
 
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-gf119_disp_oimm_oclass = {
-	.base.oclass = GF110_DISP_OVERLAY,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_oimm_new,
-	.func = &gf119_disp_pioc_func,
-	.chid = { 9, 9 },
-};
+int
+gf119_disp_oimm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_oimm_new_(&gf119_disp_pioc_func, disp, 9, 9,
+				   oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c
deleted file mode 100644
index 0c09fe8..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-gk104_disp_oimm_oclass = {
-	.base.oclass = GK104_DISP_OVERLAY,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_oimm_new,
-	.func = &gf119_disp_pioc_func,
-	.chid = { 9, 9 },
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c
index abf8236..30ffb10 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c
@@ -22,16 +22,11 @@
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 #include "channv50.h"
-#include "rootnv50.h"
 
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-gp102_disp_oimm_oclass = {
-	.base.oclass = GK104_DISP_OVERLAY,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_oimm_new,
-	.func = &gf119_disp_pioc_func,
-	.chid = { 9, 13 },
-};
+int
+gp102_disp_oimm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_oimm_new_(&gf119_disp_pioc_func, disp, 9, 13,
+				   oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c
deleted file mode 100644
index 1281db2..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_pioc_oclass
-gt215_disp_oimm_oclass = {
-	.base.oclass = GT214_DISP_OVERLAY,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_oimm_new,
-	.func = &nv50_disp_pioc_func,
-	.chid = { 5, 5 },
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c
index f3b0fa2..0db99bf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c
@@ -23,30 +23,26 @@
  */
 #include "channv50.h"
 #include "head.h"
-#include "rootnv50.h"
 
 #include <core/client.h>
 
-#include <nvif/class.h>
 #include <nvif/cl507b.h>
 #include <nvif/unpack.h>
 
 int
-nv50_disp_oimm_new(const struct nv50_disp_chan_func *func,
-		   const struct nv50_disp_chan_mthd *mthd,
-		   struct nv50_disp_root *root, int ctrl, int user,
-		   const struct nvkm_oclass *oclass, void *data, u32 size,
-		   struct nvkm_object **pobject)
+nv50_disp_oimm_new_(const struct nv50_disp_chan_func *func,
+		    struct nv50_disp *disp, int ctrl, int user,
+		    const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nvkm_object **pobject)
 {
 	union {
 		struct nv50_disp_overlay_v0 v0;
-	} *args = data;
+	} *args = argv;
 	struct nvkm_object *parent = oclass->parent;
-	struct nv50_disp *disp = root->disp;
 	int head, ret = -ENOSYS;
 
-	nvif_ioctl(parent, "create disp overlay size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+	nvif_ioctl(parent, "create disp overlay size %d\n", argc);
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
 		nvif_ioctl(parent, "create disp overlay vers %d head %d\n",
 			   args->v0.version, args->v0.head);
 		if (!nvkm_head_find(&disp->base, args->v0.head))
@@ -55,16 +51,14 @@
 	} else
 		return ret;
 
-	return nv50_disp_chan_new_(func, mthd, root, ctrl + head, user + head,
+	return nv50_disp_chan_new_(func, NULL, disp, ctrl + head, user + head,
 				   head, oclass, pobject);
 }
 
-const struct nv50_disp_pioc_oclass
-nv50_disp_oimm_oclass = {
-	.base.oclass = NV50_DISP_OVERLAY,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_oimm_new,
-	.func = &nv50_disp_pioc_func,
-	.chid = { 5, 5 },
-};
+int
+nv50_disp_oimm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		   struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_oimm_new_(&nv50_disp_pioc_func, disp, 5, 5,
+				   oclass, argv, argc, pobject);
+}
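
The channel constructors now take a raw (argv, argc) blob and unpack a
versioned argument struct through a union, accepting only layouts whose
version they recognise.  A much-simplified standalone sketch of that
pattern (the real nvif_unpack() also handles optional trailing data and
multiple accepted versions; the struct layout here is illustrative):

    #include <errno.h>
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    struct overlay_args_v0 {
        uint8_t version;
        uint8_t head;
        uint8_t pad02[6];
    };

    static int
    oimm_new(const void *argv, uint32_t argc, int *phead)
    {
        struct overlay_args_v0 args;

        if (argc != sizeof(args))
            return -ENOSYS;
        memcpy(&args, argv, sizeof(args));
        if (args.version != 0)
            return -ENOSYS;

        *phead = args.head;  /* still checked against nvkm_head_find() */
        return 0;
    }

    int main(void)
    {
        struct overlay_args_v0 a = { .version = 0, .head = 1 };
        int head, ret = oimm_new(&a, sizeof(a), &head);

        printf("ret %d head %d\n", ret, head);
        return 0;
    }
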
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c
index db6234e..31b915d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlyg84.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 static const struct nv50_disp_mthd_list
 g84_disp_ovly_mthd_base = {
@@ -54,8 +51,8 @@
 	}
 };
 
-const struct nv50_disp_chan_mthd
-g84_disp_ovly_chan_mthd = {
+static const struct nv50_disp_chan_mthd
+g84_disp_ovly_mthd = {
 	.name = "Overlay",
 	.addr = 0x000540,
 	.prev = 0x000004,
@@ -65,13 +62,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-g84_disp_ovly_oclass = {
-	.base.oclass = G82_DISP_OVERLAY_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_ovly_new,
-	.func = &nv50_disp_dmac_func,
-	.mthd = &g84_disp_ovly_chan_mthd,
-	.chid = 3,
-};
+int
+g84_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		  struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_ovly_new_(&nv50_disp_dmac_func, &g84_disp_ovly_mthd,
+				   disp, 3, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c
index 5985879..83fd534 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygf119.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 static const struct nv50_disp_mthd_list
 gf119_disp_ovly_mthd_base = {
@@ -79,7 +76,7 @@
 };
 
 static const struct nv50_disp_chan_mthd
-gf119_disp_ovly_chan_mthd = {
+gf119_disp_ovly_mthd = {
 	.name = "Overlay",
 	.addr = 0x001000,
 	.prev = -0x020000,
@@ -89,13 +86,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-gf119_disp_ovly_oclass = {
-	.base.oclass = GF110_DISP_OVERLAY_CONTROL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_ovly_new,
-	.func = &gf119_disp_dmac_func,
-	.mthd = &gf119_disp_ovly_chan_mthd,
-	.chid = 5,
-};
+int
+gf119_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_ovly_new_(&gf119_disp_dmac_func, &gf119_disp_ovly_mthd,
+				   disp, 5, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
index 2f0220b..a7acacb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 static const struct nv50_disp_mthd_list
 gk104_disp_ovly_mthd_base = {
@@ -81,7 +78,7 @@
 };
 
 const struct nv50_disp_chan_mthd
-gk104_disp_ovly_chan_mthd = {
+gk104_disp_ovly_mthd = {
 	.name = "Overlay",
 	.addr = 0x001000,
 	.prev = -0x020000,
@@ -91,13 +88,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-gk104_disp_ovly_oclass = {
-	.base.oclass = GK104_DISP_OVERLAY_CONTROL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_ovly_new,
-	.func = &gf119_disp_dmac_func,
-	.mthd = &gk104_disp_ovly_chan_mthd,
-	.chid = 5,
-};
+int
+gk104_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_ovly_new_(&gf119_disp_dmac_func, &gk104_disp_ovly_mthd,
+				   disp, 5, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c
index 589bd2f..e0eca6e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c
@@ -21,18 +21,12 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
+#include "channv50.h"
 
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gp102_disp_ovly_oclass = {
-	.base.oclass = GK104_DISP_OVERLAY_CONTROL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_ovly_new,
-	.func = &gp102_disp_dmac_func,
-	.mthd = &gk104_disp_ovly_chan_mthd,
-	.chid = 5,
-};
+int
+gp102_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_ovly_new_(&gp102_disp_dmac_func, &gk104_disp_ovly_mthd,
+				   disp, 5, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c
index f858053..dc60cd0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt200.c
@@ -21,10 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
+#include "channv50.h"
 
 static const struct nv50_disp_mthd_list
 gt200_disp_ovly_mthd_base = {
@@ -58,7 +55,7 @@
 };
 
 static const struct nv50_disp_chan_mthd
-gt200_disp_ovly_chan_mthd = {
+gt200_disp_ovly_mthd = {
 	.name = "Overlay",
 	.addr = 0x000540,
 	.prev = 0x000004,
@@ -68,13 +65,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-gt200_disp_ovly_oclass = {
-	.base.oclass = GT200_DISP_OVERLAY_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_ovly_new,
-	.func = &nv50_disp_dmac_func,
-	.mthd = &gt200_disp_ovly_chan_mthd,
-	.chid = 3,
-};
+int
+gt200_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_ovly_new_(&nv50_disp_dmac_func, &gt200_disp_ovly_mthd,
+				   disp, 3, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt215.c
deleted file mode 100644
index c947e1e..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygt215.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include "dmacnv50.h"
-#include "rootnv50.h"
-
-#include <nvif/class.h>
-
-const struct nv50_disp_dmac_oclass
-gt215_disp_ovly_oclass = {
-	.base.oclass = GT214_DISP_OVERLAY_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_ovly_new,
-	.func = &nv50_disp_dmac_func,
-	.mthd = &g84_disp_ovly_chan_mthd,
-	.chid = 3,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c
index 9ebaaa6..6974c12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlynv50.c
@@ -21,33 +21,30 @@
  *
  * Authors: Ben Skeggs
  */
-#include "dmacnv50.h"
+#include "channv50.h"
 #include "head.h"
-#include "rootnv50.h"
 
 #include <core/client.h>
 
-#include <nvif/class.h>
 #include <nvif/cl507e.h>
 #include <nvif/unpack.h>
 
 int
-nv50_disp_ovly_new(const struct nv50_disp_dmac_func *func,
-		   const struct nv50_disp_chan_mthd *mthd,
-		   struct nv50_disp_root *root, int chid,
-		   const struct nvkm_oclass *oclass, void *data, u32 size,
-		   struct nvkm_object **pobject)
+nv50_disp_ovly_new_(const struct nv50_disp_chan_func *func,
+		    const struct nv50_disp_chan_mthd *mthd,
+		    struct nv50_disp *disp, int chid,
+		    const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nvkm_object **pobject)
 {
 	union {
 		struct nv50_disp_overlay_channel_dma_v0 v0;
-	} *args = data;
+	} *args = argv;
 	struct nvkm_object *parent = oclass->parent;
-	struct nv50_disp *disp = root->disp;
 	int head, ret = -ENOSYS;
 	u64 push;
 
-	nvif_ioctl(parent, "create disp overlay channel dma size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+	nvif_ioctl(parent, "create disp overlay channel dma size %d\n", argc);
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
 		nvif_ioctl(parent, "create disp overlay channel dma vers %d "
 				   "pushbuf %016llx head %d\n",
 			   args->v0.version, args->v0.pushbuf, args->v0.head);
@@ -58,7 +55,7 @@
 	} else
 		return ret;
 
-	return nv50_disp_dmac_new_(func, mthd, root, chid + head,
+	return nv50_disp_dmac_new_(func, mthd, disp, chid + head,
 				   head, push, oclass, pobject);
 }
 
@@ -91,7 +88,7 @@
 };
 
 static const struct nv50_disp_chan_mthd
-nv50_disp_ovly_chan_mthd = {
+nv50_disp_ovly_mthd = {
 	.name = "Overlay",
 	.addr = 0x000540,
 	.prev = 0x000004,
@@ -101,13 +98,10 @@
 	}
 };
 
-const struct nv50_disp_dmac_oclass
-nv50_disp_ovly_oclass = {
-	.base.oclass = NV50_DISP_OVERLAY_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_disp_ovly_new,
-	.func = &nv50_disp_dmac_func,
-	.mthd = &nv50_disp_ovly_chan_mthd,
-	.chid = 3,
-};
+int
+nv50_disp_ovly_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		   struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return nv50_disp_ovly_new_(&nv50_disp_dmac_func, &nv50_disp_ovly_mthd,
+				   disp, 3, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c
index 0abaa64..5296e7b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c
@@ -29,7 +29,7 @@
 static void
 gf119_disp_pioc_fini(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->root->disp;
+	struct nv50_disp *disp = chan->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	int ctrl = chan->chid.ctrl;
@@ -43,24 +43,17 @@
 		nvkm_error(subdev, "ch %d fini: %08x\n", user,
 			   nvkm_rd32(device, 0x610490 + (ctrl * 0x10)));
 	}
-
-	/* disable error reporting and completion notification */
-	nvkm_mask(device, 0x610090, 0x00000001 << user, 0x00000000);
-	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000000);
 }
 
 static int
 gf119_disp_pioc_init(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->root->disp;
+	struct nv50_disp *disp = chan->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	int ctrl = chan->chid.ctrl;
 	int user = chan->chid.user;
 
-	/* enable error reporting */
-	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user);
-
 	/* activate channel */
 	nvkm_wr32(device, 0x610490 + (ctrl * 0x10), 0x00000001);
 	if (nvkm_msec(device, 2000,
@@ -80,4 +73,6 @@
 gf119_disp_pioc_func = {
 	.init = gf119_disp_pioc_init,
 	.fini = gf119_disp_pioc_fini,
+	.intr = gf119_disp_chan_intr,
+	.user = nv50_disp_chan_user,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c
index 0211e0e..4faed6f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c
@@ -29,7 +29,7 @@
 static void
 nv50_disp_pioc_fini(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->root->disp;
+	struct nv50_disp *disp = chan->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	int ctrl = chan->chid.ctrl;
@@ -48,7 +48,7 @@
 static int
 nv50_disp_pioc_init(struct nv50_disp_chan *chan)
 {
-	struct nv50_disp *disp = chan->root->disp;
+	struct nv50_disp *disp = chan->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	int ctrl = chan->chid.ctrl;
@@ -82,4 +82,6 @@
 nv50_disp_pioc_func = {
 	.init = nv50_disp_pioc_init,
 	.fini = nv50_disp_pioc_fini,
+	.intr = nv50_disp_chan_intr,
+	.user = nv50_disp_chan_user,
 };
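
Both pioc init paths spin on the channel-state register under
nvkm_msec(), which keeps evaluating its body until it breaks out or
roughly 2000ms pass, returning a negative value on timeout.  A
standalone approximation of that helper (the kernel macro uses ktime
and returns the time remaining on success):

    #include <stdbool.h>
    #include <time.h>

    static long long
    now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000ll + ts.tv_nsec;
    }

    static int
    poll_msec(long long msec, bool (*done)(void))
    {
        long long deadline = now_ns() + msec * 1000000ll;

        do {
            if (done())
                return 0;   /* condition met */
        } while (now_ns() < deadline);

        return -1;  /* timed out, cf. the "ch %d init" error paths */
    }

    static bool chan_idle(void) { return true; } /* stand-in for rd32 test */

    int main(void)
    {
        return poll_msec(2000, chan_idle);
    }
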
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c
index 99b3b90..e997a20 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piornv50.c
@@ -127,8 +127,13 @@
 int
 nv50_pior_new(struct nvkm_disp *disp, int id)
 {
-	struct nvkm_device *device = disp->engine.subdev.device;
-	if (!(nvkm_rd32(device, 0x610184) & (0x10000000 << id)))
-		return 0;
 	return nvkm_ior_new_(&nv50_pior, disp, PIOR, id);
 }
+
+int
+nv50_pior_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = (nvkm_rd32(device, 0x610184) & 0x70000000) >> 28;
+	return 3;
+}
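
nv50_pior_cnt() folds the old per-id probe from nv50_pior_new() (testing
0x10000000 << id against 0x610184) into a single extraction of bits
30:28.  A quick equivalence check, using a made-up register value that
means PIORs 0 and 2 present:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t reg = 0x50000000;                   /* PIORs 0 and 2 */
        unsigned long mask = (reg & 0x70000000) >> 28;
        int id;

        for (id = 0; id < 3; id++)
            assert(!!(reg & (0x10000000u << id)) == ((mask >> id) & 1));
        return 0;
    }
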
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
index 6c9bfff..ef66c5f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
@@ -12,6 +12,9 @@
 
 struct nvkm_disp_func {
 	void *(*dtor)(struct nvkm_disp *);
+	int (*oneinit)(struct nvkm_disp *);
+	int (*init)(struct nvkm_disp *);
+	void (*fini)(struct nvkm_disp *);
 	void (*intr)(struct nvkm_disp *);
 
 	const struct nvkm_disp_oclass *(*root)(struct nvkm_disp *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c
index 721e4f7..1ed371f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg84.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 g84_disp_root = {
-	.init = nv50_disp_root_init,
-	.fini = nv50_disp_root_fini,
-	.dmac = {
-		&g84_disp_core_oclass,
-		&g84_disp_base_oclass,
-		&g84_disp_ovly_oclass,
-	},
-	.pioc = {
-		&g84_disp_oimm_oclass,
-		&g84_disp_curs_oclass,
+	.user = {
+		{{0,0,G82_DISP_CURSOR             }, nv50_disp_curs_new },
+		{{0,0,G82_DISP_OVERLAY            }, nv50_disp_oimm_new },
+		{{0,0,G82_DISP_BASE_CHANNEL_DMA   },  g84_disp_base_new },
+		{{0,0,G82_DISP_CORE_CHANNEL_DMA   },  g84_disp_core_new },
+		{{0,0,G82_DISP_OVERLAY_CHANNEL_DMA},  g84_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c
index 9493f6e..ef579eb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootg94.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 g94_disp_root = {
-	.init = nv50_disp_root_init,
-	.fini = nv50_disp_root_fini,
-	.dmac = {
-		&g94_disp_core_oclass,
-		&gt200_disp_base_oclass,
-		&gt200_disp_ovly_oclass,
-	},
-	.pioc = {
-		&g84_disp_oimm_oclass,
-		&g84_disp_curs_oclass,
+	.user = {
+		{{0,0,  G82_DISP_CURSOR             },  nv50_disp_curs_new },
+		{{0,0,  G82_DISP_OVERLAY            },  nv50_disp_oimm_new },
+		{{0,0,GT200_DISP_BASE_CHANNEL_DMA   },   g84_disp_base_new },
+		{{0,0,GT206_DISP_CORE_CHANNEL_DMA   },   g94_disp_core_new },
+		{{0,0,GT200_DISP_OVERLAY_CHANNEL_DMA}, gt200_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c
index 333c842..fe01116 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgf119.c
@@ -22,104 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "head.h"
-#include "dmacnv50.h"
-
-#include <core/ramht.h>
-#include <subdev/timer.h>
+#include "channv50.h"
 
 #include <nvif/class.h>
 
-void
-gf119_disp_root_fini(struct nv50_disp_root *root)
-{
-	struct nvkm_device *device = root->disp->base.engine.subdev.device;
-	/* disable all interrupts */
-	nvkm_wr32(device, 0x6100b0, 0x00000000);
-}
-
-int
-gf119_disp_root_init(struct nv50_disp_root *root)
-{
-	struct nv50_disp *disp = root->disp;
-	struct nvkm_head *head;
-	struct nvkm_device *device = disp->base.engine.subdev.device;
-	u32 tmp;
-	int i;
-
-	/* The below segments of code copying values from one register to
-	 * another appear to inform EVO of the display capabilities or
-	 * something similar.
-	 */
-
-	/* ... CRTC caps */
-	list_for_each_entry(head, &disp->base.head, head) {
-		const u32 hoff = head->id * 0x800;
-		tmp = nvkm_rd32(device, 0x616104 + hoff);
-		nvkm_wr32(device, 0x6101b4 + hoff, tmp);
-		tmp = nvkm_rd32(device, 0x616108 + hoff);
-		nvkm_wr32(device, 0x6101b8 + hoff, tmp);
-		tmp = nvkm_rd32(device, 0x61610c + hoff);
-		nvkm_wr32(device, 0x6101bc + hoff, tmp);
-	}
-
-	/* ... DAC caps */
-	for (i = 0; i < disp->func->dac.nr; i++) {
-		tmp = nvkm_rd32(device, 0x61a000 + (i * 0x800));
-		nvkm_wr32(device, 0x6101c0 + (i * 0x800), tmp);
-	}
-
-	/* ... SOR caps */
-	for (i = 0; i < disp->func->sor.nr; i++) {
-		tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800));
-		nvkm_wr32(device, 0x6301c4 + (i * 0x800), tmp);
-	}
-
-	/* steal display away from vbios, or something like that */
-	if (nvkm_rd32(device, 0x6100ac) & 0x00000100) {
-		nvkm_wr32(device, 0x6100ac, 0x00000100);
-		nvkm_mask(device, 0x6194e8, 0x00000001, 0x00000000);
-		if (nvkm_msec(device, 2000,
-			if (!(nvkm_rd32(device, 0x6194e8) & 0x00000002))
-				break;
-		) < 0)
-			return -EBUSY;
-	}
-
-	/* point at display engine memory area (hash table, objects) */
-	nvkm_wr32(device, 0x610010, (root->instmem->addr >> 8) | 9);
-
-	/* enable supervisor interrupts, disable everything else */
-	nvkm_wr32(device, 0x610090, 0x00000000);
-	nvkm_wr32(device, 0x6100a0, 0x00000000);
-	nvkm_wr32(device, 0x6100b0, 0x00000307);
-
-	/* disable underflow reporting, preventing an intermittent issue
-	 * on some gk104 boards where the production vbios left this
-	 * setting enabled by default.
-	 *
-	 * ftp://download.nvidia.com/open-gpu-doc/gk104-disable-underflow-reporting/1/gk104-disable-underflow-reporting.txt
-	 */
-	list_for_each_entry(head, &disp->base.head, head) {
-		const u32 hoff = head->id * 0x800;
-		nvkm_mask(device, 0x616308 + hoff, 0x00000111, 0x00000010);
-	}
-
-	return 0;
-}
-
 static const struct nv50_disp_root_func
 gf119_disp_root = {
-	.init = gf119_disp_root_init,
-	.fini = gf119_disp_root_fini,
-	.dmac = {
-		&gf119_disp_core_oclass,
-		&gf119_disp_base_oclass,
-		&gf119_disp_ovly_oclass,
-	},
-	.pioc = {
-		&gf119_disp_oimm_oclass,
-		&gf119_disp_curs_oclass,
+	.user = {
+		{{0,0,GF110_DISP_CURSOR             }, gf119_disp_curs_new },
+		{{0,0,GF110_DISP_OVERLAY            }, gf119_disp_oimm_new },
+		{{0,0,GF110_DISP_BASE_CHANNEL_DMA   }, gf119_disp_base_new },
+		{{0,0,GF110_DISP_CORE_CHANNEL_DMA   }, gf119_disp_core_new },
+		{{0,0,GF110_DISP_OVERLAY_CONTROL_DMA}, gf119_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c
index 0bfdb1d..9e8ffd3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk104.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 gk104_disp_root = {
-	.init = gf119_disp_root_init,
-	.fini = gf119_disp_root_fini,
-	.dmac = {
-		&gk104_disp_core_oclass,
-		&gk104_disp_base_oclass,
-		&gk104_disp_ovly_oclass,
-	},
-	.pioc = {
-		&gk104_disp_oimm_oclass,
-		&gk104_disp_curs_oclass,
+	.user = {
+		{{0,0,GK104_DISP_CURSOR             }, gf119_disp_curs_new },
+		{{0,0,GK104_DISP_OVERLAY            }, gf119_disp_oimm_new },
+		{{0,0,GK104_DISP_BASE_CHANNEL_DMA   }, gf119_disp_base_new },
+		{{0,0,GK104_DISP_CORE_CHANNEL_DMA   }, gk104_disp_core_new },
+		{{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c
index 1e8dbed..dc85cc1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgk110.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 gk110_disp_root = {
-	.init = gf119_disp_root_init,
-	.fini = gf119_disp_root_fini,
-	.dmac = {
-		&gk110_disp_core_oclass,
-		&gk110_disp_base_oclass,
-		&gk104_disp_ovly_oclass,
-	},
-	.pioc = {
-		&gk104_disp_oimm_oclass,
-		&gk104_disp_curs_oclass,
+	.user = {
+		{{0,0,GK104_DISP_CURSOR             }, gf119_disp_curs_new },
+		{{0,0,GK104_DISP_OVERLAY            }, gf119_disp_oimm_new },
+		{{0,0,GK110_DISP_BASE_CHANNEL_DMA   }, gf119_disp_base_new },
+		{{0,0,GK110_DISP_CORE_CHANNEL_DMA   }, gk104_disp_core_new },
+		{{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c
index 44c55be..e0181ca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm107.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 gm107_disp_root = {
-	.init = gf119_disp_root_init,
-	.fini = gf119_disp_root_fini,
-	.dmac = {
-		&gm107_disp_core_oclass,
-		&gk110_disp_base_oclass,
-		&gk104_disp_ovly_oclass,
-	},
-	.pioc = {
-		&gk104_disp_oimm_oclass,
-		&gk104_disp_curs_oclass,
+	.user = {
+		{{0,0,GK104_DISP_CURSOR             }, gf119_disp_curs_new },
+		{{0,0,GK104_DISP_OVERLAY            }, gf119_disp_oimm_new },
+		{{0,0,GK110_DISP_BASE_CHANNEL_DMA   }, gf119_disp_base_new },
+		{{0,0,GM107_DISP_CORE_CHANNEL_DMA   }, gk104_disp_core_new },
+		{{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c
index 38f5ee1..e5e590e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgm200.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 gm200_disp_root = {
-	.init = gf119_disp_root_init,
-	.fini = gf119_disp_root_fini,
-	.dmac = {
-		&gm200_disp_core_oclass,
-		&gk110_disp_base_oclass,
-		&gk104_disp_ovly_oclass,
-	},
-	.pioc = {
-		&gk104_disp_oimm_oclass,
-		&gk104_disp_curs_oclass,
+	.user = {
+		{{0,0,GK104_DISP_CURSOR             }, gf119_disp_curs_new },
+		{{0,0,GK104_DISP_OVERLAY            }, gf119_disp_oimm_new },
+		{{0,0,GK110_DISP_BASE_CHANNEL_DMA   }, gf119_disp_base_new },
+		{{0,0,GM200_DISP_CORE_CHANNEL_DMA   }, gk104_disp_core_new },
+		{{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
index ac8fdd7..762a1a9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 gp100_disp_root = {
-	.init = gf119_disp_root_init,
-	.fini = gf119_disp_root_fini,
-	.dmac = {
-		&gp100_disp_core_oclass,
-		&gk110_disp_base_oclass,
-		&gk104_disp_ovly_oclass,
-	},
-	.pioc = {
-		&gk104_disp_oimm_oclass,
-		&gk104_disp_curs_oclass,
+	.user = {
+		{{0,0,GK104_DISP_CURSOR             }, gf119_disp_curs_new },
+		{{0,0,GK104_DISP_OVERLAY            }, gf119_disp_oimm_new },
+		{{0,0,GK110_DISP_BASE_CHANNEL_DMA   }, gf119_disp_base_new },
+		{{0,0,GP100_DISP_CORE_CHANNEL_DMA   }, gk104_disp_core_new },
+		{{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gk104_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c
index 37122ca..c7f0094 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs <bskeggs@redhat.com>
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 gp102_disp_root = {
-	.init = gf119_disp_root_init,
-	.fini = gf119_disp_root_fini,
-	.dmac = {
-		&gp102_disp_core_oclass,
-		&gp102_disp_base_oclass,
-		&gp102_disp_ovly_oclass,
-	},
-	.pioc = {
-		&gp102_disp_oimm_oclass,
-		&gp102_disp_curs_oclass,
+	.user = {
+		{{0,0,GK104_DISP_CURSOR             }, gp102_disp_curs_new },
+		{{0,0,GK104_DISP_OVERLAY            }, gp102_disp_oimm_new },
+		{{0,0,GK110_DISP_BASE_CHANNEL_DMA   }, gp102_disp_base_new },
+		{{0,0,GP102_DISP_CORE_CHANNEL_DMA   }, gp102_disp_core_new },
+		{{0,0,GK104_DISP_OVERLAY_CONTROL_DMA}, gp102_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c
index 124a0c2..a6963654 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt200.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 gt200_disp_root = {
-	.init = nv50_disp_root_init,
-	.fini = nv50_disp_root_fini,
-	.dmac = {
-		&gt200_disp_core_oclass,
-		&gt200_disp_base_oclass,
-		&gt200_disp_ovly_oclass,
-	},
-	.pioc = {
-		&g84_disp_oimm_oclass,
-		&g84_disp_curs_oclass,
+	.user = {
+		{{0,0,  G82_DISP_CURSOR             },  nv50_disp_curs_new },
+		{{0,0,  G82_DISP_OVERLAY            },  nv50_disp_oimm_new },
+		{{0,0,GT200_DISP_BASE_CHANNEL_DMA   },   g84_disp_base_new },
+		{{0,0,GT200_DISP_CORE_CHANNEL_DMA   },   g84_disp_core_new },
+		{{0,0,GT200_DISP_OVERLAY_CHANNEL_DMA}, gt200_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c
index dff52f3..4fe0a3a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgt215.c
@@ -22,22 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
 gt215_disp_root = {
-	.init = nv50_disp_root_init,
-	.fini = nv50_disp_root_fini,
-	.dmac = {
-		&gt215_disp_core_oclass,
-		&gt215_disp_base_oclass,
-		&gt215_disp_ovly_oclass,
-	},
-	.pioc = {
-		&gt215_disp_oimm_oclass,
-		&gt215_disp_curs_oclass,
+	.user = {
+		{{0,0,GT214_DISP_CURSOR             },  nv50_disp_curs_new },
+		{{0,0,GT214_DISP_OVERLAY            },  nv50_disp_oimm_new },
+		{{0,0,GT214_DISP_BASE_CHANNEL_DMA   },   g84_disp_base_new },
+		{{0,0,GT214_DISP_CORE_CHANNEL_DMA   },   g94_disp_core_new },
+		{{0,0,GT214_DISP_OVERLAY_CHANNEL_DMA},   g84_disp_ovly_new },
+		{}
 	},
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c
new file mode 100644
index 0000000..9c658d6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgv100.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "rootnv50.h"
+#include "channv50.h"
+
+#include <nvif/class.h>
+
+static const struct nv50_disp_root_func
+gv100_disp_root = {
+	.user = {
+		{{0,0,GV100_DISP_CURSOR                }, gv100_disp_curs_new },
+		{{0,0,GV100_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new },
+		{{0,0,GV100_DISP_CORE_CHANNEL_DMA      }, gv100_disp_core_new },
+		{{0,0,GV100_DISP_WINDOW_CHANNEL_DMA    }, gv100_disp_wndw_new },
+		{}
+	},
+};
+
+static int
+gv100_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+		    void *data, u32 size, struct nvkm_object **pobject)
+{
+	return nv50_disp_root_new_(&gv100_disp_root, disp, oclass,
+				   data, size, pobject);
+}
+
+const struct nvkm_disp_oclass
+gv100_disp_root_oclass = {
+	.base.oclass = GV100_DISP,
+	.base.minver = -1,
+	.base.maxver = -1,
+	.ctor = gv100_disp_root_new,
+};
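
gv100 is the first user of the new layout with a window-based channel set:
the base/overlay channels of earlier chips are replaced by per-window DMA
channels plus window-immediate channels, all children of the GV100_DISP
root class.  A hedged sketch of creating the root object (assuming, as on
earlier chips, that the root class takes no creation arguments, and using
this kernel's nvif_object_init() signature):

struct nvif_object root;
int ret = nvif_object_init(&device->object, 0 /* handle */, GV100_DISP,
			   NULL, 0, &root);
/* channels listed in gv100_disp_root.user[] are then created as
 * children of 'root' */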
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
index 1208524..3aa5a28 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
@@ -22,14 +22,12 @@
  * Authors: Ben Skeggs
  */
 #include "rootnv50.h"
-#include "dmacnv50.h"
+#include "channv50.h"
 #include "dp.h"
 #include "head.h"
 #include "ior.h"
 
 #include <core/client.h>
-#include <core/ramht.h>
-#include <subdev/timer.h>
 
 #include <nvif/class.h>
 #include <nvif/cl5070.h>
@@ -271,23 +269,12 @@
 }
 
 static int
-nv50_disp_root_dmac_new_(const struct nvkm_oclass *oclass,
-			 void *data, u32 size, struct nvkm_object **pobject)
+nv50_disp_root_child_new_(const struct nvkm_oclass *oclass,
+			  void *argv, u32 argc, struct nvkm_object **pobject)
 {
-	const struct nv50_disp_dmac_oclass *sclass = oclass->priv;
-	struct nv50_disp_root *root = nv50_disp_root(oclass->parent);
-	return sclass->ctor(sclass->func, sclass->mthd, root, sclass->chid,
-			    oclass, data, size, pobject);
-}
-
-static int
-nv50_disp_root_pioc_new_(const struct nvkm_oclass *oclass,
-			 void *data, u32 size, struct nvkm_object **pobject)
-{
-	const struct nv50_disp_pioc_oclass *sclass = oclass->priv;
-	struct nv50_disp_root *root = nv50_disp_root(oclass->parent);
-	return sclass->ctor(sclass->func, sclass->mthd, root, sclass->chid.ctrl,
-			    sclass->chid.user, oclass, data, size, pobject);
+	struct nv50_disp *disp = nv50_disp_root(oclass->parent)->disp;
+	const struct nv50_disp_user *user = oclass->priv;
+	return user->ctor(oclass, argv, argc, disp, pobject);
 }
 
 static int
@@ -296,68 +283,26 @@
 {
 	struct nv50_disp_root *root = nv50_disp_root(object);
 
-	if (index < ARRAY_SIZE(root->func->dmac)) {
-		sclass->base = root->func->dmac[index]->base;
-		sclass->priv = root->func->dmac[index];
-		sclass->ctor = nv50_disp_root_dmac_new_;
-		return 0;
-	}
-
-	index -= ARRAY_SIZE(root->func->dmac);
-
-	if (index < ARRAY_SIZE(root->func->pioc)) {
-		sclass->base = root->func->pioc[index]->base;
-		sclass->priv = root->func->pioc[index];
-		sclass->ctor = nv50_disp_root_pioc_new_;
+	if (root->func->user[index].ctor) {
+		sclass->base = root->func->user[index].base;
+		sclass->priv = root->func->user + index;
+		sclass->ctor = nv50_disp_root_child_new_;
 		return 0;
 	}
 
 	return -EINVAL;
 }
 
-static int
-nv50_disp_root_fini_(struct nvkm_object *object, bool suspend)
-{
-	struct nv50_disp_root *root = nv50_disp_root(object);
-	root->func->fini(root);
-	return 0;
-}
-
-static int
-nv50_disp_root_init_(struct nvkm_object *object)
-{
-	struct nv50_disp_root *root = nv50_disp_root(object);
-	struct nvkm_ior *ior;
-	int ret;
-
-	ret = root->func->init(root);
-	if (ret)
-		return ret;
-
-	/* Set 'normal' (ie. when it's attached to a head) state for
-	 * each output resource to 'fully enabled'.
-	 */
-	list_for_each_entry(ior, &root->disp->base.ior, head) {
-		ior->func->power(ior, true, true, true, true, true);
-	}
-
-	return 0;
-}
-
 static void *
 nv50_disp_root_dtor_(struct nvkm_object *object)
 {
 	struct nv50_disp_root *root = nv50_disp_root(object);
-	nvkm_ramht_del(&root->ramht);
-	nvkm_gpuobj_del(&root->instmem);
 	return root;
 }
 
 static const struct nvkm_object_func
 nv50_disp_root_ = {
 	.dtor = nv50_disp_root_dtor_,
-	.init = nv50_disp_root_init_,
-	.fini = nv50_disp_root_fini_,
 	.mthd = nv50_disp_root_mthd_,
 	.ntfy = nvkm_disp_ntfy,
 	.sclass = nv50_disp_root_child_get_,
@@ -370,8 +315,6 @@
 {
 	struct nv50_disp *disp = nv50_disp(base);
 	struct nv50_disp_root *root;
-	struct nvkm_device *device = disp->base.engine.subdev.device;
-	int ret;
 
 	if (!(root = kzalloc(sizeof(*root), GFP_KERNEL)))
 		return -ENOMEM;
@@ -380,102 +323,18 @@
 	nvkm_object_ctor(&nv50_disp_root_, oclass, &root->object);
 	root->func = func;
 	root->disp = disp;
-
-	ret = nvkm_gpuobj_new(disp->base.engine.subdev.device, 0x10000, 0x10000,
-			      false, NULL, &root->instmem);
-	if (ret)
-		return ret;
-
-	return nvkm_ramht_new(device, 0x1000, 0, root->instmem, &root->ramht);
-}
-
-void
-nv50_disp_root_fini(struct nv50_disp_root *root)
-{
-	struct nvkm_device *device = root->disp->base.engine.subdev.device;
-	/* disable all interrupts */
-	nvkm_wr32(device, 0x610024, 0x00000000);
-	nvkm_wr32(device, 0x610020, 0x00000000);
-}
-
-int
-nv50_disp_root_init(struct nv50_disp_root *root)
-{
-	struct nv50_disp *disp = root->disp;
-	struct nvkm_head *head;
-	struct nvkm_device *device = disp->base.engine.subdev.device;
-	u32 tmp;
-	int i;
-
-	/* The below segments of code copying values from one register to
-	 * another appear to inform EVO of the display capabilities or
-	 * something similar.  NFI what the 0x614004 caps are for..
-	 */
-	tmp = nvkm_rd32(device, 0x614004);
-	nvkm_wr32(device, 0x610184, tmp);
-
-	/* ... CRTC caps */
-	list_for_each_entry(head, &disp->base.head, head) {
-		tmp = nvkm_rd32(device, 0x616100 + (head->id * 0x800));
-		nvkm_wr32(device, 0x610190 + (head->id * 0x10), tmp);
-		tmp = nvkm_rd32(device, 0x616104 + (head->id * 0x800));
-		nvkm_wr32(device, 0x610194 + (head->id * 0x10), tmp);
-		tmp = nvkm_rd32(device, 0x616108 + (head->id * 0x800));
-		nvkm_wr32(device, 0x610198 + (head->id * 0x10), tmp);
-		tmp = nvkm_rd32(device, 0x61610c + (head->id * 0x800));
-		nvkm_wr32(device, 0x61019c + (head->id * 0x10), tmp);
-	}
-
-	/* ... DAC caps */
-	for (i = 0; i < disp->func->dac.nr; i++) {
-		tmp = nvkm_rd32(device, 0x61a000 + (i * 0x800));
-		nvkm_wr32(device, 0x6101d0 + (i * 0x04), tmp);
-	}
-
-	/* ... SOR caps */
-	for (i = 0; i < disp->func->sor.nr; i++) {
-		tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800));
-		nvkm_wr32(device, 0x6101e0 + (i * 0x04), tmp);
-	}
-
-	/* ... PIOR caps */
-	for (i = 0; i < disp->func->pior.nr; i++) {
-		tmp = nvkm_rd32(device, 0x61e000 + (i * 0x800));
-		nvkm_wr32(device, 0x6101f0 + (i * 0x04), tmp);
-	}
-
-	/* steal display away from vbios, or something like that */
-	if (nvkm_rd32(device, 0x610024) & 0x00000100) {
-		nvkm_wr32(device, 0x610024, 0x00000100);
-		nvkm_mask(device, 0x6194e8, 0x00000001, 0x00000000);
-		if (nvkm_msec(device, 2000,
-			if (!(nvkm_rd32(device, 0x6194e8) & 0x00000002))
-				break;
-		) < 0)
-			return -EBUSY;
-	}
-
-	/* point at display engine memory area (hash table, objects) */
-	nvkm_wr32(device, 0x610010, (root->instmem->addr >> 8) | 9);
-
-	/* enable supervisor interrupts, disable everything else */
-	nvkm_wr32(device, 0x61002c, 0x00000370);
-	nvkm_wr32(device, 0x610028, 0x00000000);
 	return 0;
 }
 
 static const struct nv50_disp_root_func
 nv50_disp_root = {
-	.init = nv50_disp_root_init,
-	.fini = nv50_disp_root_fini,
-	.dmac = {
-		&nv50_disp_core_oclass,
-		&nv50_disp_base_oclass,
-		&nv50_disp_ovly_oclass,
-	},
-	.pioc = {
-		&nv50_disp_oimm_oclass,
-		&nv50_disp_curs_oclass,
+	.user = {
+		{{0,0,NV50_DISP_CURSOR             }, nv50_disp_curs_new },
+		{{0,0,NV50_DISP_OVERLAY            }, nv50_disp_oimm_new },
+		{{0,0,NV50_DISP_BASE_CHANNEL_DMA   }, nv50_disp_base_new },
+		{{0,0,NV50_DISP_CORE_CHANNEL_DMA   }, nv50_disp_core_new },
+		{{0,0,NV50_DISP_OVERLAY_CHANNEL_DMA}, nv50_disp_ovly_new },
+		{}
 	},
 };
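
Child creation is now uniform: nv50_disp_root_child_get_() hands back the
matching user[] entry through sclass->priv, and nv50_disp_root_child_new_()
forwards to its ctor with the nv50_disp pointer rather than the root, since
channels no longer depend on root-owned state (the instmem/ramht the root
used to allocate are gone).  Roughly how the core's object-new path drives
this, sketched under the assumption that it probes .sclass with increasing
indices until -EINVAL:

struct nvkm_oclass sclass;
int i;
for (i = 0; object->func->sclass(object, i, &sclass) == 0; i++) {
	if (sclass.base.oclass == requested_class)
		return sclass.ctor(&sclass, argv, argc, pobject);
}
/* falls through: requested class not exposed by this object */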
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index 4818fa6..6ca4f91 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -2,34 +2,27 @@
 #ifndef __NV50_DISP_ROOT_H__
 #define __NV50_DISP_ROOT_H__
 #define nv50_disp_root(p) container_of((p), struct nv50_disp_root, object)
+#include <core/object.h>
 #include "nv50.h"
-#include "channv50.h"
-#include "dmacnv50.h"
 
 struct nv50_disp_root {
 	const struct nv50_disp_root_func *func;
 	struct nv50_disp *disp;
 	struct nvkm_object object;
-
-	struct nvkm_gpuobj *instmem;
-	struct nvkm_ramht *ramht;
 };
 
 struct nv50_disp_root_func {
-	int (*init)(struct nv50_disp_root *);
-	void (*fini)(struct nv50_disp_root *);
-	const struct nv50_disp_dmac_oclass *dmac[3];
-	const struct nv50_disp_pioc_oclass *pioc[2];
+	int blah;
+	struct nv50_disp_user {
+		struct nvkm_sclass base;
+		int (*ctor)(const struct nvkm_oclass *, void *argv, u32 argc,
+			    struct nv50_disp *, struct nvkm_object **);
+	} user[];
 };
 
 int  nv50_disp_root_new_(const struct nv50_disp_root_func *, struct nvkm_disp *,
 			 const struct nvkm_oclass *, void *data, u32 size,
 			 struct nvkm_object **);
-int  nv50_disp_root_init(struct nv50_disp_root *);
-void nv50_disp_root_fini(struct nv50_disp_root *);
-
-int  gf119_disp_root_init(struct nv50_disp_root *);
-void gf119_disp_root_fini(struct nv50_disp_root *);
 
 extern const struct nvkm_disp_oclass nv50_disp_root_oclass;
 extern const struct nvkm_disp_oclass g84_disp_root_oclass;
@@ -43,4 +36,5 @@
 extern const struct nvkm_disp_oclass gm200_disp_root_oclass;
 extern const struct nvkm_disp_oclass gp100_disp_root_oclass;
 extern const struct nvkm_disp_oclass gp102_disp_root_oclass;
+extern const struct nvkm_disp_oclass gv100_disp_root_oclass;
 #endif
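
Two details of the reworked nv50_disp_root_func are worth noting: user[] is
a flexible array member, so the table relies on its empty sentinel for
termination (nv50_disp_root_child_get_() above returns -EINVAL once it hits
the entry whose ctor is NULL), and the otherwise-unused 'int blah' member is
presumably there because ISO C forbids a structure whose only member is a
flexible array.  Sketch of walking the table directly, assuming the
sentinel convention:

const struct nv50_disp_user *user = root->func->user;
for (; user->ctor; user++) {
	/* user->base.oclass and user->ctor are valid entries here */
}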
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c
index f40b909..ec3a7db 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg84.c
@@ -34,5 +34,5 @@
 int
 g84_sor_new(struct nvkm_disp *disp, int id)
 {
-	return nv50_sor_new_(&g84_sor, disp, id);
+	return nvkm_ior_new_(&g84_sor, disp, SOR, id);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
index 49aeafd..4d59d02 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
@@ -279,5 +279,13 @@
 int
 g94_sor_new(struct nvkm_disp *disp, int id)
 {
-	return nv50_sor_new_(&g94_sor, disp, id);
+	return nvkm_ior_new_(&g94_sor, disp, SOR, id);
+}
+
+int
+g94_sor_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = (nvkm_rd32(device, 0x610184) & 0x0f000000) >> 24;
+	return 4;
 }
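
The new *_sor_cnt() hooks replace the presence test that used to live in
nv50_sor_new_()/gf119_sor_new_(): instead of each constructor probing its
own bit, the hook returns the maximum number of SORs and fills a bitmask of
those actually present.  A hedged sketch of how the disp core presumably
consumes it:

unsigned long mask;
int id, nr = disp->func->sor.cnt(&disp->base, &mask);
for_each_set_bit(id, &mask, nr)
	disp->func->sor.new(&disp->base, id);	/* hook names assumed */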
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
index 700fc75..e6e6dfb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -152,15 +152,6 @@
 	state->head = ctrl & 0x0000000f;
 }
 
-int
-gf119_sor_new_(const struct nvkm_ior_func *func, struct nvkm_disp *disp, int id)
-{
-	struct nvkm_device *device = disp->engine.subdev.device;
-	if (!(nvkm_rd32(device, 0x612004) & (0x00000100 << id)))
-		return 0;
-	return nvkm_ior_new_(func, disp, SOR, id);
-}
-
 static const struct nvkm_ior_func
 gf119_sor = {
 	.state = gf119_sor_state,
@@ -189,5 +180,13 @@
 int
 gf119_sor_new(struct nvkm_disp *disp, int id)
 {
-	return gf119_sor_new_(&gf119_sor, disp, id);
+	return nvkm_ior_new_(&gf119_sor, disp, SOR, id);
+}
+
+int
+gf119_sor_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = (nvkm_rd32(device, 0x612004) & 0x0000ff00) >> 8;
+	return 8;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c
index a1547bd..b94090e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgk104.c
@@ -49,5 +49,5 @@
 int
 gk104_sor_new(struct nvkm_disp *disp, int id)
 {
-	return gf119_sor_new_(&gk104_sor, disp, id);
+	return nvkm_ior_new_(&gk104_sor, disp, SOR, id);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
index 6023095..e6965de 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
@@ -63,5 +63,5 @@
 int
 gm107_sor_new(struct nvkm_disp *disp, int id)
 {
-	return gf119_sor_new_(&gm107_sor, disp, id);
+	return nvkm_ior_new_(&gm107_sor, disp, SOR, id);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
index f9b8107..d892bdf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
@@ -23,7 +23,7 @@
  */
 #include "ior.h"
 
-static void
+void
 gm200_sor_dp_drive(struct nvkm_ior *sor, int ln, int pc, int dc, int pe, int pu)
 {
 	struct nvkm_device *device = sor->disp->engine.subdev.device;
@@ -45,7 +45,7 @@
 	nvkm_wr32(device, 0x61c13c + loff, data[3] | (pc << shift));
 }
 
-static void
+void
 gm200_sor_route_set(struct nvkm_outp *outp, struct nvkm_ior *ior)
 {
 	struct nvkm_device *device = outp->disp->engine.subdev.device;
@@ -62,7 +62,7 @@
 		nvkm_mask(device, 0x612388 + moff, 0x0000001f, link << 4 | sor);
 }
 
-static int
+int
 gm200_sor_route_get(struct nvkm_outp *outp, int *link)
 {
 	struct nvkm_device *device = outp->disp->engine.subdev.device;
@@ -120,5 +120,5 @@
 int
 gm200_sor_new(struct nvkm_disp *disp, int id)
 {
-	return gf119_sor_new_(&gm200_sor, disp, id);
+	return nvkm_ior_new_(&gm200_sor, disp, SOR, id);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c
index da228b5..54d134d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgt215.c
@@ -65,5 +65,5 @@
 int
 gt215_sor_new(struct nvkm_disp *disp, int id)
 {
-	return nv50_sor_new_(&gt215_sor, disp, id);
+	return nvkm_ior_new_(&gt215_sor, disp, SOR, id);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c
new file mode 100644
index 0000000..040db8a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ior.h"
+
+#include <subdev/timer.h>
+
+static void
+gv100_sor_dp_watermark(struct nvkm_ior *sor, int head, u8 watermark)
+{
+	struct nvkm_device *device = sor->disp->engine.subdev.device;
+	const u32 hoff = head * 0x800;
+	nvkm_mask(device, 0x616550 + hoff, 0x0c00003f, 0x08000000 | watermark);
+}
+
+static void
+gv100_sor_dp_audio_sym(struct nvkm_ior *sor, int head, u16 h, u32 v)
+{
+	struct nvkm_device *device = sor->disp->engine.subdev.device;
+	const u32 hoff = head * 0x800;
+	nvkm_mask(device, 0x616568 + hoff, 0x0000ffff, h);
+	nvkm_mask(device, 0x61656c + hoff, 0x00ffffff, v);
+}
+
+static void
+gv100_sor_dp_audio(struct nvkm_ior *sor, int head, bool enable)
+{
+	struct nvkm_device *device = sor->disp->engine.subdev.device;
+	const u32 hoff = 0x800 * head;
+	const u32 data = 0x80000000 | (0x00000001 * enable);
+	const u32 mask = 0x8000000d;
+	nvkm_mask(device, 0x616560 + hoff, mask, data);
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x616560 + hoff) & 0x80000000))
+			break;
+	);
+}
+
+static void
+gv100_sor_state(struct nvkm_ior *sor, struct nvkm_ior_state *state)
+{
+	struct nvkm_device *device = sor->disp->engine.subdev.device;
+	const u32 coff = (state == &sor->arm) * 0x8000 + sor->id * 0x20;
+	u32 ctrl = nvkm_rd32(device, 0x680300 + coff);
+
+	state->proto_evo = (ctrl & 0x00000f00) >> 8;
+	switch (state->proto_evo) {
+	case 0: state->proto = LVDS; state->link = 1; break;
+	case 1: state->proto = TMDS; state->link = 1; break;
+	case 2: state->proto = TMDS; state->link = 2; break;
+	case 5: state->proto = TMDS; state->link = 3; break;
+	case 8: state->proto =   DP; state->link = 1; break;
+	case 9: state->proto =   DP; state->link = 2; break;
+	default:
+		state->proto = UNKNOWN;
+		break;
+	}
+
+	state->head = ctrl & 0x000000ff;
+}
+
+static const struct nvkm_ior_func
+gv100_sor = {
+	.route = {
+		.get = gm200_sor_route_get,
+		.set = gm200_sor_route_set,
+	},
+	.state = gv100_sor_state,
+	.power = nv50_sor_power,
+	.clock = gf119_sor_clock,
+	.hdmi = {
+		.ctrl = gv100_hdmi_ctrl,
+	},
+	.dp = {
+		.lanes = { 0, 1, 2, 3 },
+		.links = gf119_sor_dp_links,
+		.power = g94_sor_dp_power,
+		.pattern = gm107_sor_dp_pattern,
+		.drive = gm200_sor_dp_drive,
+		.audio = gv100_sor_dp_audio,
+		.audio_sym = gv100_sor_dp_audio_sym,
+		.watermark = gv100_sor_dp_watermark,
+	},
+	.hda = {
+		.hpd = gf119_hda_hpd,
+		.eld = gf119_hda_eld,
+	},
+};
+
+int
+gv100_sor_new(struct nvkm_disp *disp, int id)
+{
+	return nvkm_ior_new_(&gv100_sor, disp, SOR, id);
+}
+
+int
+gv100_sor_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = (nvkm_rd32(device, 0x610060) & 0x0000ff00) >> 8;
+	return (nvkm_rd32(device, 0x610074) & 0x00000f00) >> 8;
+}
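
gv100_sor_state() reads control state from the new 0x680300 register window
((state == &sor->arm) selects the armed copy at +0x8000) and decodes the
EVO protocol field; gv100_sor_dp_audio() shows the nvkm_msec() polling
idiom used throughout these files.  A sketch of that idiom, with 'reg' as a
placeholder register:

/* poll 'reg' for up to 2ms, until bit 31 clears */
s64 time = nvkm_msec(device, 2000,
	if (!(nvkm_rd32(device, reg) & 0x80000000))
		break;	/* condition met; nvkm_msec returns time left */
);
if (time < 0)
	ret = -ETIMEDOUT;	/* poll expired */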
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c
index c0179cc..8a70dd25 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp77.c
@@ -44,5 +44,5 @@
 int
 mcp77_sor_new(struct nvkm_disp *disp, int id)
 {
-	return nv50_sor_new_(&mcp77_sor, disp, id);
+	return nvkm_ior_new_(&mcp77_sor, disp, SOR, id);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c
index 9bb01cd..eac9c5be 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sormcp89.c
@@ -49,5 +49,5 @@
 int
 mcp89_sor_new(struct nvkm_disp *disp, int id)
 {
-	return nv50_sor_new_(&mcp89_sor, disp, id);
+	return nvkm_ior_new_(&mcp89_sor, disp, SOR, id);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c
index f3ebd0c..b4729f8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sornv50.c
@@ -84,15 +84,6 @@
 	state->head = ctrl & 0x00000003;
 }
 
-int
-nv50_sor_new_(const struct nvkm_ior_func *func, struct nvkm_disp *disp, int id)
-{
-	struct nvkm_device *device = disp->engine.subdev.device;
-	if (!(nvkm_rd32(device, 0x610184) & (0x01000000 << id)))
-		return 0;
-	return nvkm_ior_new_(func, disp, SOR, id);
-}
-
 static const struct nvkm_ior_func
 nv50_sor = {
 	.state = nv50_sor_state,
@@ -103,5 +94,13 @@
 int
 nv50_sor_new(struct nvkm_disp *disp, int id)
 {
-	return nv50_sor_new_(&nv50_sor, disp, id);
+	return nvkm_ior_new_(&nv50_sor, disp, SOR, id);
+}
+
+int
+nv50_sor_cnt(struct nvkm_disp *disp, unsigned long *pmask)
+{
+	struct nvkm_device *device = disp->engine.subdev.device;
+	*pmask = (nvkm_rd32(device, 0x610184) & 0x03000000) >> 24;
+	return 2;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/wimmgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wimmgv100.c
new file mode 100644
index 0000000..89d7833
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wimmgv100.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "channv50.h"
+
+#include <core/client.h>
+
+#include <nvif/clc37b.h>
+#include <nvif/unpack.h>
+
+static void
+gv100_disp_wimm_intr(struct nv50_disp_chan *chan, bool en)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u32 mask = 0x00000001 << chan->head;
+	const u32 data = en ? mask : 0;
+	nvkm_mask(device, 0x611da8, mask, data);
+}
+
+const struct nv50_disp_chan_func
+gv100_disp_wimm = {
+	.init = gv100_disp_dmac_init,
+	.fini = gv100_disp_dmac_fini,
+	.intr = gv100_disp_wimm_intr,
+	.user = gv100_disp_chan_user,
+};
+
+static int
+gv100_disp_wimm_new_(const struct nv50_disp_chan_func *func,
+		     const struct nv50_disp_chan_mthd *mthd,
+		     struct nv50_disp *disp, int chid,
+		     const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		     struct nvkm_object **pobject)
+{
+	union {
+		struct nvc37b_window_imm_channel_dma_v0 v0;
+	} *args = argv;
+	struct nvkm_object *parent = oclass->parent;
+	int wndw, ret = -ENOSYS;
+	u64 push;
+
+	nvif_ioctl(parent, "create window imm channel dma size %d\n", argc);
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
+		nvif_ioctl(parent, "create window imm channel dma vers %d "
+				   "pushbuf %016llx index %d\n",
+			   args->v0.version, args->v0.pushbuf, args->v0.index);
+		if (!(disp->wndw.mask & BIT(args->v0.index)))
+			return -EINVAL;
+		push = args->v0.pushbuf;
+		wndw = args->v0.index;
+	} else
+		return ret;
+
+	return nv50_disp_dmac_new_(func, mthd, disp, chid + wndw,
+				   wndw, push, oclass, pobject);
+}
+
+int
+gv100_disp_wimm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return gv100_disp_wimm_new_(&gv100_disp_wimm, NULL, disp, 33,
+				    oclass, argv, argc, pobject);
+}
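
Channel IDs for the new gv100 channel types derive from the window index:
window-immediate channels sit at chid 33 + index here, window channels in
wndwgv100.c below at chid 1 + index, with disp->wndw.mask gating which
indices a client may claim.  Sketch of the mapping as visible in this
series:

int chid_wimm = 33 + index;	/* window-immediate (this file)  */
int chid_wndw =  1 + index;	/* window (wndwgv100.c, below)   */
/* 'index' must have its bit set in disp->wndw.mask; the core
 * channel presumably sits at chid 0 */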
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c
new file mode 100644
index 0000000..9891180
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "channv50.h"
+
+#include <core/client.h>
+
+#include <nvif/clc37e.h>
+#include <nvif/unpack.h>
+
+static const struct nv50_disp_mthd_list
+gv100_disp_wndw_mthd_base = {
+	.mthd = 0x0000,
+	.addr = 0x000000,
+	.data = {
+		{ 0x0200, 0x690200 },
+		{ 0x020c, 0x69020c },
+		{ 0x0210, 0x690210 },
+		{ 0x0214, 0x690214 },
+		{ 0x0218, 0x690218 },
+		{ 0x021c, 0x69021c },
+		{ 0x0220, 0x690220 },
+		{ 0x0224, 0x690224 },
+		{ 0x0228, 0x690228 },
+		{ 0x022c, 0x69022c },
+		{ 0x0230, 0x690230 },
+		{ 0x0234, 0x690234 },
+		{ 0x0238, 0x690238 },
+		{ 0x0240, 0x690240 },
+		{ 0x0244, 0x690244 },
+		{ 0x0248, 0x690248 },
+		{ 0x024c, 0x69024c },
+		{ 0x0250, 0x690250 },
+		{ 0x0254, 0x690254 },
+		{ 0x0260, 0x690260 },
+		{ 0x0264, 0x690264 },
+		{ 0x0268, 0x690268 },
+		{ 0x026c, 0x69026c },
+		{ 0x0270, 0x690270 },
+		{ 0x0274, 0x690274 },
+		{ 0x0280, 0x690280 },
+		{ 0x0284, 0x690284 },
+		{ 0x0288, 0x690288 },
+		{ 0x028c, 0x69028c },
+		{ 0x0290, 0x690290 },
+		{ 0x0298, 0x690298 },
+		{ 0x029c, 0x69029c },
+		{ 0x02a0, 0x6902a0 },
+		{ 0x02a4, 0x6902a4 },
+		{ 0x02a8, 0x6902a8 },
+		{ 0x02ac, 0x6902ac },
+		{ 0x02b0, 0x6902b0 },
+		{ 0x02b4, 0x6902b4 },
+		{ 0x02b8, 0x6902b8 },
+		{ 0x02bc, 0x6902bc },
+		{ 0x02c0, 0x6902c0 },
+		{ 0x02c4, 0x6902c4 },
+		{ 0x02c8, 0x6902c8 },
+		{ 0x02cc, 0x6902cc },
+		{ 0x02d0, 0x6902d0 },
+		{ 0x02d4, 0x6902d4 },
+		{ 0x02d8, 0x6902d8 },
+		{ 0x02dc, 0x6902dc },
+		{ 0x02e0, 0x6902e0 },
+		{ 0x02e4, 0x6902e4 },
+		{ 0x02e8, 0x6902e8 },
+		{ 0x02ec, 0x6902ec },
+		{ 0x02f0, 0x6902f0 },
+		{ 0x02f4, 0x6902f4 },
+		{ 0x02f8, 0x6902f8 },
+		{ 0x02fc, 0x6902fc },
+		{ 0x0300, 0x690300 },
+		{ 0x0304, 0x690304 },
+		{ 0x0308, 0x690308 },
+		{ 0x0310, 0x690310 },
+		{ 0x0314, 0x690314 },
+		{ 0x0318, 0x690318 },
+		{ 0x031c, 0x69031c },
+		{ 0x0320, 0x690320 },
+		{ 0x0324, 0x690324 },
+		{ 0x0328, 0x690328 },
+		{ 0x032c, 0x69032c },
+		{ 0x033c, 0x69033c },
+		{ 0x0340, 0x690340 },
+		{ 0x0344, 0x690344 },
+		{ 0x0348, 0x690348 },
+		{ 0x034c, 0x69034c },
+		{ 0x0350, 0x690350 },
+		{ 0x0354, 0x690354 },
+		{ 0x0358, 0x690358 },
+		{ 0x0364, 0x690364 },
+		{ 0x0368, 0x690368 },
+		{ 0x036c, 0x69036c },
+		{ 0x0370, 0x690370 },
+		{ 0x0374, 0x690374 },
+		{ 0x0380, 0x690380 },
+		{}
+	}
+};
+
+const struct nv50_disp_chan_mthd
+gv100_disp_wndw_mthd = {
+	.name = "Base",
+	.addr = 0x001000,
+	.prev = 0x000800,
+	.data = {
+		{ "Global", 1, &gv100_disp_wndw_mthd_base },
+		{}
+	}
+};
+
+static void
+gv100_disp_wndw_intr(struct nv50_disp_chan *chan, bool en)
+{
+	struct nvkm_device *device = chan->disp->base.engine.subdev.device;
+	const u32 mask = 0x00000001 << chan->head;
+	const u32 data = en ? mask : 0;
+	nvkm_mask(device, 0x611da4, mask, data);
+}
+
+const struct nv50_disp_chan_func
+gv100_disp_wndw = {
+	.init = gv100_disp_dmac_init,
+	.fini = gv100_disp_dmac_fini,
+	.intr = gv100_disp_wndw_intr,
+	.user = gv100_disp_chan_user,
+	.bind = gv100_disp_dmac_bind,
+};
+
+static int
+gv100_disp_wndw_new_(const struct nv50_disp_chan_func *func,
+		     const struct nv50_disp_chan_mthd *mthd,
+		     struct nv50_disp *disp, int chid,
+		     const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		     struct nvkm_object **pobject)
+{
+	union {
+		struct nvc37e_window_channel_dma_v0 v0;
+	} *args = argv;
+	struct nvkm_object *parent = oclass->parent;
+	int wndw, ret = -ENOSYS;
+	u64 push;
+
+	nvif_ioctl(parent, "create window channel dma size %d\n", argc);
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
+		nvif_ioctl(parent, "create window channel dma vers %d "
+				   "pushbuf %016llx index %d\n",
+			   args->v0.version, args->v0.pushbuf, args->v0.index);
+		if (!(disp->wndw.mask & BIT(args->v0.index)))
+			return -EINVAL;
+		push = args->v0.pushbuf;
+		wndw = args->v0.index;
+	} else
+		return ret;
+
+	return nv50_disp_dmac_new_(func, mthd, disp, chid + wndw,
+				   wndw, push, oclass, pobject);
+}
+
+int
+gv100_disp_wndw_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nv50_disp *disp, struct nvkm_object **pobject)
+{
+	return gv100_disp_wndw_new_(&gv100_disp_wndw, &gv100_disp_wndw_mthd,
+				    disp, 1, oclass, argv, argc, pobject);
+}
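
The method list maps each window-channel method offset to its register
mirror: 0x0200..0x0380 land at 0x690200..0x690380, with .addr = 0x001000
presumably the per-window stride and .prev = 0x000800 the offset of the
armed-state copy (both interpretations are assumptions based on how earlier
chips' mthd tables are dumped).  A hedged sketch of allocating one of these
channels through nvif, using the nvc37e args unpacked above:

struct nvc37e_window_channel_dma_v0 args = {
	.pushbuf = pushbuf_handle,	/* handle of a pushbuf dmaobj */
	.index   = 0,			/* window 0 */
};
struct nvif_object wndw;
int ret = nvif_object_init(&disp_object, 0, GV100_DISP_WINDOW_CHANNEL_DMA,
			   &args, sizeof(args), &wndw);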
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild
index c4a2ce9..e96d1f5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/Kbuild
@@ -3,9 +3,11 @@
 nvkm-y += nvkm/engine/dma/nv50.o
 nvkm-y += nvkm/engine/dma/gf100.o
 nvkm-y += nvkm/engine/dma/gf119.o
+nvkm-y += nvkm/engine/dma/gv100.o
 
 nvkm-y += nvkm/engine/dma/user.o
 nvkm-y += nvkm/engine/dma/usernv04.o
 nvkm-y += nvkm/engine/dma/usernv50.o
 nvkm-y += nvkm/engine/dma/usergf100.o
 nvkm-y += nvkm/engine/dma/usergf119.o
+nvkm-y += nvkm/engine/dma/usergv100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/gv100.c
new file mode 100644
index 0000000..c65a4c2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/gv100.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+#include "user.h"
+
+static const struct nvkm_dma_func
+gv100_dma = {
+	.class_new = gv100_dmaobj_new,
+};
+
+int
+gv100_dma_new(struct nvkm_device *device, int index, struct nvkm_dma **pdma)
+{
+	return nvkm_dma_new_(&gv100_dma, device, index, pdma);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h
index 4bbac8a..9fe01fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.h
@@ -16,4 +16,6 @@
 		     struct nvkm_dmaobj **);
 int gf119_dmaobj_new(struct nvkm_dma *, const struct nvkm_oclass *, void *, u32,
 		     struct nvkm_dmaobj **);
+int gv100_dmaobj_new(struct nvkm_dma *, const struct nvkm_oclass *, void *, u32,
+		     struct nvkm_dmaobj **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/usergv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/usergv100.c
new file mode 100644
index 0000000..39eba9f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/usergv100.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define gv100_dmaobj(p) container_of((p), struct gv100_dmaobj, base)
+#include "user.h"
+
+#include <core/client.h>
+#include <core/gpuobj.h>
+#include <subdev/fb.h>
+
+#include <nvif/cl0002.h>
+#include <nvif/unpack.h>
+
+struct gv100_dmaobj {
+	struct nvkm_dmaobj base;
+	u32 flags0;
+};
+
+static int
+gv100_dmaobj_bind(struct nvkm_dmaobj *base, struct nvkm_gpuobj *parent,
+		  int align, struct nvkm_gpuobj **pgpuobj)
+{
+	struct gv100_dmaobj *dmaobj = gv100_dmaobj(base);
+	struct nvkm_device *device = dmaobj->base.dma->engine.subdev.device;
+	u64 start = dmaobj->base.start >> 8;
+	u64 limit = dmaobj->base.limit >> 8;
+	int ret;
+
+	ret = nvkm_gpuobj_new(device, 24, align, false, parent, pgpuobj);
+	if (ret == 0) {
+		nvkm_kmap(*pgpuobj);
+		nvkm_wo32(*pgpuobj, 0x00, dmaobj->flags0);
+		nvkm_wo32(*pgpuobj, 0x04, lower_32_bits(start));
+		nvkm_wo32(*pgpuobj, 0x08, upper_32_bits(start));
+		nvkm_wo32(*pgpuobj, 0x0c, lower_32_bits(limit));
+		nvkm_wo32(*pgpuobj, 0x10, upper_32_bits(limit));
+		nvkm_done(*pgpuobj);
+	}
+
+	return ret;
+}
+
+static const struct nvkm_dmaobj_func
+gv100_dmaobj_func = {
+	.bind = gv100_dmaobj_bind,
+};
+
+int
+gv100_dmaobj_new(struct nvkm_dma *dma, const struct nvkm_oclass *oclass,
+		 void *data, u32 size, struct nvkm_dmaobj **pdmaobj)
+{
+	union {
+		struct gf119_dma_v0 v0;
+	} *args;
+	struct nvkm_object *parent = oclass->parent;
+	struct gv100_dmaobj *dmaobj;
+	u32 kind, page;
+	int ret;
+
+	if (!(dmaobj = kzalloc(sizeof(*dmaobj), GFP_KERNEL)))
+		return -ENOMEM;
+	*pdmaobj = &dmaobj->base;
+
+	ret = nvkm_dmaobj_ctor(&gv100_dmaobj_func, dma, oclass,
+			       &data, &size, &dmaobj->base);
+	if (ret)
+		return ret;
+
+	ret  = -ENOSYS;
+	args = data;
+
+	nvif_ioctl(parent, "create gv100 dma size %d\n", size);
+	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+		nvif_ioctl(parent,
+			   "create gv100 dma vers %d page %d kind %02x\n",
+			   args->v0.version, args->v0.page, args->v0.kind);
+		kind = args->v0.kind != 0;
+		page = args->v0.page != 0;
+	} else
+	if (size == 0) {
+		kind = 0;
+		page = GF119_DMA_V0_PAGE_SP;
+	} else
+		return ret;
+
+	if (kind)
+		dmaobj->flags0 |= 0x00100000;
+	if (page)
+		dmaobj->flags0 |= 0x00000040;
+	dmaobj->flags0 |= 0x00000004; /* rw */
+
+	switch (dmaobj->base.target) {
+	case NV_MEM_TARGET_VRAM       : dmaobj->flags0 |= 0x00000001; break;
+	case NV_MEM_TARGET_PCI        : dmaobj->flags0 |= 0x00000002; break;
+	case NV_MEM_TARGET_PCI_NOSNOOP: dmaobj->flags0 |= 0x00000003; break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
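
gv100_dmaobj_bind() emits a 24-byte object whose layout follows from the
writes above; start/limit are stored right-shifted by 8, so both must be
256-byte aligned.  Sketch of the encoding:

/* gv100 DMA object, as written by bind():
 *   word 0 (0x00): flags0 - target in bits 1:0 (1=vram, 2=pci,
 *                  3=pci_nosnoop), rw bit 2, page bit 6, kind bit 20
 *   word 1 (0x04): start >> 8, low 32 bits
 *   word 2 (0x08): start >> 8, high bits
 *   word 3 (0x0c): limit >> 8, low 32 bits
 *   word 4 (0x10): limit >> 8, high bits
 *   word 5 (0x14): unused (object allocated as 24 bytes)
 */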
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
index 64e5183..f004085 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -15,6 +15,7 @@
 nvkm-y += nvkm/engine/fifo/gm20b.o
 nvkm-y += nvkm/engine/fifo/gp100.o
 nvkm-y += nvkm/engine/fifo/gp10b.o
+nvkm-y += nvkm/engine/fifo/gv100.o
 
 nvkm-y += nvkm/engine/fifo/chan.o
 nvkm-y += nvkm/engine/fifo/channv50.o
@@ -31,6 +32,6 @@
 nvkm-y += nvkm/engine/fifo/gpfifog84.o
 nvkm-y += nvkm/engine/fifo/gpfifogf100.o
 nvkm-y += nvkm/engine/fifo/gpfifogk104.o
-nvkm-y += nvkm/engine/fifo/gpfifogk110.o
-nvkm-y += nvkm/engine/fifo/gpfifogm200.o
-nvkm-y += nvkm/engine/fifo/gpfifogp100.o
+nvkm-y += nvkm/engine/fifo/gpfifogv100.o
+
+nvkm-y += nvkm/engine/fifo/usergv100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
index 64f6b76..c773caf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
@@ -30,6 +30,7 @@
 #include <subdev/mc.h>
 
 #include <nvif/event.h>
+#include <nvif/cl0080.h>
 #include <nvif/unpack.h>
 
 void
@@ -56,6 +57,12 @@
 }
 
 void
+nvkm_fifo_fault(struct nvkm_fifo *fifo, struct nvkm_fault_data *info)
+{
+	return fifo->func->fault(fifo, info);
+}
+
+void
 nvkm_fifo_chan_put(struct nvkm_fifo *fifo, unsigned long flags,
 		   struct nvkm_fifo_chan **pchan)
 {
@@ -209,6 +216,20 @@
 }
 
 static int
+nvkm_fifo_class_new_(struct nvkm_device *device,
+		     const struct nvkm_oclass *oclass, void *data, u32 size,
+		     struct nvkm_object **pobject)
+{
+	struct nvkm_fifo *fifo = nvkm_fifo(oclass->engine);
+	return fifo->func->class_new(fifo, oclass, data, size, pobject);
+}
+
+static const struct nvkm_device_oclass
+nvkm_fifo_class_ = {
+	.ctor = nvkm_fifo_class_new_,
+};
+
+static int
 nvkm_fifo_class_new(struct nvkm_device *device,
 		    const struct nvkm_oclass *oclass, void *data, u32 size,
 		    struct nvkm_object **pobject)
@@ -232,13 +253,9 @@
 	int c = 0;
 
 	if (fifo->func->class_get) {
-		int ret = fifo->func->class_get(fifo, index, &sclass);
-		if (ret == 0) {
-			oclass->base = sclass->base;
-			oclass->engn = sclass;
-			*class = &nvkm_fifo_class;
-			return 0;
-		}
+		int ret = fifo->func->class_get(fifo, index, oclass);
+		if (ret == 0)
+			*class = &nvkm_fifo_class_;
 		return ret;
 	}
 
@@ -271,6 +288,20 @@
 }
 
 static int
+nvkm_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data)
+{
+	struct nvkm_fifo *fifo = nvkm_fifo(engine);
+	switch (mthd) {
+	case NV_DEVICE_FIFO_CHANNELS: *data = fifo->nr; return 0;
+	default:
+		if (fifo->func->info)
+			return fifo->func->info(fifo, mthd, data);
+		break;
+	}
+	return -ENOSYS;
+}
+
+static int
 nvkm_fifo_oneinit(struct nvkm_engine *engine)
 {
 	struct nvkm_fifo *fifo = nvkm_fifo(engine);
@@ -311,6 +342,7 @@
 	.dtor = nvkm_fifo_dtor,
 	.preinit = nvkm_fifo_preinit,
 	.oneinit = nvkm_fifo_oneinit,
+	.info = nvkm_fifo_info,
 	.init = nvkm_fifo_init,
 	.fini = nvkm_fifo_fini,
 	.intr = nvkm_fifo_intr,
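
The new .info hook lets the device-class INFO method answer FIFO queries:
NV_DEVICE_FIFO_CHANNELS returns fifo->nr, and anything else is punted to a
chip-specific fifo->func->info.  A hedged sketch of a client-side query
(the nv_device_info_v1 mthd/data layout here is an assumption based on
<nvif/cl0080.h> of this era):

struct {
	struct nv_device_info_v1 m;
	struct nv_device_info_v1_data chans;
} args = {
	.m.version  = 1,
	.m.count    = 1,
	.chans.mthd = NV_DEVICE_FIFO_CHANNELS,
};
int ret = nvif_object_mthd(&device->object, NV_DEVICE_V0_INFO,
			   &args, sizeof(args));
/* on success, args.chans.data holds the channel count */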
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h
new file mode 100644
index 0000000..d0ac60b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h
@@ -0,0 +1,11 @@
+#ifndef __NVKM_FIFO_CGRP_H__
+#define __NVKM_FIFO_CGRP_H__
+#include "priv.h"
+
+struct nvkm_fifo_cgrp {
+	int id;
+	struct list_head head;
+	struct list_head chan;
+	int chan_nr;
+};
+#endif
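
nvkm_fifo_cgrp is the software side of Volta channel groups (TSGs): a
runlist may now contain group headers, each followed by that group's member
channels, which is what the reworked gk104_fifo_runlist_commit() below
emits via the new per-chip .cgrp/.chan/.size runlist hooks.  A hedged
sketch of a .cgrp writer to pair with gk104_fifo_runlist_chan() (the entry
encoding is hypothetical):

static void
gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp,
			struct nvkm_memory *memory, u32 offset)
{
	/* hypothetical encoding: member count in the high bits,
	 * group id in the low bits */
	nvkm_wo32(memory, offset + 0, (cgrp->chan_nr << 26) | cgrp->id);
	nvkm_wo32(memory, offset + 4, 0x00000000);
}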
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
index 1208e3d..8e28ba6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
@@ -10,6 +10,7 @@
 	struct gk104_fifo *fifo;
 	int runl;
 
+	struct nvkm_fifo_cgrp *cgrp;
 	struct list_head head;
 	bool killed;
 
@@ -19,11 +20,20 @@
 	} engn[NVKM_SUBDEV_NR];
 };
 
-int gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *,
-			  void *data, u32 size, struct nvkm_object **);
+extern const struct nvkm_fifo_chan_func gk104_fifo_gpfifo_func;
 
-extern const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass;
-extern const struct nvkm_fifo_chan_oclass gk110_fifo_gpfifo_oclass;
-extern const struct nvkm_fifo_chan_oclass gm200_fifo_gpfifo_oclass;
-extern const struct nvkm_fifo_chan_oclass gp100_fifo_gpfifo_oclass;
+int gk104_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *,
+			  void *data, u32 size, struct nvkm_object **);
+void *gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *);
+void gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *);
+void gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *);
+int gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *, struct nvkm_engine *,
+				  struct nvkm_object *);
+void gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *,
+				   struct nvkm_engine *);
+int gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *);
+int gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *);
+
+int gv100_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *,
+			  void *data, u32 size, struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 84bd703..a990464 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -22,16 +22,19 @@
  * Authors: Ben Skeggs
  */
 #include "gk104.h"
+#include "cgrp.h"
 #include "changk104.h"
 
 #include <core/client.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
+#include <subdev/fault.h>
 #include <subdev/timer.h>
 #include <subdev/top.h>
 #include <engine/sw.h>
 
 #include <nvif/class.h>
+#include <nvif/cl0080.h>
 
 struct gk104_fifo_engine_status {
 	bool busy;
@@ -93,15 +96,39 @@
 }
 
 static int
+gk104_fifo_class_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
+		     void *argv, u32 argc, struct nvkm_object **pobject)
+{
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	if (oclass->engn == &fifo->func->chan) {
+		const struct gk104_fifo_chan_user *user = oclass->engn;
+		return user->ctor(fifo, oclass, argv, argc, pobject);
+	} else
+	if (oclass->engn == &fifo->func->user) {
+		const struct gk104_fifo_user_user *user = oclass->engn;
+		return user->ctor(oclass, argv, argc, pobject);
+	}
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static int
 gk104_fifo_class_get(struct nvkm_fifo *base, int index,
-		     const struct nvkm_fifo_chan_oclass **psclass)
+		     struct nvkm_oclass *oclass)
 {
 	struct gk104_fifo *fifo = gk104_fifo(base);
 	int c = 0;
 
-	while ((*psclass = fifo->func->chan[c])) {
-		if (c++ == index)
-			return 0;
+	if (fifo->func->user.ctor && c++ == index) {
+		oclass->base =  fifo->func->user.user;
+		oclass->engn = &fifo->func->user;
+		return 0;
+	}
+
+	if (fifo->func->chan.ctor && c++ == index) {
+		oclass->base =  fifo->func->chan.user;
+		oclass->engn = &fifo->func->chan;
+		return 0;
 	}
 
 	return c;
@@ -124,10 +151,12 @@
 void
 gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 {
+	const struct gk104_fifo_runlist_func *func = fifo->func->runlist;
 	struct gk104_fifo_chan *chan;
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_memory *mem;
+	struct nvkm_fifo_cgrp *cgrp;
 	int nr = 0;
 	int target;
 
@@ -137,9 +166,14 @@
 
 	nvkm_kmap(mem);
 	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-		nvkm_wo32(mem, (nr * 8) + 0, chan->base.chid);
-		nvkm_wo32(mem, (nr * 8) + 4, 0x00000000);
-		nr++;
+		func->chan(chan, mem, nr++ * func->size);
+	}
+
+	list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) {
+		func->cgrp(cgrp, mem, nr++ * func->size);
+		list_for_each_entry(chan, &cgrp->chan, head) {
+			func->chan(chan, mem, nr++ * func->size);
+		}
 	}
 	nvkm_done(mem);
 
@@ -155,10 +189,10 @@
 				    (target << 28));
 	nvkm_wr32(device, 0x002274, (runl << 20) | nr);
 
-	if (wait_event_timeout(fifo->runlist[runl].wait,
-			       !(nvkm_rd32(device, 0x002284 + (runl * 0x08))
-				       & 0x00100000),
-			       msecs_to_jiffies(2000)) == 0)
+	if (nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x002284 + (runl * 0x08)) & 0x00100000))
+			break;
+	) < 0)
 		nvkm_error(subdev, "runlist %d update timeout\n", runl);
 unlock:
 	mutex_unlock(&subdev->mutex);
@@ -167,19 +201,45 @@
 void
 gk104_fifo_runlist_remove(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
 {
+	struct nvkm_fifo_cgrp *cgrp = chan->cgrp;
 	mutex_lock(&fifo->base.engine.subdev.mutex);
-	list_del_init(&chan->head);
+	if (!list_empty(&chan->head)) {
+		list_del_init(&chan->head);
+		if (cgrp && !--cgrp->chan_nr)
+			list_del_init(&cgrp->head);
+	}
 	mutex_unlock(&fifo->base.engine.subdev.mutex);
 }
 
 void
 gk104_fifo_runlist_insert(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
 {
+	struct nvkm_fifo_cgrp *cgrp = chan->cgrp;
 	mutex_lock(&fifo->base.engine.subdev.mutex);
-	list_add_tail(&chan->head, &fifo->runlist[chan->runl].chan);
+	if (cgrp) {
+		if (!cgrp->chan_nr++)
+			list_add_tail(&cgrp->head, &fifo->runlist[chan->runl].cgrp);
+		list_add_tail(&chan->head, &cgrp->chan);
+	} else {
+		list_add_tail(&chan->head, &fifo->runlist[chan->runl].chan);
+	}
 	mutex_unlock(&fifo->base.engine.subdev.mutex);
 }
 
+void
+gk104_fifo_runlist_chan(struct gk104_fifo_chan *chan,
+			struct nvkm_memory *memory, u32 offset)
+{
+	nvkm_wo32(memory, offset + 0, chan->base.chid);
+	nvkm_wo32(memory, offset + 4, 0x00000000);
+}
+
+const struct gk104_fifo_runlist_func
+gk104_fifo_runlist = {
+	.size = 8,
+	.chan = gk104_fifo_runlist_chan,
+};
+
 static void
 gk104_fifo_recover_work(struct work_struct *w)
 {
@@ -235,6 +295,32 @@
 	schedule_work(&fifo->recover.work);
 }
 
+static struct gk104_fifo_chan *
+gk104_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid)
+{
+	struct gk104_fifo_chan *chan;
+	struct nvkm_fifo_cgrp *cgrp;
+
+	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+		if (chan->base.chid == chid) {
+			list_del_init(&chan->head);
+			return chan;
+		}
+	}
+
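+	/* The ID may instead name a channel group (TSG); recover its first channel. */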
+	list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) {
+		if (cgrp->id == chid) {
+			chan = list_first_entry(&cgrp->chan, typeof(*chan), head);
+			list_del_init(&chan->head);
+			if (!--cgrp->chan_nr)
+				list_del_init(&cgrp->head);
+			return chan;
+		}
+	}
+
+	return NULL;
+}
+
 static void
 gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
 {
@@ -252,13 +338,10 @@
 		return;
 
 	/* Lookup SW state for channel, and mark it as dead. */
-	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-		if (chan->base.chid == chid) {
-			list_del_init(&chan->head);
-			chan->killed = true;
-			nvkm_fifo_kevent(&fifo->base, chid);
-			break;
-		}
+	chan = gk104_fifo_recover_chid(fifo, runl, chid);
+	if (chan) {
+		chan->killed = true;
+		nvkm_fifo_kevent(&fifo->base, chid);
 	}
 
 	/* Disable channel. */
@@ -347,6 +430,90 @@
 	schedule_work(&fifo->recover.work);
 }
 
+static void
+gk104_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info)
+{
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const struct nvkm_enum *er, *ee, *ec, *ea;
+	struct nvkm_engine *engine = NULL;
+	struct nvkm_fifo_chan *chan;
+	unsigned long flags;
+	char ct[8] = "HUB/", en[16] = "";
+	int engn;
+
+	er = nvkm_enum_find(fifo->func->fault.reason, info->reason);
+	ee = nvkm_enum_find(fifo->func->fault.engine, info->engine);
+	if (info->hub) {
+		ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client);
+	} else {
+		ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client);
+		snprintf(ct, sizeof(ct), "GPC%d/", info->gpc);
+	}
+	ea = nvkm_enum_find(fifo->func->fault.access, info->access);
+
+	if (ee && ee->data2) {
+		switch (ee->data2) {
+		case NVKM_SUBDEV_BAR:
+			nvkm_mask(device, 0x001704, 0x00000000, 0x00000000);
+			break;
+		case NVKM_SUBDEV_INSTMEM:
+			nvkm_mask(device, 0x001714, 0x00000000, 0x00000000);
+			break;
+		case NVKM_ENGINE_IFB:
+			nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
+			break;
+		default:
+			engine = nvkm_device_engine(device, ee->data2);
+			break;
+		}
+	}
+
+	if (ee == NULL) {
+		enum nvkm_devidx engidx = nvkm_top_fault(device, info->engine);
+		if (engidx < NVKM_SUBDEV_NR) {
+			const char *src = nvkm_subdev_name[engidx];
+			char *dst = en;
+			do {
+				*dst++ = toupper(*src++);
+			} while (*src);
+			engine = nvkm_device_engine(device, engidx);
+		}
+	} else {
+		snprintf(en, sizeof(en), "%s", ee->name);
+	}
+
+	spin_lock_irqsave(&fifo->base.lock, flags);
+	chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst);
+
+	nvkm_error(subdev,
+		   "fault %02x [%s] at %016llx engine %02x [%s] client %02x "
+		   "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n",
+		   info->access, ea ? ea->name : "", info->addr,
+		   info->engine, ee ? ee->name : en,
+		   info->client, ct, ec ? ec->name : "",
+		   info->reason, er ? er->name : "", chan ? chan->chid : -1,
+		   info->inst, chan ? chan->object.client->name : "unknown");
+
+	/* Kill the channel that caused the fault. */
+	if (chan)
+		gk104_fifo_recover_chan(&fifo->base, chan->chid);
+
+	/* Channel recovery will probably have already done this for the
+	 * correct engine(s), but just in case we can't find the channel
+	 * information...
+	 */
+	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
+		if (fifo->engine[engn].engine == engine) {
+			gk104_fifo_recover_engn(fifo, engn);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->base.lock, flags);
+}
+
 static const struct nvkm_enum
 gk104_fifo_bind_reason[] = {
 	{ 0x01, "BIND_NOT_UNBOUND" },
@@ -456,88 +623,21 @@
 	u32 inst = nvkm_rd32(device, 0x002800 + (unit * 0x10));
 	u32 valo = nvkm_rd32(device, 0x002804 + (unit * 0x10));
 	u32 vahi = nvkm_rd32(device, 0x002808 + (unit * 0x10));
-	u32 stat = nvkm_rd32(device, 0x00280c + (unit * 0x10));
-	u32 gpc    = (stat & 0x1f000000) >> 24;
-	u32 client = (stat & 0x00001f00) >> 8;
-	u32 write  = (stat & 0x00000080);
-	u32 hub    = (stat & 0x00000040);
-	u32 reason = (stat & 0x0000000f);
-	const struct nvkm_enum *er, *eu, *ec;
-	struct nvkm_engine *engine = NULL;
-	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	char gpcid[8] = "", en[16] = "";
-	int engn;
+	u32 type = nvkm_rd32(device, 0x00280c + (unit * 0x10));
+	struct nvkm_fault_data info;
 
-	er = nvkm_enum_find(fifo->func->fault.reason, reason);
-	eu = nvkm_enum_find(fifo->func->fault.engine, unit);
-	if (hub) {
-		ec = nvkm_enum_find(fifo->func->fault.hubclient, client);
-	} else {
-		ec = nvkm_enum_find(fifo->func->fault.gpcclient, client);
-		snprintf(gpcid, sizeof(gpcid), "GPC%d/", gpc);
-	}
+	info.inst   =  (u64)inst << 12;
+	info.addr   = ((u64)vahi << 32) | valo;
+	info.time   = 0;
+	info.engine = unit;
+	info.valid  = 1;
+	info.gpc    = (type & 0x1f000000) >> 24;
+	info.client = (type & 0x00001f00) >> 8;
+	info.access = (type & 0x00000080) >> 7;
+	info.hub    = (type & 0x00000040) >> 6;
+	info.reason = (type & 0x000000ff);
 
-	if (eu && eu->data2) {
-		switch (eu->data2) {
-		case NVKM_SUBDEV_BAR:
-			nvkm_mask(device, 0x001704, 0x00000000, 0x00000000);
-			break;
-		case NVKM_SUBDEV_INSTMEM:
-			nvkm_mask(device, 0x001714, 0x00000000, 0x00000000);
-			break;
-		case NVKM_ENGINE_IFB:
-			nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
-			break;
-		default:
-			engine = nvkm_device_engine(device, eu->data2);
-			break;
-		}
-	}
-
-	if (eu == NULL) {
-		enum nvkm_devidx engidx = nvkm_top_fault(device, unit);
-		if (engidx < NVKM_SUBDEV_NR) {
-			const char *src = nvkm_subdev_name[engidx];
-			char *dst = en;
-			do {
-				*dst++ = toupper(*src++);
-			} while(*src);
-			engine = nvkm_device_engine(device, engidx);
-		}
-	} else {
-		snprintf(en, sizeof(en), "%s", eu->name);
-	}
-
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	chan = nvkm_fifo_chan_inst_locked(&fifo->base, (u64)inst << 12);
-
-	nvkm_error(subdev,
-		   "%s fault at %010llx engine %02x [%s] client %02x [%s%s] "
-		   "reason %02x [%s] on channel %d [%010llx %s]\n",
-		   write ? "write" : "read", (u64)vahi << 32 | valo,
-		   unit, en, client, gpcid, ec ? ec->name : "",
-		   reason, er ? er->name : "", chan ? chan->chid : -1,
-		   (u64)inst << 12,
-		   chan ? chan->object.client->name : "unknown");
-
-
-	/* Kill the channel that caused the fault. */
-	if (chan)
-		gk104_fifo_recover_chan(&fifo->base, chan->chid);
-
-	/* Channel recovery will probably have already done this for the
-	 * correct engine(s), but just in case we can't find the channel
-	 * information...
-	 */
-	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
-		if (fifo->engine[engn].engine == engine) {
-			gk104_fifo_recover_engn(fifo, engn);
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
+	nvkm_fifo_fault(&fifo->base, &info);
 }
 
 static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
@@ -766,6 +866,34 @@
 }
 
 static int
+gk104_fifo_info(struct nvkm_fifo *base, u64 mthd, u64 *data)
+{
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	switch (mthd) {
+	case NV_DEVICE_FIFO_RUNLISTS:
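+		/* Bitmask with one bit set for each runlist present. */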
+		*data = (1ULL << fifo->runlist_nr) - 1;
+		return 0;
+	case NV_DEVICE_FIFO_RUNLIST_ENGINES(0)...
+	     NV_DEVICE_FIFO_RUNLIST_ENGINES(63): {
+		int runl = mthd - NV_DEVICE_FIFO_RUNLIST_ENGINES(0), engn;
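+		/* Mask of engine subdev indices served by this runlist. */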
+		if (runl < fifo->runlist_nr) {
+			unsigned long engm = fifo->runlist[runl].engm;
+			struct nvkm_engine *engine;
+			*data = 0;
+			for_each_set_bit(engn, &engm, fifo->engine_nr) {
+				if ((engine = fifo->engine[engn].engine))
+					*data |= BIT_ULL(engine->subdev.index);
+			}
+			return 0;
+		}
+	}
+		return -EINVAL;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int
 gk104_fifo_oneinit(struct nvkm_fifo *base)
 {
 	struct gk104_fifo *fifo = gk104_fifo(base);
@@ -813,19 +941,18 @@
 	kfree(map);
 
 	for (i = 0; i < fifo->runlist_nr; i++) {
-		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
-				      0x8000, 0x1000, false,
-				      &fifo->runlist[i].mem[0]);
-		if (ret)
-			return ret;
-
-		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
-				      0x8000, 0x1000, false,
-				      &fifo->runlist[i].mem[1]);
-		if (ret)
-			return ret;
+		for (j = 0; j < ARRAY_SIZE(fifo->runlist[i].mem); j++) {
+			ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
+					      fifo->base.nr * 2 /* TSG+chan */ *
+					      fifo->func->runlist->size,
+					      0x1000, false,
+					      &fifo->runlist[i].mem[j]);
+			if (ret)
+				return ret;
+		}
 
 		init_waitqueue_head(&fifo->runlist[i].wait);
+		INIT_LIST_HEAD(&fifo->runlist[i].cgrp);
 		INIT_LIST_HEAD(&fifo->runlist[i].chan);
 	}
 
@@ -868,6 +995,9 @@
 
 	nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12);
 
+	if (fifo->func->init_pbdma_timeout)
+		fifo->func->init_pbdma_timeout(fifo);
+
 	nvkm_wr32(device, 0x002100, 0xffffffff);
 	nvkm_wr32(device, 0x002140, 0x7fffffff);
 }
@@ -894,13 +1024,16 @@
 gk104_fifo_ = {
 	.dtor = gk104_fifo_dtor,
 	.oneinit = gk104_fifo_oneinit,
+	.info = gk104_fifo_info,
 	.init = gk104_fifo_init,
 	.fini = gk104_fifo_fini,
 	.intr = gk104_fifo_intr,
+	.fault = gk104_fifo_fault,
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
 	.recover_chan = gk104_fifo_recover_chan,
 	.class_get = gk104_fifo_class_get,
+	.class_new = gk104_fifo_class_new,
 };
 
 int
@@ -919,6 +1052,13 @@
 }
 
 const struct nvkm_enum
+gk104_fifo_fault_access[] = {
+	{ 0x0, "READ" },
+	{ 0x1, "WRITE" },
+	{}
+};
+
+const struct nvkm_enum
 gk104_fifo_fault_engine[] = {
 	{ 0x00, "GR", NULL, NVKM_ENGINE_GR },
 	{ 0x01, "DISPLAY" },
@@ -1035,14 +1175,13 @@
 
 static const struct gk104_fifo_func
 gk104_fifo = {
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gk104_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gk104_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gk104_fifo_runlist,
+	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
index 1579785..d295b81 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
@@ -3,6 +3,7 @@
 #define __GK104_FIFO_H__
 #define gk104_fifo(p) container_of((p), struct gk104_fifo, base)
 #include "priv.h"
+struct nvkm_fifo_cgrp;
 
 #include <core/enum.h>
 #include <subdev/mmu.h>
@@ -31,6 +32,7 @@
 		struct nvkm_memory *mem[2];
 		int next;
 		wait_queue_head_t wait;
+		struct list_head cgrp;
 		struct list_head chan;
 		u32 engm;
 	} runlist[16];
@@ -43,14 +45,36 @@
 };
 
 struct gk104_fifo_func {
+	void (*init_pbdma_timeout)(struct gk104_fifo *);
+
 	struct {
+		const struct nvkm_enum *access;
 		const struct nvkm_enum *engine;
 		const struct nvkm_enum *reason;
 		const struct nvkm_enum *hubclient;
 		const struct nvkm_enum *gpcclient;
 	} fault;
 
-	const struct nvkm_fifo_chan_oclass *chan[];
+	const struct gk104_fifo_runlist_func {
+		u8 size;
+		void (*cgrp)(struct nvkm_fifo_cgrp *,
+			     struct nvkm_memory *, u32 offset);
+		void (*chan)(struct gk104_fifo_chan *,
+			     struct nvkm_memory *, u32 offset);
+	} *runlist;
+
+	struct gk104_fifo_user_user {
+		struct nvkm_sclass user;
+		int (*ctor)(const struct nvkm_oclass *, void *, u32,
+			    struct nvkm_object **);
+	} user;
+
+	struct gk104_fifo_chan_user {
+		struct nvkm_sclass user;
+		int (*ctor)(struct gk104_fifo *, const struct nvkm_oclass *,
+			    void *, u32, struct nvkm_object **);
+	} chan;
+	bool cgrp_force;
 };
 
 int gk104_fifo_new_(const struct gk104_fifo_func *, struct nvkm_device *,
@@ -59,30 +83,23 @@
 void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *);
 void gk104_fifo_runlist_commit(struct gk104_fifo *, int runl);
 
-static inline u64
-gk104_fifo_engine_subdev(int engine)
-{
-	switch (engine) {
-	case 0: return (1ULL << NVKM_ENGINE_GR) |
-		       (1ULL << NVKM_ENGINE_SW) |
-		       (1ULL << NVKM_ENGINE_CE2);
-	case 1: return (1ULL << NVKM_ENGINE_MSPDEC);
-	case 2: return (1ULL << NVKM_ENGINE_MSPPP);
-	case 3: return (1ULL << NVKM_ENGINE_MSVLD);
-	case 4: return (1ULL << NVKM_ENGINE_CE0);
-	case 5: return (1ULL << NVKM_ENGINE_CE1);
-	case 6: return (1ULL << NVKM_ENGINE_MSENC);
-	default:
-		WARN_ON(1);
-		return 0;
-	}
-}
-
+extern const struct nvkm_enum gk104_fifo_fault_access[];
 extern const struct nvkm_enum gk104_fifo_fault_engine[];
 extern const struct nvkm_enum gk104_fifo_fault_reason[];
 extern const struct nvkm_enum gk104_fifo_fault_hubclient[];
 extern const struct nvkm_enum gk104_fifo_fault_gpcclient[];
+extern const struct gk104_fifo_runlist_func gk104_fifo_runlist;
+void gk104_fifo_runlist_chan(struct gk104_fifo_chan *,
+			     struct nvkm_memory *, u32);
+
+extern const struct gk104_fifo_runlist_func gk110_fifo_runlist;
+void gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *,
+			     struct nvkm_memory *, u32);
+
+void gk208_fifo_init_pbdma_timeout(struct gk104_fifo *);
 
 extern const struct nvkm_enum gm107_fifo_fault_engine[];
+extern const struct gk104_fifo_runlist_func gm107_fifo_runlist;
+
 extern const struct nvkm_enum gp100_fifo_fault_engine[];
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
index b2f8ab7..ac7655a1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
@@ -22,18 +22,38 @@
  * Authors: Ben Skeggs
  */
 #include "gk104.h"
+#include "cgrp.h"
 #include "changk104.h"
 
+#include <core/memory.h>
+
+#include <nvif/class.h>
+
+void
+gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp,
+			struct nvkm_memory *memory, u32 offset)
+{
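+	/* TSG header entry: channel count, timeslice, and entry-type flag. */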
+	nvkm_wo32(memory, offset + 0, (cgrp->chan_nr << 26) | (128 << 18) |
+				      (3 << 14) | 0x00002000 | cgrp->id);
+	nvkm_wo32(memory, offset + 4, 0x00000000);
+}
+
+const struct gk104_fifo_runlist_func
+gk110_fifo_runlist = {
+	.size = 8,
+	.cgrp = gk110_fifo_runlist_cgrp,
+	.chan = gk104_fifo_runlist_chan,
+};
+
 static const struct gk104_fifo_func
 gk110_fifo = {
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gk104_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gk110_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gk110_fifo_runlist,
+	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_B}, gk104_fifo_gpfifo_new },
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
index 160617d..5ea7e45 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
@@ -24,16 +24,28 @@
 #include "gk104.h"
 #include "changk104.h"
 
+#include <nvif/class.h>
+
+void
+gk208_fifo_init_pbdma_timeout(struct gk104_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	int i;
+
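+	/* Program the timeout period for each PBDMA unit. */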
+	for (i = 0; i < fifo->pbdma_nr; i++)
+		nvkm_wr32(device, 0x04012c + (i * 0x2000), 0x0000ffff);
+}
+
 static const struct gk104_fifo_func
 gk208_fifo = {
+	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gk104_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gk104_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gk110_fifo_runlist,
+	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
index be9f5c1..535a0eb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
@@ -22,16 +22,18 @@
 #include "gk104.h"
 #include "changk104.h"
 
+#include <nvif/class.h>
+
 static const struct gk104_fifo_func
 gk20a_fifo = {
+	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gk104_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gk104_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gk110_fifo_runlist,
+	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
index 29c0806..79ae19b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
@@ -24,6 +24,25 @@
 #include "gk104.h"
 #include "changk104.h"
 
+#include <core/gpuobj.h>
+
+#include <nvif/class.h>
+
+static void
+gm107_fifo_runlist_chan(struct gk104_fifo_chan *chan,
+			struct nvkm_memory *memory, u32 offset)
+{
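+	/* GM107 entries carry the channel's instance address as well. */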
+	nvkm_wo32(memory, offset + 0, chan->base.chid);
+	nvkm_wo32(memory, offset + 4, chan->base.inst->addr >> 12);
+}
+
+const struct gk104_fifo_runlist_func
+gm107_fifo_runlist = {
+	.size = 8,
+	.cgrp = gk110_fifo_runlist_cgrp,
+	.chan = gm107_fifo_runlist_chan,
+};
+
 const struct nvkm_enum
 gm107_fifo_fault_engine[] = {
 	{ 0x01, "DISPLAY" },
@@ -49,14 +68,14 @@
 
 static const struct gk104_fifo_func
 gm107_fifo = {
+	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gm107_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gk110_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gm107_fifo_runlist,
+	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_B}, gk104_fifo_gpfifo_new },
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
index b069f78..49565fa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
@@ -24,16 +24,18 @@
 #include "gk104.h"
 #include "changk104.h"
 
+#include <nvif/class.h>
+
 static const struct gk104_fifo_func
 gm200_fifo = {
+	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gm107_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gm200_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gm107_fifo_runlist,
+	.chan = {{0,0,MAXWELL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
index 2ed87c2..46736513 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
@@ -22,16 +22,18 @@
 #include "gk104.h"
 #include "changk104.h"
 
+#include <nvif/class.h>
+
 static const struct gk104_fifo_func
 gm20b_fifo = {
+	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gm107_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gm200_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gm107_fifo_runlist,
+	.chan = {{0,0,MAXWELL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
index 41f16cf..e2f8f90 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
@@ -24,6 +24,8 @@
 #include "gk104.h"
 #include "changk104.h"
 
+#include <nvif/class.h>
+
 const struct nvkm_enum
 gp100_fifo_fault_engine[] = {
 	{ 0x01, "DISPLAY" },
@@ -50,14 +52,15 @@
 
 static const struct gk104_fifo_func
 gp100_fifo = {
+	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gp100_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gp100_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gm107_fifo_runlist,
+	.chan = {{0,0,PASCAL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
+	.cgrp_force = true,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c
index 4af96c3..7733bf7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c
@@ -22,16 +22,19 @@
 #include "gk104.h"
 #include "changk104.h"
 
+#include <nvif/class.h>
+
 static const struct gk104_fifo_func
 gp10b_fifo = {
+	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gp100_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
 	.fault.hubclient = gk104_fifo_fault_hubclient,
 	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.chan = {
-		&gp100_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runlist = &gm107_fifo_runlist,
+	.chan = {{0,0,PASCAL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
+	.cgrp_force = true,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index 80c8752..118b37a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -22,6 +22,7 @@
  * Authors: Ben Skeggs
  */
 #include "changk104.h"
+#include "cgrp.h"
 
 #include <core/client.h>
 #include <core/gpuobj.h>
@@ -33,27 +34,40 @@
 #include <nvif/cla06f.h>
 #include <nvif/unpack.h>
 
-static int
-gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
+int
+gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *chan)
 {
 	struct gk104_fifo *fifo = chan->fifo;
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_client *client = chan->base.object.client;
+	struct nvkm_fifo_cgrp *cgrp = chan->cgrp;
 	int ret = 0;
 
-	mutex_lock(&subdev->mutex);
-	nvkm_wr32(device, 0x002634, chan->base.chid);
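+	/* Preempt the whole TSG if the channel belongs to one. */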
+	if (cgrp)
+		nvkm_wr32(device, 0x002634, cgrp->id | 0x01000000);
+	else
+		nvkm_wr32(device, 0x002634, chan->base.chid);
 	if (nvkm_msec(device, 2000,
 		if (!(nvkm_rd32(device, 0x002634) & 0x00100000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "channel %d [%s] kick timeout\n",
-			   chan->base.chid, client->name);
+		nvkm_error(subdev, "%s %d [%s] kick timeout\n",
+			   cgrp ? "tsg" : "channel",
+			   cgrp ? cgrp->id : chan->base.chid, client->name);
 		nvkm_fifo_recover_chan(&fifo->base, chan->base.chid);
 		ret = -ETIMEDOUT;
 	}
-	mutex_unlock(&subdev->mutex);
+	return ret;
+}
+
+int
+gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
+{
+	int ret;
+	mutex_lock(&chan->base.fifo->engine.subdev.mutex);
+	ret = gk104_fifo_gpfifo_kick_locked(chan);
+	mutex_unlock(&chan->base.fifo->engine.subdev.mutex);
 	return ret;
 }
 
@@ -62,9 +76,8 @@
 {
 	switch (engine->subdev.index) {
 	case NVKM_ENGINE_SW    :
-	case NVKM_ENGINE_CE0   :
-	case NVKM_ENGINE_CE1   :
-	case NVKM_ENGINE_CE2   : return 0x0000;
+	case NVKM_ENGINE_CE0...NVKM_ENGINE_CE_LAST:
+		return 0;
 	case NVKM_ENGINE_GR    : return 0x0210;
 	case NVKM_ENGINE_SEC   : return 0x0220;
 	case NVKM_ENGINE_MSPDEC: return 0x0250;
@@ -133,7 +146,7 @@
 	return 0;
 }
 
-static void
+void
 gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine)
 {
@@ -142,7 +155,7 @@
 	nvkm_gpuobj_del(&chan->engn[engine->subdev.index].inst);
 }
 
-static int
+int
 gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine,
 			      struct nvkm_object *object)
@@ -167,7 +180,7 @@
 			       chan->engn[engn].vma, NULL, 0);
 }
 
-static void
+void
 gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
 {
 	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
@@ -185,7 +198,7 @@
 	nvkm_wr32(device, 0x800000 + coff, 0x00000000);
 }
 
-static void
+void
 gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
 {
 	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
@@ -205,13 +218,15 @@
 	}
 }
 
-static void *
+void *
 gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base)
 {
-	return gk104_fifo_chan(base);
+	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
+	kfree(chan->cgrp);
+	return chan;
 }
 
-static const struct nvkm_fifo_chan_func
+const struct nvkm_fifo_chan_func
 gk104_fifo_gpfifo_func = {
 	.dtor = gk104_fifo_gpfifo_dtor,
 	.init = gk104_fifo_gpfifo_init,
@@ -223,62 +238,30 @@
 	.engine_fini = gk104_fifo_gpfifo_engine_fini,
 };
 
-struct gk104_fifo_chan_func {
-	u32 engine;
-	u64 subdev;
-};
-
 static int
-gk104_fifo_gpfifo_new_(const struct gk104_fifo_chan_func *func,
-		       struct gk104_fifo *fifo, u32 *engmask, u16 *chid,
+gk104_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
 		       u64 vmm, u64 ioffset, u64 ilength,
 		       const struct nvkm_oclass *oclass,
 		       struct nvkm_object **pobject)
 {
 	struct gk104_fifo_chan *chan;
-	int runlist = -1, ret = -ENOSYS, i, j;
-	u32 engines = 0, present = 0;
+	int runlist = ffs(*runlists) - 1, ret, i;
+	unsigned long engm;
 	u64 subdevs = 0;
 	u64 usermem;
 
-	if (!vmm)
+	if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr)
 		return -EINVAL;
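+	/* Write back the runlist that was actually selected. */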
+	*runlists = BIT_ULL(runlist);
 
-	/* Determine which downstream engines are present */
-	for (i = 0; i < fifo->engine_nr; i++) {
-		struct nvkm_engine *engine = fifo->engine[i].engine;
-		if (engine) {
-			u64 submask = BIT_ULL(engine->subdev.index);
-			for (j = 0; func[j].subdev; j++) {
-				if (func[j].subdev & submask) {
-					present |= func[j].engine;
-					break;
-				}
-			}
-
-			if (!func[j].subdev)
-				continue;
-
-			if (runlist < 0 && (*engmask & present))
-				runlist = fifo->engine[i].runl;
-			if (runlist == fifo->engine[i].runl) {
-				engines |= func[j].engine;
-				subdevs |= func[j].subdev;
-			}
-		}
+	engm = fifo->runlist[runlist].engm;
+	for_each_set_bit(i, &engm, fifo->engine_nr) {
+		if (fifo->engine[i].engine)
+			subdevs |= BIT_ULL(fifo->engine[i].engine->subdev.index);
 	}
 
-	/* Just an engine mask query?  All done here! */
-	if (!*engmask) {
-		*engmask = present;
-		return nvkm_object_new(oclass, NULL, 0, pobject);
-	}
-
-	/* No runlist?  No supported engines. */
-	*engmask = present;
-	if (runlist < 0)
-		return -ENODEV;
-	*engmask = engines;
+	if (subdevs & BIT_ULL(NVKM_ENGINE_GR))
+		subdevs |= BIT_ULL(NVKM_ENGINE_SW);
 
 	/* Allocate the channel. */
 	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
@@ -297,6 +280,18 @@
 
 	*chid = chan->base.chid;
 
+	/* Hack to support GPUs where even individual channels should be
+	 * part of a channel group.
+	 */
+	if (fifo->func->cgrp_force) {
+		if (!(chan->cgrp = kmalloc(sizeof(*chan->cgrp), GFP_KERNEL)))
+			return -ENOMEM;
+		chan->cgrp->id = chan->base.chid;
+		INIT_LIST_HEAD(&chan->cgrp->head);
+		INIT_LIST_HEAD(&chan->cgrp->chan);
+		chan->cgrp->chan_nr = 0;
+	}
+
 	/* Clear channel control registers. */
 	usermem = chan->base.chid * 0x200;
 	ilength = order_base_2(ilength / 8);
@@ -328,45 +323,25 @@
 	return 0;
 }
 
-static const struct gk104_fifo_chan_func
-gk104_fifo_gpfifo[] = {
-	{ NVA06F_V0_ENGINE_SW | NVA06F_V0_ENGINE_GR,
-		BIT_ULL(NVKM_ENGINE_SW) | BIT_ULL(NVKM_ENGINE_GR)
-	},
-	{ NVA06F_V0_ENGINE_SEC   , BIT_ULL(NVKM_ENGINE_SEC   ) },
-	{ NVA06F_V0_ENGINE_MSVLD , BIT_ULL(NVKM_ENGINE_MSVLD ) },
-	{ NVA06F_V0_ENGINE_MSPDEC, BIT_ULL(NVKM_ENGINE_MSPDEC) },
-	{ NVA06F_V0_ENGINE_MSPPP , BIT_ULL(NVKM_ENGINE_MSPPP ) },
-	{ NVA06F_V0_ENGINE_MSENC , BIT_ULL(NVKM_ENGINE_MSENC ) },
-	{ NVA06F_V0_ENGINE_VIC   , BIT_ULL(NVKM_ENGINE_VIC   ) },
-	{ NVA06F_V0_ENGINE_NVDEC , BIT_ULL(NVKM_ENGINE_NVDEC ) },
-	{ NVA06F_V0_ENGINE_NVENC0, BIT_ULL(NVKM_ENGINE_NVENC0) },
-	{ NVA06F_V0_ENGINE_NVENC1, BIT_ULL(NVKM_ENGINE_NVENC1) },
-	{ NVA06F_V0_ENGINE_CE0   , BIT_ULL(NVKM_ENGINE_CE0   ) },
-	{ NVA06F_V0_ENGINE_CE1   , BIT_ULL(NVKM_ENGINE_CE1   ) },
-	{ NVA06F_V0_ENGINE_CE2   , BIT_ULL(NVKM_ENGINE_CE2   ) },
-	{}
-};
-
 int
-gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
+gk104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
 		      void *data, u32 size, struct nvkm_object **pobject)
 {
 	struct nvkm_object *parent = oclass->parent;
 	union {
 		struct kepler_channel_gpfifo_a_v0 v0;
 	} *args = data;
-	struct gk104_fifo *fifo = gk104_fifo(base);
 	int ret = -ENOSYS;
 
 	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
 		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
-				   "ioffset %016llx ilength %08x engine %08x\n",
+				   "ioffset %016llx ilength %08x "
+				   "runlist %016llx\n",
 			   args->v0.version, args->v0.vmm, args->v0.ioffset,
-			   args->v0.ilength, args->v0.engines);
-		return gk104_fifo_gpfifo_new_(gk104_fifo_gpfifo, fifo,
-					      &args->v0.engines,
+			   args->v0.ilength, args->v0.runlist);
+		return gk104_fifo_gpfifo_new_(fifo,
+					      &args->v0.runlist,
 					      &args->v0.chid,
 					       args->v0.vmm,
 					       args->v0.ioffset,
@@ -376,11 +351,3 @@
 
 	return ret;
 }
-
-const struct nvkm_fifo_chan_oclass
-gk104_fifo_gpfifo_oclass = {
-	.base.oclass = KEPLER_CHANNEL_GPFIFO_A,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = gk104_fifo_gpfifo_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk110.c
deleted file mode 100644
index a9aa69c..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk110.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2016 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "changk104.h"
-
-#include <nvif/class.h>
-
-const struct nvkm_fifo_chan_oclass
-gk110_fifo_gpfifo_oclass = {
-	.base.oclass = KEPLER_CHANNEL_GPFIFO_B,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = gk104_fifo_gpfifo_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm200.c
deleted file mode 100644
index a133151..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogm200.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2015 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "changk104.h"
-
-#include <nvif/class.h>
-
-const struct nvkm_fifo_chan_oclass
-gm200_fifo_gpfifo_oclass = {
-	.base.oclass = MAXWELL_CHANNEL_GPFIFO_A,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = gk104_fifo_gpfifo_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c
deleted file mode 100644
index 1530a92..0000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2016 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "changk104.h"
-
-#include <nvif/class.h>
-
-const struct nvkm_fifo_chan_oclass
-gp100_fifo_gpfifo_oclass = {
-	.base.oclass = PASCAL_CHANNEL_GPFIFO_A,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = gk104_fifo_gpfifo_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
new file mode 100644
index 0000000..9598853
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "changk104.h"
+#include "cgrp.h"
+
+#include <core/client.h>
+#include <core/gpuobj.h>
+
+#include <nvif/cla06f.h>
+#include <nvif/unpack.h>
+
+static int
+gv100_fifo_gpfifo_engine_valid(struct gk104_fifo_chan *chan, bool ce, bool valid)
+{
+	struct nvkm_subdev *subdev = &chan->base.fifo->engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 mask = ce ? 0x00020000 : 0x00010000;
+	const u32 data = valid ? mask : 0x00000000;
+	int ret;
+
+	/* Block runlist to prevent the channel from being rescheduled. */
+	mutex_lock(&subdev->mutex);
+	nvkm_mask(device, 0x002630, BIT(chan->runl), BIT(chan->runl));
+
+	/* Preempt the channel. */
+	ret = gk104_fifo_gpfifo_kick_locked(chan);
+	if (ret == 0) {
+		/* Update engine context validity. */
+		nvkm_kmap(chan->base.inst);
+		nvkm_mo32(chan->base.inst, 0x0ac, mask, data);
+		nvkm_done(chan->base.inst);
+	}
+
+	/* Resume runlist. */
+	nvkm_mask(device, 0x002630, BIT(chan->runl), 0);
+	mutex_unlock(&subdev->mutex);
+	return ret;
+}
+
+static int
+gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
+			      struct nvkm_engine *engine, bool suspend)
+{
+	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
+	struct nvkm_gpuobj *inst = chan->base.inst;
+	int ret;
+
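+	/* CE engines have no engine context; preempting the channel suffices. */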
+	if (engine->subdev.index >= NVKM_ENGINE_CE0 &&
+	    engine->subdev.index <= NVKM_ENGINE_CE_LAST)
+		return gk104_fifo_gpfifo_kick(chan);
+
+	ret = gv100_fifo_gpfifo_engine_valid(chan, false, false);
+	if (ret && suspend)
+		return ret;
+
+	nvkm_kmap(inst);
+	nvkm_wo32(inst, 0x0210, 0x00000000);
+	nvkm_wo32(inst, 0x0214, 0x00000000);
+	nvkm_done(inst);
+	return ret;
+}
+
+static int
+gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
+			      struct nvkm_engine *engine)
+{
+	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
+	struct nvkm_gpuobj *inst = chan->base.inst;
+	u64 addr;
+
+	if (engine->subdev.index >= NVKM_ENGINE_CE0 &&
+	    engine->subdev.index <= NVKM_ENGINE_CE_LAST)
+		return 0;
+
+	addr = chan->engn[engine->subdev.index].vma->addr;
+	nvkm_kmap(inst);
+	nvkm_wo32(inst, 0x210, lower_32_bits(addr) | 0x00000004);
+	nvkm_wo32(inst, 0x214, upper_32_bits(addr));
+	nvkm_done(inst);
+
+	return gv100_fifo_gpfifo_engine_valid(chan, false, true);
+}
+
+const struct nvkm_fifo_chan_func
+gv100_fifo_gpfifo_func = {
+	.dtor = gk104_fifo_gpfifo_dtor,
+	.init = gk104_fifo_gpfifo_init,
+	.fini = gk104_fifo_gpfifo_fini,
+	.ntfy = gf100_fifo_chan_ntfy,
+	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
+	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
+	.engine_init = gv100_fifo_gpfifo_engine_init,
+	.engine_fini = gv100_fifo_gpfifo_engine_fini,
+};
+
+static int
+gv100_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
+		       u64 vmm, u64 ioffset, u64 ilength,
+		       const struct nvkm_oclass *oclass,
+		       struct nvkm_object **pobject)
+{
+	struct gk104_fifo_chan *chan;
+	int runlist = ffs(*runlists) - 1, ret, i;
+	unsigned long engm;
+	u64 subdevs = 0;
+	u64 usermem;
+
+	if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr)
+		return -EINVAL;
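+	/* Write back the runlist that was actually selected. */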
+	*runlists = BIT_ULL(runlist);
+
+	engm = fifo->runlist[runlist].engm;
+	for_each_set_bit(i, &engm, fifo->engine_nr) {
+		if (fifo->engine[i].engine)
+			subdevs |= BIT_ULL(fifo->engine[i].engine->subdev.index);
+	}
+
+	/* Allocate the channel. */
+	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
+		return -ENOMEM;
+	*pobject = &chan->base.object;
+	chan->fifo = fifo;
+	chan->runl = runlist;
+	INIT_LIST_HEAD(&chan->head);
+
+	ret = nvkm_fifo_chan_ctor(&gv100_fifo_gpfifo_func, &fifo->base,
+				  0x1000, 0x1000, true, vmm, 0, subdevs,
+				  1, fifo->user.bar->addr, 0x200,
+				  oclass, &chan->base);
+	if (ret)
+		return ret;
+
+	*chid = chan->base.chid;
+
+	/* Hack to support GPUs where even individual channels should be
+	 * part of a channel group.
+	 */
+	if (fifo->func->cgrp_force) {
+		if (!(chan->cgrp = kmalloc(sizeof(*chan->cgrp), GFP_KERNEL)))
+			return -ENOMEM;
+		chan->cgrp->id = chan->base.chid;
+		INIT_LIST_HEAD(&chan->cgrp->head);
+		INIT_LIST_HEAD(&chan->cgrp->chan);
+		chan->cgrp->chan_nr = 0;
+	}
+
+	/* Clear channel control registers. */
+	usermem = chan->base.chid * 0x200;
+	ilength = order_base_2(ilength / 8);
+
+	nvkm_kmap(fifo->user.mem);
+	for (i = 0; i < 0x200; i += 4)
+		nvkm_wo32(fifo->user.mem, usermem + i, 0x00000000);
+	nvkm_done(fifo->user.mem);
+	usermem = nvkm_memory_addr(fifo->user.mem) + usermem;
+
+	/* RAMFC */
+	nvkm_kmap(chan->base.inst);
+	nvkm_wo32(chan->base.inst, 0x008, lower_32_bits(usermem));
+	nvkm_wo32(chan->base.inst, 0x00c, upper_32_bits(usermem));
+	nvkm_wo32(chan->base.inst, 0x010, 0x0000face);
+	nvkm_wo32(chan->base.inst, 0x030, 0x7ffff902);
+	nvkm_wo32(chan->base.inst, 0x048, lower_32_bits(ioffset));
+	nvkm_wo32(chan->base.inst, 0x04c, upper_32_bits(ioffset) |
+					  (ilength << 16));
+	nvkm_wo32(chan->base.inst, 0x084, 0x20400000);
+	nvkm_wo32(chan->base.inst, 0x094, 0x30000001);
+	nvkm_wo32(chan->base.inst, 0x0e4, 0x00000020);
+	nvkm_wo32(chan->base.inst, 0x0e8, chan->base.chid);
+	nvkm_wo32(chan->base.inst, 0x0f4, 0x00001100);
+	nvkm_wo32(chan->base.inst, 0x0f8, 0x10003080);
+	nvkm_mo32(chan->base.inst, 0x218, 0x00000000, 0x00000000);
+	nvkm_wo32(chan->base.inst, 0x220, 0x020a1000);
+	nvkm_wo32(chan->base.inst, 0x224, 0x00000000);
+	nvkm_done(chan->base.inst);
+	return gv100_fifo_gpfifo_engine_valid(chan, true, true);
+}
+
+int
+gv100_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
+		      void *data, u32 size, struct nvkm_object **pobject)
+{
+	struct nvkm_object *parent = oclass->parent;
+	union {
+		struct kepler_channel_gpfifo_a_v0 v0;
+	} *args = data;
+	int ret = -ENOSYS;
+
+	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
+	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
+				   "ioffset %016llx ilength %08x "
+				   "runlist %016llx\n",
+			   args->v0.version, args->v0.vmm, args->v0.ioffset,
+			   args->v0.ilength, args->v0.runlist);
+		return gv100_fifo_gpfifo_new_(fifo,
+					      &args->v0.runlist,
+					      &args->v0.chid,
+					       args->v0.vmm,
+					       args->v0.ioffset,
+					       args->v0.ilength,
+					      oclass, pobject);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c
new file mode 100644
index 0000000..4e1d159
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gk104.h"
+#include "cgrp.h"
+#include "changk104.h"
+#include "user.h"
+
+#include <core/gpuobj.h>
+
+#include <nvif/class.h>
+
+static void
+gv100_fifo_runlist_chan(struct gk104_fifo_chan *chan,
+			struct nvkm_memory *memory, u32 offset)
+{
+	struct nvkm_memory *usermem = chan->fifo->user.mem;
+	const u64 user = nvkm_memory_addr(usermem) + (chan->base.chid * 0x200);
+	const u64 inst = chan->base.inst->addr;
+
+	nvkm_wo32(memory, offset + 0x0, lower_32_bits(user));
+	nvkm_wo32(memory, offset + 0x4, upper_32_bits(user));
+	nvkm_wo32(memory, offset + 0x8, lower_32_bits(inst) | chan->base.chid);
+	nvkm_wo32(memory, offset + 0xc, upper_32_bits(inst));
+}
+
+static void
+gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp,
+			struct nvkm_memory *memory, u32 offset)
+{
+	nvkm_wo32(memory, offset + 0x0, (128 << 24) | (3 << 16) | 0x00000001);
+	nvkm_wo32(memory, offset + 0x4, cgrp->chan_nr);
+	nvkm_wo32(memory, offset + 0x8, cgrp->id);
+	nvkm_wo32(memory, offset + 0xc, 0x00000000);
+}
+
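+/* Volta entries are 16 bytes, adding USERD and instance addresses. */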
+const struct gk104_fifo_runlist_func
+gv100_fifo_runlist = {
+	.size = 16,
+	.cgrp = gv100_fifo_runlist_cgrp,
+	.chan = gv100_fifo_runlist_chan,
+};
+
+static const struct nvkm_enum
+gv100_fifo_fault_gpcclient[] = {
+	{ 0x00, "T1_0" },
+	{ 0x01, "T1_1" },
+	{ 0x02, "T1_2" },
+	{ 0x03, "T1_3" },
+	{ 0x04, "T1_4" },
+	{ 0x05, "T1_5" },
+	{ 0x06, "T1_6" },
+	{ 0x07, "T1_7" },
+	{ 0x08, "PE_0" },
+	{ 0x09, "PE_1" },
+	{ 0x0a, "PE_2" },
+	{ 0x0b, "PE_3" },
+	{ 0x0c, "PE_4" },
+	{ 0x0d, "PE_5" },
+	{ 0x0e, "PE_6" },
+	{ 0x0f, "PE_7" },
+	{ 0x10, "RAST" },
+	{ 0x11, "GCC" },
+	{ 0x12, "GPCCS" },
+	{ 0x13, "PROP_0" },
+	{ 0x14, "PROP_1" },
+	{ 0x15, "PROP_2" },
+	{ 0x16, "PROP_3" },
+	{ 0x17, "GPM" },
+	{ 0x18, "LTP_UTLB_0" },
+	{ 0x19, "LTP_UTLB_1" },
+	{ 0x1a, "LTP_UTLB_2" },
+	{ 0x1b, "LTP_UTLB_3" },
+	{ 0x1c, "LTP_UTLB_4" },
+	{ 0x1d, "LTP_UTLB_5" },
+	{ 0x1e, "LTP_UTLB_6" },
+	{ 0x1f, "LTP_UTLB_7" },
+	{ 0x20, "RGG_UTLB" },
+	{ 0x21, "T1_8" },
+	{ 0x22, "T1_9" },
+	{ 0x23, "T1_10" },
+	{ 0x24, "T1_11" },
+	{ 0x25, "T1_12" },
+	{ 0x26, "T1_13" },
+	{ 0x27, "T1_14" },
+	{ 0x28, "T1_15" },
+	{ 0x29, "TPCCS_0" },
+	{ 0x2a, "TPCCS_1" },
+	{ 0x2b, "TPCCS_2" },
+	{ 0x2c, "TPCCS_3" },
+	{ 0x2d, "TPCCS_4" },
+	{ 0x2e, "TPCCS_5" },
+	{ 0x2f, "TPCCS_6" },
+	{ 0x30, "TPCCS_7" },
+	{ 0x31, "PE_8" },
+	{ 0x32, "PE_9" },
+	{ 0x33, "TPCCS_8" },
+	{ 0x34, "TPCCS_9" },
+	{ 0x35, "T1_16" },
+	{ 0x36, "T1_17" },
+	{ 0x37, "T1_18" },
+	{ 0x38, "T1_19" },
+	{ 0x39, "PE_10" },
+	{ 0x3a, "PE_11" },
+	{ 0x3b, "TPCCS_10" },
+	{ 0x3c, "TPCCS_11" },
+	{ 0x3d, "T1_20" },
+	{ 0x3e, "T1_21" },
+	{ 0x3f, "T1_22" },
+	{ 0x40, "T1_23" },
+	{ 0x41, "PE_12" },
+	{ 0x42, "PE_13" },
+	{ 0x43, "TPCCS_12" },
+	{ 0x44, "TPCCS_13" },
+	{ 0x45, "T1_24" },
+	{ 0x46, "T1_25" },
+	{ 0x47, "T1_26" },
+	{ 0x48, "T1_27" },
+	{ 0x49, "PE_14" },
+	{ 0x4a, "PE_15" },
+	{ 0x4b, "TPCCS_14" },
+	{ 0x4c, "TPCCS_15" },
+	{ 0x4d, "T1_28" },
+	{ 0x4e, "T1_29" },
+	{ 0x4f, "T1_30" },
+	{ 0x50, "T1_31" },
+	{ 0x51, "PE_16" },
+	{ 0x52, "PE_17" },
+	{ 0x53, "TPCCS_16" },
+	{ 0x54, "TPCCS_17" },
+	{ 0x55, "T1_32" },
+	{ 0x56, "T1_33" },
+	{ 0x57, "T1_34" },
+	{ 0x58, "T1_35" },
+	{ 0x59, "PE_18" },
+	{ 0x5a, "PE_19" },
+	{ 0x5b, "TPCCS_18" },
+	{ 0x5c, "TPCCS_19" },
+	{ 0x5d, "T1_36" },
+	{ 0x5e, "T1_37" },
+	{ 0x5f, "T1_38" },
+	{ 0x60, "T1_39" },
+	{}
+};
+
+static const struct nvkm_enum
+gv100_fifo_fault_hubclient[] = {
+	{ 0x00, "VIP" },
+	{ 0x01, "CE0" },
+	{ 0x02, "CE1" },
+	{ 0x03, "DNISO" },
+	{ 0x04, "FE" },
+	{ 0x05, "FECS" },
+	{ 0x06, "HOST" },
+	{ 0x07, "HOST_CPU" },
+	{ 0x08, "HOST_CPU_NB" },
+	{ 0x09, "ISO" },
+	{ 0x0a, "MMU" },
+	{ 0x0b, "NVDEC" },
+	{ 0x0d, "NVENC1" },
+	{ 0x0e, "NISO" },
+	{ 0x0f, "P2P" },
+	{ 0x10, "PD" },
+	{ 0x11, "PERF" },
+	{ 0x12, "PMU" },
+	{ 0x13, "RASTERTWOD" },
+	{ 0x14, "SCC" },
+	{ 0x15, "SCC_NB" },
+	{ 0x16, "SEC" },
+	{ 0x17, "SSYNC" },
+	{ 0x18, "CE2" },
+	{ 0x19, "XV" },
+	{ 0x1a, "MMU_NB" },
+	{ 0x1b, "NVENC0" },
+	{ 0x1c, "DFALCON" },
+	{ 0x1d, "SKED" },
+	{ 0x1e, "AFALCON" },
+	{ 0x1f, "DONT_CARE" },
+	{ 0x20, "HSCE0" },
+	{ 0x21, "HSCE1" },
+	{ 0x22, "HSCE2" },
+	{ 0x23, "HSCE3" },
+	{ 0x24, "HSCE4" },
+	{ 0x25, "HSCE5" },
+	{ 0x26, "HSCE6" },
+	{ 0x27, "HSCE7" },
+	{ 0x28, "HSCE8" },
+	{ 0x29, "HSCE9" },
+	{ 0x2a, "HSHUB" },
+	{ 0x2b, "PTP_X0" },
+	{ 0x2c, "PTP_X1" },
+	{ 0x2d, "PTP_X2" },
+	{ 0x2e, "PTP_X3" },
+	{ 0x2f, "PTP_X4" },
+	{ 0x30, "PTP_X5" },
+	{ 0x31, "PTP_X6" },
+	{ 0x32, "PTP_X7" },
+	{ 0x33, "NVENC2" },
+	{ 0x34, "VPR_SCRUBBER0" },
+	{ 0x35, "VPR_SCRUBBER1" },
+	{ 0x36, "DWBIF" },
+	{ 0x37, "FBFALCON" },
+	{ 0x38, "CE_SHIM" },
+	{ 0x39, "GSP" },
+	{}
+};
+
+static const struct nvkm_enum
+gv100_fifo_fault_reason[] = {
+	{ 0x00, "PDE" },
+	{ 0x01, "PDE_SIZE" },
+	{ 0x02, "PTE" },
+	{ 0x03, "VA_LIMIT_VIOLATION" },
+	{ 0x04, "UNBOUND_INST_BLOCK" },
+	{ 0x05, "PRIV_VIOLATION" },
+	{ 0x06, "RO_VIOLATION" },
+	{ 0x07, "WO_VIOLATION" },
+	{ 0x08, "PITCH_MASK_VIOLATION" },
+	{ 0x09, "WORK_CREATION" },
+	{ 0x0a, "UNSUPPORTED_APERTURE" },
+	{ 0x0b, "COMPRESSION_FAILURE" },
+	{ 0x0c, "UNSUPPORTED_KIND" },
+	{ 0x0d, "REGION_VIOLATION" },
+	{ 0x0e, "POISONED" },
+	{ 0x0f, "ATOMIC_VIOLATION" },
+	{}
+};
+
+static const struct nvkm_enum
+gv100_fifo_fault_engine[] = {
+	{ 0x01, "DISPLAY" },
+	{ 0x03, "PTP" },
+	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
+	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
+	{ 0x06, "PWR_PMU" },
+	{ 0x08, "IFB", NULL, NVKM_ENGINE_IFB },
+	{ 0x09, "PERF" },
+	{ 0x1f, "PHYSICAL" },
+	{ 0x20, "HOST0" },
+	{ 0x21, "HOST1" },
+	{ 0x22, "HOST2" },
+	{ 0x23, "HOST3" },
+	{ 0x24, "HOST4" },
+	{ 0x25, "HOST5" },
+	{ 0x26, "HOST6" },
+	{ 0x27, "HOST7" },
+	{ 0x28, "HOST8" },
+	{ 0x29, "HOST9" },
+	{ 0x2a, "HOST10" },
+	{ 0x2b, "HOST11" },
+	{ 0x2c, "HOST12" },
+	{ 0x2d, "HOST13" },
+	{}
+};
+
+static const struct nvkm_enum
+gv100_fifo_fault_access[] = {
+	{ 0x0, "VIRT_READ" },
+	{ 0x1, "VIRT_WRITE" },
+	{ 0x2, "VIRT_ATOMIC" },
+	{ 0x3, "VIRT_PREFETCH" },
+	{ 0x4, "VIRT_ATOMIC_WEAK" },
+	{ 0x8, "PHYS_READ" },
+	{ 0x9, "PHYS_WRITE" },
+	{ 0xa, "PHYS_ATOMIC" },
+	{ 0xb, "PHYS_PREFETCH" },
+	{}
+};
+
+static const struct gk104_fifo_func
+gv100_fifo = {
+	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.fault.access = gv100_fifo_fault_access,
+	.fault.engine = gv100_fifo_fault_engine,
+	.fault.reason = gv100_fifo_fault_reason,
+	.fault.hubclient = gv100_fifo_fault_hubclient,
+	.fault.gpcclient = gv100_fifo_fault_gpcclient,
+	.runlist = &gv100_fifo_runlist,
+	.user = {{-1,-1,VOLTA_USERMODE_A      }, gv100_fifo_user_new   },
+	.chan = {{ 0, 0,VOLTA_CHANNEL_GPFIFO_A}, gv100_fifo_gpfifo_new },
+	.cgrp_force = true,
+};
+
+int
+gv100_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
+{
+	return gk104_fifo_new_(&gv100_fifo, device, index, 4096, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
index ae76b1a..d5acbba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
@@ -18,16 +18,19 @@
 struct nvkm_fifo_func {
 	void *(*dtor)(struct nvkm_fifo *);
 	int (*oneinit)(struct nvkm_fifo *);
+	int (*info)(struct nvkm_fifo *, u64 mthd, u64 *data);
 	void (*init)(struct nvkm_fifo *);
 	void (*fini)(struct nvkm_fifo *);
 	void (*intr)(struct nvkm_fifo *);
+	void (*fault)(struct nvkm_fifo *, struct nvkm_fault_data *);
 	void (*pause)(struct nvkm_fifo *, unsigned long *);
 	void (*start)(struct nvkm_fifo *, unsigned long *);
 	void (*uevent_init)(struct nvkm_fifo *);
 	void (*uevent_fini)(struct nvkm_fifo *);
 	void (*recover_chan)(struct nvkm_fifo *, int chid);
-	int (*class_get)(struct nvkm_fifo *, int index,
-			 const struct nvkm_fifo_chan_oclass **);
+	int (*class_get)(struct nvkm_fifo *, int index, struct nvkm_oclass *);
+	int (*class_new)(struct nvkm_fifo *, const struct nvkm_oclass *,
+			 void *, u32, struct nvkm_object **);
 	const struct nvkm_fifo_chan_oclass *chan[];
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h
new file mode 100644
index 0000000..ed84092
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h
@@ -0,0 +1,6 @@
+#ifndef __NVKM_FIFO_USER_H__
+#define __NVKM_FIFO_USER_H__
+#include "priv.h"
+int gv100_fifo_user_new(const struct nvkm_oclass *, void *, u32,
+			struct nvkm_object **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c
new file mode 100644
index 0000000..3dc3b8b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "user.h"
+
+static int
+gv100_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc,
+		    enum nvkm_object_map *type, u64 *addr, u64 *size)
+{
+	struct nvkm_device *device = object->engine->subdev.device;
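+	/* Map the 64KiB VOLTA_USERMODE_A region at BAR0 + 0x810000. */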
+	*addr = 0x810000 + device->func->resource_addr(device, 0);
+	*size = 0x010000;
+	*type = NVKM_OBJECT_MAP_IO;
+	return 0;
+}
+
+static const struct nvkm_object_func
+gv100_fifo_user = {
+	.map = gv100_fifo_user_map,
+};
+
+int
+gv100_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nvkm_object **pobject)
+{
+	return nvkm_object_new_(&gv100_fifo_user, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
index 8a22558..93e3733 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
@@ -33,8 +33,10 @@
 nvkm-y += nvkm/engine/gr/gm20b.o
 nvkm-y += nvkm/engine/gr/gp100.o
 nvkm-y += nvkm/engine/gr/gp102.o
+nvkm-y += nvkm/engine/gr/gp104.o
 nvkm-y += nvkm/engine/gr/gp107.o
 nvkm-y += nvkm/engine/gr/gp10b.o
+nvkm-y += nvkm/engine/gr/gv100.o
 
 nvkm-y += nvkm/engine/gr/ctxnv40.o
 nvkm-y += nvkm/engine/gr/ctxnv50.o
@@ -54,4 +56,6 @@
 nvkm-y += nvkm/engine/gr/ctxgm20b.o
 nvkm-y += nvkm/engine/gr/ctxgp100.o
 nvkm-y += nvkm/engine/gr/ctxgp102.o
+nvkm-y += nvkm/engine/gr/ctxgp104.o
 nvkm-y += nvkm/engine/gr/ctxgp107.o
+nvkm-y += nvkm/engine/gr/ctxgv100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index 8810150..e813a3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -850,12 +850,17 @@
 };
 
 const struct gf100_gr_pack
-gf100_grctx_pack_gpc[] = {
+gf100_grctx_pack_gpc_0[] = {
 	{ gf100_grctx_init_gpc_unk_0 },
 	{ gf100_grctx_init_prop_0 },
 	{ gf100_grctx_init_gpc_unk_1 },
 	{ gf100_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
+	{}
+};
+
+const struct gf100_gr_pack
+gf100_grctx_pack_gpc_1[] = {
 	{ gf100_grctx_init_crstr_0 },
 	{ gf100_grctx_init_gpm_0 },
 	{ gf100_grctx_init_gcc_0 },
@@ -1025,6 +1030,13 @@
 }
 
 void
+gf100_grctx_generate_r419cb8(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x419cb8, 0x00007c00, 0x00000000);
+}
+
+void
 gf100_grctx_generate_bundle(struct gf100_grctx *info)
 {
 	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
@@ -1080,89 +1092,38 @@
 }
 
 void
-gf100_grctx_generate_tpcid(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int gpc, tpc, id;
-
-	for (tpc = 0, id = 0; tpc < 4; tpc++) {
-		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-			if (tpc < gr->tpc_nr[gpc]) {
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id);
-				nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
-				id++;
-			}
-
-			nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
-			nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
-		}
-	}
-}
-
-void
-gf100_grctx_generate_r406028(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	u32 tmp[GPC_MAX / 8] = {}, i = 0;
-	for (i = 0; i < gr->gpc_nr; i++)
-		tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4);
-	for (i = 0; i < 4; i++) {
-		nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]);
-		nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]);
-	}
-}
-
-void
 gf100_grctx_generate_r4060a8(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	u8  tpcnr[GPC_MAX], data[TPC_MAX];
-	int gpc, tpc, i;
+	const u8 gpcmax = nvkm_rd32(device, 0x022430);
+	const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax;
+	int i, j, sm = 0;
+	u32 data;
 
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	memset(data, 0x1f, sizeof(data));
-
-	gpc = -1;
-	for (tpc = 0; tpc < gr->tpc_total; tpc++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpcnr[gpc]--;
-		data[tpc] = gpc;
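+	/* One byte per SM slot: the owning GPC, or 0x1f for unused slots. */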
+	for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) {
+		for (data = 0, j = 0; j < 4; j++) {
+			if (sm < gr->sm_nr)
+				data |= gr->sm[sm++].gpc << (j * 8);
+			else
+				data |= 0x1f << (j * 8);
+		}
+		nvkm_wr32(device, 0x4060a8 + (i * 4), data);
 	}
-
-	for (i = 0; i < 4; i++)
-		nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
 }
 
 void
-gf100_grctx_generate_r418bb8(struct gf100_gr *gr)
+gf100_grctx_generate_rop_mapping(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 data[6] = {}, data2[2] = {};
-	u8  tpcnr[GPC_MAX];
 	u8  shift, ntpcv;
-	int gpc, tpc, i;
+	int i;
 
-	/* calculate first set of magics */
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+	/* Pack tile map into register format. */
+	for (i = 0; i < 32; i++)
+		data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
 
-	gpc = -1;
-	for (tpc = 0; tpc < gr->tpc_total; tpc++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpcnr[gpc]--;
-
-		data[tpc / 6] |= gpc << ((tpc % 6) * 5);
-	}
-
-	for (; tpc < 32; tpc++)
-		data[tpc / 6] |= 7 << ((tpc % 6) * 5);
-
-	/* and the second... */
+	/* Magic. */
 	shift = 0;
 	ntpcv = gr->tpc_total;
 	while (!(ntpcv & (1 << 4))) {
@@ -1197,40 +1158,214 @@
 }
 
 void
-gf100_grctx_generate_r406800(struct gf100_gr *gr)
+gf100_grctx_generate_max_ways_evict(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	u64 tpc_mask = 0, tpc_set = 0;
-	u8  tpcnr[GPC_MAX];
-	int gpc, tpc;
-	int i, a, b;
+	u32 fbps = nvkm_rd32(device, 0x121c74);
+	if (fbps == 1)
+		nvkm_mask(device, 0x17e91c, 0x001f0000, 0x00090000);
+}
 
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
-		tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8);
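+/* Alpha TPC target for each of the 32 table steps, per total TPC count.
+ * Rows marked XXX are unknown; generate_alpha_beta_tables() below warns
+ * and falls back to a computed target when a row is missing.
+ */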
+static const u32
+gf100_grctx_alpha_beta_map[17][32] = {
+	[1] = {
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	},
+	[2] = {
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	},
+	//XXX: 3
+	[4] = {
+		1, 1, 1, 1, 1, 1, 1, 1,
+		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+		3, 3, 3, 3, 3, 3, 3, 3,
+	},
+	//XXX: 5
+	//XXX: 6
+	[7] = {
+		1, 1, 1, 1,
+		2, 2, 2, 2, 2, 2,
+		3, 3, 3, 3, 3, 3,
+		4, 4, 4, 4, 4, 4,
+		5, 5, 5, 5, 5, 5,
+		6, 6, 6, 6,
+	},
+	[8] = {
+		1, 1, 1,
+		2, 2, 2, 2, 2,
+		3, 3, 3, 3, 3,
+		4, 4, 4, 4, 4, 4,
+		5, 5, 5, 5, 5,
+		6, 6, 6, 6, 6,
+		7, 7, 7,
+	},
+	//XXX: 9
+	//XXX: 10
+	[11] = {
+		1, 1,
+		2, 2, 2, 2,
+		3, 3, 3,
+		4, 4, 4, 4,
+		5, 5, 5,
+		6, 6, 6,
+		7, 7, 7, 7,
+		8, 8, 8,
+		9, 9, 9, 9,
+		10, 10,
+	},
+	//XXX: 12
+	//XXX: 13
+	[14] = {
+		1, 1,
+		2, 2,
+		3, 3, 3,
+		4, 4, 4,
+		5, 5,
+		6, 6, 6,
+		7, 7,
+		8, 8, 8,
+		9, 9,
+		10, 10, 10,
+		11, 11, 11,
+		12, 12,
+		13, 13,
+	},
+	[15] = {
+		1, 1,
+		2, 2,
+		3, 3,
+		4, 4, 4,
+		5, 5,
+		6, 6, 6,
+		7, 7,
+		8, 8,
+		9, 9, 9,
+		10, 10,
+		11, 11, 11,
+		12, 12,
+		13, 13,
+		14, 14,
+	},
+	[16] = {
+		1, 1,
+		2, 2,
+		3, 3,
+		4, 4,
+		5, 5,
+		6, 6, 6,
+		7, 7,
+		8, 8,
+		9, 9,
+		10, 10, 10,
+		11, 11,
+		12, 12,
+		13, 13,
+		14, 14,
+		15, 15,
+	},
+};
 
-	for (i = 0, gpc = -1, b = -1; i < 32; i++) {
-		a = (i * (gr->tpc_total - 1)) / 32;
-		if (a != b) {
-			b = a;
-			do {
-				gpc = (gpc + 1) % gr->gpc_nr;
-			} while (!tpcnr[gpc]);
-			tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+void
+gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *gr)
+{
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	int i, gpc;
 
-			tpc_set |= 1ULL << ((gpc * 8) + tpc);
+	for (i = 0; i < 32; i++) {
+		u32 atarget = gf100_grctx_alpha_beta_map[gr->tpc_total][i];
+		u32 abits[GPC_MAX] = {}, amask = 0, bmask = 0;
+
+		if (!atarget) {
+			nvkm_warn(subdev, "missing alpha/beta mapping table\n");
+			atarget = max_t(u32, gr->tpc_total * i / 32, 1);
 		}
 
-		nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set));
-		nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask));
-		if (gr->gpc_nr > 4) {
-			nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set));
-			nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask));
+		while (atarget) {
+			for (gpc = 0; atarget && gpc < gr->gpc_nr; gpc++) {
+				if (abits[gpc] < gr->tpc_nr[gpc]) {
+					abits[gpc]++;
+					atarget--;
+				}
+			}
 		}
+
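+		/* Low abits[gpc] TPC bits of each GPC go alpha; the rest beta. */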
+		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+			u32 bbits = gr->tpc_nr[gpc] - abits[gpc];
+			amask |= ((1 << abits[gpc]) - 1) << (gpc * 8);
+			bmask |= ((1 << bbits) - 1) << abits[gpc] << (gpc * 8);
+		}
+
+		nvkm_wr32(device, 0x406800 + (i * 0x20), amask);
+		nvkm_wr32(device, 0x406c00 + (i * 0x20), bmask);
 	}
 }
 
 void
+gf100_grctx_generate_tpc_nr(struct gf100_gr *gr, int gpc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
+	nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
+}
+
+void
+gf100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), sm);
+	nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
+}
+
+void
+gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const struct gf100_grctx_func *func = gr->func->grctx;
+	int gpc, sm, i, j;
+	u32 data;
+
+	for (sm = 0; sm < gr->sm_nr; sm++) {
+		func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm);
+		if (func->tpc_nr)
+			func->tpc_nr(gr, gr->sm[sm].gpc);
+	}
+
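+	/* TPC-per-GPC counts, four bits per GPC, written to 0x406028/0x405870. */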
+	for (gpc = 0, i = 0; i < 4; i++) {
+		for (data = 0, j = 0; j < 8 && gpc < gr->gpc_nr; j++, gpc++)
+			data |= gr->tpc_nr[gpc] << (j * 4);
+		nvkm_wr32(device, 0x406028 + (i * 4), data);
+		nvkm_wr32(device, 0x405870 + (i * 4), data);
+	}
+
+	if (func->r4060a8)
+		func->r4060a8(gr);
+
+	func->rop_mapping(gr);
+
+	if (func->alpha_beta_tables)
+		func->alpha_beta_tables(gr);
+	if (func->max_ways_evict)
+		func->max_ways_evict(gr);
+	if (func->dist_skip_table)
+		func->dist_skip_table(gr);
+	if (func->r406500)
+		func->r406500(gr);
+	if (func->gpc_tpc_nr)
+		func->gpc_tpc_nr(gr);
+	if (func->r419f78)
+		func->r419f78(gr);
+	if (func->tpc_mask)
+		func->tpc_mask(gr);
+	if (func->smid_config)
+		func->smid_config(gr);
+}
+
+void
 gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -1239,29 +1374,63 @@
 
 	nvkm_mc_unk260(device, 0);
 
-	gf100_gr_mmio(gr, grctx->hub);
-	gf100_gr_mmio(gr, grctx->gpc);
-	gf100_gr_mmio(gr, grctx->zcull);
-	gf100_gr_mmio(gr, grctx->tpc);
-	gf100_gr_mmio(gr, grctx->ppc);
+	if (!gr->fuc_sw_ctx) {
+		gf100_gr_mmio(gr, grctx->hub);
+		gf100_gr_mmio(gr, grctx->gpc_0);
+		gf100_gr_mmio(gr, grctx->zcull);
+		gf100_gr_mmio(gr, grctx->gpc_1);
+		gf100_gr_mmio(gr, grctx->tpc);
+		gf100_gr_mmio(gr, grctx->ppc);
+	} else {
+		gf100_gr_mmio(gr, gr->fuc_sw_ctx);
+	}
+
+	gf100_gr_wait_idle(gr);
 
 	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
 
-	grctx->bundle(info);
 	grctx->pagepool(info);
+	grctx->bundle(info);
 	grctx->attrib(info);
+	if (grctx->patch_ltc)
+		grctx->patch_ltc(info);
 	grctx->unkn(gr);
 
-	gf100_grctx_generate_tpcid(gr);
-	gf100_grctx_generate_r406028(gr);
-	gf100_grctx_generate_r4060a8(gr);
-	gf100_grctx_generate_r418bb8(gr);
-	gf100_grctx_generate_r406800(gr);
+	gf100_grctx_generate_floorsweep(gr);
 
-	gf100_gr_icmd(gr, grctx->icmd);
+	gf100_gr_wait_idle(gr);
+
+	if (grctx->r400088)
+		grctx->r400088(gr, false);
+	if (gr->fuc_bundle)
+		gf100_gr_icmd(gr, gr->fuc_bundle);
+	else
+		gf100_gr_icmd(gr, grctx->icmd);
+	if (grctx->sw_veid_bundle_init)
+		gf100_gr_icmd(gr, grctx->sw_veid_bundle_init);
+	if (grctx->r400088)
+		grctx->r400088(gr, true);
+
 	nvkm_wr32(device, 0x404154, idle_timeout);
-	gf100_gr_mthd(gr, grctx->mthd);
+
+	if (gr->fuc_method)
+		gf100_gr_mthd(gr, gr->fuc_method);
+	else
+		gf100_gr_mthd(gr, grctx->mthd);
 	nvkm_mc_unk260(device, 1);
+
+	if (grctx->r419cb8)
+		grctx->r419cb8(gr);
+	if (grctx->r418800)
+		grctx->r418800(gr);
+	if (grctx->r419eb0)
+		grctx->r419eb0(gr);
+	if (grctx->r419e00)
+		grctx->r419e00(gr);
+	if (grctx->r418e94)
+		grctx->r418e94(gr);
+	if (grctx->r419a3c)
+		grctx->r419a3c(gr);
+	if (grctx->r408840)
+		grctx->r408840(gr);
 }
 
 #define CB_RESERVED 0x80000
@@ -1280,6 +1449,32 @@
 	int ret, i;
 	u64 addr;
 
+	/* NV_PGRAPH_FE_PWR_MODE_FORCE_ON. */
+	nvkm_wr32(device, 0x404170, 0x00000012);
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x404170) & 0x00000010))
+			break;
+	);
+
+	if (grctx->unkn88c)
+		grctx->unkn88c(gr, true);
+
+	/* Reset FECS. */
+	nvkm_wr32(device, 0x409614, 0x00000070);
+	nvkm_usec(device, 10, NVKM_DELAY);
+	nvkm_mask(device, 0x409614, 0x00000700, 0x00000700);
+	nvkm_usec(device, 10, NVKM_DELAY);
+	nvkm_rd32(device, 0x409614);
+
+	if (grctx->unkn88c)
+		grctx->unkn88c(gr, false);
+
+	/* NV_PGRAPH_FE_PWR_MODE_AUTO. */
+	nvkm_wr32(device, 0x404170, 0x00000010);
+
+	/* Init SCC RAM. */
+	nvkm_wr32(device, 0x40802c, 0x00000001);
+
 	/* Allocate memory for a "channel", which we'll use to generate
 	 * the default context values.
 	 */
@@ -1392,7 +1587,8 @@
 	.main  = gf100_grctx_generate_main,
 	.unkn  = gf100_grctx_generate_unkn,
 	.hub   = gf100_grctx_pack_hub,
-	.gpc   = gf100_grctx_pack_gpc,
+	.gpc_0 = gf100_grctx_pack_gpc_0,
+	.gpc_1 = gf100_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gf100_grctx_pack_tpc,
 	.icmd  = gf100_grctx_pack_icmd,
@@ -1404,4 +1600,11 @@
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.r4060a8 = gf100_grctx_generate_r4060a8,
+	.rop_mapping = gf100_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+	.max_ways_evict = gf100_grctx_generate_max_ways_evict,
+	.r419cb8 = gf100_grctx_generate_r419cb8,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index 5199e5a..33e932b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -21,19 +21,22 @@
 #define mmio_wr32(a,b,c) mmio_refn((a), (b), (c),  0, -1)
 
 struct gf100_grctx_func {
+	void (*unkn88c)(struct gf100_gr *, bool on);
 	/* main context generation function */
 	void  (*main)(struct gf100_gr *, struct gf100_grctx *);
 	/* context-specific modify-on-first-load list generation function */
 	void  (*unkn)(struct gf100_gr *);
 	/* mmio context data */
 	const struct gf100_gr_pack *hub;
-	const struct gf100_gr_pack *gpc;
+	const struct gf100_gr_pack *gpc_0;
+	const struct gf100_gr_pack *gpc_1;
 	const struct gf100_gr_pack *zcull;
 	const struct gf100_gr_pack *tpc;
 	const struct gf100_gr_pack *ppc;
 	/* indirect context data, generated with icmds/mthds */
 	const struct gf100_gr_pack *icmd;
 	const struct gf100_gr_pack *mthd;
+	const struct gf100_gr_pack *sw_veid_bundle_init;
 	/* bundle circular buffer */
 	void (*bundle)(struct gf100_grctx *);
 	u32 bundle_size;
@@ -48,6 +51,31 @@
 	u32 attrib_nr;
 	u32 alpha_nr_max;
 	u32 alpha_nr;
+	u32 gfxp_nr;
+	/* other patch buffer stuff */
+	void (*patch_ltc)(struct gf100_grctx *);
+	/* floorsweeping */
+	void (*sm_id)(struct gf100_gr *, int gpc, int tpc, int sm);
+	void (*tpc_nr)(struct gf100_gr *, int gpc);
+	void (*r4060a8)(struct gf100_gr *);
+	void (*rop_mapping)(struct gf100_gr *);
+	void (*alpha_beta_tables)(struct gf100_gr *);
+	void (*max_ways_evict)(struct gf100_gr *);
+	void (*dist_skip_table)(struct gf100_gr *);
+	void (*r406500)(struct gf100_gr *);
+	void (*gpc_tpc_nr)(struct gf100_gr *);
+	void (*r419f78)(struct gf100_gr *);
+	void (*tpc_mask)(struct gf100_gr *);
+	void (*smid_config)(struct gf100_gr *);
+	/* misc other things */
+	void (*r400088)(struct gf100_gr *, bool);
+	void (*r419cb8)(struct gf100_gr *);
+	void (*r418800)(struct gf100_gr *);
+	void (*r419eb0)(struct gf100_gr *);
+	void (*r419e00)(struct gf100_gr *);
+	void (*r418e94)(struct gf100_gr *);
+	void (*r419a3c)(struct gf100_gr *);
+	void (*r408840)(struct gf100_gr *);
 };
 
 extern const struct gf100_grctx_func gf100_grctx;
@@ -57,11 +85,14 @@
 void gf100_grctx_generate_pagepool(struct gf100_grctx *);
 void gf100_grctx_generate_attrib(struct gf100_grctx *);
 void gf100_grctx_generate_unkn(struct gf100_gr *);
-void gf100_grctx_generate_tpcid(struct gf100_gr *);
-void gf100_grctx_generate_r406028(struct gf100_gr *);
+void gf100_grctx_generate_floorsweep(struct gf100_gr *);
+void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
+void gf100_grctx_generate_tpc_nr(struct gf100_gr *, int);
 void gf100_grctx_generate_r4060a8(struct gf100_gr *);
-void gf100_grctx_generate_r418bb8(struct gf100_gr *);
-void gf100_grctx_generate_r406800(struct gf100_gr *);
+void gf100_grctx_generate_rop_mapping(struct gf100_gr *);
+void gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *);
+void gf100_grctx_generate_max_ways_evict(struct gf100_gr *);
+void gf100_grctx_generate_r419cb8(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gf108_grctx;
 void gf108_grctx_generate_attrib(struct gf100_grctx *);
@@ -72,22 +103,25 @@
 
 extern const struct gf100_grctx_func gf117_grctx;
 void gf117_grctx_generate_attrib(struct gf100_grctx *);
+void gf117_grctx_generate_rop_mapping(struct gf100_gr *);
+void gf117_grctx_generate_dist_skip_table(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gf119_grctx;
 
 extern const struct gf100_grctx_func gk104_grctx;
+void gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *);
+void gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *);
+
 extern const struct gf100_grctx_func gk20a_grctx;
-void gk104_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
 void gk104_grctx_generate_bundle(struct gf100_grctx *);
 void gk104_grctx_generate_pagepool(struct gf100_grctx *);
+void gk104_grctx_generate_patch_ltc(struct gf100_grctx *);
 void gk104_grctx_generate_unkn(struct gf100_gr *);
-void gk104_grctx_generate_r418bb8(struct gf100_gr *);
-
-void gm107_grctx_generate_bundle(struct gf100_grctx *);
-void gm107_grctx_generate_pagepool(struct gf100_grctx *);
-void gm107_grctx_generate_attrib(struct gf100_grctx *);
+void gk104_grctx_generate_r418800(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110_grctx;
+void gk110_grctx_generate_r419eb0(struct gf100_gr *);
+
 extern const struct gf100_grctx_func gk110b_grctx;
 extern const struct gf100_grctx_func gk208_grctx;
 
@@ -95,22 +129,30 @@
 void gm107_grctx_generate_bundle(struct gf100_grctx *);
 void gm107_grctx_generate_pagepool(struct gf100_grctx *);
 void gm107_grctx_generate_attrib(struct gf100_grctx *);
+void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
 
 extern const struct gf100_grctx_func gm200_grctx;
-void gm200_grctx_generate_tpcid(struct gf100_gr *);
-void gm200_grctx_generate_405b60(struct gf100_gr *);
+void gm200_grctx_generate_dist_skip_table(struct gf100_gr *);
+void gm200_grctx_generate_r406500(struct gf100_gr *);
+void gm200_grctx_generate_tpc_mask(struct gf100_gr *);
+void gm200_grctx_generate_smid_config(struct gf100_gr *);
+void gm200_grctx_generate_r419a3c(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gm20b_grctx;
 
 extern const struct gf100_grctx_func gp100_grctx;
-void gp100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
 void gp100_grctx_generate_pagepool(struct gf100_grctx *);
+void gp100_grctx_generate_smid_config(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gp102_grctx;
 void gp102_grctx_generate_attrib(struct gf100_grctx *);
 
+extern const struct gf100_grctx_func gp104_grctx;
+
 extern const struct gf100_grctx_func gp107_grctx;
 
+extern const struct gf100_grctx_func gv100_grctx;
+
 /* context init value lists */
 
 extern const struct gf100_gr_pack gf100_grctx_pack_icmd[];
@@ -128,7 +170,8 @@
 extern const struct gf100_gr_init gf100_grctx_init_rstr2d_0[];
 extern const struct gf100_gr_init gf100_grctx_init_scc_0[];
 
-extern const struct gf100_gr_pack gf100_grctx_pack_gpc[];
+extern const struct gf100_gr_pack gf100_grctx_pack_gpc_0[];
+extern const struct gf100_gr_pack gf100_grctx_pack_gpc_1[];
 extern const struct gf100_gr_init gf100_grctx_init_gpc_unk_0[];
 extern const struct gf100_gr_init gf100_grctx_init_prop_0[];
 extern const struct gf100_gr_init gf100_grctx_init_gpc_unk_1[];
@@ -177,6 +220,8 @@
 
 extern const struct gf100_gr_init gf117_grctx_init_wwdx_0[];
 
+extern const struct gf100_gr_pack gf117_grctx_pack_gpc_1[];
+
 extern const struct gf100_gr_init gk104_grctx_init_memfmt_0[];
 extern const struct gf100_gr_init gk104_grctx_init_ds_0[];
 extern const struct gf100_gr_init gk104_grctx_init_scc_0[];
@@ -186,7 +231,6 @@
 extern const struct gf100_gr_init gk104_grctx_init_pes_0[];
 
 extern const struct gf100_gr_pack gk104_grctx_pack_hub[];
-extern const struct gf100_gr_pack gk104_grctx_pack_gpc[];
 extern const struct gf100_gr_pack gk104_grctx_pack_tpc[];
 extern const struct gf100_gr_pack gk104_grctx_pack_ppc[];
 extern const struct gf100_gr_pack gk104_grctx_pack_icmd[];
@@ -200,7 +244,8 @@
 extern const struct gf100_gr_init gk110_grctx_init_pri_0[];
 extern const struct gf100_gr_init gk110_grctx_init_cwd_0[];
 
-extern const struct gf100_gr_pack gk110_grctx_pack_gpc[];
+extern const struct gf100_gr_pack gk110_grctx_pack_gpc_0[];
+extern const struct gf100_gr_pack gk110_grctx_pack_gpc_1[];
 extern const struct gf100_gr_init gk110_grctx_init_gpc_unk_2[];
 
 extern const struct gf100_gr_init gk110_grctx_init_tex_0[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
index 54fd74e..7a0564b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
@@ -84,7 +84,8 @@
 	.main  = gf100_grctx_generate_main,
 	.unkn  = gf100_grctx_generate_unkn,
 	.hub   = gf100_grctx_pack_hub,
-	.gpc   = gf100_grctx_pack_gpc,
+	.gpc_0 = gf100_grctx_pack_gpc_0,
+	.gpc_1 = gf100_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gf104_grctx_pack_tpc,
 	.icmd  = gf100_grctx_pack_icmd,
@@ -96,4 +97,11 @@
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.r4060a8 = gf100_grctx_generate_r4060a8,
+	.rop_mapping = gf100_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+	.max_ways_evict = gf100_grctx_generate_max_ways_evict,
+	.r419cb8 = gf100_grctx_generate_r419cb8,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
index 82f71b1..dda2c32 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
@@ -667,12 +667,17 @@
 };
 
 static const struct gf100_gr_pack
-gf108_grctx_pack_gpc[] = {
+gf108_grctx_pack_gpc_0[] = {
 	{ gf100_grctx_init_gpc_unk_0 },
 	{ gf100_grctx_init_prop_0 },
 	{ gf100_grctx_init_gpc_unk_1 },
 	{ gf108_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
+	{}
+};
+
+static const struct gf100_gr_pack
+gf108_grctx_pack_gpc_1[] = {
 	{ gf100_grctx_init_crstr_0 },
 	{ gf108_grctx_init_gpm_0 },
 	{ gf100_grctx_init_gcc_0 },
@@ -780,7 +785,8 @@
 	.main  = gf100_grctx_generate_main,
 	.unkn  = gf108_grctx_generate_unkn,
 	.hub   = gf108_grctx_pack_hub,
-	.gpc   = gf108_grctx_pack_gpc,
+	.gpc_0 = gf108_grctx_pack_gpc_0,
+	.gpc_1 = gf108_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gf108_grctx_pack_tpc,
 	.icmd  = gf108_grctx_pack_icmd,
@@ -794,4 +800,11 @@
 	.attrib_nr = 0x218,
 	.alpha_nr_max = 0x324,
 	.alpha_nr = 0x218,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.r4060a8 = gf100_grctx_generate_r4060a8,
+	.rop_mapping = gf100_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+	.max_ways_evict = gf100_grctx_generate_max_ways_evict,
+	.r419cb8 = gf100_grctx_generate_r419cb8,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
index 7df398b..f5cca5e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
@@ -314,15 +314,12 @@
 };
 
 static const struct gf100_gr_pack
-gf110_grctx_pack_gpc[] = {
+gf110_grctx_pack_gpc_0[] = {
 	{ gf100_grctx_init_gpc_unk_0 },
 	{ gf100_grctx_init_prop_0 },
 	{ gf100_grctx_init_gpc_unk_1 },
 	{ gf110_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
-	{ gf100_grctx_init_crstr_0 },
-	{ gf100_grctx_init_gpm_0 },
-	{ gf100_grctx_init_gcc_0 },
 	{}
 };
 
@@ -335,7 +332,8 @@
 	.main  = gf100_grctx_generate_main,
 	.unkn  = gf100_grctx_generate_unkn,
 	.hub   = gf100_grctx_pack_hub,
-	.gpc   = gf110_grctx_pack_gpc,
+	.gpc_0 = gf110_grctx_pack_gpc_0,
+	.gpc_1 = gf100_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gf100_grctx_pack_tpc,
 	.icmd  = gf110_grctx_pack_icmd,
@@ -347,4 +345,11 @@
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.r4060a8 = gf100_grctx_generate_r4060a8,
+	.rop_mapping = gf100_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+	.max_ways_evict = gf100_grctx_generate_max_ways_evict,
+	.r419cb8 = gf100_grctx_generate_r419cb8,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index 19301d8..276c282 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -84,12 +84,17 @@
 };
 
 static const struct gf100_gr_pack
-gf117_grctx_pack_gpc[] = {
+gf117_grctx_pack_gpc_0[] = {
 	{ gf100_grctx_init_gpc_unk_0 },
 	{ gf119_grctx_init_prop_0 },
 	{ gf119_grctx_init_gpc_unk_1 },
 	{ gf117_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
+	{}
+};
+
+const struct gf100_gr_pack
+gf117_grctx_pack_gpc_1[] = {
 	{ gf119_grctx_init_crstr_0 },
 	{ gf108_grctx_init_gpm_0 },
 	{ gf100_grctx_init_gcc_0 },
@@ -180,6 +185,62 @@
  ******************************************************************************/
 
 void
+gf117_grctx_generate_dist_skip_table(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int i;
+
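+	/* Zero the distribution skip table; no TPCs are skipped here. */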
+	for (i = 0; i < 8; i++)
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+}
+
+void
+gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 data[6] = {}, data2[2] = {};
+	u8  shift, ntpcv;
+	int i;
+
+	/* Pack tile map into register format. */
+	for (i = 0; i < 32; i++)
+		data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
+
+	/* Magic. */
+	shift = 0;
+	ntpcv = gr->tpc_total;
+	while (!(ntpcv & (1 << 4))) {
+		ntpcv <<= 1;
+		shift++;
+	}
+
+	data2[0]  = (ntpcv << 16);
+	data2[0] |= (shift << 21);
+	data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24);
+	for (i = 1; i < 7; i++)
+		data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
+
+	/* GPC_BROADCAST */
+	nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
+				     gr->screen_tile_row_offset);
+	for (i = 0; i < 6; i++)
+		nvkm_wr32(device, 0x418b08 + (i * 4), data[i]);
+
+	/* GPC_BROADCAST.TP_BROADCAST */
+	nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) |
+				     gr->screen_tile_row_offset | data2[0]);
+	nvkm_wr32(device, 0x41bfe4, data2[1]);
+	for (i = 0; i < 6; i++)
+		nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]);
+
+	/* UNK78xx */
+	nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) |
+				     gr->screen_tile_row_offset);
+	for (i = 0; i < 6; i++)
+		nvkm_wr32(device, 0x40780c + (i * 4), data[i]);
+}
+
+void
 gf117_grctx_generate_attrib(struct gf100_grctx *info)
 {
 	struct gf100_gr *gr = info->gr;
@@ -217,50 +278,13 @@
 	}
 }
 
-static void
-gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
-	u32 idle_timeout;
-	int i;
-
-	nvkm_mc_unk260(device, 0);
-
-	gf100_gr_mmio(gr, grctx->hub);
-	gf100_gr_mmio(gr, grctx->gpc);
-	gf100_gr_mmio(gr, grctx->zcull);
-	gf100_gr_mmio(gr, grctx->tpc);
-	gf100_gr_mmio(gr, grctx->ppc);
-
-	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
-	grctx->bundle(info);
-	grctx->pagepool(info);
-	grctx->attrib(info);
-	grctx->unkn(gr);
-
-	gf100_grctx_generate_tpcid(gr);
-	gf100_grctx_generate_r406028(gr);
-	gf100_grctx_generate_r4060a8(gr);
-	gk104_grctx_generate_r418bb8(gr);
-	gf100_grctx_generate_r406800(gr);
-
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-
-	gf100_gr_icmd(gr, grctx->icmd);
-	nvkm_wr32(device, 0x404154, idle_timeout);
-	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device, 1);
-}
-
 const struct gf100_grctx_func
 gf117_grctx = {
-	.main  = gf117_grctx_generate_main,
+	.main  = gf100_grctx_generate_main,
 	.unkn  = gk104_grctx_generate_unkn,
 	.hub   = gf117_grctx_pack_hub,
-	.gpc   = gf117_grctx_pack_gpc,
+	.gpc_0 = gf117_grctx_pack_gpc_0,
+	.gpc_1 = gf117_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gf117_grctx_pack_tpc,
 	.ppc   = gf117_grctx_pack_ppc,
@@ -275,4 +299,12 @@
 	.attrib_nr = 0x218,
 	.alpha_nr_max = 0x7ff,
 	.alpha_nr = 0x324,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.r4060a8 = gf100_grctx_generate_r4060a8,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+	.max_ways_evict = gf100_grctx_generate_max_ways_evict,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
+	.r419cb8 = gf100_grctx_generate_r419cb8,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
index 605185b..0cfe463 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
@@ -431,15 +431,12 @@
 };
 
 static const struct gf100_gr_pack
-gf119_grctx_pack_gpc[] = {
+gf119_grctx_pack_gpc_0[] = {
 	{ gf100_grctx_init_gpc_unk_0 },
 	{ gf119_grctx_init_prop_0 },
 	{ gf119_grctx_init_gpc_unk_1 },
 	{ gf119_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
-	{ gf119_grctx_init_crstr_0 },
-	{ gf108_grctx_init_gpm_0 },
-	{ gf100_grctx_init_gcc_0 },
 	{}
 };
 
@@ -503,7 +500,8 @@
 	.main  = gf100_grctx_generate_main,
 	.unkn  = gf108_grctx_generate_unkn,
 	.hub   = gf119_grctx_pack_hub,
-	.gpc   = gf119_grctx_pack_gpc,
+	.gpc_0 = gf119_grctx_pack_gpc_0,
+	.gpc_1 = gf117_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gf119_grctx_pack_tpc,
 	.icmd  = gf119_grctx_pack_icmd,
@@ -517,4 +515,11 @@
 	.attrib_nr = 0x218,
 	.alpha_nr_max = 0x324,
 	.alpha_nr = 0x218,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.r4060a8 = gf100_grctx_generate_r4060a8,
+	.rop_mapping = gf100_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+	.max_ways_evict = gf100_grctx_generate_max_ways_evict,
+	.r419cb8 = gf100_grctx_generate_r419cb8,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 825c8fd..304e9d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -739,13 +739,18 @@
 	{}
 };
 
-const struct gf100_gr_pack
-gk104_grctx_pack_gpc[] = {
+static const struct gf100_gr_pack
+gk104_grctx_pack_gpc_0[] = {
 	{ gf100_grctx_init_gpc_unk_0 },
 	{ gf119_grctx_init_prop_0 },
 	{ gf119_grctx_init_gpc_unk_1 },
 	{ gk104_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
+	{}
+};
+
+static const struct gf100_gr_pack
+gk104_grctx_pack_gpc_1[] = {
 	{ gf119_grctx_init_crstr_0 },
 	{ gk104_grctx_init_gpm_0 },
 	{ gf100_grctx_init_gcc_0 },
@@ -841,6 +846,32 @@
  ******************************************************************************/
 
 void
+gk104_grctx_generate_r418800(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	/*XXX: Not really sure where to apply these; there doesn't seem
+	 *     to be any pattern to which chipsets it's done on.
+	 *
+	 *     Perhaps a VBIOS tweak?
+	 */
+	if (0) {
+		nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
+		nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
+	}
+}
+
+void
+gk104_grctx_generate_patch_ltc(struct gf100_grctx *info)
+{
+	struct nvkm_device *device = info->gr->base.engine.subdev.device;
+	u32 data0 = nvkm_rd32(device, 0x17e91c);
+	u32 data1 = nvkm_rd32(device, 0x17e920);
+	/*XXX: Figure out how to modify this correctly! */
+	mmio_wr32(info, 0x17e91c, data0);
+	mmio_wr32(info, 0x17e920, data1);
+}
+
+void
 gk104_grctx_generate_bundle(struct gf100_grctx *info)
 {
 	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
@@ -881,114 +912,74 @@
 	nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008);
 }
 
-void
-gk104_grctx_generate_r418bb8(struct gf100_gr *gr)
+static void
+gk104_grctx_generate_r419f78(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	u32 data[6] = {}, data2[2] = {};
-	u8  tpcnr[GPC_MAX];
-	u8  shift, ntpcv;
-	int gpc, tpc, i;
-
-	/* calculate first set of magics */
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
-	gpc = -1;
-	for (tpc = 0; tpc < gr->tpc_total; tpc++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpcnr[gpc]--;
-
-		data[tpc / 6] |= gpc << ((tpc % 6) * 5);
-	}
-
-	for (; tpc < 32; tpc++)
-		data[tpc / 6] |= 7 << ((tpc % 6) * 5);
-
-	/* and the second... */
-	shift = 0;
-	ntpcv = gr->tpc_total;
-	while (!(ntpcv & (1 << 4))) {
-		ntpcv <<= 1;
-		shift++;
-	}
-
-	data2[0]  = (ntpcv << 16);
-	data2[0] |= (shift << 21);
-	data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24);
-	for (i = 1; i < 7; i++)
-		data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
-
-	/* GPC_BROADCAST */
-	nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
-				     gr->screen_tile_row_offset);
-	for (i = 0; i < 6; i++)
-		nvkm_wr32(device, 0x418b08 + (i * 4), data[i]);
-
-	/* GPC_BROADCAST.TP_BROADCAST */
-	nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) |
-				     gr->screen_tile_row_offset | data2[0]);
-	nvkm_wr32(device, 0x41bfe4, data2[1]);
-	for (i = 0; i < 6; i++)
-		nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]);
-
-	/* UNK78xx */
-	nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) |
-				     gr->screen_tile_row_offset);
-	for (i = 0; i < 6; i++)
-		nvkm_wr32(device, 0x40780c + (i * 4), data[i]);
+	nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
 }
 
 void
-gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
-	u32 idle_timeout;
-	int i;
-
-	nvkm_mc_unk260(device, 0);
-
-	gf100_gr_mmio(gr, grctx->hub);
-	gf100_gr_mmio(gr, grctx->gpc);
-	gf100_gr_mmio(gr, grctx->zcull);
-	gf100_gr_mmio(gr, grctx->tpc);
-	gf100_gr_mmio(gr, grctx->ppc);
-
-	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
-	grctx->bundle(info);
-	grctx->pagepool(info);
-	grctx->attrib(info);
-	grctx->unkn(gr);
-
-	gf100_grctx_generate_tpcid(gr);
-	gf100_grctx_generate_r406028(gr);
-	gk104_grctx_generate_r418bb8(gr);
-	gf100_grctx_generate_r406800(gr);
-
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-
 	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
-	nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
+}
 
-	gf100_gr_icmd(gr, grctx->icmd);
-	nvkm_wr32(device, 0x404154, idle_timeout);
-	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device, 1);
+void
+gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int i, j, gpc, ppc;
 
-	nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
-	nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
+	for (i = 0; i < 32; i++) {
+		u32 atarget = max_t(u32, gr->tpc_total * i / 32, 1);
+		u32 btarget = gr->tpc_total - atarget;
+		bool alpha = atarget < btarget;
+		u64 amask = 0, bmask = 0;
+
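+		/* Assign whole PPCs alternately to alpha and beta until
+		 * each side's target is met.
+		 */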
+		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+			for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
+				u32 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc];
+				u32 abits, bbits, pmask;
+
+				if (alpha) {
+					abits = atarget ? ppc_tpcs : 0;
+					bbits = ppc_tpcs - abits;
+				} else {
+					bbits = btarget ? ppc_tpcs : 0;
+					abits = ppc_tpcs - bbits;
+				}
+
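+				/* Keep the PPC's 'abits' highest TPCs for alpha;
+				 * the cleared low bits become the beta mask.
+				 */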
+				pmask = gr->ppc_tpc_mask[gpc][ppc];
+				while (ppc_tpcs-- > abits)
+					pmask &= pmask - 1;
+				amask |= (u64)pmask << (gpc * 8);
+
+				pmask ^= gr->ppc_tpc_mask[gpc][ppc];
+				bmask |= (u64)pmask << (gpc * 8);
+
+				atarget -= min(abits, atarget);
+				btarget -= min(bbits, btarget);
+				if ((abits > 0) || (bbits > 0))
+					alpha = !alpha;
+			}
+		}
+
+		for (j = 0; j < gr->gpc_nr; j += 4, amask >>= 32, bmask >>= 32) {
+			nvkm_wr32(device, 0x406800 + (i * 0x20) + j, amask);
+			nvkm_wr32(device, 0x406c00 + (i * 0x20) + j, bmask);
+		}
+	}
 }
 
 const struct gf100_grctx_func
 gk104_grctx = {
-	.main  = gk104_grctx_generate_main,
+	.main  = gf100_grctx_generate_main,
 	.unkn  = gk104_grctx_generate_unkn,
 	.hub   = gk104_grctx_pack_hub,
-	.gpc   = gk104_grctx_pack_gpc,
+	.gpc_0 = gk104_grctx_pack_gpc_0,
+	.gpc_1 = gk104_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gk104_grctx_pack_tpc,
 	.ppc   = gk104_grctx_pack_ppc,
@@ -1005,4 +996,13 @@
 	.attrib_nr = 0x218,
 	.alpha_nr_max = 0x7ff,
 	.alpha_nr = 0x648,
+	.patch_ltc = gk104_grctx_generate_patch_ltc,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.r419f78 = gk104_grctx_generate_r419f78,
+	.r418800 = gk104_grctx_generate_r418800,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
index 7b95ec2..86547cfc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
@@ -704,12 +704,17 @@
 };
 
 const struct gf100_gr_pack
-gk110_grctx_pack_gpc[] = {
+gk110_grctx_pack_gpc_0[] = {
 	{ gf100_grctx_init_gpc_unk_0 },
 	{ gf119_grctx_init_prop_0 },
 	{ gf119_grctx_init_gpc_unk_1 },
 	{ gk110_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
+	{}
+};
+
+const struct gf100_gr_pack
+gk110_grctx_pack_gpc_1[] = {
 	{ gf119_grctx_init_crstr_0 },
 	{ gk104_grctx_init_gpm_0 },
 	{ gk110_grctx_init_gpc_unk_2 },
@@ -808,12 +813,20 @@
  * PGRAPH context implementation
  ******************************************************************************/
 
+void
+gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
+}
+
 const struct gf100_grctx_func
 gk110_grctx = {
-	.main  = gk104_grctx_generate_main,
+	.main  = gf100_grctx_generate_main,
 	.unkn  = gk104_grctx_generate_unkn,
 	.hub   = gk110_grctx_pack_hub,
-	.gpc   = gk110_grctx_pack_gpc,
+	.gpc_0 = gk110_grctx_pack_gpc_0,
+	.gpc_1 = gk110_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gk110_grctx_pack_tpc,
 	.ppc   = gk110_grctx_pack_ppc,
@@ -830,4 +843,13 @@
 	.attrib_nr = 0x218,
 	.alpha_nr_max = 0x7ff,
 	.alpha_nr = 0x648,
+	.patch_ltc = gk104_grctx_generate_patch_ltc,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.r418800 = gk104_grctx_generate_r418800,
+	.r419eb0 = gk110_grctx_generate_r419eb0,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
index 048b115..ebb947b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
@@ -71,10 +71,11 @@
 
 const struct gf100_grctx_func
 gk110b_grctx = {
-	.main  = gk104_grctx_generate_main,
+	.main  = gf100_grctx_generate_main,
 	.unkn  = gk104_grctx_generate_unkn,
 	.hub   = gk110_grctx_pack_hub,
-	.gpc   = gk110_grctx_pack_gpc,
+	.gpc_0 = gk110_grctx_pack_gpc_0,
+	.gpc_1 = gk110_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gk110b_grctx_pack_tpc,
 	.ppc   = gk110_grctx_pack_ppc,
@@ -91,4 +92,13 @@
 	.attrib_nr = 0x218,
 	.alpha_nr_max = 0x7ff,
 	.alpha_nr = 0x648,
+	.patch_ltc = gk104_grctx_generate_patch_ltc,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.r418800 = gk104_grctx_generate_r418800,
+	.r419eb0 = gk110_grctx_generate_r419eb0,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
index 67b7a1b..4d40512 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
@@ -443,12 +443,17 @@
 };
 
 static const struct gf100_gr_pack
-gk208_grctx_pack_gpc[] = {
+gk208_grctx_pack_gpc_0[] = {
 	{ gf100_grctx_init_gpc_unk_0 },
 	{ gk208_grctx_init_prop_0 },
 	{ gk208_grctx_init_gpc_unk_1 },
 	{ gk208_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
+	{}
+};
+
+static const struct gf100_gr_pack
+gk208_grctx_pack_gpc_1[] = {
 	{ gk208_grctx_init_crstr_0 },
 	{ gk208_grctx_init_gpm_0 },
 	{ gk110_grctx_init_gpc_unk_2 },
@@ -532,10 +537,11 @@
 
 const struct gf100_grctx_func
 gk208_grctx = {
-	.main  = gk104_grctx_generate_main,
+	.main  = gf100_grctx_generate_main,
 	.unkn  = gk104_grctx_generate_unkn,
 	.hub   = gk208_grctx_pack_hub,
-	.gpc   = gk208_grctx_pack_gpc,
+	.gpc_0 = gk208_grctx_pack_gpc_0,
+	.gpc_1 = gk208_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gk208_grctx_pack_tpc,
 	.ppc   = gk208_grctx_pack_ppc,
@@ -552,4 +558,12 @@
 	.attrib_nr = 0x218,
 	.alpha_nr_max = 0x7ff,
 	.alpha_nr = 0x648,
+	.patch_ltc = gk104_grctx_generate_patch_ltc,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.r418800 = gk104_grctx_generate_r418800,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
index da7c35a..896d473 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
@@ -42,10 +42,7 @@
 
 	grctx->unkn(gr);
 
-	gf100_grctx_generate_tpcid(gr);
-	gf100_grctx_generate_r406028(gr);
-	gk104_grctx_generate_r418bb8(gr);
-	gf100_grctx_generate_r406800(gr);
+	gf100_grctx_generate_floorsweep(gr);
 
 	for (i = 0; i < 8; i++)
 		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
@@ -82,4 +79,8 @@
 	.attrib_nr = 0x240,
 	.alpha_nr_max = 0x648 + (0x648 / 2),
 	.alpha_nr = 0x648,
+	.sm_id = gf100_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index 9b43d4c..0b3964e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -744,12 +744,17 @@
 };
 
 static const struct gf100_gr_pack
-gm107_grctx_pack_gpc[] = {
+gm107_grctx_pack_gpc_0[] = {
 	{ gm107_grctx_init_gpc_unk_0 },
 	{ gk208_grctx_init_prop_0 },
 	{ gm107_grctx_init_gpc_unk_1 },
 	{ gm107_grctx_init_setup_0 },
 	{ gf100_grctx_init_zcull_0 },
+	{}
+};
+
+static const struct gf100_gr_pack
+gm107_grctx_pack_gpc_1[] = {
 	{ gk208_grctx_init_crstr_0 },
 	{ gk104_grctx_init_gpm_0 },
 	{ gm107_grctx_init_gpc_unk_2 },
@@ -860,6 +865,16 @@
  * PGRAPH context implementation
  ******************************************************************************/
 
+static void
+gm107_grctx_generate_r419e00(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080);
+	nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000);
+}
+
 void
 gm107_grctx_generate_bundle(struct gf100_grctx *info)
 {
@@ -931,75 +946,27 @@
 }
 
 static void
-gm107_grctx_generate_tpcid(struct gf100_gr *gr)
+gm107_grctx_generate_r406500(struct gf100_gr *gr)
 {
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int gpc, tpc, id;
-
-	for (tpc = 0, id = 0; tpc < 4; tpc++) {
-		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-			if (tpc < gr->tpc_nr[gpc]) {
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
-				nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
-				id++;
-			}
-
-			nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
-			nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
-		}
-	}
+	nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000001);
 }
 
-static void
-gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+void
+gm107_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
-	u32 idle_timeout;
-	int i;
-
-	gf100_gr_mmio(gr, grctx->hub);
-	gf100_gr_mmio(gr, grctx->gpc);
-	gf100_gr_mmio(gr, grctx->zcull);
-	gf100_gr_mmio(gr, grctx->tpc);
-	gf100_gr_mmio(gr, grctx->ppc);
-
-	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
-	grctx->bundle(info);
-	grctx->pagepool(info);
-	grctx->attrib(info);
-	grctx->unkn(gr);
-
-	gm107_grctx_generate_tpcid(gr);
-	gf100_grctx_generate_r406028(gr);
-	gk104_grctx_generate_r418bb8(gr);
-	gf100_grctx_generate_r406800(gr);
-
-	nvkm_wr32(device, 0x4064d0, 0x00000001);
-	for (i = 1; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-	nvkm_wr32(device, 0x406500, 0x00000001);
-
-	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
-
-	gf100_gr_icmd(gr, grctx->icmd);
-	nvkm_wr32(device, 0x404154, idle_timeout);
-	gf100_gr_mthd(gr, grctx->mthd);
-
-	nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080);
-	nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000);
-	nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000);
-	nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm);
+	nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
 }
 
 const struct gf100_grctx_func
 gm107_grctx = {
-	.main  = gm107_grctx_generate_main,
+	.main  = gf100_grctx_generate_main,
 	.unkn  = gk104_grctx_generate_unkn,
 	.hub   = gm107_grctx_pack_hub,
-	.gpc   = gm107_grctx_pack_gpc,
+	.gpc_0 = gm107_grctx_pack_gpc_0,
+	.gpc_1 = gm107_grctx_pack_gpc_1,
 	.zcull = gf100_grctx_pack_zcull,
 	.tpc   = gm107_grctx_pack_tpc,
 	.ppc   = gm107_grctx_pack_ppc,
@@ -1016,4 +983,12 @@
 	.attrib_nr = 0xaa0,
 	.alpha_nr_max = 0x1800,
 	.alpha_nr = 0x1000,
+	.sm_id = gm107_grctx_generate_sm_id,
+	.tpc_nr = gf100_grctx_generate_tpc_nr,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
+	.r406500 = gm107_grctx_generate_r406500,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.r419e00 = gm107_grctx_generate_r419e00,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
index db209d3..013d05a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
@@ -28,47 +28,34 @@
  ******************************************************************************/
 
 void
-gm200_grctx_generate_tpcid(struct gf100_gr *gr)
+gm200_grctx_generate_r419a3c(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int gpc, tpc, id;
+	nvkm_mask(device, 0x419a3c, 0x00000014, 0x00000000);
+}
 
-	for (tpc = 0, id = 0; tpc < TPC_MAX_PER_GPC; tpc++) {
-		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-			if (tpc < gr->tpc_nr[gpc]) {
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
-				nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
-				nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
-				id++;
-			}
-		}
-	}
+static void
+gm200_grctx_generate_r418e94(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000);
+	nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000);
 }
 
 void
-gm200_grctx_generate_405b60(struct gf100_gr *gr)
+gm200_grctx_generate_smid_config(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
 	u32 dist[TPC_MAX / 4] = {};
 	u32 gpcs[GPC_MAX] = {};
-	u8  tpcnr[GPC_MAX];
-	int tpc, gpc, i;
+	u8  sm, i;
 
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
-	/* won't result in the same distribution as the binary driver where
-	 * some of the gpcs have more tpcs than others, but this shall do
-	 * for the moment.  the code for earlier gpus has this issue too.
-	 */
-	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while(!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
-		gpcs[gpc] |= i << (tpc * 8);
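+	/* Dist table packs (gpc << 4 | tpc) per SM, four per register; the
+	 * per-GPC words map each TPC slot back to its SM-ID.
+	 */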
+	for (sm = 0; sm < gr->sm_nr; sm++) {
+		const u8 gpc = gr->sm[sm].gpc;
+		const u8 tpc = gr->sm[sm].tpc;
+		dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
+		gpcs[gpc] |= sm << (tpc * 8);
 	}
 
 	for (i = 0; i < dist_nr; i++)
@@ -77,50 +64,46 @@
 		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
 }
 
-static void
-gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+void
+gm200_grctx_generate_tpc_mask(struct gf100_gr *gr)
+{
+	u32 tmp, i;
+	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
+		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * gr->func->tpc_nr);
+	nvkm_wr32(gr->base.engine.subdev.device, 0x4041c4, tmp);
+}
+
+void
+gm200_grctx_generate_r406500(struct gf100_gr *gr)
+{
+	nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000000);
+}
+
+void
+gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
-	u32 idle_timeout, tmp;
-	int i;
+	u32 data[8] = {};
+	int gpc, ppc, i;
 
-	gf100_gr_mmio(gr, gr->fuc_sw_ctx);
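+	/* Mark the TPCs each PPC has beyond the chip-wide per-PPC minimum
+	 * so that work distribution can skip them.
+	 */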
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+			u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc];
+			u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc];
+			while (ppc_tpcs-- > gr->ppc_tpc_min)
+				ppc_tpcm &= ppc_tpcm - 1;
+			ppc_tpcm ^= gr->ppc_tpc_mask[gpc][ppc];
+			((u8 *)data)[gpc] |= ppc_tpcm;
+		}
+	}
 
-	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
-	grctx->bundle(info);
-	grctx->pagepool(info);
-	grctx->attrib(info);
-	grctx->unkn(gr);
-
-	gm200_grctx_generate_tpcid(gr);
-	gf100_grctx_generate_r406028(gr);
-	gk104_grctx_generate_r418bb8(gr);
-
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-	nvkm_wr32(device, 0x406500, 0x00000000);
-
-	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
-
-	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
-		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
-	nvkm_wr32(device, 0x4041c4, tmp);
-
-	gm200_grctx_generate_405b60(gr);
-
-	gf100_gr_icmd(gr, gr->fuc_bundle);
-	nvkm_wr32(device, 0x404154, idle_timeout);
-	gf100_gr_mthd(gr, gr->fuc_method);
-
-	nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000);
-	nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000);
+	for (i = 0; i < ARRAY_SIZE(data); i++)
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), data[i]);
 }
 
 const struct gf100_grctx_func
 gm200_grctx = {
-	.main  = gm200_grctx_generate_main,
+	.main  = gf100_grctx_generate_main,
 	.unkn  = gk104_grctx_generate_unkn,
 	.bundle = gm107_grctx_generate_bundle,
 	.bundle_size = 0x3000,
@@ -133,4 +116,13 @@
 	.attrib_nr = 0x400,
 	.alpha_nr_max = 0x1800,
 	.alpha_nr = 0x1000,
+	.sm_id = gm107_grctx_generate_sm_id,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
+	.r406500 = gm200_grctx_generate_r406500,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.tpc_mask = gm200_grctx_generate_tpc_mask,
+	.smid_config = gm200_grctx_generate_smid_config,
+	.r418e94 = gm200_grctx_generate_r418e94,
+	.r419a3c = gm200_grctx_generate_r419a3c,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
index e5702e3..a1d9e11 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
@@ -22,20 +22,6 @@
 #include "ctxgf100.h"
 
 static void
-gm20b_grctx_generate_r406028(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	u32 tpc_per_gpc = 0;
-	int i;
-
-	for (i = 0; i < gr->gpc_nr; i++)
-		tpc_per_gpc |= gr->tpc_nr[i] << (4 * i);
-
-	nvkm_wr32(device, 0x406028, tpc_per_gpc);
-	nvkm_wr32(device, 0x405870, tpc_per_gpc);
-}
-
-static void
 gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -53,9 +39,7 @@
 
 	grctx->unkn(gr);
 
-	gm200_grctx_generate_tpcid(gr);
-	gm20b_grctx_generate_r406028(gr);
-	gk104_grctx_generate_r418bb8(gr);
+	gf100_grctx_generate_floorsweep(gr);
 
 	for (i = 0; i < 8; i++)
 		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
@@ -68,7 +52,7 @@
 		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
 	nvkm_wr32(device, 0x4041c4, tmp);
 
-	gm200_grctx_generate_405b60(gr);
+	gm200_grctx_generate_smid_config(gr);
 
 	gf100_gr_wait_idle(gr);
 
@@ -98,4 +82,6 @@
 	.attrib_nr = 0x400,
 	.alpha_nr_max = 0xc00,
 	.alpha_nr = 0x800,
+	.sm_id = gm107_grctx_generate_sm_id,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
index 88ea322..0b33262 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -36,7 +36,7 @@
 	const int s = 8;
 	const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true);
 	mmio_refn(info, 0x40800c, 0x00000000, s, b);
-	mmio_wr32(info, 0x408010, 0x80000000);
+	mmio_wr32(info, 0x408010, 0x8007d800);
 	mmio_refn(info, 0x419004, 0x00000000, s, b);
 	mmio_wr32(info, 0x419008, 0x00000000);
 }
@@ -48,14 +48,17 @@
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
-	const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const u32   size = roundup(gr->tpc_total * pertpc, 0x80);
 	const int s = 12;
-	const int b = mmio_vram(info, size, (1 << s), false);
 	const int max_batches = 0xffff;
+	u32 size = grctx->alpha_nr_max * gr->tpc_total;
 	u32 ao = 0;
-	u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total;
-	int gpc, ppc, n = 0;
+	u32 bo = ao + size;
+	int gpc, ppc, b, n = 0;
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
+		size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
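+	/* 0x20 bytes per circular-buffer slot, padded to a 128-byte boundary. */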
+	size = ((size * 0x20) + 128) & ~127;
+	b = mmio_vram(info, size, (1 << s), false);
 
 	mmio_refn(info, 0x418810, 0x80000000, s, b);
 	mmio_refn(info, 0x419848, 0x10000000, s, b);
@@ -69,7 +72,7 @@
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
-			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 bs = attrib * gr->ppc_tpc_max;
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
@@ -77,7 +80,7 @@
 			mmio_wr32(info, o + 0xc0, bs);
 			mmio_wr32(info, o + 0xf4, bo);
 			mmio_wr32(info, o + 0xf0, bs);
-			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			bo += grctx->attrib_nr_max * gr->ppc_tpc_max;
 			mmio_wr32(info, o + 0xe4, as);
 			mmio_wr32(info, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
@@ -89,79 +92,30 @@
 	mmio_wr32(info, 0x41befc, 0x00000000);
 }
 
-static void
-gp100_grctx_generate_405b60(struct gf100_gr *gr)
+void
+gp100_grctx_generate_smid_config(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
-	u32 dist[TPC_MAX / 4] = {};
-	u32 gpcs[GPC_MAX * 2] = {};
-	u8  tpcnr[GPC_MAX];
-	int tpc, gpc, i;
+	u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {};
+	u8  sm, i;
 
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
-	/* won't result in the same distribution as the binary driver where
-	 * some of the gpcs have more tpcs than others, but this shall do
-	 * for the moment.  the code for earlier gpus has this issue too.
-	 */
-	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while(!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
-		gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
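+	/* As gm200, but the per-GPC SM-ID words are banked per four TPCs. */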
+	for (sm = 0; sm < gr->sm_nr; sm++) {
+		const u8 gpc = gr->sm[sm].gpc;
+		const u8 tpc = gr->sm[sm].tpc;
+		dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
+		gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= sm << ((tpc % 4) * 8);
 	}
 
 	for (i = 0; i < dist_nr; i++)
 		nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
-	for (i = 0; i < gr->gpc_nr * 2; i++)
+	for (i = 0; i < ARRAY_SIZE(gpcs); i++)
 		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
 }
 
-void
-gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
-	u32 idle_timeout, tmp;
-	int i;
-
-	gf100_gr_mmio(gr, gr->fuc_sw_ctx);
-
-	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
-	grctx->pagepool(info);
-	grctx->bundle(info);
-	grctx->attrib(info);
-	grctx->unkn(gr);
-
-	gm200_grctx_generate_tpcid(gr);
-	gf100_grctx_generate_r406028(gr);
-	gk104_grctx_generate_r418bb8(gr);
-
-	for (i = 0; i < 8; i++)
-		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-	nvkm_wr32(device, 0x406500, 0x00000000);
-
-	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
-
-	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
-		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5);
-	nvkm_wr32(device, 0x4041c4, tmp);
-
-	gp100_grctx_generate_405b60(gr);
-
-	gf100_gr_icmd(gr, gr->fuc_bundle);
-	nvkm_wr32(device, 0x404154, idle_timeout);
-	gf100_gr_mthd(gr, gr->fuc_method);
-}
-
 const struct gf100_grctx_func
 gp100_grctx = {
-	.main  = gp100_grctx_generate_main,
+	.main  = gf100_grctx_generate_main,
 	.unkn  = gk104_grctx_generate_unkn,
 	.bundle = gm107_grctx_generate_bundle,
 	.bundle_size = 0x3000,
@@ -174,4 +128,12 @@
 	.attrib_nr = 0x440,
 	.alpha_nr_max = 0xc00,
 	.alpha_nr = 0x800,
+	.sm_id = gm107_grctx_generate_sm_id,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
+	.r406500 = gm200_grctx_generate_r406500,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.tpc_mask = gm200_grctx_generate_tpc_mask,
+	.smid_config = gp100_grctx_generate_smid_config,
+	.r419a3c = gm200_grctx_generate_r419a3c,
 };
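
The new gp100_grctx_generate_smid_config() above reads the SM layout straight
out of the gr->sm[] table built at oneinit time, rather than re-deriving it by
round-robining tpcnr[] as the deleted gp100_grctx_generate_405b60() did. The
0x405b60 register layout is one byte per SM, (gpc << 4) | tpc, four SMs packed
into each 32-bit word. A minimal standalone sketch of that packing, using
hypothetical sm[] values and plain stdint types rather than the nvkm structs:

    #include <stdint.h>
    #include <stdio.h>

    struct sm_info { uint8_t gpc, tpc; };

    int main(void)
    {
        /* Hypothetical SM table: 5 SMs spread over 3 GPCs. */
        const struct sm_info sm[] = {
            { 0, 0 }, { 1, 0 }, { 2, 0 }, { 0, 1 }, { 1, 1 },
        };
        const unsigned sm_nr = sizeof(sm) / sizeof(sm[0]);
        uint32_t dist[(sizeof(sm) / sizeof(sm[0]) + 3) / 4] = { 0 };
        unsigned i;

        /* One byte per SM: high nibble GPC, low nibble TPC,
         * four SMs per 32-bit PGRAPH register. */
        for (i = 0; i < sm_nr; i++)
            dist[i / 4] |= (uint32_t)((sm[i].gpc << 4) | sm[i].tpc)
                           << ((i % 4) * 8);

        for (i = 0; i < (sm_nr + 3) / 4; i++)
            printf("0x405b60+0x%02x = 0x%08x\n", i * 4, dist[i]);
        return 0;
    }
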
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
index 7a66b4c..daee17b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
@@ -29,6 +29,13 @@
  * PGRAPH context implementation
  ******************************************************************************/
 
+static void
+gp102_grctx_generate_r408840(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x408840, 0x00000003, 0x00000000);
+}
+
 void
 gp102_grctx_generate_attrib(struct gf100_grctx *info)
 {
@@ -36,14 +43,18 @@
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
-	const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const u32   size = roundup(gr->tpc_total * pertpc, 0x80);
+	const u32   gfxp = grctx->gfxp_nr;
 	const int s = 12;
-	const int b = mmio_vram(info, size, (1 << s), false);
 	const int max_batches = 0xffff;
+	u32 size = grctx->alpha_nr_max * gr->tpc_total;
 	u32 ao = 0;
-	u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total;
-	int gpc, ppc, n = 0;
+	u32 bo = ao + size;
+	int gpc, ppc, b, n = 0;
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
+		size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
+	size = ((size * 0x20) + 128) & ~127;
+	b = mmio_vram(info, size, (1 << s), false);
 
 	mmio_refn(info, 0x418810, 0x80000000, s, b);
 	mmio_refn(info, 0x419848, 0x10000000, s, b);
@@ -57,17 +68,18 @@
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
-			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 bs = attrib * gr->ppc_tpc_max;
+			const u32 gs =   gfxp * gr->ppc_tpc_max;
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
 			const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4));
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, bs);
+			mmio_wr32(info, o + 0xc0, gs);
 			mmio_wr32(info, p, bs);
 			mmio_wr32(info, o + 0xf4, bo);
 			mmio_wr32(info, o + 0xf0, bs);
-			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			bo += gs;
 			mmio_wr32(info, o + 0xe4, as);
 			mmio_wr32(info, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
@@ -81,7 +93,7 @@
 
 const struct gf100_grctx_func
 gp102_grctx = {
-	.main = gp100_grctx_generate_main,
+	.main = gf100_grctx_generate_main,
 	.unkn = gk104_grctx_generate_unkn,
 	.bundle = gm107_grctx_generate_bundle,
 	.bundle_size = 0x3000,
@@ -90,8 +102,18 @@
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
 	.attrib = gp102_grctx_generate_attrib,
-	.attrib_nr_max = 0x5d4,
+	.attrib_nr_max = 0x4b0,
 	.attrib_nr = 0x320,
 	.alpha_nr_max = 0xc00,
 	.alpha_nr = 0x800,
+	.gfxp_nr = 0xba8,
+	.sm_id = gm107_grctx_generate_sm_id,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
+	.r406500 = gm200_grctx_generate_r406500,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.tpc_mask = gm200_grctx_generate_tpc_mask,
+	.smid_config = gp100_grctx_generate_smid_config,
+	.r419a3c = gm200_grctx_generate_r419a3c,
+	.r408840 = gp102_grctx_generate_r408840,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
new file mode 100644
index 0000000..3b85e3d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ctxgf100.h"
+
+const struct gf100_grctx_func
+gp104_grctx = {
+	.main = gf100_grctx_generate_main,
+	.unkn = gk104_grctx_generate_unkn,
+	.bundle = gm107_grctx_generate_bundle,
+	.bundle_size = 0x3000,
+	.bundle_min_gpm_fifo_depth = 0x180,
+	.bundle_token_limit = 0x900,
+	.pagepool = gp100_grctx_generate_pagepool,
+	.pagepool_size = 0x20000,
+	.attrib = gp102_grctx_generate_attrib,
+	.attrib_nr_max = 0x4b0,
+	.attrib_nr = 0x320,
+	.alpha_nr_max = 0xc00,
+	.alpha_nr = 0x800,
+	.gfxp_nr = 0xba8,
+	.sm_id = gm107_grctx_generate_sm_id,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
+	.r406500 = gm200_grctx_generate_r406500,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.tpc_mask = gm200_grctx_generate_tpc_mask,
+	.smid_config = gp100_grctx_generate_smid_config,
+	.r419a3c = gm200_grctx_generate_r419a3c,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
index 8da91a0..5060c5e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
@@ -31,7 +31,7 @@
 
 const struct gf100_grctx_func
 gp107_grctx = {
-	.main = gp100_grctx_generate_main,
+	.main = gf100_grctx_generate_main,
 	.unkn = gk104_grctx_generate_unkn,
 	.bundle = gm107_grctx_generate_bundle,
 	.bundle_size = 0x3000,
@@ -44,4 +44,13 @@
 	.attrib_nr = 0x540,
 	.alpha_nr_max = 0xc00,
 	.alpha_nr = 0x800,
+	.gfxp_nr = 0xe94,
+	.sm_id = gm107_grctx_generate_sm_id,
+	.rop_mapping = gf117_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
+	.r406500 = gm200_grctx_generate_r406500,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.tpc_mask = gm200_grctx_generate_tpc_mask,
+	.smid_config = gp100_grctx_generate_smid_config,
+	.r419a3c = gm200_grctx_generate_r419a3c,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
new file mode 100644
index 0000000..0990765
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ctxgf100.h"
+
+/*******************************************************************************
+ * PGRAPH context implementation
+ ******************************************************************************/
+
+static const struct gf100_gr_init
+gv100_grctx_init_sw_veid_bundle_init_0[] = {
+	{ 0x00001000, 64, 0x00100000, 0x00000008 },
+	{ 0x00000941, 64, 0x00100000, 0x00000000 },
+	{ 0x0000097e, 64, 0x00100000, 0x00000000 },
+	{ 0x0000097f, 64, 0x00100000, 0x00000100 },
+	{ 0x0000035c, 64, 0x00100000, 0x00000000 },
+	{ 0x0000035d, 64, 0x00100000, 0x00000000 },
+	{ 0x00000a08, 64, 0x00100000, 0x00000000 },
+	{ 0x00000a09, 64, 0x00100000, 0x00000000 },
+	{ 0x00000a0a, 64, 0x00100000, 0x00000000 },
+	{ 0x00000352, 64, 0x00100000, 0x00000000 },
+	{ 0x00000353, 64, 0x00100000, 0x00000000 },
+	{ 0x00000358, 64, 0x00100000, 0x00000000 },
+	{ 0x00000359, 64, 0x00100000, 0x00000000 },
+	{ 0x00000370, 64, 0x00100000, 0x00000000 },
+	{ 0x00000371, 64, 0x00100000, 0x00000000 },
+	{ 0x00000372, 64, 0x00100000, 0x000fffff },
+	{ 0x00000366, 64, 0x00100000, 0x00000000 },
+	{ 0x00000367, 64, 0x00100000, 0x00000000 },
+	{ 0x00000368, 64, 0x00100000, 0x00000fff },
+	{ 0x00000623, 64, 0x00100000, 0x00000000 },
+	{ 0x00000624, 64, 0x00100000, 0x00000000 },
+	{ 0x0001e100,  1, 0x00000001, 0x02000001 },
+	{}
+};
+
+static const struct gf100_gr_pack
+gv100_grctx_pack_sw_veid_bundle_init[] = {
+	{ gv100_grctx_init_sw_veid_bundle_init_0 },
+	{}
+};
+
+static void
+gv100_grctx_generate_attrib(struct gf100_grctx *info)
+{
+	struct gf100_gr *gr = info->gr;
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	const u32  alpha = grctx->alpha_nr;
+	const u32 attrib = grctx->attrib_nr;
+	const u32   gfxp = grctx->gfxp_nr;
+	const int s = 12;
+	const int max_batches = 0xffff;
+	u32 size = grctx->alpha_nr_max * gr->tpc_total;
+	u32 ao = 0;
+	u32 bo = ao + size;
+	int gpc, ppc, b, n = 0;
+
+	size += grctx->gfxp_nr * gr->tpc_total;
+	size = ((size * 0x20) + 128) & ~127;
+	b = mmio_vram(info, size, (1 << s), false);
+
+	mmio_refn(info, 0x418810, 0x80000000, s, b);
+	mmio_refn(info, 0x419848, 0x10000000, s, b);
+	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
+	mmio_refn(info, 0x419e00, 0x00000000, s, b);
+	mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7);
+	mmio_wr32(info, 0x405830, attrib);
+	mmio_wr32(info, 0x40585c, alpha);
+	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 gs =   gfxp * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 u = 0x418ea0 + (n * 0x04);
+			const u32 o = PPC_UNIT(gpc, ppc, 0);
+			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
+				continue;
+			mmio_wr32(info, o + 0xc0, gs);
+			mmio_wr32(info, o + 0xf4, bo);
+			mmio_wr32(info, o + 0xf0, bs);
+			bo += gs;
+			mmio_wr32(info, o + 0xe4, as);
+			mmio_wr32(info, o + 0xf8, ao);
+			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			mmio_wr32(info, u, bs);
+		}
+	}
+
+	mmio_wr32(info, 0x4181e4, 0x00000100);
+	mmio_wr32(info, 0x41befc, 0x00000100);
+}
+
+static void
+gv100_grctx_generate_rop_mapping(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 data;
+	int i, j;
+
+	/* Pack tile map into register format. */
+	nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
+				     gr->screen_tile_row_offset);
+	for (i = 0; i < 11; i++) {
+		for (data = 0, j = 0; j < 6; j++)
+			data |= (gr->tile[i * 6 + j] & 0x1f) << (j * 5);
+		nvkm_wr32(device, 0x418b08 + (i * 4), data);
+		nvkm_wr32(device, 0x41bf00 + (i * 4), data);
+		nvkm_wr32(device, 0x40780c + (i * 4), data);
+	}
+
+	/* GPC_BROADCAST.TP_BROADCAST */
+	nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) |
+				     gr->screen_tile_row_offset);
+	for (i = 0, j = 1; i < 5; i++, j += 4) {
+		u8 v19 = (1 << (j + 0)) % gr->tpc_total;
+		u8 v20 = (1 << (j + 1)) % gr->tpc_total;
+		u8 v21 = (1 << (j + 2)) % gr->tpc_total;
+		u8 v22 = (1 << (j + 3)) % gr->tpc_total;
+		nvkm_wr32(device, 0x41bfb0 + (i * 4), (v22 << 24) |
+						      (v21 << 16) |
+						      (v20 <<  8) |
+						       v19);
+	}
+
+	/* UNK78xx */
+	nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) |
+				     gr->screen_tile_row_offset);
+}
+
+static void
+gv100_grctx_generate_r400088(struct gf100_gr *gr, bool on)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x400088, 0x00060000, on ? 0x00060000 : 0x00000000);
+}
+
+static void
+gv100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm);
+	nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
+}
+
+static void
+gv100_grctx_generate_unkn(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010);
+	nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004);
+	nvkm_mask(device, 0x4064c0, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x405800, 0x08000000, 0x08000000);
+	nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008);
+}
+
+static void
+gv100_grctx_unkn88c(struct gf100_gr *gr, bool on)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 mask = 0x00000010, data = on ? mask : 0x00000000;
+	nvkm_mask(device, 0x40988c, mask, data);
+	nvkm_rd32(device, 0x40988c);
+	nvkm_mask(device, 0x41a88c, mask, data);
+	nvkm_rd32(device, 0x41a88c);
+	nvkm_mask(device, 0x408a14, mask, data);
+	nvkm_rd32(device, 0x408a14);
+}
+
+const struct gf100_grctx_func
+gv100_grctx = {
+	.unkn88c = gv100_grctx_unkn88c,
+	.main = gf100_grctx_generate_main,
+	.unkn = gv100_grctx_generate_unkn,
+	.sw_veid_bundle_init = gv100_grctx_pack_sw_veid_bundle_init,
+	.bundle = gm107_grctx_generate_bundle,
+	.bundle_size = 0x3000,
+	.bundle_min_gpm_fifo_depth = 0x180,
+	.bundle_token_limit = 0x1680,
+	.pagepool = gp100_grctx_generate_pagepool,
+	.pagepool_size = 0x20000,
+	.attrib = gv100_grctx_generate_attrib,
+	.attrib_nr_max = 0x6c0,
+	.attrib_nr = 0x480,
+	.alpha_nr_max = 0xc00,
+	.alpha_nr = 0x800,
+	.gfxp_nr = 0xd10,
+	.sm_id = gv100_grctx_generate_sm_id,
+	.rop_mapping = gv100_grctx_generate_rop_mapping,
+	.dist_skip_table = gm200_grctx_generate_dist_skip_table,
+	.r406500 = gm200_grctx_generate_r406500,
+	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+	.smid_config = gp100_grctx_generate_smid_config,
+	.r400088 = gv100_grctx_generate_r400088,
+};
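
gv100_grctx_generate_rop_mapping() writes the tile map to three register banks
(0x418b08, 0x41bf00, 0x40780c) and then fills the five registers starting at
0x41bfb0 with powers of two reduced modulo tpc_total, four 8-bit values per
word. A standalone sketch of just that modulo packing, with a hypothetical
tpc_total:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint32_t tpc_total = 14;    /* hypothetical */
        int i, j;

        for (i = 0, j = 1; i < 5; i++, j += 4) {
            uint32_t v19 = (1u << (j + 0)) % tpc_total;
            uint32_t v20 = (1u << (j + 1)) % tpc_total;
            uint32_t v21 = (1u << (j + 2)) % tpc_total;
            uint32_t v22 = (1u << (j + 3)) % tpc_total;
            printf("0x41bfb0+0x%02x = 0x%08x\n", i * 4,
                   (v22 << 24) | (v21 << 16) | (v20 << 8) | v19);
        }
        return 0;
    }
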
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 2f8dc10..70d3d41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -32,6 +32,7 @@
 #include <subdev/fb.h>
 #include <subdev/mc.h>
 #include <subdev/pmu.h>
+#include <subdev/therm.h>
 #include <subdev/timer.h>
 #include <engine/fifo.h>
 
@@ -91,7 +92,7 @@
 	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
 	gr->zbc_color[zbc].format = format;
 	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
-	gf100_gr_zbc_clear_color(gr, zbc);
+	gr->func->zbc->clear_color(gr, zbc);
 	return zbc;
 }
 
@@ -136,10 +137,16 @@
 	gr->zbc_depth[zbc].ds = ds;
 	gr->zbc_depth[zbc].l2 = l2;
 	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
-	gf100_gr_zbc_clear_depth(gr, zbc);
+	gr->func->zbc->clear_depth(gr, zbc);
 	return zbc;
 }
 
+const struct gf100_gr_func_zbc
+gf100_gr_zbc = {
+	.clear_color = gf100_gr_zbc_clear_color,
+	.clear_depth = gf100_gr_zbc_clear_depth,
+};
+
 /*******************************************************************************
  * Graphics object classes
  ******************************************************************************/
@@ -743,21 +750,31 @@
 	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
 			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
-	int index;
+	int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min;
 
 	if (!gr->zbc_color[0].format) {
-		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
-		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
-		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
-		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
-		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
-		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
+		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]); c++;
+		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]); c++;
+		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]); c++;
+		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]); c++;
+		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); d++;
+		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); d++;
+		if (gr->func->zbc->stencil_get) {
+			gr->func->zbc->stencil_get(gr, 1, 0x00, 0x00); s++;
+			gr->func->zbc->stencil_get(gr, 1, 0x01, 0x01); s++;
+			gr->func->zbc->stencil_get(gr, 1, 0xff, 0xff); s++;
+		}
 	}
 
-	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
-		gf100_gr_zbc_clear_color(gr, index);
-	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
-		gf100_gr_zbc_clear_depth(gr, index);
+	for (index = c; index <= ltc->zbc_max; index++)
+		gr->func->zbc->clear_color(gr, index);
+	for (index = d; index <= ltc->zbc_max; index++)
+		gr->func->zbc->clear_depth(gr, index);
+
+	if (gr->func->zbc->clear_stencil) {
+		for (index = s; index <= ltc->zbc_max; index++)
+			gr->func->zbc->clear_stencil(gr, index);
+	}
 }
 
 /**
@@ -970,7 +987,7 @@
 	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
 }
 
-static const struct nvkm_enum gf100_mp_warp_error[] = {
+const struct nvkm_enum gf100_mp_warp_error[] = {
 	{ 0x01, "STACK_ERROR" },
 	{ 0x02, "API_STACK_ERROR" },
 	{ 0x03, "RET_EMPTY_STACK_ERROR" },
@@ -995,7 +1012,7 @@
 	{}
 };
 
-static const struct nvkm_bitfield gf100_mp_global_error[] = {
+const struct nvkm_bitfield gf100_mp_global_error[] = {
 	{ 0x00000001, "SM_TO_SM_FAULT" },
 	{ 0x00000002, "L1_ERROR" },
 	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
@@ -1009,7 +1026,7 @@
 	{}
 };
 
-static void
+void
 gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
 {
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
@@ -1045,7 +1062,7 @@
 	}
 
 	if (stat & 0x00000002) {
-		gf100_gr_trap_mp(gr, gpc, tpc);
+		gr->func->trap_mp(gr, gpc, tpc);
 		stat &= ~0x00000002;
 	}
 
@@ -1611,7 +1628,8 @@
 
 	/* load register lists */
 	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
-	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
+	gf100_gr_init_csdata(gr, grctx->gpc_0, 0x41a000, 0x000, 0x418000);
+	gf100_gr_init_csdata(gr, grctx->gpc_1, 0x41a000, 0x000, 0x418000);
 	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
 	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
 
@@ -1651,6 +1669,97 @@
 	return ret;
 }
 
+void
+gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
+{
+	int tpc, gpc;
+	for (tpc = 0; tpc < gr->tpc_max; tpc++) {
+		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+			if (tpc < gr->tpc_nr[gpc]) {
+				gr->sm[gr->sm_nr].gpc = gpc;
+				gr->sm[gr->sm_nr].tpc = tpc;
+				gr->sm_nr++;
+			}
+		}
+	}
+}
+
+void
+gf100_gr_oneinit_tiles(struct gf100_gr *gr)
+{
+	static const u8 primes[] = {
+		3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61
+	};
+	int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j;
+	u32 mul_factor, comm_denom;
+	u8  gpc_map[GPC_MAX];
+	bool sorted;
+
+	switch (gr->tpc_total) {
+	case 15: gr->screen_tile_row_offset = 0x06; break;
+	case 14: gr->screen_tile_row_offset = 0x05; break;
+	case 13: gr->screen_tile_row_offset = 0x02; break;
+	case 11: gr->screen_tile_row_offset = 0x07; break;
+	case 10: gr->screen_tile_row_offset = 0x06; break;
+	case  7:
+	case  5: gr->screen_tile_row_offset = 0x01; break;
+	case  3: gr->screen_tile_row_offset = 0x02; break;
+	case  2:
+	case  1: gr->screen_tile_row_offset = 0x01; break;
+	default: gr->screen_tile_row_offset = 0x03;
+		for (i = 0; i < ARRAY_SIZE(primes); i++) {
+			if (gr->tpc_total % primes[i]) {
+				gr->screen_tile_row_offset = primes[i];
+				break;
+			}
+		}
+		break;
+	}
+
+	/* Sort GPCs by TPC count, highest-to-lowest. */
+	for (i = 0; i < gr->gpc_nr; i++)
+		gpc_map[i] = i;
+	sorted = false;
+
+	while (!sorted) {
+		for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) {
+			if (gr->tpc_nr[gpc_map[i + 1]] >
+			    gr->tpc_nr[gpc_map[i + 0]]) {
+				u8 swap = gpc_map[i];
+				gpc_map[i + 0] = gpc_map[i + 1];
+				gpc_map[i + 1] = swap;
+				sorted = false;
+			}
+		}
+	}
+
+	/* Determine tile->GPC mapping */
+	mul_factor = gr->gpc_nr * gr->tpc_max;
+	if (mul_factor & 1)
+		mul_factor = 2;
+	else
+		mul_factor = 1;
+
+	comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor;
+
+	for (i = 0; i < gr->gpc_nr; i++) {
+		init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor;
+		 init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
+		  run_err[i] = init_frac[i] + init_err[i];
+	}
+
+	for (i = 0; i < gr->tpc_total;) {
+		for (j = 0; j < gr->gpc_nr; j++) {
+			if ((run_err[j] * 2) >= comm_denom) {
+				gr->tile[i++] = gpc_map[j];
+				run_err[j] += init_frac[j] - comm_denom;
+			} else {
+				run_err[j] += init_frac[j];
+			}
+		}
+	}
+}
+
 static int
 gf100_gr_oneinit(struct nvkm_gr *base)
 {
@@ -1674,55 +1783,27 @@
 	gr->gpc_nr = nvkm_rd32(device, 0x409604) & 0x0000001f;
 	for (i = 0; i < gr->gpc_nr; i++) {
 		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
+		gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]);
 		gr->tpc_total += gr->tpc_nr[i];
 		gr->ppc_nr[i]  = gr->func->ppc_nr;
 		for (j = 0; j < gr->ppc_nr[i]; j++) {
-			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
-			if (mask)
-				gr->ppc_mask[i] |= (1 << j);
-			gr->ppc_tpc_nr[i][j] = hweight8(mask);
+			gr->ppc_tpc_mask[i][j] =
+				nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
+			if (gr->ppc_tpc_mask[i][j] == 0)
+				continue;
+			gr->ppc_mask[i] |= (1 << j);
+			gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]);
+			if (gr->ppc_tpc_min == 0 ||
+			    gr->ppc_tpc_min > gr->ppc_tpc_nr[i][j])
+				gr->ppc_tpc_min = gr->ppc_tpc_nr[i][j];
+			if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j])
+				gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j];
 		}
 	}
 
-	/*XXX: these need figuring out... though it might not even matter */
-	switch (device->chipset) {
-	case 0xc0:
-		if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
-			gr->screen_tile_row_offset = 0x07;
-		} else
-		if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
-			gr->screen_tile_row_offset = 0x05;
-		} else
-		if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
-			gr->screen_tile_row_offset = 0x06;
-		}
-		break;
-	case 0xc3: /* 450, 4/0/0/0, 2 */
-		gr->screen_tile_row_offset = 0x03;
-		break;
-	case 0xc4: /* 460, 3/4/0/0, 4 */
-		gr->screen_tile_row_offset = 0x01;
-		break;
-	case 0xc1: /* 2/0/0/0, 1 */
-		gr->screen_tile_row_offset = 0x01;
-		break;
-	case 0xc8: /* 4/4/3/4, 5 */
-		gr->screen_tile_row_offset = 0x06;
-		break;
-	case 0xce: /* 4/4/0/0, 4 */
-		gr->screen_tile_row_offset = 0x03;
-		break;
-	case 0xcf: /* 4/0/0/0, 3 */
-		gr->screen_tile_row_offset = 0x03;
-		break;
-	case 0xd7:
-	case 0xd9: /* 1/0/0/0, 1 */
-	case 0xea: /* gk20a */
-	case 0x12b: /* gm20b */
-		gr->screen_tile_row_offset = 0x01;
-		break;
-	}
-
+	memset(gr->tile, 0xff, sizeof(gr->tile));
+	gr->func->oneinit_tiles(gr);
+	gr->func->oneinit_sm_id(gr);
 	return 0;
 }
 
@@ -1914,13 +1995,68 @@
 }
 
 void
+gf100_gr_init_400054(struct gf100_gr *gr)
+{
+	nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x34ce3464);
+}
+
+void
+gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
+}
+
+void
+gf100_gr_init_tex_hww_esr(struct gf100_gr *gr, int gpc, int tpc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+}
+
+void
+gf100_gr_init_419eb4(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
+}
+
+void
+gf100_gr_init_419cc0(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int gpc, tpc;
+
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++)
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+	}
+}
+
+void
+gf100_gr_init_40601c(struct gf100_gr *gr)
+{
+	nvkm_wr32(gr->base.engine.subdev.device, 0x40601c, 0xc0000000);
+}
+
+void
+gf100_gr_init_fecs_exceptions(struct gf100_gr *gr)
+{
+	const u32 data = gr->firmware ? 0x000e0000 : 0x000e0001;
+	nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, data);
+}
+
+void
 gf100_gr_init_gpc_mmu(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct nvkm_fb *fb = device->fb;
 
 	nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0x00000001);
-	nvkm_wr32(device, 0x4188a4, 0x00000000);
+	nvkm_wr32(device, 0x4188a4, 0x03000000);
 	nvkm_wr32(device, 0x418888, 0x00000000);
 	nvkm_wr32(device, 0x41888c, 0x00000000);
 	nvkm_wr32(device, 0x418890, 0x00000000);
@@ -1929,37 +2065,30 @@
 	nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(fb->mmu_rd) >> 8);
 }
 
-int
-gf100_gr_init(struct gf100_gr *gr)
+void
+gf100_gr_init_num_active_ltcs(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+}
+
+void
+gf100_gr_init_zcull(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	u32 data[TPC_MAX / 8] = {};
-	u8  tpcnr[GPC_MAX];
-	int gpc, tpc, rop;
-	int i;
+	const u8 tile_nr = ALIGN(gr->tpc_total, 32);
+	u8 bank[GPC_MAX] = {}, gpc, i, j;
+	u32 data;
 
-	gr->func->init_gpc_mmu(gr);
-
-	gf100_gr_mmio(gr, gr->func->mmio);
-
-	nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
-
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		data[i / 8] |= tpc << ((i % 8) * 4);
+	for (i = 0; i < tile_nr; i += 8) {
+		for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) {
+			data |= bank[gr->tile[i + j]] << (j * 4);
+			bank[gr->tile[i + j]]++;
+		}
+		nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data);
 	}
 
-	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
-	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
-	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
-	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
 			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
@@ -1968,29 +2097,88 @@
 		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
 	}
 
-	if (device->chipset != 0xd7)
-		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
-	else
-		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
+}
 
-	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+void
+gf100_gr_init_vsc_stream_master(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
+}
+
+int
+gf100_gr_init(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int gpc, tpc, rop;
+
+	if (gr->func->init_419bd8)
+		gr->func->init_419bd8(gr);
+
+	gr->func->init_gpc_mmu(gr);
+
+	if (gr->fuc_sw_nonctx)
+		gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
+	else
+		gf100_gr_mmio(gr, gr->func->mmio);
+
+	gf100_gr_wait_idle(gr);
+
+	if (gr->func->init_r405a14)
+		gr->func->init_r405a14(gr);
+
+	if (gr->func->clkgate_pack)
+		nvkm_therm_clkgate_init(device->therm, gr->func->clkgate_pack);
+
+	if (gr->func->init_bios)
+		gr->func->init_bios(gr);
+
+	gr->func->init_vsc_stream_master(gr);
+	gr->func->init_zcull(gr);
+	gr->func->init_num_active_ltcs(gr);
+	if (gr->func->init_rop_active_fbps)
+		gr->func->init_rop_active_fbps(gr);
+	if (gr->func->init_bios_2)
+		gr->func->init_bios_2(gr);
+	if (gr->func->init_swdx_pes_mask)
+		gr->func->init_swdx_pes_mask(gr);
 
 	nvkm_wr32(device, 0x400500, 0x00010001);
 
 	nvkm_wr32(device, 0x400100, 0xffffffff);
 	nvkm_wr32(device, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400124, 0x00000002);
 
-	nvkm_wr32(device, 0x409c24, 0x000f0000);
+	gr->func->init_fecs_exceptions(gr);
+	if (gr->func->init_ds_hww_esr_2)
+		gr->func->init_ds_hww_esr_2(gr);
+
 	nvkm_wr32(device, 0x404000, 0xc0000000);
 	nvkm_wr32(device, 0x404600, 0xc0000000);
 	nvkm_wr32(device, 0x408030, 0xc0000000);
-	nvkm_wr32(device, 0x40601c, 0xc0000000);
+
+	if (gr->func->init_40601c)
+		gr->func->init_40601c(gr);
+
 	nvkm_wr32(device, 0x404490, 0xc0000000);
 	nvkm_wr32(device, 0x406018, 0xc0000000);
+
+	if (gr->func->init_sked_hww_esr)
+		gr->func->init_sked_hww_esr(gr);
+
 	nvkm_wr32(device, 0x405840, 0xc0000000);
 	nvkm_wr32(device, 0x405844, 0x00ffffff);
-	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
-	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
+
+	if (gr->func->init_419cc0)
+		gr->func->init_419cc0(gr);
+	if (gr->func->init_419eb4)
+		gr->func->init_419eb4(gr);
+	if (gr->func->init_419c9c)
+		gr->func->init_419c9c(gr);
+
+	if (gr->func->init_ppc_exceptions)
+		gr->func->init_ppc_exceptions(gr);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
@@ -2000,19 +2188,20 @@
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
 			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
 			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			if (gr->func->init_tex_hww_esr)
+				gr->func->init_tex_hww_esr(gr, gpc, tpc);
 			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
+			if (gr->func->init_504430)
+				gr->func->init_504430(gr, gpc, tpc);
+			gr->func->init_shader_exceptions(gr, gpc, tpc);
 		}
 		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
 		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
 	}
 
 	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
 		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
 		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
 	}
@@ -2024,10 +2213,14 @@
 	nvkm_wr32(device, 0x40011c, 0xffffffff);
 	nvkm_wr32(device, 0x400134, 0xffffffff);
 
-	nvkm_wr32(device, 0x400054, 0x34ce3464);
+	if (gr->func->init_400054)
+		gr->func->init_400054(gr);
 
 	gf100_gr_zbc_init(gr);
 
+	if (gr->func->init_4188a4)
+		gr->func->init_4188a4(gr);
+
 	return gf100_gr_init_ctxctl(gr);
 }
 
@@ -2053,13 +2246,27 @@
 
 static const struct gf100_gr_func
 gf100_gr = {
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
 	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+	.init_zcull = gf100_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_40601c = gf100_gr_init_40601c,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gf100_gr_init_419eb4,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf100_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf100_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
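
The headline change in gf100.c is gf100_gr_oneinit_tiles(), which retires the
per-chipset screen_tile_row_offset switch: it picks the row offset from a small
table (falling back to the smallest listed prime that does not divide
tpc_total), sorts GPCs by TPC count, and error-diffuses tile slots so each
GPC's share of the tile map is proportional to its TPC count. A self-contained
sketch of just the diffusion step, with a hypothetical 4/3/2-TPC layout:

    #include <stdint.h>
    #include <stdio.h>

    #define GPC_NR 3

    int main(void)
    {
        const int tpc_nr[GPC_NR] = { 4, 3, 2 };  /* sorted high-to-low */
        const int tpc_total = 9, tpc_max = 4;
        int init_frac[GPC_NR], init_err[GPC_NR], run_err[GPC_NR];
        uint8_t tile[16];                /* oversized, as in the kernel */
        int mul_factor, comm_denom, i, j;

        mul_factor = ((GPC_NR * tpc_max) & 1) ? 2 : 1;
        comm_denom = GPC_NR * tpc_max * mul_factor;

        for (i = 0; i < GPC_NR; i++) {
            init_frac[i] = tpc_nr[i] * GPC_NR * mul_factor;
            init_err[i] = i * tpc_max * mul_factor - comm_denom / 2;
            run_err[i] = init_frac[i] + init_err[i];
        }

        /* Error diffusion: a GPC claims the next tile slot whenever
         * its accumulated fraction crosses one half. */
        for (i = 0; i < tpc_total;) {
            for (j = 0; j < GPC_NR; j++) {
                if (run_err[j] * 2 >= comm_denom) {
                    tile[i++] = j;
                    run_err[j] += init_frac[j] - comm_denom;
                } else {
                    run_err[j] += init_frac[j];
                }
            }
        }

        for (i = 0; i < tpc_total; i++)
            printf("tile[%d] = GPC%d\n", i, tile[i]);
        return 0;
    }
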
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index c8ec3fd..dc46cf01 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -72,6 +72,12 @@
 	u32 l2;
 };
 
+struct gf100_gr_zbc_stencil {
+	u32 format;
+	u32 ds;
+	u32 l2;
+};
+
 struct gf100_gr {
 	const struct gf100_gr_func *func;
 	struct nvkm_gr base;
@@ -95,21 +101,33 @@
 
 	struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT];
 	struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT];
+	struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_CNT];
 
 	u8 rop_nr;
 	u8 gpc_nr;
 	u8 tpc_nr[GPC_MAX];
+	u8 tpc_max;
 	u8 tpc_total;
 	u8 ppc_nr[GPC_MAX];
 	u8 ppc_mask[GPC_MAX];
+	u8 ppc_tpc_mask[GPC_MAX][4];
 	u8 ppc_tpc_nr[GPC_MAX][4];
+	u8 ppc_tpc_min;
+	u8 ppc_tpc_max;
+
+	u8 screen_tile_row_offset;
+	u8 tile[TPC_MAX];
+
+	struct {
+		u8 gpc;
+		u8 tpc;
+	} sm[TPC_MAX];
+	u8 sm_nr;
 
 	struct gf100_gr_data mmio_data[4];
 	struct gf100_gr_mmio mmio_list[4096/8];
 	u32  size;
 	u32 *data;
-
-	u8 screen_tile_row_offset;
 };
 
 int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *,
@@ -118,14 +136,43 @@
 		  int, struct nvkm_gr **);
 void *gf100_gr_dtor(struct nvkm_gr *);
 
+struct gf100_gr_func_zbc {
+	void (*clear_color)(struct gf100_gr *, int zbc);
+	void (*clear_depth)(struct gf100_gr *, int zbc);
+	int (*stencil_get)(struct gf100_gr *, int format,
+			   const u32 ds, const u32 l2);
+	void (*clear_stencil)(struct gf100_gr *, int zbc);
+};
+
 struct gf100_gr_func {
 	void (*dtor)(struct gf100_gr *);
+	void (*oneinit_tiles)(struct gf100_gr *);
+	void (*oneinit_sm_id)(struct gf100_gr *);
 	int (*init)(struct gf100_gr *);
+	void (*init_419bd8)(struct gf100_gr *);
 	void (*init_gpc_mmu)(struct gf100_gr *);
-	void (*init_rop_active_fbps)(struct gf100_gr *);
-	void (*init_ppc_exceptions)(struct gf100_gr *);
-	void (*init_swdx_pes_mask)(struct gf100_gr *);
+	void (*init_r405a14)(struct gf100_gr *);
+	void (*init_bios)(struct gf100_gr *);
+	void (*init_vsc_stream_master)(struct gf100_gr *);
+	void (*init_zcull)(struct gf100_gr *);
 	void (*init_num_active_ltcs)(struct gf100_gr *);
+	void (*init_rop_active_fbps)(struct gf100_gr *);
+	void (*init_bios_2)(struct gf100_gr *);
+	void (*init_swdx_pes_mask)(struct gf100_gr *);
+	void (*init_fecs_exceptions)(struct gf100_gr *);
+	void (*init_ds_hww_esr_2)(struct gf100_gr *);
+	void (*init_40601c)(struct gf100_gr *);
+	void (*init_sked_hww_esr)(struct gf100_gr *);
+	void (*init_419cc0)(struct gf100_gr *);
+	void (*init_419eb4)(struct gf100_gr *);
+	void (*init_419c9c)(struct gf100_gr *);
+	void (*init_ppc_exceptions)(struct gf100_gr *);
+	void (*init_tex_hww_esr)(struct gf100_gr *, int gpc, int tpc);
+	void (*init_504430)(struct gf100_gr *, int gpc, int tpc);
+	void (*init_shader_exceptions)(struct gf100_gr *, int gpc, int tpc);
+	void (*init_400054)(struct gf100_gr *);
+	void (*init_4188a4)(struct gf100_gr *);
+	void (*trap_mp)(struct gf100_gr *, int gpc, int tpc);
 	void (*set_hww_esr_report_mask)(struct gf100_gr *);
 	const struct gf100_gr_pack *mmio;
 	struct {
@@ -135,26 +182,60 @@
 		struct gf100_gr_ucode *ucode;
 	} gpccs;
 	int (*rops)(struct gf100_gr *);
+	int gpc_nr;
+	int tpc_nr;
 	int ppc_nr;
 	const struct gf100_grctx_func *grctx;
 	const struct nvkm_therm_clkgate_pack *clkgate_pack;
+	const struct gf100_gr_func_zbc *zbc;
 	struct nvkm_sclass sclass[];
 };
 
-int gf100_gr_init(struct gf100_gr *);
 int gf100_gr_rops(struct gf100_gr *);
+void gf100_gr_oneinit_tiles(struct gf100_gr *);
+void gf100_gr_oneinit_sm_id(struct gf100_gr *);
+int gf100_gr_init(struct gf100_gr *);
+void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
+void gf100_gr_init_zcull(struct gf100_gr *);
+void gf100_gr_init_num_active_ltcs(struct gf100_gr *);
+void gf100_gr_init_fecs_exceptions(struct gf100_gr *);
+void gf100_gr_init_40601c(struct gf100_gr *);
+void gf100_gr_init_419cc0(struct gf100_gr *);
+void gf100_gr_init_419eb4(struct gf100_gr *);
+void gf100_gr_init_tex_hww_esr(struct gf100_gr *, int, int);
+void gf100_gr_init_shader_exceptions(struct gf100_gr *, int, int);
+void gf100_gr_init_400054(struct gf100_gr *);
+extern const struct gf100_gr_func_zbc gf100_gr_zbc;
 
-int gk104_gr_init(struct gf100_gr *);
+void gf117_gr_init_zcull(struct gf100_gr *);
+
+void gk104_gr_init_vsc_stream_master(struct gf100_gr *);
 void gk104_gr_init_rop_active_fbps(struct gf100_gr *);
 void gk104_gr_init_ppc_exceptions(struct gf100_gr *);
+void gk104_gr_init_sked_hww_esr(struct gf100_gr *);
+
+void gk110_gr_init_419eb4(struct gf100_gr *);
+
+void gm107_gr_init_504430(struct gf100_gr *, int, int);
+void gm107_gr_init_shader_exceptions(struct gf100_gr *, int, int);
+void gm107_gr_init_400054(struct gf100_gr *);
 
 int gk20a_gr_init(struct gf100_gr *);
 
-int gm200_gr_init(struct gf100_gr *);
+void gm200_gr_oneinit_tiles(struct gf100_gr *);
+void gm200_gr_oneinit_sm_id(struct gf100_gr *);
 int gm200_gr_rops(struct gf100_gr *);
+void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
+void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
 
-int gp100_gr_init(struct gf100_gr *);
 void gp100_gr_init_rop_active_fbps(struct gf100_gr *);
+void gp100_gr_init_fecs_exceptions(struct gf100_gr *);
+void gp100_gr_init_shader_exceptions(struct gf100_gr *, int, int);
+void gp100_gr_zbc_clear_color(struct gf100_gr *, int);
+void gp100_gr_zbc_clear_depth(struct gf100_gr *, int);
+
+void gp102_gr_init_swdx_pes_mask(struct gf100_gr *);
+extern const struct gf100_gr_func_zbc gp102_gr_zbc;
 
 #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object)
 #include <core/object.h>
@@ -187,7 +268,7 @@
 struct gf100_gr_init {
 	u32 addr;
 	u8  count;
-	u8  pitch;
+	u32 pitch;
 	u32 data;
 };
 
@@ -257,6 +338,9 @@
 extern const struct gf100_gr_init gf100_gr_init_fe_1[];
 extern const struct gf100_gr_init gf100_gr_init_pe_1[];
 void gf100_gr_init_gpc_mmu(struct gf100_gr *);
+void gf100_gr_trap_mp(struct gf100_gr *, int, int);
+extern const struct nvkm_bitfield gf100_mp_global_error[];
+extern const struct nvkm_enum gf100_mp_warp_error[];
 
 extern const struct gf100_gr_init gf104_gr_init_ds_0[];
 extern const struct gf100_gr_init gf104_gr_init_tex_0[];
@@ -279,6 +363,7 @@
 extern const struct gf100_gr_init gf117_gr_init_cbm_0[];
 
 extern const struct gf100_gr_init gk104_gr_init_main_0[];
+extern const struct gf100_gr_init gk104_gr_init_gpc_unk_2[];
 extern const struct gf100_gr_init gk104_gr_init_tpccs_0[];
 extern const struct gf100_gr_init gk104_gr_init_pe_0[];
 extern const struct gf100_gr_init gk104_gr_init_be_0[];
@@ -306,8 +391,4 @@
 void gm107_gr_init_bios(struct gf100_gr *);
 
 void gm200_gr_init_gpc_mmu(struct gf100_gr *);
-
-void gp100_gr_init_num_active_ltcs(struct gf100_gr *gr);
-
-void gp102_gr_init_swdx_pes_mask(struct gf100_gr *);
 #endif
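
The header above is where the refactor's shape is clearest: the monolithic
per-chipset init functions dissolve into a long list of gf100_gr_func hooks,
where chips supply only the stages they need and gf100_gr_init() NULL-checks
the optional ones. A minimal sketch of that dispatch pattern; gr_dev and both
hook names here are hypothetical stand-ins, not the nvkm types:

    #include <stdio.h>

    struct gr_dev;

    struct gr_func {
        void (*init_zcull)(struct gr_dev *);   /* mandatory on every chip */
        void (*init_400054)(struct gr_dev *);  /* optional, NULL if absent */
    };

    struct gr_dev {
        const struct gr_func *func;
    };

    static void my_init_zcull(struct gr_dev *gr)
    {
        (void)gr;
        printf("zcull programmed\n");
    }

    static const struct gr_func my_func = {
        .init_zcull = my_init_zcull,
        /* .init_400054 left NULL: this chip skips that write entirely */
    };

    static void gr_init(struct gr_dev *gr)
    {
        gr->func->init_zcull(gr);          /* required hook, called directly */
        if (gr->func->init_400054)         /* optional hook, guarded */
            gr->func->init_400054(gr);
    }

    int main(void)
    {
        struct gr_dev gr = { .func = &my_func };
        gr_init(&gr);
        return 0;
    }
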
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
index ec0f119..42c2fd9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
@@ -114,13 +114,27 @@
 
 static const struct gf100_gr_func
 gf104_gr = {
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
 	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+	.init_zcull = gf100_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_40601c = gf100_gr_init_40601c,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gf100_gr_init_419eb4,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf104_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf104_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
index cc152eb..4731a46 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
@@ -103,15 +103,36 @@
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
+static void
+gf108_gr_init_r405a14(struct gf100_gr *gr)
+{
+	nvkm_wr32(gr->base.engine.subdev.device, 0x405a14, 0x80000000);
+}
+
 static const struct gf100_gr_func
 gf108_gr = {
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
 	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_r405a14 = gf108_gr_init_r405a14,
+	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+	.init_zcull = gf100_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_40601c = gf100_gr_init_40601c,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gf100_gr_init_419eb4,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf108_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf108_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
index 10d2d73..cdf759c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
@@ -86,13 +86,27 @@
 
 static const struct gf100_gr_func
 gf110_gr = {
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
 	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+	.init_zcull = gf100_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_40601c = gf100_gr_init_40601c,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gf100_gr_init_419eb4,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf110_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf110_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
index ac09a07..a4158f8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
@@ -120,16 +120,58 @@
 	.data.size = sizeof(gf117_grgpc_data),
 };
 
+void
+gf117_gr_init_zcull(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
+	const u8 tile_nr = ALIGN(gr->tpc_total, 32);
+	u8 bank[GPC_MAX] = {}, gpc, i, j;
+	u32 data;
+
+	for (i = 0; i < tile_nr; i += 8) {
+		for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) {
+			data |= bank[gr->tile[i + j]] << (j * 4);
+			bank[gr->tile[i + j]]++;
+		}
+		nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data);
+	}
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
+			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+							 gr->tpc_total);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+}
+
 static const struct gf100_gr_func
 gf117_gr = {
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
 	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_40601c = gf100_gr_init_40601c,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gf100_gr_init_419eb4,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf117_gr_pack_mmio,
 	.fecs.ucode = &gf117_gr_fecs_ucode,
 	.gpccs.ucode = &gf117_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 1,
 	.grctx = &gf117_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
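
gf117_gr_init_zcull() is identical to the gf100 version except that the final
broadcast of magicgpc918 lands at GPC_BCAST(0x3fd4) instead of 0x1bd4, which is
what lets the old "device->chipset != 0xd7" runtime check in gf100_gr_init() be
replaced by a per-chipset hook. The value itself is
DIV_ROUND_UP(0x00800000, tpc_total), presumably a fixed-point reciprocal of the
TPC count; a trivial sketch of how it scales:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        unsigned tpc;
        for (tpc = 1; tpc <= 16; tpc++)
            printf("tpc_total=%2u magicgpc918=0x%08x\n",
                   tpc, DIV_ROUND_UP(0x00800000u, tpc));
        return 0;
    }
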
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
index 7f449ec..4197844 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
@@ -177,13 +177,27 @@
 
 static const struct gf100_gr_func
 gf119_gr = {
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
 	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+	.init_zcull = gf100_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_40601c = gf100_gr_init_40601c,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gf100_gr_init_419eb4,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf119_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf119_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
index 1b52fcb..477fee3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
@@ -83,6 +83,12 @@
 };
 
 const struct gf100_gr_init
+gk104_gr_init_gpc_unk_2[] = {
+	{ 0x418884,   1, 0x04, 0x00000000 },
+	{}
+};
+
+const struct gf100_gr_init
 gk104_gr_init_tpccs_0[] = {
 	{ 0x419d0c,   1, 0x04, 0x00000000 },
 	{ 0x419d10,   1, 0x04, 0x00000014 },
@@ -160,6 +166,7 @@
 	{ gf119_gr_init_gpm_0 },
 	{ gk104_gr_init_gpc_unk_1 },
 	{ gf100_gr_init_gcc_0 },
+	{ gk104_gr_init_gpc_unk_2 },
 	{ gk104_gr_init_tpccs_0 },
 	{ gf119_gr_init_tex_0 },
 	{ gk104_gr_init_pe_0 },
@@ -381,6 +388,21 @@
  ******************************************************************************/
 
 void
+gk104_gr_init_sked_hww_esr(struct gf100_gr *gr)
+{
+	nvkm_wr32(gr->base.engine.subdev.device, 0x407020, 0x40000000);
+}
+
+static void
+gk104_gr_init_fecs_exceptions(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, 0x409ffc, 0x00000000);
+	nvkm_wr32(device, 0x409c14, 0x00003e3e);
+	nvkm_wr32(device, 0x409c24, 0x000f0001);
+}
+
+void
 gk104_gr_init_rop_active_fbps(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -404,112 +426,11 @@
 	}
 }
 
-int
-gk104_gr_init(struct gf100_gr *gr)
+void
+gk104_gr_init_vsc_stream_master(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	u32 data[TPC_MAX / 8] = {};
-	u8  tpcnr[GPC_MAX];
-	int gpc, tpc, rop;
-	int i;
-
-	gr->func->init_gpc_mmu(gr);
-
-	gf100_gr_mmio(gr, gr->func->mmio);
-	if (gr->func->clkgate_pack)
-		nvkm_therm_clkgate_init(gr->base.engine.subdev.device->therm,
-					gr->func->clkgate_pack);
-
 	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
-	memset(data, 0x00, sizeof(data));
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		data[i / 8] |= tpc << ((i % 8) * 4);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
-	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
-	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
-	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
-			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
-							 gr->tpc_total);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
-	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-
-	gr->func->init_rop_active_fbps(gr);
-
-	nvkm_wr32(device, 0x400500, 0x00010001);
-
-	nvkm_wr32(device, 0x400100, 0xffffffff);
-	nvkm_wr32(device, 0x40013c, 0xffffffff);
-
-	nvkm_wr32(device, 0x409ffc, 0x00000000);
-	nvkm_wr32(device, 0x409c14, 0x00003e3e);
-	nvkm_wr32(device, 0x409c24, 0x000f0001);
-	nvkm_wr32(device, 0x404000, 0xc0000000);
-	nvkm_wr32(device, 0x404600, 0xc0000000);
-	nvkm_wr32(device, 0x408030, 0xc0000000);
-	nvkm_wr32(device, 0x404490, 0xc0000000);
-	nvkm_wr32(device, 0x406018, 0xc0000000);
-	nvkm_wr32(device, 0x407020, 0x40000000);
-	nvkm_wr32(device, 0x405840, 0xc0000000);
-	nvkm_wr32(device, 0x405844, 0x00ffffff);
-	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
-	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
-
-	gr->func->init_ppc_exceptions(gr);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
-		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
-		}
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
-	}
-
-	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
-	}
-
-	nvkm_wr32(device, 0x400108, 0xffffffff);
-	nvkm_wr32(device, 0x400138, 0xffffffff);
-	nvkm_wr32(device, 0x400118, 0xffffffff);
-	nvkm_wr32(device, 0x400130, 0xffffffff);
-	nvkm_wr32(device, 0x40011c, 0xffffffff);
-	nvkm_wr32(device, 0x400134, 0xffffffff);
-
-	nvkm_wr32(device, 0x400054, 0x34ce3464);
-
-	gf100_gr_zbc_init(gr);
-
-	return gf100_gr_init_ctxctl(gr);
 }
 
 #include "fuc/hubgk104.fuc3.h"
@@ -534,10 +455,23 @@
 
 static const struct gf100_gr_func
 gk104_gr = {
-	.init = gk104_gr_init,
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+	.init_fecs_exceptions = gk104_gr_init_fecs_exceptions,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gf100_gr_init_419eb4,
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gk104_gr_pack_mmio,
 	.fecs.ucode = &gk104_gr_fecs_ucode,
 	.gpccs.ucode = &gk104_gr_gpccs_ucode,
@@ -545,6 +479,7 @@
 	.ppc_nr = 1,
 	.grctx = &gk104_grctx,
 	.clkgate_pack = gk104_clkgate_pack,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
index 4da916a..7cd628c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
@@ -143,6 +143,7 @@
 	{ gf119_gr_init_gpm_0 },
 	{ gk110_gr_init_gpc_unk_1 },
 	{ gf100_gr_init_gcc_0 },
+	{ gk104_gr_init_gpc_unk_2 },
 	{ gk104_gr_init_tpccs_0 },
 	{ gk110_gr_init_tex_0 },
 	{ gk104_gr_init_pe_0 },
@@ -334,12 +335,39 @@
 	.data.size = sizeof(gk110_grgpc_data),
 };
 
+void
+gk110_gr_init_419eb4(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
+	nvkm_mask(device, 0x419eb4, 0x00002000, 0x00002000);
+	nvkm_mask(device, 0x419eb4, 0x00004000, 0x00004000);
+	nvkm_mask(device, 0x419eb4, 0x00008000, 0x00008000);
+	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00000000);
+	nvkm_mask(device, 0x419eb4, 0x00002000, 0x00000000);
+	nvkm_mask(device, 0x419eb4, 0x00004000, 0x00000000);
+	nvkm_mask(device, 0x419eb4, 0x00008000, 0x00000000);
+}
+
 static const struct gf100_gr_func
 gk110_gr = {
-	.init = gk104_gr_init,
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gk110_gr_init_419eb4,
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gk110_gr_pack_mmio,
 	.fecs.ucode = &gk110_gr_fecs_ucode,
 	.gpccs.ucode = &gk110_gr_gpccs_ucode,
@@ -347,6 +375,7 @@
 	.ppc_nr = 2,
 	.grctx = &gk110_grctx,
 	.clkgate_pack = gk110_clkgate_pack,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
index 1912c0b..a38faa2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
@@ -82,6 +82,7 @@
 	{ gf119_gr_init_gpm_0 },
 	{ gk110_gr_init_gpc_unk_1 },
 	{ gf100_gr_init_gcc_0 },
+	{ gk104_gr_init_gpc_unk_2 },
 	{ gk104_gr_init_tpccs_0 },
 	{ gk110_gr_init_tex_0 },
 	{ gk104_gr_init_pe_0 },
@@ -102,16 +103,30 @@
 
 static const struct gf100_gr_func
 gk110b_gr = {
-	.init = gk104_gr_init,
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419eb4 = gk110_gr_init_419eb4,
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gk110b_gr_pack_mmio,
 	.fecs.ucode = &gk110_gr_fecs_ucode,
 	.gpccs.ucode = &gk110_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 2,
 	.grctx = &gk110b_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
index 1fc2581..5845666 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
@@ -121,6 +121,7 @@
 	{ gf119_gr_init_gpm_0 },
 	{ gk110_gr_init_gpc_unk_1 },
 	{ gf100_gr_init_gcc_0 },
+	{ gk104_gr_init_gpc_unk_2 },
 	{ gk104_gr_init_tpccs_0 },
 	{ gk208_gr_init_tex_0 },
 	{ gk104_gr_init_pe_0 },
@@ -161,16 +162,29 @@
 
 static const struct gf100_gr_func
 gk208_gr = {
-	.init = gk104_gr_init,
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_400054 = gf100_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gk208_gr_pack_mmio,
 	.fecs.ucode = &gk208_gr_fecs_ucode,
 	.gpccs.ucode = &gk208_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 1,
 	.grctx = &gk208_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index de8b806..500cb08 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -219,11 +219,7 @@
 gk20a_gr_init(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	u32 data[TPC_MAX / 8] = {};
-	u8  tpcnr[GPC_MAX];
-	int gpc, tpc;
-	int ret, i;
+	int ret;
 
 	/* Clear SCC RAM */
 	nvkm_wr32(device, 0x40802c, 0x1);
@@ -246,31 +242,7 @@
 	nvkm_mask(device, 0x503018, 0x1, 0x1);
 
 	/* Zcull init */
-	memset(data, 0x00, sizeof(data));
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		data[i / 8] |= tpc << ((i % 8) * 4);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
-	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
-	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
-	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
-			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
-			  gr->tpc_total);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	gr->func->init_zcull(gr);
 
 	gr->func->init_rop_active_fbps(gr);
 
@@ -310,12 +282,17 @@
 
 static const struct gf100_gr_func
 gk20a_gr = {
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
 	.init = gk20a_gr_init,
+	.init_zcull = gf117_gr_init_zcull,
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+	.trap_mp = gf100_gr_trap_mp,
 	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 1,
 	.grctx = &gk20a_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_A },
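
The zcull block deleted from gk20a_gr_init() above (and from the other init paths below) survives behind the new init_zcull hook. Its core is a round-robin deal of TPCs across GPCs, packed four bits per TPC. A freestanding sketch of just the distribution, minus the register writes (the bound of 8 GPCs is illustrative, not the driver's GPC_MAX):

    static void
    build_tpc_map(const u8 *tpc_nr, int gpc_count, int tpc_total, u32 *data)
    {
    	u8 left[8];	/* TPCs still to place, per GPC */
    	int i, gpc = -1, tpc;

    	memcpy(left, tpc_nr, gpc_count);
    	for (i = 0; i < tpc_total; i++) {
    		do {		/* next GPC that still has TPCs to place */
    			gpc = (gpc + 1) % gpc_count;
    		} while (!left[gpc]);
    		tpc = tpc_nr[gpc] - left[gpc]--;	/* index within its GPC */
    		data[i / 8] |= tpc << ((i % 8) * 4);	/* one nibble per TPC */
    	}
    }
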
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
index 2c67fac..92e31d3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
@@ -25,6 +25,8 @@
 #include "ctxgf100.h"
 
 #include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/init.h>
 #include <subdev/bios/P0260.h>
 #include <subdev/fb.h>
 
@@ -36,6 +38,10 @@
 
 static const struct gf100_gr_init
 gm107_gr_init_main_0[] = {
+	{ 0x40880c,   1, 0x04, 0x00000000 },
+	{ 0x408910,   1, 0x04, 0x00000000 },
+	{ 0x408984,   1, 0x04, 0x00000000 },
+	{ 0x41a8a0,   1, 0x04, 0x00000000 },
 	{ 0x400080,   1, 0x04, 0x003003c2 },
 	{ 0x400088,   1, 0x04, 0x0001bfe7 },
 	{ 0x40008c,   1, 0x04, 0x00060000 },
@@ -210,14 +216,13 @@
 static const struct gf100_gr_init
 gm107_gr_init_be_0[] = {
 	{ 0x408890,   1, 0x04, 0x000000ff },
-	{ 0x40880c,   1, 0x04, 0x00000000 },
 	{ 0x408850,   1, 0x04, 0x00000004 },
 	{ 0x408878,   1, 0x04, 0x00c81603 },
 	{ 0x40887c,   1, 0x04, 0x80543432 },
 	{ 0x408880,   1, 0x04, 0x0010581e },
 	{ 0x408884,   1, 0x04, 0x00001205 },
 	{ 0x408974,   1, 0x04, 0x000000ff },
-	{ 0x408910,   9, 0x04, 0x00000000 },
+	{ 0x408914,   8, 0x04, 0x00000000 },
 	{ 0x408950,   1, 0x04, 0x00000000 },
 	{ 0x408954,   1, 0x04, 0x0000ffff },
 	{ 0x408958,   1, 0x04, 0x00000034 },
@@ -227,7 +232,6 @@
 	{ 0x408968,   1, 0x04, 0x02808833 },
 	{ 0x40896c,   1, 0x04, 0x01f02438 },
 	{ 0x408970,   1, 0x04, 0x00012c00 },
-	{ 0x408984,   1, 0x04, 0x00000000 },
 	{ 0x408988,   1, 0x04, 0x08040201 },
 	{ 0x40898c,   1, 0x04, 0x80402010 },
 	{}
@@ -260,6 +264,7 @@
 	{ gf100_gr_init_gpm_0 },
 	{ gm107_gr_init_gpc_unk_1 },
 	{ gf100_gr_init_gcc_0 },
+	{ gk104_gr_init_gpc_unk_2 },
 	{ gm107_gr_init_tpccs_0 },
 	{ gm107_gr_init_tex_0 },
 	{ gm107_gr_init_pe_0 },
@@ -280,6 +285,52 @@
  ******************************************************************************/
 
 void
+gm107_gr_init_400054(struct gf100_gr *gr)
+{
+	nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x2c350f63);
+}
+
+void
+gm107_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
+}
+
+void
+gm107_gr_init_504430(struct gf100_gr *gr, int gpc, int tpc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
+}
+
+static void
+gm107_gr_init_bios_2(struct gf100_gr *gr)
+{
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_bios *bios = device->bios;
+	struct bit_entry bit_P;
+	if (!bit_entry(bios, 'P', &bit_P) &&
+	    bit_P.version == 2 && bit_P.length >= 0x2c) {
+		u32 data = nvbios_rd32(bios, bit_P.offset + 0x28);
+		if (data) {
+			u8 ver = nvbios_rd08(bios, data + 0x00);
+			u8 hdr = nvbios_rd08(bios, data + 0x01);
+			if (ver == 0x20 && hdr >= 8) {
+				data = nvbios_rd32(bios, data + 0x04);
+				if (data) {
+					u32 save = nvkm_rd32(device, 0x619444);
+					nvbios_init(subdev, data);
+					nvkm_wr32(device, 0x619444, save);
+				}
+			}
+		}
+	}
+}
+
+void
 gm107_gr_init_bios(struct gf100_gr *gr)
 {
 	static const struct {
@@ -308,115 +359,17 @@
 	}
 }
 
-static int
-gm107_gr_init(struct gf100_gr *gr)
+static void
+gm107_gr_init_gpc_mmu(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	struct nvkm_fb *fb = device->fb;
-	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	u32 data[TPC_MAX / 8] = {};
-	u8  tpcnr[GPC_MAX];
-	int gpc, tpc, rop;
-	int i;
 
 	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
 	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
 	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
 	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8);
 	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8);
-
-	gf100_gr_mmio(gr, gr->func->mmio);
-
-	gm107_gr_init_bios(gr);
-
-	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
-	memset(data, 0x00, sizeof(data));
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		data[i / 8] |= tpc << ((i % 8) * 4);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
-	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
-	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
-	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
-			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
-							 gr->tpc_total);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
-	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-
-	gr->func->init_rop_active_fbps(gr);
-
-	nvkm_wr32(device, 0x400500, 0x00010001);
-
-	nvkm_wr32(device, 0x400100, 0xffffffff);
-	nvkm_wr32(device, 0x40013c, 0xffffffff);
-	nvkm_wr32(device, 0x400124, 0x00000002);
-	nvkm_wr32(device, 0x409c24, 0x000e0000);
-
-	nvkm_wr32(device, 0x404000, 0xc0000000);
-	nvkm_wr32(device, 0x404600, 0xc0000000);
-	nvkm_wr32(device, 0x408030, 0xc0000000);
-	nvkm_wr32(device, 0x404490, 0xc0000000);
-	nvkm_wr32(device, 0x406018, 0xc0000000);
-	nvkm_wr32(device, 0x407020, 0x40000000);
-	nvkm_wr32(device, 0x405840, 0xc0000000);
-	nvkm_wr32(device, 0x405844, 0x00ffffff);
-	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
-
-	gr->func->init_ppc_exceptions(gr);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
-		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
-		}
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
-	}
-
-	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
-	}
-
-	nvkm_wr32(device, 0x400108, 0xffffffff);
-	nvkm_wr32(device, 0x400138, 0xffffffff);
-	nvkm_wr32(device, 0x400118, 0xffffffff);
-	nvkm_wr32(device, 0x400130, 0xffffffff);
-	nvkm_wr32(device, 0x40011c, 0xffffffff);
-	nvkm_wr32(device, 0x400134, 0xffffffff);
-
-	nvkm_wr32(device, 0x400054, 0x2c350f63);
-
-	gf100_gr_zbc_init(gr);
-
-	return gf100_gr_init_ctxctl(gr);
 }
 
 #include "fuc/hubgm107.fuc5.h"
@@ -441,15 +394,32 @@
 
 static const struct gf100_gr_func
 gm107_gr = {
-	.init = gm107_gr_init,
+	.oneinit_tiles = gf100_gr_oneinit_tiles,
+	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
+	.init_gpc_mmu = gm107_gr_init_gpc_mmu,
+	.init_bios = gm107_gr_init_bios,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+	.init_bios_2 = gm107_gr_init_bios_2,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_504430 = gm107_gr_init_504430,
+	.init_shader_exceptions = gm107_gr_init_shader_exceptions,
+	.init_400054 = gm107_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gm107_gr_pack_mmio,
 	.fecs.ucode = &gm107_gr_fecs_ucode,
 	.gpccs.ucode = &gm107_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 2,
 	.grctx = &gm107_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
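
The be_0 edits above make sense once you know how a gf100_gr_init entry expands: { addr, count, pitch, data } writes data to count registers starting at addr, stepping by pitch. The old { 0x408910, 9, 0x04, 0 } covered 0x408910..0x408930; with 0x408910 hoisted to the front of main_0, { 0x408914, 8, 0x04, 0 } covers the remaining 0x408914..0x408930. The expansion loop as gf100_gr_mmio() applies it (a sketch from the entry layout, not a verbatim copy):

    static void
    expand_init_entry(struct nvkm_device *device,
    		  const struct gf100_gr_init *init)
    {
    	u32 addr = init->addr;
    	int i;

    	for (i = 0; i < init->count; i++, addr += init->pitch)
    		nvkm_wr32(device, addr, init->data);
    }
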
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
index 6435f12..eff3066 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
@@ -39,6 +39,22 @@
 }
 
 void
+gm200_gr_init_ds_hww_esr_2(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, 0x405848, 0xc0000000);
+	nvkm_mask(device, 0x40584c, 0x00000001, 0x00000001);
+}
+
+void
+gm200_gr_init_num_active_ltcs(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
+}
+
+void
 gm200_gr_init_gpc_mmu(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -61,111 +77,51 @@
 	nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
 }
 
-int
-gm200_gr_init(struct gf100_gr *gr)
+static u8
+gm200_gr_tile_map_6_24[] = {
+	0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2,
+};
+
+static u8
+gm200_gr_tile_map_4_16[] = {
+	0, 1, 2, 3, 2, 3, 0, 1, 3, 0, 1, 2, 1, 2, 3, 0,
+};
+
+static u8
+gm200_gr_tile_map_2_8[] = {
+	0, 1, 1, 0, 0, 1, 1, 0,
+};
+
+void
+gm200_gr_oneinit_sm_id(struct gf100_gr *gr)
 {
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	u32 data[TPC_MAX / 8] = {};
-	u8  tpcnr[GPC_MAX];
-	int gpc, tpc, rop;
-	int i;
+	/*XXX: There's a different algorithm here I've not yet figured out. */
+	gf100_gr_oneinit_sm_id(gr);
+}
 
-	gr->func->init_gpc_mmu(gr);
-
-	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
-
-	gm107_gr_init_bios(gr);
-
-	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
-	memset(data, 0x00, sizeof(data));
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		data[i / 8] |= tpc << ((i % 8) * 4);
+void
+gm200_gr_oneinit_tiles(struct gf100_gr *gr)
+{
+	/*XXX: Not sure what this is about.  The algorithm from NVGPU
+	 *     seems to work for all boards I tried from earlier (and
+	 *     later) GPUs except in these specific configurations.
+	 *
+	 *     Let's just hardcode them for now.
+	 */
+	if (gr->gpc_nr == 2 && gr->tpc_total == 8) {
+		memcpy(gr->tile, gm200_gr_tile_map_2_8, gr->tpc_total);
+		gr->screen_tile_row_offset = 1;
+	} else
+	if (gr->gpc_nr == 4 && gr->tpc_total == 16) {
+		memcpy(gr->tile, gm200_gr_tile_map_4_16, gr->tpc_total);
+		gr->screen_tile_row_offset = 4;
+	} else
+	if (gr->gpc_nr == 6 && gr->tpc_total == 24) {
+		memcpy(gr->tile, gm200_gr_tile_map_6_24, gr->tpc_total);
+		gr->screen_tile_row_offset = 5;
+	} else {
+		gf100_gr_oneinit_tiles(gr);
 	}
-
-	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
-	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
-	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
-	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
-			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
-							 gr->tpc_total);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
-	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
-
-	gr->func->init_rop_active_fbps(gr);
-
-	nvkm_wr32(device, 0x400500, 0x00010001);
-	nvkm_wr32(device, 0x400100, 0xffffffff);
-	nvkm_wr32(device, 0x40013c, 0xffffffff);
-	nvkm_wr32(device, 0x400124, 0x00000002);
-	nvkm_wr32(device, 0x409c24, 0x000e0000);
-	nvkm_wr32(device, 0x405848, 0xc0000000);
-	nvkm_wr32(device, 0x40584c, 0x00000001);
-	nvkm_wr32(device, 0x404000, 0xc0000000);
-	nvkm_wr32(device, 0x404600, 0xc0000000);
-	nvkm_wr32(device, 0x408030, 0xc0000000);
-	nvkm_wr32(device, 0x404490, 0xc0000000);
-	nvkm_wr32(device, 0x406018, 0xc0000000);
-	nvkm_wr32(device, 0x407020, 0x40000000);
-	nvkm_wr32(device, 0x405840, 0xc0000000);
-	nvkm_wr32(device, 0x405844, 0x00ffffff);
-	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
-
-	gr->func->init_ppc_exceptions(gr);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
-		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
-		}
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
-	}
-
-	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
-	}
-
-	nvkm_wr32(device, 0x400108, 0xffffffff);
-	nvkm_wr32(device, 0x400138, 0xffffffff);
-	nvkm_wr32(device, 0x400118, 0xffffffff);
-	nvkm_wr32(device, 0x400130, 0xffffffff);
-	nvkm_wr32(device, 0x40011c, 0xffffffff);
-	nvkm_wr32(device, 0x400134, 0xffffffff);
-
-	nvkm_wr32(device, 0x400054, 0x2c350f63);
-
-	gf100_gr_zbc_init(gr);
-
-	return gf100_gr_init_ctxctl(gr);
 }
 
 int
@@ -208,13 +164,30 @@
 
 static const struct gf100_gr_func
 gm200_gr = {
-	.init = gm200_gr_init,
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_bios = gm107_gr_init_bios,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gm200_gr_init_rop_active_fbps,
+	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+	.init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_504430 = gm107_gr_init_504430,
+	.init_shader_exceptions = gm107_gr_init_shader_exceptions,
+	.init_400054 = gm107_gr_init_400054,
+	.trap_mp = gf100_gr_trap_mp,
 	.rops = gm200_gr_rops,
+	.tpc_nr = 4,
 	.ppc_nr = 2,
 	.grctx = &gm200_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
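
The hardcoded gm200 tile maps above still balance the load: each GPC index occurs map-length / gpc_nr times (four times in all three maps); only the ordering differs from the gf100 round-robin fallback. A quick sanity check, should more maps be added (hypothetical helper; the bound of 8 GPCs is illustrative):

    static bool
    tile_map_balanced(const u8 *map, int len, int gpc_nr)
    {
    	int count[8] = {}, i;

    	for (i = 0; i < len; i++)
    		count[map[i]]++;	/* occurrences of each GPC index */
    	for (i = 0; i < gpc_nr; i++)
    		if (count[i] != len / gpc_nr)
    			return false;
    	return true;
    }
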
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
index 69479af..a667770 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
@@ -64,13 +64,18 @@
 
 static const struct gf100_gr_func
 gm20b_gr = {
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
 	.init = gk20a_gr_init,
+	.init_zcull = gf117_gr_init_zcull,
 	.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+	.trap_mp = gf100_gr_trap_mp,
 	.set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask,
 	.rops = gm200_gr_rops,
 	.ppc_nr = 1,
 	.grctx = &gm20b_grctx,
+	.zbc = &gf100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
index 867a5f7..9d0521c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
@@ -29,6 +29,66 @@
 /*******************************************************************************
  * PGRAPH engine/subdev functions
  ******************************************************************************/
+void
+gp100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const int znum = zbc - 1;
+	const u32 zoff = znum * 4;
+
+	if (gr->zbc_color[zbc].format) {
+		nvkm_wr32(device, 0x418010 + zoff, gr->zbc_color[zbc].ds[0]);
+		nvkm_wr32(device, 0x41804c + zoff, gr->zbc_color[zbc].ds[1]);
+		nvkm_wr32(device, 0x418088 + zoff, gr->zbc_color[zbc].ds[2]);
+		nvkm_wr32(device, 0x4180c4 + zoff, gr->zbc_color[zbc].ds[3]);
+	}
+
+	nvkm_mask(device, 0x418100 + ((znum / 4) * 4),
+			  0x0000007f << ((znum % 4) * 7),
+			  gr->zbc_color[zbc].format << ((znum % 4) * 7));
+}
+
+void
+gp100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const int znum = zbc - 1;
+	const u32 zoff = znum * 4;
+
+	if (gr->zbc_depth[zbc].format)
+		nvkm_wr32(device, 0x418110 + zoff, gr->zbc_depth[zbc].ds);
+	nvkm_mask(device, 0x41814c + ((znum / 4) * 4),
+			  0x0000007f << ((znum % 4) * 7),
+			  gr->zbc_depth[zbc].format << ((znum % 4) * 7));
+}
+
+static const struct gf100_gr_func_zbc
+gp100_gr_zbc = {
+	.clear_color = gp100_gr_zbc_clear_color,
+	.clear_depth = gp100_gr_zbc_clear_depth,
+};
+
+void
+gp100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105);
+}
+
+static void
+gp100_gr_init_419c9c(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x419c9c, 0x00010000, 0x00010000);
+	nvkm_mask(device, 0x419c9c, 0x00020000, 0x00020000);
+}
+
+void
+gp100_gr_init_fecs_exceptions(struct gf100_gr *gr)
+{
+	nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000f0002);
+}
 
 void
 gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
@@ -40,132 +100,32 @@
 	nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
 }
 
-void
-gp100_gr_init_num_active_ltcs(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-
-	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
-}
-
-int
-gp100_gr_init(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	u32 data[TPC_MAX / 8] = {};
-	u8  tpcnr[GPC_MAX];
-	int gpc, tpc, rop;
-	int i;
-
-	gr->func->init_gpc_mmu(gr);
-
-	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
-
-	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
-	memset(data, 0x00, sizeof(data));
-	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
-		do {
-			gpc = (gpc + 1) % gr->gpc_nr;
-		} while (!tpcnr[gpc]);
-		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
-		data[i / 8] |= tpc << ((i % 8) * 4);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
-	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
-	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
-	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
-			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
-							 gr->tpc_total);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
-	}
-
-	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
-	gr->func->init_num_active_ltcs(gr);
-
-	gr->func->init_rop_active_fbps(gr);
-	if (gr->func->init_swdx_pes_mask)
-		gr->func->init_swdx_pes_mask(gr);
-
-	nvkm_wr32(device, 0x400500, 0x00010001);
-	nvkm_wr32(device, 0x400100, 0xffffffff);
-	nvkm_wr32(device, 0x40013c, 0xffffffff);
-	nvkm_wr32(device, 0x400124, 0x00000002);
-	nvkm_wr32(device, 0x409c24, 0x000f0002);
-	nvkm_wr32(device, 0x405848, 0xc0000000);
-	nvkm_mask(device, 0x40584c, 0x00000000, 0x00000001);
-	nvkm_wr32(device, 0x404000, 0xc0000000);
-	nvkm_wr32(device, 0x404600, 0xc0000000);
-	nvkm_wr32(device, 0x408030, 0xc0000000);
-	nvkm_wr32(device, 0x404490, 0xc0000000);
-	nvkm_wr32(device, 0x406018, 0xc0000000);
-	nvkm_wr32(device, 0x407020, 0x40000000);
-	nvkm_wr32(device, 0x405840, 0xc0000000);
-	nvkm_wr32(device, 0x405844, 0x00ffffff);
-	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
-
-	nvkm_mask(device, 0x419c9c, 0x00010000, 0x00010000);
-	nvkm_mask(device, 0x419c9c, 0x00020000, 0x00020000);
-
-	gr->func->init_ppc_exceptions(gr);
-
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
-		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
-			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105);
-		}
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
-		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
-	}
-
-	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
-	}
-
-	nvkm_wr32(device, 0x400108, 0xffffffff);
-	nvkm_wr32(device, 0x400138, 0xffffffff);
-	nvkm_wr32(device, 0x400118, 0xffffffff);
-	nvkm_wr32(device, 0x400130, 0xffffffff);
-	nvkm_wr32(device, 0x40011c, 0xffffffff);
-	nvkm_wr32(device, 0x400134, 0xffffffff);
-
-	gf100_gr_zbc_init(gr);
-
-	return gf100_gr_init_ctxctl(gr);
-}
-
 static const struct gf100_gr_func
 gp100_gr = {
-	.init = gp100_gr_init,
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+	.init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+	.init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_419c9c = gp100_gr_init_419c9c,
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
-	.init_num_active_ltcs = gp100_gr_init_num_active_ltcs,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_504430 = gm107_gr_init_504430,
+	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.trap_mp = gf100_gr_trap_mp,
 	.rops = gm200_gr_rops,
+	.gpc_nr = 6,
+	.tpc_nr = 5,
 	.ppc_nr = 2,
 	.grctx = &gp100_grctx,
+	.zbc = &gp100_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
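
The bit arithmetic in gp100_gr_zbc_clear_color()/_depth() packs ZBC slot formats 7 bits wide, four slots per 32-bit register, with slot numbering offset by one (slot 0 is the reserved entry, hence znum = zbc - 1). For example zbc = 6 gives znum = 5, so its format lands in register 0x418100 + 4 = 0x418104 at bits 7..13. The address/shift computation, factored out for clarity:

    static void
    zbc_format_reg(int zbc, u32 *reg, u32 *shift)
    {
    	const int znum = zbc - 1;	/* slot 0 is reserved */

    	*reg   = 0x418100 + (znum / 4) * 4;	/* four formats per word */
    	*shift = (znum % 4) * 7;		/* 7 bits per format */
    }
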
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
index 61e3a0b..37f7d73 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
@@ -26,6 +26,62 @@
 
 #include <nvif/class.h>
 
+static void
+gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const int znum = zbc - 1;
+	const u32 zoff = znum * 4;
+
+	if (gr->zbc_stencil[zbc].format)
+		nvkm_wr32(device, 0x41815c + zoff, gr->zbc_stencil[zbc].ds);
+	nvkm_mask(device, 0x418198 + ((znum / 4) * 4),
+			  0x0000007f << ((znum % 4) * 7),
+			  gr->zbc_stencil[zbc].format << ((znum % 4) * 7));
+}
+
+static int
+gp102_gr_zbc_stencil_get(struct gf100_gr *gr, int format,
+			 const u32 ds, const u32 l2)
+{
+	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
+	int zbc = -ENOSPC, i;
+
+	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+		if (gr->zbc_stencil[i].format) {
+			if (gr->zbc_stencil[i].format != format)
+				continue;
+			if (gr->zbc_stencil[i].ds != ds)
+				continue;
+			if (gr->zbc_stencil[i].l2 != l2) {
+				WARN_ON(1);
+				return -EINVAL;
+			}
+			return i;
+		} else {
+			zbc = (zbc < 0) ? i : zbc;
+		}
+	}
+
+	if (zbc < 0)
+		return zbc;
+
+	gr->zbc_stencil[zbc].format = format;
+	gr->zbc_stencil[zbc].ds = ds;
+	gr->zbc_stencil[zbc].l2 = l2;
+	nvkm_ltc_zbc_stencil_get(ltc, zbc, l2);
+	gr->func->zbc->clear_stencil(gr, zbc);
+	return zbc;
+}
+
+const struct gf100_gr_func_zbc
+gp102_gr_zbc = {
+	.clear_color = gp100_gr_zbc_clear_color,
+	.clear_depth = gp100_gr_zbc_clear_depth,
+	.stencil_get = gp102_gr_zbc_stencil_get,
+	.clear_stencil = gp102_gr_zbc_clear_stencil,
+};
+
 void
 gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr)
 {
@@ -42,15 +98,30 @@
 
 static const struct gf100_gr_func
 gp102_gr = {
-	.init = gp100_gr_init,
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
-	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
 	.init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
-	.init_num_active_ltcs = gp100_gr_init_num_active_ltcs,
+	.init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+	.init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_504430 = gm107_gr_init_504430,
+	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.trap_mp = gf100_gr_trap_mp,
 	.rops = gm200_gr_rops,
+	.gpc_nr = 6,
+	.tpc_nr = 5,
 	.ppc_nr = 3,
 	.grctx = &gp102_grctx,
+	.zbc = &gp102_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
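
gp102_gr_zbc_stencil_get() above is a find-or-allocate table walk: reuse an existing slot whose (format, ds) matches, claim the first free slot otherwise, fail with -ENOSPC when the table is full, and reject a (format, ds) match whose l2 value conflicts. The generic shape of that policy (a sketch; zbc_slot stands in for the gr->zbc_stencil[] entries):

    struct zbc_slot {
    	int format;	/* 0 means the slot is free */
    	u32 ds;
    	u32 l2;
    };

    static int
    slot_get(struct zbc_slot *tab, int min, int max, int format, u32 ds, u32 l2)
    {
    	int free = -ENOSPC, i;

    	for (i = min; i <= max; i++) {
    		if (!tab[i].format) {
    			if (free < 0)
    				free = i;	/* remember first free slot */
    			continue;
    		}
    		if (tab[i].format == format && tab[i].ds == ds)
    			return tab[i].l2 == l2 ? i : -EINVAL;
    	}
    	return free;	/* caller populates the slot on success */
    }
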
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
new file mode 100644
index 0000000..4573c91
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <nvif/class.h>
+
+static const struct gf100_gr_func
+gp104_gr = {
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
+	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
+	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+	.init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
+	.init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+	.init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_504430 = gm107_gr_init_504430,
+	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.trap_mp = gf100_gr_trap_mp,
+	.rops = gm200_gr_rops,
+	.gpc_nr = 6,
+	.tpc_nr = 5,
+	.ppc_nr = 3,
+	.grctx = &gp104_grctx,
+	.zbc = &gp102_gr_zbc,
+	.sclass = {
+		{ -1, -1, FERMI_TWOD_A },
+		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+		{ -1, -1, PASCAL_B, &gf100_fermi },
+		{ -1, -1, PASCAL_COMPUTE_B },
+		{}
+	}
+};
+
+int
+gp104_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+{
+	return gm200_gr_new_(&gp104_gr, device, index, pgr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
index f727232..812aba9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
@@ -28,15 +28,30 @@
 
 static const struct gf100_gr_func
 gp107_gr = {
-	.init = gp100_gr_init,
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
-	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
 	.init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
-	.init_num_active_ltcs = gp100_gr_init_num_active_ltcs,
+	.init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+	.init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
+	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_504430 = gm107_gr_init_504430,
+	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.trap_mp = gf100_gr_trap_mp,
 	.rops = gm200_gr_rops,
+	.gpc_nr = 2,
+	.tpc_nr = 3,
 	.ppc_nr = 1,
 	.grctx = &gp107_grctx,
+	.zbc = &gp102_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
index 5f3d161..303dced 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
@@ -25,24 +25,31 @@
 
 #include <nvif/class.h>
 
-static void
-gp10b_gr_init_num_active_ltcs(struct gf100_gr *gr)
-{
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-
-	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-}
-
 static const struct gf100_gr_func
 gp10b_gr = {
-	.init = gp100_gr_init,
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
 	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
 	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+	.init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+	.init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_419cc0 = gf100_gr_init_419cc0,
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
-	.init_num_active_ltcs = gp10b_gr_init_num_active_ltcs,
+	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+	.init_504430 = gm107_gr_init_504430,
+	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.trap_mp = gf100_gr_trap_mp,
 	.rops = gm200_gr_rops,
+	.gpc_nr = 1,
+	.tpc_nr = 2,
 	.ppc_nr = 1,
 	.grctx = &gp102_grctx,
+	.zbc = &gp102_gr_zbc,
 	.sclass = {
 		{ -1, -1, FERMI_TWOD_A },
 		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c
new file mode 100644
index 0000000..19173ea
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <nvif/class.h>
+
+static void
+gv100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
+{
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x730));
+	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x734));
+	const struct nvkm_enum *warp;
+	char glob[128];
+
+	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
+	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);
+
+	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
+			   "global %08x [%s] warp %04x [%s]\n",
+		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
+
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x730), 0x00000000);
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x734), gerr);
+}
+
+static void
+gv100_gr_init_4188a4(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x4188a4, 0x03000000, 0x03000000);
+}
+
+static void
+gv100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int sm;
+	for (sm = 0; sm < 0x100; sm += 0x80) {
+		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x728 + sm), 0x0085eb64);
+		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x610), 0x00000001);
+		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x72c + sm), 0x00000004);
+	}
+}
+
+static void
+gv100_gr_init_504430(struct gf100_gr *gr, int gpc, int tpc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0x403f0000);
+}
+
+static void
+gv100_gr_init_419bd8(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	nvkm_mask(device, 0x419bd8, 0x00000700, 0x00000000);
+}
+
+static const struct gf100_gr_func
+gv100_gr = {
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
+	.init_419bd8 = gv100_gr_init_419bd8,
+	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = gf117_gr_init_zcull,
+	.init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
+	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+	.init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
+	.init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+	.init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_504430 = gv100_gr_init_504430,
+	.init_shader_exceptions = gv100_gr_init_shader_exceptions,
+	.init_4188a4 = gv100_gr_init_4188a4,
+	.trap_mp = gv100_gr_trap_mp,
+	.rops = gm200_gr_rops,
+	.gpc_nr = 6,
+	.tpc_nr = 5,
+	.ppc_nr = 3,
+	.grctx = &gv100_grctx,
+	.zbc = &gp102_gr_zbc,
+	.sclass = {
+		{ -1, -1, FERMI_TWOD_A },
+		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+		{ -1, -1, VOLTA_A, &gf100_fermi },
+		{ -1, -1, VOLTA_COMPUTE_A },
+		{}
+	}
+};
+
+int
+gv100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+{
+	return gm200_gr_new_(&gv100_gr, device, index, pgr);
+}
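
A note on the register macros used throughout this file: TPC_UNIT(gpc, tpc, r) resolves (per gf100.h, quoted from memory) to a per-TPC window inside the per-GPC window, and the extra sm offset in gv100_gr_init_shader_exceptions() (0x00 and 0x80) then selects one of the two SMs a Volta TPC carries. Equivalent helper:

    static u32
    tpc_unit(int gpc, int tpc, u32 r)
    {
    	return 0x504000 + gpc * 0x8000 + tpc * 0x800 + r;
    }
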
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
index 53859b6..b2785be 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
@@ -779,8 +779,8 @@
 
 		sdom = spec;
 		while (sdom->signal_nr) {
-			dom = kzalloc(sizeof(*dom) + sdom->signal_nr *
-				      sizeof(*dom->signal), GFP_KERNEL);
+			dom = kzalloc(struct_size(dom, signal, sdom->signal_nr),
+				      GFP_KERNEL);
 			if (!dom)
 				return -ENOMEM;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c
index 58a59b7..771e16a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c
@@ -506,6 +506,7 @@
 		break;
 	case 0x0148cdec:
 	case 0x015ccf3e:
+	case 0x0167d263:
 		ret = msgqueue_0148cdec_new(falcon, sb, queue);
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
index 3f5d38d..cfdffef 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
@@ -3,6 +3,7 @@
 include $(src)/nvkm/subdev/bus/Kbuild
 include $(src)/nvkm/subdev/clk/Kbuild
 include $(src)/nvkm/subdev/devinit/Kbuild
+include $(src)/nvkm/subdev/fault/Kbuild
 include $(src)/nvkm/subdev/fb/Kbuild
 include $(src)/nvkm/subdev/fuse/Kbuild
 include $(src)/nvkm/subdev/gpio/Kbuild
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index 7c7efa4..3133b28 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -25,7 +25,7 @@
 #include <subdev/bios/bit.h>
 #include <subdev/bios/dp.h>
 
-static u16
+u16
 nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	struct bit_entry d;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
index 2ca23a9..e6e804c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
@@ -193,7 +193,10 @@
 		data += hdr;
 		while (cnt--) {
 			if (nvbios_rd08(bios, data + 0) == type) {
-				*reg = nvbios_rd32(bios, data + 3);
+				if (*ver < 0x50)
+					*reg = nvbios_rd32(bios, data + 3);
+				else
+					*reg = 0;
 				return data;
 			}
 			data += *len;
@@ -361,6 +364,20 @@
 		info->min_p = nvbios_rd08(bios, data + 12);
 		info->max_p = nvbios_rd08(bios, data + 13);
 		break;
+	case 0x50:
+		info->refclk = nvbios_rd16(bios, data + 1) * 1000;
+		/* info->refclk_alt = nvbios_rd16(bios, data + 3) * 1000; */
+		info->vco1.min_freq = nvbios_rd16(bios, data + 5) * 1000;
+		info->vco1.max_freq = nvbios_rd16(bios, data + 7) * 1000;
+		info->vco1.min_inputfreq = nvbios_rd16(bios, data + 9) * 1000;
+		info->vco1.max_inputfreq = nvbios_rd16(bios, data + 11) * 1000;
+		info->vco1.min_m = nvbios_rd08(bios, data + 13);
+		info->vco1.max_m = nvbios_rd08(bios, data + 14);
+		info->vco1.min_n = nvbios_rd08(bios, data + 15);
+		info->vco1.max_n = nvbios_rd08(bios, data + 16);
+		info->min_p = nvbios_rd08(bios, data + 17);
+		info->max_p = nvbios_rd08(bios, data + 18);
+		break;
 	default:
 		nvkm_error(subdev, "unknown pll limits version 0x%02x\n", ver);
 		return -EINVAL;
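
For reference, the version 0x50 entry layout implied by the reads above, rendered as a packed struct (offsets reconstructed from the parser, not from documentation; the 16-bit clock fields are scaled by 1000 on read, and per the lookup change above a v0x50 entry carries no register address, so *reg is forced to 0):

    struct pll_limits_v50 {
    	u8     type;			/* +0x00, matched by the lookup loop */
    	__le16 refclk;			/* +0x01 */
    	__le16 refclk_alt;		/* +0x03, read commented out above */
    	__le16 vco1_min_freq;		/* +0x05 */
    	__le16 vco1_max_freq;		/* +0x07 */
    	__le16 vco1_min_inputfreq;	/* +0x09 */
    	__le16 vco1_max_inputfreq;	/* +0x0b */
    	u8     vco1_min_m;		/* +0x0d */
    	u8     vco1_max_m;		/* +0x0e */
    	u8     vco1_min_n;		/* +0x0f */
    	u8     vco1_max_n;		/* +0x10 */
    	u8     min_p;			/* +0x11 */
    	u8     max_p;			/* +0x12 */
    } __packed;
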
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c
index 0f537c2..3634cd0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c
@@ -78,7 +78,10 @@
 	 * important as we don't want to be touching vram on an
 	 * uninitialised board
 	 */
-	addr = nvkm_rd32(device, 0x619f04);
+	if (device->card_type >= GV100)
+		addr = nvkm_rd32(device, 0x625f04);
+	else
+		addr = nvkm_rd32(device, 0x619f04);
 	if (!(addr & 0x00000008)) {
 		nvkm_debug(subdev, "... not enabled\n");
 		return ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
index e4c8d31..ba6a868 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
@@ -109,18 +109,17 @@
 
 static struct nvkm_cstate *
 nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate,
-		      struct nvkm_cstate *start)
+		      struct nvkm_cstate *cstate)
 {
 	struct nvkm_device *device = clk->subdev.device;
 	struct nvkm_volt *volt = device->volt;
-	struct nvkm_cstate *cstate;
 	int max_volt;
 
-	if (!pstate || !start)
+	if (!pstate || !cstate)
 		return NULL;
 
 	if (!volt)
-		return start;
+		return cstate;
 
 	max_volt = volt->max_uv;
 	if (volt->max0_id != 0xff)
@@ -133,8 +132,7 @@
 		max_volt = min(max_volt,
 			       nvkm_volt_map(volt, volt->max2_id, clk->temp));
 
-	for (cstate = start; &cstate->head != &pstate->list;
-	     cstate = list_entry(cstate->head.prev, typeof(*cstate), head)) {
+	list_for_each_entry_from_reverse(cstate, &pstate->list, head) {
 		if (nvkm_cstate_valid(clk, cstate, max_volt, clk->temp))
 			break;
 	}
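
list_for_each_entry_from_reverse() (from <linux/list.h>) iterates backwards starting from the current value of the cursor, inclusive, until it reaches the list head, which is exactly what the removed open-coded cstate->head.prev walk did. Toy usage with hypothetical types:

    struct item {
    	int val;
    	struct list_head head;
    };

    /* Scan backwards from `from` for the first entry at or below `limit`. */
    static struct item *
    last_not_above(struct item *from, struct list_head *list, int limit)
    {
    	struct item *it = from;

    	list_for_each_entry_from_reverse(it, list, head) {
    		if (it->val <= limit)
    			return it;
    	}
    	return NULL;	/* walked past the head without a match */
    }
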
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
index eac88e3..50a4369 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
@@ -12,3 +12,4 @@
 nvkm-y += nvkm/subdev/devinit/gf100.o
 nvkm-y += nvkm/subdev/devinit/gm107.o
 nvkm-y += nvkm/subdev/devinit/gm200.o
+nvkm-y += nvkm/subdev/devinit/gv100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
index 1730371..b80618e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
@@ -107,7 +107,7 @@
 	return pmu_exec(init, pmu.init_addr_pmu), 0;
 }
 
-static int
+int
 gm200_devinit_post(struct nvkm_devinit *base, bool post)
 {
 	struct nv50_devinit *init = nv50_devinit(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gv100.c
new file mode 100644
index 0000000..fbde682
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gv100.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "nv50.h"
+
+#include <subdev/bios.h>
+#include <subdev/bios/pll.h>
+#include <subdev/clk/pll.h>
+
+static int
+gv100_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
+{
+	struct nvkm_subdev *subdev = &init->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvbios_pll info;
+	int head = type - PLL_VPLL0;
+	int N, fN, M, P;
+	int ret;
+
+	ret = nvbios_pll_parse(device->bios, type, &info);
+	if (ret)
+		return ret;
+
+	ret = gt215_pll_calc(subdev, &info, freq, &N, &fN, &M, &P);
+	if (ret < 0)
+		return ret;
+
+	switch (info.type) {
+	case PLL_VPLL0:
+	case PLL_VPLL1:
+	case PLL_VPLL2:
+	case PLL_VPLL3:
+		nvkm_wr32(device, 0x00ef10 + (head * 0x40), fN << 16);
+		nvkm_wr32(device, 0x00ef04 + (head * 0x40), (P << 16) |
+							    (N <<  8) |
+							    (M <<  0));
+		break;
+	default:
+		nvkm_warn(subdev, "%08x/%dkHz unimplemented\n", type, freq);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct nvkm_devinit_func
+gv100_devinit = {
+	.preinit = gf100_devinit_preinit,
+	.init = nv50_devinit_init,
+	.post = gm200_devinit_post,
+	.pll_set = gv100_devinit_pll_set,
+	.disable = gm107_devinit_disable,
+};
+
+int
+gv100_devinit_new(struct nvkm_device *device, int index,
+		struct nvkm_devinit **pinit)
+{
+	return nv50_devinit_new_(&gv100_devinit, device, index, pinit);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
index 315ebaf..9b9f0dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
@@ -24,4 +24,6 @@
 void gf100_devinit_preinit(struct nvkm_devinit *);
 
 u64  gm107_devinit_disable(struct nvkm_devinit *);
+
+int gm200_devinit_post(struct nvkm_devinit *, bool);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild
new file mode 100644
index 0000000..45bb46f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild
@@ -0,0 +1,3 @@
+nvkm-y += nvkm/subdev/fault/base.o
+nvkm-y += nvkm/subdev/fault/gp100.o
+nvkm-y += nvkm/subdev/fault/gv100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c
new file mode 100644
index 0000000..007bf4a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <core/memory.h>
+#include <core/notify.h>
+#include <subdev/bar.h>
+#include <subdev/mmu.h>
+
+static void
+nvkm_fault_ntfy_fini(struct nvkm_event *event, int type, int index)
+{
+	struct nvkm_fault *fault = container_of(event, typeof(*fault), event);
+	fault->func->buffer.fini(fault->buffer[index]);
+}
+
+static void
+nvkm_fault_ntfy_init(struct nvkm_event *event, int type, int index)
+{
+	struct nvkm_fault *fault = container_of(event, typeof(*fault), event);
+	fault->func->buffer.init(fault->buffer[index]);
+}
+
+static int
+nvkm_fault_ntfy_ctor(struct nvkm_object *object, void *argv, u32 argc,
+		     struct nvkm_notify *notify)
+{
+	struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object);
+	if (argc == 0) {
+		notify->size  = 0;
+		notify->types = 1;
+		notify->index = buffer->id;
+		return 0;
+	}
+	return -ENOSYS;
+}
+
+static const struct nvkm_event_func
+nvkm_fault_ntfy = {
+	.ctor = nvkm_fault_ntfy_ctor,
+	.init = nvkm_fault_ntfy_init,
+	.fini = nvkm_fault_ntfy_fini,
+};
+
+static void
+nvkm_fault_intr(struct nvkm_subdev *subdev)
+{
+	struct nvkm_fault *fault = nvkm_fault(subdev);
+	return fault->func->intr(fault);
+}
+
+static int
+nvkm_fault_fini(struct nvkm_subdev *subdev, bool suspend)
+{
+	struct nvkm_fault *fault = nvkm_fault(subdev);
+	if (fault->func->fini)
+		fault->func->fini(fault);
+	return 0;
+}
+
+static int
+nvkm_fault_init(struct nvkm_subdev *subdev)
+{
+	struct nvkm_fault *fault = nvkm_fault(subdev);
+	if (fault->func->init)
+		fault->func->init(fault);
+	return 0;
+}
+
+static int
+nvkm_fault_oneinit_buffer(struct nvkm_fault *fault, int id)
+{
+	struct nvkm_subdev *subdev = &fault->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_vmm *bar2 = nvkm_bar_bar2_vmm(device);
+	struct nvkm_fault_buffer *buffer;
+	int ret;
+
+	if (!(buffer = kzalloc(sizeof(*buffer), GFP_KERNEL)))
+		return -ENOMEM;
+	buffer->fault = fault;
+	buffer->id = id;
+	buffer->entries = fault->func->buffer.entries(buffer);
+	fault->buffer[id] = buffer;
+
+	nvkm_debug(subdev, "buffer %d: %d entries\n", id, buffer->entries);
+
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, buffer->entries *
+			      fault->func->buffer.entry_size, 0x1000, true,
+			      &buffer->mem);
+	if (ret)
+		return ret;
+
+	ret = nvkm_vmm_get(bar2, 12, nvkm_memory_size(buffer->mem),
+			   &buffer->vma);
+	if (ret)
+		return ret;
+
+	return nvkm_memory_map(buffer->mem, 0, bar2, buffer->vma, NULL, 0);
+}
+
+static int
+nvkm_fault_oneinit(struct nvkm_subdev *subdev)
+{
+	struct nvkm_fault *fault = nvkm_fault(subdev);
+	int ret, i;
+
+	for (i = 0; i < ARRAY_SIZE(fault->buffer); i++) {
+		if (i < fault->func->buffer.nr) {
+			ret = nvkm_fault_oneinit_buffer(fault, i);
+			if (ret)
+				return ret;
+			fault->buffer_nr = i + 1;
+		}
+	}
+
+	return nvkm_event_init(&nvkm_fault_ntfy, 1, fault->buffer_nr,
+			       &fault->event);
+}
+
+static void *
+nvkm_fault_dtor(struct nvkm_subdev *subdev)
+{
+	struct nvkm_vmm *bar2 = nvkm_bar_bar2_vmm(subdev->device);
+	struct nvkm_fault *fault = nvkm_fault(subdev);
+	int i;
+
+	nvkm_event_fini(&fault->event);
+
+	for (i = 0; i < fault->buffer_nr; i++) {
+		if (fault->buffer[i]) {
+			nvkm_vmm_put(bar2, &fault->buffer[i]->vma);
+			nvkm_memory_unref(&fault->buffer[i]->mem);
+			kfree(fault->buffer[i]);
+		}
+	}
+
+	return fault;
+}
+
+static const struct nvkm_subdev_func
+nvkm_fault = {
+	.dtor = nvkm_fault_dtor,
+	.oneinit = nvkm_fault_oneinit,
+	.init = nvkm_fault_init,
+	.fini = nvkm_fault_fini,
+	.intr = nvkm_fault_intr,
+};
+
+int
+nvkm_fault_new_(const struct nvkm_fault_func *func, struct nvkm_device *device,
+		int index, struct nvkm_fault **pfault)
+{
+	struct nvkm_fault *fault;
+	if (!(fault = *pfault = kzalloc(sizeof(*fault), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_subdev_ctor(&nvkm_fault, device, index, &fault->subdev);
+	fault->func = func;
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c
new file mode 100644
index 0000000..5e71db2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <subdev/mmu.h>
+
+static void
+gp100_fault_buffer_fini(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	nvkm_mask(device, 0x002a70, 0x00000001, 0x00000000);
+}
+
+static void
+gp100_fault_buffer_init(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	nvkm_wr32(device, 0x002a74, upper_32_bits(buffer->vma->addr));
+	nvkm_wr32(device, 0x002a70, lower_32_bits(buffer->vma->addr));
+	nvkm_mask(device, 0x002a70, 0x00000001, 0x00000001);
+}
+
+static u32
+gp100_fault_buffer_entries(struct nvkm_fault_buffer *buffer)
+{
+	return nvkm_rd32(buffer->fault->subdev.device, 0x002a78);
+}
+
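+/*
+ * GP100 parses no fault entries in the kernel; the interrupt simply
+ * broadcasts the fault event so a listener on the buffer can drain it.
+ */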
+static void
+gp100_fault_intr(struct nvkm_fault *fault)
+{
+	nvkm_event_send(&fault->event, 1, 0, NULL, 0);
+}
+
+static const struct nvkm_fault_func
+gp100_fault = {
+	.intr = gp100_fault_intr,
+	.buffer.nr = 1,
+	.buffer.entry_size = 32,
+	.buffer.entries = gp100_fault_buffer_entries,
+	.buffer.init = gp100_fault_buffer_init,
+	.buffer.fini = gp100_fault_buffer_fini,
+};
+
+int
+gp100_fault_new(struct nvkm_device *device, int index,
+		struct nvkm_fault **pfault)
+{
+	return nvkm_fault_new_(&gp100_fault, device, index, pfault);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c
new file mode 100644
index 0000000..73c7728
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <core/memory.h>
+#include <subdev/mmu.h>
+#include <engine/fifo.h>
+
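+/*
+ * Drain the fault buffer.  GET/PUT are ring indices in units of
+ * 32-byte entries; each entry is decoded into nvkm_fault_data and
+ * handed to the FIFO engine for recovery, with GET written back as
+ * entries are consumed.
+ */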
+static void
+gv100_fault_buffer_process(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	struct nvkm_memory *mem = buffer->mem;
+	const u32 foff = buffer->id * 0x14;
+	u32 get = nvkm_rd32(device, 0x100e2c + foff);
+	u32 put = nvkm_rd32(device, 0x100e30 + foff);
+	if (put == get)
+		return;
+
+	nvkm_kmap(mem);
+	while (get != put) {
+		const u32   base = get * buffer->fault->func->buffer.entry_size;
+		const u32 instlo = nvkm_ro32(mem, base + 0x00);
+		const u32 insthi = nvkm_ro32(mem, base + 0x04);
+		const u32 addrlo = nvkm_ro32(mem, base + 0x08);
+		const u32 addrhi = nvkm_ro32(mem, base + 0x0c);
+		const u32 timelo = nvkm_ro32(mem, base + 0x10);
+		const u32 timehi = nvkm_ro32(mem, base + 0x14);
+		const u32  info0 = nvkm_ro32(mem, base + 0x18);
+		const u32  info1 = nvkm_ro32(mem, base + 0x1c);
+		struct nvkm_fault_data info;
+
+		if (++get == buffer->entries)
+			get = 0;
+		nvkm_wr32(device, 0x100e2c + foff, get);
+
+		info.addr   = ((u64)addrhi << 32) | addrlo;
+		info.inst   = ((u64)insthi << 32) | instlo;
+		info.time   = ((u64)timehi << 32) | timelo;
+		info.engine = (info0 & 0x000000ff);
+		info.valid  = (info1 & 0x80000000) >> 31;
+		info.gpc    = (info1 & 0x1f000000) >> 24;
+		info.hub    = (info1 & 0x00100000) >> 20;
+		info.access = (info1 & 0x000f0000) >> 16;
+		info.client = (info1 & 0x00007f00) >> 8;
+		info.reason = (info1 & 0x0000001f);
+
+		nvkm_fifo_fault(device->fifo, &info);
+	}
+	nvkm_done(mem);
+}
+
+static void
+gv100_fault_buffer_fini(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	const u32 intr = buffer->id ? 0x08000000 : 0x20000000;
+	const u32 foff = buffer->id * 0x14;
+
+	nvkm_mask(device, 0x100a34, intr, intr);
+	nvkm_mask(device, 0x100e34 + foff, 0x80000000, 0x00000000);
+}
+
+static void
+gv100_fault_buffer_init(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	const u32 intr = buffer->id ? 0x08000000 : 0x20000000;
+	const u32 foff = buffer->id * 0x14;
+
+	nvkm_mask(device, 0x100e34 + foff, 0xc0000000, 0x40000000);
+	nvkm_wr32(device, 0x100e28 + foff, upper_32_bits(buffer->vma->addr));
+	nvkm_wr32(device, 0x100e24 + foff, lower_32_bits(buffer->vma->addr));
+	nvkm_mask(device, 0x100e34 + foff, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x100a2c, intr, intr);
+}
+
+static u32
+gv100_fault_buffer_entries(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	const u32 foff = buffer->id * 0x14;
+	nvkm_mask(device, 0x100e34 + foff, 0x40000000, 0x40000000);
+	return nvkm_rd32(device, 0x100e34 + foff) & 0x000fffff;
+}
+
+static int
+gv100_fault_ntfy_nrpfb(struct nvkm_notify *notify)
+{
+	struct nvkm_fault *fault = container_of(notify, typeof(*fault), nrpfb);
+	gv100_fault_buffer_process(fault->buffer[0]);
+	return NVKM_NOTIFY_KEEP;
+}
+
+static void
+gv100_fault_intr_fault(struct nvkm_fault *fault)
+{
+	struct nvkm_subdev *subdev = &fault->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_fault_data info;
+	const u32 addrlo = nvkm_rd32(device, 0x100e4c);
+	const u32 addrhi = nvkm_rd32(device, 0x100e50);
+	const u32  info0 = nvkm_rd32(device, 0x100e54);
+	const u32 insthi = nvkm_rd32(device, 0x100e58);
+	const u32  info1 = nvkm_rd32(device, 0x100e5c);
+
+	info.addr = ((u64)addrhi << 32) | addrlo;
+	info.inst = ((u64)insthi << 32) | (info0 & 0xfffff000);
+	info.time = 0;
+	info.engine = (info0 & 0x000000ff);
+	info.valid  = (info1 & 0x80000000) >> 31;
+	info.gpc    = (info1 & 0x1f000000) >> 24;
+	info.hub    = (info1 & 0x00100000) >> 20;
+	info.access = (info1 & 0x000f0000) >> 16;
+	info.client = (info1 & 0x00007f00) >> 8;
+	info.reason = (info1 & 0x0000001f);
+
+	nvkm_fifo_fault(device->fifo, &info);
+}
+
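+/*
+ * Bit 31 reports a fault delivered via registers, decoded immediately;
+ * bit 29 signals pending entries in buffer 0, which is forwarded as an
+ * event so the notify handler processes them.
+ */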
+static void
+gv100_fault_intr(struct nvkm_fault *fault)
+{
+	struct nvkm_subdev *subdev = &fault->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x100a20);
+
+	if (stat & 0x80000000) {
+		gv100_fault_intr_fault(fault);
+		nvkm_wr32(device, 0x100e60, 0x80000000);
+		stat &= ~0x80000000;
+	}
+
+	if (stat & 0x20000000) {
+		if (fault->buffer[0]) {
+			nvkm_event_send(&fault->event, 1, 0, NULL, 0);
+			stat &= ~0x20000000;
+		}
+	}
+
+	if (stat) {
+		nvkm_debug(subdev, "intr %08x\n", stat);
+	}
+}
+
+static void
+gv100_fault_fini(struct nvkm_fault *fault)
+{
+	nvkm_notify_put(&fault->nrpfb);
+	nvkm_mask(fault->subdev.device, 0x100a34, 0x80000000, 0x80000000);
+}
+
+static void
+gv100_fault_init(struct nvkm_fault *fault)
+{
+	nvkm_mask(fault->subdev.device, 0x100a2c, 0x80000000, 0x80000000);
+	nvkm_notify_get(&fault->nrpfb);
+}
+
+static const struct nvkm_fault_func
+gv100_fault = {
+	.init = gv100_fault_init,
+	.fini = gv100_fault_fini,
+	.intr = gv100_fault_intr,
+	.buffer.nr = 2,
+	.buffer.entry_size = 32,
+	.buffer.entries = gv100_fault_buffer_entries,
+	.buffer.init = gv100_fault_buffer_init,
+	.buffer.fini = gv100_fault_buffer_fini,
+};
+
+int
+gv100_fault_new(struct nvkm_device *device, int index,
+		struct nvkm_fault **pfault)
+{
+	struct nvkm_fault *fault;
+	int ret;
+
+	ret = nvkm_fault_new_(&gv100_fault, device, index, &fault);
+	*pfault = fault;
+	if (ret)
+		return ret;
+
+	return nvkm_notify_init(&fault->buffer[0]->object, &fault->event,
+				gv100_fault_ntfy_nrpfb, false, NULL, 0, 0,
+				&fault->nrpfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h
new file mode 100644
index 0000000..44843ec
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h
@@ -0,0 +1,34 @@
+#ifndef __NVKM_FAULT_PRIV_H__
+#define __NVKM_FAULT_PRIV_H__
+#define nvkm_fault_buffer(p) container_of((p), struct nvkm_fault_buffer, object)
+#define nvkm_fault(p) container_of((p), struct nvkm_fault, subdev)
+#include <subdev/fault.h>
+
+#include <core/event.h>
+#include <core/object.h>
+
+struct nvkm_fault_buffer {
+	struct nvkm_object object;
+	struct nvkm_fault *fault;
+	int id;
+	int entries;
+	struct nvkm_memory *mem;
+	struct nvkm_vma *vma;
+};
+
+int nvkm_fault_new_(const struct nvkm_fault_func *, struct nvkm_device *,
+		    int index, struct nvkm_fault **);
+
+struct nvkm_fault_func {
+	void (*init)(struct nvkm_fault *);
+	void (*fini)(struct nvkm_fault *);
+	void (*intr)(struct nvkm_fault *);
+	struct {
+		int nr;
+		u32 entry_size;
+		u32 (*entries)(struct nvkm_fault_buffer *);
+		void (*init)(struct nvkm_fault_buffer *);
+		void (*fini)(struct nvkm_fault_buffer *);
+	} buffer;
+};
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
index b4f22cc..9696109 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
@@ -30,6 +30,7 @@
 nvkm-y += nvkm/subdev/fb/gp100.o
 nvkm-y += nvkm/subdev/fb/gp102.o
 nvkm-y += nvkm/subdev/fb/gp10b.o
+nvkm-y += nvkm/subdev/fb/gv100.o
 
 nvkm-y += nvkm/subdev/fb/ram.o
 nvkm-y += nvkm/subdev/fb/ramnv04.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
index cdc4e0a2..e8dc4e9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
@@ -46,10 +46,10 @@
 {
 	struct gf100_fb *fb = gf100_fb(base);
 	struct nvkm_device *device = fb->base.subdev.device;
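+	/* Size the debug buffer to one big page (128KiB if the FB page
+	 * size is unset); any MmuDebugBufferSize override below is
+	 * clamped to a minimum of 4KiB. */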
-	int ret, size = 0x1000;
+	int ret, size = 1 << (fb->base.page ? fb->base.page : 17);
 
 	size = nvkm_longopt(device->cfgopt, "MmuDebugBufferSize", size);
-	size = min(size, 0x1000);
+	size = max(size, 0x1000);
 
 	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, size, 0x1000,
 			      true, &fb->base.mmu_rd);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
index 8137e19..d3b8c33 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
@@ -49,8 +49,6 @@
 	if (fb->r100c10_page)
 		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
 
-	nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */
-
 	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8);
 	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8);
 	nvkm_mask(device, 0x100cc4, 0x00060000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
index 147f69b..dffe1f5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
@@ -26,7 +26,7 @@
 
 #include <core/memory.h>
 
-static void
+void
 gp100_fb_init_unkn(struct nvkm_fb *base)
 {
 	struct nvkm_device *device = gf100_fb(base)->base.subdev.device;
@@ -48,7 +48,7 @@
 	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8);
 	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8);
 	nvkm_mask(device, 0x100cc4, 0x00060000,
-		  max(nvkm_memory_size(fb->base.mmu_rd) >> 16, (u64)2) << 17);
+		  min(nvkm_memory_size(fb->base.mmu_rd) >> 16, (u64)2) << 17);
 }
 
 static const struct nvkm_fb_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
new file mode 100644
index 0000000..3c5e02e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ram.h"
+
+static int
+gv100_fb_init_page(struct nvkm_fb *fb)
+{
+	return (fb->page == 16) ? 0 : -EINVAL;
+}
+
+static const struct nvkm_fb_func
+gv100_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gv100_fb_init_page,
+	.init_unkn = gp100_fb_init_unkn,
+	.ram_new = gp100_ram_new,
+	.default_bigpage = 16,
+};
+
+int
+gv100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gf100_fb_new_(&gv100_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
index 414a423..2857f31 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
@@ -68,4 +68,6 @@
 int gf100_fb_init_page(struct nvkm_fb *);
 
 int gm200_fb_init_page(struct nvkm_fb *);
+
+void gp100_fb_init_unkn(struct nvkm_fb *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
index 12d6f4f..290ff1c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
@@ -4,3 +4,4 @@
 nvkm-y += nvkm/subdev/ltc/gm107.o
 nvkm-y += nvkm/subdev/ltc/gm200.o
 nvkm-y += nvkm/subdev/ltc/gp100.o
+nvkm-y += nvkm/subdev/ltc/gp102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
index 1f18527..2324217 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
@@ -55,6 +55,14 @@
 	return index;
 }
 
+int
+nvkm_ltc_zbc_stencil_get(struct nvkm_ltc *ltc, int index, const u32 stencil)
+{
+	ltc->zbc_stencil[index] = stencil;
+	ltc->func->zbc_clear_stencil(ltc, index, stencil);
+	return index;
+}
+
 void
 nvkm_ltc_invalidate(struct nvkm_ltc *ltc)
 {
@@ -92,6 +100,8 @@
 	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
 		ltc->func->zbc_clear_color(ltc, i, ltc->zbc_color[i]);
 		ltc->func->zbc_clear_depth(ltc, i, ltc->zbc_depth[i]);
+		if (ltc->func->zbc_clear_stencil)
+			ltc->func->zbc_clear_stencil(ltc, i, ltc->zbc_stencil[i]);
 	}
 
 	ltc->func->init(ltc);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
index e34d421..e923ed7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
@@ -23,7 +23,7 @@
  */
 #include "priv.h"
 
-static void
+void
 gp100_ltc_intr(struct nvkm_ltc *ltc)
 {
 	struct nvkm_device *device = ltc->subdev.device;
@@ -38,7 +38,7 @@
 	}
 }
 
-static int
+int
 gp100_ltc_oneinit(struct nvkm_ltc *ltc)
 {
 	struct nvkm_device *device = ltc->subdev.device;
@@ -48,7 +48,7 @@
 	return 0;
 }
 
-static void
+void
 gp100_ltc_init(struct nvkm_ltc *ltc)
 {
 	/*XXX: PMU LS call to setup tagram address */
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c
new file mode 100644
index 0000000..601747a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+void
+gp102_ltc_zbc_clear_stencil(struct nvkm_ltc *ltc, int i, const u32 stencil)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	nvkm_mask(device, 0x17e338, 0x0000000f, i);
+	nvkm_wr32(device, 0x17e204, stencil);
+}
+
+static const struct nvkm_ltc_func
+gp102_ltc = {
+	.oneinit = gp100_ltc_oneinit,
+	.init = gp100_ltc_init,
+	.intr = gp100_ltc_intr,
+	.cbc_clear = gm107_ltc_cbc_clear,
+	.cbc_wait = gm107_ltc_cbc_wait,
+	.zbc = 16,
+	.zbc_clear_color = gm107_ltc_zbc_clear_color,
+	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
+	.zbc_clear_stencil = gp102_ltc_zbc_clear_stencil,
+	.invalidate = gf100_ltc_invalidate,
+	.flush = gf100_ltc_flush,
+};
+
+int
+gp102_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc)
+{
+	return nvkm_ltc_new_(&gp102_ltc, device, index, pltc);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
index e71cc25..9dcde43 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
@@ -19,6 +19,7 @@
 	int zbc;
 	void (*zbc_clear_color)(struct nvkm_ltc *, int, const u32[4]);
 	void (*zbc_clear_depth)(struct nvkm_ltc *, int, const u32);
+	void (*zbc_clear_stencil)(struct nvkm_ltc *, int, const u32);
 
 	void (*invalidate)(struct nvkm_ltc *);
 	void (*flush)(struct nvkm_ltc *);
@@ -41,4 +42,8 @@
 void gm107_ltc_cbc_wait(struct nvkm_ltc *);
 void gm107_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]);
 void gm107_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32);
+
+int gp100_ltc_oneinit(struct nvkm_ltc *);
+void gp100_ltc_init(struct nvkm_ltc *);
+void gp100_ltc_intr(struct nvkm_ltc *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
index 7321ad3..43db245 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
@@ -75,10 +75,28 @@
 	spin_unlock_irqrestore(&mc->lock, flags);
 }
 
+const struct nvkm_mc_map
+gp100_mc_intr[] = {
+	{ 0x04000000, NVKM_ENGINE_DISP },
+	{ 0x00000100, NVKM_ENGINE_FIFO },
+	{ 0x00000200, NVKM_SUBDEV_FAULT },
+	{ 0x40000000, NVKM_SUBDEV_IBUS },
+	{ 0x10000000, NVKM_SUBDEV_BUS },
+	{ 0x08000000, NVKM_SUBDEV_FB },
+	{ 0x02000000, NVKM_SUBDEV_LTC },
+	{ 0x01000000, NVKM_SUBDEV_PMU },
+	{ 0x00200000, NVKM_SUBDEV_GPIO },
+	{ 0x00200000, NVKM_SUBDEV_I2C },
+	{ 0x00100000, NVKM_SUBDEV_TIMER },
+	{ 0x00040000, NVKM_SUBDEV_THERM },
+	{ 0x00002000, NVKM_SUBDEV_FB },
+	{},
+};
+
 static const struct nvkm_mc_func
 gp100_mc = {
 	.init = nv50_mc_init,
-	.intr = gk104_mc_intr,
+	.intr = gp100_mc_intr,
 	.intr_unarm = gp100_mc_intr_unarm,
 	.intr_rearm = gp100_mc_intr_rearm,
 	.intr_mask = gp100_mc_intr_mask,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c
index 2283e3b..ff8629d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c
@@ -34,7 +34,7 @@
 static const struct nvkm_mc_func
 gp10b_mc = {
 	.init = gp10b_mc_init,
-	.intr = gk104_mc_intr,
+	.intr = gp100_mc_intr,
 	.intr_unarm = gp100_mc_intr_unarm,
 	.intr_rearm = gp100_mc_intr_rearm,
 	.intr_mask = gp100_mc_intr_mask,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
index 8869d79c..d9e3691 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
@@ -57,4 +57,6 @@
 
 extern const struct nvkm_mc_map gk104_mc_intr[];
 extern const struct nvkm_mc_map gk104_mc_reset[];
+
+extern const struct nvkm_mc_map gp100_mc_intr[];
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
index 67ee983..58a24e3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
@@ -12,6 +12,7 @@
 nvkm-y += nvkm/subdev/mmu/gm20b.o
 nvkm-y += nvkm/subdev/mmu/gp100.o
 nvkm-y += nvkm/subdev/mmu/gp10b.o
+nvkm-y += nvkm/subdev/mmu/gv100.o
 
 nvkm-y += nvkm/subdev/mmu/mem.o
 nvkm-y += nvkm/subdev/mmu/memnv04.o
@@ -31,6 +32,7 @@
 nvkm-y += nvkm/subdev/mmu/vmmgm20b.o
 nvkm-y += nvkm/subdev/mmu/vmmgp100.o
 nvkm-y += nvkm/subdev/mmu/vmmgp10b.o
+nvkm-y += nvkm/subdev/mmu/vmmgv100.o
 
 nvkm-y += nvkm/subdev/mmu/umem.o
 nvkm-y += nvkm/subdev/mmu/ummu.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c
new file mode 100644
index 0000000..f666cb5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "mem.h"
+#include "vmm.h"
+
+#include <core/option.h>
+
+#include <nvif/class.h>
+
+static const struct nvkm_mmu_func
+gv100_mmu = {
+	.dma_bits = 47,
+	.mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}},
+	.mem = {{ -1,  0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map },
+	.vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gv100_vmm_new },
+	.kind = gm200_mmu_kind,
+	.kind_sys = true,
+};
+
+int
+gv100_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu)
+{
+	return nvkm_mmu_new_(&gv100_mmu, device, index, pmmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index da06e64..1a3b0a3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -236,6 +236,9 @@
 int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 		  struct lock_class_key *, const char *,
 		  struct nvkm_vmm **);
+int gv100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
+		  struct lock_class_key *, const char *,
+		  struct nvkm_vmm **);
 
 #define VMM_PRINT(l,v,p,f,a...) do {                                           \
 	struct nvkm_vmm *_vmm = (v);                                           \
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c
new file mode 100644
index 0000000..2fa40c1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "vmm.h"
+
+#include <subdev/fb.h>
+#include <subdev/ltc.h>
+
+#include <nvif/ifc00d.h>
+#include <nvif/unpack.h>
+
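+/*
+ * On top of the GP100 join, GV100 instance blocks carry what appears
+ * to be a per-subcontext PDB table at 0x2a0.  Only entry 0 is marked
+ * valid here, aliasing the PDB that gp100_vmm_join wrote at 0x200.
+ */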
+int
+gv100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
+{
+	u64 data[2], mask;
+	int ret = gp100_vmm_join(vmm, inst), i;
+	if (ret)
+		return ret;
+
+	nvkm_kmap(inst);
+	data[0] = nvkm_ro32(inst, 0x200);
+	data[1] = nvkm_ro32(inst, 0x204);
+	mask = BIT_ULL(0);
+
+	nvkm_wo32(inst, 0x21c, 0x00000000);
+
+	for (i = 0; i < 64; i++) {
+		if (mask & BIT_ULL(i)) {
+			nvkm_wo32(inst, 0x2a4 + (i * 0x10), data[1]);
+			nvkm_wo32(inst, 0x2a0 + (i * 0x10), data[0]);
+		} else {
+			nvkm_wo32(inst, 0x2a4 + (i * 0x10), 0x00000001);
+			nvkm_wo32(inst, 0x2a0 + (i * 0x10), 0x00000001);
+		}
+		nvkm_wo32(inst, 0x2a8 + (i * 0x10), 0x00000000);
+	}
+
+	nvkm_wo32(inst, 0x298, lower_32_bits(mask));
+	nvkm_wo32(inst, 0x29c, upper_32_bits(mask));
+	nvkm_done(inst);
+	return 0;
+}
+
+static const struct nvkm_vmm_func
+gv100_vmm = {
+	.join = gv100_vmm_join,
+	.part = gf100_vmm_part,
+	.aper = gf100_vmm_aper,
+	.valid = gp100_vmm_valid,
+	.flush = gp100_vmm_flush,
+	.page = {
+		{ 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx },
+		{ 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx },
+		{ 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx },
+		{ 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC },
+		{ 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC },
+		{ 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx },
+		{}
+	}
+};
+
+int
+gv100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc,
+	      struct lock_class_key *key, const char *name,
+	      struct nvkm_vmm **pvmm)
+{
+	return nv04_vmm_new_(&gv100_vmm, mmu, 0, addr, size,
+			     argv, argc, key, name, pvmm);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp108.c
index e8c27ec..737a8d5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp108.c
@@ -65,3 +65,24 @@
 MODULE_FIRMWARE("nvidia/gp108/sec2/desc.bin");
 MODULE_FIRMWARE("nvidia/gp108/sec2/image.bin");
 MODULE_FIRMWARE("nvidia/gp108/sec2/sig.bin");
+
+MODULE_FIRMWARE("nvidia/gv100/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gv100/acr/unload_bl.bin");
+MODULE_FIRMWARE("nvidia/gv100/acr/ucode_load.bin");
+MODULE_FIRMWARE("nvidia/gv100/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gv100/gr/sw_method_init.bin");
+MODULE_FIRMWARE("nvidia/gv100/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/gv100/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/gv100/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/gv100/sec2/sig.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c
index 6f10b09..1e1f1c6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c
@@ -80,12 +80,11 @@
 			 struct nvkm_falcon *falcon, u32 addr_args)
 {
 	struct nvkm_device *device = falcon->owner->device;
-	u32 cmdline_size = NVKM_MSGQUEUE_CMDLINE_SIZE;
-	u8 buf[cmdline_size];
+	u8 buf[NVKM_MSGQUEUE_CMDLINE_SIZE];
 
-	memset(buf, 0, cmdline_size);
+	memset(buf, 0, sizeof(buf));
 	nvkm_msgqueue_write_cmdline(queue, buf);
-	nvkm_falcon_load_dmem(falcon, buf, addr_args, cmdline_size, 0);
+	nvkm_falcon_load_dmem(falcon, buf, addr_args, sizeof(buf), 0);
 	/* rearm the queue so it will wait for the init message */
 	nvkm_msgqueue_reinit(queue);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
index fea4957..4f1f3e8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
@@ -48,7 +48,8 @@
 		case 0x00000001: /* DATA */
 			inst        = (data & 0x3c000000) >> 26;
 			info->addr  = (data & 0x00fff000);
-			info->fault = (data & 0x000000f8) >> 3;
+			if (data & 0x00000004)
+				info->fault = (data & 0x000003f8) >> 3;
 			break;
 		case 0x00000002: /* ENUM */
 			if (data & 0x00000020)
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index 3632854..ef3b0e3 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -319,6 +319,9 @@
 	dev->mode_config.max_width = 8192;
 	dev->mode_config.max_height = 8192;
 
+	/* We want the zpos to be normalized */
+	dev->mode_config.normalize_zpos = true;
+
 	dev->mode_config.funcs = &omap_mode_config_funcs;
 	dev->mode_config.helper_private = &omap_mode_config_helper_funcs;
 
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c
index 2899435..161233c 100644
--- a/drivers/gpu/drm/omapdrm/omap_plane.c
+++ b/drivers/gpu/drm/omapdrm/omap_plane.c
@@ -65,7 +65,7 @@
 	info.rotation_type = OMAP_DSS_ROT_NONE;
 	info.rotation = DRM_MODE_ROTATE_0;
 	info.global_alpha = 0xff;
-	info.zorder = state->zpos;
+	info.zorder = state->normalized_zpos;
 
 	/* update scanout: */
 	omap_framebuffer_update_scanout(state->fb, state, &info);
diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
index d964d45..2c9c972 100644
--- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
+++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
@@ -238,12 +238,6 @@
 
 static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val)
 {
-#if 0
-	/* The firmware uses LP DSI transactions like this to bring up
-	 * the hardware, which should be faster than using I2C to then
-	 * pass to the Toshiba.  However, I was unable to get it to
-	 * work.
-	 */
 	u8 msg[] = {
 		reg,
 		reg >> 8,
@@ -253,13 +247,7 @@
 		val >> 24,
 	};
 
-	mipi_dsi_dcs_write_buffer(ts->dsi, msg, sizeof(msg));
-#else
-	rpi_touchscreen_i2c_write(ts, REG_WR_ADDRH, reg >> 8);
-	rpi_touchscreen_i2c_write(ts, REG_WR_ADDRL, reg);
-	rpi_touchscreen_i2c_write(ts, REG_WRITEH, val >> 8);
-	rpi_touchscreen_i2c_write(ts, REG_WRITEL, val);
-#endif
+	mipi_dsi_generic_write(ts->dsi, msg, sizeof(msg));
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile
index 9c5e8db..19a8189 100644
--- a/drivers/gpu/drm/pl111/Makefile
+++ b/drivers/gpu/drm/pl111/Makefile
@@ -3,6 +3,7 @@
 		pl111_versatile.o \
 		pl111_drv.o
 
+pl111_drm-$(CONFIG_ARCH_VEXPRESS) += pl111_vexpress.o
 pl111_drm-$(CONFIG_DEBUG_FS) += pl111_debugfs.o
 
 obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c
index 3106464..19b0d00 100644
--- a/drivers/gpu/drm/pl111/pl111_display.c
+++ b/drivers/gpu/drm/pl111/pl111_display.c
@@ -120,7 +120,8 @@
 }
 
 static void pl111_display_enable(struct drm_simple_display_pipe *pipe,
-				 struct drm_crtc_state *cstate)
+				 struct drm_crtc_state *cstate,
+				 struct drm_plane_state *plane_state)
 {
 	struct drm_crtc *crtc = &pipe->crtc;
 	struct drm_plane *plane = &pipe->plane;
@@ -376,19 +377,13 @@
 	writel(0, priv->regs + priv->ienb);
 }
 
-static int pl111_display_prepare_fb(struct drm_simple_display_pipe *pipe,
-				    struct drm_plane_state *plane_state)
-{
-	return drm_gem_fb_prepare_fb(&pipe->plane, plane_state);
-}
-
 static struct drm_simple_display_pipe_funcs pl111_display_funcs = {
 	.mode_valid = pl111_mode_valid,
 	.check = pl111_display_check,
 	.enable = pl111_display_enable,
 	.disable = pl111_display_disable,
 	.update = pl111_display_update,
-	.prepare_fb = pl111_display_prepare_fb,
+	.prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb,
 };
 
 static int pl111_clk_div_choose_div(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
index 8639b2d..ce4501d 100644
--- a/drivers/gpu/drm/pl111/pl111_drm.h
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -79,6 +79,7 @@
 	const struct pl111_variant_data *variant;
 	void (*variant_display_enable) (struct drm_device *drm, u32 format);
 	void (*variant_display_disable) (struct drm_device *drm);
+	bool use_device_memory;
 };
 
 int pl111_display_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c
index 4621259..454ff08 100644
--- a/drivers/gpu/drm/pl111/pl111_drv.c
+++ b/drivers/gpu/drm/pl111/pl111_drv.c
@@ -60,6 +60,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_graph.h>
+#include <linux/of_reserved_mem.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
@@ -207,6 +208,24 @@
 	return ret;
 }
 
+static struct drm_gem_object *
+pl111_gem_import_sg_table(struct drm_device *dev,
+			  struct dma_buf_attachment *attach,
+			  struct sg_table *sgt)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+
+	/*
+	 * When using device-specific reserved memory we can't import
+	 * DMA buffers: an imported buffer may live anywhere in system
+	 * memory, while we can only handle memory inside our dedicated
+	 * range.
+	 */
+	if (priv->use_device_memory)
+		return ERR_PTR(-EINVAL);
+
+	return drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
+}
+
 DEFINE_DRM_GEM_CMA_FOPS(drm_fops);
 
 static struct drm_driver pl111_drm_driver = {
@@ -227,7 +246,7 @@
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_import = drm_gem_prime_import,
-	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+	.gem_prime_import_sg_table = pl111_gem_import_sg_table,
 	.gem_prime_export = drm_gem_prime_export,
 	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
 
@@ -257,6 +276,12 @@
 	drm->dev_private = priv;
 	priv->variant = variant;
 
+	ret = of_reserved_mem_device_init(dev);
+	if (!ret) {
+		dev_info(dev, "using device-specific reserved memory\n");
+		priv->use_device_memory = true;
+	}
+
 	if (of_property_read_u32(dev->of_node, "max-memory-bandwidth",
 				 &priv->memory_bw)) {
 		dev_info(dev, "no max memory bandwidth specified, assume unlimited\n");
@@ -275,7 +300,8 @@
 	priv->regs = devm_ioremap_resource(dev, &amba_dev->res);
 	if (IS_ERR(priv->regs)) {
 		dev_err(dev, "%s failed mmio\n", __func__);
-		return PTR_ERR(priv->regs);
+		ret = PTR_ERR(priv->regs);
+		goto dev_unref;
 	}
 
 	/* This may override some variant settings */
@@ -305,11 +331,14 @@
 
 dev_unref:
 	drm_dev_unref(drm);
+	of_reserved_mem_device_release(dev);
+
 	return ret;
 }
 
 static int pl111_amba_remove(struct amba_device *amba_dev)
 {
+	struct device *dev = &amba_dev->dev;
 	struct drm_device *drm = amba_get_drvdata(amba_dev);
 	struct pl111_drm_dev_private *priv = drm->dev_private;
 
@@ -319,6 +348,7 @@
 		drm_panel_bridge_remove(priv->bridge);
 	drm_mode_config_cleanup(drm);
 	drm_dev_unref(drm);
+	of_reserved_mem_device_release(dev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c
index 9302f51..b9baefd 100644
--- a/drivers/gpu/drm/pl111/pl111_versatile.c
+++ b/drivers/gpu/drm/pl111/pl111_versatile.c
@@ -1,12 +1,14 @@
 #include <linux/amba/clcd-regs.h>
 #include <linux/device.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <drm/drmP.h>
 #include "pl111_versatile.h"
+#include "pl111_vexpress.h"
 #include "pl111_drm.h"
 
 static struct regmap *versatile_syscon_map;
@@ -22,6 +24,7 @@
 	REALVIEW_CLCD_PB11MP,
 	REALVIEW_CLCD_PBA8,
 	REALVIEW_CLCD_PBX,
+	VEXPRESS_CLCD_V2M,
 };
 
 static const struct of_device_id versatile_clcd_of_match[] = {
@@ -53,6 +56,10 @@
 		.compatible = "arm,realview-pbx-syscon",
 		.data = (void *)REALVIEW_CLCD_PBX,
 	},
+	{
+		.compatible = "arm,vexpress-muxfpga",
+		.data = (void *)VEXPRESS_CLCD_V2M,
+	},
 	{},
 };
 
@@ -286,12 +293,26 @@
 	.fb_bpp = 16,
 };
 
+/*
+ * Versatile Express PL111 variant: as on RealView, we cap the maximum
+ * BPP at 16 to reach 1024x768 without saturating the memory bus.  The
+ * clock divider also appears broken on the Versatile Express.
+ */
+static const struct pl111_variant_data pl111_vexpress = {
+	.name = "PL111 Versatile Express",
+	.formats = pl111_realview_pixel_formats,
+	.nformats = ARRAY_SIZE(pl111_realview_pixel_formats),
+	.fb_bpp = 16,
+	.broken_clockdivider = true,
+};
+
 int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv)
 {
 	const struct of_device_id *clcd_id;
 	enum versatile_clcd versatile_clcd_type;
 	struct device_node *np;
 	struct regmap *map;
+	int ret;
 
 	np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match,
 					     &clcd_id);
@@ -301,7 +322,33 @@
 	}
 	versatile_clcd_type = (enum versatile_clcd)clcd_id->data;
 
-	map = syscon_node_to_regmap(np);
+	/* Versatile Express special handling */
+	if (versatile_clcd_type == VEXPRESS_CLCD_V2M) {
+		struct platform_device *pdev;
+
+		/* Register a driver for the muxfpga */
+		ret = vexpress_muxfpga_init();
+		if (ret) {
+			dev_err(dev, "unable to initialize muxfpga driver\n");
+			return ret;
+		}
+
+		/* Fetch the regmap registered by the muxfpga driver's probe */
+		pdev = of_find_device_by_node(np);
+		if (!pdev) {
+			dev_err(dev, "can't find the sysreg device, deferring\n");
+			return -EPROBE_DEFER;
+		}
+		map = dev_get_drvdata(&pdev->dev);
+		if (!map) {
+			dev_err(dev, "sysreg has not yet probed\n");
+			platform_device_put(pdev);
+			return -EPROBE_DEFER;
+		}
+	} else {
+		map = syscon_node_to_regmap(np);
+	}
+
 	if (IS_ERR(map)) {
 		dev_err(dev, "no Versatile syscon regmap\n");
 		return PTR_ERR(map);
@@ -340,6 +387,13 @@
 		priv->variant_display_disable = pl111_realview_clcd_disable;
 		dev_info(dev, "set up callbacks for RealView PL111\n");
 		break;
+	case VEXPRESS_CLCD_V2M:
+		priv->variant = &pl111_vexpress;
+		dev_info(dev, "initializing Versatile Express PL111\n");
+		ret = pl111_vexpress_clcd_init(dev, priv, map);
+		if (ret)
+			return ret;
+		break;
 	default:
 		dev_info(dev, "unknown Versatile system controller\n");
 		break;
diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.c b/drivers/gpu/drm/pl111/pl111_vexpress.c
new file mode 100644
index 0000000..a534b22
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_vexpress.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Versatile Express PL111 handling
+ * Copyright (C) 2018 Linus Walleij
+ *
+ * This module binds to the "arm,vexpress-muxfpga" device on the
+ * Versatile Express configuration bus and sets up which CLCD instance
+ * gets muxed out on the DVI bridge.
+ */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/vexpress.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include "pl111_drm.h"
+#include "pl111_vexpress.h"
+
+#define VEXPRESS_FPGAMUX_MOTHERBOARD		0x00
+#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_1	0x01
+#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_2	0x02
+
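+/*
+ * Decide which CLCD instance the motherboard FPGA should mux out to
+ * the DVI connector and program the mux register accordingly.
+ * Returns -ENODEV when this CLCD loses the mux and should stay
+ * deactivated.
+ */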
+int pl111_vexpress_clcd_init(struct device *dev,
+			     struct pl111_drm_dev_private *priv,
+			     struct regmap *map)
+{
+	struct device_node *root;
+	struct device_node *child;
+	struct device_node *ct_clcd = NULL;
+	bool has_coretile_clcd = false;
+	bool has_coretile_hdlcd = false;
+	bool mux_motherboard = true;
+	u32 val;
+	int ret;
+
+	/*
+	 * Determine whether the core tile carries a CLCD or HDLCD by
+	 * looking for either device in the root of the device tree.
+	 */
+	root = of_find_node_by_path("/");
+	if (!root)
+		return -EINVAL;
+
+	for_each_available_child_of_node(root, child) {
+		if (of_device_is_compatible(child, "arm,pl111")) {
+			has_coretile_clcd = true;
+			ct_clcd = child;
+			break;
+		}
+		if (of_device_is_compatible(child, "arm,hdlcd")) {
+			has_coretile_hdlcd = true;
+			break;
+		}
+	}
+
+	/*
+	 * If there is a coretile HDLCD and it has a driver,
+	 * do not mux the CLCD on the motherboard to the DVI.
+	 */
+	if (has_coretile_hdlcd && IS_ENABLED(CONFIG_DRM_HDLCD))
+		mux_motherboard = false;
+
+	/*
+	 * On the Versatile Express CA9 we let the CLCD on the core tile
+	 * take precedence, so in this case too we do not mux the
+	 * motherboard CLCD to the DVI.
+	 */
+	if (has_coretile_clcd)
+		mux_motherboard = false;
+
+	if (mux_motherboard) {
+		dev_info(dev, "DVI muxed to motherboard CLCD\n");
+		val = VEXPRESS_FPGAMUX_MOTHERBOARD;
+	} else if (ct_clcd == dev->of_node) {
+		dev_info(dev,
+			 "DVI muxed to daughterboard 1 (core tile) CLCD\n");
+		val = VEXPRESS_FPGAMUX_DAUGHTERBOARD_1;
+	} else {
+		dev_info(dev, "core tile graphics present\n");
+		dev_info(dev, "this device will be deactivated\n");
+		return -ENODEV;
+	}
+
+	ret = regmap_write(map, 0, val);
+	if (ret) {
+		dev_err(dev, "error setting DVI muxmode\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * This sets up the regmap pointer that will then be retrieved by
+ * the detection code in pl111_versatile.c and passed in to the
+ * pl111_vexpress_clcd_init() function above.
+ */
+static int vexpress_muxfpga_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct regmap *map;
+
+	map = devm_regmap_init_vexpress_config(&pdev->dev);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	dev_set_drvdata(dev, map);
+
+	return 0;
+}
+
+static const struct of_device_id vexpress_muxfpga_match[] = {
+	{ .compatible = "arm,vexpress-muxfpga", },
+	{ /* sentinel: of_device_id tables must be NULL-terminated */ }
+};
+
+static struct platform_driver vexpress_muxfpga_driver = {
+	.driver = {
+		.name = "vexpress-muxfpga",
+		.of_match_table = of_match_ptr(vexpress_muxfpga_match),
+	},
+	.probe = vexpress_muxfpga_probe,
+};
+
+int vexpress_muxfpga_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&vexpress_muxfpga_driver);
+	/* -EBUSY just means this driver is already registered */
+	if (ret == -EBUSY)
+		ret = 0;
+	return ret;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.h b/drivers/gpu/drm/pl111/pl111_vexpress.h
new file mode 100644
index 0000000..5d3681b
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_vexpress.h
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PL111_VEXPRESS_H
+#define PL111_VEXPRESS_H
+
+struct device;
+struct pl111_drm_dev_private;
+struct regmap;
+
+#ifdef CONFIG_ARCH_VEXPRESS
+
+int pl111_vexpress_clcd_init(struct device *dev,
+			     struct pl111_drm_dev_private *priv,
+			     struct regmap *map);
+
+int vexpress_muxfpga_init(void);
+
+#else
+
+static inline int pl111_vexpress_clcd_init(struct device *dev,
+					   struct pl111_drm_dev_private *priv,
+					   struct regmap *map)
+{
+	return -ENODEV;
+}
+
+static inline int vexpress_muxfpga_init(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_ARCH_VEXPRESS */
+
+#endif /* PL111_VEXPRESS_H */
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 01665b9..208af9f 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -339,12 +339,9 @@
 	surface_height = surf->surf.height;
 
 	if (area->left < 0 || area->top < 0 ||
-	    area->right > surface_width || area->bottom > surface_height) {
-		qxl_io_log(qdev, "%s: not doing area update for "
-			   "%d, (%d,%d,%d,%d) (%d,%d)\n", __func__, surface_id, area->left,
-			   area->top, area->right, area->bottom, surface_width, surface_height);
+	    area->right > surface_width || area->bottom > surface_height)
 		return -EINVAL;
-	}
+
 	mutex_lock(&qdev->update_area_mutex);
 	qdev->ram_header->update_area = *area;
 	qdev->ram_header->update_surface = surface_id;
@@ -372,6 +369,7 @@
 void qxl_io_destroy_primary(struct qxl_device *qdev)
 {
 	wait_for_io_cmd(qdev, 0, QXL_IO_DESTROY_PRIMARY_ASYNC);
+	qdev->primary_created = false;
 }
 
 void qxl_io_create_primary(struct qxl_device *qdev,
@@ -397,6 +395,7 @@
 	create->type = QXL_SURF_TYPE_PRIMARY;
 
 	wait_for_io_cmd(qdev, 0, QXL_IO_CREATE_PRIMARY_ASYNC);
+	qdev->primary_created = true;
 }
 
 void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id)
@@ -405,20 +404,6 @@
 	wait_for_io_cmd(qdev, id, QXL_IO_MEMSLOT_ADD_ASYNC);
 }
 
-void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...)
-{
-	va_list args;
-
-	va_start(args, fmt);
-	vsnprintf(qdev->ram_header->log_buf, QXL_LOG_BUF_SIZE, fmt, args);
-	va_end(args);
-	/*
-	 * DO not do a DRM output here - this will call printk, which will
-	 * call back into qxl for rendering (qxl_fb)
-	 */
-	outb(0, qdev->io_base + QXL_IO_LOG);
-}
-
 void qxl_io_reset(struct qxl_device *qdev)
 {
 	outb(0, qdev->io_base + QXL_IO_RESET);
@@ -426,19 +411,6 @@
 
 void qxl_io_monitors_config(struct qxl_device *qdev)
 {
-	qxl_io_log(qdev, "%s: %d [%dx%d+%d+%d]\n", __func__,
-		   qdev->monitors_config ?
-		   qdev->monitors_config->count : -1,
-		   qdev->monitors_config && qdev->monitors_config->count ?
-		   qdev->monitors_config->heads[0].width : -1,
-		   qdev->monitors_config && qdev->monitors_config->count ?
-		   qdev->monitors_config->heads[0].height : -1,
-		   qdev->monitors_config && qdev->monitors_config->count ?
-		   qdev->monitors_config->heads[0].x : -1,
-		   qdev->monitors_config && qdev->monitors_config->count ?
-		   qdev->monitors_config->heads[0].y : -1
-		   );
-
 	wait_for_io_cmd(qdev, 0, QXL_IO_MONITORS_CONFIG_ASYNC);
 }
 
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index ecb35ed..b8cda94 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -48,12 +48,8 @@
 		qdev->client_monitors_config = kzalloc(
 				sizeof(struct qxl_monitors_config) +
 				sizeof(struct qxl_head) * count, GFP_KERNEL);
-		if (!qdev->client_monitors_config) {
-			qxl_io_log(qdev,
-				   "%s: allocation failure for %u heads\n",
-				   __func__, count);
+		if (!qdev->client_monitors_config)
 			return;
-		}
 	}
 	qdev->client_monitors_config->count = count;
 }
@@ -74,12 +70,8 @@
 	num_monitors = qdev->rom->client_monitors_config.count;
 	crc = crc32(0, (const uint8_t *)&qdev->rom->client_monitors_config,
 		  sizeof(qdev->rom->client_monitors_config));
-	if (crc != qdev->rom->client_monitors_config_crc) {
-		qxl_io_log(qdev, "crc mismatch: have %X (%zd) != %X\n", crc,
-			   sizeof(qdev->rom->client_monitors_config),
-			   qdev->rom->client_monitors_config_crc);
+	if (crc != qdev->rom->client_monitors_config_crc)
 		return MONITORS_CONFIG_BAD_CRC;
-	}
 	if (!num_monitors) {
 		DRM_DEBUG_KMS("no client monitors configured\n");
 		return status;
@@ -170,12 +162,10 @@
 		udelay(5);
 	}
 	if (status == MONITORS_CONFIG_BAD_CRC) {
-		qxl_io_log(qdev, "config: bad crc\n");
 		DRM_DEBUG_KMS("ignoring client monitors config: bad crc");
 		return;
 	}
 	if (status == MONITORS_CONFIG_UNCHANGED) {
-		qxl_io_log(qdev, "config: unchanged\n");
 		DRM_DEBUG_KMS("ignoring client monitors config: unchanged");
 		return;
 	}
@@ -268,6 +258,89 @@
 	return i - 1;
 }
 
+static void qxl_send_monitors_config(struct qxl_device *qdev)
+{
+	int i;
+
+	BUG_ON(!qdev->ram_header->monitors_config);
+
+	if (qdev->monitors_config->count == 0)
+		return;
+
+	for (i = 0 ; i < qdev->monitors_config->count ; ++i) {
+		struct qxl_head *head = &qdev->monitors_config->heads[i];
+
+		if (head->y > 8192 || head->x > 8192 ||
+		    head->width > 8192 || head->height > 8192) {
+			DRM_ERROR("head %d wrong: %dx%d+%d+%d\n",
+				  i, head->width, head->height,
+				  head->x, head->y);
+			return;
+		}
+	}
+	qxl_io_monitors_config(qdev);
+}
+
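+/*
+ * Rebuild this CRTC's head from its current atomic state and, if
+ * anything actually changed, push the updated monitors config to the
+ * device.  This replaces the legacy encoder-commit/mode_set paths
+ * removed below.
+ */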
+static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc,
+					    const char *reason)
+{
+	struct drm_device *dev = crtc->dev;
+	struct qxl_device *qdev = dev->dev_private;
+	struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
+	struct qxl_head head;
+	int oldcount, i = qcrtc->index;
+
+	if (!qdev->primary_created) {
+		DRM_DEBUG_KMS("no primary surface, skip (%s)\n", reason);
+		return;
+	}
+
+	if (!qdev->monitors_config ||
+	    qdev->monitors_config->max_allowed <= i)
+		return;
+
+	head.id = i;
+	head.flags = 0;
+	oldcount = qdev->monitors_config->count;
+	if (crtc->state->active) {
+		struct drm_display_mode *mode = &crtc->mode;
+		head.width = mode->hdisplay;
+		head.height = mode->vdisplay;
+		head.x = crtc->x;
+		head.y = crtc->y;
+		if (qdev->monitors_config->count < i + 1)
+			qdev->monitors_config->count = i + 1;
+	} else if (i > 0) {
+		head.width = 0;
+		head.height = 0;
+		head.x = 0;
+		head.y = 0;
+		if (qdev->monitors_config->count == i + 1)
+			qdev->monitors_config->count = i;
+	} else {
+		DRM_DEBUG_KMS("inactive head 0, skip (%s)\n", reason);
+		return;
+	}
+
+	if (head.width  == qdev->monitors_config->heads[i].width  &&
+	    head.height == qdev->monitors_config->heads[i].height &&
+	    head.x      == qdev->monitors_config->heads[i].x      &&
+	    head.y      == qdev->monitors_config->heads[i].y      &&
+	    oldcount    == qdev->monitors_config->count)
+		return;
+
+	DRM_DEBUG_KMS("head %d, %dx%d, at +%d+%d, %s (%s)\n",
+		      i, head.width, head.height, head.x, head.y,
+		      crtc->state->active ? "on" : "off", reason);
+	if (oldcount != qdev->monitors_config->count)
+		DRM_DEBUG_KMS("active heads %d -> %d (%d total)\n",
+			      oldcount, qdev->monitors_config->count,
+			      qdev->monitors_config->max_allowed);
+
+	qdev->monitors_config->heads[i] = head;
+	qxl_send_monitors_config(qdev);
+}
+
 static void qxl_crtc_atomic_flush(struct drm_crtc *crtc,
 				  struct drm_crtc_state *old_crtc_state)
 {
@@ -283,6 +356,8 @@
 		drm_crtc_send_vblank_event(crtc, event);
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 	}
+
+	qxl_crtc_update_monitors_config(crtc, "flush");
 }
 
 static void qxl_crtc_destroy(struct drm_crtc *crtc)
@@ -381,95 +456,19 @@
 	return 0;
 }
 
-static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	struct drm_device *dev = crtc->dev;
-	struct qxl_device *qdev = dev->dev_private;
-
-	qxl_io_log(qdev, "%s: (%d,%d) => (%d,%d)\n",
-		   __func__,
-		   mode->hdisplay, mode->vdisplay,
-		   adjusted_mode->hdisplay,
-		   adjusted_mode->vdisplay);
-	return true;
-}
-
-static void
-qxl_send_monitors_config(struct qxl_device *qdev)
-{
-	int i;
-
-	BUG_ON(!qdev->ram_header->monitors_config);
-
-	if (qdev->monitors_config->count == 0) {
-		qxl_io_log(qdev, "%s: 0 monitors??\n", __func__);
-		return;
-	}
-	for (i = 0 ; i < qdev->monitors_config->count ; ++i) {
-		struct qxl_head *head = &qdev->monitors_config->heads[i];
-
-		if (head->y > 8192 || head->x > 8192 ||
-		    head->width > 8192 || head->height > 8192) {
-			DRM_ERROR("head %d wrong: %dx%d+%d+%d\n",
-				  i, head->width, head->height,
-				  head->x, head->y);
-			return;
-		}
-	}
-	qxl_io_monitors_config(qdev);
-}
-
-static void qxl_monitors_config_set(struct qxl_device *qdev,
-				    int index,
-				    unsigned x, unsigned y,
-				    unsigned width, unsigned height,
-				    unsigned surf_id)
-{
-	DRM_DEBUG_KMS("%d:%dx%d+%d+%d\n", index, width, height, x, y);
-	qdev->monitors_config->heads[index].x = x;
-	qdev->monitors_config->heads[index].y = y;
-	qdev->monitors_config->heads[index].width = width;
-	qdev->monitors_config->heads[index].height = height;
-	qdev->monitors_config->heads[index].surface_id = surf_id;
-
-}
-
-static void qxl_mode_set_nofb(struct drm_crtc *crtc)
-{
-	struct qxl_device *qdev = crtc->dev->dev_private;
-	struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
-	struct drm_display_mode *mode = &crtc->mode;
-
-	DRM_DEBUG("Mode set (%d,%d)\n",
-		  mode->hdisplay, mode->vdisplay);
-
-	qxl_monitors_config_set(qdev, qcrtc->index, 0, 0,
-				mode->hdisplay,	mode->vdisplay, 0);
-
-}
-
 static void qxl_crtc_atomic_enable(struct drm_crtc *crtc,
 				   struct drm_crtc_state *old_state)
 {
-	DRM_DEBUG("\n");
+	qxl_crtc_update_monitors_config(crtc, "enable");
 }
 
 static void qxl_crtc_atomic_disable(struct drm_crtc *crtc,
 				    struct drm_crtc_state *old_state)
 {
-	struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
-	struct qxl_device *qdev = crtc->dev->dev_private;
-
-	qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, 0, 0, 0);
-
-	qxl_send_monitors_config(qdev);
+	qxl_crtc_update_monitors_config(crtc, "disable");
 }
 
 static const struct drm_crtc_helper_funcs qxl_crtc_helper_funcs = {
-	.mode_fixup = qxl_crtc_mode_fixup,
-	.mode_set_nofb = qxl_mode_set_nofb,
 	.atomic_flush = qxl_crtc_atomic_flush,
 	.atomic_enable = qxl_crtc_atomic_enable,
 	.atomic_disable = qxl_crtc_atomic_disable,
@@ -613,12 +612,6 @@
 	}
 }
 
-static int qxl_plane_atomic_check(struct drm_plane *plane,
-				  struct drm_plane_state *state)
-{
-	return 0;
-}
-
 static void qxl_cursor_atomic_update(struct drm_plane *plane,
 				     struct drm_plane_state *old_state)
 {
@@ -824,7 +817,6 @@
 };
 
 static const struct drm_plane_helper_funcs qxl_cursor_helper_funcs = {
-	.atomic_check = qxl_plane_atomic_check,
 	.atomic_update = qxl_cursor_atomic_update,
 	.atomic_disable = qxl_cursor_atomic_disable,
 	.prepare_fb = qxl_plane_prepare_fb,
@@ -949,81 +941,6 @@
 	return r;
 }
 
-static void qxl_enc_dpms(struct drm_encoder *encoder, int mode)
-{
-	DRM_DEBUG("\n");
-}
-
-static void qxl_enc_prepare(struct drm_encoder *encoder)
-{
-	DRM_DEBUG("\n");
-}
-
-static void qxl_write_monitors_config_for_encoder(struct qxl_device *qdev,
-		struct drm_encoder *encoder)
-{
-	int i;
-	struct qxl_output *output = drm_encoder_to_qxl_output(encoder);
-	struct qxl_head *head;
-	struct drm_display_mode *mode;
-
-	BUG_ON(!encoder);
-	/* TODO: ugly, do better */
-	i = output->index;
-	if (!qdev->monitors_config ||
-	    qdev->monitors_config->max_allowed <= i) {
-		DRM_ERROR(
-		"head number too large or missing monitors config: %p, %d",
-		qdev->monitors_config,
-		qdev->monitors_config ?
-			qdev->monitors_config->max_allowed : -1);
-		return;
-	}
-	if (!encoder->crtc) {
-		DRM_ERROR("missing crtc on encoder %p\n", encoder);
-		return;
-	}
-	if (i != 0)
-		DRM_DEBUG("missing for multiple monitors: no head holes\n");
-	head = &qdev->monitors_config->heads[i];
-	head->id = i;
-	if (encoder->crtc->enabled) {
-		mode = &encoder->crtc->mode;
-		head->width = mode->hdisplay;
-		head->height = mode->vdisplay;
-		head->x = encoder->crtc->x;
-		head->y = encoder->crtc->y;
-		if (qdev->monitors_config->count < i + 1)
-			qdev->monitors_config->count = i + 1;
-	} else {
-		head->width = 0;
-		head->height = 0;
-		head->x = 0;
-		head->y = 0;
-	}
-	DRM_DEBUG_KMS("setting head %d to +%d+%d %dx%d out of %d\n",
-		      i, head->x, head->y, head->width, head->height, qdev->monitors_config->count);
-	head->flags = 0;
-	/* TODO - somewhere else to call this for multiple monitors
-	 * (config_commit?) */
-	qxl_send_monitors_config(qdev);
-}
-
-static void qxl_enc_commit(struct drm_encoder *encoder)
-{
-	struct qxl_device *qdev = encoder->dev->dev_private;
-
-	qxl_write_monitors_config_for_encoder(qdev, encoder);
-	DRM_DEBUG("\n");
-}
-
-static void qxl_enc_mode_set(struct drm_encoder *encoder,
-				struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode)
-{
-	DRM_DEBUG("\n");
-}
-
 static int qxl_conn_get_modes(struct drm_connector *connector)
 {
 	unsigned pwidth = 1024;
@@ -1037,7 +954,7 @@
 	return ret;
 }
 
-static int qxl_conn_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status qxl_conn_mode_valid(struct drm_connector *connector,
 			       struct drm_display_mode *mode)
 {
 	struct drm_device *ddev = connector->dev;
@@ -1069,10 +986,6 @@
 
 
 static const struct drm_encoder_helper_funcs qxl_enc_helper_funcs = {
-	.dpms = qxl_enc_dpms,
-	.prepare = qxl_enc_prepare,
-	.mode_set = qxl_enc_mode_set,
-	.commit = qxl_enc_commit,
 };
 
 static const struct drm_connector_helper_funcs qxl_connector_helper_funcs = {
@@ -1100,21 +1013,11 @@
 		     qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]);
 
 	DRM_DEBUG("#%d connected: %d\n", output->index, connected);
-	if (!connected)
-		qxl_monitors_config_set(qdev, output->index, 0, 0, 0, 0, 0);
 
 	return connected ? connector_status_connected
 			 : connector_status_disconnected;
 }
 
-static int qxl_conn_set_property(struct drm_connector *connector,
-				   struct drm_property *property,
-				   uint64_t value)
-{
-	DRM_DEBUG("\n");
-	return 0;
-}
-
 static void qxl_conn_destroy(struct drm_connector *connector)
 {
 	struct qxl_output *qxl_output =
@@ -1129,7 +1032,6 @@
 	.dpms = drm_helper_connector_dpms,
 	.detect = qxl_conn_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
-	.set_property = qxl_conn_set_property,
 	.destroy = qxl_conn_destroy,
 	.reset = drm_atomic_helper_connector_reset,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
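
The qxl hunks above fold the old encoder commit/mode_set bookkeeping into
one helper driven from the atomic CRTC hooks (flush/enable/disable). Only
the tail of that helper is visible in the first hunk, so here is a
simplified reconstruction of its shape: a sketch, not the patch's exact
body (example_update_monitors_config is a placeholder name):

  /* Recompute this CRTC's head from atomic state, then push the whole
   * monitors config to the device in a single place.
   */
  static void example_update_monitors_config(struct drm_crtc *crtc,
  					     const char *reason)
  {
  	struct qxl_device *qdev = crtc->dev->dev_private;
  	struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
  	struct qxl_head head = { .id = qcrtc->index };

  	if (crtc->state && crtc->state->enable) {
  		head.width = crtc->mode.hdisplay;
  		head.height = crtc->mode.vdisplay;
  		head.x = crtc->x;
  		head.y = crtc->y;
  	}	/* a disabled head simply stays 0x0+0+0 */

  	qdev->monitors_config->heads[qcrtc->index] = head;
  	qxl_send_monitors_config(qdev);
  }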
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 864b456..01220d3 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -299,9 +299,6 @@
 	int monitors_config_height;
 };
 
-/* forward declaration for QXL_INFO_IO */
-__printf(2,3) void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...);
-
 extern const struct drm_ioctl_desc qxl_ioctls[];
 extern int qxl_max_ioctl;
 
diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index 3388914..9a67526 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -185,8 +185,6 @@
 	/*
 	 * we are using a shadow draw buffer, at qdev->surface0_shadow
 	 */
-	qxl_io_log(qdev, "dirty x[%d, %d], y[%d, %d]\n", clips->x1, clips->x2,
-		   clips->y1, clips->y2);
 	image->dx = clips->x1;
 	image->dy = clips->y1;
 	image->width = clips->x2 - clips->x1;
diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index 23a4010..3bb31ad 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c
@@ -57,10 +57,9 @@
 		 * to avoid endless loops).
 		 */
 		qdev->irq_received_error++;
-		qxl_io_log(qdev, "%s: driver is in bug mode.\n", __func__);
+		DRM_WARN("driver is in bug mode\n");
 	}
 	if (pending & QXL_INTERRUPT_CLIENT_MONITORS_CONFIG) {
-		qxl_io_log(qdev, "QXL_INTERRUPT_CLIENT_MONITORS_CONFIG\n");
 		schedule_work(&qdev->client_monitors_config_work);
 	}
 	qdev->ram_header->int_mask = QXL_INTERRUPT_MASK;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index ee2340e3..86a1fb3 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -105,16 +105,16 @@
 static struct vm_operations_struct qxl_ttm_vm_ops;
 static const struct vm_operations_struct *ttm_vm_ops;
 
-static int qxl_ttm_fault(struct vm_fault *vmf)
+static vm_fault_t qxl_ttm_fault(struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *bo;
-	int r;
+	vm_fault_t ret;
 
 	bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
 	if (bo == NULL)
 		return VM_FAULT_NOPAGE;
-	r = ttm_vm_ops->fault(vmf);
-	return r;
+	ret = ttm_vm_ops->fault(vmf);
+	return ret;
 }
 
 int qxl_mmap(struct file *filp, struct vm_area_struct *vma)
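
The qxl_ttm.c hunk is part of the tree-wide vm_fault_t conversion: fault
handlers now return a dedicated type, so VM_FAULT_* codes can no longer be
silently mixed up with negative errno values. A generic sketch of the
pattern (example_fault and the pfn field are hypothetical, not taken from
this patch):

  static vm_fault_t example_fault(struct vm_fault *vmf)
  {
  	struct example_obj *obj = vmf->vma->vm_private_data;

  	if (!obj)
  		return VM_FAULT_SIGBUS;	/* a VM_FAULT_* code, not -ENODEV */

  	/* vmf_insert_pfn() already returns vm_fault_t */
  	return vmf_insert_pfn(vmf->vma, vmf->address, obj->pfn);
  }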
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 02baaaf..efbd581 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1145,7 +1145,6 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_framebuffer *radeon_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct radeon_bo *rbo;
@@ -1164,19 +1163,15 @@
 		return 0;
 	}
 
-	if (atomic) {
-		radeon_fb = to_radeon_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	}
-	else {
-		radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	/* If atomic, assume fb object is pinned & idle & fenced and
 	 * just update base pointers
 	 */
-	obj = radeon_fb->obj;
+	obj = target_fb->obj[0];
 	rbo = gem_to_radeon_bo(obj);
 	r = radeon_bo_reserve(rbo, false);
 	if (unlikely(r != 0))
@@ -1441,8 +1436,7 @@
 	WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		radeon_fb = to_radeon_framebuffer(fb);
-		rbo = gem_to_radeon_bo(radeon_fb->obj);
+		rbo = gem_to_radeon_bo(fb->obj[0]);
 		r = radeon_bo_reserve(rbo, false);
 		if (unlikely(r != 0))
 			return r;
@@ -1463,7 +1457,6 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_framebuffer *radeon_fb;
 	struct drm_gem_object *obj;
 	struct radeon_bo *rbo;
 	struct drm_framebuffer *target_fb;
@@ -1481,16 +1474,12 @@
 		return 0;
 	}
 
-	if (atomic) {
-		radeon_fb = to_radeon_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	}
-	else {
-		radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
-	obj = radeon_fb->obj;
+	obj = target_fb->obj[0];
 	rbo = gem_to_radeon_bo(obj);
 	r = radeon_bo_reserve(rbo, false);
 	if (unlikely(r != 0))
@@ -1641,8 +1630,7 @@
 	WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		radeon_fb = to_radeon_framebuffer(fb);
-		rbo = gem_to_radeon_bo(radeon_fb->obj);
+		rbo = gem_to_radeon_bo(fb->obj[0]);
 		r = radeon_bo_reserve(rbo, false);
 		if (unlikely(r != 0))
 			return r;
@@ -2149,11 +2137,9 @@
 	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct radeon_framebuffer *radeon_fb;
 		struct radeon_bo *rbo;
 
-		radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
-		rbo = gem_to_radeon_bo(radeon_fb->obj);
+		rbo = gem_to_radeon_bo(crtc->primary->fb->obj[0]);
 		r = radeon_bo_reserve(rbo, false);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve rbo before unpin\n");
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 40be406..fa5fada 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -526,7 +526,7 @@
  * look up whether we are the integrated or discrete GPU (all asics).
  * Returns the client id.
  */
-static int radeon_atpx_get_client_id(struct pci_dev *pdev)
+static enum vga_switcheroo_client_id radeon_atpx_get_client_id(struct pci_dev *pdev)
 {
 	if (radeon_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
 		return VGA_SWITCHEROO_IGD;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index df9469a..2aea2bd 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -852,7 +852,7 @@
 	return ret;
 }
 
-static int radeon_lvds_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_lvds_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_encoder *encoder = radeon_best_single_encoder(connector);
@@ -1012,7 +1012,7 @@
 	return ret;
 }
 
-static int radeon_vga_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_vga_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
@@ -1156,7 +1156,7 @@
 	return 1;
 }
 
-static int radeon_tv_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_tv_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
 	if ((mode->hdisplay > 1024) || (mode->vdisplay > 768))
@@ -1498,7 +1498,7 @@
 		radeon_connector->use_digital = true;
 }
 
-static int radeon_dvi_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_dvi_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
@@ -1800,7 +1800,7 @@
 	return ret;
 }
 
-static int radeon_dp_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector,
 				  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = connector->dev;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 48d0e6b..59c8a66 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1591,7 +1591,7 @@
 	/* unpin the front buffers and cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-		struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb);
+		struct drm_framebuffer *fb = crtc->primary->fb;
 		struct radeon_bo *robj;
 
 		if (radeon_crtc->cursor_bo) {
@@ -1603,10 +1603,10 @@
 			}
 		}
 
-		if (rfb == NULL || rfb->obj == NULL) {
+		if (fb == NULL || fb->obj[0] == NULL) {
 			continue;
 		}
-		robj = gem_to_radeon_bo(rfb->obj);
+		robj = gem_to_radeon_bo(fb->obj[0]);
 		/* don't unpin kernel fb objects */
 		if (!radeon_fbdev_robj_is_fb(rdev, robj)) {
 			r = radeon_bo_reserve(robj, false);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 26129b2..9d3ac8b 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -32,6 +32,7 @@
 
 #include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_edid.h>
@@ -478,8 +479,6 @@
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-	struct radeon_framebuffer *old_radeon_fb;
-	struct radeon_framebuffer *new_radeon_fb;
 	struct drm_gem_object *obj;
 	struct radeon_flip_work *work;
 	struct radeon_bo *new_rbo;
@@ -501,15 +500,13 @@
 	work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0;
 
 	/* schedule unpin of the old buffer */
-	old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
-	obj = old_radeon_fb->obj;
+	obj = crtc->primary->fb->obj[0];
 
 	/* take a reference to the old object */
 	drm_gem_object_get(obj);
 	work->old_rbo = gem_to_radeon_bo(obj);
 
-	new_radeon_fb = to_radeon_framebuffer(fb);
-	obj = new_radeon_fb->obj;
+	obj = fb->obj[0];
 	new_rbo = gem_to_radeon_bo(obj);
 
 	/* pin the new buffer */
@@ -1285,41 +1282,23 @@
 
 }
 
-static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb)
-{
-	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);
-
-	drm_gem_object_put_unlocked(radeon_fb->obj);
-	drm_framebuffer_cleanup(fb);
-	kfree(radeon_fb);
-}
-
-static int radeon_user_framebuffer_create_handle(struct drm_framebuffer *fb,
-						  struct drm_file *file_priv,
-						  unsigned int *handle)
-{
-	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);
-
-	return drm_gem_handle_create(file_priv, radeon_fb->obj, handle);
-}
-
 static const struct drm_framebuffer_funcs radeon_fb_funcs = {
-	.destroy = radeon_user_framebuffer_destroy,
-	.create_handle = radeon_user_framebuffer_create_handle,
+	.destroy = drm_gem_fb_destroy,
+	.create_handle = drm_gem_fb_create_handle,
 };
 
 int
 radeon_framebuffer_init(struct drm_device *dev,
-			struct radeon_framebuffer *rfb,
+			struct drm_framebuffer *fb,
 			const struct drm_mode_fb_cmd2 *mode_cmd,
 			struct drm_gem_object *obj)
 {
 	int ret;
-	rfb->obj = obj;
-	drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
-	ret = drm_framebuffer_init(dev, &rfb->base, &radeon_fb_funcs);
+	fb->obj[0] = obj;
+	drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd);
+	ret = drm_framebuffer_init(dev, fb, &radeon_fb_funcs);
 	if (ret) {
-		rfb->obj = NULL;
+		fb->obj[0] = NULL;
 		return ret;
 	}
 	return 0;
@@ -1331,7 +1310,7 @@
 			       const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct drm_gem_object *obj;
-	struct radeon_framebuffer *radeon_fb;
+	struct drm_framebuffer *fb;
 	int ret;
 
 	obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]);
@@ -1347,20 +1326,20 @@
 		return ERR_PTR(-EINVAL);
 	}
 
-	radeon_fb = kzalloc(sizeof(*radeon_fb), GFP_KERNEL);
-	if (radeon_fb == NULL) {
+	fb = kzalloc(sizeof(*fb), GFP_KERNEL);
+	if (fb == NULL) {
 		drm_gem_object_put_unlocked(obj);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	ret = radeon_framebuffer_init(dev, radeon_fb, mode_cmd, obj);
+	ret = radeon_framebuffer_init(dev, fb, mode_cmd, obj);
 	if (ret) {
-		kfree(radeon_fb);
+		kfree(fb);
 		drm_gem_object_put_unlocked(obj);
 		return ERR_PTR(ret);
 	}
 
-	return &radeon_fb->base;
+	return fb;
 }
 
 static const struct drm_mode_config_funcs radeon_mode_funcs = {
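
With struct radeon_framebuffer gone, the GEM object now lives in the
generic drm_framebuffer::obj[0] slot, which is exactly what lets the
driver-private destroy/create_handle callbacks be replaced by the helpers
from <drm/drm_gem_framebuffer_helper.h>. A minimal sketch of such a funcs
table (example_fb_funcs is a placeholder; radeon's real table appears in
the hunk above):

  static const struct drm_framebuffer_funcs example_fb_funcs = {
  	.destroy	= drm_gem_fb_destroy,	    /* puts each fb->obj[i] */
  	.create_handle	= drm_gem_fb_create_handle, /* handle for fb->obj[0] */
  };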
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index b28288a..2a7977a 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -168,7 +168,12 @@
 int radeon_modeset = -1;
 int radeon_dynclks = -1;
 int radeon_r4xx_atom = 0;
+#ifdef __powerpc__
+/* Default to PCI on PowerPC (fdo #95017) */
+int radeon_agpmode = -1;
+#else
 int radeon_agpmode = 0;
+#endif
 int radeon_vram_limit = 0;
 int radeon_gart_size = -1; /* auto */
 int radeon_benchmarking = 0;
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 57c5404..11790340 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -43,7 +43,7 @@
  */
 struct radeon_fbdev {
 	struct drm_fb_helper helper;
-	struct radeon_framebuffer rfb;
+	struct drm_framebuffer fb;
 	struct radeon_device *rdev;
 };
 
@@ -246,13 +246,13 @@
 
 	info->par = rfbdev;
 
-	ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
+	ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->fb, &mode_cmd, gobj);
 	if (ret) {
 		DRM_ERROR("failed to initialize framebuffer %d\n", ret);
 		goto out;
 	}
 
-	fb = &rfbdev->rfb.base;
+	fb = &rfbdev->fb;
 
 	/* setup helper */
 	rfbdev->helper.fb = fb;
@@ -308,15 +308,15 @@
 
 static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfbdev)
 {
-	struct radeon_framebuffer *rfb = &rfbdev->rfb;
+	struct drm_framebuffer *fb = &rfbdev->fb;
 
 	drm_fb_helper_unregister_fbi(&rfbdev->helper);
 
-	if (rfb->obj) {
-		radeonfb_destroy_pinned_object(rfb->obj);
-		rfb->obj = NULL;
-		drm_framebuffer_unregister_private(&rfb->base);
-		drm_framebuffer_cleanup(&rfb->base);
+	if (fb->obj[0]) {
+		radeonfb_destroy_pinned_object(fb->obj[0]);
+		fb->obj[0] = NULL;
+		drm_framebuffer_unregister_private(fb);
+		drm_framebuffer_cleanup(fb);
 	}
 	drm_fb_helper_fini(&rfbdev->helper);
 
@@ -400,7 +400,7 @@
 	if (!rdev->mode_info.rfbdev)
 		return false;
 
-	if (robj == gem_to_radeon_bo(rdev->mode_info.rfbdev->rfb.obj))
+	if (robj == gem_to_radeon_bo(rdev->mode_info.rfbdev->fb.obj[0]))
 		return true;
 	return false;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 1f1856e..35a205a 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -374,7 +374,6 @@
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-	struct radeon_framebuffer *radeon_fb;
 	struct drm_framebuffer *target_fb;
 	struct drm_gem_object *obj;
 	struct radeon_bo *rbo;
@@ -393,14 +392,10 @@
 		return 0;
 	}
 
-	if (atomic) {
-		radeon_fb = to_radeon_framebuffer(fb);
+	if (atomic)
 		target_fb = fb;
-	}
-	else {
-		radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
+	else
 		target_fb = crtc->primary->fb;
-	}
 
 	switch (target_fb->format->cpp[0] * 8) {
 	case 8:
@@ -423,7 +418,7 @@
 	}
 
 	/* Pin framebuffer & get tiling information */
-	obj = radeon_fb->obj;
+	obj = target_fb->obj[0];
 	rbo = gem_to_radeon_bo(obj);
 retry:
 	r = radeon_bo_reserve(rbo, false);
@@ -451,7 +446,7 @@
 			struct radeon_bo *old_rbo;
 			unsigned long nsize, osize;
 
-			old_rbo = gem_to_radeon_bo(to_radeon_framebuffer(fb)->obj);
+			old_rbo = gem_to_radeon_bo(fb->obj[0]);
 			osize = radeon_bo_size(old_rbo);
 			nsize = radeon_bo_size(rbo);
 			if (nsize <= osize && !radeon_bo_reserve(old_rbo, false)) {
@@ -558,8 +553,7 @@
 	WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch);
 
 	if (!atomic && fb && fb != crtc->primary->fb) {
-		radeon_fb = to_radeon_framebuffer(fb);
-		rbo = gem_to_radeon_bo(radeon_fb->obj);
+		rbo = gem_to_radeon_bo(fb->obj[0]);
 		r = radeon_bo_reserve(rbo, false);
 		if (unlikely(r != 0))
 			return r;
@@ -1093,11 +1087,9 @@
 	radeon_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 	if (crtc->primary->fb) {
 		int r;
-		struct radeon_framebuffer *radeon_fb;
 		struct radeon_bo *rbo;
 
-		radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
-		rbo = gem_to_radeon_bo(radeon_fb->obj);
+		rbo = gem_to_radeon_bo(crtc->primary->fb->obj[0]);
 		r = radeon_bo_reserve(rbo, false);
 		if (unlikely(r))
 			DRM_ERROR("failed to reserve rbo before unpin\n");
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 3243e5e..fd470d6 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -46,7 +46,6 @@
 #define to_radeon_crtc(x) container_of(x, struct radeon_crtc, base)
 #define to_radeon_connector(x) container_of(x, struct radeon_connector, base)
 #define to_radeon_encoder(x) container_of(x, struct radeon_encoder, base)
-#define to_radeon_framebuffer(x) container_of(x, struct radeon_framebuffer, base)
 
 #define RADEON_MAX_HPD_PINS 7
 #define RADEON_MAX_CRTCS 6
@@ -574,11 +573,6 @@
 	int enabled_attribs;
 };
 
-struct radeon_framebuffer {
-	struct drm_framebuffer base;
-	struct drm_gem_object *obj;
-};
-
 #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
 				((em) == ATOM_ENCODER_MODE_DP_MST))
 
@@ -932,7 +926,7 @@
 extern void
 radeon_combios_encoder_dpms_scratch_regs(struct drm_encoder *encoder, bool on);
 int radeon_framebuffer_init(struct drm_device *dev,
-			     struct radeon_framebuffer *rfb,
+			     struct drm_framebuffer *rfb,
 			     const struct drm_mode_fb_cmd2 *mode_cmd,
 			     struct drm_gem_object *obj);
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index c442053..f2a0bd1 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -767,7 +767,8 @@
  * Initialization
  */
 
-int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
+int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex,
+			unsigned int hwindex)
 {
 	static const unsigned int mmio_offsets[] = {
 		DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET, DU3_REG_OFFSET
@@ -775,7 +776,7 @@
 
 	struct rcar_du_device *rcdu = rgrp->dev;
 	struct platform_device *pdev = to_platform_device(rcdu->dev);
-	struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index];
+	struct rcar_du_crtc *rcrtc = &rcdu->crtcs[swindex];
 	struct drm_crtc *crtc = &rcrtc->crtc;
 	struct drm_plane *primary;
 	unsigned int irqflags;
@@ -787,7 +788,7 @@
 
 	/* Get the CRTC clock and the optional external clock. */
 	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
-		sprintf(clk_name, "du.%u", index);
+		sprintf(clk_name, "du.%u", hwindex);
 		name = clk_name;
 	} else {
 		name = NULL;
@@ -795,16 +796,16 @@
 
 	rcrtc->clock = devm_clk_get(rcdu->dev, name);
 	if (IS_ERR(rcrtc->clock)) {
-		dev_err(rcdu->dev, "no clock for CRTC %u\n", index);
+		dev_err(rcdu->dev, "no clock for DU channel %u\n", hwindex);
 		return PTR_ERR(rcrtc->clock);
 	}
 
-	sprintf(clk_name, "dclkin.%u", index);
+	sprintf(clk_name, "dclkin.%u", hwindex);
 	clk = devm_clk_get(rcdu->dev, clk_name);
 	if (!IS_ERR(clk)) {
 		rcrtc->extclock = clk;
 	} else if (PTR_ERR(rcrtc->clock) == -EPROBE_DEFER) {
-		dev_info(rcdu->dev, "can't get external clock %u\n", index);
+		dev_info(rcdu->dev, "can't get external clock %u\n", hwindex);
 		return -EPROBE_DEFER;
 	}
 
@@ -813,13 +814,13 @@
 	spin_lock_init(&rcrtc->vblank_lock);
 
 	rcrtc->group = rgrp;
-	rcrtc->mmio_offset = mmio_offsets[index];
-	rcrtc->index = index;
+	rcrtc->mmio_offset = mmio_offsets[hwindex];
+	rcrtc->index = hwindex;
 
 	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE))
 		primary = &rcrtc->vsp->planes[rcrtc->vsp_pipe].plane;
 	else
-		primary = &rgrp->planes[index % 2].plane;
+		primary = &rgrp->planes[swindex % 2].plane;
 
 	ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, primary,
 					NULL, &crtc_funcs, NULL);
@@ -833,7 +834,8 @@
 
 	/* Register the interrupt handler. */
 	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
-		irq = platform_get_irq(pdev, index);
+		/* The IRQs are associated with the CRTC (sw) index. */
+		irq = platform_get_irq(pdev, swindex);
 		irqflags = 0;
 	} else {
 		irq = platform_get_irq(pdev, 0);
@@ -841,7 +843,7 @@
 	}
 
 	if (irq < 0) {
-		dev_err(rcdu->dev, "no IRQ for CRTC %u\n", index);
+		dev_err(rcdu->dev, "no IRQ for CRTC %u\n", swindex);
 		return irq;
 	}
 
@@ -849,7 +851,7 @@
 			       dev_name(rcdu->dev), rcrtc);
 	if (ret < 0) {
 		dev_err(rcdu->dev,
-			"failed to register IRQ for CRTC %u\n", index);
+			"failed to register IRQ for CRTC %u\n", swindex);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
index fdc2bf9..84b5e23 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
@@ -80,7 +80,8 @@
 	RCAR_DU_OUTPUT_MAX,
 };
 
-int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index);
+int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex,
+			unsigned int hwindex);
 void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc);
 void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc);
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index 3917d83..02aee6c 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -40,7 +40,7 @@
 	.gen = 2,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
-	.num_crtcs = 2,
+	.channels_mask = BIT(1) | BIT(0),
 	.routes = {
 		/*
 		 * R8A7743 has one RGB output and one LVDS output
@@ -61,7 +61,7 @@
 	.gen = 2,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
-	.num_crtcs = 2,
+	.channels_mask = BIT(1) | BIT(0),
 	.routes = {
 		/*
 		 * R8A7745 has two RGB outputs
@@ -80,7 +80,7 @@
 static const struct rcar_du_device_info rcar_du_r8a7779_info = {
 	.gen = 2,
 	.features = 0,
-	.num_crtcs = 2,
+	.channels_mask = BIT(1) | BIT(0),
 	.routes = {
 		/*
 		 * R8A7779 has two RGB outputs and one (currently unsupported)
@@ -102,7 +102,7 @@
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
 	.quirks = RCAR_DU_QUIRK_ALIGN_128B,
-	.num_crtcs = 3,
+	.channels_mask = BIT(2) | BIT(1) | BIT(0),
 	.routes = {
 		/*
 		 * R8A7790 has one RGB output, two LVDS outputs and one
@@ -129,7 +129,7 @@
 	.gen = 2,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
-	.num_crtcs = 2,
+	.channels_mask = BIT(1) | BIT(0),
 	.routes = {
 		/*
 		 * R8A779[13] has one RGB output, one LVDS output and one
@@ -151,7 +151,7 @@
 	.gen = 2,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
-	.num_crtcs = 2,
+	.channels_mask = BIT(1) | BIT(0),
 	.routes = {
 		/* R8A7792 has two RGB outputs. */
 		[RCAR_DU_OUTPUT_DPAD0] = {
@@ -169,7 +169,7 @@
 	.gen = 2,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS,
-	.num_crtcs = 2,
+	.channels_mask = BIT(1) | BIT(0),
 	.routes = {
 		/*
 		 * R8A7794 has two RGB outputs and one (currently unsupported)
@@ -191,7 +191,7 @@
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS
 		  | RCAR_DU_FEATURE_VSP1_SOURCE,
-	.num_crtcs = 4,
+	.channels_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0),
 	.routes = {
 		/*
 		 * R8A7795 has one RGB output, two HDMI outputs and one
@@ -215,7 +215,7 @@
 		},
 	},
 	.num_lvds = 1,
-	.dpll_ch =  BIT(1) | BIT(2),
+	.dpll_ch =  BIT(2) | BIT(1),
 };
 
 static const struct rcar_du_device_info rcar_du_r8a7796_info = {
@@ -223,7 +223,7 @@
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS
 		  | RCAR_DU_FEATURE_VSP1_SOURCE,
-	.num_crtcs = 3,
+	.channels_mask = BIT(2) | BIT(1) | BIT(0),
 	.routes = {
 		/*
 		 * R8A7796 has one RGB output, one LVDS output and one HDMI
@@ -246,12 +246,40 @@
 	.dpll_ch =  BIT(1),
 };
 
+static const struct rcar_du_device_info rcar_du_r8a77965_info = {
+	.gen = 3,
+	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
+		  | RCAR_DU_FEATURE_EXT_CTRL_REGS
+		  | RCAR_DU_FEATURE_VSP1_SOURCE,
+	.channels_mask = BIT(3) | BIT(1) | BIT(0),
+	.routes = {
+		/*
+		 * R8A77965 has one RGB output, one LVDS output and one HDMI
+		 * output.
+		 */
+		[RCAR_DU_OUTPUT_DPAD0] = {
+			.possible_crtcs = BIT(2),
+			.port = 0,
+		},
+		[RCAR_DU_OUTPUT_HDMI0] = {
+			.possible_crtcs = BIT(1),
+			.port = 1,
+		},
+		[RCAR_DU_OUTPUT_LVDS0] = {
+			.possible_crtcs = BIT(0),
+			.port = 2,
+		},
+	},
+	.num_lvds = 1,
+	.dpll_ch =  BIT(1),
+};
+
 static const struct rcar_du_device_info rcar_du_r8a77970_info = {
 	.gen = 3,
 	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
 		  | RCAR_DU_FEATURE_EXT_CTRL_REGS
 		  | RCAR_DU_FEATURE_VSP1_SOURCE,
-	.num_crtcs = 1,
+	.channels_mask = BIT(0),
 	.routes = {
 		/* R8A77970 has one RGB output and one LVDS output. */
 		[RCAR_DU_OUTPUT_DPAD0] = {
@@ -277,6 +305,7 @@
 	{ .compatible = "renesas,du-r8a7794", .data = &rcar_du_r8a7794_info },
 	{ .compatible = "renesas,du-r8a7795", .data = &rcar_du_r8a7795_info },
 	{ .compatible = "renesas,du-r8a7796", .data = &rcar_du_r8a7796_info },
+	{ .compatible = "renesas,du-r8a77965", .data = &rcar_du_r8a77965_info },
 	{ .compatible = "renesas,du-r8a77970", .data = &rcar_du_r8a77970_info },
 	{ }
 };
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
index 5c7ec15..b3a25e8 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
@@ -52,7 +52,7 @@
  * @gen: device generation (2 or 3)
  * @features: device features (RCAR_DU_FEATURE_*)
  * @quirks: device quirks (RCAR_DU_QUIRK_*)
- * @num_crtcs: total number of CRTCs
+ * @channels_mask: bit mask of available DU channels
  * @routes: array of CRTC to output routes, indexed by output (RCAR_DU_OUTPUT_*)
  * @num_lvds: number of internal LVDS encoders
  */
@@ -60,7 +60,7 @@
 	unsigned int gen;
 	unsigned int features;
 	unsigned int quirks;
-	unsigned int num_crtcs;
+	unsigned int channels_mask;
 	struct rcar_du_output_routing routes[RCAR_DU_OUTPUT_MAX];
 	unsigned int num_lvds;
 	unsigned int dpll_ch;
@@ -87,7 +87,6 @@
 	struct rcar_du_vsp vsps[RCAR_DU_MAX_VSPS];
 
 	struct {
-		struct drm_property *alpha;
 		struct drm_property *colorkey;
 	} props;
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c
index 2f37ea9..d539cb2 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c
@@ -46,10 +46,13 @@
 
 static void rcar_du_group_setup_pins(struct rcar_du_group *rgrp)
 {
-	u32 defr6 = DEFR6_CODE | DEFR6_ODPM12_DISP;
+	u32 defr6 = DEFR6_CODE;
 
-	if (rgrp->num_crtcs > 1)
-		defr6 |= DEFR6_ODPM22_DISP;
+	if (rgrp->channels_mask & BIT(0))
+		defr6 |= DEFR6_ODPM02_DISP;
+
+	if (rgrp->channels_mask & BIT(1))
+		defr6 |= DEFR6_ODPM12_DISP;
 
 	rcar_du_group_write(rgrp, DEFR6, defr6);
 }
@@ -80,10 +83,11 @@
 		 * On Gen3 VSPD routing can't be configured, but DPAD routing
 		 * needs to be set despite having a single option available.
 		 */
-		u32 crtc = ffs(possible_crtcs) - 1;
+		unsigned int rgb_crtc = ffs(possible_crtcs) - 1;
+		struct rcar_du_crtc *crtc = &rcdu->crtcs[rgb_crtc];
 
-		if (crtc / 2 == rgrp->index)
-			defr8 |= DEFR8_DRGBS_DU(crtc);
+		if (crtc->index / 2 == rgrp->index)
+			defr8 |= DEFR8_DRGBS_DU(crtc->index);
 	}
 
 	rcar_du_group_write(rgrp, DEFR8, defr8);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h
index 5e3adc6..42105ae 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h
@@ -25,6 +25,7 @@
  * @dev: the DU device
  * @mmio_offset: registers offset in the device memory map
  * @index: group index
+ * @channels_mask: bitmask of populated DU channels in this group
  * @num_crtcs: number of CRTCs in this group (1 or 2)
  * @use_count: number of users of the group (rcar_du_group_(get|put))
  * @used_crtcs: number of CRTCs currently in use
@@ -39,6 +40,7 @@
 	unsigned int mmio_offset;
 	unsigned int index;
 
+	unsigned int channels_mask;
 	unsigned int num_crtcs;
 	unsigned int use_count;
 	unsigned int used_crtcs;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index 0329b35..f0bc7cc 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -233,15 +233,7 @@
 	struct rcar_du_device *rcdu = dev->dev_private;
 	int ret;
 
-	ret = drm_atomic_helper_check_modeset(dev, state);
-	if (ret)
-		return ret;
-
-	ret = drm_atomic_normalize_zpos(dev, state);
-	if (ret)
-		return ret;
-
-	ret = drm_atomic_helper_check_planes(dev, state);
+	ret = drm_atomic_helper_check(dev, state);
 	if (ret)
 		return ret;
 
@@ -415,11 +407,6 @@
 
 static int rcar_du_properties_init(struct rcar_du_device *rcdu)
 {
-	rcdu->props.alpha =
-		drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255);
-	if (rcdu->props.alpha == NULL)
-		return -ENOMEM;
-
 	/*
 	 * The color key is expressed as an RGB888 triplet stored in a 32-bit
 	 * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0)
@@ -441,7 +428,7 @@
 	struct {
 		struct device_node *np;
 		unsigned int crtcs_mask;
-	} vsps[RCAR_DU_MAX_VSPS] = { { 0, }, };
+	} vsps[RCAR_DU_MAX_VSPS] = { { NULL, }, };
 	unsigned int vsps_count = 0;
 	unsigned int cells;
 	unsigned int i;
@@ -520,6 +507,8 @@
 	struct drm_fbdev_cma *fbdev;
 	unsigned int num_encoders;
 	unsigned int num_groups;
+	unsigned int swindex;
+	unsigned int hwindex;
 	unsigned int i;
 	int ret;
 
@@ -529,10 +518,11 @@
 	dev->mode_config.min_height = 0;
 	dev->mode_config.max_width = 4095;
 	dev->mode_config.max_height = 2047;
+	dev->mode_config.normalize_zpos = true;
 	dev->mode_config.funcs = &rcar_du_mode_config_funcs;
 	dev->mode_config.helper_private = &rcar_du_mode_config_helper;
 
-	rcdu->num_crtcs = rcdu->info->num_crtcs;
+	rcdu->num_crtcs = hweight8(rcdu->info->channels_mask);
 
 	ret = rcar_du_properties_init(rcdu);
 	if (ret < 0)
@@ -542,7 +532,7 @@
 	 * Initialize vertical blanking interrupts handling. Start with vblank
 	 * disabled for all CRTCs.
 	 */
-	ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1);
+	ret = drm_vblank_init(dev, (1 << rcdu->num_crtcs) - 1);
 	if (ret < 0)
 		return ret;
 
@@ -557,7 +547,10 @@
 		rgrp->dev = rcdu;
 		rgrp->mmio_offset = mmio_offsets[i];
 		rgrp->index = i;
-		rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U);
+		/* Extract the channel mask for this group only. */
+		rgrp->channels_mask = (rcdu->info->channels_mask >> (2 * i))
+				   & GENMASK(1, 0);
+		rgrp->num_crtcs = hweight8(rgrp->channels_mask);
 
 		/*
 		 * If we have more than one CRTC in this group, pre-associate
@@ -584,10 +577,16 @@
 	}
 
 	/* Create the CRTCs. */
-	for (i = 0; i < rcdu->num_crtcs; ++i) {
-		struct rcar_du_group *rgrp = &rcdu->groups[i / 2];
+	for (swindex = 0, hwindex = 0; swindex < rcdu->num_crtcs; ++hwindex) {
+		struct rcar_du_group *rgrp;
 
-		ret = rcar_du_crtc_create(rgrp, i);
+		/* Skip unpopulated DU channels. */
+		if (!(rcdu->info->channels_mask & BIT(hwindex)))
+			continue;
+
+		rgrp = &rcdu->groups[hwindex / 2];
+
+		ret = rcar_du_crtc_create(rgrp, swindex++, hwindex);
 		if (ret < 0)
 			return ret;
 	}
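
The CRTC creation loop above is easiest to follow with a concrete mask. A
small standalone demonstration of the software-to-hardware index mapping,
in plain userspace C with __builtin_popcount() standing in for the
kernel's hweight8():

  #include <stdio.h>

  #define BIT(n) (1u << (n))

  int main(void)
  {
  	/* r8a77965 from this series: DU channels 0, 1 and 3, no channel 2 */
  	unsigned int channels_mask = BIT(3) | BIT(1) | BIT(0);
  	unsigned int num_crtcs = __builtin_popcount(channels_mask);
  	unsigned int swindex, hwindex;

  	for (swindex = 0, hwindex = 0; swindex < num_crtcs; ++hwindex) {
  		if (!(channels_mask & BIT(hwindex)))
  			continue;	/* skip the unpopulated channel */
  		printf("swindex %u -> hwindex %u\n", swindex++, hwindex);
  	}
  	return 0;	/* prints 0->0, 1->1 and 2->3 */
  }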
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.c b/drivers/gpu/drm/rcar-du/rcar_du_of.c
index 68a0b82..afef696 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_of.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_of.c
@@ -18,6 +18,7 @@
 
 #include "rcar_du_crtc.h"
 #include "rcar_du_drv.h"
+#include "rcar_du_of.h"
 
 /* -----------------------------------------------------------------------------
  * Generic Overlay Handling
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.h b/drivers/gpu/drm/rcar-du/rcar_du_of.h
index c2e65a7..8dd3fbe 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_of.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_of.h
@@ -11,7 +11,7 @@
 
 struct of_device_id;
 
-#ifdef CONFIG_DRM_RCAR_LVDS
+#if IS_ENABLED(CONFIG_DRM_RCAR_LVDS)
 void __init rcar_du_of_init(const struct of_device_id *of_ids);
 #else
 static inline void rcar_du_of_init(const struct of_device_id *of_ids) { }
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
index 68556bd..c20f7ed 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
@@ -423,7 +423,7 @@
 		rcar_du_plane_write(rgrp, index, PnALPHAR, PnALPHAR_ABIT_0);
 	else
 		rcar_du_plane_write(rgrp, index, PnALPHAR,
-				    PnALPHAR_ABIT_X | state->alpha);
+				    PnALPHAR_ABIT_X | state->state.alpha >> 8);
 
 	pnmr = PnMR_BM_MD | state->format->pnmr;
 
@@ -692,11 +692,11 @@
 
 	state->hwindex = -1;
 	state->source = RCAR_DU_PLANE_MEMORY;
-	state->alpha = 255;
 	state->colorkey = RCAR_DU_COLORKEY_NONE;
 	state->state.zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1;
 
 	plane->state = &state->state;
+	plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE;
 	plane->state->plane = plane;
 }
 
@@ -708,9 +708,7 @@
 	struct rcar_du_plane_state *rstate = to_rcar_plane_state(state);
 	struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev;
 
-	if (property == rcdu->props.alpha)
-		rstate->alpha = val;
-	else if (property == rcdu->props.colorkey)
+	if (property == rcdu->props.colorkey)
 		rstate->colorkey = val;
 	else
 		return -EINVAL;
@@ -726,9 +724,7 @@
 		container_of(state, const struct rcar_du_plane_state, state);
 	struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev;
 
-	if (property == rcdu->props.alpha)
-		*val = rstate->alpha;
-	else if (property == rcdu->props.colorkey)
+	if (property == rcdu->props.colorkey)
 		*val = rstate->colorkey;
 	else
 		return -EINVAL;
@@ -797,10 +793,9 @@
 			continue;
 
 		drm_object_attach_property(&plane->plane.base,
-					   rcdu->props.alpha, 255);
-		drm_object_attach_property(&plane->plane.base,
 					   rcdu->props.colorkey,
 					   RCAR_DU_COLORKEY_NONE);
+		drm_plane_create_alpha_property(&plane->plane);
 		drm_plane_create_zpos_property(&plane->plane, 1, 1, 7);
 	}
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.h b/drivers/gpu/drm/rcar-du/rcar_du_plane.h
index 890321b..5c19c69 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.h
@@ -50,7 +50,6 @@
  * @state: base DRM plane state
  * @format: information about the pixel format used by the plane
  * @hwindex: 0-based hardware plane index, -1 means unused
- * @alpha: value of the plane alpha property
  * @colorkey: value of the plane colorkey property
  */
 struct rcar_du_plane_state {
@@ -60,7 +59,6 @@
 	int hwindex;
 	enum rcar_du_plane_source source;
 
-	unsigned int alpha;
 	unsigned int colorkey;
 };
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
index d5bae99..9dfd220 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
@@ -187,14 +187,14 @@
 
 #define DEFR6			0x000e8
 #define DEFR6_CODE		(0x7778 << 16)
-#define DEFR6_ODPM22_DSMR	(0 << 10)
-#define DEFR6_ODPM22_DISP	(2 << 10)
-#define DEFR6_ODPM22_CDE	(3 << 10)
-#define DEFR6_ODPM22_MASK	(3 << 10)
-#define DEFR6_ODPM12_DSMR	(0 << 8)
-#define DEFR6_ODPM12_DISP	(2 << 8)
-#define DEFR6_ODPM12_CDE	(3 << 8)
-#define DEFR6_ODPM12_MASK	(3 << 8)
+#define DEFR6_ODPM12_DSMR	(0 << 10)
+#define DEFR6_ODPM12_DISP	(2 << 10)
+#define DEFR6_ODPM12_CDE	(3 << 10)
+#define DEFR6_ODPM12_MASK	(3 << 10)
+#define DEFR6_ODPM02_DSMR	(0 << 8)
+#define DEFR6_ODPM02_DISP	(2 << 8)
+#define DEFR6_ODPM02_CDE	(3 << 8)
+#define DEFR6_ODPM02_MASK	(3 << 8)
 #define DEFR6_TCNE1		(1 << 6)
 #define DEFR6_TCNE0		(1 << 4)
 #define DEFR6_MLOS1		(1 << 2)
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
index 2c260c3..c59f0cf 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -17,6 +17,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_plane_helper.h>
 
 #include <linux/bitops.h>
@@ -54,6 +55,7 @@
 	};
 	struct rcar_du_plane_state state = {
 		.state = {
+			.alpha = DRM_BLEND_ALPHA_OPAQUE,
 			.crtc = &crtc->crtc,
 			.dst.x1 = 0,
 			.dst.y1 = 0,
@@ -67,7 +69,6 @@
 		},
 		.format = rcar_du_format_info(DRM_FORMAT_ARGB8888),
 		.source = RCAR_DU_PLANE_VSPD1,
-		.alpha = 255,
 		.colorkey = 0,
 	};
 
@@ -173,7 +174,7 @@
 	struct vsp1_du_atomic_config cfg = {
 		.pixelformat = 0,
 		.pitch = fb->pitches[0],
-		.alpha = state->alpha,
+		.alpha = state->state.alpha >> 8,
 		.zpos = state->state.zpos,
 	};
 	unsigned int i;
@@ -237,6 +238,10 @@
 		}
 	}
 
+	ret = drm_gem_fb_prepare_fb(plane, state);
+	if (ret)
+		goto fail;
+
 	return 0;
 
 fail:
@@ -299,14 +304,12 @@
 static struct drm_plane_state *
 rcar_du_vsp_plane_atomic_duplicate_state(struct drm_plane *plane)
 {
-	struct rcar_du_vsp_plane_state *state;
 	struct rcar_du_vsp_plane_state *copy;
 
 	if (WARN_ON(!plane->state))
 		return NULL;
 
-	state = to_rcar_vsp_plane_state(plane->state);
-	copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
+	copy = kzalloc(sizeof(*copy), GFP_KERNEL);
 	if (copy == NULL)
 		return NULL;
 
@@ -335,44 +338,13 @@
 	if (state == NULL)
 		return;
 
-	state->alpha = 255;
+	state->state.alpha = DRM_BLEND_ALPHA_OPAQUE;
 	state->state.zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1;
 
 	plane->state = &state->state;
 	plane->state->plane = plane;
 }
 
-static int rcar_du_vsp_plane_atomic_set_property(struct drm_plane *plane,
-	struct drm_plane_state *state, struct drm_property *property,
-	uint64_t val)
-{
-	struct rcar_du_vsp_plane_state *rstate = to_rcar_vsp_plane_state(state);
-	struct rcar_du_device *rcdu = to_rcar_vsp_plane(plane)->vsp->dev;
-
-	if (property == rcdu->props.alpha)
-		rstate->alpha = val;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static int rcar_du_vsp_plane_atomic_get_property(struct drm_plane *plane,
-	const struct drm_plane_state *state, struct drm_property *property,
-	uint64_t *val)
-{
-	const struct rcar_du_vsp_plane_state *rstate =
-		container_of(state, const struct rcar_du_vsp_plane_state, state);
-	struct rcar_du_device *rcdu = to_rcar_vsp_plane(plane)->vsp->dev;
-
-	if (property == rcdu->props.alpha)
-		*val = rstate->alpha;
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
 static const struct drm_plane_funcs rcar_du_vsp_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
@@ -380,8 +352,6 @@
 	.destroy = drm_plane_cleanup,
 	.atomic_duplicate_state = rcar_du_vsp_plane_atomic_duplicate_state,
 	.atomic_destroy_state = rcar_du_vsp_plane_atomic_destroy_state,
-	.atomic_set_property = rcar_du_vsp_plane_atomic_set_property,
-	.atomic_get_property = rcar_du_vsp_plane_atomic_get_property,
 };
 
 int rcar_du_vsp_init(struct rcar_du_vsp *vsp, struct device_node *np,
@@ -438,8 +408,7 @@
 		if (type == DRM_PLANE_TYPE_PRIMARY)
 			continue;
 
-		drm_object_attach_property(&plane->plane.base,
-					   rcdu->props.alpha, 255);
+		drm_plane_create_alpha_property(&plane->plane);
 		drm_plane_create_zpos_property(&plane->plane, 1, 1,
 					       vsp->num_planes - 1);
 	}
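
The alpha plumbing above moves from a driver-private 8-bit property to the
generic 16-bit one created by drm_plane_create_alpha_property(), which is
why the hardware programming paths now shift the value right by 8. A
trivial standalone check of that conversion (the macro value matches the
definition in <drm/drm_blend.h>):

  #include <stdio.h>

  #define DRM_BLEND_ALPHA_OPAQUE 0xffff	/* maximum of the generic property */

  int main(void)
  {
  	unsigned int alpha = DRM_BLEND_ALPHA_OPAQUE;

  	/* 16-bit property value down to the 8 bits the DU/VSP hardware takes */
  	printf("hw alpha = 0x%02x\n", alpha >> 8);	/* prints 0xff */
  	return 0;
  }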
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h
index 4c5d7bb..8a8a25c 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h
@@ -44,15 +44,12 @@
  * @state: base DRM plane state
  * @format: information about the pixel format used by the plane
  * @sg_tables: scatter-gather tables for the frame buffer memory
- * @alpha: value of the plane alpha property
  */
 struct rcar_du_vsp_plane_state {
 	struct drm_plane_state state;
 
 	const struct rcar_du_format_info *format;
 	struct sg_table sg_tables[3];
-
-	unsigned int alpha;
 };
 
 static inline struct rcar_du_vsp_plane_state *
diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index 3e8bf79..080f053 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -77,13 +77,13 @@
 	struct analogix_dp_plat_data plat_data;
 };
 
-static void analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled)
+static int analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled)
 {
 	struct rockchip_dp_device *dp = to_dp(encoder);
 	int ret;
 
 	if (!analogix_dp_psr_enabled(dp->adp))
-		return;
+		return 0;
 
 	DRM_DEV_DEBUG(dp->dev, "%s PSR...\n", enabled ? "Entry" : "Exit");
 
@@ -91,13 +91,13 @@
 					 PSR_WAIT_LINE_FLAG_TIMEOUT_MS);
 	if (ret) {
 		DRM_DEV_ERROR(dp->dev, "line flag interrupt did not arrive\n");
-		return;
+		return -ETIMEDOUT;
 	}
 
 	if (enabled)
-		analogix_dp_enable_psr(dp->adp);
+		return analogix_dp_enable_psr(dp->adp);
 	else
-		analogix_dp_disable_psr(dp->adp);
+		return analogix_dp_disable_psr(dp->adp);
 }
 
 static int rockchip_dp_pre_init(struct rockchip_dp_device *dp)
@@ -109,7 +109,7 @@
 	return 0;
 }
 
-static int rockchip_dp_poweron(struct analogix_dp_plat_data *plat_data)
+static int rockchip_dp_poweron_start(struct analogix_dp_plat_data *plat_data)
 {
 	struct rockchip_dp_device *dp = to_dp(plat_data);
 	int ret;
@@ -127,7 +127,14 @@
 		return ret;
 	}
 
-	return rockchip_drm_psr_activate(&dp->encoder);
+	return ret;
+}
+
+static int rockchip_dp_poweron_end(struct analogix_dp_plat_data *plat_data)
+{
+	struct rockchip_dp_device *dp = to_dp(plat_data);
+
+	return rockchip_drm_psr_inhibit_put(&dp->encoder);
 }
 
 static int rockchip_dp_powerdown(struct analogix_dp_plat_data *plat_data)
@@ -135,7 +142,7 @@
 	struct rockchip_dp_device *dp = to_dp(plat_data);
 	int ret;
 
-	ret = rockchip_drm_psr_deactivate(&dp->encoder);
+	ret = rockchip_drm_psr_inhibit_get(&dp->encoder);
 	if (ret != 0)
 		return ret;
 
@@ -218,6 +225,7 @@
 				      struct drm_connector_state *conn_state)
 {
 	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
+	struct drm_display_info *di = &conn_state->connector->display_info;
 
 	/*
 	 * The hardware IC designed that VOP must output the RGB10 video
@@ -229,6 +237,7 @@
 
 	s->output_mode = ROCKCHIP_OUT_MODE_AAAA;
 	s->output_type = DRM_MODE_CONNECTOR_eDP;
+	s->output_bpc = di->bpc;
 
 	return 0;
 }
@@ -328,7 +337,8 @@
 	dp->plat_data.encoder = &dp->encoder;
 
 	dp->plat_data.dev_type = dp->data->chip_type;
-	dp->plat_data.power_on = rockchip_dp_poweron;
+	dp->plat_data.power_on_start = rockchip_dp_poweron_start;
+	dp->plat_data.power_on_end = rockchip_dp_poweron_end;
 	dp->plat_data.power_off = rockchip_dp_powerdown;
 	dp->plat_data.get_modes = rockchip_dp_get_modes;
 
@@ -358,6 +368,8 @@
 	analogix_dp_unbind(dp->adp);
 	rockchip_drm_psr_unregister(&dp->encoder);
 	dp->encoder.funcs->destroy(&dp->encoder);
+
+	dp->adp = ERR_PTR(-ENODEV);
 }
 
 static const struct component_ops rockchip_dp_component_ops = {
@@ -381,6 +393,7 @@
 		return -ENOMEM;
 
 	dp->dev = dev;
+	dp->adp = ERR_PTR(-ENODEV);
 	dp->plat_data.panel = panel;
 
 	ret = rockchip_dp_of_probe(dp);
@@ -404,6 +417,9 @@
 {
 	struct rockchip_dp_device *dp = dev_get_drvdata(dev);
 
+	if (IS_ERR(dp->adp))
+		return 0;
+
 	return analogix_dp_suspend(dp->adp);
 }
 
@@ -411,6 +427,9 @@
 {
 	struct rockchip_dp_device *dp = dev_get_drvdata(dev);
 
+	if (IS_ERR(dp->adp))
+		return 0;
+
 	return analogix_dp_resume(dp->adp);
 }
 #endif
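
Two details in the analogix/rockchip hunks above are worth spelling out.
First, the platform power_on callback is split in two so that PSR is only
un-inhibited once power-on has fully completed; second, dp->adp is parked
at ERR_PTR(-ENODEV) while the component is unbound, so the PM callbacks
can bail out early instead of calling into a dead device. The implied call
order, sketched as comments (the core-side step between the two halves is
an assumption based on the callback names, it is not visible in this
diff):

  /* plat_data->power_on_start()  ->  rockchip_dp_poweron_start():
   *	pm_runtime_get_sync() + rockchip_dp_pre_init()
   *
   * ... analogix core brings up the DP link (assumed) ...
   *
   * plat_data->power_on_end()    ->  rockchip_dp_poweron_end():
   *	rockchip_drm_psr_inhibit_put(), PSR allowed again
   */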
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index 9c064a4..3a6ebfc 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -36,6 +36,7 @@
 	struct drm_crtc_state base;
 	int output_type;
 	int output_mode;
+	int output_bpc;
 };
 #define to_rockchip_crtc_state(s) \
 		container_of(s, struct rockchip_crtc_state, base)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index e266539..d4f4118 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -167,8 +167,67 @@
 	return ERR_PTR(ret);
 }
 
+static void
+rockchip_drm_psr_inhibit_get_state(struct drm_atomic_state *state)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	struct drm_encoder *encoder;
+	u32 encoder_mask = 0;
+	int i;
+
+	for_each_old_crtc_in_state(state, crtc, crtc_state, i) {
+		encoder_mask |= crtc_state->encoder_mask;
+		encoder_mask |= crtc->state->encoder_mask;
+	}
+
+	drm_for_each_encoder_mask(encoder, state->dev, encoder_mask)
+		rockchip_drm_psr_inhibit_get(encoder);
+}
+
+static void
+rockchip_drm_psr_inhibit_put_state(struct drm_atomic_state *state)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	struct drm_encoder *encoder;
+	u32 encoder_mask = 0;
+	int i;
+
+	for_each_old_crtc_in_state(state, crtc, crtc_state, i) {
+		encoder_mask |= crtc_state->encoder_mask;
+		encoder_mask |= crtc->state->encoder_mask;
+	}
+
+	drm_for_each_encoder_mask(encoder, state->dev, encoder_mask)
+		rockchip_drm_psr_inhibit_put(encoder);
+}
+
+static void
+rockchip_atomic_helper_commit_tail_rpm(struct drm_atomic_state *old_state)
+{
+	struct drm_device *dev = old_state->dev;
+
+	rockchip_drm_psr_inhibit_get_state(old_state);
+
+	drm_atomic_helper_commit_modeset_disables(dev, old_state);
+
+	drm_atomic_helper_commit_modeset_enables(dev, old_state);
+
+	drm_atomic_helper_commit_planes(dev, old_state,
+					DRM_PLANE_COMMIT_ACTIVE_ONLY);
+
+	rockchip_drm_psr_inhibit_put_state(old_state);
+
+	drm_atomic_helper_commit_hw_done(old_state);
+
+	drm_atomic_helper_wait_for_vblanks(dev, old_state);
+
+	drm_atomic_helper_cleanup_planes(dev, old_state);
+}
+
 static const struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = {
-	.atomic_commit_tail = drm_atomic_helper_commit_tail_rpm,
+	.atomic_commit_tail = rockchip_atomic_helper_commit_tail_rpm,
 };
 
 static const struct drm_mode_config_funcs rockchip_drm_mode_config_funcs = {
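
The custom commit tail above differs from the stock
drm_atomic_helper_commit_tail_rpm() only by the PSR inhibit bracket around
the modeset phases. Note that the two mask-collecting helpers OR together
the encoder_mask of both the old and the new CRTC state: an encoder being
disconnected is only reachable through the old state, one being connected
only through the new state, and PSR has to stay inhibited for both while
the commit runs. Sketched with explicit names (old_crtc_state is the
iteration variable the patch simply calls crtc_state):

  u32 encoder_mask = 0;

  encoder_mask |= old_crtc_state->encoder_mask;	/* encoders on the way out */
  encoder_mask |= crtc->state->encoder_mask;	/* encoders staying or arriving */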
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index 074db7a..a8db758 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -357,8 +357,8 @@
 }
 
 /*
- * rockchip_gem_free_object - (struct drm_driver)->gem_free_object callback
- * function
+ * rockchip_gem_free_object - (struct drm_driver)->gem_free_object_unlocked
+ * callback function
  */
 void rockchip_gem_free_object(struct drm_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_psr.c b/drivers/gpu/drm/rockchip/rockchip_drm_psr.c
index b339ca9..79d00d8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_psr.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_psr.c
@@ -20,42 +20,19 @@
 
 #define PSR_FLUSH_TIMEOUT_MS	100
 
-enum psr_state {
-	PSR_FLUSH,
-	PSR_ENABLE,
-	PSR_DISABLE,
-};
-
 struct psr_drv {
 	struct list_head	list;
 	struct drm_encoder	*encoder;
 
 	struct mutex		lock;
-	bool			active;
-	enum psr_state		state;
+	int			inhibit_count;
+	bool			enabled;
 
 	struct delayed_work	flush_work;
 
-	void (*set)(struct drm_encoder *encoder, bool enable);
+	int (*set)(struct drm_encoder *encoder, bool enable);
 };
 
-static struct psr_drv *find_psr_by_crtc(struct drm_crtc *crtc)
-{
-	struct rockchip_drm_private *drm_drv = crtc->dev->dev_private;
-	struct psr_drv *psr;
-
-	mutex_lock(&drm_drv->psr_list_lock);
-	list_for_each_entry(psr, &drm_drv->psr_list, list) {
-		if (psr->encoder->crtc == crtc)
-			goto out;
-	}
-	psr = ERR_PTR(-ENODEV);
-
-out:
-	mutex_unlock(&drm_drv->psr_list_lock);
-	return psr;
-}
-
 static struct psr_drv *find_psr_by_encoder(struct drm_encoder *encoder)
 {
 	struct rockchip_drm_private *drm_drv = encoder->dev->dev_private;
@@ -73,46 +50,22 @@
 	return psr;
 }
 
-static void psr_set_state_locked(struct psr_drv *psr, enum psr_state state)
+static int psr_set_state_locked(struct psr_drv *psr, bool enable)
 {
-	/*
-	 * Allowed finite state machine:
-	 *
-	 *   PSR_ENABLE  < = = = = = >  PSR_FLUSH
-	 *       | ^                        |
-	 *       | |                        |
-	 *       v |                        |
-	 *   PSR_DISABLE < - - - - - - - - -
-	 */
-	if (state == psr->state || !psr->active)
-		return;
+	int ret;
 
-	/* Already disabled in flush, change the state, but not the hardware */
-	if (state == PSR_DISABLE && psr->state == PSR_FLUSH) {
-		psr->state = state;
-		return;
-	}
+	if (psr->inhibit_count > 0)
+		return -EINVAL;
 
-	psr->state = state;
+	if (enable == psr->enabled)
+		return 0;
 
-	/* Actually commit the state change to hardware */
-	switch (psr->state) {
-	case PSR_ENABLE:
-		psr->set(psr->encoder, true);
-		break;
+	ret = psr->set(psr->encoder, enable);
+	if (ret)
+		return ret;
 
-	case PSR_DISABLE:
-	case PSR_FLUSH:
-		psr->set(psr->encoder, false);
-		break;
-	}
-}
-
-static void psr_set_state(struct psr_drv *psr, enum psr_state state)
-{
-	mutex_lock(&psr->lock);
-	psr_set_state_locked(psr, state);
-	mutex_unlock(&psr->lock);
+	psr->enabled = enable;
+	return 0;
 }
 
 static void psr_flush_handler(struct work_struct *work)
@@ -120,21 +73,24 @@
 	struct psr_drv *psr = container_of(to_delayed_work(work),
 					   struct psr_drv, flush_work);
 
-	/* If the state has changed since we initiated the flush, do nothing */
 	mutex_lock(&psr->lock);
-	if (psr->state == PSR_FLUSH)
-		psr_set_state_locked(psr, PSR_ENABLE);
+	psr_set_state_locked(psr, true);
 	mutex_unlock(&psr->lock);
 }
 
 /**
- * rockchip_drm_psr_activate - activate PSR on the given pipe
+ * rockchip_drm_psr_inhibit_put - release PSR inhibit on given encoder
  * @encoder: encoder to obtain the PSR encoder
  *
+ * Decrements the PSR inhibit count on the given encoder. Should only be
+ * called to balance a previous rockchip_drm_psr_inhibit_get(). If the
+ * inhibit counter reaches zero, PSR flush work is scheduled to make the
+ * hardware enter PSR mode in PSR_FLUSH_TIMEOUT_MS.
+ *
  * Returns:
  * Zero on success, negative errno on failure.
  */
-int rockchip_drm_psr_activate(struct drm_encoder *encoder)
+int rockchip_drm_psr_inhibit_put(struct drm_encoder *encoder)
 {
 	struct psr_drv *psr = find_psr_by_encoder(encoder);
 
@@ -142,21 +98,30 @@
 		return PTR_ERR(psr);
 
 	mutex_lock(&psr->lock);
-	psr->active = true;
+	--psr->inhibit_count;
+	WARN_ON(psr->inhibit_count < 0);
+	if (!psr->inhibit_count)
+		mod_delayed_work(system_wq, &psr->flush_work,
+				 PSR_FLUSH_TIMEOUT_MS);
 	mutex_unlock(&psr->lock);
 
 	return 0;
 }
-EXPORT_SYMBOL(rockchip_drm_psr_activate);
+EXPORT_SYMBOL(rockchip_drm_psr_inhibit_put);
 
 /**
- * rockchip_drm_psr_deactivate - deactivate PSR on the given pipe
+ * rockchip_drm_psr_inhibit_get - acquire PSR inhibit on given encoder
  * @encoder: encoder to obtain the PSR encoder
  *
+ * Increments the PSR inhibit count on the given encoder. This function
+ * guarantees that after it returns, PSR is turned off on the encoder and
+ * that no PSR-related hardware state change occurs until a matching call
+ * to rockchip_drm_psr_inhibit_put().
+ *
  * Returns:
  * Zero on success, negative errno on failure.
  */
-int rockchip_drm_psr_deactivate(struct drm_encoder *encoder)
+int rockchip_drm_psr_inhibit_get(struct drm_encoder *encoder)
 {
 	struct psr_drv *psr = find_psr_by_encoder(encoder);
 
@@ -164,37 +129,25 @@
 		return PTR_ERR(psr);
 
 	mutex_lock(&psr->lock);
-	psr->active = false;
+	psr_set_state_locked(psr, false);
+	++psr->inhibit_count;
 	mutex_unlock(&psr->lock);
 	cancel_delayed_work_sync(&psr->flush_work);
 
 	return 0;
 }
-EXPORT_SYMBOL(rockchip_drm_psr_deactivate);
+EXPORT_SYMBOL(rockchip_drm_psr_inhibit_get);
 
 static void rockchip_drm_do_flush(struct psr_drv *psr)
 {
-	psr_set_state(psr, PSR_FLUSH);
-	mod_delayed_work(system_wq, &psr->flush_work, PSR_FLUSH_TIMEOUT_MS);
-}
+	cancel_delayed_work_sync(&psr->flush_work);
 
-/**
- * rockchip_drm_psr_flush - flush a single pipe
- * @crtc: CRTC of the pipe to flush
- *
- * Returns:
- * 0 on success, -errno on fail
- */
-int rockchip_drm_psr_flush(struct drm_crtc *crtc)
-{
-	struct psr_drv *psr = find_psr_by_crtc(crtc);
-	if (IS_ERR(psr))
-		return PTR_ERR(psr);
-
-	rockchip_drm_do_flush(psr);
-	return 0;
+	mutex_lock(&psr->lock);
+	if (!psr_set_state_locked(psr, false))
+		mod_delayed_work(system_wq, &psr->flush_work,
+				 PSR_FLUSH_TIMEOUT_MS);
+	mutex_unlock(&psr->lock);
 }
-EXPORT_SYMBOL(rockchip_drm_psr_flush);
 
 /**
  * rockchip_drm_psr_flush_all - force to flush all registered PSR encoders
@@ -225,11 +178,16 @@
  * @encoder: encoder that obtain the PSR function
  * @psr_set: call back to set PSR state
  *
+ * The function returns with the PSR inhibit counter initialized to one;
+ * the caller (typically the encoder driver) needs to call
+ * rockchip_drm_psr_inhibit_put() once it is ready to accept PSR enable
+ * requests.
+ *
  * Returns:
  * Zero on success, negative errno on failure.
  */
 int rockchip_drm_psr_register(struct drm_encoder *encoder,
-			void (*psr_set)(struct drm_encoder *, bool enable))
+			int (*psr_set)(struct drm_encoder *, bool enable))
 {
 	struct rockchip_drm_private *drm_drv = encoder->dev->dev_private;
 	struct psr_drv *psr;
@@ -244,8 +202,8 @@
 	INIT_DELAYED_WORK(&psr->flush_work, psr_flush_handler);
 	mutex_init(&psr->lock);
 
-	psr->active = true;
-	psr->state = PSR_DISABLE;
+	psr->inhibit_count = 1;
+	psr->enabled = false;
 	psr->encoder = encoder;
 	psr->set = psr_set;
 
@@ -262,6 +220,11 @@
  * @encoder: encoder that obtain the PSR function
 * @psr_set: callback to set PSR state
  *
+ * It is expected that the PSR inhibit counter is 1 when this function is
+ * called, which corresponds to a state in which the related encoder has
+ * been disconnected from any CRTC and its driver has called
+ * rockchip_drm_psr_inhibit_get() to stop the PSR logic.
+ *
  * Returns:
  * Zero on success, negative errno on failure.
  */
@@ -273,7 +236,12 @@
 	mutex_lock(&drm_drv->psr_list_lock);
 	list_for_each_entry_safe(psr, n, &drm_drv->psr_list, list) {
 		if (psr->encoder == encoder) {
-			cancel_delayed_work_sync(&psr->flush_work);
+			/*
+			 * Any other value would mean that the encoder
+			 * is still in use.
+			 */
+			WARN_ON(psr->inhibit_count != 1);
+
 			list_del(&psr->list);
 			kfree(psr);
 		}
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_psr.h b/drivers/gpu/drm/rockchip/rockchip_drm_psr.h
index b1ea015..860c624 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_psr.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_psr.h
@@ -16,13 +16,12 @@
 #define __ROCKCHIP_DRM_PSR___
 
 void rockchip_drm_psr_flush_all(struct drm_device *dev);
-int rockchip_drm_psr_flush(struct drm_crtc *crtc);
 
-int rockchip_drm_psr_activate(struct drm_encoder *encoder);
-int rockchip_drm_psr_deactivate(struct drm_encoder *encoder);
+int rockchip_drm_psr_inhibit_put(struct drm_encoder *encoder);
+int rockchip_drm_psr_inhibit_get(struct drm_encoder *encoder);
 
 int rockchip_drm_psr_register(struct drm_encoder *encoder,
-			void (*psr_set)(struct drm_encoder *, bool enable));
+			int (*psr_set)(struct drm_encoder *, bool enable));
 void rockchip_drm_psr_unregister(struct drm_encoder *encoder);
 
 #endif /* __ROCKCHIP_DRM_PSR___ */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 53d4afe..2121345 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -76,6 +76,9 @@
 #define VOP_WIN_GET_YRGBADDR(vop, win) \
 		vop_readl(vop, win->base + win->phy->yrgb_mst.offset)
 
+#define VOP_WIN_TO_INDEX(vop_win) \
+	((vop_win) - (vop_win)->vop->win)
+
 #define to_vop(x) container_of(x, struct vop, crtc)
 #define to_vop_win(x) container_of(x, struct vop_win, base)
 
@@ -708,6 +711,7 @@
 	dma_addr_t dma_addr;
 	uint32_t val;
 	bool rb_swap;
+	int win_index = VOP_WIN_TO_INDEX(vop_win);
 	int format;
 
 	/*
@@ -777,7 +781,14 @@
 	rb_swap = has_rb_swapped(fb->format->format);
 	VOP_WIN_SET(vop, win, rb_swap, rb_swap);
 
-	if (fb->format->has_alpha) {
+	/*
+	 * Blending win0 with the background color doesn't seem to work
+	 * correctly. We only get the background color, no matter the contents
+	 * of the win0 framebuffer. However, blending pre-multiplied color
+	 * with the default opaque black background color is a no-op, so we
+	 * can just disable blending to get the correct result.
+	 */
+	if (fb->format->has_alpha && win_index > 0) {
 		VOP_WIN_SET(vop, win, dst_alpha_ctl,
 			    DST_FACTOR_M0(ALPHA_SRC_INVERSE));
 		val = SRC_ALPHA_EN(1) | SRC_COLOR_M0(ALPHA_SRC_PRE_MUL) |
@@ -925,6 +936,12 @@
 	if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA &&
 	    !(vop_data->feature & VOP_FEATURE_OUTPUT_RGB10))
 		s->output_mode = ROCKCHIP_OUT_MODE_P888;
+
+	if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA && s->output_bpc == 8)
+		VOP_REG_SET(vop, common, pre_dither_down, 1);
+	else
+		VOP_REG_SET(vop, common, pre_dither_down, 0);
+
 	VOP_REG_SET(vop, common, out_mode, s->output_mode);
 
 	VOP_REG_SET(vop, modeset, htotal_pw, (htotal << 16) | hsync_len);
@@ -1017,22 +1034,15 @@
 			continue;
 
 		drm_framebuffer_get(old_plane_state->fb);
+		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
 		drm_flip_work_queue(&vop->fb_unref_work, old_plane_state->fb);
 		set_bit(VOP_PENDING_FB_UNREF, &vop->pending);
-		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
 	}
 }
 
-static void vop_crtc_atomic_begin(struct drm_crtc *crtc,
-				  struct drm_crtc_state *old_crtc_state)
-{
-	rockchip_drm_psr_flush(crtc);
-}
-
 static const struct drm_crtc_helper_funcs vop_crtc_helper_funcs = {
 	.mode_fixup = vop_crtc_mode_fixup,
 	.atomic_flush = vop_crtc_atomic_flush,
-	.atomic_begin = vop_crtc_atomic_begin,
 	.atomic_enable = vop_crtc_atomic_enable,
 	.atomic_disable = vop_crtc_atomic_disable,
 };
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 56bbd2e..084acdd 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -67,6 +67,7 @@
 	struct vop_reg cfg_done;
 	struct vop_reg dsp_blank;
 	struct vop_reg data_blank;
+	struct vop_reg pre_dither_down;
 	struct vop_reg dither_down;
 	struct vop_reg dither_up;
 	struct vop_reg gate_en;
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index 2e4eea3..08023d3 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -264,6 +264,7 @@
 	.standby = VOP_REG_SYNC(RK3288_SYS_CTRL, 0x1, 22),
 	.gate_en = VOP_REG(RK3288_SYS_CTRL, 0x1, 23),
 	.mmu_en = VOP_REG(RK3288_SYS_CTRL, 0x1, 20),
+	.pre_dither_down = VOP_REG(RK3288_DSP_CTRL1, 0x1, 1),
 	.dither_down = VOP_REG(RK3288_DSP_CTRL1, 0xf, 1),
 	.dither_up = VOP_REG(RK3288_DSP_CTRL1, 0x1, 6),
 	.data_blank = VOP_REG(RK3288_DSP_CTRL0, 0x1, 19),
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 0d95888..df1578d 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -30,7 +30,7 @@
 #include <drm/spsc_queue.h>
 
 #define CREATE_TRACE_POINTS
-#include <drm/gpu_scheduler_trace.h>
+#include "gpu_scheduler_trace.h"
 
 #define to_drm_sched_job(sched_job)		\
 		container_of((sched_job), struct drm_sched_job, queue_node)
@@ -117,15 +117,15 @@
  * @sched	The pointer to the scheduler
  * @entity	The pointer to a valid drm_sched_entity
 * @rq		The run queue to which this entity belongs
- * @kernel	If this is an entity for the kernel
- * @jobs	The max number of jobs in the job queue
+ * @guilty      atomic_t set to 1 when a job on this queue
+ *              is found to be guilty of causing a timeout
 *
 * Returns 0 on success, negative error code on failure
 */
 int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
 			  struct drm_sched_entity *entity,
 			  struct drm_sched_rq *rq,
-			  uint32_t jobs, atomic_t *guilty)
+			  atomic_t *guilty)
 {
 	if (!(sched && entity && rq))
 		return -EINVAL;
@@ -135,9 +135,10 @@
 	entity->rq = rq;
 	entity->sched = sched;
 	entity->guilty = guilty;
+	entity->fini_status = 0;
+	entity->last_scheduled = NULL;
 
 	spin_lock_init(&entity->rq_lock);
-	spin_lock_init(&entity->queue_lock);
 	spsc_queue_init(&entity->job_queue);
 
 	atomic_set(&entity->fence_seq, 0);
@@ -196,19 +197,30 @@
 	return true;
 }
 
+static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
+				    struct dma_fence_cb *cb)
+{
+	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
+						 finish_cb);
+	drm_sched_fence_finished(job->s_fence);
+	WARN_ON(job->s_fence->parent);
+	dma_fence_put(&job->s_fence->finished);
+	job->sched->ops->free_job(job);
+}
+
+
 /**
  * Destroy a context entity
  *
  * @sched       Pointer to scheduler instance
  * @entity	The pointer to a valid scheduler entity
  *
- * Cleanup and free the allocated resources.
+ * Splitting drm_sched_entity_fini() into two functions: the first one does the waiting,
+ * removes the entity from the runqueue and returns an error when the process was killed.
  */
-void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
+void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity)
 {
-	int r;
-
 	if (!drm_sched_entity_is_initialized(sched, entity))
 		return;
 	/**
@@ -216,13 +228,28 @@
 	 * queued IBs or discard them on SIGKILL
 	*/
 	if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL)
-		r = -ERESTARTSYS;
+		entity->fini_status = -ERESTARTSYS;
 	else
-		r = wait_event_killable(sched->job_scheduled,
+		entity->fini_status = wait_event_killable(sched->job_scheduled,
 					drm_sched_entity_is_idle(entity));
 	drm_sched_entity_set_rq(entity, NULL);
-	if (r) {
+}
+EXPORT_SYMBOL(drm_sched_entity_do_release);
+
+/**
+ * Clean up a context entity
+ *
+ * @sched       Pointer to scheduler instance
+ * @entity	The pointer to a valid scheduler entity
+ *
+ * The second one then goes over the entity and signals all jobs with an error code.
+ */
+void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
+			   struct drm_sched_entity *entity)
+{
+	if (entity->fini_status) {
 		struct drm_sched_job *job;
+		int r;
 
 		/* Park the kernel for a moment to make sure it isn't processing
	 * our entity.
@@ -240,12 +267,25 @@
 			struct drm_sched_fence *s_fence = job->s_fence;
 			drm_sched_fence_scheduled(s_fence);
 			dma_fence_set_error(&s_fence->finished, -ESRCH);
-			drm_sched_fence_finished(s_fence);
-			WARN_ON(s_fence->parent);
-			dma_fence_put(&s_fence->finished);
-			sched->ops->free_job(job);
+			r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb,
+							drm_sched_entity_kill_jobs_cb);
+			if (r == -ENOENT)
+				drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
+			else if (r)
+				DRM_ERROR("fence add callback failed (%d)\n", r);
 		}
 	}
+
+	dma_fence_put(entity->last_scheduled);
+	entity->last_scheduled = NULL;
+}
+EXPORT_SYMBOL(drm_sched_entity_cleanup);
+
+void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
+				struct drm_sched_entity *entity)
+{
+	drm_sched_entity_do_release(sched, entity);
+	drm_sched_entity_cleanup(sched, entity);
 }
 EXPORT_SYMBOL(drm_sched_entity_fini);
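
A minimal sketch of how a driver might use the split teardown (foo and foo_ctx_unmap_resources() are hypothetical; drivers with nothing to do between the two phases can keep calling drm_sched_entity_fini(), which now just chains them):

	/* Phase 1: wait for queued jobs to drain; may record -ERESTARTSYS
	 * in entity->fini_status if the process was killed. */
	drm_sched_entity_do_release(&foo->sched, &foo->entity);

	/* Driver-specific teardown that must not race with job submission. */
	foo_ctx_unmap_resources(foo);

	/* Phase 2: signal any leftover jobs with -ESRCH and free them. */
	drm_sched_entity_cleanup(&foo->sched, &foo->entity);
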
 
@@ -360,6 +400,9 @@
 	if (entity->guilty && atomic_read(entity->guilty))
 		dma_fence_set_error(&sched_job->s_fence->finished, -ECANCELED);
 
+	dma_fence_put(entity->last_scheduled);
+	entity->last_scheduled = dma_fence_get(&sched_job->s_fence->finished);
+
 	spsc_queue_pop(&entity->job_queue);
 	return sched_job;
 }
@@ -369,6 +412,10 @@
  *
  * @sched_job		The pointer to job required to submit
  *
+ * Note: To guarantee that the order of insertion into the queue matches
+ * the job's fence sequence number, this function should be called with
+ * drm_sched_job_init() under a common lock.
  */
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
@@ -379,11 +426,8 @@
 
 	trace_drm_sched_job(sched_job, entity);
 
-	spin_lock(&entity->queue_lock);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
-	spin_unlock(&entity->queue_lock);
-
 	/* first job wakes up scheduler */
 	if (first) {
 		/* Add the entity to the run queue */
@@ -529,6 +573,7 @@
 		spin_unlock(&sched->job_list_lock);
 		fence = sched->ops->run_job(s_job);
 		atomic_inc(&sched->hw_rq_count);
+
 		if (fence) {
 			s_fence->parent = dma_fence_get(fence);
 			r = dma_fence_add_callback(fence, &s_fence->cb,
@@ -548,13 +593,19 @@
 }
 EXPORT_SYMBOL(drm_sched_job_recovery);
 
-/* init a sched_job with basic field */
+/**
+ * Init a sched_job with its basic fields
+ *
+ * Note: Refer to the drm_sched_entity_push_job() documentation
+ * for locking considerations.
+ */
 int drm_sched_job_init(struct drm_sched_job *job,
 		       struct drm_gpu_scheduler *sched,
 		       struct drm_sched_entity *entity,
 		       void *owner)
 {
 	job->sched = sched;
+	job->entity = entity;
 	job->s_priority = entity->rq - sched->sched_rq;
 	job->s_fence = drm_sched_fence_create(entity, owner);
 	if (!job->s_fence)
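
A sketch of the submission contract the two locking notes above describe (foo_ctx->queue_lock is a hypothetical caller-owned lock; the point is that job init and push happen atomically, so fence sequence numbers match queue insertion order):

	int ret;

	mutex_lock(&foo_ctx->queue_lock);
	ret = drm_sched_job_init(&job->base, sched, &foo_ctx->entity, owner);
	if (!ret)
		drm_sched_entity_push_job(&job->base, &foo_ctx->entity);
	mutex_unlock(&foo_ctx->queue_lock);
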
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
new file mode 100644
index 0000000..4998ad9
--- /dev/null
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _GPU_SCHED_TRACE_H
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <drm/drmP.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu_scheduler
+#define TRACE_INCLUDE_FILE gpu_scheduler_trace
+
+TRACE_EVENT(drm_sched_job,
+	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
+	    TP_ARGS(sched_job, entity),
+	    TP_STRUCT__entry(
+			     __field(struct drm_sched_entity *, entity)
+			     __field(struct dma_fence *, fence)
+			     __field(const char *, name)
+			     __field(uint64_t, id)
+			     __field(u32, job_count)
+			     __field(int, hw_job_count)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->entity = entity;
+			   __entry->id = sched_job->id;
+			   __entry->fence = &sched_job->s_fence->finished;
+			   __entry->name = sched_job->sched->name;
+			   __entry->job_count = spsc_queue_count(&entity->job_queue);
+			   __entry->hw_job_count = atomic_read(
+				   &sched_job->sched->hw_rq_count);
+			   ),
+	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
+		      __entry->entity, __entry->id,
+		      __entry->fence, __entry->name,
+		      __entry->job_count, __entry->hw_job_count)
+);
+
+TRACE_EVENT(drm_sched_process_job,
+	    TP_PROTO(struct drm_sched_fence *fence),
+	    TP_ARGS(fence),
+	    TP_STRUCT__entry(
+		    __field(struct dma_fence *, fence)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->fence = &fence->finished;
+		    ),
+	    TP_printk("fence=%p signaled", __entry->fence)
+);
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/scheduler
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
index 69aab086..df44616 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/scheduler/sched_fence.c
@@ -87,7 +87,7 @@
 }
 
 /**
- * amd_sched_fence_free - free up the fence memory
+ * drm_sched_fence_free - free up the fence memory
  *
  * @rcu: RCU callback head
  *
@@ -98,12 +98,11 @@
 	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
 	struct drm_sched_fence *fence = to_drm_sched_fence(f);
 
-	dma_fence_put(fence->parent);
 	kmem_cache_free(sched_fence_slab, fence);
 }
 
 /**
- * amd_sched_fence_release_scheduled - callback that fence can be freed
+ * drm_sched_fence_release_scheduled - callback that fence can be freed
  *
  * @fence: fence
  *
@@ -114,11 +113,12 @@
 {
 	struct drm_sched_fence *fence = to_drm_sched_fence(f);
 
+	dma_fence_put(fence->parent);
 	call_rcu(&fence->finished.rcu, drm_sched_fence_free);
 }
 
 /**
- * amd_sched_fence_release_finished - drop extra reference
+ * drm_sched_fence_release_finished - drop extra reference
  *
  * @f: fence
  *
diff --git a/drivers/gpu/drm/selftests/Makefile b/drivers/gpu/drm/selftests/Makefile
index 4aebfc7..9fc349f 100644
--- a/drivers/gpu/drm/selftests/Makefile
+++ b/drivers/gpu/drm/selftests/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += test-drm_mm.o
+obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm-helper.o
diff --git a/drivers/gpu/drm/selftests/drm_helper_selftests.h b/drivers/gpu/drm/selftests/drm_helper_selftests.h
new file mode 100644
index 0000000..9771290
--- /dev/null
+++ b/drivers/gpu/drm/selftests/drm_helper_selftests.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* List each unit test as selftest(name, function)
+ *
+ * The name is used as both an enum and expanded as igt__name to create
+ * a module parameter. It must be unique and a legal C identifier.
+ *
+ * Tests are executed in order by igt/drm_selftests_helper
+ */
+selftest(check_plane_state, igt_check_plane_state)
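
Roughly what a selftest() entry expands to, judging from the existing drm_mm selftest machinery (the exact expansion lives in drm_selftest.h and drm_selftest.c; this is a sketch, not the literal output):

	/* once in the header, as a module parameter ... */
	int igt__check_plane_state = true;
	module_param_named(igt__check_plane_state, igt__check_plane_state,
			   int, 0400);

	/* ... and once as a table entry that run_selftests()
	 * walks in declaration order: */
	{ .name = "check_plane_state", .func = igt_check_plane_state },
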
diff --git a/drivers/gpu/drm/selftests/test-drm-helper.c b/drivers/gpu/drm/selftests/test-drm-helper.c
new file mode 100644
index 0000000..a015712
--- /dev/null
+++ b/drivers/gpu/drm/selftests/test-drm-helper.c
@@ -0,0 +1,247 @@
+/*
+ * Test cases for the drm_kms_helper functions
+ */
+
+#define pr_fmt(fmt) "drm_kms_helper: " fmt
+
+#include <linux/module.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_modes.h>
+
+#define TESTS "drm_helper_selftests.h"
+#include "drm_selftest.h"
+
+#define FAIL(test, msg, ...) \
+	do { \
+		if (test) { \
+			pr_err("%s/%u: " msg, __func__, __LINE__, ##__VA_ARGS__); \
+			return -EINVAL; \
+		} \
+	} while (0)
+
+#define FAIL_ON(x) FAIL((x), "%s", "FAIL_ON(" __stringify(x) ")\n")
+
+static void set_src(struct drm_plane_state *plane_state,
+		    unsigned src_x, unsigned src_y,
+		    unsigned src_w, unsigned src_h)
+{
+	plane_state->src_x = src_x;
+	plane_state->src_y = src_y;
+	plane_state->src_w = src_w;
+	plane_state->src_h = src_h;
+}
+
+static bool check_src_eq(struct drm_plane_state *plane_state,
+			 unsigned src_x, unsigned src_y,
+			 unsigned src_w, unsigned src_h)
+{
+	if (plane_state->src.x1 < 0) {
+		pr_err("src x coordinate %x should never be below 0.\n", plane_state->src.x1);
+		drm_rect_debug_print("src: ", &plane_state->src, true);
+		return false;
+	}
+	if (plane_state->src.y1 < 0) {
+		pr_err("src y coordinate %x should never be below 0.\n", plane_state->src.y1);
+		drm_rect_debug_print("src: ", &plane_state->src, true);
+		return false;
+	}
+
+	if (plane_state->src.x1 != src_x ||
+	    plane_state->src.y1 != src_y ||
+	    drm_rect_width(&plane_state->src) != src_w ||
+	    drm_rect_height(&plane_state->src) != src_h) {
+		drm_rect_debug_print("src: ", &plane_state->src, true);
+		return false;
+	}
+
+	return true;
+}
+
+static void set_crtc(struct drm_plane_state *plane_state,
+		     int crtc_x, int crtc_y,
+		     unsigned crtc_w, unsigned crtc_h)
+{
+	plane_state->crtc_x = crtc_x;
+	plane_state->crtc_y = crtc_y;
+	plane_state->crtc_w = crtc_w;
+	plane_state->crtc_h = crtc_h;
+}
+
+static bool check_crtc_eq(struct drm_plane_state *plane_state,
+			  int crtc_x, int crtc_y,
+			  unsigned crtc_w, unsigned crtc_h)
+{
+	if (plane_state->dst.x1 != crtc_x ||
+	    plane_state->dst.y1 != crtc_y ||
+	    drm_rect_width(&plane_state->dst) != crtc_w ||
+	    drm_rect_height(&plane_state->dst) != crtc_h) {
+		drm_rect_debug_print("dst: ", &plane_state->dst, false);
+
+		return false;
+	}
+
+	return true;
+}
+
+static int igt_check_plane_state(void *ignored)
+{
+	int ret;
+
+	const struct drm_crtc_state crtc_state = {
+		.crtc = ZERO_SIZE_PTR,
+		.enable = true,
+		.active = true,
+		.mode = {
+			DRM_MODE("1024x768", 0, 65000, 1024, 1048,
+				1184, 1344, 0, 768, 771, 777, 806, 0,
+				DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC)
+		},
+	};
+	struct drm_framebuffer fb = {
+		.width = 2048,
+		.height = 2048
+	};
+	struct drm_plane_state plane_state = {
+		.crtc = ZERO_SIZE_PTR,
+		.fb = &fb,
+		.rotation = DRM_MODE_ROTATE_0
+	};
+
+	/* Simple clipping, no scaling. */
+	set_src(&plane_state, 0, 0, fb.width << 16, fb.height << 16);
+	set_crtc(&plane_state, 0, 0, fb.width, fb.height);
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  false, false);
+	FAIL(ret < 0, "Simple clipping check should pass\n");
+	FAIL_ON(!plane_state.visible);
+	FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1024 << 16, 768 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+	/* Rotated clipping + reflection, no scaling. */
+	plane_state.rotation = DRM_MODE_ROTATE_90 | DRM_MODE_REFLECT_X;
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  false, false);
+	FAIL(ret < 0, "Rotated clipping check should pass\n");
+	FAIL_ON(!plane_state.visible);
+	FAIL_ON(!check_src_eq(&plane_state, 0, 0, 768 << 16, 1024 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+	plane_state.rotation = DRM_MODE_ROTATE_0;
+
+	/* Check whether positioning works correctly. */
+	set_src(&plane_state, 0, 0, 1023 << 16, 767 << 16);
+	set_crtc(&plane_state, 0, 0, 1023, 767);
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  false, false);
+	FAIL(!ret, "Should not be able to position on the crtc with can_position=false\n");
+
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  true, false);
+	FAIL(ret < 0, "Simple positioning should work\n");
+	FAIL_ON(!plane_state.visible);
+	FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1023 << 16, 767 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1023, 767));
+
+	/* Simple scaling tests. */
+	set_src(&plane_state, 0, 0, 512 << 16, 384 << 16);
+	set_crtc(&plane_state, 0, 0, 1024, 768);
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  0x8001,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  false, false);
+	FAIL(!ret, "Upscaling out of range should fail.\n");
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  0x8000,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  false, false);
+	FAIL(ret < 0, "Upscaling exactly 2x should work\n");
+	FAIL_ON(!plane_state.visible);
+	FAIL_ON(!check_src_eq(&plane_state, 0, 0, 512 << 16, 384 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+	set_src(&plane_state, 0, 0, 2048 << 16, 1536 << 16);
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  0x1ffff, false, false);
+	FAIL(!ret, "Downscaling out of range should fail.\n");
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  0x20000, false, false);
+	FAIL(ret < 0, "Should succeed with exact scaling limit\n");
+	FAIL_ON(!plane_state.visible);
+	FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2048 << 16, 1536 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+	/* Testing rounding errors. */
+	set_src(&plane_state, 0, 0, 0x40001, 0x40001);
+	set_crtc(&plane_state, 1022, 766, 4, 4);
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  0x10001,
+						  true, false);
+	FAIL(ret < 0, "Should succeed by clipping to exact multiple");
+	FAIL_ON(!plane_state.visible);
+	FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2));
+
+	set_src(&plane_state, 0x20001, 0x20001, 0x4040001, 0x3040001);
+	set_crtc(&plane_state, -2, -2, 1028, 772);
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  0x10001,
+						  false, false);
+	FAIL(ret < 0, "Should succeed by clipping to exact multiple");
+	FAIL_ON(!plane_state.visible);
+	FAIL_ON(!check_src_eq(&plane_state, 0x40002, 0x40002, 1024 << 16, 768 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+	set_src(&plane_state, 0, 0, 0x3ffff, 0x3ffff);
+	set_crtc(&plane_state, 1022, 766, 4, 4);
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  0xffff,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  true, false);
+	FAIL(ret < 0, "Should succeed by clipping to exact multiple");
+	FAIL_ON(!plane_state.visible);
+	/* Should not be rounded to 0x20001, which would be upscaling. */
+	FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2));
+
+	set_src(&plane_state, 0x1ffff, 0x1ffff, 0x403ffff, 0x303ffff);
+	set_crtc(&plane_state, -2, -2, 1028, 772);
+	ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+						  0xffff,
+						  DRM_PLANE_HELPER_NO_SCALING,
+						  false, false);
+	FAIL(ret < 0, "Should succeed by clipping to exact multiple");
+	FAIL_ON(!plane_state.visible);
+	FAIL_ON(!check_src_eq(&plane_state, 0x3fffe, 0x3fffe, 1024 << 16, 768 << 16));
+	FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+	return 0;
+}
+
+#include "drm_selftest.c"
+
+static int __init test_drm_helper_init(void)
+{
+	int err;
+
+	err = run_selftests(selftests, ARRAY_SIZE(selftests), NULL);
+
+	return err > 0 ? 0 : err;
+}
+
+module_init(test_drm_helper_init);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/sti/Kconfig b/drivers/gpu/drm/sti/Kconfig
index cca4b3c..1963cc1 100644
--- a/drivers/gpu/drm/sti/Kconfig
+++ b/drivers/gpu/drm/sti/Kconfig
@@ -1,6 +1,6 @@
 config DRM_STI
 	tristate "DRM Support for STMicroelectronics SoC stiH4xx Series"
-	depends on DRM && (ARCH_STI || ARCH_MULTIPLATFORM)
+	depends on OF && DRM && (ARCH_STI || ARCH_MULTIPLATFORM)
 	select RESET_CONTROLLER
 	select DRM_KMS_HELPER
 	select DRM_GEM_CMA_HELPER
@@ -8,6 +8,5 @@
 	select DRM_PANEL
 	select FW_LOADER
 	select SND_SOC_HDMI_CODEC if SND_SOC
-	select OF
 	help
 	  Choose this option to enable DRM on STM stiH4xx chipset
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index 21e50d7..5824e6a 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -357,7 +357,7 @@
 	res = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor,
 					&sti_crtc_funcs, NULL);
 	if (res) {
-		DRM_ERROR("Can't initialze CRTC\n");
+		DRM_ERROR("Can't initialize CRTC\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index 55b6967..90c46b4 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -119,30 +119,10 @@
 	return ret;
 }
 
-static int sti_atomic_check(struct drm_device *dev,
-			    struct drm_atomic_state *state)
-{
-	int ret;
-
-	ret = drm_atomic_helper_check_modeset(dev, state);
-	if (ret)
-		return ret;
-
-	ret = drm_atomic_normalize_zpos(dev, state);
-	if (ret)
-		return ret;
-
-	ret = drm_atomic_helper_check_planes(dev, state);
-	if (ret)
-		return ret;
-
-	return ret;
-}
-
 static const struct drm_mode_config_funcs sti_mode_config_funcs = {
 	.fb_create = drm_gem_fb_create,
 	.output_poll_changed = drm_fb_helper_output_poll_changed,
-	.atomic_check = sti_atomic_check,
+	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
 
@@ -160,6 +140,8 @@
 	dev->mode_config.max_height = STI_MAX_FB_HEIGHT;
 
 	dev->mode_config.funcs = &sti_mode_config_funcs;
+
+	dev->mode_config.normalize_zpos = true;
 }
 
 DEFINE_DRM_GEM_CMA_FOPS(sti_driver_fops);
diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c
index b074609..b48cd86 100644
--- a/drivers/gpu/drm/sti/sti_plane.c
+++ b/drivers/gpu/drm/sti/sti_plane.c
@@ -40,6 +40,7 @@
 			  bool new_frame,
 			  bool new_field)
 {
+	struct drm_plane_state *state = plane->drm_plane.state;
 	ktime_t now;
 	struct sti_fps_info *fps;
 	int fpks, fipks, ms_since_last, num_frames, num_fields;
@@ -66,14 +67,14 @@
 	fps->last_timestamp = now;
 	fps->last_frame_counter = fps->curr_frame_counter;
 
-	if (plane->drm_plane.fb) {
+	if (state->fb) {
 		fpks = (num_frames * 1000000) / ms_since_last;
 		snprintf(plane->fps_info.fps_str, FPS_LENGTH,
 			 "%-8s %4dx%-4d %.4s @ %3d.%-3.3d fps (%s)",
 			 plane->drm_plane.name,
-			 plane->drm_plane.fb->width,
-			 plane->drm_plane.fb->height,
-			 (char *)&plane->drm_plane.fb->format->format,
+			 state->fb->width,
+			 state->fb->height,
+			 (char *)&state->fb->format->format,
 			 fpks / 1000, fpks % 1000,
 			 sti_plane_to_str(plane));
 	}
diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c
index 9ab00a8..8698e08 100644
--- a/drivers/gpu/drm/stm/drv.c
+++ b/drivers/gpu/drm/stm/drv.c
@@ -72,8 +72,6 @@
 	.gem_prime_vmap = drm_gem_cma_prime_vmap,
 	.gem_prime_vunmap = drm_gem_cma_prime_vunmap,
 	.gem_prime_mmap = drm_gem_cma_prime_mmap,
-	.enable_vblank = ltdc_crtc_enable_vblank,
-	.disable_vblank = ltdc_crtc_disable_vblank,
 };
 
 static int drv_load(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index 1a3277e..d997a60 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -392,9 +392,6 @@
 	u32 val;
 	int i;
 
-	if (!crtc || !crtc->state)
-		return;
-
 	if (!crtc->state->color_mgmt_changed || !crtc->state->gamma_lut)
 		return;
 
@@ -448,6 +445,43 @@
 	reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR);
 }
 
+#define CLK_TOLERANCE_HZ 50
+
+static enum drm_mode_status
+ltdc_crtc_mode_valid(struct drm_crtc *crtc,
+		     const struct drm_display_mode *mode)
+{
+	struct ltdc_device *ldev = crtc_to_ltdc(crtc);
+	int target = mode->clock * 1000;
+	int target_min = target - CLK_TOLERANCE_HZ;
+	int target_max = target + CLK_TOLERANCE_HZ;
+	int result;
+
+	/*
+	 * Accept all "preferred" modes:
+	 * - this is important for panels because panel clock tolerances are
+	 *   bigger than hdmi ones and there is no reason not to accept them
+	 *   (the fps may vary a little but it is not a problem).
+	 * - the hdmi preferred mode will be accepted too, but userland will
+	 *   be able to use other hdmi "valid" modes if necessary.
+	 */
+	if (mode->type & DRM_MODE_TYPE_PREFERRED)
+		return MODE_OK;
+
+	result = clk_round_rate(ldev->pixel_clk, target);
+
+	DRM_DEBUG_DRIVER("clk rate target %d, available %d\n", target, result);
+
+	/*
+	 * Filter modes according to the clock value, particularly useful for
+	 * hdmi modes that require precise pixel clocks.
+	 */
+	if (result < target_min || result > target_max)
+		return MODE_CLOCK_RANGE;
+
+	return MODE_OK;
+}
+
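A worked example with hypothetical numbers, to make the tolerance window concrete:

	/* A 1080p HDMI mode asks for mode->clock = 148500 (kHz). */
	int target = 148500 * 1000;			/* 148500000 Hz */
	int target_min = target - CLK_TOLERANCE_HZ;	/* 148499950 Hz */
	int target_max = target + CLK_TOLERANCE_HZ;	/* 148500050 Hz */
	/* clk_round_rate() returning e.g. 148351648 Hz misses the window,
	 * so the mode is rejected with MODE_CLOCK_RANGE; a panel's
	 * DRM_MODE_TYPE_PREFERRED mode never reaches this check. */
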
 static bool ltdc_crtc_mode_fixup(struct drm_crtc *crtc,
 				 const struct drm_display_mode *mode,
 				 struct drm_display_mode *adjusted_mode)
@@ -562,6 +596,7 @@
 }
 
 static const struct drm_crtc_helper_funcs ltdc_crtc_helper_funcs = {
+	.mode_valid = ltdc_crtc_mode_valid,
 	.mode_fixup = ltdc_crtc_mode_fixup,
 	.mode_set_nofb = ltdc_crtc_mode_set_nofb,
 	.atomic_flush = ltdc_crtc_atomic_flush,
@@ -569,9 +604,9 @@
 	.atomic_disable = ltdc_crtc_atomic_disable,
 };
 
-int ltdc_crtc_enable_vblank(struct drm_device *ddev, unsigned int pipe)
+static int ltdc_crtc_enable_vblank(struct drm_crtc *crtc)
 {
-	struct ltdc_device *ldev = ddev->dev_private;
+	struct ltdc_device *ldev = crtc_to_ltdc(crtc);
 
 	DRM_DEBUG_DRIVER("\n");
 	reg_set(ldev->regs, LTDC_IER, IER_LIE);
@@ -579,9 +614,9 @@
 	return 0;
 }
 
-void ltdc_crtc_disable_vblank(struct drm_device *ddev, unsigned int pipe)
+static void ltdc_crtc_disable_vblank(struct drm_crtc *crtc)
 {
-	struct ltdc_device *ldev = ddev->dev_private;
+	struct ltdc_device *ldev = crtc_to_ltdc(crtc);
 
 	DRM_DEBUG_DRIVER("\n");
 	reg_clear(ldev->regs, LTDC_IER, IER_LIE);
@@ -594,6 +629,8 @@
 	.reset = drm_atomic_helper_crtc_reset,
 	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+	.enable_vblank = ltdc_crtc_enable_vblank,
+	.disable_vblank = ltdc_crtc_disable_vblank,
 	.gamma_set = drm_atomic_helper_legacy_gamma_set,
 };
 
@@ -727,6 +764,8 @@
 	reg_update_bits(ldev->regs, LTDC_L1CR + lofs,
 			LXCR_LEN | LXCR_CLUTEN, val);
 
+	ldev->plane_fpsi[plane->index].counter++;
+
 	mutex_lock(&ldev->err_lock);
 	if (ldev->error_status & ISR_FUIF) {
 		DRM_DEBUG_DRIVER("Fifo underrun\n");
@@ -752,6 +791,25 @@
 			 oldstate->crtc->base.id, plane->base.id);
 }
 
+static void ltdc_plane_atomic_print_state(struct drm_printer *p,
+					  const struct drm_plane_state *state)
+{
+	struct drm_plane *plane = state->plane;
+	struct ltdc_device *ldev = plane_to_ltdc(plane);
+	struct fps_info *fpsi = &ldev->plane_fpsi[plane->index];
+	int ms_since_last;
+	ktime_t now;
+
+	now = ktime_get();
+	ms_since_last = ktime_to_ms(ktime_sub(now, fpsi->last_timestamp));
+
+	drm_printf(p, "\tuser_updates=%dfps\n",
+		   DIV_ROUND_CLOSEST(fpsi->counter * 1000, ms_since_last));
+
+	fpsi->last_timestamp = now;
+	fpsi->counter = 0;
+}
+
 static const struct drm_plane_funcs ltdc_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
@@ -759,6 +817,7 @@
 	.reset = drm_atomic_helper_plane_reset,
 	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+	.atomic_print_state = ltdc_plane_atomic_print_state,
 };
 
 static const struct drm_plane_helper_funcs ltdc_plane_helper_funcs = {
@@ -801,13 +860,13 @@
 
 	plane = devm_kzalloc(dev, sizeof(*plane), GFP_KERNEL);
 	if (!plane)
-		return 0;
+		return NULL;
 
 	ret = drm_universal_plane_init(ddev, plane, possible_crtcs,
 				       &ltdc_plane_funcs, formats, nb_fmt,
 				       NULL, type, NULL);
 	if (ret < 0)
-		return 0;
+		return NULL;
 
 	drm_plane_helper_add(plane, &ltdc_plane_helper_funcs);
 
@@ -966,14 +1025,13 @@
 						  &bridge[i]);
 
 		/*
-		 * If at least one endpoint is ready, continue probing,
-		 * else if at least one endpoint is -EPROBE_DEFER and
-		 * there is no previous ready endpoints, defer probing.
+		 * If at least one endpoint is -EPROBE_DEFER, defer probing,
+		 * else if at least one endpoint is ready, continue probing.
 		 */
-		if (!ret)
+		if (ret == -EPROBE_DEFER)
+			return ret;
+		else if (!ret)
 			endpoint_not_ready = 0;
-		else if (ret == -EPROBE_DEFER && endpoint_not_ready)
-			endpoint_not_ready = -EPROBE_DEFER;
 	}
 
 	if (endpoint_not_ready)
diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h
index edb2681..1e16d6a 100644
--- a/drivers/gpu/drm/stm/ltdc.h
+++ b/drivers/gpu/drm/stm/ltdc.h
@@ -20,6 +20,13 @@
 	bool non_alpha_only_l1; /* non-native no-alpha formats on layer 1 */
 };
 
+#define LTDC_MAX_LAYER	4
+
+struct fps_info {
+	unsigned int counter;
+	ktime_t last_timestamp;
+};
+
 struct ltdc_device {
 	void __iomem *regs;
 	struct clk *pixel_clk;	/* lcd pixel clock */
@@ -27,10 +34,9 @@
 	struct ltdc_caps caps;
 	u32 error_status;
 	u32 irq_status;
+	struct fps_info plane_fpsi[LTDC_MAX_LAYER];
 };
 
-int ltdc_crtc_enable_vblank(struct drm_device *dev, unsigned int pipe);
-void ltdc_crtc_disable_vblank(struct drm_device *dev, unsigned int pipe);
 int ltdc_load(struct drm_device *ddev);
 void ltdc_unload(struct drm_device *ddev);
 
diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index eee6bc0..156a865 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -40,6 +40,16 @@
 	  do some alpha blending and feed graphics to TCON. If M is
 	  selected the module will be called sun4i-backend.
 
+config DRM_SUN6I_DSI
+	tristate "Allwinner A31 MIPI-DSI Controller Support"
+	default MACH_SUN8I
+	select CRC_CCITT
+	select DRM_MIPI_DSI
+	help
+	  Choose this option if you want MIPI-DSI support for an
+	  Allwinner SoC. If M is selected the module will be called
+	  sun6i-dsi.
+
 config DRM_SUN8I_DW_HDMI
 	tristate "Support for Allwinner version of DesignWare HDMI"
 	depends on DRM_SUN4I
diff --git a/drivers/gpu/drm/sun4i/Makefile b/drivers/gpu/drm/sun4i/Makefile
index 330843c..2589f4a 100644
--- a/drivers/gpu/drm/sun4i/Makefile
+++ b/drivers/gpu/drm/sun4i/Makefile
@@ -24,6 +24,9 @@
 sun4i-tcon-y			+= sun4i_tcon.o
 sun4i-tcon-y			+= sun4i_rgb.o
 
+sun6i-dsi-y			+= sun6i_mipi_dphy.o
+sun6i-dsi-y			+= sun6i_mipi_dsi.o
+
 obj-$(CONFIG_DRM_SUN4I)		+= sun4i-drm.o
 obj-$(CONFIG_DRM_SUN4I)		+= sun4i-tcon.o
 obj-$(CONFIG_DRM_SUN4I)		+= sun4i_tv.o
@@ -31,5 +34,6 @@
 
 obj-$(CONFIG_DRM_SUN4I_BACKEND)	+= sun4i-backend.o sun4i-frontend.o
 obj-$(CONFIG_DRM_SUN4I_HDMI)	+= sun4i-drm-hdmi.o
+obj-$(CONFIG_DRM_SUN6I_DSI)	+= sun6i-dsi.o
 obj-$(CONFIG_DRM_SUN8I_DW_HDMI)	+= sun8i-drm-hdmi.o
 obj-$(CONFIG_DRM_SUN8I_MIXER)	+= sun8i-mixer.o
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index 9bad54f..de0a76d 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -295,6 +295,15 @@
 	DRM_DEBUG_DRIVER("Switching display backend interlaced mode %s\n",
 			 interlaced ? "on" : "off");
 
+	val = SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA(state->alpha >> 8);
+	if (state->alpha != DRM_BLEND_ALPHA_OPAQUE)
+		val |= SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN;
+	regmap_update_bits(backend->engine.regs,
+			   SUN4I_BACKEND_ATTCTL_REG0(layer),
+			   SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_MASK |
+			   SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN,
+			   val);
+
 	if (sun4i_backend_format_is_yuv(fb->format->format))
 		return sun4i_backend_update_yuv_format(backend, layer, plane);
 
@@ -490,7 +499,7 @@
 		DRM_DEBUG_DRIVER("Plane FB format is %s\n",
 				 drm_get_format_name(fb->format->format,
 						     &format_name));
-		if (fb->format->has_alpha)
+		if (fb->format->has_alpha || (plane_state->alpha != DRM_BLEND_ALPHA_OPAQUE))
 			num_alpha_planes++;
 
 		if (sun4i_backend_format_is_yuv(fb->format->format)) {
@@ -548,7 +557,8 @@
 	}
 
 	/* We can't have an alpha plane at the lowest position */
-	if (plane_states[0]->fb->format->has_alpha)
+	if (plane_states[0]->fb->format->has_alpha ||
+	    (plane_states[0]->alpha != DRM_BLEND_ALPHA_OPAQUE))
 		return -EINVAL;
 
 	for (i = 1; i < num_planes; i++) {
@@ -560,7 +570,7 @@
 		 * The only alpha position is the lowest plane of the
 		 * second pipe.
 		 */
-		if (fb->format->has_alpha)
+		if (fb->format->has_alpha || (p_state->alpha != DRM_BLEND_ALPHA_OPAQUE))
 			current_pipe++;
 
 		s_state->pipe = current_pipe;
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.h b/drivers/gpu/drm/sun4i/sun4i_backend.h
index 316f217..4caee03 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.h
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.h
@@ -68,12 +68,15 @@
 #define SUN4I_BACKEND_CKMIN_REG			0x884
 #define SUN4I_BACKEND_CKCFG_REG			0x888
 #define SUN4I_BACKEND_ATTCTL_REG0(l)		(0x890 + (0x4 * (l)))
+#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_MASK	GENMASK(31, 24)
+#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA(x)		((x) << 24)
 #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL_MASK	BIT(15)
 #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL(x)		((x) << 15)
 #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PRISEL_MASK	GENMASK(11, 10)
 #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PRISEL(x)			((x) << 10)
 #define SUN4I_BACKEND_ATTCTL_REG0_LAY_YUVEN		BIT(2)
 #define SUN4I_BACKEND_ATTCTL_REG0_LAY_VDOEN		BIT(1)
+#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN	BIT(0)
 
 #define SUN4I_BACKEND_ATTCTL_REG1(l)		(0x8a0 + (0x4 * (l)))
 #define SUN4I_BACKEND_ATTCTL_REG1_LAY_HSCAFCT		GENMASK(15, 14)
diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.c b/drivers/gpu/drm/sun4i/sun4i_layer.c
index 2949a3c..750ad24 100644
--- a/drivers/gpu/drm/sun4i/sun4i_layer.c
+++ b/drivers/gpu/drm/sun4i/sun4i_layer.c
@@ -37,6 +37,7 @@
 	if (state) {
 		plane->state = &state->state;
 		plane->state->plane = plane;
+		plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE;
 		plane->state->zpos = layer->id;
 	}
 }
@@ -167,6 +168,7 @@
 			     &sun4i_backend_layer_helper_funcs);
 	layer->backend = backend;
 
+	drm_plane_create_alpha_property(&layer->plane);
 	drm_plane_create_zpos_property(&layer->plane, 0, 0,
 				       SUN4I_BACKEND_NUM_LAYERS - 1);
 
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index c3d92d5..08747fc 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -35,6 +35,7 @@
 #include "sun4i_lvds.h"
 #include "sun4i_rgb.h"
 #include "sun4i_tcon.h"
+#include "sun6i_mipi_dsi.h"
 #include "sunxi_engine.h"
 
 static struct drm_connector *sun4i_tcon_get_connector(const struct drm_encoder *encoder)
@@ -169,6 +170,7 @@
 	case DRM_MODE_ENCODER_LVDS:
 		is_lvds = true;
 		/* Fallthrough */
+	case DRM_MODE_ENCODER_DSI:
 	case DRM_MODE_ENCODER_NONE:
 		channel = 0;
 		break;
@@ -201,7 +203,8 @@
 	DRM_DEBUG_DRIVER("%sabling VBLANK interrupt\n", enable ? "En" : "Dis");
 
 	mask = SUN4I_TCON_GINT0_VBLANK_ENABLE(0) |
-	       SUN4I_TCON_GINT0_VBLANK_ENABLE(1);
+		SUN4I_TCON_GINT0_VBLANK_ENABLE(1) |
+		SUN4I_TCON_GINT0_TCON0_TRI_FINISH_ENABLE;
 
 	if (enable)
 		val = mask;
@@ -273,6 +276,71 @@
 		     SUN4I_TCON0_BASIC0_Y(mode->crtc_vdisplay));
 }
 
+static void sun4i_tcon0_mode_set_cpu(struct sun4i_tcon *tcon,
+				     struct mipi_dsi_device *device,
+				     const struct drm_display_mode *mode)
+{
+	u8 bpp = mipi_dsi_pixel_format_to_bpp(device->format);
+	u8 lanes = device->lanes;
+	u32 block_space, start_delay;
+	u32 tcon_div;
+
+	tcon->dclk_min_div = 4;
+	tcon->dclk_max_div = 127;
+
+	sun4i_tcon0_mode_set_common(tcon, mode);
+
+	regmap_update_bits(tcon->regs, SUN4I_TCON0_CTL_REG,
+			   SUN4I_TCON0_CTL_IF_MASK,
+			   SUN4I_TCON0_CTL_IF_8080);
+
+	regmap_write(tcon->regs, SUN4I_TCON_ECC_FIFO_REG,
+		     SUN4I_TCON_ECC_FIFO_EN);
+
+	regmap_write(tcon->regs, SUN4I_TCON0_CPU_IF_REG,
+		     SUN4I_TCON0_CPU_IF_MODE_DSI |
+		     SUN4I_TCON0_CPU_IF_TRI_FIFO_FLUSH |
+		     SUN4I_TCON0_CPU_IF_TRI_FIFO_EN |
+		     SUN4I_TCON0_CPU_IF_TRI_EN);
+
+	/*
+	 * This looks suspicious, but it works...
+	 *
+	 * The datasheet says that this should be set higher than 20 *
+	 * pixel cycle, but it's not clear what a pixel cycle is.
+	 */
+	regmap_read(tcon->regs, SUN4I_TCON0_DCLK_REG, &tcon_div);
+	tcon_div &= GENMASK(6, 0);
+	block_space = mode->htotal * bpp / (tcon_div * lanes);
+	block_space -= mode->hdisplay + 40;
+
+	regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI0_REG,
+		     SUN4I_TCON0_CPU_TRI0_BLOCK_SPACE(block_space) |
+		     SUN4I_TCON0_CPU_TRI0_BLOCK_SIZE(mode->hdisplay));
+
+	regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI1_REG,
+		     SUN4I_TCON0_CPU_TRI1_BLOCK_NUM(mode->vdisplay));
+
+	start_delay = (mode->crtc_vtotal - mode->crtc_vdisplay - 10 - 1);
+	start_delay = start_delay * mode->crtc_htotal * 149;
+	start_delay = start_delay / (mode->crtc_clock / 1000) / 8;
+	regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI2_REG,
+		     SUN4I_TCON0_CPU_TRI2_TRANS_START_SET(10) |
+		     SUN4I_TCON0_CPU_TRI2_START_DELAY(start_delay));
+
+	/*
+	 * The Allwinner BSP has a comment that the period should be
+	 * the display clock * 15, but uses a hardcoded 3000...
+	 */
+	regmap_write(tcon->regs, SUN4I_TCON_SAFE_PERIOD_REG,
+		     SUN4I_TCON_SAFE_PERIOD_NUM(3000) |
+		     SUN4I_TCON_SAFE_PERIOD_MODE(3));
+
+	/* Enable the output on the pins */
+	regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG,
+		     0xe0000000);
+}
+
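To make the block_space arithmetic above concrete, a worked example with hypothetical panel timings:

	/* Assume a 720x1280 DSI panel: htotal = 854, hdisplay = 720,
	 * bpp = 24 (RGB888), lanes = 4, and tcon_div read back as 6. */
	block_space = 854 * 24 / (6 * 4);	/* = 854 dclk cycles */
	block_space -= 720 + 40;		/* = 94 */
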
 static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon,
 				      const struct drm_encoder *encoder,
 				      const struct drm_display_mode *mode)
@@ -538,7 +606,17 @@
 			 const struct drm_encoder *encoder,
 			 const struct drm_display_mode *mode)
 {
+	struct sun6i_dsi *dsi;
+
 	switch (encoder->encoder_type) {
+	case DRM_MODE_ENCODER_DSI:
+		/*
+		 * This is not really elegant, but it's the "cleaner"
+		 * This is not really elegant, but it's the "cleanest"
+		 */
+		dsi = encoder_to_sun6i_dsi(encoder);
+		sun4i_tcon0_mode_set_cpu(tcon, dsi->device, mode);
+		break;
 	case DRM_MODE_ENCODER_LVDS:
 		sun4i_tcon0_mode_set_lvds(tcon, encoder, mode);
 		break;
@@ -582,7 +660,8 @@
 	regmap_read(tcon->regs, SUN4I_TCON_GINT0_REG, &status);
 
 	if (!(status & (SUN4I_TCON_GINT0_VBLANK_INT(0) |
-			SUN4I_TCON_GINT0_VBLANK_INT(1))))
+			SUN4I_TCON_GINT0_VBLANK_INT(1) |
+			SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT)))
 		return IRQ_NONE;
 
 	drm_crtc_handle_vblank(&scrtc->crtc);
@@ -591,7 +670,8 @@
 	/* Acknowledge the interrupt */
 	regmap_update_bits(tcon->regs, SUN4I_TCON_GINT0_REG,
 			   SUN4I_TCON_GINT0_VBLANK_INT(0) |
-			   SUN4I_TCON_GINT0_VBLANK_INT(1),
+			   SUN4I_TCON_GINT0_VBLANK_INT(1) |
+			   SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT,
 			   0);
 
 	if (engine->ops->vblank_quirk)
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h
index 161e094..f6a071c 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.h
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h
@@ -28,13 +28,32 @@
 
 #define SUN4I_TCON_GINT0_REG			0x4
 #define SUN4I_TCON_GINT0_VBLANK_ENABLE(pipe)		BIT(31 - (pipe))
+#define SUN4I_TCON_GINT0_TCON0_TRI_FINISH_ENABLE	BIT(27)
+#define SUN4I_TCON_GINT0_TCON0_TRI_COUNTER_ENABLE	BIT(26)
 #define SUN4I_TCON_GINT0_VBLANK_INT(pipe)		BIT(15 - (pipe))
+#define SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT		BIT(11)
+#define SUN4I_TCON_GINT0_TCON0_TRI_COUNTER_INT		BIT(10)
 
 #define SUN4I_TCON_GINT1_REG			0x8
+
 #define SUN4I_TCON_FRM_CTL_REG			0x10
+#define SUN4I_TCON_FRM_CTL_EN				BIT(31)
+
+#define SUN4I_TCON_FRM_SEED_PR_REG		0x14
+#define SUN4I_TCON_FRM_SEED_PG_REG		0x18
+#define SUN4I_TCON_FRM_SEED_PB_REG		0x1c
+#define SUN4I_TCON_FRM_SEED_LR_REG		0x20
+#define SUN4I_TCON_FRM_SEED_LG_REG		0x24
+#define SUN4I_TCON_FRM_SEED_LB_REG		0x28
+#define SUN4I_TCON_FRM_TBL0_REG			0x2c
+#define SUN4I_TCON_FRM_TBL1_REG			0x30
+#define SUN4I_TCON_FRM_TBL2_REG			0x34
+#define SUN4I_TCON_FRM_TBL3_REG			0x38
 
 #define SUN4I_TCON0_CTL_REG			0x40
 #define SUN4I_TCON0_CTL_TCON_ENABLE			BIT(31)
+#define SUN4I_TCON0_CTL_IF_MASK				GENMASK(25, 24)
+#define SUN4I_TCON0_CTL_IF_8080				(1 << 24)
 #define SUN4I_TCON0_CTL_CLK_DELAY_MASK			GENMASK(8, 4)
 #define SUN4I_TCON0_CTL_CLK_DELAY(delay)		((delay << 4) & SUN4I_TCON0_CTL_CLK_DELAY_MASK)
 #define SUN4I_TCON0_CTL_SRC_SEL_MASK			GENMASK(2, 0)
@@ -61,7 +80,14 @@
 #define SUN4I_TCON0_BASIC3_V_SYNC(height)		(((height) - 1) & 0x7ff)
 
 #define SUN4I_TCON0_HV_IF_REG			0x58
+
 #define SUN4I_TCON0_CPU_IF_REG			0x60
+#define SUN4I_TCON0_CPU_IF_MODE_MASK			GENMASK(31, 28)
+#define SUN4I_TCON0_CPU_IF_MODE_DSI			(1 << 28)
+#define SUN4I_TCON0_CPU_IF_TRI_FIFO_FLUSH		BIT(16)
+#define SUN4I_TCON0_CPU_IF_TRI_FIFO_EN			BIT(2)
+#define SUN4I_TCON0_CPU_IF_TRI_EN			BIT(0)
+
 #define SUN4I_TCON0_CPU_WR_REG			0x64
 #define SUN4I_TCON0_CPU_RD0_REG			0x68
 #define SUN4I_TCON0_CPU_RDA_REG			0x6c
@@ -128,6 +154,10 @@
 
 #define SUN4I_TCON1_IO_POL_REG			0xf0
 #define SUN4I_TCON1_IO_TRI_REG			0xf4
+
+#define SUN4I_TCON_ECC_FIFO_REG			0xf8
+#define SUN4I_TCON_ECC_FIFO_EN				BIT(3)
+
 #define SUN4I_TCON_CEU_CTL_REG			0x100
 #define SUN4I_TCON_CEU_MUL_RR_REG		0x110
 #define SUN4I_TCON_CEU_MUL_RG_REG		0x114
@@ -144,6 +174,22 @@
 #define SUN4I_TCON_CEU_RANGE_R_REG		0x140
 #define SUN4I_TCON_CEU_RANGE_G_REG		0x144
 #define SUN4I_TCON_CEU_RANGE_B_REG		0x148
+
+#define SUN4I_TCON0_CPU_TRI0_REG		0x160
+#define SUN4I_TCON0_CPU_TRI0_BLOCK_SPACE(space)		((((space) - 1) & 0xfff) << 16)
+#define SUN4I_TCON0_CPU_TRI0_BLOCK_SIZE(size)		(((size) - 1) & 0xfff)
+
+#define SUN4I_TCON0_CPU_TRI1_REG		0x164
+#define SUN4I_TCON0_CPU_TRI1_BLOCK_NUM(num)		(((num) - 1) & 0xffff)
+
+#define SUN4I_TCON0_CPU_TRI2_REG		0x168
+#define SUN4I_TCON0_CPU_TRI2_START_DELAY(delay)		(((delay) & 0xffff) << 16)
+#define SUN4I_TCON0_CPU_TRI2_TRANS_START_SET(set)	((set) & 0xfff)
+
+#define SUN4I_TCON_SAFE_PERIOD_REG		0x1f0
+#define SUN4I_TCON_SAFE_PERIOD_NUM(num)			(((num) & 0xfff) << 16)
+#define SUN4I_TCON_SAFE_PERIOD_MODE(mode)		((mode) & 0x3)
+
 #define SUN4I_TCON_MUX_CTRL_REG			0x200
 
 #define SUN4I_TCON0_LVDS_ANA0_REG		0x220
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c
new file mode 100644
index 0000000..e4d1943
--- /dev/null
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2016 Allwinnertech Co., Ltd.
+ * Copyright (C) 2017-2018 Bootlin
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/of_address.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include "sun6i_mipi_dsi.h"
+
+#define SUN6I_DPHY_GCTL_REG		0x00
+#define SUN6I_DPHY_GCTL_LANE_NUM(n)		((((n) - 1) & 3) << 4)
+#define SUN6I_DPHY_GCTL_EN			BIT(0)
+
+#define SUN6I_DPHY_TX_CTL_REG		0x04
+#define SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT	BIT(28)
+
+#define SUN6I_DPHY_TX_TIME0_REG		0x10
+#define SUN6I_DPHY_TX_TIME0_HS_TRAIL(n)		(((n) & 0xff) << 24)
+#define SUN6I_DPHY_TX_TIME0_HS_PREPARE(n)	(((n) & 0xff) << 16)
+#define SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(n)	((n) & 0xff)
+
+#define SUN6I_DPHY_TX_TIME1_REG		0x14
+#define SUN6I_DPHY_TX_TIME1_CLK_POST(n)		(((n) & 0xff) << 24)
+#define SUN6I_DPHY_TX_TIME1_CLK_PRE(n)		(((n) & 0xff) << 16)
+#define SUN6I_DPHY_TX_TIME1_CLK_ZERO(n)		(((n) & 0xff) << 8)
+#define SUN6I_DPHY_TX_TIME1_CLK_PREPARE(n)	((n) & 0xff)
+
+#define SUN6I_DPHY_TX_TIME2_REG		0x18
+#define SUN6I_DPHY_TX_TIME2_CLK_TRAIL(n)	((n) & 0xff)
+
+#define SUN6I_DPHY_TX_TIME3_REG		0x1c
+
+#define SUN6I_DPHY_TX_TIME4_REG		0x20
+#define SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(n)	(((n) & 0xff) << 8)
+#define SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(n)	((n) & 0xff)
+
+#define SUN6I_DPHY_ANA0_REG		0x4c
+#define SUN6I_DPHY_ANA0_REG_PWS			BIT(31)
+#define SUN6I_DPHY_ANA0_REG_DMPC		BIT(28)
+#define SUN6I_DPHY_ANA0_REG_DMPD(n)		(((n) & 0xf) << 24)
+#define SUN6I_DPHY_ANA0_REG_SLV(n)		(((n) & 7) << 12)
+#define SUN6I_DPHY_ANA0_REG_DEN(n)		(((n) & 0xf) << 8)
+
+#define SUN6I_DPHY_ANA1_REG		0x50
+#define SUN6I_DPHY_ANA1_REG_VTTMODE		BIT(31)
+#define SUN6I_DPHY_ANA1_REG_CSMPS(n)		(((n) & 3) << 28)
+#define SUN6I_DPHY_ANA1_REG_SVTT(n)		(((n) & 0xf) << 24)
+
+#define SUN6I_DPHY_ANA2_REG		0x54
+#define SUN6I_DPHY_ANA2_EN_P2S_CPU(n)		(((n) & 0xf) << 24)
+#define SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK		GENMASK(27, 24)
+#define SUN6I_DPHY_ANA2_EN_CK_CPU		BIT(4)
+#define SUN6I_DPHY_ANA2_REG_ENIB		BIT(1)
+
+#define SUN6I_DPHY_ANA3_REG		0x58
+#define SUN6I_DPHY_ANA3_EN_VTTD(n)		(((n) & 0xf) << 28)
+#define SUN6I_DPHY_ANA3_EN_VTTD_MASK		GENMASK(31, 28)
+#define SUN6I_DPHY_ANA3_EN_VTTC			BIT(27)
+#define SUN6I_DPHY_ANA3_EN_DIV			BIT(26)
+#define SUN6I_DPHY_ANA3_EN_LDOC			BIT(25)
+#define SUN6I_DPHY_ANA3_EN_LDOD			BIT(24)
+#define SUN6I_DPHY_ANA3_EN_LDOR			BIT(18)
+
+#define SUN6I_DPHY_ANA4_REG		0x5c
+#define SUN6I_DPHY_ANA4_REG_DMPLVC		BIT(24)
+#define SUN6I_DPHY_ANA4_REG_DMPLVD(n)		(((n) & 0xf) << 20)
+#define SUN6I_DPHY_ANA4_REG_CKDV(n)		(((n) & 0x1f) << 12)
+#define SUN6I_DPHY_ANA4_REG_TMSC(n)		(((n) & 3) << 10)
+#define SUN6I_DPHY_ANA4_REG_TMSD(n)		(((n) & 3) << 8)
+#define SUN6I_DPHY_ANA4_REG_TXDNSC(n)		(((n) & 3) << 6)
+#define SUN6I_DPHY_ANA4_REG_TXDNSD(n)		(((n) & 3) << 4)
+#define SUN6I_DPHY_ANA4_REG_TXPUSC(n)		(((n) & 3) << 2)
+#define SUN6I_DPHY_ANA4_REG_TXPUSD(n)		((n) & 3)
+
+#define SUN6I_DPHY_DBG5_REG		0xf4
+
+int sun6i_dphy_init(struct sun6i_dphy *dphy, unsigned int lanes)
+{
+	reset_control_deassert(dphy->reset);
+	clk_prepare_enable(dphy->mod_clk);
+	clk_set_rate_exclusive(dphy->mod_clk, 150000000);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG,
+		     SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG,
+		     SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) |
+		     SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) |
+		     SUN6I_DPHY_TX_TIME0_HS_TRAIL(10));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG,
+		     SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) |
+		     SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) |
+		     SUN6I_DPHY_TX_TIME1_CLK_PRE(3) |
+		     SUN6I_DPHY_TX_TIME1_CLK_POST(10));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG,
+		     SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG,
+		     SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) |
+		     SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG,
+		     SUN6I_DPHY_GCTL_LANE_NUM(lanes) |
+		     SUN6I_DPHY_GCTL_EN);
+
+	return 0;
+}
+
+int sun6i_dphy_power_on(struct sun6i_dphy *dphy, unsigned int lanes)
+{
+	u8 lanes_mask = GENMASK(lanes - 1, 0);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG,
+		     SUN6I_DPHY_ANA0_REG_PWS |
+		     SUN6I_DPHY_ANA0_REG_DMPC |
+		     SUN6I_DPHY_ANA0_REG_SLV(7) |
+		     SUN6I_DPHY_ANA0_REG_DMPD(lanes_mask) |
+		     SUN6I_DPHY_ANA0_REG_DEN(lanes_mask));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_ANA1_REG,
+		     SUN6I_DPHY_ANA1_REG_CSMPS(1) |
+		     SUN6I_DPHY_ANA1_REG_SVTT(7));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_ANA4_REG,
+		     SUN6I_DPHY_ANA4_REG_CKDV(1) |
+		     SUN6I_DPHY_ANA4_REG_TMSC(1) |
+		     SUN6I_DPHY_ANA4_REG_TMSD(1) |
+		     SUN6I_DPHY_ANA4_REG_TXDNSC(1) |
+		     SUN6I_DPHY_ANA4_REG_TXDNSD(1) |
+		     SUN6I_DPHY_ANA4_REG_TXPUSC(1) |
+		     SUN6I_DPHY_ANA4_REG_TXPUSD(1) |
+		     SUN6I_DPHY_ANA4_REG_DMPLVC |
+		     SUN6I_DPHY_ANA4_REG_DMPLVD(lanes_mask));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_ANA2_REG,
+		     SUN6I_DPHY_ANA2_REG_ENIB);
+	udelay(5);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_ANA3_REG,
+		     SUN6I_DPHY_ANA3_EN_LDOR |
+		     SUN6I_DPHY_ANA3_EN_LDOC |
+		     SUN6I_DPHY_ANA3_EN_LDOD);
+	udelay(1);
+
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG,
+			   SUN6I_DPHY_ANA3_EN_VTTC |
+			   SUN6I_DPHY_ANA3_EN_VTTD_MASK,
+			   SUN6I_DPHY_ANA3_EN_VTTC |
+			   SUN6I_DPHY_ANA3_EN_VTTD(lanes_mask));
+	udelay(1);
+
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG,
+			   SUN6I_DPHY_ANA3_EN_DIV,
+			   SUN6I_DPHY_ANA3_EN_DIV);
+	udelay(1);
+
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG,
+			   SUN6I_DPHY_ANA2_EN_CK_CPU,
+			   SUN6I_DPHY_ANA2_EN_CK_CPU);
+	udelay(1);
+
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA1_REG,
+			   SUN6I_DPHY_ANA1_REG_VTTMODE,
+			   SUN6I_DPHY_ANA1_REG_VTTMODE);
+
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG,
+			   SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK,
+			   SUN6I_DPHY_ANA2_EN_P2S_CPU(lanes_mask));
+
+	return 0;
+}
+
+int sun6i_dphy_power_off(struct sun6i_dphy *dphy)
+{
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA1_REG,
+			   SUN6I_DPHY_ANA1_REG_VTTMODE, 0);
+
+	return 0;
+}
+
+int sun6i_dphy_exit(struct sun6i_dphy *dphy)
+{
+	clk_rate_exclusive_put(dphy->mod_clk);
+	clk_disable_unprepare(dphy->mod_clk);
+	reset_control_assert(dphy->reset);
+
+	return 0;
+}
+
+static struct regmap_config sun6i_dphy_regmap_config = {
+	.reg_bits	= 32,
+	.val_bits	= 32,
+	.reg_stride	= 4,
+	.max_register	= SUN6I_DPHY_DBG5_REG,
+	.name		= "mipi-dphy",
+};
+
+static const struct of_device_id sun6i_dphy_of_table[] = {
+	{ .compatible = "allwinner,sun6i-a31-mipi-dphy" },
+	{ }
+};
+
+int sun6i_dphy_probe(struct sun6i_dsi *dsi, struct device_node *node)
+{
+	struct sun6i_dphy *dphy;
+	struct resource res;
+	void __iomem *regs;
+	int ret;
+
+	if (!of_match_node(sun6i_dphy_of_table, node)) {
+		dev_err(dsi->dev, "Incompatible D-PHY\n");
+		return -EINVAL;
+	}
+
+	dphy = devm_kzalloc(dsi->dev, sizeof(*dphy), GFP_KERNEL);
+	if (!dphy)
+		return -ENOMEM;
+
+	ret = of_address_to_resource(node, 0, &res);
+	if (ret) {
+		dev_err(dsi->dev, "phy: Couldn't get our resources\n");
+		return ret;
+	}
+
+	regs = devm_ioremap_resource(dsi->dev, &res);
+	if (IS_ERR(regs)) {
+		dev_err(dsi->dev, "Couldn't map the DPHY encoder registers\n");
+		return PTR_ERR(regs);
+	}
+
+	dphy->regs = devm_regmap_init_mmio(dsi->dev, regs,
+					   &sun6i_dphy_regmap_config);
+	if (IS_ERR(dphy->regs)) {
+		dev_err(dsi->dev, "Couldn't create the DPHY encoder regmap\n");
+		return PTR_ERR(dphy->regs);
+	}
+
+	dphy->reset = of_reset_control_get_shared(node, NULL);
+	if (IS_ERR(dphy->reset)) {
+		dev_err(dsi->dev, "Couldn't get our reset line\n");
+		return PTR_ERR(dphy->reset);
+	}
+
+	dphy->bus_clk = of_clk_get_by_name(node, "bus");
+	if (IS_ERR(dphy->bus_clk)) {
+		dev_err(dsi->dev, "Couldn't get the DPHY bus clock\n");
+		ret = PTR_ERR(dphy->bus_clk);
+		goto err_free_reset;
+	}
+	regmap_mmio_attach_clk(dphy->regs, dphy->bus_clk);
+
+	dphy->mod_clk = of_clk_get_by_name(node, "mod");
+	if (IS_ERR(dphy->mod_clk)) {
+		dev_err(dsi->dev, "Couldn't get the DPHY mod clock\n");
+		ret = PTR_ERR(dphy->mod_clk);
+		goto err_free_bus;
+	}
+
+	dsi->dphy = dphy;
+
+	return 0;
+
+err_free_bus:
+	regmap_mmio_detach_clk(dphy->regs);
+	clk_put(dphy->bus_clk);
+err_free_reset:
+	reset_control_put(dphy->reset);
+	return ret;
+}
+
+int sun6i_dphy_remove(struct sun6i_dsi *dsi)
+{
+	struct sun6i_dphy *dphy = dsi->dphy;
+
+	regmap_mmio_detach_clk(dphy->regs);
+	clk_put(dphy->mod_clk);
+	clk_put(dphy->bus_clk);
+	reset_control_put(dphy->reset);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
new file mode 100644
index 0000000..bfbf761
--- /dev/null
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
@@ -0,0 +1,1107 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2016 Allwinnertech Co., Ltd.
+ * Copyright (C) 2017-2018 Bootlin
+ *
+ * Maxime Ripard <maxime.ripard@bootlin.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/crc-ccitt.h>
+#include <linux/of_address.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include <linux/phy/phy.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_panel.h>
+
+#include "sun4i_drv.h"
+#include "sun6i_mipi_dsi.h"
+
+#include <video/mipi_display.h>
+
+#define SUN6I_DSI_CTL_REG		0x000
+#define SUN6I_DSI_CTL_EN			BIT(0)
+
+#define SUN6I_DSI_BASIC_CTL_REG		0x00c
+#define SUN6I_DSI_BASIC_CTL_HBP_DIS		BIT(2)
+#define SUN6I_DSI_BASIC_CTL_HSA_HSE_DIS		BIT(1)
+#define SUN6I_DSI_BASIC_CTL_VIDEO_BURST		BIT(0)
+
+#define SUN6I_DSI_BASIC_CTL0_REG	0x010
+#define SUN6I_DSI_BASIC_CTL0_HS_EOTP_EN		BIT(18)
+#define SUN6I_DSI_BASIC_CTL0_CRC_EN		BIT(17)
+#define SUN6I_DSI_BASIC_CTL0_ECC_EN		BIT(16)
+#define SUN6I_DSI_BASIC_CTL0_INST_ST		BIT(0)
+
+#define SUN6I_DSI_BASIC_CTL1_REG	0x014
+#define SUN6I_DSI_BASIC_CTL1_VIDEO_ST_DELAY(n)	(((n) & 0x1fff) << 4)
+#define SUN6I_DSI_BASIC_CTL1_VIDEO_FILL		BIT(2)
+#define SUN6I_DSI_BASIC_CTL1_VIDEO_PRECISION	BIT(1)
+#define SUN6I_DSI_BASIC_CTL1_VIDEO_MODE		BIT(0)
+
+#define SUN6I_DSI_BASIC_SIZE0_REG	0x018
+#define SUN6I_DSI_BASIC_SIZE0_VBP(n)		(((n) & 0xfff) << 16)
+#define SUN6I_DSI_BASIC_SIZE0_VSA(n)		((n) & 0xfff)
+
+#define SUN6I_DSI_BASIC_SIZE1_REG	0x01c
+#define SUN6I_DSI_BASIC_SIZE1_VT(n)		(((n) & 0xfff) << 16)
+#define SUN6I_DSI_BASIC_SIZE1_VACT(n)		((n) & 0xfff)
+
+#define SUN6I_DSI_INST_FUNC_REG(n)	(0x020 + (n) * 0x04)
+#define SUN6I_DSI_INST_FUNC_INST_MODE(n)	(((n) & 0xf) << 28)
+#define SUN6I_DSI_INST_FUNC_ESCAPE_ENTRY(n)	(((n) & 0xf) << 24)
+#define SUN6I_DSI_INST_FUNC_TRANS_PACKET(n)	(((n) & 0xf) << 20)
+#define SUN6I_DSI_INST_FUNC_LANE_CEN		BIT(4)
+#define SUN6I_DSI_INST_FUNC_LANE_DEN(n)		((n) & 0xf)
+
+#define SUN6I_DSI_INST_LOOP_SEL_REG	0x040
+
+#define SUN6I_DSI_INST_LOOP_NUM_REG(n)	(0x044 + (n) * 0x10)
+#define SUN6I_DSI_INST_LOOP_NUM_N1(n)		(((n) & 0xfff) << 16)
+#define SUN6I_DSI_INST_LOOP_NUM_N0(n)		((n) & 0xfff)
+
+#define SUN6I_DSI_INST_JUMP_SEL_REG	0x048
+
+#define SUN6I_DSI_INST_JUMP_CFG_REG(n)	(0x04c + (n) * 0x04)
+#define SUN6I_DSI_INST_JUMP_CFG_TO(n)		(((n) & 0xf) << 20)
+#define SUN6I_DSI_INST_JUMP_CFG_POINT(n)	(((n) & 0xf) << 16)
+#define SUN6I_DSI_INST_JUMP_CFG_NUM(n)		((n) & 0xffff)
+
+#define SUN6I_DSI_TRANS_START_REG	0x060
+
+#define SUN6I_DSI_TRANS_ZERO_REG	0x078
+
+#define SUN6I_DSI_TCON_DRQ_REG		0x07c
+#define SUN6I_DSI_TCON_DRQ_ENABLE_MODE		BIT(28)
+#define SUN6I_DSI_TCON_DRQ_SET(n)		((n) & 0x3ff)
+
+#define SUN6I_DSI_PIXEL_CTL0_REG	0x080
+#define SUN6I_DSI_PIXEL_CTL0_PD_PLUG_DISABLE	BIT(16)
+#define SUN6I_DSI_PIXEL_CTL0_FORMAT(n)		((n) & 0xf)
+
+#define SUN6I_DSI_PIXEL_CTL1_REG	0x084
+
+#define SUN6I_DSI_PIXEL_PH_REG		0x090
+#define SUN6I_DSI_PIXEL_PH_ECC(n)		(((n) & 0xff) << 24)
+#define SUN6I_DSI_PIXEL_PH_WC(n)		(((n) & 0xffff) << 8)
+#define SUN6I_DSI_PIXEL_PH_VC(n)		(((n) & 3) << 6)
+#define SUN6I_DSI_PIXEL_PH_DT(n)		((n) & 0x3f)
+
+#define SUN6I_DSI_PIXEL_PF0_REG		0x098
+#define SUN6I_DSI_PIXEL_PF0_CRC_FORCE(n)	((n) & 0xffff)
+
+#define SUN6I_DSI_PIXEL_PF1_REG		0x09c
+#define SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINEN(n)	(((n) & 0xffff) << 16)
+#define SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINE0(n)	((n) & 0xffff)
+
+#define SUN6I_DSI_SYNC_HSS_REG		0x0b0
+
+#define SUN6I_DSI_SYNC_HSE_REG		0x0b4
+
+#define SUN6I_DSI_SYNC_VSS_REG		0x0b8
+
+#define SUN6I_DSI_SYNC_VSE_REG		0x0bc
+
+#define SUN6I_DSI_BLK_HSA0_REG		0x0c0
+
+#define SUN6I_DSI_BLK_HSA1_REG		0x0c4
+#define SUN6I_DSI_BLK_PF(n)			(((n) & 0xffff) << 16)
+#define SUN6I_DSI_BLK_PD(n)			((n) & 0xff)
+
+#define SUN6I_DSI_BLK_HBP0_REG		0x0c8
+
+#define SUN6I_DSI_BLK_HBP1_REG		0x0cc
+
+#define SUN6I_DSI_BLK_HFP0_REG		0x0d0
+
+#define SUN6I_DSI_BLK_HFP1_REG		0x0d4
+
+#define SUN6I_DSI_BLK_HBLK0_REG		0x0e0
+
+#define SUN6I_DSI_BLK_HBLK1_REG		0x0e4
+
+#define SUN6I_DSI_BLK_VBLK0_REG		0x0e8
+
+#define SUN6I_DSI_BLK_VBLK1_REG		0x0ec
+
+#define SUN6I_DSI_BURST_LINE_REG	0x0f0
+#define SUN6I_DSI_BURST_LINE_SYNC_POINT(n)	(((n) & 0xffff) << 16)
+#define SUN6I_DSI_BURST_LINE_NUM(n)		((n) & 0xffff)
+
+#define SUN6I_DSI_BURST_DRQ_REG		0x0f4
+#define SUN6I_DSI_BURST_DRQ_EDGE1(n)		(((n) & 0xffff) << 16)
+#define SUN6I_DSI_BURST_DRQ_EDGE0(n)		((n) & 0xffff)
+
+#define SUN6I_DSI_CMD_CTL_REG		0x200
+#define SUN6I_DSI_CMD_CTL_RX_OVERFLOW		BIT(26)
+#define SUN6I_DSI_CMD_CTL_RX_FLAG		BIT(25)
+#define SUN6I_DSI_CMD_CTL_TX_FLAG		BIT(9)
+
+#define SUN6I_DSI_CMD_RX_REG(n)		(0x240 + (n) * 0x04)
+
+#define SUN6I_DSI_DEBUG_DATA_REG	0x2f8
+
+#define SUN6I_DSI_CMD_TX_REG(n)		(0x300 + (n) * 0x04)
+
+enum sun6i_dsi_start_inst {
+	DSI_START_LPRX,
+	DSI_START_LPTX,
+	DSI_START_HSC,
+	DSI_START_HSD,
+};
+
+enum sun6i_dsi_inst_id {
+	DSI_INST_ID_LP11	= 0,
+	DSI_INST_ID_TBA,
+	DSI_INST_ID_HSC,
+	DSI_INST_ID_HSD,
+	DSI_INST_ID_LPDT,
+	DSI_INST_ID_HSCEXIT,
+	DSI_INST_ID_NOP,
+	DSI_INST_ID_DLY,
+	DSI_INST_ID_END		= 15,
+};
+
+enum sun6i_dsi_inst_mode {
+	DSI_INST_MODE_STOP	= 0,
+	DSI_INST_MODE_TBA,
+	DSI_INST_MODE_HS,
+	DSI_INST_MODE_ESCAPE,
+	DSI_INST_MODE_HSCEXIT,
+	DSI_INST_MODE_NOP,
+};
+
+enum sun6i_dsi_inst_escape {
+	DSI_INST_ESCA_LPDT	= 0,
+	DSI_INST_ESCA_ULPS,
+	DSI_INST_ESCA_UN1,
+	DSI_INST_ESCA_UN2,
+	DSI_INST_ESCA_RESET,
+	DSI_INST_ESCA_UN3,
+	DSI_INST_ESCA_UN4,
+	DSI_INST_ESCA_UN5,
+};
+
+enum sun6i_dsi_inst_packet {
+	DSI_INST_PACK_PIXEL	= 0,
+	DSI_INST_PACK_COMMAND,
+};
+
+static const u32 sun6i_dsi_ecc_array[] = {
+	[0] = (BIT(0) | BIT(1) | BIT(2) | BIT(4) | BIT(5) | BIT(7) | BIT(10) |
+	       BIT(11) | BIT(13) | BIT(16) | BIT(20) | BIT(21) | BIT(22) |
+	       BIT(23)),
+	[1] = (BIT(0) | BIT(1) | BIT(3) | BIT(4) | BIT(6) | BIT(8) | BIT(10) |
+	       BIT(12) | BIT(14) | BIT(17) | BIT(20) | BIT(21) | BIT(22) |
+	       BIT(23)),
+	[2] = (BIT(0) | BIT(2) | BIT(3) | BIT(5) | BIT(6) | BIT(9) | BIT(11) |
+	       BIT(12) | BIT(15) | BIT(18) | BIT(20) | BIT(21) | BIT(22)),
+	[3] = (BIT(1) | BIT(2) | BIT(3) | BIT(7) | BIT(8) | BIT(9) | BIT(13) |
+	       BIT(14) | BIT(15) | BIT(19) | BIT(20) | BIT(21) | BIT(23)),
+	[4] = (BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(8) | BIT(9) | BIT(16) |
+	       BIT(17) | BIT(18) | BIT(19) | BIT(20) | BIT(22) | BIT(23)),
+	[5] = (BIT(10) | BIT(11) | BIT(12) | BIT(13) | BIT(14) | BIT(15) |
+	       BIT(16) | BIT(17) | BIT(18) | BIT(19) | BIT(21) | BIT(22) |
+	       BIT(23)),
+};
+
+static u32 sun6i_dsi_ecc_compute(unsigned int data)
+{
+	int i;
+	u8 ecc = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sun6i_dsi_ecc_array); i++) {
+		u32 field = sun6i_dsi_ecc_array[i];
+		bool init = false;
+		u8 val = 0;
+		int j;
+
+		for (j = 0; j < 24; j++) {
+			if (!(BIT(j) & field))
+				continue;
+
+			if (!init) {
+				val = (BIT(j) & data) ? 1 : 0;
+				init = true;
+			} else {
+				val ^= (BIT(j) & data) ? 1 : 0;
+			}
+		}
+
+		ecc |= val << i;
+	}
+
+	return ecc;
+}
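+
+/*
+ * Each ECC bit here is simply the parity of the header bits selected by
+ * the corresponding mask, so the nested loop above is equivalent to
+ * this minimal sketch (assuming hweight32 from <linux/bitops.h>):
+ *
+ *	for (i = 0; i < ARRAY_SIZE(sun6i_dsi_ecc_array); i++)
+ *		ecc |= (hweight32(data & sun6i_dsi_ecc_array[i]) & 1) << i;
+ */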
+
+static u16 sun6i_dsi_crc_compute(u8 const *buffer, size_t len)
+{
+	return crc_ccitt(0xffff, buffer, len);
+}
+
+static u16 sun6i_dsi_crc_repeat_compute(u8 pd, size_t len)
+{
+	u8 buffer[len];
+
+	memset(buffer, pd, len);
+
+	return sun6i_dsi_crc_compute(buffer, len);
+}
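+
+/*
+ * Note that this puts a variable-length array on the stack; the lengths
+ * passed in are the blanking payload sizes computed in
+ * sun6i_dsi_setup_timings(), so for large modes this can be a fairly
+ * sizeable on-stack buffer.
+ */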
+
+static u32 sun6i_dsi_build_sync_pkt(u8 dt, u8 vc, u8 d0, u8 d1)
+{
+	u32 val = dt & 0x3f;
+
+	val |= (vc & 3) << 6;
+	val |= (d0 & 0xff) << 8;
+	val |= (d1 & 0xff) << 16;
+	val |= sun6i_dsi_ecc_compute(val) << 24;
+
+	return val;
+}
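+
+/*
+ * The resulting word follows the MIPI DSI short packet layout: data
+ * type in bits 0-5, virtual channel in bits 6-7, the two parameter
+ * bytes in bits 8-15 and 16-23, and the ECC of the low 24 bits in
+ * bits 24-31.
+ */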
+
+static u32 sun6i_dsi_build_blk0_pkt(u8 vc, u16 wc)
+{
+	return sun6i_dsi_build_sync_pkt(MIPI_DSI_BLANKING_PACKET, vc,
+					wc & 0xff, wc >> 8);
+}
+
+static u32 sun6i_dsi_build_blk1_pkt(u16 pd, size_t len)
+{
+	u32 val = SUN6I_DSI_BLK_PD(pd);
+
+	return val | SUN6I_DSI_BLK_PF(sun6i_dsi_crc_repeat_compute(pd, len));
+}
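+
+/*
+ * Bits 0-7 of the BLK1 value carry the pad byte and bits 16-31 the
+ * CCITT CRC of len copies of it, presumably the packet footer that the
+ * hardware appends to the blanking payload.
+ */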
+
+static void sun6i_dsi_inst_abort(struct sun6i_dsi *dsi)
+{
+	regmap_update_bits(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG,
+			   SUN6I_DSI_BASIC_CTL0_INST_ST, 0);
+}
+
+static void sun6i_dsi_inst_commit(struct sun6i_dsi *dsi)
+{
+	regmap_update_bits(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG,
+			   SUN6I_DSI_BASIC_CTL0_INST_ST,
+			   SUN6I_DSI_BASIC_CTL0_INST_ST);
+}
+
+static int sun6i_dsi_inst_wait_for_completion(struct sun6i_dsi *dsi)
+{
+	u32 val;
+
+	return regmap_read_poll_timeout(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG,
+					val,
+					!(val & SUN6I_DSI_BASIC_CTL0_INST_ST),
+					100, 5000);
+}
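+
+/*
+ * This polls the INST_ST bit every 100us until it clears and gives up
+ * after 5ms, returning -ETIMEDOUT if the instruction never completes.
+ */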
+
+static void sun6i_dsi_inst_setup(struct sun6i_dsi *dsi,
+				 enum sun6i_dsi_inst_id id,
+				 enum sun6i_dsi_inst_mode mode,
+				 bool clock, u8 data,
+				 enum sun6i_dsi_inst_packet packet,
+				 enum sun6i_dsi_inst_escape escape)
+{
+	regmap_write(dsi->regs, SUN6I_DSI_INST_FUNC_REG(id),
+		     SUN6I_DSI_INST_FUNC_INST_MODE(mode) |
+		     SUN6I_DSI_INST_FUNC_ESCAPE_ENTRY(escape) |
+		     SUN6I_DSI_INST_FUNC_TRANS_PACKET(packet) |
+		     (clock ? SUN6I_DSI_INST_FUNC_LANE_CEN : 0) |
+		     SUN6I_DSI_INST_FUNC_LANE_DEN(data));
+}
+
+static void sun6i_dsi_inst_init(struct sun6i_dsi *dsi,
+				struct mipi_dsi_device *device)
+{
+	u8 lanes_mask = GENMASK(device->lanes - 1, 0);
+
+	sun6i_dsi_inst_setup(dsi, DSI_INST_ID_LP11, DSI_INST_MODE_STOP,
+			     true, lanes_mask, 0, 0);
+
+	sun6i_dsi_inst_setup(dsi, DSI_INST_ID_TBA, DSI_INST_MODE_TBA,
+			     false, 1, 0, 0);
+
+	sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSC, DSI_INST_MODE_HS,
+			     true, 0, DSI_INST_PACK_PIXEL, 0);
+
+	sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSD, DSI_INST_MODE_HS,
+			     false, lanes_mask, DSI_INST_PACK_PIXEL, 0);
+
+	sun6i_dsi_inst_setup(dsi, DSI_INST_ID_LPDT, DSI_INST_MODE_ESCAPE,
+			     false, 1, DSI_INST_PACK_COMMAND,
+			     DSI_INST_ESCA_LPDT);
+
+	sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSCEXIT, DSI_INST_MODE_HSCEXIT,
+			     true, 0, 0, 0);
+
+	sun6i_dsi_inst_setup(dsi, DSI_INST_ID_NOP, DSI_INST_MODE_STOP,
+			     false, lanes_mask, 0, 0);
+
+	sun6i_dsi_inst_setup(dsi, DSI_INST_ID_DLY, DSI_INST_MODE_NOP,
+			     true, lanes_mask, 0, 0);
+
+	regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_CFG_REG(0),
+		     SUN6I_DSI_INST_JUMP_CFG_POINT(DSI_INST_ID_NOP) |
+		     SUN6I_DSI_INST_JUMP_CFG_TO(DSI_INST_ID_HSCEXIT) |
+		     SUN6I_DSI_INST_JUMP_CFG_NUM(1));
+}
+
+static u16 sun6i_dsi_get_video_start_delay(struct sun6i_dsi *dsi,
+					   struct drm_display_mode *mode)
+{
+	return mode->vtotal - (mode->vsync_end - mode->vdisplay) + 1;
+}
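+
+/*
+ * As a hypothetical example, a mode with vtotal = 525, vsync_end = 492
+ * and vdisplay = 480 yields a start delay of 525 - (492 - 480) + 1 =
+ * 514 lines.
+ */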
+
+static void sun6i_dsi_setup_burst(struct sun6i_dsi *dsi,
+				  struct drm_display_mode *mode)
+{
+	struct mipi_dsi_device *device = dsi->device;
+	u32 val = 0;
+
+	if ((mode->hsync_end - mode->hdisplay) > 20) {
+		/* Maaaaaagic */
+		u16 drq = (mode->hsync_end - mode->hdisplay) - 20;
+
+		drq *= mipi_dsi_pixel_format_to_bpp(device->format);
+		drq /= 32;
+
+		val = (SUN6I_DSI_TCON_DRQ_ENABLE_MODE |
+		       SUN6I_DSI_TCON_DRQ_SET(drq));
+	}
+
+	regmap_write(dsi->regs, SUN6I_DSI_TCON_DRQ_REG, val);
+}
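+
+/*
+ * As a hypothetical example, an RGB888 mode with hsync_end - hdisplay =
+ * 80 pixels gives drq = (80 - 20) * 24 / 32 = 45, so the register gets
+ * SUN6I_DSI_TCON_DRQ_ENABLE_MODE | SUN6I_DSI_TCON_DRQ_SET(45).
+ */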
+
+static void sun6i_dsi_setup_inst_loop(struct sun6i_dsi *dsi,
+				      struct drm_display_mode *mode)
+{
+	u16 delay = 50 - 1;
+
+	regmap_write(dsi->regs, SUN6I_DSI_INST_LOOP_NUM_REG(0),
+		     SUN6I_DSI_INST_LOOP_NUM_N0(50 - 1) |
+		     SUN6I_DSI_INST_LOOP_NUM_N1(delay));
+	regmap_write(dsi->regs, SUN6I_DSI_INST_LOOP_NUM_REG(1),
+		     SUN6I_DSI_INST_LOOP_NUM_N0(50 - 1) |
+		     SUN6I_DSI_INST_LOOP_NUM_N1(delay));
+}
+
+static void sun6i_dsi_setup_format(struct sun6i_dsi *dsi,
+				   struct drm_display_mode *mode)
+{
+	struct mipi_dsi_device *device = dsi->device;
+	u32 val = SUN6I_DSI_PIXEL_PH_VC(device->channel);
+	u8 dt, fmt;
+	u16 wc;
+
+	/*
+	 * TODO: The format defines are only valid in video mode and
+	 * change in command mode.
+	 */
+	switch (device->format) {
+	case MIPI_DSI_FMT_RGB888:
+		dt = MIPI_DSI_PACKED_PIXEL_STREAM_24;
+		fmt = 8;
+		break;
+	case MIPI_DSI_FMT_RGB666:
+		dt = MIPI_DSI_PIXEL_STREAM_3BYTE_18;
+		fmt = 9;
+		break;
+	case MIPI_DSI_FMT_RGB666_PACKED:
+		dt = MIPI_DSI_PACKED_PIXEL_STREAM_18;
+		fmt = 10;
+		break;
+	case MIPI_DSI_FMT_RGB565:
+		dt = MIPI_DSI_PACKED_PIXEL_STREAM_16;
+		fmt = 11;
+		break;
+	default:
+		return;
+	}
+	val |= SUN6I_DSI_PIXEL_PH_DT(dt);
+
+	wc = mode->hdisplay * mipi_dsi_pixel_format_to_bpp(device->format) / 8;
+	val |= SUN6I_DSI_PIXEL_PH_WC(wc);
+	val |= SUN6I_DSI_PIXEL_PH_ECC(sun6i_dsi_ecc_compute(val));
+
+	regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PH_REG, val);
+
+	regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PF0_REG,
+		     SUN6I_DSI_PIXEL_PF0_CRC_FORCE(0xffff));
+
+	regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PF1_REG,
+		     SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINE0(0xffff) |
+		     SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINEN(0xffff));
+
+	regmap_write(dsi->regs, SUN6I_DSI_PIXEL_CTL0_REG,
+		     SUN6I_DSI_PIXEL_CTL0_PD_PLUG_DISABLE |
+		     SUN6I_DSI_PIXEL_CTL0_FORMAT(fmt));
+}
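+
+/*
+ * For a hypothetical 720 pixel wide RGB888 mode, the word count above
+ * is 720 * 24 / 8 = 2160 bytes of pixel payload per line.
+ */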
+
+static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi,
+				    struct drm_display_mode *mode)
+{
+	struct mipi_dsi_device *device = dsi->device;
+	unsigned int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8;
+	u16 hbp, hfp, hsa, hblk, vblk;
+
+	regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL_REG, 0);
+
+	regmap_write(dsi->regs, SUN6I_DSI_SYNC_HSS_REG,
+		     sun6i_dsi_build_sync_pkt(MIPI_DSI_H_SYNC_START,
+					      device->channel,
+					      0, 0));
+
+	regmap_write(dsi->regs, SUN6I_DSI_SYNC_HSE_REG,
+		     sun6i_dsi_build_sync_pkt(MIPI_DSI_H_SYNC_END,
+					      device->channel,
+					      0, 0));
+
+	regmap_write(dsi->regs, SUN6I_DSI_SYNC_VSS_REG,
+		     sun6i_dsi_build_sync_pkt(MIPI_DSI_V_SYNC_START,
+					      device->channel,
+					      0, 0));
+
+	regmap_write(dsi->regs, SUN6I_DSI_SYNC_VSE_REG,
+		     sun6i_dsi_build_sync_pkt(MIPI_DSI_V_SYNC_END,
+					      device->channel,
+					      0, 0));
+
+	regmap_write(dsi->regs, SUN6I_DSI_BASIC_SIZE0_REG,
+		     SUN6I_DSI_BASIC_SIZE0_VSA(mode->vsync_end -
+					       mode->vsync_start) |
+		     SUN6I_DSI_BASIC_SIZE0_VBP(mode->vsync_start -
+					       mode->vdisplay));
+
+	regmap_write(dsi->regs, SUN6I_DSI_BASIC_SIZE1_REG,
+		     SUN6I_DSI_BASIC_SIZE1_VACT(mode->vdisplay) |
+		     SUN6I_DSI_BASIC_SIZE1_VT(mode->vtotal));
+
+	/*
+	 * A sync period is composed of a blanking packet (4 bytes +
+	 * payload + 2 bytes) and a sync event packet (4 bytes). Its
+	 * minimal size is therefore 10 bytes
+	 */
+#define HSA_PACKET_OVERHEAD	10
+	hsa = max((unsigned int)HSA_PACKET_OVERHEAD,
+		  (mode->hsync_end - mode->hsync_start) * Bpp - HSA_PACKET_OVERHEAD);
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_HSA0_REG,
+		     sun6i_dsi_build_blk0_pkt(device->channel, hsa));
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_HSA1_REG,
+		     sun6i_dsi_build_blk1_pkt(0, hsa));
+
+	/*
+	 * The backporch is set using a blanking packet (4 bytes +
+	 * payload + 2 bytes). Its minimal size is therefore 6 bytes
+	 */
+#define HBP_PACKET_OVERHEAD	6
+	hbp = max((unsigned int)HBP_PACKET_OVERHEAD,
+		  (mode->hsync_start - mode->hdisplay) * Bpp - HBP_PACKET_OVERHEAD);
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_HBP0_REG,
+		     sun6i_dsi_build_blk0_pkt(device->channel, hbp));
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_HBP1_REG,
+		     sun6i_dsi_build_blk1_pkt(0, hbp));
+
+	/*
+	 * The frontporch is set using a blanking packet (4 bytes +
+	 * payload + 2 bytes). Its minimal size is therefore 6 bytes
+	 */
+#define HFP_PACKET_OVERHEAD	6
+	hfp = max((unsigned int)HFP_PACKET_OVERHEAD,
+		  (mode->htotal - mode->hsync_end) * Bpp - HFP_PACKET_OVERHEAD);
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_HFP0_REG,
+		     sun6i_dsi_build_blk0_pkt(device->channel, hfp));
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_HFP1_REG,
+		     sun6i_dsi_build_blk1_pkt(0, hfp));
+
+	/*
+	 * hblk seems to be the line + porches length.
+	 */
+	hblk = mode->htotal * Bpp - hsa;
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_HBLK0_REG,
+		     sun6i_dsi_build_blk0_pkt(device->channel, hblk));
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_HBLK1_REG,
+		     sun6i_dsi_build_blk1_pkt(0, hblk));
+
+	/*
+	 * And I'm not entirely sure what vblk is about. The driver in
+	 * the Allwinner BSP uses a rather convoluted calculation there,
+	 * but only in the 4-lane case. However, using 0 (the non-4-lane
+	 * value) even with a 4-lane screen seems to work...
+	 */
+	vblk = 0;
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_VBLK0_REG,
+		     sun6i_dsi_build_blk0_pkt(device->channel, vblk));
+	regmap_write(dsi->regs, SUN6I_DSI_BLK_VBLK1_REG,
+		     sun6i_dsi_build_blk1_pkt(0, vblk));
+}
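+
+/*
+ * As a hypothetical example of the blanking math above, an RGB888 mode
+ * (Bpp = 3) with hsync_start - hdisplay = 40 pixels gives
+ * hbp = max(6, 40 * 3 - 6) = 114 bytes for the HBP blanking packet.
+ */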
+
+static int sun6i_dsi_start(struct sun6i_dsi *dsi,
+			   enum sun6i_dsi_start_inst func)
+{
+	switch (func) {
+	case DSI_START_LPTX:
+		regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG,
+			     DSI_INST_ID_LPDT << (4 * DSI_INST_ID_LP11) |
+			     DSI_INST_ID_END  << (4 * DSI_INST_ID_LPDT));
+		break;
+	case DSI_START_LPRX:
+		regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG,
+			     DSI_INST_ID_LPDT << (4 * DSI_INST_ID_LP11) |
+			     DSI_INST_ID_DLY  << (4 * DSI_INST_ID_LPDT) |
+			     DSI_INST_ID_TBA  << (4 * DSI_INST_ID_DLY) |
+			     DSI_INST_ID_END  << (4 * DSI_INST_ID_TBA));
+		break;
+	case DSI_START_HSC:
+		regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG,
+			     DSI_INST_ID_HSC  << (4 * DSI_INST_ID_LP11) |
+			     DSI_INST_ID_END  << (4 * DSI_INST_ID_HSC));
+		break;
+	case DSI_START_HSD:
+		regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG,
+			     DSI_INST_ID_NOP  << (4 * DSI_INST_ID_LP11) |
+			     DSI_INST_ID_HSD  << (4 * DSI_INST_ID_NOP) |
+			     DSI_INST_ID_DLY  << (4 * DSI_INST_ID_HSD) |
+			     DSI_INST_ID_NOP  << (4 * DSI_INST_ID_DLY) |
+			     DSI_INST_ID_END  << (4 * DSI_INST_ID_HSCEXIT));
+		break;
+	default:
+		regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG,
+			     DSI_INST_ID_END  << (4 * DSI_INST_ID_LP11));
+		break;
+	}
+
+	sun6i_dsi_inst_abort(dsi);
+	sun6i_dsi_inst_commit(dsi);
+
+	if (func == DSI_START_HSC)
+		regmap_write_bits(dsi->regs,
+				  SUN6I_DSI_INST_FUNC_REG(DSI_INST_ID_LP11),
+				  SUN6I_DSI_INST_FUNC_LANE_CEN, 0);
+
+	return 0;
+}
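+
+/*
+ * SUN6I_DSI_INST_JUMP_SEL_REG appears to be a table of 4-bit entries:
+ * the nibble at position N holds the ID of the instruction to run
+ * after instruction N. For DSI_START_LPTX above, the programmed
+ * sequence is therefore LP11 -> LPDT -> END.
+ */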
+
+static void sun6i_dsi_encoder_enable(struct drm_encoder *encoder)
+{
+	struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
+	struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder);
+	struct mipi_dsi_device *device = dsi->device;
+	u16 delay;
+
+	DRM_DEBUG_DRIVER("Enabling DSI output\n");
+
+	pm_runtime_get_sync(dsi->dev);
+
+	delay = sun6i_dsi_get_video_start_delay(dsi, mode);
+	regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL1_REG,
+		     SUN6I_DSI_BASIC_CTL1_VIDEO_ST_DELAY(delay) |
+		     SUN6I_DSI_BASIC_CTL1_VIDEO_FILL |
+		     SUN6I_DSI_BASIC_CTL1_VIDEO_PRECISION |
+		     SUN6I_DSI_BASIC_CTL1_VIDEO_MODE);
+
+	sun6i_dsi_setup_burst(dsi, mode);
+	sun6i_dsi_setup_inst_loop(dsi, mode);
+	sun6i_dsi_setup_format(dsi, mode);
+	sun6i_dsi_setup_timings(dsi, mode);
+
+	sun6i_dphy_init(dsi->dphy, device->lanes);
+	sun6i_dphy_power_on(dsi->dphy, device->lanes);
+
+	if (!IS_ERR(dsi->panel))
+		drm_panel_prepare(dsi->panel);
+
+	/*
+	 * FIXME: This should be moved after the switch to HS mode.
+	 *
+	 * Unfortunately, once in HS mode, it seems like we're no
+	 * longer able to send DCS commands, which would prevent any
+	 * panel from sending DCS commands as part of its enable
+	 * method, which is quite common.
+	 *
+	 * I haven't seen any artifact due to that sub-optimal
+	 * ordering on the panels I've tested it with, so I guess this
+	 * will do for now, until that IP is better understood.
+	 */
+	if (!IS_ERR(dsi->panel))
+		drm_panel_enable(dsi->panel);
+
+	sun6i_dsi_start(dsi, DSI_START_HSC);
+
+	udelay(1000);
+
+	sun6i_dsi_start(dsi, DSI_START_HSD);
+}
+
+static void sun6i_dsi_encoder_disable(struct drm_encoder *encoder)
+{
+	struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder);
+
+	DRM_DEBUG_DRIVER("Disabling DSI output\n");
+
+	if (!IS_ERR(dsi->panel)) {
+		drm_panel_disable(dsi->panel);
+		drm_panel_unprepare(dsi->panel);
+	}
+
+	sun6i_dphy_power_off(dsi->dphy);
+	sun6i_dphy_exit(dsi->dphy);
+
+	pm_runtime_put(dsi->dev);
+}
+
+static int sun6i_dsi_get_modes(struct drm_connector *connector)
+{
+	struct sun6i_dsi *dsi = connector_to_sun6i_dsi(connector);
+
+	return drm_panel_get_modes(dsi->panel);
+}
+
+static struct drm_connector_helper_funcs sun6i_dsi_connector_helper_funcs = {
+	.get_modes	= sun6i_dsi_get_modes,
+};
+
+static enum drm_connector_status
+sun6i_dsi_connector_detect(struct drm_connector *connector, bool force)
+{
+	return connector_status_connected;
+}
+
+static const struct drm_connector_funcs sun6i_dsi_connector_funcs = {
+	.detect			= sun6i_dsi_connector_detect,
+	.fill_modes		= drm_helper_probe_single_connector_modes,
+	.destroy		= drm_connector_cleanup,
+	.reset			= drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state	= drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_encoder_helper_funcs sun6i_dsi_enc_helper_funcs = {
+	.disable	= sun6i_dsi_encoder_disable,
+	.enable		= sun6i_dsi_encoder_enable,
+};
+
+static const struct drm_encoder_funcs sun6i_dsi_enc_funcs = {
+	.destroy	= drm_encoder_cleanup,
+};
+
+static u32 sun6i_dsi_dcs_build_pkt_hdr(struct sun6i_dsi *dsi,
+				       const struct mipi_dsi_msg *msg)
+{
+	u32 pkt = msg->type;
+
+	if (msg->type == MIPI_DSI_DCS_LONG_WRITE) {
+		pkt |= ((msg->tx_len + 1) & 0xffff) << 8;
+		pkt |= (((msg->tx_len + 1) >> 8) & 0xffff) << 16;
+	} else {
+		pkt |= (((u8 *)msg->tx_buf)[0] << 8);
+		if (msg->tx_len > 1)
+			pkt |= (((u8 *)msg->tx_buf)[1] << 16);
+	}
+
+	pkt |= sun6i_dsi_ecc_compute(pkt) << 24;
+
+	return pkt;
+}
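+
+/*
+ * The header word mirrors sun6i_dsi_build_sync_pkt(): data type in the
+ * low byte, the two parameter bytes (or the word count derived from
+ * tx_len for long writes) in bits 8-23, and the ECC of the low 24 bits
+ * in bits 24-31.
+ */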
+
+static int sun6i_dsi_dcs_write_short(struct sun6i_dsi *dsi,
+				     const struct mipi_dsi_msg *msg)
+{
+	regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0),
+		     sun6i_dsi_dcs_build_pkt_hdr(dsi, msg));
+	regmap_write_bits(dsi->regs, SUN6I_DSI_CMD_CTL_REG,
+			  0xff, (4 - 1));
+
+	sun6i_dsi_start(dsi, DSI_START_LPTX);
+
+	return msg->tx_len;
+}
+
+static int sun6i_dsi_dcs_write_long(struct sun6i_dsi *dsi,
+				    const struct mipi_dsi_msg *msg)
+{
+	int ret, len = 0;
+	u8 *bounce;
+	u16 crc;
+
+	regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0),
+		     sun6i_dsi_dcs_build_pkt_hdr(dsi, msg));
+
+	bounce = kzalloc(ALIGN(msg->tx_len + sizeof(crc), 4), GFP_KERNEL);
+	if (!bounce)
+		return -ENOMEM;
+
+	memcpy(bounce, msg->tx_buf, msg->tx_len);
+	len += msg->tx_len;
+
+	crc = sun6i_dsi_crc_compute(bounce, msg->tx_len);
+	memcpy((u8 *)bounce + msg->tx_len, &crc, sizeof(crc));
+	len += sizeof(crc);
+
+	/* regmap_bulk_write() counts in 32-bit registers, not bytes */
+	regmap_bulk_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(1), bounce,
+			  DIV_ROUND_UP(len, 4));
+	regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG, len + 4 - 1);
+	kfree(bounce);
+
+	sun6i_dsi_start(dsi, DSI_START_LPTX);
+
+	ret = sun6i_dsi_inst_wait_for_completion(dsi);
+	if (ret < 0) {
+		sun6i_dsi_inst_abort(dsi);
+		return ret;
+	}
+
+	/*
+	 * TODO: There are some bits (reg 0x200, bits 8/9) that
+	 * apparently can be used to check whether the data have been
+	 * sent, but I couldn't get it to work reliably.
+	 */
+	return msg->tx_len;
+}
+
+static int sun6i_dsi_dcs_read(struct sun6i_dsi *dsi,
+			      const struct mipi_dsi_msg *msg)
+{
+	u32 val;
+	int ret;
+	u8 byte0;
+
+	regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0),
+		     sun6i_dsi_dcs_build_pkt_hdr(dsi, msg));
+	regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG,
+		     (4 - 1));
+
+	sun6i_dsi_start(dsi, DSI_START_LPRX);
+
+	ret = sun6i_dsi_inst_wait_for_completion(dsi);
+	if (ret < 0) {
+		sun6i_dsi_inst_abort(dsi);
+		return ret;
+	}
+
+	/*
+	 * TODO: There are some bits (reg 0x200, bits 24/25) that
+	 * apparently can be used to check whether the data have been
+	 * received, but I couldn't get it to work reliably.
+	 */
+	regmap_read(dsi->regs, SUN6I_DSI_CMD_CTL_REG, &val);
+	if (val & SUN6I_DSI_CMD_CTL_RX_OVERFLOW)
+		return -EIO;
+
+	regmap_read(dsi->regs, SUN6I_DSI_CMD_RX_REG(0), &val);
+	byte0 = val & 0xff;
+	if (byte0 == MIPI_DSI_RX_ACKNOWLEDGE_AND_ERROR_REPORT)
+		return -EIO;
+
+	((u8 *)msg->rx_buf)[0] = (val >> 8);
+
+	return 1;
+}
+
+static int sun6i_dsi_attach(struct mipi_dsi_host *host,
+			    struct mipi_dsi_device *device)
+{
+	struct sun6i_dsi *dsi = host_to_sun6i_dsi(host);
+
+	dsi->device = device;
+	dsi->panel = of_drm_find_panel(device->dev.of_node);
+	if (!dsi->panel)
+		return -EINVAL;
+
+	dev_info(host->dev, "Attached device %s\n", device->name);
+
+	return 0;
+}
+
+static int sun6i_dsi_detach(struct mipi_dsi_host *host,
+			    struct mipi_dsi_device *device)
+{
+	struct sun6i_dsi *dsi = host_to_sun6i_dsi(host);
+
+	dsi->panel = NULL;
+	dsi->device = NULL;
+
+	return 0;
+}
+
+static ssize_t sun6i_dsi_transfer(struct mipi_dsi_host *host,
+				  const struct mipi_dsi_msg *msg)
+{
+	struct sun6i_dsi *dsi = host_to_sun6i_dsi(host);
+	int ret;
+
+	ret = sun6i_dsi_inst_wait_for_completion(dsi);
+	if (ret < 0)
+		sun6i_dsi_inst_abort(dsi);
+
+	regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG,
+		     SUN6I_DSI_CMD_CTL_RX_OVERFLOW |
+		     SUN6I_DSI_CMD_CTL_RX_FLAG |
+		     SUN6I_DSI_CMD_CTL_TX_FLAG);
+
+	switch (msg->type) {
+	case MIPI_DSI_DCS_SHORT_WRITE:
+	case MIPI_DSI_DCS_SHORT_WRITE_PARAM:
+		ret = sun6i_dsi_dcs_write_short(dsi, msg);
+		break;
+
+	case MIPI_DSI_DCS_LONG_WRITE:
+		ret = sun6i_dsi_dcs_write_long(dsi, msg);
+		break;
+
+	case MIPI_DSI_DCS_READ:
+		if (msg->rx_len == 1) {
+			ret = sun6i_dsi_dcs_read(dsi, msg);
+			break;
+		}
+
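+		/* fall through */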
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static const struct mipi_dsi_host_ops sun6i_dsi_host_ops = {
+	.attach		= sun6i_dsi_attach,
+	.detach		= sun6i_dsi_detach,
+	.transfer	= sun6i_dsi_transfer,
+};
+
+static const struct regmap_config sun6i_dsi_regmap_config = {
+	.reg_bits	= 32,
+	.val_bits	= 32,
+	.reg_stride	= 4,
+	.max_register	= SUN6I_DSI_CMD_TX_REG(255),
+	.name		= "mipi-dsi",
+};
+
+static int sun6i_dsi_bind(struct device *dev, struct device *master,
+			 void *data)
+{
+	struct drm_device *drm = data;
+	struct sun4i_drv *drv = drm->dev_private;
+	struct sun6i_dsi *dsi = dev_get_drvdata(dev);
+	int ret;
+
+	if (!dsi->panel)
+		return -EPROBE_DEFER;
+
+	dsi->drv = drv;
+
+	drm_encoder_helper_add(&dsi->encoder,
+			       &sun6i_dsi_enc_helper_funcs);
+	ret = drm_encoder_init(drm,
+			       &dsi->encoder,
+			       &sun6i_dsi_enc_funcs,
+			       DRM_MODE_ENCODER_DSI,
+			       NULL);
+	if (ret) {
+		dev_err(dsi->dev, "Couldn't initialise the DSI encoder\n");
+		return ret;
+	}
+	dsi->encoder.possible_crtcs = BIT(0);
+
+	drm_connector_helper_add(&dsi->connector,
+				 &sun6i_dsi_connector_helper_funcs);
+	ret = drm_connector_init(drm, &dsi->connector,
+				 &sun6i_dsi_connector_funcs,
+				 DRM_MODE_CONNECTOR_DSI);
+	if (ret) {
+		dev_err(dsi->dev,
+			"Couldn't initialise the DSI connector\n");
+		goto err_cleanup_encoder;
+	}
+
+	drm_mode_connector_attach_encoder(&dsi->connector, &dsi->encoder);
+	drm_panel_attach(dsi->panel, &dsi->connector);
+
+	return 0;
+
+err_cleanup_encoder:
+	drm_encoder_cleanup(&dsi->encoder);
+	return ret;
+}
+
+static void sun6i_dsi_unbind(struct device *dev, struct device *master,
+			    void *data)
+{
+	struct sun6i_dsi *dsi = dev_get_drvdata(dev);
+
+	drm_panel_detach(dsi->panel);
+}
+
+static const struct component_ops sun6i_dsi_ops = {
+	.bind	= sun6i_dsi_bind,
+	.unbind	= sun6i_dsi_unbind,
+};
+
+static int sun6i_dsi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *dphy_node;
+	struct sun6i_dsi *dsi;
+	struct resource *res;
+	void __iomem *base;
+	int ret;
+
+	dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
+	if (!dsi)
+		return -ENOMEM;
+	dev_set_drvdata(dev, dsi);
+	dsi->dev = dev;
+	dsi->host.ops = &sun6i_dsi_host_ops;
+	dsi->host.dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base)) {
+		dev_err(dev, "Couldn't map the DSI encoder registers\n");
+		return PTR_ERR(base);
+	}
+
+	dsi->regs = devm_regmap_init_mmio_clk(dev, "bus", base,
+					      &sun6i_dsi_regmap_config);
+	if (IS_ERR(dsi->regs)) {
+		dev_err(dev, "Couldn't create the DSI encoder regmap\n");
+		return PTR_ERR(dsi->regs);
+	}
+
+	dsi->reset = devm_reset_control_get_shared(dev, NULL);
+	if (IS_ERR(dsi->reset)) {
+		dev_err(dev, "Couldn't get our reset line\n");
+		return PTR_ERR(dsi->reset);
+	}
+
+	dsi->mod_clk = devm_clk_get(dev, "mod");
+	if (IS_ERR(dsi->mod_clk)) {
+		dev_err(dev, "Couldn't get the DSI mod clock\n");
+		return PTR_ERR(dsi->mod_clk);
+	}
+
+	/*
+	 * In order to operate properly, this clock seems to always
+	 * need to be set to 297 MHz.
+	 */
+	clk_set_rate_exclusive(dsi->mod_clk, 297000000);
+
+	dphy_node = of_parse_phandle(dev->of_node, "phys", 0);
+	ret = sun6i_dphy_probe(dsi, dphy_node);
+	of_node_put(dphy_node);
+	if (ret) {
+		dev_err(dev, "Couldn't get the MIPI D-PHY\n");
+		goto err_unprotect_clk;
+	}
+
+	pm_runtime_enable(dev);
+
+	ret = mipi_dsi_host_register(&dsi->host);
+	if (ret) {
+		dev_err(dev, "Couldn't register MIPI-DSI host\n");
+		goto err_remove_phy;
+	}
+
+	ret = component_add(&pdev->dev, &sun6i_dsi_ops);
+	if (ret) {
+		dev_err(dev, "Couldn't register our component\n");
+		goto err_remove_dsi_host;
+	}
+
+	return 0;
+
+err_remove_dsi_host:
+	mipi_dsi_host_unregister(&dsi->host);
+err_remove_phy:
+	pm_runtime_disable(dev);
+	sun6i_dphy_remove(dsi);
+err_unprotect_clk:
+	clk_rate_exclusive_put(dsi->mod_clk);
+	return ret;
+}
+
+static int sun6i_dsi_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sun6i_dsi *dsi = dev_get_drvdata(dev);
+
+	component_del(&pdev->dev, &sun6i_dsi_ops);
+	mipi_dsi_host_unregister(&dsi->host);
+	pm_runtime_disable(dev);
+	sun6i_dphy_remove(dsi);
+	clk_rate_exclusive_put(dsi->mod_clk);
+
+	return 0;
+}
+
+static int sun6i_dsi_runtime_resume(struct device *dev)
+{
+	struct sun6i_dsi *dsi = dev_get_drvdata(dev);
+
+	reset_control_deassert(dsi->reset);
+	clk_prepare_enable(dsi->mod_clk);
+
+	/*
+	 * Enable the DSI block.
+	 *
+	 * Some parts of it can only be done once we know the number of
+	 * lanes; see sun6i_dsi_inst_init().
+	 */
+	regmap_write(dsi->regs, SUN6I_DSI_CTL_REG, SUN6I_DSI_CTL_EN);
+
+	regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG,
+		     SUN6I_DSI_BASIC_CTL0_ECC_EN | SUN6I_DSI_BASIC_CTL0_CRC_EN);
+
+	regmap_write(dsi->regs, SUN6I_DSI_TRANS_START_REG, 10);
+	regmap_write(dsi->regs, SUN6I_DSI_TRANS_ZERO_REG, 0);
+
+	if (dsi->device)
+		sun6i_dsi_inst_init(dsi, dsi->device);
+
+	regmap_write(dsi->regs, SUN6I_DSI_DEBUG_DATA_REG, 0xff);
+
+	return 0;
+}
+
+static int sun6i_dsi_runtime_suspend(struct device *dev)
+{
+	struct sun6i_dsi *dsi = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(dsi->mod_clk);
+	reset_control_assert(dsi->reset);
+
+	return 0;
+}
+
+static const struct dev_pm_ops sun6i_dsi_pm_ops = {
+	SET_RUNTIME_PM_OPS(sun6i_dsi_runtime_suspend,
+			   sun6i_dsi_runtime_resume,
+			   NULL)
+};
+
+static const struct of_device_id sun6i_dsi_of_table[] = {
+	{ .compatible = "allwinner,sun6i-a31-mipi-dsi" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, sun6i_dsi_of_table);
+
+static struct platform_driver sun6i_dsi_platform_driver = {
+	.probe		= sun6i_dsi_probe,
+	.remove		= sun6i_dsi_remove,
+	.driver		= {
+		.name		= "sun6i-mipi-dsi",
+		.of_match_table	= sun6i_dsi_of_table,
+		.pm		= &sun6i_dsi_pm_ops,
+	},
+};
+module_platform_driver(sun6i_dsi_platform_driver);
+
+MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>");
+MODULE_DESCRIPTION("Allwinner A31 DSI Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h
new file mode 100644
index 0000000..dbbc5b3
--- /dev/null
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2016 Allwinnertech Co., Ltd.
+ * Copyright (C) 2017-2018 Bootlin
+ *
+ * Maxime Ripard <maxime.ripard@bootlin.com>
+ */
+
+#ifndef _SUN6I_MIPI_DSI_H_
+#define _SUN6I_MIPI_DSI_H_
+
+#include <drm/drm_connector.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_mipi_dsi.h>
+
+struct sun6i_dphy {
+	struct clk		*bus_clk;
+	struct clk		*mod_clk;
+	struct regmap		*regs;
+	struct reset_control	*reset;
+};
+
+struct sun6i_dsi {
+	struct drm_connector	connector;
+	struct drm_encoder	encoder;
+	struct mipi_dsi_host	host;
+
+	struct clk		*bus_clk;
+	struct clk		*mod_clk;
+	struct regmap		*regs;
+	struct reset_control	*reset;
+	struct sun6i_dphy	*dphy;
+
+	struct device		*dev;
+	struct sun4i_drv	*drv;
+	struct mipi_dsi_device	*device;
+	struct drm_panel	*panel;
+};
+
+static inline struct sun6i_dsi *host_to_sun6i_dsi(struct mipi_dsi_host *host)
+{
+	return container_of(host, struct sun6i_dsi, host);
+}
+
+static inline struct sun6i_dsi *connector_to_sun6i_dsi(struct drm_connector *connector)
+{
+	return container_of(connector, struct sun6i_dsi, connector);
+}
+
+static inline struct sun6i_dsi *encoder_to_sun6i_dsi(const struct drm_encoder *encoder)
+{
+	return container_of(encoder, struct sun6i_dsi, encoder);
+}
+
+int sun6i_dphy_probe(struct sun6i_dsi *dsi, struct device_node *node);
+int sun6i_dphy_remove(struct sun6i_dsi *dsi);
+
+int sun6i_dphy_init(struct sun6i_dphy *dphy, unsigned int lanes);
+int sun6i_dphy_power_on(struct sun6i_dphy *dphy, unsigned int lanes);
+int sun6i_dphy_power_off(struct sun6i_dphy *dphy);
+int sun6i_dphy_exit(struct sun6i_dphy *dphy);
+
+#endif /* _SUN6I_MIPI_DSI_H_ */
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 9f83a65..c3afe7b2 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -163,28 +163,89 @@
 			 BLEND_COLOR_KEY_NONE;
 	u32 blendnokey = BLEND_WEIGHT1(255) | BLEND_WEIGHT0(255);
 	struct tegra_plane_state *state;
+	u32 blending[2];
 	unsigned int i;
 
-	state = to_tegra_plane_state(plane->base.state);
-
-	/* alpha contribution is 1 minus sum of overlapping windows */
-	for (i = 0; i < 3; i++) {
-		if (state->dependent[i])
-			background[i] |= BLEND_CONTROL_DEPENDENT;
-	}
-
-	/* enable alpha blending if pixel format has an alpha component */
-	if (!state->opaque)
-		foreground |= BLEND_CONTROL_ALPHA;
-
-	/*
-	 * Disable blending and assume Window A is the bottom-most window,
-	 * Window C is the top-most window and Window B is in the middle.
-	 */
+	/* disable blending for non-overlapping case */
 	tegra_plane_writel(plane, blendnokey, DC_WIN_BLEND_NOKEY);
 	tegra_plane_writel(plane, foreground, DC_WIN_BLEND_1WIN);
 
-	switch (plane->index) {
+	state = to_tegra_plane_state(plane->base.state);
+
+	if (state->opaque) {
+		/*
+		 * Since custom fixed-weight blending isn't used and the
+		 * weight of the top window is set to max, we can enforce
+		 * dependent blending. In that case the bottom window is
+		 * transparent wherever the top window is opaque; if the
+		 * top window enables alpha blending instead, the bottom
+		 * window gets an alpha value of 1 minus the sum of the
+		 * alpha components of the overlapping plane.
+		 */
+		background[0] |= BLEND_CONTROL_DEPENDENT;
+		background[1] |= BLEND_CONTROL_DEPENDENT;
+
+		/*
+		 * The region where three windows overlap is the intersection
+		 * of the two regions where two windows overlap. The bottom
+		 * window contributes to that area only if all of the windows
+		 * on top of it have an alpha component.
+		 */
+		switch (state->base.normalized_zpos) {
+		case 0:
+			if (state->blending[0].alpha &&
+			    state->blending[1].alpha)
+				background[2] |= BLEND_CONTROL_DEPENDENT;
+			break;
+
+		case 1:
+			background[2] |= BLEND_CONTROL_DEPENDENT;
+			break;
+		}
+	} else {
+		/*
+		 * Enable alpha blending if pixel format has an alpha
+		 * component.
+		 */
+		foreground |= BLEND_CONTROL_ALPHA;
+
+		/*
+		 * If any of the windows on top of this window is opaque, it
+		 * will completely conceal this window within that area. If
+		 * top window has an alpha component, it is blended over the
+		 * bottom window.
+		 */
+		for (i = 0; i < 2; i++) {
+			if (state->blending[i].alpha &&
+			    state->blending[i].top)
+				background[i] |= BLEND_CONTROL_DEPENDENT;
+		}
+
+		switch (state->base.normalized_zpos) {
+		case 0:
+			if (state->blending[0].alpha &&
+			    state->blending[1].alpha)
+				background[2] |= BLEND_CONTROL_DEPENDENT;
+			break;
+
+		case 1:
+			/*
+			 * When both the middle and topmost windows have an
+			 * alpha, these windows are mixed together and then
+			 * the result is blended over the bottom window.
+			 */
+			if (state->blending[0].alpha &&
+			    state->blending[0].top)
+				background[2] |= BLEND_CONTROL_ALPHA;
+
+			if (state->blending[1].alpha &&
+			    state->blending[1].top)
+				background[2] |= BLEND_CONTROL_ALPHA;
+			break;
+		}
+	}
+
+	switch (state->base.normalized_zpos) {
 	case 0:
 		tegra_plane_writel(plane, background[0], DC_WIN_BLEND_2WIN_X);
 		tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y);
@@ -192,8 +253,21 @@
 		break;
 
 	case 1:
-		tegra_plane_writel(plane, foreground, DC_WIN_BLEND_2WIN_X);
-		tegra_plane_writel(plane, background[1], DC_WIN_BLEND_2WIN_Y);
+		/*
+		 * If window B / C is topmost, then the X / Y registers
+		 * match the order of the blending[...] state indices;
+		 * otherwise a swap is required.
+		 */
+		if (!state->blending[0].top && state->blending[1].top) {
+			blending[0] = foreground;
+			blending[1] = background[1];
+		} else {
+			blending[0] = background[0];
+			blending[1] = foreground;
+		}
+
+		tegra_plane_writel(plane, blending[0], DC_WIN_BLEND_2WIN_X);
+		tegra_plane_writel(plane, blending[1], DC_WIN_BLEND_2WIN_Y);
 		tegra_plane_writel(plane, background[2], DC_WIN_BLEND_3WIN_XY);
 		break;
 
@@ -224,6 +298,39 @@
 	tegra_plane_writel(plane, value, DC_WIN_BLEND_LAYER_CONTROL);
 }
 
+static bool
+tegra_plane_use_horizontal_filtering(struct tegra_plane *plane,
+				     const struct tegra_dc_window *window)
+{
+	struct tegra_dc *dc = plane->dc;
+
+	if (window->src.w == window->dst.w)
+		return false;
+
+	if (plane->index == 0 && dc->soc->has_win_a_without_filters)
+		return false;
+
+	return true;
+}
+
+static bool
+tegra_plane_use_vertical_filtering(struct tegra_plane *plane,
+				   const struct tegra_dc_window *window)
+{
+	struct tegra_dc *dc = plane->dc;
+
+	if (window->src.h == window->dst.h)
+		return false;
+
+	if (plane->index == 0 && dc->soc->has_win_a_without_filters)
+		return false;
+
+	if (plane->index == 2 && dc->soc->has_win_c_without_vert_filter)
+		return false;
+
+	return true;
+}
+
 static void tegra_dc_setup_window(struct tegra_plane *plane,
 				  const struct tegra_dc_window *window)
 {
@@ -361,12 +468,50 @@
 	if (window->bottom_up)
 		value |= V_DIRECTION;
 
+	if (tegra_plane_use_horizontal_filtering(plane, window)) {
+		/*
+		 * Enable horizontal 6-tap filter and set filtering
+		 * coefficients to the default values defined in TRM.
+		 */
+		tegra_plane_writel(plane, 0x00008000, DC_WIN_H_FILTER_P(0));
+		tegra_plane_writel(plane, 0x3e087ce1, DC_WIN_H_FILTER_P(1));
+		tegra_plane_writel(plane, 0x3b117ac1, DC_WIN_H_FILTER_P(2));
+		tegra_plane_writel(plane, 0x591b73aa, DC_WIN_H_FILTER_P(3));
+		tegra_plane_writel(plane, 0x57256d9a, DC_WIN_H_FILTER_P(4));
+		tegra_plane_writel(plane, 0x552f668b, DC_WIN_H_FILTER_P(5));
+		tegra_plane_writel(plane, 0x73385e8b, DC_WIN_H_FILTER_P(6));
+		tegra_plane_writel(plane, 0x72435583, DC_WIN_H_FILTER_P(7));
+		tegra_plane_writel(plane, 0x714c4c8b, DC_WIN_H_FILTER_P(8));
+		tegra_plane_writel(plane, 0x70554393, DC_WIN_H_FILTER_P(9));
+		tegra_plane_writel(plane, 0x715e389b, DC_WIN_H_FILTER_P(10));
+		tegra_plane_writel(plane, 0x71662faa, DC_WIN_H_FILTER_P(11));
+		tegra_plane_writel(plane, 0x536d25ba, DC_WIN_H_FILTER_P(12));
+		tegra_plane_writel(plane, 0x55731bca, DC_WIN_H_FILTER_P(13));
+		tegra_plane_writel(plane, 0x387a11d9, DC_WIN_H_FILTER_P(14));
+		tegra_plane_writel(plane, 0x3c7c08f1, DC_WIN_H_FILTER_P(15));
+
+		value |= H_FILTER;
+	}
+
+	if (tegra_plane_use_vertical_filtering(plane, window)) {
+		unsigned int i, k;
+
+		/*
+		 * Enable vertical 2-tap filter and set filtering
+		 * coefficients to the default values defined in TRM.
+		 */
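+		/* i.e. a linear ramp: P(0) = 128, P(1) = 120, ..., P(15) = 8 */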
+		for (i = 0, k = 128; i < 16; i++, k -= 8)
+			tegra_plane_writel(plane, k, DC_WIN_V_FILTER_P(i));
+
+		value |= V_FILTER;
+	}
+
 	tegra_plane_writel(plane, value, DC_WIN_WIN_OPTIONS);
 
-	if (dc->soc->supports_blending)
-		tegra_plane_setup_blending(plane, window);
-	else
+	if (dc->soc->has_legacy_blending)
 		tegra_plane_setup_blending_legacy(plane);
+	else
+		tegra_plane_setup_blending(plane, window);
 }
 
 static const u32 tegra20_primary_formats[] = {
@@ -451,17 +596,18 @@
 				    struct drm_plane_state *state)
 {
 	struct tegra_plane_state *plane_state = to_tegra_plane_state(state);
+	unsigned int rotation = DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_Y;
 	struct tegra_bo_tiling *tiling = &plane_state->tiling;
 	struct tegra_plane *tegra = to_tegra_plane(plane);
 	struct tegra_dc *dc = to_tegra_dc(state->crtc);
-	unsigned int format;
 	int err;
 
 	/* no need for further checks if the plane is being disabled */
 	if (!state->crtc)
 		return 0;
 
-	err = tegra_plane_format(state->fb->format->format, &format,
+	err = tegra_plane_format(state->fb->format->format,
+				 &plane_state->format,
 				 &plane_state->swap);
 	if (err < 0)
 		return err;
@@ -472,22 +618,12 @@
 	 * the corresponding opaque formats. However, the opaque formats can
 	 * be emulated by disabling alpha blending for the plane.
 	 */
-	if (!dc->soc->supports_blending) {
-		if (!tegra_plane_format_has_alpha(format)) {
-			err = tegra_plane_format_get_alpha(format, &format);
-			if (err < 0)
-				return err;
-
-			plane_state->opaque = true;
-		} else {
-			plane_state->opaque = false;
-		}
-
-		tegra_plane_check_dependent(tegra, plane_state);
+	if (dc->soc->has_legacy_blending) {
+		err = tegra_plane_setup_legacy_state(tegra, plane_state);
+		if (err < 0)
+			return err;
 	}
 
-	plane_state->format = format;
-
 	err = tegra_fb_get_tiling(state->fb, tiling);
 	if (err < 0)
 		return err;
@@ -498,6 +634,13 @@
 		return -EINVAL;
 	}
 
+	rotation = drm_rotation_simplify(state->rotation, rotation);
+
+	if (rotation & DRM_MODE_REFLECT_Y)
+		plane_state->bottom_up = true;
+	else
+		plane_state->bottom_up = false;
+
 	/*
 	 * Tegra doesn't support different strides for U and V planes so we
 	 * error out if the user tries to display a framebuffer with such a
@@ -558,7 +701,7 @@
 	window.dst.w = drm_rect_width(&plane->state->dst);
 	window.dst.h = drm_rect_height(&plane->state->dst);
 	window.bits_per_pixel = fb->format->cpp[0] * 8;
-	window.bottom_up = tegra_fb_is_bottom_up(fb);
+	window.bottom_up = tegra_fb_is_bottom_up(fb) || state->bottom_up;
 
 	/* copy from state */
 	window.zpos = plane->state->normalized_zpos;
@@ -639,9 +782,15 @@
 	}
 
 	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
+	drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255);
 
-	if (dc->soc->supports_blending)
-		drm_plane_create_zpos_property(&plane->base, 0, 0, 255);
+	err = drm_plane_create_rotation_property(&plane->base,
+						 DRM_MODE_ROTATE_0,
+						 DRM_MODE_ROTATE_0 |
+						 DRM_MODE_REFLECT_Y);
+	if (err < 0)
+		dev_err(dc->dev, "failed to create rotation property: %d\n",
+			err);
 
 	return &plane->base;
 }
@@ -918,9 +1067,15 @@
 	}
 
 	drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
+	drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255);
 
-	if (dc->soc->supports_blending)
-		drm_plane_create_zpos_property(&plane->base, 0, 0, 255);
+	err = drm_plane_create_rotation_property(&plane->base,
+						 DRM_MODE_ROTATE_0,
+						 DRM_MODE_ROTATE_0 |
+						 DRM_MODE_REFLECT_Y);
+	if (err < 0)
+		dev_err(dc->dev, "failed to create rotation property: %d\n",
+			err);
 
 	return &plane->base;
 }
@@ -1826,7 +1981,6 @@
 static int tegra_dc_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
-	struct iommu_group *group = iommu_group_get(client->dev);
 	unsigned long flags = HOST1X_SYNCPT_CLIENT_MANAGED;
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	struct tegra_drm *tegra = drm->dev_private;
@@ -1838,20 +1992,11 @@
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
 
-	if (group && tegra->domain) {
-		if (group != tegra->group) {
-			err = iommu_attach_group(tegra->domain, group);
-			if (err < 0) {
-				dev_err(dc->dev,
-					"failed to attach to domain: %d\n",
-					err);
-				return err;
-			}
-
-			tegra->group = group;
-		}
-
-		dc->domain = tegra->domain;
+	dc->group = host1x_client_iommu_attach(client, true);
+	if (IS_ERR(dc->group)) {
+		err = PTR_ERR(dc->group);
+		dev_err(client->dev, "failed to attach to domain: %d\n", err);
+		return err;
 	}
 
 	if (dc->soc->wgrps)
@@ -1916,24 +2061,15 @@
 	if (!IS_ERR(primary))
 		drm_plane_cleanup(primary);
 
-	if (group && dc->domain) {
-		if (group == tegra->group) {
-			iommu_detach_group(dc->domain, group);
-			tegra->group = NULL;
-		}
-
-		dc->domain = NULL;
-	}
+	host1x_client_iommu_detach(client, dc->group);
+	host1x_syncpt_free(dc->syncpt);
 
 	return err;
 }
 
 static int tegra_dc_exit(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
-	struct iommu_group *group = iommu_group_get(client->dev);
 	struct tegra_dc *dc = host1x_client_to_dc(client);
-	struct tegra_drm *tegra = drm->dev_private;
 	int err;
 
 	devm_free_irq(dc->dev, dc->irq, dc);
@@ -1944,15 +2080,7 @@
 		return err;
 	}
 
-	if (group && dc->domain) {
-		if (group == tegra->group) {
-			iommu_detach_group(dc->domain, group);
-			tegra->group = NULL;
-		}
-
-		dc->domain = NULL;
-	}
-
+	host1x_client_iommu_detach(client, dc->group);
 	host1x_syncpt_free(dc->syncpt);
 
 	return 0;
@@ -1968,7 +2096,7 @@
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
-	.supports_blending = false,
+	.has_legacy_blending = true,
 	.pitch_align = 8,
 	.has_powergate = false,
 	.coupled_pm = true,
@@ -1978,6 +2106,8 @@
 	.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
 	.overlay_formats = tegra20_overlay_formats,
 	.modifiers = tegra20_modifiers,
+	.has_win_a_without_filters = true,
+	.has_win_c_without_vert_filter = true,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -1985,7 +2115,7 @@
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
-	.supports_blending = false,
+	.has_legacy_blending = true,
 	.pitch_align = 8,
 	.has_powergate = false,
 	.coupled_pm = false,
@@ -1995,6 +2125,8 @@
 	.num_overlay_formats = ARRAY_SIZE(tegra20_overlay_formats),
 	.overlay_formats = tegra20_overlay_formats,
 	.modifiers = tegra20_modifiers,
+	.has_win_a_without_filters = false,
+	.has_win_c_without_vert_filter = false,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -2002,7 +2134,7 @@
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
-	.supports_blending = false,
+	.has_legacy_blending = true,
 	.pitch_align = 64,
 	.has_powergate = true,
 	.coupled_pm = false,
@@ -2012,6 +2144,8 @@
 	.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
 	.overlay_formats = tegra114_overlay_formats,
 	.modifiers = tegra20_modifiers,
+	.has_win_a_without_filters = false,
+	.has_win_c_without_vert_filter = false,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -2019,7 +2153,7 @@
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
-	.supports_blending = true,
+	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = true,
 	.coupled_pm = false,
@@ -2029,6 +2163,8 @@
 	.num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats),
 	.overlay_formats = tegra124_overlay_formats,
 	.modifiers = tegra124_modifiers,
+	.has_win_a_without_filters = false,
+	.has_win_c_without_vert_filter = false,
 };
 
 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -2036,7 +2172,7 @@
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
-	.supports_blending = true,
+	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = true,
 	.coupled_pm = false,
@@ -2046,6 +2182,8 @@
 	.num_overlay_formats = ARRAY_SIZE(tegra114_overlay_formats),
 	.overlay_formats = tegra114_overlay_formats,
 	.modifiers = tegra124_modifiers,
+	.has_win_a_without_filters = false,
+	.has_win_c_without_vert_filter = false,
 };
 
 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@ -2087,7 +2225,7 @@
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
-	.supports_blending = true,
+	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = false,
 	.coupled_pm = false,
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index d2b50d3..e96f582 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -55,7 +55,7 @@
 	bool supports_interlacing;
 	bool supports_cursor;
 	bool supports_block_linear;
-	bool supports_blending;
+	bool has_legacy_blending;
 	unsigned int pitch_align;
 	bool has_powergate;
 	bool coupled_pm;
@@ -67,6 +67,8 @@
 	const u32 *overlay_formats;
 	unsigned int num_overlay_formats;
 	const u64 *modifiers;
+	bool has_win_a_without_filters;
+	bool has_win_c_without_vert_filter;
 };
 
 struct tegra_dc {
@@ -92,7 +94,7 @@
 
 	const struct tegra_dc_soc_info *soc;
 
-	struct iommu_domain *domain;
+	struct iommu_group *group;
 };
 
 static inline struct tegra_dc *
@@ -553,6 +555,9 @@
 #define  THREAD_NUM(x) (((x) & 0x1f) << 1)
 #define  THREAD_GROUP_ENABLE (1 << 0)
 
+#define DC_WIN_H_FILTER_P(p)			(0x601 + (p))
+#define DC_WIN_V_FILTER_P(p)			(0x619 + (p))
+
 #define DC_WIN_CSC_YOF				0x611
 #define DC_WIN_CSC_KYRGB			0x612
 #define DC_WIN_CSC_KUR				0x613
@@ -566,6 +571,8 @@
 #define H_DIRECTION  (1 <<  0)
 #define V_DIRECTION  (1 <<  2)
 #define COLOR_EXPAND (1 <<  6)
+#define H_FILTER     (1 <<  8)
+#define V_FILTER     (1 << 10)
 #define CSC_ENABLE   (1 << 18)
 #define WIN_ENABLE   (1 << 30)
 
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 7afe2f6..776c151 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -38,26 +38,11 @@
 {
 	int err;
 
-	err = drm_atomic_helper_check_modeset(drm, state);
+	err = drm_atomic_helper_check(drm, state);
 	if (err < 0)
 		return err;
 
-	err = tegra_display_hub_atomic_check(drm, state);
-	if (err < 0)
-		return err;
-
-	err = drm_atomic_normalize_zpos(drm, state);
-	if (err < 0)
-		return err;
-
-	err = drm_atomic_helper_check_planes(drm, state);
-	if (err < 0)
-		return err;
-
-	if (state->legacy_cursor_update)
-		state->async_update = !drm_atomic_helper_async_check(drm, state);
-
-	return 0;
+	return tegra_display_hub_atomic_check(drm, state);
 }
 
 static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = {
@@ -113,6 +98,10 @@
 			goto free;
 		}
 
+		err = iova_cache_get();
+		if (err < 0)
+			goto domain;
+
 		geometry = &tegra->domain->geometry;
 		gem_start = geometry->aperture_start;
 		gem_end = geometry->aperture_end - CARVEOUT_SZ;
@@ -151,6 +140,8 @@
 
 	drm->mode_config.allow_fb_modifiers = true;
 
+	drm->mode_config.normalize_zpos = true;
+
 	drm->mode_config.funcs = &tegra_drm_mode_config_funcs;
 	drm->mode_config.helper_private = &tegra_drm_mode_config_helpers;
 
@@ -204,11 +195,14 @@
 	drm_mode_config_cleanup(drm);
 
 	if (tegra->domain) {
-		iommu_domain_free(tegra->domain);
-		drm_mm_takedown(&tegra->mm);
 		mutex_destroy(&tegra->mm_lock);
+		drm_mm_takedown(&tegra->mm);
 		put_iova_domain(&tegra->carveout.domain);
+		iova_cache_put();
 	}
+domain:
+	if (tegra->domain)
+		iommu_domain_free(tegra->domain);
 free:
 	kfree(tegra);
 	return err;
@@ -230,10 +224,11 @@
 		return;
 
 	if (tegra->domain) {
-		iommu_domain_free(tegra->domain);
-		drm_mm_takedown(&tegra->mm);
 		mutex_destroy(&tegra->mm_lock);
+		drm_mm_takedown(&tegra->mm);
 		put_iova_domain(&tegra->carveout.domain);
+		iova_cache_put();
+		iommu_domain_free(tegra->domain);
 	}
 
 	kfree(tegra);
@@ -313,46 +308,15 @@
 	return 0;
 }
 
-static int host1x_waitchk_copy_from_user(struct host1x_waitchk *dest,
-					 struct drm_tegra_waitchk __user *src,
-					 struct drm_file *file)
-{
-	u32 cmdbuf;
-	int err;
-
-	err = get_user(cmdbuf, &src->handle);
-	if (err < 0)
-		return err;
-
-	err = get_user(dest->offset, &src->offset);
-	if (err < 0)
-		return err;
-
-	err = get_user(dest->syncpt_id, &src->syncpt);
-	if (err < 0)
-		return err;
-
-	err = get_user(dest->thresh, &src->thresh);
-	if (err < 0)
-		return err;
-
-	dest->bo = host1x_bo_lookup(file, cmdbuf);
-	if (!dest->bo)
-		return -ENOENT;
-
-	return 0;
-}
-
 int tegra_drm_submit(struct tegra_drm_context *context,
 		     struct drm_tegra_submit *args, struct drm_device *drm,
 		     struct drm_file *file)
 {
+	struct host1x_client *client = &context->client->base;
 	unsigned int num_cmdbufs = args->num_cmdbufs;
 	unsigned int num_relocs = args->num_relocs;
-	unsigned int num_waitchks = args->num_waitchks;
 	struct drm_tegra_cmdbuf __user *user_cmdbufs;
 	struct drm_tegra_reloc __user *user_relocs;
-	struct drm_tegra_waitchk __user *user_waitchks;
 	struct drm_tegra_syncpt __user *user_syncpt;
 	struct drm_tegra_syncpt syncpt;
 	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
@@ -364,7 +328,6 @@
 
 	user_cmdbufs = u64_to_user_ptr(args->cmdbufs);
 	user_relocs = u64_to_user_ptr(args->relocs);
-	user_waitchks = u64_to_user_ptr(args->waitchks);
 	user_syncpt = u64_to_user_ptr(args->syncpts);
 
 	/* We don't yet support other than one syncpt_incr struct per submit */
@@ -376,21 +339,20 @@
 		return -EINVAL;
 
 	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
-			       args->num_relocs, args->num_waitchks);
+			       args->num_relocs);
 	if (!job)
 		return -ENOMEM;
 
 	job->num_relocs = args->num_relocs;
-	job->num_waitchk = args->num_waitchks;
-	job->client = (u32)args->context;
-	job->class = context->client->base.class;
+	job->client = client;
+	job->class = client->class;
 	job->serialize = true;
 
 	/*
 	 * Track referenced BOs so that they can be unreferenced after the
 	 * submission is complete.
 	 */
-	num_refs = num_cmdbufs + num_relocs * 2 + num_waitchks;
+	num_refs = num_cmdbufs + num_relocs * 2;
 
 	refs = kmalloc_array(num_refs, sizeof(*refs), GFP_KERNEL);
 	if (!refs) {
@@ -451,13 +413,13 @@
 		struct host1x_reloc *reloc;
 		struct tegra_bo *obj;
 
-		err = host1x_reloc_copy_from_user(&job->relocarray[num_relocs],
+		err = host1x_reloc_copy_from_user(&job->relocs[num_relocs],
 						  &user_relocs[num_relocs], drm,
 						  file);
 		if (err < 0)
 			goto fail;
 
-		reloc = &job->relocarray[num_relocs];
+		reloc = &job->relocs[num_relocs];
 		obj = host1x_to_tegra_bo(reloc->cmdbuf.bo);
 		refs[num_refs++] = &obj->gem;
 
@@ -481,30 +443,6 @@
 		}
 	}
 
-	/* copy and resolve waitchks from submit */
-	while (num_waitchks--) {
-		struct host1x_waitchk *wait = &job->waitchk[num_waitchks];
-		struct tegra_bo *obj;
-
-		err = host1x_waitchk_copy_from_user(
-			wait, &user_waitchks[num_waitchks], file);
-		if (err < 0)
-			goto fail;
-
-		obj = host1x_to_tegra_bo(wait->bo);
-		refs[num_refs++] = &obj->gem;
-
-		/*
-		 * The unaligned offset will cause an unaligned write during
-		 * of the waitchks patching, corrupting the commands stream.
-		 */
-		if (wait->offset & 3 ||
-		    wait->offset >= obj->gem.size) {
-			err = -EINVAL;
-			goto fail;
-		}
-	}
-
 	if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) {
 		err = -EFAULT;
 		goto fail;
@@ -1114,6 +1052,52 @@
 	return 0;
 }
 
+struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
+					       bool shared)
+{
+	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = drm->dev_private;
+	struct iommu_group *group = NULL;
+	int err;
+
+	if (tegra->domain) {
+		group = iommu_group_get(client->dev);
+		if (!group) {
+			dev_err(client->dev, "failed to get IOMMU group\n");
+			return ERR_PTR(-ENODEV);
+		}
+
+		if (!shared || (shared && (group != tegra->group))) {
+			err = iommu_attach_group(tegra->domain, group);
+			if (err < 0) {
+				iommu_group_put(group);
+				return ERR_PTR(err);
+			}
+
+			if (shared && !tegra->group)
+				tegra->group = group;
+		}
+	}
+
+	return group;
+}
+
+void host1x_client_iommu_detach(struct host1x_client *client,
+				struct iommu_group *group)
+{
+	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = drm->dev_private;
+
+	if (group) {
+		if (group == tegra->group) {
+			iommu_detach_group(tegra->domain, group);
+			tegra->group = NULL;
+		}
+
+		iommu_group_put(group);
+	}
+}
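+
+/*
+ * These helpers pair up: tegra_dc_init() stores the group returned by
+ * host1x_client_iommu_attach(), and both its error path and
+ * tegra_dc_exit() hand it back to host1x_client_iommu_detach(), which
+ * also drops the reference taken by iommu_group_get() on attach.
+ */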
+
 void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma)
 {
 	struct iova *alloc;
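
The two helpers added above give host1x clients a common way to join or leave the shared IOMMU domain. As a rough sketch of the calling contract (the "foo" names are hypothetical; the gr2d and gr3d conversions further down follow this exact shape):

	/* Sketch only: error handling trimmed, "foo" names hypothetical. */
	static int foo_init(struct host1x_client *client)
	{
		struct foo *foo = to_foo(client);

		/* shared == false: request a dedicated IOMMU group. */
		foo->group = host1x_client_iommu_attach(client, false);
		if (IS_ERR(foo->group))
			return PTR_ERR(foo->group);

		return 0;
	}

	static int foo_exit(struct host1x_client *client)
	{
		struct foo *foo = to_foo(client);

		/* Safe with a NULL group (no IOMMU domain configured). */
		host1x_client_iommu_detach(client, foo->group);
		return 0;
	}
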
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 4f41aae..92d2487 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -29,16 +29,10 @@
 
 struct reset_control;
 
-struct tegra_fb {
-	struct drm_framebuffer base;
-	struct tegra_bo **planes;
-	unsigned int num_planes;
-};
-
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 struct tegra_fbdev {
 	struct drm_fb_helper base;
-	struct tegra_fb *fb;
+	struct drm_framebuffer *fb;
 };
 #endif
 
@@ -97,6 +91,7 @@
 	struct host1x_client base;
 	struct list_head list;
 
+	unsigned int version;
 	const struct tegra_drm_client_ops *ops;
 };
 
@@ -110,6 +105,10 @@
 			      struct tegra_drm_client *client);
 int tegra_drm_unregister_client(struct tegra_drm *tegra,
 				struct tegra_drm_client *client);
+struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
+					       bool shared);
+void host1x_client_iommu_detach(struct host1x_client *client,
+				struct iommu_group *group);
 
 int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
 int tegra_drm_exit(struct tegra_drm *tegra);
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index e694349..4c22cdd 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -14,11 +14,7 @@
 
 #include "drm.h"
 #include "gem.h"
-
-static inline struct tegra_fb *to_tegra_fb(struct drm_framebuffer *fb)
-{
-	return container_of(fb, struct tegra_fb, base);
-}
+#include <drm/drm_gem_framebuffer_helper.h>
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper)
@@ -30,19 +26,14 @@
 struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
 				    unsigned int index)
 {
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-
-	if (index >= framebuffer->format->num_planes)
-		return NULL;
-
-	return fb->planes[index];
+	return to_tegra_bo(drm_gem_fb_get_obj(framebuffer, index));
 }
 
 bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer)
 {
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
+	struct tegra_bo *bo = tegra_fb_get_plane(framebuffer, 0);
 
-	if (fb->planes[0]->flags & TEGRA_BO_BOTTOM_UP)
+	if (bo->flags & TEGRA_BO_BOTTOM_UP)
 		return true;
 
 	return false;
@@ -51,8 +42,7 @@
 int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 			struct tegra_bo_tiling *tiling)
 {
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-	uint64_t modifier = fb->base.modifier;
+	uint64_t modifier = framebuffer->modifier;
 
 	switch (modifier) {
 	case DRM_FORMAT_MOD_LINEAR:
@@ -102,46 +92,17 @@
 	return 0;
 }
 
-static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
-{
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-	unsigned int i;
-
-	for (i = 0; i < fb->num_planes; i++) {
-		struct tegra_bo *bo = fb->planes[i];
-
-		if (bo) {
-			if (bo->pages)
-				vunmap(bo->vaddr);
-
-			drm_gem_object_put_unlocked(&bo->gem);
-		}
-	}
-
-	drm_framebuffer_cleanup(framebuffer);
-	kfree(fb->planes);
-	kfree(fb);
-}
-
-static int tegra_fb_create_handle(struct drm_framebuffer *framebuffer,
-				  struct drm_file *file, unsigned int *handle)
-{
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-
-	return drm_gem_handle_create(file, &fb->planes[0]->gem, handle);
-}
-
 static const struct drm_framebuffer_funcs tegra_fb_funcs = {
-	.destroy = tegra_fb_destroy,
-	.create_handle = tegra_fb_create_handle,
+	.destroy = drm_gem_fb_destroy,
+	.create_handle = drm_gem_fb_create_handle,
 };
 
-static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
-				       const struct drm_mode_fb_cmd2 *mode_cmd,
-				       struct tegra_bo **planes,
-				       unsigned int num_planes)
+static struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm,
+					      const struct drm_mode_fb_cmd2 *mode_cmd,
+					      struct tegra_bo **planes,
+					      unsigned int num_planes)
 {
-	struct tegra_fb *fb;
+	struct drm_framebuffer *fb;
 	unsigned int i;
 	int err;
 
@@ -149,24 +110,15 @@
 	if (!fb)
 		return ERR_PTR(-ENOMEM);
 
-	fb->planes = kzalloc(num_planes * sizeof(*planes), GFP_KERNEL);
-	if (!fb->planes) {
-		kfree(fb);
-		return ERR_PTR(-ENOMEM);
-	}
+	drm_helper_mode_fill_fb_struct(drm, fb, mode_cmd);
 
-	fb->num_planes = num_planes;
+	for (i = 0; i < fb->format->num_planes; i++)
+		fb->obj[i] = &planes[i]->gem;
 
-	drm_helper_mode_fill_fb_struct(drm, &fb->base, mode_cmd);
-
-	for (i = 0; i < fb->num_planes; i++)
-		fb->planes[i] = planes[i];
-
-	err = drm_framebuffer_init(drm, &fb->base, &tegra_fb_funcs);
+	err = drm_framebuffer_init(drm, fb, &tegra_fb_funcs);
 	if (err < 0) {
 		dev_err(drm->dev, "failed to initialize framebuffer: %d\n",
 			err);
-		kfree(fb->planes);
 		kfree(fb);
 		return ERR_PTR(err);
 	}
@@ -181,7 +133,7 @@
 	unsigned int hsub, vsub, i;
 	struct tegra_bo *planes[4];
 	struct drm_gem_object *gem;
-	struct tegra_fb *fb;
+	struct drm_framebuffer *fb;
 	int err;
 
 	hsub = drm_format_horz_chroma_subsampling(cmd->pixel_format);
@@ -217,7 +169,7 @@
 		goto unreference;
 	}
 
-	return &fb->base;
+	return fb;
 
 unreference:
 	while (i--)
@@ -298,7 +250,7 @@
 		return PTR_ERR(fbdev->fb);
 	}
 
-	fb = &fbdev->fb->base;
+	fb = fbdev->fb;
 	helper->fb = fb;
 	helper->fbdev = info;
 
@@ -398,8 +350,17 @@
 {
 	drm_fb_helper_unregister_fbi(&fbdev->base);
 
-	if (fbdev->fb)
-		drm_framebuffer_remove(&fbdev->fb->base);
+	if (fbdev->fb) {
+		struct tegra_bo *bo = tegra_fb_get_plane(fbdev->fb, 0);
+
+		/* Undo the special mapping we made in fbdev probe. */
+		if (bo && bo->pages) {
+			vunmap(bo->vaddr);
+			bo->vaddr = NULL;
+		}
+
+		drm_framebuffer_remove(fbdev->fb);
+	}
 
 	drm_fb_helper_fini(&fbdev->base);
 	tegra_fbdev_free(fbdev);
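
With the driver-private struct tegra_fb gone, per-plane GEM objects live in the generic fb->obj[] array, which is what lets the common drm_gem_fb_destroy() and drm_gem_fb_create_handle() helpers take over. A minimal sketch of the accessor pattern, with a hypothetical wrapper name:

	/* Sketch: recovering a driver BO from the generic fb->obj[] store. */
	static struct tegra_bo *example_get_plane_bo(struct drm_framebuffer *fb,
						     unsigned int index)
	{
		/* drm_gem_fb_get_obj() returns fb->obj[index], or NULL when
		 * the index is out of range. */
		struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, index);

		return gem ? to_tegra_bo(gem) : NULL;
	}
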
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 8b0b4ff..00a5c9f 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -422,14 +422,13 @@
 	return 0;
 }
 
-static int tegra_bo_fault(struct vm_fault *vmf)
+static vm_fault_t tegra_bo_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *gem = vma->vm_private_data;
 	struct tegra_bo *bo = to_tegra_bo(gem);
 	struct page *page;
 	pgoff_t offset;
-	int err;
 
 	if (!bo->pages)
 		return VM_FAULT_SIGBUS;
@@ -437,20 +436,7 @@
 	offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 	page = bo->pages[offset];
 
-	err = vm_insert_page(vma, vmf->address, page);
-	switch (err) {
-	case -EAGAIN:
-	case 0:
-	case -ERESTARTSYS:
-	case -EINTR:
-	case -EBUSY:
-		return VM_FAULT_NOPAGE;
-
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	}
-
-	return VM_FAULT_SIGBUS;
+	return vmf_insert_page(vma, vmf->address, page);
 }
 
 const struct vm_operations_struct tegra_bo_vm_ops = {
@@ -663,6 +649,8 @@
 {
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 
+	exp_info.exp_name = KBUILD_MODNAME;
+	exp_info.owner = drm->driver->fops->owner;
 	exp_info.ops = &tegra_gem_prime_dmabuf_ops;
 	exp_info.size = gem->size;
 	exp_info.flags = flags;
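
Filling in exp_name and owner before exporting attributes the dma-buf to this driver in debugfs and keeps the module pinned for the buffer's lifetime. For context, the patched export path has roughly this shape (trimmed sketch; the function name here is illustrative):

	/* Trimmed sketch of the patched export path. */
	static struct dma_buf *example_prime_export(struct drm_device *drm,
						    struct drm_gem_object *gem,
						    int flags)
	{
		DEFINE_DMA_BUF_EXPORT_INFO(exp_info);

		exp_info.exp_name = KBUILD_MODNAME;	   /* debugfs attribution */
		exp_info.owner = drm->driver->fops->owner; /* pins the module */
		exp_info.ops = &tegra_gem_prime_dmabuf_ops;
		exp_info.size = gem->size;
		exp_info.flags = flags;
		exp_info.priv = gem;

		return dma_buf_export(&exp_info);
	}
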
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 9a8ea93..673059f 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -7,16 +7,25 @@
  */
 
 #include <linux/clk.h>
+#include <linux/iommu.h>
+#include <linux/of_device.h>
 
 #include "drm.h"
 #include "gem.h"
 #include "gr2d.h"
 
+struct gr2d_soc {
+	unsigned int version;
+};
+
 struct gr2d {
+	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk;
 
+	const struct gr2d_soc *soc;
+
 	DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS);
 };
 
@@ -31,6 +40,7 @@
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
 	struct gr2d *gr2d = to_gr2d(drm);
+	int err;
 
 	gr2d->channel = host1x_channel_request(client->dev);
 	if (!gr2d->channel)
@@ -38,24 +48,48 @@
 
 	client->syncpts[0] = host1x_syncpt_request(client, flags);
 	if (!client->syncpts[0]) {
-		host1x_channel_put(gr2d->channel);
-		return -ENOMEM;
+		err = -ENOMEM;
+		dev_err(client->dev, "failed to request syncpoint: %d\n", err);
+		goto put;
 	}
 
-	return tegra_drm_register_client(dev->dev_private, drm);
+	gr2d->group = host1x_client_iommu_attach(client, false);
+	if (IS_ERR(gr2d->group)) {
+		err = PTR_ERR(gr2d->group);
+		dev_err(client->dev, "failed to attach to domain: %d\n", err);
+		goto free;
+	}
+
+	err = tegra_drm_register_client(dev->dev_private, drm);
+	if (err < 0) {
+		dev_err(client->dev, "failed to register client: %d\n", err);
+		goto detach;
+	}
+
+	return 0;
+
+detach:
+	host1x_client_iommu_detach(client, gr2d->group);
+free:
+	host1x_syncpt_free(client->syncpts[0]);
+put:
+	host1x_channel_put(gr2d->channel);
+	return err;
 }
 
 static int gr2d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct tegra_drm *tegra = dev->dev_private;
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
 
-	err = tegra_drm_unregister_client(dev->dev_private, drm);
+	err = tegra_drm_unregister_client(tegra, drm);
 	if (err < 0)
 		return err;
 
+	host1x_client_iommu_detach(client, gr2d->group);
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(gr2d->channel);
 
@@ -123,9 +157,17 @@
 	.submit = tegra_drm_submit,
 };
 
+static const struct gr2d_soc tegra20_gr2d_soc = {
+	.version = 0x20,
+};
+
+static const struct gr2d_soc tegra30_gr2d_soc = {
+	.version = 0x30,
+};
+
 static const struct of_device_id gr2d_match[] = {
-	{ .compatible = "nvidia,tegra30-gr2d" },
-	{ .compatible = "nvidia,tegra20-gr2d" },
+	{ .compatible = "nvidia,tegra30-gr2d", .data = &tegra30_gr2d_soc },
+	{ .compatible = "nvidia,tegra20-gr2d", .data = &tegra20_gr2d_soc },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, gr2d_match);
@@ -158,6 +200,8 @@
 	if (!gr2d)
 		return -ENOMEM;
 
+	gr2d->soc = of_device_get_match_data(dev);
+
 	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
 	if (!syncpts)
 		return -ENOMEM;
@@ -182,6 +226,7 @@
 	gr2d->client.base.num_syncpts = 1;
 
 	INIT_LIST_HEAD(&gr2d->client.list);
+	gr2d->client.version = gr2d->soc->version;
 	gr2d->client.ops = &gr2d_ops;
 
 	err = host1x_client_register(&gr2d->client.base);
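
Attaching the SoC data to the OF match table lets probe recover it with of_device_get_match_data() instead of re-checking compatible strings by hand. The generic shape of the pattern, with hypothetical names:

	/* Hypothetical names; same shape as the gr2d/gr3d conversions. */
	static const struct example_soc v1_soc = { .version = 0x20 };
	static const struct example_soc v2_soc = { .version = 0x30 };

	static const struct of_device_id example_match[] = {
		{ .compatible = "vendor,block-v2", .data = &v2_soc },
		{ .compatible = "vendor,block-v1", .data = &v1_soc },
		{ /* sentinel */ }
	};

	static int example_probe(struct platform_device *pdev)
	{
		/* Returns the .data field of the matched entry. */
		const struct example_soc *soc =
			of_device_get_match_data(&pdev->dev);

		if (!soc)
			return -ENODEV;

		return 0;
	}
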
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 28c4ef6..4778ae9 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -9,7 +9,9 @@
 
 #include <linux/clk.h>
 #include <linux/host1x.h>
+#include <linux/iommu.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 
@@ -19,7 +21,12 @@
 #include "gem.h"
 #include "gr3d.h"
 
+struct gr3d_soc {
+	unsigned int version;
+};
+
 struct gr3d {
+	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk_secondary;
@@ -27,6 +34,8 @@
 	struct reset_control *rst_secondary;
 	struct reset_control *rst;
 
+	const struct gr3d_soc *soc;
+
 	DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS);
 };
 
@@ -41,6 +50,7 @@
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
 	struct gr3d *gr3d = to_gr3d(drm);
+	int err;
 
 	gr3d->channel = host1x_channel_request(client->dev);
 	if (!gr3d->channel)
@@ -48,11 +58,33 @@
 
 	client->syncpts[0] = host1x_syncpt_request(client, flags);
 	if (!client->syncpts[0]) {
-		host1x_channel_put(gr3d->channel);
-		return -ENOMEM;
+		err = -ENOMEM;
+		dev_err(client->dev, "failed to request syncpoint: %d\n", err);
+		goto put;
 	}
 
-	return tegra_drm_register_client(dev->dev_private, drm);
+	gr3d->group = host1x_client_iommu_attach(client, false);
+	if (IS_ERR(gr3d->group)) {
+		err = PTR_ERR(gr3d->group);
+		dev_err(client->dev, "failed to attach to domain: %d\n", err);
+		goto free;
+	}
+
+	err = tegra_drm_register_client(dev->dev_private, drm);
+	if (err < 0) {
+		dev_err(client->dev, "failed to register client: %d\n", err);
+		goto detach;
+	}
+
+	return 0;
+
+detach:
+	host1x_client_iommu_detach(client, gr3d->group);
+free:
+	host1x_syncpt_free(client->syncpts[0]);
+put:
+	host1x_channel_put(gr3d->channel);
+	return err;
 }
 
 static int gr3d_exit(struct host1x_client *client)
@@ -66,6 +98,7 @@
 	if (err < 0)
 		return err;
 
+	host1x_client_iommu_detach(client, gr3d->group);
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(gr3d->channel);
 
@@ -125,10 +158,22 @@
 	.submit = tegra_drm_submit,
 };
 
+static const struct gr3d_soc tegra20_gr3d_soc = {
+	.version = 0x20,
+};
+
+static const struct gr3d_soc tegra30_gr3d_soc = {
+	.version = 0x30,
+};
+
+static const struct gr3d_soc tegra114_gr3d_soc = {
+	.version = 0x35,
+};
+
 static const struct of_device_id tegra_gr3d_match[] = {
-	{ .compatible = "nvidia,tegra114-gr3d" },
-	{ .compatible = "nvidia,tegra30-gr3d" },
-	{ .compatible = "nvidia,tegra20-gr3d" },
+	{ .compatible = "nvidia,tegra114-gr3d", .data = &tegra114_gr3d_soc },
+	{ .compatible = "nvidia,tegra30-gr3d", .data = &tegra30_gr3d_soc },
+	{ .compatible = "nvidia,tegra20-gr3d", .data = &tegra20_gr3d_soc },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, tegra_gr3d_match);
@@ -250,6 +295,8 @@
 	if (!gr3d)
 		return -ENOMEM;
 
+	gr3d->soc = of_device_get_match_data(&pdev->dev);
+
 	syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL);
 	if (!syncpts)
 		return -ENOMEM;
@@ -307,6 +354,7 @@
 	gr3d->client.base.num_syncpts = 1;
 
 	INIT_LIST_HEAD(&gr3d->client.list);
+	gr3d->client.version = gr3d->soc->version;
 	gr3d->client.ops = &gr3d_ops;
 
 	err = host1x_client_register(&gr3d->client.base);
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 9a3f23d..8f4fcbb 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -687,7 +687,7 @@
 	struct device *dev = hub->client.dev;
 	int err;
 
-	hub_state = tegra_display_hub_get_state(hub, state);
+	hub_state = to_tegra_display_hub_state(hub->base.state);
 
 	if (hub_state->clk) {
 		err = clk_set_rate(hub_state->clk, hub_state->rate);
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 176ef46..d068e8a 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -23,6 +23,7 @@
 
 static void tegra_plane_reset(struct drm_plane *plane)
 {
+	struct tegra_plane *p = to_tegra_plane(plane);
 	struct tegra_plane_state *state;
 
 	if (plane->state)
@@ -35,6 +36,8 @@
 	if (state) {
 		plane->state = &state->base;
 		plane->state->plane = plane;
+		plane->state->zpos = p->index;
+		plane->state->normalized_zpos = p->index;
 	}
 }
 
@@ -53,10 +56,11 @@
 	copy->tiling = state->tiling;
 	copy->format = state->format;
 	copy->swap = state->swap;
+	copy->bottom_up = state->bottom_up;
 	copy->opaque = state->opaque;
 
-	for (i = 0; i < 3; i++)
-		copy->dependent[i] = state->dependent[i];
+	for (i = 0; i < 2; i++)
+		copy->blending[i] = state->blending[i];
 
 	return &copy->base;
 }
@@ -267,24 +271,8 @@
 	return false;
 }
 
-/*
- * This is applicable to Tegra20 and Tegra30 only where the opaque formats can
- * be emulated using the alpha formats and alpha blending disabled.
- */
-bool tegra_plane_format_has_alpha(unsigned int format)
-{
-	switch (format) {
-	case WIN_COLOR_DEPTH_B5G5R5A1:
-	case WIN_COLOR_DEPTH_A1B5G5R5:
-	case WIN_COLOR_DEPTH_R8G8B8A8:
-	case WIN_COLOR_DEPTH_B8G8R8A8:
-		return true;
-	}
-
-	return false;
-}
-
-int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha)
+static int tegra_plane_format_get_alpha(unsigned int opaque,
+					unsigned int *alpha)
 {
 	if (tegra_plane_format_is_yuv(opaque, NULL)) {
 		*alpha = opaque;
@@ -316,6 +304,67 @@
 	return -EINVAL;
 }
 
+/*
+ * This is applicable to Tegra20 and Tegra30 only, where the opaque formats
+ * can be emulated using the alpha formats with alpha blending disabled.
+ */
+static int tegra_plane_setup_opacity(struct tegra_plane *tegra,
+				     struct tegra_plane_state *state)
+{
+	unsigned int format;
+	int err;
+
+	switch (state->format) {
+	case WIN_COLOR_DEPTH_B5G5R5A1:
+	case WIN_COLOR_DEPTH_A1B5G5R5:
+	case WIN_COLOR_DEPTH_R8G8B8A8:
+	case WIN_COLOR_DEPTH_B8G8R8A8:
+		state->opaque = false;
+		break;
+
+	default:
+		err = tegra_plane_format_get_alpha(state->format, &format);
+		if (err < 0)
+			return err;
+
+		state->format = format;
+		state->opaque = true;
+		break;
+	}
+
+	return 0;
+}
+
+static int tegra_plane_check_transparency(struct tegra_plane *tegra,
+					  struct tegra_plane_state *state)
+{
+	struct drm_plane_state *old, *plane_state;
+	struct drm_plane *plane;
+
+	old = drm_atomic_get_old_plane_state(state->base.state, &tegra->base);
+
+	/* check if zpos / transparency changed */
+	if (old->normalized_zpos == state->base.normalized_zpos &&
+	    to_tegra_plane_state(old)->opaque == state->opaque)
+		return 0;
+
+	/* include all sibling planes into this commit */
+	drm_for_each_plane(plane, tegra->base.dev) {
+		struct tegra_plane *p = to_tegra_plane(plane);
+
+		/* skip this plane and planes on different CRTCs */
+		if (p == tegra || p->dc != tegra->dc)
+			continue;
+
+		plane_state = drm_atomic_get_plane_state(state->base.state,
+							 plane);
+		if (IS_ERR(plane_state))
+			return PTR_ERR(plane_state);
+	}
+
+	return 1;
+}
+
 static unsigned int tegra_plane_get_overlap_index(struct tegra_plane *plane,
 						  struct tegra_plane *other)
 {
@@ -336,61 +385,98 @@
 	return index;
 }
 
-void tegra_plane_check_dependent(struct tegra_plane *tegra,
-				 struct tegra_plane_state *state)
+static void tegra_plane_update_transparency(struct tegra_plane *tegra,
+					    struct tegra_plane_state *state)
 {
-	struct drm_plane_state *old, *new;
+	struct drm_plane_state *new;
 	struct drm_plane *plane;
-	unsigned int zpos[2];
 	unsigned int i;
 
-	for (i = 0; i < 2; i++)
-		zpos[i] = 0;
-
-	for_each_oldnew_plane_in_state(state->base.state, plane, old, new, i) {
+	for_each_new_plane_in_state(state->base.state, plane, new, i) {
 		struct tegra_plane *p = to_tegra_plane(plane);
 		unsigned index;
 
 		/* skip this plane and planes on different CRTCs */
-		if (p == tegra || new->crtc != state->base.crtc)
+		if (p == tegra || p->dc != tegra->dc)
 			continue;
 
 		index = tegra_plane_get_overlap_index(tegra, p);
 
-		state->dependent[index] = false;
+		if (new->fb && __drm_format_has_alpha(new->fb->format->format))
+			state->blending[index].alpha = true;
+		else
+			state->blending[index].alpha = false;
+
+		if (new->normalized_zpos > state->base.normalized_zpos)
+			state->blending[index].top = true;
+		else
+			state->blending[index].top = false;
 
 		/*
-		 * If any of the other planes is on top of this plane and uses
-		 * a format with an alpha component, mark this plane as being
-		 * dependent, meaning it's alpha value will be 1 minus the sum
-		 * of alpha components of the overlapping planes.
+		 * A missing framebuffer means that the plane is disabled; in
+		 * this case mark the B / C window as top so that the window
+		 * index order relative to zpos can be differentiated when
+		 * programming the middle window's X / Y registers.
 		 */
-		if (p->index > tegra->index) {
-			if (__drm_format_has_alpha(new->fb->format->format))
-				state->dependent[index] = true;
-
-			/* keep track of the Z position */
-			zpos[index] = p->index;
-		}
+		if (!new->fb)
+			state->blending[index].top = (index == 1);
 	}
+}
+
+static int tegra_plane_setup_transparency(struct tegra_plane *tegra,
+					  struct tegra_plane_state *state)
+{
+	struct tegra_plane_state *tegra_state;
+	struct drm_plane_state *new;
+	struct drm_plane *plane;
+	int err;
 
 	/*
-	 * The region where three windows overlap is the intersection of the
-	 * two regions where two windows overlap. It contributes to the area
-	 * if any of the windows on top of it have an alpha component.
+	 * If the plane's zpos or transparency changed, the blending state of
+	 * its sibling planes may require adjustment; in that case they are
+	 * included in this atomic commit. Otherwise the blending state is
+	 * unchanged.
 	 */
-	for (i = 0; i < 2; i++)
-		state->dependent[2] = state->dependent[2] ||
-				      state->dependent[i];
+	err = tegra_plane_check_transparency(tegra, state);
+	if (err <= 0)
+		return err;
 
 	/*
-	 * However, if any of the windows on top of this window is opaque, it
-	 * will completely conceal this window within that area, so avoid the
-	 * window from contributing to the area.
+	 * All planes are now in the atomic state; walk through them and
+	 * update the transparency state of each plane.
 	 */
-	for (i = 0; i < 2; i++) {
-		if (zpos[i] > tegra->index)
-			state->dependent[2] = state->dependent[2] &&
-					      state->dependent[i];
+	drm_for_each_plane(plane, tegra->base.dev) {
+		struct tegra_plane *p = to_tegra_plane(plane);
+
+		/* skip planes on different CRTCs */
+		if (p->dc != tegra->dc)
+			continue;
+
+		new = drm_atomic_get_new_plane_state(state->base.state, plane);
+		tegra_state = to_tegra_plane_state(new);
+
+		/*
+		 * There is no need to update blending state for the disabled
+		 * plane.
+		 */
+		if (new->fb)
+			tegra_plane_update_transparency(p, tegra_state);
 	}
+
+	return 0;
+}
+
+int tegra_plane_setup_legacy_state(struct tegra_plane *tegra,
+				   struct tegra_plane_state *state)
+{
+	int err;
+
+	err = tegra_plane_setup_opacity(tegra, state);
+	if (err < 0)
+		return err;
+
+	err = tegra_plane_setup_transparency(tegra, state);
+	if (err < 0)
+		return err;
+
+	return 0;
 }
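
tegra_plane_setup_legacy_state() becomes the single entry point for legacy opacity and blending setup. A hedged sketch of how a plane atomic_check implementation would call it (the real callers live in the display controller code, outside this hunk):

	/* Sketch only: the real caller is the DC plane atomic_check path. */
	static int example_plane_atomic_check(struct drm_plane *plane,
					      struct drm_plane_state *state)
	{
		struct tegra_plane_state *plane_state =
			to_tegra_plane_state(state);
		struct tegra_plane *tegra = to_tegra_plane(plane);

		/*
		 * Resolves opacity first, then pulls affected sibling planes
		 * into the commit and recomputes their blending[] entries.
		 */
		return tegra_plane_setup_legacy_state(tegra, plane_state);
	}
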
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index 6938719..e79e6b4 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -34,6 +34,11 @@
 	return container_of(plane, struct tegra_plane, base);
 }
 
+struct tegra_plane_legacy_blending_state {
+	bool alpha;
+	bool top;
+};
+
 struct tegra_plane_state {
 	struct drm_plane_state base;
 
@@ -41,9 +46,11 @@
 	u32 format;
 	u32 swap;
 
+	bool bottom_up;
+
 	/* used for legacy blending support only */
+	struct tegra_plane_legacy_blending_state blending[2];
 	bool opaque;
-	bool dependent[3];
 };
 
 static inline struct tegra_plane_state *
@@ -62,9 +69,7 @@
 
 int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap);
 bool tegra_plane_format_is_yuv(unsigned int format, bool *planar);
-bool tegra_plane_format_has_alpha(unsigned int format);
-int tegra_plane_format_get_alpha(unsigned int opaque, unsigned int *alpha);
-void tegra_plane_check_dependent(struct tegra_plane *tegra,
-				 struct tegra_plane_state *state);
+int tegra_plane_setup_legacy_state(struct tegra_plane *tegra,
+				   struct tegra_plane_state *state);
 
 #endif /* TEGRA_PLANE_H */
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index f5794dd..9f657a6 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -25,6 +25,7 @@
 
 struct vic_config {
 	const char *firmware;
+	unsigned int version;
 };
 
 struct vic {
@@ -264,18 +265,21 @@
 
 static const struct vic_config vic_t124_config = {
 	.firmware = NVIDIA_TEGRA_124_VIC_FIRMWARE,
+	.version = 0x40,
 };
 
 #define NVIDIA_TEGRA_210_VIC_FIRMWARE "nvidia/tegra210/vic04_ucode.bin"
 
 static const struct vic_config vic_t210_config = {
 	.firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE,
+	.version = 0x21,
 };
 
 #define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin"
 
 static const struct vic_config vic_t186_config = {
 	.firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE,
+	.version = 0x18,
 };
 
 static const struct of_device_id vic_match[] = {
@@ -342,6 +346,7 @@
 	vic->dev = dev;
 
 	INIT_LIST_HEAD(&vic->client.list);
+	vic->client.version = vic->config->version;
 	vic->client.ops = &vic_ops;
 
 	err = host1x_client_register(&vic->client.base);
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 1b278a2..1067e70 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -224,7 +224,7 @@
 
 	ret = clk_set_rate(priv->clk, req_rate * clkdiv);
 	clk_rate = clk_get_rate(priv->clk);
-	if (ret < 0) {
+	if (ret < 0 || tilcdc_pclk_diff(req_rate, clk_rate) > 5) {
 		/*
 		 * If we fail to set the clock rate (some architectures don't
 		 * use the common clock framework yet and may not implement
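
The fallback is now also taken when clk_set_rate() nominally succeeds but the achieved rate lands more than 5% off the requested pixel clock. tilcdc_pclk_diff() presumably returns that deviation as a percentage; a plausible helper of that shape, stated as an assumption rather than the driver's verbatim code:

	/* Assumption: a percentage-deviation helper along these lines. */
	static unsigned int tilcdc_pclk_diff(unsigned long rate,
					     unsigned long real_rate)
	{
		int r = rate / 100, diff = real_rate - rate;

		return (unsigned int)(abs(diff) / r);
	}
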
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
index 4c661627..24a33bf 100644
--- a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
@@ -91,7 +91,7 @@
  * GEM object state and frees the memory used to store the object itself using
  * drm_gem_cma_free_object(). It also handles PRIME buffers which has the kernel
  * virtual address set by tinydrm_gem_cma_prime_import_sg_table(). Drivers
- * can use this as their &drm_driver->gem_free_object callback.
+ * can use this as their &drm_driver->gem_free_object_unlocked callback.
  */
 void tinydrm_gem_cma_free_object(struct drm_gem_object *gem_obj)
 {
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c
index d1c3ce9..dcd3901 100644
--- a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c
@@ -78,6 +78,36 @@
 }
 EXPORT_SYMBOL(tinydrm_merge_clips);
 
+int tinydrm_fb_dirty(struct drm_framebuffer *fb,
+		     struct drm_file *file_priv,
+		     unsigned int flags, unsigned int color,
+		     struct drm_clip_rect *clips,
+		     unsigned int num_clips)
+{
+	struct tinydrm_device *tdev = fb->dev->dev_private;
+	struct drm_plane *plane = &tdev->pipe.plane;
+	int ret = 0;
+
+	drm_modeset_lock(&plane->mutex, NULL);
+
+	/* fbdev can flush even when we're not interested */
+	if (plane->state->fb == fb) {
+		mutex_lock(&tdev->dirty_lock);
+		ret = tdev->fb_dirty(fb, file_priv, flags,
+				     color, clips, num_clips);
+		mutex_unlock(&tdev->dirty_lock);
+	}
+
+	drm_modeset_unlock(&plane->mutex);
+
+	if (ret)
+		dev_err_once(fb->dev->dev,
+			     "Failed to update display %d\n", ret);
+
+	return ret;
+}
+EXPORT_SYMBOL(tinydrm_fb_dirty);
+
 /**
  * tinydrm_memcpy - Copy clip buffer
  * @dst: Destination buffer
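
tinydrm_fb_dirty() hoists the dirty_lock handling, the scanout check and the error print out of each driver's flush routine. Converted drivers point the framebuffer .dirty hook at the common wrapper and stash their hardware flush in tdev->fb_dirty, as the driver diffs below show; schematically (example_hw_fb_dirty is hypothetical):

	/* Wiring sketch; example_hw_fb_dirty is the driver's own flush. */
	static const struct drm_framebuffer_funcs example_fb_funcs = {
		.destroy	= drm_gem_fb_destroy,
		.create_handle	= drm_gem_fb_create_handle,
		.dirty		= tinydrm_fb_dirty,	/* common wrapper */
	};

	static int example_init(struct tinydrm_device *tdev)
	{
		/* Called by tinydrm_fb_dirty() with dirty_lock held, and only
		 * while the framebuffer is actually being scanned out. */
		tdev->fb_dirty = example_hw_fb_dirty;
		return 0;
	}
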
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c
index 11ae950..7e8e24d 100644
--- a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c
@@ -125,9 +125,8 @@
 	struct drm_crtc *crtc = &tdev->pipe.crtc;
 
 	if (fb && (fb != old_state->fb)) {
-		pipe->plane.fb = fb;
-		if (fb->funcs->dirty)
-			fb->funcs->dirty(fb, NULL, 0, 0, NULL, 0);
+		if (tdev->fb_dirty)
+			tdev->fb_dirty(fb, NULL, 0, 0, NULL, 0);
 	}
 
 	if (crtc->state->event) {
@@ -139,23 +138,6 @@
 }
 EXPORT_SYMBOL(tinydrm_display_pipe_update);
 
-/**
- * tinydrm_display_pipe_prepare_fb - Display pipe prepare_fb helper
- * @pipe: Simple display pipe
- * @plane_state: Plane state
- *
- * This function uses drm_gem_fb_prepare_fb() to check if the plane FB has an
- * dma-buf attached, extracts the exclusive fence and attaches it to plane
- * state for the atomic helper to wait on. Drivers can use this as their
- * &drm_simple_display_pipe_funcs->prepare_fb callback.
- */
-int tinydrm_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
-				    struct drm_plane_state *plane_state)
-{
-	return drm_gem_fb_prepare_fb(&pipe->plane, plane_state);
-}
-EXPORT_SYMBOL(tinydrm_display_pipe_prepare_fb);
-
 static int tinydrm_rotate_mode(struct drm_display_mode *mode,
 			       unsigned int rotation)
 {
diff --git a/drivers/gpu/drm/tinydrm/ili9225.c b/drivers/gpu/drm/tinydrm/ili9225.c
index a075950..841c69a 100644
--- a/drivers/gpu/drm/tinydrm/ili9225.c
+++ b/drivers/gpu/drm/tinydrm/ili9225.c
@@ -88,14 +88,8 @@
 	bool full;
 	void *tr;
 
-	mutex_lock(&tdev->dirty_lock);
-
 	if (!mipi->enabled)
-		goto out_unlock;
-
-	/* fbdev can flush even when we're not interested */
-	if (tdev->pipe.plane.fb != fb)
-		goto out_unlock;
+		return 0;
 
 	full = tinydrm_merge_clips(&clip, clips, num_clips, flags,
 				   fb->width, fb->height);
@@ -108,7 +102,7 @@
 		tr = mipi->tx_buf;
 		ret = mipi_dbi_buf_copy(mipi->tx_buf, fb, &clip, swap);
 		if (ret)
-			goto out_unlock;
+			return ret;
 	} else {
 		tr = cma_obj->vaddr;
 	}
@@ -159,24 +153,18 @@
 	ret = mipi_dbi_command_buf(mipi, ILI9225_WRITE_DATA_TO_GRAM, tr,
 				(clip.x2 - clip.x1) * (clip.y2 - clip.y1) * 2);
 
-out_unlock:
-	mutex_unlock(&tdev->dirty_lock);
-
-	if (ret)
-		dev_err_once(fb->dev->dev, "Failed to update display %d\n",
-			     ret);
-
 	return ret;
 }
 
 static const struct drm_framebuffer_funcs ili9225_fb_funcs = {
 	.destroy	= drm_gem_fb_destroy,
 	.create_handle	= drm_gem_fb_create_handle,
-	.dirty		= ili9225_fb_dirty,
+	.dirty		= tinydrm_fb_dirty,
 };
 
 static void ili9225_pipe_enable(struct drm_simple_display_pipe *pipe,
-				struct drm_crtc_state *crtc_state)
+				struct drm_crtc_state *crtc_state,
+				struct drm_plane_state *plane_state)
 {
 	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
 	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
@@ -268,7 +256,7 @@
 
 	ili9225_command(mipi, ILI9225_DISPLAY_CONTROL_1, 0x1017);
 
-	mipi_dbi_enable_flush(mipi);
+	mipi_dbi_enable_flush(mipi, crtc_state, plane_state);
 }
 
 static void ili9225_pipe_disable(struct drm_simple_display_pipe *pipe)
@@ -341,6 +329,8 @@
 	if (ret)
 		return ret;
 
+	tdev->fb_dirty = ili9225_fb_dirty;
+
 	ret = tinydrm_display_pipe_init(tdev, pipe_funcs,
 					DRM_MODE_CONNECTOR_VIRTUAL,
 					ili9225_formats,
@@ -364,7 +354,7 @@
 	.enable		= ili9225_pipe_enable,
 	.disable	= ili9225_pipe_disable,
 	.update		= tinydrm_display_pipe_update,
-	.prepare_fb	= tinydrm_display_pipe_prepare_fb,
+	.prepare_fb	= drm_gem_fb_simple_display_pipe_prepare_fb,
 };
 
 static const struct drm_display_mode ili9225_mode = {
diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c
index d8ed6e6..015d03f 100644
--- a/drivers/gpu/drm/tinydrm/mi0283qt.c
+++ b/drivers/gpu/drm/tinydrm/mi0283qt.c
@@ -19,6 +19,7 @@
 
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_modeset_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/tinydrm/mipi-dbi.h>
 #include <drm/tinydrm/tinydrm-helpers.h>
 #include <video/mipi_display.h>
@@ -49,7 +50,8 @@
 #define ILI9341_MADCTL_MY	BIT(7)
 
 static void mi0283qt_enable(struct drm_simple_display_pipe *pipe,
-			    struct drm_crtc_state *crtc_state)
+			    struct drm_crtc_state *crtc_state,
+			    struct drm_plane_state *plane_state)
 {
 	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
 	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
@@ -83,24 +85,6 @@
 	/* Memory Access Control */
 	mipi_dbi_command(mipi, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT);
 
-	switch (mipi->rotation) {
-	default:
-		addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY |
-			    ILI9341_MADCTL_MX;
-		break;
-	case 90:
-		addr_mode = ILI9341_MADCTL_MY;
-		break;
-	case 180:
-		addr_mode = ILI9341_MADCTL_MV;
-		break;
-	case 270:
-		addr_mode = ILI9341_MADCTL_MX;
-		break;
-	}
-	addr_mode |= ILI9341_MADCTL_BGR;
-	mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode);
-
 	/* Frame Rate */
 	mipi_dbi_command(mipi, ILI9341_FRMCTR1, 0x00, 0x1b);
 
@@ -126,14 +110,37 @@
 	msleep(100);
 
 out_enable:
-	mipi_dbi_enable_flush(mipi);
+	/*
+	 * The PiTFT (ili9340) has a hardware reset circuit that resets only
+	 * on power-on; it is not reset on each reboot through a GPIO the way
+	 * the rpi-display is. As a result, we need to always apply the
+	 * rotation value regardless of the display "on/off" state.
+	 */
+	switch (mipi->rotation) {
+	default:
+		addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY |
+			    ILI9341_MADCTL_MX;
+		break;
+	case 90:
+		addr_mode = ILI9341_MADCTL_MY;
+		break;
+	case 180:
+		addr_mode = ILI9341_MADCTL_MV;
+		break;
+	case 270:
+		addr_mode = ILI9341_MADCTL_MX;
+		break;
+	}
+	addr_mode |= ILI9341_MADCTL_BGR;
+	mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode);
+	mipi_dbi_enable_flush(mipi, crtc_state, plane_state);
 }
 
 static const struct drm_simple_display_pipe_funcs mi0283qt_pipe_funcs = {
 	.enable = mi0283qt_enable,
 	.disable = mipi_dbi_pipe_disable,
 	.update = tinydrm_display_pipe_update,
-	.prepare_fb = tinydrm_display_pipe_prepare_fb,
+	.prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb,
 };
 
 static const struct drm_display_mode mi0283qt_mode = {
diff --git a/drivers/gpu/drm/tinydrm/mipi-dbi.c b/drivers/gpu/drm/tinydrm/mipi-dbi.c
index 9e90381..4d1fb31 100644
--- a/drivers/gpu/drm/tinydrm/mipi-dbi.c
+++ b/drivers/gpu/drm/tinydrm/mipi-dbi.c
@@ -219,14 +219,8 @@
 	bool full;
 	void *tr;
 
-	mutex_lock(&tdev->dirty_lock);
-
 	if (!mipi->enabled)
-		goto out_unlock;
-
-	/* fbdev can flush even when we're not interested */
-	if (tdev->pipe.plane.fb != fb)
-		goto out_unlock;
+		return 0;
 
 	full = tinydrm_merge_clips(&clip, clips, num_clips, flags,
 				   fb->width, fb->height);
@@ -239,7 +233,7 @@
 		tr = mipi->tx_buf;
 		ret = mipi_dbi_buf_copy(mipi->tx_buf, fb, &clip, swap);
 		if (ret)
-			goto out_unlock;
+			return ret;
 	} else {
 		tr = cma_obj->vaddr;
 	}
@@ -254,20 +248,13 @@
 	ret = mipi_dbi_command_buf(mipi, MIPI_DCS_WRITE_MEMORY_START, tr,
 				(clip.x2 - clip.x1) * (clip.y2 - clip.y1) * 2);
 
-out_unlock:
-	mutex_unlock(&tdev->dirty_lock);
-
-	if (ret)
-		dev_err_once(fb->dev->dev, "Failed to update display %d\n",
-			     ret);
-
 	return ret;
 }
 
 static const struct drm_framebuffer_funcs mipi_dbi_fb_funcs = {
 	.destroy	= drm_gem_fb_destroy,
 	.create_handle	= drm_gem_fb_create_handle,
-	.dirty		= mipi_dbi_fb_dirty,
+	.dirty		= tinydrm_fb_dirty,
 };
 
 /**
@@ -278,13 +265,16 @@
  * enables the backlight. Drivers can use this in their
  * &drm_simple_display_pipe_funcs->enable callback.
  */
-void mipi_dbi_enable_flush(struct mipi_dbi *mipi)
+void mipi_dbi_enable_flush(struct mipi_dbi *mipi,
+			   struct drm_crtc_state *crtc_state,
+			   struct drm_plane_state *plane_state)
 {
-	struct drm_framebuffer *fb = mipi->tinydrm.pipe.plane.fb;
+	struct tinydrm_device *tdev = &mipi->tinydrm;
+	struct drm_framebuffer *fb = plane_state->fb;
 
 	mipi->enabled = true;
 	if (fb)
-		fb->funcs->dirty(fb, NULL, 0, 0, NULL, 0);
+		tdev->fb_dirty(fb, NULL, 0, 0, NULL, 0);
 
 	backlight_enable(mipi->backlight);
 }
@@ -381,6 +371,8 @@
 	if (ret)
 		return ret;
 
+	tdev->fb_dirty = mipi_dbi_fb_dirty;
+
 	/* TODO: Maybe add DRM_MODE_CONNECTOR_SPI */
 	ret = tinydrm_display_pipe_init(tdev, pipe_funcs,
 					DRM_MODE_CONNECTOR_VIRTUAL,
diff --git a/drivers/gpu/drm/tinydrm/repaper.c b/drivers/gpu/drm/tinydrm/repaper.c
index 7574063..1ee6855 100644
--- a/drivers/gpu/drm/tinydrm/repaper.c
+++ b/drivers/gpu/drm/tinydrm/repaper.c
@@ -540,14 +540,8 @@
 	clip.y1 = 0;
 	clip.y2 = fb->height;
 
-	mutex_lock(&tdev->dirty_lock);
-
 	if (!epd->enabled)
-		goto out_unlock;
-
-	/* fbdev can flush even when we're not interested */
-	if (tdev->pipe.plane.fb != fb)
-		goto out_unlock;
+		return 0;
 
 	repaper_get_temperature(epd);
 
@@ -555,16 +549,14 @@
 		  epd->factored_stage_time);
 
 	buf = kmalloc(fb->width * fb->height, GFP_KERNEL);
-	if (!buf) {
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
+	if (!buf)
+		return -ENOMEM;
 
 	if (import_attach) {
 		ret = dma_buf_begin_cpu_access(import_attach->dmabuf,
 					       DMA_FROM_DEVICE);
 		if (ret)
-			goto out_unlock;
+			goto out_free;
 	}
 
 	tinydrm_xrgb8888_to_gray8(buf, cma_obj->vaddr, fb, &clip);
@@ -573,7 +565,7 @@
 		ret = dma_buf_end_cpu_access(import_attach->dmabuf,
 					     DMA_FROM_DEVICE);
 		if (ret)
-			goto out_unlock;
+			goto out_free;
 	}
 
 	repaper_gray8_to_mono_reversed(buf, fb->width, fb->height);
@@ -625,11 +617,7 @@
 			}
 	}
 
-out_unlock:
-	mutex_unlock(&tdev->dirty_lock);
-
-	if (ret)
-		DRM_DEV_ERROR(fb->dev->dev, "Failed to update display (%d)\n", ret);
+out_free:
 	kfree(buf);
 
 	return ret;
@@ -638,7 +626,7 @@
 static const struct drm_framebuffer_funcs repaper_fb_funcs = {
 	.destroy	= drm_gem_fb_destroy,
 	.create_handle	= drm_gem_fb_create_handle,
-	.dirty		= repaper_fb_dirty,
+	.dirty		= tinydrm_fb_dirty,
 };
 
 static void power_off(struct repaper_epd *epd)
@@ -659,7 +647,8 @@
 }
 
 static void repaper_pipe_enable(struct drm_simple_display_pipe *pipe,
-				struct drm_crtc_state *crtc_state)
+				struct drm_crtc_state *crtc_state,
+				struct drm_plane_state *plane_state)
 {
 	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
 	struct repaper_epd *epd = epd_from_tinydrm(tdev);
@@ -852,7 +841,7 @@
 	.enable = repaper_pipe_enable,
 	.disable = repaper_pipe_disable,
 	.update = tinydrm_display_pipe_update,
-	.prepare_fb = tinydrm_display_pipe_prepare_fb,
+	.prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb,
 };
 
 static const uint32_t repaper_formats[] = {
@@ -1069,6 +1058,8 @@
 	if (ret)
 		return ret;
 
+	tdev->fb_dirty = repaper_fb_dirty;
+
 	ret = tinydrm_display_pipe_init(tdev, &repaper_pipe_funcs,
 					DRM_MODE_CONNECTOR_VIRTUAL,
 					repaper_formats,
diff --git a/drivers/gpu/drm/tinydrm/st7586.c b/drivers/gpu/drm/tinydrm/st7586.c
index a6396ef..5c29e38 100644
--- a/drivers/gpu/drm/tinydrm/st7586.c
+++ b/drivers/gpu/drm/tinydrm/st7586.c
@@ -120,14 +120,8 @@
 	int start, end;
 	int ret = 0;
 
-	mutex_lock(&tdev->dirty_lock);
-
 	if (!mipi->enabled)
-		goto out_unlock;
-
-	/* fbdev can flush even when we're not interested */
-	if (tdev->pipe.plane.fb != fb)
-		goto out_unlock;
+		return 0;
 
 	tinydrm_merge_clips(&clip, clips, num_clips, flags, fb->width,
 			    fb->height);
@@ -141,7 +135,7 @@
 
 	ret = st7586_buf_copy(mipi->tx_buf, fb, &clip);
 	if (ret)
-		goto out_unlock;
+		return ret;
 
 	/* Pixels are packed 3 per byte */
 	start = clip.x1 / 3;
@@ -158,24 +152,18 @@
 				   (u8 *)mipi->tx_buf,
 				   (end - start) * (clip.y2 - clip.y1));
 
-out_unlock:
-	mutex_unlock(&tdev->dirty_lock);
-
-	if (ret)
-		dev_err_once(fb->dev->dev, "Failed to update display %d\n",
-			     ret);
-
 	return ret;
 }
 
 static const struct drm_framebuffer_funcs st7586_fb_funcs = {
 	.destroy	= drm_gem_fb_destroy,
 	.create_handle	= drm_gem_fb_create_handle,
-	.dirty		= st7586_fb_dirty,
+	.dirty		= tinydrm_fb_dirty,
 };
 
 static void st7586_pipe_enable(struct drm_simple_display_pipe *pipe,
-			       struct drm_crtc_state *crtc_state)
+			       struct drm_crtc_state *crtc_state,
+			       struct drm_plane_state *plane_state)
 {
 	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
 	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
@@ -237,7 +225,7 @@
 
 	mipi_dbi_command(mipi, MIPI_DCS_SET_DISPLAY_ON);
 
-	mipi_dbi_enable_flush(mipi);
+	mipi_dbi_enable_flush(mipi, crtc_state, plane_state);
 }
 
 static void st7586_pipe_disable(struct drm_simple_display_pipe *pipe)
@@ -277,6 +265,8 @@
 	if (ret)
 		return ret;
 
+	tdev->fb_dirty = st7586_fb_dirty;
+
 	ret = tinydrm_display_pipe_init(tdev, pipe_funcs,
 					DRM_MODE_CONNECTOR_VIRTUAL,
 					st7586_formats,
@@ -300,7 +290,7 @@
 	.enable		= st7586_pipe_enable,
 	.disable	= st7586_pipe_disable,
 	.update		= tinydrm_display_pipe_update,
-	.prepare_fb	= tinydrm_display_pipe_prepare_fb,
+	.prepare_fb	= drm_gem_fb_simple_display_pipe_prepare_fb,
 };
 
 static const struct drm_display_mode st7586_mode = {
diff --git a/drivers/gpu/drm/tinydrm/st7735r.c b/drivers/gpu/drm/tinydrm/st7735r.c
index 67d197e..6c7b15c 100644
--- a/drivers/gpu/drm/tinydrm/st7735r.c
+++ b/drivers/gpu/drm/tinydrm/st7735r.c
@@ -37,7 +37,8 @@
 #define ST7735R_MV	BIT(5)
 
 static void jd_t18003_t01_pipe_enable(struct drm_simple_display_pipe *pipe,
-				      struct drm_crtc_state *crtc_state)
+				      struct drm_crtc_state *crtc_state,
+				      struct drm_plane_state *plane_state)
 {
 	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
 	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
@@ -98,14 +99,14 @@
 
 	msleep(20);
 
-	mipi_dbi_enable_flush(mipi);
+	mipi_dbi_enable_flush(mipi, crtc_state, plane_state);
 }
 
 static const struct drm_simple_display_pipe_funcs jd_t18003_t01_pipe_funcs = {
 	.enable		= jd_t18003_t01_pipe_enable,
 	.disable	= mipi_dbi_pipe_disable,
 	.update		= tinydrm_display_pipe_update,
-	.prepare_fb	= tinydrm_display_pipe_prepare_fb,
+	.prepare_fb	= drm_gem_fb_simple_display_pipe_prepare_fb,
 };
 
 static const struct drm_display_mode jd_t18003_t01_mode = {
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index 7c2485f..ea4d59e 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 98e06f8..5d8688e52 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
@@ -1175,7 +1176,6 @@
 	reservation_object_init(&bo->ttm_resv);
 	atomic_inc(&bo->bdev->glob->bo_count);
 	drm_vma_node_reset(&bo->vma_node);
-	bo->priority = 0;
 
 	/*
 	 * For ttm_bo_type_device buffers, allocate
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index a7c232d..18d3deb 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2007-2010 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 2ebbae6..f2c1677 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA
@@ -39,6 +40,11 @@
 #include <linux/module.h>
 #include <linux/reservation.h>
 
+struct ttm_transfer_obj {
+	struct ttm_buffer_object base;
+	struct ttm_buffer_object *bo;
+};
+
 void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
 {
 	ttm_bo_mem_put(bo, &bo->mem);
@@ -454,7 +460,11 @@
 
 static void ttm_transfered_destroy(struct ttm_buffer_object *bo)
 {
-	kfree(bo);
+	struct ttm_transfer_obj *fbo;
+
+	fbo = container_of(bo, struct ttm_transfer_obj, base);
+	ttm_bo_unref(&fbo->bo);
+	kfree(fbo);
 }
 
 /**
@@ -475,14 +485,15 @@
 static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 				      struct ttm_buffer_object **new_obj)
 {
-	struct ttm_buffer_object *fbo;
+	struct ttm_transfer_obj *fbo;
 	int ret;
 
 	fbo = kmalloc(sizeof(*fbo), GFP_KERNEL);
 	if (!fbo)
 		return -ENOMEM;
 
-	*fbo = *bo;
+	fbo->base = *bo;
+	fbo->bo = ttm_bo_reference(bo);
 
 	/**
 	 * Fix up members that we shouldn't copy directly:
@@ -490,25 +501,25 @@
 	 */
 
 	atomic_inc(&bo->bdev->glob->bo_count);
-	INIT_LIST_HEAD(&fbo->ddestroy);
-	INIT_LIST_HEAD(&fbo->lru);
-	INIT_LIST_HEAD(&fbo->swap);
-	INIT_LIST_HEAD(&fbo->io_reserve_lru);
-	mutex_init(&fbo->wu_mutex);
-	fbo->moving = NULL;
-	drm_vma_node_reset(&fbo->vma_node);
-	atomic_set(&fbo->cpu_writers, 0);
+	INIT_LIST_HEAD(&fbo->base.ddestroy);
+	INIT_LIST_HEAD(&fbo->base.lru);
+	INIT_LIST_HEAD(&fbo->base.swap);
+	INIT_LIST_HEAD(&fbo->base.io_reserve_lru);
+	mutex_init(&fbo->base.wu_mutex);
+	fbo->base.moving = NULL;
+	drm_vma_node_reset(&fbo->base.vma_node);
+	atomic_set(&fbo->base.cpu_writers, 0);
 
-	kref_init(&fbo->list_kref);
-	kref_init(&fbo->kref);
-	fbo->destroy = &ttm_transfered_destroy;
-	fbo->acc_size = 0;
-	fbo->resv = &fbo->ttm_resv;
-	reservation_object_init(fbo->resv);
-	ret = reservation_object_trylock(fbo->resv);
+	kref_init(&fbo->base.list_kref);
+	kref_init(&fbo->base.kref);
+	fbo->base.destroy = &ttm_transfered_destroy;
+	fbo->base.acc_size = 0;
+	fbo->base.resv = &fbo->base.ttm_resv;
+	reservation_object_init(fbo->base.resv);
+	ret = reservation_object_trylock(fbo->base.resv);
 	WARN_ON(!ret);
 
-	*new_obj = fbo;
+	*new_obj = &fbo->base;
 	return 0;
 }
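
Instead of byte-copying the source BO into the ghost object, the transfer now embeds a ttm_buffer_object inside struct ttm_transfer_obj and takes a real reference on the source, so pipelined moves can no longer outlive it. The destroy path recovers the wrapper from the embedded base with container_of(), the usual kernel embed-and-recover idiom; in generic form (all names hypothetical):

	/* Generic form of the idiom (all names hypothetical). */
	struct wrapper {
		struct base_object base;	/* embedded, not a pointer */
		struct base_object *pinned;	/* reference taken at create */
	};

	static void wrapper_destroy(struct base_object *obj)
	{
		/* Walk back from the embedded member to the wrapper. */
		struct wrapper *w = container_of(obj, struct wrapper, base);

		put_base_object(w->pinned);	/* cf. ttm_bo_unref() above */
		kfree(w);
	}
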
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 8eba95b..c7ece76 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 3dca206e..e73ae0d 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c
index 913f431..20694b8 100644
--- a/drivers/gpu/drm/ttm/ttm_lock.c
+++ b/drivers/gpu/drm/ttm/ttm_lock.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index 27856c5..450387c 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c
index 66fc639..6ff40c0 100644
--- a/drivers/gpu/drm/ttm/ttm_module.c
+++ b/drivers/gpu/drm/ttm/ttm_module.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index 1aa2baa..74f1b1e 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2009-2013 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 7e672be..a1e5439 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
 /**************************************************************************
  *
  * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
diff --git a/drivers/gpu/drm/tve200/tve200_display.c b/drivers/gpu/drm/tve200/tve200_display.c
index db397fc..e8723a2 100644
--- a/drivers/gpu/drm/tve200/tve200_display.c
+++ b/drivers/gpu/drm/tve200/tve200_display.c
@@ -120,7 +120,8 @@
 }
 
 static void tve200_display_enable(struct drm_simple_display_pipe *pipe,
-				 struct drm_crtc_state *cstate)
+				 struct drm_crtc_state *cstate,
+				 struct drm_plane_state *plane_state)
 {
 	struct drm_crtc *crtc = &pipe->crtc;
 	struct drm_plane *plane = &pipe->plane;
@@ -292,18 +293,12 @@
 	writel(0, priv->regs + TVE200_INT_EN);
 }
 
-static int tve200_display_prepare_fb(struct drm_simple_display_pipe *pipe,
-				    struct drm_plane_state *plane_state)
-{
-	return drm_gem_fb_prepare_fb(&pipe->plane, plane_state);
-}
-
 static const struct drm_simple_display_pipe_funcs tve200_display_funcs = {
 	.check = tve200_display_check,
 	.enable = tve200_display_enable,
 	.disable = tve200_display_disable,
 	.update = tve200_display_update,
-	.prepare_fb = tve200_display_prepare_fb,
+	.prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb,
 	.enable_vblank = tve200_display_enable_vblank,
 	.disable_vblank = tve200_display_disable_vblank,
 };
diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c
index c3dc1fd..09dc585 100644
--- a/drivers/gpu/drm/udl/udl_connector.c
+++ b/drivers/gpu/drm/udl/udl_connector.c
@@ -105,7 +105,7 @@
 	return 0;
 }
 
-static int udl_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status udl_mode_valid(struct drm_connector *connector,
 			  struct drm_display_mode *mode)
 {
 	struct udl_device *udl = connector->dev->dev_private;
diff --git a/drivers/gpu/drm/udl/udl_dmabuf.c b/drivers/gpu/drm/udl/udl_dmabuf.c
index 2867ed1..0a20695 100644
--- a/drivers/gpu/drm/udl/udl_dmabuf.c
+++ b/drivers/gpu/drm/udl/udl_dmabuf.c
@@ -76,6 +76,7 @@
 	struct udl_drm_dmabuf_attachment *udl_attach = attach->priv;
 	struct udl_gem_object *obj = to_udl_bo(attach->dmabuf->priv);
 	struct drm_device *dev = obj->base.dev;
+	struct udl_device *udl = dev->dev_private;
 	struct scatterlist *rd, *wr;
 	struct sg_table *sgt = NULL;
 	unsigned int i;
@@ -112,7 +113,7 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
-	mutex_lock(&dev->struct_mutex);
+	mutex_lock(&udl->gem_lock);
 
 	rd = obj->sg->sgl;
 	wr = sgt->sgl;
@@ -137,7 +138,7 @@
 	attach->priv = udl_attach;
 
 err_unlock:
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&udl->gem_lock);
 	return sgt;
 }
 
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index 3c45a30..9ef515d 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -53,7 +53,7 @@
 	.unload = udl_driver_unload,
 
 	/* gem hooks */
-	.gem_free_object = udl_gem_free_object,
+	.gem_free_object_unlocked = udl_gem_free_object,
 	.gem_vm_ops = &udl_gem_vm_ops,
 
 	.dumb_create = udl_dumb_create,
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index 2a75ab8..55c0cc3 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -54,6 +54,8 @@
 	struct usb_device *udev;
 	struct drm_crtc *crtc;
 
+	struct mutex gem_lock;
+
 	int sku_pixel_limit;
 
 	struct urb_list urbs;
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index dee6bd9..9a15cce 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c
@@ -214,9 +214,10 @@
 {
 	struct udl_gem_object *gobj;
 	struct drm_gem_object *obj;
+	struct udl_device *udl = dev->dev_private;
 	int ret = 0;
 
-	mutex_lock(&dev->struct_mutex);
+	mutex_lock(&udl->gem_lock);
 	obj = drm_gem_object_lookup(file, handle);
 	if (obj == NULL) {
 		ret = -ENOENT;
@@ -236,6 +237,6 @@
 out:
 	drm_gem_object_put(&gobj->base);
 unlock:
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&udl->gem_lock);
 	return ret;
 }
diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index f1ec452..d518de8 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -324,6 +324,8 @@
 	udl->ddev = dev;
 	dev->dev_private = udl;
 
+	mutex_init(&udl->gem_lock);
+
 	if (!udl_parse_vendor_descriptor(dev, udl->udev)) {
 		ret = -ENODEV;
 		DRM_ERROR("firmware not recognized. Assume incompatible device\n");
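
udl drops its reliance on the DRM-global dev->struct_mutex in favour of a driver-private gem_lock, which is also what makes the switch to gem_free_object_unlocked safe. A minimal sketch of the converted locking pattern (the function name is illustrative):

	/* Illustrative function; mirrors the lock substitution above. */
	static int example_gem_lookup(struct drm_device *dev,
				      struct drm_file *file, u32 handle)
	{
		struct udl_device *udl = dev->dev_private;
		struct drm_gem_object *obj;
		int ret = 0;

		mutex_lock(&udl->gem_lock);	/* was: dev->struct_mutex */
		obj = drm_gem_object_lookup(file, handle);
		if (obj)
			drm_gem_object_put_unlocked(obj);
		else
			ret = -ENOENT;
		mutex_unlock(&udl->gem_lock);

		return ret;
	}
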
diff --git a/drivers/gpu/drm/v3d/Kconfig b/drivers/gpu/drm/v3d/Kconfig
new file mode 100644
index 0000000..a0c0259
--- /dev/null
+++ b/drivers/gpu/drm/v3d/Kconfig
@@ -0,0 +1,9 @@
+config DRM_V3D
+	tristate "Broadcom V3D 3.x and newer"
+	depends on ARCH_BCM || ARCH_BCMSTB || COMPILE_TEST
+	depends on DRM
+	depends on COMMON_CLK
+	select DRM_SCHED
+	help
+	  Choose this option if you have a system that has a Broadcom
+	  V3D 3.x or newer GPU, such as BCM7268.
diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile
new file mode 100644
index 0000000..34446e1
--- /dev/null
+++ b/drivers/gpu/drm/v3d/Makefile
@@ -0,0 +1,18 @@
+# Please keep these build lists sorted!
+
+# core driver code
+v3d-y := \
+	v3d_bo.o \
+	v3d_drv.o \
+	v3d_fence.o \
+	v3d_gem.o \
+	v3d_irq.o \
+	v3d_mmu.o \
+	v3d_sched.o \
+	v3d_trace_points.o
+
+v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o
+
+obj-$(CONFIG_DRM_V3D)  += v3d.o
+
+CFLAGS_v3d_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
new file mode 100644
index 0000000..7b1e2a5
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015-2018 Broadcom */
+
+/**
+ * DOC: V3D GEM BO management support
+ *
+ * Compared to VC4 (V3D 2.x), V3D 3.3 introduces an MMU between the
+ * GPU and the bus, allowing us to use shmem objects for our storage
+ * instead of CMA.
+ *
+ * Physically contiguous objects may still be imported to V3D, but the
+ * driver doesn't allocate physically contiguous objects on its own.
+ * Display engines requiring physically contiguous allocations should
+ * look into Mesa's "renderonly" support (as used by the Mesa pl111
+ * driver) for an example of how to integrate with V3D.
+ *
+ * Long term, we should support evicting pages from the MMU when under
+ * memory pressure (thus the v3d_bo_get_pages() refcounting), but
+ * that's not a high priority since our systems tend to not have swap.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/pfn_t.h>
+
+#include "v3d_drv.h"
+#include "uapi/drm/v3d_drm.h"
+
+/* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and maps
+ * it for DMA.
+ */
+static int
+v3d_bo_get_pages(struct v3d_bo *bo)
+{
+	struct drm_gem_object *obj = &bo->base;
+	struct drm_device *dev = obj->dev;
+	int npages = obj->size >> PAGE_SHIFT;
+	int ret = 0;
+
+	mutex_lock(&bo->lock);
+	if (bo->pages_refcount++ != 0)
+		goto unlock;
+
+	if (!obj->import_attach) {
+		bo->pages = drm_gem_get_pages(obj);
+		if (IS_ERR(bo->pages)) {
+			ret = PTR_ERR(bo->pages);
+			goto unlock;
+		}
+
+		bo->sgt = drm_prime_pages_to_sg(bo->pages, npages);
+		if (IS_ERR(bo->sgt)) {
+			ret = PTR_ERR(bo->sgt);
+			goto put_pages;
+		}
+
+		/* Map the pages for use by the GPU. */
+		dma_map_sg(dev->dev, bo->sgt->sgl,
+			   bo->sgt->nents, DMA_BIDIRECTIONAL);
+	} else {
+		bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL);
+		if (!bo->pages) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+
+		drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages,
+						 NULL, npages);
+
+		/* Note that dma-bufs come in mapped. */
+	}
+
+	mutex_unlock(&bo->lock);
+
+	return 0;
+
+put_pages:
+	drm_gem_put_pages(obj, bo->pages, true, true);
+	bo->pages = NULL;
+unlock:
+	bo->pages_refcount--;
+	mutex_unlock(&bo->lock);
+	return ret;
+}
+
+static void
+v3d_bo_put_pages(struct v3d_bo *bo)
+{
+	struct drm_gem_object *obj = &bo->base;
+
+	mutex_lock(&bo->lock);
+	if (--bo->pages_refcount == 0) {
+		if (!obj->import_attach) {
+			dma_unmap_sg(obj->dev->dev, bo->sgt->sgl,
+				     bo->sgt->nents, DMA_BIDIRECTIONAL);
+			sg_free_table(bo->sgt);
+			kfree(bo->sgt);
+			drm_gem_put_pages(obj, bo->pages, true, true);
+		} else {
+			kfree(bo->pages);
+		}
+	}
+	mutex_unlock(&bo->lock);
+}
+
+static struct v3d_bo *v3d_bo_create_struct(struct drm_device *dev,
+					   size_t unaligned_size)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct drm_gem_object *obj;
+	struct v3d_bo *bo;
+	size_t size = roundup(unaligned_size, PAGE_SIZE);
+	int ret;
+
+	if (size == 0)
+		return ERR_PTR(-EINVAL);
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (!bo)
+		return ERR_PTR(-ENOMEM);
+	obj = &bo->base;
+
+	INIT_LIST_HEAD(&bo->vmas);
+	INIT_LIST_HEAD(&bo->unref_head);
+	mutex_init(&bo->lock);
+
+	ret = drm_gem_object_init(dev, obj, size);
+	if (ret)
+		goto free_bo;
+
+	spin_lock(&v3d->mm_lock);
+	ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node,
+					 obj->size >> PAGE_SHIFT,
+					 GMP_GRANULARITY >> PAGE_SHIFT, 0, 0);
+	spin_unlock(&v3d->mm_lock);
+	if (ret)
+		goto free_obj;
+
+	return bo;
+
+free_obj:
+	drm_gem_object_release(obj);
+free_bo:
+	kfree(bo);
+	return ERR_PTR(ret);
+}
+
+struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
+			     size_t unaligned_size)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct drm_gem_object *obj;
+	struct v3d_bo *bo;
+	int ret;
+
+	bo = v3d_bo_create_struct(dev, unaligned_size);
+	if (IS_ERR(bo))
+		return bo;
+	obj = &bo->base;
+
+	bo->resv = &bo->_resv;
+	reservation_object_init(bo->resv);
+
+	ret = v3d_bo_get_pages(bo);
+	if (ret)
+		goto free_mm;
+
+	v3d_mmu_insert_ptes(bo);
+
+	mutex_lock(&v3d->bo_lock);
+	v3d->bo_stats.num_allocated++;
+	v3d->bo_stats.pages_allocated += obj->size >> PAGE_SHIFT;
+	mutex_unlock(&v3d->bo_lock);
+
+	return bo;
+
+free_mm:
+	spin_lock(&v3d->mm_lock);
+	drm_mm_remove_node(&bo->node);
+	spin_unlock(&v3d->mm_lock);
+
+	drm_gem_object_release(obj);
+	kfree(bo);
+	return ERR_PTR(ret);
+}
+
+/* Called by the DRM core on the last userspace/kernel unreference of the
+ * BO.
+ */
+void v3d_free_object(struct drm_gem_object *obj)
+{
+	struct v3d_dev *v3d = to_v3d_dev(obj->dev);
+	struct v3d_bo *bo = to_v3d_bo(obj);
+
+	mutex_lock(&v3d->bo_lock);
+	v3d->bo_stats.num_allocated--;
+	v3d->bo_stats.pages_allocated -= obj->size >> PAGE_SHIFT;
+	mutex_unlock(&v3d->bo_lock);
+
+	reservation_object_fini(&bo->_resv);
+
+	v3d_bo_put_pages(bo);
+
+	if (obj->import_attach)
+		drm_prime_gem_destroy(obj, bo->sgt);
+
+	v3d_mmu_remove_ptes(bo);
+	spin_lock(&v3d->mm_lock);
+	drm_mm_remove_node(&bo->node);
+	spin_unlock(&v3d->mm_lock);
+
+	mutex_destroy(&bo->lock);
+
+	drm_gem_object_release(obj);
+	kfree(bo);
+}
+
+struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj)
+{
+	struct v3d_bo *bo = to_v3d_bo(obj);
+
+	return bo->resv;
+}
+
+static void
+v3d_set_mmap_vma_flags(struct vm_area_struct *vma)
+{
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+}
+
+int v3d_gem_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct v3d_bo *bo = to_v3d_bo(obj);
+	unsigned long pfn;
+	pgoff_t pgoff;
+	int ret;
+
+	/* We don't use vmf->pgoff since that has the fake offset. */
+	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+	pfn = page_to_pfn(bo->pages[pgoff]);
+
+	ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
+
+	switch (ret) {
+	case -EAGAIN:
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * EBUSY is ok: this just means that another thread
+		 * already did the job.
+		 */
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+int v3d_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret)
+		return ret;
+
+	v3d_set_mmap_vma_flags(vma);
+
+	return ret;
+}
+
+int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	int ret;
+
+	ret = drm_gem_mmap_obj(obj, obj->size, vma);
+	if (ret < 0)
+		return ret;
+
+	v3d_set_mmap_vma_flags(vma);
+
+	return 0;
+}
+
+struct sg_table *
+v3d_prime_get_sg_table(struct drm_gem_object *obj)
+{
+	struct v3d_bo *bo = to_v3d_bo(obj);
+	int npages = obj->size >> PAGE_SHIFT;
+
+	return drm_prime_pages_to_sg(bo->pages, npages);
+}
+
+struct drm_gem_object *
+v3d_prime_import_sg_table(struct drm_device *dev,
+			  struct dma_buf_attachment *attach,
+			  struct sg_table *sgt)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct drm_gem_object *obj;
+	struct v3d_bo *bo;
+	int ret;
+
+	bo = v3d_bo_create_struct(dev, attach->dmabuf->size);
+	if (IS_ERR(bo))
+		return ERR_CAST(bo);
+	obj = &bo->base;
+
+	bo->resv = attach->dmabuf->resv;
+
+	bo->sgt = sgt;
+	ret = v3d_bo_get_pages(bo);
+	if (ret) {
+		spin_lock(&v3d->mm_lock);
+		drm_mm_remove_node(&bo->node);
+		spin_unlock(&v3d->mm_lock);
+
+		drm_gem_object_release(obj);
+		kfree(bo);
+		return ERR_PTR(ret);
+	}
+
+	v3d_mmu_insert_ptes(bo);
+
+	return obj;
+}
+
+int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_v3d_create_bo *args = data;
+	struct v3d_bo *bo = NULL;
+	int ret;
+
+	if (args->flags != 0) {
+		DRM_INFO("unknown create_bo flags: %d\n", args->flags);
+		return -EINVAL;
+	}
+
+	bo = v3d_bo_create(dev, file_priv, PAGE_ALIGN(args->size));
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
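+	/* The returned offset is the BO's address in the GPU's single
+	 * shared 4GB virtual address space.
+	 */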
+	args->offset = bo->node.start << PAGE_SHIFT;
+
+	ret = drm_gem_handle_create(file_priv, &bo->base, &args->handle);
+	drm_gem_object_put_unlocked(&bo->base);
+
+	return ret;
+}
+
+int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct drm_v3d_mmap_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	int ret;
+
+	if (args->flags != 0) {
+		DRM_INFO("unknown mmap_bo flags: %d\n", args->flags);
+		return -EINVAL;
+	}
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+
+	ret = drm_gem_create_mmap_offset(gem_obj);
+	if (ret == 0)
+		args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+	drm_gem_object_put_unlocked(gem_obj);
+
+	return ret;
+}
+
+int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv)
+{
+	struct drm_v3d_get_bo_offset *args = data;
+	struct drm_gem_object *gem_obj;
+	struct v3d_bo *bo;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+	bo = to_v3d_bo(gem_obj);
+
+	args->offset = bo->node.start << PAGE_SHIFT;
+
+	drm_gem_object_put_unlocked(gem_obj);
+	return 0;
+}
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
new file mode 100644
index 0000000..4db62c5
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+#include <linux/circ_buf.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <drm/drmP.h>
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define REGDEF(reg) { reg, #reg }
+struct v3d_reg_def {
+	u32 reg;
+	const char *name;
+};
+
+static const struct v3d_reg_def v3d_hub_reg_defs[] = {
+	REGDEF(V3D_HUB_AXICFG),
+	REGDEF(V3D_HUB_UIFCFG),
+	REGDEF(V3D_HUB_IDENT0),
+	REGDEF(V3D_HUB_IDENT1),
+	REGDEF(V3D_HUB_IDENT2),
+	REGDEF(V3D_HUB_IDENT3),
+	REGDEF(V3D_HUB_INT_STS),
+	REGDEF(V3D_HUB_INT_MSK_STS),
+};
+
+static const struct v3d_reg_def v3d_gca_reg_defs[] = {
+	REGDEF(V3D_GCA_SAFE_SHUTDOWN),
+	REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK),
+};
+
+static const struct v3d_reg_def v3d_core_reg_defs[] = {
+	REGDEF(V3D_CTL_IDENT0),
+	REGDEF(V3D_CTL_IDENT1),
+	REGDEF(V3D_CTL_IDENT2),
+	REGDEF(V3D_CTL_MISCCFG),
+	REGDEF(V3D_CTL_INT_STS),
+	REGDEF(V3D_CTL_INT_MSK_STS),
+	REGDEF(V3D_CLE_CT0CS),
+	REGDEF(V3D_CLE_CT0CA),
+	REGDEF(V3D_CLE_CT0EA),
+	REGDEF(V3D_CLE_CT1CS),
+	REGDEF(V3D_CLE_CT1CA),
+	REGDEF(V3D_CLE_CT1EA),
+
+	REGDEF(V3D_PTB_BPCA),
+	REGDEF(V3D_PTB_BPCS),
+
+	REGDEF(V3D_MMU_CTL),
+	REGDEF(V3D_MMU_VIO_ADDR),
+
+	REGDEF(V3D_GMP_STATUS),
+	REGDEF(V3D_GMP_CFG),
+	REGDEF(V3D_GMP_VIO_ADDR),
+};
+
+static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	int i, core;
+
+	for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) {
+		seq_printf(m, "%s (0x%04x): 0x%08x\n",
+			   v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg,
+			   V3D_READ(v3d_hub_reg_defs[i].reg));
+	}
+
+	for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
+		seq_printf(m, "%s (0x%04x): 0x%08x\n",
+			   v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg,
+			   V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
+	}
+
+	for (core = 0; core < v3d->cores; core++) {
+		for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) {
+			seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
+				   core,
+				   v3d_core_reg_defs[i].name,
+				   v3d_core_reg_defs[i].reg,
+				   V3D_CORE_READ(core,
+						 v3d_core_reg_defs[i].reg));
+		}
+	}
+
+	return 0;
+}
+
+static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	u32 ident0, ident1, ident2, ident3, cores;
+	int ret, core;
+
+	ret = pm_runtime_get_sync(v3d->dev);
+	if (ret < 0)
+		return ret;
+
+	ident0 = V3D_READ(V3D_HUB_IDENT0);
+	ident1 = V3D_READ(V3D_HUB_IDENT1);
+	ident2 = V3D_READ(V3D_HUB_IDENT2);
+	ident3 = V3D_READ(V3D_HUB_IDENT3);
+	cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
+
+	seq_printf(m, "Revision:   %d.%d.%d.%d\n",
+		   V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER),
+		   V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV),
+		   V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV),
+		   V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPIDX));
+	seq_printf(m, "MMU:        %s\n",
+		   (ident2 & V3D_HUB_IDENT2_WITH_MMU) ? "yes" : "no");
+	seq_printf(m, "TFU:        %s\n",
+		   (ident1 & V3D_HUB_IDENT1_WITH_TFU) ? "yes" : "no");
+	seq_printf(m, "TSY:        %s\n",
+		   (ident1 & V3D_HUB_IDENT1_WITH_TSY) ? "yes" : "no");
+	seq_printf(m, "MSO:        %s\n",
+		   (ident1 & V3D_HUB_IDENT1_WITH_MSO) ? "yes" : "no");
+	seq_printf(m, "L3C:        %s (%dkb)\n",
+		   (ident1 & V3D_HUB_IDENT1_WITH_L3C) ? "yes" : "no",
+		   V3D_GET_FIELD(ident2, V3D_HUB_IDENT2_L3C_NKB));
+
+	for (core = 0; core < cores; core++) {
+		u32 misccfg;
+		u32 nslc, ntmu, qups;
+
+		ident0 = V3D_CORE_READ(core, V3D_CTL_IDENT0);
+		ident1 = V3D_CORE_READ(core, V3D_CTL_IDENT1);
+		ident2 = V3D_CORE_READ(core, V3D_CTL_IDENT2);
+		misccfg = V3D_CORE_READ(core, V3D_CTL_MISCCFG);
+
+		nslc = V3D_GET_FIELD(ident1, V3D_IDENT1_NSLC);
+		ntmu = V3D_GET_FIELD(ident1, V3D_IDENT1_NTMU);
+		qups = V3D_GET_FIELD(ident1, V3D_IDENT1_QUPS);
+
+		seq_printf(m, "Core %d:\n", core);
+		seq_printf(m, "  Revision:     %d.%d\n",
+			   V3D_GET_FIELD(ident0, V3D_IDENT0_VER),
+			   V3D_GET_FIELD(ident1, V3D_IDENT1_REV));
+		seq_printf(m, "  Slices:       %d\n", nslc);
+		seq_printf(m, "  TMUs:         %d\n", nslc * ntmu);
+		seq_printf(m, "  QPUs:         %d\n", nslc * qups);
+		seq_printf(m, "  Semaphores:   %d\n",
+			   V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+		seq_printf(m, "  BCG int:      %d\n",
+			   (ident2 & V3D_IDENT2_BCG_INT) != 0);
+		seq_printf(m, "  Override TMU: %d\n",
+			   (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0);
+	}
+
+	pm_runtime_mark_last_busy(v3d->dev);
+	pm_runtime_put_autosuspend(v3d->dev);
+
+	return 0;
+}
+
+static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+
+	mutex_lock(&v3d->bo_lock);
+	seq_printf(m, "allocated bos:          %d\n",
+		   v3d->bo_stats.num_allocated);
+	seq_printf(m, "allocated bo size (kb): %ld\n",
+		   (long)v3d->bo_stats.pages_allocated << (PAGE_SHIFT - 10));
+	mutex_unlock(&v3d->bo_lock);
+
+	return 0;
+}
+
+static const struct drm_info_list v3d_debugfs_list[] = {
+	{"v3d_ident", v3d_v3d_debugfs_ident, 0},
+	{"v3d_regs", v3d_v3d_debugfs_regs, 0},
+	{"bo_stats", v3d_debugfs_bo_stats, 0},
+};
+
+int
+v3d_debugfs_init(struct drm_minor *minor)
+{
+	return drm_debugfs_create_files(v3d_debugfs_list,
+					ARRAY_SIZE(v3d_debugfs_list),
+					minor->debugfs_root, minor);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
new file mode 100644
index 0000000..cdb5820
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D Graphics Driver
+ *
+ * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs.
+ * For V3D 2.x support, see the VC4 driver.
+ *
+ * Currently only single-core rendering using the binner and renderer
+ * is supported.  The TFU (texture formatting unit) and V3D 4.x's CSD
+ * (compute shader dispatch) are not yet supported.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
+
+#include "uapi/drm/v3d_drm.h"
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define DRIVER_NAME "v3d"
+#define DRIVER_DESC "Broadcom V3D graphics"
+#define DRIVER_DATE "20180419"
+#define DRIVER_MAJOR 1
+#define DRIVER_MINOR 0
+#define DRIVER_PATCHLEVEL 0
+
+#ifdef CONFIG_PM
+static int v3d_runtime_suspend(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct v3d_dev *v3d = to_v3d_dev(drm);
+
+	v3d_irq_disable(v3d);
+
+	clk_disable_unprepare(v3d->clk);
+
+	return 0;
+}
+
+static int v3d_runtime_resume(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct v3d_dev *v3d = to_v3d_dev(drm);
+	int ret;
+
+	ret = clk_prepare_enable(v3d->clk);
+	if (ret != 0)
+		return ret;
+
+	/* XXX: VPM base */
+
+	v3d_mmu_set_page_table(v3d);
+	v3d_irq_enable(v3d);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops v3d_v3d_pm_ops = {
+	SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL)
+};
+
+static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct drm_v3d_get_param *args = data;
+	int ret;
+	static const u32 reg_map[] = {
+		[DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG,
+		[DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1,
+		[DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2,
+		[DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3,
+		[DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0,
+		[DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1,
+		[DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2,
+	};
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	/* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need
+	 * to explicitly allow it in the "is the register in our
+	 * parameter map" check.
+	 */
+	if (args->param < ARRAY_SIZE(reg_map) &&
+	    (reg_map[args->param] ||
+	     args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) {
+		u32 offset = reg_map[args->param];
+
+		if (args->value != 0)
+			return -EINVAL;
+
+		ret = pm_runtime_get_sync(v3d->dev);
+		if (ret < 0)
+			return ret;
+
+		if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 &&
+		    args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) {
+			args->value = V3D_CORE_READ(0, offset);
+		} else {
+			args->value = V3D_READ(offset);
+		}
+		pm_runtime_mark_last_busy(v3d->dev);
+		pm_runtime_put_autosuspend(v3d->dev);
+		return 0;
+	}
+
+	/* Any params that aren't just register reads would go here. */
+
+	DRM_DEBUG("Unknown parameter %d\n", args->param);
+	return -EINVAL;
+}
+
+static int
+v3d_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_file_priv *v3d_priv;
+	int i;
+
+	v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
+	if (!v3d_priv)
+		return -ENOMEM;
+
+	v3d_priv->v3d = v3d;
+
+	for (i = 0; i < V3D_MAX_QUEUES; i++) {
+		drm_sched_entity_init(&v3d->queue[i].sched,
+				      &v3d_priv->sched_entity[i],
+				      &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
+				      NULL);
+	}
+
+	file->driver_priv = v3d_priv;
+
+	return 0;
+}
+
+static void
+v3d_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_file_priv *v3d_priv = file->driver_priv;
+	enum v3d_queue q;
+
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		drm_sched_entity_fini(&v3d->queue[q].sched,
+				      &v3d_priv->sched_entity[q]);
+	}
+
+	kfree(v3d_priv);
+}
+
+static const struct file_operations v3d_drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = v3d_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+	.compat_ioctl = drm_compat_ioctl,
+	.llseek = noop_llseek,
+};
+
+/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
+ * protection between clients.  Note that render nodes would be
+ * able to submit CLs that could access BOs from clients authenticated
+ * with the master node.
+ */
+static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
+};
+
+static const struct vm_operations_struct v3d_vm_ops = {
+	.fault = v3d_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+static struct drm_driver v3d_drm_driver = {
+	.driver_features = (DRIVER_GEM |
+			    DRIVER_RENDER |
+			    DRIVER_PRIME |
+			    DRIVER_SYNCOBJ),
+
+	.open = v3d_open,
+	.postclose = v3d_postclose,
+
+#if defined(CONFIG_DEBUG_FS)
+	.debugfs_init = v3d_debugfs_init,
+#endif
+
+	.gem_free_object_unlocked = v3d_free_object,
+	.gem_vm_ops = &v3d_vm_ops,
+
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_import = drm_gem_prime_import,
+	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_res_obj = v3d_prime_res_obj,
+	.gem_prime_get_sg_table	= v3d_prime_get_sg_table,
+	.gem_prime_import_sg_table = v3d_prime_import_sg_table,
+	.gem_prime_mmap = v3d_prime_mmap,
+
+	.ioctls = v3d_drm_ioctls,
+	.num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
+	.fops = &v3d_drm_fops,
+
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static const struct of_device_id v3d_of_match[] = {
+	{ .compatible = "brcm,7268-v3d" },
+	{ .compatible = "brcm,7278-v3d" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, v3d_of_match);
+
+static int
+map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name)
+{
+	struct resource *res =
+		platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name);
+
+	*regs = devm_ioremap_resource(v3d->dev, res);
+	return PTR_ERR_OR_ZERO(*regs);
+}
+
+static int v3d_platform_drm_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct drm_device *drm;
+	struct v3d_dev *v3d;
+	int ret;
+	u32 ident1;
+
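+	/* V3D PTEs hold a 24-bit physical page number at 4KB pages,
+	 * so the device can address up to 36 bits of bus address.
+	 */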
+	dev->coherent_dma_mask = DMA_BIT_MASK(36);
+
+	v3d = kzalloc(sizeof(*v3d), GFP_KERNEL);
+	if (!v3d)
+		return -ENOMEM;
+	v3d->dev = dev;
+	v3d->pdev = pdev;
+	drm = &v3d->drm;
+
+	ret = map_regs(v3d, &v3d->bridge_regs, "bridge");
+	if (ret)
+		goto dev_free;
+
+	ret = map_regs(v3d, &v3d->hub_regs, "hub");
+	if (ret)
+		goto dev_free;
+
+	ret = map_regs(v3d, &v3d->core_regs[0], "core0");
+	if (ret)
+		goto dev_free;
+
+	ident1 = V3D_READ(V3D_HUB_IDENT1);
+	v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 +
+		    V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV));
+	v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
+	WARN_ON(v3d->cores > 1); /* multicore not yet implemented */
+
+	if (v3d->ver < 41) {
+		ret = map_regs(v3d, &v3d->gca_regs, "gca");
+		if (ret)
+			goto dev_free;
+	}
+
+	v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr,
+					GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
+	if (!v3d->mmu_scratch) {
+		dev_err(dev, "Failed to allocate MMU scratch page\n");
+		ret = -ENOMEM;
+		goto dev_free;
+	}
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 50);
+	pm_runtime_enable(dev);
+
+	ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev);
+	if (ret)
+		goto dma_free;
+
+	platform_set_drvdata(pdev, drm);
+	drm->dev_private = v3d;
+
+	ret = v3d_gem_init(drm);
+	if (ret)
+		goto dev_destroy;
+
+	v3d_irq_init(v3d);
+
+	ret = drm_dev_register(drm, 0);
+	if (ret)
+		goto gem_destroy;
+
+	return 0;
+
+gem_destroy:
+	v3d_gem_destroy(drm);
+dev_destroy:
+	drm_dev_put(drm);
+dma_free:
+	dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
+dev_free:
+	kfree(v3d);
+	return ret;
+}
+
+static int v3d_platform_drm_remove(struct platform_device *pdev)
+{
+	struct drm_device *drm = platform_get_drvdata(pdev);
+	struct v3d_dev *v3d = to_v3d_dev(drm);
+
+	drm_dev_unregister(drm);
+
+	v3d_gem_destroy(drm);
+
+	drm_dev_put(drm);
+
+	dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
+
+	return 0;
+}
+
+static struct platform_driver v3d_platform_driver = {
+	.probe		= v3d_platform_drm_probe,
+	.remove		= v3d_platform_drm_remove,
+	.driver		= {
+		.name	= "v3d",
+		.of_match_table = v3d_of_match,
+	},
+};
+
+static int __init v3d_drm_register(void)
+{
+	return platform_driver_register(&v3d_platform_driver);
+}
+
+static void __exit v3d_drm_unregister(void)
+{
+	platform_driver_unregister(&v3d_platform_driver);
+}
+
+module_init(v3d_drm_register);
+module_exit(v3d_drm_unregister);
+
+MODULE_ALIAS("platform:v3d-drm");
+MODULE_DESCRIPTION("Broadcom V3D DRM Driver");
+MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
new file mode 100644
index 0000000..a043ac3
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2015-2018 Broadcom */
+
+#include <linux/reservation.h>
+#include <drm/drmP.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_gem.h>
+#include <drm/gpu_scheduler.h>
+
+#define GMP_GRANULARITY (128 * 1024)
+
+/* Enum for each of the V3D queues.  We maintain various queue
+ * tracking as an array because at some point we'll want to support
+ * the TFU (texture formatting unit) as another queue.
+ */
+enum v3d_queue {
+	V3D_BIN,
+	V3D_RENDER,
+};
+
+#define V3D_MAX_QUEUES (V3D_RENDER + 1)
+
+struct v3d_queue_state {
+	struct drm_gpu_scheduler sched;
+
+	u64 fence_context;
+	u64 emit_seqno;
+	u64 finished_seqno;
+};
+
+struct v3d_dev {
+	struct drm_device drm;
+
+	/* Short representation (e.g. 33, 41) of the V3D tech version
+	 * and revision.
+	 */
+	int ver;
+
+	struct device *dev;
+	struct platform_device *pdev;
+	void __iomem *hub_regs;
+	void __iomem *core_regs[3];
+	void __iomem *bridge_regs;
+	void __iomem *gca_regs;
+	struct clk *clk;
+
+	/* Virtual and DMA addresses of the single shared page table. */
+	volatile u32 *pt;
+	dma_addr_t pt_paddr;
+
+	/* Virtual and DMA addresses of the MMU's scratch page.  When
+	 * a read or write is invalid in the MMU, it will be
+	 * redirected here.
+	 */
+	void *mmu_scratch;
+	dma_addr_t mmu_scratch_paddr;
+
+	/* Number of V3D cores. */
+	u32 cores;
+
+	/* Allocator managing the address space.  All units are in
+	 * number of pages.
+	 */
+	struct drm_mm mm;
+	spinlock_t mm_lock;
+
+	struct work_struct overflow_mem_work;
+
+	struct v3d_exec_info *bin_job;
+	struct v3d_exec_info *render_job;
+
+	struct v3d_queue_state queue[V3D_MAX_QUEUES];
+
+	/* Spinlock used to synchronize the overflow memory
+	 * management against bin job submission.
+	 */
+	spinlock_t job_lock;
+
+	/* Protects bo_stats */
+	struct mutex bo_lock;
+
+	/* Lock taken when resetting the GPU, to keep multiple
+	 * processes from trying to park the scheduler threads and
+	 * reset at once.
+	 */
+	struct mutex reset_lock;
+
+	struct {
+		u32 num_allocated;
+		u32 pages_allocated;
+	} bo_stats;
+};
+
+static inline struct v3d_dev *
+to_v3d_dev(struct drm_device *dev)
+{
+	return (struct v3d_dev *)dev->dev_private;
+}
+
+/* The per-fd struct, which tracks the MMU mappings. */
+struct v3d_file_priv {
+	struct v3d_dev *v3d;
+
+	struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
+};
+
+/* Tracks a mapping of a BO into a per-fd address space */
+struct v3d_vma {
+	struct v3d_page_table *pt;
+	struct list_head list; /* entry in v3d_bo.vmas */
+};
+
+struct v3d_bo {
+	struct drm_gem_object base;
+
+	struct mutex lock;
+
+	struct drm_mm_node node;
+
+	u32 pages_refcount;
+	struct page **pages;
+	struct sg_table *sgt;
+	void *vaddr;
+
+	struct list_head vmas;    /* list of v3d_vma */
+
+	/* List entry for the BO's position in
+	 * v3d_exec_info->unref_list
+	 */
+	struct list_head unref_head;
+
+	/* Normally (resv == &_resv), except for imported BOs. */
+	struct reservation_object *resv;
+	struct reservation_object _resv;
+};
+
+static inline struct v3d_bo *
+to_v3d_bo(struct drm_gem_object *bo)
+{
+	return (struct v3d_bo *)bo;
+}
+
+struct v3d_fence {
+	struct dma_fence base;
+	struct drm_device *dev;
+	/* v3d seqno for signaled() test */
+	u64 seqno;
+	enum v3d_queue queue;
+};
+
+static inline struct v3d_fence *
+to_v3d_fence(struct dma_fence *fence)
+{
+	return (struct v3d_fence *)fence;
+}
+
+#define V3D_READ(offset) readl(v3d->hub_regs + offset)
+#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset)
+
+#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset)
+#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset)
+
+#define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset)
+#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset)
+
+#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset)
+#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset)
+
+struct v3d_job {
+	struct drm_sched_job base;
+
+	struct v3d_exec_info *exec;
+
+	/* An optional fence userspace can pass in for the job to depend on. */
+	struct dma_fence *in_fence;
+
+	/* v3d fence to be signaled by IRQ handler when the job is complete. */
+	struct dma_fence *done_fence;
+
+	/* GPU virtual addresses of the start/end of the CL job. */
+	u32 start, end;
+};
+
+struct v3d_exec_info {
+	struct v3d_dev *v3d;
+
+	struct v3d_job bin, render;
+
+	/* Fence for when the scheduler considers the binner to be
+	 * done, for render to depend on.
+	 */
+	struct dma_fence *bin_done_fence;
+
+	struct kref refcount;
+
+	/* This is the array of BOs that were looked up at the start of exec. */
+	struct v3d_bo **bo;
+	u32 bo_count;
+
+	/* List of overflow BOs used in the job that need to be
+	 * released once the job is complete.
+	 */
+	struct list_head unref_list;
+
+	/* Submitted tile memory allocation start/size, tile state. */
+	u32 qma, qms, qts;
+};
+
+/**
+ * _wait_for - magic (register) wait macro
+ *
+ * Does the right thing for modeset paths when run under kgdb or similar atomic
+ * contexts. Note that it's important that we check the condition again after
+ * having timed out, since the timeout could be due to preemption or similar and
+ * we've never had a chance to check the condition before the timeout.
+ */
+#define wait_for(COND, MS) ({ \
+	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;	\
+	int ret__ = 0;							\
+	while (!(COND)) {						\
+		if (time_after(jiffies, timeout__)) {			\
+			if (!(COND))					\
+				ret__ = -ETIMEDOUT;			\
+			break;						\
+		}							\
+		msleep(1);					\
+	}								\
+	ret__;								\
+})
+
+static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+{
+	/* nsecs_to_jiffies64() does not guard against overflow */
+	if (NSEC_PER_SEC % HZ &&
+	    div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
+		return MAX_JIFFY_OFFSET;
+
+	return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
+}
+
+/* v3d_bo.c */
+void v3d_free_object(struct drm_gem_object *gem_obj);
+struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
+			     size_t size);
+int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
+int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv);
+int v3d_gem_fault(struct vm_fault *vmf);
+int v3d_mmap(struct file *filp, struct vm_area_struct *vma);
+struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj);
+int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+struct sg_table *v3d_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev,
+						 struct dma_buf_attachment *attach,
+						 struct sg_table *sgt);
+
+/* v3d_debugfs.c */
+int v3d_debugfs_init(struct drm_minor *minor);
+
+/* v3d_fence.c */
+extern const struct dma_fence_ops v3d_fence_ops;
+struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue);
+
+/* v3d_gem.c */
+int v3d_gem_init(struct drm_device *dev);
+void v3d_gem_destroy(struct drm_device *dev);
+int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv);
+void v3d_exec_put(struct v3d_exec_info *exec);
+void v3d_reset(struct v3d_dev *v3d);
+void v3d_invalidate_caches(struct v3d_dev *v3d);
+void v3d_flush_caches(struct v3d_dev *v3d);
+
+/* v3d_irq.c */
+void v3d_irq_init(struct v3d_dev *v3d);
+void v3d_irq_enable(struct v3d_dev *v3d);
+void v3d_irq_disable(struct v3d_dev *v3d);
+void v3d_irq_reset(struct v3d_dev *v3d);
+
+/* v3d_mmu.c */
+int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo,
+		       u32 *offset);
+int v3d_mmu_set_page_table(struct v3d_dev *v3d);
+void v3d_mmu_insert_ptes(struct v3d_bo *bo);
+void v3d_mmu_remove_ptes(struct v3d_bo *bo);
+
+/* v3d_sched.c */
+int v3d_sched_init(struct v3d_dev *v3d);
+void v3d_sched_fini(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
new file mode 100644
index 0000000..087d49c
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2017-2018 Broadcom */
+
+#include "v3d_drv.h"
+
+struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue)
+{
+	struct v3d_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return ERR_PTR(-ENOMEM);
+
+	fence->dev = &v3d->drm;
+	fence->queue = queue;
+	fence->seqno = ++v3d->queue[queue].emit_seqno;
+	dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock,
+		       v3d->queue[queue].fence_context, fence->seqno);
+
+	return &fence->base;
+}
+
+static const char *v3d_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "v3d";
+}
+
+static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
+{
+	struct v3d_fence *f = to_v3d_fence(fence);
+
+	if (f->queue == V3D_BIN)
+		return "v3d-bin";
+	else
+		return "v3d-render";
+}
+
+static bool v3d_fence_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+static bool v3d_fence_signaled(struct dma_fence *fence)
+{
+	struct v3d_fence *f = to_v3d_fence(fence);
+	struct v3d_dev *v3d = to_v3d_dev(f->dev);
+
+	return v3d->queue[f->queue].finished_seqno >= f->seqno;
+}
+
+const struct dma_fence_ops v3d_fence_ops = {
+	.get_driver_name = v3d_fence_get_driver_name,
+	.get_timeline_name = v3d_fence_get_timeline_name,
+	.enable_signaling = v3d_fence_enable_signaling,
+	.signaled = v3d_fence_signaled,
+	.wait = dma_fence_default_wait,
+	.release = dma_fence_free,
+};
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
new file mode 100644
index 0000000..b513f91
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -0,0 +1,668 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+#include <drm/drmP.h>
+#include <drm/drm_syncobj.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/sched/signal.h>
+
+#include "uapi/drm/v3d_drm.h"
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+#include "v3d_trace.h"
+
+static void
+v3d_init_core(struct v3d_dev *v3d, int core)
+{
+	/* Set OVRTMUOUT, which means that the texture sampler uniform
+	 * configuration's tmu output type field is used, instead of
+	 * using the hardware default behavior based on the texture
+	 * type.  If you want the default behavior, you can still put
+	 * "2" in the indirect texture state's output_type field.
+	 */
+	V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);
+
+	/* Whenever we flush the L2T cache, we always want to flush
+	 * the whole thing.
+	 */
+	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
+	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
+}
+
+/* Sets invariant state for the HW. */
+static void
+v3d_init_hw_state(struct v3d_dev *v3d)
+{
+	v3d_init_core(v3d, 0);
+}
+
+static void
+v3d_idle_axi(struct v3d_dev *v3d, int core)
+{
+	V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);
+
+	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
+		      (V3D_GMP_STATUS_RD_COUNT_MASK |
+		       V3D_GMP_STATUS_WR_COUNT_MASK |
+		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
+		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
+	}
+}
+
+static void
+v3d_idle_gca(struct v3d_dev *v3d)
+{
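+	/* The GCA only exists on pre-4.1 hardware; its registers are
+	 * only mapped when v3d->ver < 41.
+	 */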
+	if (v3d->ver >= 41)
+		return;
+
+	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);
+
+	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
+		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
+		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
+		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
+	}
+}
+
+static void
+v3d_reset_v3d(struct v3d_dev *v3d)
+{
+	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);
+
+	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
+		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
+				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
+		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);
+
+		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
+		 * of the unit, so reset it to its power-on value here.
+		 */
+		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
+	} else {
+		WARN_ON_ONCE(V3D_GET_FIELD(version,
+					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
+		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
+				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
+		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
+	}
+
+	v3d_init_hw_state(v3d);
+}
+
+void
+v3d_reset(struct v3d_dev *v3d)
+{
+	struct drm_device *dev = &v3d->drm;
+
+	DRM_ERROR("Resetting GPU.\n");
+	trace_v3d_reset_begin(dev);
+
+	/* XXX: only needed for safe powerdown, not reset. */
+	if (false)
+		v3d_idle_axi(v3d, 0);
+
+	v3d_idle_gca(v3d);
+	v3d_reset_v3d(v3d);
+
+	v3d_mmu_set_page_table(v3d);
+	v3d_irq_reset(v3d);
+
+	trace_v3d_reset_end(dev);
+}
+
+static void
+v3d_flush_l3(struct v3d_dev *v3d)
+{
+	if (v3d->ver < 41) {
+		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);
+
+		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
+			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);
+
+		if (v3d->ver < 33) {
+			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
+				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
+		}
+	}
+}
+
+/* Invalidates the (read-only) L2 cache. */
+static void
+v3d_invalidate_l2(struct v3d_dev *v3d, int core)
+{
+	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
+		       V3D_L2CACTL_L2CCLR |
+		       V3D_L2CACTL_L2CENA);
+}
+
+static void
+v3d_invalidate_l1td(struct v3d_dev *v3d, int core)
+{
+	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
+	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+		       V3D_L2TCACTL_L2TFLS), 100)) {
+		DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
+	}
+}
+
+/* Invalidates texture L2 cachelines */
+static void
+v3d_flush_l2t(struct v3d_dev *v3d, int core)
+{
+	v3d_invalidate_l1td(v3d, core);
+
+	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
+		       V3D_L2TCACTL_L2TFLS |
+		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
+	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+		       V3D_L2TCACTL_L2TFLS), 100)) {
+		DRM_ERROR("Timeout waiting for L2T flush\n");
+	}
+}
+
+/* Invalidates the slice caches.  These are read-only caches. */
+static void
+v3d_invalidate_slices(struct v3d_dev *v3d, int core)
+{
+	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
+		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
+		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
+		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
+		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
+}
+
+/* Invalidates texture L2 cachelines */
+static void
+v3d_invalidate_l2t(struct v3d_dev *v3d, int core)
+{
+	V3D_CORE_WRITE(core,
+		       V3D_CTL_L2TCACTL,
+		       V3D_L2TCACTL_L2TFLS |
+		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM));
+	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+		       V3D_L2TCACTL_L2TFLS), 100)) {
+		DRM_ERROR("Timeout waiting for L2T invalidate\n");
+	}
+}
+
+void
+v3d_invalidate_caches(struct v3d_dev *v3d)
+{
+	v3d_flush_l3(v3d);
+
+	v3d_invalidate_l2(v3d, 0);
+	v3d_invalidate_slices(v3d, 0);
+	v3d_flush_l2t(v3d, 0);
+}
+
+void
+v3d_flush_caches(struct v3d_dev *v3d)
+{
+	v3d_invalidate_l1td(v3d, 0);
+	v3d_invalidate_l2t(v3d, 0);
+}
+
+static void
+v3d_attach_object_fences(struct v3d_exec_info *exec)
+{
+	struct dma_fence *out_fence = &exec->render.base.s_fence->finished;
+	struct v3d_bo *bo;
+	int i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_v3d_bo(&exec->bo[i]->base);
+
+		/* XXX: Use shared fences for read-only objects. */
+		reservation_object_add_excl_fence(bo->resv, out_fence);
+	}
+}
+
+static void
+v3d_unlock_bo_reservations(struct drm_device *dev,
+			   struct v3d_exec_info *exec,
+			   struct ww_acquire_ctx *acquire_ctx)
+{
+	int i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		struct v3d_bo *bo = to_v3d_bo(&exec->bo[i]->base);
+
+		ww_mutex_unlock(&bo->resv->lock);
+	}
+
+	ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list).  They're entirely private
+ * to v3d, so we don't attach dma-buf fences to them.
+ */
+static int
+v3d_lock_bo_reservations(struct drm_device *dev,
+			 struct v3d_exec_info *exec,
+			 struct ww_acquire_ctx *acquire_ctx)
+{
+	int contended_lock = -1;
+	int i, ret;
+	struct v3d_bo *bo;
+
+	ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
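+	/* ww_mutex deadlock avoidance: if locking a BO returns
+	 * -EDEADLK, drop every lock we hold, take the contended lock
+	 * with the slowpath, and then retry the rest of the list.
+	 */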
+retry:
+	if (contended_lock != -1) {
+		bo = to_v3d_bo(&exec->bo[contended_lock]->base);
+		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+						       acquire_ctx);
+		if (ret) {
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < exec->bo_count; i++) {
+		if (i == contended_lock)
+			continue;
+
+		bo = to_v3d_bo(&exec->bo[i]->base);
+
+		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+		if (ret) {
+			int j;
+
+			for (j = 0; j < i; j++) {
+				bo = to_v3d_bo(&exec->bo[j]->base);
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (contended_lock != -1 && contended_lock >= i) {
+				bo = to_v3d_bo(&exec->bo[contended_lock]->base);
+
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (ret == -EDEADLK) {
+				contended_lock = i;
+				goto retry;
+			}
+
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	ww_acquire_done(acquire_ctx);
+
+	/* Reserve space for our shared (read-only) fence references,
+	 * before we commit the CL to the hardware.
+	 */
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_v3d_bo(&exec->bo[i]->base);
+
+		ret = reservation_object_reserve_shared(bo->resv);
+		if (ret) {
+			v3d_unlock_bo_reservations(dev, exec, acquire_ctx);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @args: IOCTL argument with the BO handle list from userspace
+ * @exec: V3D job being set up
+ *
+ * The command validator needs to reference BOs by their index within
+ * the submitted job's BO list.  This does the validation of the job's
+ * BO list and reference counting for the lifetime of the job.
+ *
+ * Note that this function doesn't need to unreference the BOs on
+ * failure, because that will happen at v3d_exec_cleanup() time.
+ */
+static int
+v3d_cl_lookup_bos(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct drm_v3d_submit_cl *args,
+		  struct v3d_exec_info *exec)
+{
+	u32 *handles;
+	int ret = 0;
+	int i;
+
+	exec->bo_count = args->bo_handle_count;
+
+	if (!exec->bo_count) {
+		/* See comment on bo_index for why we have to check
+		 * this.
+		 */
+		DRM_DEBUG("Rendering requires BOs\n");
+		return -EINVAL;
+	}
+
+	exec->bo = kvmalloc_array(exec->bo_count,
+				  sizeof(struct drm_gem_cma_object *),
+				  GFP_KERNEL | __GFP_ZERO);
+	if (!exec->bo) {
+		DRM_DEBUG("Failed to allocate validated BO pointers\n");
+		return -ENOMEM;
+	}
+
+	handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
+	if (!handles) {
+		ret = -ENOMEM;
+		DRM_DEBUG("Failed to allocate incoming GEM handles\n");
+		goto fail;
+	}
+
+	if (copy_from_user(handles,
+			   (void __user *)(uintptr_t)args->bo_handles,
+			   exec->bo_count * sizeof(u32))) {
+		ret = -EFAULT;
+		DRM_DEBUG("Failed to copy in GEM handles\n");
+		goto fail;
+	}
+
+	spin_lock(&file_priv->table_lock);
+	for (i = 0; i < exec->bo_count; i++) {
+		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+						     handles[i]);
+		if (!bo) {
+			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
+				  i, handles[i]);
+			ret = -ENOENT;
+			spin_unlock(&file_priv->table_lock);
+			goto fail;
+		}
+		drm_gem_object_get(bo);
+		exec->bo[i] = to_v3d_bo(bo);
+	}
+	spin_unlock(&file_priv->table_lock);
+
+fail:
+	kvfree(handles);
+	return ret;
+}
+
+static void
+v3d_exec_cleanup(struct kref *ref)
+{
+	struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
+						  refcount);
+	struct v3d_dev *v3d = exec->v3d;
+	unsigned int i;
+	struct v3d_bo *bo, *save;
+
+	dma_fence_put(exec->bin.in_fence);
+	dma_fence_put(exec->render.in_fence);
+
+	dma_fence_put(exec->bin.done_fence);
+	dma_fence_put(exec->render.done_fence);
+
+	dma_fence_put(exec->bin_done_fence);
+
+	for (i = 0; i < exec->bo_count; i++)
+		drm_gem_object_put_unlocked(&exec->bo[i]->base);
+	kvfree(exec->bo);
+
+	list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
+		drm_gem_object_put_unlocked(&bo->base);
+	}
+
+	pm_runtime_mark_last_busy(v3d->dev);
+	pm_runtime_put_autosuspend(v3d->dev);
+
+	kfree(exec);
+}
+
+void v3d_exec_put(struct v3d_exec_info *exec)
+{
+	kref_put(&exec->refcount, v3d_exec_cleanup);
+}
+
+int
+v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	int ret;
+	struct drm_v3d_wait_bo *args = data;
+	struct drm_gem_object *gem_obj;
+	struct v3d_bo *bo;
+	ktime_t start = ktime_get();
+	u64 delta_ns;
+	unsigned long timeout_jiffies =
+		nsecs_to_jiffies_timeout(args->timeout_ns);
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+	bo = to_v3d_bo(gem_obj);
+
+	ret = reservation_object_wait_timeout_rcu(bo->resv,
+						  true, true,
+						  timeout_jiffies);
+
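+	/* The reservation wait returns 0 on timeout, the remaining
+	 * jiffies on success, or a negative error code.
+	 */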
+	if (ret == 0)
+		ret = -ETIME;
+	else if (ret > 0)
+		ret = 0;
+
+	/* Decrement the user's timeout, in case we got interrupted
+	 * such that the ioctl will be restarted.
+	 */
+	delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
+	if (delta_ns < args->timeout_ns)
+		args->timeout_ns -= delta_ns;
+	else
+		args->timeout_ns = 0;
+
+	/* Asked to wait beyond the jiffie/scheduler precision? */
+	if (ret == -ETIME && args->timeout_ns)
+		ret = -EAGAIN;
+
+	drm_gem_object_put_unlocked(gem_obj);
+
+	return ret;
+}
+
+/**
+ * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * This is the main entrypoint for userspace to submit a 3D frame to
+ * the GPU.  Userspace provides the binner command list (if
+ * applicable), and the kernel sets up the render command list to draw
+ * to the framebuffer described in the ioctl, using the command lists
+ * that the 3D engine's binner will produce.
+ */
+int
+v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+	struct drm_v3d_submit_cl *args = data;
+	struct v3d_exec_info *exec;
+	struct ww_acquire_ctx acquire_ctx;
+	struct drm_syncobj *sync_out;
+	int ret = 0;
+
+	if (args->pad != 0) {
+		DRM_INFO("pad must be zero: %d\n", args->pad);
+		return -EINVAL;
+	}
+
+	exec = kzalloc(sizeof(*exec), GFP_KERNEL);
+	if (!exec)
+		return -ENOMEM;
+
+	ret = pm_runtime_get_sync(v3d->dev);
+	if (ret < 0) {
+		kfree(exec);
+		return ret;
+	}
+
+	kref_init(&exec->refcount);
+
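+	/* Look up the optional in-fences.  A missing or zero syncobj
+	 * handle (-ENOENT) just means no dependency was requested;
+	 * only -EINVAL (a syncobj with no fence attached) is fatal.
+	 */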
+	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
+				     &exec->bin.in_fence);
+	if (ret == -EINVAL)
+		goto fail;
+
+	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
+				     &exec->render.in_fence);
+	if (ret == -EINVAL)
+		goto fail;
+
+	exec->qma = args->qma;
+	exec->qms = args->qms;
+	exec->qts = args->qts;
+	exec->bin.exec = exec;
+	exec->bin.start = args->bcl_start;
+	exec->bin.end = args->bcl_end;
+	exec->render.exec = exec;
+	exec->render.start = args->rcl_start;
+	exec->render.end = args->rcl_end;
+	exec->v3d = v3d;
+	INIT_LIST_HEAD(&exec->unref_list);
+
+	ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
+	if (ret)
+		goto fail;
+
+	ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx);
+	if (ret)
+		goto fail;
+
+	if (exec->bin.start != exec->bin.end) {
+		ret = drm_sched_job_init(&exec->bin.base,
+					 &v3d->queue[V3D_BIN].sched,
+					 &v3d_priv->sched_entity[V3D_BIN],
+					 v3d_priv);
+		if (ret)
+			goto fail_unreserve;
+
+		exec->bin_done_fence =
+			dma_fence_get(&exec->bin.base.s_fence->finished);
+
+		kref_get(&exec->refcount); /* put by scheduler job completion */
+		drm_sched_entity_push_job(&exec->bin.base,
+					  &v3d_priv->sched_entity[V3D_BIN]);
+	}
+
+	ret = drm_sched_job_init(&exec->render.base,
+				 &v3d->queue[V3D_RENDER].sched,
+				 &v3d_priv->sched_entity[V3D_RENDER],
+				 v3d_priv);
+	if (ret)
+		goto fail_unreserve;
+
+	kref_get(&exec->refcount); /* put by scheduler job completion */
+	drm_sched_entity_push_job(&exec->render.base,
+				  &v3d_priv->sched_entity[V3D_RENDER]);
+
+	v3d_attach_object_fences(exec);
+
+	v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
+
+	/* Update the return sync object for the job. */
+	sync_out = drm_syncobj_find(file_priv, args->out_sync);
+	if (sync_out) {
+		drm_syncobj_replace_fence(sync_out,
+					  &exec->render.base.s_fence->finished);
+		drm_syncobj_put(sync_out);
+	}
+
+	v3d_exec_put(exec);
+
+	return 0;
+
+fail_unreserve:
+	v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
+fail:
+	v3d_exec_put(exec);
+
+	return ret;
+}
+
+int
+v3d_gem_init(struct drm_device *dev)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	u32 pt_size = 4096 * 1024;
+	int ret, i;
+
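+	/* Give each queue its own fence context so that bin and
+	 * render fence seqnos advance independently.
+	 */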
+	for (i = 0; i < V3D_MAX_QUEUES; i++)
+		v3d->queue[i].fence_context = dma_fence_context_alloc(1);
+
+	spin_lock_init(&v3d->mm_lock);
+	spin_lock_init(&v3d->job_lock);
+	mutex_init(&v3d->bo_lock);
+	mutex_init(&v3d->reset_lock);
+
+	/* Note: We don't allocate address 0.  Various bits of HW
+	 * treat 0 as special, such as the occlusion query counters
+	 * where 0 means "disabled".
+	 */
+	drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);
+
+	v3d->pt = dma_alloc_wc(v3d->dev, pt_size,
+			       &v3d->pt_paddr,
+			       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
+	if (!v3d->pt) {
+		drm_mm_takedown(&v3d->mm);
+		dev_err(v3d->dev,
+			"Failed to allocate page tables. "
+			"Please ensure you have CMA enabled.\n");
+		return -ENOMEM;
+	}
+
+	v3d_init_hw_state(v3d);
+	v3d_mmu_set_page_table(v3d);
+
+	ret = v3d_sched_init(v3d);
+	if (ret) {
+		drm_mm_takedown(&v3d->mm);
+		dma_free_wc(v3d->dev, pt_size, (void *)v3d->pt,
+			    v3d->pt_paddr);
+		return ret;
+	}
+
+	return 0;
+}
+
+void
+v3d_gem_destroy(struct drm_device *dev)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	enum v3d_queue q;
+
+	v3d_sched_fini(v3d);
+
+	/* Waiting for exec to finish would need to be done before
+	 * unregistering V3D.
+	 */
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		WARN_ON(v3d->queue[q].emit_seqno !=
+			v3d->queue[q].finished_seqno);
+	}
+
+	drm_mm_takedown(&v3d->mm);
+
+	dma_free_wc(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
new file mode 100644
index 0000000..77e1fa0
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+/**
+ * DOC: Interrupt management for the V3D engine
+ *
+ * When we take a binning or rendering flush done interrupt, we need
+ * to signal the fence for that job so that the scheduler can queue up
+ * the next one and unblock any waiters.
+ *
+ * When we take the binner out of memory interrupt, we need to
+ * allocate some new memory and pass it to the binner so that the
+ * current job can make progress.
+ */
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM |	\
+			     V3D_INT_FLDONE |	\
+			     V3D_INT_FRDONE |	\
+			     V3D_INT_GMPV))
+
+#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV |	\
+			    V3D_HUB_INT_MMU_PTI |	\
+			    V3D_HUB_INT_MMU_CAP))
+
+static void
+v3d_overflow_mem_work(struct work_struct *work)
+{
+	struct v3d_dev *v3d =
+		container_of(work, struct v3d_dev, overflow_mem_work);
+	struct drm_device *dev = &v3d->drm;
+	struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024);
+	unsigned long irqflags;
+
+	if (IS_ERR(bo)) {
+		DRM_ERROR("Couldn't allocate binner overflow mem\n");
+		return;
+	}
+
+	/* If we lost a race and our work task came in after the bin
+	 * job completed and exited, bail out.  This can happen
+	 * because the HW signals OOM before it's fully out of memory,
+	 * so the binner might just barely complete.
+	 *
+	 * If we lose the race the other way and our work task comes
+	 * in after a new bin job got scheduled, that's fine.  We'll
+	 * just give it some binner pool anyway.
+	 */
+	spin_lock_irqsave(&v3d->job_lock, irqflags);
+	if (!v3d->bin_job) {
+		spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+		goto out;
+	}
+
+	drm_gem_object_get(&bo->base);
+	list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
+	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+
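+	/* Point the binner at the new overflow BO's GPU VA and size. */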
+	V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
+	V3D_CORE_WRITE(0, V3D_PTB_BPOS, bo->base.size);
+
+out:
+	drm_gem_object_put_unlocked(&bo->base);
+}
+
+static irqreturn_t
+v3d_irq(int irq, void *arg)
+{
+	struct v3d_dev *v3d = arg;
+	u32 intsts;
+	irqreturn_t status = IRQ_NONE;
+
+	intsts = V3D_CORE_READ(0, V3D_CTL_INT_STS);
+
+	/* Acknowledge the interrupts we're handling here. */
+	V3D_CORE_WRITE(0, V3D_CTL_INT_CLR, intsts);
+
+	if (intsts & V3D_INT_OUTOMEM) {
+		/* Note that the OOM status is edge signaled, so the
+		 * interrupt won't happen again until we actually
+		 * add more memory.
+		 */
+		schedule_work(&v3d->overflow_mem_work);
+		status = IRQ_HANDLED;
+	}
+
+	if (intsts & V3D_INT_FLDONE) {
+		v3d->queue[V3D_BIN].finished_seqno++;
+		dma_fence_signal(v3d->bin_job->bin.done_fence);
+		status = IRQ_HANDLED;
+	}
+
+	if (intsts & V3D_INT_FRDONE) {
+		v3d->queue[V3D_RENDER].finished_seqno++;
+		dma_fence_signal(v3d->render_job->render.done_fence);
+
+		status = IRQ_HANDLED;
+	}
+
+	/* We shouldn't be triggering these if we have GMP in
+	 * always-allowed mode.
+	 */
+	if (intsts & V3D_INT_GMPV)
+		dev_err(v3d->dev, "GMP violation\n");
+
+	return status;
+}
+
+static irqreturn_t
+v3d_hub_irq(int irq, void *arg)
+{
+	struct v3d_dev *v3d = arg;
+	u32 intsts;
+	irqreturn_t status = IRQ_NONE;
+
+	intsts = V3D_READ(V3D_HUB_INT_STS);
+
+	/* Acknowledge the interrupts we're handling here. */
+	V3D_WRITE(V3D_HUB_INT_CLR, intsts);
+
+	if (intsts & (V3D_HUB_INT_MMU_WRV |
+		      V3D_HUB_INT_MMU_PTI |
+		      V3D_HUB_INT_MMU_CAP)) {
+		u32 axi_id = V3D_READ(V3D_MMU_VIO_ID);
+		u64 vio_addr = (u64)V3D_READ(V3D_MMU_VIO_ADDR) << 8;
+
+		dev_err(v3d->dev, "MMU error from client %d at 0x%08llx%s%s%s\n",
+			axi_id, (long long)vio_addr,
+			((intsts & V3D_HUB_INT_MMU_WRV) ?
+			 ", write violation" : ""),
+			((intsts & V3D_HUB_INT_MMU_PTI) ?
+			 ", pte invalid" : ""),
+			((intsts & V3D_HUB_INT_MMU_CAP) ?
+			 ", cap exceeded" : ""));
+		status = IRQ_HANDLED;
+	}
+
+	return status;
+}
+
+void
+v3d_irq_init(struct v3d_dev *v3d)
+{
+	int ret, core;
+
+	INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work);
+
+	/* Clear any pending interrupts someone might have left around
+	 * for us.
+	 */
+	for (core = 0; core < v3d->cores; core++)
+		V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
+	V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
+
+	ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0),
+			       v3d_hub_irq, IRQF_SHARED,
+			       "v3d_hub", v3d);
+	if (ret)
+		dev_err(v3d->dev, "hub IRQ setup failed: %d\n", ret);
+
+	ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 1),
+			       v3d_irq, IRQF_SHARED,
+			       "v3d_core0", v3d);
+	if (ret)
+		dev_err(v3d->dev, "core IRQ setup failed: %d\n", ret);
+
+	v3d_irq_enable(v3d);
+}
+
+void
+v3d_irq_enable(struct v3d_dev *v3d)
+{
+	int core;
+
+	/* Enable our set of interrupts, masking out any others. */
+	for (core = 0; core < v3d->cores; core++) {
+		V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS);
+		V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS);
+	}
+
+	V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS);
+	V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS);
+}
+
+void
+v3d_irq_disable(struct v3d_dev *v3d)
+{
+	int core;
+
+	/* Disable all interrupts. */
+	for (core = 0; core < v3d->cores; core++)
+		V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0);
+	V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0);
+
+	/* Clear any pending interrupts we might have left. */
+	for (core = 0; core < v3d->cores; core++)
+		V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
+	V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
+
+	cancel_work_sync(&v3d->overflow_mem_work);
+}
+
+/** Reinitializes interrupt registers when a GPU reset is performed. */
+void v3d_irq_reset(struct v3d_dev *v3d)
+{
+	v3d_irq_enable(v3d);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c
new file mode 100644
index 0000000..b00f97c
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_mmu.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2017-2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D MMU
+ *
+ * The V3D 3.x hardware (compared to VC4) now includes an MMU.  It has
+ * a single level of page tables for the V3D's 4GB address space to
+ * map to AXI bus addresses, thus it could need up to 4MB of
+ * physically contiguous memory to store the PTEs.
+ *
+ * Because the 4MB of contiguous memory for page tables is precious,
+ * and switching between them is expensive, we load all BOs into the
+ * same 4GB address space.
+ *
+ * To protect clients from each other, we should use the GMP to
+ * quickly mask out (at 128kb granularity) what pages are available to
+ * each client.  This is not yet implemented.
+ */
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define V3D_MMU_PAGE_SHIFT 12
+
+/* Note: All PTEs for the 1MB superpage must be filled with the
+ * superpage bit set.
+ */
+#define V3D_PTE_SUPERPAGE BIT(31)
+#define V3D_PTE_WRITEABLE BIT(29)
+#define V3D_PTE_VALID BIT(28)
+
+static int v3d_mmu_flush_all(struct v3d_dev *v3d)
+{
+	int ret;
+
+	/* Make sure that another flush isn't already running when we
+	 * start this one.
+	 */
+	ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
+			 V3D_MMU_CTL_TLB_CLEARING), 100);
+	if (ret)
+		dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n");
+
+	V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) |
+		  V3D_MMU_CTL_TLB_CLEAR);
+
+	V3D_WRITE(V3D_MMUC_CONTROL,
+		  V3D_MMUC_CONTROL_FLUSH |
+		  V3D_MMUC_CONTROL_ENABLE);
+
+	ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
+			 V3D_MMU_CTL_TLB_CLEARING), 100);
+	if (ret) {
+		dev_err(v3d->dev, "TLB clear wait idle failed\n");
+		return ret;
+	}
+
+	ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) &
+			 V3D_MMUC_CONTROL_FLUSHING), 100);
+	if (ret)
+		dev_err(v3d->dev, "MMUC flush wait idle failed\n");
+
+	return ret;
+}
+
+int v3d_mmu_set_page_table(struct v3d_dev *v3d)
+{
+	V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT);
+	V3D_WRITE(V3D_MMU_CTL,
+		  V3D_MMU_CTL_ENABLE |
+		  V3D_MMU_CTL_PT_INVALID |
+		  V3D_MMU_CTL_PT_INVALID_ABORT |
+		  V3D_MMU_CTL_WRITE_VIOLATION_ABORT |
+		  V3D_MMU_CTL_CAP_EXCEEDED_ABORT);
+	V3D_WRITE(V3D_MMU_ILLEGAL_ADDR,
+		  (v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) |
+		  V3D_MMU_ILLEGAL_ADDR_ENABLE);
+	V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE);
+
+	return v3d_mmu_flush_all(v3d);
+}
+
+void v3d_mmu_insert_ptes(struct v3d_bo *bo)
+{
+	struct v3d_dev *v3d = to_v3d_dev(bo->base.dev);
+	u32 page = bo->node.start;
+	u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID;
+	unsigned int count;
+	struct scatterlist *sgl;
+
+	for_each_sg(bo->sgt->sgl, sgl, bo->sgt->nents, count) {
+		u32 page_address = sg_dma_address(sgl) >> V3D_MMU_PAGE_SHIFT;
+		u32 pte = page_prot | page_address;
+		u32 i;
+
+		BUG_ON(page_address + (sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT) >=
+		       BIT(24));
+
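+		/* Each sg segment is physically contiguous, so each
+		 * successive 4KB page within it is just pte + i.
+		 */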
+		for (i = 0; i < sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT; i++)
+			v3d->pt[page++] = pte + i;
+	}
+
+	WARN_ON_ONCE(page - bo->node.start !=
+		     bo->base.size >> V3D_MMU_PAGE_SHIFT);
+
+	if (v3d_mmu_flush_all(v3d))
+		dev_err(v3d->dev, "MMU flush timeout\n");
+}
+
+void v3d_mmu_remove_ptes(struct v3d_bo *bo)
+{
+	struct v3d_dev *v3d = to_v3d_dev(bo->base.dev);
+	u32 npages = bo->base.size >> V3D_MMU_PAGE_SHIFT;
+	u32 page;
+
+	for (page = bo->node.start; page < bo->node.start + npages; page++)
+		v3d->pt[page] = 0;
+
+	if (v3d_mmu_flush_all(v3d))
+		dev_err(v3d->dev, "MMU flush timeout\n");
+}
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
new file mode 100644
index 0000000..fc13282
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -0,0 +1,295 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2017-2018 Broadcom */
+
+#ifndef V3D_REGS_H
+#define V3D_REGS_H
+
+#include <linux/bitops.h>
+
+#define V3D_MASK(high, low) ((u32)GENMASK(high, low))
+/* Using the GNU statement expression extension */
+#define V3D_SET_FIELD(value, field)					\
+	({								\
+		u32 fieldval = (value) << field##_SHIFT;		\
+		WARN_ON((fieldval & ~field##_MASK) != 0);		\
+		fieldval & field##_MASK;				\
+	 })
+
+#define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >>		\
+				    field##_SHIFT)
+
+/* Hub registers for shared hardware between V3D cores. */
+
+#define V3D_HUB_AXICFG                                 0x00000
+# define V3D_HUB_AXICFG_MAX_LEN_MASK                   V3D_MASK(3, 0)
+# define V3D_HUB_AXICFG_MAX_LEN_SHIFT                  0
+#define V3D_HUB_UIFCFG                                 0x00004
+#define V3D_HUB_IDENT0                                 0x00008
+
+#define V3D_HUB_IDENT1                                 0x0000c
+# define V3D_HUB_IDENT1_WITH_MSO                       BIT(19)
+# define V3D_HUB_IDENT1_WITH_TSY                       BIT(18)
+# define V3D_HUB_IDENT1_WITH_TFU                       BIT(17)
+# define V3D_HUB_IDENT1_WITH_L3C                       BIT(16)
+# define V3D_HUB_IDENT1_NHOSTS_MASK                    V3D_MASK(15, 12)
+# define V3D_HUB_IDENT1_NHOSTS_SHIFT                   12
+# define V3D_HUB_IDENT1_NCORES_MASK                    V3D_MASK(11, 8)
+# define V3D_HUB_IDENT1_NCORES_SHIFT                   8
+# define V3D_HUB_IDENT1_REV_MASK                       V3D_MASK(7, 4)
+# define V3D_HUB_IDENT1_REV_SHIFT                      4
+# define V3D_HUB_IDENT1_TVER_MASK                      V3D_MASK(3, 0)
+# define V3D_HUB_IDENT1_TVER_SHIFT                     0
+
+#define V3D_HUB_IDENT2                                 0x00010
+# define V3D_HUB_IDENT2_WITH_MMU                       BIT(8)
+# define V3D_HUB_IDENT2_L3C_NKB_MASK                   V3D_MASK(7, 0)
+# define V3D_HUB_IDENT2_L3C_NKB_SHIFT                  0
+
+#define V3D_HUB_IDENT3                                 0x00014
+# define V3D_HUB_IDENT3_IPREV_MASK                     V3D_MASK(15, 8)
+# define V3D_HUB_IDENT3_IPREV_SHIFT                    8
+# define V3D_HUB_IDENT3_IPIDX_MASK                     V3D_MASK(7, 0)
+# define V3D_HUB_IDENT3_IPIDX_SHIFT                    0
+
+#define V3D_HUB_INT_STS                                0x00050
+#define V3D_HUB_INT_SET                                0x00054
+#define V3D_HUB_INT_CLR                                0x00058
+#define V3D_HUB_INT_MSK_STS                            0x0005c
+#define V3D_HUB_INT_MSK_SET                            0x00060
+#define V3D_HUB_INT_MSK_CLR                            0x00064
+# define V3D_HUB_INT_MMU_WRV                           BIT(5)
+# define V3D_HUB_INT_MMU_PTI                           BIT(4)
+# define V3D_HUB_INT_MMU_CAP                           BIT(3)
+# define V3D_HUB_INT_MSO                               BIT(2)
+# define V3D_HUB_INT_TFUC                              BIT(1)
+# define V3D_HUB_INT_TFUF                              BIT(0)
+
+#define V3D_GCA_CACHE_CTRL                             0x0000c
+# define V3D_GCA_CACHE_CTRL_FLUSH                      BIT(0)
+
+#define V3D_GCA_SAFE_SHUTDOWN                          0x000b0
+# define V3D_GCA_SAFE_SHUTDOWN_EN                      BIT(0)
+
+#define V3D_GCA_SAFE_SHUTDOWN_ACK                      0x000b4
+# define V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED               3
+
+# define V3D_TOP_GR_BRIDGE_REVISION                    0x00000
+# define V3D_TOP_GR_BRIDGE_MAJOR_MASK                  V3D_MASK(15, 8)
+# define V3D_TOP_GR_BRIDGE_MAJOR_SHIFT                 8
+# define V3D_TOP_GR_BRIDGE_MINOR_MASK                  V3D_MASK(7, 0)
+# define V3D_TOP_GR_BRIDGE_MINOR_SHIFT                 0
+
+/* 7268 reset reg */
+# define V3D_TOP_GR_BRIDGE_SW_INIT_0                   0x00008
+# define V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT BIT(0)
+/* 7278 reset reg */
+# define V3D_TOP_GR_BRIDGE_SW_INIT_1                   0x0000c
+# define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0)
+
+/* Per-MMU registers. */
+
+#define V3D_MMUC_CONTROL                               0x01000
+# define V3D_MMUC_CONTROL_CLEAR                        BIT(3)
+# define V3D_MMUC_CONTROL_FLUSHING                     BIT(2)
+# define V3D_MMUC_CONTROL_FLUSH                        BIT(1)
+# define V3D_MMUC_CONTROL_ENABLE                       BIT(0)
+
+#define V3D_MMU_CTL                                    0x01200
+# define V3D_MMU_CTL_CAP_EXCEEDED                      BIT(27)
+# define V3D_MMU_CTL_CAP_EXCEEDED_ABORT                BIT(26)
+# define V3D_MMU_CTL_CAP_EXCEEDED_INT                  BIT(25)
+# define V3D_MMU_CTL_CAP_EXCEEDED_EXCEPTION            BIT(24)
+# define V3D_MMU_CTL_PT_INVALID                        BIT(20)
+# define V3D_MMU_CTL_PT_INVALID_ABORT                  BIT(19)
+# define V3D_MMU_CTL_PT_INVALID_INT                    BIT(18)
+# define V3D_MMU_CTL_PT_INVALID_EXCEPTION              BIT(17)
+# define V3D_MMU_CTL_WRITE_VIOLATION                   BIT(16)
+# define V3D_MMU_CTL_WRITE_VIOLATION_ABORT             BIT(11)
+# define V3D_MMU_CTL_WRITE_VIOLATION_INT               BIT(10)
+# define V3D_MMU_CTL_WRITE_VIOLATION_EXCEPTION         BIT(9)
+# define V3D_MMU_CTL_TLB_CLEARING                      BIT(7)
+# define V3D_MMU_CTL_TLB_STATS_CLEAR                   BIT(3)
+# define V3D_MMU_CTL_TLB_CLEAR                         BIT(2)
+# define V3D_MMU_CTL_TLB_STATS_ENABLE                  BIT(1)
+# define V3D_MMU_CTL_ENABLE                            BIT(0)
+
+#define V3D_MMU_PT_PA_BASE                             0x01204
+#define V3D_MMU_HIT                                    0x01208
+#define V3D_MMU_MISSES                                 0x0120c
+#define V3D_MMU_STALLS                                 0x01210
+
+#define V3D_MMU_ADDR_CAP                               0x01214
+# define V3D_MMU_ADDR_CAP_ENABLE                       BIT(31)
+# define V3D_MMU_ADDR_CAP_MPAGE_MASK                   V3D_MASK(11, 0)
+# define V3D_MMU_ADDR_CAP_MPAGE_SHIFT                  0
+
+#define V3D_MMU_SHOOT_DOWN                             0x01218
+# define V3D_MMU_SHOOT_DOWN_SHOOTING                   BIT(29)
+# define V3D_MMU_SHOOT_DOWN_SHOOT                      BIT(28)
+# define V3D_MMU_SHOOT_DOWN_PAGE_MASK                  V3D_MASK(27, 0)
+# define V3D_MMU_SHOOT_DOWN_PAGE_SHIFT                 0
+
+#define V3D_MMU_BYPASS_START                           0x0121c
+#define V3D_MMU_BYPASS_END                             0x01220
+
+/* AXI ID of the access that faulted */
+#define V3D_MMU_VIO_ID                                 0x0122c
+
+/* Address for illegal PTEs to return */
+#define V3D_MMU_ILLEGAL_ADDR                           0x01230
+# define V3D_MMU_ILLEGAL_ADDR_ENABLE                   BIT(31)
+
+/* Address that faulted */
+#define V3D_MMU_VIO_ADDR                               0x01234
+
+/* Per-V3D-core registers */
+
+#define V3D_CTL_IDENT0                                 0x00000
+# define V3D_IDENT0_VER_MASK                           V3D_MASK(31, 24)
+# define V3D_IDENT0_VER_SHIFT                          24
+
+#define V3D_CTL_IDENT1                                 0x00004
+/* Multiples of 1KB */
+# define V3D_IDENT1_VPM_SIZE_MASK                      V3D_MASK(31, 28)
+# define V3D_IDENT1_VPM_SIZE_SHIFT                     28
+# define V3D_IDENT1_NSEM_MASK                          V3D_MASK(23, 16)
+# define V3D_IDENT1_NSEM_SHIFT                         16
+# define V3D_IDENT1_NTMU_MASK                          V3D_MASK(15, 12)
+# define V3D_IDENT1_NTMU_SHIFT                         12
+# define V3D_IDENT1_QUPS_MASK                          V3D_MASK(11, 8)
+# define V3D_IDENT1_QUPS_SHIFT                         8
+# define V3D_IDENT1_NSLC_MASK                          V3D_MASK(7, 4)
+# define V3D_IDENT1_NSLC_SHIFT                         4
+# define V3D_IDENT1_REV_MASK                           V3D_MASK(3, 0)
+# define V3D_IDENT1_REV_SHIFT                          0
+
+#define V3D_CTL_IDENT2                                 0x00008
+# define V3D_IDENT2_BCG_INT                            BIT(28)
+
+#define V3D_CTL_MISCCFG                                0x00018
+# define V3D_MISCCFG_OVRTMUOUT                         BIT(0)
+
+#define V3D_CTL_L2CACTL                                0x00020
+# define V3D_L2CACTL_L2CCLR                            BIT(2)
+# define V3D_L2CACTL_L2CDIS                            BIT(1)
+# define V3D_L2CACTL_L2CENA                            BIT(0)
+
+#define V3D_CTL_SLCACTL                                0x00024
+# define V3D_SLCACTL_TVCCS_MASK                        V3D_MASK(27, 24)
+# define V3D_SLCACTL_TVCCS_SHIFT                       24
+# define V3D_SLCACTL_TDCCS_MASK                        V3D_MASK(19, 16)
+# define V3D_SLCACTL_TDCCS_SHIFT                       16
+# define V3D_SLCACTL_UCC_MASK                          V3D_MASK(11, 8)
+# define V3D_SLCACTL_UCC_SHIFT                         8
+# define V3D_SLCACTL_ICC_MASK                          V3D_MASK(3, 0)
+# define V3D_SLCACTL_ICC_SHIFT                         0
+
+#define V3D_CTL_L2TCACTL                               0x00030
+# define V3D_L2TCACTL_TMUWCF                           BIT(8)
+# define V3D_L2TCACTL_L2T_NO_WM                        BIT(4)
+# define V3D_L2TCACTL_FLM_FLUSH                        0
+# define V3D_L2TCACTL_FLM_CLEAR                        1
+# define V3D_L2TCACTL_FLM_CLEAN                        2
+# define V3D_L2TCACTL_FLM_MASK                         V3D_MASK(2, 1)
+# define V3D_L2TCACTL_FLM_SHIFT                        1
+# define V3D_L2TCACTL_L2TFLS                           BIT(0)
+#define V3D_CTL_L2TFLSTA                               0x00034
+#define V3D_CTL_L2TFLEND                               0x00038
+
+#define V3D_CTL_INT_STS                                0x00050
+#define V3D_CTL_INT_SET                                0x00054
+#define V3D_CTL_INT_CLR                                0x00058
+#define V3D_CTL_INT_MSK_STS                            0x0005c
+#define V3D_CTL_INT_MSK_SET                            0x00060
+#define V3D_CTL_INT_MSK_CLR                            0x00064
+# define V3D_INT_QPU_MASK                              V3D_MASK(27, 16)
+# define V3D_INT_QPU_SHIFT                             16
+# define V3D_INT_GMPV                                  BIT(5)
+# define V3D_INT_TRFB                                  BIT(4)
+# define V3D_INT_SPILLUSE                              BIT(3)
+# define V3D_INT_OUTOMEM                               BIT(2)
+# define V3D_INT_FLDONE                                BIT(1)
+# define V3D_INT_FRDONE                                BIT(0)
+
+#define V3D_CLE_CT0CS                                  0x00100
+#define V3D_CLE_CT1CS                                  0x00104
+#define V3D_CLE_CTNCS(n) (V3D_CLE_CT0CS + 4 * (n))
+#define V3D_CLE_CT0EA                                  0x00108
+#define V3D_CLE_CT1EA                                  0x0010c
+#define V3D_CLE_CTNEA(n) (V3D_CLE_CT0EA + 4 * (n))
+#define V3D_CLE_CT0CA                                  0x00110
+#define V3D_CLE_CT1CA                                  0x00114
+#define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * (n))
+#define V3D_CLE_CT0RA                                  0x00118
+#define V3D_CLE_CT1RA                                  0x0011c
+#define V3D_CLE_CT0LC                                  0x00120
+#define V3D_CLE_CT1LC                                  0x00124
+#define V3D_CLE_CT0PC                                  0x00128
+#define V3D_CLE_CT1PC                                  0x0012c
+#define V3D_CLE_PCS                                    0x00130
+#define V3D_CLE_BFC                                    0x00134
+#define V3D_CLE_RFC                                    0x00138
+#define V3D_CLE_TFBC                                   0x0013c
+#define V3D_CLE_TFIT                                   0x00140
+#define V3D_CLE_CT1CFG                                 0x00144
+#define V3D_CLE_CT1TILECT                              0x00148
+#define V3D_CLE_CT1TSKIP                               0x0014c
+#define V3D_CLE_CT1PTCT                                0x00150
+#define V3D_CLE_CT0SYNC                                0x00154
+#define V3D_CLE_CT1SYNC                                0x00158
+#define V3D_CLE_CT0QTS                                 0x0015c
+# define V3D_CLE_CT0QTS_ENABLE                         BIT(1)
+#define V3D_CLE_CT0QBA                                 0x00160
+#define V3D_CLE_CT1QBA                                 0x00164
+#define V3D_CLE_CTNQBA(n) (V3D_CLE_CT0QBA + 4 * (n))
+#define V3D_CLE_CT0QEA                                 0x00168
+#define V3D_CLE_CT1QEA                                 0x0016c
+#define V3D_CLE_CTNQEA(n) (V3D_CLE_CT0QEA + 4 * (n))
+#define V3D_CLE_CT0QMA                                 0x00170
+#define V3D_CLE_CT0QMS                                 0x00174
+#define V3D_CLE_CT1QCFG                                0x00178
+/* If set without ETPROC, entirely skip tiles with no primitives. */
+# define V3D_CLE_QCFG_ETFILT                           BIT(7)
+/* If set with ETFILT, just write the clear color to tiles with no
+ * primitives.
+ */
+# define V3D_CLE_QCFG_ETPROC                           BIT(6)
+# define V3D_CLE_QCFG_ETSFLUSH                         BIT(1)
+# define V3D_CLE_QCFG_MCDIS                            BIT(0)
+
+#define V3D_PTB_BPCA                                   0x00300
+#define V3D_PTB_BPCS                                   0x00304
+#define V3D_PTB_BPOA                                   0x00308
+#define V3D_PTB_BPOS                                   0x0030c
+
+#define V3D_PTB_BXCF                                   0x00310
+# define V3D_PTB_BXCF_RWORDERDISA                      BIT(1)
+# define V3D_PTB_BXCF_CLIPDISA                         BIT(0)
+
+#define V3D_GMP_STATUS                                 0x00800
+# define V3D_GMP_STATUS_GMPRST                         BIT(31)
+# define V3D_GMP_STATUS_WR_COUNT_MASK                  V3D_MASK(30, 24)
+# define V3D_GMP_STATUS_WR_COUNT_SHIFT                 24
+# define V3D_GMP_STATUS_RD_COUNT_MASK                  V3D_MASK(22, 16)
+# define V3D_GMP_STATUS_RD_COUNT_SHIFT                 16
+# define V3D_GMP_STATUS_WR_ACTIVE                      BIT(5)
+# define V3D_GMP_STATUS_RD_ACTIVE                      BIT(4)
+# define V3D_GMP_STATUS_CFG_BUSY                       BIT(3)
+# define V3D_GMP_STATUS_CNTOVF                         BIT(2)
+# define V3D_GMP_STATUS_INVPROT                        BIT(1)
+# define V3D_GMP_STATUS_VIO                            BIT(0)
+
+#define V3D_GMP_CFG                                    0x00804
+# define V3D_GMP_CFG_LBURSTEN                          BIT(3)
+# define V3D_GMP_CFG_PGCRSEN                           BIT(2) /* assumed: bit 2 */
+# define V3D_GMP_CFG_STOP_REQ                          BIT(1)
+# define V3D_GMP_CFG_PROT_ENABLE                       BIT(0)
+
+#define V3D_GMP_VIO_ADDR                               0x00808
+#define V3D_GMP_VIO_TYPE                               0x0080c
+#define V3D_GMP_TABLE_ADDR                             0x00810
+#define V3D_GMP_CLEAR_LOAD                             0x00814
+#define V3D_GMP_PRESERVE_LOAD                          0x00818
+#define V3D_GMP_VALID_LINES                            0x00820
+
+#endif /* V3D_REGS_H */
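
The V3D_SET_FIELD/V3D_GET_FIELD helpers at the top of this header are
the usual mask-and-shift pair keyed off the FIELD_MASK/FIELD_SHIFT
naming convention.  A standalone sketch of how a read decodes, with
GENMASK expanded by hand and a made-up IDENT1 value:

    #include <stdint.h>
    #include <stdio.h>

    /* Standalone restatement of the helpers above so the sketch
     * builds outside the kernel.
     */
    #define V3D_MASK(high, low) \
            ((uint32_t)((~0u << (low)) & (~0u >> (31 - (high)))))
    #define V3D_GET_FIELD(word, field) \
            (((word) & field##_MASK) >> field##_SHIFT)

    #define V3D_HUB_IDENT1_NCORES_MASK  V3D_MASK(11, 8)
    #define V3D_HUB_IDENT1_NCORES_SHIFT 8

    int main(void)
    {
            /* 0x00000142 is a made-up IDENT1 readback: one core in
             * bits 11:8, rev 4, tver 2.
             */
            uint32_t ident1 = 0x00000142;

            printf("ncores = %u\n",
                   V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES));
            return 0;
    }
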
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
new file mode 100644
index 0000000..b07bece
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D scheduling
+ *
+ * The shared DRM GPU scheduler is used to coordinate submitting jobs
+ * to the hardware.  Each DRM fd (roughly a client process) gets its
+ * own scheduler entity, which will process jobs in order.  The GPU
+ * scheduler will round-robin between clients to submit the next job.
+ *
+ * For simplicity, and in order to keep latency low for interactive
+ * jobs when bulk background jobs are queued up, we submit a new job
+ * to the HW only when it has completed the last one, instead of
+ * filling up the CT[01]Q FIFOs with jobs.  Similarly, we use
+ * v3d_job_dependency() to manage the dependency between bin and
+ * render, instead of having the clients submit jobs using the
+ * HW's semaphores to interlock between them.
+ */
+
+#include <linux/kthread.h>
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+#include "v3d_trace.h"
+
+static struct v3d_job *
+to_v3d_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_job, base);
+}
+
+static void
+v3d_job_free(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+
+	v3d_exec_put(job->exec);
+}
+
+/**
+ * v3d_job_dependency() - Return the fences a job depends on, one by one.
+ *
+ * v3d_job_run() won't be called until all of them have been signaled.
+ */
+static struct dma_fence *
+v3d_job_dependency(struct drm_sched_job *sched_job,
+		   struct drm_sched_entity *s_entity)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+	struct v3d_exec_info *exec = job->exec;
+	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+	struct dma_fence *fence;
+
+	fence = job->in_fence;
+	if (fence) {
+		job->in_fence = NULL;
+		return fence;
+	}
+
+	if (q == V3D_RENDER) {
+		/* If we had a bin job, the render job definitely depends on
+		 * it. We first have to wait for bin to be scheduled, so that
+		 * its done_fence is created.
+		 */
+		fence = exec->bin_done_fence;
+		if (fence) {
+			exec->bin_done_fence = NULL;
+			return fence;
+		}
+	}
+
+	/* XXX: Wait on a fence for switching the GMP if necessary,
+	 * and then do so.
+	 */
+
+	return fence;
+}
+
+static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+	struct v3d_exec_info *exec = job->exec;
+	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+	struct v3d_dev *v3d = exec->v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+	unsigned long irqflags;
+
+	if (unlikely(job->base.s_fence->finished.error))
+		return NULL;
+
+	/* Lock required around bin_job update vs
+	 * v3d_overflow_mem_work().
+	 */
+	spin_lock_irqsave(&v3d->job_lock, irqflags);
+	if (q == V3D_BIN) {
+		v3d->bin_job = job->exec;
+
+		/* Clear out the overflow allocation, so we don't
+		 * reuse the overflow attached to a previous job.
+		 */
+		V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
+	} else {
+		v3d->render_job = job->exec;
+	}
+	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+
+	/* Can we avoid this flush when q==RENDER?  We need to be
+	 * careful of scheduling, though -- imagine job0 rendering to
+	 * a texture and job1 reading from it, with the jobs executed
+	 * as bin0, bin1, render0, render1, so that the cache flush done
+	 * at job1's bin time wasn't enough.
+	 */
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, q);
+	if (!fence)
+		return fence;
+
+	if (job->done_fence)
+		dma_fence_put(job->done_fence);
+	job->done_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
+			    job->start, job->end);
+
+	if (q == V3D_BIN) {
+		if (exec->qma) {
+			V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
+			V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
+		}
+		if (exec->qts) {
+			V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
+				       V3D_CLE_CT0QTS_ENABLE |
+				       exec->qts);
+		}
+	} else {
+		/* XXX: Set the QCFG */
+	}
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
+
+	return fence;
+}
+
+static void
+v3d_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+	struct v3d_exec_info *exec = job->exec;
+	struct v3d_dev *v3d = exec->v3d;
+	enum v3d_queue q;
+
+	mutex_lock(&v3d->reset_lock);
+
+	/* block scheduler */
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
+
+		kthread_park(sched->thread);
+		drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
+					       sched_job : NULL));
+	}
+
+	/* get the GPU back into the init state */
+	v3d_reset(v3d);
+
+	/* Unblock schedulers and restart their jobs. */
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		drm_sched_job_recovery(&v3d->queue[q].sched);
+		kthread_unpark(v3d->queue[q].sched.thread);
+	}
+
+	mutex_unlock(&v3d->reset_lock);
+}
+
+static const struct drm_sched_backend_ops v3d_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_job_run,
+	.timedout_job = v3d_job_timedout,
+	.free_job = v3d_job_free
+};
+
+int
+v3d_sched_init(struct v3d_dev *v3d)
+{
+	int hw_jobs_limit = 1;
+	int job_hang_limit = 0;
+	int hang_limit_ms = 500;
+	int ret;
+
+	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
+			     &v3d_sched_ops,
+			     hw_jobs_limit, job_hang_limit,
+			     msecs_to_jiffies(hang_limit_ms),
+			     "v3d_bin");
+	if (ret) {
+		dev_err(v3d->dev, "Failed to create bin scheduler: %d.\n", ret);
+		return ret;
+	}
+
+	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
+			     &v3d_sched_ops,
+			     hw_jobs_limit, job_hang_limit,
+			     msecs_to_jiffies(hang_limit_ms),
+			     "v3d_render");
+	if (ret) {
+		dev_err(v3d->dev, "Failed to create render scheduler: %d.\n",
+			ret);
+		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+		return ret;
+	}
+
+	return 0;
+}
+
+void
+v3d_sched_fini(struct v3d_dev *v3d)
+{
+	enum v3d_queue q;
+
+	for (q = 0; q < V3D_MAX_QUEUES; q++)
+		drm_sched_fini(&v3d->queue[q].sched);
+}
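
The dependency hook above drains one fence per call: the user's
in_fence first, then (for render jobs) the bin_done_fence, and NULL
once nothing is left, at which point run_job() may fire.  A toy
userspace model of that drain loop, with strings standing in for
dma_fences:

    #include <stdio.h>

    struct toy_job {
            const char *in_fence;        /* user-supplied wait, may be NULL */
            const char *bin_done_fence;  /* render-only dependency */
    };

    /* One outstanding fence handed back per call, NULL when done. */
    static const char *toy_dependency(struct toy_job *job, int is_render)
    {
            const char *fence = job->in_fence;

            if (fence) {
                    job->in_fence = NULL;
                    return fence;
            }
            if (is_render && job->bin_done_fence) {
                    fence = job->bin_done_fence;
                    job->bin_done_fence = NULL;
                    return fence;
            }
            return NULL;
    }

    int main(void)
    {
            struct toy_job render = { "user in-fence", "bin done-fence" };
            const char *f;

            while ((f = toy_dependency(&render, 1)))
                    printf("wait on: %s\n", f);
            printf("run_job()\n");
            return 0;
    }
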
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
new file mode 100644
index 0000000..85dd351
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015-2018 Broadcom */
+
+#if !defined(_V3D_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _V3D_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM v3d
+#define TRACE_INCLUDE_FILE v3d_trace
+
+TRACE_EVENT(v3d_submit_cl,
+	    TP_PROTO(struct drm_device *dev, bool is_render,
+		     uint64_t seqno,
+		     u32 ctnqba, u32 ctnqea),
+	    TP_ARGS(dev, is_render, seqno, ctnqba, ctnqea),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(bool, is_render)
+			     __field(u64, seqno)
+			     __field(u32, ctnqba)
+			     __field(u32, ctnqea)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->is_render = is_render;
+			   __entry->seqno = seqno;
+			   __entry->ctnqba = ctnqba;
+			   __entry->ctnqea = ctnqea;
+			   ),
+
+	    TP_printk("dev=%u, %s, seqno=%llu, 0x%08x..0x%08x",
+		      __entry->dev,
+		      __entry->is_render ? "RCL" : "BCL",
+		      __entry->seqno,
+		      __entry->ctnqba,
+		      __entry->ctnqea)
+);
+
+TRACE_EVENT(v3d_reset_begin,
+	    TP_PROTO(struct drm_device *dev),
+	    TP_ARGS(dev),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   ),
+
+	    TP_printk("dev=%u",
+		      __entry->dev)
+);
+
+TRACE_EVENT(v3d_reset_end,
+	    TP_PROTO(struct drm_device *dev),
+	    TP_ARGS(dev),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   ),
+
+	    TP_printk("dev=%u",
+		      __entry->dev)
+);
+
+#endif /* _V3D_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/v3d/v3d_trace_points.c b/drivers/gpu/drm/v3d/v3d_trace_points.c
new file mode 100644
index 0000000..482922d
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_trace_points.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015 Broadcom */
+
+#include "v3d_drv.h"
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "v3d_trace.h"
+#endif
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index c61dff5..c8650bb 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -42,51 +42,18 @@
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
-struct vc4_crtc {
-	struct drm_crtc base;
-	const struct vc4_crtc_data *data;
-	void __iomem *regs;
-
-	/* Timestamp at start of vblank irq - unaffected by lock delays. */
-	ktime_t t_vblank;
-
-	/* Which HVS channel we're using for our CRTC. */
-	int channel;
-
-	u8 lut_r[256];
-	u8 lut_g[256];
-	u8 lut_b[256];
-	/* Size in pixels of the COB memory allocated to this CRTC. */
-	u32 cob_size;
-
-	struct drm_pending_vblank_event *event;
-};
-
 struct vc4_crtc_state {
 	struct drm_crtc_state base;
 	/* Dlist area for this CRTC configuration. */
 	struct drm_mm_node mm;
 };
 
-static inline struct vc4_crtc *
-to_vc4_crtc(struct drm_crtc *crtc)
-{
-	return (struct vc4_crtc *)crtc;
-}
-
 static inline struct vc4_crtc_state *
 to_vc4_crtc_state(struct drm_crtc_state *crtc_state)
 {
 	return (struct vc4_crtc_state *)crtc_state;
 }
 
-struct vc4_crtc_data {
-	/* Which channel of the HVS this pixelvalve sources from. */
-	int hvs_channel;
-
-	enum vc4_encoder_type encoder_types[4];
-};
-
 #define CRTC_WRITE(offset, val) writel(val, vc4_crtc->regs + (offset))
 #define CRTC_READ(offset) readl(vc4_crtc->regs + (offset))
 
@@ -298,23 +265,21 @@
 		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]);
 }
 
-static int
-vc4_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
-		   uint32_t size,
-		   struct drm_modeset_acquire_ctx *ctx)
+static void
+vc4_crtc_update_gamma_lut(struct drm_crtc *crtc)
 {
 	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	struct drm_color_lut *lut = crtc->state->gamma_lut->data;
+	u32 length = drm_color_lut_size(crtc->state->gamma_lut);
 	u32 i;
 
-	for (i = 0; i < size; i++) {
-		vc4_crtc->lut_r[i] = r[i] >> 8;
-		vc4_crtc->lut_g[i] = g[i] >> 8;
-		vc4_crtc->lut_b[i] = b[i] >> 8;
+	for (i = 0; i < length; i++) {
+		vc4_crtc->lut_r[i] = drm_color_lut_extract(lut[i].red, 8);
+		vc4_crtc->lut_g[i] = drm_color_lut_extract(lut[i].green, 8);
+		vc4_crtc->lut_b[i] = drm_color_lut_extract(lut[i].blue, 8);
 	}
 
 	vc4_crtc_lut_load(crtc);
-
-	return 0;
 }
 
 static u32 vc4_get_fifo_full_level(u32 format)
@@ -699,6 +664,22 @@
 	if (crtc->state->active && old_state->active)
 		vc4_crtc_update_dlist(crtc);
 
+	if (crtc->state->color_mgmt_changed) {
+		u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(vc4_crtc->channel));
+
+		if (crtc->state->gamma_lut) {
+			vc4_crtc_update_gamma_lut(crtc);
+			dispbkgndx |= SCALER_DISPBKGND_GAMMA;
+		} else {
+			/* Unsetting DISPBKGND_GAMMA skips the gamma lut step
+			 * in hardware, which is the same as a linear lut that
+			 * DRM expects us to use in absence of a user lut.
+			 */
+			dispbkgndx &= ~SCALER_DISPBKGND_GAMMA;
+		}
+		HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel), dispbkgndx);
+	}
+
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
 		vc4_hvs_dump_state(dev);
@@ -953,7 +934,7 @@
 	.reset = vc4_crtc_reset,
 	.atomic_duplicate_state = vc4_crtc_duplicate_state,
 	.atomic_destroy_state = vc4_crtc_destroy_state,
-	.gamma_set = vc4_crtc_gamma_set,
+	.gamma_set = drm_atomic_helper_legacy_gamma_set,
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
 };
@@ -1079,6 +1060,12 @@
 	primary_plane->crtc = crtc;
 	vc4_crtc->channel = vc4_crtc->data->hvs_channel;
 	drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r));
+
+	/* We support CTM, but only for one CRTC at a time. It's therefore
+	 * implemented as private driver state in vc4_kms, not here.
+	 */
+	drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size);
 
 	/* Set up some arbitrary number of planes.  We're not limited
 	 * by a set number of physical registers, just the space in
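
The new vc4_crtc_update_gamma_lut() reduces each 16-bit LUT entry to
the 8 bits the HVS stores per channel.  A sketch of the scaling
drm_color_lut_extract() performs -- written from memory of the
2018-era helper, so treat the rounding detail as an assumption:

    #include <stdint.h>
    #include <stdio.h>

    /* Rounding shift from a 16-bit LUT entry down to the target
     * precision, clamped to the field's maximum.
     */
    static uint32_t color_lut_extract(uint32_t user_input, uint32_t bits)
    {
            uint32_t max = 0xffff >> (16 - bits);
            uint32_t val = user_input + (1u << (16 - bits - 1));

            val >>= 16 - bits;
            return val > max ? max : val;
    }

    int main(void)
    {
            /* Full intensity maps to 0xff, mid-scale to 0x80. */
            printf("0xffff -> 0x%02x\n", color_lut_extract(0xffff, 8));
            printf("0x8000 -> 0x%02x\n", color_lut_extract(0x8000, 8));
            return 0;
    }
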
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 7c95ed5..466d0a2 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -176,7 +176,8 @@
 			    DRIVER_GEM |
 			    DRIVER_HAVE_IRQ |
 			    DRIVER_RENDER |
-			    DRIVER_PRIME),
+			    DRIVER_PRIME |
+			    DRIVER_SYNCOBJ),
 	.lastclose = drm_fb_helper_lastclose,
 	.open = vc4_open,
 	.postclose = vc4_close,
@@ -319,8 +320,8 @@
 
 static void vc4_drm_unbind(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct drm_device *drm = platform_get_drvdata(pdev);
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
 
 	drm_dev_unregister(drm);
 
@@ -328,6 +329,8 @@
 
 	drm_mode_config_cleanup(drm);
 
+	drm_atomic_private_obj_fini(&vc4->ctm_manager);
+
 	drm_dev_unref(drm);
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 1b4cd1f..554a4e8 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -10,6 +10,8 @@
 #include <drm/drmP.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_syncobj.h>
 
 #include "uapi/drm/vc4_drm.h"
 
@@ -193,6 +195,9 @@
 	} hangcheck;
 
 	struct semaphore async_modeset;
+
+	struct drm_modeset_lock ctm_state_lock;
+	struct drm_private_obj ctm_manager;
 };
 
 static inline struct vc4_dev *
@@ -392,6 +397,39 @@
 	return container_of(encoder, struct vc4_encoder, base);
 }
 
+struct vc4_crtc_data {
+	/* Which channel of the HVS this pixelvalve sources from. */
+	int hvs_channel;
+
+	enum vc4_encoder_type encoder_types[4];
+};
+
+struct vc4_crtc {
+	struct drm_crtc base;
+	const struct vc4_crtc_data *data;
+	void __iomem *regs;
+
+	/* Timestamp at start of vblank irq - unaffected by lock delays. */
+	ktime_t t_vblank;
+
+	/* Which HVS channel we're using for our CRTC. */
+	int channel;
+
+	u8 lut_r[256];
+	u8 lut_g[256];
+	u8 lut_b[256];
+	/* Size in pixels of the COB memory allocated to this CRTC. */
+	u32 cob_size;
+
+	struct drm_pending_vblank_event *event;
+};
+
+static inline struct vc4_crtc *
+to_vc4_crtc(struct drm_crtc *crtc)
+{
+	return (struct vc4_crtc *)crtc;
+}
+
 #define V3D_READ(offset) readl(vc4->v3d->regs + offset)
 #define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset)
 #define HVS_READ(offset) readl(vc4->hvs->regs + offset)
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index 94085f8..8aa8978 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -753,6 +753,11 @@
 			 (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) |
 			 (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_STOP : 0));
 	int ret;
+	bool ulps_currently_enabled = (DSI_PORT_READ(PHY_AFEC0) &
+				       DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS));
+
+	if (ulps == ulps_currently_enabled)
+		return;
 
 	DSI_PORT_WRITE(STAT, stat_ulps);
 	DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 2107b0d..7910b9a 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -27,6 +27,7 @@
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/sched/signal.h>
+#include <linux/dma-fence-array.h>
 
 #include "uapi/drm/vc4_drm.h"
 #include "vc4_drv.h"
@@ -655,7 +656,8 @@
  */
 static int
 vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
-		 struct ww_acquire_ctx *acquire_ctx)
+		 struct ww_acquire_ctx *acquire_ctx,
+		 struct drm_syncobj *out_sync)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_exec_info *renderjob;
@@ -678,6 +680,9 @@
 	fence->seqno = exec->seqno;
 	exec->fence = &fence->base;
 
+	if (out_sync)
+		drm_syncobj_replace_fence(out_sync, exec->fence);
+
 	vc4_update_bo_seqnos(exec, seqno);
 
 	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
@@ -1113,8 +1118,10 @@
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_file *vc4file = file_priv->driver_priv;
 	struct drm_vc4_submit_cl *args = data;
+	struct drm_syncobj *out_sync = NULL;
 	struct vc4_exec_info *exec;
 	struct ww_acquire_ctx acquire_ctx;
+	struct dma_fence *in_fence;
 	int ret = 0;
 
 	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
@@ -1126,7 +1133,7 @@
 	}
 
 	if (args->pad2 != 0) {
-		DRM_DEBUG("->pad2 must be set to zero\n");
+		DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2);
 		return -EINVAL;
 	}
 
@@ -1164,6 +1171,29 @@
 		}
 	}
 
+	if (args->in_sync) {
+		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
+					     &in_fence);
+		if (ret)
+			goto fail;
+
+		/* When the fence (or fence array) is exclusively from our
+		 * context we can skip the wait, since jobs are executed in
+		 * order of their submission through this ioctl and the
+		 * fence can only be from a prior job.
+		 */
+		if (!dma_fence_match_context(in_fence,
+					     vc4->dma_fence_context)) {
+			ret = dma_fence_wait(in_fence, true);
+			if (ret) {
+				dma_fence_put(in_fence);
+				goto fail;
+			}
+		}
+
+		dma_fence_put(in_fence);
+	}
+
 	if (exec->args->bin_cl_size != 0) {
 		ret = vc4_get_bcl(dev, exec);
 		if (ret)
@@ -1181,12 +1211,33 @@
 	if (ret)
 		goto fail;
 
+	if (args->out_sync) {
+		out_sync = drm_syncobj_find(file_priv, args->out_sync);
+		if (!out_sync) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		/* We replace the fence in out_sync in vc4_queue_submit since
+		 * the render job could execute immediately after that call.
+		 * If it finishes before our ioctl processing resumes the
+		 * render job fence could already have been freed.
+		 */
+	}
+
 	/* Clear this out of the struct we'll be putting in the queue,
 	 * since it's part of our stack.
 	 */
 	exec->args = NULL;
 
-	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
+	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
+
+	/* The syncobj isn't part of the exec data and we need to free our
+	 * reference even if job submission failed.
+	 */
+	if (out_sync)
+		drm_syncobj_put(out_sync);
+
 	if (ret)
 		goto fail;
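
With the in_sync/out_sync plumbing above, a client can chain
submissions through syncobjs.  A hypothetical userspace flow -- the
libdrm call names are recalled from the 2018-era syncobj API and the
header path depends on the install, so treat both as assumptions:

    #include <stdint.h>
    #include <xf86drm.h>
    #include "vc4_drm.h"   /* location varies with the libdrm install */

    /* Create a syncobj, let the kernel install the job's fence in it
     * via out_sync, then wait on it (the handle could equally be fed
     * to a later submit as in_sync).
     */
    static int submit_and_wait(int fd, struct drm_vc4_submit_cl *args)
    {
            uint32_t out_sync;
            int ret;

            ret = drmSyncobjCreate(fd, 0, &out_sync);
            if (ret)
                    return ret;

            args->out_sync = out_sync;
            ret = drmIoctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, args);
            if (!ret)
                    ret = drmSyncobjWait(fd, &out_sync, 1, INT64_MAX,
                                         0, NULL);

            drmSyncobjDestroy(fd, out_sync);
            return ret;
    }
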
 
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 2b62fc5..5d8c749 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -58,6 +58,10 @@
 	HVS_REG(SCALER_DISPSTAT2),
 	HVS_REG(SCALER_DISPBASE2),
 	HVS_REG(SCALER_DISPALPHA2),
+	HVS_REG(SCALER_OLEDOFFS),
+	HVS_REG(SCALER_OLEDCOEF0),
+	HVS_REG(SCALER_OLEDCOEF1),
+	HVS_REG(SCALER_OLEDCOEF2),
 };
 
 void vc4_hvs_dump_state(struct drm_device *dev)
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index ba60153..8a411e5 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -23,6 +23,117 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include "vc4_drv.h"
+#include "vc4_regs.h"
+
+struct vc4_ctm_state {
+	struct drm_private_state base;
+	struct drm_color_ctm *ctm;
+	int fifo;
+};
+
+static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv)
+{
+	return container_of(priv, struct vc4_ctm_state, base);
+}
+
+static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state,
+					       struct drm_private_obj *manager)
+{
+	struct drm_device *dev = state->dev;
+	struct vc4_dev *vc4 = dev->dev_private;
+	struct drm_private_state *priv_state;
+	int ret;
+
+	ret = drm_modeset_lock(&vc4->ctm_state_lock, state->acquire_ctx);
+	if (ret)
+		return ERR_PTR(ret);
+
+	priv_state = drm_atomic_get_private_obj_state(state, manager);
+	if (IS_ERR(priv_state))
+		return ERR_CAST(priv_state);
+
+	return to_vc4_ctm_state(priv_state);
+}
+
+static struct drm_private_state *
+vc4_ctm_duplicate_state(struct drm_private_obj *obj)
+{
+	struct vc4_ctm_state *state;
+
+	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	__drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+	return &state->base;
+}
+
+static void vc4_ctm_destroy_state(struct drm_private_obj *obj,
+				  struct drm_private_state *state)
+{
+	struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(state);
+
+	kfree(ctm_state);
+}
+
+static const struct drm_private_state_funcs vc4_ctm_state_funcs = {
+	.atomic_duplicate_state = vc4_ctm_duplicate_state,
+	.atomic_destroy_state = vc4_ctm_destroy_state,
+};
+
+/* Converts a DRM S31.32 value to the HW S0.9 format. */
+static u16 vc4_ctm_s31_32_to_s0_9(u64 in)
+{
+	u16 r;
+
+	/* Sign bit. */
+	r = in & BIT_ULL(63) ? BIT(9) : 0;
+
+	if ((in & GENMASK_ULL(62, 32)) > 0) {
+		/* We have zero integer bits so we can only saturate here. */
+		r |= GENMASK(8, 0);
+	} else {
+		/* Otherwise take the 9 most important fractional bits. */
+		r |= (in >> 23) & GENMASK(8, 0);
+	}
+
+	return r;
+}
+
+static void
+vc4_ctm_commit(struct vc4_dev *vc4, struct drm_atomic_state *state)
+{
+	struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(vc4->ctm_manager.state);
+	struct drm_color_ctm *ctm = ctm_state->ctm;
+
+	if (ctm_state->fifo) {
+		HVS_WRITE(SCALER_OLEDCOEF2,
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[0]),
+					SCALER_OLEDCOEF2_R_TO_R) |
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[3]),
+					SCALER_OLEDCOEF2_R_TO_G) |
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[6]),
+					SCALER_OLEDCOEF2_R_TO_B));
+		HVS_WRITE(SCALER_OLEDCOEF1,
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[1]),
+					SCALER_OLEDCOEF1_G_TO_R) |
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[4]),
+					SCALER_OLEDCOEF1_G_TO_G) |
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[7]),
+					SCALER_OLEDCOEF1_G_TO_B));
+		HVS_WRITE(SCALER_OLEDCOEF0,
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[2]),
+					SCALER_OLEDCOEF0_B_TO_R) |
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[5]),
+					SCALER_OLEDCOEF0_B_TO_G) |
+			  VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[8]),
+					SCALER_OLEDCOEF0_B_TO_B));
+	}
+
+	HVS_WRITE(SCALER_OLEDOFFS,
+		  VC4_SET_FIELD(ctm_state->fifo, SCALER_OLEDOFFS_DISPFIFO));
+}
 
 static void
 vc4_atomic_complete_commit(struct drm_atomic_state *state)
@@ -36,6 +147,8 @@
 
 	drm_atomic_helper_commit_modeset_disables(dev, state);
 
+	vc4_ctm_commit(vc4, state);
+
 	drm_atomic_helper_commit_planes(dev, state, 0);
 
 	drm_atomic_helper_commit_modeset_enables(dev, state);
@@ -90,6 +203,26 @@
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	int ret;
 
+	if (state->async_update) {
+		ret = down_interruptible(&vc4->async_modeset);
+		if (ret)
+			return ret;
+
+		ret = drm_atomic_helper_prepare_planes(dev, state);
+		if (ret) {
+			up(&vc4->async_modeset);
+			return ret;
+		}
+
+		drm_atomic_helper_async_commit(dev, state);
+
+		drm_atomic_helper_cleanup_planes(dev, state);
+
+		up(&vc4->async_modeset);
+
+		return 0;
+	}
+
 	ret = drm_atomic_helper_setup_commit(state, nonblock);
 	if (ret)
 		return ret;
@@ -187,9 +320,89 @@
 	return drm_gem_fb_create(dev, file_priv, mode_cmd);
 }
 
+/* Our CTM has some peculiar limitations: we can only enable it for one CRTC
+ * at a time and the HW only supports S0.9 scalars. To account for the latter,
+ * we don't allow userland to set a CTM that we have no hope of approximating.
+ */
+static int
+vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_ctm_state *ctm_state = NULL;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+	struct drm_color_ctm *ctm;
+	int i;
+
+	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+		/* CTM is being disabled. */
+		if (!new_crtc_state->ctm && old_crtc_state->ctm) {
+			ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager);
+			if (IS_ERR(ctm_state))
+				return PTR_ERR(ctm_state);
+			ctm_state->fifo = 0;
+		}
+	}
+
+	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+		if (new_crtc_state->ctm == old_crtc_state->ctm)
+			continue;
+
+		if (!ctm_state) {
+			ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager);
+			if (IS_ERR(ctm_state))
+				return PTR_ERR(ctm_state);
+		}
+
+		/* CTM is being enabled or the matrix changed. */
+		if (new_crtc_state->ctm) {
+			/* fifo is 1-based since 0 disables CTM. */
+			int fifo = to_vc4_crtc(crtc)->channel + 1;
+
+			/* Check userland isn't trying to turn on CTM for more
+			 * than one CRTC at a time.
+			 */
+			if (ctm_state->fifo && ctm_state->fifo != fifo) {
+				DRM_DEBUG_DRIVER("Too many CTMs configured\n");
+				return -EINVAL;
+			}
+
+			/* Check we can approximate the specified CTM.
+			 * We disallow scalars |c| > 1.0 since the HW has
+			 * no integer bits.
+			 */
+			ctm = new_crtc_state->ctm->data;
+			for (i = 0; i < ARRAY_SIZE(ctm->matrix); i++) {
+				u64 val = ctm->matrix[i];
+
+				val &= ~BIT_ULL(63);
+				if (val > BIT_ULL(32))
+					return -EINVAL;
+			}
+
+			ctm_state->fifo = fifo;
+			ctm_state->ctm = ctm;
+		}
+	}
+
+	return 0;
+}
+
+static int
+vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
+{
+	int ret;
+
+	ret = vc4_ctm_atomic_check(dev, state);
+	if (ret < 0)
+		return ret;
+
+	return drm_atomic_helper_check(dev, state);
+}
+
 static const struct drm_mode_config_funcs vc4_mode_funcs = {
 	.output_poll_changed = drm_fb_helper_output_poll_changed,
-	.atomic_check = drm_atomic_helper_check,
+	.atomic_check = vc4_atomic_check,
 	.atomic_commit = vc4_atomic_commit,
 	.fb_create = vc4_fb_create,
 };
@@ -197,6 +410,7 @@
 int vc4_kms_load(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_ctm_state *ctm_state;
 	int ret;
 
 	sema_init(&vc4->async_modeset, 1);
@@ -217,6 +431,14 @@
 	dev->mode_config.async_page_flip = true;
 	dev->mode_config.allow_fb_modifiers = true;
 
+	drm_modeset_lock_init(&vc4->ctm_state_lock);
+
+	ctm_state = kzalloc(sizeof(*ctm_state), GFP_KERNEL);
+	if (!ctm_state)
+		return -ENOMEM;
+	drm_atomic_private_obj_init(&vc4->ctm_manager, &ctm_state->base,
+				    &vc4_ctm_state_funcs);
+
 	drm_mode_config_reset(dev);
 
 	if (dev->mode_config.num_connector)
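
vc4_ctm_s31_32_to_s0_9() is easy to sanity-check outside the kernel.
A standalone copy with BIT/GENMASK expanded by hand and a few worked
values:

    #include <stdint.h>
    #include <stdio.h>

    static uint16_t s31_32_to_s0_9(uint64_t in)
    {
            /* Sign bit. */
            uint16_t r = (in & (1ull << 63)) ? (1u << 9) : 0;

            if (in & 0x7fffffff00000000ull)
                    r |= 0x1ff;              /* |c| >= 1.0: saturate */
            else
                    r |= (in >> 23) & 0x1ff; /* top 9 fraction bits  */
            return r;
    }

    int main(void)
    {
            uint64_t half = 1ull << 31;               /* S31.32 for 0.5 */
            uint64_t minus_half = (1ull << 63) | half;
            uint64_t two = 2ull << 32;

            printf(" 0.5 -> 0x%03x\n", s31_32_to_s0_9(half));       /* 0x100 */
            printf("-0.5 -> 0x%03x\n", s31_32_to_s0_9(minus_half)); /* 0x300 */
            printf(" 2.0 -> 0x%03x\n", s31_32_to_s0_9(two));        /* 0x1ff */
            return 0;
    }
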
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 13dcaad..71d44c3 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -201,6 +201,7 @@
 		return;
 
 	plane->state = &vc4_state->base;
+	plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE;
 	vc4_state->base.plane = plane;
 }
 
@@ -467,6 +468,7 @@
 	u32 ctl0_offset = vc4_state->dlist_count;
 	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
 	int num_planes = drm_format_num_planes(format->drm);
+	bool mix_plane_alpha;
 	bool covers_screen;
 	u32 scl0, scl1, pitch0;
 	u32 lbm_size, tiling;
@@ -552,7 +554,7 @@
 	/* Position Word 0: Image Positions and Alpha Value */
 	vc4_state->pos0_offset = vc4_state->dlist_count;
 	vc4_dlist_write(vc4_state,
-			VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) |
+			VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
 			VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
 			VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
 
@@ -565,6 +567,13 @@
 					      SCALER_POS1_SCL_HEIGHT));
 	}
 
+	/* Don't waste cycles mixing with plane alpha if the set alpha
+	 * is opaque or there is no per-pixel alpha information.
+	 * In any case we use the alpha property value as the fixed alpha.
+	 */
+	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
+			  fb->format->has_alpha;
+
 	/* Position Word 2: Source Image Size, Alpha */
 	vc4_state->pos2_offset = vc4_state->dlist_count;
 	vc4_dlist_write(vc4_state,
@@ -572,6 +581,7 @@
 				      SCALER_POS2_ALPHA_MODE_PIPELINE :
 				      SCALER_POS2_ALPHA_MODE_FIXED,
 				      SCALER_POS2_ALPHA_MODE) |
+			(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
 			(fb->format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0) |
 			VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) |
 			VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT));
@@ -653,10 +663,11 @@
 			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
 			vc4_state->crtc_h == state->crtc->mode.vdisplay;
 	/* Background fill might be necessary when the plane has per-pixel
-	 * alpha content and blends from the background or does not cover
-	 * the entire screen.
+	 * alpha content or a non-opaque plane alpha and could blend from the
+	 * background or does not cover the entire screen.
 	 */
-	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen;
+	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
+				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;
 
 	return 0;
 }
@@ -741,6 +752,57 @@
 	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
 }
 
+static void vc4_plane_atomic_async_update(struct drm_plane *plane,
+					  struct drm_plane_state *state)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
+
+	if (plane->state->fb != state->fb) {
+		vc4_plane_async_set_fb(plane, state->fb);
+		drm_atomic_set_fb_for_plane(plane->state, state->fb);
+	}
+
+	/* Set the cursor's position on the screen.  This is the
+	 * expected change from the drm_mode_cursor_universal()
+	 * helper.
+	 */
+	plane->state->crtc_x = state->crtc_x;
+	plane->state->crtc_y = state->crtc_y;
+
+	/* Allow changing the start position within the cursor BO, if
+	 * that matters.
+	 */
+	plane->state->src_x = state->src_x;
+	plane->state->src_y = state->src_y;
+
+	/* Update the display list based on the new crtc_x/y. */
+	vc4_plane_atomic_check(plane, plane->state);
+
+	/* Note that we can't just call vc4_plane_write_dlist()
+	 * because that would smash the context data that the HVS is
+	 * currently using.
+	 */
+	writel(vc4_state->dlist[vc4_state->pos0_offset],
+	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
+	writel(vc4_state->dlist[vc4_state->pos2_offset],
+	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
+	writel(vc4_state->dlist[vc4_state->ptr0_offset],
+	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
+}
+
+static int vc4_plane_atomic_async_check(struct drm_plane *plane,
+					struct drm_plane_state *state)
+{
+	/* No configuring new scaling in the fast path. */
+	if (plane->state->crtc_w != state->crtc_w ||
+	    plane->state->crtc_h != state->crtc_h ||
+	    plane->state->src_w != state->src_w ||
+	    plane->state->src_h != state->src_h)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int vc4_prepare_fb(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
@@ -780,6 +842,8 @@
 	.atomic_update = vc4_plane_atomic_update,
 	.prepare_fb = vc4_prepare_fb,
 	.cleanup_fb = vc4_cleanup_fb,
+	.atomic_async_check = vc4_plane_atomic_async_check,
+	.atomic_async_update = vc4_plane_atomic_async_update,
 };
 
 static void vc4_plane_destroy(struct drm_plane *plane)
@@ -788,82 +852,6 @@
 	drm_plane_cleanup(plane);
 }
 
-/* Implements immediate (non-vblank-synced) updates of the cursor
- * position, or falls back to the atomic helper otherwise.
- */
-static int
-vc4_update_plane(struct drm_plane *plane,
-		 struct drm_crtc *crtc,
-		 struct drm_framebuffer *fb,
-		 int crtc_x, int crtc_y,
-		 unsigned int crtc_w, unsigned int crtc_h,
-		 uint32_t src_x, uint32_t src_y,
-		 uint32_t src_w, uint32_t src_h,
-		 struct drm_modeset_acquire_ctx *ctx)
-{
-	struct drm_plane_state *plane_state;
-	struct vc4_plane_state *vc4_state;
-
-	if (plane != crtc->cursor)
-		goto out;
-
-	plane_state = plane->state;
-	vc4_state = to_vc4_plane_state(plane_state);
-
-	if (!plane_state)
-		goto out;
-
-	/* No configuring new scaling in the fast path. */
-	if (crtc_w != plane_state->crtc_w ||
-	    crtc_h != plane_state->crtc_h ||
-	    src_w != plane_state->src_w ||
-	    src_h != plane_state->src_h) {
-		goto out;
-	}
-
-	if (fb != plane_state->fb) {
-		drm_atomic_set_fb_for_plane(plane->state, fb);
-		vc4_plane_async_set_fb(plane, fb);
-	}
-
-	/* Set the cursor's position on the screen.  This is the
-	 * expected change from the drm_mode_cursor_universal()
-	 * helper.
-	 */
-	plane_state->crtc_x = crtc_x;
-	plane_state->crtc_y = crtc_y;
-
-	/* Allow changing the start position within the cursor BO, if
-	 * that matters.
-	 */
-	plane_state->src_x = src_x;
-	plane_state->src_y = src_y;
-
-	/* Update the display list based on the new crtc_x/y. */
-	vc4_plane_atomic_check(plane, plane_state);
-
-	/* Note that we can't just call vc4_plane_write_dlist()
-	 * because that would smash the context data that the HVS is
-	 * currently using.
-	 */
-	writel(vc4_state->dlist[vc4_state->pos0_offset],
-	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
-	writel(vc4_state->dlist[vc4_state->pos2_offset],
-	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
-	writel(vc4_state->dlist[vc4_state->ptr0_offset],
-	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
-
-	return 0;
-
-out:
-	return drm_atomic_helper_update_plane(plane, crtc, fb,
-					      crtc_x, crtc_y,
-					      crtc_w, crtc_h,
-					      src_x, src_y,
-					      src_w, src_h,
-					      ctx);
-}
-
 static bool vc4_format_mod_supported(struct drm_plane *plane,
 				     uint32_t format,
 				     uint64_t modifier)
@@ -891,7 +879,7 @@
 }
 
 static const struct drm_plane_funcs vc4_plane_funcs = {
-	.update_plane = vc4_update_plane,
+	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = vc4_plane_destroy,
 	.set_property = NULL,
@@ -939,5 +927,7 @@
 
 	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
 
+	drm_plane_create_alpha_property(plane);
+
 	return plane;
 }
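
The alpha property handling above makes two decisions from the 16-bit
property value: the 8-bit fixed alpha for Position Word 0, and whether
SCALER_POS2_ALPHA_MIX is needed at all.  A minimal sketch, with
DRM_BLEND_ALPHA_OPAQUE restated from drm_blend.h:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DRM_BLEND_ALPHA_OPAQUE 0xffff

    /* Top byte of the 16-bit property, as in state->alpha >> 8. */
    static uint32_t fixed_alpha(uint16_t prop_alpha)
    {
            return prop_alpha >> 8;
    }

    /* Mixing only matters when the plane alpha is non-opaque and
     * there is per-pixel alpha to combine it with.
     */
    static bool needs_alpha_mix(uint16_t prop_alpha, bool fb_has_alpha)
    {
            return prop_alpha != DRM_BLEND_ALPHA_OPAQUE && fb_has_alpha;
    }

    int main(void)
    {
            printf("opaque: fixed=0x%02x mix=%d\n",
                   fixed_alpha(0xffff), needs_alpha_mix(0xffff, true));
            printf("half:   fixed=0x%02x mix=%d\n",
                   fixed_alpha(0x8000), needs_alpha_mix(0x8000, true));
            return 0;
    }
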
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index a141496..d1fb6fe 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -330,6 +330,21 @@
 #define SCALER_DISPCTRL0                        0x00000040
 # define SCALER_DISPCTRLX_ENABLE		BIT(31)
 # define SCALER_DISPCTRLX_RESET			BIT(30)
+/* Generates a single frame when VSTART is seen and stops at the last
+ * pixel read from the FIFO.
+ */
+# define SCALER_DISPCTRLX_ONESHOT		BIT(29)
+/* Processes a single context in the dlist and then task switches,
+ * instead of processing an entire line.
+ */
+# define SCALER_DISPCTRLX_ONECTX		BIT(28)
+/* Set to have DISPSLAVE return 2 16bpp pixels and no status data. */
+# define SCALER_DISPCTRLX_FIFO32		BIT(27)
+/* Turns on output to the DISPSLAVE register instead of the normal
+ * FIFO.
+ */
+# define SCALER_DISPCTRLX_FIFOREG		BIT(26)
+
 # define SCALER_DISPCTRLX_WIDTH_MASK		VC4_MASK(23, 12)
 # define SCALER_DISPCTRLX_WIDTH_SHIFT		12
 # define SCALER_DISPCTRLX_HEIGHT_MASK		VC4_MASK(11, 0)
@@ -402,6 +417,68 @@
  */
 # define SCALER_GAMADDR_SRAMENB			BIT(30)
 
+#define SCALER_OLEDOFFS                         0x00000080
+/* Clamps R to [16,235] and G/B to [16,240]. */
+# define SCALER_OLEDOFFS_YUVCLAMP               BIT(31)
+
+/* Chooses which display FIFO the matrix applies to. */
+# define SCALER_OLEDOFFS_DISPFIFO_MASK          VC4_MASK(25, 24)
+# define SCALER_OLEDOFFS_DISPFIFO_SHIFT         24
+# define SCALER_OLEDOFFS_DISPFIFO_DISABLED      0
+# define SCALER_OLEDOFFS_DISPFIFO_0             1
+# define SCALER_OLEDOFFS_DISPFIFO_1             2
+# define SCALER_OLEDOFFS_DISPFIFO_2             3
+
+/* Offsets are 8-bit 2s-complement. */
+# define SCALER_OLEDOFFS_RED_MASK               VC4_MASK(23, 16)
+# define SCALER_OLEDOFFS_RED_SHIFT              16
+# define SCALER_OLEDOFFS_GREEN_MASK             VC4_MASK(15, 8)
+# define SCALER_OLEDOFFS_GREEN_SHIFT            8
+# define SCALER_OLEDOFFS_BLUE_MASK              VC4_MASK(7, 0)
+# define SCALER_OLEDOFFS_BLUE_SHIFT             0
+
+/* The coefficients are S0.9 fractions. */
+#define SCALER_OLEDCOEF0                        0x00000084
+# define SCALER_OLEDCOEF0_B_TO_R_MASK           VC4_MASK(29, 20)
+# define SCALER_OLEDCOEF0_B_TO_R_SHIFT          20
+# define SCALER_OLEDCOEF0_B_TO_G_MASK           VC4_MASK(19, 10)
+# define SCALER_OLEDCOEF0_B_TO_G_SHIFT          10
+# define SCALER_OLEDCOEF0_B_TO_B_MASK           VC4_MASK(9, 0)
+# define SCALER_OLEDCOEF0_B_TO_B_SHIFT          0
+
+#define SCALER_OLEDCOEF1                        0x00000088
+# define SCALER_OLEDCOEF1_G_TO_R_MASK           VC4_MASK(29, 20)
+# define SCALER_OLEDCOEF1_G_TO_R_SHIFT          20
+# define SCALER_OLEDCOEF1_G_TO_G_MASK           VC4_MASK(19, 10)
+# define SCALER_OLEDCOEF1_G_TO_G_SHIFT          10
+# define SCALER_OLEDCOEF1_G_TO_B_MASK           VC4_MASK(9, 0)
+# define SCALER_OLEDCOEF1_G_TO_B_SHIFT          0
+
+#define SCALER_OLEDCOEF2                        0x0000008c
+# define SCALER_OLEDCOEF2_R_TO_R_MASK           VC4_MASK(29, 20)
+# define SCALER_OLEDCOEF2_R_TO_R_SHIFT          20
+# define SCALER_OLEDCOEF2_R_TO_G_MASK           VC4_MASK(19, 10)
+# define SCALER_OLEDCOEF2_R_TO_G_SHIFT          10
+# define SCALER_OLEDCOEF2_R_TO_B_MASK           VC4_MASK(9, 0)
+# define SCALER_OLEDCOEF2_R_TO_B_SHIFT          0
+
+/* Slave addresses for DMAing from HVS composition output to other
+ * devices.  The top bits are valid only in !FIFO32 mode.
+ */
+#define SCALER_DISPSLAVE0                       0x000000c0
+#define SCALER_DISPSLAVE1                       0x000000c8 /* assumed: 4-byte aligned */
+#define SCALER_DISPSLAVE2                       0x000000d0
+# define SCALER_DISPSLAVE_ISSUE_VSTART          BIT(31)
+# define SCALER_DISPSLAVE_ISSUE_HSTART          BIT(30)
+/* Set when the current line has been read and an HSTART is required. */
+# define SCALER_DISPSLAVE_EOL                   BIT(26)
+/* Set when the display FIFO is empty. */
+# define SCALER_DISPSLAVE_EMPTY                 BIT(25)
+/* Set when there is RGB data ready to read. */
+# define SCALER_DISPSLAVE_VALID                 BIT(24)
+# define SCALER_DISPSLAVE_RGB_MASK              VC4_MASK(23, 0)
+# define SCALER_DISPSLAVE_RGB_SHIFT             0
+
 #define SCALER_GAMDATA                          0x000000e0
 #define SCALER_DLIST_START                      0x00002000
 #define SCALER_DLIST_SIZE                       0x00004000
@@ -767,6 +844,10 @@
 	HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9,
 	HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10,
 	HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11,
+	HVS_PIXEL_FORMAT_H264 = 12,
+	HVS_PIXEL_FORMAT_PALETTE = 13,
+	HVS_PIXEL_FORMAT_YUV444_RGB = 14,
+	HVS_PIXEL_FORMAT_AYUV444_RGB = 15,
 };
 
 /* Note: the LSB is the rightmost character shown.  Only valid for
@@ -800,12 +881,27 @@
 #define SCALER_CTL0_TILING_128B			2
 #define SCALER_CTL0_TILING_256B_OR_T		3
 
+#define SCALER_CTL0_ALPHA_MASK                  BIT(19)
 #define SCALER_CTL0_HFLIP                       BIT(16)
 #define SCALER_CTL0_VFLIP                       BIT(15)
 
+#define SCALER_CTL0_KEY_MODE_MASK		VC4_MASK(18, 17)
+#define SCALER_CTL0_KEY_MODE_SHIFT		17
+#define SCALER_CTL0_KEY_DISABLED		0
+#define SCALER_CTL0_KEY_LUMA_OR_COMMON_RGB	1
+#define SCALER_CTL0_KEY_MATCH			2 /* turn transparent */
+#define SCALER_CTL0_KEY_REPLACE			3 /* replace with value from key mask word 2 */
+
 #define SCALER_CTL0_ORDER_MASK			VC4_MASK(14, 13)
 #define SCALER_CTL0_ORDER_SHIFT			13
 
+#define SCALER_CTL0_RGBA_EXPAND_MASK		VC4_MASK(12, 11)
+#define SCALER_CTL0_RGBA_EXPAND_SHIFT		11
+#define SCALER_CTL0_RGBA_EXPAND_ZERO		0
+#define SCALER_CTL0_RGBA_EXPAND_LSB		1
+#define SCALER_CTL0_RGBA_EXPAND_MSB		2
+#define SCALER_CTL0_RGBA_EXPAND_ROUND		3
+
 #define SCALER_CTL0_SCL1_MASK			VC4_MASK(10, 8)
 #define SCALER_CTL0_SCL1_SHIFT			8
 
@@ -849,6 +945,7 @@
 #define SCALER_POS2_ALPHA_MODE_FIXED_NONZERO	2
 #define SCALER_POS2_ALPHA_MODE_FIXED_OVER_0x07	3
 #define SCALER_POS2_ALPHA_PREMULT		BIT(29)
+#define SCALER_POS2_ALPHA_MIX			BIT(28)
 
 #define SCALER_POS2_HEIGHT_MASK			VC4_MASK(27, 16)
 #define SCALER_POS2_HEIGHT_SHIFT		16
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index bfc2fa7..e47e294 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -218,8 +218,7 @@
  * overall CMA pool before they make scenes complicated enough to run
  * out of bin space.
  */
-int
-vc4_allocate_bin_bo(struct drm_device *drm)
+static int vc4_allocate_bin_bo(struct drm_device *drm)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(drm);
 	struct vc4_v3d *v3d = vc4->v3d;
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index 8cc8c34..a5edd86 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -208,7 +208,7 @@
 	return count;
 }
 
-static int virtio_gpu_conn_mode_valid(struct drm_connector *connector,
+static enum drm_mode_status virtio_gpu_conn_mode_valid(struct drm_connector *connector,
 				      struct drm_display_mode *mode)
 {
 	struct virtio_gpu_output *output =
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 8b770a8..09cc721 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -159,14 +159,14 @@
 		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_CURSOR_BYPASS,
 		      vmw_kms_cursor_bypass_ioctl,
-		      DRM_MASTER | DRM_CONTROL_ALLOW),
+		      DRM_MASTER),
 
 	VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl,
-		      DRM_MASTER | DRM_CONTROL_ALLOW),
+		      DRM_MASTER),
 	VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl,
-		      DRM_MASTER | DRM_CONTROL_ALLOW),
+		      DRM_MASTER),
 	VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl,
-		      DRM_MASTER | DRM_CONTROL_ALLOW),
+		      DRM_MASTER),
 
 	VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl,
 		      DRM_AUTH | DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 96fd7a0..01f2dc9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -384,9 +384,9 @@
 	hotspot_x = du->hotspot_x;
 	hotspot_y = du->hotspot_y;
 
-	if (plane->fb) {
-		hotspot_x += plane->fb->hot_x;
-		hotspot_y += plane->fb->hot_y;
+	if (plane->state->fb) {
+		hotspot_x += plane->state->fb->hot_x;
+		hotspot_y += plane->state->fb->hot_y;
 	}
 
 	du->cursor_surface = vps->surf;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index 3824595..4a5907e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -281,39 +281,6 @@
  * Legacy Display Plane Functions
  */
 
-/**
- * vmw_ldu_primary_plane_cleanup_fb - Noop
- *
- * @plane:  display plane
- * @old_state: Contains the FB to clean up
- *
- * Unpins the display surface
- *
- * Returns 0 on success
- */
-static void
-vmw_ldu_primary_plane_cleanup_fb(struct drm_plane *plane,
-				 struct drm_plane_state *old_state)
-{
-}
-
-
-/**
- * vmw_ldu_primary_plane_prepare_fb - Noop
- *
- * @plane:  display plane
- * @new_state: info on the new plane state, including the FB
- *
- * Returns 0 on success
- */
-static int
-vmw_ldu_primary_plane_prepare_fb(struct drm_plane *plane,
-				 struct drm_plane_state *new_state)
-{
-	return 0;
-}
-
-
 static void
 vmw_ldu_primary_plane_atomic_update(struct drm_plane *plane,
 				    struct drm_plane_state *old_state)
@@ -373,8 +340,6 @@
 drm_plane_helper_funcs vmw_ldu_primary_plane_helper_funcs = {
 	.atomic_check = vmw_du_primary_plane_atomic_check,
 	.atomic_update = vmw_ldu_primary_plane_atomic_update,
-	.prepare_fb = vmw_ldu_primary_plane_prepare_fb,
-	.cleanup_fb = vmw_ldu_primary_plane_cleanup_fb,
 };
 
 static const struct drm_crtc_helper_funcs vmw_ldu_crtc_helper_funcs = {
diff --git a/drivers/gpu/drm/xen/Kconfig b/drivers/gpu/drm/xen/Kconfig
new file mode 100644
index 0000000..4cca160
--- /dev/null
+++ b/drivers/gpu/drm/xen/Kconfig
@@ -0,0 +1,17 @@
+config DRM_XEN
+	bool "DRM Support for Xen guest OS"
+	depends on XEN
+	help
+	  Choose this option if you want to enable DRM support
+	  for Xen.
+
+config DRM_XEN_FRONTEND
+	tristate "Para-virtualized frontend driver for Xen guest OS"
+	depends on DRM_XEN
+	depends on DRM
+	select DRM_KMS_HELPER
+	select VIDEOMODE_HELPERS
+	select XEN_XENBUS_FRONTEND
+	help
+	  Choose this option if you want to enable a para-virtualized
+	  frontend DRM/KMS driver for Xen guest OSes.
diff --git a/drivers/gpu/drm/xen/Makefile b/drivers/gpu/drm/xen/Makefile
new file mode 100644
index 0000000..712afff
--- /dev/null
+++ b/drivers/gpu/drm/xen/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0 OR MIT
+
+drm_xen_front-objs := xen_drm_front.o \
+		      xen_drm_front_kms.o \
+		      xen_drm_front_conn.o \
+		      xen_drm_front_evtchnl.o \
+		      xen_drm_front_shbuf.o \
+		      xen_drm_front_cfg.o \
+		      xen_drm_front_gem.o
+
+obj-$(CONFIG_DRM_XEN_FRONTEND) += drm_xen_front.o
diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c
new file mode 100644
index 0000000..b3786c1
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -0,0 +1,839 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_gem.h>
+
+#include <linux/of_device.h>
+
+#include <xen/platform_pci.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+
+#include <xen/interface/io/displif.h>
+
+#include "xen_drm_front.h"
+#include "xen_drm_front_cfg.h"
+#include "xen_drm_front_evtchnl.h"
+#include "xen_drm_front_gem.h"
+#include "xen_drm_front_kms.h"
+#include "xen_drm_front_shbuf.h"
+
+struct xen_drm_front_dbuf {
+	struct list_head list;
+	u64 dbuf_cookie;
+	u64 fb_cookie;
+	struct xen_drm_front_shbuf *shbuf;
+};
+
+static int dbuf_add_to_list(struct xen_drm_front_info *front_info,
+			    struct xen_drm_front_shbuf *shbuf, u64 dbuf_cookie)
+{
+	struct xen_drm_front_dbuf *dbuf;
+
+	dbuf = kzalloc(sizeof(*dbuf), GFP_KERNEL);
+	if (!dbuf)
+		return -ENOMEM;
+
+	dbuf->dbuf_cookie = dbuf_cookie;
+	dbuf->shbuf = shbuf;
+	list_add(&dbuf->list, &front_info->dbuf_list);
+	return 0;
+}
+
+static struct xen_drm_front_dbuf *dbuf_get(struct list_head *dbuf_list,
+					   u64 dbuf_cookie)
+{
+	struct xen_drm_front_dbuf *buf, *q;
+
+	list_for_each_entry_safe(buf, q, dbuf_list, list)
+		if (buf->dbuf_cookie == dbuf_cookie)
+			return buf;
+
+	return NULL;
+}
+
+static void dbuf_flush_fb(struct list_head *dbuf_list, u64 fb_cookie)
+{
+	struct xen_drm_front_dbuf *buf, *q;
+
+	list_for_each_entry_safe(buf, q, dbuf_list, list)
+		if (buf->fb_cookie == fb_cookie)
+			xen_drm_front_shbuf_flush(buf->shbuf);
+}
+
+static void dbuf_free(struct list_head *dbuf_list, u64 dbuf_cookie)
+{
+	struct xen_drm_front_dbuf *buf, *q;
+
+	list_for_each_entry_safe(buf, q, dbuf_list, list)
+		if (buf->dbuf_cookie == dbuf_cookie) {
+			list_del(&buf->list);
+			xen_drm_front_shbuf_unmap(buf->shbuf);
+			xen_drm_front_shbuf_free(buf->shbuf);
+			kfree(buf);
+			break;
+		}
+}
+
+static void dbuf_free_all(struct list_head *dbuf_list)
+{
+	struct xen_drm_front_dbuf *buf, *q;
+
+	list_for_each_entry_safe(buf, q, dbuf_list, list) {
+		list_del(&buf->list);
+		xen_drm_front_shbuf_unmap(buf->shbuf);
+		xen_drm_front_shbuf_free(buf->shbuf);
+		kfree(buf);
+	}
+}
+
+static struct xendispl_req *
+be_prepare_req(struct xen_drm_front_evtchnl *evtchnl, u8 operation)
+{
+	struct xendispl_req *req;
+
+	req = RING_GET_REQUEST(&evtchnl->u.req.ring,
+			       evtchnl->u.req.ring.req_prod_pvt);
+	req->operation = operation;
+	req->id = evtchnl->evt_next_id++;
+	evtchnl->evt_id = req->id;
+	return req;
+}
+
+static int be_stream_do_io(struct xen_drm_front_evtchnl *evtchnl,
+			   struct xendispl_req *req)
+{
+	reinit_completion(&evtchnl->u.req.completion);
+	if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED))
+		return -EIO;
+
+	xen_drm_front_evtchnl_flush(evtchnl);
+	return 0;
+}
+
+static int be_stream_wait_io(struct xen_drm_front_evtchnl *evtchnl)
+{
+	if (wait_for_completion_timeout(&evtchnl->u.req.completion,
+			msecs_to_jiffies(XEN_DRM_FRONT_WAIT_BACK_MS)) <= 0)
+		return -ETIMEDOUT;
+
+	return evtchnl->u.req.resp_status;
+}
+
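+/*
+ * All operations below follow the same request/response pattern, a
+ * descriptive sketch of which (derived from the helpers above) is:
+ *
+ *	mutex_lock(&evtchnl->u.req.req_io_lock);
+ *	spin_lock_irqsave(&front_info->io_lock, flags);
+ *	req = be_prepare_req(evtchnl, XENDISPL_OP_xxx);
+ *	req->op.xxx.field = value;
+ *	ret = be_stream_do_io(evtchnl, req);
+ *	spin_unlock_irqrestore(&front_info->io_lock, flags);
+ *	if (ret == 0)
+ *		ret = be_stream_wait_io(evtchnl);
+ *	mutex_unlock(&evtchnl->u.req.req_io_lock);
+ *
+ * The mutex serializes request/response round-trips per channel, while
+ * the spinlock protects the shared ring against the interrupt handler.
+ */
+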
+int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline,
+			   u32 x, u32 y, u32 width, u32 height,
+			   u32 bpp, u64 fb_cookie)
+{
+	struct xen_drm_front_evtchnl *evtchnl;
+	struct xen_drm_front_info *front_info;
+	struct xendispl_req *req;
+	unsigned long flags;
+	int ret;
+
+	front_info = pipeline->drm_info->front_info;
+	evtchnl = &front_info->evt_pairs[pipeline->index].req;
+	if (unlikely(!evtchnl))
+		return -EIO;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+	req = be_prepare_req(evtchnl, XENDISPL_OP_SET_CONFIG);
+	req->op.set_config.x = x;
+	req->op.set_config.y = y;
+	req->op.set_config.width = width;
+	req->op.set_config.height = height;
+	req->op.set_config.bpp = bpp;
+	req->op.set_config.fb_cookie = fb_cookie;
+
+	ret = be_stream_do_io(evtchnl, req);
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
+			      u64 dbuf_cookie, u32 width, u32 height,
+			      u32 bpp, u64 size, struct page **pages)
+{
+	struct xen_drm_front_evtchnl *evtchnl;
+	struct xen_drm_front_shbuf *shbuf;
+	struct xendispl_req *req;
+	struct xen_drm_front_shbuf_cfg buf_cfg;
+	unsigned long flags;
+	int ret;
+
+	evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req;
+	if (unlikely(!evtchnl))
+		return -EIO;
+
+	memset(&buf_cfg, 0, sizeof(buf_cfg));
+	buf_cfg.xb_dev = front_info->xb_dev;
+	buf_cfg.pages = pages;
+	buf_cfg.size = size;
+	buf_cfg.be_alloc = front_info->cfg.be_alloc;
+
+	shbuf = xen_drm_front_shbuf_alloc(&buf_cfg);
+	if (IS_ERR(shbuf))
+		return PTR_ERR(shbuf);
+
+	ret = dbuf_add_to_list(front_info, shbuf, dbuf_cookie);
+	if (ret < 0) {
+		xen_drm_front_shbuf_free(shbuf);
+		return ret;
+	}
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+	req = be_prepare_req(evtchnl, XENDISPL_OP_DBUF_CREATE);
+	req->op.dbuf_create.gref_directory =
+			xen_drm_front_shbuf_get_dir_start(shbuf);
+	req->op.dbuf_create.buffer_sz = size;
+	req->op.dbuf_create.dbuf_cookie = dbuf_cookie;
+	req->op.dbuf_create.width = width;
+	req->op.dbuf_create.height = height;
+	req->op.dbuf_create.bpp = bpp;
+	if (buf_cfg.be_alloc)
+		req->op.dbuf_create.flags |= XENDISPL_DBUF_FLG_REQ_ALLOC;
+
+	ret = be_stream_do_io(evtchnl, req);
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+
+	if (ret < 0)
+		goto fail;
+
+	ret = be_stream_wait_io(evtchnl);
+	if (ret < 0)
+		goto fail;
+
+	ret = xen_drm_front_shbuf_map(shbuf);
+	if (ret < 0)
+		goto fail;
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return 0;
+
+fail:
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	dbuf_free(&front_info->dbuf_list, dbuf_cookie);
+	return ret;
+}
+
+static int xen_drm_front_dbuf_destroy(struct xen_drm_front_info *front_info,
+				      u64 dbuf_cookie)
+{
+	struct xen_drm_front_evtchnl *evtchnl;
+	struct xendispl_req *req;
+	unsigned long flags;
+	bool be_alloc;
+	int ret;
+
+	evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req;
+	if (unlikely(!evtchnl))
+		return -EIO;
+
+	be_alloc = front_info->cfg.be_alloc;
+
+	/*
+	 * For a backend-allocated buffer, release references now so the
+	 * backend can free the buffer.
+	 */
+	if (be_alloc)
+		dbuf_free(&front_info->dbuf_list, dbuf_cookie);
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+	req = be_prepare_req(evtchnl, XENDISPL_OP_DBUF_DESTROY);
+	req->op.dbuf_destroy.dbuf_cookie = dbuf_cookie;
+
+	ret = be_stream_do_io(evtchnl, req);
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	/*
+	 * Do this regardless of the communication status with the backend:
+	 * if we cannot remove remote resources, remove what we can locally.
+	 */
+	if (!be_alloc)
+		dbuf_free(&front_info->dbuf_list, dbuf_cookie);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info,
+			    u64 dbuf_cookie, u64 fb_cookie, u32 width,
+			    u32 height, u32 pixel_format)
+{
+	struct xen_drm_front_evtchnl *evtchnl;
+	struct xen_drm_front_dbuf *buf;
+	struct xendispl_req *req;
+	unsigned long flags;
+	int ret;
+
+	evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req;
+	if (unlikely(!evtchnl))
+		return -EIO;
+
+	buf = dbuf_get(&front_info->dbuf_list, dbuf_cookie);
+	if (!buf)
+		return -EINVAL;
+
+	buf->fb_cookie = fb_cookie;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+	req = be_prepare_req(evtchnl, XENDISPL_OP_FB_ATTACH);
+	req->op.fb_attach.dbuf_cookie = dbuf_cookie;
+	req->op.fb_attach.fb_cookie = fb_cookie;
+	req->op.fb_attach.width = width;
+	req->op.fb_attach.height = height;
+	req->op.fb_attach.pixel_format = pixel_format;
+
+	ret = be_stream_do_io(evtchnl, req);
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_drm_front_fb_detach(struct xen_drm_front_info *front_info,
+			    u64 fb_cookie)
+{
+	struct xen_drm_front_evtchnl *evtchnl;
+	struct xendispl_req *req;
+	unsigned long flags;
+	int ret;
+
+	evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req;
+	if (unlikely(!evtchnl))
+		return -EIO;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+	req = be_prepare_req(evtchnl, XENDISPL_OP_FB_DETACH);
+	req->op.fb_detach.fb_cookie = fb_cookie;
+
+	ret = be_stream_do_io(evtchnl, req);
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_drm_front_page_flip(struct xen_drm_front_info *front_info,
+			    int conn_idx, u64 fb_cookie)
+{
+	struct xen_drm_front_evtchnl *evtchnl;
+	struct xendispl_req *req;
+	unsigned long flags;
+	int ret;
+
+	if (unlikely(conn_idx >= front_info->num_evt_pairs))
+		return -EINVAL;
+
+	dbuf_flush_fb(&front_info->dbuf_list, fb_cookie);
+	evtchnl = &front_info->evt_pairs[conn_idx].req;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+	req = be_prepare_req(evtchnl, XENDISPL_OP_PG_FLIP);
+	req->op.pg_flip.fb_cookie = fb_cookie;
+
+	ret = be_stream_do_io(evtchnl, req);
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+void xen_drm_front_on_frame_done(struct xen_drm_front_info *front_info,
+				 int conn_idx, u64 fb_cookie)
+{
+	struct xen_drm_front_drm_info *drm_info = front_info->drm_info;
+
+	if (unlikely(conn_idx >= front_info->cfg.num_connectors))
+		return;
+
+	xen_drm_front_kms_on_frame_done(&drm_info->pipeline[conn_idx],
+					fb_cookie);
+}
+
+static int xen_drm_drv_dumb_create(struct drm_file *filp,
+				   struct drm_device *dev,
+				   struct drm_mode_create_dumb *args)
+{
+	struct xen_drm_front_drm_info *drm_info = dev->dev_private;
+	struct drm_gem_object *obj;
+	int ret;
+
+	/*
+	 * Dumb creation is a two-stage process: first we create a fully
+	 * constructed GEM object, which is communicated to the backend, and
+	 * only after that do we create the GEM handle. This ordering avoids
+	 * a possible race: once a handle is created it becomes immediately
+	 * visible to user-space, which could then try to access an object
+	 * that has no pages yet.
+	 * For details also see drm_gem_handle_create().
+	 */
+	args->pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
+	args->size = args->pitch * args->height;
+
+	obj = xen_drm_front_gem_create(dev, args->size);
+	if (IS_ERR_OR_NULL(obj)) {
+		ret = PTR_ERR(obj);
+		goto fail;
+	}
+
+	ret = xen_drm_front_dbuf_create(drm_info->front_info,
+					xen_drm_front_dbuf_to_cookie(obj),
+					args->width, args->height, args->bpp,
+					args->size,
+					xen_drm_front_gem_get_pages(obj));
+	if (ret)
+		goto fail_backend;
+
+	/* This is the tail of GEM object creation */
+	ret = drm_gem_handle_create(filp, obj, &args->handle);
+	if (ret)
+		goto fail_handle;
+
+	/* Drop reference from allocate - handle holds it now */
+	drm_gem_object_put_unlocked(obj);
+	return 0;
+
+fail_handle:
+	xen_drm_front_dbuf_destroy(drm_info->front_info,
+				   xen_drm_front_dbuf_to_cookie(obj));
+fail_backend:
+	/* drop reference from allocate */
+	drm_gem_object_put_unlocked(obj);
+fail:
+	DRM_ERROR("Failed to create dumb buffer: %d\n", ret);
+	return ret;
+}
+
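+/*
+ * Illustrative user-space counterpart (assumed usage, not part of this
+ * driver): the two-stage creation above services the standard dumb-buffer
+ * ioctl, roughly:
+ *
+ *	struct drm_mode_create_dumb args = {
+ *		.width = 1920, .height = 1080, .bpp = 32,
+ *	};
+ *	ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &args);
+ *
+ * where args.handle becomes valid only once the backend knows the dbuf.
+ */
+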
+static void xen_drm_drv_free_object_unlocked(struct drm_gem_object *obj)
+{
+	struct xen_drm_front_drm_info *drm_info = obj->dev->dev_private;
+	int idx;
+
+	if (drm_dev_enter(obj->dev, &idx)) {
+		xen_drm_front_dbuf_destroy(drm_info->front_info,
+					   xen_drm_front_dbuf_to_cookie(obj));
+		drm_dev_exit(idx);
+	} else {
+		dbuf_free(&drm_info->front_info->dbuf_list,
+			  xen_drm_front_dbuf_to_cookie(obj));
+	}
+
+	xen_drm_front_gem_free_object_unlocked(obj);
+}
+
+static void xen_drm_drv_release(struct drm_device *dev)
+{
+	struct xen_drm_front_drm_info *drm_info = dev->dev_private;
+	struct xen_drm_front_info *front_info = drm_info->front_info;
+
+	xen_drm_front_kms_fini(drm_info);
+
+	drm_atomic_helper_shutdown(dev);
+	drm_mode_config_cleanup(dev);
+
+	drm_dev_fini(dev);
+	kfree(dev);
+
+	if (front_info->cfg.be_alloc)
+		xenbus_switch_state(front_info->xb_dev,
+				    XenbusStateInitialising);
+
+	kfree(drm_info);
+}
+
+static const struct file_operations xen_drm_dev_fops = {
+	.owner          = THIS_MODULE,
+	.open           = drm_open,
+	.release        = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl   = drm_compat_ioctl,
+#endif
+	.poll           = drm_poll,
+	.read           = drm_read,
+	.llseek         = no_llseek,
+	.mmap           = xen_drm_front_gem_mmap,
+};
+
+static const struct vm_operations_struct xen_drm_drv_vm_ops = {
+	.open           = drm_gem_vm_open,
+	.close          = drm_gem_vm_close,
+};
+
+static struct drm_driver xen_drm_driver = {
+	.driver_features           = DRIVER_GEM | DRIVER_MODESET |
+				     DRIVER_PRIME | DRIVER_ATOMIC,
+	.release                   = xen_drm_drv_release,
+	.gem_vm_ops                = &xen_drm_drv_vm_ops,
+	.gem_free_object_unlocked  = xen_drm_drv_free_object_unlocked,
+	.prime_handle_to_fd        = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle        = drm_gem_prime_fd_to_handle,
+	.gem_prime_import          = drm_gem_prime_import,
+	.gem_prime_export          = drm_gem_prime_export,
+	.gem_prime_import_sg_table = xen_drm_front_gem_import_sg_table,
+	.gem_prime_get_sg_table    = xen_drm_front_gem_get_sg_table,
+	.gem_prime_vmap            = xen_drm_front_gem_prime_vmap,
+	.gem_prime_vunmap          = xen_drm_front_gem_prime_vunmap,
+	.gem_prime_mmap            = xen_drm_front_gem_prime_mmap,
+	.dumb_create               = xen_drm_drv_dumb_create,
+	.fops                      = &xen_drm_dev_fops,
+	.name                      = "xendrm-du",
+	.desc                      = "Xen PV DRM Display Unit",
+	.date                      = "20180221",
+	.major                     = 1,
+	.minor                     = 0,
+};
+
+static int xen_drm_drv_init(struct xen_drm_front_info *front_info)
+{
+	struct device *dev = &front_info->xb_dev->dev;
+	struct xen_drm_front_drm_info *drm_info;
+	struct drm_device *drm_dev;
+	int ret;
+
+	DRM_INFO("Creating %s\n", xen_drm_driver.desc);
+
+	drm_info = kzalloc(sizeof(*drm_info), GFP_KERNEL);
+	if (!drm_info) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	drm_info->front_info = front_info;
+	front_info->drm_info = drm_info;
+
+	drm_dev = drm_dev_alloc(&xen_drm_driver, dev);
+	if (IS_ERR(drm_dev)) {
+		ret = PTR_ERR(drm_dev);
+		goto fail;
+	}
+
+	drm_info->drm_dev = drm_dev;
+
+	drm_dev->dev_private = drm_info;
+
+	ret = xen_drm_front_kms_init(drm_info);
+	if (ret) {
+		DRM_ERROR("Failed to initialize DRM/KMS, ret %d\n", ret);
+		goto fail_modeset;
+	}
+
+	ret = drm_dev_register(drm_dev, 0);
+	if (ret)
+		goto fail_modeset;
+
+	DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n",
+		 xen_drm_driver.name, xen_drm_driver.major,
+		 xen_drm_driver.minor, xen_drm_driver.patchlevel,
+		 xen_drm_driver.date, drm_dev->primary->index);
+
+	return 0;
+
+fail_modeset:
+	drm_kms_helper_poll_fini(drm_dev);
+	drm_mode_config_cleanup(drm_dev);
+fail:
+	kfree(drm_info);
+	return ret;
+}
+
+static void xen_drm_drv_fini(struct xen_drm_front_info *front_info)
+{
+	struct xen_drm_front_drm_info *drm_info = front_info->drm_info;
+	struct drm_device *dev;
+
+	if (!drm_info)
+		return;
+
+	dev = drm_info->drm_dev;
+	if (!dev)
+		return;
+
+	/* Nothing to do if device is already unplugged */
+	if (drm_dev_is_unplugged(dev))
+		return;
+
+	drm_kms_helper_poll_fini(dev);
+	drm_dev_unplug(dev);
+
+	front_info->drm_info = NULL;
+
+	xen_drm_front_evtchnl_free_all(front_info);
+	dbuf_free_all(&front_info->dbuf_list);
+
+	/*
+	 * If we are not using backend allocated buffers, then tell the
+	 * backend we are ready to (re)initialize. Otherwise, wait for
+	 * drm_driver.release.
+	 */
+	if (!front_info->cfg.be_alloc)
+		xenbus_switch_state(front_info->xb_dev,
+				    XenbusStateInitialising);
+}
+
+static int displback_initwait(struct xen_drm_front_info *front_info)
+{
+	struct xen_drm_front_cfg *cfg = &front_info->cfg;
+	int ret;
+
+	cfg->front_info = front_info;
+	ret = xen_drm_front_cfg_card(front_info, cfg);
+	if (ret < 0)
+		return ret;
+
+	DRM_INFO("Have %d connector(s)\n", cfg->num_connectors);
+	/* Create event channels for all connectors and publish */
+	ret = xen_drm_front_evtchnl_create_all(front_info);
+	if (ret < 0)
+		return ret;
+
+	return xen_drm_front_evtchnl_publish_all(front_info);
+}
+
+static int displback_connect(struct xen_drm_front_info *front_info)
+{
+	xen_drm_front_evtchnl_set_state(front_info, EVTCHNL_STATE_CONNECTED);
+	return xen_drm_drv_init(front_info);
+}
+
+static void displback_disconnect(struct xen_drm_front_info *front_info)
+{
+	if (!front_info->drm_info)
+		return;
+
+	/* Tell the backend to wait until we release the DRM driver. */
+	xenbus_switch_state(front_info->xb_dev, XenbusStateReconfiguring);
+
+	xen_drm_drv_fini(front_info);
+}
+
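+/*
+ * Summary of the state handling below (backend state on the left, required
+ * frontend state in parentheses), derived from the switch statement:
+ *
+ *	InitWait  (Initialising): read configuration, create and publish
+ *				  event channels, switch to Initialised
+ *	Connected (Initialised):  bring up the DRM/KMS driver, switch to
+ *				  Connected
+ *	Initialising, InitWait, Unknown or Closed at any other time:
+ *				  treat as unexpected backend closure and
+ *				  disconnect
+ */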
+static void displback_changed(struct xenbus_device *xb_dev,
+			      enum xenbus_state backend_state)
+{
+	struct xen_drm_front_info *front_info = dev_get_drvdata(&xb_dev->dev);
+	int ret;
+
+	DRM_DEBUG("Backend state is %s, front is %s\n",
+		  xenbus_strstate(backend_state),
+		  xenbus_strstate(xb_dev->state));
+
+	switch (backend_state) {
+	case XenbusStateReconfiguring:
+		/* fall through */
+	case XenbusStateReconfigured:
+		/* fall through */
+	case XenbusStateInitialised:
+		break;
+
+	case XenbusStateInitialising:
+		if (xb_dev->state == XenbusStateReconfiguring)
+			break;
+
+		/* recovering after backend unexpected closure */
+		displback_disconnect(front_info);
+		break;
+
+	case XenbusStateInitWait:
+		if (xb_dev->state == XenbusStateReconfiguring)
+			break;
+
+		/* recovering after backend unexpected closure */
+		displback_disconnect(front_info);
+		if (xb_dev->state != XenbusStateInitialising)
+			break;
+
+		ret = displback_initwait(front_info);
+		if (ret < 0)
+			xenbus_dev_fatal(xb_dev, ret, "initializing frontend");
+		else
+			xenbus_switch_state(xb_dev, XenbusStateInitialised);
+		break;
+
+	case XenbusStateConnected:
+		if (xb_dev->state != XenbusStateInitialised)
+			break;
+
+		ret = displback_connect(front_info);
+		if (ret < 0) {
+			displback_disconnect(front_info);
+			xenbus_dev_fatal(xb_dev, ret, "connecting backend");
+		} else {
+			xenbus_switch_state(xb_dev, XenbusStateConnected);
+		}
+		break;
+
+	case XenbusStateClosing:
+		/*
+		 * In this state the backend starts freeing resources, so
+		 * let it go into the closed state, so we can also remove
+		 * ours.
+		 */
+		break;
+
+	case XenbusStateUnknown:
+		/* fall through */
+	case XenbusStateClosed:
+		if (xb_dev->state == XenbusStateClosed)
+			break;
+
+		displback_disconnect(front_info);
+		break;
+	}
+}
+
+static int xen_drv_probe(struct xenbus_device *xb_dev,
+			 const struct xenbus_device_id *id)
+{
+	struct xen_drm_front_info *front_info;
+	struct device *dev = &xb_dev->dev;
+	int ret;
+
+	/*
+	 * The device is not spawned from a device tree, so arch_setup_dma_ops
+	 * is not called, thus leaving the device with dummy DMA ops.
+	 * This makes the device return an error on PRIME buffer import, which
+	 * is not correct: to fix this, call of_dma_configure() with a NULL
+	 * node to set the default DMA ops.
+	 */
+	dev->coherent_dma_mask = DMA_BIT_MASK(32);
+	ret = of_dma_configure(dev, NULL, true);
+	if (ret < 0) {
+		DRM_ERROR("Cannot setup DMA ops, ret %d", ret);
+		return ret;
+	}
+
+	front_info = devm_kzalloc(&xb_dev->dev,
+				  sizeof(*front_info), GFP_KERNEL);
+	if (!front_info)
+		return -ENOMEM;
+
+	front_info->xb_dev = xb_dev;
+	spin_lock_init(&front_info->io_lock);
+	INIT_LIST_HEAD(&front_info->dbuf_list);
+	dev_set_drvdata(&xb_dev->dev, front_info);
+
+	return xenbus_switch_state(xb_dev, XenbusStateInitialising);
+}
+
+static int xen_drv_remove(struct xenbus_device *dev)
+{
+	struct xen_drm_front_info *front_info = dev_get_drvdata(&dev->dev);
+	int to = 100;
+
+	xenbus_switch_state(dev, XenbusStateClosing);
+
+	/*
+	 * On driver removal it is disconnected from XenBus,
+	 * so no backend state change events come via the .otherend_changed
+	 * callback. This prevents us from exiting gracefully, e.g.
+	 * signaling the backend to free the event channels, waiting for its
+	 * state to change to XenbusStateClosed and cleaning up at our end.
+	 * Normally, when the front driver is removed, the backend will
+	 * finally go into the XenbusStateInitWait state.
+	 *
+	 * Workaround: read the backend's state manually and wait with a
+	 * timeout.
+	 */
+	while ((xenbus_read_unsigned(front_info->xb_dev->otherend, "state",
+				     XenbusStateUnknown) != XenbusStateInitWait) &&
+				     --to)
+		msleep(10);
+
+	if (!to) {
+		unsigned int state;
+
+		state = xenbus_read_unsigned(front_info->xb_dev->otherend,
+					     "state", XenbusStateUnknown);
+		DRM_ERROR("Backend state is %s while removing driver\n",
+			  xenbus_strstate(state));
+	}
+
+	xen_drm_drv_fini(front_info);
+	xenbus_frontend_closed(dev);
+	return 0;
+}
+
+static const struct xenbus_device_id xen_driver_ids[] = {
+	{ XENDISPL_DRIVER_NAME },
+	{ "" }
+};
+
+static struct xenbus_driver xen_driver = {
+	.ids = xen_driver_ids,
+	.probe = xen_drv_probe,
+	.remove = xen_drv_remove,
+	.otherend_changed = displback_changed,
+};
+
+static int __init xen_drv_init(void)
+{
+	/* At the moment we only support the case where XEN_PAGE_SIZE == PAGE_SIZE */
+	if (XEN_PAGE_SIZE != PAGE_SIZE) {
+		DRM_ERROR(XENDISPL_DRIVER_NAME ": different kernel and Xen page sizes are not supported: XEN_PAGE_SIZE (%lu) != PAGE_SIZE (%lu)\n",
+			  XEN_PAGE_SIZE, PAGE_SIZE);
+		return -ENODEV;
+	}
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	if (!xen_has_pv_devices())
+		return -ENODEV;
+
+	DRM_INFO("Registering XEN PV " XENDISPL_DRIVER_NAME "\n");
+	return xenbus_register_frontend(&xen_driver);
+}
+
+static void __exit xen_drv_fini(void)
+{
+	DRM_INFO("Unregistering XEN PV " XENDISPL_DRIVER_NAME "\n");
+	xenbus_unregister_driver(&xen_driver);
+}
+
+module_init(xen_drv_init);
+module_exit(xen_drv_fini);
+
+MODULE_DESCRIPTION("Xen para-virtualized display device frontend");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:" XENDISPL_DRIVER_NAME);
diff --git a/drivers/gpu/drm/xen/xen_drm_front.h b/drivers/gpu/drm/xen/xen_drm_front.h
new file mode 100644
index 0000000..2c2479b
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_DRM_FRONT_H_
+#define __XEN_DRM_FRONT_H_
+
+#include <drm/drmP.h>
+#include <drm/drm_simple_kms_helper.h>
+
+#include <linux/scatterlist.h>
+
+#include "xen_drm_front_cfg.h"
+
+/**
+ * DOC: Driver modes of operation in terms of display buffers used
+ *
+ * Depending on the requirements for the para-virtualized environment, namely
+ * requirements dictated by the accompanying DRM/(v)GPU drivers running in both
+ * host and guest environments, display buffers can be allocated by either the
+ * frontend driver or the backend.
+ */
+
+/**
+ * DOC: Buffers allocated by the frontend driver
+ *
+ * In this mode of operation the driver allocates buffers from system memory.
+ *
+ * Note: if used with accompanying DRM/(v)GPU drivers, this mode of operation
+ * may require IOMMU support on the platform, so the accompanying DRM/vGPU
+ * hardware can still reach the display buffer memory while importing PRIME
+ * buffers from the frontend driver.
+ */
+
+/**
+ * DOC: Buffers allocated by the backend
+ *
+ * This mode of operation is run-time configured via guest domain configuration
+ * through XenStore entries.
+ *
+ * For systems which do not provide IOMMU support but have specific
+ * requirements for display buffers, it is possible to allocate such buffers
+ * on the backend side and share those with the frontend.
+ * For example, if the host domain is 1:1 mapped and has DRM/GPU hardware
+ * expecting physically contiguous memory, this allows implementing zero-copy
+ * use-cases.
+ *
+ * Note: while using this scenario the following should be considered:
+ *
+ * #. If the guest domain dies, then pages/grants received from the backend
+ *    cannot be claimed back.
+ *
+ * #. A misbehaving guest may send too many requests to the
+ *    backend, exhausting its grant references and memory
+ *    (consider this from a security POV).
+ */
+
+/**
+ * DOC: Driver limitations
+ *
+ * #. Only the primary plane without additional properties is supported.
+ *
+ * #. Only one video mode per connector is supported, configured
+ *    via XenStore (see the example below).
+ *
+ * #. All CRTCs operate at a fixed frequency of 60Hz.
+ */
+
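+/**
+ * DOC: Example XenStore configuration
+ *
+ * A minimal sketch of the entries the driver reads, assuming the field
+ * names defined by the displif protocol; the domain/device path below is
+ * hypothetical:
+ *
+ * #. /local/domain/<domid>/device/vdispl/<devid>/be-alloc = "1"
+ *
+ * #. /local/domain/<domid>/device/vdispl/<devid>/0/resolution = "1920x1080"
+ */
+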
+/* timeout in ms to wait for backend to respond */
+#define XEN_DRM_FRONT_WAIT_BACK_MS	3000
+
+#ifndef GRANT_INVALID_REF
+/*
+ * Note on the usage of grant reference 0 as an invalid grant reference:
+ * grant reference 0 is valid, but never exposed to a PV driver,
+ * because it is already in use/reserved by the PV console.
+ */
+#define GRANT_INVALID_REF	0
+#endif
+
+struct xen_drm_front_info {
+	struct xenbus_device *xb_dev;
+	struct xen_drm_front_drm_info *drm_info;
+
+	/* to protect data between backend IO code and interrupt handler */
+	spinlock_t io_lock;
+
+	int num_evt_pairs;
+	struct xen_drm_front_evtchnl_pair *evt_pairs;
+	struct xen_drm_front_cfg cfg;
+
+	/* display buffers */
+	struct list_head dbuf_list;
+};
+
+struct xen_drm_front_drm_pipeline {
+	struct xen_drm_front_drm_info *drm_info;
+
+	int index;
+
+	struct drm_simple_display_pipe pipe;
+
+	struct drm_connector conn;
+	/* These are only for connector mode checking */
+	int width, height;
+
+	struct drm_pending_vblank_event *pending_event;
+
+	struct delayed_work pflip_to_worker;
+
+	bool conn_connected;
+};
+
+struct xen_drm_front_drm_info {
+	struct xen_drm_front_info *front_info;
+	struct drm_device *drm_dev;
+
+	struct xen_drm_front_drm_pipeline pipeline[XEN_DRM_FRONT_MAX_CRTCS];
+};
+
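+/*
+ * Display buffers and framebuffers are identified in the protocol by
+ * opaque 64-bit cookies; here these are simply the kernel pointers of
+ * the corresponding objects, as the backend never dereferences them.
+ */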
+static inline u64 xen_drm_front_fb_to_cookie(struct drm_framebuffer *fb)
+{
+	return (u64)fb;
+}
+
+static inline u64 xen_drm_front_dbuf_to_cookie(struct drm_gem_object *gem_obj)
+{
+	return (u64)gem_obj;
+}
+
+int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline,
+			   u32 x, u32 y, u32 width, u32 height,
+			   u32 bpp, u64 fb_cookie);
+
+int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
+			      u64 dbuf_cookie, u32 width, u32 height,
+			      u32 bpp, u64 size, struct page **pages);
+
+int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info,
+			    u64 dbuf_cookie, u64 fb_cookie, u32 width,
+			    u32 height, u32 pixel_format);
+
+int xen_drm_front_fb_detach(struct xen_drm_front_info *front_info,
+			    u64 fb_cookie);
+
+int xen_drm_front_page_flip(struct xen_drm_front_info *front_info,
+			    int conn_idx, u64 fb_cookie);
+
+void xen_drm_front_on_frame_done(struct xen_drm_front_info *front_info,
+				 int conn_idx, u64 fb_cookie);
+
+#endif /* __XEN_DRM_FRONT_H_ */
diff --git a/drivers/gpu/drm/xen/xen_drm_front_cfg.c b/drivers/gpu/drm/xen/xen_drm_front_cfg.c
new file mode 100644
index 0000000..5baf2b9
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_cfg.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <drm/drmP.h>
+
+#include <linux/device.h>
+
+#include <xen/interface/io/displif.h>
+#include <xen/xenbus.h>
+
+#include "xen_drm_front.h"
+#include "xen_drm_front_cfg.h"
+
+static int cfg_connector(struct xen_drm_front_info *front_info,
+			 struct xen_drm_front_cfg_connector *connector,
+			 const char *path, int index)
+{
+	char *connector_path;
+
+	connector_path = devm_kasprintf(&front_info->xb_dev->dev,
+					GFP_KERNEL, "%s/%d", path, index);
+	if (!connector_path)
+		return -ENOMEM;
+
+	if (xenbus_scanf(XBT_NIL, connector_path, XENDISPL_FIELD_RESOLUTION,
+			 "%d" XENDISPL_RESOLUTION_SEPARATOR "%d",
+			 &connector->width, &connector->height) < 0) {
+		/* either no entry configured or wrong resolution set */
+		connector->width = 0;
+		connector->height = 0;
+		return -EINVAL;
+	}
+
+	connector->xenstore_path = connector_path;
+
+	DRM_INFO("Connector %s: resolution %dx%d\n",
+		 connector_path, connector->width, connector->height);
+	return 0;
+}
+
+int xen_drm_front_cfg_card(struct xen_drm_front_info *front_info,
+			   struct xen_drm_front_cfg *cfg)
+{
+	struct xenbus_device *xb_dev = front_info->xb_dev;
+	int ret, i;
+
+	if (xenbus_read_unsigned(front_info->xb_dev->nodename,
+				 XENDISPL_FIELD_BE_ALLOC, 0)) {
+		DRM_INFO("Backend can provide display buffers\n");
+		cfg->be_alloc = true;
+	}
+
+	cfg->num_connectors = 0;
+	for (i = 0; i < ARRAY_SIZE(cfg->connectors); i++) {
+		ret = cfg_connector(front_info, &cfg->connectors[i],
+				    xb_dev->nodename, i);
+		if (ret < 0)
+			break;
+		cfg->num_connectors++;
+	}
+
+	if (!cfg->num_connectors) {
+		DRM_ERROR("No connector(s) configured at %s\n",
+			  xb_dev->nodename);
+		return -ENODEV;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/xen/xen_drm_front_cfg.h b/drivers/gpu/drm/xen/xen_drm_front_cfg.h
new file mode 100644
index 0000000..aa8490b
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_cfg.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_DRM_FRONT_CFG_H_
+#define __XEN_DRM_FRONT_CFG_H_
+
+#include <linux/types.h>
+
+#define XEN_DRM_FRONT_MAX_CRTCS	4
+
+struct xen_drm_front_cfg_connector {
+	int width;
+	int height;
+	char *xenstore_path;
+};
+
+struct xen_drm_front_cfg {
+	struct xen_drm_front_info *front_info;
+	/* number of connectors in this configuration */
+	int num_connectors;
+	/* connector configurations */
+	struct xen_drm_front_cfg_connector connectors[XEN_DRM_FRONT_MAX_CRTCS];
+	/* set if dumb buffers are allocated externally on backend side */
+	bool be_alloc;
+};
+
+int xen_drm_front_cfg_card(struct xen_drm_front_info *front_info,
+			   struct xen_drm_front_cfg *cfg);
+
+#endif /* __XEN_DRM_FRONT_CFG_H_ */
diff --git a/drivers/gpu/drm/xen/xen_drm_front_conn.c b/drivers/gpu/drm/xen/xen_drm_front_conn.c
new file mode 100644
index 0000000..c91ae53
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_conn.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+
+#include <video/videomode.h>
+
+#include "xen_drm_front.h"
+#include "xen_drm_front_conn.h"
+#include "xen_drm_front_kms.h"
+
+static struct xen_drm_front_drm_pipeline *
+to_xen_drm_pipeline(struct drm_connector *connector)
+{
+	return container_of(connector, struct xen_drm_front_drm_pipeline, conn);
+}
+
+static const u32 plane_formats[] = {
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGB888,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB4444,
+	DRM_FORMAT_ARGB4444,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_ARGB1555,
+};
+
+const u32 *xen_drm_front_conn_get_formats(int *format_count)
+{
+	*format_count = ARRAY_SIZE(plane_formats);
+	return plane_formats;
+}
+
+static int connector_detect(struct drm_connector *connector,
+			    struct drm_modeset_acquire_ctx *ctx,
+			    bool force)
+{
+	struct xen_drm_front_drm_pipeline *pipeline =
+			to_xen_drm_pipeline(connector);
+
+	if (drm_dev_is_unplugged(connector->dev))
+		pipeline->conn_connected = false;
+
+	return pipeline->conn_connected ? connector_status_connected :
+			connector_status_disconnected;
+}
+
+#define XEN_DRM_CRTC_VREFRESH_HZ	60
+
+static int connector_get_modes(struct drm_connector *connector)
+{
+	struct xen_drm_front_drm_pipeline *pipeline =
+			to_xen_drm_pipeline(connector);
+	struct drm_display_mode *mode;
+	struct videomode videomode;
+	int width, height;
+
+	mode = drm_mode_create(connector->dev);
+	if (!mode)
+		return 0;
+
+	memset(&videomode, 0, sizeof(videomode));
+	videomode.hactive = pipeline->width;
+	videomode.vactive = pipeline->height;
+	width = videomode.hactive + videomode.hfront_porch +
+			videomode.hback_porch + videomode.hsync_len;
+	height = videomode.vactive + videomode.vfront_porch +
+			videomode.vback_porch + videomode.vsync_len;
+	videomode.pixelclock = width * height * XEN_DRM_CRTC_VREFRESH_HZ;
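+	/* e.g. a 1920x1080 connector yields 1920 * 1080 * 60 = 124416000 Hz */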
+	mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+
+	drm_display_mode_from_videomode(&videomode, mode);
+	drm_mode_probed_add(connector, mode);
+	return 1;
+}
+
+static const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = connector_get_modes,
+	.detect_ctx = connector_detect,
+};
+
+static const struct drm_connector_funcs connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = drm_connector_cleanup,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+int xen_drm_front_conn_init(struct xen_drm_front_drm_info *drm_info,
+			    struct drm_connector *connector)
+{
+	struct xen_drm_front_drm_pipeline *pipeline =
+			to_xen_drm_pipeline(connector);
+
+	drm_connector_helper_add(connector, &connector_helper_funcs);
+
+	pipeline->conn_connected = true;
+
+	connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			DRM_CONNECTOR_POLL_DISCONNECT;
+
+	return drm_connector_init(drm_info->drm_dev, connector,
+				  &connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL);
+}
diff --git a/drivers/gpu/drm/xen/xen_drm_front_conn.h b/drivers/gpu/drm/xen/xen_drm_front_conn.h
new file mode 100644
index 0000000..39de7cf
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_conn.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_DRM_FRONT_CONN_H_
+#define __XEN_DRM_FRONT_CONN_H_
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_encoder.h>
+
+#include <linux/wait.h>
+
+struct xen_drm_front_drm_info;
+
+int xen_drm_front_conn_init(struct xen_drm_front_drm_info *drm_info,
+			    struct drm_connector *connector);
+
+const u32 *xen_drm_front_conn_get_formats(int *format_count);
+
+#endif /* __XEN_DRM_FRONT_CONN_H_ */
diff --git a/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c
new file mode 100644
index 0000000..945226a
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <drm/drmP.h>
+
+#include <linux/errno.h>
+#include <linux/irq.h>
+
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+
+#include "xen_drm_front.h"
+#include "xen_drm_front_evtchnl.h"
+
+static irqreturn_t evtchnl_interrupt_ctrl(int irq, void *dev_id)
+{
+	struct xen_drm_front_evtchnl *evtchnl = dev_id;
+	struct xen_drm_front_info *front_info = evtchnl->front_info;
+	struct xendispl_resp *resp;
+	RING_IDX i, rp;
+	unsigned long flags;
+
+	if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED))
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+
+again:
+	rp = evtchnl->u.req.ring.sring->rsp_prod;
+	/* ensure we see queued responses up to rp */
+	virt_rmb();
+
+	for (i = evtchnl->u.req.ring.rsp_cons; i != rp; i++) {
+		resp = RING_GET_RESPONSE(&evtchnl->u.req.ring, i);
+		if (unlikely(resp->id != evtchnl->evt_id))
+			continue;
+
+		switch (resp->operation) {
+		case XENDISPL_OP_PG_FLIP:
+		case XENDISPL_OP_FB_ATTACH:
+		case XENDISPL_OP_FB_DETACH:
+		case XENDISPL_OP_DBUF_CREATE:
+		case XENDISPL_OP_DBUF_DESTROY:
+		case XENDISPL_OP_SET_CONFIG:
+			evtchnl->u.req.resp_status = resp->status;
+			complete(&evtchnl->u.req.completion);
+			break;
+
+		default:
+			DRM_ERROR("Operation %d is not supported\n",
+				  resp->operation);
+			break;
+		}
+	}
+
+	evtchnl->u.req.ring.rsp_cons = i;
+
+	if (i != evtchnl->u.req.ring.req_prod_pvt) {
+		int more_to_do;
+
+		RING_FINAL_CHECK_FOR_RESPONSES(&evtchnl->u.req.ring,
+					       more_to_do);
+		if (more_to_do)
+			goto again;
+	} else {
+		evtchnl->u.req.ring.sring->rsp_event = i + 1;
+	}
+
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t evtchnl_interrupt_evt(int irq, void *dev_id)
+{
+	struct xen_drm_front_evtchnl *evtchnl = dev_id;
+	struct xen_drm_front_info *front_info = evtchnl->front_info;
+	struct xendispl_event_page *page = evtchnl->u.evt.page;
+	u32 cons, prod;
+	unsigned long flags;
+
+	if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED))
+		return IRQ_HANDLED;
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+
+	prod = page->in_prod;
+	/* ensure we see ring contents up to prod */
+	virt_rmb();
+	if (prod == page->in_cons)
+		goto out;
+
+	for (cons = page->in_cons; cons != prod; cons++) {
+		struct xendispl_evt *event;
+
+		event = &XENDISPL_IN_RING_REF(page, cons);
+		if (unlikely(event->id != evtchnl->evt_id++))
+			continue;
+
+		switch (event->type) {
+		case XENDISPL_EVT_PG_FLIP:
+			xen_drm_front_on_frame_done(front_info, evtchnl->index,
+						    event->op.pg_flip.fb_cookie);
+			break;
+		}
+	}
+	page->in_cons = cons;
+	/* ensure ring contents */
+	virt_wmb();
+
+out:
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+	return IRQ_HANDLED;
+}
+
+static void evtchnl_free(struct xen_drm_front_info *front_info,
+			 struct xen_drm_front_evtchnl *evtchnl)
+{
+	unsigned long page = 0;
+
+	if (evtchnl->type == EVTCHNL_TYPE_REQ)
+		page = (unsigned long)evtchnl->u.req.ring.sring;
+	else if (evtchnl->type == EVTCHNL_TYPE_EVT)
+		page = (unsigned long)evtchnl->u.evt.page;
+	if (!page)
+		return;
+
+	evtchnl->state = EVTCHNL_STATE_DISCONNECTED;
+
+	if (evtchnl->type == EVTCHNL_TYPE_REQ) {
+		/* release all who still wait for a response, if any */
+		evtchnl->u.req.resp_status = -EIO;
+		complete_all(&evtchnl->u.req.completion);
+	}
+
+	if (evtchnl->irq)
+		unbind_from_irqhandler(evtchnl->irq, evtchnl);
+
+	if (evtchnl->port)
+		xenbus_free_evtchn(front_info->xb_dev, evtchnl->port);
+
+	/* end access and free the page */
+	if (evtchnl->gref != GRANT_INVALID_REF)
+		gnttab_end_foreign_access(evtchnl->gref, 0, page);
+
+	memset(evtchnl, 0, sizeof(*evtchnl));
+}
+
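+/*
+ * Note: a REQ channel is backed by a standard Xen shared ring
+ * (SHARED_RING_INIT/FRONT_RING_INIT, granted via xenbus_grant_ring),
+ * while an EVT channel is backed by a raw xendispl_event_page granted
+ * with gnttab_grant_foreign_access; each gets its own event channel
+ * bound to an IRQ handler.
+ */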
+static int evtchnl_alloc(struct xen_drm_front_info *front_info, int index,
+			 struct xen_drm_front_evtchnl *evtchnl,
+			 enum xen_drm_front_evtchnl_type type)
+{
+	struct xenbus_device *xb_dev = front_info->xb_dev;
+	unsigned long page;
+	grant_ref_t gref;
+	irq_handler_t handler;
+	int ret;
+
+	memset(evtchnl, 0, sizeof(*evtchnl));
+	evtchnl->type = type;
+	evtchnl->index = index;
+	evtchnl->front_info = front_info;
+	evtchnl->state = EVTCHNL_STATE_DISCONNECTED;
+	evtchnl->gref = GRANT_INVALID_REF;
+
+	page = get_zeroed_page(GFP_NOIO | __GFP_HIGH);
+	if (!page) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (type == EVTCHNL_TYPE_REQ) {
+		struct xen_displif_sring *sring;
+
+		init_completion(&evtchnl->u.req.completion);
+		mutex_init(&evtchnl->u.req.req_io_lock);
+		sring = (struct xen_displif_sring *)page;
+		SHARED_RING_INIT(sring);
+		FRONT_RING_INIT(&evtchnl->u.req.ring, sring, XEN_PAGE_SIZE);
+
+		ret = xenbus_grant_ring(xb_dev, sring, 1, &gref);
+		if (ret < 0) {
+			evtchnl->u.req.ring.sring = NULL;
+			free_page(page);
+			goto fail;
+		}
+
+		handler = evtchnl_interrupt_ctrl;
+	} else {
+		ret = gnttab_grant_foreign_access(xb_dev->otherend_id,
+						  virt_to_gfn((void *)page), 0);
+		if (ret < 0) {
+			free_page(page);
+			goto fail;
+		}
+
+		evtchnl->u.evt.page = (struct xendispl_event_page *)page;
+		gref = ret;
+		handler = evtchnl_interrupt_evt;
+	}
+	evtchnl->gref = gref;
+
+	ret = xenbus_alloc_evtchn(xb_dev, &evtchnl->port);
+	if (ret < 0)
+		goto fail;
+
+	ret = bind_evtchn_to_irqhandler(evtchnl->port,
+					handler, 0, xb_dev->devicetype,
+					evtchnl);
+	if (ret < 0)
+		goto fail;
+
+	evtchnl->irq = ret;
+	return 0;
+
+fail:
+	DRM_ERROR("Failed to allocate ring: %d\n", ret);
+	return ret;
+}
+
+int xen_drm_front_evtchnl_create_all(struct xen_drm_front_info *front_info)
+{
+	struct xen_drm_front_cfg *cfg;
+	int ret, conn;
+
+	cfg = &front_info->cfg;
+
+	front_info->evt_pairs =
+			kcalloc(cfg->num_connectors,
+				sizeof(struct xen_drm_front_evtchnl_pair),
+				GFP_KERNEL);
+	if (!front_info->evt_pairs) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	for (conn = 0; conn < cfg->num_connectors; conn++) {
+		ret = evtchnl_alloc(front_info, conn,
+				    &front_info->evt_pairs[conn].req,
+				    EVTCHNL_TYPE_REQ);
+		if (ret < 0) {
+			DRM_ERROR("Error allocating control channel\n");
+			goto fail;
+		}
+
+		ret = evtchnl_alloc(front_info, conn,
+				    &front_info->evt_pairs[conn].evt,
+				    EVTCHNL_TYPE_EVT);
+		if (ret < 0) {
+			DRM_ERROR("Error allocating in-event channel\n");
+			goto fail;
+		}
+	}
+	front_info->num_evt_pairs = cfg->num_connectors;
+	return 0;
+
+fail:
+	xen_drm_front_evtchnl_free_all(front_info);
+	return ret;
+}
+
+static int evtchnl_publish(struct xenbus_transaction xbt,
+			   struct xen_drm_front_evtchnl *evtchnl,
+			   const char *path, const char *node_ring,
+			   const char *node_chnl)
+{
+	struct xenbus_device *xb_dev = evtchnl->front_info->xb_dev;
+	int ret;
+
+	/* write the ring reference */
+	ret = xenbus_printf(xbt, path, node_ring, "%u", evtchnl->gref);
+	if (ret < 0) {
+		xenbus_dev_error(xb_dev, ret, "writing ring-ref");
+		return ret;
+	}
+
+	/* write the event channel port */
+	ret = xenbus_printf(xbt, path, node_chnl, "%u", evtchnl->port);
+	if (ret < 0) {
+		xenbus_dev_error(xb_dev, ret, "writing event channel");
+		return ret;
+	}
+
+	return 0;
+}
+
+int xen_drm_front_evtchnl_publish_all(struct xen_drm_front_info *front_info)
+{
+	struct xenbus_transaction xbt;
+	struct xen_drm_front_cfg *plat_data;
+	int ret, conn;
+
+	plat_data = &front_info->cfg;
+
+again:
+	ret = xenbus_transaction_start(&xbt);
+	if (ret < 0) {
+		xenbus_dev_fatal(front_info->xb_dev, ret,
+				 "starting transaction");
+		return ret;
+	}
+
+	for (conn = 0; conn < plat_data->num_connectors; conn++) {
+		ret = evtchnl_publish(xbt, &front_info->evt_pairs[conn].req,
+				      plat_data->connectors[conn].xenstore_path,
+				      XENDISPL_FIELD_REQ_RING_REF,
+				      XENDISPL_FIELD_REQ_CHANNEL);
+		if (ret < 0)
+			goto fail;
+
+		ret = evtchnl_publish(xbt, &front_info->evt_pairs[conn].evt,
+				      plat_data->connectors[conn].xenstore_path,
+				      XENDISPL_FIELD_EVT_RING_REF,
+				      XENDISPL_FIELD_EVT_CHANNEL);
+		if (ret < 0)
+			goto fail;
+	}
+
+	ret = xenbus_transaction_end(xbt, 0);
+	if (ret < 0) {
+		if (ret == -EAGAIN)
+			goto again;
+
+		xenbus_dev_fatal(front_info->xb_dev, ret,
+				 "completing transaction");
+		goto fail_to_end;
+	}
+
+	return 0;
+
+fail:
+	xenbus_transaction_end(xbt, 1);
+
+fail_to_end:
+	xenbus_dev_fatal(front_info->xb_dev, ret, "writing Xen store");
+	return ret;
+}
+
+void xen_drm_front_evtchnl_flush(struct xen_drm_front_evtchnl *evtchnl)
+{
+	int notify;
+
+	evtchnl->u.req.ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&evtchnl->u.req.ring, notify);
+	if (notify)
+		notify_remote_via_irq(evtchnl->irq);
+}
+
+void xen_drm_front_evtchnl_set_state(struct xen_drm_front_info *front_info,
+				     enum xen_drm_front_evtchnl_state state)
+{
+	unsigned long flags;
+	int i;
+
+	if (!front_info->evt_pairs)
+		return;
+
+	spin_lock_irqsave(&front_info->io_lock, flags);
+	for (i = 0; i < front_info->num_evt_pairs; i++) {
+		front_info->evt_pairs[i].req.state = state;
+		front_info->evt_pairs[i].evt.state = state;
+	}
+	spin_unlock_irqrestore(&front_info->io_lock, flags);
+}
+
+void xen_drm_front_evtchnl_free_all(struct xen_drm_front_info *front_info)
+{
+	int i;
+
+	if (!front_info->evt_pairs)
+		return;
+
+	for (i = 0; i < front_info->num_evt_pairs; i++) {
+		evtchnl_free(front_info, &front_info->evt_pairs[i].req);
+		evtchnl_free(front_info, &front_info->evt_pairs[i].evt);
+	}
+
+	kfree(front_info->evt_pairs);
+	front_info->evt_pairs = NULL;
+}
diff --git a/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h
new file mode 100644
index 0000000..b0af699
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_DRM_FRONT_EVTCHNL_H_
+#define __XEN_DRM_FRONT_EVTCHNL_H_
+
+#include <linux/completion.h>
+#include <linux/types.h>
+
+#include <xen/interface/io/ring.h>
+#include <xen/interface/io/displif.h>
+
+/*
+ * All operations which are not connector oriented use this ctrl event channel,
+ * e.g. fb_attach/destroy which belong to a DRM device, not to a CRTC.
+ */
+#define GENERIC_OP_EVT_CHNL	0
+
+enum xen_drm_front_evtchnl_state {
+	EVTCHNL_STATE_DISCONNECTED,
+	EVTCHNL_STATE_CONNECTED,
+};
+
+enum xen_drm_front_evtchnl_type {
+	EVTCHNL_TYPE_REQ,
+	EVTCHNL_TYPE_EVT,
+};
+
+struct xen_drm_front_drm_info;
+
+struct xen_drm_front_evtchnl {
+	struct xen_drm_front_info *front_info;
+	int gref;
+	int port;
+	int irq;
+	int index;
+	enum xen_drm_front_evtchnl_state state;
+	enum xen_drm_front_evtchnl_type type;
+	/* either response id or incoming event id */
+	u16 evt_id;
+	/* next request id or next expected event id */
+	u16 evt_next_id;
+	union {
+		struct {
+			struct xen_displif_front_ring ring;
+			struct completion completion;
+			/* latest response status */
+			int resp_status;
+			/* serializer for backend IO: request/response */
+			struct mutex req_io_lock;
+		} req;
+		struct {
+			struct xendispl_event_page *page;
+		} evt;
+	} u;
+};
+
+struct xen_drm_front_evtchnl_pair {
+	struct xen_drm_front_evtchnl req;
+	struct xen_drm_front_evtchnl evt;
+};
+
+int xen_drm_front_evtchnl_create_all(struct xen_drm_front_info *front_info);
+
+int xen_drm_front_evtchnl_publish_all(struct xen_drm_front_info *front_info);
+
+void xen_drm_front_evtchnl_flush(struct xen_drm_front_evtchnl *evtchnl);
+
+void xen_drm_front_evtchnl_set_state(struct xen_drm_front_info *front_info,
+				     enum xen_drm_front_evtchnl_state state);
+
+void xen_drm_front_evtchnl_free_all(struct xen_drm_front_info *front_info);
+
+#endif /* __XEN_DRM_FRONT_EVTCHNL_H_ */
diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c
new file mode 100644
index 0000000..c85bfe7
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include "xen_drm_front_gem.h"
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem.h>
+
+#include <linux/dma-buf.h>
+#include <linux/scatterlist.h>
+#include <linux/shmem_fs.h>
+
+#include <xen/balloon.h>
+
+#include "xen_drm_front.h"
+#include "xen_drm_front_shbuf.h"
+
+struct xen_gem_object {
+	struct drm_gem_object base;
+
+	size_t num_pages;
+	struct page **pages;
+
+	/* set for buffers allocated by the backend */
+	bool be_alloc;
+
+	/* this is for imported PRIME buffer */
+	struct sg_table *sgt_imported;
+};
+
+static inline struct xen_gem_object *
+to_xen_gem_obj(struct drm_gem_object *gem_obj)
+{
+	return container_of(gem_obj, struct xen_gem_object, base);
+}
+
+static int gem_alloc_pages_array(struct xen_gem_object *xen_obj,
+				 size_t buf_size)
+{
+	xen_obj->num_pages = DIV_ROUND_UP(buf_size, PAGE_SIZE);
+	xen_obj->pages = kvmalloc_array(xen_obj->num_pages,
+					sizeof(struct page *), GFP_KERNEL);
+	return !xen_obj->pages ? -ENOMEM : 0;
+}
+
+static void gem_free_pages_array(struct xen_gem_object *xen_obj)
+{
+	kvfree(xen_obj->pages);
+	xen_obj->pages = NULL;
+}
+
+static struct xen_gem_object *gem_create_obj(struct drm_device *dev,
+					     size_t size)
+{
+	struct xen_gem_object *xen_obj;
+	int ret;
+
+	xen_obj = kzalloc(sizeof(*xen_obj), GFP_KERNEL);
+	if (!xen_obj)
+		return ERR_PTR(-ENOMEM);
+
+	ret = drm_gem_object_init(dev, &xen_obj->base, size);
+	if (ret < 0) {
+		kfree(xen_obj);
+		return ERR_PTR(ret);
+	}
+
+	return xen_obj;
+}
+
+static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size)
+{
+	struct xen_drm_front_drm_info *drm_info = dev->dev_private;
+	struct xen_gem_object *xen_obj;
+	int ret;
+
+	size = round_up(size, PAGE_SIZE);
+	xen_obj = gem_create_obj(dev, size);
+	if (IS_ERR_OR_NULL(xen_obj))
+		return xen_obj;
+
+	if (drm_info->front_info->cfg.be_alloc) {
+		/*
+		 * backend will allocate space for this buffer, so
+		 * only allocate array of pointers to pages
+		 */
+		ret = gem_alloc_pages_array(xen_obj, size);
+		if (ret < 0)
+			goto fail;
+
+		/*
+		 * allocate ballooned pages which will be used to map
+		 * grant references provided by the backend
+		 */
+		ret = alloc_xenballooned_pages(xen_obj->num_pages,
+					       xen_obj->pages);
+		if (ret < 0) {
+			DRM_ERROR("Cannot allocate %zu ballooned pages: %d\n",
+				  xen_obj->num_pages, ret);
+			gem_free_pages_array(xen_obj);
+			goto fail;
+		}
+
+		xen_obj->be_alloc = true;
+		return xen_obj;
+	}
+	/*
+	 * need to allocate backing pages now, so we can share those
+	 * with the backend
+	 */
+	xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+	xen_obj->pages = drm_gem_get_pages(&xen_obj->base);
+	if (IS_ERR_OR_NULL(xen_obj->pages)) {
+		ret = PTR_ERR(xen_obj->pages);
+		xen_obj->pages = NULL;
+		goto fail;
+	}
+
+	return xen_obj;
+
+fail:
+	DRM_ERROR("Failed to allocate buffer with size %zu\n", size);
+	return ERR_PTR(ret);
+}
+
+struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev,
+						size_t size)
+{
+	struct xen_gem_object *xen_obj;
+
+	xen_obj = gem_create(dev, size);
+	if (IS_ERR_OR_NULL(xen_obj))
+		return ERR_CAST(xen_obj);
+
+	return &xen_obj->base;
+}
+
+void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj)
+{
+	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
+
+	if (xen_obj->base.import_attach) {
+		drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt_imported);
+		gem_free_pages_array(xen_obj);
+	} else {
+		if (xen_obj->pages) {
+			if (xen_obj->be_alloc) {
+				free_xenballooned_pages(xen_obj->num_pages,
+							xen_obj->pages);
+				gem_free_pages_array(xen_obj);
+			} else {
+				drm_gem_put_pages(&xen_obj->base,
+						  xen_obj->pages, true, false);
+			}
+		}
+	}
+	drm_gem_object_release(gem_obj);
+	kfree(xen_obj);
+}
+
+struct page **xen_drm_front_gem_get_pages(struct drm_gem_object *gem_obj)
+{
+	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
+
+	return xen_obj->pages;
+}
+
+struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj)
+{
+	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
+
+	if (!xen_obj->pages)
+		return NULL;
+
+	return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages);
+}
+
+struct drm_gem_object *
+xen_drm_front_gem_import_sg_table(struct drm_device *dev,
+				  struct dma_buf_attachment *attach,
+				  struct sg_table *sgt)
+{
+	struct xen_drm_front_drm_info *drm_info = dev->dev_private;
+	struct xen_gem_object *xen_obj;
+	size_t size;
+	int ret;
+
+	size = attach->dmabuf->size;
+	xen_obj = gem_create_obj(dev, size);
+	if (IS_ERR_OR_NULL(xen_obj))
+		return ERR_CAST(xen_obj);
+
+	ret = gem_alloc_pages_array(xen_obj, size);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	xen_obj->sgt_imported = sgt;
+
+	ret = drm_prime_sg_to_page_addr_arrays(sgt, xen_obj->pages,
+					       NULL, xen_obj->num_pages);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	ret = xen_drm_front_dbuf_create(drm_info->front_info,
+					xen_drm_front_dbuf_to_cookie(&xen_obj->base),
+					0, 0, 0, size, xen_obj->pages);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	DRM_DEBUG("Imported buffer of size %zu with nents %u\n",
+		  size, sgt->nents);
+
+	return &xen_obj->base;
+}
+
+static int gem_mmap_obj(struct xen_gem_object *xen_obj,
+			struct vm_area_struct *vma)
+{
+	unsigned long addr = vma->vm_start;
+	int i;
+
+	/*
+	 * clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+	 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+	 * the whole buffer.
+	 */
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+	vma->vm_pgoff = 0;
+	vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+
+	/*
+	 * The vm_operations_struct.fault handler would be called on CPU
+	 * access to the VM area. For GPUs this isn't the case, because the
+	 * CPU doesn't touch the memory. Insert all pages now, so both CPU
+	 * and GPU are happy.
+	 * FIXME: as all pages are inserted here, no .fault handler should
+	 * ever be called, so don't provide one.
+	 */
+	for (i = 0; i < xen_obj->num_pages; i++) {
+		int ret;
+
+		ret = vm_insert_page(vma, addr, xen_obj->pages[i]);
+		if (ret < 0) {
+			DRM_ERROR("Failed to insert pages into vma: %d\n", ret);
+			return ret;
+		}
+
+		addr += PAGE_SIZE;
+	}
+	return 0;
+}
+
+int xen_drm_front_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct xen_gem_object *xen_obj;
+	struct drm_gem_object *gem_obj;
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret < 0)
+		return ret;
+
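+	/* drm_gem_mmap() stored the GEM object in vma->vm_private_data */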
+	gem_obj = vma->vm_private_data;
+	xen_obj = to_xen_gem_obj(gem_obj);
+	return gem_mmap_obj(xen_obj, vma);
+}
+
+void *xen_drm_front_gem_prime_vmap(struct drm_gem_object *gem_obj)
+{
+	struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj);
+
+	if (!xen_obj->pages)
+		return NULL;
+
+	return vmap(xen_obj->pages, xen_obj->num_pages,
+		    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
+void xen_drm_front_gem_prime_vunmap(struct drm_gem_object *gem_obj,
+				    void *vaddr)
+{
+	vunmap(vaddr);
+}
+
+int xen_drm_front_gem_prime_mmap(struct drm_gem_object *gem_obj,
+				 struct vm_area_struct *vma)
+{
+	struct xen_gem_object *xen_obj;
+	int ret;
+
+	ret = drm_gem_mmap_obj(gem_obj, gem_obj->size, vma);
+	if (ret < 0)
+		return ret;
+
+	xen_obj = to_xen_gem_obj(gem_obj);
+	return gem_mmap_obj(xen_obj, vma);
+}
diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.h b/drivers/gpu/drm/xen/xen_drm_front_gem.h
new file mode 100644
index 0000000..d5ab734
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_gem.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_DRM_FRONT_GEM_H
+#define __XEN_DRM_FRONT_GEM_H
+
+#include <drm/drmP.h>
+
+struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev,
+						size_t size);
+
+struct drm_gem_object *
+xen_drm_front_gem_import_sg_table(struct drm_device *dev,
+				  struct dma_buf_attachment *attach,
+				  struct sg_table *sgt);
+
+struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj);
+
+struct page **xen_drm_front_gem_get_pages(struct drm_gem_object *obj);
+
+void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj);
+
+int xen_drm_front_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+
+void *xen_drm_front_gem_prime_vmap(struct drm_gem_object *gem_obj);
+
+void xen_drm_front_gem_prime_vunmap(struct drm_gem_object *gem_obj,
+				    void *vaddr);
+
+int xen_drm_front_gem_prime_mmap(struct drm_gem_object *gem_obj,
+				 struct vm_area_struct *vma);
+
+#endif /* __XEN_DRM_FRONT_GEM_H */
diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c
new file mode 100644
index 0000000..a3479eb
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include "xen_drm_front_kms.h"
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+
+#include "xen_drm_front.h"
+#include "xen_drm_front_conn.h"
+
+/*
+ * Timeout in ms to wait for the frame done event from the backend:
+ * must be a bit more than the IO time-out
+ */
+#define FRAME_DONE_TO_MS	(XEN_DRM_FRONT_WAIT_BACK_MS + 100)
+
+static struct xen_drm_front_drm_pipeline *
+to_xen_drm_pipeline(struct drm_simple_display_pipe *pipe)
+{
+	return container_of(pipe, struct xen_drm_front_drm_pipeline, pipe);
+}
+
+static void fb_destroy(struct drm_framebuffer *fb)
+{
+	struct xen_drm_front_drm_info *drm_info = fb->dev->dev_private;
+	int idx;
+
+	if (drm_dev_enter(fb->dev, &idx)) {
+		xen_drm_front_fb_detach(drm_info->front_info,
+					xen_drm_front_fb_to_cookie(fb));
+		drm_dev_exit(idx);
+	}
+	drm_gem_fb_destroy(fb);
+}
+
+static const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = fb_destroy,
+};
+
+static struct drm_framebuffer *
+fb_create(struct drm_device *dev, struct drm_file *filp,
+	  const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct xen_drm_front_drm_info *drm_info = dev->dev_private;
+	struct drm_framebuffer *fb;
+	struct drm_gem_object *gem_obj;
+	int ret;
+
+	fb = drm_gem_fb_create_with_funcs(dev, filp, mode_cmd, &fb_funcs);
+	if (IS_ERR(fb))
+		return fb;
+
+	gem_obj = drm_gem_object_lookup(filp, mode_cmd->handles[0]);
+	if (!gem_obj) {
+		DRM_ERROR("Failed to lookup GEM object\n");
+		ret = -ENOENT;
+		goto fail;
+	}
+
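+	/* the framebuffer holds its own reference to the GEM object */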
+	drm_gem_object_put_unlocked(gem_obj);
+
+	ret = xen_drm_front_fb_attach(drm_info->front_info,
+				      xen_drm_front_dbuf_to_cookie(gem_obj),
+				      xen_drm_front_fb_to_cookie(fb),
+				      fb->width, fb->height,
+				      fb->format->format);
+	if (ret < 0) {
+		DRM_ERROR("Backend failed to attach FB %p: %d\n", fb, ret);
+		goto fail;
+	}
+
+	return fb;
+
+fail:
+	drm_gem_fb_destroy(fb);
+	return ERR_PTR(ret);
+}
+
+static const struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = fb_create,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+};
+
+static void send_pending_event(struct xen_drm_front_drm_pipeline *pipeline)
+{
+	struct drm_crtc *crtc = &pipeline->pipe.crtc;
+	struct drm_device *dev = crtc->dev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+	if (pipeline->pending_event)
+		drm_crtc_send_vblank_event(crtc, pipeline->pending_event);
+	pipeline->pending_event = NULL;
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+}
+
+static void display_enable(struct drm_simple_display_pipe *pipe,
+			   struct drm_crtc_state *crtc_state,
+			   struct drm_plane_state *plane_state)
+{
+	struct xen_drm_front_drm_pipeline *pipeline =
+			to_xen_drm_pipeline(pipe);
+	struct drm_crtc *crtc = &pipe->crtc;
+	struct drm_framebuffer *fb = plane_state->fb;
+	int ret, idx;
+
+	if (!drm_dev_enter(pipe->crtc.dev, &idx))
+		return;
+
+	ret = xen_drm_front_mode_set(pipeline, crtc->x, crtc->y,
+				     fb->width, fb->height,
+				     fb->format->cpp[0] * 8,
+				     xen_drm_front_fb_to_cookie(fb));
+
+	if (ret) {
+		DRM_ERROR("Failed to enable display: %d\n", ret);
+		pipeline->conn_connected = false;
+	}
+
+	drm_dev_exit(idx);
+}
+
+static void display_disable(struct drm_simple_display_pipe *pipe)
+{
+	struct xen_drm_front_drm_pipeline *pipeline =
+			to_xen_drm_pipeline(pipe);
+	int ret = 0, idx;
+
+	if (drm_dev_enter(pipe->crtc.dev, &idx)) {
+		ret = xen_drm_front_mode_set(pipeline, 0, 0, 0, 0, 0,
+					     xen_drm_front_fb_to_cookie(NULL));
+		drm_dev_exit(idx);
+	}
+	if (ret)
+		DRM_ERROR("Failed to disable display: %d\n", ret);
+
+	/* Make sure we can restart with an enabled connector next time */
+	pipeline->conn_connected = true;
+
+	/* release stalled event if any */
+	send_pending_event(pipeline);
+}
+
+void xen_drm_front_kms_on_frame_done(struct xen_drm_front_drm_pipeline *pipeline,
+				     u64 fb_cookie)
+{
+	/*
+	 * This runs in interrupt context, i.e. under
+	 * drm_info->front_info->io_lock, so we cannot call the _sync version
+	 * to cancel the work
+	 */
+	cancel_delayed_work(&pipeline->pflip_to_worker);
+
+	send_pending_event(pipeline);
+}
+
+static void pflip_to_worker(struct work_struct *work)
+{
+	struct delayed_work *delayed_work = to_delayed_work(work);
+	struct xen_drm_front_drm_pipeline *pipeline =
+			container_of(delayed_work,
+				     struct xen_drm_front_drm_pipeline,
+				     pflip_to_worker);
+
+	DRM_ERROR("Frame done timed-out, releasing\n");
+	send_pending_event(pipeline);
+}
+
+static bool display_send_page_flip(struct drm_simple_display_pipe *pipe,
+				   struct drm_plane_state *old_plane_state)
+{
+	struct drm_plane_state *plane_state =
+			drm_atomic_get_new_plane_state(old_plane_state->state,
+						       &pipe->plane);
+
+	/*
+	 * If old_plane_state->fb is NULL and plane_state->fb is not,
+	 * then this is an atomic commit which will enable the display.
+	 * If old_plane_state->fb is not NULL and plane_state->fb is NULL,
+	 * then this is an atomic commit which will disable the display.
+	 * Ignore these and do not send a page flip, as this framebuffer
+	 * will be sent to the backend as a part of the display_set_config call.
+	 */
+	if (old_plane_state->fb && plane_state->fb) {
+		struct xen_drm_front_drm_pipeline *pipeline =
+				to_xen_drm_pipeline(pipe);
+		struct xen_drm_front_drm_info *drm_info = pipeline->drm_info;
+		int ret;
+
+		schedule_delayed_work(&pipeline->pflip_to_worker,
+				      msecs_to_jiffies(FRAME_DONE_TO_MS));
+
+		ret = xen_drm_front_page_flip(drm_info->front_info,
+					      pipeline->index,
+					      xen_drm_front_fb_to_cookie(plane_state->fb));
+		if (ret) {
+			DRM_ERROR("Failed to send page flip request to backend: %d\n", ret);
+
+			pipeline->conn_connected = false;
+			/*
+			 * Report the flip as not handled, so the pending
+			 * event is sent, unblocking user-space.
+			 */
+			return false;
+		}
+		/*
+		 * Signal that the page flip was handled; the pending event
+		 * will be sent on the frame done event from the backend.
+		 */
+		return true;
+	}
+
+	return false;
+}
+
+static void display_update(struct drm_simple_display_pipe *pipe,
+			   struct drm_plane_state *old_plane_state)
+{
+	struct xen_drm_front_drm_pipeline *pipeline =
+			to_xen_drm_pipeline(pipe);
+	struct drm_crtc *crtc = &pipe->crtc;
+	struct drm_pending_vblank_event *event;
+	int idx;
+
+	event = crtc->state->event;
+	if (event) {
+		struct drm_device *dev = crtc->dev;
+		unsigned long flags;
+
+		WARN_ON(pipeline->pending_event);
+
+		spin_lock_irqsave(&dev->event_lock, flags);
+		crtc->state->event = NULL;
+
+		pipeline->pending_event = event;
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	}
+
+	if (!drm_dev_enter(pipe->crtc.dev, &idx)) {
+		send_pending_event(pipeline);
+		return;
+	}
+
+	/*
+	 * Send the page flip request to the backend *after* we have cached
+	 * the event above, so when the page flip done event arrives from the
+	 * backend we can deliver it and there is no race between this code
+	 * and the event from the backend.
+	 * If this is not a page flip, i.e. no flip done event from the
+	 * backend is expected, then send the event now.
+	 */
+	if (!display_send_page_flip(pipe, old_plane_state))
+		send_pending_event(pipeline);
+
+	drm_dev_exit(idx);
+}
+
+static enum drm_mode_status
+display_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode)
+{
+	struct xen_drm_front_drm_pipeline *pipeline =
+			container_of(crtc, struct xen_drm_front_drm_pipeline,
+				     pipe.crtc);
+
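+	/* only the mode matching the configured connector size is valid */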
+	if (mode->hdisplay != pipeline->width)
+		return MODE_ERROR;
+
+	if (mode->vdisplay != pipeline->height)
+		return MODE_ERROR;
+
+	return MODE_OK;
+}
+
+static const struct drm_simple_display_pipe_funcs display_funcs = {
+	.mode_valid = display_mode_valid,
+	.enable = display_enable,
+	.disable = display_disable,
+	.prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb,
+	.update = display_update,
+};
+
+static int display_pipe_init(struct xen_drm_front_drm_info *drm_info,
+			     int index, struct xen_drm_front_cfg_connector *cfg,
+			     struct xen_drm_front_drm_pipeline *pipeline)
+{
+	struct drm_device *dev = drm_info->drm_dev;
+	const u32 *formats;
+	int format_count;
+	int ret;
+
+	pipeline->drm_info = drm_info;
+	pipeline->index = index;
+	pipeline->height = cfg->height;
+	pipeline->width = cfg->width;
+
+	INIT_DELAYED_WORK(&pipeline->pflip_to_worker, pflip_to_worker);
+
+	ret = xen_drm_front_conn_init(drm_info, &pipeline->conn);
+	if (ret)
+		return ret;
+
+	formats = xen_drm_front_conn_get_formats(&format_count);
+
+	return drm_simple_display_pipe_init(dev, &pipeline->pipe,
+					    &display_funcs, formats,
+					    format_count, NULL,
+					    &pipeline->conn);
+}
+
+int xen_drm_front_kms_init(struct xen_drm_front_drm_info *drm_info)
+{
+	struct drm_device *dev = drm_info->drm_dev;
+	int i, ret;
+
+	drm_mode_config_init(dev);
+
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+	dev->mode_config.max_width = 4095;
+	dev->mode_config.max_height = 2047;
+	dev->mode_config.funcs = &mode_config_funcs;
+
+	for (i = 0; i < drm_info->front_info->cfg.num_connectors; i++) {
+		struct xen_drm_front_cfg_connector *cfg =
+				&drm_info->front_info->cfg.connectors[i];
+		struct xen_drm_front_drm_pipeline *pipeline =
+				&drm_info->pipeline[i];
+
+		ret = display_pipe_init(drm_info, i, cfg, pipeline);
+		if (ret) {
+			drm_mode_config_cleanup(dev);
+			return ret;
+		}
+	}
+
+	drm_mode_config_reset(dev);
+	drm_kms_helper_poll_init(dev);
+	return 0;
+}
+
+void xen_drm_front_kms_fini(struct xen_drm_front_drm_info *drm_info)
+{
+	int i;
+
+	for (i = 0; i < drm_info->front_info->cfg.num_connectors; i++) {
+		struct xen_drm_front_drm_pipeline *pipeline =
+				&drm_info->pipeline[i];
+
+		cancel_delayed_work_sync(&pipeline->pflip_to_worker);
+
+		send_pending_event(pipeline);
+	}
+}
diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.h b/drivers/gpu/drm/xen/xen_drm_front_kms.h
new file mode 100644
index 0000000..ab2fbad
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_kms.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_DRM_FRONT_KMS_H_
+#define __XEN_DRM_FRONT_KMS_H_
+
+#include <linux/types.h>
+
+struct xen_drm_front_drm_info;
+struct xen_drm_front_drm_pipeline;
+
+int xen_drm_front_kms_init(struct xen_drm_front_drm_info *drm_info);
+
+void xen_drm_front_kms_fini(struct xen_drm_front_drm_info *drm_info);
+
+void xen_drm_front_kms_on_frame_done(struct xen_drm_front_drm_pipeline *pipeline,
+				     u64 fb_cookie);
+
+#endif /* __XEN_DRM_FRONT_KMS_H_ */
diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c
new file mode 100644
index 0000000..8099cb3
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <drm/drmP.h>
+
+#if defined(CONFIG_X86)
+#include <drm/drm_cache.h>
+#endif
+#include <linux/errno.h>
+#include <linux/mm.h>
+
+#include <asm/xen/hypervisor.h>
+#include <xen/balloon.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/interface/io/ring.h>
+#include <xen/interface/io/displif.h>
+
+#include "xen_drm_front.h"
+#include "xen_drm_front_shbuf.h"
+
+struct xen_drm_front_shbuf_ops {
+	/*
+	 * Calculate the number of grefs required to handle this buffer,
+	 * i.e. whether grefs are required for the page directory only or
+	 * for the buffer pages as well.
+	 */
+	void (*calc_num_grefs)(struct xen_drm_front_shbuf *buf);
+	/* Fill page directory according to para-virtual display protocol. */
+	void (*fill_page_dir)(struct xen_drm_front_shbuf *buf);
+	/* Claim grant references for the pages of the buffer. */
+	int (*grant_refs_for_buffer)(struct xen_drm_front_shbuf *buf,
+				     grant_ref_t *priv_gref_head, int gref_idx);
+	/* Map grant references of the buffer. */
+	int (*map)(struct xen_drm_front_shbuf *buf);
+	/* Unmap grant references of the buffer. */
+	int (*unmap)(struct xen_drm_front_shbuf *buf);
+};
+
+grant_ref_t xen_drm_front_shbuf_get_dir_start(struct xen_drm_front_shbuf *buf)
+{
+	if (!buf->grefs)
+		return GRANT_INVALID_REF;
+
+	return buf->grefs[0];
+}
+
+int xen_drm_front_shbuf_map(struct xen_drm_front_shbuf *buf)
+{
+	if (buf->ops->map)
+		return buf->ops->map(buf);
+
+	/* no need to map own grant references */
+	return 0;
+}
+
+int xen_drm_front_shbuf_unmap(struct xen_drm_front_shbuf *buf)
+{
+	if (buf->ops->unmap)
+		return buf->ops->unmap(buf);
+
+	/* no need to unmap own grant references */
+	return 0;
+}
+
+void xen_drm_front_shbuf_flush(struct xen_drm_front_shbuf *buf)
+{
+#if defined(CONFIG_X86)
+	drm_clflush_pages(buf->pages, buf->num_pages);
+#endif
+}
+
+void xen_drm_front_shbuf_free(struct xen_drm_front_shbuf *buf)
+{
+	if (buf->grefs) {
+		int i;
+
+		for (i = 0; i < buf->num_grefs; i++)
+			if (buf->grefs[i] != GRANT_INVALID_REF)
+				gnttab_end_foreign_access(buf->grefs[i],
+							  0, 0UL);
+	}
+	kfree(buf->grefs);
+	kfree(buf->directory);
+	kfree(buf);
+}
+
+/*
+ * number of grefs a page can hold with respect to the
+ * struct xendispl_page_directory header
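+ * (e.g. 1023 grefs per page for 4 KiB pages and 4-byte grant references)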
+ */
+#define XEN_DRM_NUM_GREFS_PER_PAGE ((PAGE_SIZE - \
+		offsetof(struct xendispl_page_directory, gref)) / \
+		sizeof(grant_ref_t))
+
+static int get_num_pages_dir(struct xen_drm_front_shbuf *buf)
+{
+	/* number of pages consumed by the page directory itself */
+	return DIV_ROUND_UP(buf->num_pages, XEN_DRM_NUM_GREFS_PER_PAGE);
+}
+
+static void backend_calc_num_grefs(struct xen_drm_front_shbuf *buf)
+{
+	/* only for the pages consumed by the page directory itself */
+	buf->num_grefs = get_num_pages_dir(buf);
+}
+
+static void guest_calc_num_grefs(struct xen_drm_front_shbuf *buf)
+{
+	/*
+	 * number of pages the page directory consumes itself
+	 * plus grefs for the buffer pages
+	 */
+	buf->num_grefs = get_num_pages_dir(buf) + buf->num_pages;
+}
+
+#define xen_page_to_vaddr(page) \
+		((phys_addr_t)pfn_to_kaddr(page_to_xen_pfn(page)))
+
+static int backend_unmap(struct xen_drm_front_shbuf *buf)
+{
+	struct gnttab_unmap_grant_ref *unmap_ops;
+	int i, ret;
+
+	if (!buf->pages || !buf->backend_map_handles || !buf->grefs)
+		return 0;
+
+	unmap_ops = kcalloc(buf->num_pages, sizeof(*unmap_ops),
+			    GFP_KERNEL);
+	if (!unmap_ops) {
+		DRM_ERROR("Failed to get memory while unmapping\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < buf->num_pages; i++) {
+		phys_addr_t addr;
+
+		addr = xen_page_to_vaddr(buf->pages[i]);
+		gnttab_set_unmap_op(&unmap_ops[i], addr, GNTMAP_host_map,
+				    buf->backend_map_handles[i]);
+	}
+
+	ret = gnttab_unmap_refs(unmap_ops, NULL, buf->pages,
+				buf->num_pages);
+
+	for (i = 0; i < buf->num_pages; i++) {
+		if (unlikely(unmap_ops[i].status != GNTST_okay))
+			DRM_ERROR("Failed to unmap page %d: %d\n",
+				  i, unmap_ops[i].status);
+	}
+
+	if (ret)
+		DRM_ERROR("Failed to unmap grant references, ret %d\n", ret);
+
+	kfree(unmap_ops);
+	kfree(buf->backend_map_handles);
+	buf->backend_map_handles = NULL;
+	return ret;
+}
+
+static int backend_map(struct xen_drm_front_shbuf *buf)
+{
+	struct gnttab_map_grant_ref *map_ops = NULL;
+	unsigned char *ptr;
+	int ret, cur_gref, cur_dir_page, cur_page, grefs_left;
+
+	map_ops = kcalloc(buf->num_pages, sizeof(*map_ops), GFP_KERNEL);
+	if (!map_ops)
+		return -ENOMEM;
+
+	buf->backend_map_handles = kcalloc(buf->num_pages,
+					   sizeof(*buf->backend_map_handles),
+					   GFP_KERNEL);
+	if (!buf->backend_map_handles) {
+		kfree(map_ops);
+		return -ENOMEM;
+	}
+
+	/*
+	 * read the page directory to get grefs from the backend: for an
+	 * external buffer we only allocate buf->grefs for the page directory,
+	 * so buf->num_grefs holds the number of pages in the directory itself
+	 */
+	ptr = buf->directory;
+	grefs_left = buf->num_pages;
+	cur_page = 0;
+	for (cur_dir_page = 0; cur_dir_page < buf->num_grefs; cur_dir_page++) {
+		struct xendispl_page_directory *page_dir =
+				(struct xendispl_page_directory *)ptr;
+		int to_copy = XEN_DRM_NUM_GREFS_PER_PAGE;
+
+		if (to_copy > grefs_left)
+			to_copy = grefs_left;
+
+		for (cur_gref = 0; cur_gref < to_copy; cur_gref++) {
+			phys_addr_t addr;
+
+			addr = xen_page_to_vaddr(buf->pages[cur_page]);
+			gnttab_set_map_op(&map_ops[cur_page], addr,
+					  GNTMAP_host_map,
+					  page_dir->gref[cur_gref],
+					  buf->xb_dev->otherend_id);
+			cur_page++;
+		}
+
+		grefs_left -= to_copy;
+		ptr += PAGE_SIZE;
+	}
+	ret = gnttab_map_refs(map_ops, NULL, buf->pages, buf->num_pages);
+
+	/* save handles even on error, so we can unmap */
+	for (cur_page = 0; cur_page < buf->num_pages; cur_page++) {
+		buf->backend_map_handles[cur_page] = map_ops[cur_page].handle;
+		if (unlikely(map_ops[cur_page].status != GNTST_okay))
+			DRM_ERROR("Failed to map page %d: %d\n",
+				  cur_page, map_ops[cur_page].status);
+	}
+
+	if (ret) {
+		DRM_ERROR("Failed to map grant references, ret %d\n", ret);
+		backend_unmap(buf);
+	}
+
+	kfree(map_ops);
+	return ret;
+}
+
+static void backend_fill_page_dir(struct xen_drm_front_shbuf *buf)
+{
+	struct xendispl_page_directory *page_dir;
+	unsigned char *ptr;
+	int i, num_pages_dir;
+
+	ptr = buf->directory;
+	num_pages_dir = get_num_pages_dir(buf);
+
+	/* fill only grefs for the page directory itself */
+	for (i = 0; i < num_pages_dir - 1; i++) {
+		page_dir = (struct xendispl_page_directory *)ptr;
+
+		page_dir->gref_dir_next_page = buf->grefs[i + 1];
+		ptr += PAGE_SIZE;
+	}
+	/* the last page must say there are no more pages */
+	page_dir = (struct xendispl_page_directory *)ptr;
+	page_dir->gref_dir_next_page = GRANT_INVALID_REF;
+}
+
+static void guest_fill_page_dir(struct xen_drm_front_shbuf *buf)
+{
+	unsigned char *ptr;
+	int cur_gref, grefs_left, to_copy, i, num_pages_dir;
+
+	ptr = buf->directory;
+	num_pages_dir = get_num_pages_dir(buf);
+
+	/*
+	 * while copying, skip grefs at the start: they are for the pages
+	 * granted for the page directory itself
+	 */
+	cur_gref = num_pages_dir;
+	grefs_left = buf->num_pages;
+	for (i = 0; i < num_pages_dir; i++) {
+		struct xendispl_page_directory *page_dir =
+				(struct xendispl_page_directory *)ptr;
+
+		if (grefs_left <= XEN_DRM_NUM_GREFS_PER_PAGE) {
+			to_copy = grefs_left;
+			page_dir->gref_dir_next_page = GRANT_INVALID_REF;
+		} else {
+			to_copy = XEN_DRM_NUM_GREFS_PER_PAGE;
+			page_dir->gref_dir_next_page = buf->grefs[i + 1];
+		}
+		memcpy(&page_dir->gref, &buf->grefs[cur_gref],
+		       to_copy * sizeof(grant_ref_t));
+		ptr += PAGE_SIZE;
+		grefs_left -= to_copy;
+		cur_gref += to_copy;
+	}
+}
+
+static int guest_grant_refs_for_buffer(struct xen_drm_front_shbuf *buf,
+				       grant_ref_t *priv_gref_head,
+				       int gref_idx)
+{
+	int i, cur_ref, otherend_id;
+
+	otherend_id = buf->xb_dev->otherend_id;
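+	/* grant the backend access to each page of the buffer */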
+	for (i = 0; i < buf->num_pages; i++) {
+		cur_ref = gnttab_claim_grant_reference(priv_gref_head);
+		if (cur_ref < 0)
+			return cur_ref;
+
+		gnttab_grant_foreign_access_ref(cur_ref, otherend_id,
+						xen_page_to_gfn(buf->pages[i]),
+						0);
+		buf->grefs[gref_idx++] = cur_ref;
+	}
+	return 0;
+}
+
+static int grant_references(struct xen_drm_front_shbuf *buf)
+{
+	grant_ref_t priv_gref_head;
+	int ret, i, j, cur_ref;
+	int otherend_id, num_pages_dir;
+
+	ret = gnttab_alloc_grant_references(buf->num_grefs, &priv_gref_head);
+	if (ret < 0) {
+		DRM_ERROR("Cannot allocate grant references\n");
+		return ret;
+	}
+
+	otherend_id = buf->xb_dev->otherend_id;
+	j = 0;
+	num_pages_dir = get_num_pages_dir(buf);
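+	/* grant references for the pages of the page directory itself first */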
+	for (i = 0; i < num_pages_dir; i++) {
+		unsigned long frame;
+
+		cur_ref = gnttab_claim_grant_reference(&priv_gref_head);
+		if (cur_ref < 0)
+			return cur_ref;
+
+		frame = xen_page_to_gfn(virt_to_page(buf->directory +
+					PAGE_SIZE * i));
+		gnttab_grant_foreign_access_ref(cur_ref, otherend_id, frame, 0);
+		buf->grefs[j++] = cur_ref;
+	}
+
+	if (buf->ops->grant_refs_for_buffer) {
+		ret = buf->ops->grant_refs_for_buffer(buf, &priv_gref_head, j);
+		if (ret)
+			return ret;
+	}
+
+	gnttab_free_grant_references(priv_gref_head);
+	return 0;
+}
+
+static int alloc_storage(struct xen_drm_front_shbuf *buf)
+{
+	buf->grefs = kcalloc(buf->num_grefs, sizeof(*buf->grefs), GFP_KERNEL);
+	if (!buf->grefs)
+		return -ENOMEM;
+
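+	/* the page directory is allocated as a set of whole, zeroed pages */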
+	buf->directory = kcalloc(get_num_pages_dir(buf), PAGE_SIZE, GFP_KERNEL);
+	if (!buf->directory)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * For backend-allocated buffers we don't need grant_refs_for_buffer, as
+ * those grant references are allocated on the backend side
+ */
+static const struct xen_drm_front_shbuf_ops backend_ops = {
+	.calc_num_grefs = backend_calc_num_grefs,
+	.fill_page_dir = backend_fill_page_dir,
+	.map = backend_map,
+	.unmap = backend_unmap
+};
+
+/* For locally granted references we do not need to map/unmap the references */
+static const struct xen_drm_front_shbuf_ops local_ops = {
+	.calc_num_grefs = guest_calc_num_grefs,
+	.fill_page_dir = guest_fill_page_dir,
+	.grant_refs_for_buffer = guest_grant_refs_for_buffer,
+};
+
+struct xen_drm_front_shbuf *
+xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg)
+{
+	struct xen_drm_front_shbuf *buf;
+	int ret;
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	if (cfg->be_alloc)
+		buf->ops = &backend_ops;
+	else
+		buf->ops = &local_ops;
+
+	buf->xb_dev = cfg->xb_dev;
+	buf->num_pages = DIV_ROUND_UP(cfg->size, PAGE_SIZE);
+	buf->pages = cfg->pages;
+
+	buf->ops->calc_num_grefs(buf);
+
+	ret = alloc_storage(buf);
+	if (ret)
+		goto fail;
+
+	ret = grant_references(buf);
+	if (ret)
+		goto fail;
+
+	buf->ops->fill_page_dir(buf);
+
+	return buf;
+
+fail:
+	xen_drm_front_shbuf_free(buf);
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.h b/drivers/gpu/drm/xen/xen_drm_front_shbuf.h
new file mode 100644
index 0000000..7545c69
--- /dev/null
+++ b/drivers/gpu/drm/xen/xen_drm_front_shbuf.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ *  Xen para-virtual DRM device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_DRM_FRONT_SHBUF_H_
+#define __XEN_DRM_FRONT_SHBUF_H_
+
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+
+#include <xen/grant_table.h>
+
+struct xen_drm_front_shbuf {
+	/*
+	 * number of references granted for the backend's use:
+	 *  - for allocated/imported dma-bufs this holds the number of grant
+	 *    references for the page directory and the pages of the buffer
+	 *  - for a buffer provided by the backend this holds the number of
+	 *    grant references for the page directory, as grant references
+	 *    for the buffer itself will be provided by the backend
+	 */
+	int num_grefs;
+	grant_ref_t *grefs;
+	unsigned char *directory;
+
+	int num_pages;
+	struct page **pages;
+
+	struct xenbus_device *xb_dev;
+
+	/* these are the ops used internally depending on be_alloc mode */
+	const struct xen_drm_front_shbuf_ops *ops;
+
+	/* Xen map handles for the buffer allocated by the backend */
+	grant_handle_t *backend_map_handles;
+};
+
+struct xen_drm_front_shbuf_cfg {
+	struct xenbus_device *xb_dev;
+	size_t size;
+	struct page **pages;
+	bool be_alloc;
+};
+
+struct xen_drm_front_shbuf *
+xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg);
+
+grant_ref_t xen_drm_front_shbuf_get_dir_start(struct xen_drm_front_shbuf *buf);
+
+int xen_drm_front_shbuf_map(struct xen_drm_front_shbuf *buf);
+
+int xen_drm_front_shbuf_unmap(struct xen_drm_front_shbuf *buf);
+
+void xen_drm_front_shbuf_flush(struct xen_drm_front_shbuf *buf);
+
+void xen_drm_front_shbuf_free(struct xen_drm_front_shbuf *buf);
+
+#endif /* __XEN_DRM_FRONT_SHBUF_H_ */
diff --git a/drivers/gpu/drm/zte/zx_plane.c b/drivers/gpu/drm/zte/zx_plane.c
index 94545ad..d1931f5 100644
--- a/drivers/gpu/drm/zte/zx_plane.c
+++ b/drivers/gpu/drm/zte/zx_plane.c
@@ -268,7 +268,7 @@
 	struct zx_plane *zplane = to_zx_plane(plane);
 	void __iomem *hbsc = zplane->hbsc;
 
-	zx_vou_layer_disable(plane);
+	zx_vou_layer_disable(plane, old_state);
 
 	/* Disable HBSC block */
 	zx_writel_mask(hbsc + HBSC_CTRL0, HBSC_CTRL_EN, 0);
diff --git a/drivers/gpu/drm/zte/zx_vou.c b/drivers/gpu/drm/zte/zx_vou.c
index 7491813..442311d 100644
--- a/drivers/gpu/drm/zte/zx_vou.c
+++ b/drivers/gpu/drm/zte/zx_vou.c
@@ -627,9 +627,10 @@
 	zx_writel_mask(vou->osd + OSD_CTRL0, bits->enable, bits->enable);
 }
 
-void zx_vou_layer_disable(struct drm_plane *plane)
+void zx_vou_layer_disable(struct drm_plane *plane,
+			  struct drm_plane_state *old_state)
 {
-	struct zx_crtc *zcrtc = to_zx_crtc(plane->crtc);
+	struct zx_crtc *zcrtc = to_zx_crtc(old_state->crtc);
 	struct zx_vou_hw *vou = zcrtc->vou;
 	struct zx_plane *zplane = to_zx_plane(plane);
 	const struct vou_layer_bits *bits = zplane->bits;
diff --git a/drivers/gpu/drm/zte/zx_vou.h b/drivers/gpu/drm/zte/zx_vou.h
index 97d72bf..5b7f84f 100644
--- a/drivers/gpu/drm/zte/zx_vou.h
+++ b/drivers/gpu/drm/zte/zx_vou.h
@@ -62,6 +62,7 @@
 			    struct vou_div_config *configs, int num);
 
 void zx_vou_layer_enable(struct drm_plane *plane);
-void zx_vou_layer_disable(struct drm_plane *plane);
+void zx_vou_layer_disable(struct drm_plane *plane,
+			  struct drm_plane_state *old_state);
 
 #endif /* __ZX_VOU_H__ */
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 88a3558..815bdb4 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -314,6 +314,11 @@
 	return strcmp(dev_name(dev), drv->name) == 0;
 }
 
+static int host1x_dma_configure(struct device *dev)
+{
+	return of_dma_configure(dev, dev->of_node, true);
+}
+
 static const struct dev_pm_ops host1x_device_pm_ops = {
 	.suspend = pm_generic_suspend,
 	.resume = pm_generic_resume,
@@ -326,8 +331,8 @@
 struct bus_type host1x_bus_type = {
 	.name = "host1x",
 	.match = host1x_device_match,
+	.dma_configure	= host1x_dma_configure,
 	.pm = &host1x_device_pm_ops,
-	.force_dma = true,
 };
 
 static void __host1x_device_del(struct host1x_device *device)
@@ -416,7 +421,7 @@
 	device->dev.bus = &host1x_bus_type;
 	device->dev.parent = host1x->dev;
 
-	of_dma_configure(&device->dev, host1x->dev->of_node);
+	of_dma_configure(&device->dev, host1x->dev->of_node, true);
 
 	err = host1x_device_parse_dt(device, driver);
 	if (err < 0) {
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 28541b2..91df51e 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -51,7 +51,7 @@
 	struct host1x_cdma *cdma = pb_to_cdma(pb);
 	struct host1x *host1x = cdma_to_host1x(cdma);
 
-	if (!pb->phys)
+	if (!pb->mapped)
 		return;
 
 	if (host1x->domain) {
@@ -127,7 +127,7 @@
 iommu_free_iova:
 	__free_iova(&host1x->iova, alloc);
 iommu_free_mem:
-	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
+	dma_free_wc(host1x->dev, size, pb->mapped, pb->phys);
 
 	return err;
 }
@@ -247,7 +247,7 @@
 static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
 {
 	cancel_delayed_work(&cdma->timeout.wq);
-	cdma->timeout.client = 0;
+	cdma->timeout.client = NULL;
 }
 
 /*
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 286d493..e97e17b 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -44,7 +44,7 @@
 struct push_buffer {
 	void *mapped;			/* mapped pushbuffer memory */
 	dma_addr_t dma;			/* device address of pushbuffer */
-	phys_addr_t phys;		/* physical address of pushbuffer */
+	dma_addr_t phys;		/* physical address of pushbuffer */
 	u32 fence;			/* index we've written */
 	u32 pos;			/* index to write to */
 	u32 size;
@@ -58,7 +58,7 @@
 	u32 syncpt_val;			/* syncpt value when completed */
 	ktime_t start_ktime;		/* starting time */
 	/* context timeout information */
-	int client;
+	struct host1x_client *client;
 };
 
 enum cdma_event {
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index dc77ec4..329e4a3 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -103,7 +103,7 @@
 
 static void show_all(struct host1x *m, struct output *o, bool show_fifo)
 {
-	int i;
+	unsigned int i;
 
 	host1x_hw_show_mlocks(m, o);
 	show_syncpts(m, o);
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 03db711..f1d5f76 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -223,10 +223,14 @@
 		struct iommu_domain_geometry *geometry;
 		unsigned long order;
 
+		err = iova_cache_get();
+		if (err < 0)
+			goto put_group;
+
 		host->domain = iommu_domain_alloc(&platform_bus_type);
 		if (!host->domain) {
 			err = -ENOMEM;
-			goto put_group;
+			goto put_cache;
 		}
 
 		err = iommu_attach_group(host->domain, host->group);
@@ -234,6 +238,7 @@
 			if (err == -ENODEV) {
 				iommu_domain_free(host->domain);
 				host->domain = NULL;
+				iova_cache_put();
 				iommu_group_put(host->group);
 				host->group = NULL;
 				goto skip_iommu;
@@ -308,6 +313,9 @@
 fail_free_domain:
 	if (host->domain)
 		iommu_domain_free(host->domain);
+put_cache:
+	if (host->group)
+		iova_cache_put();
 put_group:
 	iommu_group_put(host->group);
 
@@ -328,6 +336,7 @@
 		put_iova_domain(&host->iova);
 		iommu_detach_group(host->domain, host->group);
 		iommu_domain_free(host->domain);
+		iova_cache_put();
 		iommu_group_put(host->group);
 	}
 
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 43e9fab..36f44ff 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -78,7 +78,6 @@
 	void (*load_wait_base)(struct host1x_syncpt *syncpt);
 	u32 (*load)(struct host1x_syncpt *syncpt);
 	int (*cpu_incr)(struct host1x_syncpt *syncpt);
-	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
 	void (*assign_to_channel)(struct host1x_syncpt *syncpt,
 	                          struct host1x_channel *channel);
 	void (*enable_protection)(struct host1x *host);
@@ -183,13 +182,6 @@
 	return host->syncpt_op->cpu_incr(sp);
 }
 
-static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
-					      struct host1x_syncpt *sp,
-					      void *patch_addr)
-{
-	return host->syncpt_op->patch_wait(sp, patch_addr);
-}
-
 static inline void host1x_hw_syncpt_assign_to_channel(
 	struct host1x *host, struct host1x_syncpt *sp,
 	struct host1x_channel *ch)
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 9af7587..d188f90 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -104,8 +104,7 @@
 	sp = host->syncpt + job->syncpt_id;
 	trace_host1x_channel_submit(dev_name(ch->dev),
 				    job->num_gathers, job->num_relocs,
-				    job->num_waitchk, job->syncpt_id,
-				    job->syncpt_incrs);
+				    job->syncpt_id, job->syncpt_incrs);
 
 	/* before error checks, return current max */
 	prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
@@ -165,7 +164,7 @@
 	trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
 
 	/* schedule a submit complete interrupt */
-	err = host1x_intr_add_action(host, job->syncpt_id, syncval,
+	err = host1x_intr_add_action(host, sp, syncval,
 				     HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
 				     completed_waiter, NULL);
 	completed_waiter = NULL;
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7dfd47d..a23bb33 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -96,16 +96,6 @@
 	return 0;
 }
 
-/* remove a wait pointed to by patch_addr */
-static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
-{
-	u32 override = host1x_class_host_wait_syncpt(HOST1X_SYNCPT_RESERVED, 0);
-
-	*((u32 *)patch_addr) = override;
-
-	return 0;
-}
-
 /**
  * syncpt_assign_to_channel() - Assign syncpoint to channel
  * @sp: syncpoint
@@ -156,7 +146,6 @@
 	.load_wait_base = syncpt_read_wait_base,
 	.load = syncpt_load,
 	.cpu_incr = syncpt_cpu_incr,
-	.patch_wait = syncpt_patch_wait,
 	.assign_to_channel = syncpt_assign_to_channel,
 	.enable_protection = syncpt_enable_protection,
 };
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 8b4fad0..9629c00 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -144,7 +144,7 @@
 static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
 {
 	struct list_head *head = completed;
-	int i;
+	unsigned int i;
 
 	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
 		action_handler handler = action_handlers[i];
@@ -211,11 +211,11 @@
 				host1x_syncpt_load(host->syncpt + id));
 }
 
-int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
-			   enum host1x_intr_action action, void *data,
-			   struct host1x_waitlist *waiter, void **ref)
+int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
+			   u32 thresh, enum host1x_intr_action action,
+			   void *data, struct host1x_waitlist *waiter,
+			   void **ref)
 {
-	struct host1x_syncpt *syncpt;
 	int queue_was_empty;
 
 	if (waiter == NULL) {
@@ -234,19 +234,17 @@
 	waiter->data = data;
 	waiter->count = 1;
 
-	syncpt = host->syncpt + id;
-
 	spin_lock(&syncpt->intr.lock);
 
 	queue_was_empty = list_empty(&syncpt->intr.wait_head);
 
 	if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) {
 		/* added at head of list - new threshold value */
-		host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
+		host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh);
 
 		/* added as first waiter - enable interrupt */
 		if (queue_was_empty)
-			host1x_hw_intr_enable_syncpt_intr(host, id);
+			host1x_hw_intr_enable_syncpt_intr(host, syncpt->id);
 	}
 
 	spin_unlock(&syncpt->intr.lock);
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 1370c2b..6db96af 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
 
+struct host1x_syncpt;
 struct host1x;
 
 enum host1x_intr_action {
@@ -75,9 +76,10 @@
  *
  * This is a non-blocking api.
  */
-int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
-	enum host1x_intr_action action, void *data,
-	struct host1x_waitlist *waiter, void **ref);
+int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
+			   u32 thresh, enum host1x_intr_action action,
+			   void *data, struct host1x_waitlist *waiter,
+			   void **ref);
 
 /*
  * Unreference an action submitted to host1x_intr_add_action().
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index db509ab..e2f4a4d 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -34,8 +34,7 @@
 #define HOST1X_WAIT_SYNCPT_OFFSET 0x8
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs,
-				    u32 num_waitchks)
+				    u32 num_cmdbufs, u32 num_relocs)
 {
 	struct host1x_job *job = NULL;
 	unsigned int num_unpins = num_cmdbufs + num_relocs;
@@ -46,7 +45,6 @@
 	total = sizeof(struct host1x_job) +
 		(u64)num_relocs * sizeof(struct host1x_reloc) +
 		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
-		(u64)num_waitchks * sizeof(struct host1x_waitchk) +
 		(u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
 		(u64)num_unpins * sizeof(dma_addr_t) +
 		(u64)num_unpins * sizeof(u32 *);
@@ -62,12 +60,10 @@
 
 	/* Redistribute memory to the structs  */
 	mem += sizeof(struct host1x_job);
-	job->relocarray = num_relocs ? mem : NULL;
+	job->relocs = num_relocs ? mem : NULL;
 	mem += num_relocs * sizeof(struct host1x_reloc);
 	job->unpins = num_unpins ? mem : NULL;
 	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
-	job->waitchk = num_waitchks ? mem : NULL;
-	mem += num_waitchks * sizeof(struct host1x_waitchk);
 	job->gathers = num_cmdbufs ? mem : NULL;
 	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
 	job->addr_phys = num_unpins ? mem : NULL;
@@ -100,84 +96,18 @@
 EXPORT_SYMBOL(host1x_job_put);
 
 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
-			   u32 words, u32 offset)
+			   unsigned int words, unsigned int offset)
 {
-	struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers];
+	struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
 
-	cur_gather->words = words;
-	cur_gather->bo = bo;
-	cur_gather->offset = offset;
+	gather->words = words;
+	gather->bo = bo;
+	gather->offset = offset;
+
 	job->num_gathers++;
 }
 EXPORT_SYMBOL(host1x_job_add_gather);
 
-/*
- * NULL an already satisfied WAIT_SYNCPT host method, by patching its
- * args in the command stream. The method data is changed to reference
- * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt
- * with a matching threshold value of 0, so is guaranteed to be popped
- * by the host HW.
- */
-static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
-				       struct host1x_bo *h, u32 offset)
-{
-	void *patch_addr = NULL;
-
-	/* patch the wait */
-	patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT);
-	if (patch_addr) {
-		host1x_syncpt_patch_wait(sp,
-					 patch_addr + (offset & ~PAGE_MASK));
-		host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr);
-	} else
-		pr_err("Could not map cmdbuf for wait check\n");
-}
-
-/*
- * Check driver supplied waitchk structs for syncpt thresholds
- * that have already been satisfied and NULL the comparison (to
- * avoid a wrap condition in the HW).
- */
-static int do_waitchks(struct host1x_job *job, struct host1x *host,
-		       struct host1x_job_gather *g)
-{
-	struct host1x_bo *patch = g->bo;
-	int i;
-
-	/* compare syncpt vs wait threshold */
-	for (i = 0; i < job->num_waitchk; i++) {
-		struct host1x_waitchk *wait = &job->waitchk[i];
-		struct host1x_syncpt *sp =
-			host1x_syncpt_get(host, wait->syncpt_id);
-
-		/* validate syncpt id */
-		if (wait->syncpt_id > host1x_syncpt_nb_pts(host))
-			continue;
-
-		/* skip all other gathers */
-		if (patch != wait->bo)
-			continue;
-
-		trace_host1x_syncpt_wait_check(wait->bo, wait->offset,
-					       wait->syncpt_id, wait->thresh,
-					       host1x_syncpt_read_min(sp));
-
-		if (host1x_syncpt_is_expired(sp, wait->thresh)) {
-			dev_dbg(host->dev,
-				"drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n",
-				wait->syncpt_id, sp->name, wait->thresh,
-				host1x_syncpt_read_min(sp));
-
-			host1x_syncpt_patch_offset(sp, patch,
-						   g->offset + wait->offset);
-		}
-
-		wait->bo = NULL;
-	}
-
-	return 0;
-}
-
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
 	unsigned int i;
@@ -186,7 +116,7 @@
 	job->num_unpins = 0;
 
 	for (i = 0; i < job->num_relocs; i++) {
-		struct host1x_reloc *reloc = &job->relocarray[i];
+		struct host1x_reloc *reloc = &job->relocs[i];
 		struct sg_table *sgt;
 		dma_addr_t phys_addr;
 
@@ -267,14 +197,14 @@
 
 static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
 {
-	int i = 0;
 	u32 last_page = ~0;
 	void *cmdbuf_page_addr = NULL;
 	struct host1x_bo *cmdbuf = g->bo;
+	unsigned int i;
 
 	/* pin & patch the relocs for one gather */
 	for (i = 0; i < job->num_relocs; i++) {
-		struct host1x_reloc *reloc = &job->relocarray[i];
+		struct host1x_reloc *reloc = &job->relocs[i];
 		u32 reloc_addr = (job->reloc_addr_phys[i] +
 				  reloc->target.offset) >> reloc->shift;
 		u32 *target;
@@ -331,17 +261,6 @@
 	return true;
 }
 
-static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf,
-		       unsigned int offset)
-{
-	offset *= sizeof(u32);
-
-	if (wait->bo != cmdbuf || wait->offset != offset)
-		return false;
-
-	return true;
-}
-
 struct host1x_firewall {
 	struct host1x_job *job;
 	struct device *dev;
@@ -349,9 +268,6 @@
 	unsigned int num_relocs;
 	struct host1x_reloc *reloc;
 
-	unsigned int num_waitchks;
-	struct host1x_waitchk *waitchk;
-
 	struct host1x_bo *cmdbuf;
 	unsigned int offset;
 
@@ -378,20 +294,6 @@
 		fw->reloc++;
 	}
 
-	if (offset == HOST1X_WAIT_SYNCPT_OFFSET) {
-		if (fw->class != HOST1X_CLASS_HOST1X)
-			return -EINVAL;
-
-		if (!fw->num_waitchks)
-			return -EINVAL;
-
-		if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset))
-			return -EINVAL;
-
-		fw->num_waitchks--;
-		fw->waitchk++;
-	}
-
 	return 0;
 }
 
@@ -550,14 +452,12 @@
 	struct host1x_firewall fw;
 	size_t size = 0;
 	size_t offset = 0;
-	int i;
+	unsigned int i;
 
 	fw.job = job;
 	fw.dev = dev;
-	fw.reloc = job->relocarray;
+	fw.reloc = job->relocs;
 	fw.num_relocs = job->num_relocs;
-	fw.waitchk = job->waitchk;
-	fw.num_waitchks = job->num_waitchk;
 	fw.class = job->class;
 
 	for (i = 0; i < job->num_gathers; i++) {
@@ -604,8 +504,8 @@
 		offset += g->words * sizeof(u32);
 	}
 
-	/* No relocs and waitchks should remain at this point */
-	if (fw.num_relocs || fw.num_waitchks)
+	/* No relocs should remain at this point */
+	if (fw.num_relocs)
 		return -EINVAL;
 
 	return 0;
@@ -616,19 +516,6 @@
 	int err;
 	unsigned int i, j;
 	struct host1x *host = dev_get_drvdata(dev->parent);
-	DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));
-
-	bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
-	for (i = 0; i < job->num_waitchk; i++) {
-		u32 syncpt_id = job->waitchk[i].syncpt_id;
-
-		if (syncpt_id < host1x_syncpt_nb_pts(host))
-			set_bit(syncpt_id, waitchk_mask);
-	}
-
-	/* get current syncpt values for waitchk */
-	for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host))
-		host1x_syncpt_load(host->syncpt + i);
 
 	/* pin memory */
 	err = pin_job(host, job);
@@ -663,10 +550,6 @@
 		err = do_relocs(job, g);
 		if (err)
 			break;
-
-		err = do_waitchks(job, host, g);
-		if (err)
-			break;
 	}
 
 out:
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 4bda51d..188400e 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -20,10 +20,10 @@
 #define __HOST1X_JOB_H
 
 struct host1x_job_gather {
-	u32 words;
+	unsigned int words;
 	dma_addr_t base;
 	struct host1x_bo *bo;
-	u32 offset;
+	unsigned int offset;
 	bool handled;
 };
 
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index a2a952a..a5dbf1b 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -57,8 +57,8 @@
 						 struct host1x_client *client,
 						 unsigned long flags)
 {
-	int i;
 	struct host1x_syncpt *sp = host->syncpt;
+	unsigned int i;
 	char *name;
 
 	mutex_lock(&host->syncpt_mutex);
@@ -255,7 +255,7 @@
 	}
 
 	/* schedule a wakeup when the syncpoint value is reached */
-	err = host1x_intr_add_action(sp->host, sp->id, thresh,
+	err = host1x_intr_add_action(sp->host, sp, thresh,
 				     HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
 				     &wq, waiter, &ref);
 	if (err)
@@ -373,12 +373,6 @@
 		return (s32)(current_val - thresh) >= 0;
 }
 
-/* remove a wait pointed to by patch_addr */
-int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
-{
-	return host1x_hw_syncpt_patch_wait(sp->host, sp, patch_addr);
-}
-
 int host1x_syncpt_init(struct host1x *host)
 {
 	struct host1x_syncpt_base *bases;
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 9d88d37..d98e223 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -124,7 +124,4 @@
 	return sp->id < host1x_syncpt_nb_pts(sp->host);
 }
 
-/* Patch a wait by replacing it with a wait for syncpt 0 value 0 */
-int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr);
-
 #endif
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 7285518..ced0418 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -63,6 +63,9 @@
 	case (VERSION_WIN10):
 		return VERSION_WIN8_1;
 
+	case (VERSION_WIN10_V5):
+		return VERSION_WIN10;
+
 	case (VERSION_WS2008):
 	default:
 		return VERSION_INVAL;
@@ -80,9 +83,29 @@
 
 	msg = (struct vmbus_channel_initiate_contact *)msginfo->msg;
 
+	memset(msg, 0, sizeof(*msg));
 	msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT;
 	msg->vmbus_version_requested = version;
-	msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
+
+	/*
+	 * VMBus protocol 5.0 (VERSION_WIN10_V5) requires that we must use
+	 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message,
+	 * and for subsequent messages, we must use the Message Connection ID
+	 * field in the host-returned Version Response Message. And, with
+	 * VERSION_WIN10_V5, we don't use msg->interrupt_page, but we tell
+	 * the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for
+	 * compatibility.
+	 *
+	 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1).
+	 */
+	if (version >= VERSION_WIN10_V5) {
+		msg->msg_sint = VMBUS_MESSAGE_SINT;
+		vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4;
+	} else {
+		msg->interrupt_page = virt_to_phys(vmbus_connection.int_page);
+		vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID;
+	}
+
 	msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
 	msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
 	/*
@@ -137,6 +160,10 @@
 	/* Check if successful */
 	if (msginfo->response.version_response.version_supported) {
 		vmbus_connection.conn_state = CONNECTED;
+
+		if (version >= VERSION_WIN10_V5)
+			vmbus_connection.msg_conn_id =
+				msginfo->response.version_response.msg_conn_id;
 	} else {
 		return -ECONNREFUSED;
 	}
@@ -354,13 +381,14 @@
  */
 int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
 {
+	struct vmbus_channel_message_header *hdr;
 	union hv_connection_id conn_id;
 	int ret = 0;
 	int retries = 0;
 	u32 usec = 1;
 
 	conn_id.asu32 = 0;
-	conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID;
+	conn_id.u.id = vmbus_connection.msg_conn_id;
 
 	/*
 	 * hv_post_message() can have transient failures because of
@@ -373,6 +401,18 @@
 		switch (ret) {
 		case HV_STATUS_INVALID_CONNECTION_ID:
 			/*
+			 * See vmbus_negotiate_version(): VMBus protocol 5.0
+			 * requires that we must use
+			 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate
+			 * Contact message, but on old hosts that only
+			 * support VMBus protocol 4.0 or lower, here we get
+			 * HV_STATUS_INVALID_CONNECTION_ID and we should
+			 * return an error immediately without retrying.
+			 */
+			hdr = buffer;
+			if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT)
+				return -EINVAL;
+			/*
 			 * We could get this if we send messages too
 			 * frequently.
 			 */
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index f761bef3..72eaba3 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -187,6 +187,7 @@
 
 enum {
 	VMBUS_MESSAGE_CONNECTION_ID	= 1,
+	VMBUS_MESSAGE_CONNECTION_ID_4	= 4,
 	VMBUS_MESSAGE_PORT_ID		= 1,
 	VMBUS_EVENT_CONNECTION_ID	= 2,
 	VMBUS_EVENT_PORT_ID		= 2,
@@ -302,6 +303,8 @@
 	 */
 	int connect_cpu;
 
+	u32 msg_conn_id;
+
 	atomic_t offer_in_progress;
 
 	enum vmbus_connect_state conn_state;
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 6ec307c..f10840a 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -717,15 +717,12 @@
 	  be called ltc2945.
 
 config SENSORS_LTC2990
-	tristate "Linear Technology LTC2990 (current monitoring mode only)"
+	tristate "Linear Technology LTC2990"
 	depends on I2C
 	help
 	  If you say yes here you get support for Linear Technology LTC2990
 	  I2C System Monitor. The LTC2990 supports a combination of voltage,
-	  current and temperature monitoring, but in addition to the Vcc supply
-	  voltage and chip temperature, this driver currently only supports
-	  reading two currents by measuring two differential voltages across
-	  series resistors.
+	  current and temperature monitoring.
 
 	  This driver can also be built as a module. If so, the module will
 	  be called ltc2990.
diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c
index 975c43d..a6636fe 100644
--- a/drivers/hwmon/asus_atk0110.c
+++ b/drivers/hwmon/asus_atk0110.c
@@ -125,6 +125,8 @@
 	int temperature_count;
 	int fan_count;
 	struct list_head sensor_list;
+	struct attribute_group attr_group;
+	const struct attribute_group *attr_groups[2];
 
 	struct {
 		struct dentry *root;
@@ -188,7 +190,6 @@
 static int atk_remove(struct acpi_device *device);
 static void atk_print_sensor(struct atk_data *data, union acpi_object *obj);
 static int atk_read_value(struct atk_sensor_data *sensor, u64 *value);
-static void atk_free_sensors(struct atk_data *data);
 
 static struct acpi_driver atk_driver = {
 	.name	= ATK_HID,
@@ -262,14 +263,6 @@
 	return sprintf(buf, "%lld\n", value);
 }
 
-static ssize_t atk_name_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "atk0110\n");
-}
-static struct device_attribute atk_name_attr =
-		__ATTR(name, 0444, atk_name_show, NULL);
-
 static void atk_init_attribute(struct device_attribute *attr, char *name,
 		sysfs_show_func show)
 {
@@ -912,15 +905,13 @@
 	limit1 = atk_get_pack_member(data, obj, HWMON_PACK_LIMIT1);
 	limit2 = atk_get_pack_member(data, obj, HWMON_PACK_LIMIT2);
 
-	sensor = kzalloc(sizeof(*sensor), GFP_KERNEL);
+	sensor = devm_kzalloc(dev, sizeof(*sensor), GFP_KERNEL);
 	if (!sensor)
 		return -ENOMEM;
 
-	sensor->acpi_name = kstrdup(name->string.pointer, GFP_KERNEL);
-	if (!sensor->acpi_name) {
-		err = -ENOMEM;
-		goto out;
-	}
+	sensor->acpi_name = devm_kstrdup(dev, name->string.pointer, GFP_KERNEL);
+	if (!sensor->acpi_name)
+		return -ENOMEM;
 
 	INIT_LIST_HEAD(&sensor->list);
 	sensor->type = type;
@@ -961,9 +952,6 @@
 	(*num)++;
 
 	return 1;
-out:
-	kfree(sensor);
-	return err;
 }
 
 static int atk_enumerate_old_hwmon(struct atk_data *data)
@@ -1004,8 +992,7 @@
 		dev_warn(dev, METHOD_OLD_ENUM_TMP ": ACPI exception: %s\n",
 				acpi_format_exception(status));
 
-		ret = -ENODEV;
-		goto cleanup;
+		return -ENODEV;
 	}
 
 	pack = buf.pointer;
@@ -1026,8 +1013,7 @@
 		dev_warn(dev, METHOD_OLD_ENUM_FAN ": ACPI exception: %s\n",
 				acpi_format_exception(status));
 
-		ret = -ENODEV;
-		goto cleanup;
+		return -ENODEV;
 	}
 
 	pack = buf.pointer;
@@ -1041,9 +1027,6 @@
 	ACPI_FREE(buf.pointer);
 
 	return count;
-cleanup:
-	atk_free_sensors(data);
-	return ret;
 }
 
 static int atk_ec_present(struct atk_data *data)
@@ -1193,76 +1176,44 @@
 	return err;
 }
 
-static int atk_create_files(struct atk_data *data)
+static int atk_init_attribute_groups(struct atk_data *data)
 {
+	struct device *dev = &data->acpi_dev->dev;
 	struct atk_sensor_data *s;
-	int err;
+	struct attribute **attrs;
+	int i = 0;
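+	/* 4 sysfs attributes per sensor plus a terminating NULL entry */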
+	int len = (data->voltage_count + data->temperature_count
+			+ data->fan_count) * 4 + 1;
+
+	attrs = devm_kcalloc(dev, len, sizeof(struct attribute *), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
 
 	list_for_each_entry(s, &data->sensor_list, list) {
-		err = device_create_file(data->hwmon_dev, &s->input_attr);
-		if (err)
-			return err;
-		err = device_create_file(data->hwmon_dev, &s->label_attr);
-		if (err)
-			return err;
-		err = device_create_file(data->hwmon_dev, &s->limit1_attr);
-		if (err)
-			return err;
-		err = device_create_file(data->hwmon_dev, &s->limit2_attr);
-		if (err)
-			return err;
+		attrs[i++] = &s->input_attr.attr;
+		attrs[i++] = &s->label_attr.attr;
+		attrs[i++] = &s->limit1_attr.attr;
+		attrs[i++] = &s->limit2_attr.attr;
 	}
 
-	err = device_create_file(data->hwmon_dev, &atk_name_attr);
+	data->attr_group.attrs = attrs;
+	data->attr_groups[0] = &data->attr_group;
 
-	return err;
-}
-
-static void atk_remove_files(struct atk_data *data)
-{
-	struct atk_sensor_data *s;
-
-	list_for_each_entry(s, &data->sensor_list, list) {
-		device_remove_file(data->hwmon_dev, &s->input_attr);
-		device_remove_file(data->hwmon_dev, &s->label_attr);
-		device_remove_file(data->hwmon_dev, &s->limit1_attr);
-		device_remove_file(data->hwmon_dev, &s->limit2_attr);
-	}
-	device_remove_file(data->hwmon_dev, &atk_name_attr);
-}
-
-static void atk_free_sensors(struct atk_data *data)
-{
-	struct list_head *head = &data->sensor_list;
-	struct atk_sensor_data *s, *tmp;
-
-	list_for_each_entry_safe(s, tmp, head, list) {
-		kfree(s->acpi_name);
-		kfree(s);
-	}
+	return 0;
 }
 
 static int atk_register_hwmon(struct atk_data *data)
 {
 	struct device *dev = &data->acpi_dev->dev;
-	int err;
 
 	dev_dbg(dev, "registering hwmon device\n");
-	data->hwmon_dev = hwmon_device_register(dev);
+	data->hwmon_dev = hwmon_device_register_with_groups(dev, "atk0110",
+							    data,
+							    data->attr_groups);
 	if (IS_ERR(data->hwmon_dev))
 		return PTR_ERR(data->hwmon_dev);
 
-	dev_dbg(dev, "populating sysfs directory\n");
-	err = atk_create_files(data);
-	if (err)
-		goto remove;
-
 	return 0;
-remove:
-	/* Cleanup the registered files */
-	atk_remove_files(data);
-	hwmon_device_unregister(data->hwmon_dev);
-	return err;
 }
 
 static int atk_probe_if(struct atk_data *data)
@@ -1350,7 +1301,7 @@
 
 	dev_dbg(&device->dev, "adding...\n");
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	data = devm_kzalloc(&device->dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
@@ -1397,20 +1348,20 @@
 		goto out;
 	}
 
+	err = atk_init_attribute_groups(data);
+	if (err)
+		goto out;
 	err = atk_register_hwmon(data);
 	if (err)
-		goto cleanup;
+		goto out;
 
 	atk_debugfs_init(data);
 
 	device->driver_data = data;
 	return 0;
-cleanup:
-	atk_free_sensors(data);
 out:
 	if (data->disable_ec)
 		atk_ec_ctl(data, 0);
-	kfree(data);
 	return err;
 }
 
@@ -1423,8 +1374,6 @@
 
 	atk_debugfs_cleanup(data);
 
-	atk_remove_files(data);
-	atk_free_sensors(data);
 	hwmon_device_unregister(data->hwmon_dev);
 
 	if (data->disable_ec) {
@@ -1432,8 +1381,6 @@
 			dev_err(&device->dev, "Failed to disable EC\n");
 	}
 
-	kfree(data);
-
 	return 0;
 }
 
diff --git a/drivers/hwmon/fschmd.c b/drivers/hwmon/fschmd.c
index 5e78229..22d3a84 100644
--- a/drivers/hwmon/fschmd.c
+++ b/drivers/hwmon/fschmd.c
@@ -105,7 +105,7 @@
 static const int FSCHMD_NO_VOLT_SENSORS[7] = { 3, 3, 3, 3, 3, 3, 6 };
 
 /*
- * minimum pwm at which the fan is driven (pwm can by increased depending on
+ * minimum pwm at which the fan is driven (pwm can be increased depending on
  * the temp. Notice that for the scy some fans share their minimum speed.
  * Also notice that with the scy the sensor order is different than with the
  * other chips, this order was in the 2.4 driver and kept for consistency.
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 32083e4..e88c019 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -698,6 +698,9 @@
 	if (chip && (!chip->ops || !chip->ops->is_visible || !chip->info))
 		return ERR_PTR(-EINVAL);
 
+	if (chip && !dev)
+		return ERR_PTR(-EINVAL);
+
 	return __hwmon_device_register(dev, name, drvdata, chip, extra_groups);
 }
 EXPORT_SYMBOL_GPL(hwmon_device_register_with_info);
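
The added guard makes the contract explicit: registering with chip info but no parent device cannot work, since the core needs the device for drvdata and attribute ownership. A hedged sketch of the intended call pattern (hypothetical names):

    #include <linux/err.h>
    #include <linux/hwmon.h>

    static int foo_hwmon_init(struct device *parent,
                              const struct hwmon_chip_info *info,
                              void *drvdata)
    {
            struct device *hwdev;

            /* parent must be non-NULL when info is supplied;
             * the core now returns -EINVAL otherwise */
            hwdev = devm_hwmon_device_register_with_info(parent, "foo",
                                                         drvdata, info,
                                                         NULL);
            return PTR_ERR_OR_ZERO(hwdev);
    }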
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 3b73dee..17c6460 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -37,6 +37,10 @@
 /* Provide lock for writing to NB_SMU_IND_ADDR */
 static DEFINE_MUTEX(nb_smu_ind_mutex);
 
+#ifndef PCI_DEVICE_ID_AMD_15H_M70H_NB_F3
+#define PCI_DEVICE_ID_AMD_15H_M70H_NB_F3	0x15b3
+#endif
+
 #ifndef PCI_DEVICE_ID_AMD_17H_DF_F3
 #define PCI_DEVICE_ID_AMD_17H_DF_F3	0x1463
 #endif
@@ -81,6 +85,7 @@
 	void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
 	int temp_offset;
 	u32 temp_adjust_mask;
+	bool show_tdie;
 };
 
 struct tctl_offset {
@@ -141,17 +146,24 @@
 		     F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval);
 }
 
-static ssize_t temp1_input_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static unsigned int get_raw_temp(struct k10temp_data *data)
 {
-	struct k10temp_data *data = dev_get_drvdata(dev);
-	u32 regval;
 	unsigned int temp;
+	u32 regval;
 
 	data->read_tempreg(data->pdev, &regval);
 	temp = (regval >> 21) * 125;
 	if (regval & data->temp_adjust_mask)
 		temp -= 49000;
+	return temp;
+}
+
+static ssize_t temp1_input_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct k10temp_data *data = dev_get_drvdata(dev);
+	unsigned int temp = get_raw_temp(data);
+
 	if (temp > data->temp_offset)
 		temp -= data->temp_offset;
 	else
@@ -160,6 +172,23 @@
 	return sprintf(buf, "%u\n", temp);
 }
 
+static ssize_t temp2_input_show(struct device *dev,
+				struct device_attribute *devattr, char *buf)
+{
+	struct k10temp_data *data = dev_get_drvdata(dev);
+	unsigned int temp = get_raw_temp(data);
+
+	return sprintf(buf, "%u\n", temp);
+}
+
+static ssize_t temp_label_show(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+
+	return sprintf(buf, "%s\n", attr->index ? "Tctl" : "Tdie");
+}
+
 static ssize_t temp1_max_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
@@ -187,16 +216,23 @@
 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp_crit, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, show_temp_crit, NULL, 1);
 
+static SENSOR_DEVICE_ATTR(temp1_label, 0444, temp_label_show, NULL, 0);
+static DEVICE_ATTR_RO(temp2_input);
+static SENSOR_DEVICE_ATTR(temp2_label, 0444, temp_label_show, NULL, 1);
+
 static umode_t k10temp_is_visible(struct kobject *kobj,
 				  struct attribute *attr, int index)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct k10temp_data *data = dev_get_drvdata(dev);
 	struct pci_dev *pdev = data->pdev;
+	u32 reg;
 
-	if (index >= 2) {
-		u32 reg;
-
+	switch (index) {
+	case 0 ... 1:	/* temp1_input, temp1_max */
+	default:
+		break;
+	case 2 ... 3:	/* temp1_crit, temp1_crit_hyst */
 		if (!data->read_htcreg)
 			return 0;
 
@@ -208,6 +244,11 @@
 		data->read_htcreg(data->pdev, &reg);
 		if (!(reg & HTC_ENABLE))
 			return 0;
+		break;
+	case 4 ... 6:	/* temp1_label, temp2_input, temp2_label */
+		if (!data->show_tdie)
+			return 0;
+		break;
 	}
 	return attr->mode;
 }
@@ -217,6 +258,9 @@
 	&dev_attr_temp1_max.attr,
 	&sensor_dev_attr_temp1_crit.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
+	&sensor_dev_attr_temp1_label.dev_attr.attr,
+	&dev_attr_temp2_input.attr,
+	&sensor_dev_attr_temp2_label.dev_attr.attr,
 	NULL
 };
 
@@ -292,6 +336,7 @@
 	} else if (boot_cpu_data.x86 == 0x17) {
 		data->temp_adjust_mask = 0x80000;
 		data->read_tempreg = read_tempreg_nb_f17;
+		data->show_tdie = true;
 	} else {
 		data->read_htcreg = read_htcreg_pci;
 		data->read_tempreg = read_tempreg_pci;
@@ -320,6 +365,7 @@
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) },
+	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M70H_NB_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
 	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
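
For reference, the get_raw_temp() factoring above works because the Tctl encoding is shared: bits 31:21 of the reported-temperature register hold the value in 0.125 degC steps, hence (regval >> 21) * 125 milli-degrees, with a 49 degC downward adjustment when the range bit is set. temp1 (Tdie) then subtracts the model-specific offset that temp2 (Tctl) deliberately keeps. A standalone arithmetic sketch, not the driver code itself:

    /* e.g. regval 0x42000000: 0x42000000 >> 21 == 528,
     * 528 * 125 == 66000 m degC == 66.0 degC (Tctl) */
    static unsigned int decode_tctl(unsigned int regval,
                                    unsigned int adjust_mask)
    {
            unsigned int temp = (regval >> 21) * 125;   /* m degC */

            if (regval & adjust_mask)
                    temp -= 49000;                      /* range bit set */
            return temp;
    }

    static unsigned int decode_tdie(unsigned int tctl, unsigned int offset)
    {
            return tctl > offset ? tctl - offset : 0;   /* clamp at 0 */
    }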
diff --git a/drivers/hwmon/ltc2990.c b/drivers/hwmon/ltc2990.c
index 8f8fe05..2aefdc5 100644
--- a/drivers/hwmon/ltc2990.c
+++ b/drivers/hwmon/ltc2990.c
@@ -5,18 +5,16 @@
  * Author: Mike Looijmans <mike.looijmans@topic.nl>
  *
  * License: GPLv2
- *
- * This driver assumes the chip is wired as a dual current monitor, and
- * reports the voltage drop across two series resistors. It also reports
- * the chip's internal temperature and Vcc power supply voltage.
  */
 
+#include <linux/bitops.h>
 #include <linux/err.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 #define LTC2990_STATUS	0x00
 #define LTC2990_CONTROL	0x01
@@ -28,45 +26,108 @@
 #define LTC2990_V4_MSB	0x0C
 #define LTC2990_VCC_MSB	0x0E
 
-#define LTC2990_CONTROL_KELVIN		BIT(7)
-#define LTC2990_CONTROL_SINGLE		BIT(6)
-#define LTC2990_CONTROL_MEASURE_ALL	(0x3 << 3)
-#define LTC2990_CONTROL_MODE_CURRENT	0x06
-#define LTC2990_CONTROL_MODE_VOLTAGE	0x07
+#define LTC2990_IN0	BIT(0)
+#define LTC2990_IN1	BIT(1)
+#define LTC2990_IN2	BIT(2)
+#define LTC2990_IN3	BIT(3)
+#define LTC2990_IN4	BIT(4)
+#define LTC2990_CURR1	BIT(5)
+#define LTC2990_CURR2	BIT(6)
+#define LTC2990_TEMP1	BIT(7)
+#define LTC2990_TEMP2	BIT(8)
+#define LTC2990_TEMP3	BIT(9)
+#define LTC2990_NONE	0
+#define LTC2990_ALL	GENMASK(9, 0)
 
-/* convert raw register value to sign-extended integer in 16-bit range */
-static int ltc2990_voltage_to_int(int raw)
-{
-	if (raw & BIT(14))
-		return -(0x4000 - (raw & 0x3FFF)) << 2;
-	else
-		return (raw & 0x3FFF) << 2;
-}
+#define LTC2990_MODE0_SHIFT	0
+#define LTC2990_MODE0_MASK	GENMASK(2, 0)
+#define LTC2990_MODE1_SHIFT	3
+#define LTC2990_MODE1_MASK	GENMASK(1, 0)
+
+/* Enabled measurements for mode bits 2..0 */
+static const int ltc2990_attrs_ena_0[] = {
+	LTC2990_IN1 | LTC2990_IN2 | LTC2990_TEMP3,
+	LTC2990_CURR1 | LTC2990_TEMP3,
+	LTC2990_CURR1 | LTC2990_IN3 | LTC2990_IN4,
+	LTC2990_TEMP2 | LTC2990_IN3 | LTC2990_IN4,
+	LTC2990_TEMP2 | LTC2990_CURR2,
+	LTC2990_TEMP2 | LTC2990_TEMP3,
+	LTC2990_CURR1 | LTC2990_CURR2,
+	LTC2990_IN1 | LTC2990_IN2 | LTC2990_IN3 | LTC2990_IN4
+};
+
+/* Enabled measurements for mode bits 4..3 */
+static const int ltc2990_attrs_ena_1[] = {
+	LTC2990_NONE,
+	LTC2990_TEMP2 | LTC2990_IN1 | LTC2990_CURR1,
+	LTC2990_TEMP3 | LTC2990_IN3 | LTC2990_CURR2,
+	LTC2990_ALL
+};
+
+struct ltc2990_data {
+	struct i2c_client *i2c;
+	u32 mode[2];
+};
 
 /* Return the converted value from the given register in uV or mC */
-static int ltc2990_get_value(struct i2c_client *i2c, u8 reg, int *result)
+static int ltc2990_get_value(struct i2c_client *i2c, int index, int *result)
 {
 	int val;
+	u8 reg;
+
+	switch (index) {
+	case LTC2990_IN0:
+		reg = LTC2990_VCC_MSB;
+		break;
+	case LTC2990_IN1:
+	case LTC2990_CURR1:
+	case LTC2990_TEMP2:
+		reg = LTC2990_V1_MSB;
+		break;
+	case LTC2990_IN2:
+		reg = LTC2990_V2_MSB;
+		break;
+	case LTC2990_IN3:
+	case LTC2990_CURR2:
+	case LTC2990_TEMP3:
+		reg = LTC2990_V3_MSB;
+		break;
+	case LTC2990_IN4:
+		reg = LTC2990_V4_MSB;
+		break;
+	case LTC2990_TEMP1:
+		reg = LTC2990_TINT_MSB;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	val = i2c_smbus_read_word_swapped(i2c, reg);
 	if (unlikely(val < 0))
 		return val;
 
-	switch (reg) {
-	case LTC2990_TINT_MSB:
-		/* internal temp, 0.0625 degrees/LSB, 13-bit  */
-		val = (val & 0x1FFF) << 3;
-		*result = (val * 1000) >> 7;
+	switch (index) {
+	case LTC2990_TEMP1:
+	case LTC2990_TEMP2:
+	case LTC2990_TEMP3:
+		/* temp, 0.0625 degrees/LSB */
+		*result = sign_extend32(val, 12) * 1000 / 16;
 		break;
-	case LTC2990_V1_MSB:
-	case LTC2990_V3_MSB:
-		 /* Vx-Vy, 19.42uV/LSB. Depends on mode. */
-		*result = ltc2990_voltage_to_int(val) * 1942 / (4 * 100);
+	case LTC2990_CURR1:
+	case LTC2990_CURR2:
+		 /* Vx-Vy, 19.42uV/LSB */
+		*result = sign_extend32(val, 14) * 1942 / 100;
 		break;
-	case LTC2990_VCC_MSB:
-		/* Vcc, 305.18μV/LSB, 2.5V offset */
-		*result = (ltc2990_voltage_to_int(val) * 30518 /
-			   (4 * 100 * 1000)) + 2500;
+	case LTC2990_IN0:
+		/* Vcc, 305.18uV/LSB, 2.5V offset */
+		*result = sign_extend32(val, 14) * 30518 / (100 * 1000) + 2500;
+		break;
+	case LTC2990_IN1:
+	case LTC2990_IN2:
+	case LTC2990_IN3:
+	case LTC2990_IN4:
+		/* Vx, 305.18uV/LSB */
+		*result = sign_extend32(val, 14) * 30518 / (100 * 1000);
 		break;
 	default:
 		return -EINVAL; /* won't happen, keep compiler happy */
@@ -79,48 +140,117 @@
 				  struct device_attribute *da, char *buf)
 {
 	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+	struct ltc2990_data *data = dev_get_drvdata(dev);
 	int value;
 	int ret;
 
-	ret = ltc2990_get_value(dev_get_drvdata(dev), attr->index, &value);
+	ret = ltc2990_get_value(data->i2c, attr->index, &value);
 	if (unlikely(ret < 0))
 		return ret;
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", value);
 }
 
+static umode_t ltc2990_attrs_visible(struct kobject *kobj,
+				     struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct ltc2990_data *data = dev_get_drvdata(dev);
+	struct device_attribute *da =
+			container_of(a, struct device_attribute, attr);
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+
+	int attrs_mask = LTC2990_IN0 | LTC2990_TEMP1 |
+			 (ltc2990_attrs_ena_0[data->mode[0]] &
+			  ltc2990_attrs_ena_1[data->mode[1]]);
+
+	if (attr->index & attrs_mask)
+		return a->mode;
+
+	return 0;
+}
+
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, ltc2990_show_value, NULL,
-			  LTC2990_TINT_MSB);
+			  LTC2990_TEMP1);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, ltc2990_show_value, NULL,
+			  LTC2990_TEMP2);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, ltc2990_show_value, NULL,
+			  LTC2990_TEMP3);
 static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, ltc2990_show_value, NULL,
-			  LTC2990_V1_MSB);
+			  LTC2990_CURR1);
 static SENSOR_DEVICE_ATTR(curr2_input, S_IRUGO, ltc2990_show_value, NULL,
-			  LTC2990_V3_MSB);
+			  LTC2990_CURR2);
 static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, ltc2990_show_value, NULL,
-			  LTC2990_VCC_MSB);
+			  LTC2990_IN0);
+static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, ltc2990_show_value, NULL,
+			  LTC2990_IN1);
+static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, ltc2990_show_value, NULL,
+			  LTC2990_IN2);
+static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, ltc2990_show_value, NULL,
+			  LTC2990_IN3);
+static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, ltc2990_show_value, NULL,
+			  LTC2990_IN4);
 
 static struct attribute *ltc2990_attrs[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	&sensor_dev_attr_temp2_input.dev_attr.attr,
+	&sensor_dev_attr_temp3_input.dev_attr.attr,
 	&sensor_dev_attr_curr1_input.dev_attr.attr,
 	&sensor_dev_attr_curr2_input.dev_attr.attr,
 	&sensor_dev_attr_in0_input.dev_attr.attr,
+	&sensor_dev_attr_in1_input.dev_attr.attr,
+	&sensor_dev_attr_in2_input.dev_attr.attr,
+	&sensor_dev_attr_in3_input.dev_attr.attr,
+	&sensor_dev_attr_in4_input.dev_attr.attr,
 	NULL,
 };
-ATTRIBUTE_GROUPS(ltc2990);
+
+static const struct attribute_group ltc2990_group = {
+	.attrs = ltc2990_attrs,
+	.is_visible = ltc2990_attrs_visible,
+};
+__ATTRIBUTE_GROUPS(ltc2990);
 
 static int ltc2990_i2c_probe(struct i2c_client *i2c,
 			     const struct i2c_device_id *id)
 {
 	int ret;
 	struct device *hwmon_dev;
+	struct ltc2990_data *data;
+	struct device_node *of_node = i2c->dev.of_node;
 
 	if (!i2c_check_functionality(i2c->adapter, I2C_FUNC_SMBUS_BYTE_DATA |
 				     I2C_FUNC_SMBUS_WORD_DATA))
 		return -ENODEV;
 
-	/* Setup continuous mode, current monitor */
+	data = devm_kzalloc(&i2c->dev, sizeof(struct ltc2990_data), GFP_KERNEL);
+	if (unlikely(!data))
+		return -ENOMEM;
+
+	data->i2c = i2c;
+
+	if (of_node) {
+		ret = of_property_read_u32_array(of_node, "lltc,meas-mode",
+						 data->mode, 2);
+		if (ret < 0)
+			return ret;
+
+		if (data->mode[0] & ~LTC2990_MODE0_MASK ||
+		    data->mode[1] & ~LTC2990_MODE1_MASK)
+			return -EINVAL;
+	} else {
+		ret = i2c_smbus_read_byte_data(i2c, LTC2990_CONTROL);
+		if (ret < 0)
+			return ret;
+
+		data->mode[0] = ret >> LTC2990_MODE0_SHIFT & LTC2990_MODE0_MASK;
+		data->mode[1] = ret >> LTC2990_MODE1_SHIFT & LTC2990_MODE1_MASK;
+	}
+
+	/* Setup continuous mode */
 	ret = i2c_smbus_write_byte_data(i2c, LTC2990_CONTROL,
-					LTC2990_CONTROL_MEASURE_ALL |
-					LTC2990_CONTROL_MODE_CURRENT);
+					data->mode[0] << LTC2990_MODE0_SHIFT |
+					data->mode[1] << LTC2990_MODE1_SHIFT);
 	if (ret < 0) {
 		dev_err(&i2c->dev, "Error: Failed to set control mode.\n");
 		return ret;
@@ -134,7 +264,7 @@
 
 	hwmon_dev = devm_hwmon_device_register_with_groups(&i2c->dev,
 							   i2c->name,
-							   i2c,
+							   data,
 							   ltc2990_groups);
 
 	return PTR_ERR_OR_ZERO(hwmon_dev);
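
The ltc2990 rewrite drops the hand-rolled sign extension in favour of sign_extend32(), so each scale factor applies directly to a properly signed value. A sketch of the two conversions with a worked example (raw values chosen purely for illustration):

    #include <linux/bitops.h>

    /* single-ended input, sign at bit 14, 305.18 uV/LSB:
     * raw 0x6000 sign-extends to -8192, and
     * -8192 * 30518 / (100 * 1000) == -2500 (mV) */
    static int ltc2990_vx_to_mv(int raw)
    {
            return sign_extend32(raw, 14) * 30518 / (100 * 1000);
    }

    /* temperature, sign at bit 12, 0.0625 degC/LSB -> milli-degrees */
    static int ltc2990_temp_to_mc(int raw)
    {
            return sign_extend32(raw, 12) * 1000 / 16;
    }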
diff --git a/drivers/hwmon/mc13783-adc.c b/drivers/hwmon/mc13783-adc.c
index 960a1db..67860ad 100644
--- a/drivers/hwmon/mc13783-adc.c
+++ b/drivers/hwmon/mc13783-adc.c
@@ -63,6 +63,10 @@
 	if (ret)
 		return ret;
 
+	/* ADIN7 subchannels */
+	if (channel >= 16)
+		channel = 7;
+
 	channel &= 0x7;
 
 	*val = (sample[channel % 4] >> (channel > 3 ? 14 : 2)) & 0x3ff;
@@ -111,6 +115,57 @@
 	return sprintf(buf, "%u\n", val);
 }
 
+static ssize_t mc13783_adc_read_uid(struct device *dev,
+		struct device_attribute *devattr, char *buf)
+{
+	unsigned int val;
+	struct platform_device *pdev = to_platform_device(dev);
+	kernel_ulong_t driver_data = platform_get_device_id(pdev)->driver_data;
+	int ret = mc13783_adc_read(dev, devattr, &val);
+
+	if (ret)
+		return ret;
+
+	if (driver_data & MC13783_ADC_BPDIV2)
+		/* MC13892 has a 1/2 divider, input range is [0, 4.800V] */
+		val = DIV_ROUND_CLOSEST(val * 4800, 1024);
+	else
+		/* MC13783 has a 0.9 divider, input range is [0, 2.555V] */
+		val = DIV_ROUND_CLOSEST(val * 2555, 1024);
+
+	return sprintf(buf, "%u\n", val);
+}
+
+static ssize_t mc13783_adc_read_temp(struct device *dev,
+		struct device_attribute *devattr, char *buf)
+{
+	unsigned int val;
+	struct platform_device *pdev = to_platform_device(dev);
+	kernel_ulong_t driver_data = platform_get_device_id(pdev)->driver_data;
+	int ret = mc13783_adc_read(dev, devattr, &val);
+
+	if (ret)
+		return ret;
+
+	if (driver_data & MC13783_ADC_BPDIV2) {
+		/*
+		 * MC13892:
+		 * Die temperature readout code at 25C: 680
+		 * Temperature change per LSB: +0.4244C
+		 */
+		ret = DIV_ROUND_CLOSEST(-2635920 + val * 4244, 10);
+	} else {
+		/*
+		 * MC13783:
+		 * Die temperature readout code at 25C: 282
+		 * Temperature change per LSB: -1.14C
+		 */
+		ret = 346480 - 1140 * val;
+	}
+
+	return sprintf(buf, "%d\n", ret);
+}
+
 static DEVICE_ATTR_RO(name);
 static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, mc13783_adc_read_bp, NULL, 2);
 static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, mc13783_adc_read_gp, NULL, 5);
@@ -124,6 +179,9 @@
 static SENSOR_DEVICE_ATTR(in13_input, S_IRUGO, mc13783_adc_read_gp, NULL, 13);
 static SENSOR_DEVICE_ATTR(in14_input, S_IRUGO, mc13783_adc_read_gp, NULL, 14);
 static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, mc13783_adc_read_gp, NULL, 15);
+static SENSOR_DEVICE_ATTR(in16_input, S_IRUGO, mc13783_adc_read_uid, NULL, 16);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
+			  mc13783_adc_read_temp, NULL, 17);
 
 static struct attribute *mc13783_attr_base[] = {
 	&dev_attr_name.attr,
@@ -131,6 +189,8 @@
 	&sensor_dev_attr_in5_input.dev_attr.attr,
 	&sensor_dev_attr_in6_input.dev_attr.attr,
 	&sensor_dev_attr_in7_input.dev_attr.attr,
+	&sensor_dev_attr_in16_input.dev_attr.attr,
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	NULL
 };
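
A quick sanity check of the MC13892 constants above: 25 degC reads as code 680 at +0.4244 degC per LSB, so milli-degrees come out as (val * 4244 - 2635920) / 10, and val == 680 indeed yields 25000 m degC. A minimal sketch of that arithmetic (hypothetical helper name):

    #include <linux/kernel.h>           /* DIV_ROUND_CLOSEST */

    /* MC13892: m degC = (val * 4244 - 2635920) / 10, rounded;
     * mc13892_code_to_mc(680) == 25000 */
    static int mc13892_code_to_mc(unsigned int val)
    {
            return DIV_ROUND_CLOSEST((int)val * 4244 - 2635920, 10);
    }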
 
diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c
index ad2f8ca..d897e52 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -132,7 +132,7 @@
 
 	num_locks = i * 32; /* actual number of locks in this device */
 
-	bank = kzalloc(sizeof(*bank) + num_locks * sizeof(*hwlock), GFP_KERNEL);
+	bank = kzalloc(struct_size(bank, lock, num_locks), GFP_KERNEL);
 	if (!bank) {
 		ret = -ENOMEM;
 		goto iounmap_base;
diff --git a/drivers/hwspinlock/sirf_hwspinlock.c b/drivers/hwspinlock/sirf_hwspinlock.c
index 1601854..cb38e48 100644
--- a/drivers/hwspinlock/sirf_hwspinlock.c
+++ b/drivers/hwspinlock/sirf_hwspinlock.c
@@ -62,8 +62,10 @@
 	if (!pdev->dev.of_node)
 		return -ENODEV;
 
-	hwspin = devm_kzalloc(&pdev->dev, sizeof(*hwspin) +
-			sizeof(*hwlock) * HW_SPINLOCK_NUMBER, GFP_KERNEL);
+	hwspin = devm_kzalloc(&pdev->dev,
+			      struct_size(hwspin, bank.lock,
+					  HW_SPINLOCK_NUMBER),
+			      GFP_KERNEL);
 	if (!hwspin)
 		return -ENOMEM;
 
diff --git a/drivers/hwspinlock/u8500_hsem.c b/drivers/hwspinlock/u8500_hsem.c
index e93eabb..0128d8f 100644
--- a/drivers/hwspinlock/u8500_hsem.c
+++ b/drivers/hwspinlock/u8500_hsem.c
@@ -119,7 +119,7 @@
 	/* clear all interrupts */
 	writel(0xFFFF, io_base + HSEM_ICRALL);
 
-	bank = kzalloc(sizeof(*bank) + num_locks * sizeof(*hwlock), GFP_KERNEL);
+	bank = kzalloc(struct_size(bank, lock, num_locks), GFP_KERNEL);
 	if (!bank) {
 		ret = -ENOMEM;
 		goto iounmap_base;
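
All three hwspinlock conversions replace the open-coded sizeof(*bank) + n * sizeof(*hwlock) with struct_size() from <linux/overflow.h>, which computes the size of a struct with a trailing flexible array and saturates on multiplication overflow, so an oversized count yields a failed allocation rather than an undersized buffer. A self-contained sketch with a hypothetical struct:

    #include <linux/overflow.h>
    #include <linux/slab.h>

    struct lock_entry {
            void __iomem *addr;             /* hypothetical */
    };

    struct lock_bank {
            int irq;
            struct lock_entry lock[];       /* flexible array member */
    };

    static struct lock_bank *alloc_bank(unsigned int num_locks)
    {
            struct lock_bank *bank;

            /* sizeof(*bank) + num_locks * sizeof(bank->lock[0]),
             * saturating instead of wrapping on overflow */
            bank = kzalloc(struct_size(bank, lock, num_locks), GFP_KERNEL);
            return bank;
    }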
diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c
index 9cdb3fb..45b2460 100644
--- a/drivers/hwtracing/coresight/coresight-cpu-debug.c
+++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c
@@ -1,20 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2017 Linaro Limited. All rights reserved.
  *
  * Author: Leo Yan <leo.yan@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  */
 #include <linux/amba/bus.h>
 #include <linux/coresight.h>
@@ -315,7 +303,7 @@
 	}
 
 	pc = debug_adjust_pc(drvdata);
-	dev_emerg(dev, " EDPCSR:  [<%px>] %pS\n", (void *)pc, (void *)pc);
+	dev_emerg(dev, " EDPCSR:  %pS\n", (void *)pc);
 
 	if (drvdata->edcidsr_present)
 		dev_emerg(dev, " EDCIDSR: %08x\n", drvdata->edcidsr);
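
Two things happen in this file: the license boilerplate collapses into a one-line SPDX identifier (// SPDX-License-Identifier: GPL-2.0 in .c files, the /* */ form in headers, as the rest of this series shows), and the raw %px print of the PC is dropped. %pS resolves the address through kallsyms instead, so the log still identifies the faulting function without leaking a kernel address. A tiny sketch:

    #include <linux/printk.h>

    static void report_pc(unsigned long pc)
    {
            /* prints "symbol+0xoff/0xsize"; no raw address exposed */
            pr_emerg("PC: %pS\n", (void *)pc);
    }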
diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
index 043da86..f6d0571 100644
--- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2011-2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/amba/bus.h>
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 580cd38..9b6c555 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -1,15 +1,8 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  *
  * Description: CoreSight Embedded Trace Buffer driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <asm/local.h>
diff --git a/drivers/hwtracing/coresight/coresight-etm-cp14.c b/drivers/hwtracing/coresight/coresight-etm-cp14.c
index 12a2206..4174a8d 100644
--- a/drivers/hwtracing/coresight/coresight-etm-cp14.c
+++ b/drivers/hwtracing/coresight/coresight-etm-cp14.c
@@ -1,13 +1,6 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 4e5ed65..6776956 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/coresight.h>
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index 3ffc9fe..4197df4 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _CORESIGHT_ETM_PERF_H
diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h
index 70b0a24..e8b4549 100644
--- a/drivers/hwtracing/coresight/coresight-etm.h
+++ b/drivers/hwtracing/coresight/coresight-etm.h
@@ -1,13 +1,6 @@
-/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
  */
 
 #ifndef _CORESIGHT_CORESIGHT_ETM_H
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
index 6e547ec..9435c14 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/pm_runtime.h>
diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
index 39f42fd..15ed64d 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -1,15 +1,8 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  *
  * Description: CoreSight Program Flow Trace driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index d219617..4eb8da7 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/pm_runtime.h>
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index cf364a5..9bc04c5 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -1,13 +1,6 @@
-/* Copyright (c) 2014, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index b3b5ea7..b7c4a6f 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -1,13 +1,6 @@
-/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
  */
 
 #ifndef _CORESIGHT_CORESIGHT_ETM_H
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
index 9f8ac0be..448145a 100644
--- a/drivers/hwtracing/coresight/coresight-funnel.c
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -1,15 +1,8 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  *
  * Description: CoreSight Funnel driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index f1d0e21d..0e5a74d 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -1,13 +1,6 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  */
 
 #ifndef _CORESIGHT_PRIV_H
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
index 3756e71..8d2eaaa 100644
--- a/drivers/hwtracing/coresight/coresight-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -1,15 +1,8 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  *
  * Description: CoreSight Replicator driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index 15e7ef38..c46c70a 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -1,16 +1,9 @@
-/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
  *
  * Description: CoreSight System Trace Macrocell driver
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Initial implementation by Pratik Patel
  * (C) 2014-2015 Pratik Patel <pratikp@codeaurora.org>
  *
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index e2513b7..61d849b 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright(C) 2016 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/circ_buf.h>
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 68fbc8f..02f747a 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright(C) 2016 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/coresight.h>
@@ -124,10 +113,9 @@
 	bool used = false;
 	unsigned long flags;
 	void __iomem *vaddr = NULL;
-	dma_addr_t paddr;
+	dma_addr_t paddr = 0;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-
 	/*
 	 * If we don't have a buffer, release the lock and allocate memory.
 	 * Otherwise keep the lock and move along.
@@ -164,11 +152,11 @@
 		goto out;
 
 	/*
-	 * If drvdata::buf == NULL, use the memory allocated above.
+	 * If drvdata::vaddr == NULL, use the memory allocated above.
 	 * Otherwise a buffer still exists from a previous session, so
 	 * simply use that.
 	 */
-	if (drvdata->buf == NULL) {
+	if (drvdata->vaddr == NULL) {
 		used = true;
 		drvdata->vaddr = vaddr;
 		drvdata->paddr = paddr;
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
index 0ea04f5..456f122 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.c
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2012, The Linux Foundation. All rights reserved.
  *
  * Description: CoreSight Trace Memory Controller driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index 8df7a81..dfaff07 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -1,18 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright(C) 2015 Linaro Limited. All rights reserved.
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _CORESIGHT_TMC_H
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index 805f7c2..01b7457 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -1,15 +1,8 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  *
  * Description: CoreSight Trace Port Interface Unit driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 389c4ba..29e834a 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -1,13 +1,6 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/kernel.h>
@@ -1026,8 +1019,10 @@
 	dev_set_name(&csdev->dev, "%s", desc->pdata->name);
 
 	ret = device_register(&csdev->dev);
-	if (ret)
-		goto err_device_register;
+	if (ret) {
+		put_device(&csdev->dev);
+		goto err_kzalloc_csdev;
+	}
 
 	mutex_lock(&coresight_mutex);
 
@@ -1038,8 +1033,6 @@
 
 	return csdev;
 
-err_device_register:
-	kfree(conns);
 err_kzalloc_conns:
 	kfree(refcnts);
 err_kzalloc_refcnts:
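
The error-path change above follows the driver-core ownership rule: once device_register() has run, the release() callback owns the embedded allocation, so a failed registration must be unwound with put_device(), not kfree() -- hence the patch also stops freeing conns by hand. A sketch of the idiom (hypothetical device object):

    #include <linux/device.h>

    static int add_child(struct device *child)
    {
            int ret;

            ret = device_register(child);
            if (ret) {
                    /* drops the ref; ->release() frees the object */
                    put_device(child);
                    return ret;
            }
            return 0;
    }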
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index 7c37544..a33a92e 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -1,13 +1,6 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/types.h>
diff --git a/drivers/hwtracing/stm/ftrace.c b/drivers/hwtracing/stm/ftrace.c
index 7da7564..ce868e0 100644
--- a/drivers/hwtracing/stm/ftrace.c
+++ b/drivers/hwtracing/stm/ftrace.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Simple kernel driver to link kernel Ftrace and an STM device
  * Copyright (c) 2016, Linaro Ltd.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
  * STM Ftrace will be registered as a trace_export.
  */
 
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 8d21b98..fce9f2c 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -202,8 +202,7 @@
 
 	  Note this controller is hooked up to a TI bq24292i charger-IC,
 	  combined with a FUSB302 Type-C port-controller as such it is advised
-	  to also select CONFIG_CHARGER_BQ24190=m and CONFIG_TYPEC_FUSB302=m
-	  (the fusb302 driver currently is in drivers/staging).
+	  to also select CONFIG_TYPEC_FUSB302=m.
 
 config I2C_NFORCE2
 	tristate "Nvidia nForce2, nForce3 and nForce4"
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 1ba40bb..a17f46a 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -363,7 +363,7 @@
 		goto err_clear_wakeup_irq;
 
 	status = dev_pm_domain_attach(&client->dev, true);
-	if (status == -EPROBE_DEFER)
+	if (status)
 		goto err_clear_wakeup_irq;
 
 	/*
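
dev_pm_domain_attach() was reworked this cycle to return 0 both on success and when the device simply has no PM domain, so any non-zero value is now a hard failure and the old special-casing of -EPROBE_DEFER can go. A hedged sketch of the resulting probe pattern:

    #include <linux/pm_domain.h>

    static int foo_attach_domain(struct device *dev)
    {
            int ret = dev_pm_domain_attach(dev, true);

            /* 0 also covers "no PM domain"; everything else,
             * including -EPROBE_DEFER, aborts the probe */
            return ret;
    }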
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index 0e6bc63..8b2b72b 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -92,7 +92,7 @@
 	struct request *rq;
 	int error;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->special = (char *)pc;
 
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 5a8e8e3..5f17838 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -437,7 +437,7 @@
 		bool delay = false;
 
 		rq = blk_get_request(drive->queue,
-			write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,  __GFP_RECLAIM);
+			write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 		memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
 		ide_req(rq)->type = ATA_PRIV_PC;
 		rq->rq_flags |= rq_flags;
@@ -1426,21 +1426,8 @@
 	return 0;
 }
 
-static int idecd_capacity_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, idecd_capacity_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idecd_capacity_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= idecd_capacity_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static ide_proc_entry_t idecd_proc[] = {
-	{ "capacity", S_IFREG|S_IRUGO, &idecd_capacity_proc_fops },
+	{ "capacity", S_IFREG|S_IRUGO, idecd_capacity_proc_show },
 	{}
 };
 
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 2acca12..b132240 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c
@@ -304,7 +304,7 @@
 	struct request *rq;
 	int ret;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	rq->rq_flags = RQF_QUIET;
 	blk_execute_rq(drive->queue, cd->disk, rq, 0);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
index 4e20747..f4f8afd 100644
--- a/drivers/ide/ide-devsets.c
+++ b/drivers/ide/ide-devsets.c
@@ -166,7 +166,7 @@
 	if (!(setting->flags & DS_SYNC))
 		return setting->set(drive, arg);
 
-	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 5;
 	scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index f1a7c58..e3b4e65 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -478,7 +478,7 @@
 	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
 		return -EBUSY;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	drive->mult_req = arg;
diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c
index 82a36ce..95d239b 100644
--- a/drivers/ide/ide-disk_proc.c
+++ b/drivers/ide/ide-disk_proc.c
@@ -52,19 +52,6 @@
 	return 0;
 }
 
-static int idedisk_cache_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, idedisk_cache_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_cache_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= idedisk_cache_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int idedisk_capacity_proc_show(struct seq_file *m, void *v)
 {
 	ide_drive_t*drive = (ide_drive_t *)m->private;
@@ -73,19 +60,6 @@
 	return 0;
 }
 
-static int idedisk_capacity_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, idedisk_capacity_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_capacity_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= idedisk_capacity_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 sub_cmd)
 {
 	u8 *buf;
@@ -114,43 +88,17 @@
 	return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES);
 }
 
-static int idedisk_sv_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, idedisk_sv_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_sv_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= idedisk_sv_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int idedisk_st_proc_show(struct seq_file *m, void *v)
 {
 	return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS);
 }
 
-static int idedisk_st_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, idedisk_st_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idedisk_st_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= idedisk_st_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 ide_proc_entry_t ide_disk_proc[] = {
-	{ "cache",	  S_IFREG|S_IRUGO, &idedisk_cache_proc_fops	},
-	{ "capacity",	  S_IFREG|S_IRUGO, &idedisk_capacity_proc_fops	},
-	{ "geometry",	  S_IFREG|S_IRUGO, &ide_geometry_proc_fops	},
-	{ "smart_values", S_IFREG|S_IRUSR, &idedisk_sv_proc_fops	},
-	{ "smart_thresholds", S_IFREG|S_IRUSR, &idedisk_st_proc_fops	},
+	{ "cache",	  S_IFREG|S_IRUGO, idedisk_cache_proc_show	},
+	{ "capacity",	  S_IFREG|S_IRUGO, idedisk_capacity_proc_show	},
+	{ "geometry",	  S_IFREG|S_IRUGO, ide_geometry_proc_show	},
+	{ "smart_values", S_IFREG|S_IRUSR, idedisk_sv_proc_show		},
+	{ "smart_thresholds", S_IFREG|S_IRUSR, idedisk_st_proc_show	},
 	{}
 };
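
The pattern repeated through these ide files is the proc_create_single() conversion: the table entries now carry a bare seq_file show function, and the shared code can create each entry without a per-file open() wrapper or file_operations. A sketch of the underlying procfs API, assuming the helpers added this cycle:

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int foo_proc_show(struct seq_file *m, void *v)
    {
            seq_printf(m, "%d\n", 42);      /* hypothetical payload */
            return 0;
    }

    /* one call replaces the open()/fops boilerplate; 'data' lands
     * in m->private */
    static void foo_proc_init(struct proc_dir_entry *parent, void *data)
    {
            proc_create_single_data("foo", 0444, parent,
                                    foo_proc_show, data);
    }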
 
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 54d4d78..6f34465 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -180,7 +180,6 @@
 void ide_dma_off_quietly(ide_drive_t *drive)
 {
 	drive->dev_flags &= ~IDE_DFLAG_USING_DMA;
-	ide_toggle_bounce(drive, 0);
 
 	drive->hwif->dma_ops->dma_host_set(drive, 0);
 }
@@ -211,7 +210,6 @@
 void ide_dma_on(ide_drive_t *drive)
 {
 	drive->dev_flags |= IDE_DFLAG_USING_DMA;
-	ide_toggle_bounce(drive, 1);
 
 	drive->hwif->dma_ops->dma_host_set(drive, 1);
 }
diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c
index 471457e..7f697ddb 100644
--- a/drivers/ide/ide-floppy_proc.c
+++ b/drivers/ide/ide-floppy_proc.c
@@ -14,22 +14,9 @@
 	return 0;
 }
 
-static int idefloppy_capacity_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, idefloppy_capacity_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idefloppy_capacity_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= idefloppy_capacity_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 ide_proc_entry_t ide_floppy_proc[] = {
-	{ "capacity",	S_IFREG|S_IRUGO, &idefloppy_capacity_proc_fops	},
-	{ "geometry",	S_IFREG|S_IRUGO, &ide_geometry_proc_fops	},
+	{ "capacity",	S_IFREG|S_IRUGO, idefloppy_capacity_proc_show	},
+	{ "geometry",	S_IFREG|S_IRUGO, ide_geometry_proc_show		},
 	{}
 };
 
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
index 3661abb..af5119a 100644
--- a/drivers/ide/ide-ioctls.c
+++ b/drivers/ide/ide-ioctls.c
@@ -125,7 +125,7 @@
 	if (NULL == (void *) arg) {
 		struct request *rq;
 
-		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 		ide_req(rq)->type = ATA_PRIV_TASKFILE;
 		blk_execute_rq(drive->queue, NULL, rq, 0);
 		err = scsi_req(rq)->result ? -EIO : 0;
@@ -222,7 +222,7 @@
 	struct request *rq;
 	int ret = 0;
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd_len = 1;
 	scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
index e1180fa..78cb79e 100644
--- a/drivers/ide/ide-lib.c
+++ b/drivers/ide/ide-lib.c
@@ -6,32 +6,6 @@
 #include <linux/ide.h>
 #include <linux/bitops.h>
 
-/**
- *	ide_toggle_bounce	-	handle bounce buffering
- *	@drive: drive to update
- *	@on: on/off boolean
- *
- *	Enable or disable bounce buffering for the device. Drives move
- *	between PIO and DMA and that changes the rules we need.
- */
-
-void ide_toggle_bounce(ide_drive_t *drive, int on)
-{
-	u64 addr = BLK_BOUNCE_HIGH;	/* dma64_addr_t */
-
-	if (!PCI_DMA_BUS_IS_PHYS) {
-		addr = BLK_BOUNCE_ANY;
-	} else if (on && drive->media == ide_disk) {
-		struct device *dev = drive->hwif->dev;
-
-		if (dev && dev->dma_mask)
-			addr = *dev->dma_mask;
-	}
-
-	if (drive->queue)
-		blk_queue_bounce_limit(drive->queue, addr);
-}
-
 u64 ide_get_lba_addr(struct ide_cmd *cmd, int lba48)
 {
 	struct ide_taskfile *tf = &cmd->tf;
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 6465bcc..622f0ed 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -32,7 +32,7 @@
 	}
 	spin_unlock_irq(&hwif->lock);
 
-	rq = blk_get_request(q, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 	scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
 	scsi_req(rq)->cmd_len = 1;
 	ide_req(rq)->type = ATA_PRIV_MISC;
@@ -47,7 +47,7 @@
 	 * Make sure that *some* command is sent to the drive after the
 	 * timeout has expired, so power management will be reenabled.
 	 */
-	rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_NOWAIT);
+	rq = blk_get_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(rq))
 		goto out;
 
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index ad8a125..59217aa 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c
@@ -19,7 +19,7 @@
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_SUSPEND;
@@ -90,8 +90,7 @@
 	}
 
 	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request_flags(drive->queue, REQ_OP_DRV_IN,
-				   BLK_MQ_REQ_PREEMPT);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
 	ide_req(rq)->type = ATA_PRIV_PM_RESUME;
 	rq->special = &rqpm;
 	rqpm.pm_step = IDE_PM_START_RESUME;
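
Both hunks in this file reflect the block-layer API change: blk_get_request() now takes BLK_MQ_REQ_* flags instead of a gfp_t, and blk_get_request_flags() is folded into it. 0 means "may sleep" (the old __GFP_RECLAIM), BLK_MQ_REQ_NOWAIT replaces GFP_NOWAIT, and modifiers like BLK_MQ_REQ_PREEMPT pass straight through. A sketch of the new convention:

    #include <linux/blkdev.h>

    static struct request *get_misc_rq(struct request_queue *q, bool nowait)
    {
            /* 0 == may sleep; NOWAIT fails fast with an ERR_PTR */
            return blk_get_request(q, REQ_OP_DRV_IN,
                                   nowait ? BLK_MQ_REQ_NOWAIT : 0);
    }

Callers must still check IS_ERR() on the result, particularly in the nowait case.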
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 2019e66..56d7bc2 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -796,8 +796,7 @@
 	 * This will be fixed once we teach pci_map_sg() about our boundary
 	 * requirements, hopefully soon. *FIXME*
 	 */
-	if (!PCI_DMA_BUS_IS_PHYS)
-		max_sg_entries >>= 1;
+	max_sg_entries >>= 1;
 #endif /* CONFIG_PCI */
 
 	blk_queue_max_segments(q, max_sg_entries);
@@ -805,9 +804,6 @@
 	/* assign drive queue */
 	drive->queue = q;
 
-	/* needs drive->queue to be set */
-	ide_toggle_bounce(drive, 1);
-
 	return 0;
 }
 
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 863db44..45c9974 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -56,19 +56,6 @@
 	return 0;
 }
 
-static int ide_imodel_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_imodel_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_imodel_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_imodel_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int ide_mate_proc_show(struct seq_file *m, void *v)
 {
 	ide_hwif_t	*hwif = (ide_hwif_t *) m->private;
@@ -80,19 +67,6 @@
 	return 0;
 }
 
-static int ide_mate_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_mate_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_mate_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_mate_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int ide_channel_proc_show(struct seq_file *m, void *v)
 {
 	ide_hwif_t	*hwif = (ide_hwif_t *) m->private;
@@ -101,19 +75,6 @@
 	return 0;
 }
 
-static int ide_channel_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_channel_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_channel_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_channel_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int ide_identify_proc_show(struct seq_file *m, void *v)
 {
 	ide_drive_t *drive = (ide_drive_t *)m->private;
@@ -141,19 +102,6 @@
 	return 0;
 }
 
-static int ide_identify_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_identify_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_identify_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_identify_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /**
  *	ide_find_setting	-	find a specific setting
  *	@st: setting table pointer
@@ -441,27 +389,14 @@
 	.write		= ide_settings_proc_write,
 };
 
-static int ide_capacity_proc_show(struct seq_file *m, void *v)
+int ide_capacity_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "%llu\n", (long long)0x7fffffff);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ide_capacity_proc_show);
 
-static int ide_capacity_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_capacity_proc_show, NULL);
-}
-
-const struct file_operations ide_capacity_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_capacity_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-EXPORT_SYMBOL_GPL(ide_capacity_proc_fops);
-
-static int ide_geometry_proc_show(struct seq_file *m, void *v)
+int ide_geometry_proc_show(struct seq_file *m, void *v)
 {
 	ide_drive_t	*drive = (ide_drive_t *) m->private;
 
@@ -471,20 +406,7 @@
 			drive->bios_cyl, drive->bios_head, drive->bios_sect);
 	return 0;
 }
-
-static int ide_geometry_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_geometry_proc_show, PDE_DATA(inode));
-}
-
-const struct file_operations ide_geometry_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_geometry_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-EXPORT_SYMBOL(ide_geometry_proc_fops);
+EXPORT_SYMBOL(ide_geometry_proc_show);
 
 static int ide_dmodel_proc_show(struct seq_file *seq, void *v)
 {
@@ -495,19 +417,6 @@
 	return 0;
 }
 
-static int ide_dmodel_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_dmodel_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_dmodel_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_dmodel_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int ide_driver_proc_show(struct seq_file *m, void *v)
 {
 	ide_drive_t		*drive = (ide_drive_t *)m->private;
@@ -523,65 +432,6 @@
 	return 0;
 }
 
-static int ide_driver_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_driver_proc_show, PDE_DATA(inode));
-}
-
-static int ide_replace_subdriver(ide_drive_t *drive, const char *driver)
-{
-	struct device *dev = &drive->gendev;
-	int ret = 1;
-	int err;
-
-	device_release_driver(dev);
-	/* FIXME: device can still be in use by previous driver */
-	strlcpy(drive->driver_req, driver, sizeof(drive->driver_req));
-	err = device_attach(dev);
-	if (err < 0)
-		printk(KERN_WARNING "IDE: %s: device_attach error: %d\n",
-			__func__, err);
-	drive->driver_req[0] = 0;
-	if (dev->driver == NULL) {
-		err = device_attach(dev);
-		if (err < 0)
-			printk(KERN_WARNING
-				"IDE: %s: device_attach(2) error: %d\n",
-				__func__, err);
-	}
-	if (dev->driver && !strcmp(dev->driver->name, driver))
-		ret = 0;
-
-	return ret;
-}
-
-static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer,
-				     size_t count, loff_t *pos)
-{
-	ide_drive_t	*drive = PDE_DATA(file_inode(file));
-	char name[32];
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-	if (count > 31)
-		count = 31;
-	if (copy_from_user(name, buffer, count))
-		return -EFAULT;
-	name[count] = '\0';
-	if (ide_replace_subdriver(drive, name))
-		return -EINVAL;
-	return count;
-}
-
-static const struct file_operations ide_driver_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_driver_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= ide_driver_proc_write,
-};
-
 static int ide_media_proc_show(struct seq_file *m, void *v)
 {
 	ide_drive_t	*drive = (ide_drive_t *) m->private;
@@ -613,11 +463,10 @@
 };
 
 static ide_proc_entry_t generic_drive_entries[] = {
-	{ "driver",	S_IFREG|S_IRUGO,	 &ide_driver_proc_fops	},
-	{ "identify",	S_IFREG|S_IRUSR,	 &ide_identify_proc_fops},
-	{ "media",	S_IFREG|S_IRUGO,	 &ide_media_proc_fops	},
-	{ "model",	S_IFREG|S_IRUGO,	 &ide_dmodel_proc_fops	},
-	{ "settings",	S_IFREG|S_IRUSR|S_IWUSR, &ide_settings_proc_fops},
+	{ "driver",	S_IFREG|S_IRUGO,	 ide_driver_proc_show	},
+	{ "identify",	S_IFREG|S_IRUSR,	 ide_identify_proc_show	},
+	{ "media",	S_IFREG|S_IRUGO,	 ide_media_proc_show	},
+	{ "model",	S_IFREG|S_IRUGO,	 ide_dmodel_proc_show	},
 	{}
 };
 
@@ -628,7 +477,7 @@
 	if (!dir || !p)
 		return;
 	while (p->name != NULL) {
-		ent = proc_create_data(p->name, p->mode, dir, p->proc_fops, data);
+		ent = proc_create_single_data(p->name, p->mode, dir, p->show, data);
 		if (!ent) return;
 		p++;
 	}
@@ -693,8 +542,12 @@
 			continue;
 
 		drive->proc = proc_mkdir(drive->name, parent);
-		if (drive->proc)
+		if (drive->proc) {
 			ide_add_proc_entries(drive->proc, generic_drive_entries, drive);
+			proc_create_data("setting", S_IFREG|S_IRUSR|S_IWUSR,
+					drive->proc, &ide_settings_proc_fops,
+					drive);
+		}
 		sprintf(name, "ide%d/%s", (drive->name[2]-'a')/2, drive->name);
 		ent = proc_symlink(drive->name, proc_ide_root, name);
 		if (!ent) return;
@@ -704,6 +557,7 @@
 void ide_proc_unregister_device(ide_drive_t *drive)
 {
 	if (drive->proc) {
+		remove_proc_entry("settings", drive->proc);
 		ide_remove_proc_entries(drive->proc, generic_drive_entries);
 		remove_proc_entry(drive->name, proc_ide_root);
 		remove_proc_entry(drive->name, drive->hwif->proc);
@@ -712,9 +566,9 @@
 }
 
 static ide_proc_entry_t hwif_entries[] = {
-	{ "channel",	S_IFREG|S_IRUGO,	&ide_channel_proc_fops	},
-	{ "mate",	S_IFREG|S_IRUGO,	&ide_mate_proc_fops	},
-	{ "model",	S_IFREG|S_IRUGO,	&ide_imodel_proc_fops	},
+	{ "channel",	S_IFREG|S_IRUGO,	ide_channel_proc_show	},
+	{ "mate",	S_IFREG|S_IRUGO,	ide_mate_proc_show	},
+	{ "model",	S_IFREG|S_IRUGO,	ide_imodel_proc_show	},
 	{}
 };
 
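The open()/file_operations boilerplate deleted throughout this file is essentially what proc_create_single_data() now provides internally: it wraps the show() callback with single_open() and stores the data pointer where the show routine can retrieve it. A sketch of the replacement pattern, using one of the entries above (hypothetical "dir" and "hwif" variables):

	ent = proc_create_single_data("channel", S_IFREG|S_IRUGO, dir,
				      ide_channel_proc_show, hwif);
	if (!ent)
		return;

The "settings" entry keeps a full proc_create_data() registration because it still needs a write() handler, which the single-show helper does not provide.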
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index fd57e8c..aee7b46 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -854,7 +854,7 @@
 	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
 	BUG_ON(size < 0 || size % tape->blk_size);
 
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_MISC;
 	scsi_req(rq)->cmd[13] = cmd;
 	rq->rq_disk = tape->disk;
@@ -862,7 +862,7 @@
 
 	if (size) {
 		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
-				      __GFP_RECLAIM);
+				      GFP_NOIO);
 		if (ret)
 			goto out_put;
 	}
@@ -1847,22 +1847,9 @@
 	return 0;
 }
 
-static int idetape_name_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, idetape_name_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations idetape_name_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= idetape_name_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static ide_proc_entry_t idetape_proc[] = {
-	{ "capacity",	S_IFREG|S_IRUGO,	&ide_capacity_proc_fops	},
-	{ "name",	S_IFREG|S_IRUGO,	&idetape_name_proc_fops	},
+	{ "capacity",	S_IFREG|S_IRUGO,	ide_capacity_proc_show	},
+	{ "name",	S_IFREG|S_IRUGO,	idetape_name_proc_show	},
 	{}
 };
 
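Unlike blk_get_request(), blk_rq_map_kern() still takes a real gfp mask, so these call sites stay blocking but switch from __GFP_RECLAIM to GFP_NOIO, which cannot recurse into the I/O path during memory reclaim. In outline (assuming a prepared request "rq" and kernel buffer "buf"):

	ret = blk_rq_map_kern(drive->queue, rq, buf, size, GFP_NOIO);
	if (ret)
		goto out_put;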
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index abe0822..c034cd9 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -431,7 +431,7 @@
 
 	rq = blk_get_request(drive->queue,
 		(cmd->tf_flags & IDE_TFLAG_WRITE) ?
-			REQ_OP_DRV_OUT : REQ_OP_DRV_IN, __GFP_RECLAIM);
+			REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 	ide_req(rq)->type = ATA_PRIV_TASKFILE;
 
 	/*
@@ -442,7 +442,7 @@
 	 */
 	if (nsect) {
 		error = blk_rq_map_kern(drive->queue, rq, buf,
-					nsect * SECTOR_SIZE, __GFP_RECLAIM);
+					nsect * SECTOR_SIZE, GFP_NOIO);
 		if (error)
 			goto put_req;
 	}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index ecc55e9..3330d97 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1157,8 +1157,9 @@
 			goto err;
 	}
 
-	pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
-			     sizeof *pkey_cache->table, GFP_KERNEL);
+	pkey_cache = kmalloc(struct_size(pkey_cache, table,
+					 tprops->pkey_tbl_len),
+			     GFP_KERNEL);
 	if (!pkey_cache)
 		goto err;
 
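The allocation conversions in this and the following InfiniBand, input and mthca hunks all follow one pattern: the open-coded "sizeof(*p) + n * sizeof(p->elem[0])" arithmetic is replaced with struct_size() from <linux/overflow.h>, which saturates to SIZE_MAX on multiplication overflow so the allocation fails cleanly instead of returning an undersized buffer. A self-contained sketch with a hypothetical flexible-array struct ("n" is the element count from the caller):

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct example {			/* hypothetical */
		unsigned int	len;
		u16		table[];	/* flexible array member */
	};

	struct example *p;

	/* was: kmalloc(sizeof(*p) + n * sizeof(p->table[0]), GFP_KERNEL) */
	p = kmalloc(struct_size(p, table, n), GFP_KERNEL);
	if (!p)
		return -ENOMEM;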
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index a92e1a5..36a4d90 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -4298,8 +4298,8 @@
 	int count = 0;
 	u8 i;
 
-	cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
-			 ib_device->phys_port_cnt, GFP_KERNEL);
+	cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
+			 GFP_KERNEL);
 	if (!cm_dev)
 		return;
 
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 4eb72ff..6c48f41 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -813,7 +813,7 @@
 	int i;
 	int count = 0;
 
-	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+	dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
 		      GFP_KERNEL);
 	if (!dev)
 		return;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 21a887c9..e3662a8e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2756,8 +2756,8 @@
 	struct ib_uflow_resources *resources;
 
 	resources =
-		kmalloc(sizeof(*resources) +
-			num_specs * sizeof(*resources->collection), GFP_KERNEL);
+		kmalloc(struct_size(resources, collection, num_specs),
+			GFP_KERNEL);
 
 	if (!resources)
 		return NULL;
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index 0f88a19..6ceb672 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -297,8 +297,7 @@
 	if (max_attr_buckets >= 0)
 		num_attr_buckets = max_attr_buckets + 1;
 
-	method = kzalloc(sizeof(*method) +
-			 num_attr_buckets * sizeof(*method->attr_buckets),
+	method = kzalloc(struct_size(method, attr_buckets, num_attr_buckets),
 			 GFP_KERNEL);
 	if (!method)
 		return ERR_PTR(-ENOMEM);
@@ -446,9 +445,9 @@
 	if (max_method_buckets >= 0)
 		num_method_buckets = max_method_buckets + 1;
 
-	object = kzalloc(sizeof(*object) +
-			 num_method_buckets *
-			 sizeof(*object->method_buckets), GFP_KERNEL);
+	object = kzalloc(struct_size(object, method_buckets,
+				     num_method_buckets),
+			 GFP_KERNEL);
 	if (!object)
 		return ERR_PTR(-ENOMEM);
 
@@ -469,8 +468,8 @@
 		if (methods_max_bucket < 0)
 			continue;
 
-		hash = kzalloc(sizeof(*hash) +
-			       sizeof(*hash->methods) * (methods_max_bucket + 1),
+		hash = kzalloc(struct_size(hash, methods,
+					   methods_max_bucket + 1),
 			       GFP_KERNEL);
 		if (!hash) {
 			res = -ENOMEM;
@@ -579,8 +578,8 @@
 	if (max_object_buckets >= 0)
 		num_objects_buckets = max_object_buckets + 1;
 
-	root_spec = kzalloc(sizeof(*root_spec) +
-			    num_objects_buckets * sizeof(*root_spec->object_buckets),
+	root_spec = kzalloc(struct_size(root_spec, object_buckets,
+					num_objects_buckets),
 			    GFP_KERNEL);
 	if (!root_spec)
 		return ERR_PTR(-ENOMEM);
@@ -603,8 +602,8 @@
 		if (objects_max_bucket < 0)
 			continue;
 
-		hash = kzalloc(sizeof(*hash) +
-			       sizeof(*hash->objects) * (objects_max_bucket + 1),
+		hash = kzalloc(struct_size(hash, objects,
+					   objects_max_bucket + 1),
 			       GFP_KERNEL);
 		if (!hash) {
 			res = -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 2fe503e..7a31be3 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -367,7 +367,7 @@
 	obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
 	num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
 
-	table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
+	table = kmalloc(struct_size(table, icm, num_icm), GFP_KERNEL);
 	if (!table)
 		return NULL;
 
@@ -529,7 +529,7 @@
 		return NULL;
 
 	npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
-	db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
+	db_tab = kmalloc(struct_size(db_tab, page, npages), GFP_KERNEL);
 	if (!db_tab)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index cc429b5..49c9541 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -283,7 +283,7 @@
 
 	/* Allocate struct plus pointers to first level page tables. */
 	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
-	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
+	mr = kzalloc(struct_size(mr, mr.map, m), GFP_KERNEL);
 	if (!mr)
 		goto bail;
 
@@ -730,7 +730,7 @@
 
 	/* Allocate struct plus pointers to first level page tables. */
 	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
-	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
+	fmr = kzalloc(struct_size(fmr, mr.map, m), GFP_KERNEL);
 	if (!fmr)
 		goto bail;
 
diff --git a/drivers/input/input-leds.c b/drivers/input/input-leds.c
index 5f04b2d..99cc784 100644
--- a/drivers/input/input-leds.c
+++ b/drivers/input/input-leds.c
@@ -98,8 +98,7 @@
 	if (!num_leds)
 		return -ENXIO;
 
-	leds = kzalloc(sizeof(*leds) + num_leds * sizeof(*leds->leds),
-		       GFP_KERNEL);
+	leds = kzalloc(struct_size(leds, leds, num_leds), GFP_KERNEL);
 	if (!leds)
 		return -ENOMEM;
 
diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c
index a1bbec9..cf30523 100644
--- a/drivers/input/input-mt.c
+++ b/drivers/input/input-mt.c
@@ -49,7 +49,7 @@
 	if (mt)
 		return mt->num_slots != num_slots ? -EINVAL : 0;
 
-	mt = kzalloc(sizeof(*mt) + num_slots * sizeof(*mt->slots), GFP_KERNEL);
+	mt = kzalloc(struct_size(mt, slots, num_slots), GFP_KERNEL);
 	if (!mt)
 		goto err_mem;
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 9785546..6365c19 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1943,8 +1943,7 @@
 		break;
 
 	default:
-		pr_err("input_set_capability: unknown type %u (code %u)\n",
-		       type, code);
+		pr_err("%s: unknown type %u (code %u)\n", __func__, type, code);
 		dump_stack();
 		return;
 	}
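For reference, input_set_capability() is roughly the following (simplified from elsewhere in this same file); setting the event-type bit together with the code bit is why the driver conversions below can drop their separate __set_bit(EV_KEY, ...) and __set_bit(EV_REL, ...) calls:

	void input_set_capability(struct input_dev *dev,
				  unsigned int type, unsigned int code)
	{
		switch (type) {
		case EV_KEY:
			__set_bit(code, dev->keybit);
			break;
		case EV_REL:
			__set_bit(code, dev->relbit);
			break;
		/* ... EV_ABS, EV_MSC, EV_SW and friends elided ... */
		default:
			pr_err("%s: unknown type %u (code %u)\n",
			       __func__, type, code);
			dump_stack();
			return;
		}

		__set_bit(type, dev->evbit);	/* the bit bare __set_bit() users had to set by hand */
	}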
diff --git a/drivers/input/joystick/as5011.c b/drivers/input/joystick/as5011.c
index 005d852..f051993 100644
--- a/drivers/input/joystick/as5011.c
+++ b/drivers/input/joystick/as5011.c
@@ -269,9 +269,7 @@
 	input_dev->id.bustype = BUS_I2C;
 	input_dev->dev.parent = &client->dev;
 
-	__set_bit(EV_KEY, input_dev->evbit);
-	__set_bit(EV_ABS, input_dev->evbit);
-	__set_bit(BTN_JOYSTICK, input_dev->keybit);
+	input_set_capability(input_dev, EV_KEY, BTN_JOYSTICK);
 
 	input_set_abs_params(input_dev, ABS_X,
 		AS5011_MIN_AXIS, AS5011_MAX_AXIS, AS5011_FUZZ, AS5011_FLAT);
diff --git a/drivers/input/joystick/gamecon.c b/drivers/input/joystick/gamecon.c
index 2ffb2e8..4e10ffd 100644
--- a/drivers/input/joystick/gamecon.c
+++ b/drivers/input/joystick/gamecon.c
@@ -862,7 +862,7 @@
 
 	case GC_N64:
 		for (i = 0; i < 10; i++)
-			__set_bit(gc_n64_btn[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY, gc_n64_btn[i]);
 
 		for (i = 0; i < 2; i++) {
 			input_set_abs_params(input_dev, ABS_X + i, -127, 126, 0, 2);
@@ -879,26 +879,27 @@
 		break;
 
 	case GC_SNESMOUSE:
-		__set_bit(BTN_LEFT, input_dev->keybit);
-		__set_bit(BTN_RIGHT, input_dev->keybit);
-		__set_bit(REL_X, input_dev->relbit);
-		__set_bit(REL_Y, input_dev->relbit);
+		input_set_capability(input_dev, EV_KEY, BTN_LEFT);
+		input_set_capability(input_dev, EV_KEY, BTN_RIGHT);
+		input_set_capability(input_dev, EV_REL, REL_X);
+		input_set_capability(input_dev, EV_REL, REL_Y);
 		break;
 
 	case GC_SNES:
 		for (i = 4; i < 8; i++)
-			__set_bit(gc_snes_btn[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY, gc_snes_btn[i]);
 		/* fall through */
 	case GC_NES:
 		for (i = 0; i < 4; i++)
-			__set_bit(gc_snes_btn[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY, gc_snes_btn[i]);
 		break;
 
 	case GC_MULTI2:
-		__set_bit(BTN_THUMB, input_dev->keybit);
+		input_set_capability(input_dev, EV_KEY, BTN_THUMB);
 		/* fall through */
 	case GC_MULTI:
-		__set_bit(BTN_TRIGGER, input_dev->keybit);
+		input_set_capability(input_dev, EV_KEY, BTN_TRIGGER);
 		break;
 
 	case GC_PSX:
@@ -906,15 +907,16 @@
 			input_set_abs_params(input_dev,
 					     gc_psx_abs[i], 4, 252, 0, 2);
 		for (i = 0; i < 12; i++)
-			__set_bit(gc_psx_btn[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY, gc_psx_btn[i]);
 
 		break;
 
 	case GC_DDR:
 		for (i = 0; i < 4; i++)
-			__set_bit(gc_psx_ddr_btn[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY,
+					     gc_psx_ddr_btn[i]);
 		for (i = 0; i < 12; i++)
-			__set_bit(gc_psx_btn[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY, gc_psx_btn[i]);
 
 		break;
 	}
diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 06e9650..48e36ac 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -86,8 +86,10 @@
 
 #define XPAD_PKT_LEN 64
 
-/* xbox d-pads should map to buttons, as is required for DDR pads
-   but we map them to axes when possible to simplify things */
+/*
+ * xbox d-pads should map to buttons, as is required for DDR pads
+ * but we map them to axes when possible to simplify things
+ */
 #define MAP_DPAD_TO_BUTTONS		(1 << 0)
 #define MAP_TRIGGERS_TO_BUTTONS		(1 << 1)
 #define MAP_STICKS_TO_NULL		(1 << 2)
@@ -123,6 +125,7 @@
 	u8 mapping;
 	u8 xtype;
 } xpad_device[] = {
+	{ 0x0079, 0x18d4, "GPD Win 2 Controller", 0, XTYPE_XBOX360 },
 	{ 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX },
 	{ 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX },
 	{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
@@ -387,15 +390,15 @@
  * match against vendor id as well. Wired Xbox 360 devices have protocol 1,
  * wireless controllers have protocol 129.
  */
-#define XPAD_XBOX360_VENDOR_PROTOCOL(vend,pr) \
+#define XPAD_XBOX360_VENDOR_PROTOCOL(vend, pr) \
 	.match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \
 	.idVendor = (vend), \
 	.bInterfaceClass = USB_CLASS_VENDOR_SPEC, \
 	.bInterfaceSubClass = 93, \
 	.bInterfaceProtocol = (pr)
 #define XPAD_XBOX360_VENDOR(vend) \
-	{ XPAD_XBOX360_VENDOR_PROTOCOL(vend,1) }, \
-	{ XPAD_XBOX360_VENDOR_PROTOCOL(vend,129) }
+	{ XPAD_XBOX360_VENDOR_PROTOCOL((vend), 1) }, \
+	{ XPAD_XBOX360_VENDOR_PROTOCOL((vend), 129) }
 
 /* The Xbox One controller uses subclass 71 and protocol 208. */
 #define XPAD_XBOXONE_VENDOR_PROTOCOL(vend, pr) \
@@ -405,10 +408,11 @@
 	.bInterfaceSubClass = 71, \
 	.bInterfaceProtocol = (pr)
 #define XPAD_XBOXONE_VENDOR(vend) \
-	{ XPAD_XBOXONE_VENDOR_PROTOCOL(vend, 208) }
+	{ XPAD_XBOXONE_VENDOR_PROTOCOL((vend), 208) }
 
 static const struct usb_device_id xpad_table[] = {
 	{ USB_INTERFACE_INFO('X', 'B', 0) },	/* X-Box USB-IF not approved class */
+	XPAD_XBOX360_VENDOR(0x0079),		/* GPD Win 2 Controller */
 	XPAD_XBOX360_VENDOR(0x044f),		/* Thrustmaster X-Box 360 controllers */
 	XPAD_XBOX360_VENDOR(0x045e),		/* Microsoft X-Box 360 controllers */
 	XPAD_XBOXONE_VENDOR(0x045e),		/* Microsoft X-Box One controllers */
@@ -1573,7 +1577,6 @@
 static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
 {
 	struct usb_xpad *xpad = input_get_drvdata(input_dev);
-	set_bit(abs, input_dev->absbit);
 
 	switch (abs) {
 	case ABS_X:
@@ -1593,6 +1596,9 @@
 	case ABS_HAT0Y:	/* the d-pad (only if dpad is mapped to axes */
 		input_set_abs_params(input_dev, abs, -1, 1, 0, 0);
 		break;
+	default:
+		input_set_abs_params(input_dev, abs, 0, 0, 0, 0);
+		break;
 	}
 }
 
@@ -1633,10 +1639,7 @@
 		input_dev->close = xpad_close;
 	}
 
-	__set_bit(EV_KEY, input_dev->evbit);
-
 	if (!(xpad->mapping & MAP_STICKS_TO_NULL)) {
-		__set_bit(EV_ABS, input_dev->evbit);
 		/* set up axes */
 		for (i = 0; xpad_abs[i] >= 0; i++)
 			xpad_set_up_abs(input_dev, xpad_abs[i]);
@@ -1644,21 +1647,22 @@
 
 	/* set up standard buttons */
 	for (i = 0; xpad_common_btn[i] >= 0; i++)
-		__set_bit(xpad_common_btn[i], input_dev->keybit);
+		input_set_capability(input_dev, EV_KEY, xpad_common_btn[i]);
 
 	/* set up model-specific ones */
 	if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX360W ||
 	    xpad->xtype == XTYPE_XBOXONE) {
 		for (i = 0; xpad360_btn[i] >= 0; i++)
-			__set_bit(xpad360_btn[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY, xpad360_btn[i]);
 	} else {
 		for (i = 0; xpad_btn[i] >= 0; i++)
-			__set_bit(xpad_btn[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY, xpad_btn[i]);
 	}
 
 	if (xpad->mapping & MAP_DPAD_TO_BUTTONS) {
 		for (i = 0; xpad_btn_pad[i] >= 0; i++)
-			__set_bit(xpad_btn_pad[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY,
+					     xpad_btn_pad[i]);
 	}
 
 	/*
@@ -1675,7 +1679,8 @@
 
 	if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) {
 		for (i = 0; xpad_btn_triggers[i] >= 0; i++)
-			__set_bit(xpad_btn_triggers[i], input_dev->keybit);
+			input_set_capability(input_dev, EV_KEY,
+					     xpad_btn_triggers[i]);
 	} else {
 		for (i = 0; xpad_abs_triggers[i] >= 0; i++)
 			xpad_set_up_abs(input_dev, xpad_abs_triggers[i]);
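The deleted set_bit(abs, input_dev->absbit) and __set_bit(EV_ABS, ...) lines above are redundant for the same reason: every axis now goes through input_set_abs_params() (note the new default case), which flags both bits itself. Roughly, as simplified from drivers/input/input.c:

	void input_set_abs_params(struct input_dev *dev, unsigned int axis,
				  int min, int max, int fuzz, int flat)
	{
		struct input_absinfo *absinfo;

		input_alloc_absinfo(dev);
		if (!dev->absinfo)
			return;

		absinfo = &dev->absinfo[axis];
		absinfo->minimum = min;
		absinfo->maximum = max;
		absinfo->fuzz = fuzz;
		absinfo->flat = flat;

		__set_bit(axis, dev->absbit);
		__set_bit(EV_ABS, dev->evbit);
	}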
diff --git a/drivers/input/keyboard/cap11xx.c b/drivers/input/keyboard/cap11xx.c
index 1a1eaca..312916f 100644
--- a/drivers/input/keyboard/cap11xx.c
+++ b/drivers/input/keyboard/cap11xx.c
@@ -357,8 +357,7 @@
 	}
 
 	priv = devm_kzalloc(dev,
-			    sizeof(*priv) +
-				cap->num_channels * sizeof(priv->keycodes[0]),
+			    struct_size(priv, keycodes, cap->num_channels),
 			    GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
index 79eb295..489ddd3 100644
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -244,24 +244,35 @@
 
 	switch (ckdev->ec->event_data.event_type) {
 	case EC_MKBP_EVENT_KEY_MATRIX:
-		/*
-		 * If EC is not the wake source, discard key state changes
-		 * during suspend.
-		 */
-		if (queued_during_suspend)
-			return NOTIFY_OK;
+		if (device_may_wakeup(ckdev->dev)) {
+			pm_wakeup_event(ckdev->dev, 0);
+		} else {
+			/*
+			 * If keyboard is not wake enabled, discard key state
+			 * changes during suspend. Switches will be re-checked
+			 * in cros_ec_keyb_resume() to be sure nothing is lost.
+			 */
+			if (queued_during_suspend)
+				return NOTIFY_OK;
+		}
 
 		if (ckdev->ec->event_size != ckdev->cols) {
 			dev_err(ckdev->dev,
 				"Discarded incomplete key matrix event.\n");
 			return NOTIFY_OK;
 		}
+
 		cros_ec_keyb_process(ckdev,
 				     ckdev->ec->event_data.data.key_matrix,
 				     ckdev->ec->event_size);
 		break;
 
 	case EC_MKBP_EVENT_SYSRQ:
+		if (device_may_wakeup(ckdev->dev))
+			pm_wakeup_event(ckdev->dev, 0);
+		else if (queued_during_suspend)
+			return NOTIFY_OK;
+
 		val = get_unaligned_le32(&ckdev->ec->event_data.data.sysrq);
 		dev_dbg(ckdev->dev, "sysrq code from EC: %#x\n", val);
 		handle_sysrq(val);
@@ -269,12 +280,9 @@
 
 	case EC_MKBP_EVENT_BUTTON:
 	case EC_MKBP_EVENT_SWITCH:
-		/*
-		 * If EC is not the wake source, discard key state
-		 * changes during suspend. Switches will be re-checked in
-		 * cros_ec_keyb_resume() to be sure nothing is lost.
-		 */
-		if (queued_during_suspend)
+		if (device_may_wakeup(ckdev->dev))
+			pm_wakeup_event(ckdev->dev, 0);
+		else if (queued_during_suspend)
 			return NOTIFY_OK;
 
 		if (ckdev->ec->event_data.event_type == EC_MKBP_EVENT_BUTTON) {
@@ -639,6 +647,7 @@
 		return err;
 	}
 
+	device_init_wakeup(ckdev->dev, true);
 	return 0;
 }
 
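All three event cases above now apply the same wakeup pattern: a wake-enabled device reports wakeup activity to the PM core, while a non-wake device keeps discarding events queued during suspend. In isolation (hypothetical "dev"):

	if (device_may_wakeup(dev))
		pm_wakeup_event(dev, 0);	/* credit this event to the wakeup source */
	else if (queued_during_suspend)
		return NOTIFY_OK;		/* not a wake source: drop the stale event */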
diff --git a/drivers/input/misc/ati_remote2.c b/drivers/input/misc/ati_remote2.c
index ded5b84..d8fd58f 100644
--- a/drivers/input/misc/ati_remote2.c
+++ b/drivers/input/misc/ati_remote2.c
@@ -22,7 +22,7 @@
 /*
  * ATI Remote Wonder II Channel Configuration
  *
- * The remote control can by assigned one of sixteen "channels" in order to facilitate
+ * The remote control can be assigned one of sixteen "channels" in order to facilitate
  * the use of multiple remote controls within range of each other.
  * A remote's "channel" may be altered by pressing and holding the "PC" button for
  * approximately 3 seconds, after which the button will slowly flash the count of the
diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c
index 49b34de0..47eb8ca 100644
--- a/drivers/input/misc/hp_sdc_rtc.c
+++ b/drivers/input/misc/hp_sdc_rtc.c
@@ -509,18 +509,6 @@
 #undef NY
 }
 
-static int hp_sdc_rtc_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, hp_sdc_rtc_proc_show, NULL);
-}
-
-static const struct file_operations hp_sdc_rtc_proc_fops = {
-	.open		= hp_sdc_rtc_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int hp_sdc_rtc_ioctl(struct file *file, 
 			    unsigned int cmd, unsigned long arg)
 {
@@ -713,7 +701,7 @@
 	if (misc_register(&hp_sdc_rtc_dev) != 0)
 		printk(KERN_INFO "Could not register misc. dev for i8042 rtc\n");
 
-        proc_create("driver/rtc", 0, NULL, &hp_sdc_rtc_proc_fops);
+        proc_create_single("driver/rtc", 0, NULL, hp_sdc_rtc_proc_show);
 
 	printk(KERN_INFO "HP i8042 SDC + MSM-58321 RTC support loaded "
 			 "(RTC v " RTC_VERSION ")\n");
diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig
index 89ebb8f..f27f23f 100644
--- a/drivers/input/mouse/Kconfig
+++ b/drivers/input/mouse/Kconfig
@@ -133,6 +133,18 @@
 
 	  If unsure, say N.
 
+config MOUSE_PS2_ELANTECH_SMBUS
+	bool "Elantech PS/2 SMbus companion" if EXPERT
+	default y
+	depends on MOUSE_PS2 && MOUSE_PS2_ELANTECH
+	depends on I2C=y || I2C=MOUSE_PS2
+	select MOUSE_PS2_SMBUS
+	help
+	  Say Y here if you have an Elantech touchpad connected to
+	  an SMBus, but enumerated through PS/2.
+
+	  If unsure, say Y.
+
 config MOUSE_PS2_SENTELIC
 	bool "Sentelic Finger Sensing Pad PS/2 protocol extension"
 	depends on MOUSE_PS2
diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 38f9501..cb55797 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -2049,14 +2049,11 @@
 	return 0;
 }
 
-static int alps_hw_init_v6(struct psmouse *psmouse)
+/* Must be in passthrough mode when calling this function */
+static int alps_trackstick_enter_extended_mode_v3_v6(struct psmouse *psmouse)
 {
 	unsigned char param[2] = {0xC8, 0x14};
 
-	/* Enter passthrough mode to let trackpoint enter 6byte raw mode */
-	if (alps_passthrough_mode_v2(psmouse, true))
-		return -1;
-
 	if (ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_SETSCALE11) ||
 	    ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_SETSCALE11) ||
 	    ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_SETSCALE11) ||
@@ -2064,9 +2061,25 @@
 	    ps2_command(&psmouse->ps2dev, &param[1], PSMOUSE_CMD_SETRATE))
 		return -1;
 
+	return 0;
+}
+
+static int alps_hw_init_v6(struct psmouse *psmouse)
+{
+	int ret;
+
+	/* Enter passthrough mode to let trackpoint enter 6byte raw mode */
+	if (alps_passthrough_mode_v2(psmouse, true))
+		return -1;
+
+	ret = alps_trackstick_enter_extended_mode_v3_v6(psmouse);
+
 	if (alps_passthrough_mode_v2(psmouse, false))
 		return -1;
 
+	if (ret)
+		return ret;
+
 	if (alps_absolute_mode_v6(psmouse)) {
 		psmouse_err(psmouse, "Failed to enable absolute mode\n");
 		return -1;
@@ -2140,10 +2153,18 @@
 
 static int alps_setup_trackstick_v3(struct psmouse *psmouse, int reg_base)
 {
-	struct ps2dev *ps2dev = &psmouse->ps2dev;
 	int ret = 0;
+	int reg_val;
 	unsigned char param[4];
 
+	/*
+	 * The trackstick must be configured to report data to the touchpad
+	 * in extended format, and the touchpad must be told to expect
+	 * extended-format data from the trackstick. Without this, the
+	 * trackstick packets forwarded by the touchpad use the basic
+	 * format, which differs from what the driver expects.
+	 */
+
 	if (alps_passthrough_mode_v3(psmouse, reg_base, true))
 		return -EIO;
 
@@ -2161,39 +2182,36 @@
 		ret = -ENODEV;
 	} else {
 		psmouse_dbg(psmouse, "trackstick E7 report: %3ph\n", param);
-
-		/*
-		 * Not sure what this does, but it is absolutely
-		 * essential. Without it, the touchpad does not
-		 * work at all and the trackstick just emits normal
-		 * PS/2 packets.
-		 */
-		if (ps2_command(ps2dev, NULL, PSMOUSE_CMD_SETSCALE11) ||
-		    ps2_command(ps2dev, NULL, PSMOUSE_CMD_SETSCALE11) ||
-		    ps2_command(ps2dev, NULL, PSMOUSE_CMD_SETSCALE11) ||
-		    alps_command_mode_send_nibble(psmouse, 0x9) ||
-		    alps_command_mode_send_nibble(psmouse, 0x4)) {
-			psmouse_err(psmouse,
-				    "Error sending magic E6 sequence\n");
+		if (alps_trackstick_enter_extended_mode_v3_v6(psmouse)) {
+			psmouse_err(psmouse, "Failed to enter into trackstick extended mode\n");
 			ret = -EIO;
-			goto error;
 		}
+	}
 
+	if (alps_passthrough_mode_v3(psmouse, reg_base, false))
+		return -EIO;
+
+	if (ret)
+		return ret;
+
+	if (alps_enter_command_mode(psmouse))
+		return -EIO;
+
+	reg_val = alps_command_mode_read_reg(psmouse, reg_base + 0x08);
+	if (reg_val == -1) {
+		ret = -EIO;
+	} else {
 		/*
-		 * This ensures the trackstick packets are in the format
-		 * supported by this driver. If bit 1 isn't set the packet
-		 * format is different.
+		 * Tell touchpad that trackstick is now in extended mode.
+		 * If bit 1 isn't set the packet format is different.
 		 */
-		if (alps_enter_command_mode(psmouse) ||
-		    alps_command_mode_write_reg(psmouse,
-						reg_base + 0x08, 0x82) ||
-		    alps_exit_command_mode(psmouse))
+		reg_val |= BIT(1);
+		if (__alps_command_mode_write_reg(psmouse, reg_val))
 			ret = -EIO;
 	}
 
-error:
-	if (alps_passthrough_mode_v3(psmouse, reg_base, false))
-		ret = -EIO;
+	if (alps_exit_command_mode(psmouse))
+		return -EIO;
 
 	return ret;
 }
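The reworked v3 and v6 init paths converge on one error-handling shape: record the configuration result, but always leave passthrough/command mode before acting on it, so a failure can never strand the device in a special mode. In outline, with hypothetical helper names:

	if (enter_mode(psmouse))
		return -EIO;

	ret = do_configuration(psmouse);	/* remember the failure... */

	if (leave_mode(psmouse))		/* ...but always leave the mode */
		return -EIO;

	return ret;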
diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 75e7575..8ff7511 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -36,6 +36,7 @@
 #include <linux/jiffies.h>
 #include <linux/completion.h>
 #include <linux/of.h>
+#include <linux/property.h>
 #include <linux/regulator/consumer.h>
 #include <asm/unaligned.h>
 
@@ -51,6 +52,7 @@
 #define ETP_MAX_FINGERS		5
 #define ETP_FINGER_DATA_LEN	5
 #define ETP_REPORT_ID		0x5D
+#define ETP_TP_REPORT_ID	0x5E
 #define ETP_REPORT_ID_OFFSET	2
 #define ETP_TOUCH_INFO_OFFSET	3
 #define ETP_FINGER_DATA_OFFSET	4
@@ -61,6 +63,7 @@
 struct elan_tp_data {
 	struct i2c_client	*client;
 	struct input_dev	*input;
+	struct input_dev	*tp_input; /* trackpoint input node */
 	struct regulator	*vcc;
 
 	const struct elan_transport_ops *ops;
@@ -930,6 +933,33 @@
 	input_sync(input);
 }
 
+static void elan_report_trackpoint(struct elan_tp_data *data, u8 *report)
+{
+	struct input_dev *input = data->tp_input;
+	u8 *packet = &report[ETP_REPORT_ID_OFFSET + 1];
+	int x, y;
+
+	if (!data->tp_input) {
+		dev_warn_once(&data->client->dev,
+			      "received a trackpoint report while no trackpoint device has been created. Please report upstream.\n");
+		return;
+	}
+
+	input_report_key(input, BTN_LEFT, packet[0] & 0x01);
+	input_report_key(input, BTN_RIGHT, packet[0] & 0x02);
+	input_report_key(input, BTN_MIDDLE, packet[0] & 0x04);
+
+	if ((packet[3] & 0x0F) == 0x06) {
+		x = packet[4] - (int)((packet[1] ^ 0x80) << 1);
+		y = (int)((packet[2] ^ 0x80) << 1) - packet[5];
+
+		input_report_rel(input, REL_X, x);
+		input_report_rel(input, REL_Y, y);
+	}
+
+	input_sync(input);
+}
+
 static irqreturn_t elan_isr(int irq, void *dev_id)
 {
 	struct elan_tp_data *data = dev_id;
@@ -951,11 +981,17 @@
 	if (error)
 		goto out;
 
-	if (report[ETP_REPORT_ID_OFFSET] != ETP_REPORT_ID)
+	switch (report[ETP_REPORT_ID_OFFSET]) {
+	case ETP_REPORT_ID:
+		elan_report_absolute(data, report);
+		break;
+	case ETP_TP_REPORT_ID:
+		elan_report_trackpoint(data, report);
+		break;
+	default:
 		dev_err(dev, "invalid report id data (%x)\n",
 			report[ETP_REPORT_ID_OFFSET]);
-	else
-		elan_report_absolute(data, report);
+	}
 
 out:
 	return IRQ_HANDLED;
@@ -966,6 +1002,36 @@
  * Elan initialization functions
  ******************************************************************
  */
+
+static int elan_setup_trackpoint_input_device(struct elan_tp_data *data)
+{
+	struct device *dev = &data->client->dev;
+	struct input_dev *input;
+
+	input = devm_input_allocate_device(dev);
+	if (!input)
+		return -ENOMEM;
+
+	input->name = "Elan TrackPoint";
+	input->id.bustype = BUS_I2C;
+	input->id.vendor = ELAN_VENDOR_ID;
+	input->id.product = data->product_id;
+	input_set_drvdata(input, data);
+
+	input_set_capability(input, EV_REL, REL_X);
+	input_set_capability(input, EV_REL, REL_Y);
+	input_set_capability(input, EV_KEY, BTN_LEFT);
+	input_set_capability(input, EV_KEY, BTN_RIGHT);
+	input_set_capability(input, EV_KEY, BTN_MIDDLE);
+
+	__set_bit(INPUT_PROP_POINTER, input->propbit);
+	__set_bit(INPUT_PROP_POINTING_STICK, input->propbit);
+
+	data->tp_input = input;
+
+	return 0;
+}
+
 static int elan_setup_input_device(struct elan_tp_data *data)
 {
 	struct device *dev = &data->client->dev;
@@ -1140,6 +1206,12 @@
 	if (error)
 		return error;
 
+	if (device_property_read_bool(&client->dev, "elan,trackpoint")) {
+		error = elan_setup_trackpoint_input_device(data);
+		if (error)
+			return error;
+	}
+
 	/*
 	 * Platform code (ACPI, DTS) should normally set up interrupt
 	 * for us, but in case it did not let's fall back to using falling
@@ -1177,6 +1249,16 @@
 		return error;
 	}
 
+	if (data->tp_input) {
+		error = input_register_device(data->tp_input);
+		if (error) {
+			dev_err(&client->dev,
+				"failed to register TrackPoint input device: %d\n",
+				error);
+			return error;
+		}
+	}
+
 	/*
 	 * Systems using device tree should set up wakeup via DTS,
 	 * the rest will configure device as wakeup source by default.
@@ -1262,6 +1344,7 @@
 	{ "ELAN060B", 0 },
 	{ "ELAN060C", 0 },
 	{ "ELAN0611", 0 },
+	{ "ELAN0612", 0 },
 	{ "ELAN1000", 0 },
 	{ }
 };
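One detail of the trackpoint node added above: it is allocated with devm_input_allocate_device(), so only input_register_device() needs explicit error handling; the allocation (and, once registered, the device itself) is torn down automatically when probe fails or the driver is unbound. Reduced to its bones:

	input = devm_input_allocate_device(dev);
	if (!input)
		return -ENOMEM;

	input_set_capability(input, EV_REL, REL_X);
	input_set_capability(input, EV_REL, REL_Y);
	input_set_capability(input, EV_KEY, BTN_LEFT);

	error = input_register_device(input);
	if (error)
		return error;	/* devm unwinds the allocation */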
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index db47a5e..fb4d902 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -14,17 +14,20 @@
 #include <linux/dmi.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/input/mt.h>
+#include <linux/platform_device.h>
 #include <linux/serio.h>
 #include <linux/libps2.h>
 #include <asm/unaligned.h>
 #include "psmouse.h"
 #include "elantech.h"
+#include "elan_i2c.h"
 
 #define elantech_debug(fmt, ...)					\
 	do {								\
-		if (etd->debug)						\
+		if (etd->info.debug)					\
 			psmouse_printk(KERN_DEBUG, psmouse,		\
 					fmt, ##__VA_ARGS__);		\
 	} while (0)
@@ -105,7 +108,7 @@
 	if (reg > 0x11 && reg < 0x20)
 		return -1;
 
-	switch (etd->hw_version) {
+	switch (etd->info.hw_version) {
 	case 1:
 		if (ps2_sliced_command(&psmouse->ps2dev, ETP_REGISTER_READ) ||
 		    ps2_sliced_command(&psmouse->ps2dev, reg) ||
@@ -137,7 +140,7 @@
 
 	if (rc)
 		psmouse_err(psmouse, "failed to read register 0x%02x.\n", reg);
-	else if (etd->hw_version != 4)
+	else if (etd->info.hw_version != 4)
 		*val = param[0];
 	else
 		*val = param[1];
@@ -160,7 +163,7 @@
 	if (reg > 0x11 && reg < 0x20)
 		return -1;
 
-	switch (etd->hw_version) {
+	switch (etd->info.hw_version) {
 	case 1:
 		if (ps2_sliced_command(&psmouse->ps2dev, ETP_REGISTER_WRITE) ||
 		    ps2_sliced_command(&psmouse->ps2dev, reg) ||
@@ -237,7 +240,7 @@
 	unsigned char *packet = psmouse->packet;
 	int fingers;
 
-	if (etd->fw_version < 0x020000) {
+	if (etd->info.fw_version < 0x020000) {
 		/*
 		 * byte 0:  D   U  p1  p2   1  p3   R   L
 		 * byte 1:  f   0  th  tw  x9  x8  y9  y8
@@ -252,7 +255,7 @@
 		fingers = (packet[0] & 0xc0) >> 6;
 	}
 
-	if (etd->jumpy_cursor) {
+	if (etd->info.jumpy_cursor) {
 		if (fingers != 1) {
 			etd->single_finger_reports = 0;
 		} else if (etd->single_finger_reports < 2) {
@@ -282,8 +285,8 @@
 
 	psmouse_report_standard_buttons(dev, packet[0]);
 
-	if (etd->fw_version < 0x020000 &&
-	    (etd->capabilities[0] & ETP_CAP_HAS_ROCKER)) {
+	if (etd->info.fw_version < 0x020000 &&
+	    (etd->info.capabilities[0] & ETP_CAP_HAS_ROCKER)) {
 		/* rocker up */
 		input_report_key(dev, BTN_FORWARD, packet[0] & 0x40);
 		/* rocker down */
@@ -391,7 +394,7 @@
 	input_report_key(dev, BTN_TOOL_TRIPLETAP, fingers == 3);
 	input_report_key(dev, BTN_TOOL_QUADTAP, fingers == 4);
 	psmouse_report_standard_buttons(dev, packet[0]);
-	if (etd->reports_pressure) {
+	if (etd->info.reports_pressure) {
 		input_report_abs(dev, ABS_PRESSURE, pres);
 		input_report_abs(dev, ABS_TOOL_WIDTH, width);
 	}
@@ -444,7 +447,7 @@
 
 	default:
 		/* Dump unexpected packet sequences if debug=1 (default) */
-		if (etd->debug == 1)
+		if (etd->info.debug == 1)
 			elantech_packet_dump(psmouse);
 
 		break;
@@ -523,7 +526,7 @@
 	input_report_key(dev, BTN_TOOL_TRIPLETAP, fingers == 3);
 
 	/* For clickpads map both buttons to BTN_LEFT */
-	if (etd->fw_version & 0x001000)
+	if (etd->info.fw_version & 0x001000)
 		input_report_key(dev, BTN_LEFT, packet[0] & 0x03);
 	else
 		psmouse_report_standard_buttons(dev, packet[0]);
@@ -541,7 +544,7 @@
 	unsigned char *packet = psmouse->packet;
 
 	/* For clickpads map both buttons to BTN_LEFT */
-	if (etd->fw_version & 0x001000)
+	if (etd->info.fw_version & 0x001000)
 		input_report_key(dev, BTN_LEFT, packet[0] & 0x03);
 	else
 		psmouse_report_standard_buttons(dev, packet[0]);
@@ -669,7 +672,7 @@
 	unsigned char p1, p2, p3;
 
 	/* Parity bits are placed differently */
-	if (etd->fw_version < 0x020000) {
+	if (etd->info.fw_version < 0x020000) {
 		/* byte 0:  D   U  p1  p2   1  p3   R   L */
 		p1 = (packet[0] & 0x20) >> 5;
 		p2 = (packet[0] & 0x10) >> 4;
@@ -714,7 +717,7 @@
 	 * With all three cases, if the constant bits are not exactly what I
 	 * expected, I consider them invalid.
 	 */
-	if (etd->reports_pressure)
+	if (etd->info.reports_pressure)
 		return (packet[0] & 0x0c) == 0x04 &&
 		       (packet[3] & 0x0f) == 0x02;
 
@@ -751,7 +754,7 @@
 	 * If the hardware flag 'crc_enabled' is set the packets have
 	 * different signatures.
 	 */
-	if (etd->crc_enabled) {
+	if (etd->info.crc_enabled) {
 		if ((packet[3] & 0x09) == 0x08)
 			return PACKET_V3_HEAD;
 
@@ -782,7 +785,7 @@
 		return PACKET_TRACKPOINT;
 
 	/* This represents the version of IC body. */
-	ic_version = (etd->fw_version & 0x0f0000) >> 16;
+	ic_version = (etd->info.fw_version & 0x0f0000) >> 16;
 
 	/*
 	 * Sanity check based on the constant bits of a packet.
@@ -791,9 +794,9 @@
 	 * the IC body, but are the same for every packet,
 	 * regardless of the type.
 	 */
-	if (etd->crc_enabled)
+	if (etd->info.crc_enabled)
 		sanity_check = ((packet[3] & 0x08) == 0x00);
-	else if (ic_version == 7 && etd->samples[1] == 0x2A)
+	else if (ic_version == 7 && etd->info.samples[1] == 0x2A)
 		sanity_check = ((packet[3] & 0x1c) == 0x10);
 	else
 		sanity_check = ((packet[0] & 0x0c) == 0x04 &&
@@ -827,12 +830,12 @@
 	if (psmouse->pktcnt < psmouse->pktsize)
 		return PSMOUSE_GOOD_DATA;
 
-	if (etd->debug > 1)
+	if (etd->info.debug > 1)
 		elantech_packet_dump(psmouse);
 
-	switch (etd->hw_version) {
+	switch (etd->info.hw_version) {
 	case 1:
-		if (etd->paritycheck && !elantech_packet_check_v1(psmouse))
+		if (etd->info.paritycheck && !elantech_packet_check_v1(psmouse))
 			return PSMOUSE_BAD_DATA;
 
 		elantech_report_absolute_v1(psmouse);
@@ -843,7 +846,7 @@
 		if (elantech_debounce_check_v2(psmouse))
 			return PSMOUSE_FULL_PACKET;
 
-		if (etd->paritycheck && !elantech_packet_check_v2(psmouse))
+		if (etd->info.paritycheck && !elantech_packet_check_v2(psmouse))
 			return PSMOUSE_BAD_DATA;
 
 		elantech_report_absolute_v2(psmouse);
@@ -916,7 +919,7 @@
 	int tries = ETP_READ_BACK_TRIES;
 	int rc = 0;
 
-	switch (etd->hw_version) {
+	switch (etd->info.hw_version) {
 	case 1:
 		etd->reg_10 = 0x16;
 		etd->reg_11 = 0x8f;
@@ -939,7 +942,7 @@
 		break;
 
 	case 3:
-		if (etd->set_hw_resolution)
+		if (etd->info.set_hw_resolution)
 			etd->reg_10 = 0x0b;
 		else
 			etd->reg_10 = 0x01;
@@ -976,7 +979,7 @@
 		if (rc) {
 			psmouse_err(psmouse,
 				    "failed to read back register 0x10.\n");
-		} else if (etd->hw_version == 1 &&
+		} else if (etd->info.hw_version == 1 &&
 			   !(val & ETP_R10_ABSOLUTE_MODE)) {
 			psmouse_err(psmouse,
 				    "touchpad refuses to switch to absolute mode.\n");
@@ -997,10 +1000,11 @@
 			      unsigned int *width)
 {
 	struct elantech_data *etd = psmouse->private;
+	struct elantech_device_info *info = &etd->info;
 	unsigned char param[3];
 	unsigned char traces;
 
-	switch (etd->hw_version) {
+	switch (info->hw_version) {
 	case 1:
 		*x_min = ETP_XMIN_V1;
 		*y_min = ETP_YMIN_V1;
@@ -1009,9 +1013,9 @@
 		break;
 
 	case 2:
-		if (etd->fw_version == 0x020800 ||
-		    etd->fw_version == 0x020b00 ||
-		    etd->fw_version == 0x020030) {
+		if (info->fw_version == 0x020800 ||
+		    info->fw_version == 0x020b00 ||
+		    info->fw_version == 0x020030) {
 			*x_min = ETP_XMIN_V2;
 			*y_min = ETP_YMIN_V2;
 			*x_max = ETP_XMAX_V2;
@@ -1020,35 +1024,35 @@
 			int i;
 			int fixed_dpi;
 
-			i = (etd->fw_version > 0x020800 &&
-			     etd->fw_version < 0x020900) ? 1 : 2;
+			i = (info->fw_version > 0x020800 &&
+			     info->fw_version < 0x020900) ? 1 : 2;
 
-			if (etd->send_cmd(psmouse, ETP_FW_ID_QUERY, param))
+			if (info->send_cmd(psmouse, ETP_FW_ID_QUERY, param))
 				return -1;
 
 			fixed_dpi = param[1] & 0x10;
 
-			if (((etd->fw_version >> 16) == 0x14) && fixed_dpi) {
-				if (etd->send_cmd(psmouse, ETP_SAMPLE_QUERY, param))
+			if (((info->fw_version >> 16) == 0x14) && fixed_dpi) {
+				if (info->send_cmd(psmouse, ETP_SAMPLE_QUERY, param))
 					return -1;
 
-				*x_max = (etd->capabilities[1] - i) * param[1] / 2;
-				*y_max = (etd->capabilities[2] - i) * param[2] / 2;
-			} else if (etd->fw_version == 0x040216) {
+				*x_max = (info->capabilities[1] - i) * param[1] / 2;
+				*y_max = (info->capabilities[2] - i) * param[2] / 2;
+			} else if (info->fw_version == 0x040216) {
 				*x_max = 819;
 				*y_max = 405;
-			} else if (etd->fw_version == 0x040219 || etd->fw_version == 0x040215) {
+			} else if (info->fw_version == 0x040219 || info->fw_version == 0x040215) {
 				*x_max = 900;
 				*y_max = 500;
 			} else {
-				*x_max = (etd->capabilities[1] - i) * 64;
-				*y_max = (etd->capabilities[2] - i) * 64;
+				*x_max = (info->capabilities[1] - i) * 64;
+				*y_max = (info->capabilities[2] - i) * 64;
 			}
 		}
 		break;
 
 	case 3:
-		if (etd->send_cmd(psmouse, ETP_FW_ID_QUERY, param))
+		if (info->send_cmd(psmouse, ETP_FW_ID_QUERY, param))
 			return -1;
 
 		*x_max = (0x0f & param[0]) << 8 | param[1];
@@ -1056,12 +1060,12 @@
 		break;
 
 	case 4:
-		if (etd->send_cmd(psmouse, ETP_FW_ID_QUERY, param))
+		if (info->send_cmd(psmouse, ETP_FW_ID_QUERY, param))
 			return -1;
 
 		*x_max = (0x0f & param[0]) << 8 | param[1];
 		*y_max = (0xf0 & param[0]) << 4 | param[2];
-		traces = etd->capabilities[1];
+		traces = info->capabilities[1];
 		if ((traces < 2) || (traces > *x_max))
 			return -1;
 
@@ -1083,7 +1087,8 @@
 
 static int elantech_get_resolution_v4(struct psmouse *psmouse,
 				      unsigned int *x_res,
-				      unsigned int *y_res)
+				      unsigned int *y_res,
+				      unsigned int *bus)
 {
 	unsigned char param[3];
 
@@ -1092,6 +1097,7 @@
 
 	*x_res = elantech_convert_res(param[1] & 0x0f);
 	*y_res = elantech_convert_res((param[1] & 0xf0) >> 4);
+	*bus = param[2];
 
 	return 0;
 }
@@ -1140,7 +1146,7 @@
 	struct input_dev *dev = psmouse->dev;
 	struct elantech_data *etd = psmouse->private;
 
-	if (etd->fw_version & 0x001000) {
+	if (etd->info.fw_version & 0x001000) {
 		__set_bit(INPUT_PROP_BUTTONPAD, dev->propbit);
 		__clear_bit(BTN_RIGHT, dev->keybit);
 	}
@@ -1176,8 +1182,8 @@
 {
 	struct input_dev *dev = psmouse->dev;
 	struct elantech_data *etd = psmouse->private;
+	struct elantech_device_info *info = &etd->info;
 	unsigned int x_min = 0, y_min = 0, x_max = 0, y_max = 0, width = 0;
-	unsigned int x_res = 31, y_res = 31;
 
 	if (elantech_set_range(psmouse, &x_min, &y_min, &x_max, &y_max, &width))
 		return -1;
@@ -1197,11 +1203,11 @@
 	__set_bit(BTN_TOOL_DOUBLETAP, dev->keybit);
 	__set_bit(BTN_TOOL_TRIPLETAP, dev->keybit);
 
-	switch (etd->hw_version) {
+	switch (info->hw_version) {
 	case 1:
 		/* Rocker button */
-		if (etd->fw_version < 0x020000 &&
-		    (etd->capabilities[0] & ETP_CAP_HAS_ROCKER)) {
+		if (info->fw_version < 0x020000 &&
+		    (info->capabilities[0] & ETP_CAP_HAS_ROCKER)) {
 			__set_bit(BTN_FORWARD, dev->keybit);
 			__set_bit(BTN_BACK, dev->keybit);
 		}
@@ -1214,11 +1220,11 @@
 		__set_bit(INPUT_PROP_SEMI_MT, dev->propbit);
 		/* fall through */
 	case 3:
-		if (etd->hw_version == 3)
+		if (info->hw_version == 3)
 			elantech_set_buttonpad_prop(psmouse);
 		input_set_abs_params(dev, ABS_X, x_min, x_max, 0, 0);
 		input_set_abs_params(dev, ABS_Y, y_min, y_max, 0, 0);
-		if (etd->reports_pressure) {
+		if (info->reports_pressure) {
 			input_set_abs_params(dev, ABS_PRESSURE, ETP_PMIN_V2,
 					     ETP_PMAX_V2, 0, 0);
 			input_set_abs_params(dev, ABS_TOOL_WIDTH, ETP_WMIN_V2,
@@ -1230,13 +1236,6 @@
 		break;
 
 	case 4:
-		if (elantech_get_resolution_v4(psmouse, &x_res, &y_res)) {
-			/*
-			 * if query failed, print a warning and leave the values
-			 * zero to resemble synaptics.c behavior.
-			 */
-			psmouse_warn(psmouse, "couldn't query resolution data.\n");
-		}
 		elantech_set_buttonpad_prop(psmouse);
 		__set_bit(BTN_TOOL_QUADTAP, dev->keybit);
 		/* For X to recognize me as touchpad. */
@@ -1265,11 +1264,11 @@
 		break;
 	}
 
-	input_abs_set_res(dev, ABS_X, x_res);
-	input_abs_set_res(dev, ABS_Y, y_res);
-	if (etd->hw_version > 1) {
-		input_abs_set_res(dev, ABS_MT_POSITION_X, x_res);
-		input_abs_set_res(dev, ABS_MT_POSITION_Y, y_res);
+	input_abs_set_res(dev, ABS_X, info->x_res);
+	input_abs_set_res(dev, ABS_Y, info->y_res);
+	if (info->hw_version > 1) {
+		input_abs_set_res(dev, ABS_MT_POSITION_X, info->x_res);
+		input_abs_set_res(dev, ABS_MT_POSITION_Y, info->y_res);
 	}
 
 	etd->y_max = y_max;
@@ -1317,7 +1316,7 @@
 		return err;
 
 	/* Do we need to preserve some bits for version 2 hardware too? */
-	if (etd->hw_version == 1) {
+	if (etd->info.hw_version == 1) {
 		if (attr->reg == 0x10)
 			/* Force absolute mode always on */
 			value |= ETP_R10_ABSOLUTE_MODE;
@@ -1337,11 +1336,22 @@
 		.field_offset = offsetof(struct elantech_data, _name),	\
 		.reg = _register,					\
 	};								\
-	PSMOUSE_DEFINE_ATTR(_name, S_IWUSR | S_IRUGO,			\
+	PSMOUSE_DEFINE_ATTR(_name, 0644,				\
 			    &elantech_attr_##_name,			\
 			    elantech_show_int_attr,			\
 			    elantech_set_int_attr)
 
+#define ELANTECH_INFO_ATTR(_name)					       \
+	static struct elantech_attr_data elantech_attr_##_name = {	       \
+		.field_offset = offsetof(struct elantech_data, info) +	       \
+				offsetof(struct elantech_device_info, _name),  \
+		.reg = 0,						       \
+	};								       \
+	PSMOUSE_DEFINE_ATTR(_name, 0644,				       \
+			    &elantech_attr_##_name,			       \
+			    elantech_show_int_attr,			       \
+			    elantech_set_int_attr)
+
 ELANTECH_INT_ATTR(reg_07, 0x07);
 ELANTECH_INT_ATTR(reg_10, 0x10);
 ELANTECH_INT_ATTR(reg_11, 0x11);
@@ -1352,9 +1362,9 @@
 ELANTECH_INT_ATTR(reg_24, 0x24);
 ELANTECH_INT_ATTR(reg_25, 0x25);
 ELANTECH_INT_ATTR(reg_26, 0x26);
-ELANTECH_INT_ATTR(debug, 0);
-ELANTECH_INT_ATTR(paritycheck, 0);
-ELANTECH_INT_ATTR(crc_enabled, 0);
+ELANTECH_INFO_ATTR(debug);
+ELANTECH_INFO_ATTR(paritycheck);
+ELANTECH_INFO_ATTR(crc_enabled);
 
 static struct attribute *elantech_attrs[] = {
 	&psmouse_attr_reg_07.dattr.attr,
@@ -1469,6 +1479,12 @@
 {
 	struct elantech_data *etd = psmouse->private;
 
+	/*
+	 * We might have left a breadcrumb when trying to
+	 * set up SMbus companion.
+	 */
+	psmouse_smbus_cleanup(psmouse);
+
 	if (etd->tp_dev)
 		input_unregister_device(etd->tp_dev);
 	sysfs_remove_group(&psmouse->ps2dev.serio->dev.kobj,
@@ -1588,25 +1604,25 @@
 /*
  * determine hardware version and set some properties according to it.
  */
-static int elantech_set_properties(struct elantech_data *etd)
+static int elantech_set_properties(struct elantech_device_info *info)
 {
 	/* This represents the version of IC body. */
-	int ver = (etd->fw_version & 0x0f0000) >> 16;
+	int ver = (info->fw_version & 0x0f0000) >> 16;
 
 	/* Early version of Elan touchpads doesn't obey the rule. */
-	if (etd->fw_version < 0x020030 || etd->fw_version == 0x020600)
-		etd->hw_version = 1;
+	if (info->fw_version < 0x020030 || info->fw_version == 0x020600)
+		info->hw_version = 1;
 	else {
 		switch (ver) {
 		case 2:
 		case 4:
-			etd->hw_version = 2;
+			info->hw_version = 2;
 			break;
 		case 5:
-			etd->hw_version = 3;
+			info->hw_version = 3;
 			break;
 		case 6 ... 15:
-			etd->hw_version = 4;
+			info->hw_version = 4;
 			break;
 		default:
 			return -1;
@@ -1614,100 +1630,88 @@
 	}
 
 	/* decide which send_cmd we're gonna use early */
-	etd->send_cmd = etd->hw_version >= 3 ? elantech_send_cmd :
-					       synaptics_send_cmd;
+	info->send_cmd = info->hw_version >= 3 ? elantech_send_cmd :
+						 synaptics_send_cmd;
 
 	/* Turn on packet checking by default */
-	etd->paritycheck = 1;
+	info->paritycheck = 1;
 
 	/*
 	 * This firmware suffers from misreporting coordinates when
 	 * a touch action starts causing the mouse cursor or scrolled page
 	 * to jump. Enable a workaround.
 	 */
-	etd->jumpy_cursor =
-		(etd->fw_version == 0x020022 || etd->fw_version == 0x020600);
+	info->jumpy_cursor =
+		(info->fw_version == 0x020022 || info->fw_version == 0x020600);
 
-	if (etd->hw_version > 1) {
+	if (info->hw_version > 1) {
 		/* For now show extra debug information */
-		etd->debug = 1;
+		info->debug = 1;
 
-		if (etd->fw_version >= 0x020800)
-			etd->reports_pressure = true;
+		if (info->fw_version >= 0x020800)
+			info->reports_pressure = true;
 	}
 
 	/*
 	 * The signatures of v3 and v4 packets change depending on the
 	 * value of this hardware flag.
 	 */
-	etd->crc_enabled = (etd->fw_version & 0x4000) == 0x4000 ||
-			   dmi_check_system(elantech_dmi_force_crc_enabled);
+	info->crc_enabled = (info->fw_version & 0x4000) == 0x4000 ||
+			     dmi_check_system(elantech_dmi_force_crc_enabled);
 
 	/* Enable real hardware resolution on hw_version 3 ? */
-	etd->set_hw_resolution = !dmi_check_system(no_hw_res_dmi_table);
+	info->set_hw_resolution = !dmi_check_system(no_hw_res_dmi_table);
 
 	return 0;
 }
 
-/*
- * Initialize the touchpad and create sysfs entries
- */
-int elantech_init(struct psmouse *psmouse)
+static int elantech_query_info(struct psmouse *psmouse,
+			       struct elantech_device_info *info)
 {
-	struct elantech_data *etd;
-	int i;
-	int error = -EINVAL;
 	unsigned char param[3];
-	struct input_dev *tp_dev;
 
-	psmouse->private = etd = kzalloc(sizeof(struct elantech_data), GFP_KERNEL);
-	if (!etd)
-		return -ENOMEM;
-
-	psmouse_reset(psmouse);
-
-	etd->parity[0] = 1;
-	for (i = 1; i < 256; i++)
-		etd->parity[i] = etd->parity[i & (i - 1)] ^ 1;
+	memset(info, 0, sizeof(*info));
 
 	/*
 	 * Do the version query again so we can store the result
 	 */
 	if (synaptics_send_cmd(psmouse, ETP_FW_VERSION_QUERY, param)) {
 		psmouse_err(psmouse, "failed to query firmware version.\n");
-		goto init_fail;
+		return -EINVAL;
 	}
-	etd->fw_version = (param[0] << 16) | (param[1] << 8) | param[2];
+	info->fw_version = (param[0] << 16) | (param[1] << 8) | param[2];
 
-	if (elantech_set_properties(etd)) {
+	if (elantech_set_properties(info)) {
 		psmouse_err(psmouse, "unknown hardware version, aborting...\n");
-		goto init_fail;
+		return -EINVAL;
 	}
 	psmouse_info(psmouse,
 		     "assuming hardware version %d (with firmware version 0x%02x%02x%02x)\n",
-		     etd->hw_version, param[0], param[1], param[2]);
+		     info->hw_version, param[0], param[1], param[2]);
 
-	if (etd->send_cmd(psmouse, ETP_CAPABILITIES_QUERY,
-	    etd->capabilities)) {
+	if (info->send_cmd(psmouse, ETP_CAPABILITIES_QUERY,
+	    info->capabilities)) {
 		psmouse_err(psmouse, "failed to query capabilities.\n");
-		goto init_fail;
+		return -EINVAL;
 	}
 	psmouse_info(psmouse,
 		     "Synaptics capabilities query result 0x%02x, 0x%02x, 0x%02x.\n",
-		     etd->capabilities[0], etd->capabilities[1],
-		     etd->capabilities[2]);
+		     info->capabilities[0], info->capabilities[1],
+		     info->capabilities[2]);
 
-	if (etd->hw_version != 1) {
-		if (etd->send_cmd(psmouse, ETP_SAMPLE_QUERY, etd->samples)) {
+	if (info->hw_version != 1) {
+		if (info->send_cmd(psmouse, ETP_SAMPLE_QUERY, info->samples)) {
 			psmouse_err(psmouse, "failed to query sample data\n");
-			goto init_fail;
+			return -EINVAL;
 		}
 		psmouse_info(psmouse,
 			     "Elan sample query result %02x, %02x, %02x\n",
-			     etd->samples[0], etd->samples[1], etd->samples[2]);
+			     info->samples[0],
+			     info->samples[1],
+			     info->samples[2]);
 	}
 
-	if (etd->samples[1] == 0x74 && etd->hw_version == 0x03) {
+	if (info->samples[1] == 0x74 && info->hw_version == 0x03) {
 		/*
 		 * This module has a bug which makes absolute mode
 		 * unusable, so let's abort so we'll be using standard
@@ -1715,16 +1719,181 @@
 		 */
 		psmouse_info(psmouse,
 			     "absolute mode broken, forcing standard PS/2 protocol\n");
+		return -ENODEV;
+	}
+
+	/* The MSB indicates the presence of the trackpoint */
+	info->has_trackpoint = (info->capabilities[0] & 0x80) == 0x80;
+
+	info->x_res = 31;
+	info->y_res = 31;
+	if (info->hw_version == 4) {
+		if (elantech_get_resolution_v4(psmouse,
+					       &info->x_res,
+					       &info->y_res,
+					       &info->bus)) {
+			psmouse_warn(psmouse,
+				     "failed to query resolution data.\n");
+		}
+	}
+
+	return 0;
+}
+
+#if defined(CONFIG_MOUSE_PS2_ELANTECH_SMBUS)
+
+/*
+ * The newest Elantech device can use a secondary bus (over SMBus) which
+ * provides better bandwidth and allows finer control of the touchpads.
+ * This is used to decide whether to use that bus.
+ */
+enum {
+	ELANTECH_SMBUS_NOT_SET = -1,
+	ELANTECH_SMBUS_OFF,
+	ELANTECH_SMBUS_ON,
+};
+
+static int elantech_smbus = IS_ENABLED(CONFIG_MOUSE_ELAN_I2C_SMBUS) ?
+		ELANTECH_SMBUS_NOT_SET : ELANTECH_SMBUS_OFF;
+module_param_named(elantech_smbus, elantech_smbus, int, 0644);
+MODULE_PARM_DESC(elantech_smbus, "Use a secondary bus for the Elantech device.");
+
+static int elantech_create_smbus(struct psmouse *psmouse,
+				 struct elantech_device_info *info,
+				 bool leave_breadcrumbs)
+{
+	const struct property_entry i2c_properties[] = {
+		PROPERTY_ENTRY_BOOL("elan,trackpoint"),
+		{ },
+	};
+	struct i2c_board_info smbus_board = {
+		I2C_BOARD_INFO("elan_i2c", 0x15),
+		.flags = I2C_CLIENT_HOST_NOTIFY,
+	};
+
+	if (info->has_trackpoint)
+		smbus_board.properties = i2c_properties;
+
+	return psmouse_smbus_init(psmouse, &smbus_board, NULL, 0, false,
+				  leave_breadcrumbs);
+}
+
+/**
+ * elantech_setup_smbus - called once the PS/2 devices are enumerated;
+ * decides whether to instantiate an SMBus InterTouch device.
+ */
+static int elantech_setup_smbus(struct psmouse *psmouse,
+				struct elantech_device_info *info,
+				bool leave_breadcrumbs)
+{
+	int error;
+
+	if (elantech_smbus == ELANTECH_SMBUS_OFF)
+		return -ENXIO;
+
+	if (elantech_smbus == ELANTECH_SMBUS_NOT_SET) {
+		/*
+		 * New ICs are enabled by default.
+		 * For old ICs, the user decides.
+		 */
+		if (!ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version))
+			return -ENXIO;
+	}
+
+	psmouse_info(psmouse, "Trying to set up SMBus access\n");
+
+	error = elantech_create_smbus(psmouse, info, leave_breadcrumbs);
+	if (error) {
+		if (error == -EAGAIN)
+			psmouse_info(psmouse, "SMbus companion is not ready yet\n");
+		else
+			psmouse_err(psmouse, "unable to create intertouch device\n");
+
+		return error;
+	}
+
+	return 0;
+}
+
+static bool elantech_use_host_notify(struct psmouse *psmouse,
+				     struct elantech_device_info *info)
+{
+	if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version))
+		return true;
+
+	switch (info->bus) {
+	case ETP_BUS_PS2_ONLY:
+		/* expected case */
+		break;
+	case ETP_BUS_SMB_ALERT_ONLY:
+		/* fall-through  */
+	case ETP_BUS_PS2_SMB_ALERT:
+		psmouse_dbg(psmouse, "Ignoring SMBus provider through alert protocol.\n");
+		break;
+	case ETP_BUS_SMB_HST_NTFY_ONLY:
+		/* fall-through  */
+	case ETP_BUS_PS2_SMB_HST_NTFY:
+		return true;
+	default:
+		psmouse_dbg(psmouse,
+			    "Ignoring SMBus bus provider %d.\n",
+			    info->bus);
+	}
+
+	return false;
+}
+
+int elantech_init_smbus(struct psmouse *psmouse)
+{
+	struct elantech_device_info info;
+	int error = -EINVAL;
+
+	psmouse_reset(psmouse);
+
+	error = elantech_query_info(psmouse, &info);
+	if (error)
+		goto init_fail;
+
+	if (info.hw_version < 4) {
+		error = -ENXIO;
 		goto init_fail;
 	}
 
+	return elantech_create_smbus(psmouse, &info, false);
+ init_fail:
+	psmouse_reset(psmouse);
+	return error;
+}
+#endif /* CONFIG_MOUSE_PS2_ELANTECH_SMBUS */
+
+/*
+ * Initialize the touchpad and create sysfs entries
+ */
+static int elantech_setup_ps2(struct psmouse *psmouse,
+			      struct elantech_device_info *info)
+{
+	struct elantech_data *etd;
+	int i;
+	int error = -EINVAL;
+	struct input_dev *tp_dev;
+
+	psmouse->private = etd = kzalloc(sizeof(*etd), GFP_KERNEL);
+	if (!etd)
+		return -ENOMEM;
+
+	etd->info = *info;
+
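+	/*
+	 * Precompute the parity lookup table: i & (i - 1) clears the
+	 * lowest set bit of i, so each entry derives from an already
+	 * computed smaller index, toggled once.
+	 */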
+	etd->parity[0] = 1;
+	for (i = 1; i < 256; i++)
+		etd->parity[i] = etd->parity[i & (i - 1)] ^ 1;
+
 	if (elantech_set_absolute_mode(psmouse)) {
 		psmouse_err(psmouse,
 			    "failed to put touchpad into absolute mode.\n");
 		goto init_fail;
 	}
 
-	if (etd->fw_version == 0x381f17) {
+	if (info->fw_version == 0x381f17) {
 		etd->original_set_rate = psmouse->set_rate;
 		psmouse->set_rate = elantech_set_rate_restore_reg_07;
 	}
@@ -1743,8 +1912,7 @@
 		goto init_fail;
 	}
 
-	/* The MSB indicates the presence of the trackpoint */
-	if ((etd->capabilities[0] & 0x80) == 0x80) {
+	if (info->has_trackpoint) {
 		tp_dev = input_allocate_device();
 
 		if (!tp_dev) {
@@ -1780,7 +1948,7 @@
 	psmouse->protocol_handler = elantech_process_byte;
 	psmouse->disconnect = elantech_disconnect;
 	psmouse->reconnect = elantech_reconnect;
-	psmouse->pktsize = etd->hw_version > 1 ? 6 : 4;
+	psmouse->pktsize = info->hw_version > 1 ? 6 : 4;
 
 	return 0;
  init_fail_tp_reg:
@@ -1789,7 +1957,70 @@
 	sysfs_remove_group(&psmouse->ps2dev.serio->dev.kobj,
 			   &elantech_attr_group);
  init_fail:
-	psmouse_reset(psmouse);
 	kfree(etd);
 	return error;
 }
+
+int elantech_init_ps2(struct psmouse *psmouse)
+{
+	struct elantech_device_info info;
+	int error = -EINVAL;
+
+	psmouse_reset(psmouse);
+
+	error = elantech_query_info(psmouse, &info);
+	if (error)
+		goto init_fail;
+
+	error = elantech_setup_ps2(psmouse, &info);
+	if (error)
+		goto init_fail;
+
+	return 0;
+ init_fail:
+	psmouse_reset(psmouse);
+	return error;
+}
+
+int elantech_init(struct psmouse *psmouse)
+{
+	struct elantech_device_info info;
+	int error = -EINVAL;
+
+	psmouse_reset(psmouse);
+
+	error = elantech_query_info(psmouse, &info);
+	if (error)
+		goto init_fail;
+
+#if defined(CONFIG_MOUSE_PS2_ELANTECH_SMBUS)
+
+	if (elantech_use_host_notify(psmouse, &info)) {
+		if (!IS_ENABLED(CONFIG_MOUSE_ELAN_I2C_SMBUS) ||
+		    !IS_ENABLED(CONFIG_MOUSE_PS2_ELANTECH_SMBUS)) {
+			psmouse_warn(psmouse,
+				     "The touchpad can support a better bus than the too old PS/2 protocol. "
+				     "Make sure MOUSE_PS2_ELANTECH_SMBUS and MOUSE_ELAN_I2C_SMBUS are enabled to get a better touchpad experience.\n");
+		}
+		error = elantech_setup_smbus(psmouse, &info, true);
+		if (!error)
+			return PSMOUSE_ELANTECH_SMBUS;
+	}
+
+#endif /* CONFIG_MOUSE_PS2_ELANTECH_SMBUS */
+
+	error = elantech_setup_ps2(psmouse, &info);
+	if (error < 0) {
+		/*
+		 * Not using any flavor of Elantech support, so clean up
+		 * SMbus breadcrumbs, if any.
+		 */
+		psmouse_smbus_cleanup(psmouse);
+		goto init_fail;
+	}
+
+	return PSMOUSE_ELANTECH;
+ init_fail:
+	psmouse_reset(psmouse);
+	return error;
+}
diff --git a/drivers/input/mouse/elantech.h b/drivers/input/mouse/elantech.h
index e1cbf40..1197270 100644
--- a/drivers/input/mouse/elantech.h
+++ b/drivers/input/mouse/elantech.h
@@ -107,6 +107,30 @@
 #define ETP_WEIGHT_VALUE		5
 
 /*
+ * Bus information on 3rd byte of query ETP_RESOLUTION_QUERY(0x04)
+ */
+#define ETP_BUS_PS2_ONLY		0
+#define ETP_BUS_SMB_ALERT_ONLY		1
+#define ETP_BUS_SMB_HST_NTFY_ONLY	2
+#define ETP_BUS_PS2_SMB_ALERT		3
+#define ETP_BUS_PS2_SMB_HST_NTFY	4
+
+/*
+ * New ICs either use SMBus Host Notify or just plain PS/2.
+ *
+ * ETP_FW_VERSION_QUERY is:
+ * Byte 1:
+ *  - bit 0..3: IC BODY
+ * Byte 2:
+ *  - bit 4: HiddenButton
+ *  - bit 5: PS2_SMBUS_NOTIFY
+ *  - bit 6: PS2CRCCheck
+ */
+#define ETP_NEW_IC_SMBUS_HOST_NOTIFY(fw_version)	\
+		((((fw_version) & 0x0f2000) == 0x0f2000) && \
+		 ((fw_version) & 0x0000ff) > 0)
+
+/*
  * The base position for one finger, v4 hardware
  */
 struct finger_pos {
@@ -114,6 +138,25 @@
 	unsigned int y;
 };
 
+struct elantech_device_info {
+	unsigned char capabilities[3];
+	unsigned char samples[3];
+	unsigned char debug;
+	unsigned char hw_version;
+	unsigned int fw_version;
+	unsigned int x_res;
+	unsigned int y_res;
+	unsigned int bus;
+	bool paritycheck;
+	bool jumpy_cursor;
+	bool reports_pressure;
+	bool crc_enabled;
+	bool set_hw_resolution;
+	bool has_trackpoint;
+	int (*send_cmd)(struct psmouse *psmouse, unsigned char c,
+			unsigned char *param);
+};
+
 struct elantech_data {
 	struct input_dev *tp_dev;	/* Relative device for trackpoint */
 	char tp_phys[32];
@@ -127,27 +170,18 @@
 	unsigned char reg_24;
 	unsigned char reg_25;
 	unsigned char reg_26;
-	unsigned char debug;
-	unsigned char capabilities[3];
-	unsigned char samples[3];
-	bool paritycheck;
-	bool jumpy_cursor;
-	bool reports_pressure;
-	bool crc_enabled;
-	bool set_hw_resolution;
-	unsigned char hw_version;
-	unsigned int fw_version;
 	unsigned int single_finger_reports;
 	unsigned int y_max;
 	unsigned int width;
 	struct finger_pos mt[ETP_MAX_FINGERS];
 	unsigned char parity[256];
-	int (*send_cmd)(struct psmouse *psmouse, unsigned char c, unsigned char *param);
+	struct elantech_device_info info;
 	void (*original_set_rate)(struct psmouse *psmouse, unsigned int rate);
 };
 
 #ifdef CONFIG_MOUSE_PS2_ELANTECH
 int elantech_detect(struct psmouse *psmouse, bool set_properties);
+int elantech_init_ps2(struct psmouse *psmouse);
 int elantech_init(struct psmouse *psmouse);
 #else
 static inline int elantech_detect(struct psmouse *psmouse, bool set_properties)
@@ -158,6 +192,19 @@
 {
 	return -ENOSYS;
 }
+static inline int elantech_init_ps2(struct psmouse *psmouse)
+{
+	return -ENOSYS;
+}
 #endif /* CONFIG_MOUSE_PS2_ELANTECH */
 
+#if defined(CONFIG_MOUSE_PS2_ELANTECH_SMBUS)
+int elantech_init_smbus(struct psmouse *psmouse);
+#else
+static inline int elantech_init_smbus(struct psmouse *psmouse)
+{
+	return -ENOSYS;
+}
+#endif /* CONFIG_MOUSE_PS2_ELANTECH_SMBUS */
+
 #endif
diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c
index 8900c31..5ff5b1952 100644
--- a/drivers/input/mouse/psmouse-base.c
+++ b/drivers/input/mouse/psmouse-base.c
@@ -856,7 +856,17 @@
 		.name		= "ETPS/2",
 		.alias		= "elantech",
 		.detect		= elantech_detect,
-		.init		= elantech_init,
+		.init		= elantech_init_ps2,
+	},
+#endif
+#ifdef CONFIG_MOUSE_PS2_ELANTECH_SMBUS
+	{
+		.type		= PSMOUSE_ELANTECH_SMBUS,
+		.name		= "ETSMBus",
+		.alias		= "elantech-smbus",
+		.detect		= elantech_detect,
+		.init		= elantech_init_smbus,
+		.smbus_companion = true,
 	},
 #endif
 #ifdef CONFIG_MOUSE_PS2_SENTELIC
@@ -1158,8 +1168,13 @@
 	/* Try Elantech touchpad */
 	if (max_proto > PSMOUSE_IMEX &&
 	    psmouse_try_protocol(psmouse, PSMOUSE_ELANTECH,
-				 &max_proto, set_properties, true)) {
-		return PSMOUSE_ELANTECH;
+				 &max_proto, set_properties, false)) {
+		if (!set_properties)
+			return PSMOUSE_ELANTECH;
+
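+		/*
+		 * elantech_init() returns the protocol it actually set up:
+		 * PSMOUSE_ELANTECH_SMBUS or plain PSMOUSE_ELANTECH.
+		 */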
+		ret = elantech_init(psmouse);
+		if (ret >= 0)
+			return ret;
 	}
 
 	if (max_proto > PSMOUSE_IMEX) {
diff --git a/drivers/input/mouse/psmouse-smbus.c b/drivers/input/mouse/psmouse-smbus.c
index c7ac24d..852d4b4 100644
--- a/drivers/input/mouse/psmouse-smbus.c
+++ b/drivers/input/mouse/psmouse-smbus.c
@@ -23,6 +23,7 @@
 	struct i2c_client *client;
 	struct list_head node;
 	bool dead;
+	bool need_deactivate;
 };
 
 static LIST_HEAD(psmouse_smbus_list);
@@ -118,7 +119,10 @@
 
 static int psmouse_smbus_reconnect(struct psmouse *psmouse)
 {
-	psmouse_deactivate(psmouse);
+	struct psmouse_smbus_dev *smbdev = psmouse->private;
+
+	if (smbdev->need_deactivate)
+		psmouse_deactivate(psmouse);
 
 	return 0;
 }
@@ -225,6 +229,7 @@
 int psmouse_smbus_init(struct psmouse *psmouse,
 		       const struct i2c_board_info *board,
 		       const void *pdata, size_t pdata_size,
+		       bool need_deactivate,
 		       bool leave_breadcrumbs)
 {
 	struct psmouse_smbus_dev *smbdev;
@@ -236,13 +241,20 @@
 
 	smbdev->psmouse = psmouse;
 	smbdev->board = *board;
+	smbdev->need_deactivate = need_deactivate;
 
-	smbdev->board.platform_data = kmemdup(pdata, pdata_size, GFP_KERNEL);
-	if (!smbdev->board.platform_data) {
-		kfree(smbdev);
-		return -ENOMEM;
+	if (pdata) {
+		smbdev->board.platform_data = kmemdup(pdata, pdata_size,
+						      GFP_KERNEL);
+		if (!smbdev->board.platform_data) {
+			kfree(smbdev);
+			return -ENOMEM;
+		}
 	}
 
+	if (need_deactivate)
+		psmouse_deactivate(psmouse);
+
 	psmouse->private = smbdev;
 	psmouse->protocol_handler = psmouse_smbus_process_byte;
 	psmouse->reconnect = psmouse_smbus_reconnect;
@@ -250,8 +262,6 @@
 	psmouse->disconnect = psmouse_smbus_disconnect;
 	psmouse->resync_time = 0;
 
-	psmouse_deactivate(psmouse);
-
 	mutex_lock(&psmouse_smbus_mutex);
 	list_add_tail(&smbdev->node, &psmouse_smbus_list);
 	mutex_unlock(&psmouse_smbus_mutex);
diff --git a/drivers/input/mouse/psmouse.h b/drivers/input/mouse/psmouse.h
index 71ac500..64c3a5d 100644
--- a/drivers/input/mouse/psmouse.h
+++ b/drivers/input/mouse/psmouse.h
@@ -68,6 +68,7 @@
 	PSMOUSE_VMMOUSE,
 	PSMOUSE_BYD,
 	PSMOUSE_SYNAPTICS_SMBUS,
+	PSMOUSE_ELANTECH_SMBUS,
 	PSMOUSE_AUTO		/* This one should always be last */
 };
 
@@ -224,6 +225,7 @@
 int psmouse_smbus_init(struct psmouse *psmouse,
 		       const struct i2c_board_info *board,
 		       const void *pdata, size_t pdata_size,
+		       bool need_deactivate,
 		       bool leave_breadcrumbs);
 void psmouse_smbus_cleanup(struct psmouse *psmouse);
 
diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index a9591d2..55d3350 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -1754,7 +1754,7 @@
 	};
 
 	return psmouse_smbus_init(psmouse, &intertouch_board,
-				  &pdata, sizeof(pdata),
+				  &pdata, sizeof(pdata), true,
 				  leave_breadcrumbs);
 }
 
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 3e613af..32267c1 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -164,6 +164,17 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called chipone_icn8318.
 
+config TOUCHSCREEN_CHIPONE_ICN8505
+	tristate "chipone icn8505 touchscreen controller"
+	depends on I2C && ACPI
+	help
+	  Say Y here if you have a ChipOne icn8505 based I2C touchscreen.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called chipone_icn8505.
+
 config TOUCHSCREEN_CY8CTMG110
 	tristate "cy8ctmg110 touchscreen"
 	depends on I2C
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index dddae79..fd4fd32 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -19,6 +19,7 @@
 obj-$(CONFIG_TOUCHSCREEN_AUO_PIXCIR)	+= auo-pixcir-ts.o
 obj-$(CONFIG_TOUCHSCREEN_BU21013)	+= bu21013_ts.o
 obj-$(CONFIG_TOUCHSCREEN_CHIPONE_ICN8318)	+= chipone_icn8318.o
+obj-$(CONFIG_TOUCHSCREEN_CHIPONE_ICN8505)	+= chipone_icn8505.o
 obj-$(CONFIG_TOUCHSCREEN_CY8CTMG110)	+= cy8ctmg110_ts.o
 obj-$(CONFIG_TOUCHSCREEN_CYTTSP_CORE)	+= cyttsp_core.o
 obj-$(CONFIG_TOUCHSCREEN_CYTTSP_I2C)	+= cyttsp_i2c.o cyttsp_i2c_common.o
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 0919472..54fe190 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -194,6 +194,8 @@
 
 /* Delay times */
 #define MXT_BACKUP_TIME		50	/* msec */
+#define MXT_RESET_GPIO_TIME	20	/* msec */
+#define MXT_RESET_INVALID_CHG	100	/* msec */
 #define MXT_RESET_TIME		200	/* msec */
 #define MXT_RESET_TIMEOUT	3000	/* msec */
 #define MXT_CRC_TIMEOUT		1000	/* msec */
@@ -1208,7 +1210,7 @@
 		return ret;
 
 	/* Ignore CHG line for 100ms after reset */
-	msleep(100);
+	msleep(MXT_RESET_INVALID_CHG);
 
 	mxt_acquire_irq(data);
 
@@ -2999,142 +3001,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_ACPI
-
-struct mxt_acpi_platform_data {
-	const char *hid;
-	const struct property_entry *props;
-};
-
-static unsigned int samus_touchpad_buttons[] = {
-	KEY_RESERVED,
-	KEY_RESERVED,
-	KEY_RESERVED,
-	BTN_LEFT
-};
-
-static const struct property_entry samus_touchpad_props[] = {
-	PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", samus_touchpad_buttons),
-	{ }
-};
-
-static struct mxt_acpi_platform_data samus_platform_data[] = {
-	{
-		/* Touchpad */
-		.hid	= "ATML0000",
-		.props	= samus_touchpad_props,
-	},
-	{
-		/* Touchscreen */
-		.hid	= "ATML0001",
-	},
-	{ }
-};
-
-static unsigned int chromebook_tp_buttons[] = {
-	KEY_RESERVED,
-	KEY_RESERVED,
-	KEY_RESERVED,
-	KEY_RESERVED,
-	KEY_RESERVED,
-	BTN_LEFT
-};
-
-static const struct property_entry chromebook_tp_props[] = {
-	PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", chromebook_tp_buttons),
-	{ }
-};
-
-static struct mxt_acpi_platform_data chromebook_platform_data[] = {
-	{
-		/* Touchpad */
-		.hid	= "ATML0000",
-		.props	= chromebook_tp_props,
-	},
-	{
-		/* Touchscreen */
-		.hid	= "ATML0001",
-	},
-	{ }
-};
-
-static const struct dmi_system_id mxt_dmi_table[] = {
-	{
-		/* 2015 Google Pixel */
-		.ident = "Chromebook Pixel 2",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Samus"),
-		},
-		.driver_data = samus_platform_data,
-	},
-	{
-		/* Samsung Chromebook Pro */
-		.ident = "Samsung Chromebook Pro",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Google"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Caroline"),
-		},
-		.driver_data = samus_platform_data,
-	},
-	{
-		/* Other Google Chromebooks */
-		.ident = "Chromebook",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
-		},
-		.driver_data = chromebook_platform_data,
-	},
-	{ }
-};
-
-static int mxt_prepare_acpi_properties(struct i2c_client *client)
-{
-	struct acpi_device *adev;
-	const struct dmi_system_id *system_id;
-	const struct mxt_acpi_platform_data *acpi_pdata;
-
-	adev = ACPI_COMPANION(&client->dev);
-	if (!adev)
-		return -ENOENT;
-
-	system_id = dmi_first_match(mxt_dmi_table);
-	if (!system_id)
-		return -ENOENT;
-
-	acpi_pdata = system_id->driver_data;
-	if (!acpi_pdata)
-		return -ENOENT;
-
-	while (acpi_pdata->hid) {
-		if (!strcmp(acpi_device_hid(adev), acpi_pdata->hid)) {
-			/*
-			 * Remove previously installed properties if we
-			 * are probing this device not for the very first
-			 * time.
-			 */
-			device_remove_properties(&client->dev);
-
-			/*
-			 * Now install the platform-specific properties
-			 * that are missing from ACPI.
-			 */
-			device_add_properties(&client->dev, acpi_pdata->props);
-			break;
-		}
-
-		acpi_pdata++;
-	}
-
-	return 0;
-}
-#else
-static int mxt_prepare_acpi_properties(struct i2c_client *client)
-{
-	return -ENOENT;
-}
-#endif
-
 static const struct dmi_system_id chromebook_T9_suspend_dmi[] = {
 	{
 		.matches = {
@@ -3156,6 +3022,18 @@
 	int error;
 
 	/*
+	 * Ignore devices that do not have device properties attached to
+	 * them, as we need help determining whether we are dealing with
+	 * a touchscreen or a touchpad.
+	 *
+	 * So far on x86 the only users of Atmel touch controllers are
+	 * Chromebooks, and the chromeos_laptop driver will ensure that
+	 * the necessary properties are provided (if the firmware does not).
+	 */
+	if (!device_property_present(&client->dev, "compatible"))
+		return -ENXIO;
+
+	/*
 	 * Ignore ACPI devices representing bootloader mode.
 	 *
 	 * This is a bit of a hack: Google Chromebook BIOS creates ACPI
@@ -3186,10 +3064,6 @@
 	data->suspend_mode = dmi_check_system(chromebook_T9_suspend_dmi) ?
 		MXT_SUSPEND_T9_CTRL : MXT_SUSPEND_DEEP_SLEEP;
 
-	error = mxt_prepare_acpi_properties(client);
-	if (error && error != -ENOENT)
-		return error;
-
 	error = mxt_parse_device_properties(data);
 	if (error)
 		return error;
@@ -3210,20 +3084,14 @@
 		return error;
 	}
 
-	if (data->reset_gpio) {
-		data->in_bootloader = true;
-		msleep(MXT_RESET_TIME);
-		reinit_completion(&data->bl_completion);
-		gpiod_set_value(data->reset_gpio, 1);
-		error = mxt_wait_for_completion(data, &data->bl_completion,
-						MXT_RESET_TIMEOUT);
-		if (error)
-			return error;
-		data->in_bootloader = false;
-	}
-
 	disable_irq(client->irq);
 
+	if (data->reset_gpio) {
+		msleep(MXT_RESET_GPIO_TIME);
+		gpiod_set_value(data->reset_gpio, 1);
+		msleep(MXT_RESET_INVALID_CHG);
+	}
+
 	error = mxt_initialize(data);
 	if (error)
 		return error;
diff --git a/drivers/input/touchscreen/chipone_icn8505.c b/drivers/input/touchscreen/chipone_icn8505.c
new file mode 100644
index 0000000..c768186
--- /dev/null
+++ b/drivers/input/touchscreen/chipone_icn8505.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Driver for ChipOne icn8505 i2c touchscreen controller
+ *
+ * Copyright (c) 2015-2018 Red Hat Inc.
+ *
+ * Red Hat authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ */
+
+#include <asm/unaligned.h>
+#include <linux/acpi.h>
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/interrupt.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/module.h>
+
+/* Normal operation mode defines */
+#define ICN8505_REG_ADDR_WIDTH		16
+
+#define ICN8505_REG_POWER		0x0004
+#define ICN8505_REG_TOUCHDATA		0x1000
+#define ICN8505_REG_CONFIGDATA		0x8000
+
+/* ICN8505_REG_POWER commands */
+#define ICN8505_POWER_ACTIVE		0x00
+#define ICN8505_POWER_MONITOR		0x01
+#define ICN8505_POWER_HIBERNATE		0x02
+/*
+ * The Android driver uses these to turn the charger filter on/off, but the
+ * filter is far too aggressive, making e.g. onscreen keyboards unusable.
+ */
+#define ICN8505_POWER_ENA_CHARGER_MODE	0x55
+#define ICN8505_POWER_DIS_CHARGER_MODE	0x66
+
+#define ICN8505_MAX_TOUCHES		10
+
+/* Programming mode defines */
+#define ICN8505_PROG_I2C_ADDR		0x30
+#define ICN8505_PROG_REG_ADDR_WIDTH	24
+
+#define MAX_FW_UPLOAD_TRIES		3
+
+struct icn8505_touch {
+	u8 slot;
+	u8 x[2];
+	u8 y[2];
+	u8 pressure;	/* Seems more like finger width than pressure really */
+	u8 event;
+/* The difference between 2 and 3 is unclear */
+#define ICN8505_EVENT_NO_DATA	1 /* No finger seen yet since wakeup */
+#define ICN8505_EVENT_UPDATE1	2 /* New or updated coordinates */
+#define ICN8505_EVENT_UPDATE2	3 /* New or updated coordinates */
+#define ICN8505_EVENT_END	4 /* Finger lifted */
+} __packed;
+
+struct icn8505_touch_data {
+	u8 softbutton;
+	u8 touch_count;
+	struct icn8505_touch touches[ICN8505_MAX_TOUCHES];
+} __packed;
+
+struct icn8505_data {
+	struct i2c_client *client;
+	struct input_dev *input;
+	struct gpio_desc *wake_gpio;
+	struct touchscreen_properties prop;
+	char firmware_name[32];
+};
+
+static int icn8505_read_xfer(struct i2c_client *client, u16 i2c_addr,
+			     int reg_addr, int reg_addr_width,
+			     void *data, int len, bool silent)
+{
+	u8 buf[3];
+	int i, ret;
+	struct i2c_msg msg[2] = {
+		{
+			.addr = i2c_addr,
+			.buf = buf,
+			.len = reg_addr_width / 8,
+		},
+		{
+			.addr = i2c_addr,
+			.flags = I2C_M_RD,
+			.buf = data,
+			.len = len,
+		}
+	};
+
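+	/* Transmit the register address MSB-first (big-endian) */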
+	for (i = 0; i < (reg_addr_width / 8); i++)
+		buf[i] = (reg_addr >> (reg_addr_width - (i + 1) * 8)) & 0xff;
+
+	ret = i2c_transfer(client->adapter, msg, 2);
+	if (ret != ARRAY_SIZE(msg)) {
+		if (ret >= 0)
+			ret = -EIO;
+		if (!silent)
+			dev_err(&client->dev,
+				"Error reading addr %#x reg %#x: %d\n",
+				i2c_addr, reg_addr, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int icn8505_write_xfer(struct i2c_client *client, u16 i2c_addr,
+			      int reg_addr, int reg_addr_width,
+			      const void *data, int len, bool silent)
+{
+	u8 buf[3 + 32]; /* 3 bytes for 24 bit reg-addr + 32 bytes max len */
+	int i, ret;
+	struct i2c_msg msg = {
+		.addr = i2c_addr,
+		.buf = buf,
+		.len = reg_addr_width / 8 + len,
+	};
+
+	if (WARN_ON(len > 32))
+		return -EINVAL;
+
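+	/* Buffer layout: big-endian register address bytes, then the payload */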
+	for (i = 0; i < (reg_addr_width / 8); i++)
+		buf[i] = (reg_addr >> (reg_addr_width - (i + 1) * 8)) & 0xff;
+
+	memcpy(buf + reg_addr_width / 8, data, len);
+
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret != 1) {
+		if (ret >= 0)
+			ret = -EIO;
+		if (!silent)
+			dev_err(&client->dev,
+				"Error writing addr %#x reg %#x: %d\n",
+				i2c_addr, reg_addr, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int icn8505_read_data(struct icn8505_data *icn8505, int reg,
+			     void *buf, int len)
+{
+	return icn8505_read_xfer(icn8505->client, icn8505->client->addr, reg,
+				 ICN8505_REG_ADDR_WIDTH, buf, len, false);
+}
+
+static int icn8505_read_reg_silent(struct icn8505_data *icn8505, int reg)
+{
+	u8 buf;
+	int error;
+
+	error = icn8505_read_xfer(icn8505->client, icn8505->client->addr, reg,
+				  ICN8505_REG_ADDR_WIDTH, &buf, 1, true);
+	if (error)
+		return error;
+
+	return buf;
+}
+
+static int icn8505_write_reg(struct icn8505_data *icn8505, int reg, u8 val)
+{
+	return icn8505_write_xfer(icn8505->client, icn8505->client->addr, reg,
+				  ICN8505_REG_ADDR_WIDTH, &val, 1, false);
+}
+
+static int icn8505_read_prog_data(struct icn8505_data *icn8505, int reg,
+				  void *buf, int len)
+{
+	return icn8505_read_xfer(icn8505->client, ICN8505_PROG_I2C_ADDR, reg,
+				 ICN8505_PROG_REG_ADDR_WIDTH, buf, len, false);
+}
+
+static int icn8505_write_prog_data(struct icn8505_data *icn8505, int reg,
+				   const void *buf, int len)
+{
+	return icn8505_write_xfer(icn8505->client, ICN8505_PROG_I2C_ADDR, reg,
+				  ICN8505_PROG_REG_ADDR_WIDTH, buf, len, false);
+}
+
+static int icn8505_write_prog_reg(struct icn8505_data *icn8505, int reg, u8 val)
+{
+	return icn8505_write_xfer(icn8505->client, ICN8505_PROG_I2C_ADDR, reg,
+				  ICN8505_PROG_REG_ADDR_WIDTH, &val, 1, false);
+}
+
+/*
+ * Note this function uses a number of magic register addresses and values;
+ * there are deliberately no defines for these because the algorithm is taken
+ * from the icn85xx Android driver and I do not want to make up possibly wrong
+ * names for the addresses and/or values.
+ */
+static int icn8505_try_fw_upload(struct icn8505_data *icn8505,
+				 const struct firmware *fw)
+{
+	struct device *dev = &icn8505->client->dev;
+	size_t offset, count;
+	int error;
+	u8 buf[4];
+	u32 crc;
+
+	/* Put the controller in programming mode */
+	error = icn8505_write_prog_reg(icn8505, 0xcc3355, 0x5a);
+	if (error)
+		return error;
+
+	usleep_range(2000, 5000);
+
+	error = icn8505_write_prog_reg(icn8505, 0x040400, 0x01);
+	if (error)
+		return error;
+
+	usleep_range(2000, 5000);
+
+	error = icn8505_read_prog_data(icn8505, 0x040002, buf, 1);
+	if (error)
+		return error;
+
+	if (buf[0] != 0x85) {
+		dev_err(dev, "Failed to enter programming mode\n");
+		return -ENODEV;
+	}
+
+	usleep_range(1000, 5000);
+
+	/* Enable CRC mode */
+	error = icn8505_write_prog_reg(icn8505, 0x40028, 1);
+	if (error)
+		return error;
+
+	/* Send the firmware to SRAM */
+	for (offset = 0; offset < fw->size; offset += count) {
+		count = min_t(size_t, fw->size - offset, 32);
+		error = icn8505_write_prog_data(icn8505, offset,
+					      fw->data + offset, count);
+		if (error)
+			return error;
+	}
+
+	/* Disable CRC mode */
+	error = icn8505_write_prog_reg(icn8505, 0x40028, 0);
+	if (error)
+		return error;
+
+	/* Get and check length and CRC */
+	error = icn8505_read_prog_data(icn8505, 0x40034, buf, 2);
+	if (error)
+		return error;
+
+	if (get_unaligned_le16(buf) != fw->size) {
+		dev_warn(dev, "Length mismatch after uploading fw\n");
+		return -EIO;
+	}
+
+	error = icn8505_read_prog_data(icn8505, 0x4002c, buf, 4);
+	if (error)
+		return error;
+
+	crc = crc32_be(0, fw->data, fw->size);
+	if (get_unaligned_le32(buf) != crc) {
+		dev_warn(dev, "CRC mismatch after uploading fw\n");
+		return -EIO;
+	}
+
+	/* Boot controller from SRAM */
+	error = icn8505_write_prog_reg(icn8505, 0x40400, 0x03);
+	if (error)
+		return error;
+
+	usleep_range(2000, 5000);
+	return 0;
+}
+
+static int icn8505_upload_fw(struct icn8505_data *icn8505)
+{
+	struct device *dev = &icn8505->client->dev;
+	const struct firmware *fw;
+	int i, error;
+
+	/*
+	 * Always load the firmware, even if we don't need it at boot, as
+	 * we may need it at resume. Having loaded it once will make the
+	 * firmware class code cache it at suspend/resume.
+	 */
+	error = request_firmware(&fw, icn8505->firmware_name, dev);
+	if (error) {
+		dev_err(dev, "Firmware request error %d\n", error);
+		return error;
+	}
+
+	/* Check whether the controller is already up and running */
+	if (icn8505_read_reg_silent(icn8505, 0x000a) == 0x85)
+		goto success;
+
+	for (i = 1; i <= MAX_FW_UPLOAD_TRIES; i++) {
+		error = icn8505_try_fw_upload(icn8505, fw);
+		if (!error)
+			goto success;
+
+		dev_err(dev, "Failed to upload firmware: %d (attempt %d/%d)\n",
+			error, i, MAX_FW_UPLOAD_TRIES);
+		usleep_range(2000, 5000);
+	}
+
+success:
+	release_firmware(fw);
+	return error;
+}
+
+static bool icn8505_touch_active(u8 event)
+{
+	return event == ICN8505_EVENT_UPDATE1 ||
+	       event == ICN8505_EVENT_UPDATE2;
+}
+
+static irqreturn_t icn8505_irq(int irq, void *dev_id)
+{
+	struct icn8505_data *icn8505 = dev_id;
+	struct device *dev = &icn8505->client->dev;
+	struct icn8505_touch_data touch_data;
+	int i, error;
+
+	error = icn8505_read_data(icn8505, ICN8505_REG_TOUCHDATA,
+				  &touch_data, sizeof(touch_data));
+	if (error) {
+		dev_err(dev, "Error reading touch data: %d\n", error);
+		return IRQ_HANDLED;
+	}
+
+	if (touch_data.touch_count > ICN8505_MAX_TOUCHES) {
+		dev_warn(dev, "Too many touches %d > %d\n",
+			 touch_data.touch_count, ICN8505_MAX_TOUCHES);
+		touch_data.touch_count = ICN8505_MAX_TOUCHES;
+	}
+
+	for (i = 0; i < touch_data.touch_count; i++) {
+		struct icn8505_touch *touch = &touch_data.touches[i];
+		bool act = icn8505_touch_active(touch->event);
+
+		input_mt_slot(icn8505->input, touch->slot);
+		input_mt_report_slot_state(icn8505->input, MT_TOOL_FINGER, act);
+		if (!act)
+			continue;
+
+		touchscreen_report_pos(icn8505->input, &icn8505->prop,
+				       get_unaligned_le16(touch->x),
+				       get_unaligned_le16(touch->y),
+				       true);
+	}
+
+	input_mt_sync_frame(icn8505->input);
+	input_report_key(icn8505->input, KEY_LEFTMETA,
+			 touch_data.softbutton == 1);
+	input_sync(icn8505->input);
+
+	return IRQ_HANDLED;
+}
+
+static int icn8505_probe_acpi(struct icn8505_data *icn8505, struct device *dev)
+{
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	const char *subsys = "unknown";
+	struct acpi_device *adev;
+	union acpi_object *obj;
+	acpi_status status;
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -ENODEV;
+
+	status = acpi_evaluate_object(adev->handle, "_SUB", NULL, &buffer);
+	if (ACPI_SUCCESS(status)) {
+		obj = buffer.pointer;
+		if (obj->type == ACPI_TYPE_STRING)
+			subsys = obj->string.pointer;
+		else
+			dev_warn(dev, "Warning ACPI _SUB did not return a string\n");
+	} else {
+		dev_warn(dev, "Warning ACPI _SUB failed: %#x\n", status);
+		buffer.pointer = NULL;
+	}
+
+	snprintf(icn8505->firmware_name, sizeof(icn8505->firmware_name),
+		 "chipone/icn8505-%s.fw", subsys);
+
+	kfree(buffer.pointer);
+	return 0;
+}
+
+static int icn8505_probe(struct i2c_client *client)
+{
+	struct device *dev = &client->dev;
+	struct icn8505_data *icn8505;
+	struct input_dev *input;
+	__le16 resolution[2];
+	int error;
+
+	if (!client->irq) {
+		dev_err(dev, "No irq specified\n");
+		return -EINVAL;
+	}
+
+	icn8505 = devm_kzalloc(dev, sizeof(*icn8505), GFP_KERNEL);
+	if (!icn8505)
+		return -ENOMEM;
+
+	input = devm_input_allocate_device(dev);
+	if (!input)
+		return -ENOMEM;
+
+	input->name = client->name;
+	input->id.bustype = BUS_I2C;
+
+	input_set_capability(input, EV_ABS, ABS_MT_POSITION_X);
+	input_set_capability(input, EV_ABS, ABS_MT_POSITION_Y);
+	input_set_capability(input, EV_KEY, KEY_LEFTMETA);
+
+	icn8505->client = client;
+	icn8505->input = input;
+	input_set_drvdata(input, icn8505);
+
+	error = icn8505_probe_acpi(icn8505, dev);
+	if (error)
+		return error;
+
+	error = icn8505_upload_fw(icn8505);
+	if (error)
+		return error;
+
+	error = icn8505_read_data(icn8505, ICN8505_REG_CONFIGDATA,
+				resolution, sizeof(resolution));
+	if (error) {
+		dev_err(dev, "Error reading resolution: %d\n", error);
+		return error;
+	}
+
+	input_set_abs_params(input, ABS_MT_POSITION_X, 0,
+			     le16_to_cpu(resolution[0]) - 1, 0, 0);
+	input_set_abs_params(input, ABS_MT_POSITION_Y, 0,
+			     le16_to_cpu(resolution[1]) - 1, 0, 0);
+
+	touchscreen_parse_properties(input, true, &icn8505->prop);
+	if (!input_abs_get_max(input, ABS_MT_POSITION_X) ||
+	    !input_abs_get_max(input, ABS_MT_POSITION_Y)) {
+		dev_err(dev, "Error touchscreen-size-x and/or -y missing\n");
+		return -EINVAL;
+	}
+
+	error = input_mt_init_slots(input, ICN8505_MAX_TOUCHES,
+				  INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
+	if (error)
+		return error;
+
+	error = devm_request_threaded_irq(dev, client->irq, NULL, icn8505_irq,
+					IRQF_ONESHOT, client->name, icn8505);
+	if (error) {
+		dev_err(dev, "Error requesting irq: %d\n", error);
+		return error;
+	}
+
+	error = input_register_device(input);
+	if (error)
+		return error;
+
+	i2c_set_clientdata(client, icn8505);
+	return 0;
+}
+
+static int __maybe_unused icn8505_suspend(struct device *dev)
+{
+	struct icn8505_data *icn8505 = i2c_get_clientdata(to_i2c_client(dev));
+
+	disable_irq(icn8505->client->irq);
+
+	icn8505_write_reg(icn8505, ICN8505_REG_POWER, ICN8505_POWER_HIBERNATE);
+
+	return 0;
+}
+
+static int __maybe_unused icn8505_resume(struct device *dev)
+{
+	struct icn8505_data *icn8505 = i2c_get_clientdata(to_i2c_client(dev));
+	int error;
+
+	error = icn8505_upload_fw(icn8505);
+	if (error)
+		return error;
+
+	enable_irq(icn8505->client->irq);
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(icn8505_pm_ops, icn8505_suspend, icn8505_resume);
+
+static const struct acpi_device_id icn8505_acpi_match[] = {
+	{ "CHPN0001" },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, icn8505_acpi_match);
+
+static struct i2c_driver icn8505_driver = {
+	.driver = {
+		.name	= "chipone_icn8505",
+		.pm	= &icn8505_pm_ops,
+		.acpi_match_table = icn8505_acpi_match,
+	},
+	.probe_new = icn8505_probe,
+};
+
+module_i2c_driver(icn8505_driver);
+
+MODULE_DESCRIPTION("ChipOne icn8505 I2C Touchscreen Driver");
+MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index 9736c83..f2d9c2c 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -933,6 +933,7 @@
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id goodix_acpi_match[] = {
 	{ "GDIX1001", 0 },
+	{ "GDIX1002", 0 },
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, goodix_acpi_match);
diff --git a/drivers/input/touchscreen/mk712.c b/drivers/input/touchscreen/mk712.c
index bd53528..c179060 100644
--- a/drivers/input/touchscreen/mk712.c
+++ b/drivers/input/touchscreen/mk712.c
@@ -17,7 +17,7 @@
  * found in Gateway AOL Connected Touchpad computers.
  *
  * Documentation for ICS MK712 can be found at:
- *	http://www.idt.com/products/getDoc.cfm?docID=18713923
+ *	https://www.idt.com/general-parts/mk712-touch-screen-controller
  */
 
 /*
diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c
index f1043ae..b86c1e5 100644
--- a/drivers/input/touchscreen/ti_am335x_tsc.c
+++ b/drivers/input/touchscreen/ti_am335x_tsc.c
@@ -34,6 +34,8 @@
 #define SEQ_SETTLE		275
 #define MAX_12BIT		((1 << 12) - 1)
 
+#define TSC_IRQENB_MASK		(IRQENB_FIFO0THRES | IRQENB_EOS | IRQENB_HW_PEN)
+
 static const int config_pins[] = {
 	STEPCONFIG_XPP,
 	STEPCONFIG_XNN,
@@ -274,6 +276,7 @@
 	if (status & IRQENB_HW_PEN) {
 		ts_dev->pen_down = true;
 		irqclr |= IRQENB_HW_PEN;
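+		/* Keep the system awake until the matching pen-up below */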
+		pm_stay_awake(ts_dev->mfd_tscadc->dev);
 	}
 
 	if (status & IRQENB_PENUP) {
@@ -283,6 +286,7 @@
 			input_report_key(input_dev, BTN_TOUCH, 0);
 			input_report_abs(input_dev, ABS_PRESSURE, 0);
 			input_sync(input_dev);
+			pm_relax(ts_dev->mfd_tscadc->dev);
 		} else {
 			ts_dev->pen_down = true;
 		}
@@ -432,6 +436,7 @@
 		goto err_free_mem;
 	}
 
+	titsc_writel(ts_dev, REG_IRQSTATUS, TSC_IRQENB_MASK);
 	titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_FIFO0THRES);
 	titsc_writel(ts_dev, REG_IRQENABLE, IRQENB_EOS);
 	err = titsc_config_wires(ts_dev);
@@ -495,6 +500,7 @@
 
 	tscadc_dev = ti_tscadc_dev_get(to_platform_device(dev));
 	if (device_may_wakeup(tscadc_dev->dev)) {
+		titsc_writel(ts_dev, REG_IRQSTATUS, TSC_IRQENB_MASK);
 		idle = titsc_readl(ts_dev, REG_IRQENABLE);
 		titsc_writel(ts_dev, REG_IRQENABLE,
 				(idle | IRQENB_HW_PEN));
@@ -513,6 +519,7 @@
 		titsc_writel(ts_dev, REG_IRQWAKEUP,
 				0x00);
 		titsc_writel(ts_dev, REG_IRQCLR, IRQENB_HW_PEN);
+		pm_relax(ts_dev->mfd_tscadc->dev);
 	}
 	titsc_step_config(ts_dev);
 	titsc_writel(ts_dev, REG_FIFO0THR,
diff --git a/drivers/input/touchscreen/usbtouchscreen.c b/drivers/input/touchscreen/usbtouchscreen.c
index c6cf908..d61570d 100644
--- a/drivers/input/touchscreen/usbtouchscreen.c
+++ b/drivers/input/touchscreen/usbtouchscreen.c
@@ -440,6 +440,8 @@
 #define MTOUCHUSB_RESET                 7
 #define MTOUCHUSB_REQ_CTRLLR_ID         10
 
+#define MTOUCHUSB_REQ_CTRLLR_ID_LEN	16
+
 static int mtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt)
 {
 	if (hwcalib_xy) {
@@ -454,11 +456,93 @@
 	return 1;
 }
 
+struct mtouch_priv {
+	u8 fw_rev_major;
+	u8 fw_rev_minor;
+};
+
+static ssize_t mtouch_firmware_rev_show(struct device *dev,
+				struct device_attribute *attr, char *output)
+{
+	struct usb_interface *intf = to_usb_interface(dev);
+	struct usbtouch_usb *usbtouch = usb_get_intfdata(intf);
+	struct mtouch_priv *priv = usbtouch->priv;
+
+	return scnprintf(output, PAGE_SIZE, "%1x.%1x\n",
+			 priv->fw_rev_major, priv->fw_rev_minor);
+}
+static DEVICE_ATTR(firmware_rev, 0444, mtouch_firmware_rev_show, NULL);
+
+static struct attribute *mtouch_attrs[] = {
+	&dev_attr_firmware_rev.attr,
+	NULL
+};
+
+static const struct attribute_group mtouch_attr_group = {
+	.attrs = mtouch_attrs,
+};
+
+static int mtouch_get_fw_revision(struct usbtouch_usb *usbtouch)
+{
+	struct usb_device *udev = interface_to_usbdev(usbtouch->interface);
+	struct mtouch_priv *priv = usbtouch->priv;
+	u8 *buf;
+	int ret;
+
+	buf = kzalloc(MTOUCHUSB_REQ_CTRLLR_ID_LEN, GFP_NOIO);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+			      MTOUCHUSB_REQ_CTRLLR_ID,
+			      USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+			      0, 0, buf, MTOUCHUSB_REQ_CTRLLR_ID_LEN,
+			      USB_CTRL_SET_TIMEOUT);
+	if (ret != MTOUCHUSB_REQ_CTRLLR_ID_LEN) {
+		dev_warn(&usbtouch->interface->dev,
+			 "Failed to read FW rev: %d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+		goto free;
+	}
+
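+	/* Bytes 3 and 4 of the controller ID reply hold the FW revision */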
+	priv->fw_rev_major = buf[3];
+	priv->fw_rev_minor = buf[4];
+
+	ret = 0;
+
+free:
+	kfree(buf);
+	return ret;
+}
+
+static int mtouch_alloc(struct usbtouch_usb *usbtouch)
+{
+	int ret;
+
+	usbtouch->priv = kmalloc(sizeof(struct mtouch_priv), GFP_KERNEL);
+	if (!usbtouch->priv)
+		return -ENOMEM;
+
+	ret = sysfs_create_group(&usbtouch->interface->dev.kobj,
+				 &mtouch_attr_group);
+	if (ret) {
+		kfree(usbtouch->priv);
+		usbtouch->priv = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
 static int mtouch_init(struct usbtouch_usb *usbtouch)
 {
 	int ret, i;
 	struct usb_device *udev = interface_to_usbdev(usbtouch->interface);
 
+	ret = mtouch_get_fw_revision(usbtouch);
+	if (ret)
+		return ret;
+
 	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
 	                      MTOUCHUSB_RESET,
 	                      USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
@@ -492,6 +576,14 @@
 
 	return 0;
 }
+
+static void mtouch_exit(struct usbtouch_usb *usbtouch)
+{
+	struct mtouch_priv *priv = usbtouch->priv;
+
+	sysfs_remove_group(&usbtouch->interface->dev.kobj, &mtouch_attr_group);
+	kfree(priv);
+}
 #endif
 
 
@@ -1119,7 +1211,9 @@
 		.max_yc		= 0x4000,
 		.rept_size	= 11,
 		.read_data	= mtouch_read_data,
+		.alloc		= mtouch_alloc,
 		.init		= mtouch_init,
+		.exit		= mtouch_exit,
 	},
 #endif
 
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index df171cb..5b714a0 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -146,6 +146,7 @@
 	select DMA_DIRECT_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
+	select NEED_DMA_MAP_STATE
 	select DMAR_TABLE
 	help
 	  DMA remapping (DMAR) devices support enables independent address
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 5ed465a..15f268f 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -27,7 +27,7 @@
 obj-$(CONFIG_ARM_GIC_PM)		+= irq-gic-pm.o
 obj-$(CONFIG_ARCH_REALVIEW)		+= irq-gic-realview.o
 obj-$(CONFIG_ARM_GIC_V2M)		+= irq-gic-v2m.o
-obj-$(CONFIG_ARM_GIC_V3)		+= irq-gic-v3.o irq-gic-common.o
+obj-$(CONFIG_ARM_GIC_V3)		+= irq-gic-v3.o irq-gic-v3-mbi.o irq-gic-common.o
 obj-$(CONFIG_ARM_GIC_V3_ITS)		+= irq-gic-v3-its.o irq-gic-v3-its-platform-msi.o irq-gic-v4.o
 obj-$(CONFIG_ARM_GIC_V3_ITS_PCI)	+= irq-gic-v3-its-pci-msi.o
 obj-$(CONFIG_ARM_GIC_V3_ITS_FSL_MC)	+= irq-gic-v3-its-fsl-mc-msi.o
diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c
new file mode 100644
index 0000000..ad70e7c
--- /dev/null
+++ b/drivers/irqchip/irq-gic-v3-mbi.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#define pr_fmt(fmt) "GICv3: " fmt
+
+#include <linux/dma-iommu.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+struct mbi_range {
+	u32			spi_start;
+	u32			nr_spis;
+	unsigned long		*bm;
+};
+
+static struct mutex		mbi_lock;
+static phys_addr_t		mbi_phys_base;
+static struct mbi_range		*mbi_ranges;
+static unsigned int		mbi_range_nr;
+
+static struct irq_chip mbi_irq_chip = {
+	.name			= "MBI",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_type		= irq_chip_set_type_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+};
+
+static int mbi_irq_gic_domain_alloc(struct irq_domain *domain,
+				       unsigned int virq,
+				       irq_hw_number_t hwirq)
+{
+	struct irq_fwspec fwspec;
+	struct irq_data *d;
+	int err;
+
+	/*
+	 * Using ACPI? There is no MBI support in the spec, so you
+	 * shouldn't even be here.
+	 */
+	if (!is_of_node(domain->parent->fwnode))
+		return -EINVAL;
+
+	/*
+	 * Let's default to edge. This is consistent with traditional
+	 * MSIs, and systems requiring level signaling will just
+	 * enforce the trigger on their own.
+	 */
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 3;
+	fwspec.param[0] = 0;
+	fwspec.param[1] = hwirq - 32;
+	fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+
+	err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (err)
+		return err;
+
+	d = irq_domain_get_irq_data(domain->parent, virq);
+	return d->chip->irq_set_type(d, IRQ_TYPE_EDGE_RISING);
+}
+
+static void mbi_free_msi(struct mbi_range *mbi, unsigned int hwirq,
+			 int nr_irqs)
+{
+	mutex_lock(&mbi_lock);
+	bitmap_release_region(mbi->bm, hwirq - mbi->spi_start,
+			      get_count_order(nr_irqs));
+	mutex_unlock(&mbi_lock);
+}
+
+static int mbi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				   unsigned int nr_irqs, void *args)
+{
+	struct mbi_range *mbi = NULL;
+	int hwirq, offset, i, err = 0;
+
+	mutex_lock(&mbi_lock);
+	for (i = 0; i < mbi_range_nr; i++) {
+		offset = bitmap_find_free_region(mbi_ranges[i].bm,
+						 mbi_ranges[i].nr_spis,
+						 get_count_order(nr_irqs));
+		if (offset >= 0) {
+			mbi = &mbi_ranges[i];
+			break;
+		}
+	}
+	mutex_unlock(&mbi_lock);
+
+	if (!mbi)
+		return -ENOSPC;
+
+	hwirq = mbi->spi_start + offset;
+
+	for (i = 0; i < nr_irqs; i++) {
+		err = mbi_irq_gic_domain_alloc(domain, virq + i, hwirq + i);
+		if (err)
+			goto fail;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &mbi_irq_chip, mbi);
+	}
+
+	return 0;
+
+fail:
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+	mbi_free_msi(mbi, hwirq, nr_irqs);
+	return err;
+}
+
+static void mbi_irq_domain_free(struct irq_domain *domain,
+				unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+	struct mbi_range *mbi = irq_data_get_irq_chip_data(d);
+
+	mbi_free_msi(mbi, d->hwirq, nr_irqs);
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops mbi_domain_ops = {
+	.alloc			= mbi_irq_domain_alloc,
+	.free			= mbi_irq_domain_free,
+};
+
+static void mbi_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	msg[0].address_hi = upper_32_bits(mbi_phys_base + GICD_SETSPI_NSR);
+	msg[0].address_lo = lower_32_bits(mbi_phys_base + GICD_SETSPI_NSR);
+	msg[0].data = data->parent_data->hwirq;
+
+	iommu_dma_map_msi_msg(data->irq, msg);
+}
+
+#ifdef CONFIG_PCI_MSI
+/* PCI-specific irqchip */
+static void mbi_mask_msi_irq(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+	irq_chip_mask_parent(d);
+}
+
+static void mbi_unmask_msi_irq(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+	irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip mbi_msi_irq_chip = {
+	.name			= "MSI",
+	.irq_mask		= mbi_mask_msi_irq,
+	.irq_unmask		= mbi_unmask_msi_irq,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_compose_msi_msg	= mbi_compose_msi_msg,
+	.irq_write_msi_msg	= pci_msi_domain_write_msg,
+};
+
+static struct msi_domain_info mbi_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
+	.chip	= &mbi_msi_irq_chip,
+};
+
+static int mbi_allocate_pci_domain(struct irq_domain *nexus_domain,
+				   struct irq_domain **pci_domain)
+{
+	*pci_domain = pci_msi_create_irq_domain(nexus_domain->parent->fwnode,
+						&mbi_msi_domain_info,
+						nexus_domain);
+	if (!*pci_domain)
+		return -ENOMEM;
+
+	return 0;
+}
+#else
+static int mbi_allocate_pci_domain(struct irq_domain *nexus_domain,
+				   struct irq_domain **pci_domain)
+{
+	*pci_domain = NULL;
+	return 0;
+}
+#endif
+
+static void mbi_compose_mbi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	mbi_compose_msi_msg(data, msg);
+
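+	/*
+	 * For level-signaled MSIs, msg[1] carries the GICD_CLRSPI_NSR
+	 * doorbell used to lower the SPI again.
+	 */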
+	msg[1].address_hi = upper_32_bits(mbi_phys_base + GICD_CLRSPI_NSR);
+	msg[1].address_lo = lower_32_bits(mbi_phys_base + GICD_CLRSPI_NSR);
+	msg[1].data = data->parent_data->hwirq;
+
+	iommu_dma_map_msi_msg(data->irq, &msg[1]);
+}
+
+/* Platform-MSI specific irqchip */
+static struct irq_chip mbi_pmsi_irq_chip = {
+	.name			= "pMSI",
+	.irq_set_type		= irq_chip_set_type_parent,
+	.irq_compose_msi_msg	= mbi_compose_mbi_msg,
+	.flags			= IRQCHIP_SUPPORTS_LEVEL_MSI,
+};
+
+static struct msi_domain_ops mbi_pmsi_ops = {
+};
+
+static struct msi_domain_info mbi_pmsi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_LEVEL_CAPABLE),
+	.ops	= &mbi_pmsi_ops,
+	.chip	= &mbi_pmsi_irq_chip,
+};
+
+static int mbi_allocate_domains(struct irq_domain *parent)
+{
+	struct irq_domain *nexus_domain, *pci_domain, *plat_domain;
+	int err;
+
+	nexus_domain = irq_domain_create_tree(parent->fwnode,
+					      &mbi_domain_ops, NULL);
+	if (!nexus_domain)
+		return -ENOMEM;
+
+	irq_domain_update_bus_token(nexus_domain, DOMAIN_BUS_NEXUS);
+	nexus_domain->parent = parent;
+
+	err = mbi_allocate_pci_domain(nexus_domain, &pci_domain);
+
+	plat_domain = platform_msi_create_irq_domain(parent->fwnode,
+						     &mbi_pmsi_domain_info,
+						     nexus_domain);
+
+	if (err || !plat_domain) {
+		if (plat_domain)
+			irq_domain_remove(plat_domain);
+		if (pci_domain)
+			irq_domain_remove(pci_domain);
+		irq_domain_remove(nexus_domain);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int __init mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent)
+{
+	struct device_node *np;
+	const __be32 *reg;
+	int ret, n;
+
+	np = to_of_node(fwnode);
+
+	if (!of_property_read_bool(np, "msi-controller"))
+		return 0;
+
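+	/*
+	 * "mbi-ranges" is a list of (spi_start, nr_spis) pairs; e.g. a
+	 * hypothetical mbi-ranges = <256 64> would describe 64 SPIs
+	 * starting at SPI 256.
+	 */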
+	n = of_property_count_elems_of_size(np, "mbi-ranges", sizeof(u32));
+	if (n <= 0 || n % 2)
+		return -EINVAL;
+
+	mbi_range_nr = n / 2;
+	mbi_ranges = kcalloc(mbi_range_nr, sizeof(*mbi_ranges), GFP_KERNEL);
+	if (!mbi_ranges)
+		return -ENOMEM;
+
+	for (n = 0; n < mbi_range_nr; n++) {
+		ret = of_property_read_u32_index(np, "mbi-ranges", n * 2,
+						 &mbi_ranges[n].spi_start);
+		if (ret)
+			goto err_free_mbi;
+		ret = of_property_read_u32_index(np, "mbi-ranges", n * 2 + 1,
+						 &mbi_ranges[n].nr_spis);
+		if (ret)
+			goto err_free_mbi;
+
+		mbi_ranges[n].bm = kcalloc(BITS_TO_LONGS(mbi_ranges[n].nr_spis),
+					   sizeof(long), GFP_KERNEL);
+		if (!mbi_ranges[n].bm) {
+			ret = -ENOMEM;
+			goto err_free_mbi;
+		}
+		pr_info("MBI range [%d:%d]\n", mbi_ranges[n].spi_start,
+			mbi_ranges[n].spi_start + mbi_ranges[n].nr_spis - 1);
+	}
+
+	reg = of_get_property(np, "mbi-alias", NULL);
+	if (reg) {
+		mbi_phys_base = of_translate_address(np, reg);
+		if (mbi_phys_base == OF_BAD_ADDR) {
+			ret = -ENXIO;
+			goto err_free_mbi;
+		}
+	} else {
+		struct resource res;
+
+		if (of_address_to_resource(np, 0, &res)) {
+			ret = -ENXIO;
+			goto err_free_mbi;
+		}
+
+		mbi_phys_base = res.start;
+	}
+
+	pr_info("Using MBI frame %pa\n", &mbi_phys_base);
+
+	ret = mbi_allocate_domains(parent);
+	if (ret)
+		goto err_free_mbi;
+
+	return 0;
+
+err_free_mbi:
+	if (mbi_ranges) {
+		for (n = 0; n < mbi_range_nr; n++)
+			kfree(mbi_ranges[n].bm);
+		kfree(mbi_ranges);
+	}
+
+	return ret;
+}
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index e5d1014..5a67ec0 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1099,6 +1099,7 @@
 
 	gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops,
 						 &gic_data);
+	irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED);
 	gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist));
 	gic_data.rdists.has_vlpis = true;
 	gic_data.rdists.has_direct_lpi = true;
@@ -1112,6 +1113,12 @@
 	pr_info("Distributor has %sRange Selector support\n",
 		gic_data.has_rss ? "" : "no ");
 
+	if (typer & GICD_TYPER_MBIS) {
+		err = mbi_init(handle, gic_data.domain);
+		if (err)
+			pr_err("Failed to initialize MBIs\n");
+	}
+
 	set_handle_irq(gic_handle_irq);
 
 	gic_update_vlpi_properties();
diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
index a59bdbc..7b531fd 100644
--- a/drivers/irqchip/irq-meson-gpio.c
+++ b/drivers/irqchip/irq-meson-gpio.c
@@ -63,11 +63,16 @@
 	.nr_hwirq = 110,
 };
 
+static const struct meson_gpio_irq_params axg_params = {
+	.nr_hwirq = 100,
+};
+
 static const struct of_device_id meson_irq_gpio_matches[] = {
 	{ .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
 	{ .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
 	{ .compatible = "amlogic,meson-gxbb-gpio-intc", .data = &gxbb_params },
 	{ .compatible = "amlogic,meson-gxl-gpio-intc", .data = &gxl_params },
+	{ .compatible = "amlogic,meson-axg-gpio-intc", .data = &axg_params },
 	{ }
 };
 
diff --git a/drivers/irqchip/irq-mvebu-gicp.c b/drivers/irqchip/irq-mvebu-gicp.c
index 17a4a7b..4e17f70 100644
--- a/drivers/irqchip/irq-mvebu-gicp.c
+++ b/drivers/irqchip/irq-mvebu-gicp.c
@@ -19,8 +19,6 @@
 
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 
-#include "irq-mvebu-gicp.h"
-
 #define GICP_SETSPI_NSR_OFFSET	0x0
 #define GICP_CLRSPI_NSR_OFFSET	0x8
 
@@ -55,34 +53,18 @@
 	return -EINVAL;
 }
 
-int mvebu_gicp_get_doorbells(struct device_node *dn, phys_addr_t *setspi,
-			     phys_addr_t *clrspi)
-{
-	struct platform_device *pdev;
-	struct mvebu_gicp *gicp;
-
-	pdev = of_find_device_by_node(dn);
-	if (!pdev)
-		return -ENODEV;
-
-	gicp = platform_get_drvdata(pdev);
-	if (!gicp)
-		return -ENODEV;
-
-	*setspi = gicp->res->start + GICP_SETSPI_NSR_OFFSET;
-	*clrspi = gicp->res->start + GICP_CLRSPI_NSR_OFFSET;
-
-	return 0;
-}
-
 static void gicp_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 {
 	struct mvebu_gicp *gicp = data->chip_data;
 	phys_addr_t setspi = gicp->res->start + GICP_SETSPI_NSR_OFFSET;
+	phys_addr_t clrspi = gicp->res->start + GICP_CLRSPI_NSR_OFFSET;
 
-	msg->data = data->hwirq;
-	msg->address_lo = lower_32_bits(setspi);
-	msg->address_hi = upper_32_bits(setspi);
+	msg[0].data = data->hwirq;
+	msg[0].address_lo = lower_32_bits(setspi);
+	msg[0].address_hi = upper_32_bits(setspi);
+	msg[1].data = data->hwirq;
+	msg[1].address_lo = lower_32_bits(clrspi);
+	msg[1].address_hi = upper_32_bits(clrspi);
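+	/* msg[1] is the CLRSPI doorbell, used for level-triggered MSIs */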
 }
 
 static struct irq_chip gicp_irq_chip = {
@@ -170,13 +152,15 @@
 static struct irq_chip gicp_msi_irq_chip = {
 	.name		= "GICP",
 	.irq_set_type	= irq_chip_set_type_parent,
+	.flags		= IRQCHIP_SUPPORTS_LEVEL_MSI,
 };
 
 static struct msi_domain_ops gicp_msi_ops = {
 };
 
 static struct msi_domain_info gicp_msi_domain_info = {
-	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS),
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_LEVEL_CAPABLE),
 	.ops	= &gicp_msi_ops,
 	.chip	= &gicp_msi_irq_chip,
 };
diff --git a/drivers/irqchip/irq-mvebu-gicp.h b/drivers/irqchip/irq-mvebu-gicp.h
deleted file mode 100644
index eaa12fb..0000000
--- a/drivers/irqchip/irq-mvebu-gicp.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __MVEBU_GICP_H__
-#define __MVEBU_GICP_H__
-
-#include <linux/types.h>
-
-struct device_node;
-
-int mvebu_gicp_get_doorbells(struct device_node *dn, phys_addr_t *setspi,
-			     phys_addr_t *clrspi);
-
-#endif /* __MVEBU_GICP_H__ */
diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c
index e18c48d..1306333 100644
--- a/drivers/irqchip/irq-mvebu-icu.c
+++ b/drivers/irqchip/irq-mvebu-icu.c
@@ -21,8 +21,6 @@
 
 #include <dt-bindings/interrupt-controller/mvebu-icu.h>
 
-#include "irq-mvebu-gicp.h"
-
 /* ICU registers */
 #define ICU_SETSPI_NSR_AL	0x10
 #define ICU_SETSPI_NSR_AH	0x14
@@ -43,6 +41,7 @@
 	void __iomem *base;
 	struct irq_domain *domain;
 	struct device *dev;
+	atomic_t initialized;
 };
 
 struct mvebu_icu_irq_data {
@@ -51,6 +50,18 @@
 	unsigned int type;
 };
 
+static void mvebu_icu_init(struct mvebu_icu *icu, struct msi_msg *msg)
+{
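+	/* Only the first caller programs the doorbell addresses */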
+	if (atomic_cmpxchg(&icu->initialized, false, true))
+		return;
+
+	/* Set Clear/Set ICU SPI message address in AP */
+	writel_relaxed(msg[0].address_hi, icu->base + ICU_SETSPI_NSR_AH);
+	writel_relaxed(msg[0].address_lo, icu->base + ICU_SETSPI_NSR_AL);
+	writel_relaxed(msg[1].address_hi, icu->base + ICU_CLRSPI_NSR_AH);
+	writel_relaxed(msg[1].address_lo, icu->base + ICU_CLRSPI_NSR_AL);
+}
+
 static void mvebu_icu_write_msg(struct msi_desc *desc, struct msi_msg *msg)
 {
 	struct irq_data *d = irq_get_irq_data(desc->irq);
@@ -59,6 +70,8 @@
 	unsigned int icu_int;
 
 	if (msg->address_lo || msg->address_hi) {
+		/* One-off initialization */
+		mvebu_icu_init(icu, msg);
 		/* Configure the ICU with irq number & type */
 		icu_int = msg->data | ICU_INT_ENABLE;
 		if (icu_irqd->type & IRQ_TYPE_EDGE_RISING)
@@ -197,9 +210,7 @@
 	struct device_node *node = pdev->dev.of_node;
 	struct device_node *gicp_dn;
 	struct resource *res;
-	phys_addr_t setspi, clrspi;
-	u32 i, icu_int;
-	int ret;
+	int i;
 
 	icu = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_icu),
 			   GFP_KERNEL);
@@ -242,22 +253,12 @@
 	if (!gicp_dn)
 		return -ENODEV;
 
-	ret = mvebu_gicp_get_doorbells(gicp_dn, &setspi, &clrspi);
-	if (ret)
-		return ret;
-
-	/* Set Clear/Set ICU SPI message address in AP */
-	writel_relaxed(upper_32_bits(setspi), icu->base + ICU_SETSPI_NSR_AH);
-	writel_relaxed(lower_32_bits(setspi), icu->base + ICU_SETSPI_NSR_AL);
-	writel_relaxed(upper_32_bits(clrspi), icu->base + ICU_CLRSPI_NSR_AH);
-	writel_relaxed(lower_32_bits(clrspi), icu->base + ICU_CLRSPI_NSR_AL);
-
 	/*
 	 * Clean all ICU interrupts with type SPI_NSR, required to
 	 * avoid unpredictable SPI assignments done by firmware.
 	 */
 	for (i = 0 ; i < ICU_MAX_IRQS ; i++) {
-		icu_int = readl(icu->base + ICU_INT_CFG(i));
+		u32 icu_int = readl_relaxed(icu->base + ICU_INT_CFG(i));
 		if ((icu_int >> ICU_GROUP_SHIFT) == ICU_GRP_NSR)
 			writel_relaxed(0x0, icu->base + ICU_INT_CFG(i));
 	}
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 36f0fbe..5089c1e 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -14,6 +14,9 @@
 #include <linux/irqdomain.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/syscore_ops.h>
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
 
 #define IRQS_PER_BANK 32
 
@@ -23,29 +26,69 @@
 	u32 rtsr_ofst;
 	u32 ftsr_ofst;
 	u32 swier_ofst;
-	u32 pr_ofst;
+	u32 rpr_ofst;
+	u32 fpr_ofst;
 };
 
+#define UNDEF_REG ~0
+
+struct stm32_desc_irq {
+	u32 exti;
+	u32 irq_parent;
+};
+
+struct stm32_exti_drv_data {
+	const struct stm32_exti_bank **exti_banks;
+	const struct stm32_desc_irq *desc_irqs;
+	u32 bank_nr;
+	u32 irq_nr;
+};
+
+struct stm32_exti_chip_data {
+	struct stm32_exti_host_data *host_data;
+	const struct stm32_exti_bank *reg_bank;
+	struct raw_spinlock rlock;
+	u32 wake_active;
+	u32 mask_cache;
+	u32 rtsr_cache;
+	u32 ftsr_cache;
+};
+
+struct stm32_exti_host_data {
+	void __iomem *base;
+	struct stm32_exti_chip_data *chips_data;
+	const struct stm32_exti_drv_data *drv_data;
+};
+
+static struct stm32_exti_host_data *stm32_host_data;
+
 static const struct stm32_exti_bank stm32f4xx_exti_b1 = {
 	.imr_ofst	= 0x00,
 	.emr_ofst	= 0x04,
 	.rtsr_ofst	= 0x08,
 	.ftsr_ofst	= 0x0C,
 	.swier_ofst	= 0x10,
-	.pr_ofst	= 0x14,
+	.rpr_ofst	= 0x14,
+	.fpr_ofst	= UNDEF_REG,
 };
 
 static const struct stm32_exti_bank *stm32f4xx_exti_banks[] = {
 	&stm32f4xx_exti_b1,
 };
 
+static const struct stm32_exti_drv_data stm32f4xx_drv_data = {
+	.exti_banks = stm32f4xx_exti_banks,
+	.bank_nr = ARRAY_SIZE(stm32f4xx_exti_banks),
+};
+
 static const struct stm32_exti_bank stm32h7xx_exti_b1 = {
 	.imr_ofst	= 0x80,
 	.emr_ofst	= 0x84,
 	.rtsr_ofst	= 0x00,
 	.ftsr_ofst	= 0x04,
 	.swier_ofst	= 0x08,
-	.pr_ofst	= 0x88,
+	.rpr_ofst	= 0x88,
+	.fpr_ofst	= UNDEF_REG,
 };
 
 static const struct stm32_exti_bank stm32h7xx_exti_b2 = {
@@ -54,7 +97,8 @@
 	.rtsr_ofst	= 0x20,
 	.ftsr_ofst	= 0x24,
 	.swier_ofst	= 0x28,
-	.pr_ofst	= 0x98,
+	.rpr_ofst	= 0x98,
+	.fpr_ofst	= UNDEF_REG,
 };
 
 static const struct stm32_exti_bank stm32h7xx_exti_b3 = {
@@ -63,7 +107,8 @@
 	.rtsr_ofst	= 0x40,
 	.ftsr_ofst	= 0x44,
 	.swier_ofst	= 0x48,
-	.pr_ofst	= 0xA8,
+	.rpr_ofst	= 0xA8,
+	.fpr_ofst	= UNDEF_REG,
 };
 
 static const struct stm32_exti_bank *stm32h7xx_exti_banks[] = {
@@ -72,18 +117,105 @@
 	&stm32h7xx_exti_b3,
 };
 
-static unsigned long stm32_exti_pending(struct irq_chip_generic *gc)
-{
-	const struct stm32_exti_bank *stm32_bank = gc->private;
+static const struct stm32_exti_drv_data stm32h7xx_drv_data = {
+	.exti_banks = stm32h7xx_exti_banks,
+	.bank_nr = ARRAY_SIZE(stm32h7xx_exti_banks),
+};
 
-	return irq_reg_readl(gc, stm32_bank->pr_ofst);
+static const struct stm32_exti_bank stm32mp1_exti_b1 = {
+	.imr_ofst	= 0x80,
+	.emr_ofst	= 0x84,
+	.rtsr_ofst	= 0x00,
+	.ftsr_ofst	= 0x04,
+	.swier_ofst	= 0x08,
+	.rpr_ofst	= 0x0C,
+	.fpr_ofst	= 0x10,
+};
+
+static const struct stm32_exti_bank stm32mp1_exti_b2 = {
+	.imr_ofst	= 0x90,
+	.emr_ofst	= 0x94,
+	.rtsr_ofst	= 0x20,
+	.ftsr_ofst	= 0x24,
+	.swier_ofst	= 0x28,
+	.rpr_ofst	= 0x2C,
+	.fpr_ofst	= 0x30,
+};
+
+static const struct stm32_exti_bank stm32mp1_exti_b3 = {
+	.imr_ofst	= 0xA0,
+	.emr_ofst	= 0xA4,
+	.rtsr_ofst	= 0x40,
+	.ftsr_ofst	= 0x44,
+	.swier_ofst	= 0x48,
+	.rpr_ofst	= 0x4C,
+	.fpr_ofst	= 0x50,
+};
+
+static const struct stm32_exti_bank *stm32mp1_exti_banks[] = {
+	&stm32mp1_exti_b1,
+	&stm32mp1_exti_b2,
+	&stm32mp1_exti_b3,
+};
+
+static const struct stm32_desc_irq stm32mp1_desc_irq[] = {
+	{ .exti = 1, .irq_parent = 7 },
+	{ .exti = 2, .irq_parent = 8 },
+	{ .exti = 3, .irq_parent = 9 },
+	{ .exti = 4, .irq_parent = 10 },
+	{ .exti = 5, .irq_parent = 23 },
+	{ .exti = 6, .irq_parent = 64 },
+	{ .exti = 7, .irq_parent = 65 },
+	{ .exti = 8, .irq_parent = 66 },
+	{ .exti = 9, .irq_parent = 67 },
+	{ .exti = 10, .irq_parent = 40 },
+	{ .exti = 11, .irq_parent = 42 },
+	{ .exti = 12, .irq_parent = 76 },
+	{ .exti = 13, .irq_parent = 77 },
+	{ .exti = 14, .irq_parent = 121 },
+	{ .exti = 15, .irq_parent = 127 },
+	{ .exti = 16, .irq_parent = 1 },
+	{ .exti = 65, .irq_parent = 144 },
+	{ .exti = 68, .irq_parent = 143 },
+	{ .exti = 73, .irq_parent = 129 },
+};
+
+static const struct stm32_exti_drv_data stm32mp1_drv_data = {
+	.exti_banks = stm32mp1_exti_banks,
+	.bank_nr = ARRAY_SIZE(stm32mp1_exti_banks),
+	.desc_irqs = stm32mp1_desc_irq,
+	.irq_nr = ARRAY_SIZE(stm32mp1_desc_irq),
+};
+
+static int stm32_exti_to_irq(const struct stm32_exti_drv_data *drv_data,
+			     irq_hw_number_t hwirq)
+{
+	const struct stm32_desc_irq *desc_irq;
+	int i;
+
+	if (!drv_data->desc_irqs)
+		return -EINVAL;
+
+	for (i = 0; i < drv_data->irq_nr; i++) {
+		desc_irq = &drv_data->desc_irqs[i];
+		if (desc_irq->exti == hwirq)
+			return desc_irq->irq_parent;
+	}
+
+	return -EINVAL;
 }
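+
+/*
+ * Pending state: on stm32mp1 rising and falling edges latch in separate
+ * registers (rpr/fpr), so both must be merged; stm32f4/h7 parts only
+ * have a single pending register.
+ */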
 
-static void stm32_exti_irq_ack(struct irq_chip_generic *gc, u32 mask)
+static unsigned long stm32_exti_pending(struct irq_chip_generic *gc)
 {
-	const struct stm32_exti_bank *stm32_bank = gc->private;
+	struct stm32_exti_chip_data *chip_data = gc->private;
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+	unsigned long pending;
 
-	irq_reg_writel(gc, mask, stm32_bank->pr_ofst);
+	pending = irq_reg_readl(gc, stm32_bank->rpr_ofst);
+	if (stm32_bank->fpr_ofst != UNDEF_REG)
+		pending |= irq_reg_readl(gc, stm32_bank->fpr_ofst);
+
+	return pending;
 }
 
 static void stm32_irq_handler(struct irq_desc *desc)
@@ -92,7 +224,6 @@
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned int virq, nbanks = domain->gc->num_chips;
 	struct irq_chip_generic *gc;
-	const struct stm32_exti_bank *stm32_bank;
 	unsigned long pending;
 	int n, i, irq_base = 0;
 
@@ -100,13 +231,11 @@
 
 	for (i = 0; i < nbanks; i++, irq_base += IRQS_PER_BANK) {
 		gc = irq_get_domain_generic_chip(domain, irq_base);
-		stm32_bank = gc->private;
 
 		while ((pending = stm32_exti_pending(gc))) {
 			for_each_set_bit(n, &pending, IRQS_PER_BANK) {
 				virq = irq_find_mapping(domain, irq_base + n);
 				generic_handle_irq(virq);
-				stm32_exti_irq_ack(gc, BIT(n));
 			}
 		}
 	}
@@ -114,34 +243,48 @@
 	chained_irq_exit(chip, desc);
 }
 
-static int stm32_irq_set_type(struct irq_data *data, unsigned int type)
+static int stm32_exti_set_type(struct irq_data *d,
+			       unsigned int type, u32 *rtsr, u32 *ftsr)
 {
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
-	const struct stm32_exti_bank *stm32_bank = gc->private;
-	int pin = data->hwirq % IRQS_PER_BANK;
+	u32 mask = BIT(d->hwirq % IRQS_PER_BANK);
+
+	switch (type) {
+	case IRQ_TYPE_EDGE_RISING:
+		*rtsr |= mask;
+		*ftsr &= ~mask;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		*rtsr &= ~mask;
+		*ftsr |= mask;
+		break;
+	case IRQ_TYPE_EDGE_BOTH:
+		*rtsr |= mask;
+		*ftsr |= mask;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int stm32_irq_set_type(struct irq_data *d, unsigned int type)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	struct stm32_exti_chip_data *chip_data = gc->private;
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
 	u32 rtsr, ftsr;
+	int err;
 
 	irq_gc_lock(gc);
 
 	rtsr = irq_reg_readl(gc, stm32_bank->rtsr_ofst);
 	ftsr = irq_reg_readl(gc, stm32_bank->ftsr_ofst);
 
-	switch (type) {
-	case IRQ_TYPE_EDGE_RISING:
-		rtsr |= BIT(pin);
-		ftsr &= ~BIT(pin);
-		break;
-	case IRQ_TYPE_EDGE_FALLING:
-		rtsr &= ~BIT(pin);
-		ftsr |= BIT(pin);
-		break;
-	case IRQ_TYPE_EDGE_BOTH:
-		rtsr |= BIT(pin);
-		ftsr |= BIT(pin);
-		break;
-	default:
+	err = stm32_exti_set_type(d, type, &rtsr, &ftsr);
+	if (err) {
 		irq_gc_unlock(gc);
-		return -EINVAL;
+		return err;
 	}
 
 	irq_reg_writel(gc, rtsr, stm32_bank->rtsr_ofst);
@@ -152,40 +295,59 @@
 	return 0;
 }
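+
+/*
+ * Across suspend only wake-enabled lines stay unmasked in IMR; the
+ * edge trigger registers are cached on suspend and restored on resume
+ * together with the normal interrupt mask.
+ */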
 
-static int stm32_irq_set_wake(struct irq_data *data, unsigned int on)
+static void stm32_chip_suspend(struct stm32_exti_chip_data *chip_data,
+			       u32 wake_active)
 {
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
-	const struct stm32_exti_bank *stm32_bank = gc->private;
-	int pin = data->hwirq % IRQS_PER_BANK;
-	u32 imr;
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+	void __iomem *base = chip_data->host_data->base;
+
+	/* save rtsr, ftsr registers */
+	chip_data->rtsr_cache = readl_relaxed(base + stm32_bank->rtsr_ofst);
+	chip_data->ftsr_cache = readl_relaxed(base + stm32_bank->ftsr_ofst);
+
+	writel_relaxed(wake_active, base + stm32_bank->imr_ofst);
+}
+
+static void stm32_chip_resume(struct stm32_exti_chip_data *chip_data,
+			      u32 mask_cache)
+{
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+	void __iomem *base = chip_data->host_data->base;
+
+	/* restore rtsr, ftsr, registers */
+	writel_relaxed(chip_data->rtsr_cache, base + stm32_bank->rtsr_ofst);
+	writel_relaxed(chip_data->ftsr_cache, base + stm32_bank->ftsr_ofst);
+
+	writel_relaxed(mask_cache, base + stm32_bank->imr_ofst);
+}
+
+static void stm32_irq_suspend(struct irq_chip_generic *gc)
+{
+	struct stm32_exti_chip_data *chip_data = gc->private;
 
 	irq_gc_lock(gc);
-
-	imr = irq_reg_readl(gc, stm32_bank->imr_ofst);
-	if (on)
-		imr |= BIT(pin);
-	else
-		imr &= ~BIT(pin);
-	irq_reg_writel(gc, imr, stm32_bank->imr_ofst);
-
+	stm32_chip_suspend(chip_data, gc->wake_active);
 	irq_gc_unlock(gc);
+}
 
-	return 0;
+static void stm32_irq_resume(struct irq_chip_generic *gc)
+{
+	struct stm32_exti_chip_data *chip_data = gc->private;
+
+	irq_gc_lock(gc);
+	stm32_chip_resume(chip_data, gc->mask_cache);
+	irq_gc_unlock(gc);
 }
 
 static int stm32_exti_alloc(struct irq_domain *d, unsigned int virq,
 			    unsigned int nr_irqs, void *data)
 {
-	struct irq_chip_generic *gc;
 	struct irq_fwspec *fwspec = data;
 	irq_hw_number_t hwirq;
 
 	hwirq = fwspec->param[0];
-	gc = irq_get_domain_generic_chip(d, hwirq);
 
 	irq_map_generic_chip(d, virq, hwirq);
-	irq_domain_set_info(d, virq, hwirq, &gc->chip_types->chip, gc,
-			    handle_simple_irq, NULL, NULL);
 
 	return 0;
 }
@@ -198,30 +360,318 @@
 	irq_domain_reset_irq_data(data);
 }
 
-struct irq_domain_ops irq_exti_domain_ops = {
+static const struct irq_domain_ops irq_exti_domain_ops = {
 	.map	= irq_map_generic_chip,
-	.xlate	= irq_domain_xlate_onetwocell,
 	.alloc  = stm32_exti_alloc,
 	.free	= stm32_exti_free,
 };
 
-static int
-__init stm32_exti_init(const struct stm32_exti_bank **stm32_exti_banks,
-		       int bank_nr, struct device_node *node)
+static void stm32_irq_ack(struct irq_data *d)
 {
-	unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
-	int nr_irqs, nr_exti, ret, i;
-	struct irq_chip_generic *gc;
-	struct irq_domain *domain;
-	void *base;
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
+	struct stm32_exti_chip_data *chip_data = gc->private;
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
 
-	base = of_iomap(node, 0);
-	if (!base) {
-		pr_err("%pOF: Unable to map registers\n", node);
-		return -ENOMEM;
+	irq_gc_lock(gc);
+
+	irq_reg_writel(gc, d->mask, stm32_bank->rpr_ofst);
+	if (stm32_bank->fpr_ofst != UNDEF_REG)
+		irq_reg_writel(gc, d->mask, stm32_bank->fpr_ofst);
+
+	irq_gc_unlock(gc);
+}
+
+static inline u32 stm32_exti_set_bit(struct irq_data *d, u32 reg)
+{
+	struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+	void __iomem *base = chip_data->host_data->base;
+	u32 val;
+
+	val = readl_relaxed(base + reg);
+	val |= BIT(d->hwirq % IRQS_PER_BANK);
+	writel_relaxed(val, base + reg);
+
+	return val;
+}
+
+static inline u32 stm32_exti_clr_bit(struct irq_data *d, u32 reg)
+{
+	struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+	void __iomem *base = chip_data->host_data->base;
+	u32 val;
+
+	val = readl_relaxed(base + reg);
+	val &= ~BIT(d->hwirq % IRQS_PER_BANK);
+	writel_relaxed(val, base + reg);
+
+	return val;
+}
+
+static void stm32_exti_h_eoi(struct irq_data *d)
+{
+	struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+
+	raw_spin_lock(&chip_data->rlock);
+
+	stm32_exti_set_bit(d, stm32_bank->rpr_ofst);
+	if (stm32_bank->fpr_ofst != UNDEF_REG)
+		stm32_exti_set_bit(d, stm32_bank->fpr_ofst);
+
+	raw_spin_unlock(&chip_data->rlock);
+
+	if (d->parent_data->chip)
+		irq_chip_eoi_parent(d);
+}
+
+static void stm32_exti_h_mask(struct irq_data *d)
+{
+	struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+
+	raw_spin_lock(&chip_data->rlock);
+	chip_data->mask_cache = stm32_exti_clr_bit(d, stm32_bank->imr_ofst);
+	raw_spin_unlock(&chip_data->rlock);
+
+	if (d->parent_data->chip)
+		irq_chip_mask_parent(d);
+}
+
+static void stm32_exti_h_unmask(struct irq_data *d)
+{
+	struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+
+	raw_spin_lock(&chip_data->rlock);
+	chip_data->mask_cache = stm32_exti_set_bit(d, stm32_bank->imr_ofst);
+	raw_spin_unlock(&chip_data->rlock);
+
+	if (d->parent_data->chip)
+		irq_chip_unmask_parent(d);
+}
+
+static int stm32_exti_h_set_type(struct irq_data *d, unsigned int type)
+{
+	struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+	const struct stm32_exti_bank *stm32_bank = chip_data->reg_bank;
+	void __iomem *base = chip_data->host_data->base;
+	u32 rtsr, ftsr;
+	int err;
+
+	raw_spin_lock(&chip_data->rlock);
+	rtsr = readl_relaxed(base + stm32_bank->rtsr_ofst);
+	ftsr = readl_relaxed(base + stm32_bank->ftsr_ofst);
+
+	err = stm32_exti_set_type(d, type, &rtsr, &ftsr);
+	if (err) {
+		raw_spin_unlock(&chip_data->rlock);
+		return err;
 	}
 
-	domain = irq_domain_add_linear(node, bank_nr * IRQS_PER_BANK,
+	writel_relaxed(rtsr, base + stm32_bank->rtsr_ofst);
+	writel_relaxed(ftsr, base + stm32_bank->ftsr_ofst);
+	raw_spin_unlock(&chip_data->rlock);
+
+	return 0;
+}
+
+static int stm32_exti_h_set_wake(struct irq_data *d, unsigned int on)
+{
+	struct stm32_exti_chip_data *chip_data = irq_data_get_irq_chip_data(d);
+	u32 mask = BIT(d->hwirq % IRQS_PER_BANK);
+
+	raw_spin_lock(&chip_data->rlock);
+
+	if (on)
+		chip_data->wake_active |= mask;
+	else
+		chip_data->wake_active &= ~mask;
+
+	raw_spin_unlock(&chip_data->rlock);
+
+	return 0;
+}
+
+static int stm32_exti_h_set_affinity(struct irq_data *d,
+				     const struct cpumask *dest, bool force)
+{
+	if (d->parent_data->chip)
+		return irq_chip_set_affinity_parent(d, dest, force);
+
+	return -EINVAL;
+}
+
+#ifdef CONFIG_PM
+static int stm32_exti_h_suspend(void)
+{
+	struct stm32_exti_chip_data *chip_data;
+	int i;
+
+	for (i = 0; i < stm32_host_data->drv_data->bank_nr; i++) {
+		chip_data = &stm32_host_data->chips_data[i];
+		raw_spin_lock(&chip_data->rlock);
+		stm32_chip_suspend(chip_data, chip_data->wake_active);
+		raw_spin_unlock(&chip_data->rlock);
+	}
+
+	return 0;
+}
+
+static void stm32_exti_h_resume(void)
+{
+	struct stm32_exti_chip_data *chip_data;
+	int i;
+
+	for (i = 0; i < stm32_host_data->drv_data->bank_nr; i++) {
+		chip_data = &stm32_host_data->chips_data[i];
+		raw_spin_lock(&chip_data->rlock);
+		stm32_chip_resume(chip_data, chip_data->mask_cache);
+		raw_spin_unlock(&chip_data->rlock);
+	}
+}
+
+static struct syscore_ops stm32_exti_h_syscore_ops = {
+	.suspend	= stm32_exti_h_suspend,
+	.resume		= stm32_exti_h_resume,
+};
+
+static void stm32_exti_h_syscore_init(void)
+{
+	register_syscore_ops(&stm32_exti_h_syscore_ops);
+}
+#else
+static inline void stm32_exti_h_syscore_init(void) {}
+#endif
+
+static struct irq_chip stm32_exti_h_chip = {
+	.name			= "stm32-exti-h",
+	.irq_eoi		= stm32_exti_h_eoi,
+	.irq_mask		= stm32_exti_h_mask,
+	.irq_unmask		= stm32_exti_h_unmask,
+	.irq_retrigger		= irq_chip_retrigger_hierarchy,
+	.irq_set_type		= stm32_exti_h_set_type,
+	.irq_set_wake		= stm32_exti_h_set_wake,
+	.flags			= IRQCHIP_MASK_ON_SUSPEND,
+#ifdef CONFIG_SMP
+	.irq_set_affinity	= stm32_exti_h_set_affinity,
+#endif
+};
+
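+/*
+ * Hierarchy allocation: EXTI lines listed in the stm32mp1_desc_irq
+ * table also allocate the matching GIC SPI in the parent domain;
+ * unlisted lines are handled by the EXTI controller alone.
+ */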
+static int stm32_exti_h_domain_alloc(struct irq_domain *dm,
+				     unsigned int virq,
+				     unsigned int nr_irqs, void *data)
+{
+	struct stm32_exti_host_data *host_data = dm->host_data;
+	struct stm32_exti_chip_data *chip_data;
+	struct irq_fwspec *fwspec = data;
+	struct irq_fwspec p_fwspec;
+	irq_hw_number_t hwirq;
+	int p_irq, bank;
+
+	hwirq = fwspec->param[0];
+	bank  = hwirq / IRQS_PER_BANK;
+	chip_data = &host_data->chips_data[bank];
+
+	irq_domain_set_hwirq_and_chip(dm, virq, hwirq,
+				      &stm32_exti_h_chip, chip_data);
+
+	p_irq = stm32_exti_to_irq(host_data->drv_data, hwirq);
+	if (p_irq >= 0) {
+		p_fwspec.fwnode = dm->parent->fwnode;
+		p_fwspec.param_count = 3;
+		p_fwspec.param[0] = GIC_SPI;
+		p_fwspec.param[1] = p_irq;
+		p_fwspec.param[2] = IRQ_TYPE_LEVEL_HIGH;
+
+		return irq_domain_alloc_irqs_parent(dm, virq, 1, &p_fwspec);
+	}
+
+	return 0;
+}
+
+static struct
+stm32_exti_host_data *stm32_exti_host_init(const struct stm32_exti_drv_data *dd,
+					   struct device_node *node)
+{
+	struct stm32_exti_host_data *host_data;
+
+	host_data = kzalloc(sizeof(*host_data), GFP_KERNEL);
+	if (!host_data)
+		return NULL;
+
+	host_data->drv_data = dd;
+	host_data->chips_data = kcalloc(dd->bank_nr,
+					sizeof(struct stm32_exti_chip_data),
+					GFP_KERNEL);
+	if (!host_data->chips_data)
+		return NULL;
+
+	host_data->base = of_iomap(node, 0);
+	if (!host_data->base) {
+		pr_err("%pOF: Unable to map registers\n", node);
+		return NULL;
+	}
+
+	stm32_host_data = host_data;
+
+	return host_data;
+}
+
+static struct
+stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data,
+					   u32 bank_idx,
+					   struct device_node *node)
+{
+	const struct stm32_exti_bank *stm32_bank;
+	struct stm32_exti_chip_data *chip_data;
+	void __iomem *base = h_data->base;
+	u32 irqs_mask;
+
+	stm32_bank = h_data->drv_data->exti_banks[bank_idx];
+	chip_data = &h_data->chips_data[bank_idx];
+	chip_data->host_data = h_data;
+	chip_data->reg_bank = stm32_bank;
+
+	raw_spin_lock_init(&chip_data->rlock);
+
+	/* Determine number of irqs supported */
+	writel_relaxed(~0UL, base + stm32_bank->rtsr_ofst);
+	irqs_mask = readl_relaxed(base + stm32_bank->rtsr_ofst);
+
+	/*
+	 * This IP has no reset, so after a hot reboot we must
+	 * clear the registers to avoid any stale state
+	 */
+	writel_relaxed(0, base + stm32_bank->imr_ofst);
+	writel_relaxed(0, base + stm32_bank->emr_ofst);
+	writel_relaxed(0, base + stm32_bank->rtsr_ofst);
+	writel_relaxed(0, base + stm32_bank->ftsr_ofst);
+	writel_relaxed(~0UL, base + stm32_bank->rpr_ofst);
+	if (stm32_bank->fpr_ofst != UNDEF_REG)
+		writel_relaxed(~0UL, base + stm32_bank->fpr_ofst);
+
+	pr_info("%s: bank%d, External IRQs available:%#x\n",
+		node->full_name, bank_idx, irqs_mask);
+
+	return chip_data;
+}
+
+static int __init stm32_exti_init(const struct stm32_exti_drv_data *drv_data,
+				  struct device_node *node)
+{
+	struct stm32_exti_host_data *host_data;
+	unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+	int nr_irqs, ret, i;
+	struct irq_chip_generic *gc;
+	struct irq_domain *domain;
+
+	host_data = stm32_exti_host_init(drv_data, node);
+	if (!host_data)
+		return -ENOMEM;
+
+	domain = irq_domain_add_linear(node, drv_data->bank_nr * IRQS_PER_BANK,
 				       &irq_exti_domain_ops, NULL);
 	if (!domain) {
 		pr_err("%s: Could not register interrupt domain.\n",
@@ -234,44 +684,32 @@
 					     handle_edge_irq, clr, 0, 0);
 	if (ret) {
 		pr_err("%pOF: Could not allocate generic interrupt chip.\n",
-			node);
+		       node);
 		goto out_free_domain;
 	}
 
-	for (i = 0; i < bank_nr; i++) {
-		const struct stm32_exti_bank *stm32_bank = stm32_exti_banks[i];
-		u32 irqs_mask;
+	for (i = 0; i < drv_data->bank_nr; i++) {
+		const struct stm32_exti_bank *stm32_bank;
+		struct stm32_exti_chip_data *chip_data;
+
+		stm32_bank = drv_data->exti_banks[i];
+		chip_data = stm32_exti_chip_init(host_data, i, node);
 
 		gc = irq_get_domain_generic_chip(domain, i * IRQS_PER_BANK);
 
-		gc->reg_base = base;
+		gc->reg_base = host_data->base;
 		gc->chip_types->type = IRQ_TYPE_EDGE_BOTH;
-		gc->chip_types->chip.irq_ack = irq_gc_ack_set_bit;
+		gc->chip_types->chip.irq_ack = stm32_irq_ack;
 		gc->chip_types->chip.irq_mask = irq_gc_mask_clr_bit;
 		gc->chip_types->chip.irq_unmask = irq_gc_mask_set_bit;
 		gc->chip_types->chip.irq_set_type = stm32_irq_set_type;
-		gc->chip_types->chip.irq_set_wake = stm32_irq_set_wake;
-		gc->chip_types->regs.ack = stm32_bank->pr_ofst;
+		gc->chip_types->chip.irq_set_wake = irq_gc_set_wake;
+		gc->suspend = stm32_irq_suspend;
+		gc->resume = stm32_irq_resume;
+		gc->wake_enabled = IRQ_MSK(IRQS_PER_BANK);
+
 		gc->chip_types->regs.mask = stm32_bank->imr_ofst;
-		gc->private = (void *)stm32_bank;
-
-		/* Determine number of irqs supported */
-		writel_relaxed(~0UL, base + stm32_bank->rtsr_ofst);
-		irqs_mask = readl_relaxed(base + stm32_bank->rtsr_ofst);
-		nr_exti = fls(readl_relaxed(base + stm32_bank->rtsr_ofst));
-
-		/*
-		 * This IP has no reset, so after hot reboot we should
-		 * clear registers to avoid residue
-		 */
-		writel_relaxed(0, base + stm32_bank->imr_ofst);
-		writel_relaxed(0, base + stm32_bank->emr_ofst);
-		writel_relaxed(0, base + stm32_bank->rtsr_ofst);
-		writel_relaxed(0, base + stm32_bank->ftsr_ofst);
-		writel_relaxed(~0UL, base + stm32_bank->pr_ofst);
-
-		pr_info("%s: bank%d, External IRQs available:%#x\n",
-			node->full_name, i, irqs_mask);
+		gc->private = (void *)chip_data;
 	}
 
 	nr_irqs = of_irq_count(node);
@@ -287,15 +725,69 @@
 out_free_domain:
 	irq_domain_remove(domain);
 out_unmap:
-	iounmap(base);
+	iounmap(host_data->base);
+out_free_mem:
+	kfree(host_data->chips_data);
+	kfree(host_data);
+	return ret;
+}
+
+static const struct irq_domain_ops stm32_exti_h_domain_ops = {
+	.alloc	= stm32_exti_h_domain_alloc,
+	.free	= irq_domain_free_irqs_common,
+};
+
+static int
+__init stm32_exti_hierarchy_init(const struct stm32_exti_drv_data *drv_data,
+				 struct device_node *node,
+				 struct device_node *parent)
+{
+	struct irq_domain *parent_domain, *domain;
+	struct stm32_exti_host_data *host_data;
+	int ret, i;
+
+	parent_domain = irq_find_host(parent);
+	if (!parent_domain) {
+		pr_err("interrupt-parent not found\n");
+		return -EINVAL;
+	}
+
+	host_data = stm32_exti_host_init(drv_data, node);
+	if (!host_data)
+		return -ENOMEM;
+
+	for (i = 0; i < drv_data->bank_nr; i++)
+		stm32_exti_chip_init(host_data, i, node);
+
+	domain = irq_domain_add_hierarchy(parent_domain, 0,
+					  drv_data->bank_nr * IRQS_PER_BANK,
+					  node, &stm32_exti_h_domain_ops,
+					  host_data);
+
+	if (!domain) {
+		pr_err("%s: Could not register exti domain.\n", node->name);
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+
+	stm32_exti_h_syscore_init();
+
+	return 0;
+
+out_unmap:
+	iounmap(host_data->base);
+out_free_mem:
+	kfree(host_data->chips_data);
+	kfree(host_data);
 	return ret;
 }
 
 static int __init stm32f4_exti_of_init(struct device_node *np,
 				       struct device_node *parent)
 {
-	return stm32_exti_init(stm32f4xx_exti_banks,
-			ARRAY_SIZE(stm32f4xx_exti_banks), np);
+	return stm32_exti_init(&stm32f4xx_drv_data, np);
 }
 
 IRQCHIP_DECLARE(stm32f4_exti, "st,stm32-exti", stm32f4_exti_of_init);
@@ -303,8 +795,15 @@
 static int __init stm32h7_exti_of_init(struct device_node *np,
 				       struct device_node *parent)
 {
-	return stm32_exti_init(stm32h7xx_exti_banks,
-			ARRAY_SIZE(stm32h7xx_exti_banks), np);
+	return stm32_exti_init(&stm32h7xx_drv_data, np);
 }
 
 IRQCHIP_DECLARE(stm32h7_exti, "st,stm32h7-exti", stm32h7_exti_of_init);
+
+static int __init stm32mp1_exti_of_init(struct device_node *np,
+					struct device_node *parent)
+{
+	return stm32_exti_hierarchy_init(&stm32mp1_drv_data, np, parent);
+}
+
+IRQCHIP_DECLARE(stm32mp1_exti, "st,stm32mp1-exti", stm32mp1_exti_of_init);
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 19cd937..baa1ee2 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -1340,19 +1340,6 @@
 	return 0;
 }
 
-static int capi20_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, capi20_proc_show, NULL);
-}
-
-static const struct file_operations capi20_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= capi20_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /*
  * /proc/capi/capi20ncci:
  *  applid ncci
@@ -1373,23 +1360,10 @@
 	return 0;
 }
 
-static int capi20ncci_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, capi20ncci_proc_show, NULL);
-}
-
-static const struct file_operations capi20ncci_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= capi20ncci_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void __init proc_init(void)
 {
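+	/* proc_create_single() supplies the old single_open() boilerplate */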
-	proc_create("capi/capi20", 0, NULL, &capi20_proc_fops);
-	proc_create("capi/capi20ncci", 0, NULL, &capi20ncci_proc_fops);
+	proc_create_single("capi/capi20", 0, NULL, capi20_proc_show);
+	proc_create_single("capi/capi20ncci", 0, NULL, capi20ncci_proc_show);
 }
 
 static void __exit proc_exit(void)
diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index 49fef08..7ac5179 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -2460,22 +2460,9 @@
 	return 0;
 }
 
-static int capidrv_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, capidrv_proc_show, NULL);
-}
-
-static const struct file_operations capidrv_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= capidrv_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void __init proc_init(void)
 {
-	proc_create("capi/capidrv", 0, NULL, &capidrv_proc_fops);
+	proc_create_single("capi/capidrv", 0, NULL, capidrv_proc_show);
 }
 
 static void __exit proc_exit(void)
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 46c189a..0ff517d 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -534,7 +534,8 @@
 	init_waitqueue_head(&ctr->state_wait_queue);
 
 	sprintf(ctr->procfn, "capi/controllers/%d", ctr->cnr);
-	ctr->procent = proc_create_data(ctr->procfn, 0, NULL, ctr->proc_fops, ctr);
+	ctr->procent = proc_create_single_data(ctr->procfn, 0, NULL,
+			ctr->proc_show, ctr);
 
 	ncontrollers++;
 
diff --git a/drivers/isdn/capi/kcapi_proc.c b/drivers/isdn/capi/kcapi_proc.c
index 68db3c5..c94bd12 100644
--- a/drivers/isdn/capi/kcapi_proc.c
+++ b/drivers/isdn/capi/kcapi_proc.c
@@ -108,32 +108,6 @@
 	.show	= contrstats_show,
 };
 
-static int seq_controller_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &seq_controller_ops);
-}
-
-static int seq_contrstats_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &seq_contrstats_ops);
-}
-
-static const struct file_operations proc_controller_ops = {
-	.owner		= THIS_MODULE,
-	.open		= seq_controller_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const struct file_operations proc_contrstats_ops = {
-	.owner		= THIS_MODULE,
-	.open		= seq_contrstats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 // /proc/capi/applications:
 //      applid l3cnt dblkcnt dblklen #ncci recvqueuelen
 // /proc/capi/applstats:
@@ -216,34 +190,6 @@
 	.show	= applstats_show,
 };
 
-static int
-seq_applications_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &seq_applications_ops);
-}
-
-static int
-seq_applstats_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &seq_applstats_ops);
-}
-
-static const struct file_operations proc_applications_ops = {
-	.owner		= THIS_MODULE,
-	.open		= seq_applications_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const struct file_operations proc_applstats_ops = {
-	.owner		= THIS_MODULE,
-	.open		= seq_applstats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 // ---------------------------------------------------------------------------
 
 static void *capi_driver_start(struct seq_file *seq, loff_t *pos)
@@ -279,22 +225,6 @@
 	.show	= capi_driver_show,
 };
 
-static int
-seq_capi_driver_open(struct inode *inode, struct file *file)
-{
-	int err;
-	err = seq_open(file, &seq_capi_driver_ops);
-	return err;
-}
-
-static const struct file_operations proc_driver_ops = {
-	.owner		= THIS_MODULE,
-	.open		= seq_capi_driver_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 // ---------------------------------------------------------------------------
 
 void __init
@@ -302,11 +232,11 @@
 {
 	proc_mkdir("capi",             NULL);
 	proc_mkdir("capi/controllers", NULL);
-	proc_create("capi/controller",   0, NULL, &proc_controller_ops);
-	proc_create("capi/contrstats",   0, NULL, &proc_contrstats_ops);
-	proc_create("capi/applications", 0, NULL, &proc_applications_ops);
-	proc_create("capi/applstats",    0, NULL, &proc_applstats_ops);
-	proc_create("capi/driver",       0, NULL, &proc_driver_ops);
+	proc_create_seq("capi/controller",   0, NULL, &seq_controller_ops);
+	proc_create_seq("capi/contrstats",   0, NULL, &seq_contrstats_ops);
+	proc_create_seq("capi/applications", 0, NULL, &seq_applications_ops);
+	proc_create_seq("capi/applstats",    0, NULL, &seq_applstats_ops);
+	proc_create_seq("capi/driver",       0, NULL, &seq_capi_driver_ops);
 }
 
 void __exit
diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c
index ccec777..56748af 100644
--- a/drivers/isdn/gigaset/capi.c
+++ b/drivers/isdn/gigaset/capi.c
@@ -2437,19 +2437,6 @@
 	return 0;
 }
 
-static int gigaset_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, gigaset_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations gigaset_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= gigaset_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /**
  * gigaset_isdn_regdev() - register device to LL
  * @cs:		device descriptor structure.
@@ -2479,7 +2466,7 @@
 	iif->ctr.release_appl  = gigaset_release_appl;
 	iif->ctr.send_message  = gigaset_send_message;
 	iif->ctr.procinfo      = gigaset_procinfo;
-	iif->ctr.proc_fops = &gigaset_proc_fops;
+	iif->ctr.proc_show     = gigaset_proc_show,
 	INIT_LIST_HEAD(&iif->appls);
 	skb_queue_head_init(&iif->sendqueue);
 	atomic_set(&iif->sendqlen, 0);
diff --git a/drivers/isdn/hardware/avm/avmcard.h b/drivers/isdn/hardware/avm/avmcard.h
index c95712d..cdfa89c 100644
--- a/drivers/isdn/hardware/avm/avmcard.h
+++ b/drivers/isdn/hardware/avm/avmcard.h
@@ -556,7 +556,7 @@
 void b1_parse_version(avmctrl_info *card);
 irqreturn_t b1_interrupt(int interrupt, void *devptr);
 
-extern const struct file_operations b1ctl_proc_fops;
+int b1_proc_show(struct seq_file *m, void *v);
 
 avmcard_dmainfo *avmcard_dma_alloc(char *name, struct pci_dev *,
 				   long rsize, long ssize);
@@ -576,6 +576,6 @@
 			 capi_register_params *rp);
 void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl);
 u16  b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb);
-extern const struct file_operations b1dmactl_proc_fops;
+int b1dma_proc_show(struct seq_file *m, void *v);
 
 #endif /* _AVMCARD_H_ */
diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c
index b1833d0..5ee5489 100644
--- a/drivers/isdn/hardware/avm/b1.c
+++ b/drivers/isdn/hardware/avm/b1.c
@@ -637,7 +637,7 @@
 }
 
 /* ------------------------------------------------------------- */
-static int b1ctl_proc_show(struct seq_file *m, void *v)
+int b1_proc_show(struct seq_file *m, void *v)
 {
 	struct capi_ctr *ctrl = m->private;
 	avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
@@ -699,20 +699,7 @@
 
 	return 0;
 }
-
-static int b1ctl_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, b1ctl_proc_show, PDE_DATA(inode));
-}
-
-const struct file_operations b1ctl_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= b1ctl_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-EXPORT_SYMBOL(b1ctl_proc_fops);
+EXPORT_SYMBOL(b1_proc_show);
 
 /* ------------------------------------------------------------- */
 
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 9538a9e..6a3dc99 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -858,7 +858,7 @@
 
 /* ------------------------------------------------------------- */
 
-static int b1dmactl_proc_show(struct seq_file *m, void *v)
+int b1dma_proc_show(struct seq_file *m, void *v)
 {
 	struct capi_ctr *ctrl = m->private;
 	avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata);
@@ -941,20 +941,7 @@
 
 	return 0;
 }
-
-static int b1dmactl_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, b1dmactl_proc_show, PDE_DATA(inode));
-}
-
-const struct file_operations b1dmactl_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= b1dmactl_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-EXPORT_SYMBOL(b1dmactl_proc_fops);
+EXPORT_SYMBOL(b1dma_proc_show);
 
 /* ------------------------------------------------------------- */
 
diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c
index 54e871a..cdfea72 100644
--- a/drivers/isdn/hardware/avm/b1isa.c
+++ b/drivers/isdn/hardware/avm/b1isa.c
@@ -121,7 +121,7 @@
 	cinfo->capi_ctrl.load_firmware = b1_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = b1isa_procinfo;
-	cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
+	cinfo->capi_ctrl.proc_show     = b1_proc_show;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/b1pci.c b/drivers/isdn/hardware/avm/b1pci.c
index ac4863c..b76b57a 100644
--- a/drivers/isdn/hardware/avm/b1pci.c
+++ b/drivers/isdn/hardware/avm/b1pci.c
@@ -112,7 +112,7 @@
 	cinfo->capi_ctrl.load_firmware = b1_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = b1pci_procinfo;
-	cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
+	cinfo->capi_ctrl.proc_show     = b1_proc_show;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 	cinfo->capi_ctrl.owner         = THIS_MODULE;
 
@@ -251,7 +251,7 @@
 	cinfo->capi_ctrl.load_firmware = b1dma_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1dma_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = b1pciv4_procinfo;
-	cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops;
+	cinfo->capi_ctrl.proc_show     = b1dma_proc_show;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/b1pcmcia.c b/drivers/isdn/hardware/avm/b1pcmcia.c
index 6b0d19d..3aca16e 100644
--- a/drivers/isdn/hardware/avm/b1pcmcia.c
+++ b/drivers/isdn/hardware/avm/b1pcmcia.c
@@ -108,7 +108,7 @@
 	cinfo->capi_ctrl.load_firmware = b1_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = b1pcmcia_procinfo;
-	cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
+	cinfo->capi_ctrl.proc_show     = b1_proc_show;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 034caba..ac72cd2 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -1127,19 +1127,6 @@
 	return 0;
 }
 
-static int c4_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, c4_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations c4_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= c4_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* ------------------------------------------------------------- */
 
 static int c4_add_card(struct capicardparams *p, struct pci_dev *dev,
@@ -1211,7 +1198,7 @@
 		cinfo->capi_ctrl.load_firmware = c4_load_firmware;
 		cinfo->capi_ctrl.reset_ctr     = c4_reset_ctr;
 		cinfo->capi_ctrl.procinfo      = c4_procinfo;
-		cinfo->capi_ctrl.proc_fops = &c4_proc_fops;
+		cinfo->capi_ctrl.proc_show     = c4_proc_show;
 		strcpy(cinfo->capi_ctrl.name, card->name);
 
 		retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c
index 9f80d20..2153619 100644
--- a/drivers/isdn/hardware/avm/t1isa.c
+++ b/drivers/isdn/hardware/avm/t1isa.c
@@ -430,7 +430,7 @@
 	cinfo->capi_ctrl.load_firmware = t1isa_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = t1isa_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = t1isa_procinfo;
-	cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops;
+	cinfo->capi_ctrl.proc_show     = b1_proc_show;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/avm/t1pci.c b/drivers/isdn/hardware/avm/t1pci.c
index 2180b16..f5ed1d5 100644
--- a/drivers/isdn/hardware/avm/t1pci.c
+++ b/drivers/isdn/hardware/avm/t1pci.c
@@ -119,7 +119,7 @@
 	cinfo->capi_ctrl.load_firmware = b1dma_load_firmware;
 	cinfo->capi_ctrl.reset_ctr     = b1dma_reset_ctr;
 	cinfo->capi_ctrl.procinfo      = t1pci_procinfo;
-	cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops;
+	cinfo->capi_ctrl.proc_show     = b1dma_proc_show;
 	strcpy(cinfo->capi_ctrl.name, card->name);
 
 	retval = attach_capi_ctr(&cinfo->capi_ctrl);
diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c
index be36d82..f9244dc 100644
--- a/drivers/isdn/hardware/eicon/capimain.c
+++ b/drivers/isdn/hardware/eicon/capimain.c
@@ -90,19 +90,6 @@
 	return 0;
 }
 
-static int diva_ctl_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, diva_ctl_proc_show, NULL);
-}
-
-static const struct file_operations diva_ctl_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= diva_ctl_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /*
  * set additional os settings in capi_ctr struct
  */
@@ -111,7 +98,7 @@
 	ctrl->driver_name = DRIVERLNAME;
 	ctrl->load_firmware = NULL;
 	ctrl->reset_ctr = NULL;
-	ctrl->proc_fops = &diva_ctl_proc_fops;
+	ctrl->proc_show = diva_ctl_proc_show;
 	ctrl->owner = THIS_MODULE;
 }
 
diff --git a/drivers/isdn/hardware/eicon/diva_didd.c b/drivers/isdn/hardware/eicon/diva_didd.c
index fab6ccf..60e7925 100644
--- a/drivers/isdn/hardware/eicon/diva_didd.c
+++ b/drivers/isdn/hardware/eicon/diva_didd.c
@@ -78,26 +78,13 @@
 	return 0;
 }
 
-static int divadidd_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, divadidd_proc_show, NULL);
-}
-
-static const struct file_operations divadidd_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= divadidd_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init create_proc(void)
 {
 	proc_net_eicon = proc_mkdir("eicon", init_net.proc_net);
 
 	if (proc_net_eicon) {
-		proc_didd = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon,
-					&divadidd_proc_fops);
+		proc_didd = proc_create_single(DRIVERLNAME, S_IRUGO,
+				proc_net_eicon, divadidd_proc_show);
 		return (1);
 	}
 	return (0);
diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c
index 525518c..e7081e0 100644
--- a/drivers/isdn/hardware/eicon/divasi.c
+++ b/drivers/isdn/hardware/eicon/divasi.c
@@ -101,23 +101,10 @@
 	return 0;
 }
 
-static int um_idi_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, um_idi_proc_show, NULL);
-}
-
-static const struct file_operations um_idi_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= um_idi_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init create_um_idi_proc(void)
 {
-	um_idi_proc_entry = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon,
-					&um_idi_proc_fops);
+	um_idi_proc_entry = proc_create_single(DRIVERLNAME, S_IRUGO,
+			proc_net_eicon, um_idi_proc_show);
 	if (!um_idi_proc_entry)
 		return (0);
 	return (1);
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index eac0f51..a2c15cd 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -467,19 +467,6 @@
 	return 0;
 }
 
-static int hycapi_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, hycapi_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations hycapi_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= hycapi_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /**************************************************************
 hycapi_load_firmware
 
@@ -774,7 +761,7 @@
 		ctrl->load_firmware = hycapi_load_firmware;
 		ctrl->reset_ctr     = hycapi_reset_ctr;
 		ctrl->procinfo      = hycapi_procinfo;
-		ctrl->proc_fops = &hycapi_proc_fops;
+		ctrl->proc_show     = hycapi_proc_show;
 		strcpy(ctrl->name, cinfo->cardname);
 		ctrl->owner = THIS_MODULE;
 
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 1f8f489..98f90aa 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -588,7 +588,7 @@
 	.getname	= data_sock_getname,
 	.sendmsg	= mISDN_sock_sendmsg,
 	.recvmsg	= mISDN_sock_recvmsg,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= data_sock_setsockopt,
@@ -745,7 +745,6 @@
 	.getname	= sock_no_getname,
 	.sendmsg	= sock_no_sendmsg,
 	.recvmsg	= sock_no_recvmsg,
-	.poll		= sock_no_poll,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 2c896c0..6e3a998 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -104,6 +104,19 @@
 	  This option enables support for LEDs offered by Motorola's
 	  CPCAP PMIC.
 
+config LEDS_CR0014114
+	tristate "LED Support for Crane CR0014114"
+	depends on LEDS_CLASS
+	depends on SPI
+	depends on OF
+	help
+	  This option enables support for the CR0014114 LED board,
+	  which is widely used in vending machines produced by
+	  Crane Merchandising Systems.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called leds-cr0014114.
+
 config LEDS_LM3530
 	tristate "LCD Backlight driver for LM3530"
 	depends on LEDS_CLASS
@@ -145,6 +158,15 @@
 	  This option enables support for the TI LM3692x family
 	  of white LED string drivers used for backlighting.
 
+config LEDS_LM3601X
+	tristate "LED support for LM3601x Chips"
+	depends on LEDS_CLASS && I2C
+	depends on LEDS_CLASS_FLASH
+	select REGMAP_I2C
+	help
+	  This option enables support for the TI LM3601x family
+	  of flash, torch and indicator LED drivers.
+
 config LEDS_LOCOMO
 	tristate "LED Support for Locomo device"
 	depends on LEDS_CLASS
@@ -647,6 +669,17 @@
 	  LED controllers. They are I2C devices with multiple constant-current
 	  channels, each with independent 256-level PWM control.
 
+config LEDS_SC27XX_BLTC
+	tristate "LED support for the SC27xx breathing light controller"
+	depends on LEDS_CLASS && MFD_SC27XX_PMIC
+	depends on OF
+	help
+	  Say Y here to include support for the SC27xx breathing light controller
+	  LEDs.
+
+	  This driver can also be built as a module. If so the module will be
+	  called leds-sc27xx-bltc.
+
 comment "LED driver for blink(1) USB RGB LED is under Special HID drivers (HID_THINGM)"
 
 config LEDS_BLINKM
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 91eca81..420b5d2 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -76,8 +76,11 @@
 obj-$(CONFIG_LEDS_NIC78BX)		+= leds-nic78bx.o
 obj-$(CONFIG_LEDS_MT6323)		+= leds-mt6323.o
 obj-$(CONFIG_LEDS_LM3692X)		+= leds-lm3692x.o
+obj-$(CONFIG_LEDS_SC27XX_BLTC)		+= leds-sc27xx-bltc.o
+obj-$(CONFIG_LEDS_LM3601X)		+= leds-lm3601x.o
 
 # LED SPI Drivers
+obj-$(CONFIG_LEDS_CR0014114)		+= leds-cr0014114.o
 obj-$(CONFIG_LEDS_DAC124S085)		+= leds-dac124s085.o
 
 # LED Userspace Drivers
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index b0e2d55..3c7e348 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -260,10 +260,14 @@
 	if (ret < 0)
 		return ret;
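+	/*
+	 * Take led_access before the device is created so sysfs access
+	 * cannot race with the rest of the registration below.
+	 */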
 
+	mutex_init(&led_cdev->led_access);
+	mutex_lock(&led_cdev->led_access);
 	led_cdev->dev = device_create_with_groups(leds_class, parent, 0,
 				led_cdev, led_cdev->groups, "%s", name);
-	if (IS_ERR(led_cdev->dev))
+	if (IS_ERR(led_cdev->dev)) {
+		mutex_unlock(&led_cdev->led_access);
 		return PTR_ERR(led_cdev->dev);
+	}
 	led_cdev->dev->of_node = np;
 
 	if (ret)
@@ -274,6 +278,7 @@
 		ret = led_add_brightness_hw_changed(led_cdev);
 		if (ret) {
 			device_unregister(led_cdev->dev);
+			mutex_unlock(&led_cdev->led_access);
 			return ret;
 		}
 	}
@@ -285,7 +290,6 @@
 #ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED
 	led_cdev->brightness_hw_changed = -1;
 #endif
-	mutex_init(&led_cdev->led_access);
 	/* add to the list of leds */
 	down_write(&leds_list_lock);
 	list_add_tail(&led_cdev->node, &leds_list);
@@ -302,6 +306,8 @@
 	led_trigger_set_default(led_cdev);
 #endif
 
+	mutex_unlock(&led_cdev->led_access);
+
 	dev_dbg(parent, "Registered led device: %s\n",
 			led_cdev->name);
 
diff --git a/drivers/leds/leds-cr0014114.c b/drivers/leds/leds-cr0014114.c
new file mode 100644
index 0000000..a4b1c1d
--- /dev/null
+++ b/drivers/leds/leds-cr0014114.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Crane Merchandising Systems. All rights reserved.
+// Copyright (C) 2018 Oleh Kravchenko <oleg@kaa.org.ua>
+
+#include <linux/delay.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/spi/spi.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/uleds.h>
+
+/*
+ *  CR0014114 SPI protocol description:
+ *  +----+-----------------------------------+----+
+ *  | CMD|             BRIGHTNESS            |CRC |
+ *  +----+-----------------------------------+----+
+ *  |    | LED0| LED1| LED2| LED3| LED4| LED5|    |
+ *  |    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+    |
+ *  |    |R|G|B|R|G|B|R|G|B|R|G|B|R|G|B|R|G|B|    |
+ *  | 1  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 1  |
+ *  |    |1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|    |
+ *  |    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+    |
+ *  |    |               18                  |    |
+ *  +----+-----------------------------------+----+
+ *  |                    20                       |
+ *  +---------------------------------------------+
+ *
+ *  PS: Boards can be connected in a chain:
+ *      SPI -> board0 -> board1 -> board2 ..
+ */
+
+/* CR0014114 SPI commands */
+#define CR_SET_BRIGHTNESS	0x80
+#define CR_INIT_REENUMERATE	0x81
+#define CR_NEXT_REENUMERATE	0x82
+
+/* CR0014114 default settings */
+#define CR_MAX_BRIGHTNESS	GENMASK(6, 0)
+#define CR_FW_DELAY_MSEC	10
+#define CR_RECOUNT_DELAY	(HZ * 3600)
+
+struct cr0014114_led {
+	char			name[LED_MAX_NAME_SIZE];
+	struct cr0014114	*priv;
+	struct led_classdev	ldev;
+	u8			brightness;
+};
+
+struct cr0014114 {
+	bool			do_recount;
+	size_t			count;
+	struct delayed_work	work;
+	struct device		*dev;
+	struct mutex		lock;
+	struct spi_device	*spi;
+	u8			*buf;
+	unsigned long		delay;
+	struct cr0014114_led	leds[];
+};
+
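+/*
+ * Frame checksum: 1 plus the sum of the payload bytes, with the top bit
+ * forced.  Example: with all 18 brightness bytes zero the result is
+ * 0x81, which collides with CR_INIT_REENUMERATE and is therefore
+ * remapped to 0xfe below.
+ */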
+static void cr0014114_calc_crc(u8 *buf, const size_t len)
+{
+	size_t	i;
+	u8	crc;
+
+	for (i = 1, crc = 1; i < len - 1; i++)
+		crc += buf[i];
+	crc |= BIT(7);
+
+	/* remap CRC values that collide with the SPI command codes */
+	if (crc == CR_SET_BRIGHTNESS ||
+	    crc == CR_INIT_REENUMERATE ||
+	    crc == CR_NEXT_REENUMERATE)
+		crc = 0xfe;
+
+	buf[len - 1] = crc;
+}
+
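+/*
+ * Re-enumerate the LED chain: one CR_INIT_REENUMERATE, then one
+ * CR_NEXT_REENUMERATE per LED, with CR_FW_DELAY_MSEC of settling time
+ * before each step.
+ */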
+static int cr0014114_recount(struct cr0014114 *priv)
+{
+	int	ret;
+	size_t	i;
+	u8	cmd;
+
+	dev_dbg(priv->dev, "LEDs recount is started\n");
+
+	cmd = CR_INIT_REENUMERATE;
+	ret = spi_write(priv->spi, &cmd, sizeof(cmd));
+	if (ret)
+		goto err;
+
+	cmd = CR_NEXT_REENUMERATE;
+	for (i = 0; i < priv->count; i++) {
+		msleep(CR_FW_DELAY_MSEC);
+
+		ret = spi_write(priv->spi, &cmd, sizeof(cmd));
+		if (ret)
+			goto err;
+	}
+
+err:
+	dev_dbg(priv->dev, "LEDs recount is finished\n");
+
+	if (ret)
+		dev_err(priv->dev, "with error %d", ret);
+
+	return ret;
+}
+
+static int cr0014114_sync(struct cr0014114 *priv)
+{
+	int		ret;
+	size_t		i;
+	unsigned long	udelay, now = jiffies;
+
+	/* the firmware needs a gap between SPI transfers; wait if needed */
+	if (time_after(priv->delay, now)) {
+		udelay = jiffies_to_usecs(priv->delay - now);
+		usleep_range(udelay, udelay + 1);
+	}
+
+	if (unlikely(priv->do_recount)) {
+		ret = cr0014114_recount(priv);
+		if (ret)
+			goto err;
+
+		priv->do_recount = false;
+		msleep(CR_FW_DELAY_MSEC);
+	}
+
+	priv->buf[0] = CR_SET_BRIGHTNESS;
+	for (i = 0; i < priv->count; i++)
+		priv->buf[i + 1] = priv->leds[i].brightness;
+	cr0014114_calc_crc(priv->buf, priv->count + 2);
+	ret = spi_write(priv->spi, priv->buf, priv->count + 2);
+
+err:
+	priv->delay = jiffies + msecs_to_jiffies(CR_FW_DELAY_MSEC);
+
+	return ret;
+}
+
+static void cr0014114_recount_work(struct work_struct *work)
+{
+	int			ret;
+	struct cr0014114	*priv = container_of(work,
+						     struct cr0014114,
+						     work.work);
+
+	mutex_lock(&priv->lock);
+	priv->do_recount = true;
+	ret = cr0014114_sync(priv);
+	mutex_unlock(&priv->lock);
+
+	if (ret)
+		dev_warn(priv->dev, "sync of LEDs failed %d\n", ret);
+
+	schedule_delayed_work(&priv->work, CR_RECOUNT_DELAY);
+}
+
+static int cr0014114_set_sync(struct led_classdev *ldev,
+			      enum led_brightness brightness)
+{
+	int			ret;
+	struct cr0014114_led    *led = container_of(ldev,
+						    struct cr0014114_led,
+						    ldev);
+
+	dev_dbg(led->priv->dev, "Set brightness of %s to %d\n",
+		led->name, brightness);
+
+	mutex_lock(&led->priv->lock);
+	led->brightness = (u8)brightness;
+	ret = cr0014114_sync(led->priv);
+	mutex_unlock(&led->priv->lock);
+
+	return ret;
+}
+
+static int cr0014114_probe_dt(struct cr0014114 *priv)
+{
+	size_t			i = 0;
+	struct cr0014114_led	*led;
+	struct fwnode_handle	*child;
+	struct device_node	*np;
+	int			ret;
+	const char		*str;
+
+	device_for_each_child_node(priv->dev, child) {
+		np = to_of_node(child);
+		led = &priv->leds[i];
+
+		ret = fwnode_property_read_string(child, "label", &str);
+		if (ret)
+			snprintf(led->name, sizeof(led->name),
+				 "cr0014114::");
+		else
+			snprintf(led->name, sizeof(led->name),
+				 "cr0014114:%s", str);
+
+		fwnode_property_read_string(child, "linux,default-trigger",
+					    &led->ldev.default_trigger);
+
+		led->priv			  = priv;
+		led->ldev.name			  = led->name;
+		led->ldev.max_brightness	  = CR_MAX_BRIGHTNESS;
+		led->ldev.brightness_set_blocking = cr0014114_set_sync;
+
+		ret = devm_of_led_classdev_register(priv->dev, np,
+						    &led->ldev);
+		if (ret) {
+			dev_err(priv->dev,
+				"failed to register LED device %s, err %d",
+				led->name, ret);
+			fwnode_handle_put(child);
+			return ret;
+		}
+
+		led->ldev.dev->of_node = np;
+
+		i++;
+	}
+
+	return 0;
+}
+
+static int cr0014114_probe(struct spi_device *spi)
+{
+	struct cr0014114	*priv;
+	size_t			count;
+	int			ret;
+
+	count = device_get_child_node_count(&spi->dev);
+	if (!count) {
+		dev_err(&spi->dev, "LEDs are not defined in device tree!");
+		return -ENODEV;
+	}
+
+	priv = devm_kzalloc(&spi->dev,
+			    sizeof(*priv) + sizeof(*priv->leds) * count,
+			    GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->buf = devm_kzalloc(&spi->dev, count + 2, GFP_KERNEL);
+	if (!priv->buf)
+		return -ENOMEM;
+
+	mutex_init(&priv->lock);
+	INIT_DELAYED_WORK(&priv->work, cr0014114_recount_work);
+	priv->count	= count;
+	priv->dev	= &spi->dev;
+	priv->spi	= spi;
+	priv->delay	= jiffies -
+			  msecs_to_jiffies(CR_FW_DELAY_MSEC);
+
+	priv->do_recount = true;
+	ret = cr0014114_sync(priv);
+	if (ret) {
+		dev_err(priv->dev, "first recount failed %d\n", ret);
+		return ret;
+	}
+
+	priv->do_recount = true;
+	ret = cr0014114_sync(priv);
+	if (ret) {
+		dev_err(priv->dev, "second recount failed %d\n", ret);
+		return ret;
+	}
+
+	ret = cr0014114_probe_dt(priv);
+	if (ret)
+		return ret;
+
+	/* set up periodic recount work to work around buggy firmware */
+	schedule_delayed_work(&priv->work, CR_RECOUNT_DELAY);
+
+	spi_set_drvdata(spi, priv);
+
+	return 0;
+}
+
+static int cr0014114_remove(struct spi_device *spi)
+{
+	struct cr0014114 *priv = spi_get_drvdata(spi);
+
+	cancel_delayed_work_sync(&priv->work);
+	mutex_destroy(&priv->lock);
+
+	return 0;
+}
+
+static const struct of_device_id cr0014114_dt_ids[] = {
+	{ .compatible = "crane,cr0014114", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, cr0014114_dt_ids);
+
+static struct spi_driver cr0014114_driver = {
+	.probe		= cr0014114_probe,
+	.remove		= cr0014114_remove,
+	.driver = {
+		.name		= KBUILD_MODNAME,
+		.of_match_table	= cr0014114_dt_ids,
+	},
+};
+
+module_spi_driver(cr0014114_driver);
+
+MODULE_AUTHOR("Oleh Kravchenko <oleg@kaa.org.ua>");
+MODULE_DESCRIPTION("cr0014114 LED driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("spi:cr0014114");
diff --git a/drivers/leds/leds-lm3601x.c b/drivers/leds/leds-lm3601x.c
new file mode 100644
index 0000000..081aa71
--- /dev/null
+++ b/drivers/leds/leds-lm3601x.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+// Flash and torch driver for Texas Instruments LM3601X LED
+// Flash driver chip family
+// Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com/
+
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/led-class-flash.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <uapi/linux/uleds.h>
+
+#define LM3601X_LED_IR		0x0
+#define LM3601X_LED_TORCH	0x1
+
+/* Registers */
+#define LM3601X_ENABLE_REG	0x01
+#define LM3601X_CFG_REG		0x02
+#define LM3601X_LED_FLASH_REG	0x03
+#define LM3601X_LED_TORCH_REG	0x04
+#define LM3601X_FLAGS_REG	0x05
+#define LM3601X_DEV_ID_REG	0x06
+
+#define LM3601X_SW_RESET	BIT(7)
+
+/* Enable Mode bits */
+#define LM3601X_MODE_STANDBY	0x00
+#define LM3601X_MODE_IR_DRV	BIT(0)
+#define LM3601X_MODE_TORCH	BIT(1)
+#define LM3601X_MODE_STROBE	(BIT(0) | BIT(1))
+#define LM3601X_STRB_EN		BIT(2)
+#define LM3601X_STRB_EDGE_TRIG	BIT(3)
+#define LM3601X_IVFM_EN		BIT(4)
+
+#define LM36010_BOOST_LIMIT_28	BIT(5)
+#define LM36010_BOOST_FREQ_4MHZ	BIT(6)
+#define LM36010_BOOST_MODE_PASS	BIT(7)
+
+/* Flag Mask */
+#define LM3601X_FLASH_TIME_OUT	BIT(0)
+#define LM3601X_UVLO_FAULT	BIT(1)
+#define LM3601X_THERM_SHUTDOWN	BIT(2)
+#define LM3601X_THERM_CURR	BIT(3)
+#define LM36010_CURR_LIMIT	BIT(4)
+#define LM3601X_SHORT_FAULT	BIT(5)
+#define LM3601X_IVFM_TRIP	BIT(6)
+#define LM36010_OVP_FAULT	BIT(7)
+
+#define LM3601X_MAX_TORCH_I_UA	376000
+#define LM3601X_MIN_TORCH_I_UA	2400
+#define LM3601X_TORCH_REG_DIV	2965
+
+#define LM3601X_MAX_STROBE_I_UA	1500000
+#define LM3601X_MIN_STROBE_I_UA	11000
+#define LM3601X_STROBE_REG_DIV	11800
+
+#define LM3601X_TIMEOUT_MASK	0x1e
+#define LM3601X_ENABLE_MASK	(LM3601X_MODE_IR_DRV | LM3601X_MODE_TORCH)
+
+#define LM3601X_LOWER_STEP_US	40000
+#define LM3601X_UPPER_STEP_US	200000
+#define LM3601X_MIN_TIMEOUT_US	40000
+#define LM3601X_MAX_TIMEOUT_US	1600000
+#define LM3601X_TIMEOUT_XOVER_US 400000
+
+enum lm3601x_type {
+	CHIP_LM36010 = 0,
+	CHIP_LM36011,
+};
+
+/**
+ * struct lm3601x_led - lm3601x LED flash device data
+ * @fled_cdev: flash LED class device pointer
+ * @client: Pointer to the I2C client
+ * @regmap: Device's register map
+ * @lock: Lock for reading/writing the device
+ * @led_name: LED label for the Torch or IR LED
+ * @flash_timeout: the timeout for the flash
+ * @last_flag: last known flags register value
+ * @torch_current_max: maximum current for the torch
+ * @flash_current_max: maximum current for the flash
+ * @max_flash_timeout: maximum timeout for the flash
+ * @led_mode: The mode to enable, either IR or Torch
+ */
+struct lm3601x_led {
+	struct led_classdev_flash fled_cdev;
+	struct i2c_client *client;
+	struct regmap *regmap;
+	struct mutex lock;
+
+	char led_name[LED_MAX_NAME_SIZE];
+
+	unsigned int flash_timeout;
+	unsigned int last_flag;
+
+	u32 torch_current_max;
+	u32 flash_current_max;
+	u32 max_flash_timeout;
+
+	u32 led_mode;
+};
+
+static const struct reg_default lm3601x_regmap_defs[] = {
+	{ LM3601X_ENABLE_REG, 0x20 },
+	{ LM3601X_CFG_REG, 0x15 },
+	{ LM3601X_LED_FLASH_REG, 0x00 },
+	{ LM3601X_LED_TORCH_REG, 0x00 },
+};
+
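+/* the flags register reports live fault status, so it is never cached */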
+static bool lm3601x_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LM3601X_FLAGS_REG:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config lm3601x_regmap = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = LM3601X_DEV_ID_REG,
+	.reg_defaults = lm3601x_regmap_defs,
+	.num_reg_defaults = ARRAY_SIZE(lm3601x_regmap_defs),
+	.cache_type = REGCACHE_RBTREE,
+	.volatile_reg = lm3601x_volatile_reg,
+};
+
+static struct lm3601x_led *fled_cdev_to_led(struct led_classdev_flash *fled_cdev)
+{
+	return container_of(fled_cdev, struct lm3601x_led, fled_cdev);
+}
+
+static int lm3601x_read_faults(struct lm3601x_led *led)
+{
+	int flags_val;
+	int ret;
+
+	ret = regmap_read(led->regmap, LM3601X_FLAGS_REG, &flags_val);
+	if (ret < 0)
+		return -EIO;
+
+	led->last_flag = 0;
+
+	if (flags_val & LM36010_OVP_FAULT)
+		led->last_flag |= LED_FAULT_OVER_VOLTAGE;
+
+	if (flags_val & (LM3601X_THERM_SHUTDOWN | LM3601X_THERM_CURR))
+		led->last_flag |= LED_FAULT_OVER_TEMPERATURE;
+
+	if (flags_val & LM3601X_SHORT_FAULT)
+		led->last_flag |= LED_FAULT_SHORT_CIRCUIT;
+
+	if (flags_val & LM36010_CURR_LIMIT)
+		led->last_flag |= LED_FAULT_OVER_CURRENT;
+
+	if (flags_val & LM3601X_UVLO_FAULT)
+		led->last_flag |= LED_FAULT_UNDER_VOLTAGE;
+
+	if (flags_val & LM3601X_IVFM_TRIP)
+		led->last_flag |= LED_FAULT_INPUT_VOLTAGE;
+
+	if (flags_val & LM3601X_THERM_SHUTDOWN)
+		led->last_flag |= LED_FAULT_LED_OVER_TEMPERATURE;
+
+	return led->last_flag;
+}
+
+static int lm3601x_brightness_set(struct led_classdev *cdev,
+					enum led_brightness brightness)
+{
+	struct led_classdev_flash *fled_cdev = lcdev_to_flcdev(cdev);
+	struct lm3601x_led *led = fled_cdev_to_led(fled_cdev);
+	int ret, led_mode_val;
+
+	mutex_lock(&led->lock);
+
+	ret = lm3601x_read_faults(led);
+	if (ret < 0)
+		goto out;
+
+	if (led->led_mode == LM3601X_LED_TORCH)
+		led_mode_val = LM3601X_MODE_TORCH;
+	else
+		led_mode_val = LM3601X_MODE_IR_DRV;
+
+	if (brightness == LED_OFF) {
+		ret = regmap_update_bits(led->regmap, LM3601X_ENABLE_REG,
+					led_mode_val, LED_OFF);
+		goto out;
+	}
+
+	ret = regmap_write(led->regmap, LM3601X_LED_TORCH_REG, brightness);
+	if (ret < 0)
+		goto out;
+
+	ret = regmap_update_bits(led->regmap, LM3601X_ENABLE_REG,
+				LM3601X_MODE_TORCH | LM3601X_MODE_IR_DRV,
+				led_mode_val);
+out:
+	mutex_unlock(&led->lock);
+	return ret;
+}
+
+static int lm3601x_strobe_set(struct led_classdev_flash *fled_cdev,
+				bool state)
+{
+	struct lm3601x_led *led = fled_cdev_to_led(fled_cdev);
+	int timeout_reg_val;
+	int current_timeout;
+	int ret;
+
+	mutex_lock(&led->lock);
+
+	ret = regmap_read(led->regmap, LM3601X_CFG_REG, &current_timeout);
+	if (ret < 0)
+		goto out;
+
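+	/*
+	 * The timeout field is piecewise linear: 40ms steps below the
+	 * 400ms crossover (40000us -> 0x00), 200ms steps above it with a
+	 * 0x07 offset (400000us -> 0x09).
+	 */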
+	if (led->flash_timeout >= LM3601X_TIMEOUT_XOVER_US)
+		timeout_reg_val = led->flash_timeout / LM3601X_UPPER_STEP_US + 0x07;
+	else
+		timeout_reg_val = led->flash_timeout / LM3601X_LOWER_STEP_US - 0x01;
+
+	if (led->flash_timeout != current_timeout)
+		ret = regmap_update_bits(led->regmap, LM3601X_CFG_REG,
+					LM3601X_TIMEOUT_MASK, timeout_reg_val);
+
+	if (state)
+		ret = regmap_update_bits(led->regmap, LM3601X_ENABLE_REG,
+					LM3601X_MODE_TORCH | LM3601X_MODE_IR_DRV,
+					LM3601X_MODE_STROBE);
+	else
+		ret = regmap_update_bits(led->regmap, LM3601X_ENABLE_REG,
+					LM3601X_MODE_STROBE, LED_OFF);
+
+	ret = lm3601x_read_faults(led);
+out:
+	mutex_unlock(&led->lock);
+	return ret;
+}
+
+static int lm3601x_flash_brightness_set(struct led_classdev_flash *fled_cdev,
+					u32 brightness)
+{
+	struct lm3601x_led *led = fled_cdev_to_led(fled_cdev);
+	u8 brightness_val;
+	int ret;
+
+	mutex_lock(&led->lock);
+	ret = lm3601x_read_faults(led);
+	if (ret < 0)
+		goto out;
+
+	if (brightness == LED_OFF) {
+		ret = regmap_update_bits(led->regmap, LM3601X_ENABLE_REG,
+					LM3601X_MODE_STROBE, LED_OFF);
+		goto out;
+	}
+
+	brightness_val = brightness / LM3601X_STROBE_REG_DIV;
+
+	ret = regmap_write(led->regmap, LM3601X_LED_FLASH_REG, brightness_val);
+out:
+	mutex_unlock(&led->lock);
+	return ret;
+}
+
+static int lm3601x_flash_timeout_set(struct led_classdev_flash *fled_cdev,
+				u32 timeout)
+{
+	struct lm3601x_led *led = fled_cdev_to_led(fled_cdev);
+
+	mutex_lock(&led->lock);
+
+	led->flash_timeout = timeout;
+
+	mutex_unlock(&led->lock);
+
+	return 0;
+}
+
+static int lm3601x_strobe_get(struct led_classdev_flash *fled_cdev, bool *state)
+{
+	struct lm3601x_led *led = fled_cdev_to_led(fled_cdev);
+	int strobe_state;
+	int ret;
+
+	mutex_lock(&led->lock);
+
+	ret = regmap_read(led->regmap, LM3601X_ENABLE_REG, &strobe_state);
+	if (ret < 0)
+		goto out;
+
+	*state = strobe_state & LM3601X_MODE_STROBE;
+
+out:
+	mutex_unlock(&led->lock);
+	return ret;
+}
+
+static int lm3601x_flash_fault_get(struct led_classdev_flash *fled_cdev,
+				u32 *fault)
+{
+	struct lm3601x_led *led = fled_cdev_to_led(fled_cdev);
+
+	lm3601x_read_faults(led);
+
+	*fault = led->last_flag;
+
+	return 0;
+}
+
+static const struct led_flash_ops flash_ops = {
+	.flash_brightness_set	= lm3601x_flash_brightness_set,
+	.strobe_set		= lm3601x_strobe_set,
+	.strobe_get		= lm3601x_strobe_get,
+	.timeout_set		= lm3601x_flash_timeout_set,
+	.fault_get		= lm3601x_flash_fault_get,
+};
+
+static int lm3601x_register_leds(struct lm3601x_led *led)
+{
+	struct led_classdev *led_cdev;
+	struct led_flash_setting *setting;
+
+	led->fled_cdev.ops = &flash_ops;
+
+	setting = &led->fled_cdev.timeout;
+	setting->min = LM3601X_MIN_TIMEOUT_US;
+	setting->max = led->max_flash_timeout;
+	setting->step = LM3601X_LOWER_STEP_US;
+	setting->val = led->max_flash_timeout;
+
+	setting = &led->fled_cdev.brightness;
+	setting->min = LM3601X_MIN_STROBE_I_UA;
+	setting->max = led->flash_current_max;
+	setting->step = LM3601X_TORCH_REG_DIV;
+	setting->val = led->flash_current_max;
+
+	led_cdev = &led->fled_cdev.led_cdev;
+	led_cdev->name = led->led_name;
+	led_cdev->brightness_set_blocking = lm3601x_brightness_set;
+	led_cdev->max_brightness = DIV_ROUND_UP(led->torch_current_max,
+						LM3601X_TORCH_REG_DIV);
+	led_cdev->flags |= LED_DEV_CAP_FLASH;
+
+	return led_classdev_flash_register(&led->client->dev, &led->fled_cdev);
+}
+
+static int lm3601x_parse_node(struct lm3601x_led *led)
+{
+	struct fwnode_handle *child = NULL;
+	int ret = -ENODEV;
+	const char *name;
+
+	child = device_get_next_child_node(&led->client->dev, child);
+	if (!child) {
+		dev_err(&led->client->dev, "No LED child node\n");
+		return ret;
+	}
+
+	ret = fwnode_property_read_u32(child, "reg", &led->led_mode);
+	if (ret) {
+		dev_err(&led->client->dev, "reg DT property missing\n");
+		goto out_err;
+	}
+
+	if (led->led_mode > LM3601X_LED_TORCH ||
+	    led->led_mode < LM3601X_LED_IR) {
+		dev_warn(&led->client->dev, "Invalid led mode requested\n");
+		ret = -EINVAL;
+		goto out_err;
+	}
+
+	ret = fwnode_property_read_string(child, "label", &name);
+	if (ret) {
+		if (led->led_mode == LM3601X_LED_TORCH)
+			name = "torch";
+		else
+			name = "infrared";
+	}
+
+	snprintf(led->led_name, sizeof(led->led_name),
+		"%s:%s", led->client->name, name);
+
+	ret = fwnode_property_read_u32(child, "led-max-microamp",
+					&led->torch_current_max);
+	if (ret) {
+		dev_warn(&led->client->dev,
+			"led-max-microamp DT property missing\n");
+		goto out_err;
+	}
+
+	ret = fwnode_property_read_u32(child, "flash-max-microamp",
+				&led->flash_current_max);
+	if (ret) {
+		dev_warn(&led->client->dev,
+			 "flash-max-microamp DT property missing\n");
+		goto out_err;
+	}
+
+	ret = fwnode_property_read_u32(child, "flash-max-timeout-us",
+				&led->max_flash_timeout);
+	if (ret) {
+		dev_warn(&led->client->dev,
+			 "flash-max-timeout-us DT property missing\n");
+		goto out_err;
+	}
+
+out_err:
+	fwnode_handle_put(child);
+	return ret;
+}
+
+static int lm3601x_probe(struct i2c_client *client)
+{
+	struct lm3601x_led *led;
+	int ret;
+
+	led = devm_kzalloc(&client->dev, sizeof(*led), GFP_KERNEL);
+	if (!led)
+		return -ENOMEM;
+
+	led->client = client;
+	i2c_set_clientdata(client, led);
+
+	ret = lm3601x_parse_node(led);
+	if (ret)
+		return -ENODEV;
+
+	led->regmap = devm_regmap_init_i2c(client, &lm3601x_regmap);
+	if (IS_ERR(led->regmap)) {
+		ret = PTR_ERR(led->regmap);
+		dev_err(&client->dev,
+			"Failed to allocate register map: %d\n", ret);
+		return ret;
+	}
+
+	mutex_init(&led->lock);
+
+	return lm3601x_register_leds(led);
+}
+
+static int lm3601x_remove(struct i2c_client *client)
+{
+	struct lm3601x_led *led = i2c_get_clientdata(client);
+
+	led_classdev_flash_unregister(&led->fled_cdev);
+	mutex_destroy(&led->lock);
+
+	return regmap_update_bits(led->regmap, LM3601X_ENABLE_REG,
+			   LM3601X_ENABLE_MASK,
+			   LM3601X_MODE_STANDBY);
+}
+
+static const struct i2c_device_id lm3601x_id[] = {
+	{ "LM36010", CHIP_LM36010 },
+	{ "LM36011", CHIP_LM36011 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, lm3601x_id);
+
+static const struct of_device_id of_lm3601x_leds_match[] = {
+	{ .compatible = "ti,lm36010", },
+	{ .compatible = "ti,lm36011", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, of_lm3601x_leds_match);
+
+static struct i2c_driver lm3601x_i2c_driver = {
+	.driver = {
+		.name = "lm3601x",
+		.of_match_table = of_lm3601x_leds_match,
+	},
+	.probe_new = lm3601x_probe,
+	.remove = lm3601x_remove,
+	.id_table = lm3601x_id,
+};
+module_i2c_driver(lm3601x_i2c_driver);
+
+MODULE_DESCRIPTION("Texas Instruments Flash Lighting driver for LM3601X");
+MODULE_AUTHOR("Dan Murphy <dmurphy@ti.com>");
+MODULE_LICENSE("GPL v2");
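For readers less familiar with the LED flash class, here is a minimal sketch of the embedded-classdev pattern that fled_cdev_to_led() relies on; struct demo_led and to_demo_led() are illustrative names, not part of the driver:

#include <linux/kernel.h>
#include <linux/led-class-flash.h>

/* Hypothetical driver data, embedding the classdev by value as lm3601x does. */
struct demo_led {
	struct led_classdev_flash fled_cdev;	/* must be embedded, not a pointer */
	int private_state;
};

/* The flash core hands back &demo_led.fled_cdev; container_of() recovers
 * the enclosing structure by pointer arithmetic, with no lookup table.
 */
static struct demo_led *to_demo_led(struct led_classdev_flash *fled_cdev)
{
	return container_of(fled_cdev, struct demo_led, fled_cdev);
}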
diff --git a/drivers/leds/leds-sc27xx-bltc.c b/drivers/leds/leds-sc27xx-bltc.c
new file mode 100644
index 0000000..9d9b7aa
--- /dev/null
+++ b/drivers/leds/leds-sc27xx-bltc.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Spreadtrum Communications Inc.
+
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <uapi/linux/uleds.h>
+
+/* PMIC global control register definition */
+#define SC27XX_MODULE_EN0	0xc08
+#define SC27XX_CLK_EN0		0xc18
+#define SC27XX_RGB_CTRL		0xebc
+
+#define SC27XX_BLTC_EN		BIT(9)
+#define SC27XX_RTC_EN		BIT(7)
+#define SC27XX_RGB_PD		BIT(0)
+
+/* Breathing light controller register definition */
+#define SC27XX_LEDS_CTRL	0x00
+#define SC27XX_LEDS_PRESCALE	0x04
+#define SC27XX_LEDS_DUTY	0x08
+#define SC27XX_LEDS_CURVE0	0x0c
+#define SC27XX_LEDS_CURVE1	0x10
+
+#define SC27XX_CTRL_SHIFT	4
+#define SC27XX_LED_RUN		BIT(0)
+#define SC27XX_LED_TYPE		BIT(1)
+
+#define SC27XX_DUTY_SHIFT	8
+#define SC27XX_DUTY_MASK	GENMASK(15, 0)
+#define SC27XX_MOD_MASK		GENMASK(7, 0)
+
+#define SC27XX_LEDS_OFFSET	0x10
+#define SC27XX_LEDS_MAX		3
+
+struct sc27xx_led {
+	char name[LED_MAX_NAME_SIZE];
+	struct led_classdev ldev;
+	struct sc27xx_led_priv *priv;
+	u8 line;
+	bool active;
+};
+
+struct sc27xx_led_priv {
+	struct sc27xx_led leds[SC27XX_LEDS_MAX];
+	struct regmap *regmap;
+	struct mutex lock;
+	u32 base;
+};
+
+#define to_sc27xx_led(ldev) \
+	container_of(ldev, struct sc27xx_led, ldev)
+
+static int sc27xx_led_init(struct regmap *regmap)
+{
+	int err;
+
+	err = regmap_update_bits(regmap, SC27XX_MODULE_EN0, SC27XX_BLTC_EN,
+				 SC27XX_BLTC_EN);
+	if (err)
+		return err;
+
+	err = regmap_update_bits(regmap, SC27XX_CLK_EN0, SC27XX_RTC_EN,
+				 SC27XX_RTC_EN);
+	if (err)
+		return err;
+
+	return regmap_update_bits(regmap, SC27XX_RGB_CTRL, SC27XX_RGB_PD, 0);
+}
+
+static u32 sc27xx_led_get_offset(struct sc27xx_led *leds)
+{
+	return leds->priv->base + SC27XX_LEDS_OFFSET * leds->line;
+}
+
+static int sc27xx_led_enable(struct sc27xx_led *leds, enum led_brightness value)
+{
+	u32 base = sc27xx_led_get_offset(leds);
+	u32 ctrl_base = leds->priv->base + SC27XX_LEDS_CTRL;
+	u8 ctrl_shift = SC27XX_CTRL_SHIFT * leds->line;
+	struct regmap *regmap = leds->priv->regmap;
+	int err;
+
+	err = regmap_update_bits(regmap, base + SC27XX_LEDS_DUTY,
+				 SC27XX_DUTY_MASK,
+				 (value << SC27XX_DUTY_SHIFT) |
+				 SC27XX_MOD_MASK);
+	if (err)
+		return err;
+
+	return regmap_update_bits(regmap, ctrl_base,
+			(SC27XX_LED_RUN | SC27XX_LED_TYPE) << ctrl_shift,
+			(SC27XX_LED_RUN | SC27XX_LED_TYPE) << ctrl_shift);
+}
+
+static int sc27xx_led_disable(struct sc27xx_led *leds)
+{
+	struct regmap *regmap = leds->priv->regmap;
+	u32 ctrl_base = leds->priv->base + SC27XX_LEDS_CTRL;
+	u8 ctrl_shift = SC27XX_CTRL_SHIFT * leds->line;
+
+	return regmap_update_bits(regmap, ctrl_base,
+			(SC27XX_LED_RUN | SC27XX_LED_TYPE) << ctrl_shift, 0);
+}
+
+static int sc27xx_led_set(struct led_classdev *ldev, enum led_brightness value)
+{
+	struct sc27xx_led *leds = to_sc27xx_led(ldev);
+	int err;
+
+	mutex_lock(&leds->priv->lock);
+
+	if (value == LED_OFF)
+		err = sc27xx_led_disable(leds);
+	else
+		err = sc27xx_led_enable(leds, value);
+
+	mutex_unlock(&leds->priv->lock);
+
+	return err;
+}
+
+static int sc27xx_led_register(struct device *dev, struct sc27xx_led_priv *priv)
+{
+	int i, err;
+
+	err = sc27xx_led_init(priv->regmap);
+	if (err)
+		return err;
+
+	for (i = 0; i < SC27XX_LEDS_MAX; i++) {
+		struct sc27xx_led *led = &priv->leds[i];
+
+		if (!led->active)
+			continue;
+
+		led->line = i;
+		led->priv = priv;
+		led->ldev.name = led->name;
+		led->ldev.brightness_set_blocking = sc27xx_led_set;
+
+		err = devm_led_classdev_register(dev, &led->ldev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int sc27xx_led_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node, *child;
+	struct sc27xx_led_priv *priv;
+	const char *str;
+	u32 base, count, reg;
+	int err;
+
+	count = of_get_child_count(np);
+	if (!count || count > SC27XX_LEDS_MAX)
+		return -EINVAL;
+
+	err = of_property_read_u32(np, "reg", &base);
+	if (err) {
+		dev_err(dev, "fail to get reg of property\n");
+		return err;
+	}
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+	mutex_init(&priv->lock);
+	priv->base = base;
+	priv->regmap = dev_get_regmap(dev->parent, NULL);
+	if (!priv->regmap) {
+		err = -ENODEV;
+		dev_err(dev, "failed to get regmap: %d\n", err);
+		return err;
+	}
+
+	for_each_child_of_node(np, child) {
+		err = of_property_read_u32(child, "reg", &reg);
+		if (err) {
+			of_node_put(child);
+			mutex_destroy(&priv->lock);
+			return err;
+		}
+
+		if (reg >= SC27XX_LEDS_MAX || priv->leds[reg].active) {
+			of_node_put(child);
+			mutex_destroy(&priv->lock);
+			return -EINVAL;
+		}
+
+		priv->leds[reg].active = true;
+
+		err = of_property_read_string(child, "label", &str);
+		if (err)
+			snprintf(priv->leds[reg].name, LED_MAX_NAME_SIZE,
+				 "sc27xx::");
+		else
+			snprintf(priv->leds[reg].name, LED_MAX_NAME_SIZE,
+				 "sc27xx:%s", str);
+	}
+
+	err = sc27xx_led_register(dev, priv);
+	if (err)
+		mutex_destroy(&priv->lock);
+
+	return err;
+}
+
+static int sc27xx_led_remove(struct platform_device *pdev)
+{
+	struct sc27xx_led_priv *priv = platform_get_drvdata(pdev);
+
+	mutex_destroy(&priv->lock);
+	return 0;
+}
+
+static const struct of_device_id sc27xx_led_of_match[] = {
+	{ .compatible = "sprd,sc2731-bltc", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, sc27xx_led_of_match);
+
+static struct platform_driver sc27xx_led_driver = {
+	.driver = {
+		.name = "sprd-bltc",
+		.of_match_table = sc27xx_led_of_match,
+	},
+	.probe = sc27xx_led_probe,
+	.remove = sc27xx_led_remove,
+};
+
+module_platform_driver(sc27xx_led_driver);
+
+MODULE_DESCRIPTION("Spreadtrum SC27xx breathing light controller driver");
+MODULE_AUTHOR("Xiaotong Lu <xiaotong.lu@spreadtrum.com>");
+MODULE_LICENSE("GPL v2");
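The duty-cycle write in sc27xx_led_enable() packs two fields into a single 16-bit register: brightness in bits 15:8, modulation period forced to the maximum in bits 7:0. A reduced sketch of that packing (demo_* names are hypothetical):

#include <linux/bitops.h>
#include <linux/types.h>

#define DEMO_DUTY_SHIFT	8
#define DEMO_MOD_MASK	GENMASK(7, 0)

/* Brightness lands in the high byte; the low byte keeps the period maxed. */
static inline u32 demo_duty_value(u8 brightness)
{
	return (brightness << DEMO_DUTY_SHIFT) | DEMO_MOD_MASK;
}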
diff --git a/drivers/leds/leds-wm831x-status.c b/drivers/leds/leds-wm831x-status.c
index be93b20..c5798b9 100644
--- a/drivers/leds/leds-wm831x-status.c
+++ b/drivers/leds/leds-wm831x-status.c
@@ -188,24 +188,14 @@
 {
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct wm831x_status *led = to_wm831x_status(led_cdev);
-	char name[20];
 	int i;
-	size_t len;
 
-	name[sizeof(name) - 1] = '\0';
-	strncpy(name, buf, sizeof(name) - 1);
-	len = strlen(name);
-
-	if (len && name[len - 1] == '\n')
-		name[len - 1] = '\0';
-
-	for (i = 0; i < ARRAY_SIZE(led_src_texts); i++) {
-		if (!strcmp(name, led_src_texts[i])) {
-			mutex_lock(&led->mutex);
-			led->src = i;
-			mutex_unlock(&led->mutex);
-			wm831x_status_set(led);
-		}
+	i = sysfs_match_string(led_src_texts, buf);
+	if (i >= 0) {
+		mutex_lock(&led->mutex);
+		led->src = i;
+		mutex_unlock(&led->mutex);
+		wm831x_status_set(led);
 	}
 
 	return size;
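The hunk above replaces a hand-rolled strncpy()/strcmp() loop with sysfs_match_string(), which handles the trailing newline itself and returns the index of the matching entry or -EINVAL. A minimal usage sketch, with an illustrative attribute and string table:

#include <linux/device.h>
#include <linux/string.h>

static const char * const demo_texts[] = { "red", "green", "blue" };

static ssize_t demo_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t size)
{
	int i = sysfs_match_string(demo_texts, buf);

	if (i < 0)
		return i;	/* -EINVAL: no entry matched */

	/* ... apply demo_texts[i] ... */
	return size;
}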
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 63171cd..60aa7bc 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -431,7 +431,7 @@
 	return 0;
 err_sysfs:
 	if (tt->exit)
-		tt->exit(targetdata);
+		tt->exit(targetdata, true);
 err_init:
 	blk_cleanup_queue(tqueue);
 	tdisk->queue = NULL;
@@ -446,7 +446,7 @@
 	return ret;
 }
 
-static void __nvm_remove_target(struct nvm_target *t)
+static void __nvm_remove_target(struct nvm_target *t, bool graceful)
 {
 	struct nvm_tgt_type *tt = t->type;
 	struct gendisk *tdisk = t->disk;
@@ -459,7 +459,7 @@
 		tt->sysfs_exit(tdisk);
 
 	if (tt->exit)
-		tt->exit(tdisk->private_data);
+		tt->exit(tdisk->private_data, graceful);
 
 	nvm_remove_tgt_dev(t->dev, 1);
 	put_disk(tdisk);
@@ -489,7 +489,7 @@
 		mutex_unlock(&dev->mlock);
 		return 1;
 	}
-	__nvm_remove_target(t);
+	__nvm_remove_target(t, true);
 	mutex_unlock(&dev->mlock);
 
 	return 0;
@@ -963,7 +963,7 @@
 	list_for_each_entry_safe(t, tmp, &dev->targets, list) {
 		if (t->dev->parent != dev)
 			continue;
-		__nvm_remove_target(t);
+		__nvm_remove_target(t, false);
 	}
 	mutex_unlock(&dev->mlock);
 
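The target ->exit() callback gains a bool so targets know at teardown whether the device can still service I/O. A sketch of that contract against a hypothetical target (demo_* names are not from the patch):

#include <linux/types.h>

struct demo_tgt {
	int queued;	/* illustrative state */
};

static void demo_tgt_flush(struct demo_tgt *tgt) { /* drain queued work */ }
static void demo_tgt_free(struct demo_tgt *tgt) { /* release resources */ }

static void demo_tgt_exit(void *private, bool graceful)
{
	struct demo_tgt *tgt = private;

	if (graceful)
		demo_tgt_flush(tgt);	/* safe: device still answers I/O */

	demo_tgt_free(tgt);		/* always required */
}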
diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c
index 29a2311..b1c6d7e 100644
--- a/drivers/lightnvm/pblk-cache.c
+++ b/drivers/lightnvm/pblk-cache.c
@@ -44,13 +44,15 @@
 		goto out;
 	}
 
-	if (unlikely(!bio_has_data(bio)))
-		goto out;
-
 	pblk_ppa_set_empty(&w_ctx.ppa);
 	w_ctx.flags = flags;
-	if (bio->bi_opf & REQ_PREFLUSH)
+	if (bio->bi_opf & REQ_PREFLUSH) {
 		w_ctx.flags |= PBLK_FLUSH_ENTRY;
+		pblk_write_kick(pblk);
+	}
+
+	if (unlikely(!bio_has_data(bio)))
+		goto out;
 
 	for (i = 0; i < nr_entries; i++) {
 		void *data = bio_data(bio);
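The reordering above matters because REQ_PREFLUSH can arrive on a bio that carries no payload; handling the flush flag after the !bio_has_data() bail-out would silently drop empty flushes. As a small illustrative predicate:

#include <linux/bio.h>

/* True for a flush request with no data attached. */
static bool demo_bio_is_empty_flush(struct bio *bio)
{
	return (bio->bi_opf & REQ_PREFLUSH) && !bio_has_data(bio);
}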
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 94d5d97..ed9cc97 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -40,7 +40,7 @@
 	}
 
 	kfree(ppa);
-	mempool_free(line_ws, pblk->gen_ws_pool);
+	mempool_free(line_ws, &pblk->gen_ws_pool);
 }
 
 static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
@@ -102,7 +102,7 @@
 	struct pblk *pblk = rqd->private;
 
 	__pblk_end_io_erase(pblk, rqd);
-	mempool_free(rqd, pblk->e_rq_pool);
+	mempool_free(rqd, &pblk->e_rq_pool);
 }
 
 /*
@@ -237,15 +237,15 @@
 	switch (type) {
 	case PBLK_WRITE:
 	case PBLK_WRITE_INT:
-		pool = pblk->w_rq_pool;
+		pool = &pblk->w_rq_pool;
 		rq_size = pblk_w_rq_size;
 		break;
 	case PBLK_READ:
-		pool = pblk->r_rq_pool;
+		pool = &pblk->r_rq_pool;
 		rq_size = pblk_g_rq_size;
 		break;
 	default:
-		pool = pblk->e_rq_pool;
+		pool = &pblk->e_rq_pool;
 		rq_size = pblk_g_rq_size;
 	}
 
@@ -265,20 +265,22 @@
 	case PBLK_WRITE:
 		kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap);
 	case PBLK_WRITE_INT:
-		pool = pblk->w_rq_pool;
+		pool = &pblk->w_rq_pool;
 		break;
 	case PBLK_READ:
-		pool = pblk->r_rq_pool;
+		pool = &pblk->r_rq_pool;
 		break;
 	case PBLK_ERASE:
-		pool = pblk->e_rq_pool;
+		pool = &pblk->e_rq_pool;
 		break;
 	default:
 		pr_err("pblk: trying to free unknown rqd type\n");
 		return;
 	}
 
-	nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list);
+	if (rqd->meta_list)
+		nvm_dev_dma_free(dev->parent, rqd->meta_list,
+				rqd->dma_meta_list);
 	mempool_free(rqd, pool);
 }
 
@@ -292,7 +294,7 @@
 
 	for (i = off; i < nr_pages + off; i++) {
 		bv = bio->bi_io_vec[i];
-		mempool_free(bv.bv_page, pblk->page_bio_pool);
+		mempool_free(bv.bv_page, &pblk->page_bio_pool);
 	}
 }
 
@@ -304,23 +306,23 @@
 	int i, ret;
 
 	for (i = 0; i < nr_pages; i++) {
-		page = mempool_alloc(pblk->page_bio_pool, flags);
+		page = mempool_alloc(&pblk->page_bio_pool, flags);
 
 		ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
 		if (ret != PBLK_EXPOSED_PAGE_SIZE) {
 			pr_err("pblk: could not add page to bio\n");
-			mempool_free(page, pblk->page_bio_pool);
+			mempool_free(page, &pblk->page_bio_pool);
 			goto err;
 		}
 	}
 
 	return 0;
 err:
-	pblk_bio_free_pages(pblk, bio, 0, i - 1);
+	pblk_bio_free_pages(pblk, bio, (bio->bi_vcnt - i), i);
 	return -1;
 }
 
-static void pblk_write_kick(struct pblk *pblk)
+void pblk_write_kick(struct pblk *pblk)
 {
 	wake_up_process(pblk->writer_ts);
 	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
@@ -342,13 +344,6 @@
 		pblk_write_kick(pblk);
 }
 
-void pblk_end_io_sync(struct nvm_rq *rqd)
-{
-	struct completion *waiting = rqd->private;
-
-	complete(waiting);
-}
-
 static void pblk_wait_for_meta(struct pblk *pblk)
 {
 	do {
@@ -380,7 +375,13 @@
 
 	lockdep_assert_held(&line->lock);
 
-	if (!vsc) {
+	if (line->w_err_gc->has_write_err) {
+		if (line->gc_group != PBLK_LINEGC_WERR) {
+			line->gc_group = PBLK_LINEGC_WERR;
+			move_list = &l_mg->gc_werr_list;
+			pblk_rl_werr_line_in(&pblk->rl);
+		}
+	} else if (!vsc) {
 		if (line->gc_group != PBLK_LINEGC_FULL) {
 			line->gc_group = PBLK_LINEGC_FULL;
 			move_list = &l_mg->gc_full_list;
@@ -467,16 +468,13 @@
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 
-#ifdef CONFIG_NVM_DEBUG
-	int ret;
-
-	ret = pblk_check_io(pblk, rqd);
-	if (ret)
-		return ret;
-#endif
-
 	atomic_inc(&pblk->inflight_io);
 
+#ifdef CONFIG_NVM_DEBUG
+	if (pblk_check_io(pblk, rqd))
+		return NVM_IO_ERR;
+#endif
+
 	return nvm_submit_io(dev, rqd);
 }
 
@@ -484,16 +482,13 @@
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 
-#ifdef CONFIG_NVM_DEBUG
-	int ret;
-
-	ret = pblk_check_io(pblk, rqd);
-	if (ret)
-		return ret;
-#endif
-
 	atomic_inc(&pblk->inflight_io);
 
+#ifdef CONFIG_NVM_DEBUG
+	if (pblk_check_io(pblk, rqd))
+		return NVM_IO_ERR;
+#endif
+
 	return nvm_submit_io_sync(dev, rqd);
 }
 
@@ -856,9 +851,10 @@
 	atomic_dec(&pblk->inflight_io);
 
 	if (rqd.error) {
-		if (dir == PBLK_WRITE)
+		if (dir == PBLK_WRITE) {
 			pblk_log_write_err(pblk, &rqd);
-		else if (dir == PBLK_READ)
+			ret = 1;
+		} else if (dir == PBLK_READ)
 			pblk_log_read_err(pblk, &rqd);
 	}
 
@@ -1071,6 +1067,25 @@
 	return 1;
 }
 
+static int pblk_line_alloc_bitmaps(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+
+	line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
+	if (!line->map_bitmap)
+		return -ENOMEM;
+
+	/* will be initialized using bb info from map_bitmap */
+	line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
+	if (!line->invalid_bitmap) {
+		kfree(line->map_bitmap);
+		line->map_bitmap = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /* For now lines are always assumed full lines. Thus, smeta former and current
  * lun bitmaps are omitted.
  */
@@ -1108,7 +1123,7 @@
 
 	if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) {
 		pr_debug("pblk: line smeta I/O failed. Retry\n");
-		return 1;
+		return 0;
 	}
 
 	bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
@@ -1174,19 +1189,9 @@
 static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
+	int blk_in_line = atomic_read(&line->blk_in_line);
 	int blk_to_erase;
 
-	line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC);
-	if (!line->map_bitmap)
-		return -ENOMEM;
-
-	/* will be initialized using bb info from map_bitmap */
-	line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC);
-	if (!line->invalid_bitmap) {
-		kfree(line->map_bitmap);
-		return -ENOMEM;
-	}
-
 	/* Bad blocks do not need to be erased */
 	bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
 
@@ -1199,16 +1204,19 @@
 		blk_to_erase = pblk_prepare_new_line(pblk, line);
 		line->state = PBLK_LINESTATE_FREE;
 	} else {
-		blk_to_erase = atomic_read(&line->blk_in_line);
+		blk_to_erase = blk_in_line;
+	}
+
+	if (blk_in_line < lm->min_blk_line) {
+		spin_unlock(&line->lock);
+		return -EAGAIN;
 	}
 
 	if (line->state != PBLK_LINESTATE_FREE) {
-		kfree(line->map_bitmap);
-		kfree(line->invalid_bitmap);
-		spin_unlock(&line->lock);
 		WARN(1, "pblk: corrupted line %d, state %d\n",
 							line->id, line->state);
-		return -EAGAIN;
+		spin_unlock(&line->lock);
+		return -EINTR;
 	}
 
 	line->state = PBLK_LINESTATE_OPEN;
@@ -1241,13 +1249,16 @@
 	}
 	spin_unlock(&l_mg->free_lock);
 
-	pblk_rl_free_lines_dec(&pblk->rl, line, true);
+	ret = pblk_line_alloc_bitmaps(pblk, line);
+	if (ret)
+		return ret;
 
 	if (!pblk_line_init_bb(pblk, line, 0)) {
 		list_add(&line->list, &l_mg->free_list);
 		return -EINTR;
 	}
 
+	pblk_rl_free_lines_dec(&pblk->rl, line, true);
 	return 0;
 }
 
@@ -1259,6 +1270,24 @@
 	line->emeta = NULL;
 }
 
+static void pblk_line_reinit(struct pblk_line *line)
+{
+	*line->vsc = cpu_to_le32(EMPTY_ENTRY);
+
+	line->map_bitmap = NULL;
+	line->invalid_bitmap = NULL;
+	line->smeta = NULL;
+	line->emeta = NULL;
+}
+
+void pblk_line_free(struct pblk_line *line)
+{
+	kfree(line->map_bitmap);
+	kfree(line->invalid_bitmap);
+
+	pblk_line_reinit(line);
+}
+
 struct pblk_line *pblk_line_get(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1292,10 +1321,14 @@
 
 	ret = pblk_line_prepare(pblk, line);
 	if (ret) {
-		if (ret == -EAGAIN) {
+		switch (ret) {
+		case -EAGAIN:
+			list_add(&line->list, &l_mg->bad_list);
+			goto retry;
+		case -EINTR:
 			list_add(&line->list, &l_mg->corrupt_list);
 			goto retry;
-		} else {
+		default:
 			pr_err("pblk: failed to prepare line %d\n", line->id);
 			list_add(&line->list, &l_mg->free_list);
 			l_mg->nr_free_lines++;
@@ -1321,11 +1354,14 @@
 		return NULL;
 	}
 
+	retry_line->map_bitmap = line->map_bitmap;
+	retry_line->invalid_bitmap = line->invalid_bitmap;
 	retry_line->smeta = line->smeta;
 	retry_line->emeta = line->emeta;
 	retry_line->meta_line = line->meta_line;
 
-	pblk_line_free(pblk, line);
+	pblk_line_reinit(line);
+
 	l_mg->data_line = retry_line;
 	spin_unlock(&l_mg->free_lock);
 
@@ -1378,6 +1414,9 @@
 	}
 	spin_unlock(&l_mg->free_lock);
 
+	if (pblk_line_alloc_bitmaps(pblk, line))
+		return NULL;
+
 	if (pblk_line_erase(pblk, line)) {
 		line = pblk_line_retry(pblk, line);
 		if (!line)
@@ -1449,7 +1488,7 @@
 	flush_workqueue(pblk->close_wq);
 }
 
-void pblk_pipeline_stop(struct pblk *pblk)
+void __pblk_pipeline_flush(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	int ret;
@@ -1474,6 +1513,11 @@
 
 	flush_workqueue(pblk->bb_wq);
 	pblk_line_close_meta_sync(pblk);
+}
+
+void __pblk_pipeline_stop(struct pblk *pblk)
+{
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 
 	spin_lock(&l_mg->free_lock);
 	pblk->state = PBLK_STATE_STOPPED;
@@ -1482,6 +1526,12 @@
 	spin_unlock(&l_mg->free_lock);
 }
 
+void pblk_pipeline_stop(struct pblk *pblk)
+{
+	__pblk_pipeline_flush(pblk);
+	__pblk_pipeline_stop(pblk);
+}
+
 struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1511,6 +1561,9 @@
 		goto retry_erase;
 	}
 
+	if (pblk_line_alloc_bitmaps(pblk, new))
+		return NULL;
+
 retry_setup:
 	if (!pblk_line_init_metadata(pblk, new, cur)) {
 		new = pblk_line_retry(pblk, new);
@@ -1550,19 +1603,6 @@
 	return new;
 }
 
-void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
-{
-	kfree(line->map_bitmap);
-	kfree(line->invalid_bitmap);
-
-	*line->vsc = cpu_to_le32(EMPTY_ENTRY);
-
-	line->map_bitmap = NULL;
-	line->invalid_bitmap = NULL;
-	line->smeta = NULL;
-	line->emeta = NULL;
-}
-
 static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
@@ -1572,9 +1612,14 @@
 	WARN_ON(line->state != PBLK_LINESTATE_GC);
 	line->state = PBLK_LINESTATE_FREE;
 	line->gc_group = PBLK_LINEGC_NONE;
-	pblk_line_free(pblk, line);
-	spin_unlock(&line->lock);
+	pblk_line_free(line);
 
+	if (line->w_err_gc->has_write_err) {
+		pblk_rl_werr_line_out(&pblk->rl);
+		line->w_err_gc->has_write_err = 0;
+	}
+
+	spin_unlock(&line->lock);
 	atomic_dec(&gc->pipeline_gc);
 
 	spin_lock(&l_mg->free_lock);
@@ -1593,7 +1638,7 @@
 	struct pblk_line *line = line_put_ws->line;
 
 	__pblk_line_put(pblk, line);
-	mempool_free(line_put_ws, pblk->gen_ws_pool);
+	mempool_free(line_put_ws, &pblk->gen_ws_pool);
 }
 
 void pblk_line_put(struct kref *ref)
@@ -1610,7 +1655,7 @@
 	struct pblk *pblk = line->pblk;
 	struct pblk_line_ws *line_put_ws;
 
-	line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC);
+	line_put_ws = mempool_alloc(&pblk->gen_ws_pool, GFP_ATOMIC);
 	if (!line_put_ws)
 		return;
 
@@ -1737,11 +1782,34 @@
 
 	spin_lock(&l_mg->close_lock);
 	spin_lock(&line->lock);
+
+	/* Update the in-memory start address for emeta, in case it has
+	 * shifted due to write errors
+	 */
+	if (line->emeta_ssec != line->cur_sec)
+		line->emeta_ssec = line->cur_sec;
+
 	list_add_tail(&line->list, &l_mg->emeta_list);
 	spin_unlock(&line->lock);
 	spin_unlock(&l_mg->close_lock);
 
 	pblk_line_should_sync_meta(pblk);
+}
+
+static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
+{
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	unsigned int lba_list_size = lm->emeta_len[2];
+	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+	struct pblk_emeta *emeta = line->emeta;
+
+	w_err_gc->lba_list = pblk_malloc(lba_list_size,
+					 l_mg->emeta_alloc_type, GFP_KERNEL);
+	memcpy(w_err_gc->lba_list, emeta_to_lbas(pblk, emeta->buf),
+				lba_list_size);
 }
 
 void pblk_line_close_ws(struct work_struct *work)
@@ -1750,9 +1818,16 @@
 									ws);
 	struct pblk *pblk = line_ws->pblk;
 	struct pblk_line *line = line_ws->line;
+	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+
+	/* Write errors make the emeta start address stored in smeta invalid,
+	 * so keep a copy of the lba list until we've gc'd the line
+	 */
+	if (w_err_gc->has_write_err)
+		pblk_save_lba_list(pblk, line);
 
 	pblk_line_close(pblk, line);
-	mempool_free(line_ws, pblk->gen_ws_pool);
+	mempool_free(line_ws, &pblk->gen_ws_pool);
 }
 
 void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
@@ -1761,7 +1836,7 @@
 {
 	struct pblk_line_ws *line_ws;
 
-	line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask);
+	line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask);
 
 	line_ws->pblk = pblk;
 	line_ws->line = line;
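Throughout this file the mempools move from heap-allocated (mempool_create_slab_pool()/mempool_destroy(), pool held by pointer) to embedded in the parent structure (mempool_init_slab_pool()/mempool_exit(), callers pass &pool). A reduced sketch of the idiom, with illustrative names:

#include <linux/mempool.h>
#include <linux/slab.h>

struct demo_ctx {
	mempool_t pool;			/* embedded by value, not a pointer */
};

static int demo_ctx_init(struct demo_ctx *ctx, struct kmem_cache *cache)
{
	return mempool_init_slab_pool(&ctx->pool, 16, cache);
}

static void demo_ctx_exit(struct demo_ctx *ctx)
{
	mempool_exit(&ctx->pool);	/* also safe on a zeroed, never-initialized pool */
}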
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index 6851a5c..df88f1b 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -129,6 +129,53 @@
 	kfree(gc_rq_ws);
 }
 
+static __le64 *get_lba_list_from_emeta(struct pblk *pblk,
+				       struct pblk_line *line)
+{
+	struct line_emeta *emeta_buf;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
+	struct pblk_line_meta *lm = &pblk->lm;
+	unsigned int lba_list_size = lm->emeta_len[2];
+	__le64 *lba_list;
+	int ret;
+
+	emeta_buf = pblk_malloc(lm->emeta_len[0],
+				l_mg->emeta_alloc_type, GFP_KERNEL);
+	if (!emeta_buf)
+		return NULL;
+
+	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+	if (ret) {
+		pr_err("pblk: line %d read emeta failed (%d)\n",
+				line->id, ret);
+		pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+		return NULL;
+	}
+
+	/* If this read fails, it means that emeta is corrupted.
+	 * For now, leave the line untouched.
+	 * TODO: Implement a recovery routine that scans and moves
+	 * all sectors on the line.
+	 */
+
+	ret = pblk_recov_check_emeta(pblk, emeta_buf);
+	if (ret) {
+		pr_err("pblk: inconsistent emeta (line %d)\n",
+				line->id);
+		pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+		return NULL;
+	}
+
+	lba_list = pblk_malloc(lba_list_size,
+			       l_mg->emeta_alloc_type, GFP_KERNEL);
+	if (lba_list)
+		memcpy(lba_list, emeta_to_lbas(pblk, emeta_buf), lba_list_size);
+
+	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+
+	return lba_list;
+}
+
 static void pblk_gc_line_prepare_ws(struct work_struct *work)
 {
 	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
@@ -138,46 +185,26 @@
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_gc *gc = &pblk->gc;
-	struct line_emeta *emeta_buf;
 	struct pblk_line_ws *gc_rq_ws;
 	struct pblk_gc_rq *gc_rq;
 	__le64 *lba_list;
 	unsigned long *invalid_bitmap;
 	int sec_left, nr_secs, bit;
-	int ret;
 
 	invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
 	if (!invalid_bitmap)
 		goto fail_free_ws;
 
-	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
-								GFP_KERNEL);
-	if (!emeta_buf) {
-		pr_err("pblk: cannot use GC emeta\n");
-		goto fail_free_bitmap;
-	}
-
-	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
-	if (ret) {
-		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
-		goto fail_free_emeta;
-	}
-
-	/* If this read fails, it means that emeta is corrupted. For now, leave
-	 * the line untouched. TODO: Implement a recovery routine that scans and
-	 * moves all sectors on the line.
-	 */
-
-	ret = pblk_recov_check_emeta(pblk, emeta_buf);
-	if (ret) {
-		pr_err("pblk: inconsistent emeta (line %d)\n", line->id);
-		goto fail_free_emeta;
-	}
-
-	lba_list = emeta_to_lbas(pblk, emeta_buf);
-	if (!lba_list) {
-		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
-		goto fail_free_emeta;
+	if (line->w_err_gc->has_write_err) {
+		lba_list = line->w_err_gc->lba_list;
+		line->w_err_gc->lba_list = NULL;
+	} else {
+		lba_list = get_lba_list_from_emeta(pblk, line);
+		if (!lba_list) {
+			pr_err("pblk: could not interpret emeta (line %d)\n",
+					line->id);
+			goto fail_free_ws;
+		}
 	}
 
 	spin_lock(&line->lock);
@@ -187,14 +214,14 @@
 
 	if (sec_left < 0) {
 		pr_err("pblk: corrupted GC line (%d)\n", line->id);
-		goto fail_free_emeta;
+		goto fail_free_lba_list;
 	}
 
 	bit = -1;
 next_rq:
 	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
 	if (!gc_rq)
-		goto fail_free_emeta;
+		goto fail_free_lba_list;
 
 	nr_secs = 0;
 	do {
@@ -240,7 +267,7 @@
 		goto next_rq;
 
 out:
-	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+	pblk_mfree(lba_list, l_mg->emeta_alloc_type);
 	kfree(line_ws);
 	kfree(invalid_bitmap);
 
@@ -251,9 +278,8 @@
 
 fail_free_gc_rq:
 	kfree(gc_rq);
-fail_free_emeta:
-	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
-fail_free_bitmap:
+fail_free_lba_list:
+	pblk_mfree(lba_list, l_mg->emeta_alloc_type);
 	kfree(invalid_bitmap);
 fail_free_ws:
 	kfree(line_ws);
@@ -349,12 +375,14 @@
 static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
 {
 	unsigned int nr_blocks_free, nr_blocks_need;
+	unsigned int werr_lines = atomic_read(&rl->werr_lines);
 
 	nr_blocks_need = pblk_rl_high_thrs(rl);
 	nr_blocks_free = pblk_rl_nr_free_blks(rl);
 
 	/* This is not critical, no need to take lock here */
-	return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
+	return ((werr_lines > 0) ||
+		((gc->gc_active) && (nr_blocks_need > nr_blocks_free)));
 }
 
 void pblk_gc_free_full_lines(struct pblk *pblk)
@@ -649,7 +677,7 @@
 	return ret;
 }
 
-void pblk_gc_exit(struct pblk *pblk)
+void pblk_gc_exit(struct pblk *pblk, bool graceful)
 {
 	struct pblk_gc *gc = &pblk->gc;
 
@@ -663,10 +691,12 @@
 	if (gc->gc_reader_ts)
 		kthread_stop(gc->gc_reader_ts);
 
-	flush_workqueue(gc->gc_reader_wq);
-	destroy_workqueue(gc->gc_reader_wq);
+	if (graceful) {
+		flush_workqueue(gc->gc_reader_wq);
+		flush_workqueue(gc->gc_line_reader_wq);
+	}
 
-	flush_workqueue(gc->gc_line_reader_wq);
+	destroy_workqueue(gc->gc_reader_wq);
 	destroy_workqueue(gc->gc_line_reader_wq);
 
 	if (gc->gc_writer_ts)
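The conditional teardown above follows a simple rule: flushing a workqueue waits for queued work and only makes sense while the device can still make progress, while destroying it is required either way. As a sketch:

#include <linux/workqueue.h>

static void demo_wq_teardown(struct workqueue_struct *wq, bool graceful)
{
	if (graceful)
		flush_workqueue(wq);	/* drain pending work items */

	destroy_workqueue(wq);		/* unconditional */
}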
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 91a5bc2..ce561f5 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -20,10 +20,15 @@
 
 #include "pblk.h"
 
+unsigned int write_buffer_size;
+
+module_param(write_buffer_size, uint, 0644);
+MODULE_PARM_DESC(write_buffer_size, "number of entries in a write buffer");
+
 static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
 				*pblk_w_rq_cache;
 static DECLARE_RWSEM(pblk_lock);
-struct bio_set *pblk_bio_set;
+struct bio_set pblk_bio_set;
 
 static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
 			  struct bio *bio)
@@ -127,10 +132,8 @@
 	if (!line) {
 		/* Configure next line for user data */
 		line = pblk_line_get_first_data(pblk);
-		if (!line) {
-			pr_err("pblk: line list corrupted\n");
+		if (!line)
 			return -EFAULT;
-		}
 	}
 
 	return 0;
@@ -141,6 +144,7 @@
 	sector_t i;
 	struct ppa_addr ppa;
 	size_t map_size;
+	int ret = 0;
 
 	map_size = pblk_trans_map_size(pblk);
 	pblk->trans_map = vmalloc(map_size);
@@ -152,7 +156,11 @@
 	for (i = 0; i < pblk->rl.nr_secs; i++)
 		pblk_trans_map_set(pblk, i, ppa);
 
-	return pblk_l2p_recover(pblk, factory_init);
+	ret = pblk_l2p_recover(pblk, factory_init);
+	if (ret)
+		vfree(pblk->trans_map);
+
+	return ret;
 }
 
 static void pblk_rwb_free(struct pblk *pblk)
@@ -169,10 +177,15 @@
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_rb_entry *entries;
-	unsigned long nr_entries;
+	unsigned long nr_entries, buffer_size;
 	unsigned int power_size, power_seg_sz;
 
-	nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);
+	if (write_buffer_size && (write_buffer_size > pblk->pgs_in_buffer))
+		buffer_size = write_buffer_size;
+	else
+		buffer_size = pblk->pgs_in_buffer;
+
+	nr_entries = pblk_rb_calculate_size(buffer_size);
 
 	entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
 	if (!entries)
@@ -341,7 +354,7 @@
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	int max_write_ppas;
+	int ret, max_write_ppas;
 
 	atomic64_set(&pblk->user_wa, 0);
 	atomic64_set(&pblk->pad_wa, 0);
@@ -375,33 +388,33 @@
 		goto fail_free_pad_dist;
 
 	/* Internal bios can be at most the sectors signaled by the device. */
-	pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0);
-	if (!pblk->page_bio_pool)
+	ret = mempool_init_page_pool(&pblk->page_bio_pool, NVM_MAX_VLBA, 0);
+	if (ret)
 		goto free_global_caches;
 
-	pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE,
-							pblk_ws_cache);
-	if (!pblk->gen_ws_pool)
+	ret = mempool_init_slab_pool(&pblk->gen_ws_pool, PBLK_GEN_WS_POOL_SIZE,
+				     pblk_ws_cache);
+	if (ret)
 		goto free_page_bio_pool;
 
-	pblk->rec_pool = mempool_create_slab_pool(geo->all_luns,
-							pblk_rec_cache);
-	if (!pblk->rec_pool)
+	ret = mempool_init_slab_pool(&pblk->rec_pool, geo->all_luns,
+				     pblk_rec_cache);
+	if (ret)
 		goto free_gen_ws_pool;
 
-	pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns,
-							pblk_g_rq_cache);
-	if (!pblk->r_rq_pool)
+	ret = mempool_init_slab_pool(&pblk->r_rq_pool, geo->all_luns,
+				     pblk_g_rq_cache);
+	if (ret)
 		goto free_rec_pool;
 
-	pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns,
-							pblk_g_rq_cache);
-	if (!pblk->e_rq_pool)
+	ret = mempool_init_slab_pool(&pblk->e_rq_pool, geo->all_luns,
+				     pblk_g_rq_cache);
+	if (ret)
 		goto free_r_rq_pool;
 
-	pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns,
-							pblk_w_rq_cache);
-	if (!pblk->w_rq_pool)
+	ret = mempool_init_slab_pool(&pblk->w_rq_pool, geo->all_luns,
+				     pblk_w_rq_cache);
+	if (ret)
 		goto free_e_rq_pool;
 
 	pblk->close_wq = alloc_workqueue("pblk-close-wq",
@@ -423,6 +436,7 @@
 		goto free_r_end_wq;
 
 	INIT_LIST_HEAD(&pblk->compl_list);
+	INIT_LIST_HEAD(&pblk->resubmit_list);
 
 	return 0;
 
@@ -433,17 +447,17 @@
 free_close_wq:
 	destroy_workqueue(pblk->close_wq);
 free_w_rq_pool:
-	mempool_destroy(pblk->w_rq_pool);
+	mempool_exit(&pblk->w_rq_pool);
 free_e_rq_pool:
-	mempool_destroy(pblk->e_rq_pool);
+	mempool_exit(&pblk->e_rq_pool);
 free_r_rq_pool:
-	mempool_destroy(pblk->r_rq_pool);
+	mempool_exit(&pblk->r_rq_pool);
 free_rec_pool:
-	mempool_destroy(pblk->rec_pool);
+	mempool_exit(&pblk->rec_pool);
 free_gen_ws_pool:
-	mempool_destroy(pblk->gen_ws_pool);
+	mempool_exit(&pblk->gen_ws_pool);
 free_page_bio_pool:
-	mempool_destroy(pblk->page_bio_pool);
+	mempool_exit(&pblk->page_bio_pool);
 free_global_caches:
 	pblk_free_global_caches(pblk);
 fail_free_pad_dist:
@@ -462,12 +476,12 @@
 	if (pblk->bb_wq)
 		destroy_workqueue(pblk->bb_wq);
 
-	mempool_destroy(pblk->page_bio_pool);
-	mempool_destroy(pblk->gen_ws_pool);
-	mempool_destroy(pblk->rec_pool);
-	mempool_destroy(pblk->r_rq_pool);
-	mempool_destroy(pblk->e_rq_pool);
-	mempool_destroy(pblk->w_rq_pool);
+	mempool_exit(&pblk->page_bio_pool);
+	mempool_exit(&pblk->gen_ws_pool);
+	mempool_exit(&pblk->rec_pool);
+	mempool_exit(&pblk->r_rq_pool);
+	mempool_exit(&pblk->e_rq_pool);
+	mempool_exit(&pblk->w_rq_pool);
 
 	pblk_free_global_caches(pblk);
 	kfree(pblk->pad_dist);
@@ -489,11 +503,17 @@
 	}
 }
 
-static void pblk_line_meta_free(struct pblk_line *line)
+static void pblk_line_meta_free(struct pblk_line_mgmt *l_mg,
+				struct pblk_line *line)
 {
+	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
+
 	kfree(line->blk_bitmap);
 	kfree(line->erase_bitmap);
 	kfree(line->chks);
+
+	pblk_mfree(w_err_gc->lba_list, l_mg->emeta_alloc_type);
+	kfree(w_err_gc);
 }
 
 static void pblk_lines_free(struct pblk *pblk)
@@ -506,8 +526,8 @@
 	for (i = 0; i < l_mg->nr_lines; i++) {
 		line = &pblk->lines[i];
 
-		pblk_line_free(pblk, line);
-		pblk_line_meta_free(line);
+		pblk_line_free(line);
+		pblk_line_meta_free(l_mg, line);
 	}
 	spin_unlock(&l_mg->free_lock);
 
@@ -748,14 +768,14 @@
 		chunk->cnlb = chunk_meta->cnlb;
 		chunk->wp = chunk_meta->wp;
 
-		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
-			continue;
-
 		if (chunk->type & NVM_CHK_TP_SZ_SPEC) {
 			WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n");
 			continue;
 		}
 
+		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
+			continue;
+
 		set_bit(pos, line->blk_bitmap);
 		nr_bad_chks++;
 	}
@@ -809,20 +829,28 @@
 		return -ENOMEM;
 
 	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
-	if (!line->erase_bitmap) {
-		kfree(line->blk_bitmap);
-		return -ENOMEM;
-	}
+	if (!line->erase_bitmap)
+		goto free_blk_bitmap;
 
 	line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta),
 								GFP_KERNEL);
-	if (!line->chks) {
-		kfree(line->erase_bitmap);
-		kfree(line->blk_bitmap);
-		return -ENOMEM;
-	}
+	if (!line->chks)
+		goto free_erase_bitmap;
+
+	line->w_err_gc = kzalloc(sizeof(struct pblk_w_err_gc), GFP_KERNEL);
+	if (!line->w_err_gc)
+		goto free_chks;
 
 	return 0;
+
+free_chks:
+	kfree(line->chks);
+free_erase_bitmap:
+	kfree(line->erase_bitmap);
+free_blk_bitmap:
+	kfree(line->blk_bitmap);
+	return -ENOMEM;
 }
 
 static int pblk_line_mg_init(struct pblk *pblk)
@@ -847,12 +875,14 @@
 	INIT_LIST_HEAD(&l_mg->gc_mid_list);
 	INIT_LIST_HEAD(&l_mg->gc_low_list);
 	INIT_LIST_HEAD(&l_mg->gc_empty_list);
+	INIT_LIST_HEAD(&l_mg->gc_werr_list);
 
 	INIT_LIST_HEAD(&l_mg->emeta_list);
 
-	l_mg->gc_lists[0] = &l_mg->gc_high_list;
-	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
-	l_mg->gc_lists[2] = &l_mg->gc_low_list;
+	l_mg->gc_lists[0] = &l_mg->gc_werr_list;
+	l_mg->gc_lists[1] = &l_mg->gc_high_list;
+	l_mg->gc_lists[2] = &l_mg->gc_mid_list;
+	l_mg->gc_lists[3] = &l_mg->gc_low_list;
 
 	spin_lock_init(&l_mg->free_lock);
 	spin_lock_init(&l_mg->close_lock);
@@ -1047,6 +1077,11 @@
 		nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i);
 	}
 
+	if (!nr_free_chks) {
+		pr_err("pblk: too many bad blocks prevent for sane instance\n");
+		return -EINTR;
+	}
+
 	pblk_set_provision(pblk, nr_free_chks);
 
 	kfree(chunk_meta);
@@ -1054,7 +1089,7 @@
 
 fail_free_lines:
 	while (--i >= 0)
-		pblk_line_meta_free(&pblk->lines[i]);
+		pblk_line_meta_free(l_mg, &pblk->lines[i]);
 	kfree(pblk->lines);
 fail_free_chunk_meta:
 	kfree(chunk_meta);
@@ -1110,23 +1145,25 @@
 	kfree(pblk);
 }
 
-static void pblk_tear_down(struct pblk *pblk)
+static void pblk_tear_down(struct pblk *pblk, bool graceful)
 {
-	pblk_pipeline_stop(pblk);
+	if (graceful)
+		__pblk_pipeline_flush(pblk);
+	__pblk_pipeline_stop(pblk);
 	pblk_writer_stop(pblk);
 	pblk_rb_sync_l2p(&pblk->rwb);
 	pblk_rl_free(&pblk->rl);
 
-	pr_debug("pblk: consistent tear down\n");
+	pr_debug("pblk: consistent tear down (graceful:%d)\n", graceful);
 }
 
-static void pblk_exit(void *private)
+static void pblk_exit(void *private, bool graceful)
 {
 	struct pblk *pblk = private;
 
 	down_write(&pblk_lock);
-	pblk_gc_exit(pblk);
-	pblk_tear_down(pblk);
+	pblk_gc_exit(pblk, graceful);
+	pblk_tear_down(pblk, graceful);
 
 #ifdef CONFIG_NVM_DEBUG
 	pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
@@ -1175,6 +1212,7 @@
 	pblk->state = PBLK_STATE_RUNNING;
 	pblk->gc.gc_enabled = 0;
 
+	spin_lock_init(&pblk->resubmit_lock);
 	spin_lock_init(&pblk->trans_lock);
 	spin_lock_init(&pblk->lock);
 
@@ -1297,18 +1335,18 @@
 {
 	int ret;
 
-	pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!pblk_bio_set)
-		return -ENOMEM;
+	ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0);
+	if (ret)
+		return ret;
 	ret = nvm_register_tgt_type(&tt_pblk);
 	if (ret)
-		bioset_free(pblk_bio_set);
+		bioset_exit(&pblk_bio_set);
 	return ret;
 }
 
 static void pblk_module_exit(void)
 {
-	bioset_free(pblk_bio_set);
+	bioset_exit(&pblk_bio_set);
 	nvm_unregister_tgt_type(&tt_pblk);
 }
 
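pblk's bio_set likewise moves from a pointer managed with bioset_create()/bioset_free() to an embedded object managed with bioset_init()/bioset_exit(), and bio_clone_fast() callers now pass &pblk_bio_set. A minimal module-level sketch of the pattern:

#include <linux/bio.h>
#include <linux/init.h>

static struct bio_set demo_bio_set;

static int __init demo_init(void)
{
	return bioset_init(&demo_bio_set, BIO_POOL_SIZE, 0, 0);
}

static void __exit demo_exit(void)
{
	bioset_exit(&demo_bio_set);
}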
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 20dbaa8..953ca31 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -18,11 +18,11 @@
 
 #include "pblk.h"
 
-static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
-			       struct ppa_addr *ppa_list,
-			       unsigned long *lun_bitmap,
-			       struct pblk_sec_meta *meta_list,
-			       unsigned int valid_secs)
+static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
+			      struct ppa_addr *ppa_list,
+			      unsigned long *lun_bitmap,
+			      struct pblk_sec_meta *meta_list,
+			      unsigned int valid_secs)
 {
 	struct pblk_line *line = pblk_line_get_data(pblk);
 	struct pblk_emeta *emeta;
@@ -35,8 +35,14 @@
 	if (pblk_line_is_full(line)) {
 		struct pblk_line *prev_line = line;
 
+		/* If we cannot allocate a new line, make sure to store metadata
+		 * on current line and then fail
+		 */
 		line = pblk_line_replace_data(pblk);
 		pblk_line_close_meta(pblk, prev_line);
+
+		if (!line)
+			return -EINTR;
 	}
 
 	emeta = line->emeta;
@@ -74,6 +80,7 @@
 	}
 
 	pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap);
+	return 0;
 }
 
 void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
@@ -87,8 +94,12 @@
 
 	for (i = off; i < rqd->nr_ppas; i += min) {
 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
-		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
-					lun_bitmap, &meta_list[i], map_secs);
+		if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+					lun_bitmap, &meta_list[i], map_secs)) {
+			bio_put(rqd->bio);
+			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+			pblk_pipeline_stop(pblk);
+		}
 	}
 }
 
@@ -108,8 +119,12 @@
 
 	for (i = 0; i < rqd->nr_ppas; i += min) {
 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
-		pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
-					lun_bitmap, &meta_list[i], map_secs);
+		if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+					lun_bitmap, &meta_list[i], map_secs)) {
+			bio_put(rqd->bio);
+			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+			pblk_pipeline_stop(pblk);
+		}
 
 		erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
 
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index 52fdd85..00cd1f2 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -142,10 +142,9 @@
 {
 	int flags;
 
-try:
 	flags = READ_ONCE(w_ctx->flags);
-	if (!(flags & PBLK_SUBMITTED_ENTRY))
-		goto try;
+	WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
+			"pblk: overwriting unsubmitted data\n");
 
 	/* Release flags on context. Protect from writes and reads */
 	smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
@@ -350,7 +349,7 @@
 }
 
 static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
-				  unsigned int pos)
+				   unsigned int pos)
 {
 	struct pblk_rb_entry *entry;
 	unsigned int sync, flush_point;
@@ -420,7 +419,7 @@
 	if (pblk_rb_flush_point_set(rb, NULL, mem))
 		return;
 
-	pblk_write_should_kick(pblk);
+	pblk_write_kick(pblk);
 }
 
 static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
@@ -504,45 +503,6 @@
 }
 
 /*
- * The caller of this function must ensure that the backpointer will not
- * overwrite the entries passed on the list.
- */
-unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
-				      struct list_head *list,
-				      unsigned int max)
-{
-	struct pblk_rb_entry *entry, *tentry;
-	struct page *page;
-	unsigned int read = 0;
-	int ret;
-
-	list_for_each_entry_safe(entry, tentry, list, index) {
-		if (read > max) {
-			pr_err("pblk: too many entries on list\n");
-			goto out;
-		}
-
-		page = virt_to_page(entry->data);
-		if (!page) {
-			pr_err("pblk: could not allocate write bio page\n");
-			goto out;
-		}
-
-		ret = bio_add_page(bio, page, rb->seg_size, 0);
-		if (ret != rb->seg_size) {
-			pr_err("pblk: could not add page to write bio\n");
-			goto out;
-		}
-
-		list_del(&entry->index);
-		read++;
-	}
-
-out:
-	return read;
-}
-
-/*
  * Read available entries on rb and add them to the given bio. To avoid a memory
  * copy, a page reference to the write buffer is used to be added to the bio.
  *
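The busy-wait loop removed earlier in this file depended on ordering the ring buffer already guarantees through an acquire/release pairing, so a one-shot WARN_ONCE() suffices. A reduced sketch of that pairing (demo_* names are illustrative):

#include <linux/kernel.h>
#include <linux/compiler.h>
#include <asm/barrier.h>

#define DEMO_SUBMITTED	0x1

/* Producer: make all prior writes to the entry visible, then publish. */
static void demo_publish(int *flags)
{
	smp_store_release(flags, DEMO_SUBMITTED);
}

/* Consumer: READ_ONCE() is enough; an unset flag is a programming error,
 * so warn once instead of spinning on it.
 */
static void demo_reclaim(int *flags)
{
	WARN_ONCE(!(READ_ONCE(*flags) & DEMO_SUBMITTED),
		  "demo: overwriting unsubmitted data\n");
	smp_store_release(flags, 0);
}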
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 9eee10f..1869469 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -39,10 +39,10 @@
 }
 
 static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
-				 sector_t blba, unsigned long *read_bitmap)
+				 struct bio *bio, sector_t blba,
+				 unsigned long *read_bitmap)
 {
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
-	struct bio *bio = rqd->bio;
 	struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
 	int nr_secs = rqd->nr_ppas;
 	bool advanced_bio = false;
@@ -102,34 +102,71 @@
 #endif
 }
 
-static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
+static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
+				sector_t blba)
 {
-	int err;
-
-	err = pblk_submit_io(pblk, rqd);
-	if (err)
-		return NVM_IO_ERR;
-
-	return NVM_IO_OK;
-}
-
-static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd,
-			   sector_t blba)
-{
-	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
 	int nr_lbas = rqd->nr_ppas;
 	int i;
 
 	for (i = 0; i < nr_lbas; i++) {
-		u64 lba = le64_to_cpu(meta_list[i].lba);
+		u64 lba = le64_to_cpu(meta_lba_list[i].lba);
 
 		if (lba == ADDR_EMPTY)
 			continue;
 
-		WARN(lba != blba + i, "pblk: corrupted read LBA\n");
+		if (lba != blba + i) {
+#ifdef CONFIG_NVM_DEBUG
+			struct ppa_addr *p;
+
+			p = (nr_lbas == 1) ? &rqd->ppa_addr : &rqd->ppa_list[i];
+			print_ppa(&pblk->dev->geo, p, "seq", i);
+#endif
+			pr_err("pblk: corrupted read LBA (%llu/%llu)\n",
+							lba, (u64)blba + i);
+			WARN_ON(1);
+		}
 	}
 }
 
+/*
+ * There can be holes in the lba list.
+ */
+static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
+				 u64 *lba_list, int nr_lbas)
+{
+	struct pblk_sec_meta *meta_lba_list = rqd->meta_list;
+	int i, j;
+
+	for (i = 0, j = 0; i < nr_lbas; i++) {
+		u64 lba = lba_list[i];
+		u64 meta_lba;
+
+		if (lba == ADDR_EMPTY)
+			continue;
+
+		meta_lba = le64_to_cpu(meta_lba_list[j].lba);
+
+		if (lba != meta_lba) {
+#ifdef CONFIG_NVM_DEBUG
+			struct ppa_addr *p;
+			int nr_ppas = rqd->nr_ppas;
+
+			p = (nr_ppas == 1) ? &rqd->ppa_addr : &rqd->ppa_list[j];
+			print_ppa(&pblk->dev->geo, p, "rnd", j);
+#endif
+			pr_err("pblk: corrupted read LBA (%llu/%llu)\n",
+								lba, meta_lba);
+			WARN_ON(1);
+		}
+
+		j++;
+	}
+
+	WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
+}
+
 static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	struct ppa_addr *ppa_list;
@@ -152,7 +189,6 @@
 	WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n");
 #endif
 	bio_endio(bio);
-	bio_put(bio);
 }
 
 static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
@@ -160,23 +196,18 @@
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
-	struct bio *bio = rqd->bio;
+	struct bio *int_bio = rqd->bio;
 	unsigned long start_time = r_ctx->start_time;
 
 	generic_end_io_acct(dev->q, READ, &pblk->disk->part0, start_time);
 
 	if (rqd->error)
 		pblk_log_read_err(pblk, rqd);
-#ifdef CONFIG_NVM_DEBUG
-	else
-		WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n");
-#endif
 
-	pblk_read_check(pblk, rqd, r_ctx->lba);
+	pblk_read_check_seq(pblk, rqd, r_ctx->lba);
 
-	bio_put(bio);
-	if (r_ctx->private)
-		pblk_end_user_read((struct bio *)r_ctx->private);
+	if (int_bio)
+		bio_put(int_bio);
 
 	if (put_line)
 		pblk_read_put_rqd_kref(pblk, rqd);
@@ -193,16 +224,19 @@
 static void pblk_end_io_read(struct nvm_rq *rqd)
 {
 	struct pblk *pblk = rqd->private;
+	struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
+	struct bio *bio = (struct bio *)r_ctx->private;
 
+	pblk_end_user_read(bio);
 	__pblk_end_io_read(pblk, rqd, true);
 }
 
-static int pblk_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
-				 unsigned int bio_init_idx,
-				 unsigned long *read_bitmap)
+static int pblk_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
+			     struct bio *orig_bio, unsigned int bio_init_idx,
+			     unsigned long *read_bitmap)
 {
-	struct bio *new_bio, *bio = rqd->bio;
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	struct bio *new_bio;
 	struct bio_vec src_bv, dst_bv;
 	void *ppa_ptr = NULL;
 	void *src_p, *dst_p;
@@ -219,11 +253,11 @@
 	new_bio = bio_alloc(GFP_KERNEL, nr_holes);
 
 	if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
-		goto err;
+		goto fail_add_pages;
 
 	if (nr_holes != new_bio->bi_vcnt) {
 		pr_err("pblk: malformed bio\n");
-		goto err;
+		goto fail;
 	}
 
 	for (i = 0; i < nr_secs; i++)
@@ -246,7 +280,7 @@
 	if (ret) {
 		bio_put(rqd->bio);
 		pr_err("pblk: sync read IO submission failed\n");
-		goto err;
+		goto fail;
 	}
 
 	if (rqd->error) {
@@ -282,7 +316,7 @@
 		meta_list[hole].lba = lba_list_media[i];
 
 		src_bv = new_bio->bi_io_vec[i++];
-		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
+		dst_bv = orig_bio->bi_io_vec[bio_init_idx + hole];
 
 		src_p = kmap_atomic(src_bv.bv_page);
 		dst_p = kmap_atomic(dst_bv.bv_page);
@@ -294,35 +328,33 @@
 		kunmap_atomic(src_p);
 		kunmap_atomic(dst_p);
 
-		mempool_free(src_bv.bv_page, pblk->page_bio_pool);
+		mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
 
 		hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
 	} while (hole < nr_secs);
 
 	bio_put(new_bio);
 
-	/* Complete the original bio and associated request */
-	bio_endio(bio);
-	rqd->bio = bio;
+	/* restore original request */
+	rqd->bio = NULL;
 	rqd->nr_ppas = nr_secs;
 
 	__pblk_end_io_read(pblk, rqd, false);
-	return NVM_IO_OK;
+	return NVM_IO_DONE;
 
-err:
-	pr_err("pblk: failed to perform partial read\n");
-
+fail:
 	/* Free allocated pages in new bio */
-	pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt);
+	pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
+fail_add_pages:
+	pr_err("pblk: failed to perform partial read\n");
 	__pblk_end_io_read(pblk, rqd, false);
 	return NVM_IO_ERR;
 }
 
-static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
+static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
 			 sector_t lba, unsigned long *read_bitmap)
 {
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
-	struct bio *bio = rqd->bio;
 	struct ppa_addr ppa;
 
 	pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
@@ -386,14 +418,15 @@
 	rqd = pblk_alloc_rqd(pblk, PBLK_READ);
 
 	rqd->opcode = NVM_OP_PREAD;
-	rqd->bio = bio;
 	rqd->nr_ppas = nr_secs;
+	rqd->bio = NULL; /* cloned bio if needed */
 	rqd->private = pblk;
 	rqd->end_io = pblk_end_io_read;
 
 	r_ctx = nvm_rq_to_pdu(rqd);
 	r_ctx->start_time = jiffies;
 	r_ctx->lba = blba;
+	r_ctx->private = bio; /* original bio */
 
 	/* Save the index for this bio's start. This is needed in case
 	 * we need to fill a partial read.
@@ -411,17 +444,15 @@
 		rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size;
 		rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size;
 
-		pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap);
+		pblk_read_ppalist_rq(pblk, rqd, bio, blba, &read_bitmap);
 	} else {
-		pblk_read_rq(pblk, rqd, blba, &read_bitmap);
+		pblk_read_rq(pblk, rqd, bio, blba, &read_bitmap);
 	}
 
-	bio_get(bio);
 	if (bitmap_full(&read_bitmap, nr_secs)) {
-		bio_endio(bio);
 		atomic_inc(&pblk->inflight_io);
 		__pblk_end_io_read(pblk, rqd, false);
-		return NVM_IO_OK;
+		return NVM_IO_DONE;
 	}
 
 	/* All sectors are to be read from the device */
@@ -429,20 +460,17 @@
 		struct bio *int_bio = NULL;
 
 		/* Clone read bio to deal with read errors internally */
-		int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set);
+		int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
 		if (!int_bio) {
 			pr_err("pblk: could not clone read bio\n");
 			goto fail_end_io;
 		}
 
 		rqd->bio = int_bio;
-		r_ctx->private = bio;
 
-		ret = pblk_submit_read_io(pblk, rqd);
-		if (ret) {
+		if (pblk_submit_io(pblk, rqd)) {
 			pr_err("pblk: read IO submission failed\n");
-			if (int_bio)
-				bio_put(int_bio);
+			ret = NVM_IO_ERR;
 			goto fail_end_io;
 		}
 
@@ -452,7 +480,7 @@
 	/* The read bio request could be partially filled by the write buffer,
 	 * but there are some holes that need to be read from the drive.
 	 */
-	return pblk_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
+	return pblk_partial_read(pblk, rqd, bio, bio_init_idx, &read_bitmap);
 
 fail_rqd_free:
 	pblk_free_rqd(pblk, rqd, PBLK_READ);
@@ -585,6 +613,8 @@
 		goto err_free_bio;
 	}
 
+	pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
+
 	atomic_dec(&pblk->inflight_io);
 
 	if (rqd.error) {
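After this rework the request owns only its internal clone, while the user bio travels in the per-request context and is completed exactly once from the end_io path. A reduced sketch of the setup side (demo names, not the driver's):

#include <linux/bio.h>
#include <linux/errno.h>
#include <linux/lightnvm.h>

struct demo_read_ctx {
	struct bio *orig_bio;	/* user bio, completed only in end_io */
};

static int demo_setup_read(struct nvm_rq *rqd, struct bio *user_bio,
			   struct bio_set *bs)
{
	struct demo_read_ctx *ctx = nvm_rq_to_pdu(rqd);

	ctx->orig_bio = user_bio;
	rqd->bio = bio_clone_fast(user_bio, GFP_KERNEL, bs);

	return rqd->bio ? 0 : -ENOMEM;
}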
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 3e079c2..5983428 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -16,97 +16,6 @@
 
 #include "pblk.h"
 
-void pblk_submit_rec(struct work_struct *work)
-{
-	struct pblk_rec_ctx *recovery =
-			container_of(work, struct pblk_rec_ctx, ws_rec);
-	struct pblk *pblk = recovery->pblk;
-	struct nvm_rq *rqd = recovery->rqd;
-	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
-	struct bio *bio;
-	unsigned int nr_rec_secs;
-	unsigned int pgs_read;
-	int ret;
-
-	nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status,
-								NVM_MAX_VLBA);
-
-	bio = bio_alloc(GFP_KERNEL, nr_rec_secs);
-
-	bio->bi_iter.bi_sector = 0;
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-	rqd->bio = bio;
-	rqd->nr_ppas = nr_rec_secs;
-
-	pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed,
-								nr_rec_secs);
-	if (pgs_read != nr_rec_secs) {
-		pr_err("pblk: could not read recovery entries\n");
-		goto err;
-	}
-
-	if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) {
-		pr_err("pblk: could not setup recovery request\n");
-		goto err;
-	}
-
-#ifdef CONFIG_NVM_DEBUG
-	atomic_long_add(nr_rec_secs, &pblk->recov_writes);
-#endif
-
-	ret = pblk_submit_io(pblk, rqd);
-	if (ret) {
-		pr_err("pblk: I/O submission failed: %d\n", ret);
-		goto err;
-	}
-
-	mempool_free(recovery, pblk->rec_pool);
-	return;
-
-err:
-	bio_put(bio);
-	pblk_free_rqd(pblk, rqd, PBLK_WRITE);
-}
-
-int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
-			struct pblk_rec_ctx *recovery, u64 *comp_bits,
-			unsigned int comp)
-{
-	struct nvm_rq *rec_rqd;
-	struct pblk_c_ctx *rec_ctx;
-	int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded;
-
-	rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE);
-	rec_ctx = nvm_rq_to_pdu(rec_rqd);
-
-	/* Copy completion bitmap, but exclude the first X completed entries */
-	bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status,
-				(unsigned long int *)comp_bits,
-				comp, NVM_MAX_VLBA);
-
-	/* Save the context for the entries that need to be re-written and
-	 * update current context with the completed entries.
-	 */
-	rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp);
-	if (comp >= c_ctx->nr_valid) {
-		rec_ctx->nr_valid = 0;
-		rec_ctx->nr_padded = nr_entries - comp;
-
-		c_ctx->nr_padded = comp - c_ctx->nr_valid;
-	} else {
-		rec_ctx->nr_valid = c_ctx->nr_valid - comp;
-		rec_ctx->nr_padded = c_ctx->nr_padded;
-
-		c_ctx->nr_valid = comp;
-		c_ctx->nr_padded = 0;
-	}
-
-	recovery->rqd = rec_rqd;
-	recovery->pblk = pblk;
-
-	return 0;
-}
-
 int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta_buf)
 {
 	u32 crc;
@@ -865,18 +774,30 @@
 }
 
 static int pblk_line_was_written(struct pblk_line *line,
-			    struct pblk_line_meta *lm)
+			    struct pblk *pblk)
 {
 
-	int i;
-	int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE;
+	struct pblk_line_meta *lm = &pblk->lm;
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct nvm_chk_meta *chunk;
+	struct ppa_addr bppa;
+	int smeta_blk;
 
-	for (i = 0; i < lm->blk_per_line; i++) {
-		if (!(line->chks[i].state & state_mask))
-			return 1;
-	}
+	if (line->state == PBLK_LINESTATE_BAD)
+		return 0;
 
-	return 0;
+	smeta_blk = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
+	if (smeta_blk >= lm->blk_per_line)
+		return 0;
+
+	bppa = pblk->luns[smeta_blk].bppa;
+	chunk = &line->chks[pblk_ppa_to_pos(geo, bppa)];
+
+	if (chunk->state & NVM_CHK_ST_FREE)
+		return 0;
+
+	return 1;
 }
 
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
@@ -915,7 +836,7 @@
 		line->lun_bitmap = ((void *)(smeta_buf)) +
 						sizeof(struct line_smeta);
 
-		if (!pblk_line_was_written(line, lm))
+		if (!pblk_line_was_written(line, pblk))
 			continue;
 
 		/* Lines that cannot be read are assumed not to have been written */
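
The rewritten pblk_line_was_written() above keys off the chunk holding smeta, the first usable block in the line: if that chunk is still NVM_CHK_ST_FREE, the line was never written. The bitmap helper it relies on behaves as in this small sketch (illustrative values only):

	/* find_first_zero_bit() returns the index of the first clear bit,
	 * or the bitmap size if every bit is set. Here set bits mark bad
	 * blocks, so the first zero bit is the first good (smeta) block.
	 */
	unsigned long bad_blocks = 0x3;	/* blocks 0 and 1 are bad */
	int smeta_blk = find_first_zero_bit(&bad_blocks, 4);	/* -> 2 */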
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index 883a711..6a0616a 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -73,6 +73,16 @@
 	pblk_rl_kick_u_timer(rl);
 }
 
+void pblk_rl_werr_line_in(struct pblk_rl *rl)
+{
+	atomic_inc(&rl->werr_lines);
+}
+
+void pblk_rl_werr_line_out(struct pblk_rl *rl)
+{
+	atomic_dec(&rl->werr_lines);
+}
+
 void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
 {
 	atomic_add(nr_entries, &rl->rb_gc_cnt);
@@ -99,11 +109,21 @@
 {
 	struct pblk *pblk = container_of(rl, struct pblk, rl);
 	int max = rl->rb_budget;
+	int werr_gc_needed = atomic_read(&rl->werr_lines);
 
 	if (free_blocks >= rl->high) {
-		rl->rb_user_max = max;
-		rl->rb_gc_max = 0;
-		rl->rb_state = PBLK_RL_HIGH;
+		if (werr_gc_needed) {
+			/* Allocate a small budget for recovering
+			 * lines with write errors
+			 */
+			rl->rb_gc_max = 1 << rl->rb_windows_pw;
+			rl->rb_user_max = max - rl->rb_gc_max;
+			rl->rb_state = PBLK_RL_WERR;
+		} else {
+			rl->rb_user_max = max;
+			rl->rb_gc_max = 0;
+			rl->rb_state = PBLK_RL_OFF;
+		}
 	} else if (free_blocks < rl->high) {
 		int shift = rl->high_pw - rl->rb_windows_pw;
 		int user_windows = free_blocks >> shift;
@@ -124,7 +144,7 @@
 		rl->rb_state = PBLK_RL_LOW;
 	}
 
-	if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW))
+	if (rl->rb_state != PBLK_RL_OFF)
 		pblk_gc_should_start(pblk);
 	else
 		pblk_gc_should_stop(pblk);
@@ -221,6 +241,7 @@
 	atomic_set(&rl->rb_user_cnt, 0);
 	atomic_set(&rl->rb_gc_cnt, 0);
 	atomic_set(&rl->rb_space, -1);
+	atomic_set(&rl->werr_lines, 0);
 
 	timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
 
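The new PBLK_RL_WERR state above carves a fixed GC budget out of the user budget even when free blocks are plentiful, so lines with write errors still get recovered. A toy calculation with assumed values (rb_budget = 1024, rb_windows_pw = 7; not taken from a real device):

	int max = 1024;				/* rl->rb_budget */
	int rb_windows_pw = 7;
	int gc_max = 1 << rb_windows_pw;	/* 128 entries reserved for werr GC */
	int user_max = max - gc_max;		/* 896 entries left for user I/O */
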
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index e61909a..88a0a7c 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -173,6 +173,8 @@
 	int free_line_cnt = 0, closed_line_cnt = 0, emeta_line_cnt = 0;
 	int d_line_cnt = 0, l_line_cnt = 0;
 	int gc_full = 0, gc_high = 0, gc_mid = 0, gc_low = 0, gc_empty = 0;
+	int gc_werr = 0;
+
 	int bad = 0, cor = 0;
 	int msecs = 0, cur_sec = 0, vsc = 0, sec_in_line = 0;
 	int map_weight = 0, meta_weight = 0;
@@ -237,6 +239,15 @@
 		gc_empty++;
 	}
 
+	list_for_each_entry(line, &l_mg->gc_werr_list, list) {
+		if (line->type == PBLK_LINETYPE_DATA)
+			d_line_cnt++;
+		else if (line->type == PBLK_LINETYPE_LOG)
+			l_line_cnt++;
+		closed_line_cnt++;
+		gc_werr++;
+	}
+
 	list_for_each_entry(line, &l_mg->bad_list, list)
 		bad++;
 	list_for_each_entry(line, &l_mg->corrupt_list, list)
@@ -275,8 +286,8 @@
 					l_mg->nr_lines);
 
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
-		"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n",
-			gc_full, gc_high, gc_mid, gc_low, gc_empty,
+		"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr:%d, queue:%d\n",
+			gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
 			atomic_read(&pblk->gc.read_inflight_gc));
 
 	sz += snprintf(page + sz, PAGE_SIZE - sz,
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 3e6f1eb..f353e52 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -103,68 +103,150 @@
 	pblk_rb_sync_end(&pblk->rwb, &flags);
 }
 
-/* When a write fails, we are not sure whether the block has grown bad or a page
- * range is more susceptible to write errors. If a high number of pages fail, we
- * assume that the block is bad and we mark it accordingly. In all cases, we
- * remap and resubmit the failed entries as fast as possible; if a flush is
- * waiting on a completion, the whole stack would stall otherwise.
- */
+/* Map remaining sectors in chunk, starting from ppa */
+static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line *line;
+	struct ppa_addr map_ppa = *ppa;
+	u64 paddr;
+	int done = 0;
+
+	line = &pblk->lines[pblk_ppa_to_line(*ppa)];
+	spin_lock(&line->lock);
+
+	while (!done) {
+		paddr = pblk_dev_ppa_to_line_addr(pblk, map_ppa);
+
+		if (!test_and_set_bit(paddr, line->map_bitmap))
+			line->left_msecs--;
+
+		if (!test_and_set_bit(paddr, line->invalid_bitmap))
+			le32_add_cpu(line->vsc, -1);
+
+		if (geo->version == NVM_OCSSD_SPEC_12) {
+			map_ppa.ppa++;
+			if (map_ppa.g.pg == geo->num_pg)
+				done = 1;
+		} else {
+			map_ppa.m.sec++;
+			if (map_ppa.m.sec == geo->clba)
+				done = 1;
+		}
+	}
+
+	line->w_err_gc->has_write_err = 1;
+	spin_unlock(&line->lock);
+}
+
+static void pblk_prepare_resubmit(struct pblk *pblk, unsigned int sentry,
+				  unsigned int nr_entries)
+{
+	struct pblk_rb *rb = &pblk->rwb;
+	struct pblk_rb_entry *entry;
+	struct pblk_line *line;
+	struct pblk_w_ctx *w_ctx;
+	struct ppa_addr ppa_l2p;
+	int flags;
+	unsigned int pos, i;
+
+	spin_lock(&pblk->trans_lock);
+	pos = sentry;
+	for (i = 0; i < nr_entries; i++) {
+		entry = &rb->entries[pos];
+		w_ctx = &entry->w_ctx;
+
+		/* Check if the lba has been overwritten */
+		ppa_l2p = pblk_trans_map_get(pblk, w_ctx->lba);
+		if (!pblk_ppa_comp(ppa_l2p, entry->cacheline))
+			w_ctx->lba = ADDR_EMPTY;
+
+		/* Mark up the entry as submittable again */
+		flags = READ_ONCE(w_ctx->flags);
+		flags |= PBLK_WRITTEN_DATA;
+		/* Release flags on write context. Protect from writes */
+		smp_store_release(&w_ctx->flags, flags);
+
+		/* Decrease the reference count to the line as we will
+		 * re-map these entries
+		 */
+		line = &pblk->lines[pblk_ppa_to_line(w_ctx->ppa)];
+		kref_put(&line->ref, pblk_line_put);
+
+		pos = (pos + 1) & (rb->nr_entries - 1);
+	}
+	spin_unlock(&pblk->trans_lock);
+}
+
+static void pblk_queue_resubmit(struct pblk *pblk, struct pblk_c_ctx *c_ctx)
+{
+	struct pblk_c_ctx *r_ctx;
+
+	r_ctx = kzalloc(sizeof(struct pblk_c_ctx), GFP_KERNEL);
+	if (!r_ctx)
+		return;
+
+	r_ctx->lun_bitmap = NULL;
+	r_ctx->sentry = c_ctx->sentry;
+	r_ctx->nr_valid = c_ctx->nr_valid;
+	r_ctx->nr_padded = c_ctx->nr_padded;
+
+	spin_lock(&pblk->resubmit_lock);
+	list_add_tail(&r_ctx->list, &pblk->resubmit_list);
+	spin_unlock(&pblk->resubmit_lock);
+
+#ifdef CONFIG_NVM_DEBUG
+	atomic_long_add(c_ctx->nr_valid, &pblk->recov_writes);
+#endif
+}
+
+static void pblk_submit_rec(struct work_struct *work)
+{
+	struct pblk_rec_ctx *recovery =
+			container_of(work, struct pblk_rec_ctx, ws_rec);
+	struct pblk *pblk = recovery->pblk;
+	struct nvm_rq *rqd = recovery->rqd;
+	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
+	struct ppa_addr *ppa_list;
+
+	pblk_log_write_err(pblk, rqd);
+
+	if (rqd->nr_ppas == 1)
+		ppa_list = &rqd->ppa_addr;
+	else
+		ppa_list = rqd->ppa_list;
+
+	pblk_map_remaining(pblk, ppa_list);
+	pblk_queue_resubmit(pblk, c_ctx);
+
+	pblk_up_rq(pblk, rqd->ppa_list, rqd->nr_ppas, c_ctx->lun_bitmap);
+	if (c_ctx->nr_padded)
+		pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid,
+							c_ctx->nr_padded);
+	bio_put(rqd->bio);
+	pblk_free_rqd(pblk, rqd, PBLK_WRITE);
+	mempool_free(recovery, &pblk->rec_pool);
+
+	atomic_dec(&pblk->inflight_io);
+}
+
+
 static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd)
 {
-	void *comp_bits = &rqd->ppa_status;
-	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_rec_ctx *recovery;
-	struct ppa_addr *ppa_list = rqd->ppa_list;
-	int nr_ppas = rqd->nr_ppas;
-	unsigned int c_entries;
-	int bit, ret;
 
-	if (unlikely(nr_ppas == 1))
-		ppa_list = &rqd->ppa_addr;
-
-	recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC);
-
-	INIT_LIST_HEAD(&recovery->failed);
-
-	bit = -1;
-	while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
-		struct pblk_rb_entry *entry;
-		struct ppa_addr ppa;
-
-		/* Logic error */
-		if (bit > c_ctx->nr_valid) {
-			WARN_ONCE(1, "pblk: corrupted write request\n");
-			mempool_free(recovery, pblk->rec_pool);
-			goto out;
-		}
-
-		ppa = ppa_list[bit];
-		entry = pblk_rb_sync_scan_entry(&pblk->rwb, &ppa);
-		if (!entry) {
-			pr_err("pblk: could not scan entry on write failure\n");
-			mempool_free(recovery, pblk->rec_pool);
-			goto out;
-		}
-
-		/* The list is filled first and emptied afterwards. No need for
-		 * protecting it with a lock
-		 */
-		list_add_tail(&entry->index, &recovery->failed);
+	recovery = mempool_alloc(&pblk->rec_pool, GFP_ATOMIC);
+	if (!recovery) {
+		pr_err("pblk: could not allocate recovery work\n");
+		return;
 	}
 
-	c_entries = find_first_bit(comp_bits, nr_ppas);
-	ret = pblk_recov_setup_rq(pblk, c_ctx, recovery, comp_bits, c_entries);
-	if (ret) {
-		pr_err("pblk: could not recover from write failure\n");
-		mempool_free(recovery, pblk->rec_pool);
-		goto out;
-	}
+	recovery->pblk = pblk;
+	recovery->rqd = rqd;
 
 	INIT_WORK(&recovery->ws_rec, pblk_submit_rec);
 	queue_work(pblk->close_wq, &recovery->ws_rec);
-
-out:
-	pblk_complete_write(pblk, rqd, c_ctx);
 }
 
 static void pblk_end_io_write(struct nvm_rq *rqd)
@@ -173,8 +255,8 @@
 	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
 
 	if (rqd->error) {
-		pblk_log_write_err(pblk, rqd);
-		return pblk_end_w_fail(pblk, rqd);
+		pblk_end_w_fail(pblk, rqd);
+		return;
 	}
 #ifdef CONFIG_NVM_DEBUG
 	else
@@ -198,6 +280,7 @@
 	if (rqd->error) {
 		pblk_log_write_err(pblk, rqd);
 		pr_err("pblk: metadata I/O failed. Line %d\n", line->id);
+		line->w_err_gc->has_write_err = 1;
 	}
 
 	sync = atomic_add_return(rqd->nr_ppas, &emeta->sync);
@@ -266,31 +349,6 @@
 	return 0;
 }
 
-int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
-			struct pblk_c_ctx *c_ctx)
-{
-	struct pblk_line_meta *lm = &pblk->lm;
-	unsigned long *lun_bitmap;
-	int ret;
-
-	lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL);
-	if (!lun_bitmap)
-		return -ENOMEM;
-
-	c_ctx->lun_bitmap = lun_bitmap;
-
-	ret = pblk_alloc_w_rq(pblk, rqd, rqd->nr_ppas, pblk_end_io_write);
-	if (ret)
-		return ret;
-
-	pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0);
-
-	rqd->ppa_status = (u64)0;
-	rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
-
-	return ret;
-}
-
 static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
 				  unsigned int secs_to_flush)
 {
@@ -339,6 +397,7 @@
 	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
 					l_mg->emeta_alloc_type, GFP_KERNEL);
 	if (IS_ERR(bio)) {
+		pr_err("pblk: failed to map emeta io\n");
 		ret = PTR_ERR(bio);
 		goto fail_free_rqd;
 	}
@@ -515,27 +574,55 @@
 	unsigned int secs_avail, secs_to_sync, secs_to_com;
 	unsigned int secs_to_flush;
 	unsigned long pos;
+	unsigned int resubmit;
 
-	/* If there are no sectors in the cache, flushes (bios without data)
-	 * will be cleared on the cache threads
-	 */
-	secs_avail = pblk_rb_read_count(&pblk->rwb);
-	if (!secs_avail)
-		return 1;
+	spin_lock(&pblk->resubmit_lock);
+	resubmit = !list_empty(&pblk->resubmit_list);
+	spin_unlock(&pblk->resubmit_lock);
 
-	secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
-	if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
-		return 1;
+	/* Resubmit failed writes first */
+	if (resubmit) {
+		struct pblk_c_ctx *r_ctx;
 
-	secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush);
-	if (secs_to_sync > pblk->max_write_pgs) {
-		pr_err("pblk: bad buffer sync calculation\n");
-		return 1;
+		spin_lock(&pblk->resubmit_lock);
+		r_ctx = list_first_entry(&pblk->resubmit_list,
+					struct pblk_c_ctx, list);
+		list_del(&r_ctx->list);
+		spin_unlock(&pblk->resubmit_lock);
+
+		secs_avail = r_ctx->nr_valid;
+		pos = r_ctx->sentry;
+
+		pblk_prepare_resubmit(pblk, pos, secs_avail);
+		secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
+				secs_avail);
+
+		kfree(r_ctx);
+	} else {
+		/* If there are no sectors in the cache,
+		 * flushes (bios without data) will be cleared on
+		 * the cache threads
+		 */
+		secs_avail = pblk_rb_read_count(&pblk->rwb);
+		if (!secs_avail)
+			return 1;
+
+		secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
+		if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
+			return 1;
+
+		secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
+					secs_to_flush);
+		if (secs_to_sync > pblk->max_write_pgs) {
+			pr_err("pblk: bad buffer sync calculation\n");
+			return 1;
+		}
+
+		secs_to_com = (secs_to_sync > secs_avail) ?
+			secs_avail : secs_to_sync;
+		pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
 	}
 
-	secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync;
-	pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
-
 	bio = bio_alloc(GFP_KERNEL, secs_to_sync);
 
 	bio->bi_iter.bi_sector = 0; /* internal bio */
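
pblk_prepare_resubmit() above walks nr_entries write-buffer slots starting at sentry; because the ring size is a power of two, wraparound is a mask rather than a modulo. The idiom in isolation (assuming, as pblk does, that nr_entries is a power of two):

	/* Advance a ring-buffer index with power-of-two wraparound. */
	static inline unsigned int rb_next_pos(unsigned int pos,
					       unsigned int nr_entries)
	{
		return (pos + 1) & (nr_entries - 1);	/* == (pos + 1) % nr_entries */
	}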
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 9c682ac..34cc1d6 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -89,12 +89,14 @@
 /* The number of GC lists and the rate-limiter states go together. This way the
  * rate-limiter can dictate how much GC is needed based on resource utilization.
  */
-#define PBLK_GC_NR_LISTS 3
+#define PBLK_GC_NR_LISTS 4
 
 enum {
-	PBLK_RL_HIGH = 1,
-	PBLK_RL_MID = 2,
-	PBLK_RL_LOW = 3,
+	PBLK_RL_OFF = 0,
+	PBLK_RL_WERR = 1,
+	PBLK_RL_HIGH = 2,
+	PBLK_RL_MID = 3,
+	PBLK_RL_LOW = 4
 };
 
 #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
@@ -128,7 +130,6 @@
 struct pblk_rec_ctx {
 	struct pblk *pblk;
 	struct nvm_rq *rqd;
-	struct list_head failed;
 	struct work_struct ws_rec;
 };
 
@@ -279,6 +280,8 @@
 	int rb_user_active;
 	int rb_gc_active;
 
+	atomic_t werr_lines;	/* Number of write error lines that need GC */
+
 	struct timer_list u_timer;
 
 	unsigned long long nr_secs;
@@ -312,6 +315,7 @@
 	PBLK_LINEGC_MID = 23,
 	PBLK_LINEGC_HIGH = 24,
 	PBLK_LINEGC_FULL = 25,
+	PBLK_LINEGC_WERR = 26
 };
 
 #define PBLK_MAGIC 0x70626c6b /*pblk*/
@@ -413,6 +417,11 @@
 	struct line_smeta *buf;		/* smeta buffer in persistent format */
 };
 
+struct pblk_w_err_gc {
+	int has_write_err;
+	__le64 *lba_list;
+};
+
 struct pblk_line {
 	struct pblk *pblk;
 	unsigned int id;		/* Line number corresponds to the
@@ -458,6 +467,8 @@
 
 	struct kref ref;		/* Write buffer L2P references */
 
+	struct pblk_w_err_gc *w_err_gc;	/* Write error gc recovery metadata */
+
 	spinlock_t lock;		/* Necessary for invalid_bitmap only */
 };
 
@@ -489,6 +500,8 @@
 	struct list_head gc_mid_list;	/* Full lines ready to GC, mid isc */
 	struct list_head gc_low_list;	/* Full lines ready to GC, low isc */
 
+	struct list_head gc_werr_list;  /* Write err recovery list */
+
 	struct list_head gc_full_list;	/* Full lines ready to GC, no valid */
 	struct list_head gc_empty_list;	/* Full lines close, all valid */
 
@@ -664,12 +677,15 @@
 
 	struct list_head compl_list;
 
-	mempool_t *page_bio_pool;
-	mempool_t *gen_ws_pool;
-	mempool_t *rec_pool;
-	mempool_t *r_rq_pool;
-	mempool_t *w_rq_pool;
-	mempool_t *e_rq_pool;
+	spinlock_t resubmit_lock;	 /* Resubmit list lock */
+	struct list_head resubmit_list; /* Resubmit list for failed writes */
+
+	mempool_t page_bio_pool;
+	mempool_t gen_ws_pool;
+	mempool_t rec_pool;
+	mempool_t r_rq_pool;
+	mempool_t w_rq_pool;
+	mempool_t e_rq_pool;
 
 	struct workqueue_struct *close_wq;
 	struct workqueue_struct *bb_wq;
@@ -713,9 +729,6 @@
 unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
 				 unsigned int pos, unsigned int nr_entries,
 				 unsigned int count);
-unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio,
-				      struct list_head *list,
-				      unsigned int max);
 int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
 			struct ppa_addr ppa, int bio_iter, bool advanced_bio);
 unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int entries);
@@ -766,11 +779,13 @@
 struct pblk_line *pblk_line_get_erase(struct pblk *pblk);
 int pblk_line_erase(struct pblk *pblk, struct pblk_line *line);
 int pblk_line_is_full(struct pblk_line *line);
-void pblk_line_free(struct pblk *pblk, struct pblk_line *line);
+void pblk_line_free(struct pblk_line *line);
 void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_close(struct pblk *pblk, struct pblk_line *line);
 void pblk_line_close_ws(struct work_struct *work);
 void pblk_pipeline_stop(struct pblk *pblk);
+void __pblk_pipeline_stop(struct pblk *pblk);
+void __pblk_pipeline_flush(struct pblk *pblk);
 void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
 		     void (*work)(struct work_struct *), gfp_t gfp_mask,
 		     struct workqueue_struct *wq);
@@ -794,7 +809,6 @@
 void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas);
 void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
 		unsigned long *lun_bitmap);
-void pblk_end_io_sync(struct nvm_rq *rqd);
 int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
 		       int nr_pages);
 void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
@@ -837,23 +851,20 @@
 int pblk_write_ts(void *data);
 void pblk_write_timer_fn(struct timer_list *t);
 void pblk_write_should_kick(struct pblk *pblk);
+void pblk_write_kick(struct pblk *pblk);
 
 /*
  * pblk read path
  */
-extern struct bio_set *pblk_bio_set;
+extern struct bio_set pblk_bio_set;
 int pblk_submit_read(struct pblk *pblk, struct bio *bio);
 int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq);
 /*
  * pblk recovery
  */
-void pblk_submit_rec(struct work_struct *work);
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk);
 int pblk_recov_pad(struct pblk *pblk);
 int pblk_recov_check_emeta(struct pblk *pblk, struct line_emeta *emeta);
-int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
-			struct pblk_rec_ctx *recovery, u64 *comp_bits,
-			unsigned int comp);
 
 /*
  * pblk gc
@@ -864,7 +875,7 @@
 #define PBLK_GC_RSV_LINE 1	/* Reserved lines for GC */
 
 int pblk_gc_init(struct pblk *pblk);
-void pblk_gc_exit(struct pblk *pblk);
+void pblk_gc_exit(struct pblk *pblk, bool graceful);
 void pblk_gc_should_start(struct pblk *pblk);
 void pblk_gc_should_stop(struct pblk *pblk);
 void pblk_gc_should_kick(struct pblk *pblk);
@@ -894,6 +905,9 @@
 			    bool used);
 int pblk_rl_is_limit(struct pblk_rl *rl);
 
+void pblk_rl_werr_line_in(struct pblk_rl *rl);
+void pblk_rl_werr_line_out(struct pblk_rl *rl);
+
 /*
  * pblk sysfs
  */
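
The mempool_t pointer-to-embedded conversion in struct pblk above recurs throughout this pull (bcache, dm, and others below): mempool_init_*() and bioset_init() set up a caller-owned object and return an errno, rather than allocating and returning a pointer. A hedged before/after sketch with a hypothetical struct, not code from any driver here:

	struct foo {
		mempool_t pool;		/* was: mempool_t *pool */
	};

	static int foo_init(struct foo *f, struct kmem_cache *cache)
	{
		/* Returns 0 or -ENOMEM instead of a pointer/NULL. */
		return mempool_init_slab_pool(&f->pool, 16, cache);
	}

	static void foo_exit(struct foo *f)
	{
		/* mempool_exit() is a no-op on a zeroed, never-initialised
		 * pool, which is why several constructors in this series
		 * switch the parent allocation from kmalloc() to kzalloc().
		 */
		mempool_exit(&f->pool);
	}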
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 433dbed..6663893 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -191,10 +191,10 @@
 static void pmu_start(void);
 static irqreturn_t via_pmu_interrupt(int irq, void *arg);
 static irqreturn_t gpio1_interrupt(int irq, void *arg);
-static const struct file_operations pmu_info_proc_fops;
-static const struct file_operations pmu_irqstats_proc_fops;
+static int pmu_info_proc_show(struct seq_file *m, void *v);
+static int pmu_irqstats_proc_show(struct seq_file *m, void *v);
+static int pmu_battery_proc_show(struct seq_file *m, void *v);
 static void pmu_pass_intr(unsigned char *data, int len);
-static const struct file_operations pmu_battery_proc_fops;
 static const struct file_operations pmu_options_proc_fops;
 
 #ifdef CONFIG_ADB
@@ -511,13 +511,15 @@
 		for (i=0; i<pmu_battery_count; i++) {
 			char title[16];
 			sprintf(title, "battery_%ld", i);
-			proc_pmu_batt[i] = proc_create_data(title, 0, proc_pmu_root,
-					&pmu_battery_proc_fops, (void *)i);
+			proc_pmu_batt[i] = proc_create_single_data(title, 0,
+					proc_pmu_root, pmu_battery_proc_show,
+					(void *)i);
 		}
 
-		proc_pmu_info = proc_create("info", 0, proc_pmu_root, &pmu_info_proc_fops);
-		proc_pmu_irqstats = proc_create("interrupts", 0, proc_pmu_root,
-						&pmu_irqstats_proc_fops);
+		proc_pmu_info = proc_create_single("info", 0, proc_pmu_root,
+				pmu_info_proc_show);
+		proc_pmu_irqstats = proc_create_single("interrupts", 0,
+				proc_pmu_root, pmu_irqstats_proc_show);
 		proc_pmu_options = proc_create("options", 0600, proc_pmu_root,
 						&pmu_options_proc_fops);
 	}
@@ -811,19 +813,6 @@
 	return 0;
 }
 
-static int pmu_info_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pmu_info_proc_show, NULL);
-}
-
-static const struct file_operations pmu_info_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pmu_info_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int pmu_irqstats_proc_show(struct seq_file *m, void *v)
 {
 	int i;
@@ -848,19 +837,6 @@
 	return 0;
 }
 
-static int pmu_irqstats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pmu_irqstats_proc_show, NULL);
-}
-
-static const struct file_operations pmu_irqstats_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pmu_irqstats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int pmu_battery_proc_show(struct seq_file *m, void *v)
 {
 	long batnum = (long)m->private;
@@ -875,19 +851,6 @@
 	return 0;
 }
 
-static int pmu_battery_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pmu_battery_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations pmu_battery_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pmu_battery_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int pmu_options_proc_show(struct seq_file *m, void *v)
 {
 #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC32)
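
The via-pmu conversion above uses the proc_create_single()/proc_create_single_data() helpers introduced this cycle: they take a seq_file show function directly and supply the single_open()-style open/read/llseek/release boilerplate that each of the removed pmu_*_proc_fops spelled out. Minimal shape of the new-style registration (hypothetical "demo" file):

	static int demo_proc_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello\n");
		return 0;
	}

	/* was: proc_create("demo", 0, parent, &demo_proc_fops), plus an
	 * open wrapper that called single_open(file, demo_proc_show, NULL) */
	proc_create_single("demo", 0, parent, demo_proc_show);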
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index 3ef7f03..fc3c237 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -373,33 +373,24 @@
 };
 
 /**
- * parse_pcc_subspace - Parse the PCC table and verify PCC subspace
- *		entries. There should be one entry per PCC client.
+ * parse_pcc_subspace - Count PCC subspaces defined
  * @header: Pointer to the ACPI subtable header under the PCCT.
  * @end: End of subtable entry.
  *
- * Return: 0 for Success, else errno.
+ * Return: If we find a PCC subspace entry of a valid type, return 0.
+ *	Otherwise, return -EINVAL.
  *
  * This gets called for each entry in the PCC table.
  */
 static int parse_pcc_subspace(struct acpi_subtable_header *header,
 		const unsigned long end)
 {
-	struct acpi_pcct_hw_reduced *pcct_ss;
+	struct acpi_pcct_subspace *ss = (struct acpi_pcct_subspace *) header;
 
-	if (pcc_mbox_ctrl.num_chans <= MAX_PCC_SUBSPACES) {
-		pcct_ss = (struct acpi_pcct_hw_reduced *) header;
+	if (ss->header.type < ACPI_PCCT_TYPE_RESERVED)
+		return 0;
 
-		if ((pcct_ss->header.type !=
-				ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE)
-		    && (pcct_ss->header.type !=
-				ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2)) {
-			pr_err("Incorrect PCC Subspace type detected\n");
-			return -EINVAL;
-		}
-	}
-
-	return 0;
+	return -EINVAL;
 }
 
 /**
@@ -449,8 +440,8 @@
 	struct acpi_table_header *pcct_tbl;
 	struct acpi_subtable_header *pcct_entry;
 	struct acpi_table_pcct *acpi_pcct_tbl;
+	struct acpi_subtable_proc proc[ACPI_PCCT_TYPE_RESERVED];
 	int count, i, rc;
-	int sum = 0;
 	acpi_status status = AE_OK;
 
 	/* Search for PCCT */
@@ -459,43 +450,41 @@
 	if (ACPI_FAILURE(status) || !pcct_tbl)
 		return -ENODEV;
 
-	count = acpi_table_parse_entries(ACPI_SIG_PCCT,
-			sizeof(struct acpi_table_pcct),
-			ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE,
-			parse_pcc_subspace, MAX_PCC_SUBSPACES);
-	sum += (count > 0) ? count : 0;
+	/* Set up the subtable handlers */
+	for (i = ACPI_PCCT_TYPE_GENERIC_SUBSPACE;
+	     i < ACPI_PCCT_TYPE_RESERVED; i++) {
+		proc[i].id = i;
+		proc[i].count = 0;
+		proc[i].handler = parse_pcc_subspace;
+	}
 
-	count = acpi_table_parse_entries(ACPI_SIG_PCCT,
-			sizeof(struct acpi_table_pcct),
-			ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2,
-			parse_pcc_subspace, MAX_PCC_SUBSPACES);
-	sum += (count > 0) ? count : 0;
-
-	if (sum == 0 || sum >= MAX_PCC_SUBSPACES) {
-		pr_err("Error parsing PCC subspaces from PCCT\n");
+	count = acpi_table_parse_entries_array(ACPI_SIG_PCCT,
+			sizeof(struct acpi_table_pcct), proc,
+			ACPI_PCCT_TYPE_RESERVED, MAX_PCC_SUBSPACES);
+	if (count == 0 || count > MAX_PCC_SUBSPACES) {
+		pr_warn("Invalid PCCT: %d PCC subspaces\n", count);
 		return -EINVAL;
 	}
 
-	pcc_mbox_channels = kzalloc(sizeof(struct mbox_chan) *
-			sum, GFP_KERNEL);
+	pcc_mbox_channels = kzalloc(sizeof(struct mbox_chan) * count, GFP_KERNEL);
 	if (!pcc_mbox_channels) {
 		pr_err("Could not allocate space for PCC mbox channels\n");
 		return -ENOMEM;
 	}
 
-	pcc_doorbell_vaddr = kcalloc(sum, sizeof(void *), GFP_KERNEL);
+	pcc_doorbell_vaddr = kcalloc(count, sizeof(void *), GFP_KERNEL);
 	if (!pcc_doorbell_vaddr) {
 		rc = -ENOMEM;
 		goto err_free_mbox;
 	}
 
-	pcc_doorbell_ack_vaddr = kcalloc(sum, sizeof(void *), GFP_KERNEL);
+	pcc_doorbell_ack_vaddr = kcalloc(count, sizeof(void *), GFP_KERNEL);
 	if (!pcc_doorbell_ack_vaddr) {
 		rc = -ENOMEM;
 		goto err_free_db_vaddr;
 	}
 
-	pcc_doorbell_irq = kcalloc(sum, sizeof(int), GFP_KERNEL);
+	pcc_doorbell_irq = kcalloc(count, sizeof(int), GFP_KERNEL);
 	if (!pcc_doorbell_irq) {
 		rc = -ENOMEM;
 		goto err_free_db_ack_vaddr;
@@ -509,18 +498,24 @@
 	if (acpi_pcct_tbl->flags & ACPI_PCCT_DOORBELL)
 		pcc_mbox_ctrl.txdone_irq = true;
 
-	for (i = 0; i < sum; i++) {
+	for (i = 0; i < count; i++) {
 		struct acpi_generic_address *db_reg;
-		struct acpi_pcct_hw_reduced *pcct_ss;
+		struct acpi_pcct_subspace *pcct_ss;
 		pcc_mbox_channels[i].con_priv = pcct_entry;
 
-		pcct_ss = (struct acpi_pcct_hw_reduced *) pcct_entry;
+		if (pcct_entry->type == ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE ||
+		    pcct_entry->type == ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2) {
+			struct acpi_pcct_hw_reduced *pcct_hrss;
 
-		if (pcc_mbox_ctrl.txdone_irq) {
-			rc = pcc_parse_subspace_irq(i, pcct_ss);
-			if (rc < 0)
-				goto err;
+			pcct_hrss = (struct acpi_pcct_hw_reduced *) pcct_entry;
+
+			if (pcc_mbox_ctrl.txdone_irq) {
+				rc = pcc_parse_subspace_irq(i, pcct_hrss);
+				if (rc < 0)
+					goto err;
+			}
 		}
+		pcct_ss = (struct acpi_pcct_subspace *) pcct_entry;
 
 		/* If doorbell is in system memory, cache the virt address */
 		db_reg = &pcct_ss->doorbell_register;
@@ -531,7 +526,7 @@
 			((unsigned long) pcct_entry + pcct_entry->length);
 	}
 
-	pcc_mbox_ctrl.num_chans = sum;
+	pcc_mbox_ctrl.num_chans = count;
 
 	pr_info("Detected %d PCC Subspaces\n", pcc_mbox_ctrl.num_chans);
 
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 3a0cfb2..d6bf294f 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -269,7 +269,7 @@
 	atomic_t		*stripe_sectors_dirty;
 	unsigned long		*full_dirty_stripes;
 
-	struct bio_set		*bio_split;
+	struct bio_set		bio_split;
 
 	unsigned		data_csum:1;
 
@@ -345,6 +345,7 @@
 
 	struct keybuf		writeback_keys;
 
+	struct task_struct	*status_update_thread;
 	/*
 	 * Order the write-half of writeback operations strongly in dispatch
 	 * order.  (Maintain LBA order; don't allow reads completing out of
@@ -392,6 +393,7 @@
 #define DEFAULT_CACHED_DEV_ERROR_LIMIT	64
 	atomic_t		io_errors;
 	unsigned		error_limit;
+	unsigned		offline_seconds;
 
 	char			backing_dev_name[BDEVNAME_SIZE];
 };
@@ -528,9 +530,9 @@
 	struct closure		sb_write;
 	struct semaphore	sb_write_mutex;
 
-	mempool_t		*search;
-	mempool_t		*bio_meta;
-	struct bio_set		*bio_split;
+	mempool_t		search;
+	mempool_t		bio_meta;
+	struct bio_set		bio_split;
 
 	/* For the btree cache */
 	struct shrinker		shrink;
@@ -655,7 +657,7 @@
 	 * A btree node on disk could have too many bsets for an iterator to fit
 	 * on the stack - have to dynamically allocate them
 	 */
-	mempool_t		*fill_iter;
+	mempool_t		fill_iter;
 
 	struct bset_sort_state	sort;
 
@@ -956,8 +958,6 @@
 void bch_write_bdev_super(struct cached_dev *, struct closure *);
 
 extern struct workqueue_struct *bcache_wq;
-extern const char * const bch_cache_modes[];
-extern const char * const bch_stop_on_failure_modes[];
 extern struct mutex bch_register_lock;
 extern struct list_head bch_cache_sets;
 
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 579c696..f3403b4 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1118,8 +1118,7 @@
 
 void bch_bset_sort_state_free(struct bset_sort_state *state)
 {
-	if (state->pool)
-		mempool_destroy(state->pool);
+	mempool_exit(&state->pool);
 }
 
 int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
@@ -1129,11 +1128,7 @@
 	state->page_order = page_order;
 	state->crit_factor = int_sqrt(1 << page_order);
 
-	state->pool = mempool_create_page_pool(1, page_order);
-	if (!state->pool)
-		return -ENOMEM;
-
-	return 0;
+	return mempool_init_page_pool(&state->pool, 1, page_order);
 }
 EXPORT_SYMBOL(bch_bset_sort_state_init);
 
@@ -1191,7 +1186,7 @@
 
 		BUG_ON(order > state->page_order);
 
-		outp = mempool_alloc(state->pool, GFP_NOIO);
+		outp = mempool_alloc(&state->pool, GFP_NOIO);
 		out = page_address(outp);
 		used_mempool = true;
 		order = state->page_order;
@@ -1220,7 +1215,7 @@
 	}
 
 	if (used_mempool)
-		mempool_free(virt_to_page(out), state->pool);
+		mempool_free(virt_to_page(out), &state->pool);
 	else
 		free_pages((unsigned long) out, order);
 
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 0c24280..b867f22 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -347,7 +347,7 @@
 /* Sorting */
 
 struct bset_sort_state {
-	mempool_t		*pool;
+	mempool_t		pool;
 
 	unsigned		page_order;
 	unsigned		crit_factor;
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 17936b2..2a0968c0 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -204,7 +204,7 @@
 	struct bset *i = btree_bset_first(b);
 	struct btree_iter *iter;
 
-	iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
+	iter = mempool_alloc(&b->c->fill_iter, GFP_NOIO);
 	iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
 	iter->used = 0;
 
@@ -271,7 +271,7 @@
 		bch_bset_init_next(&b->keys, write_block(b),
 				   bset_magic(&b->c->sb));
 out:
-	mempool_free(iter, b->c->fill_iter);
+	mempool_free(iter, &b->c->fill_iter);
 	return;
 err:
 	set_btree_node_io_error(b);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 2ddf851..9612873 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -17,12 +17,12 @@
 void bch_bbio_free(struct bio *bio, struct cache_set *c)
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
-	mempool_free(b, c->bio_meta);
+	mempool_free(b, &c->bio_meta);
 }
 
 struct bio *bch_bbio_alloc(struct cache_set *c)
 {
-	struct bbio *b = mempool_alloc(c->bio_meta, GFP_NOIO);
+	struct bbio *b = mempool_alloc(&c->bio_meta, GFP_NOIO);
 	struct bio *bio = &b->bio;
 
 	bio_init(bio, bio->bi_inline_vecs, bucket_pages(c));
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 8e3e865..ae67f5f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -213,7 +213,7 @@
 	do {
 		unsigned i;
 		struct bkey *k;
-		struct bio_set *split = op->c->bio_split;
+		struct bio_set *split = &op->c->bio_split;
 
 		/* 1 for the device pointer and 1 for the chksum */
 		if (bch_keylist_realloc(&op->insert_keys,
@@ -548,7 +548,7 @@
 
 	n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
 				      KEY_OFFSET(k) - bio->bi_iter.bi_sector),
-			   GFP_NOIO, s->d->bio_split);
+			   GFP_NOIO, &s->d->bio_split);
 
 	bio_key = &container_of(n, struct bbio, bio)->key;
 	bch_bkey_copy_single_ptr(bio_key, k, ptr);
@@ -707,7 +707,7 @@
 
 	bio_complete(s);
 	closure_debug_destroy(cl);
-	mempool_free(s, s->d->c->search);
+	mempool_free(s, &s->d->c->search);
 }
 
 static inline struct search *search_alloc(struct bio *bio,
@@ -715,7 +715,7 @@
 {
 	struct search *s;
 
-	s = mempool_alloc(d->c->search, GFP_NOIO);
+	s = mempool_alloc(&d->c->search, GFP_NOIO);
 
 	closure_init(&s->cl, NULL);
 	do_bio_hook(s, bio, request_endio);
@@ -864,7 +864,7 @@
 	s->cache_missed = 1;
 
 	if (s->cache_miss || s->iop.bypass) {
-		miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+		miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
 		ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
 		goto out_submit;
 	}
@@ -887,14 +887,14 @@
 
 	s->iop.replace = true;
 
-	miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+	miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
 
 	/* btree_search_recurse()'s btree iterator is no good anymore */
 	ret = miss == bio ? MAP_DONE : -EINTR;
 
 	cache_bio = bio_alloc_bioset(GFP_NOWAIT,
 			DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
-			dc->disk.bio_split);
+			&dc->disk.bio_split);
 	if (!cache_bio)
 		goto out_submit;
 
@@ -1008,7 +1008,7 @@
 			struct bio *flush;
 
 			flush = bio_alloc_bioset(GFP_NOIO, 0,
-						 dc->disk.bio_split);
+						 &dc->disk.bio_split);
 			if (!flush) {
 				s->iop.status = BLK_STS_RESOURCE;
 				goto insert_data;
@@ -1021,7 +1021,7 @@
 			closure_bio_submit(s->iop.c, flush, cl);
 		}
 	} else {
-		s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
+		s->iop.bio = bio_clone_fast(bio, GFP_NOIO, &dc->disk.bio_split);
 		/* I/O request sent to backing device */
 		bio->bi_end_io = backing_request_endio;
 		closure_bio_submit(s->iop.c, bio, cl);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 3dea06b..a31e55b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -37,24 +37,6 @@
 	0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99
 };
 
-/* Default is -1; we skip past it for struct cached_dev's cache mode */
-const char * const bch_cache_modes[] = {
-	"default",
-	"writethrough",
-	"writeback",
-	"writearound",
-	"none",
-	NULL
-};
-
-/* Default is -1; we skip past it for stop_when_cache_set_failed */
-const char * const bch_stop_on_failure_modes[] = {
-	"default",
-	"auto",
-	"always",
-	NULL
-};
-
 static struct kobject *bcache_kobj;
 struct mutex bch_register_lock;
 LIST_HEAD(bch_cache_sets);
@@ -654,6 +636,11 @@
 		     unsigned int cmd, unsigned long arg)
 {
 	struct bcache_device *d = b->bd_disk->private_data;
+	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
+	if (dc->io_disable)
+		return -EIO;
+
 	return d->ioctl(d, mode, cmd, arg);
 }
 
@@ -766,8 +753,7 @@
 		put_disk(d->disk);
 	}
 
-	if (d->bio_split)
-		bioset_free(d->bio_split);
+	bioset_exit(&d->bio_split);
 	kvfree(d->full_dirty_stripes);
 	kvfree(d->stripe_sectors_dirty);
 
@@ -809,9 +795,8 @@
 	if (idx < 0)
 		return idx;
 
-	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio),
-					   BIOSET_NEED_BVECS |
-					   BIOSET_NEED_RESCUER)) ||
+	if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
+			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
 	    !(d->disk = alloc_disk(BCACHE_MINORS))) {
 		ida_simple_remove(&bcache_device_idx, idx);
 		return -ENOMEM;
@@ -864,6 +849,44 @@
 	c->cached_dev_sectors = sectors;
 }
 
+#define BACKING_DEV_OFFLINE_TIMEOUT 5
+static int cached_dev_status_update(void *arg)
+{
+	struct cached_dev *dc = arg;
+	struct request_queue *q;
+
+	/*
+	 * If this kthread is being stopped externally, quit here directly.
+	 * dc->io_disable might be set via the sysfs interface, so check it
+	 * here too.
+	 */
+	while (!kthread_should_stop() && !dc->io_disable) {
+		q = bdev_get_queue(dc->bdev);
+		if (blk_queue_dying(q))
+			dc->offline_seconds++;
+		else
+			dc->offline_seconds = 0;
+
+		if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
+			pr_err("%s: device offline for %d seconds\n",
+			       dc->backing_dev_name,
+			       BACKING_DEV_OFFLINE_TIMEOUT);
+			pr_err("%s: disable I/O request due to backing "
+			       "device offline\n", dc->disk.name);
+			dc->io_disable = true;
+			/* let others know earlier that io_disable is true */
+			smp_mb();
+			bcache_device_stop(&dc->disk);
+			break;
+		}
+		schedule_timeout_interruptible(HZ);
+	}
+
+	wait_for_kthread_stop();
+	return 0;
+}
+
+
 void bch_cached_dev_run(struct cached_dev *dc)
 {
 	struct bcache_device *d = &dc->disk;
@@ -906,6 +929,14 @@
 	if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
 	    sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
 		pr_debug("error creating sysfs link");
+
+	dc->status_update_thread = kthread_run(cached_dev_status_update,
+					       dc, "bcache_status_update");
+	if (IS_ERR(dc->status_update_thread)) {
+		pr_warn("failed to create bcache_status_update kthread, "
+			"continuing to run without monitoring backing "
+			"device status\n");
+	}
 }
 
 /*
@@ -1139,6 +1170,8 @@
 		kthread_stop(dc->writeback_thread);
 	if (dc->writeback_write_wq)
 		destroy_workqueue(dc->writeback_write_wq);
+	if (!IS_ERR_OR_NULL(dc->status_update_thread))
+		kthread_stop(dc->status_update_thread);
 
 	if (atomic_read(&dc->running))
 		bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
@@ -1465,14 +1498,10 @@
 
 	if (c->moving_gc_wq)
 		destroy_workqueue(c->moving_gc_wq);
-	if (c->bio_split)
-		bioset_free(c->bio_split);
-	if (c->fill_iter)
-		mempool_destroy(c->fill_iter);
-	if (c->bio_meta)
-		mempool_destroy(c->bio_meta);
-	if (c->search)
-		mempool_destroy(c->search);
+	bioset_exit(&c->bio_split);
+	mempool_exit(&c->fill_iter);
+	mempool_exit(&c->bio_meta);
+	mempool_exit(&c->search);
 	kfree(c->devices);
 
 	mutex_lock(&bch_register_lock);
@@ -1683,21 +1712,17 @@
 	INIT_LIST_HEAD(&c->btree_cache_freed);
 	INIT_LIST_HEAD(&c->data_buckets);
 
-	c->search = mempool_create_slab_pool(32, bch_search_cache);
-	if (!c->search)
-		goto err;
-
 	iter_size = (sb->bucket_size / sb->block_size + 1) *
 		sizeof(struct btree_iter_set);
 
 	if (!(c->devices = kzalloc(c->nr_uuids * sizeof(void *), GFP_KERNEL)) ||
-	    !(c->bio_meta = mempool_create_kmalloc_pool(2,
-				sizeof(struct bbio) + sizeof(struct bio_vec) *
-				bucket_pages(c))) ||
-	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
-	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio),
-					   BIOSET_NEED_BVECS |
-					   BIOSET_NEED_RESCUER)) ||
+	    mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
+	    mempool_init_kmalloc_pool(&c->bio_meta, 2,
+				      sizeof(struct bbio) + sizeof(struct bio_vec) *
+				      bucket_pages(c)) ||
+	    mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
+	    bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
+			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
 						WQ_MEM_RECLAIM, 0)) ||
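
cached_dev_status_update() above is a 1 Hz polling kthread: it counts consecutive seconds the backing queue has been dying and, past BACKING_DEV_OFFLINE_TIMEOUT, sets io_disable and stops the bcache device. The skeleton it follows, reduced to the kthread idiom (hypothetical names):

	static int poll_thread(void *arg)
	{
		while (!kthread_should_stop()) {
			/* ... sample a condition, maybe break out ... */

			/* Sleeps about a second, but returns early if
			 * someone calls kthread_stop() on this thread.
			 */
			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}

	/* start: task = kthread_run(poll_thread, dc, "poller");
	 * stop:  if (!IS_ERR_OR_NULL(task)) kthread_stop(task); */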
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index dfeef58..8ccbc8f 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -16,6 +16,22 @@
 #include <linux/sort.h>
 #include <linux/sched/clock.h>
 
+/* Cache modes for struct cached_dev; indexed directly by BDEV_CACHE_MODE() */
+static const char * const bch_cache_modes[] = {
+	"writethrough",
+	"writeback",
+	"writearound",
+	"none",
+	NULL
+};
+
+/* Modes for stop_when_cache_set_failed; indexed directly by the field */
+static const char * const bch_stop_on_failure_modes[] = {
+	"auto",
+	"always",
+	NULL
+};
+
 static const char * const cache_replacement_policies[] = {
 	"lru",
 	"fifo",
@@ -114,6 +130,20 @@
 rw_attribute(copy_gc_enabled);
 rw_attribute(size);
 
+static ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
+			    size_t selected)
+{
+	char *out = buf;
+	size_t i;
+
+	for (i = 0; list[i]; i++)
+		out += snprintf(out, buf + size - out,
+				i == selected ? "[%s] " : "%s ", list[i]);
+
+	out[-1] = '\n';
+	return out - buf;
+}
+
 SHOW(__bch_cached_dev)
 {
 	struct cached_dev *dc = container_of(kobj, struct cached_dev,
@@ -124,12 +154,12 @@
 
 	if (attr == &sysfs_cache_mode)
 		return bch_snprint_string_list(buf, PAGE_SIZE,
-					       bch_cache_modes + 1,
+					       bch_cache_modes,
 					       BDEV_CACHE_MODE(&dc->sb));
 
 	if (attr == &sysfs_stop_when_cache_set_failed)
 		return bch_snprint_string_list(buf, PAGE_SIZE,
-					       bch_stop_on_failure_modes + 1,
+					       bch_stop_on_failure_modes,
 					       dc->stop_when_cache_set_failed);
 
 
@@ -253,8 +283,7 @@
 		bch_cached_dev_run(dc);
 
 	if (attr == &sysfs_cache_mode) {
-		v = bch_read_string_list(buf, bch_cache_modes + 1);
-
+		v = __sysfs_match_string(bch_cache_modes, -1, buf);
 		if (v < 0)
 			return v;
 
@@ -265,8 +294,7 @@
 	}
 
 	if (attr == &sysfs_stop_when_cache_set_failed) {
-		v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1);
-
+		v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
 		if (v < 0)
 			return v;
 
@@ -635,6 +663,7 @@
 STORE(__bch_cache_set)
 {
 	struct cache_set *c = container_of(kobj, struct cache_set, kobj);
+	ssize_t v;
 
 	if (attr == &sysfs_unregister)
 		bch_cache_set_unregister(c);
@@ -698,8 +727,7 @@
 		      c->congested_write_threshold_us);
 
 	if (attr == &sysfs_errors) {
-		ssize_t v = bch_read_string_list(buf, error_actions);
-
+		v = __sysfs_match_string(error_actions, -1, buf);
 		if (v < 0)
 			return v;
 
@@ -714,8 +742,7 @@
 		c->error_decay = strtoul_or_return(buf) / 88;
 
 	if (attr == &sysfs_io_disable) {
-		int v = strtoul_or_return(buf);
-
+		v = strtoul_or_return(buf);
 		if (v) {
 			if (test_and_set_bit(CACHE_SET_IO_DISABLE,
 					     &c->flags))
@@ -929,6 +956,7 @@
 STORE(__bch_cache)
 {
 	struct cache *ca = container_of(kobj, struct cache, kobj);
+	ssize_t v;
 
 	if (attr == &sysfs_discard) {
 		bool v = strtoul_or_return(buf);
@@ -943,8 +971,7 @@
 	}
 
 	if (attr == &sysfs_cache_replacement_policy) {
-		ssize_t v = bch_read_string_list(buf, cache_replacement_policies);
-
+		v = __sysfs_match_string(cache_replacement_policies, -1, buf);
 		if (v < 0)
 			return v;
 
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 74febd5..fc479b0 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -120,41 +120,6 @@
 		return sprintf(buf, "%llu.%i%c", q, t * 10 / 1024, units[u]);
 }
 
-ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
-			    size_t selected)
-{
-	char *out = buf;
-	size_t i;
-
-	for (i = 0; list[i]; i++)
-		out += snprintf(out, buf + size - out,
-				i == selected ? "[%s] " : "%s ", list[i]);
-
-	out[-1] = '\n';
-	return out - buf;
-}
-
-ssize_t bch_read_string_list(const char *buf, const char * const list[])
-{
-	size_t i;
-	char *s, *d = kstrndup(buf, PAGE_SIZE - 1, GFP_KERNEL);
-	if (!d)
-		return -ENOMEM;
-
-	s = strim(d);
-
-	for (i = 0; list[i]; i++)
-		if (!strcmp(list[i], s))
-			break;
-
-	kfree(d);
-
-	if (!list[i])
-		return -EINVAL;
-
-	return i;
-}
-
 bool bch_is_zero(const char *p, size_t n)
 {
 	size_t i;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 2680245..cced87f 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -365,11 +365,6 @@
 bool bch_is_zero(const char *p, size_t n);
 int bch_parse_uuid(const char *s, char *uuid);
 
-ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
-			    size_t selected);
-
-ssize_t bch_read_string_list(const char *buf, const char * const list[]);
-
 struct time_stats {
 	spinlock_t	lock;
 	/*
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index 874841f..e794e36 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -19,7 +19,7 @@
 
 struct dm_bio_prison {
 	spinlock_t lock;
-	mempool_t *cell_pool;
+	mempool_t cell_pool;
 	struct rb_root cells;
 };
 
@@ -33,15 +33,16 @@
  */
 struct dm_bio_prison *dm_bio_prison_create(void)
 {
-	struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
+	struct dm_bio_prison *prison = kzalloc(sizeof(*prison), GFP_KERNEL);
+	int ret;
 
 	if (!prison)
 		return NULL;
 
 	spin_lock_init(&prison->lock);
 
-	prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
-	if (!prison->cell_pool) {
+	ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
+	if (ret) {
 		kfree(prison);
 		return NULL;
 	}
@@ -54,21 +55,21 @@
 
 void dm_bio_prison_destroy(struct dm_bio_prison *prison)
 {
-	mempool_destroy(prison->cell_pool);
+	mempool_exit(&prison->cell_pool);
 	kfree(prison);
 }
 EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
 
 struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
 {
-	return mempool_alloc(prison->cell_pool, gfp);
+	return mempool_alloc(&prison->cell_pool, gfp);
 }
 EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);
 
 void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
 			     struct dm_bio_prison_cell *cell)
 {
-	mempool_free(cell, prison->cell_pool);
+	mempool_free(cell, &prison->cell_pool);
 }
 EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
 
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
index 8ce3a1a..f866bc9 100644
--- a/drivers/md/dm-bio-prison-v2.c
+++ b/drivers/md/dm-bio-prison-v2.c
@@ -21,7 +21,7 @@
 	struct workqueue_struct *wq;
 
 	spinlock_t lock;
-	mempool_t *cell_pool;
+	mempool_t cell_pool;
 	struct rb_root cells;
 };
 
@@ -35,7 +35,8 @@
  */
 struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq)
 {
-	struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
+	struct dm_bio_prison_v2 *prison = kzalloc(sizeof(*prison), GFP_KERNEL);
+	int ret;
 
 	if (!prison)
 		return NULL;
@@ -43,8 +44,8 @@
 	prison->wq = wq;
 	spin_lock_init(&prison->lock);
 
-	prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
-	if (!prison->cell_pool) {
+	ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
+	if (ret) {
 		kfree(prison);
 		return NULL;
 	}
@@ -57,21 +58,21 @@
 
 void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison)
 {
-	mempool_destroy(prison->cell_pool);
+	mempool_exit(&prison->cell_pool);
 	kfree(prison);
 }
 EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2);
 
 struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp)
 {
-	return mempool_alloc(prison->cell_pool, gfp);
+	return mempool_alloc(&prison->cell_pool, gfp);
 }
 EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2);
 
 void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
 				struct dm_bio_prison_cell_v2 *cell)
 {
-	mempool_free(cell, prison->cell_pool);
+	mempool_free(cell, &prison->cell_pool);
 }
 EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2);
 
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index da20863..001c712 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -447,9 +447,9 @@
 	struct work_struct migration_worker;
 	struct delayed_work waker;
 	struct dm_bio_prison_v2 *prison;
-	struct bio_set *bs;
+	struct bio_set bs;
 
-	mempool_t *migration_pool;
+	mempool_t migration_pool;
 
 	struct dm_cache_policy *policy;
 	unsigned policy_nr_args;
@@ -550,7 +550,7 @@
 {
 	struct dm_cache_migration *mg;
 
-	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
+	mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT);
 	if (!mg)
 		return NULL;
 
@@ -569,7 +569,7 @@
 	if (atomic_dec_and_test(&cache->nr_allocated_migrations))
 		wake_up(&cache->migration_wait);
 
-	mempool_free(mg, cache->migration_pool);
+	mempool_free(mg, &cache->migration_pool);
 }
 
 /*----------------------------------------------------------------*/
@@ -924,7 +924,7 @@
 static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
 				      dm_oblock_t oblock, dm_cblock_t cblock)
 {
-	struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs);
+	struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, &cache->bs);
 
 	BUG_ON(!origin_bio);
 
@@ -2011,7 +2011,7 @@
 {
 	unsigned i;
 
-	mempool_destroy(cache->migration_pool);
+	mempool_exit(&cache->migration_pool);
 
 	if (cache->prison)
 		dm_bio_prison_destroy_v2(cache->prison);
@@ -2047,8 +2047,7 @@
 		kfree(cache->ctr_args[i]);
 	kfree(cache->ctr_args);
 
-	if (cache->bs)
-		bioset_free(cache->bs);
+	bioset_exit(&cache->bs);
 
 	kfree(cache);
 }
@@ -2498,8 +2497,8 @@
 	cache->features = ca->features;
 	if (writethrough_mode(cache)) {
 		/* Create bioset for writethrough bios issued to origin */
-		cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0);
-		if (!cache->bs)
+		r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
+		if (r)
 			goto bad;
 	}
 
@@ -2630,9 +2629,9 @@
 		goto bad;
 	}
 
-	cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
-							 migration_cache);
-	if (!cache->migration_pool) {
+	r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
+				   migration_cache);
+	if (r) {
 		*error = "Error creating cache's migration mempool";
 		goto bad;
 	}
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 3222e21..f21c5d2 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -91,8 +91,8 @@
 	/*
 	 * io objects are allocated from here.
 	 */
-	struct bio_set *io_bs;
-	struct bio_set *bs;
+	struct bio_set io_bs;
+	struct bio_set bs;
 
 	/*
 	 * freeze/thaw support require holding onto a super block
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 44ff473..da02f4d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -143,14 +143,14 @@
 	 * pool for per bio private data, crypto requests,
 	 * encryption requests/buffer pages and integrity tags
 	 */
-	mempool_t *req_pool;
-	mempool_t *page_pool;
-	mempool_t *tag_pool;
+	mempool_t req_pool;
+	mempool_t page_pool;
+	mempool_t tag_pool;
 	unsigned tag_pool_max_sectors;
 
 	struct percpu_counter n_allocated_pages;
 
-	struct bio_set *bs;
+	struct bio_set bs;
 	struct mutex bio_alloc_lock;
 
 	struct workqueue_struct *io_queue;
@@ -1245,7 +1245,7 @@
 	unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
 
 	if (!ctx->r.req)
-		ctx->r.req = mempool_alloc(cc->req_pool, GFP_NOIO);
+		ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO);
 
 	skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]);
 
@@ -1262,7 +1262,7 @@
 				 struct convert_context *ctx)
 {
 	if (!ctx->r.req_aead)
-		ctx->r.req_aead = mempool_alloc(cc->req_pool, GFP_NOIO);
+		ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO);
 
 	aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]);
 
@@ -1290,7 +1290,7 @@
 	struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
 
 	if ((struct skcipher_request *)(io + 1) != req)
-		mempool_free(req, cc->req_pool);
+		mempool_free(req, &cc->req_pool);
 }
 
 static void crypt_free_req_aead(struct crypt_config *cc,
@@ -1299,7 +1299,7 @@
 	struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
 
 	if ((struct aead_request *)(io + 1) != req)
-		mempool_free(req, cc->req_pool);
+		mempool_free(req, &cc->req_pool);
 }
 
 static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_bio)
@@ -1409,7 +1409,7 @@
 	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
 		mutex_lock(&cc->bio_alloc_lock);
 
-	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
+	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, &cc->bs);
 	if (!clone)
 		goto out;
 
@@ -1418,7 +1418,7 @@
 	remaining_size = size;
 
 	for (i = 0; i < nr_iovecs; i++) {
-		page = mempool_alloc(cc->page_pool, gfp_mask);
+		page = mempool_alloc(&cc->page_pool, gfp_mask);
 		if (!page) {
 			crypt_free_buffer_pages(cc, clone);
 			bio_put(clone);
@@ -1453,7 +1453,7 @@
 
 	bio_for_each_segment_all(bv, clone, i) {
 		BUG_ON(!bv->bv_page);
-		mempool_free(bv->bv_page, cc->page_pool);
+		mempool_free(bv->bv_page, &cc->page_pool);
 	}
 }
 
@@ -1492,7 +1492,7 @@
 		crypt_free_req(cc, io->ctx.r.req, base_bio);
 
 	if (unlikely(io->integrity_metadata_from_pool))
-		mempool_free(io->integrity_metadata, io->cc->tag_pool);
+		mempool_free(io->integrity_metadata, &io->cc->tag_pool);
 	else
 		kfree(io->integrity_metadata);
 
@@ -1565,7 +1565,7 @@
 	 * biovecs we don't need to worry about the block layer
 	 * modifying the biovec array; so leverage bio_clone_fast().
 	 */
-	clone = bio_clone_fast(io->base_bio, gfp, cc->bs);
+	clone = bio_clone_fast(io->base_bio, gfp, &cc->bs);
 	if (!clone)
 		return 1;
 
@@ -2219,15 +2219,13 @@
 
 	crypt_free_tfms(cc);
 
-	if (cc->bs)
-		bioset_free(cc->bs);
+	bioset_exit(&cc->bs);
 
-	mempool_destroy(cc->page_pool);
-	mempool_destroy(cc->req_pool);
-	mempool_destroy(cc->tag_pool);
+	mempool_exit(&cc->page_pool);
+	mempool_exit(&cc->req_pool);
+	mempool_exit(&cc->tag_pool);
 
-	if (cc->page_pool)
-		WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0);
+	WARN_ON(percpu_counter_sum(&cc->n_allocated_pages) != 0);
 	percpu_counter_destroy(&cc->n_allocated_pages);
 
 	if (cc->iv_gen_ops && cc->iv_gen_ops->dtr)
@@ -2743,8 +2741,6 @@
 		iv_size_padding = align_mask;
 	}
 
-	ret = -ENOMEM;
-
 	/*  ...| IV + padding | original IV | original sec. number | bio tag offset | */
 	additional_req_size = sizeof(struct dm_crypt_request) +
 		iv_size_padding + cc->iv_size +
@@ -2752,8 +2748,8 @@
 		sizeof(uint64_t) +
 		sizeof(unsigned int);
 
-	cc->req_pool = mempool_create_kmalloc_pool(MIN_IOS, cc->dmreq_start + additional_req_size);
-	if (!cc->req_pool) {
+	ret = mempool_init_kmalloc_pool(&cc->req_pool, MIN_IOS, cc->dmreq_start + additional_req_size);
+	if (ret) {
 		ti->error = "Cannot allocate crypt request mempool";
 		goto bad;
 	}
@@ -2762,14 +2758,14 @@
 		ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
 		      ARCH_KMALLOC_MINALIGN);
 
-	cc->page_pool = mempool_create(BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
-	if (!cc->page_pool) {
+	ret = mempool_init(&cc->page_pool, BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
+	if (ret) {
 		ti->error = "Cannot allocate page mempool";
 		goto bad;
 	}
 
-	cc->bs = bioset_create(MIN_IOS, 0, BIOSET_NEED_BVECS);
-	if (!cc->bs) {
+	ret = bioset_init(&cc->bs, MIN_IOS, 0, BIOSET_NEED_BVECS);
+	if (ret) {
 		ti->error = "Cannot allocate crypt bioset";
 		goto bad;
 	}
@@ -2806,11 +2802,10 @@
 		if (!cc->tag_pool_max_sectors)
 			cc->tag_pool_max_sectors = 1;
 
-		cc->tag_pool = mempool_create_kmalloc_pool(MIN_IOS,
+		ret = mempool_init_kmalloc_pool(&cc->tag_pool, MIN_IOS,
 			cc->tag_pool_max_sectors * cc->on_disk_tag_size);
-		if (!cc->tag_pool) {
+		if (ret) {
 			ti->error = "Cannot allocate integrity tags mempool";
-			ret = -ENOMEM;
 			goto bad;
 		}
 
@@ -2903,7 +2898,7 @@
 				GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN)))) {
 			if (bio_sectors(bio) > cc->tag_pool_max_sectors)
 				dm_accept_partial_bio(bio, cc->tag_pool_max_sectors);
-			io->integrity_metadata = mempool_alloc(cc->tag_pool, GFP_NOIO);
+			io->integrity_metadata = mempool_alloc(&cc->tag_pool, GFP_NOIO);
 			io->integrity_metadata_from_pool = true;
 		}
 	}
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 514fb4a..fc68c7a 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -142,7 +142,7 @@
 	unsigned tag_size;
 	__s8 log2_tag_size;
 	sector_t start;
-	mempool_t *journal_io_mempool;
+	mempool_t journal_io_mempool;
 	struct dm_io_client *io;
 	struct dm_bufio_client *bufio;
 	struct workqueue_struct *metadata_wq;
@@ -1817,7 +1817,7 @@
 	struct journal_completion *comp = io->comp;
 	struct dm_integrity_c *ic = comp->ic;
 	remove_range(ic, &io->range);
-	mempool_free(io, ic->journal_io_mempool);
+	mempool_free(io, &ic->journal_io_mempool);
 	if (unlikely(error != 0))
 		dm_integrity_io_error(ic, "copying from journal", -EIO);
 	complete_journal_op(comp);
@@ -1886,7 +1886,7 @@
 			}
 			next_loop = k - 1;
 
-			io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO);
+			io = mempool_alloc(&ic->journal_io_mempool, GFP_NOIO);
 			io->comp = &comp;
 			io->range.logical_sector = sec;
 			io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
@@ -1918,7 +1918,7 @@
 				if (j == k) {
 					remove_range_unlocked(ic, &io->range);
 					spin_unlock_irq(&ic->endio_wait.lock);
-					mempool_free(io, ic->journal_io_mempool);
+					mempool_free(io, &ic->journal_io_mempool);
 					goto skip_io;
 				}
 				for (l = j; l < k; l++) {
@@ -2980,9 +2980,8 @@
 		goto bad;
 	}
 
-	ic->journal_io_mempool = mempool_create_slab_pool(JOURNAL_IO_MEMPOOL, journal_io_cache);
-	if (!ic->journal_io_mempool) {
-		r = -ENOMEM;
+	r = mempool_init_slab_pool(&ic->journal_io_mempool, JOURNAL_IO_MEMPOOL, journal_io_cache);
+	if (r) {
 		ti->error = "Cannot allocate mempool";
 		goto bad;
 	}
@@ -3196,7 +3195,7 @@
 		destroy_workqueue(ic->writer_wq);
 	if (ic->bufio)
 		dm_bufio_client_destroy(ic->bufio);
-	mempool_destroy(ic->journal_io_mempool);
+	mempool_exit(&ic->journal_io_mempool);
 	if (ic->io)
 		dm_io_client_destroy(ic->io);
 	if (ic->dev)
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index a8d914d..81ffc59 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -22,8 +22,8 @@
 #define DM_IO_MAX_REGIONS	BITS_PER_LONG
 
 struct dm_io_client {
-	mempool_t *pool;
-	struct bio_set *bios;
+	mempool_t pool;
+	struct bio_set bios;
 };
 
 /*
@@ -49,32 +49,33 @@
 {
 	struct dm_io_client *client;
 	unsigned min_ios = dm_get_reserved_bio_based_ios();
+	int ret;
 
-	client = kmalloc(sizeof(*client), GFP_KERNEL);
+	client = kzalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
 		return ERR_PTR(-ENOMEM);
 
-	client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
-	if (!client->pool)
+	ret = mempool_init_slab_pool(&client->pool, min_ios, _dm_io_cache);
+	if (ret)
 		goto bad;
 
-	client->bios = bioset_create(min_ios, 0, BIOSET_NEED_BVECS);
-	if (!client->bios)
+	ret = bioset_init(&client->bios, min_ios, 0, BIOSET_NEED_BVECS);
+	if (ret)
 		goto bad;
 
 	return client;
 
    bad:
-	mempool_destroy(client->pool);
+	mempool_exit(&client->pool);
 	kfree(client);
-	return ERR_PTR(-ENOMEM);
+	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL(dm_io_client_create);
 
 void dm_io_client_destroy(struct dm_io_client *client)
 {
-	mempool_destroy(client->pool);
-	bioset_free(client->bios);
+	mempool_exit(&client->pool);
+	bioset_exit(&client->bios);
 	kfree(client);
 }
 EXPORT_SYMBOL(dm_io_client_destroy);
@@ -120,7 +121,7 @@
 		invalidate_kernel_vmap_range(io->vma_invalidate_address,
 					     io->vma_invalidate_size);
 
-	mempool_free(io, io->client->pool);
+	mempool_free(io, &io->client->pool);
 	fn(error_bits, context);
 }
 
@@ -344,7 +345,7 @@
 					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
 		}
 
-		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
+		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
 		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
 		bio_set_dev(bio, where->bdev);
 		bio->bi_end_io = endio;
@@ -442,7 +443,7 @@
 
 	init_completion(&sio.wait);
 
-	io = mempool_alloc(client->pool, GFP_NOIO);
+	io = mempool_alloc(&client->pool, GFP_NOIO);
 	io->error_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->client = client;
@@ -474,7 +475,7 @@
 		return -EIO;
 	}
 
-	io = mempool_alloc(client->pool, GFP_NOIO);
+	io = mempool_alloc(&client->pool, GFP_NOIO);
 	io->error_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
 	io->client = client;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index e6e7c68..ce7efc7 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -47,7 +47,7 @@
 	wait_queue_head_t destroyq;
 	atomic_t nr_jobs;
 
-	mempool_t *job_pool;
+	mempool_t job_pool;
 
 	struct workqueue_struct *kcopyd_wq;
 	struct work_struct kcopyd_work;
@@ -479,7 +479,7 @@
 	 */
 	if (job->master_job == job) {
 		mutex_destroy(&job->lock);
-		mempool_free(job, kc->job_pool);
+		mempool_free(job, &kc->job_pool);
 	}
 	fn(read_err, write_err, context);
 
@@ -751,7 +751,7 @@
 	 * Allocate an array of jobs consisting of one master job
 	 * followed by SPLIT_COUNT sub jobs.
 	 */
-	job = mempool_alloc(kc->job_pool, GFP_NOIO);
+	job = mempool_alloc(&kc->job_pool, GFP_NOIO);
 	mutex_init(&job->lock);
 
 	/*
@@ -835,7 +835,7 @@
 {
 	struct kcopyd_job *job;
 
-	job = mempool_alloc(kc->job_pool, GFP_NOIO);
+	job = mempool_alloc(&kc->job_pool, GFP_NOIO);
 
 	memset(job, 0, sizeof(struct kcopyd_job));
 	job->kc = kc;
@@ -879,10 +879,10 @@
  *---------------------------------------------------------------*/
 struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle)
 {
-	int r = -ENOMEM;
+	int r;
 	struct dm_kcopyd_client *kc;
 
-	kc = kmalloc(sizeof(*kc), GFP_KERNEL);
+	kc = kzalloc(sizeof(*kc), GFP_KERNEL);
 	if (!kc)
 		return ERR_PTR(-ENOMEM);
 
@@ -892,14 +892,16 @@
 	INIT_LIST_HEAD(&kc->pages_jobs);
 	kc->throttle = throttle;
 
-	kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
-	if (!kc->job_pool)
+	r = mempool_init_slab_pool(&kc->job_pool, MIN_JOBS, _job_cache);
+	if (r)
 		goto bad_slab;
 
 	INIT_WORK(&kc->kcopyd_work, do_work);
 	kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0);
-	if (!kc->kcopyd_wq)
+	if (!kc->kcopyd_wq) {
+		r = -ENOMEM;
 		goto bad_workqueue;
+	}
 
 	kc->pages = NULL;
 	kc->nr_reserved_pages = kc->nr_free_pages = 0;
@@ -923,7 +925,7 @@
 bad_client_pages:
 	destroy_workqueue(kc->kcopyd_wq);
 bad_workqueue:
-	mempool_destroy(kc->job_pool);
+	mempool_exit(&kc->job_pool);
 bad_slab:
 	kfree(kc);
 
@@ -942,7 +944,7 @@
 	destroy_workqueue(kc->kcopyd_wq);
 	dm_io_client_destroy(kc->io_client);
 	client_free_pages(kc);
-	mempool_destroy(kc->job_pool);
+	mempool_exit(&kc->job_pool);
 	kfree(kc);
 }
 EXPORT_SYMBOL(dm_kcopyd_client_destroy);
diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c
index 53b7b06d..52090be 100644
--- a/drivers/md/dm-log-userspace-base.c
+++ b/drivers/md/dm-log-userspace-base.c
@@ -76,7 +76,7 @@
 	 */
 	uint32_t integrated_flush;
 
-	mempool_t *flush_entry_pool;
+	mempool_t flush_entry_pool;
 };
 
 static struct kmem_cache *_flush_entry_cache;
@@ -249,11 +249,10 @@
 		goto out;
 	}
 
-	lc->flush_entry_pool = mempool_create_slab_pool(FLUSH_ENTRY_POOL_SIZE,
-							_flush_entry_cache);
-	if (!lc->flush_entry_pool) {
+	r = mempool_init_slab_pool(&lc->flush_entry_pool, FLUSH_ENTRY_POOL_SIZE,
+				   _flush_entry_cache);
+	if (r) {
 		DMERR("Failed to create flush_entry_pool");
-		r = -ENOMEM;
 		goto out;
 	}
 
@@ -313,7 +312,7 @@
 out:
 	kfree(devices_rdata);
 	if (r) {
-		mempool_destroy(lc->flush_entry_pool);
+		mempool_exit(&lc->flush_entry_pool);
 		kfree(lc);
 		kfree(ctr_str);
 	} else {
@@ -342,7 +341,7 @@
 	if (lc->log_dev)
 		dm_put_device(lc->ti, lc->log_dev);
 
-	mempool_destroy(lc->flush_entry_pool);
+	mempool_exit(&lc->flush_entry_pool);
 
 	kfree(lc->usr_argv_str);
 	kfree(lc);
@@ -570,7 +569,7 @@
 	int mark_list_is_empty;
 	int clear_list_is_empty;
 	struct dm_dirty_log_flush_entry *fe, *tmp_fe;
-	mempool_t *flush_entry_pool = lc->flush_entry_pool;
+	mempool_t *flush_entry_pool = &lc->flush_entry_pool;
 
 	spin_lock_irqsave(&lc->flush_lock, flags);
 	list_splice_init(&lc->mark_list, &mark_list);
@@ -653,7 +652,7 @@
 	struct dm_dirty_log_flush_entry *fe;
 
 	/* Wait for an allocation, but _never_ fail */
-	fe = mempool_alloc(lc->flush_entry_pool, GFP_NOIO);
+	fe = mempool_alloc(&lc->flush_entry_pool, GFP_NOIO);
 	BUG_ON(!fe);
 
 	spin_lock_irqsave(&lc->flush_lock, flags);
@@ -687,7 +686,7 @@
 	 * to cause the region to be resync'ed when the
 	 * device is activated next time.
 	 */
-	fe = mempool_alloc(lc->flush_entry_pool, GFP_ATOMIC);
+	fe = mempool_alloc(&lc->flush_entry_pool, GFP_ATOMIC);
 	if (!fe) {
 		DMERR("Failed to allocate memory to clear region.");
 		return;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 203a041..d94ba6f 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -520,7 +520,8 @@
 
 	bdev = pgpath->path.dev->bdev;
 	q = bdev_get_queue(bdev);
-	clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC);
+	clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE,
+			BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(clone)) {
 		/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
 		if (blk_queue_dying(q)) {
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 6f823f4..ab13fce 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -756,7 +756,7 @@
 		return ERR_PTR(-EINVAL);
 	}
 
-	rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL);
+	rs = kzalloc(struct_size(rs, dev, raid_devs), GFP_KERNEL);
 	if (!rs) {
 		ti->error = "Cannot allocate raid context";
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 85c32b2..abf3521 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -63,7 +63,7 @@
 
 	/* hash table */
 	rwlock_t hash_lock;
-	mempool_t *region_pool;
+	mempool_t region_pool;
 	unsigned mask;
 	unsigned nr_buckets;
 	unsigned prime;
@@ -169,6 +169,7 @@
 	struct dm_region_hash *rh;
 	unsigned nr_buckets, max_buckets;
 	size_t i;
+	int ret;
 
 	/*
 	 * Calculate a suitable number of buckets for our hash
@@ -179,7 +180,7 @@
 		;
 	nr_buckets >>= 1;
 
-	rh = kmalloc(sizeof(*rh), GFP_KERNEL);
+	rh = kzalloc(sizeof(*rh), GFP_KERNEL);
 	if (!rh) {
 		DMERR("unable to allocate region hash memory");
 		return ERR_PTR(-ENOMEM);
@@ -220,9 +221,9 @@
 	INIT_LIST_HEAD(&rh->failed_recovered_regions);
 	rh->flush_failure = 0;
 
-	rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
-						      sizeof(struct dm_region));
-	if (!rh->region_pool) {
+	ret = mempool_init_kmalloc_pool(&rh->region_pool, MIN_REGIONS,
+					sizeof(struct dm_region));
+	if (ret) {
 		vfree(rh->buckets);
 		kfree(rh);
 		rh = ERR_PTR(-ENOMEM);
@@ -242,14 +243,14 @@
 		list_for_each_entry_safe(reg, nreg, rh->buckets + h,
 					 hash_list) {
 			BUG_ON(atomic_read(&reg->pending));
-			mempool_free(reg, rh->region_pool);
+			mempool_free(reg, &rh->region_pool);
 		}
 	}
 
 	if (rh->log)
 		dm_dirty_log_destroy(rh->log);
 
-	mempool_destroy(rh->region_pool);
+	mempool_exit(&rh->region_pool);
 	vfree(rh->buckets);
 	kfree(rh);
 }
@@ -287,7 +288,7 @@
 {
 	struct dm_region *reg, *nreg;
 
-	nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
+	nreg = mempool_alloc(&rh->region_pool, GFP_ATOMIC);
 	if (unlikely(!nreg))
 		nreg = kmalloc(sizeof(*nreg), GFP_NOIO | __GFP_NOFAIL);
 
@@ -303,7 +304,7 @@
 	reg = __rh_lookup(rh, region);
 	if (reg)
 		/* We lost the race. */
-		mempool_free(nreg, rh->region_pool);
+		mempool_free(nreg, &rh->region_pool);
 	else {
 		__rh_insert(rh, nreg);
 		if (nreg->state == DM_RH_CLEAN) {
@@ -481,17 +482,17 @@
 	list_for_each_entry_safe(reg, next, &recovered, list) {
 		rh->log->type->clear_region(rh->log, reg->key);
 		complete_resync_work(reg, 1);
-		mempool_free(reg, rh->region_pool);
+		mempool_free(reg, &rh->region_pool);
 	}
 
 	list_for_each_entry_safe(reg, next, &failed_recovered, list) {
 		complete_resync_work(reg, errors_handled ? 0 : 1);
-		mempool_free(reg, rh->region_pool);
+		mempool_free(reg, &rh->region_pool);
 	}
 
 	list_for_each_entry_safe(reg, next, &clean, list) {
 		rh->log->type->clear_region(rh->log, reg->key);
-		mempool_free(reg, rh->region_pool);
+		mempool_free(reg, &rh->region_pool);
 	}
 
 	rh->log->type->flush(rh->log);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index bf0b840..6e547b8 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -406,7 +406,7 @@
 	if (blk_queue_io_stat(clone->q))
 		clone->rq_flags |= RQF_IO_STAT;
 
-	clone->start_time = jiffies;
+	clone->start_time_ns = ktime_get_ns();
 	r = blk_insert_cloned_request(clone->q, clone);
 	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
 		/* must complete clone in terms of original request */
@@ -433,7 +433,7 @@
 {
 	int r;
 
-	r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
+	r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask,
 			      dm_rq_bio_constructor, tio);
 	if (r)
 		return r;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 216035b..f745404 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -87,7 +87,7 @@
 	 */
 	struct list_head out_of_order_list;
 
-	mempool_t *pending_pool;
+	mempool_t pending_pool;
 
 	struct dm_exception_table pending;
 	struct dm_exception_table complete;
@@ -682,7 +682,7 @@
 
 static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
 {
-	struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool,
+	struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool,
 							     GFP_NOIO);
 
 	atomic_inc(&s->pending_exceptions_count);
@@ -695,7 +695,7 @@
 {
 	struct dm_snapshot *s = pe->snap;
 
-	mempool_free(pe, s->pending_pool);
+	mempool_free(pe, &s->pending_pool);
 	smp_mb__before_atomic();
 	atomic_dec(&s->pending_exceptions_count);
 }
@@ -1120,7 +1120,7 @@
 		origin_mode = FMODE_WRITE;
 	}
 
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
 	if (!s) {
 		ti->error = "Cannot allocate private snapshot structure";
 		r = -ENOMEM;
@@ -1196,10 +1196,9 @@
 		goto bad_kcopyd;
 	}
 
-	s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
-	if (!s->pending_pool) {
+	r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache);
+	if (r) {
 		ti->error = "Could not allocate mempool for pending exceptions";
-		r = -ENOMEM;
 		goto bad_pending_pool;
 	}
 
@@ -1259,7 +1258,7 @@
 	unregister_snapshot(s);
 
 bad_load_and_register:
-	mempool_destroy(s->pending_pool);
+	mempool_exit(&s->pending_pool);
 
 bad_pending_pool:
 	dm_kcopyd_client_destroy(s->kcopyd_client);
@@ -1355,7 +1354,7 @@
 	while (atomic_read(&s->pending_exceptions_count))
 		msleep(1);
 	/*
-	 * Ensure instructions in mempool_destroy aren't reordered
+	 * Ensure instructions in mempool_exit aren't reordered
 	 * before atomic_read.
 	 */
 	smp_mb();
@@ -1367,7 +1366,7 @@
 
 	__free_exceptions(s);
 
-	mempool_destroy(s->pending_pool);
+	mempool_exit(&s->pending_pool);
 
 	dm_exception_store_destroy(s->store);
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 0589a4d..caa51dd 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -548,14 +548,14 @@
  * On the other hand, dm-switch needs to process bulk data using messages and
  * excessive use of GFP_NOIO could cause trouble.
  */
-static char **realloc_argv(unsigned *array_size, char **old_argv)
+static char **realloc_argv(unsigned *size, char **old_argv)
 {
 	char **argv;
 	unsigned new_size;
 	gfp_t gfp;
 
-	if (*array_size) {
-		new_size = *array_size * 2;
+	if (*size) {
+		new_size = *size * 2;
 		gfp = GFP_KERNEL;
 	} else {
 		new_size = 8;
@@ -563,8 +563,8 @@
 	}
 	argv = kmalloc(new_size * sizeof(*argv), gfp);
 	if (argv) {
-		memcpy(argv, old_argv, *array_size * sizeof(*argv));
-		*array_size = new_size;
+		memcpy(argv, old_argv, *size * sizeof(*argv));
+		*size = new_size;
 	}
 
 	kfree(old_argv);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index b111074..5772756 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -260,7 +260,7 @@
 	struct dm_deferred_set *all_io_ds;
 
 	struct dm_thin_new_mapping *next_mapping;
-	mempool_t *mapping_pool;
+	mempool_t mapping_pool;
 
 	process_bio_fn process_bio;
 	process_bio_fn process_discard;
@@ -917,7 +917,7 @@
 {
 	cell_error(m->tc->pool, m->cell);
 	list_del(&m->list);
-	mempool_free(m, m->tc->pool->mapping_pool);
+	mempool_free(m, &m->tc->pool->mapping_pool);
 }
 
 static void process_prepared_mapping(struct dm_thin_new_mapping *m)
@@ -961,7 +961,7 @@
 
 out:
 	list_del(&m->list);
-	mempool_free(m, pool->mapping_pool);
+	mempool_free(m, &pool->mapping_pool);
 }
 
 /*----------------------------------------------------------------*/
@@ -971,7 +971,7 @@
 	struct thin_c *tc = m->tc;
 	if (m->cell)
 		cell_defer_no_holder(tc, m->cell);
-	mempool_free(m, tc->pool->mapping_pool);
+	mempool_free(m, &tc->pool->mapping_pool);
 }
 
 static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
@@ -999,7 +999,7 @@
 		bio_endio(m->bio);
 
 	cell_defer_no_holder(tc, m->cell);
-	mempool_free(m, tc->pool->mapping_pool);
+	mempool_free(m, &tc->pool->mapping_pool);
 }
 
 /*----------------------------------------------------------------*/
@@ -1092,7 +1092,7 @@
 		metadata_operation_failed(pool, "dm_thin_remove_range", r);
 		bio_io_error(m->bio);
 		cell_defer_no_holder(tc, m->cell);
-		mempool_free(m, pool->mapping_pool);
+		mempool_free(m, &pool->mapping_pool);
 		return;
 	}
 
@@ -1105,7 +1105,7 @@
 		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
 		bio_io_error(m->bio);
 		cell_defer_no_holder(tc, m->cell);
-		mempool_free(m, pool->mapping_pool);
+		mempool_free(m, &pool->mapping_pool);
 		return;
 	}
 
@@ -1150,7 +1150,7 @@
 		bio_endio(m->bio);
 
 	cell_defer_no_holder(tc, m->cell);
-	mempool_free(m, pool->mapping_pool);
+	mempool_free(m, &pool->mapping_pool);
 }
 
 static void process_prepared(struct pool *pool, struct list_head *head,
@@ -1196,7 +1196,7 @@
 	if (pool->next_mapping)
 		return 0;
 
-	pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC);
+	pool->next_mapping = mempool_alloc(&pool->mapping_pool, GFP_ATOMIC);
 
 	return pool->next_mapping ? 0 : -ENOMEM;
 }
@@ -2835,8 +2835,8 @@
 		destroy_workqueue(pool->wq);
 
 	if (pool->next_mapping)
-		mempool_free(pool->next_mapping, pool->mapping_pool);
-	mempool_destroy(pool->mapping_pool);
+		mempool_free(pool->next_mapping, &pool->mapping_pool);
+	mempool_exit(&pool->mapping_pool);
 	dm_deferred_set_destroy(pool->shared_read_ds);
 	dm_deferred_set_destroy(pool->all_io_ds);
 	kfree(pool);
@@ -2861,7 +2861,7 @@
 		return (struct pool *)pmd;
 	}
 
-	pool = kmalloc(sizeof(*pool), GFP_KERNEL);
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
 	if (!pool) {
 		*error = "Error allocating memory for pool";
 		err_p = ERR_PTR(-ENOMEM);
@@ -2931,11 +2931,11 @@
 	}
 
 	pool->next_mapping = NULL;
-	pool->mapping_pool = mempool_create_slab_pool(MAPPING_POOL_SIZE,
-						      _new_mapping_cache);
-	if (!pool->mapping_pool) {
+	r = mempool_init_slab_pool(&pool->mapping_pool, MAPPING_POOL_SIZE,
+				   _new_mapping_cache);
+	if (r) {
 		*error = "Error creating pool's mapping mempool";
-		err_p = ERR_PTR(-ENOMEM);
+		err_p = ERR_PTR(r);
 		goto bad_mapping_pool;
 	}
 
@@ -2955,7 +2955,7 @@
 	return pool;
 
 bad_sort_array:
-	mempool_destroy(pool->mapping_pool);
+	mempool_exit(&pool->mapping_pool);
 bad_mapping_pool:
 	dm_deferred_set_destroy(pool->all_io_ds);
 bad_all_io_ds:
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index e13f908..684af08 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -309,13 +309,13 @@
 	unsigned n;
 
 	if (!fio->rs)
-		fio->rs = mempool_alloc(v->fec->rs_pool, GFP_NOIO);
+		fio->rs = mempool_alloc(&v->fec->rs_pool, GFP_NOIO);
 
 	fec_for_each_prealloc_buffer(n) {
 		if (fio->bufs[n])
 			continue;
 
-		fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOWAIT);
+		fio->bufs[n] = mempool_alloc(&v->fec->prealloc_pool, GFP_NOWAIT);
 		if (unlikely(!fio->bufs[n])) {
 			DMERR("failed to allocate FEC buffer");
 			return -ENOMEM;
@@ -327,7 +327,7 @@
 		if (fio->bufs[n])
 			continue;
 
-		fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOWAIT);
+		fio->bufs[n] = mempool_alloc(&v->fec->extra_pool, GFP_NOWAIT);
 		/* we can manage with even one buffer if necessary */
 		if (unlikely(!fio->bufs[n]))
 			break;
@@ -335,7 +335,7 @@
 	fio->nbufs = n;
 
 	if (!fio->output)
-		fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO);
+		fio->output = mempool_alloc(&v->fec->output_pool, GFP_NOIO);
 
 	return 0;
 }
@@ -493,15 +493,15 @@
 	if (!verity_fec_is_enabled(io->v))
 		return;
 
-	mempool_free(fio->rs, f->rs_pool);
+	mempool_free(fio->rs, &f->rs_pool);
 
 	fec_for_each_prealloc_buffer(n)
-		mempool_free(fio->bufs[n], f->prealloc_pool);
+		mempool_free(fio->bufs[n], &f->prealloc_pool);
 
 	fec_for_each_extra_buffer(fio, n)
-		mempool_free(fio->bufs[n], f->extra_pool);
+		mempool_free(fio->bufs[n], &f->extra_pool);
 
-	mempool_free(fio->output, f->output_pool);
+	mempool_free(fio->output, &f->output_pool);
 }
 
 /*
@@ -549,9 +549,9 @@
 	if (!verity_fec_is_enabled(v))
 		goto out;
 
-	mempool_destroy(f->rs_pool);
-	mempool_destroy(f->prealloc_pool);
-	mempool_destroy(f->extra_pool);
+	mempool_exit(&f->rs_pool);
+	mempool_exit(&f->prealloc_pool);
+	mempool_exit(&f->extra_pool);
 	kmem_cache_destroy(f->cache);
 
 	if (f->data_bufio)
@@ -570,7 +570,7 @@
 {
 	struct dm_verity *v = (struct dm_verity *)pool_data;
 
-	return init_rs(8, 0x11d, 0, 1, v->fec->roots);
+	return init_rs_gfp(8, 0x11d, 0, 1, v->fec->roots, gfp_mask);
 }
 
 static void fec_rs_free(void *element, void *pool_data)
@@ -675,6 +675,7 @@
 	struct dm_verity_fec *f = v->fec;
 	struct dm_target *ti = v->ti;
 	u64 hash_blocks;
+	int ret;
 
 	if (!verity_fec_is_enabled(v)) {
 		verity_fec_dtr(v);
@@ -770,11 +771,11 @@
 	}
 
 	/* Preallocate an rs_control structure for each worker thread */
-	f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc,
-				    fec_rs_free, (void *) v);
-	if (!f->rs_pool) {
+	ret = mempool_init(&f->rs_pool, num_online_cpus(), fec_rs_alloc,
+			   fec_rs_free, (void *) v);
+	if (ret) {
 		ti->error = "Cannot allocate RS pool";
-		return -ENOMEM;
+		return ret;
 	}
 
 	f->cache = kmem_cache_create("dm_verity_fec_buffers",
@@ -786,26 +787,26 @@
 	}
 
 	/* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */
-	f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() *
-						    DM_VERITY_FEC_BUF_PREALLOC,
-						    f->cache);
-	if (!f->prealloc_pool) {
+	ret = mempool_init_slab_pool(&f->prealloc_pool, num_online_cpus() *
+				     DM_VERITY_FEC_BUF_PREALLOC,
+				     f->cache);
+	if (ret) {
 		ti->error = "Cannot allocate FEC buffer prealloc pool";
-		return -ENOMEM;
+		return ret;
 	}
 
-	f->extra_pool = mempool_create_slab_pool(0, f->cache);
-	if (!f->extra_pool) {
+	ret = mempool_init_slab_pool(&f->extra_pool, 0, f->cache);
+	if (ret) {
 		ti->error = "Cannot allocate FEC buffer extra pool";
-		return -ENOMEM;
+		return ret;
 	}
 
 	/* Preallocate an output buffer for each thread */
-	f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(),
-						     1 << v->data_dev_block_bits);
-	if (!f->output_pool) {
+	ret = mempool_init_kmalloc_pool(&f->output_pool, num_online_cpus(),
+					1 << v->data_dev_block_bits);
+	if (ret) {
 		ti->error = "Cannot allocate FEC output pool";
-		return -ENOMEM;
+		return ret;
 	}
 
 	/* Reserve space for our per-bio data */
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
index bb31ce8..6ad803b 100644
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -46,10 +46,10 @@
 	sector_t hash_blocks;	/* blocks covered after v->hash_start */
 	unsigned char roots;	/* number of parity bytes, M-N of RS(M, N) */
 	unsigned char rsn;	/* N of RS(M, N) */
-	mempool_t *rs_pool;	/* mempool for fio->rs */
-	mempool_t *prealloc_pool;	/* mempool for preallocated buffers */
-	mempool_t *extra_pool;	/* mempool for extra buffers */
-	mempool_t *output_pool;	/* mempool for output */
+	mempool_t rs_pool;	/* mempool for fio->rs */
+	mempool_t prealloc_pool;	/* mempool for preallocated buffers */
+	mempool_t extra_pool;	/* mempool for extra buffers */
+	mempool_t output_pool;	/* mempool for output */
 	struct kmem_cache *cache;	/* cache for buffers */
 };
 
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index e73b077..30602d1 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -57,7 +57,7 @@
 	struct workqueue_struct *chunk_wq;
 
 	/* For cloned BIOs to zones */
-	struct bio_set		*bio_set;
+	struct bio_set		bio_set;
 
 	/* For flush */
 	spinlock_t		flush_lock;
@@ -121,7 +121,7 @@
 	}
 
 	/* Partial BIO: we need to clone the BIO */
-	clone = bio_clone_fast(bio, GFP_NOIO, dmz->bio_set);
+	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
 	if (!clone)
 		return -ENOMEM;
 
@@ -779,10 +779,9 @@
 	ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift;
 
 	/* Zone BIO */
-	dmz->bio_set = bioset_create(DMZ_MIN_BIOS, 0, 0);
-	if (!dmz->bio_set) {
+	ret = bioset_init(&dmz->bio_set, DMZ_MIN_BIOS, 0, 0);
+	if (ret) {
 		ti->error = "Create BIO set failed";
-		ret = -ENOMEM;
 		goto err_meta;
 	}
 
@@ -828,7 +827,7 @@
 	destroy_workqueue(dmz->chunk_wq);
 err_bio:
 	mutex_destroy(&dmz->chunk_lock);
-	bioset_free(dmz->bio_set);
+	bioset_exit(&dmz->bio_set);
 err_meta:
 	dmz_dtr_metadata(dmz->metadata);
 err_dev:
@@ -858,7 +857,7 @@
 
 	dmz_dtr_metadata(dmz->metadata);
 
-	bioset_free(dmz->bio_set);
+	bioset_exit(&dmz->bio_set);
 
 	dmz_put_zoned_device(ti);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0a7b010..98dff36 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -148,8 +148,8 @@
  * For mempools pre-allocation at the table loading time.
  */
 struct dm_md_mempools {
-	struct bio_set *bs;
-	struct bio_set *io_bs;
+	struct bio_set bs;
+	struct bio_set io_bs;
 };
 
 struct table_device {
@@ -537,7 +537,7 @@
 	struct dm_target_io *tio;
 	struct bio *clone;
 
-	clone = bio_alloc_bioset(GFP_NOIO, 0, md->io_bs);
+	clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs);
 	if (!clone)
 		return NULL;
 
@@ -572,7 +572,7 @@
 		/* the dm_target_io embedded in ci->io is available */
 		tio = &ci->io->tio;
 	} else {
-		struct bio *clone = bio_alloc_bioset(gfp_mask, 0, ci->io->md->bs);
+		struct bio *clone = bio_alloc_bioset(gfp_mask, 0, &ci->io->md->bs);
 		if (!clone)
 			return NULL;
 
@@ -1583,7 +1583,7 @@
 				 * won't be affected by this reassignment.
 				 */
 				struct bio *b = bio_clone_bioset(bio, GFP_NOIO,
-								 md->queue->bio_split);
+								 &md->queue->bio_split);
 				ci.io->orig_bio = b;
 				bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9);
 				bio_chain(b, bio);
@@ -1785,10 +1785,8 @@
 		destroy_workqueue(md->wq);
 	if (md->kworker_task)
 		kthread_stop(md->kworker_task);
-	if (md->bs)
-		bioset_free(md->bs);
-	if (md->io_bs)
-		bioset_free(md->io_bs);
+	bioset_exit(&md->bs);
+	bioset_exit(&md->io_bs);
 
 	if (md->dax_dev) {
 		kill_dax(md->dax_dev);
@@ -1965,16 +1963,10 @@
 		 * If so, reload bioset because front_pad may have changed
 		 * because a different table was loaded.
 		 */
-		if (md->bs) {
-			bioset_free(md->bs);
-			md->bs = NULL;
-		}
-		if (md->io_bs) {
-			bioset_free(md->io_bs);
-			md->io_bs = NULL;
-		}
+		bioset_exit(&md->bs);
+		bioset_exit(&md->io_bs);
 
-	} else if (md->bs) {
+	} else if (bioset_initialized(&md->bs)) {
 		/*
 		 * There's no need to reload with request-based dm
 		 * because the size of front_pad doesn't change.
@@ -1986,12 +1978,14 @@
 		goto out;
 	}
 
-	BUG_ON(!p || md->bs || md->io_bs);
+	BUG_ON(!p ||
+	       bioset_initialized(&md->bs) ||
+	       bioset_initialized(&md->io_bs));
 
 	md->bs = p->bs;
-	p->bs = NULL;
+	memset(&p->bs, 0, sizeof(p->bs));
 	md->io_bs = p->io_bs;
-	p->io_bs = NULL;
+	memset(&p->io_bs, 0, sizeof(p->io_bs));
 out:
 	/* mempool bind completed, no longer need any mempools in the table */
 	dm_table_free_md_mempools(t);
@@ -2905,6 +2899,7 @@
 	struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
 	unsigned int pool_size = 0;
 	unsigned int front_pad, io_front_pad;
+	int ret;
 
 	if (!pools)
 		return NULL;
@@ -2916,10 +2911,10 @@
 		pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
 		front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
 		io_front_pad = roundup(front_pad,  __alignof__(struct dm_io)) + offsetof(struct dm_io, tio);
-		pools->io_bs = bioset_create(pool_size, io_front_pad, 0);
-		if (!pools->io_bs)
+		ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, 0);
+		if (ret)
 			goto out;
-		if (integrity && bioset_integrity_create(pools->io_bs, pool_size))
+		if (integrity && bioset_integrity_create(&pools->io_bs, pool_size))
 			goto out;
 		break;
 	case DM_TYPE_REQUEST_BASED:
@@ -2932,11 +2927,11 @@
 		BUG();
 	}
 
-	pools->bs = bioset_create(pool_size, front_pad, 0);
-	if (!pools->bs)
+	ret = bioset_init(&pools->bs, pool_size, front_pad, 0);
+	if (ret)
 		goto out;
 
-	if (integrity && bioset_integrity_create(pools->bs, pool_size))
+	if (integrity && bioset_integrity_create(&pools->bs, pool_size))
 		goto out;
 
 	return pools;
@@ -2952,10 +2947,8 @@
 	if (!pools)
 		return;
 
-	if (pools->bs)
-		bioset_free(pools->bs);
-	if (pools->io_bs)
-		bioset_free(pools->io_bs);
+	bioset_exit(&pools->bs);
+	bioset_exit(&pools->io_bs);
 
 	kfree(pools);
 }
diff --git a/drivers/md/md-faulty.c b/drivers/md/md-faulty.c
index 38264b3..c2fdf89 100644
--- a/drivers/md/md-faulty.c
+++ b/drivers/md/md-faulty.c
@@ -214,7 +214,7 @@
 		}
 	}
 	if (failit) {
-		struct bio *b = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+		struct bio *b = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
 
 		bio_set_dev(b, conf->rdev->bdev);
 		b->bi_private = bio;
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 4964323..d45c697 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -269,7 +269,7 @@
 	if (unlikely(bio_end_sector(bio) > end_sector)) {
 		/* This bio crosses a device boundary, so we have to split it */
 		struct bio *split = bio_split(bio, end_sector - bio_sector,
-					      GFP_NOIO, mddev->bio_set);
+					      GFP_NOIO, &mddev->bio_set);
 		bio_chain(split, bio);
 		generic_make_request(bio);
 		bio = split;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 0a7e99d..f71fcdb 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -80,7 +80,7 @@
 
 	bio->bi_status = status;
 	bio_endio(bio);
-	mempool_free(mp_bh, conf->pool);
+	mempool_free(mp_bh, &conf->pool);
 }
 
 static void multipath_end_request(struct bio *bio)
@@ -117,7 +117,7 @@
 		return true;
 	}
 
-	mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
+	mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
 
 	mp_bh->master_bio = bio;
 	mp_bh->mddev = mddev;
@@ -125,7 +125,7 @@
 	mp_bh->path = multipath_map(conf);
 	if (mp_bh->path < 0) {
 		bio_io_error(bio);
-		mempool_free(mp_bh, conf->pool);
+		mempool_free(mp_bh, &conf->pool);
 		return true;
 	}
 	multipath = conf->multipaths + mp_bh->path;
@@ -378,6 +378,7 @@
 	struct multipath_info *disk;
 	struct md_rdev *rdev;
 	int working_disks;
+	int ret;
 
 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;
@@ -431,9 +432,9 @@
 	}
 	mddev->degraded = conf->raid_disks - working_disks;
 
-	conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
-						 sizeof(struct multipath_bh));
-	if (conf->pool == NULL)
+	ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS,
+					sizeof(struct multipath_bh));
+	if (ret)
 		goto out_free_conf;
 
 	mddev->thread = md_register_thread(multipathd, mddev,
@@ -455,7 +456,7 @@
 	return 0;
 
 out_free_conf:
-	mempool_destroy(conf->pool);
+	mempool_exit(&conf->pool);
 	kfree(conf->multipaths);
 	kfree(conf);
 	mddev->private = NULL;
@@ -467,7 +468,7 @@
 {
 	struct mpconf *conf = priv;
 
-	mempool_destroy(conf->pool);
+	mempool_exit(&conf->pool);
 	kfree(conf->multipaths);
 	kfree(conf);
 }
diff --git a/drivers/md/md-multipath.h b/drivers/md/md-multipath.h
index 0adb941..b3099e5 100644
--- a/drivers/md/md-multipath.h
+++ b/drivers/md/md-multipath.h
@@ -13,7 +13,7 @@
 	spinlock_t		device_lock;
 	struct list_head	retry_list;
 
-	mempool_t		*pool;
+	mempool_t		pool;
 };
 
 /*
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c208c01..fc692b7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -193,10 +193,10 @@
 {
 	struct bio *b;
 
-	if (!mddev || !mddev->bio_set)
+	if (!mddev || !bioset_initialized(&mddev->bio_set))
 		return bio_alloc(gfp_mask, nr_iovecs);
 
-	b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
+	b = bio_alloc_bioset(gfp_mask, nr_iovecs, &mddev->bio_set);
 	if (!b)
 		return NULL;
 	return b;
@@ -205,10 +205,10 @@
 
 static struct bio *md_bio_alloc_sync(struct mddev *mddev)
 {
-	if (!mddev || !mddev->sync_set)
+	if (!mddev || !bioset_initialized(&mddev->sync_set))
 		return bio_alloc(GFP_NOIO, 1);
 
-	return bio_alloc_bioset(GFP_NOIO, 1, mddev->sync_set);
+	return bio_alloc_bioset(GFP_NOIO, 1, &mddev->sync_set);
 }
 
 /*
@@ -510,7 +510,10 @@
 
 static void mddev_put(struct mddev *mddev)
 {
-	struct bio_set *bs = NULL, *sync_bs = NULL;
+	struct bio_set bs, sync_bs;
+
+	memset(&bs, 0, sizeof(bs));
+	memset(&sync_bs, 0, sizeof(sync_bs));
 
 	if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
 		return;
@@ -521,8 +524,8 @@
 		list_del_init(&mddev->all_mddevs);
 		bs = mddev->bio_set;
 		sync_bs = mddev->sync_set;
-		mddev->bio_set = NULL;
-		mddev->sync_set = NULL;
+		memset(&mddev->bio_set, 0, sizeof(mddev->bio_set));
+		memset(&mddev->sync_set, 0, sizeof(mddev->sync_set));
 		if (mddev->gendisk) {
 			/* We did a probe so need to clean up.  Call
 			 * queue_work inside the spinlock so that
@@ -535,10 +538,8 @@
 			kfree(mddev);
 	}
 	spin_unlock(&all_mddevs_lock);
-	if (bs)
-		bioset_free(bs);
-	if (sync_bs)
-		bioset_free(sync_bs);
+	bioset_exit(&bs);
+	bioset_exit(&sync_bs);
 }
 
 static void md_safemode_timeout(struct timer_list *t);
@@ -2123,7 +2124,7 @@
 			       bdev_get_integrity(reference->bdev));
 
 	pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
-	if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
+	if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE)) {
 		pr_err("md: failed to create integrity pool for %s\n",
 		       mdname(mddev));
 		return -EINVAL;
@@ -5497,17 +5498,15 @@
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
 	}
 
-	if (mddev->bio_set == NULL) {
-		mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-		if (!mddev->bio_set)
-			return -ENOMEM;
+	if (!bioset_initialized(&mddev->bio_set)) {
+		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+		if (err)
+			return err;
 	}
-	if (mddev->sync_set == NULL) {
-		mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-		if (!mddev->sync_set) {
-			err = -ENOMEM;
+	if (!bioset_initialized(&mddev->sync_set)) {
+		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+		if (err)
 			goto abort;
-		}
 	}
 
 	spin_lock(&pers_lock);
@@ -5668,14 +5667,8 @@
 	return 0;
 
 abort:
-	if (mddev->bio_set) {
-		bioset_free(mddev->bio_set);
-		mddev->bio_set = NULL;
-	}
-	if (mddev->sync_set) {
-		bioset_free(mddev->sync_set);
-		mddev->sync_set = NULL;
-	}
+	bioset_exit(&mddev->bio_set);
+	bioset_exit(&mddev->sync_set);
 
 	return err;
 }
@@ -5888,14 +5881,8 @@
 	 * This is called from dm-raid
 	 */
 	__md_stop(mddev);
-	if (mddev->bio_set) {
-		bioset_free(mddev->bio_set);
-		mddev->bio_set = NULL;
-	}
-	if (mddev->sync_set) {
-		bioset_free(mddev->sync_set);
-		mddev->sync_set = NULL;
-	}
+	bioset_exit(&mddev->bio_set);
+	bioset_exit(&mddev->sync_set);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fbc925c..3507cab2 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -452,8 +452,8 @@
 
 	struct attribute_group		*to_remove;
 
-	struct bio_set			*bio_set;
-	struct bio_set			*sync_set; /* for sync operations like
+	struct bio_set			bio_set;
+	struct bio_set			sync_set; /* for sync operations like
 						   * metadata and bitmap writes
 						   */
 
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 584c103..65ae47a 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -479,7 +479,7 @@
 	if (bio_end_sector(bio) > zone->zone_end) {
 		struct bio *split = bio_split(bio,
 			zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
-			mddev->bio_set);
+			&mddev->bio_set);
 		bio_chain(split, bio);
 		generic_make_request(bio);
 		bio = split;
@@ -582,7 +582,8 @@
 	sector = bio_sector;
 
 	if (sectors < bio_sectors(bio)) {
-		struct bio *split = bio_split(bio, sectors, GFP_NOIO, mddev->bio_set);
+		struct bio *split = bio_split(bio, sectors, GFP_NOIO,
+					      &mddev->bio_set);
 		bio_chain(split, bio);
 		generic_make_request(bio);
 		bio = split;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e9e3308..bad2852 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -221,7 +221,7 @@
 	struct r1conf *conf = r1_bio->mddev->private;
 
 	put_all_bios(conf, r1_bio);
-	mempool_free(r1_bio, conf->r1bio_pool);
+	mempool_free(r1_bio, &conf->r1bio_pool);
 }
 
 static void put_buf(struct r1bio *r1_bio)
@@ -236,7 +236,7 @@
 			rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
 	}
 
-	mempool_free(r1_bio, conf->r1buf_pool);
+	mempool_free(r1_bio, &conf->r1buf_pool);
 
 	lower_barrier(conf, sect);
 }
@@ -1178,7 +1178,7 @@
 	struct r1conf *conf = mddev->private;
 	struct r1bio *r1_bio;
 
-	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
+	r1_bio = mempool_alloc(&conf->r1bio_pool, GFP_NOIO);
 	/* Ensure no bio records IO_BLOCKED */
 	memset(r1_bio->bios, 0, conf->raid_disks * sizeof(r1_bio->bios[0]));
 	init_r1bio(r1_bio, mddev, bio);
@@ -1268,7 +1268,7 @@
 
 	if (max_sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, max_sectors,
-					      gfp, conf->bio_split);
+					      gfp, &conf->bio_split);
 		bio_chain(split, bio);
 		generic_make_request(bio);
 		bio = split;
@@ -1278,7 +1278,7 @@
 
 	r1_bio->read_disk = rdisk;
 
-	read_bio = bio_clone_fast(bio, gfp, mddev->bio_set);
+	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
 
 	r1_bio->bios[rdisk] = read_bio;
 
@@ -1439,7 +1439,7 @@
 
 	if (max_sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, max_sectors,
-					      GFP_NOIO, conf->bio_split);
+					      GFP_NOIO, &conf->bio_split);
 		bio_chain(split, bio);
 		generic_make_request(bio);
 		bio = split;
@@ -1479,9 +1479,9 @@
 
 		if (r1_bio->behind_master_bio)
 			mbio = bio_clone_fast(r1_bio->behind_master_bio,
-					      GFP_NOIO, mddev->bio_set);
+					      GFP_NOIO, &mddev->bio_set);
 		else
-			mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+			mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
 
 		if (r1_bio->behind_master_bio) {
 			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
@@ -1657,8 +1657,7 @@
 		_allow_barrier(conf, idx);
 	}
 
-	mempool_destroy(conf->r1buf_pool);
-	conf->r1buf_pool = NULL;
+	mempool_exit(&conf->r1buf_pool);
 }
 
 static int raid1_spare_active(struct mddev *mddev)
@@ -2348,10 +2347,10 @@
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
 			wbio = bio_clone_fast(r1_bio->behind_master_bio,
 					      GFP_NOIO,
-					      mddev->bio_set);
+					      &mddev->bio_set);
 		} else {
 			wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
-					      mddev->bio_set);
+					      &mddev->bio_set);
 		}
 
 		bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
@@ -2564,17 +2563,15 @@
 	int buffs;
 
 	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
-	BUG_ON(conf->r1buf_pool);
-	conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free,
-					  conf->poolinfo);
-	if (!conf->r1buf_pool)
-		return -ENOMEM;
-	return 0;
+	BUG_ON(mempool_initialized(&conf->r1buf_pool));
+
+	return mempool_init(&conf->r1buf_pool, buffs, r1buf_pool_alloc,
+			    r1buf_pool_free, conf->poolinfo);
 }
 
 static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
 {
-	struct r1bio *r1bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
+	struct r1bio *r1bio = mempool_alloc(&conf->r1buf_pool, GFP_NOIO);
 	struct resync_pages *rps;
 	struct bio *bio;
 	int i;
@@ -2617,7 +2614,7 @@
 	int idx = sector_to_idx(sector_nr);
 	int page_idx = 0;
 
-	if (!conf->r1buf_pool)
+	if (!mempool_initialized(&conf->r1buf_pool))
 		if (init_resync(conf))
 			return 0;
 
@@ -2953,14 +2950,13 @@
 	if (!conf->poolinfo)
 		goto abort;
 	conf->poolinfo->raid_disks = mddev->raid_disks * 2;
-	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
-					  r1bio_pool_free,
-					  conf->poolinfo);
-	if (!conf->r1bio_pool)
+	err = mempool_init(&conf->r1bio_pool, NR_RAID1_BIOS, r1bio_pool_alloc,
+			   r1bio_pool_free, conf->poolinfo);
+	if (err)
 		goto abort;
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!conf->bio_split)
+	err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
+	if (err)
 		goto abort;
 
 	conf->poolinfo->mddev = mddev;
@@ -3033,7 +3029,7 @@
 
  abort:
 	if (conf) {
-		mempool_destroy(conf->r1bio_pool);
+		mempool_exit(&conf->r1bio_pool);
 		kfree(conf->mirrors);
 		safe_put_page(conf->tmppage);
 		kfree(conf->poolinfo);
@@ -3041,8 +3037,7 @@
 		kfree(conf->nr_waiting);
 		kfree(conf->nr_queued);
 		kfree(conf->barrier);
-		if (conf->bio_split)
-			bioset_free(conf->bio_split);
+		bioset_exit(&conf->bio_split);
 		kfree(conf);
 	}
 	return ERR_PTR(err);
@@ -3144,7 +3139,7 @@
 {
 	struct r1conf *conf = priv;
 
-	mempool_destroy(conf->r1bio_pool);
+	mempool_exit(&conf->r1bio_pool);
 	kfree(conf->mirrors);
 	safe_put_page(conf->tmppage);
 	kfree(conf->poolinfo);
@@ -3152,8 +3147,7 @@
 	kfree(conf->nr_waiting);
 	kfree(conf->nr_queued);
 	kfree(conf->barrier);
-	if (conf->bio_split)
-		bioset_free(conf->bio_split);
+	bioset_exit(&conf->bio_split);
 	kfree(conf);
 }
 
@@ -3199,13 +3193,17 @@
 	 * At the same time, we "pack" the devices so that all the missing
 	 * devices have the higher raid_disk numbers.
 	 */
-	mempool_t *newpool, *oldpool;
+	mempool_t newpool, oldpool;
 	struct pool_info *newpoolinfo;
 	struct raid1_info *newmirrors;
 	struct r1conf *conf = mddev->private;
 	int cnt, raid_disks;
 	unsigned long flags;
 	int d, d2;
+	int ret;
+
+	memset(&newpool, 0, sizeof(newpool));
+	memset(&oldpool, 0, sizeof(oldpool));
 
 	/* Cannot change chunk_size, layout, or level */
 	if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
@@ -3237,17 +3235,17 @@
 	newpoolinfo->mddev = mddev;
 	newpoolinfo->raid_disks = raid_disks * 2;
 
-	newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
-				 r1bio_pool_free, newpoolinfo);
-	if (!newpool) {
+	ret = mempool_init(&newpool, NR_RAID1_BIOS, r1bio_pool_alloc,
+			   r1bio_pool_free, newpoolinfo);
+	if (ret) {
 		kfree(newpoolinfo);
-		return -ENOMEM;
+		return ret;
 	}
 	newmirrors = kzalloc(sizeof(struct raid1_info) * raid_disks * 2,
 			     GFP_KERNEL);
 	if (!newmirrors) {
 		kfree(newpoolinfo);
-		mempool_destroy(newpool);
+		mempool_exit(&newpool);
 		return -ENOMEM;
 	}
 
@@ -3287,7 +3285,7 @@
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 
-	mempool_destroy(oldpool);
+	mempool_exit(&oldpool);
 	return 0;
 }
 
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index eb84bc6..e7ccad89 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -118,10 +118,10 @@
 	 * mempools - it changes when the array grows or shrinks
 	 */
 	struct pool_info	*poolinfo;
-	mempool_t		*r1bio_pool;
-	mempool_t		*r1buf_pool;
+	mempool_t		r1bio_pool;
+	mempool_t		r1buf_pool;
 
-	struct bio_set		*bio_split;
+	struct bio_set		bio_split;
 
 	/* temporary buffer to synchronous IO when attempting to repair
 	 * a read error.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3c60774..37d4b23 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -291,14 +291,14 @@
 	struct r10conf *conf = r10_bio->mddev->private;
 
 	put_all_bios(conf, r10_bio);
-	mempool_free(r10_bio, conf->r10bio_pool);
+	mempool_free(r10_bio, &conf->r10bio_pool);
 }
 
 static void put_buf(struct r10bio *r10_bio)
 {
 	struct r10conf *conf = r10_bio->mddev->private;
 
-	mempool_free(r10_bio, conf->r10buf_pool);
+	mempool_free(r10_bio, &conf->r10buf_pool);
 
 	lower_barrier(conf);
 }
@@ -1204,7 +1204,7 @@
 				   (unsigned long long)r10_bio->sector);
 	if (max_sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, max_sectors,
-					      gfp, conf->bio_split);
+					      gfp, &conf->bio_split);
 		bio_chain(split, bio);
 		generic_make_request(bio);
 		bio = split;
@@ -1213,7 +1213,7 @@
 	}
 	slot = r10_bio->read_slot;
 
-	read_bio = bio_clone_fast(bio, gfp, mddev->bio_set);
+	read_bio = bio_clone_fast(bio, gfp, &mddev->bio_set);
 
 	r10_bio->devs[slot].bio = read_bio;
 	r10_bio->devs[slot].rdev = rdev;
@@ -1261,7 +1261,7 @@
 	} else
 		rdev = conf->mirrors[devnum].rdev;
 
-	mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+	mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
 	if (replacement)
 		r10_bio->devs[n_copy].repl_bio = mbio;
 	else
@@ -1509,7 +1509,7 @@
 
 	if (r10_bio->sectors < bio_sectors(bio)) {
 		struct bio *split = bio_split(bio, r10_bio->sectors,
-					      GFP_NOIO, conf->bio_split);
+					      GFP_NOIO, &conf->bio_split);
 		bio_chain(split, bio);
 		generic_make_request(bio);
 		bio = split;
@@ -1533,7 +1533,7 @@
 	struct r10conf *conf = mddev->private;
 	struct r10bio *r10_bio;
 
-	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+	r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
 
 	r10_bio->master_bio = bio;
 	r10_bio->sectors = sectors;
@@ -1732,8 +1732,7 @@
 	wait_barrier(conf);
 	allow_barrier(conf);
 
-	mempool_destroy(conf->r10buf_pool);
-	conf->r10buf_pool = NULL;
+	mempool_exit(&conf->r10buf_pool);
 }
 
 static int raid10_spare_active(struct mddev *mddev)
@@ -2583,7 +2582,7 @@
 		if (sectors > sect_to_write)
 			sectors = sect_to_write;
 		/* Write at 'sector' for 'sectors' */
-		wbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
+		wbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
 		bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
 		wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
 		wbio->bi_iter.bi_sector = wsector +
@@ -2816,25 +2815,25 @@
 
 static int init_resync(struct r10conf *conf)
 {
-	int buffs;
-	int i;
+	int ret, buffs, i;
 
 	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
-	BUG_ON(conf->r10buf_pool);
+	BUG_ON(mempool_initialized(&conf->r10buf_pool));
 	conf->have_replacement = 0;
 	for (i = 0; i < conf->geo.raid_disks; i++)
 		if (conf->mirrors[i].replacement)
 			conf->have_replacement = 1;
-	conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
-	if (!conf->r10buf_pool)
-		return -ENOMEM;
+	ret = mempool_init(&conf->r10buf_pool, buffs,
+			   r10buf_pool_alloc, r10buf_pool_free, conf);
+	if (ret)
+		return ret;
 	conf->next_resync = 0;
 	return 0;
 }
 
 static struct r10bio *raid10_alloc_init_r10buf(struct r10conf *conf)
 {
-	struct r10bio *r10bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+	struct r10bio *r10bio = mempool_alloc(&conf->r10buf_pool, GFP_NOIO);
 	struct resync_pages *rp;
 	struct bio *bio;
 	int nalloc;
@@ -2945,7 +2944,7 @@
 	sector_t chunk_mask = conf->geo.chunk_mask;
 	int page_idx = 0;
 
-	if (!conf->r10buf_pool)
+	if (!mempool_initialized(&conf->r10buf_pool))
 		if (init_resync(conf))
 			return 0;
 
@@ -3699,13 +3698,13 @@
 
 	conf->geo = geo;
 	conf->copies = copies;
-	conf->r10bio_pool = mempool_create(NR_RAID10_BIOS, r10bio_pool_alloc,
-					   r10bio_pool_free, conf);
-	if (!conf->r10bio_pool)
+	err = mempool_init(&conf->r10bio_pool, NR_RAID10_BIOS, r10bio_pool_alloc,
+			   r10bio_pool_free, conf);
+	if (err)
 		goto out;
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!conf->bio_split)
+	err = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
+	if (err)
 		goto out;
 
 	calc_sectors(conf, mddev->dev_sectors);
@@ -3733,6 +3732,7 @@
 	init_waitqueue_head(&conf->wait_barrier);
 	atomic_set(&conf->nr_pending, 0);
 
+	err = -ENOMEM;
 	conf->thread = md_register_thread(raid10d, mddev, "raid10");
 	if (!conf->thread)
 		goto out;
@@ -3742,11 +3742,10 @@
 
  out:
 	if (conf) {
-		mempool_destroy(conf->r10bio_pool);
+		mempool_exit(&conf->r10bio_pool);
 		kfree(conf->mirrors);
 		safe_put_page(conf->tmppage);
-		if (conf->bio_split)
-			bioset_free(conf->bio_split);
+		bioset_exit(&conf->bio_split);
 		kfree(conf);
 	}
 	return ERR_PTR(err);
@@ -3953,7 +3952,7 @@
 
 out_free_conf:
 	md_unregister_thread(&mddev->thread);
-	mempool_destroy(conf->r10bio_pool);
+	mempool_exit(&conf->r10bio_pool);
 	safe_put_page(conf->tmppage);
 	kfree(conf->mirrors);
 	kfree(conf);
@@ -3966,13 +3965,12 @@
 {
 	struct r10conf *conf = priv;
 
-	mempool_destroy(conf->r10bio_pool);
+	mempool_exit(&conf->r10bio_pool);
 	safe_put_page(conf->tmppage);
 	kfree(conf->mirrors);
 	kfree(conf->mirrors_old);
 	kfree(conf->mirrors_new);
-	if (conf->bio_split)
-		bioset_free(conf->bio_split);
+	bioset_exit(&conf->bio_split);
 	kfree(conf);
 }
 
@@ -4543,7 +4541,7 @@
 		 * on all the target devices.
 		 */
 		// FIXME
-		mempool_free(r10_bio, conf->r10buf_pool);
+		mempool_free(r10_bio, &conf->r10buf_pool);
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		return sectors_done;
 	}
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index e2e8840..d3eaaf3 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -93,10 +93,10 @@
 						   */
 	wait_queue_head_t	wait_barrier;
 
-	mempool_t		*r10bio_pool;
-	mempool_t		*r10buf_pool;
+	mempool_t		r10bio_pool;
+	mempool_t		r10buf_pool;
 	struct page		*tmppage;
-	struct bio_set		*bio_split;
+	struct bio_set		bio_split;
 
 	/* When taking over an array from a different personality, we store
 	 * the new thread here until we fully activate the array.
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 3c65f52..2b775ab 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -125,9 +125,9 @@
 	struct list_head no_mem_stripes;   /* pending stripes, -ENOMEM */
 
 	struct kmem_cache *io_kc;
-	mempool_t *io_pool;
-	struct bio_set *bs;
-	mempool_t *meta_pool;
+	mempool_t io_pool;
+	struct bio_set bs;
+	mempool_t meta_pool;
 
 	struct md_thread *reclaim_thread;
 	unsigned long reclaim_target;	/* number of space that need to be
@@ -579,7 +579,7 @@
 		md_error(log->rdev->mddev, log->rdev);
 
 	bio_put(bio);
-	mempool_free(io->meta_page, log->meta_pool);
+	mempool_free(io->meta_page, &log->meta_pool);
 
 	spin_lock_irqsave(&log->io_list_lock, flags);
 	__r5l_set_io_unit_state(io, IO_UNIT_IO_END);
@@ -748,7 +748,7 @@
 
 static struct bio *r5l_bio_alloc(struct r5l_log *log)
 {
-	struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, log->bs);
+	struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &log->bs);
 
 	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 	bio_set_dev(bio, log->rdev->bdev);
@@ -780,7 +780,7 @@
 	struct r5l_io_unit *io;
 	struct r5l_meta_block *block;
 
-	io = mempool_alloc(log->io_pool, GFP_ATOMIC);
+	io = mempool_alloc(&log->io_pool, GFP_ATOMIC);
 	if (!io)
 		return NULL;
 	memset(io, 0, sizeof(*io));
@@ -791,7 +791,7 @@
 	bio_list_init(&io->flush_barriers);
 	io->state = IO_UNIT_RUNNING;
 
-	io->meta_page = mempool_alloc(log->meta_pool, GFP_NOIO);
+	io->meta_page = mempool_alloc(&log->meta_pool, GFP_NOIO);
 	block = page_address(io->meta_page);
 	clear_page(block);
 	block->magic = cpu_to_le32(R5LOG_MAGIC);
@@ -1223,7 +1223,7 @@
 		log->next_checkpoint = io->log_start;
 
 		list_del(&io->log_sibling);
-		mempool_free(io, log->io_pool);
+		mempool_free(io, &log->io_pool);
 		r5l_run_no_mem_stripe(log);
 
 		found = true;
@@ -1647,7 +1647,7 @@
 {
 	struct page *page;
 
-	ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, log->bs);
+	ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, &log->bs);
 	if (!ctx->ra_bio)
 		return -ENOMEM;
 
@@ -3066,6 +3066,7 @@
 	struct request_queue *q = bdev_get_queue(rdev->bdev);
 	struct r5l_log *log;
 	char b[BDEVNAME_SIZE];
+	int ret;
 
 	pr_debug("md/raid:%s: using device %s as journal\n",
 		 mdname(conf->mddev), bdevname(rdev->bdev, b));
@@ -3111,16 +3112,16 @@
 	if (!log->io_kc)
 		goto io_kc;
 
-	log->io_pool = mempool_create_slab_pool(R5L_POOL_SIZE, log->io_kc);
-	if (!log->io_pool)
+	ret = mempool_init_slab_pool(&log->io_pool, R5L_POOL_SIZE, log->io_kc);
+	if (ret)
 		goto io_pool;
 
-	log->bs = bioset_create(R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	if (!log->bs)
+	ret = bioset_init(&log->bs, R5L_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	if (ret)
 		goto io_bs;
 
-	log->meta_pool = mempool_create_page_pool(R5L_POOL_SIZE, 0);
-	if (!log->meta_pool)
+	ret = mempool_init_page_pool(&log->meta_pool, R5L_POOL_SIZE, 0);
+	if (ret)
 		goto out_mempool;
 
 	spin_lock_init(&log->tree_lock);
@@ -3155,11 +3156,11 @@
 	rcu_assign_pointer(conf->log, NULL);
 	md_unregister_thread(&log->reclaim_thread);
 reclaim_thread:
-	mempool_destroy(log->meta_pool);
+	mempool_exit(&log->meta_pool);
 out_mempool:
-	bioset_free(log->bs);
+	bioset_exit(&log->bs);
 io_bs:
-	mempool_destroy(log->io_pool);
+	mempool_exit(&log->io_pool);
 io_pool:
 	kmem_cache_destroy(log->io_kc);
 io_kc:
@@ -3178,9 +3179,9 @@
 	wake_up(&conf->mddev->sb_wait);
 	flush_work(&log->disable_writeback_work);
 	md_unregister_thread(&log->reclaim_thread);
-	mempool_destroy(log->meta_pool);
-	bioset_free(log->bs);
-	mempool_destroy(log->io_pool);
+	mempool_exit(&log->meta_pool);
+	bioset_exit(&log->bs);
+	mempool_exit(&log->io_pool);
 	kmem_cache_destroy(log->io_kc);
 	kfree(log);
 }
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 42890a08..3a7c363 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -105,9 +105,9 @@
 	atomic64_t seq;		/* current log write sequence number */
 
 	struct kmem_cache *io_kc;
-	mempool_t *io_pool;
-	struct bio_set *bs;
-	struct bio_set *flush_bs;
+	mempool_t io_pool;
+	struct bio_set bs;
+	struct bio_set flush_bs;
 
 	/* used only for recovery */
 	int recovered_entries;
@@ -244,7 +244,7 @@
 	struct ppl_header *pplhdr;
 	struct page *header_page;
 
-	io = mempool_alloc(ppl_conf->io_pool, GFP_NOWAIT);
+	io = mempool_alloc(&ppl_conf->io_pool, GFP_NOWAIT);
 	if (!io)
 		return NULL;
 
@@ -503,7 +503,7 @@
 			struct bio *prev = bio;
 
 			bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
-					       ppl_conf->bs);
+					       &ppl_conf->bs);
 			bio->bi_opf = prev->bi_opf;
 			bio_copy_dev(bio, prev);
 			bio->bi_iter.bi_sector = bio_end_sector(prev);
@@ -570,7 +570,7 @@
 	list_del(&io->log_sibling);
 	spin_unlock(&log->io_list_lock);
 
-	mempool_free(io, ppl_conf->io_pool);
+	mempool_free(io, &ppl_conf->io_pool);
 
 	spin_lock(&ppl_conf->no_mem_stripes_lock);
 	if (!list_empty(&ppl_conf->no_mem_stripes)) {
@@ -642,7 +642,7 @@
 			struct bio *bio;
 			char b[BDEVNAME_SIZE];
 
-			bio = bio_alloc_bioset(GFP_NOIO, 0, ppl_conf->flush_bs);
+			bio = bio_alloc_bioset(GFP_NOIO, 0, &ppl_conf->flush_bs);
 			bio_set_dev(bio, bdev);
 			bio->bi_private = io;
 			bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
@@ -1246,11 +1246,9 @@
 
 	kfree(ppl_conf->child_logs);
 
-	if (ppl_conf->bs)
-		bioset_free(ppl_conf->bs);
-	if (ppl_conf->flush_bs)
-		bioset_free(ppl_conf->flush_bs);
-	mempool_destroy(ppl_conf->io_pool);
+	bioset_exit(&ppl_conf->bs);
+	bioset_exit(&ppl_conf->flush_bs);
+	mempool_exit(&ppl_conf->io_pool);
 	kmem_cache_destroy(ppl_conf->io_kc);
 
 	kfree(ppl_conf);
@@ -1387,24 +1385,18 @@
 		goto err;
 	}
 
-	ppl_conf->io_pool = mempool_create(conf->raid_disks, ppl_io_pool_alloc,
-					   ppl_io_pool_free, ppl_conf->io_kc);
-	if (!ppl_conf->io_pool) {
-		ret = -ENOMEM;
+	ret = mempool_init(&ppl_conf->io_pool, conf->raid_disks, ppl_io_pool_alloc,
+			   ppl_io_pool_free, ppl_conf->io_kc);
+	if (ret)
 		goto err;
-	}
 
-	ppl_conf->bs = bioset_create(conf->raid_disks, 0, BIOSET_NEED_BVECS);
-	if (!ppl_conf->bs) {
-		ret = -ENOMEM;
+	ret = bioset_init(&ppl_conf->bs, conf->raid_disks, 0, BIOSET_NEED_BVECS);
+	if (ret)
 		goto err;
-	}
 
-	ppl_conf->flush_bs = bioset_create(conf->raid_disks, 0, 0);
-	if (!ppl_conf->flush_bs) {
-		ret = -ENOMEM;
+	ret = bioset_init(&ppl_conf->flush_bs, conf->raid_disks, 0, 0);
+	if (ret)
 		goto err;
-	}
 
 	ppl_conf->count = conf->raid_disks;
 	ppl_conf->child_logs = kcalloc(ppl_conf->count, sizeof(struct ppl_log),
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index be117d0..a2e6498 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5192,7 +5192,7 @@
 	/*
 	 * use bio_clone_fast to make a copy of the bio
 	 */
-	align_bi = bio_clone_fast(raid_bio, GFP_NOIO, mddev->bio_set);
+	align_bi = bio_clone_fast(raid_bio, GFP_NOIO, &mddev->bio_set);
 	if (!align_bi)
 		return 0;
 	/*
@@ -5277,7 +5277,7 @@
 
 	if (sectors < bio_sectors(raid_bio)) {
 		struct r5conf *conf = mddev->private;
-		split = bio_split(raid_bio, sectors, GFP_NOIO, conf->bio_split);
+		split = bio_split(raid_bio, sectors, GFP_NOIO, &conf->bio_split);
 		bio_chain(split, raid_bio);
 		generic_make_request(raid_bio);
 		raid_bio = split;
@@ -6773,8 +6773,7 @@
 		if (conf->disks[i].extra_page)
 			put_page(conf->disks[i].extra_page);
 	kfree(conf->disks);
-	if (conf->bio_split)
-		bioset_free(conf->bio_split);
+	bioset_exit(&conf->bio_split);
 	kfree(conf->stripe_hashtbl);
 	kfree(conf->pending_data);
 	kfree(conf);
@@ -6853,6 +6852,7 @@
 	int i;
 	int group_cnt, worker_cnt_per_group;
 	struct r5worker_group *new_group;
+	int ret;
 
 	if (mddev->new_level != 5
 	    && mddev->new_level != 4
@@ -6950,8 +6950,8 @@
 			goto abort;
 	}
 
-	conf->bio_split = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!conf->bio_split)
+	ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
+	if (ret)
 		goto abort;
 	conf->mddev = mddev;
 
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 3f8da26..72e75ba 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -669,7 +669,7 @@
 	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;
-	struct bio_set		*bio_split;
+	struct bio_set		bio_split;
 
 	/* When taking over an array from a different personality, we store
 	 * the new thread here until we fully activate the array.
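
The raid5 and raid5-ppl hunks above are all instances of one conversion: a heap-allocated struct bio_set (and mempool_t) created with bioset_create()/mempool_create() becomes an object embedded directly in the owning structure and set up with bioset_init()/mempool_init(), which return an errno instead of a pointer. Teardown moves to bioset_exit()/mempool_exit(), which tolerate a zeroed object, so the NULL guards disappear. A minimal sketch of the pattern, with a hypothetical demo_conf standing in for r5conf/ppl_conf (uses <linux/bio.h> and <linux/mempool.h>):

	struct demo_conf {
		struct bio_set bio_split;	/* was: struct bio_set *bio_split */
		mempool_t io_pool;		/* was: mempool_t *io_pool */
	};

	static int demo_conf_init(struct demo_conf *conf, int pool_size,
				  struct kmem_cache *io_kc)
	{
		int ret;

		/* The _init helpers fill in the embedded object and return
		 * 0 or a negative errno, not a pointer.
		 */
		ret = bioset_init(&conf->bio_split, BIO_POOL_SIZE, 0, 0);
		if (ret)
			return ret;
		/* On failure the caller just runs demo_conf_exit(); the
		 * exit helpers are safe on a never-initialized object.
		 */
		return mempool_init_slab_pool(&conf->io_pool, pool_size, io_kc);
	}

	static void demo_conf_exit(struct demo_conf *conf)
	{
		/* No "if (conf->bs)" guards needed any more. */
		bioset_exit(&conf->bio_split);
		mempool_exit(&conf->io_pool);
	}
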
diff --git a/drivers/media/i2c/tda1997x.c b/drivers/media/i2c/tda1997x.c
index 3021913..33d7fcf 100644
--- a/drivers/media/i2c/tda1997x.c
+++ b/drivers/media/i2c/tda1997x.c
@@ -2444,7 +2444,7 @@
 				struct snd_soc_dai *dai)
 {
 	struct tda1997x_state *state = snd_soc_dai_get_drvdata(dai);
-	struct snd_soc_codec *codec = dai->codec;
+	struct snd_soc_component *component = dai->component;
 	struct snd_pcm_runtime *rtd = substream->runtime;
 	int rate, err;
 
@@ -2452,11 +2452,11 @@
 	err = snd_pcm_hw_constraint_minmax(rtd, SNDRV_PCM_HW_PARAM_RATE,
 					   rate, rate);
 	if (err < 0) {
-		dev_err(codec->dev, "failed to constrain samplerate to %dHz\n",
+		dev_err(component->dev, "failed to constrain samplerate to %dHz\n",
 			rate);
 		return err;
 	}
-	dev_info(codec->dev, "set samplerate constraint to %dHz\n", rate);
+	dev_info(component->dev, "set samplerate constraint to %dHz\n", rate);
 
 	return 0;
 }
@@ -2479,20 +2479,22 @@
 	.ops = &tda1997x_dai_ops,
 };
 
-static int tda1997x_codec_probe(struct snd_soc_codec *codec)
+static int tda1997x_codec_probe(struct snd_soc_component *component)
 {
 	return 0;
 }
 
-static int tda1997x_codec_remove(struct snd_soc_codec *codec)
+static void tda1997x_codec_remove(struct snd_soc_component *component)
 {
-	return 0;
 }
 
-static struct snd_soc_codec_driver tda1997x_codec_driver = {
-	.probe = tda1997x_codec_probe,
-	.remove = tda1997x_codec_remove,
-	.reg_word_size = sizeof(u16),
+static struct snd_soc_component_driver tda1997x_codec_driver = {
+	.probe			= tda1997x_codec_probe,
+	.remove			= tda1997x_codec_remove,
+	.idle_bias_on		= 1,
+	.use_pmdown_time	= 1,
+	.endianness		= 1,
+	.non_legacy_dai_naming	= 1,
 };
 
 static int tda1997x_probe(struct i2c_client *client,
@@ -2737,7 +2739,7 @@
 		else
 			formats = SNDRV_PCM_FMTBIT_S16_LE;
 		tda1997x_audio_dai.capture.formats = formats;
-		ret = snd_soc_register_codec(&state->client->dev,
+		ret = devm_snd_soc_register_component(&state->client->dev,
 					     &tda1997x_codec_driver,
 					     &tda1997x_audio_dai, 1);
 		if (ret) {
@@ -2782,7 +2784,6 @@
 	struct tda1997x_platform_data *pdata = &state->pdata;
 
 	if (pdata->audout_format) {
-		snd_soc_unregister_codec(&client->dev);
 		mutex_destroy(&state->audio_lock);
 	}
 
diff --git a/drivers/media/pci/saa7164/saa7164-core.c b/drivers/media/pci/saa7164/saa7164-core.c
index fca36a4..d697e1a 100644
--- a/drivers/media/pci/saa7164/saa7164-core.c
+++ b/drivers/media/pci/saa7164/saa7164-core.c
@@ -1122,23 +1122,11 @@
 	return 0;
 }
 
-static int saa7164_proc_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, saa7164_proc_show, NULL);
-}
-
-static const struct file_operations saa7164_proc_fops = {
-	.open		= saa7164_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int saa7164_proc_create(void)
 {
 	struct proc_dir_entry *pe;
 
-	pe = proc_create("saa7164", S_IRUGO, NULL, &saa7164_proc_fops);
+	pe = proc_create_single("saa7164", S_IRUGO, NULL, saa7164_proc_show);
 	if (!pe)
 		return -ENOMEM;
 
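
The saa7164 hunk above (and the videocodec and mptbase hunks below) apply the tree-wide proc_create_single() conversion: the single_open() wrapper and the per-file file_operations table collapse into one registration call that takes the show routine directly. A sketch of both variants with hypothetical names; for the _data form the cookie is delivered as m->private in the show routine, which is exactly what the mptbase code below relies on:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_proc_show(struct seq_file *m, void *v)
	{
		/* m->private is NULL for the plain variant. */
		seq_printf(m, "cookie=%p\n", m->private);
		return 0;
	}

	static int demo_proc_register(void *cookie)
	{
		/* Plain variant: no private data. */
		if (!proc_create_single("demo", 0444, NULL, demo_proc_show))
			return -ENOMEM;
		/* _data variant: cookie shows up as m->private. */
		if (!proc_create_single_data("demo_data", 0444, NULL,
					     demo_proc_show, cookie))
			return -ENOMEM;
		return 0;
	}
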
diff --git a/drivers/media/pci/zoran/videocodec.c b/drivers/media/pci/zoran/videocodec.c
index 5ff23ef..4427ae7 100644
--- a/drivers/media/pci/zoran/videocodec.c
+++ b/drivers/media/pci/zoran/videocodec.c
@@ -344,19 +344,6 @@
 
 	return 0;
 }
-
-static int proc_videocodecs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_videocodecs_show, NULL);
-}
-
-static const struct file_operations videocodecs_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_videocodecs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 /* ===================== */
@@ -373,7 +360,8 @@
 	       VIDEOCODEC_VERSION);
 
 #ifdef CONFIG_PROC_FS
-	videocodec_proc_entry = proc_create("videocodecs", 0, NULL, &videocodecs_proc_fops);
+	videocodec_proc_entry = proc_create_single("videocodecs", 0, NULL,
+			proc_videocodecs_show);
 	if (!videocodec_proc_entry) {
 		dprintk(1, KERN_ERR "videocodec: can't init procfs.\n");
 	}
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 57b13df..a15181f 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2094,14 +2094,9 @@
 static int msb_init_disk(struct memstick_dev *card)
 {
 	struct msb_data *msb = memstick_get_drvdata(card);
-	struct memstick_host *host = card->host;
 	int rc;
-	u64 limit = BLK_BOUNCE_HIGH;
 	unsigned long capacity;
 
-	if (host->dev.dma_mask && *(host->dev.dma_mask))
-		limit = *(host->dev.dma_mask);
-
 	mutex_lock(&msb_disk_lock);
 	msb->disk_id = idr_alloc(&msb_disk_idr, card, 0, 256, GFP_KERNEL);
 	mutex_unlock(&msb_disk_lock);
@@ -2123,7 +2118,6 @@
 
 	msb->queue->queuedata = card;
 
-	blk_queue_bounce_limit(msb->queue, limit);
 	blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
 	blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS);
 	blk_queue_max_segment_size(msb->queue,
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 8897962..5ee9326 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1170,17 +1170,12 @@
 static int mspro_block_init_disk(struct memstick_dev *card)
 {
 	struct mspro_block_data *msb = memstick_get_drvdata(card);
-	struct memstick_host *host = card->host;
 	struct mspro_devinfo *dev_info = NULL;
 	struct mspro_sys_info *sys_info = NULL;
 	struct mspro_sys_attr *s_attr = NULL;
 	int rc, disk_id;
-	u64 limit = BLK_BOUNCE_HIGH;
 	unsigned long capacity;
 
-	if (host->dev.dma_mask && *(host->dev.dma_mask))
-		limit = *(host->dev.dma_mask);
-
 	for (rc = 0; msb->attr_group.attrs[rc]; ++rc) {
 		s_attr = mspro_from_sysfs_attr(msb->attr_group.attrs[rc]);
 
@@ -1219,7 +1214,6 @@
 
 	msb->queue->queuedata = card;
 
-	blk_queue_bounce_limit(msb->queue, limit);
 	blk_queue_max_hw_sectors(msb->queue, MSPRO_BLOCK_MAX_PAGES);
 	blk_queue_max_segments(msb->queue, MSPRO_BLOCK_MAX_SEGS);
 	blk_queue_max_segment_size(msb->queue,
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 51eb1b0..a746ccd 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -197,9 +197,9 @@
 static int	mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init);
 
 #ifdef CONFIG_PROC_FS
-static const struct file_operations mpt_summary_proc_fops;
-static const struct file_operations mpt_version_proc_fops;
-static const struct file_operations mpt_iocinfo_proc_fops;
+static int mpt_summary_proc_show(struct seq_file *m, void *v);
+static int mpt_version_proc_show(struct seq_file *m, void *v);
+static int mpt_iocinfo_proc_show(struct seq_file *m, void *v);
 #endif
 static void	mpt_get_fw_exp_ver(char *buf, MPT_ADAPTER *ioc);
 
@@ -2040,8 +2040,10 @@
 	 */
 	dent = proc_mkdir(ioc->name, mpt_proc_root_dir);
 	if (dent) {
-		proc_create_data("info", S_IRUGO, dent, &mpt_iocinfo_proc_fops, ioc);
-		proc_create_data("summary", S_IRUGO, dent, &mpt_summary_proc_fops, ioc);
+		proc_create_single_data("info", S_IRUGO, dent,
+				mpt_iocinfo_proc_show, ioc);
+		proc_create_single_data("summary", S_IRUGO, dent,
+				mpt_summary_proc_show, ioc);
 	}
 #endif
 
@@ -6606,8 +6608,10 @@
 	if (mpt_proc_root_dir == NULL)
 		return -ENOTDIR;
 
-	proc_create("summary", S_IRUGO, mpt_proc_root_dir, &mpt_summary_proc_fops);
-	proc_create("version", S_IRUGO, mpt_proc_root_dir, &mpt_version_proc_fops);
+	proc_create_single("summary", S_IRUGO, mpt_proc_root_dir,
+			mpt_summary_proc_show);
+	proc_create_single("version", S_IRUGO, mpt_proc_root_dir,
+			mpt_version_proc_show);
 	return 0;
 }
 
@@ -6646,19 +6650,6 @@
 	return 0;
 }
 
-static int mpt_summary_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mpt_summary_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations mpt_summary_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= mpt_summary_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int mpt_version_proc_show(struct seq_file *m, void *v)
 {
 	u8	 cb_idx;
@@ -6701,19 +6692,6 @@
 	return 0;
 }
 
-static int mpt_version_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mpt_version_proc_show, NULL);
-}
-
-static const struct file_operations mpt_version_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= mpt_version_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int mpt_iocinfo_proc_show(struct seq_file *m, void *v)
 {
 	MPT_ADAPTER	*ioc = m->private;
@@ -6793,19 +6771,6 @@
 
 	return 0;
 }
-
-static int mpt_iocinfo_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mpt_iocinfo_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations mpt_iocinfo_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= mpt_iocinfo_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif		/* CONFIG_PROC_FS } */
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 86503f6..19a5aa7 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1929,7 +1929,7 @@
 	MPT_SCSI_HOST *hd;
 	MPT_ADAPTER   *ioc;
 	VirtDevice    *vdevice;
-	enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
+	enum blk_eh_timer_return rc = BLK_EH_DONE;
 
 	hd = shost_priv(sc->device->host);
 	if (hd == NULL) {
diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index d610241..36156a4 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -229,7 +229,7 @@
 }
 EXPORT_SYMBOL(cros_ec_suspend);
 
-static void cros_ec_drain_events(struct cros_ec_device *ec_dev)
+static void cros_ec_report_events_during_suspend(struct cros_ec_device *ec_dev)
 {
 	while (cros_ec_get_next_event(ec_dev, NULL) > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier,
@@ -253,21 +253,16 @@
 		dev_dbg(ec_dev->dev, "Error %d sending resume event to ec",
 			ret);
 
-	/*
-	 * In some cases, we need to distinguish between events that occur
-	 * during suspend if the EC is not a wake source. For example,
-	 * keypresses during suspend should be discarded if it does not wake
-	 * the system.
-	 *
-	 * If the EC is not a wake source, drain the event queue and mark them
-	 * as "queued during suspend".
-	 */
 	if (ec_dev->wake_enabled) {
 		disable_irq_wake(ec_dev->irq);
 		ec_dev->wake_enabled = 0;
-	} else {
-		cros_ec_drain_events(ec_dev);
 	}
+	/*
+	 * Let the mfd devices know about events that occur during
+	 * suspend. This way the clients know what to do with them.
+	 */
+	cros_ec_report_events_during_suspend(ec_dev);
 
 	return 0;
 }
diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index d7f54e4..c63e331 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -279,8 +279,21 @@
 	adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2;
 	adc1 = MC13XXX_ADC1_ADEN | MC13XXX_ADC1_ADTRIGIGN | MC13XXX_ADC1_ASC;
 
-	if (channel > 7)
+	/*
+	 * Channels mapped through ADIN7:
+	 * 7  - General purpose ADIN7
+	 * 16 - UID
+	 * 17 - Die temperature
+	 */
+	if (channel > 7 && channel < 16) {
 		adc1 |= MC13XXX_ADC1_ADSEL;
+	} else if (channel == 16) {
+		adc0 |= MC13XXX_ADC0_ADIN7SEL_UID;
+		channel = 7;
+	} else if (channel == 17) {
+		adc0 |= MC13XXX_ADC0_ADIN7SEL_DIE;
+		channel = 7;
+	}
 
 	switch (mode) {
 	case MC13XXX_ADC_MODE_TS:
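
With the hunk above, the existing single-channel conversion path gains two pseudo-channels that are folded back onto ADIN7 with the matching ADC0 select bit: 16 for UID and 17 for die temperature. A hedged caller sketch, assuming the in-tree mc13xxx_adc_do_conversion() signature of this kernel generation:

	static int demo_read_die_temp(struct mc13xxx *mc13xxx, unsigned int *raw)
	{
		unsigned int sample[4];
		int ret;

		/* Channel 17 is remapped internally to ADIN7 with
		 * ADIN7SEL_DIE set, so the conversion result is the PMIC
		 * die-temperature reading.
		 */
		ret = mc13xxx_adc_do_conversion(mc13xxx,
						MC13XXX_ADC_MODE_SINGLE_CHAN,
						17, 0, false, sample);
		if (ret)
			return ret;
		*raw = sample[0];	/* raw register word; decoding is up to the caller */
		return 0;
	}
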
diff --git a/drivers/mfd/qcom-pm8xxx.c b/drivers/mfd/qcom-pm8xxx.c
index f08758f..e6e8d81 100644
--- a/drivers/mfd/qcom-pm8xxx.c
+++ b/drivers/mfd/qcom-pm8xxx.c
@@ -563,8 +563,8 @@
 	pr_info("PMIC revision 2: %02X\n", val);
 	rev |= val << BITS_PER_BYTE;
 
-	chip = devm_kzalloc(&pdev->dev, sizeof(*chip) +
-			    sizeof(chip->config[0]) * data->num_irqs,
+	chip = devm_kzalloc(&pdev->dev,
+			    struct_size(chip, config, data->num_irqs),
 			    GFP_KERNEL);
 	if (!chip)
 		return -ENOMEM;
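
This qcom-pm8xxx hunk (and the cb710 hunk below) converts an open-coded trailing-array allocation to struct_size() from <linux/overflow.h>, which computes sizeof(*p) + n * sizeof(p->member[0]) but saturates rather than wraps if the multiplication overflows. A sketch with a hypothetical struct in the shape of pm8xxx_chip:

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct demo {
		int n;
		u8 cfg[];	/* flexible array member */
	};

	static struct demo *demo_alloc(size_t n)
	{
		/* struct_size(d, cfg, n) == sizeof(*d) + n * sizeof(d->cfg[0]),
		 * saturating at SIZE_MAX on overflow; sizeof() does not
		 * evaluate d, so using it in its own initializer is fine.
		 */
		struct demo *d = kzalloc(struct_size(d, cfg, n), GFP_KERNEL);

		if (d)
			d->n = n;
		return d;
	}
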
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 5d71300..3726eac 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -113,6 +113,20 @@
 	  for information on the specific driver level and support statement
 	  for your IBM server.
 
+config IBMVMC
+	tristate "IBM Virtual Management Channel support"
+	depends on PPC_PSERIES
+	help
+	  This is the IBM POWER Virtual Management Channel.

+
+	  This driver is to be used for the POWER Virtual
+	  Management Channel virtual adapter on the PowerVM
+	  platform. It provides both request/response and
+	  async message support through the /dev/ibmvmc node.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ibmvmc.
+
 config PHANTOM
 	tristate "Sensable PHANToM (PCI)"
 	depends on PCI
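
Per the IBMVMC help text above, the driver added later in this series exposes its request/response path through /dev/ibmvmc. A hedged userspace sketch of the session sequence that the driver's ioctl/write/read handlers implement; the VMC_IOCTL_* names and HMC_ID_LEN come from the driver source, treating its header as installable is an assumption, and retry/error handling is trimmed:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include "ibmvmc.h"	/* VMC_IOCTL_*, HMC_ID_LEN (driver header, assumed available) */

	static int vmc_session(const void *req, size_t req_len,
			       void *rsp, size_t rsp_len)
	{
		unsigned char hmc_id[HMC_ID_LEN] = "management-app-1";
		int fd = open("/dev/ibmvmc", O_RDWR);

		if (fd < 0)
			return -1;
		/* Reserve an HMC connection and send the protocol Open message. */
		if (ioctl(fd, VMC_IOCTL_SETHMCID, hmc_id) < 0)
			goto err;
		/* write() issues one HMC message; -EBUSY means the open
		 * response is still outstanding and a real caller would retry.
		 */
		if (write(fd, req, req_len) < 0)
			goto err;
		/* read() blocks until the hypervisor queues a reply buffer. */
		if (read(fd, rsp, rsp_len) < 0)
			goto err;
		close(fd);
		return 0;
	err:
		close(fd);
		return -1;
	}
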
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 20be70c..af22bbc 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -4,6 +4,7 @@
 #
 
 obj-$(CONFIG_IBM_ASM)		+= ibmasm/
+obj-$(CONFIG_IBMVMC)		+= ibmvmc.o
 obj-$(CONFIG_AD525X_DPOT)	+= ad525x_dpot.o
 obj-$(CONFIG_AD525X_DPOT_I2C)	+= ad525x_dpot-i2c.o
 obj-$(CONFIG_AD525X_DPOT_SPI)	+= ad525x_dpot-spi.o
diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c
index fb397e7..4d4acf7 100644
--- a/drivers/misc/cb710/core.c
+++ b/drivers/misc/cb710/core.c
@@ -232,8 +232,8 @@
 	if (val & CB710_SLOT_SM)
 		++n;
 
-	chip = devm_kzalloc(&pdev->dev,
-		sizeof(*chip) + n * sizeof(*chip->slot), GFP_KERNEL);
+	chip = devm_kzalloc(&pdev->dev, struct_size(chip, slot, n),
+			    GFP_KERNEL);
 	if (!chip)
 		return -ENOMEM;
 
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 7ff315a..c6ec872 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -128,11 +128,12 @@
 	mutex_unlock(&ctx->mapping_lock);
 }
 
-static int cxl_mmap_fault(struct vm_fault *vmf)
+static vm_fault_t cxl_mmap_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct cxl_context *ctx = vma->vm_file->private_data;
 	u64 area, offset;
+	vm_fault_t ret;
 
 	offset = vmf->pgoff << PAGE_SHIFT;
 
@@ -169,11 +170,11 @@
 		return VM_FAULT_SIGBUS;
 	}
 
-	vm_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
+	ret = vmf_insert_pfn(vma, vmf->address, (area + offset) >> PAGE_SHIFT);
 
 	mutex_unlock(&ctx->status_mutex);
 
-	return VM_FAULT_NOPAGE;
+	return ret;
 }
 
 static const struct vm_operations_struct cxl_mmap_vmops = {
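
The cxl hunk above is part of the tree-wide vm_fault_t conversion: a fault handler now returns the vm_fault_t produced by vmf_insert_pfn() instead of calling vm_insert_pfn() and hard-coding VM_FAULT_NOPAGE, so insertion failures propagate as real fault codes. A minimal sketch with a hypothetical demo_resolve_pfn() helper:

	static vm_fault_t demo_mmap_fault(struct vm_fault *vmf)
	{
		struct vm_area_struct *vma = vmf->vma;
		unsigned long pfn = demo_resolve_pfn(vma, vmf->pgoff); /* hypothetical */

		/* vmf_insert_pfn() returns VM_FAULT_NOPAGE on success and a
		 * VM_FAULT_* error code on failure -- no errno translation
		 * is needed in the handler any more.
		 */
		return vmf_insert_pfn(vma, vmf->address, pfn);
	}
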
diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c
new file mode 100644
index 0000000..fb83d13
--- /dev/null
+++ b/drivers/misc/ibmvmc.c
@@ -0,0 +1,2418 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IBM Power Systems Virtual Management Channel Support.
+ *
+ * Copyright (c) 2004, 2018 IBM Corp.
+ *   Dave Engebretsen engebret@us.ibm.com
+ *   Steven Royer seroyer@linux.vnet.ibm.com
+ *   Adam Reznechek adreznec@linux.vnet.ibm.com
+ *   Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/fcntl.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/miscdevice.h>
+#include <linux/sched/signal.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/vio.h>
+
+#include "ibmvmc.h"
+
+#define IBMVMC_DRIVER_VERSION "1.0"
+
+/*
+ * Static global variables
+ */
+static DECLARE_WAIT_QUEUE_HEAD(ibmvmc_read_wait);
+
+static const char ibmvmc_driver_name[] = "ibmvmc";
+
+static struct ibmvmc_struct ibmvmc;
+static struct ibmvmc_hmc hmcs[MAX_HMCS];
+static struct crq_server_adapter ibmvmc_adapter;
+
+static int ibmvmc_max_buf_pool_size = DEFAULT_BUF_POOL_SIZE;
+static int ibmvmc_max_hmcs = DEFAULT_HMCS;
+static int ibmvmc_max_mtu = DEFAULT_MTU;
+
+static inline long h_copy_rdma(s64 length, u64 sliobn, u64 slioba,
+			       u64 dliobn, u64 dlioba)
+{
+	long rc = 0;
+
+	/* Ensure all writes to source memory are visible before hcall */
+	dma_wmb();
+	pr_debug("ibmvmc: h_copy_rdma(0x%llx, 0x%llx, 0x%llx, 0x%llx, 0x%llx\n",
+		 length, sliobn, slioba, dliobn, dlioba);
+	rc = plpar_hcall_norets(H_COPY_RDMA, length, sliobn, slioba,
+				dliobn, dlioba);
+	pr_debug("ibmvmc: h_copy_rdma rc = 0x%lx\n", rc);
+
+	return rc;
+}
+
+static inline void h_free_crq(uint32_t unit_address)
+{
+	long rc = 0;
+
+	do {
+		if (H_IS_LONG_BUSY(rc))
+			msleep(get_longbusy_msecs(rc));
+
+		rc = plpar_hcall_norets(H_FREE_CRQ, unit_address);
+	} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
+}
+
+/**
+ * h_request_vmc: - request a hypervisor virtual management channel device
+ * @vmc_index: drc index of the vmc device created
+ *
+ * Requests the hypervisor create a new virtual management channel device,
+ * allowing this partition to send hypervisor virtualization control
+ * commands.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static inline long h_request_vmc(u32 *vmc_index)
+{
+	long rc = 0;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+
+	do {
+		if (H_IS_LONG_BUSY(rc))
+			msleep(get_longbusy_msecs(rc));
+
+		/* Call to request the VMC device from phyp */
+		rc = plpar_hcall(H_REQUEST_VMC, retbuf);
+		pr_debug("ibmvmc: %s rc = 0x%lx\n", __func__, rc);
+		*vmc_index = retbuf[0];
+	} while ((rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
+
+	return rc;
+}
+
+/* routines for managing a command/response queue */
+/**
+ * ibmvmc_handle_event: - Interrupt handler for crq events
+ * @irq:        number of irq to handle, not used
+ * @dev_instance: crq_server_adapter that received interrupt
+ *
+ * Disables interrupts and schedules ibmvmc_task
+ *
+ * Always returns IRQ_HANDLED
+ */
+static irqreturn_t ibmvmc_handle_event(int irq, void *dev_instance)
+{
+	struct crq_server_adapter *adapter =
+		(struct crq_server_adapter *)dev_instance;
+
+	vio_disable_interrupts(to_vio_dev(adapter->dev));
+	tasklet_schedule(&adapter->work_task);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ibmvmc_release_crq_queue - Release CRQ Queue
+ *
+ * @adapter:	crq_server_adapter struct
+ */
+static void ibmvmc_release_crq_queue(struct crq_server_adapter *adapter)
+{
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	struct crq_queue *queue = &adapter->queue;
+
+	free_irq(vdev->irq, (void *)adapter);
+	tasklet_kill(&adapter->work_task);
+
+	if (adapter->reset_task)
+		kthread_stop(adapter->reset_task);
+
+	h_free_crq(vdev->unit_address);
+	dma_unmap_single(adapter->dev,
+			 queue->msg_token,
+			 queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
+	free_page((unsigned long)queue->msgs);
+}
+
+/**
+ * ibmvmc_reset_crq_queue - Reset CRQ Queue
+ *
+ * @adapter:	crq_server_adapter struct
+ *
+ * This function calls h_free_crq and then calls H_REG_CRQ and does all the
+ * bookkeeping to get us back to where we can communicate.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-Zero - Failure
+ */
+static int ibmvmc_reset_crq_queue(struct crq_server_adapter *adapter)
+{
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	struct crq_queue *queue = &adapter->queue;
+	int rc = 0;
+
+	/* Close the CRQ */
+	h_free_crq(vdev->unit_address);
+
+	/* Clean out the queue */
+	memset(queue->msgs, 0x00, PAGE_SIZE);
+	queue->cur = 0;
+
+	/* And re-open it again */
+	rc = plpar_hcall_norets(H_REG_CRQ,
+				vdev->unit_address,
+				queue->msg_token, PAGE_SIZE);
+	if (rc == 2)
+		/* Adapter is good, but other end is not ready */
+		dev_warn(adapter->dev, "Partner adapter not ready\n");
+	else if (rc != 0)
+		dev_err(adapter->dev, "couldn't register crq--rc 0x%x\n", rc);
+
+	return rc;
+}
+
+/**
+ * crq_queue_next_crq: - Returns the next entry in message queue
+ * @queue:      crq_queue to use
+ *
+ * Returns pointer to next entry in queue, or NULL if there are no new
+ * entries in the CRQ.
+ */
+static struct ibmvmc_crq_msg *crq_queue_next_crq(struct crq_queue *queue)
+{
+	struct ibmvmc_crq_msg *crq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue->lock, flags);
+	crq = &queue->msgs[queue->cur];
+	if (crq->valid & 0x80) {
+		if (++queue->cur == queue->size)
+			queue->cur = 0;
+
+		/* Ensure the read of the valid bit occurs before reading any
+		 * other bits of the CRQ entry
+		 */
+		dma_rmb();
+	} else {
+		crq = NULL;
+	}
+
+	spin_unlock_irqrestore(&queue->lock, flags);
+
+	return crq;
+}
+
+/**
+ * ibmvmc_send_crq - Send CRQ
+ *
+ * @adapter:	crq_server_adapter struct
+ * @word1:	Word1 Data field
+ * @word2:	Word2 Data field
+ *
+ * Return:
+ *	0 - Success
+ *	Non-Zero - Failure
+ */
+static long ibmvmc_send_crq(struct crq_server_adapter *adapter,
+			    u64 word1, u64 word2)
+{
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	long rc = 0;
+
+	dev_dbg(adapter->dev, "(0x%x, 0x%016llx, 0x%016llx)\n",
+		vdev->unit_address, word1, word2);
+
+	/*
+	 * Ensure the command buffer is flushed to memory before handing it
+	 * over to the other side to prevent it from fetching any stale data.
+	 */
+	dma_wmb();
+	rc = plpar_hcall_norets(H_SEND_CRQ, vdev->unit_address, word1, word2);
+	dev_dbg(adapter->dev, "rc = 0x%lx\n", rc);
+
+	return rc;
+}
+
+/**
+ * alloc_dma_buffer - Create DMA Buffer
+ *
+ * @vdev:	vio_dev struct
+ * @size:	Size field
+ * @dma_handle:	DMA address field
+ *
+ * Allocates memory for the command queue and maps remote memory into an
+ * ioba.
+ *
+ * Returns a pointer to the buffer
+ */
+static void *alloc_dma_buffer(struct vio_dev *vdev, size_t size,
+			      dma_addr_t *dma_handle)
+{
+	/* allocate memory */
+	void *buffer = kzalloc(size, GFP_KERNEL);
+
+	if (!buffer) {
+		*dma_handle = 0;
+		return NULL;
+	}
+
+	/* DMA map */
+	*dma_handle = dma_map_single(&vdev->dev, buffer, size,
+				     DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(&vdev->dev, *dma_handle)) {
+		*dma_handle = 0;
+		kzfree(buffer);
+		return NULL;
+	}
+
+	return buffer;
+}
+
+/**
+ * free_dma_buffer - Free DMA Buffer
+ *
+ * @vdev:	vio_dev struct
+ * @size:	Size field
+ * @vaddr:	Address field
+ * @dma_handle:	DMA address field
+ *
+ * Releases memory for a command queue and unmaps mapped remote memory.
+ */
+static void free_dma_buffer(struct vio_dev *vdev, size_t size, void *vaddr,
+			    dma_addr_t dma_handle)
+{
+	/* DMA unmap */
+	dma_unmap_single(&vdev->dev, dma_handle, size, DMA_BIDIRECTIONAL);
+
+	/* deallocate memory */
+	kzfree(vaddr);
+}
+
+/**
+ * ibmvmc_get_valid_hmc_buffer - Retrieve Valid HMC Buffer
+ *
+ * @hmc_index:	HMC Index Field
+ *
+ * Return:
+ *	Pointer to ibmvmc_buffer
+ */
+static struct ibmvmc_buffer *ibmvmc_get_valid_hmc_buffer(u8 hmc_index)
+{
+	struct ibmvmc_buffer *buffer;
+	struct ibmvmc_buffer *ret_buf = NULL;
+	unsigned long i;
+
+	if (hmc_index > ibmvmc.max_hmc_index)
+		return NULL;
+
+	buffer = hmcs[hmc_index].buffer;
+
+	for (i = 0; i < ibmvmc_max_buf_pool_size; i++) {
+		if (buffer[i].valid && buffer[i].free &&
+		    buffer[i].owner == VMC_BUF_OWNER_ALPHA) {
+			buffer[i].free = 0;
+			ret_buf = &buffer[i];
+			break;
+		}
+	}
+
+	return ret_buf;
+}
+
+/**
+ * ibmvmc_get_free_hmc_buffer - Get Free HMC Buffer
+ *
+ * @adapter:	crq_server_adapter struct
+ * @hmc_index:	Hmc Index field
+ *
+ * Return:
+ *	Pointer to ibmvmc_buffer
+ */
+static struct ibmvmc_buffer *ibmvmc_get_free_hmc_buffer(struct crq_server_adapter *adapter,
+							u8 hmc_index)
+{
+	struct ibmvmc_buffer *buffer;
+	struct ibmvmc_buffer *ret_buf = NULL;
+	unsigned long i;
+
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		dev_info(adapter->dev, "get_free_hmc_buffer: invalid hmc_index=0x%x\n",
+			 hmc_index);
+		return NULL;
+	}
+
+	buffer = hmcs[hmc_index].buffer;
+
+	for (i = 0; i < ibmvmc_max_buf_pool_size; i++) {
+		if (buffer[i].free &&
+		    buffer[i].owner == VMC_BUF_OWNER_ALPHA) {
+			buffer[i].free = 0;
+			ret_buf = &buffer[i];
+			break;
+		}
+	}
+
+	return ret_buf;
+}
+
+/**
+ * ibmvmc_free_hmc_buffer - Free an HMC Buffer
+ *
+ * @hmc:	ibmvmc_hmc struct
+ * @buffer:	ibmvmc_buffer struct
+ *
+ */
+static void ibmvmc_free_hmc_buffer(struct ibmvmc_hmc *hmc,
+				   struct ibmvmc_buffer *buffer)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&hmc->lock, flags);
+	buffer->free = 1;
+	spin_unlock_irqrestore(&hmc->lock, flags);
+}
+
+/**
+ * ibmvmc_count_hmc_buffers - Count HMC Buffers
+ *
+ * @hmc_index:	HMC Index field
+ * @valid:	Valid number of buffers field
+ * @free:	Free number of buffers field
+ *
+ */
+static void ibmvmc_count_hmc_buffers(u8 hmc_index, unsigned int *valid,
+				     unsigned int *free)
+{
+	struct ibmvmc_buffer *buffer;
+	unsigned long i;
+	unsigned long flags;
+
+	if (hmc_index > ibmvmc.max_hmc_index)
+		return;
+
+	if (!valid || !free)
+		return;
+
+	*valid = 0; *free = 0;
+
+	buffer = hmcs[hmc_index].buffer;
+	spin_lock_irqsave(&hmcs[hmc_index].lock, flags);
+
+	for (i = 0; i < ibmvmc_max_buf_pool_size; i++) {
+		if (buffer[i].valid) {
+			*valid = *valid + 1;
+			if (buffer[i].free)
+				*free = *free + 1;
+		}
+	}
+
+	spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+}
+
+/**
+ * ibmvmc_get_free_hmc - Get Free HMC
+ *
+ * Return:
+ *	Pointer to an available HMC Connection
+ *	Null otherwise
+ */
+static struct ibmvmc_hmc *ibmvmc_get_free_hmc(void)
+{
+	unsigned long i;
+	unsigned long flags;
+
+	/*
+	 * Find an available HMC connection.
+	 */
+	for (i = 0; i <= ibmvmc.max_hmc_index; i++) {
+		spin_lock_irqsave(&hmcs[i].lock, flags);
+		if (hmcs[i].state == ibmhmc_state_free) {
+			hmcs[i].index = i;
+			hmcs[i].state = ibmhmc_state_initial;
+			spin_unlock_irqrestore(&hmcs[i].lock, flags);
+			return &hmcs[i];
+		}
+		spin_unlock_irqrestore(&hmcs[i].lock, flags);
+	}
+
+	return NULL;
+}
+
+/**
+ * ibmvmc_return_hmc - Return an HMC Connection
+ *
+ * @hmc:		ibmvmc_hmc struct
+ * @release_readers:	Number of readers connected to session
+ *
+ * This function releases the HMC connections back into the pool.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_return_hmc(struct ibmvmc_hmc *hmc, bool release_readers)
+{
+	struct ibmvmc_buffer *buffer;
+	struct crq_server_adapter *adapter;
+	struct vio_dev *vdev;
+	unsigned long i;
+	unsigned long flags;
+
+	if (!hmc || !hmc->adapter)
+		return -EIO;
+
+	if (release_readers) {
+		if (hmc->file_session) {
+			struct ibmvmc_file_session *session = hmc->file_session;
+
+			session->valid = 0;
+			wake_up_interruptible(&ibmvmc_read_wait);
+		}
+	}
+
+	adapter = hmc->adapter;
+	vdev = to_vio_dev(adapter->dev);
+
+	spin_lock_irqsave(&hmc->lock, flags);
+	hmc->index = 0;
+	hmc->state = ibmhmc_state_free;
+	hmc->queue_head = 0;
+	hmc->queue_tail = 0;
+	buffer = hmc->buffer;
+	for (i = 0; i < ibmvmc_max_buf_pool_size; i++) {
+		if (buffer[i].valid) {
+			free_dma_buffer(vdev,
+					ibmvmc.max_mtu,
+					buffer[i].real_addr_local,
+					buffer[i].dma_addr_local);
+			dev_dbg(adapter->dev, "Forgot buffer id 0x%lx\n", i);
+		}
+		memset(&buffer[i], 0, sizeof(struct ibmvmc_buffer));
+
+		hmc->queue_outbound_msgs[i] = VMC_INVALID_BUFFER_ID;
+	}
+
+	spin_unlock_irqrestore(&hmc->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ibmvmc_send_open - Interface Open
+ * @buffer: Pointer to ibmvmc_buffer struct
+ * @hmc: Pointer to ibmvmc_hmc struct
+ *
+ * This command is sent by the management partition as the result of a
+ * management partition device request. It causes the hypervisor to
+ * prepare a set of data buffers for the management application connection
+ * indicated by the HMC index. A unique HMC index would be used if multiple
+ * management applications running concurrently were desired. Before
+ * responding to this command, the hypervisor must provide the management
+ * partition with at least one of these new buffers via the Add Buffer
+ * message, which indicates whether the messages are inbound or outbound
+ * from the hypervisor.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_send_open(struct ibmvmc_buffer *buffer,
+			    struct ibmvmc_hmc *hmc)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	struct crq_server_adapter *adapter;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+	int rc = 0;
+
+	if (!hmc || !hmc->adapter)
+		return -EIO;
+
+	adapter = hmc->adapter;
+
+	dev_dbg(adapter->dev, "send_open: 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
+		(unsigned long)buffer->size, (unsigned long)adapter->liobn,
+		(unsigned long)buffer->dma_addr_local,
+		(unsigned long)adapter->riobn,
+		(unsigned long)buffer->dma_addr_remote);
+
+	rc = h_copy_rdma(buffer->size,
+			 adapter->liobn,
+			 buffer->dma_addr_local,
+			 adapter->riobn,
+			 buffer->dma_addr_remote);
+	if (rc) {
+		dev_err(adapter->dev, "Error: In send_open, h_copy_rdma rc 0x%x\n",
+			rc);
+		return -EIO;
+	}
+
+	hmc->state = ibmhmc_state_opening;
+
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_OPEN;
+	crq_msg.status = 0;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc->session;
+	crq_msg.hmc_index = hmc->index;
+	crq_msg.var2.buffer_id = cpu_to_be16(buffer->id);
+	crq_msg.rsvd = 0;
+	crq_msg.var3.rsvd = 0;
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return rc;
+}
+
+/**
+ * ibmvmc_send_close - Interface Close
+ * @hmc: Pointer to ibmvmc_hmc struct
+ *
+ * This command is sent by the management partition to terminate a
+ * management application to hypervisor connection. When this command is
+ * sent, the management partition has quiesced all I/O operations to all
+ * buffers associated with this management application connection, and
+ * has freed any storage for these buffers.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_send_close(struct ibmvmc_hmc *hmc)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	struct crq_server_adapter *adapter;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+	int rc = 0;
+
+	if (!hmc || !hmc->adapter)
+		return -EIO;
+
+	adapter = hmc->adapter;
+
+	dev_info(adapter->dev, "CRQ send: close\n");
+
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_CLOSE;
+	crq_msg.status = 0;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc->session;
+	crq_msg.hmc_index = hmc->index;
+	crq_msg.var2.rsvd = 0;
+	crq_msg.rsvd = 0;
+	crq_msg.var3.rsvd = 0;
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return rc;
+}
+
+/**
+ * ibmvmc_send_capabilities - Send VMC Capabilities
+ *
+ * @adapter:	crq_server_adapter struct
+ *
+ * The capabilities message is an administrative message sent after the CRQ
+ * initialization sequence of messages and is used to exchange VMC capabilities
+ * between the management partition and the hypervisor. The management
+ * partition must send this message and the hypervisor must respond with the
+ * VMC Capabilities Response message before HMC interface messages can begin.
+ * Any HMC interface messages received before the exchange of capabilities
+ * has completed are dropped.
+ *
+ * Return:
+ *	0 - Success
+ */
+static int ibmvmc_send_capabilities(struct crq_server_adapter *adapter)
+{
+	struct ibmvmc_admin_crq_msg crq_msg;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+
+	dev_dbg(adapter->dev, "ibmvmc: CRQ send: capabilities\n");
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_CAP;
+	crq_msg.status = 0;
+	crq_msg.rsvd[0] = 0;
+	crq_msg.rsvd[1] = 0;
+	crq_msg.max_hmc = ibmvmc_max_hmcs;
+	crq_msg.max_mtu = cpu_to_be32(ibmvmc_max_mtu);
+	crq_msg.pool_size = cpu_to_be16(ibmvmc_max_buf_pool_size);
+	crq_msg.crq_size = cpu_to_be16(adapter->queue.size);
+	crq_msg.version = cpu_to_be16(IBMVMC_PROTOCOL_VERSION);
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	ibmvmc.state = ibmvmc_state_capabilities;
+
+	return 0;
+}
+
+/**
+ * ibmvmc_send_add_buffer_resp - Add Buffer Response
+ *
+ * @adapter:	crq_server_adapter struct
+ * @status:	Status field
+ * @hmc_session: HMC Session field
+ * @hmc_index:	HMC Index field
+ * @buffer_id:	Buffer Id field
+ *
+ * This command is sent by the management partition to the hypervisor in
+ * response to the Add Buffer message. The Status field indicates the result of
+ * the command.
+ *
+ * Return:
+ *	0 - Success
+ */
+static int ibmvmc_send_add_buffer_resp(struct crq_server_adapter *adapter,
+				       u8 status, u8 hmc_session,
+				       u8 hmc_index, u16 buffer_id)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+
+	dev_dbg(adapter->dev, "CRQ send: add_buffer_resp\n");
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_ADD_BUF_RESP;
+	crq_msg.status = status;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc_session;
+	crq_msg.hmc_index = hmc_index;
+	crq_msg.var2.buffer_id = cpu_to_be16(buffer_id);
+	crq_msg.rsvd = 0;
+	crq_msg.var3.rsvd = 0;
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return 0;
+}
+
+/**
+ * ibmvmc_send_rem_buffer_resp - Remove Buffer Response
+ *
+ * @adapter:	crq_server_adapter struct
+ * @status:	Status field
+ * @hmc_session: HMC Session field
+ * @hmc_index:	HMC Index field
+ * @buffer_id:	Buffer Id field
+ *
+ * This command is sent by the management partition to the hypervisor in
+ * response to the Remove Buffer message. The Buffer ID field indicates
+ * which buffer the management partition selected to remove. The Status
+ * field indicates the result of the command.
+ *
+ * Return:
+ *	0 - Success
+ */
+static int ibmvmc_send_rem_buffer_resp(struct crq_server_adapter *adapter,
+				       u8 status, u8 hmc_session,
+				       u8 hmc_index, u16 buffer_id)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+
+	dev_dbg(adapter->dev, "CRQ send: rem_buffer_resp\n");
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_REM_BUF_RESP;
+	crq_msg.status = status;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc_session;
+	crq_msg.hmc_index = hmc_index;
+	crq_msg.var2.buffer_id = cpu_to_be16(buffer_id);
+	crq_msg.rsvd = 0;
+	crq_msg.var3.rsvd = 0;
+
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return 0;
+}
+
+/**
+ * ibmvmc_send_msg - Signal Message
+ *
+ * @adapter:	crq_server_adapter struct
+ * @buffer:	ibmvmc_buffer struct
+ * @hmc:	ibmvmc_hmc struct
+ * @msg_len:	message length field
+ *
+ * This command is sent between the management partition and the hypervisor
+ * in order to signal the arrival of an HMC protocol message. The command
+ * can be sent by both the management partition and the hypervisor. It is
+ * used for all traffic between the management application and the hypervisor,
+ * regardless of who initiated the communication.
+ *
+ * There is no response to this message.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_send_msg(struct crq_server_adapter *adapter,
+			   struct ibmvmc_buffer *buffer,
+			   struct ibmvmc_hmc *hmc, int msg_len)
+{
+	struct ibmvmc_crq_msg crq_msg;
+	__be64 *crq_as_u64 = (__be64 *)&crq_msg;
+	int rc = 0;
+
+	dev_dbg(adapter->dev, "CRQ send: rdma to HV\n");
+	rc = h_copy_rdma(msg_len,
+			 adapter->liobn,
+			 buffer->dma_addr_local,
+			 adapter->riobn,
+			 buffer->dma_addr_remote);
+	if (rc) {
+		dev_err(adapter->dev, "Error in send_msg, h_copy_rdma rc 0x%x\n",
+			rc);
+		return rc;
+	}
+
+	crq_msg.valid = 0x80;
+	crq_msg.type = VMC_MSG_SIGNAL;
+	crq_msg.status = 0;
+	crq_msg.var1.rsvd = 0;
+	crq_msg.hmc_session = hmc->session;
+	crq_msg.hmc_index = hmc->index;
+	crq_msg.var2.buffer_id = cpu_to_be16(buffer->id);
+	crq_msg.var3.msg_len = cpu_to_be32(msg_len);
+	dev_dbg(adapter->dev, "CRQ send: msg to HV 0x%llx 0x%llx\n",
+		be64_to_cpu(crq_as_u64[0]), be64_to_cpu(crq_as_u64[1]));
+
+	buffer->owner = VMC_BUF_OWNER_HV;
+	ibmvmc_send_crq(adapter, be64_to_cpu(crq_as_u64[0]),
+			be64_to_cpu(crq_as_u64[1]));
+
+	return rc;
+}
+
+/**
+ * ibmvmc_open - Open Session
+ *
+ * @inode:	inode struct
+ * @file:	file struct
+ *
+ * Return:
+ *	0 - Success
+ *	-ENOMEM - Session allocation failure
+ */
+static int ibmvmc_open(struct inode *inode, struct file *file)
+{
+	struct ibmvmc_file_session *session;
+	int rc = 0;
+
+	pr_debug("%s: inode = 0x%lx, file = 0x%lx, state = 0x%x\n", __func__,
+		 (unsigned long)inode, (unsigned long)file,
+		 ibmvmc.state);
+
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (!session)
+		return -ENOMEM;
+
+	session->file = file;
+	file->private_data = session;
+
+	return rc;
+}
+
+/**
+ * ibmvmc_close - Close Session
+ *
+ * @inode:	inode struct
+ * @file:	file struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_close(struct inode *inode, struct file *file)
+{
+	struct ibmvmc_file_session *session;
+	struct ibmvmc_hmc *hmc;
+	int rc = 0;
+	unsigned long flags;
+
+	pr_debug("%s: file = 0x%lx, state = 0x%x\n", __func__,
+		 (unsigned long)file, ibmvmc.state);
+
+	session = file->private_data;
+	if (!session)
+		return -EIO;
+
+	hmc = session->hmc;
+	if (hmc) {
+		if (!hmc->adapter)
+			return -EIO;
+
+		if (ibmvmc.state == ibmvmc_state_failed) {
+			dev_warn(hmc->adapter->dev, "close: state_failed\n");
+			return -EIO;
+		}
+
+		spin_lock_irqsave(&hmc->lock, flags);
+		if (hmc->state >= ibmhmc_state_opening) {
+			rc = ibmvmc_send_close(hmc);
+			if (rc)
+				dev_warn(hmc->adapter->dev, "close: send_close failed.\n");
+		}
+		spin_unlock_irqrestore(&hmc->lock, flags);
+	}
+
+	kzfree(session);
+
+	return rc;
+}
+
+/**
+ * ibmvmc_read - Read
+ *
+ * @file:	file struct
+ * @buf:	Character buffer
+ * @nbytes:	Size in bytes
+ * @ppos:	Offset
+ *
+ * Return:
+ *	Number of bytes read on success
+ *	Negative errno on failure
+ */
+static ssize_t ibmvmc_read(struct file *file, char *buf, size_t nbytes,
+			   loff_t *ppos)
+{
+	struct ibmvmc_file_session *session;
+	struct ibmvmc_hmc *hmc;
+	struct crq_server_adapter *adapter;
+	struct ibmvmc_buffer *buffer;
+	ssize_t n;
+	ssize_t retval = 0;
+	unsigned long flags;
+	DEFINE_WAIT(wait);
+
+	pr_debug("ibmvmc: read: file = 0x%lx, buf = 0x%lx, nbytes = 0x%lx\n",
+		 (unsigned long)file, (unsigned long)buf,
+		 (unsigned long)nbytes);
+
+	if (nbytes == 0)
+		return 0;
+
+	if (nbytes > ibmvmc.max_mtu) {
+		pr_warn("ibmvmc: read: nbytes invalid 0x%x\n",
+			(unsigned int)nbytes);
+		return -EINVAL;
+	}
+
+	session = file->private_data;
+	if (!session) {
+		pr_warn("ibmvmc: read: no session\n");
+		return -EIO;
+	}
+
+	hmc = session->hmc;
+	if (!hmc) {
+		pr_warn("ibmvmc: read: no hmc\n");
+		return -EIO;
+	}
+
+	adapter = hmc->adapter;
+	if (!adapter) {
+		pr_warn("ibmvmc: read: no adapter\n");
+		return -EIO;
+	}
+
+	do {
+		prepare_to_wait(&ibmvmc_read_wait, &wait, TASK_INTERRUPTIBLE);
+
+		spin_lock_irqsave(&hmc->lock, flags);
+		if (hmc->queue_tail != hmc->queue_head)
+			/* Data is available */
+			break;
+
+		spin_unlock_irqrestore(&hmc->lock, flags);
+
+		if (!session->valid) {
+			retval = -EBADFD;
+			goto out;
+		}
+		if (file->f_flags & O_NONBLOCK) {
+			retval = -EAGAIN;
+			goto out;
+		}
+
+		schedule();
+
+		if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			goto out;
+		}
+	} while (1);
+
+	buffer = &(hmc->buffer[hmc->queue_outbound_msgs[hmc->queue_tail]]);
+	hmc->queue_tail++;
+	if (hmc->queue_tail == ibmvmc_max_buf_pool_size)
+		hmc->queue_tail = 0;
+	spin_unlock_irqrestore(&hmc->lock, flags);
+
+	nbytes = min_t(size_t, nbytes, buffer->msg_len);
+	n = copy_to_user((void *)buf, buffer->real_addr_local, nbytes);
+	dev_dbg(adapter->dev, "read: copy to user nbytes = 0x%lx.\n", nbytes);
+	ibmvmc_free_hmc_buffer(hmc, buffer);
+	retval = nbytes;
+
+	if (n) {
+		dev_warn(adapter->dev, "read: copy to user failed.\n");
+		retval = -EFAULT;
+	}
+
+ out:
+	finish_wait(&ibmvmc_read_wait, &wait);
+	dev_dbg(adapter->dev, "read: out %ld\n", retval);
+	return retval;
+}
+
+/**
+ * ibmvmc_poll - Poll
+ *
+ * @file:	file struct
+ * @wait:	Poll Table
+ *
+ * Return:
+ *	poll.h return values
+ */
+static unsigned int ibmvmc_poll(struct file *file, poll_table *wait)
+{
+	struct ibmvmc_file_session *session;
+	struct ibmvmc_hmc *hmc;
+	unsigned int mask = 0;
+
+	session = file->private_data;
+	if (!session)
+		return 0;
+
+	hmc = session->hmc;
+	if (!hmc)
+		return 0;
+
+	poll_wait(file, &ibmvmc_read_wait, wait);
+
+	if (hmc->queue_head != hmc->queue_tail)
+		mask |= POLLIN | POLLRDNORM;
+
+	return mask;
+}
+
+/**
+ * ibmvmc_write - Write
+ *
+ * @file:	file struct
+ * @buffer:	Character buffer
+ * @count:	Count field
+ * @ppos:	Offset
+ *
+ * Return:
+ *	Number of bytes written on success
+ *	Negative errno on failure
+ */
+static ssize_t ibmvmc_write(struct file *file, const char *buffer,
+			    size_t count, loff_t *ppos)
+{
+	struct ibmvmc_buffer *vmc_buffer;
+	struct ibmvmc_file_session *session;
+	struct crq_server_adapter *adapter;
+	struct ibmvmc_hmc *hmc;
+	unsigned char *buf;
+	unsigned long flags;
+	size_t bytes;
+	const char *p = buffer;
+	size_t c = count;
+	int ret = 0;
+
+	session = file->private_data;
+	if (!session)
+		return -EIO;
+
+	hmc = session->hmc;
+	if (!hmc)
+		return -EIO;
+
+	spin_lock_irqsave(&hmc->lock, flags);
+	if (hmc->state == ibmhmc_state_free) {
+		/* HMC connection is not valid (possibly was reset under us). */
+		ret = -EIO;
+		goto out;
+	}
+
+	adapter = hmc->adapter;
+	if (!adapter) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (count > ibmvmc.max_mtu) {
+		dev_warn(adapter->dev, "invalid buffer size 0x%lx\n",
+			 (unsigned long)count);
+		ret = -EIO;
+		goto out;
+	}
+
+	/* Still waiting for the open response to the sethmcid ioctl - retry */
+	if (hmc->state == ibmhmc_state_opening) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* Make sure the ioctl() was called & the open msg sent, and that
+	 * the HMC connection has not failed.
+	 */
+	if (hmc->state != ibmhmc_state_ready) {
+		ret = -EIO;
+		goto out;
+	}
+
+	vmc_buffer = ibmvmc_get_valid_hmc_buffer(hmc->index);
+	if (!vmc_buffer) {
+		/* No buffer available for the msg send, or we have not yet
+		 * completed the open/open_resp sequence.  Retry until this is
+		 * complete.
+		 */
+		ret = -EBUSY;
+		goto out;
+	}
+	if (!vmc_buffer->real_addr_local) {
+		dev_err(adapter->dev, "no buffer storage assigned\n");
+		ret = -EIO;
+		goto out;
+	}
+	buf = vmc_buffer->real_addr_local;
+
+	while (c > 0) {
+		bytes = min_t(size_t, c, vmc_buffer->size);
+
+		bytes -= copy_from_user(buf, p, bytes);
+		if (!bytes) {
+			ret = -EFAULT;
+			goto out;
+		}
+		c -= bytes;
+		p += bytes;
+	}
+	if (p == buffer)
+		goto out;
+
+	file->f_path.dentry->d_inode->i_mtime = current_time(file_inode(file));
+	mark_inode_dirty(file->f_path.dentry->d_inode);
+
+	dev_dbg(adapter->dev, "write: file = 0x%lx, count = 0x%lx\n",
+		(unsigned long)file, (unsigned long)count);
+
+	ibmvmc_send_msg(adapter, vmc_buffer, hmc, count);
+	ret = p - buffer;
+ out:
+	spin_unlock_irqrestore(&hmc->lock, flags);
+	return (ssize_t)(ret);
+}
+
+/**
+ * ibmvmc_setup_hmc - Setup the HMC
+ *
+ * @session:	ibmvmc_file_session struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_setup_hmc(struct ibmvmc_file_session *session)
+{
+	struct ibmvmc_hmc *hmc;
+	unsigned int valid, free, index;
+
+	if (ibmvmc.state == ibmvmc_state_failed) {
+		pr_warn("ibmvmc: Reserve HMC: state_failed\n");
+		return -EIO;
+	}
+
+	if (ibmvmc.state < ibmvmc_state_ready) {
+		pr_warn("ibmvmc: Reserve HMC: not state_ready\n");
+		return -EAGAIN;
+	}
+
+	/* Device is busy until capabilities have been exchanged and we
+	 * have a generic buffer for each possible HMC connection.
+	 */
+	for (index = 0; index <= ibmvmc.max_hmc_index; index++) {
+		valid = 0;
+		ibmvmc_count_hmc_buffers(index, &valid, &free);
+		if (valid == 0) {
+			pr_warn("ibmvmc: buffers not ready for index %d\n",
+				index);
+			return -ENOBUFS;
+		}
+	}
+
+	/* Get an hmc object, and transition to ibmhmc_state_initial */
+	hmc = ibmvmc_get_free_hmc();
+	if (!hmc) {
+		pr_warn("%s: free hmc not found\n", __func__);
+		return -EBUSY;
+	}
+
+	hmc->session = hmc->session + 1;
+	if (hmc->session == 0xff)
+		hmc->session = 1;
+
+	session->hmc = hmc;
+	hmc->adapter = &ibmvmc_adapter;
+	hmc->file_session = session;
+	session->valid = 1;
+
+	return 0;
+}
+
+/**
+ * ibmvmc_ioctl_sethmcid - IOCTL Set HMC ID
+ *
+ * @session:	ibmvmc_file_session struct
+ * @new_hmc_id:	HMC id field
+ *
+ * IOCTL command to setup the hmc id
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_ioctl_sethmcid(struct ibmvmc_file_session *session,
+				  unsigned char __user *new_hmc_id)
+{
+	struct ibmvmc_hmc *hmc;
+	struct ibmvmc_buffer *buffer;
+	size_t bytes;
+	char print_buffer[HMC_ID_LEN + 1];
+	unsigned long flags;
+	long rc = 0;
+
+	/* Reserve HMC session */
+	hmc = session->hmc;
+	if (!hmc) {
+		rc = ibmvmc_setup_hmc(session);
+		if (rc)
+			return rc;
+
+		hmc = session->hmc;
+		if (!hmc) {
+			pr_err("ibmvmc: setup_hmc success but no hmc\n");
+			return -EIO;
+		}
+	}
+
+	if (hmc->state != ibmhmc_state_initial) {
+		pr_warn("ibmvmc: sethmcid: invalid state to send open 0x%x\n",
+			hmc->state);
+		return -EIO;
+	}
+
+	bytes = copy_from_user(hmc->hmc_id, new_hmc_id, HMC_ID_LEN);
+	if (bytes)
+		return -EFAULT;
+
+	/* Send Open Session command */
+	spin_lock_irqsave(&hmc->lock, flags);
+	buffer = ibmvmc_get_valid_hmc_buffer(hmc->index);
+	spin_unlock_irqrestore(&hmc->lock, flags);
+
+	if (!buffer || !buffer->real_addr_local) {
+		pr_warn("ibmvmc: sethmcid: no buffer available\n");
+		return -EIO;
+	}
+
+	/* Make sure buffer is NULL terminated before trying to print it */
+	memset(print_buffer, 0, HMC_ID_LEN + 1);
+	strncpy(print_buffer, hmc->hmc_id, HMC_ID_LEN);
+	pr_info("ibmvmc: sethmcid: Set HMC ID: \"%s\"\n", print_buffer);
+
+	memcpy(buffer->real_addr_local, hmc->hmc_id, HMC_ID_LEN);
+	/* RDMA over ID, send open msg, change state to ibmhmc_state_opening */
+	rc = ibmvmc_send_open(buffer, hmc);
+
+	return rc;
+}
+
+/**
+ * ibmvmc_ioctl_query - IOCTL Query
+ *
+ * @session:	ibmvmc_file_session struct
+ * @ret_struct:	ibmvmc_query_struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_ioctl_query(struct ibmvmc_file_session *session,
+			       struct ibmvmc_query_struct __user *ret_struct)
+{
+	struct ibmvmc_query_struct query_struct;
+	size_t bytes;
+
+	memset(&query_struct, 0, sizeof(query_struct));
+	query_struct.have_vmc = (ibmvmc.state > ibmvmc_state_initial);
+	query_struct.state = ibmvmc.state;
+	query_struct.vmc_drc_index = ibmvmc.vmc_drc_index;
+
+	bytes = copy_to_user(ret_struct, &query_struct,
+			     sizeof(query_struct));
+	if (bytes)
+		return -EFAULT;
+
+	return 0;
+}
+
+/**
+ * ibmvmc_ioctl_requestvmc - IOCTL Request VMC
+ *
+ * @session:	ibmvmc_file_session struct
+ * @ret_vmc_index:	VMC Index
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_ioctl_requestvmc(struct ibmvmc_file_session *session,
+				    u32 __user *ret_vmc_index)
+{
+	/* TODO: (adreznec) Add locking to control multiple process access */
+	size_t bytes;
+	long rc;
+	u32 vmc_drc_index;
+
+	/* Call to request the VMC device from phyp*/
+	rc = h_request_vmc(&vmc_drc_index);
+	pr_debug("ibmvmc: requestvmc: H_REQUEST_VMC rc = 0x%lx\n", rc);
+
+	if (rc == H_SUCCESS) {
+		rc = 0;
+	} else if (rc == H_FUNCTION) {
+		pr_err("ibmvmc: requestvmc: h_request_vmc not supported\n");
+		return -EPERM;
+	} else if (rc == H_AUTHORITY) {
+		pr_err("ibmvmc: requestvmc: hypervisor denied vmc request\n");
+		return -EPERM;
+	} else if (rc == H_HARDWARE) {
+		pr_err("ibmvmc: requestvmc: hypervisor hardware fault\n");
+		return -EIO;
+	} else if (rc == H_RESOURCE) {
+		pr_err("ibmvmc: requestvmc: vmc resource unavailable\n");
+		return -ENODEV;
+	} else if (rc == H_NOT_AVAILABLE) {
+		pr_err("ibmvmc: requestvmc: system cannot be vmc managed\n");
+		return -EPERM;
+	} else if (rc == H_PARAMETER) {
+		pr_err("ibmvmc: requestvmc: invalid parameter\n");
+		return -EINVAL;
+	}
+
+	/* Success, set the vmc index in global struct */
+	ibmvmc.vmc_drc_index = vmc_drc_index;
+
+	bytes = copy_to_user(ret_vmc_index, &vmc_drc_index,
+			     sizeof(*ret_vmc_index));
+	if (bytes) {
+		pr_warn("ibmvmc: requestvmc: copy to user failed.\n");
+		return -EFAULT;
+	}
+	return rc;
+}
+
+/**
+ * ibmvmc_ioctl - IOCTL
+ *
+ * @file:	file struct
+ * @cmd:	cmd field
+ * @arg:	Argument field
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static long ibmvmc_ioctl(struct file *file,
+			 unsigned int cmd, unsigned long arg)
+{
+	struct ibmvmc_file_session *session = file->private_data;
+
+	pr_debug("ibmvmc: ioctl file=0x%lx, cmd=0x%x, arg=0x%lx, ses=0x%lx\n",
+		 (unsigned long)file, cmd, arg,
+		 (unsigned long)session);
+
+	if (!session) {
+		pr_warn("ibmvmc: ioctl: no session\n");
+		return -EIO;
+	}
+
+	switch (cmd) {
+	case VMC_IOCTL_SETHMCID:
+		return ibmvmc_ioctl_sethmcid(session,
+			(unsigned char __user *)arg);
+	case VMC_IOCTL_QUERY:
+		return ibmvmc_ioctl_query(session,
+			(struct ibmvmc_query_struct __user *)arg);
+	case VMC_IOCTL_REQUESTVMC:
+		return ibmvmc_ioctl_requestvmc(session,
+			(unsigned int __user *)arg);
+	default:
+		pr_warn("ibmvmc: unknown ioctl 0x%x\n", cmd);
+		return -EINVAL;
+	}
+}
+
+static const struct file_operations ibmvmc_fops = {
+	.owner		= THIS_MODULE,
+	.read		= ibmvmc_read,
+	.write		= ibmvmc_write,
+	.poll		= ibmvmc_poll,
+	.unlocked_ioctl	= ibmvmc_ioctl,
+	.open           = ibmvmc_open,
+	.release        = ibmvmc_close,
+};
+
+/**
+ * ibmvmc_add_buffer - Add Buffer
+ *
+ * @adapter: crq_server_adapter struct
+ * @crq:	ibmvmc_crq_msg struct
+ *
+ * This message transfers a buffer from hypervisor ownership to management
+ * partition ownership. The LIOBA is obtained from the virtual TCE table
+ * associated with the hypervisor side of the VMC device, and points to a
+ * buffer of size MTU (as established in the capabilities exchange).
+ *
+ * Typical flow for adding buffers:
+ * 1. A new management application connection is opened by the management
+ *	partition.
+ * 2. The hypervisor assigns new buffers for the traffic associated with
+ *	that connection.
+ * 3. The hypervisor sends VMC Add Buffer messages to the management
+ *	partition, informing it of the new buffers.
+ * 4. The hypervisor sends an HMC protocol message (to the management
+ *	application) notifying it of the new buffers. This informs the
+ *	application that it has buffers available for sending HMC
+ *	commands.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_add_buffer(struct crq_server_adapter *adapter,
+			     struct ibmvmc_crq_msg *crq)
+{
+	struct ibmvmc_buffer *buffer;
+	u8 hmc_index;
+	u8 hmc_session;
+	u16 buffer_id;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!crq)
+		return -1;
+
+	hmc_session = crq->hmc_session;
+	hmc_index = crq->hmc_index;
+	buffer_id = be16_to_cpu(crq->var2.buffer_id);
+
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		dev_err(adapter->dev, "add_buffer: invalid hmc_index = 0x%x\n",
+			hmc_index);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	if (buffer_id >= ibmvmc.max_buffer_pool_size) {
+		dev_err(adapter->dev, "add_buffer: invalid buffer_id = 0x%x\n",
+			buffer_id);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	spin_lock_irqsave(&hmcs[hmc_index].lock, flags);
+	buffer = &hmcs[hmc_index].buffer[buffer_id];
+
+	if (buffer->real_addr_local || buffer->dma_addr_local) {
+		dev_warn(adapter->dev, "add_buffer: already allocated id = 0x%lx\n",
+			 (unsigned long)buffer_id);
+		spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	buffer->real_addr_local = alloc_dma_buffer(to_vio_dev(adapter->dev),
+						   ibmvmc.max_mtu,
+						   &buffer->dma_addr_local);
+
+	if (!buffer->real_addr_local) {
+		dev_err(adapter->dev, "add_buffer: alloc_dma_buffer failed.\n");
+		spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INTERFACE_FAILURE,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	buffer->dma_addr_remote = be32_to_cpu(crq->var3.lioba);
+	buffer->size = ibmvmc.max_mtu;
+	buffer->owner = crq->var1.owner;
+	buffer->free = 1;
+	/* Must ensure valid==1 is observable only after all other fields are */
+	dma_wmb();
+	buffer->valid = 1;
+	buffer->id = buffer_id;
+
+	dev_dbg(adapter->dev, "add_buffer: successfully added a buffer:\n");
+	dev_dbg(adapter->dev, "   index: %d, session: %d, buffer: 0x%x, owner: %d\n",
+		hmc_index, hmc_session, buffer_id, buffer->owner);
+	dev_dbg(adapter->dev, "   local: 0x%x, remote: 0x%x\n",
+		(u32)buffer->dma_addr_local,
+		(u32)buffer->dma_addr_remote);
+	spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+
+	ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_SUCCESS, hmc_session,
+				    hmc_index, buffer_id);
+
+	return rc;
+}
+
+/**
+ * ibmvmc_rem_buffer - Remove Buffer
+ *
+ * @adapter: crq_server_adapter struct
+ * @crq:	ibmvmc_crq_msg struct
+ *
+ * This message requests an HMC buffer to be transferred from management
+ * partition ownership to hypervisor ownership. The management partition may
+ * not be able to satisfy the request at a particular point in time if all its
+ * buffers are in use. The management partition requires a depth of at least
+ * one inbound buffer to allow management application commands to flow to the
+ * hypervisor. It is, therefore, an interface error for the hypervisor to
+ * attempt to remove the management partition's last buffer.
+ *
+ * The hypervisor is expected to manage buffer usage with the management
+ * application directly and inform the management partition when buffers may be
+ * removed. The typical flow for removing buffers:
+ *
+ * 1. The management application no longer needs a communication path to a
+ *	particular hypervisor function. That function is closed.
+ * 2. The hypervisor and the management application quiesce all traffic to that
+ *	function. The hypervisor requests a reduction in buffer pool size.
+ * 3. The management application acknowledges the reduction in buffer pool size.
+ * 4. The hypervisor sends a Remove Buffer message to the management partition,
+ *	informing it of the reduction in buffers.
+ * 5. The management partition verifies it can remove the buffer. This is
+ *	possible if buffers have been quiesced.
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+/*
+ * The hypervisor requested that we pick an unused buffer, and return it.
+ * Before sending the buffer back, we free any storage associated with the
+ * buffer.
+ */
+static int ibmvmc_rem_buffer(struct crq_server_adapter *adapter,
+			     struct ibmvmc_crq_msg *crq)
+{
+	struct ibmvmc_buffer *buffer;
+	u8 hmc_index;
+	u8 hmc_session;
+	u16 buffer_id = 0;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!crq)
+		return -1;
+
+	hmc_session = crq->hmc_session;
+	hmc_index = crq->hmc_index;
+
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		dev_warn(adapter->dev, "rem_buffer: invalid hmc_index = 0x%x\n",
+			 hmc_index);
+		ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	spin_lock_irqsave(&hmcs[hmc_index].lock, flags);
+	buffer = ibmvmc_get_free_hmc_buffer(adapter, hmc_index);
+	if (!buffer) {
+		dev_info(adapter->dev, "rem_buffer: no buffer to remove\n");
+		spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+		ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_NO_BUFFER,
+					    hmc_session, hmc_index,
+					    VMC_INVALID_BUFFER_ID);
+		return -1;
+	}
+
+	buffer_id = buffer->id;
+
+	if (buffer->valid)
+		free_dma_buffer(to_vio_dev(adapter->dev),
+				ibmvmc.max_mtu,
+				buffer->real_addr_local,
+				buffer->dma_addr_local);
+
+	memset(buffer, 0, sizeof(struct ibmvmc_buffer));
+	spin_unlock_irqrestore(&hmcs[hmc_index].lock, flags);
+
+	dev_dbg(adapter->dev, "rem_buffer: removed buffer 0x%x.\n", buffer_id);
+	ibmvmc_send_rem_buffer_resp(adapter, VMC_MSG_SUCCESS, hmc_session,
+				    hmc_index, buffer_id);
+
+	return rc;
+}
+
+static int ibmvmc_recv_msg(struct crq_server_adapter *adapter,
+			   struct ibmvmc_crq_msg *crq)
+{
+	struct ibmvmc_buffer *buffer;
+	struct ibmvmc_hmc *hmc;
+	unsigned long msg_len;
+	u8 hmc_index;
+	u8 hmc_session;
+	u16 buffer_id;
+	unsigned long flags;
+	int rc = 0;
+
+	if (!crq)
+		return -1;
+
+	/* Hypervisor writes CRQs directly into our memory in big endian */
+	dev_dbg(adapter->dev, "Recv_msg: msg from HV 0x%016llx 0x%016llx\n",
+		be64_to_cpu(*((unsigned long *)crq)),
+		be64_to_cpu(*(((unsigned long *)crq) + 1)));
+
+	hmc_session = crq->hmc_session;
+	hmc_index = crq->hmc_index;
+	buffer_id = be16_to_cpu(crq->var2.buffer_id);
+	msg_len = be32_to_cpu(crq->var3.msg_len);
+
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		dev_err(adapter->dev, "Recv_msg: invalid hmc_index = 0x%x\n",
+			hmc_index);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_HMC_INDEX,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	if (buffer_id >= ibmvmc.max_buffer_pool_size) {
+		dev_err(adapter->dev, "Recv_msg: invalid buffer_id = 0x%x\n",
+			buffer_id);
+		ibmvmc_send_add_buffer_resp(adapter, VMC_MSG_INVALID_BUFFER_ID,
+					    hmc_session, hmc_index, buffer_id);
+		return -1;
+	}
+
+	hmc = &hmcs[hmc_index];
+	spin_lock_irqsave(&hmc->lock, flags);
+
+	if (hmc->state == ibmhmc_state_free) {
+		dev_err(adapter->dev, "Recv_msg: invalid hmc state = 0x%x\n",
+			hmc->state);
+		/* HMC connection is not valid (possibly was reset under us). */
+		spin_unlock_irqrestore(&hmc->lock, flags);
+		return -1;
+	}
+
+	buffer = &hmc->buffer[buffer_id];
+
+	if (buffer->valid == 0 || buffer->owner == VMC_BUF_OWNER_ALPHA) {
+		dev_err(adapter->dev, "Recv_msg: not valid, or not HV.  0x%x 0x%x\n",
+			buffer->valid, buffer->owner);
+		spin_unlock_irqrestore(&hmc->lock, flags);
+		return -1;
+	}
+
+	/* RDMA the data into the partition. */
+	rc = h_copy_rdma(msg_len,
+			 adapter->riobn,
+			 buffer->dma_addr_remote,
+			 adapter->liobn,
+			 buffer->dma_addr_local);
+
+	dev_dbg(adapter->dev, "Recv_msg: msg_len = 0x%x, buffer_id = 0x%x, queue_head = 0x%x, hmc_idx = 0x%x\n",
+		(unsigned int)msg_len, (unsigned int)buffer_id,
+		(unsigned int)hmc->queue_head, (unsigned int)hmc_index);
+	buffer->msg_len = msg_len;
+	buffer->free = 0;
+	buffer->owner = VMC_BUF_OWNER_ALPHA;
+
+	if (rc) {
+		dev_err(adapter->dev, "Failure in recv_msg: h_copy_rdma = 0x%x\n",
+			rc);
+		spin_unlock_irqrestore(&hmc->lock, flags);
+		return -1;
+	}
+
+	/* Must be locked because read operates on the same data */
+	hmc->queue_outbound_msgs[hmc->queue_head] = buffer_id;
+	hmc->queue_head++;
+	if (hmc->queue_head == ibmvmc_max_buf_pool_size)
+		hmc->queue_head = 0;
+
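+	/* If head has caught up with tail after this enqueue, the ring is
+	 * full and a further enqueue would overwrite an unread buffer id.
+	 */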
+	if (hmc->queue_head == hmc->queue_tail)
+		dev_err(adapter->dev, "outbound buffer queue wrapped.\n");
+
+	spin_unlock_irqrestore(&hmc->lock, flags);
+
+	wake_up_interruptible(&ibmvmc_read_wait);
+
+	return 0;
+}
+
+/**
+ * ibmvmc_process_capabilities - Process Capabilities
+ *
+ * @adapter:	crq_server_adapter struct
+ * @crqp:	ibmvmc_crq_msg struct
+ *
+ */
+static void ibmvmc_process_capabilities(struct crq_server_adapter *adapter,
+					struct ibmvmc_crq_msg *crqp)
+{
+	struct ibmvmc_admin_crq_msg *crq = (struct ibmvmc_admin_crq_msg *)crqp;
+
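+	/* Only the major version (high-order byte) must match; minor
+	 * versions may differ between the two ends.
+	 */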
+	if ((be16_to_cpu(crq->version) >> 8) !=
+			(IBMVMC_PROTOCOL_VERSION >> 8)) {
+		dev_err(adapter->dev, "init failed, incompatible versions 0x%x 0x%x\n",
+			be16_to_cpu(crq->version),
+			IBMVMC_PROTOCOL_VERSION);
+		ibmvmc.state = ibmvmc_state_failed;
+		return;
+	}
+
+	ibmvmc.max_mtu = min_t(u32, ibmvmc_max_mtu, be32_to_cpu(crq->max_mtu));
+	ibmvmc.max_buffer_pool_size = min_t(u16, ibmvmc_max_buf_pool_size,
+					    be16_to_cpu(crq->pool_size));
+	ibmvmc.max_hmc_index = min_t(u8, ibmvmc_max_hmcs, crq->max_hmc) - 1;
+	ibmvmc.state = ibmvmc_state_ready;
+
+	dev_info(adapter->dev, "Capabilities: mtu=0x%x, pool_size=0x%x, max_hmc=0x%x\n",
+		 ibmvmc.max_mtu, ibmvmc.max_buffer_pool_size,
+		 ibmvmc.max_hmc_index);
+}
+
+/**
+ * ibmvmc_validate_hmc_session - Validate HMC Session
+ *
+ * @adapter:	crq_server_adapter struct
+ * @crq:	ibmvmc_crq_msg struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_validate_hmc_session(struct crq_server_adapter *adapter,
+				       struct ibmvmc_crq_msg *crq)
+{
+	unsigned char hmc_index;
+
+	hmc_index = crq->hmc_index;
+
+	if (crq->hmc_session == 0)
+		return 0;
+
+	if (hmc_index > ibmvmc.max_hmc_index)
+		return -1;
+
+	if (hmcs[hmc_index].session != crq->hmc_session) {
+		dev_warn(adapter->dev, "Drop, bad session: expected 0x%x, recv 0x%x\n",
+			 hmcs[hmc_index].session, crq->hmc_session);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * ibmvmc_reset - Reset
+ *
+ * @adapter:	crq_server_adapter struct
+ * @xport_event:	If true, the partner closed their CRQ and no reset is
+ *			needed; if false, a CRQ reset must be scheduled.
+ *
+ * Closes all HMC sessions and conditionally schedules a CRQ reset.
+ */
+static void ibmvmc_reset(struct crq_server_adapter *adapter, bool xport_event)
+{
+	int i;
+
+	if (ibmvmc.state != ibmvmc_state_sched_reset) {
+		dev_info(adapter->dev, "*** Reset to initial state.\n");
+		for (i = 0; i < ibmvmc_max_hmcs; i++)
+			ibmvmc_return_hmc(&hmcs[i], xport_event);
+
+		if (xport_event) {
+			/* CRQ was closed by the partner.  We don't need to do
+			 * anything except set ourself to the correct state to
+			 * handle init msgs.
+			 */
+			ibmvmc.state = ibmvmc_state_crqinit;
+		} else {
+			/* The partner did not close their CRQ - instead, we're
+			 * closing the CRQ on our end. Need to schedule this
+			 * for process context, because CRQ reset may require a
+			 * sleep.
+			 *
+			 * Setting ibmvmc.state here immediately prevents
+			 * ibmvmc_open from completing until the reset
+			 * completes in process context.
+			 */
+			ibmvmc.state = ibmvmc_state_sched_reset;
+			dev_dbg(adapter->dev, "Device reset scheduled");
+			wake_up_interruptible(&adapter->reset_wait_queue);
+		}
+	}
+}
+
+/**
+ * ibmvmc_reset_task - Reset Task
+ *
+ * @data:	Data field
+ *
+ * Performs a CRQ reset of the VMC device in process context.
+ * NOTE: This function should not be called directly, use ibmvmc_reset.
+ */
+static int ibmvmc_reset_task(void *data)
+{
+	struct crq_server_adapter *adapter = data;
+	int rc;
+
+	set_user_nice(current, -20);
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(adapter->reset_wait_queue,
+			(ibmvmc.state == ibmvmc_state_sched_reset) ||
+			kthread_should_stop());
+
+		if (kthread_should_stop())
+			break;
+
+		dev_dbg(adapter->dev, "CRQ resetting in process context");
+		tasklet_disable(&adapter->work_task);
+
+		rc = ibmvmc_reset_crq_queue(adapter);
+
+		if (rc != H_SUCCESS && rc != H_RESOURCE) {
+			dev_err(adapter->dev, "Error initializing CRQ.  rc = 0x%x\n",
+				rc);
+			ibmvmc.state = ibmvmc_state_failed;
+		} else {
+			ibmvmc.state = ibmvmc_state_crqinit;
+
+			if (ibmvmc_send_crq(adapter, 0xC001000000000000LL, 0)
+			    != 0 && rc != H_RESOURCE)
+				dev_warn(adapter->dev, "Failed to send initialize CRQ message\n");
+		}
+
+		vio_enable_interrupts(to_vio_dev(adapter->dev));
+		tasklet_enable(&adapter->work_task);
+	}
+
+	return 0;
+}
+
+/**
+ * ibmvmc_process_open_resp - Process Open Response
+ *
+ * @crq: ibmvmc_crq_msg struct
+ * @adapter:    crq_server_adapter struct
+ *
+ * This command is sent by the hypervisor in response to the Interface
+ * Open message. When this message is received, the indicated buffer is
+ * again available for management partition use.
+ */
+static void ibmvmc_process_open_resp(struct ibmvmc_crq_msg *crq,
+				     struct crq_server_adapter *adapter)
+{
+	unsigned char hmc_index;
+	unsigned short buffer_id;
+
+	hmc_index = crq->hmc_index;
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		/* Why would PHYP give an index > max negotiated? */
+		ibmvmc_reset(adapter, false);
+		return;
+	}
+
+	if (crq->status) {
+		dev_warn(adapter->dev, "open_resp: failed - status 0x%x\n",
+			 crq->status);
+		ibmvmc_return_hmc(&hmcs[hmc_index], false);
+		return;
+	}
+
+	if (hmcs[hmc_index].state == ibmhmc_state_opening) {
+		buffer_id = be16_to_cpu(crq->var2.buffer_id);
+		if (buffer_id >= ibmvmc.max_buffer_pool_size) {
+			dev_err(adapter->dev, "open_resp: invalid buffer_id = 0x%x\n",
+				buffer_id);
+			hmcs[hmc_index].state = ibmhmc_state_failed;
+		} else {
+			ibmvmc_free_hmc_buffer(&hmcs[hmc_index],
+					       &hmcs[hmc_index].buffer[buffer_id]);
+			hmcs[hmc_index].state = ibmhmc_state_ready;
+			dev_dbg(adapter->dev, "open_resp: set hmc state = ready\n");
+		}
+	} else {
+		dev_warn(adapter->dev, "open_resp: invalid hmc state (0x%x)\n",
+			 hmcs[hmc_index].state);
+	}
+}
+
+/**
+ * ibmvmc_process_close_resp - Process Close Response
+ *
+ * @crq: ibmvmc_crq_msg struct
+ * @adapter:    crq_server_adapter struct
+ *
+ * This command is sent by the hypervisor in response to the management
+ * application Interface Close message.
+ *
+ * If the close fails, simply reset the entire driver as the state of the VMC
+ * must be in tough shape.
+ */
+static void ibmvmc_process_close_resp(struct ibmvmc_crq_msg *crq,
+				      struct crq_server_adapter *adapter)
+{
+	unsigned char hmc_index;
+
+	hmc_index = crq->hmc_index;
+	if (hmc_index > ibmvmc.max_hmc_index) {
+		ibmvmc_reset(adapter, false);
+		return;
+	}
+
+	if (crq->status) {
+		dev_warn(adapter->dev, "close_resp: failed - status 0x%x\n",
+			 crq->status);
+		ibmvmc_reset(adapter, false);
+		return;
+	}
+
+	ibmvmc_return_hmc(&hmcs[hmc_index], false);
+}
+
+/**
+ * ibmvmc_crq_process - Process CRQ
+ *
+ * @adapter:    crq_server_adapter struct
+ * @crq:	ibmvmc_crq_msg struct
+ *
+ * Process the CRQ message based upon the type of message received.
+ *
+ */
+static void ibmvmc_crq_process(struct crq_server_adapter *adapter,
+			       struct ibmvmc_crq_msg *crq)
+{
+	switch (crq->type) {
+	case VMC_MSG_CAP_RESP:
+		dev_dbg(adapter->dev, "CRQ recv: capabilities resp (0x%x)\n",
+			crq->type);
+		if (ibmvmc.state == ibmvmc_state_capabilities)
+			ibmvmc_process_capabilities(adapter, crq);
+		else
+			dev_warn(adapter->dev, "caps msg invalid in state 0x%x\n",
+				 ibmvmc.state);
+		break;
+	case VMC_MSG_OPEN_RESP:
+		dev_dbg(adapter->dev, "CRQ recv: open resp (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_process_open_resp(crq, adapter);
+		break;
+	case VMC_MSG_ADD_BUF:
+		dev_dbg(adapter->dev, "CRQ recv: add buf (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_add_buffer(adapter, crq);
+		break;
+	case VMC_MSG_REM_BUF:
+		dev_dbg(adapter->dev, "CRQ recv: rem buf (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_rem_buffer(adapter, crq);
+		break;
+	case VMC_MSG_SIGNAL:
+		dev_dbg(adapter->dev, "CRQ recv: signal msg (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_recv_msg(adapter, crq);
+		break;
+	case VMC_MSG_CLOSE_RESP:
+		dev_dbg(adapter->dev, "CRQ recv: close resp (0x%x)\n",
+			crq->type);
+		if (ibmvmc_validate_hmc_session(adapter, crq) == 0)
+			ibmvmc_process_close_resp(crq, adapter);
+		break;
+	case VMC_MSG_CAP:
+	case VMC_MSG_OPEN:
+	case VMC_MSG_CLOSE:
+	case VMC_MSG_ADD_BUF_RESP:
+	case VMC_MSG_REM_BUF_RESP:
+		dev_warn(adapter->dev, "CRQ recv: unexpected msg (0x%x)\n",
+			 crq->type);
+		break;
+	default:
+		dev_warn(adapter->dev, "CRQ recv: unknown msg (0x%x)\n",
+			 crq->type);
+		break;
+	}
+}
+
+/**
+ * ibmvmc_handle_crq_init - Handle CRQ Init
+ *
+ * @crq:	ibmvmc_crq_msg struct
+ * @adapter:	crq_server_adapter struct
+ *
+ * Handle the type of crq initialization based on whether
+ * it is a message or a response.
+ *
+ */
+static void ibmvmc_handle_crq_init(struct ibmvmc_crq_msg *crq,
+				   struct crq_server_adapter *adapter)
+{
+	switch (crq->type) {
+	case 0x01:	/* Initialization message */
+		dev_dbg(adapter->dev, "CRQ recv: CRQ init msg - state 0x%x\n",
+			ibmvmc.state);
+		if (ibmvmc.state == ibmvmc_state_crqinit) {
+			/* Send back a response */
+			if (ibmvmc_send_crq(adapter, 0xC002000000000000,
+					    0) == 0)
+				ibmvmc_send_capabilities(adapter);
+			else
+				dev_err(adapter->dev, " Unable to send init rsp\n");
+		} else {
+			dev_err(adapter->dev, "Invalid state 0x%x mtu = 0x%x\n",
+				ibmvmc.state, ibmvmc.max_mtu);
+		}
+
+		break;
+	case 0x02:	/* Initialization response */
+		dev_dbg(adapter->dev, "CRQ recv: initialization resp msg - state 0x%x\n",
+			ibmvmc.state);
+		if (ibmvmc.state == ibmvmc_state_crqinit)
+			ibmvmc_send_capabilities(adapter);
+		break;
+	default:
+		dev_warn(adapter->dev, "Unknown crq message type 0x%lx\n",
+			 (unsigned long)crq->type);
+	}
+}
+
+/**
+ * ibmvmc_handle_crq - Handle CRQ
+ *
+ * @crq:	ibmvmc_crq_msg struct
+ * @adapter:	crq_server_adapter struct
+ *
+ * Read the command elements from the command queue and execute the
+ * requests based upon the type of crq message.
+ *
+ */
+static void ibmvmc_handle_crq(struct ibmvmc_crq_msg *crq,
+			      struct crq_server_adapter *adapter)
+{
+	switch (crq->valid) {
+	case 0xC0:		/* initialization */
+		ibmvmc_handle_crq_init(crq, adapter);
+		break;
+	case 0xFF:	/* Hypervisor telling us the connection is closed */
+		dev_warn(adapter->dev, "CRQ recv: virtual adapter failed - resetting.\n");
+		ibmvmc_reset(adapter, true);
+		break;
+	case 0x80:	/* real payload */
+		ibmvmc_crq_process(adapter, crq);
+		break;
+	default:
+		dev_warn(adapter->dev, "CRQ recv: unknown msg 0x%02x.\n",
+			 crq->valid);
+		break;
+	}
+}
+
+static void ibmvmc_task(unsigned long data)
+{
+	struct crq_server_adapter *adapter =
+		(struct crq_server_adapter *)data;
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	struct ibmvmc_crq_msg *crq;
+	int done = 0;
+
+	while (!done) {
+		/* Pull all the valid messages off the CRQ */
+		while ((crq = crq_queue_next_crq(&adapter->queue)) != NULL) {
+			ibmvmc_handle_crq(crq, adapter);
+			crq->valid = 0x00;
+			/* CRQ reset was requested, stop processing CRQs.
+			 * Interrupts will be re-enabled by the reset task.
+			 */
+			if (ibmvmc.state == ibmvmc_state_sched_reset)
+				return;
+		}
+
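+		/* Re-enable interrupts, then poll the queue once more: an
+		 * entry that arrived between draining the queue and enabling
+		 * interrupts would otherwise sit unprocessed until the next
+		 * interrupt.
+		 */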
+		vio_enable_interrupts(vdev);
+		crq = crq_queue_next_crq(&adapter->queue);
+		if (crq) {
+			vio_disable_interrupts(vdev);
+			ibmvmc_handle_crq(crq, adapter);
+			crq->valid = 0x00;
+			/* CRQ reset was requested, stop processing CRQs.
+			 * Interrupts will be re-enabled by the reset task.
+			 */
+			if (ibmvmc.state == ibmvmc_state_sched_reset)
+				return;
+		} else {
+			done = 1;
+		}
+	}
+}
+
+/**
+ * ibmvmc_init_crq_queue - Init CRQ Queue
+ *
+ * @adapter:	crq_server_adapter struct
+ *
+ * Return:
+ *	0 - Success
+ *	Non-zero - Failure
+ */
+static int ibmvmc_init_crq_queue(struct crq_server_adapter *adapter)
+{
+	struct vio_dev *vdev = to_vio_dev(adapter->dev);
+	struct crq_queue *queue = &adapter->queue;
+	int rc = 0;
+	int retrc = 0;
+
+	queue->msgs = (struct ibmvmc_crq_msg *)get_zeroed_page(GFP_KERNEL);
+
+	if (!queue->msgs)
+		goto malloc_failed;
+
+	queue->size = PAGE_SIZE / sizeof(*queue->msgs);
+
+	queue->msg_token = dma_map_single(adapter->dev, queue->msgs,
+					  queue->size * sizeof(*queue->msgs),
+					  DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(adapter->dev, queue->msg_token))
+		goto map_failed;
+
+	retrc = plpar_hcall_norets(H_REG_CRQ,
+				   vdev->unit_address,
+				   queue->msg_token, PAGE_SIZE);
+	rc = retrc;
+
+	if (rc == H_RESOURCE)
+		rc = ibmvmc_reset_crq_queue(adapter);
+
+	if (rc == 2) {		/* H_CLOSED: partner not there yet */
+		dev_warn(adapter->dev, "Partner adapter not ready\n");
+		retrc = 0;
+	} else if (rc != 0) {
+		dev_err(adapter->dev, "Error %d opening adapter\n", rc);
+		goto reg_crq_failed;
+	}
+
+	queue->cur = 0;
+	spin_lock_init(&queue->lock);
+
+	tasklet_init(&adapter->work_task, ibmvmc_task, (unsigned long)adapter);
+
+	if (request_irq(vdev->irq,
+			ibmvmc_handle_event,
+			0, "ibmvmc", (void *)adapter) != 0) {
+		dev_err(adapter->dev, "couldn't register irq 0x%x\n",
+			vdev->irq);
+		goto req_irq_failed;
+	}
+
+	rc = vio_enable_interrupts(vdev);
+	if (rc != 0) {
+		dev_err(adapter->dev, "Error %d enabling interrupts!!!\n", rc);
+		goto req_irq_failed;
+	}
+
+	return retrc;
+
+req_irq_failed:
+	/* Cannot have any work since we either never got our IRQ registered,
+	 * or never got interrupts enabled
+	 */
+	tasklet_kill(&adapter->work_task);
+	h_free_crq(vdev->unit_address);
+reg_crq_failed:
+	dma_unmap_single(adapter->dev,
+			 queue->msg_token,
+			 queue->size * sizeof(*queue->msgs), DMA_BIDIRECTIONAL);
+map_failed:
+	free_page((unsigned long)queue->msgs);
+malloc_failed:
+	return -ENOMEM;
+}
+
+/* Fill in the liobn and riobn fields on the adapter */
+static int read_dma_window(struct vio_dev *vdev,
+			   struct crq_server_adapter *adapter)
+{
+	const __be32 *dma_window;
+	const __be32 *prop;
+
+	/* TODO Using of_parse_dma_window would be better, but it doesn't give
+	 * a way to read multiple windows without already knowing the size of
+	 * a window or the number of windows
+	 */
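+	/* Each window is encoded as <liobn> <address> <size>; the address and
+	 * size cell counts come from the #dma-address-cells and
+	 * #dma-size-cells properties, assumed to be one cell each when the
+	 * property is absent (hence the single-cell skips below).
+	 */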
+	dma_window =
+		(const __be32 *)vio_get_attribute(vdev, "ibm,my-dma-window",
+						NULL);
+	if (!dma_window) {
+		dev_warn(adapter->dev, "Couldn't find ibm,my-dma-window property\n");
+		return -1;
+	}
+
+	adapter->liobn = be32_to_cpu(*dma_window);
+	dma_window++;
+
+	prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-address-cells",
+						NULL);
+	if (!prop) {
+		dev_warn(adapter->dev, "Couldn't find ibm,#dma-address-cells property\n");
+		dma_window++;
+	} else {
+		dma_window += be32_to_cpu(*prop);
+	}
+
+	prop = (const __be32 *)vio_get_attribute(vdev, "ibm,#dma-size-cells",
+						NULL);
+	if (!prop) {
+		dev_warn(adapter->dev, "Couldn't find ibm,#dma-size-cells property\n");
+		dma_window++;
+	} else {
+		dma_window += be32_to_cpu(*prop);
+	}
+
+	/* dma_window should point to the second window now */
+	adapter->riobn = be32_to_cpu(*dma_window);
+
+	return 0;
+}
+
+static int ibmvmc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+{
+	struct crq_server_adapter *adapter = &ibmvmc_adapter;
+	int rc;
+
+	dev_set_drvdata(&vdev->dev, NULL);
+	memset(adapter, 0, sizeof(*adapter));
+	adapter->dev = &vdev->dev;
+
+	dev_info(adapter->dev, "Probe for UA 0x%x\n", vdev->unit_address);
+
+	rc = read_dma_window(vdev, adapter);
+	if (rc != 0) {
+		ibmvmc.state = ibmvmc_state_failed;
+		return -1;
+	}
+
+	dev_dbg(adapter->dev, "Probe: liobn 0x%x, riobn 0x%x\n",
+		adapter->liobn, adapter->riobn);
+
+	init_waitqueue_head(&adapter->reset_wait_queue);
+	adapter->reset_task = kthread_run(ibmvmc_reset_task, adapter, "ibmvmc");
+	if (IS_ERR(adapter->reset_task)) {
+		dev_err(adapter->dev, "Failed to start reset thread\n");
+		ibmvmc.state = ibmvmc_state_failed;
+		rc = PTR_ERR(adapter->reset_task);
+		adapter->reset_task = NULL;
+		return rc;
+	}
+
+	rc = ibmvmc_init_crq_queue(adapter);
+	if (rc != 0 && rc != H_RESOURCE) {
+		dev_err(adapter->dev, "Error initializing CRQ.  rc = 0x%x\n",
+			rc);
+		ibmvmc.state = ibmvmc_state_failed;
+		goto crq_failed;
+	}
+
+	ibmvmc.state = ibmvmc_state_crqinit;
+
+	/* Try to send an initialization message.  Note that this is allowed
+	 * to fail if the other end is not active.  In that case we just wait
+	 * for the other side to initialize.
+	 */
+	if (ibmvmc_send_crq(adapter, 0xC001000000000000LL, 0) != 0 &&
+	    rc != H_RESOURCE)
+		dev_warn(adapter->dev, "Failed to send initialize CRQ message\n");
+
+	dev_set_drvdata(&vdev->dev, adapter);
+
+	return 0;
+
+crq_failed:
+	kthread_stop(adapter->reset_task);
+	adapter->reset_task = NULL;
+	return -EPERM;
+}
+
+static int ibmvmc_remove(struct vio_dev *vdev)
+{
+	struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev);
+
+	dev_info(adapter->dev, "Entering remove for UA 0x%x\n",
+		 vdev->unit_address);
+	ibmvmc_release_crq_queue(adapter);
+
+	return 0;
+}
+
+static struct vio_device_id ibmvmc_device_table[] = {
+	{ "ibm,vmc", "IBM,vmc" },
+	{ "", "" }
+};
+MODULE_DEVICE_TABLE(vio, ibmvmc_device_table);
+
+static struct vio_driver ibmvmc_driver = {
+	.name        = ibmvmc_driver_name,
+	.id_table    = ibmvmc_device_table,
+	.probe       = ibmvmc_probe,
+	.remove      = ibmvmc_remove,
+};
+
+static void __init ibmvmc_scrub_module_parms(void)
+{
+	if (ibmvmc_max_mtu > MAX_MTU) {
+		pr_warn("ibmvmc: Max MTU reduced to %d\n", MAX_MTU);
+		ibmvmc_max_mtu = MAX_MTU;
+	} else if (ibmvmc_max_mtu < MIN_MTU) {
+		pr_warn("ibmvmc: Max MTU increased to %d\n", MIN_MTU);
+		ibmvmc_max_mtu = MIN_MTU;
+	}
+
+	if (ibmvmc_max_buf_pool_size > MAX_BUF_POOL_SIZE) {
+		pr_warn("ibmvmc: Max buffer pool size reduced to %d\n",
+			MAX_BUF_POOL_SIZE);
+		ibmvmc_max_buf_pool_size = MAX_BUF_POOL_SIZE;
+	} else if (ibmvmc_max_buf_pool_size < MIN_BUF_POOL_SIZE) {
+		pr_warn("ibmvmc: Max buffer pool size increased to %d\n",
+			MIN_BUF_POOL_SIZE);
+		ibmvmc_max_buf_pool_size = MIN_BUF_POOL_SIZE;
+	}
+
+	if (ibmvmc_max_hmcs > MAX_HMCS) {
+		pr_warn("ibmvmc: Max HMCs reduced to %d\n", MAX_HMCS);
+		ibmvmc_max_hmcs = MAX_HMCS;
+	} else if (ibmvmc_max_hmcs < MIN_HMCS) {
+		pr_warn("ibmvmc: Max HMCs increased to %d\n", MIN_HMCS);
+		ibmvmc_max_hmcs = MIN_HMCS;
+	}
+}
+
+static struct miscdevice ibmvmc_miscdev = {
+	.name = ibmvmc_driver_name,
+	.minor = MISC_DYNAMIC_MINOR,
+	.fops = &ibmvmc_fops,
+};
+
+static int __init ibmvmc_module_init(void)
+{
+	int rc, i, j;
+
+	ibmvmc.state = ibmvmc_state_initial;
+	pr_info("ibmvmc: version %s\n", IBMVMC_DRIVER_VERSION);
+
+	rc = misc_register(&ibmvmc_miscdev);
+	if (rc) {
+		pr_err("ibmvmc: misc registration failed\n");
+		goto misc_register_failed;
+	}
+	pr_info("ibmvmc: node %d:%d\n", MISC_MAJOR,
+		ibmvmc_miscdev.minor);
+
+	/* Initialize data structures */
+	memset(hmcs, 0, sizeof(struct ibmvmc_hmc) * MAX_HMCS);
+	for (i = 0; i < MAX_HMCS; i++) {
+		spin_lock_init(&hmcs[i].lock);
+		hmcs[i].state = ibmhmc_state_free;
+		for (j = 0; j < MAX_BUF_POOL_SIZE; j++)
+			hmcs[i].queue_outbound_msgs[j] = VMC_INVALID_BUFFER_ID;
+	}
+
+	/* Sanity check module parms */
+	ibmvmc_scrub_module_parms();
+
+	/*
+	 * Initialize some reasonable values.  Might be negotiated smaller
+	 * values during the capabilities exchange.
+	 */
+	ibmvmc.max_mtu = ibmvmc_max_mtu;
+	ibmvmc.max_buffer_pool_size = ibmvmc_max_buf_pool_size;
+	ibmvmc.max_hmc_index = ibmvmc_max_hmcs - 1;
+
+	rc = vio_register_driver(&ibmvmc_driver);
+
+	if (rc) {
+		pr_err("ibmvmc: rc %d from vio_register_driver\n", rc);
+		goto vio_reg_failed;
+	}
+
+	return 0;
+
+vio_reg_failed:
+	misc_deregister(&ibmvmc_miscdev);
+misc_register_failed:
+	return rc;
+}
+
+static void __exit ibmvmc_module_exit(void)
+{
+	pr_info("ibmvmc: module exit\n");
+	vio_unregister_driver(&ibmvmc_driver);
+	misc_deregister(&ibmvmc_miscdev);
+}
+
+module_init(ibmvmc_module_init);
+module_exit(ibmvmc_module_exit);
+
+module_param_named(buf_pool_size, ibmvmc_max_buf_pool_size,
+		   int, 0644);
+MODULE_PARM_DESC(buf_pool_size, "Buffer pool size");
+module_param_named(max_hmcs, ibmvmc_max_hmcs, int, 0644);
+MODULE_PARM_DESC(max_hmcs, "Max HMCs");
+module_param_named(max_mtu, ibmvmc_max_mtu, int, 0644);
+MODULE_PARM_DESC(max_mtu, "Max MTU");
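+
+/* Illustration (not from the driver documentation): the limits above can be
+ * tuned at load time, e.g.
+ *
+ *	modprobe ibmvmc max_mtu=16384 buf_pool_size=64
+ *
+ * Out-of-range values are clamped by ibmvmc_scrub_module_parms().
+ */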
+
+MODULE_AUTHOR("Steven Royer <seroyer@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM VMC");
+MODULE_VERSION(IBMVMC_DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/ibmvmc.h b/drivers/misc/ibmvmc.h
new file mode 100644
index 0000000..e140ada
--- /dev/null
+++ b/drivers/misc/ibmvmc.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0+
+ *
+ * linux/drivers/misc/ibmvmc.h
+ *
+ * IBM Power Systems Virtual Management Channel Support.
+ *
+ * Copyright (c) 2004, 2018 IBM Corp.
+ *   Dave Engebretsen engebret@us.ibm.com
+ *   Steven Royer seroyer@linux.vnet.ibm.com
+ *   Adam Reznechek adreznec@linux.vnet.ibm.com
+ *   Bryant G. Ly <bryantly@linux.vnet.ibm.com>
+ */
+#ifndef IBMVMC_H
+#define IBMVMC_H
+
+#include <linux/types.h>
+#include <linux/cdev.h>
+
+#include <asm/vio.h>
+
+#define IBMVMC_PROTOCOL_VERSION    0x0101
+
+#define MIN_BUF_POOL_SIZE 16
+#define MIN_HMCS          1
+#define MIN_MTU           4096
+#define MAX_BUF_POOL_SIZE 64
+#define MAX_HMCS          2
+#define MAX_MTU           (4 * 4096)
+#define DEFAULT_BUF_POOL_SIZE 32
+#define DEFAULT_HMCS          1
+#define DEFAULT_MTU           4096
+#define HMC_ID_LEN        32
+
+#define VMC_INVALID_BUFFER_ID 0xFFFF
+
+/* ioctl numbers */
+#define VMC_BASE	     0xCC
+#define VMC_IOCTL_SETHMCID   _IOW(VMC_BASE, 0x00, unsigned char *)
+#define VMC_IOCTL_QUERY      _IOR(VMC_BASE, 0x01, struct ibmvmc_query_struct)
+#define VMC_IOCTL_REQUESTVMC _IOR(VMC_BASE, 0x02, u32)
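+
+/* Illustrative sketch only (not from the driver documentation): a management
+ * application might use these ioctls roughly as follows (device node name
+ * assumed):
+ *
+ *	int fd = open("/dev/ibmvmc", O_RDWR);
+ *	unsigned char hmc_id[HMC_ID_LEN] = "hmc0";
+ *	struct ibmvmc_query_struct query;
+ *
+ *	ioctl(fd, VMC_IOCTL_QUERY, &query);
+ *	if (query.have_vmc)
+ *		ioctl(fd, VMC_IOCTL_SETHMCID, hmc_id);
+ */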
+
+#define VMC_MSG_CAP          0x01
+#define VMC_MSG_CAP_RESP     0x81
+#define VMC_MSG_OPEN         0x02
+#define VMC_MSG_OPEN_RESP    0x82
+#define VMC_MSG_CLOSE        0x03
+#define VMC_MSG_CLOSE_RESP   0x83
+#define VMC_MSG_ADD_BUF      0x04
+#define VMC_MSG_ADD_BUF_RESP 0x84
+#define VMC_MSG_REM_BUF      0x05
+#define VMC_MSG_REM_BUF_RESP 0x85
+#define VMC_MSG_SIGNAL       0x06
+
+#define VMC_MSG_SUCCESS 0
+#define VMC_MSG_INVALID_HMC_INDEX 1
+#define VMC_MSG_INVALID_BUFFER_ID 2
+#define VMC_MSG_CLOSED_HMC        3
+#define VMC_MSG_INTERFACE_FAILURE 4
+#define VMC_MSG_NO_BUFFER         5
+
+#define VMC_BUF_OWNER_ALPHA 0
+#define VMC_BUF_OWNER_HV    1
+
+enum ibmvmc_states {
+	ibmvmc_state_sched_reset  = -1,
+	ibmvmc_state_initial      = 0,
+	ibmvmc_state_crqinit      = 1,
+	ibmvmc_state_capabilities = 2,
+	ibmvmc_state_ready        = 3,
+	ibmvmc_state_failed       = 4,
+};
+
+enum ibmhmc_states {
+	/* HMC connection not established */
+	ibmhmc_state_free    = 0,
+
+	/* HMC connection established (open called) */
+	ibmhmc_state_initial = 1,
+
+	/* open msg sent to HV, due to ioctl(1) call */
+	ibmhmc_state_opening = 2,
+
+	/* HMC connection ready, open resp msg from HV */
+	ibmhmc_state_ready   = 3,
+
+	/* HMC connection failure */
+	ibmhmc_state_failed  = 4,
+};
+
+struct ibmvmc_buffer {
+	u8 valid;	/* 1 when DMA storage allocated to buffer          */
+	u8 free;	/* 1 when buffer available for the Alpha Partition */
+	u8 owner;
+	u16 id;
+	u32 size;
+	u32 msg_len;
+	dma_addr_t dma_addr_local;
+	dma_addr_t dma_addr_remote;
+	void *real_addr_local;
+};
+
+struct ibmvmc_admin_crq_msg {
+	u8 valid;	/* RPA Defined           */
+	u8 type;	/* ibmvmc msg type       */
+	u8 status;	/* Response msg status. Zero is success and on failure,
+			 * either 1 - General Failure, or 2 - Invalid Version is
+			 * returned.
+			 */
+	u8 rsvd[2];
+	u8 max_hmc;	/* Max # of independent HMC connections supported */
+	__be16 pool_size;	/* Maximum number of buffers supported per HMC
+				 * connection
+				 */
+	__be32 max_mtu;		/* Maximum message size supported (bytes) */
+	__be16 crq_size;	/* # of entries available in the CRQ for the
+				 * source partition. The target partition must
+				 * limit the number of outstanding messages to
+				 * one half or less.
+				 */
+	__be16 version;	/* Indicates the code level of the management partition
+			 * or the hypervisor with the high-order byte
+			 * indicating a major version and the low-order byte
+			 * indicating a minor version.
+			 */
+};
+
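+/* This structure overlays a single 16-byte CRQ entry. The hypervisor writes
+ * entries directly into partition memory, so multi-byte fields are big
+ * endian (hence the __be16/__be32 types and the be*_to_cpu conversions in
+ * ibmvmc.c).
+ */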
+struct ibmvmc_crq_msg {
+	u8 valid;     /* RPA Defined           */
+	u8 type;      /* ibmvmc msg type       */
+	u8 status;    /* Response msg status   */
+	union {
+		u8 rsvd;  /* Reserved              */
+		u8 owner;
+	} var1;
+	u8 hmc_session;	/* Session Identifier for the current VMC connection */
+	u8 hmc_index;	/* A unique HMC index is used when multiple management
+			 * applications are to run concurrently
+			 */
+	union {
+		__be16 rsvd;
+		__be16 buffer_id;
+	} var2;
+	__be32 rsvd;
+	union {
+		__be32 rsvd;
+		__be32 lioba;
+		__be32 msg_len;
+	} var3;
+};
+
+/* an RPA command/response transport queue */
+struct crq_queue {
+	struct ibmvmc_crq_msg *msgs;
+	int size, cur;
+	dma_addr_t msg_token;
+	spinlock_t lock;
+};
+
+/* VMC server adapter settings */
+struct crq_server_adapter {
+	struct device *dev;
+	struct crq_queue queue;
+	u32 liobn;
+	u32 riobn;
+	struct tasklet_struct work_task;
+	wait_queue_head_t reset_wait_queue;
+	struct task_struct *reset_task;
+};
+
+/* Driver wide settings */
+struct ibmvmc_struct {
+	u32 state;
+	u32 max_mtu;
+	u32 max_buffer_pool_size;
+	u32 max_hmc_index;
+	struct crq_server_adapter *adapter;
+	struct cdev cdev;
+	u32 vmc_drc_index;
+};
+
+struct ibmvmc_file_session;
+
+/* Connection specific settings */
+struct ibmvmc_hmc {
+	u8 session;
+	u8 index;
+	u32 state;
+	struct crq_server_adapter *adapter;
+	spinlock_t lock;
+	unsigned char hmc_id[HMC_ID_LEN];
+	struct ibmvmc_buffer buffer[MAX_BUF_POOL_SIZE];
+	unsigned short queue_outbound_msgs[MAX_BUF_POOL_SIZE];
+	int queue_head, queue_tail;
+	struct ibmvmc_file_session *file_session;
+};
+
+struct ibmvmc_file_session {
+	struct file *file;
+	struct ibmvmc_hmc *hmc;
+	bool valid;
+};
+
+struct ibmvmc_query_struct {
+	int have_vmc;
+	int state;
+	int vmc_drc_index;
+};
+
+#endif /* IBMVMC_H */
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index 3641f13..ab174f28 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -926,7 +926,7 @@
  *
  * 	Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
  */
-int gru_fault(struct vm_fault *vmf)
+vm_fault_t gru_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct gru_thread_state *gts;
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 4f76359..42ea2ec 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -270,16 +270,6 @@
 	return single_open(file, options_show, NULL);
 }
 
-static int cch_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &cch_seq_ops);
-}
-
-static int gru_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &gru_seq_ops);
-}
-
 /* *INDENT-OFF* */
 static const struct file_operations statistics_fops = {
 	.open 		= statistics_open,
@@ -305,73 +295,30 @@
 	.release 	= single_release,
 };
 
-static const struct file_operations cch_fops = {
-	.open 		= cch_open,
-	.read 		= seq_read,
-	.llseek 	= seq_lseek,
-	.release 	= seq_release,
-};
-static const struct file_operations gru_fops = {
-	.open 		= gru_open,
-	.read 		= seq_read,
-	.llseek 	= seq_lseek,
-	.release 	= seq_release,
-};
-
-static struct proc_entry {
-	char *name;
-	umode_t mode;
-	const struct file_operations *fops;
-	struct proc_dir_entry *entry;
-} proc_files[] = {
-	{"statistics", 0644, &statistics_fops},
-	{"mcs_statistics", 0644, &mcs_statistics_fops},
-	{"debug_options", 0644, &options_fops},
-	{"cch_status", 0444, &cch_fops},
-	{"gru_status", 0444, &gru_fops},
-	{NULL}
-};
-/* *INDENT-ON* */
-
 static struct proc_dir_entry *proc_gru __read_mostly;
 
-static int create_proc_file(struct proc_entry *p)
-{
-	p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
-	if (!p->entry)
-		return -1;
-	return 0;
-}
-
-static void delete_proc_files(void)
-{
-	struct proc_entry *p;
-
-	if (proc_gru) {
-		for (p = proc_files; p->name; p++)
-			if (p->entry)
-				remove_proc_entry(p->name, proc_gru);
-		proc_remove(proc_gru);
-	}
-}
-
 int gru_proc_init(void)
 {
-	struct proc_entry *p;
-
 	proc_gru = proc_mkdir("sgi_uv/gru", NULL);
-
-	for (p = proc_files; p->name; p++)
-		if (create_proc_file(p))
-			goto err;
+	if (!proc_gru)
+		return -1;
+	if (!proc_create("statistics", 0644, proc_gru, &statistics_fops))
+		goto err;
+	if (!proc_create("mcs_statistics", 0644, proc_gru, &mcs_statistics_fops))
+		goto err;
+	if (!proc_create("debug_options", 0644, proc_gru, &options_fops))
+		goto err;
+	if (!proc_create_seq("cch_status", 0444, proc_gru, &cch_seq_ops))
+		goto err;
+	if (!proc_create_seq("gru_status", 0444, proc_gru, &gru_seq_ops))
+		goto err;
 	return 0;
-
 err:
-	delete_proc_files();
+	remove_proc_subtree("sgi_uv/gru", NULL);
 	return -1;
 }
 
 void gru_proc_exit(void)
 {
-	delete_proc_files();
+	remove_proc_subtree("sgi_uv/gru", NULL);
 }
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index b5e308b..3e041b6 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -147,6 +147,7 @@
 #include <linux/mutex.h>
 #include <linux/wait.h>
 #include <linux/mmu_notifier.h>
+#include <linux/mm_types.h>
 #include "gru.h"
 #include "grulib.h"
 #include "gruhandles.h"
@@ -665,7 +666,7 @@
 		int cbr_au_count, char *cbmap);
 extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
 		int dsr_au_count, char *dsmap);
-extern int gru_fault(struct vm_fault *vmf);
+extern vm_fault_t gru_fault(struct vm_fault *vmf);
 extern struct gru_mm_struct *gru_register_mmu_notifier(void);
 extern void gru_drop_mmu_notifier(struct gru_mm_struct *gms);
 
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 0c26eaf..216d5c7 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -407,7 +407,7 @@
  * destination partid.  If the destination partid octets are 0xffff,
  * this packet is to be broadcast to all connected partitions.
  */
-static int
+static netdev_tx_t
 xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xpnet_pending_msg *queued_msg;
diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c
index b77aaca..5ec3f5a 100644
--- a/drivers/misc/ti-st/st_kim.c
+++ b/drivers/misc/ti-st/st_kim.c
@@ -735,7 +735,7 @@
 		st_kim_devices[0] = pdev;
 	}
 
-	kim_gdata = kzalloc(sizeof(struct kim_data_s), GFP_ATOMIC);
+	kim_gdata = kzalloc(sizeof(struct kim_data_s), GFP_KERNEL);
 	if (!kim_gdata) {
 		pr_err("no mem to allocate");
 		return -ENOMEM;
diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
index e5f1087..9ac95b4 100644
--- a/drivers/misc/tifm_7xx1.c
+++ b/drivers/misc/tifm_7xx1.c
@@ -239,9 +239,13 @@
 	unsigned long timeout;
 	unsigned int good_sockets = 0, bad_sockets = 0;
 	unsigned long flags;
-	unsigned char new_ids[fm->num_sockets];
+	/* Maximum number of entries is 4 */
+	unsigned char new_ids[4];
 	DECLARE_COMPLETION_ONSTACK(finish_resume);
 
+	if (WARN_ON(fm->num_sockets > ARRAY_SIZE(new_ids)))
+		return -ENXIO;
+
 	pci_set_power_state(dev, PCI_D0);
 	pci_restore_state(dev);
 	rc = pci_enable_device(dev);
diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c
index 9eea30f..80a6f19 100644
--- a/drivers/misc/vexpress-syscfg.c
+++ b/drivers/misc/vexpress-syscfg.c
@@ -182,8 +182,7 @@
 		val = energy_quirk;
 	}
 
-	func = kzalloc(sizeof(*func) + sizeof(*func->template) * num,
-			GFP_KERNEL);
+	func = kzalloc(struct_size(func, template, num), GFP_KERNEL);
 	if (!func)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 9047c0a..efd7334 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -576,15 +576,9 @@
 		}
 	}
 
-	if (b->batch_page) {
-		vunmap(b->batch_page);
-		b->batch_page = NULL;
-	}
-
-	if (b->page) {
-		__free_page(b->page);
-		b->page = NULL;
-	}
+	/* Clearing the batch_page unconditionally has no adverse effect */
+	free_page((unsigned long)b->batch_page);
+	b->batch_page = NULL;
 }
 
 /*
@@ -991,16 +985,13 @@
 
 static bool vmballoon_init_batching(struct vmballoon *b)
 {
-	b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
-	if (!b->page)
+	struct page *page;
+
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
 		return false;
 
-	b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
-	if (!b->batch_page) {
-		__free_page(b->page);
-		return false;
-	}
-
+	b->batch_page = page_address(page);
 	return true;
 }
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 38a7586..a0b9102 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -244,7 +244,7 @@
 	mq = &md->queue;
 
 	/* Dispatch locking to the block layer */
-	req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, __GFP_RECLAIM);
+	req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req)) {
 		count = PTR_ERR(req);
 		goto out_put;
@@ -650,8 +650,7 @@
 	 */
 	mq = &md->queue;
 	req = blk_get_request(mq->queue,
-		idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,
-		__GFP_RECLAIM);
+		idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto cmd_done;
@@ -721,8 +720,7 @@
 	 */
 	mq = &md->queue;
 	req = blk_get_request(mq->queue,
-		idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN,
-		__GFP_RECLAIM);
+		idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto cmd_err;
@@ -2353,7 +2351,8 @@
 	set_disk_ro(md->disk, md->read_only || default_ro);
 	md->disk->flags = GENHD_FL_EXT_DEVT;
 	if (area_type & (MMC_BLK_DATA_AREA_RPMB | MMC_BLK_DATA_AREA_BOOT))
-		md->disk->flags |= GENHD_FL_NO_PART_SCAN;
+		md->disk->flags |= GENHD_FL_NO_PART_SCAN
+				   | GENHD_FL_SUPPRESS_PARTITION_INFO;
 
 	/*
 	 * As discussed on lkml, GENHD_FL_REMOVABLE should:
@@ -2750,7 +2749,7 @@
 	int ret;
 
 	/* Ask the block layer about the card status */
-	req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	req = blk_get_request(mq->queue, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS;
@@ -2786,7 +2785,7 @@
 		return -ENOMEM;
 
 	/* Ask the block layer for the EXT CSD */
-	req = blk_get_request(mq->queue, REQ_OP_DRV_IN, __GFP_RECLAIM);
+	req = blk_get_request(mq->queue, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto out_free;
@@ -2967,9 +2966,11 @@
 	mmc_blk_remove_debugfs(card, md);
 	mmc_blk_remove_parts(card, md);
 	pm_runtime_get_sync(&card->dev);
-	mmc_claim_host(card->host);
-	mmc_blk_part_switch(card, md->part_type);
-	mmc_release_host(card->host);
+	if (md->part_curr != md->part_type) {
+		mmc_claim_host(card->host);
+		mmc_blk_part_switch(card, md->part_type);
+		mmc_release_host(card->host);
+	}
 	if (card->type != MMC_TYPE_SD_COMBO)
 		pm_runtime_disable(&card->dev);
 	pm_runtime_put_noidle(&card->dev);
diff --git a/drivers/mmc/core/card.h b/drivers/mmc/core/card.h
index 9c821ee..1170feb 100644
--- a/drivers/mmc/core/card.h
+++ b/drivers/mmc/core/card.h
@@ -149,6 +149,12 @@
 	card->quirks &= ~data;
 }
 
+static inline void __maybe_unused add_limit_rate_quirk(struct mmc_card *card,
+						       int data)
+{
+	card->quirk_max_rate = data;
+}
+
 /*
  * Quirk add/remove for MMC products.
  */
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 121ce50..281826d 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -50,9 +50,6 @@
 #include "sd_ops.h"
 #include "sdio_ops.h"
 
-/* If the device is not responding */
-#define MMC_CORE_TIMEOUT_MS	(10 * 60 * 1000) /* 10 minute timeout */
-
 /* The max erase timeout, used when host->max_busy_timeout isn't specified */
 #define MMC_ERASE_TIMEOUT_MS	(60 * 1000) /* 60 s */
 
@@ -1484,6 +1481,17 @@
 
 }
 
+void mmc_set_initial_signal_voltage(struct mmc_host *host)
+{
+	/* Try to set signal voltage to 3.3V but fall back to 1.8v or 1.2v */
+	if (!mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330))
+		dev_dbg(mmc_dev(host), "Initial signal voltage of 3.3v\n");
+	else if (!mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180))
+		dev_dbg(mmc_dev(host), "Initial signal voltage of 1.8v\n");
+	else if (!mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120))
+		dev_dbg(mmc_dev(host), "Initial signal voltage of 1.2v\n");
+}
+
 int mmc_host_set_uhs_voltage(struct mmc_host *host)
 {
 	u32 clock;
@@ -1646,19 +1654,13 @@
 	/* Set initial state and call mmc_set_ios */
 	mmc_set_initial_state(host);
 
-	/* Try to set signal voltage to 3.3V but fall back to 1.8v or 1.2v */
-	if (!mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330))
-		dev_dbg(mmc_dev(host), "Initial signal voltage of 3.3v\n");
-	else if (!mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180))
-		dev_dbg(mmc_dev(host), "Initial signal voltage of 1.8v\n");
-	else if (!mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120))
-		dev_dbg(mmc_dev(host), "Initial signal voltage of 1.2v\n");
+	mmc_set_initial_signal_voltage(host);
 
 	/*
 	 * This delay should be sufficient to allow the power supply
 	 * to reach the minimum voltage.
 	 */
-	mmc_delay(10);
+	mmc_delay(host->ios.power_delay_ms);
 
 	mmc_pwrseq_post_power_on(host);
 
@@ -1671,7 +1673,7 @@
 	 * This delay must be at least 74 clock sizes, or 1 ms, or the
 	 * time required to reach a stable voltage.
 	 */
-	mmc_delay(10);
+	mmc_delay(host->ios.power_delay_ms);
 }
 
 void mmc_power_off(struct mmc_host *host)
@@ -1967,6 +1969,7 @@
 	unsigned int qty = 0, busy_timeout = 0;
 	bool use_r1b_resp = false;
 	unsigned long timeout;
+	int loop_udelay=64, udelay_max=32768;
 	int err;
 
 	mmc_retune_hold(card->host);
@@ -2091,9 +2094,15 @@
 			err =  -EIO;
 			goto out;
 		}
+		if ((cmd.resp[0] & R1_READY_FOR_DATA) &&
+		    R1_CURRENT_STATE(cmd.resp[0]) != R1_STATE_PRG)
+			break;
 
-	} while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
-		 (R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG));
+		usleep_range(loop_udelay, loop_udelay*2);
+		if (loop_udelay < udelay_max)
+			loop_udelay *= 2;
+	} while (1);
+
 out:
 	mmc_retune_release(card->host);
 	return err;
@@ -2435,22 +2444,46 @@
 		return -EINVAL;
 
 	mmc_bus_get(host);
-	if (!host->bus_ops || host->bus_dead || !host->bus_ops->reset) {
+	if (!host->bus_ops || host->bus_dead || !host->bus_ops->hw_reset) {
 		mmc_bus_put(host);
 		return -EOPNOTSUPP;
 	}
 
-	ret = host->bus_ops->reset(host);
+	ret = host->bus_ops->hw_reset(host);
 	mmc_bus_put(host);
 
 	if (ret)
-		pr_warn("%s: tried to reset card, got error %d\n",
+		pr_warn("%s: tried to HW reset card, got error %d\n",
 			mmc_hostname(host), ret);
 
 	return ret;
 }
 EXPORT_SYMBOL(mmc_hw_reset);
 
+int mmc_sw_reset(struct mmc_host *host)
+{
+	int ret;
+
+	if (!host->card)
+		return -EINVAL;
+
+	mmc_bus_get(host);
+	if (!host->bus_ops || host->bus_dead || !host->bus_ops->sw_reset) {
+		mmc_bus_put(host);
+		return -EOPNOTSUPP;
+	}
+
+	ret = host->bus_ops->sw_reset(host);
+	mmc_bus_put(host);
+
+	if (ret)
+		pr_warn("%s: tried to SW reset card, got error %d\n",
+			mmc_hostname(host), ret);
+
+	return ret;
+}
+EXPORT_SYMBOL(mmc_sw_reset);
+
 static int mmc_rescan_try_freq(struct mmc_host *host, unsigned freq)
 {
 	host->f_init = freq;
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index d6303d6..9d8f09a 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -32,7 +32,8 @@
 	int (*power_restore)(struct mmc_host *);
 	int (*alive)(struct mmc_host *);
 	int (*shutdown)(struct mmc_host *);
-	int (*reset)(struct mmc_host *);
+	int (*hw_reset)(struct mmc_host *);
+	int (*sw_reset)(struct mmc_host *);
 };
 
 void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops);
@@ -51,6 +52,7 @@
 int mmc_set_uhs_voltage(struct mmc_host *host, u32 ocr);
 int mmc_host_set_uhs_voltage(struct mmc_host *host);
 int mmc_set_signal_voltage(struct mmc_host *host, int signal_voltage);
+void mmc_set_initial_signal_voltage(struct mmc_host *host);
 void mmc_set_timing(struct mmc_host *host, unsigned int timing);
 void mmc_set_driver_type(struct mmc_host *host, unsigned int drv_type);
 int mmc_select_drive_strength(struct mmc_card *card, unsigned int max_dtr,
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 64b03d6..abf9e88 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -143,9 +143,6 @@
 			goto out;
 
 		return_to_hs400 = true;
-
-		if (host->ops->prepare_hs400_tuning)
-			host->ops->prepare_hs400_tuning(host, &host->ios);
 	}
 
 	err = mmc_execute_tuning(host->card);
@@ -179,7 +176,7 @@
 int mmc_of_parse(struct mmc_host *host)
 {
 	struct device *dev = host->parent;
-	u32 bus_width, drv_type;
+	u32 bus_width, drv_type, cd_debounce_delay_ms;
 	int ret;
 	bool cd_cap_invert, cd_gpio_invert = false;
 	bool ro_cap_invert, ro_gpio_invert = false;
@@ -230,11 +227,16 @@
 	} else {
 		cd_cap_invert = device_property_read_bool(dev, "cd-inverted");
 
+		if (device_property_read_u32(dev, "cd-debounce-delay-ms",
+					     &cd_debounce_delay_ms))
+			cd_debounce_delay_ms = 200;
+
 		if (device_property_read_bool(dev, "broken-cd"))
 			host->caps |= MMC_CAP_NEEDS_POLL;
 
 		ret = mmc_gpiod_request_cd(host, "cd", 0, true,
-					   0, &cd_gpio_invert);
+					   cd_debounce_delay_ms,
+					   &cd_gpio_invert);
 		if (!ret)
 			dev_info(host->parent, "Got CD GPIO\n");
 		else if (ret != -ENOENT && ret != -ENOSYS)
@@ -338,6 +340,9 @@
 		host->dsr_req = 0;
 	}
 
+	device_property_read_u32(dev, "post-power-on-delay-ms",
+				 &host->ios.power_delay_ms);
+
 	return mmc_pwrseq_alloc(host);
 }
 
@@ -403,6 +408,7 @@
 	host->max_blk_count = PAGE_SIZE / 512;
 
 	host->fixed_drv_type = -EINVAL;
+	host->ios.power_delay_ms = 10;
 
 	return host;
 }
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 6f8ebd6..4466f5d 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1282,6 +1282,10 @@
 
 	mmc_set_bus_speed(card);
 
+	/* Prepare tuning for HS400 mode. */
+	if (host->ops->prepare_hs400_tuning)
+		host->ops->prepare_hs400_tuning(host, &host->ios);
+
 	return 0;
 
 out_err:
@@ -1830,6 +1834,14 @@
 		}
 	}
 
+	if (host->caps2 & MMC_CAP2_AVOID_3_3V &&
+	    host->ios.signal_voltage == MMC_SIGNAL_VOLTAGE_330) {
+		pr_err("%s: Host failed to negotiate down from 3.3V\n",
+			mmc_hostname(host));
+		err = -EINVAL;
+		goto free_card;
+	}
+
 	if (!oldcard)
 		host->card = card;
 
@@ -2117,7 +2129,7 @@
 	return 1;
 }
 
-static int mmc_reset(struct mmc_host *host)
+static int _mmc_hw_reset(struct mmc_host *host)
 {
 	struct mmc_card *card = host->card;
 
@@ -2151,7 +2163,7 @@
 	.runtime_resume = mmc_runtime_resume,
 	.alive = mmc_alive,
 	.shutdown = mmc_shutdown,
-	.reset = mmc_reset,
+	.hw_reset = _mmc_hw_reset,
 };
 
 /*
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index 13ef162..a8b9fee 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -40,14 +40,18 @@
 	struct gpio_descs *reset_gpios = pwrseq->reset_gpios;
 
 	if (!IS_ERR(reset_gpios)) {
-		int i;
-		int values[reset_gpios->ndescs];
+		int i, *values;
+		int nvalues = reset_gpios->ndescs;
 
-		for (i = 0; i < reset_gpios->ndescs; i++)
+		values = kmalloc_array(nvalues, sizeof(int), GFP_KERNEL);
+		if (!values)
+			return;
+
+		for (i = 0; i < nvalues; i++)
 			values[i] = value;
 
-		gpiod_set_array_value_cansleep(
-			reset_gpios->ndescs, reset_gpios->desc, values);
+		gpiod_set_array_value_cansleep(nvalues, reset_gpios->desc, values);
+		kfree(values);
 	}
 }
 
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 56e9a80..648eb67 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -111,8 +111,9 @@
 				__mmc_cqe_recovery_notifier(mq);
 			return BLK_EH_RESET_TIMER;
 		}
-		/* No timeout */
-		return BLK_EH_HANDLED;
+		/* No timeout (XXX: huh? comment doesn't make much sense) */
+		blk_mq_complete_request(req);
+		return BLK_EH_DONE;
 	default:
 		/* Timeout is handled by mmc core */
 		return BLK_EH_RESET_TIMER;
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index 5153577..dd2f73a 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -132,6 +132,9 @@
 	SDIO_FIXUP(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8797_F0,
 		   add_quirk, MMC_QUIRK_BROKEN_IRQ_POLLING),
 
+	SDIO_FIXUP(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8887WLAN,
+		   add_limit_rate_quirk, 150000000),
+
 	END_FIXUP
 };
 
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index baf3d5d..d0d9f90 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1058,6 +1058,14 @@
 			mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
 		}
 	}
+
+	if (host->caps2 & MMC_CAP2_AVOID_3_3V &&
+	    host->ios.signal_voltage == MMC_SIGNAL_VOLTAGE_330) {
+		pr_err("%s: Host failed to negotiate down from 3.3V\n",
+			mmc_hostname(host));
+		err = -EINVAL;
+		goto free_card;
+	}
 done:
 	host->card = card;
 	return 0;
@@ -1214,7 +1222,7 @@
 	return 0;
 }
 
-static int mmc_sd_reset(struct mmc_host *host)
+static int mmc_sd_hw_reset(struct mmc_host *host)
 {
 	mmc_power_cycle(host, host->card->ocr);
 	return mmc_sd_init_card(host, host->card->ocr, host->card);
@@ -1229,7 +1237,7 @@
 	.resume = mmc_sd_resume,
 	.alive = mmc_sd_alive,
 	.shutdown = mmc_sd_suspend,
-	.reset = mmc_sd_reset,
+	.hw_reset = mmc_sd_hw_reset,
 };
 
 /*
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index c599a62..a86490d 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -444,6 +444,7 @@
 	unsigned int bus_speed, timing;
 	int err;
 	unsigned char speed;
+	unsigned int max_rate;
 
 	/*
 	 * If the host doesn't support any of the UHS-I modes, fallback on
@@ -500,9 +501,12 @@
 	if (err)
 		return err;
 
+	max_rate = min_not_zero(card->quirk_max_rate,
+				card->sw_caps.uhs_max_dtr);
+
 	if (bus_speed) {
 		mmc_set_timing(card->host, timing);
-		mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr);
+		mmc_set_clock(card->host, max_rate);
 	}
 
 	return 0;
@@ -788,6 +792,14 @@
 		if (err)
 			goto remove;
 	}
+
+	if (host->caps2 & MMC_CAP2_AVOID_3_3V &&
+	    host->ios.signal_voltage == MMC_SIGNAL_VOLTAGE_330) {
+		pr_err("%s: Host failed to negotiate down from 3.3V\n",
+			mmc_hostname(host));
+		err = -EINVAL;
+		goto remove;
+	}
 finish:
 	if (!oldcard)
 		host->card = card;
@@ -801,6 +813,22 @@
 	return err;
 }
 
+static int mmc_sdio_reinit_card(struct mmc_host *host, bool powered_resume)
+{
+	int ret;
+
+	sdio_reset(host);
+	mmc_go_idle(host);
+	mmc_send_if_cond(host, host->card->ocr);
+
+	ret = mmc_send_io_op_cond(host, 0, NULL);
+	if (ret)
+		return ret;
+
+	return mmc_sdio_init_card(host, host->card->ocr, host->card,
+				  powered_resume);
+}
+
 /*
  * Host is being removed. Free up the current card.
  */
@@ -948,14 +976,7 @@
 
 	/* No need to reinitialize powered-resumed nonremovable cards */
 	if (mmc_card_is_removable(host) || !mmc_card_keep_power(host)) {
-		sdio_reset(host);
-		mmc_go_idle(host);
-		mmc_send_if_cond(host, host->card->ocr);
-		err = mmc_send_io_op_cond(host, 0, NULL);
-		if (!err)
-			err = mmc_sdio_init_card(host, host->card->ocr,
-						 host->card,
-						 mmc_card_keep_power(host));
+		err = mmc_sdio_reinit_card(host, mmc_card_keep_power(host));
 	} else if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
 		/* We may have switched to 1-bit mode during suspend */
 		err = sdio_enable_4bit_bus(host->card);
@@ -978,8 +999,6 @@
 {
 	int ret;
 
-	mmc_claim_host(host);
-
 	/*
 	 * Reset the card by performing the same steps that are taken by
 	 * mmc_rescan_try_freq() and mmc_attach_sdio() during a "normal" probe.
@@ -997,20 +1016,12 @@
 	 *
 	 */
 
-	sdio_reset(host);
-	mmc_go_idle(host);
-	mmc_send_if_cond(host, host->card->ocr);
+	mmc_claim_host(host);
 
-	ret = mmc_send_io_op_cond(host, 0, NULL);
-	if (ret)
-		goto out;
-
-	ret = mmc_sdio_init_card(host, host->card->ocr, host->card,
-				mmc_card_keep_power(host));
+	ret = mmc_sdio_reinit_card(host, mmc_card_keep_power(host));
 	if (!ret && host->sdio_irqs)
 		mmc_signal_sdio_irq(host);
 
-out:
 	mmc_release_host(host);
 
 	return ret;
@@ -1039,12 +1050,24 @@
 	return ret;
 }
 
-static int mmc_sdio_reset(struct mmc_host *host)
+static int mmc_sdio_hw_reset(struct mmc_host *host)
 {
 	mmc_power_cycle(host, host->card->ocr);
 	return mmc_sdio_power_restore(host);
 }
 
+static int mmc_sdio_sw_reset(struct mmc_host *host)
+{
+	mmc_set_clock(host, host->f_init);
+	sdio_reset(host);
+	mmc_go_idle(host);
+
+	mmc_set_initial_state(host);
+	mmc_set_initial_signal_voltage(host);
+
+	return mmc_sdio_reinit_card(host, 0);
+}
+
 static const struct mmc_bus_ops mmc_sdio_ops = {
 	.remove = mmc_sdio_remove,
 	.detect = mmc_sdio_detect,
@@ -1055,7 +1078,8 @@
 	.runtime_resume = mmc_sdio_runtime_resume,
 	.power_restore = mmc_sdio_power_restore,
 	.alive = mmc_sdio_alive,
-	.reset = mmc_sdio_reset,
+	.hw_reset = mmc_sdio_hw_reset,
+	.sw_reset = mmc_sdio_sw_reset,
 };
 
 
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 2b32b88..b6d8203 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -139,7 +139,7 @@
 		return -ENODEV;
 
 	ret = dev_pm_domain_attach(dev, false);
-	if (ret == -EPROBE_DEFER)
+	if (ret)
 		return ret;
 
 	/* Unbound SDIO functions are always suspended.
diff --git a/drivers/mmc/core/sdio_uart.c b/drivers/mmc/core/sdio_uart.c
index d3c91f4..25e1130 100644
--- a/drivers/mmc/core/sdio_uart.c
+++ b/drivers/mmc/core/sdio_uart.c
@@ -1008,19 +1008,6 @@
 	return 0;
 }
 
-static int sdio_uart_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, sdio_uart_proc_show, NULL);
-}
-
-static const struct file_operations sdio_uart_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= sdio_uart_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static const struct tty_port_operations sdio_uart_port_ops = {
 	.dtr_rts = uart_dtr_rts,
 	.carrier_raised = uart_carrier_raised,
@@ -1045,7 +1032,7 @@
 	.tiocmset		= sdio_uart_tiocmset,
 	.install		= sdio_uart_install,
 	.cleanup		= sdio_uart_cleanup,
-	.proc_fops		= &sdio_uart_proc_fops,
+	.proc_show		= sdio_uart_proc_show,
 };
 
 static struct tty_driver *sdio_uart_tty_driver;
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 31f7dbb..ef05e00 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -28,15 +28,17 @@
 	irqreturn_t (*cd_gpio_isr)(int irq, void *dev_id);
 	char *ro_label;
 	char cd_label[0];
+	u32 cd_debounce_delay_ms;
 };
 
 static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id)
 {
 	/* Schedule a card detection after a debounce timeout */
 	struct mmc_host *host = dev_id;
+	struct mmc_gpio *ctx = host->slot.handler_priv;
 
 	host->trigger_card_event = true;
-	mmc_detect_change(host, msecs_to_jiffies(200));
+	mmc_detect_change(host, msecs_to_jiffies(ctx->cd_debounce_delay_ms));
 
 	return IRQ_HANDLED;
 }
@@ -49,6 +51,7 @@
 
 	if (ctx) {
 		ctx->ro_label = ctx->cd_label + len;
+		ctx->cd_debounce_delay_ms = 200;
 		snprintf(ctx->cd_label, len, "%s cd", dev_name(host->parent));
 		snprintf(ctx->ro_label, len, "%s ro", dev_name(host->parent));
 		host->slot.handler_priv = ctx;
@@ -76,15 +79,22 @@
 int mmc_gpio_get_cd(struct mmc_host *host)
 {
 	struct mmc_gpio *ctx = host->slot.handler_priv;
+	int cansleep;
 
 	if (!ctx || !ctx->cd_gpio)
 		return -ENOSYS;
 
-	if (ctx->override_cd_active_level)
-		return !gpiod_get_raw_value_cansleep(ctx->cd_gpio) ^
-			!!(host->caps2 & MMC_CAP2_CD_ACTIVE_HIGH);
+	cansleep = gpiod_cansleep(ctx->cd_gpio);
+	if (ctx->override_cd_active_level) {
+		int value = cansleep ?
+				gpiod_get_raw_value_cansleep(ctx->cd_gpio) :
+				gpiod_get_raw_value(ctx->cd_gpio);
+		return !value ^ !!(host->caps2 & MMC_CAP2_CD_ACTIVE_HIGH);
+	}
 
-	return gpiod_get_value_cansleep(ctx->cd_gpio);
+	return cansleep ?
+		gpiod_get_value_cansleep(ctx->cd_gpio) :
+		gpiod_get_value(ctx->cd_gpio);
 }
 EXPORT_SYMBOL(mmc_gpio_get_cd);
 
@@ -261,7 +271,7 @@
 	if (debounce) {
 		ret = gpiod_set_debounce(desc, debounce);
 		if (ret < 0)
-			return ret;
+			ctx->cd_debounce_delay_ms = debounce;
 	}
 
 	if (gpio_invert)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 9589f9c..0581c19 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -345,11 +345,11 @@
 	  If unsure, say N.
 
 config MMC_MESON_GX
-	tristate "Amlogic S905/GX* SD/MMC Host Controller support"
+	tristate "Amlogic S905/GX*/AXG SD/MMC Host Controller support"
 	depends on ARCH_MESON && MMC
 	help
 	  This selects support for the Amlogic SD/MMC Host Controller
-	  found on the S905/GX* family of SoCs.  This controller is
+	  found on the S905/GX*/AXG family of SoCs.  This controller is
 	  MMC 5.1 compliant and supports SD, eMMC and SDIO interfaces.
 
 	  If you have a controller with this interface, say Y here.
@@ -358,7 +358,6 @@
 	tristate "Amlogic Meson6/Meson8/Meson8b SD/MMC Host Controller support"
 	depends on ARCH_MESON || COMPILE_TEST
 	depends on COMMON_CLK
-	depends on HAS_DMA
 	depends on OF
 	help
 	  This selects support for the SD/MMC Host Controller on
@@ -401,7 +400,6 @@
 
 config MMC_OMAP_HS
 	tristate "TI OMAP High Speed Multimedia Card Interface support"
-	depends on HAS_DMA
 	depends on ARCH_OMAP2PLUS || ARCH_KEYSTONE || COMPILE_TEST
 	help
 	  This selects the TI OMAP High Speed Multimedia card Interface.
@@ -511,7 +509,6 @@
 
 config MMC_GOLDFISH
 	tristate "goldfish qemu Multimedia Card Interface support"
-	depends on HAS_DMA
 	depends on GOLDFISH || COMPILE_TEST
 	help
 	  This selects the Goldfish Multimedia card Interface emulation
@@ -605,7 +602,7 @@
 
 config MMC_SDHI_SYS_DMAC
 	tristate "DMA for SDHI SD/SDIO controllers using SYS-DMAC"
-	depends on MMC_SDHI && HAS_DMA
+	depends on MMC_SDHI
 	default MMC_SDHI if (SUPERH || ARM)
 	help
 	  This provides DMA support for SDHI SD/SDIO controllers
@@ -615,7 +612,7 @@
 config MMC_SDHI_INTERNAL_DMAC
 	tristate "DMA for SDHI SD/SDIO controllers using on-chip bus mastering"
 	depends on ARM64 || COMPILE_TEST
-	depends on MMC_SDHI && HAS_DMA
+	depends on MMC_SDHI
 	default MMC_SDHI if ARM64
 	help
 	  This provides DMA support for SDHI SD/SDIO controllers
@@ -669,7 +666,6 @@
 
 config MMC_DW
 	tristate "Synopsys DesignWare Memory Card Interface"
-	depends on HAS_DMA
 	depends on ARC || ARM || ARM64 || MIPS || COMPILE_TEST
 	help
 	  This selects support for the Synopsys DesignWare Mobile Storage IP
@@ -690,6 +686,15 @@
 
 	  If unsure, say Y.
 
+config MMC_DW_BLUEFIELD
+	tristate "BlueField specific extensions for Synopsys DW Memory Card Interface"
+	depends on MMC_DW
+	select MMC_DW_PLTFM
+	help
+	  This selects support for Mellanox BlueField SoC specific extensions to
+	  the Synopsys DesignWare Memory Card Interface driver. Select this
+	  option for platforms based on Mellanox BlueField SoCs.
+
 config MMC_DW_EXYNOS
 	tristate "Exynos specific extensions for Synopsys DW Memory Card Interface"
 	depends on MMC_DW
@@ -748,7 +753,6 @@
 
 config MMC_SH_MMCIF
 	tristate "SuperH Internal MMCIF support"
-	depends on HAS_DMA
 	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
 	help
 	  This selects the MMC Host Interface controller (MMCIF) found in various
@@ -756,11 +760,12 @@
 
 
 config MMC_JZ4740
-	tristate "JZ4740 SD/Multimedia Card Interface support"
-	depends on MACH_JZ4740
+	tristate "Ingenic JZ47xx SD/Multimedia Card Interface support"
+	depends on MACH_JZ4740 || MACH_JZ4780
 	help
-	  This selects support for the SD/MMC controller on Ingenic JZ4740
-	  SoCs.
+	  This selects support for the SD/MMC controller on Ingenic
+	  JZ4740, JZ4750, JZ4770 and JZ4780 SoCs.
+
 	  If you have a board based on such a SoC and with a SD/MMC slot,
 	  say Y or M here.
 
@@ -868,7 +873,6 @@
 config MMC_BCM2835
 	tristate "Broadcom BCM2835 SDHOST MMC Controller support"
 	depends on ARCH_BCM2835 || COMPILE_TEST
-	depends on HAS_DMA
 	help
 	  This selects the BCM2835 SDHOST MMC controller. If you have
 	  a BCM2835 platform with SD or MMC devices, say Y or M here.
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 6aead24..85dc132 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -49,6 +49,7 @@
 obj-$(CONFIG_MMC_CAVIUM_THUNDERX) += thunderx-mmc.o
 obj-$(CONFIG_MMC_DW)		+= dw_mmc.o
 obj-$(CONFIG_MMC_DW_PLTFM)	+= dw_mmc-pltfm.o
+obj-$(CONFIG_MMC_DW_BLUEFIELD)	+= dw_mmc-bluefield.o
 obj-$(CONFIG_MMC_DW_EXYNOS)	+= dw_mmc-exynos.o
 obj-$(CONFIG_MMC_DW_HI3798CV200) += dw_mmc-hi3798cv200.o
 obj-$(CONFIG_MMC_DW_K3)		+= dw_mmc-k3.o
diff --git a/drivers/mmc/host/android-goldfish.c b/drivers/mmc/host/android-goldfish.c
index 63d2758..294de17 100644
--- a/drivers/mmc/host/android-goldfish.c
+++ b/drivers/mmc/host/android-goldfish.c
@@ -217,8 +217,8 @@
 			 * We don't really have DMA, so we need
 			 * to copy from our platform driver buffer
 			 */
-			uint8_t *dest = (uint8_t *)sg_virt(data->sg);
-			memcpy(dest, host->virt_base, data->sg->length);
+			sg_copy_from_buffer(data->sg, 1, host->virt_base,
+					data->sg->length);
 		}
 		host->data->bytes_xfered += data->sg->length;
 		dma_unmap_sg(mmc_dev(host->mmc), data->sg, host->sg_len,
@@ -393,8 +393,8 @@
 		 * We don't really have DMA, so we need to copy to our
 		 * platform driver buffer
 		 */
-		const uint8_t *src = (uint8_t *)sg_virt(data->sg);
-		memcpy(host->virt_base, src, data->sg->length);
+		sg_copy_to_buffer(data->sg, 1, host->virt_base,
+				data->sg->length);
 	}
 }
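
Both goldfish hunks above, and the atmel-mci conversion that follows,
depend on the direction convention of the scatterlist copy helpers:
sg_copy_from_buffer() fills the list from a linear buffer, while
sg_copy_to_buffer() drains it into one. A standalone sketch, single-entry
list, sizes invented:

    #include <linux/scatterlist.h>

    /* Sketch of the helper direction convention; not driver code. */
    static void sg_copy_demo(struct page *pg, void *bounce, size_t len)
    {
            struct scatterlist sg;

            sg_init_table(&sg, 1);
            sg_set_page(&sg, pg, len, 0);

            /* bounce buffer -> sg memory, e.g. completing a read */
            sg_copy_from_buffer(&sg, 1, bounce, len);

            /* sg memory -> bounce buffer, e.g. preparing a write */
            sg_copy_to_buffer(&sg, 1, bounce, len);
    }
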
 
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index e55f393..5aa2c94 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -1967,7 +1967,6 @@
 static void atmci_read_data_pio(struct atmel_mci *host)
 {
 	struct scatterlist	*sg = host->sg;
-	void			*buf = sg_virt(sg);
 	unsigned int		offset = host->pio_offset;
 	struct mmc_data		*data = host->data;
 	u32			value;
@@ -1977,7 +1976,7 @@
 	do {
 		value = atmci_readl(host, ATMCI_RDR);
 		if (likely(offset + 4 <= sg->length)) {
-			put_unaligned(value, (u32 *)(buf + offset));
+			sg_pcopy_from_buffer(sg, 1, &value, sizeof(u32), offset);
 
 			offset += 4;
 			nbytes += 4;
@@ -1990,11 +1989,11 @@
 					goto done;
 
 				offset = 0;
-				buf = sg_virt(sg);
 			}
 		} else {
 			unsigned int remaining = sg->length - offset;
-			memcpy(buf + offset, &value, remaining);
+
+			sg_pcopy_from_buffer(sg, 1, &value, remaining, offset);
 			nbytes += remaining;
 
 			flush_dcache_page(sg_page(sg));
@@ -2004,8 +2003,8 @@
 				goto done;
 
 			offset = 4 - remaining;
-			buf = sg_virt(sg);
-			memcpy(buf, (u8 *)&value + remaining, offset);
+			sg_pcopy_from_buffer(sg, 1, (u8 *)&value + remaining,
+					offset, 0);
 			nbytes += offset;
 		}
 
@@ -2035,7 +2034,6 @@
 static void atmci_write_data_pio(struct atmel_mci *host)
 {
 	struct scatterlist	*sg = host->sg;
-	void			*buf = sg_virt(sg);
 	unsigned int		offset = host->pio_offset;
 	struct mmc_data		*data = host->data;
 	u32			value;
@@ -2044,7 +2042,7 @@
 
 	do {
 		if (likely(offset + 4 <= sg->length)) {
-			value = get_unaligned((u32 *)(buf + offset));
+			sg_pcopy_to_buffer(sg, 1, &value, sizeof(u32), offset);
 			atmci_writel(host, ATMCI_TDR, value);
 
 			offset += 4;
@@ -2056,13 +2054,12 @@
 					goto done;
 
 				offset = 0;
-				buf = sg_virt(sg);
 			}
 		} else {
 			unsigned int remaining = sg->length - offset;
 
 			value = 0;
-			memcpy(&value, buf + offset, remaining);
+			sg_pcopy_to_buffer(sg, 1, &value, remaining, offset);
 			nbytes += remaining;
 
 			host->sg = sg = sg_next(sg);
@@ -2073,8 +2070,8 @@
 			}
 
 			offset = 4 - remaining;
-			buf = sg_virt(sg);
-			memcpy((u8 *)&value + remaining, buf, offset);
+			sg_pcopy_to_buffer(sg, 1, (u8 *)&value + remaining,
+					offset, 0);
 			atmci_writel(host, ATMCI_TDR, value);
 			nbytes += offset;
 		}
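
The PIO loops above use the offset-aware variants; the extra skip argument
lets a 32-bit FIFO word land at, or be fetched from, an arbitrary byte
offset, including the split-word case at an sg boundary. A small sketch,
values invented:

    /* Sketch: move one FIFO word at byte offset `off` in the sg entry. */
    static void fifo_word_demo(struct scatterlist *sg, size_t off)
    {
            u32 value = 0;

            /* sg memory -> value, as the write path does */
            sg_pcopy_to_buffer(sg, 1, &value, sizeof(value), off);

            /* value -> sg memory, as the read path does */
            sg_pcopy_from_buffer(sg, 1, &value, sizeof(value), off);
    }
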
diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c
index ed77fbfa..9b4be67 100644
--- a/drivers/mmc/host/au1xmmc.c
+++ b/drivers/mmc/host/au1xmmc.c
@@ -40,6 +40,7 @@
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
+#include <linux/highmem.h>
 #include <linux/leds.h>
 #include <linux/mmc/host.h>
 #include <linux/slab.h>
@@ -405,7 +406,7 @@
 
 	/* This is the pointer to the data buffer */
 	sg = &data->sg[host->pio.index];
-	sg_ptr = sg_virt(sg) + host->pio.offset;
+	sg_ptr = kmap_atomic(sg_page(sg)) + sg->offset + host->pio.offset;
 
 	/* This is the space left inside the buffer */
 	sg_len = data->sg[host->pio.index].length - host->pio.offset;
@@ -421,11 +422,12 @@
 		if (!(status & SD_STATUS_TH))
 			break;
 
-		val = *sg_ptr++;
+		val = sg_ptr[count];
 
 		__raw_writel((unsigned long)val, HOST_TXPORT(host));
 		wmb(); /* drain writebuffer */
 	}
+	kunmap_atomic(sg_ptr);
 
 	host->pio.len -= count;
 	host->pio.offset += count;
@@ -462,7 +464,7 @@
 
 	if (host->pio.index < host->dma.len) {
 		sg = &data->sg[host->pio.index];
-		sg_ptr = sg_virt(sg) + host->pio.offset;
+		sg_ptr = kmap_atomic(sg_page(sg)) + sg->offset + host->pio.offset;
 
 		/* This is the space left inside the buffer */
 		sg_len = sg_dma_len(&data->sg[host->pio.index]) - host->pio.offset;
@@ -501,8 +503,10 @@
 		val = __raw_readl(HOST_RXPORT(host));
 
 		if (sg_ptr)
-			*sg_ptr++ = (unsigned char)(val & 0xFF);
+			sg_ptr[count] = (unsigned char)(val & 0xFF);
 	}
+	if (sg_ptr)
+		kunmap_atomic(sg_ptr);
 
 	host->pio.len -= count;
 	host->pio.offset += count;
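
au1xmmc here, and mxcmmc further down, replace sg_virt() — which is only
valid for lowmem pages — with a temporary atomic mapping. The pairing
rule, sketched in isolation:

    #include <linux/highmem.h>
    #include <linux/scatterlist.h>

    /* Sketch: map a possibly-highmem sg entry, touch it, unmap. Any
     * address inside the mapped page may be passed to kunmap_atomic(),
     * so keeping the offset in the pointer is fine; no sleeping while
     * the mapping is held.
     */
    static void touch_sg_demo(struct scatterlist *sg)
    {
            u8 *buf = (u8 *)kmap_atomic(sg_page(sg)) + sg->offset;

            buf[0] ^= 0xff;         /* stands in for the real PIO loop */
            kunmap_atomic(buf);
    }
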
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 8e36317..9e68c36 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -1377,8 +1377,7 @@
 #ifdef CONFIG_PM
 static int davinci_mmcsd_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct mmc_davinci_host *host = platform_get_drvdata(pdev);
+	struct mmc_davinci_host *host = dev_get_drvdata(dev);
 
 	writel(0, host->base + DAVINCI_MMCIM);
 	mmc_davinci_reset_ctrl(host, 1);
@@ -1389,8 +1388,7 @@
 
 static int davinci_mmcsd_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct mmc_davinci_host *host = platform_get_drvdata(pdev);
+	struct mmc_davinci_host *host = dev_get_drvdata(dev);
 
 	clk_enable(host->clk);
 	mmc_davinci_reset_ctrl(host, 0);
diff --git a/drivers/mmc/host/dw_mmc-bluefield.c b/drivers/mmc/host/dw_mmc-bluefield.c
new file mode 100644
index 0000000..54c3fbb
--- /dev/null
+++ b/drivers/mmc/host/dw_mmc-bluefield.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Mellanox Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "dw_mmc.h"
+#include "dw_mmc-pltfm.h"
+
+#define UHS_REG_EXT_SAMPLE_MASK		GENMASK(22, 16)
+#define UHS_REG_EXT_DRIVE_MASK		GENMASK(29, 23)
+#define BLUEFIELD_UHS_REG_EXT_SAMPLE	2
+#define BLUEFIELD_UHS_REG_EXT_DRIVE	4
+
+static void dw_mci_bluefield_set_ios(struct dw_mci *host, struct mmc_ios *ios)
+{
+	u32 reg;
+
+	/* Update the Drive and Sample fields in register UHS_REG_EXT. */
+	reg = mci_readl(host, UHS_REG_EXT);
+	reg &= ~UHS_REG_EXT_SAMPLE_MASK;
+	reg |= FIELD_PREP(UHS_REG_EXT_SAMPLE_MASK,
+			  BLUEFIELD_UHS_REG_EXT_SAMPLE);
+	reg &= ~UHS_REG_EXT_DRIVE_MASK;
+	reg |= FIELD_PREP(UHS_REG_EXT_DRIVE_MASK, BLUEFIELD_UHS_REG_EXT_DRIVE);
+	mci_writel(host, UHS_REG_EXT, reg);
+}
+
+static const struct dw_mci_drv_data bluefield_drv_data = {
+	.set_ios		= dw_mci_bluefield_set_ios
+};
+
+static const struct of_device_id dw_mci_bluefield_match[] = {
+	{ .compatible = "mellanox,bluefield-dw-mshc",
+	  .data = &bluefield_drv_data },
+	{},
+};
+MODULE_DEVICE_TABLE(of, dw_mci_bluefield_match);
+
+static int dw_mci_bluefield_probe(struct platform_device *pdev)
+{
+	const struct dw_mci_drv_data *drv_data = NULL;
+	const struct of_device_id *match;
+
+	if (pdev->dev.of_node) {
+		match = of_match_node(dw_mci_bluefield_match,
+				      pdev->dev.of_node);
+		drv_data = match->data;
+	}
+
+	return dw_mci_pltfm_register(pdev, drv_data);
+}
+
+static struct platform_driver dw_mci_bluefield_pltfm_driver = {
+	.probe		= dw_mci_bluefield_probe,
+	.remove		= dw_mci_pltfm_remove,
+	.driver		= {
+		.name		= "dwmmc_bluefield",
+		.of_match_table	= dw_mci_bluefield_match,
+		.pm		= &dw_mci_pltfm_pmops,
+	},
+};
+
+module_platform_driver(dw_mci_bluefield_pltfm_driver);
+
+MODULE_DESCRIPTION("BlueField DW Multimedia Card driver");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_LICENSE("GPL v2");
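
The new glue driver programs its two tuning fields with
GENMASK()/FIELD_PREP() instead of open-coded shifts. The idiom in
isolation, register layout invented:

    #include <linux/bitfield.h>

    #define DEMO_SAMPLE_MASK GENMASK(22, 16)    /* invented field */

    /* Sketch: clear a register field, then shift a value into place. */
    static u32 demo_set_sample(u32 reg, u32 sample)
    {
            reg &= ~DEMO_SAMPLE_MASK;
            reg |= FIELD_PREP(DEMO_SAMPLE_MASK, sample);
            return reg;
    }
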
diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c
index 40d7de2..8c86a80 100644
--- a/drivers/mmc/host/dw_mmc-rockchip.c
+++ b/drivers/mmc/host/dw_mmc-rockchip.c
@@ -44,9 +44,8 @@
 	 * bus_hz = cclkin / RK3288_CLKGEN_DIV
 	 * ios->clock = (div == 0) ? bus_hz : (bus_hz / (2 * div))
 	 *
-	 * Note: div can only be 0 or 1
-	 *       if DDR50 8bit mode(only emmc work in 8bit mode),
-	 *       div must be set 1
+	 * Note: div can only be 0 or 1, but div must be set to 1 for eMMC
+	 * DDR52 8-bit mode.
 	 */
 	if (ios->bus_width == MMC_BUS_WIDTH_8 &&
 	    ios->timing == MMC_TIMING_MMC_DDR52)
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 29a1afa..623f4d2 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1230,6 +1230,8 @@
 	if (host->state == STATE_WAITING_CMD11_DONE)
 		sdmmc_cmd_bits |= SDMMC_CMD_VOLT_SWITCH;
 
+	slot->mmc->actual_clock = 0;
+
 	if (!clock) {
 		mci_writel(host, CLKENA, 0);
 		mci_send_cmd(slot, sdmmc_cmd_bits, 0);
@@ -1288,6 +1290,8 @@
 
 		/* keep the last clock value that was requested from core */
 		slot->__clk_old = clock;
+		slot->mmc->actual_clock = div ? ((host->bus_hz / div) >> 1) :
+					  host->bus_hz;
 	}
 
 	host->current_speed = clock;
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index a0168e9..993386c9 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -1,5 +1,7 @@
 /*
  *  Copyright (C) 2009-2010, Lars-Peter Clausen <lars@metafoo.de>
+ *  Copyright (C) 2013, Imagination Technologies
+ *
  *  JZ4740 SD/MMC controller driver
  *
  *  This program is free software; you can redistribute  it and/or modify it
@@ -13,24 +15,25 @@
  *
  */
 
-#include <linux/mmc/host.h>
-#include <linux/mmc/slot-gpio.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
-#include <linux/interrupt.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/slot-gpio.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
-#include <linux/delay.h>
 #include <linux/scatterlist.h>
-#include <linux/clk.h>
 
-#include <linux/bitops.h>
-#include <linux/gpio.h>
 #include <asm/cacheflush.h>
-#include <linux/dma-mapping.h>
-#include <linux/dmaengine.h>
 
 #include <asm/mach-jz4740/dma.h>
 #include <asm/mach-jz4740/jz4740_mmc.h>
@@ -51,6 +54,7 @@
 #define JZ_REG_MMC_RESP_FIFO	0x34
 #define JZ_REG_MMC_RXFIFO	0x38
 #define JZ_REG_MMC_TXFIFO	0x3C
+#define JZ_REG_MMC_DMAC		0x44
 
 #define JZ_MMC_STRPCL_EXIT_MULTIPLE BIT(7)
 #define JZ_MMC_STRPCL_EXIT_TRANSFER BIT(6)
@@ -104,9 +108,17 @@
 #define JZ_MMC_IRQ_PRG_DONE BIT(1)
 #define JZ_MMC_IRQ_DATA_TRAN_DONE BIT(0)
 
+#define JZ_MMC_DMAC_DMA_SEL BIT(1)
+#define JZ_MMC_DMAC_DMA_EN BIT(0)
 
 #define JZ_MMC_CLK_RATE 24000000
 
+enum jz4740_mmc_version {
+	JZ_MMC_JZ4740,
+	JZ_MMC_JZ4750,
+	JZ_MMC_JZ4780,
+};
+
 enum jz4740_mmc_state {
 	JZ4740_MMC_STATE_READ_RESPONSE,
 	JZ4740_MMC_STATE_TRANSFER_DATA,
@@ -125,6 +137,8 @@
 	struct jz4740_mmc_platform_data *pdata;
 	struct clk *clk;
 
+	enum jz4740_mmc_version version;
+
 	int irq;
 	int card_detect_irq;
 
@@ -137,7 +151,7 @@
 
 	uint32_t cmdat;
 
-	uint16_t irq_mask;
+	uint32_t irq_mask;
 
 	spinlock_t lock;
 
@@ -159,6 +173,32 @@
 #define JZ4740_MMC_FIFO_HALF_SIZE 8
 };
 
+static void jz4740_mmc_write_irq_mask(struct jz4740_mmc_host *host,
+				      uint32_t val)
+{
+	if (host->version >= JZ_MMC_JZ4750)
+		return writel(val, host->base + JZ_REG_MMC_IMASK);
+	else
+		return writew(val, host->base + JZ_REG_MMC_IMASK);
+}
+
+static void jz4740_mmc_write_irq_reg(struct jz4740_mmc_host *host,
+				     uint32_t val)
+{
+	if (host->version >= JZ_MMC_JZ4780)
+		return writel(val, host->base + JZ_REG_MMC_IREG);
+	else
+		return writew(val, host->base + JZ_REG_MMC_IREG);
+}
+
+static uint32_t jz4740_mmc_read_irq_reg(struct jz4740_mmc_host *host)
+{
+	if (host->version >= JZ_MMC_JZ4780)
+		return readl(host->base + JZ_REG_MMC_IREG);
+	else
+		return readw(host->base + JZ_REG_MMC_IREG);
+}
+
 /*----------------------------------------------------------------------------*/
 /* DMA infrastructure */
 
@@ -173,31 +213,23 @@
 
 static int jz4740_mmc_acquire_dma_channels(struct jz4740_mmc_host *host)
 {
-	dma_cap_mask_t mask;
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
-
-	host->dma_tx = dma_request_channel(mask, NULL, host);
-	if (!host->dma_tx) {
+	host->dma_tx = dma_request_chan(mmc_dev(host->mmc), "tx");
+	if (IS_ERR(host->dma_tx)) {
 		dev_err(mmc_dev(host->mmc), "Failed to get dma_tx channel\n");
-		return -ENODEV;
+		return PTR_ERR(host->dma_tx);
 	}
 
-	host->dma_rx = dma_request_channel(mask, NULL, host);
-	if (!host->dma_rx) {
+	host->dma_rx = dma_request_chan(mmc_dev(host->mmc), "rx");
+	if (IS_ERR(host->dma_rx)) {
 		dev_err(mmc_dev(host->mmc), "Failed to get dma_rx channel\n");
-		goto free_master_write;
+		dma_release_channel(host->dma_tx);
+		return PTR_ERR(host->dma_rx);
 	}
 
 	/* Initialize DMA pre request cookie */
 	host->next_data.cookie = 1;
 
 	return 0;
-
-free_master_write:
-	dma_release_channel(host->dma_tx);
-	return -ENODEV;
 }
 
 static inline struct dma_chan *jz4740_mmc_get_dma_chan(struct jz4740_mmc_host *host,
@@ -363,7 +395,7 @@
 	else
 		host->irq_mask |= irq;
 
-	writew(host->irq_mask, host->base + JZ_REG_MMC_IMASK);
+	jz4740_mmc_write_irq_mask(host, host->irq_mask);
 	spin_unlock_irqrestore(&host->lock, flags);
 }
 
@@ -415,10 +447,10 @@
 	unsigned int irq)
 {
 	unsigned int timeout = 0x800;
-	uint16_t status;
+	uint32_t status;
 
 	do {
-		status = readw(host->base + JZ_REG_MMC_IREG);
+		status = jz4740_mmc_read_irq_reg(host);
 	} while (!(status & irq) && --timeout);
 
 	if (timeout == 0) {
@@ -518,7 +550,7 @@
 	void __iomem *fifo_addr = host->base + JZ_REG_MMC_RXFIFO;
 	uint32_t *buf;
 	uint32_t d;
-	uint16_t status;
+	uint32_t status;
 	size_t i, j;
 	unsigned int timeout;
 
@@ -654,8 +686,25 @@
 		cmdat |= JZ_MMC_CMDAT_DATA_EN;
 		if (cmd->data->flags & MMC_DATA_WRITE)
 			cmdat |= JZ_MMC_CMDAT_WRITE;
-		if (host->use_dma)
-			cmdat |= JZ_MMC_CMDAT_DMA_EN;
+		if (host->use_dma) {
+			/*
+			 * The 4780's MMC controller has integrated DMA ability
+			 * in addition to being able to use the external DMA
+			 * controller. It moves DMA control bits to a separate
+			 * register. The DMA_SEL bit chooses the external
+			 * controller over the integrated one. Earlier SoCs
+			 * can only use the external controller, and have a
+			 * single DMA enable bit in CMDAT.
+			 */
+			if (host->version >= JZ_MMC_JZ4780) {
+				writel(JZ_MMC_DMAC_DMA_EN | JZ_MMC_DMAC_DMA_SEL,
+				       host->base + JZ_REG_MMC_DMAC);
+			} else {
+				cmdat |= JZ_MMC_CMDAT_DMA_EN;
+			}
+		} else if (host->version >= JZ_MMC_JZ4780) {
+			writel(0, host->base + JZ_REG_MMC_DMAC);
+		}
 
 		writew(cmd->data->blksz, host->base + JZ_REG_MMC_BLKLEN);
 		writew(cmd->data->blocks, host->base + JZ_REG_MMC_NOB);
@@ -736,7 +785,7 @@
 			host->state = JZ4740_MMC_STATE_SEND_STOP;
 			break;
 		}
-		writew(JZ_MMC_IRQ_DATA_TRAN_DONE, host->base + JZ_REG_MMC_IREG);
+		jz4740_mmc_write_irq_reg(host, JZ_MMC_IRQ_DATA_TRAN_DONE);
 
 	case JZ4740_MMC_STATE_SEND_STOP:
 		if (!req->stop)
@@ -766,9 +815,10 @@
 {
 	struct jz4740_mmc_host *host = devid;
 	struct mmc_command *cmd = host->cmd;
-	uint16_t irq_reg, status, tmp;
+	uint32_t irq_reg, status, tmp;
 
-	irq_reg = readw(host->base + JZ_REG_MMC_IREG);
+	status = readl(host->base + JZ_REG_MMC_STATUS);
+	irq_reg = jz4740_mmc_read_irq_reg(host);
 
 	tmp = irq_reg;
 	irq_reg &= ~host->irq_mask;
@@ -777,10 +827,10 @@
 		JZ_MMC_IRQ_PRG_DONE | JZ_MMC_IRQ_DATA_TRAN_DONE);
 
 	if (tmp != irq_reg)
-		writew(tmp & ~irq_reg, host->base + JZ_REG_MMC_IREG);
+		jz4740_mmc_write_irq_reg(host, tmp & ~irq_reg);
 
 	if (irq_reg & JZ_MMC_IRQ_SDIO) {
-		writew(JZ_MMC_IRQ_SDIO, host->base + JZ_REG_MMC_IREG);
+		jz4740_mmc_write_irq_reg(host, JZ_MMC_IRQ_SDIO);
 		mmc_signal_sdio_irq(host->mmc);
 		irq_reg &= ~JZ_MMC_IRQ_SDIO;
 	}
@@ -789,8 +839,6 @@
 		if (test_and_clear_bit(0, &host->waiting)) {
 			del_timer(&host->timeout_timer);
 
-			status = readl(host->base + JZ_REG_MMC_STATUS);
-
 			if (status & JZ_MMC_STATUS_TIMEOUT_RES) {
 					cmd->error = -ETIMEDOUT;
 			} else if (status & JZ_MMC_STATUS_CRC_RES_ERR) {
@@ -803,7 +851,7 @@
 			}
 
 			jz4740_mmc_set_irq_enabled(host, irq_reg, false);
-			writew(irq_reg, host->base + JZ_REG_MMC_IREG);
+			jz4740_mmc_write_irq_reg(host, irq_reg);
 
 			return IRQ_WAKE_THREAD;
 		}
@@ -818,7 +866,7 @@
 	int real_rate;
 
 	jz4740_mmc_clock_disable(host);
-	clk_set_rate(host->clk, JZ_MMC_CLK_RATE);
+	clk_set_rate(host->clk, host->mmc->f_max);
 
 	real_rate = clk_get_rate(host->clk);
 
@@ -837,9 +885,7 @@
 
 	host->req = req;
 
-	writew(0xffff, host->base + JZ_REG_MMC_IREG);
-
-	writew(JZ_MMC_IRQ_END_CMD_RES, host->base + JZ_REG_MMC_IREG);
+	jz4740_mmc_write_irq_reg(host, ~0);
 	jz4740_mmc_set_irq_enabled(host, JZ_MMC_IRQ_END_CMD_RES, true);
 
 	host->state = JZ4740_MMC_STATE_READ_RESPONSE;
@@ -857,7 +903,7 @@
 	switch (ios->power_mode) {
 	case MMC_POWER_UP:
 		jz4740_mmc_reset(host);
-		if (gpio_is_valid(host->pdata->gpio_power))
+		if (host->pdata && gpio_is_valid(host->pdata->gpio_power))
 			gpio_set_value(host->pdata->gpio_power,
 					!host->pdata->power_active_low);
 		host->cmdat |= JZ_MMC_CMDAT_INIT;
@@ -866,7 +912,7 @@
 	case MMC_POWER_ON:
 		break;
 	default:
-		if (gpio_is_valid(host->pdata->gpio_power))
+		if (host->pdata && gpio_is_valid(host->pdata->gpio_power))
 			gpio_set_value(host->pdata->gpio_power,
 					host->pdata->power_active_low);
 		clk_disable_unprepare(host->clk);
@@ -926,7 +972,7 @@
 static int jz4740_mmc_request_gpios(struct mmc_host *mmc,
 	struct platform_device *pdev)
 {
-	struct jz4740_mmc_platform_data *pdata = pdev->dev.platform_data;
+	struct jz4740_mmc_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	int ret = 0;
 
 	if (!pdata)
@@ -955,7 +1001,7 @@
 
 static void jz4740_mmc_free_gpios(struct platform_device *pdev)
 {
-	struct jz4740_mmc_platform_data *pdata = pdev->dev.platform_data;
+	struct jz4740_mmc_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
 	if (!pdata)
 		return;
@@ -964,14 +1010,22 @@
 		gpio_free(pdata->gpio_power);
 }
 
+static const struct of_device_id jz4740_mmc_of_match[] = {
+	{ .compatible = "ingenic,jz4740-mmc", .data = (void *) JZ_MMC_JZ4740 },
+	{ .compatible = "ingenic,jz4780-mmc", .data = (void *) JZ_MMC_JZ4780 },
+	{},
+};
+MODULE_DEVICE_TABLE(of, jz4740_mmc_of_match);
+
 static int jz4740_mmc_probe(struct platform_device* pdev)
 {
 	int ret;
 	struct mmc_host *mmc;
 	struct jz4740_mmc_host *host;
+	const struct of_device_id *match;
 	struct jz4740_mmc_platform_data *pdata;
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 
 	mmc = mmc_alloc_host(sizeof(struct jz4740_mmc_host), &pdev->dev);
 	if (!mmc) {
@@ -982,6 +1036,27 @@
 	host = mmc_priv(mmc);
 	host->pdata = pdata;
 
+	match = of_match_device(jz4740_mmc_of_match, &pdev->dev);
+	if (match) {
+		host->version = (enum jz4740_mmc_version)match->data;
+		ret = mmc_of_parse(mmc);
+		if (ret) {
+			if (ret != -EPROBE_DEFER)
+				dev_err(&pdev->dev,
+					"could not parse of data: %d\n", ret);
+			goto err_free_host;
+		}
+	} else {
+		/* JZ4740 should be the only one using legacy probe */
+		host->version = JZ_MMC_JZ4740;
+		mmc->caps |= MMC_CAP_SDIO_IRQ;
+		if (!(pdata && pdata->data_1bit))
+			mmc->caps |= MMC_CAP_4_BIT_DATA;
+		ret = jz4740_mmc_request_gpios(mmc, pdev);
+		if (ret)
+			goto err_free_host;
+	}
+
 	host->irq = platform_get_irq(pdev, 0);
 	if (host->irq < 0) {
 		ret = host->irq;
@@ -1004,16 +1079,11 @@
 		goto err_free_host;
 	}
 
-	ret = jz4740_mmc_request_gpios(mmc, pdev);
-	if (ret)
-		goto err_release_dma;
-
 	mmc->ops = &jz4740_mmc_ops;
-	mmc->f_min = JZ_MMC_CLK_RATE / 128;
-	mmc->f_max = JZ_MMC_CLK_RATE;
+	if (!mmc->f_max)
+		mmc->f_max = JZ_MMC_CLK_RATE;
+	mmc->f_min = mmc->f_max / 128;
 	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-	mmc->caps = (pdata && pdata->data_1bit) ? 0 : MMC_CAP_4_BIT_DATA;
-	mmc->caps |= MMC_CAP_SDIO_IRQ;
 
 	mmc->max_blk_size = (1 << 10) - 1;
 	mmc->max_blk_count = (1 << 15) - 1;
@@ -1025,7 +1095,9 @@
 	host->mmc = mmc;
 	host->pdev = pdev;
 	spin_lock_init(&host->lock);
-	host->irq_mask = 0xffff;
+	host->irq_mask = ~0;
+
+	jz4740_mmc_reset(host);
 
 	ret = request_threaded_irq(host->irq, jz_mmc_irq, jz_mmc_irq_worker, 0,
 			dev_name(&pdev->dev), host);
@@ -1034,20 +1106,20 @@
 		goto err_free_gpios;
 	}
 
-	jz4740_mmc_reset(host);
 	jz4740_mmc_clock_disable(host);
 	timer_setup(&host->timeout_timer, jz4740_mmc_timeout, 0);
 
-	host->use_dma = true;
-	if (host->use_dma && jz4740_mmc_acquire_dma_channels(host) != 0)
-		host->use_dma = false;
+	ret = jz4740_mmc_acquire_dma_channels(host);
+	if (ret == -EPROBE_DEFER)
+		goto err_free_irq;
+	host->use_dma = !ret;
 
 	platform_set_drvdata(pdev, host);
 	ret = mmc_add_host(mmc);
 
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to add mmc host: %d\n", ret);
-		goto err_free_irq;
+		goto err_release_dma;
 	}
 	dev_info(&pdev->dev, "JZ SD/MMC card driver registered\n");
 
@@ -1057,13 +1129,13 @@
 
 	return 0;
 
+err_release_dma:
+	if (host->use_dma)
+		jz4740_mmc_release_dma_channels(host);
 err_free_irq:
 	free_irq(host->irq, host);
 err_free_gpios:
 	jz4740_mmc_free_gpios(pdev);
-err_release_dma:
-	if (host->use_dma)
-		jz4740_mmc_release_dma_channels(host);
 err_free_host:
 	mmc_free_host(mmc);
 
@@ -1116,6 +1188,7 @@
 	.remove = jz4740_mmc_remove,
 	.driver = {
 		.name = "jz4740-mmc",
+		.of_match_table = of_match_ptr(jz4740_mmc_of_match),
 		.pm = JZ4740_MMC_PM_OPS,
 	},
 };
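
The probe rework above keys the controller revision off the OF match
table, smuggling an enum through the .data pointer. A minimal sketch of
the pattern with hypothetical names:

    #include <linux/of_device.h>

    enum demo_version { DEMO_V1 = 1, DEMO_V2 };   /* hypothetical */

    static const struct of_device_id demo_of_match[] = {
            { .compatible = "vendor,demo-v1", .data = (void *)DEMO_V1 },
            { .compatible = "vendor,demo-v2", .data = (void *)DEMO_V2 },
            { /* sentinel */ },
    };

    static enum demo_version demo_get_version(struct device *dev)
    {
            /* round-trip through uintptr_t keeps the cast well defined */
            return (enum demo_version)(uintptr_t)of_device_get_match_data(dev);
    }
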
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 4f972b8..c201c37 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -35,6 +35,7 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/regulator/consumer.h>
+#include <linux/reset.h>
 #include <linux/interrupt.h>
 #include <linux/bitfield.h>
 #include <linux/pinctrl/consumer.h>
@@ -47,15 +48,29 @@
 #define   CLK_CORE_PHASE_MASK GENMASK(9, 8)
 #define   CLK_TX_PHASE_MASK GENMASK(11, 10)
 #define   CLK_RX_PHASE_MASK GENMASK(13, 12)
-#define   CLK_TX_DELAY_MASK GENMASK(19, 16)
-#define   CLK_RX_DELAY_MASK GENMASK(23, 20)
+#define   CLK_V2_TX_DELAY_MASK GENMASK(19, 16)
+#define   CLK_V2_RX_DELAY_MASK GENMASK(23, 20)
+#define   CLK_V2_ALWAYS_ON BIT(24)
+
+#define   CLK_V3_TX_DELAY_MASK GENMASK(21, 16)
+#define   CLK_V3_RX_DELAY_MASK GENMASK(27, 22)
+#define   CLK_V3_ALWAYS_ON BIT(28)
+
 #define   CLK_DELAY_STEP_PS 200
 #define   CLK_PHASE_STEP 30
 #define   CLK_PHASE_POINT_NUM (360 / CLK_PHASE_STEP)
-#define   CLK_ALWAYS_ON BIT(24)
+
+#define   CLK_TX_DELAY_MASK(h)		(h->data->tx_delay_mask)
+#define   CLK_RX_DELAY_MASK(h)		(h->data->rx_delay_mask)
+#define   CLK_ALWAYS_ON(h)		(h->data->always_on)
 
 #define SD_EMMC_DELAY 0x4
 #define SD_EMMC_ADJUST 0x8
+
+#define SD_EMMC_DELAY1 0x4
+#define SD_EMMC_DELAY2 0x8
+#define SD_EMMC_V3_ADJUST 0xc
+
 #define SD_EMMC_CALOUT 0x10
 #define SD_EMMC_START 0x40
 #define   START_DESC_INIT BIT(0)
@@ -122,6 +137,12 @@
 
 #define MUX_CLK_NUM_PARENTS 2
 
+struct meson_mmc_data {
+	unsigned int tx_delay_mask;
+	unsigned int rx_delay_mask;
+	unsigned int always_on;
+};
+
 struct sd_emmc_desc {
 	u32 cmd_cfg;
 	u32 cmd_arg;
@@ -131,6 +152,7 @@
 
 struct meson_host {
 	struct	device		*dev;
+	struct	meson_mmc_data *data;
 	struct	mmc_host	*mmc;
 	struct	mmc_command	*cmd;
 
@@ -474,7 +496,7 @@
 
 	/* init SD_EMMC_CLOCK to sane defaults w/min clock rate */
 	clk_reg = 0;
-	clk_reg |= CLK_ALWAYS_ON;
+	clk_reg |= CLK_ALWAYS_ON(host);
 	clk_reg |= CLK_DIV_MASK;
 	writel(clk_reg, host->regs + SD_EMMC_CLOCK);
 
@@ -574,7 +596,7 @@
 
 	tx->reg = host->regs + SD_EMMC_CLOCK;
 	tx->phase_mask = CLK_TX_PHASE_MASK;
-	tx->delay_mask = CLK_TX_DELAY_MASK;
+	tx->delay_mask = CLK_TX_DELAY_MASK(host);
 	tx->delay_step_ps = CLK_DELAY_STEP_PS;
 	tx->hw.init = &init;
 
@@ -597,7 +619,7 @@
 
 	rx->reg = host->regs + SD_EMMC_CLOCK;
 	rx->phase_mask = CLK_RX_PHASE_MASK;
-	rx->delay_mask = CLK_RX_DELAY_MASK;
+	rx->delay_mask = CLK_RX_DELAY_MASK(host);
 	rx->delay_step_ps = CLK_DELAY_STEP_PS;
 	rx->hw.init = &init;
 
@@ -1184,6 +1206,21 @@
 		goto free_host;
 	}
 
+	host->data = (struct meson_mmc_data *)
+		of_device_get_match_data(&pdev->dev);
+	if (!host->data) {
+		ret = -EINVAL;
+		goto free_host;
+	}
+
+	ret = device_reset_optional(&pdev->dev);
+	if (ret) {
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "device reset failed: %d\n", ret);
+
+		goto free_host;
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	host->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(host->regs)) {
@@ -1315,11 +1352,24 @@
 	return 0;
 }
 
+static const struct meson_mmc_data meson_gx_data = {
+	.tx_delay_mask	= CLK_V2_TX_DELAY_MASK,
+	.rx_delay_mask	= CLK_V2_RX_DELAY_MASK,
+	.always_on	= CLK_V2_ALWAYS_ON,
+};
+
+static const struct meson_mmc_data meson_axg_data = {
+	.tx_delay_mask	= CLK_V3_TX_DELAY_MASK,
+	.rx_delay_mask	= CLK_V3_RX_DELAY_MASK,
+	.always_on	= CLK_V3_ALWAYS_ON,
+};
+
 static const struct of_device_id meson_mmc_of_match[] = {
-	{ .compatible = "amlogic,meson-gx-mmc", },
-	{ .compatible = "amlogic,meson-gxbb-mmc", },
-	{ .compatible = "amlogic,meson-gxl-mmc", },
-	{ .compatible = "amlogic,meson-gxm-mmc", },
+	{ .compatible = "amlogic,meson-gx-mmc",		.data = &meson_gx_data },
+	{ .compatible = "amlogic,meson-gxbb-mmc",	.data = &meson_gx_data },
+	{ .compatible = "amlogic,meson-gxl-mmc",	.data = &meson_gx_data },
+	{ .compatible = "amlogic,meson-gxm-mmc",	.data = &meson_gx_data },
+	{ .compatible = "amlogic,meson-axg-mmc",	.data = &meson_axg_data },
 	{}
 };
 MODULE_DEVICE_TABLE(of, meson_mmc_of_match);
@@ -1335,6 +1385,6 @@
 
 module_platform_driver(meson_mmc_driver);
 
-MODULE_DESCRIPTION("Amlogic S905*/GX* SD/eMMC driver");
+MODULE_DESCRIPTION("Amlogic S905*/GX*/AXG SD/eMMC driver");
 MODULE_AUTHOR("Kevin Hilman <khilman@baylibre.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 70b0df8..f184977 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1253,15 +1253,12 @@
 	struct sg_mapping_iter *sg_miter = &host->sg_miter;
 	struct variant_data *variant = host->variant;
 	void __iomem *base = host->base;
-	unsigned long flags;
 	u32 status;
 
 	status = readl(base + MMCISTATUS);
 
 	dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status);
 
-	local_irq_save(flags);
-
 	do {
 		unsigned int remain, len;
 		char *buffer;
@@ -1301,8 +1298,6 @@
 
 	sg_miter_stop(sg_miter);
 
-	local_irq_restore(flags);
-
 	/*
 	 * If we have less than the fifo 'half-full' threshold to transfer,
 	 * trigger a PIO interrupt as soon as any data is available.
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index cb274e8..0484138 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -19,6 +19,7 @@
 #include <linux/ioport.h>
 #include <linux/irq.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/of_gpio.h>
 #include <linux/pinctrl/consumer.h>
@@ -70,6 +71,7 @@
 #define SDC_ADV_CFG0     0x64
 #define EMMC_IOCON       0x7c
 #define SDC_ACMD_RESP    0x80
+#define DMA_SA_H4BIT     0x8c
 #define MSDC_DMA_SA      0x90
 #define MSDC_DMA_CTRL    0x98
 #define MSDC_DMA_CFG     0x9c
@@ -194,6 +196,9 @@
 /* SDC_ADV_CFG0 mask */
 #define SDC_RX_ENHANCE_EN	(0x1 << 20)	/* RW */
 
+/* DMA_SA_H4BIT mask */
+#define DMA_ADDR_HIGH_4BIT      (0xf << 0)      /* RW */
+
 /* MSDC_DMA_CTRL mask */
 #define MSDC_DMA_CTRL_START     (0x1 << 0)	/* W */
 #define MSDC_DMA_CTRL_STOP      (0x1 << 1)	/* W */
@@ -227,6 +232,7 @@
 
 #define MSDC_PATCH_BIT2_CFGRESP   (0x1 << 15)   /* RW */
 #define MSDC_PATCH_BIT2_CFGCRCSTS (0x1 << 28)   /* RW */
+#define MSDC_PB2_SUPPORT_64G      (0x1 << 1)    /* RW */
 #define MSDC_PB2_RESPWAIT         (0x3 << 2)    /* RW */
 #define MSDC_PB2_RESPSTSENSEL     (0x7 << 16)   /* RW */
 #define MSDC_PB2_CRCSTSENSEL      (0x7 << 29)   /* RW */
@@ -280,6 +286,8 @@
 #define GPDMA_DESC_BDP		(0x1 << 1)
 #define GPDMA_DESC_CHECKSUM	(0xff << 8) /* bit8 ~ bit15 */
 #define GPDMA_DESC_INT		(0x1 << 16)
+#define GPDMA_DESC_NEXT_H4	(0xf << 24)
+#define GPDMA_DESC_PTR_H4	(0xf << 28)
 	u32 next;
 	u32 ptr;
 	u32 gpd_data_len;
@@ -296,6 +304,8 @@
 #define BDMA_DESC_CHECKSUM	(0xff << 8) /* bit8 ~ bit15 */
 #define BDMA_DESC_BLKPAD	(0x1 << 17)
 #define BDMA_DESC_DWPAD		(0x1 << 18)
+#define BDMA_DESC_NEXT_H4	(0xf << 24)
+#define BDMA_DESC_PTR_H4	(0xf << 28)
 	u32 next;
 	u32 ptr;
 	u32 bd_data_len;
@@ -334,6 +344,7 @@
 	bool busy_check;
 	bool stop_clk_fix;
 	bool enhance_rx;
+	bool support_64g;
 };
 
 struct msdc_tune_para {
@@ -403,6 +414,7 @@
 	.busy_check = false,
 	.stop_clk_fix = false,
 	.enhance_rx = false,
+	.support_64g = false,
 };
 
 static const struct mtk_mmc_compatible mt8173_compat = {
@@ -414,6 +426,7 @@
 	.busy_check = false,
 	.stop_clk_fix = false,
 	.enhance_rx = false,
+	.support_64g = false,
 };
 
 static const struct mtk_mmc_compatible mt2701_compat = {
@@ -425,6 +438,7 @@
 	.busy_check = false,
 	.stop_clk_fix = false,
 	.enhance_rx = false,
+	.support_64g = false,
 };
 
 static const struct mtk_mmc_compatible mt2712_compat = {
@@ -436,6 +450,7 @@
 	.busy_check = true,
 	.stop_clk_fix = true,
 	.enhance_rx = true,
+	.support_64g = true,
 };
 
 static const struct mtk_mmc_compatible mt7622_compat = {
@@ -447,6 +462,7 @@
 	.busy_check = true,
 	.stop_clk_fix = true,
 	.enhance_rx = true,
+	.support_64g = false,
 };
 
 static const struct of_device_id msdc_of_ids[] = {
@@ -556,7 +572,12 @@
 		/* init bd */
 		bd[j].bd_info &= ~BDMA_DESC_BLKPAD;
 		bd[j].bd_info &= ~BDMA_DESC_DWPAD;
-		bd[j].ptr = (u32)dma_address;
+		bd[j].ptr = lower_32_bits(dma_address);
+		if (host->dev_comp->support_64g) {
+			bd[j].bd_info &= ~BDMA_DESC_PTR_H4;
+			bd[j].bd_info |= (upper_32_bits(dma_address) & 0xf)
+					 << 28;
+		}
 		bd[j].bd_data_len &= ~BDMA_DESC_BUFLEN;
 		bd[j].bd_data_len |= (dma_len & BDMA_DESC_BUFLEN);
 
@@ -575,7 +596,10 @@
 	dma_ctrl &= ~(MSDC_DMA_CTRL_BRUSTSZ | MSDC_DMA_CTRL_MODE);
 	dma_ctrl |= (MSDC_BURST_64B << 12 | 1 << 8);
 	writel_relaxed(dma_ctrl, host->base + MSDC_DMA_CTRL);
-	writel((u32)dma->gpd_addr, host->base + MSDC_DMA_SA);
+	if (host->dev_comp->support_64g)
+		sdr_set_field(host->base + DMA_SA_H4BIT, DMA_ADDR_HIGH_4BIT,
+			      upper_32_bits(dma->gpd_addr) & 0xf);
+	writel(lower_32_bits(dma->gpd_addr), host->base + MSDC_DMA_SA);
 }
 
 static void msdc_prepare_data(struct msdc_host *host, struct mmc_request *mrq)
@@ -1366,6 +1390,9 @@
 			     MSDC_PATCH_BIT2_CFGCRCSTS);
 	}
 
+	if (host->dev_comp->support_64g)
+		sdr_set_bits(host->base + MSDC_PATCH_BIT2,
+			     MSDC_PB2_SUPPORT_64G);
 	if (host->dev_comp->data_tune) {
 		sdr_set_bits(host->base + tune_reg,
 			     MSDC_PAD_TUNE_RD_SEL | MSDC_PAD_TUNE_CMD_SEL);
@@ -1407,19 +1434,32 @@
 {
 	struct mt_gpdma_desc *gpd = dma->gpd;
 	struct mt_bdma_desc *bd = dma->bd;
+	dma_addr_t dma_addr;
 	int i;
 
 	memset(gpd, 0, sizeof(struct mt_gpdma_desc) * 2);
 
+	dma_addr = dma->gpd_addr + sizeof(struct mt_gpdma_desc);
 	gpd->gpd_info = GPDMA_DESC_BDP; /* hwo, cs, bd pointer */
-	gpd->ptr = (u32)dma->bd_addr; /* physical address */
 	/* gpd->next is must set for desc DMA
 	 * That's why must alloc 2 gpd structure.
 	 */
-	gpd->next = (u32)dma->gpd_addr + sizeof(struct mt_gpdma_desc);
+	gpd->next = lower_32_bits(dma_addr);
+	if (host->dev_comp->support_64g)
+		gpd->gpd_info |= (upper_32_bits(dma_addr) & 0xf) << 24;
+
+	dma_addr = dma->bd_addr;
+	gpd->ptr = lower_32_bits(dma->bd_addr); /* physical address */
+	if (host->dev_comp->support_64g)
+		gpd->gpd_info |= (upper_32_bits(dma_addr) & 0xf) << 28;
+
 	memset(bd, 0, sizeof(struct mt_bdma_desc) * MAX_BD_NUM);
-	for (i = 0; i < (MAX_BD_NUM - 1); i++)
-		bd[i].next = (u32)dma->bd_addr + sizeof(*bd) * (i + 1);
+	for (i = 0; i < (MAX_BD_NUM - 1); i++) {
+		dma_addr = dma->bd_addr + sizeof(*bd) * (i + 1);
+		bd[i].next = lower_32_bits(dma_addr);
+		if (host->dev_comp->support_64g)
+			bd[i].bd_info |= (upper_32_bits(dma_addr) & 0xf) << 24;
+	}
 }
 
 static void msdc_ops_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
@@ -1820,7 +1860,6 @@
 	struct mmc_host *mmc;
 	struct msdc_host *host;
 	struct resource *res;
-	const struct of_device_id *of_id;
 	int ret;
 
 	if (!pdev->dev.of_node) {
@@ -1828,9 +1867,6 @@
 		return -EINVAL;
 	}
 
-	of_id = of_match_node(msdc_of_ids, pdev->dev.of_node);
-	if (!of_id)
-		return -EINVAL;
 	/* Allocate MMC host for this device */
 	mmc = mmc_alloc_host(sizeof(struct msdc_host), &pdev->dev);
 	if (!mmc)
@@ -1899,7 +1935,7 @@
 	msdc_of_property_parse(pdev, host);
 
 	host->dev = &pdev->dev;
-	host->dev_comp = of_id->data;
+	host->dev_comp = of_device_get_match_data(&pdev->dev);
 	host->mmc = mmc;
 	host->src_clk_freq = clk_get_rate(host->src_clk);
 	/* Set host parameters to mmc */
@@ -1916,7 +1952,10 @@
 	mmc->max_blk_size = 2048;
 	mmc->max_req_size = 512 * 1024;
 	mmc->max_blk_count = mmc->max_req_size / 512;
-	host->dma_mask = DMA_BIT_MASK(32);
+	if (host->dev_comp->support_64g)
+		host->dma_mask = DMA_BIT_MASK(36);
+	else
+		host->dma_mask = DMA_BIT_MASK(32);
 	mmc_dev(mmc)->dma_mask = &host->dma_mask;
 
 	host->timeout_clks = 3 * 1048576;
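
The 64G support above splits every descriptor address with
lower_32_bits()/upper_32_bits(), keeping only the four extra bits the
MSDC hardware can drive. Sketched standalone; the H4 field position
mirrors the driver, the helper itself is invented:

    /* Sketch: pack a 36-bit DMA address into a 32-bit pointer word plus
     * a 4-bit high field at bits 31:28 of an info word.
     */
    static void pack_addr36_demo(dma_addr_t addr, u32 *ptr, u32 *info)
    {
            *ptr = lower_32_bits(addr);
            *info &= ~(0xfU << 28);
            *info |= (upper_32_bits(addr) & 0xf) << 28;
    }
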
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 210247b..e22bbff 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -143,6 +143,7 @@
 	struct mmc_command *cmd = mrq->cmd;
 	u32 cmdreg = 0, xfer = 0, intr = 0;
 	unsigned long flags;
+	unsigned int timeout;
 
 	BUG_ON(host->mrq != NULL);
 	host->mrq = mrq;
@@ -234,7 +235,8 @@
 	mvsd_write(MVSD_NOR_INTR_EN, host->intr_en);
 	mvsd_write(MVSD_ERR_INTR_EN, 0xffff);
 
-	mod_timer(&host->timer, jiffies + 5 * HZ);
+	timeout = cmd->busy_timeout ? cmd->busy_timeout : 5000;
+	mod_timer(&host->timer, jiffies + msecs_to_jiffies(timeout));
 
 	spin_unlock_irqrestore(&host->lock, flags);
 }
@@ -755,6 +757,8 @@
 	if (maxfreq)
 		mmc->f_max = maxfreq;
 
+	mmc->caps |= MMC_CAP_ERASE;
+
 	spin_lock_init(&host->lock);
 
 	host->base = devm_ioremap_resource(&pdev->dev, r);
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 5ff8ef7..75f781c 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
+#include <linux/highmem.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/blkdev.h>
@@ -291,8 +292,11 @@
 	struct scatterlist *sg;
 	int i;
 
-	for_each_sg(data->sg, sg, data->sg_len, i)
-		buffer_swap32(sg_virt(sg), sg->length);
+	for_each_sg(data->sg, sg, data->sg_len, i) {
+		void *buf = kmap_atomic(sg_page(sg)) + sg->offset;
+		buffer_swap32(buf, sg->length);
+		kunmap_atomic(buf);
+	}
 }
 #else
 static inline void mxcmci_swap_buffers(struct mmc_data *data) {}
@@ -609,6 +612,7 @@
 {
 	struct mmc_data *data = host->req->data;
 	struct scatterlist *sg;
+	void *buf;
 	int stat, i;
 
 	host->data = data;
@@ -616,14 +620,18 @@
 
 	if (data->flags & MMC_DATA_READ) {
 		for_each_sg(data->sg, sg, data->sg_len, i) {
-			stat = mxcmci_pull(host, sg_virt(sg), sg->length);
+			buf = kmap_atomic(sg_page(sg)) + sg->offset;
+			stat = mxcmci_pull(host, buf, sg->length);
+			kunmap_atomic(buf);
 			if (stat)
 				return stat;
 			host->datasize += sg->length;
 		}
 	} else {
 		for_each_sg(data->sg, sg, data->sg_len, i) {
-			stat = mxcmci_push(host, sg_virt(sg), sg->length);
+			buf = kmap_atomic(sg_page(sg)) + sg->offset;
+			stat = mxcmci_push(host, buf, sg->length);
+			kunmap_atomic(buf);
 			if (stat)
 				return stat;
 			host->datasize += sg->length;
@@ -1206,7 +1214,8 @@
 	return 0;
 }
 
-static int __maybe_unused mxcmci_suspend(struct device *dev)
+#ifdef CONFIG_PM_SLEEP
+static int mxcmci_suspend(struct device *dev)
 {
 	struct mmc_host *mmc = dev_get_drvdata(dev);
 	struct mxcmci_host *host = mmc_priv(mmc);
@@ -1216,7 +1225,7 @@
 	return 0;
 }
 
-static int __maybe_unused mxcmci_resume(struct device *dev)
+static int mxcmci_resume(struct device *dev)
 {
 	struct mmc_host *mmc = dev_get_drvdata(dev);
 	struct mxcmci_host *host = mmc_priv(mmc);
@@ -1232,6 +1241,7 @@
 
 	return ret;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
 
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 51e01f0..45c015da 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -28,6 +28,7 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
+#include <linux/mmc/slot-gpio.h>
 #include <linux/mfd/tmio.h>
 #include <linux/sh_dma.h>
 #include <linux/delay.h>
@@ -534,6 +535,10 @@
 	host->multi_io_quirk	= renesas_sdhi_multi_io_quirk;
 	host->dma_ops		= dma_ops;
 
+	/* For some SoCs, we disable internal WP. GPIO may override this */
+	if (mmc_can_gpio_ro(host->mmc))
+		mmc_data->capabilities2 &= ~MMC_CAP2_NO_WRITE_PROTECT;
+
 	/* SDR speeds are only available on Gen2+ */
 	if (mmc_data->flags & TMIO_MMC_MIN_RCAR2) {
 		/* card_busy caused issues on r8a73a4 (pre-Gen2) CD-less SDHI */
diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index 6af946d..f7f9773 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -87,11 +87,12 @@
 			  TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
 			  MMC_CAP_CMD23,
+	.capabilities2	= MMC_CAP2_NO_WRITE_PROTECT,
 	.bus_shift	= 2,
 	.scc_offset	= 0x1000,
 	.taps		= rcar_gen3_scc_taps,
 	.taps_num	= ARRAY_SIZE(rcar_gen3_scc_taps),
-	/* Gen3 SDHI DMAC can handle 0xffffffff blk count, but seg = 1 */
+	/* DMAC can handle 0xffffffff blk count but only 1 segment */
 	.max_blk_count	= 0xffffffff,
 	.max_segs	= 1,
 };
@@ -157,38 +158,34 @@
 {
 	struct scatterlist *sg = host->sg_ptr;
 	u32 dtran_mode = DTRAN_MODE_BUS_WID_TH | DTRAN_MODE_ADDR_MODE;
-	enum dma_data_direction dir;
-	int ret;
 
-	/* This DMAC cannot handle if sg_len is not 1 */
-	WARN_ON(host->sg_len > 1);
+	if (!dma_map_sg(&host->pdev->dev, sg, host->sg_len,
+			mmc_get_dma_dir(data)))
+		goto force_pio;
 
 	/* This DMAC cannot handle if buffer is not 8-bytes alignment */
-	if (!IS_ALIGNED(sg->offset, 8))
+	if (!IS_ALIGNED(sg_dma_address(sg), 8)) {
+		dma_unmap_sg(&host->pdev->dev, sg, host->sg_len,
+			     mmc_get_dma_dir(data));
 		goto force_pio;
+	}
 
 	if (data->flags & MMC_DATA_READ) {
 		dtran_mode |= DTRAN_MODE_CH_NUM_CH1;
-		dir = DMA_FROM_DEVICE;
 		if (test_bit(SDHI_INTERNAL_DMAC_ONE_RX_ONLY, &global_flags) &&
 		    test_and_set_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags))
 			goto force_pio;
 	} else {
 		dtran_mode |= DTRAN_MODE_CH_NUM_CH0;
-		dir = DMA_TO_DEVICE;
 	}
 
-	ret = dma_map_sg(&host->pdev->dev, sg, host->sg_len, dir);
-	if (ret == 0)
-		goto force_pio;
-
 	renesas_sdhi_internal_dmac_enable_dma(host, true);
 
 	/* set dma parameters */
 	renesas_sdhi_internal_dmac_dm_write(host, DM_CM_DTRAN_MODE,
 					    dtran_mode);
 	renesas_sdhi_internal_dmac_dm_write(host, DM_DTRAN_ADDR,
-					    sg->dma_address);
+					    sg_dma_address(sg));
 
 	return;
 
@@ -272,12 +269,17 @@
  * implementation as others may use a different implementation.
  */
 static const struct soc_device_attribute gen3_soc_whitelist[] = {
+	/* specific ones */
 	{ .soc_id = "r8a7795", .revision = "ES1.*",
 	  .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
-	{ .soc_id = "r8a7795", .revision = "ES2.0" },
 	{ .soc_id = "r8a7796", .revision = "ES1.0",
 	  .data = (void *)BIT(SDHI_INTERNAL_DMAC_ONE_RX_ONLY) },
-	{ .soc_id = "r8a77995", .revision = "ES1.0" },
+	/* generic ones */
+	{ .soc_id = "r8a7795" },
+	{ .soc_id = "r8a7796" },
+	{ .soc_id = "r8a77965" },
+	{ .soc_id = "r8a77980" },
+	{ .soc_id = "r8a77995" },
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/mmc/host/renesas_sdhi_sys_dmac.c b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
index 848e50c..4bb46c4 100644
--- a/drivers/mmc/host/renesas_sdhi_sys_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_sys_dmac.c
@@ -42,6 +42,7 @@
 static const struct renesas_sdhi_of_data of_rcar_gen1_compatible = {
 	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_CLK_ACTUAL,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ,
+	.capabilities2	= MMC_CAP2_NO_WRITE_PROTECT,
 };
 
 /* Definitions for sampling clocks */
@@ -61,6 +62,7 @@
 			  TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
 			  MMC_CAP_CMD23,
+	.capabilities2	= MMC_CAP2_NO_WRITE_PROTECT,
 	.dma_buswidth	= DMA_SLAVE_BUSWIDTH_4_BYTES,
 	.dma_rx_offset	= 0x2000,
 	.scc_offset	= 0x0300,
@@ -81,6 +83,7 @@
 			  TMIO_MMC_HAVE_CBSY | TMIO_MMC_MIN_RCAR2,
 	.capabilities	= MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
 			  MMC_CAP_CMD23,
+	.capabilities2	= MMC_CAP2_NO_WRITE_PROTECT,
 	.bus_shift	= 2,
 	.scc_offset	= 0x1000,
 	.taps		= rcar_gen3_scc_taps,
diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c
index 7842207..9a3ff22 100644
--- a/drivers/mmc/host/rtsx_usb_sdmmc.c
+++ b/drivers/mmc/host/rtsx_usb_sdmmc.c
@@ -26,7 +26,6 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sd.h>
-#include <linux/mmc/sdio.h>
 #include <linux/mmc/card.h>
 #include <linux/scatterlist.h>
 #include <linux/pm_runtime.h>
@@ -343,7 +342,7 @@
 	}
 
 	if (rsp_type == SD_RSP_TYPE_R1b)
-		timeout = 3000;
+		timeout = cmd->busy_timeout ? cmd->busy_timeout : 3000;
 
 	if (cmd->opcode == SD_SWITCH_VOLTAGE) {
 		err = rtsx_usb_write_register(ucr, SD_BUS_STAT,
@@ -839,17 +838,6 @@
 		goto finish_detect_card;
 	}
 
-	/*
-	 * Reject SDIO CMDs to speed up card identification
-	 * since unsupported
-	 */
-	if (cmd->opcode == SD_IO_SEND_OP_COND ||
-	    cmd->opcode == SD_IO_RW_DIRECT ||
-	    cmd->opcode == SD_IO_RW_EXTENDED) {
-		cmd->error = -EINVAL;
-		goto finish;
-	}
-
 	mutex_lock(&ucr->dev_mutex);
 
 	mutex_lock(&host->host_mutex);
@@ -1332,8 +1320,9 @@
 	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SD_HIGHSPEED |
 		MMC_CAP_MMC_HIGHSPEED | MMC_CAP_BUS_WIDTH_TEST |
 		MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | MMC_CAP_UHS_SDR50 |
-		MMC_CAP_NEEDS_POLL;
-	mmc->caps2 = MMC_CAP2_NO_PRESCAN_POWERUP | MMC_CAP2_FULL_PWR_CYCLE;
+		MMC_CAP_NEEDS_POLL | MMC_CAP_ERASE;
+	mmc->caps2 = MMC_CAP2_NO_PRESCAN_POWERUP | MMC_CAP2_FULL_PWR_CYCLE |
+		MMC_CAP2_NO_SDIO;
 
 	mmc->max_current_330 = 400;
 	mmc->max_current_180 = 800;
diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index 11ca95c..bdbd489 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c
@@ -284,10 +284,8 @@
 	sdhci_bcm_kona_sd_init(host);
 
 	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(dev, "Failed sdhci_add_host\n");
+	if (ret)
 		goto err_reset;
-	}
 
 	/* if device is eMMC, emulate card insert right here */
 	if (!mmc_card_is_removable(host->mmc)) {
diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c
index 0f589e2..7a343b8 100644
--- a/drivers/mmc/host/sdhci-cadence.c
+++ b/drivers/mmc/host/sdhci-cadence.c
@@ -253,6 +253,7 @@
 	struct sdhci_cdns_priv *priv = sdhci_cdns_priv(host);
 	void __iomem *reg = priv->hrs_addr + SDHCI_CDNS_HRS06;
 	u32 tmp;
+	int i, ret;
 
 	if (WARN_ON(!FIELD_FIT(SDHCI_CDNS_HRS06_TUNE, val)))
 		return -EINVAL;
@@ -260,11 +261,24 @@
 	tmp = readl(reg);
 	tmp &= ~SDHCI_CDNS_HRS06_TUNE;
 	tmp |= FIELD_PREP(SDHCI_CDNS_HRS06_TUNE, val);
-	tmp |= SDHCI_CDNS_HRS06_TUNE_UP;
-	writel(tmp, reg);
 
-	return readl_poll_timeout(reg, tmp, !(tmp & SDHCI_CDNS_HRS06_TUNE_UP),
-				  0, 1);
+	/*
+	 * Workaround for IP errata:
+	 * The IP6116 SD/eMMC PHY design has a timing issue on receive data
+	 * path. Send tune request twice.
+	 */
+	for (i = 0; i < 2; i++) {
+		tmp |= SDHCI_CDNS_HRS06_TUNE_UP;
+		writel(tmp, reg);
+
+		ret = readl_poll_timeout(reg, tmp,
+					 !(tmp & SDHCI_CDNS_HRS06_TUNE_UP),
+					 0, 1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static int sdhci_cdns_execute_tuning(struct mmc_host *mmc, u32 opcode)
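
The errata workaround above is the stock readl_poll_timeout() wait on a
self-clearing trigger bit, simply run twice. The shape of one iteration,
standalone:

    #include <linux/iopoll.h>

    /* Sketch: raise a self-clearing trigger bit, then poll until the
     * hardware drops it (no delay between reads, 1 us budget, as above).
     */
    static int kick_and_wait_demo(void __iomem *reg, u32 trigger)
    {
            u32 tmp = readl(reg);

            writel(tmp | trigger, reg);
            return readl_poll_timeout(reg, tmp,
                                      !(tmp & trigger), 0, 1);
    }
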
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index cd2b5f6..d6aef70 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -41,6 +41,12 @@
 #define  ESDHC_VENDOR_SPEC_FRC_SDCLK_ON	(1 << 8)
 #define ESDHC_WTMK_LVL			0x44
 #define  ESDHC_WTMK_DEFAULT_VAL		0x10401040
+#define  ESDHC_WTMK_LVL_RD_WML_MASK	0x000000FF
+#define  ESDHC_WTMK_LVL_RD_WML_SHIFT	0
+#define  ESDHC_WTMK_LVL_WR_WML_MASK	0x00FF0000
+#define  ESDHC_WTMK_LVL_WR_WML_SHIFT	16
+#define  ESDHC_WTMK_LVL_WML_VAL_DEF	64
+#define  ESDHC_WTMK_LVL_WML_VAL_MAX	128
 #define ESDHC_MIX_CTRL			0x48
 #define  ESDHC_MIX_CTRL_DDREN		(1 << 3)
 #define  ESDHC_MIX_CTRL_AC23EN		(1 << 7)
@@ -516,6 +522,7 @@
 		}
 
 		if (esdhc_is_usdhc(imx_data)) {
+			u32 wml;
 			u32 m = readl(host->ioaddr + ESDHC_MIX_CTRL);
 			/* Swap AC23 bit */
 			if (val & SDHCI_TRNS_AUTO_CMD23) {
@@ -524,6 +531,21 @@
 			}
 			m = val | (m & ~ESDHC_MIX_CTRL_SDHCI_MASK);
 			writel(m, host->ioaddr + ESDHC_MIX_CTRL);
+
+			/* Set watermark levels for PIO access to maximum value
+			 * (128 words) to accommodate full 512 bytes buffer.
+			 * For DMA access restore the levels to default value.
+			 */
+			m = readl(host->ioaddr + ESDHC_WTMK_LVL);
+			if (val & SDHCI_TRNS_DMA)
+				wml = ESDHC_WTMK_LVL_WML_VAL_DEF;
+			else
+				wml = ESDHC_WTMK_LVL_WML_VAL_MAX;
+			m &= ~(ESDHC_WTMK_LVL_RD_WML_MASK |
+			       ESDHC_WTMK_LVL_WR_WML_MASK);
+			m |= (wml << ESDHC_WTMK_LVL_RD_WML_SHIFT) |
+			     (wml << ESDHC_WTMK_LVL_WR_WML_SHIFT);
+			writel(m, host->ioaddr + ESDHC_WTMK_LVL);
 		} else {
 			/*
 			 * Postpone this write, we must do it together with a
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index c283291..646bf37 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -21,6 +21,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/iopoll.h>
+#include <linux/regulator/consumer.h>
 
 #include "sdhci-pltfm.h"
 
@@ -77,10 +78,16 @@
 #define CORE_HC_MCLK_SEL_DFLT	(2 << 8)
 #define CORE_HC_MCLK_SEL_HS400	(3 << 8)
 #define CORE_HC_MCLK_SEL_MASK	(3 << 8)
+#define CORE_IO_PAD_PWR_SWITCH_EN	(1 << 15)
+#define CORE_IO_PAD_PWR_SWITCH  (1 << 16)
 #define CORE_HC_SELECT_IN_EN	BIT(18)
 #define CORE_HC_SELECT_IN_HS400	(6 << 19)
 #define CORE_HC_SELECT_IN_MASK	(7 << 19)
 
+#define CORE_3_0V_SUPPORT	(1 << 25)
+#define CORE_1_8V_SUPPORT	(1 << 26)
+#define CORE_VOLT_SUPPORT	(CORE_3_0V_SUPPORT | CORE_1_8V_SUPPORT)
+
 #define CORE_CSR_CDC_CTLR_CFG0		0x130
 #define CORE_SW_TRIG_FULL_CALIB		BIT(16)
 #define CORE_HW_AUTOCAL_ENA		BIT(17)
@@ -148,6 +155,7 @@
 	u32 curr_io_level;
 	wait_queue_head_t pwr_irq_wait;
 	bool pwr_irq_flag;
+	u32 caps_0;
 };
 
 static unsigned int msm_get_clock_rate_for_bus_mode(struct sdhci_host *host,
@@ -1103,8 +1111,8 @@
 	struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
 	u32 irq_status, irq_ack = 0;
 	int retry = 10;
-	int pwr_state = 0, io_level = 0;
-
+	u32 pwr_state = 0, io_level = 0;
+	u32 config;
 
 	irq_status = readl_relaxed(msm_host->core_mem + CORE_PWRCTL_STATUS);
 	irq_status &= INT_MASK;
@@ -1161,6 +1169,38 @@
 	 */
 	writel_relaxed(irq_ack, msm_host->core_mem + CORE_PWRCTL_CTL);
 
+	/*
+	 * If we don't have info regarding the voltage levels supported by
+	 * regulators, don't change the IO PAD PWR SWITCH.
+	 */
+	if (msm_host->caps_0 & CORE_VOLT_SUPPORT) {
+		u32 new_config;
+		/*
+		 * We should unset IO PAD PWR switch only if the register write
+		 * can set IO lines high and the regulator also switches to 3 V.
+		 * Else, we should keep the IO PAD PWR switch set.
+		 * This is applicable to certain targets where eMMC vccq supply
+		 * is only 1.8V. In such targets, even during REQ_IO_HIGH, the
+		 * IO PAD PWR switch must be kept set to reflect actual
+		 * regulator voltage. This way, during initialization of
+		 * controllers with only 1.8V, we will set the IO PAD bit
+		 * without waiting for a REQ_IO_LOW.
+		 */
+		config = readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC);
+		new_config = config;
+
+		if ((io_level & REQ_IO_HIGH) &&
+				(msm_host->caps_0 & CORE_3_0V_SUPPORT))
+			new_config &= ~CORE_IO_PAD_PWR_SWITCH;
+		else if ((io_level & REQ_IO_LOW) ||
+				(msm_host->caps_0 & CORE_1_8V_SUPPORT))
+			new_config |= CORE_IO_PAD_PWR_SWITCH;
+
+		if (config ^ new_config)
+			writel_relaxed(new_config,
+					host->ioaddr + CORE_VENDOR_SPEC);
+	}
+
 	if (pwr_state)
 		msm_host->curr_pwr_state = pwr_state;
 	if (io_level)
@@ -1313,6 +1353,45 @@
 		sdhci_msm_check_power_status(host, req_type);
 }
 
+static void sdhci_msm_set_regulator_caps(struct sdhci_msm_host *msm_host)
+{
+	struct mmc_host *mmc = msm_host->mmc;
+	struct regulator *supply = mmc->supply.vqmmc;
+	u32 caps = 0, config;
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	if (!IS_ERR(mmc->supply.vqmmc)) {
+		if (regulator_is_supported_voltage(supply, 1700000, 1950000))
+			caps |= CORE_1_8V_SUPPORT;
+		if (regulator_is_supported_voltage(supply, 2700000, 3600000))
+			caps |= CORE_3_0V_SUPPORT;
+
+		if (!caps)
+			pr_warn("%s: 1.8/3V not supported for vqmmc\n",
+					mmc_hostname(mmc));
+	}
+
+	if (caps) {
+		/*
+		 * Set the PAD_PWR_SWITCH_EN bit so that the PAD_PWR_SWITCH
+		 * bit can be used as required later on.
+		 */
+		u32 io_level = msm_host->curr_io_level;
+
+		config = readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC);
+		config |= CORE_IO_PAD_PWR_SWITCH_EN;
+
+		if ((io_level & REQ_IO_HIGH) && (caps &	CORE_3_0V_SUPPORT))
+			config &= ~CORE_IO_PAD_PWR_SWITCH;
+		else if ((io_level & REQ_IO_LOW) || (caps & CORE_1_8V_SUPPORT))
+			config |= CORE_IO_PAD_PWR_SWITCH;
+
+		writel_relaxed(config, host->ioaddr + CORE_VENDOR_SPEC);
+	}
+	msm_host->caps_0 |= caps;
+	pr_debug("%s: supported caps: 0x%08x\n", mmc_hostname(mmc), caps);
+}
+
 static const struct of_device_id sdhci_msm_dt_match[] = {
 	{ .compatible = "qcom,sdhci-msm-v4" },
 	{},
@@ -1333,7 +1412,6 @@
 
 static const struct sdhci_pltfm_data sdhci_msm_pdata = {
 	.quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION |
-		  SDHCI_QUIRK_NO_CARD_NO_RESET |
 		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
 		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
 	.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
@@ -1530,6 +1608,7 @@
 	ret = sdhci_add_host(host);
 	if (ret)
 		goto pm_runtime_disable;
+	sdhci_msm_set_regulator_caps(msm_host);
 
 	pm_runtime_mark_last_busy(&pdev->dev);
 	pm_runtime_put_autosuspend(&pdev->dev);
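
sdhci_msm_set_regulator_caps() above reduces to two range queries against
the regulator framework. A minimal sketch with the same voltage windows;
the flag bits are invented:

    #include <linux/bits.h>
    #include <linux/regulator/consumer.h>

    /* Sketch: classify a vqmmc supply by the voltage windows it spans. */
    static u32 vqmmc_caps_demo(struct regulator *vqmmc)
    {
            u32 caps = 0;

            if (regulator_is_supported_voltage(vqmmc, 1700000, 1950000))
                    caps |= BIT(0);   /* ~1.8 V capable, invented flag */
            if (regulator_is_supported_voltage(vqmmc, 2700000, 3600000))
                    caps |= BIT(1);   /* ~3.x V capable, invented flag */

            return caps;
    }
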
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index c33a5f7..e3332a5 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -290,7 +290,8 @@
 	.ops = &sdhci_arasan_ops,
 	.quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
 	.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
-			SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN,
+			SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN |
+			SDHCI_QUIRK2_STOP_WITH_TC,
 };
 
 static u32 sdhci_arasan_cqhci_irq(struct sdhci_host *host, u32 intmask)
@@ -359,8 +360,7 @@
  */
 static int sdhci_arasan_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
 	int ret;
@@ -403,8 +403,7 @@
  */
 static int sdhci_arasan_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_host *host = dev_get_drvdata(dev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
 	int ret;
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 1456abd..f3a7c8e 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -26,6 +26,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/sys_soc.h>
 
 #include "sdhci-pltfm.h"
 
@@ -35,6 +36,7 @@
 #define CON_DDR			BIT(19)
 #define CON_CLKEXTFREE		BIT(16)
 #define CON_PADEN		BIT(15)
+#define CON_CTPL		BIT(11)
 #define CON_INIT		BIT(1)
 #define CON_OD			BIT(0)
 
@@ -100,6 +102,7 @@
 };
 
 struct sdhci_omap_host {
+	char			*version;
 	void __iomem		*base;
 	struct device		*dev;
 	struct	regulator	*pbias;
@@ -224,6 +227,23 @@
 	}
 }
 
+static void sdhci_omap_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
+	u32 reg;
+
+	reg = sdhci_omap_readl(omap_host, SDHCI_OMAP_CON);
+	if (enable)
+		reg |= (CON_CTPL | CON_CLKEXTFREE);
+	else
+		reg &= ~(CON_CTPL | CON_CLKEXTFREE);
+	sdhci_omap_writel(omap_host, SDHCI_OMAP_CON, reg);
+
+	sdhci_enable_sdio_irq(mmc, enable);
+}
+
 static inline void sdhci_omap_set_dll(struct sdhci_omap_host *omap_host,
 				      int count)
 {
@@ -713,10 +733,15 @@
 		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
 	.quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN |
 		   SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
-		   SDHCI_QUIRK2_RSP_136_HAS_CRC,
+		   SDHCI_QUIRK2_RSP_136_HAS_CRC |
+		   SDHCI_QUIRK2_DISABLE_HW_TIMEOUT,
 	.ops = &sdhci_omap_ops,
 };
 
+static const struct sdhci_omap_data k2g_data = {
+	.offset = 0x200,
+};
+
 static const struct sdhci_omap_data dra7_data = {
 	.offset = 0x200,
 	.flags	= SDHCI_OMAP_REQUIRE_IODELAY,
@@ -724,6 +749,7 @@
 
 static const struct of_device_id omap_sdhci_match[] = {
 	{ .compatible = "ti,dra7-sdhci", .data = &dra7_data },
+	{ .compatible = "ti,k2g-sdhci", .data = &k2g_data },
 	{},
 };
 MODULE_DEVICE_TABLE(of, omap_sdhci_match);
@@ -733,12 +759,21 @@
 				  u32 *caps, u32 capmask)
 {
 	struct device *dev = omap_host->dev;
+	char *version = omap_host->version;
 	struct pinctrl_state *pinctrl_state = ERR_PTR(-ENODEV);
+	char str[20];
 
 	if (!(*caps & capmask))
 		goto ret;
 
-	pinctrl_state = pinctrl_lookup_state(omap_host->pinctrl, mode);
+	if (version) {
+		snprintf(str, 20, "%s-%s", mode, version);
+		pinctrl_state = pinctrl_lookup_state(omap_host->pinctrl, str);
+	}
+
+	if (IS_ERR(pinctrl_state))
+		pinctrl_state = pinctrl_lookup_state(omap_host->pinctrl, mode);
+
 	if (IS_ERR(pinctrl_state)) {
 		dev_err(dev, "no pinctrl state for %s mode", mode);
 		*caps &= ~capmask;
@@ -807,8 +842,15 @@
 
 	state = sdhci_omap_iodelay_pinctrl_state(omap_host, "ddr_1_8v", caps,
 						 MMC_CAP_1_8V_DDR);
-	if (!IS_ERR(state))
+	if (!IS_ERR(state)) {
 		pinctrl_state[MMC_TIMING_MMC_DDR52] = state;
+	} else {
+		state = sdhci_omap_iodelay_pinctrl_state(omap_host, "ddr_3_3v",
+							 caps,
+							 MMC_CAP_3_3V_DDR);
+		if (!IS_ERR(state))
+			pinctrl_state[MMC_TIMING_MMC_DDR52] = state;
+	}
 
 	state = sdhci_omap_iodelay_pinctrl_state(omap_host, "hs", caps,
 						 MMC_CAP_SD_HIGHSPEED);
@@ -830,6 +872,16 @@
 	return 0;
 }
 
+static const struct soc_device_attribute sdhci_omap_soc_devices[] = {
+	{
+		.machine = "DRA7[45]*",
+		.revision = "ES1.[01]",
+	},
+	{
+		/* sentinel */
+	}
+};
+
 static int sdhci_omap_probe(struct platform_device *pdev)
 {
 	int ret;
@@ -841,6 +893,7 @@
 	struct mmc_host *mmc;
 	const struct of_device_id *match;
 	struct sdhci_omap_data *data;
+	const struct soc_device_attribute *soc;
 
 	match = of_match_device(omap_sdhci_match, dev);
 	if (!match)
@@ -871,10 +924,22 @@
 	host->ioaddr += offset;
 
 	mmc = host->mmc;
+	sdhci_get_of_property(pdev);
 	ret = mmc_of_parse(mmc);
 	if (ret)
 		goto err_pltfm_free;
 
+	soc = soc_device_match(sdhci_omap_soc_devices);
+	if (soc) {
+		omap_host->version = "rev11";
+		if (!strcmp(dev_name(dev), "4809c000.mmc"))
+			mmc->f_max = 96000000;
+		if (!strcmp(dev_name(dev), "480b4000.mmc"))
+			mmc->f_max = 48000000;
+		if (!strcmp(dev_name(dev), "480ad000.mmc"))
+			mmc->f_max = 48000000;
+	}
+
 	pltfm_host->clk = devm_clk_get(dev, "fck");
 	if (IS_ERR(pltfm_host->clk)) {
 		ret = PTR_ERR(pltfm_host->clk);
@@ -916,26 +981,31 @@
 		goto err_put_sync;
 	}
 
-	ret = sdhci_omap_config_iodelay_pinctrl_state(omap_host);
-	if (ret)
-		goto err_put_sync;
-
 	host->mmc_host_ops.get_ro = mmc_gpio_get_ro;
 	host->mmc_host_ops.start_signal_voltage_switch =
 					sdhci_omap_start_signal_voltage_switch;
 	host->mmc_host_ops.set_ios = sdhci_omap_set_ios;
 	host->mmc_host_ops.card_busy = sdhci_omap_card_busy;
 	host->mmc_host_ops.execute_tuning = sdhci_omap_execute_tuning;
+	host->mmc_host_ops.enable_sdio_irq = sdhci_omap_enable_sdio_irq;
 
-	sdhci_read_caps(host);
-	host->caps |= SDHCI_CAN_DO_ADMA2;
-
-	ret = sdhci_add_host(host);
+	ret = sdhci_setup_host(host);
 	if (ret)
 		goto err_put_sync;
 
+	ret = sdhci_omap_config_iodelay_pinctrl_state(omap_host);
+	if (ret)
+		goto err_cleanup_host;
+
+	ret = __sdhci_add_host(host);
+	if (ret)
+		goto err_cleanup_host;
+
 	return 0;
 
+err_cleanup_host:
+	sdhci_cleanup_host(host);
+
 err_put_sync:
 	pm_runtime_put_sync(dev);
 
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 78c25ad..77dd352 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -453,6 +453,7 @@
 enum {
 	INTEL_DSM_FNS		=  0,
 	INTEL_DSM_V18_SWITCH	=  3,
+	INTEL_DSM_V33_SWITCH	=  4,
 	INTEL_DSM_DRV_STRENGTH	=  9,
 	INTEL_DSM_D3_RETUNE	= 10,
 };
@@ -620,17 +621,37 @@
 	sdhci_writel(host, val, INTEL_HS400_ES_REG);
 }
 
-static void sdhci_intel_voltage_switch(struct sdhci_host *host)
+static int intel_start_signal_voltage_switch(struct mmc_host *mmc,
+					     struct mmc_ios *ios)
 {
+	struct device *dev = mmc_dev(mmc);
+	struct sdhci_host *host = mmc_priv(mmc);
 	struct sdhci_pci_slot *slot = sdhci_priv(host);
 	struct intel_host *intel_host = sdhci_pci_priv(slot);
-	struct device *dev = &slot->chip->pdev->dev;
+	unsigned int fn;
 	u32 result = 0;
 	int err;
 
-	err = intel_dsm(intel_host, dev, INTEL_DSM_V18_SWITCH, &result);
-	pr_debug("%s: %s DSM error %d result %u\n",
-		 mmc_hostname(host->mmc), __func__, err, result);
+	err = sdhci_start_signal_voltage_switch(mmc, ios);
+	if (err)
+		return err;
+
+	switch (ios->signal_voltage) {
+	case MMC_SIGNAL_VOLTAGE_330:
+		fn = INTEL_DSM_V33_SWITCH;
+		break;
+	case MMC_SIGNAL_VOLTAGE_180:
+		fn = INTEL_DSM_V18_SWITCH;
+		break;
+	default:
+		return 0;
+	}
+
+	err = intel_dsm(intel_host, dev, fn, &result);
+	pr_debug("%s: %s DSM fn %u error %d result %u\n",
+		 mmc_hostname(mmc), __func__, fn, err, result);
+
+	return 0;
 }
 
 static const struct sdhci_ops sdhci_intel_byt_ops = {
@@ -641,7 +662,6 @@
 	.reset			= sdhci_reset,
 	.set_uhs_signaling	= sdhci_set_uhs_signaling,
 	.hw_reset		= sdhci_pci_hw_reset,
-	.voltage_switch		= sdhci_intel_voltage_switch,
 };
 
 static const struct sdhci_ops sdhci_intel_glk_ops = {
@@ -652,7 +672,6 @@
 	.reset			= sdhci_reset,
 	.set_uhs_signaling	= sdhci_set_uhs_signaling,
 	.hw_reset		= sdhci_pci_hw_reset,
-	.voltage_switch		= sdhci_intel_voltage_switch,
 	.irq			= sdhci_cqhci_irq,
 };
 
@@ -691,6 +710,7 @@
 	byt_read_dsm(slot);
 
 	ops->execute_tuning = intel_execute_tuning;
+	ops->start_signal_voltage_switch = intel_start_signal_voltage_switch;
 }
 
 static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
@@ -832,6 +852,10 @@
 	    slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_SD)
 		slot->host->mmc_host_ops.get_cd = bxt_get_cd;
 
+	if (slot->chip->pdev->subsystem_vendor == PCI_VENDOR_ID_NI &&
+	    slot->chip->pdev->subsystem_device == PCI_SUBDEVICE_ID_NI_78E3)
+		slot->host->mmc->caps2 |= MMC_CAP2_AVOID_3_3V;
+
 	return 0;
 }
 
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 5cbcdc4..db9cb54 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -54,6 +54,7 @@
 #define PCI_DEVICE_ID_REALTEK_5250	0x5250
 
 #define PCI_SUBDEVICE_ID_NI_7884	0x7884
+#define PCI_SUBDEVICE_ID_NI_78E3	0x78e3
 
 #define PCI_VENDOR_ID_ARASAN		0x16e6
 #define PCI_DEVICE_ID_ARASAN_PHY_EMMC	0x0670
diff --git a/drivers/mmc/host/sdhci-pic32.c b/drivers/mmc/host/sdhci-pic32.c
index a6caa49..a11e639 100644
--- a/drivers/mmc/host/sdhci-pic32.c
+++ b/drivers/mmc/host/sdhci-pic32.c
@@ -200,10 +200,8 @@
 	}
 
 	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(&pdev->dev, "error adding host\n");
+	if (ret)
 		goto err_base_clk;
-	}
 
 	dev_info(&pdev->dev, "Successfully added sdhci host\n");
 	return 0;
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index 8986f9d..2c3827f 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -221,10 +221,8 @@
 	host->ops = &pxav2_sdhci_ops;
 
 	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to add host\n");
+	if (ret)
 		goto disable_clk;
-	}
 
 	return 0;
 
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index a344341..b8e96f3 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -472,10 +472,8 @@
 	pm_suspend_ignore_children(&pdev->dev, 1);
 
 	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to add host\n");
+	if (ret)
 		goto err_add_host;
-	}
 
 	if (host->mmc->pm_caps & MMC_PM_WAKE_SDIO_IRQ)
 		device_init_wakeup(&pdev->dev, 1);
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index cda83cc..9ef89d0 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -655,10 +655,8 @@
 		goto err_req_regs;
 
 	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(dev, "sdhci_add_host() failed\n");
+	if (ret)
 		goto err_req_regs;
-	}
 
 #ifdef CONFIG_PM
 	if (pdata->cd_type != S3C_SDHCI_CD_INTERNAL)
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index 1451152..9247d51 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -126,10 +126,8 @@
 	}
 
 	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_dbg(&pdev->dev, "error adding host\n");
+	if (ret)
 		goto disable_clk;
-	}
 
 	platform_set_drvdata(pdev, host);
 
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
index c32daed..8f95647 100644
--- a/drivers/mmc/host/sdhci-st.c
+++ b/drivers/mmc/host/sdhci-st.c
@@ -422,10 +422,8 @@
 	st_mmcss_cconfig(np, host);
 
 	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed sdhci_add_host\n");
+	if (ret)
 		goto err_out;
-	}
 
 	host_version = readw_relaxed((host->ioaddr + SDHCI_HOST_VERSION));
 
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index b877c13..970d38f6 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -231,7 +231,7 @@
 	if (timing == MMC_TIMING_UHS_DDR50)
 		tegra_host->ddr_signaling = true;
 
-	return sdhci_set_uhs_signaling(host, timing);
+	sdhci_set_uhs_signaling(host, timing);
 }
 
 static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host)
diff --git a/drivers/mmc/host/sdhci-xenon-phy.c b/drivers/mmc/host/sdhci-xenon-phy.c
index ec87943..a35804b 100644
--- a/drivers/mmc/host/sdhci-xenon-phy.c
+++ b/drivers/mmc/host/sdhci-xenon-phy.c
@@ -814,15 +814,10 @@
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct xenon_priv *priv = sdhci_pltfm_priv(pltfm_host);
-	int i, ret;
+	int ret;
 
-	for (i = 0; i < NR_PHY_TYPES; i++) {
-		if (!strcmp(phy_name, phy_types[i])) {
-			priv->phy_type = i;
-			break;
-		}
-	}
-	if (i == NR_PHY_TYPES) {
+	priv->phy_type = match_string(phy_types, NR_PHY_TYPES, phy_name);
+	if (priv->phy_type < 0) {
 		dev_err(mmc_dev(host->mmc),
 			"Unable to determine PHY name %s. Use default eMMC 5.1 PHY\n",
 			phy_name);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 2ededa7..1c828e0 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -709,29 +709,16 @@
 		return sg_dma_address(host->data->sg);
 }
 
-static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
+static unsigned int sdhci_target_timeout(struct sdhci_host *host,
+					 struct mmc_command *cmd,
+					 struct mmc_data *data)
 {
-	u8 count;
-	struct mmc_data *data = cmd->data;
-	unsigned target_timeout, current_timeout;
-
-	/*
-	 * If the host controller provides us with an incorrect timeout
-	 * value, just skip the check and use 0xE.  The hardware may take
-	 * longer to time out, but that's much better than having a too-short
-	 * timeout value.
-	 */
-	if (host->quirks & SDHCI_QUIRK_BROKEN_TIMEOUT_VAL)
-		return 0xE;
-
-	/* Unspecified timeout, assume max */
-	if (!data && !cmd->busy_timeout)
-		return 0xE;
+	unsigned int target_timeout;
 
 	/* timeout in us */
-	if (!data)
+	if (!data) {
 		target_timeout = cmd->busy_timeout * 1000;
-	else {
+	} else {
 		target_timeout = DIV_ROUND_UP(data->timeout_ns, 1000);
 		if (host->clock && data->timeout_clks) {
 			unsigned long long val;
@@ -748,6 +735,67 @@
 		}
 	}
 
+	return target_timeout;
+}
+
+static void sdhci_calc_sw_timeout(struct sdhci_host *host,
+				  struct mmc_command *cmd)
+{
+	struct mmc_data *data = cmd->data;
+	struct mmc_host *mmc = host->mmc;
+	struct mmc_ios *ios = &mmc->ios;
+	unsigned char bus_width = 1 << ios->bus_width;
+	unsigned int blksz;
+	unsigned int freq;
+	u64 target_timeout;
+	u64 transfer_time;
+
+	target_timeout = sdhci_target_timeout(host, cmd, data);
+	target_timeout *= NSEC_PER_USEC;
+
+	if (data) {
+		blksz = data->blksz;
+		freq = host->mmc->actual_clock ? : host->clock;
+		transfer_time = (u64)blksz * NSEC_PER_SEC * (8 / bus_width);
+		do_div(transfer_time, freq);
+		/* multiply by '2' to account for any unknowns */
+		transfer_time = transfer_time * 2;
+		/* calculate timeout for the entire data */
+		host->data_timeout = data->blocks * target_timeout +
+				     transfer_time;
+	} else {
+		host->data_timeout = target_timeout;
+	}
+
+	if (host->data_timeout)
+		host->data_timeout += MMC_CMD_TRANSFER_TIME;
+}
+
+static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd,
+			     bool *too_big)
+{
+	u8 count;
+	struct mmc_data *data = cmd->data;
+	unsigned target_timeout, current_timeout;
+
+	*too_big = true;
+
+	/*
+	 * If the host controller provides us with an incorrect timeout
+	 * value, just skip the check and use 0xE.  The hardware may take
+	 * longer to time out, but that's much better than having a too-short
+	 * timeout value.
+	 */
+	if (host->quirks & SDHCI_QUIRK_BROKEN_TIMEOUT_VAL)
+		return 0xE;
+
+	/* Unspecified timeout, assume max */
+	if (!data && !cmd->busy_timeout)
+		return 0xE;
+
+	/* timeout in us */
+	target_timeout = sdhci_target_timeout(host, cmd, data);
+
 	/*
 	 * Figure out needed cycles.
 	 * We do this in steps in order to fit inside a 32 bit int.
@@ -768,9 +816,12 @@
 	}
 
 	if (count >= 0xF) {
-		DBG("Too large timeout 0x%x requested for CMD%d!\n",
-		    count, cmd->opcode);
+		if (!(host->quirks2 & SDHCI_QUIRK2_DISABLE_HW_TIMEOUT))
+			DBG("Too large timeout 0x%x requested for CMD%d!\n",
+			    count, cmd->opcode);
 		count = 0xE;
+	} else {
+		*too_big = false;
 	}
 
 	return count;
@@ -790,6 +841,16 @@
 	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
+static void sdhci_set_data_timeout_irq(struct sdhci_host *host, bool enable)
+{
+	if (enable)
+		host->ier |= SDHCI_INT_DATA_TIMEOUT;
+	else
+		host->ier &= ~SDHCI_INT_DATA_TIMEOUT;
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+}
+
 static void sdhci_set_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 {
 	u8 count;
@@ -797,7 +858,18 @@
 	if (host->ops->set_timeout) {
 		host->ops->set_timeout(host, cmd);
 	} else {
-		count = sdhci_calc_timeout(host, cmd);
+		bool too_big = false;
+
+		count = sdhci_calc_timeout(host, cmd, &too_big);
+
+		if (too_big &&
+		    host->quirks2 & SDHCI_QUIRK2_DISABLE_HW_TIMEOUT) {
+			sdhci_calc_sw_timeout(host, cmd);
+			sdhci_set_data_timeout_irq(host, false);
+		} else if (!(host->ier & SDHCI_INT_DATA_TIMEOUT)) {
+			sdhci_set_data_timeout_irq(host, true);
+		}
+
 		sdhci_writeb(host, count, SDHCI_TIMEOUT_CONTROL);
 	}
 }
@@ -807,6 +879,8 @@
 	u8 ctrl;
 	struct mmc_data *data = cmd->data;
 
+	host->data_timeout = 0;
+
 	if (sdhci_data_line_cmd(cmd))
 		sdhci_set_timeout(host, cmd);
 
@@ -1160,13 +1234,6 @@
 		mdelay(1);
 	}
 
-	timeout = jiffies;
-	if (!cmd->data && cmd->busy_timeout > 9000)
-		timeout += DIV_ROUND_UP(cmd->busy_timeout, 1000) * HZ + HZ;
-	else
-		timeout += 10 * HZ;
-	sdhci_mod_timer(host, cmd->mrq, timeout);
-
 	host->cmd = cmd;
 	if (sdhci_data_line_cmd(cmd)) {
 		WARN_ON(host->data_cmd);
@@ -1206,6 +1273,15 @@
 	    cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200)
 		flags |= SDHCI_CMD_DATA;
 
+	timeout = jiffies;
+	if (host->data_timeout)
+		timeout += nsecs_to_jiffies(host->data_timeout);
+	else if (!cmd->data && cmd->busy_timeout > 9000)
+		timeout += DIV_ROUND_UP(cmd->busy_timeout, 1000) * HZ + HZ;
+	else
+		timeout += 10 * HZ;
+	sdhci_mod_timer(host, cmd->mrq, timeout);
+
 	sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
 }
 EXPORT_SYMBOL_GPL(sdhci_send_command);
@@ -3616,6 +3692,10 @@
 		mmc->max_busy_timeout /= host->timeout_clk;
 	}
 
+	if (host->quirks2 & SDHCI_QUIRK2_DISABLE_HW_TIMEOUT &&
+	    !host->ops->get_max_timeout_count)
+		mmc->max_busy_timeout = 0;
+
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
 	mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
 
@@ -3672,6 +3752,16 @@
 	if (host->quirks2 & SDHCI_QUIRK2_NO_1_8_V) {
 		host->caps1 &= ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 |
 				 SDHCI_SUPPORT_DDR50);
+		/*
+		 * The SDHCI controller in a SoC might support HS200/HS400
+		 * (indicated via the mmc-hs200-1_8v/mmc-hs400-1_8v dt properties),
+		 * but if the board is modeled such that the IO lines are not
+		 * connected to 1.8v, then HS200/HS400 cannot be supported.
+		 * Disable HS200/HS400 if the board does not have 1.8v connected
+		 * to the IO lines. (The same applies to the other 1.8v modes.)
+		 */
+		mmc->caps2 &= ~(MMC_CAP2_HSX00_1_8V | MMC_CAP2_HS400_ES);
+		mmc->caps &= ~(MMC_CAP_1_8V_DDR | MMC_CAP_UHS);
 	}
 
 	/* Any UHS-I mode in caps implies SDR12 and SDR25 support. */
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index c95b0a4..23966f8 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -332,6 +332,14 @@
 /* Allow for a command request and a data request at the same time */
 #define SDHCI_MAX_MRQS		2
 
+/*
+ * A 48-bit command and a 136-bit response at a 100 kHz clock could take up
+ * to 2.48 ms. However, since the start time of the command, the time between
+ * command and response, and the time between response and start of data are
+ * not known, set the command transfer time to 10 ms.
+ */
+#define MMC_CMD_TRANSFER_TIME	(10 * NSEC_PER_MSEC) /* max 10 ms */
+
 enum sdhci_cookie {
 	COOKIE_UNMAPPED,
 	COOKIE_PRE_MAPPED,	/* mapped by sdhci_pre_req() */
@@ -437,6 +445,11 @@
 #define SDHCI_QUIRK2_CLOCK_DIV_ZERO_BROKEN		(1<<15)
 /* Controller has CRC in 136 bit Command Response */
 #define SDHCI_QUIRK2_RSP_136_HAS_CRC			(1<<16)
+/*
+ * Disable HW timeout if the requested timeout is more than the maximum
+ * obtainable timeout.
+ */
+#define SDHCI_QUIRK2_DISABLE_HW_TIMEOUT			(1<<17)
 
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
@@ -550,6 +563,8 @@
 	/* Host SDMA buffer boundary. */
 	u32			sdma_boundary;
 
+	u64			data_timeout;
+
 	unsigned long private[0] ____cacheline_aligned;
 };
 
diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index 20cfb20..e747259 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -13,36 +13,34 @@
  * the License, or (at your option) any later version.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/io.h>
-#include <linux/device.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-
 #include <linux/clk.h>
 #include <linux/clk/sunxi-ng.h>
-#include <linux/gpio.h>
-#include <linux/platform_device.h>
-#include <linux/spinlock.h>
-#include <linux/scatterlist.h>
+#include <linux/delay.h>
+#include <linux/device.h>
 #include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/reset.h>
-#include <linux/regulator/consumer.h>
-
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/core.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sd.h>
+#include <linux/mmc/sdio.h>
+#include <linux/mmc/slot-gpio.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_gpio.h>
 #include <linux/of_platform.h>
-
-#include <linux/mmc/host.h>
-#include <linux/mmc/sd.h>
-#include <linux/mmc/sdio.h>
-#include <linux/mmc/mmc.h>
-#include <linux/mmc/core.h>
-#include <linux/mmc/card.h>
-#include <linux/mmc/slot-gpio.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
 
 /* register offset definitions */
 #define SDXC_REG_GCTRL	(0x00) /* SMC Global Control Register */
@@ -322,10 +320,9 @@
 	return 0;
 }
 
-static int sunxi_mmc_init_host(struct mmc_host *mmc)
+static int sunxi_mmc_init_host(struct sunxi_mmc_host *host)
 {
 	u32 rval;
-	struct sunxi_mmc_host *host = mmc_priv(mmc);
 
 	if (sunxi_mmc_reset_host(host))
 		return -EIO;
@@ -859,17 +856,48 @@
 	return 0;
 }
 
-static void sunxi_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+static void sunxi_mmc_set_bus_width(struct sunxi_mmc_host *host,
+				   unsigned char width)
 {
-	struct sunxi_mmc_host *host = mmc_priv(mmc);
+	switch (width) {
+	case MMC_BUS_WIDTH_1:
+		mmc_writel(host, REG_WIDTH, SDXC_WIDTH1);
+		break;
+	case MMC_BUS_WIDTH_4:
+		mmc_writel(host, REG_WIDTH, SDXC_WIDTH4);
+		break;
+	case MMC_BUS_WIDTH_8:
+		mmc_writel(host, REG_WIDTH, SDXC_WIDTH8);
+		break;
+	}
+}
+
+static void sunxi_mmc_set_clk(struct sunxi_mmc_host *host, struct mmc_ios *ios)
+{
 	u32 rval;
 
-	/* Set the power state */
-	switch (ios->power_mode) {
-	case MMC_POWER_ON:
-		break;
+	/* set ddr mode */
+	rval = mmc_readl(host, REG_GCTRL);
+	if (ios->timing == MMC_TIMING_UHS_DDR50 ||
+	    ios->timing == MMC_TIMING_MMC_DDR52)
+		rval |= SDXC_DDR_MODE;
+	else
+		rval &= ~SDXC_DDR_MODE;
+	mmc_writel(host, REG_GCTRL, rval);
 
+	host->ferror = sunxi_mmc_clk_set_rate(host, ios);
+	/* Android code had a usleep_range(50000, 55000); here */
+}
+
+static void sunxi_mmc_card_power(struct sunxi_mmc_host *host,
+				 struct mmc_ios *ios)
+{
+	struct mmc_host *mmc = host->mmc;
+
+	switch (ios->power_mode) {
 	case MMC_POWER_UP:
+		dev_dbg(mmc_dev(mmc), "Powering card up\n");
+
 		if (!IS_ERR(mmc->supply.vmmc)) {
 			host->ferror = mmc_regulator_set_ocr(mmc,
 							     mmc->supply.vmmc,
@@ -887,53 +915,33 @@
 			}
 			host->vqmmc_enabled = true;
 		}
-
-		host->ferror = sunxi_mmc_init_host(mmc);
-		if (host->ferror)
-			return;
-
-		dev_dbg(mmc_dev(mmc), "power on!\n");
 		break;
 
 	case MMC_POWER_OFF:
-		dev_dbg(mmc_dev(mmc), "power off!\n");
-		sunxi_mmc_reset_host(host);
+		dev_dbg(mmc_dev(mmc), "Powering card off\n");
+
 		if (!IS_ERR(mmc->supply.vmmc))
 			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
 
 		if (!IS_ERR(mmc->supply.vqmmc) && host->vqmmc_enabled)
 			regulator_disable(mmc->supply.vqmmc);
+
 		host->vqmmc_enabled = false;
 		break;
-	}
 
-	/* set bus width */
-	switch (ios->bus_width) {
-	case MMC_BUS_WIDTH_1:
-		mmc_writel(host, REG_WIDTH, SDXC_WIDTH1);
-		break;
-	case MMC_BUS_WIDTH_4:
-		mmc_writel(host, REG_WIDTH, SDXC_WIDTH4);
-		break;
-	case MMC_BUS_WIDTH_8:
-		mmc_writel(host, REG_WIDTH, SDXC_WIDTH8);
+	default:
+		dev_dbg(mmc_dev(mmc), "Ignoring unknown card power state\n");
 		break;
 	}
+}
 
-	/* set ddr mode */
-	rval = mmc_readl(host, REG_GCTRL);
-	if (ios->timing == MMC_TIMING_UHS_DDR50 ||
-	    ios->timing == MMC_TIMING_MMC_DDR52)
-		rval |= SDXC_DDR_MODE;
-	else
-		rval &= ~SDXC_DDR_MODE;
-	mmc_writel(host, REG_GCTRL, rval);
+static void sunxi_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct sunxi_mmc_host *host = mmc_priv(mmc);
 
-	/* set up clock */
-	if (ios->power_mode) {
-		host->ferror = sunxi_mmc_clk_set_rate(host, ios);
-		/* Android code had a usleep_range(50000, 55000); here */
-	}
+	sunxi_mmc_card_power(host, ios);
+	sunxi_mmc_set_bus_width(host, ios->bus_width);
+	sunxi_mmc_set_clk(host, ios);
 }
 
 static int sunxi_mmc_volt_switch(struct mmc_host *mmc, struct mmc_ios *ios)
@@ -955,6 +963,9 @@
 	unsigned long flags;
 	u32 imask;
 
+	if (enable)
+		pm_runtime_get_noresume(host->dev);
+
 	spin_lock_irqsave(&host->lock, flags);
 
 	imask = mmc_readl(host, REG_IMASK);
@@ -967,6 +978,9 @@
 	}
 	mmc_writel(host, REG_IMASK, imask);
 	spin_unlock_irqrestore(&host->lock, flags);
+
+	if (!enable)
+		pm_runtime_put_noidle(host->mmc->parent);
 }
 
 static void sunxi_mmc_hw_reset(struct mmc_host *mmc)
@@ -1380,6 +1394,15 @@
 	if (ret)
 		goto error_free_dma;
 
+	ret = sunxi_mmc_init_host(host);
+	if (ret)
+		goto error_free_dma;
+
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
 	ret = mmc_add_host(mmc);
 	if (ret)
 		goto error_free_dma;
@@ -1400,6 +1423,7 @@
 	struct sunxi_mmc_host *host = mmc_priv(mmc);
 
 	mmc_remove_host(mmc);
+	pm_runtime_force_suspend(&pdev->dev);
 	disable_irq(host->irq);
 	sunxi_mmc_disable(host);
 	dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
@@ -1408,10 +1432,47 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int sunxi_mmc_runtime_resume(struct device *dev)
+{
+	struct mmc_host	*mmc = dev_get_drvdata(dev);
+	struct sunxi_mmc_host *host = mmc_priv(mmc);
+	int ret;
+
+	ret = sunxi_mmc_enable(host);
+	if (ret)
+		return ret;
+
+	sunxi_mmc_init_host(host);
+	sunxi_mmc_set_bus_width(host, mmc->ios.bus_width);
+	sunxi_mmc_set_clk(host, &mmc->ios);
+
+	return 0;
+}
+
+static int sunxi_mmc_runtime_suspend(struct device *dev)
+{
+	struct mmc_host	*mmc = dev_get_drvdata(dev);
+	struct sunxi_mmc_host *host = mmc_priv(mmc);
+
+	sunxi_mmc_reset_host(host);
+	sunxi_mmc_disable(host);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops sunxi_mmc_pm_ops = {
+	SET_RUNTIME_PM_OPS(sunxi_mmc_runtime_suspend,
+			   sunxi_mmc_runtime_resume,
+			   NULL)
+};
+
 static struct platform_driver sunxi_mmc_driver = {
 	.driver = {
 		.name	= "sunxi-mmc",
 		.of_match_table = of_match_ptr(sunxi_mmc_of_match),
+		.pm = &sunxi_mmc_pm_ops,
 	},
 	.probe		= sunxi_mmc_probe,
 	.remove		= sunxi_mmc_remove,
diff --git a/drivers/mmc/host/ushc.c b/drivers/mmc/host/ushc.c
index 81dac17..b2b379b 100644
--- a/drivers/mmc/host/ushc.c
+++ b/drivers/mmc/host/ushc.c
@@ -300,8 +300,10 @@
 			pipe = usb_sndbulkpipe(ushc->usb_dev, 2);
 
 		usb_fill_bulk_urb(ushc->data_urb, ushc->usb_dev, pipe,
-				  sg_virt(data->sg), data->sg->length,
+				  NULL, data->sg->length,
 				  data_callback, ushc);
+		ushc->data_urb->num_sgs = 1;
+		ushc->data_urb->sg = data->sg;
 		ret = usb_submit_urb(ushc->data_urb, GFP_ATOMIC);
 		if (ret < 0)
 			goto out;
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index f423357..1e54bbf 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -268,43 +268,29 @@
 	return host->num_sg;
 }
 
-static inline char *wbsd_sg_to_buffer(struct wbsd_host *host)
+static inline char *wbsd_map_sg(struct wbsd_host *host)
 {
-	return sg_virt(host->cur_sg);
+	return kmap_atomic(sg_page(host->cur_sg)) + host->cur_sg->offset;
 }
 
 static inline void wbsd_sg_to_dma(struct wbsd_host *host, struct mmc_data *data)
 {
-	unsigned int len, i;
-	struct scatterlist *sg;
-	char *dmabuf = host->dma_buffer;
-	char *sgbuf;
+	size_t len = 0;
+	int i;
 
-	sg = data->sg;
-	len = data->sg_len;
-
-	for (i = 0; i < len; i++) {
-		sgbuf = sg_virt(&sg[i]);
-		memcpy(dmabuf, sgbuf, sg[i].length);
-		dmabuf += sg[i].length;
-	}
+	for (i = 0; i < data->sg_len; i++)
+		len += data->sg[i].length;
+	sg_copy_to_buffer(data->sg, data->sg_len, host->dma_buffer, len);
 }
 
 static inline void wbsd_dma_to_sg(struct wbsd_host *host, struct mmc_data *data)
 {
-	unsigned int len, i;
-	struct scatterlist *sg;
-	char *dmabuf = host->dma_buffer;
-	char *sgbuf;
+	size_t len = 0;
+	int i;
 
-	sg = data->sg;
-	len = data->sg_len;
-
-	for (i = 0; i < len; i++) {
-		sgbuf = sg_virt(&sg[i]);
-		memcpy(sgbuf, dmabuf, sg[i].length);
-		dmabuf += sg[i].length;
-	}
+	for (i = 0; i < data->sg_len; i++)
+		len += data->sg[i].length;
+	sg_copy_from_buffer(data->sg, data->sg_len, host->dma_buffer, len);
 }
 
 /*
@@ -418,7 +404,7 @@
 {
 	struct mmc_data *data = host->mrq->cmd->data;
 	char *buffer;
-	int i, fsr, fifo;
+	int i, idx, fsr, fifo;
 
 	/*
 	 * Handle excessive data.
@@ -426,7 +412,8 @@
 	if (host->num_sg == 0)
 		return;
 
-	buffer = wbsd_sg_to_buffer(host) + host->offset;
+	buffer = wbsd_map_sg(host) + host->offset;
+	idx = 0;
 
 	/*
 	 * Drain the fifo. This has a tendency to loop longer
@@ -445,8 +432,7 @@
 			fifo = 1;
 
 		for (i = 0; i < fifo; i++) {
-			*buffer = inb(host->base + WBSD_DFR);
-			buffer++;
+			buffer[idx++] = inb(host->base + WBSD_DFR);
 			host->offset++;
 			host->remain--;
 
@@ -456,16 +442,19 @@
 			 * End of scatter list entry?
 			 */
 			if (host->remain == 0) {
+				kunmap_atomic(buffer);
 				/*
 				 * Get next entry. Check if last.
 				 */
 				if (!wbsd_next_sg(host))
 					return;
 
-				buffer = wbsd_sg_to_buffer(host);
+				buffer = wbsd_map_sg(host);
+				idx = 0;
 			}
 		}
 	}
+	kunmap_atomic(buffer);
 
 	/*
 	 * This is a very dirty hack to solve a
@@ -480,7 +469,7 @@
 {
 	struct mmc_data *data = host->mrq->cmd->data;
 	char *buffer;
-	int i, fsr, fifo;
+	int i, idx, fsr, fifo;
 
 	/*
 	 * Check that we aren't being called after the
@@ -489,7 +478,8 @@
 	if (host->num_sg == 0)
 		return;
 
-	buffer = wbsd_sg_to_buffer(host) + host->offset;
+	buffer = wbsd_map_sg(host) + host->offset;
+	idx = 0;
 
 	/*
 	 * Fill the fifo. This has a tendency to loop longer
@@ -508,8 +498,7 @@
 			fifo = 15;
 
 		for (i = 16; i > fifo; i--) {
-			outb(*buffer, host->base + WBSD_DFR);
-			buffer++;
+			outb(buffer[idx++], host->base + WBSD_DFR);
 			host->offset++;
 			host->remain--;
 
@@ -519,16 +508,19 @@
 			 * End of scatter list entry?
 			 */
 			if (host->remain == 0) {
+				kunmap_atomic(buffer);
 				/*
 				 * Get next entry. Check if last.
 				 */
 				if (!wbsd_next_sg(host))
 					return;
 
-				buffer = wbsd_sg_to_buffer(host);
+				buffer = wbsd_map_sg(host);
+				idx = 0;
 			}
 		}
 	}
+	kunmap_atomic(buffer);
 
 	/*
 	 * The controller stops sending interrupts for
diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index fd30ac7..3ba42f5 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -928,8 +928,7 @@
 static int wmt_mci_suspend(struct device *dev)
 {
 	u32 reg_tmp;
-	struct platform_device *pdev = to_platform_device(dev);
-	struct mmc_host *mmc = platform_get_drvdata(pdev);
+	struct mmc_host *mmc = dev_get_drvdata(dev);
 	struct wmt_mci_priv *priv;
 
 	if (!mmc)
@@ -953,8 +952,7 @@
 static int wmt_mci_resume(struct device *dev)
 {
 	u32 reg_tmp;
-	struct platform_device *pdev = to_platform_device(dev);
-	struct mmc_host *mmc = platform_get_drvdata(pdev);
+	struct mmc_host *mmc = dev_get_drvdata(dev);
 	struct wmt_mci_priv *priv;
 
 	if (mmc) {
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 6def544..57b02c4b 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -81,6 +81,7 @@
 config MTD_M25P80
 	tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
 	depends on SPI_MASTER && MTD_SPI_NOR
+	select SPI_MEM
 	help
 	  This enables access to most modern SPI flash chips, used for
 	  program and data storage.   Series supported include Atmel AT26DF,
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index a4e18f6..e84563d 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -24,12 +24,13 @@
 #include <linux/mtd/partitions.h>
 
 #include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
 #include <linux/spi/flash.h>
 #include <linux/mtd/spi-nor.h>
 
 #define	MAX_CMD_SIZE		6
 struct m25p {
-	struct spi_device	*spi;
+	struct spi_mem		*spimem;
 	struct spi_nor		spi_nor;
 	u8			command[MAX_CMD_SIZE];
 };
@@ -37,97 +38,68 @@
 static int m25p80_read_reg(struct spi_nor *nor, u8 code, u8 *val, int len)
 {
 	struct m25p *flash = nor->priv;
-	struct spi_device *spi = flash->spi;
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(code, 1),
+					  SPI_MEM_OP_NO_ADDR,
+					  SPI_MEM_OP_NO_DUMMY,
+					  SPI_MEM_OP_DATA_IN(len, val, 1));
 	int ret;
 
-	ret = spi_write_then_read(spi, &code, 1, val, len);
+	ret = spi_mem_exec_op(flash->spimem, &op);
 	if (ret < 0)
-		dev_err(&spi->dev, "error %d reading %x\n", ret, code);
+		dev_err(&flash->spimem->spi->dev, "error %d reading %x\n", ret,
+			code);
 
 	return ret;
 }
 
-static void m25p_addr2cmd(struct spi_nor *nor, unsigned int addr, u8 *cmd)
-{
-	/* opcode is in cmd[0] */
-	cmd[1] = addr >> (nor->addr_width * 8 -  8);
-	cmd[2] = addr >> (nor->addr_width * 8 - 16);
-	cmd[3] = addr >> (nor->addr_width * 8 - 24);
-	cmd[4] = addr >> (nor->addr_width * 8 - 32);
-}
-
-static int m25p_cmdsz(struct spi_nor *nor)
-{
-	return 1 + nor->addr_width;
-}
-
 static int m25p80_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
 {
 	struct m25p *flash = nor->priv;
-	struct spi_device *spi = flash->spi;
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 1),
+					  SPI_MEM_OP_NO_ADDR,
+					  SPI_MEM_OP_NO_DUMMY,
+					  SPI_MEM_OP_DATA_OUT(len, buf, 1));
 
-	flash->command[0] = opcode;
-	if (buf)
-		memcpy(&flash->command[1], buf, len);
-
-	return spi_write(spi, flash->command, len + 1);
+	return spi_mem_exec_op(flash->spimem, &op);
 }
 
 static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 			    const u_char *buf)
 {
 	struct m25p *flash = nor->priv;
-	struct spi_device *spi = flash->spi;
-	unsigned int inst_nbits, addr_nbits, data_nbits, data_idx;
-	struct spi_transfer t[3] = {};
-	struct spi_message m;
-	int cmd_sz = m25p_cmdsz(nor);
-	ssize_t ret;
+	struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
+				   SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
+				   SPI_MEM_OP_DUMMY(0, 1),
+				   SPI_MEM_OP_DATA_OUT(len, buf, 1));
+	size_t remaining = len;
+	int ret;
 
 	/* get transfer protocols. */
-	inst_nbits = spi_nor_get_protocol_inst_nbits(nor->write_proto);
-	addr_nbits = spi_nor_get_protocol_addr_nbits(nor->write_proto);
-	data_nbits = spi_nor_get_protocol_data_nbits(nor->write_proto);
-
-	spi_message_init(&m);
+	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->write_proto);
+	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->write_proto);
+	op.dummy.buswidth = op.addr.buswidth;
+	op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->write_proto);
 
 	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
-		cmd_sz = 1;
+		op.addr.nbytes = 0;
 
-	flash->command[0] = nor->program_opcode;
-	m25p_addr2cmd(nor, to, flash->command);
+	while (remaining) {
+		op.data.nbytes = remaining < UINT_MAX ? remaining : UINT_MAX;
+		ret = spi_mem_adjust_op_size(flash->spimem, &op);
+		if (ret)
+			return ret;
 
-	t[0].tx_buf = flash->command;
-	t[0].tx_nbits = inst_nbits;
-	t[0].len = cmd_sz;
-	spi_message_add_tail(&t[0], &m);
+		ret = spi_mem_exec_op(flash->spimem, &op);
+		if (ret)
+			return ret;
 
-	/* split the op code and address bytes into two transfers if needed. */
-	data_idx = 1;
-	if (addr_nbits != inst_nbits) {
-		t[0].len = 1;
-
-		t[1].tx_buf = &flash->command[1];
-		t[1].tx_nbits = addr_nbits;
-		t[1].len = cmd_sz - 1;
-		spi_message_add_tail(&t[1], &m);
-
-		data_idx = 2;
+		op.addr.val += op.data.nbytes;
+		remaining -= op.data.nbytes;
+		op.data.buf.out += op.data.nbytes;
 	}
 
-	t[data_idx].tx_buf = buf;
-	t[data_idx].tx_nbits = data_nbits;
-	t[data_idx].len = len;
-	spi_message_add_tail(&t[data_idx], &m);
-
-	ret = spi_sync(spi, &m);
-	if (ret)
-		return ret;
-
-	ret = m.actual_length - cmd_sz;
-	if (ret < 0)
-		return -EIO;
-	return ret;
+	return len;
 }
 
 /*
@@ -138,92 +110,39 @@
 			   u_char *buf)
 {
 	struct m25p *flash = nor->priv;
-	struct spi_device *spi = flash->spi;
-	unsigned int inst_nbits, addr_nbits, data_nbits, data_idx;
-	struct spi_transfer t[3];
-	struct spi_message m;
-	unsigned int dummy = nor->read_dummy;
-	ssize_t ret;
-	int cmd_sz;
+	struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
+				   SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
+				   SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
+				   SPI_MEM_OP_DATA_IN(len, buf, 1));
+	size_t remaining = len;
+	int ret;
 
 	/* get transfer protocols. */
-	inst_nbits = spi_nor_get_protocol_inst_nbits(nor->read_proto);
-	addr_nbits = spi_nor_get_protocol_addr_nbits(nor->read_proto);
-	data_nbits = spi_nor_get_protocol_data_nbits(nor->read_proto);
+	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->read_proto);
+	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->read_proto);
+	op.dummy.buswidth = op.addr.buswidth;
+	op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->read_proto);
 
 	/* convert the dummy cycles to the number of bytes */
-	dummy = (dummy * addr_nbits) / 8;
+	op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8;
 
-	if (spi_flash_read_supported(spi)) {
-		struct spi_flash_read_message msg;
-
-		memset(&msg, 0, sizeof(msg));
-
-		msg.buf = buf;
-		msg.from = from;
-		msg.len = len;
-		msg.read_opcode = nor->read_opcode;
-		msg.addr_width = nor->addr_width;
-		msg.dummy_bytes = dummy;
-		msg.opcode_nbits = inst_nbits;
-		msg.addr_nbits = addr_nbits;
-		msg.data_nbits = data_nbits;
-
-		ret = spi_flash_read(spi, &msg);
-		if (ret < 0)
+	while (remaining) {
+		op.data.nbytes = remaining < UINT_MAX ? remaining : UINT_MAX;
+		ret = spi_mem_adjust_op_size(flash->spimem, &op);
+		if (ret)
 			return ret;
-		return msg.retlen;
+
+		ret = spi_mem_exec_op(flash->spimem, &op);
+		if (ret)
+			return ret;
+
+		op.addr.val += op.data.nbytes;
+		remaining -= op.data.nbytes;
+		op.data.buf.in += op.data.nbytes;
 	}
 
-	spi_message_init(&m);
-	memset(t, 0, (sizeof t));
-
-	flash->command[0] = nor->read_opcode;
-	m25p_addr2cmd(nor, from, flash->command);
-
-	t[0].tx_buf = flash->command;
-	t[0].tx_nbits = inst_nbits;
-	t[0].len = m25p_cmdsz(nor) + dummy;
-	spi_message_add_tail(&t[0], &m);
-
-	/*
-	 * Set all dummy/mode cycle bits to avoid sending some manufacturer
-	 * specific pattern, which might make the memory enter its Continuous
-	 * Read mode by mistake.
-	 * Based on the different mode cycle bit patterns listed and described
-	 * in the JESD216B specification, the 0xff value works for all memories
-	 * and all manufacturers.
-	 */
-	cmd_sz = t[0].len;
-	memset(flash->command + cmd_sz - dummy, 0xff, dummy);
-
-	/* split the op code and address bytes into two transfers if needed. */
-	data_idx = 1;
-	if (addr_nbits != inst_nbits) {
-		t[0].len = 1;
-
-		t[1].tx_buf = &flash->command[1];
-		t[1].tx_nbits = addr_nbits;
-		t[1].len = cmd_sz - 1;
-		spi_message_add_tail(&t[1], &m);
-
-		data_idx = 2;
-	}
-
-	t[data_idx].rx_buf = buf;
-	t[data_idx].rx_nbits = data_nbits;
-	t[data_idx].len = min3(len, spi_max_transfer_size(spi),
-			       spi_max_message_size(spi) - cmd_sz);
-	spi_message_add_tail(&t[data_idx], &m);
-
-	ret = spi_sync(spi, &m);
-	if (ret)
-		return ret;
-
-	ret = m.actual_length - cmd_sz;
-	if (ret < 0)
-		return -EIO;
-	return ret;
+	return len;
 }
 
 /*
@@ -231,8 +150,9 @@
  * matches what the READ command supports, at least until this driver
  * understands FAST_READ (for clocks over 25 MHz).
  */
-static int m25p_probe(struct spi_device *spi)
+static int m25p_probe(struct spi_mem *spimem)
 {
+	struct spi_device *spi = spimem->spi;
 	struct flash_platform_data	*data;
 	struct m25p *flash;
 	struct spi_nor *nor;
@@ -244,9 +164,9 @@
 	char *flash_name;
 	int ret;
 
-	data = dev_get_platdata(&spi->dev);
+	data = dev_get_platdata(&spimem->spi->dev);
 
-	flash = devm_kzalloc(&spi->dev, sizeof(*flash), GFP_KERNEL);
+	flash = devm_kzalloc(&spimem->spi->dev, sizeof(*flash), GFP_KERNEL);
 	if (!flash)
 		return -ENOMEM;
 
@@ -258,12 +178,12 @@
 	nor->write_reg = m25p80_write_reg;
 	nor->read_reg = m25p80_read_reg;
 
-	nor->dev = &spi->dev;
+	nor->dev = &spimem->spi->dev;
 	spi_nor_set_flash_node(nor, spi->dev.of_node);
 	nor->priv = flash;
 
-	spi_set_drvdata(spi, flash);
-	flash->spi = spi;
+	spi_mem_set_drvdata(spimem, flash);
+	flash->spimem = spimem;
 
 	if (spi->mode & SPI_RX_QUAD) {
 		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
@@ -303,9 +223,9 @@
 }
 
 
-static int m25p_remove(struct spi_device *spi)
+static int m25p_remove(struct spi_mem *spimem)
 {
-	struct m25p	*flash = spi_get_drvdata(spi);
+	struct m25p	*flash = spi_mem_get_drvdata(spimem);
 
 	spi_nor_restore(&flash->spi_nor);
 
@@ -313,9 +233,9 @@
 	return mtd_device_unregister(&flash->spi_nor.mtd);
 }
 
-static void m25p_shutdown(struct spi_device *spi)
+static void m25p_shutdown(struct spi_mem *spimem)
 {
-	struct m25p *flash = spi_get_drvdata(spi);
+	struct m25p *flash = spi_mem_get_drvdata(spimem);
 
 	spi_nor_restore(&flash->spi_nor);
 }
@@ -386,12 +306,14 @@
 };
 MODULE_DEVICE_TABLE(of, m25p_of_table);
 
-static struct spi_driver m25p80_driver = {
-	.driver = {
-		.name	= "m25p80",
-		.of_match_table = m25p_of_table,
+static struct spi_mem_driver m25p80_driver = {
+	.spidrv = {
+		.driver = {
+			.name	= "m25p80",
+			.of_match_table = m25p_of_table,
+		},
+		.id_table	= m25p_ids,
 	},
-	.id_table	= m25p_ids,
 	.probe	= m25p_probe,
 	.remove	= m25p_remove,
 	.shutdown	= m25p_shutdown,
@@ -402,7 +324,7 @@
 	 */
 };
 
-module_spi_driver(m25p80_driver);
+module_spi_mem_driver(m25p80_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Mike Lavender");
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 16ae4ae..29c0bfd 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -82,7 +82,6 @@
 
 	block = blk_rq_pos(req) << 9 >> tr->blkshift;
 	nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
-	buf = bio_data(req->bio);
 
 	if (req_op(req) == REQ_OP_FLUSH) {
 		if (tr->flush(dev))
@@ -100,9 +99,14 @@
 			return BLK_STS_IOERR;
 		return BLK_STS_OK;
 	case REQ_OP_READ:
-		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
-			if (tr->readsect(dev, block, buf))
+		buf = kmap(bio_page(req->bio)) + bio_offset(req->bio);
+		for (; nsect > 0; nsect--, block++, buf += tr->blksize) {
+			if (tr->readsect(dev, block, buf)) {
+				kunmap(bio_page(req->bio));
 				return BLK_STS_IOERR;
+			}
+		}
+		kunmap(bio_page(req->bio));
 		rq_flush_dcache_pages(req);
 		return BLK_STS_OK;
 	case REQ_OP_WRITE:
@@ -110,9 +114,14 @@
 			return BLK_STS_IOERR;
 
 		rq_flush_dcache_pages(req);
-		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
-			if (tr->writesect(dev, block, buf))
+		buf = kmap(bio_page(req->bio)) + bio_offset(req->bio);
+		for (; nsect > 0; nsect--, block++, buf += tr->blksize) {
+			if (tr->writesect(dev, block, buf)) {
+				kunmap(bio_page(req->bio));
 				return BLK_STS_IOERR;
+			}
+		}
+		kunmap(bio_page(req->bio));
 		return BLK_STS_OK;
 	default:
 		return BLK_STS_IOERR;
@@ -418,7 +427,6 @@
 	new->rq->queuedata = new;
 	blk_queue_logical_block_size(new->rq, tr->blksize);
 
-	blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH);
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
 
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 807d17d..64a1fca 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1829,18 +1829,6 @@
 	mutex_unlock(&mtd_table_mutex);
 	return 0;
 }
-
-static int mtd_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mtd_proc_show, NULL);
-}
-
-static const struct file_operations mtd_proc_ops = {
-	.open		= mtd_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* CONFIG_PROC_FS */
 
 /*====================================================================*/
@@ -1883,7 +1871,7 @@
 		goto err_bdi;
 	}
 
-	proc_mtd = proc_create("mtd", 0, NULL, &mtd_proc_ops);
+	proc_mtd = proc_create_single("mtd", 0, NULL, mtd_proc_show);
 
 	ret = init_mtdchar();
 	if (ret)
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index d8c8c9d..d721f48 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -394,12 +394,13 @@
 
 		for (i=0; i<8; i+=2) {
 			uint32_t tmp = cafe_readl(cafe, NAND_ECC_SYN01 + (i*2));
-			syn[i] = cafe->rs->index_of[tmp & 0xfff];
-			syn[i+1] = cafe->rs->index_of[(tmp >> 16) & 0xfff];
+
+			syn[i] = cafe->rs->codec->index_of[tmp & 0xfff];
+			syn[i+1] = cafe->rs->codec->index_of[(tmp >> 16) & 0xfff];
 		}
 
 		n = decode_rs16(cafe->rs, NULL, NULL, 1367, syn, 0, pos, 0,
-		                pat);
+				pat);
 
 		for (i = 0; i < n; i++) {
 			int p = pos[i];
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 86a258de..2b7b2b9 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -66,6 +66,7 @@
 	int curchip;
 	int mh0_page;
 	int mh1_page;
+	struct rs_control *rs_decoder;
 	struct mtd_info *nextdoc;
 
 	/* Handle the last stage of initialization (BBT scan, partitioning) */
@@ -123,9 +124,6 @@
 /* Number of symbols */
 #define NN 1023
 
-/* the Reed Solomon control structure */
-static struct rs_control *rs_decoder;
-
 /*
  * The HW decoder in the DoC ASIC's provides us a error syndrome,
  * which we must convert to a standard syndrome usable by the generic
@@ -140,6 +138,7 @@
 	int i, j, nerr, errpos[8];
 	uint8_t parity;
 	uint16_t ds[4], s[5], tmp, errval[8], syn[4];
+	struct rs_codec *cd = rs->codec;
 
 	memset(syn, 0, sizeof(syn));
 	/* Convert the ecc bytes into words */
@@ -160,15 +159,15 @@
 	for (j = 1; j < NROOTS; j++) {
 		if (ds[j] == 0)
 			continue;
-		tmp = rs->index_of[ds[j]];
+		tmp = cd->index_of[ds[j]];
 		for (i = 0; i < NROOTS; i++)
-			s[i] ^= rs->alpha_to[rs_modnn(rs, tmp + (FCR + i) * j)];
+			s[i] ^= cd->alpha_to[rs_modnn(cd, tmp + (FCR + i) * j)];
 	}
 
 	/* Calc syn[i] = s[i] / alpha^(v + i) */
 	for (i = 0; i < NROOTS; i++) {
 		if (s[i])
-			syn[i] = rs_modnn(rs, rs->index_of[s[i]] + (NN - FCR - i));
+			syn[i] = rs_modnn(cd, cd->index_of[s[i]] + (NN - FCR - i));
 	}
 	/* Call the decoder library */
 	nerr = decode_rs16(rs, NULL, NULL, 1019, syn, 0, errpos, 0, errval);
@@ -930,7 +929,7 @@
 				calc_ecc[i] = ReadDOC_(docptr, DoC_ECCSyndrome0 + i);
 		}
 
-		ret = doc_ecc_decode(rs_decoder, dat, calc_ecc);
+		ret = doc_ecc_decode(doc->rs_decoder, dat, calc_ecc);
 		if (ret > 0)
 			pr_err("doc200x_correct_data corrected %d errors\n",
 			       ret);
@@ -1421,10 +1420,10 @@
 
 static int __init doc_probe(unsigned long physadr)
 {
+	struct nand_chip *nand = NULL;
+	struct doc_priv *doc = NULL;
 	unsigned char ChipID;
 	struct mtd_info *mtd;
-	struct nand_chip *nand;
-	struct doc_priv *doc;
 	void __iomem *virtadr;
 	unsigned char save_control;
 	unsigned char tmp, tmpb, tmpc;
@@ -1561,8 +1560,25 @@
 		goto fail;
 	}
 
+
+	/*
+	 * Allocate an RS codec instance
+	 *
+	 * Symbol size is 10 (bits)
+	 * Primitive polynomial is x^10+x^3+1
+	 * First consecutive root is 510
+	 * Primitive element to generate roots = 1
+	 * Generator polynomial degree = 4
+	 */
+	doc = (struct doc_priv *) (nand + 1);
+	doc->rs_decoder = init_rs(10, 0x409, FCR, 1, NROOTS);
+	if (!doc->rs_decoder) {
+		pr_err("DiskOnChip: Could not create a RS codec\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+
 	mtd			= nand_to_mtd(nand);
-	doc			= (struct doc_priv *) (nand + 1);
 	nand->bbt_td		= (struct nand_bbt_descr *) (doc + 1);
 	nand->bbt_md		= nand->bbt_td + 1;
 
@@ -1612,7 +1628,6 @@
 		   haven't yet added it.  This is handled without incident by
 		   mtd_device_unregister, as far as I can tell. */
 		nand_release(mtd);
-		kfree(nand);
 		goto fail;
 	}
 
@@ -1625,6 +1640,9 @@
 	   actually a DiskOnChip.  */
 	WriteDOC(save_control, virtadr, DOCControl);
  fail:
+	if (doc)
+		free_rs(doc->rs_decoder);
+	kfree(nand);
 	iounmap(virtadr);
 
 error_ioremap:
@@ -1647,6 +1665,7 @@
 		nand_release(mtd);
 		iounmap(doc->virtadr);
 		release_mem_region(doc->physadr, DOC_IOREMAP_LEN);
+		free_rs(doc->rs_decoder);
 		kfree(nand);
 	}
 }
@@ -1655,27 +1674,12 @@
 {
 	int i, ret = 0;
 
-	/* We could create the decoder on demand, if memory is a concern.
-	 * This way we have it handy, if an error happens
-	 *
-	 * Symbolsize is 10 (bits)
-	 * Primitve polynomial is x^10+x^3+1
-	 * first consecutive root is 510
-	 * primitve element to generate roots = 1
-	 * generator polinomial degree = 4
-	 */
-	rs_decoder = init_rs(10, 0x409, FCR, 1, NROOTS);
-	if (!rs_decoder) {
-		pr_err("DiskOnChip: Could not create a RS decoder\n");
-		return -ENOMEM;
-	}
-
 	if (doc_config_location) {
 		pr_info("Using configured DiskOnChip probe address 0x%lx\n",
 			doc_config_location);
 		ret = doc_probe(doc_config_location);
 		if (ret < 0)
-			goto outerr;
+			return ret;
 	} else {
 		for (i = 0; (doc_locations[i] != 0xffffffff); i++) {
 			doc_probe(doc_locations[i]);
@@ -1686,11 +1690,7 @@
 	if (!doclist) {
 		pr_info("No valid DiskOnChip devices found\n");
 		ret = -ENODEV;
-		goto outerr;
 	}
-	return 0;
- outerr:
-	free_rs(rs_decoder);
 	return ret;
 }
 
@@ -1698,11 +1698,6 @@
 {
 	/* Cleanup the nand/DoC resources */
 	release_nanddoc();
-
-	/* Free the reed solomon resources */
-	if (rs_decoder) {
-		free_rs(rs_decoder);
-	}
 }
 
 module_init(init_nanddoc);
diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
index 8d3cbe2..95e5446 100644
--- a/drivers/mtd/spi-nor/aspeed-smc.c
+++ b/drivers/mtd/spi-nor/aspeed-smc.c
@@ -861,8 +861,9 @@
 		return -ENODEV;
 	info = match->data;
 
-	controller = devm_kzalloc(&pdev->dev, sizeof(*controller) +
-		info->nce * sizeof(controller->chips[0]), GFP_KERNEL);
+	controller = devm_kzalloc(&pdev->dev,
+				  struct_size(controller, chips, info->nce),
+				  GFP_KERNEL);
 	if (!controller)
 		return -ENOMEM;
 	controller->info = info;
diff --git a/drivers/mux/adg792a.c b/drivers/mux/adg792a.c
index 6a8725cf..e8fc2fc 100644
--- a/drivers/mux/adg792a.c
+++ b/drivers/mux/adg792a.c
@@ -58,8 +58,7 @@
 	.set = adg792a_set,
 };
 
-static int adg792a_probe(struct i2c_client *i2c,
-			 const struct i2c_device_id *id)
+static int adg792a_probe(struct i2c_client *i2c)
 {
 	struct device *dev = &i2c->dev;
 	struct mux_chip *mux_chip;
@@ -144,7 +143,7 @@
 		.name		= "adg792a",
 		.of_match_table = of_match_ptr(adg792a_of_match),
 	},
-	.probe		= adg792a_probe,
+	.probe_new	= adg792a_probe,
 	.id_table	= adg792a_id,
 };
 module_i2c_driver(adg792a_driver);
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 01059f1..9f7d83e 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -10,7 +10,7 @@
 static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
-	struct bonding *bond = seq->private;
+	struct bonding *bond = PDE_DATA(file_inode(seq->file));
 	struct list_head *iter;
 	struct slave *slave;
 	loff_t off = 0;
@@ -29,7 +29,7 @@
 
 static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct bonding *bond = seq->private;
+	struct bonding *bond = PDE_DATA(file_inode(seq->file));
 	struct list_head *iter;
 	struct slave *slave;
 	bool found = false;
@@ -56,7 +56,7 @@
 
 static void bond_info_show_master(struct seq_file *seq)
 {
-	struct bonding *bond = seq->private;
+	struct bonding *bond = PDE_DATA(file_inode(seq->file));
 	const struct bond_opt_value *optval;
 	struct slave *curr, *primary;
 	int i;
@@ -167,7 +167,7 @@
 static void bond_info_show_slave(struct seq_file *seq,
 				 const struct slave *slave)
 {
-	struct bonding *bond = seq->private;
+	struct bonding *bond = PDE_DATA(file_inode(seq->file));
 
 	seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name);
 	seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link));
@@ -257,38 +257,14 @@
 	.show  = bond_info_seq_show,
 };
 
-static int bond_info_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq;
-	int res;
-
-	res = seq_open(file, &bond_info_seq_ops);
-	if (!res) {
-		/* recover the pointer buried in proc_dir_entry data */
-		seq = file->private_data;
-		seq->private = PDE_DATA(inode);
-	}
-
-	return res;
-}
-
-static const struct file_operations bond_info_fops = {
-	.owner   = THIS_MODULE,
-	.open    = bond_info_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
 void bond_create_proc_entry(struct bonding *bond)
 {
 	struct net_device *bond_dev = bond->dev;
 	struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
 
 	if (bn->proc_dir) {
-		bond->proc_entry = proc_create_data(bond_dev->name,
-						    0444, bn->proc_dir,
-						    &bond_info_fops, bond);
+		bond->proc_entry = proc_create_seq_data(bond_dev->name, 0444,
+				bn->proc_dir, &bond_info_seq_ops, bond);
 		if (bond->proc_entry == NULL)
 			netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n",
 				    DRV_NAME, bond_dev->name);
diff --git a/drivers/net/can/peak_canfd/peak_pciefd_main.c b/drivers/net/can/peak_canfd/peak_pciefd_main.c
index 3c51a88..b9e2857 100644
--- a/drivers/net/can/peak_canfd/peak_pciefd_main.c
+++ b/drivers/net/can/peak_canfd/peak_pciefd_main.c
@@ -752,8 +752,7 @@
 		can_count = 1;
 
 	/* allocate board structure object */
-	pciefd = devm_kzalloc(&pdev->dev, sizeof(*pciefd) +
-			      can_count * sizeof(*pciefd->can),
+	pciefd = devm_kzalloc(&pdev->dev, struct_size(pciefd, can, can_count),
 			      GFP_KERNEL);
 	if (!pciefd) {
 		err = -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 7ecadb5..413080a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -494,7 +494,7 @@
 	int err;
 	int i;
 
-	d = kzalloc(sizeof(*d) + nfile * sizeof(d->fields[0]), GFP_KERNEL);
+	d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL);
 	if (!d)
 		return -ENOMEM;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index f9c2c03..e1b609c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1191,8 +1191,7 @@
 {
 	struct mlx5_flow_handle *handle;
 
-	handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) *
-			  num_rules, GFP_KERNEL);
+	handle = kzalloc(struct_size(handle, rule, num_rules), GFP_KERNEL);
 	if (!handle)
 		return NULL;
 
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 88c1eee..ad4a354 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1289,9 +1289,8 @@
 
 	pci_set_master(pci_dev);
 
-	/* Set the PCI DMA mask.  Try all possibilities from our
-	 * genuine mask down to 32 bits, because some architectures
-	 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+	/* Set the PCI DMA mask.  Try all possibilities from our genuine mask
+	 * down to 32 bits, because some architectures will allow 40 bit
 	 * masks even though they reject 46 bit masks.
 	 */
 	while (dma_mask > 0x7fffffffUL) {
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index 3d6c91e..dd5530a 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1242,9 +1242,8 @@
 
 	pci_set_master(pci_dev);
 
-	/* Set the PCI DMA mask.  Try all possibilities from our
-	 * genuine mask down to 32 bits, because some architectures
-	 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+	/* Set the PCI DMA mask.  Try all possibilities from our genuine mask
+	 * down to 32 bits, because some architectures will allow 40 bit
 	 * masks even though they reject 46 bit masks.
 	 */
 	while (dma_mask > 0x7fffffffUL) {
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index dfabbae..f347fd9 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -449,20 +449,6 @@
 	.show = bpq_seq_show,
 };
 
-static int bpq_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &bpq_seqops);
-}
-
-static const struct file_operations bpq_info_fops = {
-	.owner = THIS_MODULE,
-	.open = bpq_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
-
 /* ------------------------------------------------------------------------ */
 
 static const struct net_device_ops bpq_netdev_ops = {
@@ -590,7 +576,7 @@
 static int __init bpq_init_driver(void)
 {
 #ifdef CONFIG_PROC_FS
-	if (!proc_create("bpqether", 0444, init_net.proc_net, &bpq_info_fops)) {
+	if (!proc_create_seq("bpqether", 0444, init_net.proc_net, &bpq_seqops)) {
 		printk(KERN_ERR
 			"bpq: cannot create /proc/net/bpqether entry.\n");
 		return -ENOENT;
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 3de2729..6c03932 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -2084,21 +2084,6 @@
 	.stop   = scc_net_seq_stop,
 	.show   = scc_net_seq_show,
 };
-
-
-static int scc_net_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &scc_net_seq_ops);
-}
-
-static const struct file_operations scc_net_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = scc_net_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_private,
-};
-
 #endif /* CONFIG_PROC_FS */
 
  
@@ -2122,7 +2107,7 @@
 	}
 	rtnl_unlock();
 
-	proc_create("z8530drv", 0, init_net.proc_net, &scc_net_seq_fops);
+	proc_create_seq("z8530drv", 0, init_net.proc_net, &scc_net_seq_ops);
 
 	return 0;
 }
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 83034eb..16ec7af 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -841,20 +841,6 @@
 	.stop = yam_seq_stop,
 	.show = yam_seq_show,
 };
-
-static int yam_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &yam_seqops);
-}
-
-static const struct file_operations yam_info_fops = {
-	.owner = THIS_MODULE,
-	.open = yam_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 #endif
 
 
@@ -1168,7 +1154,7 @@
 	yam_timer.expires = jiffies + HZ / 100;
 	add_timer(&yam_timer);
 
-	proc_create("yam", 0444, init_net.proc_net, &yam_info_fops);
+	proc_create_seq("yam", 0444, init_net.proc_net, &yam_seqops);
 	return 0;
  error:
 	while (--i >= 0) {
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 7df0733..de51e8f70f 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1096,21 +1096,6 @@
 	.stop		= pppoe_seq_stop,
 	.show		= pppoe_seq_show,
 };
-
-static int pppoe_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &pppoe_seq_ops,
-			sizeof(struct seq_net_private));
-}
-
-static const struct file_operations pppoe_seq_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pppoe_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 #endif /* CONFIG_PROC_FS */
 
 static const struct proto_ops pppoe_ops = {
@@ -1122,7 +1107,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= pppoe_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
@@ -1146,7 +1131,8 @@
 
 	rwlock_init(&pn->hash_lock);
 
-	pde = proc_create("pppoe", 0444, net->proc_net, &pppoe_seq_fops);
+	pde = proc_create_net("pppoe", 0444, net->proc_net,
+			&pppoe_seq_ops, sizeof(struct seq_net_private));
 #ifdef CONFIG_PROC_FS
 	if (!pde)
 		return -ENOMEM;
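
Editorial note: per-network-namespace seq files get the analogous helper: proc_create_net() rolls the old seq_open_net() wrapper and the seq_net_private allocation into the registration call (the ->poll to ->poll_mask change on the socket ops belongs to a separate tree-wide conversion in this merge). A hedged sketch of the registration, reusing the demo_seqops table from the sketch above:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>
	#include <net/net_namespace.h>

	extern const struct seq_operations demo_seqops;	/* as sketched earlier */

	static int __net_init demo_net_init(struct net *net)
	{
		/*
		 * procfs allocates sizeof(struct seq_net_private) per open
		 * and wires up seq_release_net() for us; the handlers can
		 * reach the namespace via seq_file_net(m).
		 */
		if (!proc_create_net("demo", 0444, net->proc_net,
				     &demo_seqops,
				     sizeof(struct seq_net_private)))
			return -ENOMEM;
		return 0;
	}
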
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index c4267ec..157b67c 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -624,7 +624,6 @@
 	.socketpair = sock_no_socketpair,
 	.accept     = sock_no_accept,
 	.getname    = pptp_getname,
-	.poll       = sock_no_poll,
 	.listen     = sock_no_listen,
 	.shutdown   = sock_no_shutdown,
 	.setsockopt = sock_no_setsockopt,
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 4cf54a7..ad4f6e3 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -694,7 +694,7 @@
 		dir = ".";
 
 	snprintf(filename, sizeof(filename), "%s/%s", dir, file);
-	ret = request_firmware(&fw, filename, ar->dev);
+	ret = firmware_request_nowarn(&fw, filename, ar->dev);
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n",
 		   filename, ret);
 
diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c
index 568810b..c24ee61 100644
--- a/drivers/net/wireless/ath/ath10k/testmode.c
+++ b/drivers/net/wireless/ath/ath10k/testmode.c
@@ -157,7 +157,7 @@
 		 ar->hw_params.fw.dir, ATH10K_FW_UTF_FILE);
 
 	/* load utf firmware image */
-	ret = request_firmware_direct(&fw_file->firmware, filename, ar->dev);
+	ret = firmware_request_nowarn(&fw_file->firmware, filename, ar->dev);
 	ath10k_dbg(ar, ATH10K_DBG_TESTMODE, "testmode fw request '%s': %d\n",
 		   filename, ret);
 
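
Editorial note: firmware_request_nowarn(), new this cycle, behaves like request_firmware() but suppresses the firmware core's warning when the file is absent, which suits ath10k's probing of several candidate firmware names where misses are expected. A hedged sketch of that probing pattern (the paths are made up):

	#include <linux/device.h>
	#include <linux/firmware.h>
	#include <linux/kernel.h>

	static const struct firmware *probe_fw(struct device *dev)
	{
		static const char * const names[] = {
			"vendor/fw-api-3.bin",
			"vendor/fw-api-2.bin",
		};
		const struct firmware *fw;
		int i;

		for (i = 0; i < ARRAY_SIZE(names); i++) {
			/* A miss here is normal, so don't log a warning. */
			if (!firmware_request_nowarn(&fw, names[i], dev))
				return fw;  /* caller must release_firmware() */
		}
		return NULL;
	}
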
diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c
index d122386..b01dc34 100644
--- a/drivers/net/wireless/atmel/atmel.c
+++ b/drivers/net/wireless/atmel/atmel.c
@@ -1482,18 +1482,6 @@
 	return 0;
 }
 
-static int atmel_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, atmel_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations atmel_proc_fops = {
-	.open		= atmel_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static const struct net_device_ops atmel_netdev_ops = {
 	.ndo_open 		= atmel_open,
 	.ndo_stop		= atmel_close,
@@ -1614,7 +1602,8 @@
 
 	netif_carrier_off(dev);
 
-	if (!proc_create_data("driver/atmel", 0, NULL, &atmel_proc_fops, priv))
+	if (!proc_create_single_data("driver/atmel", 0, NULL, atmel_proc_show,
+			priv))
 		printk(KERN_WARNING "atmel: unable to create /proc entry.\n");
 
 	printk(KERN_INFO "%s: Atmel at76c50x. Version %d.%d. MAC %pM\n",
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 94202e2..a6e0722 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -3022,9 +3022,8 @@
 
 			mvmsta = iwl_mvm_sta_from_mac80211(sta);
 			WARN_ON(rcu_access_pointer(mvmsta->ptk_pn[keyidx]));
-			ptk_pn = kzalloc(sizeof(*ptk_pn) +
-					 mvm->trans->num_rx_queues *
-						sizeof(ptk_pn->q[0]),
+			ptk_pn = kzalloc(struct_size(ptk_pn, q,
+						     mvm->trans->num_rx_queues),
 					 GFP_KERNEL);
 			if (!ptk_pn) {
 				ret = -ENOMEM;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c
index b4dfe189..d1884b8 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ap.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c
@@ -69,7 +69,7 @@
 #ifndef PRISM2_NO_PROCFS_DEBUG
 static int ap_debug_proc_show(struct seq_file *m, void *v)
 {
-	struct ap_data *ap = m->private;
+	struct ap_data *ap = PDE_DATA(file_inode(m->file));
 
 	seq_printf(m, "BridgedUnicastFrames=%u\n", ap->bridged_unicast);
 	seq_printf(m, "BridgedMulticastFrames=%u\n", ap->bridged_multicast);
@@ -81,18 +81,6 @@
 	seq_printf(m, "tx_drop_nonassoc=%u\n", ap->tx_drop_nonassoc);
 	return 0;
 }
-
-static int ap_debug_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ap_debug_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ap_debug_proc_fops = {
-	.open		= ap_debug_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* PRISM2_NO_PROCFS_DEBUG */
 
 
@@ -333,7 +321,7 @@
 
 static int ap_control_proc_show(struct seq_file *m, void *v)
 {
-	struct ap_data *ap = m->private;
+	struct ap_data *ap = PDE_DATA(file_inode(m->file));
 	char *policy_txt;
 	struct mac_entry *entry;
 
@@ -365,20 +353,20 @@
 
 static void *ap_control_proc_start(struct seq_file *m, loff_t *_pos)
 {
-	struct ap_data *ap = m->private;
+	struct ap_data *ap = PDE_DATA(file_inode(m->file));
 	spin_lock_bh(&ap->mac_restrictions.lock);
 	return seq_list_start_head(&ap->mac_restrictions.mac_list, *_pos);
 }
 
 static void *ap_control_proc_next(struct seq_file *m, void *v, loff_t *_pos)
 {
-	struct ap_data *ap = m->private;
+	struct ap_data *ap = PDE_DATA(file_inode(m->file));
 	return seq_list_next(v, &ap->mac_restrictions.mac_list, _pos);
 }
 
 static void ap_control_proc_stop(struct seq_file *m, void *v)
 {
-	struct ap_data *ap = m->private;
+	struct ap_data *ap = PDE_DATA(file_inode(m->file));
 	spin_unlock_bh(&ap->mac_restrictions.lock);
 }
 
@@ -389,24 +377,6 @@
 	.show	= ap_control_proc_show,
 };
 
-static int ap_control_proc_open(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &ap_control_proc_seqops);
-	if (ret == 0) {
-		struct seq_file *m = file->private_data;
-		m->private = PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static const struct file_operations ap_control_proc_fops = {
-	.open		= ap_control_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-
 int ap_control_add_mac(struct mac_restrictions *mac_restrictions, u8 *mac)
 {
 	struct mac_entry *entry;
@@ -585,20 +555,20 @@
 
 static void *prism2_ap_proc_start(struct seq_file *m, loff_t *_pos)
 {
-	struct ap_data *ap = m->private;
+	struct ap_data *ap = PDE_DATA(file_inode(m->file));
 	spin_lock_bh(&ap->sta_table_lock);
 	return seq_list_start_head(&ap->sta_list, *_pos);
 }
 
 static void *prism2_ap_proc_next(struct seq_file *m, void *v, loff_t *_pos)
 {
-	struct ap_data *ap = m->private;
+	struct ap_data *ap = PDE_DATA(file_inode(m->file));
 	return seq_list_next(v, &ap->sta_list, _pos);
 }
 
 static void prism2_ap_proc_stop(struct seq_file *m, void *v)
 {
-	struct ap_data *ap = m->private;
+	struct ap_data *ap = PDE_DATA(file_inode(m->file));
 	spin_unlock_bh(&ap->sta_table_lock);
 }
 
@@ -608,23 +578,6 @@
 	.stop	= prism2_ap_proc_stop,
 	.show	= prism2_ap_proc_show,
 };
-
-static int prism2_ap_proc_open(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &prism2_ap_proc_seqops);
-	if (ret == 0) {
-		struct seq_file *m = file->private_data;
-		m->private = PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static const struct file_operations prism2_ap_proc_fops = {
-	.open		= prism2_ap_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
 
 
@@ -896,12 +849,13 @@
 		return;
 
 #ifndef PRISM2_NO_PROCFS_DEBUG
-	proc_create_data("ap_debug", 0, ap->proc, &ap_debug_proc_fops, ap);
+	proc_create_single_data("ap_debug", 0, ap->proc, ap_debug_proc_show, ap);
 #endif /* PRISM2_NO_PROCFS_DEBUG */
 
 #ifndef PRISM2_NO_KERNEL_IEEE80211_MGMT
-	proc_create_data("ap_control", 0, ap->proc, &ap_control_proc_fops, ap);
-	proc_create_data("ap", 0, ap->proc, &prism2_ap_proc_fops, ap);
+	proc_create_seq_data("ap_control", 0, ap->proc, &ap_control_proc_seqops,
+			ap);
+	proc_create_seq_data("ap", 0, ap->proc, &prism2_ap_proc_seqops, ap);
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
 
 }
@@ -1106,18 +1060,6 @@
 	return 0;
 }
 
-static int prism2_sta_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, prism2_sta_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_sta_proc_fops = {
-	.open		= prism2_sta_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void handle_add_proc_queue(struct work_struct *work)
 {
 	struct ap_data *ap = container_of(work, struct ap_data,
@@ -1138,9 +1080,9 @@
 
 		if (sta) {
 			sprintf(name, "%pM", sta->addr);
-			sta->proc = proc_create_data(
+			sta->proc = proc_create_single_data(
 				name, 0, ap->proc,
-				&prism2_sta_proc_fops, sta);
+				prism2_sta_proc_show, sta);
 
 			atomic_dec(&sta->users);
 		}
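
Editorial note: a recurring detail in these hostap conversions — with proc_create_seq_data()/proc_create_single_data() there is no open() hook in which to copy the entry's data pointer into m->private, so the show/start/next/stop handlers fetch it on demand with PDE_DATA(file_inode(m->file)): m->file is the struct file backing the seq_file, and PDE_DATA() returns whatever pointer was registered at creation time. Sketch (struct name illustrative):

	#include <linux/fs.h>
	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	struct demo_state {
		unsigned int count;
	};

	static int demo_show(struct seq_file *m, void *v)
	{
		/* Recover the pointer given to proc_create_single_data(). */
		struct demo_state *st = PDE_DATA(file_inode(m->file));

		seq_printf(m, "count=%u\n", st->count);
		return 0;
	}
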
diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c
index 5c4a17a..2720aa3 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_hw.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c
@@ -2951,19 +2951,6 @@
 
 	return 0;
 }
-
-static int prism2_registers_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, prism2_registers_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_registers_proc_fops = {
-	.open		= prism2_registers_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 #endif /* PRISM2_NO_PROCFS_DEBUG */
 
 
@@ -3279,8 +3266,8 @@
 		}
 		hostap_init_proc(local);
 #ifndef PRISM2_NO_PROCFS_DEBUG
-		proc_create_data("registers", 0, local->proc,
-				 &prism2_registers_proc_fops, local);
+		proc_create_single_data("registers", 0, local->proc,
+				 prism2_registers_proc_show, local);
 #endif /* PRISM2_NO_PROCFS_DEBUG */
 		hostap_init_ap_proc(local);
 		return 0;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_proc.c b/drivers/net/wireless/intersil/hostap/hostap_proc.c
index d234231..5b33cca 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_proc.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_proc.c
@@ -43,18 +43,6 @@
 
 	return 0;
 }
-
-static int prism2_debug_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, prism2_debug_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_debug_proc_fops = {
-	.open		= prism2_debug_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* PRISM2_NO_PROCFS_DEBUG */
 
 
@@ -95,19 +83,6 @@
 	return 0;
 }
 
-static int prism2_stats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, prism2_stats_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_stats_proc_fops = {
-	.open		= prism2_stats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-
 static int prism2_wds_proc_show(struct seq_file *m, void *v)
 {
 	struct list_head *ptr = v;
@@ -122,20 +97,20 @@
 
 static void *prism2_wds_proc_start(struct seq_file *m, loff_t *_pos)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	read_lock_bh(&local->iface_lock);
 	return seq_list_start(&local->hostap_interfaces, *_pos);
 }
 
 static void *prism2_wds_proc_next(struct seq_file *m, void *v, loff_t *_pos)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	return seq_list_next(v, &local->hostap_interfaces, _pos);
 }
 
 static void prism2_wds_proc_stop(struct seq_file *m, void *v)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	read_unlock_bh(&local->iface_lock);
 }
 
@@ -146,27 +121,9 @@
 	.show	= prism2_wds_proc_show,
 };
 
-static int prism2_wds_proc_open(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &prism2_wds_proc_seqops);
-	if (ret == 0) {
-		struct seq_file *m = file->private_data;
-		m->private = PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static const struct file_operations prism2_wds_proc_fops = {
-	.open		= prism2_wds_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-
 static int prism2_bss_list_proc_show(struct seq_file *m, void *v)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	struct list_head *ptr = v;
 	struct hostap_bss_info *bss;
 
@@ -193,20 +150,20 @@
 
 static void *prism2_bss_list_proc_start(struct seq_file *m, loff_t *_pos)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	spin_lock_bh(&local->lock);
 	return seq_list_start_head(&local->bss_list, *_pos);
 }
 
 static void *prism2_bss_list_proc_next(struct seq_file *m, void *v, loff_t *_pos)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	return seq_list_next(v, &local->bss_list, _pos);
 }
 
 static void prism2_bss_list_proc_stop(struct seq_file *m, void *v)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	spin_unlock_bh(&local->lock);
 }
 
@@ -217,24 +174,6 @@
 	.show	= prism2_bss_list_proc_show,
 };
 
-static int prism2_bss_list_proc_open(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &prism2_bss_list_proc_seqops);
-	if (ret == 0) {
-		struct seq_file *m = file->private_data;
-		m->private = PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static const struct file_operations prism2_bss_list_proc_fops = {
-	.open		= prism2_bss_list_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-
 static int prism2_crypt_proc_show(struct seq_file *m, void *v)
 {
 	local_info_t *local = m->private;
@@ -252,19 +191,6 @@
 	return 0;
 }
 
-static int prism2_crypt_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, prism2_crypt_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations prism2_crypt_proc_fops = {
-	.open		= prism2_crypt_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-
 static ssize_t prism2_pda_proc_read(struct file *file, char __user *buf,
 				    size_t count, loff_t *_pos)
 {
@@ -342,7 +268,7 @@
 #ifndef PRISM2_NO_STATION_MODES
 static int prism2_scan_results_proc_show(struct seq_file *m, void *v)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	unsigned long entry;
 	int i, len;
 	struct hfa384x_hostscan_result *scanres;
@@ -392,7 +318,7 @@
 
 static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	spin_lock_bh(&local->lock);
 
 	/* We have a header (pos 0) + N results to show (pos 1...N) */
@@ -403,7 +329,7 @@
 
 static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *_pos)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 
 	++*_pos;
 	if (*_pos > local->last_scan_results_count)
@@ -413,7 +339,7 @@
 
 static void prism2_scan_results_proc_stop(struct seq_file *m, void *v)
 {
-	local_info_t *local = m->private;
+	local_info_t *local = PDE_DATA(file_inode(m->file));
 	spin_unlock_bh(&local->lock);
 }
 
@@ -423,25 +349,6 @@
 	.stop	= prism2_scan_results_proc_stop,
 	.show	= prism2_scan_results_proc_show,
 };
-
-static int prism2_scan_results_proc_open(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &prism2_scan_results_proc_seqops);
-	if (ret == 0) {
-		struct seq_file *m = file->private_data;
-		m->private = PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static const struct file_operations prism2_scan_results_proc_fops = {
-	.open		= prism2_scan_results_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-
 #endif /* PRISM2_NO_STATION_MODES */
 
 
@@ -463,29 +370,29 @@
 	}
 
 #ifndef PRISM2_NO_PROCFS_DEBUG
-	proc_create_data("debug", 0, local->proc,
-			 &prism2_debug_proc_fops, local);
+	proc_create_single_data("debug", 0, local->proc,
+			prism2_debug_proc_show, local);
 #endif /* PRISM2_NO_PROCFS_DEBUG */
-	proc_create_data("stats", 0, local->proc,
-			 &prism2_stats_proc_fops, local);
-	proc_create_data("wds", 0, local->proc,
-			 &prism2_wds_proc_fops, local);
+	proc_create_single_data("stats", 0, local->proc, prism2_stats_proc_show,
+			local);
+	proc_create_seq_data("wds", 0, local->proc,
+			&prism2_wds_proc_seqops, local);
 	proc_create_data("pda", 0, local->proc,
 			 &prism2_pda_proc_fops, local);
 	proc_create_data("aux_dump", 0, local->proc,
 			 local->func->read_aux_fops ?: &prism2_aux_dump_proc_fops,
 			 local);
-	proc_create_data("bss_list", 0, local->proc,
-			 &prism2_bss_list_proc_fops, local);
-	proc_create_data("crypt", 0, local->proc,
-			 &prism2_crypt_proc_fops, local);
+	proc_create_seq_data("bss_list", 0, local->proc,
+			&prism2_bss_list_proc_seqops, local);
+	proc_create_single_data("crypt", 0, local->proc, prism2_crypt_proc_show,
+		local);
 #ifdef PRISM2_IO_DEBUG
-	proc_create_data("io_debug", 0, local->proc,
-			 &prism2_io_debug_proc_fops, local);
+	proc_create_single_data("io_debug", 0, local->proc,
+			prism2_debug_proc_show, local);
 #endif /* PRISM2_IO_DEBUG */
 #ifndef PRISM2_NO_STATION_MODES
-	proc_create_data("scan_results", 0, local->proc,
-			 &prism2_scan_results_proc_fops, local);
+	proc_create_seq_data("scan_results", 0, local->proc,
+			&prism2_scan_results_proc_seqops, local);
 #endif /* PRISM2_NO_STATION_MODES */
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c
index b67acc6..1e8cdce 100644
--- a/drivers/net/wireless/mediatek/mt76/agg-rx.c
+++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c
@@ -246,8 +246,7 @@
 
 	mt76_rx_aggr_stop(dev, wcid, tidno);
 
-	tid = kzalloc(sizeof(*tid) + size * sizeof(tid->reorder_buf[0]),
-		      GFP_KERNEL);
+	tid = kzalloc(struct_size(tid, reorder_buf, size), GFP_KERNEL);
 	if (!tid)
 		return -ENOMEM;
 
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 7f9b16b..a7e0a17 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2663,19 +2663,6 @@
 	}
 	return 0;
 }
-
-static int ray_cs_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ray_cs_proc_show, NULL);
-}
-
-static const struct file_operations ray_cs_proc_fops = {
-	.owner = THIS_MODULE,
-	.open = ray_cs_proc_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
 #endif
 /*===========================================================================*/
 static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type)
@@ -2814,7 +2801,7 @@
 #ifdef CONFIG_PROC_FS
 	proc_mkdir("driver/ray_cs", NULL);
 
-	proc_create("driver/ray_cs/ray_cs", 0, NULL, &ray_cs_proc_fops);
+	proc_create_single("driver/ray_cs/ray_cs", 0, NULL, ray_cs_proc_show);
 	proc_create("driver/ray_cs/essid", 0200, NULL, &ray_cs_essid_proc_fops);
 	proc_create_data("driver/ray_cs/net_type", 0200, NULL, &int_proc_fops,
 			 &net_type);
diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
index e153e8b..d5553c4 100644
--- a/drivers/nfc/pn533/usb.c
+++ b/drivers/nfc/pn533/usb.c
@@ -62,6 +62,9 @@
 	struct urb *out_urb;
 	struct urb *in_urb;
 
+	struct urb *ack_urb;
+	u8 *ack_buffer;
+
 	struct pn533 *priv;
 };
 
@@ -150,13 +153,16 @@
 	struct pn533_usb_phy *phy = dev->phy;
 	static const u8 ack[6] = {0x00, 0x00, 0xff, 0x00, 0xff, 0x00};
 	/* spec 7.1.1.3:  Preamble, SoPC (2), ACK Code (2), Postamble */
-	int rc;
 
-	phy->out_urb->transfer_buffer = (u8 *)ack;
-	phy->out_urb->transfer_buffer_length = sizeof(ack);
-	rc = usb_submit_urb(phy->out_urb, flags);
+	if (!phy->ack_buffer) {
+		phy->ack_buffer = kmemdup(ack, sizeof(ack), flags);
+		if (!phy->ack_buffer)
+			return -ENOMEM;
+	}
 
-	return rc;
+	phy->ack_urb->transfer_buffer = phy->ack_buffer;
+	phy->ack_urb->transfer_buffer_length = sizeof(ack);
+	return usb_submit_urb(phy->ack_urb, flags);
 }
 
 static int pn533_usb_send_frame(struct pn533 *dev,
@@ -375,26 +381,31 @@
 	/* Power on the reader (CCID cmd) */
 	u8 cmd[10] = {PN533_ACR122_PC_TO_RDR_ICCPOWERON,
 		      0, 0, 0, 0, 0, 0, 3, 0, 0};
+	char *buffer;
+	int transferred;
 	int rc;
 	void *cntx;
 	struct pn533_acr122_poweron_rdr_arg arg;
 
 	dev_dbg(&phy->udev->dev, "%s\n", __func__);
 
+	buffer = kmemdup(cmd, sizeof(cmd), GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
 	init_completion(&arg.done);
 	cntx = phy->in_urb->context;  /* backup context */
 
 	phy->in_urb->complete = pn533_acr122_poweron_rdr_resp;
 	phy->in_urb->context = &arg;
 
-	phy->out_urb->transfer_buffer = cmd;
-	phy->out_urb->transfer_buffer_length = sizeof(cmd);
-
 	print_hex_dump_debug("ACR122 TX: ", DUMP_PREFIX_NONE, 16, 1,
 		       cmd, sizeof(cmd), false);
 
-	rc = usb_submit_urb(phy->out_urb, GFP_KERNEL);
-	if (rc) {
+	rc = usb_bulk_msg(phy->udev, phy->out_urb->pipe, buffer, sizeof(cmd),
+			  &transferred, 0);
+	kfree(buffer);
+	if (rc || (transferred != sizeof(cmd))) {
 		nfc_err(&phy->udev->dev,
 			"Reader power on cmd error %d\n", rc);
 		return rc;
@@ -490,8 +501,9 @@
 
 	phy->in_urb = usb_alloc_urb(0, GFP_KERNEL);
 	phy->out_urb = usb_alloc_urb(0, GFP_KERNEL);
+	phy->ack_urb = usb_alloc_urb(0, GFP_KERNEL);
 
-	if (!phy->in_urb || !phy->out_urb)
+	if (!phy->in_urb || !phy->out_urb || !phy->ack_urb)
 		goto error;
 
 	usb_fill_bulk_urb(phy->in_urb, phy->udev,
@@ -501,7 +513,9 @@
 	usb_fill_bulk_urb(phy->out_urb, phy->udev,
 			  usb_sndbulkpipe(phy->udev, out_endpoint),
 			  NULL, 0, pn533_send_complete, phy);
-
+	usb_fill_bulk_urb(phy->ack_urb, phy->udev,
+			  usb_sndbulkpipe(phy->udev, out_endpoint),
+			  NULL, 0, pn533_send_complete, phy);
 
 	switch (id->driver_info) {
 	case PN533_DEVICE_STD:
@@ -554,6 +568,7 @@
 error:
 	usb_free_urb(phy->in_urb);
 	usb_free_urb(phy->out_urb);
+	usb_free_urb(phy->ack_urb);
 	usb_put_dev(phy->udev);
 	kfree(in_buf);
 
@@ -573,10 +588,13 @@
 
 	usb_kill_urb(phy->in_urb);
 	usb_kill_urb(phy->out_urb);
+	usb_kill_urb(phy->ack_urb);
 
 	kfree(phy->in_urb->transfer_buffer);
 	usb_free_urb(phy->in_urb);
 	usb_free_urb(phy->out_urb);
+	usb_free_urb(phy->ack_urb);
+	kfree(phy->ack_buffer);
 
 	nfc_info(&interface->dev, "NXP PN533 NFC device disconnected\n");
 }
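
Editorial note: beyond adding the dedicated ack URB, the pn533 changes enforce a general USB rule: urb->transfer_buffer must be kmalloc-style DMA-able memory, never stack or const/rodata data, since the host controller driver may DMA-map it. Hence the kmemdup() of the constant ACK frame (cached in phy->ack_buffer) and of the power-on command. A hedged sketch of the safe synchronous variant:

	#include <linux/slab.h>
	#include <linux/usb.h>

	static int send_const_frame(struct usb_device *udev, unsigned int pipe,
				    const u8 *frame, int len)
	{
		int transferred, rc;
		u8 *buf = kmemdup(frame, len, GFP_KERNEL); /* heap: DMA-safe */

		if (!buf)
			return -ENOMEM;
		rc = usb_bulk_msg(udev, pipe, buf, len, &transferred, 1000);
		kfree(buf);
		if (!rc && transferred != len)
			rc = -EIO;	/* short write */
		return rc;
	}
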
diff --git a/drivers/nubus/bus.c b/drivers/nubus/bus.c
index d306c34..a59b6c4 100644
--- a/drivers/nubus/bus.c
+++ b/drivers/nubus/bus.c
@@ -63,20 +63,15 @@
 	.init_name	= "nubus",
 };
 
-int __init nubus_bus_register(void)
+static int __init nubus_bus_register(void)
 {
-	int err;
+	return bus_register(&nubus_bus_type);
+}
+postcore_initcall(nubus_bus_register);
 
-	err = device_register(&nubus_parent);
-	if (err)
-		return err;
-
-	err = bus_register(&nubus_bus_type);
-	if (!err)
-		return 0;
-
-	device_unregister(&nubus_parent);
-	return err;
+int __init nubus_parent_device_register(void)
+{
+	return device_register(&nubus_parent);
 }
 
 static void nubus_device_release(struct device *dev)
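
Editorial note: registering the bus type from its own postcore_initcall, rather than on the subsystem's init path, guarantees the bus exists before anything tries to hang a device or driver off it — initcalls run in level order (core, postcore, arch, subsys, fs, device, late) — and nubus_init() is left to register only the parent device. The general pattern, sketched:

	#include <linux/device.h>
	#include <linux/init.h>

	static struct bus_type demo_bus_type = {
		.name = "demobus",
	};

	static int __init demo_bus_register(void)
	{
		/*
		 * postcore runs before the subsys/device initcalls that
		 * register drivers and devices against this bus.
		 */
		return bus_register(&demo_bus_type);
	}
	postcore_initcall(demo_bus_register);
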
diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c
index 4621ff9..bb0d63a 100644
--- a/drivers/nubus/nubus.c
+++ b/drivers/nubus/nubus.c
@@ -875,7 +875,7 @@
 		return 0;
 
 	nubus_proc_init();
-	err = nubus_bus_register();
+	err = nubus_parent_device_register();
 	if (err)
 		return err;
 	nubus_scan_bus();
diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c
index c2e5a7e..88e1f9a 100644
--- a/drivers/nubus/proc.c
+++ b/drivers/nubus/proc.c
@@ -45,18 +45,6 @@
 	return 0;
 }
 
-static int nubus_devices_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, nubus_devices_proc_show, NULL);
-}
-
-static const struct file_operations nubus_devices_proc_fops = {
-	.open		= nubus_devices_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static struct proc_dir_entry *proc_bus_nubus_dir;
 
 /*
@@ -149,18 +137,6 @@
 	return 0;
 }
 
-static int nubus_proc_rsrc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, nubus_proc_rsrc_show, inode);
-}
-
-static const struct file_operations nubus_proc_rsrc_fops = {
-	.open		= nubus_proc_rsrc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir,
 			     const struct nubus_dirent *ent,
 			     unsigned int size)
@@ -176,8 +152,8 @@
 		pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size);
 	else
 		pde_data = NULL;
-	proc_create_data(name, S_IFREG | 0444, procdir,
-			 &nubus_proc_rsrc_fops, pde_data);
+	proc_create_single_data(name, S_IFREG | 0444, procdir,
+			nubus_proc_rsrc_show, pde_data);
 }
 
 void nubus_proc_add_rsrc(struct proc_dir_entry *procdir,
@@ -190,32 +166,21 @@
 		return;
 
 	snprintf(name, sizeof(name), "%x", ent->type);
-	proc_create_data(name, S_IFREG | 0444, procdir,
-			 &nubus_proc_rsrc_fops,
-			 nubus_proc_alloc_pde_data(data, 0));
+	proc_create_single_data(name, S_IFREG | 0444, procdir,
+			nubus_proc_rsrc_show,
+			nubus_proc_alloc_pde_data(data, 0));
 }
 
 /*
  * /proc/nubus stuff
  */
 
-static int nubus_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, nubus_proc_show, NULL);
-}
-
-static const struct file_operations nubus_proc_fops = {
-	.open		= nubus_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 void __init nubus_proc_init(void)
 {
-	proc_create("nubus", 0, NULL, &nubus_proc_fops);
+	proc_create_single("nubus", 0, NULL, nubus_proc_show);
 	proc_bus_nubus_dir = proc_mkdir("bus/nubus", NULL);
 	if (!proc_bus_nubus_dir)
 		return;
-	proc_create("devices", 0, proc_bus_nubus_dir, &nubus_devices_proc_fops);
+	proc_create_single("devices", 0, proc_bus_nubus_dir,
+			nubus_devices_proc_show);
 }
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 3085227..2e96b34 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -276,7 +276,8 @@
 	if (rw == READ) {
 		if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
 			return -EIO;
-		return memcpy_mcsafe(buf, nsio->addr + offset, size);
+		if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
+			return -EIO;
 	}
 
 	if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 9d71492..e023d6a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -101,15 +101,15 @@
 		void *pmem_addr, unsigned int len)
 {
 	unsigned int chunk;
-	int rc;
+	unsigned long rem;
 	void *mem;
 
 	while (len) {
 		mem = kmap_atomic(page);
 		chunk = min_t(unsigned int, len, PAGE_SIZE);
-		rc = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+		rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
 		kunmap_atomic(mem);
-		if (rc)
+		if (rem)
 			return BLK_STS_IOERR;
 		len -= chunk;
 		off = 0;
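
Editorial note: both nvdimm hunks adjust to memcpy_mcsafe()'s new calling convention: it now returns the number of bytes left uncopied after a machine-check abort (0 on success), copy_to_user()-style, instead of -EFAULT, so the remainder must be translated into an errno or block-layer status rather than returned directly. Sketch of the convention:

	#include <linux/errno.h>
	#include <linux/string.h>

	static int read_media(void *dst, const void *src, size_t len)
	{
		/* Nonzero means 'rem' bytes were NOT copied (MCE hit). */
		unsigned long rem = memcpy_mcsafe(dst, src, len);

		return rem ? -EIO : 0;	/* never return rem itself */
	}
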
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b9ca782..c8b3006 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -101,6 +101,15 @@
 static int nvme_revalidate_disk(struct gendisk *disk);
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 
+static void nvme_queue_scan(struct nvme_ctrl *ctrl)
+{
+	/*
+	 * Only queue new scan work when admin and IO queues are both alive
+	 */
+	if (ctrl->state == NVME_CTRL_LIVE)
+		queue_work(nvme_wq, &ctrl->scan_work);
+}
+
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -244,9 +253,6 @@
 
 void nvme_cancel_request(struct request *req, void *data, bool reserved)
 {
-	if (!blk_mq_request_started(req))
-		return;
-
 	dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
 				"Cancelling I/O %d", req->tag);
 
@@ -351,7 +357,7 @@
 	nvme_mpath_remove_disk(head);
 	ida_simple_remove(&head->subsys->ns_ida, head->instance);
 	list_del_init(&head->entry);
-	cleanup_srcu_struct(&head->srcu);
+	cleanup_srcu_struct_quiesced(&head->srcu);
 	nvme_put_subsystem(head->subsys);
 	kfree(head);
 }
@@ -1033,6 +1039,21 @@
 }
 EXPORT_SYMBOL_GPL(nvme_set_queue_count);
 
+#define NVME_AEN_SUPPORTED \
+	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
+
+static void nvme_enable_aen(struct nvme_ctrl *ctrl)
+{
+	u32 result;
+	int status;
+
+	status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT,
+			ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result);
+	if (status)
+		dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
+			 ctrl->oaes & NVME_AEN_SUPPORTED);
+}
+
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_user_io io;
@@ -1351,13 +1372,19 @@
 	blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
 }
 
-static void nvme_config_discard(struct nvme_ctrl *ctrl,
-		unsigned stream_alignment, struct request_queue *queue)
+static void nvme_config_discard(struct nvme_ns *ns)
 {
+	struct nvme_ctrl *ctrl = ns->ctrl;
+	struct request_queue *queue = ns->queue;
 	u32 size = queue_logical_block_size(queue);
 
-	if (stream_alignment)
-		size *= stream_alignment;
+	if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) {
+		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue);
+		return;
+	}
+
+	if (ctrl->nr_streams && ns->sws && ns->sgs)
+		size *= ns->sws * ns->sgs;
 
 	BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
 			NVME_DSM_MAX_RANGES);
@@ -1365,9 +1392,12 @@
 	queue->limits.discard_alignment = 0;
 	queue->limits.discard_granularity = size;
 
+	/* If discard is already enabled, don't reset queue limits */
+	if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue))
+		return;
+
 	blk_queue_max_discard_sectors(queue, UINT_MAX);
 	blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
-	blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue);
 
 	if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
 		blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
@@ -1411,10 +1441,6 @@
 {
 	sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
 	unsigned short bs = 1 << ns->lba_shift;
-	unsigned stream_alignment = 0;
-
-	if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
-		stream_alignment = ns->sws * ns->sgs;
 
 	blk_mq_freeze_queue(disk->queue);
 	blk_integrity_unregister(disk);
@@ -1428,10 +1454,9 @@
 		nvme_init_integrity(disk, ns->ms, ns->pi_type);
 	if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
 		capacity = 0;
-	set_capacity(disk, capacity);
 
-	if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
-		nvme_config_discard(ns->ctrl, stream_alignment, disk->queue);
+	set_capacity(disk, capacity);
+	nvme_config_discard(ns);
 	blk_mq_unfreeze_queue(disk->queue);
 }
 
@@ -1577,7 +1602,7 @@
 static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
 		enum pr_type type, bool abort)
 {
-	u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
+	u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
 	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
 }
 
@@ -1589,7 +1614,7 @@
 
 static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 {
-	u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
+	u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
 	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
 }
 
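
Editorial note: the two one-character fixes above are a classic C precedence bug: ?: binds more loosely than |, so `type << 8 | abort ? 2 : 1` parses as `(type << 8 | abort) ? 2 : 1` and the whole cdw10 value collapses to 1 or 2, dropping the type field entirely. A userspace worked example:

	#include <stdio.h>

	int main(void)
	{
		unsigned int type = 3, do_abort = 0;

		/* Buggy: ((type << 8) | do_abort) ? 2 : 1  ->  2      */
		unsigned int buggy = type << 8 | do_abort ? 2 : 1;
		/* Fixed: (type << 8) | (do_abort ? 2 : 1)  ->  0x301  */
		unsigned int fixed = type << 8 | (do_abort ? 2 : 1);

		printf("buggy=%#x fixed=%#x\n", buggy, fixed);
		return 0;
	}
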
@@ -2183,7 +2208,8 @@
 		 * Verify that the subsystem actually supports multiple
 		 * controllers, else bail out.
 		 */
-		if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
+		if (!ctrl->opts->discovery_nqn &&
+		    nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
 			dev_err(ctrl->device,
 				"ignoring ctrl due to duplicate subnqn (%s).\n",
 				found->subnqn);
@@ -2314,7 +2340,7 @@
 	if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
 		ret = nvme_get_effects_log(ctrl);
 		if (ret < 0)
-			return ret;
+			goto out_free;
 	}
 
 	if (!ctrl->identified) {
@@ -2345,6 +2371,7 @@
 
 	ctrl->oacs = le16_to_cpu(id->oacs);
 	ctrl->oncs = le16_to_cpup(&id->oncs);
+	ctrl->oaes = le32_to_cpu(id->oaes);
 	atomic_set(&ctrl->abort_limit, id->acl + 1);
 	ctrl->vwc = id->vwc;
 	ctrl->cntlid = le16_to_cpup(&id->cntlid);
@@ -3170,6 +3197,42 @@
 	nvme_remove_invalid_namespaces(ctrl, nn);
 }
 
+static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
+{
+	size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
+	__le32 *log;
+	int error, i;
+	bool ret = false;
+
+	log = kzalloc(log_size, GFP_KERNEL);
+	if (!log)
+		return false;
+
+	error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+	if (error) {
+		dev_warn(ctrl->device,
+			"reading changed ns log failed: %d\n", error);
+		goto out_free_log;
+	}
+
+	if (log[0] == cpu_to_le32(0xffffffff))
+		goto out_free_log;
+
+	for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
+		u32 nsid = le32_to_cpu(log[i]);
+
+		if (nsid == 0)
+			break;
+		dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
+		nvme_validate_ns(ctrl, nsid);
+	}
+	ret = true;
+
+out_free_log:
+	kfree(log);
+	return ret;
+}
+
 static void nvme_scan_work(struct work_struct *work)
 {
 	struct nvme_ctrl *ctrl =
@@ -3182,6 +3245,12 @@
 
 	WARN_ON_ONCE(!ctrl->tagset);
 
+	if (test_and_clear_bit(EVENT_NS_CHANGED, &ctrl->events)) {
+		if (nvme_scan_changed_ns_log(ctrl))
+			goto out_sort_namespaces;
+		dev_info(ctrl->device, "rescanning namespaces.\n");
+	}
+
 	if (nvme_identify_ctrl(ctrl, &id))
 		return;
 
@@ -3189,26 +3258,17 @@
 	if (ctrl->vs >= NVME_VS(1, 1, 0) &&
 	    !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
 		if (!nvme_scan_ns_list(ctrl, nn))
-			goto done;
+			goto out_free_id;
 	}
 	nvme_scan_ns_sequential(ctrl, nn);
- done:
+out_free_id:
+	kfree(id);
+out_sort_namespaces:
 	down_write(&ctrl->namespaces_rwsem);
 	list_sort(NULL, &ctrl->namespaces, ns_cmp);
 	up_write(&ctrl->namespaces_rwsem);
-	kfree(id);
 }
 
-void nvme_queue_scan(struct nvme_ctrl *ctrl)
-{
-	/*
-	 * Only new queue scan work when admin and IO queues are both alive
-	 */
-	if (ctrl->state == NVME_CTRL_LIVE)
-		queue_work(nvme_wq, &ctrl->scan_work);
-}
-EXPORT_SYMBOL_GPL(nvme_queue_scan);
-
 /*
  * This function iterates the namespace list unlocked to allow recovery from
  * controller failure. It is up to the caller to ensure the namespace list is
@@ -3322,8 +3382,23 @@
 	nvme_get_fw_slot_info(ctrl);
 }
 
+static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
+{
+	switch ((result & 0xff00) >> 8) {
+	case NVME_AER_NOTICE_NS_CHANGED:
+		set_bit(EVENT_NS_CHANGED, &ctrl->events);
+		nvme_queue_scan(ctrl);
+		break;
+	case NVME_AER_NOTICE_FW_ACT_STARTING:
+		queue_work(nvme_wq, &ctrl->fw_act_work);
+		break;
+	default:
+		dev_warn(ctrl->device, "async event result %08x\n", result);
+	}
+}
+
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
-		union nvme_result *res)
+		volatile union nvme_result *res)
 {
 	u32 result = le32_to_cpu(res->u32);
 
@@ -3331,6 +3406,9 @@
 		return;
 
 	switch (result & 0x7) {
+	case NVME_AER_NOTICE:
+		nvme_handle_aen_notice(ctrl, result);
+		break;
 	case NVME_AER_ERROR:
 	case NVME_AER_SMART:
 	case NVME_AER_CSS:
@@ -3340,18 +3418,6 @@
 	default:
 		break;
 	}
-
-	switch (result & 0xff07) {
-	case NVME_AER_NOTICE_NS_CHANGED:
-		dev_info(ctrl->device, "rescanning\n");
-		nvme_queue_scan(ctrl);
-		break;
-	case NVME_AER_NOTICE_FW_ACT_STARTING:
-		queue_work(nvme_wq, &ctrl->fw_act_work);
-		break;
-	default:
-		dev_warn(ctrl->device, "async event result %08x\n", result);
-	}
 	queue_work(nvme_wq, &ctrl->async_event_work);
 }
 EXPORT_SYMBOL_GPL(nvme_complete_async_event);
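
Editorial note: the rework decodes the 32-bit async event result per the NVMe spec: bits 2:0 carry the event type and, for Notice events, bits 15:8 carry the notice identifier. The old `result & 0xff07` mask conflated the two fields; splitting the switch lets namespace-change and firmware-activation notices be routed from one place. Decoding sketch:

	#include <linux/kernel.h>

	/*
	 * AEN completion dword 0 layout (per the NVMe spec):
	 *   bits  2:0  event type (error / SMART / notice / ...)
	 *   bits 15:8  event information (notice id for Notice events)
	 */
	static void decode_aen(u32 result, u32 *type, u32 *info)
	{
		*type = result & 0x7;
		*info = (result >> 8) & 0xff;
	}
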
@@ -3374,6 +3440,7 @@
 
 	if (ctrl->queue_count > 1) {
 		nvme_queue_scan(ctrl);
+		nvme_enable_aen(ctrl);
 		queue_work(nvme_wq, &ctrl->async_event_work);
 		nvme_start_queues(ctrl);
 	}
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 7ae732a..5f5f706 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -57,7 +57,7 @@
 		goto out_unlock;
 
 	kref_init(&host->ref);
-	memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
+	strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
 
 	list_add_tail(&host->list, &nvmf_hosts);
 out_unlock:
@@ -545,71 +545,54 @@
 		return BLK_STS_OK;
 
 	switch (ctrl->state) {
-	case NVME_CTRL_DELETING:
-		goto reject_io;
-
 	case NVME_CTRL_NEW:
 	case NVME_CTRL_CONNECTING:
+	case NVME_CTRL_DELETING:
+		/*
+		 * This is the case of starting a new association or deleting an existing
+		 * one, but connectivity was lost before it was fully created or torn
+		 * down. We need to error the commands used to initialize the
+		 * controller so the reconnect can go into a retry attempt.  The
+		 * commands should all be marked REQ_FAILFAST_DRIVER, which will
+		 * hit the reject path below. Anything else will be queued while
+		 * the state settles.
+		 */
 		if (!is_connected)
-			/*
-			 * This is the case of starting a new
-			 * association but connectivity was lost
-			 * before it was fully created. We need to
-			 * error the commands used to initialize the
-			 * controller so the reconnect can go into a
-			 * retry attempt. The commands should all be
-			 * marked REQ_FAILFAST_DRIVER, which will hit
-			 * the reject path below. Anything else will
-			 * be queued while the state settles.
-			 */
-			goto reject_or_queue_io;
+			break;
 
-		if ((queue_live &&
-		     !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) ||
-		    (!queue_live && blk_rq_is_passthrough(rq) &&
-		     cmd->common.opcode == nvme_fabrics_command &&
-		     cmd->fabrics.fctype == nvme_fabrics_type_connect))
-			/*
-			 * If queue is live, allow only commands that
-			 * are internally generated pass through. These
-			 * are commands on the admin queue to initialize
-			 * the controller. This will reject any ioctl
-			 * admin cmds received while initializing.
-			 *
-			 * If the queue is not live, allow only a
-			 * connect command. This will reject any ioctl
-			 * admin cmd as well as initialization commands
-			 * if the controller reverted the queue to non-live.
-			 */
+		/*
+		 * If queue is live, allow only commands that are internally
+		 * generated pass through.  These are commands on the admin
+		 * queue to initialize the controller. This will reject any
+		 * ioctl admin cmds received while initializing.
+		 */
+		if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
 			return BLK_STS_OK;
 
 		/*
-		 * fall-thru to the reject_or_queue_io clause
+		 * If the queue is not live, allow only a connect command.  This
+		 * will reject any ioctl admin cmd as well as initialization
+		 * commands if the controller reverted the queue to non-live.
 		 */
+		if (!queue_live && blk_rq_is_passthrough(rq) &&
+		     cmd->common.opcode == nvme_fabrics_command &&
+		     cmd->fabrics.fctype == nvme_fabrics_type_connect)
+			return BLK_STS_OK;
 		break;
-
-	/* these cases fall-thru
-	 * case NVME_CTRL_LIVE:
-	 * case NVME_CTRL_RESETTING:
-	 */
 	default:
 		break;
 	}
 
-reject_or_queue_io:
 	/*
-	 * Any other new io is something we're not in a state to send
-	 * to the device. Default action is to busy it and retry it
-	 * after the controller state is recovered. However, anything
-	 * marked for failfast or nvme multipath is immediately failed.
-	 * Note: commands used to initialize the controller will be
-	 *  marked for failfast.
+	 * Any other new io is something we're not in a state to send to the
+	 * device.  Default action is to busy it and retry it after the
+	 * controller state is recovered. However, anything marked for failfast
+	 * or nvme multipath is immediately failed.  Note: commands used to
+	 * initialize the controller will be marked for failfast.
 	 * Note: nvme cli/ioctl commands are marked for failfast.
 	 */
 	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;
-
-reject_io:
 	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
 	return BLK_STS_IOERR;
 }
@@ -689,10 +672,6 @@
 			opts->discovery_nqn =
 				!(strcmp(opts->subsysnqn,
 					 NVME_DISC_SUBSYS_NAME));
-			if (opts->discovery_nqn) {
-				opts->kato = 0;
-				opts->nr_io_queues = 0;
-			}
 			break;
 		case NVMF_OPT_TRADDR:
 			p = match_strdup(args);
@@ -851,6 +830,11 @@
 		}
 	}
 
+	if (opts->discovery_nqn) {
+		opts->kato = 0;
+		opts->nr_io_queues = 0;
+		opts->duplicate_connect = true;
+	}
 	if (ctrl_loss_tmo < 0)
 		opts->max_reconnects = -1;
 	else
@@ -983,16 +967,6 @@
 		goto out_module_put;
 	}
 
-	if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) {
-		dev_warn(ctrl->device,
-			"controller returned incorrect NQN: \"%s\".\n",
-			ctrl->subsys->subnqn);
-		module_put(ops->module);
-		up_read(&nvmf_transports_rwsem);
-		nvme_delete_ctrl_sync(ctrl);
-		return ERR_PTR(-EINVAL);
-	}
-
 	module_put(ops->module);
 	up_read(&nvmf_transports_rwsem);
 	return ctrl;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index ef46c91..0cf0460 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -139,7 +139,9 @@
 nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
 			struct nvmf_ctrl_options *opts)
 {
-	if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
+	if (ctrl->state == NVME_CTRL_DELETING ||
+	    ctrl->state == NVME_CTRL_DEAD ||
+	    strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
 	    strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
 	    memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t)))
 		return false;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6cb26bc..0bad658 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1686,16 +1686,6 @@
 		goto check_error;
 	}
 
-	/*
-	 * Force failures of commands if we're killing the controller
-	 * or have an error on a command used to create an new association
-	 */
-	if (status &&
-	    (blk_queue_dying(rq->q) ||
-	     ctrl->ctrl.state == NVME_CTRL_NEW ||
-	     ctrl->ctrl.state == NVME_CTRL_CONNECTING))
-		status |= cpu_to_le16(NVME_SC_DNR << 1);
-
 	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 	nvme_end_request(rq, status, result);
 
@@ -2403,9 +2393,6 @@
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
 
-	if (!blk_mq_request_started(req))
-		return;
-
 	__nvme_fc_abort_op(ctrl, op);
 }
 
@@ -3284,6 +3271,8 @@
 	}
 	spin_unlock_irqrestore(&nvme_fc_lock, flags);
 
+	pr_warn("%s: %s - %s combination not found\n",
+		__func__, opts->traddr, opts->host_traddr);
 	return ERR_PTR(-ENOENT);
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 17d2f7c..de24fe7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -22,6 +22,7 @@
 #include <linux/lightnvm.h>
 #include <linux/sed-opal.h>
 #include <linux/fault-inject.h>
+#include <linux/rcupdate.h>
 
 extern unsigned int nvme_io_timeout;
 #define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
@@ -180,6 +181,7 @@
 	u16 kas;
 	u8 npss;
 	u8 apsta;
+	u32 oaes;
 	u32 aen_result;
 	unsigned int shutdown_timeout;
 	unsigned int kato;
@@ -192,6 +194,8 @@
 	struct delayed_work ka_work;
 	struct nvme_command ka_cmd;
 	struct work_struct fw_act_work;
+#define EVENT_NS_CHANGED		(1 << 0)
+	unsigned long events;
 
 	/* Power saving configuration */
 	u64 ps_max_latency_us;
@@ -398,14 +402,13 @@
 void nvme_put_ctrl(struct nvme_ctrl *ctrl);
 int nvme_init_identify(struct nvme_ctrl *ctrl);
 
-void nvme_queue_scan(struct nvme_ctrl *ctrl);
 void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
 
 int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 		bool send);
 
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
-		union nvme_result *res);
+		volatile union nvme_result *res);
 
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
@@ -454,7 +457,7 @@
 {
 	struct nvme_ns_head *head = ns->head;
 
-	if (head && ns == srcu_dereference(head->current_path, &head->srcu))
+	if (head && ns == rcu_access_pointer(head->current_path))
 		rcu_assign_pointer(head->current_path, NULL);
 }
 struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 17a0190..e526437 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/aer.h>
+#include <linux/async.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-mq-pci.h>
@@ -68,7 +69,6 @@
 struct nvme_dev;
 struct nvme_queue;
 
-static void nvme_process_cq(struct nvme_queue *nvmeq);
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
 
 /*
@@ -147,9 +147,10 @@
 struct nvme_queue {
 	struct device *q_dmadev;
 	struct nvme_dev *dev;
-	spinlock_t q_lock;
+	spinlock_t sq_lock;
 	struct nvme_command *sq_cmds;
 	struct nvme_command __iomem *sq_cmds_io;
+	spinlock_t cq_lock ____cacheline_aligned_in_smp;
 	volatile struct nvme_completion *cqes;
 	struct blk_mq_tags **tags;
 	dma_addr_t sq_dma_addr;
@@ -159,9 +160,9 @@
 	s16 cq_vector;
 	u16 sq_tail;
 	u16 cq_head;
+	u16 last_cq_head;
 	u16 qid;
 	u8 cq_phase;
-	u8 cqe_seen;
 	u32 *dbbuf_sq_db;
 	u32 *dbbuf_cq_db;
 	u32 *dbbuf_sq_ei;
@@ -420,28 +421,25 @@
 }
 
 /**
- * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
+ * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
  * @cmd: The command to send
- *
- * Safe to use from interrupt context
  */
-static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
-						struct nvme_command *cmd)
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 {
-	u16 tail = nvmeq->sq_tail;
-
+	spin_lock(&nvmeq->sq_lock);
 	if (nvmeq->sq_cmds_io)
-		memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd));
+		memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd,
+				sizeof(*cmd));
 	else
-		memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
+		memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
 
-	if (++tail == nvmeq->q_depth)
-		tail = 0;
-	if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db,
-					      nvmeq->dbbuf_sq_ei))
-		writel(tail, nvmeq->q_db);
-	nvmeq->sq_tail = tail;
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+	if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
+			nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
+		writel(nvmeq->sq_tail, nvmeq->q_db);
+	spin_unlock(&nvmeq->sq_lock);
 }
 
 static void **nvme_pci_iod_list(struct request *req)
@@ -872,6 +870,13 @@
 	struct nvme_command cmnd;
 	blk_status_t ret;
 
+	/*
+	 * We should not need to do this, but we're still using this to
+	 * ensure we can drain requests on a dying queue.
+	 */
+	if (unlikely(nvmeq->cq_vector < 0))
+		return BLK_STS_IOERR;
+
 	ret = nvme_setup_cmd(ns, req, &cmnd);
 	if (ret)
 		return ret;
@@ -887,16 +892,7 @@
 	}
 
 	blk_mq_start_request(req);
-
-	spin_lock_irq(&nvmeq->q_lock);
-	if (unlikely(nvmeq->cq_vector < 0)) {
-		ret = BLK_STS_IOERR;
-		spin_unlock_irq(&nvmeq->q_lock);
-		goto out_cleanup_iod;
-	}
-	__nvme_submit_cmd(nvmeq, &cmnd);
-	nvme_process_cq(nvmeq);
-	spin_unlock_irq(&nvmeq->q_lock);
+	nvme_submit_cmd(nvmeq, &cmnd);
 	return BLK_STS_OK;
 out_cleanup_iod:
 	nvme_free_iod(dev, req);
@@ -914,10 +910,10 @@
 }
 
 /* We read the CQE phase first to check if the rest of the entry is valid */
-static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
-		u16 phase)
+static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
 {
-	return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
+	return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
+			nvmeq->cq_phase;
 }
 
 static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
@@ -931,9 +927,9 @@
 	}
 }
 
-static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
-		struct nvme_completion *cqe)
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 {
+	volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
 	struct request *req;
 
 	if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
@@ -956,83 +952,87 @@
 		return;
 	}
 
-	nvmeq->cqe_seen = 1;
 	req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
 	nvme_end_request(req, cqe->status, cqe->result);
 }
 
-static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
-		struct nvme_completion *cqe)
+static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
 {
-	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
-		*cqe = nvmeq->cqes[nvmeq->cq_head];
-
-		if (++nvmeq->cq_head == nvmeq->q_depth) {
-			nvmeq->cq_head = 0;
-			nvmeq->cq_phase = !nvmeq->cq_phase;
-		}
-		return true;
+	while (start != end) {
+		nvme_handle_cqe(nvmeq, start);
+		if (++start == nvmeq->q_depth)
+			start = 0;
 	}
-	return false;
 }
 
-static void nvme_process_cq(struct nvme_queue *nvmeq)
+static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
 {
-	struct nvme_completion cqe;
-	int consumed = 0;
-
-	while (nvme_read_cqe(nvmeq, &cqe)) {
-		nvme_handle_cqe(nvmeq, &cqe);
-		consumed++;
+	if (++nvmeq->cq_head == nvmeq->q_depth) {
+		nvmeq->cq_head = 0;
+		nvmeq->cq_phase = !nvmeq->cq_phase;
 	}
+}
 
-	if (consumed)
+static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+		u16 *end, int tag)
+{
+	bool found = false;
+
+	*start = nvmeq->cq_head;
+	while (!found && nvme_cqe_pending(nvmeq)) {
+		if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+			found = true;
+		nvme_update_cq_head(nvmeq);
+	}
+	*end = nvmeq->cq_head;
+
+	if (*start != *end)
 		nvme_ring_cq_doorbell(nvmeq);
+	return found;
 }
 
 static irqreturn_t nvme_irq(int irq, void *data)
 {
-	irqreturn_t result;
 	struct nvme_queue *nvmeq = data;
-	spin_lock(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
-	result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
-	nvmeq->cqe_seen = 0;
-	spin_unlock(&nvmeq->q_lock);
-	return result;
+	irqreturn_t ret = IRQ_NONE;
+	u16 start, end;
+
+	spin_lock(&nvmeq->cq_lock);
+	if (nvmeq->cq_head != nvmeq->last_cq_head)
+		ret = IRQ_HANDLED;
+	nvme_process_cq(nvmeq, &start, &end, -1);
+	nvmeq->last_cq_head = nvmeq->cq_head;
+	spin_unlock(&nvmeq->cq_lock);
+
+	if (start != end) {
+		nvme_complete_cqes(nvmeq, start, end);
+		return IRQ_HANDLED;
+	}
+
+	return ret;
 }
 
 static irqreturn_t nvme_irq_check(int irq, void *data)
 {
 	struct nvme_queue *nvmeq = data;
-	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+	if (nvme_cqe_pending(nvmeq))
 		return IRQ_WAKE_THREAD;
 	return IRQ_NONE;
 }
 
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
-	struct nvme_completion cqe;
-	int found = 0, consumed = 0;
+	u16 start, end;
+	bool found;
 
-	if (!nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+	if (!nvme_cqe_pending(nvmeq))
 		return 0;
 
-	spin_lock_irq(&nvmeq->q_lock);
-	while (nvme_read_cqe(nvmeq, &cqe)) {
-		nvme_handle_cqe(nvmeq, &cqe);
-		consumed++;
+	spin_lock_irq(&nvmeq->cq_lock);
+	found = nvme_process_cq(nvmeq, &start, &end, tag);
+	spin_unlock_irq(&nvmeq->cq_lock);
 
-		if (tag == cqe.command_id) {
-			found = 1;
-			break;
-		}
-       }
-
-	if (consumed)
-		nvme_ring_cq_doorbell(nvmeq);
-	spin_unlock_irq(&nvmeq->q_lock);
-
+	nvme_complete_cqes(nvmeq, start, end);
 	return found;
 }
 
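
Editorial note: the polling/IRQ rework above splits completion handling into two phases: nvme_process_cq() only walks valid CQEs, advances cq_head, and rings the doorbell under cq_lock, recording the consumed [start, end) window; nvme_complete_cqes() then invokes the blk-mq completions outside the lock, so request completion (which may immediately submit new I/O through the separate sq_lock) never runs with the CQ lock held. A condensed restatement of the pattern the diff introduces:

	/* Phase 1, under cq_lock: ring-index bookkeeping only. */
	static void demo_drain_cq(struct nvme_queue *nvmeq)
	{
		u16 start, end;

		spin_lock_irq(&nvmeq->cq_lock);
		nvme_process_cq(nvmeq, &start, &end, -1); /* moves cq_head */
		spin_unlock_irq(&nvmeq->cq_lock);

		/* Phase 2, lockless: blk-mq callbacks for [start, end). */
		nvme_complete_cqes(nvmeq, start, end);
	}
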
@@ -1052,10 +1052,7 @@
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_async_event;
 	c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
-
-	spin_lock_irq(&nvmeq->q_lock);
-	__nvme_submit_cmd(nvmeq, &c);
-	spin_unlock_irq(&nvmeq->q_lock);
+	nvme_submit_cmd(nvmeq, &c);
 }
 
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
@@ -1070,7 +1067,7 @@
 }
 
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
-						struct nvme_queue *nvmeq)
+		struct nvme_queue *nvmeq, s16 vector)
 {
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
@@ -1085,7 +1082,7 @@
 	c.create_cq.cqid = cpu_to_le16(qid);
 	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
 	c.create_cq.cq_flags = cpu_to_le16(flags);
-	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
+	c.create_cq.irq_vector = cpu_to_le16(vector);
 
 	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
@@ -1208,7 +1205,7 @@
 		nvme_warn_reset(dev, csts);
 		nvme_dev_disable(dev, false);
 		nvme_reset_ctrl(&dev->ctrl);
-		return BLK_EH_HANDLED;
+		return BLK_EH_DONE;
 	}
 
 	/*
@@ -1218,24 +1215,24 @@
 		dev_warn(dev->ctrl.device,
 			 "I/O %d QID %d timeout, completion polled\n",
 			 req->tag, nvmeq->qid);
-		return BLK_EH_HANDLED;
+		return BLK_EH_DONE;
 	}
 
 	/*
 	 * Shutdown immediately if controller times out while starting. The
 	 * reset work will see the pci device disabled when it gets the forced
 	 * cancellation error. All outstanding requests are completed on
-	 * shutdown, so we return BLK_EH_HANDLED.
+	 * shutdown, so we return BLK_EH_DONE.
 	 */
 	switch (dev->ctrl.state) {
 	case NVME_CTRL_CONNECTING:
 	case NVME_CTRL_RESETTING:
-		dev_warn(dev->ctrl.device,
+		dev_warn_ratelimited(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
 		nvme_dev_disable(dev, false);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
-		return BLK_EH_HANDLED;
+		return BLK_EH_DONE;
 	default:
 		break;
 	}
@@ -1252,12 +1249,8 @@
 		nvme_dev_disable(dev, false);
 		nvme_reset_ctrl(&dev->ctrl);
 
-		/*
-		 * Mark the request as handled, since the inline shutdown
-		 * forces all outstanding requests to complete.
-		 */
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
-		return BLK_EH_HANDLED;
+		return BLK_EH_DONE;
 	}
 
 	if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
@@ -1321,15 +1314,21 @@
 {
 	int vector;
 
-	spin_lock_irq(&nvmeq->q_lock);
+	spin_lock_irq(&nvmeq->cq_lock);
 	if (nvmeq->cq_vector == -1) {
-		spin_unlock_irq(&nvmeq->q_lock);
+		spin_unlock_irq(&nvmeq->cq_lock);
 		return 1;
 	}
 	vector = nvmeq->cq_vector;
 	nvmeq->dev->online_queues--;
 	nvmeq->cq_vector = -1;
-	spin_unlock_irq(&nvmeq->q_lock);
+	spin_unlock_irq(&nvmeq->cq_lock);
+
+	/*
+	 * Ensure that nvme_queue_rq() sees nvmeq->cq_vector == -1 without
+	 * having to grab the lock.
+	 */
+	mb();
 
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
 		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
@@ -1342,15 +1341,18 @@
 static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
 {
 	struct nvme_queue *nvmeq = &dev->queues[0];
+	u16 start, end;
 
 	if (shutdown)
 		nvme_shutdown_ctrl(&dev->ctrl);
 	else
 		nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 
-	spin_lock_irq(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
-	spin_unlock_irq(&nvmeq->q_lock);
+	spin_lock_irq(&nvmeq->cq_lock);
+	nvme_process_cq(nvmeq, &start, &end, -1);
+	spin_unlock_irq(&nvmeq->cq_lock);
+
+	nvme_complete_cqes(nvmeq, start, end);
 }
 
 static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
@@ -1408,7 +1410,8 @@
 
 	nvmeq->q_dmadev = dev->dev;
 	nvmeq->dev = dev;
-	spin_lock_init(&nvmeq->q_lock);
+	spin_lock_init(&nvmeq->sq_lock);
+	spin_lock_init(&nvmeq->cq_lock);
 	nvmeq->cq_head = 0;
 	nvmeq->cq_phase = 1;
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
@@ -1444,7 +1447,7 @@
 {
 	struct nvme_dev *dev = nvmeq->dev;
 
-	spin_lock_irq(&nvmeq->q_lock);
+	spin_lock_irq(&nvmeq->cq_lock);
 	nvmeq->sq_tail = 0;
 	nvmeq->cq_head = 0;
 	nvmeq->cq_phase = 1;
@@ -1452,13 +1455,14 @@
 	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
 	nvme_dbbuf_init(dev, nvmeq, qid);
 	dev->online_queues++;
-	spin_unlock_irq(&nvmeq->q_lock);
+	spin_unlock_irq(&nvmeq->cq_lock);
 }
 
 static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 {
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
+	s16 vector;
 
 	if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
 		unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
@@ -1471,15 +1475,21 @@
 	 * A queue's vector matches the queue identifier unless the controller
 	 * has only one vector available.
 	 */
-	nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid;
-	result = adapter_alloc_cq(dev, qid, nvmeq);
+	vector = dev->num_vecs == 1 ? 0 : qid;
+	result = adapter_alloc_cq(dev, qid, nvmeq, vector);
 	if (result < 0)
-		goto release_vector;
+		goto out;
 
 	result = adapter_alloc_sq(dev, qid, nvmeq);
 	if (result < 0)
 		goto release_cq;
 
+	/*
+	 * Set cq_vector only after allocating the CQ/SQ; otherwise, if the
+	 * create CQ/SQ command times out, nvme_suspend_queue() will invoke
+	 * free_irq() for it and trigger a 'Trying to free already-free IRQ
+	 * xxx' warning.
+	 */
+	nvmeq->cq_vector = vector;
 	nvme_init_queue(nvmeq, qid);
 	result = queue_request_irq(nvmeq);
 	if (result < 0)
@@ -1487,13 +1497,13 @@
 
 	return result;
 
- release_sq:
+release_sq:
+	nvmeq->cq_vector = -1;
 	dev->online_queues--;
 	adapter_delete_sq(dev, qid);
- release_cq:
+release_cq:
 	adapter_delete_cq(dev, qid);
- release_vector:
-	nvmeq->cq_vector = -1;
+out:
 	return result;
 }
 
@@ -1997,19 +2007,22 @@
 static void nvme_del_cq_end(struct request *req, blk_status_t error)
 {
 	struct nvme_queue *nvmeq = req->end_io_data;
+	u16 start, end;
 
 	if (!error) {
 		unsigned long flags;
 
 		/*
-		 * We might be called with the AQ q_lock held
-		 * and the I/O queue q_lock should always
+		 * We might be called with the AQ cq_lock held
+		 * and the I/O queue cq_lock should always
 		 * nest inside the AQ one.
 		 */
-		spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
+		spin_lock_irqsave_nested(&nvmeq->cq_lock, flags,
 					SINGLE_DEPTH_NESTING);
-		nvme_process_cq(nvmeq);
-		spin_unlock_irqrestore(&nvmeq->q_lock, flags);
+		nvme_process_cq(nvmeq, &start, &end, -1);
+		spin_unlock_irqrestore(&nvmeq->cq_lock, flags);
+
+		nvme_complete_cqes(nvmeq, start, end);
 	}
 
 	nvme_del_queue_end(req, error);
@@ -2497,6 +2510,15 @@
 	return 0;
 }
 
+static void nvme_async_probe(void *data, async_cookie_t cookie)
+{
+	struct nvme_dev *dev = data;
+
+	nvme_reset_ctrl_sync(&dev->ctrl);
+	flush_work(&dev->ctrl.scan_work);
+	nvme_put_ctrl(&dev->ctrl);
+}
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
@@ -2541,7 +2563,8 @@
 
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
-	nvme_reset_ctrl(&dev->ctrl);
+	nvme_get_ctrl(&dev->ctrl);
+	async_schedule(nvme_async_probe, dev);
 
 	return 0;
 
@@ -2685,6 +2708,9 @@
 
 static void nvme_error_resume(struct pci_dev *pdev)
 {
+	struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+	flush_work(&dev->ctrl.reset_work);
 	pci_cleanup_aer_uncorrect_error_status(pdev);
 }
 
@@ -2714,6 +2740,8 @@
 				NVME_QUIRK_MEDIUM_PRIO_SQ },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
+	{ PCI_DEVICE(0x1bb1, 0x0100),   /* Seagate Nytro Flash Storage */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE(0x1c58, 0x0023),	/* WDC SN200 adapter */
@@ -2728,6 +2756,8 @@
 		.driver_data = NVME_QUIRK_LIGHTNVM, },
 	{ PCI_DEVICE(0x1d1d, 0x2807),	/* CNEX WL */
 		.driver_data = NVME_QUIRK_LIGHTNVM, },
+	{ PCI_DEVICE(0x1d1d, 0x2601),	/* CNEX Granby */
+		.driver_data = NVME_QUIRK_LIGHTNVM, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
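The BLK_EH_HANDLED -> BLK_EH_DONE conversions in this file follow the block
layer timeout-handler rework in this merge. A summary of the handler contract
as it applies here (a sketch of the semantics, not a definitive API statement):

	/*
	 * Timeout handler return values after this series:
	 *   BLK_EH_RESET_TIMER - block layer re-arms the request timer
	 *   BLK_EH_DONE        - driver owns completing the request (here via
	 *                        NVME_REQ_CANCELLED plus controller disable/reset)
	 * BLK_EH_HANDLED, where the block layer completed the request on the
	 * driver's behalf, no longer exists.
	 */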
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1eb4438..7b3f084 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -778,7 +778,7 @@
 	if (error) {
 		dev_err(ctrl->ctrl.device,
 			"prop_get NVME_REG_CAP failed\n");
-		goto out_cleanup_queue;
+		goto out_stop_queue;
 	}
 
 	ctrl->ctrl.sqsize =
@@ -786,23 +786,25 @@
 
 	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 	if (error)
-		goto out_cleanup_queue;
+		goto out_stop_queue;
 
 	ctrl->ctrl.max_hw_sectors =
 		(ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
 
 	error = nvme_init_identify(&ctrl->ctrl);
 	if (error)
-		goto out_cleanup_queue;
+		goto out_stop_queue;
 
 	error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
 			&ctrl->async_event_sqe, sizeof(struct nvme_command),
 			DMA_TO_DEVICE);
 	if (error)
-		goto out_cleanup_queue;
+		goto out_stop_queue;
 
 	return 0;
 
+out_stop_queue:
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 out_cleanup_queue:
 	if (new)
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
@@ -1598,7 +1600,7 @@
 	/* fail with DNR on cmd timeout */
 	nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
 
-	return BLK_EH_HANDLED;
+	return BLK_EH_DONE;
 }
 
 static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index ea91fcc..01390f0 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -148,8 +148,8 @@
 		    __entry->flags = nvme_req(req)->flags;
 		    __entry->status = nvme_req(req)->status;
 	    ),
-	    TP_printk("cmdid=%u, qid=%d, res=%llu, retries=%u, flags=0x%x, status=%u",
-		      __entry->cid, __entry->qid, __entry->result,
+	    TP_printk("qid=%d, cmdid=%u, res=%llu, retries=%u, flags=0x%x, status=%u",
+		      __entry->qid, __entry->cid, __entry->result,
 		      __entry->retries, __entry->flags, __entry->status)
 
 );
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index 4882501..8118c93 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -6,8 +6,8 @@
 obj-$(CONFIG_NVME_TARGET_FC)		+= nvmet-fc.o
 obj-$(CONFIG_NVME_TARGET_FCLOOP)	+= nvme-fcloop.o
 
-nvmet-y		+= core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \
-			discovery.o
+nvmet-y		+= core.o configfs.o admin-cmd.o fabrics-cmd.o \
+			discovery.o io-cmd-file.o io-cmd-bdev.o
 nvme-loop-y	+= loop.o
 nvmet-rdma-y	+= rdma.o
 nvmet-fc-y	+= fc.o
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 5e0e9fc..ead8fbe 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -32,6 +32,11 @@
 	return len;
 }
 
+static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
+{
+	nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
+}
+
 static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
 		struct nvme_smart_log *slog)
 {
@@ -45,6 +50,10 @@
 		return NVME_SC_INVALID_NS;
 	}
 
+	/* we don't have the right data for file backed ns */
+	if (!ns->bdev)
+		goto out;
+
 	host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]);
 	data_units_read = part_stat_read(ns->bdev->bd_part, sectors[READ]);
 	host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]);
@@ -54,6 +63,7 @@
 	put_unaligned_le64(data_units_read, &slog->data_units_read[0]);
 	put_unaligned_le64(host_writes, &slog->host_writes[0]);
 	put_unaligned_le64(data_units_written, &slog->data_units_written[0]);
+out:
 	nvmet_put_namespace(ns);
 
 	return NVME_SC_SUCCESS;
@@ -71,6 +81,9 @@
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
+		/* we don't have the right data for file backed ns */
+		if (!ns->bdev)
+			continue;
 		host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]);
 		data_units_read +=
 			part_stat_read(ns->bdev->bd_part, sectors[READ]);
@@ -89,74 +102,50 @@
 	return NVME_SC_SUCCESS;
 }
 
-static u16 nvmet_get_smart_log(struct nvmet_req *req,
-		struct nvme_smart_log *slog)
+static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
 {
-	u16 status;
+	struct nvme_smart_log *log;
+	u16 status = NVME_SC_INTERNAL;
 
-	WARN_ON(req == NULL || slog == NULL);
+	if (req->data_len != sizeof(*log))
+		goto out;
+
+	log = kzalloc(sizeof(*log), GFP_KERNEL);
+	if (!log)
+		goto out;
+
 	if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL))
-		status = nvmet_get_smart_log_all(req, slog);
+		status = nvmet_get_smart_log_all(req, log);
 	else
-		status = nvmet_get_smart_log_nsid(req, slog);
-	return status;
+		status = nvmet_get_smart_log_nsid(req, log);
+	if (status)
+		goto out;
+
+	status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+out:
+	nvmet_req_complete(req, status);
 }
 
-static void nvmet_execute_get_log_page(struct nvmet_req *req)
+static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
 {
-	struct nvme_smart_log *smart_log;
-	size_t data_len = nvmet_get_log_page_len(req->cmd);
-	void *buf;
-	u16 status = 0;
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	u16 status = NVME_SC_INTERNAL;
+	size_t len;
 
-	buf = kzalloc(data_len, GFP_KERNEL);
-	if (!buf) {
-		status = NVME_SC_INTERNAL;
+	if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
 		goto out;
-	}
 
-	switch (req->cmd->get_log_page.lid) {
-	case NVME_LOG_ERROR:
-		/*
-		 * We currently never set the More bit in the status field,
-		 * so all error log entries are invalid and can be zeroed out.
-		 * This is called a minum viable implementation (TM) of this
-		 * mandatory log page.
-		 */
-		break;
-	case NVME_LOG_SMART:
-		/*
-		 * XXX: fill out actual smart log
-		 *
-		 * We might have a hard time coming up with useful values for
-		 * many of the fields, and even when we have useful data
-		 * available (e.g. units or commands read/written) those aren't
-		 * persistent over power loss.
-		 */
-		if (data_len != sizeof(*smart_log)) {
-			status = NVME_SC_INTERNAL;
-			goto err;
-		}
-		smart_log = buf;
-		status = nvmet_get_smart_log(req, smart_log);
-		if (status)
-			goto err;
-		break;
-	case NVME_LOG_FW_SLOT:
-		/*
-		 * We only support a single firmware slot which always is
-		 * active, so we can zero out the whole firmware slot log and
-		 * still claim to fully implement this mandatory log page.
-		 */
-		break;
-	default:
-		BUG();
-	}
-
-	status = nvmet_copy_to_sgl(req, 0, buf, data_len);
-
-err:
-	kfree(buf);
+	mutex_lock(&ctrl->lock);
+	if (ctrl->nr_changed_ns == U32_MAX)
+		len = sizeof(__le32);
+	else
+		len = ctrl->nr_changed_ns * sizeof(__le32);
+	status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len);
+	if (!status)
+		status = nvmet_zero_sgl(req, len, req->data_len - len);
+	ctrl->nr_changed_ns = 0;
+	clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
+	mutex_unlock(&ctrl->lock);
 out:
 	nvmet_req_complete(req, status);
 }
@@ -201,7 +190,7 @@
 	id->ver = cpu_to_le32(ctrl->subsys->ver);
 
 	/* XXX: figure out what to do about RTD3R/RTD3 */
-	id->oaes = cpu_to_le32(1 << 8);
+	id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
 	id->ctratt = cpu_to_le32(1 << 0);
 
 	id->oacs = 0;
@@ -447,6 +436,16 @@
 		req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
 		nvmet_set_result(req, req->sq->ctrl->kato);
 		break;
+	case NVME_FEAT_ASYNC_EVENT:
+		val32 = le32_to_cpu(req->cmd->common.cdw10[1]);
+		if (val32 & ~NVMET_AEN_CFG_ALL) {
+			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+			break;
+		}
+
+		WRITE_ONCE(req->sq->ctrl->aen_enabled, val32);
+		nvmet_set_result(req, val32);
+		break;
 	case NVME_FEAT_HOST_ID:
 		status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 		break;
@@ -485,9 +484,10 @@
 		break;
 	case NVME_FEAT_WRITE_ATOMIC:
 		break;
-	case NVME_FEAT_ASYNC_EVENT:
-		break;
 #endif
+	case NVME_FEAT_ASYNC_EVENT:
+		nvmet_set_result(req, READ_ONCE(req->sq->ctrl->aen_enabled));
+		break;
 	case NVME_FEAT_VOLATILE_WC:
 		nvmet_set_result(req, 1);
 		break;
@@ -548,8 +548,6 @@
 	struct nvme_command *cmd = req->cmd;
 	u16 ret;
 
-	req->ns = NULL;
-
 	ret = nvmet_check_ctrl_status(req, cmd);
 	if (unlikely(ret))
 		return ret;
@@ -560,9 +558,28 @@
 
 		switch (cmd->get_log_page.lid) {
 		case NVME_LOG_ERROR:
+			/*
+			 * We currently never set the More bit in the status
+			 * field, so all error log entries are invalid and can
+			 * be zeroed out.  This is called a minimum viable
+			 * implementation (TM) of this mandatory log page.
+			 */
+			req->execute = nvmet_execute_get_log_page_noop;
+			return 0;
 		case NVME_LOG_SMART:
+			req->execute = nvmet_execute_get_log_page_smart;
+			return 0;
 		case NVME_LOG_FW_SLOT:
-			req->execute = nvmet_execute_get_log_page;
+			/*
+			 * We only support a single firmware slot which always
+			 * is active, so we can zero out the whole firmware slot
+			 * log and still claim to fully implement this mandatory
+			 * log page.
+			 */
+			req->execute = nvmet_execute_get_log_page_noop;
+			return 0;
+		case NVME_LOG_CHANGED_NS:
+			req->execute = nvmet_execute_get_log_changed_ns;
 			return 0;
 		}
 		break;
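For reference, the Changed Namespace List log built above holds up to
NVME_MAX_CHANGED_NAMESPACES little-endian NSIDs, zero-padded; when more
namespaces changed than fit, the single entry 0xffffffff tells the host to
rescan everything, and reading the page clears the target's list and re-arms
the NS_ATTR AEN. A hypothetical host-side consumer of such a page:

	static void example_parse_changed_ns_log(const __le32 *log, size_t nr)
	{
		size_t i;

		if (log[0] == cpu_to_le32(0xffffffff)) {
			/* overflow sentinel: rescan all namespaces */
			return;
		}
		for (i = 0; i < nr && log[i]; i++)
			pr_info("nsid %u changed\n", le32_to_cpu(log[i]));
	}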
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index e95424f..a03da76 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -57,6 +57,13 @@
 	return 0;
 }
 
+u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
+{
+	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
+		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
+	return 0;
+}
+
 static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
 {
 	struct nvmet_ns *ns;
@@ -137,6 +144,51 @@
 	schedule_work(&ctrl->async_event_work);
 }
 
+static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
+{
+	if (!(READ_ONCE(ctrl->aen_enabled) & aen))
+		return true;
+	return test_and_set_bit(aen, &ctrl->aen_masked);
+}
+
+static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
+{
+	u32 i;
+
+	mutex_lock(&ctrl->lock);
+	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
+		goto out_unlock;
+
+	for (i = 0; i < ctrl->nr_changed_ns; i++) {
+		if (ctrl->changed_ns_list[i] == nsid)
+			goto out_unlock;
+	}
+
+	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
+		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
+		ctrl->nr_changed_ns = U32_MAX;
+		goto out_unlock;
+	}
+
+	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
+out_unlock:
+	mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
+{
+	struct nvmet_ctrl *ctrl;
+
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
+		if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
+			continue;
+		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+				NVME_AER_NOTICE_NS_CHANGED,
+				NVME_LOG_CHANGED_NS);
+	}
+}
+
 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
 {
 	int ret = 0;
@@ -271,33 +323,31 @@
 	percpu_ref_put(&ns->ref);
 }
 
+static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
+{
+	nvmet_bdev_ns_disable(ns);
+	nvmet_file_ns_disable(ns);
+}
+
 int nvmet_ns_enable(struct nvmet_ns *ns)
 {
 	struct nvmet_subsys *subsys = ns->subsys;
-	struct nvmet_ctrl *ctrl;
 	int ret = 0;
 
 	mutex_lock(&subsys->lock);
 	if (ns->enabled)
 		goto out_unlock;
 
-	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
-			NULL);
-	if (IS_ERR(ns->bdev)) {
-		pr_err("failed to open block device %s: (%ld)\n",
-		       ns->device_path, PTR_ERR(ns->bdev));
-		ret = PTR_ERR(ns->bdev);
-		ns->bdev = NULL;
+	ret = nvmet_bdev_ns_enable(ns);
+	if (ret)
+		ret = nvmet_file_ns_enable(ns);
+	if (ret)
 		goto out_unlock;
-	}
-
-	ns->size = i_size_read(ns->bdev->bd_inode);
-	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
 
 	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
 				0, GFP_KERNEL);
 	if (ret)
-		goto out_blkdev_put;
+		goto out_dev_put;
 
 	if (ns->nsid > subsys->max_nsid)
 		subsys->max_nsid = ns->nsid;
@@ -320,24 +370,20 @@
 		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
 	}
 
-	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
-		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
-
+	nvmet_ns_changed(subsys, ns->nsid);
 	ns->enabled = true;
 	ret = 0;
 out_unlock:
 	mutex_unlock(&subsys->lock);
 	return ret;
-out_blkdev_put:
-	blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
-	ns->bdev = NULL;
+out_dev_put:
+	nvmet_ns_dev_disable(ns);
 	goto out_unlock;
 }
 
 void nvmet_ns_disable(struct nvmet_ns *ns)
 {
 	struct nvmet_subsys *subsys = ns->subsys;
-	struct nvmet_ctrl *ctrl;
 
 	mutex_lock(&subsys->lock);
 	if (!ns->enabled)
@@ -363,11 +409,8 @@
 	percpu_ref_exit(&ns->ref);
 
 	mutex_lock(&subsys->lock);
-	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
-		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
-
-	if (ns->bdev)
-		blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
+	nvmet_ns_changed(subsys, ns->nsid);
+	nvmet_ns_dev_disable(ns);
 out_unlock:
 	mutex_unlock(&subsys->lock);
 }
@@ -499,6 +542,25 @@
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 
+static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+	u16 ret;
+
+	ret = nvmet_check_ctrl_status(req, cmd);
+	if (unlikely(ret))
+		return ret;
+
+	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
+	if (unlikely(!req->ns))
+		return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
+	if (req->ns->file)
+		return nvmet_file_parse_io_cmd(req);
+	else
+		return nvmet_bdev_parse_io_cmd(req);
+}
+
 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
 {
@@ -710,15 +772,14 @@
 u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
 {
 	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
-		pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
+		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
 		       cmd->common.opcode, req->sq->qid);
 		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 	}
 
 	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
-		pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
+		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
 		       cmd->common.opcode, req->sq->qid);
-		req->ns = NULL;
 		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 	}
 	return 0;
@@ -809,12 +870,18 @@
 
 	kref_init(&ctrl->ref);
 	ctrl->subsys = subsys;
+	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
+
+	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
+			sizeof(__le32), GFP_KERNEL);
+	if (!ctrl->changed_ns_list)
+		goto out_free_ctrl;
 
 	ctrl->cqs = kcalloc(subsys->max_qid + 1,
 			sizeof(struct nvmet_cq *),
 			GFP_KERNEL);
 	if (!ctrl->cqs)
-		goto out_free_ctrl;
+		goto out_free_changed_ns_list;
 
 	ctrl->sqs = kcalloc(subsys->max_qid + 1,
 			sizeof(struct nvmet_sq *),
@@ -872,6 +939,8 @@
 	kfree(ctrl->sqs);
 out_free_cqs:
 	kfree(ctrl->cqs);
+out_free_changed_ns_list:
+	kfree(ctrl->changed_ns_list);
 out_free_ctrl:
 	kfree(ctrl);
 out_put_subsystem:
@@ -898,6 +967,7 @@
 
 	kfree(ctrl->sqs);
 	kfree(ctrl->cqs);
+	kfree(ctrl->changed_ns_list);
 	kfree(ctrl);
 
 	nvmet_subsys_put(subsys);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 231e04e..08656b8 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -187,8 +187,6 @@
 {
 	struct nvme_command *cmd = req->cmd;
 
-	req->ns = NULL;
-
 	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
 		pr_err("got cmd %d while not ready\n",
 		       cmd->common.opcode);
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 19e9e42..d84ae004 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -77,8 +77,6 @@
 {
 	struct nvme_command *cmd = req->cmd;
 
-	req->ns = NULL;
-
 	switch (cmd->fabrics.fctype) {
 	case nvme_fabrics_type_property_set:
 		req->data_len = 0;
@@ -242,8 +240,6 @@
 {
 	struct nvme_command *cmd = req->cmd;
 
-	req->ns = NULL;
-
 	if (cmd->common.opcode != nvme_fabrics_command) {
 		pr_err("invalid command 0x%x on unconnected queue.\n",
 			cmd->fabrics.opcode);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 33ee8d3..408279c 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -31,7 +31,7 @@
 /* *************************** Data Structures/Defines ****************** */
 
 
-#define NVMET_LS_CTX_COUNT		4
+#define NVMET_LS_CTX_COUNT		256
 
 /* for this implementation, assume small single frame rqst/rsp */
 #define NVME_FC_MAX_LS_BUFFER_SIZE		2048
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
new file mode 100644
index 0000000..e0b0f7d
--- /dev/null
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -0,0 +1,241 @@
+/*
+ * NVMe I/O command implementation.
+ * Copyright (c) 2015-2016 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/blkdev.h>
+#include <linux/module.h>
+#include "nvmet.h"
+
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
+{
+	int ret;
+
+	ns->bdev = blkdev_get_by_path(ns->device_path,
+			FMODE_READ | FMODE_WRITE, NULL);
+	if (IS_ERR(ns->bdev)) {
+		ret = PTR_ERR(ns->bdev);
+		if (ret != -ENOTBLK) {
+			pr_err("failed to open block device %s: (%ld)\n",
+					ns->device_path, PTR_ERR(ns->bdev));
+		}
+		ns->bdev = NULL;
+		return ret;
+	}
+	ns->size = i_size_read(ns->bdev->bd_inode);
+	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+	return 0;
+}
+
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
+{
+	if (ns->bdev) {
+		blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
+		ns->bdev = NULL;
+	}
+}
+
+static void nvmet_bio_done(struct bio *bio)
+{
+	struct nvmet_req *req = bio->bi_private;
+
+	nvmet_req_complete(req,
+		bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+
+	if (bio != &req->b.inline_bio)
+		bio_put(bio);
+}
+
+static void nvmet_bdev_execute_rw(struct nvmet_req *req)
+{
+	int sg_cnt = req->sg_cnt;
+	struct bio *bio = &req->b.inline_bio;
+	struct scatterlist *sg;
+	sector_t sector;
+	blk_qc_t cookie;
+	int op, op_flags = 0, i;
+
+	if (!req->sg_cnt) {
+		nvmet_req_complete(req, 0);
+		return;
+	}
+
+	if (req->cmd->rw.opcode == nvme_cmd_write) {
+		op = REQ_OP_WRITE;
+		op_flags = REQ_SYNC | REQ_IDLE;
+		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+			op_flags |= REQ_FUA;
+	} else {
+		op = REQ_OP_READ;
+	}
+
+	sector = le64_to_cpu(req->cmd->rw.slba);
+	sector <<= (req->ns->blksize_shift - 9);
+
+	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+	bio_set_dev(bio, req->ns->bdev);
+	bio->bi_iter.bi_sector = sector;
+	bio->bi_private = req;
+	bio->bi_end_io = nvmet_bio_done;
+	bio_set_op_attrs(bio, op, op_flags);
+
+	for_each_sg(req->sg, sg, req->sg_cnt, i) {
+		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
+				!= sg->length) {
+			struct bio *prev = bio;
+
+			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+			bio_set_dev(bio, req->ns->bdev);
+			bio->bi_iter.bi_sector = sector;
+			bio_set_op_attrs(bio, op, op_flags);
+
+			bio_chain(bio, prev);
+			submit_bio(prev);
+		}
+
+		sector += sg->length >> 9;
+		sg_cnt--;
+	}
+
+	cookie = submit_bio(bio);
+
+	blk_poll(bdev_get_queue(req->ns->bdev), cookie);
+}
+
+static void nvmet_bdev_execute_flush(struct nvmet_req *req)
+{
+	struct bio *bio = &req->b.inline_bio;
+
+	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
+	bio_set_dev(bio, req->ns->bdev);
+	bio->bi_private = req;
+	bio->bi_end_io = nvmet_bio_done;
+	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+	submit_bio(bio);
+}
+
+static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
+		struct nvme_dsm_range *range, struct bio **bio)
+{
+	int ret;
+
+	ret = __blkdev_issue_discard(ns->bdev,
+			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
+			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
+			GFP_KERNEL, 0, bio);
+	if (ret && ret != -EOPNOTSUPP)
+		return NVME_SC_INTERNAL | NVME_SC_DNR;
+	return 0;
+}
+
+static void nvmet_bdev_execute_discard(struct nvmet_req *req)
+{
+	struct nvme_dsm_range range;
+	struct bio *bio = NULL;
+	int i;
+	u16 status;
+
+	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
+		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+				sizeof(range));
+		if (status)
+			break;
+
+		status = nvmet_bdev_discard_range(req->ns, &range, &bio);
+		if (status)
+			break;
+	}
+
+	if (bio) {
+		bio->bi_private = req;
+		bio->bi_end_io = nvmet_bio_done;
+		if (status) {
+			bio->bi_status = BLK_STS_IOERR;
+			bio_endio(bio);
+		} else {
+			submit_bio(bio);
+		}
+	} else {
+		nvmet_req_complete(req, status);
+	}
+}
+
+static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
+{
+	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
+	case NVME_DSMGMT_AD:
+		nvmet_bdev_execute_discard(req);
+		return;
+	case NVME_DSMGMT_IDR:
+	case NVME_DSMGMT_IDW:
+	default:
+		/* Not supported yet */
+		nvmet_req_complete(req, 0);
+		return;
+	}
+}
+
+static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
+{
+	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
+	struct bio *bio = NULL;
+	u16 status = NVME_SC_SUCCESS;
+	sector_t sector;
+	sector_t nr_sector;
+
+	sector = le64_to_cpu(write_zeroes->slba) <<
+		(req->ns->blksize_shift - 9);
+	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+		(req->ns->blksize_shift - 9));
+
+	if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
+				GFP_KERNEL, &bio, 0))
+		status = NVME_SC_INTERNAL | NVME_SC_DNR;
+
+	if (bio) {
+		bio->bi_private = req;
+		bio->bi_end_io = nvmet_bio_done;
+		submit_bio(bio);
+	} else {
+		nvmet_req_complete(req, status);
+	}
+}
+
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+
+	switch (cmd->common.opcode) {
+	case nvme_cmd_read:
+	case nvme_cmd_write:
+		req->execute = nvmet_bdev_execute_rw;
+		req->data_len = nvmet_rw_len(req);
+		return 0;
+	case nvme_cmd_flush:
+		req->execute = nvmet_bdev_execute_flush;
+		req->data_len = 0;
+		return 0;
+	case nvme_cmd_dsm:
+		req->execute = nvmet_bdev_execute_dsm;
+		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
+			sizeof(struct nvme_dsm_range);
+		return 0;
+	case nvme_cmd_write_zeroes:
+		req->execute = nvmet_bdev_execute_write_zeroes;
+		return 0;
+	default:
+		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+		       req->sq->qid);
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+}
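A quick worked example of the sector arithmetic in nvmet_bdev_execute_rw()
above, where blksize_shift - 9 converts namespace blocks into 512-byte
sectors:

	/*
	 * 4K-block namespace: blksize_shift == 12, so the shift is 3 and
	 * slba 10 maps to sector 10 << 3 == 80 (byte offset 40960).
	 */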
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
new file mode 100644
index 0000000..8c42b3a
--- /dev/null
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVMe Over Fabrics Target File I/O commands implementation.
+ * Copyright (c) 2017-2018 Western Digital Corporation or its
+ * affiliates.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/uio.h>
+#include <linux/falloc.h>
+#include <linux/file.h>
+#include "nvmet.h"
+
+#define NVMET_MAX_MPOOL_BVEC		16
+#define NVMET_MIN_MPOOL_OBJ		16
+
+void nvmet_file_ns_disable(struct nvmet_ns *ns)
+{
+	if (ns->file) {
+		mempool_destroy(ns->bvec_pool);
+		ns->bvec_pool = NULL;
+		kmem_cache_destroy(ns->bvec_cache);
+		ns->bvec_cache = NULL;
+		fput(ns->file);
+		ns->file = NULL;
+	}
+}
+
+int nvmet_file_ns_enable(struct nvmet_ns *ns)
+{
+	int ret;
+	struct kstat stat;
+
+	ns->file = filp_open(ns->device_path,
+			O_RDWR | O_LARGEFILE | O_DIRECT, 0);
+	if (IS_ERR(ns->file)) {
+		pr_err("failed to open file %s: (%ld)\n",
+				ns->device_path, PTR_ERR(ns->file));
+		return PTR_ERR(ns->file);
+	}
+
+	ret = vfs_getattr(&ns->file->f_path,
+			&stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
+	if (ret)
+		goto err;
+
+	ns->size = stat.size;
+	ns->blksize_shift = file_inode(ns->file)->i_blkbits;
+
+	ns->bvec_cache = kmem_cache_create("nvmet-bvec",
+			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
+			0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ns->bvec_cache) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
+			mempool_free_slab, ns->bvec_cache);
+
+	if (!ns->bvec_pool) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	return ret;
+err:
+	ns->size = 0;
+	ns->blksize_shift = 0;
+	nvmet_file_ns_disable(ns);
+	return ret;
+}
+
+static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
+{
+	bv->bv_page = sg_page_iter_page(iter);
+	bv->bv_offset = iter->sg->offset;
+	bv->bv_len = PAGE_SIZE - iter->sg->offset;
+}
+
+static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
+		unsigned long nr_segs, size_t count)
+{
+	struct kiocb *iocb = &req->f.iocb;
+	ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
+	struct iov_iter iter;
+	int ki_flags = 0, rw;
+	ssize_t ret;
+
+	if (req->cmd->rw.opcode == nvme_cmd_write) {
+		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
+			ki_flags = IOCB_DSYNC;
+		call_iter = req->ns->file->f_op->write_iter;
+		rw = WRITE;
+	} else {
+		call_iter = req->ns->file->f_op->read_iter;
+		rw = READ;
+	}
+
+	iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+
+	iocb->ki_pos = pos;
+	iocb->ki_filp = req->ns->file;
+	iocb->ki_flags = IOCB_DIRECT | ki_flags;
+
+	ret = call_iter(iocb, &iter);
+
+	if (ret != -EIOCBQUEUED && iocb->ki_complete)
+		iocb->ki_complete(iocb, ret, 0);
+
+	return ret;
+}
+
+static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
+{
+	struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
+
+	if (req->f.bvec != req->inline_bvec) {
+		if (likely(req->f.mpool_alloc == false))
+			kfree(req->f.bvec);
+		else
+			mempool_free(req->f.bvec, req->ns->bvec_pool);
+	}
+
+	nvmet_req_complete(req, ret != req->data_len ?
+			NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_rw(struct nvmet_req *req)
+{
+	ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
+	struct sg_page_iter sg_pg_iter;
+	unsigned long bv_cnt = 0;
+	bool is_sync = false;
+	size_t len = 0, total_len = 0;
+	ssize_t ret = 0;
+	loff_t pos;
+
+	if (!req->sg_cnt || !nr_bvec) {
+		nvmet_req_complete(req, 0);
+		return;
+	}
+
+	if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
+		req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
+				GFP_KERNEL);
+	else
+		req->f.bvec = req->inline_bvec;
+
+	req->f.mpool_alloc = false;
+	if (unlikely(!req->f.bvec)) {
+		/* fallback under memory pressure */
+		req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
+		req->f.mpool_alloc = true;
+		if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
+			is_sync = true;
+	}
+
+	pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
+
+	memset(&req->f.iocb, 0, sizeof(struct kiocb));
+	for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
+		nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
+		len += req->f.bvec[bv_cnt].bv_len;
+		total_len += req->f.bvec[bv_cnt].bv_len;
+		bv_cnt++;
+
+		WARN_ON_ONCE((nr_bvec - 1) < 0);
+
+		if (unlikely(is_sync) &&
+		    (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
+			ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
+			if (ret < 0)
+				goto out;
+			pos += len;
+			bv_cnt = 0;
+			len = 0;
+		}
+		nr_bvec--;
+	}
+
+	if (WARN_ON_ONCE(total_len != req->data_len))
+		ret = -EIO;
+out:
+	if (unlikely(is_sync || ret)) {
+		nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
+		return;
+	}
+	req->f.iocb.ki_complete = nvmet_file_io_done;
+	nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
+}
+
+static void nvmet_file_flush_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	int ret;
+
+	ret = vfs_fsync(req->ns->file, 1);
+
+	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_flush(struct nvmet_req *req)
+{
+	INIT_WORK(&req->f.work, nvmet_file_flush_work);
+	schedule_work(&req->f.work);
+}
+
+static void nvmet_file_execute_discard(struct nvmet_req *req)
+{
+	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+	struct nvme_dsm_range range;
+	loff_t offset;
+	loff_t len;
+	int i, ret;
+
+	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
+		if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
+					sizeof(range)))
+			break;
+		offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
+		len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
+		ret = vfs_fallocate(req->ns->file, mode, offset, len);
+		if (ret)
+			break;
+	}
+
+	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_dsm_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+
+	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
+	case NVME_DSMGMT_AD:
+		nvmet_file_execute_discard(req);
+		return;
+	case NVME_DSMGMT_IDR:
+	case NVME_DSMGMT_IDW:
+	default:
+		/* Not supported yet */
+		nvmet_req_complete(req, 0);
+		return;
+	}
+}
+
+static void nvmet_file_execute_dsm(struct nvmet_req *req)
+{
+	INIT_WORK(&req->f.work, nvmet_file_dsm_work);
+	schedule_work(&req->f.work);
+}
+
+static void nvmet_file_write_zeroes_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
+	int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
+	loff_t offset;
+	loff_t len;
+	int ret;
+
+	offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
+	len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+			req->ns->blksize_shift);
+
+	ret = vfs_fallocate(req->ns->file, mode, offset, len);
+	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+}
+
+static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
+{
+	INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
+	schedule_work(&req->f.work);
+}
+
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
+{
+	struct nvme_command *cmd = req->cmd;
+
+	switch (cmd->common.opcode) {
+	case nvme_cmd_read:
+	case nvme_cmd_write:
+		req->execute = nvmet_file_execute_rw;
+		req->data_len = nvmet_rw_len(req);
+		return 0;
+	case nvme_cmd_flush:
+		req->execute = nvmet_file_execute_flush;
+		req->data_len = 0;
+		return 0;
+	case nvme_cmd_dsm:
+		req->execute = nvmet_file_execute_dsm;
+		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
+			sizeof(struct nvme_dsm_range);
+		return 0;
+	case nvme_cmd_write_zeroes:
+		req->execute = nvmet_file_execute_write_zeroes;
+		req->data_len = 0;
+		return 0;
+	default:
+		pr_err("unhandled cmd for file ns %d on qid %d\n",
+				cmd->common.opcode, req->sq->qid);
+		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+	}
+}
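The file backend drives direct I/O through a kiocb: if ->read_iter() or
->write_iter() returns -EIOCBQUEUED the completion callback fires later, and
any other return value is completed synchronously by the submitter. A minimal
sketch of that pattern, assuming an O_DIRECT-opened file and a caller-supplied
iov_iter (the example_* name is made up):

	static ssize_t example_submit_async_read(struct file *file,
			struct kiocb *iocb, struct iov_iter *iter, loff_t pos,
			void (*done)(struct kiocb *, long, long))
	{
		ssize_t ret;

		iocb->ki_pos = pos;
		iocb->ki_filp = file;
		iocb->ki_flags = IOCB_DIRECT;
		iocb->ki_complete = done;	/* fires on async completion */

		ret = file->f_op->read_iter(iocb, iter);
		if (ret != -EIOCBQUEUED)
			done(iocb, ret, 0);	/* completed synchronously */
		return ret;
	}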
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
deleted file mode 100644
index cd23441..0000000
--- a/drivers/nvme/target/io-cmd.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * NVMe I/O command implementation.
- * Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/blkdev.h>
-#include <linux/module.h>
-#include "nvmet.h"
-
-static void nvmet_bio_done(struct bio *bio)
-{
-	struct nvmet_req *req = bio->bi_private;
-
-	nvmet_req_complete(req,
-		bio->bi_status ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
-
-	if (bio != &req->inline_bio)
-		bio_put(bio);
-}
-
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
-{
-	return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
-			req->ns->blksize_shift;
-}
-
-static void nvmet_execute_rw(struct nvmet_req *req)
-{
-	int sg_cnt = req->sg_cnt;
-	struct bio *bio = &req->inline_bio;
-	struct scatterlist *sg;
-	sector_t sector;
-	blk_qc_t cookie;
-	int op, op_flags = 0, i;
-
-	if (!req->sg_cnt) {
-		nvmet_req_complete(req, 0);
-		return;
-	}
-
-	if (req->cmd->rw.opcode == nvme_cmd_write) {
-		op = REQ_OP_WRITE;
-		op_flags = REQ_SYNC | REQ_IDLE;
-		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
-			op_flags |= REQ_FUA;
-	} else {
-		op = REQ_OP_READ;
-	}
-
-	sector = le64_to_cpu(req->cmd->rw.slba);
-	sector <<= (req->ns->blksize_shift - 9);
-
-	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-	bio_set_dev(bio, req->ns->bdev);
-	bio->bi_iter.bi_sector = sector;
-	bio->bi_private = req;
-	bio->bi_end_io = nvmet_bio_done;
-	bio_set_op_attrs(bio, op, op_flags);
-
-	for_each_sg(req->sg, sg, req->sg_cnt, i) {
-		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
-				!= sg->length) {
-			struct bio *prev = bio;
-
-			bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
-			bio_set_dev(bio, req->ns->bdev);
-			bio->bi_iter.bi_sector = sector;
-			bio_set_op_attrs(bio, op, op_flags);
-
-			bio_chain(bio, prev);
-			submit_bio(prev);
-		}
-
-		sector += sg->length >> 9;
-		sg_cnt--;
-	}
-
-	cookie = submit_bio(bio);
-
-	blk_poll(bdev_get_queue(req->ns->bdev), cookie);
-}
-
-static void nvmet_execute_flush(struct nvmet_req *req)
-{
-	struct bio *bio = &req->inline_bio;
-
-	bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
-	bio_set_dev(bio, req->ns->bdev);
-	bio->bi_private = req;
-	bio->bi_end_io = nvmet_bio_done;
-	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
-	submit_bio(bio);
-}
-
-static u16 nvmet_discard_range(struct nvmet_ns *ns,
-		struct nvme_dsm_range *range, struct bio **bio)
-{
-	int ret;
-
-	ret = __blkdev_issue_discard(ns->bdev,
-			le64_to_cpu(range->slba) << (ns->blksize_shift - 9),
-			le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
-			GFP_KERNEL, 0, bio);
-	if (ret && ret != -EOPNOTSUPP)
-		return NVME_SC_INTERNAL | NVME_SC_DNR;
-	return 0;
-}
-
-static void nvmet_execute_discard(struct nvmet_req *req)
-{
-	struct nvme_dsm_range range;
-	struct bio *bio = NULL;
-	int i;
-	u16 status;
-
-	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
-		status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
-				sizeof(range));
-		if (status)
-			break;
-
-		status = nvmet_discard_range(req->ns, &range, &bio);
-		if (status)
-			break;
-	}
-
-	if (bio) {
-		bio->bi_private = req;
-		bio->bi_end_io = nvmet_bio_done;
-		if (status) {
-			bio->bi_status = BLK_STS_IOERR;
-			bio_endio(bio);
-		} else {
-			submit_bio(bio);
-		}
-	} else {
-		nvmet_req_complete(req, status);
-	}
-}
-
-static void nvmet_execute_dsm(struct nvmet_req *req)
-{
-	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
-	case NVME_DSMGMT_AD:
-		nvmet_execute_discard(req);
-		return;
-	case NVME_DSMGMT_IDR:
-	case NVME_DSMGMT_IDW:
-	default:
-		/* Not supported yet */
-		nvmet_req_complete(req, 0);
-		return;
-	}
-}
-
-static void nvmet_execute_write_zeroes(struct nvmet_req *req)
-{
-	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
-	struct bio *bio = NULL;
-	u16 status = NVME_SC_SUCCESS;
-	sector_t sector;
-	sector_t nr_sector;
-
-	sector = le64_to_cpu(write_zeroes->slba) <<
-		(req->ns->blksize_shift - 9);
-	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
-		(req->ns->blksize_shift - 9));
-
-	if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
-				GFP_KERNEL, &bio, 0))
-		status = NVME_SC_INTERNAL | NVME_SC_DNR;
-
-	if (bio) {
-		bio->bi_private = req;
-		bio->bi_end_io = nvmet_bio_done;
-		submit_bio(bio);
-	} else {
-		nvmet_req_complete(req, status);
-	}
-}
-
-u16 nvmet_parse_io_cmd(struct nvmet_req *req)
-{
-	struct nvme_command *cmd = req->cmd;
-	u16 ret;
-
-	ret = nvmet_check_ctrl_status(req, cmd);
-	if (unlikely(ret)) {
-		req->ns = NULL;
-		return ret;
-	}
-
-	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
-	if (unlikely(!req->ns))
-		return NVME_SC_INVALID_NS | NVME_SC_DNR;
-
-	switch (cmd->common.opcode) {
-	case nvme_cmd_read:
-	case nvme_cmd_write:
-		req->execute = nvmet_execute_rw;
-		req->data_len = nvmet_rw_len(req);
-		return 0;
-	case nvme_cmd_flush:
-		req->execute = nvmet_execute_flush;
-		req->data_len = 0;
-		return 0;
-	case nvme_cmd_dsm:
-		req->execute = nvmet_execute_dsm;
-		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
-			sizeof(struct nvme_dsm_range);
-		return 0;
-	case nvme_cmd_write_zeroes:
-		req->execute = nvmet_execute_write_zeroes;
-		return 0;
-	default:
-		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
-		       req->sq->qid);
-		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
-	}
-}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 27a8561..1304ec3 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -45,6 +45,7 @@
 	struct nvme_ctrl	ctrl;
 
 	struct nvmet_ctrl	*target_ctrl;
+	struct nvmet_port	*port;
 };
 
 static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
@@ -63,7 +64,8 @@
 	unsigned long		flags;
 };
 
-static struct nvmet_port *nvmet_loop_port;
+static LIST_HEAD(nvme_loop_ports);
+static DEFINE_MUTEX(nvme_loop_ports_mutex);
 
 static LIST_HEAD(nvme_loop_ctrl_list);
 static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
@@ -146,7 +148,7 @@
 	/* fail with DNR on admin cmd timeout */
 	nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
 
-	return BLK_EH_HANDLED;
+	return BLK_EH_DONE;
 }
 
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -169,12 +171,12 @@
 
 	blk_mq_start_request(req);
 	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
-	iod->req.port = nvmet_loop_port;
+	iod->req.port = queue->ctrl->port;
 	if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
 			&queue->nvme_sq, &nvme_loop_ops))
 		return BLK_STS_OK;
 
-	if (blk_rq_payload_bytes(req)) {
+	if (blk_rq_nr_phys_segments(req)) {
 		iod->sg_table.sgl = iod->first_sgl;
 		if (sg_alloc_table_chained(&iod->sg_table,
 				blk_rq_nr_phys_segments(req),
@@ -517,6 +519,7 @@
 	.free_ctrl		= nvme_loop_free_ctrl,
 	.submit_async_event	= nvme_loop_submit_async_event,
 	.delete_ctrl		= nvme_loop_delete_ctrl_host,
+	.get_address		= nvmf_get_address,
 };
 
 static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -565,6 +568,23 @@
 	return ret;
 }
 
+static struct nvmet_port *nvme_loop_find_port(struct nvme_ctrl *ctrl)
+{
+	struct nvmet_port *p, *found = NULL;
+
+	mutex_lock(&nvme_loop_ports_mutex);
+	list_for_each_entry(p, &nvme_loop_ports, entry) {
+		/* if no transport address is specified use the first port */
+		if ((ctrl->opts->mask & NVMF_OPT_TRADDR) &&
+		    strcmp(ctrl->opts->traddr, p->disc_addr.traddr))
+			continue;
+		found = p;
+		break;
+	}
+	mutex_unlock(&nvme_loop_ports_mutex);
+	return found;
+}
+
 static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
@@ -589,6 +609,7 @@
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
+	ctrl->port = nvme_loop_find_port(&ctrl->ctrl);
 
 	ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
 			GFP_KERNEL);
@@ -646,27 +667,17 @@
 
 static int nvme_loop_add_port(struct nvmet_port *port)
 {
-	/*
-	 * XXX: disalow adding more than one port so
-	 * there is no connection rejections when a
-	 * a subsystem is assigned to a port for which
-	 * loop doesn't have a pointer.
-	 * This scenario would be possible if we allowed
-	 * more than one port to be added and a subsystem
-	 * was assigned to a port other than nvmet_loop_port.
-	 */
-
-	if (nvmet_loop_port)
-		return -EPERM;
-
-	nvmet_loop_port = port;
+	mutex_lock(&nvme_loop_ports_mutex);
+	list_add_tail(&port->entry, &nvme_loop_ports);
+	mutex_unlock(&nvme_loop_ports_mutex);
 	return 0;
 }
 
 static void nvme_loop_remove_port(struct nvmet_port *port)
 {
-	if (port == nvmet_loop_port)
-		nvmet_loop_port = NULL;
+	mutex_lock(&nvme_loop_ports_mutex);
+	list_del_init(&port->entry);
+	mutex_unlock(&nvme_loop_ports_mutex);
 }
 
 static const struct nvmet_fabrics_ops nvme_loop_ops = {
@@ -682,6 +693,7 @@
 	.name		= "loop",
 	.module		= THIS_MODULE,
 	.create_ctrl	= nvme_loop_create_ctrl,
+	.allowed_opts	= NVMF_OPT_TRADDR,
 };
 
 static int __init nvme_loop_init_module(void)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 15fd84a..480dfe1 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -30,6 +30,21 @@
 #define NVMET_ASYNC_EVENTS		4
 #define NVMET_ERROR_LOG_SLOTS		128
 
+
+/*
+ * Supported optional AENs:
+ */
+#define NVMET_AEN_CFG_OPTIONAL \
+	NVME_AEN_CFG_NS_ATTR
+
+/*
+ * Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
+ */
+#define NVMET_AEN_CFG_ALL \
+	(NVME_SMART_CRIT_SPARE | NVME_SMART_CRIT_TEMPERATURE | \
+	 NVME_SMART_CRIT_RELIABILITY | NVME_SMART_CRIT_MEDIA | \
+	 NVME_SMART_CRIT_VOLATILE_MEMORY | NVMET_AEN_CFG_OPTIONAL)
+
 /* Helper Macros when NVMe error is NVME_SC_CONNECT_INVALID_PARAM
  * The 16 bit shift is to set IATTR bit to 1, which means offending
  * offset starts in the data section of connect()
@@ -43,6 +58,7 @@
 	struct list_head	dev_link;
 	struct percpu_ref	ref;
 	struct block_device	*bdev;
+	struct file		*file;
 	u32			nsid;
 	u32			blksize_shift;
 	loff_t			size;
@@ -57,6 +73,8 @@
 	struct config_group	group;
 
 	struct completion	disable_done;
+	mempool_t		*bvec_pool;
+	struct kmem_cache	*bvec_cache;
 };
 
 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -82,7 +100,7 @@
 /**
  * struct nvmet_port -	Common structure to keep port
  *				information for the target.
- * @entry:		List head for holding a list of these elements.
+ * @entry:		Entry into referrals or transport list.
  * @disc_addr:		Address information is stored in a format defined
  *				for a discovery log page entry.
  * @group:		ConfigFS group for this element's folder.
@@ -120,6 +138,8 @@
 	u16			cntlid;
 	u32			kato;
 
+	u32			aen_enabled;
+	unsigned long		aen_masked;
 	struct nvmet_req	*async_event_cmds[NVMET_ASYNC_EVENTS];
 	unsigned int		nr_async_event_cmds;
 	struct list_head	async_events;
@@ -132,6 +152,9 @@
 
 	const struct nvmet_fabrics_ops *ops;
 
+	__le32			*changed_ns_list;
+	u32			nr_changed_ns;
+
 	char			subsysnqn[NVMF_NQN_FIELD_LEN];
 	char			hostnqn[NVMF_NQN_FIELD_LEN];
 };
@@ -222,8 +245,18 @@
 	struct nvmet_cq		*cq;
 	struct nvmet_ns		*ns;
 	struct scatterlist	*sg;
-	struct bio		inline_bio;
 	struct bio_vec		inline_bvec[NVMET_MAX_INLINE_BIOVEC];
+	union {
+		struct {
+			struct bio      inline_bio;
+		} b;
+		struct {
+			bool			mpool_alloc;
+			struct kiocb            iocb;
+			struct bio_vec          *bvec;
+			struct work_struct      work;
+		} f;
+	};
 	int			sg_cnt;
 	/* data length as parsed from the command: */
 	size_t			data_len;
@@ -263,7 +296,8 @@
 };
 
 u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
-u16 nvmet_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
+u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
 u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
 u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
 u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
@@ -316,6 +350,7 @@
 		size_t len);
 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
 		size_t len);
+u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
 
 u32 nvmet_get_log_page_len(struct nvme_command *cmd);
 
@@ -338,4 +373,14 @@
 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
 		const char *hostnqn);
 
+int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
+int nvmet_file_ns_enable(struct nvmet_ns *ns);
+void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
+void nvmet_file_ns_disable(struct nvmet_ns *ns);
+
+static inline u32 nvmet_rw_len(struct nvmet_req *req)
+{
+	return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
+			req->ns->blksize_shift;
+}
 #endif /* _NVMET_H */
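The new nvmet_rw_len() helper encodes NVMe's zero-based rw.length field, e.g.:

	/*
	 * rw.length == 7 on a 4K-block namespace (blksize_shift == 12)
	 * means eight blocks: (7 + 1) << 12 == 32768 bytes.
	 */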
diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig
index 1090924..54a3c29 100644
--- a/drivers/nvmem/Kconfig
+++ b/drivers/nvmem/Kconfig
@@ -175,4 +175,10 @@
 	  This driver can also be built as a module. If so, the module
 	  will be called nvmem-snvs-lpgpr.
 
+config RAVE_SP_EEPROM
+	tristate "Rave SP EEPROM Support"
+	depends on RAVE_SP_CORE
+	help
+	  Say y here to enable Rave SP EEPROM support.
+
 endif
diff --git a/drivers/nvmem/Makefile b/drivers/nvmem/Makefile
index e54dcfa..27e96a8 100644
--- a/drivers/nvmem/Makefile
+++ b/drivers/nvmem/Makefile
@@ -37,3 +37,6 @@
 nvmem_meson_mx_efuse-y		:= meson-mx-efuse.o
 obj-$(CONFIG_NVMEM_SNVS_LPGPR)	+= nvmem_snvs_lpgpr.o
 nvmem_snvs_lpgpr-y		:= snvs_lpgpr.o
+obj-$(CONFIG_RAVE_SP_EEPROM)	+= nvmem-rave-sp-eeprom.o
+nvmem-rave-sp-eeprom-y		:= rave-sp-eeprom.o
+
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index b05aa8e..b5b0cdc 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -353,18 +353,27 @@
 	return 0;
 }
 
-static int nvmem_add_cells(struct nvmem_device *nvmem,
-			   const struct nvmem_config *cfg)
+/**
+ * nvmem_add_cells() - Add cell information to an nvmem device
+ *
+ * @nvmem: nvmem device to add cells to.
+ * @info: nvmem cell info to add to the device
+ * @ncells: number of cells in info
+ *
+ * Return: 0 or negative error code on failure.
+ */
+int nvmem_add_cells(struct nvmem_device *nvmem,
+		    const struct nvmem_cell_info *info,
+		    int ncells)
 {
 	struct nvmem_cell **cells;
-	const struct nvmem_cell_info *info = cfg->cells;
 	int i, rval;
 
-	cells = kcalloc(cfg->ncells, sizeof(*cells), GFP_KERNEL);
+	cells = kcalloc(ncells, sizeof(*cells), GFP_KERNEL);
 	if (!cells)
 		return -ENOMEM;
 
-	for (i = 0; i < cfg->ncells; i++) {
+	for (i = 0; i < ncells; i++) {
 		cells[i] = kzalloc(sizeof(**cells), GFP_KERNEL);
 		if (!cells[i]) {
 			rval = -ENOMEM;
@@ -380,7 +389,7 @@
 		nvmem_cell_add(cells[i]);
 	}
 
-	nvmem->ncells = cfg->ncells;
+	nvmem->ncells = ncells;
 	/* remove tmp array */
 	kfree(cells);
 
@@ -393,6 +402,7 @@
 
 	return rval;
 }
+EXPORT_SYMBOL_GPL(nvmem_add_cells);
 
 /*
  * nvmem_setup_compat() - Create an additional binary entry in
@@ -509,7 +519,7 @@
 	}
 
 	if (config->cells)
-		nvmem_add_cells(nvmem, config);
+		nvmem_add_cells(nvmem, config->cells, config->ncells);
 
 	return nvmem;
 
@@ -559,6 +569,7 @@
  * nvmem_config.
 * Also creates a binary entry in /sys/bus/nvmem/devices/dev-name/nvmem
  *
+ * @dev: Device that uses the nvmem device.
  * @config: nvmem device configuration with which nvmem device is created.
  *
  * Return: Will be an ERR_PTR() on error or a valid pointer to nvmem_device
@@ -597,6 +608,7 @@
  * devm_nvmem_unregister() - Unregister previously registered managed nvmem
  * device.
  *
+ * @dev: Device that uses the nvmem device.
  * @nvmem: Pointer to previously registered nvmem device.
  *
 * Return: Will be negative on error or zero on success.
@@ -1107,6 +1119,8 @@
 
 		/* setup the first byte with lsb bits from nvmem */
 		rc = nvmem_reg_read(nvmem, cell->offset, &v, 1);
+		if (rc)
+			goto err;
 		*b++ |= GENMASK(bit_offset - 1, 0) & v;
 
 		/* setup rest of the byte if any */
@@ -1125,11 +1139,16 @@
 		/* setup the last byte with msb bits from nvmem */
 		rc = nvmem_reg_read(nvmem,
 				    cell->offset + cell->bytes - 1, &v, 1);
+		if (rc)
+			goto err;
 		*p |= GENMASK(7, (nbits + bit_offset) % BITS_PER_BYTE) & v;
 
 	}
 
 	return buf;
+err:
+	kfree(buf);
+	return ERR_PTR(rc);
 }
 
 /**
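With nvmem_add_cells() exported, a provider that only discovers its cell
layout at runtime can register cells after nvmem_register() instead of
passing a static table through nvmem_config. A hypothetical use (cell names
and offsets are invented):

	static int example_register_cells(struct nvmem_device *nvmem)
	{
		static const struct nvmem_cell_info cells[] = {
			{ .name = "mac-address", .offset = 0x40, .bytes = 6 },
			{ .name = "serial",      .offset = 0x50, .bytes = 16 },
		};

		return nvmem_add_cells(nvmem, cells, ARRAY_SIZE(cells));
	}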
diff --git a/drivers/nvmem/meson-efuse.c b/drivers/nvmem/meson-efuse.c
index 71823d1..d769840 100644
--- a/drivers/nvmem/meson-efuse.c
+++ b/drivers/nvmem/meson-efuse.c
@@ -24,23 +24,16 @@
 static int meson_efuse_read(void *context, unsigned int offset,
 			    void *val, size_t bytes)
 {
-	u8 *buf = val;
-	int ret;
-
-	ret = meson_sm_call_read(buf, bytes, SM_EFUSE_READ, offset,
-				 bytes, 0, 0, 0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return meson_sm_call_read((u8 *)val, bytes, SM_EFUSE_READ, offset,
+				  bytes, 0, 0, 0);
 }
 
-static struct nvmem_config econfig = {
-	.name = "meson-efuse",
-	.stride = 1,
-	.word_size = 1,
-	.read_only = true,
-};
+static int meson_efuse_write(void *context, unsigned int offset,
+			     void *val, size_t bytes)
+{
+	return meson_sm_call_write((u8 *)val, bytes, SM_EFUSE_WRITE, offset,
+				   bytes, 0, 0, 0);
+}
 
 static const struct of_device_id meson_efuse_match[] = {
 	{ .compatible = "amlogic,meson-gxbb-efuse", },
@@ -50,17 +43,27 @@
 
 static int meson_efuse_probe(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
 	struct nvmem_device *nvmem;
+	struct nvmem_config *econfig;
 	unsigned int size;
 
 	if (meson_sm_call(SM_EFUSE_USER_MAX, &size, 0, 0, 0, 0, 0) < 0)
 		return -EINVAL;
 
-	econfig.dev = &pdev->dev;
-	econfig.reg_read = meson_efuse_read;
-	econfig.size = size;
+	econfig = devm_kzalloc(dev, sizeof(*econfig), GFP_KERNEL);
+	if (!econfig)
+		return -ENOMEM;
 
-	nvmem = devm_nvmem_register(&pdev->dev, &econfig);
+	econfig->dev = dev;
+	econfig->name = dev_name(dev);
+	econfig->stride = 1;
+	econfig->word_size = 1;
+	econfig->reg_read = meson_efuse_read;
+	econfig->reg_write = meson_efuse_write;
+	econfig->size = size;
+
+	nvmem = devm_nvmem_register(&pdev->dev, econfig);
 
 	return PTR_ERR_OR_ZERO(nvmem);
 }
diff --git a/drivers/nvmem/rave-sp-eeprom.c b/drivers/nvmem/rave-sp-eeprom.c
new file mode 100644
index 0000000..50aeea6
--- /dev/null
+++ b/drivers/nvmem/rave-sp-eeprom.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * EEPROM driver for RAVE SP
+ *
+ * Copyright (C) 2018 Zodiac Inflight Innovations
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/mfd/rave-sp.h>
+#include <linux/module.h>
+#include <linux/nvmem-provider.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+
+/**
+ * enum rave_sp_eeprom_access_type - Supported types of EEPROM access
+ *
+ * @RAVE_SP_EEPROM_WRITE:	EEPROM write
+ * @RAVE_SP_EEPROM_READ:	EEPROM read
+ */
+enum rave_sp_eeprom_access_type {
+	RAVE_SP_EEPROM_WRITE = 0,
+	RAVE_SP_EEPROM_READ  = 1,
+};
+
+/**
+ * enum rave_sp_eeprom_header_size - EEPROM command header sizes
+ *
+ * @RAVE_SP_EEPROM_HEADER_SMALL: EEPROM header size for "small" devices (< 8K)
+ * @RAVE_SP_EEPROM_HEADER_BIG:	 EEPROM header size for "big" devices (> 8K)
+ */
+enum rave_sp_eeprom_header_size {
+	RAVE_SP_EEPROM_HEADER_SMALL = 4U,
+	RAVE_SP_EEPROM_HEADER_BIG   = 5U,
+};
+
+#define	RAVE_SP_EEPROM_PAGE_SIZE	32U
+
+/**
+ * struct rave_sp_eeprom_page - RAVE SP EEPROM page
+ *
+ * @type:	Access type (see enum rave_sp_eeprom_access_type)
+ * @success:	Success flag (Success = 1, Failure = 0)
+ * @data:	Read data
+ *
+ * Note this structure corresponds to the RSP_*_EEPROM payload from the
+ * RAVE SP ICD.
+ */
+struct rave_sp_eeprom_page {
+	u8  type;
+	u8  success;
+	u8  data[RAVE_SP_EEPROM_PAGE_SIZE];
+} __packed;
+
+/**
+ * struct rave_sp_eeprom - RAVE SP EEPROM device
+ *
+ * @sp:			Pointer to parent RAVE SP device
+ * @mutex:		Lock protecting access to EEPROM
+ * @address:		EEPROM device address
+ * @header_size:	Size of EEPROM command header for this device
+ * @dev:		Pointer to corresponding struct device used for logging
+ */
+struct rave_sp_eeprom {
+	struct rave_sp *sp;
+	struct mutex mutex;
+	u8 address;
+	unsigned int header_size;
+	struct device *dev;
+};
+
+/**
+ * rave_sp_eeprom_io - Low-level part of EEPROM page access
+ *
+ * @eeprom:	EEPROM device to write to
+ * @type:	EEPROM access type (read or write)
+ * @idx:	number of the EEPROM page
+ * @page:	Data to write or buffer to store result (via page->data)
+ *
+ * This function does all of the low-level work required to perform an
+ * EEPROM access: formatting the command payload, sending it, and
+ * checking the received result.
+ *
+ * Returns zero in case of success or negative error code in
+ * case of failure.
+ */
+static int rave_sp_eeprom_io(struct rave_sp_eeprom *eeprom,
+			     enum rave_sp_eeprom_access_type type,
+			     u16 idx,
+			     struct rave_sp_eeprom_page *page)
+{
+	const bool is_write = type == RAVE_SP_EEPROM_WRITE;
+	const unsigned int data_size = is_write ? sizeof(page->data) : 0;
+	const unsigned int cmd_size = eeprom->header_size + data_size;
+	const unsigned int rsp_size =
+		is_write ? sizeof(*page) - sizeof(page->data) : sizeof(*page);
+	unsigned int offset = 0;
+	u8 cmd[cmd_size];
+	int ret;
+
+	cmd[offset++] = eeprom->address;
+	cmd[offset++] = 0;
+	cmd[offset++] = type;
+	cmd[offset++] = idx;
+
+	/*
+	 * If there's still room in this command's header, it means we
+	 * are talking to an EEPROM that uses 16-bit page numbers and we
+	 * have to specify the index's MSB in the payload as well.
+	 */
+	if (offset < eeprom->header_size)
+		cmd[offset++] = idx >> 8;
+	/*
+	 * Copy the data to write into the command buffer first. For a
+	 * read, data_size is zero and the memcpy becomes a no-op.
+	 */
+	memcpy(&cmd[offset], page->data, data_size);
+
+	ret = rave_sp_exec(eeprom->sp, cmd, cmd_size, page, rsp_size);
+	if (ret)
+		return ret;
+
+	if (page->type != type)
+		return -EPROTO;
+
+	if (!page->success)
+		return -EIO;
+
+	return 0;
+}
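+
+/*
+ * Illustrative layout (derived from the code above, not from the ICD):
+ * a write command for a "small" device is
+ *
+ *	cmd[0] = address, cmd[1] = 0, cmd[2] = type, cmd[3] = page index,
+ *	cmd[4..35] = 32 bytes of page data
+ *
+ * while "big" devices insert the index MSB as a fifth header byte
+ * before the data.
+ */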
+
+/**
+ * rave_sp_eeprom_page_access - Access single EEPROM page
+ *
+ * @eeprom:	EEPROM device to access
+ * @type:	Access type to perform (read or write)
+ * @offset:	Offset within EEPROM to access
+ * @data:	Data buffer
+ * @data_len:	Size of the data buffer
+ *
+ * This function performs a generic access to a single page or a
+ * portion thereof. The requested access MUST NOT cross an EEPROM page
+ * boundary.
+ *
+ * Returns zero in case of success or negative error code in
+ * case of failure.
+ */
+static int
+rave_sp_eeprom_page_access(struct rave_sp_eeprom *eeprom,
+			   enum rave_sp_eeprom_access_type type,
+			   unsigned int offset, u8 *data,
+			   size_t data_len)
+{
+	const unsigned int page_offset = offset % RAVE_SP_EEPROM_PAGE_SIZE;
+	const unsigned int page_nr     = offset / RAVE_SP_EEPROM_PAGE_SIZE;
+	struct rave_sp_eeprom_page page;
+	int ret;
+
+	/*
+	 * This function will not work if the data access we've been
+	 * asked to do crosses an EEPROM page boundary. Normally this
+	 * should never happen and getting here would indicate a bug
+	 * in the code.
+	 */
+	if (WARN_ON(data_len > sizeof(page.data) - page_offset))
+		return -EINVAL;
+
+	if (type == RAVE_SP_EEPROM_WRITE) {
+		/*
+		 * If doing a partial write we need to do a read first
+		 * to fill the rest of the page with correct data.
+		 */
+		if (data_len < RAVE_SP_EEPROM_PAGE_SIZE) {
+			ret = rave_sp_eeprom_io(eeprom, RAVE_SP_EEPROM_READ,
+						page_nr, &page);
+			if (ret)
+				return ret;
+		}
+
+		memcpy(&page.data[page_offset], data, data_len);
+	}
+
+	ret = rave_sp_eeprom_io(eeprom, type, page_nr, &page);
+	if (ret)
+		return ret;
+
+	/*
+	 * Since we receive the result of the read via 'page.data'
+	 * buffer we need to copy that to 'data'
+	 */
+	if (type == RAVE_SP_EEPROM_READ)
+		memcpy(data, &page.data[page_offset], data_len);
+
+	return 0;
+}
+
+/**
+ * rave_sp_eeprom_access - Access EEPROM data
+ *
+ * @eeprom:	EEPROM device to access
+ * @type:	Access type to perform (read or write)
+ * @offset:	Offset within EEPROM to access
+ * @data:	Data buffer
+ * @data_len:	Size of the data buffer
+ *
+ * This function performs a generic access (either read or write) at
+ * an arbitrary offset (not necessarily page aligned) and of arbitrary
+ * length (not constrained by the EEPROM page size).
+ *
+ * Returns zero in case of success or negative error code in case of
+ * failure.
+ */
+static int rave_sp_eeprom_access(struct rave_sp_eeprom *eeprom,
+				 enum rave_sp_eeprom_access_type type,
+				 unsigned int offset, u8 *data,
+				 unsigned int data_len)
+{
+	unsigned int residue;
+	unsigned int chunk;
+	unsigned int head;
+	int ret;
+
+	mutex_lock(&eeprom->mutex);
+
+	head    = offset % RAVE_SP_EEPROM_PAGE_SIZE;
+	residue = data_len;
+
+	do {
+		/*
+		 * On the first iteration, if we are doing an access
+		 * that is not 32-byte aligned, we need to access only
+		 * the data up to the page boundary to avoid crossing
+		 * it in rave_sp_eeprom_page_access()
+		 */
+		if (unlikely(head)) {
+			chunk = RAVE_SP_EEPROM_PAGE_SIZE - head;
+			/*
+			 * This can only happen once per
+			 * rave_sp_eeprom_access() call, so we set
+			 * head to zero to process all the other
+			 * iterations normally.
+			 */
+			head  = 0;
+		} else {
+			chunk = RAVE_SP_EEPROM_PAGE_SIZE;
+		}
+
+		/*
+		 * We should never read more than 'residue' bytes
+		 */
+		chunk = min(chunk, residue);
+		ret = rave_sp_eeprom_page_access(eeprom, type, offset,
+						 data, chunk);
+		if (ret)
+			goto out;
+
+		residue -= chunk;
+		offset  += chunk;
+		data    += chunk;
+	} while (residue);
+out:
+	mutex_unlock(&eeprom->mutex);
+	return ret;
+}
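+
+/*
+ * Worked example (hypothetical numbers): a 60-byte read at offset 40
+ * is split by the loop above into chunks of 24 bytes (to the end of
+ * page 1), 32 bytes (all of page 2) and 4 bytes (start of page 3), so
+ * no single rave_sp_eeprom_page_access() call crosses a page boundary.
+ */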
+
+static int rave_sp_eeprom_reg_read(void *eeprom, unsigned int offset,
+				   void *val, size_t bytes)
+{
+	return rave_sp_eeprom_access(eeprom, RAVE_SP_EEPROM_READ,
+				     offset, val, bytes);
+}
+
+static int rave_sp_eeprom_reg_write(void *eeprom, unsigned int offset,
+				    void *val, size_t bytes)
+{
+	return rave_sp_eeprom_access(eeprom, RAVE_SP_EEPROM_WRITE,
+				     offset, val, bytes);
+}
+
+static int rave_sp_eeprom_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rave_sp *sp = dev_get_drvdata(dev->parent);
+	struct device_node *np = dev->of_node;
+	struct nvmem_config config = { 0 };
+	struct rave_sp_eeprom *eeprom;
+	struct nvmem_device *nvmem;
+	u32 reg[2], size;
+
+	if (of_property_read_u32_array(np, "reg", reg, ARRAY_SIZE(reg))) {
+		dev_err(dev, "Failed to parse \"reg\" property\n");
+		return -EINVAL;
+	}
+
+	size = reg[1];
+	/*
+	 * Per ICD, we have no more than 2 bytes to specify EEPROM
+	 * page.
+	 */
+	if (size > U16_MAX * RAVE_SP_EEPROM_PAGE_SIZE) {
+		dev_err(dev, "Specified size is too big\n");
+		return -EINVAL;
+	}
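+	/*
+	 * Worked arithmetic for the check above: U16_MAX pages of 32
+	 * bytes each, i.e. 65535 * 32 = 2097120 bytes, just under 2 MiB.
+	 */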
+
+	eeprom = devm_kzalloc(dev, sizeof(*eeprom), GFP_KERNEL);
+	if (!eeprom)
+		return -ENOMEM;
+
+	eeprom->address = reg[0];
+	eeprom->sp      = sp;
+	eeprom->dev     = dev;
+
+	if (size > SZ_8K)
+		eeprom->header_size = RAVE_SP_EEPROM_HEADER_BIG;
+	else
+		eeprom->header_size = RAVE_SP_EEPROM_HEADER_SMALL;
+
+	mutex_init(&eeprom->mutex);
+
+	config.id		= -1;
+	of_property_read_string(np, "zii,eeprom-name", &config.name);
+	config.priv		= eeprom;
+	config.dev		= dev;
+	config.size		= size;
+	config.reg_read		= rave_sp_eeprom_reg_read;
+	config.reg_write	= rave_sp_eeprom_reg_write;
+	config.word_size	= 1;
+	config.stride		= 1;
+
+	nvmem = devm_nvmem_register(dev, &config);
+
+	return PTR_ERR_OR_ZERO(nvmem);
+}
+
+static const struct of_device_id rave_sp_eeprom_of_match[] = {
+	{ .compatible = "zii,rave-sp-eeprom" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rave_sp_eeprom_of_match);
+
+static struct platform_driver rave_sp_eeprom_driver = {
+	.probe = rave_sp_eeprom_probe,
+	.driver	= {
+		.name = KBUILD_MODNAME,
+		.of_match_table = rave_sp_eeprom_of_match,
+	},
+};
+module_platform_driver(rave_sp_eeprom_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Andrey Vostrikov <andrey.vostrikov@cogentembedded.com>");
+MODULE_AUTHOR("Nikita Yushchenko <nikita.yoush@cogentembedded.com>");
+MODULE_AUTHOR("Andrey Smirnov <andrew.smirnov@gmail.com>");
+MODULE_DESCRIPTION("RAVE SP EEPROM driver");
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 064c818..33d8551 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -76,6 +76,8 @@
  * of_dma_configure - Setup DMA configuration
  * @dev:	Device to apply DMA configuration
  * @np:		Pointer to OF node having DMA configuration
+ * @force_dma:  Whether device is to be set up by of_dma_configure() even if
+ *		DMA capability is not explicitly described by firmware.
  *
  * Try to get device's DMA configuration from DT and update it
  * accordingly.
@@ -84,7 +86,7 @@
  * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events
  * to fix up DMA configuration.
  */
-int of_dma_configure(struct device *dev, struct device_node *np)
+int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 {
 	u64 dma_addr, paddr, size = 0;
 	int ret;
@@ -100,7 +102,7 @@
 		 * DMA configuration regardless of whether "dma-ranges" is
 		 * correctly specified or not.
 		 */
-		if (!dev->bus->force_dma)
+		if (!force_dma)
 			return ret == -ENODEV ? 0 : ret;
 
 		dma_addr = offset = 0;
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 9a4f4246..895c83e 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -353,7 +353,7 @@
 		/* ensure that dma_ops is set for virtual devices
 		 * using reserved memory
 		 */
-		of_dma_configure(dev, np);
+		of_dma_configure(dev, np, true);
 
 		dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
 	} else {
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 92fa94a..ab2f3fe 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -33,8 +33,6 @@
 /* Lock to allow exclusive modification to the device and opp lists */
 DEFINE_MUTEX(opp_table_lock);
 
-static void dev_pm_opp_get(struct dev_pm_opp *opp);
-
 static struct opp_device *_find_opp_dev(const struct device *dev,
 					struct opp_table *opp_table)
 {
@@ -281,6 +279,23 @@
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_suspend_opp_freq);
 
+int _get_opp_count(struct opp_table *opp_table)
+{
+	struct dev_pm_opp *opp;
+	int count = 0;
+
+	mutex_lock(&opp_table->lock);
+
+	list_for_each_entry(opp, &opp_table->opp_list, node) {
+		if (opp->available)
+			count++;
+	}
+
+	mutex_unlock(&opp_table->lock);
+
+	return count;
+}
+
 /**
  * dev_pm_opp_get_opp_count() - Get number of opps available in the opp table
  * @dev:	device for which we do this operation
@@ -291,25 +306,17 @@
 int dev_pm_opp_get_opp_count(struct device *dev)
 {
 	struct opp_table *opp_table;
-	struct dev_pm_opp *temp_opp;
-	int count = 0;
+	int count;
 
 	opp_table = _find_opp_table(dev);
 	if (IS_ERR(opp_table)) {
 		count = PTR_ERR(opp_table);
 		dev_dbg(dev, "%s: OPP table not found (%d)\n",
 			__func__, count);
-		return count;
+		return 0;
 	}
 
-	mutex_lock(&opp_table->lock);
-
-	list_for_each_entry(temp_opp, &opp_table->opp_list, node) {
-		if (temp_opp->available)
-			count++;
-	}
-
-	mutex_unlock(&opp_table->lock);
+	count = _get_opp_count(opp_table);
 	dev_pm_opp_put_opp_table(opp_table);
 
 	return count;
@@ -892,7 +899,7 @@
 	dev_pm_opp_put_opp_table(opp_table);
 }
 
-static void dev_pm_opp_get(struct dev_pm_opp *opp)
+void dev_pm_opp_get(struct dev_pm_opp *opp)
 {
 	kref_get(&opp->kref);
 }
@@ -985,6 +992,43 @@
 	return true;
 }
 
+static int _opp_is_duplicate(struct device *dev, struct dev_pm_opp *new_opp,
+			     struct opp_table *opp_table,
+			     struct list_head **head)
+{
+	struct dev_pm_opp *opp;
+
+	/*
+	 * Insert new OPP in order of increasing frequency and discard if
+	 * already present.
+	 *
+	 * Need to use &opp_table->opp_list in the condition part of the 'for'
+	 * loop, don't replace it with head otherwise it will become an infinite
+	 * loop.
+	 */
+	list_for_each_entry(opp, &opp_table->opp_list, node) {
+		if (new_opp->rate > opp->rate) {
+			*head = &opp->node;
+			continue;
+		}
+
+		if (new_opp->rate < opp->rate)
+			return 0;
+
+		/* Duplicate OPPs */
+		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
+			 __func__, opp->rate, opp->supplies[0].u_volt,
+			 opp->available, new_opp->rate,
+			 new_opp->supplies[0].u_volt, new_opp->available);
+
+		/* Should we compare voltages for all regulators here ? */
+		return opp->available &&
+		       new_opp->supplies[0].u_volt == opp->supplies[0].u_volt ? -EBUSY : -EEXIST;
+	}
+
+	return 0;
+}
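+
+/*
+ * Example of the return contract above: re-adding an existing,
+ * available OPP at the same frequency and voltage yields -EBUSY
+ * (callers treat it as "already present"), while a same-frequency OPP
+ * with a different voltage yields -EEXIST, a real error.
+ */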
+
 /*
  * Returns:
  * 0: On success. And appropriate error message for duplicate OPPs.
@@ -996,49 +1040,22 @@
  *  should be considered an error by the callers of _opp_add().
  */
 int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
-	     struct opp_table *opp_table)
+	     struct opp_table *opp_table, bool rate_not_available)
 {
-	struct dev_pm_opp *opp;
 	struct list_head *head;
 	int ret;
 
-	/*
-	 * Insert new OPP in order of increasing frequency and discard if
-	 * already present.
-	 *
-	 * Need to use &opp_table->opp_list in the condition part of the 'for'
-	 * loop, don't replace it with head otherwise it will become an infinite
-	 * loop.
-	 */
 	mutex_lock(&opp_table->lock);
 	head = &opp_table->opp_list;
 
-	list_for_each_entry(opp, &opp_table->opp_list, node) {
-		if (new_opp->rate > opp->rate) {
-			head = &opp->node;
-			continue;
+	if (likely(!rate_not_available)) {
+		ret = _opp_is_duplicate(dev, new_opp, opp_table, &head);
+		if (ret) {
+			mutex_unlock(&opp_table->lock);
+			return ret;
 		}
-
-		if (new_opp->rate < opp->rate)
-			break;
-
-		/* Duplicate OPPs */
-		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
-			 __func__, opp->rate, opp->supplies[0].u_volt,
-			 opp->available, new_opp->rate,
-			 new_opp->supplies[0].u_volt, new_opp->available);
-
-		/* Should we compare voltages for all regulators here ? */
-		ret = opp->available &&
-		      new_opp->supplies[0].u_volt == opp->supplies[0].u_volt ? -EBUSY : -EEXIST;
-
-		mutex_unlock(&opp_table->lock);
-		return ret;
 	}
 
-	if (opp_table->get_pstate)
-		new_opp->pstate = opp_table->get_pstate(dev, new_opp->rate);
-
 	list_add(&new_opp->node, head);
 	mutex_unlock(&opp_table->lock);
 
@@ -1104,7 +1121,7 @@
 	new_opp->available = true;
 	new_opp->dynamic = dynamic;
 
-	ret = _opp_add(dev, new_opp, opp_table);
+	ret = _opp_add(dev, new_opp, opp_table, false);
 	if (ret) {
 		/* Don't return error for duplicate OPPs */
 		if (ret == -EBUSY)
@@ -1140,7 +1157,6 @@
 			const u32 *versions, unsigned int count)
 {
 	struct opp_table *opp_table;
-	int ret;
 
 	opp_table = dev_pm_opp_get_opp_table(dev);
 	if (!opp_table)
@@ -1149,29 +1165,20 @@
 	/* Make sure there are no concurrent readers while updating opp_table */
 	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	/* Do we already have a version hierarchy associated with opp_table? */
-	if (opp_table->supported_hw) {
-		dev_err(dev, "%s: Already have supported hardware list\n",
-			__func__);
-		ret = -EBUSY;
-		goto err;
-	}
+	/* Another CPU that shares the OPP table has set the property ? */
+	if (opp_table->supported_hw)
+		return opp_table;
 
 	opp_table->supported_hw = kmemdup(versions, count * sizeof(*versions),
 					GFP_KERNEL);
 	if (!opp_table->supported_hw) {
-		ret = -ENOMEM;
-		goto err;
+		dev_pm_opp_put_opp_table(opp_table);
+		return ERR_PTR(-ENOMEM);
 	}
 
 	opp_table->supported_hw_count = count;
 
 	return opp_table;
-
-err:
-	dev_pm_opp_put_opp_table(opp_table);
-
-	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_supported_hw);
 
@@ -1188,12 +1195,6 @@
 	/* Make sure there are no concurrent readers while updating opp_table */
 	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	if (!opp_table->supported_hw) {
-		pr_err("%s: Doesn't have supported hardware list\n",
-		       __func__);
-		return;
-	}
-
 	kfree(opp_table->supported_hw);
 	opp_table->supported_hw = NULL;
 	opp_table->supported_hw_count = 0;
@@ -1215,7 +1216,6 @@
 struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
 	struct opp_table *opp_table;
-	int ret;
 
 	opp_table = dev_pm_opp_get_opp_table(dev);
 	if (!opp_table)
@@ -1224,26 +1224,17 @@
 	/* Make sure there are no concurrent readers while updating opp_table */
 	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	/* Do we already have a prop-name associated with opp_table? */
-	if (opp_table->prop_name) {
-		dev_err(dev, "%s: Already have prop-name %s\n", __func__,
-			opp_table->prop_name);
-		ret = -EBUSY;
-		goto err;
-	}
+	/* Another CPU that shares the OPP table has set the property ? */
+	if (opp_table->prop_name)
+		return opp_table;
 
 	opp_table->prop_name = kstrdup(name, GFP_KERNEL);
 	if (!opp_table->prop_name) {
-		ret = -ENOMEM;
-		goto err;
+		dev_pm_opp_put_opp_table(opp_table);
+		return ERR_PTR(-ENOMEM);
 	}
 
 	return opp_table;
-
-err:
-	dev_pm_opp_put_opp_table(opp_table);
-
-	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_set_prop_name);
 
@@ -1260,11 +1251,6 @@
 	/* Make sure there are no concurrent readers while updating opp_table */
 	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	if (!opp_table->prop_name) {
-		pr_err("%s: Doesn't have a prop-name\n", __func__);
-		return;
-	}
-
 	kfree(opp_table->prop_name);
 	opp_table->prop_name = NULL;
 
@@ -1334,11 +1320,9 @@
 		goto err;
 	}
 
-	/* Already have regulators set */
-	if (opp_table->regulators) {
-		ret = -EBUSY;
-		goto err;
-	}
+	/* Another CPU that shares the OPP table has set the regulators ? */
+	if (opp_table->regulators)
+		return opp_table;
 
 	opp_table->regulators = kmalloc_array(count,
 					      sizeof(*opp_table->regulators),
@@ -1392,10 +1376,8 @@
 {
 	int i;
 
-	if (!opp_table->regulators) {
-		pr_err("%s: Doesn't have regulators set\n", __func__);
-		return;
-	}
+	if (!opp_table->regulators)
+		goto put_opp_table;
 
 	/* Make sure there are no concurrent readers while updating opp_table */
 	WARN_ON(!list_empty(&opp_table->opp_list));
@@ -1409,6 +1391,7 @@
 	opp_table->regulators = NULL;
 	opp_table->regulator_count = 0;
 
+put_opp_table:
 	dev_pm_opp_put_opp_table(opp_table);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_put_regulators);
@@ -1494,7 +1477,6 @@
 			int (*set_opp)(struct dev_pm_set_opp_data *data))
 {
 	struct opp_table *opp_table;
-	int ret;
 
 	if (!set_opp)
 		return ERR_PTR(-EINVAL);
@@ -1505,24 +1487,15 @@
 
 	/* This should be called before OPPs are initialized */
 	if (WARN_ON(!list_empty(&opp_table->opp_list))) {
-		ret = -EBUSY;
-		goto err;
+		dev_pm_opp_put_opp_table(opp_table);
+		return ERR_PTR(-EBUSY);
 	}
 
-	/* Already have custom set_opp helper */
-	if (WARN_ON(opp_table->set_opp)) {
-		ret = -EBUSY;
-		goto err;
-	}
-
-	opp_table->set_opp = set_opp;
+	/* Another CPU that shares the OPP table has set the helper ? */
+	if (!opp_table->set_opp)
+		opp_table->set_opp = set_opp;
 
 	return opp_table;
-
-err:
-	dev_pm_opp_put_opp_table(opp_table);
-
-	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_register_set_opp_helper);
 
@@ -1535,97 +1508,15 @@
  */
 void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table)
 {
-	if (!opp_table->set_opp) {
-		pr_err("%s: Doesn't have custom set_opp helper set\n",
-		       __func__);
-		return;
-	}
-
 	/* Make sure there are no concurrent readers while updating opp_table */
 	WARN_ON(!list_empty(&opp_table->opp_list));
 
 	opp_table->set_opp = NULL;
-
 	dev_pm_opp_put_opp_table(opp_table);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper);
 
 /**
- * dev_pm_opp_register_get_pstate_helper() - Register get_pstate() helper.
- * @dev: Device for which the helper is getting registered.
- * @get_pstate: Helper.
- *
- * TODO: Remove this callback after the same information is available via Device
- * Tree.
- *
- * This allows a platform to initialize the performance states of individual
- * OPPs for its devices, until we get similar information directly from DT.
- *
- * This must be called before the OPPs are initialized for the device.
- */
-struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev,
-		int (*get_pstate)(struct device *dev, unsigned long rate))
-{
-	struct opp_table *opp_table;
-	int ret;
-
-	if (!get_pstate)
-		return ERR_PTR(-EINVAL);
-
-	opp_table = dev_pm_opp_get_opp_table(dev);
-	if (!opp_table)
-		return ERR_PTR(-ENOMEM);
-
-	/* This should be called before OPPs are initialized */
-	if (WARN_ON(!list_empty(&opp_table->opp_list))) {
-		ret = -EBUSY;
-		goto err;
-	}
-
-	/* Already have genpd_performance_state set */
-	if (WARN_ON(opp_table->genpd_performance_state)) {
-		ret = -EBUSY;
-		goto err;
-	}
-
-	opp_table->genpd_performance_state = true;
-	opp_table->get_pstate = get_pstate;
-
-	return opp_table;
-
-err:
-	dev_pm_opp_put_opp_table(opp_table);
-
-	return ERR_PTR(ret);
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_register_get_pstate_helper);
-
-/**
- * dev_pm_opp_unregister_get_pstate_helper() - Releases resources blocked for
- *					   get_pstate() helper
- * @opp_table: OPP table returned from dev_pm_opp_register_get_pstate_helper().
- *
- * Release resources blocked for platform specific get_pstate() helper.
- */
-void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table)
-{
-	if (!opp_table->genpd_performance_state) {
-		pr_err("%s: Doesn't have performance states set\n",
-		       __func__);
-		return;
-	}
-
-	/* Make sure there are no concurrent readers while updating opp_table */
-	WARN_ON(!list_empty(&opp_table->opp_list));
-
-	opp_table->genpd_performance_state = false;
-	opp_table->get_pstate = NULL;
-
-	dev_pm_opp_put_opp_table(opp_table);
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_get_pstate_helper);
-
-/**
  * dev_pm_opp_add()  - Add an OPP table from a table definitions
  * @dev:	device for which we do this operation
  * @freq:	Frequency in Hz for this OPP
diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c
index b03c035..e6828e5 100644
--- a/drivers/opp/debugfs.c
+++ b/drivers/opp/debugfs.c
@@ -77,10 +77,21 @@
 {
 	struct dentry *pdentry = opp_table->dentry;
 	struct dentry *d;
+	unsigned long id;
 	char name[25];	/* 20 chars for 64 bit value + 5 (opp:\0) */
 
-	/* Rate is unique to each OPP, use it to give opp-name */
-	snprintf(name, sizeof(name), "opp:%lu", opp->rate);
+	/*
+	 * Get directory name for OPP.
+	 *
+	 * - Normally the rate is unique to each OPP; use it to get a unique opp-name.
+	 * - For some devices the rate isn't available; use the index instead.
+	 */
+	if (likely(opp->rate))
+		id = opp->rate;
+	else
+		id = _get_opp_count(opp_table);
+
+	snprintf(name, sizeof(name), "opp:%lu", id);
 
 	/* Create per-opp directory */
 	d = debugfs_create_dir(name, pdentry);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index cb716aa..7af0dde 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/device.h>
 #include <linux/of_device.h>
+#include <linux/pm_domain.h>
 #include <linux/slab.h>
 #include <linux/export.h>
 
@@ -250,20 +251,17 @@
 
 /* Returns opp descriptor node for a device node, caller must
  * do of_node_put() */
-static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np)
+static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np,
+						     int index)
 {
-	/*
-	 * There should be only ONE phandle present in "operating-points-v2"
-	 * property.
-	 */
-
-	return of_parse_phandle(np, "operating-points-v2", 0);
+	/* "operating-points-v2" can be an array for power domain providers */
+	return of_parse_phandle(np, "operating-points-v2", index);
 }
 
 /* Returns opp descriptor node for a device, caller must do of_node_put() */
 struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev)
 {
-	return _opp_of_get_opp_desc_node(dev->of_node);
+	return _opp_of_get_opp_desc_node(dev->of_node, 0);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node);
 
@@ -289,9 +287,10 @@
 			      struct device_node *np)
 {
 	struct dev_pm_opp *new_opp;
-	u64 rate;
+	u64 rate = 0;
 	u32 val;
 	int ret;
+	bool rate_not_available = false;
 
 	new_opp = _opp_allocate(opp_table);
 	if (!new_opp)
@@ -299,8 +298,21 @@
 
 	ret = of_property_read_u64(np, "opp-hz", &rate);
 	if (ret < 0) {
-		dev_err(dev, "%s: opp-hz not found\n", __func__);
-		goto free_opp;
+		/* "opp-hz" is optional for devices like power domains. */
+		if (!of_find_property(dev->of_node, "#power-domain-cells",
+				      NULL)) {
+			dev_err(dev, "%s: opp-hz not found\n", __func__);
+			goto free_opp;
+		}
+
+		rate_not_available = true;
+	} else {
+		/*
+		 * Rate is defined as an unsigned long in clk API, and so
+		 * casting explicitly to its type. Must be fixed once rate is 64
+		 * bit guaranteed in clk API.
+		 */
+		new_opp->rate = (unsigned long)rate;
 	}
 
 	/* Check if the OPP supports hardware's hierarchy of versions or not */
@@ -309,12 +321,6 @@
 		goto free_opp;
 	}
 
-	/*
-	 * Rate is defined as an unsigned long in clk API, and so casting
-	 * explicitly to its type. Must be fixed once rate is 64 bit
-	 * guaranteed in clk API.
-	 */
-	new_opp->rate = (unsigned long)rate;
 	new_opp->turbo = of_property_read_bool(np, "turbo-mode");
 
 	new_opp->np = np;
@@ -324,11 +330,13 @@
 	if (!of_property_read_u32(np, "clock-latency-ns", &val))
 		new_opp->clock_latency_ns = val;
 
+	new_opp->pstate = of_genpd_opp_to_performance_state(dev, np);
+
 	ret = opp_parse_supplies(new_opp, dev, opp_table);
 	if (ret)
 		goto free_opp;
 
-	ret = _opp_add(dev, new_opp, opp_table);
+	ret = _opp_add(dev, new_opp, opp_table, rate_not_available);
 	if (ret) {
 		/* Don't return error for duplicate OPPs */
 		if (ret == -EBUSY)
@@ -374,7 +382,8 @@
 {
 	struct device_node *np;
 	struct opp_table *opp_table;
-	int ret = 0, count = 0;
+	int ret = 0, count = 0, pstate_count = 0;
+	struct dev_pm_opp *opp;
 
 	opp_table = _managed_opp(opp_np);
 	if (opp_table) {
@@ -408,6 +417,20 @@
 		goto put_opp_table;
 	}
 
+	list_for_each_entry(opp, &opp_table->opp_list, node)
+		pstate_count += !!opp->pstate;
+
+	/* Either all or none of the nodes shall have performance state set */
+	if (pstate_count && pstate_count != count) {
+		dev_err(dev, "Not all nodes have performance state set (%d: %d)\n",
+			count, pstate_count);
+		ret = -ENOENT;
+		goto put_opp_table;
+	}
+
+	if (pstate_count)
+		opp_table->genpd_performance_state = true;
+
 	opp_table->np = opp_np;
 	if (of_property_read_bool(opp_np, "opp-shared"))
 		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
@@ -509,6 +532,54 @@
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
 
+/**
+ * dev_pm_opp_of_add_table_indexed() - Initialize indexed opp table from device tree
+ * @dev:	device pointer used to lookup OPP table.
+ * @index:	Index number.
+ *
+ * Register the initial OPP table with the OPP library for given device only
+ * using the "operating-points-v2" property.
+ *
+ * Return:
+ * 0		On success OR
+ *		Duplicate OPPs (both freq and volt are same) and opp->available
+ * -EEXIST	Freq are same and volt are different OR
+ *		Duplicate OPPs (both freq and volt are same) and !opp->available
+ * -ENOMEM	Memory allocation failure
+ * -ENODEV	when 'operating-points' property is not found or is invalid data
+ *		in device node.
+ * -ENODATA	when empty 'operating-points' property is found
+ * -EINVAL	when invalid entries are found in opp-v2 table
+ */
+int dev_pm_opp_of_add_table_indexed(struct device *dev, int index)
+{
+	struct device_node *opp_np;
+	int ret, count;
+
+again:
+	opp_np = _opp_of_get_opp_desc_node(dev->of_node, index);
+	if (!opp_np) {
+		/*
+		 * If only one phandle is present, then the same OPP table
+		 * applies for all index requests.
+		 */
+		count = of_count_phandle_with_args(dev->of_node,
+						   "operating-points-v2", NULL);
+		if (count == 1 && index) {
+			index = 0;
+			goto again;
+		}
+
+		return -ENODEV;
+	}
+
+	ret = _of_add_opp_table_v2(dev, opp_np);
+	of_node_put(opp_np);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed);
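+
+/*
+ * Hypothetical usage sketch (not part of this patch): a power-domain
+ * provider with several domains could register one OPP table per
+ * domain index:
+ *
+ *	ret = dev_pm_opp_of_add_table_indexed(dev, domain_idx);
+ *	if (ret)
+ *		return ret;
+ */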
+
 /* CPU device specific helpers */
 
 /**
@@ -613,7 +684,7 @@
 		}
 
 		/* Get OPP descriptor node */
-		tmp_np = _opp_of_get_opp_desc_node(cpu_np);
+		tmp_np = _opp_of_get_opp_desc_node(cpu_np, 0);
 		of_node_put(cpu_np);
 		if (!tmp_np) {
 			pr_err("%pOF: Couldn't find opp node\n", cpu_np);
@@ -633,3 +704,76 @@
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_sharing_cpus);
+
+/**
+ * of_dev_pm_opp_find_required_opp() - Search for required OPP.
+ * @dev: The device whose OPP node is referenced by the 'np' DT node.
+ * @np: Node that contains the "required-opps" property.
+ *
+ * Returns the OPP of the device 'dev', whose phandle is present in the "np"
+ * node. Although the "required-opps" property supports having multiple
+ * phandles, this helper routine only parses the very first phandle in the list.
+ *
+ * Return: Matching opp, else returns ERR_PTR in case of error and should be
+ * handled using IS_ERR.
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev,
+						   struct device_node *np)
+{
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ENODEV);
+	struct device_node *required_np;
+	struct opp_table *opp_table;
+
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table))
+		return ERR_CAST(opp_table);
+
+	required_np = of_parse_phandle(np, "required-opps", 0);
+	if (unlikely(!required_np)) {
+		dev_err(dev, "Unable to parse required-opps\n");
+		goto put_opp_table;
+	}
+
+	mutex_lock(&opp_table->lock);
+
+	list_for_each_entry(temp_opp, &opp_table->opp_list, node) {
+		if (temp_opp->available && temp_opp->np == required_np) {
+			opp = temp_opp;
+
+			/* Increment the reference count of OPP */
+			dev_pm_opp_get(opp);
+			break;
+		}
+	}
+
+	mutex_unlock(&opp_table->lock);
+
+	of_node_put(required_np);
+put_opp_table:
+	dev_pm_opp_put_opp_table(opp_table);
+
+	return opp;
+}
+EXPORT_SYMBOL_GPL(of_dev_pm_opp_find_required_opp);
+
+/**
+ * dev_pm_opp_get_of_node() - Gets the DT node corresponding to an opp
+ * @opp:	opp for which the DT node has to be returned
+ *
+ * Return: DT node corresponding to the opp; NULL otherwise.
+ *
+ * The caller needs to put the node with of_node_put() after using it.
+ */
+struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp)
+{
+	if (IS_ERR_OR_NULL(opp)) {
+		pr_err("%s: Invalid parameters\n", __func__);
+		return NULL;
+	}
+
+	return of_node_get(opp->np);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
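+
+/*
+ * Minimal usage sketch (assumed caller, not from this patch): the
+ * returned node holds a reference that must be dropped after use:
+ *
+ *	struct device_node *np = dev_pm_opp_get_of_node(opp);
+ *
+ *	if (np)
+ *		of_node_put(np);
+ */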
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 4d00061..7c540fd 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -140,7 +140,6 @@
  * @genpd_performance_state: Device's power domain support performance state.
  * @set_opp: Platform specific set_opp callback
  * @set_opp_data: Data to be passed to set_opp callback
- * @get_pstate: Platform specific get_pstate callback
  * @dentry:	debugfs dentry pointer of the real device directory (not links).
  * @dentry_name: Name of the real dentry.
  *
@@ -178,7 +177,6 @@
 
 	int (*set_opp)(struct dev_pm_set_opp_data *data);
 	struct dev_pm_set_opp_data *set_opp_data;
-	int (*get_pstate)(struct device *dev, unsigned long rate);
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
@@ -187,14 +185,16 @@
 };
 
 /* Routines internal to opp core */
+void dev_pm_opp_get(struct dev_pm_opp *opp);
 void _get_opp_table_kref(struct opp_table *opp_table);
+int _get_opp_count(struct opp_table *opp_table);
 struct opp_table *_find_opp_table(struct device *dev);
 struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
 void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, bool remove_all);
 void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all);
 struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table);
 void _opp_free(struct dev_pm_opp *opp);
-int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table);
+int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table, bool rate_not_available);
 int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic);
 void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of);
 struct opp_table *_add_opp_table(struct device *dev);
diff --git a/drivers/parisc/Kconfig b/drivers/parisc/Kconfig
index 3a102a8..5a48b56 100644
--- a/drivers/parisc/Kconfig
+++ b/drivers/parisc/Kconfig
@@ -103,11 +103,6 @@
 	depends on PCI_LBA
 	default PCI_LBA
 
-config IOMMU_HELPER
-	bool
-	depends on IOMMU_SBA || IOMMU_CCIO
-	default y
-
 source "drivers/pcmcia/Kconfig"
 
 endmenu
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 297599f..6148236 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -1108,19 +1108,6 @@
 	return 0;
 }
 
-static int ccio_proc_info_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, &ccio_proc_info, NULL);
-}
-
-static const struct file_operations ccio_proc_info_fops = {
-	.owner = THIS_MODULE,
-	.open = ccio_proc_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
 static int ccio_proc_bitmap_info(struct seq_file *m, void *p)
 {
 	struct ioc *ioc = ioc_list;
@@ -1135,19 +1122,6 @@
 
 	return 0;
 }
-
-static int ccio_proc_bitmap_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, &ccio_proc_bitmap_info, NULL);
-}
-
-static const struct file_operations ccio_proc_bitmap_fops = {
-	.owner = THIS_MODULE,
-	.open = ccio_proc_bitmap_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
 #endif /* CONFIG_PROC_FS */
 
 /**
@@ -1589,15 +1563,13 @@
 
 #ifdef CONFIG_PROC_FS
 	if (ioc_count == 0) {
-		proc_create(MODULE_NAME, 0, proc_runway_root,
-			    &ccio_proc_info_fops);
-		proc_create(MODULE_NAME"-bitmap", 0, proc_runway_root,
-			    &ccio_proc_bitmap_fops);
+		proc_create_single(MODULE_NAME, 0, proc_runway_root,
+				ccio_proc_info);
+		proc_create_single(MODULE_NAME"-bitmap", 0, proc_runway_root,
+				ccio_proc_bitmap_info);
 	}
 #endif
 	ioc_count++;
-
-	parisc_has_iommu();
 	return 0;
 }
 
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 0a9c762..11de0ec 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -1864,20 +1864,6 @@
 }
 
 static int
-sba_proc_open(struct inode *i, struct file *f)
-{
-	return single_open(f, &sba_proc_info, NULL);
-}
-
-static const struct file_operations sba_proc_fops = {
-	.owner = THIS_MODULE,
-	.open = sba_proc_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-static int
 sba_proc_bitmap_info(struct seq_file *m, void *p)
 {
 	struct sba_device *sba_dev = sba_list;
@@ -1889,20 +1875,6 @@
 
 	return 0;
 }
-
-static int
-sba_proc_bitmap_open(struct inode *i, struct file *f)
-{
-	return single_open(f, &sba_proc_bitmap_info, NULL);
-}
-
-static const struct file_operations sba_proc_bitmap_fops = {
-	.owner = THIS_MODULE,
-	.open = sba_proc_bitmap_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
 #endif /* CONFIG_PROC_FS */
 
 static const struct parisc_device_id sba_tbl[] __initconst = {
@@ -2014,11 +1986,9 @@
 		break;
 	}
 
-	proc_create("sba_iommu", 0, root, &sba_proc_fops);
-	proc_create("sba_iommu-bitmap", 0, root, &sba_proc_bitmap_fops);
+	proc_create_single("sba_iommu", 0, root, sba_proc_info);
+	proc_create_single("sba_iommu-bitmap", 0, root, sba_proc_bitmap_info);
 #endif
-
-	parisc_has_iommu();
 	return 0;
 }
 
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 34b56a8..29a487f 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -5,10 +5,6 @@
 
 source "drivers/pci/pcie/Kconfig"
 
-config PCI_BUS_ADDR_T_64BIT
-	def_bool y if (ARCH_DMA_ADDR_T_64BIT || 64BIT)
-	depends on PCI
-
 config PCI_MSI
 	bool "Message Signaled Interrupts (MSI and MSI-X)"
 	depends on PCI
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index bc2ded4..35b7fc8 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -120,7 +120,7 @@
 EXPORT_SYMBOL_GPL(devm_request_pci_bus_resources);
 
 static struct pci_bus_region pci_32_bit = {0, 0xffffffffULL};
-#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 static struct pci_bus_region pci_64_bit = {0,
 				(pci_bus_addr_t) 0xffffffffffffffffULL};
 static struct pci_bus_region pci_high = {(pci_bus_addr_t) 0x100000000ULL,
@@ -230,7 +230,7 @@
 					  resource_size_t),
 		void *alignf_data)
 {
-#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 	int rc;
 
 	if (res->flags & IORESOURCE_MEM_64) {
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 3025063..f45b74f 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1434,6 +1434,9 @@
 {
 	struct irq_domain *domain;
 
+	if (WARN_ON(info->flags & MSI_FLAG_LEVEL_CAPABLE))
+		info->flags &= ~MSI_FLAG_LEVEL_CAPABLE;
+
 	if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
 		pci_msi_domain_update_dom_ops(info);
 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index b9a1311..18ba62c 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -16,6 +16,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/suspend.h>
 #include <linux/kexec.h>
+#include <linux/of_device.h>
+#include <linux/acpi.h>
 #include "pci.h"
 #include "pcie/portdrv.h"
 
@@ -753,10 +755,11 @@
 	 * better to resume the device from runtime suspend here.
 	 */
 	if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) ||
-	    !pci_dev_keep_suspended(pci_dev))
+	    !pci_dev_keep_suspended(pci_dev)) {
 		pm_runtime_resume(dev);
+		pci_dev->state_saved = false;
+	}
 
-	pci_dev->state_saved = false;
 	if (pm->suspend) {
 		pci_power_t prev = pci_dev->current_state;
 		int error;
@@ -1577,6 +1580,35 @@
 	return pci_num_vf(to_pci_dev(dev));
 }
 
+/**
+ * pci_dma_configure - Setup DMA configuration
+ * @dev: ptr to dev structure
+ *
+ * Function to update a PCI device's DMA configuration using the same
+ * info from the OF node or ACPI node of the host bridge's parent (if any).
+ */
+static int pci_dma_configure(struct device *dev)
+{
+	struct device *bridge;
+	int ret = 0;
+
+	bridge = pci_get_host_bridge_device(to_pci_dev(dev));
+
+	if (IS_ENABLED(CONFIG_OF) && bridge->parent &&
+	    bridge->parent->of_node) {
+		ret = of_dma_configure(dev, bridge->parent->of_node, true);
+	} else if (has_acpi_companion(bridge)) {
+		struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
+		enum dev_dma_attr attr = acpi_get_dma_attr(adev);
+
+		if (attr != DEV_DMA_NOT_SUPPORTED)
+			ret = acpi_dma_configure(dev, attr);
+	}
+
+	pci_put_host_bridge_device(bridge);
+	return ret;
+}
+
 struct bus_type pci_bus_type = {
 	.name		= "pci",
 	.match		= pci_bus_match,
@@ -1589,7 +1621,7 @@
 	.drv_groups	= pci_drv_groups,
 	.pm		= PCI_PM_OPS_PTR,
 	.num_vf		= pci_bus_num_vf,
-	.force_dma	= true,
+	.dma_configure	= pci_dma_configure,
 };
 EXPORT_SYMBOL(pci_bus_type);
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index dbfe7c4..e90cf5c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2025,8 +2025,7 @@
 
 	if (platform_pci_power_manageable(dev)) {
 		/*
-		 * Call the platform to choose the target state of the device
-		 * and enable wake-up from this state if supported.
+		 * Call the platform to find the target state for the device.
 		 */
 		pci_power_t state = platform_pci_choose_state(dev);
 
@@ -2059,8 +2058,7 @@
 	if (wakeup) {
 		/*
 		 * Find the deepest state from which the device can generate
-		 * wake-up events, make it the target state and enable device
-		 * to generate PME#.
+		 * PME#.
 		 */
 		if (dev->pme_support) {
 			while (target_state
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 1ee8927..7ac035af 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -435,25 +435,12 @@
 	return 0;
 }
 
-static int proc_bus_pci_dev_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &proc_bus_pci_devices_op);
-}
-
-static const struct file_operations proc_bus_pci_dev_operations = {
-	.owner		= THIS_MODULE,
-	.open		= proc_bus_pci_dev_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static int __init pci_proc_init(void)
 {
 	struct pci_dev *dev = NULL;
 	proc_bus_pci_dir = proc_mkdir("bus/pci", NULL);
-	proc_create("devices", 0, proc_bus_pci_dir,
-		    &proc_bus_pci_dev_operations);
+	proc_create_seq("devices", 0, proc_bus_pci_dir,
+		    &proc_bus_pci_devices_op);
 	proc_initialized = 1;
 	for_each_pci_dev(dev)
 		pci_proc_attach_device(dev);
diff --git a/drivers/phy/mediatek/Kconfig b/drivers/phy/mediatek/Kconfig
index 88ab4e2..8857d00 100644
--- a/drivers/phy/mediatek/Kconfig
+++ b/drivers/phy/mediatek/Kconfig
@@ -12,3 +12,12 @@
 	  different banks layout, the T-PHY with shared banks between
 	  multi-ports is first version, otherwise is the second version,
 	  so you can easily distinguish them by banks layout.
+
+config PHY_MTK_XSPHY
+	tristate "MediaTek XS-PHY Driver"
+	depends on ARCH_MEDIATEK && OF
+	select GENERIC_PHY
+	help
+	  Enable this to support the SuperSpeedPlus XS-PHY transceiver for
+	  USB 3.1 Gen2 controllers on MediaTek chips. The driver supports
+	  multiple USB 2.0 and USB 3.1 Gen2 ports.
diff --git a/drivers/phy/mediatek/Makefile b/drivers/phy/mediatek/Makefile
index 763a92e..e5074b6 100644
--- a/drivers/phy/mediatek/Makefile
+++ b/drivers/phy/mediatek/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_PHY_MTK_TPHY)		+= phy-mtk-tphy.o
+obj-$(CONFIG_PHY_MTK_XSPHY)		+= phy-mtk-xsphy.o
diff --git a/drivers/phy/mediatek/phy-mtk-xsphy.c b/drivers/phy/mediatek/phy-mtk-xsphy.c
new file mode 100644
index 0000000..020cd02
--- /dev/null
+++ b/drivers/phy/mediatek/phy-mtk-xsphy.c
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MediaTek USB3.1 gen2 xsphy Driver
+ *
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: Chunfeng Yun <chunfeng.yun@mediatek.com>
+ *
+ */
+
+#include <dt-bindings/phy/phy.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+
+/* u2 phy banks */
+#define SSUSB_SIFSLV_MISC		0x000
+#define SSUSB_SIFSLV_U2FREQ		0x100
+#define SSUSB_SIFSLV_U2PHY_COM	0x300
+
+/* u3 phy shared banks */
+#define SSPXTP_SIFSLV_DIG_GLB		0x000
+#define SSPXTP_SIFSLV_PHYA_GLB		0x100
+
+/* u3 phy banks */
+#define SSPXTP_SIFSLV_DIG_LN_TOP	0x000
+#define SSPXTP_SIFSLV_DIG_LN_TX0	0x100
+#define SSPXTP_SIFSLV_DIG_LN_RX0	0x200
+#define SSPXTP_SIFSLV_DIG_LN_DAIF	0x300
+#define SSPXTP_SIFSLV_PHYA_LN		0x400
+
+#define XSP_U2FREQ_FMCR0	((SSUSB_SIFSLV_U2FREQ) + 0x00)
+#define P2F_RG_FREQDET_EN	BIT(24)
+#define P2F_RG_CYCLECNT		GENMASK(23, 0)
+#define P2F_RG_CYCLECNT_VAL(x)	((P2F_RG_CYCLECNT) & (x))
+
+#define XSP_U2FREQ_MMONR0  ((SSUSB_SIFSLV_U2FREQ) + 0x0c)
+
+#define XSP_U2FREQ_FMMONR1	((SSUSB_SIFSLV_U2FREQ) + 0x10)
+#define P2F_RG_FRCK_EN		BIT(8)
+#define P2F_USB_FM_VALID	BIT(0)
+
+#define XSP_USBPHYACR0	((SSUSB_SIFSLV_U2PHY_COM) + 0x00)
+#define P2A0_RG_INTR_EN	BIT(5)
+
+#define XSP_USBPHYACR1		((SSUSB_SIFSLV_U2PHY_COM) + 0x04)
+#define P2A1_RG_INTR_CAL		GENMASK(23, 19)
+#define P2A1_RG_INTR_CAL_VAL(x)	((0x1f & (x)) << 19)
+#define P2A1_RG_VRT_SEL			GENMASK(14, 12)
+#define P2A1_RG_VRT_SEL_VAL(x)	((0x7 & (x)) << 12)
+#define P2A1_RG_TERM_SEL		GENMASK(10, 8)
+#define P2A1_RG_TERM_SEL_VAL(x)	((0x7 & (x)) << 8)
+
+#define XSP_USBPHYACR5		((SSUSB_SIFSLV_U2PHY_COM) + 0x014)
+#define P2A5_RG_HSTX_SRCAL_EN	BIT(15)
+#define P2A5_RG_HSTX_SRCTRL		GENMASK(14, 12)
+#define P2A5_RG_HSTX_SRCTRL_VAL(x)	((0x7 & (x)) << 12)
+
+#define XSP_USBPHYACR6		((SSUSB_SIFSLV_U2PHY_COM) + 0x018)
+#define P2A6_RG_BC11_SW_EN	BIT(23)
+#define P2A6_RG_OTG_VBUSCMP_EN	BIT(20)
+
+#define XSP_U2PHYDTM1		((SSUSB_SIFSLV_U2PHY_COM) + 0x06C)
+#define P2D_FORCE_IDDIG		BIT(9)
+#define P2D_RG_VBUSVALID	BIT(5)
+#define P2D_RG_SESSEND		BIT(4)
+#define P2D_RG_AVALID		BIT(2)
+#define P2D_RG_IDDIG		BIT(1)
+
+#define SSPXTP_PHYA_GLB_00		((SSPXTP_SIFSLV_PHYA_GLB) + 0x00)
+#define RG_XTP_GLB_BIAS_INTR_CTRL		GENMASK(21, 16)
+#define RG_XTP_GLB_BIAS_INTR_CTRL_VAL(x)	((0x3f & (x)) << 16)
+
+#define SSPXTP_PHYA_LN_04	((SSPXTP_SIFSLV_PHYA_LN) + 0x04)
+#define RG_XTP_LN0_TX_IMPSEL		GENMASK(4, 0)
+#define RG_XTP_LN0_TX_IMPSEL_VAL(x)	(0x1f & (x))
+
+#define SSPXTP_PHYA_LN_14	((SSPXTP_SIFSLV_PHYA_LN) + 0x014)
+#define RG_XTP_LN0_RX_IMPSEL		GENMASK(4, 0)
+#define RG_XTP_LN0_RX_IMPSEL_VAL(x)	(0x1f & (x))
+
+#define XSP_REF_CLK		26	/* MHz */
+#define XSP_SLEW_RATE_COEF	17
+#define XSP_SR_COEF_DIVISOR	1000
+#define XSP_FM_DET_CYCLE_CNT	1024
+
+struct xsphy_instance {
+	struct phy *phy;
+	void __iomem *port_base;
+	struct clk *ref_clk;	/* reference clock of analog phy */
+	u32 index;
+	u32 type;
+	/* only for HQA test */
+	int efuse_intr;
+	int efuse_tx_imp;
+	int efuse_rx_imp;
+	/* u2 eye diagram */
+	int eye_src;
+	int eye_vrt;
+	int eye_term;
+};
+
+struct mtk_xsphy {
+	struct device *dev;
+	void __iomem *glb_base;	/* only shared u3 sif */
+	struct xsphy_instance **phys;
+	int nphys;
+	int src_ref_clk; /* MHz, reference clock for slew rate calibration */
+	int src_coef;    /* coefficient for slew rate calibration */
+};
+
+static void u2_phy_slew_rate_calibrate(struct mtk_xsphy *xsphy,
+					struct xsphy_instance *inst)
+{
+	void __iomem *pbase = inst->port_base;
+	int calib_val;
+	int fm_out;
+	u32 tmp;
+
+	/* use force value */
+	if (inst->eye_src)
+		return;
+
+	/* enable USB ring oscillator */
+	tmp = readl(pbase + XSP_USBPHYACR5);
+	tmp |= P2A5_RG_HSTX_SRCAL_EN;
+	writel(tmp, pbase + XSP_USBPHYACR5);
+	udelay(1);	/* wait clock stable */
+
+	/* enable free run clock */
+	tmp = readl(pbase + XSP_U2FREQ_FMMONR1);
+	tmp |= P2F_RG_FRCK_EN;
+	writel(tmp, pbase + XSP_U2FREQ_FMMONR1);
+
+	/* set cycle count as 1024 */
+	tmp = readl(pbase + XSP_U2FREQ_FMCR0);
+	tmp &= ~(P2F_RG_CYCLECNT);
+	tmp |= P2F_RG_CYCLECNT_VAL(XSP_FM_DET_CYCLE_CNT);
+	writel(tmp, pbase + XSP_U2FREQ_FMCR0);
+
+	/* enable frequency meter */
+	tmp = readl(pbase + XSP_U2FREQ_FMCR0);
+	tmp |= P2F_RG_FREQDET_EN;
+	writel(tmp, pbase + XSP_U2FREQ_FMCR0);
+
+	/* ignore return value */
+	readl_poll_timeout(pbase + XSP_U2FREQ_FMMONR1, tmp,
+			   (tmp & P2F_USB_FM_VALID), 10, 200);
+
+	fm_out = readl(pbase + XSP_U2FREQ_MMONR0);
+
+	/* disable frequency meter */
+	tmp = readl(pbase + XSP_U2FREQ_FMCR0);
+	tmp &= ~P2F_RG_FREQDET_EN;
+	writel(tmp, pbase + XSP_U2FREQ_FMCR0);
+
+	/* disable free run clock */
+	tmp = readl(pbase + XSP_U2FREQ_FMMONR1);
+	tmp &= ~P2F_RG_FRCK_EN;
+	writel(tmp, pbase + XSP_U2FREQ_FMMONR1);
+
+	if (fm_out) {
+		/* (1024 / FM_OUT) x reference clock frequency x coefficient */
+		tmp = xsphy->src_ref_clk * xsphy->src_coef;
+		tmp = (tmp * XSP_FM_DET_CYCLE_CNT) / fm_out;
+		calib_val = DIV_ROUND_CLOSEST(tmp, XSP_SR_COEF_DIVISOR);
+	} else {
+		/* if FM detection fails, set a default value */
+		calib_val = 3;
+	}
+	dev_dbg(xsphy->dev, "phy.%d, fm_out:%d, calib:%d (clk:%d, coef:%d)\n",
+		inst->index, fm_out, calib_val,
+		xsphy->src_ref_clk, xsphy->src_coef);
+
+	/* set HS slew rate */
+	tmp = readl(pbase + XSP_USBPHYACR5);
+	tmp &= ~P2A5_RG_HSTX_SRCTRL;
+	tmp |= P2A5_RG_HSTX_SRCTRL_VAL(calib_val);
+	writel(tmp, pbase + XSP_USBPHYACR5);
+
+	/* disable USB ring oscillator */
+	tmp = readl(pbase + XSP_USBPHYACR5);
+	tmp &= ~P2A5_RG_HSTX_SRCAL_EN;
+	writel(tmp, pbase + XSP_USBPHYACR5);
+}
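+
+/*
+ * Worked example of the formula above (hypothetical fm_out): with the
+ * defaults src_ref_clk = 26 and src_coef = 17, a measured fm_out of
+ * 151 gives tmp = 26 * 17 * 1024 / 151 = 2997, and
+ * DIV_ROUND_CLOSEST(2997, 1000) = 3, matching the fallback value used
+ * when frequency detection fails.
+ */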
+
+static void u2_phy_instance_init(struct mtk_xsphy *xsphy,
+				 struct xsphy_instance *inst)
+{
+	void __iomem *pbase = inst->port_base;
+	u32 tmp;
+
+	/* DP/DM BC1.1 path Disable */
+	tmp = readl(pbase + XSP_USBPHYACR6);
+	tmp &= ~P2A6_RG_BC11_SW_EN;
+	writel(tmp, pbase + XSP_USBPHYACR6);
+
+	tmp = readl(pbase + XSP_USBPHYACR0);
+	tmp |= P2A0_RG_INTR_EN;
+	writel(tmp, pbase + XSP_USBPHYACR0);
+}
+
+static void u2_phy_instance_power_on(struct mtk_xsphy *xsphy,
+				     struct xsphy_instance *inst)
+{
+	void __iomem *pbase = inst->port_base;
+	u32 index = inst->index;
+	u32 tmp;
+
+	tmp = readl(pbase + XSP_USBPHYACR6);
+	tmp |= P2A6_RG_OTG_VBUSCMP_EN;
+	writel(tmp, pbase + XSP_USBPHYACR6);
+
+	tmp = readl(pbase + XSP_U2PHYDTM1);
+	tmp |= P2D_RG_VBUSVALID | P2D_RG_AVALID;
+	tmp &= ~P2D_RG_SESSEND;
+	writel(tmp, pbase + XSP_U2PHYDTM1);
+
+	dev_dbg(xsphy->dev, "%s(%d)\n", __func__, index);
+}
+
+static void u2_phy_instance_power_off(struct mtk_xsphy *xsphy,
+				      struct xsphy_instance *inst)
+{
+	void __iomem *pbase = inst->port_base;
+	u32 index = inst->index;
+	u32 tmp;
+
+	tmp = readl(pbase + XSP_USBPHYACR6);
+	tmp &= ~P2A6_RG_OTG_VBUSCMP_EN;
+	writel(tmp, pbase + XSP_USBPHYACR6);
+
+	tmp = readl(pbase + XSP_U2PHYDTM1);
+	tmp &= ~(P2D_RG_VBUSVALID | P2D_RG_AVALID);
+	tmp |= P2D_RG_SESSEND;
+	writel(tmp, pbase + XSP_U2PHYDTM1);
+
+	dev_dbg(xsphy->dev, "%s(%d)\n", __func__, index);
+}
+
+static void u2_phy_instance_set_mode(struct mtk_xsphy *xsphy,
+				     struct xsphy_instance *inst,
+				     enum phy_mode mode)
+{
+	u32 tmp;
+
+	tmp = readl(inst->port_base + XSP_U2PHYDTM1);
+	switch (mode) {
+	case PHY_MODE_USB_DEVICE:
+		tmp |= P2D_FORCE_IDDIG | P2D_RG_IDDIG;
+		break;
+	case PHY_MODE_USB_HOST:
+		tmp |= P2D_FORCE_IDDIG;
+		tmp &= ~P2D_RG_IDDIG;
+		break;
+	case PHY_MODE_USB_OTG:
+		tmp &= ~(P2D_FORCE_IDDIG | P2D_RG_IDDIG);
+		break;
+	default:
+		return;
+	}
+	writel(tmp, inst->port_base + XSP_U2PHYDTM1);
+}
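+
+/*
+ * Assumed semantics of the IDDIG override above: forcing IDDIG high
+ * selects device mode, forcing it low selects host mode, and clearing
+ * the force bit lets the hardware sample the ID pin for OTG.
+ */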
+
+static void phy_parse_property(struct mtk_xsphy *xsphy,
+				struct xsphy_instance *inst)
+{
+	struct device *dev = &inst->phy->dev;
+
+	switch (inst->type) {
+	case PHY_TYPE_USB2:
+		device_property_read_u32(dev, "mediatek,efuse-intr",
+					 &inst->efuse_intr);
+		device_property_read_u32(dev, "mediatek,eye-src",
+					 &inst->eye_src);
+		device_property_read_u32(dev, "mediatek,eye-vrt",
+					 &inst->eye_vrt);
+		device_property_read_u32(dev, "mediatek,eye-term",
+					 &inst->eye_term);
+		dev_dbg(dev, "intr:%d, src:%d, vrt:%d, term:%d\n",
+			inst->efuse_intr, inst->eye_src,
+			inst->eye_vrt, inst->eye_term);
+		break;
+	case PHY_TYPE_USB3:
+		device_property_read_u32(dev, "mediatek,efuse-intr",
+					 &inst->efuse_intr);
+		device_property_read_u32(dev, "mediatek,efuse-tx-imp",
+					 &inst->efuse_tx_imp);
+		device_property_read_u32(dev, "mediatek,efuse-rx-imp",
+					 &inst->efuse_rx_imp);
+		dev_dbg(dev, "intr:%d, tx-imp:%d, rx-imp:%d\n",
+			inst->efuse_intr, inst->efuse_tx_imp,
+			inst->efuse_rx_imp);
+		break;
+	default:
+		dev_err(xsphy->dev, "incompatible phy type\n");
+		return;
+	}
+}
+
+static void u2_phy_props_set(struct mtk_xsphy *xsphy,
+			     struct xsphy_instance *inst)
+{
+	void __iomem *pbase = inst->port_base;
+	u32 tmp;
+
+	if (inst->efuse_intr) {
+		tmp = readl(pbase + XSP_USBPHYACR1);
+		tmp &= ~P2A1_RG_INTR_CAL;
+		tmp |= P2A1_RG_INTR_CAL_VAL(inst->efuse_intr);
+		writel(tmp, pbase + XSP_USBPHYACR1);
+	}
+
+	if (inst->eye_src) {
+		tmp = readl(pbase + XSP_USBPHYACR5);
+		tmp &= ~P2A5_RG_HSTX_SRCTRL;
+		tmp |= P2A5_RG_HSTX_SRCTRL_VAL(inst->eye_src);
+		writel(tmp, pbase + XSP_USBPHYACR5);
+	}
+
+	if (inst->eye_vrt) {
+		tmp = readl(pbase + XSP_USBPHYACR1);
+		tmp &= ~P2A1_RG_VRT_SEL;
+		tmp |= P2A1_RG_VRT_SEL_VAL(inst->eye_vrt);
+		writel(tmp, pbase + XSP_USBPHYACR1);
+	}
+
+	if (inst->eye_term) {
+		tmp = readl(pbase + XSP_USBPHYACR1);
+		tmp &= ~P2A1_RG_TERM_SEL;
+		tmp |= P2A1_RG_TERM_SEL_VAL(inst->eye_term);
+		writel(tmp, pbase + XSP_USBPHYACR1);
+	}
+}
+
+static void u3_phy_props_set(struct mtk_xsphy *xsphy,
+			     struct xsphy_instance *inst)
+{
+	void __iomem *pbase = inst->port_base;
+	u32 tmp;
+
+	if (inst->efuse_intr) {
+		tmp = readl(xsphy->glb_base + SSPXTP_PHYA_GLB_00);
+		tmp &= ~RG_XTP_GLB_BIAS_INTR_CTRL;
+		tmp |= RG_XTP_GLB_BIAS_INTR_CTRL_VAL(inst->efuse_intr);
+		writel(tmp, xsphy->glb_base + SSPXTP_PHYA_GLB_00);
+	}
+
+	if (inst->efuse_tx_imp) {
+		tmp = readl(pbase + SSPXTP_PHYA_LN_04);
+		tmp &= ~RG_XTP_LN0_TX_IMPSEL;
+		tmp |= RG_XTP_LN0_TX_IMPSEL_VAL(inst->efuse_tx_imp);
+		writel(tmp, pbase + SSPXTP_PHYA_LN_04);
+	}
+
+	if (inst->efuse_rx_imp) {
+		tmp = readl(pbase + SSPXTP_PHYA_LN_14);
+		tmp &= ~RG_XTP_LN0_RX_IMPSEL;
+		tmp |= RG_XTP_LN0_RX_IMPSEL_VAL(inst->efuse_rx_imp);
+		writel(tmp, pbase + SSPXTP_PHYA_LN_14);
+	}
+}
+
+static int mtk_phy_init(struct phy *phy)
+{
+	struct xsphy_instance *inst = phy_get_drvdata(phy);
+	struct mtk_xsphy *xsphy = dev_get_drvdata(phy->dev.parent);
+	int ret;
+
+	ret = clk_prepare_enable(inst->ref_clk);
+	if (ret) {
+		dev_err(xsphy->dev, "failed to enable ref_clk\n");
+		return ret;
+	}
+
+	switch (inst->type) {
+	case PHY_TYPE_USB2:
+		u2_phy_instance_init(xsphy, inst);
+		u2_phy_props_set(xsphy, inst);
+		break;
+	case PHY_TYPE_USB3:
+		u3_phy_props_set(xsphy, inst);
+		break;
+	default:
+		dev_err(xsphy->dev, "incompatible phy type\n");
+		clk_disable_unprepare(inst->ref_clk);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int mtk_phy_power_on(struct phy *phy)
+{
+	struct xsphy_instance *inst = phy_get_drvdata(phy);
+	struct mtk_xsphy *xsphy = dev_get_drvdata(phy->dev.parent);
+
+	if (inst->type == PHY_TYPE_USB2) {
+		u2_phy_instance_power_on(xsphy, inst);
+		u2_phy_slew_rate_calibrate(xsphy, inst);
+	}
+
+	return 0;
+}
+
+static int mtk_phy_power_off(struct phy *phy)
+{
+	struct xsphy_instance *inst = phy_get_drvdata(phy);
+	struct mtk_xsphy *xsphy = dev_get_drvdata(phy->dev.parent);
+
+	if (inst->type == PHY_TYPE_USB2)
+		u2_phy_instance_power_off(xsphy, inst);
+
+	return 0;
+}
+
+static int mtk_phy_exit(struct phy *phy)
+{
+	struct xsphy_instance *inst = phy_get_drvdata(phy);
+
+	clk_disable_unprepare(inst->ref_clk);
+	return 0;
+}
+
+static int mtk_phy_set_mode(struct phy *phy, enum phy_mode mode)
+{
+	struct xsphy_instance *inst = phy_get_drvdata(phy);
+	struct mtk_xsphy *xsphy = dev_get_drvdata(phy->dev.parent);
+
+	if (inst->type == PHY_TYPE_USB2)
+		u2_phy_instance_set_mode(xsphy, inst, mode);
+
+	return 0;
+}
+
+static struct phy *mtk_phy_xlate(struct device *dev,
+				 struct of_phandle_args *args)
+{
+	struct mtk_xsphy *xsphy = dev_get_drvdata(dev);
+	struct xsphy_instance *inst = NULL;
+	struct device_node *phy_np = args->np;
+	int index;
+
+	if (args->args_count != 1) {
+		dev_err(dev, "invalid number of cells in 'phy' property\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	for (index = 0; index < xsphy->nphys; index++)
+		if (phy_np == xsphy->phys[index]->phy->dev.of_node) {
+			inst = xsphy->phys[index];
+			break;
+		}
+
+	if (!inst) {
+		dev_err(dev, "failed to find appropriate phy\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	inst->type = args->args[0];
+	if (!(inst->type == PHY_TYPE_USB2 ||
+	      inst->type == PHY_TYPE_USB3)) {
+		dev_err(dev, "unsupported phy type: %d\n", inst->type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	phy_parse_property(xsphy, inst);
+
+	return inst->phy;
+}
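+
+/*
+ * Sketch of the expected consumer binding (assumed, based on the
+ * single-cell xlate above; the node label is hypothetical):
+ *
+ *	phys = <&u2port0 PHY_TYPE_USB2>;
+ *
+ * where the one cell selects the PHY type validated in mtk_phy_xlate().
+ */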
+
+static const struct phy_ops mtk_xsphy_ops = {
+	.init		= mtk_phy_init,
+	.exit		= mtk_phy_exit,
+	.power_on	= mtk_phy_power_on,
+	.power_off	= mtk_phy_power_off,
+	.set_mode	= mtk_phy_set_mode,
+	.owner		= THIS_MODULE,
+};
+
+static const struct of_device_id mtk_xsphy_id_table[] = {
+	{ .compatible = "mediatek,xsphy", },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, mtk_xsphy_id_table);
+
+static int mtk_xsphy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *child_np;
+	struct phy_provider *provider;
+	struct resource *glb_res;
+	struct mtk_xsphy *xsphy;
+	struct resource res;
+	int port, retval;
+
+	xsphy = devm_kzalloc(dev, sizeof(*xsphy), GFP_KERNEL);
+	if (!xsphy)
+		return -ENOMEM;
+
+	xsphy->nphys = of_get_child_count(np);
+	xsphy->phys = devm_kcalloc(dev, xsphy->nphys,
+				       sizeof(*xsphy->phys), GFP_KERNEL);
+	if (!xsphy->phys)
+		return -ENOMEM;
+
+	xsphy->dev = dev;
+	platform_set_drvdata(pdev, xsphy);
+
+	glb_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* optional, may not exist if no u3 phys */
+	if (glb_res) {
+		/* get banks shared by multiple u3 phys */
+		xsphy->glb_base = devm_ioremap_resource(dev, glb_res);
+		if (IS_ERR(xsphy->glb_base)) {
+			dev_err(dev, "failed to remap glb regs\n");
+			return PTR_ERR(xsphy->glb_base);
+		}
+	}
+
+	xsphy->src_ref_clk = XSP_REF_CLK;
+	xsphy->src_coef = XSP_SLEW_RATE_COEF;
+	/* update slew rate calibration parameters if they exist */
+	device_property_read_u32(dev, "mediatek,src-ref-clk-mhz",
+				 &xsphy->src_ref_clk);
+	device_property_read_u32(dev, "mediatek,src-coef", &xsphy->src_coef);
+
+	port = 0;
+	for_each_child_of_node(np, child_np) {
+		struct xsphy_instance *inst;
+		struct phy *phy;
+
+		inst = devm_kzalloc(dev, sizeof(*inst), GFP_KERNEL);
+		if (!inst) {
+			retval = -ENOMEM;
+			goto put_child;
+		}
+
+		xsphy->phys[port] = inst;
+
+		phy = devm_phy_create(dev, child_np, &mtk_xsphy_ops);
+		if (IS_ERR(phy)) {
+			dev_err(dev, "failed to create phy\n");
+			retval = PTR_ERR(phy);
+			goto put_child;
+		}
+
+		retval = of_address_to_resource(child_np, 0, &res);
+		if (retval) {
+			dev_err(dev, "failed to get address resource(id-%d)\n",
+				port);
+			goto put_child;
+		}
+
+		inst->port_base = devm_ioremap_resource(&phy->dev, &res);
+		if (IS_ERR(inst->port_base)) {
+			dev_err(dev, "failed to remap phy regs\n");
+			retval = PTR_ERR(inst->port_base);
+			goto put_child;
+		}
+
+		inst->phy = phy;
+		inst->index = port;
+		phy_set_drvdata(phy, inst);
+		port++;
+
+		inst->ref_clk = devm_clk_get(&phy->dev, "ref");
+		if (IS_ERR(inst->ref_clk)) {
+			dev_err(dev, "failed to get ref_clk(id-%d)\n", port);
+			retval = PTR_ERR(inst->ref_clk);
+			goto put_child;
+		}
+	}
+
+	provider = devm_of_phy_provider_register(dev, mtk_phy_xlate);
+	return PTR_ERR_OR_ZERO(provider);
+
+put_child:
+	of_node_put(child_np);
+	return retval;
+}
+
+static struct platform_driver mtk_xsphy_driver = {
+	.probe		= mtk_xsphy_probe,
+	.driver		= {
+		.name	= "mtk-xsphy",
+		.of_match_table = mtk_xsphy_id_table,
+	},
+};
+
+module_platform_driver(mtk_xsphy_driver);
+
+MODULE_AUTHOR("Chunfeng Yun <chunfeng.yun@mediatek.com>");
+MODULE_DESCRIPTION("MediaTek USB XS-PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c
index 5439dd9..23705e1 100644
--- a/drivers/phy/motorola/phy-mapphone-mdm6600.c
+++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c
@@ -19,6 +19,8 @@
 
 #define PHY_MDM6600_PHY_DELAY_MS	4000	/* PHY enable 2.2s to 3.5s */
 #define PHY_MDM6600_ENABLED_DELAY_MS	8000	/* 8s more total for MDM6600 */
+#define MDM6600_MODEM_IDLE_DELAY_MS	1000	/* modem after USB suspend */
+#define MDM6600_MODEM_WAKE_DELAY_MS	200	/* modem response after idle */
 
 enum phy_mdm6600_ctrl_lines {
 	PHY_MDM6600_ENABLE,			/* USB PHY enable */
@@ -93,9 +95,11 @@
 	struct gpio_descs *cmd_gpios;
 	struct delayed_work bootup_work;
 	struct delayed_work status_work;
+	struct delayed_work modem_wake_work;
 	struct completion ack;
 	bool enabled;				/* mdm6600 phy enabled */
 	bool running;				/* mdm6600 boot done */
+	bool awake;				/* mdm6600 responds on n_gsm */
 	int status;
 };
 
@@ -446,6 +450,62 @@
 		dev_err(ddata->dev, "Device not functional\n");
 }
 
+/*
+ * USB suspend puts the mdm6600 into low-power mode. For apps using n_gsm,
+ * we need to keep the modem awake by kicking its mode0 GPIO. This keeps
+ * the modem awake for about 1.2 seconds. When no n_gsm apps are using the
+ * modem, runtime PM autosuspend can be enabled so the modem can enter
+ * low-power mode.
+ */
+static void phy_mdm6600_wake_modem(struct phy_mdm6600 *ddata)
+{
+	struct gpio_desc *mode_gpio0;
+
+	mode_gpio0 = ddata->mode_gpios->desc[PHY_MDM6600_MODE0];
+	gpiod_set_value_cansleep(mode_gpio0, 1);
+	usleep_range(5, 15);
+	gpiod_set_value_cansleep(mode_gpio0, 0);
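+	/* an already-awake modem responds quickly; a sleeping one needs ~200ms */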
+	if (ddata->awake)
+		usleep_range(5, 15);
+	else
+		msleep(MDM6600_MODEM_WAKE_DELAY_MS);
+}
+
+static void phy_mdm6600_modem_wake(struct work_struct *work)
+{
+	struct phy_mdm6600 *ddata;
+
+	ddata = container_of(work, struct phy_mdm6600, modem_wake_work.work);
+	phy_mdm6600_wake_modem(ddata);
+	schedule_delayed_work(&ddata->modem_wake_work,
+			      msecs_to_jiffies(MDM6600_MODEM_IDLE_DELAY_MS));
+}
+
+static int __maybe_unused phy_mdm6600_runtime_suspend(struct device *dev)
+{
+	struct phy_mdm6600 *ddata = dev_get_drvdata(dev);
+
+	cancel_delayed_work_sync(&ddata->modem_wake_work);
+	ddata->awake = false;
+
+	return 0;
+}
+
+static int __maybe_unused phy_mdm6600_runtime_resume(struct device *dev)
+{
+	struct phy_mdm6600 *ddata = dev_get_drvdata(dev);
+
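+	/* wake the modem immediately and re-arm the keep-awake work */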
+	phy_mdm6600_modem_wake(&ddata->modem_wake_work.work);
+	ddata->awake = true;
+
+	return 0;
+}
+
+static const struct dev_pm_ops phy_mdm6600_pm_ops = {
+	SET_RUNTIME_PM_OPS(phy_mdm6600_runtime_suspend,
+			   phy_mdm6600_runtime_resume, NULL)
+};
+
 static const struct of_device_id phy_mdm6600_id_table[] = {
 	{ .compatible = "motorola,mapphone-mdm6600", },
 	{},
@@ -464,6 +524,7 @@
 	INIT_DELAYED_WORK(&ddata->bootup_work,
 			  phy_mdm6600_deferred_power_on);
 	INIT_DELAYED_WORK(&ddata->status_work, phy_mdm6600_status);
+	INIT_DELAYED_WORK(&ddata->modem_wake_work, phy_mdm6600_modem_wake);
 	init_completion(&ddata->ack);
 
 	ddata->dev = &pdev->dev;
@@ -500,6 +561,24 @@
 	 */
 	msleep(PHY_MDM6600_PHY_DELAY_MS + 500);
 
+	/*
+	 * Enable runtime PM only after the PHY has been powered up properly.
+	 * It is currently only needed after USB suspends the mdm6600 and
+	 * n_gsm needs to access the device. We don't want to do this earlier
+	 * as the mode0 GPIO pin doubles as the mdm6600 wake-up GPIO.
+	 */
+	pm_runtime_use_autosuspend(ddata->dev);
+	pm_runtime_set_autosuspend_delay(ddata->dev,
+					 MDM6600_MODEM_IDLE_DELAY_MS);
+	pm_runtime_enable(ddata->dev);
+	error = pm_runtime_get_sync(ddata->dev);
+	if (error < 0) {
+		dev_warn(ddata->dev, "failed to wake modem: %i\n", error);
+		pm_runtime_put_noidle(ddata->dev);
+	}
+	pm_runtime_mark_last_busy(ddata->dev);
+	pm_runtime_put_autosuspend(ddata->dev);
+
 	return 0;
 
 cleanup:
@@ -512,6 +591,10 @@
 	struct phy_mdm6600 *ddata = platform_get_drvdata(pdev);
 	struct gpio_desc *reset_gpio = ddata->ctrl_gpios[PHY_MDM6600_RESET];
 
+	pm_runtime_dont_use_autosuspend(ddata->dev);
+	pm_runtime_put_sync(ddata->dev);
+	pm_runtime_disable(ddata->dev);
+
 	if (!ddata->running)
 		wait_for_completion_timeout(&ddata->ack,
 			msecs_to_jiffies(PHY_MDM6600_ENABLED_DELAY_MS));
@@ -519,6 +602,7 @@
 	gpiod_set_value_cansleep(reset_gpio, 1);
 	phy_mdm6600_device_power_off(ddata);
 
+	cancel_delayed_work_sync(&ddata->modem_wake_work);
 	cancel_delayed_work_sync(&ddata->bootup_work);
 	cancel_delayed_work_sync(&ddata->status_work);
 
@@ -530,6 +614,7 @@
 	.remove = phy_mdm6600_remove,
 	.driver = {
 		.name = "phy-mapphone-mdm6600",
+		.pm = &phy_mdm6600_pm_ops,
 		.of_match_table = of_match_ptr(phy_mdm6600_id_table),
 	},
 };
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index 09ac8af..35fd38c 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -153,6 +153,9 @@
 {
 	int ret;
 
+	if (!phy)
+		return 0;
+
 	if (!pm_runtime_enabled(&phy->dev))
 		return -ENOTSUPP;
 
@@ -168,6 +171,9 @@
 {
 	int ret;
 
+	if (!phy)
+		return 0;
+
 	if (!pm_runtime_enabled(&phy->dev))
 		return -ENOTSUPP;
 
@@ -181,6 +187,9 @@
 
 int phy_pm_runtime_put(struct phy *phy)
 {
+	if (!phy)
+		return 0;
+
 	if (!pm_runtime_enabled(&phy->dev))
 		return -ENOTSUPP;
 
@@ -190,6 +199,9 @@
 
 int phy_pm_runtime_put_sync(struct phy *phy)
 {
+	if (!phy)
+		return 0;
+
 	if (!pm_runtime_enabled(&phy->dev))
 		return -ENOTSUPP;
 
@@ -199,6 +211,9 @@
 
 void phy_pm_runtime_allow(struct phy *phy)
 {
+	if (!phy)
+		return;
+
 	if (!pm_runtime_enabled(&phy->dev))
 		return;
 
@@ -208,6 +223,9 @@
 
 void phy_pm_runtime_forbid(struct phy *phy)
 {
+	if (!phy)
+		return;
+
 	if (!pm_runtime_enabled(&phy->dev))
 		return;
 
diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig
index 7bfa64b..632a0e7 100644
--- a/drivers/phy/qualcomm/Kconfig
+++ b/drivers/phy/qualcomm/Kconfig
@@ -1,6 +1,15 @@
 #
-# Phy drivers for Qualcomm platforms
+# Phy drivers for Qualcomm and Atheros platforms
 #
+config PHY_ATH79_USB
+	tristate "Atheros AR71XX/9XXX USB PHY driver"
+	depends on OF && (ATH79 || COMPILE_TEST)
+	default y if USB_EHCI_HCD_PLATFORM || USB_OHCI_HCD_PLATFORM
+	select RESET_CONTROLLER
+	select GENERIC_PHY
+	help
+	  Enable this to support the USB PHY on Atheros AR71XX/9XXX SoCs.
+
 config PHY_QCOM_APQ8064_SATA
 	tristate "Qualcomm APQ8064 SATA SerDes/PHY driver"
 	depends on ARCH_QCOM
diff --git a/drivers/phy/qualcomm/Makefile b/drivers/phy/qualcomm/Makefile
index 9abb789..deb831f4 100644
--- a/drivers/phy/qualcomm/Makefile
+++ b/drivers/phy/qualcomm/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PHY_ATH79_USB)		+= phy-ath79-usb.o
 obj-$(CONFIG_PHY_QCOM_APQ8064_SATA)	+= phy-qcom-apq8064-sata.o
 obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA)	+= phy-qcom-ipq806x-sata.o
 obj-$(CONFIG_PHY_QCOM_QMP)		+= phy-qcom-qmp.o
diff --git a/drivers/phy/qualcomm/phy-ath79-usb.c b/drivers/phy/qualcomm/phy-ath79-usb.c
new file mode 100644
index 0000000..6fd6e07
--- /dev/null
+++ b/drivers/phy/qualcomm/phy-ath79-usb.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Atheros AR71XX/9XXX USB PHY driver
+ *
+ * Copyright (C) 2015-2018 Alban Bedel <albeu@free.fr>
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/phy/phy.h>
+#include <linux/reset.h>
+
+struct ath79_usb_phy {
+	struct reset_control *reset;
+	/*
+	 * The suspend override logic is inverted, hence the "no" prefix,
+	 * which makes the code easier to follow.
+	 */
+	struct reset_control *no_suspend_override;
+};
+
+static int ath79_usb_phy_power_on(struct phy *phy)
+{
+	struct ath79_usb_phy *priv = phy_get_drvdata(phy);
+	int err = 0;
+
+	if (priv->no_suspend_override) {
+		err = reset_control_assert(priv->no_suspend_override);
+		if (err)
+			return err;
+	}
+
+	err = reset_control_deassert(priv->reset);
+	if (err && priv->no_suspend_override)
+		reset_control_assert(priv->no_suspend_override);
+
+	return err;
+}
+
+static int ath79_usb_phy_power_off(struct phy *phy)
+{
+	struct ath79_usb_phy *priv = phy_get_drvdata(phy);
+	int err = 0;
+
+	err = reset_control_assert(priv->reset);
+	if (err)
+		return err;
+
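+	/* restore the suspend override; roll the PHY reset back on failure */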
+	if (priv->no_suspend_override) {
+		err = reset_control_deassert(priv->no_suspend_override);
+		if (err)
+			reset_control_deassert(priv->reset);
+	}
+
+	return err;
+}
+
+static const struct phy_ops ath79_usb_phy_ops = {
+	.power_on	= ath79_usb_phy_power_on,
+	.power_off	= ath79_usb_phy_power_off,
+	.owner		= THIS_MODULE,
+};
+
+static int ath79_usb_phy_probe(struct platform_device *pdev)
+{
+	struct ath79_usb_phy *priv;
+	struct phy *phy;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->reset = devm_reset_control_get(&pdev->dev, "usb-phy");
+	if (IS_ERR(priv->reset))
+		return PTR_ERR(priv->reset);
+
+	priv->no_suspend_override = devm_reset_control_get_optional(
+		&pdev->dev, "usb-suspend-override");
+	if (IS_ERR(priv->no_suspend_override))
+		return PTR_ERR(priv->no_suspend_override);
+
+	phy = devm_phy_create(&pdev->dev, NULL, &ath79_usb_phy_ops);
+	if (IS_ERR(phy))
+		return PTR_ERR(phy);
+
+	phy_set_drvdata(phy, priv);
+
+	return PTR_ERR_OR_ZERO(devm_of_phy_provider_register(
+				&pdev->dev, of_phy_simple_xlate));
+}
+
+static const struct of_device_id ath79_usb_phy_of_match[] = {
+	{ .compatible = "qca,ar7100-usb-phy" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, ath79_usb_phy_of_match);
+
+static struct platform_driver ath79_usb_phy_driver = {
+	.probe	= ath79_usb_phy_probe,
+	.driver = {
+		.of_match_table	= ath79_usb_phy_of_match,
+		.name		= "ath79-usb-phy",
+	}
+};
+module_platform_driver(ath79_usb_phy_driver);
+
+MODULE_DESCRIPTION("ATH79 USB PHY driver");
+MODULE_AUTHOR("Alban Bedel <albeu@free.fr>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c
index 6470c5d..4c47010 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.c
@@ -490,6 +490,118 @@
 	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_RUN_TIME, 0x13),
 };
 
+static const struct qmp_phy_init_tbl qmp_v3_usb3_uniphy_serdes_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_IVCO, 0x07),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_EN_SEL, 0x14),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CLK_SELECT, 0x30),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYS_CLK_CTRL, 0x02),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_RESETSM_CNTRL2, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CMN_CONFIG, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SVS_MODE_CLK_SEL, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_HSCLK_SEL, 0x80),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_DEC_START_MODE0, 0x82),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START1_MODE0, 0xab),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START2_MODE0, 0xea),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_DIV_FRAC_START3_MODE0, 0x02),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CP_CTRL_MODE0, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_RCTRL_MODE0, 0x16),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_PLL_CCTRL_MODE0, 0x36),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN1_MODE0, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_INTEGLOOP_GAIN0_MODE0, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE2_MODE0, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE1_MODE0, 0xc9),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORECLK_DIV_MODE0, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP3_MODE0, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP2_MODE0, 0x34),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP1_MODE0, 0x15),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP_EN, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_CORE_CLK_EN, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_LOCK_CMP_CFG, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_VCO_TUNE_MAP, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SYSCLK_BUF_ENABLE, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_EN_CENTER, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_PER1, 0x31),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_PER2, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_ADJ_PER1, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_ADJ_PER2, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_STEP_SIZE1, 0x85),
+	QMP_PHY_INIT_CFG(QSERDES_V3_COM_SSC_STEP_SIZE2, 0x07),
+};
+
+static const struct qmp_phy_init_tbl qmp_v3_usb3_uniphy_tx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V3_TX_HIGHZ_DRVR_EN, 0x10),
+	QMP_PHY_INIT_CFG(QSERDES_V3_TX_RCV_DETECT_LVL_2, 0x12),
+	QMP_PHY_INIT_CFG(QSERDES_V3_TX_LANE_MODE_1, 0xc6),
+	QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_RX, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V3_TX_RES_CODE_LANE_OFFSET_TX, 0x06),
+};
+
+static const struct qmp_phy_init_tbl qmp_v3_usb3_uniphy_rx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_VGA_CAL_CNTRL2, 0x0c),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_MODE_00, 0x50),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN, 0x0b),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0e),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4e),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL4, 0x18),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x77),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_CNTRL, 0x03),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL, 0x1c),
+	QMP_PHY_INIT_CFG(QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x75),
+};
+
+static const struct qmp_phy_init_tbl qmp_v3_usb3_uniphy_pcs_tbl[] = {
+	/* FLL settings */
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL2, 0x83),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_L, 0x09),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNT_VAL_H_TOL, 0xa2),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_MAN_CODE, 0x40),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_FLL_CNTRL1, 0x02),
+
+	/* Lock Det settings */
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG1, 0xd1),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG2, 0x1f),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_LOCK_DETECT_CONFIG3, 0x47),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_POWER_STATE_CONFIG2, 0x1b),
+
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RX_SIGDET_LVL, 0xba),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V0, 0x9f),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V1, 0x9f),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V2, 0xb5),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V3, 0x4c),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_V4, 0x64),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXMGN_LS, 0x6a),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V0, 0x15),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V0, 0x0d),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V1, 0x15),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V1, 0x0d),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V2, 0x15),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V2, 0x0d),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V3, 0x15),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V3, 0x1d),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_V4, 0x15),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_V4, 0x0d),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M6DB_LS, 0x15),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TXDEEMPH_M3P5DB_LS, 0x0d),
+
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RATE_SLEW_CNTRL, 0x02),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_PWRUP_RESET_DLY_TIME_AUXCLK, 0x04),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_TSYNC_RSYNC_TIME, 0x44),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_PWRUP_RESET_DLY_TIME_AUXCLK, 0x04),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_L, 0x40),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RCVR_DTCT_DLY_U3_H, 0x00),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_WAIT_TIME, 0x75),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_LFPS_TX_ECSTART_EQTLOCK, 0x86),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_RXEQTRAINING_RUN_TIME, 0x13),
+
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_REFGEN_REQ_CONFIG1, 0x21),
+	QMP_PHY_INIT_CFG(QPHY_V3_PCS_REFGEN_REQ_CONFIG2, 0x60),
+};
+
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
 	/* phy-type - PCIE/UFS/USB */
@@ -766,6 +878,7 @@
 	.pwrdn_ctrl		= SW_PWRDN,
 	.mask_pcs_ready		= PHYSTATUS,
 
+	.has_pwrdn_delay	= true,
 	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
 	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 
@@ -774,6 +887,35 @@
 	.rx_b_lane_offset	= 0x400,
 };
 
+static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
+	.type			= PHY_TYPE_USB3,
+	.nlanes			= 1,
+
+	.serdes_tbl		= qmp_v3_usb3_uniphy_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_uniphy_serdes_tbl),
+	.tx_tbl			= qmp_v3_usb3_uniphy_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_uniphy_tx_tbl),
+	.rx_tbl			= qmp_v3_usb3_uniphy_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_uniphy_rx_tbl),
+	.pcs_tbl		= qmp_v3_usb3_uniphy_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_uniphy_pcs_tbl),
+	.clk_list		= qmp_v3_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.vreg_list		= msm8996_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(msm8996_phy_vreg_l),
+	.regs			= qmp_v3_usb3phy_regs_layout,
+
+	.start_ctrl		= SERDES_START | PCS_START,
+	.pwrdn_ctrl		= SW_PWRDN,
+	.mask_pcs_ready		= PHYSTATUS,
+
+	.has_pwrdn_delay	= true,
+	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
+	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
+};
+
 static void qcom_qmp_phy_configure(void __iomem *base,
 				   const unsigned int *regs,
 				   const struct qmp_phy_init_tbl tbl[],
@@ -793,19 +935,6 @@
 	}
 }
 
-static int qcom_qmp_phy_poweron(struct phy *phy)
-{
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	int ret;
-
-	ret = clk_prepare_enable(qphy->pipe_clk);
-	if (ret)
-		dev_err(qmp->dev, "pipe_clk enable failed, err=%d\n", ret);
-
-	return ret;
-}
-
 static int qcom_qmp_phy_com_init(struct qcom_qmp *qmp)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
@@ -974,6 +1103,12 @@
 		}
 	}
 
+	ret = clk_prepare_enable(qphy->pipe_clk);
+	if (ret) {
+		dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret);
+		goto err_clk_enable;
+	}
+
 	/* Tx, Rx, and PCS configurations */
 	qcom_qmp_phy_configure(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num);
 	/* Configuration for other LANE for USB-DP combo PHY */
@@ -1019,6 +1154,8 @@
 	return ret;
 
 err_pcs_ready:
+	clk_disable_unprepare(qphy->pipe_clk);
+err_clk_enable:
 	if (cfg->has_lane_rst)
 		reset_control_assert(qphy->lane_rst);
 err_lane_rst:
@@ -1283,7 +1420,6 @@
 static const struct phy_ops qcom_qmp_phy_gen_ops = {
 	.init		= qcom_qmp_phy_init,
 	.exit		= qcom_qmp_phy_exit,
-	.power_on	= qcom_qmp_phy_poweron,
 	.set_mode	= qcom_qmp_phy_set_mode,
 	.owner		= THIS_MODULE,
 };
@@ -1381,8 +1517,11 @@
 		.compatible = "qcom,ipq8074-qmp-pcie-phy",
 		.data = &ipq8074_pciephy_cfg,
 	}, {
-		.compatible = "qcom,qmp-v3-usb3-phy",
+		.compatible = "qcom,sdm845-qmp-usb3-phy",
 		.data = &qmp_v3_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sdm845-qmp-usb3-uni-phy",
+		.data = &qmp_v3_usb3_uniphy_cfg,
 	},
 	{ },
 };
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index d1c6905..5d78d43 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -214,6 +214,8 @@
 #define QSERDES_V3_RX_UCDR_FASTLOCK_FO_GAIN		0x030
 #define QSERDES_V3_RX_UCDR_SO_SATURATION_AND_ENABLE	0x034
 #define QSERDES_V3_RX_RX_TERM_BW			0x07c
+#define QSERDES_V3_RX_VGA_CAL_CNTRL1			0x0bc
+#define QSERDES_V3_RX_VGA_CAL_CNTRL2			0x0c0
 #define QSERDES_V3_RX_RX_EQ_GAIN2_LSB			0x0c8
 #define QSERDES_V3_RX_RX_EQ_GAIN2_MSB			0x0cc
 #define QSERDES_V3_RX_RX_EQU_ADAPTOR_CNTRL2		0x0d4
@@ -227,6 +229,7 @@
 #define QSERDES_V3_RX_SIGDET_DEGLITCH_CNTRL		0x10c
 #define QSERDES_V3_RX_RX_BAND				0x110
 #define QSERDES_V3_RX_RX_INTERFACE_MODE			0x11c
+#define QSERDES_V3_RX_RX_MODE_00			0x164
 
 /* Only for QMP V3 PHY - PCS registers */
 #define QPHY_V3_PCS_POWER_DOWN_CONTROL			0x004
@@ -273,6 +276,8 @@
 #define QPHY_V3_PCS_FLL_CNT_VAL_H_TOL			0x0d0
 #define QPHY_V3_PCS_FLL_MAN_CODE			0x0d4
 #define QPHY_V3_PCS_RX_SIGDET_LVL			0x1d8
+#define QPHY_V3_PCS_REFGEN_REQ_CONFIG1			0x20c
+#define QPHY_V3_PCS_REFGEN_REQ_CONFIG2			0x210
 
 /* Only for QMP V3 PHY - PCS_MISC registers */
 #define QPHY_V3_PCS_MISC_CLAMP_ENABLE			0x0c
diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c
index 94afeac..e70e425 100644
--- a/drivers/phy/qualcomm/phy-qcom-qusb2.c
+++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c
@@ -20,6 +20,8 @@
 #include <linux/reset.h>
 #include <linux/slab.h>
 
+#include <dt-bindings/phy/phy-qcom-qusb2.h>
+
 #define QUSB2PHY_PLL_TEST		0x04
 #define CLK_REF_SEL			BIT(7)
 
@@ -60,6 +62,17 @@
 #define CORE_RESET				BIT(5)
 #define CORE_RESET_MUX				BIT(6)
 
+/* QUSB2PHY_IMP_CTRL1 register bits */
+#define IMP_RES_OFFSET_MASK			GENMASK(5, 0)
+#define IMP_RES_OFFSET_SHIFT			0x0
+
+/* QUSB2PHY_PORT_TUNE1 register bits */
+#define HSTX_TRIM_MASK				GENMASK(7, 4)
+#define HSTX_TRIM_SHIFT				0x4
+#define PREEMPH_WIDTH_HALF_BIT			BIT(2)
+#define PREEMPHASIS_EN_MASK			GENMASK(1, 0)
+#define PREEMPHASIS_EN_SHIFT			0x0
+
 #define QUSB2PHY_PLL_ANALOG_CONTROLS_TWO	0x04
 #define QUSB2PHY_PLL_CLOCK_INVERTERS		0x18c
 #define QUSB2PHY_PLL_CMODE			0x2c
@@ -139,7 +152,7 @@
 	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_PWR_CTRL, 0x00),
 };
 
-static const unsigned int qusb2_v2_regs_layout[] = {
+static const unsigned int sdm845_regs_layout[] = {
 	[QUSB2PHY_PLL_CORE_INPUT_OVERRIDE] = 0xa8,
 	[QUSB2PHY_PLL_STATUS]		= 0x1a0,
 	[QUSB2PHY_PORT_TUNE1]		= 0x240,
@@ -153,7 +166,7 @@
 	[QUSB2PHY_INTR_CTRL]		= 0x230,
 };
 
-static const struct qusb2_phy_init_tbl qusb2_v2_init_tbl[] = {
+static const struct qusb2_phy_init_tbl sdm845_init_tbl[] = {
 	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_ANALOG_CONTROLS_TWO, 0x03),
 	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_CLOCK_INVERTERS, 0x7c),
 	QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_CMODE, 0x80),
@@ -208,10 +221,10 @@
 	.autoresume_en	 = BIT(3),
 };
 
-static const struct qusb2_phy_cfg qusb2_v2_phy_cfg = {
-	.tbl		= qusb2_v2_init_tbl,
-	.tbl_num	= ARRAY_SIZE(qusb2_v2_init_tbl),
-	.regs		= qusb2_v2_regs_layout,
+static const struct qusb2_phy_cfg sdm845_phy_cfg = {
+	.tbl		= sdm845_init_tbl,
+	.tbl_num	= ARRAY_SIZE(sdm845_init_tbl),
+	.regs		= sdm845_regs_layout,
 
 	.disable_ctrl	= (PWR_CTRL1_VREF_SUPPLY_TRIM | PWR_CTRL1_CLAMP_N_EN |
 			   POWER_DOWN),
@@ -241,6 +254,15 @@
  * @tcsr: TCSR syscon register map
  * @cell: nvmem cell containing phy tuning value
  *
+ * @override_imp_res_offset: PHY should use different rescode offset
+ * @imp_res_offset_value: rescode offset to be updated in IMP_CTRL1 register
+ * @override_hstx_trim: PHY should use different HSTX o/p current value
+ * @hstx_trim_value: HSTX_TRIM value to be updated in TUNE1 register
+ * @override_preemphasis: PHY should use different pre-emphasis amplitude
+ * @preemphasis_level: pre-emphasis amplitude to be updated in TUNE1 register
+ * @override_preemphasis_width: PHY should use different pre-emphasis duration
+ * @preemphasis_width: half/full-width pre-emphasis updated via TUNE1
+ *
  * @cfg: phy config data
  * @has_se_clk_scheme: indicate if PHY has single-ended ref clock scheme
  * @phy_initialized: indicate if PHY has been initialized
@@ -259,12 +281,35 @@
 	struct regmap *tcsr;
 	struct nvmem_cell *cell;
 
+	bool override_imp_res_offset;
+	u8 imp_res_offset_value;
+	bool override_hstx_trim;
+	u8 hstx_trim_value;
+	bool override_preemphasis;
+	u8 preemphasis_level;
+	bool override_preemphasis_width;
+	u8 preemphasis_width;
+
 	const struct qusb2_phy_cfg *cfg;
 	bool has_se_clk_scheme;
 	bool phy_initialized;
 	enum phy_mode mode;
 };
 
+static inline void qusb2_write_mask(void __iomem *base, u32 offset,
+				    u32 val, u32 mask)
+{
+	u32 reg;
+
+	reg = readl(base + offset);
+	reg &= ~mask;
+	reg |= val & mask;
+	writel(reg, base + offset);
+
+	/* Ensure above write is completed */
+	readl(base + offset);
+}
+
 static inline void qusb2_setbits(void __iomem *base, u32 offset, u32 val)
 {
 	u32 reg;
@@ -305,6 +350,42 @@
 }
 
 /*
+ * Update board-specific PHY tuning override values if specified in the
+ * device tree.
+ */
+static void qusb2_phy_override_phy_params(struct qusb2_phy *qphy)
+{
+	const struct qusb2_phy_cfg *cfg = qphy->cfg;
+
+	if (qphy->override_imp_res_offset)
+		qusb2_write_mask(qphy->base, QUSB2PHY_IMP_CTRL1,
+			     qphy->imp_res_offset_value << IMP_RES_OFFSET_SHIFT,
+			     IMP_RES_OFFSET_MASK);
+
+	if (qphy->override_hstx_trim)
+		qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE1],
+				 qphy->hstx_trim_value << HSTX_TRIM_SHIFT,
+				 HSTX_TRIM_MASK);
+
+	if (qphy->override_preemphasis)
+		qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE1],
+				qphy->preemphasis_level << PREEMPHASIS_EN_SHIFT,
+				PREEMPHASIS_EN_MASK);
+
+	if (qphy->override_preemphasis_width) {
+		if (qphy->preemphasis_width ==
+		    QUSB2_V2_PREEMPHASIS_WIDTH_HALF_BIT)
+			qusb2_setbits(qphy->base,
+				      cfg->regs[QUSB2PHY_PORT_TUNE1],
+				      PREEMPH_WIDTH_HALF_BIT);
+		else
+			qusb2_clrbits(qphy->base,
+				      cfg->regs[QUSB2PHY_PORT_TUNE1],
+				      PREEMPH_WIDTH_HALF_BIT);
+	}
+}
+
+/*
  * Fetches HS Tx tuning value from nvmem and sets the
  * QUSB2PHY_PORT_TUNE1/2 register.
  * For error case, skip setting the value and use the default value.
@@ -315,6 +396,10 @@
 	const struct qusb2_phy_cfg *cfg = qphy->cfg;
 	u8 *val;
 
+	/* efuse register is optional */
+	if (!qphy->cell)
+		return;
+
 	/*
 	 * Read efuse register having TUNE2/1 parameter's high nibble.
 	 * If efuse register shows value as 0x0, or if we fail to find
@@ -521,6 +606,9 @@
 	qcom_qusb2_phy_configure(qphy->base, cfg->regs, cfg->tbl,
 				 cfg->tbl_num);
 
+	/* Override board specific PHY tuning values */
+	qusb2_phy_override_phy_params(qphy);
+
 	/* Set efuse value for tuning the PHY */
 	qusb2_phy_set_tune2_param(qphy);
 
@@ -643,8 +731,8 @@
 		.compatible	= "qcom,msm8996-qusb2-phy",
 		.data		= &msm8996_phy_cfg,
 	}, {
-		.compatible	= "qcom,qusb2-v2-phy",
-		.data		= &qusb2_v2_phy_cfg,
+		.compatible	= "qcom,sdm845-qusb2-phy",
+		.data		= &sdm845_phy_cfg,
 	},
 	{ },
 };
@@ -664,6 +752,7 @@
 	struct resource *res;
 	int ret, i;
 	int num;
+	u32 value;
 
 	qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
 	if (!qphy)
@@ -732,6 +821,31 @@
 		qphy->cell = NULL;
 		dev_dbg(dev, "failed to lookup tune2 hstx trim value\n");
 	}
+
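+	/* optional DT properties carry board-specific PHY tuning overrides */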
+	if (!of_property_read_u32(dev->of_node, "qcom,imp-res-offset-value",
+				  &value)) {
+		qphy->imp_res_offset_value = (u8)value;
+		qphy->override_imp_res_offset = true;
+	}
+
+	if (!of_property_read_u32(dev->of_node, "qcom,hstx-trim-value",
+				  &value)) {
+		qphy->hstx_trim_value = (u8)value;
+		qphy->override_hstx_trim = true;
+	}
+
+	if (!of_property_read_u32(dev->of_node, "qcom,preemphasis-level",
+				     &value)) {
+		qphy->preemphasis_level = (u8)value;
+		qphy->override_preemphasis = true;
+	}
+
+	if (!of_property_read_u32(dev->of_node, "qcom,preemphasis-width",
+				     &value)) {
+		qphy->preemphasis_width = (u8)value;
+		qphy->override_preemphasis_width = true;
+	}
+
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
 	/*
diff --git a/drivers/phy/samsung/phy-exynos-mipi-video.c b/drivers/phy/samsung/phy-exynos-mipi-video.c
index c198886..00d8959 100644
--- a/drivers/phy/samsung/phy-exynos-mipi-video.c
+++ b/drivers/phy/samsung/phy-exynos-mipi-video.c
@@ -231,33 +231,27 @@
 static int __set_phy_state(const struct exynos_mipi_phy_desc *data,
 			   struct exynos_mipi_video_phy *state, unsigned int on)
 {
-	u32 val;
+	struct regmap *enable_map = state->regmaps[data->enable_map];
+	struct regmap *resetn_map = state->regmaps[data->resetn_map];
 
 	spin_lock(&state->slock);
 
 	/* disable in PMU sysreg */
 	if (!on && data->coupled_phy_id >= 0 &&
-	    state->phys[data->coupled_phy_id].phy->power_count == 0) {
-		regmap_read(state->regmaps[data->enable_map], data->enable_reg,
-			    &val);
-		val &= ~data->enable_val;
-		regmap_write(state->regmaps[data->enable_map], data->enable_reg,
-			     val);
-	}
-
+	    state->phys[data->coupled_phy_id].phy->power_count == 0)
+		regmap_update_bits(enable_map, data->enable_reg,
+				   data->enable_val, 0);
 	/* PHY reset */
-	regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val);
-	val = on ? (val | data->resetn_val) : (val & ~data->resetn_val);
-	regmap_write(state->regmaps[data->resetn_map], data->resetn_reg, val);
-
+	if (on)
+		regmap_update_bits(resetn_map, data->resetn_reg,
+				   data->resetn_val, data->resetn_val);
+	else
+		regmap_update_bits(resetn_map, data->resetn_reg,
+				   data->resetn_val, 0);
 	/* enable in PMU sysreg */
-	if (on) {
-		regmap_read(state->regmaps[data->enable_map], data->enable_reg,
-			    &val);
-		val |= data->enable_val;
-		regmap_write(state->regmaps[data->enable_map], data->enable_reg,
-			     val);
-	}
+	if (on)
+		regmap_update_bits(enable_map, data->enable_reg,
+				   data->enable_val, data->enable_val);
 
 	spin_unlock(&state->slock);
 
diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c
index bc4e78a..1255cd1 100644
--- a/drivers/phy/st/phy-stm32-usbphyc.c
+++ b/drivers/phy/st/phy-stm32-usbphyc.c
@@ -71,7 +71,6 @@
 	struct stm32_usbphyc_phy **phys;
 	int nphys;
 	int switch_setup;
-	bool pll_enabled;
 };
 
 static inline void stm32_usbphyc_set_bits(void __iomem *reg, u32 bits)
@@ -84,7 +83,8 @@
 	writel_relaxed(readl_relaxed(reg) & ~bits, reg);
 }
 
-static void stm32_usbphyc_get_pll_params(u32 clk_rate, struct pll_params *pll_params)
+static void stm32_usbphyc_get_pll_params(u32 clk_rate,
+					 struct pll_params *pll_params)
 {
 	unsigned long long fvco, ndiv, frac;
 
@@ -271,7 +271,6 @@
 	struct stm32_usbphyc *usbphyc = dev_get_drvdata(dev);
 	struct stm32_usbphyc_phy *usbphyc_phy = NULL;
 	struct device_node *phynode = args->np;
-
 	int port = 0;
 
 	for (port = 0; port < usbphyc->nphys; port++) {
@@ -367,8 +366,8 @@
 		if (IS_ERR(phy)) {
 			ret = PTR_ERR(phy);
 			if (ret != -EPROBE_DEFER)
-				dev_err(dev,
-					"failed to create phy%d: %d\n", i, ret);
+				dev_err(dev, "failed to create phy%d: %d\n",
+					port, ret);
 			goto put_child;
 		}
 
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index 11aa590..de1b4eb 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -102,19 +102,6 @@
 	return np;
 }
 
-static int
-tegra_xusb_lane_lookup_function(struct tegra_xusb_lane *lane,
-				    const char *function)
-{
-	unsigned int i;
-
-	for (i = 0; i < lane->soc->num_funcs; i++)
-		if (strcmp(function, lane->soc->funcs[i]) == 0)
-			return i;
-
-	return -EINVAL;
-}
-
 int tegra_xusb_lane_parse_dt(struct tegra_xusb_lane *lane,
 			     struct device_node *np)
 {
@@ -126,7 +113,7 @@
 	if (err < 0)
 		return err;
 
-	err = tegra_xusb_lane_lookup_function(lane, function);
+	err = match_string(lane->soc->funcs, lane->soc->num_funcs, function);
 	if (err < 0) {
 		dev_err(dev, "invalid function \"%s\" for lane \"%s\"\n",
 			function, np->name);
diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
index 288e656..c399f09 100644
--- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
+++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c
@@ -483,8 +483,8 @@
 		++nr_domains;
 	}
 
-	data = devm_kzalloc(dev, sizeof(*data)
-			+ nr_domains * sizeof(*data->domains), GFP_KERNEL);
+	data = devm_kzalloc(dev, struct_size(data, domains, nr_domains),
+			    GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 	data->drvdata = d;
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index 6cbcff4..dfed609 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -267,12 +267,13 @@
 }
 
 static struct irq_chip stm32_gpio_irq_chip = {
-	.name           = "stm32gpio",
-	.irq_ack       = irq_chip_ack_parent,
-	.irq_mask       = irq_chip_mask_parent,
-	.irq_unmask     = irq_chip_unmask_parent,
-	.irq_set_type   = irq_chip_set_type_parent,
-	.irq_set_wake   = irq_chip_set_wake_parent,
+	.name		= "stm32gpio",
+	.irq_eoi	= irq_chip_eoi_parent,
+	.irq_ack	= irq_chip_ack_parent,
+	.irq_mask	= irq_chip_mask_parent,
+	.irq_unmask	= irq_chip_unmask_parent,
+	.irq_set_type	= irq_chip_set_type_parent,
+	.irq_set_wake	= irq_chip_set_wake_parent,
 	.irq_request_resources = stm32_gpio_irq_request_resources,
 	.irq_release_resources = stm32_gpio_irq_release_resources,
 };
diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
index ec0f77a..add8e87 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
@@ -759,8 +759,7 @@
 
 	nregs = DIV_ROUND_UP(count * width, 32);
 
-	region = devm_kzalloc(dev,
-			      sizeof(*region) + sizeof(region->vals[0]) * nregs,
+	region = devm_kzalloc(dev, struct_size(region, vals, nregs),
 			      GFP_KERNEL);
 	if (!region)
 		return -ENOMEM;
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index e728a96..cb0df9e 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -38,6 +38,17 @@
 	  If you have a supported Chromebook, choose Y or M here.
 	  The module will be called chromeos_pstore.
 
+config CHROMEOS_TBMC
+	tristate "ChromeOS Tablet Switch Controller"
+	depends on ACPI
+	depends on INPUT
+	help
+	  This option adds a driver for the tablet switch on
+	  select Chrome OS systems.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called chromeos_tbmc.
+
 config CROS_EC_CTL
         tristate
 
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index ff3b369..e44c37a 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_CHROMEOS_LAPTOP)		+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)		+= chromeos_pstore.o
+obj-$(CONFIG_CHROMEOS_TBMC)		+= chromeos_tbmc.o
 cros_ec_ctl-objs			:= cros_ec_sysfs.o cros_ec_lightbar.o \
 					   cros_ec_vbc.o cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CTL)		+= cros_ec_ctl.o
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 5c47f45..24326ee 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -6,6 +6,7 @@
 
 #define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
 
+#include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
@@ -54,6 +55,11 @@
 	struct i2c_client *client;
 };
 
+struct acpi_peripheral {
+	char hid[ACPI_ID_LEN];
+	const struct property_entry *properties;
+};
+
 struct chromeos_laptop {
 	/*
 	 * Note that we can't mark this pointer as const because
@@ -61,6 +67,9 @@
 	 */
 	struct i2c_peripheral *i2c_peripherals;
 	unsigned int num_i2c_peripherals;
+
+	const struct acpi_peripheral *acpi_peripherals;
+	unsigned int num_acpi_peripherals;
 };
 
 static const struct chromeos_laptop *cros_laptop;
@@ -148,6 +157,38 @@
 	}
 }
 
+static bool chromeos_laptop_adjust_client(struct i2c_client *client)
+{
+	const struct acpi_peripheral *acpi_dev;
+	struct acpi_device_id acpi_ids[2] = { };
+	int i;
+	int error;
+
+	if (!has_acpi_companion(&client->dev))
+		return false;
+
+	for (i = 0; i < cros_laptop->num_acpi_peripherals; i++) {
+		acpi_dev = &cros_laptop->acpi_peripherals[i];
+
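+		/* acpi_ids[1] stays zeroed and terminates the match table */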
+		memcpy(acpi_ids[0].id, acpi_dev->hid, ACPI_ID_LEN);
+
+		if (acpi_match_device(acpi_ids, &client->dev)) {
+			error = device_add_properties(&client->dev,
+						      acpi_dev->properties);
+			if (error) {
+				dev_err(&client->dev,
+					"failed to add properties: %d\n",
+					error);
+				break;
+			}
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
 static void chromeos_laptop_detach_i2c_client(struct i2c_client *client)
 {
 	struct i2c_peripheral *i2c_dev;
@@ -170,6 +211,8 @@
 	case BUS_NOTIFY_ADD_DEVICE:
 		if (dev->type == &i2c_adapter_type)
 			chromeos_laptop_check_adapter(to_i2c_adapter(dev));
+		else if (dev->type == &i2c_client_type)
+			chromeos_laptop_adjust_client(to_i2c_client(dev));
 		break;
 
 	case BUS_NOTIFY_REMOVED_DEVICE:
@@ -191,6 +234,12 @@
 	.num_i2c_peripherals	= ARRAY_SIZE(_name##_peripherals),	\
 }
 
+#define DECLARE_ACPI_CROS_LAPTOP(_name)					\
+static const struct chromeos_laptop _name __initconst = {		\
+	.acpi_peripherals	= _name##_peripherals,			\
+	.num_acpi_peripherals	= ARRAY_SIZE(_name##_peripherals),	\
+}
+
 static struct i2c_peripheral samsung_series_5_550_peripherals[] __initdata = {
 	/* Touchpad. */
 	{
@@ -234,16 +283,25 @@
 
 static const struct property_entry
 chromebook_pixel_trackpad_props[] __initconst = {
+	PROPERTY_ENTRY_STRING("compatible", "atmel,maxtouch"),
 	PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", chromebook_pixel_tp_keys),
 	{ }
 };
 
+static const struct property_entry
+chromebook_atmel_touchscreen_props[] __initconst = {
+	PROPERTY_ENTRY_STRING("compatible", "atmel,maxtouch"),
+	{ }
+};
+
 static struct i2c_peripheral chromebook_pixel_peripherals[] __initdata = {
 	/* Touch Screen. */
 	{
 		.board_info	= {
 			I2C_BOARD_INFO("atmel_mxt_ts",
 					ATMEL_TS_I2C_ADDR),
+			.properties	=
+				chromebook_atmel_touchscreen_props,
 			.flags		= I2C_CLIENT_WAKE,
 		},
 		.dmi_name	= "touchscreen",
@@ -354,6 +412,8 @@
 		.board_info	= {
 			I2C_BOARD_INFO("atmel_mxt_ts",
 					ATMEL_TS_I2C_ADDR),
+			.properties	=
+				chromebook_atmel_touchscreen_props,
 			.flags		= I2C_CLIENT_WAKE,
 		},
 		.dmi_name	= "touchscreen",
@@ -419,6 +479,47 @@
 };
 DECLARE_CROS_LAPTOP(cr48);
 
+static const u32 samus_touchpad_buttons[] __initconst = {
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	BTN_LEFT
+};
+
+static const struct property_entry samus_trackpad_props[] __initconst = {
+	PROPERTY_ENTRY_STRING("compatible", "atmel,maxtouch"),
+	PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", samus_touchpad_buttons),
+	{ }
+};
+
+static struct acpi_peripheral samus_peripherals[] __initdata = {
+	/* Touchpad */
+	{
+		.hid		= "ATML0000",
+		.properties	= samus_trackpad_props,
+	},
+	/* Touchscreen */
+	{
+		.hid		= "ATML0001",
+		.properties	= chromebook_atmel_touchscreen_props,
+	},
+};
+DECLARE_ACPI_CROS_LAPTOP(samus);
+
+static struct acpi_peripheral generic_atmel_peripherals[] __initdata = {
+	/* Touchpad */
+	{
+		.hid		= "ATML0000",
+		.properties	= chromebook_pixel_trackpad_props,
+	},
+	/* Touchscreen */
+	{
+		.hid		= "ATML0001",
+		.properties	= chromebook_atmel_touchscreen_props,
+	},
+};
+DECLARE_ACPI_CROS_LAPTOP(generic_atmel);
+
 static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
 	{
 		.ident = "Samsung Series 5 550",
@@ -502,17 +603,72 @@
 		},
 		.driver_data = (void *)&cr48,
 	},
+	/* Devices with peripherals incompletely described in ACPI */
+	{
+		.ident = "Chromebook Pro",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Google"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Caroline"),
+		},
+		.driver_data = (void *)&samus,
+	},
+	{
+		.ident = "Google Pixel 2 (2015)",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Samus"),
+		},
+		.driver_data = (void *)&samus,
+	},
+	{
+		.ident = "Samsung Chromebook 3",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Celes"),
+		},
+		.driver_data = (void *)&samus,
+	},
+	{
+		/*
+		 * Other Chromebooks with Atmel touch controllers:
+		 * - Winky (touchpad)
+		 * - Clapper, Expresso, Rambi, Glimmer (touchscreen)
+		 */
+		.ident = "Other Chromebook",
+		.matches = {
+			/*
+			 * This will match all Google devices, not only devices
+			 * with Atmel, but we will validate that the device
+			 * actually has matching peripherals.
+			 */
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+		},
+		.driver_data = (void *)&generic_atmel,
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(dmi, chromeos_laptop_dmi_table);
 
-static int __init chromeos_laptop_scan_adapter(struct device *dev, void *data)
+static int __init chromeos_laptop_scan_peripherals(struct device *dev, void *data)
 {
-	struct i2c_adapter *adapter;
+	int error;
 
-	adapter = i2c_verify_adapter(dev);
-	if (adapter)
-		chromeos_laptop_check_adapter(adapter);
+	if (dev->type == &i2c_adapter_type) {
+		chromeos_laptop_check_adapter(to_i2c_adapter(dev));
+	} else if (dev->type == &i2c_client_type) {
+		if (chromeos_laptop_adjust_client(to_i2c_client(dev))) {
+			/*
+			 * Now that we have the needed properties, re-trigger
+			 * the driver probe in case the driver was initialized
+			 * earlier and probing failed.
+			 */
+			error = device_attach(dev);
+			if (error < 0)
+				dev_warn(dev,
+					 "%s: device_attach() failed: %d\n",
+					 __func__, error);
+		}
+	}
 
 	return 0;
 }
@@ -556,27 +712,24 @@
 	return 0;
 }
 
-static struct chromeos_laptop * __init
-chromeos_laptop_prepare(const struct chromeos_laptop *src)
+static int __init
+chromeos_laptop_prepare_i2c_peripherals(struct chromeos_laptop *cros_laptop,
+					const struct chromeos_laptop *src)
 {
-	struct chromeos_laptop *cros_laptop;
 	struct i2c_peripheral *i2c_dev;
 	struct i2c_board_info *info;
-	int error;
 	int i;
+	int error;
 
-	cros_laptop = kzalloc(sizeof(*cros_laptop), GFP_KERNEL);
-	if (!cros_laptop)
-		return ERR_PTR(-ENOMEM);
+	if (!src->num_i2c_peripherals)
+		return 0;
 
 	cros_laptop->i2c_peripherals = kmemdup(src->i2c_peripherals,
 					       src->num_i2c_peripherals *
 						sizeof(*src->i2c_peripherals),
 					       GFP_KERNEL);
-	if (!cros_laptop->i2c_peripherals) {
-		error = -ENOMEM;
-		goto err_free_cros_laptop;
-	}
+	if (!cros_laptop->i2c_peripherals)
+		return -ENOMEM;
 
 	cros_laptop->num_i2c_peripherals = src->num_i2c_peripherals;
 
@@ -586,7 +739,7 @@
 
 		error = chromeos_laptop_setup_irq(i2c_dev);
 		if (error)
-			goto err_destroy_cros_peripherals;
+			goto err_out;
 
 		/* We need to deep-copy properties */
 		if (info->properties) {
@@ -594,14 +747,14 @@
 				property_entries_dup(info->properties);
 			if (IS_ERR(info->properties)) {
 				error = PTR_ERR(info->properties);
-				goto err_destroy_cros_peripherals;
+				goto err_out;
 			}
 		}
 	}
 
-	return cros_laptop;
+	return 0;
 
-err_destroy_cros_peripherals:
+err_out:
 	while (--i >= 0) {
 		i2c_dev = &cros_laptop->i2c_peripherals[i];
 		info = &i2c_dev->board_info;
@@ -609,13 +762,74 @@
 			property_entries_free(info->properties);
 	}
 	kfree(cros_laptop->i2c_peripherals);
-err_free_cros_laptop:
-	kfree(cros_laptop);
-	return ERR_PTR(error);
+	return error;
+}
+
+static int __init
+chromeos_laptop_prepare_acpi_peripherals(struct chromeos_laptop *cros_laptop,
+					const struct chromeos_laptop *src)
+{
+	struct acpi_peripheral *acpi_peripherals;
+	struct acpi_peripheral *acpi_dev;
+	const struct acpi_peripheral *src_dev;
+	int n_peripherals = 0;
+	int i;
+	int error;
+
+	for (i = 0; i < src->num_acpi_peripherals; i++) {
+		if (acpi_dev_present(src->acpi_peripherals[i].hid, NULL, -1))
+			n_peripherals++;
+	}
+
+	if (!n_peripherals)
+		return 0;
+
+	acpi_peripherals = kcalloc(n_peripherals,
+				   sizeof(*src->acpi_peripherals),
+				   GFP_KERNEL);
+	if (!acpi_peripherals)
+		return -ENOMEM;
+
+	acpi_dev = acpi_peripherals;
+	for (i = 0; i < src->num_acpi_peripherals; i++) {
+		src_dev = &src->acpi_peripherals[i];
+		if (!acpi_dev_present(src_dev->hid, NULL, -1))
+			continue;
+
+		*acpi_dev = *src_dev;
+
+		/* We need to deep-copy properties */
+		if (src_dev->properties) {
+			acpi_dev->properties =
+				property_entries_dup(src_dev->properties);
+			if (IS_ERR(acpi_dev->properties)) {
+				error = PTR_ERR(acpi_dev->properties);
+				goto err_out;
+			}
+		}
+
+		acpi_dev++;
+	}
+
+	cros_laptop->acpi_peripherals = acpi_peripherals;
+	cros_laptop->num_acpi_peripherals = n_peripherals;
+
+	return 0;
+
+err_out:
+	while (--i >= 0) {
+		acpi_dev = &acpi_peripherals[i];
+		if (acpi_dev->properties)
+			property_entries_free(acpi_dev->properties);
+	}
+
+	kfree(acpi_peripherals);
+	return error;
 }
 
 static void chromeos_laptop_destroy(const struct chromeos_laptop *cros_laptop)
 {
+	const struct acpi_peripheral *acpi_dev;
 	struct i2c_peripheral *i2c_dev;
 	struct i2c_board_info *info;
 	int i;
@@ -631,10 +845,41 @@
 			property_entries_free(info->properties);
 	}
 
+	for (i = 0; i < cros_laptop->num_acpi_peripherals; i++) {
+		acpi_dev = &cros_laptop->acpi_peripherals[i];
+
+		if (acpi_dev->properties)
+			property_entries_free(acpi_dev->properties);
+	}
+
 	kfree(cros_laptop->i2c_peripherals);
+	kfree(cros_laptop->acpi_peripherals);
 	kfree(cros_laptop);
 }
 
+static struct chromeos_laptop * __init
+chromeos_laptop_prepare(const struct chromeos_laptop *src)
+{
+	struct chromeos_laptop *cros_laptop;
+	int error;
+
+	cros_laptop = kzalloc(sizeof(*cros_laptop), GFP_KERNEL);
+	if (!cros_laptop)
+		return ERR_PTR(-ENOMEM);
+
+	error = chromeos_laptop_prepare_i2c_peripherals(cros_laptop, src);
+	if (!error)
+		error = chromeos_laptop_prepare_acpi_peripherals(cros_laptop,
+								 src);
+
+	if (error) {
+		chromeos_laptop_destroy(cros_laptop);
+		return ERR_PTR(error);
+	}
+
+	return cros_laptop;
+}
+
 static int __init chromeos_laptop_init(void)
 {
 	const struct dmi_system_id *dmi_id;
@@ -652,21 +897,33 @@
 	if (IS_ERR(cros_laptop))
 		return PTR_ERR(cros_laptop);
 
+	if (!cros_laptop->num_i2c_peripherals &&
+	    !cros_laptop->num_acpi_peripherals) {
+		pr_debug("no relevant devices detected\n");
+		error = -ENODEV;
+		goto err_destroy_cros_laptop;
+	}
+
 	error = bus_register_notifier(&i2c_bus_type,
 				      &chromeos_laptop_i2c_notifier);
 	if (error) {
-		pr_err("failed to register i2c bus notifier: %d\n", error);
-		chromeos_laptop_destroy(cros_laptop);
-		return error;
+		pr_err("failed to register i2c bus notifier: %d\n",
+		       error);
+		goto err_destroy_cros_laptop;
 	}
 
 	/*
-	 * Scan adapters that have been registered before we installed
-	 * the notifier to make sure we do not miss any devices.
+	 * Scan adapters that have been registered and clients that have
+	 * been created before we installed the notifier to make sure
+	 * we do not miss any devices.
 	 */
-	i2c_for_each_dev(NULL, chromeos_laptop_scan_adapter);
+	i2c_for_each_dev(NULL, chromeos_laptop_scan_peripherals);
 
 	return 0;
+
+err_destroy_cros_laptop:
+	chromeos_laptop_destroy(cros_laptop);
+	return error;
 }
 
 static void __exit chromeos_laptop_exit(void)
diff --git a/drivers/platform/chrome/chromeos_tbmc.c b/drivers/platform/chrome/chromeos_tbmc.c
new file mode 100644
index 0000000..b935df6
--- /dev/null
+++ b/drivers/platform/chrome/chromeos_tbmc.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Driver to detect Tablet Mode for ChromeOS convertibles.
+//
+// Copyright (C) 2017 Google, Inc.
+// Author: Gwendal Grignou <gwendal@chromium.org>
+
+#include <linux/acpi.h>
+#include <linux/input.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#define DRV_NAME "chromeos_tbmc"
+#define ACPI_DRV_NAME "GOOG0006"
+
+static int chromeos_tbmc_query_switch(struct acpi_device *adev,
+				     struct input_dev *idev)
+{
+	unsigned long long state;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(adev->handle, "TBMC", NULL, &state);
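+	/* the TBMC ACPI method reports the current tablet-mode state */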
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	/* input layer checks if event is redundant */
+	input_report_switch(idev, SW_TABLET_MODE, state);
+	input_sync(idev);
+
+	return 0;
+}
+
+static __maybe_unused int chromeos_tbmc_resume(struct device *dev)
+{
+	struct acpi_device *adev = to_acpi_device(dev);
+
+	return chromeos_tbmc_query_switch(adev, adev->driver_data);
+}
+
+static void chromeos_tbmc_notify(struct acpi_device *adev, u32 event)
+{
+	switch (event) {
+	case 0x80:
+		chromeos_tbmc_query_switch(adev, adev->driver_data);
+		break;
+	default:
+		dev_err(&adev->dev, "Unexpected event: 0x%08X\n", event);
+	}
+}
+
+static int chromeos_tbmc_open(struct input_dev *idev)
+{
+	struct acpi_device *adev = input_get_drvdata(idev);
+
+	return chromeos_tbmc_query_switch(adev, idev);
+}
+
+static int chromeos_tbmc_add(struct acpi_device *adev)
+{
+	struct input_dev *idev;
+	struct device *dev = &adev->dev;
+	int ret;
+
+	idev = devm_input_allocate_device(dev);
+	if (!idev)
+		return -ENOMEM;
+
+	idev->name = "Tablet Mode Switch";
+	idev->phys = acpi_device_hid(adev);
+
+	idev->id.bustype = BUS_HOST;
+	idev->id.version = 1;
+	idev->id.product = 0;
+	idev->open = chromeos_tbmc_open;
+
+	input_set_drvdata(idev, adev);
+	adev->driver_data = idev;
+
+	input_set_capability(idev, EV_SW, SW_TABLET_MODE);
+	ret = input_register_device(idev);
+	if (ret) {
+		dev_err(dev, "cannot register input device\n");
+		return ret;
+	}
+	return 0;
+}
+
+static const struct acpi_device_id chromeos_tbmc_acpi_device_ids[] = {
+	{ ACPI_DRV_NAME, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, chromeos_tbmc_acpi_device_ids);
+
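+/* resume just re-queries the switch state; no suspend handler is needed */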
+static const SIMPLE_DEV_PM_OPS(chromeos_tbmc_pm_ops, NULL,
+		chromeos_tbmc_resume);
+
+static struct acpi_driver chromeos_tbmc_driver = {
+	.name = DRV_NAME,
+	.class = DRV_NAME,
+	.ids = chromeos_tbmc_acpi_device_ids,
+	.ops = {
+		.add = chromeos_tbmc_add,
+		.notify = chromeos_tbmc_notify,
+	},
+	.drv.pm = &chromeos_tbmc_pm_ops,
+};
+
+module_acpi_driver(chromeos_tbmc_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ChromeOS ACPI tablet switch driver");
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 6ea79d4..68193bb 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -170,8 +170,7 @@
 			    struct device_attribute *attr, char *buf)
 {
 	uint32_t version = 0, flags = 0;
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 	int ret;
 
 	ret = lb_throttle();
@@ -193,8 +192,7 @@
 	struct cros_ec_command *msg;
 	int ret;
 	unsigned int val;
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 
 	if (kstrtouint(buf, 0, &val))
 		return -EINVAL;
@@ -238,8 +236,7 @@
 {
 	struct ec_params_lightbar *param;
 	struct cros_ec_command *msg;
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 	unsigned int val[4];
 	int ret, i = 0, j = 0, ok = 0;
 
@@ -311,8 +308,7 @@
 	struct ec_response_lightbar *resp;
 	struct cros_ec_command *msg;
 	int ret;
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 
 	msg = alloc_lightbar_cmd_msg(ec);
 	if (!msg)
@@ -439,8 +435,7 @@
 	struct cros_ec_command *msg;
 	unsigned int num;
 	int ret, len;
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 
 	for (len = 0; len < count; len++)
 		if (!isalnum(buf[len]))
@@ -488,8 +483,7 @@
 	int extra_bytes, max_size, ret;
 	struct ec_params_lightbar *param;
 	struct cros_ec_command *msg;
-	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
-					      class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 
 	/*
 	 * We might need to reject the program for size reasons. The EC
@@ -599,8 +593,7 @@
 						  struct attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 	struct platform_device *pdev = to_platform_device(ec->dev);
 	struct cros_ec_platform *pdata = pdev->dev.platform_data;
 	int is_cros_ec;
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 3682e15..31c8b8c 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -435,7 +435,13 @@
 	int ret;
 	acpi_status status;
 
-	if (!dmi_check_system(cros_ec_lpc_dmi_table)) {
+	status = acpi_get_devices(ACPI_DRV_NAME, cros_ec_lpc_parse_device,
+				  &cros_ec_lpc_acpi_device_found, NULL);
+	if (ACPI_FAILURE(status))
+		pr_warn(DRV_NAME ": Looking for %s failed\n", ACPI_DRV_NAME);
+
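+	/* an ACPI match alone is sufficient; otherwise fall back to DMI */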
+	if (!cros_ec_lpc_acpi_device_found &&
+	    !dmi_check_system(cros_ec_lpc_dmi_table)) {
 		pr_err(DRV_NAME ": unsupported system.\n");
 		return -ENODEV;
 	}
@@ -450,11 +456,6 @@
 		return ret;
 	}
 
-	status = acpi_get_devices(ACPI_DRV_NAME, cros_ec_lpc_parse_device,
-				  &cros_ec_lpc_acpi_device_found, NULL);
-	if (ACPI_FAILURE(status))
-		pr_warn(DRV_NAME ": Looking for %s failed\n", ACPI_DRV_NAME);
-
 	if (!cros_ec_lpc_acpi_device_found) {
 		/* Register the device, and it'll get hooked up automatically */
 		ret = platform_device_register(&cros_ec_lpc_device);
diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c
index 5a6db3f..f34a501 100644
--- a/drivers/platform/chrome/cros_ec_sysfs.c
+++ b/drivers/platform/chrome/cros_ec_sysfs.c
@@ -34,8 +34,6 @@
 #include <linux/types.h>
 #include <linux/uaccess.h>
 
-#define to_cros_ec_dev(dev)  container_of(dev, struct cros_ec_dev, class_dev)
-
 /* Accessor functions */
 
 static ssize_t reboot_show(struct device *dev,
diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c
index 6d38e6b..5356f26b 100644
--- a/drivers/platform/chrome/cros_ec_vbc.c
+++ b/drivers/platform/chrome/cros_ec_vbc.c
@@ -29,8 +29,7 @@
 				  loff_t pos, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
-	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
-					      class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 	struct cros_ec_device *ecdev = ec->ec_dev;
 	struct ec_params_vbnvcontext *params;
 	struct cros_ec_command *msg;
@@ -70,8 +69,7 @@
 				   loff_t pos, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
-	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
-					      class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 	struct cros_ec_device *ecdev = ec->ec_dev;
 	struct ec_params_vbnvcontext *params;
 	struct cros_ec_command *msg;
@@ -111,8 +109,7 @@
 				      struct bin_attribute *a, int n)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
-	struct cros_ec_dev *ec = container_of(dev, struct cros_ec_dev,
-					      class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 	struct device_node *np = ec->ec_dev->dev->of_node;
 
 	if (IS_ENABLED(CONFIG_OF) && np) {
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 566644b..f27cb18 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -866,6 +866,7 @@
 config INTEL_CHT_INT33FE
 	tristate "Intel Cherry Trail ACPI INT33FE Driver"
 	depends on X86 && ACPI && I2C && REGULATOR
+	depends on CHARGER_BQ24190=y || (CHARGER_BQ24190=m && m)
 	---help---
 	  This driver adds support for the INT33FE ACPI device found on
 	  some Intel Cherry Trail devices.
@@ -877,8 +878,7 @@
 	  i2c drivers for these chips can bind to the them.
 
 	  If you enable this driver it is advised to also select
-	  CONFIG_TYPEC_FUSB302=m, CONFIG_CHARGER_BQ24190=m and
-	  CONFIG_BATTERY_MAX17042=m.
+	  CONFIG_TYPEC_FUSB302=m and CONFIG_BATTERY_MAX17042=m.
 
 config INTEL_INT0002_VGPIO
 	tristate "Intel ACPI INT0002 Virtual GPIO driver"
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index e8d058c..eef76bf 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -1689,19 +1689,6 @@
 	return 0;
 }
 
-static int version_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, version_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations version_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= version_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /*
  * Proc and module init
  */
@@ -1722,8 +1709,8 @@
 	if (dev->hotkey_dev)
 		proc_create_data("keys", S_IRUGO | S_IWUSR, toshiba_proc_dir,
 				 &keys_proc_fops, dev);
-	proc_create_data("version", S_IRUGO, toshiba_proc_dir,
-			 &version_proc_fops, dev);
+	proc_create_single_data("version", S_IRUGO, toshiba_proc_dir,
+			version_proc_show, dev);
 }
 
 static void remove_toshiba_proc_entries(struct toshiba_acpi_dev *dev)
diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c
index 7d4aca7..fe1c8f5 100644
--- a/drivers/pnp/pnpbios/proc.c
+++ b/drivers/pnp/pnpbios/proc.c
@@ -47,19 +47,6 @@
 	return 0;
 }
 
-static int pnpconfig_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pnpconfig_proc_show, NULL);
-}
-
-static const struct file_operations pnpconfig_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pnpconfig_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int escd_info_proc_show(struct seq_file *m, void *v)
 {
 	struct escd_info_struc escd;
@@ -74,19 +61,6 @@
 	return 0;
 }
 
-static int escd_info_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, escd_info_proc_show, NULL);
-}
-
-static const struct file_operations escd_info_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= escd_info_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 #define MAX_SANE_ESCD_SIZE (32*1024)
 static int escd_proc_show(struct seq_file *m, void *v)
 {
@@ -129,19 +103,6 @@
 	return 0;
 }
 
-static int escd_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, escd_proc_show, NULL);
-}
-
-static const struct file_operations escd_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= escd_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int pnp_legacyres_proc_show(struct seq_file *m, void *v)
 {
 	void *buf;
@@ -159,19 +120,6 @@
 	return 0;
 }
 
-static int pnp_legacyres_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pnp_legacyres_proc_show, NULL);
-}
-
-static const struct file_operations pnp_legacyres_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pnp_legacyres_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int pnp_devices_proc_show(struct seq_file *m, void *v)
 {
 	struct pnp_bios_node *node;
@@ -202,19 +150,6 @@
 	return 0;
 }
 
-static int pnp_devices_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pnp_devices_proc_show, NULL);
-}
-
-static const struct file_operations pnp_devices_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pnp_devices_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int pnpbios_proc_show(struct seq_file *m, void *v)
 {
 	void *data = m->private;
@@ -318,12 +253,13 @@
 	proc_pnp_boot = proc_mkdir("boot", proc_pnp);
 	if (!proc_pnp_boot)
 		return -EIO;
-	proc_create("devices", 0, proc_pnp, &pnp_devices_proc_fops);
-	proc_create("configuration_info", 0, proc_pnp, &pnpconfig_proc_fops);
-	proc_create("escd_info", 0, proc_pnp, &escd_info_proc_fops);
-	proc_create("escd", S_IRUSR, proc_pnp, &escd_proc_fops);
-	proc_create("legacy_device_resources", 0, proc_pnp, &pnp_legacyres_proc_fops);
-
+	proc_create_single("devices", 0, proc_pnp, pnp_devices_proc_show);
+	proc_create_single("configuration_info", 0, proc_pnp,
+			pnpconfig_proc_show);
+	proc_create_single("escd_info", 0, proc_pnp, escd_info_proc_show);
+	proc_create_single("escd", S_IRUSR, proc_pnp, escd_proc_show);
+	proc_create_single("legacy_device_resources", 0, proc_pnp,
+			pnp_legacyres_proc_show);
 	return 0;
 }
 
diff --git a/drivers/power/avs/rockchip-io-domain.c b/drivers/power/avs/rockchip-io-domain.c
index ed2b109..d6a5e6b 100644
--- a/drivers/power/avs/rockchip-io-domain.c
+++ b/drivers/power/avs/rockchip-io-domain.c
@@ -39,6 +39,10 @@
 #define MAX_VOLTAGE_1_8		1980000
 #define MAX_VOLTAGE_3_3		3600000
 
+#define PX30_IO_VSEL			0x180
+#define PX30_IO_VSEL_VCCIO6_SRC		BIT(0)
+#define PX30_IO_VSEL_VCCIO6_SUPPLY_NUM	1
+
 #define RK3288_SOC_CON2			0x24c
 #define RK3288_SOC_CON2_FLASH0		BIT(7)
 #define RK3288_SOC_FLASH_SUPPLY_NUM	2
@@ -151,6 +155,25 @@
 	return NOTIFY_OK;
 }
 
+static void px30_iodomain_init(struct rockchip_iodomain *iod)
+{
+	int ret;
+	u32 val;
+
+	/* if no VCCIO6 supply we should leave things alone */
+	if (!iod->supplies[PX30_IO_VSEL_VCCIO6_SUPPLY_NUM].reg)
+		return;
+
+	/*
+	 * set vccio6 iodomain to also use this framework
+	 * instead of a special gpio.
+	 */
+	val = PX30_IO_VSEL_VCCIO6_SRC | (PX30_IO_VSEL_VCCIO6_SRC << 16);
+	ret = regmap_write(iod->grf, PX30_IO_VSEL, val);
+	if (ret < 0)
+		dev_warn(iod->dev, "couldn't update vccio6 ctrl\n");
+}
+
 static void rk3288_iodomain_init(struct rockchip_iodomain *iod)
 {
 	int ret;
@@ -227,6 +250,43 @@
 		dev_warn(iod->dev, "couldn't update pmu io iodomain ctrl\n");
 }
 
+static const struct rockchip_iodomain_soc_data soc_data_px30 = {
+	.grf_offset = 0x180,
+	.supply_names = {
+		NULL,
+		"vccio6",
+		"vccio1",
+		"vccio2",
+		"vccio3",
+		"vccio4",
+		"vccio5",
+		"vccio-oscgpi",
+	},
+	.init = px30_iodomain_init,
+};
+
+static const struct rockchip_iodomain_soc_data soc_data_px30_pmu = {
+	.grf_offset = 0x100,
+	.supply_names = {
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		"pmuio1",
+		"pmuio2",
+	},
+};
+
 /*
  * On the rk3188 the io-domains are handled by a shared register with the
  * lower 8 bits still containing drive-strength settings.
@@ -381,6 +441,14 @@
 
 static const struct of_device_id rockchip_iodomain_match[] = {
 	{
+		.compatible = "rockchip,px30-io-voltage-domain",
+		.data = (void *)&soc_data_px30
+	},
+	{
+		.compatible = "rockchip,px30-pmu-io-voltage-domain",
+		.data = (void *)&soc_data_px30_pmu
+	},
+	{
 		.compatible = "rockchip,rk3188-io-voltage-domain",
 		.data = &soc_data_rk3188
 	},
diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index feac7b0..f57ab0a 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -19,6 +19,7 @@
 #include <linux/err.h>
 #include <linux/of.h>
 #include <linux/power_supply.h>
+#include <linux/property.h>
 #include <linux/thermal.h>
 #include "power_supply.h"
 
@@ -843,12 +844,21 @@
 {
 	struct device *dev;
 	struct power_supply *psy;
-	int rc;
+	int i, rc;
 
 	if (!parent)
 		pr_warn("%s: Expected proper parent device for '%s'\n",
 			__func__, desc->name);
 
+	if (!desc || !desc->name || !desc->properties || !desc->num_properties)
+		return ERR_PTR(-EINVAL);
+
+	for (i = 0; i < desc->num_properties; ++i) {
+		if ((desc->properties[i] == POWER_SUPPLY_PROP_USB_TYPE) &&
+		    (!desc->usb_types || !desc->num_usb_types))
+			return ERR_PTR(-EINVAL);
+	}
+
 	psy = kzalloc(sizeof(*psy), GFP_KERNEL);
 	if (!psy)
 		return ERR_PTR(-ENOMEM);
@@ -865,7 +875,8 @@
 	psy->desc = desc;
 	if (cfg) {
 		psy->drv_data = cfg->drv_data;
-		psy->of_node = cfg->of_node;
+		psy->of_node =
+			cfg->fwnode ? to_of_node(cfg->fwnode) : cfg->of_node;
 		psy->supplied_to = cfg->supplied_to;
 		psy->num_supplicants = cfg->num_supplicants;
 	}
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index 5204f11..1350068 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -46,6 +46,11 @@
 	"USB_PD", "USB_PD_DRP", "BrickID"
 };
 
+static const char * const power_supply_usb_type_text[] = {
+	"Unknown", "SDP", "DCP", "CDP", "ACA", "C",
+	"PD", "PD_DRP", "PD_PPS", "BrickID"
+};
+
 static const char * const power_supply_status_text[] = {
 	"Unknown", "Charging", "Discharging", "Not charging", "Full"
 };
@@ -73,6 +78,41 @@
 	"Unknown", "System", "Device"
 };
 
+static ssize_t power_supply_show_usb_type(struct device *dev,
+					  enum power_supply_usb_type *usb_types,
+					  ssize_t num_usb_types,
+					  union power_supply_propval *value,
+					  char *buf)
+{
+	enum power_supply_usb_type usb_type;
+	ssize_t count = 0;
+	bool match = false;
+	int i;
+
+	for (i = 0; i < num_usb_types; ++i) {
+		usb_type = usb_types[i];
+
+		if (value->intval == usb_type) {
+			count += sprintf(buf + count, "[%s] ",
+					 power_supply_usb_type_text[usb_type]);
+			match = true;
+		} else {
+			count += sprintf(buf + count, "%s ",
+					 power_supply_usb_type_text[usb_type]);
+		}
+	}
+
+	if (!match) {
+		dev_warn(dev, "driver reporting unsupported connected type\n");
+		return -EINVAL;
+	}
+
+	if (count)
+		buf[count - 1] = '\n';
+
+	return count;
+}
+
 static ssize_t power_supply_show_property(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf) {
@@ -115,6 +155,10 @@
 	else if (off == POWER_SUPPLY_PROP_TYPE)
 		return sprintf(buf, "%s\n",
 			       power_supply_type_text[value.intval]);
+	else if (off == POWER_SUPPLY_PROP_USB_TYPE)
+		return power_supply_show_usb_type(dev, psy->desc->usb_types,
+						  psy->desc->num_usb_types,
+						  &value, buf);
 	else if (off == POWER_SUPPLY_PROP_SCOPE)
 		return sprintf(buf, "%s\n",
 			       power_supply_scope_text[value.intval]);
@@ -241,6 +285,7 @@
 	POWER_SUPPLY_ATTR(time_to_full_now),
 	POWER_SUPPLY_ATTR(time_to_full_avg),
 	POWER_SUPPLY_ATTR(type),
+	POWER_SUPPLY_ATTR(usb_type),
 	POWER_SUPPLY_ATTR(scope),
 	POWER_SUPPLY_ATTR(precharge_current),
 	POWER_SUPPLY_ATTR(charge_term_current),
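
With the new POWER_SUPPLY_PROP_USB_TYPE attribute, reading the usb_type sysfs file prints every type the supply supports and brackets the active one (e.g. "Unknown [SDP] DCP CDP"), and power_supply_register() now rejects descriptors that declare the property without a usb_types table. A hedged sketch of a descriptor that opts in, with hypothetical "demo" names:

	static enum power_supply_usb_type demo_usb_types[] = {
		POWER_SUPPLY_USB_TYPE_UNKNOWN,
		POWER_SUPPLY_USB_TYPE_SDP,
		POWER_SUPPLY_USB_TYPE_DCP,
		POWER_SUPPLY_USB_TYPE_CDP,
	};

	static enum power_supply_property demo_props[] = {
		POWER_SUPPLY_PROP_USB_TYPE,
		POWER_SUPPLY_PROP_ONLINE,
	};

	static const struct power_supply_desc demo_desc = {
		.name		= "demo-charger",
		.type		= POWER_SUPPLY_TYPE_USB,
		.usb_types	= demo_usb_types,
		.num_usb_types	= ARRAY_SIZE(demo_usb_types),
		.properties	= demo_props,
		.num_properties	= ARRAY_SIZE(demo_props),
		/* .get_property reports the active type via val->intval */
	};
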
diff --git a/drivers/regulator/mc13783-regulator.c b/drivers/regulator/mc13783-regulator.c
index fe4c7d6..0e0277b 100644
--- a/drivers/regulator/mc13783-regulator.c
+++ b/drivers/regulator/mc13783-regulator.c
@@ -409,9 +409,9 @@
 	if (num_regulators <= 0)
 		return -EINVAL;
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv) +
-			num_regulators * sizeof(priv->regulators[0]),
-			GFP_KERNEL);
+	priv = devm_kzalloc(&pdev->dev,
+			    struct_size(priv, regulators, num_regulators),
+			    GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
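
This and the following allocations replace open-coded sizeof(*p) + n * sizeof(p->elem[0]) arithmetic with the struct_size() helper from <linux/overflow.h>, which computes the size of a structure plus its trailing array while saturating on multiplication overflow, so an overflowing request fails outright instead of yielding an undersized buffer. A sketch of the idiom with hypothetical names:

	#include <linux/overflow.h>

	struct demo_priv {
		int count;
		struct regulator_dev *regulators[];	/* trailing array */
	};

	/* Overflow-checked equivalent of
	 * sizeof(*priv) + num * sizeof(priv->regulators[0]):
	 */
	priv = devm_kzalloc(dev, struct_size(priv, regulators, num), GFP_KERNEL);
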
 
diff --git a/drivers/regulator/mc13892-regulator.c b/drivers/regulator/mc13892-regulator.c
index 0d17c92..15dd7bc 100644
--- a/drivers/regulator/mc13892-regulator.c
+++ b/drivers/regulator/mc13892-regulator.c
@@ -547,9 +547,9 @@
 	if (num_regulators <= 0)
 		return -EINVAL;
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv) +
-		num_regulators * sizeof(priv->regulators[0]),
-		GFP_KERNEL);
+	priv = devm_kzalloc(&pdev->dev,
+			    struct_size(priv, regulators, num_regulators),
+			    GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index 6488292..225e34c 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -730,8 +730,7 @@
 	if (num < 0)
 		return optional ? NULL : ERR_PTR(num);
 
-	resets = kzalloc(sizeof(*resets) + sizeof(resets->rstc[0]) * num,
-			 GFP_KERNEL);
+	resets = kzalloc(struct_size(resets, rstc, num), GFP_KERNEL);
 	if (!resets)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/rtc/rtc-ac100.c b/drivers/rtc/rtc-ac100.c
index 3fe576f..784b676 100644
--- a/drivers/rtc/rtc-ac100.c
+++ b/drivers/rtc/rtc-ac100.c
@@ -317,10 +317,10 @@
 	const char *parents[2] = {AC100_RTC_32K_NAME};
 	int i, ret;
 
-	chip->clk_data = devm_kzalloc(chip->dev, sizeof(*chip->clk_data) +
-						 sizeof(*chip->clk_data->hws) *
-						 AC100_CLKOUT_NUM,
-						 GFP_KERNEL);
+	chip->clk_data = devm_kzalloc(chip->dev,
+				      struct_size(chip->clk_data, hws,
+						  AC100_CLKOUT_NUM),
+				      GFP_KERNEL);
 	if (!chip->clk_data)
 		return -ENOMEM;
 
diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
index 31e7e23..a9dd921 100644
--- a/drivers/rtc/rtc-proc.c
+++ b/drivers/rtc/rtc-proc.c
@@ -107,40 +107,11 @@
 	return 0;
 }
 
-static int rtc_proc_open(struct inode *inode, struct file *file)
-{
-	int ret;
-	struct rtc_device *rtc = PDE_DATA(inode);
-
-	if (!try_module_get(rtc->owner))
-		return -ENODEV;
-
-	ret = single_open(file, rtc_proc_show, rtc);
-	if (ret)
-		module_put(rtc->owner);
-	return ret;
-}
-
-static int rtc_proc_release(struct inode *inode, struct file *file)
-{
-	int res = single_release(inode, file);
-	struct rtc_device *rtc = PDE_DATA(inode);
-
-	module_put(rtc->owner);
-	return res;
-}
-
-static const struct file_operations rtc_proc_fops = {
-	.open		= rtc_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= rtc_proc_release,
-};
-
 void rtc_proc_add_device(struct rtc_device *rtc)
 {
 	if (is_rtc_hctosys(rtc))
-		proc_create_data("driver/rtc", 0, NULL, &rtc_proc_fops, rtc);
+		proc_create_single_data("driver/rtc", 0, NULL, rtc_proc_show,
+				rtc);
 }
 
 void rtc_proc_del_device(struct rtc_device *rtc)
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 02c03e4..73cce3e 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2569,14 +2569,11 @@
  * Cancellation of a request is an asynchronous operation! The calling
  * function has to wait until the request is properly returned via callback.
  */
-int dasd_cancel_req(struct dasd_ccw_req *cqr)
+static int __dasd_cancel_req(struct dasd_ccw_req *cqr)
 {
 	struct dasd_device *device = cqr->startdev;
-	unsigned long flags;
-	int rc;
+	int rc = 0;
 
-	rc = 0;
-	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
 	switch (cqr->status) {
 	case DASD_CQR_QUEUED:
 		/* request was not started - just set to cleared */
@@ -2596,11 +2593,21 @@
 	default: /* already finished or clear pending - do nothing */
 		break;
 	}
-	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
 	dasd_schedule_device_bh(device);
 	return rc;
 }
-EXPORT_SYMBOL(dasd_cancel_req);
+
+int dasd_cancel_req(struct dasd_ccw_req *cqr)
+{
+	struct dasd_device *device = cqr->startdev;
+	unsigned long flags;
+	int rc;
+
+	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
+	rc = __dasd_cancel_req(cqr);
+	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
+	return rc;
+}
 
 /*
  * SECTION: Operations of the dasd_block layer.
@@ -3054,7 +3061,7 @@
  *
  * Return values:
  * BLK_EH_RESET_TIMER if the request should be left running
- * BLK_EH_NOT_HANDLED if the request is handled or terminated
+ * BLK_EH_DONE if the request is handled or terminated
  *		      by the driver.
  */
 enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved)
@@ -3067,7 +3074,7 @@
 
 	cqr = *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req));
 	if (!cqr)
-		return BLK_EH_NOT_HANDLED;
+		return BLK_EH_DONE;
 
 	spin_lock_irqsave(&cqr->dq->lock, flags);
 	device = cqr->startdev ? cqr->startdev : block->base;
@@ -3084,12 +3091,10 @@
 	cqr->retries = -1;
 	cqr->intrc = -ETIMEDOUT;
 	if (cqr->status >= DASD_CQR_QUEUED) {
-		spin_unlock(get_ccwdev_lock(device->cdev));
-		rc = dasd_cancel_req(cqr);
+		rc = __dasd_cancel_req(cqr);
 	} else if (cqr->status == DASD_CQR_FILLED ||
 		   cqr->status == DASD_CQR_NEED_ERP) {
 		cqr->status = DASD_CQR_TERMINATED;
-		spin_unlock(get_ccwdev_lock(device->cdev));
 	} else if (cqr->status == DASD_CQR_IN_ERP) {
 		struct dasd_ccw_req *searchcqr, *nextcqr, *tmpcqr;
 
@@ -3104,9 +3109,7 @@
 			searchcqr->retries = -1;
 			searchcqr->intrc = -ETIMEDOUT;
 			if (searchcqr->status >= DASD_CQR_QUEUED) {
-				spin_unlock(get_ccwdev_lock(device->cdev));
-				rc = dasd_cancel_req(searchcqr);
-				spin_lock(get_ccwdev_lock(device->cdev));
+				rc = __dasd_cancel_req(searchcqr);
 			} else if ((searchcqr->status == DASD_CQR_FILLED) ||
 				   (searchcqr->status == DASD_CQR_NEED_ERP)) {
 				searchcqr->status = DASD_CQR_TERMINATED;
@@ -3120,13 +3123,13 @@
 			}
 			break;
 		}
-		spin_unlock(get_ccwdev_lock(device->cdev));
 	}
+	spin_unlock(get_ccwdev_lock(device->cdev));
 	dasd_schedule_block_bh(block);
 	spin_unlock(&block->queue_lock);
 	spin_unlock_irqrestore(&cqr->dq->lock, flags);
 
-	return rc ? BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
+	return rc ? BLK_EH_RESET_TIMER : BLK_EH_DONE;
 }
 
 static int dasd_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
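
The BLK_EH_NOT_HANDLED/BLK_EH_HANDLED pair is gone this cycle: a blk-mq ->timeout handler now returns BLK_EH_RESET_TIMER to keep the request running or BLK_EH_DONE once the timeout is dealt with, and a driver that has completed the request itself calls blk_mq_complete_request() explicitly (as the fc_bsg_job_timeout() hunk further down does). A hedged sketch of the new contract, with a hypothetical helper:

	static enum blk_eh_timer_return demo_timed_out(struct request *rq,
						       bool reserved)
	{
		if (demo_cmd_still_in_flight(rq))	/* hypothetical check */
			return BLK_EH_RESET_TIMER;	/* give it more time */

		/* The driver finishes the request itself, then reports done. */
		blk_mq_complete_request(rq);
		return BLK_EH_DONE;
	}
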
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 7bdc6aa..2016e0e 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -18,7 +18,6 @@
 #include <linux/fs.h>
 #include <linux/blkpg.h>
 #include <linux/slab.h>
-#include <asm/compat.h>
 #include <asm/ccwdev.h>
 #include <asm/schid.h>
 #include <asm/cmb.h>
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index c33788a..5cb80c6 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -131,19 +131,6 @@
 	.show		= dasd_devices_show,
 };
 
-static int dasd_devices_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &dasd_devices_seq_ops);
-}
-
-static const struct file_operations dasd_devices_file_ops = {
-	.owner		= THIS_MODULE,
-	.open		= dasd_devices_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 #ifdef CONFIG_DASD_PROFILE
 static int dasd_stats_all_block_on(void)
 {
@@ -352,10 +339,10 @@
 	dasd_proc_root_entry = proc_mkdir("dasd", NULL);
 	if (!dasd_proc_root_entry)
 		goto out_nodasd;
-	dasd_devices_entry = proc_create("devices",
+	dasd_devices_entry = proc_create_seq("devices",
 					 S_IFREG | S_IRUGO | S_IWUSR,
 					 dasd_proc_root_entry,
-					 &dasd_devices_file_ops);
+					 &dasd_devices_seq_ops);
 	if (!dasd_devices_entry)
 		goto out_nodevices;
 	dasd_statistics_entry = proc_create("statistics",
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index d049e2d..0a4c13e 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -54,3 +54,6 @@
 
 hmcdrv-objs := hmcdrv_mod.o hmcdrv_dev.o hmcdrv_ftp.o hmcdrv_cache.o diag_ftp.o sclp_ftp.o
 obj-$(CONFIG_HMC_DRV) += hmcdrv.o
+
+chkbss := sclp_early_core.o
+include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 6182248..16a4e85 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -19,7 +19,6 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#include <asm/compat.h>
 #include <asm/ccwdev.h>
 #include <asm/cio.h>
 #include <asm/ebcdic.h>
diff --git a/drivers/s390/char/sclp_ctl.c b/drivers/s390/char/sclp_ctl.c
index a78cea0..248b5db 100644
--- a/drivers/s390/char/sclp_ctl.c
+++ b/drivers/s390/char/sclp_ctl.c
@@ -14,7 +14,6 @@
 #include <linux/init.h>
 #include <linux/ioctl.h>
 #include <linux/fs.h>
-#include <asm/compat.h>
 #include <asm/sclp_ctl.h>
 #include <asm/sclp.h>
 
diff --git a/drivers/s390/char/tape_proc.c b/drivers/s390/char/tape_proc.c
index faae304..32a14ee 100644
--- a/drivers/s390/char/tape_proc.c
+++ b/drivers/s390/char/tape_proc.c
@@ -105,29 +105,14 @@
 	.show		= tape_proc_show,
 };
 
-static int tape_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &tape_proc_seq);
-}
-
-static const struct file_operations tape_proc_ops =
-{
-	.owner		= THIS_MODULE,
-	.open		= tape_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 /*
  * Initialize procfs stuff on startup
  */
 void
 tape_proc_init(void)
 {
-	tape_proc_devices =
-		proc_create("tapedevices", S_IFREG | S_IRUGO | S_IWUSR, NULL,
-			    &tape_proc_ops);
+	tape_proc_devices = proc_create_seq("tapedevices",
+			S_IFREG | S_IRUGO | S_IWUSR, NULL, &tape_proc_seq);
 	if (tape_proc_devices == NULL) {
 		return;
 	}
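
For proc files that walk a list instead of printing one record, the matching helper is proc_create_seq(), which takes the seq_operations table directly, as the "tapedevices" conversion here and the dasd "devices" one above show. A sketch with hypothetical iterator names:

	static const struct seq_operations demo_seq_ops = {
		.start	= demo_seq_start,	/* hypothetical iterators */
		.next	= demo_seq_next,
		.stop	= demo_seq_stop,
		.show	= demo_seq_show,
	};

	proc_create_seq("demo_list", 0444, NULL, &demo_seq_ops);
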
diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c
index 17e411c..948ce82 100644
--- a/drivers/s390/char/vmcp.c
+++ b/drivers/s390/char/vmcp.c
@@ -23,7 +23,6 @@
 #include <linux/mutex.h>
 #include <linux/cma.h>
 #include <linux/mm.h>
-#include <asm/compat.h>
 #include <asm/cpcmd.h>
 #include <asm/debug.h>
 #include <asm/vmcp.h>
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index 5535312..93b2862 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -326,8 +326,7 @@
 	if (num_devices < 1)
 		return -EINVAL;
 
-	gdev = kzalloc(sizeof(*gdev) + num_devices * sizeof(gdev->cdev[0]),
-		       GFP_KERNEL);
+	gdev = kzalloc(struct_size(gdev, cdev, num_devices), GFP_KERNEL);
 	if (!gdev)
 		return -ENOMEM;
 
@@ -561,6 +560,12 @@
 	.pm = &ccwgroup_pm_ops,
 };
 
+bool dev_is_ccwgroup(struct device *dev)
+{
+	return dev->bus == &ccwgroup_bus_type;
+}
+EXPORT_SYMBOL(dev_is_ccwgroup);
+
 /**
  * ccwgroup_driver_register() - register a ccw group driver
  * @cdriver: driver to be registered
diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c
index 0015729..8d9f366 100644
--- a/drivers/s390/cio/chsc_sch.c
+++ b/drivers/s390/cio/chsc_sch.c
@@ -16,7 +16,6 @@
 #include <linux/miscdevice.h>
 #include <linux/kernel_stat.h>
 
-#include <asm/compat.h>
 #include <asm/cio.h>
 #include <asm/chsc.h>
 #include <asm/isc.h>
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index aecfebb..4435ae0 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -473,6 +473,36 @@
 }
 
 /**
+ * ccw_device_get_util_str() - return newly allocated utility strings
+ * @cdev: device to obtain the utility strings for
+ * @chp_idx: index of the channel path
+ *
+ * On success return a newly allocated copy of the utility strings
+ * associated with the given channel path. Return %NULL on error.
+ */
+u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx)
+{
+	struct subchannel *sch = to_subchannel(cdev->dev.parent);
+	struct channel_path *chp;
+	struct chp_id chpid;
+	u8 *util_str;
+
+	chp_id_init(&chpid);
+	chpid.id = sch->schib.pmcw.chpid[chp_idx];
+	chp = chpid_to_chp(chpid);
+
+	util_str = kmalloc(sizeof(chp->desc_fmt3.util_str), GFP_KERNEL);
+	if (!util_str)
+		return NULL;
+
+	mutex_lock(&chp->lock);
+	memcpy(util_str, chp->desc_fmt3.util_str, sizeof(chp->desc_fmt3.util_str));
+	mutex_unlock(&chp->lock);
+
+	return util_str;
+}
+
+/**
  * ccw_device_get_id() - obtain a ccw device id
  * @cdev: device to obtain the id for
  * @dev_id: where to fill in the values
@@ -682,3 +712,4 @@
 EXPORT_SYMBOL(ccw_device_get_ciw);
 EXPORT_SYMBOL(ccw_device_get_path_mask);
 EXPORT_SYMBOL_GPL(ccw_device_get_chp_desc);
+EXPORT_SYMBOL_GPL(ccw_device_get_util_str);
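
Since ccw_device_get_util_str() hands back a kmalloc()ed copy taken while holding the channel-path lock, the caller owns the buffer and must kfree() it. A minimal usage sketch, assuming a valid cdev:

	u8 *util_str = ccw_device_get_util_str(cdev, 0);

	if (!util_str)
		return -ENOMEM;
	/* ... inspect the utility string ... */
	kfree(util_str);
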
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 02184cf..6a273c5 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -198,11 +198,18 @@
  */
 static inline void ap_init_message(struct ap_message *ap_msg)
 {
-	ap_msg->psmid = 0;
-	ap_msg->length = 0;
-	ap_msg->rc = 0;
-	ap_msg->special = 0;
-	ap_msg->receive = NULL;
+	memset(ap_msg, 0, sizeof(*ap_msg));
+}
+
+/**
+ * ap_release_message() - Release ap_message.
+ * Releases all memory used internally within the ap_message struct,
+ * currently the message and private fields.
+ */
+static inline void ap_release_message(struct ap_message *ap_msg)
+{
+	kzfree(ap_msg->message);
+	kzfree(ap_msg->private);
 }
 
 #define for_each_ap_card(_ac) \
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 5efd848..febcdb5 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -371,6 +371,7 @@
 
 	trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB);
 
+	ap_init_message(&ap_msg);
 	rc = get_cprb_fc(xcRB, &ap_msg, &func_code, &domain);
 	if (rc)
 		goto out;
@@ -425,6 +426,7 @@
 	spin_unlock(&zcrypt_list_lock);
 
 out:
+	ap_release_message(&ap_msg);
 	trace_s390_zcrypt_rep(xcRB, func_code, rc,
 			      AP_QID_CARD(qid), AP_QID_QUEUE(qid));
 	return rc;
@@ -468,6 +470,8 @@
 
 	trace_s390_zcrypt_req(xcrb, TP_ZSENDEP11CPRB);
 
+	ap_init_message(&ap_msg);
+
 	target_num = (unsigned short) xcrb->targets_num;
 
 	/* empty list indicates autoselect (all available targets) */
@@ -485,7 +489,7 @@
 		if (copy_from_user(targets, uptr,
 				   target_num * sizeof(*targets))) {
 			rc = -EFAULT;
-			goto out;
+			goto out_free;
 		}
 	}
 
@@ -542,6 +546,7 @@
 out_free:
 	kfree(targets);
 out:
+	ap_release_message(&ap_msg);
 	trace_s390_zcrypt_rep(xcrb, func_code, rc,
 			      AP_QID_CARD(qid), AP_QID_QUEUE(qid));
 	return rc;
@@ -559,6 +564,7 @@
 
 	trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB);
 
+	ap_init_message(&ap_msg);
 	rc = get_rng_fc(&ap_msg, &func_code, &domain);
 	if (rc)
 		goto out;
@@ -589,8 +595,10 @@
 	pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight);
 	spin_unlock(&zcrypt_list_lock);
 
-	if (!pref_zq)
-		return -ENODEV;
+	if (!pref_zq) {
+		rc = -ENODEV;
+		goto out;
+	}
 
 	qid = pref_zq->queue->qid;
 	rc = pref_zq->ops->rng(pref_zq, buffer, &ap_msg);
@@ -600,6 +608,7 @@
 	spin_unlock(&zcrypt_list_lock);
 
 out:
+	ap_release_message(&ap_msg);
 	trace_s390_zcrypt_rep(buffer, func_code, rc,
 			      AP_QID_CARD(qid), AP_QID_QUEUE(qid));
 	return rc;
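
The zcrypt rework moves ap_message ownership to a strict bracket: every ioctl path calls ap_init_message() before the get_*_fc() helpers (which may allocate ->message and ->private) and ap_release_message() on every exit, so the helpers no longer free those buffers on failure themselves. The resulting shape, condensed from the hunks above:

	struct ap_message ap_msg;
	int rc;

	ap_init_message(&ap_msg);		/* zero all fields */
	rc = get_cprb_fc(xcRB, &ap_msg, &func_code, &domain);
	if (rc)
		goto out;
	/* ... pick a queue and send the message ... */
out:
	ap_release_message(&ap_msg);		/* kzfree()s message + private */
	return rc;
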
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index f54bef4..97d4bac 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -1084,6 +1084,13 @@
 	return rc;
 }
 
+/**
+ * Fetch function code from cprb.
+ * Extracting the fc requires copying the cprb from userspace, so this
+ * function allocates memory and needs an ap_msg prepared by the caller
+ * with ap_init_message(). The caller must also make sure that
+ * ap_release_message() is always called, even on failure.
+ */
 unsigned int get_cprb_fc(struct ica_xcRB *xcRB,
 				struct ap_message *ap_msg,
 				unsigned int *func_code, unsigned short **dom)
@@ -1091,9 +1098,7 @@
 	struct response_type resp_type = {
 		.type = PCIXCC_RESPONSE_TYPE_XCRB,
 	};
-	int rc;
 
-	ap_init_message(ap_msg);
 	ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
 	if (!ap_msg->message)
 		return -ENOMEM;
@@ -1101,17 +1106,10 @@
 	ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
 	ap_msg->private = kmalloc(sizeof(resp_type), GFP_KERNEL);
-	if (!ap_msg->private) {
-		kzfree(ap_msg->message);
+	if (!ap_msg->private)
 		return -ENOMEM;
-	}
 	memcpy(ap_msg->private, &resp_type, sizeof(resp_type));
-	rc = XCRB_msg_to_type6CPRB_msgX(ap_msg, xcRB, func_code, dom);
-	if (rc) {
-		kzfree(ap_msg->message);
-		kzfree(ap_msg->private);
-	}
-	return rc;
+	return XCRB_msg_to_type6CPRB_msgX(ap_msg, xcRB, func_code, dom);
 }
 
 /**
@@ -1139,11 +1137,16 @@
 		/* Signal pending. */
 		ap_cancel_message(zq->queue, ap_msg);
 
-	kzfree(ap_msg->message);
-	kzfree(ap_msg->private);
 	return rc;
 }
 
+/**
+ * Fetch function code from ep11 cprb.
+ * Extracting the fc requires copying the ep11 cprb from userspace, so
+ * this function allocates memory and needs an ap_msg prepared by the
+ * caller with ap_init_message(). The caller must also make sure that
+ * ap_release_message() is always called, even on failure.
+ */
 unsigned int get_ep11cprb_fc(struct ep11_urb *xcrb,
 				    struct ap_message *ap_msg,
 				    unsigned int *func_code)
@@ -1151,9 +1154,7 @@
 	struct response_type resp_type = {
 		.type = PCIXCC_RESPONSE_TYPE_EP11,
 	};
-	int rc;
 
-	ap_init_message(ap_msg);
 	ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
 	if (!ap_msg->message)
 		return -ENOMEM;
@@ -1161,17 +1162,10 @@
 	ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
 	ap_msg->private = kmalloc(sizeof(resp_type), GFP_KERNEL);
-	if (!ap_msg->private) {
-		kzfree(ap_msg->message);
+	if (!ap_msg->private)
 		return -ENOMEM;
-	}
 	memcpy(ap_msg->private, &resp_type, sizeof(resp_type));
-	rc = xcrb_msg_to_type6_ep11cprb_msgx(ap_msg, xcrb, func_code);
-	if (rc) {
-		kzfree(ap_msg->message);
-		kzfree(ap_msg->private);
-	}
-	return rc;
+	return xcrb_msg_to_type6_ep11cprb_msgx(ap_msg, xcrb, func_code);
 }
 
 /**
@@ -1246,8 +1240,6 @@
 		/* Signal pending. */
 		ap_cancel_message(zq->queue, ap_msg);
 
-	kzfree(ap_msg->message);
-	kzfree(ap_msg->private);
 	return rc;
 }
 
@@ -1258,7 +1250,6 @@
 		.type = PCIXCC_RESPONSE_TYPE_XCRB,
 	};
 
-	ap_init_message(ap_msg);
 	ap_msg->message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL);
 	if (!ap_msg->message)
 		return -ENOMEM;
@@ -1266,10 +1257,8 @@
 	ap_msg->psmid = (((unsigned long long) current->pid) << 32) +
 				atomic_inc_return(&zcrypt_step);
 	ap_msg->private = kmalloc(sizeof(resp_type), GFP_KERNEL);
-	if (!ap_msg->private) {
-		kzfree(ap_msg->message);
+	if (!ap_msg->private)
 		return -ENOMEM;
-	}
 	memcpy(ap_msg->private, &resp_type, sizeof(resp_type));
 
 	rng_type6CPRB_msgX(ap_msg, ZCRYPT_RNG_BUFFER_SIZE, domain);
@@ -1313,8 +1302,6 @@
 		/* Signal pending. */
 		ap_cancel_message(zq->queue, ap_msg);
 
-	kzfree(ap_msg->message);
-	kzfree(ap_msg->private);
 	return rc;
 }
 
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 06415b6..9f28b6f 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -10,6 +10,7 @@
 #define KMSG_COMPONENT "qeth"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
+#include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/string.h>
@@ -32,7 +33,6 @@
 #include <asm/chpid.h>
 #include <asm/io.h>
 #include <asm/sysinfo.h>
-#include <asm/compat.h>
 #include <asm/diag.h>
 #include <asm/cio.h>
 #include <asm/ccwdev.h>
diff --git a/drivers/sbus/char/Kconfig b/drivers/sbus/char/Kconfig
index bf3c5f7..89edd13 100644
--- a/drivers/sbus/char/Kconfig
+++ b/drivers/sbus/char/Kconfig
@@ -28,13 +28,6 @@
 	  events, and can also notice the attachment/detachment of external
 	  monitors and mice.
 
-config SUN_JSFLASH
-	tristate "JavaStation OS Flash SIMM"
-	depends on SPARC32
-	help
-	  If you say Y here, you will be able to boot from your JavaStation's
-	  Flash memory.
-
 config BBC_I2C
 	tristate "UltraSPARC-III bootbus i2c controller driver"
 	depends on PCI && SPARC64
diff --git a/drivers/sbus/char/Makefile b/drivers/sbus/char/Makefile
index 8c48ed9..44347c9 100644
--- a/drivers/sbus/char/Makefile
+++ b/drivers/sbus/char/Makefile
@@ -15,6 +15,5 @@
 obj-$(CONFIG_OBP_FLASH)			+= flash.o
 obj-$(CONFIG_SUN_OPENPROMIO)		+= openprom.o
 obj-$(CONFIG_TADPOLE_TS102_UCTRL)	+= uctrl.o
-obj-$(CONFIG_SUN_JSFLASH)		+= jsflash.o
 obj-$(CONFIG_BBC_I2C)			+= bbc.o
 obj-$(CONFIG_ORACLE_DAX) 		+= oradax.o
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c
deleted file mode 100644
index 14f377a..0000000
--- a/drivers/sbus/char/jsflash.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- * drivers/sbus/char/jsflash.c
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds	(drivers/char/mem.c)
- *  Copyright (C) 1997  Eddie C. Dost		(drivers/sbus/char/flash.c)
- *  Copyright (C) 1997-2000 Pavel Machek <pavel@ucw.cz>   (drivers/block/nbd.c)
- *  Copyright (C) 1999-2000 Pete Zaitcev
- *
- * This driver is used to program OS into a Flash SIMM on
- * Krups and Espresso platforms.
- *
- * TODO: do not allow erase/programming if file systems are mounted.
- * TODO: Erase/program both banks of a 8MB SIMM.
- *
- * It is anticipated that programming an OS Flash will be a routine
- * procedure. In the same time it is exceedingly dangerous because
- * a user can program its OBP flash with OS image and effectively
- * kill the machine.
- *
- * This driver uses an interface different from Eddie's flash.c
- * as a silly safeguard.
- *
- * XXX The flash.c manipulates page caching characteristics in a certain
- * dubious way; also it assumes that remap_pfn_range() can remap
- * PCI bus locations, which may be false. ioremap() must be used
- * instead. We should discuss this.
- */
-
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/miscdevice.h>
-#include <linux/fcntl.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/genhd.h>
-#include <linux/blkdev.h>
-#include <linux/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/io.h>
-#include <asm/pcic.h>
-#include <asm/oplib.h>
-
-#include <asm/jsflash.h>		/* ioctl arguments. <linux/> ?? */
-#define JSFIDSZ		(sizeof(struct jsflash_ident_arg))
-#define JSFPRGSZ	(sizeof(struct jsflash_program_arg))
-
-/*
- * Our device numbers have no business in system headers.
- * The only thing a user knows is the device name /dev/jsflash.
- *
- * Block devices are laid out like this:
- *   minor+0	- Bootstrap, for 8MB SIMM 0x20400000[0x800000]
- *   minor+1	- Filesystem to mount, normally 0x20400400[0x7ffc00]
- *   minor+2	- Whole flash area for any case... 0x20000000[0x01000000]
- * Total 3 minors per flash device.
- *
- * It is easier to have static size vectors, so we define
- * a total minor range JSF_MAX, which must cover all minors.
- */
-/* character device */
-#define JSF_MINOR	178	/* 178 is registered with hpa */
-/* block device */
-#define JSF_MAX		 3	/* 3 minors wasted total so far. */
-#define JSF_NPART	 3	/* 3 minors per flash device */
-#define JSF_PART_BITS	 2	/* 2 bits of minors to cover JSF_NPART */
-#define JSF_PART_MASK	 0x3	/* 2 bits mask */
-
-static DEFINE_MUTEX(jsf_mutex);
-
-/*
- * Access functions.
- * We could ioremap(), but it's easier this way.
- */
-static unsigned int jsf_inl(unsigned long addr)
-{
-	unsigned long retval;
-
-	__asm__ __volatile__("lda [%1] %2, %0\n\t" :
-				"=r" (retval) :
-				"r" (addr), "i" (ASI_M_BYPASS));
-        return retval;
-}
-
-static void jsf_outl(unsigned long addr, __u32 data)
-{
-
-	__asm__ __volatile__("sta %0, [%1] %2\n\t" : :
-				"r" (data), "r" (addr), "i" (ASI_M_BYPASS) :
-				"memory");
-}
-
-/*
- * soft carrier
- */
-
-struct jsfd_part {
-	unsigned long dbase;
-	unsigned long dsize;
-};
-
-struct jsflash {
-	unsigned long base;
-	unsigned long size;
-	unsigned long busy;		/* In use? */
-	struct jsflash_ident_arg id;
-	/* int mbase; */		/* Minor base, typically zero */
-	struct jsfd_part dv[JSF_NPART];
-};
-
-/*
- * We do not map normal memory or obio as a safety precaution.
- * But offsets are real, for ease of userland programming.
- */
-#define JSF_BASE_TOP	0x30000000
-#define JSF_BASE_ALL	0x20000000
-
-#define JSF_BASE_JK	0x20400000
-
-/*
- */
-static struct gendisk *jsfd_disk[JSF_MAX];
-
-/*
- * Let's pretend we may have several of these...
- */
-static struct jsflash jsf0;
-
-/*
- * Wait for AMD to finish its embedded algorithm.
- * We use the Toggle bit DQ6 (0x40) because it does not
- * depend on the data value as /DATA bit DQ7 does.
- *
- * XXX Do we need any timeout here? So far it never hanged, beware broken hw.
- */
-static void jsf_wait(unsigned long p) {
-	unsigned int x1, x2;
-
-	for (;;) {
-		x1 = jsf_inl(p);
-		x2 = jsf_inl(p);
-		if ((x1 & 0x40404040) == (x2 & 0x40404040)) return;
-	}
-}
-
-/*
- * Programming will only work if Flash is clean,
- * we leave it to the programmer application.
- *
- * AMD must be programmed one byte at a time;
- * thus, Simple Tech SIMM must be written 4 bytes at a time.
- *
- * Write waits for the chip to become ready after the write
- * was finished. This is done so that application would read
- * consistent data after the write is done.
- */
-static void jsf_write4(unsigned long fa, u32 data) {
-
-	jsf_outl(fa, 0xAAAAAAAA);		/* Unlock 1 Write 1 */
-	jsf_outl(fa, 0x55555555);		/* Unlock 1 Write 2 */
-	jsf_outl(fa, 0xA0A0A0A0);		/* Byte Program */
-	jsf_outl(fa, data);
-
-	jsf_wait(fa);
-}
-
-/*
- */
-static void jsfd_read(char *buf, unsigned long p, size_t togo) {
-	union byte4 {
-		char s[4];
-		unsigned int n;
-	} b;
-
-	while (togo >= 4) {
-		togo -= 4;
-		b.n = jsf_inl(p);
-		memcpy(buf, b.s, 4);
-		p += 4;
-		buf += 4;
-	}
-}
-
-static int jsfd_queue;
-
-static struct request *jsfd_next_request(void)
-{
-	struct request_queue *q;
-	struct request *rq;
-	int old_pos = jsfd_queue;
-
-	do {
-		q = jsfd_disk[jsfd_queue]->queue;
-		if (++jsfd_queue == JSF_MAX)
-			jsfd_queue = 0;
-		if (q) {
-			rq = blk_fetch_request(q);
-			if (rq)
-				return rq;
-		}
-	} while (jsfd_queue != old_pos);
-
-	return NULL;
-}
-
-static void jsfd_request(void)
-{
-	struct request *req;
-
-	req = jsfd_next_request();
-	while (req) {
-		struct jsfd_part *jdp = req->rq_disk->private_data;
-		unsigned long offset = blk_rq_pos(req) << 9;
-		size_t len = blk_rq_cur_bytes(req);
-		blk_status_t err = BLK_STS_IOERR;
-
-		if ((offset + len) > jdp->dsize)
-			goto end;
-
-		if (rq_data_dir(req) != READ) {
-			printk(KERN_ERR "jsfd: write\n");
-			goto end;
-		}
-
-		if ((jdp->dbase & 0xff000000) != 0x20000000) {
-			printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase);
-			goto end;
-		}
-
-		jsfd_read(bio_data(req->bio), jdp->dbase + offset, len);
-		err = BLK_STS_OK;
-	end:
-		if (!__blk_end_request_cur(req, err))
-			req = jsfd_next_request();
-	}
-}
-
-static void jsfd_do_request(struct request_queue *q)
-{
-	jsfd_request();
-}
-
-/*
- * The memory devices use the full 32/64 bits of the offset, and so we cannot
- * check against negative addresses: they are ok. The return value is weird,
- * though, in that case (0).
- *
- * also note that seeking relative to the "end of file" isn't supported:
- * it has no meaning, so it returns -EINVAL.
- */
-static loff_t jsf_lseek(struct file * file, loff_t offset, int orig)
-{
-	loff_t ret;
-
-	mutex_lock(&jsf_mutex);
-	switch (orig) {
-		case 0:
-			file->f_pos = offset;
-			ret = file->f_pos;
-			break;
-		case 1:
-			file->f_pos += offset;
-			ret = file->f_pos;
-			break;
-		default:
-			ret = -EINVAL;
-	}
-	mutex_unlock(&jsf_mutex);
-	return ret;
-}
-
-/*
- * OS SIMM Cannot be read in other size but a 32bits word.
- */
-static ssize_t jsf_read(struct file * file, char __user * buf, 
-    size_t togo, loff_t *ppos)
-{
-	unsigned long p = *ppos;
-	char __user *tmp = buf;
-
-	union byte4 {
-		char s[4];
-		unsigned int n;
-	} b;
-
-	if (p < JSF_BASE_ALL || p >= JSF_BASE_TOP) {
-		return 0;
-	}
-
-	if ((p + togo) < p	/* wrap */
-	   || (p + togo) >= JSF_BASE_TOP) {
-		togo = JSF_BASE_TOP - p;
-	}
-
-	if (p < JSF_BASE_ALL && togo != 0) {
-#if 0 /* __bzero XXX */
-		size_t x = JSF_BASE_ALL - p;
-		if (x > togo) x = togo;
-		clear_user(tmp, x);
-		tmp += x;
-		p += x;
-		togo -= x;
-#else
-		/*
-		 * Implementation of clear_user() calls __bzero
-		 * without regard to modversions,
-		 * so we cannot build a module.
-		 */
-		return 0;
-#endif
-	}
-
-	while (togo >= 4) {
-		togo -= 4;
-		b.n = jsf_inl(p);
-		if (copy_to_user(tmp, b.s, 4))
-			return -EFAULT;
-		tmp += 4;
-		p += 4;
-	}
-
-	/*
-	 * XXX Small togo may remain if 1 byte is ordered.
-	 * It would be nice if we did a word size read and unpacked it.
-	 */
-
-	*ppos = p;
-	return tmp-buf;
-}
-
-static ssize_t jsf_write(struct file * file, const char __user * buf,
-    size_t count, loff_t *ppos)
-{
-	return -ENOSPC;
-}
-
-/*
- */
-static int jsf_ioctl_erase(unsigned long arg)
-{
-	unsigned long p;
-
-	/* p = jsf0.base;	hits wrong bank */
-	p = 0x20400000;
-
-	jsf_outl(p, 0xAAAAAAAA);		/* Unlock 1 Write 1 */
-	jsf_outl(p, 0x55555555);		/* Unlock 1 Write 2 */
-	jsf_outl(p, 0x80808080);		/* Erase setup */
-	jsf_outl(p, 0xAAAAAAAA);		/* Unlock 2 Write 1 */
-	jsf_outl(p, 0x55555555);		/* Unlock 2 Write 2 */
-	jsf_outl(p, 0x10101010);		/* Chip erase */
-
-#if 0
-	/*
-	 * This code is ok, except that counter based timeout
-	 * has no place in this world. Let's just drop timeouts...
-	 */
-	{
-		int i;
-		__u32 x;
-		for (i = 0; i < 1000000; i++) {
-			x = jsf_inl(p);
-			if ((x & 0x80808080) == 0x80808080) break;
-		}
-		if ((x & 0x80808080) != 0x80808080) {
-			printk("jsf0: erase timeout with 0x%08x\n", x);
-		} else {
-			printk("jsf0: erase done with 0x%08x\n", x);
-		}
-	}
-#else
-	jsf_wait(p);
-#endif
-
-	return 0;
-}
-
-/*
- * Program a block of flash.
- * Very simple because we can do it byte by byte anyway.
- */
-static int jsf_ioctl_program(void __user *arg)
-{
-	struct jsflash_program_arg abuf;
-	char __user *uptr;
-	unsigned long p;
-	unsigned int togo;
-	union {
-		unsigned int n;
-		char s[4];
-	} b;
-
-	if (copy_from_user(&abuf, arg, JSFPRGSZ))
-		return -EFAULT; 
-	p = abuf.off;
-	togo = abuf.size;
-	if ((togo & 3) || (p & 3)) return -EINVAL;
-
-	uptr = (char __user *) (unsigned long) abuf.data;
-	while (togo != 0) {
-		togo -= 4;
-		if (copy_from_user(&b.s[0], uptr, 4))
-			return -EFAULT;
-		jsf_write4(p, b.n);
-		p += 4;
-		uptr += 4;
-	}
-
-	return 0;
-}
-
-static long jsf_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
-	mutex_lock(&jsf_mutex);
-	int error = -ENOTTY;
-	void __user *argp = (void __user *)arg;
-
-	if (!capable(CAP_SYS_ADMIN)) {
-		mutex_unlock(&jsf_mutex);
-		return -EPERM;
-	}
-	switch (cmd) {
-	case JSFLASH_IDENT:
-		if (copy_to_user(argp, &jsf0.id, JSFIDSZ)) {
-			mutex_unlock(&jsf_mutex);
-			return -EFAULT;
-		}
-		break;
-	case JSFLASH_ERASE:
-		error = jsf_ioctl_erase(arg);
-		break;
-	case JSFLASH_PROGRAM:
-		error = jsf_ioctl_program(argp);
-		break;
-	}
-
-	mutex_unlock(&jsf_mutex);
-	return error;
-}
-
-static int jsf_mmap(struct file * file, struct vm_area_struct * vma)
-{
-	return -ENXIO;
-}
-
-static int jsf_open(struct inode * inode, struct file * filp)
-{
-	mutex_lock(&jsf_mutex);
-	if (jsf0.base == 0) {
-		mutex_unlock(&jsf_mutex);
-		return -ENXIO;
-	}
-	if (test_and_set_bit(0, (void *)&jsf0.busy) != 0) {
-		mutex_unlock(&jsf_mutex);
-		return -EBUSY;
-	}
-
-	mutex_unlock(&jsf_mutex);
-	return 0;	/* XXX What security? */
-}
-
-static int jsf_release(struct inode *inode, struct file *file)
-{
-	jsf0.busy = 0;
-	return 0;
-}
-
-static const struct file_operations jsf_fops = {
-	.owner =	THIS_MODULE,
-	.llseek =	jsf_lseek,
-	.read =		jsf_read,
-	.write =	jsf_write,
-	.unlocked_ioctl =	jsf_ioctl,
-	.mmap =		jsf_mmap,
-	.open =		jsf_open,
-	.release =	jsf_release,
-};
-
-static struct miscdevice jsf_dev = { JSF_MINOR, "jsflash", &jsf_fops };
-
-static const struct block_device_operations jsfd_fops = {
-	.owner =	THIS_MODULE,
-};
-
-static int jsflash_init(void)
-{
-	int rc;
-	struct jsflash *jsf;
-	phandle node;
-	char banner[128];
-	struct linux_prom_registers reg0;
-
-	node = prom_getchild(prom_root_node);
-	node = prom_searchsiblings(node, "flash-memory");
-	if (node != 0 && (s32)node != -1) {
-		if (prom_getproperty(node, "reg",
-		    (char *)&reg0, sizeof(reg0)) == -1) {
-			printk("jsflash: no \"reg\" property\n");
-			return -ENXIO;
-		}
-		if (reg0.which_io != 0) {
-			printk("jsflash: bus number nonzero: 0x%x:%x\n",
-			    reg0.which_io, reg0.phys_addr);
-			return -ENXIO;
-		}
-		/*
-		 * Flash may be somewhere else, for instance on Ebus.
-		 * So, don't do the following check for IIep flash space.
-		 */
-#if 0
-		if ((reg0.phys_addr >> 24) != 0x20) {
-			printk("jsflash: suspicious address: 0x%x:%x\n",
-			    reg0.which_io, reg0.phys_addr);
-			return -ENXIO;
-		}
-#endif
-		if ((int)reg0.reg_size <= 0) {
-			printk("jsflash: bad size 0x%x\n", (int)reg0.reg_size);
-			return -ENXIO;
-		}
-	} else {
-		/* XXX Remove this code once PROLL ID12 got widespread */
-		printk("jsflash: no /flash-memory node, use PROLL >= 12\n");
-		prom_getproperty(prom_root_node, "banner-name", banner, 128);
-		if (strcmp (banner, "JavaStation-NC") != 0 &&
-		    strcmp (banner, "JavaStation-E") != 0) {
-			return -ENXIO;
-		}
-		reg0.which_io = 0;
-		reg0.phys_addr = 0x20400000;
-		reg0.reg_size  = 0x00800000;
-	}
-
-	/* Let us be really paranoid for modifications to probing code. */
-	if (sparc_cpu_model != sun4m) {
-		/* We must be on sun4m because we use MMU Bypass ASI. */
-		return -ENXIO;
-	}
-
-	if (jsf0.base == 0) {
-		jsf = &jsf0;
-
-		jsf->base = reg0.phys_addr;
-		jsf->size = reg0.reg_size;
-
-		/* XXX Redo the userland interface. */
-		jsf->id.off = JSF_BASE_ALL;
-		jsf->id.size = 0x01000000;	/* 16M - all segments */
-		strcpy(jsf->id.name, "Krups_all");
-
-		jsf->dv[0].dbase = jsf->base;
-		jsf->dv[0].dsize = jsf->size;
-		jsf->dv[1].dbase = jsf->base + 1024;
-		jsf->dv[1].dsize = jsf->size - 1024;
-		jsf->dv[2].dbase = JSF_BASE_ALL;
-		jsf->dv[2].dsize = 0x01000000;
-
-		printk("Espresso Flash @0x%lx [%d MB]\n", jsf->base,
-		    (int) (jsf->size / (1024*1024)));
-	}
-
-	if ((rc = misc_register(&jsf_dev)) != 0) {
-		printk(KERN_ERR "jsf: unable to get misc minor %d\n",
-		    JSF_MINOR);
-		jsf0.base = 0;
-		return rc;
-	}
-
-	return 0;
-}
-
-static int jsfd_init(void)
-{
-	static DEFINE_SPINLOCK(lock);
-	struct jsflash *jsf;
-	struct jsfd_part *jdp;
-	int err;
-	int i;
-
-	if (jsf0.base == 0)
-		return -ENXIO;
-
-	err = -ENOMEM;
-	for (i = 0; i < JSF_MAX; i++) {
-		struct gendisk *disk = alloc_disk(1);
-		if (!disk)
-			goto out;
-		disk->queue = blk_init_queue(jsfd_do_request, &lock);
-		if (!disk->queue) {
-			put_disk(disk);
-			goto out;
-		}
-		blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH);
-		jsfd_disk[i] = disk;
-	}
-
-	if (register_blkdev(JSFD_MAJOR, "jsfd")) {
-		err = -EIO;
-		goto out;
-	}
-
-	for (i = 0; i < JSF_MAX; i++) {
-		struct gendisk *disk = jsfd_disk[i];
-		if ((i & JSF_PART_MASK) >= JSF_NPART) continue;
-		jsf = &jsf0;	/* actually, &jsfv[i >> JSF_PART_BITS] */
-		jdp = &jsf->dv[i&JSF_PART_MASK];
-
-		disk->major = JSFD_MAJOR;
-		disk->first_minor = i;
-		sprintf(disk->disk_name, "jsfd%d", i);
-		disk->fops = &jsfd_fops;
-		set_capacity(disk, jdp->dsize >> 9);
-		disk->private_data = jdp;
-		add_disk(disk);
-		set_disk_ro(disk, 1);
-	}
-	return 0;
-out:
-	while (i--)
-		put_disk(jsfd_disk[i]);
-	return err;
-}
-
-MODULE_LICENSE("GPL");
-
-static int __init jsflash_init_module(void) {
-	int rc;
-
-	if ((rc = jsflash_init()) == 0) {
-		jsfd_init();
-		return 0;
-	}
-	return rc;
-}
-
-static void __exit jsflash_cleanup_module(void)
-{
-	int i;
-
-	for (i = 0; i < JSF_MAX; i++) {
-		if ((i & JSF_PART_MASK) >= JSF_NPART) continue;
-		del_gendisk(jsfd_disk[i]);
-		blk_cleanup_queue(jsfd_disk[i]->queue);
-		put_disk(jsfd_disk[i]);
-	}
-	if (jsf0.busy)
-		printk("jsf0: cleaning busy unit\n");
-	jsf0.base = 0;
-	jsf0.busy = 0;
-
-	misc_deregister(&jsf_dev);
-	unregister_blkdev(JSFD_MAJOR, "jsfd");
-}
-
-module_init(jsflash_init_module);
-module_exit(jsflash_cleanup_module);
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index c35f05c..8560479 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -3882,7 +3882,7 @@
 	struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
 	u8 b, t;
 	unsigned long flags;
-	enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED;
+	enum blk_eh_timer_return retval = BLK_EH_DONE;
 
 	TRACE(("%s() cmd 0x%x\n", __func__, scp->cmnd[0]));
 	b = scp->device->channel;
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 15a2fef..71bdc0b 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1963,7 +1963,7 @@
 
 enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 {
-	enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
+	enum blk_eh_timer_return rc = BLK_EH_DONE;
 	struct iscsi_task *task = NULL, *running_task;
 	struct iscsi_cls_session *cls_session;
 	struct iscsi_session *session;
@@ -1982,7 +1982,7 @@
 		 * Raced with completion. Blk layer has taken ownership
 		 * so let timeout code complete it now.
 		 */
-		rc = BLK_EH_HANDLED;
+		rc = BLK_EH_DONE;
 		goto done;
 	}
 
@@ -1997,7 +1997,7 @@
 		if (unlikely(system_state != SYSTEM_RUNNING)) {
 			sc->result = DID_NO_CONNECT << 16;
 			ISCSI_DBG_EH(session, "sc on shutdown, handled\n");
-			rc = BLK_EH_HANDLED;
+			rc = BLK_EH_DONE;
 			goto done;
 		}
 		/*
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 7195cff..91f5e2c 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -2731,53 +2731,6 @@
 	return proc_show_rdrv(m, m->private, 30, 39);
 }
 
-
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int mega_proc_open(struct inode *inode, struct file *file)
-{
-	adapter_t *adapter = proc_get_parent_data(inode);
-	int (*show)(struct seq_file *, void *) = PDE_DATA(inode);
-
-	return single_open(file, show, adapter);
-}
-
-static const struct file_operations mega_proc_fops = {
-	.open		= mega_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-/*
- * Table of proc files we need to create.
- */
-struct mega_proc_file {
-	const char *name;
-	unsigned short ptr_offset;
-	int (*show) (struct seq_file *m, void *v);
-};
-
-static const struct mega_proc_file mega_proc_files[] = {
-	{ "config",	      offsetof(adapter_t, proc_read), proc_show_config },
-	{ "stat",	      offsetof(adapter_t, proc_stat), proc_show_stat },
-	{ "mailbox",	      offsetof(adapter_t, proc_mbox), proc_show_mbox },
-#if MEGA_HAVE_ENH_PROC
-	{ "rebuild-rate",     offsetof(adapter_t, proc_rr), proc_show_rebuild_rate },
-	{ "battery-status",   offsetof(adapter_t, proc_battery), proc_show_battery },
-	{ "diskdrives-ch0",   offsetof(adapter_t, proc_pdrvstat[0]), proc_show_pdrv_ch0 },
-	{ "diskdrives-ch1",   offsetof(adapter_t, proc_pdrvstat[1]), proc_show_pdrv_ch1 },
-	{ "diskdrives-ch2",   offsetof(adapter_t, proc_pdrvstat[2]), proc_show_pdrv_ch2 },
-	{ "diskdrives-ch3",   offsetof(adapter_t, proc_pdrvstat[3]), proc_show_pdrv_ch3 },
-	{ "raiddrives-0-9",   offsetof(adapter_t, proc_rdrvstat[0]), proc_show_rdrv_10 },
-	{ "raiddrives-10-19", offsetof(adapter_t, proc_rdrvstat[1]), proc_show_rdrv_20 },
-	{ "raiddrives-20-29", offsetof(adapter_t, proc_rdrvstat[2]), proc_show_rdrv_30 },
-	{ "raiddrives-30-39", offsetof(adapter_t, proc_rdrvstat[3]), proc_show_rdrv_40 },
-#endif
-	{ NULL }
-};
-
 /**
  * mega_create_proc_entry()
  * @index - index in soft state array
@@ -2788,31 +2741,45 @@
 static void
 mega_create_proc_entry(int index, struct proc_dir_entry *parent)
 {
-	const struct mega_proc_file *f;
-	adapter_t	*adapter = hba_soft_state[index];
-	struct proc_dir_entry	*dir, *de, **ppde;
-	u8		string[16];
+	adapter_t *adapter = hba_soft_state[index];
+	struct proc_dir_entry *dir;
+	u8 string[16];
 
 	sprintf(string, "hba%d", adapter->host->host_no);
-
-	dir = adapter->controller_proc_dir_entry =
-		proc_mkdir_data(string, 0, parent, adapter);
-	if(!dir) {
+	dir = proc_mkdir_data(string, 0, parent, adapter);
+	if (!dir) {
 		dev_warn(&adapter->dev->dev, "proc_mkdir failed\n");
 		return;
 	}
 
-	for (f = mega_proc_files; f->name; f++) {
-		de = proc_create_data(f->name, S_IRUSR, dir, &mega_proc_fops,
-				      f->show);
-		if (!de) {
-			dev_warn(&adapter->dev->dev, "proc_create failed\n");
-			return;
-		}
-
-		ppde = (void *)adapter + f->ptr_offset;
-		*ppde = de;
-	}
+	proc_create_single_data("config", S_IRUSR, dir,
+			proc_show_config, adapter);
+	proc_create_single_data("stat", S_IRUSR, dir,
+			proc_show_stat, adapter);
+	proc_create_single_data("mailbox", S_IRUSR, dir,
+			proc_show_mbox, adapter);
+#if MEGA_HAVE_ENH_PROC
+	proc_create_single_data("rebuild-rate", S_IRUSR, dir,
+			proc_show_rebuild_rate, adapter);
+	proc_create_single_data("battery-status", S_IRUSR, dir,
+			proc_show_battery, adapter);
+	proc_create_single_data("diskdrives-ch0", S_IRUSR, dir,
+			proc_show_pdrv_ch0, adapter);
+	proc_create_single_data("diskdrives-ch1", S_IRUSR, dir,
+			proc_show_pdrv_ch1, adapter);
+	proc_create_single_data("diskdrives-ch2", S_IRUSR, dir,
+			proc_show_pdrv_ch2, adapter);
+	proc_create_single_data("diskdrives-ch3", S_IRUSR, dir,
+			proc_show_pdrv_ch3, adapter);
+	proc_create_single_data("raiddrives-0-9", S_IRUSR, dir,
+			proc_show_rdrv_10, adapter);
+	proc_create_single_data("raiddrives-10-19", S_IRUSR, dir,
+			proc_show_rdrv_20, adapter);
+	proc_create_single_data("raiddrives-20-29", S_IRUSR, dir,
+			proc_show_rdrv_30, adapter);
+	proc_create_single_data("raiddrives-30-39", S_IRUSR, dir,
+			proc_show_rdrv_40, adapter);
+#endif
 }
 
 #else
@@ -4580,6 +4547,7 @@
 {
 	struct Scsi_Host *host = pci_get_drvdata(pdev);
 	adapter_t *adapter = (adapter_t *)host->hostdata;
+	char buf[12] = { 0 };
 
 	scsi_remove_host(host);
 
@@ -4594,44 +4562,8 @@
 
 	mega_free_sgl(adapter);
 
-#ifdef CONFIG_PROC_FS
-	if (adapter->controller_proc_dir_entry) {
-		remove_proc_entry("stat", adapter->controller_proc_dir_entry);
-		remove_proc_entry("config",
-				adapter->controller_proc_dir_entry);
-		remove_proc_entry("mailbox",
-				adapter->controller_proc_dir_entry);
-#if MEGA_HAVE_ENH_PROC
-		remove_proc_entry("rebuild-rate",
-				adapter->controller_proc_dir_entry);
-		remove_proc_entry("battery-status",
-				adapter->controller_proc_dir_entry);
-
-		remove_proc_entry("diskdrives-ch0",
-				adapter->controller_proc_dir_entry);
-		remove_proc_entry("diskdrives-ch1",
-				adapter->controller_proc_dir_entry);
-		remove_proc_entry("diskdrives-ch2",
-				adapter->controller_proc_dir_entry);
-		remove_proc_entry("diskdrives-ch3",
-				adapter->controller_proc_dir_entry);
-
-		remove_proc_entry("raiddrives-0-9",
-				adapter->controller_proc_dir_entry);
-		remove_proc_entry("raiddrives-10-19",
-				adapter->controller_proc_dir_entry);
-		remove_proc_entry("raiddrives-20-29",
-				adapter->controller_proc_dir_entry);
-		remove_proc_entry("raiddrives-30-39",
-				adapter->controller_proc_dir_entry);
-#endif
-		{
-			char	buf[12] = { 0 };
-			sprintf(buf, "hba%d", adapter->host->host_no);
-			remove_proc_entry(buf, mega_proc_dir_entry);
-		}
-	}
-#endif
+	sprintf(buf, "hba%d", adapter->host->host_no);
+	remove_proc_subtree(buf, mega_proc_dir_entry);
 
 	pci_free_consistent(adapter->dev, MEGA_BUFFER_SIZE,
 			adapter->mega_buffer, adapter->buf_dma_handle);
diff --git a/drivers/scsi/megaraid.h b/drivers/scsi/megaraid.h
index 21eba2f..18e85d9 100644
--- a/drivers/scsi/megaraid.h
+++ b/drivers/scsi/megaraid.h
@@ -814,18 +814,6 @@
 
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry	*controller_proc_dir_entry;
-	struct proc_dir_entry	*proc_read;
-	struct proc_dir_entry	*proc_stat;
-	struct proc_dir_entry	*proc_mbox;
-
-#if MEGA_HAVE_ENH_PROC
-	struct proc_dir_entry	*proc_rr;
-	struct proc_dir_entry	*proc_battery;
-#define MAX_PROC_CHANNELS	4
-	struct proc_dir_entry	*proc_pdrvstat[MAX_PROC_CHANNELS];
-	struct proc_dir_entry	*proc_rdrvstat[MAX_PROC_CHANNELS];
-#endif
-
 #endif
 
 	int	has_64bit_addr;		/* are we using 64-bit addressing */
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index b89c6e6..ce656c4 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2772,7 +2772,7 @@
 
 	if (time_after(jiffies, scmd->jiffies_at_alloc +
 				(scmd_timeout * 2) * HZ)) {
-		return BLK_EH_NOT_HANDLED;
+		return BLK_EH_DONE;
 	}
 
 	instance = (struct megasas_instance *)scmd->device->host->hostdata;
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index fe97401..afd2716 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -2155,7 +2155,7 @@
 	mvumi_return_cmd(mhba, cmd);
 	spin_unlock_irqrestore(mhba->shost->host_lock, flags);
 
-	return BLK_EH_NOT_HANDLED;
+	return BLK_EH_DONE;
 }
 
 static int
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index e188771..5a33e1a 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -99,7 +99,7 @@
 	int nelem = ARRAY_SIZE(get_attrs), a = 0;
 	int ret;
 
-	or = osd_start_request(od, GFP_KERNEL);
+	or = osd_start_request(od);
 	if (!or)
 		return -ENOMEM;
 
@@ -409,16 +409,15 @@
 	kfree(or);
 }
 
-struct osd_request *osd_start_request(struct osd_dev *dev, gfp_t gfp)
+struct osd_request *osd_start_request(struct osd_dev *dev)
 {
 	struct osd_request *or;
 
-	or = _osd_request_alloc(gfp);
+	or = _osd_request_alloc(GFP_KERNEL);
 	if (!or)
 		return NULL;
 
 	or->osd_dev = dev;
-	or->alloc_flags = gfp;
 	or->timeout = dev->def_timeout;
 	or->retries = OSD_REQ_RETRIES;
 
@@ -546,7 +545,7 @@
 	if (seg->alloc_size >= max_bytes)
 		return 0;
 
-	buff = krealloc(seg->buff, max_bytes, or->alloc_flags);
+	buff = krealloc(seg->buff, max_bytes, GFP_KERNEL);
 	if (!buff) {
 		OSD_ERR("Failed to Realloc %d-bytes was-%d\n", max_bytes,
 			seg->alloc_size);
@@ -728,7 +727,7 @@
 		_osd_req_encode_olist(or, list);
 
 	WARN_ON(or->in.bio);
-	bio = bio_map_kern(q, list, len, or->alloc_flags);
+	bio = bio_map_kern(q, list, len, GFP_KERNEL);
 	if (IS_ERR(bio)) {
 		OSD_ERR("!!! Failed to allocate list_objects BIO\n");
 		return PTR_ERR(bio);
@@ -1190,14 +1189,14 @@
 			pad_buff = io->pad_buff;
 
 		ret = blk_rq_map_kern(io->req->q, io->req, pad_buff, padding,
-				       or->alloc_flags);
+				       GFP_KERNEL);
 		if (ret)
 			return ret;
 		io->total_bytes += padding;
 	}
 
 	ret = blk_rq_map_kern(io->req->q, io->req, seg->buff, seg->total_bytes,
-			       or->alloc_flags);
+			       GFP_KERNEL);
 	if (ret)
 		return ret;
 
@@ -1564,14 +1563,14 @@
  * osd_finalize_request and helpers
  */
 static struct request *_make_request(struct request_queue *q, bool has_write,
-			      struct _osd_io_info *oii, gfp_t flags)
+			      struct _osd_io_info *oii)
 {
 	struct request *req;
 	struct bio *bio = oii->bio;
 	int ret;
 
 	req = blk_get_request(q, has_write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			flags);
+			0);
 	if (IS_ERR(req))
 		return req;
 
@@ -1589,13 +1588,12 @@
 static int _init_blk_request(struct osd_request *or,
 	bool has_in, bool has_out)
 {
-	gfp_t flags = or->alloc_flags;
 	struct scsi_device *scsi_device = or->osd_dev->scsi_device;
 	struct request_queue *q = scsi_device->request_queue;
 	struct request *req;
 	int ret;
 
-	req = _make_request(q, has_out, has_out ? &or->out : &or->in, flags);
+	req = _make_request(q, has_out, has_out ? &or->out : &or->in);
 	if (IS_ERR(req)) {
 		ret = PTR_ERR(req);
 		goto out;
@@ -1611,7 +1609,7 @@
 		or->out.req = req;
 		if (has_in) {
 			/* allocate bidi request */
-			req = _make_request(q, false, &or->in, flags);
+			req = _make_request(q, false, &or->in);
 			if (IS_ERR(req)) {
 				OSD_DEBUG("blk_get_request for bidi failed\n");
 				ret = PTR_ERR(req);
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 20ec1c0..2bbe797 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -368,7 +368,7 @@
 	int write = (data_direction == DMA_TO_DEVICE);
 
 	req = blk_get_request(SRpnt->stp->device->request_queue,
-			write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
+			write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(req))
 		return DRIVER_ERROR << 24;
 
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 94c14ce..0e13349 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -1848,7 +1848,7 @@
 	struct iscsi_cls_session *session;
 	struct iscsi_session *sess;
 	unsigned long flags;
-	enum blk_eh_timer_return ret = BLK_EH_NOT_HANDLED;
+	enum blk_eh_timer_return ret = BLK_EH_DONE;
 
 	session = starget_to_session(scsi_target(sc->device));
 	sess = session->dd_data;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 9460391..9c02ba2 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -282,7 +282,7 @@
 enum blk_eh_timer_return scsi_times_out(struct request *req)
 {
 	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
-	enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
+	enum blk_eh_timer_return rtn = BLK_EH_DONE;
 	struct Scsi_Host *host = scmd->device->host;
 
 	trace_scsi_dispatch_cmd_timeout(scmd);
@@ -294,7 +294,7 @@
 	if (host->hostt->eh_timed_out)
 		rtn = host->hostt->eh_timed_out(scmd);
 
-	if (rtn == BLK_EH_NOT_HANDLED) {
+	if (rtn == BLK_EH_DONE) {
 		if (scsi_abort_command(scmd) != SUCCESS) {
 			set_host_byte(scmd, DID_TIME_OUT);
 			scsi_eh_scmd_add(scmd);
@@ -1933,11 +1933,7 @@
 	struct request *req;
 	struct scsi_request *rq;
 
-	/*
-	 * blk_get_request with GFP_KERNEL (__GFP_RECLAIM) sleeps until a
-	 * request becomes available
-	 */
-	req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, GFP_KERNEL);
+	req = blk_get_request(sdev->request_queue, REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(req))
 		return;
 	rq = scsi_req(req);
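
BLK_EH_NOT_HANDLED and BLK_EH_HANDLED are gone: a timeout handler that is
finished with a request now completes it itself (see the
blk_mq_complete_request() call in the scsi_transport_fc hunk below) and
returns BLK_EH_DONE in both cases. A hedged sketch of the new contract;
demo_owns_request() stands in for a driver-specific check:

    #include <linux/blkdev.h>
    #include <linux/blk-mq.h>

    static bool demo_owns_request(struct request *rq)
    {
    	return false;	/* placeholder for a driver-specific check */
    }

    static enum blk_eh_timer_return demo_timed_out(struct request *rq)
    {
    	if (demo_owns_request(rq))
    		return BLK_EH_RESET_TIMER;	/* still in flight: rearm */

    	blk_mq_complete_request(rq);	/* what BLK_EH_HANDLED implied */
    	return BLK_EH_DONE;		/* replaces both old values */
    }
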
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e9b4f27..fb38aef 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -265,7 +265,7 @@
 	struct scsi_request *rq;
 	int ret = DRIVER_ERROR << 24;
 
-	req = blk_get_request_flags(sdev->request_queue,
+	req = blk_get_request(sdev->request_queue,
 			data_direction == DMA_TO_DEVICE ?
 			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT);
 	if (IS_ERR(req))
@@ -273,7 +273,7 @@
 	rq = scsi_req(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
-					buffer, bufflen, __GFP_RECLAIM))
+					buffer, bufflen, GFP_NOIO))
 		goto out;
 
 	rq->cmd_len = COMMAND_SIZE(cmd[0]);
@@ -2149,27 +2149,6 @@
 	return blk_mq_map_queues(set);
 }
 
-static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
-{
-	struct device *host_dev;
-	u64 bounce_limit = 0xffffffff;
-
-	if (shost->unchecked_isa_dma)
-		return BLK_BOUNCE_ISA;
-	/*
-	 * Platforms with virtual-DMA translation
-	 * hardware have no practical limit.
-	 */
-	if (!PCI_DMA_BUS_IS_PHYS)
-		return BLK_BOUNCE_ANY;
-
-	host_dev = scsi_get_device(shost);
-	if (host_dev && host_dev->dma_mask)
-		bounce_limit = (u64)dma_max_pfn(host_dev) << PAGE_SHIFT;
-
-	return bounce_limit;
-}
-
 void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 {
 	struct device *dev = shost->dma_dev;
@@ -2189,7 +2168,8 @@
 	}
 
 	blk_queue_max_hw_sectors(q, shost->max_sectors);
-	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
+	if (shost->unchecked_isa_dma)
+		blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
 	blk_queue_segment_boundary(q, shost->dma_boundary);
 	dma_set_seg_boundary(dev, shost->dma_boundary);
 
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index be3be0f..1da3d71 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2087,7 +2087,7 @@
 	if (rport->port_state == FC_PORTSTATE_BLOCKED)
 		return BLK_EH_RESET_TIMER;
 
-	return BLK_EH_NOT_HANDLED;
+	return BLK_EH_DONE;
 }
 EXPORT_SYMBOL(fc_eh_timed_out);
 
@@ -3591,10 +3591,9 @@
 	}
 
 	/* the blk_end_sync_io() doesn't check the error */
-	if (!inflight)
-		return BLK_EH_NOT_HANDLED;
-	else
-		return BLK_EH_HANDLED;
+	if (inflight)
+		blk_mq_complete_request(req);
+	return BLK_EH_DONE;
 }
 
 /**
@@ -3781,8 +3780,7 @@
 	snprintf(bsg_name, sizeof(bsg_name),
 		 "fc_host%d", shost->host_no);
 
-	q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size,
-			NULL);
+	q = bsg_setup_queue(dev, bsg_name, fc_bsg_dispatch, i->f->dd_bsg_size);
 	if (IS_ERR(q)) {
 		dev_err(dev,
 			"fc_host%d: bsg interface failed to initialize - setup queue\n",
@@ -3827,8 +3825,8 @@
 	if (!i->f->bsg_request)
 		return -ENOTSUPP;
 
-	q = bsg_setup_queue(dev, NULL, fc_bsg_dispatch, i->f->dd_bsg_size,
-			NULL);
+	q = bsg_setup_queue(dev, dev_name(dev), fc_bsg_dispatch,
+			i->f->dd_bsg_size);
 	if (IS_ERR(q)) {
 		dev_err(dev, "failed to setup bsg queue\n");
 		return PTR_ERR(q);
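
bsg_setup_queue() also loses its release-callback argument in this series;
freeing the queue is now the caller's job, as the scsi_transport_sas hunk
below makes explicit. A sketch with demo_ placeholder names:

    #include <linux/blkdev.h>
    #include <linux/bsg-lib.h>

    static int demo_dispatch(struct bsg_job *job)
    {
    	return 0;	/* placeholder dispatch handler */
    }

    static struct request_queue *demo_bsg_init(struct device *dev)
    {
    	/* the old 5th (release callback) argument is gone */
    	return bsg_setup_queue(dev, dev_name(dev), demo_dispatch, 0);
    }

    static void demo_bsg_remove(struct request_queue *q)
    {
    	/* teardown is now the caller's responsibility */
    	bsg_unregister_queue(q);
    	blk_cleanup_queue(q);
    }
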
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 65f6c94..6fd2fe2 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1542,7 +1542,7 @@
 		return -ENOTSUPP;
 
 	snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no);
-	q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0, NULL);
+	q = bsg_setup_queue(dev, bsg_name, iscsi_bsg_host_dispatch, 0);
 	if (IS_ERR(q)) {
 		shost_printk(KERN_ERR, shost, "bsg interface failed to "
 			     "initialize - no request queue\n");
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 08acbab..e2953b4 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -187,16 +187,6 @@
 	return 0;
 }
 
-static void sas_host_release(struct device *dev)
-{
-	struct Scsi_Host *shost = dev_to_shost(dev);
-	struct sas_host_attrs *sas_host = to_sas_host_attrs(shost);
-	struct request_queue *q = sas_host->q;
-
-	if (q)
-		blk_cleanup_queue(q);
-}
-
 static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 {
 	struct request_queue *q;
@@ -208,7 +198,7 @@
 
 	if (rphy) {
 		q = bsg_setup_queue(&rphy->dev, dev_name(&rphy->dev),
-				sas_smp_dispatch, 0, NULL);
+				sas_smp_dispatch, 0);
 		if (IS_ERR(q))
 			return PTR_ERR(q);
 		rphy->q = q;
@@ -217,7 +207,7 @@
 
 		snprintf(name, sizeof(name), "sas_host%d", shost->host_no);
 		q = bsg_setup_queue(&shost->shost_gendev, name,
-				sas_smp_dispatch, 0, sas_host_release);
+				sas_smp_dispatch, 0);
 		if (IS_ERR(q))
 			return PTR_ERR(q);
 		to_sas_host_attrs(shost)->q = q;
@@ -260,8 +250,11 @@
 	struct Scsi_Host *shost = dev_to_shost(dev);
 	struct request_queue *q = to_sas_host_attrs(shost)->q;
 
-	if (q)
+	if (q) {
 		bsg_unregister_queue(q);
+		blk_cleanup_queue(q);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 456ce9f..4e46fdb 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -604,7 +604,7 @@
  *
  * If a timeout occurs while an rport is in the blocked state, ask the SCSI
  * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core
- * handle the timeout (BLK_EH_NOT_HANDLED).
+ * handle the timeout (BLK_EH_DONE).
  *
  * Note: This function is called from soft-IRQ context and with the request
  * queue lock held.
@@ -620,7 +620,7 @@
 	return rport && rport->fast_io_fail_tmo < 0 &&
 		rport->dev_loss_tmo < 0 &&
 		i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ?
-		BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
+		BLK_EH_RESET_TIMER : BLK_EH_DONE;
 }
 EXPORT_SYMBOL(srp_timed_out);
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 5c40d80..6fc58e2 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -66,7 +66,6 @@
 static char *sg_version_date = "20140603";
 
 static int sg_proc_init(void);
-static void sg_proc_cleanup(void);
 #endif
 
 #define SG_ALLOW_DIO_DEF 0
@@ -1661,7 +1660,7 @@
 exit_sg(void)
 {
 #ifdef CONFIG_SCSI_PROC_FS
-	sg_proc_cleanup();
+	remove_proc_subtree("scsi/sg", NULL);
 #endif				/* CONFIG_SCSI_PROC_FS */
 	scsi_unregister_interface(&sg_interface);
 	class_destroy(sg_sysfs_class);
@@ -1715,7 +1714,7 @@
 	 * does not sleep except under memory pressure.
 	 */
 	rq = blk_get_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ?
-			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
+			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq)) {
 		kfree(long_cmdp);
 		return PTR_ERR(rq);
@@ -2274,11 +2273,6 @@
 }
 
 #ifdef CONFIG_SCSI_PROC_FS
-
-static struct proc_dir_entry *sg_proc_sgp = NULL;
-
-static char sg_proc_sg_dirname[] = "scsi/sg";
-
 static int sg_proc_seq_show_int(struct seq_file *s, void *v);
 
 static int sg_proc_single_open_adio(struct inode *inode, struct file *file);
@@ -2306,37 +2300,11 @@
 };
 
 static int sg_proc_seq_show_version(struct seq_file *s, void *v);
-static int sg_proc_single_open_version(struct inode *inode, struct file *file);
-static const struct file_operations version_fops = {
-	.owner = THIS_MODULE,
-	.open = sg_proc_single_open_version,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
 static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v);
-static int sg_proc_single_open_devhdr(struct inode *inode, struct file *file);
-static const struct file_operations devhdr_fops = {
-	.owner = THIS_MODULE,
-	.open = sg_proc_single_open_devhdr,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
 static int sg_proc_seq_show_dev(struct seq_file *s, void *v);
-static int sg_proc_open_dev(struct inode *inode, struct file *file);
 static void * dev_seq_start(struct seq_file *s, loff_t *pos);
 static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos);
 static void dev_seq_stop(struct seq_file *s, void *v);
-static const struct file_operations dev_fops = {
-	.owner = THIS_MODULE,
-	.open = sg_proc_open_dev,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
 static const struct seq_operations dev_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
@@ -2345,14 +2313,6 @@
 };
 
 static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v);
-static int sg_proc_open_devstrs(struct inode *inode, struct file *file);
-static const struct file_operations devstrs_fops = {
-	.owner = THIS_MODULE,
-	.open = sg_proc_open_devstrs,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
 static const struct seq_operations devstrs_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
@@ -2361,14 +2321,6 @@
 };
 
 static int sg_proc_seq_show_debug(struct seq_file *s, void *v);
-static int sg_proc_open_debug(struct inode *inode, struct file *file);
-static const struct file_operations debug_fops = {
-	.owner = THIS_MODULE,
-	.open = sg_proc_open_debug,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
 static const struct seq_operations debug_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
@@ -2376,52 +2328,25 @@
 	.show  = sg_proc_seq_show_debug,
 };
 
-
-struct sg_proc_leaf {
-	const char * name;
-	const struct file_operations * fops;
-};
-
-static const struct sg_proc_leaf sg_proc_leaf_arr[] = {
-	{"allow_dio", &adio_fops},
-	{"debug", &debug_fops},
-	{"def_reserved_size", &dressz_fops},
-	{"device_hdr", &devhdr_fops},
-	{"devices", &dev_fops},
-	{"device_strs", &devstrs_fops},
-	{"version", &version_fops}
-};
-
 static int
 sg_proc_init(void)
 {
-	int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr);
-	int k;
+	struct proc_dir_entry *p;
 
-	sg_proc_sgp = proc_mkdir(sg_proc_sg_dirname, NULL);
-	if (!sg_proc_sgp)
+	p = proc_mkdir("scsi/sg", NULL);
+	if (!p)
 		return 1;
-	for (k = 0; k < num_leaves; ++k) {
-		const struct sg_proc_leaf *leaf = &sg_proc_leaf_arr[k];
-		umode_t mask = leaf->fops->write ? S_IRUGO | S_IWUSR : S_IRUGO;
-		proc_create(leaf->name, mask, sg_proc_sgp, leaf->fops);
-	}
+
+	proc_create("allow_dio", S_IRUGO | S_IWUSR, p, &adio_fops);
+	proc_create_seq("debug", S_IRUGO, p, &debug_seq_ops);
+	proc_create("def_reserved_size", S_IRUGO | S_IWUSR, p, &dressz_fops);
+	proc_create_single("device_hdr", S_IRUGO, p, sg_proc_seq_show_devhdr);
+	proc_create_seq("devices", S_IRUGO, p, &dev_seq_ops);
+	proc_create_seq("device_strs", S_IRUGO, p, &devstrs_seq_ops);
+	proc_create_single("version", S_IRUGO, p, sg_proc_seq_show_version);
 	return 0;
 }
 
-static void
-sg_proc_cleanup(void)
-{
-	int k;
-	int num_leaves = ARRAY_SIZE(sg_proc_leaf_arr);
-
-	if (!sg_proc_sgp)
-		return;
-	for (k = 0; k < num_leaves; ++k)
-		remove_proc_entry(sg_proc_leaf_arr[k].name, sg_proc_sgp);
-	remove_proc_entry(sg_proc_sg_dirname, NULL);
-}
-
 
 static int sg_proc_seq_show_int(struct seq_file *s, void *v)
 {
@@ -2482,22 +2407,12 @@
 	return 0;
 }
 
-static int sg_proc_single_open_version(struct inode *inode, struct file *file)
-{
-	return single_open(file, sg_proc_seq_show_version, NULL);
-}
-
 static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v)
 {
 	seq_puts(s, "host\tchan\tid\tlun\ttype\topens\tqdepth\tbusy\tonline\n");
 	return 0;
 }
 
-static int sg_proc_single_open_devhdr(struct inode *inode, struct file *file)
-{
-	return single_open(file, sg_proc_seq_show_devhdr, NULL);
-}
-
 struct sg_proc_deviter {
 	loff_t	index;
 	size_t	max;
@@ -2531,11 +2446,6 @@
 	kfree(s->private);
 }
 
-static int sg_proc_open_dev(struct inode *inode, struct file *file)
-{
-        return seq_open(file, &dev_seq_ops);
-}
-
 static int sg_proc_seq_show_dev(struct seq_file *s, void *v)
 {
 	struct sg_proc_deviter * it = (struct sg_proc_deviter *) v;
@@ -2562,11 +2472,6 @@
 	return 0;
 }
 
-static int sg_proc_open_devstrs(struct inode *inode, struct file *file)
-{
-        return seq_open(file, &devstrs_seq_ops);
-}
-
 static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v)
 {
 	struct sg_proc_deviter * it = (struct sg_proc_deviter *) v;
@@ -2650,11 +2555,6 @@
 	}
 }
 
-static int sg_proc_open_debug(struct inode *inode, struct file *file)
-{
-        return seq_open(file, &debug_seq_ops);
-}
-
 static int sg_proc_seq_show_debug(struct seq_file *s, void *v)
 {
 	struct sg_proc_deviter * it = (struct sg_proc_deviter *) v;
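
The sg proc conversion above is a good template for the new
proc_create_seq()/proc_create_single() helpers, which take the
seq_operations or show function directly and generate the
open/read/llseek/release glue internally. A minimal sketch (demo names are
placeholders):

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *s, void *v)
    {
    	seq_puts(s, "hello\n");
    	return 0;
    }

    static int __init demo_proc_init(void)
    {
    	/* one call replaces a full file_operations + single_open() pair */
    	return proc_create_single("demo", S_IRUGO, NULL, demo_show) ?
    		0 : -ENOMEM;
    }
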
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 6c39948..a427ce9 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -545,7 +545,7 @@
 
 	req = blk_get_request(SRpnt->stp->device->request_queue,
 			data_direction == DMA_TO_DEVICE ?
-			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, GFP_KERNEL);
+			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(req))
 		return DRIVER_ERROR << 24;
 	rq = scsi_req(req);
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 00e7905..d0a1674 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -6497,12 +6497,12 @@
 	bool found = false;
 
 	if (!scmd || !scmd->device || !scmd->device->host)
-		return BLK_EH_NOT_HANDLED;
+		return BLK_EH_DONE;
 
 	host = scmd->device->host;
 	hba = shost_priv(host);
 	if (!hba)
-		return BLK_EH_NOT_HANDLED;
+		return BLK_EH_DONE;
 
 	spin_lock_irqsave(host->host_lock, flags);
 
@@ -6520,7 +6520,7 @@
 	 * SCSI command was not actually dispatched to UFS driver, otherwise
 	 * let SCSI layer handle the error as usual.
 	 */
-	return found ? BLK_EH_NOT_HANDLED : BLK_EH_RESET_TIMER;
+	return found ? BLK_EH_DONE : BLK_EH_RESET_TIMER;
 }
 
 static const struct attribute_group *ufshcd_driver_groups[] = {
diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c
index ffb46f9..bb36a8f 100644
--- a/drivers/slimbus/qcom-ctrl.c
+++ b/drivers/slimbus/qcom-ctrl.c
@@ -439,13 +439,12 @@
 static void qcom_slim_rxwq(struct work_struct *work)
 {
 	u8 buf[SLIM_MSGQ_BUF_LEN];
-	u8 mc, mt, len;
+	u8 mc, mt;
 	int ret;
 	struct qcom_slim_ctrl *ctrl = container_of(work, struct qcom_slim_ctrl,
 						 wd);
 
 	while ((slim_get_current_rxbuf(ctrl, buf)) != -ENODATA) {
-		len = SLIM_HEADER_GET_RL(buf[0]);
 		mt = SLIM_HEADER_GET_MT(buf[0]);
 		mc = SLIM_HEADER_GET_MC(buf[1]);
 		if (mt == SLIM_MSG_MT_CORE &&
diff --git a/drivers/soc/actions/owl-sps.c b/drivers/soc/actions/owl-sps.c
index 8477f0f..032921d 100644
--- a/drivers/soc/actions/owl-sps.c
+++ b/drivers/soc/actions/owl-sps.c
@@ -117,8 +117,8 @@
 
 	sps_info = match->data;
 
-	sps = devm_kzalloc(&pdev->dev, sizeof(*sps) +
-			   sps_info->num_domains * sizeof(sps->domains[0]),
+	sps = devm_kzalloc(&pdev->dev,
+			   struct_size(sps, domains, sps_info->num_domains),
 			   GFP_KERNEL);
 	if (!sps)
 		return -ENOMEM;
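
struct_size(), used here and in the rockchip pm_domains hunk below,
replaces open-coded "sizeof(*p) + n * sizeof(p->elem[0])" for a struct
with a trailing flexible array, saturating instead of wrapping on
overflow. A sketch with placeholder types:

    #include <linux/device.h>
    #include <linux/overflow.h>
    #include <linux/slab.h>

    struct demo_sps {
    	struct device *dev;
    	void *domains[];		/* flexible array member */
    };

    static struct demo_sps *demo_alloc(struct device *dev, int num)
    {
    	struct demo_sps *sps;

    	/* only sizeof() is taken of 'sps', so it may be uninitialized */
    	sps = devm_kzalloc(dev, struct_size(sps, domains, num), GFP_KERNEL);
    	return sps;
    }
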
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 5c4535b..d053f26 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -108,4 +108,13 @@
 	  Client driver for the WCNSS_CTRL SMD channel, used to download nv
 	  firmware to a newly booted WCNSS chip.
 
+config QCOM_APR
+	tristate "Qualcomm APR Bus (Asynchronous Packet Router)"
+	depends on ARCH_QCOM
+	depends on RPMSG
+	help
+	  Enable APR IPC protocol support between the application
+	  processor and QDSP6. APR is used by the audio driver to
+	  configure the QDSP6 ASM, ADM and AFE modules.
 endmenu
diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile
index dcebf28..39de5de 100644
--- a/drivers/soc/qcom/Makefile
+++ b/drivers/soc/qcom/Makefile
@@ -12,3 +12,4 @@
 obj-$(CONFIG_QCOM_SMP2P)	+= smp2p.o
 obj-$(CONFIG_QCOM_SMSM)	+= smsm.o
 obj-$(CONFIG_QCOM_WCNSS_CTRL) += wcnss_ctrl.o
+obj-$(CONFIG_QCOM_APR) += apr.o
diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c
new file mode 100644
index 0000000..57af8a5
--- /dev/null
+++ b/drivers/soc/qcom/apr.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/of_device.h>
+#include <linux/soc/qcom/apr.h>
+#include <linux/rpmsg.h>
+#include <linux/of.h>
+
+struct apr {
+	struct rpmsg_endpoint *ch;
+	struct device *dev;
+	spinlock_t svcs_lock;
+	struct idr svcs_idr;
+	int dest_domain_id;
+};
+
+/**
+ * apr_send_pkt() - Send an APR message from an APR device
+ *
+ * @adev: Pointer to previously registered apr device.
+ * @pkt: Pointer to apr packet to send
+ *
+ * Return: packet size on success, a negative error code on failure.
+ */
+int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt)
+{
+	struct apr *apr = dev_get_drvdata(adev->dev.parent);
+	struct apr_hdr *hdr;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&adev->lock, flags);
+
+	hdr = &pkt->hdr;
+	hdr->src_domain = APR_DOMAIN_APPS;
+	hdr->src_svc = adev->svc_id;
+	hdr->dest_domain = adev->domain_id;
+	hdr->dest_svc = adev->svc_id;
+
+	ret = rpmsg_trysend(apr->ch, pkt, hdr->pkt_size);
+	spin_unlock_irqrestore(&adev->lock, flags);
+
+	return ret ? ret : hdr->pkt_size;
+}
+EXPORT_SYMBOL_GPL(apr_send_pkt);
+
+static void apr_dev_release(struct device *dev)
+{
+	struct apr_device *adev = to_apr_device(dev);
+
+	kfree(adev);
+}
+
+static int apr_callback(struct rpmsg_device *rpdev, void *buf,
+				  int len, void *priv, u32 addr)
+{
+	struct apr *apr = dev_get_drvdata(&rpdev->dev);
+	uint16_t hdr_size, msg_type, ver, svc_id;
+	struct apr_device *svc = NULL;
+	struct apr_driver *adrv = NULL;
+	struct apr_resp_pkt resp;
+	struct apr_hdr *hdr;
+	unsigned long flags;
+
+	if (len <= APR_HDR_SIZE) {
+		dev_err(apr->dev, "APR: Improper apr pkt received:%p %d\n",
+			buf, len);
+		return -EINVAL;
+	}
+
+	hdr = buf;
+	ver = APR_HDR_FIELD_VER(hdr->hdr_field);
+	if (ver > APR_PKT_VER + 1)
+		return -EINVAL;
+
+	hdr_size = APR_HDR_FIELD_SIZE_BYTES(hdr->hdr_field);
+	if (hdr_size < APR_HDR_SIZE) {
+		dev_err(apr->dev, "APR: Wrong hdr size:%d\n", hdr_size);
+		return -EINVAL;
+	}
+
+	if (hdr->pkt_size < APR_HDR_SIZE || hdr->pkt_size != len) {
+		dev_err(apr->dev, "APR: Wrong packet size\n");
+		return -EINVAL;
+	}
+
+	msg_type = APR_HDR_FIELD_MT(hdr->hdr_field);
+	if (msg_type >= APR_MSG_TYPE_MAX) {
+		dev_err(apr->dev, "APR: Wrong message type: %d\n", msg_type);
+		return -EINVAL;
+	}
+
+	if (hdr->src_domain >= APR_DOMAIN_MAX ||
+			hdr->dest_domain >= APR_DOMAIN_MAX ||
+			hdr->src_svc >= APR_SVC_MAX ||
+			hdr->dest_svc >= APR_SVC_MAX) {
+		dev_err(apr->dev, "APR: Wrong APR header\n");
+		return -EINVAL;
+	}
+
+	svc_id = hdr->dest_svc;
+	spin_lock_irqsave(&apr->svcs_lock, flags);
+	svc = idr_find(&apr->svcs_idr, svc_id);
+	if (svc && svc->dev.driver)
+		adrv = to_apr_driver(svc->dev.driver);
+	spin_unlock_irqrestore(&apr->svcs_lock, flags);
+
+	if (!adrv) {
+		dev_err(apr->dev, "APR: service is not registered\n");
+		return -EINVAL;
+	}
+
+	resp.hdr = *hdr;
+	resp.payload_size = hdr->pkt_size - hdr_size;
+
+	/*
+	 * NOTE: hdr_size is not the same as APR_HDR_SIZE, as the remote can
+	 * include optional headers in apr_hdr which should be ignored
+	 */
+	if (resp.payload_size > 0)
+		resp.payload = buf + hdr_size;
+
+	adrv->callback(svc, &resp);
+
+	return 0;
+}
+
+static int apr_device_match(struct device *dev, struct device_driver *drv)
+{
+	struct apr_device *adev = to_apr_device(dev);
+	struct apr_driver *adrv = to_apr_driver(drv);
+	const struct apr_device_id *id = adrv->id_table;
+
+	/* Attempt an OF style match first */
+	if (of_driver_match_device(dev, drv))
+		return 1;
+
+	if (!id)
+		return 0;
+
+	while (id->domain_id != 0 || id->svc_id != 0) {
+		if (id->domain_id == adev->domain_id &&
+		    id->svc_id == adev->svc_id)
+			return 1;
+		id++;
+	}
+
+	return 0;
+}
+
+static int apr_device_probe(struct device *dev)
+{
+	struct apr_device *adev = to_apr_device(dev);
+	struct apr_driver *adrv = to_apr_driver(dev->driver);
+
+	return adrv->probe(adev);
+}
+
+static int apr_device_remove(struct device *dev)
+{
+	struct apr_device *adev = to_apr_device(dev);
+	struct apr_driver *adrv;
+	struct apr *apr = dev_get_drvdata(adev->dev.parent);
+
+	if (dev->driver) {
+		adrv = to_apr_driver(dev->driver);
+		if (adrv->remove)
+			adrv->remove(adev);
+		spin_lock(&apr->svcs_lock);
+		idr_remove(&apr->svcs_idr, adev->svc_id);
+		spin_unlock(&apr->svcs_lock);
+	}
+
+	return 0;
+}
+
+static int apr_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct apr_device *adev = to_apr_device(dev);
+	int ret;
+
+	ret = of_device_uevent_modalias(dev, env);
+	if (ret != -ENODEV)
+		return ret;
+
+	return add_uevent_var(env, "MODALIAS=apr:%s", adev->name);
+}
+
+struct bus_type aprbus = {
+	.name		= "aprbus",
+	.match		= apr_device_match,
+	.probe		= apr_device_probe,
+	.uevent		= apr_uevent,
+	.remove		= apr_device_remove,
+};
+EXPORT_SYMBOL_GPL(aprbus);
+
+static int apr_add_device(struct device *dev, struct device_node *np,
+			  const struct apr_device_id *id)
+{
+	struct apr *apr = dev_get_drvdata(dev);
+	struct apr_device *adev = NULL;
+	int ret;
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+
+	spin_lock_init(&adev->lock);
+
+	adev->svc_id = id->svc_id;
+	adev->domain_id = id->domain_id;
+	adev->version = id->svc_version;
+	if (np)
+		strncpy(adev->name, np->name, APR_NAME_SIZE);
+	else
+		strncpy(adev->name, id->name, APR_NAME_SIZE);
+
+	dev_set_name(&adev->dev, "aprsvc:%s:%x:%x", adev->name,
+		     id->domain_id, id->svc_id);
+
+	adev->dev.bus = &aprbus;
+	adev->dev.parent = dev;
+	adev->dev.of_node = np;
+	adev->dev.release = apr_dev_release;
+	adev->dev.driver = NULL;
+
+	spin_lock(&apr->svcs_lock);
+	idr_alloc(&apr->svcs_idr, adev, id->svc_id,
+		  id->svc_id + 1, GFP_ATOMIC);
+	spin_unlock(&apr->svcs_lock);
+
+	dev_info(dev, "Adding APR dev: %s\n", dev_name(&adev->dev));
+
+	ret = device_register(&adev->dev);
+	if (ret) {
+		dev_err(dev, "device_register failed: %d\n", ret);
+		put_device(&adev->dev);
+	}
+
+	return ret;
+}
+
+static void of_register_apr_devices(struct device *dev)
+{
+	struct apr *apr = dev_get_drvdata(dev);
+	struct device_node *node;
+
+	for_each_child_of_node(dev->of_node, node) {
+		struct apr_device_id id = { {0} };
+
+		if (of_property_read_u32(node, "reg", &id.svc_id))
+			continue;
+
+		id.domain_id = apr->dest_domain_id;
+
+		if (apr_add_device(dev, node, &id))
+			dev_err(dev, "Failed to add apr %d svc\n", id.svc_id);
+	}
+}
+
+static int apr_probe(struct rpmsg_device *rpdev)
+{
+	struct device *dev = &rpdev->dev;
+	struct apr *apr;
+	int ret;
+
+	apr = devm_kzalloc(dev, sizeof(*apr), GFP_KERNEL);
+	if (!apr)
+		return -ENOMEM;
+
+	ret = of_property_read_u32(dev->of_node, "reg", &apr->dest_domain_id);
+	if (ret) {
+		dev_err(dev, "APR Domain ID not specified in DT\n");
+		return ret;
+	}
+
+	dev_set_drvdata(dev, apr);
+	apr->ch = rpdev->ept;
+	apr->dev = dev;
+	spin_lock_init(&apr->svcs_lock);
+	idr_init(&apr->svcs_idr);
+	of_register_apr_devices(dev);
+
+	return 0;
+}
+
+static int apr_remove_device(struct device *dev, void *null)
+{
+	struct apr_device *adev = to_apr_device(dev);
+
+	device_unregister(&adev->dev);
+
+	return 0;
+}
+
+static void apr_remove(struct rpmsg_device *rpdev)
+{
+	device_for_each_child(&rpdev->dev, NULL, apr_remove_device);
+}
+
+/**
+ * __apr_driver_register() - Client driver registration with aprbus
+ *
+ * @drv: Client driver to be associated with client-device.
+ * @owner: owning module/driver
+ *
+ * This API registers the client driver with the aprbus.
+ * It is called from the driver's module-init function.
+ */
+int __apr_driver_register(struct apr_driver *drv, struct module *owner)
+{
+	drv->driver.bus = &aprbus;
+	drv->driver.owner = owner;
+
+	return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(__apr_driver_register);
+
+/**
+ * apr_driver_unregister() - Undo effect of apr_driver_register
+ *
+ * @drv: Client driver to be unregistered
+ */
+void apr_driver_unregister(struct apr_driver *drv)
+{
+	driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(apr_driver_unregister);
+
+static const struct of_device_id apr_of_match[] = {
+	{ .compatible = "qcom,apr"},
+	{ .compatible = "qcom,apr-v2"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, apr_of_match);
+
+static struct rpmsg_driver apr_driver = {
+	.probe = apr_probe,
+	.remove = apr_remove,
+	.callback = apr_callback,
+	.drv = {
+		.name = "qcom,apr",
+		.of_match_table = apr_of_match,
+	},
+};
+
+static int __init apr_init(void)
+{
+	int ret;
+
+	ret = bus_register(&aprbus);
+	if (ret)
+		return ret;
+
+	/* unwind the bus registration if the rpmsg driver fails */
+	ret = register_rpmsg_driver(&apr_driver);
+	if (ret)
+		bus_unregister(&aprbus);
+
+	return ret;
+}
+
+static void __exit apr_exit(void)
+{
+	/* tear down in reverse order of registration */
+	unregister_rpmsg_driver(&apr_driver);
+	bus_unregister(&aprbus);
+}
+
+subsys_initcall(apr_init);
+module_exit(apr_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Qualcomm APR Bus");
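
For context, a hypothetical APR client would bind against this bus roughly
as follows. The q6demo names are invented; APR_DOMAIN_ADSP and the
int-returning callback signature are assumptions about the
linux/soc/qcom/apr.h header, which is not part of this hunk:

    #include <linux/module.h>
    #include <linux/soc/qcom/apr.h>

    static int q6demo_probe(struct apr_device *adev)
    {
    	dev_info(&adev->dev, "bound to service %d\n", adev->svc_id);
    	return 0;
    }

    static int q6demo_callback(struct apr_device *adev,
    			   struct apr_resp_pkt *resp)
    {
    	/* resp->payload/payload_size carry anything past the header */
    	return 0;
    }

    static const struct apr_device_id q6demo_id[] = {
    	{ .domain_id = APR_DOMAIN_ADSP, .svc_id = 7 },
    	{ /* all-zero sentinel ends apr_device_match()'s loop */ }
    };

    static struct apr_driver q6demo_driver = {
    	.probe = q6demo_probe,
    	.callback = q6demo_callback,
    	.id_table = q6demo_id,
    	.driver = { .name = "q6demo" },
    };

    static int __init q6demo_init(void)
    {
    	return __apr_driver_register(&q6demo_driver, THIS_MODULE);
    }
    module_init(q6demo_init);

    static void __exit q6demo_exit(void)
    {
    	apr_driver_unregister(&q6demo_driver);
    }
    module_exit(q6demo_exit);

    MODULE_LICENSE("GPL v2");
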
diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c
index 53efc38..111c44f 100644
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -626,8 +626,7 @@
 	pmu_info = match->data;
 
 	pmu = devm_kzalloc(dev,
-			   sizeof(*pmu) +
-				pmu_info->num_domains * sizeof(pmu->domains[0]),
+			   struct_size(pmu, domains, pmu_info->num_domains),
 			   GFP_KERNEL);
 	if (!pmu)
 		return -ENOMEM;
diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index d9fcdb5..3e3d12c 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -559,22 +559,28 @@
 int tegra_powergate_sequence_power_up(unsigned int id, struct clk *clk,
 				      struct reset_control *rst)
 {
-	struct tegra_powergate pg;
+	struct tegra_powergate *pg;
 	int err;
 
 	if (!tegra_powergate_is_available(id))
 		return -EINVAL;
 
-	pg.id = id;
-	pg.clks = &clk;
-	pg.num_clks = 1;
-	pg.reset = rst;
-	pg.pmc = pmc;
+	pg = kzalloc(sizeof(*pg), GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
 
-	err = tegra_powergate_power_up(&pg, false);
+	pg->id = id;
+	pg->clks = &clk;
+	pg->num_clks = 1;
+	pg->reset = rst;
+	pg->pmc = pmc;
+
+	err = tegra_powergate_power_up(pg, false);
 	if (err)
 		pr_err("failed to turn on partition %d: %d\n", id, err);
 
+	kfree(pg);
+
 	return err;
 }
 EXPORT_SYMBOL(tegra_powergate_sequence_power_up);
diff --git a/drivers/soundwire/Kconfig b/drivers/soundwire/Kconfig
index b46084b..19c8efb 100644
--- a/drivers/soundwire/Kconfig
+++ b/drivers/soundwire/Kconfig
@@ -27,7 +27,7 @@
 	tristate "Intel SoundWire Master driver"
 	select SOUNDWIRE_CADENCE
 	select SOUNDWIRE_BUS
-	depends on X86 && ACPI
+	depends on X86 && ACPI && SND_SOC
 	---help---
 	  SoundWire Intel Master driver.
 	  If you have an Intel platform which has a SoundWire Master then
diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile
index e1a74c5..5817bea 100644
--- a/drivers/soundwire/Makefile
+++ b/drivers/soundwire/Makefile
@@ -3,7 +3,7 @@
 #
 
 #Bus Objs
-soundwire-bus-objs := bus_type.o bus.o slave.o mipi_disco.o
+soundwire-bus-objs := bus_type.o bus.o slave.o mipi_disco.o stream.o
 obj-$(CONFIG_SOUNDWIRE_BUS) += soundwire-bus.o
 
 #Cadence Objs
diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index d6dc8e7..dcc0ff9 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -17,6 +17,7 @@
  */
 int sdw_add_bus_master(struct sdw_bus *bus)
 {
+	struct sdw_master_prop *prop = NULL;
 	int ret;
 
 	if (!bus->dev) {
@@ -32,6 +33,7 @@
 	mutex_init(&bus->msg_lock);
 	mutex_init(&bus->bus_lock);
 	INIT_LIST_HEAD(&bus->slaves);
+	INIT_LIST_HEAD(&bus->m_rt_list);
 
 	if (bus->ops->read_prop) {
 		ret = bus->ops->read_prop(bus);
@@ -77,6 +79,21 @@
 		return ret;
 	}
 
+	/*
+	 * Initialize clock values based on Master properties. The max
+	 * frequency is read from max_freq property. Current assumption
+	 * is that the bus will start at highest clock frequency when
+	 * powered on.
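+	 * For example, a max_freq of 9.6 MHz yields a max_dr_freq of
+	 * 19.2 MHz, since SDW_DOUBLE_RATE_FACTOR is 2.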
+	 *
+	 * Default active bank will be 0 as out of reset the Slaves have
+	 * to start with bank 0 (Table 40 of Spec)
+	 */
+	prop = &bus->prop;
+	bus->params.max_dr_freq = prop->max_freq * SDW_DOUBLE_RATE_FACTOR;
+	bus->params.curr_dr_freq = bus->params.max_dr_freq;
+	bus->params.curr_bank = SDW_BANK0;
+	bus->params.next_bank = SDW_BANK1;
+
 	return 0;
 }
 EXPORT_SYMBOL(sdw_add_bus_master);
@@ -576,6 +593,32 @@
 	mutex_unlock(&slave->bus->bus_lock);
 }
 
+int sdw_configure_dpn_intr(struct sdw_slave *slave,
+			int port, bool enable, int mask)
+{
+	u32 addr;
+	int ret;
+	u8 val = 0;
+
+	addr = SDW_DPN_INTMASK(port);
+
+	/* Set/Clear port ready interrupt mask */
+	if (enable) {
+		val |= mask;
+		val |= SDW_DPN_INT_PORT_READY;
+	} else {
+		val &= ~(mask);
+		val &= ~SDW_DPN_INT_PORT_READY;
+	}
+
+	ret = sdw_update(slave, addr, (mask | SDW_DPN_INT_PORT_READY), val);
+	if (ret < 0)
+		dev_err(slave->bus->dev,
+				"SDW_DPN_INTMASK write failed: %d\n", ret);
+
+	return ret;
+}
+
 static int sdw_initialize_slave(struct sdw_slave *slave)
 {
 	struct sdw_slave_prop *prop = &slave->prop;
diff --git a/drivers/soundwire/bus.h b/drivers/soundwire/bus.h
index 345c34d..3b15c4e 100644
--- a/drivers/soundwire/bus.h
+++ b/drivers/soundwire/bus.h
@@ -45,6 +45,78 @@
 	bool page;
 };
 
+#define SDW_DOUBLE_RATE_FACTOR		2
+
+extern int rows[SDW_FRAME_ROWS];
+extern int cols[SDW_FRAME_COLS];
+
+/**
+ * sdw_port_runtime: Runtime port parameters for Master or Slave
+ *
+ * @num: Port number. For audio streams, valid port numbers range
+ * from 1 to 14.
+ * @ch_mask: Channel mask
+ * @transport_params: Transport parameters
+ * @port_params: Port parameters
+ * @port_node: List node for Master or Slave port_list
+ *
+ * The SoundWire spec does not define ports for the Master interface,
+ * but the concept is logically extended to it here.
+ */
+struct sdw_port_runtime {
+	int num;
+	int ch_mask;
+	struct sdw_transport_params transport_params;
+	struct sdw_port_params port_params;
+	struct list_head port_node;
+};
+
+/**
+ * sdw_slave_runtime: Runtime Stream parameters for Slave
+ *
+ * @slave: Slave handle
+ * @direction: Data direction for Slave
+ * @ch_count: Number of channels handled by the Slave for
+ * this stream
+ * @m_rt_node: sdw_master_runtime list node
+ * @port_list: List of Slave Ports configured for this stream
+ */
+struct sdw_slave_runtime {
+	struct sdw_slave *slave;
+	enum sdw_data_direction direction;
+	unsigned int ch_count;
+	struct list_head m_rt_node;
+	struct list_head port_list;
+};
+
+/**
+ * sdw_master_runtime: Runtime stream parameters for Master
+ *
+ * @bus: Bus handle
+ * @stream: Stream runtime handle
+ * @direction: Data direction for Master
+ * @ch_count: Number of channels handled by the Master for
+ * this stream, can be zero.
+ * @slave_rt_list: Slave runtime list
+ * @port_list: List of Master Ports configured for this stream, can be zero.
+ * @bus_node: sdw_bus m_rt_list node
+ */
+struct sdw_master_runtime {
+	struct sdw_bus *bus;
+	struct sdw_stream_runtime *stream;
+	enum sdw_data_direction direction;
+	unsigned int ch_count;
+	struct list_head slave_rt_list;
+	struct list_head port_list;
+	struct list_head bus_node;
+};
+
+struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave,
+				enum sdw_data_direction direction,
+				unsigned int port_num);
+int sdw_configure_dpn_intr(struct sdw_slave *slave, int port,
+					bool enable, int mask);
+
 int sdw_transfer(struct sdw_bus *bus, struct sdw_msg *msg);
 int sdw_transfer_defer(struct sdw_bus *bus, struct sdw_msg *msg,
 				struct sdw_defer *defer);
diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c
index d5f3a70..283b283 100644
--- a/drivers/soundwire/bus_type.c
+++ b/drivers/soundwire/bus_type.c
@@ -83,17 +83,16 @@
 	 * attach to power domain but don't turn on (last arg)
 	 */
 	ret = dev_pm_domain_attach(dev, false);
-	if (ret != -EPROBE_DEFER) {
-		ret = drv->probe(slave, id);
-		if (ret) {
-			dev_err(dev, "Probe of %s failed: %d\n", drv->name, ret);
-			dev_pm_domain_detach(dev, false);
-		}
-	}
-
 	if (ret)
 		return ret;
 
+	ret = drv->probe(slave, id);
+	if (ret) {
+		dev_err(dev, "Probe of %s failed: %d\n", drv->name, ret);
+		dev_pm_domain_detach(dev, false);
+		return ret;
+	}
+
 	/* device is probed so let's read the properties now */
 	if (slave->ops && slave->ops->read_prop)
 		slave->ops->read_prop(slave);
diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
index 3a9b146..cb6a331 100644
--- a/drivers/soundwire/cadence_master.c
+++ b/drivers/soundwire/cadence_master.c
@@ -13,6 +13,8 @@
 #include <linux/mod_devicetable.h>
 #include <linux/soundwire/sdw_registers.h>
 #include <linux/soundwire/sdw.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
 #include "bus.h"
 #include "cadence_master.h"
 
@@ -396,7 +398,7 @@
 	return 0;
 }
 
-static enum sdw_command_response
+enum sdw_command_response
 cdns_xfer_msg(struct sdw_bus *bus, struct sdw_msg *msg)
 {
 	struct sdw_cdns *cdns = bus_to_cdns(bus);
@@ -422,8 +424,9 @@
 exit:
 	return ret;
 }
+EXPORT_SYMBOL(cdns_xfer_msg);
 
-static enum sdw_command_response
+enum sdw_command_response
 cdns_xfer_msg_defer(struct sdw_bus *bus,
 		struct sdw_msg *msg, struct sdw_defer *defer)
 {
@@ -443,8 +446,9 @@
 
 	return _cdns_xfer_msg(cdns, msg, cmd, 0, msg->len, true);
 }
+EXPORT_SYMBOL(cdns_xfer_msg_defer);
 
-static enum sdw_command_response
+enum sdw_command_response
 cdns_reset_page_addr(struct sdw_bus *bus, unsigned int dev_num)
 {
 	struct sdw_cdns *cdns = bus_to_cdns(bus);
@@ -456,6 +460,7 @@
 
 	return cdns_program_scp_addr(cdns, &msg);
 }
+EXPORT_SYMBOL(cdns_reset_page_addr);
 
 /*
  * IRQ handling
@@ -666,6 +671,120 @@
 }
 EXPORT_SYMBOL(sdw_cdns_enable_interrupt);
 
+static int cdns_allocate_pdi(struct sdw_cdns *cdns,
+			struct sdw_cdns_pdi **stream,
+			u32 num, u32 pdi_offset)
+{
+	struct sdw_cdns_pdi *pdi;
+	int i;
+
+	if (!num)
+		return 0;
+
+	pdi = devm_kcalloc(cdns->dev, num, sizeof(*pdi), GFP_KERNEL);
+	if (!pdi)
+		return -ENOMEM;
+
+	for (i = 0; i < num; i++) {
+		pdi[i].num = i + pdi_offset;
+		pdi[i].assigned = false;
+	}
+
+	*stream = pdi;
+	return 0;
+}
+
+/**
+ * sdw_cdns_pdi_init() - PDI initialization routine
+ *
+ * @cdns: Cadence instance
+ * @config: Stream configurations
+ */
+int sdw_cdns_pdi_init(struct sdw_cdns *cdns,
+			struct sdw_cdns_stream_config config)
+{
+	struct sdw_cdns_streams *stream;
+	int offset, i, ret;
+
+	cdns->pcm.num_bd = config.pcm_bd;
+	cdns->pcm.num_in = config.pcm_in;
+	cdns->pcm.num_out = config.pcm_out;
+	cdns->pdm.num_bd = config.pdm_bd;
+	cdns->pdm.num_in = config.pdm_in;
+	cdns->pdm.num_out = config.pdm_out;
+
+	/* Allocate PDIs for PCMs */
+	stream = &cdns->pcm;
+
+	/* First two PDIs are reserved for bulk transfers */
+	stream->num_bd -= CDNS_PCM_PDI_OFFSET;
+	offset = CDNS_PCM_PDI_OFFSET;
+
+	ret = cdns_allocate_pdi(cdns, &stream->bd,
+				stream->num_bd, offset);
+	if (ret)
+		return ret;
+
+	offset += stream->num_bd;
+
+	ret = cdns_allocate_pdi(cdns, &stream->in,
+				stream->num_in, offset);
+	if (ret)
+		return ret;
+
+	offset += stream->num_in;
+
+	ret = cdns_allocate_pdi(cdns, &stream->out,
+				stream->num_out, offset);
+	if (ret)
+		return ret;
+
+	/* Update total number of PCM PDIs */
+	stream->num_pdi = stream->num_bd + stream->num_in + stream->num_out;
+	cdns->num_ports = stream->num_pdi;
+
+	/* Allocate PDIs for PDMs */
+	stream = &cdns->pdm;
+	offset = CDNS_PDM_PDI_OFFSET;
+	ret = cdns_allocate_pdi(cdns, &stream->bd,
+				stream->num_bd, offset);
+	if (ret)
+		return ret;
+
+	offset += stream->num_bd;
+
+	ret = cdns_allocate_pdi(cdns, &stream->in,
+				stream->num_in, offset);
+	if (ret)
+		return ret;
+
+	offset += stream->num_in;
+
+	ret = cdns_allocate_pdi(cdns, &stream->out,
+				stream->num_out, offset);
+	if (ret)
+		return ret;
+
+	/* Update total number of PDM PDIs */
+	stream->num_pdi = stream->num_bd + stream->num_in + stream->num_out;
+	cdns->num_ports += stream->num_pdi;
+
+	cdns->ports = devm_kcalloc(cdns->dev, cdns->num_ports,
+				sizeof(*cdns->ports), GFP_KERNEL);
+	if (!cdns->ports)
+		return -ENOMEM;
+
+	for (i = 0; i < cdns->num_ports; i++) {
+		cdns->ports[i].assigned = false;
+		cdns->ports[i].num = i + 1; /* Port 0 reserved for bulk */
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(sdw_cdns_pdi_init);
+
 /**
  * sdw_cdns_init() - Cadence initialization
  * @cdns: Cadence instance
@@ -727,13 +846,133 @@
 }
 EXPORT_SYMBOL(sdw_cdns_init);
 
-struct sdw_master_ops sdw_cdns_master_ops = {
-	.read_prop = sdw_master_read_prop,
-	.xfer_msg = cdns_xfer_msg,
-	.xfer_msg_defer = cdns_xfer_msg_defer,
-	.reset_page_addr = cdns_reset_page_addr,
+int cdns_bus_conf(struct sdw_bus *bus, struct sdw_bus_params *params)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	int mcp_clkctrl_off, mcp_clkctrl;
+	int divider;
+
+	if (!params->curr_dr_freq) {
+		dev_err(cdns->dev, "NULL curr_dr_freq\n");
+		return -EINVAL;
+	}
+
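+	/*
+	 * divider is zero-based: e.g. max_dr_freq 19.2 MHz with
+	 * curr_dr_freq 9.6 MHz gives (19200000 / 9600000) - 1 = 1.
+	 */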
+	divider = (params->max_dr_freq / params->curr_dr_freq) - 1;
+
+	if (params->next_bank)
+		mcp_clkctrl_off = CDNS_MCP_CLK_CTRL1;
+	else
+		mcp_clkctrl_off = CDNS_MCP_CLK_CTRL0;
+
+	mcp_clkctrl = cdns_readl(cdns, mcp_clkctrl_off);
+	mcp_clkctrl |= divider;
+	cdns_writel(cdns, mcp_clkctrl_off, mcp_clkctrl);
+
+	return 0;
+}
+EXPORT_SYMBOL(cdns_bus_conf);
+
+static int cdns_port_params(struct sdw_bus *bus,
+		struct sdw_port_params *p_params, unsigned int bank)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	int dpn_config = 0, dpn_config_off;
+
+	if (bank)
+		dpn_config_off = CDNS_DPN_B1_CONFIG(p_params->num);
+	else
+		dpn_config_off = CDNS_DPN_B0_CONFIG(p_params->num);
+
+	dpn_config = cdns_readl(cdns, dpn_config_off);
+
+	dpn_config |= ((p_params->bps - 1) <<
+				SDW_REG_SHIFT(CDNS_DPN_CONFIG_WL));
+	dpn_config |= (p_params->flow_mode <<
+				SDW_REG_SHIFT(CDNS_DPN_CONFIG_PORT_FLOW));
+	dpn_config |= (p_params->data_mode <<
+				SDW_REG_SHIFT(CDNS_DPN_CONFIG_PORT_DAT));
+
+	cdns_writel(cdns, dpn_config_off, dpn_config);
+
+	return 0;
+}
+
+static int cdns_transport_params(struct sdw_bus *bus,
+			struct sdw_transport_params *t_params,
+			enum sdw_reg_bank bank)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	int dpn_offsetctrl = 0, dpn_offsetctrl_off;
+	int dpn_config = 0, dpn_config_off;
+	int dpn_hctrl = 0, dpn_hctrl_off;
+	int num = t_params->port_num;
+	int dpn_samplectrl_off;
+
+	/*
+	 * Note: Only full data port is supported on the Master side for
+	 * both PCM and PDM ports.
+	 */
+
+	if (bank) {
+		dpn_config_off = CDNS_DPN_B1_CONFIG(num);
+		dpn_samplectrl_off = CDNS_DPN_B1_SAMPLE_CTRL(num);
+		dpn_hctrl_off = CDNS_DPN_B1_HCTRL(num);
+		dpn_offsetctrl_off = CDNS_DPN_B1_OFFSET_CTRL(num);
+	} else {
+		dpn_config_off = CDNS_DPN_B0_CONFIG(num);
+		dpn_samplectrl_off = CDNS_DPN_B0_SAMPLE_CTRL(num);
+		dpn_hctrl_off = CDNS_DPN_B0_HCTRL(num);
+		dpn_offsetctrl_off = CDNS_DPN_B0_OFFSET_CTRL(num);
+	}
+
+	dpn_config = cdns_readl(cdns, dpn_config_off);
+
+	dpn_config |= (t_params->blk_grp_ctrl <<
+				SDW_REG_SHIFT(CDNS_DPN_CONFIG_BGC));
+	dpn_config |= (t_params->blk_pkg_mode <<
+				SDW_REG_SHIFT(CDNS_DPN_CONFIG_BPM));
+	cdns_writel(cdns, dpn_config_off, dpn_config);
+
+	dpn_offsetctrl |= (t_params->offset1 <<
+				SDW_REG_SHIFT(CDNS_DPN_OFFSET_CTRL_1));
+	dpn_offsetctrl |= (t_params->offset2 <<
+				SDW_REG_SHIFT(CDNS_DPN_OFFSET_CTRL_2));
+	cdns_writel(cdns, dpn_offsetctrl_off,  dpn_offsetctrl);
+
+	dpn_hctrl |= (t_params->hstart <<
+				SDW_REG_SHIFT(CDNS_DPN_HCTRL_HSTART));
+	dpn_hctrl |= (t_params->hstop << SDW_REG_SHIFT(CDNS_DPN_HCTRL_HSTOP));
+	dpn_hctrl |= (t_params->lane_ctrl <<
+				SDW_REG_SHIFT(CDNS_DPN_HCTRL_LCTRL));
+
+	cdns_writel(cdns, dpn_hctrl_off, dpn_hctrl);
+	cdns_writel(cdns, dpn_samplectrl_off, (t_params->sample_interval - 1));
+
+	return 0;
+}
+
+static int cdns_port_enable(struct sdw_bus *bus,
+		struct sdw_enable_ch *enable_ch, unsigned int bank)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	int dpn_chnen_off, ch_mask;
+
+	if (bank)
+		dpn_chnen_off = CDNS_DPN_B1_CH_EN(enable_ch->port_num);
+	else
+		dpn_chnen_off = CDNS_DPN_B0_CH_EN(enable_ch->port_num);
+
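+	/* enable acts as 0/1 here, so disabling clears the whole mask */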
+	ch_mask = enable_ch->ch_mask * enable_ch->enable;
+	cdns_writel(cdns, dpn_chnen_off, ch_mask);
+
+	return 0;
+}
+
+static const struct sdw_master_port_ops cdns_port_ops = {
+	.dpn_set_port_params = cdns_port_params,
+	.dpn_set_port_transport_params = cdns_transport_params,
+	.dpn_port_enable_ch = cdns_port_enable,
 };
-EXPORT_SYMBOL(sdw_cdns_master_ops);
 
 /**
  * sdw_cdns_probe() - Cadence probe routine
@@ -742,10 +981,204 @@
 int sdw_cdns_probe(struct sdw_cdns *cdns)
 {
 	init_completion(&cdns->tx_complete);
+	cdns->bus.port_ops = &cdns_port_ops;
 
 	return 0;
 }
 EXPORT_SYMBOL(sdw_cdns_probe);
 
+int cdns_set_sdw_stream(struct snd_soc_dai *dai,
+		void *stream, bool pcm, int direction)
+{
+	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
+	struct sdw_cdns_dma_data *dma;
+
+	dma = kzalloc(sizeof(*dma), GFP_KERNEL);
+	if (!dma)
+		return -ENOMEM;
+
+	if (pcm)
+		dma->stream_type = SDW_STREAM_PCM;
+	else
+		dma->stream_type = SDW_STREAM_PDM;
+
+	dma->bus = &cdns->bus;
+	dma->link_id = cdns->instance;
+
+	dma->stream = stream;
+
+	if (direction == SNDRV_PCM_STREAM_PLAYBACK)
+		dai->playback_dma_data = dma;
+	else
+		dai->capture_dma_data = dma;
+
+	return 0;
+}
+EXPORT_SYMBOL(cdns_set_sdw_stream);
+
+/**
+ * cdns_find_pdi() - Find a free PDI
+ *
+ * @cdns: Cadence instance
+ * @num: Number of PDIs
+ * @pdi: PDI instances
+ *
+ * Find and return a free PDI for a given PDI array
+ */
+static struct sdw_cdns_pdi *cdns_find_pdi(struct sdw_cdns *cdns,
+		unsigned int num, struct sdw_cdns_pdi *pdi)
+{
+	int i;
+
+	for (i = 0; i < num; i++) {
+		if (pdi[i].assigned)
+			continue;
+		pdi[i].assigned = true;
+		return &pdi[i];
+	}
+
+	return NULL;
+}
+
+/**
+ * sdw_cdns_config_stream: Configure a stream
+ *
+ * @cdns: Cadence instance
+ * @port: Cadence data port
+ * @ch: Channel count
+ * @dir: Data direction
+ * @pdi: PDI to be used
+ */
+void sdw_cdns_config_stream(struct sdw_cdns *cdns,
+				struct sdw_cdns_port *port,
+				u32 ch, u32 dir, struct sdw_cdns_pdi *pdi)
+{
+	u32 offset, val = 0;
+
+	if (dir == SDW_DATA_DIR_RX)
+		val = CDNS_PORTCTRL_DIRN;
+
+	offset = CDNS_PORTCTRL + port->num * CDNS_PORT_OFFSET;
+	cdns_updatel(cdns, offset, CDNS_PORTCTRL_DIRN, val);
+
+	val = port->num;
+	val |= ((1 << ch) - 1) << SDW_REG_SHIFT(CDNS_PDI_CONFIG_CHANNEL);
+	cdns_writel(cdns, CDNS_PDI_CONFIG(pdi->num), val);
+}
+EXPORT_SYMBOL(sdw_cdns_config_stream);
+
+/**
+ * cdns_get_num_pdi() - Get number of PDIs required
+ *
+ * @cdns: Cadence instance
+ * @pdi: PDI to be used
+ * @num: Number of PDIs
+ * @ch_count: Channel count
+ */
+static int cdns_get_num_pdi(struct sdw_cdns *cdns,
+		struct sdw_cdns_pdi *pdi,
+		unsigned int num, u32 ch_count)
+{
+	int i, pdis = 0;
+
+	for (i = 0; i < num; i++) {
+		if (pdi[i].assigned)
+			continue;
+
+		if (pdi[i].ch_count < ch_count)
+			ch_count -= pdi[i].ch_count;
+		else
+			ch_count = 0;
+
+		pdis++;
+
+		if (!ch_count)
+			break;
+	}
+
+	if (ch_count)
+		return 0;
+
+	return pdis;
+}
+
+/**
+ * sdw_cdns_get_stream() - Get stream information
+ *
+ * @cdns: Cadence instance
+ * @stream: Stream to be allocated
+ * @ch: Channel count
+ * @dir: Data direction
+ */
+int sdw_cdns_get_stream(struct sdw_cdns *cdns,
+			struct sdw_cdns_streams *stream,
+			u32 ch, u32 dir)
+{
+	int pdis = 0;
+
+	if (dir == SDW_DATA_DIR_RX)
+		pdis = cdns_get_num_pdi(cdns, stream->in, stream->num_in, ch);
+	else
+		pdis = cdns_get_num_pdi(cdns, stream->out, stream->num_out, ch);
+
+	/* check if we found PDI, else find in bi-directional */
+	if (!pdis)
+		pdis = cdns_get_num_pdi(cdns, stream->bd, stream->num_bd, ch);
+
+	return pdis;
+}
+EXPORT_SYMBOL(sdw_cdns_get_stream);
+
+/**
+ * sdw_cdns_alloc_stream() - Allocate a stream
+ *
+ * @cdns: Cadence instance
+ * @stream: Stream to be allocated
+ * @port: Cadence data port
+ * @ch: Channel count
+ * @dir: Data direction
+ */
+int sdw_cdns_alloc_stream(struct sdw_cdns *cdns,
+			struct sdw_cdns_streams *stream,
+			struct sdw_cdns_port *port, u32 ch, u32 dir)
+{
+	struct sdw_cdns_pdi *pdi = NULL;
+
+	if (dir == SDW_DATA_DIR_RX)
+		pdi = cdns_find_pdi(cdns, stream->num_in, stream->in);
+	else
+		pdi = cdns_find_pdi(cdns, stream->num_out, stream->out);
+
+	/* check if we found a PDI, else find in bi-directional */
+	if (!pdi)
+		pdi = cdns_find_pdi(cdns, stream->num_bd, stream->bd);
+
+	if (!pdi)
+		return -EIO;
+
+	port->pdi = pdi;
+	pdi->l_ch_num = 0;
+	pdi->h_ch_num = ch - 1;
+	pdi->dir = dir;
+	pdi->ch_count = ch;
+
+	return 0;
+}
+EXPORT_SYMBOL(sdw_cdns_alloc_stream);
+
+void sdw_cdns_shutdown(struct snd_pcm_substream *substream,
+					struct snd_soc_dai *dai)
+{
+	struct sdw_cdns_dma_data *dma;
+
+	dma = snd_soc_dai_get_dma_data(dai, substream);
+	if (!dma)
+		return;
+
+	snd_soc_dai_set_dma_data(dai, substream, NULL);
+	kfree(dma);
+}
+EXPORT_SYMBOL(sdw_cdns_shutdown);
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("Cadence Soundwire Library");
diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h
index beaf6c9..eb902b1 100644
--- a/drivers/soundwire/cadence_master.h
+++ b/drivers/soundwire/cadence_master.h
@@ -1,10 +1,117 @@
 // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 // Copyright(c) 2015-17 Intel Corporation.
+#include <sound/soc.h>
 
 #ifndef __SDW_CADENCE_H
 #define __SDW_CADENCE_H
 
 /**
+ * struct sdw_cdns_pdi: PDI (Physical Data Interface) instance
+ *
+ * @assigned: pdi assigned
+ * @num: pdi number
+ * @intel_alh_id: link identifier
+ * @l_ch_num: low channel for PDI
+ * @h_ch_num: high channel for PDI
+ * @ch_count: total channel count for PDI
+ * @dir: data direction
+ * @type: stream type, PDM or PCM
+ */
+struct sdw_cdns_pdi {
+	bool assigned;
+	int num;
+	int intel_alh_id;
+	int l_ch_num;
+	int h_ch_num;
+	int ch_count;
+	enum sdw_data_direction dir;
+	enum sdw_stream_type type;
+};
+
+/**
+ * struct sdw_cdns_port: Cadence port structure
+ *
+ * @num: port number
+ * @assigned: port assigned
+ * @ch: channel count
+ * @direction: data port direction
+ * @pdi: pdi for this port
+ */
+struct sdw_cdns_port {
+	unsigned int num;
+	bool assigned;
+	unsigned int ch;
+	enum sdw_data_direction direction;
+	struct sdw_cdns_pdi *pdi;
+};
+
+/**
+ * struct sdw_cdns_streams: Cadence stream data structure
+ *
+ * @num_bd: number of bidirectional streams
+ * @num_in: number of input streams
+ * @num_out: number of output streams
+ * @num_ch_bd: number of bidirectional stream channels
+ * @num_ch_in: number of input stream channels
+ * @num_ch_out: number of output stream channels
+ * @num_pdi: total number of PDIs
+ * @bd: bidirectional streams
+ * @in: input streams
+ * @out: output streams
+ */
+struct sdw_cdns_streams {
+	unsigned int num_bd;
+	unsigned int num_in;
+	unsigned int num_out;
+	unsigned int num_ch_bd;
+	unsigned int num_ch_in;
+	unsigned int num_ch_out;
+	unsigned int num_pdi;
+	struct sdw_cdns_pdi *bd;
+	struct sdw_cdns_pdi *in;
+	struct sdw_cdns_pdi *out;
+};
+
+/**
+ * struct sdw_cdns_stream_config: stream configuration
+ *
+ * @pcm_bd: number of bidirectional PCM streams supported
+ * @pcm_in: number of input PCM streams supported
+ * @pcm_out: number of output PCM streams supported
+ * @pdm_bd: number of bidirectional PDM streams supported
+ * @pdm_in: number of input PDM streams supported
+ * @pdm_out: number of output PDM streams supported
+ */
+struct sdw_cdns_stream_config {
+	unsigned int pcm_bd;
+	unsigned int pcm_in;
+	unsigned int pcm_out;
+	unsigned int pdm_bd;
+	unsigned int pdm_in;
+	unsigned int pdm_out;
+};
+
+/**
+ * struct sdw_cdns_dma_data: Cadence DMA data
+ *
+ * @name: SoundWire stream name
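+ * @stream: Stream runtime handle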
+ * @nr_ports: Number of ports
+ * @port: Ports
+ * @bus: Bus handle
+ * @stream_type: Stream type
+ * @link_id: Master link id
+ */
+struct sdw_cdns_dma_data {
+	char *name;
+	struct sdw_stream_runtime *stream;
+	int nr_ports;
+	struct sdw_cdns_port **port;
+	struct sdw_bus *bus;
+	enum sdw_stream_type stream_type;
+	int link_id;
+};
+
+/**
  * struct sdw_cdns - Cadence driver context
  * @dev: Linux device
  * @bus: Bus handle
@@ -12,6 +119,10 @@
  * @response_buf: SoundWire response buffer
  * @tx_complete: Tx completion
  * @defer: Defer pointer
+ * @ports: Data ports
+ * @num_ports: Total number of data ports
+ * @pcm: PCM streams
+ * @pdm: PDM streams
  * @registers: Cadence registers
  * @link_up: Link status
  * @msg_count: Messages sent on bus
@@ -25,6 +136,12 @@
 	struct completion tx_complete;
 	struct sdw_defer *defer;
 
+	struct sdw_cdns_port *ports;
+	int num_ports;
+
+	struct sdw_cdns_streams pcm;
+	struct sdw_cdns_streams pdm;
+
 	void __iomem *registers;
 
 	bool link_up;
@@ -42,7 +159,41 @@
 irqreturn_t sdw_cdns_thread(int irq, void *dev_id);
 
 int sdw_cdns_init(struct sdw_cdns *cdns);
+int sdw_cdns_pdi_init(struct sdw_cdns *cdns,
+			struct sdw_cdns_stream_config config);
 int sdw_cdns_enable_interrupt(struct sdw_cdns *cdns);
 
+int sdw_cdns_get_stream(struct sdw_cdns *cdns,
+			struct sdw_cdns_streams *stream,
+			u32 ch, u32 dir);
+int sdw_cdns_alloc_stream(struct sdw_cdns *cdns,
+			struct sdw_cdns_streams *stream,
+			struct sdw_cdns_port *port, u32 ch, u32 dir);
+void sdw_cdns_config_stream(struct sdw_cdns *cdns, struct sdw_cdns_port *port,
+			u32 ch, u32 dir, struct sdw_cdns_pdi *pdi);
 
+void sdw_cdns_shutdown(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai);
+int sdw_cdns_pcm_set_stream(struct snd_soc_dai *dai,
+				void *stream, int direction);
+int sdw_cdns_pdm_set_stream(struct snd_soc_dai *dai,
+				void *stream, int direction);
+
+enum sdw_command_response
+cdns_reset_page_addr(struct sdw_bus *bus, unsigned int dev_num);
+
+enum sdw_command_response
+cdns_xfer_msg(struct sdw_bus *bus, struct sdw_msg *msg);
+
+enum sdw_command_response
+cdns_xfer_msg_defer(struct sdw_bus *bus,
+		struct sdw_msg *msg, struct sdw_defer *defer);
+
+int cdns_bus_conf(struct sdw_bus *bus, struct sdw_bus_params *params);
+
+int cdns_set_sdw_stream(struct snd_soc_dai *dai,
+		void *stream, bool pcm, int direction);
 #endif /* __SDW_CADENCE_H */
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 86a7bd1..0a8990e 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -9,6 +9,8 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
 #include <linux/soundwire/sdw_registers.h>
 #include <linux/soundwire/sdw.h>
 #include <linux/soundwire/sdw_intel.h>
@@ -85,6 +87,12 @@
 #define SDW_ALH_STRMZCFG_DMAT		GENMASK(7, 0)
 #define SDW_ALH_STRMZCFG_CHN		GENMASK(19, 16)
 
+enum intel_pdi_type {
+	INTEL_PDI_IN = 0,
+	INTEL_PDI_OUT = 1,
+	INTEL_PDI_BD = 2,
+};
+
 struct sdw_intel {
 	struct sdw_cdns cdns;
 	int instance;
@@ -234,6 +242,490 @@
 	return ret;
 }
 
+/*
+ * PDI routines
+ */
+static void intel_pdi_init(struct sdw_intel *sdw,
+			struct sdw_cdns_stream_config *config)
+{
+	void __iomem *shim = sdw->res->shim;
+	unsigned int link_id = sdw->instance;
+	int pcm_cap, pdm_cap;
+
+	/* PCM Stream Capability */
+	pcm_cap = intel_readw(shim, SDW_SHIM_PCMSCAP(link_id));
+
+	config->pcm_bd = (pcm_cap & SDW_SHIM_PCMSCAP_BSS) >>
+					SDW_REG_SHIFT(SDW_SHIM_PCMSCAP_BSS);
+	config->pcm_in = (pcm_cap & SDW_SHIM_PCMSCAP_ISS) >>
+					SDW_REG_SHIFT(SDW_SHIM_PCMSCAP_ISS);
+	config->pcm_out = (pcm_cap & SDW_SHIM_PCMSCAP_OSS) >>
+					SDW_REG_SHIFT(SDW_SHIM_PCMSCAP_OSS);
+
+	/* PDM Stream Capability */
+	pdm_cap = intel_readw(shim, SDW_SHIM_PDMSCAP(link_id));
+
+	config->pdm_bd = (pdm_cap & SDW_SHIM_PDMSCAP_BSS) >>
+					SDW_REG_SHIFT(SDW_SHIM_PDMSCAP_BSS);
+	config->pdm_in = (pdm_cap & SDW_SHIM_PDMSCAP_ISS) >>
+					SDW_REG_SHIFT(SDW_SHIM_PDMSCAP_ISS);
+	config->pdm_out = (pdm_cap & SDW_SHIM_PDMSCAP_OSS) >>
+					SDW_REG_SHIFT(SDW_SHIM_PDMSCAP_OSS);
+}
+
+static int
+intel_pdi_get_ch_cap(struct sdw_intel *sdw, unsigned int pdi_num, bool pcm)
+{
+	void __iomem *shim = sdw->res->shim;
+	unsigned int link_id = sdw->instance;
+	int count;
+
+	if (pcm) {
+		count = intel_readw(shim, SDW_SHIM_PCMSYCHC(link_id, pdi_num));
+	} else {
+		count = intel_readw(shim, SDW_SHIM_PDMSCAP(link_id));
+		count = ((count & SDW_SHIM_PDMSCAP_CPSS) >>
+					SDW_REG_SHIFT(SDW_SHIM_PDMSCAP_CPSS));
+	}
+
+	/* zero based values for channel count in register */
+	count++;
+
+	return count;
+}
+
+static int intel_pdi_get_ch_update(struct sdw_intel *sdw,
+				struct sdw_cdns_pdi *pdi,
+				unsigned int num_pdi,
+				unsigned int *num_ch, bool pcm)
+{
+	int i, ch_count = 0;
+
+	for (i = 0; i < num_pdi; i++) {
+		pdi->ch_count = intel_pdi_get_ch_cap(sdw, pdi->num, pcm);
+		ch_count += pdi->ch_count;
+		pdi++;
+	}
+
+	*num_ch = ch_count;
+	return 0;
+}
+
+static int intel_pdi_stream_ch_update(struct sdw_intel *sdw,
+				struct sdw_cdns_streams *stream, bool pcm)
+{
+	intel_pdi_get_ch_update(sdw, stream->bd, stream->num_bd,
+			&stream->num_ch_bd, pcm);
+
+	intel_pdi_get_ch_update(sdw, stream->in, stream->num_in,
+			&stream->num_ch_in, pcm);
+
+	intel_pdi_get_ch_update(sdw, stream->out, stream->num_out,
+			&stream->num_ch_out, pcm);
+
+	return 0;
+}
+
+static int intel_pdi_ch_update(struct sdw_intel *sdw)
+{
+	/* First update PCM streams followed by PDM streams */
+	intel_pdi_stream_ch_update(sdw, &sdw->cdns.pcm, true);
+	intel_pdi_stream_ch_update(sdw, &sdw->cdns.pdm, false);
+
+	return 0;
+}
+
+static void
+intel_pdi_shim_configure(struct sdw_intel *sdw, struct sdw_cdns_pdi *pdi)
+{
+	void __iomem *shim = sdw->res->shim;
+	unsigned int link_id = sdw->instance;
+	int pdi_conf = 0;
+
+	pdi->intel_alh_id = (link_id * 16) + pdi->num + 5;
+
+	/*
+	 * Program stream parameters to stream SHIM register
+	 * This is applicable for PCM stream only.
+	 */
+	if (pdi->type != SDW_STREAM_PCM)
+		return;
+
+	if (pdi->dir == SDW_DATA_DIR_RX)
+		pdi_conf |= SDW_SHIM_PCMSYCM_DIR;
+	else
+		pdi_conf &= ~(SDW_SHIM_PCMSYCM_DIR);
+
+	pdi_conf |= (pdi->intel_alh_id <<
+			SDW_REG_SHIFT(SDW_SHIM_PCMSYCM_STREAM));
+	pdi_conf |= (pdi->l_ch_num << SDW_REG_SHIFT(SDW_SHIM_PCMSYCM_LCHN));
+	pdi_conf |= (pdi->h_ch_num << SDW_REG_SHIFT(SDW_SHIM_PCMSYCM_HCHN));
+
+	intel_writew(shim, SDW_SHIM_PCMSYCHM(link_id, pdi->num), pdi_conf);
+}
+
+static void
+intel_pdi_alh_configure(struct sdw_intel *sdw, struct sdw_cdns_pdi *pdi)
+{
+	void __iomem *alh = sdw->res->alh;
+	unsigned int link_id = sdw->instance;
+	unsigned int conf;
+
+	pdi->intel_alh_id = (link_id * 16) + pdi->num + 5;
+
+	/* Program Stream config ALH register */
+	conf = intel_readl(alh, SDW_ALH_STRMZCFG(pdi->intel_alh_id));
+
+	conf |= (SDW_ALH_STRMZCFG_DMAT_VAL <<
+			SDW_REG_SHIFT(SDW_ALH_STRMZCFG_DMAT));
+
+	conf |= ((pdi->ch_count - 1) <<
+			SDW_REG_SHIFT(SDW_ALH_STRMZCFG_CHN));
+
+	intel_writel(alh, SDW_ALH_STRMZCFG(pdi->intel_alh_id), conf);
+}
+
+static int intel_config_stream(struct sdw_intel *sdw,
+			struct snd_pcm_substream *substream,
+			struct snd_soc_dai *dai,
+			struct snd_pcm_hw_params *hw_params, int link_id)
+{
+	if (sdw->res->ops && sdw->res->ops->config_stream)
+		return sdw->res->ops->config_stream(sdw->res->arg,
+				substream, dai, hw_params, link_id);
+
+	return -EIO;
+}
+
+/*
+ * DAI routines
+ */
+
+static struct sdw_cdns_port *intel_alloc_port(struct sdw_intel *sdw,
+				u32 ch, u32 dir, bool pcm)
+{
+	struct sdw_cdns *cdns = &sdw->cdns;
+	struct sdw_cdns_port *port = NULL;
+	int i, ret = 0;
+
+	for (i = 0; i < cdns->num_ports; i++) {
+		if (cdns->ports[i].assigned)
+			continue;
+
+		port = &cdns->ports[i];
+		port->assigned = true;
+		port->direction = dir;
+		port->ch = ch;
+		break;
+	}
+
+	if (!port) {
+		dev_err(cdns->dev, "Unable to find a free port\n");
+		return NULL;
+	}
+
+	if (pcm) {
+		ret = sdw_cdns_alloc_stream(cdns, &cdns->pcm, port, ch, dir);
+		if (ret)
+			goto out;
+
+		intel_pdi_shim_configure(sdw, port->pdi);
+		sdw_cdns_config_stream(cdns, port, ch, dir, port->pdi);
+
+		intel_pdi_alh_configure(sdw, port->pdi);
+
+	} else {
+		ret = sdw_cdns_alloc_stream(cdns, &cdns->pdm, port, ch, dir);
+	}
+
+out:
+	if (ret) {
+		port->assigned = false;
+		port = NULL;
+	}
+
+	return port;
+}
+
+static void intel_port_cleanup(struct sdw_cdns_dma_data *dma)
+{
+	int i;
+
+	for (i = 0; i < dma->nr_ports; i++) {
+		if (dma->port[i]) {
+			dma->port[i]->pdi->assigned = false;
+			dma->port[i]->pdi = NULL;
+			dma->port[i]->assigned = false;
+			dma->port[i] = NULL;
+		}
+	}
+}
+
+static int intel_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
+{
+	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_cdns_dma_data *dma;
+	struct sdw_stream_config sconfig;
+	struct sdw_port_config *pconfig;
+	int ret, i, ch, dir;
+	bool pcm = true;
+
+	dma = snd_soc_dai_get_dma_data(dai, substream);
+	if (!dma)
+		return -EIO;
+
+	ch = params_channels(params);
+	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+		dir = SDW_DATA_DIR_RX;
+	else
+		dir = SDW_DATA_DIR_TX;
+
+	if (dma->stream_type == SDW_STREAM_PDM) {
+		/* TODO: Check whether PDM decimator is already in use */
+		dma->nr_ports = sdw_cdns_get_stream(cdns, &cdns->pdm, ch, dir);
+		pcm = false;
+	} else {
+		dma->nr_ports = sdw_cdns_get_stream(cdns, &cdns->pcm, ch, dir);
+	}
+
+	if (!dma->nr_ports) {
+		dev_err(dai->dev, "ports/resources not available");
+		return -EINVAL;
+	}
+
+	dma->port = kcalloc(dma->nr_ports, sizeof(*dma->port), GFP_KERNEL);
+	if (!dma->port)
+		return -ENOMEM;
+
+	for (i = 0; i < dma->nr_ports; i++) {
+		dma->port[i] = intel_alloc_port(sdw, ch, dir, pcm);
+		if (!dma->port[i]) {
+			ret = -EINVAL;
+			goto port_error;
+		}
+	}
+
+	/* Inform DSP about PDI stream number */
+	for (i = 0; i < dma->nr_ports; i++) {
+		ret = intel_config_stream(sdw, substream, dai, params,
+				dma->port[i]->pdi->intel_alh_id);
+		if (ret)
+			goto port_error;
+	}
+
+	sconfig.direction = dir;
+	sconfig.ch_count = ch;
+	sconfig.frame_rate = params_rate(params);
+	sconfig.type = dma->stream_type;
+
+	if (dma->stream_type == SDW_STREAM_PDM) {
+		sconfig.frame_rate *= 50;
+		sconfig.bps = 1;
+	} else {
+		sconfig.bps = snd_pcm_format_width(params_format(params));
+	}
+
+	/* Port configuration */
+	pconfig = kcalloc(dma->nr_ports, sizeof(*pconfig), GFP_KERNEL);
+	if (!pconfig) {
+		ret =  -ENOMEM;
+		goto port_error;
+	}
+
+	for (i = 0; i < dma->nr_ports; i++) {
+		pconfig[i].num = dma->port[i]->num;
+		pconfig[i].ch_mask = (1 << ch) - 1;
+	}
+
+	ret = sdw_stream_add_master(&cdns->bus, &sconfig,
+				pconfig, dma->nr_ports, dma->stream);
+	if (ret) {
+		dev_err(cdns->dev, "add master to stream failed:%d", ret);
+		goto stream_error;
+	}
+
+	kfree(pconfig);
+	return ret;
+
+stream_error:
+	kfree(pconfig);
+port_error:
+	intel_port_cleanup(dma);
+	kfree(dma->port);
+	return ret;
+}
+
+static int
+intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
+{
+	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
+	struct sdw_cdns_dma_data *dma;
+	int ret;
+
+	dma = snd_soc_dai_get_dma_data(dai, substream);
+	if (!dma)
+		return -EIO;
+
+	ret = sdw_stream_remove_master(&cdns->bus, dma->stream);
+	if (ret < 0)
+		dev_err(dai->dev, "remove master from stream %s failed: %d",
+							dma->stream->name, ret);
+
+	intel_port_cleanup(dma);
+	kfree(dma->port);
+	return ret;
+}
+
+static int intel_pcm_set_sdw_stream(struct snd_soc_dai *dai,
+					void *stream, int direction)
+{
+	return cdns_set_sdw_stream(dai, stream, true, direction);
+}
+
+static int intel_pdm_set_sdw_stream(struct snd_soc_dai *dai,
+					void *stream, int direction)
+{
+	return cdns_set_sdw_stream(dai, stream, false, direction);
+}
+
+static struct snd_soc_dai_ops intel_pcm_dai_ops = {
+	.hw_params = intel_hw_params,
+	.hw_free = intel_hw_free,
+	.shutdown = sdw_cdns_shutdown,
+	.set_sdw_stream = intel_pcm_set_sdw_stream,
+};
+
+static struct snd_soc_dai_ops intel_pdm_dai_ops = {
+	.hw_params = intel_hw_params,
+	.hw_free = intel_hw_free,
+	.shutdown = sdw_cdns_shutdown,
+	.set_sdw_stream = intel_pdm_set_sdw_stream,
+};
+
+static const struct snd_soc_component_driver dai_component = {
+	.name           = "soundwire",
+};
+
+static int intel_create_dai(struct sdw_cdns *cdns,
+			struct snd_soc_dai_driver *dais,
+			enum intel_pdi_type type,
+			u32 num, u32 off, u32 max_ch, bool pcm)
+{
+	int i;
+
+	if (num == 0)
+		return 0;
+
+	/* TODO: Read supported rates/formats from hardware */
+	for (i = off; i < (off + num); i++) {
+		dais[i].name = kasprintf(GFP_KERNEL, "SDW%d Pin%d",
+					cdns->instance, i);
+		if (!dais[i].name)
+			return -ENOMEM;
+
+		if (type == INTEL_PDI_BD || type == INTEL_PDI_OUT) {
+			dais[i].playback.stream_name = kasprintf(GFP_KERNEL,
+							"SDW%d Tx%d",
+							cdns->instance, i);
+			if (!dais[i].playback.stream_name) {
+				kfree(dais[i].name);
+				return -ENOMEM;
+			}
+
+			dais[i].playback.channels_min = 1;
+			dais[i].playback.channels_max = max_ch;
+			dais[i].playback.rates = SNDRV_PCM_RATE_48000;
+			dais[i].playback.formats = SNDRV_PCM_FMTBIT_S16_LE;
+		}
+
+		if (type == INTEL_PDI_BD || type == INTEL_PDI_IN) {
+			dais[i].capture.stream_name = kasprintf(GFP_KERNEL,
+							"SDW%d Rx%d",
+							cdns->instance, i);
+			if (!dais[i].capture.stream_name) {
+				kfree(dais[i].name);
+				kfree(dais[i].playback.stream_name);
+				return -ENOMEM;
+			}
+
+			dais[i].capture.channels_min = 1;
+			dais[i].capture.channels_max = max_ch;
+			dais[i].capture.rates = SNDRV_PCM_RATE_48000;
+			dais[i].capture.formats = SNDRV_PCM_FMTBIT_S16_LE;
+		}
+
+		dais[i].id = SDW_DAI_ID_RANGE_START + i;
+
+		if (pcm)
+			dais[i].ops = &intel_pcm_dai_ops;
+		else
+			dais[i].ops = &intel_pdm_dai_ops;
+	}
+
+	return 0;
+}
+
+static int intel_register_dai(struct sdw_intel *sdw)
+{
+	struct sdw_cdns *cdns = &sdw->cdns;
+	struct sdw_cdns_streams *stream;
+	struct snd_soc_dai_driver *dais;
+	int num_dai, ret, off = 0;
+
+	/* DAIs are created based on total number of PDIs supported */
+	num_dai = cdns->pcm.num_pdi + cdns->pdm.num_pdi;
+
+	dais = devm_kcalloc(cdns->dev, num_dai, sizeof(*dais), GFP_KERNEL);
+	if (!dais)
+		return -ENOMEM;
+
+	/* Create PCM DAIs */
+	stream = &cdns->pcm;
+
+	ret = intel_create_dai(cdns, dais, INTEL_PDI_IN,
+			stream->num_in, off, stream->num_ch_in, true);
+	if (ret)
+		return ret;
+
+	off += cdns->pcm.num_in;
+	ret = intel_create_dai(cdns, dais, INTEL_PDI_OUT,
+			cdns->pcm.num_out, off, stream->num_ch_out, true);
+	if (ret)
+		return ret;
+
+	off += cdns->pcm.num_out;
+	ret = intel_create_dai(cdns, dais, INTEL_PDI_BD,
+			cdns->pcm.num_bd, off, stream->num_ch_bd, true);
+	if (ret)
+		return ret;
+
+	/* Create PDM DAIs */
+	stream = &cdns->pdm;
+	off += cdns->pcm.num_bd;
+	ret = intel_create_dai(cdns, dais, INTEL_PDI_IN,
+			cdns->pdm.num_in, off, stream->num_ch_in, false);
+	if (ret)
+		return ret;
+
+	off += cdns->pdm.num_in;
+	ret = intel_create_dai(cdns, dais, INTEL_PDI_OUT,
+			cdns->pdm.num_out, off, stream->num_ch_out, false);
+	if (ret)
+		return ret;
+
+	off += cdns->pdm.num_out;
+	ret = intel_create_dai(cdns, dais, INTEL_PDI_BD,
+			cdns->pdm.num_bd, off, stream->num_ch_bd, false);
+	if (ret)
+		return ret;
+
+	return snd_soc_register_component(cdns->dev, &dai_component,
+				dais, num_dai);
+}
+
 static int intel_prop_read(struct sdw_bus *bus)
 {
 	/* Initialize with default handler to read all DisCo properties */
@@ -252,11 +744,20 @@
 	return 0;
 }
 
+static struct sdw_master_ops sdw_intel_ops = {
+	.read_prop = sdw_master_read_prop,
+	.xfer_msg = cdns_xfer_msg,
+	.xfer_msg_defer = cdns_xfer_msg_defer,
+	.reset_page_addr = cdns_reset_page_addr,
+	.set_bus_conf = cdns_bus_conf,
+};
+
 /*
  * probe and init
  */
 static int intel_probe(struct platform_device *pdev)
 {
+	struct sdw_cdns_stream_config config;
 	struct sdw_intel *sdw;
 	int ret;
 
@@ -276,8 +777,11 @@
 	sdw_cdns_probe(&sdw->cdns);
 
 	/* Set property read ops */
-	sdw_cdns_master_ops.read_prop = intel_prop_read;
-	sdw->cdns.bus.ops = &sdw_cdns_master_ops;
+	sdw_intel_ops.read_prop = intel_prop_read;
+	sdw->cdns.bus.ops = &sdw_intel_ops;
 
 	platform_set_drvdata(pdev, sdw);
 
@@ -296,9 +800,15 @@
 		goto err_init;
 
 	ret = sdw_cdns_enable_interrupt(&sdw->cdns);
+	if (ret)
+		goto err_init;
+
+	/* Read the PDI config and initialize cadence PDI */
+	intel_pdi_init(sdw, &config);
+	ret = sdw_cdns_pdi_init(&sdw->cdns, config);
 	if (ret)
 		goto err_init;
 
+	intel_pdi_ch_update(sdw);
+
 	/* Acquire IRQ */
 	ret = request_threaded_irq(sdw->res->irq, sdw_cdns_irq,
 			sdw_cdns_thread, IRQF_SHARED, KBUILD_MODNAME,
@@ -309,8 +819,18 @@
 		goto err_init;
 	}
 
+	/* Register DAIs */
+	ret = intel_register_dai(sdw);
+	if (ret) {
+		dev_err(sdw->cdns.dev, "DAI registration failed: %d", ret);
+		goto err_dai;
+	}
+
 	return 0;
 
+err_dai:
+	free_irq(sdw->res->irq, sdw);
 err_init:
 	sdw_delete_bus_master(&sdw->cdns.bus);
 err_master_reg:
@@ -324,6 +844,7 @@
 	sdw = platform_get_drvdata(pdev);
 
 	free_irq(sdw->res->irq, sdw);
+	snd_soc_unregister_component(sdw->cdns.dev);
 	sdw_delete_bus_master(&sdw->cdns.bus);
 
 	return 0;
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index ffa30d9..c1a5bac 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -10,6 +10,8 @@
  * @shim: Audio shim pointer
  * @alh: ALH (Audio Link Hub) pointer
  * @irq: Interrupt line
+ * @ops: Shim callback ops
+ * @arg: Shim callback ops argument
  *
  * This is set as pdata for each link instance.
  */
@@ -18,6 +20,8 @@
 	void __iomem *shim;
 	void __iomem *alh;
 	int irq;
+	const struct sdw_intel_ops *ops;
+	void *arg;
 };
 
 #endif /* __SDW_INTEL_LOCAL_H */
diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c
index 6f2bb99..d1ea6b4 100644
--- a/drivers/soundwire/intel_init.c
+++ b/drivers/soundwire/intel_init.c
@@ -111,6 +111,9 @@
 		link->res.shim = res->mmio_base + SDW_SHIM_BASE;
 		link->res.alh = res->mmio_base + SDW_ALH_BASE;
 
+		link->res.ops = res->ops;
+		link->res.arg = res->arg;
+
 		memset(&pdevinfo, 0, sizeof(pdevinfo));
 
 		pdevinfo.parent = res->parent;
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
new file mode 100644
index 0000000..8974a0f
--- /dev/null
+++ b/drivers/soundwire/stream.c
@@ -0,0 +1,1479 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2015-18 Intel Corporation.
+
+/*
+ *  stream.c - SoundWire Bus stream operations.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/soundwire/sdw_registers.h>
+#include <linux/soundwire/sdw.h>
+#include "bus.h"
+
+/*
+ * Array of supported rows and columns as per MIPI SoundWire Specification 1.1
+ *
+ * The rows are arranged as per the array index value programmed
+ * in the register. Index 15 holds a dummy value of 0 to fill the hole.
+ */
+int rows[SDW_FRAME_ROWS] = {48, 50, 60, 64, 75, 80, 125, 147,
+			96, 100, 120, 128, 150, 160, 250, 0,
+			192, 200, 240, 256, 72, 144, 90, 180};
+
+int cols[SDW_FRAME_COLS] = {2, 4, 6, 8, 10, 12, 14, 16};
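+
+/*
+ * Informative note (not from the MIPI text above): a SoundWire frame is
+ * a rows x columns matrix of bit slots, so the default 48 x 2 shape
+ * carries 96 bit slots per frame. The helpers below map a requested
+ * row/column count to the index value that sdw_bank_switch() later
+ * programs into the SCP FrameCtrl register.
+ */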
+
+static int sdw_find_col_index(int col)
+{
+	int i;
+
+	for (i = 0; i < SDW_FRAME_COLS; i++) {
+		if (cols[i] == col)
+			return i;
+	}
+
+	pr_warn("Requested column not found, selecting lowest column no: 2\n");
+	return 0;
+}
+
+static int sdw_find_row_index(int row)
+{
+	int i;
+
+	for (i = 0; i < SDW_FRAME_ROWS; i++) {
+		if (rows[i] == row)
+			return i;
+	}
+
+	pr_warn("Requested row not found, selecting lowest row no: 48\n");
+	return 0;
+}
+
+static int _sdw_program_slave_port_params(struct sdw_bus *bus,
+				struct sdw_slave *slave,
+				struct sdw_transport_params *t_params,
+				enum sdw_dpn_type type)
+{
+	u32 addr1, addr2, addr3, addr4;
+	int ret;
+	u16 wbuf;
+
+	if (bus->params.next_bank) {
+		addr1 = SDW_DPN_OFFSETCTRL2_B1(t_params->port_num);
+		addr2 = SDW_DPN_BLOCKCTRL3_B1(t_params->port_num);
+		addr3 = SDW_DPN_SAMPLECTRL2_B1(t_params->port_num);
+		addr4 = SDW_DPN_HCTRL_B1(t_params->port_num);
+	} else {
+		addr1 = SDW_DPN_OFFSETCTRL2_B0(t_params->port_num);
+		addr2 = SDW_DPN_BLOCKCTRL3_B0(t_params->port_num);
+		addr3 = SDW_DPN_SAMPLECTRL2_B0(t_params->port_num);
+		addr4 = SDW_DPN_HCTRL_B0(t_params->port_num);
+	}
+
+	/* Program DPN_OffsetCtrl2 registers */
+	ret = sdw_write(slave, addr1, t_params->offset2);
+	if (ret < 0) {
+		dev_err(bus->dev, "DPN_OffsetCtrl2 register write failed");
+		return ret;
+	}
+
+	/* Program DPN_BlockCtrl3 register */
+	ret = sdw_write(slave, addr2, t_params->blk_pkg_mode);
+	if (ret < 0) {
+		dev_err(bus->dev, "DPN_BlockCtrl3 register write failed");
+		return ret;
+	}
+
+	/*
+	 * Data ports are FULL, SIMPLE and REDUCED. This function handles
+	 * FULL and REDUCED only, and beyond this point only FULL is
+	 * handled, so bail out if this is not a FULL data port type.
+	 */
+	if (type != SDW_DPN_FULL)
+		return ret;
+
+	/* Program DPN_SampleCtrl2 register */
+	wbuf = (t_params->sample_interval - 1);
+	wbuf &= SDW_DPN_SAMPLECTRL_HIGH;
+	wbuf >>= SDW_REG_SHIFT(SDW_DPN_SAMPLECTRL_HIGH);
+
+	ret = sdw_write(slave, addr3, wbuf);
+	if (ret < 0) {
+		dev_err(bus->dev, "DPN_SampleCtrl2 register write failed");
+		return ret;
+	}
+
+	/* Program DPN_HCtrl register */
+	wbuf = t_params->hstart;
+	wbuf <<= SDW_REG_SHIFT(SDW_DPN_HCTRL_HSTART);
+	wbuf |= t_params->hstop;
+
+	ret = sdw_write(slave, addr4, wbuf);
+	if (ret < 0)
+		dev_err(bus->dev, "DPN_HCtrl register write failed");
+
+	return ret;
+}
+
+static int sdw_program_slave_port_params(struct sdw_bus *bus,
+			struct sdw_slave_runtime *s_rt,
+			struct sdw_port_runtime *p_rt)
+{
+	struct sdw_transport_params *t_params = &p_rt->transport_params;
+	struct sdw_port_params *p_params = &p_rt->port_params;
+	struct sdw_slave_prop *slave_prop = &s_rt->slave->prop;
+	u32 addr1, addr2, addr3, addr4, addr5, addr6;
+	struct sdw_dpn_prop *dpn_prop;
+	int ret;
+	u8 wbuf;
+
+	dpn_prop = sdw_get_slave_dpn_prop(s_rt->slave,
+					s_rt->direction,
+					t_params->port_num);
+	if (!dpn_prop)
+		return -EINVAL;
+
+	addr1 = SDW_DPN_PORTCTRL(t_params->port_num);
+	addr2 = SDW_DPN_BLOCKCTRL1(t_params->port_num);
+
+	if (bus->params.next_bank) {
+		addr3 = SDW_DPN_SAMPLECTRL1_B1(t_params->port_num);
+		addr4 = SDW_DPN_OFFSETCTRL1_B1(t_params->port_num);
+		addr5 = SDW_DPN_BLOCKCTRL2_B1(t_params->port_num);
+		addr6 = SDW_DPN_LANECTRL_B1(t_params->port_num);
+
+	} else {
+		addr3 = SDW_DPN_SAMPLECTRL1_B0(t_params->port_num);
+		addr4 = SDW_DPN_OFFSETCTRL1_B0(t_params->port_num);
+		addr5 = SDW_DPN_BLOCKCTRL2_B0(t_params->port_num);
+		addr6 = SDW_DPN_LANECTRL_B0(t_params->port_num);
+	}
+
+	/* Program DPN_PortCtrl register */
+	wbuf = p_params->data_mode << SDW_REG_SHIFT(SDW_DPN_PORTCTRL_DATAMODE);
+	wbuf |= p_params->flow_mode;
+
+	ret = sdw_update(s_rt->slave, addr1, 0xF, wbuf);
+	if (ret < 0) {
+		dev_err(&s_rt->slave->dev,
+			"DPN_PortCtrl register write failed for port %d",
+			t_params->port_num);
+		return ret;
+	}
+
+	/* Program DPN_BlockCtrl1 register */
+	ret = sdw_write(s_rt->slave, addr2, (p_params->bps - 1));
+	if (ret < 0) {
+		dev_err(&s_rt->slave->dev,
+			"DPN_BlockCtrl1 register write failed for port %d",
+			t_params->port_num);
+		return ret;
+	}
+
+	/* Program DPN_SampleCtrl1 register */
+	wbuf = (t_params->sample_interval - 1) & SDW_DPN_SAMPLECTRL_LOW;
+	ret = sdw_write(s_rt->slave, addr3, wbuf);
+	if (ret < 0) {
+		dev_err(&s_rt->slave->dev,
+			"DPN_SampleCtrl1 register write failed for port %d",
+			t_params->port_num);
+		return ret;
+	}
+
+	/* Program DPN_OffsetCtrl1 registers */
+	ret = sdw_write(s_rt->slave, addr4, t_params->offset1);
+	if (ret < 0) {
+		dev_err(&s_rt->slave->dev,
+			"DPN_OffsetCtrl1 register write failed for port %d",
+			t_params->port_num);
+		return ret;
+	}
+
+	/* Program DPN_BlockCtrl2 register*/
+	if (t_params->blk_grp_ctrl_valid) {
+		ret = sdw_write(s_rt->slave, addr5, t_params->blk_grp_ctrl);
+		if (ret < 0) {
+			dev_err(&s_rt->slave->dev,
+				"DPN_BlockCtrl2 reg write failed for port %d",
+				t_params->port_num);
+			return ret;
+		}
+	}
+
+	/* program DPN_LaneCtrl register */
+	if (slave_prop->lane_control_support) {
+		ret = sdw_write(s_rt->slave, addr6, t_params->lane_ctrl);
+		if (ret < 0) {
+			dev_err(&s_rt->slave->dev,
+				"DPN_LaneCtrl register write failed for port %d",
+				t_params->port_num);
+			return ret;
+		}
+	}
+
+	if (dpn_prop->type != SDW_DPN_SIMPLE) {
+		ret = _sdw_program_slave_port_params(bus, s_rt->slave,
+						t_params, dpn_prop->type);
+		if (ret < 0)
+			dev_err(&s_rt->slave->dev,
+				"Transport reg write failed for port: %d",
+				t_params->port_num);
+	}
+
+	return ret;
+}
+
+static int sdw_program_master_port_params(struct sdw_bus *bus,
+		struct sdw_port_runtime *p_rt)
+{
+	int ret;
+
+	/*
+	 * We need to set transport and port parameters for the port.
+	 * Transport parameters refer to the sample interval, offsets and
+	 * hstart/hstop etc. of the data. Port parameters refer to the word
+	 * length, flow mode etc. of the port.
+	 */
+	ret = bus->port_ops->dpn_set_port_transport_params(bus,
+					&p_rt->transport_params,
+					bus->params.next_bank);
+	if (ret < 0)
+		return ret;
+
+	return bus->port_ops->dpn_set_port_params(bus,
+				&p_rt->port_params,
+				bus->params.next_bank);
+}
+
+/**
+ * sdw_program_port_params() - Programs transport parameters of Master(s)
+ * and Slave(s)
+ *
+ * @m_rt: Master stream runtime
+ */
+static int sdw_program_port_params(struct sdw_master_runtime *m_rt)
+{
+	struct sdw_slave_runtime *s_rt = NULL;
+	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_port_runtime *p_rt;
+	int ret = 0;
+
+	/* Program transport & port parameters for Slave(s) */
+	list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
+		list_for_each_entry(p_rt, &s_rt->port_list, port_node) {
+			ret = sdw_program_slave_port_params(bus, s_rt, p_rt);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	/* Program transport & port parameters for Master(s) */
+	list_for_each_entry(p_rt, &m_rt->port_list, port_node) {
+		ret = sdw_program_master_port_params(bus, p_rt);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * sdw_enable_disable_slave_ports: Enable/disable slave data port
+ *
+ * @bus: bus instance
+ * @s_rt: slave runtime
+ * @p_rt: port runtime
+ * @en: enable or disable operation
+ *
+ * This function only sets the enable/disable bits in the relevant bank, the
+ * actual enable/disable is done with a bank switch
+ */
+static int sdw_enable_disable_slave_ports(struct sdw_bus *bus,
+				struct sdw_slave_runtime *s_rt,
+				struct sdw_port_runtime *p_rt, bool en)
+{
+	struct sdw_transport_params *t_params = &p_rt->transport_params;
+	u32 addr;
+	int ret;
+
+	if (bus->params.next_bank)
+		addr = SDW_DPN_CHANNELEN_B1(p_rt->num);
+	else
+		addr = SDW_DPN_CHANNELEN_B0(p_rt->num);
+
+	/*
+	 * Since bus doesn't support sharing a port across two streams,
+	 * it is safe to reset this register
+	 */
+	if (en)
+		ret = sdw_update(s_rt->slave, addr, 0xFF, p_rt->ch_mask);
+	else
+		ret = sdw_update(s_rt->slave, addr, 0xFF, 0x0);
+
+	if (ret < 0)
+		dev_err(&s_rt->slave->dev,
+			"Slave chn_en reg write failed:%d port:%d",
+			ret, t_params->port_num);
+
+	return ret;
+}
+
+static int sdw_enable_disable_master_ports(struct sdw_master_runtime *m_rt,
+			struct sdw_port_runtime *p_rt, bool en)
+{
+	struct sdw_transport_params *t_params = &p_rt->transport_params;
+	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_enable_ch enable_ch;
+	int ret = 0;
+
+	enable_ch.port_num = p_rt->num;
+	enable_ch.ch_mask = p_rt->ch_mask;
+	enable_ch.enable = en;
+
+	/* Perform Master port channel(s) enable/disable */
+	if (bus->port_ops->dpn_port_enable_ch) {
+		ret = bus->port_ops->dpn_port_enable_ch(bus,
+				&enable_ch, bus->params.next_bank);
+		if (ret < 0) {
+			dev_err(bus->dev,
+				"Master chn_en write failed:%d port:%d",
+				ret, t_params->port_num);
+			return ret;
+		}
+	} else {
+		dev_err(bus->dev,
+			"dpn_port_enable_ch not supported, %s failed\n",
+			en ? "enable" : "disable");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * sdw_enable_disable_ports() - Enable/disable port(s) for Master and
+ * Slave(s)
+ *
+ * @m_rt: Master stream runtime
+ * @en: mode (enable/disable)
+ */
+static int sdw_enable_disable_ports(struct sdw_master_runtime *m_rt, bool en)
+{
+	struct sdw_port_runtime *s_port, *m_port;
+	struct sdw_slave_runtime *s_rt = NULL;
+	int ret = 0;
+
+	/* Enable/Disable Slave port(s) */
+	list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
+		list_for_each_entry(s_port, &s_rt->port_list, port_node) {
+			ret = sdw_enable_disable_slave_ports(m_rt->bus, s_rt,
+							s_port, en);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	/* Enable/Disable Master port(s) */
+	list_for_each_entry(m_port, &m_rt->port_list, port_node) {
+		ret = sdw_enable_disable_master_ports(m_rt, m_port, en);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int sdw_do_port_prep(struct sdw_slave_runtime *s_rt,
+		struct sdw_prepare_ch prep_ch, enum sdw_port_prep_ops cmd)
+{
+	const struct sdw_slave_ops *ops = s_rt->slave->ops;
+	int ret;
+
+	if (ops->port_prep) {
+		ret = ops->port_prep(s_rt->slave, &prep_ch, cmd);
+		if (ret < 0) {
+			dev_err(&s_rt->slave->dev,
+				"Slave Port Prep cmd %d failed: %d", cmd, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus,
+			struct sdw_slave_runtime *s_rt,
+			struct sdw_port_runtime *p_rt, bool prep)
+{
+	struct completion *port_ready = NULL;
+	struct sdw_dpn_prop *dpn_prop;
+	struct sdw_prepare_ch prep_ch;
+	unsigned int time_left;
+	bool intr = false;
+	int ret = 0, val;
+	u32 addr;
+
+	prep_ch.num = p_rt->num;
+	prep_ch.ch_mask = p_rt->ch_mask;
+
+	dpn_prop = sdw_get_slave_dpn_prop(s_rt->slave,
+					s_rt->direction,
+					prep_ch.num);
+	if (!dpn_prop) {
+		dev_err(bus->dev,
+			"Slave Port:%d properties not found", prep_ch.num);
+		return -EINVAL;
+	}
+
+	prep_ch.prepare = prep;
+
+	prep_ch.bank = bus->params.next_bank;
+
+	if (dpn_prop->device_interrupts || !dpn_prop->simple_ch_prep_sm)
+		intr = true;
+
+	/*
+	 * Enable interrupt before Port prepare.
+	 * For Port de-prepare, it is assumed that port
+	 * was prepared earlier
+	 */
+	if (prep && intr) {
+		ret = sdw_configure_dpn_intr(s_rt->slave, p_rt->num, prep,
+						dpn_prop->device_interrupts);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Inform slave about the impending port prepare */
+	sdw_do_port_prep(s_rt, prep_ch, SDW_OPS_PORT_PRE_PREP);
+
+	/* Prepare Slave port implementing CP_SM */
+	if (!dpn_prop->simple_ch_prep_sm) {
+		addr = SDW_DPN_PREPARECTRL(p_rt->num);
+
+		if (prep)
+			ret = sdw_update(s_rt->slave, addr,
+					0xFF, p_rt->ch_mask);
+		else
+			ret = sdw_update(s_rt->slave, addr, 0xFF, 0x0);
+
+		if (ret < 0) {
+			dev_err(&s_rt->slave->dev,
+				"Slave prep_ctrl reg write failed");
+			return ret;
+		}
+
+		/* Wait for completion on port ready */
+		port_ready = &s_rt->slave->port_ready[prep_ch.num];
+		time_left = wait_for_completion_timeout(port_ready,
+				msecs_to_jiffies(dpn_prop->ch_prep_timeout));
+
+		val = sdw_read(s_rt->slave, SDW_DPN_PREPARESTATUS(p_rt->num));
+		val &= p_rt->ch_mask;
+		if (!time_left || val) {
+			dev_err(&s_rt->slave->dev,
+				"Chn prep failed for port:%d", prep_ch.num);
+			return -ETIMEDOUT;
+		}
+	}
+
+	/* Inform slaves about ports prepared */
+	sdw_do_port_prep(s_rt, prep_ch, SDW_OPS_PORT_POST_PREP);
+
+	/* Disable interrupt after Port de-prepare */
+	if (!prep && intr)
+		ret = sdw_configure_dpn_intr(s_rt->slave, p_rt->num, prep,
+						dpn_prop->device_interrupts);
+
+	return ret;
+}
+
+static int sdw_prep_deprep_master_ports(struct sdw_master_runtime *m_rt,
+				struct sdw_port_runtime *p_rt, bool prep)
+{
+	struct sdw_transport_params *t_params = &p_rt->transport_params;
+	struct sdw_bus *bus = m_rt->bus;
+	const struct sdw_master_port_ops *ops = bus->port_ops;
+	struct sdw_prepare_ch prep_ch;
+	int ret = 0;
+
+	prep_ch.num = p_rt->num;
+	prep_ch.ch_mask = p_rt->ch_mask;
+	prep_ch.prepare = prep; /* Prepare/De-prepare */
+	prep_ch.bank = bus->params.next_bank;
+
+	/* Pre-prepare/Pre-deprepare port(s) */
+	if (ops->dpn_port_prep) {
+		ret = ops->dpn_port_prep(bus, &prep_ch);
+		if (ret < 0) {
+			dev_err(bus->dev, "Port prepare failed for port:%d",
+					t_params->port_num);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * sdw_prep_deprep_ports() - Prepare/De-prepare port(s) for Master(s) and
+ * Slave(s)
+ *
+ * @m_rt: Master runtime handle
+ * @prep: Prepare or De-prepare
+ */
+static int sdw_prep_deprep_ports(struct sdw_master_runtime *m_rt, bool prep)
+{
+	struct sdw_slave_runtime *s_rt = NULL;
+	struct sdw_port_runtime *p_rt;
+	int ret = 0;
+
+	/* Prepare/De-prepare Slave port(s) */
+	list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
+		list_for_each_entry(p_rt, &s_rt->port_list, port_node) {
+			ret = sdw_prep_deprep_slave_ports(m_rt->bus, s_rt,
+							p_rt, prep);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	/* Prepare/De-prepare Master port(s) */
+	list_for_each_entry(p_rt, &m_rt->port_list, port_node) {
+		ret = sdw_prep_deprep_master_ports(m_rt, p_rt, prep);
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+/**
+ * sdw_notify_config() - Notify bus configuration
+ *
+ * @m_rt: Master runtime handle
+ *
+ * This function notifies the Master(s) and Slave(s) of the
+ * new bus configuration.
+ */
+static int sdw_notify_config(struct sdw_master_runtime *m_rt)
+{
+	struct sdw_slave_runtime *s_rt;
+	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_slave *slave;
+	int ret = 0;
+
+	if (bus->ops->set_bus_conf) {
+		ret = bus->ops->set_bus_conf(bus, &bus->params);
+		if (ret < 0)
+			return ret;
+	}
+
+	list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
+		slave = s_rt->slave;
+
+		if (slave->ops->bus_config) {
+			ret = slave->ops->bus_config(slave, &bus->params);
+			if (ret < 0) {
+				dev_err(bus->dev, "Notify Slave: %d failed",
+					slave->dev_num);
+				return ret;
+			}
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * sdw_program_params() - Program transport and port parameters for Master(s)
+ * and Slave(s)
+ *
+ * @bus: SDW bus instance
+ */
+static int sdw_program_params(struct sdw_bus *bus)
+{
+	struct sdw_master_runtime *m_rt = NULL;
+	int ret = 0;
+
+	list_for_each_entry(m_rt, &bus->m_rt_list, bus_node) {
+		ret = sdw_program_port_params(m_rt);
+		if (ret < 0) {
+			dev_err(bus->dev,
+				"Program transport params failed: %d", ret);
+			return ret;
+		}
+
+		ret = sdw_notify_config(m_rt);
+		if (ret < 0) {
+			dev_err(bus->dev, "Notify bus config failed: %d", ret);
+			return ret;
+		}
+
+		/* Enable port(s) on alternate bank for all active streams */
+		if (m_rt->stream->state != SDW_STREAM_ENABLED)
+			continue;
+
+		ret = sdw_enable_disable_ports(m_rt, true);
+		if (ret < 0) {
+			dev_err(bus->dev, "Enable channel failed: %d", ret);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static int sdw_bank_switch(struct sdw_bus *bus)
+{
+	int col_index, row_index;
+	struct sdw_msg *wr_msg;
+	u8 *wbuf = NULL;
+	int ret = 0;
+	u16 addr;
+
+	wr_msg = kzalloc(sizeof(*wr_msg), GFP_KERNEL);
+	if (!wr_msg)
+		return -ENOMEM;
+
+	wbuf = kzalloc(sizeof(*wbuf), GFP_KERNEL);
+	if (!wbuf) {
+		ret = -ENOMEM;
+		goto error_1;
+	}
+
+	/* Get row and column index to program register */
+	col_index = sdw_find_col_index(bus->params.col);
+	row_index = sdw_find_row_index(bus->params.row);
+	wbuf[0] = col_index | (row_index << 3);
+
+	if (bus->params.next_bank)
+		addr = SDW_SCP_FRAMECTRL_B1;
+	else
+		addr = SDW_SCP_FRAMECTRL_B0;
+
+	sdw_fill_msg(wr_msg, NULL, addr, 1, SDW_BROADCAST_DEV_NUM,
+					SDW_MSG_FLAG_WRITE, wbuf);
+	wr_msg->ssp_sync = true;
+
+	ret = sdw_transfer(bus, wr_msg);
+	if (ret < 0) {
+		dev_err(bus->dev, "Slave frame_ctrl reg write failed");
+		goto error;
+	}
+
+	kfree(wr_msg);
+	kfree(wbuf);
+	bus->defer_msg.msg = NULL;
+	bus->params.curr_bank = !bus->params.curr_bank;
+	bus->params.next_bank = !bus->params.next_bank;
+
+	return 0;
+
+error:
+	kfree(wbuf);
+error_1:
+	kfree(wr_msg);
+	return ret;
+}
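+
+/*
+ * Informative note: the bus registers are double-banked. All parameter
+ * writes above target the currently inactive bank (params.next_bank);
+ * the broadcast write to SCP_FrameCtrl is what makes the bus switch
+ * over, which is why curr_bank/next_bank are flipped only after the
+ * transfer succeeds.
+ */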
+
+static int do_bank_switch(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = stream->m_rt;
+	const struct sdw_master_ops *ops;
+	struct sdw_bus *bus = m_rt->bus;
+	int ret = 0;
+
+	ops = bus->ops;
+
+	/* Pre-bank switch */
+	if (ops->pre_bank_switch) {
+		ret = ops->pre_bank_switch(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Pre bank switch op failed: %d", ret);
+			return ret;
+		}
+	}
+
+	/* Bank switch */
+	ret = sdw_bank_switch(bus);
+	if (ret < 0) {
+		dev_err(bus->dev, "Bank switch failed: %d", ret);
+		return ret;
+	}
+
+	/* Post-bank switch */
+	if (ops->post_bank_switch) {
+		ret = ops->post_bank_switch(bus);
+		if (ret < 0) {
+			dev_err(bus->dev,
+					"Post bank switch op failed: %d", ret);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * sdw_release_stream() - Free the assigned stream runtime
+ *
+ * @stream: SoundWire stream runtime
+ *
+ * sdw_release_stream should be called only once per stream
+ */
+void sdw_release_stream(struct sdw_stream_runtime *stream)
+{
+	kfree(stream);
+}
+EXPORT_SYMBOL(sdw_release_stream);
+
+/**
+ * sdw_alloc_stream() - Allocate and return stream runtime
+ *
+ * @stream_name: SoundWire stream name
+ *
+ * Allocates a SoundWire stream runtime instance.
+ * sdw_alloc_stream should be called only once per stream. Typically
+ * invoked from ALSA/ASoC machine/platform driver.
+ */
+struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name)
+{
+	struct sdw_stream_runtime *stream;
+
+	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+	if (!stream)
+		return NULL;
+
+	stream->name = stream_name;
+	stream->state = SDW_STREAM_ALLOCATED;
+
+	return stream;
+}
+EXPORT_SYMBOL(sdw_alloc_stream);
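+
+/*
+ * Illustrative usage (a sketch, not part of this patch; error handling
+ * elided). A machine driver would typically allocate one runtime per
+ * stream and hand it to the DAIs through their set_sdw_stream() op:
+ *
+ *	struct sdw_stream_runtime *stream;
+ *
+ *	stream = sdw_alloc_stream("SDW0 Playback");
+ *	if (!stream)
+ *		return -ENOMEM;
+ *
+ *	dai->ops->set_sdw_stream(dai, stream, SNDRV_PCM_STREAM_PLAYBACK);
+ *
+ *	sdw_release_stream(stream);
+ */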
+
+/**
+ * sdw_alloc_master_rt() - Allocates and initialize Master runtime handle
+ *
+ * @bus: SDW bus instance
+ * @stream_config: Stream configuration
+ * @stream: Stream runtime handle.
+ *
+ * This function is to be called with bus_lock held.
+ */
+static struct sdw_master_runtime
+*sdw_alloc_master_rt(struct sdw_bus *bus,
+			struct sdw_stream_config *stream_config,
+			struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt;
+
+	m_rt = stream->m_rt;
+
+	/*
+	 * check if Master is already allocated (as a result of Slave adding
+	 * it first), if so skip allocation and go to configure
+	 */
+	if (m_rt)
+		goto stream_config;
+
+	m_rt = kzalloc(sizeof(*m_rt), GFP_KERNEL);
+	if (!m_rt)
+		return NULL;
+
+	/* Initialization of Master runtime handle */
+	INIT_LIST_HEAD(&m_rt->port_list);
+	INIT_LIST_HEAD(&m_rt->slave_rt_list);
+	stream->m_rt = m_rt;
+
+	list_add_tail(&m_rt->bus_node, &bus->m_rt_list);
+
+stream_config:
+	m_rt->ch_count = stream_config->ch_count;
+	m_rt->bus = bus;
+	m_rt->stream = stream;
+	m_rt->direction = stream_config->direction;
+
+	return m_rt;
+}
+
+/**
+ * sdw_alloc_slave_rt() - Allocate and initialize Slave runtime handle.
+ *
+ * @slave: Slave handle
+ * @stream_config: Stream configuration
+ * @stream: Stream runtime handle
+ *
+ * This function is to be called with bus_lock held.
+ */
+static struct sdw_slave_runtime
+*sdw_alloc_slave_rt(struct sdw_slave *slave,
+			struct sdw_stream_config *stream_config,
+			struct sdw_stream_runtime *stream)
+{
+	struct sdw_slave_runtime *s_rt = NULL;
+
+	s_rt = kzalloc(sizeof(*s_rt), GFP_KERNEL);
+	if (!s_rt)
+		return NULL;
+
+	INIT_LIST_HEAD(&s_rt->port_list);
+	s_rt->ch_count = stream_config->ch_count;
+	s_rt->direction = stream_config->direction;
+	s_rt->slave = slave;
+
+	return s_rt;
+}
+
+static void sdw_master_port_release(struct sdw_bus *bus,
+			struct sdw_master_runtime *m_rt)
+{
+	struct sdw_port_runtime *p_rt, *_p_rt;
+
+	list_for_each_entry_safe(p_rt, _p_rt,
+			&m_rt->port_list, port_node) {
+		list_del(&p_rt->port_node);
+		kfree(p_rt);
+	}
+}
+
+static void sdw_slave_port_release(struct sdw_bus *bus,
+			struct sdw_slave *slave,
+			struct sdw_stream_runtime *stream)
+{
+	struct sdw_port_runtime *p_rt, *_p_rt;
+	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_slave_runtime *s_rt;
+
+	list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
+		if (s_rt->slave != slave)
+			continue;
+
+		list_for_each_entry_safe(p_rt, _p_rt,
+				&s_rt->port_list, port_node) {
+			list_del(&p_rt->port_node);
+			kfree(p_rt);
+		}
+	}
+}
+
+/**
+ * sdw_release_slave_stream() - Free Slave(s) runtime handle
+ *
+ * @slave: Slave handle.
+ * @stream: Stream runtime handle.
+ *
+ * This function is to be called with bus_lock held.
+ */
+static void sdw_release_slave_stream(struct sdw_slave *slave,
+			struct sdw_stream_runtime *stream)
+{
+	struct sdw_slave_runtime *s_rt, *_s_rt;
+	struct sdw_master_runtime *m_rt = stream->m_rt;
+
+	/* Retrieve Slave runtime handle */
+	list_for_each_entry_safe(s_rt, _s_rt,
+			&m_rt->slave_rt_list, m_rt_node) {
+
+		if (s_rt->slave == slave) {
+			list_del(&s_rt->m_rt_node);
+			kfree(s_rt);
+			return;
+		}
+	}
+}
+
+/**
+ * sdw_release_master_stream() - Free Master runtime handle
+ *
+ * @stream: Stream runtime handle.
+ *
+ * This function is to be called with bus_lock held
+ * It frees the Master runtime handle and associated Slave(s) runtime
+ * handle. If this is called first then sdw_release_slave_stream() will have
+ * no effect as Slave(s) runtime handle would already be freed up.
+ */
+static void sdw_release_master_stream(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_slave_runtime *s_rt, *_s_rt;
+
+	/* bus_lock is held by all callers, so use the unlocked helpers */
+	list_for_each_entry_safe(s_rt, _s_rt,
+			&m_rt->slave_rt_list, m_rt_node) {
+		sdw_slave_port_release(s_rt->slave->bus, s_rt->slave, stream);
+		sdw_release_slave_stream(s_rt->slave, stream);
+	}
+
+	list_del(&m_rt->bus_node);
+}
+
+/**
+ * sdw_stream_remove_master() - Remove master from sdw_stream
+ *
+ * @bus: SDW Bus instance
+ * @stream: SoundWire stream
+ *
+ * This removes and frees port_rt and master_rt from a stream
+ */
+int sdw_stream_remove_master(struct sdw_bus *bus,
+		struct sdw_stream_runtime *stream)
+{
+	mutex_lock(&bus->bus_lock);
+
+	sdw_release_master_stream(stream);
+	sdw_master_port_release(bus, stream->m_rt);
+	stream->state = SDW_STREAM_RELEASED;
+	kfree(stream->m_rt);
+	stream->m_rt = NULL;
+
+	mutex_unlock(&bus->bus_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(sdw_stream_remove_master);
+
+/**
+ * sdw_stream_remove_slave() - Remove slave from sdw_stream
+ *
+ * @slave: SDW Slave instance
+ * @stream: SoundWire stream
+ *
+ * This removes and frees port_rt and slave_rt from a stream
+ */
+int sdw_stream_remove_slave(struct sdw_slave *slave,
+		struct sdw_stream_runtime *stream)
+{
+	mutex_lock(&slave->bus->bus_lock);
+
+	sdw_slave_port_release(slave->bus, slave, stream);
+	sdw_release_slave_stream(slave, stream);
+
+	mutex_unlock(&slave->bus->bus_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(sdw_stream_remove_slave);
+
+/**
+ * sdw_config_stream() - Configure the allocated stream
+ *
+ * @dev: SDW device
+ * @stream: SoundWire stream
+ * @stream_config: Stream configuration for audio stream
+ * @is_slave: is API called from Slave or Master
+ *
+ * This function is to be called with bus_lock held.
+ */
+static int sdw_config_stream(struct device *dev,
+		struct sdw_stream_runtime *stream,
+		struct sdw_stream_config *stream_config, bool is_slave)
+{
+	/*
+	 * Update the stream rate, channel and bps based on data
+	 * source. For more than one data source (multilink),
+	 * match the rate, bps, stream type and increment number of channels.
+	 *
+	 * If rate/bps is zero, it means the values are not set, so skip
+	 * comparison and allow the value to be set and stored in stream
+	 */
+	if (stream->params.rate &&
+			stream->params.rate != stream_config->frame_rate) {
+		dev_err(dev, "rate not matching, stream:%s", stream->name);
+		return -EINVAL;
+	}
+
+	if (stream->params.bps &&
+			stream->params.bps != stream_config->bps) {
+		dev_err(dev, "bps not matching, stream:%s", stream->name);
+		return -EINVAL;
+	}
+
+	stream->type = stream_config->type;
+	stream->params.rate = stream_config->frame_rate;
+	stream->params.bps = stream_config->bps;
+
+	/* TODO: Update this check during Device-device support */
+	if (is_slave)
+		stream->params.ch_count += stream_config->ch_count;
+
+	return 0;
+}
+
+static int sdw_is_valid_port_range(struct device *dev,
+				struct sdw_port_runtime *p_rt)
+{
+	if (!SDW_VALID_PORT_RANGE(p_rt->num)) {
+		dev_err(dev,
+			"SoundWire: Invalid port number :%d", p_rt->num);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct sdw_port_runtime *sdw_port_alloc(struct device *dev,
+				struct sdw_port_config *port_config,
+				int port_index)
+{
+	struct sdw_port_runtime *p_rt;
+
+	p_rt = kzalloc(sizeof(*p_rt), GFP_KERNEL);
+	if (!p_rt)
+		return NULL;
+
+	p_rt->ch_mask = port_config[port_index].ch_mask;
+	p_rt->num = port_config[port_index].num;
+
+	return p_rt;
+}
+
+static int sdw_master_port_config(struct sdw_bus *bus,
+			struct sdw_master_runtime *m_rt,
+			struct sdw_port_config *port_config,
+			unsigned int num_ports)
+{
+	struct sdw_port_runtime *p_rt;
+	int i;
+
+	/* Iterate for number of ports to perform initialization */
+	for (i = 0; i < num_ports; i++) {
+		p_rt = sdw_port_alloc(bus->dev, port_config, i);
+		if (!p_rt)
+			return -ENOMEM;
+
+		/*
+		 * TODO: Check port capabilities for requested
+		 * configuration (audio mode support)
+		 */
+
+		list_add_tail(&p_rt->port_node, &m_rt->port_list);
+	}
+
+	return 0;
+}
+
+static int sdw_slave_port_config(struct sdw_slave *slave,
+			struct sdw_slave_runtime *s_rt,
+			struct sdw_port_config *port_config,
+			unsigned int num_config)
+{
+	struct sdw_port_runtime *p_rt;
+	int i, ret;
+
+	/* Iterate for number of ports to perform initialization */
+	for (i = 0; i < num_config; i++) {
+		p_rt = sdw_port_alloc(&slave->dev, port_config, i);
+		if (!p_rt)
+			return -ENOMEM;
+
+		/*
+		 * TODO: Check valid port range as defined by DisCo/
+		 * slave
+		 */
+		ret = sdw_is_valid_port_range(&slave->dev, p_rt);
+		if (ret < 0) {
+			kfree(p_rt);
+			return ret;
+		}
+
+		/*
+		 * TODO: Check port capabilities for requested
+		 * configuration (audio mode support)
+		 */
+
+		list_add_tail(&p_rt->port_node, &s_rt->port_list);
+	}
+
+	return 0;
+}
+
+/**
+ * sdw_stream_add_master() - Allocate and add master runtime to a stream
+ *
+ * @bus: SDW Bus instance
+ * @stream_config: Stream configuration for audio stream
+ * @port_config: Port configuration for audio stream
+ * @num_ports: Number of ports
+ * @stream: SoundWire stream
+ */
+int sdw_stream_add_master(struct sdw_bus *bus,
+		struct sdw_stream_config *stream_config,
+		struct sdw_port_config *port_config,
+		unsigned int num_ports,
+		struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = NULL;
+	int ret;
+
+	mutex_lock(&bus->bus_lock);
+
+	m_rt = sdw_alloc_master_rt(bus, stream_config, stream);
+	if (!m_rt) {
+		dev_err(bus->dev,
+				"Master runtime config failed for stream:%s",
+				stream->name);
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = sdw_config_stream(bus->dev, stream, stream_config, false);
+	if (ret)
+		goto stream_error;
+
+	ret = sdw_master_port_config(bus, m_rt, port_config, num_ports);
+	if (ret)
+		goto stream_error;
+
+	stream->state = SDW_STREAM_CONFIGURED;
+	goto error;
+
+stream_error:
+	sdw_release_master_stream(stream);
+error:
+	mutex_unlock(&bus->bus_lock);
+	return ret;
+}
+EXPORT_SYMBOL(sdw_stream_add_master);
+
+/**
+ * sdw_stream_add_slave() - Allocate and add master/slave runtime to a stream
+ *
+ * @slave: SDW Slave instance
+ * @stream_config: Stream configuration for audio stream
+ * @stream: SoundWire stream
+ * @port_config: Port configuration for audio stream
+ * @num_ports: Number of ports
+ */
+int sdw_stream_add_slave(struct sdw_slave *slave,
+		struct sdw_stream_config *stream_config,
+		struct sdw_port_config *port_config,
+		unsigned int num_ports,
+		struct sdw_stream_runtime *stream)
+{
+	struct sdw_slave_runtime *s_rt;
+	struct sdw_master_runtime *m_rt;
+	int ret;
+
+	mutex_lock(&slave->bus->bus_lock);
+
+	/*
+	 * If this API is invoked for the Slave first, m_rt is not yet
+	 * allocated, so allocate it here and add the Slave to it.
+	 */
+	m_rt = sdw_alloc_master_rt(slave->bus, stream_config, stream);
+	if (!m_rt) {
+		dev_err(&slave->dev,
+				"alloc master runtime failed for stream:%s",
+				stream->name);
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	s_rt = sdw_alloc_slave_rt(slave, stream_config, stream);
+	if (!s_rt) {
+		dev_err(&slave->dev,
+				"Slave runtime config failed for stream:%s",
+				stream->name);
+		ret = -ENOMEM;
+		goto stream_error;
+	}
+
+	ret = sdw_config_stream(&slave->dev, stream, stream_config, true);
+	if (ret)
+		goto stream_error;
+
+	list_add_tail(&s_rt->m_rt_node, &m_rt->slave_rt_list);
+
+	ret = sdw_slave_port_config(slave, s_rt, port_config, num_ports);
+	if (ret)
+		goto stream_error;
+
+	stream->state = SDW_STREAM_CONFIGURED;
+	goto error;
+
+stream_error:
+	/*
+	 * We hit an error, so clean up the stream: release all Slave(s)
+	 * and the Master runtime
+	 */
+	sdw_release_master_stream(stream);
+error:
+	mutex_unlock(&slave->bus->bus_lock);
+	return ret;
+}
+EXPORT_SYMBOL(sdw_stream_add_slave);
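+
+/*
+ * Illustrative sketch of a Slave-side hw_params using this API
+ * (hypothetical values, error handling elided):
+ *
+ *	struct sdw_stream_config sconfig = {
+ *		.frame_rate = 48000,
+ *		.ch_count = 2,
+ *		.bps = 16,
+ *		.direction = SDW_DATA_DIR_TX,
+ *		.type = SDW_STREAM_PCM,
+ *	};
+ *	struct sdw_port_config pconfig = {
+ *		.num = 1,
+ *		.ch_mask = 0x3,
+ *	};
+ *
+ *	ret = sdw_stream_add_slave(slave, &sconfig, &pconfig, 1, stream);
+ *
+ * The Master side calls sdw_stream_add_master() with its own port
+ * configuration; either side may run first, and whichever does also
+ * allocates the shared master runtime.
+ */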
+
+/**
+ * sdw_get_slave_dpn_prop() - Get Slave port capabilities
+ *
+ * @slave: Slave handle
+ * @direction: Data direction.
+ * @port_num: Port number
+ */
+struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave,
+				enum sdw_data_direction direction,
+				unsigned int port_num)
+{
+	struct sdw_dpn_prop *dpn_prop;
+	u8 num_ports;
+	int i;
+
+	if (direction == SDW_DATA_DIR_TX) {
+		num_ports = hweight32(slave->prop.source_ports);
+		dpn_prop = slave->prop.src_dpn_prop;
+	} else {
+		num_ports = hweight32(slave->prop.sink_ports);
+		dpn_prop = slave->prop.sink_dpn_prop;
+	}
+
+	for (i = 0; i < num_ports; i++) {
+		if (dpn_prop[i].num == port_num)
+			return &dpn_prop[i];
+	}
+
+	return NULL;
+}
+
+static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_prop *prop = NULL;
+	struct sdw_bus_params params;
+	int ret;
+
+	prop = &bus->prop;
+	memcpy(&params, &bus->params, sizeof(params));
+
+	/* TODO: Support Asynchronous mode */
+	if ((prop->max_freq % stream->params.rate) != 0) {
+		dev_err(bus->dev, "Async mode not supported");
+		return -EINVAL;
+	}
+
+	/* Increment cumulative bus bandwidth */
+	/* TODO: Update this during Device-Device support */
+	bus->params.bandwidth += m_rt->stream->params.rate *
+		m_rt->ch_count * m_rt->stream->params.bps;
+
+	/* Program params */
+	ret = sdw_program_params(bus);
+	if (ret < 0) {
+		dev_err(bus->dev, "Program params failed: %d", ret);
+		goto restore_params;
+	}
+
+	ret = do_bank_switch(stream);
+	if (ret < 0) {
+		dev_err(bus->dev, "Bank switch failed: %d", ret);
+		goto restore_params;
+	}
+
+	/* Prepare port(s) on the new clock configuration */
+	ret = sdw_prep_deprep_ports(m_rt, true);
+	if (ret < 0) {
+		dev_err(bus->dev, "Prepare port(s) failed ret = %d",
+				ret);
+		return ret;
+	}
+
+	stream->state = SDW_STREAM_PREPARED;
+
+	return ret;
+
+restore_params:
+	memcpy(&bus->params, &params, sizeof(params));
+	return ret;
+}
+
+/**
+ * sdw_prepare_stream() - Prepare SoundWire stream
+ *
+ * @stream: Soundwire stream
+ *
+ * Documentation/soundwire/stream.txt explains this API in detail
+ */
+int sdw_prepare_stream(struct sdw_stream_runtime *stream)
+{
+	int ret = 0;
+
+	if (!stream) {
+		pr_err("SoundWire: Handle not found for stream");
+		return -EINVAL;
+	}
+
+	mutex_lock(&stream->m_rt->bus->bus_lock);
+
+	ret = _sdw_prepare_stream(stream);
+	if (ret < 0)
+		pr_err("Prepare for stream:%s failed: %d", stream->name, ret);
+
+	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	return ret;
+}
+EXPORT_SYMBOL(sdw_prepare_stream);
+
+static int _sdw_enable_stream(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_bus *bus = m_rt->bus;
+	int ret;
+
+	/* Program params */
+	ret = sdw_program_params(bus);
+	if (ret < 0) {
+		dev_err(bus->dev, "Program params failed: %d", ret);
+		return ret;
+	}
+
+	/* Enable port(s) */
+	ret = sdw_enable_disable_ports(m_rt, true);
+	if (ret < 0) {
+		dev_err(bus->dev, "Enable port(s) failed ret: %d", ret);
+		return ret;
+	}
+
+	ret = do_bank_switch(stream);
+	if (ret < 0) {
+		dev_err(bus->dev, "Bank switch failed: %d", ret);
+		return ret;
+	}
+
+	stream->state = SDW_STREAM_ENABLED;
+	return 0;
+}
+
+/**
+ * sdw_enable_stream() - Enable SoundWire stream
+ *
+ * @stream: Soundwire stream
+ *
+ * Documentation/soundwire/stream.txt explains this API in detail
+ */
+int sdw_enable_stream(struct sdw_stream_runtime *stream)
+{
+	int ret = 0;
+
+	if (!stream) {
+		pr_err("SoundWire: Handle not found for stream");
+		return -EINVAL;
+	}
+
+	mutex_lock(&stream->m_rt->bus->bus_lock);
+
+	ret = _sdw_enable_stream(stream);
+	if (ret < 0)
+		pr_err("Enable for stream:%s failed: %d", stream->name, ret);
+
+	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	return ret;
+}
+EXPORT_SYMBOL(sdw_enable_stream);
+
+static int _sdw_disable_stream(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_bus *bus = m_rt->bus;
+	int ret;
+
+	/* Disable port(s) */
+	ret = sdw_enable_disable_ports(m_rt, false);
+	if (ret < 0) {
+		dev_err(bus->dev, "Disable port(s) failed: %d", ret);
+		return ret;
+	}
+
+	stream->state = SDW_STREAM_DISABLED;
+
+	/* Program params */
+	ret = sdw_program_params(bus);
+	if (ret < 0) {
+		dev_err(bus->dev, "Program params failed: %d", ret);
+		return ret;
+	}
+
+	return do_bank_switch(stream);
+}
+
+/**
+ * sdw_disable_stream() - Disable SoundWire stream
+ *
+ * @stream: Soundwire stream
+ *
+ * Documentation/soundwire/stream.txt explains this API in detail
+ */
+int sdw_disable_stream(struct sdw_stream_runtime *stream)
+{
+	int ret = 0;
+
+	if (!stream) {
+		pr_err("SoundWire: Handle not found for stream");
+		return -EINVAL;
+	}
+
+	mutex_lock(&stream->m_rt->bus->bus_lock);
+
+	ret = _sdw_disable_stream(stream);
+	if (ret < 0)
+		pr_err("Disable for stream:%s failed: %d", stream->name, ret);
+
+	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	return ret;
+}
+EXPORT_SYMBOL(sdw_disable_stream);
+
+static int _sdw_deprepare_stream(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_bus *bus = m_rt->bus;
+	int ret = 0;
+
+	/* De-prepare port(s) */
+	ret = sdw_prep_deprep_ports(m_rt, false);
+	if (ret < 0) {
+		dev_err(bus->dev, "De-prepare port(s) failed: %d", ret);
+		return ret;
+	}
+
+	stream->state = SDW_STREAM_DEPREPARED;
+
+	/* TODO: Update this during Device-Device support */
+	bus->params.bandwidth -= m_rt->stream->params.rate *
+		m_rt->ch_count * m_rt->stream->params.bps;
+
+	/* Program params */
+	ret = sdw_program_params(bus);
+	if (ret < 0) {
+		dev_err(bus->dev, "Program params failed: %d", ret);
+		return ret;
+	}
+
+	return do_bank_switch(stream);
+}
+
+/**
+ * sdw_deprepare_stream() - Deprepare SoundWire stream
+ *
+ * @stream: Soundwire stream
+ *
+ * Documentation/soundwire/stream.txt explains this API in detail
+ */
+int sdw_deprepare_stream(struct sdw_stream_runtime *stream)
+{
+	int ret = 0;
+
+	if (!stream) {
+		pr_err("SoundWire: Handle not found for stream");
+		return -EINVAL;
+	}
+
+	mutex_lock(&stream->m_rt->bus->bus_lock);
+
+	ret = _sdw_deprepare_stream(stream);
+	if (ret < 0)
+		pr_err("De-prepare for stream:%s failed: %d",
+				stream->name, ret);
+
+	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	return ret;
+}
+EXPORT_SYMBOL(sdw_deprepare_stream);
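+
+/*
+ * Summary of the expected state flow across the four APIs above
+ * (informative; Documentation/soundwire/stream.txt has the full
+ * state machine):
+ *
+ *	sdw_prepare_stream():	CONFIGURED -> PREPARED
+ *	sdw_enable_stream():	PREPARED   -> ENABLED
+ *	sdw_disable_stream():	ENABLED    -> DISABLED
+ *	sdw_deprepare_stream():	DISABLED   -> DEPREPARED
+ *
+ * Each transition programs the alternate register bank and commits it
+ * with a bank switch, so the new configuration takes effect at a frame
+ * boundary.
+ */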
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 2d4146c..ad5d68e 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -47,6 +47,13 @@
 
 if SPI_MASTER
 
+config SPI_MEM
+	bool "SPI memory extension"
+	help
+	  Enable this option if you want to use the SPI memory extension.
+	  This extension is meant to simplify interaction with SPI memories
+	  by providing a high-level interface to send memory-like commands.
+
 comment "SPI Master Controller Drivers"
 
 config SPI_ALTERA
@@ -71,7 +78,6 @@
 
 config SPI_ATMEL
 	tristate "Atmel SPI Controller"
-	depends on HAS_DMA
 	depends on ARCH_AT91 || COMPILE_TEST
 	help
 	  This selects a driver for the Atmel SPI Controller, present on
@@ -115,14 +121,6 @@
 	  "universal SPI master", and the regular SPI controller.
 	  This driver is for the universal/auxiliary SPI controller.
 
-config SPI_BCM53XX
-	tristate "Broadcom BCM53xx SPI controller"
-	depends on ARCH_BCM_5301X
-	depends on BCMA_POSSIBLE
-	select BCMA
-	help
-          Enable support for the SPI controller on Broadcom BCM53xx ARM SoCs.
-
 config SPI_BCM63XX
 	tristate "Broadcom BCM63xx SPI controller"
 	depends on BCM63XX || COMPILE_TEST
@@ -233,7 +231,6 @@
 
 config SPI_EP93XX
 	tristate "Cirrus Logic EP93xx SPI controller"
-	depends on HAS_DMA
 	depends on ARCH_EP93XX || COMPILE_TEST
 	help
 	  This enables using the Cirrus EP93xx SPI controller in master
@@ -355,7 +352,6 @@
 config SPI_FSL_DSPI
 	tristate "Freescale DSPI controller"
 	select REGMAP_MMIO
-	depends on HAS_DMA
 	depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || M5441x || COMPILE_TEST
 	help
 	  This enables support for the Freescale DSPI controller in master
@@ -431,7 +427,6 @@
 
 config SPI_OMAP24XX
 	tristate "McSPI driver for OMAP"
-	depends on HAS_DMA
 	depends on ARCH_OMAP2PLUS || COMPILE_TEST
 	select SG_SPLIT
 	help
@@ -440,7 +435,6 @@
 
 config SPI_TI_QSPI
 	tristate "DRA7xxx QSPI controller support"
-	depends on HAS_DMA
 	depends on ARCH_OMAP2PLUS || COMPILE_TEST
 	help
 	  QSPI master controller for DRA7xxx used for flash devices.
@@ -469,7 +463,6 @@
 config SPI_PIC32_SQI
 	tristate "Microchip PIC32 Quad SPI driver"
 	depends on MACH_PIC32 || COMPILE_TEST
-	depends on HAS_DMA
 	help
 	  SPI driver for PIC32 Quad SPI controller.
 
@@ -572,7 +565,7 @@
 
 config SPI_SH_MSIOF
 	tristate "SuperH MSIOF SPI controller"
-	depends on HAVE_CLK && HAS_DMA
+	depends on HAVE_CLK
 	depends on ARCH_SHMOBILE || ARCH_RENESAS || COMPILE_TEST
 	help
 	  SPI driver for SuperH and SH Mobile MSIOF blocks.
@@ -650,7 +643,7 @@
 config SPI_TEGRA114
 	tristate "NVIDIA Tegra114 SPI Controller"
 	depends on (ARCH_TEGRA && TEGRA20_APB_DMA) || COMPILE_TEST
-	depends on RESET_CONTROLLER && HAS_DMA
+	depends on RESET_CONTROLLER
 	help
 	  SPI driver for NVIDIA Tegra114 SPI Controller interface. This controller
 	  is different than the older SoCs SPI controller and also register interface
@@ -668,7 +661,7 @@
 config SPI_TEGRA20_SLINK
 	tristate "Nvidia Tegra20/Tegra30 SLINK Controller"
 	depends on (ARCH_TEGRA && TEGRA20_APB_DMA) || COMPILE_TEST
-	depends on RESET_CONTROLLER && HAS_DMA
+	depends on RESET_CONTROLLER
 	help
 	  SPI driver for Nvidia Tegra20/Tegra30 SLINK Controller interface.
 
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index b935f10..cb1f437 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -8,6 +8,7 @@
 # small core, mostly translating board-specific
 # config declarations into driver model code
 obj-$(CONFIG_SPI_MASTER)		+= spi.o
+obj-$(CONFIG_SPI_MEM)			+= spi-mem.o
 obj-$(CONFIG_SPI_SPIDEV)		+= spidev.o
 obj-$(CONFIG_SPI_LOOPBACK_TEST)		+= spi-loopback-test.o
 
@@ -20,7 +21,6 @@
 obj-$(CONFIG_SPI_AXI_SPI_ENGINE)	+= spi-axi-spi-engine.o
 obj-$(CONFIG_SPI_BCM2835)		+= spi-bcm2835.o
 obj-$(CONFIG_SPI_BCM2835AUX)		+= spi-bcm2835aux.o
-obj-$(CONFIG_SPI_BCM53XX)		+= spi-bcm53xx.o
 obj-$(CONFIG_SPI_BCM63XX)		+= spi-bcm63xx.o
 obj-$(CONFIG_SPI_BCM63XX_HSSPI)		+= spi-bcm63xx-hsspi.o
 obj-$(CONFIG_SPI_BCM_QSPI)		+= spi-iproc-qspi.o spi-brcmstb-qspi.o spi-bcm-qspi.o
diff --git a/drivers/spi/internals.h b/drivers/spi/internals.h
new file mode 100644
index 0000000..4a28a83
--- /dev/null
+++ b/drivers/spi/internals.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ * Copyright (C) 2018 Bootlin
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ *
+ * Helpers needed by the spi or spi-mem logic. Should not be used outside of
+ * spi-mem.c and spi.c.
+ */
+
+#ifndef __LINUX_SPI_INTERNALS_H
+#define __LINUX_SPI_INTERNALS_H
+
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
+#include <linux/spi/spi.h>
+
+void spi_flush_queue(struct spi_controller *ctrl);
+
+#ifdef CONFIG_HAS_DMA
+int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
+		struct sg_table *sgt, void *buf, size_t len,
+		enum dma_data_direction dir);
+void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
+		   struct sg_table *sgt, enum dma_data_direction dir);
+#else /* !CONFIG_HAS_DMA */
+static inline int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
+			      struct sg_table *sgt, void *buf, size_t len,
+			      enum dma_data_direction dir)
+{
+	return -EINVAL;
+}
+
+static inline void spi_unmap_buf(struct spi_controller *ctlr,
+				 struct device *dev, struct sg_table *sgt,
+				 enum dma_data_direction dir)
+{
+}
+#endif /* CONFIG_HAS_DMA */
+
+#endif /* __LINUX_SPI_INTERNALS_H */
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 6573152..8612525 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -30,6 +30,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
 #include "spi-bcm-qspi.h"
@@ -215,10 +216,10 @@
 	int bspi_maj_rev;
 	int bspi_min_rev;
 	int bspi_enabled;
-	struct spi_flash_read_message *bspi_rf_msg;
-	u32 bspi_rf_msg_idx;
-	u32 bspi_rf_msg_len;
-	u32 bspi_rf_msg_status;
+	const struct spi_mem_op *bspi_rf_op;
+	u32 bspi_rf_op_idx;
+	u32 bspi_rf_op_len;
+	u32 bspi_rf_op_status;
 	struct bcm_xfer_mode xfer_mode;
 	u32 s3_strap_override_ctrl;
 	bool bspi_mode;
@@ -313,26 +314,26 @@
 
 static void bcm_qspi_bspi_lr_data_read(struct bcm_qspi *qspi)
 {
-	u32 *buf = (u32 *)qspi->bspi_rf_msg->buf;
+	u32 *buf = (u32 *)qspi->bspi_rf_op->data.buf.in;
 	u32 data = 0;
 
-	dev_dbg(&qspi->pdev->dev, "xfer %p rx %p rxlen %d\n", qspi->bspi_rf_msg,
-		qspi->bspi_rf_msg->buf, qspi->bspi_rf_msg_len);
+	dev_dbg(&qspi->pdev->dev, "xfer %p rx %p rxlen %d\n", qspi->bspi_rf_op,
+		qspi->bspi_rf_op->data.buf.in, qspi->bspi_rf_op_len);
 	while (!bcm_qspi_bspi_lr_is_fifo_empty(qspi)) {
 		data = bcm_qspi_bspi_lr_read_fifo(qspi);
-		if (likely(qspi->bspi_rf_msg_len >= 4) &&
+		if (likely(qspi->bspi_rf_op_len >= 4) &&
 		    IS_ALIGNED((uintptr_t)buf, 4)) {
-			buf[qspi->bspi_rf_msg_idx++] = data;
-			qspi->bspi_rf_msg_len -= 4;
+			buf[qspi->bspi_rf_op_idx++] = data;
+			qspi->bspi_rf_op_len -= 4;
 		} else {
 			/* Read out remaining bytes, make sure*/
-			u8 *cbuf = (u8 *)&buf[qspi->bspi_rf_msg_idx];
+			u8 *cbuf = (u8 *)&buf[qspi->bspi_rf_op_idx];
 
 			data = cpu_to_le32(data);
-			while (qspi->bspi_rf_msg_len) {
+			while (qspi->bspi_rf_op_len) {
 				*cbuf++ = (u8)data;
 				data >>= 8;
-				qspi->bspi_rf_msg_len--;
+				qspi->bspi_rf_op_len--;
 			}
 		}
 	}
@@ -349,14 +350,12 @@
 }
 
 static int bcm_qspi_bspi_set_flex_mode(struct bcm_qspi *qspi,
-				       struct spi_flash_read_message *msg,
-				       int hp)
+				       const struct spi_mem_op *op, int hp)
 {
 	int bpc = 0, bpp = 0;
-	u8 command = msg->read_opcode;
-	int width  = msg->data_nbits ? msg->data_nbits : SPI_NBITS_SINGLE;
-	int addrlen = msg->addr_width;
-	int addr_nbits = msg->addr_nbits ? msg->addr_nbits : SPI_NBITS_SINGLE;
+	u8 command = op->cmd.opcode;
+	int width  = op->data.buswidth ? op->data.buswidth : SPI_NBITS_SINGLE;
+	int addrlen = op->addr.nbytes;
 	int flex_mode = 1;
 
 	dev_dbg(&qspi->pdev->dev, "set flex mode w %x addrlen %x hp %d\n",
@@ -365,7 +364,7 @@
 	if (addrlen == BSPI_ADDRLEN_4BYTES)
 		bpp = BSPI_BPP_ADDR_SELECT_MASK;
 
-	bpp |= msg->dummy_bytes * (8/addr_nbits);
+	bpp |= (op->dummy.nbytes * 8) / op->dummy.buswidth;
 
 	switch (width) {
 	case SPI_NBITS_SINGLE:
@@ -397,11 +396,10 @@
 }
 
 static int bcm_qspi_bspi_set_override(struct bcm_qspi *qspi,
-				      struct spi_flash_read_message *msg,
-				      int hp)
+				      const struct spi_mem_op *op, int hp)
 {
-	int width = msg->data_nbits ? msg->data_nbits : SPI_NBITS_SINGLE;
-	int addrlen = msg->addr_width;
+	int width = op->data.buswidth ? op->data.buswidth : SPI_NBITS_SINGLE;
+	int addrlen = op->addr.nbytes;
 	u32 data = bcm_qspi_read(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL);
 
 	dev_dbg(&qspi->pdev->dev, "set override mode w %x addrlen %x hp %d\n",
@@ -437,17 +435,17 @@
 	/* set the override mode */
 	data |=	BSPI_STRAP_OVERRIDE_CTRL_OVERRIDE;
 	bcm_qspi_write(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL, data);
-	bcm_qspi_bspi_set_xfer_params(qspi, msg->read_opcode, 0, 0, 0);
+	bcm_qspi_bspi_set_xfer_params(qspi, op->cmd.opcode, 0, 0, 0);
 
 	return 0;
 }
 
 static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi,
-				  struct spi_flash_read_message *msg, int hp)
+				  const struct spi_mem_op *op, int hp)
 {
 	int error = 0;
-	int width = msg->data_nbits ? msg->data_nbits : SPI_NBITS_SINGLE;
-	int addrlen = msg->addr_width;
+	int width = op->data.buswidth ? op->data.buswidth : SPI_NBITS_SINGLE;
+	int addrlen = op->addr.nbytes;
 
 	/* default mode */
 	qspi->xfer_mode.flex_mode = true;
@@ -460,12 +458,12 @@
 		if (val & mask || qspi->s3_strap_override_ctrl & mask) {
 			qspi->xfer_mode.flex_mode = false;
 			bcm_qspi_write(qspi, BSPI, BSPI_FLEX_MODE_ENABLE, 0);
-			error = bcm_qspi_bspi_set_override(qspi, msg, hp);
+			error = bcm_qspi_bspi_set_override(qspi, op, hp);
 		}
 	}
 
 	if (qspi->xfer_mode.flex_mode)
-		error = bcm_qspi_bspi_set_flex_mode(qspi, msg, hp);
+		error = bcm_qspi_bspi_set_flex_mode(qspi, op, hp);
 
 	if (error) {
 		dev_warn(&qspi->pdev->dev,
@@ -802,19 +800,20 @@
 	return slot;
 }
 
-static int bcm_qspi_bspi_flash_read(struct spi_device *spi,
-				    struct spi_flash_read_message *msg)
+static int bcm_qspi_bspi_exec_mem_op(struct spi_device *spi,
+				     const struct spi_mem_op *op)
 {
 	struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
-	u32 addr = 0, len, rdlen, len_words;
+	u32 addr = 0, len, rdlen, len_words, from = 0;
 	int ret = 0;
 	unsigned long timeo = msecs_to_jiffies(100);
 	struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
 
 	if (bcm_qspi_bspi_ver_three(qspi))
-		if (msg->addr_width == BSPI_ADDRLEN_4BYTES)
+		if (op->addr.nbytes == BSPI_ADDRLEN_4BYTES)
 			return -EIO;
 
+	from = op->addr.val;
 	bcm_qspi_chip_select(qspi, spi->chip_select);
 	bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 0);
 
@@ -823,15 +822,15 @@
 	 * the upper address byte to bspi
 	 */
 	if (bcm_qspi_bspi_ver_three(qspi) == false) {
-		addr = msg->from & 0xff000000;
+		addr = from & 0xff000000;
 		bcm_qspi_write(qspi, BSPI,
 			       BSPI_BSPI_FLASH_UPPER_ADDR_BYTE, addr);
 	}
 
 	if (!qspi->xfer_mode.flex_mode)
-		addr = msg->from;
+		addr = from;
 	else
-		addr = msg->from & 0x00ffffff;
+		addr = from & 0x00ffffff;
 
 	if (bcm_qspi_bspi_ver_three(qspi) == true)
 		addr = (addr + 0xc00000) & 0xffffff;
@@ -840,8 +839,8 @@
 	 * read into the entire buffer by breaking the reads
 	 * into RAF buffer read lengths
 	 */
-	len = msg->len;
-	qspi->bspi_rf_msg_idx = 0;
+	len = op->data.nbytes;
+	qspi->bspi_rf_op_idx = 0;
 
 	do {
 		if (len > BSPI_READ_LENGTH)
@@ -852,9 +851,9 @@
 		reinit_completion(&qspi->bspi_done);
 		bcm_qspi_enable_bspi(qspi);
 		len_words = (rdlen + 3) >> 2;
-		qspi->bspi_rf_msg = msg;
-		qspi->bspi_rf_msg_status = 0;
-		qspi->bspi_rf_msg_len = rdlen;
+		qspi->bspi_rf_op = op;
+		qspi->bspi_rf_op_status = 0;
+		qspi->bspi_rf_op_len = rdlen;
 		dev_dbg(&qspi->pdev->dev,
 			"bspi xfr addr 0x%x len 0x%x", addr, rdlen);
 		bcm_qspi_write(qspi, BSPI, BSPI_RAF_START_ADDR, addr);
@@ -879,7 +878,6 @@
 		}
 
 		/* advance to the next chunk */
-		msg->retlen += rdlen;
 		addr += rdlen;
 		len -= rdlen;
 	} while (len);
@@ -914,61 +912,63 @@
 	return 0;
 }
 
-static int bcm_qspi_mspi_flash_read(struct spi_device *spi,
-				    struct spi_flash_read_message *msg)
+static int bcm_qspi_mspi_exec_mem_op(struct spi_device *spi,
+				     const struct spi_mem_op *op)
 {
-	struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
+	struct spi_master *master = spi->master;
+	struct bcm_qspi *qspi = spi_master_get_devdata(master);
 	struct spi_transfer t[2];
-	u8 cmd[6];
-	int ret;
+	u8 cmd[6] = { };
+	int ret, i;
 
 	memset(cmd, 0, sizeof(cmd));
 	memset(t, 0, sizeof(t));
 
 	/* tx */
 	/* opcode is in cmd[0] */
-	cmd[0] = msg->read_opcode;
-	cmd[1] = msg->from >> (msg->addr_width * 8 -  8);
-	cmd[2] = msg->from >> (msg->addr_width * 8 - 16);
-	cmd[3] = msg->from >> (msg->addr_width * 8 - 24);
-	cmd[4] = msg->from >> (msg->addr_width * 8 - 32);
+	cmd[0] = op->cmd.opcode;
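+	/*
+	 * The address is serialized MSB first, as SPI flashes expect: for
+	 * example, addr.nbytes = 3 and addr.val = 0x123456 yield
+	 * cmd[1] = 0x12, cmd[2] = 0x34 and cmd[3] = 0x56.
+	 */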
+	for (i = 0; i < op->addr.nbytes; i++)
+		cmd[1 + i] = op->addr.val >> (8 * (op->addr.nbytes - i - 1));
+
 	t[0].tx_buf = cmd;
-	t[0].len = msg->addr_width + msg->dummy_bytes + 1;
+	t[0].len = op->addr.nbytes + op->dummy.nbytes + 1;
 	t[0].bits_per_word = spi->bits_per_word;
-	t[0].tx_nbits = msg->opcode_nbits;
+	t[0].tx_nbits = op->cmd.buswidth;
 	/* lets mspi know that this is not last transfer */
 	qspi->trans_pos.mspi_last_trans = false;
-	ret = bcm_qspi_transfer_one(spi->master, spi, &t[0]);
+	ret = bcm_qspi_transfer_one(master, spi, &t[0]);
 
 	/* rx */
 	qspi->trans_pos.mspi_last_trans = true;
 	if (!ret) {
 		/* rx */
-		t[1].rx_buf = msg->buf;
-		t[1].len = msg->len;
-		t[1].rx_nbits =  msg->data_nbits;
+		t[1].rx_buf = op->data.buf.in;
+		t[1].len = op->data.nbytes;
+		t[1].rx_nbits =  op->data.buswidth;
 		t[1].bits_per_word = spi->bits_per_word;
-		ret = bcm_qspi_transfer_one(spi->master, spi, &t[1]);
+		ret = bcm_qspi_transfer_one(master, spi, &t[1]);
 	}
 
-	if (!ret)
-		msg->retlen = msg->len;
-
 	return ret;
 }
 
-static int bcm_qspi_flash_read(struct spi_device *spi,
-			       struct spi_flash_read_message *msg)
+static int bcm_qspi_exec_mem_op(struct spi_mem *mem,
+				const struct spi_mem_op *op)
 {
+	struct spi_device *spi = mem->spi;
 	struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
 	int ret = 0;
 	bool mspi_read = false;
-	u32 addr, len;
+	u32 addr = 0, len;
 	u_char *buf;
 
-	buf = msg->buf;
-	addr = msg->from;
-	len = msg->len;
+	if (!op->data.nbytes || !op->addr.nbytes || op->addr.nbytes > 4 ||
+	    op->data.dir != SPI_MEM_DATA_IN)
+		return -ENOTSUPP;
+
+	buf = op->data.buf.in;
+	addr = op->addr.val;
+	len = op->data.nbytes;
 
 	if (bcm_qspi_bspi_ver_three(qspi) == true) {
 		/*
@@ -990,12 +990,12 @@
 		mspi_read = true;
 
 	if (mspi_read)
-		return bcm_qspi_mspi_flash_read(spi, msg);
+		return bcm_qspi_mspi_exec_mem_op(spi, op);
 
-	ret = bcm_qspi_bspi_set_mode(qspi, msg, -1);
+	ret = bcm_qspi_bspi_set_mode(qspi, op, -1);
 
 	if (!ret)
-		ret = bcm_qspi_bspi_flash_read(spi, msg);
+		ret = bcm_qspi_bspi_exec_mem_op(spi, op);
 
 	return ret;
 }
@@ -1034,10 +1034,10 @@
 	struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
 	u32 status = qspi_dev_id->irqp->mask;
 
-	if (qspi->bspi_enabled && qspi->bspi_rf_msg) {
+	if (qspi->bspi_enabled && qspi->bspi_rf_op) {
 		bcm_qspi_bspi_lr_data_read(qspi);
-		if (qspi->bspi_rf_msg_len == 0) {
-			qspi->bspi_rf_msg = NULL;
+		if (qspi->bspi_rf_op_len == 0) {
+			qspi->bspi_rf_op = NULL;
 			if (qspi->soc_intc) {
 				/* disable soc BSPI interrupt */
 				soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE,
@@ -1046,7 +1046,7 @@
 				status = INTR_BSPI_LR_SESSION_DONE_MASK;
 			}
 
-			if (qspi->bspi_rf_msg_status)
+			if (qspi->bspi_rf_op_status)
 				bcm_qspi_bspi_lr_clear(qspi);
 			else
 				bcm_qspi_bspi_flush_prefetch_buffers(qspi);
@@ -1058,7 +1058,7 @@
 	}
 
 	status &= INTR_BSPI_LR_SESSION_DONE_MASK;
-	if (qspi->bspi_enabled && status && qspi->bspi_rf_msg_len == 0)
+	if (qspi->bspi_enabled && status && qspi->bspi_rf_op_len == 0)
 		complete(&qspi->bspi_done);
 
 	return IRQ_HANDLED;
@@ -1071,7 +1071,7 @@
 	struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
 
 	dev_err(&qspi->pdev->dev, "BSPI INT error\n");
-	qspi->bspi_rf_msg_status = -EIO;
+	qspi->bspi_rf_op_status = -EIO;
 	if (qspi->soc_intc)
 		/* clear soc interrupt */
 		soc_intc->bcm_qspi_int_ack(soc_intc, BSPI_ERR);
@@ -1194,6 +1194,10 @@
 
 }
 
+static const struct spi_controller_mem_ops bcm_qspi_mem_ops = {
+	.exec_op = bcm_qspi_exec_mem_op,
+};
+
 static const struct of_device_id bcm_qspi_of_match[] = {
 	{ .compatible = "brcm,spi-bcm-qspi" },
 	{},
@@ -1236,7 +1240,7 @@
 	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_RX_DUAL | SPI_RX_QUAD;
 	master->setup = bcm_qspi_setup;
 	master->transfer_one = bcm_qspi_transfer_one;
-	master->spi_flash_read = bcm_qspi_flash_read;
+	master->mem_ops = &bcm_qspi_mem_ops;
 	master->cleanup = bcm_qspi_cleanup;
 	master->dev.of_node = dev->of_node;
 	master->num_chipselect = NUM_CHIPSELECT;
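
For reference, the kind of operation the new ->exec_op() path handles is built
with the SPI_MEM_OP helpers from <linux/spi/spi-mem.h>. A minimal sketch, with
the opcode, lengths and the mem/buf/len names chosen purely for illustration:

    /* Quad Output Fast Read (0x6b assumed): 1-wire opcode, 3-byte 1-wire
     * address, 8 dummy clocks, data phase on 4 lines.
     */
    struct spi_mem_op op =
            SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1),
                       SPI_MEM_OP_ADDR(3, 0x1000, 1),
                       SPI_MEM_OP_DUMMY(1, 1),
                       SPI_MEM_OP_DATA_IN(len, buf, 4));

    int ret = spi_mem_exec_op(mem, &op);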
diff --git a/drivers/spi/spi-bcm53xx.c b/drivers/spi/spi-bcm53xx.c
deleted file mode 100644
index d02ceb7..0000000
--- a/drivers/spi/spi-bcm53xx.c
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Copyright (C) 2014-2016 Rafał Miłecki <rafal@milecki.pl>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/bcma/bcma.h>
-#include <linux/spi/spi.h>
-
-#include "spi-bcm53xx.h"
-
-#define BCM53XXSPI_MAX_SPI_BAUD	13500000	/* 216 MHz? */
-#define BCM53XXSPI_FLASH_WINDOW	SZ_32M
-
-/* The longest observed required wait was 19 ms */
-#define BCM53XXSPI_SPE_TIMEOUT_MS	80
-
-struct bcm53xxspi {
-	struct bcma_device *core;
-	struct spi_master *master;
-	void __iomem *mmio_base;
-	bool bspi;				/* Boot SPI mode with memory mapping */
-};
-
-static inline u32 bcm53xxspi_read(struct bcm53xxspi *b53spi, u16 offset)
-{
-	return bcma_read32(b53spi->core, offset);
-}
-
-static inline void bcm53xxspi_write(struct bcm53xxspi *b53spi, u16 offset,
-				    u32 value)
-{
-	bcma_write32(b53spi->core, offset, value);
-}
-
-static void bcm53xxspi_disable_bspi(struct bcm53xxspi *b53spi)
-{
-	struct device *dev = &b53spi->core->dev;
-	unsigned long deadline;
-	u32 tmp;
-
-	if (!b53spi->bspi)
-		return;
-
-	tmp = bcm53xxspi_read(b53spi, B53SPI_BSPI_MAST_N_BOOT_CTRL);
-	if (tmp & 0x1)
-		return;
-
-	deadline = jiffies + usecs_to_jiffies(200);
-	do {
-		tmp = bcm53xxspi_read(b53spi, B53SPI_BSPI_BUSY_STATUS);
-		if (!(tmp & 0x1)) {
-			bcm53xxspi_write(b53spi, B53SPI_BSPI_MAST_N_BOOT_CTRL,
-					 0x1);
-			ndelay(200);
-			b53spi->bspi = false;
-			return;
-		}
-		udelay(1);
-	} while (!time_after_eq(jiffies, deadline));
-
-	dev_warn(dev, "Timeout disabling BSPI\n");
-}
-
-static void bcm53xxspi_enable_bspi(struct bcm53xxspi *b53spi)
-{
-	u32 tmp;
-
-	if (b53spi->bspi)
-		return;
-
-	tmp = bcm53xxspi_read(b53spi, B53SPI_BSPI_MAST_N_BOOT_CTRL);
-	if (!(tmp & 0x1))
-		return;
-
-	bcm53xxspi_write(b53spi, B53SPI_BSPI_MAST_N_BOOT_CTRL, 0x0);
-	b53spi->bspi = true;
-}
-
-static inline unsigned int bcm53xxspi_calc_timeout(size_t len)
-{
-	/* Do some magic calculation based on length and buad. Add 10% and 1. */
-	return (len * 9000 / BCM53XXSPI_MAX_SPI_BAUD * 110 / 100) + 1;
-}
-
-static int bcm53xxspi_wait(struct bcm53xxspi *b53spi, unsigned int timeout_ms)
-{
-	unsigned long deadline;
-	u32 tmp;
-
-	/* SPE bit has to be 0 before we read MSPI STATUS */
-	deadline = jiffies + msecs_to_jiffies(BCM53XXSPI_SPE_TIMEOUT_MS);
-	do {
-		tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
-		if (!(tmp & B53SPI_MSPI_SPCR2_SPE))
-			break;
-		udelay(5);
-	} while (!time_after_eq(jiffies, deadline));
-
-	if (tmp & B53SPI_MSPI_SPCR2_SPE)
-		goto spi_timeout;
-
-	/* Check status */
-	deadline = jiffies + msecs_to_jiffies(timeout_ms);
-	do {
-		tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_MSPI_STATUS);
-		if (tmp & B53SPI_MSPI_MSPI_STATUS_SPIF) {
-			bcm53xxspi_write(b53spi, B53SPI_MSPI_MSPI_STATUS, 0);
-			return 0;
-		}
-
-		cpu_relax();
-		udelay(100);
-	} while (!time_after_eq(jiffies, deadline));
-
-spi_timeout:
-	bcm53xxspi_write(b53spi, B53SPI_MSPI_MSPI_STATUS, 0);
-
-	pr_err("Timeout waiting for SPI to be ready!\n");
-
-	return -EBUSY;
-}
-
-static void bcm53xxspi_buf_write(struct bcm53xxspi *b53spi, u8 *w_buf,
-				 size_t len, bool cont)
-{
-	u32 tmp;
-	int i;
-
-	for (i = 0; i < len; i++) {
-		/* Transmit Register File MSB */
-		bcm53xxspi_write(b53spi, B53SPI_MSPI_TXRAM + 4 * (i * 2),
-				 (unsigned int)w_buf[i]);
-	}
-
-	for (i = 0; i < len; i++) {
-		tmp = B53SPI_CDRAM_CONT | B53SPI_CDRAM_PCS_DISABLE_ALL |
-		      B53SPI_CDRAM_PCS_DSCK;
-		if (!cont && i == len - 1)
-			tmp &= ~B53SPI_CDRAM_CONT;
-		tmp &= ~0x1;
-		/* Command Register File */
-		bcm53xxspi_write(b53spi, B53SPI_MSPI_CDRAM + 4 * i, tmp);
-	}
-
-	/* Set queue pointers */
-	bcm53xxspi_write(b53spi, B53SPI_MSPI_NEWQP, 0);
-	bcm53xxspi_write(b53spi, B53SPI_MSPI_ENDQP, len - 1);
-
-	if (cont)
-		bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 1);
-
-	/* Start SPI transfer */
-	tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
-	tmp |= B53SPI_MSPI_SPCR2_SPE;
-	if (cont)
-		tmp |= B53SPI_MSPI_SPCR2_CONT_AFTER_CMD;
-	bcm53xxspi_write(b53spi, B53SPI_MSPI_SPCR2, tmp);
-
-	/* Wait for SPI to finish */
-	bcm53xxspi_wait(b53spi, bcm53xxspi_calc_timeout(len));
-
-	if (!cont)
-		bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 0);
-}
-
-static void bcm53xxspi_buf_read(struct bcm53xxspi *b53spi, u8 *r_buf,
-				size_t len, bool cont)
-{
-	u32 tmp;
-	int i;
-
-	for (i = 0; i < len; i++) {
-		tmp = B53SPI_CDRAM_CONT | B53SPI_CDRAM_PCS_DISABLE_ALL |
-		      B53SPI_CDRAM_PCS_DSCK;
-		if (!cont && i == len - 1)
-			tmp &= ~B53SPI_CDRAM_CONT;
-		tmp &= ~0x1;
-		/* Command Register File */
-		bcm53xxspi_write(b53spi, B53SPI_MSPI_CDRAM + 4 * i, tmp);
-	}
-
-	/* Set queue pointers */
-	bcm53xxspi_write(b53spi, B53SPI_MSPI_NEWQP, 0);
-	bcm53xxspi_write(b53spi, B53SPI_MSPI_ENDQP, len - 1);
-
-	if (cont)
-		bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 1);
-
-	/* Start SPI transfer */
-	tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
-	tmp |= B53SPI_MSPI_SPCR2_SPE;
-	if (cont)
-		tmp |= B53SPI_MSPI_SPCR2_CONT_AFTER_CMD;
-	bcm53xxspi_write(b53spi, B53SPI_MSPI_SPCR2, tmp);
-
-	/* Wait for SPI to finish */
-	bcm53xxspi_wait(b53spi, bcm53xxspi_calc_timeout(len));
-
-	if (!cont)
-		bcm53xxspi_write(b53spi, B53SPI_MSPI_WRITE_LOCK, 0);
-
-	for (i = 0; i < len; ++i) {
-		u16 reg = B53SPI_MSPI_RXRAM + 4 * (1 + i * 2);
-
-		/* Data stored in the transmit register file LSB */
-		r_buf[i] = (u8)bcm53xxspi_read(b53spi, reg);
-	}
-}
-
-static int bcm53xxspi_transfer_one(struct spi_master *master,
-				   struct spi_device *spi,
-				   struct spi_transfer *t)
-{
-	struct bcm53xxspi *b53spi = spi_master_get_devdata(master);
-	u8 *buf;
-	size_t left;
-
-	bcm53xxspi_disable_bspi(b53spi);
-
-	if (t->tx_buf) {
-		buf = (u8 *)t->tx_buf;
-		left = t->len;
-		while (left) {
-			size_t to_write = min_t(size_t, 16, left);
-			bool cont = !spi_transfer_is_last(master, t) ||
-				    left - to_write > 0;
-
-			bcm53xxspi_buf_write(b53spi, buf, to_write, cont);
-			left -= to_write;
-			buf += to_write;
-		}
-	}
-
-	if (t->rx_buf) {
-		buf = (u8 *)t->rx_buf;
-		left = t->len;
-		while (left) {
-			size_t to_read = min_t(size_t, 16, left);
-			bool cont = !spi_transfer_is_last(master, t) ||
-				    left - to_read > 0;
-
-			bcm53xxspi_buf_read(b53spi, buf, to_read, cont);
-			left -= to_read;
-			buf += to_read;
-		}
-	}
-
-	return 0;
-}
-
-static int bcm53xxspi_flash_read(struct spi_device *spi,
-				 struct spi_flash_read_message *msg)
-{
-	struct bcm53xxspi *b53spi = spi_master_get_devdata(spi->master);
-	int ret = 0;
-
-	if (msg->from + msg->len > BCM53XXSPI_FLASH_WINDOW)
-		return -EINVAL;
-
-	bcm53xxspi_enable_bspi(b53spi);
-	memcpy_fromio(msg->buf, b53spi->mmio_base + msg->from, msg->len);
-	msg->retlen = msg->len;
-
-	return ret;
-}
-
-/**************************************************
- * BCMA
- **************************************************/
-
-static const struct bcma_device_id bcm53xxspi_bcma_tbl[] = {
-	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_NS_QSPI, BCMA_ANY_REV, BCMA_ANY_CLASS),
-	{},
-};
-MODULE_DEVICE_TABLE(bcma, bcm53xxspi_bcma_tbl);
-
-static int bcm53xxspi_bcma_probe(struct bcma_device *core)
-{
-	struct device *dev = &core->dev;
-	struct bcm53xxspi *b53spi;
-	struct spi_master *master;
-	int err;
-
-	if (core->bus->drv_cc.core->id.rev != 42) {
-		pr_err("SPI on SoC with unsupported ChipCommon rev\n");
-		return -ENOTSUPP;
-	}
-
-	master = spi_alloc_master(dev, sizeof(*b53spi));
-	if (!master)
-		return -ENOMEM;
-
-	b53spi = spi_master_get_devdata(master);
-	b53spi->master = master;
-	b53spi->core = core;
-
-	if (core->addr_s[0])
-		b53spi->mmio_base = devm_ioremap(dev, core->addr_s[0],
-						 BCM53XXSPI_FLASH_WINDOW);
-	b53spi->bspi = true;
-	bcm53xxspi_disable_bspi(b53spi);
-
-	master->dev.of_node = dev->of_node;
-	master->transfer_one = bcm53xxspi_transfer_one;
-	if (b53spi->mmio_base)
-		master->spi_flash_read = bcm53xxspi_flash_read;
-
-	bcma_set_drvdata(core, b53spi);
-
-	err = devm_spi_register_master(dev, master);
-	if (err) {
-		spi_master_put(master);
-		bcma_set_drvdata(core, NULL);
-		return err;
-	}
-
-	return 0;
-}
-
-static struct bcma_driver bcm53xxspi_bcma_driver = {
-	.name		= KBUILD_MODNAME,
-	.id_table	= bcm53xxspi_bcma_tbl,
-	.probe		= bcm53xxspi_bcma_probe,
-};
-
-/**************************************************
- * Init & exit
- **************************************************/
-
-static int __init bcm53xxspi_module_init(void)
-{
-	int err = 0;
-
-	err = bcma_driver_register(&bcm53xxspi_bcma_driver);
-	if (err)
-		pr_err("Failed to register bcma driver: %d\n", err);
-
-	return err;
-}
-
-static void __exit bcm53xxspi_module_exit(void)
-{
-	bcma_driver_unregister(&bcm53xxspi_bcma_driver);
-}
-
-module_init(bcm53xxspi_module_init);
-module_exit(bcm53xxspi_module_exit);
-
-MODULE_DESCRIPTION("Broadcom BCM53xx SPI Controller driver");
-MODULE_AUTHOR("Rafał Miłecki <zajec5@gmail.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-bcm53xx.h b/drivers/spi/spi-bcm53xx.h
deleted file mode 100644
index 03e3442..0000000
--- a/drivers/spi/spi-bcm53xx.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef SPI_BCM53XX_H
-#define SPI_BCM53XX_H
-
-#define B53SPI_BSPI_REVISION_ID			0x000
-#define B53SPI_BSPI_SCRATCH			0x004
-#define B53SPI_BSPI_MAST_N_BOOT_CTRL		0x008
-#define B53SPI_BSPI_BUSY_STATUS			0x00c
-#define B53SPI_BSPI_INTR_STATUS			0x010
-#define B53SPI_BSPI_B0_STATUS			0x014
-#define B53SPI_BSPI_B0_CTRL			0x018
-#define B53SPI_BSPI_B1_STATUS			0x01c
-#define B53SPI_BSPI_B1_CTRL			0x020
-#define B53SPI_BSPI_STRAP_OVERRIDE_CTRL		0x024
-#define B53SPI_BSPI_FLEX_MODE_ENABLE		0x028
-#define B53SPI_BSPI_BITS_PER_CYCLE		0x02c
-#define B53SPI_BSPI_BITS_PER_PHASE		0x030
-#define B53SPI_BSPI_CMD_AND_MODE_BYTE		0x034
-#define B53SPI_BSPI_BSPI_FLASH_UPPER_ADDR_BYTE	0x038
-#define B53SPI_BSPI_BSPI_XOR_VALUE		0x03c
-#define B53SPI_BSPI_BSPI_XOR_ENABLE		0x040
-#define B53SPI_BSPI_BSPI_PIO_MODE_ENABLE	0x044
-#define B53SPI_BSPI_BSPI_PIO_IODIR		0x048
-#define B53SPI_BSPI_BSPI_PIO_DATA		0x04c
-
-/* RAF */
-#define B53SPI_RAF_START_ADDR			0x100
-#define B53SPI_RAF_NUM_WORDS			0x104
-#define B53SPI_RAF_CTRL				0x108
-#define B53SPI_RAF_FULLNESS			0x10c
-#define B53SPI_RAF_WATERMARK			0x110
-#define B53SPI_RAF_STATUS			0x114
-#define B53SPI_RAF_READ_DATA			0x118
-#define B53SPI_RAF_WORD_CNT			0x11c
-#define B53SPI_RAF_CURR_ADDR			0x120
-
-/* MSPI */
-#define B53SPI_MSPI_SPCR0_LSB			0x200
-#define B53SPI_MSPI_SPCR0_MSB			0x204
-#define B53SPI_MSPI_SPCR1_LSB			0x208
-#define B53SPI_MSPI_SPCR1_MSB			0x20c
-#define B53SPI_MSPI_NEWQP			0x210
-#define B53SPI_MSPI_ENDQP			0x214
-#define B53SPI_MSPI_SPCR2			0x218
-#define  B53SPI_MSPI_SPCR2_SPE			0x00000040
-#define  B53SPI_MSPI_SPCR2_CONT_AFTER_CMD	0x00000080
-#define B53SPI_MSPI_MSPI_STATUS			0x220
-#define  B53SPI_MSPI_MSPI_STATUS_SPIF		0x00000001
-#define B53SPI_MSPI_CPTQP			0x224
-#define B53SPI_MSPI_TXRAM			0x240 /* 32 registers, up to 0x2b8 */
-#define B53SPI_MSPI_RXRAM			0x2c0 /* 32 registers, up to 0x33c */
-#define B53SPI_MSPI_CDRAM			0x340 /* 16 registers, up to 0x37c */
-#define  B53SPI_CDRAM_PCS_PCS0			0x00000001
-#define  B53SPI_CDRAM_PCS_PCS1			0x00000002
-#define  B53SPI_CDRAM_PCS_PCS2			0x00000004
-#define  B53SPI_CDRAM_PCS_PCS3			0x00000008
-#define  B53SPI_CDRAM_PCS_DISABLE_ALL		0x0000000f
-#define  B53SPI_CDRAM_PCS_DSCK			0x00000010
-#define  B53SPI_CDRAM_BITSE			0x00000040
-#define  B53SPI_CDRAM_CONT			0x00000080
-#define B53SPI_MSPI_WRITE_LOCK			0x380
-#define B53SPI_MSPI_DISABLE_FLUSH_GEN		0x384
-
-/* Interrupt */
-#define B53SPI_INTR_RAF_LR_FULLNESS_REACHED	0x3a0
-#define B53SPI_INTR_RAF_LR_TRUNCATED		0x3a4
-#define B53SPI_INTR_RAF_LR_IMPATIENT		0x3a8
-#define B53SPI_INTR_RAF_LR_SESSION_DONE		0x3ac
-#define B53SPI_INTR_RAF_LR_OVERREAD		0x3b0
-#define B53SPI_INTR_MSPI_DONE			0x3b4
-#define B53SPI_INTR_MSPI_HALT_SET_TRANSACTION_DONE	0x3b8
-
-#endif /* SPI_BCM53XX_H */
diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index cbcba61..c23849f 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -352,22 +352,31 @@
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		return ret;
+
 	rate = clk_get_rate(clk);
 	if (!rate) {
 		struct clk *pll_clk = devm_clk_get(dev, "pll");
 
-		if (IS_ERR(pll_clk))
-			return PTR_ERR(pll_clk);
+		if (IS_ERR(pll_clk)) {
+			ret = PTR_ERR(pll_clk);
+			goto out_disable_clk;
+		}
+
+		ret = clk_prepare_enable(pll_clk);
+		if (ret)
+			goto out_disable_clk;
 
 		rate = clk_get_rate(pll_clk);
-		if (!rate)
-			return -EINVAL;
+		clk_disable_unprepare(pll_clk);
+		if (!rate) {
+			ret = -EINVAL;
+			goto out_disable_clk;
+		}
 	}
 
-	ret = clk_prepare_enable(clk);
-	if (ret)
-		return ret;
-
 	master = spi_alloc_master(&pdev->dev, sizeof(*bs));
 	if (!master) {
 		ret = -ENOMEM;
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 4a00163..f3dad6f 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -694,8 +694,7 @@
  */
 static int __maybe_unused cdns_spi_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_master *master = dev_get_drvdata(dev);
 
 	return spi_master_suspend(master);
 }
@@ -710,8 +709,7 @@
  */
 static int __maybe_unused cdns_spi_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_master *master = dev_get_drvdata(dev);
 	struct cdns_spi *xspi = spi_master_get_devdata(master);
 
 	cdns_spi_init_hw(xspi);
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index cb3c730..e6d5cc6 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -1,19 +1,8 @@
-/*
- * Freescale i.MX7ULP LPSPI driver
- *
- * Copyright 2016 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Freescale i.MX7ULP LPSPI driver
+//
+// Copyright 2016 Freescale Semiconductor, Inc.
 
 #include <linux/clk.h>
 #include <linux/completion.h>
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index a056ee8..866246f 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -1,22 +1,6 @@
-/*
- * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
- * Copyright (C) 2008 Juergen Beisert
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the
- * Free Software Foundation
- * 51 Franklin Street, Fifth Floor
- * Boston, MA  02110-1301, USA.
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+// Copyright (C) 2008 Juergen Beisert
 
 #include <linux/clk.h>
 #include <linux/completion.h>
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
new file mode 100644
index 0000000..990770d
--- /dev/null
+++ b/drivers/spi/spi-mem.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ * Copyright (C) 2018 Bootlin
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+#include <linux/dmaengine.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#include "internals.h"
+
+/**
+ * spi_controller_dma_map_mem_op_data() - DMA-map the buffer attached to a
+ *					  memory operation
+ * @ctlr: the SPI controller requesting this dma_map()
+ * @op: the memory operation containing the buffer to map
+ * @sgt: a pointer to a non-initialized sg_table that will be filled by this
+ *	 function
+ *
+ * Some controllers might want to do DMA on the data buffer embedded in @op.
+ * This helper prepares everything for you and provides a ready-to-use
+ * sg_table. This function is not intended to be called from SPI drivers.
+ * Only SPI controller drivers should use it.
+ * Note that the caller must ensure the memory region pointed by
+ * op->data.buf.{in,out} is DMA-able before calling this function.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+int spi_controller_dma_map_mem_op_data(struct spi_controller *ctlr,
+				       const struct spi_mem_op *op,
+				       struct sg_table *sgt)
+{
+	struct device *dmadev;
+
+	if (!op->data.nbytes)
+		return -EINVAL;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT && ctlr->dma_tx)
+		dmadev = ctlr->dma_tx->device->dev;
+	else if (op->data.dir == SPI_MEM_DATA_IN && ctlr->dma_rx)
+		dmadev = ctlr->dma_rx->device->dev;
+	else
+		dmadev = ctlr->dev.parent;
+
+	if (!dmadev)
+		return -EINVAL;
+
+	return spi_map_buf(ctlr, dmadev, sgt, op->data.buf.in, op->data.nbytes,
+			   op->data.dir == SPI_MEM_DATA_IN ?
+			   DMA_FROM_DEVICE : DMA_TO_DEVICE);
+}
+EXPORT_SYMBOL_GPL(spi_controller_dma_map_mem_op_data);
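
Controller drivers that DMA the data phase are expected to bracket the
hardware transfer with this helper pair inside their ->exec_op(). A minimal
sketch, where foo_exec_op() and foo_issue_dma() are hypothetical stand-ins for
the device-specific programming:

    static int foo_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
    {
            struct spi_controller *ctlr = mem->spi->controller;
            struct sg_table sgt;
            int ret;

            ret = spi_controller_dma_map_mem_op_data(ctlr, op, &sgt);
            if (ret)
                    return ret;

            /* hypothetical: program the controller and wait for completion */
            ret = foo_issue_dma(ctlr, op, &sgt);

            spi_controller_dma_unmap_mem_op_data(ctlr, op, &sgt);
            return ret;
    }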
+
+/**
+ * spi_controller_dma_unmap_mem_op_data() - DMA-unmap the buffer attached to a
+ *					    memory operation
+ * @ctlr: the SPI controller requesting this dma_unmap()
+ * @op: the memory operation containing the buffer to unmap
+ * @sgt: a pointer to an sg_table previously initialized by
+ *	 spi_controller_dma_map_mem_op_data()
+ *
+ * Some controllers might want to do DMA on the data buffer embedded in @op.
+ * This helper prepares things so that the CPU can access the
+ * op->data.buf.{in,out} buffer again.
+ *
+ * This function is not intended to be called from SPI drivers. Only SPI
+ * controller drivers should use it.
+ *
+ * This function should be called after the DMA operation has finished and is
+ * only valid if the previous spi_controller_dma_map_mem_op_data() call
+ * returned 0.
+ */
+void spi_controller_dma_unmap_mem_op_data(struct spi_controller *ctlr,
+					  const struct spi_mem_op *op,
+					  struct sg_table *sgt)
+{
+	struct device *dmadev;
+
+	if (!op->data.nbytes)
+		return;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT && ctlr->dma_tx)
+		dmadev = ctlr->dma_tx->device->dev;
+	else if (op->data.dir == SPI_MEM_DATA_IN && ctlr->dma_rx)
+		dmadev = ctlr->dma_rx->device->dev;
+	else
+		dmadev = ctlr->dev.parent;
+
+	spi_unmap_buf(ctlr, dmadev, sgt,
+		      op->data.dir == SPI_MEM_DATA_IN ?
+		      DMA_FROM_DEVICE : DMA_TO_DEVICE);
+}
+EXPORT_SYMBOL_GPL(spi_controller_dma_unmap_mem_op_data);
+
+static int spi_check_buswidth_req(struct spi_mem *mem, u8 buswidth, bool tx)
+{
+	u32 mode = mem->spi->mode;
+
+	switch (buswidth) {
+	case 1:
+		return 0;
+
+	case 2:
+		if ((tx && (mode & (SPI_TX_DUAL | SPI_TX_QUAD))) ||
+		    (!tx && (mode & (SPI_RX_DUAL | SPI_RX_QUAD))))
+			return 0;
+
+		break;
+
+	case 4:
+		if ((tx && (mode & SPI_TX_QUAD)) ||
+		    (!tx && (mode & SPI_RX_QUAD)))
+			return 0;
+
+		break;
+
+	default:
+		break;
+	}
+
+	return -ENOTSUPP;
+}
+
+bool spi_mem_default_supports_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	if (spi_check_buswidth_req(mem, op->cmd.buswidth, true))
+		return false;
+
+	if (op->addr.nbytes &&
+	    spi_check_buswidth_req(mem, op->addr.buswidth, true))
+		return false;
+
+	if (op->dummy.nbytes &&
+	    spi_check_buswidth_req(mem, op->dummy.buswidth, true))
+		return false;
+
+	if (op->data.nbytes &&
+	    spi_check_buswidth_req(mem, op->data.buswidth,
+				   op->data.dir == SPI_MEM_DATA_OUT))
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(spi_mem_default_supports_op);
+
+/**
+ * spi_mem_supports_op() - Check if a memory device and the controller it is
+ *			   connected to support a specific memory operation
+ * @mem: the SPI memory
+ * @op: the memory operation to check
+ *
+ * Some controllers only support Single or Dual IOs, others might only
+ * support specific opcodes, or it can even be that the controller and
+ * device both support Quad IOs but the hardware prevents you from using
+ * them because only 2 IO lines are connected.
+ *
+ * This function checks whether a specific operation is supported.
+ *
+ * Return: true if @op is supported, false otherwise.
+ */
+bool spi_mem_supports_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct spi_controller *ctlr = mem->spi->controller;
+
+	if (ctlr->mem_ops && ctlr->mem_ops->supports_op)
+		return ctlr->mem_ops->supports_op(mem, op);
+
+	return spi_mem_default_supports_op(mem, op);
+}
+EXPORT_SYMBOL_GPL(spi_mem_supports_op);
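
Callers typically probe the fastest variant once and fall back when the
controller or board wiring cannot do it. A minimal sketch, with the 0x6b/0x03
opcodes and the mem/buf/len names assumed for illustration:

    struct spi_mem_op op =
            SPI_MEM_OP(SPI_MEM_OP_CMD(0x6b, 1),
                       SPI_MEM_OP_ADDR(3, 0, 1),
                       SPI_MEM_OP_DUMMY(1, 1),
                       SPI_MEM_OP_DATA_IN(len, buf, 4));

    if (!spi_mem_supports_op(mem, &op)) {
            /* fall back to a plain 1-1-1 read */
            op.cmd.opcode = 0x03;
            op.dummy.nbytes = 0;
            op.data.buswidth = 1;
    }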
+
+/**
+ * spi_mem_exec_op() - Execute a memory operation
+ * @mem: the SPI memory
+ * @op: the memory operation to execute
+ *
+ * Executes a memory operation.
+ *
+ * This function first checks that @op is supported and then tries to execute
+ * it.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+int spi_mem_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	unsigned int tmpbufsize, xferpos = 0, totalxferlen = 0;
+	struct spi_controller *ctlr = mem->spi->controller;
+	struct spi_transfer xfers[4] = { };
+	struct spi_message msg;
+	u8 *tmpbuf;
+	int ret;
+
+	if (!spi_mem_supports_op(mem, op))
+		return -ENOTSUPP;
+
+	if (ctlr->mem_ops) {
+		/*
+		 * Flush the message queue before executing our SPI memory
+		 * operation to prevent preemption of regular SPI transfers.
+		 */
+		spi_flush_queue(ctlr);
+
+		if (ctlr->auto_runtime_pm) {
+			ret = pm_runtime_get_sync(ctlr->dev.parent);
+			if (ret < 0) {
+				dev_err(&ctlr->dev,
+					"Failed to power device: %d\n",
+					ret);
+				return ret;
+			}
+		}
+
+		mutex_lock(&ctlr->bus_lock_mutex);
+		mutex_lock(&ctlr->io_mutex);
+		ret = ctlr->mem_ops->exec_op(mem, op);
+		mutex_unlock(&ctlr->io_mutex);
+		mutex_unlock(&ctlr->bus_lock_mutex);
+
+		if (ctlr->auto_runtime_pm)
+			pm_runtime_put(ctlr->dev.parent);
+
+		/*
+		 * Some controllers only optimize specific paths (typically the
+		 * read path) and expect the core to use the regular SPI
+		 * interface in other cases.
+		 */
+		if (!ret || ret != -ENOTSUPP)
+			return ret;
+	}
+
+	tmpbufsize = sizeof(op->cmd.opcode) + op->addr.nbytes +
+		     op->dummy.nbytes;
+
+	/*
+	 * Allocate a buffer to transmit the CMD, ADDR cycles with kmalloc() so
+	 * we're guaranteed that this buffer is DMA-able, as required by the
+	 * SPI layer.
+	 */
+	tmpbuf = kzalloc(tmpbufsize, GFP_KERNEL | GFP_DMA);
+	if (!tmpbuf)
+		return -ENOMEM;
+
+	spi_message_init(&msg);
+
+	tmpbuf[0] = op->cmd.opcode;
+	xfers[xferpos].tx_buf = tmpbuf;
+	xfers[xferpos].len = sizeof(op->cmd.opcode);
+	xfers[xferpos].tx_nbits = op->cmd.buswidth;
+	spi_message_add_tail(&xfers[xferpos], &msg);
+	xferpos++;
+	totalxferlen++;
+
+	if (op->addr.nbytes) {
+		int i;
+
+		for (i = 0; i < op->addr.nbytes; i++)
+			tmpbuf[i + 1] = op->addr.val >>
+					(8 * (op->addr.nbytes - i - 1));
+
+		xfers[xferpos].tx_buf = tmpbuf + 1;
+		xfers[xferpos].len = op->addr.nbytes;
+		xfers[xferpos].tx_nbits = op->addr.buswidth;
+		spi_message_add_tail(&xfers[xferpos], &msg);
+		xferpos++;
+		totalxferlen += op->addr.nbytes;
+	}
+
+	if (op->dummy.nbytes) {
+		memset(tmpbuf + op->addr.nbytes + 1, 0xff, op->dummy.nbytes);
+		xfers[xferpos].tx_buf = tmpbuf + op->addr.nbytes + 1;
+		xfers[xferpos].len = op->dummy.nbytes;
+		xfers[xferpos].tx_nbits = op->dummy.buswidth;
+		spi_message_add_tail(&xfers[xferpos], &msg);
+		xferpos++;
+		totalxferlen += op->dummy.nbytes;
+	}
+
+	if (op->data.nbytes) {
+		if (op->data.dir == SPI_MEM_DATA_IN) {
+			xfers[xferpos].rx_buf = op->data.buf.in;
+			xfers[xferpos].rx_nbits = op->data.buswidth;
+		} else {
+			xfers[xferpos].tx_buf = op->data.buf.out;
+			xfers[xferpos].tx_nbits = op->data.buswidth;
+		}
+
+		xfers[xferpos].len = op->data.nbytes;
+		spi_message_add_tail(&xfers[xferpos], &msg);
+		xferpos++;
+		totalxferlen += op->data.nbytes;
+	}
+
+	ret = spi_sync(mem->spi, &msg);
+
+	kfree(tmpbuf);
+
+	if (ret)
+		return ret;
+
+	if (msg.actual_length != totalxferlen)
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spi_mem_exec_op);
+
+/**
+ * spi_mem_adjust_op_size() - Adjust the data size of a SPI mem operation to
+ *			      match controller limitations
+ * @mem: the SPI memory
+ * @op: the operation to adjust
+ *
+ * Some controllers have FIFO limitations and must split a data transfer
+ * operation into multiple ones; others require a specific alignment for
+ * optimized accesses. This function allows SPI mem drivers to split a single
+ * operation into multiple sub-operations when required.
+ *
+ * Return: a negative error code if the controller can't properly adjust @op,
+ *	   0 otherwise. Note that @op->data.nbytes will be updated if @op
+ *	   can't be handled in a single step.
+ */
+int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct spi_controller *ctlr = mem->spi->controller;
+
+	if (ctlr->mem_ops && ctlr->mem_ops->adjust_op_size)
+		return ctlr->mem_ops->adjust_op_size(mem, op);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spi_mem_adjust_op_size);
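
Combined with spi_mem_exec_op(), this gives the chunked-access idiom the new
API expects of callers. A minimal sketch, assuming a plain 1-1-1 read and
caller-provided mem, buf and total_len:

    u8 *dst = buf;
    u64 from = 0;
    size_t remaining = total_len;

    while (remaining) {
            struct spi_mem_op op =
                    SPI_MEM_OP(SPI_MEM_OP_CMD(0x03, 1),
                               SPI_MEM_OP_ADDR(3, from, 1),
                               SPI_MEM_OP_NO_DUMMY,
                               SPI_MEM_OP_DATA_IN(remaining, dst, 1));
            int ret;

            ret = spi_mem_adjust_op_size(mem, &op); /* may shrink data.nbytes */
            if (ret)
                    return ret;

            ret = spi_mem_exec_op(mem, &op);
            if (ret)
                    return ret;

            from += op.data.nbytes;
            dst += op.data.nbytes;
            remaining -= op.data.nbytes;
    }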
+
+static inline struct spi_mem_driver *to_spi_mem_drv(struct device_driver *drv)
+{
+	return container_of(drv, struct spi_mem_driver, spidrv.driver);
+}
+
+static int spi_mem_probe(struct spi_device *spi)
+{
+	struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
+	struct spi_mem *mem;
+
+	mem = devm_kzalloc(&spi->dev, sizeof(*mem), GFP_KERNEL);
+	if (!mem)
+		return -ENOMEM;
+
+	mem->spi = spi;
+	spi_set_drvdata(spi, mem);
+
+	return memdrv->probe(mem);
+}
+
+static int spi_mem_remove(struct spi_device *spi)
+{
+	struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
+	struct spi_mem *mem = spi_get_drvdata(spi);
+
+	if (memdrv->remove)
+		return memdrv->remove(mem);
+
+	return 0;
+}
+
+static void spi_mem_shutdown(struct spi_device *spi)
+{
+	struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
+	struct spi_mem *mem = spi_get_drvdata(spi);
+
+	if (memdrv->shutdown)
+		memdrv->shutdown(mem);
+}
+
+/**
+ * spi_mem_driver_register_with_owner() - Register a SPI memory driver
+ * @memdrv: the SPI memory driver to register
+ * @owner: the owner of this driver
+ *
+ * Registers a SPI memory driver.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+int spi_mem_driver_register_with_owner(struct spi_mem_driver *memdrv,
+				       struct module *owner)
+{
+	memdrv->spidrv.probe = spi_mem_probe;
+	memdrv->spidrv.remove = spi_mem_remove;
+	memdrv->spidrv.shutdown = spi_mem_shutdown;
+
+	return __spi_register_driver(owner, &memdrv->spidrv);
+}
+EXPORT_SYMBOL_GPL(spi_mem_driver_register_with_owner);
+
+/**
+ * spi_mem_driver_unregister() - Unregister a SPI memory driver
+ * @memdrv: the SPI memory driver to unregister
+ *
+ * Unregisters a SPI memory driver.
+ */
+void spi_mem_driver_unregister(struct spi_mem_driver *memdrv)
+{
+	spi_unregister_driver(&memdrv->spidrv);
+}
+EXPORT_SYMBOL_GPL(spi_mem_driver_unregister);
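
On the client side, a driver binds to the new bus glue through struct
spi_mem_driver; the accompanying header also provides the
spi_mem_driver_register() and module_spi_mem_driver() wrappers around the
_with_owner variant. A minimal sketch, with all foo_* names hypothetical:

    static int foo_probe(struct spi_mem *mem)
    {
            /* mem->spi is the underlying spi_device set up by the core */
            return 0;
    }

    static struct spi_mem_driver foo_mem_driver = {
            .spidrv = {
                    .driver = { .name = "foo-mem" },
            },
            .probe = foo_probe,
    };
    module_spi_mem_driver(foo_mem_driver);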
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index 5c82910..7fe4488 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -574,10 +574,15 @@
 		master->max_speed_hz = rate >> 2;
 
 	ret = devm_spi_register_master(&pdev->dev, master);
-	if (!ret)
-		return 0;
+	if (ret) {
+		dev_err(&pdev->dev, "spi master registration failed\n");
+		goto out_clk;
+	}
 
-	dev_err(&pdev->dev, "spi master registration failed\n");
+	return 0;
+
+out_clk:
+	clk_disable_unprepare(spicc->core);
 
 out_master:
 	spi_master_put(master);
diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c
index e8b59ce..0e55784 100644
--- a/drivers/spi/spi-mpc52xx.c
+++ b/drivers/spi/spi-mpc52xx.c
@@ -447,7 +447,7 @@
 
 		for (i = 0; i < ms->gpio_cs_count; i++) {
 			gpio_cs = of_get_gpio(op->dev.of_node, i);
-			if (gpio_cs < 0) {
+			if (!gpio_is_valid(gpio_cs)) {
 				dev_err(&op->dev,
 					"could not parse the gpio field in oftree\n");
 				rc = -ENODEV;
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 3d216b9..6ac95a2 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -1,32 +1,22 @@
-/*
- * Freescale MXS SPI master driver
- *
- * Copyright 2012 DENX Software Engineering, GmbH.
- * Copyright 2012 Freescale Semiconductor, Inc.
- * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
- *
- * Rework and transition to new API by:
- * Marek Vasut <marex@denx.de>
- *
- * Based on previous attempt by:
- * Fabio Estevam <fabio.estevam@freescale.com>
- *
- * Based on code from U-Boot bootloader by:
- * Marek Vasut <marex@denx.de>
- *
- * Based on spi-stmp.c, which is:
- * Author: Dmitry Pervushin <dimka@embeddedalley.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Freescale MXS SPI master driver
+//
+// Copyright 2012 DENX Software Engineering, GmbH.
+// Copyright 2012 Freescale Semiconductor, Inc.
+// Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved.
+//
+// Rework and transition to new API by:
+// Marek Vasut <marex@denx.de>
+//
+// Based on previous attempt by:
+// Fabio Estevam <fabio.estevam@freescale.com>
+//
+// Based on code from U-Boot bootloader by:
+// Marek Vasut <marex@denx.de>
+//
+// Based on spi-stmp.c, which is:
+// Author: Dmitry Pervushin <dimka@embeddedalley.com>
 
 #include <linux/kernel.h>
 #include <linux/ioport.h>
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 9bf64e6..6c628a5 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -255,6 +255,7 @@
 	if (spi->controller_state) {
 		int err = pm_runtime_get_sync(mcspi->dev);
 		if (err < 0) {
+			pm_runtime_put_noidle(mcspi->dev);
 			dev_err(mcspi->dev, "failed to get sync: %d\n", err);
 			return;
 		}
@@ -350,20 +351,6 @@
 	mcspi->fifo_depth = 0;
 }
 
-static void omap2_mcspi_restore_ctx(struct omap2_mcspi *mcspi)
-{
-	struct spi_master	*spi_cntrl = mcspi->master;
-	struct omap2_mcspi_regs	*ctx = &mcspi->ctx;
-	struct omap2_mcspi_cs	*cs;
-
-	/* McSPI: context restore */
-	mcspi_write_reg(spi_cntrl, OMAP2_MCSPI_MODULCTRL, ctx->modulctrl);
-	mcspi_write_reg(spi_cntrl, OMAP2_MCSPI_WAKEUPENABLE, ctx->wakeupenable);
-
-	list_for_each_entry(cs, &ctx->cs, node)
-		writel_relaxed(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
-}
-
 static int mcspi_wait_for_reg_bit(void __iomem *reg, unsigned long bit)
 {
 	unsigned long timeout;
@@ -1065,8 +1052,11 @@
 	}
 
 	ret = pm_runtime_get_sync(mcspi->dev);
-	if (ret < 0)
+	if (ret < 0) {
+		pm_runtime_put_noidle(mcspi->dev);
+
 		return ret;
+	}
 
 	ret = omap2_mcspi_setup_transfer(spi, NULL);
 	pm_runtime_mark_last_busy(mcspi->dev);
@@ -1284,8 +1274,11 @@
 	int			ret = 0;
 
 	ret = pm_runtime_get_sync(mcspi->dev);
-	if (ret < 0)
+	if (ret < 0) {
+		pm_runtime_put_noidle(mcspi->dev);
+
 		return ret;
+	}
 
 	mcspi_write_reg(master, OMAP2_MCSPI_WAKEUPENABLE,
 			OMAP2_MCSPI_WAKEUPENABLE_WKEN);
@@ -1297,14 +1290,39 @@
 	return 0;
 }
 
+/*
+ * When SPI wakes up from off-mode, CS is in the active state. If it was
+ * in the inactive state when the driver was suspended, force it back to
+ * inactive at wake-up.
+ */
 static int omap_mcspi_runtime_resume(struct device *dev)
 {
-	struct omap2_mcspi	*mcspi;
-	struct spi_master	*master;
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
+	struct omap2_mcspi_regs *ctx = &mcspi->ctx;
+	struct omap2_mcspi_cs *cs;
 
-	master = dev_get_drvdata(dev);
-	mcspi = spi_master_get_devdata(master);
-	omap2_mcspi_restore_ctx(mcspi);
+	/* McSPI: context restore */
+	mcspi_write_reg(master, OMAP2_MCSPI_MODULCTRL, ctx->modulctrl);
+	mcspi_write_reg(master, OMAP2_MCSPI_WAKEUPENABLE, ctx->wakeupenable);
+
+	list_for_each_entry(cs, &ctx->cs, node) {
+		/*
+		 * We need to toggle the CS state so that the OMAP takes
+		 * this change into account.
+		 */
+		if ((cs->chconf0 & OMAP2_MCSPI_CHCONF_FORCE) == 0) {
+			cs->chconf0 |= OMAP2_MCSPI_CHCONF_FORCE;
+			writel_relaxed(cs->chconf0,
+				       cs->base + OMAP2_MCSPI_CHCONF0);
+			cs->chconf0 &= ~OMAP2_MCSPI_CHCONF_FORCE;
+			writel_relaxed(cs->chconf0,
+				       cs->base + OMAP2_MCSPI_CHCONF0);
+		} else {
+			writel_relaxed(cs->chconf0,
+				       cs->base + OMAP2_MCSPI_CHCONF0);
+		}
+	}
 
 	return 0;
 }
@@ -1447,50 +1465,33 @@
 MODULE_ALIAS("platform:omap2_mcspi");
 
 #ifdef	CONFIG_SUSPEND
-/*
- * When SPI wake up from off-mode, CS is in activate state. If it was in
- * unactive state when driver was suspend, then force it to unactive state at
- * wake up.
- */
-static int omap2_mcspi_resume(struct device *dev)
-{
-	struct spi_master	*master = dev_get_drvdata(dev);
-	struct omap2_mcspi	*mcspi = spi_master_get_devdata(master);
-	struct omap2_mcspi_regs	*ctx = &mcspi->ctx;
-	struct omap2_mcspi_cs	*cs;
-
-	pm_runtime_get_sync(mcspi->dev);
-	list_for_each_entry(cs, &ctx->cs, node) {
-		if ((cs->chconf0 & OMAP2_MCSPI_CHCONF_FORCE) == 0) {
-			/*
-			 * We need to toggle CS state for OMAP take this
-			 * change in account.
-			 */
-			cs->chconf0 |= OMAP2_MCSPI_CHCONF_FORCE;
-			writel_relaxed(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
-			cs->chconf0 &= ~OMAP2_MCSPI_CHCONF_FORCE;
-			writel_relaxed(cs->chconf0, cs->base + OMAP2_MCSPI_CHCONF0);
-		}
-	}
-	pm_runtime_mark_last_busy(mcspi->dev);
-	pm_runtime_put_autosuspend(mcspi->dev);
-
-	return pinctrl_pm_select_default_state(dev);
-}
-
-static int omap2_mcspi_suspend(struct device *dev)
+static int omap2_mcspi_suspend_noirq(struct device *dev)
 {
 	return pinctrl_pm_select_sleep_state(dev);
 }
 
+static int omap2_mcspi_resume_noirq(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
+	int error;
+
+	error = pinctrl_pm_select_default_state(dev);
+	if (error)
+		dev_warn(mcspi->dev, "%s: failed to set pins: %i\n",
+			 __func__, error);
+
+	return 0;
+}
+
 #else
-#define omap2_mcspi_suspend	NULL
-#define	omap2_mcspi_resume	NULL
+#define omap2_mcspi_suspend_noirq	NULL
+#define omap2_mcspi_resume_noirq	NULL
 #endif
 
 static const struct dev_pm_ops omap2_mcspi_pm_ops = {
-	.resume = omap2_mcspi_resume,
-	.suspend = omap2_mcspi_suspend,
+	.suspend_noirq = omap2_mcspi_suspend_noirq,
+	.resume_noirq = omap2_mcspi_resume_noirq,
 	.runtime_resume	= omap_mcspi_runtime_resume,
 };
 
diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index 3d7f660..2fa7f4b 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -51,19 +51,15 @@
 		if (!pxa25x_ssp_comp(drv_data))
 			pxa2xx_spi_write(drv_data, SSTO, 0);
 
-		if (!error) {
-			msg->actual_length += drv_data->len;
-			msg->state = pxa2xx_spi_next_transfer(drv_data);
-		} else {
+		if (error) {
 			/* In case we got an error we disable the SSP now */
 			pxa2xx_spi_write(drv_data, SSCR0,
 					 pxa2xx_spi_read(drv_data, SSCR0)
 					 & ~SSCR0_SSE);
-
-			msg->state = ERROR_STATE;
+			msg->status = -EIO;
 		}
 
-		tasklet_schedule(&drv_data->pump_transfers);
+		spi_finalize_current_transfer(drv_data->master);
 	}
 }
 
@@ -74,11 +70,11 @@
 
 static struct dma_async_tx_descriptor *
 pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
-			   enum dma_transfer_direction dir)
+			   enum dma_transfer_direction dir,
+			   struct spi_transfer *xfer)
 {
 	struct chip_data *chip =
 		spi_get_ctldata(drv_data->master->cur_msg->spi);
-	struct spi_transfer *xfer = drv_data->cur_transfer;
 	enum dma_slave_buswidth width;
 	struct dma_slave_config cfg;
 	struct dma_chan *chan;
@@ -144,12 +140,13 @@
 	return IRQ_NONE;
 }
 
-int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst)
+int pxa2xx_spi_dma_prepare(struct driver_data *drv_data,
+			   struct spi_transfer *xfer)
 {
 	struct dma_async_tx_descriptor *tx_desc, *rx_desc;
 	int err;
 
-	tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV);
+	tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV, xfer);
 	if (!tx_desc) {
 		dev_err(&drv_data->pdev->dev,
 			"failed to get DMA TX descriptor\n");
@@ -157,7 +154,7 @@
 		goto err_tx;
 	}
 
-	rx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_DEV_TO_MEM);
+	rx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_DEV_TO_MEM, xfer);
 	if (!rx_desc) {
 		dev_err(&drv_data->pdev->dev,
 			"failed to get DMA RX descriptor\n");
@@ -187,6 +184,13 @@
 	atomic_set(&drv_data->dma_running, 1);
 }
 
+void pxa2xx_spi_dma_stop(struct driver_data *drv_data)
+{
+	atomic_set(&drv_data->dma_running, 0);
+	dmaengine_terminate_sync(drv_data->master->dma_rx);
+	dmaengine_terminate_sync(drv_data->master->dma_tx);
+}
+
 int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
 {
 	struct pxa2xx_spi_master *pdata = drv_data->master_info;
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 82dcb88..0b2d60d 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -340,9 +340,11 @@
 	}
 }
 
-static void lpss_ssp_select_cs(struct driver_data *drv_data,
+static void lpss_ssp_select_cs(struct spi_device *spi,
 			       const struct lpss_config *config)
 {
+	struct driver_data *drv_data =
+		spi_controller_get_devdata(spi->controller);
 	u32 value, cs;
 
 	if (!config->cs_sel_mask)
@@ -350,7 +352,7 @@
 
 	value = __lpss_ssp_read_priv(drv_data, config->reg_cs_ctrl);
 
-	cs = drv_data->master->cur_msg->spi->chip_select;
+	cs = spi->chip_select;
 	cs <<= config->cs_sel_shift;
 	if (cs != (value & config->cs_sel_mask)) {
 		/*
@@ -369,15 +371,17 @@
 	}
 }
 
-static void lpss_ssp_cs_control(struct driver_data *drv_data, bool enable)
+static void lpss_ssp_cs_control(struct spi_device *spi, bool enable)
 {
+	struct driver_data *drv_data =
+		spi_controller_get_devdata(spi->controller);
 	const struct lpss_config *config;
 	u32 value;
 
 	config = lpss_get_config(drv_data);
 
 	if (enable)
-		lpss_ssp_select_cs(drv_data, config);
+		lpss_ssp_select_cs(spi, config);
 
 	value = __lpss_ssp_read_priv(drv_data, config->reg_cs_ctrl);
 	if (enable)
@@ -387,10 +391,11 @@
 	__lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value);
 }
 
-static void cs_assert(struct driver_data *drv_data)
+static void cs_assert(struct spi_device *spi)
 {
-	struct chip_data *chip =
-		spi_get_ctldata(drv_data->master->cur_msg->spi);
+	struct chip_data *chip = spi_get_ctldata(spi);
+	struct driver_data *drv_data =
+		spi_controller_get_devdata(spi->controller);
 
 	if (drv_data->ssp_type == CE4100_SSP) {
 		pxa2xx_spi_write(drv_data, SSSR, chip->frm);
@@ -408,13 +413,14 @@
 	}
 
 	if (is_lpss_ssp(drv_data))
-		lpss_ssp_cs_control(drv_data, true);
+		lpss_ssp_cs_control(spi, true);
 }
 
-static void cs_deassert(struct driver_data *drv_data)
+static void cs_deassert(struct spi_device *spi)
 {
-	struct chip_data *chip =
-		spi_get_ctldata(drv_data->master->cur_msg->spi);
+	struct chip_data *chip = spi_get_ctldata(spi);
+	struct driver_data *drv_data =
+		spi_controller_get_devdata(spi->controller);
 	unsigned long timeout;
 
 	if (drv_data->ssp_type == CE4100_SSP)
@@ -437,7 +443,15 @@
 	}
 
 	if (is_lpss_ssp(drv_data))
-		lpss_ssp_cs_control(drv_data, false);
+		lpss_ssp_cs_control(spi, false);
+}
+
+static void pxa2xx_spi_set_cs(struct spi_device *spi, bool level)
+{
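+	/*
+	 * Called by the SPI core with the logic level of the chip select
+	 * line: high (true) deasserts the device, low (false) asserts it.
+	 */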
+	if (level)
+		cs_deassert(spi);
+	else
+		cs_assert(spi);
 }
 
 int pxa2xx_spi_flush(struct driver_data *drv_data)
@@ -549,70 +563,6 @@
 	return drv_data->rx == drv_data->rx_end;
 }
 
-void *pxa2xx_spi_next_transfer(struct driver_data *drv_data)
-{
-	struct spi_message *msg = drv_data->master->cur_msg;
-	struct spi_transfer *trans = drv_data->cur_transfer;
-
-	/* Move to next transfer */
-	if (trans->transfer_list.next != &msg->transfers) {
-		drv_data->cur_transfer =
-			list_entry(trans->transfer_list.next,
-					struct spi_transfer,
-					transfer_list);
-		return RUNNING_STATE;
-	} else
-		return DONE_STATE;
-}
-
-/* caller already set message->status; dma and pio irqs are blocked */
-static void giveback(struct driver_data *drv_data)
-{
-	struct spi_transfer* last_transfer;
-	struct spi_message *msg;
-
-	msg = drv_data->master->cur_msg;
-	drv_data->cur_transfer = NULL;
-
-	last_transfer = list_last_entry(&msg->transfers, struct spi_transfer,
-					transfer_list);
-
-	/* Delay if requested before any change in chip select */
-	if (last_transfer->delay_usecs)
-		udelay(last_transfer->delay_usecs);
-
-	/* Drop chip select UNLESS cs_change is true or we are returning
-	 * a message with an error, or next message is for another chip
-	 */
-	if (!last_transfer->cs_change)
-		cs_deassert(drv_data);
-	else {
-		struct spi_message *next_msg;
-
-		/* Holding of cs was hinted, but we need to make sure
-		 * the next message is for the same chip.  Don't waste
-		 * time with the following tests unless this was hinted.
-		 *
-		 * We cannot postpone this until pump_messages, because
-		 * after calling msg->complete (below) the driver that
-		 * sent the current message could be unloaded, which
-		 * could invalidate the cs_control() callback...
-		 */
-
-		/* get a pointer to the next message, if any */
-		next_msg = spi_get_next_queued_message(drv_data->master);
-
-		/* see if the next and current messages point
-		 * to the same chip
-		 */
-		if ((next_msg && next_msg->spi != msg->spi) ||
-		    msg->state == ERROR_STATE)
-			cs_deassert(drv_data);
-	}
-
-	spi_finalize_current_message(drv_data->master);
-}
-
 static void reset_sccr1(struct driver_data *drv_data)
 {
 	struct chip_data *chip =
@@ -648,8 +598,8 @@
 
 	dev_err(&drv_data->pdev->dev, "%s\n", msg);
 
-	drv_data->master->cur_msg->state = ERROR_STATE;
-	tasklet_schedule(&drv_data->pump_transfers);
+	drv_data->master->cur_msg->status = -EIO;
+	spi_finalize_current_transfer(drv_data->master);
 }
 
 static void int_transfer_complete(struct driver_data *drv_data)
@@ -660,19 +610,7 @@
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 
-	/* Update total byte transferred return count actual bytes read */
-	drv_data->master->cur_msg->actual_length += drv_data->len -
-				(drv_data->rx_end - drv_data->rx);
-
-	/* Transfer delays and chip select release are
-	 * handled in pump_transfers or giveback
-	 */
-
-	/* Move to next transfer */
-	drv_data->master->cur_msg->state = pxa2xx_spi_next_transfer(drv_data);
-
-	/* Schedule transfer tasklet */
-	tasklet_schedule(&drv_data->pump_transfers);
+	spi_finalize_current_transfer(drv_data->master);
 }
 
 static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
@@ -973,17 +911,16 @@
 	       xfer->len >= chip->dma_burst_size;
 }
 
-static void pump_transfers(unsigned long data)
+static int pxa2xx_spi_transfer_one(struct spi_controller *master,
+				   struct spi_device *spi,
+				   struct spi_transfer *transfer)
 {
-	struct driver_data *drv_data = (struct driver_data *)data;
-	struct spi_controller *master = drv_data->master;
+	struct driver_data *drv_data = spi_controller_get_devdata(master);
 	struct spi_message *message = master->cur_msg;
 	struct chip_data *chip = spi_get_ctldata(message->spi);
 	u32 dma_thresh = chip->dma_threshold;
 	u32 dma_burst = chip->dma_burst_size;
 	u32 change_mask = pxa2xx_spi_get_ssrc1_change_mask(drv_data);
-	struct spi_transfer *transfer;
-	struct spi_transfer *previous;
 	u32 clk_div;
 	u8 bits;
 	u32 speed;
@@ -992,36 +929,6 @@
 	int err;
 	int dma_mapped;
 
-	/* Get current state information */
-	transfer = drv_data->cur_transfer;
-
-	/* Handle for abort */
-	if (message->state == ERROR_STATE) {
-		message->status = -EIO;
-		giveback(drv_data);
-		return;
-	}
-
-	/* Handle end of message */
-	if (message->state == DONE_STATE) {
-		message->status = 0;
-		giveback(drv_data);
-		return;
-	}
-
-	/* Delay if requested at end of transfer before CS change */
-	if (message->state == RUNNING_STATE) {
-		previous = list_entry(transfer->transfer_list.prev,
-					struct spi_transfer,
-					transfer_list);
-		if (previous->delay_usecs)
-			udelay(previous->delay_usecs);
-
-		/* Drop chip select only if cs_change is requested */
-		if (previous->cs_change)
-			cs_deassert(drv_data);
-	}
-
 	/* Check if we can DMA this transfer */
 	if (transfer->len > MAX_DMA_LEN && chip->enable_dma) {
 
@@ -1029,34 +936,27 @@
 		if (message->is_dma_mapped
 				|| transfer->rx_dma || transfer->tx_dma) {
 			dev_err(&drv_data->pdev->dev,
-				"pump_transfers: mapped transfer length of "
-				"%u is greater than %d\n",
+				"Mapped transfer length of %u is greater than %d\n",
 				transfer->len, MAX_DMA_LEN);
-			message->status = -EINVAL;
-			giveback(drv_data);
-			return;
+			return -EINVAL;
 		}
 
 		/* warn ... we force this to PIO mode */
 		dev_warn_ratelimited(&message->spi->dev,
-				     "pump_transfers: DMA disabled for transfer length %ld "
-				     "greater than %d\n",
-				     (long)drv_data->len, MAX_DMA_LEN);
+				     "DMA disabled for transfer length %ld greater than %d\n",
+				     (long)transfer->len, MAX_DMA_LEN);
 	}
 
 	/* Setup the transfer state based on the type of transfer */
 	if (pxa2xx_spi_flush(drv_data) == 0) {
-		dev_err(&drv_data->pdev->dev, "pump_transfers: flush failed\n");
-		message->status = -EIO;
-		giveback(drv_data);
-		return;
+		dev_err(&drv_data->pdev->dev, "Flush failed\n");
+		return -EIO;
 	}
 	drv_data->n_bytes = chip->n_bytes;
 	drv_data->tx = (void *)transfer->tx_buf;
 	drv_data->tx_end = drv_data->tx + transfer->len;
 	drv_data->rx = transfer->rx_buf;
 	drv_data->rx_end = drv_data->rx + transfer->len;
-	drv_data->len = transfer->len;
 	drv_data->write = drv_data->tx ? chip->write : null_writer;
 	drv_data->read = drv_data->rx ? chip->read : null_reader;
 
@@ -1095,11 +995,9 @@
 						bits, &dma_burst,
 						&dma_thresh))
 			dev_warn_ratelimited(&message->spi->dev,
-					     "pump_transfers: DMA burst size reduced to match bits_per_word\n");
+					     "DMA burst size reduced to match bits_per_word\n");
 	}
 
-	message->state = RUNNING_STATE;
-
 	dma_mapped = master->can_dma &&
 		     master->can_dma(master, message->spi, transfer) &&
 		     master->cur_msg_mapped;
@@ -1108,12 +1006,9 @@
 		/* Ensure we have the correct interrupt handler */
 		drv_data->transfer_handler = pxa2xx_spi_dma_transfer;
 
-		err = pxa2xx_spi_dma_prepare(drv_data, dma_burst);
-		if (err) {
-			message->status = err;
-			giveback(drv_data);
-			return;
-		}
+		err = pxa2xx_spi_dma_prepare(drv_data, transfer);
+		if (err)
+			return err;
 
 		/* Clear status and start DMA engine */
 		cr1 = chip->cr1 | dma_thresh | drv_data->dma_cr1;
@@ -1175,27 +1070,40 @@
 			pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 	}
 
-	cs_assert(drv_data);
-
-	/* after chip select, release the data by enabling service
-	 * requests and interrupts, without changing any mode bits */
+	/*
+	 * Release the data by enabling service requests and interrupts,
+	 * without changing any mode bits
+	 */
 	pxa2xx_spi_write(drv_data, SSCR1, cr1);
+
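+	/*
+	 * A positive return tells the SPI core the transfer is still in
+	 * flight; it is finalized from the interrupt/DMA completion paths
+	 * via spi_finalize_current_transfer().
+	 */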
+	return 1;
 }
 
-static int pxa2xx_spi_transfer_one_message(struct spi_controller *master,
-					   struct spi_message *msg)
+static void pxa2xx_spi_handle_err(struct spi_controller *master,
+				 struct spi_message *msg)
 {
 	struct driver_data *drv_data = spi_controller_get_devdata(master);
 
-	/* Initial message state*/
-	msg->state = START_STATE;
-	drv_data->cur_transfer = list_entry(msg->transfers.next,
-						struct spi_transfer,
-						transfer_list);
+	/* Disable the SSP */
+	pxa2xx_spi_write(drv_data, SSCR0,
+			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+	/* Clear and disable interrupts and service requests */
+	write_SSSR_CS(drv_data, drv_data->clear_sr);
+	pxa2xx_spi_write(drv_data, SSCR1,
+			 pxa2xx_spi_read(drv_data, SSCR1)
+			 & ~(drv_data->int_cr1 | drv_data->dma_cr1));
+	if (!pxa25x_ssp_comp(drv_data))
+		pxa2xx_spi_write(drv_data, SSTO, 0);
 
-	/* Mark as busy and launch transfers */
-	tasklet_schedule(&drv_data->pump_transfers);
-	return 0;
+	/*
+	 * Stop the DMA if running. Note that the DMA callback handler may
+	 * have unset dma_running already, which is fine as stopping is not
+	 * needed then, but we shouldn't rely on this flag for anything other
+	 * than stopping, e.g. to differentiate between PIO and DMA transfers.
+	 */
+	if (atomic_read(&drv_data->dma_running))
+		pxa2xx_spi_dma_stop(drv_data);
 }
 
 static int pxa2xx_spi_unprepare_transfer(struct spi_controller *master)
@@ -1651,7 +1559,9 @@
 	master->dma_alignment = DMA_ALIGNMENT;
 	master->cleanup = cleanup;
 	master->setup = setup;
-	master->transfer_one_message = pxa2xx_spi_transfer_one_message;
+	master->set_cs = pxa2xx_spi_set_cs;
+	master->transfer_one = pxa2xx_spi_transfer_one;
+	master->handle_err = pxa2xx_spi_handle_err;
 	master->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer;
 	master->fw_translate_cs = pxa2xx_spi_fw_translate_cs;
 	master->auto_runtime_pm = true;
@@ -1702,7 +1612,9 @@
 	}
 
 	/* Enable SOC clock */
-	clk_prepare_enable(ssp->clk);
+	status = clk_prepare_enable(ssp->clk);
+	if (status)
+		goto out_error_dma_irq_alloc;
 
 	master->max_speed_hz = clk_get_rate(ssp->clk);
 
@@ -1787,9 +1699,6 @@
 		}
 	}
 
-	tasklet_init(&drv_data->pump_transfers, pump_transfers,
-		     (unsigned long)drv_data);
-
 	pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
@@ -1809,6 +1718,8 @@
 	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	clk_disable_unprepare(ssp->clk);
+
+out_error_dma_irq_alloc:
 	pxa2xx_spi_dma_release(drv_data);
 	free_irq(ssp->irq, drv_data);
 
@@ -1882,8 +1793,11 @@
 	int status;
 
 	/* Enable the SSP clock */
-	if (!pm_runtime_suspended(dev))
-		clk_prepare_enable(ssp->clk);
+	if (!pm_runtime_suspended(dev)) {
+		status = clk_prepare_enable(ssp->clk);
+		if (status)
+			return status;
+	}
 
 	/* Restore LPSS private register bits */
 	if (is_lpss_ssp(drv_data))
@@ -1912,9 +1826,10 @@
 static int pxa2xx_spi_runtime_resume(struct device *dev)
 {
 	struct driver_data *drv_data = dev_get_drvdata(dev);
+	int status;
 
-	clk_prepare_enable(drv_data->ssp->clk);
-	return 0;
+	status = clk_prepare_enable(drv_data->ssp->clk);
+	return status;
 }
 #endif
 
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 0ae7def..513c53a 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -46,15 +46,10 @@
 	u32 clear_sr;
 	u32 mask_sr;
 
-	/* Message Transfer pump */
-	struct tasklet_struct pump_transfers;
-
 	/* DMA engine support */
 	atomic_t dma_running;
 
-	/* Current message transfer state info */
-	struct spi_transfer *cur_transfer;
-	size_t len;
+	/* Current transfer state info */
 	void *tx;
 	void *tx_end;
 	void *rx;
@@ -104,11 +99,6 @@
 	__raw_writel(val, drv_data->ioaddr + reg);
 }
 
-#define START_STATE ((void *)0)
-#define RUNNING_STATE ((void *)1)
-#define DONE_STATE ((void *)2)
-#define ERROR_STATE ((void *)-1)
-
 #define DMA_ALIGNMENT		8
 
 static inline int pxa25x_ssp_comp(struct driver_data *drv_data)
@@ -133,14 +123,15 @@
 }
 
 extern int pxa2xx_spi_flush(struct driver_data *drv_data);
-extern void *pxa2xx_spi_next_transfer(struct driver_data *drv_data);
 
 #define MAX_DMA_LEN		SZ_64K
 #define DEFAULT_DMA_CR1		(SSCR1_TSRE | SSCR1_RSRE | SSCR1_TRAIL)
 
 extern irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data);
-extern int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst);
+extern int pxa2xx_spi_dma_prepare(struct driver_data *drv_data,
+				  struct spi_transfer *xfer);
 extern void pxa2xx_spi_dma_start(struct driver_data *drv_data);
+extern void pxa2xx_spi_dma_stop(struct driver_data *drv_data);
 extern int pxa2xx_spi_dma_setup(struct driver_data *drv_data);
 extern void pxa2xx_spi_dma_release(struct driver_data *drv_data);
 extern int pxa2xx_spi_set_dma_burst_and_threshold(struct chip_data *chip,
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index baa3a9f..7b7151e 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -28,15 +28,15 @@
 
 #define S3C64XX_SPI_CH_CFG		0x00
 #define S3C64XX_SPI_CLK_CFG		0x04
-#define S3C64XX_SPI_MODE_CFG	0x08
-#define S3C64XX_SPI_SLAVE_SEL	0x0C
+#define S3C64XX_SPI_MODE_CFG		0x08
+#define S3C64XX_SPI_SLAVE_SEL		0x0C
 #define S3C64XX_SPI_INT_EN		0x10
 #define S3C64XX_SPI_STATUS		0x14
 #define S3C64XX_SPI_TX_DATA		0x18
 #define S3C64XX_SPI_RX_DATA		0x1C
-#define S3C64XX_SPI_PACKET_CNT	0x20
-#define S3C64XX_SPI_PENDING_CLR	0x24
-#define S3C64XX_SPI_SWAP_CFG	0x28
+#define S3C64XX_SPI_PACKET_CNT		0x20
+#define S3C64XX_SPI_PENDING_CLR		0x24
+#define S3C64XX_SPI_SWAP_CFG		0x28
 #define S3C64XX_SPI_FB_CLK		0x2C
 
 #define S3C64XX_SPI_CH_HS_EN		(1<<6)	/* High Speed Enable */
@@ -77,9 +77,9 @@
 #define S3C64XX_SPI_INT_TX_FIFORDY_EN		(1<<0)
 
 #define S3C64XX_SPI_ST_RX_OVERRUN_ERR		(1<<5)
-#define S3C64XX_SPI_ST_RX_UNDERRUN_ERR	(1<<4)
+#define S3C64XX_SPI_ST_RX_UNDERRUN_ERR		(1<<4)
 #define S3C64XX_SPI_ST_TX_OVERRUN_ERR		(1<<3)
-#define S3C64XX_SPI_ST_TX_UNDERRUN_ERR	(1<<2)
+#define S3C64XX_SPI_ST_TX_UNDERRUN_ERR		(1<<2)
 #define S3C64XX_SPI_ST_RX_FIFORDY		(1<<1)
 #define S3C64XX_SPI_ST_TX_FIFORDY		(1<<0)
 
@@ -100,7 +100,7 @@
 #define S3C64XX_SPI_SWAP_TX_BIT			(1<<1)
 #define S3C64XX_SPI_SWAP_TX_EN			(1<<0)
 
-#define S3C64XX_SPI_FBCLK_MSK		(3<<0)
+#define S3C64XX_SPI_FBCLK_MSK			(3<<0)
 
 #define FIFO_LVL_MASK(i) ((i)->port_conf->fifo_lvl_mask[i->port_id])
 #define S3C64XX_SPI_ST_TX_DONE(v, i) (((v) & \
@@ -156,7 +156,6 @@
  * @ioclk: Pointer to the i/o clock between master and slave
  * @master: Pointer to the SPI Protocol master.
  * @cntrlr_info: Platform specific data for the controller this driver manages.
- * @tgl_spi: Pointer to the last CS left untoggled by the cs_change hint.
  * @lock: Controller specific lock.
  * @state: Set of FLAGS to indicate status.
  * @rx_dmach: Controller's DMA channel for Rx.
@@ -177,7 +176,6 @@
 	struct platform_device          *pdev;
 	struct spi_master               *master;
 	struct s3c64xx_spi_info  *cntrlr_info;
-	struct spi_device               *tgl_spi;
 	spinlock_t                      lock;
 	unsigned long                   sfr_start;
 	struct completion               xfer_completion;
@@ -190,7 +188,7 @@
 	unsigned int			port_id;
 };
 
-static void flush_fifo(struct s3c64xx_spi_driver_data *sdd)
+static void s3c64xx_flush_fifo(struct s3c64xx_spi_driver_data *sdd)
 {
 	void __iomem *regs = sdd->regs;
 	unsigned long loops;
@@ -350,9 +348,8 @@
 	return xfer->len > (FIFO_LVL_MASK(sdd) >> 1) + 1;
 }
 
-static void enable_datapath(struct s3c64xx_spi_driver_data *sdd,
-				struct spi_device *spi,
-				struct spi_transfer *xfer, int dma_mode)
+static void s3c64xx_enable_datapath(struct s3c64xx_spi_driver_data *sdd,
+				    struct spi_transfer *xfer, int dma_mode)
 {
 	void __iomem *regs = sdd->regs;
 	u32 modecfg, chcfg;
@@ -442,8 +439,8 @@
 	return RX_FIFO_LVL(status, sdd);
 }
 
-static int wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
-			struct spi_transfer *xfer)
+static int s3c64xx_wait_for_dma(struct s3c64xx_spi_driver_data *sdd,
+				struct spi_transfer *xfer)
 {
 	void __iomem *regs = sdd->regs;
 	unsigned long val;
@@ -485,8 +482,8 @@
 	return 0;
 }
 
-static int wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
-			struct spi_transfer *xfer)
+static int s3c64xx_wait_for_pio(struct s3c64xx_spi_driver_data *sdd,
+				struct spi_transfer *xfer)
 {
 	void __iomem *regs = sdd->regs;
 	unsigned long val;
@@ -505,6 +502,8 @@
 		status = readl(regs + S3C64XX_SPI_STATUS);
 	} while (RX_FIFO_LVL(status, sdd) < xfer->len && --val);
 
+	if (!val)
+		return -EIO;
 
 	/* If it was only Tx */
 	if (!xfer->rx_buf) {
@@ -635,11 +634,15 @@
 				    struct spi_transfer *xfer)
 {
 	struct s3c64xx_spi_driver_data *sdd = spi_master_get_devdata(master);
+	const unsigned int fifo_len = (FIFO_LVL_MASK(sdd) >> 1) + 1;
+	const void *tx_buf = NULL;
+	void *rx_buf = NULL;
+	int target_len = 0, origin_len = 0;
+	int use_dma = 0;
 	int status;
 	u32 speed;
 	u8 bpw;
 	unsigned long flags;
-	int use_dma;
 
 	reinit_completion(&sdd->xfer_completion);
 
@@ -654,48 +657,77 @@
 		s3c64xx_spi_config(sdd);
 	}
 
-	/* Polling method for xfers not bigger than FIFO capacity */
-	use_dma = 0;
-	if (!is_polling(sdd) &&
-	    (sdd->rx_dma.ch && sdd->tx_dma.ch &&
-	     (xfer->len > ((FIFO_LVL_MASK(sdd) >> 1) + 1))))
+	if (!is_polling(sdd) && (xfer->len > fifo_len) &&
+	    sdd->rx_dma.ch && sdd->tx_dma.ch) {
 		use_dma = 1;
 
-	spin_lock_irqsave(&sdd->lock, flags);
+	} else if (is_polling(sdd) && xfer->len > fifo_len) {
+		tx_buf = xfer->tx_buf;
+		rx_buf = xfer->rx_buf;
+		origin_len = xfer->len;
 
-	/* Pending only which is to be done */
-	sdd->state &= ~RXBUSY;
-	sdd->state &= ~TXBUSY;
+		target_len = xfer->len;
+		if (xfer->len > fifo_len)
+			xfer->len = fifo_len;
+	}
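+	/*
+	 * e.g. with a 64-byte FIFO, a 300-byte polled transfer is split
+	 * below into 64 + 64 + 64 + 64 + 44 byte chunks
+	 */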
 
-	enable_datapath(sdd, spi, xfer, use_dma);
+	do {
+		spin_lock_irqsave(&sdd->lock, flags);
 
-	/* Start the signals */
-	s3c64xx_spi_set_cs(spi, true);
+		/* Only what remains to be done is pending */
+		sdd->state &= ~RXBUSY;
+		sdd->state &= ~TXBUSY;
 
-	spin_unlock_irqrestore(&sdd->lock, flags);
+		s3c64xx_enable_datapath(sdd, xfer, use_dma);
 
-	if (use_dma)
-		status = wait_for_dma(sdd, xfer);
-	else
-		status = wait_for_pio(sdd, xfer);
+		/* Start the signals */
+		s3c64xx_spi_set_cs(spi, true);
 
-	if (status) {
-		dev_err(&spi->dev, "I/O Error: rx-%d tx-%d res:rx-%c tx-%c len-%d\n",
-			xfer->rx_buf ? 1 : 0, xfer->tx_buf ? 1 : 0,
-			(sdd->state & RXBUSY) ? 'f' : 'p',
-			(sdd->state & TXBUSY) ? 'f' : 'p',
-			xfer->len);
+		spin_unlock_irqrestore(&sdd->lock, flags);
 
-		if (use_dma) {
-			if (xfer->tx_buf != NULL
-			    && (sdd->state & TXBUSY))
-				dmaengine_terminate_all(sdd->tx_dma.ch);
-			if (xfer->rx_buf != NULL
-			    && (sdd->state & RXBUSY))
-				dmaengine_terminate_all(sdd->rx_dma.ch);
+		if (use_dma)
+			status = s3c64xx_wait_for_dma(sdd, xfer);
+		else
+			status = s3c64xx_wait_for_pio(sdd, xfer);
+
+		if (status) {
+			dev_err(&spi->dev,
+				"I/O Error: rx-%d tx-%d res:rx-%c tx-%c len-%d\n",
+				xfer->rx_buf ? 1 : 0, xfer->tx_buf ? 1 : 0,
+				(sdd->state & RXBUSY) ? 'f' : 'p',
+				(sdd->state & TXBUSY) ? 'f' : 'p',
+				xfer->len);
+
+			if (use_dma) {
+				if (xfer->tx_buf && (sdd->state & TXBUSY))
+					dmaengine_terminate_all(sdd->tx_dma.ch);
+				if (xfer->rx_buf && (sdd->state & RXBUSY))
+					dmaengine_terminate_all(sdd->rx_dma.ch);
+			}
+		} else {
+			s3c64xx_flush_fifo(sdd);
 		}
-	} else {
-		flush_fifo(sdd);
+		if (target_len > 0) {
+			target_len -= xfer->len;
+
+			if (xfer->tx_buf)
+				xfer->tx_buf += xfer->len;
+
+			if (xfer->rx_buf)
+				xfer->rx_buf += xfer->len;
+
+			if (target_len > fifo_len)
+				xfer->len = fifo_len;
+			else
+				xfer->len = target_len;
+		}
+	} while (target_len > 0);
+
+	if (origin_len) {
+		/* Restore original xfer buffers and length */
+		xfer->tx_buf = tx_buf;
+		xfer->rx_buf = rx_buf;
+		xfer->len = origin_len;
 	}
 
 	return status;
@@ -891,7 +923,7 @@
 	return IRQ_HANDLED;
 }
 
-static void s3c64xx_spi_hwinit(struct s3c64xx_spi_driver_data *sdd, int channel)
+static void s3c64xx_spi_hwinit(struct s3c64xx_spi_driver_data *sdd)
 {
 	struct s3c64xx_spi_info *sci = sdd->cntrlr_info;
 	void __iomem *regs = sdd->regs;
@@ -929,7 +961,7 @@
 	val |= (S3C64XX_SPI_TRAILCNT << S3C64XX_SPI_TRAILCNT_OFF);
 	writel(val, regs + S3C64XX_SPI_MODE_CFG);
 
-	flush_fifo(sdd);
+	s3c64xx_flush_fifo(sdd);
 }
 
 #ifdef CONFIG_OF
@@ -1145,7 +1177,7 @@
 	pm_runtime_get_sync(&pdev->dev);
 
 	/* Setup Default Mode */
-	s3c64xx_spi_hwinit(sdd, sdd->port_id);
+	s3c64xx_spi_hwinit(sdd);
 
 	spin_lock_init(&sdd->lock);
 	init_completion(&sdd->xfer_completion);
@@ -1260,8 +1292,6 @@
 	if (ret < 0)
 		return ret;
 
-	s3c64xx_spi_hwinit(sdd, sdd->port_id);
-
 	return spi_master_resume(master);
 }
 #endif /* CONFIG_PM_SLEEP */
@@ -1299,6 +1329,8 @@
 	if (ret != 0)
 		goto err_disable_src_clk;
 
+	s3c64xx_spi_hwinit(sdd);
+
 	return 0;
 
 err_disable_src_clk:
@@ -1344,15 +1376,6 @@
 	.clk_from_cmu	= true,
 };
 
-static struct s3c64xx_spi_port_config exynos5440_spi_port_config = {
-	.fifo_lvl_mask	= { 0x1ff },
-	.rx_lvl_offset	= 15,
-	.tx_st_done	= 25,
-	.high_speed	= true,
-	.clk_from_cmu	= true,
-	.quirks		= S3C64XX_SPI_QUIRK_POLL,
-};
-
 static struct s3c64xx_spi_port_config exynos7_spi_port_config = {
 	.fifo_lvl_mask	= { 0x1ff, 0x7F, 0x7F, 0x7F, 0x7F, 0x1ff},
 	.rx_lvl_offset	= 15,
@@ -1396,9 +1419,6 @@
 	{ .compatible = "samsung,exynos4210-spi",
 			.data = (void *)&exynos4_spi_port_config,
 	},
-	{ .compatible = "samsung,exynos5440-spi",
-			.data = (void *)&exynos5440_spi_port_config,
-	},
 	{ .compatible = "samsung,exynos7-spi",
 			.data = (void *)&exynos7_spi_port_config,
 	},
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 8171eed..0e74cbf 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -39,7 +39,7 @@
 	u16 tx_fifo_size;
 	u16 rx_fifo_size;
 	u16 master_flags;
-	u16 min_div;
+	u16 min_div_pow;
 };
 
 struct sh_msiof_spi_priv {
@@ -51,7 +51,7 @@
 	struct completion done;
 	unsigned int tx_fifo_size;
 	unsigned int rx_fifo_size;
-	unsigned int min_div;
+	unsigned int min_div_pow;
 	void *tx_dma_page;
 	void *rx_dma_page;
 	dma_addr_t tx_dma_addr;
@@ -249,43 +249,46 @@
 	return IRQ_HANDLED;
 }
 
-static struct {
-	unsigned short div;
-	unsigned short brdv;
-} const sh_msiof_spi_div_table[] = {
-	{ 1,	SCR_BRDV_DIV_1 },
-	{ 2,	SCR_BRDV_DIV_2 },
-	{ 4,	SCR_BRDV_DIV_4 },
-	{ 8,	SCR_BRDV_DIV_8 },
-	{ 16,	SCR_BRDV_DIV_16 },
-	{ 32,	SCR_BRDV_DIV_32 },
+static const u32 sh_msiof_spi_div_array[] = {
+	SCR_BRDV_DIV_1, SCR_BRDV_DIV_2,	 SCR_BRDV_DIV_4,
+	SCR_BRDV_DIV_8,	SCR_BRDV_DIV_16, SCR_BRDV_DIV_32,
 };
 
 static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
 				      unsigned long parent_rate, u32 spi_hz)
 {
-	unsigned long div = 1024;
+	unsigned long div;
 	u32 brps, scr;
-	size_t k;
+	unsigned int div_pow = p->min_div_pow;
 
-	if (!WARN_ON(!spi_hz || !parent_rate))
-		div = DIV_ROUND_UP(parent_rate, spi_hz);
-
-	div = max_t(unsigned long, div, p->min_div);
-
-	for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_div_table); k++) {
-		brps = DIV_ROUND_UP(div, sh_msiof_spi_div_table[k].div);
-		/* SCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */
-		if (sh_msiof_spi_div_table[k].div == 1 && brps > 2)
-			continue;
-		if (brps <= 32) /* max of brdv is 32 */
-			break;
+	if (!spi_hz || !parent_rate) {
+		WARN(1, "Invalid clock rate parameters %lu and %u\n",
+		     parent_rate, spi_hz);
+		return;
 	}
 
-	k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_div_table) - 1);
-	brps = min_t(int, brps, 32);
+	div = DIV_ROUND_UP(parent_rate, spi_hz);
+	if (div <= 1024) {
+		/* SCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */
+		if (!div_pow && div <= 32 && div > 2)
+			div_pow = 1;
 
-	scr = sh_msiof_spi_div_table[k].brdv | SCR_BRPS(brps);
+		if (div_pow)
+			brps = (div + 1) >> div_pow;
+		else
+			brps = div;
+
+		for (; brps > 32; div_pow++)
+			brps = (brps + 1) >> 1;
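+		/*
+		 * e.g. div = 100: brps 100 -> 50 -> 25 while div_pow 0 -> 2,
+		 * i.e. an overall divisor of 2^2 * 25 = 100
+		 */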
+	} else {
+		/* Set transfer rate composite divisor to 2^5 * 32 = 1024 */
+		dev_err(&p->pdev->dev,
+			"Requested SPI transfer rate %d is too low\n", spi_hz);
+		div_pow = 5;
+		brps = 32;
+	}
+
+	scr = sh_msiof_spi_div_array[div_pow] | SCR_BRPS(brps);
 	sh_msiof_write(p, TSCR, scr);
 	if (!(p->master->flags & SPI_MASTER_MUST_TX))
 		sh_msiof_write(p, RSCR, scr);
@@ -564,14 +567,16 @@
 
 	/* Configure native chip select mode/polarity early */
 	clr = MDR1_SYNCMD_MASK;
-	set = MDR1_TRMD | TMDR1_PCON | MDR1_SYNCMD_SPI;
+	set = MDR1_SYNCMD_SPI;
 	if (spi->mode & SPI_CS_HIGH)
 		clr |= BIT(MDR1_SYNCAC_SHIFT);
 	else
 		set |= BIT(MDR1_SYNCAC_SHIFT);
 	pm_runtime_get_sync(&p->pdev->dev);
 	tmp = sh_msiof_read(p, TMDR1) & ~clr;
-	sh_msiof_write(p, TMDR1, tmp | set);
+	sh_msiof_write(p, TMDR1, tmp | set | MDR1_TRMD | TMDR1_PCON);
+	tmp = sh_msiof_read(p, RMDR1) & ~clr;
+	sh_msiof_write(p, RMDR1, tmp | set);
 	pm_runtime_put(&p->pdev->dev);
 	p->native_cs_high = spi->mode & SPI_CS_HIGH;
 	p->native_cs_inited = true;
@@ -1041,21 +1046,21 @@
 	.tx_fifo_size = 64,
 	.rx_fifo_size = 64,
 	.master_flags = 0,
-	.min_div = 1,
+	.min_div_pow = 0,
 };
 
 static const struct sh_msiof_chipdata rcar_gen2_data = {
 	.tx_fifo_size = 64,
 	.rx_fifo_size = 64,
 	.master_flags = SPI_MASTER_MUST_TX,
-	.min_div = 1,
+	.min_div_pow = 0,
 };
 
 static const struct sh_msiof_chipdata rcar_gen3_data = {
 	.tx_fifo_size = 64,
 	.rx_fifo_size = 64,
 	.master_flags = SPI_MASTER_MUST_TX,
-	.min_div = 2,
+	.min_div_pow = 1,
 };
 
 static const struct of_device_id sh_msiof_match[] = {
@@ -1319,7 +1324,7 @@
 	platform_set_drvdata(pdev, p);
 	p->master = master;
 	p->info = info;
-	p->min_div = chipdata->min_div;
+	p->min_div_pow = chipdata->min_div_pow;
 
 	init_completion(&p->done);
 
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index ba9743f..ad1e55d 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1129,7 +1129,7 @@
 	if (!spi->clk_rate) {
 		dev_err(&pdev->dev, "clk rate = 0\n");
 		ret = -EINVAL;
-		goto err_master_put;
+		goto err_clk_disable;
 	}
 
 	spi->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index c24d9b4..5f19016 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -36,6 +36,7 @@
 #include <linux/sizes.h>
 
 #include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
 
 struct ti_qspi_regs {
 	u32 clkctrl;
@@ -50,6 +51,7 @@
 	struct spi_master	*master;
 	void __iomem            *base;
 	void __iomem            *mmap_base;
+	size_t			mmap_size;
 	struct regmap		*ctrl_base;
 	unsigned int		ctrl_reg;
 	struct clk		*fclk;
@@ -434,12 +436,10 @@
 	return 0;
 }
 
-static int ti_qspi_dma_bounce_buffer(struct ti_qspi *qspi,
-				     struct spi_flash_read_message *msg)
+static int ti_qspi_dma_bounce_buffer(struct ti_qspi *qspi, loff_t offs,
+				     void *to, size_t readsize)
 {
-	size_t readsize = msg->len;
-	void *to = msg->buf;
-	dma_addr_t dma_src = qspi->mmap_phys_base + msg->from;
+	dma_addr_t dma_src = qspi->mmap_phys_base + offs;
 	int ret = 0;
 
 	/*
@@ -507,13 +507,14 @@
 	qspi->mmap_enabled = false;
 }
 
-static void ti_qspi_setup_mmap_read(struct spi_device *spi,
-				    struct spi_flash_read_message *msg)
+static void ti_qspi_setup_mmap_read(struct spi_device *spi, u8 opcode,
+				    u8 data_nbits, u8 addr_width,
+				    u8 dummy_bytes)
 {
 	struct ti_qspi  *qspi = spi_master_get_devdata(spi->master);
-	u32 memval = msg->read_opcode;
+	u32 memval = opcode;
 
-	switch (msg->data_nbits) {
+	switch (data_nbits) {
 	case SPI_NBITS_QUAD:
 		memval |= QSPI_SETUP_RD_QUAD;
 		break;
@@ -524,48 +525,64 @@
 		memval |= QSPI_SETUP_RD_NORMAL;
 		break;
 	}
-	memval |= ((msg->addr_width - 1) << QSPI_SETUP_ADDR_SHIFT |
-		   msg->dummy_bytes << QSPI_SETUP_DUMMY_SHIFT);
+	memval |= ((addr_width - 1) << QSPI_SETUP_ADDR_SHIFT |
+		   dummy_bytes << QSPI_SETUP_DUMMY_SHIFT);
 	ti_qspi_write(qspi, memval,
 		      QSPI_SPI_SETUP_REG(spi->chip_select));
 }
 
-static bool ti_qspi_spi_flash_can_dma(struct spi_device *spi,
-				      struct spi_flash_read_message *msg)
+static int ti_qspi_exec_mem_op(struct spi_mem *mem,
+			       const struct spi_mem_op *op)
 {
-	return virt_addr_valid(msg->buf);
-}
-
-static int ti_qspi_spi_flash_read(struct spi_device *spi,
-				  struct spi_flash_read_message *msg)
-{
-	struct ti_qspi *qspi = spi_master_get_devdata(spi->master);
+	struct ti_qspi *qspi = spi_master_get_devdata(mem->spi->master);
+	u32 from = 0;
 	int ret = 0;
 
+	/* Only optimize read path. */
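+	/* Returning -ENOTSUPP makes the spi-mem core fall back to regular SPI transfers. */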
+	if (!op->data.nbytes || op->data.dir != SPI_MEM_DATA_IN ||
+	    !op->addr.nbytes || op->addr.nbytes > 4)
+		return -ENOTSUPP;
+
+	/* Address exceeds MMIO window size, fall back to regular mode. */
+	from = op->addr.val;
+	if (from + op->data.nbytes > qspi->mmap_size)
+		return -ENOTSUPP;
+
 	mutex_lock(&qspi->list_lock);
 
 	if (!qspi->mmap_enabled)
-		ti_qspi_enable_memory_map(spi);
-	ti_qspi_setup_mmap_read(spi, msg);
+		ti_qspi_enable_memory_map(mem->spi);
+	ti_qspi_setup_mmap_read(mem->spi, op->cmd.opcode, op->data.buswidth,
+				op->addr.nbytes, op->dummy.nbytes);
 
 	if (qspi->rx_chan) {
-		if (msg->cur_msg_mapped)
-			ret = ti_qspi_dma_xfer_sg(qspi, msg->rx_sg, msg->from);
-		else
-			ret = ti_qspi_dma_bounce_buffer(qspi, msg);
-		if (ret)
-			goto err_unlock;
-	} else {
-		memcpy_fromio(msg->buf, qspi->mmap_base + msg->from, msg->len);
-	}
-	msg->retlen = msg->len;
+		struct sg_table sgt;
 
-err_unlock:
+		if (virt_addr_valid(op->data.buf.in) &&
+		    !spi_controller_dma_map_mem_op_data(mem->spi->master, op,
+							&sgt)) {
+			ret = ti_qspi_dma_xfer_sg(qspi, sgt, from);
+			spi_controller_dma_unmap_mem_op_data(mem->spi->master,
+							     op, &sgt);
+		} else {
+			ret = ti_qspi_dma_bounce_buffer(qspi, from,
+							op->data.buf.in,
+							op->data.nbytes);
+		}
+	} else {
+		memcpy_fromio(op->data.buf.in, qspi->mmap_base + from,
+			      op->data.nbytes);
+	}
+
 	mutex_unlock(&qspi->list_lock);
 
 	return ret;
 }
 
+static const struct spi_controller_mem_ops ti_qspi_mem_ops = {
+	.exec_op = ti_qspi_exec_mem_op,
+};
+
 static int ti_qspi_start_transfer_one(struct spi_master *master,
 		struct spi_message *m)
 {
@@ -672,7 +689,7 @@
 	master->dev.of_node = pdev->dev.of_node;
 	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
 				     SPI_BPW_MASK(8);
-	master->spi_flash_read = ti_qspi_spi_flash_read;
+	master->mem_ops = &ti_qspi_mem_ops;
 
 	if (!of_property_read_u32(np, "num-cs", &num_cs))
 		master->num_chipselect = num_cs;
@@ -702,6 +719,9 @@
 		}
 	}
 
+	if (res_mmap)
+		qspi->mmap_size = resource_size(res_mmap);
+
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
 		dev_err(&pdev->dev, "no irq resource?\n");
@@ -770,7 +790,6 @@
 		dma_release_channel(qspi->rx_chan);
 		goto no_dma;
 	}
-	master->spi_flash_can_dma = ti_qspi_spi_flash_can_dma;
 	master->dma_rx = qspi->rx_chan;
 	init_completion(&qspi->transfer_complete);
 	if (res_mmap)
@@ -784,7 +803,7 @@
 				 "mmap failed with error %ld using PIO mode\n",
 				 PTR_ERR(qspi->mmap_base));
 			qspi->mmap_base = NULL;
-			master->spi_flash_read = NULL;
+			master->mem_ops = NULL;
 		}
 	}
 	qspi->mmap_enabled = false;
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index 18aeace..cc4d310 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -20,6 +20,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
@@ -135,6 +136,7 @@
 #define GQSPI_DMA_UNALIGN		0x3
 #define GQSPI_DEFAULT_NUM_CS	1	/* Default number of chip selects */
 
+#define SPI_AUTOSUSPEND_TIMEOUT		3000
 enum mode_type {GQSPI_MODE_IO, GQSPI_MODE_DMA};
 
 /**
@@ -356,21 +358,9 @@
 static int zynqmp_prepare_transfer_hardware(struct spi_master *master)
 {
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-	int ret;
-
-	ret = clk_enable(xqspi->refclk);
-	if (ret)
-		return ret;
-
-	ret = clk_enable(xqspi->pclk);
-	if (ret)
-		goto clk_err;
 
 	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
 	return 0;
-clk_err:
-	clk_disable(xqspi->refclk);
-	return ret;
 }
 
 /**
@@ -387,8 +377,6 @@
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
 
 	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
-	clk_disable(xqspi->refclk);
-	clk_disable(xqspi->pclk);
 	return 0;
 }
 
@@ -918,8 +906,7 @@
  */
 static int __maybe_unused zynqmp_qspi_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_master *master = dev_get_drvdata(dev);
 
 	spi_master_suspend(master);
 
@@ -939,8 +926,7 @@
  */
 static int __maybe_unused zynqmp_qspi_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct spi_master *master = platform_get_drvdata(pdev);
+	struct spi_master *master = dev_get_drvdata(dev);
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
 	int ret = 0;
 
@@ -959,11 +945,67 @@
 
 	spi_master_resume(master);
 
+	clk_disable(xqspi->refclk);
+	clk_disable(xqspi->pclk);
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(zynqmp_qspi_dev_pm_ops, zynqmp_qspi_suspend,
-			 zynqmp_qspi_resume);
+/**
+ * zynqmp_runtime_suspend - Runtime suspend method for the SPI driver
+ * @dev:	Address of the device structure
+ *
+ * This function disables the clocks
+ *
+ * Return:	Always 0
+ */
+static int __maybe_unused zynqmp_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+
+	clk_disable(xqspi->refclk);
+	clk_disable(xqspi->pclk);
+
+	return 0;
+}
+
+/**
+ * zynqmp_runtime_resume - Runtime resume method for the SPI driver
+ * @dev:	Address of the device structure
+ *
+ * This function enables the clocks
+ *
+ * Return:	0 on success and error value on error
+ */
+static int __maybe_unused zynqmp_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+	int ret;
+
+	ret = clk_enable(xqspi->pclk);
+	if (ret) {
+		dev_err(dev, "Cannot enable APB clock.\n");
+		return ret;
+	}
+
+	ret = clk_enable(xqspi->refclk);
+	if (ret) {
+		dev_err(dev, "Cannot enable device clock.\n");
+		clk_disable(xqspi->pclk);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops zynqmp_qspi_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(zynqmp_runtime_suspend,
+			   zynqmp_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(zynqmp_qspi_suspend, zynqmp_qspi_resume)
+};
 
 /**
  * zynqmp_qspi_probe:	Probe method for the QSPI driver
@@ -1023,9 +1065,15 @@
 		goto clk_dis_pclk;
 	}
 
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
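+	/* The controller autosuspends SPI_AUTOSUSPEND_TIMEOUT ms (3 s) after last use */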
 	/* QSPI controller initializations */
 	zynqmp_qspi_init_hw(xqspi);
 
+	pm_runtime_mark_last_busy(&pdev->dev);
+	pm_runtime_put_autosuspend(&pdev->dev);
 	xqspi->irq = platform_get_irq(pdev, 0);
 	if (xqspi->irq <= 0) {
 		ret = -ENXIO;
@@ -1063,6 +1111,8 @@
 	return 0;
 
 clk_dis_all:
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
 	clk_disable_unprepare(xqspi->refclk);
 clk_dis_pclk:
 	clk_disable_unprepare(xqspi->pclk);
@@ -1090,6 +1140,8 @@
 	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
 	clk_disable_unprepare(xqspi->refclk);
 	clk_disable_unprepare(xqspi->pclk);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
 
 	spi_unregister_master(master);
 
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 7b213fa..efc624f 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/mod_devicetable.h>
 #include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_domain.h>
@@ -46,6 +47,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/spi.h>
 
+#include "internals.h"
+
 static DEFINE_IDR(spi_master_idr);
 
 static void spidev_release(struct device *dev)
@@ -356,11 +359,12 @@
 	}
 
 	ret = dev_pm_domain_attach(dev, true);
-	if (ret != -EPROBE_DEFER) {
-		ret = sdrv->probe(spi);
-		if (ret)
-			dev_pm_domain_detach(dev, true);
-	}
+	if (ret)
+		return ret;
+
+	ret = sdrv->probe(spi);
+	if (ret)
+		dev_pm_domain_detach(dev, true);
 
 	return ret;
 }
@@ -740,9 +744,9 @@
 }
 
 #ifdef CONFIG_HAS_DMA
-static int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
-		       struct sg_table *sgt, void *buf, size_t len,
-		       enum dma_data_direction dir)
+int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
+		struct sg_table *sgt, void *buf, size_t len,
+		enum dma_data_direction dir)
 {
 	const bool vmalloced_buf = is_vmalloc_addr(buf);
 	unsigned int max_seg_size = dma_get_max_seg_size(dev);
@@ -821,8 +825,8 @@
 	return 0;
 }
 
-static void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
-			  struct sg_table *sgt, enum dma_data_direction dir)
+void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
+		   struct sg_table *sgt, enum dma_data_direction dir)
 {
 	if (sgt->orig_nents) {
 		dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir);
@@ -907,19 +911,6 @@
 	return 0;
 }
 #else /* !CONFIG_HAS_DMA */
-static inline int spi_map_buf(struct spi_controller *ctlr, struct device *dev,
-			      struct sg_table *sgt, void *buf, size_t len,
-			      enum dma_data_direction dir)
-{
-	return -EINVAL;
-}
-
-static inline void spi_unmap_buf(struct spi_controller *ctlr,
-				 struct device *dev, struct sg_table *sgt,
-				 enum dma_data_direction dir)
-{
-}
-
 static inline int __spi_map_msg(struct spi_controller *ctlr,
 				struct spi_message *msg)
 {
@@ -1222,6 +1213,7 @@
 	if (!was_busy && ctlr->auto_runtime_pm) {
 		ret = pm_runtime_get_sync(ctlr->dev.parent);
 		if (ret < 0) {
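+			/* pm_runtime_get_sync() takes a usage count even on failure; balance it */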
+			pm_runtime_put_noidle(ctlr->dev.parent);
 			dev_err(&ctlr->dev, "Failed to power device: %d\n",
 				ret);
 			mutex_unlock(&ctlr->io_mutex);
@@ -1533,6 +1525,22 @@
 	return ret;
 }
 
+/**
+ * spi_flush_queue - Send all pending messages in the queue from the caller's
+ *		     context
+ * @ctlr: controller to process queue for
+ *
+ * This should be used when one wants to ensure all pending messages have been
+ * sent before doing something. It is used by the spi-mem code to make sure SPI
+ * memory operations do not preempt regular SPI transfers that have been queued
+ * before the spi-mem operation.
+ */
+void spi_flush_queue(struct spi_controller *ctlr)
+{
+	if (ctlr->transfer == spi_queued_transfer)
+		__spi_pump_messages(ctlr, false);
+}
+
 /*-------------------------------------------------------------------------*/
 
 #if defined(CONFIG_OF)
@@ -2063,6 +2071,26 @@
 }
 #endif
 
+static int spi_controller_check_ops(struct spi_controller *ctlr)
+{
+	/*
+	 * The controller may implement only the high-level SPI-memory-like
+	 * operations if it does not support regular SPI transfers, and this is
+	 * a valid use case.
+	 * If ->mem_ops is NULL, we request that at least one of the
+	 * ->transfer_xxx() methods be implemented.
+	 */
+	if (ctlr->mem_ops) {
+		if (!ctlr->mem_ops->exec_op)
+			return -EINVAL;
+	} else if (!ctlr->transfer && !ctlr->transfer_one &&
+		   !ctlr->transfer_one_message) {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  * spi_register_controller - register SPI master or slave controller
  * @ctlr: initialized master, originally from spi_alloc_master() or
@@ -2096,6 +2124,14 @@
 	if (!dev)
 		return -ENODEV;
 
+	/*
+	 * Make sure all necessary hooks are implemented before registering
+	 * the SPI controller.
+	 */
+	status = spi_controller_check_ops(ctlr);
+	if (status)
+		return status;
+
 	if (!spi_controller_is_slave(ctlr)) {
 		status = of_spi_register_master(ctlr);
 		if (status)
@@ -2161,10 +2197,14 @@
 			spi_controller_is_slave(ctlr) ? "slave" : "master",
 			dev_name(&ctlr->dev));
 
-	/* If we're using a queued driver, start the queue */
-	if (ctlr->transfer)
+	/*
+	 * If we're using a queued driver, start the queue. Note that we don't
+	 * need the queueing logic if the driver is only supporting high-level
+	 * memory operations.
+	 */
+	if (ctlr->transfer) {
 		dev_info(dev, "controller is unqueued, this is deprecated\n");
-	else {
+	} else if (ctlr->transfer_one || ctlr->transfer_one_message) {
 		status = spi_controller_initialize_queue(ctlr);
 		if (status) {
 			device_del(&ctlr->dev);
@@ -2894,6 +2934,13 @@
 {
 	struct spi_controller *ctlr = spi->controller;
 
+	/*
+	 * Some controllers do not support doing regular SPI transfers. Return
+	 * ENOTSUPP when this is the case.
+	 */
+	if (!ctlr->transfer)
+		return -ENOTSUPP;
+
 	message->spi = spi;
 
 	SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, spi_async);
@@ -3010,63 +3057,6 @@
 }
 EXPORT_SYMBOL_GPL(spi_async_locked);
 
-
-int spi_flash_read(struct spi_device *spi,
-		   struct spi_flash_read_message *msg)
-
-{
-	struct spi_controller *master = spi->controller;
-	struct device *rx_dev = NULL;
-	int ret;
-
-	if ((msg->opcode_nbits == SPI_NBITS_DUAL ||
-	     msg->addr_nbits == SPI_NBITS_DUAL) &&
-	    !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD)))
-		return -EINVAL;
-	if ((msg->opcode_nbits == SPI_NBITS_QUAD ||
-	     msg->addr_nbits == SPI_NBITS_QUAD) &&
-	    !(spi->mode & SPI_TX_QUAD))
-		return -EINVAL;
-	if (msg->data_nbits == SPI_NBITS_DUAL &&
-	    !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD)))
-		return -EINVAL;
-	if (msg->data_nbits == SPI_NBITS_QUAD &&
-	    !(spi->mode &  SPI_RX_QUAD))
-		return -EINVAL;
-
-	if (master->auto_runtime_pm) {
-		ret = pm_runtime_get_sync(master->dev.parent);
-		if (ret < 0) {
-			dev_err(&master->dev, "Failed to power device: %d\n",
-				ret);
-			return ret;
-		}
-	}
-
-	mutex_lock(&master->bus_lock_mutex);
-	mutex_lock(&master->io_mutex);
-	if (master->dma_rx && master->spi_flash_can_dma(spi, msg)) {
-		rx_dev = master->dma_rx->device->dev;
-		ret = spi_map_buf(master, rx_dev, &msg->rx_sg,
-				  msg->buf, msg->len,
-				  DMA_FROM_DEVICE);
-		if (!ret)
-			msg->cur_msg_mapped = true;
-	}
-	ret = master->spi_flash_read(spi, msg);
-	if (msg->cur_msg_mapped)
-		spi_unmap_buf(master, rx_dev, &msg->rx_sg,
-			      DMA_FROM_DEVICE);
-	mutex_unlock(&master->io_mutex);
-	mutex_unlock(&master->bus_lock_mutex);
-
-	if (master->auto_runtime_pm)
-		pm_runtime_put(master->dev.parent);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(spi_flash_read);
-
 /*-------------------------------------------------------------------------*/
 
 /* Utility methods for SPI protocol drivers, layered on
diff --git a/drivers/staging/comedi/drivers/serial2002.c b/drivers/staging/comedi/drivers/serial2002.c
index b3f3b4a..5471b22 100644
--- a/drivers/staging/comedi/drivers/serial2002.c
+++ b/drivers/staging/comedi/drivers/serial2002.c
@@ -113,7 +113,7 @@
 		long elapsed;
 		__poll_t mask;
 
-		mask = f->f_op->poll(f, &table.pt);
+		mask = vfs_poll(f, &table.pt);
 		if (mask & (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
 			    EPOLLHUP | EPOLLERR)) {
 			break;
@@ -136,7 +136,7 @@
 
 	result = -1;
 	if (!IS_ERR(f)) {
-		if (f->f_op->poll) {
+		if (file_can_poll(f)) {
 			serial2002_tty_read_poll_wait(f, timeout);
 
 			if (kernel_read(f, &ch, 1, &pos) == 1)
diff --git a/drivers/staging/comedi/proc.c b/drivers/staging/comedi/proc.c
index 50d3893..8bc8e42 100644
--- a/drivers/staging/comedi/proc.c
+++ b/drivers/staging/comedi/proc.c
@@ -62,25 +62,9 @@
 	return 0;
 }
 
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int comedi_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, comedi_read, NULL);
-}
-
-static const struct file_operations comedi_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= comedi_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 void __init comedi_proc_init(void)
 {
-	if (!proc_create("comedi", 0444, NULL, &comedi_proc_fops))
+	if (!proc_create_single("comedi", 0444, NULL, comedi_read))
 		pr_warn("comedi: unable to create proc entry\n");
 }
 
diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c
index e8bfe55..fa0dd42 100644
--- a/drivers/staging/fwserial/fwserial.c
+++ b/drivers/staging/fwserial/fwserial.c
@@ -1506,11 +1506,6 @@
 	return 0;
 }
 
-static int fwtty_proc_open(struct inode *inode, struct file *fp)
-{
-	return single_open(fp, fwtty_proc_show, NULL);
-}
-
 static int fwtty_stats_open(struct inode *inode, struct file *fp)
 {
 	return single_open(fp, fwtty_debugfs_stats_show, inode->i_private);
@@ -1537,14 +1532,6 @@
 	.release =	single_release,
 };
 
-static const struct file_operations fwtty_proc_fops = {
-	.owner =        THIS_MODULE,
-	.open =         fwtty_proc_open,
-	.read =         seq_read,
-	.llseek =       seq_lseek,
-	.release =      single_release,
-};
-
 static const struct tty_port_operations fwtty_port_ops = {
 	.dtr_rts =		fwtty_port_dtr_rts,
 	.carrier_raised =	fwtty_port_carrier_raised,
@@ -1570,7 +1557,7 @@
 	.tiocmget =		fwtty_tiocmget,
 	.tiocmset =		fwtty_tiocmset,
 	.get_icount =		fwtty_get_icount,
-	.proc_fops =		&fwtty_proc_fops,
+	.proc_show =		fwtty_proc_show,
 };
 
 static const struct tty_operations fwloop_ops = {
diff --git a/drivers/staging/greybus/module.c b/drivers/staging/greybus/module.c
index b785382..894d02e 100644
--- a/drivers/staging/greybus/module.c
+++ b/drivers/staging/greybus/module.c
@@ -94,8 +94,8 @@
 	struct gb_module *module;
 	int i;
 
-	module = kzalloc(sizeof(*module) + num_interfaces * sizeof(intf),
-				GFP_KERNEL);
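+	/*
+	 * struct_size(): sizeof(*module) + num_interfaces *
+	 * sizeof(module->interfaces[0]), with overflow checking
+	 */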
+	module = kzalloc(struct_size(module, interfaces, num_interfaces),
+			 GFP_KERNEL);
 	if (!module)
 		return NULL;
 
diff --git a/drivers/staging/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
index 5703dd1..208b5c1 100644
--- a/drivers/staging/ipx/af_ipx.c
+++ b/drivers/staging/ipx/af_ipx.c
@@ -1965,7 +1965,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= ipx_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.ioctl		= ipx_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ipx_compat_ioctl,
diff --git a/drivers/staging/ipx/ipx_proc.c b/drivers/staging/ipx/ipx_proc.c
index b9232e4..360f0ad 100644
--- a/drivers/staging/ipx/ipx_proc.c
+++ b/drivers/staging/ipx/ipx_proc.c
@@ -244,42 +244,6 @@
 	.show   = ipx_seq_socket_show,
 };
 
-static int ipx_seq_route_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ipx_seq_route_ops);
-}
-
-static int ipx_seq_interface_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ipx_seq_interface_ops);
-}
-
-static int ipx_seq_socket_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ipx_seq_socket_ops);
-}
-
-static const struct file_operations ipx_seq_interface_fops = {
-	.open           = ipx_seq_interface_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = seq_release,
-};
-
-static const struct file_operations ipx_seq_route_fops = {
-	.open           = ipx_seq_route_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = seq_release,
-};
-
-static const struct file_operations ipx_seq_socket_fops = {
-	.open           = ipx_seq_socket_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = seq_release,
-};
-
 static struct proc_dir_entry *ipx_proc_dir;
 
 int __init ipx_proc_init(void)
@@ -291,16 +255,17 @@
 
 	if (!ipx_proc_dir)
 		goto out;
-	p = proc_create("interface", S_IRUGO,
-			ipx_proc_dir, &ipx_seq_interface_fops);
+	p = proc_create_seq("interface", S_IRUGO, ipx_proc_dir,
+			&ipx_seq_interface_ops);
 	if (!p)
 		goto out_interface;
 
-	p = proc_create("route", S_IRUGO, ipx_proc_dir, &ipx_seq_route_fops);
+	p = proc_create_seq("route", S_IRUGO, ipx_proc_dir, &ipx_seq_route_ops);
 	if (!p)
 		goto out_route;
 
-	p = proc_create("socket", S_IRUGO, ipx_proc_dir, &ipx_seq_socket_fops);
+	p = proc_create_seq("socket", S_IRUGO, ipx_proc_dir,
+			&ipx_seq_socket_ops);
 	if (!p)
 		goto out_socket;
 
diff --git a/drivers/staging/ncpfs/dir.c b/drivers/staging/ncpfs/dir.c
index 0c57c5c..072bcb1 100644
--- a/drivers/staging/ncpfs/dir.c
+++ b/drivers/staging/ncpfs/dir.c
@@ -823,12 +823,11 @@
 	struct ncp_server *server = NCP_SERVER(dir);
 	struct inode *inode = NULL;
 	struct ncp_entry_info finfo;
-	int error, res, len;
+	int res, len;
 	__u8 __name[NCP_MAXPATHLEN + 1];
 
-	error = -EIO;
 	if (!ncp_conn_valid(server))
-		goto finished;
+		return ERR_PTR(-EIO);
 
 	ncp_vdbg("server lookup for %pd2\n", dentry);
 
@@ -847,31 +846,20 @@
 			res = ncp_obtain_info(server, dir, __name, &(finfo.i));
 	}
 	ncp_vdbg("looked for %pd2, res=%d\n", dentry, res);
-	/*
-	 * If we didn't find an entry, make a negative dentry.
-	 */
-	if (res)
-		goto add_entry;
-
-	/*
-	 * Create an inode for the entry.
-	 */
-	finfo.opened = 0;
-	finfo.ino = iunique(dir->i_sb, 2);
-	finfo.volume = finfo.i.volNumber;
-	error = -EACCES;
-	inode = ncp_iget(dir->i_sb, &finfo);
-
-	if (inode) {
-		ncp_new_dentry(dentry);
-add_entry:
-		d_add(dentry, inode);
-		error = 0;
+	if (!res) {
+		/*
+		 * Entry found; create an inode for it.
+		 */
+		finfo.opened = 0;
+		finfo.ino = iunique(dir->i_sb, 2);
+		finfo.volume = finfo.i.volNumber;
+		inode = ncp_iget(dir->i_sb, &finfo);
+		if (unlikely(!inode))
+			inode = ERR_PTR(-EACCES);
+		else
+			ncp_new_dentry(dentry);
 	}
-
-finished:
-	ncp_vdbg("result=%d\n", error);
-	return ERR_PTR(error);
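+	/*
+	 * d_splice_alias() copes with a NULL inode (it creates a negative
+	 * dentry) and with ERR_PTR inodes.
+	 */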
+	return d_splice_alias(inode, dentry);
 }
 
 /*
diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c
index d607c59..7a0dbc0 100644
--- a/drivers/staging/rtl8192u/r8192U_core.c
+++ b/drivers/staging/rtl8192u/r8192U_core.c
@@ -646,64 +646,25 @@
 	rtl8192_proc = proc_mkdir(RTL819xU_MODULE_NAME, init_net.proc_net);
 }
 
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int rtl8192_proc_open(struct inode *inode, struct file *file)
-{
-	struct net_device *dev = proc_get_parent_data(inode);
-	int (*show)(struct seq_file *, void *) = PDE_DATA(inode);
-
-	return single_open(file, show, dev);
-}
-
-static const struct file_operations rtl8192_proc_fops = {
-	.open		= rtl8192_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-/*
- * Table of proc files we need to create.
- */
-struct rtl8192_proc_file {
-	char name[12];
-	int (*show)(struct seq_file *, void *);
-};
-
-static const struct rtl8192_proc_file rtl8192_proc_files[] = {
-	{ "stats-rx",	&proc_get_stats_rx },
-	{ "stats-tx",	&proc_get_stats_tx },
-	{ "stats-ap",	&proc_get_stats_ap },
-	{ "registers",	&proc_get_registers },
-	{ "" }
-};
-
 static void rtl8192_proc_init_one(struct net_device *dev)
 {
-	const struct rtl8192_proc_file *f;
 	struct proc_dir_entry *dir;
 
-	if (rtl8192_proc) {
-		dir = proc_mkdir_data(dev->name, 0, rtl8192_proc, dev);
-		if (!dir) {
-			RT_TRACE(COMP_ERR,
-				 "Unable to initialize /proc/net/rtl8192/%s\n",
-				 dev->name);
-			return;
-		}
+	if (!rtl8192_proc)
+		return;
 
-		for (f = rtl8192_proc_files; f->name[0]; f++) {
-			if (!proc_create_data(f->name, S_IFREG | S_IRUGO, dir,
-					      &rtl8192_proc_fops, f->show)) {
-				RT_TRACE(COMP_ERR,
-					 "Unable to initialize /proc/net/rtl8192/%s/%s\n",
-					 dev->name, f->name);
-				return;
-			}
-		}
-	}
+	dir = proc_mkdir_data(dev->name, 0, rtl8192_proc, dev);
+	if (!dir)
+		return;
+
+	proc_create_single("stats-rx", S_IFREG | S_IRUGO, dir,
+			proc_get_stats_rx);
+	proc_create_single("stats-tx", S_IFREG | S_IRUGO, dir,
+			proc_get_stats_tx);
+	proc_create_single("stats-ap", S_IFREG | S_IRUGO, dir,
+			proc_get_stats_ap);
+	proc_create_single("registers", S_IFREG | S_IRUGO, dir,
+			proc_get_registers);
 }
 
 static void rtl8192_proc_remove_one(struct net_device *dev)
diff --git a/drivers/staging/typec/Kconfig b/drivers/staging/typec/Kconfig
index 5359f55..3aa981f 100644
--- a/drivers/staging/typec/Kconfig
+++ b/drivers/staging/typec/Kconfig
@@ -9,6 +9,14 @@
 	help
 	  Type-C Port Controller driver for TCPCI-compliant controller.
 
+config TYPEC_RT1711H
+	tristate "Richtek RT1711H Type-C chip driver"
+	select TYPEC_TCPCI
+	help
+	  Richtek RT1711H Type-C chip driver that works with the
+	  Type-C Port Controller Manager to provide USB PD and USB
+	  Type-C functionalities.
+
 endif
 
 endmenu
diff --git a/drivers/staging/typec/Makefile b/drivers/staging/typec/Makefile
index 53d649a..7803d48 100644
--- a/drivers/staging/typec/Makefile
+++ b/drivers/staging/typec/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_TYPEC_TCPCI)	+= tcpci.o
+obj-$(CONFIG_TYPEC_RT1711H)	+= tcpci_rt1711h.o
diff --git a/drivers/staging/typec/tcpci.h b/drivers/staging/typec/tcpci.h
index 34c865f..303ebde 100644
--- a/drivers/staging/typec/tcpci.h
+++ b/drivers/staging/typec/tcpci.h
@@ -59,6 +59,7 @@
 #define TCPC_POWER_CTRL_VCONN_ENABLE	BIT(0)
 
 #define TCPC_CC_STATUS			0x1d
+#define TCPC_CC_STATUS_TOGGLING		BIT(5)
 #define TCPC_CC_STATUS_TERM		BIT(4)
 #define TCPC_CC_STATUS_CC2_SHIFT	2
 #define TCPC_CC_STATUS_CC2_MASK		0x3
diff --git a/drivers/staging/typec/tcpci_rt1711h.c b/drivers/staging/typec/tcpci_rt1711h.c
new file mode 100644
index 0000000..0173890
--- /dev/null
+++ b/drivers/staging/typec/tcpci_rt1711h.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018, Richtek Technology Corporation
+ *
+ * Richtek RT1711H Type-C Chip Driver
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/gpio/consumer.h>
+#include <linux/usb/tcpm.h>
+#include <linux/regmap.h>
+#include "tcpci.h"
+
+#define RT1711H_VID		0x29CF
+#define RT1711H_PID		0x1711
+
+#define RT1711H_RTCTRL8		0x9B
+
+/* Autoidle timeout = (tout * 2 + 1) * 6.4ms */
+#define RT1711H_RTCTRL8_SET(ck300, ship_off, auto_idle, tout) \
+			    (((ck300) << 7) | ((ship_off) << 5) | \
+			    ((auto_idle) << 3) | ((tout) & 0x07))
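+/* e.g. tout = 2 gives an autoidle timeout of (2 * 2 + 1) * 6.4ms = 32ms */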
+
+#define RT1711H_RTCTRL11	0x9E
+
+/* I2C timeout = (tout + 1) * 12.5ms */
+#define RT1711H_RTCTRL11_SET(en, tout) \
+			     (((en) << 7) | ((tout) & 0x0F))
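+/* e.g. en = 1, tout = 0x0F gives an I2C timeout of (15 + 1) * 12.5ms = 200ms */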
+
+#define RT1711H_RTCTRL13	0xA0
+#define RT1711H_RTCTRL14	0xA1
+#define RT1711H_RTCTRL15	0xA2
+#define RT1711H_RTCTRL16	0xA3
+
+struct rt1711h_chip {
+	struct tcpci_data data;
+	struct tcpci *tcpci;
+	struct device *dev;
+};
+
+static int rt1711h_read16(struct rt1711h_chip *chip, unsigned int reg, u16 *val)
+{
+	return regmap_raw_read(chip->data.regmap, reg, val, sizeof(u16));
+}
+
+static int rt1711h_write16(struct rt1711h_chip *chip, unsigned int reg, u16 val)
+{
+	return regmap_raw_write(chip->data.regmap, reg, &val, sizeof(u16));
+}
+
+static int rt1711h_read8(struct rt1711h_chip *chip, unsigned int reg, u8 *val)
+{
+	return regmap_raw_read(chip->data.regmap, reg, val, sizeof(u8));
+}
+
+static int rt1711h_write8(struct rt1711h_chip *chip, unsigned int reg, u8 val)
+{
+	return regmap_raw_write(chip->data.regmap, reg, &val, sizeof(u8));
+}
+
+static const struct regmap_config rt1711h_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = 0xFF, /* 0x80 .. 0xFF are vendor defined */
+};
+
+static struct rt1711h_chip *tdata_to_rt1711h(struct tcpci_data *tdata)
+{
+	return container_of(tdata, struct rt1711h_chip, data);
+}
+
+static int rt1711h_init(struct tcpci *tcpci, struct tcpci_data *tdata)
+{
+	int ret;
+	struct rt1711h_chip *chip = tdata_to_rt1711h(tdata);
+
+	/* CK 300K from 320K, shipping off, auto_idle enabled, tout = 32ms */
+	ret = rt1711h_write8(chip, RT1711H_RTCTRL8,
+			     RT1711H_RTCTRL8_SET(0, 1, 1, 2));
+	if (ret < 0)
+		return ret;
+
+	/* I2C reset : (val + 1) * 12.5ms */
+	ret = rt1711h_write8(chip, RT1711H_RTCTRL11,
+			     RT1711H_RTCTRL11_SET(1, 0x0F));
+	if (ret < 0)
+		return ret;
+
+	/* tTCPCfilter : (26.7 * val) us */
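+	/* 0x0F: 26.7 * 15 = 400.5 us */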
+	ret = rt1711h_write8(chip, RT1711H_RTCTRL14, 0x0F);
+	if (ret < 0)
+		return ret;
+
+	/*  tDRP : (51.2 + 6.4 * val) ms */
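+	/* 0x04: 51.2 + 6.4 * 4 = 76.8 ms */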
+	ret = rt1711h_write8(chip, RT1711H_RTCTRL15, 0x04);
+	if (ret < 0)
+		return ret;
+
+	/* dcSRC.DRP : 33% */
+	return rt1711h_write16(chip, RT1711H_RTCTRL16, 330);
+}
+
+static int rt1711h_set_vconn(struct tcpci *tcpci, struct tcpci_data *tdata,
+			     bool enable)
+{
+	struct rt1711h_chip *chip = tdata_to_rt1711h(tdata);
+
+	return rt1711h_write8(chip, RT1711H_RTCTRL8,
+			      RT1711H_RTCTRL8_SET(0, 1, !enable, 2));
+}
+
+static int rt1711h_start_drp_toggling(struct tcpci *tcpci,
+				      struct tcpci_data *tdata,
+				      enum typec_cc_status cc)
+{
+	struct rt1711h_chip *chip = tdata_to_rt1711h(tdata);
+	int ret;
+	unsigned int reg = 0;
+
+	switch (cc) {
+	default:
+	case TYPEC_CC_RP_DEF:
+		reg |= (TCPC_ROLE_CTRL_RP_VAL_DEF <<
+			TCPC_ROLE_CTRL_RP_VAL_SHIFT);
+		break;
+	case TYPEC_CC_RP_1_5:
+		reg |= (TCPC_ROLE_CTRL_RP_VAL_1_5 <<
+			TCPC_ROLE_CTRL_RP_VAL_SHIFT);
+		break;
+	case TYPEC_CC_RP_3_0:
+		reg |= (TCPC_ROLE_CTRL_RP_VAL_3_0 <<
+			TCPC_ROLE_CTRL_RP_VAL_SHIFT);
+		break;
+	}
+
+	if (cc == TYPEC_CC_RD)
+		reg |= (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_CC1_SHIFT) |
+			   (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_CC2_SHIFT);
+	else
+		reg |= (TCPC_ROLE_CTRL_CC_RP << TCPC_ROLE_CTRL_CC1_SHIFT) |
+			   (TCPC_ROLE_CTRL_CC_RP << TCPC_ROLE_CTRL_CC2_SHIFT);
+
+	ret = rt1711h_write8(chip, TCPC_ROLE_CTRL, reg);
+	if (ret < 0)
+		return ret;
+	usleep_range(500, 1000);
+
+	return 0;
+}
+
+static irqreturn_t rt1711h_irq(int irq, void *dev_id)
+{
+	int ret;
+	u16 alert;
+	u8 status;
+	struct rt1711h_chip *chip = dev_id;
+
+	if (!chip->tcpci)
+		return IRQ_HANDLED;
+
+	ret = rt1711h_read16(chip, TCPC_ALERT, &alert);
+	if (ret < 0)
+		goto out;
+
+	if (alert & TCPC_ALERT_CC_STATUS) {
+		ret = rt1711h_read8(chip, TCPC_CC_STATUS, &status);
+		if (ret < 0)
+			goto out;
+		/* Clear cc change event triggered by starting toggling */
+		if (status & TCPC_CC_STATUS_TOGGLING)
+			rt1711h_write8(chip, TCPC_ALERT, TCPC_ALERT_CC_STATUS);
+	}
+
+out:
+	return tcpci_irq(chip->tcpci);
+}
+
+static int rt1711h_init_alert(struct rt1711h_chip *chip,
+			      struct i2c_client *client)
+{
+	int ret;
+
+	/* Disable chip interrupts before requesting irq */
+	ret = rt1711h_write16(chip, TCPC_ALERT_MASK, 0);
+	if (ret < 0)
+		return ret;
+
+	ret = devm_request_threaded_irq(chip->dev, client->irq, NULL,
+					rt1711h_irq,
+					IRQF_ONESHOT | IRQF_TRIGGER_LOW,
+					dev_name(chip->dev), chip);
+	if (ret < 0)
+		return ret;
+	enable_irq_wake(client->irq);
+	return 0;
+}
+
+static int rt1711h_sw_reset(struct rt1711h_chip *chip)
+{
+	int ret;
+
+	ret = rt1711h_write8(chip, RT1711H_RTCTRL13, 0x01);
+	if (ret < 0)
+		return ret;
+
+	usleep_range(1000, 2000);
+	return 0;
+}
+
+static int rt1711h_check_revision(struct i2c_client *i2c)
+{
+	int ret;
+
+	ret = i2c_smbus_read_word_data(i2c, TCPC_VENDOR_ID);
+	if (ret < 0)
+		return ret;
+	if (ret != RT1711H_VID) {
+		dev_err(&i2c->dev, "vid is not correct, 0x%04x\n", ret);
+		return -ENODEV;
+	}
+	ret = i2c_smbus_read_word_data(i2c, TCPC_PRODUCT_ID);
+	if (ret < 0)
+		return ret;
+	if (ret != RT1711H_PID) {
+		dev_err(&i2c->dev, "pid is not correct, 0x%04x\n", ret);
+		return -ENODEV;
+	}
+	return 0;
+}
+
+static int rt1711h_probe(struct i2c_client *client,
+			 const struct i2c_device_id *i2c_id)
+{
+	int ret;
+	struct rt1711h_chip *chip;
+
+	ret = rt1711h_check_revision(client);
+	if (ret < 0) {
+		dev_err(&client->dev, "check vid/pid fail\n");
+		return ret;
+	}
+
+	chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->data.regmap = devm_regmap_init_i2c(client,
+						 &rt1711h_regmap_config);
+	if (IS_ERR(chip->data.regmap))
+		return PTR_ERR(chip->data.regmap);
+
+	chip->dev = &client->dev;
+	i2c_set_clientdata(client, chip);
+
+	ret = rt1711h_sw_reset(chip);
+	if (ret < 0)
+		return ret;
+
+	ret = rt1711h_init_alert(chip, client);
+	if (ret < 0)
+		return ret;
+
+	chip->data.init = rt1711h_init;
+	chip->data.set_vconn = rt1711h_set_vconn;
+	chip->data.start_drp_toggling = rt1711h_start_drp_toggling;
+	chip->tcpci = tcpci_register_port(chip->dev, &chip->data);
+	if (IS_ERR_OR_NULL(chip->tcpci))
+		return PTR_ERR(chip->tcpci);
+
+	return 0;
+}
+
+static int rt1711h_remove(struct i2c_client *client)
+{
+	struct rt1711h_chip *chip = i2c_get_clientdata(client);
+
+	tcpci_unregister_port(chip->tcpci);
+	return 0;
+}
+
+static const struct i2c_device_id rt1711h_id[] = {
+	{ "rt1711h", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, rt1711h_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id rt1711h_of_match[] = {
+	{ .compatible = "richtek,rt1711h", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, rt1711h_of_match);
+#endif
+
+static struct i2c_driver rt1711h_i2c_driver = {
+	.driver = {
+		.name = "rt1711h",
+		.of_match_table = of_match_ptr(rt1711h_of_match),
+	},
+	.probe = rt1711h_probe,
+	.remove = rt1711h_remove,
+	.id_table = rt1711h_id,
+};
+module_i2c_driver(rt1711h_i2c_driver);
+
+MODULE_AUTHOR("ShuFan Lee <shufan_lee@richtek.com>");
+MODULE_DESCRIPTION("RT1711H USB Type-C Port Controller Interface Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/vboxvideo/vbox_drv.c b/drivers/staging/vboxvideo/vbox_drv.c
index e18642e..f6d26be 100644
--- a/drivers/staging/vboxvideo/vbox_drv.c
+++ b/drivers/staging/vboxvideo/vbox_drv.c
@@ -242,7 +242,7 @@
 	.minor = DRIVER_MINOR,
 	.patchlevel = DRIVER_PATCHLEVEL,
 
-	.gem_free_object = vbox_gem_free_object,
+	.gem_free_object_unlocked = vbox_gem_free_object,
 	.dumb_create = vbox_dumb_create,
 	.dumb_map_offset = vbox_dumb_mmap_offset,
 	.dumb_destroy = drm_gem_dumb_destroy,
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 6042901..ce1321a 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -94,8 +94,8 @@
 		return -EINVAL;
 	}
 
-	ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-	if (!ib_dev->ibd_bio_set) {
+	ret = bioset_init(&ib_dev->ibd_bio_set, IBLOCK_BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	if (ret) {
 		pr_err("IBLOCK: Unable to create bioset\n");
 		goto out;
 	}
@@ -141,7 +141,7 @@
 
 	bi = bdev_get_integrity(bd);
 	if (bi) {
-		struct bio_set *bs = ib_dev->ibd_bio_set;
+		struct bio_set *bs = &ib_dev->ibd_bio_set;
 
 		if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-IP") ||
 		    !strcmp(bi->profile->name, "T10-DIF-TYPE1-IP")) {
@@ -164,7 +164,7 @@
 				goto out_blkdev_put;
 			}
 			pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n",
-				 bs->bio_integrity_pool);
+				 &bs->bio_integrity_pool);
 		}
 		dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type;
 	}
@@ -174,8 +174,7 @@
 out_blkdev_put:
 	blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
 out_free_bioset:
-	bioset_free(ib_dev->ibd_bio_set);
-	ib_dev->ibd_bio_set = NULL;
+	bioset_exit(&ib_dev->ibd_bio_set);
 out:
 	return ret;
 }
@@ -199,8 +198,7 @@
 
 	if (ib_dev->ibd_bd != NULL)
 		blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
-	if (ib_dev->ibd_bio_set != NULL)
-		bioset_free(ib_dev->ibd_bio_set);
+	bioset_exit(&ib_dev->ibd_bio_set);
 }
 
 static unsigned long long iblock_emulate_read_cap_with_block_size(
@@ -332,7 +330,7 @@
 	if (sg_num > BIO_MAX_PAGES)
 		sg_num = BIO_MAX_PAGES;
 
-	bio = bio_alloc_bioset(GFP_NOIO, sg_num, ib_dev->ibd_bio_set);
+	bio = bio_alloc_bioset(GFP_NOIO, sg_num, &ib_dev->ibd_bio_set);
 	if (!bio) {
 		pr_err("Unable to allocate memory for bio\n");
 		return NULL;
diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h
index b4aeb25..9cc3843 100644
--- a/drivers/target/target_core_iblock.h
+++ b/drivers/target/target_core_iblock.h
@@ -22,7 +22,7 @@
 	struct se_device dev;
 	unsigned char ibd_udev_path[SE_UDEV_PATH_LEN];
 	u32	ibd_flags;
-	struct bio_set	*ibd_bio_set;
+	struct bio_set	ibd_bio_set;
 	struct block_device *ibd_bd;
 	bool ibd_readonly;
 } ____cacheline_aligned;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 6cb933e..668934e 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -986,8 +986,7 @@
 
 	req = blk_get_request(pdv->pdv_sd->request_queue,
 			cmd->data_direction == DMA_TO_DEVICE ?
-			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			GFP_KERNEL);
+			REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(req)) {
 		pr_err("PSCSI: blk_get_request() failed\n");
 		ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c
index a4d6a0e..23ad4f9 100644
--- a/drivers/thermal/broadcom/bcm2835_thermal.c
+++ b/drivers/thermal/broadcom/bcm2835_thermal.c
@@ -213,8 +213,8 @@
 	rate = clk_get_rate(data->clk);
 	if ((rate < 1920000) || (rate > 5000000))
 		dev_warn(&pdev->dev,
-			 "Clock %pCn running at %pCr Hz is outside of the recommended range: 1.92 to 5MHz\n",
-			 data->clk, data->clk);
+			 "Clock %pCn running at %lu Hz is outside of the recommended range: 1.92 to 5MHz\n",
+			 data->clk, rate);
 
 	/* register of thermal sensor and get info from DT */
 	tz = thermal_zone_of_sensor_register(&pdev->dev, 0, data,
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 3f9fe6a..c2c3442 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -112,7 +112,6 @@
 	int ret, i;
 	struct device *dev;
 	struct device_node *np;
-	struct tsens_sensor *s;
 	struct tsens_device *tmdev;
 	const struct tsens_data *data;
 	const struct of_device_id *id;
@@ -135,8 +134,9 @@
 		return -EINVAL;
 	}
 
-	tmdev = devm_kzalloc(dev, sizeof(*tmdev) +
-			     data->num_sensors * sizeof(*s), GFP_KERNEL);
+	tmdev = devm_kzalloc(dev,
+			     struct_size(tmdev, sensor, data->num_sensors),
+			     GFP_KERNEL);
 	if (!tmdev)
 		return -ENOMEM;
 
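
The tsens hunk switches the open-coded size arithmetic to struct_size(), which computes the size of a structure plus its trailing flexible array with overflow checking. A sketch with illustrative types:

#include <linux/overflow.h>
#include <linux/slab.h>

struct table {
	unsigned int n;
	struct entry {
		int val;
	} entry[];				/* flexible array member */
};

static struct table *table_alloc(unsigned int n)
{
	/* struct_size(t, entry, n) == sizeof(*t) + n * sizeof(t->entry[0]),
	 * saturating to SIZE_MAX on overflow so the allocation fails
	 * rather than silently coming up short */
	struct table *t = kzalloc(struct_size(t, entry, n), GFP_KERNEL);

	if (t)
		t->n = n;
	return t;
}
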
diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c
index 32d7ce4..34dead6 100644
--- a/drivers/tty/amiserial.c
+++ b/drivers/tty/amiserial.c
@@ -1566,19 +1566,6 @@
 	return 0;
 }
 
-static int rs_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, rs_proc_show, NULL);
-}
-
-static const struct file_operations rs_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= rs_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /*
  * ---------------------------------------------------------------------
  * rs_init() and friends
@@ -1620,7 +1607,7 @@
 	.tiocmget = rs_tiocmget,
 	.tiocmset = rs_tiocmset,
 	.get_icount = rs_get_icount,
-	.proc_fops = &rs_proc_fops,
+	.proc_show = rs_proc_show,
 };
 
 static int amiga_carrier_raised(struct tty_port *port)
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index cf0bde3..6d3c580 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -3972,19 +3972,6 @@
 	return 0;
 }
 
-static int cyclades_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cyclades_proc_show, NULL);
-}
-
-static const struct file_operations cyclades_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= cyclades_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* The serial driver boot-time initialization code!
     Hardware I/O ports are mapped to character special devices on a
     first found, first allocated manner.  That is, this code searches
@@ -4024,7 +4011,7 @@
 	.tiocmget = cy_tiocmget,
 	.tiocmset = cy_tiocmset,
 	.get_icount = cy_get_icount,
-	.proc_fops = &cyclades_proc_fops,
+	.proc_show = cyclades_proc_show,
 };
 
 static int __init cy_init(void)
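
The amiserial and cyclades hunks are two instances of the same conversion: struct tty_operations now has a ->proc_show() hook, so the per-driver single_open()/file_operations boilerplate collapses into one assignment. A converted driver looks roughly like this (names illustrative):

#include <linux/seq_file.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>

static int mydrv_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "mydrv: per-port status lines would go here\n");
	return 0;
}

static const struct tty_operations mydrv_ops = {
	/* .open, .close, .write, ... elided */
	.proc_show	= mydrv_proc_show,
};
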
diff --git a/drivers/tty/ipwireless/network.c b/drivers/tty/ipwireless/network.c
index 695439c..cf20616 100644
--- a/drivers/tty/ipwireless/network.c
+++ b/drivers/tty/ipwireless/network.c
@@ -416,7 +416,7 @@
 struct ipw_network *ipwireless_network_create(struct ipw_hardware *hw)
 {
 	struct ipw_network *network =
-		kzalloc(sizeof(struct ipw_network), GFP_ATOMIC);
+		kzalloc(sizeof(struct ipw_network), GFP_KERNEL);
 
 	if (!network)
 		return NULL;
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 1dbe27c..86b7e20 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -2675,7 +2675,7 @@
 	kref_put(&mux_net->ref, net_free);
 }
 
-static int gsm_mux_net_start_xmit(struct sk_buff *skb,
+static netdev_tx_t gsm_mux_net_start_xmit(struct sk_buff *skb,
 				      struct net_device *net)
 {
 	struct gsm_mux_net *mux_net = netdev_priv(net);
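
The n_gsm change is a prototype fix: ndo_start_xmit implementations must return netdev_tx_t (NETDEV_TX_OK or NETDEV_TX_BUSY), not int, so the contract is enforced by the type system. The minimal shape, with an illustrative body:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static netdev_tx_t mydrv_start_xmit(struct sk_buff *skb,
				    struct net_device *dev)
{
	dev->stats.tx_packets++;
	dev->stats.tx_bytes += skb->len;
	dev_kfree_skb_any(skb);		/* consume the skb on success */
	return NETDEV_TX_OK;
}
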
diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c
index b57b350..fed820e 100644
--- a/drivers/tty/nozomi.c
+++ b/drivers/tty/nozomi.c
@@ -72,19 +72,19 @@
 
 #define TMP_BUF_MAX 256
 
-#define DUMP(buf__,len__) \
-  do {  \
-    char tbuf[TMP_BUF_MAX] = {0};\
-    if (len__ > 1) {\
-	snprintf(tbuf, len__ > TMP_BUF_MAX ? TMP_BUF_MAX : len__, "%s", buf__);\
-	if (tbuf[len__-2] == '\r') {\
-		tbuf[len__-2] = 'r';\
-	} \
-	DBG1("SENDING: '%s' (%d+n)", tbuf, len__);\
-    } else {\
-	DBG1("SENDING: '%s' (%d)", tbuf, len__);\
-    } \
-} while (0)
+#define DUMP(buf__, len__)						\
+	do {								\
+		char tbuf[TMP_BUF_MAX] = {0};				\
+		if (len__ > 1) {					\
+			u32 data_len = min_t(u32, len__, TMP_BUF_MAX);	\
+			strscpy(tbuf, buf__, data_len);			\
+			if (tbuf[data_len - 2] == '\r')			\
+				tbuf[data_len - 2] = 'r';		\
+			DBG1("SENDING: '%s' (%d+n)", tbuf, len__);	\
+		} else {						\
+			DBG1("SENDING: '%s' (%d)", tbuf, len__);	\
+		}							\
+	} while (0)
 
 /*    Defines */
 #define NOZOMI_NAME		"nozomi"
@@ -102,41 +102,41 @@
 #define RECEIVE_BUF_MAX		4
 
 
-#define R_IIR		0x0000	/* Interrupt Identity Register */
-#define R_FCR		0x0000	/* Flow Control Register */
-#define R_IER		0x0004	/* Interrupt Enable Register */
+#define R_IIR			0x0000	/* Interrupt Identity Register */
+#define R_FCR			0x0000	/* Flow Control Register */
+#define R_IER			0x0004	/* Interrupt Enable Register */
 
 #define NOZOMI_CONFIG_MAGIC	0xEFEFFEFE
 #define TOGGLE_VALID		0x0000
 
 /* Definition of interrupt tokens */
-#define MDM_DL1		0x0001
-#define MDM_UL1		0x0002
-#define MDM_DL2		0x0004
-#define MDM_UL2		0x0008
-#define DIAG_DL1	0x0010
-#define DIAG_DL2	0x0020
-#define DIAG_UL		0x0040
-#define APP1_DL		0x0080
-#define APP1_UL		0x0100
-#define APP2_DL		0x0200
-#define APP2_UL		0x0400
-#define CTRL_DL		0x0800
-#define CTRL_UL		0x1000
-#define RESET		0x8000
+#define MDM_DL1			0x0001
+#define MDM_UL1			0x0002
+#define MDM_DL2			0x0004
+#define MDM_UL2			0x0008
+#define DIAG_DL1		0x0010
+#define DIAG_DL2		0x0020
+#define DIAG_UL			0x0040
+#define APP1_DL			0x0080
+#define APP1_UL			0x0100
+#define APP2_DL			0x0200
+#define APP2_UL			0x0400
+#define CTRL_DL			0x0800
+#define CTRL_UL			0x1000
+#define RESET			0x8000
 
-#define MDM_DL		(MDM_DL1  | MDM_DL2)
-#define MDM_UL		(MDM_UL1  | MDM_UL2)
-#define DIAG_DL		(DIAG_DL1 | DIAG_DL2)
+#define MDM_DL			(MDM_DL1  | MDM_DL2)
+#define MDM_UL			(MDM_UL1  | MDM_UL2)
+#define DIAG_DL			(DIAG_DL1 | DIAG_DL2)
 
 /* modem signal definition */
-#define CTRL_DSR	0x0001
-#define CTRL_DCD	0x0002
-#define CTRL_RI		0x0004
-#define CTRL_CTS	0x0008
+#define CTRL_DSR		0x0001
+#define CTRL_DCD		0x0002
+#define CTRL_RI			0x0004
+#define CTRL_CTS		0x0008
 
-#define CTRL_DTR	0x0001
-#define CTRL_RTS	0x0002
+#define CTRL_DTR		0x0001
+#define CTRL_RTS		0x0002
 
 #define MAX_PORT		4
 #define NOZOMI_MAX_PORTS	5
@@ -155,7 +155,7 @@
 
 /* Initialization states a card can be in */
 enum card_state {
-	NOZOMI_STATE_UKNOWN	= 0,
+	NOZOMI_STATE_UNKNOWN	= 0,
 	NOZOMI_STATE_ENABLED	= 1,	/* pci device enabled */
 	NOZOMI_STATE_ALLOCATED	= 2,	/* config setup done */
 	NOZOMI_STATE_READY	= 3,	/* flowcontrols received */
@@ -365,7 +365,7 @@
 	u8 *data;
 } __attribute__ ((packed));
 
-/*    Global variables */
+/* Global variables */
 static const struct pci_device_id nozomi_pci_tbl[] = {
 	{PCI_DEVICE(0x1931, 0x000c)},	/* Nozomi HSDPA */
 	{},
@@ -1686,12 +1686,12 @@
 
 	/* Note: these could change under us but it is not clear this
 	   matters if so */
-	return	(ctrl_ul->RTS ? TIOCM_RTS : 0) |
-		(ctrl_ul->DTR ? TIOCM_DTR : 0) |
-		(ctrl_dl->DCD ? TIOCM_CAR : 0) |
-		(ctrl_dl->RI  ? TIOCM_RNG : 0) |
-		(ctrl_dl->DSR ? TIOCM_DSR : 0) |
-		(ctrl_dl->CTS ? TIOCM_CTS : 0);
+	return (ctrl_ul->RTS ? TIOCM_RTS : 0)
+		| (ctrl_ul->DTR ? TIOCM_DTR : 0)
+		| (ctrl_dl->DCD ? TIOCM_CAR : 0)
+		| (ctrl_dl->RI  ? TIOCM_RNG : 0)
+		| (ctrl_dl->DSR ? TIOCM_DSR : 0)
+		| (ctrl_dl->CTS ? TIOCM_CTS : 0);
 }
 
 /* Sets io controls parameters */
@@ -1722,10 +1722,10 @@
 	const struct async_icount cnow = port->tty_icount;
 	int ret;
 
-	ret =	((flags & TIOCM_RNG) && (cnow.rng != cprev->rng)) ||
-		((flags & TIOCM_DSR) && (cnow.dsr != cprev->dsr)) ||
-		((flags & TIOCM_CD)  && (cnow.dcd != cprev->dcd)) ||
-		((flags & TIOCM_CTS) && (cnow.cts != cprev->cts));
+	ret = ((flags & TIOCM_RNG) && (cnow.rng != cprev->rng))
+		|| ((flags & TIOCM_DSR) && (cnow.dsr != cprev->dsr))
+		|| ((flags & TIOCM_CD)  && (cnow.dcd != cprev->dcd))
+		|| ((flags & TIOCM_CTS) && (cnow.cts != cprev->cts));
 
 	*cprev = cnow;
 
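
The rewritten DUMP macro above clamps the length with min_t() and copies with strscpy(), which always NUL-terminates within the given bound. The underlying idiom, as a hedged helper:

#include <linux/kernel.h>
#include <linux/string.h>

static void copy_preview(char *dst, size_t dst_size,
			 const char *src, size_t src_len)
{
	size_t n = min_t(size_t, src_len, dst_size);

	/* strscpy() copies at most n - 1 bytes and always terminates,
	 * unlike strncpy(), which can leave dst unterminated */
	strscpy(dst, src, n);
}
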
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 6c7151e..b0e2c48 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -110,16 +110,19 @@
 static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
 {
 	struct tty_struct *to = tty->link;
+	unsigned long flags;
 
 	if (tty->stopped)
 		return 0;
 
 	if (c > 0) {
+		spin_lock_irqsave(&to->port->lock, flags);
 		/* Stuff the data into the input queue of the other end */
 		c = tty_insert_flip_string(to->port, buf, c);
 		/* And shovel */
 		if (c)
 			tty_flip_buffer_push(to->port);
+		spin_unlock_irqrestore(&to->port->lock, flags);
 	}
 	return c;
 }
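
The pty hunk takes the destination port's lock around the insert-and-push sequence, since tty_insert_flip_string() and tty_flip_buffer_push() on a shared tty_port need external serialization when writers can race. The pattern in isolation:

#include <linux/tty.h>
#include <linux/tty_flip.h>

static int feed_flip(struct tty_port *port, const unsigned char *buf, int n)
{
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	n = tty_insert_flip_string(port, buf, n);	/* may copy < n */
	if (n)
		tty_flip_buffer_push(port);
	spin_unlock_irqrestore(&port->lock, flags);
	return n;
}
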
diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c
index 74a408d..435bec4 100644
--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c
+++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c
@@ -10,6 +10,8 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
 #include <linux/clk.h>
 
 #include "8250.h"
@@ -28,9 +30,18 @@
 	void __iomem		*regs;
 	struct clk		*clk;
 	int			line;
+	struct timer_list	unthrottle_timer;
+	struct uart_8250_port	*port;
 };
 
 /*
+ * If we fill the tty flip buffers, we throttle the data ready interrupt
+ * to prevent dropped characters. This timeout defines how long we wait
+ * to (conditionally, depending on buffer state) unthrottle.
+ */
+static const int unthrottle_timeout = HZ/10;
+
+/*
  * The VUART is basically two UART 'front ends' connected by their FIFO
  * (no actual serial line in between). One is on the BMC side (management
  * controller) and one is on the host CPU side.
@@ -179,6 +190,114 @@
 	serial8250_do_shutdown(uart_port);
 }
 
+static void __aspeed_vuart_set_throttle(struct uart_8250_port *up,
+		bool throttle)
+{
+	unsigned char irqs = UART_IER_RLSI | UART_IER_RDI;
+
+	up->ier &= ~irqs;
+	if (!throttle)
+		up->ier |= irqs;
+	serial_out(up, UART_IER, up->ier);
+}
+
+static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	__aspeed_vuart_set_throttle(up, throttle);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void aspeed_vuart_throttle(struct uart_port *port)
+{
+	aspeed_vuart_set_throttle(port, true);
+}
+
+static void aspeed_vuart_unthrottle(struct uart_port *port)
+{
+	aspeed_vuart_set_throttle(port, false);
+}
+
+static void aspeed_vuart_unthrottle_exp(struct timer_list *timer)
+{
+	struct aspeed_vuart *vuart = from_timer(vuart, timer, unthrottle_timer);
+	struct uart_8250_port *up = vuart->port;
+
+	if (!tty_buffer_space_avail(&up->port.state->port)) {
+		mod_timer(&vuart->unthrottle_timer,
+			  jiffies + unthrottle_timeout);
+		return;
+	}
+
+	aspeed_vuart_unthrottle(&up->port);
+}
+
+/*
+ * Custom interrupt handler to manage finer-grained flow control. Although we
+ * have throttle/unthrottle callbacks, we've seen that the VUART device can
+ * deliver characters faster than the ldisc has a chance to check buffer space
+ * against the throttle threshold. This results in dropped characters before
+ * the throttle.
+ *
+ * We avoid this by checking for flip buffer space before RX. If we have no space,
+ * throttle now and schedule an unthrottle for later, once the ldisc has had
+ * a chance to drain the buffers.
+ */
+static int aspeed_vuart_handle_irq(struct uart_port *port)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned int iir, lsr;
+	unsigned long flags;
+	int space, count;
+
+	iir = serial_port_in(port, UART_IIR);
+
+	if (iir & UART_IIR_NO_INT)
+		return 0;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	lsr = serial_port_in(port, UART_LSR);
+
+	if (lsr & (UART_LSR_DR | UART_LSR_BI)) {
+		space = tty_buffer_space_avail(&port->state->port);
+
+		if (!space) {
+			/* throttle and schedule an unthrottle later */
+			struct aspeed_vuart *vuart = port->private_data;
+			__aspeed_vuart_set_throttle(up, true);
+
+			if (!timer_pending(&vuart->unthrottle_timer)) {
+				vuart->port = up;
+				mod_timer(&vuart->unthrottle_timer,
+					  jiffies + unthrottle_timeout);
+			}
+
+		} else {
+			count = min(space, 256);
+
+			do {
+				serial8250_read_char(up, lsr);
+				lsr = serial_in(up, UART_LSR);
+				if (--count == 0)
+					break;
+			} while (lsr & (UART_LSR_DR | UART_LSR_BI));
+
+			tty_flip_buffer_push(&port->state->port);
+		}
+	}
+
+	serial8250_modem_status(up);
+	if (lsr & UART_LSR_THRE)
+		serial8250_tx_chars(up);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return 1;
+}
+
 static int aspeed_vuart_probe(struct platform_device *pdev)
 {
 	struct uart_8250_port port;
@@ -195,6 +314,7 @@
 		return -ENOMEM;
 
 	vuart->dev = &pdev->dev;
+	timer_setup(&vuart->unthrottle_timer, aspeed_vuart_unthrottle_exp, 0);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	vuart->regs = devm_ioremap_resource(&pdev->dev, res);
@@ -208,6 +328,9 @@
 	port.port.mapsize = resource_size(res);
 	port.port.startup = aspeed_vuart_startup;
 	port.port.shutdown = aspeed_vuart_shutdown;
+	port.port.throttle = aspeed_vuart_throttle;
+	port.port.unthrottle = aspeed_vuart_unthrottle;
+	port.port.status = UPSTAT_SYNC_FIFO;
 	port.port.dev = &pdev->dev;
 
 	rc = sysfs_create_group(&vuart->dev->kobj, &aspeed_vuart_attr_group);
@@ -253,6 +376,7 @@
 
 	port.port.irq = irq_of_parse_and_map(np, 0);
 	port.port.irqflags = IRQF_SHARED;
+	port.port.handle_irq = aspeed_vuart_handle_irq;
 	port.port.iotype = UPIO_MEM;
 	port.port.type = PORT_16550A;
 	port.port.uartclk = clk;
@@ -292,6 +416,7 @@
 {
 	struct aspeed_vuart *vuart = platform_get_drvdata(pdev);
 
+	del_timer_sync(&vuart->unthrottle_timer);
 	aspeed_vuart_set_enabled(vuart, false);
 	serial8250_unregister_port(vuart->line);
 	sysfs_remove_group(&vuart->dev->kobj, &aspeed_vuart_attr_group);
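
The unthrottle timer uses the timer_setup()/from_timer() pairing that replaced setup_timer()'s unsigned-long data argument. Reduced to the re-arming skeleton, with illustrative names:

#include <linux/jiffies.h>
#include <linux/timer.h>

struct my_ctx {
	struct timer_list	retry_timer;
	bool			still_blocked;	/* illustrative condition */
};

static void my_retry_fn(struct timer_list *t)
{
	/* from_timer() recovers the container from the timer_list member */
	struct my_ctx *ctx = from_timer(ctx, t, retry_timer);

	if (ctx->still_blocked)
		mod_timer(&ctx->retry_timer, jiffies + HZ / 10);
}

static void my_ctx_init(struct my_ctx *ctx)
{
	timer_setup(&ctx->retry_timer, my_retry_fn, 0);
}
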
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 6fcdb90..aff04f1 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -121,25 +121,44 @@
 }
 
 /* Returns once the transmitter is empty or we run out of retries */
-static void dw8250_tx_wait_empty(struct uart_port *p, int tries)
+static void dw8250_tx_wait_empty(struct uart_port *p)
 {
+	unsigned int tries = 20000;
+	unsigned int delay_threshold = tries - 1000;
 	unsigned int lsr;
 
 	while (tries--) {
 		lsr = readb (p->membase + (UART_LSR << p->regshift));
 		if (lsr & UART_LSR_TEMT)
 			break;
-		udelay (10);
+
+		/* The device is first given a chance to empty without delay,
+		 * to avoid slowdowns at high bitrates. If after 1000 tries
+		 * the buffer has still not emptied, allow more time for low-
+		 * speed links. */
+		if (tries < delay_threshold)
+			udelay (1);
 	}
 }
 
-static void dw8250_serial_out(struct uart_port *p, int offset, int value)
+static void dw8250_serial_out38x(struct uart_port *p, int offset, int value)
 {
 	struct dw8250_data *d = p->private_data;
 
 	/* Allow the TX to drain before we reconfigure */
 	if (offset == UART_LCR)
-		dw8250_tx_wait_empty(p, 1000);
+		dw8250_tx_wait_empty(p);
+
+	writeb(value, p->membase + (offset << p->regshift));
+
+	if (offset == UART_LCR && !d->uart_16550_compatible)
+		dw8250_check_lcr(p, value);
+}
+
+static void dw8250_serial_out(struct uart_port *p, int offset, int value)
+{
+	struct dw8250_data *d = p->private_data;
 
 	writeb(value, p->membase + (offset << p->regshift));
 
@@ -357,6 +376,9 @@
 			p->serial_in = dw8250_serial_in32be;
 			p->serial_out = dw8250_serial_out32be;
 		}
+		if (of_device_is_compatible(np, "marvell,armada-38x-uart"))
+			p->serial_out = dw8250_serial_out38x;
+
 	} else if (acpi_dev_present("APMC0D08", NULL, -1)) {
 		p->iotype = UPIO_MEM32;
 		p->regshift = 2;
@@ -554,6 +576,10 @@
 	if (!data->skip_autocfg)
 		dw8250_setup_port(p);
 
+#ifdef CONFIG_PM
+	uart.capabilities |= UART_CAP_RPM;
+#endif
+
 	/* If we have a valid fifosize, try hooking up DMA */
 	if (p->fifosize) {
 		data->dma.rxconf.src_maxburst = p->fifosize / 4;
@@ -666,6 +692,7 @@
 static const struct of_device_id dw8250_of_match[] = {
 	{ .compatible = "snps,dw-apb-uart" },
 	{ .compatible = "cavium,octeon-3860-uart" },
+	{ .compatible = "marvell,armada-38x-uart" },
 	{ /* Sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, dw8250_of_match);
diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index ae6a256..5cd8c36 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -122,7 +122,7 @@
 	serial8250_early_out(port, UART_FCR, 0);	/* no fifo */
 	serial8250_early_out(port, UART_MCR, 0x3);	/* DTR + RTS */
 
-	if (port->uartclk && device->baud) {
+	if (port->uartclk) {
 		divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * device->baud);
 		c = serial8250_early_in(port, UART_LCR);
 		serial8250_early_out(port, UART_LCR, c | UART_LCR_DLAB);
diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c
index 9835b1c..bfb37f0 100644
--- a/drivers/tty/serial/8250/8250_of.c
+++ b/drivers/tty/serial/8250/8250_of.c
@@ -92,13 +92,43 @@
 		goto err_unprepare;
 	}
 
+	port->flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF | UPF_FIXED_PORT |
+				  UPF_FIXED_TYPE;
 	spin_lock_init(&port->lock);
-	port->mapbase = resource.start;
-	port->mapsize = resource_size(&resource);
 
-	/* Check for shifted address mapping */
-	if (of_property_read_u32(np, "reg-offset", &prop) == 0)
-		port->mapbase += prop;
+	if (resource_type(&resource) == IORESOURCE_IO) {
+		port->iotype = UPIO_PORT;
+		port->iobase = resource.start;
+	} else {
+		port->mapbase = resource.start;
+		port->mapsize = resource_size(&resource);
+
+		/* Check for shifted address mapping */
+		if (of_property_read_u32(np, "reg-offset", &prop) == 0)
+			port->mapbase += prop;
+
+		port->iotype = UPIO_MEM;
+		if (of_property_read_u32(np, "reg-io-width", &prop) == 0) {
+			switch (prop) {
+			case 1:
+				port->iotype = UPIO_MEM;
+				break;
+			case 2:
+				port->iotype = UPIO_MEM16;
+				break;
+			case 4:
+				port->iotype = of_device_is_big_endian(np) ?
+					       UPIO_MEM32BE : UPIO_MEM32;
+				break;
+			default:
+				dev_warn(&ofdev->dev, "unsupported reg-io-width (%d)\n",
+					 prop);
+				ret = -EINVAL;
+				goto err_dispose;
+			}
+		}
+		port->flags |= UPF_IOREMAP;
+	}
 
 	/* Check for registers offset within the devices address range */
 	if (of_property_read_u32(np, "reg-shift", &prop) == 0)
@@ -114,26 +144,6 @@
 		port->line = ret;
 
 	port->irq = irq_of_parse_and_map(np, 0);
-	port->iotype = UPIO_MEM;
-	if (of_property_read_u32(np, "reg-io-width", &prop) == 0) {
-		switch (prop) {
-		case 1:
-			port->iotype = UPIO_MEM;
-			break;
-		case 2:
-			port->iotype = UPIO_MEM16;
-			break;
-		case 4:
-			port->iotype = of_device_is_big_endian(np) ?
-				       UPIO_MEM32BE : UPIO_MEM32;
-			break;
-		default:
-			dev_warn(&ofdev->dev, "unsupported reg-io-width (%d)\n",
-				 prop);
-			ret = -EINVAL;
-			goto err_dispose;
-		}
-	}
 
 	info->rst = devm_reset_control_get_optional_shared(&ofdev->dev, NULL);
 	if (IS_ERR(info->rst)) {
@@ -147,8 +157,7 @@
 
 	port->type = type;
 	port->uartclk = clk;
-	port->flags = UPF_SHARE_IRQ | UPF_BOOT_AUTOCONF | UPF_IOREMAP
-		| UPF_FIXED_PORT | UPF_FIXED_TYPE;
+	port->irqflags |= IRQF_SHARED;
 
 	if (of_property_read_bool(np, "no-loopback-test"))
 		port->flags |= UPF_SKIP_TEST;
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 624b501..1b337fe 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -1110,13 +1110,14 @@
 	return 0;
 }
 
+static const u8 omap4_habit = UART_ERRATA_CLOCK_DISABLE;
 static const u8 am3352_habit = OMAP_DMA_TX_KICK | UART_ERRATA_CLOCK_DISABLE;
 static const u8 dra742_habit = UART_ERRATA_CLOCK_DISABLE;
 
 static const struct of_device_id omap8250_dt_ids[] = {
 	{ .compatible = "ti,omap2-uart" },
 	{ .compatible = "ti,omap3-uart" },
-	{ .compatible = "ti,omap4-uart" },
+	{ .compatible = "ti,omap4-uart", .data = &omap4_habit, },
 	{ .compatible = "ti,am3352-uart", .data = &am3352_habit, },
 	{ .compatible = "ti,am4372-uart", .data = &am3352_habit, },
 	{ .compatible = "ti,dra742-uart", .data = &dra742_habit, },
@@ -1310,8 +1311,17 @@
 static int omap8250_suspend(struct device *dev)
 {
 	struct omap8250_priv *priv = dev_get_drvdata(dev);
+	struct uart_8250_port *up = serial8250_get_port(priv->line);
 
 	serial8250_suspend_port(priv->line);
+
+	pm_runtime_get_sync(dev);
+	if (!device_may_wakeup(dev))
+		priv->wer = 0;
+	serial_out(up, UART_OMAP_WER, priv->wer);
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+
 	flush_work(&priv->qos_work);
 	return 0;
 }
@@ -1353,6 +1363,19 @@
 	int sysc;
 	int syss;
 
+	/*
+	 * At least on omap4, unused uarts may not idle after reset without
+	 * a basic scr dma configuration even with no dma in use. The
+	 * module clkctrl status bits will be 1 instead of 3 blocking idle
+	 * for the whole clockdomain. The softreset below will clear scr,
+	 * and we restore it on resume so this is safe to do on all SoCs
+	 * needing omap8250_soft_reset() quirk. Do it in two writes as
+	 * recommended in the comment for omap8250_update_scr().
+	 */
+	serial_out(up, UART_OMAP_SCR, OMAP_UART_SCR_DMAMODE_1);
+	serial_out(up, UART_OMAP_SCR,
+		   OMAP_UART_SCR_DMAMODE_1 | OMAP_UART_SCR_DMAMODE_CTL);
+
 	sysc = serial_in(up, UART_OMAP_SYSC);
 
 	/* softreset the UART */
@@ -1403,6 +1426,8 @@
 
 		/* Restore to UART mode after reset (for wakeup) */
 		omap8250_update_mdr1(up, priv);
+		/* Restore wakeup enable register */
+		serial_out(up, UART_OMAP_WER, priv->wer);
 	}
 
 	if (up->dma && up->dma->rxchan)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 95833cb..cf541aa 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -243,6 +243,7 @@
 		.fifo_size	= 32,
 		.tx_loadsz	= 32,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.rxtrig_bytes	= {1, 8, 16, 30},
 		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
 	},
 	[PORT_ALTR_16550_F64] = {
@@ -250,6 +251,7 @@
 		.fifo_size	= 64,
 		.tx_loadsz	= 64,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.rxtrig_bytes	= {1, 16, 32, 62},
 		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
 	},
 	[PORT_ALTR_16550_F128] = {
@@ -257,6 +259,7 @@
 		.fifo_size	= 128,
 		.tx_loadsz	= 128,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.rxtrig_bytes	= {1, 32, 64, 126},
 		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
 	},
 	/*
@@ -1680,7 +1683,7 @@
 	serial8250_rpm_put(up);
 }
 
-static void serial8250_read_char(struct uart_8250_port *up, unsigned char lsr)
+void serial8250_read_char(struct uart_8250_port *up, unsigned char lsr)
 {
 	struct uart_port *port = &up->port;
 	unsigned char ch;
@@ -1740,6 +1743,7 @@
 
 	uart_insert_char(port, lsr, UART_LSR_OE, ch, flag);
 }
+EXPORT_SYMBOL_GPL(serial8250_read_char);
 
 /*
  * serial8250_rx_chars: processes according to the passed in LSR
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 0f058df..df8bd0c 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -115,7 +115,6 @@
 
 config SERIAL_ATMEL
 	bool "AT91 on-chip serial port support"
-	depends on HAS_DMA
 	depends on ARCH_AT91 || COMPILE_TEST
 	select SERIAL_CORE
 	select SERIAL_MCTRL_GPIO if GPIOLIB
@@ -500,7 +499,6 @@
 
 config SERIAL_IMX
 	tristate "IMX serial port support"
-	depends on HAS_DMA
 	depends on ARCH_MXC || COMPILE_TEST
 	select SERIAL_CORE
 	select RATIONAL
@@ -676,6 +674,8 @@
 
 config SERIAL_SH_SCI_NR_UARTS
 	int "Maximum number of SCI(F) serial ports" if EXPERT
+	range 1 64 if 64BIT
+	range 1 32 if !64BIT
 	depends on SERIAL_SH_SCI
 	default "3" if H8300
 	default "10" if SUPERH
@@ -1262,7 +1262,6 @@
 
 config SERIAL_MXS_AUART
 	tristate "MXS AUART support"
-	depends on HAS_DMA
 	depends on ARCH_MXS || MACH_ASM9260 || COMPILE_TEST
 	select SERIAL_CORE
 	select SERIAL_MCTRL_GPIO if GPIOLIB
@@ -1295,7 +1294,7 @@
 
 config SERIAL_AR933X
 	tristate "AR933X serial port support"
-	depends on HAVE_CLK && SOC_AR933X
+	depends on HAVE_CLK && ATH79
 	select SERIAL_CORE
 	help
 	  If you have an Atheros AR933X SOC based board and want to use the
@@ -1473,7 +1472,6 @@
 config SERIAL_STM32
 	tristate "STMicroelectronics STM32 serial port support"
 	select SERIAL_CORE
-	depends on HAS_DMA
 	depends on ARCH_STM32 || COMPILE_TEST
 	help
 	  This driver is for the on-chip Serial Controller on
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 4b40a5b..ebd33c0 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1727,10 +1727,26 @@
  */
 static void pl011_enable_interrupts(struct uart_amba_port *uap)
 {
+	unsigned int i;
+
 	spin_lock_irq(&uap->port.lock);
 
 	/* Clear out any spuriously appearing RX interrupts */
 	pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR);
+
+	/*
+	 * RXIS is asserted only when the RX FIFO transitions from below
+	 * to above the trigger threshold.  If the RX FIFO is already
+	 * full to the threshold this can't happen and RXIS will now be
+	 * stuck off.  Drain the RX FIFO explicitly to fix this:
+	 */
+	for (i = 0; i < uap->fifosize * 2; ++i) {
+		if (pl011_read(uap, REG_FR) & UART01x_FR_RXFE)
+			break;
+
+		pl011_read(uap, REG_DR);
+	}
+
 	uap->im = UART011_RTIM;
 	if (!pl011_dma_rx_running(uap))
 		uap->im |= UART011_RXIM;
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index e287fe8..55b3eff 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1757,7 +1757,6 @@
 {
 	struct platform_device *pdev = to_platform_device(port->dev);
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
-	struct tty_struct *tty = port->state->port.tty;
 	int retval;
 
 	/*
@@ -1772,8 +1771,8 @@
 	 * Allocate the IRQ
 	 */
 	retval = request_irq(port->irq, atmel_interrupt,
-			IRQF_SHARED | IRQF_COND_SUSPEND,
-			tty ? tty->name : "atmel_serial", port);
+			     IRQF_SHARED | IRQF_COND_SUSPEND,
+			     dev_name(&pdev->dev), port);
 	if (retval) {
 		dev_err(port->dev, "atmel_startup - Can't get irq\n");
 		return retval;
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
index 2268339..c14873b 100644
--- a/drivers/tty/serial/earlycon.c
+++ b/drivers/tty/serial/earlycon.c
@@ -246,7 +246,6 @@
 		return -ENXIO;
 	}
 	port->mapbase = addr;
-	port->uartclk = BASE_BAUD * 16;
 
 	val = of_get_flat_dt_prop(node, "reg-offset", NULL);
 	if (val)
@@ -281,6 +280,10 @@
 	if (val)
 		early_console_dev.baud = be32_to_cpu(*val);
 
+	val = of_get_flat_dt_prop(node, "clock-frequency", NULL);
+	if (val)
+		port->uartclk = be32_to_cpu(*val);
+
 	if (options) {
 		early_console_dev.baud = simple_strtoul(options, NULL, 0);
 		strlcpy(early_console_dev.options, options,
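
Both earlycon hunks read optional u32 cells out of the flattened device tree; FDT stores cells big-endian, hence the be32_to_cpu() on every read. The same lookup as a hedged helper:

#include <linux/of_fdt.h>

static u32 fdt_u32_or(unsigned long node, const char *name, u32 fallback)
{
	const __be32 *val = of_get_flat_dt_prop(node, name, NULL);

	return val ? be32_to_cpu(*val) : fallback;
}
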
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index c2fc6be..4e85357 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -1291,18 +1291,13 @@
 
 static void imx_uart_disable_dma(struct imx_port *sport)
 {
-	u32 ucr1, ucr2;
+	u32 ucr1;
 
 	/* clear UCR1 */
 	ucr1 = imx_uart_readl(sport, UCR1);
 	ucr1 &= ~(UCR1_RXDMAEN | UCR1_TXDMAEN | UCR1_ATDMAEN);
 	imx_uart_writel(sport, ucr1, UCR1);
 
-	/* clear UCR2 */
-	ucr2 = imx_uart_readl(sport, UCR2);
-	ucr2 &= ~(UCR2_CTSC | UCR2_CTS | UCR2_ATEN);
-	imx_uart_writel(sport, ucr2, UCR2);
-
 	imx_uart_setup_ufcr(sport, TXTL_DEFAULT, RXTL_DEFAULT);
 
 	sport->dma_is_enabled = 0;
@@ -1427,13 +1422,21 @@
 {
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long flags;
-	u32 ucr1, ucr2;
+	u32 ucr1, ucr2, ucr4;
 
 	if (sport->dma_is_enabled) {
-		sport->dma_is_rxing = 0;
-		sport->dma_is_txing = 0;
 		dmaengine_terminate_sync(sport->dma_chan_tx);
+		if (sport->dma_is_txing) {
+			dma_unmap_sg(sport->port.dev, &sport->tx_sgl[0],
+				     sport->dma_tx_nents, DMA_TO_DEVICE);
+			sport->dma_is_txing = 0;
+		}
 		dmaengine_terminate_sync(sport->dma_chan_rx);
+		if (sport->dma_is_rxing) {
+			dma_unmap_sg(sport->port.dev, &sport->rx_sgl,
+				     1, DMA_FROM_DEVICE);
+			sport->dma_is_rxing = 0;
+		}
 
 		spin_lock_irqsave(&sport->port.lock, flags);
 		imx_uart_stop_tx(port);
@@ -1449,6 +1452,10 @@
 	ucr2 = imx_uart_readl(sport, UCR2);
 	ucr2 &= ~(UCR2_TXEN | UCR2_ATEN);
 	imx_uart_writel(sport, ucr2, UCR2);
+
+	ucr4 = imx_uart_readl(sport, UCR4);
+	ucr4 &= ~UCR4_OREN;
+	imx_uart_writel(sport, ucr4, UCR4);
 	spin_unlock_irqrestore(&sport->port.lock, flags);
 
 	/*
@@ -2425,8 +2432,7 @@
 
 static int imx_uart_suspend_noirq(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct imx_port *sport = platform_get_drvdata(pdev);
+	struct imx_port *sport = dev_get_drvdata(dev);
 
 	imx_uart_save_context(sport);
 
@@ -2437,8 +2443,7 @@
 
 static int imx_uart_resume_noirq(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct imx_port *sport = platform_get_drvdata(pdev);
+	struct imx_port *sport = dev_get_drvdata(dev);
 	int ret;
 
 	ret = clk_enable(sport->clk_ipg);
@@ -2452,8 +2457,7 @@
 
 static int imx_uart_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct imx_port *sport = platform_get_drvdata(pdev);
+	struct imx_port *sport = dev_get_drvdata(dev);
 	int ret;
 
 	uart_suspend_port(&imx_uart_uart_driver, &sport->port);
@@ -2471,8 +2475,7 @@
 
 static int imx_uart_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct imx_port *sport = platform_get_drvdata(pdev);
+	struct imx_port *sport = dev_get_drvdata(dev);
 
 	/* disable wakeup from i.MX UART */
 	imx_uart_enable_wakeup(sport, false);
@@ -2487,8 +2490,7 @@
 
 static int imx_uart_freeze(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct imx_port *sport = platform_get_drvdata(pdev);
+	struct imx_port *sport = dev_get_drvdata(dev);
 
 	uart_suspend_port(&imx_uart_uart_driver, &sport->port);
 
@@ -2497,8 +2499,7 @@
 
 static int imx_uart_thaw(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct imx_port *sport = platform_get_drvdata(pdev);
+	struct imx_port *sport = dev_get_drvdata(dev);
 
 	uart_resume_port(&imx_uart_uart_driver, &sport->port);
 
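
The repeated imx cleanup (and the matching ones in st-asc, xilinx_uartps and qcom_geni_serial further down) works because platform_set_drvdata() stores into the embedded struct device, so PM callbacks can call dev_get_drvdata() directly instead of bouncing through to_platform_device(). A sketch:

#include <linux/device.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

struct my_port { int id; };			/* illustrative private data */

static int my_probe(struct platform_device *pdev)
{
	struct my_port *p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);

	if (!p)
		return -ENOMEM;
	platform_set_drvdata(pdev, p);		/* lands in pdev->dev */
	return 0;
}

static int my_suspend(struct device *dev)
{
	struct my_port *p = dev_get_drvdata(dev); /* no to_platform_device() */

	return p ? 0 : -ENODEV;
}
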
diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
index ee96cf0..736b74f 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -1812,11 +1812,34 @@
 };
 MODULE_DEVICE_TABLE(of, msm_match_table);
 
+static int __maybe_unused msm_serial_suspend(struct device *dev)
+{
+	struct msm_port *port = dev_get_drvdata(dev);
+
+	uart_suspend_port(&msm_uart_driver, &port->uart);
+
+	return 0;
+}
+
+static int __maybe_unused msm_serial_resume(struct device *dev)
+{
+	struct msm_port *port = dev_get_drvdata(dev);
+
+	uart_resume_port(&msm_uart_driver, &port->uart);
+
+	return 0;
+}
+
+static const struct dev_pm_ops msm_serial_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(msm_serial_suspend, msm_serial_resume)
+};
+
 static struct platform_driver msm_platform_driver = {
 	.remove = msm_serial_remove,
 	.probe = msm_serial_probe,
 	.driver = {
 		.name = "msm_serial",
+		.pm = &msm_serial_dev_pm_ops,
 		.of_match_table = msm_match_table,
 	},
 };
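
The msm_serial hunk hooks system sleep through dev_pm_ops. SET_SYSTEM_SLEEP_PM_OPS() expands to nothing when CONFIG_PM_SLEEP is disabled, which is why the callbacks carry __maybe_unused. The canonical wiring, reduced to a sketch:

#include <linux/platform_device.h>
#include <linux/pm.h>

static int __maybe_unused mydrv_suspend(struct device *dev)
{
	return 0;				/* quiesce hardware here */
}

static int __maybe_unused mydrv_resume(struct device *dev)
{
	return 0;				/* bring hardware back here */
}

static const struct dev_pm_ops mydrv_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(mydrv_suspend, mydrv_resume)
};

static struct platform_driver mydrv_driver = {
	.driver = {
		.name	= "mydrv",
		.pm	= &mydrv_pm_ops,
	},
};
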
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index f503fab..d04b5ee 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -71,6 +71,8 @@
 #define UART_BRDV		0x10
 #define  BRDV_BAUD_MASK         0x3FF
 
+#define UART_OSAMP		0x14
+
 #define MVEBU_NR_UARTS		2
 
 #define MVEBU_UART_TYPE		"mvebu-uart"
@@ -108,6 +110,17 @@
 	struct uart_flags flags;
 };
 
+/* Saved registers during suspend */
+struct mvebu_uart_pm_regs {
+	unsigned int rbr;
+	unsigned int tsh;
+	unsigned int ctrl;
+	unsigned int intr;
+	unsigned int stat;
+	unsigned int brdv;
+	unsigned int osamp;
+};
+
 /* MVEBU UART driver structure */
 struct mvebu_uart {
 	struct uart_port *port;
@@ -115,6 +128,9 @@
 	int irq[UART_IRQ_COUNT];
 	unsigned char __iomem *nb;
 	struct mvebu_uart_driver_data *data;
+#if defined(CONFIG_PM)
+	struct mvebu_uart_pm_regs pm_regs;
+#endif /* CONFIG_PM */
 };
 
 static struct mvebu_uart *to_mvuart(struct uart_port *port)
@@ -718,6 +734,51 @@
 #endif
 };
 
+#if defined(CONFIG_PM)
+static int mvebu_uart_suspend(struct device *dev)
+{
+	struct mvebu_uart *mvuart = dev_get_drvdata(dev);
+	struct uart_port *port = mvuart->port;
+
+	uart_suspend_port(&mvebu_uart_driver, port);
+
+	mvuart->pm_regs.rbr = readl(port->membase + UART_RBR(port));
+	mvuart->pm_regs.tsh = readl(port->membase + UART_TSH(port));
+	mvuart->pm_regs.ctrl = readl(port->membase + UART_CTRL(port));
+	mvuart->pm_regs.intr = readl(port->membase + UART_INTR(port));
+	mvuart->pm_regs.stat = readl(port->membase + UART_STAT);
+	mvuart->pm_regs.brdv = readl(port->membase + UART_BRDV);
+	mvuart->pm_regs.osamp = readl(port->membase + UART_OSAMP);
+
+	device_set_wakeup_enable(dev, true);
+
+	return 0;
+}
+
+static int mvebu_uart_resume(struct device *dev)
+{
+	struct mvebu_uart *mvuart = dev_get_drvdata(dev);
+	struct uart_port *port = mvuart->port;
+
+	writel(mvuart->pm_regs.rbr, port->membase + UART_RBR(port));
+	writel(mvuart->pm_regs.tsh, port->membase + UART_TSH(port));
+	writel(mvuart->pm_regs.ctrl, port->membase + UART_CTRL(port));
+	writel(mvuart->pm_regs.intr, port->membase + UART_INTR(port));
+	writel(mvuart->pm_regs.stat, port->membase + UART_STAT);
+	writel(mvuart->pm_regs.brdv, port->membase + UART_BRDV);
+	writel(mvuart->pm_regs.osamp, port->membase + UART_OSAMP);
+
+	uart_resume_port(&mvebu_uart_driver, port);
+
+	return 0;
+}
+
+static const struct dev_pm_ops mvebu_uart_pm_ops = {
+	.suspend        = mvebu_uart_suspend,
+	.resume         = mvebu_uart_resume,
+};
+#endif /* CONFIG_PM */
+
 static const struct of_device_id mvebu_uart_of_match[];
 
 /* Counter to keep track of each UART port id when not using CONFIG_OF */
@@ -891,6 +952,9 @@
 		.name  = "mvebu-uart",
 		.of_match_table = of_match_ptr(mvebu_uart_of_match),
 		.suppress_bind_attrs = true,
+#if defined(CONFIG_PM)
+		.pm	= &mvebu_uart_pm_ops,
+#endif /* CONFIG_PM */
 	},
 };
 
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index a1b3eb0..c62e17c 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -98,14 +98,13 @@
 	enum geni_se_xfer_mode xfer_mode;
 	bool setup;
 	int (*handle_rx)(struct uart_port *uport, u32 bytes, bool drop);
-	unsigned int xmit_size;
 	unsigned int baud;
 	unsigned int tx_bytes_pw;
 	unsigned int rx_bytes_pw;
 	bool brk;
 };
 
-static const struct uart_ops qcom_geni_serial_pops;
+static const struct uart_ops qcom_geni_console_pops;
 static struct uart_driver qcom_geni_console_driver;
 static int handle_rx_console(struct uart_port *uport, u32 bytes, bool drop);
 static unsigned int qcom_geni_serial_tx_empty(struct uart_port *port);
@@ -118,7 +117,14 @@
 #define to_dev_port(ptr, member) \
 		container_of(ptr, struct qcom_geni_serial_port, member)
 
-static struct qcom_geni_serial_port qcom_geni_console_port;
+static struct qcom_geni_serial_port qcom_geni_console_port = {
+	.uport = {
+		.iotype = UPIO_MEM,
+		.ops = &qcom_geni_console_pops,
+		.flags = UPF_BOOT_AUTOCONF,
+		.line = 0,
+	},
+};
 
 static int qcom_geni_serial_request_port(struct uart_port *uport)
 {
@@ -189,8 +195,19 @@
 		timeout_us = ((fifo_bits * USEC_PER_SEC) / baud) + 500;
 	}
 
-	return !readl_poll_timeout_atomic(uport->membase + offset, reg,
-			 (bool)(reg & field) == set, 10, timeout_us);
+	/*
+	 * Use a custom poll loop instead of readl_poll_timeout_atomic()
+	 * since ktime is not ready at the time of early console.
+	 */
+	timeout_us = DIV_ROUND_UP(timeout_us, 10) * 10;
+	while (timeout_us) {
+		reg = readl_relaxed(uport->membase + offset);
+		if ((bool)(reg & field) == set)
+			return true;
+		udelay(10);
+		timeout_us -= 10;
+	}
+	return false;
 }
 
 static void qcom_geni_serial_setup_tx(struct uart_port *uport, u32 xmit_size)
@@ -286,6 +303,10 @@
 	u32 bytes_to_send = count;
 
 	for (i = 0; i < count; i++) {
+		/*
+		 * uart_console_write() adds a carriage return for each newline.
+		 * Account for additional bytes to be written.
+		 */
 		if (s[i] == '\n')
 			bytes_to_send++;
 	}
@@ -305,7 +326,7 @@
 		if (!qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
 						M_TX_FIFO_WATERMARK_EN, true))
 			break;
-		chars_to_write = min_t(size_t, (size_t)(count - i), avail / 2);
+		chars_to_write = min_t(size_t, count - i, avail / 2);
 		uart_console_write(uport, s + i, chars_to_write,
 						qcom_geni_serial_wr_char);
 		writel_relaxed(M_TX_FIFO_WATERMARK_EN, uport->membase +
@@ -406,20 +427,18 @@
 	u32 status;
 
 	if (port->xfer_mode == GENI_SE_FIFO) {
-		status = readl_relaxed(uport->membase + SE_GENI_STATUS);
+		/*
+		 * readl ensures reading & writing of IRQ_EN register
+		 * is not re-ordered before checking the status of the
+		 * Serial Engine.
+		 */
+		status = readl(uport->membase + SE_GENI_STATUS);
 		if (status & M_GENI_CMD_ACTIVE)
 			return;
 
 		if (!qcom_geni_serial_tx_empty(uport))
 			return;
 
-		/*
-		 * Ensure writing to IRQ_EN & watermark registers are not
-		 * re-ordered before checking the status of the Serial
-		 * Engine and TX FIFO
-		 */
-		mb();
-
 		irq_en = readl_relaxed(uport->membase +	SE_GENI_M_IRQ_EN);
 		irq_en |= M_TX_FIFO_WATERMARK_EN | M_CMD_DONE_EN;
 
@@ -442,7 +461,6 @@
 		writel_relaxed(0, uport->membase +
 				     SE_GENI_TX_WATERMARK_REG);
 	}
-	port->xmit_size = 0;
 	writel_relaxed(irq_en, uport->membase + SE_GENI_M_IRQ_EN);
 	status = readl_relaxed(uport->membase + SE_GENI_STATUS);
 	/* Possible stop tx is called multiple times. */
@@ -572,21 +590,14 @@
 	chunk = uart_circ_chars_pending(xmit);
 	status = readl_relaxed(uport->membase + SE_GENI_TX_FIFO_STATUS);
 	/* Both FIFO and framework buffer are drained */
-	if (chunk == port->xmit_size && !status) {
-		port->xmit_size = 0;
-		uart_circ_clear(xmit);
+	if (!chunk && !status) {
 		qcom_geni_serial_stop_tx(uport);
 		goto out_write_wakeup;
 	}
-	chunk -= port->xmit_size;
 
 	avail = (port->tx_fifo_depth - port->tx_wm) * port->tx_bytes_pw;
-	tail = (xmit->tail + port->xmit_size) & (UART_XMIT_SIZE - 1);
-	if (chunk > (UART_XMIT_SIZE - tail))
-		chunk = UART_XMIT_SIZE - tail;
-	if (chunk > avail)
-		chunk = avail;
-
+	tail = xmit->tail;
+	chunk = min3((size_t)chunk, (size_t)(UART_XMIT_SIZE - tail), avail);
 	if (!chunk)
 		goto out_write_wakeup;
 
@@ -595,24 +606,27 @@
 	remaining = chunk;
 	for (i = 0; i < chunk; ) {
 		unsigned int tx_bytes;
-		unsigned int buf = 0;
+		u8 buf[sizeof(u32)];
 		int c;
 
-		tx_bytes = min_t(size_t, remaining, (size_t)port->tx_bytes_pw);
+		memset(buf, 0, ARRAY_SIZE(buf));
+		tx_bytes = min_t(size_t, remaining, port->tx_bytes_pw);
 		for (c = 0; c < tx_bytes ; c++)
-			buf |= (xmit->buf[tail + c] << (c * BITS_PER_BYTE));
+			buf[c] = xmit->buf[tail + c];
 
-		writel_relaxed(buf, uport->membase + SE_GENI_TX_FIFOn);
+		iowrite32_rep(uport->membase + SE_GENI_TX_FIFOn, buf, 1);
 
 		i += tx_bytes;
-		tail = (tail + tx_bytes) & (UART_XMIT_SIZE - 1);
+		tail += tx_bytes;
 		uport->icount.tx += tx_bytes;
 		remaining -= tx_bytes;
 	}
+
+	xmit->tail = tail & (UART_XMIT_SIZE - 1);
 	qcom_geni_serial_poll_tx_done(uport);
-	port->xmit_size += chunk;
 out_write_wakeup:
-	uart_write_wakeup(uport);
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(uport);
 }
 
 static irqreturn_t qcom_geni_serial_isr(int isr, void *dev)
@@ -627,7 +641,7 @@
 	struct qcom_geni_serial_port *port = to_dev_port(uport, uport);
 
 	if (uport->suspended)
-		return IRQ_HANDLED;
+		return IRQ_NONE;
 
 	spin_lock_irqsave(&uport->lock, flags);
 	m_irq_status = readl_relaxed(uport->membase + SE_GENI_M_IRQ_STATUS);
@@ -667,20 +681,16 @@
 	return IRQ_HANDLED;
 }
 
-static int get_tx_fifo_size(struct qcom_geni_serial_port *port)
+static void get_tx_fifo_size(struct qcom_geni_serial_port *port)
 {
 	struct uart_port *uport;
 
-	if (!port)
-		return -ENODEV;
-
 	uport = &port->uport;
 	port->tx_fifo_depth = geni_se_get_tx_fifo_depth(&port->se);
 	port->tx_fifo_width = geni_se_get_tx_fifo_width(&port->se);
 	port->rx_fifo_depth = geni_se_get_rx_fifo_depth(&port->se);
 	uport->fifosize =
 		(port->tx_fifo_depth * port->tx_fifo_width) / BITS_PER_BYTE;
-	return 0;
 }
 
 static void set_rfr_wm(struct qcom_geni_serial_port *port)
@@ -702,7 +712,6 @@
 	/* Stop the console before stopping the current tx */
 	console_stop(uport->cons);
 
-	disable_irq(uport->irq);
 	free_irq(uport->irq, uport);
 	spin_lock_irqsave(&uport->lock, flags);
 	qcom_geni_serial_stop_tx(uport);
@@ -892,7 +901,7 @@
 
 static unsigned int qcom_geni_serial_tx_empty(struct uart_port *uport)
 {
-	return !readl_relaxed(uport->membase + SE_GENI_TX_FIFO_STATUS);
+	return !readl(uport->membase + SE_GENI_TX_FIFO_STATUS);
 }
 
 #ifdef CONFIG_SERIAL_QCOM_GENI_CONSOLE
@@ -910,7 +919,7 @@
 
 	port = get_port_from_line(co->index);
 	if (IS_ERR(port)) {
-		pr_err("Invalid line %d(%d)\n", co->index, (int)PTR_ERR(port));
+		pr_err("Invalid line %d\n", co->index);
 		return PTR_ERR(port);
 	}
 
@@ -942,6 +951,65 @@
 	return uart_set_options(uport, co, baud, parity, bits, flow);
 }
 
+static void qcom_geni_serial_earlycon_write(struct console *con,
+					const char *s, unsigned int n)
+{
+	struct earlycon_device *dev = con->data;
+
+	__qcom_geni_serial_console_write(&dev->port, s, n);
+}
+
+static int __init qcom_geni_serial_earlycon_setup(struct earlycon_device *dev,
+								const char *opt)
+{
+	struct uart_port *uport = &dev->port;
+	u32 tx_trans_cfg;
+	u32 tx_parity_cfg = 0;	/* Disable Tx Parity */
+	u32 rx_trans_cfg = 0;
+	u32 rx_parity_cfg = 0;	/* Disable Rx Parity */
+	u32 stop_bit_len = 0;	/* Default stop bit length - 1 bit */
+	u32 bits_per_char;
+	struct geni_se se;
+
+	if (!uport->membase)
+		return -EINVAL;
+
+	memset(&se, 0, sizeof(se));
+	se.base = uport->membase;
+	if (geni_se_read_proto(&se) != GENI_SE_UART)
+		return -ENXIO;
+	/*
+	 * Ignore Flow control.
+	 * n = 8.
+	 */
+	tx_trans_cfg = UART_CTS_MASK;
+	bits_per_char = BITS_PER_BYTE;
+
+	/*
+	 * Make an unconditional cancel on the main sequencer to reset
+	 * it else we could end up in data loss scenarios.
+	 */
+	qcom_geni_serial_poll_tx_done(uport);
+	qcom_geni_serial_abort_rx(uport);
+	geni_se_config_packing(&se, BITS_PER_BYTE, 1, false, true, false);
+	geni_se_init(&se, DEF_FIFO_DEPTH_WORDS / 2, DEF_FIFO_DEPTH_WORDS - 2);
+	geni_se_select_mode(&se, GENI_SE_FIFO);
+
+	writel_relaxed(tx_trans_cfg, uport->membase + SE_UART_TX_TRANS_CFG);
+	writel_relaxed(tx_parity_cfg, uport->membase + SE_UART_TX_PARITY_CFG);
+	writel_relaxed(rx_trans_cfg, uport->membase + SE_UART_RX_TRANS_CFG);
+	writel_relaxed(rx_parity_cfg, uport->membase + SE_UART_RX_PARITY_CFG);
+	writel_relaxed(bits_per_char, uport->membase + SE_UART_TX_WORD_LEN);
+	writel_relaxed(bits_per_char, uport->membase + SE_UART_RX_WORD_LEN);
+	writel_relaxed(stop_bit_len, uport->membase + SE_UART_TX_STOP_BIT_LEN);
+
+	dev->con->write = qcom_geni_serial_earlycon_write;
+	dev->con->setup = NULL;
+	return 0;
+}
+OF_EARLYCON_DECLARE(qcom_geni, "qcom,geni-debug-uart",
+				qcom_geni_serial_earlycon_setup);
+
 static int __init console_register(struct uart_driver *drv)
 {
 	return uart_register_driver(drv);
@@ -1026,16 +1094,13 @@
 
 	if (pdev->dev.of_node)
 		line = of_alias_get_id(pdev->dev.of_node, "serial");
-	else
-		line = pdev->id;
 
 	if (line < 0 || line >= GENI_UART_CONS_PORTS)
 		return -ENXIO;
 	port = get_port_from_line(line);
 	if (IS_ERR(port)) {
-		ret = PTR_ERR(port);
-		dev_err(&pdev->dev, "Invalid line %d(%d)\n", line, ret);
-		return ret;
+		dev_err(&pdev->dev, "Invalid line %d\n", line);
+		return PTR_ERR(port);
 	}
 
 	uport = &port->uport;
@@ -1072,7 +1137,6 @@
 	uport->private_data = &qcom_geni_console_driver;
 	platform_set_drvdata(pdev, port);
 	port->handle_rx = handle_rx_console;
-	port->setup = false;
 	return uart_add_one_port(&qcom_geni_console_driver, uport);
 }
 
@@ -1087,8 +1151,7 @@
 
 static int __maybe_unused qcom_geni_serial_sys_suspend_noirq(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct qcom_geni_serial_port *port = platform_get_drvdata(pdev);
+	struct qcom_geni_serial_port *port = dev_get_drvdata(dev);
 	struct uart_port *uport = &port->uport;
 
 	uart_suspend_port(uport->private_data, uport);
@@ -1097,12 +1160,19 @@
 
 static int __maybe_unused qcom_geni_serial_sys_resume_noirq(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct qcom_geni_serial_port *port = platform_get_drvdata(pdev);
+	struct qcom_geni_serial_port *port = dev_get_drvdata(dev);
 	struct uart_port *uport = &port->uport;
 
 	if (console_suspend_enabled && uport->suspended) {
 		uart_resume_port(uport->private_data, uport);
+		/*
+		 * uart_suspend_port() invokes port shutdown which in turn
+		 * frees the irq. uart_resume_port invokes port startup which
+		 * performs request_irq. The request_irq auto-enables the IRQ.
+		 * In addition, resume_noirq implicitly enables the IRQ and
+		 * leads to an unbalanced IRQ enable warning. Disable the IRQ
+		 * before returning so that the warning is suppressed.
+		 */
 		disable_irq(uport->irq);
 	}
 	return 0;
@@ -1133,11 +1203,6 @@
 {
 	int ret;
 
-	qcom_geni_console_port.uport.iotype = UPIO_MEM;
-	qcom_geni_console_port.uport.ops = &qcom_geni_console_pops;
-	qcom_geni_console_port.uport.flags = UPF_BOOT_AUTOCONF;
-	qcom_geni_console_port.uport.line = 0;
-
 	ret = console_register(&qcom_geni_console_driver);
 	if (ret)
 		return ret;
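
The qcom_geni earlycon support follows the standard OF_EARLYCON_DECLARE() shape: a setup hook validates the fixed mapping and installs a write routine built on uart_console_write(). A generic, hedged sketch (vendor string, register layout and names are all illustrative):

#include <linux/console.h>
#include <linux/io.h>
#include <linux/serial_core.h>

static void my_putc(struct uart_port *port, int c)
{
	writel(c, port->membase);		/* assumes TX reg at offset 0 */
}

static void my_earlycon_write(struct console *con, const char *s,
			      unsigned int n)
{
	struct earlycon_device *dev = con->data;

	uart_console_write(&dev->port, s, n, my_putc);
}

static int __init my_earlycon_setup(struct earlycon_device *dev,
				    const char *opt)
{
	if (!dev->port.membase)
		return -EINVAL;

	dev->con->write = my_earlycon_write;
	return 0;
}
OF_EARLYCON_DECLARE(mycon, "vendor,my-uart", my_earlycon_setup);
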
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index 3f2f8c1..2f8fa18 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -856,35 +856,54 @@
 static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p)
 {
 	struct s3c24xx_uart_dma	*dma = p->dma;
+	struct dma_slave_caps dma_caps;
+	const char *reason = NULL;
 	int ret;
 
 	/* Default slave configuration parameters */
 	dma->rx_conf.direction		= DMA_DEV_TO_MEM;
 	dma->rx_conf.src_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE;
 	dma->rx_conf.src_addr		= p->port.mapbase + S3C2410_URXH;
-	dma->rx_conf.src_maxburst	= 16;
+	dma->rx_conf.src_maxburst	= 1;
 
 	dma->tx_conf.direction		= DMA_MEM_TO_DEV;
 	dma->tx_conf.dst_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE;
 	dma->tx_conf.dst_addr		= p->port.mapbase + S3C2410_UTXH;
-	if (dma_get_cache_alignment() >= 16)
-		dma->tx_conf.dst_maxburst = 16;
-	else
-		dma->tx_conf.dst_maxburst = 1;
+	dma->tx_conf.dst_maxburst	= 1;
 
 	dma->rx_chan = dma_request_chan(p->port.dev, "rx");
 
-	if (IS_ERR(dma->rx_chan))
-		return PTR_ERR(dma->rx_chan);
+	if (IS_ERR(dma->rx_chan)) {
+		reason = "DMA RX channel request failed";
+		ret = PTR_ERR(dma->rx_chan);
+		goto err_warn;
+	}
+
+	ret = dma_get_slave_caps(dma->rx_chan, &dma_caps);
+	if (ret < 0 ||
+	    dma_caps.residue_granularity < DMA_RESIDUE_GRANULARITY_BURST) {
+		reason = "insufficient DMA RX engine capabilities";
+		ret = -EOPNOTSUPP;
+		goto err_release_rx;
+	}
 
 	dmaengine_slave_config(dma->rx_chan, &dma->rx_conf);
 
 	dma->tx_chan = dma_request_chan(p->port.dev, "tx");
 	if (IS_ERR(dma->tx_chan)) {
+		reason = "DMA TX channel request failed";
 		ret = PTR_ERR(dma->tx_chan);
 		goto err_release_rx;
 	}
 
+	ret = dma_get_slave_caps(dma->tx_chan, &dma_caps);
+	if (ret < 0 ||
+	    dma_caps.residue_granularity < DMA_RESIDUE_GRANULARITY_BURST) {
+		reason = "insufficient DMA TX engine capabilities";
+		ret = -EOPNOTSUPP;
+		goto err_release_tx;
+	}
+
 	dmaengine_slave_config(dma->tx_chan, &dma->tx_conf);
 
 	/* RX buffer */
@@ -899,6 +918,7 @@
 	dma->rx_addr = dma_map_single(p->port.dev, dma->rx_buf,
 				dma->rx_size, DMA_FROM_DEVICE);
 	if (dma_mapping_error(p->port.dev, dma->rx_addr)) {
+		reason = "DMA mapping error for RX buffer";
 		ret = -EIO;
 		goto err_free_rx;
 	}
@@ -907,6 +927,7 @@
 	dma->tx_addr = dma_map_single(p->port.dev, p->port.state->xmit.buf,
 				UART_XMIT_SIZE, DMA_TO_DEVICE);
 	if (dma_mapping_error(p->port.dev, dma->tx_addr)) {
+		reason = "DMA mapping error for TX buffer";
 		ret = -EIO;
 		goto err_unmap_rx;
 	}
@@ -922,6 +943,9 @@
 	dma_release_channel(dma->tx_chan);
 err_release_rx:
 	dma_release_channel(dma->rx_chan);
+err_warn:
+	if (reason)
+		dev_warn(p->port.dev, "%s, DMA will not be used\n", reason);
 	return ret;
 }
 
@@ -1040,8 +1064,6 @@
 	if (ourport->dma) {
 		ret = s3c24xx_serial_request_dma(ourport);
 		if (ret < 0) {
-			dev_warn(port->dev,
-				 "DMA request failed, DMA will not be used\n");
 			devm_kfree(port->dev, ourport->dma);
 			ourport->dma = NULL;
 		}
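
The samsung hunk rejects DMA channels whose engines cannot report transfer residue at burst granularity, because the driver derives received byte counts from residue. The capability probe, reduced to a helper:

#include <linux/dmaengine.h>

static int require_burst_residue(struct dma_chan *chan)
{
	struct dma_slave_caps caps;
	int ret = dma_get_slave_caps(chan, &caps);

	if (ret < 0)
		return ret;
	if (caps.residue_granularity < DMA_RESIDUE_GRANULARITY_BURST)
		return -EOPNOTSUPP;	/* residue too coarse to be useful */
	return 0;
}
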
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 65792a3..243c960 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -1168,7 +1168,10 @@
 		else
 			return PTR_ERR(s->clk);
 	} else {
-		clk_prepare_enable(s->clk);
+		ret = clk_prepare_enable(s->clk);
+		if (ret)
+			return ret;
+
 		freq = clk_get_rate(s->clk);
 	}
 
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0466f9f..890b883 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -674,8 +674,8 @@
 static void uart_throttle(struct tty_struct *tty)
 {
 	struct uart_state *state = tty->driver_data;
+	upstat_t mask = UPSTAT_SYNC_FIFO;
 	struct uart_port *port;
-	upstat_t mask = 0;
 
 	port = uart_port_ref(state);
 	if (!port)
@@ -703,8 +703,8 @@
 static void uart_unthrottle(struct tty_struct *tty)
 {
 	struct uart_state *state = tty->driver_data;
+	upstat_t mask = UPSTAT_SYNC_FIFO;
 	struct uart_port *port;
-	upstat_t mask = 0;
 
 	port = uart_port_ref(state);
 	if (!port)
@@ -1829,19 +1829,6 @@
 		uart_line_info(m, drv, i);
 	return 0;
 }
-
-static int uart_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, uart_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations uart_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= uart_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(CONFIG_CONSOLE_POLL)
@@ -2415,7 +2402,7 @@
 	.break_ctl	= uart_break_ctl,
 	.wait_until_sent= uart_wait_until_sent,
 #ifdef CONFIG_PROC_FS
-	.proc_fops	= &uart_proc_fops,
+	.proc_show	= uart_proc_show,
 #endif
 	.tiocmget	= uart_tiocmget,
 	.tiocmset	= uart_tiocmset,
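
The serial_core hunks seed the throttle/unthrottle masks with UPSTAT_SYNC_FIFO, so ports marked as FIFO-coupled (such as the aspeed VUART earlier in this series, which has no physical line) get throttle callbacks even with no flow control configured. Opting in from a driver looks roughly like:

#include <linux/serial_core.h>

static void my_port_config(struct uart_port *port)
{
	/* both front-ends share one FIFO; keep them in lockstep */
	port->status |= UPSTAT_SYNC_FIFO;
	/* port->throttle and port->unthrottle are set alongside */
}
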
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index fdbbff5..c181eb3 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -160,6 +160,7 @@
 #define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS
 
 static struct sci_port sci_ports[SCI_NPORTS];
+static unsigned long sci_ports_in_use;
 static struct uart_driver sci_uart_driver;
 
 static inline struct sci_port *
@@ -2390,6 +2391,27 @@
 
 	uart_update_timeout(port, termios->c_cflag, baud);
 
+	/* byte size and parity */
+	switch (termios->c_cflag & CSIZE) {
+	case CS5:
+		bits = 7;
+		break;
+	case CS6:
+		bits = 8;
+		break;
+	case CS7:
+		bits = 9;
+		break;
+	default:
+		bits = 10;
+		break;
+	}
+
+	if (termios->c_cflag & CSTOPB)
+		bits++;
+	if (termios->c_cflag & PARENB)
+		bits++;
+
 	if (best_clk >= 0) {
 		if (port->type == PORT_SCIFA || port->type == PORT_SCIFB)
 			switch (srr + 1) {
@@ -2406,8 +2428,27 @@
 		serial_port_out(port, SCSCR, scr_val | s->hscif_tot);
 		serial_port_out(port, SCSMR, smr_val);
 		serial_port_out(port, SCBRR, brr);
-		if (sci_getreg(port, HSSRR)->size)
-			serial_port_out(port, HSSRR, srr | HSCIF_SRE);
+		if (sci_getreg(port, HSSRR)->size) {
+			unsigned int hssrr = srr | HSCIF_SRE;
+			/* Calculate deviation from intended rate at the
+			 * center of the last stop bit in sampling clocks.
+			 */
+			int last_stop = bits * 2 - 1;
+			int deviation = min_err * srr * last_stop / 2 / baud;
+
+			if (abs(deviation) >= 2) {
+				/* At least two sampling clocks off at the
+				 * last stop bit; we can increase the error
+				 * margin by shifting the sampling point.
+				 */
+				int shift = clamp(deviation / 2, -8, 7);
+
+				hssrr |= (shift << HSCIF_SRHP_SHIFT) &
+					 HSCIF_SRHP_MASK;
+				hssrr |= HSCIF_SRDE;
+			}
+			serial_port_out(port, HSSRR, hssrr);
+		}
 
 		/* Wait one bit interval */
 		udelay((1000000 + (baud - 1)) / baud);
@@ -2474,27 +2515,6 @@
 	 * value obtained by this formula is too small. Therefore, if the value
 	 * is smaller than 20ms, use 20ms as the timeout value for DMA.
 	 */
-	/* byte size and parity */
-	switch (termios->c_cflag & CSIZE) {
-	case CS5:
-		bits = 7;
-		break;
-	case CS6:
-		bits = 8;
-		break;
-	case CS7:
-		bits = 9;
-		break;
-	default:
-		bits = 10;
-		break;
-	}
-
-	if (termios->c_cflag & CSTOPB)
-		bits++;
-	if (termios->c_cflag & PARENB)
-		bits++;
-
 	s->rx_frame = (10000 * bits) / (baud / 100);
 #ifdef CONFIG_SERIAL_SH_SCI_DMA
 	s->rx_timeout = s->buf_len_rx * 2 * s->rx_frame;
@@ -2704,8 +2724,8 @@
 			dev_dbg(dev, "failed to get %s (%ld)\n", clk_names[i],
 				PTR_ERR(clk));
 		else
-			dev_dbg(dev, "clk %s is %pC rate %pCr\n", clk_names[i],
-				clk, clk);
+			dev_dbg(dev, "clk %s is %pC rate %lu\n", clk_names[i],
+				clk, clk_get_rate(clk));
 		sci_port->clks[i] = IS_ERR(clk) ? NULL : clk;
 	}
 	return 0;
@@ -2890,16 +2910,15 @@
 	unsigned long flags;
 	int locked = 1;
 
-	local_irq_save(flags);
 #if defined(SUPPORT_SYSRQ)
 	if (port->sysrq)
 		locked = 0;
 	else
 #endif
 	if (oops_in_progress)
-		locked = spin_trylock(&port->lock);
+		locked = spin_trylock_irqsave(&port->lock, flags);
 	else
-		spin_lock(&port->lock);
+		spin_lock_irqsave(&port->lock, flags);
 
 	/* first save SCSCR then disable interrupts, keep clock source */
 	ctrl = serial_port_in(port, SCSCR);
@@ -2919,8 +2938,7 @@
 	serial_port_out(port, SCSCR, ctrl);
 
 	if (locked)
-		spin_unlock(&port->lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&port->lock, flags);
 }
 
 static int serial_console_setup(struct console *co, char *options)
@@ -3026,6 +3044,7 @@
 {
 	struct sci_port *port = platform_get_drvdata(dev);
 
+	sci_ports_in_use &= ~BIT(port->port.line);
 	uart_remove_one_port(&sci_uart_driver, &port->port);
 
 	sci_cleanup_single(port);
@@ -3107,6 +3126,8 @@
 
 	/* Get the line number from the aliases node. */
 	id = of_alias_get_id(np, "serial");
+	if (id < 0 && ~sci_ports_in_use)
+		id = ffz(sci_ports_in_use);
 	if (id < 0) {
 		dev_err(&pdev->dev, "failed to get alias id (%d)\n", id);
 		return NULL;
@@ -3141,6 +3162,9 @@
 		dev_notice(&dev->dev, "Consider bumping CONFIG_SERIAL_SH_SCI_NR_UARTS!\n");
 		return -EINVAL;
 	}
+	BUILD_BUG_ON(SCI_NPORTS > sizeof(sci_ports_in_use) * 8);
+	if (sci_ports_in_use & BIT(index))
+		return -EBUSY;
 
 	mutex_lock(&sci_uart_registration_lock);
 	if (!sci_uart_driver.state) {
@@ -3239,6 +3263,7 @@
 	sh_bios_gdb_detach();
 #endif
 
+	sci_ports_in_use |= BIT(dev_id);
 	return 0;
 }
 
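
sh-sci now tracks claimed port slots in a bitmap. ffz() returns the index of the first zero bit and is only defined when at least one bit is clear, hence the ~mask guard before use. The allocation idiom in isolation:

#include <linux/bitops.h>
#include <linux/errno.h>

static unsigned long slots_in_use;		/* one bit per slot */

static int claim_free_slot(void)
{
	int id;

	if (!~slots_in_use)
		return -EBUSY;			/* all ones: nothing free */

	id = ffz(slots_in_use);			/* lowest clear bit */
	slots_in_use |= BIT(id);
	return id;
}
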
diff --git a/drivers/tty/serial/sh-sci.h b/drivers/tty/serial/sh-sci.h
index a5f792f..0b9e804 100644
--- a/drivers/tty/serial/sh-sci.h
+++ b/drivers/tty/serial/sh-sci.h
@@ -130,6 +130,10 @@
 
 /* HSSRR HSCIF */
 #define HSCIF_SRE	BIT(15)	/* Sampling Rate Register Enable */
+#define HSCIF_SRDE	BIT(14) /* Sampling Point Register Enable */
+
+#define HSCIF_SRHP_SHIFT	8
+#define HSCIF_SRHP_MASK		0x0f00
 
 /* SCPCR (Serial Port Control Register), SCIFA/SCIFB only */
 #define SCPCR_RTSC	BIT(4)	/* Serial Port RTS# Pin / Output Pin */
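
A note on the HSCIF sampling-point math in sh-sci.c above: bounding a value to [-8, 7] calls for clamp(), since the superficially similar min(-8, max(7, x)) evaluates to -8 for every x (max(7, x) can never drop below 7). In isolation:

#include <linux/kernel.h>

static int sampling_shift(int deviation)
{
	/* clamp() bounds its argument to the closed range [-8, 7] */
	return clamp(deviation / 2, -8, 7);
}
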
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index 5f9f01f..7971997c 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -842,16 +842,14 @@
 #ifdef CONFIG_PM_SLEEP
 static int asc_serial_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct uart_port *port = platform_get_drvdata(pdev);
+	struct uart_port *port = dev_get_drvdata(dev);
 
 	return uart_suspend_port(&asc_uart_driver, port);
 }
 
 static int asc_serial_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct uart_port *port = platform_get_drvdata(pdev);
+	struct uart_port *port = dev_get_drvdata(dev);
 
 	return uart_resume_port(&asc_uart_driver, port);
 }
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index bd72dd8..8a3e342 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -1097,45 +1097,6 @@
 #endif
 };
 
-static struct uart_port cdns_uart_port[CDNS_UART_NR_PORTS];
-
-/**
- * cdns_uart_get_port - Configure the port from platform device resource info
- * @id: Port id
- *
- * Return: a pointer to a uart_port or NULL for failure
- */
-static struct uart_port *cdns_uart_get_port(int id)
-{
-	struct uart_port *port;
-
-	/* Try the given port id if failed use default method */
-	if (id < CDNS_UART_NR_PORTS && cdns_uart_port[id].mapbase != 0) {
-		/* Find the next unused port */
-		for (id = 0; id < CDNS_UART_NR_PORTS; id++)
-			if (cdns_uart_port[id].mapbase == 0)
-				break;
-	}
-
-	if (id >= CDNS_UART_NR_PORTS)
-		return NULL;
-
-	port = &cdns_uart_port[id];
-
-	/* At this point, we've got an empty uart_port struct, initialize it */
-	spin_lock_init(&port->lock);
-	port->membase	= NULL;
-	port->irq	= 0;
-	port->type	= PORT_UNKNOWN;
-	port->iotype	= UPIO_MEM32;
-	port->flags	= UPF_BOOT_AUTOCONF;
-	port->ops	= &cdns_uart_ops;
-	port->fifosize	= CDNS_UART_FIFO_SIZE;
-	port->line	= id;
-	port->dev	= NULL;
-	return port;
-}
-
 #ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE
 /**
  * cdns_uart_console_wait_tx - Wait for the TX to be full
@@ -1206,6 +1167,10 @@
 OF_EARLYCON_DECLARE(cdns, "cdns,uart-r1p12", cdns_early_console_setup);
 OF_EARLYCON_DECLARE(cdns, "xlnx,zynqmp-uart", cdns_early_console_setup);
 
+
+/* Static pointer to console port */
+static struct uart_port *console_port;
+
 /**
  * cdns_uart_console_write - perform write operation
  * @co: Console handle
@@ -1215,7 +1180,7 @@
 static void cdns_uart_console_write(struct console *co, const char *s,
 				unsigned int count)
 {
-	struct uart_port *port = &cdns_uart_port[co->index];
+	struct uart_port *port = console_port;
 	unsigned long flags;
 	unsigned int imr, ctrl;
 	int locked = 1;
@@ -1261,15 +1226,13 @@
  */
 static int __init cdns_uart_console_setup(struct console *co, char *options)
 {
-	struct uart_port *port = &cdns_uart_port[co->index];
+	struct uart_port *port = console_port;
+
 	int baud = 9600;
 	int bits = 8;
 	int parity = 'n';
 	int flow = 'n';
 
-	if (co->index < 0 || co->index >= CDNS_UART_NR_PORTS)
-		return -EINVAL;
-
 	if (!port->membase) {
 		pr_debug("console on " CDNS_UART_TTY_NAME "%i not present\n",
 			 co->index);
@@ -1293,20 +1256,6 @@
 	.index	= -1, /* Specified on the cmdline (e.g. console=ttyPS ) */
 	.data	= &cdns_uart_uart_driver,
 };
-
-/**
- * cdns_uart_console_init - Initialization call
- *
- * Return: 0 on success, negative errno otherwise
- */
-static int __init cdns_uart_console_init(void)
-{
-	register_console(&cdns_uart_console);
-	return 0;
-}
-
-console_initcall(cdns_uart_console_init);
-
 #endif /* CONFIG_SERIAL_XILINX_PS_UART_CONSOLE */
 
 static struct uart_driver cdns_uart_uart_driver = {
@@ -1430,8 +1379,7 @@
 #endif /* ! CONFIG_PM_SLEEP */
 static int __maybe_unused cdns_runtime_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct uart_port *port = platform_get_drvdata(pdev);
+	struct uart_port *port = dev_get_drvdata(dev);
 	struct cdns_uart *cdns_uart = port->private_data;
 
 	clk_disable(cdns_uart->uartclk);
@@ -1441,8 +1389,7 @@
 
 static int __maybe_unused cdns_runtime_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct uart_port *port = platform_get_drvdata(pdev);
+	struct uart_port *port = dev_get_drvdata(dev);
 	struct cdns_uart *cdns_uart = port->private_data;
 
 	clk_enable(cdns_uart->pclk);
@@ -1487,6 +1434,9 @@
 			GFP_KERNEL);
 	if (!cdns_uart_data)
 		return -ENOMEM;
+	port = devm_kzalloc(&pdev->dev, sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
 
 	match = of_match_node(cdns_uart_of_match, pdev->dev.of_node);
 	if (match && match->data) {
@@ -1552,15 +1502,24 @@
 	if (id < 0)
 		id = 0;
 
-	/* Initialize the port structure */
-	port = cdns_uart_get_port(id);
-
-	if (!port) {
+	if (id >= CDNS_UART_NR_PORTS) {
 		dev_err(&pdev->dev, "Cannot get uart_port structure\n");
 		rc = -ENODEV;
 		goto err_out_notif_unreg;
 	}
 
+	/* At this point, we've got an empty uart_port struct, initialize it */
+	spin_lock_init(&port->lock);
+	port->membase	= NULL;
+	port->irq	= 0;
+	port->type	= PORT_UNKNOWN;
+	port->iotype	= UPIO_MEM32;
+	port->flags	= UPF_BOOT_AUTOCONF;
+	port->ops	= &cdns_uart_ops;
+	port->fifosize	= CDNS_UART_FIFO_SIZE;
+	port->line	= id;
+	port->dev	= NULL;
+
 	/*
 	 * Register the port.
 	 * This function also registers this device with the tty layer
@@ -1579,6 +1538,17 @@
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
+#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE
+	/*
+	 * If a console has not been chosen yet, tentatively assign this
+	 * port: the console setup function requires console_port to be
+	 * set. If register_console() does not claim the port, the
+	 * pointer is cleared again after uart_add_one_port().
+	 */
+	if (cdns_uart_uart_driver.cons->index == -1)
+		console_port = port;
+#endif
+
 	rc = uart_add_one_port(&cdns_uart_uart_driver, port);
 	if (rc) {
 		dev_err(&pdev->dev,
@@ -1586,6 +1556,12 @@
 		goto err_out_pm_disable;
 	}
 
+#ifdef CONFIG_SERIAL_XILINX_PS_UART_CONSOLE
+	/* This port was not claimed as the console, so clear the pointer */
+	if (cdns_uart_uart_driver.cons->index == -1)
+		console_port = NULL;
+#endif
+
 	return 0;
 
 err_out_pm_disable:
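
With the static port array gone, the console code needs another way to find its uart_port before uart_add_one_port() has completed, which is what the console_port dance above provides: park a candidate, let register_console() run from inside uart_add_one_port(), then drop the candidate if the console bound elsewhere. Reduced to its essentials (foo_uart_driver is a placeholder):

    static struct uart_port *console_port;  /* read by the console write op */

    static int foo_register_port(struct uart_port *port)
    {
        int rc;

        /* Console setup runs inside uart_add_one_port() and needs
         * console_port to be set, so park a candidate first. */
        if (foo_uart_driver.cons->index == -1)
            console_port = port;

        rc = uart_add_one_port(&foo_uart_driver, port);

        /* The console bound to a different port (or none): drop it. */
        if (foo_uart_driver.cons->index == -1)
            console_port = NULL;

        return rc;
    }
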
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 3c4ad71..fbdf4d0 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -3534,19 +3534,6 @@
 	return 0;
 }
 
-static int mgsl_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, mgsl_proc_show, NULL);
-}
-
-static const struct file_operations mgsl_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= mgsl_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* mgsl_allocate_dma_buffers()
  * 
  * 	Allocate and format DMA buffers (ISA adapter)
@@ -4298,7 +4285,7 @@
 	.tiocmget = tiocmget,
 	.tiocmset = tiocmset,
 	.get_icount = msgl_get_icount,
-	.proc_fops = &mgsl_proc_fops,
+	.proc_show = mgsl_proc_show,
 };
 
 /*
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 255c496..a940865 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1316,19 +1316,6 @@
 	return 0;
 }
 
-static int synclink_gt_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, synclink_gt_proc_show, NULL);
-}
-
-static const struct file_operations synclink_gt_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= synclink_gt_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /*
  * return count of bytes in transmit buffer
  */
@@ -3721,7 +3708,7 @@
 	.tiocmget = tiocmget,
 	.tiocmset = tiocmset,
 	.get_icount = get_icount,
-	.proc_fops = &synclink_gt_proc_fops,
+	.proc_show = synclink_gt_proc_show,
 };
 
 static void slgt_cleanup(void)
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 75f11ce..1e4d5b9 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -1421,19 +1421,6 @@
 	return 0;
 }
 
-static int synclinkmp_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, synclinkmp_proc_show, NULL);
-}
-
-static const struct file_operations synclinkmp_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= synclinkmp_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* Return the count of bytes in transmit buffer
  */
 static int chars_in_buffer(struct tty_struct *tty)
@@ -3899,7 +3886,7 @@
 	.tiocmget = tiocmget,
 	.tiocmset = tiocmset,
 	.get_icount = get_icount,
-	.proc_fops = &synclinkmp_proc_fops,
+	.proc_show = synclinkmp_proc_show,
 };
 
 
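All three synclink variants receive the same conversion: instead of open-coding a single_open() wrapper and a file_operations table, each driver now hands the tty core its seq_file show routine, and the core creates the /proc entry itself (via proc_create_single() in proc_tty.c). What a converted driver supplies, sketched with placeholder names:

    static int foo_proc_show(struct seq_file *m, void *v)
    {
        seq_printf(m, "foo adapter state: %d\n", 42);
        return 0;
    }

    static const struct tty_operations foo_ops = {
        /* ... other callbacks ... */
        .proc_show = foo_proc_show,   /* tty core builds the proc file */
    };
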
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index d9b561d..d99fec4 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -290,7 +290,7 @@
  *	between the two termios structures, or a speed change is needed.
  */
 
-int tty_termios_hw_change(struct ktermios *a, struct ktermios *b)
+int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b)
 {
 	if (a->c_ispeed != b->c_ispeed || a->c_ospeed != b->c_ospeed)
 		return 1;
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index fb7329a..fc4c97c 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -229,26 +229,13 @@
 	return 0;
 }
 
-static const struct seq_operations tty_ldiscs_seq_ops = {
+const struct seq_operations tty_ldiscs_seq_ops = {
 	.start	= tty_ldiscs_seq_start,
 	.next	= tty_ldiscs_seq_next,
 	.stop	= tty_ldiscs_seq_stop,
 	.show	= tty_ldiscs_seq_show,
 };
 
-static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &tty_ldiscs_seq_ops);
-}
-
-const struct file_operations tty_ldiscs_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_tty_ldiscs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 /**
  *	tty_ldisc_ref_wait	-	wait for the tty ldisc
  *	@tty: tty device
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index f97251f..1eb1a37 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1178,15 +1178,8 @@
 			count = ((vc->vc_pos - vc->vc_origin) >> 1) + 1;
 			start = (unsigned short *)vc->vc_origin;
 			break;
-		case 3: /* erase scroll-back buffer (and whole display) */
-			scr_memsetw(vc->vc_screenbuf, vc->vc_video_erase_char,
-				    vc->vc_screenbuf_size);
-			flush_scrollback(vc);
-			set_origin(vc);
-			if (con_is_visible(vc))
-				update_screen(vc);
-			/* fall through */
 		case 2: /* erase whole display */
+		case 3: /* (and scrollback buffer later) */
 			count = vc->vc_cols * vc->vc_rows;
 			start = (unsigned short *)vc->vc_origin;
 			break;
@@ -1194,7 +1187,12 @@
 			return;
 	}
 	scr_memsetw(start, vc->vc_video_erase_char, 2 * count);
-	if (con_should_update(vc))
+	if (vpar == 3) {
+		set_origin(vc);
+		flush_scrollback(vc);
+		if (con_is_visible(vc))
+			update_screen(vc);
+	} else if (con_should_update(vc))
 		do_update_region(vc, (unsigned long) start, count);
 	vc->vc_need_wrap = 0;
 }
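
The reordering changes ESC [ 3 J from "flush scrollback, then erase the display" to "erase the display through the common path, then reset the origin and flush the scrollback", so the visible screen and the history are cleared consistently. One common way to exercise it from user space:

    #include <stdio.h>

    int main(void)
    {
        printf("\033[2J");   /* erase the visible display */
        printf("\033[3J");   /* also discard the scrollback buffer */
        printf("\033[H");    /* park the cursor at home */
        return 0;
    }
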
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index fd48483..e8f4ac9 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -270,7 +270,7 @@
 		if (!map_found) {
 			map_found = 1;
 			idev->map_dir = kobject_create_and_add("maps",
-							&idev->dev->kobj);
+							&idev->dev.kobj);
 			if (!idev->map_dir) {
 				ret = -ENOMEM;
 				goto err_map;
@@ -299,7 +299,7 @@
 		if (!portio_found) {
 			portio_found = 1;
 			idev->portio_dir = kobject_create_and_add("portio",
-							&idev->dev->kobj);
+							&idev->dev.kobj);
 			if (!idev->portio_dir) {
 				ret = -ENOMEM;
 				goto err_portio;
@@ -342,7 +342,7 @@
 		kobject_put(&map->kobj);
 	}
 	kobject_put(idev->map_dir);
-	dev_err(idev->dev, "error creating sysfs files (%d)\n", ret);
+	dev_err(&idev->dev, "error creating sysfs files (%d)\n", ret);
 	return ret;
 }
 
@@ -379,7 +379,7 @@
 		idev->minor = retval;
 		retval = 0;
 	} else if (retval == -ENOSPC) {
-		dev_err(idev->dev, "too many uio devices\n");
+		dev_err(&idev->dev, "too many uio devices\n");
 		retval = -EINVAL;
 	}
 	mutex_unlock(&minor_lock);
@@ -433,6 +433,7 @@
 	struct uio_device *idev;
 	struct uio_listener *listener;
 	int ret = 0;
+	unsigned long flags;
 
 	mutex_lock(&minor_lock);
 	idev = idr_find(&uio_idr, iminor(inode));
@@ -442,9 +443,11 @@
 		goto out;
 	}
 
+	get_device(&idev->dev);
+
 	if (!try_module_get(idev->owner)) {
 		ret = -ENODEV;
-		goto out;
+		goto err_module_get;
 	}
 
 	listener = kmalloc(sizeof(*listener), GFP_KERNEL);
@@ -457,11 +460,13 @@
 	listener->event_count = atomic_read(&idev->event);
 	filep->private_data = listener;
 
-	if (idev->info->open) {
+	spin_lock_irqsave(&idev->info_lock, flags);
+	if (idev->info && idev->info->open)
 		ret = idev->info->open(idev->info, inode);
-		if (ret)
-			goto err_infoopen;
-	}
+	spin_unlock_irqrestore(&idev->info_lock, flags);
+	if (ret)
+		goto err_infoopen;
+
 	return 0;
 
 err_infoopen:
@@ -470,6 +475,9 @@
 err_alloc_listener:
 	module_put(idev->owner);
 
+err_module_get:
+	put_device(&idev->dev);
+
 out:
 	return ret;
 }
@@ -487,12 +495,16 @@
 	int ret = 0;
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
+	unsigned long flags;
 
-	if (idev->info->release)
+	spin_lock_irqsave(&idev->info_lock, flags);
+	if (idev->info && idev->info->release)
 		ret = idev->info->release(idev->info, inode);
+	spin_unlock_irqrestore(&idev->info_lock, flags);
 
 	module_put(idev->owner);
 	kfree(listener);
+	put_device(&idev->dev);
 	return ret;
 }
 
@@ -500,9 +512,16 @@
 {
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
+	__poll_t ret = 0;
+	unsigned long flags;
 
-	if (!idev->info->irq)
-		return -EIO;
+	spin_lock_irqsave(&idev->info_lock, flags);
+	if (!idev->info || !idev->info->irq)
+		ret = -EIO;
+	spin_unlock_irqrestore(&idev->info_lock, flags);
+
+	if (ret)
+		return ret;
 
 	poll_wait(filep, &idev->wait, wait);
 	if (listener->event_count != atomic_read(&idev->event))
@@ -516,11 +535,17 @@
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
 	DECLARE_WAITQUEUE(wait, current);
-	ssize_t retval;
+	ssize_t retval = 0;
 	s32 event_count;
+	unsigned long flags;
 
-	if (!idev->info->irq)
-		return -EIO;
+	spin_lock_irqsave(&idev->info_lock, flags);
+	if (!idev->info || !idev->info->irq)
+		retval = -EIO;
+	spin_unlock_irqrestore(&idev->info_lock, flags);
+
+	if (retval)
+		return retval;
 
 	if (count != sizeof(s32))
 		return -EINVAL;
@@ -567,21 +592,33 @@
 	struct uio_device *idev = listener->dev;
 	ssize_t retval;
 	s32 irq_on;
+	unsigned long flags;
 
-	if (!idev->info->irq)
-		return -EIO;
+	spin_lock_irqsave(&idev->info_lock, flags);
+	if (!idev->info || !idev->info->irq) {
+		retval = -EIO;
+		goto out;
+	}
 
-	if (count != sizeof(s32))
-		return -EINVAL;
+	if (count != sizeof(s32)) {
+		retval = -EINVAL;
+		goto out;
+	}
 
-	if (!idev->info->irqcontrol)
-		return -ENOSYS;
+	if (!idev->info->irqcontrol) {
+		retval = -ENOSYS;
+		goto out;
+	}
 
-	if (copy_from_user(&irq_on, buf, count))
-		return -EFAULT;
+	if (copy_from_user(&irq_on, buf, count)) {
+		retval = -EFAULT;
+		goto out;
+	}
 
 	retval = idev->info->irqcontrol(idev->info, irq_on);
 
+out:
+	spin_unlock_irqrestore(&idev->info_lock, flags);
 	return retval ? retval : sizeof(s32);
 }
 
@@ -597,7 +634,7 @@
 	return -1;
 }
 
-static int uio_vma_fault(struct vm_fault *vmf)
+static vm_fault_t uio_vma_fault(struct vm_fault *vmf)
 {
 	struct uio_device *idev = vmf->vma->vm_private_data;
 	struct page *page;
@@ -794,6 +831,13 @@
 	uio_major_cleanup();
 }
 
+static void uio_device_release(struct device *dev)
+{
+	struct uio_device *idev = dev_get_drvdata(dev);
+
+	kfree(idev);
+}
+
 /**
  * uio_register_device - register a new userspace IO device
  * @owner:	module that creates the new device
@@ -814,13 +858,14 @@
 
 	info->uio_dev = NULL;
 
-	idev = devm_kzalloc(parent, sizeof(*idev), GFP_KERNEL);
+	idev = kzalloc(sizeof(*idev), GFP_KERNEL);
 	if (!idev) {
 		return -ENOMEM;
 	}
 
 	idev->owner = owner;
 	idev->info = info;
+	spin_lock_init(&idev->info_lock);
 	init_waitqueue_head(&idev->wait);
 	atomic_set(&idev->event, 0);
 
@@ -828,14 +873,19 @@
 	if (ret)
 		return ret;
 
-	idev->dev = device_create(&uio_class, parent,
-				  MKDEV(uio_major, idev->minor), idev,
-				  "uio%d", idev->minor);
-	if (IS_ERR(idev->dev)) {
-		printk(KERN_ERR "UIO: device register failed\n");
-		ret = PTR_ERR(idev->dev);
+	idev->dev.devt = MKDEV(uio_major, idev->minor);
+	idev->dev.class = &uio_class;
+	idev->dev.parent = parent;
+	idev->dev.release = uio_device_release;
+	dev_set_drvdata(&idev->dev, idev);
+
+	ret = dev_set_name(&idev->dev, "uio%d", idev->minor);
+	if (ret)
 		goto err_device_create;
-	}
+
+	ret = device_register(&idev->dev);
+	if (ret)
+		goto err_device_create;
 
 	ret = uio_dev_add_attributes(idev);
 	if (ret)
@@ -863,7 +913,7 @@
 err_request_irq:
 	uio_dev_del_attributes(idev);
 err_uio_dev_add_attributes:
-	device_destroy(&uio_class, MKDEV(uio_major, idev->minor));
+	device_unregister(&idev->dev);
 err_device_create:
 	uio_free_minor(idev);
 	return ret;
@@ -878,6 +928,7 @@
 void uio_unregister_device(struct uio_info *info)
 {
 	struct uio_device *idev;
+	unsigned long flags;
 
 	if (!info || !info->uio_dev)
 		return;
@@ -891,7 +942,11 @@
 	if (info->irq && info->irq != UIO_IRQ_CUSTOM)
 		free_irq(info->irq, idev);
 
-	device_destroy(&uio_class, MKDEV(uio_major, idev->minor));
+	spin_lock_irqsave(&idev->info_lock, flags);
+	idev->info = NULL;
+	spin_unlock_irqrestore(&idev->info_lock, flags);
+
+	device_unregister(&idev->dev);
 
 	return;
 }
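
Two independent lifetime fixes land here: idev->info is now read under info_lock, so a file descriptor held across uio_unregister_device() can no longer chase a stale pointer, and the uio_device embeds a refcounted struct device with a release callback, so the allocation survives until the last get_device() reference is dropped. The shape of the pattern, stripped down to placeholder foo_* types:

    struct foo_device {
        struct device dev;            /* embedded, refcounted */
        spinlock_t info_lock;         /* guards ->info */
        struct foo_info *info;        /* set to NULL on unregister */
    };

    static void foo_device_release(struct device *dev)
    {
        kfree(container_of(dev, struct foo_device, dev));
    }

    static int foo_register(struct foo_device *fdev, struct device *parent)
    {
        fdev->dev.parent = parent;
        fdev->dev.release = foo_device_release;  /* frees fdev at refcount 0 */
        dev_set_name(&fdev->dev, "foo0");
        /* on failure the caller must put_device(), not kfree() */
        return device_register(&fdev->dev);
    }

Each open() then pins the device with get_device(&fdev->dev) and each release() drops the pin with put_device(), exactly as the uio file operations do above.
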
diff --git a/drivers/uio/uio_fsl_elbc_gpcm.c b/drivers/uio/uio_fsl_elbc_gpcm.c
index b46323d..b551913 100644
--- a/drivers/uio/uio_fsl_elbc_gpcm.c
+++ b/drivers/uio/uio_fsl_elbc_gpcm.c
@@ -475,7 +475,6 @@
 static struct platform_driver uio_fsl_elbc_gpcm_driver = {
 	.driver = {
 		.name = "fsl,elbc-gpcm-uio",
-		.owner = THIS_MODULE,
 		.of_match_table = uio_fsl_elbc_gpcm_match,
 	},
 	.probe = uio_fsl_elbc_gpcm_probe,
diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h
index 98b7cb3..0bf244d 100644
--- a/drivers/usb/chipidea/ci.h
+++ b/drivers/usb/chipidea/ci.h
@@ -450,7 +450,7 @@
 
 void ci_platform_configure(struct ci_hdrc *ci);
 
-int dbg_create_files(struct ci_hdrc *ci);
+void dbg_create_files(struct ci_hdrc *ci);
 
 void dbg_remove_files(struct ci_hdrc *ci);
 #endif	/* __DRIVERS_USB_CHIPIDEA_CI_H */
diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index e431c5a..19f5f5f 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -291,7 +291,8 @@
 
 	pdata.usb_phy = data->phy;
 
-	if (of_device_is_compatible(np, "fsl,imx53-usb") && pdata.usb_phy &&
+	if ((of_device_is_compatible(np, "fsl,imx53-usb") ||
+	     of_device_is_compatible(np, "fsl,imx51-usb")) && pdata.usb_phy &&
 	    of_usb_get_phy_mode(np) == USBPHY_INTERFACE_MODE_ULPI) {
 		pdata.flags |= CI_HDRC_OVERRIDE_PHY_CONTROL;
 		data->override_phy_control = true;
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 33ae87f..85fc6db 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -1062,9 +1062,7 @@
 		ci_hdrc_otg_fsm_start(ci);
 
 	device_set_wakeup_capable(&pdev->dev, true);
-	ret = dbg_create_files(ci);
-	if (ret)
-		goto stop;
+	dbg_create_files(ci);
 
 	ret = sysfs_create_group(&dev->kobj, &ci_attr_group);
 	if (ret)
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index ce648cb..fcc91a3 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -340,54 +340,28 @@
  *
  * This function returns an error code
  */
-int dbg_create_files(struct ci_hdrc *ci)
+void dbg_create_files(struct ci_hdrc *ci)
 {
-	struct dentry *dent;
-
 	ci->debugfs = debugfs_create_dir(dev_name(ci->dev), NULL);
-	if (!ci->debugfs)
-		return -ENOMEM;
 
-	dent = debugfs_create_file("device", S_IRUGO, ci->debugfs, ci,
-				   &ci_device_fops);
-	if (!dent)
-		goto err;
-
-	dent = debugfs_create_file("port_test", S_IRUGO | S_IWUSR, ci->debugfs,
-				   ci, &ci_port_test_fops);
-	if (!dent)
-		goto err;
-
-	dent = debugfs_create_file("qheads", S_IRUGO, ci->debugfs, ci,
-				   &ci_qheads_fops);
-	if (!dent)
-		goto err;
-
-	dent = debugfs_create_file("requests", S_IRUGO, ci->debugfs, ci,
-				   &ci_requests_fops);
-	if (!dent)
-		goto err;
+	debugfs_create_file("device", S_IRUGO, ci->debugfs, ci,
+			    &ci_device_fops);
+	debugfs_create_file("port_test", S_IRUGO | S_IWUSR, ci->debugfs, ci,
+			    &ci_port_test_fops);
+	debugfs_create_file("qheads", S_IRUGO, ci->debugfs, ci,
+			    &ci_qheads_fops);
+	debugfs_create_file("requests", S_IRUGO, ci->debugfs, ci,
+			    &ci_requests_fops);
 
 	if (ci_otg_is_fsm_mode(ci)) {
-		dent = debugfs_create_file("otg", S_IRUGO, ci->debugfs, ci,
-					&ci_otg_fops);
-		if (!dent)
-			goto err;
+		debugfs_create_file("otg", S_IRUGO, ci->debugfs, ci,
+				    &ci_otg_fops);
 	}
 
-	dent = debugfs_create_file("role", S_IRUGO | S_IWUSR, ci->debugfs, ci,
-				   &ci_role_fops);
-	if (!dent)
-		goto err;
-
-	dent = debugfs_create_file("registers", S_IRUGO, ci->debugfs, ci,
-				&ci_registers_fops);
-
-	if (dent)
-		return 0;
-err:
-	debugfs_remove_recursive(ci->debugfs);
-	return -ENOMEM;
+	debugfs_create_file("role", S_IRUGO | S_IWUSR, ci->debugfs, ci,
+			    &ci_role_fops);
+	debugfs_create_file("registers", S_IRUGO, ci->debugfs, ci,
+			    &ci_registers_fops);
 }
 
 /**
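
The error paths vanish because debugfs calls are meant to be fire-and-forget: a failed parent yields a pointer that subsequent debugfs calls accept and ignore, and a driver must not change behavior based on whether its debug files exist. A converted helper therefore reduces to (foo_* names are placeholders):

    static void foo_create_files(struct foo *f)
    {
        /* No return-value checks: a failure only means the files
         * are absent, which the driver tolerates anyway. */
        f->dbg_root = debugfs_create_dir("foo", NULL);
        debugfs_create_file("state", 0444, f->dbg_root, f, &foo_state_fops);
        debugfs_create_file("regs", 0444, f->dbg_root, f, &foo_regs_fops);
    }

    static void foo_remove_files(struct foo *f)
    {
        debugfs_remove_recursive(f->dbg_root);
    }
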
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index bdb1de0..529295a 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -21,7 +21,6 @@
 #include <linux/usb/tmc.h>
 
 
-#define RIGOL			1
 #define USBTMC_HEADER_SIZE	12
 #define USBTMC_MINOR_BASE	176
 
@@ -93,8 +92,6 @@
 	/* coalesced usb488_caps from usbtmc_dev_capabilities */
 	__u8 usb488_caps;
 
-	u8 rigol_quirk;
-
 	/* attributes from the USB TMC spec for this device */
 	u8 TermChar;
 	bool TermCharEnabled;
@@ -110,17 +107,6 @@
 };
 #define to_usbtmc_data(d) container_of(d, struct usbtmc_device_data, kref)
 
-struct usbtmc_ID_rigol_quirk {
-	__u16 idVendor;
-	__u16 idProduct;
-};
-
-static const struct usbtmc_ID_rigol_quirk usbtmc_id_quirk[] = {
-	{ 0x1ab1, 0x0588 },
-	{ 0x1ab1, 0x04b0 },
-	{ 0, 0 }
-};
-
 /* Forward declarations */
 static struct usb_driver usbtmc_driver;
 
@@ -603,16 +589,14 @@
 		goto exit;
 	}
 
-	if (data->rigol_quirk) {
-		dev_dbg(dev, "usb_bulk_msg_in: count(%zu)\n", count);
+	dev_dbg(dev, "usb_bulk_msg_in: count(%zu)\n", count);
 
-		retval = send_request_dev_dep_msg_in(data, count);
+	retval = send_request_dev_dep_msg_in(data, count);
 
-		if (retval < 0) {
-			if (data->auto_abort)
-				usbtmc_ioctl_abort_bulk_out(data);
-			goto exit;
-		}
+	if (retval < 0) {
+		if (data->auto_abort)
+			usbtmc_ioctl_abort_bulk_out(data);
+		goto exit;
 	}
 
 	/* Loop until we have fetched everything we requested */
@@ -621,23 +605,6 @@
 	done = 0;
 
 	while (remaining > 0) {
-		if (!data->rigol_quirk) {
-			dev_dbg(dev, "usb_bulk_msg_in: remaining(%zu), count(%zu)\n", remaining, count);
-
-			if (remaining > USBTMC_SIZE_IOBUFFER - USBTMC_HEADER_SIZE - 3)
-				this_part = USBTMC_SIZE_IOBUFFER - USBTMC_HEADER_SIZE - 3;
-			else
-				this_part = remaining;
-
-			retval = send_request_dev_dep_msg_in(data, this_part);
-			if (retval < 0) {
-			dev_err(dev, "usb_bulk_msg returned %d\n", retval);
-				if (data->auto_abort)
-					usbtmc_ioctl_abort_bulk_out(data);
-				goto exit;
-			}
-		}
-
 		/* Send bulk URB */
 		retval = usb_bulk_msg(data->usb_dev,
 				      usb_rcvbulkpipe(data->usb_dev,
@@ -658,7 +625,7 @@
 		}
 
 		/* Parse header in first packet */
-		if ((done == 0) || !data->rigol_quirk) {
+		if (done == 0) {
 			/* Sanity checks for the header */
 			if (actual < USBTMC_HEADER_SIZE) {
 				dev_err(dev, "Device sent too small first packet: %u < %u\n", actual, USBTMC_HEADER_SIZE);
@@ -698,20 +665,11 @@
 			actual -= USBTMC_HEADER_SIZE;
 
 			/* Check if the message is smaller than requested */
-			if (data->rigol_quirk) {
-				if (remaining > n_characters)
-					remaining = n_characters;
-				/* Remove padding if it exists */
-				if (actual > remaining)
-					actual = remaining;
-			}
-			else {
-				if (this_part > n_characters)
-					this_part = n_characters;
-				/* Remove padding if it exists */
-				if (actual > this_part)
-					actual = this_part;
-			}
+			if (remaining > n_characters)
+				remaining = n_characters;
+			/* Remove padding if it exists */
+			if (actual > remaining)
+				actual = remaining;
 
 			dev_dbg(dev, "Bulk-IN header: N_characters(%u), bTransAttr(%u)\n", n_characters, buffer[8]);
 
@@ -1365,7 +1323,6 @@
 	struct usbtmc_device_data *data;
 	struct usb_host_interface *iface_desc;
 	struct usb_endpoint_descriptor *bulk_in, *bulk_out, *int_in;
-	int n;
 	int retcode;
 
 	dev_dbg(&intf->dev, "%s called\n", __func__);
@@ -1385,20 +1342,6 @@
 	atomic_set(&data->srq_asserted, 0);
 	data->zombie = 0;
 
-	/* Determine if it is a Rigol or not */
-	data->rigol_quirk = 0;
-	dev_dbg(&intf->dev, "Trying to find if device Vendor 0x%04X Product 0x%04X has the RIGOL quirk\n",
-		le16_to_cpu(data->usb_dev->descriptor.idVendor),
-		le16_to_cpu(data->usb_dev->descriptor.idProduct));
-	for(n = 0; usbtmc_id_quirk[n].idVendor > 0; n++) {
-		if ((usbtmc_id_quirk[n].idVendor == le16_to_cpu(data->usb_dev->descriptor.idVendor)) &&
-		    (usbtmc_id_quirk[n].idProduct == le16_to_cpu(data->usb_dev->descriptor.idProduct))) {
-			dev_dbg(&intf->dev, "Setting this device as having the RIGOL quirk\n");
-			data->rigol_quirk = 1;
-			break;
-		}
-	}
-
 	/* Initialize USBTMC bTag and other fields */
 	data->bTag	= 1;
 	data->TermCharEnabled = 0;
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 9792ced..e76e95f 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1922,4 +1922,5 @@
 	.name =		"usb",
 	.match =	usb_device_match,
 	.uevent =	usb_uevent,
+	.need_parent_lock =	true,
 };
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 0a42c5df..1c21955 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -33,7 +33,6 @@
 #include <linux/phy/phy.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
-#include <linux/usb/phy.h>
 #include <linux/usb/otg.h>
 
 #include "usb.h"
@@ -568,6 +567,7 @@
 		switch (wValue & 0xff00) {
 		case USB_DT_DEVICE << 8:
 			switch (hcd->speed) {
+			case HCD_USB32:
 			case HCD_USB31:
 				bufp = usb31_rh_dev_descriptor;
 				break;
@@ -592,6 +592,7 @@
 			break;
 		case USB_DT_CONFIG << 8:
 			switch (hcd->speed) {
+			case HCD_USB32:
 			case HCD_USB31:
 			case HCD_USB3:
 				bufp = ss_rh_config_descriptor;
@@ -2742,34 +2743,14 @@
 	int retval;
 	struct usb_device *rhdev;
 
-	if (IS_ENABLED(CONFIG_USB_PHY) && !hcd->skip_phy_initialization) {
-		struct usb_phy *phy = usb_get_phy_dev(hcd->self.sysdev, 0);
-
-		if (IS_ERR(phy)) {
-			retval = PTR_ERR(phy);
-			if (retval == -EPROBE_DEFER)
-				return retval;
-		} else {
-			retval = usb_phy_init(phy);
-			if (retval) {
-				usb_put_phy(phy);
-				return retval;
-			}
-			hcd->usb_phy = phy;
-			hcd->remove_phy = 1;
-		}
-	}
-
 	if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) {
 		hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev);
-		if (IS_ERR(hcd->phy_roothub)) {
-			retval = PTR_ERR(hcd->phy_roothub);
-			goto err_phy_roothub_alloc;
-		}
+		if (IS_ERR(hcd->phy_roothub))
+			return PTR_ERR(hcd->phy_roothub);
 
 		retval = usb_phy_roothub_init(hcd->phy_roothub);
 		if (retval)
-			goto err_phy_roothub_alloc;
+			return retval;
 
 		retval = usb_phy_roothub_power_on(hcd->phy_roothub);
 		if (retval)
@@ -2819,6 +2800,9 @@
 	hcd->self.root_hub = rhdev;
 	mutex_unlock(&usb_port_peer_mutex);
 
+	rhdev->rx_lanes = 1;
+	rhdev->tx_lanes = 1;
+
 	switch (hcd->speed) {
 	case HCD_USB11:
 		rhdev->speed = USB_SPEED_FULL;
@@ -2832,6 +2816,10 @@
 	case HCD_USB3:
 		rhdev->speed = USB_SPEED_SUPER;
 		break;
+	case HCD_USB32:
+		rhdev->rx_lanes = 2;
+		rhdev->tx_lanes = 2;
+		/* fall through */
 	case HCD_USB31:
 		rhdev->speed = USB_SPEED_SUPER_PLUS;
 		break;
@@ -2943,12 +2931,7 @@
 	usb_phy_roothub_power_off(hcd->phy_roothub);
 err_usb_phy_roothub_power_on:
 	usb_phy_roothub_exit(hcd->phy_roothub);
-err_phy_roothub_alloc:
-	if (hcd->remove_phy && hcd->usb_phy) {
-		usb_phy_shutdown(hcd->usb_phy);
-		usb_put_phy(hcd->usb_phy);
-		hcd->usb_phy = NULL;
-	}
+
 	return retval;
 }
 EXPORT_SYMBOL_GPL(usb_add_hcd);
@@ -3024,12 +3007,6 @@
 	usb_phy_roothub_power_off(hcd->phy_roothub);
 	usb_phy_roothub_exit(hcd->phy_roothub);
 
-	if (hcd->remove_phy && hcd->usb_phy) {
-		usb_phy_shutdown(hcd->usb_phy);
-		usb_put_phy(hcd->usb_phy);
-		hcd->usb_phy = NULL;
-	}
-
 	usb_put_invalidate_rhdev(hcd);
 	hcd->flags = 0;
 }
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index aa9968d..26c2438 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2636,7 +2636,7 @@
 #define SET_ADDRESS_TRIES	2
 #define GET_DESCRIPTOR_TRIES	2
 #define SET_CONFIG_TRIES	(2 * (use_both_schemes + 1))
-#define USE_NEW_SCHEME(i)	((i) / 2 == (int)old_scheme_first)
+#define USE_NEW_SCHEME(i, scheme)	((i) / 2 == (int)scheme)
 
 #define HUB_ROOT_RESET_TIME	60	/* times are in msec */
 #define HUB_SHORT_RESET_TIME	10
@@ -2651,12 +2651,16 @@
  * enumeration failures, so disable this enumeration scheme for USB3
  * devices.
  */
-static bool use_new_scheme(struct usb_device *udev, int retry)
+static bool use_new_scheme(struct usb_device *udev, int retry,
+			   struct usb_port *port_dev)
 {
+	int old_scheme_first_port =
+		port_dev->quirks & USB_PORT_QUIRK_OLD_SCHEME;
+
 	if (udev->speed >= USB_SPEED_SUPER)
 		return false;
 
-	return USE_NEW_SCHEME(retry);
+	return USE_NEW_SCHEME(retry, old_scheme_first_port || old_scheme_first);
 }
 
 /* Is a USB 3.0 port in the Inactive or Compliance Mode state?
@@ -2751,6 +2755,14 @@
 	if (!udev)
 		return 0;
 
+	if (hub_is_superspeedplus(hub->hdev)) {
+		/* extended portstatus Rx and Tx lane count are zero based */
+		udev->rx_lanes = USB_EXT_PORT_RX_LANES(ext_portstatus) + 1;
+		udev->tx_lanes = USB_EXT_PORT_TX_LANES(ext_portstatus) + 1;
+	} else {
+		udev->rx_lanes = 1;
+		udev->tx_lanes = 1;
+	}
 	if (hub_is_wusb(hub))
 		udev->speed = USB_SPEED_WIRELESS;
 	else if (hub_is_superspeedplus(hub->hdev) &&
@@ -2867,7 +2879,11 @@
 done:
 	if (status == 0) {
 		/* TRSTRCY = 10 ms; plus some extra */
-		msleep(10 + 40);
+		if (port_dev->quirks & USB_PORT_QUIRK_FAST_ENUM)
+			usleep_range(10000, 12000);
+		else
+			msleep(10 + 40);
+
 		if (udev) {
 			struct usb_hcd *hcd = bus_to_hcd(udev->bus);
 
@@ -3376,6 +3392,10 @@
 	while (delay_ms < 2000) {
 		if (status || *portstatus & USB_PORT_STAT_CONNECTION)
 			break;
+		if (!port_is_power_on(hub, *portstatus)) {
+			status = -ENODEV;
+			break;
+		}
 		msleep(20);
 		delay_ms += 20;
 		status = hub_port_status(hub, *port1, portstatus, portchange);
@@ -4380,6 +4400,7 @@
 {
 	struct usb_device	*hdev = hub->hdev;
 	struct usb_hcd		*hcd = bus_to_hcd(hdev->bus);
+	struct usb_port		*port_dev = hub->ports[port1 - 1];
 	int			retries, operations, retval, i;
 	unsigned		delay = HUB_SHORT_RESET_TIME;
 	enum usb_device_speed	oldspeed = udev->speed;
@@ -4501,7 +4522,7 @@
 	for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) {
 		bool did_new_scheme = false;
 
-		if (use_new_scheme(udev, retry_counter)) {
+		if (use_new_scheme(udev, retry_counter, port_dev)) {
 			struct usb_device_descriptor *buf;
 			int r = 0;
 
@@ -4551,7 +4572,9 @@
 				 * reset. But only on the first attempt,
 				 * lest we get into a time out/reset loop
 				 */
-				if (r == 0  || (r == -ETIMEDOUT && retries == 0))
+				if (r == 0 || (r == -ETIMEDOUT &&
+						retries == 0 &&
+						udev->speed > USB_SPEED_FULL))
 					break;
 			}
 			udev->descriptor.bMaxPacketSize0 =
@@ -4598,9 +4621,12 @@
 			if (udev->speed >= USB_SPEED_SUPER) {
 				devnum = udev->devnum;
 				dev_info(&udev->dev,
-						"%s SuperSpeed%s USB device number %d using %s\n",
+						"%s SuperSpeed%s%s USB device number %d using %s\n",
 						(udev->config) ? "reset" : "new",
-					 (udev->speed == USB_SPEED_SUPER_PLUS) ? "Plus" : "",
+					 (udev->speed == USB_SPEED_SUPER_PLUS) ?
+							"Plus Gen 2" : " Gen 1",
+					 (udev->rx_lanes == 2 && udev->tx_lanes == 2) ?
+							"x2" : "",
 					 devnum, driver_name);
 			}
 
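The per-port quirks word lets user space opt a single port into the old enumeration scheme (bit 0, USB_PORT_QUIRK_OLD_SCHEME) or into the shorter post-reset delay (bit 1, USB_PORT_QUIRK_FAST_ENUM) through the sysfs attribute added in port.c below. A user-space sketch; the sysfs path shown is hypothetical and depends on the hub topology:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        /* Hypothetical path: <hub interface>/usbX-portN/quirks */
        int fd = open("/sys/bus/usb/devices/1-0:1.0/usb1-port1/quirks",
                      O_WRONLY);

        if (fd < 0)
            return 1;
        write(fd, "1", 1);   /* hex bitmask: bit 0 = old scheme first */
        close(fd);
        return 0;
    }
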
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 4dc769e..4accfb6 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -98,6 +98,7 @@
 	struct mutex status_lock;
 	u32 over_current_count;
 	u8 portnum;
+	u32 quirks;
 	unsigned int is_superspeed:1;
 	unsigned int usb3_lpm_u1_permit:1;
 	unsigned int usb3_lpm_u2_permit:1;
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 0c11d40..7b13700 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -940,7 +940,7 @@
 	return usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			USB_REQ_SET_ISOCH_DELAY,
 			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			cpu_to_le16(dev->hub_delay), 0, NULL, 0,
+			dev->hub_delay, 0, NULL, 0,
 			USB_CTRL_SET_TIMEOUT);
 }
 
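The dropped cpu_to_le16() was a double conversion: usb_control_msg() takes wValue and wIndex in CPU byte order, and the USB core swaps them to little-endian itself while building the SETUP packet, so pre-swapping corrupted the value on big-endian hosts. For reference, the call convention as a self-contained helper (foo_* is a placeholder name):

    static int foo_set_isoch_delay(struct usb_device *udev, u16 delay)
    {
        /* wValue and wIndex are host-endian here; the core converts
         * them when it fills the 8-byte SETUP packet. */
        return usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
                               USB_REQ_SET_ISOCH_DELAY,
                               USB_DIR_OUT | USB_TYPE_STANDARD |
                               USB_RECIP_DEVICE,
                               delay, 0, NULL, 0,
                               USB_CTRL_SET_TIMEOUT);
    }
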
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 6979bde..4a21431 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -50,6 +50,28 @@
 }
 static DEVICE_ATTR_RO(over_current_count);
 
+static ssize_t quirks_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
+{
+	struct usb_port *port_dev = to_usb_port(dev);
+
+	return sprintf(buf, "%08x\n", port_dev->quirks);
+}
+
+static ssize_t quirks_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct usb_port *port_dev = to_usb_port(dev);
+	u32 value;
+
+	if (kstrtou32(buf, 16, &value))
+		return -EINVAL;
+
+	port_dev->quirks = value;
+	return count;
+}
+static DEVICE_ATTR_RW(quirks);
+
 static ssize_t usb3_lpm_permit_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
@@ -118,6 +140,7 @@
 
 static struct attribute *port_dev_attrs[] = {
 	&dev_attr_connect_type.attr,
+	&dev_attr_quirks.attr,
 	&dev_attr_over_current_count.attr,
 	NULL,
 };
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 27bb340..ea18284 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -175,6 +175,26 @@
 }
 static DEVICE_ATTR_RO(speed);
 
+static ssize_t rx_lanes_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct usb_device *udev;
+
+	udev = to_usb_device(dev);
+	return sprintf(buf, "%d\n", udev->rx_lanes);
+}
+static DEVICE_ATTR_RO(rx_lanes);
+
+static ssize_t tx_lanes_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct usb_device *udev;
+
+	udev = to_usb_device(dev);
+	return sprintf(buf, "%d\n", udev->tx_lanes);
+}
+static DEVICE_ATTR_RO(tx_lanes);
+
 static ssize_t busnum_show(struct device *dev, struct device_attribute *attr,
 			   char *buf)
 {
@@ -790,6 +810,8 @@
 	&dev_attr_bNumConfigurations.attr,
 	&dev_attr_bMaxPacketSize0.attr,
 	&dev_attr_speed.attr,
+	&dev_attr_rx_lanes.attr,
+	&dev_attr_tx_lanes.attr,
 	&dev_attr_busnum.attr,
 	&dev_attr_devnum.attr,
 	&dev_attr_devpath.attr,
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 0adb634..623be31 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -1167,30 +1167,16 @@
 struct dentry *usb_debug_root;
 EXPORT_SYMBOL_GPL(usb_debug_root);
 
-static struct dentry *usb_debug_devices;
-
-static int usb_debugfs_init(void)
+static void usb_debugfs_init(void)
 {
 	usb_debug_root = debugfs_create_dir("usb", NULL);
-	if (!usb_debug_root)
-		return -ENOENT;
-
-	usb_debug_devices = debugfs_create_file("devices", 0444,
-						usb_debug_root, NULL,
-						&usbfs_devices_fops);
-	if (!usb_debug_devices) {
-		debugfs_remove(usb_debug_root);
-		usb_debug_root = NULL;
-		return -ENOENT;
-	}
-
-	return 0;
+	debugfs_create_file("devices", 0444, usb_debug_root, NULL,
+			    &usbfs_devices_fops);
 }
 
 static void usb_debugfs_cleanup(void)
 {
-	debugfs_remove(usb_debug_devices);
-	debugfs_remove(usb_debug_root);
+	debugfs_remove_recursive(usb_debug_root);
 }
 
 /*
@@ -1205,9 +1191,7 @@
 	}
 	usb_init_pool_max();
 
-	retval = usb_debugfs_init();
-	if (retval)
-		goto out;
+	usb_debugfs_init();
 
 	usb_acpi_register();
 	retval = bus_register(&usb_bus_type);
diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index 18a0a17..1c36a6a 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -419,6 +419,8 @@
 /**
  * dwc2_iddig_filter_enabled() - Returns true if the IDDIG debounce
  * filter is enabled.
+ *
+ * @hsotg: Programming view of DWC_otg controller
  */
 static bool dwc2_iddig_filter_enabled(struct dwc2_hsotg *hsotg)
 {
@@ -564,6 +566,9 @@
  * If a force is done, it requires a IDDIG debounce filter delay if
  * the filter is configured and enabled. We poll the current mode of
  * the controller to account for this delay.
+ *
+ * @hsotg: Programming view of DWC_otg controller
+ * @host: Host mode flag
  */
 void dwc2_force_mode(struct dwc2_hsotg *hsotg, bool host)
 {
@@ -610,6 +615,8 @@
  * or not because the value of the connector ID status is affected by
  * the force mode. We only need to call this once during probe if
  * dr_mode == OTG.
+ *
+ * @hsotg: Programming view of DWC_otg controller
  */
 static void dwc2_clear_force_mode(struct dwc2_hsotg *hsotg)
 {
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index a666e07..4a56ac7 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -164,12 +164,11 @@
  *       and has yet to be completed (maybe due to data move, or simply
  *       awaiting an ack from the core all the data has been completed).
  * @debugfs: File entry for debugfs file for this endpoint.
- * @lock: State lock to protect contents of endpoint.
  * @dir_in: Set to true if this endpoint is of the IN direction, which
  *          means that it is sending data to the Host.
  * @index: The index for the endpoint registers.
  * @mc: Multi Count - number of transactions per microframe
- * @interval - Interval for periodic endpoints, in frames or microframes.
+ * @interval: Interval for periodic endpoints, in frames or microframes.
  * @name: The name array passed to the USB core.
  * @halted: Set if the endpoint has been halted.
  * @periodic: Set if this is a periodic ep, such as Interrupt
@@ -178,10 +177,11 @@
  * @desc_list_dma: The DMA address of descriptor chain currently in use.
  * @desc_list: Pointer to descriptor DMA chain head currently in use.
  * @desc_count: Count of entries within the DMA descriptor chain of EP.
- * @isoc_chain_num: Number of ISOC chain currently in use - either 0 or 1.
  * @next_desc: index of next free descriptor in the ISOC chain under SW control.
+ * @compl_desc: index of next descriptor to be completed by xFerComplete
  * @total_data: The total number of data bytes done.
  * @fifo_size: The size of the FIFO (for periodic IN endpoints)
+ * @fifo_index: For Dedicated FIFO operation, only FIFO0 can be used for EP0.
  * @fifo_load: The amount of data loaded into the FIFO (periodic IN)
  * @last_load: The offset of data for the last start of request.
  * @size_loaded: The last loaded size for DxEPTSIZE for periodic IN
@@ -231,8 +231,8 @@
 	struct dwc2_dma_desc	*desc_list;
 	u8			desc_count;
 
-	unsigned char		isoc_chain_num;
 	unsigned int		next_desc;
+	unsigned int		compl_desc;
 
 	char                    name[10];
 };
@@ -380,6 +380,12 @@
  *                      is FS.
  *                       0 - No (default)
  *                       1 - Yes
+ * @ipg_isoc_en:        Indicates whether IPG support is enabled or disabled.
+ *                       0 - Disable (default)
+ *                       1 - Enable
+ * @acg_enable:		For enabling Active Clock Gating in the controller
+ *                       0 - No
+ *                       1 - Yes
  * @ulpi_fs_ls:         Make ULPI phy operate in FS/LS mode only
  *                       0 - No (default)
  *                       1 - Yes
@@ -511,6 +517,7 @@
 	bool hird_threshold_en;
 	u8 hird_threshold;
 	bool activate_stm_fs_transceiver;
+	bool ipg_isoc_en;
 	u16 max_packet_count;
 	u32 max_transfer_size;
 	u32 ahbcfg;
@@ -548,7 +555,7 @@
  *
  * The values that are not in dwc2_core_params are documented below.
  *
- * @op_mode             Mode of Operation
+ * @op_mode:             Mode of Operation
  *                       0 - HNP- and SRP-Capable OTG (Host & Device)
  *                       1 - SRP-Capable OTG (Host & Device)
  *                       2 - Non-HNP and Non-SRP Capable OTG (Host & Device)
@@ -556,43 +563,102 @@
  *                       4 - Non-OTG Device
  *                       5 - SRP-Capable Host
  *                       6 - Non-OTG Host
- * @arch                Architecture
+ * @arch:                Architecture
  *                       0 - Slave only
  *                       1 - External DMA
  *                       2 - Internal DMA
- * @power_optimized     Are power optimizations enabled?
- * @num_dev_ep          Number of device endpoints available
- * @num_dev_in_eps      Number of device IN endpoints available
- * @num_dev_perio_in_ep Number of device periodic IN endpoints
- *                      available
- * @dev_token_q_depth   Device Mode IN Token Sequence Learning Queue
+ * @ipg_isoc_en:        This feature indicates that the controller supports
+ *                      the worst-case scenario of Rx followed by Rx
+ *                      Interpacket Gap (IPG) (32 bitTimes) as per the utmi
+ *                      specification for any token following ISOC OUT token.
+ *                       0 - Don't support
+ *                       1 - Support
+ * @power_optimized:    Are power optimizations enabled?
+ * @num_dev_ep:         Number of device endpoints available
+ * @num_dev_in_eps:     Number of device IN endpoints available
+ * @num_dev_perio_in_ep: Number of device periodic IN endpoints
+ *                       available
+ * @dev_token_q_depth:  Device Mode IN Token Sequence Learning Queue
  *                      Depth
  *                       0 to 30
- * @host_perio_tx_q_depth
+ * @host_perio_tx_q_depth:
  *                      Host Mode Periodic Request Queue Depth
  *                       2, 4 or 8
- * @nperio_tx_q_depth
+ * @nperio_tx_q_depth:
  *                      Non-Periodic Request Queue Depth
  *                       2, 4 or 8
- * @hs_phy_type         High-speed PHY interface type
+ * @hs_phy_type:         High-speed PHY interface type
  *                       0 - High-speed interface not supported
  *                       1 - UTMI+
  *                       2 - ULPI
  *                       3 - UTMI+ and ULPI
- * @fs_phy_type         Full-speed PHY interface type
+ * @fs_phy_type:         Full-speed PHY interface type
  *                       0 - Full speed interface not supported
  *                       1 - Dedicated full speed interface
  *                       2 - FS pins shared with UTMI+ pins
  *                       3 - FS pins shared with ULPI pins
  * @total_fifo_size:    Total internal RAM for FIFOs (bytes)
- * @hibernation		Is hibernation enabled?
- * @utmi_phy_data_width UTMI+ PHY data width
+ * @hibernation:	Is hibernation enabled?
+ * @utmi_phy_data_width: UTMI+ PHY data width
  *                       0 - 8 bits
  *                       1 - 16 bits
  *                       2 - 8 or 16 bits
  * @snpsid:             Value from SNPSID register
  * @dev_ep_dirs:        Direction of device endpoints (GHWCFG1)
- * @g_tx_fifo_size[]	Power-on values of TxFIFO sizes
+ * @g_tx_fifo_size:	Power-on values of TxFIFO sizes
+ * @dma_desc_enable:    When DMA mode is enabled, specifies whether to use
+ *                      address DMA mode or descriptor DMA mode for accessing
+ *                      the data FIFOs. The driver will automatically detect the
+ *                      value for this if none is specified.
+ *                       0 - Address DMA
+ *                       1 - Descriptor DMA (default, if available)
+ * @enable_dynamic_fifo: 0 - Use coreConsultant-specified FIFO size parameters
+ *                       1 - Allow dynamic FIFO sizing (default, if available)
+ * @en_multiple_tx_fifo: Specifies whether dedicated per-endpoint transmit FIFOs
+ *                      are enabled for non-periodic IN endpoints in device
+ *                      mode.
+ * @host_nperio_tx_fifo_size: Number of 4-byte words in the non-periodic Tx FIFO
+ *                      in host mode when dynamic FIFO sizing is enabled
+ *                       16 to 32768
+ *                      Actual maximum value is autodetected and also
+ *                      the default.
+ * @host_perio_tx_fifo_size: Number of 4-byte words in the periodic Tx FIFO in
+ *                      host mode when dynamic FIFO sizing is enabled
+ *                       16 to 32768
+ *                      Actual maximum value is autodetected and also
+ *                      the default.
+ * @max_transfer_size:  The maximum transfer size supported, in bytes
+ *                       2047 to 65,535
+ *                      Actual maximum value is autodetected and also
+ *                      the default.
+ * @max_packet_count:   The maximum number of packets in a transfer
+ *                       15 to 511
+ *                      Actual maximum value is autodetected and also
+ *                      the default.
+ * @host_channels:      The number of host channel registers to use
+ *                       1 to 16
+ *                      Actual maximum value is autodetected and also
+ *                      the default.
+ * @dev_nperio_tx_fifo_size: Number of 4-byte words in the non-periodic Tx FIFO
+ *			     in device mode when dynamic FIFO sizing is enabled
+ *			     16 to 32768
+ *			     Actual maximum value is autodetected and also
+ *			     the default.
+ * @i2c_enable:         Specifies whether to use the I2C interface for a full
+ *                      speed PHY. This parameter is only applicable if phy_type
+ *                      is FS.
+ *                       0 - No (default)
+ *                       1 - Yes
+ * @acg_enable:		For enabling Active Clock Gating in the controller
+ *                       0 - Disable
+ *                       1 - Enable
+ * @lpm_mode:		For enabling Link Power Management in the controller
+ *                       0 - Disable
+ *                       1 - Enable
+ * @rx_fifo_size:	Number of 4-byte words in the Rx FIFO when dynamic
+ *			FIFO sizing is enabled, 16 to 32768.
+ *			Actual maximum value is autodetected and also
+ *			the default.
  */
 struct dwc2_hw_params {
 	unsigned op_mode:3;
@@ -622,6 +688,7 @@
 	unsigned hibernation:1;
 	unsigned utmi_phy_data_width:2;
 	unsigned lpm_mode:1;
+	unsigned ipg_isoc_en:1;
 	u32 snpsid;
 	u32 dev_ep_dirs;
 	u32 g_tx_fifo_size[MAX_EPS_CHANNELS];
@@ -642,7 +709,11 @@
  * @gi2cctl:		Backup of GI2CCTL register
  * @glpmcfg:		Backup of GLPMCFG register
  * @gdfifocfg:		Backup of GDFIFOCFG register
+ * @pcgcctl:		Backup of PCGCCTL register
+ * @pcgcctl1:		Backup of PCGCCTL1 register
+ * @dtxfsiz:		Backup of DTXFSIZ registers for each endpoint
  * @gpwrdn:		Backup of GPWRDN register
+ * @valid:		True if the register values have been backed up.
  */
 struct dwc2_gregs_backup {
 	u32 gotgctl;
@@ -675,6 +746,7 @@
  * @doeptsiz:		Backup of DOEPTSIZ register
  * @doepdma:		Backup of DOEPDMA register
  * @dtxfsiz:		Backup of DTXFSIZ registers for each endpoint
+ * @valid:      True if the register values have been backed up.
  */
 struct dwc2_dregs_backup {
 	u32 dcfg;
@@ -698,9 +770,10 @@
  * @hcfg:		Backup of HCFG register
  * @haintmsk:		Backup of HAINTMSK register
  * @hcintmsk:		Backup of HCINTMSK register
- * @hptr0:		Backup of HPTR0 register
+ * @hprt0:		Backup of HPTR0 register
  * @hfir:		Backup of HFIR register
  * @hptxfsiz:		Backup of HPTXFSIZ register
+ * @valid:      True if the register values have been backed up.
  */
 struct dwc2_hregs_backup {
 	u32 hcfg;
@@ -800,7 +873,7 @@
  * @regs:		Pointer to controller regs
  * @hw_params:          Parameters that were autodetected from the
  *                      hardware registers
- * @core_params:	Parameters that define how the core should be configured
+ * @params:	Parameters that define how the core should be configured
  * @op_state:           The operational State, during transitions (a_host=>
  *                      a_peripheral and b_device=>b_host) this may not match
  *                      the core, but allows the software to determine
@@ -809,10 +882,13 @@
  *                      - USB_DR_MODE_PERIPHERAL
  *                      - USB_DR_MODE_HOST
  *                      - USB_DR_MODE_OTG
- * @hcd_enabled		Host mode sub-driver initialization indicator.
- * @gadget_enabled	Peripheral mode sub-driver initialization indicator.
- * @ll_hw_enabled	Status of low-level hardware resources.
+ * @hcd_enabled:	Host mode sub-driver initialization indicator.
+ * @gadget_enabled:	Peripheral mode sub-driver initialization indicator.
+ * @ll_hw_enabled:	Status of low-level hardware resources.
  * @hibernated:		True if core is hibernated
+ * @frame_number:       Frame number read from the core. Used in both
+ *			device and host modes. The value ranges from 0
+ *			to HFNUM_MAX_FRNUM.
  * @phy:                The otg phy transceiver structure for phy control.
  * @uphy:               The otg phy transceiver structure for old USB phy
  *                      control.
@@ -832,13 +908,25 @@
  *                      interrupt
  * @wkp_timer:          Timer object for handling Wakeup Detected interrupt
  * @lx_state:           Lx state of connected device
- * @gregs_backup: Backup of global registers during suspend
- * @dregs_backup: Backup of device registers during suspend
- * @hregs_backup: Backup of host registers during suspend
+ * @gr_backup: Backup of global registers during suspend
+ * @dr_backup: Backup of device registers during suspend
+ * @hr_backup: Backup of host registers during suspend
  *
  * These are for host mode:
  *
  * @flags:              Flags for handling root port state changes
+ * @flags.d32:          Contain all root port flags
+ * @flags.b:            Separate root port flags from each other
+ * @flags.b.port_connect_status_change: True if root port connect status
+ *                      changed
+ * @flags.b.port_connect_status: True if device connected to root port
+ * @flags.b.port_reset_change: True if root port reset status changed
+ * @flags.b.port_enable_change: True if root port enable status changed
+ * @flags.b.port_suspend_change: True if root port suspend status changed
+ * @flags.b.port_over_current_change: True if root port over current state
+ *                       changed.
+ * @flags.b.port_l1_change: True if root port l1 status changed
+ * @flags.b.reserved:   Reserved bits of root port register
  * @non_periodic_sched_inactive: Inactive QHs in the non-periodic schedule.
  *                      Transfers associated with these QHs are not currently
  *                      assigned to a host channel.
@@ -847,6 +935,9 @@
  *                      assigned to a host channel.
  * @non_periodic_qh_ptr: Pointer to next QH to process in the active
  *                      non-periodic schedule
+ * @non_periodic_sched_waiting: Waiting QHs in the non-periodic schedule.
+ *                      Transfers associated with these QHs are not currently
+ *                      assigned to a host channel.
  * @periodic_sched_inactive: Inactive QHs in the periodic schedule. This is a
  *                      list of QHs for periodic transfers that are _not_
  *                      scheduled for the next frame. Each QH in the list has an
@@ -886,8 +977,6 @@
  * @hs_periodic_bitmap: Bitmap used by the microframe scheduler any time the
  *                      host is in high speed mode; low speed schedules are
  *                      stored elsewhere since we need one per TT.
- * @frame_number:       Frame number read from the core at SOF. The value ranges
- *                      from 0 to HFNUM_MAX_FRNUM.
  * @periodic_qh_count:  Count of periodic QHs, if using several eps. Used for
  *                      SOF enable/disable.
  * @free_hc_list:       Free host channels in the controller. This is a list of
@@ -898,8 +987,8 @@
  *                      host channel is available for non-periodic transactions.
  * @non_periodic_channels: Number of host channels assigned to non-periodic
  *                      transfers
- * @available_host_channels Number of host channels available for the microframe
- *                      scheduler to use
+ * @available_host_channels: Number of host channels available for the
+ *			     microframe scheduler to use
  * @hc_ptr_array:       Array of pointers to the host channel descriptors.
  *                      Allows accessing a host channel descriptor given the
  *                      host channel number. This is useful in interrupt
@@ -922,9 +1011,6 @@
  * @dedicated_fifos:    Set if the hardware has dedicated IN-EP fifos.
  * @num_of_eps:         Number of available EPs (excluding EP0)
  * @debug_root:         Root directrory for debugfs.
- * @debug_file:         Main status file for debugfs.
- * @debug_testmode:     Testmode status file for debugfs.
- * @debug_fifo:         FIFO status file for debugfs.
  * @ep0_reply:          Request used for ep0 reply.
  * @ep0_buff:           Buffer for EP0 reply data, if needed.
  * @ctrl_buff:          Buffer for EP0 control requests.
@@ -939,7 +1025,37 @@
  * @ctrl_in_desc:	EP0 IN data phase desc chain pointer
  * @ctrl_out_desc_dma:	EP0 OUT data phase desc chain DMA address
  * @ctrl_out_desc:	EP0 OUT data phase desc chain pointer
- * @eps:                The endpoints being supplied to the gadget framework
+ * @irq:		Interrupt request line number
+ * @clk:		Pointer to otg clock
+ * @reset:		Pointer to dwc2 reset controller
+ * @reset_ecc:          Pointer to dwc2 optional reset controller in Stratix10.
+ * @regset:		A pointer to a struct debugfs_regset32, which contains
+ *			a pointer to an array of register definitions, the
+ *			array size and the base address where the register bank
+ *			is to be found.
+ * @bus_suspended:	True if bus is suspended
+ * @last_frame_num:	Number of the last frame. Range from 0 to 32768
+ * @frame_num_array:    Used only if CONFIG_USB_DWC2_TRACK_MISSED_SOFS is
+ *			defined, for missed-SOF tracking. Holds the frame
+ *			numbers that are not equal to last_frame_num + 1
+ * @last_frame_num_array:   Used only if CONFIG_USB_DWC2_TRACK_MISSED_SOFS is
+ *			    defined, for missed-SOF tracking.
+ *			    If current_frame_number != last_frame_num + 1,
+ *			    then last_frame_num is added to this array
+ * @frame_num_idx:	Actual size of frame_num_array and last_frame_num_array
+ * @dumped_frame_num_array:	1 - missed-SOF frame numbers have been dumped
+ *				0 - missed-SOF frame numbers not yet dumped
+ * @fifo_mem:			Total internal RAM for FIFOs (bytes)
+ * @fifo_map:		One bit per FIFO. If a bit is set, the
+ *			corresponding FIFO is in use
+ * @gadget:		Represents a USB slave device
+ * @connected:		Used in slave mode. True if connected to a host
+ * @eps_in:		The IN endpoints being supplied to the gadget framework
+ * @eps_out:		The OUT endpoints being supplied to the gadget framework
+ * @new_connection:	Used in host mode. True if a newly connected
+ *			device is present
+ * @enabled:		Indicates whether the controller is enabled
+ *
  */
 struct dwc2_hsotg {
 	struct device *dev;
@@ -954,6 +1070,7 @@
 	unsigned int gadget_enabled:1;
 	unsigned int ll_hw_enabled:1;
 	unsigned int hibernated:1;
+	u16 frame_number;
 
 	struct phy *phy;
 	struct usb_phy *uphy;
@@ -1029,7 +1146,6 @@
 	u16 periodic_usecs;
 	unsigned long hs_periodic_bitmap[
 		DIV_ROUND_UP(DWC2_HS_SCHEDULE_US, BITS_PER_LONG)];
-	u16 frame_number;
 	u16 periodic_qh_count;
 	bool bus_suspended;
 	bool new_connection;
diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c
index 2982a15..cc90b58 100644
--- a/drivers/usb/dwc2/core_intr.c
+++ b/drivers/usb/dwc2/core_intr.c
@@ -778,6 +778,14 @@
 		goto out;
 	}
 
+	/* Reading current frame number value in device or host modes. */
+	if (dwc2_is_device_mode(hsotg))
+		hsotg->frame_number = (dwc2_readl(hsotg->regs + DSTS)
+				       & DSTS_SOFFN_MASK) >> DSTS_SOFFN_SHIFT;
+	else
+		hsotg->frame_number = (dwc2_readl(hsotg->regs + HFNUM)
+				       & HFNUM_FRNUM_MASK) >> HFNUM_FRNUM_SHIFT;
+
 	gintsts = dwc2_read_common_intr(hsotg);
 	if (gintsts & ~GINTSTS_PRTINT)
 		retval = IRQ_HANDLED;
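
Caching the frame number at the top of the common interrupt handler gives the device-mode (DSTS.SOFFN) and host-mode (HFNUM.FRNUM) consumers one consistent snapshot per interrupt. The extraction is the usual read-mask-shift idiom; it could equally be factored into a small helper, sketched here with the same register names:

    static u16 foo_read_frame_number(struct dwc2_hsotg *hsotg)
    {
        if (dwc2_is_device_mode(hsotg))
            return (dwc2_readl(hsotg->regs + DSTS) &
                    DSTS_SOFFN_MASK) >> DSTS_SOFFN_SHIFT;

        return (dwc2_readl(hsotg->regs + HFNUM) &
                HFNUM_FRNUM_MASK) >> HFNUM_FRNUM_SHIFT;
    }
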
diff --git a/drivers/usb/dwc2/debug.h b/drivers/usb/dwc2/debug.h
index 6f23219..a8c565b 100644
--- a/drivers/usb/dwc2/debug.h
+++ b/drivers/usb/dwc2/debug.h
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * debug.h - Designware USB2 DRD controller debug header
  *
  * Copyright (C) 2015 Intel Corporation
diff --git a/drivers/usb/dwc2/debugfs.c b/drivers/usb/dwc2/debugfs.c
index 58c691f..d0bdb79 100644
--- a/drivers/usb/dwc2/debugfs.c
+++ b/drivers/usb/dwc2/debugfs.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * debugfs.c - Designware USB2 DRD controller debugfs
  *
  * Copyright (C) 2015 Intel Corporation
@@ -16,12 +16,13 @@
 
 #if IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || \
 	IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE)
+
 /**
- * testmode_write - debugfs: change usb test mode
- * @seq: The seq file to write to.
- * @v: Unused parameter.
- *
- * This debugfs entry modify the current usb test mode.
+ * testmode_write() - change usb test mode state.
+ * @file: The file to write to.
+ * @ubuf: The buffer where user wrote.
+ * @count: The ubuf size.
+ * @ppos: Unused parameter.
  */
 static ssize_t testmode_write(struct file *file, const char __user *ubuf, size_t
 		count, loff_t *ppos)
@@ -55,9 +56,9 @@
 }
 
 /**
- * testmode_show - debugfs: show usb test mode state
- * @seq: The seq file to write to.
- * @v: Unused parameter.
+ * testmode_show() - debugfs: show usb test mode state
+ * @s: The seq file to write to.
+ * @unused: Unused parameter.
  *
  * This debugfs entry shows which usb test mode is currently enabled.
  */
@@ -293,52 +294,30 @@
 static void dwc2_hsotg_create_debug(struct dwc2_hsotg *hsotg)
 {
 	struct dentry *root;
-	struct dentry *file;
 	unsigned int epidx;
 
 	root = hsotg->debug_root;
 
 	/* create general state file */
-
-	file = debugfs_create_file("state", 0444, root, hsotg, &state_fops);
-	if (IS_ERR(file))
-		dev_err(hsotg->dev, "%s: failed to create state\n", __func__);
-
-	file = debugfs_create_file("testmode", 0644, root, hsotg,
-				   &testmode_fops);
-	if (IS_ERR(file))
-		dev_err(hsotg->dev, "%s: failed to create testmode\n",
-			__func__);
-
-	file = debugfs_create_file("fifo", 0444, root, hsotg, &fifo_fops);
-	if (IS_ERR(file))
-		dev_err(hsotg->dev, "%s: failed to create fifo\n", __func__);
+	debugfs_create_file("state", 0444, root, hsotg, &state_fops);
+	debugfs_create_file("testmode", 0644, root, hsotg, &testmode_fops);
+	debugfs_create_file("fifo", 0444, root, hsotg, &fifo_fops);
 
 	/* Create one file for each out endpoint */
 	for (epidx = 0; epidx < hsotg->num_of_eps; epidx++) {
 		struct dwc2_hsotg_ep *ep;
 
 		ep = hsotg->eps_out[epidx];
-		if (ep) {
-			file = debugfs_create_file(ep->name, 0444,
-						   root, ep, &ep_fops);
-			if (IS_ERR(file))
-				dev_err(hsotg->dev, "failed to create %s debug file\n",
-					ep->name);
-		}
+		if (ep)
+			debugfs_create_file(ep->name, 0444, root, ep, &ep_fops);
 	}
 	/* Create one file for each in endpoint. EP0 is handled with out eps */
 	for (epidx = 1; epidx < hsotg->num_of_eps; epidx++) {
 		struct dwc2_hsotg_ep *ep;
 
 		ep = hsotg->eps_in[epidx];
-		if (ep) {
-			file = debugfs_create_file(ep->name, 0444,
-						   root, ep, &ep_fops);
-			if (IS_ERR(file))
-				dev_err(hsotg->dev, "failed to create %s debug file\n",
-					ep->name);
-		}
+		if (ep)
+			debugfs_create_file(ep->name, 0444, root, ep, &ep_fops);
 	}
 }
 #else
@@ -368,7 +347,7 @@
 	dump_register(GINTSTS),
 	dump_register(GINTMSK),
 	dump_register(GRXSTSR),
-	dump_register(GRXSTSP),
+	/* Omit GRXSTSP: reading it pops the RX FIFO */
 	dump_register(GRXFSIZ),
 	dump_register(GNPTXFSIZ),
 	dump_register(GNPTXSTS),
@@ -710,6 +689,7 @@
 	print_param(seq, p, phy_ulpi_ddr);
 	print_param(seq, p, phy_ulpi_ext_vbus);
 	print_param(seq, p, i2c_enable);
+	print_param(seq, p, ipg_isoc_en);
 	print_param(seq, p, ulpi_fs_ls);
 	print_param(seq, p, host_support_fs_ls_low_power);
 	print_param(seq, p, host_ls_low_power_phy_clk);
@@ -790,32 +770,14 @@
 int dwc2_debugfs_init(struct dwc2_hsotg *hsotg)
 {
 	int			ret;
-	struct dentry		*file;
+	struct dentry		*root;
 
-	hsotg->debug_root = debugfs_create_dir(dev_name(hsotg->dev), NULL);
-	if (!hsotg->debug_root) {
-		ret = -ENOMEM;
-		goto err0;
-	}
+	root = debugfs_create_dir(dev_name(hsotg->dev), NULL);
+	hsotg->debug_root = root;
 
-	file = debugfs_create_file("params", 0444,
-				   hsotg->debug_root,
-				   hsotg, &params_fops);
-	if (IS_ERR(file))
-		dev_err(hsotg->dev, "%s: failed to create params\n", __func__);
-
-	file = debugfs_create_file("hw_params", 0444,
-				   hsotg->debug_root,
-				   hsotg, &hw_params_fops);
-	if (IS_ERR(file))
-		dev_err(hsotg->dev, "%s: failed to create hw_params\n",
-			__func__);
-
-	file = debugfs_create_file("dr_mode", 0444,
-				   hsotg->debug_root,
-				   hsotg, &dr_mode_fops);
-	if (IS_ERR(file))
-		dev_err(hsotg->dev, "%s: failed to create dr_mode\n", __func__);
+	debugfs_create_file("params", 0444, root, hsotg, &params_fops);
+	debugfs_create_file("hw_params", 0444, root, hsotg, &hw_params_fops);
+	debugfs_create_file("dr_mode", 0444, root, hsotg, &dr_mode_fops);
 
 	/* Add gadget debugfs nodes */
 	dwc2_hsotg_create_debug(hsotg);
@@ -824,24 +786,18 @@
 								GFP_KERNEL);
 	if (!hsotg->regset) {
 		ret = -ENOMEM;
-		goto err1;
+		goto err;
 	}
 
 	hsotg->regset->regs = dwc2_regs;
 	hsotg->regset->nregs = ARRAY_SIZE(dwc2_regs);
 	hsotg->regset->base = hsotg->regs;
 
-	file = debugfs_create_regset32("regdump", 0444, hsotg->debug_root,
-				       hsotg->regset);
-	if (!file) {
-		ret = -ENOMEM;
-		goto err1;
-	}
+	debugfs_create_regset32("regdump", 0444, root, hsotg->regset);
 
 	return 0;
-err1:
+err:
 	debugfs_remove_recursive(hsotg->debug_root);
-err0:
 	return ret;
 }
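
The conversions above follow the current debugfs convention: create
calls are not error-checked, since the API is deliberately designed so
that a failed create (for example when debugfs is disabled) degrades
gracefully and, at worst, the files simply do not appear. A minimal
sketch of the idiom, with illustrative names:

	static void foo_debugfs_init(struct foo *f)
	{
		struct dentry *root;

		/* no error checking needed on any of these */
		root = debugfs_create_dir(dev_name(f->dev), NULL);
		f->debug_root = root;

		debugfs_create_file("state", 0444, root, f, &foo_state_fops);
		debugfs_create_file("regs", 0444, root, f, &foo_regs_fops);
	}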
 
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 83cb557..f0d9ccf 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * Copyright (c) 2011 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
@@ -107,7 +107,6 @@
 /**
  * dwc2_gadget_incr_frame_num - Increments the targeted frame number.
  * @hs_ep: The endpoint
- * @increment: The value to increment by
  *
  * This function will also check if the frame number overruns DSTS_SOFFN_LIMIT.
  * If an overrun occurs it will wrap the value and set the frame_overrun flag.
@@ -190,6 +189,8 @@
 
 /**
  * dwc2_hsotg_tx_fifo_count - return count of TX FIFOs in device mode
+ *
+ * @hsotg: Programming view of the DWC_otg controller
  */
 int dwc2_hsotg_tx_fifo_count(struct dwc2_hsotg *hsotg)
 {
@@ -204,6 +205,8 @@
 /**
  * dwc2_hsotg_tx_fifo_total_depth - return total FIFO depth available for
  * device mode TX FIFOs
+ *
+ * @hsotg: Programming view of the DWC_otg controller
  */
 int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg *hsotg)
 {
@@ -227,6 +230,8 @@
 /**
  * dwc2_hsotg_tx_fifo_average_depth - returns average depth of device mode
  * TX FIFOs
+ *
+ * @hsotg: Programming view of the DWC_otg controller
  */
 int dwc2_hsotg_tx_fifo_average_depth(struct dwc2_hsotg *hsotg)
 {
@@ -327,6 +332,7 @@
 }
 
 /**
+ * dwc2_hsotg_ep_alloc_request - allocate USB request structure
  * @ep: USB endpoint to allocate request for.
  * @flags: Allocation flags
  *
@@ -793,9 +799,7 @@
  * @dma_buff: usb requests dma buffer.
  * @len: usb request transfer length.
  *
- * Finds out index of first free entry either in the bottom or up half of
- * descriptor chain depend on which is under SW control and not processed
- * by HW. Then fills that descriptor with the data of the arrived usb request,
+ * Fills the next free descriptor with the data of the arrived usb request,
  * frame info, sets Last and IOC bits, increments next_desc. If filled
  * descriptor is not the first one, removes L bit from the previous descriptor
  * status.
@@ -810,34 +814,17 @@
 	u32 mask = 0;
 
 	maxsize = dwc2_gadget_get_desc_params(hs_ep, &mask);
-	if (len > maxsize) {
-		dev_err(hsotg->dev, "wrong len %d\n", len);
-		return -EINVAL;
-	}
 
-	/*
-	 * If SW has already filled half of chain, then return and wait for
-	 * the other chain to be processed by HW.
-	 */
-	if (hs_ep->next_desc == MAX_DMA_DESC_NUM_GENERIC / 2)
-		return -EBUSY;
-
-	/* Increment frame number by interval for IN */
-	if (hs_ep->dir_in)
-		dwc2_gadget_incr_frame_num(hs_ep);
-
-	index = (MAX_DMA_DESC_NUM_GENERIC / 2) * hs_ep->isoc_chain_num +
-		 hs_ep->next_desc;
-
-	/* Sanity check of calculated index */
-	if ((hs_ep->isoc_chain_num && index > MAX_DMA_DESC_NUM_GENERIC) ||
-	    (!hs_ep->isoc_chain_num && index > MAX_DMA_DESC_NUM_GENERIC / 2)) {
-		dev_err(hsotg->dev, "wrong index %d for iso chain\n", index);
-		return -EINVAL;
-	}
-
+	index = hs_ep->next_desc;
 	desc = &hs_ep->desc_list[index];
 
+	/* Check if the descriptor chain is full */
+	if ((desc->status >> DEV_DMA_BUFF_STS_SHIFT) ==
+	    DEV_DMA_BUFF_STS_HREADY) {
+		dev_dbg(hsotg->dev, "%s: desc chain full\n", __func__);
+		return 1;
+	}
+
 	/* Clear L bit of previous desc if more than one entry in the chain */
 	if (hs_ep->next_desc)
 		hs_ep->desc_list[index - 1].status &= ~DEV_DMA_L;
@@ -865,8 +852,14 @@
 	desc->status &= ~DEV_DMA_BUFF_STS_MASK;
 	desc->status |= (DEV_DMA_BUFF_STS_HREADY << DEV_DMA_BUFF_STS_SHIFT);
 
+	/* Increment frame number by interval for IN */
+	if (hs_ep->dir_in)
+		dwc2_gadget_incr_frame_num(hs_ep);
+
 	/* Update index of last configured entry in the chain */
 	hs_ep->next_desc++;
+	if (hs_ep->next_desc >= MAX_DMA_DESC_NUM_GENERIC)
+		hs_ep->next_desc = 0;
 
 	return 0;
 }
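
This hunk replaces the old scheme of two alternating half-chains with a
single circular descriptor chain: ownership is tracked per descriptor
via the buffer-status field, and next_desc simply wraps at the end of
the chain. A self-contained model of the producer side (sizes and
status values are illustrative stand-ins, not the driver's):

	#define RING_SIZE	64	/* stands in for MAX_DMA_DESC_NUM_GENERIC */
	#define STS_HBUSY	0	/* descriptor owned by software */
	#define STS_HREADY	1	/* descriptor handed to hardware */

	struct ring {
		unsigned int status[RING_SIZE];	/* buffer-status field only */
		unsigned int next;		/* producer index (next_desc) */
		unsigned int compl;		/* consumer index (compl_desc) */
	};

	/* Returns nonzero when the chain is full, like the patched
	 * dwc2_gadget_fill_isoc_desc() above. */
	static int ring_fill(struct ring *r)
	{
		if (r->status[r->next] == STS_HREADY)
			return 1;	/* hardware still owns this slot */

		r->status[r->next] = STS_HREADY;	/* hand it to hardware */
		if (++r->next >= RING_SIZE)
			r->next = 0;
		return 0;
	}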
@@ -875,11 +868,8 @@
  * dwc2_gadget_start_isoc_ddma - start isochronous transfer in DDMA
  * @hs_ep: The isochronous endpoint.
  *
- * Prepare first descriptor chain for isochronous endpoints. Afterwards
+ * Prepare descriptor chain for isochronous endpoints. Afterwards
  * write DMA address to HW and enable the endpoint.
- *
- * Switch between descriptor chains via isoc_chain_num to give SW opportunity
- * to prepare second descriptor chain while first one is being processed by HW.
  */
 static void dwc2_gadget_start_isoc_ddma(struct dwc2_hsotg_ep *hs_ep)
 {
@@ -887,24 +877,34 @@
 	struct dwc2_hsotg_req *hs_req, *treq;
 	int index = hs_ep->index;
 	int ret;
+	int i;
 	u32 dma_reg;
 	u32 depctl;
 	u32 ctrl;
+	struct dwc2_dma_desc *desc;
 
 	if (list_empty(&hs_ep->queue)) {
 		dev_dbg(hsotg->dev, "%s: No requests in queue\n", __func__);
 		return;
 	}
 
+	/* Initialize descriptor chain with Host Busy status */
+	for (i = 0; i < MAX_DMA_DESC_NUM_GENERIC; i++) {
+		desc = &hs_ep->desc_list[i];
+		desc->status = 0;
+		desc->status |= (DEV_DMA_BUFF_STS_HBUSY
+				    << DEV_DMA_BUFF_STS_SHIFT);
+	}
+
+	hs_ep->next_desc = 0;
 	list_for_each_entry_safe(hs_req, treq, &hs_ep->queue, queue) {
 		ret = dwc2_gadget_fill_isoc_desc(hs_ep, hs_req->req.dma,
 						 hs_req->req.length);
-		if (ret) {
-			dev_dbg(hsotg->dev, "%s: desc chain full\n", __func__);
+		if (ret)
 			break;
-		}
 	}
 
+	hs_ep->compl_desc = 0;
 	depctl = hs_ep->dir_in ? DIEPCTL(index) : DOEPCTL(index);
 	dma_reg = hs_ep->dir_in ? DIEPDMA(index) : DOEPDMA(index);
 
@@ -914,10 +914,6 @@
 	ctrl = dwc2_readl(hsotg->regs + depctl);
 	ctrl |= DXEPCTL_EPENA | DXEPCTL_CNAK;
 	dwc2_writel(ctrl, hsotg->regs + depctl);
-
-	/* Switch ISOC descriptor chain number being processed by SW*/
-	hs_ep->isoc_chain_num = (hs_ep->isoc_chain_num ^ 1) & 0x1;
-	hs_ep->next_desc = 0;
 }
 
 /**
@@ -1235,7 +1231,7 @@
 {
 	struct dwc2_hsotg *hsotg = hs_ep->parent;
 	u32 target_frame = hs_ep->target_frame;
-	u32 current_frame = dwc2_hsotg_read_frameno(hsotg);
+	u32 current_frame = hsotg->frame_number;
 	bool frame_overrun = hs_ep->frame_overrun;
 
 	if (!frame_overrun && current_frame >= target_frame)
@@ -1291,6 +1287,9 @@
 	struct dwc2_hsotg *hs = hs_ep->parent;
 	bool first;
 	int ret;
+	u32 maxsize = 0;
+	u32 mask = 0;
+
 
 	dev_dbg(hs->dev, "%s: req %p: %d@%p, noi=%d, zero=%d, snok=%d\n",
 		ep->name, req, req->length, req->buf, req->no_interrupt,
@@ -1308,6 +1307,24 @@
 	req->actual = 0;
 	req->status = -EINPROGRESS;
 
+	/* In DDMA mode, don't queue an ISOC request if its length is greater
+	 * than the descriptor limits.
+	 */
+	if (using_desc_dma(hs) && hs_ep->isochronous) {
+		maxsize = dwc2_gadget_get_desc_params(hs_ep, &mask);
+		if (hs_ep->dir_in && req->length > maxsize) {
+			dev_err(hs->dev, "wrong length %d (maxsize=%d)\n",
+				req->length, maxsize);
+			return -EINVAL;
+		}
+
+		if (!hs_ep->dir_in && req->length > hs_ep->ep.maxpacket) {
+			dev_err(hs->dev, "ISOC OUT: wrong length %d (mps=%d)\n",
+				req->length, hs_ep->ep.maxpacket);
+			return -EINVAL;
+		}
+	}
+
 	ret = dwc2_hsotg_handle_unaligned_buf_start(hs, hs_ep, hs_req);
 	if (ret)
 		return ret;
@@ -1330,17 +1347,15 @@
 
 	/*
 	 * Handle DDMA isochronous transfers separately - just add new entry
-	 * to the half of descriptor chain that is not processed by HW.
+	 * to the descriptor chain.
 	 * Transfer will be started once SW gets either one of NAK or
 	 * OutTknEpDis interrupts.
 	 */
-	if (using_desc_dma(hs) && hs_ep->isochronous &&
-	    hs_ep->target_frame != TARGET_FRAME_INITIAL) {
-		ret = dwc2_gadget_fill_isoc_desc(hs_ep, hs_req->req.dma,
-						 hs_req->req.length);
-		if (ret)
-			dev_dbg(hs->dev, "%s: ISO desc chain full\n", __func__);
-
+	if (using_desc_dma(hs) && hs_ep->isochronous) {
+		if (hs_ep->target_frame != TARGET_FRAME_INITIAL) {
+			dwc2_gadget_fill_isoc_desc(hs_ep, hs_req->req.dma,
+						   hs_req->req.length);
+		}
 		return 0;
 	}
 
@@ -1350,8 +1365,15 @@
 			return 0;
 		}
 
-		while (dwc2_gadget_target_frame_elapsed(hs_ep))
+		/* Update current frame number value. */
+		hs->frame_number = dwc2_hsotg_read_frameno(hs);
+		while (dwc2_gadget_target_frame_elapsed(hs_ep)) {
 			dwc2_gadget_incr_frame_num(hs_ep);
+			/* Update the current frame number once more, as it
+			 * may have advanced meanwhile.
+			 */
+			hs->frame_number = dwc2_hsotg_read_frameno(hs);
+		}
 
 		if (hs_ep->target_frame != TARGET_FRAME_INITIAL)
 			dwc2_hsotg_start_req(hs, hs_ep, hs_req, false);
@@ -2011,108 +2033,75 @@
  * @hs_ep: The endpoint the request was on.
  *
  * Get first request from the ep queue, determine descriptor on which complete
- * happened. SW based on isoc_chain_num discovers which half of the descriptor
- * chain is currently in use by HW, adjusts dma_address and calculates index
- * of completed descriptor based on the value of DEPDMA register. Update actual
- * length of request, giveback to gadget.
+ * happened. SW discovers which descriptor is currently in use by HW, adjusts
+ * dma_address and calculates index of completed descriptor based on the value
+ * of DEPDMA register. Update actual length of request, giveback to gadget.
  */
 static void dwc2_gadget_complete_isoc_request_ddma(struct dwc2_hsotg_ep *hs_ep)
 {
 	struct dwc2_hsotg *hsotg = hs_ep->parent;
 	struct dwc2_hsotg_req *hs_req;
 	struct usb_request *ureq;
-	int index;
-	dma_addr_t dma_addr;
-	u32 dma_reg;
-	u32 depdma;
 	u32 desc_sts;
 	u32 mask;
 
-	hs_req = get_ep_head(hs_ep);
-	if (!hs_req) {
-		dev_warn(hsotg->dev, "%s: ISOC EP queue empty\n", __func__);
-		return;
+	desc_sts = hs_ep->desc_list[hs_ep->compl_desc].status;
+
+	/* Process only descriptors with buffer status set to DMA done */
+	while ((desc_sts & DEV_DMA_BUFF_STS_MASK) >>
+		DEV_DMA_BUFF_STS_SHIFT == DEV_DMA_BUFF_STS_DMADONE) {
+
+		hs_req = get_ep_head(hs_ep);
+		if (!hs_req) {
+			dev_warn(hsotg->dev, "%s: ISOC EP queue empty\n", __func__);
+			return;
+		}
+		ureq = &hs_req->req;
+
+		/* Check completion status */
+		if ((desc_sts & DEV_DMA_STS_MASK) >> DEV_DMA_STS_SHIFT ==
+			DEV_DMA_STS_SUCC) {
+			mask = hs_ep->dir_in ? DEV_DMA_ISOC_TX_NBYTES_MASK :
+				DEV_DMA_ISOC_RX_NBYTES_MASK;
+			ureq->actual = ureq->length - ((desc_sts & mask) >>
+				DEV_DMA_ISOC_NBYTES_SHIFT);
+
+			/* Adjust actual len for ISOC Out if len is
+			 * not aligned to 4
+			 */
+			if (!hs_ep->dir_in && ureq->length & 0x3)
+				ureq->actual += 4 - (ureq->length & 0x3);
+		}
+
+		dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, 0);
+
+		hs_ep->compl_desc++;
+		if (hs_ep->compl_desc > (MAX_DMA_DESC_NUM_GENERIC - 1))
+			hs_ep->compl_desc = 0;
+		desc_sts = hs_ep->desc_list[hs_ep->compl_desc].status;
 	}
-	ureq = &hs_req->req;
-
-	dma_addr = hs_ep->desc_list_dma;
-
-	/*
-	 * If lower half of  descriptor chain is currently use by SW,
-	 * that means higher half is being processed by HW, so shift
-	 * DMA address to higher half of descriptor chain.
-	 */
-	if (!hs_ep->isoc_chain_num)
-		dma_addr += sizeof(struct dwc2_dma_desc) *
-			    (MAX_DMA_DESC_NUM_GENERIC / 2);
-
-	dma_reg = hs_ep->dir_in ? DIEPDMA(hs_ep->index) : DOEPDMA(hs_ep->index);
-	depdma = dwc2_readl(hsotg->regs + dma_reg);
-
-	index = (depdma - dma_addr) / sizeof(struct dwc2_dma_desc) - 1;
-	desc_sts = hs_ep->desc_list[index].status;
-
-	mask = hs_ep->dir_in ? DEV_DMA_ISOC_TX_NBYTES_MASK :
-	       DEV_DMA_ISOC_RX_NBYTES_MASK;
-	ureq->actual = ureq->length -
-		       ((desc_sts & mask) >> DEV_DMA_ISOC_NBYTES_SHIFT);
-
-	/* Adjust actual length for ISOC Out if length is not align of 4 */
-	if (!hs_ep->dir_in && ureq->length & 0x3)
-		ureq->actual += 4 - (ureq->length & 0x3);
-
-	dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, 0);
 }
 
 /*
- * dwc2_gadget_start_next_isoc_ddma - start next isoc request, if any.
- * @hs_ep: The isochronous endpoint to be re-enabled.
+ * dwc2_gadget_handle_isoc_bna - handle BNA interrupt for ISOC.
+ * @hs_ep: The isochronous endpoint.
  *
- * If ep has been disabled due to last descriptor servicing (IN endpoint) or
- * BNA (OUT endpoint) check the status of other half of descriptor chain that
- * was under SW control till HW was busy and restart the endpoint if needed.
+ * If the EP is ISOC OUT, the RX FIFO must be flushed to remove the
+ * source of the BNA interrupt. Reset target_frame and next_desc so
+ * that ISOCs can restart on the NAK interrupt (IN direction) or the
+ * OUTTKNEPDIS interrupt (OUT direction).
  */
-static void dwc2_gadget_start_next_isoc_ddma(struct dwc2_hsotg_ep *hs_ep)
+static void dwc2_gadget_handle_isoc_bna(struct dwc2_hsotg_ep *hs_ep)
 {
 	struct dwc2_hsotg *hsotg = hs_ep->parent;
-	u32 depctl;
-	u32 dma_reg;
-	u32 ctrl;
-	u32 dma_addr = hs_ep->desc_list_dma;
-	unsigned char index = hs_ep->index;
 
-	dma_reg = hs_ep->dir_in ? DIEPDMA(index) : DOEPDMA(index);
-	depctl = hs_ep->dir_in ? DIEPCTL(index) : DOEPCTL(index);
+	if (!hs_ep->dir_in)
+		dwc2_flush_rx_fifo(hsotg);
+	dwc2_hsotg_complete_request(hsotg, hs_ep, get_ep_head(hs_ep), 0);
 
-	ctrl = dwc2_readl(hsotg->regs + depctl);
-
-	/*
-	 * EP was disabled if HW has processed last descriptor or BNA was set.
-	 * So restart ep if SW has prepared new descriptor chain in ep_queue
-	 * routine while HW was busy.
-	 */
-	if (!(ctrl & DXEPCTL_EPENA)) {
-		if (!hs_ep->next_desc) {
-			dev_dbg(hsotg->dev, "%s: No more ISOC requests\n",
-				__func__);
-			return;
-		}
-
-		dma_addr += sizeof(struct dwc2_dma_desc) *
-			    (MAX_DMA_DESC_NUM_GENERIC / 2) *
-			    hs_ep->isoc_chain_num;
-		dwc2_writel(dma_addr, hsotg->regs + dma_reg);
-
-		ctrl |= DXEPCTL_EPENA | DXEPCTL_CNAK;
-		dwc2_writel(ctrl, hsotg->regs + depctl);
-
-		/* Switch ISOC descriptor chain number being processed by SW*/
-		hs_ep->isoc_chain_num = (hs_ep->isoc_chain_num ^ 1) & 0x1;
-		hs_ep->next_desc = 0;
-
-		dev_dbg(hsotg->dev, "%s: Restarted isochronous endpoint\n",
-			__func__);
-	}
+	hs_ep->target_frame = TARGET_FRAME_INITIAL;
+	hs_ep->next_desc = 0;
+	hs_ep->compl_desc = 0;
 }
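
dwc2_gadget_complete_isoc_request_ddma() above is the consumer side of
the same circular chain: compl_desc chases next_desc, completing
entries whose buffer status has become "DMA done" and stopping at the
first entry hardware still owns. Continuing the illustrative ring model
from the previous sketch (STS_DMADONE stands in for
DEV_DMA_BUFF_STS_DMADONE):

	#define STS_DMADONE	2

	static void ring_complete(struct ring *r)
	{
		while (r->status[r->compl] == STS_DMADONE) {
			/* give the corresponding usb_request back here;
			 * the model reclaims the slot directly, whereas the
			 * driver reinitializes the whole chain on restart */
			r->status[r->compl] = STS_HBUSY;
			if (++r->compl >= RING_SIZE)
				r->compl = 0;
		}
	}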
 
 /**
@@ -2441,6 +2430,7 @@
  * @ep: The index number of the endpoint
  * @mps: The maximum packet size in bytes
  * @mc: The multicount value
+ * @dir_in: True if direction is in.
  *
  * Configure the maximum packet size for the given endpoint, updating
  * the hardware control registers to reflect this.
@@ -2731,6 +2721,8 @@
 			dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req,
 						    -ENODATA);
 		dwc2_gadget_incr_frame_num(hs_ep);
+		/* Update current frame number value. */
+		hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg);
 	} while (dwc2_gadget_target_frame_elapsed(hs_ep));
 
 	dwc2_gadget_start_next_request(hs_ep);
@@ -2738,7 +2730,7 @@
 
 /**
  * dwc2_gadget_handle_out_token_ep_disabled - handle DXEPINT_OUTTKNEPDIS
- * @hs_ep: The endpoint on which interrupt is asserted.
+ * @ep: The endpoint on which interrupt is asserted.
  *
  * This is starting point for ISOC-OUT transfer, synchronization done with
  * first out token received from host while corresponding EP is disabled.
@@ -2763,7 +2755,7 @@
 	 */
 	tmp = dwc2_hsotg_read_frameno(hsotg);
 
-	dwc2_hsotg_complete_request(hsotg, ep, get_ep_head(ep), -ENODATA);
+	dwc2_hsotg_complete_request(hsotg, ep, get_ep_head(ep), 0);
 
 	if (using_desc_dma(hsotg)) {
 		if (ep->target_frame == TARGET_FRAME_INITIAL) {
@@ -2816,18 +2808,25 @@
 {
 	struct dwc2_hsotg *hsotg = hs_ep->parent;
 	int dir_in = hs_ep->dir_in;
+	u32 tmp;
 
 	if (!dir_in || !hs_ep->isochronous)
 		return;
 
 	if (hs_ep->target_frame == TARGET_FRAME_INITIAL) {
-		hs_ep->target_frame = dwc2_hsotg_read_frameno(hsotg);
 
+		tmp = dwc2_hsotg_read_frameno(hsotg);
 		if (using_desc_dma(hsotg)) {
+			dwc2_hsotg_complete_request(hsotg, hs_ep,
+						    get_ep_head(hs_ep), 0);
+
+			hs_ep->target_frame = tmp;
+			dwc2_gadget_incr_frame_num(hs_ep);
 			dwc2_gadget_start_isoc_ddma(hs_ep);
 			return;
 		}
 
+		hs_ep->target_frame = tmp;
 		if (hs_ep->interval > 1) {
 			u32 ctrl = dwc2_readl(hsotg->regs +
 					      DIEPCTL(hs_ep->index));
@@ -2843,7 +2842,8 @@
 					    get_ep_head(hs_ep), 0);
 	}
 
-	dwc2_gadget_incr_frame_num(hs_ep);
+	if (!using_desc_dma(hsotg))
+		dwc2_gadget_incr_frame_num(hs_ep);
 }
 
 /**
@@ -2901,9 +2901,9 @@
 
 		/* In DDMA handle isochronous requests separately */
 		if (using_desc_dma(hsotg) && hs_ep->isochronous) {
-			dwc2_gadget_complete_isoc_request_ddma(hs_ep);
-			/* Try to start next isoc request */
-			dwc2_gadget_start_next_isoc_ddma(hs_ep);
+			/* XferCompl set along with BNA */
+			if (!(ints & DXEPINT_BNAINTR))
+				dwc2_gadget_complete_isoc_request_ddma(hs_ep);
 		} else if (dir_in) {
 			/*
 			 * We get OutDone from the FIFO, so we only
@@ -2978,15 +2978,8 @@
 
 	if (ints & DXEPINT_BNAINTR) {
 		dev_dbg(hsotg->dev, "%s: BNA interrupt\n", __func__);
-
-		/*
-		 * Try to start next isoc request, if any.
-		 * Sometimes the endpoint remains enabled after BNA interrupt
-		 * assertion, which is not expected, hence we can enter here
-		 * couple of times.
-		 */
 		if (hs_ep->isochronous)
-			dwc2_gadget_start_next_isoc_ddma(hs_ep);
+			dwc2_gadget_handle_isoc_bna(hs_ep);
 	}
 
 	if (dir_in && !hs_ep->isochronous) {
@@ -3197,6 +3190,7 @@
 /**
  * dwc2_hsotg_core_init - issue softreset to the core
  * @hsotg: The device state
+ * @is_usb_reset: USB reset flag
  *
  * Issue a soft reset to the core, and await the core finishing it.
  */
@@ -3259,6 +3253,9 @@
 		dcfg |= DCFG_DEVSPD_HS;
 	}
 
+	if (hsotg->params.ipg_isoc_en)
+		dcfg |= DCFG_IPG_ISOC_SUPPORDED;
+
 	dwc2_writel(dcfg,  hsotg->regs + DCFG);
 
 	/* Clear any pending OTG interrupts */
@@ -3320,8 +3317,10 @@
 		hsotg->regs + DOEPMSK);
 
 	/* Enable BNA interrupt for DDMA */
-	if (using_desc_dma(hsotg))
+	if (using_desc_dma(hsotg)) {
 		dwc2_set_bit(hsotg->regs + DOEPMSK, DOEPMSK_BNAMSK);
+		dwc2_set_bit(hsotg->regs + DIEPMSK, DIEPMSK_BNAININTRMSK);
+	}
 
 	dwc2_writel(0, hsotg->regs + DAINTMSK);
 
@@ -3427,7 +3426,7 @@
 
 	daintmsk = dwc2_readl(hsotg->regs + DAINTMSK);
 
-	for (idx = 1; idx <= hsotg->num_of_eps; idx++) {
+	for (idx = 1; idx < hsotg->num_of_eps; idx++) {
 		hs_ep = hsotg->eps_in[idx];
 		/* Proceed only unmasked ISOC EPs */
 		if (!hs_ep->isochronous || (BIT(idx) & ~daintmsk))
@@ -3473,7 +3472,7 @@
 	daintmsk = dwc2_readl(hsotg->regs + DAINTMSK);
 	daintmsk >>= DAINT_OUTEP_SHIFT;
 
-	for (idx = 1; idx <= hsotg->num_of_eps; idx++) {
+	for (idx = 1; idx < hsotg->num_of_eps; idx++) {
 		hs_ep = hsotg->eps_out[idx];
 		/* Proceed only unmasked ISOC EPs */
 		if (!hs_ep->isochronous || (BIT(idx) & ~daintmsk))
@@ -3647,7 +3646,7 @@
 		dwc2_writel(gintmsk, hsotg->regs + GINTMSK);
 
 		dev_dbg(hsotg->dev, "GOUTNakEff triggered\n");
-		for (idx = 1; idx <= hsotg->num_of_eps; idx++) {
+		for (idx = 1; idx < hsotg->num_of_eps; idx++) {
 			hs_ep = hsotg->eps_out[idx];
 			/* Proceed only unmasked ISOC EPs */
 			if (!hs_ep->isochronous || (BIT(idx) & ~daintmsk))
@@ -3789,6 +3788,7 @@
 	unsigned int dir_in;
 	unsigned int i, val, size;
 	int ret = 0;
+	unsigned char ep_type;
 
 	dev_dbg(hsotg->dev,
 		"%s: ep %s: a 0x%02x, attr 0x%02x, mps 0x%04x, intr %d\n",
@@ -3807,9 +3807,26 @@
 		return -EINVAL;
 	}
 
+	ep_type = desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK;
 	mps = usb_endpoint_maxp(desc);
 	mc = usb_endpoint_maxp_mult(desc);
 
+	/* ISOC IN in DDMA supports bInterval values up to 10 */
+	if (using_desc_dma(hsotg) && ep_type == USB_ENDPOINT_XFER_ISOC &&
+	    dir_in && desc->bInterval > 10) {
+		dev_err(hsotg->dev,
+			"%s: ISOC IN, DDMA: bInterval>10 not supported!\n", __func__);
+		return -EINVAL;
+	}
+
+	/* High bandwidth ISOC OUT in DDMA not supported */
+	if (using_desc_dma(hsotg) && ep_type == USB_ENDPOINT_XFER_ISOC &&
+	    !dir_in && mc > 1) {
+		dev_err(hsotg->dev,
+			"%s: ISOC OUT, DDMA: HB not supported!\n", __func__);
+		return -EINVAL;
+	}
+
 	/* note, we handle this here instead of dwc2_hsotg_set_ep_maxpacket */
 
 	epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index);
@@ -3850,15 +3867,15 @@
 	hs_ep->halted = 0;
 	hs_ep->interval = desc->bInterval;
 
-	switch (desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
+	switch (ep_type) {
 	case USB_ENDPOINT_XFER_ISOC:
 		epctrl |= DXEPCTL_EPTYPE_ISO;
 		epctrl |= DXEPCTL_SETEVENFR;
 		hs_ep->isochronous = 1;
 		hs_ep->interval = 1 << (desc->bInterval - 1);
 		hs_ep->target_frame = TARGET_FRAME_INITIAL;
-		hs_ep->isoc_chain_num = 0;
 		hs_ep->next_desc = 0;
+		hs_ep->compl_desc = 0;
 		if (dir_in) {
 			hs_ep->periodic = 1;
 			mask = dwc2_readl(hsotg->regs + DIEPMSK);
@@ -4301,7 +4318,6 @@
 /**
  * dwc2_hsotg_udc_stop - stop the udc
  * @gadget: The usb gadget state
- * @driver: The usb gadget driver
  *
  * Stop udc hw block and stay tuned for future transmissions
  */
@@ -4453,6 +4469,7 @@
  * @hsotg: The device state.
  * @hs_ep: The endpoint to be initialised.
  * @epnum: The endpoint number
+ * @dir_in: True if direction is in.
  *
  * Initialise the given endpoint (as part of the probe and device state
  * creation) to give to the gadget driver. Setup the endpoint name, any
@@ -4526,7 +4543,7 @@
 
 /**
  * dwc2_hsotg_hw_cfg - read HW configuration registers
- * @param: The device state
+ * @hsotg: Programming view of the DWC_otg controller
  *
  * Read the USB core HW configuration registers
  */
@@ -4582,7 +4599,8 @@
 
 /**
  * dwc2_hsotg_dump - dump state of the udc
- * @param: The device state
+ * @hsotg: Programming view of the DWC_otg controller
+ *
  */
 static void dwc2_hsotg_dump(struct dwc2_hsotg *hsotg)
 {
@@ -4633,7 +4651,8 @@
 
 /**
  * dwc2_gadget_init - init function for gadget
- * @dwc2: The data structure for the DWC2 driver.
+ * @hsotg: Programming view of the DWC_otg controller
+ *
  */
 int dwc2_gadget_init(struct dwc2_hsotg *hsotg)
 {
@@ -4730,7 +4749,8 @@
 
 /**
  * dwc2_hsotg_remove - remove function for hsotg driver
- * @pdev: The platform information for the driver
+ * @hsotg: Programming view of the DWC_otg controller
+ *
  */
 int dwc2_hsotg_remove(struct dwc2_hsotg *hsotg)
 {
@@ -5011,7 +5031,7 @@
  *
  * @hsotg: Programming view of the DWC_otg controller
  * @rem_wakeup: indicates whether resume is initiated by Device or Host.
- * @param reset: indicates whether resume is initiated by Reset.
+ * @reset: indicates whether resume is initiated by Reset.
  *
  * Return non-zero if failed to exit from hibernation.
  */
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index c51b73b..1faefea 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -597,7 +597,7 @@
  * dwc2_read_packet() - Reads a packet from the Rx FIFO into the destination
  * buffer
  *
- * @core_if: Programming view of DWC_otg controller
+ * @hsotg: Programming view of DWC_otg controller
  * @dest:    Destination buffer for the packet
  * @bytes:   Number of bytes to copy to the destination
  */
@@ -4087,7 +4087,6 @@
  * then the refcount for the structure will go to 0 and we'll free it.
  *
  * @hsotg:     The HCD state structure for the DWC OTG controller.
- * @qh:        The QH structure.
  * @context:   The priv pointer from a struct dwc2_hcd_urb.
  * @mem_flags: Flags for allocating memory.
  * @ttport:    We'll return this device's port number here.  That's used to
diff --git a/drivers/usb/dwc2/hcd.h b/drivers/usb/dwc2/hcd.h
index 96a9da5..7db1ee7 100644
--- a/drivers/usb/dwc2/hcd.h
+++ b/drivers/usb/dwc2/hcd.h
@@ -80,7 +80,7 @@
  * @xfer_count:         Number of bytes transferred so far
  * @start_pkt_count:    Packet count at start of transfer
  * @xfer_started:       True if the transfer has been started
- * @ping:               True if a PING request should be issued on this channel
+ * @do_ping:            True if a PING request should be issued on this channel
  * @error_state:        True if the error count for this transaction is non-zero
  * @halt_on_queue:      True if this channel should be halted the next time a
  *                      request is queued for the channel. This is necessary in
@@ -102,7 +102,7 @@
  * @schinfo:            Scheduling micro-frame bitmap
  * @ntd:                Number of transfer descriptors for the transfer
  * @halt_status:        Reason for halting the host channel
- * @hcint               Contents of the HCINT register when the interrupt came
+ * @hcint:              Contents of the HCINT register when the interrupt came
  * @qh:                 QH for the transfer being processed by this channel
  * @hc_list_entry:      For linking to list of host channels
  * @desc_list_addr:     Current QH's descriptor list DMA address
@@ -237,7 +237,7 @@
 /**
  * struct dwc2_hs_transfer_time - Info about a transfer on the high speed bus.
  *
- * @start_schedule_usecs:  The start time on the main bus schedule.  Note that
+ * @start_schedule_us:  The start time on the main bus schedule.  Note that
  *                         the main bus schedule is tightly packed and this
  *			   time should be interpreted as tightly packed (so
  *			   uFrame 0 starts at 0 us, uFrame 1 starts at 100 us
@@ -301,7 +301,6 @@
  *                           "struct dwc2_tt".  Not used if this device is high
  *                           speed.  Note that this is in "schedule slice" which
  *                           is tightly packed.
- * @ls_duration_us:     Duration on the low speed bus schedule.
  * @ntd:                Actual number of transfer descriptors in a list
  * @qtd_list:           List of QTDs for this QH
  * @channel:            Host channel currently processing transfers for this QH
@@ -315,7 +314,7 @@
  *                      descriptor
  * @unreserve_timer:    Timer for releasing periodic reservation.
  * @wait_timer:         Timer used to wait before re-queuing.
- * @dwc2_tt:            Pointer to our tt info (or NULL if no tt).
+ * @dwc_tt:            Pointer to our tt info (or NULL if no tt).
  * @ttport:             Port number within our tt.
  * @tt_buffer_dirty     True if clear_tt_buffer_complete is pending
  * @unreserve_pending:  True if we planned to unreserve but haven't yet.
@@ -325,6 +324,7 @@
  *                      periodic transfers and is ignored for periodic ones.
  * @wait_timer_cancel:  Set to true to cancel the wait_timer.
  *
+ * @tt_buffer_dirty:	True if EP's TT buffer is not clean.
  * A Queue Head (QH) holds the static characteristics of an endpoint and
  * maintains a list of transfers (QTDs) for that endpoint. A QH structure may
  * be entered in either the non-periodic or periodic schedule.
@@ -400,6 +400,10 @@
  * @urb:                URB for this transfer
  * @qh:                 Queue head for this QTD
  * @qtd_list_entry:     For linking to the QH's list of QTDs
+ * @isoc_td_first:	Index of first activated isochronous transfer
+ *			descriptor in Descriptor DMA mode
+ * @isoc_td_last:	Index of last activated isochronous transfer
+ *			descriptor in Descriptor DMA mode
  *
  * A Queue Transfer Descriptor (QTD) holds the state of a bulk, control,
  * interrupt, or isochronous transfer. A single QTD is created for each URB
diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c
index 28c8898..74f11c8 100644
--- a/drivers/usb/dwc2/hcd_ddma.c
+++ b/drivers/usb/dwc2/hcd_ddma.c
@@ -332,6 +332,7 @@
  *
  * @hsotg: The HCD state structure for the DWC OTG controller
  * @qh:    The QH to init
+ * @mem_flags: Indicates the type of memory allocation
  *
  * Return: 0 if successful, negative error code otherwise
  *
diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index a5dfd9d..fbea5e3 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c
@@ -478,6 +478,12 @@
  * of the URB based on the number of bytes transferred via the host channel.
  * Sets the URB status if the data transfer is finished.
  *
+ * @hsotg: Programming view of the DWC_otg controller
+ * @chan: Programming view of host channel
+ * @chnum: Channel number
+ * @urb: Processing URB
+ * @qtd: Queue transfer descriptor
+ *
  * Return: 1 if the data transfer specified by the URB is completely finished,
  * 0 otherwise
  */
@@ -566,6 +572,12 @@
  * halt_status. Completes the Isochronous URB if all the URB frames have been
  * completed.
  *
+ * @hsotg: Programming view of the DWC_otg controller
+ * @chan: Programming view of host channel
+ * @chnum: Channel number
+ * @halt_status: Reason for halting a host channel
+ * @qtd: Queue transfer descriptor
+ *
  * Return: DWC2_HC_XFER_COMPLETE if there are more frames remaining to be
  * transferred in the URB. Otherwise return DWC2_HC_XFER_URB_COMPLETE.
  */
diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
index e34ad5e..d7c3d6c 100644
--- a/drivers/usb/dwc2/hcd_queue.c
+++ b/drivers/usb/dwc2/hcd_queue.c
@@ -679,6 +679,7 @@
  *
  * @hsotg:       The HCD state structure for the DWC OTG controller.
  * @qh:          QH for the periodic transfer.
+ * @index:       Transfer index
  */
 static void dwc2_hs_pmap_unschedule(struct dwc2_hsotg *hsotg,
 				    struct dwc2_qh *qh, int index)
@@ -1276,7 +1277,7 @@
  * release the reservation.  This worker is called after the appropriate
  * delay.
  *
- * @work: Pointer to a qh unreserve_work.
+ * @t: Address of a qh unreserve_work.
  */
 static void dwc2_unreserve_timer_fn(struct timer_list *t)
 {
@@ -1631,7 +1632,7 @@
  * @hsotg:        The HCD state structure for the DWC OTG controller
  * @urb:          Holds the information about the device/endpoint needed
  *                to initialize the QH
- * @atomic_alloc: Flag to do atomic allocation if needed
+ * @mem_flags:   Flags for allocating memory.
  *
  * Return: Pointer to the newly allocated QH, or NULL on error
  */
diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h
index 38391e4..0ca8e7b 100644
--- a/drivers/usb/dwc2/hw.h
+++ b/drivers/usb/dwc2/hw.h
@@ -311,6 +311,7 @@
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK	(0x3 << 14)
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_SHIFT	14
 #define GHWCFG4_ACG_SUPPORTED			BIT(12)
+#define GHWCFG4_IPG_ISOC_SUPPORTED		BIT(11)
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_8		0
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_16		1
 #define GHWCFG4_UTMI_PHY_DATA_WIDTH_8_OR_16	2
@@ -424,6 +425,7 @@
 #define DCFG_EPMISCNT_SHIFT		18
 #define DCFG_EPMISCNT_LIMIT		0x1f
 #define DCFG_EPMISCNT(_x)		((_x) << 18)
+#define DCFG_IPG_ISOC_SUPPORDED		BIT(17)
 #define DCFG_PERFRINT_MASK		(0x3 << 11)
 #define DCFG_PERFRINT_SHIFT		11
 #define DCFG_PERFRINT_LIMIT		0x3
diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index f03e418..af075d4 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -70,6 +70,7 @@
 		GAHBCFG_HBSTLEN_SHIFT;
 	p->uframe_sched = false;
 	p->change_speed_quirk = true;
+	p->power_down = false;
 }
 
 static void dwc2_set_rk_params(struct dwc2_hsotg *hsotg)
@@ -269,6 +270,9 @@
 /**
  * dwc2_set_default_params() - Set all core parameters to their
  * auto-detected default values.
+ *
+ * @hsotg: Programming view of the DWC_otg controller
+ *
  */
 static void dwc2_set_default_params(struct dwc2_hsotg *hsotg)
 {
@@ -298,6 +302,7 @@
 	p->besl = true;
 	p->hird_threshold_en = true;
 	p->hird_threshold = 4;
+	p->ipg_isoc_en = false;
 	p->max_packet_count = hw->max_packet_count;
 	p->max_transfer_size = hw->max_transfer_size;
 	p->ahbcfg = GAHBCFG_HBSTLEN_INCR << GAHBCFG_HBSTLEN_SHIFT;
@@ -338,6 +343,8 @@
 /**
  * dwc2_get_device_properties() - Read in device properties.
  *
+ * @hsotg: Programming view of the DWC_otg controller
+ *
  * Read in the device properties and adjust core parameters if needed.
  */
 static void dwc2_get_device_properties(struct dwc2_hsotg *hsotg)
@@ -549,7 +556,7 @@
 }
 
 #define CHECK_RANGE(_param, _min, _max, _def) do {			\
-		if ((hsotg->params._param) < (_min) ||			\
+		if ((int)(hsotg->params._param) < (_min) ||		\
 		    (hsotg->params._param) > (_max)) {			\
 			dev_warn(hsotg->dev, "%s: Invalid parameter %s=%d\n", \
 				 __func__, #_param, hsotg->params._param); \
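
The new (int) cast makes the lower-bound comparison signed. Without it,
a parameter field of unsigned type holding a negative sentinel such as
-1 is promoted, and the < check compares against a huge unsigned value
instead. A self-contained illustration of the promotion pitfall (plain
userspace C, not driver code):

	#include <stdio.h>

	int main(void)
	{
		unsigned int param = (unsigned int)-1;	/* a -1 "auto" sentinel */

		if (param < 16)			/* false: param is UINT_MAX */
			printf("below minimum\n");

		if ((int)param < 16)		/* true, as intended */
			printf("below minimum, caught by the cast\n");

		return 0;
	}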
@@ -579,6 +586,7 @@
 	CHECK_BOOL(enable_dynamic_fifo, hw->enable_dynamic_fifo);
 	CHECK_BOOL(en_multiple_tx_fifo, hw->en_multiple_tx_fifo);
 	CHECK_BOOL(i2c_enable, hw->i2c_enable);
+	CHECK_BOOL(ipg_isoc_en, hw->ipg_isoc_en);
 	CHECK_BOOL(acg_enable, hw->acg_enable);
 	CHECK_BOOL(reload_ctl, (hsotg->hw_params.snpsid > DWC2_CORE_REV_2_92a));
 	CHECK_BOOL(lpm, (hsotg->hw_params.snpsid >= DWC2_CORE_REV_2_80a));
@@ -688,6 +696,9 @@
 /**
  * During device initialization, read various hardware configuration
  * registers and interpret the contents.
+ *
+ * @hsotg: Programming view of the DWC_otg controller
+ *
  */
 int dwc2_get_hwparams(struct dwc2_hsotg *hsotg)
 {
@@ -772,6 +783,7 @@
 	hw->utmi_phy_data_width = (hwcfg4 & GHWCFG4_UTMI_PHY_DATA_WIDTH_MASK) >>
 				  GHWCFG4_UTMI_PHY_DATA_WIDTH_SHIFT;
 	hw->acg_enable = !!(hwcfg4 & GHWCFG4_ACG_SUPPORTED);
+	hw->ipg_isoc_en = !!(hwcfg4 & GHWCFG4_IPG_ISOC_SUPPORTED);
 
 	/* fifo sizes */
 	hw->rx_fifo_size = (grxfsiz & GRXFSIZ_DEPTH_MASK) >>
diff --git a/drivers/usb/dwc2/pci.c b/drivers/usb/dwc2/pci.c
index bea2e8e..d257c54 100644
--- a/drivers/usb/dwc2/pci.c
+++ b/drivers/usb/dwc2/pci.c
@@ -77,6 +77,12 @@
 	return 0;
 }
 
+/**
+ * dwc2_pci_remove() - Provides the cleanup entry point for the DWC_otg PCI
+ * driver
+ *
+ * @pci: The programming view of DWC_otg PCI
+ */
 static void dwc2_pci_remove(struct pci_dev *pci)
 {
 	struct dwc2_pci_glue *glue = pci_get_drvdata(pci);
diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index ab8c0e0..451012e 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -106,4 +106,16 @@
 	  inside (i.e. STiH407).
 	  Say 'Y' or 'M' if you have one such device.
 
+config USB_DWC3_QCOM
+	tristate "Qualcomm Platform"
+	depends on ARCH_QCOM || COMPILE_TEST
+	depends on OF
+	default USB_DWC3
+	help
+	  Some Qualcomm SoCs use DesignWare Core IP for USB2/3
+	  functionality.
+	  This driver also handles the QSCRATCH wrapper, which is
+	  needed for peripheral mode support.
+	  Say 'Y' or 'M' if you have one such device.
+
 endif
diff --git a/drivers/usb/dwc3/Makefile b/drivers/usb/dwc3/Makefile
index 025bc68..5c07d8f 100644
--- a/drivers/usb/dwc3/Makefile
+++ b/drivers/usb/dwc3/Makefile
@@ -48,3 +48,4 @@
 obj-$(CONFIG_USB_DWC3_KEYSTONE)		+= dwc3-keystone.o
 obj-$(CONFIG_USB_DWC3_OF_SIMPLE)	+= dwc3-of-simple.o
 obj-$(CONFIG_USB_DWC3_ST)		+= dwc3-st.o
+obj-$(CONFIG_USB_DWC3_QCOM)		+= dwc3-qcom.o
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index a15648d..ea91310 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -8,6 +8,7 @@
  *	    Sebastian Andrzej Siewior <bigeasy@linutronix.de>
  */
 
+#include <linux/clk.h>
 #include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -24,6 +25,7 @@
 #include <linux/of.h>
 #include <linux/acpi.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/reset.h>
 
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
@@ -266,6 +268,12 @@
 	return 0;
 }
 
+static const struct clk_bulk_data dwc3_core_clks[] = {
+	{ .id = "ref" },
+	{ .id = "bus_early" },
+	{ .id = "suspend" },
+};
+
 /*
  * dwc3_frame_length_adjustment - Adjusts frame length if required
  * @dwc3: Pointer to our controller context structure
@@ -667,6 +675,9 @@
 	usb_phy_set_suspend(dwc->usb3_phy, 1);
 	phy_power_off(dwc->usb2_generic_phy);
 	phy_power_off(dwc->usb3_generic_phy);
+	clk_bulk_disable(dwc->num_clks, dwc->clks);
+	clk_bulk_unprepare(dwc->num_clks, dwc->clks);
+	reset_control_assert(dwc->reset);
 }
 
 static bool dwc3_core_is_valid(struct dwc3 *dwc)
@@ -1245,7 +1256,7 @@
 static int dwc3_probe(struct platform_device *pdev)
 {
 	struct device		*dev = &pdev->dev;
-	struct resource		*res;
+	struct resource		*res, dwc_res;
 	struct dwc3		*dwc;
 
 	int			ret;
@@ -1256,6 +1267,12 @@
 	if (!dwc)
 		return -ENOMEM;
 
+	dwc->clks = devm_kmemdup(dev, dwc3_core_clks, sizeof(dwc3_core_clks),
+				 GFP_KERNEL);
+	if (!dwc->clks)
+		return -ENOMEM;
+
+	dwc->num_clks = ARRAY_SIZE(dwc3_core_clks);
 	dwc->dev = dev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1270,23 +1287,48 @@
 	dwc->xhci_resources[0].flags = res->flags;
 	dwc->xhci_resources[0].name = res->name;
 
-	res->start += DWC3_GLOBALS_REGS_START;
-
 	/*
 	 * Request memory region but exclude xHCI regs,
 	 * since it will be requested by the xhci-plat driver.
 	 */
-	regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(regs)) {
-		ret = PTR_ERR(regs);
-		goto err0;
-	}
+	dwc_res = *res;
+	dwc_res.start += DWC3_GLOBALS_REGS_START;
+
+	regs = devm_ioremap_resource(dev, &dwc_res);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
 
 	dwc->regs	= regs;
-	dwc->regs_size	= resource_size(res);
+	dwc->regs_size	= resource_size(&dwc_res);
 
 	dwc3_get_properties(dwc);
 
+	dwc->reset = devm_reset_control_get_optional_shared(dev, NULL);
+	if (IS_ERR(dwc->reset))
+		return PTR_ERR(dwc->reset);
+
+	ret = clk_bulk_get(dev, dwc->num_clks, dwc->clks);
+	if (ret == -EPROBE_DEFER)
+		return ret;
+	/*
+	 * Clocks are optional, but new DT platforms should support all clocks
+	 * as required by the DT-binding.
+	 */
+	if (ret)
+		dwc->num_clks = 0;
+
+	ret = reset_control_deassert(dwc->reset);
+	if (ret)
+		goto put_clks;
+
+	ret = clk_bulk_prepare(dwc->num_clks, dwc->clks);
+	if (ret)
+		goto assert_reset;
+
+	ret = clk_bulk_enable(dwc->num_clks, dwc->clks);
+	if (ret)
+		goto unprepare_clks;
+
 	platform_set_drvdata(pdev, dwc);
 	dwc3_cache_hwparams(dwc);
 
@@ -1350,13 +1392,13 @@
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-err0:
-	/*
-	 * restore res->start back to its original value so that, in case the
-	 * probe is deferred, we don't end up getting error in request the
-	 * memory region the next time probe is called.
-	 */
-	res->start -= DWC3_GLOBALS_REGS_START;
+	clk_bulk_disable(dwc->num_clks, dwc->clks);
+unprepare_clks:
+	clk_bulk_unprepare(dwc->num_clks, dwc->clks);
+assert_reset:
+	reset_control_assert(dwc->reset);
+put_clks:
+	clk_bulk_put(dwc->num_clks, dwc->clks);
 
 	return ret;
 }
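
The rewritten probe error path is the standard goto-unwind ladder: each
label releases exactly the resources acquired before the failing step,
in reverse order of acquisition. Reduced to its skeleton (a sketch of
the pattern, not the driver function):

	static int bringup(struct reset_control *rst, int num,
			   struct clk_bulk_data *clks)
	{
		int ret;

		ret = reset_control_deassert(rst);
		if (ret)
			return ret;

		ret = clk_bulk_prepare(num, clks);
		if (ret)
			goto assert_reset;

		ret = clk_bulk_enable(num, clks);
		if (ret)
			goto unprepare;

		return 0;

	unprepare:
		clk_bulk_unprepare(num, clks);
	assert_reset:
		reset_control_assert(rst);
		return ret;
	}

The same acquire/unwind ordering is reused by the new
dwc3_core_init_for_resume() further down.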
@@ -1364,15 +1406,8 @@
 static int dwc3_remove(struct platform_device *pdev)
 {
 	struct dwc3	*dwc = platform_get_drvdata(pdev);
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
 	pm_runtime_get_sync(&pdev->dev);
-	/*
-	 * restore res->start back to its original value so that, in case the
-	 * probe is deferred, we don't end up getting error in request the
-	 * memory region the next time probe is called.
-	 */
-	res->start -= DWC3_GLOBALS_REGS_START;
 
 	dwc3_debugfs_exit(dwc);
 	dwc3_core_exit_mode(dwc);
@@ -1386,14 +1421,48 @@
 
 	dwc3_free_event_buffers(dwc);
 	dwc3_free_scratch_buffers(dwc);
+	clk_bulk_put(dwc->num_clks, dwc->clks);
 
 	return 0;
 }
 
 #ifdef CONFIG_PM
+static int dwc3_core_init_for_resume(struct dwc3 *dwc)
+{
+	int ret;
+
+	ret = reset_control_deassert(dwc->reset);
+	if (ret)
+		return ret;
+
+	ret = clk_bulk_prepare(dwc->num_clks, dwc->clks);
+	if (ret)
+		goto assert_reset;
+
+	ret = clk_bulk_enable(dwc->num_clks, dwc->clks);
+	if (ret)
+		goto unprepare_clks;
+
+	ret = dwc3_core_init(dwc);
+	if (ret)
+		goto disable_clks;
+
+	return 0;
+
+disable_clks:
+	clk_bulk_disable(dwc->num_clks, dwc->clks);
+unprepare_clks:
+	clk_bulk_unprepare(dwc->num_clks, dwc->clks);
+assert_reset:
+	reset_control_assert(dwc->reset);
+
+	return ret;
+}
+
 static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
 {
 	unsigned long	flags;
+	u32 reg;
 
 	switch (dwc->current_dr_role) {
 	case DWC3_GCTL_PRTCAP_DEVICE:
@@ -1403,9 +1472,25 @@
 		dwc3_core_exit(dwc);
 		break;
 	case DWC3_GCTL_PRTCAP_HOST:
-		/* do nothing during host runtime_suspend */
-		if (!PMSG_IS_AUTO(msg))
+		if (!PMSG_IS_AUTO(msg)) {
 			dwc3_core_exit(dwc);
+			break;
+		}
+
+		/* Let the controller suspend the HSPHY before the PHY driver does */
+		if (dwc->dis_u2_susphy_quirk ||
+		    dwc->dis_enblslpm_quirk) {
+			reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+			reg |=  DWC3_GUSB2PHYCFG_ENBLSLPM |
+				DWC3_GUSB2PHYCFG_SUSPHY;
+			dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+
+			/* Give some time for USB2 PHY to suspend */
+			usleep_range(5000, 6000);
+		}
+
+		phy_pm_runtime_put_sync(dwc->usb2_generic_phy);
+		phy_pm_runtime_put_sync(dwc->usb3_generic_phy);
 		break;
 	case DWC3_GCTL_PRTCAP_OTG:
 		/* do nothing during runtime_suspend */
@@ -1433,10 +1518,11 @@
 {
 	unsigned long	flags;
 	int		ret;
+	u32		reg;
 
 	switch (dwc->current_dr_role) {
 	case DWC3_GCTL_PRTCAP_DEVICE:
-		ret = dwc3_core_init(dwc);
+		ret = dwc3_core_init_for_resume(dwc);
 		if (ret)
 			return ret;
 
@@ -1446,13 +1532,25 @@
 		spin_unlock_irqrestore(&dwc->lock, flags);
 		break;
 	case DWC3_GCTL_PRTCAP_HOST:
-		/* nothing to do on host runtime_resume */
 		if (!PMSG_IS_AUTO(msg)) {
-			ret = dwc3_core_init(dwc);
+			ret = dwc3_core_init_for_resume(dwc);
 			if (ret)
 				return ret;
 			dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_HOST);
+			break;
 		}
+		/* Restore GUSB2PHYCFG bits that were modified in suspend */
+		reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+		if (dwc->dis_u2_susphy_quirk)
+			reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
+
+		if (dwc->dis_enblslpm_quirk)
+			reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
+
+		dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+
+		phy_pm_runtime_get_sync(dwc->usb2_generic_phy);
+		phy_pm_runtime_get_sync(dwc->usb3_generic_phy);
 		break;
 	case DWC3_GCTL_PRTCAP_OTG:
 		/* nothing to do on runtime_resume */
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 4f3b438..285ce0e 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -639,8 +639,6 @@
  * @resource_index: Resource transfer index
  * @frame_number: set to the frame number we want this transfer to start (ISOC)
  * @interval: the interval on which the ISOC transfer is started
- * @allocated_requests: number of requests allocated
- * @queued_requests: number of requests queued for transfer
  * @name: a human readable name e.g. ep1out-bulk
  * @direction: true for TX, false for RX
  * @stream_capable: true when streams are enabled
@@ -664,11 +662,9 @@
 #define DWC3_EP_ENABLED		BIT(0)
 #define DWC3_EP_STALL		BIT(1)
 #define DWC3_EP_WEDGE		BIT(2)
-#define DWC3_EP_BUSY		BIT(4)
+#define DWC3_EP_TRANSFER_STARTED BIT(3)
 #define DWC3_EP_PENDING_REQUEST	BIT(5)
-#define DWC3_EP_MISSED_ISOC	BIT(6)
 #define DWC3_EP_END_TRANSFER_PENDING	BIT(7)
-#define DWC3_EP_TRANSFER_STARTED BIT(8)
 
 	/* This last one is specific to EP0 */
 #define DWC3_EP0_DIR_IN		BIT(31)
@@ -688,8 +684,6 @@
 	u8			number;
 	u8			type;
 	u8			resource_index;
-	u32			allocated_requests;
-	u32			queued_requests;
 	u32			frame_number;
 	u32			interval;
 
@@ -832,7 +826,9 @@
  * @list: a list_head used for request queueing
  * @dep: struct dwc3_ep owning this request
  * @sg: pointer to first incomplete sg
+ * @start_sg: pointer to the sg which should be queued next
  * @num_pending_sgs: counter to pending sgs
+ * @num_queued_sgs: counter to the number of sgs which already got queued
  * @remaining: amount of data remaining
  * @epnum: endpoint number to which this request refers
  * @trb: pointer to struct dwc3_trb
@@ -848,8 +844,10 @@
 	struct list_head	list;
 	struct dwc3_ep		*dep;
 	struct scatterlist	*sg;
+	struct scatterlist	*start_sg;
 
 	unsigned		num_pending_sgs;
+	unsigned int		num_queued_sgs;
 	unsigned		remaining;
 	u8			epnum;
 	struct dwc3_trb		*trb;
@@ -891,6 +889,9 @@
  * @eps: endpoint array
  * @gadget: device side representation of the peripheral controller
  * @gadget_driver: pointer to the gadget driver
+ * @clks: array of clocks
+ * @num_clks: number of clocks
+ * @reset: reset control
  * @regs: base address for our registers
  * @regs_size: address space size
  * @fladj: frame length adjustment
@@ -1013,6 +1014,11 @@
 	struct usb_gadget	gadget;
 	struct usb_gadget_driver *gadget_driver;
 
+	struct clk_bulk_data	*clks;
+	int			num_clks;
+
+	struct reset_control	*reset;
+
 	struct usb_phy		*usb2_phy;
 	struct usb_phy		*usb3_phy;
 
@@ -1197,11 +1203,12 @@
 /* Within XferNotReady */
 #define DEPEVT_STATUS_TRANSFER_ACTIVE	BIT(3)
 
-/* Within XferComplete */
+/* Within XferComplete or XferInProgress */
 #define DEPEVT_STATUS_BUSERR	BIT(0)
 #define DEPEVT_STATUS_SHORT	BIT(1)
 #define DEPEVT_STATUS_IOC	BIT(2)
-#define DEPEVT_STATUS_LST	BIT(3)
+#define DEPEVT_STATUS_LST	BIT(3) /* XferComplete */
+#define DEPEVT_STATUS_MISSED_ISOC BIT(3) /* XferInProgress */
 
 /* Stream event only */
 #define DEPEVT_STREAMEVT_FOUND		1
diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h
index bfb90c5..c66d216 100644
--- a/drivers/usb/dwc3/debug.h
+++ b/drivers/usb/dwc3/debug.h
@@ -475,21 +475,37 @@
 	if (ret < 0)
 		return "UNKNOWN";
 
+	status = event->status;
+
 	switch (event->endpoint_event) {
 	case DWC3_DEPEVT_XFERCOMPLETE:
-		strcat(str, "Transfer Complete");
+		len = strlen(str);
+		sprintf(str + len, "Transfer Complete (%c%c%c)",
+				status & DEPEVT_STATUS_SHORT ? 'S' : 's',
+				status & DEPEVT_STATUS_IOC ? 'I' : 'i',
+				status & DEPEVT_STATUS_LST ? 'L' : 'l');
+
 		len = strlen(str);
 
 		if (epnum <= 1)
 			sprintf(str + len, " [%s]", dwc3_ep0_state_string(ep0state));
 		break;
 	case DWC3_DEPEVT_XFERINPROGRESS:
-		strcat(str, "Transfer In-Progress");
+		len = strlen(str);
+
+		sprintf(str + len, "Transfer In Progress [%d] (%c%c%c)",
+				event->parameters,
+				status & DEPEVT_STATUS_SHORT ? 'S' : 's',
+				status & DEPEVT_STATUS_IOC ? 'I' : 'i',
+				status & DEPEVT_STATUS_LST ? 'M' : 'm');
 		break;
 	case DWC3_DEPEVT_XFERNOTREADY:
-		strcat(str, "Transfer Not Ready");
-		status = event->status & DEPEVT_STATUS_TRANSFER_ACTIVE;
-		strcat(str, status ? " (Active)" : " (Not Active)");
+		len = strlen(str);
+
+		sprintf(str + len, "Transfer Not Ready [%d]%s",
+				event->parameters,
+				status & DEPEVT_STATUS_TRANSFER_ACTIVE ?
+				" (Active)" : " (Not Active)");
 
 		/* Control Endpoints */
 		if (epnum <= 1) {
diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c
index 2f07be1..df8e73e 100644
--- a/drivers/usb/dwc3/debugfs.c
+++ b/drivers/usb/dwc3/debugfs.c
@@ -716,9 +716,6 @@
 	struct dentry		*dir;
 
 	dir = debugfs_create_dir(dep->name, parent);
-	if (IS_ERR_OR_NULL(dir))
-		return;
-
 	dwc3_debugfs_create_endpoint_files(dep, dir);
 }
 
@@ -740,49 +737,31 @@
 void dwc3_debugfs_init(struct dwc3 *dwc)
 {
 	struct dentry		*root;
-	struct dentry           *file;
-
-	root = debugfs_create_dir(dev_name(dwc->dev), NULL);
-	if (IS_ERR_OR_NULL(root)) {
-		if (!root)
-			dev_err(dwc->dev, "Can't create debugfs root\n");
-		return;
-	}
-	dwc->root = root;
 
 	dwc->regset = kzalloc(sizeof(*dwc->regset), GFP_KERNEL);
-	if (!dwc->regset) {
-		debugfs_remove_recursive(root);
+	if (!dwc->regset)
 		return;
-	}
 
 	dwc->regset->regs = dwc3_regs;
 	dwc->regset->nregs = ARRAY_SIZE(dwc3_regs);
 	dwc->regset->base = dwc->regs - DWC3_GLOBALS_REGS_START;
 
-	file = debugfs_create_regset32("regdump", S_IRUGO, root, dwc->regset);
-	if (!file)
-		dev_dbg(dwc->dev, "Can't create debugfs regdump\n");
+	root = debugfs_create_dir(dev_name(dwc->dev), NULL);
+	dwc->root = root;
+
+	debugfs_create_regset32("regdump", S_IRUGO, root, dwc->regset);
 
 	if (IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)) {
-		file = debugfs_create_file("mode", S_IRUGO | S_IWUSR, root,
-				dwc, &dwc3_mode_fops);
-		if (!file)
-			dev_dbg(dwc->dev, "Can't create debugfs mode\n");
+		debugfs_create_file("mode", S_IRUGO | S_IWUSR, root, dwc,
+				    &dwc3_mode_fops);
 	}
 
 	if (IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) ||
 			IS_ENABLED(CONFIG_USB_DWC3_GADGET)) {
-		file = debugfs_create_file("testmode", S_IRUGO | S_IWUSR, root,
-				dwc, &dwc3_testmode_fops);
-		if (!file)
-			dev_dbg(dwc->dev, "Can't create debugfs testmode\n");
-
-		file = debugfs_create_file("link_state", S_IRUGO | S_IWUSR,
-				root, dwc, &dwc3_link_state_fops);
-		if (!file)
-			dev_dbg(dwc->dev, "Can't create debugfs link_state\n");
-
+		debugfs_create_file("testmode", S_IRUGO | S_IWUSR, root, dwc,
+				    &dwc3_testmode_fops);
+		debugfs_create_file("link_state", S_IRUGO | S_IWUSR, root, dwc,
+				    &dwc3_link_state_fops);
 		dwc3_debugfs_create_endpoint_dirs(dwc, root);
 	}
 }
diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c
index 1d8c557..218371f 100644
--- a/drivers/usb/dwc3/drd.c
+++ b/drivers/usb/dwc3/drd.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/extcon.h>
+#include <linux/of_graph.h>
 #include <linux/platform_device.h>
 
 #include "debug.h"
@@ -439,17 +440,38 @@
 	return NOTIFY_DONE;
 }
 
+static struct extcon_dev *dwc3_get_extcon(struct dwc3 *dwc)
+{
+	struct device *dev = dwc->dev;
+	struct device_node *np_phy, *np_conn;
+	struct extcon_dev *edev;
+
+	if (of_property_read_bool(dev->of_node, "extcon"))
+		return extcon_get_edev_by_phandle(dwc->dev, 0);
+
+	np_phy = of_parse_phandle(dev->of_node, "phys", 0);
+	np_conn = of_graph_get_remote_node(np_phy, -1, -1);
+
+	if (np_conn)
+		edev = extcon_find_edev_by_node(np_conn);
+	else
+		edev = NULL;
+
+	of_node_put(np_conn);
+	of_node_put(np_phy);
+
+	return edev;
+}
+
 int dwc3_drd_init(struct dwc3 *dwc)
 {
 	int ret, irq;
 
-	if (dwc->dev->of_node &&
-	    of_property_read_bool(dwc->dev->of_node, "extcon")) {
-		dwc->edev = extcon_get_edev_by_phandle(dwc->dev, 0);
+	dwc->edev = dwc3_get_extcon(dwc);
+	if (IS_ERR(dwc->edev))
+		return PTR_ERR(dwc->edev);
 
-		if (IS_ERR(dwc->edev))
-			return PTR_ERR(dwc->edev);
-
+	if (dwc->edev) {
 		dwc->edev_nb.notifier_call = dwc3_drd_notifier;
 		ret = extcon_register_notifier(dwc->edev, EXTCON_USB_HOST,
 					       &dwc->edev_nb);
diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c
index cb2ee96..6b3ccd5 100644
--- a/drivers/usb/dwc3/dwc3-of-simple.c
+++ b/drivers/usb/dwc3/dwc3-of-simple.c
@@ -208,13 +208,13 @@
 };
 
 static const struct of_device_id of_dwc3_simple_match[] = {
-	{ .compatible = "qcom,dwc3" },
 	{ .compatible = "rockchip,rk3399-dwc3" },
 	{ .compatible = "xlnx,zynqmp-dwc3" },
 	{ .compatible = "cavium,octeon-7130-usb-uctl" },
 	{ .compatible = "sprd,sc9860-dwc3" },
 	{ .compatible = "amlogic,meson-axg-dwc3" },
 	{ .compatible = "amlogic,meson-gxl-dwc3" },
+	{ .compatible = "allwinner,sun50i-h6-dwc3" },
 	{ /* Sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, of_dwc3_simple_match);
diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c
new file mode 100644
index 0000000..b0e67ab
--- /dev/null
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ * Inspired by dwc3-of-simple.c
+ */
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/extcon.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/phy/phy.h>
+#include <linux/usb/of.h>
+#include <linux/reset.h>
+#include <linux/iopoll.h>
+
+#include "core.h"
+
+/* USB QSCRATCH Hardware registers */
+#define QSCRATCH_HS_PHY_CTRL			0x10
+#define UTMI_OTG_VBUS_VALID			BIT(20)
+#define SW_SESSVLD_SEL				BIT(28)
+
+#define QSCRATCH_SS_PHY_CTRL			0x30
+#define LANE0_PWR_PRESENT			BIT(24)
+
+#define QSCRATCH_GENERAL_CFG			0x08
+#define PIPE_UTMI_CLK_SEL			BIT(0)
+#define PIPE3_PHYSTATUS_SW			BIT(3)
+#define PIPE_UTMI_CLK_DIS			BIT(8)
+
+#define PWR_EVNT_IRQ_STAT_REG			0x58
+#define PWR_EVNT_LPM_IN_L2_MASK			BIT(4)
+#define PWR_EVNT_LPM_OUT_L2_MASK		BIT(5)
+
+struct dwc3_qcom {
+	struct device		*dev;
+	void __iomem		*qscratch_base;
+	struct platform_device	*dwc3;
+	struct clk		**clks;
+	int			num_clocks;
+	struct reset_control	*resets;
+
+	int			hs_phy_irq;
+	int			dp_hs_phy_irq;
+	int			dm_hs_phy_irq;
+	int			ss_phy_irq;
+
+	struct extcon_dev	*edev;
+	struct extcon_dev	*host_edev;
+	struct notifier_block	vbus_nb;
+	struct notifier_block	host_nb;
+
+	enum usb_dr_mode	mode;
+	bool			is_suspended;
+	bool			pm_suspended;
+};
+
+static inline void dwc3_qcom_setbits(void __iomem *base, u32 offset, u32 val)
+{
+	u32 reg;
+
+	reg = readl(base + offset);
+	reg |= val;
+	writel(reg, base + offset);
+
+	/* ensure that the above write went through */
+	readl(base + offset);
+}
+
+static inline void dwc3_qcom_clrbits(void __iomem *base, u32 offset, u32 val)
+{
+	u32 reg;
+
+	reg = readl(base + offset);
+	reg &= ~val;
+	writel(reg, base + offset);
+
+	/* ensure that the above write went through */
+	readl(base + offset);
+}
+
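
The trailing readl() in the two helpers above is the usual guard
against posted writes: writel() may return before the data reaches the
device, and reading the same register back forces the write to complete
before the helper returns. The pattern in isolation (illustrative):

	static void reg_set_bits(void __iomem *base, u32 offset, u32 bits)
	{
		writel(readl(base + offset) | bits, base + offset);
		readl(base + offset);	/* flush the posted write */
	}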
+static void dwc3_qcom_vbus_overrride_enable(struct dwc3_qcom *qcom, bool enable)
+{
+	if (enable) {
+		dwc3_qcom_setbits(qcom->qscratch_base, QSCRATCH_SS_PHY_CTRL,
+				  LANE0_PWR_PRESENT);
+		dwc3_qcom_setbits(qcom->qscratch_base, QSCRATCH_HS_PHY_CTRL,
+				  UTMI_OTG_VBUS_VALID | SW_SESSVLD_SEL);
+	} else {
+		dwc3_qcom_clrbits(qcom->qscratch_base, QSCRATCH_SS_PHY_CTRL,
+				  LANE0_PWR_PRESENT);
+		dwc3_qcom_clrbits(qcom->qscratch_base, QSCRATCH_HS_PHY_CTRL,
+				  UTMI_OTG_VBUS_VALID | SW_SESSVLD_SEL);
+	}
+}
+
+static int dwc3_qcom_vbus_notifier(struct notifier_block *nb,
+				   unsigned long event, void *ptr)
+{
+	struct dwc3_qcom *qcom = container_of(nb, struct dwc3_qcom, vbus_nb);
+
+	/* enable vbus override for device mode */
+	dwc3_qcom_vbus_overrride_enable(qcom, event);
+	qcom->mode = event ? USB_DR_MODE_PERIPHERAL : USB_DR_MODE_HOST;
+
+	return NOTIFY_DONE;
+}
+
+static int dwc3_qcom_host_notifier(struct notifier_block *nb,
+				   unsigned long event, void *ptr)
+{
+	struct dwc3_qcom *qcom = container_of(nb, struct dwc3_qcom, host_nb);
+
+	/* disable vbus override in host mode */
+	dwc3_qcom_vbus_override_enable(qcom, !event);
+	qcom->mode = event ? USB_DR_MODE_HOST : USB_DR_MODE_PERIPHERAL;
+
+	return NOTIFY_DONE;
+}
+
+static int dwc3_qcom_register_extcon(struct dwc3_qcom *qcom)
+{
+	struct device		*dev = qcom->dev;
+	struct extcon_dev	*host_edev;
+	int			ret;
+
+	if (!of_property_read_bool(dev->of_node, "extcon"))
+		return 0;
+
+	qcom->edev = extcon_get_edev_by_phandle(dev, 0);
+	if (IS_ERR(qcom->edev))
+		return PTR_ERR(qcom->edev);
+
+	qcom->vbus_nb.notifier_call = dwc3_qcom_vbus_notifier;
+
+	qcom->host_edev = extcon_get_edev_by_phandle(dev, 1);
+	if (IS_ERR(qcom->host_edev))
+		qcom->host_edev = NULL;
+
+	ret = devm_extcon_register_notifier(dev, qcom->edev, EXTCON_USB,
+					    &qcom->vbus_nb);
+	if (ret < 0) {
+		dev_err(dev, "VBUS notifier register failed\n");
+		return ret;
+	}
+
+	if (qcom->host_edev)
+		host_edev = qcom->host_edev;
+	else
+		host_edev = qcom->edev;
+
+	qcom->host_nb.notifier_call = dwc3_qcom_host_notifier;
+	ret = devm_extcon_register_notifier(dev, host_edev, EXTCON_USB_HOST,
+					    &qcom->host_nb);
+	if (ret < 0) {
+		dev_err(dev, "Host notifier register failed\n");
+		return ret;
+	}
+
+	/* Update initial VBUS override based on extcon state */
+	if (extcon_get_state(qcom->edev, EXTCON_USB) ||
+	    !extcon_get_state(host_edev, EXTCON_USB_HOST))
+		dwc3_qcom_vbus_notifier(&qcom->vbus_nb, true, qcom->edev);
+	else
+		dwc3_qcom_vbus_notifier(&qcom->vbus_nb, false, qcom->edev);
+
+	return 0;
+}
+
+static void dwc3_qcom_disable_interrupts(struct dwc3_qcom *qcom)
+{
+	if (qcom->hs_phy_irq) {
+		disable_irq_wake(qcom->hs_phy_irq);
+		disable_irq_nosync(qcom->hs_phy_irq);
+	}
+
+	if (qcom->dp_hs_phy_irq) {
+		disable_irq_wake(qcom->dp_hs_phy_irq);
+		disable_irq_nosync(qcom->dp_hs_phy_irq);
+	}
+
+	if (qcom->dm_hs_phy_irq) {
+		disable_irq_wake(qcom->dm_hs_phy_irq);
+		disable_irq_nosync(qcom->dm_hs_phy_irq);
+	}
+
+	if (qcom->ss_phy_irq) {
+		disable_irq_wake(qcom->ss_phy_irq);
+		disable_irq_nosync(qcom->ss_phy_irq);
+	}
+}
+
+static void dwc3_qcom_enable_interrupts(struct dwc3_qcom *qcom)
+{
+	if (qcom->hs_phy_irq) {
+		enable_irq(qcom->hs_phy_irq);
+		enable_irq_wake(qcom->hs_phy_irq);
+	}
+
+	if (qcom->dp_hs_phy_irq) {
+		enable_irq(qcom->dp_hs_phy_irq);
+		enable_irq_wake(qcom->dp_hs_phy_irq);
+	}
+
+	if (qcom->dm_hs_phy_irq) {
+		enable_irq(qcom->dm_hs_phy_irq);
+		enable_irq_wake(qcom->dm_hs_phy_irq);
+	}
+
+	if (qcom->ss_phy_irq) {
+		enable_irq(qcom->ss_phy_irq);
+		enable_irq_wake(qcom->ss_phy_irq);
+	}
+}
+
+static int dwc3_qcom_suspend(struct dwc3_qcom *qcom)
+{
+	u32 val;
+	int i;
+
+	if (qcom->is_suspended)
+		return 0;
+
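+	/* The controller should have put the HS PHY into L2 before clocks are gated */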
+	val = readl(qcom->qscratch_base + PWR_EVNT_IRQ_STAT_REG);
+	if (!(val & PWR_EVNT_LPM_IN_L2_MASK))
+		dev_err(qcom->dev, "HS-PHY not in L2\n");
+
+	for (i = qcom->num_clocks - 1; i >= 0; i--)
+		clk_disable_unprepare(qcom->clks[i]);
+
+	qcom->is_suspended = true;
+	dwc3_qcom_enable_interrupts(qcom);
+
+	return 0;
+}
+
+static int dwc3_qcom_resume(struct dwc3_qcom *qcom)
+{
+	int ret;
+	int i;
+
+	if (!qcom->is_suspended)
+		return 0;
+
+	dwc3_qcom_disable_interrupts(qcom);
+
+	for (i = 0; i < qcom->num_clocks; i++) {
+		ret = clk_prepare_enable(qcom->clks[i]);
+		if (ret < 0) {
+			while (--i >= 0)
+				clk_disable_unprepare(qcom->clks[i]);
+			return ret;
+		}
+	}
+
+	/* Clear existing events from PHY related to L2 in/out */
+	dwc3_qcom_setbits(qcom->qscratch_base, PWR_EVNT_IRQ_STAT_REG,
+			  PWR_EVNT_LPM_IN_L2_MASK | PWR_EVNT_LPM_OUT_L2_MASK);
+
+	qcom->is_suspended = false;
+
+	return 0;
+}
+
+static irqreturn_t qcom_dwc3_resume_irq(int irq, void *data)
+{
+	struct dwc3_qcom *qcom = data;
+	struct dwc3	*dwc = platform_get_drvdata(qcom->dwc3);
+
+	/* If pm_suspended then let pm_resume take care of resuming h/w */
+	if (qcom->pm_suspended)
+		return IRQ_HANDLED;
+
+	if (dwc->xhci)
+		pm_runtime_resume(&dwc->xhci->dev);
+
+	return IRQ_HANDLED;
+}
+
+static void dwc3_qcom_select_utmi_clk(struct dwc3_qcom *qcom)
+{
+	/* Configure dwc3 to use the UTMI clock, as the PIPE clock is not present */
+	dwc3_qcom_setbits(qcom->qscratch_base, QSCRATCH_GENERAL_CFG,
+			  PIPE_UTMI_CLK_DIS);
+
+	usleep_range(100, 1000);
+
+	dwc3_qcom_setbits(qcom->qscratch_base, QSCRATCH_GENERAL_CFG,
+			  PIPE_UTMI_CLK_SEL | PIPE3_PHYSTATUS_SW);
+
+	usleep_range(100, 1000);
+
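+	/* Ungate the clock again now that the UTMI source is selected */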
+	dwc3_qcom_clrbits(qcom->qscratch_base, QSCRATCH_GENERAL_CFG,
+			  PIPE_UTMI_CLK_DIS);
+}
+
+static int dwc3_qcom_setup_irq(struct platform_device *pdev)
+{
+	struct dwc3_qcom *qcom = platform_get_drvdata(pdev);
+	int irq, ret;
+
+	irq = platform_get_irq_byname(pdev, "hs_phy_irq");
+	if (irq > 0) {
+		/* Keep wakeup interrupts disabled until suspend */
+		irq_set_status_flags(irq, IRQ_NOAUTOEN);
+		ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
+					qcom_dwc3_resume_irq,
+					IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+					"qcom_dwc3 HS", qcom);
+		if (ret) {
+			dev_err(qcom->dev, "hs_phy_irq failed: %d\n", ret);
+			return ret;
+		}
+		qcom->hs_phy_irq = irq;
+	}
+
+	irq = platform_get_irq_byname(pdev, "dp_hs_phy_irq");
+	if (irq > 0) {
+		irq_set_status_flags(irq, IRQ_NOAUTOEN);
+		ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
+					qcom_dwc3_resume_irq,
+					IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+					"qcom_dwc3 DP_HS", qcom);
+		if (ret) {
+			dev_err(qcom->dev, "dp_hs_phy_irq failed: %d\n", ret);
+			return ret;
+		}
+		qcom->dp_hs_phy_irq = irq;
+	}
+
+	irq = platform_get_irq_byname(pdev, "dm_hs_phy_irq");
+	if (irq > 0) {
+		irq_set_status_flags(irq, IRQ_NOAUTOEN);
+		ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
+					qcom_dwc3_resume_irq,
+					IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+					"qcom_dwc3 DM_HS", qcom);
+		if (ret) {
+			dev_err(qcom->dev, "dm_hs_phy_irq failed: %d\n", ret);
+			return ret;
+		}
+		qcom->dm_hs_phy_irq = irq;
+	}
+
+	irq = platform_get_irq_byname(pdev, "ss_phy_irq");
+	if (irq > 0) {
+		irq_set_status_flags(irq, IRQ_NOAUTOEN);
+		ret = devm_request_threaded_irq(qcom->dev, irq, NULL,
+					qcom_dwc3_resume_irq,
+					IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+					"qcom_dwc3 SS", qcom);
+		if (ret) {
+			dev_err(qcom->dev, "ss_phy_irq failed: %d\n", ret);
+			return ret;
+		}
+		qcom->ss_phy_irq = irq;
+	}
+
+	return 0;
+}
+
+static int dwc3_qcom_clk_init(struct dwc3_qcom *qcom, int count)
+{
+	struct device		*dev = qcom->dev;
+	struct device_node	*np = dev->of_node;
+	int			i;
+
+	qcom->num_clocks = count;
+
+	if (!count)
+		return 0;
+
+	qcom->clks = devm_kcalloc(dev, qcom->num_clocks,
+				  sizeof(struct clk *), GFP_KERNEL);
+	if (!qcom->clks)
+		return -ENOMEM;
+
+	for (i = 0; i < qcom->num_clocks; i++) {
+		struct clk	*clk;
+		int		ret;
+
+		clk = of_clk_get(np, i);
+		if (IS_ERR(clk)) {
+			while (--i >= 0)
+				clk_put(qcom->clks[i]);
+			return PTR_ERR(clk);
+		}
+
+		ret = clk_prepare_enable(clk);
+		if (ret < 0) {
+			while (--i >= 0) {
+				clk_disable_unprepare(qcom->clks[i]);
+				clk_put(qcom->clks[i]);
+			}
+			clk_put(clk);
+
+			return ret;
+		}
+
+		qcom->clks[i] = clk;
+	}
+
+	return 0;
+}
+
+static int dwc3_qcom_probe(struct platform_device *pdev)
+{
+	struct device_node	*np = pdev->dev.of_node, *dwc3_np;
+	struct device		*dev = &pdev->dev;
+	struct dwc3_qcom	*qcom;
+	struct resource		*res;
+	int			ret, i;
+	bool			ignore_pipe_clk;
+
+	qcom = devm_kzalloc(&pdev->dev, sizeof(*qcom), GFP_KERNEL);
+	if (!qcom)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, qcom);
+	qcom->dev = &pdev->dev;
+
+	qcom->resets = devm_reset_control_array_get_optional_exclusive(dev);
+	if (IS_ERR(qcom->resets)) {
+		ret = PTR_ERR(qcom->resets);
+		dev_err(&pdev->dev, "failed to get resets, err=%d\n", ret);
+		return ret;
+	}
+
+	ret = reset_control_assert(qcom->resets);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to assert resets, err=%d\n", ret);
+		return ret;
+	}
+
+	usleep_range(10, 1000);
+
+	ret = reset_control_deassert(qcom->resets);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to deassert resets, err=%d\n", ret);
+		goto reset_assert;
+	}
+
+	ret = dwc3_qcom_clk_init(qcom, of_count_phandle_with_args(np,
+						"clocks", "#clock-cells"));
+	if (ret) {
+		dev_err(dev, "failed to get clocks\n");
+		goto reset_assert;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	qcom->qscratch_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(qcom->qscratch_base)) {
+		dev_err(dev, "failed to map qscratch, err=%d\n", ret);
+		ret = PTR_ERR(qcom->qscratch_base);
+		goto clk_disable;
+	}
+
+	ret = dwc3_qcom_setup_irq(pdev);
+	if (ret)
+		goto clk_disable;
+
+	dwc3_np = of_get_child_by_name(np, "dwc3");
+	if (!dwc3_np) {
+		dev_err(dev, "failed to find dwc3 core child\n");
+		ret = -ENODEV;
+		goto clk_disable;
+	}
+
+	/*
+	 * Disable pipe_clk requirement if specified. Used when dwc3
+	 * operates without SSPHY and only HS/FS/LS modes are supported.
+	 */
+	ignore_pipe_clk = device_property_read_bool(dev,
+				"qcom,select-utmi-as-pipe-clk");
+	if (ignore_pipe_clk)
+		dwc3_qcom_select_utmi_clk(qcom);
+
+	ret = of_platform_populate(np, NULL, NULL, dev);
+	if (ret) {
+		dev_err(dev, "failed to register dwc3 core - %d\n", ret);
+		goto clk_disable;
+	}
+
+	qcom->dwc3 = of_find_device_by_node(dwc3_np);
+	if (!qcom->dwc3) {
+		dev_err(&pdev->dev, "failed to get dwc3 platform device\n");
+		goto depopulate;
+	}
+
+	qcom->mode = usb_get_dr_mode(&qcom->dwc3->dev);
+
+	/* enable vbus override for device mode */
+	if (qcom->mode == USB_DR_MODE_PERIPHERAL)
+		dwc3_qcom_vbus_override_enable(qcom, true);
+
+	/* register extcon to override sw_vbus on VBUS changes later */
+	ret = dwc3_qcom_register_extcon(qcom);
+	if (ret)
+		goto depopulate;
+
+	device_init_wakeup(&pdev->dev, 1);
+	qcom->is_suspended = false;
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+	pm_runtime_forbid(dev);
+
+	return 0;
+
+depopulate:
+	of_platform_depopulate(&pdev->dev);
+clk_disable:
+	for (i = qcom->num_clocks - 1; i >= 0; i--) {
+		clk_disable_unprepare(qcom->clks[i]);
+		clk_put(qcom->clks[i]);
+	}
+reset_assert:
+	reset_control_assert(qcom->resets);
+
+	return ret;
+}
+
+static int dwc3_qcom_remove(struct platform_device *pdev)
+{
+	struct dwc3_qcom *qcom = platform_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+	int i;
+
+	of_platform_depopulate(dev);
+
+	for (i = qcom->num_clocks - 1; i >= 0; i--) {
+		clk_disable_unprepare(qcom->clks[i]);
+		clk_put(qcom->clks[i]);
+	}
+	qcom->num_clocks = 0;
+
+	reset_control_assert(qcom->resets);
+
+	pm_runtime_allow(dev);
+	pm_runtime_disable(dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int dwc3_qcom_pm_suspend(struct device *dev)
+{
+	struct dwc3_qcom *qcom = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = dwc3_qcom_suspend(qcom);
+	if (!ret)
+		qcom->pm_suspended = true;
+
+	return ret;
+}
+
+static int dwc3_qcom_pm_resume(struct device *dev)
+{
+	struct dwc3_qcom *qcom = dev_get_drvdata(dev);
+	int ret;
+
+	ret = dwc3_qcom_resume(qcom);
+	if (!ret)
+		qcom->pm_suspended = false;
+
+	return ret;
+}
+#endif
+
+#ifdef CONFIG_PM
+static int dwc3_qcom_runtime_suspend(struct device *dev)
+{
+	struct dwc3_qcom *qcom = dev_get_drvdata(dev);
+
+	return dwc3_qcom_suspend(qcom);
+}
+
+static int dwc3_qcom_runtime_resume(struct device *dev)
+{
+	struct dwc3_qcom *qcom = dev_get_drvdata(dev);
+
+	return dwc3_qcom_resume(qcom);
+}
+#endif
+
+static const struct dev_pm_ops dwc3_qcom_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(dwc3_qcom_pm_suspend, dwc3_qcom_pm_resume)
+	SET_RUNTIME_PM_OPS(dwc3_qcom_runtime_suspend, dwc3_qcom_runtime_resume,
+			   NULL)
+};
+
+static const struct of_device_id dwc3_qcom_of_match[] = {
+	{ .compatible = "qcom,dwc3" },
+	{ .compatible = "qcom,msm8996-dwc3" },
+	{ .compatible = "qcom,sdm845-dwc3" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, dwc3_qcom_of_match);
+
+static struct platform_driver dwc3_qcom_driver = {
+	.probe		= dwc3_qcom_probe,
+	.remove		= dwc3_qcom_remove,
+	.driver		= {
+		.name	= "dwc3-qcom",
+		.pm	= &dwc3_qcom_dev_pm_ops,
+		.of_match_table	= dwc3_qcom_of_match,
+	},
+};
+
+module_platform_driver(dwc3_qcom_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DesignWare DWC3 QCOM Glue Driver");
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 5a991bc..c77ff50 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -66,7 +66,7 @@
 	struct dwc3			*dwc;
 	int				ret;
 
-	if (dep->flags & DWC3_EP_BUSY)
+	if (dep->flags & DWC3_EP_TRANSFER_STARTED)
 		return 0;
 
 	dwc = dep->dwc;
@@ -79,8 +79,6 @@
 	if (ret < 0)
 		return ret;
 
-	dep->flags |= DWC3_EP_BUSY;
-	dep->resource_index = dwc3_gadget_ep_get_transfer_index(dep);
 	dwc->ep0_next_event = DWC3_EP0_COMPLETE;
 
 	return 0;
@@ -913,7 +911,7 @@
 {
 	struct dwc3_ep		*dep = dwc->eps[event->endpoint_number];
 
-	dep->flags &= ~DWC3_EP_BUSY;
+	dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
 	dep->resource_index = 0;
 	dwc->setup_packet_pending = false;
 
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 0dedf8a..69bf137 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -27,6 +27,9 @@
 #include "gadget.h"
 #include "io.h"
 
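+/*
+ * Advance the frame number by one interval and align it to the
+ * interval boundary (the interval is a power of two).
+ */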
+#define DWC3_ALIGN_FRAME(d)	(((d)->frame_number + (d)->interval) \
+					& ~((d)->interval - 1))
+
 /**
  * dwc3_gadget_set_test_mode - enables usb2 test modes
  * @dwc: pointer to our context structure
@@ -375,6 +378,7 @@
 		switch (DWC3_DEPCMD_CMD(cmd)) {
 		case DWC3_DEPCMD_STARTTRANSFER:
 			dep->flags |= DWC3_EP_TRANSFER_STARTED;
+			dwc3_gadget_ep_get_transfer_index(dep);
 			break;
 		case DWC3_DEPCMD_ENDTRANSFER:
 			dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
@@ -455,7 +459,17 @@
 	dep->trb_pool_dma = 0;
 }
 
-static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep);
+static int dwc3_gadget_set_xfer_resource(struct dwc3_ep *dep)
+{
+	struct dwc3_gadget_ep_cmd_params params;
+
+	memset(&params, 0x00, sizeof(params));
+
+	params.param0 = DWC3_DEPXFERCFG_NUM_XFER_RES(1);
+
+	return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETTRANSFRESOURCE,
+			&params);
+}
 
 /**
  * dwc3_gadget_start_config - configure ep resources
@@ -491,9 +505,10 @@
  * triggered only when called for EP0-out, which always happens first, and which
  * should only happen in one of the above conditions.
  */
-static int dwc3_gadget_start_config(struct dwc3 *dwc, struct dwc3_ep *dep)
+static int dwc3_gadget_start_config(struct dwc3_ep *dep)
 {
 	struct dwc3_gadget_ep_cmd_params params;
+	struct dwc3		*dwc;
 	u32			cmd;
 	int			i;
 	int			ret;
@@ -503,6 +518,7 @@
 
 	memset(&params, 0x00, sizeof(params));
 	cmd = DWC3_DEPCMD_DEPSTARTCFG;
+	dwc = dep->dwc;
 
 	ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
 	if (ret)
@@ -514,7 +530,7 @@
 		if (!dep)
 			continue;
 
-		ret = dwc3_gadget_set_xfer_resource(dwc, dep);
+		ret = dwc3_gadget_set_xfer_resource(dep);
 		if (ret)
 			return ret;
 	}
@@ -522,16 +538,12 @@
 	return 0;
 }
 
-static int dwc3_gadget_set_ep_config(struct dwc3 *dwc, struct dwc3_ep *dep,
-		bool modify, bool restore)
+static int dwc3_gadget_set_ep_config(struct dwc3_ep *dep, unsigned int action)
 {
 	const struct usb_ss_ep_comp_descriptor *comp_desc;
 	const struct usb_endpoint_descriptor *desc;
 	struct dwc3_gadget_ep_cmd_params params;
-
-	if (dev_WARN_ONCE(dwc->dev, modify && restore,
-					"Can't modify and restore\n"))
-		return -EINVAL;
+	struct dwc3 *dwc = dep->dwc;
 
 	comp_desc = dep->endpoint.comp_desc;
 	desc = dep->endpoint.desc;
@@ -547,14 +559,9 @@
 		params.param0 |= DWC3_DEPCFG_BURST_SIZE(burst - 1);
 	}
 
-	if (modify) {
-		params.param0 |= DWC3_DEPCFG_ACTION_MODIFY;
-	} else if (restore) {
-		params.param0 |= DWC3_DEPCFG_ACTION_RESTORE;
+	params.param0 |= action;
+	if (action == DWC3_DEPCFG_ACTION_RESTORE)
 		params.param2 |= dep->saved_state;
-	} else {
-		params.param0 |= DWC3_DEPCFG_ACTION_INIT;
-	}
 
 	if (usb_endpoint_xfer_control(desc))
 		params.param1 = DWC3_DEPCFG_XFER_COMPLETE_EN;
@@ -594,29 +601,15 @@
 	return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETEPCONFIG, &params);
 }
 
-static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep)
-{
-	struct dwc3_gadget_ep_cmd_params params;
-
-	memset(&params, 0x00, sizeof(params));
-
-	params.param0 = DWC3_DEPXFERCFG_NUM_XFER_RES(1);
-
-	return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETTRANSFRESOURCE,
-			&params);
-}
-
 /**
  * __dwc3_gadget_ep_enable - initializes a hw endpoint
  * @dep: endpoint to be initialized
- * @modify: if true, modify existing endpoint configuration
- * @restore: if true, restore endpoint configuration from scratch buffer
+ * @action: one of INIT, MODIFY or RESTORE
  *
  * Caller should take care of locking. Execute all necessary commands to
  * initialize a HW endpoint so it can be used by a gadget driver.
  */
-static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
-		bool modify, bool restore)
+static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, unsigned int action)
 {
 	const struct usb_endpoint_descriptor *desc = dep->endpoint.desc;
 	struct dwc3		*dwc = dep->dwc;
@@ -625,12 +618,12 @@
 	int			ret;
 
 	if (!(dep->flags & DWC3_EP_ENABLED)) {
-		ret = dwc3_gadget_start_config(dwc, dep);
+		ret = dwc3_gadget_start_config(dep);
 		if (ret)
 			return ret;
 	}
 
-	ret = dwc3_gadget_set_ep_config(dwc, dep, modify, restore);
+	ret = dwc3_gadget_set_ep_config(dep, action);
 	if (ret)
 		return ret;
 
@@ -671,7 +664,8 @@
 	 * Issue StartTransfer here with no-op TRB so we can always rely on No
 	 * Response Update Transfer command.
 	 */
-	if (usb_endpoint_xfer_bulk(desc)) {
+	if (usb_endpoint_xfer_bulk(desc) ||
+			usb_endpoint_xfer_int(desc)) {
 		struct dwc3_gadget_ep_cmd_params params;
 		struct dwc3_trb	*trb;
 		dma_addr_t trb_dma;
@@ -689,26 +683,20 @@
 		ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
 		if (ret < 0)
 			return ret;
-
-		dep->flags |= DWC3_EP_BUSY;
-
-		dep->resource_index = dwc3_gadget_ep_get_transfer_index(dep);
-		WARN_ON_ONCE(!dep->resource_index);
 	}
 
-
 out:
 	trace_dwc3_gadget_ep_enable(dep);
 
 	return 0;
 }
 
-static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force);
+static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force);
 static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep)
 {
 	struct dwc3_request		*req;
 
-	dwc3_stop_active_transfer(dwc, dep->number, true);
+	dwc3_stop_active_transfer(dep, true);
 
 	/* - giveback all requests to gadget driver */
 	while (!list_empty(&dep->started_list)) {
@@ -806,7 +794,7 @@
 		return 0;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	ret = __dwc3_gadget_ep_enable(dep, false, false);
+	ret = __dwc3_gadget_ep_enable(dep, DWC3_DEPCFG_ACTION_INIT);
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return ret;
@@ -840,7 +828,7 @@
 }
 
 static struct usb_request *dwc3_gadget_ep_alloc_request(struct usb_ep *ep,
-	gfp_t gfp_flags)
+		gfp_t gfp_flags)
 {
 	struct dwc3_request		*req;
 	struct dwc3_ep			*dep = to_dwc3_ep(ep);
@@ -849,11 +837,10 @@
 	if (!req)
 		return NULL;
 
+	req->direction	= dep->direction;
 	req->epnum	= dep->number;
 	req->dep	= dep;
 
-	dep->allocated_requests++;
-
 	trace_dwc3_alloc_request(req);
 
 	return &req->request;
@@ -863,14 +850,58 @@
 		struct usb_request *request)
 {
 	struct dwc3_request		*req = to_dwc3_request(request);
-	struct dwc3_ep			*dep = to_dwc3_ep(ep);
 
-	dep->allocated_requests--;
 	trace_dwc3_free_request(req);
 	kfree(req);
 }
 
-static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep);
+/**
+ * dwc3_ep_prev_trb - returns the previous TRB in the ring
+ * @dep: The endpoint with the TRB ring
+ * @index: The index of the current TRB in the ring
+ *
+ * Returns the TRB prior to the one pointed to by the index. If the
+ * index is 0, we will wrap backwards, skip the link TRB, and return
+ * the one just before that.
+ */
+static struct dwc3_trb *dwc3_ep_prev_trb(struct dwc3_ep *dep, u8 index)
+{
+	u8 tmp = index;
+
+	if (!tmp)
+		tmp = DWC3_TRB_NUM - 1;
+
+	return &dep->trb_pool[tmp - 1];
+}
+
+static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
+{
+	struct dwc3_trb		*tmp;
+	u8			trbs_left;
+
+	/*
+	 * If enqueue & dequeue are equal then the ring is either full or empty.
+	 *
+	 * One way to know for sure is if the TRB right before us has HWO bit
+	 * set or not. If it has, then we're definitely full and can't fit any
+	 * more transfers in our ring.
+	 */
+	if (dep->trb_enqueue == dep->trb_dequeue) {
+		tmp = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
+		if (tmp->ctrl & DWC3_TRB_CTRL_HWO)
+			return 0;
+
+		return DWC3_TRB_NUM - 1;
+	}
+
+	trbs_left = dep->trb_dequeue - dep->trb_enqueue;
+	trbs_left &= (DWC3_TRB_NUM - 1);
+
+	if (dep->trb_dequeue < dep->trb_enqueue)
+		trbs_left--;
+
+	return trbs_left;
+}
 
 static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
 		dma_addr_t dma, unsigned length, unsigned chain, unsigned node,
@@ -985,11 +1016,19 @@
 		struct dwc3_request *req, unsigned chain, unsigned node)
 {
 	struct dwc3_trb		*trb;
-	unsigned		length = req->request.length;
+	unsigned int		length;
+	dma_addr_t		dma;
 	unsigned		stream_id = req->request.stream_id;
 	unsigned		short_not_ok = req->request.short_not_ok;
 	unsigned		no_interrupt = req->request.no_interrupt;
-	dma_addr_t		dma = req->request.dma;
+
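+	/* For sg requests, map the current sg entry instead of the whole request buffer */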
+	if (req->request.num_sgs > 0) {
+		length = sg_dma_len(req->start_sg);
+		dma = sg_dma_address(req->start_sg);
+	} else {
+		length = req->request.length;
+		dma = req->request.dma;
+	}
 
 	trb = &dep->trb_pool[dep->trb_enqueue];
 
@@ -997,69 +1036,23 @@
 		dwc3_gadget_move_started_request(req);
 		req->trb = trb;
 		req->trb_dma = dwc3_trb_dma_offset(dep, trb);
-		dep->queued_requests++;
 	}
 
 	__dwc3_prepare_one_trb(dep, trb, dma, length, chain, node,
 			stream_id, short_not_ok, no_interrupt);
 }
 
-/**
- * dwc3_ep_prev_trb - returns the previous TRB in the ring
- * @dep: The endpoint with the TRB ring
- * @index: The index of the current TRB in the ring
- *
- * Returns the TRB prior to the one pointed to by the index. If the
- * index is 0, we will wrap backwards, skip the link TRB, and return
- * the one just before that.
- */
-static struct dwc3_trb *dwc3_ep_prev_trb(struct dwc3_ep *dep, u8 index)
-{
-	u8 tmp = index;
-
-	if (!tmp)
-		tmp = DWC3_TRB_NUM - 1;
-
-	return &dep->trb_pool[tmp - 1];
-}
-
-static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
-{
-	struct dwc3_trb		*tmp;
-	u8			trbs_left;
-
-	/*
-	 * If enqueue & dequeue are equal than it is either full or empty.
-	 *
-	 * One way to know for sure is if the TRB right before us has HWO bit
-	 * set or not. If it has, then we're definitely full and can't fit any
-	 * more transfers in our ring.
-	 */
-	if (dep->trb_enqueue == dep->trb_dequeue) {
-		tmp = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
-		if (tmp->ctrl & DWC3_TRB_CTRL_HWO)
-			return 0;
-
-		return DWC3_TRB_NUM - 1;
-	}
-
-	trbs_left = dep->trb_dequeue - dep->trb_enqueue;
-	trbs_left &= (DWC3_TRB_NUM - 1);
-
-	if (dep->trb_dequeue < dep->trb_enqueue)
-		trbs_left--;
-
-	return trbs_left;
-}
-
 static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep,
 		struct dwc3_request *req)
 {
-	struct scatterlist *sg = req->sg;
+	struct scatterlist *sg = req->start_sg;
 	struct scatterlist *s;
 	int		i;
 
-	for_each_sg(sg, s, req->num_pending_sgs, i) {
+	unsigned int remaining = req->request.num_mapped_sgs
+		- req->num_queued_sgs;
+
+	for_each_sg(sg, s, remaining, i) {
 		unsigned int length = req->request.length;
 		unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc);
 		unsigned int rem = length % maxp;
@@ -1088,6 +1081,18 @@
 			dwc3_prepare_one_trb(dep, req, chain, i);
 		}
 
+		/*
+		 * There can be a situation where not all sgs in the sglist
+		 * are queued because of an insufficient number of TRBs. To
+		 * handle this case, update start_sg to the next sg to be
+		 * queued, so that once TRBs are free we can continue
+		 * queuing from where we previously stopped.
+		 */
+		if (chain)
+			req->start_sg = sg_next(s);
+
+		req->num_queued_sgs++;
+
 		if (!dwc3_calc_trbs_left(dep))
 			break;
 	}
@@ -1178,6 +1183,8 @@
 			return;
 
 		req->sg			= req->request.sg;
+		req->start_sg		= req->sg;
+		req->num_queued_sgs	= 0;
 		req->num_pending_sgs	= req->request.num_mapped_sgs;
 
 		if (req->num_pending_sgs > 0)
@@ -1201,7 +1208,7 @@
 	if (!dwc3_calc_trbs_left(dep))
 		return 0;
 
-	starting = !(dep->flags & DWC3_EP_BUSY);
+	starting = !(dep->flags & DWC3_EP_TRANSFER_STARTED);
 
 	dwc3_prepare_trbs(dep);
 	req = next_request(&dep->started_list);
@@ -1233,18 +1240,10 @@
 		 */
 		if (req->trb)
 			memset(req->trb, 0, sizeof(struct dwc3_trb));
-		dep->queued_requests--;
 		dwc3_gadget_del_and_unmap_request(dep, req, ret);
 		return ret;
 	}
 
-	dep->flags |= DWC3_EP_BUSY;
-
-	if (starting) {
-		dep->resource_index = dwc3_gadget_ep_get_transfer_index(dep);
-		WARN_ON_ONCE(!dep->resource_index);
-	}
-
 	return 0;
 }
 
@@ -1256,35 +1255,19 @@
 	return DWC3_DSTS_SOFFN(reg);
 }
 
-static void __dwc3_gadget_start_isoc(struct dwc3 *dwc,
-		struct dwc3_ep *dep, u32 cur_uf)
+static void __dwc3_gadget_start_isoc(struct dwc3_ep *dep)
 {
 	if (list_empty(&dep->pending_list)) {
-		dev_info(dwc->dev, "%s: ran out of requests\n",
+		dev_info(dep->dwc->dev, "%s: ran out of requests\n",
 				dep->name);
 		dep->flags |= DWC3_EP_PENDING_REQUEST;
 		return;
 	}
 
-	/*
-	 * Schedule the first trb for one interval in the future or at
-	 * least 4 microframes.
-	 */
-	dep->frame_number = cur_uf + max_t(u32, 4, dep->interval);
+	dep->frame_number = DWC3_ALIGN_FRAME(dep);
 	__dwc3_gadget_kick_transfer(dep);
 }
 
-static void dwc3_gadget_start_isoc(struct dwc3 *dwc,
-		struct dwc3_ep *dep, const struct dwc3_event_depevt *event)
-{
-	u32 cur_uf, mask;
-
-	mask = ~(dep->interval - 1);
-	cur_uf = event->parameters & mask;
-
-	__dwc3_gadget_start_isoc(dwc, dep, cur_uf);
-}
-
 static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 {
 	struct dwc3		*dwc = dep->dwc;
@@ -1303,8 +1286,6 @@
 
 	req->request.actual	= 0;
 	req->request.status	= -EINPROGRESS;
-	req->direction		= dep->direction;
-	req->epnum		= dep->number;
 
 	trace_dwc3_ep_queue(req);
 
@@ -1319,28 +1300,18 @@
 	 * errors which will force us issue EndTransfer command.
 	 */
 	if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) {
-		if ((dep->flags & DWC3_EP_PENDING_REQUEST)) {
-			if (dep->flags & DWC3_EP_TRANSFER_STARTED) {
-				dwc3_stop_active_transfer(dwc, dep->number, true);
-				dep->flags = DWC3_EP_ENABLED;
-			} else {
-				u32 cur_uf;
-
-				cur_uf = __dwc3_gadget_get_frame(dwc);
-				__dwc3_gadget_start_isoc(dwc, dep, cur_uf);
-				dep->flags &= ~DWC3_EP_PENDING_REQUEST;
-			}
+		if (!(dep->flags & DWC3_EP_PENDING_REQUEST) &&
+				!(dep->flags & DWC3_EP_TRANSFER_STARTED))
 			return 0;
+
+		if ((dep->flags & DWC3_EP_PENDING_REQUEST)) {
+			if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) {
+				__dwc3_gadget_start_isoc(dep);
+				return 0;
+			}
 		}
-
-		if ((dep->flags & DWC3_EP_BUSY) &&
-		    !(dep->flags & DWC3_EP_MISSED_ISOC))
-			goto out;
-
-		return 0;
 	}
 
-out:
 	return __dwc3_gadget_kick_transfer(dep);
 }
 
@@ -1390,7 +1361,7 @@
 		}
 		if (r == req) {
 			/* wait until it is processed */
-			dwc3_stop_active_transfer(dwc, dep->number, true);
+			dwc3_stop_active_transfer(dep, true);
 
 			/*
 			 * If request was already started, this means we had to
@@ -1463,7 +1434,7 @@
 
 out1:
 	/* giveback the request */
-	dep->queued_requests--;
+
 	dwc3_gadget_giveback(dep, req, -ECONNRESET);
 
 out0:
@@ -1878,14 +1849,14 @@
 	dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512);
 
 	dep = dwc->eps[0];
-	ret = __dwc3_gadget_ep_enable(dep, false, false);
+	ret = __dwc3_gadget_ep_enable(dep, DWC3_DEPCFG_ACTION_INIT);
 	if (ret) {
 		dev_err(dwc->dev, "failed to enable %s\n", dep->name);
 		goto err0;
 	}
 
 	dep = dwc->eps[1];
-	ret = __dwc3_gadget_ep_enable(dep, false, false);
+	ret = __dwc3_gadget_ep_enable(dep, DWC3_DEPCFG_ACTION_INIT);
 	if (ret) {
 		dev_err(dwc->dev, "failed to enable %s\n", dep->name);
 		goto err1;
@@ -2082,113 +2053,142 @@
 
 /* -------------------------------------------------------------------------- */
 
-static int dwc3_gadget_init_endpoints(struct dwc3 *dwc, u8 total)
+static int dwc3_gadget_init_control_endpoint(struct dwc3_ep *dep)
+{
+	struct dwc3 *dwc = dep->dwc;
+
+	usb_ep_set_maxpacket_limit(&dep->endpoint, 512);
+	dep->endpoint.maxburst = 1;
+	dep->endpoint.ops = &dwc3_gadget_ep0_ops;
+	if (!dep->direction)
+		dwc->gadget.ep0 = &dep->endpoint;
+
+	dep->endpoint.caps.type_control = true;
+
+	return 0;
+}
+
+static int dwc3_gadget_init_in_endpoint(struct dwc3_ep *dep)
+{
+	struct dwc3 *dwc = dep->dwc;
+	int mdwidth;
+	int kbytes;
+	int size;
+
+	mdwidth = DWC3_MDWIDTH(dwc->hwparams.hwparams0);
+	/* MDWIDTH is represented in bits, we need it in bytes */
+	mdwidth /= 8;
+
+	size = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(dep->number >> 1));
+	if (dwc3_is_usb31(dwc))
+		size = DWC31_GTXFIFOSIZ_TXFDEF(size);
+	else
+		size = DWC3_GTXFIFOSIZ_TXFDEF(size);
+
+	/* FIFO depth is in MDWIDTH bytes. Multiply */
+	size *= mdwidth;
+
+	kbytes = size / 1024;
+	if (kbytes == 0)
+		kbytes = 1;
+
+	/*
+	 * FIFO sizes account for an extra MDWIDTH * (kbytes + 1) bytes of
+	 * internal overhead. We don't really know how these bytes are
+	 * used, but the documentation says they exist.
+	 */
+	size -= mdwidth * (kbytes + 1);
+	size /= kbytes;
+
+	usb_ep_set_maxpacket_limit(&dep->endpoint, size);
+
+	dep->endpoint.max_streams = 15;
+	dep->endpoint.ops = &dwc3_gadget_ep_ops;
+	list_add_tail(&dep->endpoint.ep_list,
+			&dwc->gadget.ep_list);
+	dep->endpoint.caps.type_iso = true;
+	dep->endpoint.caps.type_bulk = true;
+	dep->endpoint.caps.type_int = true;
+
+	return dwc3_alloc_trb_pool(dep);
+}
+
+static int dwc3_gadget_init_out_endpoint(struct dwc3_ep *dep)
+{
+	struct dwc3 *dwc = dep->dwc;
+
+	usb_ep_set_maxpacket_limit(&dep->endpoint, 1024);
+	dep->endpoint.max_streams = 15;
+	dep->endpoint.ops = &dwc3_gadget_ep_ops;
+	list_add_tail(&dep->endpoint.ep_list,
+			&dwc->gadget.ep_list);
+	dep->endpoint.caps.type_iso = true;
+	dep->endpoint.caps.type_bulk = true;
+	dep->endpoint.caps.type_int = true;
+
+	return dwc3_alloc_trb_pool(dep);
+}
+
+static int dwc3_gadget_init_endpoint(struct dwc3 *dwc, u8 epnum)
 {
 	struct dwc3_ep			*dep;
+	bool				direction = epnum & 1;
+	int				ret;
+	u8				num = epnum >> 1;
+
+	dep = kzalloc(sizeof(*dep), GFP_KERNEL);
+	if (!dep)
+		return -ENOMEM;
+
+	dep->dwc = dwc;
+	dep->number = epnum;
+	dep->direction = direction;
+	dep->regs = dwc->regs + DWC3_DEP_BASE(epnum);
+	dwc->eps[epnum] = dep;
+
+	snprintf(dep->name, sizeof(dep->name), "ep%u%s", num,
+			direction ? "in" : "out");
+
+	dep->endpoint.name = dep->name;
+
+	if (dep->number <= 1) {
+		dep->endpoint.desc = &dwc3_gadget_ep0_desc;
+		dep->endpoint.comp_desc = NULL;
+	}
+
+	spin_lock_init(&dep->lock);
+
+	if (num == 0)
+		ret = dwc3_gadget_init_control_endpoint(dep);
+	else if (direction)
+		ret = dwc3_gadget_init_in_endpoint(dep);
+	else
+		ret = dwc3_gadget_init_out_endpoint(dep);
+
+	if (ret)
+		return ret;
+
+	dep->endpoint.caps.dir_in = direction;
+	dep->endpoint.caps.dir_out = !direction;
+
+	INIT_LIST_HEAD(&dep->pending_list);
+	INIT_LIST_HEAD(&dep->started_list);
+
+	return 0;
+}
+
+static int dwc3_gadget_init_endpoints(struct dwc3 *dwc, u8 total)
+{
 	u8				epnum;
 
 	INIT_LIST_HEAD(&dwc->gadget.ep_list);
 
 	for (epnum = 0; epnum < total; epnum++) {
-		bool			direction = epnum & 1;
-		u8			num = epnum >> 1;
+		int			ret;
 
-		dep = kzalloc(sizeof(*dep), GFP_KERNEL);
-		if (!dep)
-			return -ENOMEM;
-
-		dep->dwc = dwc;
-		dep->number = epnum;
-		dep->direction = direction;
-		dep->regs = dwc->regs + DWC3_DEP_BASE(epnum);
-		dwc->eps[epnum] = dep;
-
-		snprintf(dep->name, sizeof(dep->name), "ep%u%s", num,
-				direction ? "in" : "out");
-
-		dep->endpoint.name = dep->name;
-
-		if (!(dep->number > 1)) {
-			dep->endpoint.desc = &dwc3_gadget_ep0_desc;
-			dep->endpoint.comp_desc = NULL;
-		}
-
-		spin_lock_init(&dep->lock);
-
-		if (num == 0) {
-			usb_ep_set_maxpacket_limit(&dep->endpoint, 512);
-			dep->endpoint.maxburst = 1;
-			dep->endpoint.ops = &dwc3_gadget_ep0_ops;
-			if (!direction)
-				dwc->gadget.ep0 = &dep->endpoint;
-		} else if (direction) {
-			int mdwidth;
-			int kbytes;
-			int size;
-			int ret;
-
-			mdwidth = DWC3_MDWIDTH(dwc->hwparams.hwparams0);
-			/* MDWIDTH is represented in bits, we need it in bytes */
-			mdwidth /= 8;
-
-			size = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(num));
-			if (dwc3_is_usb31(dwc))
-				size = DWC31_GTXFIFOSIZ_TXFDEF(size);
-			else
-				size = DWC3_GTXFIFOSIZ_TXFDEF(size);
-
-			/* FIFO Depth is in MDWDITH bytes. Multiply */
-			size *= mdwidth;
-
-			kbytes = size / 1024;
-			if (kbytes == 0)
-				kbytes = 1;
-
-			/*
-			 * FIFO sizes account an extra MDWIDTH * (kbytes + 1) bytes for
-			 * internal overhead. We don't really know how these are used,
-			 * but documentation say it exists.
-			 */
-			size -= mdwidth * (kbytes + 1);
-			size /= kbytes;
-
-			usb_ep_set_maxpacket_limit(&dep->endpoint, size);
-
-			dep->endpoint.max_streams = 15;
-			dep->endpoint.ops = &dwc3_gadget_ep_ops;
-			list_add_tail(&dep->endpoint.ep_list,
-					&dwc->gadget.ep_list);
-
-			ret = dwc3_alloc_trb_pool(dep);
-			if (ret)
-				return ret;
-		} else {
-			int		ret;
-
-			usb_ep_set_maxpacket_limit(&dep->endpoint, 1024);
-			dep->endpoint.max_streams = 15;
-			dep->endpoint.ops = &dwc3_gadget_ep_ops;
-			list_add_tail(&dep->endpoint.ep_list,
-					&dwc->gadget.ep_list);
-
-			ret = dwc3_alloc_trb_pool(dep);
-			if (ret)
-				return ret;
-		}
-
-		if (num == 0) {
-			dep->endpoint.caps.type_control = true;
-		} else {
-			dep->endpoint.caps.type_iso = true;
-			dep->endpoint.caps.type_bulk = true;
-			dep->endpoint.caps.type_int = true;
-		}
-
-		dep->endpoint.caps.dir_in = direction;
-		dep->endpoint.caps.dir_out = !direction;
-
-		INIT_LIST_HEAD(&dep->pending_list);
-		INIT_LIST_HEAD(&dep->started_list);
+		ret = dwc3_gadget_init_endpoint(dwc, epnum);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -2223,20 +2223,14 @@
 
 /* -------------------------------------------------------------------------- */
 
-static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
+static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep,
 		struct dwc3_request *req, struct dwc3_trb *trb,
-		const struct dwc3_event_depevt *event, int status,
-		int chain)
+		const struct dwc3_event_depevt *event, int status, int chain)
 {
 	unsigned int		count;
-	unsigned int		s_pkt = 0;
-	unsigned int		trb_status;
 
 	dwc3_ep_inc_deq(dep);
 
-	if (req->trb == trb)
-		dep->queued_requests--;
-
 	trace_dwc3_complete_trb(dep, trb);
 
 	/*
@@ -2268,159 +2262,140 @@
 	if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
 		return 1;
 
-	if (dep->direction) {
-		if (count) {
-			trb_status = DWC3_TRB_SIZE_TRBSTS(trb->size);
-			if (trb_status == DWC3_TRBSTS_MISSED_ISOC) {
-				/*
-				 * If missed isoc occurred and there is
-				 * no request queued then issue END
-				 * TRANSFER, so that core generates
-				 * next xfernotready and we will issue
-				 * a fresh START TRANSFER.
-				 * If there are still queued request
-				 * then wait, do not issue either END
-				 * or UPDATE TRANSFER, just attach next
-				 * request in pending_list during
-				 * giveback.If any future queued request
-				 * is successfully transferred then we
-				 * will issue UPDATE TRANSFER for all
-				 * request in the pending_list.
-				 */
-				dep->flags |= DWC3_EP_MISSED_ISOC;
-			} else {
-				dev_err(dwc->dev, "incomplete IN transfer %s\n",
-						dep->name);
-				status = -ECONNRESET;
-			}
-		} else {
-			dep->flags &= ~DWC3_EP_MISSED_ISOC;
-		}
-	} else {
-		if (count && (event->status & DEPEVT_STATUS_SHORT))
-			s_pkt = 1;
-	}
-
-	if (s_pkt && !chain)
+	if (event->status & DEPEVT_STATUS_SHORT && !chain)
 		return 1;
 
-	if ((event->status & DEPEVT_STATUS_IOC) &&
-			(trb->ctrl & DWC3_TRB_CTRL_IOC))
+	if (event->status & DEPEVT_STATUS_IOC)
 		return 1;
 
 	return 0;
 }
 
-static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
-		const struct dwc3_event_depevt *event, int status)
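+/* Reclaim one completed TRB per sg entry, stopping at the first TRB still owned by hardware */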
+static int dwc3_gadget_ep_reclaim_trb_sg(struct dwc3_ep *dep,
+		struct dwc3_request *req, const struct dwc3_event_depevt *event,
+		int status)
 {
-	struct dwc3_request	*req, *n;
-	struct dwc3_trb		*trb;
-	bool			ioc = false;
-	int			ret = 0;
+	struct dwc3_trb *trb = &dep->trb_pool[dep->trb_dequeue];
+	struct scatterlist *sg = req->sg;
+	struct scatterlist *s;
+	unsigned int pending = req->num_pending_sgs;
+	unsigned int i;
+	int ret = 0;
 
-	list_for_each_entry_safe(req, n, &dep->started_list, list) {
-		unsigned length;
-		int chain;
+	for_each_sg(sg, s, pending, i) {
+		trb = &dep->trb_pool[dep->trb_dequeue];
 
-		length = req->request.length;
-		chain = req->num_pending_sgs > 0;
-		if (chain) {
-			struct scatterlist *sg = req->sg;
-			struct scatterlist *s;
-			unsigned int pending = req->num_pending_sgs;
-			unsigned int i;
-
-			for_each_sg(sg, s, pending, i) {
-				trb = &dep->trb_pool[dep->trb_dequeue];
-
-				if (trb->ctrl & DWC3_TRB_CTRL_HWO)
-					break;
-
-				req->sg = sg_next(s);
-				req->num_pending_sgs--;
-
-				ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
-						event, status, chain);
-				if (ret)
-					break;
-			}
-		} else {
-			trb = &dep->trb_pool[dep->trb_dequeue];
-			ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
-					event, status, chain);
-		}
-
-		if (req->unaligned || req->zero) {
-			trb = &dep->trb_pool[dep->trb_dequeue];
-			ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
-					event, status, false);
-			req->unaligned = false;
-			req->zero = false;
-		}
-
-		req->request.actual = length - req->remaining;
-
-		if ((req->request.actual < length) && req->num_pending_sgs)
-			return __dwc3_gadget_kick_transfer(dep);
-
-		dwc3_gadget_giveback(dep, req, status);
-
-		if (ret) {
-			if ((event->status & DEPEVT_STATUS_IOC) &&
-			    (trb->ctrl & DWC3_TRB_CTRL_IOC))
-				ioc = true;
+		if (trb->ctrl & DWC3_TRB_CTRL_HWO)
 			break;
-		}
+
+		req->sg = sg_next(s);
+		req->num_pending_sgs--;
+
+		ret = dwc3_gadget_ep_reclaim_completed_trb(dep, req,
+				trb, event, status, true);
+		if (ret)
+			break;
 	}
 
-	/*
-	 * Our endpoint might get disabled by another thread during
-	 * dwc3_gadget_giveback(). If that happens, we're just gonna return 1
-	 * early on so DWC3_EP_BUSY flag gets cleared
-	 */
-	if (!dep->endpoint.desc)
-		return 1;
-
-	if (usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
-			list_empty(&dep->started_list)) {
-		if (list_empty(&dep->pending_list)) {
-			/*
-			 * If there is no entry in request list then do
-			 * not issue END TRANSFER now. Just set PENDING
-			 * flag, so that END TRANSFER is issued when an
-			 * entry is added into request list.
-			 */
-			dep->flags = DWC3_EP_PENDING_REQUEST;
-		} else {
-			dwc3_stop_active_transfer(dwc, dep->number, true);
-			dep->flags = DWC3_EP_ENABLED;
-		}
-		return 1;
-	}
-
-	if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && ioc)
-		return 0;
-
-	return 1;
+	return ret;
 }
 
-static void dwc3_endpoint_transfer_complete(struct dwc3 *dwc,
-		struct dwc3_ep *dep, const struct dwc3_event_depevt *event)
+static int dwc3_gadget_ep_reclaim_trb_linear(struct dwc3_ep *dep,
+		struct dwc3_request *req, const struct dwc3_event_depevt *event,
+		int status)
 {
-	unsigned		status = 0;
-	int			clean_busy;
-	u32			is_xfer_complete;
+	struct dwc3_trb *trb = &dep->trb_pool[dep->trb_dequeue];
 
-	is_xfer_complete = (event->endpoint_event == DWC3_DEPEVT_XFERCOMPLETE);
+	return dwc3_gadget_ep_reclaim_completed_trb(dep, req, trb,
+			event, status, false);
+}
+
+static bool dwc3_gadget_ep_request_completed(struct dwc3_request *req)
+{
+	return req->request.actual == req->request.length;
+}
+
+static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
+		const struct dwc3_event_depevt *event,
+		struct dwc3_request *req, int status)
+{
+	int ret;
+
+	if (req->num_pending_sgs)
+		ret = dwc3_gadget_ep_reclaim_trb_sg(dep, req, event,
+				status);
+	else
+		ret = dwc3_gadget_ep_reclaim_trb_linear(dep, req, event,
+				status);
+
+	if (req->unaligned || req->zero) {
+		ret = dwc3_gadget_ep_reclaim_trb_linear(dep, req, event,
+				status);
+		req->unaligned = false;
+		req->zero = false;
+	}
+
+	req->request.actual = req->request.length - req->remaining;
+
+	if (!dwc3_gadget_ep_request_completed(req) &&
+			req->num_pending_sgs) {
+		__dwc3_gadget_kick_transfer(dep);
+		goto out;
+	}
+
+	dwc3_gadget_giveback(dep, req, status);
+
+out:
+	return ret;
+}
+
+static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep,
+		const struct dwc3_event_depevt *event, int status)
+{
+	struct dwc3_request	*req;
+	struct dwc3_request	*tmp;
+
+	list_for_each_entry_safe(req, tmp, &dep->started_list, list) {
+		int ret;
+
+		ret = dwc3_gadget_ep_cleanup_completed_request(dep, event,
+				req, status);
+		if (ret)
+			break;
+	}
+}
+
+static void dwc3_gadget_endpoint_frame_from_event(struct dwc3_ep *dep,
+		const struct dwc3_event_depevt *event)
+{
+	dep->frame_number = event->parameters;
+}
+
+static void dwc3_gadget_endpoint_transfer_in_progress(struct dwc3_ep *dep,
+		const struct dwc3_event_depevt *event)
+{
+	struct dwc3		*dwc = dep->dwc;
+	unsigned		status = 0;
+	bool			stop = false;
+
+	dwc3_gadget_endpoint_frame_from_event(dep, event);
 
 	if (event->status & DEPEVT_STATUS_BUSERR)
 		status = -ECONNRESET;
 
-	clean_busy = dwc3_cleanup_done_reqs(dwc, dep, event, status);
-	if (clean_busy && (!dep->endpoint.desc || is_xfer_complete ||
-				usb_endpoint_xfer_isoc(dep->endpoint.desc)))
-		dep->flags &= ~DWC3_EP_BUSY;
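+	/*
+	 * A missed isoc interval fails the request with -EXDEV; if the
+	 * started list is already empty, end the active transfer so a
+	 * new one can be started later.
+	 */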
+	if (event->status & DEPEVT_STATUS_MISSED_ISOC) {
+		status = -EXDEV;
+
+		if (list_empty(&dep->started_list))
+			stop = true;
+	}
+
+	dwc3_gadget_ep_cleanup_completed_requests(dep, event, status);
+
+	if (stop) {
+		dwc3_stop_active_transfer(dep, true);
+		dep->flags = DWC3_EP_ENABLED;
+	}
 
 	/*
 	 * WORKAROUND: This is the 2nd half of U1/U2 -> U0 workaround.
@@ -2446,17 +2421,13 @@
 
 		dwc->u1u2 = 0;
 	}
+}
 
-	/*
-	 * Our endpoint might get disabled by another thread during
-	 * dwc3_gadget_giveback(). If that happens, we're just gonna return 1
-	 * early on so DWC3_EP_BUSY flag gets cleared
-	 */
-	if (!dep->endpoint.desc)
-		return;
-
-	if (!usb_endpoint_xfer_isoc(dep->endpoint.desc))
-		__dwc3_gadget_kick_transfer(dep);
+static void dwc3_gadget_endpoint_transfer_not_ready(struct dwc3_ep *dep,
+		const struct dwc3_event_depevt *event)
+{
+	dwc3_gadget_endpoint_frame_from_event(dep, event);
+	__dwc3_gadget_start_isoc(dep);
 }
 
 static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
@@ -2483,32 +2454,11 @@
 	}
 
 	switch (event->endpoint_event) {
-	case DWC3_DEPEVT_XFERCOMPLETE:
-		dep->resource_index = 0;
-
-		if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) {
-			dev_err(dwc->dev, "XferComplete for Isochronous endpoint\n");
-			return;
-		}
-
-		dwc3_endpoint_transfer_complete(dwc, dep, event);
-		break;
 	case DWC3_DEPEVT_XFERINPROGRESS:
-		dwc3_endpoint_transfer_complete(dwc, dep, event);
+		dwc3_gadget_endpoint_transfer_in_progress(dep, event);
 		break;
 	case DWC3_DEPEVT_XFERNOTREADY:
-		if (usb_endpoint_xfer_isoc(dep->endpoint.desc))
-			dwc3_gadget_start_isoc(dwc, dep, event);
-		else
-			__dwc3_gadget_kick_transfer(dep);
-
-		break;
-	case DWC3_DEPEVT_STREAMEVT:
-		if (!usb_endpoint_xfer_bulk(dep->endpoint.desc)) {
-			dev_err(dwc->dev, "Stream event for non-Bulk %s\n",
-					dep->name);
-			return;
-		}
+		dwc3_gadget_endpoint_transfer_not_ready(dep, event);
 		break;
 	case DWC3_DEPEVT_EPCMDCMPLT:
 		cmd = DEPEVT_PARAMETER_CMD(event->parameters);
@@ -2518,6 +2468,8 @@
 			wake_up(&dep->wait_end_transfer);
 		}
 		break;
+	case DWC3_DEPEVT_STREAMEVT:
+	case DWC3_DEPEVT_XFERCOMPLETE:
 	case DWC3_DEPEVT_RXTXFIFOEVT:
 		break;
 	}
@@ -2562,15 +2514,13 @@
 	}
 }
 
-static void dwc3_stop_active_transfer(struct dwc3 *dwc, u32 epnum, bool force)
+static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force)
 {
-	struct dwc3_ep *dep;
+	struct dwc3 *dwc = dep->dwc;
 	struct dwc3_gadget_ep_cmd_params params;
 	u32 cmd;
 	int ret;
 
-	dep = dwc->eps[epnum];
-
 	if ((dep->flags & DWC3_EP_END_TRANSFER_PENDING) ||
 	    !dep->resource_index)
 		return;
@@ -2614,7 +2564,6 @@
 	ret = dwc3_send_gadget_ep_cmd(dep, cmd, &params);
 	WARN_ON_ONCE(ret);
 	dep->resource_index = 0;
-	dep->flags &= ~DWC3_EP_BUSY;
 
 	if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) {
 		dep->flags |= DWC3_EP_END_TRANSFER_PENDING;
@@ -2816,14 +2765,14 @@
 	}
 
 	dep = dwc->eps[0];
-	ret = __dwc3_gadget_ep_enable(dep, true, false);
+	ret = __dwc3_gadget_ep_enable(dep, DWC3_DEPCFG_ACTION_MODIFY);
 	if (ret) {
 		dev_err(dwc->dev, "failed to enable %s\n", dep->name);
 		return;
 	}
 
 	dep = dwc->eps[1];
-	ret = __dwc3_gadget_ep_enable(dep, true, false);
+	ret = __dwc3_gadget_ep_enable(dep, DWC3_DEPCFG_ACTION_MODIFY);
 	if (ret) {
 		dev_err(dwc->dev, "failed to enable %s\n", dep->name);
 		return;
diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h
index 578aa85..db610c5 100644
--- a/drivers/usb/dwc3/gadget.h
+++ b/drivers/usb/dwc3/gadget.h
@@ -98,13 +98,12 @@
- * Caller should take care of locking. Returns the transfer resource
- * index for a given endpoint.
+ * Caller should take care of locking. Reads the transfer resource
+ * index from the hardware and stores it in dep->resource_index.
  */
-static inline u32 dwc3_gadget_ep_get_transfer_index(struct dwc3_ep *dep)
+static inline void dwc3_gadget_ep_get_transfer_index(struct dwc3_ep *dep)
 {
 	u32			res_id;
 
 	res_id = dwc3_readl(dep->regs, DWC3_DEPCMD);
-
-	return DWC3_DEPCMD_GET_RSC_IDX(res_id);
+	dep->resource_index = DWC3_DEPCMD_GET_RSC_IDX(res_id);
 }
 
 #endif /* __DRIVERS_USB_DWC3_GADGET_H */
diff --git a/drivers/usb/dwc3/trace.h b/drivers/usb/dwc3/trace.h
index babaee9..f22714c 100644
--- a/drivers/usb/dwc3/trace.h
+++ b/drivers/usb/dwc3/trace.h
@@ -230,17 +230,14 @@
 	TP_fast_assign(
 		__assign_str(name, dep->name);
 		__entry->trb = trb;
-		__entry->allocated = dep->allocated_requests;
-		__entry->queued = dep->queued_requests;
 		__entry->bpl = trb->bpl;
 		__entry->bph = trb->bph;
 		__entry->size = trb->size;
 		__entry->ctrl = trb->ctrl;
 		__entry->type = usb_endpoint_type(dep->endpoint.desc);
 	),
-	TP_printk("%s: %d/%d trb %p buf %08x%08x size %s%d ctrl %08x (%c%c%c%c:%c%c:%s)",
-		__get_str(name), __entry->queued, __entry->allocated,
-		__entry->trb, __entry->bph, __entry->bpl,
+	TP_printk("%s: trb %p buf %08x%08x size %s%d ctrl %08x (%c%c%c%c:%c%c:%s)",
+		__get_str(name), __entry->trb, __entry->bph, __entry->bpl,
 		({char *s;
 		int pcm = ((__entry->size >> 24) & 3) + 1;
 		switch (__entry->type) {
@@ -306,7 +303,7 @@
 		__entry->trb_enqueue = dep->trb_enqueue;
 		__entry->trb_dequeue = dep->trb_dequeue;
 	),
-	TP_printk("%s: mps %d/%d streams %d burst %d ring %d/%d flags %c:%c%c%c%c%c:%c:%c",
+	TP_printk("%s: mps %d/%d streams %d burst %d ring %d/%d flags %c:%c%c%c%c:%c:%c",
 		__get_str(name), __entry->maxpacket,
 		__entry->maxpacket_limit, __entry->max_streams,
 		__entry->maxburst, __entry->trb_enqueue,
@@ -314,9 +311,8 @@
 		__entry->flags & DWC3_EP_ENABLED ? 'E' : 'e',
 		__entry->flags & DWC3_EP_STALL ? 'S' : 's',
 		__entry->flags & DWC3_EP_WEDGE ? 'W' : 'w',
-		__entry->flags & DWC3_EP_BUSY ? 'B' : 'b',
+		__entry->flags & DWC3_EP_TRANSFER_STARTED ? 'B' : 'b',
 		__entry->flags & DWC3_EP_PENDING_REQUEST ? 'P' : 'p',
-		__entry->flags & DWC3_EP_MISSED_ISOC ? 'M' : 'm',
 		__entry->flags & DWC3_EP_END_TRANSFER_PENDING ? 'E' : 'e',
 		__entry->direction ? '<' : '>'
 	)
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 63a7cb8..f242c2b 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1601,7 +1601,7 @@
 				cdev->gadget->ep0->maxpacket;
 			if (gadget_is_superspeed(gadget)) {
 				if (gadget->speed >= USB_SPEED_SUPER) {
-					cdev->desc.bcdUSB = cpu_to_le16(0x0310);
+					cdev->desc.bcdUSB = cpu_to_le16(0x0320);
 					cdev->desc.bMaxPacketSize0 = 9;
 				} else {
 					cdev->desc.bcdUSB = cpu_to_le16(0x0210);
diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c
index b104ed0..6ce0440 100644
--- a/drivers/usb/gadget/function/f_ecm.c
+++ b/drivers/usb/gadget/function/f_ecm.c
@@ -705,6 +705,8 @@
 		ecm_opts->bound = true;
 	}
 
+	ecm_string_defs[1].s = ecm->ethaddr;
+
 	us = usb_gstrings_attach(cdev, ecm_strings,
 				 ARRAY_SIZE(ecm_string_defs));
 	if (IS_ERR(us))
@@ -928,7 +930,6 @@
 		mutex_unlock(&opts->lock);
 		return ERR_PTR(-EINVAL);
 	}
-	ecm_string_defs[1].s = ecm->ethaddr;
 
 	ecm->port.ioport = netdev_priv(opts->net);
 	mutex_unlock(&opts->lock);
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 0294e4f..199d257 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1266,6 +1266,14 @@
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
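+/* The epfile ioctls take compat-safe arguments, so just forward to the native handler */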
+static long ffs_epfile_compat_ioctl(struct file *file, unsigned code,
+		unsigned long value)
+{
+	return ffs_epfile_ioctl(file, code, value);
+}
+#endif
+
 static const struct file_operations ffs_epfile_operations = {
 	.llseek =	no_llseek,
 
@@ -1274,6 +1282,9 @@
 	.read_iter =	ffs_epfile_read_iter,
 	.release =	ffs_epfile_release,
 	.unlocked_ioctl =	ffs_epfile_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = ffs_epfile_compat_ioctl,
+#endif
 };
 
 
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index e8f35db..46af0aa 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -109,6 +109,7 @@
 
 static void f_midi_transmit(struct f_midi *midi);
 static void f_midi_rmidi_free(struct snd_rawmidi *rmidi);
+static void f_midi_free_inst(struct usb_function_instance *f);
 
 DECLARE_UAC_AC_HEADER_DESCRIPTOR(1);
 DECLARE_USB_MIDI_OUT_JACK_DESCRIPTOR(1);
@@ -1102,7 +1103,7 @@
 	u32 num;							\
 									\
 	mutex_lock(&opts->lock);					\
-	if (opts->refcnt) {						\
+	if (opts->refcnt > 1) {						\
 		ret = -EBUSY;						\
 		goto end;						\
 	}								\
@@ -1157,7 +1158,7 @@
 	char *c;
 
 	mutex_lock(&opts->lock);
-	if (opts->refcnt) {
+	if (opts->refcnt > 1) {
 		ret = -EBUSY;
 		goto end;
 	}
@@ -1198,13 +1199,21 @@
 static void f_midi_free_inst(struct usb_function_instance *f)
 {
 	struct f_midi_opts *opts;
+	bool free = false;
 
 	opts = container_of(f, struct f_midi_opts, func_inst);
 
-	if (opts->id_allocated)
-		kfree(opts->id);
+	mutex_lock(&opts->lock);
+	if (!--opts->refcnt)
+		free = true;
+	mutex_unlock(&opts->lock);
 
-	kfree(opts);
+	if (free) {
+		if (opts->id_allocated)
+			kfree(opts->id);
+		kfree(opts);
+	}
 }
 
 static struct usb_function_instance *f_midi_alloc_inst(void)
@@ -1223,6 +1232,7 @@
 	opts->qlen = 32;
 	opts->in_ports = 1;
 	opts->out_ports = 1;
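+	/* The instance itself holds a reference, dropped in f_midi_free_inst() */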
+	opts->refcnt = 1;
 
 	config_group_init_type_name(&opts->func_inst.group, "",
 				    &midi_func_type);
@@ -1234,6 +1244,7 @@
 {
 	struct f_midi *midi;
 	struct f_midi_opts *opts;
+	bool free = false;
 
 	midi = func_to_midi(f);
 	opts = container_of(f->fi, struct f_midi_opts, func_inst);
@@ -1242,9 +1253,12 @@
 		kfree(midi->id);
 		kfifo_free(&midi->in_req_fifo);
 		kfree(midi);
-		--opts->refcnt;
+		free = true;
 	}
 	mutex_unlock(&opts->lock);
+
+	if (free)
+		f_midi_free_inst(&opts->func_inst);
 }
 
 static void f_midi_rmidi_free(struct snd_rawmidi *rmidi)
@@ -1287,9 +1301,8 @@
 	}
 
 	/* allocate and initialize one new instance */
-	midi = kzalloc(
-		sizeof(*midi) + opts->in_ports * sizeof(*midi->in_ports_array),
-		GFP_KERNEL);
+	midi = kzalloc(struct_size(midi, in_ports_array, opts->in_ports),
+		       GFP_KERNEL);
 	if (!midi) {
 		status = -ENOMEM;
 		goto setup_fail;
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index d359efe..9c7ed25 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -631,19 +631,19 @@
 			return -EAGAIN;
 		}
 
+		list_add(&req->list, &dev->tx_reqs_active);
+
 		/* here, we unlock, and only unlock, to avoid deadlock. */
 		spin_unlock(&dev->lock);
 		value = usb_ep_queue(dev->in_ep, req, GFP_ATOMIC);
 		spin_lock(&dev->lock);
 		if (value) {
+			list_del(&req->list);
 			list_add(&req->list, &dev->tx_reqs);
 			spin_unlock_irqrestore(&dev->lock, flags);
 			mutex_unlock(&dev->lock_printer_io);
 			return -EAGAIN;
 		}
-
-		list_add(&req->list, &dev->tx_reqs_active);
-
 	}
 
 	spin_unlock_irqrestore(&dev->lock, flags);
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 51dd3e9..04c142c 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -851,6 +851,9 @@
 		 */
 		pr_warn("%s: unknown RNDIS message 0x%08X len %d\n",
 			__func__, MsgType, MsgLength);
+		/* Garbled message can be huge, so limit what we display */
+		if (MsgLength > 16)
+			MsgLength = 16;
 		print_hex_dump_bytes(__func__, DUMP_PREFIX_OFFSET,
 				     buf, MsgLength);
 		break;
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 6fcda62..1000d86 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -844,6 +844,10 @@
 	net->ethtool_ops = &ops;
 	SET_NETDEV_DEVTYPE(net, &gadget_type);
 
+	/* MTU range: 14 - 15412 */
+	net->min_mtu = ETH_HLEN;
+	net->max_mtu = GETHER_MAX_ETH_FRAME_LEN;
+
 	return net;
 }
 EXPORT_SYMBOL_GPL(gether_setup_name_default);
diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 0875d38..1df4ded 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -179,7 +179,7 @@
 
 config USB_RENESAS_USBHS_UDC
 	tristate 'Renesas USBHS controller'
-	depends on USB_RENESAS_USBHS && HAS_DMA
+	depends on USB_RENESAS_USBHS
 	help
 	   Renesas USBHS is a discrete USB host and peripheral controller chip
 	   that supports both full and high speed USB 2.0 data transfers.
@@ -192,7 +192,7 @@
 config USB_RENESAS_USB3
 	tristate 'Renesas USB3.0 Peripheral controller'
 	depends on ARCH_RENESAS || COMPILE_TEST
-	depends on EXTCON && HAS_DMA
+	depends on EXTCON
 	help
 	   Renesas USB3.0 Peripheral controller is a USB peripheral controller
 	   that supports super, high, and full speed USB 3.0 data transfers.
@@ -438,6 +438,8 @@
 	  dynamically linked module called "udc-xilinx" and force all
 	  gadget drivers to also be dynamically linked.
 
+source "drivers/usb/gadget/udc/aspeed-vhub/Kconfig"
+
 #
 # LAST -- dummy/emulated controller
 #
diff --git a/drivers/usb/gadget/udc/Makefile b/drivers/usb/gadget/udc/Makefile
index ce865b1..897f648 100644
--- a/drivers/usb/gadget/udc/Makefile
+++ b/drivers/usb/gadget/udc/Makefile
@@ -39,4 +39,5 @@
 obj-$(CONFIG_USB_GR_UDC)	+= gr_udc.o
 obj-$(CONFIG_USB_GADGET_XILINX)	+= udc-xilinx.o
 obj-$(CONFIG_USB_SNP_UDC_PLAT) += snps_udc_plat.o
+obj-$(CONFIG_USB_ASPEED_VHUB)	+= aspeed-vhub/
 obj-$(CONFIG_USB_BDC_UDC)	+= bdc/
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/Kconfig b/drivers/usb/gadget/udc/aspeed-vhub/Kconfig
new file mode 100644
index 0000000..f0cdf89
--- /dev/null
+++ b/drivers/usb/gadget/udc/aspeed-vhub/Kconfig
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0+
+config USB_ASPEED_VHUB
+	tristate "Aspeed vHub UDC driver"
+	depends on ARCH_ASPEED || COMPILE_TEST
+	help
+	  USB peripheral controller for the Aspeed AST2500 family of
+	  SoCs supporting the "vHub" functionality and USB 2.0
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/Makefile b/drivers/usb/gadget/udc/aspeed-vhub/Makefile
new file mode 100644
index 0000000..9f3add6
--- /dev/null
+++ b/drivers/usb/gadget/udc/aspeed-vhub/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0+
+obj-$(CONFIG_USB_ASPEED_VHUB)	+= aspeed-vhub.o
+aspeed-vhub-y	:= core.o ep0.o epn.o dev.o hub.o
+
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/core.c b/drivers/usb/gadget/udc/aspeed-vhub/core.c
new file mode 100644
index 0000000..db3628b
--- /dev/null
+++ b/drivers/usb/gadget/udc/aspeed-vhub/core.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * aspeed-vhub -- Driver for Aspeed SoC "vHub" USB gadget
+ *
+ * core.c - Top level support
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/prefetch.h>
+#include <linux/clk.h>
+#include <linux/usb/gadget.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/dma-mapping.h>
+
+#include "vhub.h"
+
+void ast_vhub_done(struct ast_vhub_ep *ep, struct ast_vhub_req *req,
+		   int status)
+{
+	bool internal = req->internal;
+
+	EPVDBG(ep, "completing request @%p, status %d\n", req, status);
+
+	list_del_init(&req->queue);
+
+	if (req->req.status == -EINPROGRESS)
+		req->req.status = status;
+
+	if (req->req.dma) {
+		if (!WARN_ON(!ep->dev))
+			usb_gadget_unmap_request(&ep->dev->gadget,
+						 &req->req, ep->epn.is_in);
+		req->req.dma = 0;
+	}
+
+	/*
+	 * If this isn't an internal EP0 request, call the core
+	 * to call the gadget completion.
+	 */
+	if (!internal) {
+		spin_unlock(&ep->vhub->lock);
+		usb_gadget_giveback_request(&ep->ep, &req->req);
+		spin_lock(&ep->vhub->lock);
+	}
+}
+
+void ast_vhub_nuke(struct ast_vhub_ep *ep, int status)
+{
+	struct ast_vhub_req *req;
+
+	EPDBG(ep, "Nuking\n");
+
+	/* Beware, lock will be dropped & re-acquired by done() */
+	while (!list_empty(&ep->queue)) {
+		req = list_first_entry(&ep->queue, struct ast_vhub_req, queue);
+		ast_vhub_done(ep, req, status);
+	}
+}
+
+struct usb_request *ast_vhub_alloc_request(struct usb_ep *u_ep,
+					   gfp_t gfp_flags)
+{
+	struct ast_vhub_req *req;
+
+	req = kzalloc(sizeof(*req), gfp_flags);
+	if (!req)
+		return NULL;
+	return &req->req;
+}
+
+void ast_vhub_free_request(struct usb_ep *u_ep, struct usb_request *u_req)
+{
+	struct ast_vhub_req *req = to_ast_req(u_req);
+
+	kfree(req);
+}
+
+static irqreturn_t ast_vhub_irq(int irq, void *data)
+{
+	struct ast_vhub *vhub = data;
+	irqreturn_t iret = IRQ_NONE;
+	u32 istat;
+
+	/* Stale interrupt while tearing down */
+	if (!vhub->ep0_bufs)
+		return IRQ_NONE;
+
+	spin_lock(&vhub->lock);
+
+	/* Read and ACK interrupts */
+	istat = readl(vhub->regs + AST_VHUB_ISR);
+	if (!istat)
+		goto bail;
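+	/* Writing the status bits back acks them (write-one-to-clear) */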
+	writel(istat, vhub->regs + AST_VHUB_ISR);
+	iret = IRQ_HANDLED;
+
+	UDCVDBG(vhub, "irq status=%08x, ep_acks=%08x ep_nacks=%08x\n",
+	       istat,
+	       readl(vhub->regs + AST_VHUB_EP_ACK_ISR),
+	       readl(vhub->regs + AST_VHUB_EP_NACK_ISR));
+
+	/* Handle generic EPs first */
+	if (istat & VHUB_IRQ_EP_POOL_ACK_STALL) {
+		u32 i, ep_acks = readl(vhub->regs + AST_VHUB_EP_ACK_ISR);
+		writel(ep_acks, vhub->regs + AST_VHUB_EP_ACK_ISR);
+
+		for (i = 0; ep_acks && i < AST_VHUB_NUM_GEN_EPs; i++) {
+			u32 mask = VHUB_EP_IRQ(i);
+			if (ep_acks & mask) {
+				ast_vhub_epn_ack_irq(&vhub->epns[i]);
+				ep_acks &= ~mask;
+			}
+		}
+	}
+
+	/* Handle device interrupts */
+	if (istat & (VHUB_IRQ_DEVICE1 |
+		     VHUB_IRQ_DEVICE2 |
+		     VHUB_IRQ_DEVICE3 |
+		     VHUB_IRQ_DEVICE4 |
+		     VHUB_IRQ_DEVICE5)) {
+		if (istat & VHUB_IRQ_DEVICE1)
+			ast_vhub_dev_irq(&vhub->ports[0].dev);
+		if (istat & VHUB_IRQ_DEVICE2)
+			ast_vhub_dev_irq(&vhub->ports[1].dev);
+		if (istat & VHUB_IRQ_DEVICE3)
+			ast_vhub_dev_irq(&vhub->ports[2].dev);
+		if (istat & VHUB_IRQ_DEVICE4)
+			ast_vhub_dev_irq(&vhub->ports[3].dev);
+		if (istat & VHUB_IRQ_DEVICE5)
+			ast_vhub_dev_irq(&vhub->ports[4].dev);
+	}
+
+	/* Handle top-level vHub EP0 interrupts */
+	if (istat & (VHUB_IRQ_HUB_EP0_OUT_ACK_STALL |
+		     VHUB_IRQ_HUB_EP0_IN_ACK_STALL |
+		     VHUB_IRQ_HUB_EP0_SETUP)) {
+		if (istat & VHUB_IRQ_HUB_EP0_IN_ACK_STALL)
+			ast_vhub_ep0_handle_ack(&vhub->ep0, true);
+		if (istat & VHUB_IRQ_HUB_EP0_OUT_ACK_STALL)
+			ast_vhub_ep0_handle_ack(&vhub->ep0, false);
+		if (istat & VHUB_IRQ_HUB_EP0_SETUP)
+			ast_vhub_ep0_handle_setup(&vhub->ep0);
+	}
+
+	/* Various top level bus events */
+	if (istat & (VHUB_IRQ_BUS_RESUME |
+		     VHUB_IRQ_BUS_SUSPEND |
+		     VHUB_IRQ_BUS_RESET)) {
+		if (istat & VHUB_IRQ_BUS_RESUME)
+			ast_vhub_hub_resume(vhub);
+		if (istat & VHUB_IRQ_BUS_SUSPEND)
+			ast_vhub_hub_suspend(vhub);
+		if (istat & VHUB_IRQ_BUS_RESET)
+			ast_vhub_hub_reset(vhub);
+	}
+
+ bail:
+	spin_unlock(&vhub->lock);
+	return iret;
+}
+
+void ast_vhub_init_hw(struct ast_vhub *vhub)
+{
+	u32 ctrl;
+
+	UDCDBG(vhub, "(Re)Starting HW ...\n");
+
+	/* Enable PHY */
+	ctrl = VHUB_CTRL_PHY_CLK |
+		VHUB_CTRL_PHY_RESET_DIS;
+
+	/*
+	 * We do *NOT* set the VHUB_CTRL_CLK_STOP_SUSPEND bit
+	 * to stop the logic clock during suspend because
+	 * it causes the registers to become inaccessible and
+	 * we haven't yet figured out a good way to bring the
+	 * controller back to life to issue a wakeup.
+	 */
+
+	/*
+	 * Set some ISO & split control bits according to Aspeed
+	 * recommendation
+	 *
+	 * VHUB_CTRL_ISO_RSP_CTRL: When set tells the HW to respond
+	 * with 0 bytes data packet to ISO IN endpoints when no data
+	 * is available.
+	 *
+	 * VHUB_CTRL_SPLIT_IN: This makes a SOF complete a split IN
+	 * transaction.
+	 */
+	ctrl |= VHUB_CTRL_ISO_RSP_CTRL | VHUB_CTRL_SPLIT_IN;
+	writel(ctrl, vhub->regs + AST_VHUB_CTRL);
+	udelay(1);
+
+	/* Set descriptor ring size */
+	if (AST_VHUB_DESCS_COUNT == 256) {
+		ctrl |= VHUB_CTRL_LONG_DESC;
+		writel(ctrl, vhub->regs + AST_VHUB_CTRL);
+	} else {
+		BUILD_BUG_ON(AST_VHUB_DESCS_COUNT != 32);
+	}
+
+	/* Reset all devices */
+	writel(VHUB_SW_RESET_ALL, vhub->regs + AST_VHUB_SW_RESET);
+	udelay(1);
+	writel(0, vhub->regs + AST_VHUB_SW_RESET);
+
+	/* Disable and cleanup EP ACK/NACK interrupts */
+	writel(0, vhub->regs + AST_VHUB_EP_ACK_IER);
+	writel(0, vhub->regs + AST_VHUB_EP_NACK_IER);
+	writel(VHUB_EP_IRQ_ALL, vhub->regs + AST_VHUB_EP_ACK_ISR);
+	writel(VHUB_EP_IRQ_ALL, vhub->regs + AST_VHUB_EP_NACK_ISR);
+
+	/* Default settings for EP0, enable HW hub EP1 */
+	writel(0, vhub->regs + AST_VHUB_EP0_CTRL);
+	writel(VHUB_EP1_CTRL_RESET_TOGGLE |
+	       VHUB_EP1_CTRL_ENABLE,
+	       vhub->regs + AST_VHUB_EP1_CTRL);
+	writel(0, vhub->regs + AST_VHUB_EP1_STS_CHG);
+
+	/* Configure EP0 DMA buffer */
+	writel(vhub->ep0.buf_dma, vhub->regs + AST_VHUB_EP0_DATA);
+
+	/* Clear address */
+	writel(0, vhub->regs + AST_VHUB_CONF);
+
+	/* Pullup hub (activate on host) */
+	if (vhub->force_usb1)
+		ctrl |= VHUB_CTRL_FULL_SPEED_ONLY;
+
+	ctrl |= VHUB_CTRL_UPSTREAM_CONNECT;
+	writel(ctrl, vhub->regs + AST_VHUB_CTRL);
+
+	/* Enable some interrupts */
+	writel(VHUB_IRQ_HUB_EP0_IN_ACK_STALL |
+	       VHUB_IRQ_HUB_EP0_OUT_ACK_STALL |
+	       VHUB_IRQ_HUB_EP0_SETUP |
+	       VHUB_IRQ_EP_POOL_ACK_STALL |
+	       VHUB_IRQ_BUS_RESUME |
+	       VHUB_IRQ_BUS_SUSPEND |
+	       VHUB_IRQ_BUS_RESET,
+	       vhub->regs + AST_VHUB_IER);
+}
+
+static int ast_vhub_remove(struct platform_device *pdev)
+{
+	struct ast_vhub *vhub = platform_get_drvdata(pdev);
+	unsigned long flags;
+	int i;
+
+	if (!vhub || !vhub->regs)
+		return 0;
+
+	/* Remove devices */
+	for (i = 0; i < AST_VHUB_NUM_PORTS; i++)
+		ast_vhub_del_dev(&vhub->ports[i].dev);
+
+	spin_lock_irqsave(&vhub->lock, flags);
+
+	/* Mask & ack all interrupts  */
+	writel(0, vhub->regs + AST_VHUB_IER);
+	writel(VHUB_IRQ_ACK_ALL, vhub->regs + AST_VHUB_ISR);
+
+	/* Pull device, leave PHY enabled */
+	writel(VHUB_CTRL_PHY_CLK |
+	       VHUB_CTRL_PHY_RESET_DIS,
+	       vhub->regs + AST_VHUB_CTRL);
+
+	spin_unlock_irqrestore(&vhub->lock, flags);
+
+	/* clk_disable_unprepare() may sleep, so do it outside the lock */
+	if (vhub->clk)
+		clk_disable_unprepare(vhub->clk);
+
+	if (vhub->ep0_bufs)
+		dma_free_coherent(&pdev->dev,
+				  AST_VHUB_EP0_MAX_PACKET *
+				  (AST_VHUB_NUM_PORTS + 1),
+				  vhub->ep0_bufs,
+				  vhub->ep0_bufs_dma);
+	vhub->ep0_bufs = NULL;
+
+	return 0;
+}
+
+static int ast_vhub_probe(struct platform_device *pdev)
+{
+	enum usb_device_speed max_speed;
+	struct ast_vhub *vhub;
+	struct resource *res;
+	int i, rc = 0;
+
+	vhub = devm_kzalloc(&pdev->dev, sizeof(*vhub), GFP_KERNEL);
+	if (!vhub)
+		return -ENOMEM;
+
+	spin_lock_init(&vhub->lock);
+	vhub->pdev = pdev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	vhub->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(vhub->regs)) {
+		dev_err(&pdev->dev, "Failed to map resources\n");
+		return PTR_ERR(vhub->regs);
+	}
+	UDCDBG(vhub, "vHub@%pR mapped @%p\n", res, vhub->regs);
+
+	platform_set_drvdata(pdev, vhub);
+
+	vhub->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(vhub->clk)) {
+		rc = PTR_ERR(vhub->clk);
+		goto err;
+	}
+	rc = clk_prepare_enable(vhub->clk);
+	if (rc) {
+		dev_err(&pdev->dev, "Error couldn't enable clock (%d)\n", rc);
+		goto err;
+	}
+
+	/* Check if we need to limit the HW to USB1 */
+	max_speed = usb_get_maximum_speed(&pdev->dev);
+	if (max_speed != USB_SPEED_UNKNOWN && max_speed < USB_SPEED_HIGH)
+		vhub->force_usb1 = true;
+
+	/* Mask & ack all interrupts before installing the handler */
+	writel(0, vhub->regs + AST_VHUB_IER);
+	writel(VHUB_IRQ_ACK_ALL, vhub->regs + AST_VHUB_ISR);
+
+	/* Find interrupt and install handler */
+	vhub->irq = platform_get_irq(pdev, 0);
+	if (vhub->irq < 0) {
+		dev_err(&pdev->dev, "Failed to get interrupt\n");
+		rc = vhub->irq;
+		goto err;
+	}
+	rc = devm_request_irq(&pdev->dev, vhub->irq, ast_vhub_irq, 0,
+			      KBUILD_MODNAME, vhub);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to request interrupt\n");
+		goto err;
+	}
+
+	/*
+	 * Allocate DMA buffers for all EP0s in one chunk,
+	 * one per port and one for the vHub itself
+	 */
+	vhub->ep0_bufs = dma_alloc_coherent(&pdev->dev,
+					    AST_VHUB_EP0_MAX_PACKET *
+					    (AST_VHUB_NUM_PORTS + 1),
+					    &vhub->ep0_bufs_dma, GFP_KERNEL);
+	if (!vhub->ep0_bufs) {
+		dev_err(&pdev->dev, "Failed to allocate EP0 DMA buffers\n");
+		rc = -ENOMEM;
+		goto err;
+	}
+	UDCVDBG(vhub, "EP0 DMA buffers @%p (DMA 0x%08x)\n",
+		vhub->ep0_bufs, (u32)vhub->ep0_bufs_dma);
+
+	/* Init vHub EP0 */
+	ast_vhub_init_ep0(vhub, &vhub->ep0, NULL);
+
+	/* Init devices */
+	for (i = 0; i < AST_VHUB_NUM_PORTS && rc == 0; i++)
+		rc = ast_vhub_init_dev(vhub, i);
+	if (rc)
+		goto err;
+
+	/* Init hub emulation */
+	ast_vhub_init_hub(vhub);
+
+	/* Initialize HW */
+	ast_vhub_init_hw(vhub);
+
+	dev_info(&pdev->dev, "Initialized virtual hub in USB%d mode\n",
+		 vhub->force_usb1 ? 1 : 2);
+
+	return 0;
+ err:
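+	/*
+	 * ast_vhub_remove() copes with a partially initialized device:
+	 * it bails out early if the registers were never mapped and only
+	 * frees the EP0 buffers if they were allocated.
+	 */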
+	ast_vhub_remove(pdev);
+	return rc;
+}
+
+static const struct of_device_id ast_vhub_dt_ids[] = {
+	{
+		.compatible = "aspeed,ast2400-usb-vhub",
+	},
+	{
+		.compatible = "aspeed,ast2500-usb-vhub",
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ast_vhub_dt_ids);
+
+static struct platform_driver ast_vhub_driver = {
+	.probe		= ast_vhub_probe,
+	.remove		= ast_vhub_remove,
+	.driver		= {
+		.name	= KBUILD_MODNAME,
+		.of_match_table	= ast_vhub_dt_ids,
+	},
+};
+module_platform_driver(ast_vhub_driver);
+
+MODULE_DESCRIPTION("Aspeed vHub UDC driver");
+MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/dev.c b/drivers/usb/gadget/udc/aspeed-vhub/dev.c
new file mode 100644
index 0000000..f0233912
--- /dev/null
+++ b/drivers/usb/gadget/udc/aspeed-vhub/dev.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * aspeed-vhub -- Driver for Aspeed SoC "vHub" USB gadget
+ *
+ * dev.c - Individual device/gadget management (ie, a port = a gadget)
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/prefetch.h>
+#include <linux/clk.h>
+#include <linux/usb/gadget.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/usb.h>
+#include <linux/usb/hcd.h>
+
+#include "vhub.h"
+
+void ast_vhub_dev_irq(struct ast_vhub_dev *d)
+{
+	u32 istat = readl(d->regs + AST_VHUB_DEV_ISR);
+
+	writel(istat, d->regs + AST_VHUB_DEV_ISR);
+
+	if (istat & VHUV_DEV_IRQ_EP0_IN_ACK_STALL)
+		ast_vhub_ep0_handle_ack(&d->ep0, true);
+	if (istat & VHUV_DEV_IRQ_EP0_OUT_ACK_STALL)
+		ast_vhub_ep0_handle_ack(&d->ep0, false);
+	if (istat & VHUV_DEV_IRQ_EP0_SETUP)
+		ast_vhub_ep0_handle_setup(&d->ep0);
+}
+
+static void ast_vhub_dev_enable(struct ast_vhub_dev *d)
+{
+	u32 reg, hmsk;
+
+	if (d->enabled)
+		return;
+
+	/* Enable device and its EP0 interrupts */
+	reg = VHUB_DEV_EN_ENABLE_PORT |
+		VHUB_DEV_EN_EP0_IN_ACK_IRQEN |
+		VHUB_DEV_EN_EP0_OUT_ACK_IRQEN |
+		VHUB_DEV_EN_EP0_SETUP_IRQEN;
+	if (d->gadget.speed == USB_SPEED_HIGH)
+		reg |= VHUB_DEV_EN_SPEED_SEL_HIGH;
+	writel(reg, d->regs + AST_VHUB_DEV_EN_CTRL);
+
+	/* Enable device interrupt in the hub as well */
+	hmsk = VHUB_IRQ_DEVICE1 << d->index;
+	reg = readl(d->vhub->regs + AST_VHUB_IER);
+	reg |= hmsk;
+	writel(reg, d->vhub->regs + AST_VHUB_IER);
+
+	/* Set EP0 DMA buffer address */
+	writel(d->ep0.buf_dma, d->regs + AST_VHUB_DEV_EP0_DATA);
+
+	d->enabled = true;
+}
+
+static void ast_vhub_dev_disable(struct ast_vhub_dev *d)
+{
+	u32 reg, hmsk;
+
+	if (!d->enabled)
+		return;
+
+	/* Disable device interrupt in the hub */
+	hmsk = VHUB_IRQ_DEVICE1 << d->index;
+	reg = readl(d->vhub->regs + AST_VHUB_IER);
+	reg &= ~hmsk;
+	writel(reg, d->vhub->regs + AST_VHUB_IER);
+
+	/* Then disable device */
+	writel(0, d->regs + AST_VHUB_DEV_EN_CTRL);
+	d->gadget.speed = USB_SPEED_UNKNOWN;
+	d->enabled = false;
+	d->suspended = false;
+}
+
+static int ast_vhub_dev_feature(struct ast_vhub_dev *d,
+				u16 wIndex, u16 wValue,
+				bool is_set)
+{
+	DDBG(d, "%s_FEATURE(dev val=%02x)\n",
+	     is_set ? "SET" : "CLEAR", wValue);
+
+	if (wValue != USB_DEVICE_REMOTE_WAKEUP)
+		return std_req_driver;
+
+	d->wakeup_en = is_set;
+
+	return std_req_complete;
+}
+
+static int ast_vhub_ep_feature(struct ast_vhub_dev *d,
+			       u16 wIndex, u16 wValue, bool is_set)
+{
+	struct ast_vhub_ep *ep;
+	int ep_num;
+
+	ep_num = wIndex & USB_ENDPOINT_NUMBER_MASK;
+	DDBG(d, "%s_FEATURE(ep%d val=%02x)\n",
+	     is_set ? "SET" : "CLEAR", ep_num, wValue);
+	if (ep_num == 0)
+		return std_req_complete;
+	if (ep_num >= AST_VHUB_NUM_GEN_EPs || !d->epns[ep_num - 1])
+		return std_req_stall;
+	if (wValue != USB_ENDPOINT_HALT)
+		return std_req_driver;
+
+	ep = d->epns[ep_num - 1];
+	if (WARN_ON(!ep))
+		return std_req_stall;
+
+	if (!ep->epn.enabled || !ep->ep.desc || ep->epn.is_iso ||
+	    ep->epn.is_in != !!(wIndex & USB_DIR_IN))
+		return std_req_stall;
+
+	DDBG(d, "%s stall on EP %d\n",
+	     is_set ? "setting" : "clearing", ep_num);
+	ep->epn.stalled = is_set;
+	ast_vhub_update_epn_stall(ep);
+
+	return std_req_complete;
+}
+
+static int ast_vhub_dev_status(struct ast_vhub_dev *d,
+			       u16 wIndex, u16 wValue)
+{
+	u8 st0;
+
+	DDBG(d, "GET_STATUS(dev)\n");
+
+	st0 = d->gadget.is_selfpowered << USB_DEVICE_SELF_POWERED;
+	if (d->wakeup_en)
+		st0 |= 1 << USB_DEVICE_REMOTE_WAKEUP;
+
+	return ast_vhub_simple_reply(&d->ep0, st0, 0);
+}
+
+static int ast_vhub_ep_status(struct ast_vhub_dev *d,
+			      u16 wIndex, u16 wValue)
+{
+	int ep_num = wIndex & USB_ENDPOINT_NUMBER_MASK;
+	struct ast_vhub_ep *ep;
+	u8 st0 = 0;
+
+	DDBG(d, "GET_STATUS(ep%d)\n", ep_num);
+
+	if (ep_num >= AST_VHUB_NUM_GEN_EPs)
+		return std_req_stall;
+	if (ep_num != 0) {
+		ep = d->epns[ep_num - 1];
+		if (!ep)
+			return std_req_stall;
+		if (!ep->epn.enabled || !ep->ep.desc || ep->epn.is_iso ||
+		    ep->epn.is_in != !!(wIndex & USB_DIR_IN))
+			return std_req_stall;
+		if (ep->epn.stalled)
+			st0 |= 1 << USB_ENDPOINT_HALT;
+	}
+
+	return ast_vhub_simple_reply(&d->ep0, st0, 0);
+}
+
+static void ast_vhub_dev_set_address(struct ast_vhub_dev *d, u8 addr)
+{
+	u32 reg;
+
+	DDBG(d, "SET_ADDRESS: Got address %x\n", addr);
+
+	reg = readl(d->regs + AST_VHUB_DEV_EN_CTRL);
+	reg &= ~VHUB_DEV_EN_ADDR_MASK;
+	reg |= VHUB_DEV_EN_SET_ADDR(addr);
+	writel(reg, d->regs + AST_VHUB_DEV_EN_CTRL);
+}
+
+int ast_vhub_std_dev_request(struct ast_vhub_ep *ep,
+			     struct usb_ctrlrequest *crq)
+{
+	struct ast_vhub_dev *d = ep->dev;
+	u16 wValue, wIndex;
+
+	/* No driver, we shouldn't be enabled ... */
+	if (!d->driver || !d->enabled || d->suspended) {
+		EPDBG(ep,
+		      "Device is wrong state driver=%p enabled=%d"
+		      " suspended=%d\n",
+		      d->driver, d->enabled, d->suspended);
+		return std_req_stall;
+	}
+
+	/* First packet, grab speed */
+	if (d->gadget.speed == USB_SPEED_UNKNOWN) {
+		d->gadget.speed = ep->vhub->speed;
+		if (d->gadget.speed > d->driver->max_speed)
+			d->gadget.speed = d->driver->max_speed;
+		DDBG(d, "fist packet, captured speed %d\n",
+		     d->gadget.speed);
+	}
+
+	wValue = le16_to_cpu(crq->wValue);
+	wIndex = le16_to_cpu(crq->wIndex);
+
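+	/*
+	 * Fold bRequestType and bRequest into a single value so standard
+	 * requests can be matched against the DeviceRequest/DeviceOutRequest/
+	 * InterfaceRequest/EndpointRequest encodings from <linux/usb/hcd.h>.
+	 */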
+	switch ((crq->bRequestType << 8) | crq->bRequest) {
+		/* SET_ADDRESS */
+	case DeviceOutRequest | USB_REQ_SET_ADDRESS:
+		ast_vhub_dev_set_address(d, wValue);
+		return std_req_complete;
+
+		/* GET_STATUS */
+	case DeviceRequest | USB_REQ_GET_STATUS:
+		return ast_vhub_dev_status(d, wIndex, wValue);
+	case InterfaceRequest | USB_REQ_GET_STATUS:
+		return ast_vhub_simple_reply(ep, 0, 0);
+	case EndpointRequest | USB_REQ_GET_STATUS:
+		return ast_vhub_ep_status(d, wIndex, wValue);
+
+		/* SET/CLEAR_FEATURE */
+	case DeviceOutRequest | USB_REQ_SET_FEATURE:
+		return ast_vhub_dev_feature(d, wIndex, wValue, true);
+	case DeviceOutRequest | USB_REQ_CLEAR_FEATURE:
+		return ast_vhub_dev_feature(d, wIndex, wValue, false);
+	case EndpointOutRequest | USB_REQ_SET_FEATURE:
+		return ast_vhub_ep_feature(d, wIndex, wValue, true);
+	case EndpointOutRequest | USB_REQ_CLEAR_FEATURE:
+		return ast_vhub_ep_feature(d, wIndex, wValue, false);
+	}
+	return std_req_driver;
+}
+
+static int ast_vhub_udc_wakeup(struct usb_gadget *gadget)
+{
+	struct ast_vhub_dev *d = to_ast_dev(gadget);
+	unsigned long flags;
+	int rc = -EINVAL;
+
+	spin_lock_irqsave(&d->vhub->lock, flags);
+	if (!d->wakeup_en)
+		goto err;
+
+	DDBG(d, "Device initiated wakeup\n");
+
+	/* Wakeup the host */
+	ast_vhub_hub_wake_all(d->vhub);
+	rc = 0;
+ err:
+	spin_unlock_irqrestore(&d->vhub->lock, flags);
+	return rc;
+}
+
+static int ast_vhub_udc_get_frame(struct usb_gadget *gadget)
+{
+	struct ast_vhub_dev *d = to_ast_dev(gadget);
+
+	return (readl(d->vhub->regs + AST_VHUB_USBSTS) >> 16) & 0x7ff;
+}
+
+static void ast_vhub_dev_nuke(struct ast_vhub_dev *d)
+{
+	unsigned int i;
+
+	for (i = 0; i < AST_VHUB_NUM_GEN_EPs; i++) {
+		if (!d->epns[i])
+			continue;
+		ast_vhub_nuke(d->epns[i], -ESHUTDOWN);
+	}
+}
+
+static int ast_vhub_udc_pullup(struct usb_gadget *gadget, int on)
+{
+	struct ast_vhub_dev *d = to_ast_dev(gadget);
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->vhub->lock, flags);
+
+	DDBG(d, "pullup(%d)\n", on);
+
+	/* Mark disconnected in the hub */
+	ast_vhub_device_connect(d->vhub, d->index, on);
+
+	/*
+	 * If enabled, nuke all requests if any (there shouldn't be)
+	 * and disable the port. This will clear the address too.
+	 */
+	if (d->enabled) {
+		ast_vhub_dev_nuke(d);
+		ast_vhub_dev_disable(d);
+	}
+
+	spin_unlock_irqrestore(&d->vhub->lock, flags);
+
+	return 0;
+}
+
+static int ast_vhub_udc_start(struct usb_gadget *gadget,
+			      struct usb_gadget_driver *driver)
+{
+	struct ast_vhub_dev *d = to_ast_dev(gadget);
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->vhub->lock, flags);
+
+	DDBG(d, "start\n");
+
+	/* We don't do much more until the hub enables us */
+	d->driver = driver;
+	d->gadget.is_selfpowered = 1;
+
+	spin_unlock_irqrestore(&d->vhub->lock, flags);
+
+	return 0;
+}
+
+static struct usb_ep *ast_vhub_udc_match_ep(struct usb_gadget *gadget,
+					    struct usb_endpoint_descriptor *desc,
+					    struct usb_ss_ep_comp_descriptor *ss)
+{
+	struct ast_vhub_dev *d = to_ast_dev(gadget);
+	struct ast_vhub_ep *ep;
+	struct usb_ep *u_ep;
+	unsigned int max, addr, i;
+
+	DDBG(d, "Match EP type %d\n", usb_endpoint_type(desc));
+
+	/*
+	 * First we need to look for an existing unclaimed EP as another
+	 * configuration may have already associated a bunch of EPs with
+	 * this gadget. This duplicates the code in usb_ep_autoconfig_ss()
+	 * unfortunately.
+	 */
+	list_for_each_entry(u_ep, &gadget->ep_list, ep_list) {
+		if (usb_gadget_ep_match_desc(gadget, u_ep, desc, ss)) {
+			DDBG(d, " -> using existing EP%d\n",
+			     to_ast_ep(u_ep)->d_idx);
+			return u_ep;
+		}
+	}
+
+	/*
+	 * We didn't find one, we need to grab one from the pool.
+	 *
+	 * First let's do some sanity checking
+	 */
+	switch(usb_endpoint_type(desc)) {
+	case USB_ENDPOINT_XFER_CONTROL:
+		/* Only EP0 can be a control endpoint */
+		return NULL;
+	case USB_ENDPOINT_XFER_ISOC:
+		/* ISO:	 limit 1023 bytes full speed, 1024 high/super speed */
+		if (gadget_is_dualspeed(gadget))
+			max = 1024;
+		else
+			max = 1023;
+		break;
+	case USB_ENDPOINT_XFER_BULK:
+		if (gadget_is_dualspeed(gadget))
+			max = 512;
+		else
+			max = 64;
+		break;
+	case USB_ENDPOINT_XFER_INT:
+		if (gadget_is_dualspeed(gadget))
+			max = 1024;
+		else
+			max = 64;
+		break;
+	}
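+	/*
+	 * E.g. a high speed bulk endpoint descriptor asking for a
+	 * 1024-byte wMaxPacketSize is rejected here, since bulk caps
+	 * out at 512 bytes.
+	 */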
+	if (usb_endpoint_maxp(desc) > max)
+		return NULL;
+
+	/*
+	 * Find a free EP address for that device. We can't
+	 * let the generic code assign these as it would
+	 * create overlapping numbers for IN and OUT which
+	 * we don't support, so also create a suitable name
+	 * that will allow the generic code to use our
+	 * assigned address.
+	 */
+	for (i = 0; i < AST_VHUB_NUM_GEN_EPs; i++)
+		if (d->epns[i] == NULL)
+			break;
+	if (i >= AST_VHUB_NUM_GEN_EPs)
+		return NULL;
+	addr = i + 1;
+
+	/*
+	 * Now grab an EP from the shared pool and associate
+	 * it with our device
+	 */
+	ep = ast_vhub_alloc_epn(d, addr);
+	if (!ep)
+		return NULL;
+	DDBG(d, "Allocated epn#%d for port EP%d\n",
+	     ep->epn.g_idx, addr);
+
+	return &ep->ep;
+}
+
+static int ast_vhub_udc_stop(struct usb_gadget *gadget)
+{
+	struct ast_vhub_dev *d = to_ast_dev(gadget);
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->vhub->lock, flags);
+
+	DDBG(d, "stop\n");
+
+	d->driver = NULL;
+	d->gadget.speed = USB_SPEED_UNKNOWN;
+
+	ast_vhub_dev_nuke(d);
+
+	if (d->enabled)
+		ast_vhub_dev_disable(d);
+
+	spin_unlock_irqrestore(&d->vhub->lock, flags);
+
+	return 0;
+}
+
+static const struct usb_gadget_ops ast_vhub_udc_ops = {
+	.get_frame	= ast_vhub_udc_get_frame,
+	.wakeup		= ast_vhub_udc_wakeup,
+	.pullup		= ast_vhub_udc_pullup,
+	.udc_start	= ast_vhub_udc_start,
+	.udc_stop	= ast_vhub_udc_stop,
+	.match_ep	= ast_vhub_udc_match_ep,
+};
+
+void ast_vhub_dev_suspend(struct ast_vhub_dev *d)
+{
+	d->suspended = true;
+	if (d->driver && d->driver->suspend) {
+		spin_unlock(&d->vhub->lock);
+		d->driver->suspend(&d->gadget);
+		spin_lock(&d->vhub->lock);
+	}
+}
+
+void ast_vhub_dev_resume(struct ast_vhub_dev *d)
+{
+	d->suspended = false;
+	if (d->driver && d->driver->resume) {
+		spin_unlock(&d->vhub->lock);
+		d->driver->resume(&d->gadget);
+		spin_lock(&d->vhub->lock);
+	}
+}
+
+void ast_vhub_dev_reset(struct ast_vhub_dev *d)
+{
+	/*
+	 * If speed is not set, we enable the port. If it is,
+	 * send reset to the gadget and reset "speed".
+	 *
+	 * Speed is an indication that we have got the first
+	 * setup packet to the device.
+	 */
+	if (d->gadget.speed == USB_SPEED_UNKNOWN && !d->enabled) {
+		DDBG(d, "Reset at unknown speed of disabled device, enabling...\n");
+		ast_vhub_dev_enable(d);
+		d->suspended = false;
+	}
+	if (d->gadget.speed != USB_SPEED_UNKNOWN && d->driver) {
+		unsigned int i;
+
+		DDBG(d, "Reset at known speed of bound device, resetting...\n");
+		spin_unlock(&d->vhub->lock);
+		d->driver->reset(&d->gadget);
+		spin_lock(&d->vhub->lock);
+
+		/*
+		 * Disable/re-enable HW, this will clear the address
+		 * and speed setting.
+		 */
+		ast_vhub_dev_disable(d);
+		ast_vhub_dev_enable(d);
+
+		/* Clear stall on all EPs */
+		for (i = 0; i < AST_VHUB_NUM_GEN_EPs; i++) {
+			struct ast_vhub_ep *ep = d->epns[i];
+
+			if (ep && ep->epn.stalled) {
+				ep->epn.stalled = false;
+				ast_vhub_update_epn_stall(ep);
+			}
+		}
+
+		/* Additional cleanups */
+		d->wakeup_en = false;
+		d->suspended = false;
+	}
+}
+
+void ast_vhub_del_dev(struct ast_vhub_dev *d)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->vhub->lock, flags);
+	if (!d->registered) {
+		spin_unlock_irqrestore(&d->vhub->lock, flags);
+		return;
+	}
+	d->registered = false;
+	spin_unlock_irqrestore(&d->vhub->lock, flags);
+
+	usb_del_gadget_udc(&d->gadget);
+	device_unregister(d->port_dev);
+}
+
+static void ast_vhub_dev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+int ast_vhub_init_dev(struct ast_vhub *vhub, unsigned int idx)
+{
+	struct ast_vhub_dev *d = &vhub->ports[idx].dev;
+	struct device *parent = &vhub->pdev->dev;
+	int rc;
+
+	d->vhub = vhub;
+	d->index = idx;
+	d->name = devm_kasprintf(parent, GFP_KERNEL, "port%d", idx+1);
+	d->regs = vhub->regs + 0x100 + 0x10 * idx;
+
+	ast_vhub_init_ep0(vhub, &d->ep0, d);
+
+	/*
+	 * The UDC core really needs us to have separate and uniquely
+	 * named "parent" devices for each port so we create a sub device
+	 * here for that purpose
+	 */
+	d->port_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!d->port_dev)
+		return -ENOMEM;
+	device_initialize(d->port_dev);
+	d->port_dev->release = ast_vhub_dev_release;
+	d->port_dev->parent = parent;
+	dev_set_name(d->port_dev, "%s:p%d", dev_name(parent), idx + 1);
+	rc = device_add(d->port_dev);
+	if (rc)
+		goto fail_add;
+
+	/* Populate gadget */
+	INIT_LIST_HEAD(&d->gadget.ep_list);
+	d->gadget.ops = &ast_vhub_udc_ops;
+	d->gadget.ep0 = &d->ep0.ep;
+	d->gadget.name = KBUILD_MODNAME;
+	if (vhub->force_usb1)
+		d->gadget.max_speed = USB_SPEED_FULL;
+	else
+		d->gadget.max_speed = USB_SPEED_HIGH;
+	d->gadget.speed = USB_SPEED_UNKNOWN;
+	d->gadget.dev.of_node = vhub->pdev->dev.of_node;
+
+	rc = usb_add_gadget_udc(d->port_dev, &d->gadget);
+	if (rc != 0)
+		goto fail_udc;
+	d->registered = true;
+
+	return 0;
+ fail_udc:
+	device_del(d->port_dev);
+ fail_add:
+	put_device(d->port_dev);
+
+	return rc;
+}
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/ep0.c b/drivers/usb/gadget/udc/aspeed-vhub/ep0.c
new file mode 100644
index 0000000..20ffb03
--- /dev/null
+++ b/drivers/usb/gadget/udc/aspeed-vhub/ep0.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * aspeed-vhub -- Driver for Aspeed SoC "vHub" USB gadget
+ *
+ * ep0.c - Endpoint 0 handling
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/prefetch.h>
+#include <linux/clk.h>
+#include <linux/usb/gadget.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/dma-mapping.h>
+
+#include "vhub.h"
+
+int ast_vhub_reply(struct ast_vhub_ep *ep, char *ptr, int len)
+{
+	struct usb_request *req = &ep->ep0.req.req;
+	int rc;
+
+	if (WARN_ON(ep->d_idx != 0))
+		return std_req_stall;
+	if (WARN_ON(!ep->ep0.dir_in))
+		return std_req_stall;
+	if (WARN_ON(len > AST_VHUB_EP0_MAX_PACKET))
+		return std_req_stall;
+	if (WARN_ON(req->status == -EINPROGRESS))
+		return std_req_stall;
+
+	req->buf = ptr;
+	req->length = len;
+	req->complete = NULL;
+	req->zero = true;
+
+	/*
+	 * Call internal queue directly after dropping the lock. This is
+	 * safe to do as the reply is always the last thing done when
+	 * processing a SETUP packet, usually as a tail call
+	 */
+	spin_unlock(&ep->vhub->lock);
+	if (ep->ep.ops->queue(&ep->ep, req, GFP_ATOMIC))
+		rc = std_req_stall;
+	else
+		rc = std_req_data;
+	spin_lock(&ep->vhub->lock);
+	return rc;
+}
+
+int __ast_vhub_simple_reply(struct ast_vhub_ep *ep, int len, ...)
+{
+	u8 *buffer = ep->buf;
+	unsigned int i;
+	va_list args;
+
+	va_start(args, len);
+
+	/* Copy data directly into EP buffer */
+	for (i = 0; i < len; i++)
+		buffer[i] = va_arg(args, int);
+	va_end(args);
+
+	/* req->buf NULL means data is already there */
+	return ast_vhub_reply(ep, NULL, len);
+}
+
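+/*
+ * EP0 state machine (as implemented by handle_setup/handle_ack below):
+ *
+ *   token  -> idle, waiting for a SETUP packet
+ *   data   -> data phase in progress (do_send / do_receive)
+ *   status -> status phase armed, waiting for its ACK
+ *
+ * Standard request handlers return one of the std_req_* codes: "complete"
+ * arms a 0-length status phase, "data" means a reply has been queued,
+ * "driver" forwards the request to the gadget driver, "stall" halts EP0.
+ * Note that a stall also moves the state to "status" so the next SETUP
+ * packet resynchronizes the state machine.
+ */
+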
+void ast_vhub_ep0_handle_setup(struct ast_vhub_ep *ep)
+{
+	struct usb_ctrlrequest crq;
+	enum std_req_rc std_req_rc;
+	int rc = -ENODEV;
+
+	if (WARN_ON(ep->d_idx != 0))
+		return;
+
+	/*
+	 * Grab the setup packet from the chip and byteswap
+	 * interesting fields
+	 */
+	memcpy_fromio(&crq, ep->ep0.setup, sizeof(crq));
+
+	EPDBG(ep, "SETUP packet %02x/%02x/%04x/%04x/%04x [%s] st=%d\n",
+	      crq.bRequestType, crq.bRequest,
+	       le16_to_cpu(crq.wValue),
+	       le16_to_cpu(crq.wIndex),
+	       le16_to_cpu(crq.wLength),
+	       (crq.bRequestType & USB_DIR_IN) ? "in" : "out",
+	       ep->ep0.state);
+
+	/* Check our state, cancel pending requests if needed */
+	if (ep->ep0.state != ep0_state_token) {
+		EPDBG(ep, "wrong state\n");
+		ast_vhub_nuke(ep, 0);
+		goto stall;
+	}
+
+	/* Calculate next state for EP0 */
+	ep->ep0.state = ep0_state_data;
+	ep->ep0.dir_in = !!(crq.bRequestType & USB_DIR_IN);
+
+	/* If this is the vHub, we handle requests differently */
+	std_req_rc = std_req_driver;
+	if (ep->dev == NULL) {
+		if ((crq.bRequestType & USB_TYPE_MASK) == USB_TYPE_STANDARD)
+			std_req_rc = ast_vhub_std_hub_request(ep, &crq);
+		else if ((crq.bRequestType & USB_TYPE_MASK) == USB_TYPE_CLASS)
+			std_req_rc = ast_vhub_class_hub_request(ep, &crq);
+		else
+			std_req_rc = std_req_stall;
+	} else if ((crq.bRequestType & USB_TYPE_MASK) == USB_TYPE_STANDARD)
+		std_req_rc = ast_vhub_std_dev_request(ep, &crq);
+
+	/* Act upon result */
+	switch(std_req_rc) {
+	case std_req_complete:
+		goto complete;
+	case std_req_stall:
+		goto stall;
+	case std_req_driver:
+		break;
+	case std_req_data:
+		return;
+	}
+
+	/* Pass request up to the gadget driver */
+	if (WARN_ON(!ep->dev))
+		goto stall;
+	if (ep->dev->driver) {
+		EPDBG(ep, "forwarding to gadget...\n");
+		spin_unlock(&ep->vhub->lock);
+		rc = ep->dev->driver->setup(&ep->dev->gadget, &crq);
+		spin_lock(&ep->vhub->lock);
+		EPDBG(ep, "driver returned %d\n", rc);
+	} else {
+		EPDBG(ep, "no gadget for request !\n");
+	}
+	if (rc >= 0)
+		return;
+
+ stall:
+	EPDBG(ep, "stalling\n");
+	writel(VHUB_EP0_CTRL_STALL, ep->ep0.ctlstat);
+	ep->ep0.state = ep0_state_status;
+	ep->ep0.dir_in = false;
+	return;
+
+ complete:
+	EPVDBG(ep, "sending [in] status with no data\n");
+	writel(VHUB_EP0_TX_BUFF_RDY, ep->ep0.ctlstat);
+	ep->ep0.state = ep0_state_status;
+	ep->ep0.dir_in = false;
+}
+
+static void ast_vhub_ep0_do_send(struct ast_vhub_ep *ep,
+				 struct ast_vhub_req *req)
+{
+	unsigned int chunk;
+	u32 reg;
+
+	/* If this is a 0-length request, it's the gadget trying to
+	 * send a status on our behalf. We take it from here.
+	 */
+	if (req->req.length == 0)
+		req->last_desc = 1;
+
+	/* Are we done ? Complete request, otherwise wait for next interrupt */
+	if (req->last_desc >= 0) {
+		EPVDBG(ep, "complete send %d/%d\n",
+		       req->req.actual, req->req.length);
+		ep->ep0.state = ep0_state_status;
+		writel(VHUB_EP0_RX_BUFF_RDY, ep->ep0.ctlstat);
+		ast_vhub_done(ep, req, 0);
+		return;
+	}
+
+	/*
+	 * Next chunk cropped to max packet size. Also check if this
+	 * is the last packet
+	 */
+	chunk = req->req.length - req->req.actual;
+	if (chunk > ep->ep.maxpacket)
+		chunk = ep->ep.maxpacket;
+	else if ((chunk < ep->ep.maxpacket) || !req->req.zero)
+		req->last_desc = 1;
+
+	EPVDBG(ep, "send chunk=%d last=%d, req->act=%d mp=%d\n",
+	       chunk, req->last_desc, req->req.actual, ep->ep.maxpacket);
+
+	/*
+	 * Copy data if any (internal requests already have data
+	 * in the EP buffer)
+	 */
+	if (chunk && req->req.buf)
+		memcpy(ep->buf, req->req.buf + req->req.actual, chunk);
+
+	/* Remember chunk size and trigger send */
+	reg = VHUB_EP0_SET_TX_LEN(chunk);
+	writel(reg, ep->ep0.ctlstat);
+	writel(reg | VHUB_EP0_TX_BUFF_RDY, ep->ep0.ctlstat);
+	req->req.actual += chunk;
+}
+
+static void ast_vhub_ep0_rx_prime(struct ast_vhub_ep *ep)
+{
+	EPVDBG(ep, "rx prime\n");
+
+	/* Prime endpoint for receiving data */
+	writel(VHUB_EP0_RX_BUFF_RDY, ep->ep0.ctlstat);
+}
+
+static void ast_vhub_ep0_do_receive(struct ast_vhub_ep *ep,
+				    struct ast_vhub_req *req,
+				    unsigned int len)
+{
+	unsigned int remain;
+	int rc = 0;
+
+	/* We are receiving... grab request */
+	remain = req->req.length - req->req.actual;
+
+	EPVDBG(ep, "receive got=%d remain=%d\n", len, remain);
+
+	/* Are we getting more than asked ? */
+	if (len > remain) {
+		EPDBG(ep, "receiving too much (ovf: %d) !\n",
+		      len - remain);
+		len = remain;
+		rc = -EOVERFLOW;
+	}
+	if (len && req->req.buf)
+		memcpy(req->req.buf + req->req.actual, ep->buf, len);
+	req->req.actual += len;
+
+	/* Done ? */
+	if (len < ep->ep.maxpacket || len == remain) {
+		ep->ep0.state = ep0_state_status;
+		writel(VHUB_EP0_TX_BUFF_RDY, ep->ep0.ctlstat);
+		ast_vhub_done(ep, req, rc);
+	} else {
+		ast_vhub_ep0_rx_prime(ep);
+	}
+}
+
+void ast_vhub_ep0_handle_ack(struct ast_vhub_ep *ep, bool in_ack)
+{
+	struct ast_vhub_req *req;
+	struct ast_vhub *vhub = ep->vhub;
+	struct device *dev = &vhub->pdev->dev;
+	bool stall = false;
+	u32 stat;
+
+	/* Read EP0 status */
+	stat = readl(ep->ep0.ctlstat);
+
+	/* Grab current request if any */
+	req = list_first_entry_or_null(&ep->queue, struct ast_vhub_req, queue);
+
+	EPVDBG(ep, "ACK status=%08x,state=%d is_in=%d in_ack=%d req=%p\n",
+		stat, ep->ep0.state, ep->ep0.dir_in, in_ack, req);
+
+	switch(ep->ep0.state) {
+	case ep0_state_token:
+		/* There should be no request queued in that state... */
+		if (req) {
+			dev_warn(dev, "request present while in TOKEN state\n");
+			ast_vhub_nuke(ep, -EINVAL);
+		}
+		dev_warn(dev, "ack while in TOKEN state\n");
+		stall = true;
+		break;
+	case ep0_state_data:
+		/* Check the state bits corresponding to our direction */
+		if ((ep->ep0.dir_in && (stat & VHUB_EP0_TX_BUFF_RDY)) ||
+		    (!ep->ep0.dir_in && (stat & VHUB_EP0_RX_BUFF_RDY)) ||
+		    (ep->ep0.dir_in != in_ack)) {
+			dev_warn(dev, "irq state mismatch");
+			stall = true;
+			break;
+		}
+		/*
+		 * We are in data phase and there's no request, something is
+		 * wrong, stall
+		 */
+		if (!req) {
+			dev_warn(dev, "data phase, no request\n");
+			stall = true;
+			break;
+		}
+
+		/* We have a request, handle data transfers */
+		if (ep->ep0.dir_in)
+			ast_vhub_ep0_do_send(ep, req);
+		else
+			ast_vhub_ep0_do_receive(ep, req, VHUB_EP0_RX_LEN(stat));
+		return;
+	case ep0_state_status:
+		/* Nuke stale requests */
+		if (req) {
+			dev_warn(dev, "request present while in STATUS state\n");
+			ast_vhub_nuke(ep, -EINVAL);
+		}
+
+		/*
+		 * If the status phase completes with the wrong ack, stall
+		 * the endpoint just in case, to abort whatever the host
+		 * was doing.
+		 */
+		if (ep->ep0.dir_in == in_ack) {
+			dev_warn(dev, "status direction mismatch\n");
+			stall = true;
+		}
+	}
+
+	/* Reset to token state */
+	ep->ep0.state = ep0_state_token;
+	if (stall)
+		writel(VHUB_EP0_CTRL_STALL, ep->ep0.ctlstat);
+}
+
+static int ast_vhub_ep0_queue(struct usb_ep *u_ep, struct usb_request *u_req,
+			      gfp_t gfp_flags)
+{
+	struct ast_vhub_req *req = to_ast_req(u_req);
+	struct ast_vhub_ep *ep = to_ast_ep(u_ep);
+	struct ast_vhub *vhub = ep->vhub;
+	struct device *dev = &vhub->pdev->dev;
+	unsigned long flags;
+
+	/* Paranoid checks */
+	if (!u_req || (!u_req->complete && !req->internal)) {
+		dev_warn(dev, "Bogus EP0 request ! u_req=%p\n", u_req);
+		if (u_req) {
+			dev_warn(dev, "complete=%p internal=%d\n",
+				 u_req->complete, req->internal);
+		}
+		return -EINVAL;
+	}
+
+	/* Not endpoint 0 ? */
+	if (WARN_ON(ep->d_idx != 0))
+		return -EINVAL;
+
+	/* Disabled device */
+	if (ep->dev && (!ep->dev->enabled || ep->dev->suspended))
+		return -ESHUTDOWN;
+
+	/* Data, no buffer and not internal ? */
+	if (u_req->length && !u_req->buf && !req->internal) {
+		dev_warn(dev, "Request with no buffer !\n");
+		return -EINVAL;
+	}
+
+	EPVDBG(ep, "enqueue req @%p\n", req);
+	EPVDBG(ep, "  l=%d zero=%d noshort=%d is_in=%d\n",
+	       u_req->length, u_req->zero,
+	       u_req->short_not_ok, ep->ep0.dir_in);
+
+	/* Initialize request progress fields */
+	u_req->status = -EINPROGRESS;
+	u_req->actual = 0;
+	req->last_desc = -1;
+	req->active = false;
+
+	spin_lock_irqsave(&vhub->lock, flags);
+
+	/* EP0 can only support a single request at a time */
+	if (!list_empty(&ep->queue) || ep->ep0.state == ep0_state_token) {
+		dev_warn(dev, "EP0: Request in wrong state\n");
+		spin_unlock_irqrestore(&vhub->lock, flags);
+		return -EBUSY;
+	}
+
+	/* Add request to list and kick processing if empty */
+	list_add_tail(&req->queue, &ep->queue);
+
+	if (ep->ep0.dir_in) {
+		/* IN request, send data */
+		ast_vhub_ep0_do_send(ep, req);
+	} else if (u_req->length == 0) {
+		/* 0-len request, send completion as rx */
+		EPVDBG(ep, "0-length rx completion\n");
+		ep->ep0.state = ep0_state_status;
+		writel(VHUB_EP0_TX_BUFF_RDY, ep->ep0.ctlstat);
+		ast_vhub_done(ep, req, 0);
+	} else {
+		/* OUT request, start receiver */
+		ast_vhub_ep0_rx_prime(ep);
+	}
+
+	spin_unlock_irqrestore(&vhub->lock, flags);
+
+	return 0;
+}
+
+static int ast_vhub_ep0_dequeue(struct usb_ep *u_ep, struct usb_request *u_req)
+{
+	struct ast_vhub_ep *ep = to_ast_ep(u_ep);
+	struct ast_vhub *vhub = ep->vhub;
+	struct ast_vhub_req *req;
+	unsigned long flags;
+	int rc = -EINVAL;
+
+	spin_lock_irqsave(&vhub->lock, flags);
+
+	/* Only one request can be in the queue */
+	req = list_first_entry_or_null(&ep->queue, struct ast_vhub_req, queue);
+
+	/* Is it ours ? */
+	if (req && u_req == &req->req) {
+		EPVDBG(ep, "dequeue req @%p\n", req);
+
+		/*
+		 * We don't have to deal with "active" as all
+		 * DMAs go to the EP buffers, not the request.
+		 */
+		ast_vhub_done(ep, req, -ECONNRESET);
+
+		/* We do stall the EP to clean things up in HW */
+		writel(VHUB_EP0_CTRL_STALL, ep->ep0.ctlstat);
+		ep->ep0.state = ep0_state_status;
+		ep->ep0.dir_in = false;
+		rc = 0;
+	}
+	spin_unlock_irqrestore(&vhub->lock, flags);
+	return rc;
+}
+
+static const struct usb_ep_ops ast_vhub_ep0_ops = {
+	.queue		= ast_vhub_ep0_queue,
+	.dequeue	= ast_vhub_ep0_dequeue,
+	.alloc_request	= ast_vhub_alloc_request,
+	.free_request	= ast_vhub_free_request,
+};
+
+void ast_vhub_init_ep0(struct ast_vhub *vhub, struct ast_vhub_ep *ep,
+		       struct ast_vhub_dev *dev)
+{
+	memset(ep, 0, sizeof(*ep));
+
+	INIT_LIST_HEAD(&ep->ep.ep_list);
+	INIT_LIST_HEAD(&ep->queue);
+	ep->ep.ops = &ast_vhub_ep0_ops;
+	ep->ep.name = "ep0";
+	ep->ep.caps.type_control = true;
+	usb_ep_set_maxpacket_limit(&ep->ep, AST_VHUB_EP0_MAX_PACKET);
+	ep->d_idx = 0;
+	ep->dev = dev;
+	ep->vhub = vhub;
+	ep->ep0.state = ep0_state_token;
+	INIT_LIST_HEAD(&ep->ep0.req.queue);
+	ep->ep0.req.internal = true;
+
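+	/*
+	 * EP0 buffers come from the single coherent allocation done at
+	 * probe time: slot 0 belongs to the vHub itself, slots 1 to
+	 * AST_VHUB_NUM_PORTS to the per-port devices, each slot being
+	 * AST_VHUB_EP0_MAX_PACKET bytes.
+	 */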
+	/* Small difference between vHub and devices */
+	if (dev) {
+		ep->ep0.ctlstat = dev->regs + AST_VHUB_DEV_EP0_CTRL;
+		ep->ep0.setup = vhub->regs +
+			AST_VHUB_SETUP0 + 8 * (dev->index + 1);
+		ep->buf = vhub->ep0_bufs +
+			AST_VHUB_EP0_MAX_PACKET * (dev->index + 1);
+		ep->buf_dma = vhub->ep0_bufs_dma +
+			AST_VHUB_EP0_MAX_PACKET * (dev->index + 1);
+	} else {
+		ep->ep0.ctlstat = vhub->regs + AST_VHUB_EP0_CTRL;
+		ep->ep0.setup = vhub->regs + AST_VHUB_SETUP0;
+		ep->buf = vhub->ep0_bufs;
+		ep->buf_dma = vhub->ep0_bufs_dma;
+	}
+}
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/epn.c b/drivers/usb/gadget/udc/aspeed-vhub/epn.c
new file mode 100644
index 0000000..80c9fea
--- /dev/null
+++ b/drivers/usb/gadget/udc/aspeed-vhub/epn.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * aspeed-vhub -- Driver for Aspeed SoC "vHub" USB gadget
+ *
+ * epn.c - Generic endpoints management
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/prefetch.h>
+#include <linux/clk.h>
+#include <linux/usb/gadget.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/dma-mapping.h>
+
+#include "vhub.h"
+
+#define EXTRA_CHECKS
+
+#ifdef EXTRA_CHECKS
+#define CHECK(ep, expr, fmt...)					\
+	do {							\
+		if (!(expr)) EPDBG(ep, "CHECK:" fmt);		\
+	} while(0)
+#else
+#define CHECK(ep, expr, fmt...)	do { } while(0)
+#endif
+
+static void ast_vhub_epn_kick(struct ast_vhub_ep *ep, struct ast_vhub_req *req)
+{
+	unsigned int act = req->req.actual;
+	unsigned int len = req->req.length;
+	unsigned int chunk;
+
+	/* There should be no DMA ongoing */
+	WARN_ON(req->active);
+
+	/* Calculate next chunk size */
+	chunk = len - act;
+	if (chunk > ep->ep.maxpacket)
+		chunk = ep->ep.maxpacket;
+	else if ((chunk < ep->ep.maxpacket) || !req->req.zero)
+		req->last_desc = 1;
+
+	EPVDBG(ep, "kick req %p act=%d/%d chunk=%d last=%d\n",
+	       req, act, len, chunk, req->last_desc);
+
+	/* If DMA is unavailable, use the staging EP buffer */
+	if (!req->req.dma) {
+
+		/* For IN transfers, copy data over first */
+		if (ep->epn.is_in)
+			memcpy(ep->buf, req->req.buf + act, chunk);
+		writel(ep->buf_dma, ep->epn.regs + AST_VHUB_EP_DESC_BASE);
+	} else {
+		writel(req->req.dma + act,
+		       ep->epn.regs + AST_VHUB_EP_DESC_BASE);
+	}
+
+	/* Start DMA */
+	req->active = true;
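+	/*
+	 * Two-step write: the transfer size is latched first, presumably
+	 * so it is already stable when the HW samples the kick bit.
+	 */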
+	writel(VHUB_EP_DMA_SET_TX_SIZE(chunk),
+	       ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+	writel(VHUB_EP_DMA_SET_TX_SIZE(chunk) | VHUB_EP_DMA_SINGLE_KICK,
+	       ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+}
+
+static void ast_vhub_epn_handle_ack(struct ast_vhub_ep *ep)
+{
+	struct ast_vhub_req *req;
+	unsigned int len;
+	u32 stat;
+
+	/* Read EP status */
+	stat = readl(ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+
+	/* Grab current request if any */
+	req = list_first_entry_or_null(&ep->queue, struct ast_vhub_req, queue);
+
+	EPVDBG(ep, "ACK status=%08x is_in=%d, req=%p (active=%d)\n",
+	       stat, ep->epn.is_in, req, req ? req->active : 0);
+
+	/* In absence of a request, bail out, must have been dequeued */
+	if (!req)
+		return;
+
+	/*
+	 * The request isn't active: the previously active request was
+	 * probably dequeued, so kick processing of this one.
+	 */
+	if (!req->active)
+		goto next_chunk;
+
+	/* Check if HW has moved on */
+	if (VHUB_EP_DMA_RPTR(stat) != 0) {
+		EPDBG(ep, "DMA read pointer not 0 !\n");
+		return;
+	}
+
+	/* No current DMA ongoing */
+	req->active = false;
+
+	/* Grab length out of HW */
+	len = VHUB_EP_DMA_TX_SIZE(stat);
+
+	/* If not using DMA, copy data out if needed */
+	if (!req->req.dma && !ep->epn.is_in && len)
+		memcpy(req->req.buf + req->req.actual, ep->buf, len);
+
+	/* Adjust size */
+	req->req.actual += len;
+
+	/* Check for short packet */
+	if (len < ep->ep.maxpacket)
+		req->last_desc = 1;
+
+	/* That's it ? complete the request and pick a new one */
+	if (req->last_desc >= 0) {
+		ast_vhub_done(ep, req, 0);
+		req = list_first_entry_or_null(&ep->queue, struct ast_vhub_req,
+					       queue);
+
+		/*
+		 * Due to lock dropping inside "done" the next request could
+		 * already be active, so check for that and bail if needed.
+		 */
+		if (!req || req->active)
+			return;
+	}
+
+ next_chunk:
+	ast_vhub_epn_kick(ep, req);
+}
+
+static inline unsigned int ast_vhub_count_free_descs(struct ast_vhub_ep *ep)
+{
+	/*
+	 * d_next == d_last means descriptor list empty to HW,
+	 * thus we can only have AST_VHUB_DESCS_COUNT-1 descriptors
+	 * in the list
+	 */
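+	/*
+	 * Worked example with AST_VHUB_DESCS_COUNT == 256: d_last == 10
+	 * and d_next == 200 means 190 descriptors are pending, so
+	 * (10 + 256 - 200 - 1) & 255 == 65 slots remain free.
+	 */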
+	return (ep->epn.d_last + AST_VHUB_DESCS_COUNT - ep->epn.d_next - 1) &
+		(AST_VHUB_DESCS_COUNT - 1);
+}
+
+static void ast_vhub_epn_kick_desc(struct ast_vhub_ep *ep,
+				   struct ast_vhub_req *req)
+{
+	unsigned int act = req->act_count;
+	unsigned int len = req->req.length;
+	unsigned int chunk;
+
+	/* Mark request active if not already */
+	req->active = true;
+
+	/* If the request was already completely written, do nothing */
+	if (req->last_desc >= 0)
+		return;
+
+	EPVDBG(ep, "kick act=%d/%d chunk_max=%d free_descs=%d\n",
+	       act, len, ep->epn.chunk_max, ast_vhub_count_free_descs(ep));
+
+	/* While we can create descriptors */
+	while (ast_vhub_count_free_descs(ep) && req->last_desc < 0) {
+		struct ast_vhub_desc *desc;
+		unsigned int d_num;
+
+		/* Grab next free descriptor */
+		d_num = ep->epn.d_next;
+		desc = &ep->epn.descs[d_num];
+		ep->epn.d_next = (d_num + 1) & (AST_VHUB_DESCS_COUNT - 1);
+
+		/* Calculate next chunk size */
+		chunk = len - act;
+		if (chunk <= ep->epn.chunk_max) {
+			/*
+			 * Is this the last packet ? Because of having up to 8
+			 * packets in a descriptor we can't just compare "chunk"
+			 * with ep.maxpacket. We have to see if it's a multiple
+			 * of it to know if we have to send a zero packet.
+			 * Sadly that involves a modulo which is a bit expensive
+			 * but probably still better than not doing it.
+			 */
+			if (!chunk || !req->req.zero || (chunk % ep->ep.maxpacket) != 0)
+				req->last_desc = d_num;
+		} else {
+			chunk = ep->epn.chunk_max;
+		}
+
+		EPVDBG(ep, " chunk: act=%d/%d chunk=%d last=%d desc=%d free=%d\n",
+		       act, len, chunk, req->last_desc, d_num,
+		       ast_vhub_count_free_descs(ep));
+
+		/* Populate descriptor */
+		desc->w0 = cpu_to_le32(req->req.dma + act);
+
+		/* Interrupt if end of request or no more descriptors */
+
+		/*
+		 * TODO: Be smarter about it, if we don't have enough
+		 * descriptors request an interrupt before queue empty
+		 * or so in order to be able to populate more before
+		 * the HW runs out. This isn't a problem at the moment
+		 * as we use 256 descriptors and only put at most one
+		 * request in the ring.
+		 */
+		desc->w1 = cpu_to_le32(VHUB_DSC1_IN_SET_LEN(chunk));
+		if (req->last_desc >= 0 || !ast_vhub_count_free_descs(ep))
+			desc->w1 |= cpu_to_le32(VHUB_DSC1_IN_INTERRUPT);
+
+		/* Account packet */
+		req->act_count = act = act + chunk;
+	}
+
+	/* Tell HW about new descriptors */
+	writel(VHUB_EP_DMA_SET_CPU_WPTR(ep->epn.d_next),
+	       ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+
+	EPVDBG(ep, "HW kicked, d_next=%d dstat=%08x\n",
+	       ep->epn.d_next, readl(ep->epn.regs + AST_VHUB_EP_DESC_STATUS));
+}
+
+static void ast_vhub_epn_handle_ack_desc(struct ast_vhub_ep *ep)
+{
+	struct ast_vhub_req *req;
+	unsigned int len, d_last;
+	u32 stat, stat1;
+
+	/* Read EP status, workaround HW race */
+	do {
+		stat = readl(ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+		stat1 = readl(ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+	} while(stat != stat1);
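+	/*
+	 * Two consecutive identical reads give a consistent snapshot of
+	 * a register the HW may be updating underneath us.
+	 */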
+
+	/* Extract RPTR */
+	d_last = VHUB_EP_DMA_RPTR(stat);
+
+	/* Grab current request if any */
+	req = list_first_entry_or_null(&ep->queue, struct ast_vhub_req, queue);
+
+	EPVDBG(ep, "ACK status=%08x is_in=%d ep->d_last=%d..%d\n",
+	       stat, ep->epn.is_in, ep->epn.d_last, d_last);
+
+	/* Check all completed descriptors */
+	while (ep->epn.d_last != d_last) {
+		struct ast_vhub_desc *desc;
+		unsigned int d_num;
+		bool is_last_desc;
+
+		/* Grab next completed descriptor */
+		d_num = ep->epn.d_last;
+		desc = &ep->epn.descs[d_num];
+		ep->epn.d_last = (d_num + 1) & (AST_VHUB_DESCS_COUNT - 1);
+
+		/* Grab len out of descriptor */
+		len = VHUB_DSC1_IN_LEN(le32_to_cpu(desc->w1));
+
+		EPVDBG(ep, " desc %d len=%d req=%p (act=%d)\n",
+		       d_num, len, req, req ? req->active : 0);
+
+		/* If no active request pending, move on */
+		if (!req || !req->active)
+			continue;
+
+		/* Adjust size */
+		req->req.actual += len;
+
+		/* Is that the last chunk ? */
+		is_last_desc = req->last_desc == d_num;
+		CHECK(ep, is_last_desc == (len < ep->ep.maxpacket ||
+					   (req->req.actual >= req->req.length &&
+					    !req->req.zero)),
+		      "Last packet discrepancy: last_desc=%d len=%d r.act=%d "
+		      "r.len=%d r.zero=%d mp=%d\n",
+		      is_last_desc, len, req->req.actual, req->req.length,
+		      req->req.zero, ep->ep.maxpacket);
+
+		if (is_last_desc) {
+			/*
+			 * Because we can only have one request at a time
+			 * in our descriptor list in this implementation,
+			 * d_last and ep->d_last should now be equal
+			 */
+			CHECK(ep, d_last == ep->epn.d_last,
+			      "DMA read ptr mismatch %d vs %d\n",
+			      d_last, ep->epn.d_last);
+
+			/* Note: done will drop and re-acquire the lock */
+			ast_vhub_done(ep, req, 0);
+			req = list_first_entry_or_null(&ep->queue,
+						       struct ast_vhub_req,
+						       queue);
+			break;
+		}
+	}
+
+	/* More work ? */
+	if (req)
+		ast_vhub_epn_kick_desc(ep, req);
+}
+
+void ast_vhub_epn_ack_irq(struct ast_vhub_ep *ep)
+{
+	if (ep->epn.desc_mode)
+		ast_vhub_epn_handle_ack_desc(ep);
+	else
+		ast_vhub_epn_handle_ack(ep);
+}
+
+static int ast_vhub_epn_queue(struct usb_ep *u_ep, struct usb_request *u_req,
+			      gfp_t gfp_flags)
+{
+	struct ast_vhub_req *req = to_ast_req(u_req);
+	struct ast_vhub_ep *ep = to_ast_ep(u_ep);
+	struct ast_vhub *vhub = ep->vhub;
+	unsigned long flags;
+	bool empty;
+	int rc;
+
+	/* Paranoid checks */
+	if (!u_req || !u_req->complete || !u_req->buf) {
+		dev_warn(&vhub->pdev->dev, "Bogus EPn request ! u_req=%p\n", u_req);
+		if (u_req) {
+			dev_warn(&vhub->pdev->dev, "complete=%p internal=%d\n",
+				 u_req->complete, req->internal);
+		}
+		return -EINVAL;
+	}
+
+	/* Endpoint enabled ? */
+	if (!ep->epn.enabled || !u_ep->desc || !ep->dev || !ep->d_idx ||
+	    !ep->dev->enabled || ep->dev->suspended) {
+		EPDBG(ep,"Enqueing request on wrong or disabled EP\n");
+		return -ESHUTDOWN;
+	}
+
+	/* Map request for DMA if possible. For now, the rule for DMA is
+	 * that:
+	 *
+	 *  * For single stage mode (no descriptors):
+	 *
+	 *   - The buffer is aligned to a 8 bytes boundary (HW requirement)
+	 *   - For an OUT endpoint, the request size is a multiple of the EP
+	 *     packet size (otherwise the controller will DMA past the end
+	 *     of the buffer if the host sends a packet that is too long).
+	 *
+	 *  * For descriptor mode (tx only for now), always.
+	 *
+	 * We could relax the latter by making the decision to use the bounce
+	 * buffer based on the size of a given *segment* of the request rather
+	 * than the whole request.
+	 */
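+	/*
+	 * For instance, an 8-byte aligned, 512-byte bulk OUT request on
+	 * a 512-byte endpoint is mapped for DMA, while a 100-byte OUT
+	 * request on the same endpoint goes through the staging buffer.
+	 */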
+	if (ep->epn.desc_mode ||
+	    ((((unsigned long)u_req->buf & 7) == 0) &&
+	     (ep->epn.is_in || !(u_req->length & (u_ep->maxpacket - 1))))) {
+		rc = usb_gadget_map_request(&ep->dev->gadget, u_req,
+					    ep->epn.is_in);
+		if (rc) {
+			dev_warn(&vhub->pdev->dev,
+				 "Request mapping failure %d\n", rc);
+			return rc;
+		}
+	} else {
+		u_req->dma = 0;
+	}
+
+	EPVDBG(ep, "enqueue req @%p\n", req);
+	EPVDBG(ep, " l=%d dma=0x%x zero=%d noshort=%d noirq=%d is_in=%d\n",
+	       u_req->length, (u32)u_req->dma, u_req->zero,
+	       u_req->short_not_ok, u_req->no_interrupt,
+	       ep->epn.is_in);
+
+	/* Initialize request progress fields */
+	u_req->status = -EINPROGRESS;
+	u_req->actual = 0;
+	req->act_count = 0;
+	req->active = false;
+	req->last_desc = -1;
+	spin_lock_irqsave(&vhub->lock, flags);
+	empty = list_empty(&ep->queue);
+
+	/* Add request to list and kick processing if empty */
+	list_add_tail(&req->queue, &ep->queue);
+	if (empty) {
+		if (ep->epn.desc_mode)
+			ast_vhub_epn_kick_desc(ep, req);
+		else
+			ast_vhub_epn_kick(ep, req);
+	}
+	spin_unlock_irqrestore(&vhub->lock, flags);
+
+	return 0;
+}
+
+static void ast_vhub_stop_active_req(struct ast_vhub_ep *ep,
+				     bool restart_ep)
+{
+	u32 state, reg, loops;
+
+	/* Stop DMA activity */
+	writel(0, ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+
+	/* Wait for it to complete */
+	for (loops = 0; loops < 1000; loops++) {
+		state = readl(ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+		state = VHUB_EP_DMA_PROC_STATUS(state);
+		if (state == EP_DMA_PROC_RX_IDLE ||
+		    state == EP_DMA_PROC_TX_IDLE)
+			break;
+		udelay(1);
+	}
+	if (loops >= 1000)
+		dev_warn(&ep->vhub->pdev->dev, "Timeout waiting for DMA\n");
+
+	/* If we don't have to restart the endpoint, that's it */
+	if (!restart_ep)
+		return;
+
+	/* Restart the endpoint */
+	if (ep->epn.desc_mode) {
+		/*
+		 * Take out descriptors by resetting the DMA read
+		 * pointer to be equal to the CPU write pointer.
+		 *
+		 * Note: If we ever support creating descriptors for
+		 * requests that aren't the head of the queue, we
+		 * may have to do something more complex here,
+		 * especially if the request being taken out is
+		 * not the current head descriptors.
+		 */
+		reg = VHUB_EP_DMA_SET_RPTR(ep->epn.d_next) |
+			VHUB_EP_DMA_SET_CPU_WPTR(ep->epn.d_next);
+		writel(reg, ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+
+		/* Then turn it back on */
+		writel(ep->epn.dma_conf,
+		       ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+	} else {
+		/* Single mode: just turn it back on */
+		writel(ep->epn.dma_conf,
+		       ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+	}
+}
+
+static int ast_vhub_epn_dequeue(struct usb_ep *u_ep, struct usb_request *u_req)
+{
+	struct ast_vhub_ep *ep = to_ast_ep(u_ep);
+	struct ast_vhub *vhub = ep->vhub;
+	struct ast_vhub_req *req;
+	unsigned long flags;
+	int rc = -EINVAL;
+
+	spin_lock_irqsave(&vhub->lock, flags);
+
+	/* Make sure it's actually queued on this endpoint */
+	list_for_each_entry(req, &ep->queue, queue) {
+		if (&req->req == u_req)
+			break;
+	}
+
+	if (&req->req == u_req) {
+		EPVDBG(ep, "dequeue req @%p active=%d\n",
+		       req, req->active);
+		if (req->active)
+			ast_vhub_stop_active_req(ep, true);
+		ast_vhub_done(ep, req, -ECONNRESET);
+		rc = 0;
+	}
+
+	spin_unlock_irqrestore(&vhub->lock, flags);
+	return rc;
+}
+
+void ast_vhub_update_epn_stall(struct ast_vhub_ep *ep)
+{
+	u32 reg;
+
+	if (WARN_ON(ep->d_idx == 0))
+		return;
+	reg = readl(ep->epn.regs + AST_VHUB_EP_CONFIG);
+	if (ep->epn.stalled || ep->epn.wedged)
+		reg |= VHUB_EP_CFG_STALL_CTRL;
+	else
+		reg &= ~VHUB_EP_CFG_STALL_CTRL;
+	writel(reg, ep->epn.regs + AST_VHUB_EP_CONFIG);
+
+	if (!ep->epn.stalled && !ep->epn.wedged)
+		writel(VHUB_EP_TOGGLE_SET_EPNUM(ep->epn.g_idx),
+		       ep->vhub->regs + AST_VHUB_EP_TOGGLE);
+}
+
+static int ast_vhub_set_halt_and_wedge(struct usb_ep *u_ep, bool halt,
+				       bool wedge)
+{
+	struct ast_vhub_ep *ep;
+	struct ast_vhub *vhub;
+	unsigned long flags;
+
+	/* Validate u_ep before dereferencing anything hanging off it */
+	if (!u_ep || !u_ep->desc)
+		return -EINVAL;
+
+	ep = to_ast_ep(u_ep);
+	vhub = ep->vhub;
+
+	EPDBG(ep, "Set halt (%d) & wedge (%d)\n", halt, wedge);
+	if (ep->d_idx == 0)
+		return 0;
+	if (ep->epn.is_iso)
+		return -EOPNOTSUPP;
+
+	spin_lock_irqsave(&vhub->lock, flags);
+
+	/* Fail with still-busy IN endpoints */
+	if (halt && ep->epn.is_in && !list_empty(&ep->queue)) {
+		spin_unlock_irqrestore(&vhub->lock, flags);
+		return -EAGAIN;
+	}
+	ep->epn.stalled = halt;
+	ep->epn.wedged = wedge;
+	ast_vhub_update_epn_stall(ep);
+
+	spin_unlock_irqrestore(&vhub->lock, flags);
+
+	return 0;
+}
+
+static int ast_vhub_epn_set_halt(struct usb_ep *u_ep, int value)
+{
+	return ast_vhub_set_halt_and_wedge(u_ep, value != 0, false);
+}
+
+static int ast_vhub_epn_set_wedge(struct usb_ep *u_ep)
+{
+	return ast_vhub_set_halt_and_wedge(u_ep, true, true);
+}
+
+static int ast_vhub_epn_disable(struct usb_ep *u_ep)
+{
+	struct ast_vhub_ep *ep = to_ast_ep(u_ep);
+	struct ast_vhub *vhub = ep->vhub;
+	unsigned long flags;
+	u32 imask, ep_ier;
+
+	EPDBG(ep, "Disabling !\n");
+
+	spin_lock_irqsave(&vhub->lock, flags);
+
+	ep->epn.enabled = false;
+
+	/* Stop active DMA if any */
+	ast_vhub_stop_active_req(ep, false);
+
+	/* Disable endpoint */
+	writel(0, ep->epn.regs + AST_VHUB_EP_CONFIG);
+
+	/* Disable ACK interrupt */
+	imask = VHUB_EP_IRQ(ep->epn.g_idx);
+	ep_ier = readl(vhub->regs + AST_VHUB_EP_ACK_IER);
+	ep_ier &= ~imask;
+	writel(ep_ier, vhub->regs + AST_VHUB_EP_ACK_IER);
+	writel(imask, vhub->regs + AST_VHUB_EP_ACK_ISR);
+
+	/* Nuke all pending requests */
+	ast_vhub_nuke(ep, -ESHUTDOWN);
+
+	/* No more descriptor associated with request */
+	ep->ep.desc = NULL;
+
+	spin_unlock_irqrestore(&vhub->lock, flags);
+
+	return 0;
+}
+
+static int ast_vhub_epn_enable(struct usb_ep *u_ep,
+			       const struct usb_endpoint_descriptor *desc)
+{
+	static const char *ep_type_string[] __maybe_unused = { "ctrl",
+							       "isoc",
+							       "bulk",
+							       "intr" };
+	struct ast_vhub_ep *ep = to_ast_ep(u_ep);
+	struct ast_vhub_dev *dev;
+	struct ast_vhub *vhub;
+	u16 maxpacket, type;
+	unsigned long flags;
+	u32 ep_conf, ep_ier, imask;
+
+	/* Check arguments */
+	if (!u_ep || !desc)
+		return -EINVAL;
+
+	maxpacket = usb_endpoint_maxp(desc);
+	if (!ep->d_idx || !ep->dev ||
+	    desc->bDescriptorType != USB_DT_ENDPOINT ||
+	    maxpacket == 0 || maxpacket > ep->ep.maxpacket) {
+		EPDBG(ep, "Invalid EP enable,d_idx=%d,dev=%p,type=%d,mp=%d/%d\n",
+		      ep->d_idx, ep->dev, desc->bDescriptorType,
+		      maxpacket, ep->ep.maxpacket);
+		return -EINVAL;
+	}
+	if (ep->d_idx != usb_endpoint_num(desc)) {
+		EPDBG(ep, "EP number mismatch !\n");
+		return -EINVAL;
+	}
+
+	if (ep->epn.enabled) {
+		EPDBG(ep, "Already enabled\n");
+		return -EBUSY;
+	}
+	dev = ep->dev;
+	vhub = ep->vhub;
+
+	/* Check device state */
+	if (!dev->driver) {
+		EPDBG(ep, "Bogus device state: driver=%p speed=%d\n",
+		       dev->driver, dev->gadget.speed);
+		return -ESHUTDOWN;
+	}
+
+	/* Grab some info from the descriptor */
+	ep->epn.is_in = usb_endpoint_dir_in(desc);
+	ep->ep.maxpacket = maxpacket;
+	type = usb_endpoint_type(desc);
+	ep->epn.d_next = ep->epn.d_last = 0;
+	ep->epn.is_iso = false;
+	ep->epn.stalled = false;
+	ep->epn.wedged = false;
+
+	EPDBG(ep, "Enabling [%s] %s num %d maxpacket=%d\n",
+	      ep->epn.is_in ? "in" : "out", ep_type_string[type],
+	      usb_endpoint_num(desc), maxpacket);
+
+	/* Can we use DMA descriptor mode ? */
+	ep->epn.desc_mode = ep->epn.descs && ep->epn.is_in;
+	if (ep->epn.desc_mode)
+		memset(ep->epn.descs, 0, 8 * AST_VHUB_DESCS_COUNT);
+
+	/*
+	 * Large send function can send up to 8 packets from
+	 * one descriptor with a limit of 4095 bytes.
+	 */
+	ep->epn.chunk_max = ep->ep.maxpacket;
+	if (ep->epn.is_in) {
+		ep->epn.chunk_max <<= 3;
+		while (ep->epn.chunk_max > 4095)
+			ep->epn.chunk_max -= ep->ep.maxpacket;
+	}
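+	/*
+	 * For example, a 512-byte bulk IN endpoint starts at
+	 * 512 << 3 = 4096, one packet over the 4095-byte limit, so
+	 * chunk_max settles at 3584 bytes (7 packets per descriptor).
+	 */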
+
+	switch (type) {
+	case USB_ENDPOINT_XFER_CONTROL:
+		EPDBG(ep, "Only one control endpoint\n");
+		return -EINVAL;
+	case USB_ENDPOINT_XFER_INT:
+		ep_conf = VHUB_EP_CFG_SET_TYPE(EP_TYPE_INT);
+		break;
+	case USB_ENDPOINT_XFER_BULK:
+		ep_conf = VHUB_EP_CFG_SET_TYPE(EP_TYPE_BULK);
+		break;
+	case USB_ENDPOINT_XFER_ISOC:
+		ep_conf = VHUB_EP_CFG_SET_TYPE(EP_TYPE_ISO);
+		ep->epn.is_iso = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Encode the rest of the EP config register */
+	if (maxpacket < 1024)
+		ep_conf |= VHUB_EP_CFG_SET_MAX_PKT(maxpacket);
+	if (!ep->epn.is_in)
+		ep_conf |= VHUB_EP_CFG_DIR_OUT;
+	ep_conf |= VHUB_EP_CFG_SET_EP_NUM(usb_endpoint_num(desc));
+	ep_conf |= VHUB_EP_CFG_ENABLE;
+	ep_conf |= VHUB_EP_CFG_SET_DEV(dev->index + 1);
+	EPVDBG(ep, "config=%08x\n", ep_conf);
+
+	spin_lock_irqsave(&vhub->lock, flags);
+
+	/* Disable HW and reset DMA */
+	writel(0, ep->epn.regs + AST_VHUB_EP_CONFIG);
+	writel(VHUB_EP_DMA_CTRL_RESET,
+	       ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+
+	/* Configure and enable */
+	writel(ep_conf, ep->epn.regs + AST_VHUB_EP_CONFIG);
+
+	if (ep->epn.desc_mode) {
+		/* Clear DMA status, including the DMA read ptr */
+		writel(0, ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+
+		/* Set descriptor base */
+		writel(ep->epn.descs_dma,
+		       ep->epn.regs + AST_VHUB_EP_DESC_BASE);
+
+		/* Set base DMA config value */
+		ep->epn.dma_conf = VHUB_EP_DMA_DESC_MODE;
+		if (ep->epn.is_in)
+			ep->epn.dma_conf |= VHUB_EP_DMA_IN_LONG_MODE;
+
+		/* First reset and disable all operations */
+		writel(ep->epn.dma_conf | VHUB_EP_DMA_CTRL_RESET,
+		       ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+
+		/* Enable descriptor mode */
+		writel(ep->epn.dma_conf,
+		       ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+	} else {
+		/* Set base DMA config value */
+		ep->epn.dma_conf = VHUB_EP_DMA_SINGLE_STAGE;
+
+		/* Reset and switch to single stage mode */
+		writel(ep->epn.dma_conf | VHUB_EP_DMA_CTRL_RESET,
+		       ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+		writel(ep->epn.dma_conf,
+		       ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT);
+		writel(0, ep->epn.regs + AST_VHUB_EP_DESC_STATUS);
+	}
+
+	/* Cleanup data toggle just in case */
+	writel(VHUB_EP_TOGGLE_SET_EPNUM(ep->epn.g_idx),
+	       vhub->regs + AST_VHUB_EP_TOGGLE);
+
+	/* Cleanup and enable ACK interrupt */
+	imask = VHUB_EP_IRQ(ep->epn.g_idx);
+	writel(imask, vhub->regs + AST_VHUB_EP_ACK_ISR);
+	ep_ier = readl(vhub->regs + AST_VHUB_EP_ACK_IER);
+	ep_ier |= imask;
+	writel(ep_ier, vhub->regs + AST_VHUB_EP_ACK_IER);
+
+	/* Woot, we are online ! */
+	ep->epn.enabled = true;
+
+	spin_unlock_irqrestore(&vhub->lock, flags);
+
+	return 0;
+}
+
+static void ast_vhub_epn_dispose(struct usb_ep *u_ep)
+{
+	struct ast_vhub_ep *ep = to_ast_ep(u_ep);
+
+	if (WARN_ON(!ep->dev || !ep->d_idx))
+		return;
+
+	EPDBG(ep, "Releasing endpoint\n");
+
+	/* Take it out of the EP list */
+	list_del_init(&ep->ep.ep_list);
+
+	/* Mark the address free in the device */
+	ep->dev->epns[ep->d_idx - 1] = NULL;
+
+	/* Free name & DMA buffers */
+	kfree(ep->ep.name);
+	ep->ep.name = NULL;
+	dma_free_coherent(&ep->vhub->pdev->dev,
+			  AST_VHUB_EPn_MAX_PACKET +
+			  8 * AST_VHUB_DESCS_COUNT,
+			  ep->buf, ep->buf_dma);
+	ep->buf = NULL;
+	ep->epn.descs = NULL;
+
+	/* Mark free */
+	ep->dev = NULL;
+}
+
+static const struct usb_ep_ops ast_vhub_epn_ops = {
+	.enable		= ast_vhub_epn_enable,
+	.disable	= ast_vhub_epn_disable,
+	.dispose	= ast_vhub_epn_dispose,
+	.queue		= ast_vhub_epn_queue,
+	.dequeue	= ast_vhub_epn_dequeue,
+	.set_halt	= ast_vhub_epn_set_halt,
+	.set_wedge	= ast_vhub_epn_set_wedge,
+	.alloc_request	= ast_vhub_alloc_request,
+	.free_request	= ast_vhub_free_request,
+};
+
+struct ast_vhub_ep *ast_vhub_alloc_epn(struct ast_vhub_dev *d, u8 addr)
+{
+	struct ast_vhub *vhub = d->vhub;
+	struct ast_vhub_ep *ep;
+	unsigned long flags;
+	int i;
+
+	/* Find a free one (no device) */
+	spin_lock_irqsave(&vhub->lock, flags);
+	for (i = 0; i < AST_VHUB_NUM_GEN_EPs; i++)
+		if (vhub->epns[i].dev == NULL)
+			break;
+	if (i >= AST_VHUB_NUM_GEN_EPs) {
+		spin_unlock_irqrestore(&vhub->lock, flags);
+		return NULL;
+	}
+
+	/* Set it up */
+	ep = &vhub->epns[i];
+	ep->dev = d;
+	spin_unlock_irqrestore(&vhub->lock, flags);
+
+	DDBG(d, "Allocating gen EP %d for addr %d\n", i, addr);
+	INIT_LIST_HEAD(&ep->queue);
+	ep->d_idx = addr;
+	ep->vhub = vhub;
+	ep->ep.ops = &ast_vhub_epn_ops;
+	ep->ep.name = kasprintf(GFP_KERNEL, "ep%d", addr);
+	d->epns[addr-1] = ep;
+	ep->epn.g_idx = i;
+	ep->epn.regs = vhub->regs + 0x200 + (i * 0x10);
+
+	ep->buf = dma_alloc_coherent(&vhub->pdev->dev,
+				     AST_VHUB_EPn_MAX_PACKET +
+				     8 * AST_VHUB_DESCS_COUNT,
+				     &ep->buf_dma, GFP_KERNEL);
+	if (!ep->buf) {
+		kfree(ep->ep.name);
+		ep->ep.name = NULL;
+		return NULL;
+	}
+	ep->epn.descs = ep->buf + AST_VHUB_EPn_MAX_PACKET;
+	ep->epn.descs_dma = ep->buf_dma + AST_VHUB_EPn_MAX_PACKET;
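+	/*
+	 * The coherent allocation above is carved in two: the first
+	 * AST_VHUB_EPn_MAX_PACKET bytes are the bounce buffer for
+	 * misaligned OUT transfers, followed by 8 * AST_VHUB_DESCS_COUNT
+	 * bytes of IN DMA descriptors (8 bytes each).
+	 */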
+
+	usb_ep_set_maxpacket_limit(&ep->ep, AST_VHUB_EPn_MAX_PACKET);
+	list_add_tail(&ep->ep.ep_list, &d->gadget.ep_list);
+	ep->ep.caps.type_iso = true;
+	ep->ep.caps.type_bulk = true;
+	ep->ep.caps.type_int = true;
+	ep->ep.caps.dir_in = true;
+	ep->ep.caps.dir_out = true;
+
+	return ep;
+}
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/hub.c b/drivers/usb/gadget/udc/aspeed-vhub/hub.c
new file mode 100644
index 0000000..35ba0e5
--- /dev/null
+++ b/drivers/usb/gadget/udc/aspeed-vhub/hub.c
@@ -0,0 +1,829 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * aspeed-vhub -- Driver for Aspeed SoC "vHub" USB gadget
+ *
+ * hub.c - virtual hub handling
+ *
+ * Copyright 2017 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/prefetch.h>
+#include <linux/clk.h>
+#include <linux/usb/gadget.h>
+#include <linux/of.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <linux/dma-mapping.h>
+#include <linux/bcd.h>
+#include <linux/version.h>
+#include <linux/usb.h>
+#include <linux/usb/hcd.h>
+
+#include "vhub.h"
+
+/* usb 2.0 hub device descriptor
+ *
+ * A few things we may want to improve here:
+ *
+ *    - We may need to indicate TT support
+ *    - We may need a device qualifier descriptor
+ *      as devices can pretend to be usb1 or 2
+ *    - Make vid/did overridable
+ *    - Make it look like usb1 if usb1 mode forced
+ */
+#define KERNEL_REL	bin2bcd(((LINUX_VERSION_CODE >> 16) & 0x0ff))
+#define KERNEL_VER	bin2bcd(((LINUX_VERSION_CODE >> 8) & 0x0ff))
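+/*
+ * For example, with LINUX_VERSION_CODE 0x041100 (v4.17), KERNEL_REL is
+ * bin2bcd(4) = 0x04 and KERNEL_VER is bin2bcd(17) = 0x17, giving a
+ * BCD-encoded kernel version for use in bcd* descriptor fields.
+ */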
+
+enum {
+	AST_VHUB_STR_MANUF = 3,
+	AST_VHUB_STR_PRODUCT = 2,
+	AST_VHUB_STR_SERIAL = 1,
+};
+
+static const struct usb_device_descriptor ast_vhub_dev_desc = {
+	.bLength		= USB_DT_DEVICE_SIZE,
+	.bDescriptorType	= USB_DT_DEVICE,
+	.bcdUSB			= cpu_to_le16(0x0200),
+	.bDeviceClass		= USB_CLASS_HUB,
+	.bDeviceSubClass	= 0,
+	.bDeviceProtocol	= 1,
+	.bMaxPacketSize0	= 64,
+	.idVendor		= cpu_to_le16(0x1d6b),
+	.idProduct		= cpu_to_le16(0x0107),
+	.bcdDevice		= cpu_to_le16(0x0100),
+	.iManufacturer		= AST_VHUB_STR_MANUF,
+	.iProduct		= AST_VHUB_STR_PRODUCT,
+	.iSerialNumber		= AST_VHUB_STR_SERIAL,
+	.bNumConfigurations	= 1,
+};
+
+/* Patches to the above when forcing USB1 mode */
+static void ast_vhub_patch_dev_desc_usb1(struct usb_device_descriptor *desc)
+{
+	desc->bcdUSB = cpu_to_le16(0x0100);
+	desc->bDeviceProtocol = 0;
+}
+
+/*
+ * Configuration descriptor: same comments as above
+ * regarding handling USB1 mode.
+ */
+
+/*
+ * We don't use sizeof() as Linux definition of
+ * struct usb_endpoint_descriptor contains 2
+ * extra bytes
+ */
+#define AST_VHUB_CONF_DESC_SIZE	(USB_DT_CONFIG_SIZE + \
+				 USB_DT_INTERFACE_SIZE + \
+				 USB_DT_ENDPOINT_SIZE)
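+/*
+ * (A wire-format endpoint descriptor is USB_DT_ENDPOINT_SIZE (7)
+ * bytes, while struct usb_endpoint_descriptor also carries
+ * bRefresh/bSynchAddress for audio endpoints, so sizeof() would
+ * overstate wTotalLength by 2.)
+ */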
+
+static const struct ast_vhub_full_cdesc {
+	struct usb_config_descriptor	cfg;
+	struct usb_interface_descriptor intf;
+	struct usb_endpoint_descriptor	ep;
+} __attribute__ ((packed)) ast_vhub_conf_desc = {
+	.cfg = {
+		.bLength		= USB_DT_CONFIG_SIZE,
+		.bDescriptorType	= USB_DT_CONFIG,
+		.wTotalLength		= cpu_to_le16(AST_VHUB_CONF_DESC_SIZE),
+		.bNumInterfaces		= 1,
+		.bConfigurationValue	= 1,
+		.iConfiguration		= 0,
+		.bmAttributes		= USB_CONFIG_ATT_ONE |
+					  USB_CONFIG_ATT_SELFPOWER |
+					  USB_CONFIG_ATT_WAKEUP,
+		.bMaxPower		= 0,
+	},
+	.intf = {
+		.bLength		= USB_DT_INTERFACE_SIZE,
+		.bDescriptorType	= USB_DT_INTERFACE,
+		.bInterfaceNumber	= 0,
+		.bAlternateSetting	= 0,
+		.bNumEndpoints		= 1,
+		.bInterfaceClass	= USB_CLASS_HUB,
+		.bInterfaceSubClass	= 0,
+		.bInterfaceProtocol	= 0,
+		.iInterface		= 0,
+	},
+	.ep = {
+		.bLength		= USB_DT_ENDPOINT_SIZE,
+		.bDescriptorType	= USB_DT_ENDPOINT,
+		.bEndpointAddress	= 0x81,
+		.bmAttributes		= USB_ENDPOINT_XFER_INT,
+		.wMaxPacketSize		= cpu_to_le16(1),
+		.bInterval		= 0x0c,
+	},
+};
+
+#define AST_VHUB_HUB_DESC_SIZE	(USB_DT_HUB_NONVAR_SIZE + 2)
+
+static const struct usb_hub_descriptor ast_vhub_hub_desc = {
+	.bDescLength			= AST_VHUB_HUB_DESC_SIZE,
+	.bDescriptorType		= USB_DT_HUB,
+	.bNbrPorts			= AST_VHUB_NUM_PORTS,
+	.wHubCharacteristics		= cpu_to_le16(HUB_CHAR_NO_LPSM),
+	.bPwrOn2PwrGood			= 10,
+	.bHubContrCurrent		= 0,
+	.u.hs.DeviceRemovable[0]	= 0,
+	.u.hs.DeviceRemovable[1]	= 0xff,
+};
+
+/*
+ * These strings converted to UTF-16 must be smaller than
+ * our EP0 buffer.
+ */
+static const struct usb_string ast_vhub_str_array[] = {
+	{
+		.id = AST_VHUB_STR_SERIAL,
+		.s = "00000000"
+	},
+	{
+		.id = AST_VHUB_STR_PRODUCT,
+		.s = "USB Virtual Hub"
+	},
+	{
+		.id = AST_VHUB_STR_MANUF,
+		.s = "Aspeed"
+	},
+	{ }
+};
+
+static const struct usb_gadget_strings ast_vhub_strings = {
+	.language = 0x0409,
+	.strings = (struct usb_string *)ast_vhub_str_array
+};
+
+static int ast_vhub_hub_dev_status(struct ast_vhub_ep *ep,
+				   u16 wIndex, u16 wValue)
+{
+	u8 st0;
+
+	EPDBG(ep, "GET_STATUS(dev)\n");
+
+	/*
+	 * Mark it as self-powered, I doubt the BMC is powered off
+	 * the USB bus ...
+	 */
+	st0 = 1 << USB_DEVICE_SELF_POWERED;
+
+	/*
+	 * Need to double check how remote wakeup actually works
+	 * on that chip and what triggers it.
+	 */
+	if (ep->vhub->wakeup_en)
+		st0 |= 1 << USB_DEVICE_REMOTE_WAKEUP;
+
+	return ast_vhub_simple_reply(ep, st0, 0);
+}
+
+static int ast_vhub_hub_ep_status(struct ast_vhub_ep *ep,
+				  u16 wIndex, u16 wValue)
+{
+	int ep_num;
+	u8 st0 = 0;
+
+	ep_num = wIndex & USB_ENDPOINT_NUMBER_MASK;
+	EPDBG(ep, "GET_STATUS(ep%d)\n", ep_num);
+
+	/* On the hub we have only EP 0 and 1 */
+	if (ep_num == 1) {
+		if (ep->vhub->ep1_stalled)
+			st0 |= 1 << USB_ENDPOINT_HALT;
+	} else if (ep_num != 0) {
+		return std_req_stall;
+	}
+
+	return ast_vhub_simple_reply(ep, st0, 0);
+}
+
+static int ast_vhub_hub_dev_feature(struct ast_vhub_ep *ep,
+				    u16 wIndex, u16 wValue,
+				    bool is_set)
+{
+	EPDBG(ep, "%s_FEATURE(dev val=%02x)\n",
+	      is_set ? "SET" : "CLEAR", wValue);
+
+	if (wValue != USB_DEVICE_REMOTE_WAKEUP)
+		return std_req_stall;
+
+	ep->vhub->wakeup_en = is_set;
+	EPDBG(ep, "Hub remote wakeup %s\n",
+	      is_set ? "enabled" : "disabled");
+
+	return std_req_complete;
+}
+
+static int ast_vhub_hub_ep_feature(struct ast_vhub_ep *ep,
+				   u16 wIndex, u16 wValue,
+				   bool is_set)
+{
+	int ep_num;
+	u32 reg;
+
+	ep_num = wIndex & USB_ENDPOINT_NUMBER_MASK;
+	EPDBG(ep, "%s_FEATURE(ep%d val=%02x)\n",
+	      is_set ? "SET" : "CLEAR", ep_num, wValue);
+
+	if (ep_num > 1)
+		return std_req_stall;
+	if (wValue != USB_ENDPOINT_HALT)
+		return std_req_stall;
+	if (ep_num == 0)
+		return std_req_complete;
+
+	EPDBG(ep, "%s stall on EP 1\n",
+	      is_set ? "setting" : "clearing");
+
+	ep->vhub->ep1_stalled = is_set;
+	reg = readl(ep->vhub->regs + AST_VHUB_EP1_CTRL);
+	if (is_set) {
+		reg |= VHUB_EP1_CTRL_STALL;
+	} else {
+		reg &= ~VHUB_EP1_CTRL_STALL;
+		reg |= VHUB_EP1_CTRL_RESET_TOGGLE;
+	}
+	writel(reg, ep->vhub->regs + AST_VHUB_EP1_CTRL);
+
+	return std_req_complete;
+}
+
+static int ast_vhub_rep_desc(struct ast_vhub_ep *ep,
+			     u8 desc_type, u16 len)
+{
+	size_t dsize;
+
+	EPDBG(ep, "GET_DESCRIPTOR(type:%d)\n", desc_type);
+
+	/*
+	 * Copy first to EP buffer and send from there, so
+	 * we can do some in-place patching if needed. We know
+	 * the EP buffer is big enough but ensure that doesn't
+	 * change. We do that now rather than later after we
+	 * have checked sizes etc... to avoid a gcc bug where
+	 * it thinks len is constant and barfs about read
+	 * overflows in memcpy.
+	 */
+	switch (desc_type) {
+	case USB_DT_DEVICE:
+		dsize = USB_DT_DEVICE_SIZE;
+		memcpy(ep->buf, &ast_vhub_dev_desc, dsize);
+		BUILD_BUG_ON(dsize > sizeof(ast_vhub_dev_desc));
+		BUILD_BUG_ON(USB_DT_DEVICE_SIZE >= AST_VHUB_EP0_MAX_PACKET);
+		break;
+	case USB_DT_CONFIG:
+		dsize = AST_VHUB_CONF_DESC_SIZE;
+		memcpy(ep->buf, &ast_vhub_conf_desc, dsize);
+		BUILD_BUG_ON(dsize > sizeof(ast_vhub_conf_desc));
+		BUILD_BUG_ON(AST_VHUB_CONF_DESC_SIZE >= AST_VHUB_EP0_MAX_PACKET);
+		break;
+	case USB_DT_HUB:
+		dsize = AST_VHUB_HUB_DESC_SIZE;
+		memcpy(ep->buf, &ast_vhub_hub_desc, dsize);
+		BUILD_BUG_ON(dsize > sizeof(ast_vhub_hub_desc));
+		BUILD_BUG_ON(AST_VHUB_HUB_DESC_SIZE >= AST_VHUB_EP0_MAX_PACKET);
+		break;
+	default:
+		return std_req_stall;
+	}
+
+	/* Crop requested length */
+	if (len > dsize)
+		len = dsize;
+
+	/* Patch it if forcing USB1 */
+	if (desc_type == USB_DT_DEVICE && ep->vhub->force_usb1)
+		ast_vhub_patch_dev_desc_usb1(ep->buf);
+
+	/* Shoot it from the EP buffer */
+	return ast_vhub_reply(ep, NULL, len);
+}
+
+static int ast_vhub_rep_string(struct ast_vhub_ep *ep,
+			       u8 string_id, u16 lang_id,
+			       u16 len)
+{
+	int rc = usb_gadget_get_string(&ast_vhub_strings, string_id, ep->buf);
+
+	/*
+	 * This should never happen unless we put too big strings in
+	 * the array above
+	 */
+	BUG_ON(rc >= AST_VHUB_EP0_MAX_PACKET);
+
+	if (rc < 0)
+		return std_req_stall;
+
+	/* Shoot it from the EP buffer */
+	return ast_vhub_reply(ep, NULL, min_t(u16, rc, len));
+}
+
+enum std_req_rc ast_vhub_std_hub_request(struct ast_vhub_ep *ep,
+					 struct usb_ctrlrequest *crq)
+{
+	struct ast_vhub *vhub = ep->vhub;
+	u16 wValue, wIndex, wLength;
+
+	wValue = le16_to_cpu(crq->wValue);
+	wIndex = le16_to_cpu(crq->wIndex);
+	wLength = le16_to_cpu(crq->wLength);
+
+	/* First packet, grab speed */
+	if (vhub->speed == USB_SPEED_UNKNOWN) {
+		u32 ustat = readl(vhub->regs + AST_VHUB_USBSTS);
+		if (ustat & VHUB_USBSTS_HISPEED)
+			vhub->speed = USB_SPEED_HIGH;
+		else
+			vhub->speed = USB_SPEED_FULL;
+		UDCDBG(vhub, "USB status=%08x speed=%s\n", ustat,
+		       vhub->speed == USB_SPEED_HIGH ? "high" : "full");
+	}
+
+	switch ((crq->bRequestType << 8) | crq->bRequest) {
+		/* SET_ADDRESS */
+	case DeviceOutRequest | USB_REQ_SET_ADDRESS:
+		EPDBG(ep, "SET_ADDRESS: Got address %x\n", wValue);
+		writel(wValue, vhub->regs + AST_VHUB_CONF);
+		return std_req_complete;
+
+		/* GET_STATUS */
+	case DeviceRequest | USB_REQ_GET_STATUS:
+		return ast_vhub_hub_dev_status(ep, wIndex, wValue);
+	case InterfaceRequest | USB_REQ_GET_STATUS:
+		return ast_vhub_simple_reply(ep, 0, 0);
+	case EndpointRequest | USB_REQ_GET_STATUS:
+		return ast_vhub_hub_ep_status(ep, wIndex, wValue);
+
+		/* SET/CLEAR_FEATURE */
+	case DeviceOutRequest | USB_REQ_SET_FEATURE:
+		return ast_vhub_hub_dev_feature(ep, wIndex, wValue, true);
+	case DeviceOutRequest | USB_REQ_CLEAR_FEATURE:
+		return ast_vhub_hub_dev_feature(ep, wIndex, wValue, false);
+	case EndpointOutRequest | USB_REQ_SET_FEATURE:
+		return ast_vhub_hub_ep_feature(ep, wIndex, wValue, true);
+	case EndpointOutRequest | USB_REQ_CLEAR_FEATURE:
+		return ast_vhub_hub_ep_feature(ep, wIndex, wValue, false);
+
+		/* GET/SET_CONFIGURATION */
+	case DeviceRequest | USB_REQ_GET_CONFIGURATION:
+		return ast_vhub_simple_reply(ep, 1);
+	case DeviceOutRequest | USB_REQ_SET_CONFIGURATION:
+		if (wValue != 1)
+			return std_req_stall;
+		return std_req_complete;
+
+		/* GET_DESCRIPTOR */
+	case DeviceRequest | USB_REQ_GET_DESCRIPTOR:
+		switch (wValue >> 8) {
+		case USB_DT_DEVICE:
+		case USB_DT_CONFIG:
+			return ast_vhub_rep_desc(ep, wValue >> 8,
+						 wLength);
+		case USB_DT_STRING:
+			return ast_vhub_rep_string(ep, wValue & 0xff,
+						   wIndex, wLength);
+		}
+		return std_req_stall;
+
+		/* GET/SET_INTERFACE */
+	case DeviceRequest | USB_REQ_GET_INTERFACE:
+		return ast_vhub_simple_reply(ep, 0);
+	case DeviceOutRequest | USB_REQ_SET_INTERFACE:
+		if (wValue != 0 || wIndex != 0)
+			return std_req_stall;
+		return std_req_complete;
+	}
+	return std_req_stall;
+}
+
+static void ast_vhub_update_hub_ep1(struct ast_vhub *vhub,
+				    unsigned int port)
+{
+	/* Update HW EP1 response */
+	u32 reg = readl(vhub->regs + AST_VHUB_EP1_STS_CHG);
+	u32 pmask = (1 << (port + 1));
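+	/*
+	 * This bitmap is the payload of the hub's interrupt endpoint:
+	 * bit 0 reports hub status changes and bit N reports port N,
+	 * hence the port + 1 shift.
+	 */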
+	if (vhub->ports[port].change)
+		reg |= pmask;
+	else
+		reg &= ~pmask;
+	writel(reg, vhub->regs + AST_VHUB_EP1_STS_CHG);
+}
+
+static void ast_vhub_change_port_stat(struct ast_vhub *vhub,
+				      unsigned int port,
+				      u16 clr_flags,
+				      u16 set_flags,
+				      bool set_c)
+{
+	struct ast_vhub_port *p = &vhub->ports[port];
+	u16 prev;
+
+	/* Update port status */
+	prev = p->status;
+	p->status = (prev & ~clr_flags) | set_flags;
+	DDBG(&p->dev, "port %d status %04x -> %04x (C=%d)\n",
+	     port + 1, prev, p->status, set_c);
+
+	/* Update change bits if needed */
+	if (set_c) {
+		u16 chg = p->status ^ prev;
+
+		/* Only these are relevant for change */
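+		/*
+		 * The USB_PORT_STAT_C_* values mirror the low
+		 * wPortStatus bits numerically (C_CONNECTION is 0x0001
+		 * like STAT_CONNECTION), so masking the XOR of the two
+		 * status words yields the change bits directly.
+		 */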
+		chg &= USB_PORT_STAT_C_CONNECTION |
+		       USB_PORT_STAT_C_ENABLE |
+		       USB_PORT_STAT_C_SUSPEND |
+		       USB_PORT_STAT_C_OVERCURRENT |
+		       USB_PORT_STAT_C_RESET |
+		       USB_PORT_STAT_C_L1;
+		p->change |= chg;
+
+		ast_vhub_update_hub_ep1(vhub, port);
+	}
+}
+
+static void ast_vhub_send_host_wakeup(struct ast_vhub *vhub)
+{
+	u32 reg = readl(vhub->regs + AST_VHUB_CTRL);
+	UDCDBG(vhub, "Waking up host !\n");
+	reg |= VHUB_CTRL_MANUAL_REMOTE_WAKEUP;
+	writel(reg, vhub->regs + AST_VHUB_CTRL);
+}
+
+void ast_vhub_device_connect(struct ast_vhub *vhub,
+			     unsigned int port, bool on)
+{
+	if (on)
+		ast_vhub_change_port_stat(vhub, port, 0,
+					  USB_PORT_STAT_CONNECTION, true);
+	else
+		ast_vhub_change_port_stat(vhub, port,
+					  USB_PORT_STAT_CONNECTION |
+					  USB_PORT_STAT_ENABLE,
+					  0, true);
+
+	/*
+	 * If the hub is set to wake up the host on connection events
+	 * then send a wakeup.
+	 */
+	if (vhub->wakeup_en)
+		ast_vhub_send_host_wakeup(vhub);
+}
+
+static void ast_vhub_wake_work(struct work_struct *work)
+{
+	struct ast_vhub *vhub = container_of(work,
+					     struct ast_vhub,
+					     wake_work);
+	unsigned long flags;
+	unsigned int i;
+
+	/*
+	 * Wake all sleeping ports. If a port was suspended only as
+	 * part of the global host suspend (no explicit per-port
+	 * suspend), the normal host wake path will deal with it later.
+	 */
+	spin_lock_irqsave(&vhub->lock, flags);
+	for (i = 0; i < AST_VHUB_NUM_PORTS; i++) {
+		struct ast_vhub_port *p = &vhub->ports[i];
+
+		if (!(p->status & USB_PORT_STAT_SUSPEND))
+			continue;
+		ast_vhub_change_port_stat(vhub, i,
+					  USB_PORT_STAT_SUSPEND,
+					  0, true);
+		ast_vhub_dev_resume(&p->dev);
+	}
+	ast_vhub_send_host_wakeup(vhub);
+	spin_unlock_irqrestore(&vhub->lock, flags);
+}
+
+void ast_vhub_hub_wake_all(struct ast_vhub *vhub)
+{
+	/*
+	 * A device is trying to wake the world. Because this
+	 * can recurse into the device, we break the call chain
+	 * using a work queue.
+	 */
+	schedule_work(&vhub->wake_work);
+}
+
+static void ast_vhub_port_reset(struct ast_vhub *vhub, u8 port)
+{
+	struct ast_vhub_port *p = &vhub->ports[port];
+	u16 set, clr, speed;
+
+	/* First mark disabled */
+	ast_vhub_change_port_stat(vhub, port,
+				  USB_PORT_STAT_ENABLE |
+				  USB_PORT_STAT_SUSPEND,
+				  USB_PORT_STAT_RESET,
+				  false);
+
+	if (!p->dev.driver)
+		return;
+
+	/*
+	 * This will either "start" the port or reset the
+	 * device if already started...
+	 */
+	ast_vhub_dev_reset(&p->dev);
+
+	/* Grab the right speed */
+	speed = p->dev.driver->max_speed;
+	if (speed == USB_SPEED_UNKNOWN || speed > vhub->speed)
+		speed = vhub->speed;
+
+	switch (speed) {
+	case USB_SPEED_LOW:
+		set = USB_PORT_STAT_LOW_SPEED;
+		clr = USB_PORT_STAT_HIGH_SPEED;
+		break;
+	case USB_SPEED_FULL:
+		set = 0;
+		clr = USB_PORT_STAT_LOW_SPEED |
+			USB_PORT_STAT_HIGH_SPEED;
+		break;
+	case USB_SPEED_HIGH:
+		set = USB_PORT_STAT_HIGH_SPEED;
+		clr = USB_PORT_STAT_LOW_SPEED;
+		break;
+	default:
+		UDCDBG(vhub, "Unsupported speed %d when"
+		       " connecting device\n",
+		       speed);
+		return;
+	}
+	clr |= USB_PORT_STAT_RESET;
+	set |= USB_PORT_STAT_ENABLE;
+
+	/* This should ideally be delayed ... */
+	ast_vhub_change_port_stat(vhub, port, clr, set, true);
+}
+
+static enum std_req_rc ast_vhub_set_port_feature(struct ast_vhub_ep *ep,
+						 u8 port, u16 feat)
+{
+	struct ast_vhub *vhub = ep->vhub;
+	struct ast_vhub_port *p;
+
+	if (port == 0 || port > AST_VHUB_NUM_PORTS)
+		return std_req_stall;
+	port--;
+	p = &vhub->ports[port];
+
+	switch (feat) {
+	case USB_PORT_FEAT_SUSPEND:
+		if (!(p->status & USB_PORT_STAT_ENABLE))
+			return std_req_complete;
+		ast_vhub_change_port_stat(vhub, port,
+					  0, USB_PORT_STAT_SUSPEND,
+					  false);
+		ast_vhub_dev_suspend(&p->dev);
+		return std_req_complete;
+	case USB_PORT_FEAT_RESET:
+		EPDBG(ep, "Port reset !\n");
+		ast_vhub_port_reset(vhub, port);
+		return std_req_complete;
+	case USB_PORT_FEAT_POWER:
+		/*
+		 * On power-on, if there's a connected device, we mark
+		 * the connection flag as changed; some hosts will
+		 * otherwise fail to detect it.
+		 */
+		if (p->status & USB_PORT_STAT_CONNECTION) {
+			p->change |= USB_PORT_STAT_C_CONNECTION;
+			ast_vhub_update_hub_ep1(vhub, port);
+		}
+		return std_req_complete;
+	case USB_PORT_FEAT_TEST:
+	case USB_PORT_FEAT_INDICATOR:
+		/* We don't do anything with these */
+		return std_req_complete;
+	}
+	return std_req_stall;
+}
+
+static enum std_req_rc ast_vhub_clr_port_feature(struct ast_vhub_ep *ep,
+						 u8 port, u16 feat)
+{
+	struct ast_vhub *vhub = ep->vhub;
+	struct ast_vhub_port *p;
+
+	if (port == 0 || port > AST_VHUB_NUM_PORTS)
+		return std_req_stall;
+	port--;
+	p = &vhub->ports[port];
+
+	switch (feat) {
+	case USB_PORT_FEAT_ENABLE:
+		ast_vhub_change_port_stat(vhub, port,
+					  USB_PORT_STAT_ENABLE |
+					  USB_PORT_STAT_SUSPEND, 0,
+					  false);
+		ast_vhub_dev_suspend(&p->dev);
+		return std_req_complete;
+	case USB_PORT_FEAT_SUSPEND:
+		if (!(p->status & USB_PORT_STAT_SUSPEND))
+			return std_req_complete;
+		ast_vhub_change_port_stat(vhub, port,
+					  USB_PORT_STAT_SUSPEND, 0,
+					  false);
+		ast_vhub_dev_resume(&p->dev);
+		return std_req_complete;
+	case USB_PORT_FEAT_POWER:
+		/* We don't do power control */
+		return std_req_complete;
+	case USB_PORT_FEAT_INDICATOR:
+		/* We don't have indicators */
+		return std_req_complete;
+	case USB_PORT_FEAT_C_CONNECTION:
+	case USB_PORT_FEAT_C_ENABLE:
+	case USB_PORT_FEAT_C_SUSPEND:
+	case USB_PORT_FEAT_C_OVER_CURRENT:
+	case USB_PORT_FEAT_C_RESET:
+		/* Clear state-change feature */
+		p->change &= ~(1u << (feat - 16));
+		ast_vhub_update_hub_ep1(vhub, port);
+		return std_req_complete;
+	}
+	return std_req_stall;
+}
+
+static enum std_req_rc ast_vhub_get_port_stat(struct ast_vhub_ep *ep,
+					      u8 port)
+{
+	struct ast_vhub *vhub = ep->vhub;
+	u16 stat, chg;
+
+	if (port == 0 || port > AST_VHUB_NUM_PORTS)
+		return std_req_stall;
+	port--;
+
+	stat = vhub->ports[port].status;
+	chg = vhub->ports[port].change;
+
+	/* We always have power */
+	stat |= USB_PORT_STAT_POWER;
+
+	EPDBG(ep, " port status=%04x change=%04x\n", stat, chg);
+
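+	/*
+	 * The reply is wPortStatus followed by wPortChange, each sent
+	 * low byte first, matching the USB 2.0 GetPortStatus layout.
+	 */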
+	return ast_vhub_simple_reply(ep,
+				     stat & 0xff,
+				     stat >> 8,
+				     chg & 0xff,
+				     chg >> 8);
+}
+
+enum std_req_rc ast_vhub_class_hub_request(struct ast_vhub_ep *ep,
+					   struct usb_ctrlrequest *crq)
+{
+	u16 wValue, wIndex, wLength;
+
+	wValue = le16_to_cpu(crq->wValue);
+	wIndex = le16_to_cpu(crq->wIndex);
+	wLength = le16_to_cpu(crq->wLength);
+
+	switch ((crq->bRequestType << 8) | crq->bRequest) {
+	case GetHubStatus:
+		EPDBG(ep, "GetHubStatus\n");
+		return ast_vhub_simple_reply(ep, 0, 0, 0, 0);
+	case GetPortStatus:
+		EPDBG(ep, "GetPortStatus(%d)\n", wIndex & 0xff);
+		return ast_vhub_get_port_stat(ep, wIndex & 0xf);
+	case GetHubDescriptor:
+		if (wValue != (USB_DT_HUB << 8))
+			return std_req_stall;
+		EPDBG(ep, "GetHubDescriptor(%d)\n", wIndex & 0xff);
+		return ast_vhub_rep_desc(ep, USB_DT_HUB, wLength);
+	case SetHubFeature:
+	case ClearHubFeature:
+		EPDBG(ep, "Get/SetHubFeature(%d)\n", wValue);
+		/* No feature, just complete the requests */
+		if (wValue == C_HUB_LOCAL_POWER ||
+		    wValue == C_HUB_OVER_CURRENT)
+			return std_req_complete;
+		return std_req_stall;
+	case SetPortFeature:
+		EPDBG(ep, "SetPortFeature(%d,%d)\n", wIndex & 0xf, wValue);
+		return ast_vhub_set_port_feature(ep, wIndex & 0xf, wValue);
+	case ClearPortFeature:
+		EPDBG(ep, "ClearPortFeature(%d,%d)\n", wIndex & 0xf, wValue);
+		return ast_vhub_clr_port_feature(ep, wIndex & 0xf, wValue);
+	default:
+		EPDBG(ep, "Unknown class request\n");
+	}
+	return std_req_stall;
+}
+
+void ast_vhub_hub_suspend(struct ast_vhub *vhub)
+{
+	unsigned int i;
+
+	UDCDBG(vhub, "USB bus suspend\n");
+
+	if (vhub->suspended)
+		return;
+
+	vhub->suspended = true;
+
+	/*
+	 * Forward to unsuspended ports without changing
+	 * their connection status.
+	 */
+	for (i = 0; i < AST_VHUB_NUM_PORTS; i++) {
+		struct ast_vhub_port *p = &vhub->ports[i];
+
+		if (!(p->status & USB_PORT_STAT_SUSPEND))
+			ast_vhub_dev_suspend(&p->dev);
+	}
+}
+
+void ast_vhub_hub_resume(struct ast_vhub *vhub)
+{
+	unsigned int i;
+
+	UDCDBG(vhub, "USB bus resume\n");
+
+	if (!vhub->suspended)
+		return;
+
+	vhub->suspended = false;
+
+	/*
+	 * Forward to unsuspended ports without changing
+	 * their connection status.
+	 */
+	for (i = 0; i < AST_VHUB_NUM_PORTS; i++) {
+		struct ast_vhub_port *p = &vhub->ports[i];
+
+		if (!(p->status & USB_PORT_STAT_SUSPEND))
+			ast_vhub_dev_resume(&p->dev);
+	}
+}
+
+void ast_vhub_hub_reset(struct ast_vhub *vhub)
+{
+	unsigned int i;
+
+	UDCDBG(vhub, "USB bus reset\n");
+
+	/*
+	 * Is the speed known ? If not we don't care, we aren't
+	 * initialized yet and ports haven't been enabled.
+	 */
+	if (vhub->speed == USB_SPEED_UNKNOWN)
+		return;
+
+	/* We aren't suspended anymore obviously */
+	vhub->suspended = false;
+
+	/* No speed set */
+	vhub->speed = USB_SPEED_UNKNOWN;
+
+	/* Wakeup not enabled anymore */
+	vhub->wakeup_en = false;
+
+	/*
+	 * Clear all port status, disable gadgets and "suspend"
+	 * them. They will be woken up by a port reset.
+	 */
+	for (i = 0; i < AST_VHUB_NUM_PORTS; i++) {
+		struct ast_vhub_port *p = &vhub->ports[i];
+
+		/* Only keep the connected flag */
+		p->status &= USB_PORT_STAT_CONNECTION;
+		p->change = 0;
+
+		/* Suspend the gadget if any */
+		ast_vhub_dev_suspend(&p->dev);
+	}
+
+	/* Cleanup HW */
+	writel(0, vhub->regs + AST_VHUB_CONF);
+	writel(0, vhub->regs + AST_VHUB_EP0_CTRL);
+	writel(VHUB_EP1_CTRL_RESET_TOGGLE |
+	       VHUB_EP1_CTRL_ENABLE,
+	       vhub->regs + AST_VHUB_EP1_CTRL);
+	writel(0, vhub->regs + AST_VHUB_EP1_STS_CHG);
+}
+
+void ast_vhub_init_hub(struct ast_vhub *vhub)
+{
+	vhub->speed = USB_SPEED_UNKNOWN;
+	INIT_WORK(&vhub->wake_work, ast_vhub_wake_work);
+}
+
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/vhub.h b/drivers/usb/gadget/udc/aspeed-vhub/vhub.h
new file mode 100644
index 0000000..2b04025
--- /dev/null
+++ b/drivers/usb/gadget/udc/aspeed-vhub/vhub.h
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef __ASPEED_VHUB_H
+#define __ASPEED_VHUB_H
+
+/*****************************
+ *                           *
+ * VHUB register definitions *
+ *                           *
+ *****************************/
+
+#define	AST_VHUB_CTRL		0x00	/* Root Function Control & Status Register */
+#define	AST_VHUB_CONF		0x04	/* Root Configuration Setting Register */
+#define	AST_VHUB_IER		0x08	/* Interrupt Ctrl Register */
+#define	AST_VHUB_ISR		0x0C	/* Interrupt Status Register */
+#define	AST_VHUB_EP_ACK_IER	0x10	/* Programmable Endpoint Pool ACK Interrupt Enable Register */
+#define	AST_VHUB_EP_NACK_IER	0x14	/* Programmable Endpoint Pool NACK Interrupt Enable Register */
+#define AST_VHUB_EP_ACK_ISR	0x18	/* Programmable Endpoint Pool ACK Interrupt Status Register */
+#define AST_VHUB_EP_NACK_ISR	0x1C	/* Programmable Endpoint Pool NACK Interrupt Status Register */
+#define AST_VHUB_SW_RESET	0x20	/* Device Controller Soft Reset Enable Register */
+#define AST_VHUB_USBSTS		0x24	/* USB Status Register */
+#define AST_VHUB_EP_TOGGLE	0x28	/* Programmable Endpoint Pool Data Toggle Value Set */
+#define AST_VHUB_ISO_FAIL_ACC	0x2C	/* Isochronous Transaction Fail Accumulator */
+#define AST_VHUB_EP0_CTRL	0x30	/* Endpoint 0 Control/Status Register */
+#define AST_VHUB_EP0_DATA	0x34	/* Base Address of Endpoint 0 In/OUT Data Buffer Register */
+#define AST_VHUB_EP1_CTRL	0x38	/* Endpoint 1 Control/Status Register */
+#define AST_VHUB_EP1_STS_CHG	0x3C	/* Endpoint 1 Status Change Bitmap Data */
+#define AST_VHUB_SETUP0		0x80	/* Root Device Setup Data Buffer0 */
+#define AST_VHUB_SETUP1		0x84	/* Root Device Setup Data Buffer1 */
+
+/* Main control reg */
+#define VHUB_CTRL_PHY_CLK			(1 << 31)
+#define VHUB_CTRL_PHY_LOOP_TEST			(1 << 25)
+#define VHUB_CTRL_DN_PWN			(1 << 24)
+#define VHUB_CTRL_DP_PWN			(1 << 23)
+#define VHUB_CTRL_LONG_DESC			(1 << 18)
+#define VHUB_CTRL_ISO_RSP_CTRL			(1 << 17)
+#define VHUB_CTRL_SPLIT_IN			(1 << 16)
+#define VHUB_CTRL_LOOP_T_RESULT			(1 << 15)
+#define VHUB_CTRL_LOOP_T_STS			(1 << 14)
+#define VHUB_CTRL_PHY_BIST_RESULT		(1 << 13)
+#define VHUB_CTRL_PHY_BIST_CTRL			(1 << 12)
+#define VHUB_CTRL_PHY_RESET_DIS			(1 << 11)
+#define VHUB_CTRL_SET_TEST_MODE(x)		((x) << 8)
+#define VHUB_CTRL_MANUAL_REMOTE_WAKEUP		(1 << 4)
+#define VHUB_CTRL_AUTO_REMOTE_WAKEUP		(1 << 3)
+#define VHUB_CTRL_CLK_STOP_SUSPEND		(1 << 2)
+#define VHUB_CTRL_FULL_SPEED_ONLY		(1 << 1)
+#define VHUB_CTRL_UPSTREAM_CONNECT		(1 << 0)
+
+/* IER & ISR */
+#define VHUB_IRQ_USB_CMD_DEADLOCK		(1 << 18)
+#define VHUB_IRQ_EP_POOL_NAK			(1 << 17)
+#define VHUB_IRQ_EP_POOL_ACK_STALL		(1 << 16)
+#define VHUB_IRQ_DEVICE5			(1 << 13)
+#define VHUB_IRQ_DEVICE4			(1 << 12)
+#define VHUB_IRQ_DEVICE3			(1 << 11)
+#define VHUB_IRQ_DEVICE2			(1 << 10)
+#define VHUB_IRQ_DEVICE1			(1 << 9)
+#define VHUB_IRQ_BUS_RESUME			(1 << 8)
+#define VHUB_IRQ_BUS_SUSPEND			(1 << 7)
+#define VHUB_IRQ_BUS_RESET			(1 << 6)
+#define VHUB_IRQ_HUB_EP1_IN_DATA_ACK		(1 << 5)
+#define VHUB_IRQ_HUB_EP0_IN_DATA_NAK		(1 << 4)
+#define VHUB_IRQ_HUB_EP0_IN_ACK_STALL		(1 << 3)
+#define VHUB_IRQ_HUB_EP0_OUT_NAK		(1 << 2)
+#define VHUB_IRQ_HUB_EP0_OUT_ACK_STALL		(1 << 1)
+#define VHUB_IRQ_HUB_EP0_SETUP			(1 << 0)
+#define VHUB_IRQ_ACK_ALL			0x1ff
+
+/* SW reset reg */
+#define VHUB_SW_RESET_EP_POOL			(1 << 9)
+#define VHUB_SW_RESET_DMA_CONTROLLER		(1 << 8)
+#define VHUB_SW_RESET_DEVICE5			(1 << 5)
+#define VHUB_SW_RESET_DEVICE4			(1 << 4)
+#define VHUB_SW_RESET_DEVICE3			(1 << 3)
+#define VHUB_SW_RESET_DEVICE2			(1 << 2)
+#define VHUB_SW_RESET_DEVICE1			(1 << 1)
+#define VHUB_SW_RESET_ROOT_HUB			(1 << 0)
+#define VHUB_SW_RESET_ALL			(VHUB_SW_RESET_EP_POOL | \
+						 VHUB_SW_RESET_DMA_CONTROLLER | \
+						 VHUB_SW_RESET_DEVICE5 | \
+						 VHUB_SW_RESET_DEVICE4 | \
+						 VHUB_SW_RESET_DEVICE3 | \
+						 VHUB_SW_RESET_DEVICE2 | \
+						 VHUB_SW_RESET_DEVICE1 | \
+						 VHUB_SW_RESET_ROOT_HUB)
+/* EP ACK/NACK IRQ masks */
+#define VHUB_EP_IRQ(n)				(1 << (n))
+#define VHUB_EP_IRQ_ALL				0x7fff	/* 15 EPs */
+
+/* USB status reg */
+#define VHUB_USBSTS_HISPEED			(1 << 27)
+
+/* EP toggle */
+#define VHUB_EP_TOGGLE_VALUE			(1 << 8)
+#define VHUB_EP_TOGGLE_SET_EPNUM(x)		((x) & 0x1f)
+
+/* HUB EP0 control */
+#define VHUB_EP0_CTRL_STALL			(1 << 0)
+#define VHUB_EP0_TX_BUFF_RDY			(1 << 1)
+#define VHUB_EP0_RX_BUFF_RDY			(1 << 2)
+#define VHUB_EP0_RX_LEN(x)			(((x) >> 16) & 0x7f)
+#define VHUB_EP0_SET_TX_LEN(x)			(((x) & 0x7f) << 8)
+
+/* HUB EP1 control */
+#define VHUB_EP1_CTRL_RESET_TOGGLE		(1 << 2)
+#define VHUB_EP1_CTRL_STALL			(1 << 1)
+#define VHUB_EP1_CTRL_ENABLE			(1 << 0)
+
+/***********************************
+ *                                 *
+ * per-device register definitions *
+ *                                 *
+ ***********************************/
+#define AST_VHUB_DEV_EN_CTRL		0x00
+#define AST_VHUB_DEV_ISR		0x04
+#define AST_VHUB_DEV_EP0_CTRL		0x08
+#define AST_VHUB_DEV_EP0_DATA		0x0c
+
+/* Device enable control */
+#define VHUB_DEV_EN_SET_ADDR(x)			((x) << 8)
+#define VHUB_DEV_EN_ADDR_MASK			((0xff) << 8)
+#define VHUB_DEV_EN_EP0_NAK_IRQEN		(1 << 6)
+#define VHUB_DEV_EN_EP0_IN_ACK_IRQEN		(1 << 5)
+#define VHUB_DEV_EN_EP0_OUT_NAK_IRQEN		(1 << 4)
+#define VHUB_DEV_EN_EP0_OUT_ACK_IRQEN		(1 << 3)
+#define VHUB_DEV_EN_EP0_SETUP_IRQEN		(1 << 2)
+#define VHUB_DEV_EN_SPEED_SEL_HIGH		(1 << 1)
+#define VHUB_DEV_EN_ENABLE_PORT			(1 << 0)
+
+/* Interrupt status */
+#define VHUV_DEV_IRQ_EP0_IN_DATA_NACK		(1 << 4)
+#define VHUV_DEV_IRQ_EP0_IN_ACK_STALL		(1 << 3)
+#define VHUV_DEV_IRQ_EP0_OUT_DATA_NACK		(1 << 2)
+#define VHUV_DEV_IRQ_EP0_OUT_ACK_STALL		(1 << 1)
+#define VHUV_DEV_IRQ_EP0_SETUP			(1 << 0)
+
+/* Control bits.
+ *
+ * Note: The driver relies on the bulk of those bits
+ *       matching corresponding vHub EP0 control bits
+ */
+#define VHUB_DEV_EP0_CTRL_STALL			VHUB_EP0_CTRL_STALL
+#define VHUB_DEV_EP0_TX_BUFF_RDY		VHUB_EP0_TX_BUFF_RDY
+#define VHUB_DEV_EP0_RX_BUFF_RDY		VHUB_EP0_RX_BUFF_RDY
+#define VHUB_DEV_EP0_RX_LEN(x)			VHUB_EP0_RX_LEN(x)
+#define VHUB_DEV_EP0_SET_TX_LEN(x)		VHUB_EP0_SET_TX_LEN(x)
+
+/*************************************
+ *                                   *
+ * per-endpoint register definitions *
+ *                                   *
+ *************************************/
+
+#define AST_VHUB_EP_CONFIG		0x00
+#define AST_VHUB_EP_DMA_CTLSTAT		0x04
+#define AST_VHUB_EP_DESC_BASE		0x08
+#define AST_VHUB_EP_DESC_STATUS		0x0C
+
+/* EP config reg */
+#define VHUB_EP_CFG_SET_MAX_PKT(x)	(((x) & 0x3ff) << 16)
+#define VHUB_EP_CFG_AUTO_DATA_DISABLE	(1 << 13)
+#define VHUB_EP_CFG_STALL_CTRL		(1 << 12)
+#define VHUB_EP_CFG_SET_EP_NUM(x)	(((x) & 0xf) << 8)
+#define VHUB_EP_CFG_SET_TYPE(x)		((x) << 5)
+#define   EP_TYPE_OFF			0
+#define   EP_TYPE_BULK			1
+#define   EP_TYPE_INT			2
+#define   EP_TYPE_ISO			3
+#define VHUB_EP_CFG_DIR_OUT		(1 << 4)
+#define VHUB_EP_CFG_SET_DEV(x)		((x) << 1)
+#define VHUB_EP_CFG_ENABLE		(1 << 0)
+
+/* EP DMA control */
+#define VHUB_EP_DMA_PROC_STATUS(x)	(((x) >> 4) & 0xf)
+#define   EP_DMA_PROC_RX_IDLE		0
+#define   EP_DMA_PROC_TX_IDLE		8
+#define VHUB_EP_DMA_IN_LONG_MODE	(1 << 3)
+#define VHUB_EP_DMA_OUT_CONTIG_MODE	(1 << 3)
+#define VHUB_EP_DMA_CTRL_RESET		(1 << 2)
+#define VHUB_EP_DMA_SINGLE_STAGE	(1 << 1)
+#define VHUB_EP_DMA_DESC_MODE		(1 << 0)
+
+/* EP DMA status */
+#define VHUB_EP_DMA_SET_TX_SIZE(x)	((x) << 16)
+#define VHUB_EP_DMA_TX_SIZE(x)		(((x) >> 16) & 0x7ff)
+#define VHUB_EP_DMA_RPTR(x)		(((x) >> 8) & 0xff)
+#define VHUB_EP_DMA_SET_RPTR(x)		(((x) & 0xff) << 8)
+#define VHUB_EP_DMA_SET_CPU_WPTR(x)	(x)
+#define VHUB_EP_DMA_SINGLE_KICK		(1 << 0) /* WPTR = 1 for single mode */
+
+/*******************************
+ *                             *
+ * DMA descriptors definitions *
+ *                             *
+ *******************************/
+
+/* Desc W1 IN */
+#define VHUB_DSC1_IN_INTERRUPT		(1 << 31)
+#define VHUB_DSC1_IN_SPID_DATA0		(0 << 14)
+#define VHUB_DSC1_IN_SPID_DATA2		(1 << 14)
+#define VHUB_DSC1_IN_SPID_DATA1		(2 << 14)
+#define VHUB_DSC1_IN_SPID_MDATA		(3 << 14)
+#define VHUB_DSC1_IN_SET_LEN(x)		((x) & 0xfff)
+#define VHUB_DSC1_IN_LEN(x)		((x) & 0xfff)
+
+/****************************************
+ *                                      *
+ * Data structures and misc definitions *
+ *                                      *
+ ****************************************/
+
+#define AST_VHUB_NUM_GEN_EPs	15	/* Generic non-0 EPs */
+#define AST_VHUB_NUM_PORTS	5	/* vHub ports */
+#define AST_VHUB_EP0_MAX_PACKET	64	/* EP0's max packet size */
+#define AST_VHUB_EPn_MAX_PACKET	1024	/* Generic EPs max packet size */
+#define AST_VHUB_DESCS_COUNT	256	/* Use 256 descriptor mode (valid
+					 * values are 256 and 32)
+					 */
+
+struct ast_vhub;
+struct ast_vhub_dev;
+
+/*
+ * DMA descriptor (generic EPs only, currently only used
+ * for IN endpoints)
+ */
+struct ast_vhub_desc {
+	__le32	w0;
+	__le32	w1;
+};
+
+/* A transfer request, either core-originated or internal */
+struct ast_vhub_req {
+	struct usb_request	req;
+	struct list_head	queue;
+
+	/* Actual count written to descriptors (desc mode only) */
+	unsigned int		act_count;
+
+	/*
+	 * Desc number of the final packet or -1. For non-desc
+	 * mode (or ep0), any >= 0 value means "last packet"
+	 */
+	int			last_desc;
+
+	/* Request active (pending DMAs) */
+	bool			active : 1;
+
+	/* Internal request (don't call back core) */
+	bool			internal : 1;
+};
+#define to_ast_req(__ureq) container_of(__ureq, struct ast_vhub_req, req)
+
+/* Current state of an EP0 */
+enum ep0_state {
+	ep0_state_token,
+	ep0_state_data,
+	ep0_state_status,
+};
+
+/*
+ * An endpoint, either generic, ep0, actual gadget EP
+ * or internal use vhub EP0. vhub EP1 doesn't have an
+ * associated structure as it's mostly HW managed.
+ */
+struct ast_vhub_ep {
+	struct usb_ep		ep;
+
+	/* Request queue */
+	struct list_head	queue;
+
+	/* EP index in the device, 0 means this is an EP0 */
+	unsigned int		d_idx;
+
+	/* Dev pointer or NULL for vHub EP0 */
+	struct ast_vhub_dev	*dev;
+
+	/* vHub itself */
+	struct ast_vhub		*vhub;
+
+	/*
+	 * DMA buffer for EP0, fallback DMA buffer for misaligned
+	 * OUT transfers for generic EPs
+	 */
+	void			*buf;
+	dma_addr_t		buf_dma;
+
+	/* The rest depends on the EP type */
+	union {
+		/* EP0 (either device or vhub) */
+		struct {
+			/*
+			 * EP0 registers are "similar" for
+			 * vHub and devices but located in
+			 * different places.
+			 */
+			void __iomem		*ctlstat;
+			void __iomem		*setup;
+
+			/* Current state & direction */
+			enum ep0_state		state;
+			bool			dir_in;
+
+			/* Internal use request */
+			struct ast_vhub_req	req;
+		} ep0;
+
+		/* Generic endpoint (aka EPn) */
+		struct {
+			/* Registers */
+			void __iomem		*regs;
+
+			/* Index in global pool (0..14) */
+			unsigned int		g_idx;
+
+			/* DMA Descriptors */
+			struct ast_vhub_desc	*descs;
+			dma_addr_t		descs_dma;
+			unsigned int		d_next;
+			unsigned int		d_last;
+			unsigned int		dma_conf;
+
+			/* Max chunk size for IN EPs */
+			unsigned int		chunk_max;
+
+			/* State flags */
+			bool			is_in :  1;
+			bool			is_iso : 1;
+			bool			stalled : 1;
+			bool			wedged : 1;
+			bool			enabled : 1;
+			bool			desc_mode : 1;
+		} epn;
+	};
+};
+#define to_ast_ep(__uep) container_of(__uep, struct ast_vhub_ep, ep)
+
+/* A device attached to a vHub port */
+struct ast_vhub_dev {
+	struct ast_vhub			*vhub;
+	void __iomem			*regs;
+
+	/* Device index (0...4) and name string */
+	unsigned int			index;
+	const char			*name;
+
+	/* sysfs enclosure for the gadget gunk */
+	struct device			*port_dev;
+
+	/* Link to gadget core */
+	struct usb_gadget		gadget;
+	struct usb_gadget_driver	*driver;
+	bool				registered : 1;
+	bool				wakeup_en : 1;
+	bool				suspended : 1;
+	bool				enabled : 1;
+
+	/* Endpoint structures */
+	struct ast_vhub_ep		ep0;
+	struct ast_vhub_ep		*epns[AST_VHUB_NUM_GEN_EPs];
+
+};
+#define to_ast_dev(__g) container_of(__g, struct ast_vhub_dev, gadget)
+
+/* Per vhub port state information */
+struct ast_vhub_port {
+	/* Port status & status change registers */
+	u16			status;
+	u16			change;
+
+	/* Associated device slot */
+	struct ast_vhub_dev	dev;
+};
+
+/* Global vhub structure */
+struct ast_vhub {
+	struct platform_device		*pdev;
+	void __iomem			*regs;
+	int				irq;
+	spinlock_t			lock;
+	struct work_struct		wake_work;
+	struct clk			*clk;
+
+	/* EP0 DMA buffers allocated in one chunk */
+	void				*ep0_bufs;
+	dma_addr_t			ep0_bufs_dma;
+
+	/* EP0 of the vhub itself */
+	struct ast_vhub_ep		ep0;
+
+	/* State of vhub ep1 */
+	bool				ep1_stalled : 1;
+
+	/* Per-port info */
+	struct ast_vhub_port		ports[AST_VHUB_NUM_PORTS];
+
+	/* Generic EP data structures */
+	struct ast_vhub_ep		epns[AST_VHUB_NUM_GEN_EPs];
+
+	/* Upstream bus is suspended ? */
+	bool				suspended : 1;
+
+	/* Hub itself can signal remote wakeup */
+	bool				wakeup_en : 1;
+
+	/* Force full speed only */
+	bool				force_usb1 : 1;
+
+	/* Upstream bus speed captured at bus reset */
+	unsigned int			speed;
+};
+
+/* Standard request handlers result codes */
+enum std_req_rc {
+	std_req_stall = -1,	/* Stall requested */
+	std_req_complete = 0,	/* Request completed with no data */
+	std_req_data = 1,	/* Request completed with data */
+	std_req_driver = 2,	/* Pass to driver pls */
+};
+
+#ifdef CONFIG_USB_GADGET_VERBOSE
+#define UDCVDBG(u, fmt...)	dev_dbg(&(u)->pdev->dev, fmt)
+
+#define EPVDBG(ep, fmt, ...)	do {			\
+	dev_dbg(&(ep)->vhub->pdev->dev,			\
+		"%s:EP%d " fmt,				\
+		(ep)->dev ? (ep)->dev->name : "hub",	\
+		(ep)->d_idx, ##__VA_ARGS__);		\
+	} while(0)
+
+#define DVDBG(d, fmt, ...)	do {			\
+	dev_dbg(&(d)->vhub->pdev->dev,			\
+		"%s " fmt, (d)->name,			\
+		##__VA_ARGS__);				\
+	} while(0)
+
+#else
+#define UDCVDBG(u, fmt...)	do { } while(0)
+#define EPVDBG(ep, fmt, ...)	do { } while(0)
+#define DVDBG(d, fmt, ...)	do { } while(0)
+#endif
+
+#ifdef CONFIG_USB_GADGET_DEBUG
+#define UDCDBG(u, fmt...)	dev_dbg(&(u)->pdev->dev, fmt)
+
+#define EPDBG(ep, fmt, ...)	do {			\
+	dev_dbg(&(ep)->vhub->pdev->dev,			\
+		"%s:EP%d " fmt,				\
+		(ep)->dev ? (ep)->dev->name : "hub",	\
+		(ep)->d_idx, ##__VA_ARGS__);		\
+	} while(0)
+
+#define DDBG(d, fmt, ...)	do {			\
+	dev_dbg(&(d)->vhub->pdev->dev,			\
+		"%s " fmt, (d)->name,			\
+		##__VA_ARGS__);				\
+	} while(0)
+#else
+#define UDCDBG(u, fmt...)	do { } while(0)
+#define EPDBG(ep, fmt, ...)	do { } while(0)
+#define DDBG(d, fmt, ...)	do { } while(0)
+#endif
+
+/* core.c */
+void ast_vhub_done(struct ast_vhub_ep *ep, struct ast_vhub_req *req,
+		   int status);
+void ast_vhub_nuke(struct ast_vhub_ep *ep, int status);
+struct usb_request *ast_vhub_alloc_request(struct usb_ep *u_ep,
+					   gfp_t gfp_flags);
+void ast_vhub_free_request(struct usb_ep *u_ep, struct usb_request *u_req);
+void ast_vhub_init_hw(struct ast_vhub *vhub);
+
+/* ep0.c */
+void ast_vhub_ep0_handle_ack(struct ast_vhub_ep *ep, bool in_ack);
+void ast_vhub_ep0_handle_setup(struct ast_vhub_ep *ep);
+void ast_vhub_init_ep0(struct ast_vhub *vhub, struct ast_vhub_ep *ep,
+		       struct ast_vhub_dev *dev);
+int ast_vhub_reply(struct ast_vhub_ep *ep, char *ptr, int len);
+int __ast_vhub_simple_reply(struct ast_vhub_ep *ep, int len, ...);
+#define ast_vhub_simple_reply(ep, ...)					       \
+	__ast_vhub_simple_reply((ep),					       \
+			       sizeof((u8[]) { __VA_ARGS__ })/sizeof(u8),      \
+			       __VA_ARGS__)
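+/*
+ * The compound literal counts the variadic byte arguments at compile
+ * time: sizeof((u8[]){ __VA_ARGS__ }) is the number of bytes listed,
+ * so ast_vhub_simple_reply(ep, 1, 0) expands to
+ * __ast_vhub_simple_reply(ep, 2, 1, 0).
+ */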
+
+/* hub.c */
+void ast_vhub_init_hub(struct ast_vhub *vhub);
+enum std_req_rc ast_vhub_std_hub_request(struct ast_vhub_ep *ep,
+					 struct usb_ctrlrequest *crq);
+enum std_req_rc ast_vhub_class_hub_request(struct ast_vhub_ep *ep,
+					   struct usb_ctrlrequest *crq);
+void ast_vhub_device_connect(struct ast_vhub *vhub, unsigned int port,
+			     bool on);
+void ast_vhub_hub_suspend(struct ast_vhub *vhub);
+void ast_vhub_hub_resume(struct ast_vhub *vhub);
+void ast_vhub_hub_reset(struct ast_vhub *vhub);
+void ast_vhub_hub_wake_all(struct ast_vhub *vhub);
+
+/* dev.c */
+int ast_vhub_init_dev(struct ast_vhub *vhub, unsigned int idx);
+void ast_vhub_del_dev(struct ast_vhub_dev *d);
+void ast_vhub_dev_irq(struct ast_vhub_dev *d);
+int ast_vhub_std_dev_request(struct ast_vhub_ep *ep,
+			     struct usb_ctrlrequest *crq);
+
+/* epn.c */
+void ast_vhub_epn_ack_irq(struct ast_vhub_ep *ep);
+void ast_vhub_update_epn_stall(struct ast_vhub_ep *ep);
+struct ast_vhub_ep *ast_vhub_alloc_epn(struct ast_vhub_dev *d, u8 addr);
+void ast_vhub_dev_suspend(struct ast_vhub_dev *d);
+void ast_vhub_dev_resume(struct ast_vhub_dev *d);
+void ast_vhub_dev_reset(struct ast_vhub_dev *d);
+
+#endif /* __ASPEED_VHUB_H */
diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c
index ad743a8..03959dc 100644
--- a/drivers/usb/gadget/udc/at91_udc.c
+++ b/drivers/usb/gadget/udc/at91_udc.c
@@ -234,22 +234,10 @@
 	return 0;
 }
 
-static int proc_udc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_udc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations proc_ops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_udc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void create_debug_file(struct at91_udc *udc)
 {
-	udc->pde = proc_create_data(debug_filename, 0, NULL, &proc_ops, udc);
+	udc->pde = proc_create_single_data(debug_filename, 0, NULL,
+			proc_udc_show, udc);
 }
 
 static void remove_debug_file(struct at91_udc *udc)
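
A minimal sketch of the pattern behind this conversion (the foo_* names
and struct are hypothetical): proc_create_single_data() registers a
seq_file show routine directly, and the data pointer passed at creation
comes back as m->private, replacing the per-driver open() wrapper and
file_operations table.

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	struct foo_udc {
		int vbus;
	};

	static int foo_proc_show(struct seq_file *m, void *v)
	{
		struct foo_udc *udc = m->private;	/* data from creation */

		seq_printf(m, "vbus present: %d\n", udc->vbus);
		return 0;
	}

	static void foo_create_debug_file(struct foo_udc *udc)
	{
		/* One call replaces the open() wrapper and proc_ops. */
		proc_create_single_data("driver/foo_udc", 0, NULL,
					foo_proc_show, udc);
	}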
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index 27c1639..a4d99bf 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -20,7 +20,6 @@
 #include <linux/ctype.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
-#include <linux/usb/atmel_usba_udc.h>
 #include <linux/delay.h>
 #include <linux/of.h>
 #include <linux/irq.h>
@@ -207,94 +206,45 @@
 	struct dentry *ep_root;
 
 	ep_root = debugfs_create_dir(ep->ep.name, udc->debugfs_root);
-	if (!ep_root)
-		goto err_root;
 	ep->debugfs_dir = ep_root;
 
-	ep->debugfs_queue = debugfs_create_file("queue", 0400, ep_root,
-						ep, &queue_dbg_fops);
-	if (!ep->debugfs_queue)
-		goto err_queue;
-
-	if (ep->can_dma) {
-		ep->debugfs_dma_status
-			= debugfs_create_u32("dma_status", 0400, ep_root,
-					&ep->last_dma_status);
-		if (!ep->debugfs_dma_status)
-			goto err_dma_status;
-	}
-	if (ep_is_control(ep)) {
-		ep->debugfs_state
-			= debugfs_create_u32("state", 0400, ep_root,
-					&ep->state);
-		if (!ep->debugfs_state)
-			goto err_state;
-	}
-
-	return;
-
-err_state:
+	debugfs_create_file("queue", 0400, ep_root, ep, &queue_dbg_fops);
 	if (ep->can_dma)
-		debugfs_remove(ep->debugfs_dma_status);
-err_dma_status:
-	debugfs_remove(ep->debugfs_queue);
-err_queue:
-	debugfs_remove(ep_root);
-err_root:
-	dev_err(&ep->udc->pdev->dev,
-		"failed to create debugfs directory for %s\n", ep->ep.name);
+		debugfs_create_u32("dma_status", 0400, ep_root,
+				   &ep->last_dma_status);
+	if (ep_is_control(ep))
+		debugfs_create_u32("state", 0400, ep_root, &ep->state);
 }
 
 static void usba_ep_cleanup_debugfs(struct usba_ep *ep)
 {
-	debugfs_remove(ep->debugfs_queue);
-	debugfs_remove(ep->debugfs_dma_status);
-	debugfs_remove(ep->debugfs_state);
-	debugfs_remove(ep->debugfs_dir);
-	ep->debugfs_dma_status = NULL;
-	ep->debugfs_dir = NULL;
+	debugfs_remove_recursive(ep->debugfs_dir);
 }
 
 static void usba_init_debugfs(struct usba_udc *udc)
 {
-	struct dentry *root, *regs;
+	struct dentry *root;
 	struct resource *regs_resource;
 
 	root = debugfs_create_dir(udc->gadget.name, NULL);
-	if (IS_ERR(root) || !root)
-		goto err_root;
 	udc->debugfs_root = root;
 
 	regs_resource = platform_get_resource(udc->pdev, IORESOURCE_MEM,
 				CTRL_IOMEM_ID);
 
 	if (regs_resource) {
-		regs = debugfs_create_file_size("regs", 0400, root, udc,
-						&regs_dbg_fops,
-						resource_size(regs_resource));
-		if (!regs)
-			goto err_regs;
-		udc->debugfs_regs = regs;
+		debugfs_create_file_size("regs", 0400, root, udc,
+					 &regs_dbg_fops,
+					 resource_size(regs_resource));
 	}
 
 	usba_ep_init_debugfs(udc, to_usba_ep(udc->gadget.ep0));
-
-	return;
-
-err_regs:
-	debugfs_remove(root);
-err_root:
-	udc->debugfs_root = NULL;
-	dev_err(&udc->pdev->dev, "debugfs is not available\n");
 }
 
 static void usba_cleanup_debugfs(struct usba_udc *udc)
 {
 	usba_ep_cleanup_debugfs(to_usba_ep(udc->gadget.ep0));
-	debugfs_remove(udc->debugfs_regs);
-	debugfs_remove(udc->debugfs_root);
-	udc->debugfs_regs = NULL;
-	udc->debugfs_root = NULL;
+	debugfs_remove_recursive(udc->debugfs_root);
 }
 #else
 static inline void usba_ep_init_debugfs(struct usba_udc *udc,
@@ -417,7 +367,7 @@
 static int vbus_is_present(struct usba_udc *udc)
 {
 	if (udc->vbus_pin)
-		return gpiod_get_value(udc->vbus_pin) ^ udc->vbus_pin_inverted;
+		return gpiod_get_value(udc->vbus_pin);
 
 	/* No Vbus detection: Assume always present */
 	return 1;
@@ -2076,7 +2026,6 @@
 
 	udc->vbus_pin = devm_gpiod_get_optional(&pdev->dev, "atmel,vbus",
 						GPIOD_IN);
-	udc->vbus_pin_inverted = gpiod_is_active_low(udc->vbus_pin);
 
 	if (fifo_mode == 0) {
 		pp = NULL;
@@ -2279,15 +2228,15 @@
 	if (udc->vbus_pin) {
 		irq_set_status_flags(gpiod_to_irq(udc->vbus_pin), IRQ_NOAUTOEN);
 		ret = devm_request_threaded_irq(&pdev->dev,
-					gpiod_to_irq(udc->vbus_pin), NULL,
-					usba_vbus_irq_thread, USBA_VBUS_IRQFLAGS,
-					"atmel_usba_udc", udc);
-			if (ret) {
-				udc->vbus_pin = NULL;
-				dev_warn(&udc->pdev->dev,
-					 "failed to request vbus irq; "
-					 "assuming always on\n");
-			}
+				gpiod_to_irq(udc->vbus_pin), NULL,
+				usba_vbus_irq_thread, USBA_VBUS_IRQFLAGS,
+				"atmel_usba_udc", udc);
+		if (ret) {
+			udc->vbus_pin = NULL;
+			dev_warn(&udc->pdev->dev,
+				 "failed to request vbus irq; "
+				 "assuming always on\n");
+		}
 	}
 
 	ret = usb_add_gadget_udc(&pdev->dev, &udc->gadget);
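
The debugfs conversions in this file (and the bcm63xx, gr_udc and
pxa27x hunks below) all follow the same modern idiom: ignore the
debugfs_create_*() return values, since debugfs failures are not meant
to be fatal, keep only the root dentry, and tear everything down with
one debugfs_remove_recursive() call. A minimal sketch, with
hypothetical foo_* names:

	#include <linux/debugfs.h>

	struct foo_udc {
		struct dentry *debugfs_root;
		u32 error_count;
	};

	static void foo_init_debugfs(struct foo_udc *udc,
				     const struct file_operations *fops)
	{
		struct dentry *root = debugfs_create_dir("foo_udc", NULL);

		udc->debugfs_root = root;

		/* Return values deliberately ignored: no error unwinding. */
		debugfs_create_file("state", 0400, root, udc, fops);
		debugfs_create_u32("errors", 0400, root, &udc->error_count);
	}

	static void foo_cleanup_debugfs(struct foo_udc *udc)
	{
		/* Removes the directory and everything created under it. */
		debugfs_remove_recursive(udc->debugfs_root);
	}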
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.h b/drivers/usb/gadget/udc/atmel_usba_udc.h
index 969ce8f..030bf79 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.h
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.h
@@ -287,9 +287,6 @@
 #ifdef CONFIG_USB_GADGET_DEBUG_FS
 	u32					last_dma_status;
 	struct dentry				*debugfs_dir;
-	struct dentry				*debugfs_queue;
-	struct dentry				*debugfs_dma_status;
-	struct dentry				*debugfs_state;
 #endif
 };
 
@@ -326,7 +323,6 @@
 	const struct usba_udc_errata *errata;
 	int irq;
 	struct gpio_desc *vbus_pin;
-	int vbus_pin_inverted;
 	int num_ep;
 	int configured_ep;
 	struct usba_fifo_cfg *fifo_cfg;
@@ -345,7 +341,6 @@
 
 #ifdef CONFIG_USB_GADGET_DEBUG_FS
 	struct dentry *debugfs_root;
-	struct dentry *debugfs_regs;
 #endif
 
 	struct regmap *pmc;
diff --git a/drivers/usb/gadget/udc/bcm63xx_udc.c b/drivers/usb/gadget/udc/bcm63xx_udc.c
index 3a8df86..c1fcc77 100644
--- a/drivers/usb/gadget/udc/bcm63xx_udc.c
+++ b/drivers/usb/gadget/udc/bcm63xx_udc.c
@@ -288,8 +288,6 @@
  * @ep0_reply: Pending reply from gadget driver.
  * @ep0_request: Outstanding ep0 request.
  * @debugfs_root: debugfs directory: /sys/kernel/debug/<DRV_MODULE_NAME>.
- * @debugfs_usbd: debugfs file "usbd" for controller state.
- * @debugfs_iudma: debugfs file "usbd" for IUDMA state.
  */
 struct bcm63xx_udc {
 	spinlock_t			lock;
@@ -330,8 +328,6 @@
 	struct usb_request		*ep0_request;
 
 	struct dentry			*debugfs_root;
-	struct dentry			*debugfs_usbd;
-	struct dentry			*debugfs_iudma;
 };
 
 static const struct usb_ep_ops bcm63xx_udc_ep_ops;
@@ -2247,34 +2243,16 @@
  */
 static void bcm63xx_udc_init_debugfs(struct bcm63xx_udc *udc)
 {
-	struct dentry *root, *usbd, *iudma;
+	struct dentry *root;
 
 	if (!IS_ENABLED(CONFIG_USB_GADGET_DEBUG_FS))
 		return;
 
 	root = debugfs_create_dir(udc->gadget.name, NULL);
-	if (IS_ERR(root) || !root)
-		goto err_root;
-
-	usbd = debugfs_create_file("usbd", 0400, root, udc,
-			&bcm63xx_usbd_dbg_fops);
-	if (!usbd)
-		goto err_usbd;
-	iudma = debugfs_create_file("iudma", 0400, root, udc,
-			&bcm63xx_iudma_dbg_fops);
-	if (!iudma)
-		goto err_iudma;
-
 	udc->debugfs_root = root;
-	udc->debugfs_usbd = usbd;
-	udc->debugfs_iudma = iudma;
-	return;
-err_iudma:
-	debugfs_remove(usbd);
-err_usbd:
-	debugfs_remove(root);
-err_root:
-	dev_err(udc->dev, "debugfs is not available\n");
+
+	debugfs_create_file("usbd", 0400, root, udc, &bcm63xx_usbd_dbg_fops);
+	debugfs_create_file("iudma", 0400, root, udc, &bcm63xx_iudma_dbg_fops);
 }
 
 /**
@@ -2285,12 +2263,7 @@
  */
 static void bcm63xx_udc_cleanup_debugfs(struct bcm63xx_udc *udc)
 {
-	debugfs_remove(udc->debugfs_iudma);
-	debugfs_remove(udc->debugfs_usbd);
-	debugfs_remove(udc->debugfs_root);
-	udc->debugfs_iudma = NULL;
-	udc->debugfs_usbd = NULL;
-	udc->debugfs_root = NULL;
+	debugfs_remove_recursive(udc->debugfs_root);
 }
 
 /***********************************************************************
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 842814b..cab5e4f 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -244,6 +244,12 @@
  * Returns zero, or a negative error code.  Endpoints that are not enabled
  * report errors; errors will also be
  * reported when the usb peripheral is disconnected.
+ *
+ * If and only if @req is successfully queued (the return value is zero),
+ * @req->complete() will be called exactly once, when the Gadget core and
+ * UDC are finished with the request.  When the completion function is called,
+ * control of the request is returned to the device driver which submitted it.
+ * The completion handler may then immediately free or reuse @req.
  */
 int usb_ep_queue(struct usb_ep *ep,
 			       struct usb_request *req, gfp_t gfp_flags)
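
The paragraph added above pins down the completion contract: a
successfully queued request gets exactly one complete() callback, after
which the submitting driver owns the request again. A minimal
gadget-side sketch of that lifecycle (the foo_* names are hypothetical):

	#include <linux/usb/gadget.h>

	static void foo_complete(struct usb_ep *ep, struct usb_request *req)
	{
		/* Runs exactly once per successful queue; req is ours again. */
		if (req->status)
			pr_debug("transfer ended with %d\n", req->status);
		usb_ep_free_request(ep, req);
	}

	static int foo_send(struct usb_ep *ep, void *buf, unsigned int len)
	{
		struct usb_request *req;
		int ret;

		req = usb_ep_alloc_request(ep, GFP_ATOMIC);
		if (!req)
			return -ENOMEM;
		req->buf = buf;
		req->length = len;
		req->complete = foo_complete;

		/*
		 * On error the request was never queued, so complete()
		 * will not run and we free req ourselves.
		 */
		ret = usb_ep_queue(ep, req, GFP_ATOMIC);
		if (ret)
			usb_ep_free_request(ep, req);
		return ret;
	}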
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 56b517a..9a3f7db 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -253,6 +253,7 @@
 		portctrl |= PORTSCX_PTW_16BIT;
 		/* fall through */
 	case FSL_USB2_PHY_UTMI:
+	case FSL_USB2_PHY_UTMI_DUAL:
 		if (udc->pdata->have_sysif_regs) {
 			if (udc->pdata->controller_ver) {
 				/* controller version 1.6 or above */
@@ -2207,22 +2208,8 @@
 	return 0;
 }
 
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int fsl_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, fsl_proc_read, NULL);
-}
-
-static const struct file_operations fsl_proc_fops = {
-	.open		= fsl_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-#define create_proc_file()	proc_create(proc_filename, 0, NULL, &fsl_proc_fops)
+#define create_proc_file() \
+	proc_create_single(proc_filename, 0, NULL, fsl_proc_read)
 #define remove_proc_file()	remove_proc_entry(proc_filename, NULL)
 
 #else				/* !CONFIG_USB_GADGET_DEBUG_FILES */
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c
index 4504d0b..c372122 100644
--- a/drivers/usb/gadget/udc/goku_udc.c
+++ b/drivers/usb/gadget/udc/goku_udc.c
@@ -1241,22 +1241,6 @@
 	local_irq_restore(flags);
 	return 0;
 }
-
-/*
- * seq_file wrappers for procfile show routines.
- */
-static int udc_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, udc_proc_read, PDE_DATA(file_inode(file)));
-}
-
-static const struct file_operations udc_proc_fops = {
-	.open		= udc_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 #endif	/* CONFIG_USB_GADGET_DEBUG_FILES */
 
 /*-------------------------------------------------------------------------*/
@@ -1826,7 +1810,7 @@
 
 
 #ifdef CONFIG_USB_GADGET_DEBUG_FILES
-	proc_create_data(proc_node_name, 0, NULL, &udc_proc_fops, dev);
+	proc_create_single_data(proc_node_name, 0, NULL, udc_proc_read, dev);
 #endif
 
 	retval = usb_add_gadget_udc_release(&pdev->dev, &dev->gadget,
diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c
index ca83c15..729e60e 100644
--- a/drivers/usb/gadget/udc/gr_udc.c
+++ b/drivers/usb/gadget/udc/gr_udc.c
@@ -209,15 +209,12 @@
 	const char *name = "gr_udc_state";
 
 	dev->dfs_root = debugfs_create_dir(dev_name(dev->dev), NULL);
-	dev->dfs_state = debugfs_create_file(name, 0444, dev->dfs_root, dev,
-					     &gr_dfs_fops);
+	debugfs_create_file(name, 0444, dev->dfs_root, dev, &gr_dfs_fops);
 }
 
 static void gr_dfs_delete(struct gr_udc *dev)
 {
-	/* Handles NULL and ERR pointers internally */
-	debugfs_remove(dev->dfs_state);
-	debugfs_remove(dev->dfs_root);
+	debugfs_remove_recursive(dev->dfs_root);
 }
 
 #else /* !CONFIG_USB_GADGET_DEBUG_FS */
diff --git a/drivers/usb/gadget/udc/gr_udc.h b/drivers/usb/gadget/udc/gr_udc.h
index 3e91326..417ad2a 100644
--- a/drivers/usb/gadget/udc/gr_udc.h
+++ b/drivers/usb/gadget/udc/gr_udc.h
@@ -217,7 +217,6 @@
 	spinlock_t lock; /* General lock, a.k.a. "dev->lock" in comments */
 
 	struct dentry *dfs_root;
-	struct dentry *dfs_state;
 };
 
 #define to_gr_udc(gadget)	(container_of((gadget), struct gr_udc, gadget))
diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index dc35a54..3a16431 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2432,22 +2432,9 @@
 	return 0;
 }
 
-static int proc_udc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_udc_show, NULL);
-}
-
-static const struct file_operations proc_ops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_udc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static void create_proc_file(void)
 {
-	proc_create(proc_filename, 0, NULL, &proc_ops);
+	proc_create_single(proc_filename, 0, NULL, proc_udc_show);
 }
 
 static void remove_proc_file(void)
diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c
index a58242e..0142332 100644
--- a/drivers/usb/gadget/udc/pxa27x_udc.c
+++ b/drivers/usb/gadget/udc/pxa27x_udc.c
@@ -205,50 +205,19 @@
 
 static void pxa_init_debugfs(struct pxa_udc *udc)
 {
-	struct dentry *root, *state, *queues, *eps;
+	struct dentry *root;
 
 	root = debugfs_create_dir(udc->gadget.name, NULL);
-	if (IS_ERR(root) || !root)
-		goto err_root;
-
-	state = debugfs_create_file("udcstate", 0400, root, udc,
-			&state_dbg_fops);
-	if (!state)
-		goto err_state;
-	queues = debugfs_create_file("queues", 0400, root, udc,
-			&queues_dbg_fops);
-	if (!queues)
-		goto err_queues;
-	eps = debugfs_create_file("epstate", 0400, root, udc,
-			&eps_dbg_fops);
-	if (!eps)
-		goto err_eps;
-
 	udc->debugfs_root = root;
-	udc->debugfs_state = state;
-	udc->debugfs_queues = queues;
-	udc->debugfs_eps = eps;
-	return;
-err_eps:
-	debugfs_remove(eps);
-err_queues:
-	debugfs_remove(queues);
-err_state:
-	debugfs_remove(root);
-err_root:
-	dev_err(udc->dev, "debugfs is not available\n");
+
+	debugfs_create_file("udcstate", 0400, root, udc, &state_dbg_fops);
+	debugfs_create_file("queues", 0400, root, udc, &queues_dbg_fops);
+	debugfs_create_file("epstate", 0400, root, udc, &eps_dbg_fops);
 }
 
 static void pxa_cleanup_debugfs(struct pxa_udc *udc)
 {
-	debugfs_remove(udc->debugfs_eps);
-	debugfs_remove(udc->debugfs_queues);
-	debugfs_remove(udc->debugfs_state);
-	debugfs_remove(udc->debugfs_root);
-	udc->debugfs_eps = NULL;
-	udc->debugfs_queues = NULL;
-	udc->debugfs_state = NULL;
-	udc->debugfs_root = NULL;
+	debugfs_remove_recursive(udc->debugfs_root);
 }
 
 #else
diff --git a/drivers/usb/gadget/udc/pxa27x_udc.h b/drivers/usb/gadget/udc/pxa27x_udc.h
index 1128d39..13b2977 100644
--- a/drivers/usb/gadget/udc/pxa27x_udc.h
+++ b/drivers/usb/gadget/udc/pxa27x_udc.h
@@ -476,9 +476,6 @@
 #endif
 #ifdef CONFIG_USB_GADGET_DEBUG_FS
 	struct dentry				*debugfs_root;
-	struct dentry				*debugfs_state;
-	struct dentry				*debugfs_queues;
-	struct dentry				*debugfs_eps;
 #endif
 };
 #define to_pxa(g)	(container_of((g), struct pxa_udc, gadget))
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 409cde4..977ea1a0 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -333,6 +333,7 @@
 	struct extcon_dev *extcon;
 	struct work_struct extcon_work;
 	struct phy *phy;
+	struct dentry *dentry;
 
 	struct renesas_usb3_ep *usb3_ep;
 	int num_usb3_eps;
@@ -622,6 +623,13 @@
 	usb3_usb2_pullup(usb3, 0);
 	usb3_clear_bit(usb3, USB30_CON_B3_CONNECT, USB3_USB30_CON);
 	usb3_reset_epc(usb3);
+	usb3_disable_irq_1(usb3, USB_INT_1_B2_RSUM | USB_INT_1_B3_PLLWKUP |
+			   USB_INT_1_B3_LUPSUCS | USB_INT_1_B3_DISABLE |
+			   USB_INT_1_SPEED | USB_INT_1_B3_WRMRST |
+			   USB_INT_1_B3_HOTRST | USB_INT_1_B2_SPND |
+			   USB_INT_1_B2_L1SPND | USB_INT_1_B2_USBRST);
+	usb3_clear_bit(usb3, USB_COM_CON_SPD_MODE, USB3_USB_COM_CON);
+	usb3_init_epc_registers(usb3);
 
 	if (usb3->driver)
 		usb3->driver->disconnect(&usb3->gadget);
@@ -2383,18 +2391,10 @@
 static void renesas_usb3_debugfs_init(struct renesas_usb3 *usb3,
 				      struct device *dev)
 {
-	struct dentry *root, *file;
+	usb3->dentry = debugfs_create_dir(dev_name(dev), NULL);
 
-	root = debugfs_create_dir(dev_name(dev), NULL);
-	if (IS_ERR_OR_NULL(root)) {
-		dev_info(dev, "%s: Can't create the root\n", __func__);
-		return;
-	}
-
-	file = debugfs_create_file("b_device", 0644, root, usb3,
-				   &renesas_usb3_b_device_fops);
-	if (!file)
-		dev_info(dev, "%s: Can't create debugfs mode\n", __func__);
+	debugfs_create_file("b_device", 0644, usb3->dentry, usb3,
+			    &renesas_usb3_b_device_fops);
 }
 
 /*------- platform_driver ------------------------------------------------*/
@@ -2402,14 +2402,13 @@
 {
 	struct renesas_usb3 *usb3 = platform_get_drvdata(pdev);
 
+	debugfs_remove_recursive(usb3->dentry);
 	device_remove_file(&pdev->dev, &dev_attr_role);
 
 	usb_del_gadget_udc(&usb3->gadget);
 	renesas_usb3_dma_free_prd(usb3, &pdev->dev);
 
 	__renesas_usb3_ep_free_request(usb3->ep0_req);
-	if (usb3->phy)
-		phy_put(usb3->phy);
 	pm_runtime_disable(&pdev->dev);
 
 	return 0;
@@ -2628,6 +2627,17 @@
 	if (ret < 0)
 		goto err_alloc_prd;
 
+	/*
+	 * This is optional. So, if this driver cannot get a phy,
+	 * this driver will not handle a phy anymore.
+	 */
+	usb3->phy = devm_phy_optional_get(&pdev->dev, "usb");
+	if (IS_ERR(usb3->phy)) {
+		ret = PTR_ERR(usb3->phy);
+		goto err_add_udc;
+	}
+
+	pm_runtime_enable(&pdev->dev);
 	ret = usb_add_gadget_udc(&pdev->dev, &usb3->gadget);
 	if (ret < 0)
 		goto err_add_udc;
@@ -2636,20 +2646,11 @@
 	if (ret < 0)
 		goto err_dev_create;
 
-	/*
-	 * This is an optional. So, if this driver cannot get a phy,
-	 * this driver will not handle a phy anymore.
-	 */
-	usb3->phy = devm_phy_get(&pdev->dev, "usb");
-	if (IS_ERR(usb3->phy))
-		usb3->phy = NULL;
-
 	usb3->workaround_for_vbus = priv->workaround_for_vbus;
 
 	renesas_usb3_debugfs_init(usb3, &pdev->dev);
 
 	dev_info(&pdev->dev, "probed%s\n", usb3->phy ? " with phy" : "");
-	pm_runtime_enable(usb3_to_dev(usb3));
 
 	return 0;
 
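
Two behavioural fixes ride along with the renesas_usb3 debugfs change. The PHY lookup moves ahead of UDC registration and switches to devm_phy_optional_get(), so a DT without a "usb" phy still probes while a real failure such as -EPROBE_DEFER now aborts the probe instead of being swallowed; being devm-managed, the explicit phy_put() in remove() can also go. pm_runtime_enable() likewise moves before usb_add_gadget_udc(). A sketch of the optional-get semantics:

	phy = devm_phy_optional_get(&pdev->dev, "usb");
	if (IS_ERR(phy))
		return PTR_ERR(phy);	/* real error, e.g. -EPROBE_DEFER */
	if (!phy)
		dev_dbg(&pdev->dev, "no PHY described, continuing without\n");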
diff --git a/drivers/usb/gadget/udc/s3c2410_udc.c b/drivers/usb/gadget/udc/s3c2410_udc.c
index f154f49..8bf5ad7 100644
--- a/drivers/usb/gadget/udc/s3c2410_udc.c
+++ b/drivers/usb/gadget/udc/s3c2410_udc.c
@@ -1871,13 +1871,9 @@
 	if (retval)
 		goto err_add_udc;
 
-	if (s3c2410_udc_debugfs_root) {
-		udc->regs_info = debugfs_create_file("registers", S_IRUGO,
-				s3c2410_udc_debugfs_root,
-				udc, &s3c2410_udc_debugfs_fops);
-		if (!udc->regs_info)
-			dev_warn(dev, "debugfs file creation failed\n");
-	}
+	udc->regs_info = debugfs_create_file("registers", S_IRUGO,
+					     s3c2410_udc_debugfs_root, udc,
+					     &s3c2410_udc_debugfs_fops);
 
 	dev_dbg(dev, "probe ok\n");
 
@@ -1994,11 +1990,6 @@
 	dprintk(DEBUG_NORMAL, "%s\n", gadget_name);
 
 	s3c2410_udc_debugfs_root = debugfs_create_dir(gadget_name, NULL);
-	if (IS_ERR(s3c2410_udc_debugfs_root)) {
-		pr_err("%s: debugfs dir creation failed %ld\n",
-			gadget_name, PTR_ERR(s3c2410_udc_debugfs_root));
-		s3c2410_udc_debugfs_root = NULL;
-	}
 
 	retval = platform_driver_register(&udc_driver_24x0);
 	if (retval)
@@ -2014,7 +2005,7 @@
 static void __exit udc_exit(void)
 {
 	platform_driver_unregister(&udc_driver_24x0);
-	debugfs_remove(s3c2410_udc_debugfs_root);
+	debugfs_remove_recursive(s3c2410_udc_debugfs_root);
 }
 
 module_init(udc_init);
diff --git a/drivers/usb/gadget/usbstring.c b/drivers/usb/gadget/usbstring.c
index 566ab26..7c24d1c 100644
--- a/drivers/usb/gadget/usbstring.c
+++ b/drivers/usb/gadget/usbstring.c
@@ -33,7 +33,7 @@
  * characters (which are also widely used in C strings).
  */
 int
-usb_gadget_get_string (struct usb_gadget_strings *table, int id, u8 *buf)
+usb_gadget_get_string (const struct usb_gadget_strings *table, int id, u8 *buf)
 {
 	struct usb_string	*s;
 	int			len;
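
Constifying the table parameter lets gadget drivers keep their language tables in rodata. A hypothetical caller that now compiles without a cast:

	static struct usb_string dev_strings[] = {
		{ .id = 1, .s = "Example Gadget" },	/* hypothetical */
		{ }					/* terminator */
	};

	static const struct usb_gadget_strings stringtab = {
		.language = 0x0409,			/* en-US */
		.strings  = dev_strings,
	};

	/* len = usb_gadget_get_string(&stringtab, 1, buf); */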
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 5d958da..6e64d3a 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -52,6 +52,13 @@
 
 	  If unsure, say N.
 
+config USB_XHCI_HISTB
+	tristate "xHCI support for HiSilicon STB SoCs"
+	depends on USB_XHCI_PLATFORM && (ARCH_HISI || COMPILE_TEST)
+	help
+	  Say 'Y' to enable support for the xHCI host controller
+	  found in HiSilicon STB SoCs.
+
 config USB_XHCI_MTK
 	tristate "xHCI support for MediaTek SoCs"
 	select MFD_SYSCON
@@ -234,9 +241,7 @@
        tristate "NVIDIA Tegra HCD support"
        depends on ARCH_TEGRA
        select USB_EHCI_ROOT_HUB_TT
-       select USB_PHY
-	select USB_ULPI
-	select USB_ULPI_VIEWPORT
+       select USB_TEGRA_PHY
        help
          This driver enables support for the internal USB Host Controllers
          found in NVIDIA Tegra SoCs. The controllers are EHCI compliant.
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index 8a8cffe..9b669c9 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -74,6 +74,7 @@
 obj-$(CONFIG_USB_XHCI_HCD)	+= xhci-hcd.o
 obj-$(CONFIG_USB_XHCI_PCI)	+= xhci-pci.o
 obj-$(CONFIG_USB_XHCI_PLATFORM) += xhci-plat-hcd.o
+obj-$(CONFIG_USB_XHCI_HISTB)	+= xhci-histb.o
 obj-$(CONFIG_USB_XHCI_MTK)	+= xhci-mtk.o
 obj-$(CONFIG_USB_XHCI_TEGRA)	+= xhci-tegra.o
 obj-$(CONFIG_USB_SL811_HCD)	+= sl811-hcd.o
diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index 3ed75aa..7619cfb 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -1028,29 +1028,15 @@
 	struct usb_bus *bus = &ehci_to_hcd(ehci)->self;
 
 	ehci->debug_dir = debugfs_create_dir(bus->bus_name, ehci_debug_root);
-	if (!ehci->debug_dir)
-		return;
 
-	if (!debugfs_create_file("async", S_IRUGO, ehci->debug_dir, bus,
-						&debug_async_fops))
-		goto file_error;
-
-	if (!debugfs_create_file("bandwidth", S_IRUGO, ehci->debug_dir, bus,
-						&debug_bandwidth_fops))
-		goto file_error;
-
-	if (!debugfs_create_file("periodic", S_IRUGO, ehci->debug_dir, bus,
-						&debug_periodic_fops))
-		goto file_error;
-
-	if (!debugfs_create_file("registers", S_IRUGO, ehci->debug_dir, bus,
-						    &debug_registers_fops))
-		goto file_error;
-
-	return;
-
-file_error:
-	debugfs_remove_recursive(ehci->debug_dir);
+	debugfs_create_file("async", S_IRUGO, ehci->debug_dir, bus,
+			    &debug_async_fops);
+	debugfs_create_file("bandwidth", S_IRUGO, ehci->debug_dir, bus,
+			    &debug_bandwidth_fops);
+	debugfs_create_file("periodic", S_IRUGO, ehci->debug_dir, bus,
+			    &debug_periodic_fops);
+	debugfs_create_file("registers", S_IRUGO, ehci->debug_dir, bus,
+			    &debug_registers_fops);
 }
 
 static inline void remove_debug_files(struct ehci_hcd *ehci)
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index d927adf..89c47ae 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1311,10 +1311,6 @@
 
 #ifdef CONFIG_DYNAMIC_DEBUG
 	ehci_debug_root = debugfs_create_dir("ehci", usb_debug_root);
-	if (!ehci_debug_root) {
-		retval = -ENOENT;
-		goto err_debug;
-	}
 #endif
 
 #ifdef PLATFORM_DRIVER
@@ -1361,7 +1357,6 @@
 #ifdef CONFIG_DYNAMIC_DEBUG
 	debugfs_remove(ehci_debug_root);
 	ehci_debug_root = NULL;
-err_debug:
 #endif
 	clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
 	return retval;
diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c
index 8d8bafc..7e4c1334 100644
--- a/drivers/usb/host/ehci-omap.c
+++ b/drivers/usb/host/ehci-omap.c
@@ -157,10 +157,7 @@
 		struct usb_phy *phy;
 
 		/* get the PHY device */
-		if (dev->of_node)
-			phy = devm_usb_get_phy_by_phandle(dev, "phys", i);
-		else
-			phy = devm_usb_get_phy_dev(dev, i);
+		phy = devm_usb_get_phy_by_phandle(dev, "phys", i);
 		if (IS_ERR(phy)) {
 			/* Don't bail out if PHY is not absolutely necessary */
 			if (pdata->port_mode[i] != OMAP_EHCI_PORT_MODE_PHY)
diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index a6f4389..4d2cdec 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -36,7 +36,6 @@
 #define DRV_NAME "tegra-ehci"
 
 static struct hc_driver __read_mostly tegra_ehci_hc_driver;
-static bool usb1_reset_attempted;
 
 struct tegra_ehci_soc_config {
 	bool has_hostpc;
@@ -51,68 +50,55 @@
 	enum tegra_usb_phy_port_speed port_speed;
 };
 
-/*
- * The 1st USB controller contains some UTMI pad registers that are global for
- * all the controllers on the chip. Those registers are also cleared when
- * reset is asserted to the 1st controller. This means that the 1st controller
- * can only be reset when no other controlled has finished probing. So we'll
- * reset the 1st controller before doing any other setup on any of the
- * controllers, and then never again.
- *
- * Since this is a PHY issue, the Tegra PHY driver should probably be doing
- * the resetting of the USB controllers. But to keep compatibility with old
- * device trees that don't have reset phandles in the PHYs, do it here.
- * Those old DTs will be vulnerable to total USB breakage if the 1st EHCI
- * device isn't the first one to finish probing, so warn them.
- */
 static int tegra_reset_usb_controller(struct platform_device *pdev)
 {
 	struct device_node *phy_np;
 	struct usb_hcd *hcd = platform_get_drvdata(pdev);
 	struct tegra_ehci_hcd *tegra =
 		(struct tegra_ehci_hcd *)hcd_to_ehci(hcd)->priv;
-	bool has_utmi_pad_registers = false;
+	struct reset_control *rst;
+	int err;
 
 	phy_np = of_parse_phandle(pdev->dev.of_node, "nvidia,phy", 0);
 	if (!phy_np)
 		return -ENOENT;
 
-	if (of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers"))
-		has_utmi_pad_registers = true;
-
-	if (!usb1_reset_attempted) {
-		struct reset_control *usb1_reset;
-
-		if (!has_utmi_pad_registers)
-			usb1_reset = of_reset_control_get(phy_np, "utmi-pads");
-		else
-			usb1_reset = tegra->rst;
-
-		if (IS_ERR(usb1_reset)) {
-			dev_warn(&pdev->dev,
-				 "can't get utmi-pads reset from the PHY\n");
-			dev_warn(&pdev->dev,
-				 "continuing, but please update your DT\n");
-		} else {
-			reset_control_assert(usb1_reset);
-			udelay(1);
-			reset_control_deassert(usb1_reset);
-
-			if (!has_utmi_pad_registers)
-				reset_control_put(usb1_reset);
-		}
-
-		usb1_reset_attempted = true;
-	}
-
-	if (!has_utmi_pad_registers) {
-		reset_control_assert(tegra->rst);
-		udelay(1);
-		reset_control_deassert(tegra->rst);
+	/*
+	 * The 1st USB controller contains some UTMI pad registers that are
+	 * global for all the controllers on the chip. Those registers are
+	 * also cleared when reset is asserted to the 1st controller.
+	 */
+	rst = of_reset_control_get_shared(phy_np, "utmi-pads");
+	if (IS_ERR(rst)) {
+		dev_warn(&pdev->dev,
+			 "can't get utmi-pads reset from the PHY\n");
+		dev_warn(&pdev->dev,
+			 "continuing, but please update your DT\n");
+	} else {
+		/*
+		 * The PHY driver performs the UTMI-pads reset itself
+		 * in the case of a non-legacy DT.
+		 */
+		reset_control_put(rst);
 	}
 
 	of_node_put(phy_np);
 
+	/* reset control is shared, hence initialize it first */
+	err = reset_control_deassert(tegra->rst);
+	if (err)
+		return err;
+
+	err = reset_control_assert(tegra->rst);
+	if (err)
+		return err;
+
+	udelay(1);
+
+	err = reset_control_deassert(tegra->rst);
+	if (err)
+		return err;
+
 	return 0;
 }
 
@@ -440,7 +426,7 @@
 		goto cleanup_hcd_create;
 	}
 
-	tegra->rst = devm_reset_control_get(&pdev->dev, "usb");
+	tegra->rst = devm_reset_control_get_shared(&pdev->dev, "usb");
 	if (IS_ERR(tegra->rst)) {
 		dev_err(&pdev->dev, "Can't get ehci reset\n");
 		err = PTR_ERR(tegra->rst);
@@ -452,8 +438,10 @@
 		goto cleanup_hcd_create;
 
 	err = tegra_reset_usb_controller(pdev);
-	if (err)
+	if (err) {
+		dev_err(&pdev->dev, "Failed to reset controller\n");
 		goto cleanup_clk_en;
+	}
 
 	u_phy = devm_usb_get_phy_by_phandle(&pdev->dev, "nvidia,phy", 0);
 	if (IS_ERR(u_phy)) {
@@ -538,6 +526,9 @@
 	usb_phy_shutdown(hcd->usb_phy);
 	usb_remove_hcd(hcd);
 
+	reset_control_assert(tegra->rst);
+	udelay(1);
+
 	clk_disable_unprepare(tegra->clk);
 
 	usb_put_hcd(hcd);
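
The ehci-tegra rework drops the one-shot usb1_reset_attempted logic: the "usb" reset is now taken as a shared reset control, the legacy "utmi-pads" reset is only looked up to warn about stale DTs (the PHY driver owns that reset on non-legacy DTs), and removal now asserts the controller back into reset. The deassert/assert/deassert sequence deserves a note: shared reset controls are refcounted by the reset core, so the first deassert raises the count before the actual reset pulse. Sketched, under that reading:

	reset_control_deassert(rst);	/* count 0 -> 1, line deasserted */
	reset_control_assert(rst);	/* count 1 -> 0, line asserted   */
	udelay(1);
	reset_control_deassert(rst);	/* count 0 -> 1, line deasserted */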
diff --git a/drivers/usb/host/fhci-dbg.c b/drivers/usb/host/fhci-dbg.c
index ebf9bb2..100048b 100644
--- a/drivers/usb/host/fhci-dbg.c
+++ b/drivers/usb/host/fhci-dbg.c
@@ -83,27 +83,14 @@
 	struct device *dev = fhci_to_hcd(fhci)->self.controller;
 
 	fhci->dfs_root = debugfs_create_dir(dev_name(dev), usb_debug_root);
-	if (!fhci->dfs_root) {
-		WARN_ON(1);
-		return;
-	}
 
-	fhci->dfs_regs = debugfs_create_file("regs", S_IFREG | S_IRUGO,
-		fhci->dfs_root, fhci, &fhci_dfs_regs_fops);
-
-	fhci->dfs_irq_stat = debugfs_create_file("irq_stat",
-		S_IFREG | S_IRUGO, fhci->dfs_root, fhci,
-		&fhci_dfs_irq_stat_fops);
-
-	WARN_ON(!fhci->dfs_regs || !fhci->dfs_irq_stat);
+	debugfs_create_file("regs", S_IFREG | S_IRUGO, fhci->dfs_root, fhci,
+			    &fhci_dfs_regs_fops);
+	debugfs_create_file("irq_stat", S_IFREG | S_IRUGO, fhci->dfs_root, fhci,
+			    &fhci_dfs_irq_stat_fops);
 }
 
 void fhci_dfs_destroy(struct fhci_hcd *fhci)
 {
-	if (!fhci->dfs_root)
-		return;
-
-	debugfs_remove(fhci->dfs_irq_stat);
-	debugfs_remove(fhci->dfs_regs);
-	debugfs_remove(fhci->dfs_root);
+	debugfs_remove_recursive(fhci->dfs_root);
 }
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index e7ec41d6..2ce5031 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -262,8 +262,6 @@
 #ifdef CONFIG_FHCI_DEBUG
 	int usb_irq_stat[13];
 	struct dentry *dfs_root;
-	struct dentry *dfs_regs;
-	struct dentry *dfs_irq_stat;
 #endif
 };
 
diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
index d8abf40..e64eb47 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/host/fotg210-hcd.c
@@ -844,28 +844,16 @@
 static inline void create_debug_files(struct fotg210_hcd *fotg210)
 {
 	struct usb_bus *bus = &fotg210_to_hcd(fotg210)->self;
+	struct dentry *root;
 
-	fotg210->debug_dir = debugfs_create_dir(bus->bus_name,
-			fotg210_debug_root);
-	if (!fotg210->debug_dir)
-		return;
+	root = debugfs_create_dir(bus->bus_name, fotg210_debug_root);
+	fotg210->debug_dir = root;
 
-	if (!debugfs_create_file("async", S_IRUGO, fotg210->debug_dir, bus,
-			&debug_async_fops))
-		goto file_error;
-
-	if (!debugfs_create_file("periodic", S_IRUGO, fotg210->debug_dir, bus,
-			&debug_periodic_fops))
-		goto file_error;
-
-	if (!debugfs_create_file("registers", S_IRUGO, fotg210->debug_dir, bus,
-			&debug_registers_fops))
-		goto file_error;
-
-	return;
-
-file_error:
-	debugfs_remove_recursive(fotg210->debug_dir);
+	debugfs_create_file("async", S_IRUGO, root, bus, &debug_async_fops);
+	debugfs_create_file("periodic", S_IRUGO, root, bus,
+			    &debug_periodic_fops);
+	debugfs_create_file("registers", S_IRUGO, root, bus,
+			    &debug_registers_fops);
 }
 
 static inline void remove_debug_files(struct fotg210_hcd *fotg210)
@@ -5686,10 +5674,6 @@
 			sizeof(struct fotg210_itd));
 
 	fotg210_debug_root = debugfs_create_dir("fotg210", usb_debug_root);
-	if (!fotg210_debug_root) {
-		retval = -ENOENT;
-		goto err_debug;
-	}
 
 	retval = platform_driver_register(&fotg210_hcd_driver);
 	if (retval < 0)
@@ -5699,7 +5683,7 @@
 clean:
 	debugfs_remove(fotg210_debug_root);
 	fotg210_debug_root = NULL;
-err_debug:
+
 	clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
 	return retval;
 }
diff --git a/drivers/usb/host/imx21-dbg.c b/drivers/usb/host/imx21-dbg.c
index a213ed6..7fcf1d9 100644
--- a/drivers/usb/host/imx21-dbg.c
+++ b/drivers/usb/host/imx21-dbg.c
@@ -417,46 +417,22 @@
 
 static void create_debug_files(struct imx21 *imx21)
 {
-	imx21->debug_root = debugfs_create_dir(dev_name(imx21->dev), NULL);
-	if (!imx21->debug_root)
-		goto failed_create_rootdir;
+	struct dentry *root;
 
-	if (!debugfs_create_file("status", S_IRUGO,
-			imx21->debug_root, imx21, &debug_status_fops))
-		goto failed_create;
+	root = debugfs_create_dir(dev_name(imx21->dev), NULL);
+	imx21->debug_root = root;
 
-	if (!debugfs_create_file("dmem", S_IRUGO,
-			imx21->debug_root, imx21, &debug_dmem_fops))
-		goto failed_create;
-
-	if (!debugfs_create_file("etd", S_IRUGO,
-			imx21->debug_root, imx21, &debug_etd_fops))
-		goto failed_create;
-
-	if (!debugfs_create_file("statistics", S_IRUGO,
-			imx21->debug_root, imx21, &debug_statistics_fops))
-		goto failed_create;
-
-	if (!debugfs_create_file("isoc", S_IRUGO,
-			imx21->debug_root, imx21, &debug_isoc_fops))
-		goto failed_create;
-
-	return;
-
-failed_create:
-	debugfs_remove_recursive(imx21->debug_root);
-
-failed_create_rootdir:
-	imx21->debug_root = NULL;
+	debugfs_create_file("status", S_IRUGO, root, imx21, &debug_status_fops);
+	debugfs_create_file("dmem", S_IRUGO, root, imx21, &debug_dmem_fops);
+	debugfs_create_file("etd", S_IRUGO, root, imx21, &debug_etd_fops);
+	debugfs_create_file("statistics", S_IRUGO, root, imx21,
+			    &debug_statistics_fops);
+	debugfs_create_file("isoc", S_IRUGO, root, imx21, &debug_isoc_fops);
 }
 
-
 static void remove_debug_files(struct imx21 *imx21)
 {
-	if (imx21->debug_root) {
-		debugfs_remove_recursive(imx21->debug_root);
-		imx21->debug_root = NULL;
-	}
+	debugfs_remove_recursive(imx21->debug_root);
 }
 
 #endif
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index 4602ed8..74da136 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -1198,14 +1198,11 @@
 }
 DEFINE_SHOW_ATTRIBUTE(isp116x_debug);
 
-static int create_debug_file(struct isp116x *isp116x)
+static void create_debug_file(struct isp116x *isp116x)
 {
 	isp116x->dentry = debugfs_create_file(hcd_name,
 					      S_IRUGO, NULL, isp116x,
 					      &isp116x_debug_fops);
-	if (!isp116x->dentry)
-		return -ENOMEM;
-	return 0;
 }
 
 static void remove_debug_file(struct isp116x *isp116x)
@@ -1215,8 +1212,8 @@
 
 #else
 
-#define	create_debug_file(d)	0
-#define	remove_debug_file(d)	do{}while(0)
+static inline void create_debug_file(struct isp116x *isp116x) { }
+static inline void remove_debug_file(struct isp116x *isp116x) { }
 
 #endif				/* CONFIG_DEBUG_FS */
 
@@ -1643,16 +1640,10 @@
 
 	device_wakeup_enable(hcd->self.controller);
 
-	ret = create_debug_file(isp116x);
-	if (ret) {
-		ERR("Couldn't create debugfs entry\n");
-		goto err7;
-	}
+	create_debug_file(isp116x);
 
 	return 0;
 
-      err7:
-	usb_remove_hcd(hcd);
       err6:
 	usb_put_hcd(hcd);
       err5:
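
Note the isp116x !CONFIG_DEBUG_FS stubs above: object-like macros become static inline functions, which keep full type checking of the argument in both configurations yet still compile to nothing. (The fops themselves already come from DEFINE_SHOW_ATTRIBUTE(isp116x_debug).) The general form of the pattern, with a hypothetical driver "foo":

	#ifdef CONFIG_DEBUG_FS
	static void foo_create_debug_file(struct foo *foo);	/* real one */
	#else
	static inline void foo_create_debug_file(struct foo *foo) { }
	#endif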
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index 5ad9e9b..e986739 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -212,7 +212,7 @@
 
 	ohci_at91->sfr_regmap = at91_dt_syscon_sfr();
 	if (!ohci_at91->sfr_regmap)
-		dev_warn(dev, "failed to find sfr node\n");
+		dev_dbg(dev, "failed to find sfr node\n");
 
 	board = hcd->self.controller->platform_data;
 	ohci = hcd_to_ohci(hcd);
diff --git a/drivers/usb/host/ohci-dbg.c b/drivers/usb/host/ohci-dbg.c
index ac7d4ac..d3ee1f52a 100644
--- a/drivers/usb/host/ohci-dbg.c
+++ b/drivers/usb/host/ohci-dbg.c
@@ -762,50 +762,23 @@
 static inline void create_debug_files (struct ohci_hcd *ohci)
 {
 	struct usb_bus *bus = &ohci_to_hcd(ohci)->self;
+	struct dentry *root;
 
-	ohci->debug_dir = debugfs_create_dir(bus->bus_name, ohci_debug_root);
-	if (!ohci->debug_dir)
-		goto dir_error;
+	root = debugfs_create_dir(bus->bus_name, ohci_debug_root);
+	ohci->debug_dir = root;
 
-	ohci->debug_async = debugfs_create_file("async", S_IRUGO,
-						ohci->debug_dir, ohci,
-						&debug_async_fops);
-	if (!ohci->debug_async)
-		goto async_error;
-
-	ohci->debug_periodic = debugfs_create_file("periodic", S_IRUGO,
-						   ohci->debug_dir, ohci,
-						   &debug_periodic_fops);
-	if (!ohci->debug_periodic)
-		goto periodic_error;
-
-	ohci->debug_registers = debugfs_create_file("registers", S_IRUGO,
-						    ohci->debug_dir, ohci,
-						    &debug_registers_fops);
-	if (!ohci->debug_registers)
-		goto registers_error;
+	debugfs_create_file("async", S_IRUGO, root, ohci, &debug_async_fops);
+	debugfs_create_file("periodic", S_IRUGO, root, ohci,
+			    &debug_periodic_fops);
+	debugfs_create_file("registers", S_IRUGO, root, ohci,
+			    &debug_registers_fops);
 
 	ohci_dbg (ohci, "created debug files\n");
-	return;
-
-registers_error:
-	debugfs_remove(ohci->debug_periodic);
-periodic_error:
-	debugfs_remove(ohci->debug_async);
-async_error:
-	debugfs_remove(ohci->debug_dir);
-dir_error:
-	ohci->debug_periodic = NULL;
-	ohci->debug_async = NULL;
-	ohci->debug_dir = NULL;
 }
 
 static inline void remove_debug_files (struct ohci_hcd *ohci)
 {
-	debugfs_remove(ohci->debug_registers);
-	debugfs_remove(ohci->debug_periodic);
-	debugfs_remove(ohci->debug_async);
-	debugfs_remove(ohci->debug_dir);
+	debugfs_remove_recursive(ohci->debug_dir);
 }
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 4806e0f..210181f 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -1258,10 +1258,6 @@
 	set_bit(USB_OHCI_LOADED, &usb_hcds_loaded);
 
 	ohci_debug_root = debugfs_create_dir("ohci", usb_debug_root);
-	if (!ohci_debug_root) {
-		retval = -ENOENT;
-		goto error_debug;
-	}
 
 #ifdef PS3_SYSTEM_BUS_DRIVER
 	retval = ps3_ohci_driver_register(&PS3_SYSTEM_BUS_DRIVER);
@@ -1318,7 +1314,6 @@
 #endif
 	debugfs_remove(ohci_debug_root);
 	ohci_debug_root = NULL;
- error_debug:
 
 	clear_bit(USB_OHCI_LOADED, &usb_hcds_loaded);
 	return retval;
diff --git a/drivers/usb/host/ohci.h b/drivers/usb/host/ohci.h
index 508a803..ef4813b 100644
--- a/drivers/usb/host/ohci.h
+++ b/drivers/usb/host/ohci.h
@@ -431,9 +431,6 @@
 	struct work_struct	nec_work;	/* Worker for NEC quirk */
 
 	struct dentry		*debug_dir;
-	struct dentry		*debug_async;
-	struct dentry		*debug_periodic;
-	struct dentry		*debug_registers;
 
 	/* platform-specific data -- must come last */
 	unsigned long           priv[0] __aligned(sizeof(s64));
diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 67ad4bb..3625a5c 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -1268,23 +1268,3 @@
 }
 DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
 			PCI_CLASS_SERIAL_USB, 8, quirk_usb_early_handoff);
-
-bool usb_xhci_needs_pci_reset(struct pci_dev *pdev)
-{
-	/*
-	 * Our dear uPD72020{1,2} friend only partially resets when
-	 * asked to via the XHCI interface, and may end up doing DMA
-	 * at the wrong addresses, as it keeps the top 32bit of some
-	 * addresses from its previous programming under obscure
-	 * circumstances.
-	 * Give it a good wack at probe time. Unfortunately, this
-	 * needs to happen before we've had a chance to discover any
-	 * quirk, or the system will be in a rather bad state.
-	 */
-	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
-	    (pdev->device == 0x0014 || pdev->device == 0x0015))
-		return true;
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(usb_xhci_needs_pci_reset);
diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h
index 4ca0d9b..63c63307 100644
--- a/drivers/usb/host/pci-quirks.h
+++ b/drivers/usb/host/pci-quirks.h
@@ -16,7 +16,6 @@
 void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev);
 void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
 void sb800_prefetch(struct device *dev, int on);
-bool usb_xhci_needs_pci_reset(struct pci_dev *pdev);
 bool usb_amd_pt_check_port(struct device *device, int port);
 #else
 struct pci_dev;
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index f9c3947..6218bfe 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -590,14 +590,10 @@
 	init_waitqueue_head(&uhci->waitqh);
 
 #ifdef UHCI_DEBUG_OPS
-	dentry = debugfs_create_file(hcd->self.bus_name,
-			S_IFREG|S_IRUGO|S_IWUSR, uhci_debugfs_root,
-			uhci, &uhci_debug_operations);
-	if (!dentry) {
-		dev_err(uhci_dev(uhci), "couldn't create uhci debugfs entry\n");
-		return -ENOMEM;
-	}
-	uhci->dentry = dentry;
+	uhci->dentry = debugfs_create_file(hcd->self.bus_name,
+					   S_IFREG|S_IRUGO|S_IWUSR,
+					   uhci_debugfs_root, uhci,
+					   &uhci_debug_operations);
 #endif
 
 	uhci->frame = dma_zalloc_coherent(uhci_dev(uhci),
@@ -882,8 +878,6 @@
 	if (!errbuf)
 		goto errbuf_failed;
 	uhci_debugfs_root = debugfs_create_dir("uhci", usb_debug_root);
-	if (!uhci_debugfs_root)
-		goto debug_failed;
 #endif
 
 	uhci_up_cachep = kmem_cache_create("uhci_urb_priv",
@@ -918,7 +912,6 @@
 #if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
 	debugfs_remove(uhci_debugfs_root);
 
-debug_failed:
 	kfree(errbuf);
 
 errbuf_failed:
diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c
index c359bae..1fbfd89 100644
--- a/drivers/usb/host/xhci-dbgcap.c
+++ b/drivers/usb/host/xhci-dbgcap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * xhci-dbgcap.c - xHCI debug capability support
  *
diff --git a/drivers/usb/host/xhci-dbgcap.h b/drivers/usb/host/xhci-dbgcap.h
index e66ea07..ce0c607 100644
--- a/drivers/usb/host/xhci-dbgcap.h
+++ b/drivers/usb/host/xhci-dbgcap.h
@@ -1,4 +1,4 @@
-
+/* SPDX-License-Identifier: GPL-2.0 */
 /**
  * xhci-dbgcap.h - xHCI debug capability support
  *
diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c
index eb494ec5..aff79ff 100644
--- a/drivers/usb/host/xhci-dbgtty.c
+++ b/drivers/usb/host/xhci-dbgtty.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * xhci-dbgtty.c - tty glue for xHCI debug capability
  *
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
index 5851052..cadc013 100644
--- a/drivers/usb/host/xhci-debugfs.c
+++ b/drivers/usb/host/xhci-debugfs.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 
 #include "xhci.h"
 #include "xhci-debugfs.h"
@@ -333,6 +334,67 @@
 	.release		= single_release,
 };
 
+
+
+static int xhci_portsc_show(struct seq_file *s, void *unused)
+{
+	struct xhci_port	*port = s->private;
+	u32			portsc;
+
+	portsc = readl(port->addr);
+	seq_printf(s, "%s\n", xhci_decode_portsc(portsc));
+
+	return 0;
+}
+
+static int xhci_port_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xhci_portsc_show, inode->i_private);
+}
+
+static ssize_t xhci_port_write(struct file *file,  const char __user *ubuf,
+			       size_t count, loff_t *ppos)
+{
+	struct seq_file         *s = file->private_data;
+	struct xhci_port	*port = s->private;
+	struct xhci_hcd		*xhci = hcd_to_xhci(port->rhub->hcd);
+	char                    buf[32];
+	u32			portsc;
+	unsigned long		flags;
+
+	if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count)))
+		return -EFAULT;
+
+	if (!strncmp(buf, "compliance", 10)) {
+		/* If CTC is clear, compliance is enabled by default */
+		if (!HCC2_CTC(xhci->hcc_params2))
+			return count;
+		spin_lock_irqsave(&xhci->lock, flags);
+		/* compliance mode can only be enabled on ports in RxDetect */
+		portsc = readl(port->addr);
+		if ((portsc & PORT_PLS_MASK) != XDEV_RXDETECT) {
+			spin_unlock_irqrestore(&xhci->lock, flags);
+			return -EPERM;
+		}
+		portsc = xhci_port_state_to_neutral(portsc);
+		portsc &= ~PORT_PLS_MASK;
+		portsc |= PORT_LINK_STROBE | XDEV_COMP_MODE;
+		writel(portsc, port->addr);
+		spin_unlock_irqrestore(&xhci->lock, flags);
+	} else {
+		return -EINVAL;
+	}
+	return count;
+}
+
+static const struct file_operations port_fops = {
+	.open			= xhci_port_open,
+	.write                  = xhci_port_write,
+	.read			= seq_read,
+	.llseek			= seq_lseek,
+	.release		= single_release,
+};
+
 static void xhci_debugfs_create_files(struct xhci_hcd *xhci,
 				      struct xhci_file_map *files,
 				      size_t nentries, void *data,
@@ -449,6 +511,27 @@
 	dev->debugfs_private = NULL;
 }
 
+static void xhci_debugfs_create_ports(struct xhci_hcd *xhci,
+				      struct dentry *parent)
+{
+	unsigned int		num_ports;
+	char			port_name[8];
+	struct xhci_port	*port;
+	struct dentry		*dir;
+
+	num_ports = HCS_MAX_PORTS(xhci->hcs_params1);
+
+	parent = debugfs_create_dir("ports", parent);
+
+	while (num_ports--) {
+		scnprintf(port_name, sizeof(port_name), "port%02d",
+			  num_ports + 1);
+		dir = debugfs_create_dir(port_name, parent);
+		port = &xhci->hw_ports[num_ports];
+		debugfs_create_file("portsc", 0644, dir, port, &port_fops);
+	}
+}
+
 void xhci_debugfs_init(struct xhci_hcd *xhci)
 {
 	struct device		*dev = xhci_to_hcd(xhci)->self.controller;
@@ -497,6 +580,8 @@
 				     xhci->debugfs_root);
 
 	xhci->debugfs_slots = debugfs_create_dir("devices", xhci->debugfs_root);
+
+	xhci_debugfs_create_ports(xhci, xhci->debugfs_root);
 }
 
 void xhci_debugfs_exit(struct xhci_hcd *xhci)
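
With this, xhci debugfs grows one directory per root-hub port, each holding a read/write "portsc" file: reads decode the raw PORTSC register, and writing the string "compliance" forces a USB3 port from RxDetect into compliance mode for electrical testing (refused with -EPERM from any other link state, -EINVAL for any other string, and accepted as a no-op when CTC is clear, since compliance transitions are then enabled by default). From userspace that looks like, with an illustrative path: echo compliance > /sys/kernel/debug/usb/xhci/<dev>/ports/port01/portsc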
diff --git a/drivers/usb/host/xhci-histb.c b/drivers/usb/host/xhci-histb.c
new file mode 100644
index 0000000..27f0016
--- /dev/null
+++ b/drivers/usb/host/xhci-histb.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xHCI host controller driver for HiSilicon STB SoCs
+ *
+ * Copyright (C) 2017-2018 HiSilicon Co., Ltd. http://www.hisilicon.com
+ *
+ * Authors: Jianguo Sun <sunjianguo1@huawei.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include "xhci.h"
+
+#define GTXTHRCFG		0xc108
+#define GRXTHRCFG		0xc10c
+#define REG_GUSB2PHYCFG0	0xc200
+#define BIT_UTMI_8_16		BIT(3)
+#define BIT_UTMI_ULPI		BIT(4)
+#define BIT_FREECLK_EXIST	BIT(30)
+
+#define REG_GUSB3PIPECTL0	0xc2c0
+#define USB3_DEEMPHASIS_MASK	GENMASK(2, 1)
+#define USB3_DEEMPHASIS0	BIT(1)
+#define USB3_TX_MARGIN1		BIT(4)
+
+struct xhci_hcd_histb {
+	struct device		*dev;
+	struct usb_hcd		*hcd;
+	void __iomem		*ctrl;
+	struct clk		*bus_clk;
+	struct clk		*utmi_clk;
+	struct clk		*pipe_clk;
+	struct clk		*suspend_clk;
+	struct reset_control	*soft_reset;
+};
+
+static inline struct xhci_hcd_histb *hcd_to_histb(struct usb_hcd *hcd)
+{
+	return dev_get_drvdata(hcd->self.controller);
+}
+
+static int xhci_histb_config(struct xhci_hcd_histb *histb)
+{
+	struct device_node *np = histb->dev->of_node;
+	u32 regval;
+
+	if (of_property_match_string(np, "phys-names", "inno") >= 0) {
+		/* USB2 PHY chose ulpi 8bit interface */
+		regval = readl(histb->ctrl + REG_GUSB2PHYCFG0);
+		regval &= ~BIT_UTMI_ULPI;
+		regval &= ~(BIT_UTMI_8_16);
+		regval &= ~BIT_FREECLK_EXIST;
+		writel(regval, histb->ctrl + REG_GUSB2PHYCFG0);
+	}
+
+	if (of_property_match_string(np, "phys-names", "combo") >= 0) {
+		/*
+		 * write 0x010c0012 to GUSB3PIPECTL0
+		 * GUSB3PIPECTL0[5:3] = 010 : Tx Margin = 900mV ,
+		 * decrease TX voltage
+		 * GUSB3PIPECTL0[2:1] = 01 : Tx Deemphasis = -3.5dB,
+		 * refer to xHCI spec
+		 */
+		regval = readl(histb->ctrl + REG_GUSB3PIPECTL0);
+		regval &= ~USB3_DEEMPHASIS_MASK;
+		regval |= USB3_DEEMPHASIS0;
+		regval |= USB3_TX_MARGIN1;
+		writel(regval, histb->ctrl + REG_GUSB3PIPECTL0);
+	}
+
+	writel(0x23100000, histb->ctrl + GTXTHRCFG);
+	writel(0x23100000, histb->ctrl + GRXTHRCFG);
+
+	return 0;
+}
+
+static int xhci_histb_clks_get(struct xhci_hcd_histb *histb)
+{
+	struct device *dev = histb->dev;
+
+	histb->bus_clk = devm_clk_get(dev, "bus");
+	if (IS_ERR(histb->bus_clk)) {
+		dev_err(dev, "fail to get bus clk\n");
+		return PTR_ERR(histb->bus_clk);
+	}
+
+	histb->utmi_clk = devm_clk_get(dev, "utmi");
+	if (IS_ERR(histb->utmi_clk)) {
+		dev_err(dev, "fail to get utmi clk\n");
+		return PTR_ERR(histb->utmi_clk);
+	}
+
+	histb->pipe_clk = devm_clk_get(dev, "pipe");
+	if (IS_ERR(histb->pipe_clk)) {
+		dev_err(dev, "fail to get pipe clk\n");
+		return PTR_ERR(histb->pipe_clk);
+	}
+
+	histb->suspend_clk = devm_clk_get(dev, "suspend");
+	if (IS_ERR(histb->suspend_clk)) {
+		dev_err(dev, "fail to get suspend clk\n");
+		return PTR_ERR(histb->suspend_clk);
+	}
+
+	return 0;
+}
+
+static int xhci_histb_host_enable(struct xhci_hcd_histb *histb)
+{
+	int ret;
+
+	ret = clk_prepare_enable(histb->bus_clk);
+	if (ret) {
+		dev_err(histb->dev, "failed to enable bus clk\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(histb->utmi_clk);
+	if (ret) {
+		dev_err(histb->dev, "failed to enable utmi clk\n");
+		goto err_utmi_clk;
+	}
+
+	ret = clk_prepare_enable(histb->pipe_clk);
+	if (ret) {
+		dev_err(histb->dev, "failed to enable pipe clk\n");
+		goto err_pipe_clk;
+	}
+
+	ret = clk_prepare_enable(histb->suspend_clk);
+	if (ret) {
+		dev_err(histb->dev, "failed to enable suspend clk\n");
+		goto err_suspend_clk;
+	}
+
+	reset_control_deassert(histb->soft_reset);
+
+	return 0;
+
+err_suspend_clk:
+	clk_disable_unprepare(histb->pipe_clk);
+err_pipe_clk:
+	clk_disable_unprepare(histb->utmi_clk);
+err_utmi_clk:
+	clk_disable_unprepare(histb->bus_clk);
+
+	return ret;
+}
+
+static void xhci_histb_host_disable(struct xhci_hcd_histb *histb)
+{
+	reset_control_assert(histb->soft_reset);
+
+	clk_disable_unprepare(histb->suspend_clk);
+	clk_disable_unprepare(histb->pipe_clk);
+	clk_disable_unprepare(histb->utmi_clk);
+	clk_disable_unprepare(histb->bus_clk);
+}
+
+static void xhci_histb_quirks(struct device *dev, struct xhci_hcd *xhci)
+{
+	/*
+	 * As of now platform drivers don't provide MSI support so we ensure
+	 * here that the generic code does not try to make a pci_dev from our
+	 * dev struct in order to setup MSI
+	 */
+	xhci->quirks |= XHCI_PLAT;
+}
+
+/* called during probe() after chip reset completes */
+static int xhci_histb_setup(struct usb_hcd *hcd)
+{
+	struct xhci_hcd_histb *histb = hcd_to_histb(hcd);
+	int ret;
+
+	if (usb_hcd_is_primary_hcd(hcd)) {
+		ret = xhci_histb_config(histb);
+		if (ret)
+			return ret;
+	}
+
+	return xhci_gen_setup(hcd, xhci_histb_quirks);
+}
+
+static const struct xhci_driver_overrides xhci_histb_overrides __initconst = {
+	.reset = xhci_histb_setup,
+};
+
+static struct hc_driver __read_mostly xhci_histb_hc_driver;
+static int xhci_histb_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct xhci_hcd_histb *histb;
+	const struct hc_driver *driver;
+	struct usb_hcd *hcd;
+	struct xhci_hcd *xhci;
+	struct resource *res;
+	int irq;
+	int ret = -ENODEV;
+
+	if (usb_disabled())
+		return -ENODEV;
+
+	driver = &xhci_histb_hc_driver;
+	histb = devm_kzalloc(dev, sizeof(*histb), GFP_KERNEL);
+	if (!histb)
+		return -ENOMEM;
+
+	histb->dev = dev;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	histb->ctrl = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(histb->ctrl))
+		return PTR_ERR(histb->ctrl);
+
+	ret = xhci_histb_clks_get(histb);
+	if (ret)
+		return ret;
+
+	histb->soft_reset = devm_reset_control_get(dev, "soft");
+	if (IS_ERR(histb->soft_reset)) {
+		dev_err(dev, "failed to get soft reset\n");
+		return PTR_ERR(histb->soft_reset);
+	}
+
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+	device_enable_async_suspend(dev);
+
+	/* Initialize dma_mask and coherent_dma_mask to 32-bits */
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	hcd = usb_create_hcd(driver, dev, dev_name(dev));
+	if (!hcd) {
+		ret = -ENOMEM;
+		goto disable_pm;
+	}
+
+	hcd->regs = histb->ctrl;
+	hcd->rsrc_start = res->start;
+	hcd->rsrc_len = resource_size(res);
+
+	histb->hcd = hcd;
+	dev_set_drvdata(hcd->self.controller, histb);
+
+	ret = xhci_histb_host_enable(histb);
+	if (ret)
+		goto put_hcd;
+
+	xhci = hcd_to_xhci(hcd);
+
+	device_wakeup_enable(hcd->self.controller);
+
+	xhci->main_hcd = hcd;
+	xhci->shared_hcd = usb_create_shared_hcd(driver, dev, dev_name(dev),
+						 hcd);
+	if (!xhci->shared_hcd) {
+		ret = -ENOMEM;
+		goto disable_host;
+	}
+
+	if (device_property_read_bool(dev, "usb2-lpm-disable"))
+		xhci->quirks |= XHCI_HW_LPM_DISABLE;
+
+	if (device_property_read_bool(dev, "usb3-lpm-capable"))
+		xhci->quirks |= XHCI_LPM_SUPPORT;
+
+	/* imod_interval is the interrupt moderation value in nanoseconds. */
+	xhci->imod_interval = 40000;
+	device_property_read_u32(dev, "imod-interval-ns",
+				 &xhci->imod_interval);
+
+	ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
+	if (ret)
+		goto put_usb3_hcd;
+
+	if (HCC_MAX_PSA(xhci->hcc_params) >= 4)
+		xhci->shared_hcd->can_do_streams = 1;
+
+	ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED);
+	if (ret)
+		goto dealloc_usb2_hcd;
+
+	device_enable_async_suspend(dev);
+	pm_runtime_put_noidle(dev);
+
+	/*
+	 * Prevent runtime pm from being on as default, users should enable
+	 * runtime pm using power/control in sysfs.
+	 */
+	pm_runtime_forbid(dev);
+
+	return 0;
+
+dealloc_usb2_hcd:
+	usb_remove_hcd(hcd);
+put_usb3_hcd:
+	usb_put_hcd(xhci->shared_hcd);
+disable_host:
+	xhci_histb_host_disable(histb);
+put_hcd:
+	usb_put_hcd(hcd);
+disable_pm:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+
+	return ret;
+}
+
+static int xhci_histb_remove(struct platform_device *dev)
+{
+	struct xhci_hcd_histb *histb = platform_get_drvdata(dev);
+	struct usb_hcd *hcd = histb->hcd;
+	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
+
+	xhci->xhc_state |= XHCI_STATE_REMOVING;
+
+	usb_remove_hcd(xhci->shared_hcd);
+	device_wakeup_disable(&dev->dev);
+
+	usb_remove_hcd(hcd);
+	usb_put_hcd(xhci->shared_hcd);
+
+	xhci_histb_host_disable(histb);
+	usb_put_hcd(hcd);
+	pm_runtime_put_sync(&dev->dev);
+	pm_runtime_disable(&dev->dev);
+
+	return 0;
+}
+
+static int __maybe_unused xhci_histb_suspend(struct device *dev)
+{
+	struct xhci_hcd_histb *histb = dev_get_drvdata(dev);
+	struct usb_hcd *hcd = histb->hcd;
+	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
+	int ret;
+
+	ret = xhci_suspend(xhci, device_may_wakeup(dev));
+
+	if (!device_may_wakeup(dev))
+		xhci_histb_host_disable(histb);
+
+	return ret;
+}
+
+static int __maybe_unused xhci_histb_resume(struct device *dev)
+{
+	struct xhci_hcd_histb *histb = dev_get_drvdata(dev);
+	struct usb_hcd *hcd = histb->hcd;
+	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+
+	if (!device_may_wakeup(dev))
+		xhci_histb_host_enable(histb);
+
+	return xhci_resume(xhci, 0);
+}
+
+static const struct dev_pm_ops xhci_histb_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(xhci_histb_suspend, xhci_histb_resume)
+};
+#define DEV_PM_OPS (IS_ENABLED(CONFIG_PM) ? &xhci_histb_pm_ops : NULL)
+
+#ifdef CONFIG_OF
+static const struct of_device_id histb_xhci_of_match[] = {
+	{ .compatible = "hisilicon,hi3798cv200-xhci"},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, histb_xhci_of_match);
+#endif
+
+static struct platform_driver histb_xhci_driver = {
+	.probe	= xhci_histb_probe,
+	.remove	= xhci_histb_remove,
+	.driver	= {
+		.name = "xhci-histb",
+		.pm = DEV_PM_OPS,
+		.of_match_table = of_match_ptr(histb_xhci_of_match),
+	},
+};
+MODULE_ALIAS("platform:xhci-histb");
+
+static int __init xhci_histb_init(void)
+{
+	xhci_init_driver(&xhci_histb_hc_driver, &xhci_histb_overrides);
+	return platform_driver_register(&histb_xhci_driver);
+}
+module_init(xhci_histb_init);
+
+static void __exit xhci_histb_exit(void)
+{
+	platform_driver_unregister(&histb_xhci_driver);
+}
+module_exit(xhci_histb_exit);
+
+MODULE_DESCRIPTION("HiSilicon STB xHCI Host Controller Driver");
+MODULE_LICENSE("GPL v2");
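
One idiom in the new histb driver worth calling out is how the interrupt-moderation default combines with the optional "imod-interval-ns" DT property: device_property_read_u32() leaves its output untouched on failure, so no error check is needed and the default simply survives when the property is absent. Sketched:

	xhci->imod_interval = 40000;	/* default: 40 us */
	device_property_read_u32(dev, "imod-interval-ns",
				 &xhci->imod_interval);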
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 32cd52c..a4b95d0 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -189,9 +189,10 @@
 	__u8 port_removable[(USB_MAXCHILDREN + 1 + 7) / 8];
 	u32 portsc;
 	unsigned int i;
+	struct xhci_hub *rhub;
 
-	ports = xhci->num_usb2_ports;
-
+	rhub = &xhci->usb2_rhub;
+	ports = rhub->num_ports;
 	xhci_common_hub_descriptor(xhci, desc, ports);
 	desc->bDescriptorType = USB_DT_HUB;
 	temp = 1 + (ports / 8);
@@ -202,7 +203,7 @@
 	 */
 	memset(port_removable, 0, sizeof(port_removable));
 	for (i = 0; i < ports; i++) {
-		portsc = readl(xhci->usb2_ports[i]);
+		portsc = readl(rhub->ports[i]->addr);
 		/* If a device is removable, PORTSC reports a 0, same as in the
 		 * hub descriptor DeviceRemovable bits.
 		 */
@@ -241,8 +242,10 @@
 	u16 port_removable;
 	u32 portsc;
 	unsigned int i;
+	struct xhci_hub *rhub;
 
-	ports = xhci->num_usb3_ports;
+	rhub = &xhci->usb3_rhub;
+	ports = rhub->num_ports;
 	xhci_common_hub_descriptor(xhci, desc, ports);
 	desc->bDescriptorType = USB_DT_SS_HUB;
 	desc->bDescLength = USB_DT_SS_HUB_SIZE;
@@ -256,7 +259,7 @@
 	port_removable = 0;
 	/* bit 0 is reserved, bit 1 is for port 1, etc. */
 	for (i = 0; i < ports; i++) {
-		portsc = readl(xhci->usb3_ports[i]);
+		portsc = readl(rhub->ports[i]->addr);
 		if (portsc & PORT_DEV_REMOVE)
 			port_removable |= 1 << (i + 1);
 	}
@@ -538,28 +541,13 @@
 			port_change_bit, wIndex, port_status);
 }
 
-static int xhci_get_ports(struct usb_hcd *hcd, __le32 __iomem ***port_array)
+struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd)
 {
-	int max_ports;
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
 
-	if (hcd->speed >= HCD_USB3) {
-		max_ports = xhci->num_usb3_ports;
-		*port_array = xhci->usb3_ports;
-	} else {
-		max_ports = xhci->num_usb2_ports;
-		*port_array = xhci->usb2_ports;
-	}
-
-	return max_ports;
-}
-
-static __le32 __iomem *xhci_get_port_io_addr(struct usb_hcd *hcd, int index)
-{
-	__le32 __iomem **port_array;
-
-	xhci_get_ports(hcd, &port_array);
-	return port_array[index];
+	if (hcd->speed >= HCD_USB3)
+		return &xhci->usb3_rhub;
+	return &xhci->usb2_rhub;
 }
 
 /*
@@ -570,21 +558,23 @@
 static void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd,
 				u16 index, bool on, unsigned long *flags)
 {
-	__le32 __iomem *addr;
+	struct xhci_hub *rhub;
+	struct xhci_port *port;
 	u32 temp;
 
-	addr = xhci_get_port_io_addr(hcd, index);
-	temp = readl(addr);
+	rhub = xhci_get_rhub(hcd);
+	port = rhub->ports[index];
+	temp = readl(port->addr);
 	temp = xhci_port_state_to_neutral(temp);
 	if (on) {
 		/* Power on */
-		writel(temp | PORT_POWER, addr);
-		temp = readl(addr);
+		writel(temp | PORT_POWER, port->addr);
+		temp = readl(port->addr);
 		xhci_dbg(xhci, "set port power, actual port %d status  = 0x%x\n",
 						index, temp);
 	} else {
 		/* Power off */
-		writel(temp & ~PORT_POWER, addr);
+		writel(temp & ~PORT_POWER, port->addr);
 	}
 
 	spin_unlock_irqrestore(&xhci->lock, *flags);
@@ -600,13 +590,13 @@
 	u16 test_mode, u16 wIndex)
 {
 	u32 temp;
-	__le32 __iomem *addr;
+	struct xhci_port *port;
 
-	/* xhci only supports test mode for usb2 ports, i.e. xhci->main_hcd */
-	addr = xhci_get_port_io_addr(xhci->main_hcd, wIndex);
-	temp = readl(addr + PORTPMSC);
+	/* xhci only supports test mode for usb2 ports */
+	port = xhci->usb2_rhub.ports[wIndex];
+	temp = readl(port->addr + PORTPMSC);
 	temp |= test_mode << PORT_TEST_MODE_SHIFT;
-	writel(temp, addr + PORTPMSC);
+	writel(temp, port->addr + PORTPMSC);
 	xhci->test_mode = test_mode;
 	if (test_mode == TEST_FORCE_EN)
 		xhci_start(xhci);
@@ -633,10 +623,10 @@
 	/* Put all ports to the Disable state by clear PP */
 	xhci_dbg(xhci, "Disable all port (PP = 0)\n");
 	/* Power off USB3 ports*/
-	for (i = 0; i < xhci->num_usb3_ports; i++)
+	for (i = 0; i < xhci->usb3_rhub.num_ports; i++)
 		xhci_set_port_power(xhci, xhci->shared_hcd, i, false, flags);
 	/* Power off USB2 ports*/
-	for (i = 0; i < xhci->num_usb2_ports; i++)
+	for (i = 0; i < xhci->usb2_rhub.num_ports; i++)
 		xhci_set_port_power(xhci, xhci->main_hcd, i, false, flags);
 	/* Stop the controller */
 	xhci_dbg(xhci, "Stop controller\n");
@@ -672,24 +662,24 @@
 	return xhci_reset(xhci);
 }
 
-void xhci_set_link_state(struct xhci_hcd *xhci, __le32 __iomem **port_array,
-				int port_id, u32 link_state)
+void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port,
+			 u32 link_state)
 {
 	u32 temp;
 
-	temp = readl(port_array[port_id]);
+	temp = readl(port->addr);
 	temp = xhci_port_state_to_neutral(temp);
 	temp &= ~PORT_PLS_MASK;
 	temp |= PORT_LINK_STROBE | link_state;
-	writel(temp, port_array[port_id]);
+	writel(temp, port->addr);
 }
 
 static void xhci_set_remote_wake_mask(struct xhci_hcd *xhci,
-		__le32 __iomem **port_array, int port_id, u16 wake_mask)
+				      struct xhci_port *port, u16 wake_mask)
 {
 	u32 temp;
 
-	temp = readl(port_array[port_id]);
+	temp = readl(port->addr);
 	temp = xhci_port_state_to_neutral(temp);
 
 	if (wake_mask & USB_PORT_FEAT_REMOTE_WAKE_CONNECT)
@@ -707,20 +697,20 @@
 	else
 		temp &= ~PORT_WKOC_E;
 
-	writel(temp, port_array[port_id]);
+	writel(temp, port->addr);
 }
 
 /* Test and clear port RWC bit */
-void xhci_test_and_clear_bit(struct xhci_hcd *xhci, __le32 __iomem **port_array,
-				int port_id, u32 port_bit)
+void xhci_test_and_clear_bit(struct xhci_hcd *xhci, struct xhci_port *port,
+			     u32 port_bit)
 {
 	u32 temp;
 
-	temp = readl(port_array[port_id]);
+	temp = readl(port->addr);
 	if (temp & port_bit) {
 		temp = xhci_port_state_to_neutral(temp);
 		temp |= port_bit;
-		writel(temp, port_array[port_id]);
+		writel(temp, port->addr);
 	}
 }
 
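
The rest of the xhci-hub.c conversion is mechanical once the new accessors are in place: the (max_ports, port_array) out-parameter pair gives way to a root-hub lookup plus per-port xhci_port objects carrying the register address. The resulting idiom, sketched:

	struct xhci_hub *rhub = xhci_get_rhub(hcd);	/* usb2 or usb3 rhub */
	struct xhci_port *port = rhub->ports[wIndex];
	u32 portsc = readl(port->addr);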
@@ -794,7 +784,7 @@
 static void xhci_del_comp_mod_timer(struct xhci_hcd *xhci, u32 status,
 				    u16 wIndex)
 {
-	u32 all_ports_seen_u0 = ((1 << xhci->num_usb3_ports)-1);
+	u32 all_ports_seen_u0 = ((1 << xhci->usb3_rhub.num_ports) - 1);
 	bool port_in_u0 = ((status & PORT_PLS_MASK) == XDEV_U0);
 
 	if (!(xhci->quirks & XHCI_COMP_MODE_QUIRK))
@@ -840,8 +830,7 @@
  */
 static u32 xhci_get_port_status(struct usb_hcd *hcd,
 		struct xhci_bus_state *bus_state,
-		__le32 __iomem **port_array,
-		u16 wIndex, u32 raw_port_status,
+	u16 wIndex, u32 raw_port_status,
 		unsigned long flags)
 	__releases(&xhci->lock)
 	__acquires(&xhci->lock)
@@ -849,6 +838,11 @@
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 	u32 status = 0;
 	int slot_id;
+	struct xhci_hub *rhub;
+	struct xhci_port *port;
+
+	rhub = xhci_get_rhub(hcd);
+	port = rhub->ports[wIndex];
 
 	/* wPortChange bits */
 	if (raw_port_status & PORT_CSC)
@@ -919,10 +913,8 @@
 
 			set_bit(wIndex, &bus_state->rexit_ports);
 
-			xhci_test_and_clear_bit(xhci, port_array, wIndex,
-						PORT_PLC);
-			xhci_set_link_state(xhci, port_array, wIndex,
-					XDEV_U0);
+			xhci_test_and_clear_bit(xhci, port, PORT_PLC);
+			xhci_set_link_state(xhci, port, XDEV_U0);
 
 			spin_unlock_irqrestore(&xhci->lock, flags);
 			time_left = wait_for_completion_timeout(
@@ -940,7 +932,7 @@
 				}
 				xhci_ring_device(xhci, slot_id);
 			} else {
-				int port_status = readl(port_array[wIndex]);
+				int port_status = readl(port->addr);
 				xhci_warn(xhci, "Port resume took longer than %i msec, port status = 0x%x\n",
 						XHCI_MAX_REXIT_TIMEOUT,
 						port_status);
@@ -1024,15 +1016,18 @@
 	unsigned long flags;
 	u32 temp, status;
 	int retval = 0;
-	__le32 __iomem **port_array;
 	int slot_id;
 	struct xhci_bus_state *bus_state;
 	u16 link_state = 0;
 	u16 wake_mask = 0;
 	u16 timeout = 0;
 	u16 test_mode = 0;
+	struct xhci_hub *rhub;
+	struct xhci_port **ports;
 
-	max_ports = xhci_get_ports(hcd, &port_array);
+	rhub = xhci_get_rhub(hcd);
+	ports = rhub->ports;
+	max_ports = rhub->num_ports;
 	bus_state = &xhci->bus_state[hcd_index(hcd)];
 
 	spin_lock_irqsave(&xhci->lock, flags);
@@ -1070,15 +1065,15 @@
 		if (!wIndex || wIndex > max_ports)
 			goto error;
 		wIndex--;
-		temp = readl(port_array[wIndex]);
+		temp = readl(ports[wIndex]->addr);
 		if (temp == ~(u32)0) {
 			xhci_hc_died(xhci);
 			retval = -ENODEV;
 			break;
 		}
 		trace_xhci_get_port_status(wIndex, temp);
-		status = xhci_get_port_status(hcd, bus_state, port_array,
-				wIndex, temp, flags);
+		status = xhci_get_port_status(hcd, bus_state, wIndex, temp,
+					      flags);
 		if (status == 0xffffffff)
 			goto error;
 
@@ -1096,7 +1091,7 @@
 				retval = -EINVAL;
 				break;
 			}
-			port_li = readl(port_array[wIndex] + PORTLI);
+			port_li = readl(ports[wIndex]->addr + PORTLI);
 			status = xhci_get_ext_port_status(temp, port_li);
 			put_unaligned_le32(cpu_to_le32(status), &buf[4]);
 		}
@@ -1114,7 +1109,7 @@
 		if (!wIndex || wIndex > max_ports)
 			goto error;
 		wIndex--;
-		temp = readl(port_array[wIndex]);
+		temp = readl(ports[wIndex]->addr);
 		if (temp == ~(u32)0) {
 			xhci_hc_died(xhci);
 			retval = -ENODEV;
@@ -1124,10 +1119,10 @@
 		/* FIXME: What new port features do we need to support? */
 		switch (wValue) {
 		case USB_PORT_FEAT_SUSPEND:
-			temp = readl(port_array[wIndex]);
+			temp = readl(ports[wIndex]->addr);
 			if ((temp & PORT_PLS_MASK) != XDEV_U0) {
 				/* Resume the port to U0 first */
-				xhci_set_link_state(xhci, port_array, wIndex,
+				xhci_set_link_state(xhci, ports[wIndex],
 							XDEV_U0);
 				spin_unlock_irqrestore(&xhci->lock, flags);
 				msleep(10);
@@ -1137,7 +1132,7 @@
 			 * a port unless the port reports that it is in the
 			 * enabled (PED = ‘1’,PLS < ‘3’) state.
 			 */
-			temp = readl(port_array[wIndex]);
+			temp = readl(ports[wIndex]->addr);
 			if ((temp & PORT_PE) == 0 || (temp & PORT_RESET)
 				|| (temp & PORT_PLS_MASK) >= XDEV_U3) {
 				xhci_warn(xhci, "USB core suspending device not in U0/U1/U2.\n");
@@ -1155,18 +1150,17 @@
 			xhci_stop_device(xhci, slot_id, 1);
 			spin_lock_irqsave(&xhci->lock, flags);
 
-			xhci_set_link_state(xhci, port_array, wIndex, XDEV_U3);
+			xhci_set_link_state(xhci, ports[wIndex], XDEV_U3);
 
 			spin_unlock_irqrestore(&xhci->lock, flags);
 			msleep(10); /* wait device to enter */
 			spin_lock_irqsave(&xhci->lock, flags);
 
-			temp = readl(port_array[wIndex]);
+			temp = readl(ports[wIndex]->addr);
 			bus_state->suspended_ports |= 1 << wIndex;
 			break;
 		case USB_PORT_FEAT_LINK_STATE:
-			temp = readl(port_array[wIndex]);
-
+			temp = readl(ports[wIndex]->addr);
 			/* Disable port */
 			if (link_state == USB_SS_PORT_LS_SS_DISABLED) {
 				xhci_dbg(xhci, "Disable port %d\n", wIndex);
@@ -1178,17 +1172,17 @@
 				temp |= PORT_CSC | PORT_PEC | PORT_WRC |
 					PORT_OCC | PORT_RC | PORT_PLC |
 					PORT_CEC;
-				writel(temp | PORT_PE, port_array[wIndex]);
-				temp = readl(port_array[wIndex]);
+				writel(temp | PORT_PE, ports[wIndex]->addr);
+				temp = readl(ports[wIndex]->addr);
 				break;
 			}
 
 			/* Put link in RxDetect (enable port) */
 			if (link_state == USB_SS_PORT_LS_RX_DETECT) {
 				xhci_dbg(xhci, "Enable port %d\n", wIndex);
-				xhci_set_link_state(xhci, port_array, wIndex,
-						link_state);
-				temp = readl(port_array[wIndex]);
+				xhci_set_link_state(xhci, ports[wIndex],
+							link_state);
+				temp = readl(ports[wIndex]->addr);
 				break;
 			}
 
@@ -1219,9 +1213,10 @@
 
 				xhci_dbg(xhci, "Enable compliance mode transition for port %d\n",
 						wIndex);
-				xhci_set_link_state(xhci, port_array, wIndex,
+				xhci_set_link_state(xhci, ports[wIndex],
 						link_state);
-				temp = readl(port_array[wIndex]);
+
+				temp = readl(ports[wIndex]->addr);
 				break;
 			}
 			/* Port must be enabled */
@@ -1248,14 +1243,13 @@
 				}
 			}
 
-			xhci_set_link_state(xhci, port_array, wIndex,
-						link_state);
+			xhci_set_link_state(xhci, ports[wIndex], link_state);
 
 			spin_unlock_irqrestore(&xhci->lock, flags);
 			msleep(20); /* wait device to enter */
 			spin_lock_irqsave(&xhci->lock, flags);
 
-			temp = readl(port_array[wIndex]);
+			temp = readl(ports[wIndex]->addr);
 			if (link_state == USB_SS_PORT_LS_U3)
 				bus_state->suspended_ports |= 1 << wIndex;
 			break;
@@ -1270,40 +1264,39 @@
 			break;
 		case USB_PORT_FEAT_RESET:
 			temp = (temp | PORT_RESET);
-			writel(temp, port_array[wIndex]);
+			writel(temp, ports[wIndex]->addr);
 
-			temp = readl(port_array[wIndex]);
+			temp = readl(ports[wIndex]->addr);
 			xhci_dbg(xhci, "set port reset, actual port %d status  = 0x%x\n", wIndex, temp);
 			break;
 		case USB_PORT_FEAT_REMOTE_WAKE_MASK:
-			xhci_set_remote_wake_mask(xhci, port_array,
-					wIndex, wake_mask);
-			temp = readl(port_array[wIndex]);
+			xhci_set_remote_wake_mask(xhci, ports[wIndex],
+						  wake_mask);
+			temp = readl(ports[wIndex]->addr);
 			xhci_dbg(xhci, "set port remote wake mask, "
 					"actual port %d status  = 0x%x\n",
 					wIndex, temp);
 			break;
 		case USB_PORT_FEAT_BH_PORT_RESET:
 			temp |= PORT_WR;
-			writel(temp, port_array[wIndex]);
-
-			temp = readl(port_array[wIndex]);
+			writel(temp, ports[wIndex]->addr);
+			temp = readl(ports[wIndex]->addr);
 			break;
 		case USB_PORT_FEAT_U1_TIMEOUT:
 			if (hcd->speed < HCD_USB3)
 				goto error;
-			temp = readl(port_array[wIndex] + PORTPMSC);
+			temp = readl(ports[wIndex]->addr + PORTPMSC);
 			temp &= ~PORT_U1_TIMEOUT_MASK;
 			temp |= PORT_U1_TIMEOUT(timeout);
-			writel(temp, port_array[wIndex] + PORTPMSC);
+			writel(temp, ports[wIndex]->addr + PORTPMSC);
 			break;
 		case USB_PORT_FEAT_U2_TIMEOUT:
 			if (hcd->speed < HCD_USB3)
 				goto error;
-			temp = readl(port_array[wIndex] + PORTPMSC);
+			temp = readl(ports[wIndex]->addr + PORTPMSC);
 			temp &= ~PORT_U2_TIMEOUT_MASK;
 			temp |= PORT_U2_TIMEOUT(timeout);
-			writel(temp, port_array[wIndex] + PORTPMSC);
+			writel(temp, ports[wIndex]->addr + PORTPMSC);
 			break;
 		case USB_PORT_FEAT_TEST:
 			/* 4.19.6 Port Test Modes (USB2 Test Mode) */
@@ -1318,13 +1311,13 @@
 			goto error;
 		}
 		/* unblock any posted writes */
-		temp = readl(port_array[wIndex]);
+		temp = readl(ports[wIndex]->addr);
 		break;
 	case ClearPortFeature:
 		if (!wIndex || wIndex > max_ports)
 			goto error;
 		wIndex--;
-		temp = readl(port_array[wIndex]);
+		temp = readl(ports[wIndex]->addr);
 		if (temp == ~(u32)0) {
 			xhci_hc_died(xhci);
 			retval = -ENODEV;
@@ -1334,7 +1327,7 @@
 		temp = xhci_port_state_to_neutral(temp);
 		switch (wValue) {
 		case USB_PORT_FEAT_SUSPEND:
-			temp = readl(port_array[wIndex]);
+			temp = readl(ports[wIndex]->addr);
 			xhci_dbg(xhci, "clear USB_PORT_FEAT_SUSPEND\n");
 			xhci_dbg(xhci, "PORTSC %04x\n", temp);
 			if (temp & PORT_RESET)
@@ -1344,12 +1337,12 @@
 					goto error;
 
 				set_bit(wIndex, &bus_state->resuming_ports);
-				xhci_set_link_state(xhci, port_array, wIndex,
-							XDEV_RESUME);
+				xhci_set_link_state(xhci, ports[wIndex],
+						    XDEV_RESUME);
 				spin_unlock_irqrestore(&xhci->lock, flags);
 				msleep(USB_RESUME_TIMEOUT);
 				spin_lock_irqsave(&xhci->lock, flags);
-				xhci_set_link_state(xhci, port_array, wIndex,
+				xhci_set_link_state(xhci, ports[wIndex],
 							XDEV_U0);
 				clear_bit(wIndex, &bus_state->resuming_ports);
 			}
@@ -1374,11 +1367,11 @@
 		case USB_PORT_FEAT_C_PORT_LINK_STATE:
 		case USB_PORT_FEAT_C_PORT_CONFIG_ERROR:
 			xhci_clear_port_change_bit(xhci, wValue, wIndex,
-					port_array[wIndex], temp);
+					ports[wIndex]->addr, temp);
 			break;
 		case USB_PORT_FEAT_ENABLE:
 			xhci_disable_port(hcd, xhci, wIndex,
-					port_array[wIndex], temp);
+					ports[wIndex]->addr, temp);
 			break;
 		case USB_PORT_FEAT_POWER:
 			xhci_set_port_power(xhci, hcd, wIndex, false, &flags);
@@ -1415,11 +1408,14 @@
 	int i, retval;
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
 	int max_ports;
-	__le32 __iomem **port_array;
 	struct xhci_bus_state *bus_state;
 	bool reset_change = false;
+	struct xhci_hub *rhub;
+	struct xhci_port **ports;
 
-	max_ports = xhci_get_ports(hcd, &port_array);
+	rhub = xhci_get_rhub(hcd);
+	ports = rhub->ports;
+	max_ports = rhub->num_ports;
 	bus_state = &xhci->bus_state[hcd_index(hcd)];
 
 	/* Initial status is no changes */
@@ -1437,7 +1433,7 @@
 	spin_lock_irqsave(&xhci->lock, flags);
 	/* For each port, did anything change?  If so, set that bit in buf. */
 	for (i = 0; i < max_ports; i++) {
-		temp = readl(port_array[i]);
+		temp = readl(ports[i]->addr);
 		if (temp == ~(u32)0) {
 			xhci_hc_died(xhci);
 			retval = -ENODEV;
@@ -1469,11 +1465,14 @@
 {
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
 	int max_ports, port_index;
-	__le32 __iomem **port_array;
 	struct xhci_bus_state *bus_state;
 	unsigned long flags;
+	struct xhci_hub *rhub;
+	struct xhci_port **ports;
 
-	max_ports = xhci_get_ports(hcd, &port_array);
+	rhub = xhci_get_rhub(hcd);
+	ports = rhub->ports;
+	max_ports = rhub->num_ports;
 	bus_state = &xhci->bus_state[hcd_index(hcd)];
 
 	spin_lock_irqsave(&xhci->lock, flags);
@@ -1494,7 +1493,7 @@
 		u32 t1, t2;
 		int slot_id;
 
-		t1 = readl(port_array[port_index]);
+		t1 = readl(ports[port_index]->addr);
 		t2 = xhci_port_state_to_neutral(t1);
 
 		if ((t1 & PORT_PE) && !(t1 & PORT_PLS_MASK)) {
@@ -1534,7 +1533,7 @@
 
 		t1 = xhci_port_state_to_neutral(t1);
 		if (t1 != t2)
-			writel(t2, port_array[port_index]);
+			writel(t2, ports[port_index]->addr);
 	}
 	hcd->state = HC_STATE_SUSPENDED;
 	bus_state->next_statechange = jiffies + msecs_to_jiffies(10);
@@ -1547,12 +1546,11 @@
  * warm reset a USB3 device stuck in polling or compliance mode after resume.
  * See Intel 100/c230 series PCH specification update Doc #332692-006 Errata #8
  */
-static bool xhci_port_missing_cas_quirk(int port_index,
-					     __le32 __iomem **port_array)
+static bool xhci_port_missing_cas_quirk(struct xhci_port *port)
 {
 	u32 portsc;
 
-	portsc = readl(port_array[port_index]);
+	portsc = readl(port->addr);
 
 	/* if any of these are set we are not stuck */
 	if (portsc & (PORT_CONNECT | PORT_CAS))
@@ -1565,9 +1563,9 @@
 	/* clear wakeup/change bits, and do a warm port reset */
 	portsc &= ~(PORT_RWC_BITS | PORT_CEC | PORT_WAKE_BITS);
 	portsc |= PORT_WR;
-	writel(portsc, port_array[port_index]);
+	writel(portsc, port->addr);
 	/* flush write */
-	readl(port_array[port_index]);
+	readl(port->addr);
 	return true;
 }
 
@@ -1575,15 +1573,18 @@
 {
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
 	struct xhci_bus_state *bus_state;
-	__le32 __iomem **port_array;
 	unsigned long flags;
 	int max_ports, port_index;
 	int slot_id;
 	int sret;
 	u32 next_state;
 	u32 temp, portsc;
+	struct xhci_hub *rhub;
+	struct xhci_port **ports;
 
-	max_ports = xhci_get_ports(hcd, &port_array);
+	rhub = xhci_get_rhub(hcd);
+	ports = rhub->ports;
+	max_ports = rhub->num_ports;
 	bus_state = &xhci->bus_state[hcd_index(hcd)];
 
 	if (time_before(jiffies, bus_state->next_statechange))
@@ -1608,12 +1609,12 @@
 
 	port_index = max_ports;
 	while (port_index--) {
-		portsc = readl(port_array[port_index]);
+		portsc = readl(ports[port_index]->addr);
 
 		/* warm reset CAS limited ports stuck in polling/compliance */
 		if ((xhci->quirks & XHCI_MISSING_CAS) &&
 		    (hcd->speed >= HCD_USB3) &&
-		    xhci_port_missing_cas_quirk(port_index, port_array)) {
+		    xhci_port_missing_cas_quirk(ports[port_index])) {
 			xhci_dbg(xhci, "reset stuck port %d\n", port_index);
 			clear_bit(port_index, &bus_state->bus_suspended);
 			continue;
@@ -1637,7 +1638,7 @@
 			}
 		/* disable wake for all ports, write new link state if needed */
 		portsc &= ~(PORT_RWC_BITS | PORT_CEC | PORT_WAKE_BITS);
-		writel(portsc, port_array[port_index]);
+		writel(portsc, ports[port_index]->addr);
 	}
 
 	/* USB2 specific resume signaling delay and U0 link state transition */
@@ -1650,23 +1651,22 @@
 		for_each_set_bit(port_index, &bus_state->bus_suspended,
 				 BITS_PER_LONG) {
 			/* Clear PLC to poll it later for U0 transition */
-			xhci_test_and_clear_bit(xhci, port_array, port_index,
+			xhci_test_and_clear_bit(xhci, ports[port_index],
 						PORT_PLC);
-			xhci_set_link_state(xhci, port_array, port_index,
-					    XDEV_U0);
+			xhci_set_link_state(xhci, ports[port_index], XDEV_U0);
 		}
 	}
 
 	/* poll for U0 link state complete, both USB2 and USB3 */
 	for_each_set_bit(port_index, &bus_state->bus_suspended, BITS_PER_LONG) {
-		sret = xhci_handshake(port_array[port_index], PORT_PLC,
+		sret = xhci_handshake(ports[port_index]->addr, PORT_PLC,
 				      PORT_PLC, 10 * 1000);
 		if (sret) {
 			xhci_warn(xhci, "port %d resume PLC timeout\n",
 				  port_index);
 			continue;
 		}
-		xhci_test_and_clear_bit(xhci, port_array, port_index, PORT_PLC);
+		xhci_test_and_clear_bit(xhci, ports[port_index], PORT_PLC);
 		slot_id = xhci_find_slot_id_by_port(hcd, xhci, port_index + 1);
 		if (slot_id)
 			xhci_ring_device(xhci, slot_id);
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index e5ace89..4fe7471 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -33,8 +33,9 @@
 	struct xhci_segment *seg;
 	dma_addr_t	dma;
 	int		i;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
 
-	seg = kzalloc(sizeof *seg, flags);
+	seg = kzalloc_node(sizeof(*seg), flags, dev_to_node(dev));
 	if (!seg)
 		return NULL;
 
@@ -45,7 +46,8 @@
 	}
 
 	if (max_packet) {
-		seg->bounce_buf = kzalloc(max_packet, flags);
+		seg->bounce_buf = kzalloc_node(max_packet, flags,
+					dev_to_node(dev));
 		if (!seg->bounce_buf) {
 			dma_pool_free(xhci->segment_pool, seg->trbs, dma);
 			kfree(seg);
@@ -363,8 +365,9 @@
 {
 	struct xhci_ring	*ring;
 	int ret;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
 
-	ring = kzalloc(sizeof *(ring), flags);
+	ring = kzalloc_node(sizeof(*ring), flags, dev_to_node(dev));
 	if (!ring)
 		return NULL;
 
@@ -458,11 +461,12 @@
 						    int type, gfp_t flags)
 {
 	struct xhci_container_ctx *ctx;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
 
 	if ((type != XHCI_CTX_TYPE_DEVICE) && (type != XHCI_CTX_TYPE_INPUT))
 		return NULL;
 
-	ctx = kzalloc(sizeof(*ctx), flags);
+	ctx = kzalloc_node(sizeof(*ctx), flags, dev_to_node(dev));
 	if (!ctx)
 		return NULL;
 
@@ -615,6 +619,7 @@
 	struct xhci_ring *cur_ring;
 	u64 addr;
 	int ret;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
 
 	xhci_dbg(xhci, "Allocating %u streams and %u "
 			"stream context array entries.\n",
@@ -625,7 +630,8 @@
 	}
 	xhci->cmd_ring_reserved_trbs++;
 
-	stream_info = kzalloc(sizeof(struct xhci_stream_info), mem_flags);
+	stream_info = kzalloc_node(sizeof(*stream_info), mem_flags,
+			dev_to_node(dev));
 	if (!stream_info)
 		goto cleanup_trbs;
 
@@ -633,9 +639,9 @@
 	stream_info->num_stream_ctxs = num_stream_ctxs;
 
 	/* Initialize the array of virtual pointers to stream rings. */
-	stream_info->stream_rings = kzalloc(
-			sizeof(struct xhci_ring *)*num_streams,
-			mem_flags);
+	stream_info->stream_rings = kcalloc_node(
+			num_streams, sizeof(struct xhci_ring *), mem_flags,
+			dev_to_node(dev));
 	if (!stream_info->stream_rings)
 		goto cleanup_info;
 
@@ -831,6 +837,7 @@
 	struct xhci_tt_bw_info		*tt_info;
 	unsigned int			num_ports;
 	int				i, j;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
 
 	if (!tt->multi)
 		num_ports = 1;
@@ -840,7 +847,8 @@
 	for (i = 0; i < num_ports; i++, tt_info++) {
 		struct xhci_interval_bw_table *bw_table;
 
-		tt_info = kzalloc(sizeof(*tt_info), mem_flags);
+		tt_info = kzalloc_node(sizeof(*tt_info), mem_flags,
+				dev_to_node(dev));
 		if (!tt_info)
 			goto free_tts;
 		INIT_LIST_HEAD(&tt_info->tt_list);
@@ -1054,8 +1062,7 @@
 
 /*
  * The xHCI roothub may have ports of differing speeds in any order in the port
- * status registers.  xhci->port_array provides an array of the port speed for
- * each offset into the port status registers.
+ * status registers.
  *
  * The xHCI hardware wants to know the roothub port number that the USB device
  * is attached to (or the roothub port its ancestor hub is attached to).  All we
@@ -1642,7 +1649,8 @@
 	if (!num_sp)
 		return 0;
 
-	xhci->scratchpad = kzalloc(sizeof(*xhci->scratchpad), flags);
+	xhci->scratchpad = kzalloc_node(sizeof(*xhci->scratchpad), flags,
+				dev_to_node(dev));
 	if (!xhci->scratchpad)
 		goto fail_sp;
 
@@ -1652,7 +1660,8 @@
 	if (!xhci->scratchpad->sp_array)
 		goto fail_sp2;
 
-	xhci->scratchpad->sp_buffers = kzalloc(sizeof(void *) * num_sp, flags);
+	xhci->scratchpad->sp_buffers = kcalloc_node(num_sp, sizeof(void *),
+					flags, dev_to_node(dev));
 	if (!xhci->scratchpad->sp_buffers)
 		goto fail_sp3;
 
@@ -1720,14 +1729,16 @@
 		bool allocate_completion, gfp_t mem_flags)
 {
 	struct xhci_command *command;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
 
-	command = kzalloc(sizeof(*command), mem_flags);
+	command = kzalloc_node(sizeof(*command), mem_flags, dev_to_node(dev));
 	if (!command)
 		return NULL;
 
 	if (allocate_completion) {
 		command->completion =
-			kzalloc(sizeof(struct completion), mem_flags);
+			kzalloc_node(sizeof(struct completion), mem_flags,
+				dev_to_node(dev));
 		if (!command->completion) {
 			kfree(command);
 			return NULL;
@@ -1890,18 +1901,18 @@
 
 no_bw:
 	xhci->cmd_ring_reserved_trbs = 0;
-	xhci->num_usb2_ports = 0;
-	xhci->num_usb3_ports = 0;
+	xhci->usb2_rhub.num_ports = 0;
+	xhci->usb3_rhub.num_ports = 0;
 	xhci->num_active_eps = 0;
-	kfree(xhci->usb2_ports);
-	kfree(xhci->usb3_ports);
-	kfree(xhci->port_array);
+	kfree(xhci->usb2_rhub.ports);
+	kfree(xhci->usb3_rhub.ports);
+	kfree(xhci->hw_ports);
 	kfree(xhci->rh_bw);
 	kfree(xhci->ext_caps);
 
-	xhci->usb2_ports = NULL;
-	xhci->usb3_ports = NULL;
-	xhci->port_array = NULL;
+	xhci->usb2_rhub.ports = NULL;
+	xhci->usb3_rhub.ports = NULL;
+	xhci->hw_ports = NULL;
 	xhci->rh_bw = NULL;
 	xhci->ext_caps = NULL;
 
@@ -2100,6 +2111,7 @@
 	int i;
 	u8 major_revision, minor_revision;
 	struct xhci_hub *rhub;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
 
 	temp = readl(addr);
 	major_revision = XHCI_EXT_PORT_MAJOR(temp);
@@ -2136,8 +2148,8 @@
 
 	rhub->psi_count = XHCI_EXT_PORT_PSIC(temp);
 	if (rhub->psi_count) {
-		rhub->psi = kcalloc(rhub->psi_count, sizeof(*rhub->psi),
-				    GFP_KERNEL);
+		rhub->psi = kcalloc_node(rhub->psi_count, sizeof(*rhub->psi),
+				    GFP_KERNEL, dev_to_node(dev));
 		if (!rhub->psi)
 			rhub->psi_count = 0;
 
@@ -2186,36 +2198,53 @@
 
 	port_offset--;
 	for (i = port_offset; i < (port_offset + port_count); i++) {
+		struct xhci_port *hw_port = &xhci->hw_ports[i];
 		/* Duplicate entry.  Ignore the port if the revisions differ. */
-		if (xhci->port_array[i] != 0) {
+		if (hw_port->rhub) {
 			xhci_warn(xhci, "Duplicate port entry, Ext Cap %p,"
 					" port %u\n", addr, i);
 			xhci_warn(xhci, "Port was marked as USB %u, "
 					"duplicated as USB %u\n",
-					xhci->port_array[i], major_revision);
+					hw_port->rhub->maj_rev, major_revision);
 			/* Only adjust the roothub port counts if we haven't
 			 * found a similar duplicate.
 			 */
-			if (xhci->port_array[i] != major_revision &&
-				xhci->port_array[i] != DUPLICATE_ENTRY) {
-				if (xhci->port_array[i] == 0x03)
-					xhci->num_usb3_ports--;
-				else
-					xhci->num_usb2_ports--;
-				xhci->port_array[i] = DUPLICATE_ENTRY;
+			if (hw_port->rhub != rhub &&
+				 hw_port->hcd_portnum != DUPLICATE_ENTRY) {
+				hw_port->rhub->num_ports--;
+				hw_port->hcd_portnum = DUPLICATE_ENTRY;
 			}
-			/* FIXME: Should we disable the port? */
 			continue;
 		}
-		xhci->port_array[i] = major_revision;
-		if (major_revision == 0x03)
-			xhci->num_usb3_ports++;
-		else
-			xhci->num_usb2_ports++;
+		hw_port->rhub = rhub;
+		rhub->num_ports++;
 	}
 	/* FIXME: Should we disable ports not in the Extended Capabilities? */
 }
 
+static void xhci_create_rhub_port_array(struct xhci_hcd *xhci,
+					struct xhci_hub *rhub, gfp_t flags)
+{
+	int port_index = 0;
+	int i;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
+
+	if (!rhub->num_ports)
+		return;
+	rhub->ports = kcalloc_node(rhub->num_ports, sizeof(*rhub->ports),
+			flags, dev_to_node(dev));
+	if (!rhub->ports)
+		return;
+	for (i = 0; i < HCS_MAX_PORTS(xhci->hcs_params1); i++) {
+		if (xhci->hw_ports[i].rhub != rhub ||
+		    xhci->hw_ports[i].hcd_portnum == DUPLICATE_ENTRY)
+			continue;
+		xhci->hw_ports[i].hcd_portnum = port_index;
+		rhub->ports[port_index] = &xhci->hw_ports[i];
+		port_index++;
+		if (port_index == rhub->num_ports)
+			break;
+	}
+}
+
 /*
  * Scan the Extended Capabilities for the "Supported Protocol Capabilities" that
  * specify what speeds each port is supposed to be.  We can't count on the port
@@ -2228,16 +2257,25 @@
 	void __iomem *base;
 	u32 offset;
 	unsigned int num_ports;
-	int i, j, port_index;
+	int i, j;
 	int cap_count = 0;
 	u32 cap_start;
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
 
 	num_ports = HCS_MAX_PORTS(xhci->hcs_params1);
-	xhci->port_array = kzalloc(sizeof(*xhci->port_array)*num_ports, flags);
-	if (!xhci->port_array)
+	xhci->hw_ports = kcalloc_node(num_ports, sizeof(*xhci->hw_ports),
+				flags, dev_to_node(dev));
+	if (!xhci->hw_ports)
 		return -ENOMEM;
 
-	xhci->rh_bw = kzalloc(sizeof(*xhci->rh_bw)*num_ports, flags);
+	for (i = 0; i < num_ports; i++) {
+		xhci->hw_ports[i].addr = &xhci->op_regs->port_status_base +
+			NUM_PORT_REGS * i;
+		xhci->hw_ports[i].hw_portnum = i;
+	}
+
+	xhci->rh_bw = kzalloc_node(sizeof(*xhci->rh_bw)*num_ports, flags,
+			dev_to_node(dev));
 	if (!xhci->rh_bw)
 		return -ENOMEM;
 	for (i = 0; i < num_ports; i++) {
@@ -2264,7 +2302,8 @@
 						      XHCI_EXT_CAPS_PROTOCOL);
 	}
 
-	xhci->ext_caps = kzalloc(sizeof(*xhci->ext_caps) * cap_count, flags);
+	xhci->ext_caps = kcalloc_node(cap_count, sizeof(*xhci->ext_caps),
+				flags, dev_to_node(dev));
 	if (!xhci->ext_caps)
 		return -ENOMEM;
 
@@ -2272,86 +2311,44 @@
 
 	while (offset) {
 		xhci_add_in_port(xhci, num_ports, base + offset, cap_count);
-		if (xhci->num_usb2_ports + xhci->num_usb3_ports == num_ports)
+		if (xhci->usb2_rhub.num_ports + xhci->usb3_rhub.num_ports ==
+		    num_ports)
 			break;
 		offset = xhci_find_next_ext_cap(base, offset,
 						XHCI_EXT_CAPS_PROTOCOL);
 	}
-
-	if (xhci->num_usb2_ports == 0 && xhci->num_usb3_ports == 0) {
+	if (xhci->usb2_rhub.num_ports == 0 && xhci->usb3_rhub.num_ports == 0) {
 		xhci_warn(xhci, "No ports on the roothubs?\n");
 		return -ENODEV;
 	}
 	xhci_dbg_trace(xhci, trace_xhci_dbg_init,
-			"Found %u USB 2.0 ports and %u USB 3.0 ports.",
-			xhci->num_usb2_ports, xhci->num_usb3_ports);
+		       "Found %u USB 2.0 ports and %u USB 3.0 ports.",
+		       xhci->usb2_rhub.num_ports, xhci->usb3_rhub.num_ports);
 
 	/* Place limits on the number of roothub ports so that the hub
 	 * descriptors aren't longer than the USB core will allocate.
 	 */
-	if (xhci->num_usb3_ports > USB_SS_MAXPORTS) {
+	if (xhci->usb3_rhub.num_ports > USB_SS_MAXPORTS) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_init,
 				"Limiting USB 3.0 roothub ports to %u.",
 				USB_SS_MAXPORTS);
-		xhci->num_usb3_ports = USB_SS_MAXPORTS;
+		xhci->usb3_rhub.num_ports = USB_SS_MAXPORTS;
 	}
-	if (xhci->num_usb2_ports > USB_MAXCHILDREN) {
+	if (xhci->usb2_rhub.num_ports > USB_MAXCHILDREN) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_init,
 				"Limiting USB 2.0 roothub ports to %u.",
 				USB_MAXCHILDREN);
-		xhci->num_usb2_ports = USB_MAXCHILDREN;
+		xhci->usb2_rhub.num_ports = USB_MAXCHILDREN;
 	}
 
 	/*
 	 * Note we could have all USB 3.0 ports, or all USB 2.0 ports.
 	 * Not sure how the USB core will handle a hub with no ports...
 	 */
-	if (xhci->num_usb2_ports) {
-		xhci->usb2_ports = kmalloc(sizeof(*xhci->usb2_ports)*
-				xhci->num_usb2_ports, flags);
-		if (!xhci->usb2_ports)
-			return -ENOMEM;
 
-		port_index = 0;
-		for (i = 0; i < num_ports; i++) {
-			if (xhci->port_array[i] == 0x03 ||
-					xhci->port_array[i] == 0 ||
-					xhci->port_array[i] == DUPLICATE_ENTRY)
-				continue;
+	xhci_create_rhub_port_array(xhci, &xhci->usb2_rhub, flags);
+	xhci_create_rhub_port_array(xhci, &xhci->usb3_rhub, flags);
 
-			xhci->usb2_ports[port_index] =
-				&xhci->op_regs->port_status_base +
-				NUM_PORT_REGS*i;
-			xhci_dbg_trace(xhci, trace_xhci_dbg_init,
-					"USB 2.0 port at index %u, "
-					"addr = %p", i,
-					xhci->usb2_ports[port_index]);
-			port_index++;
-			if (port_index == xhci->num_usb2_ports)
-				break;
-		}
-	}
-	if (xhci->num_usb3_ports) {
-		xhci->usb3_ports = kmalloc(sizeof(*xhci->usb3_ports)*
-				xhci->num_usb3_ports, flags);
-		if (!xhci->usb3_ports)
-			return -ENOMEM;
-
-		port_index = 0;
-		for (i = 0; i < num_ports; i++)
-			if (xhci->port_array[i] == 0x03) {
-				xhci->usb3_ports[port_index] =
-					&xhci->op_regs->port_status_base +
-					NUM_PORT_REGS*i;
-				xhci_dbg_trace(xhci, trace_xhci_dbg_init,
-						"USB 3.0 port at index %u, "
-						"addr = %p", i,
-						xhci->usb3_ports[port_index]);
-				port_index++;
-				if (port_index == xhci->num_usb3_ports)
-					break;
-			}
-	}
 	return 0;
 }
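
Aside — a minimal sketch of the allocation idiom the xhci-mem.c hunks
above converge on (not part of the patch; xhci_alloc_cmd() is a
hypothetical example allocator): bookkeeping that used to come from plain
kzalloc()/kcalloc() is now placed on the host controller's NUMA node,
resolved from its sysdev via dev_to_node():

	#include <linux/device.h>
	#include <linux/slab.h>

	static struct xhci_command *xhci_alloc_cmd(struct xhci_hcd *xhci,
						   gfp_t flags)
	{
		/* resolve the controller's NUMA node via its sysdev... */
		struct device *dev = xhci_to_hcd(xhci)->self.sysdev;

		/* ...and allocate node-local; otherwise identical in
		 * semantics to kzalloc(sizeof(*cmd), flags) */
		return kzalloc_node(sizeof(struct xhci_command), flags,
				    dev_to_node(dev));
	}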
 
diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c
index eea7360..fa33d6e 100644
--- a/drivers/usb/host/xhci-mtk-sch.c
+++ b/drivers/usb/host/xhci-mtk-sch.c
@@ -58,7 +58,7 @@
 			bw_index = (virt_dev->real_port - 1) * 2 + 1;
 	} else {
 		/* add one more for each SS port */
-		bw_index = virt_dev->real_port + xhci->num_usb3_ports - 1;
+		bw_index = virt_dev->real_port + xhci->usb3_rhub.num_ports - 1;
 	}
 
 	return bw_index;
@@ -284,7 +284,7 @@
 	int i;
 
 	/* ss IN and OUT are separated */
-	num_usb_bus = xhci->num_usb3_ports * 2 + xhci->num_usb2_ports;
+	num_usb_bus = xhci->usb3_rhub.num_ports * 2 + xhci->usb2_rhub.num_ports;
 
 	sch_array = kcalloc(num_usb_bus, sizeof(*sch_array), GFP_KERNEL);
 	if (sch_array == NULL)
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 85ffda8..6372edf 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -196,11 +196,15 @@
 		xhci->quirks |= XHCI_BROKEN_STREAMS;
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
-			pdev->device == 0x0014)
+	    pdev->device == 0x0014) {
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+		xhci->quirks |= XHCI_ZERO_64B_REGS;
+	}
 	if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
-			pdev->device == 0x0015)
+	    pdev->device == 0x0015) {
 		xhci->quirks |= XHCI_RESET_ON_RESUME;
+		xhci->quirks |= XHCI_ZERO_64B_REGS;
+	}
 	if (pdev->vendor == PCI_VENDOR_ID_VIA)
 		xhci->quirks |= XHCI_RESET_ON_RESUME;
 
@@ -284,13 +288,6 @@
 
 	driver = (struct hc_driver *)id->driver_data;
 
-	/* For some HW implementation, a XHCI reset is just not enough... */
-	if (usb_xhci_needs_pci_reset(dev)) {
-		dev_info(&dev->dev, "Resetting\n");
-		if (pci_reset_function_locked(dev))
-			dev_warn(&dev->dev, "Reset failed");
-	}
-
 	/* Prevent runtime suspending between USB-2 and USB-3 initialization */
 	pm_runtime_get_noresume(&dev->dev);
 
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 91a1a82..f0a99aa 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1497,44 +1497,6 @@
 		handle_cmd_completion(xhci, &event->event_cmd);
 }
 
-/* @port_id: the one-based port ID from the hardware (indexed from array of all
- * port registers -- USB 3.0 and USB 2.0).
- *
- * Returns a zero-based port number, which is suitable for indexing into each of
- * the split roothubs' port arrays and bus state arrays.
- * Add one to it in order to call xhci_find_slot_id_by_port.
- */
-static unsigned int find_faked_portnum_from_hw_portnum(struct usb_hcd *hcd,
-		struct xhci_hcd *xhci, u32 port_id)
-{
-	unsigned int i;
-	unsigned int num_similar_speed_ports = 0;
-
-	/* port_id from the hardware is 1-based, but port_array[], usb3_ports[],
-	 * and usb2_ports are 0-based indexes.  Count the number of similar
-	 * speed ports, up to 1 port before this port.
-	 */
-	for (i = 0; i < (port_id - 1); i++) {
-		u8 port_speed = xhci->port_array[i];
-
-		/*
-		 * Skip ports that don't have known speeds, or have duplicate
-		 * Extended Capabilities port speed entries.
-		 */
-		if (port_speed == 0 || port_speed == DUPLICATE_ENTRY)
-			continue;
-
-		/*
-		 * USB 3.0 ports are always under a USB 3.0 hub.  USB 2.0 and
-		 * 1.1 ports are under the USB 2.0 hub.  If the port speed
-		 * matches the device speed, it's a similar speed port.
-		 */
-		if ((port_speed == 0x03) == (hcd->speed >= HCD_USB3))
-			num_similar_speed_ports++;
-	}
-	return num_similar_speed_ports;
-}
-
 static void handle_device_notification(struct xhci_hcd *xhci,
 		union xhci_trb *event)
 {
@@ -1563,11 +1525,10 @@
 	u32 portsc, cmd_reg;
 	int max_ports;
 	int slot_id;
-	unsigned int faked_port_index;
-	u8 major_revision;
+	unsigned int hcd_portnum;
 	struct xhci_bus_state *bus_state;
-	__le32 __iomem **port_array;
 	bool bogus_port_status = false;
+	struct xhci_port *port;
 
 	/* Port status change events always have a successful completion code */
 	if (GET_COMP_CODE(le32_to_cpu(event->generic.field[2])) != COMP_SUCCESS)
@@ -1584,49 +1545,19 @@
 		return;
 	}
 
-	/* Figure out which usb_hcd this port is attached to:
-	 * is it a USB 3.0 port or a USB 2.0/1.1 port?
-	 */
-	major_revision = xhci->port_array[port_id - 1];
-
-	/* Find the right roothub. */
-	hcd = xhci_to_hcd(xhci);
-	if ((major_revision == 0x03) != (hcd->speed >= HCD_USB3))
-		hcd = xhci->shared_hcd;
-
-	if (major_revision == 0) {
-		xhci_warn(xhci, "Event for port %u not in "
-				"Extended Capabilities, ignoring.\n",
-				port_id);
-		bogus_port_status = true;
-		goto cleanup;
-	}
-	if (major_revision == DUPLICATE_ENTRY) {
-		xhci_warn(xhci, "Event for port %u duplicated in"
-				"Extended Capabilities, ignoring.\n",
-				port_id);
+	port = &xhci->hw_ports[port_id - 1];
+	if (!port || !port->rhub || port->hcd_portnum == DUPLICATE_ENTRY) {
+		xhci_warn(xhci, "Event for invalid port %u\n", port_id);
 		bogus_port_status = true;
 		goto cleanup;
 	}
 
-	/*
-	 * Hardware port IDs reported by a Port Status Change Event include USB
-	 * 3.0 and USB 2.0 ports.  We want to check if the port has reported a
-	 * resume event, but we first need to translate the hardware port ID
-	 * into the index into the ports on the correct split roothub, and the
-	 * correct bus_state structure.
-	 */
+	hcd = port->rhub->hcd;
 	bus_state = &xhci->bus_state[hcd_index(hcd)];
-	if (hcd->speed >= HCD_USB3)
-		port_array = xhci->usb3_ports;
-	else
-		port_array = xhci->usb2_ports;
-	/* Find the faked port hub number */
-	faked_port_index = find_faked_portnum_from_hw_portnum(hcd, xhci,
-			port_id);
-	portsc = readl(port_array[faked_port_index]);
+	hcd_portnum = port->hcd_portnum;
+	portsc = readl(port->addr);
 
-	trace_xhci_handle_port_status(faked_port_index, portsc);
+	trace_xhci_handle_port_status(hcd_portnum, portsc);
 
 	if (hcd->state == HC_STATE_SUSPENDED) {
 		xhci_dbg(xhci, "resume root hub\n");
@@ -1634,7 +1565,7 @@
 	}
 
 	if (hcd->speed >= HCD_USB3 && (portsc & PORT_PLS_MASK) == XDEV_INACTIVE)
-		bus_state->port_remote_wakeup &= ~(1 << faked_port_index);
+		bus_state->port_remote_wakeup &= ~(1 << hcd_portnum);
 
 	if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_RESUME) {
 		xhci_dbg(xhci, "port resume event for port %d\n", port_id);
@@ -1651,29 +1582,26 @@
 			 * so we can tell the difference between the end of
 			 * device and host initiated resume.
 			 */
-			bus_state->port_remote_wakeup |= 1 << faked_port_index;
-			xhci_test_and_clear_bit(xhci, port_array,
-					faked_port_index, PORT_PLC);
-			xhci_set_link_state(xhci, port_array, faked_port_index,
-						XDEV_U0);
+			bus_state->port_remote_wakeup |= 1 << hcd_portnum;
+			xhci_test_and_clear_bit(xhci, port, PORT_PLC);
+			xhci_set_link_state(xhci, port, XDEV_U0);
 			/* Need to wait until the next link state change
 			 * indicates the device is actually in U0.
 			 */
 			bogus_port_status = true;
 			goto cleanup;
-		} else if (!test_bit(faked_port_index,
-				     &bus_state->resuming_ports)) {
+		} else if (!test_bit(hcd_portnum, &bus_state->resuming_ports)) {
 			xhci_dbg(xhci, "resume HS port %d\n", port_id);
-			bus_state->resume_done[faked_port_index] = jiffies +
+			bus_state->resume_done[hcd_portnum] = jiffies +
 				msecs_to_jiffies(USB_RESUME_TIMEOUT);
-			set_bit(faked_port_index, &bus_state->resuming_ports);
+			set_bit(hcd_portnum, &bus_state->resuming_ports);
 			/* Do the rest in GetPortStatus after resume time delay.
 			 * Avoid polling roothub status before that so that a
 			 * usb device auto-resume latency around ~40ms.
 			 */
 			set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
 			mod_timer(&hcd->rh_timer,
-				  bus_state->resume_done[faked_port_index]);
+				  bus_state->resume_done[hcd_portnum]);
 			bogus_port_status = true;
 		}
 	}
@@ -1688,17 +1616,14 @@
 		 * so the roothub behavior is consistent with external
 		 * USB 3.0 hub behavior.
 		 */
-		slot_id = xhci_find_slot_id_by_port(hcd, xhci,
-				faked_port_index + 1);
+		slot_id = xhci_find_slot_id_by_port(hcd, xhci, hcd_portnum + 1);
 		if (slot_id && xhci->devs[slot_id])
 			xhci_ring_device(xhci, slot_id);
-		if (bus_state->port_remote_wakeup & (1 << faked_port_index)) {
-			bus_state->port_remote_wakeup &=
-				~(1 << faked_port_index);
-			xhci_test_and_clear_bit(xhci, port_array,
-					faked_port_index, PORT_PLC);
+		if (bus_state->port_remote_wakeup & (1 << hcd_portnum)) {
+			bus_state->port_remote_wakeup &= ~(1 << hcd_portnum);
+			xhci_test_and_clear_bit(xhci, port, PORT_PLC);
 			usb_wakeup_notification(hcd->self.root_hub,
-					faked_port_index + 1);
+					hcd_portnum + 1);
 			bogus_port_status = true;
 			goto cleanup;
 		}
@@ -1710,16 +1635,15 @@
 	 * out of the RExit state.
 	 */
 	if (!DEV_SUPERSPEED_ANY(portsc) &&
-			test_and_clear_bit(faked_port_index,
+			test_and_clear_bit(hcd_portnum,
 				&bus_state->rexit_ports)) {
-		complete(&bus_state->rexit_done[faked_port_index]);
+		complete(&bus_state->rexit_done[hcd_portnum]);
 		bogus_port_status = true;
 		goto cleanup;
 	}
 
 	if (hcd->speed < HCD_USB3)
-		xhci_test_and_clear_bit(xhci, port_array, faked_port_index,
-					PORT_PLC);
+		xhci_test_and_clear_bit(xhci, port, PORT_PLC);
 
 cleanup:
 	/* Update event ring dequeue pointer before dropping the lock */
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 2c076ea..a8c1d07 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -18,9 +18,11 @@
 #include <linux/phy/tegra/xusb.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
+#include <soc/tegra/pmc.h>
 
 #include "xhci.h"
 
@@ -761,6 +763,49 @@
 	}
 }
 
+static int tegra_xusb_runtime_suspend(struct device *dev)
+{
+	struct tegra_xusb *tegra = dev_get_drvdata(dev);
+
+	tegra_xusb_phy_disable(tegra);
+	regulator_bulk_disable(tegra->soc->num_supplies, tegra->supplies);
+	tegra_xusb_clk_disable(tegra);
+
+	return 0;
+}
+
+static int tegra_xusb_runtime_resume(struct device *dev)
+{
+	struct tegra_xusb *tegra = dev_get_drvdata(dev);
+	int err;
+
+	err = tegra_xusb_clk_enable(tegra);
+	if (err) {
+		dev_err(dev, "failed to enable clocks: %d\n", err);
+		return err;
+	}
+
+	err = regulator_bulk_enable(tegra->soc->num_supplies, tegra->supplies);
+	if (err) {
+		dev_err(dev, "failed to enable regulators: %d\n", err);
+		goto disable_clk;
+	}
+
+	err = tegra_xusb_phy_enable(tegra);
+	if (err < 0) {
+		dev_err(dev, "failed to enable PHYs: %d\n", err);
+		goto disable_regulator;
+	}
+
+	return 0;
+
+disable_regulator:
+	regulator_bulk_disable(tegra->soc->num_supplies, tegra->supplies);
+disable_clk:
+	tegra_xusb_clk_disable(tegra);
+	return err;
+}
+
 static int tegra_xusb_load_firmware(struct tegra_xusb *tegra)
 {
 	unsigned int code_tag_blocks, code_size_blocks, code_blocks;
@@ -930,20 +975,6 @@
 	if (IS_ERR(tegra->padctl))
 		return PTR_ERR(tegra->padctl);
 
-	tegra->host_rst = devm_reset_control_get(&pdev->dev, "xusb_host");
-	if (IS_ERR(tegra->host_rst)) {
-		err = PTR_ERR(tegra->host_rst);
-		dev_err(&pdev->dev, "failed to get xusb_host reset: %d\n", err);
-		goto put_padctl;
-	}
-
-	tegra->ss_rst = devm_reset_control_get(&pdev->dev, "xusb_ss");
-	if (IS_ERR(tegra->ss_rst)) {
-		err = PTR_ERR(tegra->ss_rst);
-		dev_err(&pdev->dev, "failed to get xusb_ss reset: %d\n", err);
-		goto put_padctl;
-	}
-
 	tegra->host_clk = devm_clk_get(&pdev->dev, "xusb_host");
 	if (IS_ERR(tegra->host_clk)) {
 		err = PTR_ERR(tegra->host_clk);
@@ -1007,11 +1038,48 @@
 		goto put_padctl;
 	}
 
+	if (!pdev->dev.pm_domain) {
+		tegra->host_rst = devm_reset_control_get(&pdev->dev,
+							 "xusb_host");
+		if (IS_ERR(tegra->host_rst)) {
+			err = PTR_ERR(tegra->host_rst);
+			dev_err(&pdev->dev,
+				"failed to get xusb_host reset: %d\n", err);
+			goto put_padctl;
+		}
+
+		tegra->ss_rst = devm_reset_control_get(&pdev->dev, "xusb_ss");
+		if (IS_ERR(tegra->ss_rst)) {
+			err = PTR_ERR(tegra->ss_rst);
+			dev_err(&pdev->dev, "failed to get xusb_ss reset: %d\n",
+				err);
+			goto put_padctl;
+		}
+
+		err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_XUSBA,
+							tegra->ss_clk,
+							tegra->ss_rst);
+		if (err) {
+			dev_err(&pdev->dev,
+				"failed to enable XUSBA domain: %d\n", err);
+			goto put_padctl;
+		}
+
+		err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_XUSBC,
+							tegra->host_clk,
+							tegra->host_rst);
+		if (err) {
+			dev_err(&pdev->dev,
+				"failed to enable XUSBC domain: %d\n", err);
+			goto disable_xusba;
+		}
+	}
+
 	tegra->supplies = devm_kcalloc(&pdev->dev, tegra->soc->num_supplies,
 				       sizeof(*tegra->supplies), GFP_KERNEL);
 	if (!tegra->supplies) {
 		err = -ENOMEM;
-		goto put_padctl;
+		goto disable_xusbc;
 	}
 
 	for (i = 0; i < tegra->soc->num_supplies; i++)
@@ -1021,7 +1089,7 @@
 				      tegra->supplies);
 	if (err) {
 		dev_err(&pdev->dev, "failed to get regulators: %d\n", err);
-		goto put_padctl;
+		goto disable_xusbc;
 	}
 
 	for (i = 0; i < tegra->soc->num_types; i++)
@@ -1031,7 +1099,7 @@
 				   sizeof(*tegra->phys), GFP_KERNEL);
 	if (!tegra->phys) {
 		err = -ENOMEM;
-		goto put_padctl;
+		goto disable_xusbc;
 	}
 
 	for (i = 0, k = 0; i < tegra->soc->num_types; i++) {
@@ -1047,44 +1115,18 @@
 					"failed to get PHY %s: %ld\n", prop,
 					PTR_ERR(phy));
 				err = PTR_ERR(phy);
-				goto put_padctl;
+				goto disable_xusbc;
 			}
 
 			tegra->phys[k++] = phy;
 		}
 	}
 
-	err = tegra_xusb_clk_enable(tegra);
-	if (err) {
-		dev_err(&pdev->dev, "failed to enable clocks: %d\n", err);
-		goto put_padctl;
-	}
-
-	err = regulator_bulk_enable(tegra->soc->num_supplies, tegra->supplies);
-	if (err) {
-		dev_err(&pdev->dev, "failed to enable regulators: %d\n", err);
-		goto disable_clk;
-	}
-
-	err = tegra_xusb_phy_enable(tegra);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to enable PHYs: %d\n", err);
-		goto disable_regulator;
-	}
-
-	tegra_xusb_ipfs_config(tegra, regs);
-
-	err = tegra_xusb_load_firmware(tegra);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to load firmware: %d\n", err);
-		goto disable_phy;
-	}
-
 	tegra->hcd = usb_create_hcd(&tegra_xhci_hc_driver, &pdev->dev,
 				    dev_name(&pdev->dev));
 	if (!tegra->hcd) {
 		err = -ENOMEM;
-		goto disable_phy;
+		goto disable_xusbc;
 	}
 
 	/*
@@ -1093,6 +1135,25 @@
 	 */
 	platform_set_drvdata(pdev, tegra);
 
+	pm_runtime_enable(&pdev->dev);
+	if (pm_runtime_enabled(&pdev->dev))
+		err = pm_runtime_get_sync(&pdev->dev);
+	else
+		err = tegra_xusb_runtime_resume(&pdev->dev);
+
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to enable device: %d\n", err);
+		goto disable_rpm;
+	}
+
+	tegra_xusb_ipfs_config(tegra, regs);
+
+	err = tegra_xusb_load_firmware(tegra);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to load firmware: %d\n", err);
+		goto put_rpm;
+	}
+
 	tegra->hcd->regs = tegra->regs;
 	tegra->hcd->rsrc_start = regs->start;
 	tegra->hcd->rsrc_len = resource_size(regs);
@@ -1100,7 +1161,7 @@
 	err = usb_add_hcd(tegra->hcd, tegra->xhci_irq, IRQF_SHARED);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to add USB HCD: %d\n", err);
-		goto put_usb2;
+		goto put_rpm;
 	}
 
 	device_wakeup_enable(tegra->hcd->self.controller);
@@ -1155,14 +1216,18 @@
 	usb_put_hcd(xhci->shared_hcd);
 remove_usb2:
 	usb_remove_hcd(tegra->hcd);
-put_usb2:
+put_rpm:
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_xusb_runtime_suspend(&pdev->dev);
+disable_rpm:
+	pm_runtime_disable(&pdev->dev);
 	usb_put_hcd(tegra->hcd);
-disable_phy:
-	tegra_xusb_phy_disable(tegra);
-disable_regulator:
-	regulator_bulk_disable(tegra->soc->num_supplies, tegra->supplies);
-disable_clk:
-	tegra_xusb_clk_disable(tegra);
+disable_xusbc:
+	if (!pdev->dev.pm_domain)
+		tegra_powergate_power_off(TEGRA_POWERGATE_XUSBC);
+disable_xusba:
+	if (!pdev->dev.pm_domain)
+		tegra_powergate_power_off(TEGRA_POWERGATE_XUSBA);
 put_padctl:
 	tegra_xusb_padctl_put(tegra->padctl);
 	return err;
@@ -1181,9 +1246,8 @@
 	dma_free_coherent(&pdev->dev, tegra->fw.size, tegra->fw.virt,
 			  tegra->fw.phys);
 
-	tegra_xusb_phy_disable(tegra);
-	regulator_bulk_disable(tegra->soc->num_supplies, tegra->supplies);
-	tegra_xusb_clk_disable(tegra);
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
 
 	tegra_xusb_padctl_put(tegra->padctl);
 
@@ -1211,6 +1275,8 @@
 #endif
 
 static const struct dev_pm_ops tegra_xusb_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_xusb_runtime_suspend,
+			   tegra_xusb_runtime_resume, NULL)
 	SET_SYSTEM_SLEEP_PM_OPS(tegra_xusb_suspend, tegra_xusb_resume)
 };
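
Aside — on the probe rework above (not part of the patch;
my_runtime_resume() stands in for tegra_xusb_runtime_resume()): when
CONFIG_PM is disabled, SET_RUNTIME_PM_OPS() expands to nothing and
pm_runtime_enabled() is always false, so the driver has to power the
hardware up by calling its resume handler directly. A sketch of that
shape:

	#include <linux/pm_runtime.h>

	/* in probe: */
	pm_runtime_enable(dev);
	if (pm_runtime_enabled(dev))
		err = pm_runtime_get_sync(dev);	/* core calls ->runtime_resume */
	else
		err = my_runtime_resume(dev);	/* CONFIG_PM off: power up by hand */
	if (err < 0)
		goto disable_rpm;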
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 711da33..8c8da2d 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -33,8 +33,8 @@
 module_param(link_quirk, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(link_quirk, "Don't clear the chain bit on a link TRB");
 
-static unsigned int quirks;
-module_param(quirks, uint, S_IRUGO);
+static unsigned long long quirks;
+module_param(quirks, ullong, S_IRUGO);
 MODULE_PARM_DESC(quirks, "Bit flags for quirks to be enabled as default");
 
 /* TODO: copied from ehci-hcd.c - can this be refactored? */
@@ -209,6 +209,68 @@
 	return ret;
 }
 
+static void xhci_zero_64b_regs(struct xhci_hcd *xhci)
+{
+	struct device *dev = xhci_to_hcd(xhci)->self.sysdev;
+	int err, i;
+	u64 val;
+
+	/*
+	 * Some Renesas controllers get into a weird state if they are
+	 * reset while programmed with 64bit addresses (they will preserve
+	 * the top half of the address in internal, non-visible
+	 * registers). You end up with half the address coming from the
+	 * kernel, and the other half coming from the firmware. Also,
+	 * changing the programming leads to extra accesses even if the
+	 * controller is supposed to be halted. The controller ends up with
+	 * a fatal fault, and is then ripe for being properly reset.
+	 *
+	 * Special care is taken to only apply this if the device is behind
+	 * an iommu. Doing anything when there is no iommu is definitely
+	 * unsafe...
+	 */
+	if (!(xhci->quirks & XHCI_ZERO_64B_REGS) || !dev->iommu_group)
+		return;
+
+	xhci_info(xhci, "Zeroing 64bit base registers, expecting fault\n");
+
+	/* Clear HSEIE so that faults do not get signaled */
+	val = readl(&xhci->op_regs->command);
+	val &= ~CMD_HSEIE;
+	writel(val, &xhci->op_regs->command);
+
+	/* Clear HSE (aka FATAL) */
+	val = readl(&xhci->op_regs->status);
+	val |= STS_FATAL;
+	writel(val, &xhci->op_regs->status);
+
+	/* Now zero the registers, and brace for impact */
+	val = xhci_read_64(xhci, &xhci->op_regs->dcbaa_ptr);
+	if (upper_32_bits(val))
+		xhci_write_64(xhci, 0, &xhci->op_regs->dcbaa_ptr);
+	val = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
+	if (upper_32_bits(val))
+		xhci_write_64(xhci, 0, &xhci->op_regs->cmd_ring);
+
+	for (i = 0; i < HCS_MAX_INTRS(xhci->hcs_params1); i++) {
+		struct xhci_intr_reg __iomem *ir;
+
+		ir = &xhci->run_regs->ir_set[i];
+		val = xhci_read_64(xhci, &ir->erst_base);
+		if (upper_32_bits(val))
+			xhci_write_64(xhci, 0, &ir->erst_base);
+		val = xhci_read_64(xhci, &ir->erst_dequeue);
+		if (upper_32_bits(val))
+			xhci_write_64(xhci, 0, &ir->erst_dequeue);
+	}
+
+	/* Wait for the fault to appear. It will be cleared on reset */
+	err = xhci_handshake(&xhci->op_regs->status,
+			     STS_FATAL, STS_FATAL,
+			     XHCI_MAX_HALT_USEC);
+	if (!err)
+		xhci_info(xhci, "Fault detected\n");
+}
 
 #ifdef CONFIG_USB_PCI
 /*
@@ -400,13 +462,15 @@
 {
 	struct xhci_hcd *xhci;
 	struct usb_hcd *hcd;
+	struct xhci_hub *rhub;
 	u32 temp;
 	int i;
 
 	xhci = from_timer(xhci, t, comp_mode_recovery_timer);
+	rhub = &xhci->usb3_rhub;
 
-	for (i = 0; i < xhci->num_usb3_ports; i++) {
-		temp = readl(xhci->usb3_ports[i]);
+	for (i = 0; i < rhub->num_ports; i++) {
+		temp = readl(rhub->ports[i]->addr);
 		if ((temp & PORT_PLS_MASK) == USB_SS_PORT_LS_COMP_MOD) {
 			/*
 			 * Compliance Mode Detected. Letting USB Core
@@ -426,7 +490,7 @@
 		}
 	}
 
-	if (xhci->port_status_u0 != ((1 << xhci->num_usb3_ports)-1))
+	if (xhci->port_status_u0 != ((1 << rhub->num_ports) - 1))
 		mod_timer(&xhci->comp_mode_recovery_timer,
 			jiffies + msecs_to_jiffies(COMP_MODE_RCVRY_MSECS));
 }
@@ -483,7 +547,7 @@
 
 static int xhci_all_ports_seen_u0(struct xhci_hcd *xhci)
 {
-	return (xhci->port_status_u0 == ((1 << xhci->num_usb3_ports)-1));
+	return (xhci->port_status_u0 == ((1 << xhci->usb3_rhub.num_ports) - 1));
 }
 
 
@@ -812,33 +876,33 @@
 
 static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
 {
+	struct xhci_port **ports;
 	int port_index;
-	__le32 __iomem **port_array;
 	unsigned long flags;
 	u32 t1, t2;
 
 	spin_lock_irqsave(&xhci->lock, flags);
 
 	/* disable usb3 ports Wake bits */
-	port_index = xhci->num_usb3_ports;
-	port_array = xhci->usb3_ports;
+	port_index = xhci->usb3_rhub.num_ports;
+	ports = xhci->usb3_rhub.ports;
 	while (port_index--) {
-		t1 = readl(port_array[port_index]);
+		t1 = readl(ports[port_index]->addr);
 		t1 = xhci_port_state_to_neutral(t1);
 		t2 = t1 & ~PORT_WAKE_BITS;
 		if (t1 != t2)
-			writel(t2, port_array[port_index]);
+			writel(t2, ports[port_index]->addr);
 	}
 
 	/* disable usb2 ports Wake bits */
-	port_index = xhci->num_usb2_ports;
-	port_array = xhci->usb2_ports;
+	port_index = xhci->usb2_rhub.num_ports;
+	ports = xhci->usb2_rhub.ports;
 	while (port_index--) {
-		t1 = readl(port_array[port_index]);
+		t1 = readl(ports[port_index]->addr);
 		t1 = xhci_port_state_to_neutral(t1);
 		t2 = t1 & ~PORT_WAKE_BITS;
 		if (t1 != t2)
-			writel(t2, port_array[port_index]);
+			writel(t2, ports[port_index]->addr);
 	}
 
 	spin_unlock_irqrestore(&xhci->lock, flags);
@@ -1004,6 +1068,7 @@
 
 		xhci_dbg(xhci, "Stop HCD\n");
 		xhci_halt(xhci);
+		xhci_zero_64b_regs(xhci);
 		xhci_reset(xhci);
 		spin_unlock_irq(&xhci->lock);
 		xhci_cleanup_msix(xhci);
@@ -3976,18 +4041,10 @@
  */
 int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1)
 {
-	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
-	__le32 __iomem *base_addr = &xhci->op_regs->port_status_base;
-	__le32 __iomem *addr;
-	int raw_port;
+	struct xhci_hub *rhub;
 
-	if (hcd->speed < HCD_USB3)
-		addr = xhci->usb2_ports[port1 - 1];
-	else
-		addr = xhci->usb3_ports[port1 - 1];
-
-	raw_port = (addr - base_addr)/NUM_PORT_REGS + 1;
-	return raw_port;
+	rhub = xhci_get_rhub(hcd);
+	return rhub->ports[port1 - 1]->hw_portnum + 1;
 }
 
 /*
@@ -4120,7 +4177,7 @@
 			struct usb_device *udev, int enable)
 {
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
-	__le32 __iomem	**port_array;
+	struct xhci_port **ports;
 	__le32 __iomem	*pm_addr, *hlpm_addr;
 	u32		pm_val, hlpm_val, field;
 	unsigned int	port_num;
@@ -4141,11 +4198,11 @@
 
 	spin_lock_irqsave(&xhci->lock, flags);
 
-	port_array = xhci->usb2_ports;
+	ports = xhci->usb2_rhub.ports;
 	port_num = udev->portnum - 1;
-	pm_addr = port_array[port_num] + PORTPMSC;
+	pm_addr = ports[port_num]->addr + PORTPMSC;
 	pm_val = readl(pm_addr);
-	hlpm_addr = port_array[port_num] + PORTHLPMC;
+	hlpm_addr = ports[port_num]->addr + PORTHLPMC;
 	field = le32_to_cpu(udev->bos->ext_cap->bmAttributes);
 
 	xhci_dbg(xhci, "%s port %d USB2 hardware LPM\n",
@@ -4858,6 +4915,7 @@
 
 	if (usb_hcd_is_primary_hcd(hcd)) {
 		xhci->main_hcd = hcd;
+		xhci->usb2_rhub.hcd = hcd;
 		/* Mark the first roothub as being USB 2.0.
 		 * The xHCI driver will register the USB 3.0 roothub.
 		 */
@@ -4883,6 +4941,7 @@
 			  minor_rev,
 			  minor_rev ? "Enhanced" : "");
 
+		xhci->usb3_rhub.hcd = hcd;
 		/* xHCI private pointer was set in xhci_pci_probe for the second
 		 * registered roothub.
 		 */
@@ -4921,6 +4980,8 @@
 	if (retval)
 		return retval;
 
+	xhci_zero_64b_regs(xhci);
+
 	xhci_dbg(xhci, "Resetting HCD\n");
 	/* Reset the internal HC memory state and registers. */
 	retval = xhci_reset(xhci);
@@ -4963,7 +5024,7 @@
 		return retval;
 	xhci_dbg(xhci, "Called HCD init\n");
 
-	xhci_info(xhci, "hcc params 0x%08x hci version 0x%x quirks 0x%08x\n",
+	xhci_info(xhci, "hcc params 0x%08x hci version 0x%x quirks 0x%016llx\n",
 		  xhci->hcc_params, xhci->hci_version, xhci->quirks);
 
 	return 0;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 6dfc486..939e2f86 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1683,13 +1683,23 @@
 	else
 		return 1;
 }
+struct xhci_port {
+	__le32 __iomem		*addr;
+	int			hw_portnum;
+	int			hcd_portnum;
+	struct xhci_hub		*rhub;
+};
 
 struct xhci_hub {
-	u8	maj_rev;
-	u8	min_rev;
-	u32	*psi;		/* array of protocol speed ID entries */
-	u8	psi_count;
-	u8	psi_uid_count;
+	struct xhci_port	**ports;
+	unsigned int		num_ports;
+	struct usb_hcd		*hcd;
+	/* supported protocol extended capability values */
+	u8			maj_rev;
+	u8			min_rev;
+	u32			*psi;	/* array of protocol speed ID entries */
+	u8			psi_count;
+	u8			psi_uid_count;
 };
 
 /* There is one xhci_hcd structure per controller */
@@ -1787,12 +1797,12 @@
 #define XHCI_STATE_DYING	(1 << 0)
 #define XHCI_STATE_HALTED	(1 << 1)
 #define XHCI_STATE_REMOVING	(1 << 2)
-	unsigned int		quirks;
-#define	XHCI_LINK_TRB_QUIRK	(1 << 0)
-#define XHCI_RESET_EP_QUIRK	(1 << 1)
-#define XHCI_NEC_HOST		(1 << 2)
-#define XHCI_AMD_PLL_FIX	(1 << 3)
-#define XHCI_SPURIOUS_SUCCESS	(1 << 4)
+	unsigned long long	quirks;
+#define	XHCI_LINK_TRB_QUIRK	BIT_ULL(0)
+#define XHCI_RESET_EP_QUIRK	BIT_ULL(1)
+#define XHCI_NEC_HOST		BIT_ULL(2)
+#define XHCI_AMD_PLL_FIX	BIT_ULL(3)
+#define XHCI_SPURIOUS_SUCCESS	BIT_ULL(4)
 /*
  * Certain Intel host controllers have a limit to the number of endpoint
  * contexts they can handle.  Ideally, they would signal that they can't handle
@@ -1802,50 +1812,44 @@
  * commands, reset device commands, disable slot commands, and address device
  * commands.
  */
-#define XHCI_EP_LIMIT_QUIRK	(1 << 5)
-#define XHCI_BROKEN_MSI		(1 << 6)
-#define XHCI_RESET_ON_RESUME	(1 << 7)
-#define	XHCI_SW_BW_CHECKING	(1 << 8)
-#define XHCI_AMD_0x96_HOST	(1 << 9)
-#define XHCI_TRUST_TX_LENGTH	(1 << 10)
-#define XHCI_LPM_SUPPORT	(1 << 11)
-#define XHCI_INTEL_HOST		(1 << 12)
-#define XHCI_SPURIOUS_REBOOT	(1 << 13)
-#define XHCI_COMP_MODE_QUIRK	(1 << 14)
-#define XHCI_AVOID_BEI		(1 << 15)
-#define XHCI_PLAT		(1 << 16)
-#define XHCI_SLOW_SUSPEND	(1 << 17)
-#define XHCI_SPURIOUS_WAKEUP	(1 << 18)
+#define XHCI_EP_LIMIT_QUIRK	BIT_ULL(5)
+#define XHCI_BROKEN_MSI		BIT_ULL(6)
+#define XHCI_RESET_ON_RESUME	BIT_ULL(7)
+#define	XHCI_SW_BW_CHECKING	BIT_ULL(8)
+#define XHCI_AMD_0x96_HOST	BIT_ULL(9)
+#define XHCI_TRUST_TX_LENGTH	BIT_ULL(10)
+#define XHCI_LPM_SUPPORT	BIT_ULL(11)
+#define XHCI_INTEL_HOST		BIT_ULL(12)
+#define XHCI_SPURIOUS_REBOOT	BIT_ULL(13)
+#define XHCI_COMP_MODE_QUIRK	BIT_ULL(14)
+#define XHCI_AVOID_BEI		BIT_ULL(15)
+#define XHCI_PLAT		BIT_ULL(16)
+#define XHCI_SLOW_SUSPEND	BIT_ULL(17)
+#define XHCI_SPURIOUS_WAKEUP	BIT_ULL(18)
 /* For controllers with a broken beyond repair streams implementation */
-#define XHCI_BROKEN_STREAMS	(1 << 19)
-#define XHCI_PME_STUCK_QUIRK	(1 << 20)
-#define XHCI_MTK_HOST		(1 << 21)
-#define XHCI_SSIC_PORT_UNUSED	(1 << 22)
-#define XHCI_NO_64BIT_SUPPORT	(1 << 23)
-#define XHCI_MISSING_CAS	(1 << 24)
+#define XHCI_BROKEN_STREAMS	BIT_ULL(19)
+#define XHCI_PME_STUCK_QUIRK	BIT_ULL(20)
+#define XHCI_MTK_HOST		BIT_ULL(21)
+#define XHCI_SSIC_PORT_UNUSED	BIT_ULL(22)
+#define XHCI_NO_64BIT_SUPPORT	BIT_ULL(23)
+#define XHCI_MISSING_CAS	BIT_ULL(24)
 /* For controller with a broken Port Disable implementation */
-#define XHCI_BROKEN_PORT_PED	(1 << 25)
-#define XHCI_LIMIT_ENDPOINT_INTERVAL_7	(1 << 26)
-#define XHCI_U2_DISABLE_WAKE	(1 << 27)
-#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL	(1 << 28)
-#define XHCI_HW_LPM_DISABLE	(1 << 29)
-#define XHCI_SUSPEND_DELAY	(1 << 30)
-#define XHCI_INTEL_USB_ROLE_SW	(1 << 31)
+#define XHCI_BROKEN_PORT_PED	BIT_ULL(25)
+#define XHCI_LIMIT_ENDPOINT_INTERVAL_7	BIT_ULL(26)
+#define XHCI_U2_DISABLE_WAKE	BIT_ULL(27)
+#define XHCI_ASMEDIA_MODIFY_FLOWCONTROL	BIT_ULL(28)
+#define XHCI_HW_LPM_DISABLE	BIT_ULL(29)
+#define XHCI_SUSPEND_DELAY	BIT_ULL(30)
+#define XHCI_INTEL_USB_ROLE_SW	BIT_ULL(31)
+#define XHCI_ZERO_64B_REGS	BIT_ULL(32)
 
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;
 	/* There are two roothubs to keep track of bus suspend info for */
 	struct xhci_bus_state   bus_state[2];
-	/* Is each xHCI roothub port a USB 3.0, USB 2.0, or USB 1.1 port? */
-	u8			*port_array;
-	/* Array of pointers to USB 3.0 PORTSC registers */
-	__le32 __iomem		**usb3_ports;
-	unsigned int		num_usb3_ports;
-	/* Array of pointers to USB 2.0 PORTSC registers */
-	__le32 __iomem		**usb2_ports;
+	struct xhci_port	*hw_ports;
 	struct xhci_hub		usb2_rhub;
 	struct xhci_hub		usb3_rhub;
-	unsigned int		num_usb2_ports;
 	/* support xHCI 0.96 spec USB2 software LPM */
 	unsigned		sw_lpm_support:1;
 	/* support xHCI 1.0 spec USB2 hardware LPM */
@@ -2091,14 +2095,16 @@
 unsigned int count_trbs(u64 addr, u64 len);
 
 /* xHCI roothub code */
-void xhci_set_link_state(struct xhci_hcd *xhci, __le32 __iomem **port_array,
-				int port_id, u32 link_state);
-void xhci_test_and_clear_bit(struct xhci_hcd *xhci, __le32 __iomem **port_array,
-				int port_id, u32 port_bit);
+void xhci_set_link_state(struct xhci_hcd *xhci, struct xhci_port *port,
+				u32 link_state);
+void xhci_test_and_clear_bit(struct xhci_hcd *xhci, struct xhci_port *port,
+				u32 port_bit);
 int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, u16 wIndex,
 		char *buf, u16 wLength);
 int xhci_hub_status_data(struct usb_hcd *hcd, char *buf);
 int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1);
+struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd);
+
 void xhci_hc_died(struct xhci_hcd *xhci);
 
 #ifdef CONFIG_PM
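
Aside — why the quirk flags above move to BIT_ULL(): XHCI_ZERO_64B_REGS is
bit 32, and "1 << 32" is undefined behaviour because the shift count
equals the width of int (1UL << 32 is equally bad on 32-bit builds), so
the quirks field widens to unsigned long long and every mask is built with
BIT_ULL(). A minimal sketch with made-up names:

	#include <linux/bitops.h>
	#include <linux/types.h>

	#define MY_QUIRK_HI	BIT_ULL(32)	/* 1ULL << 32: fine everywhere */
	/* #define MY_QUIRK_BAD	(1 << 32) */	/* UB: shift >= width of int */

	static unsigned long long quirks;

	static bool my_quirk_hi_set(void)
	{
		return quirks & MY_QUIRK_HI;
	}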
diff --git a/drivers/usb/isp1760/isp1760-core.c b/drivers/usb/isp1760/isp1760-core.c
index 05d2258..55b94fd 100644
--- a/drivers/usb/isp1760/isp1760-core.c
+++ b/drivers/usb/isp1760/isp1760-core.c
@@ -31,7 +31,7 @@
 	/* Low-level chip reset */
 	if (isp->rst_gpio) {
 		gpiod_set_value_cansleep(isp->rst_gpio, 1);
-		mdelay(50);
+		msleep(50);
 		gpiod_set_value_cansleep(isp->rst_gpio, 0);
 	}
 
diff --git a/drivers/usb/isp1760/isp1760-hcd.c b/drivers/usb/isp1760/isp1760-hcd.c
index 42672d6..1045521 100644
--- a/drivers/usb/isp1760/isp1760-hcd.c
+++ b/drivers/usb/isp1760/isp1760-hcd.c
@@ -2093,7 +2093,7 @@
 
 	isp1760_hub_control(hcd, ClearPortFeature, USB_PORT_FEAT_POWER,	1,
 			NULL, 0);
-	mdelay(20);
+	msleep(20);
 
 	spin_lock_irq(&priv->lock);
 	ehci_reset(hcd);
diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index 3e65bdc..f92c5df 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -2107,7 +2107,7 @@
 		bw = busSDR[(tmp8 & 0x03)];
 		break;
 	case 2:
-		ramtypetext1 = "asymmeric";
+		ramtypetext1 = "asymmetric";
 		sisusb->vramsize += sisusb->vramsize/2;
 		bw = busDDRA[(tmp8 & 0x03)];
 		break;
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 2761fad..34e866ad 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1227,7 +1227,7 @@
 /*
  * Map ring pages to user space.
  */
-static int mon_bin_vma_fault(struct vm_fault *vmf)
+static vm_fault_t mon_bin_vma_fault(struct vm_fault *vmf)
 {
 	struct mon_reader_bin *rp = vmf->vma->vm_private_data;
 	unsigned long offset, chunk_idx;
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index 984f7e1..bc5ecd5 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -700,7 +700,6 @@
 
 int mon_text_add(struct mon_bus *mbus, const struct usb_bus *ubus)
 {
-	struct dentry *d;
 	enum { NAMESZ = 10 };
 	char name[NAMESZ];
 	int busnum = ubus? ubus->busnum: 0;
@@ -713,42 +712,32 @@
 		rc = snprintf(name, NAMESZ, "%dt", busnum);
 		if (rc <= 0 || rc >= NAMESZ)
 			goto err_print_t;
-		d = debugfs_create_file(name, 0600, mon_dir, mbus,
+		mbus->dent_t = debugfs_create_file(name, 0600, mon_dir, mbus,
 							     &mon_fops_text_t);
-		if (d == NULL)
-			goto err_create_t;
-		mbus->dent_t = d;
 	}
 
 	rc = snprintf(name, NAMESZ, "%du", busnum);
 	if (rc <= 0 || rc >= NAMESZ)
 		goto err_print_u;
-	d = debugfs_create_file(name, 0600, mon_dir, mbus, &mon_fops_text_u);
-	if (d == NULL)
-		goto err_create_u;
-	mbus->dent_u = d;
+	mbus->dent_u = debugfs_create_file(name, 0600, mon_dir, mbus,
+					   &mon_fops_text_u);
 
 	rc = snprintf(name, NAMESZ, "%ds", busnum);
 	if (rc <= 0 || rc >= NAMESZ)
 		goto err_print_s;
-	d = debugfs_create_file(name, 0600, mon_dir, mbus, &mon_fops_stat);
-	if (d == NULL)
-		goto err_create_s;
-	mbus->dent_s = d;
+	mbus->dent_s = debugfs_create_file(name, 0600, mon_dir, mbus,
+					   &mon_fops_stat);
 
 	return 1;
 
-err_create_s:
 err_print_s:
 	debugfs_remove(mbus->dent_u);
 	mbus->dent_u = NULL;
-err_create_u:
 err_print_u:
 	if (ubus != NULL) {
 		debugfs_remove(mbus->dent_t);
 		mbus->dent_t = NULL;
 	}
-err_create_t:
 err_print_t:
 	return 0;
 }
@@ -756,8 +745,7 @@
 void mon_text_del(struct mon_bus *mbus)
 {
 	debugfs_remove(mbus->dent_u);
-	if (mbus->dent_t != NULL)
-		debugfs_remove(mbus->dent_t);
+	debugfs_remove(mbus->dent_t);
 	debugfs_remove(mbus->dent_s);
 }
 
@@ -775,18 +763,7 @@
 
 int __init mon_text_init(void)
 {
-	struct dentry *mondir;
-
-	mondir = debugfs_create_dir("usbmon", usb_debug_root);
-	if (IS_ERR(mondir)) {
-		/* debugfs not available, but we can use usbmon without it */
-		return 0;
-	}
-	if (mondir == NULL) {
-		printk(KERN_NOTICE TAG ": unable to create usbmon directory\n");
-		return -ENOMEM;
-	}
-	mon_dir = mondir;
+	mon_dir = debugfs_create_dir("usbmon", usb_debug_root);
 	return 0;
 }
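
Aside — the usbmon conversion above follows the debugfs rule that creation
results should not be checked: the driver must behave the same whether or
not its debug files exist, and debugfs_remove() is safe on a NULL or error
dentry. A minimal sketch of the convention (mydrv and state_fops are
placeholders):

	#include <linux/debugfs.h>

	static struct dentry *mydrv_dir;
	static const struct file_operations state_fops;	/* placeholder fops */

	static void mydrv_debugfs_init(void)
	{
		/* no return-value checks by design: failure here must
		 * not change how the driver itself behaves */
		mydrv_dir = debugfs_create_dir("mydrv", NULL);
		debugfs_create_file("state", 0400, mydrv_dir, NULL,
				    &state_fops);
	}

	static void mydrv_debugfs_exit(void)
	{
		debugfs_remove(mydrv_dir);	/* NULL/ERR-pointer safe */
	}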
 
diff --git a/drivers/usb/mtu3/Kconfig b/drivers/usb/mtu3/Kconfig
index 25cd619..40bbf1f 100644
--- a/drivers/usb/mtu3/Kconfig
+++ b/drivers/usb/mtu3/Kconfig
@@ -2,7 +2,7 @@
 
 config USB_MTU3
 	tristate "MediaTek USB3 Dual Role controller"
-	depends on EXTCON && (USB || USB_GADGET) && HAS_DMA
+	depends on USB || USB_GADGET
 	depends on ARCH_MEDIATEK || COMPILE_TEST
 	select USB_XHCI_MTK if USB_SUPPORT && USB_XHCI_HCD
 	help
@@ -40,6 +40,7 @@
 config USB_MTU3_DUAL_ROLE
 	bool "Dual Role mode"
 	depends on ((USB=y || USB=USB_MTU3) && (USB_GADGET=y || USB_GADGET=USB_MTU3))
+	depends on (EXTCON=y || EXTCON=USB_MTU3)
 	help
 	  This is the default mode of working of MTU3 controller where
 	  both host and gadget features are enabled.
diff --git a/drivers/usb/mtu3/mtu3.h b/drivers/usb/mtu3/mtu3.h
index 2cd00a2..87823ac 100644
--- a/drivers/usb/mtu3/mtu3.h
+++ b/drivers/usb/mtu3/mtu3.h
@@ -196,10 +196,12 @@
 * @vbus: vbus 5V used by host mode
 * @edev: external connector used to detect vbus and iddig changes
 * @vbus_nb: notifier for vbus detection
-* @vbus_nb: notifier for iddig(idpin) detection
-* @extcon_reg_dwork: delay work for extcon notifier register, waiting for
-*		xHCI driver initialization, it's necessary for system bootup
-*		as device.
+* @vbus_work : work of vbus detection notifier, used to avoid sleeping in
+*		the notifier callback, which runs in atomic context
+* @vbus_event : event of vbus detection notifier
+* @id_nb : notifier for iddig(idpin) detection
+* @id_work : work of iddig detection notifier
+* @id_event : event of iddig detection notifier
 * @is_u3_drd: whether port0 supports usb3.0 dual-role device or not
 * @manual_drd_enabled: it's true when supports dual-role device by debugfs
 *		to switch host/device modes depending on user input.
@@ -208,8 +210,11 @@
 	struct regulator *vbus;
 	struct extcon_dev *edev;
 	struct notifier_block vbus_nb;
+	struct work_struct vbus_work;
+	unsigned long vbus_event;
 	struct notifier_block id_nb;
-	struct delayed_work extcon_reg_dwork;
+	struct work_struct id_work;
+	unsigned long id_event;
 	bool is_u3_drd;
 	bool manual_drd_enabled;
 };
diff --git a/drivers/usb/mtu3/mtu3_core.c b/drivers/usb/mtu3/mtu3_core.c
index b1b99a8..eecfd06 100644
--- a/drivers/usb/mtu3/mtu3_core.c
+++ b/drivers/usb/mtu3/mtu3_core.c
@@ -176,7 +176,7 @@
 	mtu3_writel(mbase, U3D_LV1IESR, value);
 
 	/* Enable U2 common USB interrupts */
-	value = SUSPEND_INTR | RESUME_INTR | RESET_INTR;
+	value = SUSPEND_INTR | RESUME_INTR | RESET_INTR | LPM_RESUME_INTR;
 	mtu3_writel(mbase, U3D_COMMON_USB_INTR_ENABLE, value);
 
 	if (mtu->is_u3_ip) {
@@ -195,6 +195,16 @@
 	mtu3_writel(mbase, U3D_DEV_LINK_INTR_ENABLE, SSUSB_DEV_SPEED_CHG_INTR);
 }
 
+/* reset: u2 - data toggle, u3 - SeqN, flow control status etc */
+static void mtu3_ep_reset(struct mtu3_ep *mep)
+{
+	struct mtu3 *mtu = mep->mtu;
+	u32 rst_bit = EP_RST(mep->is_in, mep->epnum);
+
+	mtu3_setbits(mtu->mac_base, U3D_EP_RST, rst_bit);
+	mtu3_clrbits(mtu->mac_base, U3D_EP_RST, rst_bit);
+}
+
 /* set/clear the stall and toggle bits for non-ep0 */
 void mtu3_ep_stall_set(struct mtu3_ep *mep, bool set)
 {
@@ -220,8 +230,7 @@
 	}
 
 	if (!set) {
-		mtu3_setbits(mbase, U3D_EP_RST, EP_RST(mep->is_in, epnum));
-		mtu3_clrbits(mbase, U3D_EP_RST, EP_RST(mep->is_in, epnum));
+		mtu3_ep_reset(mep);
 		mep->flags &= ~MTU3_EP_STALL;
 	} else {
 		mep->flags |= MTU3_EP_STALL;
@@ -400,6 +409,7 @@
 		mtu3_setbits(mbase, U3D_QIECR0, QMU_RX_DONE_INT(epnum));
 	}
 
+	mtu3_ep_reset(mep);
 	ep_fifo_free(mep);
 
 	dev_dbg(mtu->dev, "%s: %s\n", __func__, mep->name);
@@ -658,8 +668,10 @@
 	if (ltssm & (HOT_RST_INTR | WARM_RST_INTR))
 		mtu3_gadget_reset(mtu);
 
-	if (ltssm & VBUS_FALL_INTR)
+	if (ltssm & VBUS_FALL_INTR) {
 		mtu3_ss_func_set(mtu, false);
+		mtu3_gadget_reset(mtu);
+	}
 
 	if (ltssm & VBUS_RISE_INTR)
 		mtu3_ss_func_set(mtu, true);
@@ -692,6 +704,12 @@
 	if (u2comm & RESET_INTR)
 		mtu3_gadget_reset(mtu);
 
+	if (u2comm & LPM_RESUME_INTR) {
+		if (!(mtu3_readl(mbase, U3D_POWER_MANAGEMENT) & LPM_HRWE))
+			mtu3_setbits(mbase, U3D_USB20_MISC_CONTROL,
+				     LPM_U3_ACK_EN);
+	}
+
 	return IRQ_HANDLED;
 }
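
mtu3_ep_reset() factors the duplicated set-then-clear sequence into one helper, which the stall-clear path and (newly) the endpoint-disable path both call, so a disabled endpoint also gets its data toggle/SeqN state cleared. The underlying pulse idiom, sketched with hypothetical register helpers:

/* pulse a reset bit: assert it, then release it; my_setbits()/my_clrbits()
 * are hypothetical read-modify-write accessors standing in for the
 * driver's mtu3_setbits()/mtu3_clrbits() */
static void pulse_bit(void __iomem *base, u32 reg, u32 bit)
{
	my_setbits(base, reg, bit);
	my_clrbits(base, reg, bit);
}
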
 
diff --git a/drivers/usb/mtu3/mtu3_dr.c b/drivers/usb/mtu3/mtu3_dr.c
index db7562d..ac60e9c 100644
--- a/drivers/usb/mtu3/mtu3_dr.c
+++ b/drivers/usb/mtu3/mtu3_dr.c
@@ -174,16 +174,40 @@
 	}
 }
 
+static void ssusb_id_work(struct work_struct *work)
+{
+	struct otg_switch_mtk *otg_sx =
+		container_of(work, struct otg_switch_mtk, id_work);
+
+	if (otg_sx->id_event)
+		ssusb_set_mailbox(otg_sx, MTU3_ID_GROUND);
+	else
+		ssusb_set_mailbox(otg_sx, MTU3_ID_FLOAT);
+}
+
+static void ssusb_vbus_work(struct work_struct *work)
+{
+	struct otg_switch_mtk *otg_sx =
+		container_of(work, struct otg_switch_mtk, vbus_work);
+
+	if (otg_sx->vbus_event)
+		ssusb_set_mailbox(otg_sx, MTU3_VBUS_VALID);
+	else
+		ssusb_set_mailbox(otg_sx, MTU3_VBUS_OFF);
+}
+
+/*
+ * @ssusb_id_notifier is called in atomic context, but @ssusb_set_mailbox
+ * may sleep, so defer the handling to a workqueue.
+ */
 static int ssusb_id_notifier(struct notifier_block *nb,
 	unsigned long event, void *ptr)
 {
 	struct otg_switch_mtk *otg_sx =
 		container_of(nb, struct otg_switch_mtk, id_nb);
 
-	if (event)
-		ssusb_set_mailbox(otg_sx, MTU3_ID_GROUND);
-	else
-		ssusb_set_mailbox(otg_sx, MTU3_ID_FLOAT);
+	otg_sx->id_event = event;
+	schedule_work(&otg_sx->id_work);
 
 	return NOTIFY_DONE;
 }
@@ -194,10 +218,8 @@
 	struct otg_switch_mtk *otg_sx =
 		container_of(nb, struct otg_switch_mtk, vbus_nb);
 
-	if (event)
-		ssusb_set_mailbox(otg_sx, MTU3_VBUS_VALID);
-	else
-		ssusb_set_mailbox(otg_sx, MTU3_VBUS_OFF);
+	otg_sx->vbus_event = event;
+	schedule_work(&otg_sx->vbus_work);
 
 	return NOTIFY_DONE;
 }
@@ -238,15 +260,6 @@
 	return 0;
 }
 
-static void extcon_register_dwork(struct work_struct *work)
-{
-	struct delayed_work *dwork = to_delayed_work(work);
-	struct otg_switch_mtk *otg_sx =
-	    container_of(dwork, struct otg_switch_mtk, extcon_reg_dwork);
-
-	ssusb_extcon_register(otg_sx);
-}
-
 /*
  * We provide an interface via debugfs to switch between host and device modes
  * depending on user input.
@@ -365,10 +378,6 @@
 	struct dentry *root;
 
 	root = debugfs_create_dir(dev_name(ssusb->dev), usb_debug_root);
-	if (!root) {
-		dev_err(ssusb->dev, "create debugfs root failed\n");
-		return;
-	}
 	ssusb->dbgfs_root = root;
 
 	debugfs_create_file("mode", 0644, root, ssusb, &ssusb_mode_fops);
@@ -407,18 +416,13 @@
 {
 	struct otg_switch_mtk *otg_sx = &ssusb->otg_switch;
 
-	if (otg_sx->manual_drd_enabled) {
-		ssusb_debugfs_init(ssusb);
-	} else {
-		INIT_DELAYED_WORK(&otg_sx->extcon_reg_dwork,
-				  extcon_register_dwork);
+	INIT_WORK(&otg_sx->id_work, ssusb_id_work);
+	INIT_WORK(&otg_sx->vbus_work, ssusb_vbus_work);
 
-		/*
-		 * It is enough to delay 1s for waiting for
-		 * host initialization
-		 */
-		schedule_delayed_work(&otg_sx->extcon_reg_dwork, HZ);
-	}
+	if (otg_sx->manual_drd_enabled)
+		ssusb_debugfs_init(ssusb);
+	else
+		ssusb_extcon_register(otg_sx);
 
 	return 0;
 }
@@ -429,6 +433,7 @@
 
 	if (otg_sx->manual_drd_enabled)
 		ssusb_debugfs_exit(ssusb);
-	else
-		cancel_delayed_work(&otg_sx->extcon_reg_dwork);
+
+	cancel_work_sync(&otg_sx->id_work);
+	cancel_work_sync(&otg_sx->vbus_work);
 }
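
The mtu3_dr.c change is the heart of the extcon rework: the extcon notifier chain is invoked in atomic context, while ssusb_set_mailbox() may sleep, so each notifier now only records the event and schedules a work item. A condensed, self-contained sketch of the pattern (handle_role_change() is a hypothetical stand-in for the mailbox call):

#include <linux/notifier.h>
#include <linux/workqueue.h>

struct drd_switch {
	struct notifier_block nb;
	struct work_struct work;
	unsigned long event;
};

static void drd_work(struct work_struct *work)
{
	struct drd_switch *sw = container_of(work, struct drd_switch, work);

	handle_role_change(sw->event);	/* may sleep: process context now */
}

static int drd_notifier(struct notifier_block *nb, unsigned long event,
			void *ptr)
{
	struct drd_switch *sw = container_of(nb, struct drd_switch, nb);

	sw->event = event;		/* racy overwrite only drops stale state */
	schedule_work(&sw->work);	/* hand off to the system workqueue */
	return NOTIFY_DONE;
}

Note the matching teardown now calls cancel_work_sync() unconditionally, which is safe because the work items are initialized in both the debugfs and extcon paths.
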
diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c
index f05f10f..5c60a8c 100644
--- a/drivers/usb/mtu3/mtu3_gadget.c
+++ b/drivers/usb/mtu3/mtu3_gadget.c
@@ -660,14 +660,10 @@
 	mtu3_gadget_init_eps(mtu);
 
 	ret = usb_add_gadget_udc(mtu->dev, &mtu->g);
-	if (ret) {
+	if (ret)
 		dev_err(mtu->dev, "failed to register udc\n");
-		return ret;
-	}
 
-	usb_gadget_set_state(&mtu->g, USB_STATE_NOTATTACHED);
-
-	return 0;
+	return ret;
 }
 
 void mtu3_gadget_cleanup(struct mtu3 *mtu)
@@ -723,4 +719,5 @@
 	mtu->u1_enable = 0;
 	mtu->u2_enable = 0;
 	mtu->delayed_status = false;
+	mtu->test_mode = false;
 }
diff --git a/drivers/usb/mtu3/mtu3_gadget_ep0.c b/drivers/usb/mtu3/mtu3_gadget_ep0.c
index ebdcf7a..25216e7 100644
--- a/drivers/usb/mtu3/mtu3_gadget_ep0.c
+++ b/drivers/usb/mtu3/mtu3_gadget_ep0.c
@@ -7,6 +7,7 @@
  * Author:  Chunfeng.Yun <chunfeng.yun@mediatek.com>
  */
 
+#include <linux/iopoll.h>
 #include <linux/usb/composite.h>
 
 #include "mtu3.h"
@@ -263,6 +264,7 @@
 {
 	void __iomem *mbase = mtu->mac_base;
 	int handled = 1;
+	u32 value;
 
 	switch (le16_to_cpu(setup->wIndex) >> 8) {
 	case TEST_J:
@@ -292,6 +294,14 @@
 	if (mtu->test_mode_nr == TEST_PACKET_MODE)
 		ep0_load_test_packet(mtu);
 
+	/* send status before entering test mode. */
+	value = mtu3_readl(mbase, U3D_EP0CSR) & EP0_W1C_BITS;
+	mtu3_writel(mbase, U3D_EP0CSR, value | EP0_SETUPPKTRDY | EP0_DATAEND);
+
+	/* wait for ACK status sent by host */
+	readl_poll_timeout_atomic(mbase + U3D_EP0CSR, value,
+			!(value & EP0_DATAEND), 100, 5000);
+
 	mtu3_writel(mbase, U3D_USB2_TEST_MODE, mtu->test_mode_nr);
 
 	mtu->ep0_state = MU3D_EP0_STATE_SETUP;
@@ -546,7 +556,7 @@
 	struct usb_request *req;
 	u32 csr;
 	u8 *src;
-	u8 count;
+	u32 count;
 	u32 maxp;
 
 	dev_dbg(mtu->dev, "%s\n", __func__);
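
Two details worth noting in the ep0 hunks: the status stage is now completed before the test-mode register is written, with readl_poll_timeout_atomic() bounding the wait for the host's ACK, and the FIFO byte count is widened from u8 to u32 so a full-size count cannot truncate. The poll helper's shape, in a sketch (REG_STATUS and BIT_BUSY are made-up names):

#include <linux/iopoll.h>

static int wait_until_idle(void __iomem *base)
{
	u32 val;

	/* re-read every 100us, give up after 5000us; -ETIMEDOUT on failure */
	return readl_poll_timeout_atomic(base + REG_STATUS, val,
					 !(val & BIT_BUSY), 100, 5000);
}
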
diff --git a/drivers/usb/mtu3/mtu3_plat.c b/drivers/usb/mtu3/mtu3_plat.c
index 628d5ce..46551f6 100644
--- a/drivers/usb/mtu3/mtu3_plat.c
+++ b/drivers/usb/mtu3/mtu3_plat.c
@@ -447,8 +447,7 @@
  */
 static int __maybe_unused mtu3_suspend(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ssusb_mtk *ssusb = platform_get_drvdata(pdev);
+	struct ssusb_mtk *ssusb = dev_get_drvdata(dev);
 
 	dev_dbg(dev, "%s\n", __func__);
 
@@ -466,8 +465,7 @@
 
 static int __maybe_unused mtu3_resume(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ssusb_mtk *ssusb = platform_get_drvdata(pdev);
+	struct ssusb_mtk *ssusb = dev_get_drvdata(dev);
 	int ret;
 
 	dev_dbg(dev, "%s\n", __func__);
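
The suspend/resume cleanup relies on dev_get_drvdata() returning the same pointer platform_set_drvdata() stored, so PM callbacks that only receive a struct device need not round-trip through to_platform_device(). A minimal sketch (struct my_ctx is hypothetical):

static int __maybe_unused my_suspend(struct device *dev)
{
	/* was: platform_get_drvdata(to_platform_device(dev)) */
	struct my_ctx *ctx = dev_get_drvdata(dev);

	return ctx ? 0 : -ENODEV;
}
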
diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c
index 0ad664e..660641a 100644
--- a/drivers/usb/musb/am35x.c
+++ b/drivers/usb/musb/am35x.c
@@ -201,7 +201,6 @@
 	struct device *dev = musb->controller;
 	struct musb_hdrc_platform_data *plat = dev_get_platdata(dev);
 	struct omap_musb_board_data *data = plat->board_data;
-	struct usb_otg *otg = musb->xceiv->otg;
 	unsigned long flags;
 	irqreturn_t ret = IRQ_NONE;
 	u32 epintr, usbintr;
@@ -264,14 +263,12 @@
 			WARNING("VBUS error workaround (delay coming)\n");
 		} else if (drvvbus) {
 			MUSB_HST_MODE(musb);
-			otg->default_a = 1;
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			portstate(musb->port1_status |= USB_PORT_STAT_POWER);
 			del_timer(&musb->dev_timer);
 		} else {
 			musb->is_active = 0;
 			MUSB_DEV_MODE(musb);
-			otg->default_a = 0;
 			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 			portstate(musb->port1_status &= ~USB_PORT_STAT_POWER);
 		}
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index b8295ce..1c023c0 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -223,7 +223,6 @@
 {
 	struct musb		*musb = hci;
 	void __iomem		*reg_base = musb->ctrl_base;
-	struct usb_otg		*otg = musb->xceiv->otg;
 	unsigned long		flags;
 	irqreturn_t		ret = IRQ_NONE;
 	u32			status;
@@ -280,7 +279,6 @@
 			WARNING("VBUS error workaround (delay coming)\n");
 		} else if (drvvbus) {
 			MUSB_HST_MODE(musb);
-			otg->default_a = 1;
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			portstate(musb->port1_status |= USB_PORT_STAT_POWER);
 			del_timer(&musb->dev_timer);
@@ -295,7 +293,6 @@
 			 */
 			musb->is_active = 0;
 			MUSB_DEV_MODE(musb);
-			otg->default_a = 0;
 			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 			portstate(musb->port1_status &= ~USB_PORT_STAT_POWER);
 		}
diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c
index 2ad39dc..fb6bbd2 100644
--- a/drivers/usb/musb/davinci.c
+++ b/drivers/usb/musb/davinci.c
@@ -311,14 +311,12 @@
 			WARNING("VBUS error workaround (delay coming)\n");
 		} else if (drvvbus) {
 			MUSB_HST_MODE(musb);
-			otg->default_a = 1;
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			portstate(musb->port1_status |= USB_PORT_STAT_POWER);
 			del_timer(&musb->dev_timer);
 		} else {
 			musb->is_active = 0;
 			MUSB_DEV_MODE(musb);
-			otg->default_a = 0;
 			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 			portstate(musb->port1_status &= ~USB_PORT_STAT_POWER);
 		}
@@ -425,6 +423,9 @@
 
 static int davinci_musb_exit(struct musb *musb)
 {
+	int	maxdelay = 30;
+	u8	devctl, warn = 0;
+
 	del_timer_sync(&musb->dev_timer);
 
 	/* force VBUS off */
@@ -438,31 +439,27 @@
 
 	davinci_musb_source_power(musb, 0 /*off*/, 1);
 
-	/* delay, to avoid problems with module reload */
-	if (musb->xceiv->otg->default_a) {
-		int	maxdelay = 30;
-		u8	devctl, warn = 0;
+	/*
+	 * delay, to avoid problems with module reload.
+	 * if there's no peripheral connected, this can take a
+	 * long time to fall, especially on EVM with huge C133.
+	 */
+	do {
+		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
+		if (!(devctl & MUSB_DEVCTL_VBUS))
+			break;
+		if ((devctl & MUSB_DEVCTL_VBUS) != warn) {
+			warn = devctl & MUSB_DEVCTL_VBUS;
+			dev_dbg(musb->controller, "VBUS %d\n",
+				warn >> MUSB_DEVCTL_VBUS_SHIFT);
+		}
+		msleep(1000);
+		maxdelay--;
+	} while (maxdelay > 0);
 
-		/* if there's no peripheral connected, this can take a
-		 * long time to fall, especially on EVM with huge C133.
-		 */
-		do {
-			devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-			if (!(devctl & MUSB_DEVCTL_VBUS))
-				break;
-			if ((devctl & MUSB_DEVCTL_VBUS) != warn) {
-				warn = devctl & MUSB_DEVCTL_VBUS;
-				dev_dbg(musb->controller, "VBUS %d\n",
-					warn >> MUSB_DEVCTL_VBUS_SHIFT);
-			}
-			msleep(1000);
-			maxdelay--;
-		} while (maxdelay > 0);
-
-		/* in OTG mode, another host might be connected */
-		if (devctl & MUSB_DEVCTL_VBUS)
-			dev_dbg(musb->controller, "VBUS off timeout (devctl %02x)\n", devctl);
-	}
+	/* in OTG mode, another host might be connected */
+	if (devctl & MUSB_DEVCTL_VBUS)
+		dev_dbg(musb->controller, "VBUS off timeout (devctl %02x)\n", devctl);
 
 	phy_off();
 
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index fb5e452..b7d5627 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -274,20 +274,6 @@
 	__raw_writew(data, addr + offset);
 }
 
-static u32 musb_default_readl(const void __iomem *addr, unsigned offset)
-{
-	u32 data = __raw_readl(addr + offset);
-
-	trace_musb_readl(__builtin_return_address(0), addr, offset, data);
-	return data;
-}
-
-static void musb_default_writel(void __iomem *addr, unsigned offset, u32 data)
-{
-	trace_musb_writel(__builtin_return_address(0), addr, offset, data);
-	__raw_writel(data, addr + offset);
-}
-
 /*
  * Load an endpoint's FIFO
  */
@@ -390,10 +376,20 @@
 void (*musb_writew)(void __iomem *addr, unsigned offset, u16 data);
 EXPORT_SYMBOL_GPL(musb_writew);
 
-u32 (*musb_readl)(const void __iomem *addr, unsigned offset);
+u32 musb_readl(const void __iomem *addr, unsigned offset)
+{
+	u32 data = __raw_readl(addr + offset);
+
+	trace_musb_readl(__builtin_return_address(0), addr, offset, data);
+	return data;
+}
 EXPORT_SYMBOL_GPL(musb_readl);
 
-void (*musb_writel)(void __iomem *addr, unsigned offset, u32 data);
+void musb_writel(void __iomem *addr, unsigned offset, u32 data)
+{
+	trace_musb_writel(__builtin_return_address(0), addr, offset, data);
+	__raw_writel(data, addr + offset);
+}
 EXPORT_SYMBOL_GPL(musb_writel);
 
 #ifndef CONFIG_MUSB_PIO_ONLY
@@ -527,6 +523,383 @@
 
 static void musb_recover_from_babble(struct musb *musb);
 
+static void musb_handle_intr_resume(struct musb *musb, u8 devctl)
+{
+	musb_dbg(musb, "RESUME (%s)",
+			usb_otg_state_string(musb->xceiv->otg->state));
+
+	if (devctl & MUSB_DEVCTL_HM) {
+		switch (musb->xceiv->otg->state) {
+		case OTG_STATE_A_SUSPEND:
+			/* remote wakeup? */
+			musb->port1_status |=
+					(USB_PORT_STAT_C_SUSPEND << 16)
+					| MUSB_PORT_STAT_RESUME;
+			musb->rh_timer = jiffies
+				+ msecs_to_jiffies(USB_RESUME_TIMEOUT);
+			musb->xceiv->otg->state = OTG_STATE_A_HOST;
+			musb->is_active = 1;
+			musb_host_resume_root_hub(musb);
+			schedule_delayed_work(&musb->finish_resume_work,
+				msecs_to_jiffies(USB_RESUME_TIMEOUT));
+			break;
+		case OTG_STATE_B_WAIT_ACON:
+			musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+			musb->is_active = 1;
+			MUSB_DEV_MODE(musb);
+			break;
+		default:
+			WARNING("bogus %s RESUME (%s)\n",
+				"host",
+				usb_otg_state_string(musb->xceiv->otg->state));
+		}
+	} else {
+		switch (musb->xceiv->otg->state) {
+		case OTG_STATE_A_SUSPEND:
+			/* possibly DISCONNECT is upcoming */
+			musb->xceiv->otg->state = OTG_STATE_A_HOST;
+			musb_host_resume_root_hub(musb);
+			break;
+		case OTG_STATE_B_WAIT_ACON:
+		case OTG_STATE_B_PERIPHERAL:
+			/* disconnect while suspended?  we may
+			 * not get a disconnect irq...
+			 */
+			if ((devctl & MUSB_DEVCTL_VBUS)
+					!= (3 << MUSB_DEVCTL_VBUS_SHIFT)
+					) {
+				musb->int_usb |= MUSB_INTR_DISCONNECT;
+				musb->int_usb &= ~MUSB_INTR_SUSPEND;
+				break;
+			}
+			musb_g_resume(musb);
+			break;
+		case OTG_STATE_B_IDLE:
+			musb->int_usb &= ~MUSB_INTR_SUSPEND;
+			break;
+		default:
+			WARNING("bogus %s RESUME (%s)\n",
+				"peripheral",
+				usb_otg_state_string(musb->xceiv->otg->state));
+		}
+	}
+}
+
+/* return IRQ_HANDLED to tell the caller to return immediately */
+static irqreturn_t musb_handle_intr_sessreq(struct musb *musb, u8 devctl)
+{
+	void __iomem *mbase = musb->mregs;
+
+	if ((devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS
+			&& (devctl & MUSB_DEVCTL_BDEVICE)) {
+		musb_dbg(musb, "SessReq while on B state");
+		return IRQ_HANDLED;
+	}
+
+	musb_dbg(musb, "SESSION_REQUEST (%s)",
+		usb_otg_state_string(musb->xceiv->otg->state));
+
+	/* IRQ arrives from ID pin sense or (later, if VBUS power
+	 * is removed) SRP.  responses are time critical:
+	 *  - turn on VBUS (with silicon-specific mechanism)
+	 *  - go through A_WAIT_VRISE
+	 *  - ... to A_WAIT_BCON.
+	 * a_wait_vrise_tmout triggers VBUS_ERROR transitions
+	 */
+	musb_writeb(mbase, MUSB_DEVCTL, MUSB_DEVCTL_SESSION);
+	musb->ep0_stage = MUSB_EP0_START;
+	musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+	MUSB_HST_MODE(musb);
+	musb_platform_set_vbus(musb, 1);
+
+	return IRQ_NONE;
+}
+
+static void musb_handle_intr_vbuserr(struct musb *musb, u8 devctl)
+{
+	int	ignore = 0;
+
+	/* During connection as an A-Device, we may see a short
+	 * current spikes causing voltage drop, because of cable
+	 * and peripheral capacitance combined with vbus draw.
+	 * (So: less common with truly self-powered devices, where
+	 * vbus doesn't act like a power supply.)
+	 *
+	 * Such spikes are short; usually less than ~500 usec, max
+	 * of ~2 msec.  That is, they're not sustained overcurrent
+	 * errors, though they're reported using VBUSERROR irqs.
+	 *
+	 * Workarounds:  (a) hardware: use self powered devices.
+	 * (b) software:  ignore non-repeated VBUS errors.
+	 *
+	 * REVISIT:  do delays from lots of DEBUG_KERNEL checks
+	 * make trouble here, keeping VBUS < 4.4V ?
+	 */
+	switch (musb->xceiv->otg->state) {
+	case OTG_STATE_A_HOST:
+		/* recovery is dicey once we've gotten past the
+		 * initial stages of enumeration, but if VBUS
+		 * stayed ok at the other end of the link, and
+		 * another reset is due (at least for high speed,
+		 * to redo the chirp etc), it might work OK...
+		 */
+	case OTG_STATE_A_WAIT_BCON:
+	case OTG_STATE_A_WAIT_VRISE:
+		if (musb->vbuserr_retry) {
+			void __iomem *mbase = musb->mregs;
+
+			musb->vbuserr_retry--;
+			ignore = 1;
+			devctl |= MUSB_DEVCTL_SESSION;
+			musb_writeb(mbase, MUSB_DEVCTL, devctl);
+		} else {
+			musb->port1_status |=
+				  USB_PORT_STAT_OVERCURRENT
+				| (USB_PORT_STAT_C_OVERCURRENT << 16);
+		}
+		break;
+	default:
+		break;
+	}
+
+	dev_printk(ignore ? KERN_DEBUG : KERN_ERR, musb->controller,
+			"VBUS_ERROR in %s (%02x, %s), retry #%d, port1 %08x\n",
+			usb_otg_state_string(musb->xceiv->otg->state),
+			devctl,
+			({ char *s;
+			switch (devctl & MUSB_DEVCTL_VBUS) {
+			case 0 << MUSB_DEVCTL_VBUS_SHIFT:
+				s = "<SessEnd"; break;
+			case 1 << MUSB_DEVCTL_VBUS_SHIFT:
+				s = "<AValid"; break;
+			case 2 << MUSB_DEVCTL_VBUS_SHIFT:
+				s = "<VBusValid"; break;
+			/* case 3 << MUSB_DEVCTL_VBUS_SHIFT: */
+			default:
+				s = "VALID"; break;
+			} s; }),
+			VBUSERR_RETRY_COUNT - musb->vbuserr_retry,
+			musb->port1_status);
+
+	/* go through A_WAIT_VFALL then start a new session */
+	if (!ignore)
+		musb_platform_set_vbus(musb, 0);
+}
+
+static void musb_handle_intr_suspend(struct musb *musb, u8 devctl)
+{
+	musb_dbg(musb, "SUSPEND (%s) devctl %02x",
+		usb_otg_state_string(musb->xceiv->otg->state), devctl);
+
+	switch (musb->xceiv->otg->state) {
+	case OTG_STATE_A_PERIPHERAL:
+		/* We also come here if the cable is removed, since
+		 * this silicon doesn't report ID-no-longer-grounded.
+		 *
+		 * We depend on T(a_wait_bcon) to shut us down, and
+		 * hope users don't do anything dicey during this
+		 * undesired detour through A_WAIT_BCON.
+		 */
+		musb_hnp_stop(musb);
+		musb_host_resume_root_hub(musb);
+		musb_root_disconnect(musb);
+		musb_platform_try_idle(musb, jiffies
+				+ msecs_to_jiffies(musb->a_wait_bcon
+					? : OTG_TIME_A_WAIT_BCON));
+
+		break;
+	case OTG_STATE_B_IDLE:
+		if (!musb->is_active)
+			break;
+		/* fall through */
+	case OTG_STATE_B_PERIPHERAL:
+		musb_g_suspend(musb);
+		musb->is_active = musb->g.b_hnp_enable;
+		if (musb->is_active) {
+			musb->xceiv->otg->state = OTG_STATE_B_WAIT_ACON;
+			musb_dbg(musb, "HNP: Setting timer for b_ase0_brst");
+			mod_timer(&musb->otg_timer, jiffies
+				+ msecs_to_jiffies(
+						OTG_TIME_B_ASE0_BRST));
+		}
+		break;
+	case OTG_STATE_A_WAIT_BCON:
+		if (musb->a_wait_bcon != 0)
+			musb_platform_try_idle(musb, jiffies
+				+ msecs_to_jiffies(musb->a_wait_bcon));
+		break;
+	case OTG_STATE_A_HOST:
+		musb->xceiv->otg->state = OTG_STATE_A_SUSPEND;
+		musb->is_active = musb->hcd->self.b_hnp_enable;
+		break;
+	case OTG_STATE_B_HOST:
+		/* Transition to B_PERIPHERAL, see 6.8.2.6 p 44 */
+		musb_dbg(musb, "REVISIT: SUSPEND as B_HOST");
+		break;
+	default:
+		/* "should not happen" */
+		musb->is_active = 0;
+		break;
+	}
+}
+
+static void musb_handle_intr_connect(struct musb *musb, u8 devctl, u8 int_usb)
+{
+	struct usb_hcd *hcd = musb->hcd;
+
+	musb->is_active = 1;
+	musb->ep0_stage = MUSB_EP0_START;
+
+	musb->intrtxe = musb->epmask;
+	musb_writew(musb->mregs, MUSB_INTRTXE, musb->intrtxe);
+	musb->intrrxe = musb->epmask & 0xfffe;
+	musb_writew(musb->mregs, MUSB_INTRRXE, musb->intrrxe);
+	musb_writeb(musb->mregs, MUSB_INTRUSBE, 0xf7);
+	musb->port1_status &= ~(USB_PORT_STAT_LOW_SPEED
+				|USB_PORT_STAT_HIGH_SPEED
+				|USB_PORT_STAT_ENABLE
+				);
+	musb->port1_status |= USB_PORT_STAT_CONNECTION
+				|(USB_PORT_STAT_C_CONNECTION << 16);
+
+	/* high vs full speed is just a guess until after reset */
+	if (devctl & MUSB_DEVCTL_LSDEV)
+		musb->port1_status |= USB_PORT_STAT_LOW_SPEED;
+
+	/* indicate new connection to OTG machine */
+	switch (musb->xceiv->otg->state) {
+	case OTG_STATE_B_PERIPHERAL:
+		if (int_usb & MUSB_INTR_SUSPEND) {
+			musb_dbg(musb, "HNP: SUSPEND+CONNECT, now b_host");
+			int_usb &= ~MUSB_INTR_SUSPEND;
+			goto b_host;
+		} else
+			musb_dbg(musb, "CONNECT as b_peripheral???");
+		break;
+	case OTG_STATE_B_WAIT_ACON:
+		musb_dbg(musb, "HNP: CONNECT, now b_host");
+b_host:
+		musb->xceiv->otg->state = OTG_STATE_B_HOST;
+		if (musb->hcd)
+			musb->hcd->self.is_b_host = 1;
+		del_timer(&musb->otg_timer);
+		break;
+	default:
+		if ((devctl & MUSB_DEVCTL_VBUS)
+				== (3 << MUSB_DEVCTL_VBUS_SHIFT)) {
+			musb->xceiv->otg->state = OTG_STATE_A_HOST;
+			if (hcd)
+				hcd->self.is_b_host = 0;
+		}
+		break;
+	}
+
+	musb_host_poke_root_hub(musb);
+
+	musb_dbg(musb, "CONNECT (%s) devctl %02x",
+			usb_otg_state_string(musb->xceiv->otg->state), devctl);
+}
+
+static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl)
+{
+	musb_dbg(musb, "DISCONNECT (%s) as %s, devctl %02x",
+			usb_otg_state_string(musb->xceiv->otg->state),
+			MUSB_MODE(musb), devctl);
+
+	switch (musb->xceiv->otg->state) {
+	case OTG_STATE_A_HOST:
+	case OTG_STATE_A_SUSPEND:
+		musb_host_resume_root_hub(musb);
+		musb_root_disconnect(musb);
+		if (musb->a_wait_bcon != 0)
+			musb_platform_try_idle(musb, jiffies
+				+ msecs_to_jiffies(musb->a_wait_bcon));
+		break;
+	case OTG_STATE_B_HOST:
+		/* REVISIT this behaves for "real disconnect"
+		 * cases; make sure the other transitions from
+		 * cases; make sure the other transitions
+		 * from B_HOST act right too.  The B_HOST code
+		 */
+		musb_root_disconnect(musb);
+		if (musb->hcd)
+			musb->hcd->self.is_b_host = 0;
+		musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+		MUSB_DEV_MODE(musb);
+		musb_g_disconnect(musb);
+		break;
+	case OTG_STATE_A_PERIPHERAL:
+		musb_hnp_stop(musb);
+		musb_root_disconnect(musb);
+		/* FALLTHROUGH */
+	case OTG_STATE_B_WAIT_ACON:
+		/* FALLTHROUGH */
+	case OTG_STATE_B_PERIPHERAL:
+	case OTG_STATE_B_IDLE:
+		musb_g_disconnect(musb);
+		break;
+	default:
+		WARNING("unhandled DISCONNECT transition (%s)\n",
+			usb_otg_state_string(musb->xceiv->otg->state));
+		break;
+	}
+}
+
+/*
+ * mentor saves a bit: bus reset and babble share the same irq.
+ * only host sees babble; only peripheral sees bus reset.
+ */
+static void musb_handle_intr_reset(struct musb *musb)
+{
+	if (is_host_active(musb)) {
+		/*
+		 * When BABBLE happens what we can depends on which
+		 * platform MUSB is running, because some platforms
+		 * implemented proprietary means for 'recovering' from
+		 * Babble conditions. One such platform is AM335x. In
+		 * most cases, however, the only thing we can do is
+		 * drop the session.
+		 */
+		dev_err(musb->controller, "Babble\n");
+		musb_recover_from_babble(musb);
+	} else {
+		musb_dbg(musb, "BUS RESET as %s",
+			usb_otg_state_string(musb->xceiv->otg->state));
+		switch (musb->xceiv->otg->state) {
+		case OTG_STATE_A_SUSPEND:
+			musb_g_reset(musb);
+			/* FALLTHROUGH */
+		case OTG_STATE_A_WAIT_BCON:	/* OPT TD.4.7-900ms */
+			/* never use invalid T(a_wait_bcon) */
+			musb_dbg(musb, "HNP: in %s, %d msec timeout",
+				usb_otg_state_string(musb->xceiv->otg->state),
+				TA_WAIT_BCON(musb));
+			mod_timer(&musb->otg_timer, jiffies
+				+ msecs_to_jiffies(TA_WAIT_BCON(musb)));
+			break;
+		case OTG_STATE_A_PERIPHERAL:
+			del_timer(&musb->otg_timer);
+			musb_g_reset(musb);
+			break;
+		case OTG_STATE_B_WAIT_ACON:
+			musb_dbg(musb, "HNP: RESET (%s), to b_peripheral",
+				usb_otg_state_string(musb->xceiv->otg->state));
+			musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+			musb_g_reset(musb);
+			break;
+		case OTG_STATE_B_IDLE:
+			musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+			/* FALLTHROUGH */
+		case OTG_STATE_B_PERIPHERAL:
+			musb_g_reset(musb);
+			break;
+		default:
+			musb_dbg(musb, "Unhandled BUS RESET as %s",
+				usb_otg_state_string(musb->xceiv->otg->state));
+		}
+	}
+}
+
 /*
  * Interrupt Service Routine to record USB "global" interrupts.
  * Since these do not happen often and signify things of
@@ -551,379 +924,40 @@
 	 * spurious RESUME irqs happen too, paired with SUSPEND.
 	 */
 	if (int_usb & MUSB_INTR_RESUME) {
+		musb_handle_intr_resume(musb, devctl);
 		handled = IRQ_HANDLED;
-		musb_dbg(musb, "RESUME (%s)",
-				usb_otg_state_string(musb->xceiv->otg->state));
-
-		if (devctl & MUSB_DEVCTL_HM) {
-			switch (musb->xceiv->otg->state) {
-			case OTG_STATE_A_SUSPEND:
-				/* remote wakeup? */
-				musb->port1_status |=
-						(USB_PORT_STAT_C_SUSPEND << 16)
-						| MUSB_PORT_STAT_RESUME;
-				musb->rh_timer = jiffies
-					+ msecs_to_jiffies(USB_RESUME_TIMEOUT);
-				musb->xceiv->otg->state = OTG_STATE_A_HOST;
-				musb->is_active = 1;
-				musb_host_resume_root_hub(musb);
-				schedule_delayed_work(&musb->finish_resume_work,
-					msecs_to_jiffies(USB_RESUME_TIMEOUT));
-				break;
-			case OTG_STATE_B_WAIT_ACON:
-				musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
-				musb->is_active = 1;
-				MUSB_DEV_MODE(musb);
-				break;
-			default:
-				WARNING("bogus %s RESUME (%s)\n",
-					"host",
-					usb_otg_state_string(musb->xceiv->otg->state));
-			}
-		} else {
-			switch (musb->xceiv->otg->state) {
-			case OTG_STATE_A_SUSPEND:
-				/* possibly DISCONNECT is upcoming */
-				musb->xceiv->otg->state = OTG_STATE_A_HOST;
-				musb_host_resume_root_hub(musb);
-				break;
-			case OTG_STATE_B_WAIT_ACON:
-			case OTG_STATE_B_PERIPHERAL:
-				/* disconnect while suspended?  we may
-				 * not get a disconnect irq...
-				 */
-				if ((devctl & MUSB_DEVCTL_VBUS)
-						!= (3 << MUSB_DEVCTL_VBUS_SHIFT)
-						) {
-					musb->int_usb |= MUSB_INTR_DISCONNECT;
-					musb->int_usb &= ~MUSB_INTR_SUSPEND;
-					break;
-				}
-				musb_g_resume(musb);
-				break;
-			case OTG_STATE_B_IDLE:
-				musb->int_usb &= ~MUSB_INTR_SUSPEND;
-				break;
-			default:
-				WARNING("bogus %s RESUME (%s)\n",
-					"peripheral",
-					usb_otg_state_string(musb->xceiv->otg->state));
-			}
-		}
 	}
 
 	/* see manual for the order of the tests */
 	if (int_usb & MUSB_INTR_SESSREQ) {
-		void __iomem *mbase = musb->mregs;
-
-		if ((devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS
-				&& (devctl & MUSB_DEVCTL_BDEVICE)) {
-			musb_dbg(musb, "SessReq while on B state");
+		if (musb_handle_intr_sessreq(musb, devctl))
 			return IRQ_HANDLED;
-		}
-
-		musb_dbg(musb, "SESSION_REQUEST (%s)",
-			usb_otg_state_string(musb->xceiv->otg->state));
-
-		/* IRQ arrives from ID pin sense or (later, if VBUS power
-		 * is removed) SRP.  responses are time critical:
-		 *  - turn on VBUS (with silicon-specific mechanism)
-		 *  - go through A_WAIT_VRISE
-		 *  - ... to A_WAIT_BCON.
-		 * a_wait_vrise_tmout triggers VBUS_ERROR transitions
-		 */
-		musb_writeb(mbase, MUSB_DEVCTL, MUSB_DEVCTL_SESSION);
-		musb->ep0_stage = MUSB_EP0_START;
-		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-		MUSB_HST_MODE(musb);
-		musb_platform_set_vbus(musb, 1);
-
 		handled = IRQ_HANDLED;
 	}
 
 	if (int_usb & MUSB_INTR_VBUSERROR) {
-		int	ignore = 0;
-
-		/* During connection as an A-Device, we may see a short
-		 * current spikes causing voltage drop, because of cable
-		 * and peripheral capacitance combined with vbus draw.
-		 * (So: less common with truly self-powered devices, where
-		 * vbus doesn't act like a power supply.)
-		 *
-		 * Such spikes are short; usually less than ~500 usec, max
-		 * of ~2 msec.  That is, they're not sustained overcurrent
-		 * errors, though they're reported using VBUSERROR irqs.
-		 *
-		 * Workarounds:  (a) hardware: use self powered devices.
-		 * (b) software:  ignore non-repeated VBUS errors.
-		 *
-		 * REVISIT:  do delays from lots of DEBUG_KERNEL checks
-		 * make trouble here, keeping VBUS < 4.4V ?
-		 */
-		switch (musb->xceiv->otg->state) {
-		case OTG_STATE_A_HOST:
-			/* recovery is dicey once we've gotten past the
-			 * initial stages of enumeration, but if VBUS
-			 * stayed ok at the other end of the link, and
-			 * another reset is due (at least for high speed,
-			 * to redo the chirp etc), it might work OK...
-			 */
-		case OTG_STATE_A_WAIT_BCON:
-		case OTG_STATE_A_WAIT_VRISE:
-			if (musb->vbuserr_retry) {
-				void __iomem *mbase = musb->mregs;
-
-				musb->vbuserr_retry--;
-				ignore = 1;
-				devctl |= MUSB_DEVCTL_SESSION;
-				musb_writeb(mbase, MUSB_DEVCTL, devctl);
-			} else {
-				musb->port1_status |=
-					  USB_PORT_STAT_OVERCURRENT
-					| (USB_PORT_STAT_C_OVERCURRENT << 16);
-			}
-			break;
-		default:
-			break;
-		}
-
-		dev_printk(ignore ? KERN_DEBUG : KERN_ERR, musb->controller,
-				"VBUS_ERROR in %s (%02x, %s), retry #%d, port1 %08x\n",
-				usb_otg_state_string(musb->xceiv->otg->state),
-				devctl,
-				({ char *s;
-				switch (devctl & MUSB_DEVCTL_VBUS) {
-				case 0 << MUSB_DEVCTL_VBUS_SHIFT:
-					s = "<SessEnd"; break;
-				case 1 << MUSB_DEVCTL_VBUS_SHIFT:
-					s = "<AValid"; break;
-				case 2 << MUSB_DEVCTL_VBUS_SHIFT:
-					s = "<VBusValid"; break;
-				/* case 3 << MUSB_DEVCTL_VBUS_SHIFT: */
-				default:
-					s = "VALID"; break;
-				} s; }),
-				VBUSERR_RETRY_COUNT - musb->vbuserr_retry,
-				musb->port1_status);
-
-		/* go through A_WAIT_VFALL then start a new session */
-		if (!ignore)
-			musb_platform_set_vbus(musb, 0);
+		musb_handle_intr_vbuserr(musb, devctl);
 		handled = IRQ_HANDLED;
 	}
 
 	if (int_usb & MUSB_INTR_SUSPEND) {
-		musb_dbg(musb, "SUSPEND (%s) devctl %02x",
-			usb_otg_state_string(musb->xceiv->otg->state), devctl);
+		musb_handle_intr_suspend(musb, devctl);
 		handled = IRQ_HANDLED;
-
-		switch (musb->xceiv->otg->state) {
-		case OTG_STATE_A_PERIPHERAL:
-			/* We also come here if the cable is removed, since
-			 * this silicon doesn't report ID-no-longer-grounded.
-			 *
-			 * We depend on T(a_wait_bcon) to shut us down, and
-			 * hope users don't do anything dicey during this
-			 * undesired detour through A_WAIT_BCON.
-			 */
-			musb_hnp_stop(musb);
-			musb_host_resume_root_hub(musb);
-			musb_root_disconnect(musb);
-			musb_platform_try_idle(musb, jiffies
-					+ msecs_to_jiffies(musb->a_wait_bcon
-						? : OTG_TIME_A_WAIT_BCON));
-
-			break;
-		case OTG_STATE_B_IDLE:
-			if (!musb->is_active)
-				break;
-			/* fall through */
-		case OTG_STATE_B_PERIPHERAL:
-			musb_g_suspend(musb);
-			musb->is_active = musb->g.b_hnp_enable;
-			if (musb->is_active) {
-				musb->xceiv->otg->state = OTG_STATE_B_WAIT_ACON;
-				musb_dbg(musb, "HNP: Setting timer for b_ase0_brst");
-				mod_timer(&musb->otg_timer, jiffies
-					+ msecs_to_jiffies(
-							OTG_TIME_B_ASE0_BRST));
-			}
-			break;
-		case OTG_STATE_A_WAIT_BCON:
-			if (musb->a_wait_bcon != 0)
-				musb_platform_try_idle(musb, jiffies
-					+ msecs_to_jiffies(musb->a_wait_bcon));
-			break;
-		case OTG_STATE_A_HOST:
-			musb->xceiv->otg->state = OTG_STATE_A_SUSPEND;
-			musb->is_active = musb->hcd->self.b_hnp_enable;
-			break;
-		case OTG_STATE_B_HOST:
-			/* Transition to B_PERIPHERAL, see 6.8.2.6 p 44 */
-			musb_dbg(musb, "REVISIT: SUSPEND as B_HOST");
-			break;
-		default:
-			/* "should not happen" */
-			musb->is_active = 0;
-			break;
-		}
 	}
 
 	if (int_usb & MUSB_INTR_CONNECT) {
-		struct usb_hcd *hcd = musb->hcd;
-
+		musb_handle_intr_connect(musb, devctl, int_usb);
 		handled = IRQ_HANDLED;
-		musb->is_active = 1;
-
-		musb->ep0_stage = MUSB_EP0_START;
-
-		musb->intrtxe = musb->epmask;
-		musb_writew(musb->mregs, MUSB_INTRTXE, musb->intrtxe);
-		musb->intrrxe = musb->epmask & 0xfffe;
-		musb_writew(musb->mregs, MUSB_INTRRXE, musb->intrrxe);
-		musb_writeb(musb->mregs, MUSB_INTRUSBE, 0xf7);
-		musb->port1_status &= ~(USB_PORT_STAT_LOW_SPEED
-					|USB_PORT_STAT_HIGH_SPEED
-					|USB_PORT_STAT_ENABLE
-					);
-		musb->port1_status |= USB_PORT_STAT_CONNECTION
-					|(USB_PORT_STAT_C_CONNECTION << 16);
-
-		/* high vs full speed is just a guess until after reset */
-		if (devctl & MUSB_DEVCTL_LSDEV)
-			musb->port1_status |= USB_PORT_STAT_LOW_SPEED;
-
-		/* indicate new connection to OTG machine */
-		switch (musb->xceiv->otg->state) {
-		case OTG_STATE_B_PERIPHERAL:
-			if (int_usb & MUSB_INTR_SUSPEND) {
-				musb_dbg(musb, "HNP: SUSPEND+CONNECT, now b_host");
-				int_usb &= ~MUSB_INTR_SUSPEND;
-				goto b_host;
-			} else
-				musb_dbg(musb, "CONNECT as b_peripheral???");
-			break;
-		case OTG_STATE_B_WAIT_ACON:
-			musb_dbg(musb, "HNP: CONNECT, now b_host");
-b_host:
-			musb->xceiv->otg->state = OTG_STATE_B_HOST;
-			if (musb->hcd)
-				musb->hcd->self.is_b_host = 1;
-			del_timer(&musb->otg_timer);
-			break;
-		default:
-			if ((devctl & MUSB_DEVCTL_VBUS)
-					== (3 << MUSB_DEVCTL_VBUS_SHIFT)) {
-				musb->xceiv->otg->state = OTG_STATE_A_HOST;
-				if (hcd)
-					hcd->self.is_b_host = 0;
-			}
-			break;
-		}
-
-		musb_host_poke_root_hub(musb);
-
-		musb_dbg(musb, "CONNECT (%s) devctl %02x",
-				usb_otg_state_string(musb->xceiv->otg->state), devctl);
 	}
 
 	if (int_usb & MUSB_INTR_DISCONNECT) {
-		musb_dbg(musb, "DISCONNECT (%s) as %s, devctl %02x",
-				usb_otg_state_string(musb->xceiv->otg->state),
-				MUSB_MODE(musb), devctl);
+		musb_handle_intr_disconnect(musb, devctl);
 		handled = IRQ_HANDLED;
-
-		switch (musb->xceiv->otg->state) {
-		case OTG_STATE_A_HOST:
-		case OTG_STATE_A_SUSPEND:
-			musb_host_resume_root_hub(musb);
-			musb_root_disconnect(musb);
-			if (musb->a_wait_bcon != 0)
-				musb_platform_try_idle(musb, jiffies
-					+ msecs_to_jiffies(musb->a_wait_bcon));
-			break;
-		case OTG_STATE_B_HOST:
-			/* REVISIT this behaves for "real disconnect"
-			 * cases; make sure the other transitions from
-			 * from B_HOST act right too.  The B_HOST code
-			 * in hnp_stop() is currently not used...
-			 */
-			musb_root_disconnect(musb);
-			if (musb->hcd)
-				musb->hcd->self.is_b_host = 0;
-			musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
-			MUSB_DEV_MODE(musb);
-			musb_g_disconnect(musb);
-			break;
-		case OTG_STATE_A_PERIPHERAL:
-			musb_hnp_stop(musb);
-			musb_root_disconnect(musb);
-			/* FALLTHROUGH */
-		case OTG_STATE_B_WAIT_ACON:
-			/* FALLTHROUGH */
-		case OTG_STATE_B_PERIPHERAL:
-		case OTG_STATE_B_IDLE:
-			musb_g_disconnect(musb);
-			break;
-		default:
-			WARNING("unhandled DISCONNECT transition (%s)\n",
-				usb_otg_state_string(musb->xceiv->otg->state));
-			break;
-		}
 	}
 
-	/* mentor saves a bit: bus reset and babble share the same irq.
-	 * only host sees babble; only peripheral sees bus reset.
-	 */
 	if (int_usb & MUSB_INTR_RESET) {
+		musb_handle_intr_reset(musb);
 		handled = IRQ_HANDLED;
-		if (is_host_active(musb)) {
-			/*
-			 * When BABBLE happens what we can depends on which
-			 * platform MUSB is running, because some platforms
-			 * implemented proprietary means for 'recovering' from
-			 * Babble conditions. One such platform is AM335x. In
-			 * most cases, however, the only thing we can do is
-			 * drop the session.
-			 */
-			dev_err(musb->controller, "Babble\n");
-			musb_recover_from_babble(musb);
-		} else {
-			musb_dbg(musb, "BUS RESET as %s",
-				usb_otg_state_string(musb->xceiv->otg->state));
-			switch (musb->xceiv->otg->state) {
-			case OTG_STATE_A_SUSPEND:
-				musb_g_reset(musb);
-				/* FALLTHROUGH */
-			case OTG_STATE_A_WAIT_BCON:	/* OPT TD.4.7-900ms */
-				/* never use invalid T(a_wait_bcon) */
-				musb_dbg(musb, "HNP: in %s, %d msec timeout",
-					usb_otg_state_string(musb->xceiv->otg->state),
-					TA_WAIT_BCON(musb));
-				mod_timer(&musb->otg_timer, jiffies
-					+ msecs_to_jiffies(TA_WAIT_BCON(musb)));
-				break;
-			case OTG_STATE_A_PERIPHERAL:
-				del_timer(&musb->otg_timer);
-				musb_g_reset(musb);
-				break;
-			case OTG_STATE_B_WAIT_ACON:
-				musb_dbg(musb, "HNP: RESET (%s), to b_peripheral",
-					usb_otg_state_string(musb->xceiv->otg->state));
-				musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
-				musb_g_reset(musb);
-				break;
-			case OTG_STATE_B_IDLE:
-				musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
-				/* FALLTHROUGH */
-			case OTG_STATE_B_PERIPHERAL:
-				musb_g_reset(musb);
-				break;
-			default:
-				musb_dbg(musb, "Unhandled BUS RESET as %s",
-					usb_otg_state_string(musb->xceiv->otg->state));
-			}
-		}
 	}
 
 #if 0
@@ -1042,7 +1076,7 @@
 	 * (b) vbus present/connect IRQ, peripheral mode;
 	 * (c) peripheral initiates, using SRP
 	 */
-	if (musb->port_mode != MUSB_PORT_MODE_HOST &&
+	if (musb->port_mode != MUSB_HOST &&
 			musb->xceiv->otg->state != OTG_STATE_A_WAIT_BCON &&
 			(devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS) {
 		musb->is_active = 1;
@@ -1244,25 +1278,25 @@
 	/* REVISIT error check:  be sure ep0 can both rx and tx ... */
 	switch (cfg->style) {
 	case FIFO_TX:
-		musb_write_txfifosz(mbase, c_size);
-		musb_write_txfifoadd(mbase, c_off);
+		musb_writeb(mbase, MUSB_TXFIFOSZ, c_size);
+		musb_writew(mbase, MUSB_TXFIFOADD, c_off);
 		hw_ep->tx_double_buffered = !!(c_size & MUSB_FIFOSZ_DPB);
 		hw_ep->max_packet_sz_tx = maxpacket;
 		break;
 	case FIFO_RX:
-		musb_write_rxfifosz(mbase, c_size);
-		musb_write_rxfifoadd(mbase, c_off);
+		musb_writeb(mbase, MUSB_RXFIFOSZ, c_size);
+		musb_writew(mbase, MUSB_RXFIFOADD, c_off);
 		hw_ep->rx_double_buffered = !!(c_size & MUSB_FIFOSZ_DPB);
 		hw_ep->max_packet_sz_rx = maxpacket;
 		break;
 	case FIFO_RXTX:
-		musb_write_txfifosz(mbase, c_size);
-		musb_write_txfifoadd(mbase, c_off);
+		musb_writeb(mbase, MUSB_TXFIFOSZ, c_size);
+		musb_writew(mbase, MUSB_TXFIFOADD, c_off);
 		hw_ep->rx_double_buffered = !!(c_size & MUSB_FIFOSZ_DPB);
 		hw_ep->max_packet_sz_rx = maxpacket;
 
-		musb_write_rxfifosz(mbase, c_size);
-		musb_write_rxfifoadd(mbase, c_off);
+		musb_writeb(mbase, MUSB_RXFIFOSZ, c_size);
+		musb_writew(mbase, MUSB_RXFIFOADD, c_off);
 		hw_ep->tx_double_buffered = hw_ep->rx_double_buffered;
 		hw_ep->max_packet_sz_tx = maxpacket;
 
@@ -1470,7 +1504,7 @@
 	}
 
 	/* log release info */
-	musb->hwvers = musb_read_hwvers(mbase);
+	musb->hwvers = musb_readw(mbase, MUSB_HWVERS);
 	pr_debug("%s: %sHDRC RTL version %d.%d%s\n",
 		 musb_driver_name, type, MUSB_HWVERS_MAJOR(musb->hwvers),
 		 MUSB_HWVERS_MINOR(musb->hwvers),
@@ -1497,7 +1531,7 @@
 
 		hw_ep->fifo = musb->io.fifo_offset(i) + mbase;
 #if IS_ENABLED(CONFIG_USB_MUSB_TUSB6010)
-		if (musb->io.quirks & MUSB_IN_TUSB) {
+		if (musb->ops->quirks & MUSB_IN_TUSB) {
 			hw_ep->fifo_async = musb->async + 0x400 +
 				musb->io.fifo_offset(i);
 			hw_ep->fifo_sync = musb->sync + 0x400 +
@@ -2158,8 +2192,6 @@
 	musb_writeb = musb_default_writeb;
 	musb_readw = musb_default_readw;
 	musb_writew = musb_default_writew;
-	musb_readl = musb_default_readl;
-	musb_writel = musb_default_writel;
 
 	/* The musb_platform_init() call:
 	 *   - adjusts musb->mregs
@@ -2182,11 +2214,9 @@
 		goto fail2;
 	}
 
-	if (musb->ops->quirks)
-		musb->io.quirks = musb->ops->quirks;
 
 	/* Most devices use indexed offset or flat offset */
-	if (musb->io.quirks & MUSB_INDEXED_EP) {
+	if (musb->ops->quirks & MUSB_INDEXED_EP) {
 		musb->io.ep_offset = musb_indexed_ep_offset;
 		musb->io.ep_select = musb_indexed_ep_select;
 	} else {
@@ -2194,7 +2224,7 @@
 		musb->io.ep_select = musb_flat_ep_select;
 	}
 
-	if (musb->io.quirks & MUSB_G_NO_SKB_RESERVE)
+	if (musb->ops->quirks & MUSB_G_NO_SKB_RESERVE)
 		musb->g.quirk_avoids_skb_reserve = 1;
 
 	/* At least tusb6010 has its own offsets */
@@ -2226,10 +2256,6 @@
 		musb_readw = musb->ops->readw;
 	if (musb->ops->writew)
 		musb_writew = musb->ops->writew;
-	if (musb->ops->readl)
-		musb_readl = musb->ops->readl;
-	if (musb->ops->writel)
-		musb_writel = musb->ops->writel;
 
 #ifndef CONFIG_MUSB_PIO_ONLY
 	if (!musb->ops->dma_init || !musb->ops->dma_exit) {
@@ -2321,33 +2347,28 @@
 
 	/* program PHY to use external vBus if required */
 	if (plat->extvbus) {
-		u8 busctl = musb_read_ulpi_buscontrol(musb->mregs);
+		u8 busctl = musb_readb(musb->mregs, MUSB_ULPI_BUSCONTROL);
 		busctl |= MUSB_ULPI_USE_EXTVBUS;
-		musb_write_ulpi_buscontrol(musb->mregs, busctl);
+		musb_writeb(musb->mregs, MUSB_ULPI_BUSCONTROL, busctl);
 	}
 
-	if (musb->xceiv->otg->default_a) {
-		MUSB_HST_MODE(musb);
-		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-	} else {
-		MUSB_DEV_MODE(musb);
-		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-	}
+	MUSB_DEV_MODE(musb);
+	musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 
 	switch (musb->port_mode) {
-	case MUSB_PORT_MODE_HOST:
+	case MUSB_HOST:
 		status = musb_host_setup(musb, plat->power);
 		if (status < 0)
 			goto fail3;
 		status = musb_platform_set_mode(musb, MUSB_HOST);
 		break;
-	case MUSB_PORT_MODE_GADGET:
+	case MUSB_PERIPHERAL:
 		status = musb_gadget_setup(musb);
 		if (status < 0)
 			goto fail3;
 		status = musb_platform_set_mode(musb, MUSB_PERIPHERAL);
 		break;
-	case MUSB_PORT_MODE_DUAL_ROLE:
+	case MUSB_OTG:
 		status = musb_host_setup(musb, plat->power);
 		if (status < 0)
 			goto fail3;
@@ -2366,9 +2387,7 @@
 	if (status < 0)
 		goto fail3;
 
-	status = musb_init_debugfs(musb);
-	if (status < 0)
-		goto fail4;
+	musb_init_debugfs(musb);
 
 	status = sysfs_create_group(&musb->controller->kobj, &musb_attr_group);
 	if (status)
@@ -2383,7 +2402,6 @@
 fail5:
 	musb_exit_debugfs(musb);
 
-fail4:
 	musb_gadget_cleanup(musb);
 	musb_host_cleanup(musb);
 
@@ -2492,7 +2510,7 @@
 
 	musb->context.frame = musb_readw(musb_base, MUSB_FRAME);
 	musb->context.testmode = musb_readb(musb_base, MUSB_TESTMODE);
-	musb->context.busctl = musb_read_ulpi_buscontrol(musb->mregs);
+	musb->context.busctl = musb_readb(musb_base, MUSB_ULPI_BUSCONTROL);
 	musb->context.power = musb_readb(musb_base, MUSB_POWER);
 	musb->context.intrusbe = musb_readb(musb_base, MUSB_INTRUSBE);
 	musb->context.index = musb_readb(musb_base, MUSB_INDEX);
@@ -2521,13 +2539,13 @@
 
 		if (musb->dyn_fifo) {
 			musb->context.index_regs[i].txfifoadd =
-					musb_read_txfifoadd(musb_base);
+					musb_readw(musb_base, MUSB_TXFIFOADD);
 			musb->context.index_regs[i].rxfifoadd =
-					musb_read_rxfifoadd(musb_base);
+					musb_readw(musb_base, MUSB_RXFIFOADD);
 			musb->context.index_regs[i].txfifosz =
-					musb_read_txfifosz(musb_base);
+					musb_readb(musb_base, MUSB_TXFIFOSZ);
 			musb->context.index_regs[i].rxfifosz =
-					musb_read_rxfifosz(musb_base);
+					musb_readb(musb_base, MUSB_RXFIFOSZ);
 		}
 
 		musb->context.index_regs[i].txtype =
@@ -2564,7 +2582,7 @@
 
 	musb_writew(musb_base, MUSB_FRAME, musb->context.frame);
 	musb_writeb(musb_base, MUSB_TESTMODE, musb->context.testmode);
-	musb_write_ulpi_buscontrol(musb->mregs, musb->context.busctl);
+	musb_writeb(musb_base, MUSB_ULPI_BUSCONTROL, musb->context.busctl);
 
 	/* Don't affect SUSPENDM/RESUME bits in POWER reg */
 	power = musb_readb(musb_base, MUSB_POWER);
@@ -2601,13 +2619,13 @@
 			musb->context.index_regs[i].rxcsr);
 
 		if (musb->dyn_fifo) {
-			musb_write_txfifosz(musb_base,
+			musb_writeb(musb_base, MUSB_TXFIFOSZ,
 				musb->context.index_regs[i].txfifosz);
-			musb_write_rxfifosz(musb_base,
+			musb_writeb(musb_base, MUSB_RXFIFOSZ,
 				musb->context.index_regs[i].rxfifosz);
-			musb_write_txfifoadd(musb_base,
+			musb_writew(musb_base, MUSB_TXFIFOADD,
 				musb->context.index_regs[i].txfifoadd);
-			musb_write_rxfifoadd(musb_base,
+			musb_writew(musb_base, MUSB_RXFIFOADD,
 				musb->context.index_regs[i].rxfifoadd);
 		}
 
@@ -2657,7 +2675,7 @@
 		;
 	musb->flush_irq_work = false;
 
-	if (!(musb->io.quirks & MUSB_PRESERVE_SESSION))
+	if (!(musb->ops->quirks & MUSB_PRESERVE_SESSION))
 		musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
 
 	WARN_ON(!list_empty(&musb->pending_list));
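
The musb_core.c hunks are a mechanical but large refactor: each interrupt source handled by the stage-0 ISR moves into its own musb_handle_intr_*() helper, leaving the ISR as a flat series of flag tests. The resulting shape, condensed (struct ctrl, the INTR_* flags, and the handlers are placeholders):

static irqreturn_t stage0_irq(struct ctrl *c, u8 int_usb, u8 devctl)
{
	irqreturn_t handled = IRQ_NONE;

	if (int_usb & INTR_RESUME) {
		handle_resume(c, devctl);
		handled = IRQ_HANDLED;
	}
	if (int_usb & INTR_SESSREQ) {
		/* the only handler that can demand an early return */
		if (handle_sessreq(c, devctl))
			return IRQ_HANDLED;
		handled = IRQ_HANDLED;
	}
	/* ...suspend, connect, disconnect, reset follow the same pattern... */

	return handled;
}
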
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index 8a74cb2..04203b7 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -53,12 +53,6 @@
 #define is_peripheral_active(m)		(!(m)->is_host)
 #define is_host_active(m)		((m)->is_host)
 
-enum {
-	MUSB_PORT_MODE_HOST	= 1,
-	MUSB_PORT_MODE_GADGET,
-	MUSB_PORT_MODE_DUAL_ROLE,
-};
-
 /****************************** CONSTANTS ********************************/
 
 #ifndef MUSB_C_NUM_EPS
@@ -127,8 +121,6 @@
  * @writeb:	write 8 bits
  * @readw:	read 16 bits
  * @writew:	write 16 bits
- * @readl:	read 32 bits
- * @writel:	write 32 bits
  * @read_fifo:	reads the fifo
  * @write_fifo:	writes to fifo
  * @dma_init:	platform specific dma init function
@@ -140,7 +132,6 @@
  * @recover:	platform-specific babble recovery
  * @vbus_status: returns vbus status if possible
  * @set_vbus:	forces vbus status
- * @adjust_channel_params: pre check for standard dma channel_program func
  * @pre_root_reset_end: called before the root usb port reset flag gets cleared
  * @post_root_reset_end: called after the root usb port reset flag gets cleared
  * @phy_callback: optional callback function for the phy to call
@@ -174,8 +165,6 @@
 	void	(*writeb)(void __iomem *addr, unsigned offset, u8 data);
 	u16	(*readw)(const void __iomem *addr, unsigned offset);
 	void	(*writew)(void __iomem *addr, unsigned offset, u16 data);
-	u32	(*readl)(const void __iomem *addr, unsigned offset);
-	void	(*writel)(void __iomem *addr, unsigned offset, u32 data);
 	void	(*read_fifo)(struct musb_hw_ep *hw_ep, u16 len, u8 *buf);
 	void	(*write_fifo)(struct musb_hw_ep *hw_ep, u16 len, const u8 *buf);
 	struct dma_controller *
@@ -188,9 +177,6 @@
 	int	(*vbus_status)(struct musb *musb);
 	void	(*set_vbus)(struct musb *musb, int on);
 
-	int	(*adjust_channel_params)(struct dma_channel *channel,
-				u16 packet_sz, u8 *mode,
-				dma_addr_t *dma_addr, u32 *len);
 	void	(*pre_root_reset_end)(struct musb *musb);
 	void	(*post_root_reset_end)(struct musb *musb);
 	int	(*phy_callback)(enum musb_vbus_id_status status);
@@ -359,7 +345,7 @@
 
 	u8			min_power;	/* vbus for periph, in mA/2 */
 
-	int			port_mode;	/* MUSB_PORT_MODE_* */
+	enum musb_mode		port_mode;
 	bool			session;
 	unsigned long		quirk_retries;
 	bool			is_host;
diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c
index d0dd4f4..7fbb8a3 100644
--- a/drivers/usb/musb/musb_cppi41.c
+++ b/drivers/usb/musb/musb_cppi41.c
@@ -614,7 +614,7 @@
 	}
 
 	/* DA8xx Advisory 2.3.27: wait 250 ms before to start the teardown */
-	if (musb->io.quirks & MUSB_DA8XX)
+	if (musb->ops->quirks & MUSB_DA8XX)
 		mdelay(250);
 
 	tdbit = 1 << cppi41_channel->port_num;
@@ -773,7 +773,7 @@
 	controller->controller.is_compatible = cppi41_is_compatible;
 	controller->controller.musb = musb;
 
-	if (musb->io.quirks & MUSB_DA8XX) {
+	if (musb->ops->quirks & MUSB_DA8XX) {
 		controller->tdown_reg = DA8XX_USB_TEARDOWN;
 		controller->autoreq_reg = DA8XX_USB_AUTOREQ;
 		controller->set_dma_mode = da8xx_set_dma_mode;
diff --git a/drivers/usb/musb/musb_debug.h b/drivers/usb/musb/musb_debug.h
index 5e0f079..c444a80 100644
--- a/drivers/usb/musb/musb_debug.h
+++ b/drivers/usb/musb/musb_debug.h
@@ -20,12 +20,11 @@
 void musb_dbg(struct musb *musb, const char *fmt, ...);
 
 #ifdef CONFIG_DEBUG_FS
-int musb_init_debugfs(struct musb *musb);
+void musb_init_debugfs(struct musb *musb);
 void musb_exit_debugfs(struct musb *musb);
 #else
-static inline int musb_init_debugfs(struct musb *musb)
+static inline void musb_init_debugfs(struct musb *musb)
 {
-	return 0;
 }
 static inline void musb_exit_debugfs(struct musb *musb)
 {
diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c
index e2050ca..f42858e2 100644
--- a/drivers/usb/musb/musb_debugfs.c
+++ b/drivers/usb/musb/musb_debugfs.c
@@ -321,48 +321,18 @@
 	.release		= single_release,
 };
 
-int musb_init_debugfs(struct musb *musb)
+void musb_init_debugfs(struct musb *musb)
 {
-	struct dentry		*root;
-	struct dentry		*file;
-	int			ret;
+	struct dentry *root;
 
 	root = debugfs_create_dir(dev_name(musb->controller), NULL);
-	if (!root) {
-		ret = -ENOMEM;
-		goto err0;
-	}
-
-	file = debugfs_create_file("regdump", S_IRUGO, root, musb,
-			&musb_regdump_fops);
-	if (!file) {
-		ret = -ENOMEM;
-		goto err1;
-	}
-
-	file = debugfs_create_file("testmode", S_IRUGO | S_IWUSR,
-			root, musb, &musb_test_mode_fops);
-	if (!file) {
-		ret = -ENOMEM;
-		goto err1;
-	}
-
-	file = debugfs_create_file("softconnect", S_IRUGO | S_IWUSR,
-			root, musb, &musb_softconnect_fops);
-	if (!file) {
-		ret = -ENOMEM;
-		goto err1;
-	}
-
 	musb->debugfs_root = root;
 
-	return 0;
-
-err1:
-	debugfs_remove_recursive(root);
-
-err0:
-	return ret;
+	debugfs_create_file("regdump", S_IRUGO, root, musb, &musb_regdump_fops);
+	debugfs_create_file("testmode", S_IRUGO | S_IWUSR, root, musb,
+			    &musb_test_mode_fops);
+	debugfs_create_file("softconnect", S_IRUGO | S_IWUSR, root, musb,
+			    &musb_softconnect_fops);
 }
 
 void /* __init_or_exit */ musb_exit_debugfs(struct musb *musb)
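
The debugfs conversions here and in the mtu3/dsps hunks earlier follow the current kernel convention: creation failures are neither checked nor propagated, because the debugfs_create_*() helpers tolerate bad parents and a missing debug interface should never fail a probe. A minimal sketch (struct my_dev and my_regdump_fops are hypothetical):

#include <linux/debugfs.h>

static void my_debugfs_init(struct my_dev *md)
{
	/* no error checks: if the dir fails, the files quietly go nowhere */
	md->dbgfs_root = debugfs_create_dir("my_dev", NULL);
	debugfs_create_file("regdump", 0444, md->dbgfs_root, md,
			    &my_regdump_fops);
}
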
diff --git a/drivers/usb/musb/musb_dma.h b/drivers/usb/musb/musb_dma.h
index 0fc8cd0..8f60271 100644
--- a/drivers/usb/musb/musb_dma.h
+++ b/drivers/usb/musb/musb_dma.h
@@ -44,31 +44,31 @@
 #endif
 
 #ifdef CONFIG_USB_UX500_DMA
-#define musb_dma_ux500(musb)		(musb->io.quirks & MUSB_DMA_UX500)
+#define musb_dma_ux500(musb)		(musb->ops->quirks & MUSB_DMA_UX500)
 #else
 #define musb_dma_ux500(musb)		0
 #endif
 
 #ifdef CONFIG_USB_TI_CPPI41_DMA
-#define musb_dma_cppi41(musb)		(musb->io.quirks & MUSB_DMA_CPPI41)
+#define musb_dma_cppi41(musb)		(musb->ops->quirks & MUSB_DMA_CPPI41)
 #else
 #define musb_dma_cppi41(musb)		0
 #endif
 
 #ifdef CONFIG_USB_TI_CPPI_DMA
-#define musb_dma_cppi(musb)		(musb->io.quirks & MUSB_DMA_CPPI)
+#define musb_dma_cppi(musb)		(musb->ops->quirks & MUSB_DMA_CPPI)
 #else
 #define musb_dma_cppi(musb)		0
 #endif
 
 #ifdef CONFIG_USB_TUSB_OMAP_DMA
-#define tusb_dma_omap(musb)		(musb->io.quirks & MUSB_DMA_TUSB_OMAP)
+#define tusb_dma_omap(musb)		(musb->ops->quirks & MUSB_DMA_TUSB_OMAP)
 #else
 #define tusb_dma_omap(musb)		0
 #endif
 
 #ifdef CONFIG_USB_INVENTRA_DMA
-#define musb_dma_inventra(musb)		(musb->io.quirks & MUSB_DMA_INVENTRA)
+#define musb_dma_inventra(musb)		(musb->ops->quirks & MUSB_DMA_INVENTRA)
 #else
 #define musb_dma_inventra(musb)		0
 #endif
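
All the quirk tests now read musb->ops->quirks directly instead of a copy cached in musb->io, removing a field that could drift out of sync with its source. The trade-off, in a sketch with invented types:

struct my_ops { u32 quirks; };
struct my_dev { const struct my_ops *ops; };

static bool has_quirk(const struct my_dev *d, u32 flag)
{
	return d->ops->quirks & flag;	/* one extra deref, one source of truth */
}
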
diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c
index 6a60bc0..fb871ea 100644
--- a/drivers/usb/musb/musb_dsps.c
+++ b/drivers/usb/musb/musb_dsps.c
@@ -183,7 +183,7 @@
 	musb_writel(reg_base, wrp->coreintr_set, coremask);
 	/* start polling for ID change in dual-role idle mode */
 	if (musb->xceiv->otg->state == OTG_STATE_B_IDLE &&
-			musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE)
+			musb->port_mode == MUSB_OTG)
 		dsps_mod_timer(glue, -1);
 }
 
@@ -231,7 +231,7 @@
 		break;
 	case OTG_STATE_A_WAIT_BCON:
 		/* keep VBUS on for host-only mode */
-		if (musb->port_mode == MUSB_PORT_MODE_HOST) {
+		if (musb->port_mode == MUSB_HOST) {
 			dsps_mod_timer_optional(glue);
 			break;
 		}
@@ -360,13 +360,11 @@
 			WARNING("VBUS error workaround (delay coming)\n");
 		} else if (drvvbus) {
 			MUSB_HST_MODE(musb);
-			musb->xceiv->otg->default_a = 1;
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			dsps_mod_timer_optional(glue);
 		} else {
 			musb->is_active = 0;
 			MUSB_DEV_MODE(musb);
-			musb->xceiv->otg->default_a = 0;
 			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 		}
 
@@ -401,24 +399,17 @@
 static int dsps_musb_dbg_init(struct musb *musb, struct dsps_glue *glue)
 {
 	struct dentry *root;
-	struct dentry *file;
 	char buf[128];
 
 	sprintf(buf, "%s.dsps", dev_name(musb->controller));
 	root = debugfs_create_dir(buf, NULL);
-	if (!root)
-		return -ENOMEM;
 	glue->dbgfs_root = root;
 
 	glue->regset.regs = dsps_musb_regs;
 	glue->regset.nregs = ARRAY_SIZE(dsps_musb_regs);
 	glue->regset.base = musb->ctrl_base;
 
-	file = debugfs_create_regset32("regdump", S_IRUGO, root, &glue->regset);
-	if (!file) {
-		debugfs_remove_recursive(root);
-		return -ENOMEM;
-	}
+	debugfs_create_regset32("regdump", S_IRUGO, root, &glue->regset);
 	return 0;
 }
 
@@ -729,25 +720,6 @@
 	return val;
 }
 
-static int get_musb_port_mode(struct device *dev)
-{
-	enum usb_dr_mode mode;
-
-	mode = usb_get_dr_mode(dev);
-	switch (mode) {
-	case USB_DR_MODE_HOST:
-		return MUSB_PORT_MODE_HOST;
-
-	case USB_DR_MODE_PERIPHERAL:
-		return MUSB_PORT_MODE_GADGET;
-
-	case USB_DR_MODE_UNKNOWN:
-	case USB_DR_MODE_OTG:
-	default:
-		return MUSB_PORT_MODE_DUAL_ROLE;
-	}
-}
-
 static int dsps_create_musb_pdev(struct dsps_glue *glue,
 		struct platform_device *parent)
 {
@@ -786,6 +758,7 @@
 	musb->dev.parent		= dev;
 	musb->dev.dma_mask		= &musb_dmamask;
 	musb->dev.coherent_dma_mask	= musb_dmamask;
+	device_set_of_node_from_dev(&musb->dev, &parent->dev);
 
 	glue->musb = musb;
 
@@ -807,7 +780,7 @@
 	config->num_eps = get_int_prop(dn, "mentor,num-eps");
 	config->ram_bits = get_int_prop(dn, "mentor,ram-bits");
 	config->host_port_deassert_reset_at_resume = 1;
-	pdata.mode = get_musb_port_mode(dev);
+	pdata.mode = musb_get_mode(dev);
 	/* DT keeps this entry in mA, musb expects it as per USB spec */
 	pdata.power = get_int_prop(dn, "mentor,power") / 2;
 
@@ -1047,7 +1020,7 @@
 	musb_writel(mbase, wrp->tx_mode, glue->context.tx_mode);
 	musb_writel(mbase, wrp->rx_mode, glue->context.rx_mode);
 	if (musb->xceiv->otg->state == OTG_STATE_B_IDLE &&
-	    musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE)
+	    musb->port_mode == MUSB_OTG)
 		dsps_mod_timer(glue, -1);
 
 	pm_runtime_put(dev);
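
dsps drops its private get_musb_port_mode() in favor of a shared musb_get_mode(), and with the MUSB_PORT_MODE_* enum gone the DT dr_mode property maps straight onto enum musb_mode; the hunk also attaches the parent's of_node to the child musb device via device_set_of_node_from_dev(). The mapping the shared helper performs looks roughly like this (an illustrative sketch, not the helper's actual body):

#include <linux/usb/musb.h>	/* enum musb_mode */
#include <linux/usb/otg.h>	/* usb_get_dr_mode() */

static enum musb_mode sketch_get_mode(struct device *dev)
{
	switch (usb_get_dr_mode(dev)) {
	case USB_DR_MODE_HOST:
		return MUSB_HOST;
	case USB_DR_MODE_PERIPHERAL:
		return MUSB_PERIPHERAL;
	default:			/* USB_DR_MODE_OTG or unknown */
		return MUSB_OTG;
	}
}
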
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 71c5835..eae8b1b 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1794,16 +1794,12 @@
 	musb->g.speed = USB_SPEED_UNKNOWN;
 
 	MUSB_DEV_MODE(musb);
-	musb->xceiv->otg->default_a = 0;
 	musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 
 	/* this "gadget" abstracts/virtualizes the controller */
 	musb->g.name = musb_driver_name;
-#if IS_ENABLED(CONFIG_USB_MUSB_DUAL_ROLE)
-	musb->g.is_otg = 1;
-#elif IS_ENABLED(CONFIG_USB_MUSB_GADGET)
+	/* don't support otg protocols */
 	musb->g.is_otg = 0;
-#endif
 	INIT_DELAYED_WORK(&musb->gadget_work, musb_gadget_work);
 	musb_g_init_endpoints(musb);
 
@@ -1823,7 +1819,7 @@
 
 void musb_gadget_cleanup(struct musb *musb)
 {
-	if (musb->port_mode == MUSB_PORT_MODE_HOST)
+	if (musb->port_mode == MUSB_HOST)
 		return;
 
 	cancel_delayed_work_sync(&musb->gadget_work);
diff --git a/drivers/usb/musb/musb_gadget.h b/drivers/usb/musb/musb_gadget.h
index 9c34aca..d026636 100644
--- a/drivers/usb/musb/musb_gadget.h
+++ b/drivers/usb/musb/musb_gadget.h
@@ -60,10 +60,7 @@
 	enum buffer_map_state map_state;
 };
 
-static inline struct musb_request *to_musb_request(struct usb_request *req)
-{
-	return req ? container_of(req, struct musb_request, request) : NULL;
-}
+#define to_musb_request(r)	container_of((r), struct musb_request, request)
 
 extern struct usb_request *
 musb_alloc_request(struct usb_ep *ep, gfp_t gfp_flags);
@@ -99,10 +96,7 @@
 	u8				hb_mult;
 };
 
-static inline struct musb_ep *to_musb_ep(struct usb_ep *ep)
-{
-	return ep ? container_of(ep, struct musb_ep, end_point) : NULL;
-}
+#define to_musb_ep(ep)	container_of((ep), struct musb_ep, end_point)
 
 static inline struct musb_request *next_request(struct musb_ep *ep)
 {
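
The to_musb_request()/to_musb_ep() wrappers become plain container_of() macros: their NULL checks were dead, since every caller passes a member embedded in a live object. container_of() itself is just pointer arithmetic, as in this standalone sketch:

#include <linux/kernel.h>

struct inner { int x; };

struct outer {
	int cookie;
	struct inner member;
};

static struct outer *to_outer(struct inner *in)
{
	/* subtract offsetof(struct outer, member) from the member pointer */
	return container_of(in, struct outer, member);
}
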
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 15a42ce..8000c7c 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2735,7 +2735,7 @@
 
 void musb_host_cleanup(struct musb *musb)
 {
-	if (musb->port_mode == MUSB_PORT_MODE_GADGET)
+	if (musb->port_mode == MUSB_PERIPHERAL)
 		return;
 	usb_remove_hcd(musb->hcd);
 }
@@ -2750,13 +2750,13 @@
 	int ret;
 	struct usb_hcd *hcd = musb->hcd;
 
-	if (musb->port_mode == MUSB_PORT_MODE_HOST) {
+	if (musb->port_mode == MUSB_HOST) {
 		MUSB_HST_MODE(musb);
-		musb->xceiv->otg->default_a = 1;
 		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
 	}
 	otg_set_host(musb->xceiv->otg, &hcd->self);
-	hcd->self.otg_port = 1;
+	/* don't support otg protocols */
+	hcd->self.otg_port = 0;
 	musb->xceiv->otg->host = &hcd->self;
 	hcd->power_budget = 2 * (power_budget ? : 250);
 	hcd->skip_phy_initialization = 1;
diff --git a/drivers/usb/musb/musb_io.h b/drivers/usb/musb/musb_io.h
index b7025b2..8058a58 100644
--- a/drivers/usb/musb/musb_io.h
+++ b/drivers/usb/musb/musb_io.h
@@ -16,7 +16,6 @@
 
 /**
  * struct musb_io - IO functions for MUSB
- * @quirks:	platform specific flags
  * @ep_offset:	platform specific function to get end point offset
  * @ep_select:	platform specific function to select end point
  * @fifo_offset: platform specific function to get fifo offset
@@ -25,7 +24,6 @@
  * @busctl_offset: platform specific function to get busctl offset
  */
 struct musb_io {
-	u32	quirks;
 	u32	(*ep_offset)(u8 epnum, u16 offset);
 	void	(*ep_select)(void __iomem *mbase, u8 epnum);
 	u32	(*fifo_offset)(u8 epnum);
@@ -39,7 +37,7 @@
 extern void (*musb_writeb)(void __iomem *addr, unsigned offset, u8 data);
 extern u16 (*musb_readw)(const void __iomem *addr, unsigned offset);
 extern void (*musb_writew)(void __iomem *addr, unsigned offset, u16 data);
-extern u32 (*musb_readl)(const void __iomem *addr, unsigned offset);
-extern void (*musb_writel)(void __iomem *addr, unsigned offset, u32 data);
+extern u32 musb_readl(const void __iomem *addr, unsigned offset);
+extern void musb_writel(void __iomem *addr, unsigned offset, u32 data);
 
 #endif
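
The 32-bit accessors lose their indirection: with the ->readl/->writel ops hooks removed because nothing overrides them any more, musb_readl()/musb_writel() become ordinary exported functions the compiler can call directly. The before/after shape, sketched:

#include <linux/io.h>

/* before (an indirect call through a per-platform hook):
 *	u32 (*musb_readl)(const void __iomem *addr, unsigned offset);
 */
u32 my_readl(const void __iomem *addr, unsigned offset)
{
	return __raw_readl(addr + offset);	/* after: one direct definition */
}
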
diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h
index 8846662..5cd7264 100644
--- a/drivers/usb/musb/musb_regs.h
+++ b/drivers/usb/musb/musb_regs.h
@@ -273,67 +273,12 @@
 #define MUSB_RXHUBADDR		0x06
 #define MUSB_RXHUBPORT		0x07
 
-static inline void musb_write_txfifosz(void __iomem *mbase, u8 c_size)
-{
-	musb_writeb(mbase, MUSB_TXFIFOSZ, c_size);
-}
-
-static inline void musb_write_txfifoadd(void __iomem *mbase, u16 c_off)
-{
-	musb_writew(mbase, MUSB_TXFIFOADD, c_off);
-}
-
-static inline void musb_write_rxfifosz(void __iomem *mbase, u8 c_size)
-{
-	musb_writeb(mbase, MUSB_RXFIFOSZ, c_size);
-}
-
-static inline void  musb_write_rxfifoadd(void __iomem *mbase, u16 c_off)
-{
-	musb_writew(mbase, MUSB_RXFIFOADD, c_off);
-}
-
-static inline void musb_write_ulpi_buscontrol(void __iomem *mbase, u8 val)
-{
-	musb_writeb(mbase, MUSB_ULPI_BUSCONTROL, val);
-}
-
-static inline u8 musb_read_txfifosz(void __iomem *mbase)
-{
-	return musb_readb(mbase, MUSB_TXFIFOSZ);
-}
-
-static inline u16 musb_read_txfifoadd(void __iomem *mbase)
-{
-	return musb_readw(mbase, MUSB_TXFIFOADD);
-}
-
-static inline u8 musb_read_rxfifosz(void __iomem *mbase)
-{
-	return musb_readb(mbase, MUSB_RXFIFOSZ);
-}
-
-static inline u16  musb_read_rxfifoadd(void __iomem *mbase)
-{
-	return musb_readw(mbase, MUSB_RXFIFOADD);
-}
-
-static inline u8 musb_read_ulpi_buscontrol(void __iomem *mbase)
-{
-	return musb_readb(mbase, MUSB_ULPI_BUSCONTROL);
-}
-
 static inline u8 musb_read_configdata(void __iomem *mbase)
 {
 	musb_writeb(mbase, MUSB_INDEX, 0);
 	return musb_readb(mbase, 0x10 + MUSB_CONFIGDATA);
 }
 
-static inline u16 musb_read_hwvers(void __iomem *mbase)
-{
-	return musb_readw(mbase, MUSB_HWVERS);
-}
-
 static inline void musb_write_rxfunaddr(struct musb *musb, u8 epnum,
 		u8 qh_addr_reg)
 {
diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c
index 2f8dd98..a84ec27 100644
--- a/drivers/usb/musb/musb_virthub.c
+++ b/drivers/usb/musb/musb_virthub.c
@@ -254,7 +254,7 @@
 #ifdef CONFIG_USB_MUSB_HOST
 	return 1;
 #else
-	return musb->port_mode == MUSB_PORT_MODE_HOST;
+	return musb->port_mode == MUSB_HOST;
 #endif
 }
 
diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c
index 4389fc3..a688f7f 100644
--- a/drivers/usb/musb/musbhsdma.c
+++ b/drivers/usb/musb/musbhsdma.c
@@ -10,7 +10,71 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include "musb_core.h"
-#include "musbhsdma.h"
+
+#define MUSB_HSDMA_BASE		0x200
+#define MUSB_HSDMA_INTR		(MUSB_HSDMA_BASE + 0)
+#define MUSB_HSDMA_CONTROL		0x4
+#define MUSB_HSDMA_ADDRESS		0x8
+#define MUSB_HSDMA_COUNT		0xc
+
+#define MUSB_HSDMA_CHANNEL_OFFSET(_bchannel, _offset)		\
+		(MUSB_HSDMA_BASE + (_bchannel << 4) + _offset)
+
+#define musb_read_hsdma_addr(mbase, bchannel)	\
+	musb_readl(mbase,	\
+		   MUSB_HSDMA_CHANNEL_OFFSET(bchannel, MUSB_HSDMA_ADDRESS))
+
+#define musb_write_hsdma_addr(mbase, bchannel, addr) \
+	musb_writel(mbase, \
+		    MUSB_HSDMA_CHANNEL_OFFSET(bchannel, MUSB_HSDMA_ADDRESS), \
+		    addr)
+
+#define musb_read_hsdma_count(mbase, bchannel)	\
+	musb_readl(mbase,	\
+		   MUSB_HSDMA_CHANNEL_OFFSET(bchannel, MUSB_HSDMA_COUNT))
+
+#define musb_write_hsdma_count(mbase, bchannel, len) \
+	musb_writel(mbase, \
+		    MUSB_HSDMA_CHANNEL_OFFSET(bchannel, MUSB_HSDMA_COUNT), \
+		    len)
+/* control register (16-bit): */
+#define MUSB_HSDMA_ENABLE_SHIFT		0
+#define MUSB_HSDMA_TRANSMIT_SHIFT	1
+#define MUSB_HSDMA_MODE1_SHIFT		2
+#define MUSB_HSDMA_IRQENABLE_SHIFT	3
+#define MUSB_HSDMA_ENDPOINT_SHIFT	4
+#define MUSB_HSDMA_BUSERROR_SHIFT	8
+#define MUSB_HSDMA_BURSTMODE_SHIFT	9
+#define MUSB_HSDMA_BURSTMODE		(3 << MUSB_HSDMA_BURSTMODE_SHIFT)
+#define MUSB_HSDMA_BURSTMODE_UNSPEC	0
+#define MUSB_HSDMA_BURSTMODE_INCR4	1
+#define MUSB_HSDMA_BURSTMODE_INCR8	2
+#define MUSB_HSDMA_BURSTMODE_INCR16	3
+
+#define MUSB_HSDMA_CHANNELS		8
+
+struct musb_dma_controller;
+
+struct musb_dma_channel {
+	struct dma_channel		channel;
+	struct musb_dma_controller	*controller;
+	u32				start_addr;
+	u32				len;
+	u16				max_packet_sz;
+	u8				idx;
+	u8				epnum;
+	u8				transmit;
+};
+
+struct musb_dma_controller {
+	struct dma_controller		controller;
+	struct musb_dma_channel		channel[MUSB_HSDMA_CHANNELS];
+	void				*private_data;
+	void __iomem			*base;
+	u8				channel_count;
+	u8				used_channels;
+	int				irq;
+};
 
 static void dma_channel_release(struct dma_channel *channel);
 
@@ -135,14 +199,6 @@
 	BUG_ON(channel->status == MUSB_DMA_STATUS_UNKNOWN ||
 		channel->status == MUSB_DMA_STATUS_BUSY);
 
-	/* Let targets check/tweak the arguments */
-	if (musb->ops->adjust_channel_params) {
-		int ret = musb->ops->adjust_channel_params(channel,
-			packet_sz, &mode, &dma_addr, &len);
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * The DMA engine in RTL1.8 and above cannot handle
 	 * DMA addresses that are not aligned to a 4 byte boundary.
diff --git a/drivers/usb/musb/musbhsdma.h b/drivers/usb/musb/musbhsdma.h
deleted file mode 100644
index 9366513..0000000
--- a/drivers/usb/musb/musbhsdma.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * MUSB OTG driver - support for Mentor's DMA controller
- *
- * Copyright 2005 Mentor Graphics Corporation
- * Copyright (C) 2005-2007 by Texas Instruments
- */
-
-#define MUSB_HSDMA_BASE		0x200
-#define MUSB_HSDMA_INTR		(MUSB_HSDMA_BASE + 0)
-#define MUSB_HSDMA_CONTROL		0x4
-#define MUSB_HSDMA_ADDRESS		0x8
-#define MUSB_HSDMA_COUNT		0xc
-
-#define MUSB_HSDMA_CHANNEL_OFFSET(_bchannel, _offset)		\
-		(MUSB_HSDMA_BASE + (_bchannel << 4) + _offset)
-
-#define musb_read_hsdma_addr(mbase, bchannel)	\
-	musb_readl(mbase,	\
-		   MUSB_HSDMA_CHANNEL_OFFSET(bchannel, MUSB_HSDMA_ADDRESS))
-
-#define musb_write_hsdma_addr(mbase, bchannel, addr) \
-	musb_writel(mbase, \
-		    MUSB_HSDMA_CHANNEL_OFFSET(bchannel, MUSB_HSDMA_ADDRESS), \
-		    addr)
-
-#define musb_read_hsdma_count(mbase, bchannel)	\
-	musb_readl(mbase,	\
-		   MUSB_HSDMA_CHANNEL_OFFSET(bchannel, MUSB_HSDMA_COUNT))
-
-#define musb_write_hsdma_count(mbase, bchannel, len) \
-	musb_writel(mbase, \
-		    MUSB_HSDMA_CHANNEL_OFFSET(bchannel, MUSB_HSDMA_COUNT), \
-		    len)
-/* control register (16-bit): */
-#define MUSB_HSDMA_ENABLE_SHIFT		0
-#define MUSB_HSDMA_TRANSMIT_SHIFT	1
-#define MUSB_HSDMA_MODE1_SHIFT		2
-#define MUSB_HSDMA_IRQENABLE_SHIFT	3
-#define MUSB_HSDMA_ENDPOINT_SHIFT	4
-#define MUSB_HSDMA_BUSERROR_SHIFT	8
-#define MUSB_HSDMA_BURSTMODE_SHIFT	9
-#define MUSB_HSDMA_BURSTMODE		(3 << MUSB_HSDMA_BURSTMODE_SHIFT)
-#define MUSB_HSDMA_BURSTMODE_UNSPEC	0
-#define MUSB_HSDMA_BURSTMODE_INCR4	1
-#define MUSB_HSDMA_BURSTMODE_INCR8	2
-#define MUSB_HSDMA_BURSTMODE_INCR16	3
-
-#define MUSB_HSDMA_CHANNELS		8
-
-struct musb_dma_controller;
-
-struct musb_dma_channel {
-	struct dma_channel		channel;
-	struct musb_dma_controller	*controller;
-	u32				start_addr;
-	u32				len;
-	u16				max_packet_sz;
-	u8				idx;
-	u8				epnum;
-	u8				transmit;
-};
-
-struct musb_dma_controller {
-	struct dma_controller		controller;
-	struct musb_dma_channel		channel[MUSB_HSDMA_CHANNELS];
-	void				*private_data;
-	void __iomem			*base;
-	u8				channel_count;
-	u8				used_channels;
-	int				irq;
-};
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 5d70593..b1dd81f 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -77,7 +77,6 @@
 			otg_set_vbus(otg, 1);
 		} else {
 			musb->is_active = 1;
-			otg->default_a = 1;
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			devctl |= MUSB_DEVCTL_SESSION;
 			MUSB_HST_MODE(musb);
@@ -89,7 +88,6 @@
 		 * jumping right to B_IDLE...
 		 */
 
-		otg->default_a = 0;
 		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 		devctl &= ~MUSB_DEVCTL_SESSION;
 
@@ -148,14 +146,12 @@
 	struct musb_hdrc_platform_data *pdata =
 		dev_get_platdata(musb->controller);
 	struct omap_musb_board_data *data = pdata->board_data;
-	struct usb_otg *otg = musb->xceiv->otg;
 
 	pm_runtime_get_sync(musb->controller);
 	switch (glue->status) {
 	case MUSB_ID_GROUND:
 		dev_dbg(musb->controller, "ID GND\n");
 
-		otg->default_a = true;
 		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
 		musb->xceiv->last_event = USB_EVENT_ID;
 		if (musb->gadget_driver) {
@@ -168,7 +164,6 @@
 	case MUSB_VBUS_VALID:
 		dev_dbg(musb->controller, "VBUS Connect\n");
 
-		otg->default_a = false;
 		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 		musb->xceiv->last_event = USB_EVENT_VBUS;
 		omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DEVICE);
@@ -239,21 +234,15 @@
 	 * up through ULPI.  TWL4030-family PMICs include one,
 	 * which needs a driver, drivers aren't always needed.
 	 */
-	if (dev->parent->of_node) {
-		musb->phy = devm_phy_get(dev->parent, "usb2-phy");
+	musb->phy = devm_phy_get(dev->parent, "usb2-phy");
 
-		/* We can't totally remove musb->xceiv as of now because
-		 * musb core uses xceiv.state and xceiv.otg. Once we have
-		 * a separate state machine to handle otg, these can be moved
-		 * out of xceiv and then we can start using the generic PHY
-		 * framework
-		 */
-		musb->xceiv = devm_usb_get_phy_by_phandle(dev->parent,
-		    "usb-phy", 0);
-	} else {
-		musb->xceiv = devm_usb_get_phy_dev(dev, 0);
-		musb->phy = devm_phy_get(dev, "usb");
-	}
+	/* We can't totally remove musb->xceiv as of now because
+	 * musb core uses xceiv.state and xceiv.otg. Once we have
+	 * a separate state machine to handle otg, these can be moved
+	 * out of xceiv and then we can start using the generic PHY
+	 * framework
+	 */
+	musb->xceiv = devm_usb_get_phy_by_phandle(dev->parent, "usb-phy", 0);
 
 	if (IS_ERR(musb->xceiv)) {
 		status = PTR_ERR(musb->xceiv);
@@ -391,8 +380,13 @@
 	struct omap2430_glue		*glue;
 	struct device_node		*np = pdev->dev.of_node;
 	struct musb_hdrc_config		*config;
+	struct device_node		*control_node;
+	struct platform_device		*control_pdev;
 	int				ret = -ENOMEM, val;
 
+	if (!np)
+		return -ENODEV;
+
 	glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL);
 	if (!glue)
 		goto err0;
@@ -412,47 +406,43 @@
 	glue->status			= MUSB_UNKNOWN;
 	glue->control_otghs = ERR_PTR(-ENODEV);
 
-	if (np) {
-		struct device_node *control_node;
-		struct platform_device *control_pdev;
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		goto err2;
 
-		pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-		if (!pdata)
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		goto err2;
+
+	config = devm_kzalloc(&pdev->dev, sizeof(*config), GFP_KERNEL);
+	if (!config)
+		goto err2;
+
+	of_property_read_u32(np, "mode", (u32 *)&pdata->mode);
+	of_property_read_u32(np, "interface-type",
+			(u32 *)&data->interface_type);
+	of_property_read_u32(np, "num-eps", (u32 *)&config->num_eps);
+	of_property_read_u32(np, "ram-bits", (u32 *)&config->ram_bits);
+	of_property_read_u32(np, "power", (u32 *)&pdata->power);
+
+	ret = of_property_read_u32(np, "multipoint", &val);
+	if (!ret && val)
+		config->multipoint = true;
+
+	pdata->board_data	= data;
+	pdata->config		= config;
+
+	control_node = of_parse_phandle(np, "ctrl-module", 0);
+	if (control_node) {
+		control_pdev = of_find_device_by_node(control_node);
+		if (!control_pdev) {
+			dev_err(&pdev->dev, "Failed to get control device\n");
+			ret = -EINVAL;
 			goto err2;
-
-		data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
-		if (!data)
-			goto err2;
-
-		config = devm_kzalloc(&pdev->dev, sizeof(*config), GFP_KERNEL);
-		if (!config)
-			goto err2;
-
-		of_property_read_u32(np, "mode", (u32 *)&pdata->mode);
-		of_property_read_u32(np, "interface-type",
-						(u32 *)&data->interface_type);
-		of_property_read_u32(np, "num-eps", (u32 *)&config->num_eps);
-		of_property_read_u32(np, "ram-bits", (u32 *)&config->ram_bits);
-		of_property_read_u32(np, "power", (u32 *)&pdata->power);
-
-		ret = of_property_read_u32(np, "multipoint", &val);
-		if (!ret && val)
-			config->multipoint = true;
-
-		pdata->board_data	= data;
-		pdata->config		= config;
-
-		control_node = of_parse_phandle(np, "ctrl-module", 0);
-		if (control_node) {
-			control_pdev = of_find_device_by_node(control_node);
-			if (!control_pdev) {
-				dev_err(&pdev->dev, "Failed to get control device\n");
-				ret = -EINVAL;
-				goto err2;
-			}
-			glue->control_otghs = &control_pdev->dev;
 		}
+		glue->control_otghs = &control_pdev->dev;
 	}
+
 	pdata->platform_ops		= &omap2430_ops;
 
 	platform_set_drvdata(pdev, glue);
diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c
index 2d20121..832a41f 100644
--- a/drivers/usb/musb/sunxi.c
+++ b/drivers/usb/musb/sunxi.c
@@ -105,13 +105,11 @@
 		devctl = readb(musb->mregs + SUNXI_MUSB_DEVCTL);
 		if (test_bit(SUNXI_MUSB_FL_HOSTMODE, &glue->flags)) {
 			set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
-			musb->xceiv->otg->default_a = 1;
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			MUSB_HST_MODE(musb);
 			devctl |= MUSB_DEVCTL_SESSION;
 		} else {
 			clear_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
-			musb->xceiv->otg->default_a = 0;
 			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 			MUSB_DEV_MODE(musb);
 			devctl &= ~MUSB_DEVCTL_SESSION;
@@ -347,7 +345,7 @@
 	if (glue->phy_mode == new_mode)
 		return 0;
 
-	if (musb->port_mode != MUSB_PORT_MODE_DUAL_ROLE) {
+	if (musb->port_mode != MUSB_OTG) {
 		dev_err(musb->controller->parent,
 			"Error changing modes is only supported in dual role mode\n");
 		return -EINVAL;
@@ -651,10 +649,8 @@
 	.fifo_cfg_size  = ARRAY_SIZE(sunxi_musb_mode_cfg),
 	.multipoint	= true,
 	.dyn_fifo	= true,
-	.soft_con       = true,
 	.num_eps	= SUNXI_MUSB_MAX_EP_NUM,
 	.ram_bits	= SUNXI_MUSB_RAM_BITS,
-	.dma		= 0,
 };
 
 static struct musb_hdrc_config sunxi_musb_hdrc_config_h3 = {
@@ -662,10 +658,8 @@
 	.fifo_cfg_size  = ARRAY_SIZE(sunxi_musb_mode_cfg_h3),
 	.multipoint	= true,
 	.dyn_fifo	= true,
-	.soft_con       = true,
 	.num_eps	= SUNXI_MUSB_MAX_EP_NUM_H3,
 	.ram_bits	= SUNXI_MUSB_RAM_BITS,
-	.dma		= 0,
 };
 
 
@@ -690,19 +684,19 @@
 	switch (usb_get_dr_mode(&pdev->dev)) {
 #if defined CONFIG_USB_MUSB_DUAL_ROLE || defined CONFIG_USB_MUSB_HOST
 	case USB_DR_MODE_HOST:
-		pdata.mode = MUSB_PORT_MODE_HOST;
+		pdata.mode = MUSB_HOST;
 		glue->phy_mode = PHY_MODE_USB_HOST;
 		break;
 #endif
 #if defined CONFIG_USB_MUSB_DUAL_ROLE || defined CONFIG_USB_MUSB_GADGET
 	case USB_DR_MODE_PERIPHERAL:
-		pdata.mode = MUSB_PORT_MODE_GADGET;
+		pdata.mode = MUSB_PERIPHERAL;
 		glue->phy_mode = PHY_MODE_USB_DEVICE;
 		break;
 #endif
 #ifdef CONFIG_USB_MUSB_DUAL_ROLE
 	case USB_DR_MODE_OTG:
-		pdata.mode = MUSB_PORT_MODE_DUAL_ROLE;
+		pdata.mode = MUSB_OTG;
 		glue->phy_mode = PHY_MODE_USB_OTG;
 		break;
 #endif
diff --git a/drivers/usb/musb/ux500.c b/drivers/usb/musb/ux500.c
index 27b4a77..73538d1 100644
--- a/drivers/usb/musb/ux500.c
+++ b/drivers/usb/musb/ux500.c
@@ -62,7 +62,6 @@
 
 		} else {
 			musb->is_active = 1;
-			musb->xceiv->otg->default_a = 1;
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			devctl |= MUSB_DEVCTL_SESSION;
 			MUSB_HST_MODE(musb);
@@ -73,7 +72,6 @@
 		/* NOTE: we're skipping A_WAIT_VFALL -> A_IDLE and jumping
 		 * right to B_IDLE...
 		 */
-		musb->xceiv->otg->default_a = 0;
 		devctl &= ~MUSB_DEVCTL_SESSION;
 		MUSB_DEV_MODE(musb);
 	}
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 0f8ab98..d7312ee 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -159,6 +159,16 @@
 
 	  MXS Phy is used by some of the i.MX SoCs, for example imx23/28/6x.
 
+config USB_TEGRA_PHY
+	tristate "NVIDIA Tegra USB PHY Driver"
+	depends on ARCH_TEGRA
+	select USB_COMMON
+	select USB_PHY
+	select USB_ULPI
+	help
+	  This driver provides PHY support for the USB controllers found
+	  on NVIDIA Tegra SoCs.
+
 config USB_ULPI
 	bool "Generic ULPI Transceiver Driver"
 	depends on ARM || ARM64
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index 25e579f..df1d990 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -16,7 +16,7 @@
 obj-$(CONFIG_AM335X_PHY_USB)		+= phy-am335x.o
 obj-$(CONFIG_OMAP_OTG)			+= phy-omap-otg.o
 obj-$(CONFIG_TWL6030_USB)		+= phy-twl6030-usb.o
-obj-$(CONFIG_USB_EHCI_TEGRA)		+= phy-tegra-usb.o
+obj-$(CONFIG_USB_TEGRA_PHY)		+= phy-tegra-usb.o
 obj-$(CONFIG_USB_GPIO_VBUS)		+= phy-gpio-vbus-usb.o
 obj-$(CONFIG_USB_ISP1301)		+= phy-isp1301.o
 obj-$(CONFIG_USB_MV_OTG)		+= phy-mv-usb.o
diff --git a/drivers/usb/phy/phy-am335x.c b/drivers/usb/phy/phy-am335x.c
index b36fa8b..27bdb72 100644
--- a/drivers/usb/phy/phy-am335x.c
+++ b/drivers/usb/phy/phy-am335x.c
@@ -96,8 +96,7 @@
 #ifdef CONFIG_PM_SLEEP
 static int am335x_phy_suspend(struct device *dev)
 {
-	struct platform_device	*pdev = to_platform_device(dev);
-	struct am335x_phy *am_phy = platform_get_drvdata(pdev);
+	struct am335x_phy *am_phy = dev_get_drvdata(dev);
 
 	/*
 	 * Enable phy wakeup only if dev->power.can_wakeup is true.
@@ -117,8 +116,7 @@
 
 static int am335x_phy_resume(struct device *dev)
 {
-	struct platform_device	*pdev = to_platform_device(dev);
-	struct am335x_phy	*am_phy = platform_get_drvdata(pdev);
+	struct am335x_phy	*am_phy = dev_get_drvdata(dev);
 
 	phy_ctrl_power(am_phy->phy_ctrl, am_phy->id, am_phy->dr_mode, true);
 
diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c
index 0e8d23e..ea7ef1d 100644
--- a/drivers/usb/phy/phy-tegra-usb.c
+++ b/drivers/usb/phy/phy-tegra-usb.c
@@ -236,13 +236,83 @@
 
 static int utmip_pad_open(struct tegra_usb_phy *phy)
 {
+	int ret;
+
 	phy->pad_clk = devm_clk_get(phy->u_phy.dev, "utmi-pads");
 	if (IS_ERR(phy->pad_clk)) {
-		pr_err("%s: can't get utmip pad clock\n", __func__);
-		return PTR_ERR(phy->pad_clk);
+		ret = PTR_ERR(phy->pad_clk);
+		dev_err(phy->u_phy.dev,
+			"Failed to get UTMIP pad clock: %d\n", ret);
+		return ret;
 	}
 
-	return 0;
+	phy->pad_rst = devm_reset_control_get_optional_shared(
+						phy->u_phy.dev, "utmi-pads");
+	if (IS_ERR(phy->pad_rst)) {
+		ret = PTR_ERR(phy->pad_rst);
+		dev_err(phy->u_phy.dev,
+			"Failed to get UTMI-pads reset: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(phy->pad_clk);
+	if (ret) {
+		dev_err(phy->u_phy.dev,
+			"Failed to enable UTMI-pads clock: %d\n", ret);
+		return ret;
+	}
+
+	spin_lock(&utmip_pad_lock);
+
+	ret = reset_control_deassert(phy->pad_rst);
+	if (ret) {
+		dev_err(phy->u_phy.dev,
+			"Failed to initialize UTMI-pads reset: %d\n", ret);
+		goto unlock;
+	}
+
+	ret = reset_control_assert(phy->pad_rst);
+	if (ret) {
+		dev_err(phy->u_phy.dev,
+			"Failed to assert UTMI-pads reset: %d\n", ret);
+		goto unlock;
+	}
+
+	udelay(1);
+
+	ret = reset_control_deassert(phy->pad_rst);
+	if (ret)
+		dev_err(phy->u_phy.dev,
+			"Failed to deassert UTMI-pads reset: %d\n", ret);
+unlock:
+	spin_unlock(&utmip_pad_lock);
+
+	clk_disable_unprepare(phy->pad_clk);
+
+	return ret;
+}
+
+static int utmip_pad_close(struct tegra_usb_phy *phy)
+{
+	int ret;
+
+	ret = clk_prepare_enable(phy->pad_clk);
+	if (ret) {
+		dev_err(phy->u_phy.dev,
+			"Failed to enable UTMI-pads clock: %d\n", ret);
+		return ret;
+	}
+
+	ret = reset_control_assert(phy->pad_rst);
+	if (ret)
+		dev_err(phy->u_phy.dev,
+			"Failed to assert UTMI-pads reset: %d\n", ret);
+
+	udelay(1);
+
+	clk_disable_unprepare(phy->pad_clk);
+
+	return ret;
 }
 
 static void utmip_pad_power_on(struct tegra_usb_phy *phy)
@@ -282,7 +352,7 @@
 	void __iomem *base = phy->pad_regs;
 
 	if (!utmip_pad_count) {
-		pr_err("%s: utmip pad already powered off\n", __func__);
+		dev_err(phy->u_phy.dev, "UTMIP pad already powered off\n");
 		return -EINVAL;
 	}
 
@@ -338,7 +408,8 @@
 		set_phcd(phy, true);
 
 	if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID, 0) < 0)
-		pr_err("%s: timeout waiting for phy to stabilize\n", __func__);
+		dev_err(phy->u_phy.dev,
+			"Timeout waiting for PHY to stabilize on disable\n");
 }
 
 static void utmi_phy_clk_enable(struct tegra_usb_phy *phy)
@@ -370,7 +441,8 @@
 
 	if (utmi_wait_register(base + USB_SUSP_CTRL, USB_PHY_CLK_VALID,
 						     USB_PHY_CLK_VALID))
-		pr_err("%s: timeout waiting for phy to stabilize\n", __func__);
+		dev_err(phy->u_phy.dev,
+			"Timeout waiting for PHY to stabilize on enable\n");
 }
 
 static int utmi_phy_power_on(struct tegra_usb_phy *phy)
@@ -617,15 +689,15 @@
 
 	ret = gpio_direction_output(phy->reset_gpio, 0);
 	if (ret < 0) {
-		dev_err(phy->u_phy.dev, "gpio %d not set to 0\n",
-			phy->reset_gpio);
+		dev_err(phy->u_phy.dev, "GPIO %d not set to 0: %d\n",
+			phy->reset_gpio, ret);
 		return ret;
 	}
 	msleep(5);
 	ret = gpio_direction_output(phy->reset_gpio, 1);
 	if (ret < 0) {
-		dev_err(phy->u_phy.dev, "gpio %d not set to 1\n",
-			phy->reset_gpio);
+		dev_err(phy->u_phy.dev, "GPIO %d not set to 1: %d\n",
+			phy->reset_gpio, ret);
 		return ret;
 	}
 
@@ -661,13 +733,13 @@
 	/* Fix VbusInvalid due to floating VBUS */
 	ret = usb_phy_io_write(phy->ulpi, 0x40, 0x08);
 	if (ret) {
-		pr_err("%s: ulpi write failed\n", __func__);
+		dev_err(phy->u_phy.dev, "ULPI write failed: %d\n", ret);
 		return ret;
 	}
 
 	ret = usb_phy_io_write(phy->ulpi, 0x80, 0x0B);
 	if (ret) {
-		pr_err("%s: ulpi write failed\n", __func__);
+		dev_err(phy->u_phy.dev, "ULPI write failed: %d\n", ret);
 		return ret;
 	}
 
@@ -694,6 +766,9 @@
 	if (!IS_ERR(phy->vbus))
 		regulator_disable(phy->vbus);
 
+	if (!phy->is_ulpi_phy)
+		utmip_pad_close(phy);
+
 	clk_disable_unprepare(phy->pll_u);
 }
 
@@ -728,28 +803,30 @@
 
 	phy->clk = devm_clk_get(phy->u_phy.dev, "ulpi-link");
 	if (IS_ERR(phy->clk)) {
-		pr_err("%s: can't get ulpi clock\n", __func__);
-		return PTR_ERR(phy->clk);
+		err = PTR_ERR(phy->clk);
+		dev_err(phy->u_phy.dev, "Failed to get ULPI clock: %d\n", err);
+		return err;
 	}
 
 	err = devm_gpio_request(phy->u_phy.dev, phy->reset_gpio,
 		"ulpi_phy_reset_b");
 	if (err < 0) {
-		dev_err(phy->u_phy.dev, "request failed for gpio: %d\n",
-		       phy->reset_gpio);
+		dev_err(phy->u_phy.dev, "Request failed for GPIO %d: %d\n",
+			phy->reset_gpio, err);
 		return err;
 	}
 
 	err = gpio_direction_output(phy->reset_gpio, 0);
 	if (err < 0) {
-		dev_err(phy->u_phy.dev, "gpio %d direction not set to output\n",
-		       phy->reset_gpio);
+		dev_err(phy->u_phy.dev,
+			"GPIO %d direction not set to output: %d\n",
+			phy->reset_gpio, err);
 		return err;
 	}
 
 	phy->ulpi = otg_ulpi_create(&ulpi_viewport_access_ops, 0);
 	if (!phy->ulpi) {
-		dev_err(phy->u_phy.dev, "otg_ulpi_create returned NULL\n");
+		dev_err(phy->u_phy.dev, "Failed to create ULPI OTG\n");
 		err = -ENOMEM;
 		return err;
 	}
@@ -766,8 +843,10 @@
 
 	phy->pll_u = devm_clk_get(phy->u_phy.dev, "pll_u");
 	if (IS_ERR(phy->pll_u)) {
-		pr_err("Can't get pll_u clock\n");
-		return PTR_ERR(phy->pll_u);
+		err = PTR_ERR(phy->pll_u);
+		dev_err(phy->u_phy.dev,
+			"Failed to get pll_u clock: %d\n", err);
+		return err;
 	}
 
 	err = clk_prepare_enable(phy->pll_u);
@@ -782,7 +861,8 @@
 		}
 	}
 	if (!phy->freq) {
-		pr_err("invalid pll_u parent rate %ld\n", parent_rate);
+		dev_err(phy->u_phy.dev, "Invalid pll_u parent rate %ld\n",
+			parent_rate);
 		err = -EINVAL;
 		goto fail;
 	}
@@ -791,7 +871,7 @@
 		err = regulator_enable(phy->vbus);
 		if (err) {
 			dev_err(phy->u_phy.dev,
-				"failed to enable usb vbus regulator: %d\n",
+				"Failed to enable USB VBUS regulator: %d\n",
 				err);
 			goto fail;
 		}
@@ -855,7 +935,8 @@
 	int err = of_property_read_u32(pdev->dev.of_node, param, &value);
 	*dest = (u8)value;
 	if (err < 0)
-		dev_err(&pdev->dev, "Failed to read USB UTMI parameter %s: %d\n",
+		dev_err(&pdev->dev,
+			"Failed to read USB UTMI parameter %s: %d\n",
 			param, err);
 	return err;
 }
@@ -871,14 +952,14 @@
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	if (!res) {
-		dev_err(&pdev->dev, "Failed to get UTMI Pad regs\n");
+		dev_err(&pdev->dev, "Failed to get UTMI pad regs\n");
 		return  -ENXIO;
 	}
 
 	tegra_phy->pad_regs = devm_ioremap(&pdev->dev, res->start,
 		resource_size(res));
 	if (!tegra_phy->pad_regs) {
-		dev_err(&pdev->dev, "Failed to remap UTMI Pad regs\n");
+		dev_err(&pdev->dev, "Failed to remap UTMI pad regs\n");
 		return -ENOMEM;
 	}
 
@@ -1020,15 +1101,16 @@
 		tegra_phy->reset_gpio =
 			of_get_named_gpio(np, "nvidia,phy-reset-gpio", 0);
 		if (!gpio_is_valid(tegra_phy->reset_gpio)) {
-			dev_err(&pdev->dev, "invalid gpio: %d\n",
-				tegra_phy->reset_gpio);
+			dev_err(&pdev->dev,
+				"Invalid GPIO: %d\n", tegra_phy->reset_gpio);
 			return tegra_phy->reset_gpio;
 		}
 		tegra_phy->config = NULL;
 		break;
 
 	default:
-		dev_err(&pdev->dev, "phy_type is invalid or unsupported\n");
+		dev_err(&pdev->dev, "phy_type %u is invalid or unsupported\n",
+			phy_type);
 		return -EINVAL;
 	}
 
diff --git a/drivers/usb/phy/phy.c b/drivers/usb/phy/phy.c
index bceb2c9..0277f627 100644
--- a/drivers/usb/phy/phy.c
+++ b/drivers/usb/phy/phy.c
@@ -27,7 +27,6 @@
 #define DEFAULT_ACA_CUR_MAX	5000
 
 static LIST_HEAD(phy_list);
-static LIST_HEAD(phy_bind_list);
 static DEFINE_SPINLOCK(phy_lock);
 
 struct phy_devm {
@@ -50,24 +49,6 @@
 	return ERR_PTR(-ENODEV);
 }
 
-static struct usb_phy *__usb_find_phy_dev(struct device *dev,
-	struct list_head *list, u8 index)
-{
-	struct usb_phy_bind *phy_bind = NULL;
-
-	list_for_each_entry(phy_bind, list, list) {
-		if (!(strcmp(phy_bind->dev_name, dev_name(dev))) &&
-				phy_bind->index == index) {
-			if (phy_bind->phy)
-				return phy_bind->phy;
-			else
-				return ERR_PTR(-EPROBE_DEFER);
-		}
-	}
-
-	return ERR_PTR(-ENODEV);
-}
-
 static struct usb_phy *__of_usb_find_phy(struct device_node *node)
 {
 	struct usb_phy  *phy;
@@ -585,72 +566,6 @@
 EXPORT_SYMBOL_GPL(devm_usb_get_phy_by_phandle);
 
 /**
- * usb_get_phy_dev - find the USB PHY
- * @dev - device that requests this phy
- * @index - the index of the phy
- *
- * Returns the phy driver, after getting a refcount to it; or
- * -ENODEV if there is no such phy.  The caller is responsible for
- * calling usb_put_phy() to release that count.
- *
- * For use by USB host and peripheral drivers.
- */
-struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index)
-{
-	struct usb_phy	*phy = NULL;
-	unsigned long	flags;
-
-	spin_lock_irqsave(&phy_lock, flags);
-
-	phy = __usb_find_phy_dev(dev, &phy_bind_list, index);
-	if (IS_ERR(phy) || !try_module_get(phy->dev->driver->owner)) {
-		dev_dbg(dev, "unable to find transceiver\n");
-		if (!IS_ERR(phy))
-			phy = ERR_PTR(-ENODEV);
-
-		goto err0;
-	}
-
-	get_device(phy->dev);
-
-err0:
-	spin_unlock_irqrestore(&phy_lock, flags);
-
-	return phy;
-}
-EXPORT_SYMBOL_GPL(usb_get_phy_dev);
-
-/**
- * devm_usb_get_phy_dev - find the USB PHY using device ptr and index
- * @dev - device that requests this phy
- * @index - the index of the phy
- *
- * Gets the phy using usb_get_phy_dev(), and associates a device with it using
- * devres. On driver detach, release function is invoked on the devres data,
- * then, devres data is freed.
- *
- * For use by USB host and peripheral drivers.
- */
-struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index)
-{
-	struct usb_phy **ptr, *phy;
-
-	ptr = devres_alloc(devm_usb_phy_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return NULL;
-
-	phy = usb_get_phy_dev(dev, index);
-	if (!IS_ERR(phy)) {
-		*ptr = phy;
-		devres_add(dev, ptr);
-	} else
-		devres_free(ptr);
-
-	return phy;
-}
-EXPORT_SYMBOL_GPL(devm_usb_get_phy_dev);
-
-/**
  * devm_usb_put_phy - release the USB PHY
  * @dev - device that wants to release this phy
  * @phy - the phy returned by devm_usb_get_phy()
@@ -745,7 +660,6 @@
  */
 int usb_add_phy_dev(struct usb_phy *x)
 {
-	struct usb_phy_bind *phy_bind;
 	unsigned long flags;
 	int ret;
 
@@ -762,13 +676,9 @@
 	ATOMIC_INIT_NOTIFIER_HEAD(&x->notifier);
 
 	spin_lock_irqsave(&phy_lock, flags);
-	list_for_each_entry(phy_bind, &phy_bind_list, list)
-		if (!(strcmp(phy_bind->phy_dev_name, dev_name(x->dev))))
-			phy_bind->phy = x;
-
 	list_add_tail(&x->head, &phy_list);
-
 	spin_unlock_irqrestore(&phy_lock, flags);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(usb_add_phy_dev);
@@ -782,54 +692,15 @@
 void usb_remove_phy(struct usb_phy *x)
 {
 	unsigned long	flags;
-	struct usb_phy_bind *phy_bind;
 
 	spin_lock_irqsave(&phy_lock, flags);
-	if (x) {
-		list_for_each_entry(phy_bind, &phy_bind_list, list)
-			if (phy_bind->phy == x)
-				phy_bind->phy = NULL;
+	if (x)
 		list_del(&x->head);
-	}
 	spin_unlock_irqrestore(&phy_lock, flags);
 }
 EXPORT_SYMBOL_GPL(usb_remove_phy);
 
 /**
- * usb_bind_phy - bind the phy and the controller that uses the phy
- * @dev_name: the device name of the device that will bind to the phy
- * @index: index to specify the port number
- * @phy_dev_name: the device name of the phy
- *
- * Fills the phy_bind structure with the dev_name and phy_dev_name. This will
- * be used when the phy driver registers the phy and when the controller
- * requests this phy.
- *
- * To be used by platform specific initialization code.
- */
-int usb_bind_phy(const char *dev_name, u8 index,
-				const char *phy_dev_name)
-{
-	struct usb_phy_bind *phy_bind;
-	unsigned long flags;
-
-	phy_bind = kzalloc(sizeof(*phy_bind), GFP_KERNEL);
-	if (!phy_bind)
-		return -ENOMEM;
-
-	phy_bind->dev_name = dev_name;
-	phy_bind->phy_dev_name = phy_dev_name;
-	phy_bind->index = index;
-
-	spin_lock_irqsave(&phy_lock, flags);
-	list_add_tail(&phy_bind->list, &phy_bind_list);
-	spin_unlock_irqrestore(&phy_lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(usb_bind_phy);
-
-/**
  * usb_phy_set_event - set event to phy event
  * @x: the phy returned by usb_get_phy();
  *
diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h
index f619afe..6137f79 100644
--- a/drivers/usb/renesas_usbhs/common.h
+++ b/drivers/usb/renesas_usbhs/common.h
@@ -276,7 +276,6 @@
 	 */
 	struct usbhs_fifo_info fifo_info;
 
-	struct usb_phy *usb_phy;
 	struct phy *phy;
 };
 
diff --git a/drivers/usb/renesas_usbhs/rcar2.c b/drivers/usb/renesas_usbhs/rcar2.c
index 85a0e09..0027092 100644
--- a/drivers/usb/renesas_usbhs/rcar2.c
+++ b/drivers/usb/renesas_usbhs/rcar2.c
@@ -8,7 +8,6 @@
 #include <linux/gpio.h>
 #include <linux/of_gpio.h>
 #include <linux/phy/phy.h>
-#include <linux/usb/phy.h>
 #include "common.h"
 #include "rcar2.h"
 
@@ -26,16 +25,6 @@
 		return 0;
 	}
 
-	if (IS_ENABLED(CONFIG_USB_PHY)) {
-		struct usb_phy *usb_phy = usb_get_phy_dev(&pdev->dev, 0);
-
-		if (IS_ERR(usb_phy))
-			return PTR_ERR(usb_phy);
-
-		priv->usb_phy = usb_phy;
-		return 0;
-	}
-
 	return -ENXIO;
 }
 
@@ -48,11 +37,6 @@
 		priv->phy = NULL;
 	}
 
-	if (priv->usb_phy) {
-		usb_put_phy(priv->usb_phy);
-		priv->usb_phy = NULL;
-	}
-
 	return 0;
 }
 
@@ -75,19 +59,6 @@
 		}
 	}
 
-	if (priv->usb_phy) {
-		if (enable) {
-			retval = usb_phy_init(priv->usb_phy);
-
-			if (!retval)
-				retval = usb_phy_set_suspend(priv->usb_phy, 0);
-		} else {
-			usb_phy_set_suspend(priv->usb_phy, 1);
-			usb_phy_shutdown(priv->usb_phy);
-			retval = 0;
-		}
-	}
-
 	return retval;
 }
 
diff --git a/drivers/usb/roles/intel-xhci-usb-role-switch.c b/drivers/usb/roles/intel-xhci-usb-role-switch.c
index de72eed..1fb3dd0 100644
--- a/drivers/usb/roles/intel-xhci-usb-role-switch.c
+++ b/drivers/usb/roles/intel-xhci-usb-role-switch.c
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/usb/role.h>
 
 /* register definition */
@@ -38,20 +39,6 @@
 	void __iomem *base;
 };
 
-struct intel_xhci_acpi_match {
-	const char *hid;
-	int hrv;
-};
-
-/*
- * ACPI IDs for PMICs which do not support separate data and power role
- * detection (USB ACA detection for micro USB OTG), we allow userspace to
- * change the role manually on these.
- */
-static const struct intel_xhci_acpi_match allow_userspace_ctrl_ids[] = {
-	{ "INT33F4",  3 }, /* X-Powers AXP288 PMIC */
-};
-
 static int intel_xhci_usb_set_role(struct device *dev, enum usb_role role)
 {
 	struct intel_xhci_usb_data *data = dev_get_drvdata(dev);
@@ -70,6 +57,8 @@
 		return -EIO;
 	}
 
+	pm_runtime_get_sync(dev);
+
 	/* Set idpin value as requested */
 	val = readl(data->base + DUAL_ROLE_CFG0);
 	switch (role) {
@@ -98,13 +87,17 @@
 	/* Polling on CFG1 register to confirm mode switch.*/
 	do {
 		val = readl(data->base + DUAL_ROLE_CFG1);
-		if (!!(val & HOST_MODE) == (role == USB_ROLE_HOST))
+		if (!!(val & HOST_MODE) == (role == USB_ROLE_HOST)) {
+			pm_runtime_put(dev);
 			return 0;
+		}
 
 		/* Interval for polling is set to about 5 - 10 ms */
 		usleep_range(5000, 10000);
 	} while (time_before(jiffies, timeout));
 
+	pm_runtime_put(dev);
+
 	dev_warn(dev, "Timeout waiting for role-switch\n");
 	return -ETIMEDOUT;
 }
@@ -115,7 +108,9 @@
 	enum usb_role role;
 	u32 val;
 
+	pm_runtime_get_sync(dev);
 	val = readl(data->base + DUAL_ROLE_CFG0);
+	pm_runtime_put(dev);
 
 	if (!(val & SW_IDPIN))
 		role = USB_ROLE_HOST;
@@ -127,9 +122,10 @@
 	return role;
 }
 
-static struct usb_role_switch_desc sw_desc = {
+static const struct usb_role_switch_desc sw_desc = {
 	.set = intel_xhci_usb_set_role,
 	.get = intel_xhci_usb_get_role,
+	.allow_userspace_control = true,
 };
 
 static int intel_xhci_usb_probe(struct platform_device *pdev)
@@ -137,28 +133,27 @@
 	struct device *dev = &pdev->dev;
 	struct intel_xhci_usb_data *data;
 	struct resource *res;
-	int i;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
 	data->base = devm_ioremap_nocache(dev, res->start, resource_size(res));
 	if (!data->base)
 		return -ENOMEM;
 
-	for (i = 0; i < ARRAY_SIZE(allow_userspace_ctrl_ids); i++)
-		if (acpi_dev_present(allow_userspace_ctrl_ids[i].hid, "1",
-				     allow_userspace_ctrl_ids[i].hrv))
-			sw_desc.allow_userspace_control = true;
-
 	platform_set_drvdata(pdev, data);
 
 	data->role_sw = usb_role_switch_register(dev, &sw_desc);
 	if (IS_ERR(data->role_sw))
 		return PTR_ERR(data->role_sw);
 
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+
 	return 0;
 }
 
diff --git a/drivers/usb/serial/bus.c b/drivers/usb/serial/bus.c
index 9e265eb..eb0195c 100644
--- a/drivers/usb/serial/bus.c
+++ b/drivers/usb/serial/bus.c
@@ -60,7 +60,8 @@
 	}
 
 	minor = port->minor;
-	tty_dev = tty_register_device(usb_serial_tty_driver, minor, dev);
+	tty_dev = tty_port_register_device(&port->port, usb_serial_tty_driver,
+					   minor, dev);
 	if (IS_ERR(tty_dev)) {
 		retval = PTR_ERR(tty_dev);
 		goto err_port_remove;
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 7ea221d..b5cef32 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -54,15 +54,14 @@
 	int custom_divisor;	/* custom_divisor kludge, this is for
 				   baud_base (different from what goes to the
 				   chip!) */
-	__u16 last_set_data_urb_value ;
-				/* the last data state set - needed for doing
-				 * a break
-				 */
+	u16 last_set_data_value; /* the last data state set - needed for doing
+				  * a break
+				  */
 	int flags;		/* some ASYNC_xxxx flags are supported */
 	unsigned long last_dtr_rts;	/* saved modem control outputs */
 	char prev_status;        /* Used for TIOCMIWAIT */
 	char transmit_empty;	/* If transmitter is empty or not */
-	__u16 interface;	/* FT2232C, FT2232H or FT4232H port interface
+	u16 interface;		/* FT2232C, FT2232H or FT4232H port interface
 				   (0 for FT232/245) */
 
 	speed_t force_baud;	/* if non-zero, force the baud rate to
@@ -1063,10 +1062,10 @@
 
 static unsigned short int ftdi_232am_baud_base_to_divisor(int baud, int base);
 static unsigned short int ftdi_232am_baud_to_divisor(int baud);
-static __u32 ftdi_232bm_baud_base_to_divisor(int baud, int base);
-static __u32 ftdi_232bm_baud_to_divisor(int baud);
-static __u32 ftdi_2232h_baud_base_to_divisor(int baud, int base);
-static __u32 ftdi_2232h_baud_to_divisor(int baud);
+static u32 ftdi_232bm_baud_base_to_divisor(int baud, int base);
+static u32 ftdi_232bm_baud_to_divisor(int baud);
+static u32 ftdi_2232h_baud_base_to_divisor(int baud, int base);
+static u32 ftdi_2232h_baud_to_divisor(int baud);
 
 static struct usb_serial_driver ftdi_sio_device = {
 	.driver = {
@@ -1136,14 +1135,14 @@
 	 return ftdi_232am_baud_base_to_divisor(baud, 48000000);
 }
 
-static __u32 ftdi_232bm_baud_base_to_divisor(int baud, int base)
+static u32 ftdi_232bm_baud_base_to_divisor(int baud, int base)
 {
 	static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 };
-	__u32 divisor;
+	u32 divisor;
 	/* divisor shifted 3 bits to the left */
 	int divisor3 = base / 2 / baud;
 	divisor = divisor3 >> 3;
-	divisor |= (__u32)divfrac[divisor3 & 0x7] << 14;
+	divisor |= (u32)divfrac[divisor3 & 0x7] << 14;
 	/* Deal with special cases for highest baud rates. */
 	if (divisor == 1)
 		divisor = 0;
@@ -1152,22 +1151,22 @@
 	return divisor;
 }
 
-static __u32 ftdi_232bm_baud_to_divisor(int baud)
+static u32 ftdi_232bm_baud_to_divisor(int baud)
 {
 	 return ftdi_232bm_baud_base_to_divisor(baud, 48000000);
 }
 
-static __u32 ftdi_2232h_baud_base_to_divisor(int baud, int base)
+static u32 ftdi_2232h_baud_base_to_divisor(int baud, int base)
 {
 	static const unsigned char divfrac[8] = { 0, 3, 2, 4, 1, 5, 6, 7 };
-	__u32 divisor;
+	u32 divisor;
 	int divisor3;
 
 	/* hi-speed baud rate is 10-bit sampling instead of 16-bit */
 	divisor3 = base * 8 / (baud * 10);
 
 	divisor = divisor3 >> 3;
-	divisor |= (__u32)divfrac[divisor3 & 0x7] << 14;
+	divisor |= (u32)divfrac[divisor3 & 0x7] << 14;
 	/* Deal with special cases for highest baud rates. */
 	if (divisor == 1)
 		divisor = 0;
@@ -1182,7 +1181,7 @@
 	return divisor;
 }
 
-static __u32 ftdi_2232h_baud_to_divisor(int baud)
+static u32 ftdi_2232h_baud_to_divisor(int baud)
 {
 	 return ftdi_2232h_baud_base_to_divisor(baud, 120000000);
 }
@@ -1195,7 +1194,7 @@
 {
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
 	struct device *dev = &port->dev;
-	unsigned urb_value;
+	unsigned value;
 	int rv;
 
 	if (((set | clear) & (TIOCM_DTR | TIOCM_RTS)) == 0) {
@@ -1204,20 +1203,20 @@
 	}
 
 	clear &= ~set;	/* 'set' takes precedence over 'clear' */
-	urb_value = 0;
+	value = 0;
 	if (clear & TIOCM_DTR)
-		urb_value |= FTDI_SIO_SET_DTR_LOW;
+		value |= FTDI_SIO_SET_DTR_LOW;
 	if (clear & TIOCM_RTS)
-		urb_value |= FTDI_SIO_SET_RTS_LOW;
+		value |= FTDI_SIO_SET_RTS_LOW;
 	if (set & TIOCM_DTR)
-		urb_value |= FTDI_SIO_SET_DTR_HIGH;
+		value |= FTDI_SIO_SET_DTR_HIGH;
 	if (set & TIOCM_RTS)
-		urb_value |= FTDI_SIO_SET_RTS_HIGH;
+		value |= FTDI_SIO_SET_RTS_HIGH;
 	rv = usb_control_msg(port->serial->dev,
 			       usb_sndctrlpipe(port->serial->dev, 0),
 			       FTDI_SIO_SET_MODEM_CTRL_REQUEST,
 			       FTDI_SIO_SET_MODEM_CTRL_REQUEST_TYPE,
-			       urb_value, priv->interface,
+			       value, priv->interface,
 			       NULL, 0, WDR_TIMEOUT);
 	if (rv < 0) {
 		dev_dbg(dev, "%s Error from MODEM_CTRL urb: DTR %s, RTS %s\n",
@@ -1236,12 +1235,12 @@
 }
 
 
-static __u32 get_ftdi_divisor(struct tty_struct *tty,
+static u32 get_ftdi_divisor(struct tty_struct *tty,
 						struct usb_serial_port *port)
 {
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
 	struct device *dev = &port->dev;
-	__u32 div_value = 0;
+	u32 div_value = 0;
 	int div_okay = 1;
 	int baud;
 
@@ -1299,7 +1298,7 @@
 	case FT232RL: /* FT232RL chip */
 	case FTX:     /* FT-X series */
 		if (baud <= 3000000) {
-			__u16 product_id = le16_to_cpu(
+			u16 product_id = le16_to_cpu(
 				port->serial->dev->descriptor.idProduct);
 			if (((product_id == FTDI_NDI_HUC_PID)		||
 			     (product_id == FTDI_NDI_SPECTRA_SCU_PID)	||
@@ -1346,26 +1345,26 @@
 static int change_speed(struct tty_struct *tty, struct usb_serial_port *port)
 {
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	__u16 urb_value;
-	__u16 urb_index;
-	__u32 urb_index_value;
+	u16 value;
+	u16 index;
+	u32 index_value;
 	int rv;
 
-	urb_index_value = get_ftdi_divisor(tty, port);
-	urb_value = (__u16)urb_index_value;
-	urb_index = (__u16)(urb_index_value >> 16);
+	index_value = get_ftdi_divisor(tty, port);
+	value = (u16)index_value;
+	index = (u16)(index_value >> 16);
 	if ((priv->chip_type == FT2232C) || (priv->chip_type == FT2232H) ||
 		(priv->chip_type == FT4232H) || (priv->chip_type == FT232H)) {
 		/* Probably the BM type needs the MSB of the encoded fractional
 		 * divider also moved like for the chips above. Any infos? */
-		urb_index = (__u16)((urb_index << 8) | priv->interface);
+		index = (u16)((index << 8) | priv->interface);
 	}
 
 	rv = usb_control_msg(port->serial->dev,
 			    usb_sndctrlpipe(port->serial->dev, 0),
 			    FTDI_SIO_SET_BAUDRATE_REQUEST,
 			    FTDI_SIO_SET_BAUDRATE_REQUEST_TYPE,
-			    urb_value, urb_index,
+			    value, index,
 			    NULL, 0, WDR_SHORT_TIMEOUT);
 	return rv;
 }
@@ -2140,29 +2139,29 @@
 {
 	struct usb_serial_port *port = tty->driver_data;
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
-	__u16 urb_value;
+	u16 value;
 
 	/* break_state = -1 to turn on break, and 0 to turn off break */
 	/* see drivers/char/tty_io.c to see it used */
-	/* last_set_data_urb_value NEVER has the break bit set in it */
+	/* last_set_data_value NEVER has the break bit set in it */
 
 	if (break_state)
-		urb_value = priv->last_set_data_urb_value | FTDI_SIO_SET_BREAK;
+		value = priv->last_set_data_value | FTDI_SIO_SET_BREAK;
 	else
-		urb_value = priv->last_set_data_urb_value;
+		value = priv->last_set_data_value;
 
 	if (usb_control_msg(port->serial->dev,
 			usb_sndctrlpipe(port->serial->dev, 0),
 			FTDI_SIO_SET_DATA_REQUEST,
 			FTDI_SIO_SET_DATA_REQUEST_TYPE,
-			urb_value , priv->interface,
+			value, priv->interface,
 			NULL, 0, WDR_TIMEOUT) < 0) {
 		dev_err(&port->dev, "%s FAILED to enable/disable break state (state was %d)\n",
 			__func__, break_state);
 	}
 
 	dev_dbg(&port->dev, "%s break state is %d - urb is %d\n", __func__,
-		break_state, urb_value);
+		break_state, value);
 
 }
 
@@ -2192,12 +2191,8 @@
 	struct ftdi_private *priv = usb_get_serial_port_data(port);
 	struct ktermios *termios = &tty->termios;
 	unsigned int cflag = termios->c_cflag;
-	__u16 urb_value; /* will hold the new flags */
-
-	/* Added for xon/xoff support */
-	unsigned int iflag = termios->c_iflag;
-	unsigned char vstop;
-	unsigned char vstart;
+	u16 value, index;
+	int ret;
 
 	/* Force baud rate if this device requires it, unless it is set to
 	   B0. */
@@ -2258,44 +2253,44 @@
 no_skip:
 	/* Set number of data bits, parity, stop bits */
 
-	urb_value = 0;
-	urb_value |= (cflag & CSTOPB ? FTDI_SIO_SET_DATA_STOP_BITS_2 :
-		      FTDI_SIO_SET_DATA_STOP_BITS_1);
+	value = 0;
+	value |= (cflag & CSTOPB ? FTDI_SIO_SET_DATA_STOP_BITS_2 :
+			FTDI_SIO_SET_DATA_STOP_BITS_1);
 	if (cflag & PARENB) {
 		if (cflag & CMSPAR)
-			urb_value |= cflag & PARODD ?
-				     FTDI_SIO_SET_DATA_PARITY_MARK :
-				     FTDI_SIO_SET_DATA_PARITY_SPACE;
+			value |= cflag & PARODD ?
+					FTDI_SIO_SET_DATA_PARITY_MARK :
+					FTDI_SIO_SET_DATA_PARITY_SPACE;
 		else
-			urb_value |= cflag & PARODD ?
-				     FTDI_SIO_SET_DATA_PARITY_ODD :
-				     FTDI_SIO_SET_DATA_PARITY_EVEN;
+			value |= cflag & PARODD ?
+					FTDI_SIO_SET_DATA_PARITY_ODD :
+					FTDI_SIO_SET_DATA_PARITY_EVEN;
 	} else {
-		urb_value |= FTDI_SIO_SET_DATA_PARITY_NONE;
+		value |= FTDI_SIO_SET_DATA_PARITY_NONE;
 	}
 	switch (cflag & CSIZE) {
 	case CS5:
 		dev_dbg(ddev, "Setting CS5 quirk\n");
 		break;
 	case CS7:
-		urb_value |= 7;
+		value |= 7;
 		dev_dbg(ddev, "Setting CS7\n");
 		break;
 	default:
 	case CS8:
-		urb_value |= 8;
+		value |= 8;
 		dev_dbg(ddev, "Setting CS8\n");
 		break;
 	}
 
 	/* This is needed by the break command since it uses the same command
 	   - but is or'ed with this value  */
-	priv->last_set_data_urb_value = urb_value;
+	priv->last_set_data_value = value;
 
 	if (usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			    FTDI_SIO_SET_DATA_REQUEST,
 			    FTDI_SIO_SET_DATA_REQUEST_TYPE,
-			    urb_value , priv->interface,
+			    value, priv->interface,
 			    NULL, 0, WDR_SHORT_TIMEOUT) < 0) {
 		dev_err(ddev, "%s FAILED to set databits/stopbits/parity\n",
 			__func__);
@@ -2326,65 +2321,30 @@
 			set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
 	}
 
-	/* Set flow control */
-	/* Note device also supports DTR/CD (ugh) and Xon/Xoff in hardware */
 no_c_cflag_changes:
-	if (cflag & CRTSCTS) {
-		dev_dbg(ddev, "%s Setting to CRTSCTS flow control\n", __func__);
-		if (usb_control_msg(dev,
-				    usb_sndctrlpipe(dev, 0),
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
-				    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
-				    0 , (FTDI_SIO_RTS_CTS_HS | priv->interface),
-				    NULL, 0, WDR_TIMEOUT) < 0) {
-			dev_err(ddev, "urb failed to set to rts/cts flow control\n");
-		}
-	} else {
-		/*
-		 * Xon/Xoff code
-		 *
-		 * Check the IXOFF status in the iflag component of the
-		 * termios structure. If IXOFF is not set, the pre-xon/xoff
-		 * code is executed.
-		 */
-		if (iflag & IXOFF) {
-			dev_dbg(ddev, "%s  request to enable xonxoff iflag=%04x\n",
-				__func__, iflag);
-			/* Try to enable the XON/XOFF on the ftdi_sio
-			 * Set the vstart and vstop -- could have been done up
-			 * above where a lot of other dereferencing is done but
-			 * that would be very inefficient as vstart and vstop
-			 * are not always needed.
-			 */
-			vstart = termios->c_cc[VSTART];
-			vstop = termios->c_cc[VSTOP];
-			urb_value = (vstop << 8) | (vstart);
+	/* Set hardware-assisted flow control */
+	value = 0;
 
-			if (usb_control_msg(dev,
-					    usb_sndctrlpipe(dev, 0),
-					    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
-					    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
-					    urb_value , (FTDI_SIO_XON_XOFF_HS
-							 | priv->interface),
-					    NULL, 0, WDR_TIMEOUT) < 0) {
-				dev_err(&port->dev, "urb failed to set to "
-					"xon/xoff flow control\n");
-			}
-		} else {
-			/* else clause to only run if cflag ! CRTSCTS and iflag
-			 * ! XOFF. CHECKME Assuming XON/XOFF handled by tty
-			 * stack - not by device */
-			dev_dbg(ddev, "%s Turning off hardware flow control\n", __func__);
-			if (usb_control_msg(dev,
-					    usb_sndctrlpipe(dev, 0),
-					    FTDI_SIO_SET_FLOW_CTRL_REQUEST,
-					    FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
-					    0, priv->interface,
-					    NULL, 0, WDR_TIMEOUT) < 0) {
-				dev_err(ddev, "urb failed to clear flow control\n");
-			}
-		}
+	if (C_CRTSCTS(tty)) {
+		dev_dbg(&port->dev, "enabling rts/cts flow control\n");
+		index = FTDI_SIO_RTS_CTS_HS;
+	} else if (I_IXON(tty)) {
+		dev_dbg(&port->dev, "enabling xon/xoff flow control\n");
+		index = FTDI_SIO_XON_XOFF_HS;
+		value = STOP_CHAR(tty) << 8 | START_CHAR(tty);
+	} else {
+		dev_dbg(&port->dev, "disabling flow control\n");
+		index = FTDI_SIO_DISABLE_FLOW_CTRL;
 	}
+
+	index |= priv->interface;
+
+	ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
+			FTDI_SIO_SET_FLOW_CTRL_REQUEST,
+			FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
+			value, index, NULL, 0, WDR_TIMEOUT);
+	if (ret < 0)
+		dev_err(&port->dev, "failed to set flow control: %d\n", ret);
 }
 
 /*
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 2058852..664e61f 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1916,7 +1916,8 @@
 	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d01, 0xff) },			/* D-Link DWM-156 (variant) */
 	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d02, 0xff) },
 	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d03, 0xff) },
-	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) },			/* D-Link DWM-158 */
+	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff),			/* D-Link DWM-158 */
+	 .driver_info = RSVD(4) | RSVD(5) },
 	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d0e, 0xff) },			/* D-Link DWM-157 C1 */
 	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff),			/* D-Link DWM-221 B1 */
 	  .driver_info = RSVD(4) },
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 46dd09d..5d1a193 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -533,6 +533,17 @@
 	return 0;
 }
 
+static bool pl2303_termios_change(const struct ktermios *a, const struct ktermios *b)
+{
+	bool ixon_change;
+
+	ixon_change = ((a->c_iflag ^ b->c_iflag) & (IXON | IXANY)) ||
+			a->c_cc[VSTART] != b->c_cc[VSTART] ||
+			a->c_cc[VSTOP] != b->c_cc[VSTOP];
+
+	return tty_termios_hw_change(a, b) || ixon_change;
+}
+
 static void pl2303_set_termios(struct tty_struct *tty,
 		struct usb_serial_port *port, struct ktermios *old_termios)
 {
@@ -544,7 +555,7 @@
 	int ret;
 	u8 control;
 
-	if (old_termios && !tty_termios_hw_change(&tty->termios, old_termios))
+	if (old_termios && !pl2303_termios_change(&tty->termios, old_termios))
 		return;
 
 	buf = kzalloc(7, GFP_KERNEL);
@@ -662,6 +673,9 @@
 			pl2303_vendor_write(serial, 0x0, 0x41);
 		else
 			pl2303_vendor_write(serial, 0x0, 0x61);
+	} else if (I_IXON(tty) && !I_IXANY(tty) && START_CHAR(tty) == 0x11 &&
+			STOP_CHAR(tty) == 0x13) {
+		pl2303_vendor_write(serial, 0x0, 0xc0);
 	} else {
 		pl2303_vendor_write(serial, 0x0, 0x0);
 	}
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 790e0cb..f7aaa7f 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -192,7 +192,7 @@
 	if (retval)
 		goto error_get_interface;
 
-	retval = tty_port_install(&port->port, driver, tty);
+	retval = tty_standard_install(driver, tty);
 	if (retval)
 		goto error_init_termios;
 
@@ -476,19 +476,6 @@
 	return 0;
 }
 
-static int serial_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, serial_proc_show, NULL);
-}
-
-static const struct file_operations serial_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= serial_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int serial_tiocmget(struct tty_struct *tty)
 {
 	struct usb_serial_port *port = tty->driver_data;
@@ -1192,7 +1179,7 @@
 	.get_icount =		serial_get_icount,
 	.cleanup =		serial_cleanup,
 	.install =		serial_install,
-	.proc_fops =		&serial_proc_fops,
+	.proc_show =		serial_proc_show,
 };
 
 
diff --git a/drivers/usb/storage/freecom.c b/drivers/usb/storage/freecom.c
index ec4d92c..4f542df 100644
--- a/drivers/usb/storage/freecom.c
+++ b/drivers/usb/storage/freecom.c
@@ -464,7 +464,7 @@
 	usb_stor_dbg(us, "result from activate reset is %d\n", result);
 
 	/* wait 250ms */
-	mdelay(250);
+	msleep(250);
 
 	/* clear reset */
 	result = usb_stor_control_msg(us, us->send_ctrl_pipe,
@@ -472,7 +472,7 @@
 	usb_stor_dbg(us, "result from clear reset is %d\n", result);
 
 	/* wait 3 seconds */
-	mdelay(3 * 1000);
+	msleep(3 * 1000);
 
 	return USB_STOR_TRANSPORT_GOOD;
 }
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 6034c39..9e9de54 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -836,6 +836,12 @@
 	if (devinfo->flags & US_FL_BROKEN_FUA)
 		sdev->broken_fua = 1;
 
+	/* UAS also needs to support FL_ALWAYS_SYNC */
+	if (devinfo->flags & US_FL_ALWAYS_SYNC) {
+		sdev->skip_ms_page_3f = 1;
+		sdev->skip_ms_page_8 = 1;
+		sdev->wce_default_on = 1;
+	}
 	scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
 	return 0;
 }
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 747d3a9..22fcfcc 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -2321,6 +2321,15 @@
 		"Micro Mini 1GB",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NOT_LOCKABLE ),
 
+/* "G-DRIVE" external HDD hangs on write without these.
+ * Patch submitted by Alexander Kappner <agk@godking.net>
+ */
+UNUSUAL_DEV(0x4971, 0x8024, 0x0000, 0x9999,
+		"SimpleTech",
+		"External HDD",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_ALWAYS_SYNC),
+
 /*
  * Nick Bowler <nbowler@elliptictech.com>
  * SCSI stack spams (otherwise harmless) error messages.
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index 38434d8..d0bdebd 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -107,3 +107,12 @@
 		"External HDD",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NO_REPORT_OPCODES),
+
+/* "G-DRIVE" external HDD hangs on write without these.
+ * Patch submitted by Alexander Kappner <agk@godking.net>
+ */
+UNUSUAL_DEV(0x4971, 0x8024, 0x0000, 0x9999,
+		"SimpleTech",
+		"External HDD",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_ALWAYS_SYNC),
diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig
index 030f88c..2c8eab1 100644
--- a/drivers/usb/typec/Kconfig
+++ b/drivers/usb/typec/Kconfig
@@ -49,6 +49,7 @@
 	tristate "USB Type-C Port Controller Manager"
 	depends on USB
 	select USB_ROLE_SWITCH
+	select POWER_SUPPLY
 	help
 	  The Type-C Port Controller Manager provides a USB PD and USB Type-C
 	  state machine for use with Type-C Port Controllers.
diff --git a/drivers/usb/typec/fusb302/Kconfig b/drivers/usb/typec/fusb302/Kconfig
index 48a4f2f..fce099f 100644
--- a/drivers/usb/typec/fusb302/Kconfig
+++ b/drivers/usb/typec/fusb302/Kconfig
@@ -1,6 +1,6 @@
 config TYPEC_FUSB302
 	tristate "Fairchild FUSB302 Type-C chip driver"
-	depends on I2C && POWER_SUPPLY
+	depends on I2C
 	help
 	  The Fairchild FUSB302 Type-C chip driver that works with
 	  Type-C Port Controller Manager to provide USB PD and USB
diff --git a/drivers/usb/typec/fusb302/fusb302.c b/drivers/usb/typec/fusb302/fusb302.c
index 7036171..1e68da1 100644
--- a/drivers/usb/typec/fusb302/fusb302.c
+++ b/drivers/usb/typec/fusb302/fusb302.c
@@ -18,7 +18,6 @@
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/pinctrl/consumer.h>
-#include <linux/power_supply.h>
 #include <linux/proc_fs.h>
 #include <linux/regulator/consumer.h>
 #include <linux/sched/clock.h>
@@ -99,11 +98,6 @@
 	/* lock for sharing chip states */
 	struct mutex lock;
 
-	/* psy + psy status */
-	struct power_supply *psy;
-	u32 current_limit;
-	u32 supply_voltage;
-
 	/* chip status */
 	enum toggling_mode toggling_mode;
 	enum src_current_status src_current_status;
@@ -120,6 +114,7 @@
 	enum typec_cc_polarity cc_polarity;
 	enum typec_cc_status cc1;
 	enum typec_cc_status cc2;
+	u32 snk_pdo[PDO_MAX_OBJECTS];
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dentry;
@@ -220,32 +215,28 @@
 
 static struct dentry *rootdir;
 
-static int fusb302_debugfs_init(struct fusb302_chip *chip)
+static void fusb302_debugfs_init(struct fusb302_chip *chip)
 {
 	mutex_init(&chip->logbuffer_lock);
-	if (!rootdir) {
+	if (!rootdir)
 		rootdir = debugfs_create_dir("fusb302", NULL);
-		if (!rootdir)
-			return -ENOMEM;
-	}
 
 	chip->dentry = debugfs_create_file(dev_name(chip->dev),
 					   S_IFREG | 0444, rootdir,
 					   chip, &fusb302_debug_fops);
-
-	return 0;
 }
 
 static void fusb302_debugfs_exit(struct fusb302_chip *chip)
 {
 	debugfs_remove(chip->dentry);
+	debugfs_remove(rootdir);
 }
 
 #else
 
 static void fusb302_log(const struct fusb302_chip *chip,
 			const char *fmt, ...) { }
-static int fusb302_debugfs_init(const struct fusb302_chip *chip) { return 0; }
+static void fusb302_debugfs_init(const struct fusb302_chip *chip) { }
 static void fusb302_debugfs_exit(const struct fusb302_chip *chip) { }
 
 #endif
@@ -861,13 +852,11 @@
 		chip->vbus_on = on;
 		fusb302_log(chip, "vbus := %s", on ? "On" : "Off");
 	}
-	if (chip->charge_on == charge) {
+	if (chip->charge_on == charge)
 		fusb302_log(chip, "charge is already %s",
 			    charge ? "On" : "Off");
-	} else {
+	else
 		chip->charge_on = charge;
-		power_supply_changed(chip->psy);
-	}
 
 done:
 	mutex_unlock(&chip->lock);
@@ -883,11 +872,6 @@
 	fusb302_log(chip, "current limit: %d ma, %d mv (not implemented)",
 		    max_ma, mv);
 
-	chip->supply_voltage = mv;
-	chip->current_limit = max_ma;
-
-	power_supply_changed(chip->psy);
-
 	return 0;
 }
 
@@ -1212,11 +1196,6 @@
 static const struct tcpc_config fusb302_tcpc_config = {
 	.src_pdo = src_pdo,
 	.nr_src_pdo = ARRAY_SIZE(src_pdo),
-	.snk_pdo = snk_pdo,
-	.nr_snk_pdo = ARRAY_SIZE(snk_pdo),
-	.max_snk_mv = 5000,
-	.max_snk_ma = 3000,
-	.max_snk_mw = 15000,
 	.operating_snk_mw = 2500,
 	.type = TYPEC_PORT_DRP,
 	.data = TYPEC_PORT_DRD,
@@ -1686,43 +1665,6 @@
 	return IRQ_HANDLED;
 }
 
-static int fusb302_psy_get_property(struct power_supply *psy,
-				    enum power_supply_property psp,
-				    union power_supply_propval *val)
-{
-	struct fusb302_chip *chip = power_supply_get_drvdata(psy);
-
-	switch (psp) {
-	case POWER_SUPPLY_PROP_ONLINE:
-		val->intval = chip->charge_on;
-		break;
-	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-		val->intval = chip->supply_voltage * 1000; /* mV -> µV */
-		break;
-	case POWER_SUPPLY_PROP_CURRENT_MAX:
-		val->intval = chip->current_limit * 1000; /* mA -> µA */
-		break;
-	default:
-		return -ENODATA;
-	}
-
-	return 0;
-}
-
-static enum power_supply_property fusb302_psy_properties[] = {
-	POWER_SUPPLY_PROP_ONLINE,
-	POWER_SUPPLY_PROP_VOLTAGE_NOW,
-	POWER_SUPPLY_PROP_CURRENT_MAX,
-};
-
-static const struct power_supply_desc fusb302_psy_desc = {
-	.name		= "fusb302-typec-source",
-	.type		= POWER_SUPPLY_TYPE_USB_TYPE_C,
-	.properties	= fusb302_psy_properties,
-	.num_properties	= ARRAY_SIZE(fusb302_psy_properties),
-	.get_property	= fusb302_psy_get_property,
-};
-
 static int init_gpio(struct fusb302_chip *chip)
 {
 	struct device_node *node;
@@ -1756,13 +1698,35 @@
 	return 0;
 }
 
+static int fusb302_composite_snk_pdo_array(struct fusb302_chip *chip)
+{
+	struct device *dev = chip->dev;
+	u32 max_uv, max_ua;
+
+	chip->snk_pdo[0] = PDO_FIXED(5000, 400, PDO_FIXED_FLAGS);
+
+	/*
+	 * As max_snk_ma/mv/mw is not needed for tcpc_config,
+	 * those settings should be passed in via sink PDO, so
+	 * "fcs, max-sink-*" properties will be deprecated, to
+	 * perserve compatibility with existing users of them,
+	 * we read those properties to convert them to be a var
+	 * PDO.
+	 */
+	if (device_property_read_u32(dev, "fcs,max-sink-microvolt", &max_uv) ||
+		device_property_read_u32(dev, "fcs,max-sink-microamp", &max_ua))
+		return 1;
+
+	chip->snk_pdo[1] = PDO_VAR(5000, max_uv / 1000, max_ua / 1000);
+	return 2;
+}
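
/*
 * Illustrative sketch (property values assumed): with
 * "fcs,max-sink-microvolt" = 12000000 and "fcs,max-sink-microamp" =
 * 3000000, the helper above builds the equivalent of
 */
static const u32 example_snk_pdo[] = {
	PDO_FIXED(5000, 400, PDO_FIXED_FLAGS),	/* always present: 5 V / 400 mA */
	PDO_VAR(5000, 12000, 3000),		/* variable: 5-12 V at up to 3 A */
};
/* and returns 2; if either property is absent, only the fixed PDO remains. */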
+
 static int fusb302_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
 	struct fusb302_chip *chip;
 	struct i2c_adapter *adapter;
 	struct device *dev = &client->dev;
-	struct power_supply_config cfg = {};
 	const char *name;
 	int ret = 0;
 	u32 v;
@@ -1784,18 +1748,13 @@
 	chip->tcpc_dev.config = &chip->tcpc_config;
 	mutex_init(&chip->lock);
 
-	if (!device_property_read_u32(dev, "fcs,max-sink-microvolt", &v))
-		chip->tcpc_config.max_snk_mv = v / 1000;
-
-	if (!device_property_read_u32(dev, "fcs,max-sink-microamp", &v))
-		chip->tcpc_config.max_snk_ma = v / 1000;
-
-	if (!device_property_read_u32(dev, "fcs,max-sink-microwatt", &v))
-		chip->tcpc_config.max_snk_mw = v / 1000;
-
 	if (!device_property_read_u32(dev, "fcs,operating-sink-microwatt", &v))
 		chip->tcpc_config.operating_snk_mw = v / 1000;
 
+	/* Composite sink PDO */
+	chip->tcpc_config.nr_snk_pdo = fusb302_composite_snk_pdo_array(chip);
+	chip->tcpc_config.snk_pdo = chip->snk_pdo;
+
 	/*
 	 * Devicetree platforms should get extcon via phandle (not yet
 	 * supported). On ACPI platforms, we get the name from a device prop.
@@ -1809,17 +1768,7 @@
 			return -EPROBE_DEFER;
 	}
 
-	cfg.drv_data = chip;
-	chip->psy = devm_power_supply_register(dev, &fusb302_psy_desc, &cfg);
-	if (IS_ERR(chip->psy)) {
-		ret = PTR_ERR(chip->psy);
-		dev_err(chip->dev, "Error registering power-supply: %d\n", ret);
-		return ret;
-	}
-
-	ret = fusb302_debugfs_init(chip);
-	if (ret < 0)
-		return ret;
+	fusb302_debugfs_init(chip);
 
 	chip->wq = create_singlethread_workqueue(dev_name(chip->dev));
 	if (!chip->wq) {
diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c
index f89093b..9d8330e 100644
--- a/drivers/usb/typec/mux.c
+++ b/drivers/usb/typec/mux.c
@@ -178,7 +178,7 @@
 
 /**
  * typec_mux_unregister - Unregister Multiplexer Switch
- * @sw: USB Type-C Connector Multiplexer/DeMultiplexer
+ * @mux: USB Type-C Connector Multiplexer/DeMultiplexer
  *
  * Unregister mux that was registered with typec_mux_register().
  */
diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index ded49e3..8a201dd 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -12,13 +12,17 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/power_supply.h>
 #include <linux/proc_fs.h>
+#include <linux/property.h>
 #include <linux/sched/clock.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/usb/pd.h>
+#include <linux/usb/pd_ado.h>
 #include <linux/usb/pd_bdo.h>
+#include <linux/usb/pd_ext_sdb.h>
 #include <linux/usb/pd_vdo.h>
 #include <linux/usb/role.h>
 #include <linux/usb/tcpm.h>
@@ -48,6 +52,7 @@
 	S(SNK_DISCOVERY_DEBOUNCE_DONE),		\
 	S(SNK_WAIT_CAPABILITIES),		\
 	S(SNK_NEGOTIATE_CAPABILITIES),		\
+	S(SNK_NEGOTIATE_PPS_CAPABILITIES),	\
 	S(SNK_TRANSITION_SINK),			\
 	S(SNK_TRANSITION_SINK_VBUS),		\
 	S(SNK_READY),				\
@@ -112,6 +117,11 @@
 	S(SNK_TRYWAIT_VBUS),			\
 	S(BIST_RX),				\
 						\
+	S(GET_STATUS_SEND),			\
+	S(GET_STATUS_SEND_TIMEOUT),		\
+	S(GET_PPS_STATUS_SEND),			\
+	S(GET_PPS_STATUS_SEND_TIMEOUT),		\
+						\
 	S(ERROR_RECOVERY),			\
 	S(PORT_RESET),				\
 	S(PORT_RESET_WAIT_OFF)
@@ -142,6 +152,7 @@
 	PD_MSG_NONE = 0,
 	PD_MSG_CTRL_REJECT,
 	PD_MSG_CTRL_WAIT,
+	PD_MSG_CTRL_NOT_SUPP,
 	PD_MSG_DATA_SINK_CAP,
 	PD_MSG_DATA_SOURCE_CAP,
 };
@@ -167,6 +178,16 @@
 	struct typec_altmode_desc altmode_desc[SVID_DISCOVERY_MAX];
 };
 
+struct pd_pps_data {
+	u32 min_volt;
+	u32 max_volt;
+	u32 max_curr;
+	u32 out_volt;
+	u32 op_curr;
+	bool supported;
+	bool active;
+};
+
 struct tcpm_port {
 	struct device *dev;
 
@@ -235,6 +256,7 @@
 	struct completion swap_complete;
 	int swap_status;
 
+	unsigned int negotiated_rev;
 	unsigned int message_id;
 	unsigned int caps_count;
 	unsigned int hard_reset_count;
@@ -257,15 +279,18 @@
 	u32 snk_vdo[VDO_MAX_OBJECTS];
 	unsigned int nr_snk_vdo;
 
-	unsigned int max_snk_mv;
-	unsigned int max_snk_ma;
-	unsigned int max_snk_mw;
 	unsigned int operating_snk_mw;
+	bool update_sink_caps;
 
 	/* Requested current / voltage */
 	u32 current_limit;
 	u32 supply_voltage;
 
+	/* Used to export TA voltage and current */
+	struct power_supply *psy;
+	struct power_supply_desc psy_desc;
+	enum power_supply_usb_type usb_type;
+
 	u32 bist_request;
 
 	/* PD state for Vendor Defined Messages */
@@ -277,8 +302,13 @@
 	/* VDO to retry if UFP responder replied busy */
 	u32 vdo_retry;
 
-	/* Alternate mode data */
+	/* PPS */
+	struct pd_pps_data pps_data;
+	struct completion pps_complete;
+	bool pps_pending;
+	int pps_status;
 
+	/* Alternate mode data */
 	struct pd_mode_data mode_data;
 	struct typec_altmode *partner_altmode[SVID_DISCOVERY_MAX];
 	struct typec_altmode *port_altmode[SVID_DISCOVERY_MAX];
@@ -496,6 +526,16 @@
 				  pdo_max_voltage(pdo),
 				  pdo_max_power(pdo));
 			break;
+		case PDO_TYPE_APDO:
+			if (pdo_apdo_type(pdo) == APDO_TYPE_PPS)
+				scnprintf(msg, sizeof(msg),
+					  "%u-%u mV, %u mA",
+					  pdo_pps_apdo_min_voltage(pdo),
+					  pdo_pps_apdo_max_voltage(pdo),
+					  pdo_pps_apdo_max_current(pdo));
+			else
+				strcpy(msg, "undefined APDO");
+			break;
 		default:
 			strcpy(msg, "undefined");
 			break;
@@ -526,21 +566,16 @@
 
 static struct dentry *rootdir;
 
-static int tcpm_debugfs_init(struct tcpm_port *port)
+static void tcpm_debugfs_init(struct tcpm_port *port)
 {
 	mutex_init(&port->logbuffer_lock);
 	/* /sys/kernel/debug/tcpm/usbcX */
-	if (!rootdir) {
+	if (!rootdir)
 		rootdir = debugfs_create_dir("tcpm", NULL);
-		if (!rootdir)
-			return -ENOMEM;
-	}
 
 	port->dentry = debugfs_create_file(dev_name(port->dev),
 					   S_IFREG | 0444, rootdir,
 					   port, &tcpm_debug_fops);
-
-	return 0;
 }
 
 static void tcpm_debugfs_exit(struct tcpm_port *port)
@@ -555,7 +590,7 @@
 __printf(2, 3)
 static void tcpm_log_force(struct tcpm_port *port, const char *fmt, ...) { }
 static void tcpm_log_source_caps(struct tcpm_port *port) { }
-static int tcpm_debugfs_init(const struct tcpm_port *port) { return 0; }
+static void tcpm_debugfs_init(const struct tcpm_port *port) { }
 static void tcpm_debugfs_exit(const struct tcpm_port *port) { }
 
 #endif
@@ -793,11 +828,13 @@
 		msg.header = PD_HEADER_LE(PD_CTRL_REJECT,
 					  port->pwr_role,
 					  port->data_role,
+					  port->negotiated_rev,
 					  port->message_id, 0);
 	} else {
 		msg.header = PD_HEADER_LE(PD_DATA_SOURCE_CAP,
 					  port->pwr_role,
 					  port->data_role,
+					  port->negotiated_rev,
 					  port->message_id,
 					  port->nr_src_pdo);
 	}
@@ -818,11 +855,13 @@
 		msg.header = PD_HEADER_LE(PD_CTRL_REJECT,
 					  port->pwr_role,
 					  port->data_role,
+					  port->negotiated_rev,
 					  port->message_id, 0);
 	} else {
 		msg.header = PD_HEADER_LE(PD_DATA_SINK_CAP,
 					  port->pwr_role,
 					  port->data_role,
+					  port->negotiated_rev,
 					  port->message_id,
 					  port->nr_snk_pdo);
 	}
@@ -1189,6 +1228,7 @@
 		msg.header = PD_HEADER_LE(PD_DATA_VENDOR_DEF,
 					  port->pwr_role,
 					  port->data_role,
+					  port->negotiated_rev,
 					  port->message_id, port->vdo_count);
 		for (i = 0; i < port->vdo_count; i++)
 			msg.payload[i] = cpu_to_le32(port->vdo_data[i]);
@@ -1260,6 +1300,8 @@
 	PDO_ERR_FIXED_NOT_SORTED,
 	PDO_ERR_VARIABLE_BATT_NOT_SORTED,
 	PDO_ERR_DUPE_PDO,
+	PDO_ERR_PPS_APDO_NOT_SORTED,
+	PDO_ERR_DUPE_PPS_APDO,
 };
 
 static const char * const pdo_err_msg[] = {
@@ -1275,6 +1317,10 @@
 	" err: Variable/Battery supply pdos should be in increasing order of their minimum voltage",
 	[PDO_ERR_DUPE_PDO] =
 	" err: Variable/Batt supply pdos cannot have same min/max voltage",
+	[PDO_ERR_PPS_APDO_NOT_SORTED] =
+	" err: Programmable power supply apdos should be in increasing order of their maximum voltage",
+	[PDO_ERR_DUPE_PPS_APDO] =
+	" err: Programmable power supply apdos cannot have same min/max voltage and max current",
 };
 
 static enum pdo_err tcpm_caps_err(struct tcpm_port *port, const u32 *pdo,
@@ -1324,6 +1370,26 @@
 					  pdo_min_voltage(pdo[i - 1])))
 					return PDO_ERR_DUPE_PDO;
 				break;
+			/*
+			 * The Programmable Power Supply APDOs, if present,
+			 * shall be sent in Maximum Voltage order;
+			 * lowest to highest.
+			 */
+			case PDO_TYPE_APDO:
+				if (pdo_apdo_type(pdo[i]) != APDO_TYPE_PPS)
+					break;
+
+				if (pdo_pps_apdo_max_voltage(pdo[i]) <
+				    pdo_pps_apdo_max_voltage(pdo[i - 1]))
+					return PDO_ERR_PPS_APDO_NOT_SORTED;
+				else if (pdo_pps_apdo_min_voltage(pdo[i]) ==
+					  pdo_pps_apdo_min_voltage(pdo[i - 1]) &&
+					 pdo_pps_apdo_max_voltage(pdo[i]) ==
+					  pdo_pps_apdo_max_voltage(pdo[i - 1]) &&
+					 pdo_pps_apdo_max_current(pdo[i]) ==
+					  pdo_pps_apdo_max_current(pdo[i - 1]))
+					return PDO_ERR_DUPE_PPS_APDO;
+				break;
 			default:
 				tcpm_log_force(port, " Unknown pdo type");
 			}
@@ -1349,11 +1415,48 @@
 /*
  * PD (data, control) command handling functions
  */
+static inline enum tcpm_state ready_state(struct tcpm_port *port)
+{
+	if (port->pwr_role == TYPEC_SOURCE)
+		return SRC_READY;
+	else
+		return SNK_READY;
+}
+
+static int tcpm_pd_send_control(struct tcpm_port *port,
+				enum pd_ctrl_msg_type type);
+
+static void tcpm_handle_alert(struct tcpm_port *port, const __le32 *payload,
+			      int cnt)
+{
+	u32 p0 = le32_to_cpu(payload[0]);
+	unsigned int type = usb_pd_ado_type(p0);
+
+	if (!type) {
+		tcpm_log(port, "Alert message received with no type");
+		return;
+	}
+
+	/* Just handling non-battery alerts for now */
+	if (!(type & USB_PD_ADO_TYPE_BATT_STATUS_CHANGE)) {
+		switch (port->state) {
+		case SRC_READY:
+		case SNK_READY:
+			tcpm_set_state(port, GET_STATUS_SEND, 0);
+			break;
+		default:
+			tcpm_queue_message(port, PD_MSG_CTRL_WAIT);
+			break;
+		}
+	}
+}
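
/*
 * Flow note: per the USB PD 3.0 spec, a non-battery Alert from the
 * partner is answered with Get_Status; the GET_STATUS_SEND state added
 * below sends PD_CTRL_GET_STATUS and, if no Status reply arrives within
 * PD_T_SENDER_RESPONSE, falls back to the ready state.
 */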
+
 static void tcpm_pd_data_request(struct tcpm_port *port,
 				 const struct pd_message *msg)
 {
 	enum pd_data_msg_type type = pd_header_type_le(msg->header);
 	unsigned int cnt = pd_header_cnt_le(msg->header);
+	unsigned int rev = pd_header_rev_le(msg->header);
 	unsigned int i;
 
 	switch (type) {
@@ -1372,6 +1475,17 @@
 				   port->nr_source_caps);
 
 		/*
+		 * Adjust revision in subsequent message headers, as required,
+		 * to comply with 6.2.1.1.5 of the USB PD 3.0 spec. We don't
+		 * support Rev 1.0 so just do nothing in that scenario.
+		 */
+		if (rev == PD_REV10)
+			break;
+
+		if (rev < PD_MAX_REV)
+			port->negotiated_rev = rev;
+
+		/*
 		 * This message may be received even if VBUS is not
 		 * present. This is quite unexpected; see USB PD
 		 * specification, sections 8.3.3.6.3.1 and 8.3.3.6.3.2.
@@ -1392,6 +1506,20 @@
 			tcpm_queue_message(port, PD_MSG_CTRL_REJECT);
 			break;
 		}
+
+		/*
+		 * Adjust revision in subsequent message headers, as required,
+		 * to comply with 6.2.1.1.5 of the USB PD 3.0 spec. We don't
+		 * support Rev 1.0 so just reject in that scenario.
+		 */
+		if (rev == PD_REV10) {
+			tcpm_queue_message(port, PD_MSG_CTRL_REJECT);
+			break;
+		}
+
+		if (rev < PD_MAX_REV)
+			port->negotiated_rev = rev;
+
 		port->sink_request = le32_to_cpu(msg->payload[0]);
 		tcpm_set_state(port, SRC_NEGOTIATE_CAPABILITIES, 0);
 		break;
@@ -1410,12 +1538,29 @@
 			tcpm_set_state(port, BIST_RX, 0);
 		}
 		break;
+	case PD_DATA_ALERT:
+		tcpm_handle_alert(port, msg->payload, cnt);
+		break;
+	case PD_DATA_BATT_STATUS:
+	case PD_DATA_GET_COUNTRY_INFO:
+		/* Currently unsupported */
+		tcpm_queue_message(port, PD_MSG_CTRL_NOT_SUPP);
+		break;
 	default:
 		tcpm_log(port, "Unhandled data message type %#x", type);
 		break;
 	}
 }
 
+static void tcpm_pps_complete(struct tcpm_port *port, int result)
+{
+	if (port->pps_pending) {
+		port->pps_status = result;
+		port->pps_pending = false;
+		complete(&port->pps_complete);
+	}
+}
+
 static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 				 const struct pd_message *msg)
 {
@@ -1483,6 +1628,7 @@
 		break;
 	case PD_CTRL_REJECT:
 	case PD_CTRL_WAIT:
+	case PD_CTRL_NOT_SUPP:
 		switch (port->state) {
 		case SNK_NEGOTIATE_CAPABILITIES:
 			/* USB PD specification, Figure 8-43 */
@@ -1492,6 +1638,14 @@
 				next_state = SNK_WAIT_CAPABILITIES;
 			tcpm_set_state(port, next_state, 0);
 			break;
+		case SNK_NEGOTIATE_PPS_CAPABILITIES:
+			/* Revert data back from any requested PPS updates */
+			port->pps_data.out_volt = port->supply_voltage;
+			port->pps_data.op_curr = port->current_limit;
+			port->pps_status = (type == PD_CTRL_WAIT ?
+					    -EAGAIN : -EOPNOTSUPP);
+			tcpm_set_state(port, SNK_READY, 0);
+			break;
 		case DR_SWAP_SEND:
 			port->swap_status = (type == PD_CTRL_WAIT ?
 					     -EAGAIN : -EOPNOTSUPP);
@@ -1514,6 +1668,13 @@
 	case PD_CTRL_ACCEPT:
 		switch (port->state) {
 		case SNK_NEGOTIATE_CAPABILITIES:
+			port->pps_data.active = false;
+			tcpm_set_state(port, SNK_TRANSITION_SINK, 0);
+			break;
+		case SNK_NEGOTIATE_PPS_CAPABILITIES:
+			port->pps_data.active = true;
+			port->supply_voltage = port->pps_data.out_volt;
+			port->current_limit = port->pps_data.op_curr;
 			tcpm_set_state(port, SNK_TRANSITION_SINK, 0);
 			break;
 		case SOFT_RESET_SEND:
@@ -1587,12 +1748,75 @@
 			break;
 		}
 		break;
+	case PD_CTRL_GET_SOURCE_CAP_EXT:
+	case PD_CTRL_GET_STATUS:
+	case PD_CTRL_FR_SWAP:
+	case PD_CTRL_GET_PPS_STATUS:
+	case PD_CTRL_GET_COUNTRY_CODES:
+		/* Currently not supported */
+		tcpm_queue_message(port, PD_MSG_CTRL_NOT_SUPP);
+		break;
 	default:
 		tcpm_log(port, "Unhandled ctrl message type %#x", type);
 		break;
 	}
 }
 
+static void tcpm_pd_ext_msg_request(struct tcpm_port *port,
+				    const struct pd_message *msg)
+{
+	enum pd_ext_msg_type type = pd_header_type_le(msg->header);
+	unsigned int data_size = pd_ext_header_data_size_le(msg->ext_msg.header);
+
+	if (!(msg->ext_msg.header & PD_EXT_HDR_CHUNKED)) {
+		tcpm_log(port, "Unchunked extended messages unsupported");
+		return;
+	}
+
+	if (data_size > PD_EXT_MAX_CHUNK_DATA) {
+		tcpm_log(port, "Chunk handling not yet supported");
+		return;
+	}
+
+	switch (type) {
+	case PD_EXT_STATUS:
+		/*
+		 * If PPS-related events are raised, request the PPS status
+		 * to clear them (see USB PD 3.0 Spec, 6.5.2.4)
+		 */
+		if (msg->ext_msg.data[USB_PD_EXT_SDB_EVENT_FLAGS] &
+		    USB_PD_EXT_SDB_PPS_EVENTS)
+			tcpm_set_state(port, GET_PPS_STATUS_SEND, 0);
+		else
+			tcpm_set_state(port, ready_state(port), 0);
+		break;
+	case PD_EXT_PPS_STATUS:
+		/*
+		 * For now the PPS status message is used to clear events
+		 * and nothing more.
+		 */
+		tcpm_set_state(port, ready_state(port), 0);
+		break;
+	case PD_EXT_SOURCE_CAP_EXT:
+	case PD_EXT_GET_BATT_CAP:
+	case PD_EXT_GET_BATT_STATUS:
+	case PD_EXT_BATT_CAP:
+	case PD_EXT_GET_MANUFACTURER_INFO:
+	case PD_EXT_MANUFACTURER_INFO:
+	case PD_EXT_SECURITY_REQUEST:
+	case PD_EXT_SECURITY_RESPONSE:
+	case PD_EXT_FW_UPDATE_REQUEST:
+	case PD_EXT_FW_UPDATE_RESPONSE:
+	case PD_EXT_COUNTRY_INFO:
+	case PD_EXT_COUNTRY_CODES:
+		tcpm_queue_message(port, PD_MSG_CTRL_NOT_SUPP);
+		break;
+	default:
+		tcpm_log(port, "Unhandled extended message type %#x", type);
+		break;
+	}
+}
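
/*
 * Chunking note: only chunked extended messages are handled here, and
 * only when the payload fits in a single chunk (PD_EXT_MAX_CHUNK_DATA,
 * i.e. 26 bytes per the PD 3.0 spec); multi-chunk reassembly is left
 * unimplemented for now.
 */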
+
 static void tcpm_pd_rx_handler(struct work_struct *work)
 {
 	struct pd_rx_event *event = container_of(work,
@@ -1633,7 +1857,9 @@
 				 "Data role mismatch, initiating error recovery");
 			tcpm_set_state(port, ERROR_RECOVERY, 0);
 		} else {
-			if (cnt)
+			if (msg->header & PD_HEADER_EXT_HDR)
+				tcpm_pd_ext_msg_request(port, msg);
+			else if (cnt)
 				tcpm_pd_data_request(port, msg);
 			else
 				tcpm_pd_ctrl_request(port, msg);
@@ -1668,6 +1894,7 @@
 	memset(&msg, 0, sizeof(msg));
 	msg.header = PD_HEADER_LE(type, port->pwr_role,
 				  port->data_role,
+				  port->negotiated_rev,
 				  port->message_id, 0);
 
 	return tcpm_pd_transmit(port, TCPC_TX_SOP, &msg);
@@ -1693,6 +1920,9 @@
 		case PD_MSG_CTRL_REJECT:
 			tcpm_pd_send_control(port, PD_CTRL_REJECT);
 			break;
+		case PD_MSG_CTRL_NOT_SUPP:
+			tcpm_pd_send_control(port, PD_CTRL_NOT_SUPP);
+			break;
 		case PD_MSG_DATA_SINK_CAP:
 			tcpm_pd_send_sink_caps(port);
 			break;
@@ -1772,84 +2002,254 @@
 	return 0;
 }
 
-static int tcpm_pd_select_pdo(struct tcpm_port *port)
+#define min_power(x, y) min(pdo_max_power(x), pdo_max_power(y))
+#define min_current(x, y) min(pdo_max_current(x), pdo_max_current(y))
+
+static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo,
+			      int *src_pdo)
 {
-	unsigned int i, max_mw = 0, max_mv = 0;
+	unsigned int i, j, max_src_mv = 0, min_src_mv = 0, max_mw = 0,
+		     max_mv = 0, src_mw = 0, src_ma = 0, max_snk_mv = 0,
+		     min_snk_mv = 0;
 	int ret = -EINVAL;
 
+	port->pps_data.supported = false;
+	port->usb_type = POWER_SUPPLY_USB_TYPE_PD;
+
 	/*
-	 * Select the source PDO providing the most power while staying within
-	 * the board's voltage limits. Prefer PDO providing exp
+	 * Select the source PDO providing the most power which has a
+	 * matching sink cap.
 	 */
 	for (i = 0; i < port->nr_source_caps; i++) {
 		u32 pdo = port->source_caps[i];
 		enum pd_pdo_type type = pdo_type(pdo);
-		unsigned int mv, ma, mw;
 
-		if (type == PDO_TYPE_FIXED)
-			mv = pdo_fixed_voltage(pdo);
-		else
-			mv = pdo_min_voltage(pdo);
-
-		if (type == PDO_TYPE_BATT) {
-			mw = pdo_max_power(pdo);
-		} else {
-			ma = min(pdo_max_current(pdo),
-				 port->max_snk_ma);
-			mw = ma * mv / 1000;
+		switch (type) {
+		case PDO_TYPE_FIXED:
+			max_src_mv = pdo_fixed_voltage(pdo);
+			min_src_mv = max_src_mv;
+			break;
+		case PDO_TYPE_BATT:
+		case PDO_TYPE_VAR:
+			max_src_mv = pdo_max_voltage(pdo);
+			min_src_mv = pdo_min_voltage(pdo);
+			break;
+		case PDO_TYPE_APDO:
+			if (pdo_apdo_type(pdo) == APDO_TYPE_PPS) {
+				port->pps_data.supported = true;
+				port->usb_type =
+					POWER_SUPPLY_USB_TYPE_PD_PPS;
+			}
+			continue;
+		default:
+			tcpm_log(port, "Invalid source PDO type, ignoring");
+			continue;
 		}
 
-		/* Perfer higher voltages if available */
-		if ((mw > max_mw || (mw == max_mw && mv > max_mv)) &&
-		    mv <= port->max_snk_mv) {
-			ret = i;
-			max_mw = mw;
-			max_mv = mv;
+		switch (type) {
+		case PDO_TYPE_FIXED:
+		case PDO_TYPE_VAR:
+			src_ma = pdo_max_current(pdo);
+			src_mw = src_ma * min_src_mv / 1000;
+			break;
+		case PDO_TYPE_BATT:
+			src_mw = pdo_max_power(pdo);
+			break;
+		case PDO_TYPE_APDO:
+			continue;
+		default:
+			tcpm_log(port, "Invalid source PDO type, ignoring");
+			continue;
+		}
+
+		for (j = 0; j < port->nr_snk_pdo; j++) {
+			pdo = port->snk_pdo[j];
+
+			switch (pdo_type(pdo)) {
+			case PDO_TYPE_FIXED:
+				max_snk_mv = pdo_fixed_voltage(pdo);
+				min_snk_mv = max_snk_mv;
+				break;
+			case PDO_TYPE_BATT:
+			case PDO_TYPE_VAR:
+				max_snk_mv = pdo_max_voltage(pdo);
+				min_snk_mv = pdo_min_voltage(pdo);
+				break;
+			case PDO_TYPE_APDO:
+				continue;
+			default:
+				tcpm_log(port, "Invalid sink PDO type, ignoring");
+				continue;
+			}
+
+			if (max_src_mv <= max_snk_mv &&
+				min_src_mv >= min_snk_mv) {
+				/* Prefer higher voltages if available */
+				if ((src_mw == max_mw && min_src_mv > max_mv) ||
+							src_mw > max_mw) {
+					*src_pdo = i;
+					*sink_pdo = j;
+					max_mw = src_mw;
+					max_mv = min_src_mv;
+					ret = 0;
+				}
+			}
 		}
 	}
 
 	return ret;
 }
 
+#define min_pps_apdo_current(x, y)	\
+	min(pdo_pps_apdo_max_current(x), pdo_pps_apdo_max_current(y))
+
+static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port)
+{
+	unsigned int i, j, max_mw = 0, max_mv = 0;
+	unsigned int min_src_mv, max_src_mv, src_ma, src_mw;
+	unsigned int min_snk_mv, max_snk_mv, snk_ma;
+	u32 pdo;
+	unsigned int src_pdo = 0, snk_pdo = 0;
+
+	/*
+	 * Select the source PPS APDO providing the most power while staying
+	 * within the board's limits. We skip the first PDO as this is always
+	 * 5V 3A.
+	 */
+	for (i = 1; i < port->nr_source_caps; ++i) {
+		pdo = port->source_caps[i];
+
+		switch (pdo_type(pdo)) {
+		case PDO_TYPE_APDO:
+			if (pdo_apdo_type(pdo) != APDO_TYPE_PPS) {
+				tcpm_log(port, "Not PPS APDO (source), ignoring");
+				continue;
+			}
+
+			min_src_mv = pdo_pps_apdo_min_voltage(pdo);
+			max_src_mv = pdo_pps_apdo_max_voltage(pdo);
+			src_ma = pdo_pps_apdo_max_current(pdo);
+			src_mw = (src_ma * max_src_mv) / 1000;
+
+			/*
+			 * Now search through the sink PDOs to find a matching
+			 * PPS APDO. Again skip the first sink PDO as this will
+			 * always be 5V 3A.
+			 */
+			for (j = 1; j < port->nr_snk_pdo; j++) {
+				pdo = port->snk_pdo[j];
+
+				switch (pdo_type(pdo)) {
+				case PDO_TYPE_APDO:
+					if (pdo_apdo_type(pdo) != APDO_TYPE_PPS) {
+						tcpm_log(port,
+							 "Not PPS APDO (sink), ignoring");
+						continue;
+					}
+
+					min_snk_mv =
+						pdo_pps_apdo_min_voltage(pdo);
+					max_snk_mv =
+						pdo_pps_apdo_max_voltage(pdo);
+					snk_ma =
+						pdo_pps_apdo_max_current(pdo);
+					break;
+				default:
+					tcpm_log(port,
+						 "Not APDO type (sink), ignoring");
+					continue;
+				}
+
+				if (max_src_mv <= max_snk_mv &&
+				    min_src_mv >= min_snk_mv) {
+					/* Prefer higher voltages if available */
+					if ((src_mw == max_mw &&
+					     min_src_mv > max_mv) ||
+					    src_mw > max_mw) {
+						src_pdo = i;
+						snk_pdo = j;
+						max_mw = src_mw;
+						max_mv = max_src_mv;
+					}
+				}
+			}
+
+			break;
+		default:
+			tcpm_log(port, "Not APDO type (source), ignoring");
+			continue;
+		}
+	}
+
+	if (src_pdo) {
+		pdo = port->source_caps[src_pdo];
+
+		port->pps_data.min_volt = pdo_pps_apdo_min_voltage(pdo);
+		port->pps_data.max_volt = pdo_pps_apdo_max_voltage(pdo);
+		port->pps_data.max_curr =
+			min_pps_apdo_current(pdo, port->snk_pdo[snk_pdo]);
+		port->pps_data.out_volt =
+			min(pdo_pps_apdo_max_voltage(pdo), port->pps_data.out_volt);
+		port->pps_data.op_curr =
+			min(port->pps_data.max_curr, port->pps_data.op_curr);
+	}
+
+	return src_pdo;
+}
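
/*
 * Worked example of the scoring above (APDO values assumed): a source
 * offering a 3.3-11 V / 3 A PPS APDO is rated at its maximum voltage,
 *	src_mw = (3000 * 11000) / 1000 = 33000 mW
 * so it wins over a 3.3-5.9 V / 3 A APDO (17700 mW) whenever both fit
 * within a sink APDO's voltage window.
 */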
+
 static int tcpm_pd_build_request(struct tcpm_port *port, u32 *rdo)
 {
 	unsigned int mv, ma, mw, flags;
 	unsigned int max_ma, max_mw;
 	enum pd_pdo_type type;
-	int index;
-	u32 pdo;
+	u32 pdo, matching_snk_pdo;
+	int src_pdo_index = 0;
+	int snk_pdo_index = 0;
+	int ret;
 
-	index = tcpm_pd_select_pdo(port);
-	if (index < 0)
-		return -EINVAL;
-	pdo = port->source_caps[index];
+	ret = tcpm_pd_select_pdo(port, &snk_pdo_index, &src_pdo_index);
+	if (ret < 0)
+		return ret;
+
+	pdo = port->source_caps[src_pdo_index];
+	matching_snk_pdo = port->snk_pdo[snk_pdo_index];
 	type = pdo_type(pdo);
 
-	if (type == PDO_TYPE_FIXED)
+	switch (type) {
+	case PDO_TYPE_FIXED:
 		mv = pdo_fixed_voltage(pdo);
-	else
+		break;
+	case PDO_TYPE_BATT:
+	case PDO_TYPE_VAR:
 		mv = pdo_min_voltage(pdo);
-
-	/* Select maximum available current within the board's power limit */
-	if (type == PDO_TYPE_BATT) {
-		mw = pdo_max_power(pdo);
-		ma = 1000 * min(mw, port->max_snk_mw) / mv;
-	} else {
-		ma = min(pdo_max_current(pdo),
-			 1000 * port->max_snk_mw / mv);
+		break;
+	default:
+		tcpm_log(port, "Invalid PDO selected!");
+		return -EINVAL;
 	}
-	ma = min(ma, port->max_snk_ma);
+
+	/* Select maximum available current within the sink pdo's limit */
+	if (type == PDO_TYPE_BATT) {
+		mw = min_power(pdo, matching_snk_pdo);
+		ma = 1000 * mw / mv;
+	} else {
+		ma = min_current(pdo, matching_snk_pdo);
+		mw = ma * mv / 1000;
+	}
 
 	flags = RDO_USB_COMM | RDO_NO_SUSPEND;
 
 	/* Set mismatch bit if offered power is less than operating power */
-	mw = ma * mv / 1000;
 	max_ma = ma;
 	max_mw = mw;
 	if (mw < port->operating_snk_mw) {
 		flags |= RDO_CAP_MISMATCH;
-		max_mw = port->operating_snk_mw;
-		max_ma = max_mw * 1000 / mv;
+		if (type == PDO_TYPE_BATT &&
+		    (pdo_max_power(matching_snk_pdo) > pdo_max_power(pdo)))
+			max_mw = pdo_max_power(matching_snk_pdo);
+		else if (pdo_max_current(matching_snk_pdo) >
+			 pdo_max_current(pdo))
+			max_ma = pdo_max_current(matching_snk_pdo);
 	}
 
 	tcpm_log(port, "cc=%d cc1=%d cc2=%d vbus=%d vconn=%s polarity=%d",
@@ -1858,16 +2258,16 @@
 		 port->polarity);
 
 	if (type == PDO_TYPE_BATT) {
-		*rdo = RDO_BATT(index + 1, mw, max_mw, flags);
+		*rdo = RDO_BATT(src_pdo_index + 1, mw, max_mw, flags);
 
 		tcpm_log(port, "Requesting PDO %d: %u mV, %u mW%s",
-			 index, mv, mw,
+			 src_pdo_index, mv, mw,
 			 flags & RDO_CAP_MISMATCH ? " [mismatch]" : "");
 	} else {
-		*rdo = RDO_FIXED(index + 1, ma, max_ma, flags);
+		*rdo = RDO_FIXED(src_pdo_index + 1, ma, max_ma, flags);
 
 		tcpm_log(port, "Requesting PDO %d: %u mV, %u mA%s",
-			 index, mv, ma,
+			 src_pdo_index, mv, ma,
 			 flags & RDO_CAP_MISMATCH ? " [mismatch]" : "");
 	}
 
@@ -1891,6 +2291,105 @@
 	msg.header = PD_HEADER_LE(PD_DATA_REQUEST,
 				  port->pwr_role,
 				  port->data_role,
+				  port->negotiated_rev,
+				  port->message_id, 1);
+	msg.payload[0] = cpu_to_le32(rdo);
+
+	return tcpm_pd_transmit(port, TCPC_TX_SOP, &msg);
+}
+
+static int tcpm_pd_build_pps_request(struct tcpm_port *port, u32 *rdo)
+{
+	unsigned int out_mv, op_ma, op_mw, min_mv, max_mv, max_ma, flags;
+	enum pd_pdo_type type;
+	unsigned int src_pdo_index;
+	u32 pdo;
+
+	src_pdo_index = tcpm_pd_select_pps_apdo(port);
+	if (!src_pdo_index)
+		return -EOPNOTSUPP;
+
+	pdo = port->source_caps[src_pdo_index];
+	type = pdo_type(pdo);
+
+	switch (type) {
+	case PDO_TYPE_APDO:
+		if (pdo_apdo_type(pdo) != APDO_TYPE_PPS) {
+			tcpm_log(port, "Invalid APDO selected!");
+			return -EINVAL;
+		}
+		min_mv = port->pps_data.min_volt;
+		max_mv = port->pps_data.max_volt;
+		max_ma = port->pps_data.max_curr;
+		out_mv = port->pps_data.out_volt;
+		op_ma = port->pps_data.op_curr;
+		break;
+	default:
+		tcpm_log(port, "Invalid PDO selected!");
+		return -EINVAL;
+	}
+
+	flags = RDO_USB_COMM | RDO_NO_SUSPEND;
+
+	op_mw = (op_ma * out_mv) / 1000;
+	if (op_mw < port->operating_snk_mw) {
+		/*
+		 * Try raising current to meet power needs. If that's not enough
+		 * then try upping the voltage. If that's still not enough
+		 * then we've obviously chosen a PPS APDO which really isn't
+		 * suitable so abandon ship.
+		 */
+		op_ma = (port->operating_snk_mw * 1000) / out_mv;
+		if ((port->operating_snk_mw * 1000) % out_mv)
+			++op_ma;
+		op_ma += RDO_PROG_CURR_MA_STEP - (op_ma % RDO_PROG_CURR_MA_STEP);
+
+		if (op_ma > max_ma) {
+			op_ma = max_ma;
+			out_mv = (port->operating_snk_mw * 1000) / op_ma;
+			if ((port->operating_snk_mw * 1000) % op_ma)
+				++out_mv;
+			out_mv += RDO_PROG_VOLT_MV_STEP -
+				  (out_mv % RDO_PROG_VOLT_MV_STEP);
+
+			if (out_mv > max_mv) {
+				tcpm_log(port, "Invalid PPS APDO selected!");
+				return -EINVAL;
+			}
+		}
+	}
+
+	tcpm_log(port, "cc=%d cc1=%d cc2=%d vbus=%d vconn=%s polarity=%d",
+		 port->cc_req, port->cc1, port->cc2, port->vbus_source,
+		 port->vconn_role == TYPEC_SOURCE ? "source" : "sink",
+		 port->polarity);
+
+	*rdo = RDO_PROG(src_pdo_index + 1, out_mv, op_ma, flags);
+
+	tcpm_log(port, "Requesting APDO %d: %u mV, %u mA",
+		 src_pdo_index, out_mv, op_ma);
+
+	port->pps_data.op_curr = op_ma;
+	port->pps_data.out_volt = out_mv;
+
+	return 0;
+}
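
/*
 * Worked example of the round-up above (RDO_PROG_CURR_MA_STEP is 50 mA
 * and RDO_PROG_VOLT_MV_STEP is 20 mV in <linux/usb/pd.h>): with
 * operating_snk_mw = 2500 and out_mv = 5000,
 *	op_ma = (2500 * 1000) / 5000 = 500
 * and since the adjustment adds a full step even when op_ma is already
 * aligned, the requested current becomes 550 mA -- an over-request,
 * never an under-request.
 */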
+
+static int tcpm_pd_send_pps_request(struct tcpm_port *port)
+{
+	struct pd_message msg;
+	int ret;
+	u32 rdo;
+
+	ret = tcpm_pd_build_pps_request(port, &rdo);
+	if (ret < 0)
+		return ret;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.header = PD_HEADER_LE(PD_DATA_REQUEST,
+				  port->pwr_role,
+				  port->data_role,
+				  port->negotiated_rev,
 				  port->message_id, 1);
 	msg.payload[0] = cpu_to_le32(rdo);
 
@@ -2077,6 +2576,7 @@
 	tcpm_typec_disconnect(port);
 	port->attached = false;
 	port->pd_capable = false;
+	port->pps_data.supported = false;
 
 	/*
 	 * First Rx ID should be 0; set this to a sentinel of -1 so that
@@ -2094,6 +2594,11 @@
 	tcpm_set_attached_state(port, false);
 	port->try_src_count = 0;
 	port->try_snk_count = 0;
+	port->supply_voltage = 0;
+	port->current_limit = 0;
+	port->usb_type = POWER_SUPPLY_USB_TYPE_C;
+
+	power_supply_changed(port->psy);
 }
 
 static void tcpm_detach(struct tcpm_port *port)
@@ -2181,14 +2686,6 @@
 	return SNK_UNATTACHED;
 }
 
-static inline enum tcpm_state ready_state(struct tcpm_port *port)
-{
-	if (port->pwr_role == TYPEC_SOURCE)
-		return SRC_READY;
-	else
-		return SNK_READY;
-}
-
 static inline enum tcpm_state unattached_state(struct tcpm_port *port)
 {
 	if (port->port_type == TYPEC_PORT_DRP) {
@@ -2338,6 +2835,7 @@
 		typec_set_pwr_opmode(port->typec_port, opmode);
 		port->pwr_opmode = TYPEC_PWR_MODE_USB;
 		port->caps_count = 0;
+		port->negotiated_rev = PD_MAX_REV;
 		port->message_id = 0;
 		port->rx_msgid = -1;
 		port->explicit_contract = false;
@@ -2398,6 +2896,7 @@
 
 		tcpm_swap_complete(port, 0);
 		tcpm_typec_connect(port);
+
 		tcpm_check_send_discover(port);
 		/*
 		 * 6.3.5
@@ -2421,6 +2920,7 @@
 	case SNK_UNATTACHED:
 		if (!port->non_pd_role_swap)
 			tcpm_swap_complete(port, -ENOTCONN);
+		tcpm_pps_complete(port, -ENOTCONN);
 		tcpm_snk_detach(port);
 		if (tcpm_start_drp_toggling(port)) {
 			tcpm_set_state(port, DRP_TOGGLING, 0);
@@ -2510,6 +3010,7 @@
 					      port->cc2 : port->cc1);
 		typec_set_pwr_opmode(port->typec_port, opmode);
 		port->pwr_opmode = TYPEC_PWR_MODE_USB;
+		port->negotiated_rev = PD_MAX_REV;
 		port->message_id = 0;
 		port->rx_msgid = -1;
 		port->explicit_contract = false;
@@ -2580,6 +3081,24 @@
 					    PD_T_SENDER_RESPONSE);
 		}
 		break;
+	case SNK_NEGOTIATE_PPS_CAPABILITIES:
+		ret = tcpm_pd_send_pps_request(port);
+		if (ret < 0) {
+			port->pps_status = ret;
+			/*
+			 * If this was called due to updates to sink
+			 * capabilities, and PPS is no longer valid, we should
+			 * safely fall back to a standard PDO.
+			 */
+			if (port->update_sink_caps)
+				tcpm_set_state(port, SNK_NEGOTIATE_CAPABILITIES, 0);
+			else
+				tcpm_set_state(port, SNK_READY, 0);
+		} else {
+			tcpm_set_state_cond(port, hard_reset_state(port),
+					    PD_T_SENDER_RESPONSE);
+		}
+		break;
 	case SNK_TRANSITION_SINK:
 	case SNK_TRANSITION_SINK_VBUS:
 		tcpm_set_state(port, hard_reset_state(port),
@@ -2587,6 +3106,7 @@
 		break;
 	case SNK_READY:
 		port->try_snk_count = 0;
+		port->update_sink_caps = false;
 		if (port->explicit_contract) {
 			typec_set_pwr_opmode(port->typec_port,
 					     TYPEC_PWR_MODE_PD);
@@ -2596,6 +3116,10 @@
 		tcpm_swap_complete(port, 0);
 		tcpm_typec_connect(port);
 		tcpm_check_send_discover(port);
+		tcpm_pps_complete(port, port->pps_status);
+
+		power_supply_changed(port->psy);
+
 		break;
 
 	/* Accessory states */
@@ -2642,6 +3166,7 @@
 		tcpm_set_state(port, SRC_UNATTACHED, PD_T_PS_SOURCE_ON);
 		break;
 	case SNK_HARD_RESET_SINK_OFF:
+		memset(&port->pps_data, 0, sizeof(port->pps_data));
 		tcpm_set_vconn(port, false);
 		tcpm_set_charge(port, false);
 		tcpm_set_roles(port, false, TYPEC_SINK, TYPEC_DEVICE);
@@ -2860,8 +3385,25 @@
 		/* Always switch to unattached state */
 		tcpm_set_state(port, unattached_state(port), 0);
 		break;
+	case GET_STATUS_SEND:
+		tcpm_pd_send_control(port, PD_CTRL_GET_STATUS);
+		tcpm_set_state(port, GET_STATUS_SEND_TIMEOUT,
+			       PD_T_SENDER_RESPONSE);
+		break;
+	case GET_STATUS_SEND_TIMEOUT:
+		tcpm_set_state(port, ready_state(port), 0);
+		break;
+	case GET_PPS_STATUS_SEND:
+		tcpm_pd_send_control(port, PD_CTRL_GET_PPS_STATUS);
+		tcpm_set_state(port, GET_PPS_STATUS_SEND_TIMEOUT,
+			       PD_T_SENDER_RESPONSE);
+		break;
+	case GET_PPS_STATUS_SEND_TIMEOUT:
+		tcpm_set_state(port, ready_state(port), 0);
+		break;
 	case ERROR_RECOVERY:
 		tcpm_swap_complete(port, -EPROTO);
+		tcpm_pps_complete(port, -EPROTO);
 		tcpm_set_state(port, PORT_RESET, 0);
 		break;
 	case PORT_RESET:
@@ -3444,6 +3986,162 @@
 	return ret;
 }
 
+static int tcpm_pps_set_op_curr(struct tcpm_port *port, u16 op_curr)
+{
+	unsigned int target_mw;
+	int ret;
+
+	mutex_lock(&port->swap_lock);
+	mutex_lock(&port->lock);
+
+	if (!port->pps_data.active) {
+		ret = -EOPNOTSUPP;
+		goto port_unlock;
+	}
+
+	if (port->state != SNK_READY) {
+		ret = -EAGAIN;
+		goto port_unlock;
+	}
+
+	if (op_curr > port->pps_data.max_curr) {
+		ret = -EINVAL;
+		goto port_unlock;
+	}
+
+	target_mw = (op_curr * port->pps_data.out_volt) / 1000;
+	if (target_mw < port->operating_snk_mw) {
+		ret = -EINVAL;
+		goto port_unlock;
+	}
+
+	reinit_completion(&port->pps_complete);
+	port->pps_data.op_curr = op_curr;
+	port->pps_status = 0;
+	port->pps_pending = true;
+	tcpm_set_state(port, SNK_NEGOTIATE_PPS_CAPABILITIES, 0);
+	mutex_unlock(&port->lock);
+
+	if (!wait_for_completion_timeout(&port->pps_complete,
+				msecs_to_jiffies(PD_PPS_CTRL_TIMEOUT)))
+		ret = -ETIMEDOUT;
+	else
+		ret = port->pps_status;
+
+	goto swap_unlock;
+
+port_unlock:
+	mutex_unlock(&port->lock);
+swap_unlock:
+	mutex_unlock(&port->swap_lock);
+
+	return ret;
+}
+
+static int tcpm_pps_set_out_volt(struct tcpm_port *port, u16 out_volt)
+{
+	unsigned int target_mw;
+	int ret;
+
+	mutex_lock(&port->swap_lock);
+	mutex_lock(&port->lock);
+
+	if (!port->pps_data.active) {
+		ret = -EOPNOTSUPP;
+		goto port_unlock;
+	}
+
+	if (port->state != SNK_READY) {
+		ret = -EAGAIN;
+		goto port_unlock;
+	}
+
+	if (out_volt < port->pps_data.min_volt ||
+	    out_volt > port->pps_data.max_volt) {
+		ret = -EINVAL;
+		goto port_unlock;
+	}
+
+	target_mw = (port->pps_data.op_curr * out_volt) / 1000;
+	if (target_mw < port->operating_snk_mw) {
+		ret = -EINVAL;
+		goto port_unlock;
+	}
+
+	reinit_completion(&port->pps_complete);
+	port->pps_data.out_volt = out_volt;
+	port->pps_status = 0;
+	port->pps_pending = true;
+	tcpm_set_state(port, SNK_NEGOTIATE_PPS_CAPABILITIES, 0);
+	mutex_unlock(&port->lock);
+
+	if (!wait_for_completion_timeout(&port->pps_complete,
+				msecs_to_jiffies(PD_PPS_CTRL_TIMEOUT)))
+		ret = -ETIMEDOUT;
+	else
+		ret = port->pps_status;
+
+	goto swap_unlock;
+
+port_unlock:
+	mutex_unlock(&port->lock);
+swap_unlock:
+	mutex_unlock(&port->swap_lock);
+
+	return ret;
+}
+
+static int tcpm_pps_activate(struct tcpm_port *port, bool activate)
+{
+	int ret = 0;
+
+	mutex_lock(&port->swap_lock);
+	mutex_lock(&port->lock);
+
+	if (!port->pps_data.supported) {
+		ret = -EOPNOTSUPP;
+		goto port_unlock;
+	}
+
+	/* Trying to deactivate PPS when already deactivated so just bail */
+	if (!port->pps_data.active && !activate)
+		goto port_unlock;
+
+	if (port->state != SNK_READY) {
+		ret = -EAGAIN;
+		goto port_unlock;
+	}
+
+	reinit_completion(&port->pps_complete);
+	port->pps_status = 0;
+	port->pps_pending = true;
+
+	/* Trigger PPS request or move back to standard PDO contract */
+	if (activate) {
+		port->pps_data.out_volt = port->supply_voltage;
+		port->pps_data.op_curr = port->current_limit;
+		tcpm_set_state(port, SNK_NEGOTIATE_PPS_CAPABILITIES, 0);
+	} else {
+		tcpm_set_state(port, SNK_NEGOTIATE_CAPABILITIES, 0);
+	}
+	mutex_unlock(&port->lock);
+
+	if (!wait_for_completion_timeout(&port->pps_complete,
+				msecs_to_jiffies(PD_PPS_CTRL_TIMEOUT)))
+		ret = -ETIMEDOUT;
+	else
+		ret = port->pps_status;
+
+	goto swap_unlock;
+
+port_unlock:
+	mutex_unlock(&port->lock);
+swap_unlock:
+	mutex_unlock(&port->swap_lock);
+
+	return ret;
+}
+
 static void tcpm_init(struct tcpm_port *port)
 {
 	enum typec_cc_status cc1, cc2;
@@ -3569,9 +4267,6 @@
 
 int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo,
 				  unsigned int nr_pdo,
-				  unsigned int max_snk_mv,
-				  unsigned int max_snk_ma,
-				  unsigned int max_snk_mw,
 				  unsigned int operating_snk_mw)
 {
 	if (tcpm_validate_caps(port, pdo, nr_pdo))
@@ -3579,17 +4274,19 @@
 
 	mutex_lock(&port->lock);
 	port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, pdo, nr_pdo);
-	port->max_snk_mv = max_snk_mv;
-	port->max_snk_ma = max_snk_ma;
-	port->max_snk_mw = max_snk_mw;
 	port->operating_snk_mw = operating_snk_mw;
+	port->update_sink_caps = true;
 
 	switch (port->state) {
 	case SNK_NEGOTIATE_CAPABILITIES:
+	case SNK_NEGOTIATE_PPS_CAPABILITIES:
 	case SNK_READY:
 	case SNK_TRANSITION_SINK:
 	case SNK_TRANSITION_SINK_VBUS:
-		tcpm_set_state(port, SNK_NEGOTIATE_CAPABILITIES, 0);
+		if (port->pps_data.active)
+			tcpm_set_state(port, SNK_NEGOTIATE_PPS_CAPABILITIES, 0);
+		else
+			tcpm_set_state(port, SNK_NEGOTIATE_CAPABILITIES, 0);
 		break;
 	default:
 		break;
@@ -3599,6 +4296,231 @@
 }
 EXPORT_SYMBOL_GPL(tcpm_update_sink_capabilities);
 
+/* Power Supply access to expose source power information */
+enum tcpm_psy_online_states {
+	TCPM_PSY_OFFLINE = 0,
+	TCPM_PSY_FIXED_ONLINE,
+	TCPM_PSY_PROG_ONLINE,
+};
+
+static enum power_supply_property tcpm_psy_props[] = {
+	POWER_SUPPLY_PROP_USB_TYPE,
+	POWER_SUPPLY_PROP_ONLINE,
+	POWER_SUPPLY_PROP_VOLTAGE_MIN,
+	POWER_SUPPLY_PROP_VOLTAGE_MAX,
+	POWER_SUPPLY_PROP_VOLTAGE_NOW,
+	POWER_SUPPLY_PROP_CURRENT_MAX,
+	POWER_SUPPLY_PROP_CURRENT_NOW,
+};
+
+static int tcpm_psy_get_online(struct tcpm_port *port,
+			       union power_supply_propval *val)
+{
+	if (port->vbus_charge) {
+		if (port->pps_data.active)
+			val->intval = TCPM_PSY_PROG_ONLINE;
+		else
+			val->intval = TCPM_PSY_FIXED_ONLINE;
+	} else {
+		val->intval = TCPM_PSY_OFFLINE;
+	}
+
+	return 0;
+}
+
+static int tcpm_psy_get_voltage_min(struct tcpm_port *port,
+				    union power_supply_propval *val)
+{
+	if (port->pps_data.active)
+		val->intval = port->pps_data.min_volt * 1000;
+	else
+		val->intval = port->supply_voltage * 1000;
+
+	return 0;
+}
+
+static int tcpm_psy_get_voltage_max(struct tcpm_port *port,
+				    union power_supply_propval *val)
+{
+	if (port->pps_data.active)
+		val->intval = port->pps_data.max_volt * 1000;
+	else
+		val->intval = port->supply_voltage * 1000;
+
+	return 0;
+}
+
+static int tcpm_psy_get_voltage_now(struct tcpm_port *port,
+				    union power_supply_propval *val)
+{
+	val->intval = port->supply_voltage * 1000;
+
+	return 0;
+}
+
+static int tcpm_psy_get_current_max(struct tcpm_port *port,
+				    union power_supply_propval *val)
+{
+	if (port->pps_data.active)
+		val->intval = port->pps_data.max_curr * 1000;
+	else
+		val->intval = port->current_limit * 1000;
+
+	return 0;
+}
+
+static int tcpm_psy_get_current_now(struct tcpm_port *port,
+				    union power_supply_propval *val)
+{
+	val->intval = port->current_limit * 1000;
+
+	return 0;
+}
+
+static int tcpm_psy_get_prop(struct power_supply *psy,
+			     enum power_supply_property psp,
+			     union power_supply_propval *val)
+{
+	struct tcpm_port *port = power_supply_get_drvdata(psy);
+	int ret = 0;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_USB_TYPE:
+		val->intval = port->usb_type;
+		break;
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = tcpm_psy_get_online(port, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MIN:
+		ret = tcpm_psy_get_voltage_min(port, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_MAX:
+		ret = tcpm_psy_get_voltage_max(port, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		ret = tcpm_psy_get_voltage_now(port, val);
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_MAX:
+		ret = tcpm_psy_get_current_max(port, val);
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		ret = tcpm_psy_get_current_now(port, val);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int tcpm_psy_set_online(struct tcpm_port *port,
+			       const union power_supply_propval *val)
+{
+	int ret;
+
+	switch (val->intval) {
+	case TCPM_PSY_FIXED_ONLINE:
+		ret = tcpm_pps_activate(port, false);
+		break;
+	case TCPM_PSY_PROG_ONLINE:
+		ret = tcpm_pps_activate(port, true);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int tcpm_psy_set_prop(struct power_supply *psy,
+			     enum power_supply_property psp,
+			     const union power_supply_propval *val)
+{
+	struct tcpm_port *port = power_supply_get_drvdata(psy);
+	int ret;
+
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+		ret = tcpm_psy_set_online(port, val);
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+		if (val->intval < port->pps_data.min_volt * 1000 ||
+		    val->intval > port->pps_data.max_volt * 1000)
+			ret = -EINVAL;
+		else
+			ret = tcpm_pps_set_out_volt(port, val->intval / 1000);
+		break;
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		if (val->intval > port->pps_data.max_curr * 1000)
+			ret = -EINVAL;
+		else
+			ret = tcpm_pps_set_op_curr(port, val->intval / 1000);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int tcpm_psy_prop_writeable(struct power_supply *psy,
+				   enum power_supply_property psp)
+{
+	switch (psp) {
+	case POWER_SUPPLY_PROP_ONLINE:
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
+	case POWER_SUPPLY_PROP_CURRENT_NOW:
+		return 1;
+	default:
+		return 0;
+	}
+}
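
/*
 * Usage sketch (a minimal, assumed consumer, not defined by this
 * patch): the PPS helpers above are driven through the writable
 * power-supply properties, with values in microunits. Requesting a
 * 9 V / 2 A programmable contract maps onto tcpm_pps_activate(),
 * tcpm_pps_set_out_volt(9000) and tcpm_pps_set_op_curr(2000):
 */
static void example_request_pps(struct power_supply *psy)
{
	union power_supply_propval val;

	val.intval = TCPM_PSY_PROG_ONLINE;	/* switch to the PPS contract */
	power_supply_set_property(psy, POWER_SUPPLY_PROP_ONLINE, &val);

	val.intval = 9000000;			/* 9 V, in microvolts */
	power_supply_set_property(psy, POWER_SUPPLY_PROP_VOLTAGE_NOW, &val);

	val.intval = 2000000;			/* 2 A, in microamps */
	power_supply_set_property(psy, POWER_SUPPLY_PROP_CURRENT_NOW, &val);
}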
+
+static enum power_supply_usb_type tcpm_psy_usb_types[] = {
+	POWER_SUPPLY_USB_TYPE_C,
+	POWER_SUPPLY_USB_TYPE_PD,
+	POWER_SUPPLY_USB_TYPE_PD_PPS,
+};
+
+static const char *tcpm_psy_name_prefix = "tcpm-source-psy-";
+
+static int devm_tcpm_psy_register(struct tcpm_port *port)
+{
+	struct power_supply_config psy_cfg = {};
+	const char *port_dev_name = dev_name(port->dev);
+	size_t psy_name_len = strlen(tcpm_psy_name_prefix) +
+				     strlen(port_dev_name) + 1;
+	char *psy_name;
+
+	psy_cfg.drv_data = port;
+	psy_cfg.fwnode = dev_fwnode(port->dev);
+	psy_name = devm_kzalloc(port->dev, psy_name_len, GFP_KERNEL);
+	if (!psy_name)
+		return -ENOMEM;
+
+	snprintf(psy_name, psy_name_len, "%s%s", tcpm_psy_name_prefix,
+		 port_dev_name);
+	port->psy_desc.name = psy_name;
+	port->psy_desc.type = POWER_SUPPLY_TYPE_USB;
+	port->psy_desc.usb_types = tcpm_psy_usb_types;
+	port->psy_desc.num_usb_types = ARRAY_SIZE(tcpm_psy_usb_types);
+	port->psy_desc.properties = tcpm_psy_props;
+	port->psy_desc.num_properties = ARRAY_SIZE(tcpm_psy_props);
+	port->psy_desc.get_property = tcpm_psy_get_prop;
+	port->psy_desc.set_property = tcpm_psy_set_prop;
+	port->psy_desc.property_is_writeable = tcpm_psy_prop_writeable;
+
+	port->usb_type = POWER_SUPPLY_USB_TYPE_C;
+
+	port->psy = devm_power_supply_register(port->dev, &port->psy_desc,
+					       &psy_cfg);
+
+	return PTR_ERR_OR_ZERO(port->psy);
+}
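
/*
 * Consumer sketch ("usbc.0" is an assumed port device name): the
 * supply registers as "tcpm-source-psy-" + dev_name(port->dev), so a
 * charger driver could locate it and read the negotiated limits:
 */
static int example_read_negotiated_limit(void)
{
	union power_supply_propval val;
	struct power_supply *psy;
	int ret;

	psy = power_supply_get_by_name("tcpm-source-psy-usbc.0");
	if (!psy)
		return -ENODEV;

	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_CURRENT_MAX, &val);
	if (!ret)
		pr_info("negotiated current limit: %d uA\n", val.intval);

	power_supply_put(psy);
	return ret;
}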
+
 struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 {
 	struct tcpm_port *port;
@@ -3631,6 +4553,7 @@
 
 	init_completion(&port->tx_complete);
 	init_completion(&port->swap_complete);
+	init_completion(&port->pps_complete);
 	tcpm_debugfs_init(port);
 
 	if (tcpm_validate_caps(port, tcpc->config->src_pdo,
@@ -3647,9 +4570,6 @@
 	port->nr_snk_vdo = tcpm_copy_vdos(port->snk_vdo, tcpc->config->snk_vdo,
 					  tcpc->config->nr_snk_vdo);
 
-	port->max_snk_mv = tcpc->config->max_snk_mv;
-	port->max_snk_ma = tcpc->config->max_snk_ma;
-	port->max_snk_mw = tcpc->config->max_snk_mw;
 	port->operating_snk_mw = tcpc->config->operating_snk_mw;
 	if (!tcpc->config->try_role_hw)
 		port->try_role = tcpc->config->default_role;
@@ -3660,7 +4580,7 @@
 	port->typec_caps.type = tcpc->config->type;
 	port->typec_caps.data = tcpc->config->data;
 	port->typec_caps.revision = 0x0120;	/* Type-C spec release 1.2 */
-	port->typec_caps.pd_revision = 0x0200;	/* USB-PD spec release 2.0 */
+	port->typec_caps.pd_revision = 0x0300;	/* USB-PD spec release 3.0 */
 	port->typec_caps.dr_set = tcpm_dr_set;
 	port->typec_caps.pr_set = tcpm_pr_set;
 	port->typec_caps.vconn_set = tcpm_vconn_set;
@@ -3676,6 +4596,10 @@
 		goto out_destroy_wq;
 	}
 
+	err = devm_tcpm_psy_register(port);
+	if (err)
+		goto out_destroy_wq;
+
 	port->typec_port = typec_register_port(port->dev, &port->typec_caps);
 	if (IS_ERR(port->typec_port)) {
 		err = PTR_ERR(port->typec_port);
diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/typec_wcove.c
index 19cca7f..423208e 100644
--- a/drivers/usb/typec/typec_wcove.c
+++ b/drivers/usb/typec/typec_wcove.c
@@ -202,6 +202,10 @@
 	struct wcove_typec *wcove = tcpc_to_wcove(tcpc);
 	int ret;
 
+	ret = regmap_write(wcove->regmap, USBC_CONTROL1, 0);
+	if (ret)
+		return ret;
+
 	/* Unmask everything */
 	ret = regmap_write(wcove->regmap, USBC_IRQMASK1, 0);
 	if (ret)
@@ -285,8 +289,30 @@
 
 static int wcove_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc)
 {
-	/* XXX: Relying on the HW FSM to configure things correctly for now */
-	return 0;
+	struct wcove_typec *wcove = tcpc_to_wcove(tcpc);
+	unsigned int ctrl;
+
+	switch (cc) {
+	case TYPEC_CC_RD:
+		ctrl = USBC_CONTROL1_MODE_SNK;
+		break;
+	case TYPEC_CC_RP_DEF:
+		ctrl = USBC_CONTROL1_CURSRC_UA_80 | USBC_CONTROL1_MODE_SRC;
+		break;
+	case TYPEC_CC_RP_1_5:
+		ctrl = USBC_CONTROL1_CURSRC_UA_180 | USBC_CONTROL1_MODE_SRC;
+		break;
+	case TYPEC_CC_RP_3_0:
+		ctrl = USBC_CONTROL1_CURSRC_UA_330 | USBC_CONTROL1_MODE_SRC;
+		break;
+	case TYPEC_CC_OPEN:
+		ctrl = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return regmap_write(wcove->regmap, USBC_CONTROL1, ctrl);
 }
 
 static int wcove_set_polarity(struct tcpc_dev *tcpc, enum typec_cc_polarity pol)
@@ -558,6 +584,7 @@
 static const u32 snk_pdo[] = {
 	PDO_FIXED(5000, 500, PDO_FIXED_DUAL_ROLE | PDO_FIXED_DATA_SWAP |
 		  PDO_FIXED_USB_COMM),
+	PDO_VAR(5000, 12000, 3000),
 };
 
 static struct tcpc_config wcove_typec_config = {
@@ -566,9 +593,6 @@
 	.snk_pdo = snk_pdo,
 	.nr_snk_pdo = ARRAY_SIZE(snk_pdo),
 
-	.max_snk_mv = 12000,
-	.max_snk_ma = 3000,
-	.max_snk_mw = 36000,
 	.operating_snk_mw = 15000,
 
 	.type = TYPEC_PORT_DRP,
diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
index 4880838..be37aec 100644
--- a/drivers/usb/usbip/vhci_sysfs.c
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -10,6 +10,9 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
+/* Hardening for Spectre-v1 */
+#include <linux/nospec.h>
+
 #include "usbip_common.h"
 #include "vhci.h"
 
@@ -205,16 +208,20 @@
 	return 0;
 }
 
-static int valid_port(__u32 pdev_nr, __u32 rhport)
+static int valid_port(__u32 *pdev_nr, __u32 *rhport)
 {
-	if (pdev_nr >= vhci_num_controllers) {
-		pr_err("pdev %u\n", pdev_nr);
+	if (*pdev_nr >= vhci_num_controllers) {
+		pr_err("pdev %u\n", *pdev_nr);
 		return 0;
 	}
-	if (rhport >= VHCI_HC_PORTS) {
-		pr_err("rhport %u\n", rhport);
+	*pdev_nr = array_index_nospec(*pdev_nr, vhci_num_controllers);
+
+	if (*rhport >= VHCI_HC_PORTS) {
+		pr_err("rhport %u\n", *rhport);
 		return 0;
 	}
+	*rhport = array_index_nospec(*rhport, VHCI_HC_PORTS);
+
 	return 1;
 }
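
/*
 * Pattern sketch (generic types, assumed): the bounds check alone does
 * not stop speculative execution from continuing with an out-of-range
 * index, so the index is clamped with array_index_nospec() after the
 * check and before it is used to index the controller/port arrays.
 */
static struct vhci *example_lookup(struct vhci *table, __u32 idx, __u32 nr)
{
	if (idx >= nr)
		return NULL;
	idx = array_index_nospec(idx, nr);	/* clamped even under speculation */
	return &table[idx];
}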
 
@@ -232,7 +239,7 @@
 	pdev_nr = port_to_pdev_nr(port);
 	rhport = port_to_rhport(port);
 
-	if (!valid_port(pdev_nr, rhport))
+	if (!valid_port(&pdev_nr, &rhport))
 		return -EINVAL;
 
 	hcd = platform_get_drvdata(vhcis[pdev_nr].pdev);
@@ -258,7 +265,8 @@
 }
 static DEVICE_ATTR_WO(detach);
 
-static int valid_args(__u32 pdev_nr, __u32 rhport, enum usb_device_speed speed)
+static int valid_args(__u32 *pdev_nr, __u32 *rhport,
+		      enum usb_device_speed speed)
 {
 	if (!valid_port(pdev_nr, rhport)) {
 		return 0;
@@ -322,7 +330,7 @@
 			     sockfd, devid, speed);
 
 	/* check received parameters */
-	if (!valid_args(pdev_nr, rhport, speed))
+	if (!valid_args(&pdev_nr, &rhport, speed))
 		return -EINVAL;
 
 	hcd = platform_get_drvdata(vhcis[pdev_nr].pdev);
diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c
index 085700f..2a1be85 100644
--- a/drivers/vfio/virqfd.c
+++ b/drivers/vfio/virqfd.c
@@ -166,7 +166,7 @@
 	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
 	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
 
-	events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
+	events = vfs_poll(irqfd.file, &virqfd->pt);
 
 	/*
 	 * Check if there was an event already pending on the eventfd
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f0be5f3..895eaa2 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -208,7 +208,7 @@
 	if (poll->wqh)
 		return 0;
 
-	mask = file->f_op->poll(file, &poll->table);
+	mask = vfs_poll(file, &poll->table);
 	if (mask)
 		vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
 	if (mask & EPOLLERR) {
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index f741ba8..924d073 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -713,19 +713,6 @@
 	.show	= fb_seq_show,
 };
 
-static int proc_fb_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &proc_fb_seq_ops);
-}
-
-static const struct file_operations fb_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= proc_fb_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 /*
  * We hold a reference to the fb_info in file->private_data,
  * but if the current registered fb has changed, we don't
@@ -1877,7 +1864,7 @@
 {
 	int ret;
 
-	if (!proc_create("fb", 0, NULL, &fb_proc_fops))
+	if (!proc_create_seq("fb", 0, NULL, &proc_fb_seq_ops))
 		return -ENOMEM;
 
 	ret = register_chrdev(FB_MAJOR, "fb", &fb_fops);
diff --git a/drivers/video/fbdev/via/viafbdev.c b/drivers/video/fbdev/via/viafbdev.c
index badee04..9b45125 100644
--- a/drivers/video/fbdev/via/viafbdev.c
+++ b/drivers/video/fbdev/via/viafbdev.c
@@ -1475,19 +1475,6 @@
 	return 0;
 }
 
-static int viafb_sup_odev_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, viafb_sup_odev_proc_show, NULL);
-}
-
-static const struct file_operations viafb_sup_odev_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= viafb_sup_odev_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static ssize_t odev_update(const char __user *buffer, size_t count, u32 *odev)
 {
 	char buf[64], *ptr = buf;
@@ -1616,8 +1603,8 @@
 				&viafb_vt1636_proc_fops);
 #endif /* CONFIG_FB_VIA_DIRECT_PROCFS */
 
-		proc_create("supported_output_devices", 0, viafb_entry,
-			&viafb_sup_odev_proc_fops);
+		proc_create_single("supported_output_devices", 0, viafb_entry,
+			viafb_sup_odev_proc_show);
 		iga1_entry = proc_mkdir("iga1", viafb_entry);
 		shared->iga1_proc_entry = iga1_entry;
 		proc_create("output_devices", 0, iga1_entry,
diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c
index 111a0ab..38716eb5 100644
--- a/drivers/video/hdmi.c
+++ b/drivers/video/hdmi.c
@@ -93,6 +93,9 @@
 	if (size < length)
 		return -ENOSPC;
 
+	if (frame->picture_aspect > HDMI_PICTURE_ASPECT_16_9)
+		return -EINVAL;
+
 	memset(buffer, 0, size);
 
 	ptr[0] = frame->type;
diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c
index 398d226..6e2a961 100644
--- a/drivers/virt/vboxguest/vboxguest_linux.c
+++ b/drivers/virt/vboxguest/vboxguest_linux.c
@@ -121,7 +121,9 @@
 	if (!buf)
 		return -ENOMEM;
 
-	if (copy_from_user(buf, (void *)arg, hdr.size_in)) {
+	*((struct vbg_ioctl_hdr *)buf) = hdr;
+	if (copy_from_user(buf + sizeof(hdr), (void *)arg + sizeof(hdr),
+			   hdr.size_in - sizeof(hdr))) {
 		ret = -EFAULT;
 		goto out;
 	}
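
/*
 * Double-fetch note: the header was already copied from userspace and
 * validated once; re-reading it would let a racing thread change
 * size_in between check and use. The fix installs the trusted kernel
 * copy of the header into buf and fetches only the payload behind it:
 *
 *	memcpy(buf, &hdr, sizeof(hdr));			   // trusted header
 *	copy_from_user(buf + sizeof(hdr),		   // payload only
 *		       (void __user *)arg + sizeof(hdr),
 *		       hdr.size_in - sizeof(hdr));
 */
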
diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c
index 74f2e6e..8851d44 100644
--- a/drivers/w1/masters/mxc_w1.c
+++ b/drivers/w1/masters/mxc_w1.c
@@ -112,6 +112,10 @@
 	if (IS_ERR(mdev->clk))
 		return PTR_ERR(mdev->clk);
 
+	err = clk_prepare_enable(mdev->clk);
+	if (err)
+		return err;
+
 	clkrate = clk_get_rate(mdev->clk);
 	if (clkrate < 10000000)
 		dev_warn(&pdev->dev,
@@ -125,12 +129,10 @@
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mdev->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(mdev->regs))
-		return PTR_ERR(mdev->regs);
-
-	err = clk_prepare_enable(mdev->clk);
-	if (err)
-		return err;
+	if (IS_ERR(mdev->regs)) {
+		err = PTR_ERR(mdev->regs);
+		goto out_disable_clk;
+	}
 
 	/* Software reset 1-Wire module */
 	writeb(MXC_W1_RESET_RST, mdev->regs + MXC_W1_RESET);
@@ -146,8 +148,12 @@
 
 	err = w1_add_master_device(&mdev->bus_master);
 	if (err)
-		clk_disable_unprepare(mdev->clk);
+		goto out_disable_clk;
 
+	return 0;
+
+out_disable_clk:
+	clk_disable_unprepare(mdev->clk);
 	return err;
 }
 
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index 80a778b..caef0e0 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -751,7 +751,7 @@
 
 	/* slave modules need to be loaded in a context with unlocked mutex */
 	mutex_unlock(&dev->mutex);
-	request_module("w1-family-0x%02x", rn->family);
+	request_module("w1-family-0x%02X", rn->family);
 	mutex_lock(&dev->mutex);
 
 	spin_lock(&w1_flock);
diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c
index 075d120..0364d33 100644
--- a/drivers/w1/w1_io.c
+++ b/drivers/w1/w1_io.c
@@ -194,6 +194,7 @@
  *  bit 0 = id_bit
  *  bit 1 = comp_bit
  *  bit 2 = dir_taken
+ *
  * If both bits 0 & 1 are set, the search should be restarted.
  *
  * Return:        bit fields - see above
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index df05a26..2e4ca4d 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -96,19 +96,6 @@
 	.show  = zorro_seq_show,
 };
 
-static int zorro_devices_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &zorro_devices_seq_ops);
-}
-
-static const struct file_operations zorro_devices_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= zorro_devices_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static struct proc_dir_entry *proc_bus_zorro_dir;
 
 static int __init zorro_proc_attach_device(unsigned int slot)
@@ -132,8 +119,8 @@
 
 	if (MACH_IS_AMIGA && AMIGAHW_PRESENT(ZORRO)) {
 		proc_bus_zorro_dir = proc_mkdir("bus/zorro", NULL);
-		proc_create("devices", 0, proc_bus_zorro_dir,
-			    &zorro_devices_proc_fops);
+		proc_create_seq("devices", 0, proc_bus_zorro_dir,
+			    &zorro_devices_seq_ops);
 		for (slot = 0; slot < zorro_num_autocon; slot++)
 			zorro_proc_attach_device(slot);
 	}
diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c
index 4772847..8eeb84c 100644
--- a/drivers/zorro/zorro.c
+++ b/drivers/zorro/zorro.c
@@ -101,6 +101,7 @@
 	end = end > Z2RAM_END ? Z2RAM_SIZE : end-Z2RAM_START;
 	while (start < end) {
 		u32 chunk = start>>Z2RAM_CHUNKSHIFT;
+
 		if (flag)
 			set_bit(chunk, zorro_unused_z2ram);
 		else
@@ -117,6 +118,7 @@
 
 	for (i = 0; i < bridge->num_resources; i++) {
 		struct resource *r = &bridge->resource[i];
+
 		if (zorro_resource_start(z) >= r->start &&
 		    zorro_resource_end(z) <= r->end)
 			return r;
@@ -136,8 +138,7 @@
 	int error;
 
 	/* Initialize the Zorro bus */
-	bus = kzalloc(sizeof(*bus) +
-		      zorro_num_autocon * sizeof(bus->devices[0]),
+	bus = kzalloc(struct_size(bus, devices, zorro_num_autocon),
 		      GFP_KERNEL);
 	if (!bus)
 		return -ENOMEM;
@@ -168,6 +169,7 @@
 		if (z->id == ZORRO_PROD_GVP_EPC_BASE) {
 			/* GVP quirk */
 			unsigned long magic = zi->boardaddr + 0x8000;
+
 			z->id |= *(u16 *)ZTWO_VADDR(magic) & GVP_PRODMASK;
 		}
 		z->slotaddr = zi->slotaddr;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9ee5341..42e102e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -823,28 +823,21 @@
 	if (IS_ERR(dfid))
 		return ERR_CAST(dfid);
 
-	name = dentry->d_name.name;
-	fid = p9_client_walk(dfid, 1, &name, 1);
-	if (IS_ERR(fid)) {
-		if (fid == ERR_PTR(-ENOENT)) {
-			d_add(dentry, NULL);
-			return NULL;
-		}
-		return ERR_CAST(fid);
-	}
 	/*
 	 * Make sure we don't use a wrong inode due to parallel
 	 * unlink. For cached mode create calls request for new
 	 * inode. But with cache disabled, lookup should do this.
 	 */
-	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
+	name = dentry->d_name.name;
+	fid = p9_client_walk(dfid, 1, &name, 1);
+	if (fid == ERR_PTR(-ENOENT))
+		inode = NULL;
+	else if (IS_ERR(fid))
+		inode = ERR_CAST(fid);
+	else if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
 	else
 		inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
-	if (IS_ERR(inode)) {
-		p9_client_clunk(fid);
-		return ERR_CAST(inode);
-	}
 	/*
 	 * If we had a rename on the server and a parallel lookup
 	 * for the new name, then make sure we instantiate with
@@ -853,12 +846,14 @@
 	 * k/b.
 	 */
 	res = d_splice_alias(inode, dentry);
-	if (!res)
-		v9fs_fid_add(dentry, fid);
-	else if (!IS_ERR(res))
-		v9fs_fid_add(res, fid);
-	else
-		p9_client_clunk(fid);
+	if (!IS_ERR(fid)) {
+		if (!res)
+			v9fs_fid_add(dentry, fid);
+		else if (!IS_ERR(res))
+			v9fs_fid_add(res, fid);
+		else
+			p9_client_clunk(fid);
+	}
 	return res;
 }
 
diff --git a/fs/Kconfig b/fs/Kconfig
index bc821a8..ac4ac90 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -196,7 +196,7 @@
 	help
 	  hugetlbfs is a filesystem backing for HugeTLB pages, based on
 	  ramfs. For architectures that support it, say Y here and read
-	  <file:Documentation/vm/hugetlbpage.txt> for details.
+	  <file:Documentation/admin-guide/mm/hugetlbpage.rst> for details.
 
 	  If unsure, say N.
 
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 29444c8..e18eff8 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -146,20 +146,6 @@
 
 	obj->parent_id = inode->i_ino;
 
-	/*
-	 * '.' is handled by reserved_lookup() in fs/namei.c
-	 */
-	if (name->len == 2 && name->name[0] == '.' && name->name[1] == '.') {
-		/*
-		 * Currently unable to fill in the rest of 'obj',
-		 * but this is better than nothing.  We need to
-		 * ascend one level to find it's parent.
-		 */
-		obj->name_len = 0;
-		obj->file_id  = obj->parent_id;
-		goto free_out;
-	}
-
 	read_lock(&adfs_dir_lock);
 
 	ret = ops->setpos(&dir, 0);
@@ -266,17 +252,17 @@
 
 	error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
 	if (error == 0) {
-		error = -EACCES;
 		/*
 		 * This only returns NULL if get_empty_inode
 		 * fails.
 		 */
 		inode = adfs_iget(dir->i_sb, &obj);
-		if (inode)
-			error = 0;
+		if (!inode)
+			inode = ERR_PTR(-EACCES);
+	} else if (error != -ENOENT) {
+		inode = ERR_PTR(error);
 	}
-	d_add(dentry, inode);
-	return ERR_PTR(error);
+	return d_splice_alias(inode, dentry);
 }
 
 /*
diff --git a/fs/affs/super.c b/fs/affs/super.c
index e602619..d1ad11a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -241,6 +241,7 @@
 			affs_set_opt(*mount_opts, SF_NO_TRUNCATE);
 			break;
 		case Opt_prefix:
+			kfree(*prefix);
 			*prefix = match_strdup(&args[0]);
 			if (!*prefix)
 				return 0;
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 7587fb6..2c46c46 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -43,8 +43,7 @@
 
 	_enter("%u,%u,%u", nr, service, port);
 
-	alist = kzalloc(sizeof(*alist) + sizeof(alist->addrs[0]) * nr,
-			GFP_KERNEL);
+	alist = kzalloc(struct_size(alist, addrs, nr), GFP_KERNEL);
 	if (!alist)
 		return NULL;
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 839a222..3aad327 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -62,7 +62,6 @@
 	.llseek		= no_llseek,
 };
 
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file);
 static void *afs_proc_cell_volumes_start(struct seq_file *p, loff_t *pos);
 static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
 					loff_t *pos);
@@ -76,15 +75,6 @@
 	.show	= afs_proc_cell_volumes_show,
 };
 
-static const struct file_operations afs_proc_cell_volumes_fops = {
-	.open		= afs_proc_cell_volumes_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int afs_proc_cell_vlservers_open(struct inode *inode,
-					struct file *file);
 static void *afs_proc_cell_vlservers_start(struct seq_file *p, loff_t *pos);
 static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
 					  loff_t *pos);
@@ -98,14 +88,6 @@
 	.show	= afs_proc_cell_vlservers_show,
 };
 
-static const struct file_operations afs_proc_cell_vlservers_fops = {
-	.open		= afs_proc_cell_vlservers_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int afs_proc_servers_open(struct inode *inode, struct file *file);
 static void *afs_proc_servers_start(struct seq_file *p, loff_t *pos);
 static void *afs_proc_servers_next(struct seq_file *p, void *v,
 					loff_t *pos);
@@ -119,13 +101,6 @@
 	.show	= afs_proc_servers_show,
 };
 
-static const struct file_operations afs_proc_servers_fops = {
-	.open		= afs_proc_servers_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static int afs_proc_sysname_open(struct inode *inode, struct file *file);
 static int afs_proc_sysname_release(struct inode *inode, struct file *file);
 static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos);
@@ -152,7 +127,7 @@
 	.write		= afs_proc_sysname_write,
 };
 
-static const struct file_operations afs_proc_stats_fops;
+static int afs_proc_stats_show(struct seq_file *m, void *v);
 
 /*
  * initialise the /proc/fs/afs/ directory
@@ -167,8 +142,8 @@
 
 	if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
 	    !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) ||
-	    !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) ||
-	    !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) ||
+	    !proc_create_seq("servers", 0644, net->proc_afs, &afs_proc_servers_ops) ||
+	    !proc_create_single("stats", 0644, net->proc_afs, afs_proc_stats_show) ||
 	    !proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops))
 		goto error_tree;
 
@@ -196,16 +171,7 @@
  */
 static int afs_proc_cells_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &afs_proc_cells_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE_DATA(inode);
-	return 0;
+	return seq_open(file, &afs_proc_cells_ops);
 }
 
 /*
@@ -430,10 +396,11 @@
 	if (!dir)
 		goto error_dir;
 
-	if (!proc_create_data("vlservers", 0, dir,
-			      &afs_proc_cell_vlservers_fops, cell) ||
-	    !proc_create_data("volumes", 0, dir,
-			      &afs_proc_cell_volumes_fops, cell))
+	if (!proc_create_seq_data("vlservers", 0, dir,
+			&afs_proc_cell_vlservers_ops, cell))
+		goto error_tree;
+	if (!proc_create_seq_data("volumes", 0, dir, &afs_proc_cell_volumes_ops,
+			cell))
 		goto error_tree;
 
 	_leave(" = 0");
@@ -459,36 +426,13 @@
 }
 
 /*
- * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells
- */
-static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
-{
-	struct afs_cell *cell;
-	struct seq_file *m;
-	int ret;
-
-	cell = PDE_DATA(inode);
-	if (!cell)
-		return -ENOENT;
-
-	ret = seq_open(file, &afs_proc_cell_volumes_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = cell;
-
-	return 0;
-}
-
-/*
  * set up the iterator to start reading from the cells list and return the
  * first item
  */
 static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
 	__acquires(cell->proc_lock)
 {
-	struct afs_cell *cell = m->private;
+	struct afs_cell *cell = PDE_DATA(file_inode(m->file));
 
 	_enter("cell=%p pos=%Ld", cell, *_pos);
 
@@ -502,7 +446,7 @@
 static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
 					loff_t *_pos)
 {
-	struct afs_cell *cell = p->private;
+	struct afs_cell *cell = PDE_DATA(file_inode(p->file));
 
 	_enter("cell=%p pos=%Ld", cell, *_pos);
 	return seq_list_next(v, &cell->proc_volumes, _pos);
@@ -514,7 +458,7 @@
 static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
 	__releases(cell->proc_lock)
 {
-	struct afs_cell *cell = p->private;
+	struct afs_cell *cell = PDE_DATA(file_inode(p->file));
 
 	read_unlock(&cell->proc_lock);
 }
@@ -530,7 +474,7 @@
  */
 static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
 {
-	struct afs_cell *cell = m->private;
+	struct afs_cell *cell = PDE_DATA(file_inode(m->file));
 	struct afs_volume *vol = list_entry(v, struct afs_volume, proc_link);
 
 	/* Display header on line 1 */
@@ -547,30 +491,6 @@
 }
 
 /*
- * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume
- * location server
- */
-static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
-{
-	struct afs_cell *cell;
-	struct seq_file *m;
-	int ret;
-
-	cell = PDE_DATA(inode);
-	if (!cell)
-		return -ENOENT;
-
-	ret = seq_open(file, &afs_proc_cell_vlservers_ops);
-	if (ret<0)
-		return ret;
-
-	m = file->private_data;
-	m->private = cell;
-
-	return 0;
-}
-
-/*
  * set up the iterator to start reading from the cells list and return the
  * first item
  */
@@ -578,7 +498,7 @@
 	__acquires(rcu)
 {
 	struct afs_addr_list *alist;
-	struct afs_cell *cell = m->private;
+	struct afs_cell *cell = PDE_DATA(file_inode(m->file));
 	loff_t pos = *_pos;
 
 	rcu_read_lock();
@@ -603,7 +523,7 @@
 					  loff_t *_pos)
 {
 	struct afs_addr_list *alist;
-	struct afs_cell *cell = p->private;
+	struct afs_cell *cell = PDE_DATA(file_inode(p->file));
 	loff_t pos;
 
 	alist = rcu_dereference(cell->vl_addrs);
@@ -644,15 +564,6 @@
 }
 
 /*
- * open "/proc/fs/afs/servers" which provides a summary of active
- * servers
- */
-static int afs_proc_servers_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &afs_proc_servers_ops);
-}
-
-/*
  * Set up the iterator to start reading from the server list and return the
  * first item.
  */
@@ -931,18 +842,3 @@
 		   atomic_long_read(&net->n_store_bytes));
 	return 0;
 }
-
-/*
- * Open "/proc/fs/afs/stats" to allow reading of the stat counters.
- */
-static int afs_proc_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, afs_proc_stats_show, NULL);
-}
-
-static const struct file_operations afs_proc_stats_fops = {
-	.open		= afs_proc_stats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release        = single_release,
-};
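
The pattern throughout this file: hand-written open callbacks and file_operations for seq_file-based /proc entries are replaced by proc_create_seq()/proc_create_single() (and their _data variants), which synthesize the fops internally; per-entry private data is then fetched with PDE_DATA(file_inode(m->file)) instead of being stashed in seq_file::private at open time. Sketched for a hypothetical single-show stats file:

    static int foo_stats_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "ok\n");
            return 0;
    }

    /* Old style: an open callback plus a file_operations per entry. */
    static int foo_stats_open(struct inode *inode, struct file *file)
    {
            return single_open(file, foo_stats_show, NULL);
    }

    static const struct file_operations foo_stats_fops = {
            .open           = foo_stats_open,
            .read           = seq_read,
            .llseek         = seq_lseek,
            .release        = single_release,
    };

    static int __init foo_proc_init(void)
    {
            /* was: proc_create("stats", 0444, NULL, &foo_stats_fops); */
            if (!proc_create_single("stats", 0444, NULL, foo_stats_show))
                    return -ENOMEM;
            return 0;
    }
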
diff --git a/fs/aio.c b/fs/aio.c
index 8061d97..b850e92 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,6 +5,7 @@
  *	Implements an efficient asynchronous io interface.
  *
  *	Copyright 2000, 2001, 2002 Red Hat, Inc.  All Rights Reserved.
+ *	Copyright 2018 Christoph Hellwig.
  *
  *	See ../COPYING for licensing terms.
  */
@@ -46,6 +47,8 @@
 
 #include "internal.h"
 
+#define KIOCB_KEY		0
+
 #define AIO_RING_MAGIC			0xa10a10a1
 #define AIO_RING_COMPAT_FEATURES	1
 #define AIO_RING_INCOMPAT_FEATURES	0
@@ -156,21 +159,29 @@
 	unsigned		id;
 };
 
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED		((void *) (~0ULL))
+struct fsync_iocb {
+	struct work_struct	work;
+	struct file		*file;
+	bool			datasync;
+};
+
+struct poll_iocb {
+	struct file		*file;
+	__poll_t		events;
+	struct wait_queue_head	*head;
+
+	union {
+		struct wait_queue_entry	wait;
+		struct work_struct	work;
+	};
+};
 
 struct aio_kiocb {
-	struct kiocb		common;
+	union {
+		struct kiocb		rw;
+		struct fsync_iocb	fsync;
+		struct poll_iocb	poll;
+	};
 
 	struct kioctx		*ki_ctx;
 	kiocb_cancel_fn		*ki_cancel;
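
The per-operation state now lives in an anonymous union at the head of struct aio_kiocb (the old embedded struct kiocb becomes the rw member), so fsync and poll requests stop carrying unused read/write fields and the completion paths can map any member back to the request. A sketch of that recovery step, assuming this layout:

    /* Every union member starts at the address of the union itself, so
     * container_of() on any member yields the enclosing aio_kiocb. */
    static struct aio_kiocb *fsync_to_kiocb(struct fsync_iocb *fsync_req)
    {
            return container_of(fsync_req, struct aio_kiocb, fsync);
    }
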
@@ -264,9 +275,6 @@
 
 	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
-
-	pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
-
 	return 0;
 }
 __initcall(aio_setup);
@@ -552,42 +560,20 @@
 
 void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
-	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
+	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 
+	if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
+		return;
+
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
-
-	if (!req->ki_list.next)
-		list_add(&req->ki_list, &ctx->active_reqs);
-
+	list_add_tail(&req->ki_list, &ctx->active_reqs);
 	req->ki_cancel = cancel;
-
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct aio_kiocb *kiocb)
-{
-	kiocb_cancel_fn *old, *cancel;
-
-	/*
-	 * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
-	 * actually has a cancel function, hence the cmpxchg()
-	 */
-
-	cancel = READ_ONCE(kiocb->ki_cancel);
-	do {
-		if (!cancel || cancel == KIOCB_CANCELLED)
-			return -EINVAL;
-
-		old = cancel;
-		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
-	} while (cancel != old);
-
-	return cancel(&kiocb->common);
-}
-
 /*
  * free_ioctx() should be RCU delayed to synchronize against the RCU
  * protected lookup_ioctx() and also needs process context to call
@@ -634,9 +620,8 @@
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
 				       struct aio_kiocb, ki_list);
-
+		req->ki_cancel(&req->rw);
 		list_del_init(&req->ki_list);
-		kiocb_cancel(req);
 	}
 
 	spin_unlock_irq(&ctx->ctx_lock);
@@ -1042,7 +1027,7 @@
 		goto out_put;
 
 	percpu_ref_get(&ctx->reqs);
-
+	INIT_LIST_HEAD(&req->ki_list);
 	req->ki_ctx = ctx;
 	return req;
 out_put:
@@ -1050,15 +1035,6 @@
 	return NULL;
 }
 
-static void kiocb_free(struct aio_kiocb *req)
-{
-	if (req->common.ki_filp)
-		fput(req->common.ki_filp);
-	if (req->ki_eventfd != NULL)
-		eventfd_ctx_put(req->ki_eventfd);
-	kmem_cache_free(kiocb_cachep, req);
-}
-
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
 	struct aio_ring __user *ring  = (void __user *)ctx_id;
@@ -1089,44 +1065,14 @@
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
-static void aio_complete(struct kiocb *kiocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
 {
-	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
 	unsigned tail, pos, head;
 	unsigned long	flags;
 
-	if (kiocb->ki_flags & IOCB_WRITE) {
-		struct file *file = kiocb->ki_filp;
-
-		/*
-		 * Tell lockdep we inherited freeze protection from submission
-		 * thread.
-		 */
-		if (S_ISREG(file_inode(file)->i_mode))
-			__sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
-		file_end_write(file);
-	}
-
-	/*
-	 * Special case handling for sync iocbs:
-	 *  - events go directly into the iocb for fast handling
-	 *  - the sync task with the iocb in its stack holds the single iocb
-	 *    ref, no other paths have a way to get another ref
-	 *  - the sync task helpfully left a reference to itself in the iocb
-	 */
-	BUG_ON(is_sync_kiocb(kiocb));
-
-	if (iocb->ki_list.next) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&ctx->ctx_lock, flags);
-		list_del(&iocb->ki_list);
-		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-	}
-
 	/*
 	 * Add a completion event to the ring buffer. Must be done holding
 	 * ctx->completion_lock to prevent other code from messing with the tail
@@ -1180,11 +1126,12 @@
 	 * eventfd. The eventfd_signal() function is safe to be called
 	 * from IRQ context.
 	 */
-	if (iocb->ki_eventfd != NULL)
+	if (iocb->ki_eventfd) {
 		eventfd_signal(iocb->ki_eventfd, 1);
+		eventfd_ctx_put(iocb->ki_eventfd);
+	}
 
-	/* everything turned out well, dispose of the aiocb. */
-	kiocb_free(iocb);
+	kmem_cache_free(kiocb_cachep, iocb);
 
 	/*
 	 * We have to order our ring_info tail store above and test
@@ -1250,14 +1197,13 @@
 		if (head == tail)
 			break;
 
-		avail = min(avail, nr - ret);
-		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
-			    ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
-
 		pos = head + AIO_EVENTS_OFFSET;
 		page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
 		pos %= AIO_EVENTS_PER_PAGE;
 
+		avail = min(avail, nr - ret);
+		avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
+
 		ev = kmap(page);
 		copy_ret = copy_to_user(event + ret, ev + pos,
 					sizeof(*ev) * avail);
@@ -1328,10 +1274,6 @@
 		wait_event_interruptible_hrtimeout(ctx->wait,
 				aio_read_events(ctx, min_nr, nr, event, &ret),
 				until);
-
-	if (!ret && signal_pending(current))
-		ret = -EINTR;
-
 	return ret;
 }
 
@@ -1447,6 +1389,58 @@
 	return -EINVAL;
 }
 
+static void aio_remove_iocb(struct aio_kiocb *iocb)
+{
+	struct kioctx *ctx = iocb->ki_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->ctx_lock, flags);
+	list_del(&iocb->ki_list);
+	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+}
+
+static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
+{
+	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
+
+	if (!list_empty_careful(&iocb->ki_list))
+		aio_remove_iocb(iocb);
+
+	if (kiocb->ki_flags & IOCB_WRITE) {
+		struct inode *inode = file_inode(kiocb->ki_filp);
+
+		/*
+		 * Tell lockdep we inherited freeze protection from submission
+		 * thread.
+		 */
+		if (S_ISREG(inode->i_mode))
+			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+		file_end_write(kiocb->ki_filp);
+	}
+
+	fput(kiocb->ki_filp);
+	aio_complete(iocb, res, res2);
+}
+
+static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+{
+	int ret;
+
+	req->ki_filp = fget(iocb->aio_fildes);
+	if (unlikely(!req->ki_filp))
+		return -EBADF;
+	req->ki_complete = aio_complete_rw;
+	req->ki_pos = iocb->aio_offset;
+	req->ki_flags = iocb_flags(req->ki_filp);
+	if (iocb->aio_flags & IOCB_FLAG_RESFD)
+		req->ki_flags |= IOCB_EVENTFD;
+	req->ki_hint = file_write_hint(req->ki_filp);
+	ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
+	if (unlikely(ret))
+		fput(req->ki_filp);
+	return ret;
+}
+
 static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
 		bool vectored, bool compat, struct iov_iter *iter)
 {
@@ -1466,11 +1460,11 @@
 	return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
 }
 
-static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
+static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
 {
 	switch (ret) {
 	case -EIOCBQUEUED:
-		return ret;
+		break;
 	case -ERESTARTSYS:
 	case -ERESTARTNOINTR:
 	case -ERESTARTNOHAND:
@@ -1482,85 +1476,270 @@
 		ret = -EINTR;
 		/*FALLTHRU*/
 	default:
-		aio_complete(req, ret, 0);
-		return 0;
+		aio_complete_rw(req, ret, 0);
 	}
 }
 
 static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
 		bool compat)
 {
-	struct file *file = req->ki_filp;
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
+	struct file *file;
 	ssize_t ret;
 
+	ret = aio_prep_rw(req, iocb);
+	if (ret)
+		return ret;
+	file = req->ki_filp;
+
+	ret = -EBADF;
 	if (unlikely(!(file->f_mode & FMODE_READ)))
-		return -EBADF;
+		goto out_fput;
+	ret = -EINVAL;
 	if (unlikely(!file->f_op->read_iter))
-		return -EINVAL;
+		goto out_fput;
 
 	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
 	if (ret)
-		return ret;
+		goto out_fput;
 	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret)
-		ret = aio_ret(req, call_read_iter(file, req, &iter));
+		aio_rw_done(req, call_read_iter(file, req, &iter));
 	kfree(iovec);
+out_fput:
+	if (unlikely(ret))
+		fput(file);
 	return ret;
 }
 
 static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
 		bool compat)
 {
-	struct file *file = req->ki_filp;
 	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
-	ssize_t ret;
-
-	if (unlikely(!(file->f_mode & FMODE_WRITE)))
-		return -EBADF;
-	if (unlikely(!file->f_op->write_iter))
-		return -EINVAL;
-
-	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
-	if (ret)
-		return ret;
-	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
-	if (!ret) {
-		req->ki_flags |= IOCB_WRITE;
-		file_start_write(file);
-		ret = aio_ret(req, call_write_iter(file, req, &iter));
-		/*
-		 * We release freeze protection in aio_complete().  Fool lockdep
-		 * by telling it the lock got released so that it doesn't
-		 * complain about held lock when we return to userspace.
-		 */
-		if (S_ISREG(file_inode(file)->i_mode))
-			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
-	}
-	kfree(iovec);
-	return ret;
-}
-
-static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
-			 struct iocb *iocb, bool compat)
-{
-	struct aio_kiocb *req;
 	struct file *file;
 	ssize_t ret;
 
+	ret = aio_prep_rw(req, iocb);
+	if (ret)
+		return ret;
+	file = req->ki_filp;
+
+	ret = -EBADF;
+	if (unlikely(!(file->f_mode & FMODE_WRITE)))
+		goto out_fput;
+	ret = -EINVAL;
+	if (unlikely(!file->f_op->write_iter))
+		goto out_fput;
+
+	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
+	if (ret)
+		goto out_fput;
+	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
+	if (!ret) {
+		/*
+		 * Open-code file_start_write here to grab freeze protection,
+		 * which will be released by another thread in
+		 * aio_complete_rw().  Fool lockdep by telling it the lock got
+		 * released so that it doesn't complain about the held lock when
+		 * we return to userspace.
+		 */
+		if (S_ISREG(file_inode(file)->i_mode)) {
+			__sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
+			__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+		}
+		req->ki_flags |= IOCB_WRITE;
+		aio_rw_done(req, call_write_iter(file, req, &iter));
+	}
+	kfree(iovec);
+out_fput:
+	if (unlikely(ret))
+		fput(file);
+	return ret;
+}
+
+static void aio_fsync_work(struct work_struct *work)
+{
+	struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
+	int ret;
+
+	ret = vfs_fsync(req->file, req->datasync);
+	fput(req->file);
+	aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+}
+
+static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+{
+	if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
+			iocb->aio_rw_flags))
+		return -EINVAL;
+	req->file = fget(iocb->aio_fildes);
+	if (unlikely(!req->file))
+		return -EBADF;
+	if (unlikely(!req->file->f_op->fsync)) {
+		fput(req->file);
+		return -EINVAL;
+	}
+
+	req->datasync = datasync;
+	INIT_WORK(&req->work, aio_fsync_work);
+	schedule_work(&req->work);
+	return 0;
+}
+
+/* need to use list_del_init so we can check if item was present */
+static inline bool __aio_poll_remove(struct poll_iocb *req)
+{
+	if (list_empty(&req->wait.entry))
+		return false;
+	list_del_init(&req->wait.entry);
+	return true;
+}
+
+static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+	fput(iocb->poll.file);
+	aio_complete(iocb, mangle_poll(mask), 0);
+}
+
+static void aio_poll_work(struct work_struct *work)
+{
+	struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
+
+	if (!list_empty_careful(&iocb->ki_list))
+		aio_remove_iocb(iocb);
+	__aio_poll_complete(iocb, iocb->poll.events);
+}
+
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+	struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+	struct poll_iocb *req = &aiocb->poll;
+	struct wait_queue_head *head = req->head;
+	bool found = false;
+
+	spin_lock(&head->lock);
+	found = __aio_poll_remove(req);
+	spin_unlock(&head->lock);
+
+	if (found) {
+		req->events = 0;
+		INIT_WORK(&req->work, aio_poll_work);
+		schedule_work(&req->work);
+	}
+	return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+		void *key)
+{
+	struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+	struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+	struct file *file = req->file;
+	__poll_t mask = key_to_poll(key);
+
+	assert_spin_locked(&req->head->lock);
+
+	/* for instances that support it check for an event match first: */
+	if (mask && !(mask & req->events))
+		return 0;
+
+	mask = file->f_op->poll_mask(file, req->events);
+	if (!mask)
+		return 0;
+
+	__aio_poll_remove(req);
+
+	/*
+	 * Try completing without a context switch if we can acquire ctx_lock
+	 * without spinning.  Otherwise we need to defer to a workqueue to
+	 * avoid a deadlock due to the lock order.
+	 */
+	if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+		list_del_init(&iocb->ki_list);
+		spin_unlock(&iocb->ki_ctx->ctx_lock);
+
+		__aio_poll_complete(iocb, mask);
+	} else {
+		req->events = mask;
+		INIT_WORK(&req->work, aio_poll_work);
+		schedule_work(&req->work);
+	}
+
+	return 1;
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+	struct kioctx *ctx = aiocb->ki_ctx;
+	struct poll_iocb *req = &aiocb->poll;
+	__poll_t mask;
+
+	/* reject any unknown events outside the normal event mask. */
+	if ((u16)iocb->aio_buf != iocb->aio_buf)
+		return -EINVAL;
+	/* reject fields that are not defined for poll */
+	if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+		return -EINVAL;
+
+	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+	req->file = fget(iocb->aio_fildes);
+	if (unlikely(!req->file))
+		return -EBADF;
+	if (!file_has_poll_mask(req->file))
+		goto out_fail;
+
+	req->head = req->file->f_op->get_poll_head(req->file, req->events);
+	if (!req->head)
+		goto out_fail;
+	if (IS_ERR(req->head)) {
+		mask = EPOLLERR;
+		goto done;
+	}
+
+	init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+	aiocb->ki_cancel = aio_poll_cancel;
+
+	spin_lock_irq(&ctx->ctx_lock);
+	spin_lock(&req->head->lock);
+	mask = req->file->f_op->poll_mask(req->file, req->events);
+	if (!mask) {
+		__add_wait_queue(req->head, &req->wait);
+		list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+	}
+	spin_unlock(&req->head->lock);
+	spin_unlock_irq(&ctx->ctx_lock);
+done:
+	if (mask)
+		__aio_poll_complete(aiocb, mask);
+	return 0;
+out_fail:
+	fput(req->file);
+	return -EINVAL; /* same as no support for IOCB_CMD_POLL */
+}
+
+static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
+			 bool compat)
+{
+	struct aio_kiocb *req;
+	struct iocb iocb;
+	ssize_t ret;
+
+	if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+		return -EFAULT;
+
 	/* enforce forwards compatibility on users */
-	if (unlikely(iocb->aio_reserved2)) {
+	if (unlikely(iocb.aio_reserved2)) {
 		pr_debug("EINVAL: reserve field set\n");
 		return -EINVAL;
 	}
 
 	/* prevent overflows */
 	if (unlikely(
-	    (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
-	    (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
-	    ((ssize_t)iocb->aio_nbytes < 0)
+	    (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
+	    (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
+	    ((ssize_t)iocb.aio_nbytes < 0)
 	   )) {
 		pr_debug("EINVAL: overflow check\n");
 		return -EINVAL;
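
aio_poll_wake() above is entered with the waitqueue head's lock held, while completion needs ctx_lock, which by the established order nests outside it; blocking on ctx_lock there could deadlock, so the code completes inline only when spin_trylock() succeeds and otherwise punts to a workqueue. The trylock-or-defer shape in isolation, with a hypothetical request type and helpers:

    struct my_req {
            spinlock_t outer;       /* normally taken before the inner lock */
            struct work_struct work;
    };

    /* Called with an inner lock already held. */
    static void complete_or_defer(struct my_req *req)
    {
            if (spin_trylock(&req->outer)) {
                    /* Fast path: we never wait for 'outer' here, so the
                     * reversed acquisition order cannot deadlock. */
                    do_complete(req);               /* hypothetical */
                    spin_unlock(&req->outer);
            } else {
                    /* Slow path: finish from process context, where the
                     * locks can be taken in the documented order. */
                    schedule_work(&req->work);
            }
    }
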
@@ -1570,37 +1749,19 @@
 	if (unlikely(!req))
 		return -EAGAIN;
 
-	req->common.ki_filp = file = fget(iocb->aio_fildes);
-	if (unlikely(!req->common.ki_filp)) {
-		ret = -EBADF;
-		goto out_put_req;
-	}
-	req->common.ki_pos = iocb->aio_offset;
-	req->common.ki_complete = aio_complete;
-	req->common.ki_flags = iocb_flags(req->common.ki_filp);
-	req->common.ki_hint = file_write_hint(file);
-
-	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
+	if (iocb.aio_flags & IOCB_FLAG_RESFD) {
 		/*
 		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
 		 * instance of the file* now. The file descriptor must be
 		 * an eventfd() fd, and will be signaled for each completed
 		 * event using the eventfd_signal() function.
 		 */
-		req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
+		req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
 		if (IS_ERR(req->ki_eventfd)) {
 			ret = PTR_ERR(req->ki_eventfd);
 			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
-
-		req->common.ki_flags |= IOCB_EVENTFD;
-	}
-
-	ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
-	if (unlikely(ret)) {
-		pr_debug("EINVAL: aio_rw_flags\n");
-		goto out_put_req;
 	}
 
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1610,92 +1771,53 @@
 	}
 
 	req->ki_user_iocb = user_iocb;
-	req->ki_user_data = iocb->aio_data;
+	req->ki_user_data = iocb.aio_data;
 
-	get_file(file);
-	switch (iocb->aio_lio_opcode) {
+	switch (iocb.aio_lio_opcode) {
 	case IOCB_CMD_PREAD:
-		ret = aio_read(&req->common, iocb, false, compat);
+		ret = aio_read(&req->rw, &iocb, false, compat);
 		break;
 	case IOCB_CMD_PWRITE:
-		ret = aio_write(&req->common, iocb, false, compat);
+		ret = aio_write(&req->rw, &iocb, false, compat);
 		break;
 	case IOCB_CMD_PREADV:
-		ret = aio_read(&req->common, iocb, true, compat);
+		ret = aio_read(&req->rw, &iocb, true, compat);
 		break;
 	case IOCB_CMD_PWRITEV:
-		ret = aio_write(&req->common, iocb, true, compat);
+		ret = aio_write(&req->rw, &iocb, true, compat);
+		break;
+	case IOCB_CMD_FSYNC:
+		ret = aio_fsync(&req->fsync, &iocb, false);
+		break;
+	case IOCB_CMD_FDSYNC:
+		ret = aio_fsync(&req->fsync, &iocb, true);
+		break;
+	case IOCB_CMD_POLL:
+		ret = aio_poll(req, &iocb);
 		break;
 	default:
-		pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
+		pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
 		ret = -EINVAL;
 		break;
 	}
-	fput(file);
 
-	if (ret && ret != -EIOCBQUEUED)
+	/*
+	 * If ret is 0, we've either done aio_complete() ourselves or have
+	 * arranged for that to be done asynchronously.  Anything non-zero
+	 * means that we need to destroy req ourselves.
+	 */
+	if (ret)
 		goto out_put_req;
 	return 0;
 out_put_req:
 	put_reqs_available(ctx, 1);
 	percpu_ref_put(&ctx->reqs);
-	kiocb_free(req);
+	if (req->ki_eventfd)
+		eventfd_ctx_put(req->ki_eventfd);
+	kmem_cache_free(kiocb_cachep, req);
 	return ret;
 }
 
-static long do_io_submit(aio_context_t ctx_id, long nr,
-			  struct iocb __user *__user *iocbpp, bool compat)
-{
-	struct kioctx *ctx;
-	long ret = 0;
-	int i = 0;
-	struct blk_plug plug;
-
-	if (unlikely(nr < 0))
-		return -EINVAL;
-
-	if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
-		nr = LONG_MAX/sizeof(*iocbpp);
-
-	if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
-		return -EFAULT;
-
-	ctx = lookup_ioctx(ctx_id);
-	if (unlikely(!ctx)) {
-		pr_debug("EINVAL: invalid context id\n");
-		return -EINVAL;
-	}
-
-	blk_start_plug(&plug);
-
-	/*
-	 * AKPM: should this return a partial result if some of the IOs were
-	 * successfully submitted?
-	 */
-	for (i=0; i<nr; i++) {
-		struct iocb __user *user_iocb;
-		struct iocb tmp;
-
-		if (unlikely(__get_user(user_iocb, iocbpp + i))) {
-			ret = -EFAULT;
-			break;
-		}
-
-		if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
-			ret = -EFAULT;
-			break;
-		}
-
-		ret = io_submit_one(ctx, user_iocb, &tmp, compat);
-		if (ret)
-			break;
-	}
-	blk_finish_plug(&plug);
-
-	percpu_ref_put(&ctx->users);
-	return i ? i : ret;
-}
-
 /* sys_io_submit:
  *	Queue the nr iocbs pointed to by iocbpp for processing.  Returns
  *	the number of iocbs queued.  May return -EINVAL if the aio_context
@@ -1711,44 +1833,80 @@
 SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 		struct iocb __user * __user *, iocbpp)
 {
-	return do_io_submit(ctx_id, nr, iocbpp, 0);
-}
-
-#ifdef CONFIG_COMPAT
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
-{
-	compat_uptr_t uptr;
-	int i;
-
-	for (i = 0; i < nr; ++i) {
-		if (get_user(uptr, ptr32 + i))
-			return -EFAULT;
-		if (put_user(compat_ptr(uptr), ptr64 + i))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-#define MAX_AIO_SUBMITS 	(PAGE_SIZE/sizeof(struct iocb *))
-
-COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
-		       int, nr, u32 __user *, iocb)
-{
-	struct iocb __user * __user *iocb64;
-	long ret;
+	struct kioctx *ctx;
+	long ret = 0;
+	int i = 0;
+	struct blk_plug plug;
 
 	if (unlikely(nr < 0))
 		return -EINVAL;
 
-	if (nr > MAX_AIO_SUBMITS)
-		nr = MAX_AIO_SUBMITS;
+	ctx = lookup_ioctx(ctx_id);
+	if (unlikely(!ctx)) {
+		pr_debug("EINVAL: invalid context id\n");
+		return -EINVAL;
+	}
 
-	iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
-	ret = copy_iocb(nr, iocb, iocb64);
-	if (!ret)
-		ret = do_io_submit(ctx_id, nr, iocb64, 1);
-	return ret;
+	if (nr > ctx->nr_events)
+		nr = ctx->nr_events;
+
+	blk_start_plug(&plug);
+	for (i = 0; i < nr; i++) {
+		struct iocb __user *user_iocb;
+
+		if (unlikely(get_user(user_iocb, iocbpp + i))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		ret = io_submit_one(ctx, user_iocb, false);
+		if (ret)
+			break;
+	}
+	blk_finish_plug(&plug);
+
+	percpu_ref_put(&ctx->users);
+	return i ? i : ret;
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+		       int, nr, compat_uptr_t __user *, iocbpp)
+{
+	struct kioctx *ctx;
+	long ret = 0;
+	int i = 0;
+	struct blk_plug plug;
+
+	if (unlikely(nr < 0))
+		return -EINVAL;
+
+	ctx = lookup_ioctx(ctx_id);
+	if (unlikely(!ctx)) {
+		pr_debug("EINVAL: invalid context id\n");
+		return -EINVAL;
+	}
+
+	if (nr > ctx->nr_events)
+		nr = ctx->nr_events;
+
+	blk_start_plug(&plug);
+	for (i = 0; i < nr; i++) {
+		compat_uptr_t user_iocb;
+
+		if (unlikely(get_user(user_iocb, iocbpp + i))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
+		if (ret)
+			break;
+	}
+	blk_finish_plug(&plug);
+
+	percpu_ref_put(&ctx->users);
+	return i ? i : ret;
 }
 #endif
 
@@ -1756,15 +1914,12 @@
  *	Finds a given iocb for cancellation.
  */
 static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
 {
 	struct aio_kiocb *kiocb;
 
 	assert_spin_locked(&ctx->ctx_lock);
 
-	if (key != KIOCB_KEY)
-		return NULL;
-
 	/* TODO: use a hash or array, this sucks. */
 	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
 		if (kiocb->ki_user_iocb == iocb)
@@ -1788,25 +1943,24 @@
 {
 	struct kioctx *ctx;
 	struct aio_kiocb *kiocb;
+	int ret = -EINVAL;
 	u32 key;
-	int ret;
 
-	ret = get_user(key, &iocb->aio_key);
-	if (unlikely(ret))
+	if (unlikely(get_user(key, &iocb->aio_key)))
 		return -EFAULT;
+	if (unlikely(key != KIOCB_KEY))
+		return -EINVAL;
 
 	ctx = lookup_ioctx(ctx_id);
 	if (unlikely(!ctx))
 		return -EINVAL;
 
 	spin_lock_irq(&ctx->ctx_lock);
-
-	kiocb = lookup_kiocb(ctx, iocb, key);
-	if (kiocb)
-		ret = kiocb_cancel(kiocb);
-	else
-		ret = -EINVAL;
-
+	kiocb = lookup_kiocb(ctx, iocb);
+	if (kiocb) {
+		ret = kiocb->ki_cancel(&kiocb->rw);
+		list_del_init(&kiocb->ki_list);
+	}
 	spin_unlock_irq(&ctx->ctx_lock);
 
 	if (!ret) {
@@ -1861,13 +2015,60 @@
 		struct timespec __user *, timeout)
 {
 	struct timespec64	ts;
+	int			ret;
 
-	if (timeout) {
-		if (unlikely(get_timespec64(&ts, timeout)))
+	if (timeout && unlikely(get_timespec64(&ts, timeout)))
+		return -EFAULT;
+
+	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+	if (!ret && signal_pending(current))
+		ret = -EINTR;
+	return ret;
+}
+
+SYSCALL_DEFINE6(io_pgetevents,
+		aio_context_t, ctx_id,
+		long, min_nr,
+		long, nr,
+		struct io_event __user *, events,
+		struct timespec __user *, timeout,
+		const struct __aio_sigset __user *, usig)
+{
+	struct __aio_sigset	ksig = { NULL, };
+	sigset_t		ksigmask, sigsaved;
+	struct timespec64	ts;
+	int ret;
+
+	if (timeout && unlikely(get_timespec64(&ts, timeout)))
+		return -EFAULT;
+
+	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+		return -EFAULT;
+
+	if (ksig.sigmask) {
+		if (ksig.sigsetsize != sizeof(sigset_t))
+			return -EINVAL;
+		if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
 			return -EFAULT;
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-	return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+	if (signal_pending(current)) {
+		if (ksig.sigmask) {
+			current->saved_sigmask = sigsaved;
+			set_restore_sigmask();
+		}
+
+		if (!ret)
+			ret = -ERESTARTNOHAND;
+	} else {
+		if (ksig.sigmask)
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	}
+
+	return ret;
 }
 
 #ifdef CONFIG_COMPAT
@@ -1878,13 +2079,64 @@
 		       struct compat_timespec __user *, timeout)
 {
 	struct timespec64 t;
+	int ret;
 
-	if (timeout) {
-		if (compat_get_timespec64(&t, timeout))
+	if (timeout && compat_get_timespec64(&t, timeout))
+		return -EFAULT;
+
+	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+	if (!ret && signal_pending(current))
+		ret = -EINTR;
+	return ret;
+}
+
+
+struct __compat_aio_sigset {
+	compat_sigset_t __user	*sigmask;
+	compat_size_t		sigsetsize;
+};
+
+COMPAT_SYSCALL_DEFINE6(io_pgetevents,
+		compat_aio_context_t, ctx_id,
+		compat_long_t, min_nr,
+		compat_long_t, nr,
+		struct io_event __user *, events,
+		struct compat_timespec __user *, timeout,
+		const struct __compat_aio_sigset __user *, usig)
+{
+	struct __compat_aio_sigset ksig = { NULL, };
+	sigset_t ksigmask, sigsaved;
+	struct timespec64 t;
+	int ret;
+
+	if (timeout && compat_get_timespec64(&t, timeout))
+		return -EFAULT;
+
+	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+		return -EFAULT;
+
+	if (ksig.sigmask) {
+		if (ksig.sigsetsize != sizeof(compat_sigset_t))
+			return -EINVAL;
+		if (get_compat_sigset(&ksigmask, ksig.sigmask))
 			return -EFAULT;
-
+		sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+		sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
 	}
 
-	return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+	if (signal_pending(current)) {
+		if (ksig.sigmask) {
+			current->saved_sigmask = sigsaved;
+			set_restore_sigmask();
+		}
+		if (!ret)
+			ret = -ERESTARTNOHAND;
+	} else {
+		if (ksig.sigmask)
+			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	}
+
+	return ret;
 }
 #endif
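
Both io_pgetevents() flavors above follow the pselect()/ppoll() convention for the signal mask: install the caller's mask for the duration of the wait, and if a signal arrives, defer restoring the old mask via set_restore_sigmask() so the handler observes the temporary mask and the original one is reinstated on return to userspace. The protocol in isolation, assuming a wait step that returns 0 or a -errno:

    sigset_t ksigmask, sigsaved;
    int ret;

    sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
    sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);

    ret = do_the_wait();                    /* hypothetical */

    if (signal_pending(current)) {
            /* Let signal delivery restore the mask; restoring it here
             * would let the handler run with the wrong mask. */
            current->saved_sigmask = sigsaved;
            set_restore_sigmask();
            if (!ret)
                    ret = -ERESTARTNOHAND;
    } else {
            sigprocmask(SIG_SETMASK, &sigsaved, NULL);
    }
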
diff --git a/fs/attr.c b/fs/attr.c
index 12ffdb6..d0b4d34 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,6 +18,32 @@
 #include <linux/evm.h>
 #include <linux/ima.h>
 
+static bool chown_ok(const struct inode *inode, kuid_t uid)
+{
+	if (uid_eq(current_fsuid(), inode->i_uid) &&
+	    uid_eq(uid, inode->i_uid))
+		return true;
+	if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+		return true;
+	if (uid_eq(inode->i_uid, INVALID_UID) &&
+	    ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
+		return true;
+	return false;
+}
+
+static bool chgrp_ok(const struct inode *inode, kgid_t gid)
+{
+	if (uid_eq(current_fsuid(), inode->i_uid) &&
+	    (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
+		return true;
+	if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+		return true;
+	if (gid_eq(inode->i_gid, INVALID_GID) &&
+	    ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
+		return true;
+	return false;
+}
+
 /**
  * setattr_prepare - check if attribute changes to a dentry are allowed
  * @dentry:	dentry to check
@@ -52,17 +78,11 @@
 		goto kill_priv;
 
 	/* Make sure a caller can chown. */
-	if ((ia_valid & ATTR_UID) &&
-	    (!uid_eq(current_fsuid(), inode->i_uid) ||
-	     !uid_eq(attr->ia_uid, inode->i_uid)) &&
-	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+	if ((ia_valid & ATTR_UID) && !chown_ok(inode, attr->ia_uid))
 		return -EPERM;
 
 	/* Make sure caller can chgrp. */
-	if ((ia_valid & ATTR_GID) &&
-	    (!uid_eq(current_fsuid(), inode->i_uid) ||
-	    (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
-	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+	if ((ia_valid & ATTR_GID) && !chgrp_ok(inode, attr->ia_gid))
 		return -EPERM;
 
 	/* Make sure a caller can chmod. */
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index ee832ca..f32f21c 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -21,10 +21,9 @@
 #define dprintf(x...)
 #endif
 
-static int bfs_add_entry(struct inode *dir, const unsigned char *name,
-						int namelen, int ino);
+static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino);
 static struct buffer_head *bfs_find_entry(struct inode *dir,
-				const unsigned char *name, int namelen,
+				const struct qstr *child,
 				struct bfs_dirent **res_dir);
 
 static int bfs_readdir(struct file *f, struct dir_context *ctx)
@@ -111,8 +110,7 @@
         mark_inode_dirty(inode);
 	bfs_dump_imap("create", s);
 
-	err = bfs_add_entry(dir, dentry->d_name.name, dentry->d_name.len,
-							inode->i_ino);
+	err = bfs_add_entry(dir, &dentry->d_name, inode->i_ino);
 	if (err) {
 		inode_dec_link_count(inode);
 		mutex_unlock(&info->bfs_lock);
@@ -136,19 +134,14 @@
 		return ERR_PTR(-ENAMETOOLONG);
 
 	mutex_lock(&info->bfs_lock);
-	bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
+	bh = bfs_find_entry(dir, &dentry->d_name, &de);
 	if (bh) {
 		unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
 		brelse(bh);
 		inode = bfs_iget(dir->i_sb, ino);
-		if (IS_ERR(inode)) {
-			mutex_unlock(&info->bfs_lock);
-			return ERR_CAST(inode);
-		}
 	}
 	mutex_unlock(&info->bfs_lock);
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 static int bfs_link(struct dentry *old, struct inode *dir,
@@ -159,8 +152,7 @@
 	int err;
 
 	mutex_lock(&info->bfs_lock);
-	err = bfs_add_entry(dir, new->d_name.name, new->d_name.len,
-							inode->i_ino);
+	err = bfs_add_entry(dir, &new->d_name, inode->i_ino);
 	if (err) {
 		mutex_unlock(&info->bfs_lock);
 		return err;
@@ -183,7 +175,7 @@
 	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
 
 	mutex_lock(&info->bfs_lock);
-	bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
+	bh = bfs_find_entry(dir, &dentry->d_name, &de);
 	if (!bh || (le16_to_cpu(de->ino) != inode->i_ino))
 		goto out_brelse;
 
@@ -228,27 +220,21 @@
 	info = BFS_SB(old_inode->i_sb);
 
 	mutex_lock(&info->bfs_lock);
-	old_bh = bfs_find_entry(old_dir, 
-				old_dentry->d_name.name, 
-				old_dentry->d_name.len, &old_de);
+	old_bh = bfs_find_entry(old_dir, &old_dentry->d_name, &old_de);
 
 	if (!old_bh || (le16_to_cpu(old_de->ino) != old_inode->i_ino))
 		goto end_rename;
 
 	error = -EPERM;
 	new_inode = d_inode(new_dentry);
-	new_bh = bfs_find_entry(new_dir, 
-				new_dentry->d_name.name, 
-				new_dentry->d_name.len, &new_de);
+	new_bh = bfs_find_entry(new_dir, &new_dentry->d_name, &new_de);
 
 	if (new_bh && !new_inode) {
 		brelse(new_bh);
 		new_bh = NULL;
 	}
 	if (!new_bh) {
-		error = bfs_add_entry(new_dir, 
-					new_dentry->d_name.name,
-					new_dentry->d_name.len,
+		error = bfs_add_entry(new_dir, &new_dentry->d_name,
 					old_inode->i_ino);
 		if (error)
 			goto end_rename;
@@ -278,9 +264,10 @@
 	.rename			= bfs_rename,
 };
 
-static int bfs_add_entry(struct inode *dir, const unsigned char *name,
-							int namelen, int ino)
+static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino)
 {
+	const unsigned char *name = child->name;
+	int namelen = child->len;
 	struct buffer_head *bh;
 	struct bfs_dirent *de;
 	int block, sblock, eblock, off, pos;
@@ -332,12 +319,14 @@
 }
 
 static struct buffer_head *bfs_find_entry(struct inode *dir,
-			const unsigned char *name, int namelen,
+			const struct qstr *child,
 			struct bfs_dirent **res_dir)
 {
 	unsigned long block = 0, offset = 0;
 	struct buffer_head *bh = NULL;
 	struct bfs_dirent *de;
+	const unsigned char *name = child->name;
+	int namelen = child->len;
 
 	*res_dir = NULL;
 	if (namelen > BFS_NAMELEN)
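
Threading the dentry's qstr through in place of separate name/length parameters keeps the pair coupled and trims every call site, with the callee unpacking it once. A matching-helper sketch along the same lines, with a hypothetical helper name:

    /* A qstr carries the name and its length as one unit. */
    static bool name_matches(const struct qstr *child,
                             const struct bfs_dirent *de)
    {
            return child->len <= BFS_NAMELEN &&
                   !memcmp(de->name, child->name, child->len) &&
                   (child->len == BFS_NAMELEN ||
                    de->name[child->len] == '\0');
    }

    /* call site: name_matches(&dentry->d_name, de) */
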
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7ec920e..bef6934 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -272,7 +272,7 @@
 	struct bio		bio;
 };
 
-static struct bio_set *blkdev_dio_pool __read_mostly;
+static struct bio_set blkdev_dio_pool;
 
 static void blkdev_bio_end_io(struct bio *bio)
 {
@@ -334,7 +334,7 @@
 	    (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
-	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool);
+	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
 	bio_get(bio); /* extra ref for the completion handler */
 
 	dio = container_of(bio, struct blkdev_dio, bio);
@@ -432,10 +432,7 @@
 
 static __init int blkdev_init(void)
 {
-	blkdev_dio_pool = bioset_create(4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
-	if (!blkdev_dio_pool)
-		return -ENOMEM;
-	return 0;
+	return bioset_init(&blkdev_dio_pool, 4, offsetof(struct blkdev_dio, bio), BIOSET_NEED_BVECS);
 }
 module_init(blkdev_init);
 
@@ -1322,27 +1319,30 @@
  * check_disk_size_change - checks for disk size change and adjusts bdev size.
  * @disk: struct gendisk to check
  * @bdev: struct bdev to adjust.
+ * @verbose: if %true, log a message about a size change if there is one
  *
  * This routine checks to see if the bdev size does not match the disk size
  * and adjusts it if it differs. When shrinking the bdev size, all its caches
  * are freed.
  */
-void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
+		bool verbose)
 {
 	loff_t disk_size, bdev_size;
 
 	disk_size = (loff_t)get_capacity(disk) << 9;
 	bdev_size = i_size_read(bdev->bd_inode);
 	if (disk_size != bdev_size) {
-		printk(KERN_INFO
-		       "%s: detected capacity change from %lld to %lld\n",
-		       disk->disk_name, bdev_size, disk_size);
+		if (verbose) {
+			printk(KERN_INFO
+			       "%s: detected capacity change from %lld to %lld\n",
+			       disk->disk_name, bdev_size, disk_size);
+		}
 		i_size_write(bdev->bd_inode, disk_size);
 		if (bdev_size > disk_size)
 			flush_disk(bdev, false);
 	}
 }
-EXPORT_SYMBOL(check_disk_size_change);
 
 /**
  * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
@@ -1364,7 +1364,7 @@
 		return ret;
 
 	mutex_lock(&bdev->bd_mutex);
-	check_disk_size_change(disk, bdev);
+	check_disk_size_change(disk, bdev, ret == 0);
 	bdev->bd_invalidated = 0;
 	mutex_unlock(&bdev->bd_mutex);
 	bdput(bdev);
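
blkdev_dio_pool becomes an embedded struct bio_set initialized with bioset_init(), dropping the extra allocation (and its failure path) that bioset_create() required. The same conversion, sketched with hypothetical names:

    #include <linux/bio.h>
    #include <linux/module.h>

    static struct bio_set my_bio_pool;      /* embedded, not a pointer */

    static int __init my_init(void)
    {
            /* bioset_init() fills in a caller-provided bio_set, so there
             * is no separate object to allocate, check, or free. */
            return bioset_init(&my_bio_pool, 4 /* pool size */,
                               0 /* front pad */, BIOSET_NEED_BVECS);
    }
    module_init(my_init);
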
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 234bae5..7e07534 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,17 +19,17 @@
  * ordered operations list so that we make sure to flush out any
  * new data the application may have written before commit.
  */
-#define BTRFS_INODE_ORDERED_DATA_CLOSE		0
-#define BTRFS_INODE_ORPHAN_META_RESERVED	1
-#define BTRFS_INODE_DUMMY			2
-#define BTRFS_INODE_IN_DEFRAG			3
-#define BTRFS_INODE_HAS_ORPHAN_ITEM		4
-#define BTRFS_INODE_HAS_ASYNC_EXTENT		5
-#define BTRFS_INODE_NEEDS_FULL_SYNC		6
-#define BTRFS_INODE_COPY_EVERYTHING		7
-#define BTRFS_INODE_IN_DELALLOC_LIST		8
-#define BTRFS_INODE_READDIO_NEED_LOCK		9
-#define BTRFS_INODE_HAS_PROPS		        10
+enum {
+	BTRFS_INODE_ORDERED_DATA_CLOSE = 0,
+	BTRFS_INODE_DUMMY,
+	BTRFS_INODE_IN_DEFRAG,
+	BTRFS_INODE_HAS_ASYNC_EXTENT,
+	BTRFS_INODE_NEEDS_FULL_SYNC,
+	BTRFS_INODE_COPY_EVERYTHING,
+	BTRFS_INODE_IN_DELALLOC_LIST,
+	BTRFS_INODE_READDIO_NEED_LOCK,
+	BTRFS_INODE_HAS_PROPS,
+};
 
 /* in memory btrfs inode */
 struct btrfs_inode {
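
These constants are bit numbers handed to set_bit()/test_bit(), not masks, so the enum keeps them dense and renumbers automatically now that the two orphan-related bits are gone. Usage sketch, assuming an unsigned long flags word as used elsewhere in btrfs:

    unsigned long runtime_flags = 0;

    set_bit(BTRFS_INODE_IN_DEFRAG, &runtime_flags);
    if (test_and_clear_bit(BTRFS_INODE_IN_DEFRAG, &runtime_flags))
            pr_debug("defrag bit was set\n");
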
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1061575..d3e447b 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -990,12 +990,7 @@
 		btrfs_compress_op[idx]->free_workspace(workspace);
 	atomic_dec(total_ws);
 wake:
-	/*
-	 * Make sure counter is updated before we wake up waiters.
-	 */
-	smp_mb();
-	if (waitqueue_active(ws_wait))
-		wake_up(ws_wait);
+	cond_wake_up(ws_wait);
 }
 
 static void free_workspace(int type, struct list_head *ws)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index cc605f7..ddda9b8 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -6,6 +6,8 @@
 #ifndef BTRFS_COMPRESSION_H
 #define BTRFS_COMPRESSION_H
 
+#include <linux/sizes.h>
+
 /*
  * We want to make sure that amount of RAM required to uncompress an extent is
  * reasonable, so we limit the total size in ram of a compressed extent to
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 8c68961..4bc326d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2330,7 +2330,7 @@
 			no_skips = 1;
 
 		t = path->nodes[i];
-		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
+		if (i >= lowest_unlock && i > skip_level) {
 			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
 			if (write_lock_level &&
@@ -2432,7 +2432,6 @@
 	btrfs_unlock_up_safe(p, level + 1);
 	btrfs_set_path_blocking(p);
 
-	free_extent_buffer(tmp);
 	if (p->reada != READA_NONE)
 		reada_for_search(fs_info, p, level, slot, key->objectid);
 
@@ -2446,7 +2445,7 @@
 		 * and give up so that our caller doesn't loop forever
 		 * on our EAGAINs.
 		 */
-		if (!btrfs_buffer_uptodate(tmp, 0, 0))
+		if (!extent_buffer_uptodate(tmp))
 			ret = -EIO;
 		free_extent_buffer(tmp);
 	} else {
@@ -2599,6 +2598,78 @@
 	return 0;
 }
 
+static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
+							struct btrfs_path *p,
+							int write_lock_level)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct extent_buffer *b;
+	int root_lock;
+	int level = 0;
+
+	/* We try very hard to do read locks on the root */
+	root_lock = BTRFS_READ_LOCK;
+
+	if (p->search_commit_root) {
+		/* The commit roots are read only so we always do read locks */
+		if (p->need_commit_sem)
+			down_read(&fs_info->commit_root_sem);
+		b = root->commit_root;
+		extent_buffer_get(b);
+		level = btrfs_header_level(b);
+		if (p->need_commit_sem)
+			up_read(&fs_info->commit_root_sem);
+		/*
+		 * Ensure that all callers have set skip_locking when
+		 * p->search_commit_root = 1.
+		 */
+		ASSERT(p->skip_locking == 1);
+
+		goto out;
+	}
+
+	if (p->skip_locking) {
+		b = btrfs_root_node(root);
+		level = btrfs_header_level(b);
+		goto out;
+	}
+
+	/*
+	 * If the level is set to maximum, we can skip trying to get the read
+	 * lock.
+	 */
+	if (write_lock_level < BTRFS_MAX_LEVEL) {
+		/*
+		 * We don't know the level of the root node until we actually
+		 * have it read locked
+		 */
+		b = btrfs_read_lock_root_node(root);
+		level = btrfs_header_level(b);
+		if (level > write_lock_level)
+			goto out;
+
+		/* Whoops, must trade for write lock */
+		btrfs_tree_read_unlock(b);
+		free_extent_buffer(b);
+	}
+
+	b = btrfs_lock_root_node(root);
+	root_lock = BTRFS_WRITE_LOCK;
+
+	/* The level might have changed, check again */
+	level = btrfs_header_level(b);
+
+out:
+	p->nodes[level] = b;
+	if (!p->skip_locking)
+		p->locks[level] = root_lock;
+	/*
+	 * Callers are responsible for dropping b's references.
+	 */
+	return b;
+}
+
+
 /*
  * btrfs_search_slot - look for a key in a tree and perform necessary
  * modifications to preserve tree invariants.
@@ -2635,7 +2706,6 @@
 	int err;
 	int level;
 	int lowest_unlock = 1;
-	int root_lock;
 	/* everything at write_lock_level or lower must be write locked */
 	int write_lock_level = 0;
 	u8 lowest_level = 0;
@@ -2673,50 +2743,7 @@
 
 again:
 	prev_cmp = -1;
-	/*
-	 * we try very hard to do read locks on the root
-	 */
-	root_lock = BTRFS_READ_LOCK;
-	level = 0;
-	if (p->search_commit_root) {
-		/*
-		 * the commit roots are read only
-		 * so we always do read locks
-		 */
-		if (p->need_commit_sem)
-			down_read(&fs_info->commit_root_sem);
-		b = root->commit_root;
-		extent_buffer_get(b);
-		level = btrfs_header_level(b);
-		if (p->need_commit_sem)
-			up_read(&fs_info->commit_root_sem);
-		if (!p->skip_locking)
-			btrfs_tree_read_lock(b);
-	} else {
-		if (p->skip_locking) {
-			b = btrfs_root_node(root);
-			level = btrfs_header_level(b);
-		} else {
-			/* we don't know the level of the root node
-			 * until we actually have it read locked
-			 */
-			b = btrfs_read_lock_root_node(root);
-			level = btrfs_header_level(b);
-			if (level <= write_lock_level) {
-				/* whoops, must trade for write lock */
-				btrfs_tree_read_unlock(b);
-				free_extent_buffer(b);
-				b = btrfs_lock_root_node(root);
-				root_lock = BTRFS_WRITE_LOCK;
-
-				/* the level might have changed, check again */
-				level = btrfs_header_level(b);
-			}
-		}
-	}
-	p->nodes[level] = b;
-	if (!p->skip_locking)
-		p->locks[level] = root_lock;
+	b = btrfs_search_slot_get_root(root, p, write_lock_level);
 
 	while (b) {
 		level = btrfs_header_level(b);
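
btrfs_search_slot_get_root(), factored out above, keeps the optimistic locking dance in one place: take the cheap read lock first, and only when the node's level demands write access, drop it and retake exclusively, re-reading the level since it may have changed while no lock was held. The general shape with a plain rwlock and hypothetical helpers:

    struct my_node {
            rwlock_t lock;
            /* protected payload */
    };

    static void access_node(struct my_node *node)
    {
            read_lock(&node->lock);
            if (!needs_write_access(node)) {        /* hypothetical */
                    do_read_work(node);             /* hypothetical */
                    read_unlock(&node->lock);
                    return;
            }
            read_unlock(&node->lock);

            write_lock(&node->lock);
            /* Re-validate: the node may have changed while unlocked. */
            do_write_work(node);                    /* hypothetical */
            write_unlock(&node->lock);
    }
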
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d422c9..f4bf787 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -739,6 +739,12 @@
  */
 #define BTRFS_FS_NEED_ASYNC_COMMIT		17
 
+/*
+ * Indicate that balance has been set up from the ioctl and is in the main
+ * phase. The fs_info::balance_ctl is initialized.
+ */
+#define BTRFS_FS_BALANCE_RUNNING		18
+
 struct btrfs_fs_info {
 	u8 fsid[BTRFS_FSID_SIZE];
 	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
@@ -838,7 +844,6 @@
 	struct mutex transaction_kthread_mutex;
 	struct mutex cleaner_mutex;
 	struct mutex chunk_mutex;
-	struct mutex volume_mutex;
 
 	/*
 	 * this is taken to make sure we don't set block groups ro after
@@ -1004,7 +1009,6 @@
 	/* restriper state */
 	spinlock_t balance_lock;
 	struct mutex balance_mutex;
-	atomic_t balance_running;
 	atomic_t balance_pause_req;
 	atomic_t balance_cancel_req;
 	struct btrfs_balance_control *balance_ctl;
@@ -1219,9 +1223,6 @@
 	spinlock_t log_extents_lock[2];
 	struct list_head logged_list[2];
 
-	spinlock_t orphan_lock;
-	atomic_t orphan_inodes;
-	struct btrfs_block_rsv *orphan_block_rsv;
 	int orphan_cleanup_state;
 
 	spinlock_t inode_lock;
@@ -2764,13 +2765,9 @@
 void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
 					    u64 len);
 void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
-int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
-				  struct btrfs_inode *inode);
-void btrfs_orphan_release_metadata(struct btrfs_inode *inode);
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 				     struct btrfs_block_rsv *rsv,
-				     int nitems,
-				     u64 *qgroup_reserved, bool use_global_rsv);
+				     int nitems, bool use_global_rsv);
 void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
 				      struct btrfs_block_rsv *rsv);
 void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
@@ -2828,7 +2825,7 @@
 void check_system_chunk(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info, const u64 type);
 u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
-		       struct btrfs_fs_info *info, u64 start, u64 end);
+		       u64 start, u64 end);
 
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
@@ -3042,11 +3039,9 @@
 			     struct btrfs_root *root);
 
 /* uuid-tree.c */
-int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
 			u64 subid);
-int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
 			u64 subid);
 int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
 			    int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
@@ -3163,18 +3158,6 @@
 				     struct extent_map *em);
 
 /* inode.c */
-struct btrfs_delalloc_work {
-	struct inode *inode;
-	int delay_iput;
-	struct completion completion;
-	struct list_head list;
-	struct btrfs_work work;
-};
-
-struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
-						    int delay_iput);
-void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
-
 struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
 		struct page *page, size_t pg_offset, u64 start,
 		u64 len, int create);
@@ -3193,10 +3176,7 @@
 int btrfs_add_link(struct btrfs_trans_handle *trans,
 		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
 		   const char *name, int name_len, int add_backref, u64 index);
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root,
-			struct inode *dir, u64 objectid,
-			const char *name, int name_len);
+int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
 int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
 			int front);
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
@@ -3204,9 +3184,8 @@
 			       struct inode *inode, u64 new_size,
 			       u32 min_type);
 
-int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
-			       int nr);
+int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      unsigned int extra_bits,
 			      struct extent_state **cached_state, int dedupe);
@@ -3240,10 +3219,7 @@
 int btrfs_orphan_add(struct btrfs_trans_handle *trans,
 		struct btrfs_inode *inode);
 int btrfs_orphan_cleanup(struct btrfs_root *root);
-void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
-void btrfs_invalidate_inodes(struct btrfs_root *root);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
 int btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -3262,14 +3238,14 @@
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 int btrfs_ioctl_get_supported_features(void __user *arg);
-void btrfs_update_iflags(struct inode *inode);
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
 int btrfs_is_empty_uuid(u8 *uuid);
 int btrfs_defrag_file(struct inode *inode, struct file *file,
 		      struct btrfs_ioctl_defrag_range_args *range,
 		      u64 newer_than, unsigned long max_pages);
 void btrfs_get_block_group_info(struct list_head *groups_list,
 				struct btrfs_ioctl_space_info *space);
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
 ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 			   struct file *dst_file, u64 dst_loff);
@@ -3767,4 +3743,26 @@
 	return 0;
 }
 
+static inline void cond_wake_up(struct wait_queue_head *wq)
+{
+	/*
+	 * This implies a full smp_mb barrier; see the comments on
+	 * waitqueue_active for why.
+	 */
+	if (wq_has_sleeper(wq))
+		wake_up(wq);
+}
+
+static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
+{
+	/*
+	 * Special case for conditional wakeup where the barrier required for
+	 * waitqueue_active is implied by some of the preceding code, e.g. an
+	 * atomic operation with return value (atomic_dec_return, ...) or an
+	 * unlock/lock sequence.
+	 */
+	if (waitqueue_active(wq))
+		wake_up(wq);
+}
+
 #endif
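
A minimal usage sketch of the two helpers above, with a hypothetical
waiter/waker pair ('pending' and 'demo_wq' are made-up names, not part of
btrfs):

	static atomic_t pending = ATOMIC_INIT(0);
	static DECLARE_WAIT_QUEUE_HEAD(demo_wq);

	static void demo_complete_one(void)
	{
		/* atomic_dec_return() implies a full barrier, so the _nomb
		 * variant is safe here; without such a preceding barrier,
		 * cond_wake_up() must be used instead. */
		if (atomic_dec_return(&pending) == 0)
			cond_wake_up_nomb(&demo_wq);
	}

	static void demo_wait_all(void)
	{
		wait_event(demo_wq, atomic_read(&pending) == 0);
	}
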
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index a8d492db..fe6caa7 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -460,13 +460,10 @@
 {
 	int seq = atomic_inc_return(&delayed_root->items_seq);
 
-	/*
-	 * atomic_dec_return implies a barrier for waitqueue_active
-	 */
+	/* atomic_dec_return implies a barrier */
 	if ((atomic_dec_return(&delayed_root->items) <
-	    BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
-	    waitqueue_active(&delayed_root->wait))
-		wake_up(&delayed_root->wait);
+	    BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0))
+		cond_wake_up_nomb(&delayed_root->wait);
 }
 
 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index e1b0651..03dec67 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -286,10 +286,10 @@
 }
 
 void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info,
 			      struct btrfs_delayed_ref_root *delayed_refs,
 			      struct btrfs_delayed_ref_head *head)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_delayed_ref_node *ref;
 	struct rb_node *node;
 	u64 seq = 0;
@@ -323,9 +323,7 @@
 	}
 }
 
-int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
-			    struct btrfs_delayed_ref_root *delayed_refs,
-			    u64 seq)
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
 {
 	struct seq_list *elem;
 	int ret = 0;
@@ -336,10 +334,9 @@
 					struct seq_list, list);
 		if (seq >= elem->seq) {
 			btrfs_debug(fs_info,
-				"holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)",
+				"holding back delayed_ref %#x.%x, lowest is %#x.%x",
 				(u32)(seq >> 32), (u32)seq,
-				(u32)(elem->seq >> 32), (u32)elem->seq,
-				delayed_refs);
+				(u32)(elem->seq >> 32), (u32)elem->seq);
 			ret = 1;
 		}
 	}
@@ -529,33 +526,20 @@
 	spin_unlock(&existing->lock);
 }
 
-/*
- * helper function to actually insert a head node into the rbtree.
- * this does all the dirty work in terms of maintaining the correct
- * overall modification count.
- */
-static noinline struct btrfs_delayed_ref_head *
-add_delayed_ref_head(struct btrfs_fs_info *fs_info,
-		     struct btrfs_trans_handle *trans,
-		     struct btrfs_delayed_ref_head *head_ref,
-		     struct btrfs_qgroup_extent_record *qrecord,
-		     u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
-		     int action, int is_data, int is_system,
-		     int *qrecord_inserted_ret,
-		     int *old_ref_mod, int *new_ref_mod)
-
+static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
+				  struct btrfs_qgroup_extent_record *qrecord,
+				  u64 bytenr, u64 num_bytes, u64 ref_root,
+				  u64 reserved, int action, bool is_data,
+				  bool is_system)
 {
-	struct btrfs_delayed_ref_head *existing;
-	struct btrfs_delayed_ref_root *delayed_refs;
 	int count_mod = 1;
 	int must_insert_reserved = 0;
-	int qrecord_inserted = 0;
 
 	/* If reserved is provided, it must be a data extent. */
 	BUG_ON(!is_data && reserved);
 
 	/*
-	 * the head node stores the sum of all the mods, so dropping a ref
+	 * The head node stores the sum of all the mods, so dropping a ref
 	 * should drop the sum in the head node by one.
 	 */
 	if (action == BTRFS_UPDATE_DELAYED_HEAD)
@@ -564,12 +548,11 @@
 		count_mod = -1;
 
 	/*
-	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update
-	 * the reserved accounting when the extent is finally added, or
-	 * if a later modification deletes the delayed ref without ever
-	 * inserting the extent into the extent allocation tree.
-	 * ref->must_insert_reserved is the flag used to record
-	 * that accounting mods are required.
+	 * BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
+	 * accounting when the extent is finally added, or if a later
+	 * modification deletes the delayed ref without ever inserting the
+	 * extent into the extent allocation tree.  ref->must_insert_reserved
+	 * is the flag used to record that accounting mods are required.
 	 *
 	 * Once we record must_insert_reserved, switch the action to
 	 * BTRFS_ADD_DELAYED_REF because other special casing is not required.
@@ -579,8 +562,6 @@
 	else
 		must_insert_reserved = 0;
 
-	delayed_refs = &trans->transaction->delayed_refs;
-
 	refcount_set(&head_ref->refs, 1);
 	head_ref->bytenr = bytenr;
 	head_ref->num_bytes = num_bytes;
@@ -598,7 +579,6 @@
 	spin_lock_init(&head_ref->lock);
 	mutex_init(&head_ref->mutex);
 
-	/* Record qgroup extent info if provided */
 	if (qrecord) {
 		if (ref_root && reserved) {
 			head_ref->qgroup_ref_root = ref_root;
@@ -608,20 +588,44 @@
 		qrecord->bytenr = bytenr;
 		qrecord->num_bytes = num_bytes;
 		qrecord->old_roots = NULL;
+	}
+}
 
-		if(btrfs_qgroup_trace_extent_nolock(fs_info,
+/*
+ * helper function to actually insert a head node into the rbtree.
+ * this does all the dirty work in terms of maintaining the correct
+ * overall modification count.
+ */
+static noinline struct btrfs_delayed_ref_head *
+add_delayed_ref_head(struct btrfs_trans_handle *trans,
+		     struct btrfs_delayed_ref_head *head_ref,
+		     struct btrfs_qgroup_extent_record *qrecord,
+		     int action, int *qrecord_inserted_ret,
+		     int *old_ref_mod, int *new_ref_mod)
+{
+	struct btrfs_delayed_ref_head *existing;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	int qrecord_inserted = 0;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+
+	/* Record qgroup extent info if provided */
+	if (qrecord) {
+		if (btrfs_qgroup_trace_extent_nolock(trans->fs_info,
 					delayed_refs, qrecord))
 			kfree(qrecord);
 		else
 			qrecord_inserted = 1;
 	}
 
-	trace_add_delayed_ref_head(fs_info, head_ref, action);
+	trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
 
 	existing = htree_insert(&delayed_refs->href_root,
 				&head_ref->href_node);
 	if (existing) {
-		WARN_ON(ref_root && reserved && existing->qgroup_ref_root
+		WARN_ON(qrecord && head_ref->qgroup_ref_root
+			&& head_ref->qgroup_reserved
+			&& existing->qgroup_ref_root
 			&& existing->qgroup_reserved);
 		update_existing_head_ref(delayed_refs, existing, head_ref,
 					 old_ref_mod);
@@ -634,8 +638,8 @@
 	} else {
 		if (old_ref_mod)
 			*old_ref_mod = 0;
-		if (is_data && count_mod < 0)
-			delayed_refs->pending_csums += num_bytes;
+		if (head_ref->is_data && head_ref->ref_mod < 0)
+			delayed_refs->pending_csums += head_ref->num_bytes;
 		delayed_refs->num_heads++;
 		delayed_refs->num_heads_ready++;
 		atomic_inc(&delayed_refs->num_entries);
@@ -645,33 +649,48 @@
 		*qrecord_inserted_ret = qrecord_inserted;
 	if (new_ref_mod)
 		*new_ref_mod = head_ref->total_ref_mod;
+
 	return head_ref;
 }
 
 /*
- * helper to insert a delayed tree ref into the rbtree.
+ * init_delayed_ref_common - Initialize the structure which represents a
+ *			     modification to an extent.
+ *
+ * @fs_info:    In-memory structure of the mounted filesystem.
+ *
+ * @ref:	The structure which is going to be initialized.
+ *
+ * @bytenr:	The logical address of the extent for which a modification is
+ *		going to be recorded.
+ *
+ * @num_bytes:  Size of the extent whose modification is being recorded.
+ *
+ * @ref_root:	The id of the root where this modification has originated, this
+ *		can be either one of the well-known metadata trees or the
+ *		subvolume id which references this extent.
+ *
+ * @action:	Can be one of BTRFS_ADD_DELAYED_REF/BTRFS_DROP_DELAYED_REF or
+ *		BTRFS_ADD_DELAYED_EXTENT
+ *
+ * @ref_type:	Holds the type of the extent which is being recorded; can be
+ *		one of BTRFS_SHARED_BLOCK_REF_KEY/BTRFS_TREE_BLOCK_REF_KEY
+ *		when recording a metadata extent, or BTRFS_SHARED_DATA_REF_KEY/
+ *		BTRFS_EXTENT_DATA_REF_KEY when recording a data extent.
  */
-static noinline void
-add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
-		     struct btrfs_trans_handle *trans,
-		     struct btrfs_delayed_ref_head *head_ref,
-		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
-		     u64 num_bytes, u64 parent, u64 ref_root, int level,
-		     int action)
+static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
+				    struct btrfs_delayed_ref_node *ref,
+				    u64 bytenr, u64 num_bytes, u64 ref_root,
+				    int action, u8 ref_type)
 {
-	struct btrfs_delayed_tree_ref *full_ref;
-	struct btrfs_delayed_ref_root *delayed_refs;
 	u64 seq = 0;
-	int ret;
 
 	if (action == BTRFS_ADD_DELAYED_EXTENT)
 		action = BTRFS_ADD_DELAYED_REF;
 
 	if (is_fstree(ref_root))
 		seq = atomic64_read(&fs_info->tree_mod_seq);
-	delayed_refs = &trans->transaction->delayed_refs;
 
-	/* first set the basic ref node struct up */
 	refcount_set(&ref->refs, 1);
 	ref->bytenr = bytenr;
 	ref->num_bytes = num_bytes;
@@ -680,82 +699,9 @@
 	ref->is_head = 0;
 	ref->in_tree = 1;
 	ref->seq = seq;
+	ref->type = ref_type;
 	RB_CLEAR_NODE(&ref->ref_node);
 	INIT_LIST_HEAD(&ref->add_list);
-
-	full_ref = btrfs_delayed_node_to_tree_ref(ref);
-	full_ref->parent = parent;
-	full_ref->root = ref_root;
-	if (parent)
-		ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
-	else
-		ref->type = BTRFS_TREE_BLOCK_REF_KEY;
-	full_ref->level = level;
-
-	trace_add_delayed_tree_ref(fs_info, ref, full_ref, action);
-
-	ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
-
-	/*
-	 * XXX: memory should be freed at the same level allocated.
-	 * But bad practice is anywhere... Follow it now. Need cleanup.
-	 */
-	if (ret > 0)
-		kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
-}
-
-/*
- * helper to insert a delayed data ref into the rbtree.
- */
-static noinline void
-add_delayed_data_ref(struct btrfs_fs_info *fs_info,
-		     struct btrfs_trans_handle *trans,
-		     struct btrfs_delayed_ref_head *head_ref,
-		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
-		     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
-		     u64 offset, int action)
-{
-	struct btrfs_delayed_data_ref *full_ref;
-	struct btrfs_delayed_ref_root *delayed_refs;
-	u64 seq = 0;
-	int ret;
-
-	if (action == BTRFS_ADD_DELAYED_EXTENT)
-		action = BTRFS_ADD_DELAYED_REF;
-
-	delayed_refs = &trans->transaction->delayed_refs;
-
-	if (is_fstree(ref_root))
-		seq = atomic64_read(&fs_info->tree_mod_seq);
-
-	/* first set the basic ref node struct up */
-	refcount_set(&ref->refs, 1);
-	ref->bytenr = bytenr;
-	ref->num_bytes = num_bytes;
-	ref->ref_mod = 1;
-	ref->action = action;
-	ref->is_head = 0;
-	ref->in_tree = 1;
-	ref->seq = seq;
-	RB_CLEAR_NODE(&ref->ref_node);
-	INIT_LIST_HEAD(&ref->add_list);
-
-	full_ref = btrfs_delayed_node_to_data_ref(ref);
-	full_ref->parent = parent;
-	full_ref->root = ref_root;
-	if (parent)
-		ref->type = BTRFS_SHARED_DATA_REF_KEY;
-	else
-		ref->type = BTRFS_EXTENT_DATA_REF_KEY;
-
-	full_ref->objectid = owner;
-	full_ref->offset = offset;
-
-	trace_add_delayed_data_ref(fs_info, ref, full_ref, action);
-
-	ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref);
-	if (ret > 0)
-		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
 }
 
 /*
@@ -775,13 +721,25 @@
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
 	int qrecord_inserted;
-	int is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+	bool is_system = (ref_root == BTRFS_CHUNK_TREE_OBJECTID);
+	int ret;
+	u8 ref_type;
 
 	BUG_ON(extent_op && extent_op->is_data);
 	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
 	if (!ref)
 		return -ENOMEM;
 
+	if (parent)
+		ref_type = BTRFS_SHARED_BLOCK_REF_KEY;
+	else
+		ref_type = BTRFS_TREE_BLOCK_REF_KEY;
+	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
+				ref_root, action, ref_type);
+	ref->root = ref_root;
+	ref->parent = parent;
+	ref->level = level;
+
 	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
 	if (!head_ref)
 		goto free_ref;
@@ -793,6 +751,8 @@
 			goto free_head_ref;
 	}
 
+	init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
+			      ref_root, 0, action, false, is_system);
 	head_ref->extent_op = extent_op;
 
 	delayed_refs = &trans->transaction->delayed_refs;
@@ -802,15 +762,19 @@
 	 * insert both the head node and the new ref without dropping
 	 * the spin lock
 	 */
-	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
-					bytenr, num_bytes, 0, 0, action, 0,
-					is_system, &qrecord_inserted,
+	head_ref = add_delayed_ref_head(trans, head_ref, record,
+					action, &qrecord_inserted,
 					old_ref_mod, new_ref_mod);
 
-	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
-			     num_bytes, parent, ref_root, level, action);
+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
 	spin_unlock(&delayed_refs->lock);
 
+	trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
+				   action == BTRFS_ADD_DELAYED_EXTENT ?
+				   BTRFS_ADD_DELAYED_REF : action);
+	if (ret > 0)
+		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+
 	if (qrecord_inserted)
 		btrfs_qgroup_trace_extent_post(fs_info, record);
 
@@ -839,11 +803,25 @@
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
 	int qrecord_inserted;
+	int ret;
+	u8 ref_type;
 
 	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
 	if (!ref)
 		return -ENOMEM;
 
+	if (parent)
+		ref_type = BTRFS_SHARED_DATA_REF_KEY;
+	else
+		ref_type = BTRFS_EXTENT_DATA_REF_KEY;
+	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
+				ref_root, action, ref_type);
+	ref->root = ref_root;
+	ref->parent = parent;
+	ref->objectid = owner;
+	ref->offset = offset;
+
 	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
 	if (!head_ref) {
 		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
@@ -861,6 +839,8 @@
 		}
 	}
 
+	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
+			      reserved, action, true, false);
 	head_ref->extent_op = NULL;
 
 	delayed_refs = &trans->transaction->delayed_refs;
@@ -870,16 +850,20 @@
 	 * insert both the head node and the new ref without dropping
 	 * the spin lock
 	 */
-	head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record,
-					bytenr, num_bytes, ref_root, reserved,
-					action, 1, 0, &qrecord_inserted,
+	head_ref = add_delayed_ref_head(trans, head_ref, record,
+					action, &qrecord_inserted,
 					old_ref_mod, new_ref_mod);
 
-	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
-				   num_bytes, parent, ref_root, owner, offset,
-				   action);
+	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
 	spin_unlock(&delayed_refs->lock);
 
+	trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
+				   action == BTRFS_ADD_DELAYED_EXTENT ?
+				   BTRFS_ADD_DELAYED_REF : action);
+	if (ret > 0)
+		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+
 	if (qrecord_inserted)
 		return btrfs_qgroup_trace_extent_post(fs_info, record);
 	return 0;
@@ -897,19 +881,16 @@
 	if (!head_ref)
 		return -ENOMEM;
 
+	init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0,
+			      BTRFS_UPDATE_DELAYED_HEAD, extent_op->is_data,
+			      false);
 	head_ref->extent_op = extent_op;
 
 	delayed_refs = &trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
 
-	/*
-	 * extent_ops just modify the flags of an extent and they don't result
-	 * in ref count changes, hence it's safe to pass false/0 for is_system
-	 * argument
-	 */
-	add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr,
-			     num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
-			     extent_op->is_data, 0, NULL, NULL, NULL);
+	add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
+			     NULL, NULL, NULL);
 
 	spin_unlock(&delayed_refs->lock);
 	return 0;
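
Condensed, the refactor above separates initialization from rbtree
insertion; a sketch of the resulting flow in btrfs_add_delayed_data_ref()
(names as in this patch, allocation and error handling elided):

	ref_type = parent ? BTRFS_SHARED_DATA_REF_KEY : BTRFS_EXTENT_DATA_REF_KEY;
	init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
				ref_root, action, ref_type);
	init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root,
			      reserved, action, true, false);

	spin_lock(&delayed_refs->lock);
	head_ref = add_delayed_ref_head(trans, head_ref, record, action,
					&qrecord_inserted, old_ref_mod,
					new_ref_mod);
	ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node);
	spin_unlock(&delayed_refs->lock);
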
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 7f00db5..ea1aecb 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -251,7 +251,6 @@
 				u64 bytenr, u64 num_bytes,
 				struct btrfs_delayed_extent_op *extent_op);
 void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
-			      struct btrfs_fs_info *fs_info,
 			      struct btrfs_delayed_ref_root *delayed_refs,
 			      struct btrfs_delayed_ref_head *head);
 
@@ -269,9 +268,7 @@
 struct btrfs_delayed_ref_head *
 btrfs_select_ref_head(struct btrfs_trans_handle *trans);
 
-int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
-			    struct btrfs_delayed_ref_root *delayed_refs,
-			    u64 seq);
+int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
 
 /*
  * helper functions to cast a node into its container
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index f82be26..e2ba041 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -33,8 +33,6 @@
 						struct btrfs_device *srcdev,
 						struct btrfs_device *tgtdev);
 static int btrfs_dev_replace_kthread(void *data);
-static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info);
-
 
 int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
 {
@@ -179,6 +177,105 @@
 }
 
 /*
+ * Initialize a new device as the device replace target from a given source
+ * device and path.
+ *
+ * Return 0 and the new device in @device_out on success, otherwise < 0.
+ */
+static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
+				  const char *device_path,
+				  struct btrfs_device *srcdev,
+				  struct btrfs_device **device_out)
+{
+	struct btrfs_device *device;
+	struct block_device *bdev;
+	struct list_head *devices;
+	struct rcu_string *name;
+	u64 devid = BTRFS_DEV_REPLACE_DEVID;
+	int ret = 0;
+
+	*device_out = NULL;
+	if (fs_info->fs_devices->seeding) {
+		btrfs_err(fs_info, "the filesystem is a seed filesystem!");
+		return -EINVAL;
+	}
+
+	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
+				  fs_info->bdev_holder);
+	if (IS_ERR(bdev)) {
+		btrfs_err(fs_info, "target device %s is invalid!", device_path);
+		return PTR_ERR(bdev);
+	}
+
+	filemap_write_and_wait(bdev->bd_inode->i_mapping);
+
+	devices = &fs_info->fs_devices->devices;
+	list_for_each_entry(device, devices, dev_list) {
+		if (device->bdev == bdev) {
+			btrfs_err(fs_info,
+				  "target device is in the filesystem!");
+			ret = -EEXIST;
+			goto error;
+		}
+	}
+
+	if (i_size_read(bdev->bd_inode) <
+	    btrfs_device_get_total_bytes(srcdev)) {
+		btrfs_err(fs_info,
+			  "target device is smaller than source device!");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	device = btrfs_alloc_device(NULL, &devid, NULL);
+	if (IS_ERR(device)) {
+		ret = PTR_ERR(device);
+		goto error;
+	}
+
+	name = rcu_string_strdup(device_path, GFP_KERNEL);
+	if (!name) {
+		btrfs_free_device(device);
+		ret = -ENOMEM;
+		goto error;
+	}
+	rcu_assign_pointer(device->name, name);
+
+	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+	device->generation = 0;
+	device->io_width = fs_info->sectorsize;
+	device->io_align = fs_info->sectorsize;
+	device->sector_size = fs_info->sectorsize;
+	device->total_bytes = btrfs_device_get_total_bytes(srcdev);
+	device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
+	device->bytes_used = btrfs_device_get_bytes_used(srcdev);
+	device->commit_total_bytes = srcdev->commit_total_bytes;
+	device->commit_bytes_used = device->bytes_used;
+	device->fs_info = fs_info;
+	device->bdev = bdev;
+	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+	set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+	device->mode = FMODE_EXCL;
+	device->dev_stats_valid = 1;
+	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
+	device->fs_devices = fs_info->fs_devices;
+	list_add(&device->dev_list, &fs_info->fs_devices->devices);
+	fs_info->fs_devices->num_devices++;
+	fs_info->fs_devices->open_devices++;
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+
+	*device_out = device;
+	return 0;
+
+error:
+	blkdev_put(bdev, FMODE_EXCL);
+	return ret;
+}
+
+/*
  * called from commit_transaction. Writes changed device replace state to
  * disk.
  */
@@ -317,18 +414,13 @@
 	struct btrfs_device *tgt_device = NULL;
 	struct btrfs_device *src_device = NULL;
 
-	/* the disk copy procedure reuses the scrub code */
-	mutex_lock(&fs_info->volume_mutex);
 	ret = btrfs_find_device_by_devspec(fs_info, srcdevid,
 					    srcdev_name, &src_device);
-	if (ret) {
-		mutex_unlock(&fs_info->volume_mutex);
+	if (ret)
 		return ret;
-	}
 
 	ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
 					    src_device, &tgt_device);
-	mutex_unlock(&fs_info->volume_mutex);
 	if (ret)
 		return ret;
 
@@ -360,7 +452,6 @@
 	dev_replace->cont_reading_from_srcdev_mode = read_src;
 	WARN_ON(!src_device);
 	dev_replace->srcdev = src_device;
-	WARN_ON(!tgt_device);
 	dev_replace->tgtdev = tgt_device;
 
 	btrfs_info_in_rcu(fs_info,
@@ -503,7 +594,7 @@
 	 * flush all outstanding I/O and inode extent mappings before the
 	 * copy operation is declared as being finished
 	 */
-	ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
+	ret = btrfs_start_delalloc_roots(fs_info, -1);
 	if (ret) {
 		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 		return ret;
@@ -518,7 +609,6 @@
 	ret = btrfs_commit_transaction(trans);
 	WARN_ON(ret);
 
-	mutex_lock(&uuid_mutex);
 	/* keep away write_all_supers() during the finishing procedure */
 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
 	mutex_lock(&fs_info->chunk_mutex);
@@ -545,7 +635,6 @@
 		btrfs_dev_replace_write_unlock(dev_replace);
 		mutex_unlock(&fs_info->chunk_mutex);
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-		mutex_unlock(&uuid_mutex);
 		btrfs_rm_dev_replace_blocked(fs_info);
 		if (tgt_device)
 			btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
@@ -596,7 +685,6 @@
 	 */
 	mutex_unlock(&fs_info->chunk_mutex);
 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-	mutex_unlock(&uuid_mutex);
 
 	/* replace the sysfs entry */
 	btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
@@ -800,7 +888,17 @@
 	}
 	btrfs_dev_replace_write_unlock(dev_replace);
 
-	WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+	/*
+	 * This could collide with a paused balance, but the exclusive op logic
+	 * should never allow both to start and pause. We don't want to allow
+	 * dev-replace to start anyway.
+	 */
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
+		btrfs_info(fs_info,
+		"cannot resume dev-replace, other exclusive operation running");
+		return 0;
+	}
+
 	task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
 	return PTR_ERR_OR_ZERO(task);
 }
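
PTR_ERR_OR_ZERO() above folds the usual error-pointer check into one call;
an equivalent open-coded sketch:

	task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
	if (IS_ERR(task))
		return PTR_ERR(task);	/* kthread_run() failed */
	return 0;
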
@@ -810,6 +908,7 @@
 	struct btrfs_fs_info *fs_info = data;
 	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
 	u64 progress;
+	int ret;
 
 	progress = btrfs_dev_replace_progress(fs_info);
 	progress = div_u64(progress, 10);
@@ -820,23 +919,14 @@
 		btrfs_dev_name(dev_replace->tgtdev),
 		(unsigned int)progress);
 
-	btrfs_dev_replace_continue_on_mount(fs_info);
-	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-
-	return 0;
-}
-
-static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info)
-{
-	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
-	int ret;
-
 	ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid,
 			      dev_replace->committed_cursor_left,
 			      btrfs_device_get_total_bytes(dev_replace->srcdev),
 			      &dev_replace->scrub_progress, 0, 1);
 	ret = btrfs_dev_replace_finishing(fs_info, ret);
 	WARN_ON(ret);
+
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	return 0;
 }
 
@@ -916,9 +1006,9 @@
 	ASSERT(atomic_read(&dev_replace->read_locks) > 0);
 	ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
 	read_lock(&dev_replace->lock);
-	if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
-	    waitqueue_active(&dev_replace->read_lock_wq))
-		wake_up(&dev_replace->read_lock_wq);
+	/* Barrier implied by atomic_dec_and_test */
+	if (atomic_dec_and_test(&dev_replace->blocking_readers))
+		cond_wake_up_nomb(&dev_replace->read_lock_wq);
 }
 
 void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
@@ -929,9 +1019,7 @@
 void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
 {
 	percpu_counter_sub(&fs_info->bio_counter, amount);
-
-	if (waitqueue_active(&fs_info->replace_wait))
-		wake_up(&fs_info->replace_wait);
+	cond_wake_up_nomb(&fs_info->replace_wait);
 }
 
 void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
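
Resume and start of dev-replace serialize against other exclusive
operations through a single flag bit; a sketch of the guard pattern used
above (the error value varies by call site, and do_exclusive_op() is a
hypothetical stand-in):

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
		return -EBUSY;		/* e.g. a balance already running */
	ret = do_exclusive_op(fs_info);
	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
	return ret;
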
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c3504b4..205092d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -55,7 +55,6 @@
 static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				      struct btrfs_fs_info *fs_info);
@@ -416,7 +415,7 @@
 
 static int verify_level_key(struct btrfs_fs_info *fs_info,
 			    struct extent_buffer *eb, int level,
-			    struct btrfs_key *first_key)
+			    struct btrfs_key *first_key, u64 parent_transid)
 {
 	int found_level;
 	struct btrfs_key found_key;
@@ -454,10 +453,11 @@
 	if (ret) {
 		WARN_ON(1);
 		btrfs_err(fs_info,
-"tree first key mismatch detected, bytenr=%llu key expected=(%llu, %u, %llu) has=(%llu, %u, %llu)",
-			  eb->start, first_key->objectid, first_key->type,
-			  first_key->offset, found_key.objectid,
-			  found_key.type, found_key.offset);
+"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
+			  eb->start, parent_transid, first_key->objectid,
+			  first_key->type, first_key->offset,
+			  found_key.objectid, found_key.type,
+			  found_key.offset);
 	}
 #endif
 	return ret;
@@ -493,7 +493,7 @@
 						   parent_transid, 0))
 				ret = -EIO;
 			else if (verify_level_key(fs_info, eb, level,
-						  first_key))
+						  first_key, parent_transid))
 				ret = -EUCLEAN;
 			else
 				break;
@@ -1185,7 +1185,6 @@
 	root->inode_tree = RB_ROOT;
 	INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
 	root->block_rsv = NULL;
-	root->orphan_block_rsv = NULL;
 
 	INIT_LIST_HEAD(&root->dirty_list);
 	INIT_LIST_HEAD(&root->root_list);
@@ -1195,7 +1194,6 @@
 	INIT_LIST_HEAD(&root->ordered_root);
 	INIT_LIST_HEAD(&root->logged_list[0]);
 	INIT_LIST_HEAD(&root->logged_list[1]);
-	spin_lock_init(&root->orphan_lock);
 	spin_lock_init(&root->inode_lock);
 	spin_lock_init(&root->delalloc_lock);
 	spin_lock_init(&root->ordered_extent_lock);
@@ -1216,7 +1214,6 @@
 	atomic_set(&root->log_commit[1], 0);
 	atomic_set(&root->log_writers, 0);
 	atomic_set(&root->log_batch, 0);
-	atomic_set(&root->orphan_inodes, 0);
 	refcount_set(&root->refs, 1);
 	atomic_set(&root->will_be_snapshotted, 0);
 	root->log_transid = 0;
@@ -2164,7 +2161,6 @@
 {
 	spin_lock_init(&fs_info->balance_lock);
 	mutex_init(&fs_info->balance_mutex);
-	atomic_set(&fs_info->balance_running, 0);
 	atomic_set(&fs_info->balance_pause_req, 0);
 	atomic_set(&fs_info->balance_cancel_req, 0);
 	fs_info->balance_ctl = NULL;
@@ -2442,6 +2438,211 @@
 	return ret;
 }
 
+/*
+ * Real super block validation
+ * NOTE: super csum type and incompat features will not be checked here.
+ *
+ * @sb:		super block to check
+ * @mirror_num:	which superblock mirror's bytenr to verify:
+ * 		0	the primary (1st) sb
+ * 		1, 2	2nd and 3rd backup copy
+ * 	       -1	skip bytenr check
+ */
+static int validate_super(struct btrfs_fs_info *fs_info,
+			    struct btrfs_super_block *sb, int mirror_num)
+{
+	u64 nodesize = btrfs_super_nodesize(sb);
+	u64 sectorsize = btrfs_super_sectorsize(sb);
+	int ret = 0;
+
+	if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+		btrfs_err(fs_info, "no valid FS found");
+		ret = -EINVAL;
+	}
+	if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
+		btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
+				btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
+		ret = -EINVAL;
+	}
+	if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		btrfs_err(fs_info, "tree_root level too big: %d >= %d",
+				btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
+		ret = -EINVAL;
+	}
+	if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
+				btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
+		ret = -EINVAL;
+	}
+	if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		btrfs_err(fs_info, "log_root level too big: %d >= %d",
+				btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
+		ret = -EINVAL;
+	}
+
+	/*
+	 * Check sectorsize and nodesize first; other checks will need them.
+	 * Check all possible sectorsizes (4K, 8K, 16K, 32K, 64K) here.
+	 */
+	if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+	    sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+		btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
+		ret = -EINVAL;
+	}
+	/* Only PAGE_SIZE is supported for now */
+	if (sectorsize != PAGE_SIZE) {
+		btrfs_err(fs_info,
+			"sectorsize %llu not supported yet, only support %lu",
+			sectorsize, PAGE_SIZE);
+		ret = -EINVAL;
+	}
+	if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+	    nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+		btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
+		ret = -EINVAL;
+	}
+	if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+		btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
+			  le32_to_cpu(sb->__unused_leafsize), nodesize);
+		ret = -EINVAL;
+	}
+
+	/* Root alignment check */
+	if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
+		btrfs_warn(fs_info, "tree_root block unaligned: %llu",
+			   btrfs_super_root(sb));
+		ret = -EINVAL;
+	}
+	if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
+		btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
+			   btrfs_super_chunk_root(sb));
+		ret = -EINVAL;
+	}
+	if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+		btrfs_warn(fs_info, "log_root block unaligned: %llu",
+			   btrfs_super_log_root(sb));
+		ret = -EINVAL;
+	}
+
+	if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
+		btrfs_err(fs_info,
+			   "dev_item UUID does not match fsid: %pU != %pU",
+			   fs_info->fsid, sb->dev_item.fsid);
+		ret = -EINVAL;
+	}
+
+	/*
+	 * Hint to catch really bogus numbers, bitflips or so; more exact
+	 * checks are done later.
+	 */
+	if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+		btrfs_err(fs_info, "bytes_used is too small %llu",
+			  btrfs_super_bytes_used(sb));
+		ret = -EINVAL;
+	}
+	if (!is_power_of_2(btrfs_super_stripesize(sb))) {
+		btrfs_err(fs_info, "invalid stripesize %u",
+			  btrfs_super_stripesize(sb));
+		ret = -EINVAL;
+	}
+	if (btrfs_super_num_devices(sb) > (1UL << 31))
+		btrfs_warn(fs_info, "suspicious number of devices: %llu",
+			   btrfs_super_num_devices(sb));
+	if (btrfs_super_num_devices(sb) == 0) {
+		btrfs_err(fs_info, "number of devices is 0");
+		ret = -EINVAL;
+	}
+
+	if (mirror_num >= 0 &&
+	    btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) {
+		btrfs_err(fs_info, "super offset mismatch %llu != %u",
+			  btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
+		ret = -EINVAL;
+	}
+
+	/*
+	 * Obvious sys_chunk_array corruptions, it must hold at least one key
+	 * and one chunk
+	 */
+	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+		btrfs_err(fs_info, "system chunk array too big %u > %u",
+			  btrfs_super_sys_array_size(sb),
+			  BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+		ret = -EINVAL;
+	}
+	if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+			+ sizeof(struct btrfs_chunk)) {
+		btrfs_err(fs_info, "system chunk array too small %u < %zu",
+			  btrfs_super_sys_array_size(sb),
+			  sizeof(struct btrfs_disk_key)
+			  + sizeof(struct btrfs_chunk));
+		ret = -EINVAL;
+	}
+
+	/*
+	 * The generation is a global counter; we'll trust it more than the
+	 * others, but it's still possible that it's the one that's wrong.
+	 */
+	if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
+		btrfs_warn(fs_info,
+			"suspicious: generation < chunk_root_generation: %llu < %llu",
+			btrfs_super_generation(sb),
+			btrfs_super_chunk_root_generation(sb));
+	if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
+	    && btrfs_super_cache_generation(sb) != (u64)-1)
+		btrfs_warn(fs_info,
+			"suspicious: generation < cache_generation: %llu < %llu",
+			btrfs_super_generation(sb),
+			btrfs_super_cache_generation(sb));
+
+	return ret;
+}
+
+/*
+ * Validation of super block at mount time.
+ * Checks already done early at mount time, like the csum type and incompat
+ * flags, will be skipped.
+ */
+static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
+{
+	return validate_super(fs_info, fs_info->super_copy, 0);
+}
+
+/*
+ * Validation of super block at write time.
+ * Some checks, like the bytenr check, will be skipped as their values will
+ * be overwritten soon.
+ * Extra checks, like the csum type and incompat flags, will be done here.
+ */
+static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
+				      struct btrfs_super_block *sb)
+{
+	int ret;
+
+	ret = validate_super(fs_info, sb, -1);
+	if (ret < 0)
+		goto out;
+	if (btrfs_super_csum_type(sb) != BTRFS_CSUM_TYPE_CRC32) {
+		ret = -EUCLEAN;
+		btrfs_err(fs_info, "invalid csum type, has %u want %u",
+			  btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
+		goto out;
+	}
+	if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
+		ret = -EUCLEAN;
+		btrfs_err(fs_info,
+		"invalid incompat flags, has 0x%llx valid mask 0x%llx",
+			  btrfs_super_incompat_flags(sb),
+			  (unsigned long long)BTRFS_FEATURE_INCOMPAT_SUPP);
+		goto out;
+	}
+out:
+	if (ret < 0)
+		btrfs_err(fs_info,
+		"super block corruption detected before writing it to disk");
+	return ret;
+}
+
 int open_ctree(struct super_block *sb,
 	       struct btrfs_fs_devices *fs_devices,
 	       char *options)
@@ -2601,7 +2802,6 @@
 	mutex_init(&fs_info->chunk_mutex);
 	mutex_init(&fs_info->transaction_kthread_mutex);
 	mutex_init(&fs_info->cleaner_mutex);
-	mutex_init(&fs_info->volume_mutex);
 	mutex_init(&fs_info->ro_block_group_mutex);
 	init_rwsem(&fs_info->commit_root_sem);
 	init_rwsem(&fs_info->cleanup_work_sem);
@@ -2668,7 +2868,7 @@
 
 	memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
 
-	ret = btrfs_check_super_valid(fs_info);
+	ret = btrfs_validate_mount_super(fs_info);
 	if (ret) {
 		btrfs_err(fs_info, "superblock contains fatal errors");
 		err = -EINVAL;
@@ -3523,7 +3723,7 @@
 	for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
 		if (raid_type == BTRFS_RAID_SINGLE)
 			continue;
-		if (!(flags & btrfs_raid_group[raid_type]))
+		if (!(flags & btrfs_raid_array[raid_type].bg_flag))
 			continue;
 		min_tolerated = min(min_tolerated,
 				    btrfs_raid_array[raid_type].
@@ -3603,6 +3803,14 @@
 		flags = btrfs_super_flags(sb);
 		btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
 
+		ret = btrfs_validate_write_super(fs_info, sb);
+		if (ret < 0) {
+			mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+			btrfs_handle_fs_error(fs_info, -EUCLEAN,
+				"unexpected superblock corruption detected");
+			return -EUCLEAN;
+		}
+
 		ret = write_dev_supers(dev, sb, max_mirrors);
 		if (ret)
 			total_errors++;
@@ -3674,8 +3882,6 @@
 {
 	iput(root->ino_cache_inode);
 	WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
-	btrfs_free_block_rsv(root->fs_info, root->orphan_block_rsv);
-	root->orphan_block_rsv = NULL;
 	if (root->anon_dev)
 		free_anon_bdev(root->anon_dev);
 	if (root->subv_writers)
@@ -3766,7 +3972,6 @@
 
 void close_ctree(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_root *root = fs_info->tree_root;
 	int ret;
 
 	set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
@@ -3862,9 +4067,6 @@
 	btrfs_free_stripe_hash_table(fs_info);
 	btrfs_free_ref_cache(fs_info);
 
-	__btrfs_free_block_rsv(root->orphan_block_rsv);
-	root->orphan_block_rsv = NULL;
-
 	while (!list_empty(&fs_info->pinned_chunks)) {
 		struct extent_map *em;
 
@@ -3975,155 +4177,6 @@
 					      level, first_key);
 }
 
-static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
-{
-	struct btrfs_super_block *sb = fs_info->super_copy;
-	u64 nodesize = btrfs_super_nodesize(sb);
-	u64 sectorsize = btrfs_super_sectorsize(sb);
-	int ret = 0;
-
-	if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
-		btrfs_err(fs_info, "no valid FS found");
-		ret = -EINVAL;
-	}
-	if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
-		btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
-				btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
-		ret = -EINVAL;
-	}
-	if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
-		btrfs_err(fs_info, "tree_root level too big: %d >= %d",
-				btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
-		ret = -EINVAL;
-	}
-	if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
-		btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
-				btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
-		ret = -EINVAL;
-	}
-	if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
-		btrfs_err(fs_info, "log_root level too big: %d >= %d",
-				btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
-		ret = -EINVAL;
-	}
-
-	/*
-	 * Check sectorsize and nodesize first, other check will need it.
-	 * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
-	 */
-	if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
-	    sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
-		btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
-		ret = -EINVAL;
-	}
-	/* Only PAGE SIZE is supported yet */
-	if (sectorsize != PAGE_SIZE) {
-		btrfs_err(fs_info,
-			"sectorsize %llu not supported yet, only support %lu",
-			sectorsize, PAGE_SIZE);
-		ret = -EINVAL;
-	}
-	if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
-	    nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
-		btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
-		ret = -EINVAL;
-	}
-	if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
-		btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
-			  le32_to_cpu(sb->__unused_leafsize), nodesize);
-		ret = -EINVAL;
-	}
-
-	/* Root alignment check */
-	if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
-		btrfs_warn(fs_info, "tree_root block unaligned: %llu",
-			   btrfs_super_root(sb));
-		ret = -EINVAL;
-	}
-	if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
-		btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
-			   btrfs_super_chunk_root(sb));
-		ret = -EINVAL;
-	}
-	if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
-		btrfs_warn(fs_info, "log_root block unaligned: %llu",
-			   btrfs_super_log_root(sb));
-		ret = -EINVAL;
-	}
-
-	if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
-		btrfs_err(fs_info,
-			   "dev_item UUID does not match fsid: %pU != %pU",
-			   fs_info->fsid, sb->dev_item.fsid);
-		ret = -EINVAL;
-	}
-
-	/*
-	 * Hint to catch really bogus numbers, bitflips or so, more exact checks are
-	 * done later
-	 */
-	if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
-		btrfs_err(fs_info, "bytes_used is too small %llu",
-			  btrfs_super_bytes_used(sb));
-		ret = -EINVAL;
-	}
-	if (!is_power_of_2(btrfs_super_stripesize(sb))) {
-		btrfs_err(fs_info, "invalid stripesize %u",
-			  btrfs_super_stripesize(sb));
-		ret = -EINVAL;
-	}
-	if (btrfs_super_num_devices(sb) > (1UL << 31))
-		btrfs_warn(fs_info, "suspicious number of devices: %llu",
-			   btrfs_super_num_devices(sb));
-	if (btrfs_super_num_devices(sb) == 0) {
-		btrfs_err(fs_info, "number of devices is 0");
-		ret = -EINVAL;
-	}
-
-	if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
-		btrfs_err(fs_info, "super offset mismatch %llu != %u",
-			  btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
-		ret = -EINVAL;
-	}
-
-	/*
-	 * Obvious sys_chunk_array corruptions, it must hold at least one key
-	 * and one chunk
-	 */
-	if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
-		btrfs_err(fs_info, "system chunk array too big %u > %u",
-			  btrfs_super_sys_array_size(sb),
-			  BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
-		ret = -EINVAL;
-	}
-	if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
-			+ sizeof(struct btrfs_chunk)) {
-		btrfs_err(fs_info, "system chunk array too small %u < %zu",
-			  btrfs_super_sys_array_size(sb),
-			  sizeof(struct btrfs_disk_key)
-			  + sizeof(struct btrfs_chunk));
-		ret = -EINVAL;
-	}
-
-	/*
-	 * The generation is a global counter, we'll trust it more than the others
-	 * but it's still possible that it's the one that's wrong.
-	 */
-	if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
-		btrfs_warn(fs_info,
-			"suspicious: generation < chunk_root_generation: %llu < %llu",
-			btrfs_super_generation(sb),
-			btrfs_super_chunk_root_generation(sb));
-	if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
-	    && btrfs_super_cache_generation(sb) != (u64)-1)
-		btrfs_warn(fs_info,
-			"suspicious: generation < cache_generation: %llu < %llu",
-			btrfs_super_generation(sb),
-			btrfs_super_cache_generation(sb));
-
-	return ret;
-}
-
 static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
 {
 	/* cleanup FS via transaction */
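
In summary, mirror_num selects what the relocated validate_super() checks
at the two call sites introduced above:

	/* At mount: the primary superblock, bytenr checked as mirror 0. */
	ret = validate_super(fs_info, fs_info->super_copy, 0);

	/* Before writing: the bytenr is about to be overwritten, so it is
	 * skipped (-1); csum type and incompat flags are checked on top. */
	ret = validate_super(fs_info, sb, -1);
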
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 51b5e2da..3d9fe58 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -66,10 +66,8 @@
 				      u64 flags, u64 owner, u64 offset,
 				      struct btrfs_key *ins, int ref_mod);
 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
-				     struct btrfs_fs_info *fs_info,
-				     u64 parent, u64 root_objectid,
-				     u64 flags, struct btrfs_disk_key *key,
-				     int level, struct btrfs_key *ins);
+				     struct btrfs_delayed_ref_node *node,
+				     struct btrfs_delayed_extent_op *extent_op);
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_fs_info *fs_info, u64 flags,
 			  int force);
@@ -256,7 +254,7 @@
 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
 		bytenr = btrfs_sb_offset(i);
 		ret = btrfs_rmap_block(fs_info, cache->key.objectid,
-				       bytenr, 0, &logical, &nr, &stripe_len);
+				       bytenr, &logical, &nr, &stripe_len);
 		if (ret)
 			return ret;
 
@@ -343,8 +341,9 @@
  * since their free space will be released as soon as the transaction commits.
  */
 u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
-		       struct btrfs_fs_info *info, u64 start, u64 end)
+		       u64 start, u64 end)
 {
+	struct btrfs_fs_info *info = block_group->fs_info;
 	u64 extent_start, extent_end, size, total_added = 0;
 	int ret;
 
@@ -489,8 +488,7 @@
 
 		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
 		    key.type == BTRFS_METADATA_ITEM_KEY) {
-			total_found += add_new_free_space(block_group,
-							  fs_info, last,
+			total_found += add_new_free_space(block_group, last,
 							  key.objectid);
 			if (key.type == BTRFS_METADATA_ITEM_KEY)
 				last = key.objectid +
@@ -508,7 +506,7 @@
 	}
 	ret = 0;
 
-	total_found += add_new_free_space(block_group, fs_info, last,
+	total_found += add_new_free_space(block_group, last,
 					  block_group->key.objectid +
 					  block_group->key.offset);
 	caching_ctl->progress = (u64)-1;
@@ -744,12 +742,12 @@
 }
 
 static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
-			     u64 owner, u64 root_objectid)
+			     bool metadata, u64 root_objectid)
 {
 	struct btrfs_space_info *space_info;
 	u64 flags;
 
-	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+	if (metadata) {
 		if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
 			flags = BTRFS_BLOCK_GROUP_SYSTEM;
 		else
@@ -2200,8 +2198,11 @@
 						 &old_ref_mod, &new_ref_mod);
 	}
 
-	if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
-		add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
+	if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) {
+		bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
+		add_pinned_bytes(fs_info, -num_bytes, metadata, root_objectid);
+	}
 
 	return ret;
 }
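
For reference, the owner-id split that the new bool argument encodes (a
sketch; BTRFS_FIRST_FREE_OBJECTID is 256):

	/* owner < BTRFS_FIRST_FREE_OBJECTID: a tree block, owner holds its
	 * level.  owner >= BTRFS_FIRST_FREE_OBJECTID: a data extent, owner
	 * holds the inode number that references it. */
	bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
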
@@ -2428,10 +2429,8 @@
 {
 	int ret = 0;
 	struct btrfs_delayed_tree_ref *ref;
-	struct btrfs_key ins;
 	u64 parent = 0;
 	u64 ref_root = 0;
-	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
 
 	ref = btrfs_delayed_node_to_tree_ref(node);
 	trace_run_delayed_tree_ref(fs_info, node, ref, node->action);
@@ -2440,15 +2439,6 @@
 		parent = ref->parent;
 	ref_root = ref->root;
 
-	ins.objectid = node->bytenr;
-	if (skinny_metadata) {
-		ins.offset = ref->level;
-		ins.type = BTRFS_METADATA_ITEM_KEY;
-	} else {
-		ins.offset = node->num_bytes;
-		ins.type = BTRFS_EXTENT_ITEM_KEY;
-	}
-
 	if (node->ref_mod != 1) {
 		btrfs_err(fs_info,
 	"btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
@@ -2458,11 +2448,7 @@
 	}
 	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
 		BUG_ON(!extent_op || !extent_op->update_flags);
-		ret = alloc_reserved_tree_block(trans, fs_info,
-						parent, ref_root,
-						extent_op->flags_to_set,
-						&extent_op->key,
-						ref->level, &ins);
+		ret = alloc_reserved_tree_block(trans, node, extent_op);
 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
 		ret = __btrfs_inc_extent_ref(trans, fs_info, node,
 					     parent, ref_root,
@@ -2594,8 +2580,8 @@
 	delayed_refs->num_heads--;
 	rb_erase(&head->href_node, &delayed_refs->href_root);
 	RB_CLEAR_NODE(&head->href_node);
-	spin_unlock(&delayed_refs->lock);
 	spin_unlock(&head->lock);
+	spin_unlock(&delayed_refs->lock);
 	atomic_dec(&delayed_refs->num_entries);
 
 	trace_run_delayed_ref_head(fs_info, head, 0);
@@ -2700,17 +2686,12 @@
 		 * insert_inline_extent_backref()).
 		 */
 		spin_lock(&locked_ref->lock);
-		btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
-					 locked_ref);
+		btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref);
 
-		/*
-		 * locked_ref is the head node, so we have to go one
-		 * node back for any delayed ref updates
-		 */
 		ref = select_delayed_ref(locked_ref);
 
 		if (ref && ref->seq &&
-		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
+		    btrfs_check_delayed_seq(fs_info, ref->seq)) {
 			spin_unlock(&locked_ref->lock);
 			unselect_delayed_ref_head(delayed_refs, locked_ref);
 			locked_ref = NULL;
@@ -3291,7 +3272,7 @@
 
 	path = btrfs_alloc_path();
 	if (!path)
-		return -ENOENT;
+		return -ENOMEM;
 
 	do {
 		ret = check_committed_ref(root, path, objectid,
@@ -4026,8 +4007,7 @@
 	};
 }
 
-static int create_space_info(struct btrfs_fs_info *info, u64 flags,
-			     struct btrfs_space_info **new)
+static int create_space_info(struct btrfs_fs_info *info, u64 flags)
 {
 
 	struct btrfs_space_info *space_info;
@@ -4065,7 +4045,6 @@
 		return ret;
 	}
 
-	*new = space_info;
 	list_add_rcu(&space_info->list, &info->space_info);
 	if (flags & BTRFS_BLOCK_GROUP_DATA)
 		info->data_sinfo = space_info;
@@ -4122,7 +4101,7 @@
  * returns target flags in extended format or 0 if restripe for this
  * chunk_type is not in progress
  *
- * should be called with either volume_mutex or balance_lock held
+ * should be called with balance_lock held
  */
 static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
 {
@@ -4178,7 +4157,7 @@
 	/* First, mask out the RAID levels which aren't possible */
 	for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
 		if (num_devices >= btrfs_raid_array[raid_type].devs_min)
-			allowed |= btrfs_raid_group[raid_type];
+			allowed |= btrfs_raid_array[raid_type].bg_flag;
 	}
 	allowed &= flags;
 
@@ -4341,7 +4320,7 @@
 			need_commit--;
 
 			if (need_commit > 0) {
-				btrfs_start_delalloc_roots(fs_info, 0, -1);
+				btrfs_start_delalloc_roots(fs_info, -1);
 				btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
 							 (u64)-1);
 			}
@@ -4678,12 +4657,14 @@
 	trans->allocating_chunk = false;
 
 	spin_lock(&space_info->lock);
-	if (ret < 0 && ret != -ENOSPC)
-		goto out;
-	if (ret)
-		space_info->full = 1;
-	else
+	if (ret < 0) {
+		if (ret == -ENOSPC)
+			space_info->full = 1;
+		else
+			goto out;
+	} else {
 		ret = 1;
+	}
 
 	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
 out:
@@ -4792,7 +4773,7 @@
 		 * the filesystem is readonly(all dirty pages are written to
 		 * the disk).
 		 */
-		btrfs_start_delalloc_roots(fs_info, 0, nr_items);
+		btrfs_start_delalloc_roots(fs_info, nr_items);
 		if (!current->journal_info)
 			btrfs_wait_ordered_roots(fs_info, nr_items, 0, (u64)-1);
 	}
@@ -5949,44 +5930,6 @@
 	trans->chunk_bytes_reserved = 0;
 }
 
-/* Can only return 0 or -ENOSPC */
-int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
-				  struct btrfs_inode *inode)
-{
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-	struct btrfs_root *root = inode->root;
-	/*
-	 * We always use trans->block_rsv here as we will have reserved space
-	 * for our orphan when starting the transaction, using get_block_rsv()
-	 * here will sometimes make us choose the wrong block rsv as we could be
-	 * doing a reloc inode for a non refcounted root.
-	 */
-	struct btrfs_block_rsv *src_rsv = trans->block_rsv;
-	struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
-
-	/*
-	 * We need to hold space in order to delete our orphan item once we've
-	 * added it, so this takes the reservation so we can release it later
-	 * when we are truly done with the orphan item.
-	 */
-	u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
-
-	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
-			num_bytes, 1);
-	return btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1);
-}
-
-void btrfs_orphan_release_metadata(struct btrfs_inode *inode)
-{
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-	struct btrfs_root *root = inode->root;
-	u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
-
-	trace_btrfs_space_reservation(fs_info, "orphan", btrfs_ino(inode),
-			num_bytes, 0);
-	btrfs_block_rsv_release(fs_info, root->orphan_block_rsv, num_bytes);
-}
-
 /*
  * btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
  * root: the root of the parent directory
@@ -6004,7 +5947,6 @@
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 				     struct btrfs_block_rsv *rsv,
 				     int items,
-				     u64 *qgroup_reserved,
 				     bool use_global_rsv)
 {
 	u64 num_bytes;
@@ -6022,8 +5964,6 @@
 		num_bytes = 0;
 	}
 
-	*qgroup_reserved = num_bytes;
-
 	num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
 	rsv->space_info = __find_space_info(fs_info,
 					    BTRFS_BLOCK_GROUP_METADATA);
@@ -6033,8 +5973,8 @@
 	if (ret == -ENOSPC && use_global_rsv)
 		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
 
-	if (ret && *qgroup_reserved)
-		btrfs_qgroup_free_meta_prealloc(root, *qgroup_reserved);
+	if (ret && num_bytes)
+		btrfs_qgroup_free_meta_prealloc(root, num_bytes);
 
 	return ret;
 }
@@ -6354,6 +6294,7 @@
 			spin_lock(&info->unused_bgs_lock);
 			if (list_empty(&cache->bg_list)) {
 				btrfs_get_block_group(cache);
+				trace_btrfs_add_unused_block_group(cache);
 				list_add_tail(&cache->bg_list,
 					      &info->unused_bgs);
 			}
@@ -6511,6 +6452,7 @@
 	struct btrfs_key key;
 	int found_type;
 	int i;
+	int ret = 0;
 
 	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
 		return 0;
@@ -6527,10 +6469,12 @@
 			continue;
 		key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
 		key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
-		__exclude_logged_extent(fs_info, key.objectid, key.offset);
+		ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
+		if (ret)
+			break;
 	}
 
-	return 0;
+	return ret;
 }
 
 static void
@@ -7122,7 +7066,7 @@
 			}
 		}
 
-		ret = add_to_free_space_tree(trans, info, bytenr, num_bytes);
+		ret = add_to_free_space_tree(trans, bytenr, num_bytes);
 		if (ret) {
 			btrfs_abort_transaction(trans, ret);
 			goto out;
@@ -7266,7 +7210,7 @@
 	}
 out:
 	if (pin)
-		add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf),
+		add_pinned_bytes(fs_info, buf->len, true,
 				 root->root_key.objectid);
 
 	if (last_ref) {
@@ -7320,8 +7264,11 @@
 						 &old_ref_mod, &new_ref_mod);
 	}
 
-	if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
-		add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
+	if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) {
+		bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID;
+
+		add_pinned_bytes(fs_info, num_bytes, metadata, root_objectid);
+	}
 
 	return ret;
 }
@@ -7373,24 +7320,6 @@
 	return ret;
 }
 
-static const char *btrfs_raid_type_names[BTRFS_NR_RAID_TYPES] = {
-	[BTRFS_RAID_RAID10]	= "raid10",
-	[BTRFS_RAID_RAID1]	= "raid1",
-	[BTRFS_RAID_DUP]	= "dup",
-	[BTRFS_RAID_RAID0]	= "raid0",
-	[BTRFS_RAID_SINGLE]	= "single",
-	[BTRFS_RAID_RAID5]	= "raid5",
-	[BTRFS_RAID_RAID6]	= "raid6",
-};
-
-static const char *get_raid_name(enum btrfs_raid_types type)
-{
-	if (type >= BTRFS_NR_RAID_TYPES)
-		return NULL;
-
-	return btrfs_raid_type_names[type];
-}
-
 enum btrfs_loop_type {
 	LOOP_CACHING_NOWAIT = 0,
 	LOOP_CACHING_WAIT = 1,
@@ -7662,7 +7591,7 @@
 			if (offset) {
 				/* we have a block, we're done */
 				spin_unlock(&last_ptr->refill_lock);
-				trace_btrfs_reserve_extent_cluster(fs_info,
+				trace_btrfs_reserve_extent_cluster(
 						used_block_group,
 						search_start, num_bytes);
 				if (used_block_group != block_group) {
@@ -7735,7 +7664,7 @@
 				if (offset) {
 					/* we found one, proceed */
 					spin_unlock(&last_ptr->refill_lock);
-					trace_btrfs_reserve_extent_cluster(fs_info,
+					trace_btrfs_reserve_extent_cluster(
 						block_group, search_start,
 						num_bytes);
 					goto checks;
@@ -7835,8 +7764,7 @@
 		ins->objectid = search_start;
 		ins->offset = num_bytes;
 
-		trace_btrfs_reserve_extent(fs_info, block_group,
-					   search_start, num_bytes);
+		trace_btrfs_reserve_extent(block_group, search_start, num_bytes);
 		btrfs_release_block_group(block_group, delalloc);
 		break;
 loop:
@@ -8184,8 +8112,7 @@
 	btrfs_mark_buffer_dirty(path->nodes[0]);
 	btrfs_free_path(path);
 
-	ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
-					  ins->offset);
+	ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
 	if (ret)
 		return ret;
 
@@ -8200,37 +8127,52 @@
 }
 
 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
-				     struct btrfs_fs_info *fs_info,
-				     u64 parent, u64 root_objectid,
-				     u64 flags, struct btrfs_disk_key *key,
-				     int level, struct btrfs_key *ins)
+				     struct btrfs_delayed_ref_node *node,
+				     struct btrfs_delayed_extent_op *extent_op)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret;
 	struct btrfs_extent_item *extent_item;
+	struct btrfs_key extent_key;
 	struct btrfs_tree_block_info *block_info;
 	struct btrfs_extent_inline_ref *iref;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
+	struct btrfs_delayed_tree_ref *ref;
 	u32 size = sizeof(*extent_item) + sizeof(*iref);
-	u64 num_bytes = ins->offset;
+	u64 num_bytes;
+	u64 flags = extent_op->flags_to_set;
 	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
 
-	if (!skinny_metadata)
+	ref = btrfs_delayed_node_to_tree_ref(node);
+
+	extent_key.objectid = node->bytenr;
+	if (skinny_metadata) {
+		extent_key.offset = ref->level;
+		extent_key.type = BTRFS_METADATA_ITEM_KEY;
+		num_bytes = fs_info->nodesize;
+	} else {
+		extent_key.offset = node->num_bytes;
+		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
 		size += sizeof(*block_info);
+		num_bytes = node->num_bytes;
+	}
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
+		btrfs_free_and_pin_reserved_extent(fs_info,
+						   extent_key.objectid,
 						   fs_info->nodesize);
 		return -ENOMEM;
 	}
 
 	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
-				      ins, size);
+				      &extent_key, size);
 	if (ret) {
 		btrfs_free_path(path);
-		btrfs_free_and_pin_reserved_extent(fs_info, ins->objectid,
+		btrfs_free_and_pin_reserved_extent(fs_info,
+						   extent_key.objectid,
 						   fs_info->nodesize);
 		return ret;
 	}
@@ -8245,42 +8187,41 @@
 
 	if (skinny_metadata) {
 		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
-		num_bytes = fs_info->nodesize;
 	} else {
 		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
-		btrfs_set_tree_block_key(leaf, block_info, key);
-		btrfs_set_tree_block_level(leaf, block_info, level);
+		btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
+		btrfs_set_tree_block_level(leaf, block_info, ref->level);
 		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
 	}
 
-	if (parent > 0) {
+	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
 		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
 		btrfs_set_extent_inline_ref_type(leaf, iref,
 						 BTRFS_SHARED_BLOCK_REF_KEY);
-		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
 	} else {
 		btrfs_set_extent_inline_ref_type(leaf, iref,
 						 BTRFS_TREE_BLOCK_REF_KEY);
-		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
+		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
 	}
 
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_free_path(path);
 
-	ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+	ret = remove_from_free_space_tree(trans, extent_key.objectid,
 					  num_bytes);
 	if (ret)
 		return ret;
 
-	ret = update_block_group(trans, fs_info, ins->objectid,
+	ret = update_block_group(trans, fs_info, extent_key.objectid,
 				 fs_info->nodesize, 1);
 	if (ret) { /* -ENOENT, logic error */
 		btrfs_err(fs_info, "update block group failed for %llu %llu",
-			ins->objectid, ins->offset);
+			extent_key.objectid, extent_key.offset);
 		BUG();
 	}
 
-	trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid,
+	trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
 					  fs_info->nodesize);
 	return ret;
 }
@@ -10173,8 +10114,7 @@
 		} else if (btrfs_block_group_used(&cache->item) == 0) {
 			cache->last_byte_to_unpin = (u64)-1;
 			cache->cached = BTRFS_CACHE_FINISHED;
-			add_new_free_space(cache, info,
-					   found_key.objectid,
+			add_new_free_space(cache, found_key.objectid,
 					   found_key.objectid +
 					   found_key.offset);
 			free_excluded_extents(info, cache);
@@ -10204,6 +10144,7 @@
 			/* Should always be true but just in case. */
 			if (list_empty(&cache->bg_list)) {
 				btrfs_get_block_group(cache);
+				trace_btrfs_add_unused_block_group(cache);
 				list_add_tail(&cache->bg_list,
 					      &info->unused_bgs);
 			}
@@ -10269,7 +10210,7 @@
 					       key.offset);
 		if (ret)
 			btrfs_abort_transaction(trans, ret);
-		add_block_group_free_space(trans, fs_info, block_group);
+		add_block_group_free_space(trans, block_group);
 		/* already aborted the transaction if it failed. */
 next:
 		list_del_init(&block_group->bg_list);
@@ -10310,7 +10251,7 @@
 		return ret;
 	}
 
-	add_new_free_space(cache, fs_info, chunk_offset, chunk_offset + size);
+	add_new_free_space(cache, chunk_offset, chunk_offset + size);
 
 	free_excluded_extents(fs_info, cache);
 
@@ -10391,6 +10332,7 @@
 	BUG_ON(!block_group);
 	BUG_ON(!block_group->ro);
 
+	trace_btrfs_remove_block_group(block_group);
 	/*
 	 * Free the reserved super bytes from this block group before
 	 * remove it.
@@ -10648,7 +10590,7 @@
 
 	mutex_unlock(&fs_info->chunk_mutex);
 
-	ret = remove_block_group_free_space(trans, fs_info, block_group);
+	ret = remove_block_group_free_space(trans, block_group);
 	if (ret)
 		goto out;
 
@@ -10755,6 +10697,7 @@
 			 * the ro check in case balance is currently acting on
 			 * this block group.
 			 */
+			trace_btrfs_skip_unused_block_group(block_group);
 			spin_unlock(&block_group->lock);
 			up_write(&space_info->groups_sem);
 			goto next;
@@ -10877,7 +10820,6 @@
 
 int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_space_info *space_info;
 	struct btrfs_super_block *disk_super;
 	u64 features;
 	u64 flags;
@@ -10893,21 +10835,21 @@
 		mixed = 1;
 
 	flags = BTRFS_BLOCK_GROUP_SYSTEM;
-	ret = create_space_info(fs_info, flags, &space_info);
+	ret = create_space_info(fs_info, flags);
 	if (ret)
 		goto out;
 
 	if (mixed) {
 		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
-		ret = create_space_info(fs_info, flags, &space_info);
+		ret = create_space_info(fs_info, flags);
 	} else {
 		flags = BTRFS_BLOCK_GROUP_METADATA;
-		ret = create_space_info(fs_info, flags, &space_info);
+		ret = create_space_info(fs_info, flags);
 		if (ret)
 			goto out;
 
 		flags = BTRFS_BLOCK_GROUP_DATA;
-		ret = create_space_info(fs_info, flags, &space_info);
+		ret = create_space_info(fs_info, flags);
 	}
 out:
 	return ret;
@@ -11092,12 +11034,7 @@
 void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
 {
 	percpu_counter_dec(&root->subv_writers->counter);
-	/*
-	 * Make sure counter is updated before we wake up waiters.
-	 */
-	smp_mb();
-	if (waitqueue_active(&root->subv_writers->wait))
-		wake_up(&root->subv_writers->wait);
+	cond_wake_up(&root->subv_writers->wait);
 }
 
 int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
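
The hunk above swaps btrfs's open-coded "smp_mb(); if (waitqueue_active())
wake_up();" sequence for cond_wake_up().  The helpers are introduced
elsewhere in this series; as a rough sketch, assuming wq_has_sleeper()
from <linux/wait.h> supplies the ordering the explicit smp_mb() used to
provide:

    #include <linux/wait.h>

    static inline void cond_wake_up(wait_queue_head_t *wq)
    {
    	/* wq_has_sleeper() implies a full memory barrier, so the
    	 * preceding counter update is visible before the check. */
    	if (wq_has_sleeper(wq))
    		wake_up(wq);
    }

    static inline void cond_wake_up_nomb(wait_queue_head_t *wq)
    {
    	/* For call sites where a preceding atomic op, such as the
    	 * atomic_sub_return() in async_cow_submit() further below,
    	 * already implies the barrier. */
    	if (waitqueue_active(wq))
    		wake_up(wq);
    }
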
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e99b329..51fc015 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -26,7 +26,7 @@
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
-static struct bio_set *btrfs_bioset;
+static struct bio_set btrfs_bioset;
 
 static inline bool extent_state_in_tree(const struct extent_state *state)
 {
@@ -162,20 +162,18 @@
 	if (!extent_buffer_cache)
 		goto free_state_cache;
 
-	btrfs_bioset = bioset_create(BIO_POOL_SIZE,
-				     offsetof(struct btrfs_io_bio, bio),
-				     BIOSET_NEED_BVECS);
-	if (!btrfs_bioset)
+	if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
+			offsetof(struct btrfs_io_bio, bio),
+			BIOSET_NEED_BVECS))
 		goto free_buffer_cache;
 
-	if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+	if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
 		goto free_bioset;
 
 	return 0;
 
 free_bioset:
-	bioset_free(btrfs_bioset);
-	btrfs_bioset = NULL;
+	bioset_exit(&btrfs_bioset);
 
 free_buffer_cache:
 	kmem_cache_destroy(extent_buffer_cache);
@@ -198,8 +196,7 @@
 	rcu_barrier();
 	kmem_cache_destroy(extent_state_cache);
 	kmem_cache_destroy(extent_buffer_cache);
-	if (btrfs_bioset)
-		bioset_free(btrfs_bioset);
+	bioset_exit(&btrfs_bioset);
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree,
@@ -2679,7 +2676,7 @@
 {
 	struct bio *bio;
 
-	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset);
+	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
 	bio_set_dev(bio, bdev);
 	bio->bi_iter.bi_sector = first_byte >> 9;
 	btrfs_io_bio_init(btrfs_io_bio(bio));
@@ -2692,7 +2689,7 @@
 	struct bio *new;
 
 	/* Bio allocation backed by a bioset does not fail */
-	new = bio_clone_fast(bio, GFP_NOFS, btrfs_bioset);
+	new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
 	btrfs_bio = btrfs_io_bio(new);
 	btrfs_io_bio_init(btrfs_bio);
 	btrfs_bio->iter = bio->bi_iter;
@@ -2704,7 +2701,7 @@
 	struct bio *bio;
 
 	/* Bio allocation backed by a bioset does not fail */
-	bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, btrfs_bioset);
+	bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
 	btrfs_io_bio_init(btrfs_io_bio(bio));
 	return bio;
 }
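
These extent_io.c hunks follow the block layer's move in this cycle from
heap-allocated bio sets (bioset_create()/bioset_free()) to bio sets
embedded in their owner (bioset_init()/bioset_exit()), removing one
allocation and the NULL bookkeeping.  A minimal sketch of the pattern;
the demo_* names are hypothetical and the zero front pad stands in for
btrfs's offsetof(struct btrfs_io_bio, bio):

    #include <linux/bio.h>

    static struct bio_set demo_bioset;	/* embedded, not a pointer */

    static int demo_setup(void)
    {
    	/* Returns 0 on success or a negative errno, so there is no
    	 * pointer to NULL-check afterwards. */
    	return bioset_init(&demo_bioset, BIO_POOL_SIZE, 0,
    			   BIOSET_NEED_BVECS);
    }

    static struct bio *demo_alloc(void)
    {
    	/* Allocations backed by an initialized bio_set do not fail. */
    	return bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &demo_bioset);
    }

    static void demo_teardown(void)
    {
    	/* bioset_exit() is also safe on a zeroed, never-initialized
    	 * bio_set, which simplifies error unwinding. */
    	bioset_exit(&demo_bioset);
    }
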
@@ -2715,7 +2712,7 @@
 	struct btrfs_io_bio *btrfs_bio;
 
 	/* this will never fail when it's backed by a bioset */
-	bio = bio_clone_fast(orig, GFP_NOFS, btrfs_bioset);
+	bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
 	ASSERT(bio);
 
 	btrfs_bio = btrfs_io_bio(bio);
@@ -4109,14 +4106,13 @@
 	return ret;
 }
 
-int extent_writepages(struct extent_io_tree *tree,
-		      struct address_space *mapping,
+int extent_writepages(struct address_space *mapping,
 		      struct writeback_control *wbc)
 {
 	int ret = 0;
 	struct extent_page_data epd = {
 		.bio = NULL,
-		.tree = tree,
+		.tree = &BTRFS_I(mapping->host)->io_tree,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
@@ -4126,9 +4122,8 @@
 	return ret;
 }
 
-int extent_readpages(struct extent_io_tree *tree,
-		     struct address_space *mapping,
-		     struct list_head *pages, unsigned nr_pages)
+int extent_readpages(struct address_space *mapping, struct list_head *pages,
+		     unsigned nr_pages)
 {
 	struct bio *bio = NULL;
 	unsigned page_idx;
@@ -4136,6 +4131,7 @@
 	struct page *pagepool[16];
 	struct page *page;
 	struct extent_map *em_cached = NULL;
+	struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
 	int nr = 0;
 	u64 prev_em_start = (u64)-1;
 
@@ -4202,8 +4198,7 @@
  * are locked or under IO and drops the related state bits if it is safe
  * to drop the page.
  */
-static int try_release_extent_state(struct extent_map_tree *map,
-				    struct extent_io_tree *tree,
+static int try_release_extent_state(struct extent_io_tree *tree,
 				    struct page *page, gfp_t mask)
 {
 	u64 start = page_offset(page);
@@ -4238,13 +4233,13 @@
  * in the range corresponding to the page, both state records and extent
  * map records are removed
  */
-int try_release_extent_mapping(struct extent_map_tree *map,
-			       struct extent_io_tree *tree, struct page *page,
-			       gfp_t mask)
+int try_release_extent_mapping(struct page *page, gfp_t mask)
 {
 	struct extent_map *em;
 	u64 start = page_offset(page);
 	u64 end = start + PAGE_SIZE - 1;
+	struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;
+	struct extent_map_tree *map = &BTRFS_I(page->mapping->host)->extent_tree;
 
 	if (gfpflags_allow_blocking(mask) &&
 	    page->mapping->host->i_size > SZ_16M) {
@@ -4278,7 +4273,7 @@
 			free_extent_map(em);
 		}
 	}
-	return try_release_extent_state(map, tree, page, mask);
+	return try_release_extent_state(tree, page, mask);
 }
 
 /*
@@ -5620,46 +5615,6 @@
 	}
 }
 
-void le_bitmap_set(u8 *map, unsigned int start, int len)
-{
-	u8 *p = map + BIT_BYTE(start);
-	const unsigned int size = start + len;
-	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
-	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
-
-	while (len - bits_to_set >= 0) {
-		*p |= mask_to_set;
-		len -= bits_to_set;
-		bits_to_set = BITS_PER_BYTE;
-		mask_to_set = ~0;
-		p++;
-	}
-	if (len) {
-		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
-		*p |= mask_to_set;
-	}
-}
-
-void le_bitmap_clear(u8 *map, unsigned int start, int len)
-{
-	u8 *p = map + BIT_BYTE(start);
-	const unsigned int size = start + len;
-	int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
-	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
-
-	while (len - bits_to_clear >= 0) {
-		*p &= ~mask_to_clear;
-		len -= bits_to_clear;
-		bits_to_clear = BITS_PER_BYTE;
-		mask_to_clear = ~0;
-		p++;
-	}
-	if (len) {
-		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
-		*p &= ~mask_to_clear;
-	}
-}
-
 /*
  * eb_bitmap_offset() - calculate the page and offset of the byte containing the
  * given bit number
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index a530096..0bfd4ae 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -79,14 +79,6 @@
 #define BITMAP_LAST_BYTE_MASK(nbits) \
 	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
 
-static inline int le_test_bit(int nr, const u8 *addr)
-{
-	return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
-}
-
-void le_bitmap_set(u8 *map, unsigned int start, int len);
-void le_bitmap_clear(u8 *map, unsigned int start, int len);
-
 struct extent_state;
 struct btrfs_root;
 struct btrfs_inode;
@@ -278,9 +270,7 @@
 					  int create);
 
 void extent_io_tree_init(struct extent_io_tree *tree, void *private_data);
-int try_release_extent_mapping(struct extent_map_tree *map,
-			       struct extent_io_tree *tree, struct page *page,
-			       gfp_t mask);
+int try_release_extent_mapping(struct page *page, gfp_t mask);
 int try_release_extent_buffer(struct page *page);
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 		     struct extent_state **cached);
@@ -421,14 +411,12 @@
 int extent_write_full_page(struct page *page, struct writeback_control *wbc);
 int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 			      int mode);
-int extent_writepages(struct extent_io_tree *tree,
-		      struct address_space *mapping,
+int extent_writepages(struct address_space *mapping,
 		      struct writeback_control *wbc);
 int btree_write_cache_pages(struct address_space *mapping,
 			    struct writeback_control *wbc);
-int extent_readpages(struct extent_io_tree *tree,
-		     struct address_space *mapping,
-		     struct list_head *pages, unsigned nr_pages);
+int extent_readpages(struct address_space *mapping, struct list_head *pages,
+		     unsigned nr_pages);
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len);
 void set_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 1b8a078..6648d55 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -518,6 +518,7 @@
 
 /**
  * btrfs_add_extent_mapping - add extent mapping into em_tree
+ * @fs_info - used for the tracepoint
  * @em_tree - the extent tree into which we want to insert the extent mapping
  * @em_in   - extent we are inserting
  * @start   - start of the logical range btrfs_get_extent() is requesting
@@ -535,7 +536,8 @@
  * Return 0 on success, otherwise -EEXIST.
  *
  */
-int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
+int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
+			     struct extent_map_tree *em_tree,
 			     struct extent_map **em_in, u64 start, u64 len)
 {
 	int ret;
@@ -553,7 +555,7 @@
 
 		existing = search_extent_mapping(em_tree, start, len);
 
-		trace_btrfs_handle_em_exist(existing, em, start, len);
+		trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);
 
 		/*
 		 * existing will always be non-NULL, since there must be
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 5fcb80a..25d985e 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -92,7 +92,8 @@
 void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
 struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
 					 u64 start, u64 len);
-int btrfs_add_extent_mapping(struct extent_map_tree *em_tree,
+int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
+			     struct extent_map_tree *em_tree,
 			     struct extent_map **em_in, u64 start, u64 len);
 
 #endif
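
The only functional change to btrfs_add_extent_mapping() is threading
fs_info through so the trace_btrfs_handle_em_exist tracepoint can name
the filesystem it fired on.  A hedged caller sketch, assuming the btrfs
headers and the locking shape callers such as btrfs_get_extent() need;
demo_insert_em() is a hypothetical name:

    static int demo_insert_em(struct btrfs_fs_info *fs_info,
    			      struct extent_map_tree *em_tree,
    			      struct extent_map *em, u64 start, u64 len)
    {
    	int ret;

    	write_lock(&em_tree->lock);
    	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
    	write_unlock(&em_tree->lock);

    	/* On 0, em may now point at a pre-existing overlapping mapping
    	 * rather than the one passed in; -EEXIST means neither insert
    	 * nor merge was possible. */
    	return ret;
    }
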
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e5b569b..d5f80cb 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -253,10 +253,8 @@
 	truncate_pagecache(inode, 0);
 
 	/*
-	 * We don't need an orphan item because truncating the free space cache
-	 * will never be split across transactions.
-	 * We don't need to check for -EAGAIN because we're a free space
-	 * cache inode
+	 * We skip the throttling logic for free space cache inodes, so we don't
+	 * need to check for -EAGAIN.
 	 */
 	ret = btrfs_truncate_inode_items(trans, root, inode,
 					 0, BTRFS_EXTENT_DATA_KEY);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 32a0f6c..b5950aa 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -12,7 +12,6 @@
 #include "transaction.h"
 
 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
-					struct btrfs_fs_info *fs_info,
 					struct btrfs_block_group_cache *block_group,
 					struct btrfs_path *path);
 
@@ -45,11 +44,10 @@
 }
 
 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
-				   struct btrfs_fs_info *fs_info,
 				   struct btrfs_block_group_cache *block_group,
 				   struct btrfs_path *path)
 {
-	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_root *root = trans->fs_info->free_space_root;
 	struct btrfs_free_space_info *info;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
@@ -138,10 +136,11 @@
 	return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
 }
 
-static u8 *alloc_bitmap(u32 bitmap_size)
+static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
-	u8 *ret;
+	unsigned long *ret;
 	unsigned int nofs_flag;
+	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
 
 	/*
 	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
@@ -152,21 +151,42 @@
 	 * know that recursion is unsafe.
 	 */
 	nofs_flag = memalloc_nofs_save();
-	ret = kvzalloc(bitmap_size, GFP_KERNEL);
+	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
 	memalloc_nofs_restore(nofs_flag);
 	return ret;
 }
 
+static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
+{
+	u8 *p = ((u8 *)map) + BIT_BYTE(start);
+	const unsigned int size = start + len;
+	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+	while (len - bits_to_set >= 0) {
+		*p |= mask_to_set;
+		len -= bits_to_set;
+		bits_to_set = BITS_PER_BYTE;
+		mask_to_set = ~0;
+		p++;
+	}
+	if (len) {
+		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+		*p |= mask_to_set;
+	}
+}
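
le_bitmap_set() moves here from extent_io.c (deleted above) now that
free-space-tree.c is its only user.  The map parameter becomes
unsigned long * to match the find_next_bit_le() API used below, but the
body still works a byte at a time, so the result is independent of host
endianness.  A small worked example:

    unsigned long map[1] = { 0 };

    /* Set 4 bits starting at bit 6: bits 6-7 of byte 0 and bits 0-1
     * of byte 1, so the byte view is 0xc0 0x03 0x00 ... on both
     * little- and big-endian hosts. */
    le_bitmap_set(map, 6, 4);
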
+
 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *block_group,
 				  struct btrfs_path *path)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root = fs_info->free_space_root;
 	struct btrfs_free_space_info *info;
 	struct btrfs_key key, found_key;
 	struct extent_buffer *leaf;
-	u8 *bitmap, *bitmap_cursor;
+	unsigned long *bitmap;
+	char *bitmap_cursor;
 	u64 start, end;
 	u64 bitmap_range, i;
 	u32 bitmap_size, flags, expected_extent_count;
@@ -255,7 +275,7 @@
 		goto out;
 	}
 
-	bitmap_cursor = bitmap;
+	bitmap_cursor = (char *)bitmap;
 	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
 	i = start;
 	while (i < end) {
@@ -296,21 +316,18 @@
 }
 
 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *block_group,
 				  struct btrfs_path *path)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root = fs_info->free_space_root;
 	struct btrfs_free_space_info *info;
 	struct btrfs_key key, found_key;
 	struct extent_buffer *leaf;
-	u8 *bitmap;
+	unsigned long *bitmap;
 	u64 start, end;
-	/* Initialize to silence GCC. */
-	u64 extent_start = 0;
-	u64 offset;
 	u32 bitmap_size, flags, expected_extent_count;
-	int prev_bit = 0, bit, bitnr;
+	unsigned long nrbits, start_bit, end_bit;
 	u32 extent_count = 0;
 	int done = 0, nr;
 	int ret;
@@ -348,7 +365,7 @@
 				break;
 			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
 				unsigned long ptr;
-				u8 *bitmap_cursor;
+				char *bitmap_cursor;
 				u32 bitmap_pos, data_size;
 
 				ASSERT(found_key.objectid >= start);
@@ -358,7 +375,7 @@
 				bitmap_pos = div_u64(found_key.objectid - start,
 						     fs_info->sectorsize *
 						     BITS_PER_BYTE);
-				bitmap_cursor = bitmap + bitmap_pos;
+				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
 				data_size = free_space_bitmap_size(found_key.offset,
 								   fs_info->sectorsize);
 
@@ -392,32 +409,16 @@
 	btrfs_mark_buffer_dirty(leaf);
 	btrfs_release_path(path);
 
-	offset = start;
-	bitnr = 0;
-	while (offset < end) {
-		bit = !!le_test_bit(bitnr, bitmap);
-		if (prev_bit == 0 && bit == 1) {
-			extent_start = offset;
-		} else if (prev_bit == 1 && bit == 0) {
-			key.objectid = extent_start;
-			key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
-			key.offset = offset - extent_start;
+	nrbits = div_u64(block_group->key.offset, block_group->fs_info->sectorsize);
+	start_bit = find_next_bit_le(bitmap, nrbits, 0);
 
-			ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
-			if (ret)
-				goto out;
-			btrfs_release_path(path);
+	while (start_bit < nrbits) {
+		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
+		ASSERT(start_bit < end_bit);
 
-			extent_count++;
-		}
-		prev_bit = bit;
-		offset += fs_info->sectorsize;
-		bitnr++;
-	}
-	if (prev_bit == 1) {
-		key.objectid = extent_start;
+		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
 		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
-		key.offset = end - extent_start;
+		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
 
 		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
 		if (ret)
@@ -425,6 +426,8 @@
 		btrfs_release_path(path);
 
 		extent_count++;
+
+		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
 	}
 
 	if (extent_count != expected_extent_count) {
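
The rewritten loop above replaces the old one-bit-per-iteration walk
(the prev_bit/bit bookkeeping, one pass per sector) with jumps straight
between run boundaries via find_next_bit_le()/find_next_zero_bit_le(),
which is why the bitmap had to become unsigned long *.  The idiom in
isolation; demo_walk_runs() is a hypothetical stand-in that prints runs
instead of inserting BTRFS_FREE_SPACE_EXTENT_KEY items:

    #include <linux/bitops.h>
    #include <linux/printk.h>

    static void demo_walk_runs(const unsigned long *bitmap,
    			       unsigned long nrbits)
    {
    	unsigned long start_bit, end_bit;

    	start_bit = find_next_bit_le(bitmap, nrbits, 0);
    	while (start_bit < nrbits) {
    		/* The first clear bit at or after start_bit closes the
    		 * run, so [start_bit, end_bit) is contiguous. */
    		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
    		pr_info("run: [%lu, %lu)\n", start_bit, end_bit);
    		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
    	}
    }
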
@@ -446,7 +449,6 @@
 }
 
 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
-					  struct btrfs_fs_info *fs_info,
 					  struct btrfs_block_group_cache *block_group,
 					  struct btrfs_path *path,
 					  int new_extents)
@@ -459,7 +461,8 @@
 	if (new_extents == 0)
 		return 0;
 
-	info = search_free_space_info(trans, fs_info, block_group, path, 1);
+	info = search_free_space_info(trans, trans->fs_info, block_group, path,
+				      1);
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
 		goto out;
@@ -474,12 +477,10 @@
 
 	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 	    extent_count > block_group->bitmap_high_thresh) {
-		ret = convert_free_space_to_bitmaps(trans, fs_info, block_group,
-						    path);
+		ret = convert_free_space_to_bitmaps(trans, block_group, path);
 	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
 		   extent_count < block_group->bitmap_low_thresh) {
-		ret = convert_free_space_to_extents(trans, fs_info, block_group,
-						    path);
+		ret = convert_free_space_to_extents(trans, block_group, path);
 	}
 
 out:
@@ -576,12 +577,11 @@
  * the bitmap.
  */
 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
-				    struct btrfs_fs_info *fs_info,
 				    struct btrfs_block_group_cache *block_group,
 				    struct btrfs_path *path,
 				    u64 start, u64 size, int remove)
 {
-	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_root *root = block_group->fs_info->free_space_root;
 	struct btrfs_key key;
 	u64 end = start + size;
 	u64 cur_start, cur_size;
@@ -682,7 +682,7 @@
 	}
 
 	btrfs_release_path(path);
-	ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+	ret = update_free_space_extent_count(trans, block_group, path,
 					     new_extents);
 
 out:
@@ -690,12 +690,11 @@
 }
 
 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
-				    struct btrfs_fs_info *fs_info,
 				    struct btrfs_block_group_cache *block_group,
 				    struct btrfs_path *path,
 				    u64 start, u64 size)
 {
-	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_root *root = trans->fs_info->free_space_root;
 	struct btrfs_key key;
 	u64 found_start, found_end;
 	u64 end = start + size;
@@ -769,7 +768,7 @@
 	}
 
 	btrfs_release_path(path);
-	ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+	ret = update_free_space_extent_count(trans, block_group, path,
 					     new_extents);
 
 out:
@@ -777,7 +776,6 @@
 }
 
 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *block_group,
 				  struct btrfs_path *path, u64 start, u64 size)
 {
@@ -786,36 +784,35 @@
 	int ret;
 
 	if (block_group->needs_free_space) {
-		ret = __add_block_group_free_space(trans, fs_info, block_group,
-						   path);
+		ret = __add_block_group_free_space(trans, block_group, path);
 		if (ret)
 			return ret;
 	}
 
-	info = search_free_space_info(NULL, fs_info, block_group, path, 0);
+	info = search_free_space_info(NULL, trans->fs_info, block_group, path,
+				      0);
 	if (IS_ERR(info))
 		return PTR_ERR(info);
 	flags = btrfs_free_space_flags(path->nodes[0], info);
 	btrfs_release_path(path);
 
 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
-		return modify_free_space_bitmap(trans, fs_info, block_group,
-						path, start, size, 1);
+		return modify_free_space_bitmap(trans, block_group, path,
+						start, size, 1);
 	} else {
-		return remove_free_space_extent(trans, fs_info, block_group,
-						path, start, size);
+		return remove_free_space_extent(trans, block_group, path,
+						start, size);
 	}
 }
 
 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info,
 				u64 start, u64 size)
 {
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_path *path;
 	int ret;
 
-	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
 		return 0;
 
 	path = btrfs_alloc_path();
@@ -824,7 +821,7 @@
 		goto out;
 	}
 
-	block_group = btrfs_lookup_block_group(fs_info, start);
+	block_group = btrfs_lookup_block_group(trans->fs_info, start);
 	if (!block_group) {
 		ASSERT(0);
 		ret = -ENOENT;
@@ -832,8 +829,8 @@
 	}
 
 	mutex_lock(&block_group->free_space_lock);
-	ret = __remove_from_free_space_tree(trans, fs_info, block_group, path,
-					    start, size);
+	ret = __remove_from_free_space_tree(trans, block_group, path, start,
+					    size);
 	mutex_unlock(&block_group->free_space_lock);
 
 	btrfs_put_block_group(block_group);
@@ -845,12 +842,11 @@
 }
 
 static int add_free_space_extent(struct btrfs_trans_handle *trans,
-				 struct btrfs_fs_info *fs_info,
 				 struct btrfs_block_group_cache *block_group,
 				 struct btrfs_path *path,
 				 u64 start, u64 size)
 {
-	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_root *root = trans->fs_info->free_space_root;
 	struct btrfs_key key, new_key;
 	u64 found_start, found_end;
 	u64 end = start + size;
@@ -965,7 +961,7 @@
 		goto out;
 
 	btrfs_release_path(path);
-	ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+	ret = update_free_space_extent_count(trans, block_group, path,
 					     new_extents);
 
 out:
@@ -973,17 +969,16 @@
 }
 
 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info,
 			     struct btrfs_block_group_cache *block_group,
 			     struct btrfs_path *path, u64 start, u64 size)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_free_space_info *info;
 	u32 flags;
 	int ret;
 
 	if (block_group->needs_free_space) {
-		ret = __add_block_group_free_space(trans, fs_info, block_group,
-						   path);
+		ret = __add_block_group_free_space(trans, block_group, path);
 		if (ret)
 			return ret;
 	}
@@ -995,23 +990,22 @@
 	btrfs_release_path(path);
 
 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
-		return modify_free_space_bitmap(trans, fs_info, block_group,
-						path, start, size, 0);
+		return modify_free_space_bitmap(trans, block_group, path,
+						start, size, 0);
 	} else {
-		return add_free_space_extent(trans, fs_info, block_group, path,
-					     start, size);
+		return add_free_space_extent(trans, block_group, path, start,
+					     size);
 	}
 }
 
 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
-			   struct btrfs_fs_info *fs_info,
 			   u64 start, u64 size)
 {
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_path *path;
 	int ret;
 
-	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
 		return 0;
 
 	path = btrfs_alloc_path();
@@ -1020,7 +1014,7 @@
 		goto out;
 	}
 
-	block_group = btrfs_lookup_block_group(fs_info, start);
+	block_group = btrfs_lookup_block_group(trans->fs_info, start);
 	if (!block_group) {
 		ASSERT(0);
 		ret = -ENOENT;
@@ -1028,8 +1022,7 @@
 	}
 
 	mutex_lock(&block_group->free_space_lock);
-	ret = __add_to_free_space_tree(trans, fs_info, block_group, path, start,
-				       size);
+	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
 	mutex_unlock(&block_group->free_space_lock);
 
 	btrfs_put_block_group(block_group);
@@ -1046,10 +1039,9 @@
  * through the normal add/remove hooks.
  */
 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
-				    struct btrfs_fs_info *fs_info,
 				    struct btrfs_block_group_cache *block_group)
 {
-	struct btrfs_root *extent_root = fs_info->extent_root;
+	struct btrfs_root *extent_root = trans->fs_info->extent_root;
 	struct btrfs_path *path, *path2;
 	struct btrfs_key key;
 	u64 start, end;
@@ -1066,7 +1058,7 @@
 		return -ENOMEM;
 	}
 
-	ret = add_new_free_space_info(trans, fs_info, block_group, path2);
+	ret = add_new_free_space_info(trans, block_group, path2);
 	if (ret)
 		goto out;
 
@@ -1099,7 +1091,7 @@
 				break;
 
 			if (start < key.objectid) {
-				ret = __add_to_free_space_tree(trans, fs_info,
+				ret = __add_to_free_space_tree(trans,
 							       block_group,
 							       path2, start,
 							       key.objectid -
@@ -1109,7 +1101,7 @@
 			}
 			start = key.objectid;
 			if (key.type == BTRFS_METADATA_ITEM_KEY)
-				start += fs_info->nodesize;
+				start += trans->fs_info->nodesize;
 			else
 				start += key.offset;
 		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
@@ -1124,8 +1116,8 @@
 			break;
 	}
 	if (start < end) {
-		ret = __add_to_free_space_tree(trans, fs_info, block_group,
-					       path2, start, end - start);
+		ret = __add_to_free_space_tree(trans, block_group, path2,
+					       start, end - start);
 		if (ret)
 			goto out_locked;
 	}
@@ -1165,7 +1157,7 @@
 	while (node) {
 		block_group = rb_entry(node, struct btrfs_block_group_cache,
 				       cache_node);
-		ret = populate_free_space_tree(trans, fs_info, block_group);
+		ret = populate_free_space_tree(trans, block_group);
 		if (ret)
 			goto abort;
 		node = rb_next(node);
@@ -1269,7 +1261,6 @@
 }
 
 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
-					struct btrfs_fs_info *fs_info,
 					struct btrfs_block_group_cache *block_group,
 					struct btrfs_path *path)
 {
@@ -1277,19 +1268,19 @@
 
 	block_group->needs_free_space = 0;
 
-	ret = add_new_free_space_info(trans, fs_info, block_group, path);
+	ret = add_new_free_space_info(trans, block_group, path);
 	if (ret)
 		return ret;
 
-	return __add_to_free_space_tree(trans, fs_info, block_group, path,
+	return __add_to_free_space_tree(trans, block_group, path,
 					block_group->key.objectid,
 					block_group->key.offset);
 }
 
 int add_block_group_free_space(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info,
 			       struct btrfs_block_group_cache *block_group)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_path *path = NULL;
 	int ret = 0;
 
@@ -1306,7 +1297,7 @@
 		goto out;
 	}
 
-	ret = __add_block_group_free_space(trans, fs_info, block_group, path);
+	ret = __add_block_group_free_space(trans, block_group, path);
 
 out:
 	btrfs_free_path(path);
@@ -1317,10 +1308,9 @@
 }
 
 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *block_group)
 {
-	struct btrfs_root *root = fs_info->free_space_root;
+	struct btrfs_root *root = trans->fs_info->free_space_root;
 	struct btrfs_path *path;
 	struct btrfs_key key, found_key;
 	struct extent_buffer *leaf;
@@ -1328,7 +1318,7 @@
 	int done = 0, nr;
 	int ret;
 
-	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
 		return 0;
 
 	if (block_group->needs_free_space) {
@@ -1439,7 +1429,6 @@
 				extent_start = offset;
 			} else if (prev_bit == 1 && bit == 0) {
 				total_found += add_new_free_space(block_group,
-								  fs_info,
 								  extent_start,
 								  offset);
 				if (total_found > CACHING_CTL_WAKE_UP) {
@@ -1453,8 +1442,8 @@
 		}
 	}
 	if (prev_bit == 1) {
-		total_found += add_new_free_space(block_group, fs_info,
-						  extent_start, end);
+		total_found += add_new_free_space(block_group, extent_start,
+						  end);
 		extent_count++;
 	}
 
@@ -1511,8 +1500,7 @@
 
 		caching_ctl->progress = key.objectid;
 
-		total_found += add_new_free_space(block_group, fs_info,
-						  key.objectid,
+		total_found += add_new_free_space(block_group, key.objectid,
 						  key.objectid + key.offset);
 		if (total_found > CACHING_CTL_WAKE_UP) {
 			total_found = 0;
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index 874b4fe..3133651d 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -19,16 +19,12 @@
 int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
 int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
 int add_block_group_free_space(struct btrfs_trans_handle *trans,
-			       struct btrfs_fs_info *fs_info,
 			       struct btrfs_block_group_cache *block_group);
 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *block_group);
 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
-			   struct btrfs_fs_info *fs_info,
 			   u64 start, u64 size);
 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
-				struct btrfs_fs_info *fs_info,
 				u64 start, u64 size);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@@ -38,19 +34,15 @@
 		       struct btrfs_block_group_cache *block_group,
 		       struct btrfs_path *path, int cow);
 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
-			     struct btrfs_fs_info *fs_info,
 			     struct btrfs_block_group_cache *block_group,
 			     struct btrfs_path *path, u64 start, u64 size);
 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *block_group,
 				  struct btrfs_path *path, u64 start, u64 size);
 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *block_group,
 				  struct btrfs_path *path);
 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
-				  struct btrfs_fs_info *fs_info,
 				  struct btrfs_block_group_cache *block_group,
 				  struct btrfs_path *path);
 int free_space_test_bit(struct btrfs_block_group_cache *block_group,
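
The prototype changes above summarize the cleanup running through most
of this patch: functions that already receive a btrfs_trans_handle stop
taking btrfs_fs_info as a separate parameter and derive it from the
handle instead.  The recurring shape, sketched on one of the functions
above with its body elided:

    int add_to_free_space_tree(struct btrfs_trans_handle *trans,
    			       u64 start, u64 size)
    {
    	struct btrfs_fs_info *fs_info = trans->fs_info;

    	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
    		return 0;
    	/* ... rest unchanged ... */
    }
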
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0b86cf1..89b2082 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1018,8 +1018,10 @@
 				  ram_size, /* ram_bytes */
 				  BTRFS_COMPRESS_NONE, /* compress_type */
 				  BTRFS_ORDERED_REGULAR /* type */);
-		if (IS_ERR(em))
+		if (IS_ERR(em)) {
+			ret = PTR_ERR(em);
 			goto out_reserve;
+		}
 		free_extent_map(em);
 
 		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
@@ -1156,13 +1158,10 @@
 	nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
 		PAGE_SHIFT;
 
-	/*
-	 * atomic_sub_return implies a barrier for waitqueue_active
-	 */
+	/* atomic_sub_return implies a barrier */
 	if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
-	    5 * SZ_1M &&
-	    waitqueue_active(&fs_info->async_submit_wait))
-		wake_up(&fs_info->async_submit_wait);
+	    5 * SZ_1M)
+		cond_wake_up_nomb(&fs_info->async_submit_wait);
 
 	if (async_cow->inode)
 		submit_compressed_extents(async_cow->inode, async_cow);
@@ -1373,6 +1372,13 @@
 			    btrfs_file_extent_encryption(leaf, fi) ||
 			    btrfs_file_extent_other_encoding(leaf, fi))
 				goto out_check;
+			/*
+			 * Do the same check as in btrfs_cross_ref_exist but
+			 * without the unnecessary search.
+			 */
+			if (btrfs_file_extent_generation(leaf, fi) <=
+			    btrfs_root_last_snapshot(&root->root_item))
+				goto out_check;
 			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
 				goto out_check;
 			if (btrfs_extent_readonly(fs_info, disk_bytenr))
@@ -1754,6 +1760,7 @@
 			  &inode->runtime_flags);
 		root->nr_delalloc_inodes--;
 		if (!root->nr_delalloc_inodes) {
+			ASSERT(list_empty(&root->delalloc_inodes));
 			spin_lock(&fs_info->delalloc_root_lock);
 			BUG_ON(list_empty(&root->delalloc_root));
 			list_del_init(&root->delalloc_root);
@@ -3158,6 +3165,9 @@
 	/* once for the tree */
 	btrfs_put_ordered_extent(ordered_extent);
 
+	/* Try to release some metadata so we don't OOM, but don't wait for it */
+	btrfs_btree_balance_dirty_nodelay(fs_info);
+
 	return ret;
 }
 
@@ -3300,177 +3310,31 @@
 }
 
 /*
- * This is called in transaction commit time. If there are no orphan
- * files in the subvolume, it removes orphan item and frees block_rsv
- * structure.
- */
-void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
-			      struct btrfs_root *root)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_block_rsv *block_rsv;
-	int ret;
-
-	if (atomic_read(&root->orphan_inodes) ||
-	    root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
-		return;
-
-	spin_lock(&root->orphan_lock);
-	if (atomic_read(&root->orphan_inodes)) {
-		spin_unlock(&root->orphan_lock);
-		return;
-	}
-
-	if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
-		spin_unlock(&root->orphan_lock);
-		return;
-	}
-
-	block_rsv = root->orphan_block_rsv;
-	root->orphan_block_rsv = NULL;
-	spin_unlock(&root->orphan_lock);
-
-	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
-	    btrfs_root_refs(&root->root_item) > 0) {
-		ret = btrfs_del_orphan_item(trans, fs_info->tree_root,
-					    root->root_key.objectid);
-		if (ret)
-			btrfs_abort_transaction(trans, ret);
-		else
-			clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
-				  &root->state);
-	}
-
-	if (block_rsv) {
-		WARN_ON(block_rsv->size > 0);
-		btrfs_free_block_rsv(fs_info, block_rsv);
-	}
-}
-
-/*
- * This creates an orphan entry for the given inode in case something goes
- * wrong in the middle of an unlink/truncate.
- *
- * NOTE: caller of this function should reserve 5 units of metadata for
- *	 this function.
+ * This creates an orphan entry for the given inode in case something goes wrong
+ * in the middle of an unlink.
  */
 int btrfs_orphan_add(struct btrfs_trans_handle *trans,
-		struct btrfs_inode *inode)
+		     struct btrfs_inode *inode)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
-	struct btrfs_root *root = inode->root;
-	struct btrfs_block_rsv *block_rsv = NULL;
-	int reserve = 0;
-	bool insert = false;
 	int ret;
 
-	if (!root->orphan_block_rsv) {
-		block_rsv = btrfs_alloc_block_rsv(fs_info,
-						  BTRFS_BLOCK_RSV_TEMP);
-		if (!block_rsv)
-			return -ENOMEM;
-	}
-
-	if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-			      &inode->runtime_flags))
-		insert = true;
-
-	if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
-			      &inode->runtime_flags))
-		reserve = 1;
-
-	spin_lock(&root->orphan_lock);
-	/* If someone has created ->orphan_block_rsv, be happy to use it. */
-	if (!root->orphan_block_rsv) {
-		root->orphan_block_rsv = block_rsv;
-	} else if (block_rsv) {
-		btrfs_free_block_rsv(fs_info, block_rsv);
-		block_rsv = NULL;
-	}
-
-	if (insert)
-		atomic_inc(&root->orphan_inodes);
-	spin_unlock(&root->orphan_lock);
-
-	/* grab metadata reservation from transaction handle */
-	if (reserve) {
-		ret = btrfs_orphan_reserve_metadata(trans, inode);
-		ASSERT(!ret);
-		if (ret) {
-			/*
-			 * dec doesn't need spin_lock as ->orphan_block_rsv
-			 * would be released only if ->orphan_inodes is
-			 * zero.
-			 */
-			atomic_dec(&root->orphan_inodes);
-			clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
-				  &inode->runtime_flags);
-			if (insert)
-				clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-					  &inode->runtime_flags);
-			return ret;
-		}
-	}
-
-	/* insert an orphan item to track this unlinked/truncated file */
-	if (insert) {
-		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
-		if (ret) {
-			if (reserve) {
-				clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
-					  &inode->runtime_flags);
-				btrfs_orphan_release_metadata(inode);
-			}
-			/*
-			 * btrfs_orphan_commit_root may race with us and set
-			 * ->orphan_block_rsv to zero, in order to avoid that,
-			 * decrease ->orphan_inodes after everything is done.
-			 */
-			atomic_dec(&root->orphan_inodes);
-			if (ret != -EEXIST) {
-				clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-					  &inode->runtime_flags);
-				btrfs_abort_transaction(trans, ret);
-				return ret;
-			}
-		}
-		ret = 0;
+	ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
+	if (ret && ret != -EEXIST) {
+		btrfs_abort_transaction(trans, ret);
+		return ret;
 	}
 
 	return 0;
 }
 
 /*
- * We have done the truncate/delete so we can go ahead and remove the orphan
- * item for this particular inode.
+ * We have done the delete so we can go ahead and remove the orphan item for
+ * this particular inode.
  */
 static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
 			    struct btrfs_inode *inode)
 {
-	struct btrfs_root *root = inode->root;
-	int delete_item = 0;
-	int ret = 0;
-
-	if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-			       &inode->runtime_flags))
-		delete_item = 1;
-
-	if (delete_item && trans)
-		ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
-
-	if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
-			       &inode->runtime_flags))
-		btrfs_orphan_release_metadata(inode);
-
-	/*
-	 * btrfs_orphan_commit_root may race with us and set ->orphan_block_rsv
-	 * to zero, in order to avoid that, decrease ->orphan_inodes after
-	 * everything is done.
-	 */
-	if (delete_item)
-		atomic_dec(&root->orphan_inodes);
-
-	return ret;
+	return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
 }
 
 /*
@@ -3486,7 +3350,7 @@
 	struct btrfs_trans_handle *trans;
 	struct inode *inode;
 	u64 last_objectid = 0;
-	int ret = 0, nr_unlink = 0, nr_truncate = 0;
+	int ret = 0, nr_unlink = 0;
 
 	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
 		return 0;
@@ -3586,12 +3450,31 @@
 				key.offset = found_key.objectid - 1;
 				continue;
 			}
+
 		}
+
 		/*
-		 * Inode is already gone but the orphan item is still there,
-		 * kill the orphan item.
+		 * If we have an inode with links, there are a couple of
+		 * possibilities. Old kernels (before v3.12) used to create an
+		 * orphan item for truncate indicating that there were possibly
+		 * extent items past i_size that needed to be deleted. In v3.12,
+		 * truncate was changed to update i_size in sync with the extent
+		 * items, but the (useless) orphan item was still created. Since
+		 * v4.18, we don't create the orphan item for truncate at all.
+		 *
+		 * So, this item could mean that we need to do a truncate, but
+		 * only if this filesystem was last used on a pre-v3.12 kernel
+		 * and was not cleanly unmounted. The odds of that are quite
+		 * slim, and it's a pain to do the truncate now, so just delete
+		 * the orphan item.
+		 *
+		 * It's also possible that this orphan item was supposed to be
+		 * deleted but wasn't. The inode number may have been reused,
+		 * but either way, we can delete the orphan item.
 		 */
-		if (ret == -ENOENT) {
+		if (ret == -ENOENT || inode->i_nlink) {
+			if (!ret)
+				iput(inode);
 			trans = btrfs_start_transaction(root, 1);
 			if (IS_ERR(trans)) {
 				ret = PTR_ERR(trans);
@@ -3607,42 +3490,7 @@
 			continue;
 		}
 
-		/*
-		 * add this inode to the orphan list so btrfs_orphan_del does
-		 * the proper thing when we hit it
-		 */
-		set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-			&BTRFS_I(inode)->runtime_flags);
-		atomic_inc(&root->orphan_inodes);
-
-		/* if we have links, this was a truncate, lets do that */
-		if (inode->i_nlink) {
-			if (WARN_ON(!S_ISREG(inode->i_mode))) {
-				iput(inode);
-				continue;
-			}
-			nr_truncate++;
-
-			/* 1 for the orphan item deletion. */
-			trans = btrfs_start_transaction(root, 1);
-			if (IS_ERR(trans)) {
-				iput(inode);
-				ret = PTR_ERR(trans);
-				goto out;
-			}
-			ret = btrfs_orphan_add(trans, BTRFS_I(inode));
-			btrfs_end_transaction(trans);
-			if (ret) {
-				iput(inode);
-				goto out;
-			}
-
-			ret = btrfs_truncate(inode, false);
-			if (ret)
-				btrfs_orphan_del(NULL, BTRFS_I(inode));
-		} else {
-			nr_unlink++;
-		}
+		nr_unlink++;
 
 		/* this will do delete_inode and everything for us */
 		iput(inode);
@@ -3654,12 +3502,7 @@
 
 	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
 
-	if (root->orphan_block_rsv)
-		btrfs_block_rsv_release(fs_info, root->orphan_block_rsv,
-					(u64)-1);
-
-	if (root->orphan_block_rsv ||
-	    test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
+	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
 		trans = btrfs_join_transaction(root);
 		if (!IS_ERR(trans))
 			btrfs_end_transaction(trans);
@@ -3667,8 +3510,6 @@
 
 	if (nr_unlink)
 		btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);
-	if (nr_truncate)
-		btrfs_debug(fs_info, "truncated %d orphans", nr_truncate);
 
 out:
 	if (ret)
@@ -3931,7 +3772,7 @@
 		break;
 	}
 
-	btrfs_update_iflags(inode);
+	btrfs_sync_inode_flags_to_i_flags(inode);
 	return 0;
 
 make_bad:
@@ -4245,7 +4086,7 @@
 	return ret;
 }
 
-int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct inode *dir, u64 objectid,
 			const char *name, int name_len)
@@ -4326,6 +4167,262 @@
 	return ret;
 }
 
+/*
+ * Helper to check if the subvolume references other subvolumes or if it's
+ * the default subvolume.
+ */
+static noinline int may_destroy_subvol(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_path *path;
+	struct btrfs_dir_item *di;
+	struct btrfs_key key;
+	u64 dir_id;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/* Make sure this root isn't set as the default subvol */
+	dir_id = btrfs_super_root_dir(fs_info->super_copy);
+	di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
+				   dir_id, "default", 7, 0);
+	if (di && !IS_ERR(di)) {
+		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+		if (key.objectid == root->root_key.objectid) {
+			ret = -EPERM;
+			btrfs_err(fs_info,
+				  "deleting default subvolume %llu is not allowed",
+				  key.objectid);
+			goto out;
+		}
+		btrfs_release_path(path);
+	}
+
+	key.objectid = root->root_key.objectid;
+	key.type = BTRFS_ROOT_REF_KEY;
+	key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret == 0);
+
+	ret = 0;
+	if (path->slots[0] > 0) {
+		path->slots[0]--;
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.objectid == root->root_key.objectid &&
+		    key.type == BTRFS_ROOT_REF_KEY)
+			ret = -ENOTEMPTY;
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/* Delete all dentries for inodes belonging to the root */
+static void btrfs_prune_dentries(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct rb_node *node;
+	struct rb_node *prev;
+	struct btrfs_inode *entry;
+	struct inode *inode;
+	u64 objectid = 0;
+
+	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+		WARN_ON(btrfs_root_refs(&root->root_item) != 0);
+
+	spin_lock(&root->inode_lock);
+again:
+	node = root->inode_tree.rb_node;
+	prev = NULL;
+	while (node) {
+		prev = node;
+		entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+		if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+			node = node->rb_left;
+		else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
+			node = node->rb_right;
+		else
+			break;
+	}
+	if (!node) {
+		while (prev) {
+			entry = rb_entry(prev, struct btrfs_inode, rb_node);
+			if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
+				node = prev;
+				break;
+			}
+			prev = rb_next(prev);
+		}
+	}
+	while (node) {
+		entry = rb_entry(node, struct btrfs_inode, rb_node);
+		objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
+		inode = igrab(&entry->vfs_inode);
+		if (inode) {
+			spin_unlock(&root->inode_lock);
+			if (atomic_read(&inode->i_count) > 1)
+				d_prune_aliases(inode);
+			/*
+			 * btrfs_drop_inode will have it removed from the inode
+			 * cache when its usage count hits zero.
+			 */
+			iput(inode);
+			cond_resched();
+			spin_lock(&root->inode_lock);
+			goto again;
+		}
+
+		if (cond_resched_lock(&root->inode_lock))
+			goto again;
+
+		node = rb_next(node);
+	}
+	spin_unlock(&root->inode_lock);
+}
+
+int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct inode *inode = d_inode(dentry);
+	struct btrfs_root *dest = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_block_rsv block_rsv;
+	u64 root_flags;
+	int ret;
+	int err;
+
+	/*
+	 * Don't allow deleting a subvolume while a send is in progress. This
+	 * is done inside the inode lock so the error handling that has to
+	 * drop the bit again is not run concurrently.
+	 */
+	spin_lock(&dest->root_item_lock);
+	root_flags = btrfs_root_flags(&dest->root_item);
+	if (dest->send_in_progress == 0) {
+		btrfs_set_root_flags(&dest->root_item,
+				root_flags | BTRFS_ROOT_SUBVOL_DEAD);
+		spin_unlock(&dest->root_item_lock);
+	} else {
+		spin_unlock(&dest->root_item_lock);
+		btrfs_warn(fs_info,
+			   "attempt to delete subvolume %llu during send",
+			   dest->root_key.objectid);
+		return -EPERM;
+	}
+
+	down_write(&fs_info->subvol_sem);
+
+	err = may_destroy_subvol(dest);
+	if (err)
+		goto out_up_write;
+
+	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
+	/*
+	 * One for dir inode,
+	 * two for dir entries,
+	 * two for root ref/backref.
+	 */
+	err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+	if (err)
+		goto out_up_write;
+
+	trans = btrfs_start_transaction(root, 0);
+	if (IS_ERR(trans)) {
+		err = PTR_ERR(trans);
+		goto out_release;
+	}
+	trans->block_rsv = &block_rsv;
+	trans->bytes_reserved = block_rsv.size;
+
+	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
+
+	ret = btrfs_unlink_subvol(trans, root, dir,
+				dest->root_key.objectid,
+				dentry->d_name.name,
+				dentry->d_name.len);
+	if (ret) {
+		err = ret;
+		btrfs_abort_transaction(trans, ret);
+		goto out_end_trans;
+	}
+
+	btrfs_record_root_in_trans(trans, dest);
+
+	memset(&dest->root_item.drop_progress, 0,
+		sizeof(dest->root_item.drop_progress));
+	dest->root_item.drop_level = 0;
+	btrfs_set_root_refs(&dest->root_item, 0);
+
+	if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
+		ret = btrfs_insert_orphan_item(trans,
+					fs_info->tree_root,
+					dest->root_key.objectid);
+		if (ret) {
+			btrfs_abort_transaction(trans, ret);
+			err = ret;
+			goto out_end_trans;
+		}
+	}
+
+	ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
+				  BTRFS_UUID_KEY_SUBVOL,
+				  dest->root_key.objectid);
+	if (ret && ret != -ENOENT) {
+		btrfs_abort_transaction(trans, ret);
+		err = ret;
+		goto out_end_trans;
+	}
+	if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
+		ret = btrfs_uuid_tree_remove(trans,
+					  dest->root_item.received_uuid,
+					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+					  dest->root_key.objectid);
+		if (ret && ret != -ENOENT) {
+			btrfs_abort_transaction(trans, ret);
+			err = ret;
+			goto out_end_trans;
+		}
+	}
+
+out_end_trans:
+	trans->block_rsv = NULL;
+	trans->bytes_reserved = 0;
+	ret = btrfs_end_transaction(trans);
+	if (ret && !err)
+		err = ret;
+	inode->i_flags |= S_DEAD;
+out_release:
+	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+out_up_write:
+	up_write(&fs_info->subvol_sem);
+	if (err) {
+		spin_lock(&dest->root_item_lock);
+		root_flags = btrfs_root_flags(&dest->root_item);
+		btrfs_set_root_flags(&dest->root_item,
+				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
+		spin_unlock(&dest->root_item_lock);
+	} else {
+		d_invalidate(dentry);
+		btrfs_prune_dentries(dest);
+		ASSERT(dest->send_in_progress == 0);
+
+		/* the last ref */
+		if (dest->ino_cache_inode) {
+			iput(dest->ino_cache_inode);
+			dest->ino_cache_inode = NULL;
+		}
+	}
+
+	return err;
+}
+
 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = d_inode(dentry);
@@ -4337,7 +4434,7 @@
 	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
 		return -ENOTEMPTY;
 	if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
-		return -EPERM;
+		return btrfs_delete_subvolume(dir, dentry);
 
 	trans = __unlink_start_trans(dir);
 	if (IS_ERR(trans))
@@ -4449,7 +4546,6 @@
 	int pending_del_slot = 0;
 	int extent_type = -1;
 	int ret;
-	int err = 0;
 	u64 ino = btrfs_ino(BTRFS_I(inode));
 	u64 bytes_deleted = 0;
 	bool be_nice = false;
@@ -4501,22 +4597,19 @@
 	 * up a huge file in a single leaf.  Most of the time that
 	 * bytes_deleted is > 0, it will be huge by the time we get here
 	 */
-	if (be_nice && bytes_deleted > SZ_32M) {
-		if (btrfs_should_end_transaction(trans)) {
-			err = -EAGAIN;
-			goto error;
-		}
-	}
-
-
-	path->leave_spinning = 1;
-	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-	if (ret < 0) {
-		err = ret;
+	if (be_nice && bytes_deleted > SZ_32M &&
+	    btrfs_should_end_transaction(trans)) {
+		ret = -EAGAIN;
 		goto out;
 	}
 
+	path->leave_spinning = 1;
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+
 	if (ret > 0) {
+		ret = 0;
 		/* there are no items in the tree for us to truncate, we're
 		 * done
 		 */
@@ -4627,7 +4720,7 @@
 				 * We have to bail so the last_size is set to
 				 * just before this extent.
 				 */
-				err = NEED_TRUNCATE_BLOCK;
+				ret = NEED_TRUNCATE_BLOCK;
 				break;
 			}
 
@@ -4666,7 +4759,10 @@
 						extent_num_bytes, 0,
 						btrfs_header_owner(leaf),
 						ino, extent_offset);
-			BUG_ON(ret);
+			if (ret) {
+				btrfs_abort_transaction(trans, ret);
+				break;
+			}
 			if (btrfs_should_throttle_delayed_refs(trans, fs_info))
 				btrfs_async_run_delayed_refs(fs_info,
 					trans->delayed_ref_updates * 2,
@@ -4694,7 +4790,7 @@
 						pending_del_nr);
 				if (ret) {
 					btrfs_abort_transaction(trans, ret);
-					goto error;
+					break;
 				}
 				pending_del_nr = 0;
 			}
@@ -4705,8 +4801,8 @@
 					trans->delayed_ref_updates = 0;
 					ret = btrfs_run_delayed_refs(trans,
 								   updates * 2);
-					if (ret && !err)
-						err = ret;
+					if (ret)
+						break;
 				}
 			}
 			/*
@@ -4714,8 +4810,8 @@
 			 * and let the transaction restart
 			 */
 			if (should_end) {
-				err = -EAGAIN;
-				goto error;
+				ret = -EAGAIN;
+				break;
 			}
 			goto search_again;
 		} else {
@@ -4723,32 +4819,37 @@
 		}
 	}
 out:
-	if (pending_del_nr) {
-		ret = btrfs_del_items(trans, root, path, pending_del_slot,
+	if (ret >= 0 && pending_del_nr) {
+		int err;
+
+		err = btrfs_del_items(trans, root, path, pending_del_slot,
 				      pending_del_nr);
-		if (ret)
-			btrfs_abort_transaction(trans, ret);
+		if (err) {
+			btrfs_abort_transaction(trans, err);
+			ret = err;
+		}
 	}
-error:
 	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
 		ASSERT(last_size >= new_size);
-		if (!err && last_size > new_size)
+		if (!ret && last_size > new_size)
 			last_size = new_size;
 		btrfs_ordered_update_i_size(inode, last_size, NULL);
 	}
 
 	btrfs_free_path(path);
 
-	if (be_nice && bytes_deleted > SZ_32M) {
+	if (be_nice && bytes_deleted > SZ_32M && (ret >= 0 || ret == -EAGAIN)) {
 		unsigned long updates = trans->delayed_ref_updates;
+		int err;
+
 		if (updates) {
 			trans->delayed_ref_updates = 0;
-			ret = btrfs_run_delayed_refs(trans, updates * 2);
-			if (ret && !err)
-				err = ret;
+			err = btrfs_run_delayed_refs(trans, updates * 2);
+			if (err)
+				ret = err;
 		}
 	}
-	return err;
+	return ret;
 }
 
 /*
@@ -5090,30 +5191,6 @@
 			set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
 				&BTRFS_I(inode)->runtime_flags);
 
-		/*
-		 * 1 for the orphan item we're going to add
-		 * 1 for the orphan item deletion.
-		 */
-		trans = btrfs_start_transaction(root, 2);
-		if (IS_ERR(trans))
-			return PTR_ERR(trans);
-
-		/*
-		 * We need to do this in case we fail at _any_ point during the
-		 * actual truncate.  Once we do the truncate_setsize we could
-		 * invalidate pages which forces any outstanding ordered io to
-		 * be instantly completed which will give us extents that need
-		 * to be truncated.  If we fail to get an orphan inode down we
-		 * could have left over extents that were never meant to live,
-		 * so we need to guarantee from this point on that everything
-		 * will be consistent.
-		 */
-		ret = btrfs_orphan_add(trans, BTRFS_I(inode));
-		btrfs_end_transaction(trans);
-		if (ret)
-			return ret;
-
-		/* we don't support swapfiles, so vmtruncate shouldn't fail */
 		truncate_setsize(inode, newsize);
 
 		/* Disable nonlocked read DIO to avoid the end less truncate */
@@ -5125,29 +5202,16 @@
 		if (ret && inode->i_nlink) {
 			int err;
 
-			/* To get a stable disk_i_size */
-			err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
-			if (err) {
-				btrfs_orphan_del(NULL, BTRFS_I(inode));
-				return err;
-			}
-
 			/*
-			 * failed to truncate, disk_i_size is only adjusted down
-			 * as we remove extents, so it should represent the true
-			 * size of the inode, so reset the in memory size and
-			 * delete our orphan entry.
+			 * Truncate failed, so fix up the in-memory size. We
+			 * adjusted disk_i_size down as we removed extents, so
+			 * wait for disk_i_size to be stable and then update the
+			 * in-memory size to match.
 			 */
-			trans = btrfs_join_transaction(root);
-			if (IS_ERR(trans)) {
-				btrfs_orphan_del(NULL, BTRFS_I(inode));
-				return ret;
-			}
-			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
-			err = btrfs_orphan_del(trans, BTRFS_I(inode));
+			err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
 			if (err)
-				btrfs_abort_transaction(trans, err);
-			btrfs_end_transaction(trans);
+				return err;
+			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
 		}
 	}
 
@@ -5277,13 +5341,52 @@
 	spin_unlock(&io_tree->lock);
 }
 
+static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
+							struct btrfs_block_rsv *rsv,
+							u64 min_size)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+	int failures = 0;
+
+	for (;;) {
+		struct btrfs_trans_handle *trans;
+		int ret;
+
+		ret = btrfs_block_rsv_refill(root, rsv, min_size,
+					     BTRFS_RESERVE_FLUSH_LIMIT);
+
+		if (ret && ++failures > 2) {
+			btrfs_warn(fs_info,
+				   "could not allocate space for a delete; will truncate on mount");
+			return ERR_PTR(-ENOSPC);
+		}
+
+		trans = btrfs_join_transaction(root);
+		if (IS_ERR(trans) || !ret)
+			return trans;
+
+		/*
+		 * Try to steal from the global reserve if there is space for
+		 * it.
+		 */
+		if (!btrfs_check_space_for_delayed_refs(trans, fs_info) &&
+		    !btrfs_block_rsv_migrate(global_rsv, rsv, min_size, 0))
+			return trans;
+
+		/* If not, commit and try again. */
+		ret = btrfs_commit_transaction(trans);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+}
+
 void btrfs_evict_inode(struct inode *inode)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_block_rsv *rsv, *global_rsv;
-	int steal_from_global = 0;
+	struct btrfs_block_rsv *rsv;
 	u64 min_size;
 	int ret;
 
@@ -5304,21 +5407,16 @@
 	     btrfs_is_free_space_inode(BTRFS_I(inode))))
 		goto no_delete;
 
-	if (is_bad_inode(inode)) {
-		btrfs_orphan_del(NULL, BTRFS_I(inode));
+	if (is_bad_inode(inode))
 		goto no_delete;
-	}
 	/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
 	if (!special_file(inode->i_mode))
 		btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
 	btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
 
-	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
-		BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-				 &BTRFS_I(inode)->runtime_flags));
+	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
 		goto no_delete;
-	}
 
 	if (inode->i_nlink > 0) {
 		BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
@@ -5327,130 +5425,63 @@
 	}
 
 	ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
-	if (ret) {
-		btrfs_orphan_del(NULL, BTRFS_I(inode));
+	if (ret)
 		goto no_delete;
-	}
 
 	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
-	if (!rsv) {
-		btrfs_orphan_del(NULL, BTRFS_I(inode));
+	if (!rsv)
 		goto no_delete;
-	}
 	rsv->size = min_size;
 	rsv->failfast = 1;
-	global_rsv = &fs_info->global_block_rsv;
 
 	btrfs_i_size_write(BTRFS_I(inode), 0);
 
-	/*
-	 * This is a bit simpler than btrfs_truncate since we've already
-	 * reserved our space for our orphan item in the unlink, so we just
-	 * need to reserve some slack space in case we add bytes and update
-	 * inode item when doing the truncate.
-	 */
 	while (1) {
-		ret = btrfs_block_rsv_refill(root, rsv, min_size,
-					     BTRFS_RESERVE_FLUSH_LIMIT);
-
-		/*
-		 * Try and steal from the global reserve since we will
-		 * likely not use this space anyway, we want to try as
-		 * hard as possible to get this to work.
-		 */
-		if (ret)
-			steal_from_global++;
-		else
-			steal_from_global = 0;
-		ret = 0;
-
-		/*
-		 * steal_from_global == 0: we reserved stuff, hooray!
-		 * steal_from_global == 1: we didn't reserve stuff, boo!
-		 * steal_from_global == 2: we've committed, still not a lot of
-		 * room but maybe we'll have room in the global reserve this
-		 * time.
-		 * steal_from_global == 3: abandon all hope!
-		 */
-		if (steal_from_global > 2) {
-			btrfs_warn(fs_info,
-				   "Could not get space for a delete, will truncate on mount %d",
-				   ret);
-			btrfs_orphan_del(NULL, BTRFS_I(inode));
-			btrfs_free_block_rsv(fs_info, rsv);
-			goto no_delete;
-		}
-
-		trans = btrfs_join_transaction(root);
-		if (IS_ERR(trans)) {
-			btrfs_orphan_del(NULL, BTRFS_I(inode));
-			btrfs_free_block_rsv(fs_info, rsv);
-			goto no_delete;
-		}
-
-		/*
-		 * We can't just steal from the global reserve, we need to make
-		 * sure there is room to do it, if not we need to commit and try
-		 * again.
-		 */
-		if (steal_from_global) {
-			if (!btrfs_check_space_for_delayed_refs(trans, fs_info))
-				ret = btrfs_block_rsv_migrate(global_rsv, rsv,
-							      min_size, 0);
-			else
-				ret = -ENOSPC;
-		}
-
-		/*
-		 * Couldn't steal from the global reserve, we have too much
-		 * pending stuff built up, commit the transaction and try it
-		 * again.
-		 */
-		if (ret) {
-			ret = btrfs_commit_transaction(trans);
-			if (ret) {
-				btrfs_orphan_del(NULL, BTRFS_I(inode));
-				btrfs_free_block_rsv(fs_info, rsv);
-				goto no_delete;
-			}
-			continue;
-		} else {
-			steal_from_global = 0;
-		}
+		trans = evict_refill_and_join(root, rsv, min_size);
+		if (IS_ERR(trans))
+			goto free_rsv;
 
 		trans->block_rsv = rsv;
 
 		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
-		if (ret != -ENOSPC && ret != -EAGAIN)
-			break;
-
 		trans->block_rsv = &fs_info->trans_block_rsv;
 		btrfs_end_transaction(trans);
-		trans = NULL;
 		btrfs_btree_balance_dirty(fs_info);
+		if (ret && ret != -ENOSPC && ret != -EAGAIN)
+			goto free_rsv;
+		else if (!ret)
+			break;
 	}
 
-	btrfs_free_block_rsv(fs_info, rsv);
-
 	/*
-	 * Errors here aren't a big deal, it just means we leave orphan items
-	 * in the tree.  They will be cleaned up on the next mount.
+	 * Errors here aren't a big deal; they just mean we leave orphan items in
+	 * the tree. They will be cleaned up on the next mount. If the inode
+	 * number gets reused, cleanup deletes the orphan item without doing
+	 * anything, and unlink reuses the existing orphan item.
+	 *
+	 * If it turns out that we are dropping too many of these, we might want
+	 * to add a mechanism for retrying these after a commit.
 	 */
-	if (ret == 0) {
-		trans->block_rsv = root->orphan_block_rsv;
+	trans = evict_refill_and_join(root, rsv, min_size);
+	if (!IS_ERR(trans)) {
+		trans->block_rsv = rsv;
 		btrfs_orphan_del(trans, BTRFS_I(inode));
-	} else {
-		btrfs_orphan_del(NULL, BTRFS_I(inode));
+		trans->block_rsv = &fs_info->trans_block_rsv;
+		btrfs_end_transaction(trans);
 	}
 
-	trans->block_rsv = &fs_info->trans_block_rsv;
 	if (!(root == fs_info->tree_root ||
 	      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
 		btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
 
-	btrfs_end_transaction(trans);
-	btrfs_btree_balance_dirty(fs_info);
+free_rsv:
+	btrfs_free_block_rsv(fs_info, rsv);
 no_delete:
+	/*
+	 * If we didn't successfully delete, the orphan item will still be in
+	 * the tree and we'll retry on the next mount. Again, we might also want
+	 * to retry these periodically in the future.
+	 */
 	btrfs_remove_delayed_node(BTRFS_I(inode));
 	clear_inode(inode);
 }
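
Taken together, the eviction rework above collapses btrfs_evict_inode() into
roughly the following shape (a simplified sketch of the resulting flow, with
the free_rsv/no_delete labels and secondary bookkeeping elided, not the
verbatim kernel code):

	/* Drop the inode's items, retrying while space can still be found. */
	while (1) {
		trans = evict_refill_and_join(root, rsv, min_size);
		if (IS_ERR(trans))
			goto free_rsv;	/* orphan item stays; cleaned on next mount */
		trans->block_rsv = rsv;
		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
		btrfs_end_transaction(trans);
		if (ret && ret != -ENOSPC && ret != -EAGAIN)
			goto free_rsv;	/* hard error */
		if (!ret)
			break;		/* everything deleted */
	}

	/* Best effort: remove the orphan item in a transaction of its own. */
	trans = evict_refill_and_join(root, rsv, min_size);
	if (!IS_ERR(trans)) {
		btrfs_orphan_del(trans, BTRFS_I(inode));
		btrfs_end_transaction(trans);
	}
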
@@ -5626,69 +5657,6 @@
 	}
 }
 
-void btrfs_invalidate_inodes(struct btrfs_root *root)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct rb_node *node;
-	struct rb_node *prev;
-	struct btrfs_inode *entry;
-	struct inode *inode;
-	u64 objectid = 0;
-
-	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
-		WARN_ON(btrfs_root_refs(&root->root_item) != 0);
-
-	spin_lock(&root->inode_lock);
-again:
-	node = root->inode_tree.rb_node;
-	prev = NULL;
-	while (node) {
-		prev = node;
-		entry = rb_entry(node, struct btrfs_inode, rb_node);
-
-		if (objectid < btrfs_ino(BTRFS_I(&entry->vfs_inode)))
-			node = node->rb_left;
-		else if (objectid > btrfs_ino(BTRFS_I(&entry->vfs_inode)))
-			node = node->rb_right;
-		else
-			break;
-	}
-	if (!node) {
-		while (prev) {
-			entry = rb_entry(prev, struct btrfs_inode, rb_node);
-			if (objectid <= btrfs_ino(BTRFS_I(&entry->vfs_inode))) {
-				node = prev;
-				break;
-			}
-			prev = rb_next(prev);
-		}
-	}
-	while (node) {
-		entry = rb_entry(node, struct btrfs_inode, rb_node);
-		objectid = btrfs_ino(BTRFS_I(&entry->vfs_inode)) + 1;
-		inode = igrab(&entry->vfs_inode);
-		if (inode) {
-			spin_unlock(&root->inode_lock);
-			if (atomic_read(&inode->i_count) > 1)
-				d_prune_aliases(inode);
-			/*
-			 * btrfs_drop_inode will have it removed from
-			 * the inode cache when its usage count
-			 * hits zero.
-			 */
-			iput(inode);
-			cond_resched();
-			spin_lock(&root->inode_lock);
-			goto again;
-		}
-
-		if (cond_resched_lock(&root->inode_lock))
-			goto again;
-
-		node = rb_next(node);
-	}
-	spin_unlock(&root->inode_lock);
-}
 
 static int btrfs_init_locked_inode(struct inode *inode, void *p)
 {
@@ -5850,11 +5818,6 @@
 	return 0;
 }
 
-static void btrfs_dentry_release(struct dentry *dentry)
-{
-	kfree(dentry->d_fsdata);
-}
-
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 				   unsigned int flags)
 {
@@ -6270,7 +6233,7 @@
 			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
 	}
 
-	btrfs_update_iflags(inode);
+	btrfs_sync_inode_flags_to_i_flags(inode);
 }
 
 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
@@ -6705,8 +6668,9 @@
 	 * 2 items for inode and inode ref
 	 * 2 items for dir items
 	 * 1 item for parent inode
+	 * 1 item for orphan item deletion if O_TMPFILE
 	 */
-	trans = btrfs_start_transaction(root, 5);
+	trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
 	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
 		trans = NULL;
@@ -7083,7 +7047,7 @@
 
 	err = 0;
 	write_lock(&em_tree->lock);
-	err = btrfs_add_extent_mapping(em_tree, &em, start, len);
+	err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
 	write_unlock(&em_tree->lock);
 out:
 
@@ -7368,6 +7332,14 @@
 	    btrfs_file_extent_other_encoding(leaf, fi))
 		goto out;
 
+	/*
+	 * Do the same check as in btrfs_cross_ref_exist but without the
+	 * unnecessary search.
+	 */
+	if (btrfs_file_extent_generation(leaf, fi) <=
+	    btrfs_root_last_snapshot(&root->root_item))
+		goto out;
+
 	backref_offset = btrfs_file_extent_offset(leaf, fi);
 
 	if (orig_start) {
@@ -7568,6 +7540,125 @@
 	return em;
 }
 
+
+static int btrfs_get_blocks_direct_read(struct extent_map *em,
+					struct buffer_head *bh_result,
+					struct inode *inode,
+					u64 start, u64 len)
+{
+	if (em->block_start == EXTENT_MAP_HOLE ||
+			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+		return -ENOENT;
+
+	len = min(len, em->len - (start - em->start));
+
+	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+		inode->i_blkbits;
+	bh_result->b_size = len;
+	bh_result->b_bdev = em->bdev;
+	set_buffer_mapped(bh_result);
+
+	return 0;
+}
+
+static int btrfs_get_blocks_direct_write(struct extent_map **map,
+					 struct buffer_head *bh_result,
+					 struct inode *inode,
+					 struct btrfs_dio_data *dio_data,
+					 u64 start, u64 len)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct extent_map *em = *map;
+	int ret = 0;
+
+	/*
+	 * We don't allocate a new extent in the following cases
+	 *
+	 * 1) The inode is marked as NODATACOW. In this case we'll just use the
+	 * existing extent.
+	 * 2) The extent is marked as PREALLOC. We're good to go here and can
+	 * just use the extent.
+	 */
+	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
+	     em->block_start != EXTENT_MAP_HOLE)) {
+		int type;
+		u64 block_start, orig_start, orig_block_len, ram_bytes;
+
+		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+			type = BTRFS_ORDERED_PREALLOC;
+		else
+			type = BTRFS_ORDERED_NOCOW;
+		len = min(len, em->len - (start - em->start));
+		block_start = em->block_start + (start - em->start);
+
+		if (can_nocow_extent(inode, start, &len, &orig_start,
+				     &orig_block_len, &ram_bytes) == 1 &&
+		    btrfs_inc_nocow_writers(fs_info, block_start)) {
+			struct extent_map *em2;
+
+			em2 = btrfs_create_dio_extent(inode, start, len,
+						      orig_start, block_start,
+						      len, orig_block_len,
+						      ram_bytes, type);
+			btrfs_dec_nocow_writers(fs_info, block_start);
+			if (type == BTRFS_ORDERED_PREALLOC) {
+				free_extent_map(em);
+				*map = em = em2;
+			}
+
+			if (em2 && IS_ERR(em2)) {
+				ret = PTR_ERR(em2);
+				goto out;
+			}
+			/*
+			 * For inode marked NODATACOW or extent marked PREALLOC,
+			 * use the existing or preallocated extent, so we do not
+			 * need to adjust btrfs_space_info's bytes_may_use.
+			 */
+			btrfs_free_reserved_data_space_noquota(inode, start,
+							       len);
+			goto skip_cow;
+		}
+	}
+
+	/* this will cow the extent */
+	len = bh_result->b_size;
+	free_extent_map(em);
+	*map = em = btrfs_new_extent_direct(inode, start, len);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto out;
+	}
+
+	len = min(len, em->len - (start - em->start));
+
+skip_cow:
+	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
+		inode->i_blkbits;
+	bh_result->b_size = len;
+	bh_result->b_bdev = em->bdev;
+	set_buffer_mapped(bh_result);
+
+	if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+		set_buffer_new(bh_result);
+
+	/*
+	 * Need to update the i_size under the extent lock so buffered
+	 * readers will get the updated i_size when we unlock.
+	 */
+	if (!dio_data->overwrite && start + len > i_size_read(inode))
+		i_size_write(inode, start + len);
+
+	WARN_ON(dio_data->reserve < len);
+	dio_data->reserve -= len;
+	dio_data->unsubmitted_oe_range_end = start + len;
+	current->journal_info = dio_data;
+out:
+	return ret;
+}
+
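
Condensed, the write helper's choice between writing in place and allocating
comes down to the following (a simplified restatement of the checks above,
eliding the ordered-extent setup and reservation accounting):

	/* In-place (NOCOW) writes are only attempted over existing data. */
	bool maybe_nocow = test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
			   ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
			    em->block_start != EXTENT_MAP_HOLE);

	if (maybe_nocow &&
	    can_nocow_extent(inode, start, &len, &orig_start,
			     &orig_block_len, &ram_bytes) == 1 &&
	    btrfs_inc_nocow_writers(fs_info, block_start)) {
		/* reuse the existing or preallocated extent */
	} else {
		/* fall back to btrfs_new_extent_direct(), i.e. COW */
	}
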
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
@@ -7636,116 +7727,36 @@
 		goto unlock_err;
 	}
 
-	/* Just a good old fashioned hole, return */
-	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
-			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-		free_extent_map(em);
-		goto unlock_err;
-	}
-
-	/*
-	 * We don't allocate a new extent in the following cases
-	 *
-	 * 1) The inode is marked as NODATACOW.  In this case we'll just use the
-	 * existing extent.
-	 * 2) The extent is marked as PREALLOC.  We're good to go here and can
-	 * just use the extent.
-	 *
-	 */
-	if (!create) {
-		len = min(len, em->len - (start - em->start));
-		lockstart = start + len;
-		goto unlock;
-	}
-
-	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
-	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
-	     em->block_start != EXTENT_MAP_HOLE)) {
-		int type;
-		u64 block_start, orig_start, orig_block_len, ram_bytes;
-
-		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-			type = BTRFS_ORDERED_PREALLOC;
-		else
-			type = BTRFS_ORDERED_NOCOW;
-		len = min(len, em->len - (start - em->start));
-		block_start = em->block_start + (start - em->start);
-
-		if (can_nocow_extent(inode, start, &len, &orig_start,
-				     &orig_block_len, &ram_bytes) == 1 &&
-		    btrfs_inc_nocow_writers(fs_info, block_start)) {
-			struct extent_map *em2;
-
-			em2 = btrfs_create_dio_extent(inode, start, len,
-						      orig_start, block_start,
-						      len, orig_block_len,
-						      ram_bytes, type);
-			btrfs_dec_nocow_writers(fs_info, block_start);
-			if (type == BTRFS_ORDERED_PREALLOC) {
-				free_extent_map(em);
-				em = em2;
-			}
-			if (em2 && IS_ERR(em2)) {
-				ret = PTR_ERR(em2);
-				goto unlock_err;
-			}
-			/*
-			 * For inode marked NODATACOW or extent marked PREALLOC,
-			 * use the existing or preallocated extent, so does not
-			 * need to adjust btrfs_space_info's bytes_may_use.
-			 */
-			btrfs_free_reserved_data_space_noquota(inode,
-					start, len);
-			goto unlock;
-		}
-	}
-
-	/*
-	 * this will cow the extent, reset the len in case we changed
-	 * it above
-	 */
-	len = bh_result->b_size;
-	free_extent_map(em);
-	em = btrfs_new_extent_direct(inode, start, len);
-	if (IS_ERR(em)) {
-		ret = PTR_ERR(em);
-		goto unlock_err;
-	}
-	len = min(len, em->len - (start - em->start));
-unlock:
-	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
-		inode->i_blkbits;
-	bh_result->b_size = len;
-	bh_result->b_bdev = em->bdev;
-	set_buffer_mapped(bh_result);
 	if (create) {
-		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-			set_buffer_new(bh_result);
+		ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
+						    dio_data, start, len);
+		if (ret < 0)
+			goto unlock_err;
 
-		/*
-		 * Need to update the i_size under the extent lock so buffered
-		 * readers will get the updated i_size when we unlock.
-		 */
-		if (!dio_data->overwrite && start + len > i_size_read(inode))
-			i_size_write(inode, start + len);
-
-		WARN_ON(dio_data->reserve < len);
-		dio_data->reserve -= len;
-		dio_data->unsubmitted_oe_range_end = start + len;
-		current->journal_info = dio_data;
-	}
-
-	/*
-	 * In the case of write we need to clear and unlock the entire range,
-	 * in the case of read we need to unlock only the end area that we
-	 * aren't using if there is any left over space.
-	 */
-	if (lockstart < lockend) {
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-				 lockend, unlock_bits, 1, 0,
-				 &cached_state);
+		/* clear and unlock the entire range */
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 unlock_bits, 1, 0, &cached_state);
 	} else {
-		free_extent_state(cached_state);
+		ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
+						   start, len);
+		/* Can be negative only if we read from a hole */
+		if (ret < 0) {
+			ret = 0;
+			free_extent_map(em);
+			goto unlock_err;
+		}
+		/*
+		 * We need to unlock only the end area that we aren't using.
+		 * The rest is going to be unlocked by the endio routine.
+		 */
+		lockstart = start + bh_result->b_size;
+		if (lockstart < lockend) {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+					 lockend, unlock_bits, 1, 0,
+					 &cached_state);
+		} else {
+			free_extent_state(cached_state);
+		}
 	}
 
 	free_extent_map(em);
@@ -8131,7 +8142,6 @@
 	u64 ordered_offset = offset;
 	u64 ordered_bytes = bytes;
 	u64 last_offset;
-	int ret;
 
 	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
 		wq = fs_info->endio_freespace_worker;
@@ -8141,32 +8151,31 @@
 		func = btrfs_endio_write_helper;
 	}
 
-again:
-	last_offset = ordered_offset;
-	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
-						   &ordered_offset,
-						   ordered_bytes,
-						   uptodate);
-	if (!ret)
-		goto out_test;
-
-	btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
-	btrfs_queue_work(wq, &ordered->work);
-out_test:
-	/*
-	 * If btrfs_dec_test_ordered_pending does not find any ordered extent
-	 * in the range, we can exit.
-	 */
-	if (ordered_offset == last_offset)
-		return;
-	/*
-	 * our bio might span multiple ordered extents.  If we haven't
-	 * completed the accounting for the whole dio, go back and try again
-	 */
-	if (ordered_offset < offset + bytes) {
-		ordered_bytes = offset + bytes - ordered_offset;
-		ordered = NULL;
-		goto again;
+	while (ordered_offset < offset + bytes) {
+		last_offset = ordered_offset;
+		if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
+							   &ordered_offset,
+							   ordered_bytes,
+							   uptodate)) {
+			btrfs_init_work(&ordered->work, func,
+					finish_ordered_fn,
+					NULL, NULL);
+			btrfs_queue_work(wq, &ordered->work);
+		}
+		/*
+		 * If btrfs_dec_test_first_ordered_pending does not find any
+		 * ordered extent in the range, we can exit.
+		 */
+		if (ordered_offset == last_offset)
+			return;
+		/*
+		 * Our bio might span multiple ordered extents. In this case
+		 * we keep going until we have accounted for the whole dio.
+		 */
+		if (ordered_offset < offset + bytes) {
+			ordered_bytes = offset + bytes - ordered_offset;
+			ordered = NULL;
+		}
 	}
 }
 
@@ -8705,29 +8714,19 @@
 static int btrfs_writepages(struct address_space *mapping,
 			    struct writeback_control *wbc)
 {
-	struct extent_io_tree *tree;
-
-	tree = &BTRFS_I(mapping->host)->io_tree;
-	return extent_writepages(tree, mapping, wbc);
+	return extent_writepages(mapping, wbc);
 }
 
 static int
 btrfs_readpages(struct file *file, struct address_space *mapping,
 		struct list_head *pages, unsigned nr_pages)
 {
-	struct extent_io_tree *tree;
-	tree = &BTRFS_I(mapping->host)->io_tree;
-	return extent_readpages(tree, mapping, pages, nr_pages);
+	return extent_readpages(mapping, pages, nr_pages);
 }
+
 static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 {
-	struct extent_io_tree *tree;
-	struct extent_map_tree *map;
-	int ret;
-
-	tree = &BTRFS_I(page->mapping->host)->io_tree;
-	map = &BTRFS_I(page->mapping->host)->extent_tree;
-	ret = try_release_extent_mapping(map, tree, page, gfp_flags);
+	int ret = try_release_extent_mapping(page, gfp_flags);
 	if (ret == 1) {
 		ClearPagePrivate(page);
 		set_page_private(page, 0);
@@ -8868,8 +8867,8 @@
  *
  * We are not allowed to take the i_mutex here so we have to play games to
  * protect against truncate races as the page could now be beyond EOF.  Because
- * vmtruncate() writes the inode size before removing pages, once we have the
- * page lock we can determine safely if the page is beyond EOF. If it is not
+ * truncate_setsize() writes the inode size before removing pages, once we have
+ * the page lock we can determine safely if the page is beyond EOF. If it is not
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
  */
@@ -9031,8 +9030,7 @@
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_block_rsv *rsv;
-	int ret = 0;
-	int err = 0;
+	int ret;
 	struct btrfs_trans_handle *trans;
 	u64 mask = fs_info->sectorsize - 1;
 	u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);
@@ -9045,39 +9043,31 @@
 	}
 
 	/*
-	 * Yes ladies and gentlemen, this is indeed ugly.  The fact is we have
-	 * 3 things going on here
+	 * Yes ladies and gentlemen, this is indeed ugly.  We have a couple of
+	 * things going on here:
 	 *
-	 * 1) We need to reserve space for our orphan item and the space to
-	 * delete our orphan item.  Lord knows we don't want to have a dangling
-	 * orphan item because we didn't reserve space to remove it.
+	 * 1) We need to reserve space to update our inode.
 	 *
-	 * 2) We need to reserve space to update our inode.
-	 *
-	 * 3) We need to have something to cache all the space that is going to
+	 * 2) We need to have something to cache all the space that is going to
 	 * be free'd up by the truncate operation, but also have some slack
 	 * space reserved in case it uses space during the truncate (thank you
 	 * very much snapshotting).
 	 *
-	 * And we need these to all be separate.  The fact is we can use a lot of
+	 * And we need these to be separate.  The fact is we can use a lot of
 	 * space doing the truncate, and we have no earthly idea how much space
 	 * we will use, so we need the truncate reservation to be separate so it
-	 * doesn't end up using space reserved for updating the inode or
-	 * removing the orphan item.  We also need to be able to stop the
-	 * transaction and start a new one, which means we need to be able to
-	 * update the inode several times, and we have no idea of knowing how
-	 * many times that will be, so we can't just reserve 1 item for the
-	 * entirety of the operation, so that has to be done separately as well.
-	 * Then there is the orphan item, which does indeed need to be held on
-	 * to for the whole operation, and we need nobody to touch this reserved
-	 * space except the orphan code.
+	 * doesn't end up using space reserved for updating the inode.  We also
+	 * need to be able to stop the transaction and start a new one, which
+	 * means we need to be able to update the inode several times, and we
+	 * have no way of knowing how many times that will be, so we can't just
+	 * reserve 1 item for the entirety of the operation, so that has to be
+	 * done separately as well.
 	 *
 	 * So that leaves us with
 	 *
-	 * 1) root->orphan_block_rsv - for the orphan deletion.
-	 * 2) rsv - for the truncate reservation, which we will steal from the
+	 * 1) rsv - for the truncate reservation, which we will steal from the
 	 * transaction reservation.
-	 * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
+	 * 2) fs_info->trans_block_rsv - this will have 1 item's worth left for
 	 * updating the inode.
 	 */
 	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
@@ -9092,7 +9082,7 @@
 	 */
 	trans = btrfs_start_transaction(root, 2);
 	if (IS_ERR(trans)) {
-		err = PTR_ERR(trans);
+		ret = PTR_ERR(trans);
 		goto out;
 	}
 
@@ -9116,24 +9106,19 @@
 						 inode->i_size,
 						 BTRFS_EXTENT_DATA_KEY);
 		trans->block_rsv = &fs_info->trans_block_rsv;
-		if (ret != -ENOSPC && ret != -EAGAIN) {
-			if (ret < 0)
-				err = ret;
+		if (ret != -ENOSPC && ret != -EAGAIN)
 			break;
-		}
 
 		ret = btrfs_update_inode(trans, root, inode);
-		if (ret) {
-			err = ret;
+		if (ret)
 			break;
-		}
 
 		btrfs_end_transaction(trans);
 		btrfs_btree_balance_dirty(fs_info);
 
 		trans = btrfs_start_transaction(root, 2);
 		if (IS_ERR(trans)) {
-			ret = err = PTR_ERR(trans);
+			ret = PTR_ERR(trans);
 			trans = NULL;
 			break;
 		}
@@ -9166,29 +9151,23 @@
 		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 	}
 
-	if (ret == 0 && inode->i_nlink > 0) {
-		trans->block_rsv = root->orphan_block_rsv;
-		ret = btrfs_orphan_del(trans, BTRFS_I(inode));
-		if (ret)
-			err = ret;
-	}
-
 	if (trans) {
-		trans->block_rsv = &fs_info->trans_block_rsv;
-		ret = btrfs_update_inode(trans, root, inode);
-		if (ret && !err)
-			err = ret;
+		int ret2;
 
-		ret = btrfs_end_transaction(trans);
+		trans->block_rsv = &fs_info->trans_block_rsv;
+		ret2 = btrfs_update_inode(trans, root, inode);
+		if (ret2 && !ret)
+			ret = ret2;
+
+		ret2 = btrfs_end_transaction(trans);
+		if (ret2 && !ret)
+			ret = ret2;
 		btrfs_btree_balance_dirty(fs_info);
 	}
 out:
 	btrfs_free_block_rsv(fs_info, rsv);
 
-	if (ret && !err)
-		err = ret;
-
-	return err;
+	return ret;
 }
 
 /*
@@ -9324,13 +9303,6 @@
 	if (!root)
 		goto free;
 
-	if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-		     &BTRFS_I(inode)->runtime_flags)) {
-		btrfs_info(fs_info, "inode %llu still on the orphan list",
-			   btrfs_ino(BTRFS_I(inode)));
-		atomic_dec(&root->orphan_inodes);
-	}
-
 	while (1) {
 		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
 		if (!ordered)
@@ -9964,6 +9936,13 @@
 	return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
 }
 
+struct btrfs_delalloc_work {
+	struct inode *inode;
+	struct completion completion;
+	struct list_head list;
+	struct btrfs_work work;
+};
+
 static void btrfs_run_delalloc_work(struct btrfs_work *work)
 {
 	struct btrfs_delalloc_work *delalloc_work;
@@ -9977,15 +9956,11 @@
 				&BTRFS_I(inode)->runtime_flags))
 		filemap_flush(inode->i_mapping);
 
-	if (delalloc_work->delay_iput)
-		btrfs_add_delayed_iput(inode);
-	else
-		iput(inode);
+	iput(inode);
 	complete(&delalloc_work->completion);
 }
 
-struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
-						    int delay_iput)
+static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
 {
 	struct btrfs_delalloc_work *work;
 
@@ -9996,7 +9971,6 @@
 	init_completion(&work->completion);
 	INIT_LIST_HEAD(&work->list);
 	work->inode = inode;
-	work->delay_iput = delay_iput;
 	WARN_ON_ONCE(!inode);
 	btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
 			btrfs_run_delalloc_work, NULL, NULL);
@@ -10004,18 +9978,11 @@
 	return work;
 }
 
-void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
-{
-	wait_for_completion(&work->completion);
-	kfree(work);
-}
-
 /*
  * some fairly slow code that needs optimization. This walks the list
  * of all the inodes with pending delalloc and forces them to disk.
  */
-static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
-				   int nr)
+static int start_delalloc_inodes(struct btrfs_root *root, int nr)
 {
 	struct btrfs_inode *binode;
 	struct inode *inode;
@@ -10043,12 +10010,9 @@
 		}
 		spin_unlock(&root->delalloc_lock);
 
-		work = btrfs_alloc_delalloc_work(inode, delay_iput);
+		work = btrfs_alloc_delalloc_work(inode);
 		if (!work) {
-			if (delay_iput)
-				btrfs_add_delayed_iput(inode);
-			else
-				iput(inode);
+			iput(inode);
 			ret = -ENOMEM;
 			goto out;
 		}
@@ -10066,10 +10030,11 @@
 out:
 	list_for_each_entry_safe(work, next, &works, list) {
 		list_del_init(&work->list);
-		btrfs_wait_and_free_delalloc_work(work);
+		wait_for_completion(&work->completion);
+		kfree(work);
 	}
 
-	if (!list_empty_careful(&splice)) {
+	if (!list_empty(&splice)) {
 		spin_lock(&root->delalloc_lock);
 		list_splice_tail(&splice, &root->delalloc_inodes);
 		spin_unlock(&root->delalloc_lock);
@@ -10078,7 +10043,7 @@
 	return ret;
 }
 
-int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
+int btrfs_start_delalloc_inodes(struct btrfs_root *root)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	int ret;
@@ -10086,14 +10051,13 @@
 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
 		return -EROFS;
 
-	ret = __start_delalloc_inodes(root, delay_iput, -1);
+	ret = start_delalloc_inodes(root, -1);
 	if (ret > 0)
 		ret = 0;
 	return ret;
 }
 
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
-			       int nr)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr)
 {
 	struct btrfs_root *root;
 	struct list_head splice;
@@ -10116,7 +10080,7 @@
 			       &fs_info->delalloc_roots);
 		spin_unlock(&fs_info->delalloc_root_lock);
 
-		ret = __start_delalloc_inodes(root, delay_iput, nr);
+		ret = start_delalloc_inodes(root, nr);
 		btrfs_put_fs_root(root);
 		if (ret < 0)
 			goto out;
@@ -10131,7 +10095,7 @@
 
 	ret = 0;
 out:
-	if (!list_empty_careful(&splice)) {
+	if (!list_empty(&splice)) {
 		spin_lock(&fs_info->delalloc_root_lock);
 		list_splice_tail(&splice, &fs_info->delalloc_roots);
 		spin_unlock(&fs_info->delalloc_root_lock);
@@ -10669,5 +10633,4 @@
 
 const struct dentry_operations btrfs_dentry_operations = {
 	.d_delete	= btrfs_dentry_delete,
-	.d_release	= btrfs_dentry_release,
 };
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 632e26d..d29992f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -93,20 +93,22 @@
 		       int no_time_update);
 
 /* Mask out flags that are inappropriate for the given type of inode. */
-static unsigned int btrfs_mask_flags(umode_t mode, unsigned int flags)
+static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
+		unsigned int flags)
 {
-	if (S_ISDIR(mode))
+	if (S_ISDIR(inode->i_mode))
 		return flags;
-	else if (S_ISREG(mode))
+	else if (S_ISREG(inode->i_mode))
 		return flags & ~FS_DIRSYNC_FL;
 	else
 		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
 }
 
 /*
- * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
+ * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
+ * ioctl.
  */
-static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
+static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
 {
 	unsigned int iflags = 0;
 
@@ -136,20 +138,20 @@
 /*
  * Update inode->i_flags based on the btrfs internal flags.
  */
-void btrfs_update_iflags(struct inode *inode)
+void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
 {
-	struct btrfs_inode *ip = BTRFS_I(inode);
+	struct btrfs_inode *binode = BTRFS_I(inode);
 	unsigned int new_fl = 0;
 
-	if (ip->flags & BTRFS_INODE_SYNC)
+	if (binode->flags & BTRFS_INODE_SYNC)
 		new_fl |= S_SYNC;
-	if (ip->flags & BTRFS_INODE_IMMUTABLE)
+	if (binode->flags & BTRFS_INODE_IMMUTABLE)
 		new_fl |= S_IMMUTABLE;
-	if (ip->flags & BTRFS_INODE_APPEND)
+	if (binode->flags & BTRFS_INODE_APPEND)
 		new_fl |= S_APPEND;
-	if (ip->flags & BTRFS_INODE_NOATIME)
+	if (binode->flags & BTRFS_INODE_NOATIME)
 		new_fl |= S_NOATIME;
-	if (ip->flags & BTRFS_INODE_DIRSYNC)
+	if (binode->flags & BTRFS_INODE_DIRSYNC)
 		new_fl |= S_DIRSYNC;
 
 	set_mask_bits(&inode->i_flags,
@@ -159,15 +161,16 @@
 
 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
 {
-	struct btrfs_inode *ip = BTRFS_I(file_inode(file));
-	unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
+	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
+	unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);
 
 	if (copy_to_user(arg, &flags, sizeof(flags)))
 		return -EFAULT;
 	return 0;
 }
 
-static int check_flags(unsigned int flags)
+/* Check if @flags are a supported and valid set of FS_*_FL flags */
+static int check_fsflags(unsigned int flags)
 {
 	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
 		      FS_NOATIME_FL | FS_NODUMP_FL | \
@@ -186,13 +189,13 @@
 {
 	struct inode *inode = file_inode(file);
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_inode *ip = BTRFS_I(inode);
-	struct btrfs_root *root = ip->root;
+	struct btrfs_inode *binode = BTRFS_I(inode);
+	struct btrfs_root *root = binode->root;
 	struct btrfs_trans_handle *trans;
-	unsigned int flags, oldflags;
+	unsigned int fsflags, old_fsflags;
 	int ret;
-	u64 ip_oldflags;
-	unsigned int i_oldflags;
+	u64 old_flags;
+	unsigned int old_i_flags;
 	umode_t mode;
 
 	if (!inode_owner_or_capable(inode))
@@ -201,10 +204,10 @@
 	if (btrfs_root_readonly(root))
 		return -EROFS;
 
-	if (copy_from_user(&flags, arg, sizeof(flags)))
+	if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
 		return -EFAULT;
 
-	ret = check_flags(flags);
+	ret = check_fsflags(fsflags);
 	if (ret)
 		return ret;
 
@@ -214,44 +217,44 @@
 
 	inode_lock(inode);
 
-	ip_oldflags = ip->flags;
-	i_oldflags = inode->i_flags;
+	old_flags = binode->flags;
+	old_i_flags = inode->i_flags;
 	mode = inode->i_mode;
 
-	flags = btrfs_mask_flags(inode->i_mode, flags);
-	oldflags = btrfs_flags_to_ioctl(ip->flags);
-	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
+	fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
+	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+	if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
 		if (!capable(CAP_LINUX_IMMUTABLE)) {
 			ret = -EPERM;
 			goto out_unlock;
 		}
 	}
 
-	if (flags & FS_SYNC_FL)
-		ip->flags |= BTRFS_INODE_SYNC;
+	if (fsflags & FS_SYNC_FL)
+		binode->flags |= BTRFS_INODE_SYNC;
 	else
-		ip->flags &= ~BTRFS_INODE_SYNC;
-	if (flags & FS_IMMUTABLE_FL)
-		ip->flags |= BTRFS_INODE_IMMUTABLE;
+		binode->flags &= ~BTRFS_INODE_SYNC;
+	if (fsflags & FS_IMMUTABLE_FL)
+		binode->flags |= BTRFS_INODE_IMMUTABLE;
 	else
-		ip->flags &= ~BTRFS_INODE_IMMUTABLE;
-	if (flags & FS_APPEND_FL)
-		ip->flags |= BTRFS_INODE_APPEND;
+		binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+	if (fsflags & FS_APPEND_FL)
+		binode->flags |= BTRFS_INODE_APPEND;
 	else
-		ip->flags &= ~BTRFS_INODE_APPEND;
-	if (flags & FS_NODUMP_FL)
-		ip->flags |= BTRFS_INODE_NODUMP;
+		binode->flags &= ~BTRFS_INODE_APPEND;
+	if (fsflags & FS_NODUMP_FL)
+		binode->flags |= BTRFS_INODE_NODUMP;
 	else
-		ip->flags &= ~BTRFS_INODE_NODUMP;
-	if (flags & FS_NOATIME_FL)
-		ip->flags |= BTRFS_INODE_NOATIME;
+		binode->flags &= ~BTRFS_INODE_NODUMP;
+	if (fsflags & FS_NOATIME_FL)
+		binode->flags |= BTRFS_INODE_NOATIME;
 	else
-		ip->flags &= ~BTRFS_INODE_NOATIME;
-	if (flags & FS_DIRSYNC_FL)
-		ip->flags |= BTRFS_INODE_DIRSYNC;
+		binode->flags &= ~BTRFS_INODE_NOATIME;
+	if (fsflags & FS_DIRSYNC_FL)
+		binode->flags |= BTRFS_INODE_DIRSYNC;
 	else
-		ip->flags &= ~BTRFS_INODE_DIRSYNC;
-	if (flags & FS_NOCOW_FL) {
+		binode->flags &= ~BTRFS_INODE_DIRSYNC;
+	if (fsflags & FS_NOCOW_FL) {
 		if (S_ISREG(mode)) {
 			/*
 			 * It's safe to turn csums off here, no extents exist.
@@ -259,10 +262,10 @@
 			 * status of the file and will not set it.
 			 */
 			if (inode->i_size == 0)
-				ip->flags |= BTRFS_INODE_NODATACOW
-					   | BTRFS_INODE_NODATASUM;
+				binode->flags |= BTRFS_INODE_NODATACOW
+					      | BTRFS_INODE_NODATASUM;
 		} else {
-			ip->flags |= BTRFS_INODE_NODATACOW;
+			binode->flags |= BTRFS_INODE_NODATACOW;
 		}
 	} else {
 		/*
@@ -270,10 +273,10 @@
 		 */
 		if (S_ISREG(mode)) {
 			if (inode->i_size == 0)
-				ip->flags &= ~(BTRFS_INODE_NODATACOW
+				binode->flags &= ~(BTRFS_INODE_NODATACOW
 				             | BTRFS_INODE_NODATASUM);
 		} else {
-			ip->flags &= ~BTRFS_INODE_NODATACOW;
+			binode->flags &= ~BTRFS_INODE_NODATACOW;
 		}
 	}
 
@@ -282,18 +285,18 @@
 	 * flag may be changed automatically if compression code won't make
 	 * things smaller.
 	 */
-	if (flags & FS_NOCOMP_FL) {
-		ip->flags &= ~BTRFS_INODE_COMPRESS;
-		ip->flags |= BTRFS_INODE_NOCOMPRESS;
+	if (fsflags & FS_NOCOMP_FL) {
+		binode->flags &= ~BTRFS_INODE_COMPRESS;
+		binode->flags |= BTRFS_INODE_NOCOMPRESS;
 
 		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
 		if (ret && ret != -ENODATA)
 			goto out_drop;
-	} else if (flags & FS_COMPR_FL) {
+	} else if (fsflags & FS_COMPR_FL) {
 		const char *comp;
 
-		ip->flags |= BTRFS_INODE_COMPRESS;
-		ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+		binode->flags |= BTRFS_INODE_COMPRESS;
+		binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
 
 		comp = btrfs_compress_type2str(fs_info->compress_type);
 		if (!comp || comp[0] == 0)
@@ -308,7 +311,7 @@
 		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
 		if (ret && ret != -ENODATA)
 			goto out_drop;
-		ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
+		binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
 	}
 
 	trans = btrfs_start_transaction(root, 1);
@@ -317,7 +320,7 @@
 		goto out_drop;
 	}
 
-	btrfs_update_iflags(inode);
+	btrfs_sync_inode_flags_to_i_flags(inode);
 	inode_inc_iversion(inode);
 	inode->i_ctime = current_time(inode);
 	ret = btrfs_update_inode(trans, root, inode);
@@ -325,8 +328,8 @@
 	btrfs_end_transaction(trans);
  out_drop:
 	if (ret) {
-		ip->flags = ip_oldflags;
-		inode->i_flags = i_oldflags;
+		binode->flags = old_flags;
+		inode->i_flags = old_i_flags;
 	}
 
  out_unlock:
@@ -335,6 +338,148 @@
 	return ret;
 }
 
+/*
+ * Translate btrfs internal inode flags to xflags as expected by the
+ * FS_IOC_FSGETXATTR ioctl. Only the supported flags are kept; unknown flags
+ * are silently dropped.
+ */
+static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
+{
+	unsigned int xflags = 0;
+
+	if (flags & BTRFS_INODE_APPEND)
+		xflags |= FS_XFLAG_APPEND;
+	if (flags & BTRFS_INODE_IMMUTABLE)
+		xflags |= FS_XFLAG_IMMUTABLE;
+	if (flags & BTRFS_INODE_NOATIME)
+		xflags |= FS_XFLAG_NOATIME;
+	if (flags & BTRFS_INODE_NODUMP)
+		xflags |= FS_XFLAG_NODUMP;
+	if (flags & BTRFS_INODE_SYNC)
+		xflags |= FS_XFLAG_SYNC;
+
+	return xflags;
+}
+
+/* Check if @flags are a supported and valid set of FS_XFLAG_* flags */
+static int check_xflags(unsigned int flags)
+{
+	if (flags & ~(FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE | FS_XFLAG_NOATIME |
+		      FS_XFLAG_NODUMP | FS_XFLAG_SYNC))
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+/*
+ * Set the xflags from the internal inode flags. The remaining items of fsxattr
+ * are zeroed.
+ */
+static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
+{
+	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
+	struct fsxattr fa;
+
+	memset(&fa, 0, sizeof(fa));
+	fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
+
+	if (copy_to_user(arg, &fa, sizeof(fa)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
+{
+	struct inode *inode = file_inode(file);
+	struct btrfs_inode *binode = BTRFS_I(inode);
+	struct btrfs_root *root = binode->root;
+	struct btrfs_trans_handle *trans;
+	struct fsxattr fa;
+	unsigned old_flags;
+	unsigned old_i_flags;
+	int ret = 0;
+
+	if (!inode_owner_or_capable(inode))
+		return -EPERM;
+
+	if (btrfs_root_readonly(root))
+		return -EROFS;
+
+	memset(&fa, 0, sizeof(fa));
+	if (copy_from_user(&fa, arg, sizeof(fa)))
+		return -EFAULT;
+
+	ret = check_xflags(fa.fsx_xflags);
+	if (ret)
+		return ret;
+
+	if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
+		return -EOPNOTSUPP;
+
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
+
+	inode_lock(inode);
+
+	old_flags = binode->flags;
+	old_i_flags = inode->i_flags;
+
+	/* Changing append-only or immutable flags requires CAP_LINUX_IMMUTABLE */
+	if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
+	     (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
+	    !capable(CAP_LINUX_IMMUTABLE)) {
+		ret = -EPERM;
+		goto out_unlock;
+	}
+
+	if (fa.fsx_xflags & FS_XFLAG_SYNC)
+		binode->flags |= BTRFS_INODE_SYNC;
+	else
+		binode->flags &= ~BTRFS_INODE_SYNC;
+	if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
+		binode->flags |= BTRFS_INODE_IMMUTABLE;
+	else
+		binode->flags &= ~BTRFS_INODE_IMMUTABLE;
+	if (fa.fsx_xflags & FS_XFLAG_APPEND)
+		binode->flags |= BTRFS_INODE_APPEND;
+	else
+		binode->flags &= ~BTRFS_INODE_APPEND;
+	if (fa.fsx_xflags & FS_XFLAG_NODUMP)
+		binode->flags |= BTRFS_INODE_NODUMP;
+	else
+		binode->flags &= ~BTRFS_INODE_NODUMP;
+	if (fa.fsx_xflags & FS_XFLAG_NOATIME)
+		binode->flags |= BTRFS_INODE_NOATIME;
+	else
+		binode->flags &= ~BTRFS_INODE_NOATIME;
+
+	/* 1 item for the inode */
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out_unlock;
+	}
+
+	btrfs_sync_inode_flags_to_i_flags(inode);
+	inode_inc_iversion(inode);
+	inode->i_ctime = current_time(inode);
+	ret = btrfs_update_inode(trans, root, inode);
+
+	btrfs_end_transaction(trans);
+
+out_unlock:
+	if (ret) {
+		binode->flags = old_flags;
+		inode->i_flags = old_i_flags;
+	}
+
+	inode_unlock(inode);
+	mnt_drop_write_file(file);
+
+	return ret;
+}
+
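
The pair of handlers above plugs btrfs into the generic fsxattr interface, so
these attributes can be driven from userspace with the standard
FS_IOC_FSGETXATTR/FS_IOC_FSSETXATTR ioctls from <linux/fs.h>. A minimal
read-modify-write sketch (error handling abbreviated; note that btrfs rejects
a nonzero fsx_extsize, fsx_projid or fsx_cowextsize with -EOPNOTSUPP):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>

	/* Turn on the NOATIME xflag for @path, leaving the other xflags alone. */
	static int set_noatime_xflag(const char *path)
	{
		struct fsxattr fa;
		int ret, fd = open(path, O_RDONLY);

		if (fd < 0)
			return -1;
		ret = ioctl(fd, FS_IOC_FSGETXATTR, &fa);	/* read current xflags */
		if (!ret) {
			fa.fsx_xflags |= FS_XFLAG_NOATIME;	/* one of the flags btrfs accepts */
			ret = ioctl(fd, FS_IOC_FSSETXATTR, &fa);
		}
		close(fd);
		return ret;
	}
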
 static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 {
 	struct inode *inode = file_inode(file);
@@ -424,7 +569,6 @@
 	u64 objectid;
 	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
 	u64 index = 0;
-	u64 qgroup_reserved;
 	uuid_le new_uuid;
 
 	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
@@ -449,8 +593,7 @@
 	 * The same as the snapshot creation, please see the comment
 	 * of create_snapshot().
 	 */
-	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
-					       8, &qgroup_reserved, false);
+	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
 	if (ret)
 		goto fail_free;
 
@@ -573,7 +716,7 @@
 				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
 	BUG_ON(ret);
 
-	ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid,
+	ret = btrfs_uuid_tree_add(trans, root_item->uuid,
 				  BTRFS_UUID_KEY_SUBVOL, objectid);
 	if (ret)
 		btrfs_abort_transaction(trans, ret);
@@ -640,7 +783,7 @@
 	wait_event(root->subv_writers->wait,
 		   percpu_counter_sum(&root->subv_writers->counter) == 0);
 
-	ret = btrfs_start_delalloc_inodes(root, 0);
+	ret = btrfs_start_delalloc_inodes(root);
 	if (ret)
 		goto dec_and_free;
 
@@ -658,7 +801,6 @@
 	 */
 	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
 					&pending_snapshot->block_rsv, 8,
-					&pending_snapshot->qgroup_reserved,
 					false);
 	if (ret)
 		goto dec_and_free;
@@ -1457,7 +1599,6 @@
 		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 	}
 
-	mutex_lock(&fs_info->volume_mutex);
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args)) {
 		ret = PTR_ERR(vol_args);
@@ -1565,7 +1706,6 @@
 out_free:
 	kfree(vol_args);
 out:
-	mutex_unlock(&fs_info->volume_mutex);
 	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	mnt_drop_write_file(file);
 	return ret;
@@ -1832,60 +1972,6 @@
 	return ret;
 }
 
-/*
- * helper to check if the subvolume references other subvolumes
- */
-static noinline int may_destroy_subvol(struct btrfs_root *root)
-{
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_path *path;
-	struct btrfs_dir_item *di;
-	struct btrfs_key key;
-	u64 dir_id;
-	int ret;
-
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-
-	/* Make sure this root isn't set as the default subvol */
-	dir_id = btrfs_super_root_dir(fs_info->super_copy);
-	di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
-				   dir_id, "default", 7, 0);
-	if (di && !IS_ERR(di)) {
-		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
-		if (key.objectid == root->root_key.objectid) {
-			ret = -EPERM;
-			btrfs_err(fs_info,
-				  "deleting default subvolume %llu is not allowed",
-				  key.objectid);
-			goto out;
-		}
-		btrfs_release_path(path);
-	}
-
-	key.objectid = root->root_key.objectid;
-	key.type = BTRFS_ROOT_REF_KEY;
-	key.offset = (u64)-1;
-
-	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
-	if (ret < 0)
-		goto out;
-	BUG_ON(ret == 0);
-
-	ret = 0;
-	if (path->slots[0] > 0) {
-		path->slots[0]--;
-		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-		if (key.objectid == root->root_key.objectid &&
-		    key.type == BTRFS_ROOT_REF_KEY)
-			ret = -ENOTEMPTY;
-	}
-out:
-	btrfs_free_path(path);
-	return ret;
-}
-
 static noinline int key_in_sk(struct btrfs_key *key,
 			      struct btrfs_ioctl_search_key *sk)
 {
@@ -2066,7 +2152,7 @@
 		root = btrfs_read_fs_root_no_name(info, &key);
 		if (IS_ERR(root)) {
 			btrfs_free_path(path);
-			return -ENOENT;
+			return PTR_ERR(root);
 		}
 	}
 
@@ -2200,8 +2286,7 @@
 	key.offset = (u64)-1;
 	root = btrfs_read_fs_root_no_name(info, &key);
 	if (IS_ERR(root)) {
-		btrfs_err(info, "could not find root %llu", tree_id);
-		ret = -ENOENT;
+		ret = PTR_ERR(root);
 		goto out;
 	}
 
@@ -2256,6 +2341,165 @@
 	return ret;
 }
 
+static int btrfs_search_path_in_tree_user(struct inode *inode,
+				struct btrfs_ioctl_ino_lookup_user_args *args)
+{
+	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+	struct super_block *sb = inode->i_sb;
+	struct btrfs_key upper_limit = BTRFS_I(inode)->location;
+	u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
+	u64 dirid = args->dirid;
+	unsigned long item_off;
+	unsigned long item_len;
+	struct btrfs_inode_ref *iref;
+	struct btrfs_root_ref *rref;
+	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct btrfs_key key, key2;
+	struct extent_buffer *leaf;
+	struct inode *temp_inode;
+	char *ptr;
+	int slot;
+	int len;
+	int total_len = 0;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * If the bottom subvolume does not exist directly under upper_limit,
+	 * construct the path from the bottom up.
+	 */
+	if (dirid != upper_limit.objectid) {
+		ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
+
+		key.objectid = treeid;
+		key.type = BTRFS_ROOT_ITEM_KEY;
+		key.offset = (u64)-1;
+		root = btrfs_read_fs_root_no_name(fs_info, &key);
+		if (IS_ERR(root)) {
+			ret = PTR_ERR(root);
+			goto out;
+		}
+
+		key.objectid = dirid;
+		key.type = BTRFS_INODE_REF_KEY;
+		key.offset = (u64)-1;
+		while (1) {
+			ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+			if (ret < 0) {
+				goto out;
+			} else if (ret > 0) {
+				ret = btrfs_previous_item(root, path, dirid,
+							  BTRFS_INODE_REF_KEY);
+				if (ret < 0) {
+					goto out;
+				} else if (ret > 0) {
+					ret = -ENOENT;
+					goto out;
+				}
+			}
+
+			leaf = path->nodes[0];
+			slot = path->slots[0];
+			btrfs_item_key_to_cpu(leaf, &key, slot);
+
+			iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
+			len = btrfs_inode_ref_name_len(leaf, iref);
+			ptr -= len + 1;
+			total_len += len + 1;
+			if (ptr < args->path) {
+				ret = -ENAMETOOLONG;
+				goto out;
+			}
+
+			*(ptr + len) = '/';
+			read_extent_buffer(leaf, ptr,
+					(unsigned long)(iref + 1), len);
+
+			/* Check the read+exec permission of this directory */
+			ret = btrfs_previous_item(root, path, dirid,
+						  BTRFS_INODE_ITEM_KEY);
+			if (ret < 0) {
+				goto out;
+			} else if (ret > 0) {
+				ret = -ENOENT;
+				goto out;
+			}
+
+			leaf = path->nodes[0];
+			slot = path->slots[0];
+			btrfs_item_key_to_cpu(leaf, &key2, slot);
+			if (key2.objectid != dirid) {
+				ret = -ENOENT;
+				goto out;
+			}
+
+			temp_inode = btrfs_iget(sb, &key2, root, NULL);
+			ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
+			iput(temp_inode);
+			if (ret) {
+				ret = -EACCES;
+				goto out;
+			}
+
+			if (key.offset == upper_limit.objectid)
+				break;
+			if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
+				ret = -EACCES;
+				goto out;
+			}
+
+			btrfs_release_path(path);
+			key.objectid = key.offset;
+			key.offset = (u64)-1;
+			dirid = key.objectid;
+		}
+
+		memmove(args->path, ptr, total_len);
+		args->path[total_len] = '\0';
+		btrfs_release_path(path);
+	}
+
+	/* Get the bottom subvolume's name from ROOT_REF */
+	root = fs_info->tree_root;
+	key.objectid = treeid;
+	key.type = BTRFS_ROOT_REF_KEY;
+	key.offset = args->treeid;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		goto out;
+	} else if (ret > 0) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	slot = path->slots[0];
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+
+	item_off = btrfs_item_ptr_offset(leaf, slot);
+	item_len = btrfs_item_size_nr(leaf, slot);
+	/* Check if dirid in ROOT_REF corresponds to passed dirid */
+	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+	if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Copy subvolume's name */
+	item_off += sizeof(struct btrfs_root_ref);
+	item_len -= sizeof(struct btrfs_root_ref);
+	read_extent_buffer(leaf, args->name, item_off, item_len);
+	args->name[item_len] = 0;
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
 static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 					   void __user *argp)
 {
@@ -2298,6 +2542,265 @@
 	return ret;
 }
 
+/*
+ * Version of ino_lookup ioctl (unprivileged)
+ *
+ * The main differences from ino_lookup ioctl are:
+ *
+ *   1. Read + Exec permission will be checked using inode_permission() during
+ *      path construction. -EACCES will be returned in case of failure.
+ *   2. Path construction will be stopped at the inode number which corresponds
+ *      to the fd with which this ioctl is called. If the constructed path does
+ *      not exist under the fd's inode, -EACCES will be returned.
+ *   3. The name of the bottom subvolume is also looked up and filled in.
+ */
+static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
+{
+	struct btrfs_ioctl_ino_lookup_user_args *args;
+	struct inode *inode;
+	int ret;
+
+	args = memdup_user(argp, sizeof(*args));
+	if (IS_ERR(args))
+		return PTR_ERR(args);
+
+	inode = file_inode(file);
+
+	if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
+	    BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
+		/*
+		 * The subvolume does not exist under the fd with which this
+		 * ioctl was called
+		 */
+		kfree(args);
+		return -EACCES;
+	}
+
+	ret = btrfs_search_path_in_tree_user(inode, args);
+
+	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+		ret = -EFAULT;
+
+	kfree(args);
+	return ret;
+}
+
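
Assuming the uapi definitions that accompany this ioctl (BTRFS_IOC_INO_LOOKUP_USER
and struct btrfs_ioctl_ino_lookup_user_args in <linux/btrfs.h>, with dirid/treeid
as inputs and path/name as outputs), an unprivileged caller would use it roughly
like this sketch:

	#include <stdio.h>
	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/*
	 * Resolve the path of a subvolume relative to an fd the caller can
	 * open; treeid/dirid would typically come from
	 * BTRFS_IOC_GET_SUBVOL_ROOTREF (see below).
	 */
	static int print_subvol_path(int fd, __u64 treeid, __u64 dirid)
	{
		struct btrfs_ioctl_ino_lookup_user_args args = {
			.treeid = treeid,	/* subvolume whose name we want */
			.dirid  = dirid,	/* directory that contains it */
		};

		if (ioctl(fd, BTRFS_IOC_INO_LOOKUP_USER, &args) < 0)
			return -1;	/* e.g. -EACCES if fd can't reach the path */
		printf("%s%s\n", args.path, args.name);
		return 0;
	}
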
+/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
+static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
+{
+	struct btrfs_ioctl_get_subvol_info_args *subvol_info;
+	struct btrfs_fs_info *fs_info;
+	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_root_item *root_item;
+	struct btrfs_root_ref *rref;
+	struct extent_buffer *leaf;
+	unsigned long item_off;
+	unsigned long item_len;
+	struct inode *inode;
+	int slot;
+	int ret = 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL);
+	if (!subvol_info) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+
+	inode = file_inode(file);
+	fs_info = BTRFS_I(inode)->root->fs_info;
+
+	/* Get root_item of inode's subvolume */
+	key.objectid = BTRFS_I(inode)->root->root_key.objectid;
+	key.type = BTRFS_ROOT_ITEM_KEY;
+	key.offset = (u64)-1;
+	root = btrfs_read_fs_root_no_name(fs_info, &key);
+	if (IS_ERR(root)) {
+		ret = PTR_ERR(root);
+		goto out;
+	}
+	root_item = &root->root_item;
+
+	subvol_info->treeid = key.objectid;
+
+	subvol_info->generation = btrfs_root_generation(root_item);
+	subvol_info->flags = btrfs_root_flags(root_item);
+
+	memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
+	memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
+						    BTRFS_UUID_SIZE);
+	memcpy(subvol_info->received_uuid, root_item->received_uuid,
+						    BTRFS_UUID_SIZE);
+
+	subvol_info->ctransid = btrfs_root_ctransid(root_item);
+	subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime);
+	subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime);
+
+	subvol_info->otransid = btrfs_root_otransid(root_item);
+	subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime);
+	subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime);
+
+	subvol_info->stransid = btrfs_root_stransid(root_item);
+	subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime);
+	subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime);
+
+	subvol_info->rtransid = btrfs_root_rtransid(root_item);
+	subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime);
+	subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime);
+
+	if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
+		/* Search root tree for ROOT_BACKREF of this subvolume */
+		root = fs_info->tree_root;
+
+		key.type = BTRFS_ROOT_BACKREF_KEY;
+		key.offset = 0;
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0) {
+			goto out;
+		} else if (path->slots[0] >=
+			   btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0) {
+				goto out;
+			} else if (ret > 0) {
+				ret = -EUCLEAN;
+				goto out;
+			}
+		}
+
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid == subvol_info->treeid &&
+		    key.type == BTRFS_ROOT_BACKREF_KEY) {
+			subvol_info->parent_id = key.offset;
+
+			rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+			subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref);
+
+			item_off = btrfs_item_ptr_offset(leaf, slot)
+					+ sizeof(struct btrfs_root_ref);
+			item_len = btrfs_item_size_nr(leaf, slot)
+					- sizeof(struct btrfs_root_ref);
+			read_extent_buffer(leaf, subvol_info->name,
+					   item_off, item_len);
+		} else {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
+		ret = -EFAULT;
+
+out:
+	btrfs_free_path(path);
+	kzfree(subvol_info);
+	return ret;
+}
+
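
The corresponding userspace call is a plain read-only ioctl; a sketch, assuming
the matching uapi struct (whose field names mirror the subvol_info fields filled
in above):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	static int print_subvol_info(const char *subvol_path)
	{
		struct btrfs_ioctl_get_subvol_info_args info;
		int ret, fd = open(subvol_path, O_RDONLY);

		if (fd < 0)
			return -1;
		ret = ioctl(fd, BTRFS_IOC_GET_SUBVOL_INFO, &info);
		if (!ret)
			printf("id=%llu parent=%llu gen=%llu name=%s\n",
			       (unsigned long long)info.treeid,
			       (unsigned long long)info.parent_id,
			       (unsigned long long)info.generation, info.name);
		close(fd);
		return ret;
	}
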
+/*
+ * Return ROOT_REF information of the subvolume containing this inode
+ * except the subvolume name.
+ */
+static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
+{
+	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
+	struct btrfs_root_ref *rref;
+	struct btrfs_root *root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct extent_buffer *leaf;
+	struct inode *inode;
+	u64 objectid;
+	int slot;
+	int ret;
+	u8 found;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	rootrefs = memdup_user(argp, sizeof(*rootrefs));
+	if (IS_ERR(rootrefs)) {
+		btrfs_free_path(path);
+		return PTR_ERR(rootrefs);
+	}
+
+	inode = file_inode(file);
+	root = BTRFS_I(inode)->root->fs_info->tree_root;
+	objectid = BTRFS_I(inode)->root->root_key.objectid;
+
+	key.objectid = objectid;
+	key.type = BTRFS_ROOT_REF_KEY;
+	key.offset = rootrefs->min_treeid;
+	found = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		goto out;
+	} else if (path->slots[0] >=
+		   btrfs_header_nritems(path->nodes[0])) {
+		ret = btrfs_next_leaf(root, path);
+		if (ret < 0) {
+			goto out;
+		} else if (ret > 0) {
+			ret = -EUCLEAN;
+			goto out;
+		}
+	}
+	while (1) {
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		btrfs_item_key_to_cpu(leaf, &key, slot);
+		if (key.objectid != objectid || key.type != BTRFS_ROOT_REF_KEY) {
+			ret = 0;
+			goto out;
+		}
+
+		if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
+			ret = -EOVERFLOW;
+			goto out;
+		}
+
+		rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
+		rootrefs->rootref[found].treeid = key.offset;
+		rootrefs->rootref[found].dirid =
+				  btrfs_root_ref_dirid(leaf, rref);
+		found++;
+
+		ret = btrfs_next_item(root, path);
+		if (ret < 0) {
+			goto out;
+		} else if (ret > 0) {
+			ret = -EUCLEAN;
+			goto out;
+		}
+	}
+
+out:
+	if (!ret || ret == -EOVERFLOW) {
+		rootrefs->num_items = found;
+		/* update min_treeid for next search */
+		if (found)
+			rootrefs->min_treeid =
+				rootrefs->rootref[found - 1].treeid + 1;
+		if (copy_to_user(argp, rootrefs, sizeof(*rootrefs)))
+			ret = -EFAULT;
+	}
+
+	kfree(rootrefs);
+	btrfs_free_path(path);
+
+	return ret;
+}
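
The kernel side above fills at most BTRFS_MAX_ROOTREF_BUFFER_NUM entries per call, returns -EOVERFLOW when more remain, and pre-advances min_treeid for the next call. A hedged sketch of the intended pagination from userspace (reusing the includes from the previous sketch plus <errno.h>; the args layout is assumed from the code above):

	struct btrfs_ioctl_get_subvol_rootref_args refs = { .min_treeid = 0 };
	int i, ret;

	for (;;) {
		ret = ioctl(fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &refs);
		if (ret < 0 && errno != EOVERFLOW) {
			perror("BTRFS_IOC_GET_SUBVOL_ROOTREF");
			break;
		}
		for (i = 0; i < refs.num_items; i++)
			printf("treeid %llu dirid %llu\n",
			       (unsigned long long)refs.rootref[i].treeid,
			       (unsigned long long)refs.rootref[i].dirid);
		if (ret == 0)		/* no overflow: listing is complete */
			break;
		/* refs.min_treeid was already advanced by the kernel */
	}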
+
 static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 					     void __user *arg)
 {
@@ -2309,12 +2812,7 @@
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_root *dest = NULL;
 	struct btrfs_ioctl_vol_args *vol_args;
-	struct btrfs_trans_handle *trans;
-	struct btrfs_block_rsv block_rsv;
-	u64 root_flags;
-	u64 qgroup_reserved;
 	int namelen;
-	int ret;
 	int err = 0;
 
 	if (!S_ISDIR(dir->i_mode))
@@ -2398,133 +2896,11 @@
 	}
 
 	inode_lock(inode);
-
-	/*
-	 * Don't allow to delete a subvolume with send in progress. This is
-	 * inside the i_mutex so the error handling that has to drop the bit
-	 * again is not run concurrently.
-	 */
-	spin_lock(&dest->root_item_lock);
-	root_flags = btrfs_root_flags(&dest->root_item);
-	if (dest->send_in_progress == 0) {
-		btrfs_set_root_flags(&dest->root_item,
-				root_flags | BTRFS_ROOT_SUBVOL_DEAD);
-		spin_unlock(&dest->root_item_lock);
-	} else {
-		spin_unlock(&dest->root_item_lock);
-		btrfs_warn(fs_info,
-			   "Attempt to delete subvolume %llu during send",
-			   dest->root_key.objectid);
-		err = -EPERM;
-		goto out_unlock_inode;
-	}
-
-	down_write(&fs_info->subvol_sem);
-
-	err = may_destroy_subvol(dest);
-	if (err)
-		goto out_up_write;
-
-	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
-	/*
-	 * One for dir inode, two for dir entries, two for root
-	 * ref/backref.
-	 */
-	err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
-					       5, &qgroup_reserved, true);
-	if (err)
-		goto out_up_write;
-
-	trans = btrfs_start_transaction(root, 0);
-	if (IS_ERR(trans)) {
-		err = PTR_ERR(trans);
-		goto out_release;
-	}
-	trans->block_rsv = &block_rsv;
-	trans->bytes_reserved = block_rsv.size;
-
-	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
-
-	ret = btrfs_unlink_subvol(trans, root, dir,
-				dest->root_key.objectid,
-				dentry->d_name.name,
-				dentry->d_name.len);
-	if (ret) {
-		err = ret;
-		btrfs_abort_transaction(trans, ret);
-		goto out_end_trans;
-	}
-
-	btrfs_record_root_in_trans(trans, dest);
-
-	memset(&dest->root_item.drop_progress, 0,
-		sizeof(dest->root_item.drop_progress));
-	dest->root_item.drop_level = 0;
-	btrfs_set_root_refs(&dest->root_item, 0);
-
-	if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
-		ret = btrfs_insert_orphan_item(trans,
-					fs_info->tree_root,
-					dest->root_key.objectid);
-		if (ret) {
-			btrfs_abort_transaction(trans, ret);
-			err = ret;
-			goto out_end_trans;
-		}
-	}
-
-	ret = btrfs_uuid_tree_rem(trans, fs_info, dest->root_item.uuid,
-				  BTRFS_UUID_KEY_SUBVOL,
-				  dest->root_key.objectid);
-	if (ret && ret != -ENOENT) {
-		btrfs_abort_transaction(trans, ret);
-		err = ret;
-		goto out_end_trans;
-	}
-	if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
-		ret = btrfs_uuid_tree_rem(trans, fs_info,
-					  dest->root_item.received_uuid,
-					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
-					  dest->root_key.objectid);
-		if (ret && ret != -ENOENT) {
-			btrfs_abort_transaction(trans, ret);
-			err = ret;
-			goto out_end_trans;
-		}
-	}
-
-out_end_trans:
-	trans->block_rsv = NULL;
-	trans->bytes_reserved = 0;
-	ret = btrfs_end_transaction(trans);
-	if (ret && !err)
-		err = ret;
-	inode->i_flags |= S_DEAD;
-out_release:
-	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
-out_up_write:
-	up_write(&fs_info->subvol_sem);
-	if (err) {
-		spin_lock(&dest->root_item_lock);
-		root_flags = btrfs_root_flags(&dest->root_item);
-		btrfs_set_root_flags(&dest->root_item,
-				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
-		spin_unlock(&dest->root_item_lock);
-	}
-out_unlock_inode:
+	err = btrfs_delete_subvolume(dir, dentry);
 	inode_unlock(inode);
-	if (!err) {
-		d_invalidate(dentry);
-		btrfs_invalidate_inodes(dest);
+	if (!err)
 		d_delete(dentry);
-		ASSERT(dest->send_in_progress == 0);
 
-		/* the last ref */
-		if (dest->ino_cache_inode) {
-			iput(dest->ino_cache_inode);
-			dest->ino_cache_inode = NULL;
-		}
-	}
 out_dput:
 	dput(dentry);
 out_unlock_dir:
@@ -2613,7 +2989,6 @@
 	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
 		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
 
-	mutex_lock(&fs_info->volume_mutex);
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args)) {
 		ret = PTR_ERR(vol_args);
@@ -2628,7 +3003,6 @@
 
 	kfree(vol_args);
 out:
-	mutex_unlock(&fs_info->volume_mutex);
 	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	return ret;
 }
@@ -2654,8 +3028,10 @@
 	}
 
 	/* Check for compatibility: reject unknown flags */
-	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED)
-		return -EOPNOTSUPP;
+	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
 		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
@@ -2954,8 +3330,6 @@
 			put_page(pg);
 		}
 	}
-	kfree(cmp->src_pages);
-	kfree(cmp->dst_pages);
 }
 
 static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
@@ -2964,40 +3338,14 @@
 {
 	int ret;
 	int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
-	struct page **src_pgarr, **dst_pgarr;
 
-	/*
-	 * We must gather up all the pages before we initiate our
-	 * extent locking. We use an array for the page pointers. Size
-	 * of the array is bounded by len, which is in turn bounded by
-	 * BTRFS_MAX_DEDUPE_LEN.
-	 */
-	src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
-	dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
-	if (!src_pgarr || !dst_pgarr) {
-		kfree(src_pgarr);
-		kfree(dst_pgarr);
-		return -ENOMEM;
-	}
 	cmp->num_pages = num_pages;
-	cmp->src_pages = src_pgarr;
-	cmp->dst_pages = dst_pgarr;
 
-	/*
-	 * If deduping ranges in the same inode, locking rules make it mandatory
-	 * to always lock pages in ascending order to avoid deadlocks with
-	 * concurrent tasks (such as starting writeback/delalloc).
-	 */
-	if (src == dst && dst_loff < loff) {
-		swap(src_pgarr, dst_pgarr);
-		swap(loff, dst_loff);
-	}
-
-	ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff);
+	ret = gather_extent_pages(src, cmp->src_pages, num_pages, loff);
 	if (ret)
 		goto out;
 
-	ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff);
+	ret = gather_extent_pages(dst, cmp->dst_pages, num_pages, dst_loff);
 
 out:
 	if (ret)
@@ -3067,31 +3415,23 @@
 	return 0;
 }
 
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
-			     struct inode *dst, u64 dst_loff)
+static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
+				   struct inode *dst, u64 dst_loff,
+				   struct cmp_pages *cmp)
 {
 	int ret;
 	u64 len = olen;
-	struct cmp_pages cmp;
 	bool same_inode = (src == dst);
 	u64 same_lock_start = 0;
 	u64 same_lock_len = 0;
 
-	if (len == 0)
-		return 0;
-
-	if (same_inode)
-		inode_lock(src);
-	else
-		btrfs_double_inode_lock(src, dst);
-
 	ret = extent_same_check_offsets(src, loff, &len, olen);
 	if (ret)
-		goto out_unlock;
+		return ret;
 
 	ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
 	if (ret)
-		goto out_unlock;
+		return ret;
 
 	if (same_inode) {
 		/*
@@ -3108,32 +3448,21 @@
 		 * allow an unaligned length so long as it ends at
 		 * i_size.
 		 */
-		if (len != olen) {
-			ret = -EINVAL;
-			goto out_unlock;
-		}
+		if (len != olen)
+			return -EINVAL;
 
 		/* Check for overlapping ranges */
-		if (dst_loff + len > loff && dst_loff < loff + len) {
-			ret = -EINVAL;
-			goto out_unlock;
-		}
+		if (dst_loff + len > loff && dst_loff < loff + len)
+			return -EINVAL;
 
 		same_lock_start = min_t(u64, loff, dst_loff);
 		same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
 	}
 
-	/* don't make the dst file partly checksummed */
-	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
-	    (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
 again:
-	ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+	ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, cmp);
 	if (ret)
-		goto out_unlock;
+		return ret;
 
 	if (same_inode)
 		ret = lock_extent_range(src, same_lock_start, same_lock_len,
@@ -3154,7 +3483,7 @@
 		 * Ranges in the io trees already unlocked. Now unlock all
 		 * pages before waiting for all IO to complete.
 		 */
-		btrfs_cmp_data_free(&cmp);
+		btrfs_cmp_data_free(cmp);
 		if (same_inode) {
 			btrfs_wait_ordered_range(src, same_lock_start,
 						 same_lock_len);
@@ -3167,12 +3496,12 @@
 	ASSERT(ret == 0);
 	if (WARN_ON(ret)) {
 		/* ranges in the io trees already unlocked */
-		btrfs_cmp_data_free(&cmp);
+		btrfs_cmp_data_free(cmp);
 		return ret;
 	}
 
 	/* pass original length for comparison so we stay within i_size */
-	ret = btrfs_cmp_data(olen, &cmp);
+	ret = btrfs_cmp_data(olen, cmp);
 	if (ret == 0)
 		ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
 
@@ -3182,18 +3511,91 @@
 	else
 		btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
 
-	btrfs_cmp_data_free(&cmp);
+	btrfs_cmp_data_free(cmp);
+
+	return ret;
+}
+
+#define BTRFS_MAX_DEDUPE_LEN	SZ_16M
+
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
+			     struct inode *dst, u64 dst_loff)
+{
+	int ret;
+	struct cmp_pages cmp;
+	int num_pages = PAGE_ALIGN(BTRFS_MAX_DEDUPE_LEN) >> PAGE_SHIFT;
+	bool same_inode = (src == dst);
+	u64 i, tail_len, chunk_count;
+
+	if (olen == 0)
+		return 0;
+
+	if (same_inode)
+		inode_lock(src);
+	else
+		btrfs_double_inode_lock(src, dst);
+
+	/* don't make the dst file partly checksummed */
+	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+	    (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
+	chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
+	if (chunk_count == 0)
+		num_pages = PAGE_ALIGN(tail_len) >> PAGE_SHIFT;
+
+	/*
+	 * If deduping ranges in the same inode, locking rules make it
+	 * mandatory to always lock pages in ascending order to avoid deadlocks
+	 * with concurrent tasks (such as starting writeback/delalloc).
+	 */
+	if (same_inode && dst_loff < loff)
+		swap(loff, dst_loff);
+
+	/*
+	 * We must gather up all the pages before we initiate our extent
+	 * locking. We use an array for the page pointers. Size of the array is
+	 * bounded by len, which is in turn bounded by BTRFS_MAX_DEDUPE_LEN.
+	 */
+	cmp.src_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+				       GFP_KERNEL | __GFP_ZERO);
+	cmp.dst_pages = kvmalloc_array(num_pages, sizeof(struct page *),
+				       GFP_KERNEL | __GFP_ZERO);
+	if (!cmp.src_pages || !cmp.dst_pages) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	for (i = 0; i < chunk_count; i++) {
+		ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
+					      dst, dst_loff, &cmp);
+		if (ret)
+			goto out_unlock;
+
+		loff += BTRFS_MAX_DEDUPE_LEN;
+		dst_loff += BTRFS_MAX_DEDUPE_LEN;
+	}
+
+	if (tail_len > 0)
+		ret = btrfs_extent_same_range(src, loff, tail_len, dst,
+					      dst_loff, &cmp);
+
 out_unlock:
 	if (same_inode)
 		inode_unlock(src);
 	else
 		btrfs_double_inode_unlock(src, dst);
 
+out_free:
+	kvfree(cmp.src_pages);
+	kvfree(cmp.dst_pages);
+
 	return ret;
 }
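
To make the new chunking concrete, a worked example with illustrative values:

	/*
	 * olen = 40 MiB, BTRFS_MAX_DEDUPE_LEN = 16 MiB:
	 *   chunk_count = div_u64(40 MiB, 16 MiB) = 2      two full passes
	 *   tail_len    = 40 MiB % 16 MiB         = 8 MiB  one tail pass
	 *
	 * btrfs_extent_same_range() therefore runs three times, with loff
	 * and dst_loff advanced by 16 MiB after each full chunk, instead of
	 * silently clamping the request to 16 MiB as the clamp removed from
	 * btrfs_dedupe_file_range() below used to do.
	 */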
 
-#define BTRFS_MAX_DEDUPE_LEN	SZ_16M
-
 ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 				struct file *dst_file, u64 dst_loff)
 {
@@ -3202,9 +3604,6 @@
 	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
 	ssize_t res;
 
-	if (olen > BTRFS_MAX_DEDUPE_LEN)
-		olen = BTRFS_MAX_DEDUPE_LEN;
-
 	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
 		/*
 		 * Btrfs does not support blocksize < page_size. As a
@@ -3826,11 +4225,6 @@
 	    src->i_sb != inode->i_sb)
 		return -EXDEV;
 
-	/* don't make the dst file partly checksummed */
-	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
-	    (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-		return -EINVAL;
-
 	if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
 		return -EISDIR;
 
@@ -3840,6 +4234,13 @@
 		inode_lock(src);
 	}
 
+	/* don't make the dst file partly checksummed */
+	if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+	    (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	/* determine range to clone */
 	ret = -EINVAL;
 	if (off + len > src->i_size || off + len < off)
@@ -4007,8 +4408,8 @@
 	return ret;
 }
 
-void btrfs_get_block_group_info(struct list_head *groups_list,
-				struct btrfs_ioctl_space_info *space)
+static void get_block_group_info(struct list_head *groups_list,
+				 struct btrfs_ioctl_space_info *space)
 {
 	struct btrfs_block_group_cache *block_group;
 
@@ -4124,8 +4525,8 @@
 		down_read(&info->groups_sem);
 		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
 			if (!list_empty(&info->block_groups[c])) {
-				btrfs_get_block_group_info(
-					&info->block_groups[c], &space);
+				get_block_group_info(&info->block_groups[c],
+						     &space);
 				memcpy(dest, &space, sizeof(space));
 				dest++;
 				space_args.total_spaces++;
@@ -4490,14 +4891,14 @@
 	return ret;
 }
 
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs)
 {
 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
 
 	bargs->flags = bctl->flags;
 
-	if (atomic_read(&fs_info->balance_running))
+	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
 		bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
 	if (atomic_read(&fs_info->balance_pause_req))
 		bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
@@ -4508,13 +4909,9 @@
 	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
 	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
 
-	if (lock) {
-		spin_lock(&fs_info->balance_lock);
-		memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
-		spin_unlock(&fs_info->balance_lock);
-	} else {
-		memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
-	}
+	spin_lock(&fs_info->balance_lock);
+	memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+	spin_unlock(&fs_info->balance_lock);
 }
 
 static long btrfs_ioctl_balance(struct file *file, void __user *arg)
@@ -4535,7 +4932,6 @@
 
 again:
 	if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
-		mutex_lock(&fs_info->volume_mutex);
 		mutex_lock(&fs_info->balance_mutex);
 		need_unlock = true;
 		goto locked;
@@ -4550,21 +4946,22 @@
 	mutex_lock(&fs_info->balance_mutex);
 	if (fs_info->balance_ctl) {
 		/* this is either (2) or (3) */
-		if (!atomic_read(&fs_info->balance_running)) {
+		if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
 			mutex_unlock(&fs_info->balance_mutex);
-			if (!mutex_trylock(&fs_info->volume_mutex))
-				goto again;
+			/*
+			 * Lock released to allow other waiters to continue;
+			 * we'll reexamine the status shortly.
+			 */
 			mutex_lock(&fs_info->balance_mutex);
 
 			if (fs_info->balance_ctl &&
-			    !atomic_read(&fs_info->balance_running)) {
+			    !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
 				/* this is (3) */
 				need_unlock = false;
 				goto locked;
 			}
 
 			mutex_unlock(&fs_info->balance_mutex);
-			mutex_unlock(&fs_info->volume_mutex);
 			goto again;
 		} else {
 			/* this is (2) */
@@ -4617,7 +5014,6 @@
 		goto out_bargs;
 	}
 
-	bctl->fs_info = fs_info;
 	if (arg) {
 		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
 		memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
@@ -4636,14 +5032,14 @@
 
 do_balance:
 	/*
-	 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP
-	 * goes to to btrfs_balance.  bctl is freed in __cancel_balance,
-	 * or, if restriper was paused all the way until unmount, in
-	 * free_fs_info.  The flag is cleared in __cancel_balance.
+	 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
+	 * btrfs_balance.  bctl is freed in reset_balance_state, or, if
+	 * restriper was paused all the way until unmount, in free_fs_info.
+	 * The flag should be cleared after reset_balance_state.
 	 */
 	need_unlock = false;
 
-	ret = btrfs_balance(bctl, bargs);
+	ret = btrfs_balance(fs_info, bctl, bargs);
 	bctl = NULL;
 
 	if (arg) {
@@ -4657,7 +5053,6 @@
 	kfree(bargs);
 out_unlock:
 	mutex_unlock(&fs_info->balance_mutex);
-	mutex_unlock(&fs_info->volume_mutex);
 	if (need_unlock)
 		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 out:
@@ -4701,7 +5096,7 @@
 		goto out;
 	}
 
-	update_ioctl_balance_args(fs_info, 1, bargs);
+	btrfs_update_ioctl_balance_args(fs_info, bargs);
 
 	if (copy_to_user(arg, bargs, sizeof(*bargs)))
 		ret = -EFAULT;
@@ -5038,8 +5433,7 @@
 				       BTRFS_UUID_SIZE);
 	if (received_uuid_changed &&
 	    !btrfs_is_empty_uuid(root_item->received_uuid)) {
-		ret = btrfs_uuid_tree_rem(trans, fs_info,
-					  root_item->received_uuid,
+		ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
 					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
 					  root->root_key.objectid);
 		if (ret && ret != -ENOENT) {
@@ -5063,7 +5457,7 @@
 		goto out;
 	}
 	if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
-		ret = btrfs_uuid_tree_add(trans, fs_info, sa->uuid,
+		ret = btrfs_uuid_tree_add(trans, sa->uuid,
 					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
 					  root->root_key.objectid);
 		if (ret < 0 && ret != -EEXIST) {
@@ -5497,7 +5891,7 @@
 	case BTRFS_IOC_SYNC: {
 		int ret;
 
-		ret = btrfs_start_delalloc_roots(fs_info, 0, -1);
+		ret = btrfs_start_delalloc_roots(fs_info, -1);
 		if (ret)
 			return ret;
 		ret = btrfs_sync_fs(inode->i_sb, 1);
@@ -5565,6 +5959,16 @@
 		return btrfs_ioctl_get_features(file, argp);
 	case BTRFS_IOC_SET_FEATURES:
 		return btrfs_ioctl_set_features(file, argp);
+	case FS_IOC_FSGETXATTR:
+		return btrfs_ioctl_fsgetxattr(file, argp);
+	case FS_IOC_FSSETXATTR:
+		return btrfs_ioctl_fssetxattr(file, argp);
+	case BTRFS_IOC_GET_SUBVOL_INFO:
+		return btrfs_ioctl_get_subvol_info(file, argp);
+	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
+		return btrfs_ioctl_get_subvol_rootref(file, argp);
+	case BTRFS_IOC_INO_LOOKUP_USER:
+		return btrfs_ioctl_ino_lookup_user(file, argp);
 	}
 
 	return -ENOTTY;
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index e4faefa..1da768e 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -66,22 +66,16 @@
 		write_lock(&eb->lock);
 		WARN_ON(atomic_read(&eb->spinning_writers));
 		atomic_inc(&eb->spinning_writers);
-		/*
-		 * atomic_dec_and_test implies a barrier for waitqueue_active
-		 */
-		if (atomic_dec_and_test(&eb->blocking_writers) &&
-		    waitqueue_active(&eb->write_lock_wq))
-			wake_up(&eb->write_lock_wq);
+		/* atomic_dec_and_test implies a barrier */
+		if (atomic_dec_and_test(&eb->blocking_writers))
+			cond_wake_up_nomb(&eb->write_lock_wq);
 	} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
 		BUG_ON(atomic_read(&eb->blocking_readers) == 0);
 		read_lock(&eb->lock);
 		atomic_inc(&eb->spinning_readers);
-		/*
-		 * atomic_dec_and_test implies a barrier for waitqueue_active
-		 */
-		if (atomic_dec_and_test(&eb->blocking_readers) &&
-		    waitqueue_active(&eb->read_lock_wq))
-			wake_up(&eb->read_lock_wq);
+		/* atomic_dec_and_test implies a barrier */
+		if (atomic_dec_and_test(&eb->blocking_readers))
+			cond_wake_up_nomb(&eb->read_lock_wq);
 	}
 }
 
@@ -221,12 +215,9 @@
 	}
 	btrfs_assert_tree_read_locked(eb);
 	WARN_ON(atomic_read(&eb->blocking_readers) == 0);
-	/*
-	 * atomic_dec_and_test implies a barrier for waitqueue_active
-	 */
-	if (atomic_dec_and_test(&eb->blocking_readers) &&
-	    waitqueue_active(&eb->read_lock_wq))
-		wake_up(&eb->read_lock_wq);
+	/* atomic_dec_and_test implies a barrier */
+	if (atomic_dec_and_test(&eb->blocking_readers))
+		cond_wake_up_nomb(&eb->read_lock_wq);
 	atomic_dec(&eb->read_locks);
 }
 
@@ -275,12 +266,9 @@
 	if (blockers) {
 		WARN_ON(atomic_read(&eb->spinning_writers));
 		atomic_dec(&eb->blocking_writers);
-		/*
-		 * Make sure counter is updated before we wake up waiters.
-		 */
+		/* Use the lighter barrier after atomic */
 		smp_mb__after_atomic();
-		if (waitqueue_active(&eb->write_lock_wq))
-			wake_up(&eb->write_lock_wq);
+		cond_wake_up_nomb(&eb->write_lock_wq);
 	} else {
 		WARN_ON(atomic_read(&eb->spinning_writers) != 1);
 		atomic_dec(&eb->spinning_writers);
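
The hunks above, and the ordered-data.c ones further down, rely on a new wakeup helper. A sketch consistent with the open-coded pattern being replaced; the "nomb" (no memory barrier) variant assumes the caller already provided the barrier, e.g. via atomic_dec_and_test() or test_and_set_bit():

	static inline void cond_wake_up_nomb(wait_queue_head_t *wq)
	{
		/*
		 * Only issue the wakeup if someone is waiting; the barrier
		 * required before waitqueue_active() is implied by the
		 * caller's preceding atomic operation.
		 */
		if (waitqueue_active(wq))
			wake_up(wq);
	}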
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 0667ea0..b6a4cc1 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -17,6 +17,43 @@
 
 #define LZO_LEN	4
 
+/*
+ * Btrfs LZO compression format
+ *
+ * Regular and inlined LZO compressed data extents consist of:
+ *
+ * 1.  Header
+ *     Fixed size. LZO_LEN (4) bytes long, LE32.
+ *     Records the total size (including the header) of compressed data.
+ *
+ * 2.  Segment(s)
+ *     Variable size. Each segment includes one segment header, followed by
+ *     the data payload.
+ *     One regular LZO compressed extent can have one or more segments.
+ *     For inlined LZO compressed extent, only one segment is allowed.
+ *     One segment represents at most one page of uncompressed data.
+ *
+ * 2.1 Segment header
+ *     Fixed size. LZO_LEN (4) bytes long, LE32.
+ *     Records the total size of the segment (not including the header).
+ *     A segment header never crosses a page boundary, thus it's possible
+ *     to have at most 3 padding zeros at the end of a page.
+ *
+ * 2.2 Data Payload
+ *     Variable size. The size limit is lzo1x_worst_compress(PAGE_SIZE),
+ *     which is 4419 for a 4KiB page.
+ *
+ * Example:
+ * Page 1:
+ *          0     0x2   0x4   0x6   0x8   0xa   0xc   0xe     0x10
+ * 0x0000   |  Header   | SegHdr 01 | Data payload 01 ...     |
+ * ...
+ * 0x0ff0   | SegHdr  N | Data payload  N     ...          |00|
+ *                                                          ^^ padding zeros
+ * Page 2:
+ * 0x1000   | SegHdr N+1| Data payload N+1 ...                |
+ */
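
As a sketch only, a minimal walker over the layout documented above, assuming the whole extent sits contiguously in memory and ignoring the page-boundary and corruption handling the real decompressor performs:

	/* Count the segments of one in-memory compressed extent (sketch). */
	static u32 lzo_count_segments(const char *data)
	{
		u32 tot_len = read_compress_length(data);	/* 1. header */
		u32 off = LZO_LEN;
		u32 nr = 0;

		while (off + LZO_LEN <= tot_len) {
			u32 seg_len = read_compress_length(data + off);

			off += LZO_LEN + seg_len;	/* 2.1 + 2.2 */
			nr++;
		}
		return nr;
	}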
+
 struct workspace {
 	void *mem;
 	void *buf;	/* where decompressed data goes */
@@ -258,6 +295,7 @@
 	unsigned long working_bytes;
 	size_t in_len;
 	size_t out_len;
+	const size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
 	unsigned long in_offset;
 	unsigned long in_page_bytes_left;
 	unsigned long tot_in;
@@ -271,10 +309,22 @@
 
 	data_in = kmap(pages_in[0]);
 	tot_len = read_compress_length(data_in);
+	/*
+	 * Compressed data header check.
+	 *
+	 * The real compressed size can't exceed the maximum extent length, and
+ * all pages should be used (a whole unused page containing just a segment
+ * header is not possible).  If either condition is violated, the compressed
+ * extent is corrupted.
+	 */
+	if (tot_len > min_t(size_t, BTRFS_MAX_COMPRESSED, srclen) ||
+	    tot_len < srclen - PAGE_SIZE) {
+		ret = -EUCLEAN;
+		goto done;
+	}
 
 	tot_in = LZO_LEN;
 	in_offset = LZO_LEN;
-	tot_len = min_t(size_t, srclen, tot_len);
 	in_page_bytes_left = PAGE_SIZE - LZO_LEN;
 
 	tot_out = 0;
@@ -285,6 +335,17 @@
 		in_offset += LZO_LEN;
 		tot_in += LZO_LEN;
 
+		/*
+		 * Segment header check.
+		 *
+		 * The segment length must not exceed the maximum LZO
+		 * compression size, nor the total compressed size.
+		 */
+		if (in_len > max_segment_len || tot_in + in_len > tot_len) {
+			ret = -EUCLEAN;
+			goto done;
+		}
+
 		tot_in += in_len;
 		working_bytes = in_len;
 		may_late_unmap = need_unmap = false;
@@ -335,7 +396,7 @@
 			}
 		}
 
-		out_len = lzo1x_worst_compress(PAGE_SIZE);
+		out_len = max_segment_len;
 		ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
 					    &out_len);
 		if (need_unmap)
@@ -369,15 +430,24 @@
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	size_t in_len;
 	size_t out_len;
+	size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
 	int ret = 0;
 	char *kaddr;
 	unsigned long bytes;
 
-	BUG_ON(srclen < LZO_LEN);
+	if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)
+		return -EUCLEAN;
 
+	in_len = read_compress_length(data_in);
+	if (in_len != srclen)
+		return -EUCLEAN;
 	data_in += LZO_LEN;
 
 	in_len = read_compress_length(data_in);
+	if (in_len != srclen - LZO_LEN * 2) {
+		ret = -EUCLEAN;
+		goto out;
+	}
 	data_in += LZO_LEN;
 
 	out_len = PAGE_SIZE;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6db8bb2..2e1a169 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -343,11 +343,8 @@
 
 	if (entry->bytes_left == 0) {
 		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
-		/*
-		 * Implicit memory barrier after test_and_set_bit
-		 */
-		if (waitqueue_active(&entry->wait))
-			wake_up(&entry->wait);
+		/* test_and_set_bit implies a barrier */
+		cond_wake_up_nomb(&entry->wait);
 	} else {
 		ret = 1;
 	}
@@ -410,11 +407,8 @@
 
 	if (entry->bytes_left == 0) {
 		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
-		/*
-		 * Implicit memory barrier after test_and_set_bit
-		 */
-		if (waitqueue_active(&entry->wait))
-			wake_up(&entry->wait);
+		/* test_and_set_bit implies a barrier */
+		cond_wake_up_nomb(&entry->wait);
 	} else {
 		ret = 1;
 	}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 21a831d..a4e11cf 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -166,6 +166,25 @@
 	}
 }
 
+/*
+ * Helper to output the refs and locking status of an extent buffer.  Useful
+ * for debugging race condition related problems.
+ */
+static void print_eb_refs_lock(struct extent_buffer *eb)
+{
+#ifdef CONFIG_BTRFS_DEBUG
+	btrfs_info(eb->fs_info,
+"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u",
+		   atomic_read(&eb->refs), atomic_read(&eb->write_locks),
+		   atomic_read(&eb->read_locks),
+		   atomic_read(&eb->blocking_writers),
+		   atomic_read(&eb->blocking_readers),
+		   atomic_read(&eb->spinning_writers),
+		   atomic_read(&eb->spinning_readers),
+		   eb->lock_owner, current->pid);
+#endif
+}
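
Example output on a CONFIG_BTRFS_DEBUG build (values illustrative):

	BTRFS info (device sda1): refs 3 lock (w:1 r:0 bw:0 br:0 sw:1 sr:0) lock_owner 1234 current 1234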
+
 void btrfs_print_leaf(struct extent_buffer *l)
 {
 	struct btrfs_fs_info *fs_info;
@@ -193,6 +212,7 @@
 		   "leaf %llu gen %llu total ptrs %d free space %d owner %llu",
 		   btrfs_header_bytenr(l), btrfs_header_generation(l), nr,
 		   btrfs_leaf_free_space(fs_info, l), btrfs_header_owner(l));
+	print_eb_refs_lock(l);
 	for (i = 0 ; i < nr ; i++) {
 		item = btrfs_item_nr(i);
 		btrfs_item_key_to_cpu(l, &key, i);
@@ -347,6 +367,7 @@
 		   btrfs_header_bytenr(c), level, btrfs_header_generation(c),
 		   nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(fs_info) - nr,
 		   btrfs_header_owner(c));
+	print_eb_refs_lock(c);
 	for (i = 0; i < nr; i++) {
 		btrfs_node_key_to_cpu(c, &key, i);
 		pr_info("\tkey %d (%llu %u %llu) block %llu gen %llu\n",
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9fb758d..1874a6d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1882,8 +1882,8 @@
 		cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
 		cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
 
-		trace_qgroup_update_counters(fs_info, qg->qgroupid,
-					     cur_old_count, cur_new_count);
+		trace_qgroup_update_counters(fs_info, qg, cur_old_count,
+					     cur_new_count);
 
 		/* Rfer update part */
 		if (cur_old_count == 0 && cur_new_count > 0) {
@@ -2014,8 +2014,8 @@
 
 	BUG_ON(!fs_info->quota_root);
 
-	trace_btrfs_qgroup_account_extent(fs_info, bytenr, num_bytes,
-					  nr_old_roots, nr_new_roots);
+	trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
+					num_bytes, nr_old_roots, nr_new_roots);
 
 	qgroups = ulist_alloc(GFP_NOFS);
 	if (!qgroups) {
@@ -2580,6 +2580,21 @@
 }
 
 /*
+ * Check if the leaf is the last leaf, which means all node pointers
+ * are at their last position.
+ */
+static bool is_last_leaf(struct btrfs_path *path)
+{
+	int i;
+
+	for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
+		if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
+			return false;
+	}
+	return true;
+}
+
+/*
  * returns < 0 on error, 0 when more leafs are to be scanned.
  * returns 1 when done.
  */
@@ -2590,8 +2605,8 @@
 	struct btrfs_key found;
 	struct extent_buffer *scratch_leaf = NULL;
 	struct ulist *roots = NULL;
-	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
 	u64 num_bytes;
+	bool done;
 	int slot;
 	int ret;
 
@@ -2620,12 +2635,12 @@
 		mutex_unlock(&fs_info->qgroup_rescan_lock);
 		return ret;
 	}
+	done = is_last_leaf(path);
 
 	btrfs_item_key_to_cpu(path->nodes[0], &found,
 			      btrfs_header_nritems(path->nodes[0]) - 1);
 	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
 
-	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 	scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
 	if (!scratch_leaf) {
 		ret = -ENOMEM;
@@ -2664,8 +2679,9 @@
 		btrfs_tree_read_unlock_blocking(scratch_leaf);
 		free_extent_buffer(scratch_leaf);
 	}
-	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
 
+	if (done && !ret)
+		ret = 1;
 	return ret;
 }
 
@@ -2681,6 +2697,12 @@
 	path = btrfs_alloc_path();
 	if (!path)
 		goto out;
+	/*
+	 * Rescan should only search the commit root; any later difference
+	 * is recorded by qgroup tracking anyway.
+	 */
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
 
 	err = 0;
 	while (!err && !btrfs_fs_closing(fs_info)) {
@@ -2760,26 +2782,36 @@
 {
 	int ret = 0;
 
-	if (!init_flags &&
-	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
-	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
-		ret = -EINVAL;
-		goto err;
+	if (!init_flags) {
+		/* we're resuming qgroup rescan at mount time */
+		if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN))
+			btrfs_warn(fs_info,
+			"qgroup rescan init failed, qgroup rescan is not queued");
+		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+			btrfs_warn(fs_info,
+			"qgroup rescan init failed, qgroup is not enabled");
+		return -EINVAL;
 	}
 
 	mutex_lock(&fs_info->qgroup_rescan_lock);
 	spin_lock(&fs_info->qgroup_lock);
 
 	if (init_flags) {
-		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+			btrfs_warn(fs_info,
+				   "qgroup rescan is already in progress");
 			ret = -EINPROGRESS;
-		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+		} else if (!(fs_info->qgroup_flags &
+			     BTRFS_QGROUP_STATUS_FLAG_ON)) {
+			btrfs_warn(fs_info,
+			"qgroup rescan init failed, qgroup is not enabled");
 			ret = -EINVAL;
+		}
 
 		if (ret) {
 			spin_unlock(&fs_info->qgroup_lock);
 			mutex_unlock(&fs_info->qgroup_rescan_lock);
-			goto err;
+			return ret;
 		}
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 	}
@@ -2798,13 +2830,6 @@
 	btrfs_init_work(&fs_info->qgroup_rescan_work,
 			btrfs_qgroup_rescan_helper,
 			btrfs_qgroup_rescan_worker, NULL, NULL);
-
-	if (ret) {
-err:
-		btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
-		return ret;
-	}
-
 	return 0;
 }
 
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 9abd950e..5e4ad134 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -163,6 +163,12 @@
 	 * bitmap to record which horizontal stripe has data
 	 */
 	unsigned long *dbitmap;
+
+	/* allocated with real_stripes-many pointers for finish_*() calls */
+	void **finish_pointers;
+
+	/* allocated with stripe_npages-many bits for finish_*() calls */
+	unsigned long *finish_pbitmap;
 };
 
 static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
@@ -981,9 +987,14 @@
 	int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
 	void *p;
 
-	rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
-		       DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
-		       sizeof(long), GFP_NOFS);
+	rbio = kzalloc(sizeof(*rbio) +
+		       sizeof(*rbio->stripe_pages) * num_pages +
+		       sizeof(*rbio->bio_pages) * num_pages +
+		       sizeof(*rbio->finish_pointers) * real_stripes +
+		       sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) +
+		       sizeof(*rbio->finish_pbitmap) *
+				BITS_TO_LONGS(stripe_npages),
+		       GFP_NOFS);
 	if (!rbio)
 		return ERR_PTR(-ENOMEM);
 
@@ -1005,13 +1016,20 @@
 	atomic_set(&rbio->stripes_pending, 0);
 
 	/*
-	 * the stripe_pages and bio_pages array point to the extra
+	 * the stripe_pages, bio_pages, etc arrays point to the extra
 	 * memory we allocated past the end of the rbio
 	 */
 	p = rbio + 1;
-	rbio->stripe_pages = p;
-	rbio->bio_pages = p + sizeof(struct page *) * num_pages;
-	rbio->dbitmap = p + sizeof(struct page *) * num_pages * 2;
+#define CONSUME_ALLOC(ptr, count)	do {				\
+		ptr = p;						\
+		p = (unsigned char *)p + sizeof(*(ptr)) * (count);	\
+	} while (0)
+	CONSUME_ALLOC(rbio->stripe_pages, num_pages);
+	CONSUME_ALLOC(rbio->bio_pages, num_pages);
+	CONSUME_ALLOC(rbio->finish_pointers, real_stripes);
+	CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages));
+	CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
+#undef  CONSUME_ALLOC
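
The resulting single-allocation layout, for illustration:

	/*
	 *   [struct btrfs_raid_bio | stripe_pages | bio_pages |
	 *    finish_pointers | dbitmap | finish_pbitmap]
	 *
	 * Each CONSUME_ALLOC() carves the next sub-array out of the trailing
	 * memory, which is what lets finish_rmw() and finish_parity_scrub()
	 * below replace their variable-length stack arrays with
	 * rbio->finish_pointers and rbio->finish_pbitmap.
	 */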
 
 	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
 		nr_data = real_stripes - 1;
@@ -1180,7 +1198,7 @@
 static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 {
 	struct btrfs_bio *bbio = rbio->bbio;
-	void *pointers[rbio->real_stripes];
+	void **pointers = rbio->finish_pointers;
 	int nr_data = rbio->nr_data;
 	int stripe;
 	int pagenr;
@@ -2350,8 +2368,8 @@
 					 int need_check)
 {
 	struct btrfs_bio *bbio = rbio->bbio;
-	void *pointers[rbio->real_stripes];
-	DECLARE_BITMAP(pbitmap, rbio->stripe_npages);
+	void **pointers = rbio->finish_pointers;
+	unsigned long *pbitmap = rbio->finish_pbitmap;
 	int nr_data = rbio->nr_data;
 	int stripe;
 	int pagenr;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b041b94..879b76f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4299,7 +4299,7 @@
 	return inode;
 }
 
-static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
+static struct reloc_control *alloc_reloc_control(void)
 {
 	struct reloc_control *rc;
 
@@ -4344,7 +4344,7 @@
 		DESCRIBE_FLAG(RAID5,    "raid5");
 		DESCRIBE_FLAG(RAID6,    "raid6");
 		if (flags)
-			snprintf(buf, buf - bp + sizeof(buf), "|0x%llx", flags);
+			snprintf(bp, buf - bp + sizeof(buf), "|0x%llx", flags);
 #undef DESCRIBE_FLAG
 	}
 
@@ -4366,7 +4366,7 @@
 	int rw = 0;
 	int err = 0;
 
-	rc = alloc_reloc_control(fs_info);
+	rc = alloc_reloc_control();
 	if (!rc)
 		return -ENOMEM;
 
@@ -4562,7 +4562,7 @@
 	if (list_empty(&reloc_roots))
 		goto out;
 
-	rc = alloc_reloc_control(fs_info);
+	rc = alloc_reloc_control();
 	if (!rc) {
 		err = -ENOMEM;
 		goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 52b39a0..a590058 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3984,6 +3984,7 @@
 			spin_lock(&fs_info->unused_bgs_lock);
 			if (list_empty(&cache->bg_list)) {
 				btrfs_get_block_group(cache);
+				trace_btrfs_add_unused_block_group(cache);
 				list_add_tail(&cache->bg_list,
 					      &fs_info->unused_bgs);
 			}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c0074d2..c47f62b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -235,6 +235,7 @@
 	struct rb_node node;
 	u64 ino;
 	u64 gen;
+	u64 last_dir_index_offset;
 };
 
 struct name_cache_entry {
@@ -2844,12 +2845,6 @@
 	struct rb_node *parent = NULL;
 	struct orphan_dir_info *entry, *odi;
 
-	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
-	if (!odi)
-		return ERR_PTR(-ENOMEM);
-	odi->ino = dir_ino;
-	odi->gen = 0;
-
 	while (*p) {
 		parent = *p;
 		entry = rb_entry(parent, struct orphan_dir_info, node);
@@ -2858,11 +2853,17 @@
 		} else if (dir_ino > entry->ino) {
 			p = &(*p)->rb_right;
 		} else {
-			kfree(odi);
 			return entry;
 		}
 	}
 
+	odi = kmalloc(sizeof(*odi), GFP_KERNEL);
+	if (!odi)
+		return ERR_PTR(-ENOMEM);
+	odi->ino = dir_ino;
+	odi->gen = 0;
+	odi->last_dir_index_offset = 0;
+
 	rb_link_node(&odi->node, parent, p);
 	rb_insert_color(&odi->node, &sctx->orphan_dirs);
 	return odi;
@@ -2917,6 +2918,7 @@
 	struct btrfs_key found_key;
 	struct btrfs_key loc;
 	struct btrfs_dir_item *di;
+	struct orphan_dir_info *odi = NULL;
 
 	/*
 	 * Don't try to rmdir the top/root subvolume dir.
@@ -2931,6 +2933,11 @@
 	key.objectid = dir;
 	key.type = BTRFS_DIR_INDEX_KEY;
 	key.offset = 0;
+
+	odi = get_orphan_dir_info(sctx, dir);
+	if (odi)
+		key.offset = odi->last_dir_index_offset;
+
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
@@ -2958,30 +2965,33 @@
 
 		dm = get_waiting_dir_move(sctx, loc.objectid);
 		if (dm) {
-			struct orphan_dir_info *odi;
-
 			odi = add_orphan_dir_info(sctx, dir);
 			if (IS_ERR(odi)) {
 				ret = PTR_ERR(odi);
 				goto out;
 			}
 			odi->gen = dir_gen;
+			odi->last_dir_index_offset = found_key.offset;
 			dm->rmdir_ino = dir;
 			ret = 0;
 			goto out;
 		}
 
 		if (loc.objectid > send_progress) {
-			struct orphan_dir_info *odi;
-
-			odi = get_orphan_dir_info(sctx, dir);
-			free_orphan_dir_info(sctx, odi);
+			odi = add_orphan_dir_info(sctx, dir);
+			if (IS_ERR(odi)) {
+				ret = PTR_ERR(odi);
+				goto out;
+			}
+			odi->gen = dir_gen;
+			odi->last_dir_index_offset = found_key.offset;
 			ret = 0;
 			goto out;
 		}
 
 		path->slots[0]++;
 	}
+	free_orphan_dir_info(sctx, odi);
 
 	ret = 1;
 
@@ -3259,13 +3269,16 @@
 
 	if (rmdir_ino) {
 		struct orphan_dir_info *odi;
+		u64 gen;
 
 		odi = get_orphan_dir_info(sctx, rmdir_ino);
 		if (!odi) {
 			/* already deleted */
 			goto finish;
 		}
-		ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
+		gen = odi->gen;
+
+		ret = can_rmdir(sctx, rmdir_ino, gen, sctx->cur_ino);
 		if (ret < 0)
 			goto out;
 		if (!ret)
@@ -3276,13 +3289,12 @@
 			ret = -ENOMEM;
 			goto out;
 		}
-		ret = get_cur_path(sctx, rmdir_ino, odi->gen, name);
+		ret = get_cur_path(sctx, rmdir_ino, gen, name);
 		if (ret < 0)
 			goto out;
 		ret = send_rmdir(sctx, name);
 		if (ret < 0)
 			goto out;
-		free_orphan_dir_info(sctx, odi);
 	}
 
 finish:
@@ -6454,7 +6466,7 @@
 	 */
 	if (root->send_in_progress < 0)
 		btrfs_err(root->fs_info,
-			  "send_in_progres unbalanced %d root %llu",
+			  "send_in_progress unbalanced %d root %llu",
 			  root->send_in_progress, root->root_key.objectid);
 	spin_unlock(&root->root_item_lock);
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0628092..81107ad 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -323,6 +323,7 @@
 	Opt_ssd, Opt_nossd,
 	Opt_ssd_spread, Opt_nossd_spread,
 	Opt_subvol,
+	Opt_subvol_empty,
 	Opt_subvolid,
 	Opt_thread_pool,
 	Opt_treelog, Opt_notreelog,
@@ -388,6 +389,7 @@
 	{Opt_ssd_spread, "ssd_spread"},
 	{Opt_nossd_spread, "nossd_spread"},
 	{Opt_subvol, "subvol=%s"},
+	{Opt_subvol_empty, "subvol="},
 	{Opt_subvolid, "subvolid=%s"},
 	{Opt_thread_pool, "thread_pool=%u"},
 	{Opt_treelog, "treelog"},
@@ -461,6 +463,7 @@
 			btrfs_set_opt(info->mount_opt, DEGRADED);
 			break;
 		case Opt_subvol:
+		case Opt_subvol_empty:
 		case Opt_subvolid:
 		case Opt_subvolrootid:
 		case Opt_device:
@@ -1782,10 +1785,8 @@
 	}
 
 	ret = btrfs_parse_options(fs_info, data, *flags);
-	if (ret) {
-		ret = -EINVAL;
+	if (ret)
 		goto restore;
-	}
 
 	btrfs_remount_begin(fs_info, old_opts, *flags);
 	btrfs_resize_thread_pool(fs_info,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4848a43..4a4e960 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -210,12 +210,42 @@
 	NULL
 };
 
+/*
+ * Features which depend on feature bits and may differ between filesystems.
+ *
+ * /sys/fs/btrfs/features lists all available features of this kernel while
+ * /sys/fs/btrfs/UUID/features shows features of the fs which are enabled or
+ * can be changed online.
+ */
 static const struct attribute_group btrfs_feature_attr_group = {
 	.name = "features",
 	.is_visible = btrfs_feature_visible,
 	.attrs = btrfs_supported_feature_attrs,
 };
 
+static ssize_t rmdir_subvol_show(struct kobject *kobj,
+				 struct kobj_attribute *ka, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "0\n");
+}
+BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show);
+
+static struct attribute *btrfs_supported_static_feature_attrs[] = {
+	BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
+	NULL
+};
+
+/*
+ * Features which only depend on kernel version.
+ *
+ * These are listed in /sys/fs/btrfs/features along with
+ * btrfs_feature_attr_group
+ */
+static const struct attribute_group btrfs_static_feature_attr_group = {
+	.name = "features",
+	.attrs = btrfs_supported_static_feature_attrs,
+};
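
With sysfs_merge_group() (wired up in btrfs_init_sysfs() below), both attribute groups share the same "features" directory, so a kernel-version feature such as rmdir_subvol appears as /sys/fs/btrfs/features/rmdir_subvol alongside the feature-bit attributes; tools can test for the file's existence to detect that this kernel allows rmdir(2) on an empty subvolume.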
+
 static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
 {
 	u64 val;
@@ -514,10 +544,11 @@
 }
 
 #define NUM_FEATURE_BITS 64
-static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
-static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
+#define BTRFS_FEATURE_NAME_MAX 13
+static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
+static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];
 
-static const u64 supported_feature_masks[3] = {
+static const u64 supported_feature_masks[FEAT_MAX] = {
 	[FEAT_COMPAT]    = BTRFS_FEATURE_COMPAT_SUPP,
 	[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
 	[FEAT_INCOMPAT]  = BTRFS_FEATURE_INCOMPAT_SUPP,
@@ -589,7 +620,7 @@
 		return;
 	}
 
-	list_for_each_entry(fs_devs, fs_uuids, list) {
+	list_for_each_entry(fs_devs, fs_uuids, fs_list) {
 		__btrfs_sysfs_remove_fsid(fs_devs);
 	}
 }
@@ -609,7 +640,7 @@
 	btrfs_sysfs_rm_device_link(fs_info->fs_devices, NULL);
 }
 
-const char * const btrfs_feature_set_names[3] = {
+const char * const btrfs_feature_set_names[FEAT_MAX] = {
 	[FEAT_COMPAT]	 = "compat",
 	[FEAT_COMPAT_RO] = "compat_ro",
 	[FEAT_INCOMPAT]	 = "incompat",
@@ -673,7 +704,7 @@
 			if (fa->kobj_attr.attr.name)
 				continue;
 
-			snprintf(name, 13, "%s:%u",
+			snprintf(name, BTRFS_FEATURE_NAME_MAX, "%s:%u",
 				 btrfs_feature_set_names[set], i);
 
 			fa->kobj_attr.attr.name = name;
@@ -900,8 +931,15 @@
 	ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
 	if (ret)
 		goto out2;
+	ret = sysfs_merge_group(&btrfs_kset->kobj,
+				&btrfs_static_feature_attr_group);
+	if (ret)
+		goto out_remove_group;
 
 	return 0;
+
+out_remove_group:
+	sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
 out2:
 	debugfs_remove_recursive(btrfs_debugfs_root_dentry);
 out1:
@@ -912,6 +950,8 @@
 
 void __cold btrfs_exit_sysfs(void)
 {
+	sysfs_unmerge_group(&btrfs_kset->kobj,
+			    &btrfs_static_feature_attr_group);
 	sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
 	kset_unregister(btrfs_kset);
 	debugfs_remove_recursive(btrfs_debugfs_root_dentry);
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index b567560..c6ee600 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -9,7 +9,7 @@
 extern u64 btrfs_debugfs_test;
 
 enum btrfs_feature_set {
-	FEAT_COMPAT,
+	FEAT_COMPAT = 0,
 	FEAT_COMPAT_RO,
 	FEAT_INCOMPAT,
 	FEAT_MAX
@@ -77,7 +77,7 @@
 }
 
 char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
-extern const char * const btrfs_feature_set_names[3];
+extern const char * const btrfs_feature_set_names[FEAT_MAX];
 extern struct kobj_type space_info_ktype;
 extern struct kobj_type btrfs_raid_ktype;
 int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 30ed438..db72b3b 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -219,11 +219,13 @@
 	kfree(cache);
 }
 
-void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
+			    struct btrfs_fs_info *fs_info)
 {
 	memset(trans, 0, sizeof(*trans));
 	trans->transid = 1;
 	trans->type = __TRANS_DUMMY;
+	trans->fs_info = fs_info;
 }
 
 int btrfs_run_sanity_tests(void)
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index a5a0b95..70ff9f9 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -9,7 +9,8 @@
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 int btrfs_run_sanity_tests(void);
 
-#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
+#define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
+#define test_err(fmt, ...) pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
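
For example, test_err("split item failed %d", ret) now expands to pr_err("BTRFS: selftest: split item failed %d\n", ret), which is why the converted call sites below all drop their trailing "\n".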
 
 struct btrfs_root;
 struct btrfs_trans_handle;
@@ -28,7 +29,8 @@
 struct btrfs_block_group_cache *
 btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info, unsigned long length);
 void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
-void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
+			    struct btrfs_fs_info *fs_info);
 #else
 static inline int btrfs_run_sanity_tests(void)
 {
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
index 31e8a9e..7d72eab 100644
--- a/fs/btrfs/tests/extent-buffer-tests.c
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -26,31 +26,31 @@
 	u32 value_len = strlen(value);
 	int ret = 0;
 
-	test_msg("Running btrfs_split_item tests\n");
+	test_msg("running btrfs_split_item tests");
 
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
-		test_msg("Could not allocate fs_info\n");
+		test_err("could not allocate fs_info");
 		return -ENOMEM;
 	}
 
 	root = btrfs_alloc_dummy_root(fs_info);
 	if (IS_ERR(root)) {
-		test_msg("Could not allocate root\n");
+		test_err("could not allocate root");
 		ret = PTR_ERR(root);
 		goto out;
 	}
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		test_msg("Could not allocate path\n");
+		test_err("could not allocate path");
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	path->nodes[0] = eb = alloc_dummy_extent_buffer(fs_info, nodesize);
 	if (!eb) {
-		test_msg("Could not allocate dummy buffer\n");
+		test_err("could not allocate dummy buffer");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -75,7 +75,7 @@
 	 */
 	ret = btrfs_split_item(NULL, root, path, &key, 17);
 	if (ret) {
-		test_msg("Split item failed %d\n", ret);
+		test_err("split item failed %d", ret);
 		goto out;
 	}
 
@@ -86,14 +86,14 @@
 	btrfs_item_key_to_cpu(eb, &key, 0);
 	if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
 	    key.offset != 0) {
-		test_msg("Invalid key at slot 0\n");
+		test_err("invalid key at slot 0");
 		ret = -EINVAL;
 		goto out;
 	}
 
 	item = btrfs_item_nr(0);
 	if (btrfs_item_size(eb, item) != strlen(split1)) {
-		test_msg("Invalid len in the first split\n");
+		test_err("invalid len in the first split");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -101,8 +101,8 @@
 	read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
 			   strlen(split1));
 	if (memcmp(buf, split1, strlen(split1))) {
-		test_msg("Data in the buffer doesn't match what it should "
-			 "in the first split have='%.*s' want '%s'\n",
+		test_err(
+"data in the buffer doesn't match what it should in the first split have='%.*s' want '%s'",
 			 (int)strlen(split1), buf, split1);
 		ret = -EINVAL;
 		goto out;
@@ -111,14 +111,14 @@
 	btrfs_item_key_to_cpu(eb, &key, 1);
 	if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
 	    key.offset != 3) {
-		test_msg("Invalid key at slot 1\n");
+		test_err("invalid key at slot 1");
 		ret = -EINVAL;
 		goto out;
 	}
 
 	item = btrfs_item_nr(1);
 	if (btrfs_item_size(eb, item) != strlen(split2)) {
-		test_msg("Invalid len in the second split\n");
+		test_err("invalid len in the second split");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -126,8 +126,8 @@
 	read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
 			   strlen(split2));
 	if (memcmp(buf, split2, strlen(split2))) {
-		test_msg("Data in the buffer doesn't match what it should "
-			 "in the second split\n");
+		test_err(
+	"data in the buffer doesn't match what it should in the second split");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -136,21 +136,21 @@
 	/* Do it again so we test memmoving the other items in the leaf */
 	ret = btrfs_split_item(NULL, root, path, &key, 4);
 	if (ret) {
-		test_msg("Second split item failed %d\n", ret);
+		test_err("second split item failed %d", ret);
 		goto out;
 	}
 
 	btrfs_item_key_to_cpu(eb, &key, 0);
 	if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
 	    key.offset != 0) {
-		test_msg("Invalid key at slot 0\n");
+		test_err("invalid key at slot 0");
 		ret = -EINVAL;
 		goto out;
 	}
 
 	item = btrfs_item_nr(0);
 	if (btrfs_item_size(eb, item) != strlen(split3)) {
-		test_msg("Invalid len in the first split\n");
+		test_err("invalid len in the first split");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -158,8 +158,8 @@
 	read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
 			   strlen(split3));
 	if (memcmp(buf, split3, strlen(split3))) {
-		test_msg("Data in the buffer doesn't match what it should "
-			 "in the third split");
+		test_err(
+	"data in the buffer doesn't match what it should in the third split");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -167,14 +167,14 @@
 	btrfs_item_key_to_cpu(eb, &key, 1);
 	if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
 	    key.offset != 1) {
-		test_msg("Invalid key at slot 1\n");
+		test_err("invalid key at slot 1");
 		ret = -EINVAL;
 		goto out;
 	}
 
 	item = btrfs_item_nr(1);
 	if (btrfs_item_size(eb, item) != strlen(split4)) {
-		test_msg("Invalid len in the second split\n");
+		test_err("invalid len in the second split");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -182,8 +182,8 @@
 	read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
 			   strlen(split4));
 	if (memcmp(buf, split4, strlen(split4))) {
-		test_msg("Data in the buffer doesn't match what it should "
-			 "in the fourth split\n");
+		test_err(
+	"data in the buffer doesn't match what it should in the fourth split");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -191,14 +191,14 @@
 	btrfs_item_key_to_cpu(eb, &key, 2);
 	if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
 	    key.offset != 3) {
-		test_msg("Invalid key at slot 2\n");
+		test_err("invalid key at slot 2");
 		ret = -EINVAL;
 		goto out;
 	}
 
 	item = btrfs_item_nr(2);
 	if (btrfs_item_size(eb, item) != strlen(split2)) {
-		test_msg("Invalid len in the second split\n");
+		test_err("invalid len in the second split");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -206,8 +206,8 @@
 	read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2),
 			   strlen(split2));
 	if (memcmp(buf, split2, strlen(split2))) {
-		test_msg("Data in the buffer doesn't match what it should "
-			 "in the last chunk\n");
+		test_err(
+	"data in the buffer doesn't match what it should in the last chunk");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -220,6 +220,6 @@
 
 int btrfs_test_extent_buffer_operations(u32 sectorsize, u32 nodesize)
 {
-	test_msg("Running extent buffer operation tests\n");
+	test_msg("running extent buffer operation tests");
 	return test_btrfs_split_item(sectorsize, nodesize);
 }
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 76aa5a6..d9269a5 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -46,7 +46,9 @@
 		cond_resched();
 		loops++;
 		if (loops > 100000) {
-			printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret);
+			printk(KERN_ERR
+		"stuck in a loop, start %llu, end %llu, nr_pages %lu, ret %d\n",
+				start, end, nr_pages, ret);
 			break;
 		}
 	}
@@ -66,11 +68,11 @@
 	u64 found;
 	int ret = -EINVAL;
 
-	test_msg("Running find delalloc tests\n");
+	test_msg("running find delalloc tests");
 
 	inode = btrfs_new_test_inode();
 	if (!inode) {
-		test_msg("Failed to allocate test inode\n");
+		test_err("failed to allocate test inode");
 		return -ENOMEM;
 	}
 
@@ -84,7 +86,7 @@
 	for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) {
 		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
-			test_msg("Failed to allocate test page\n");
+			test_err("failed to allocate test page");
 			ret = -ENOMEM;
 			goto out;
 		}
@@ -107,11 +109,11 @@
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
 					 &end, max_bytes);
 	if (!found) {
-		test_msg("Should have found at least one delalloc\n");
+		test_err("should have found at least one delalloc");
 		goto out_bits;
 	}
 	if (start != 0 || end != (sectorsize - 1)) {
-		test_msg("Expected start 0 end %u, got start %llu end %llu\n",
+		test_err("expected start 0 end %u, got start %llu end %llu",
 			sectorsize - 1, start, end);
 		goto out_bits;
 	}
@@ -129,7 +131,7 @@
 	locked_page = find_lock_page(inode->i_mapping,
 				     test_start >> PAGE_SHIFT);
 	if (!locked_page) {
-		test_msg("Couldn't find the locked page\n");
+		test_err("couldn't find the locked page");
 		goto out_bits;
 	}
 	set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL);
@@ -138,17 +140,17 @@
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
 					 &end, max_bytes);
 	if (!found) {
-		test_msg("Couldn't find delalloc in our range\n");
+		test_err("couldn't find delalloc in our range");
 		goto out_bits;
 	}
 	if (start != test_start || end != max_bytes - 1) {
-		test_msg("Expected start %Lu end %Lu, got start %Lu, end "
-			 "%Lu\n", test_start, max_bytes - 1, start, end);
+		test_err("expected start %llu end %llu, got start %llu, end %llu",
+				test_start, max_bytes - 1, start, end);
 		goto out_bits;
 	}
 	if (process_page_range(inode, start, end,
 			       PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
-		test_msg("There were unlocked pages in the range\n");
+		test_err("there were unlocked pages in the range");
 		goto out_bits;
 	}
 	unlock_extent(&tmp, start, end);
@@ -164,7 +166,7 @@
 	locked_page = find_lock_page(inode->i_mapping, test_start >>
 				     PAGE_SHIFT);
 	if (!locked_page) {
-		test_msg("Couldn't find the locked page\n");
+		test_err("couldn't find the locked page");
 		goto out_bits;
 	}
 	start = test_start;
@@ -172,11 +174,11 @@
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
 					 &end, max_bytes);
 	if (found) {
-		test_msg("Found range when we shouldn't have\n");
+		test_err("found range when we shouldn't have");
 		goto out_bits;
 	}
 	if (end != (u64)-1) {
-		test_msg("Did not return the proper end offset\n");
+		test_err("did not return the proper end offset");
 		goto out_bits;
 	}
 
@@ -193,17 +195,17 @@
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
 					 &end, max_bytes);
 	if (!found) {
-		test_msg("Didn't find our range\n");
+		test_err("didn't find our range");
 		goto out_bits;
 	}
 	if (start != test_start || end != total_dirty - 1) {
-		test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
+		test_err("expected start %llu end %llu, got start %llu end %llu",
 			 test_start, total_dirty - 1, start, end);
 		goto out_bits;
 	}
 	if (process_page_range(inode, start, end,
 			       PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
-		test_msg("Pages in range were not all locked\n");
+		test_err("pages in range were not all locked");
 		goto out_bits;
 	}
 	unlock_extent(&tmp, start, end);
@@ -215,7 +217,7 @@
 	page = find_get_page(inode->i_mapping,
 			     (max_bytes + SZ_1M) >> PAGE_SHIFT);
 	if (!page) {
-		test_msg("Couldn't find our page\n");
+		test_err("couldn't find our page");
 		goto out_bits;
 	}
 	ClearPageDirty(page);
@@ -234,18 +236,17 @@
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
 					 &end, max_bytes);
 	if (!found) {
-		test_msg("Didn't find our range\n");
+		test_err("didn't find our range");
 		goto out_bits;
 	}
 	if (start != test_start && end != test_start + PAGE_SIZE - 1) {
-		test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
-			 test_start, test_start + PAGE_SIZE - 1, start,
-			 end);
+		test_err("expected start %llu end %llu, got start %llu end %llu",
+			 test_start, test_start + PAGE_SIZE - 1, start, end);
 		goto out_bits;
 	}
 	if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED |
 			       PROCESS_UNLOCK)) {
-		test_msg("Pages in range were not all locked\n");
+		test_err("pages in range were not all locked");
 		goto out_bits;
 	}
 	ret = 0;
@@ -271,14 +272,14 @@
 		bit = !!test_bit(i, bitmap);
 		bit1 = !!extent_buffer_test_bit(eb, 0, i);
 		if (bit1 != bit) {
-			test_msg("Bits do not match\n");
+			test_err("bits do not match");
 			return -EINVAL;
 		}
 
 		bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
 						i % BITS_PER_BYTE);
 		if (bit1 != bit) {
-			test_msg("Offset bits do not match\n");
+			test_err("offset bits do not match");
 			return -EINVAL;
 		}
 	}
@@ -295,7 +296,7 @@
 	memset(bitmap, 0, len);
 	memzero_extent_buffer(eb, 0, len);
 	if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
-		test_msg("Bitmap was not zeroed\n");
+		test_err("bitmap was not zeroed");
 		return -EINVAL;
 	}
 
@@ -303,7 +304,7 @@
 	extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
 	ret = check_eb_bitmap(bitmap, eb, len);
 	if (ret) {
-		test_msg("Setting all bits failed\n");
+		test_err("setting all bits failed");
 		return ret;
 	}
 
@@ -311,7 +312,7 @@
 	extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
 	ret = check_eb_bitmap(bitmap, eb, len);
 	if (ret) {
-		test_msg("Clearing all bits failed\n");
+		test_err("clearing all bits failed");
 		return ret;
 	}
 
@@ -324,7 +325,7 @@
 					sizeof(long) * BITS_PER_BYTE);
 		ret = check_eb_bitmap(bitmap, eb, len);
 		if (ret) {
-			test_msg("Setting straddling pages failed\n");
+			test_err("setting straddling pages failed");
 			return ret;
 		}
 
@@ -337,7 +338,7 @@
 					sizeof(long) * BITS_PER_BYTE);
 		ret = check_eb_bitmap(bitmap, eb, len);
 		if (ret) {
-			test_msg("Clearing straddling pages failed\n");
+			test_err("clearing straddling pages failed");
 			return ret;
 		}
 	}
@@ -361,7 +362,7 @@
 
 	ret = check_eb_bitmap(bitmap, eb, len);
 	if (ret) {
-		test_msg("Random bit pattern failed\n");
+		test_err("random bit pattern failed");
 		return ret;
 	}
 
@@ -376,7 +377,7 @@
 	struct extent_buffer *eb;
 	int ret;
 
-	test_msg("Running extent buffer bitmap tests\n");
+	test_msg("running extent buffer bitmap tests");
 
 	/*
 	 * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
@@ -389,13 +390,13 @@
 
 	bitmap = kmalloc(len, GFP_KERNEL);
 	if (!bitmap) {
-		test_msg("Couldn't allocate test bitmap\n");
+		test_err("couldn't allocate test bitmap");
 		return -ENOMEM;
 	}
 
 	eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
 	if (!eb) {
-		test_msg("Couldn't allocate test extent buffer\n");
+		test_err("couldn't allocate test extent buffer");
 		kfree(bitmap);
 		return -ENOMEM;
 	}
@@ -408,7 +409,7 @@
 	free_extent_buffer(eb);
 	eb = __alloc_dummy_extent_buffer(NULL, nodesize / 2, len);
 	if (!eb) {
-		test_msg("Couldn't allocate test extent buffer\n");
+		test_err("couldn't allocate test extent buffer");
 		kfree(bitmap);
 		return -ENOMEM;
 	}
@@ -424,7 +425,7 @@
 {
 	int ret;
 
-	test_msg("Running extent I/O tests\n");
+	test_msg("running extent I/O tests");
 
 	ret = test_find_delalloc(sectorsize);
 	if (ret)
@@ -432,6 +433,6 @@
 
 	ret = test_eb_bitmaps(sectorsize, nodesize);
 out:
-	test_msg("Extent I/O tests finished\n");
+	test_msg("extent I/O tests finished");
 	return ret;
 }
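
The pattern in the hunks above is uniform: error paths switch from the info-level test_msg() helper to a dedicated test_err() helper, messages lose their capitalized first letter, and the explicit "\n" terminators go away. A minimal sketch of the two helpers the converted call sites assume -- the real definitions live in fs/btrfs/tests/btrfs-tests.h and are not part of this diff -- could look like this:

  #include <linux/printk.h>

  /*
   * Sketch only, not the authoritative definitions. Both helpers append
   * the newline themselves, which is why every converted message above
   * drops its trailing "\n"; test_err() merely raises the log level so
   * failures stand out from the "running ..." progress messages.
   */
  #define test_msg(fmt, ...) \
  	pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
  #define test_err(fmt, ...) \
  	pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)
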
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index 79e0a5f..385a531 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -19,8 +19,8 @@
 
 #ifdef CONFIG_BTRFS_DEBUG
 		if (refcount_read(&em->refs) != 1) {
-			test_msg(
-"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d\n",
+			test_err(
+"em leak: em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx) refs %d",
 				 em->start, em->len, em->block_start,
 				 em->block_len, refcount_read(&em->refs));
 
@@ -47,7 +47,8 @@
  *                                    ->add_extent_mapping(0, 16K)
  *                                    -> #handle -EEXIST
  */
-static void test_case_1(struct extent_map_tree *em_tree)
+static void test_case_1(struct btrfs_fs_info *fs_info,
+		struct extent_map_tree *em_tree)
 {
 	struct extent_map *em;
 	u64 start = 0;
@@ -90,14 +91,14 @@
 	em->len = len;
 	em->block_start = start;
 	em->block_len = len;
-	ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len);
+	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
 	if (ret)
-		test_msg("case1 [%llu %llu]: ret %d\n", start, start + len, ret);
+		test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
 	if (em &&
 	    (em->start != 0 || extent_map_end(em) != SZ_16K ||
 	     em->block_start != 0 || em->block_len != SZ_16K))
-		test_msg(
-"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
+		test_err(
+"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
 			 start, start + len, ret, em->start, em->len,
 			 em->block_start, em->block_len);
 	free_extent_map(em);
@@ -112,7 +113,8 @@
  * Reading the inline ending up with EEXIST, ie. read an inline
  * extent and discard page cache and read it again.
  */
-static void test_case_2(struct extent_map_tree *em_tree)
+static void test_case_2(struct btrfs_fs_info *fs_info,
+		struct extent_map_tree *em_tree)
 {
 	struct extent_map *em;
 	int ret;
@@ -153,14 +155,14 @@
 	em->len = SZ_1K;
 	em->block_start = EXTENT_MAP_INLINE;
 	em->block_len = (u64)-1;
-	ret = btrfs_add_extent_mapping(em_tree, &em, em->start, em->len);
+	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
 	if (ret)
-		test_msg("case2 [0 1K]: ret %d\n", ret);
+		test_err("case2 [0 1K]: ret %d", ret);
 	if (em &&
 	    (em->start != 0 || extent_map_end(em) != SZ_1K ||
 	     em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1))
-		test_msg(
-"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu\n",
+		test_err(
+"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
 			 ret, em->start, em->len, em->block_start,
 			 em->block_len);
 	free_extent_map(em);
@@ -169,7 +171,8 @@
 	free_extent_map_tree(em_tree);
 }
 
-static void __test_case_3(struct extent_map_tree *em_tree, u64 start)
+static void __test_case_3(struct btrfs_fs_info *fs_info,
+		struct extent_map_tree *em_tree, u64 start)
 {
 	struct extent_map *em;
 	u64 len = SZ_4K;
@@ -198,9 +201,9 @@
 	em->len = SZ_16K;
 	em->block_start = 0;
 	em->block_len = SZ_16K;
-	ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
+	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
 	if (ret)
-		test_msg("case3 [0x%llx 0x%llx): ret %d\n",
+		test_err("case3 [0x%llx 0x%llx): ret %d",
 			 start, start + len, ret);
 	/*
 	 * Since bytes within em are contiguous, em->block_start is identical to
@@ -209,8 +212,8 @@
 	if (em &&
 	    (start < em->start || start + len > extent_map_end(em) ||
 	     em->start != em->block_start || em->len != em->block_len))
-		test_msg(
-"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
+		test_err(
+"case3 [0x%llx 0x%llx): ret %d em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
 			 start, start + len, ret, em->start, em->len,
 			 em->block_start, em->block_len);
 	free_extent_map(em);
@@ -235,14 +238,16 @@
  *   -> add_extent_mapping()
  *                            -> add_extent_mapping()
  */
-static void test_case_3(struct extent_map_tree *em_tree)
+static void test_case_3(struct btrfs_fs_info *fs_info,
+		struct extent_map_tree *em_tree)
 {
-	__test_case_3(em_tree, 0);
-	__test_case_3(em_tree, SZ_8K);
-	__test_case_3(em_tree, (12 * 1024ULL));
+	__test_case_3(fs_info, em_tree, 0);
+	__test_case_3(fs_info, em_tree, SZ_8K);
+	__test_case_3(fs_info, em_tree, (12 * 1024ULL));
 }
 
-static void __test_case_4(struct extent_map_tree *em_tree, u64 start)
+static void __test_case_4(struct btrfs_fs_info *fs_info,
+		struct extent_map_tree *em_tree, u64 start)
 {
 	struct extent_map *em;
 	u64 len = SZ_4K;
@@ -283,14 +288,14 @@
 	em->len = SZ_32K;
 	em->block_start = 0;
 	em->block_len = SZ_32K;
-	ret = btrfs_add_extent_mapping(em_tree, &em, start, len);
+	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
 	if (ret)
-		test_msg("case4 [0x%llx 0x%llx): ret %d\n",
+		test_err("case4 [0x%llx 0x%llx): ret %d",
 			 start, len, ret);
 	if (em &&
 	    (start < em->start || start + len > extent_map_end(em)))
-		test_msg(
-"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)\n",
+		test_err(
+"case4 [0x%llx 0x%llx): ret %d, added wrong em (start 0x%llx len 0x%llx block_start 0x%llx block_len 0x%llx)",
 			 start, len, ret, em->start, em->len, em->block_start,
 			 em->block_len);
 	free_extent_map(em);
@@ -324,30 +329,45 @@
  *                                             # handle -EEXIST when adding
  *                                             # [0, 32K)
  */
-static void test_case_4(struct extent_map_tree *em_tree)
+static void test_case_4(struct btrfs_fs_info *fs_info,
+		struct extent_map_tree *em_tree)
 {
-	__test_case_4(em_tree, 0);
-	__test_case_4(em_tree, SZ_4K);
+	__test_case_4(fs_info, em_tree, 0);
+	__test_case_4(fs_info, em_tree, SZ_4K);
 }
 
 int btrfs_test_extent_map(void)
 {
+	struct btrfs_fs_info *fs_info = NULL;
 	struct extent_map_tree *em_tree;
 
-	test_msg("Running extent_map tests\n");
+	test_msg("running extent_map tests");
+
+	/*
+	 * Note: the fs_info is not set up completely; we only need
+	 * fs_info::fsid for the tracepoint.
+	 */
+	fs_info = btrfs_alloc_dummy_fs_info(PAGE_SIZE, PAGE_SIZE);
+	if (!fs_info) {
+		test_msg("Couldn't allocate dummy fs info");
+		return -ENOMEM;
+	}
 
 	em_tree = kzalloc(sizeof(*em_tree), GFP_KERNEL);
 	if (!em_tree)
 		/* Skip the test on error. */
-		return 0;
+		goto out;
 
 	extent_map_tree_init(em_tree);
 
-	test_case_1(em_tree);
-	test_case_2(em_tree);
-	test_case_3(em_tree);
-	test_case_4(em_tree);
+	test_case_1(fs_info, em_tree);
+	test_case_2(fs_info, em_tree);
+	test_case_3(fs_info, em_tree);
+	test_case_4(fs_info, em_tree);
 
 	kfree(em_tree);
+out:
+	btrfs_free_dummy_fs_info(fs_info);
+
 	return 0;
 }
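
Beyond the message conversion, the extent-map hunks thread a struct btrfs_fs_info through test_case_1()..test_case_4() because btrfs_add_extent_mapping() now wants one; as the comment in btrfs_test_extent_map() notes, only fs_info::fsid has to be valid, so a dummy fs_info is sufficient. A hedged sketch of the resulting call-site shape (the function name here is illustrative, not from the patch):

  /*
   * Illustrative call-site shape, assuming the signature shown in the
   * hunks above. The helper takes &em because, when it hits -EEXIST,
   * it may hand back an existing overlapping mapping in its place.
   */
  static int demo_add_mapping(struct btrfs_fs_info *fs_info,
  			    struct extent_map_tree *em_tree,
  			    struct extent_map *em)
  {
  	int ret;

  	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em,
  				       em->start, em->len);
  	if (ret)
  		test_err("demo [%llu %llu]: ret %d",
  			 em->start, em->start + em->len, ret);
  	free_extent_map(em);
  	return ret;
  }
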
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index d3c9f8a..5c2f77e 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -20,63 +20,63 @@
 {
 	int ret = 0;
 
-	test_msg("Running extent only tests\n");
+	test_msg("running extent only tests");
 
 	/* First just make sure we can remove an entire entry */
 	ret = btrfs_add_free_space(cache, 0, SZ_4M);
 	if (ret) {
-		test_msg("Error adding initial extents %d\n", ret);
+		test_err("error adding initial extents %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, 0, SZ_4M);
 	if (ret) {
-		test_msg("Error removing extent %d\n", ret);
+		test_err("error removing extent %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, 0, SZ_4M)) {
-		test_msg("Full remove left some lingering space\n");
+		test_err("full remove left some lingering space");
 		return -1;
 	}
 
 	/* Ok edge and middle cases now */
 	ret = btrfs_add_free_space(cache, 0, SZ_4M);
 	if (ret) {
-		test_msg("Error adding half extent %d\n", ret);
+		test_err("error adding half extent %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_1M);
 	if (ret) {
-		test_msg("Error removing tail end %d\n", ret);
+		test_err("error removing tail end %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, 0, SZ_1M);
 	if (ret) {
-		test_msg("Error removing front end %d\n", ret);
+		test_err("error removing front end %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, SZ_2M, 4096);
 	if (ret) {
-		test_msg("Error removing middle piece %d\n", ret);
+		test_err("error removing middle piece %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, 0, SZ_1M)) {
-		test_msg("Still have space at the front\n");
+		test_err("still have space at the front");
 		return -1;
 	}
 
 	if (test_check_exists(cache, SZ_2M, 4096)) {
-		test_msg("Still have space in the middle\n");
+		test_err("still have space in the middle");
 		return -1;
 	}
 
 	if (test_check_exists(cache, 3 * SZ_1M, SZ_1M)) {
-		test_msg("Still have space at the end\n");
+		test_err("still have space at the end");
 		return -1;
 	}
 
@@ -92,34 +92,34 @@
 	u64 next_bitmap_offset;
 	int ret;
 
-	test_msg("Running bitmap only tests\n");
+	test_msg("running bitmap only tests");
 
 	ret = test_add_free_space_entry(cache, 0, SZ_4M, 1);
 	if (ret) {
-		test_msg("Couldn't create a bitmap entry %d\n", ret);
+		test_err("couldn't create a bitmap entry %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, 0, SZ_4M);
 	if (ret) {
-		test_msg("Error removing bitmap full range %d\n", ret);
+		test_err("error removing bitmap full range %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, 0, SZ_4M)) {
-		test_msg("Left some space in bitmap\n");
+		test_err("left some space in bitmap");
 		return -1;
 	}
 
 	ret = test_add_free_space_entry(cache, 0, SZ_4M, 1);
 	if (ret) {
-		test_msg("Couldn't add to our bitmap entry %d\n", ret);
+		test_err("couldn't add to our bitmap entry %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, SZ_1M, SZ_2M);
 	if (ret) {
-		test_msg("Couldn't remove middle chunk %d\n", ret);
+		test_err("couldn't remove middle chunk %d", ret);
 		return ret;
 	}
 
@@ -133,19 +133,19 @@
 	ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M,
 					SZ_4M, 1);
 	if (ret) {
-		test_msg("Couldn't add space that straddles two bitmaps %d\n",
+		test_err("couldn't add space that straddles two bitmaps %d",
 				ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, next_bitmap_offset - SZ_1M, SZ_2M);
 	if (ret) {
-		test_msg("Couldn't remove overlapping space %d\n", ret);
+		test_err("couldn't remove overlapping space %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, next_bitmap_offset - SZ_1M, SZ_2M)) {
-		test_msg("Left some space when removing overlapping\n");
+		test_err("left some space when removing overlapping");
 		return -1;
 	}
 
@@ -161,7 +161,7 @@
 	u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
 	int ret;
 
-	test_msg("Running bitmap and extent tests\n");
+	test_msg("running bitmap and extent tests");
 
 	/*
 	 * First let's do something simple, an extent at the same offset as the
@@ -170,42 +170,42 @@
 	 */
 	ret = test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1);
 	if (ret) {
-		test_msg("Couldn't create bitmap entry %d\n", ret);
+		test_err("couldn't create bitmap entry %d", ret);
 		return ret;
 	}
 
 	ret = test_add_free_space_entry(cache, 0, SZ_1M, 0);
 	if (ret) {
-		test_msg("Couldn't add extent entry %d\n", ret);
+		test_err("couldn't add extent entry %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, 0, SZ_1M);
 	if (ret) {
-		test_msg("Couldn't remove extent entry %d\n", ret);
+		test_err("couldn't remove extent entry %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, 0, SZ_1M)) {
-		test_msg("Left remnants after our remove\n");
+		test_err("left remnants after our remove");
 		return -1;
 	}
 
 	/* Now to add back the extent entry and remove from the bitmap */
 	ret = test_add_free_space_entry(cache, 0, SZ_1M, 0);
 	if (ret) {
-		test_msg("Couldn't re-add extent entry %d\n", ret);
+		test_err("couldn't re-add extent entry %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, SZ_4M, SZ_1M);
 	if (ret) {
-		test_msg("Couldn't remove from bitmap %d\n", ret);
+		test_err("couldn't remove from bitmap %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, SZ_4M, SZ_1M)) {
-		test_msg("Left remnants in the bitmap\n");
+		test_err("left remnants in the bitmap");
 		return -1;
 	}
 
@@ -215,18 +215,18 @@
 	 */
 	ret = test_add_free_space_entry(cache, SZ_1M, SZ_4M, 1);
 	if (ret) {
-		test_msg("Couldn't add to a bitmap %d\n", ret);
+		test_err("couldn't add to a bitmap %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, SZ_512K, 3 * SZ_1M);
 	if (ret) {
-		test_msg("Couldn't remove overlapping space %d\n", ret);
+		test_err("couldn't remove overlapping space %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, SZ_512K, 3 * SZ_1M)) {
-		test_msg("Left over pieces after removing overlapping\n");
+		test_err("left over pieces after removing overlapping");
 		return -1;
 	}
 
@@ -235,24 +235,24 @@
 	/* Now with the extent entry offset into the bitmap */
 	ret = test_add_free_space_entry(cache, SZ_4M, SZ_4M, 1);
 	if (ret) {
-		test_msg("Couldn't add space to the bitmap %d\n", ret);
+		test_err("couldn't add space to the bitmap %d", ret);
 		return ret;
 	}
 
 	ret = test_add_free_space_entry(cache, SZ_2M, SZ_2M, 0);
 	if (ret) {
-		test_msg("Couldn't add extent to the cache %d\n", ret);
+		test_err("couldn't add extent to the cache %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_4M);
 	if (ret) {
-		test_msg("Problem removing overlapping space %d\n", ret);
+		test_err("problem removing overlapping space %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, 3 * SZ_1M, SZ_4M)) {
-		test_msg("Left something behind when removing space");
+		test_err("left something behind when removing space");
 		return -1;
 	}
 
@@ -269,25 +269,25 @@
 	__btrfs_remove_free_space_cache(cache->free_space_ctl);
 	ret = test_add_free_space_entry(cache, bitmap_offset + SZ_4M, SZ_4M, 1);
 	if (ret) {
-		test_msg("Couldn't add bitmap %d\n", ret);
+		test_err("couldn't add bitmap %d", ret);
 		return ret;
 	}
 
 	ret = test_add_free_space_entry(cache, bitmap_offset - SZ_1M,
 					5 * SZ_1M, 0);
 	if (ret) {
-		test_msg("Couldn't add extent entry %d\n", ret);
+		test_err("couldn't add extent entry %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, bitmap_offset + SZ_1M, 5 * SZ_1M);
 	if (ret) {
-		test_msg("Failed to free our space %d\n", ret);
+		test_err("failed to free our space %d", ret);
 		return ret;
 	}
 
 	if (test_check_exists(cache, bitmap_offset + SZ_1M, 5 * SZ_1M)) {
-		test_msg("Left stuff over\n");
+		test_err("left stuff over");
 		return -1;
 	}
 
@@ -301,19 +301,19 @@
 	 */
 	ret = test_add_free_space_entry(cache, SZ_1M, SZ_2M, 1);
 	if (ret) {
-		test_msg("Couldn't add bitmap entry %d\n", ret);
+		test_err("couldn't add bitmap entry %d", ret);
 		return ret;
 	}
 
 	ret = test_add_free_space_entry(cache, 3 * SZ_1M, SZ_1M, 0);
 	if (ret) {
-		test_msg("Couldn't add extent entry %d\n", ret);
+		test_err("couldn't add extent entry %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_remove_free_space(cache, SZ_1M, 3 * SZ_1M);
 	if (ret) {
-		test_msg("Error removing bitmap and extent overlapping %d\n", ret);
+		test_err("error removing bitmap and extent overlapping %d", ret);
 		return ret;
 	}
 
@@ -335,12 +335,14 @@
 			      const int num_bitmaps)
 {
 	if (cache->free_space_ctl->free_extents != num_extents) {
-		test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n",
+		test_err(
+		"incorrect # of extent entries in the cache: %d, expected %d",
 			 cache->free_space_ctl->free_extents, num_extents);
 		return -EINVAL;
 	}
 	if (cache->free_space_ctl->total_bitmaps != num_bitmaps) {
-		test_msg("Incorrect # of extent entries in the cache: %d, expected %d\n",
+		test_err(
+		"incorrect # of extent entries in the cache: %d, expected %d",
 			 cache->free_space_ctl->total_bitmaps, num_bitmaps);
 		return -EINVAL;
 	}
@@ -358,7 +360,7 @@
 	 * allocate.
 	 */
 	if (cache->free_space_ctl->free_space != 0) {
-		test_msg("Cache free space is not 0\n");
+		test_err("cache free space is not 0");
 		return -EINVAL;
 	}
 
@@ -366,7 +368,7 @@
 	offset = btrfs_find_space_for_alloc(cache, 0, 4096, 0,
 					    &max_extent_size);
 	if (offset != 0) {
-		test_msg("Space allocation did not fail, returned offset: %llu",
+		test_err("space allocation did not fail, returned offset: %llu",
 			 offset);
 		return -EINVAL;
 	}
@@ -402,7 +404,7 @@
 	};
 	const struct btrfs_free_space_op *orig_free_space_ops;
 
-	test_msg("Running space stealing from bitmap to extent\n");
+	test_msg("running space stealing from bitmap to extent");
 
 	/*
 	 * For this test, we want to ensure we end up with an extent entry
@@ -430,7 +432,7 @@
 	 */
 	ret = test_add_free_space_entry(cache, SZ_128M - SZ_256K, SZ_128K, 0);
 	if (ret) {
-		test_msg("Couldn't add extent entry %d\n", ret);
+		test_err("couldn't add extent entry %d", ret);
 		return ret;
 	}
 
@@ -438,7 +440,7 @@
 	ret = test_add_free_space_entry(cache, SZ_128M + SZ_512K,
 					SZ_128M - SZ_512K, 1);
 	if (ret) {
-		test_msg("Couldn't add bitmap entry %d\n", ret);
+		test_err("couldn't add bitmap entry %d", ret);
 		return ret;
 	}
 
@@ -457,17 +459,17 @@
 				      SZ_128M + 768 * SZ_1K,
 				      SZ_128M - 768 * SZ_1K);
 	if (ret) {
-		test_msg("Failed to free part of bitmap space %d\n", ret);
+		test_err("failed to free part of bitmap space %d", ret);
 		return ret;
 	}
 
 	/* Confirm that only those 2 ranges are marked as free. */
 	if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_128K)) {
-		test_msg("Free space range missing\n");
+		test_err("free space range missing");
 		return -ENOENT;
 	}
 	if (!test_check_exists(cache, SZ_128M + SZ_512K, SZ_256K)) {
-		test_msg("Free space range missing\n");
+		test_err("free space range missing");
 		return -ENOENT;
 	}
 
@@ -477,7 +479,7 @@
 	 */
 	if (test_check_exists(cache, SZ_128M + 768 * SZ_1K,
 			      SZ_128M - 768 * SZ_1K)) {
-		test_msg("Bitmap region not removed from space cache\n");
+		test_err("bitmap region not removed from space cache");
 		return -EINVAL;
 	}
 
@@ -486,7 +488,7 @@
 	 * covered by the bitmap, isn't marked as free.
 	 */
 	if (test_check_exists(cache, SZ_128M + SZ_256K, SZ_256K)) {
-		test_msg("Invalid bitmap region marked as free\n");
+		test_err("invalid bitmap region marked as free");
 		return -EINVAL;
 	}
 
@@ -495,7 +497,7 @@
 	 * by the bitmap too, isn't marked as free either.
 	 */
 	if (test_check_exists(cache, SZ_128M, SZ_256K)) {
-		test_msg("Invalid bitmap region marked as free\n");
+		test_err("invalid bitmap region marked as free");
 		return -EINVAL;
 	}
 
@@ -506,12 +508,12 @@
 	 */
 	ret = btrfs_add_free_space(cache, SZ_128M, SZ_512K);
 	if (ret) {
-		test_msg("Error adding free space: %d\n", ret);
+		test_err("error adding free space: %d", ret);
 		return ret;
 	}
 	/* Confirm the region is marked as free. */
 	if (!test_check_exists(cache, SZ_128M, SZ_512K)) {
-		test_msg("Bitmap region not marked as free\n");
+		test_err("bitmap region not marked as free");
 		return -ENOENT;
 	}
 
@@ -531,7 +533,7 @@
 	 */
 	ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, sectorsize);
 	if (ret) {
-		test_msg("Error adding free space: %d\n", ret);
+		test_err("error adding free space: %d", ret);
 		return ret;
 	}
 
@@ -550,12 +552,12 @@
 	 */
 	ret = btrfs_add_free_space(cache, SZ_128M - SZ_128K, SZ_128K);
 	if (ret) {
-		test_msg("Error adding free space: %d\n", ret);
+		test_err("error adding free space: %d", ret);
 		return ret;
 	}
 	/* Confirm the region is marked as free. */
 	if (!test_check_exists(cache, SZ_128M - SZ_128K, SZ_128K)) {
-		test_msg("Extent region not marked as free\n");
+		test_err("extent region not marked as free");
 		return -ENOENT;
 	}
 
@@ -583,12 +585,12 @@
 	 * allocate the whole free space at once.
 	 */
 	if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_1M)) {
-		test_msg("Expected region not marked as free\n");
+		test_err("expected region not marked as free");
 		return -ENOENT;
 	}
 
 	if (cache->free_space_ctl->free_space != (SZ_1M + sectorsize)) {
-		test_msg("Cache free space is not 1Mb + %u\n", sectorsize);
+		test_err("cache free space is not 1Mb + %u", sectorsize);
 		return -EINVAL;
 	}
 
@@ -596,7 +598,8 @@
 					    0, SZ_1M, 0,
 					    &max_extent_size);
 	if (offset != (SZ_128M - SZ_256K)) {
-		test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
+		test_err(
+	"failed to allocate 1Mb from space cache, returned offset is: %llu",
 			 offset);
 		return -EINVAL;
 	}
@@ -610,7 +613,7 @@
 		return ret;
 
 	if (cache->free_space_ctl->free_space != sectorsize) {
-		test_msg("Cache free space is not %u\n", sectorsize);
+		test_err("cache free space is not %u", sectorsize);
 		return -EINVAL;
 	}
 
@@ -618,7 +621,7 @@
 					    0, sectorsize, 0,
 					    &max_extent_size);
 	if (offset != (SZ_128M + SZ_16M)) {
-		test_msg("Failed to allocate %u, returned offset : %llu\n",
+		test_err("failed to allocate %u, returned offset : %llu",
 			 sectorsize, offset);
 		return -EINVAL;
 	}
@@ -640,14 +643,14 @@
 	 */
 	ret = test_add_free_space_entry(cache, SZ_128M + SZ_128K, SZ_128K, 0);
 	if (ret) {
-		test_msg("Couldn't add extent entry %d\n", ret);
+		test_err("couldn't add extent entry %d", ret);
 		return ret;
 	}
 
 	/* Bitmap entry covering free space range [0, 128Mb - 512Kb[ */
 	ret = test_add_free_space_entry(cache, 0, SZ_128M - SZ_512K, 1);
 	if (ret) {
-		test_msg("Couldn't add bitmap entry %d\n", ret);
+		test_err("couldn't add bitmap entry %d", ret);
 		return ret;
 	}
 
@@ -664,17 +667,17 @@
 	 */
 	ret = btrfs_remove_free_space(cache, 0, SZ_128M - 768 * SZ_1K);
 	if (ret) {
-		test_msg("Failed to free part of bitmap space %d\n", ret);
+		test_err("failed to free part of bitmap space %d", ret);
 		return ret;
 	}
 
 	/* Confirm that only those 2 ranges are marked as free. */
 	if (!test_check_exists(cache, SZ_128M + SZ_128K, SZ_128K)) {
-		test_msg("Free space range missing\n");
+		test_err("free space range missing");
 		return -ENOENT;
 	}
 	if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_256K)) {
-		test_msg("Free space range missing\n");
+		test_err("free space range missing");
 		return -ENOENT;
 	}
 
@@ -683,7 +686,7 @@
 	 * as free anymore.
 	 */
 	if (test_check_exists(cache, 0, SZ_128M - 768 * SZ_1K)) {
-		test_msg("Bitmap region not removed from space cache\n");
+		test_err("bitmap region not removed from space cache");
 		return -EINVAL;
 	}
 
@@ -692,7 +695,7 @@
 	 * covered by the bitmap, isn't marked as free.
 	 */
 	if (test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) {
-		test_msg("Invalid bitmap region marked as free\n");
+		test_err("invalid bitmap region marked as free");
 		return -EINVAL;
 	}
 
@@ -703,12 +706,12 @@
 	 */
 	ret = btrfs_add_free_space(cache, SZ_128M - SZ_512K, SZ_512K);
 	if (ret) {
-		test_msg("Error adding free space: %d\n", ret);
+		test_err("error adding free space: %d", ret);
 		return ret;
 	}
 	/* Confirm the region is marked as free. */
 	if (!test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) {
-		test_msg("Bitmap region not marked as free\n");
+		test_err("bitmap region not marked as free");
 		return -ENOENT;
 	}
 
@@ -728,7 +731,7 @@
 	 */
 	ret = btrfs_add_free_space(cache, SZ_32M, 2 * sectorsize);
 	if (ret) {
-		test_msg("Error adding free space: %d\n", ret);
+		test_err("error adding free space: %d", ret);
 		return ret;
 	}
 
@@ -739,12 +742,12 @@
 	 */
 	ret = btrfs_add_free_space(cache, SZ_128M, SZ_128K);
 	if (ret) {
-		test_msg("Error adding free space: %d\n", ret);
+		test_err("error adding free space: %d", ret);
 		return ret;
 	}
 	/* Confirm the region is marked as free. */
 	if (!test_check_exists(cache, SZ_128M, SZ_128K)) {
-		test_msg("Extent region not marked as free\n");
+		test_err("extent region not marked as free");
 		return -ENOENT;
 	}
 
@@ -772,19 +775,20 @@
 	 * allocate the whole free space at once.
 	 */
 	if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_1M)) {
-		test_msg("Expected region not marked as free\n");
+		test_err("expected region not marked as free");
 		return -ENOENT;
 	}
 
 	if (cache->free_space_ctl->free_space != (SZ_1M + 2 * sectorsize)) {
-		test_msg("Cache free space is not 1Mb + %u\n", 2 * sectorsize);
+		test_err("cache free space is not 1Mb + %u", 2 * sectorsize);
 		return -EINVAL;
 	}
 
 	offset = btrfs_find_space_for_alloc(cache, 0, SZ_1M, 0,
 					    &max_extent_size);
 	if (offset != (SZ_128M - 768 * SZ_1K)) {
-		test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n",
+		test_err(
+	"failed to allocate 1Mb from space cache, returned offset is: %llu",
 			 offset);
 		return -EINVAL;
 	}
@@ -798,7 +802,7 @@
 		return ret;
 
 	if (cache->free_space_ctl->free_space != 2 * sectorsize) {
-		test_msg("Cache free space is not %u\n", 2 * sectorsize);
+		test_err("cache free space is not %u", 2 * sectorsize);
 		return -EINVAL;
 	}
 
@@ -806,9 +810,8 @@
 					    0, 2 * sectorsize, 0,
 					    &max_extent_size);
 	if (offset != SZ_32M) {
-		test_msg("Failed to allocate %u, offset: %llu\n",
-			 2 * sectorsize,
-			 offset);
+		test_err("failed to allocate %u, offset: %llu",
+			 2 * sectorsize, offset);
 		return -EINVAL;
 	}
 
@@ -829,7 +832,7 @@
 	struct btrfs_root *root = NULL;
 	int ret = -ENOMEM;
 
-	test_msg("Running btrfs free space cache tests\n");
+	test_msg("running btrfs free space cache tests");
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info)
 		return -ENOMEM;
@@ -843,7 +846,7 @@
 	cache = btrfs_alloc_dummy_block_group(fs_info,
 				      BITS_PER_BITMAP * sectorsize + PAGE_SIZE);
 	if (!cache) {
-		test_msg("Couldn't run the tests\n");
+		test_err("couldn't run the tests");
 		btrfs_free_dummy_fs_info(fs_info);
 		return 0;
 	}
@@ -871,6 +874,6 @@
 	btrfs_free_dummy_block_group(cache);
 	btrfs_free_dummy_root(root);
 	btrfs_free_dummy_fs_info(fs_info);
-	test_msg("Free space cache tests finished\n");
+	test_msg("free space cache tests finished");
 	return ret;
 }
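
One piece of arithmetic behind the free-space cache tests is worth spelling out: a single free-space bitmap holds BITS_PER_BITMAP bits, each covering one sector, so with the common 4KiB page and 4KiB sector sizes one bitmap spans 32768 * 4096 bytes = 128MiB -- which is why the steal-from-bitmap scenarios above pivot around the SZ_128M boundary. A worked sketch under those assumptions:

  /*
   * Worked example assuming PAGE_SIZE == 4096 and sectorsize == 4096;
   * both are assumptions, not guaranteed (ppc64, for one, uses 64KiB
   * pages, which the extent-buffer test above already accounts for).
   */
  u32 sectorsize = 4096;
  u64 bitmap_span = (u64)BITS_PER_BITMAP * sectorsize;
  /* BITS_PER_BITMAP = PAGE_SIZE * 8 = 32768 bits -> span = 128MiB */
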
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index e1f9666..89346da 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -32,7 +32,7 @@
 
 	info = search_free_space_info(trans, fs_info, cache, path, 0);
 	if (IS_ERR(info)) {
-		test_msg("Could not find free space info\n");
+		test_err("could not find free space info");
 		ret = PTR_ERR(info);
 		goto out;
 	}
@@ -40,7 +40,7 @@
 	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
 
 	if (extent_count != num_extents) {
-		test_msg("Extent count is wrong\n");
+		test_err("extent count is wrong");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -99,7 +99,7 @@
 	btrfs_release_path(path);
 	return ret;
 invalid:
-	test_msg("Free space tree is invalid\n");
+	test_err("free space tree is invalid");
 	ret = -EINVAL;
 	goto out;
 }
@@ -117,7 +117,7 @@
 
 	info = search_free_space_info(trans, fs_info, cache, path, 0);
 	if (IS_ERR(info)) {
-		test_msg("Could not find free space info\n");
+		test_err("could not find free space info");
 		btrfs_release_path(path);
 		return PTR_ERR(info);
 	}
@@ -131,15 +131,15 @@
 
 	/* Flip it to the other format and check that for good measure. */
 	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
-		ret = convert_free_space_to_extents(trans, fs_info, cache, path);
+		ret = convert_free_space_to_extents(trans, cache, path);
 		if (ret) {
-			test_msg("Could not convert to extents\n");
+			test_err("could not convert to extents");
 			return ret;
 		}
 	} else {
-		ret = convert_free_space_to_bitmaps(trans, fs_info, cache, path);
+		ret = convert_free_space_to_bitmaps(trans, cache, path);
 		if (ret) {
-			test_msg("Could not convert to bitmaps\n");
+			test_err("could not convert to bitmaps");
 			return ret;
 		}
 	}
@@ -170,11 +170,11 @@
 	const struct free_space_extent extents[] = {};
 	int ret;
 
-	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+	ret = __remove_from_free_space_tree(trans, cache, path,
 					    cache->key.objectid,
 					    cache->key.offset);
 	if (ret) {
-		test_msg("Could not remove free space\n");
+		test_err("could not remove free space");
 		return ret;
 	}
 
@@ -194,10 +194,10 @@
 	};
 	int ret;
 
-	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+	ret = __remove_from_free_space_tree(trans, cache, path,
 					    cache->key.objectid, alignment);
 	if (ret) {
-		test_msg("Could not remove free space\n");
+		test_err("could not remove free space");
 		return ret;
 	}
 
@@ -217,12 +217,12 @@
 	};
 	int ret;
 
-	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+	ret = __remove_from_free_space_tree(trans, cache, path,
 					    cache->key.objectid +
 					    cache->key.offset - alignment,
 					    alignment);
 	if (ret) {
-		test_msg("Could not remove free space\n");
+		test_err("could not remove free space");
 		return ret;
 	}
 
@@ -243,11 +243,11 @@
 	};
 	int ret;
 
-	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+	ret = __remove_from_free_space_tree(trans, cache, path,
 					    cache->key.objectid + alignment,
 					    alignment);
 	if (ret) {
-		test_msg("Could not remove free space\n");
+		test_err("could not remove free space");
 		return ret;
 	}
 
@@ -266,26 +266,26 @@
 	};
 	int ret;
 
-	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+	ret = __remove_from_free_space_tree(trans, cache, path,
 					    cache->key.objectid,
 					    cache->key.offset);
 	if (ret) {
-		test_msg("Could not remove free space\n");
+		test_err("could not remove free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid, alignment);
+	ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+	ret = __add_to_free_space_tree(trans, cache, path,
 				       cache->key.objectid + alignment,
 				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
@@ -304,27 +304,27 @@
 	};
 	int ret;
 
-	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+	ret = __remove_from_free_space_tree(trans, cache, path,
 					    cache->key.objectid,
 					    cache->key.offset);
 	if (ret) {
-		test_msg("Could not remove free space\n");
+		test_err("could not remove free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+	ret = __add_to_free_space_tree(trans, cache, path,
 				       cache->key.objectid + 2 * alignment,
 				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+	ret = __add_to_free_space_tree(trans, cache, path,
 				       cache->key.objectid + alignment,
 				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
@@ -343,34 +343,34 @@
 	};
 	int ret;
 
-	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+	ret = __remove_from_free_space_tree(trans, cache, path,
 					    cache->key.objectid,
 					    cache->key.offset);
 	if (ret) {
-		test_msg("Could not remove free space\n");
+		test_err("could not remove free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid, alignment);
+	ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+	ret = __add_to_free_space_tree(trans, cache, path,
 				       cache->key.objectid + 2 * alignment,
 				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+	ret = __add_to_free_space_tree(trans, cache, path,
 				       cache->key.objectid + alignment,
 				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
@@ -391,34 +391,34 @@
 	};
 	int ret;
 
-	ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
+	ret = __remove_from_free_space_tree(trans, cache, path,
 					    cache->key.objectid,
 					    cache->key.offset);
 	if (ret) {
-		test_msg("Could not remove free space\n");
+		test_err("could not remove free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-				       cache->key.objectid, alignment);
+	ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+	ret = __add_to_free_space_tree(trans, cache, path,
 				       cache->key.objectid + 4 * alignment,
 				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
-	ret = __add_to_free_space_tree(trans, fs_info, cache, path,
+	ret = __add_to_free_space_tree(trans, cache, path,
 				       cache->key.objectid + 2 * alignment,
 				       alignment);
 	if (ret) {
-		test_msg("Could not add free space\n");
+		test_err("could not add free space");
 		return ret;
 	}
 
@@ -444,14 +444,14 @@
 
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
-		test_msg("Couldn't allocate dummy fs info\n");
+		test_err("couldn't allocate dummy fs info");
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	root = btrfs_alloc_dummy_root(fs_info);
 	if (IS_ERR(root)) {
-		test_msg("Couldn't allocate dummy root\n");
+		test_err("couldn't allocate dummy root");
 		ret = PTR_ERR(root);
 		goto out;
 	}
@@ -463,7 +463,7 @@
 
 	root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
 	if (!root->node) {
-		test_msg("Couldn't allocate dummy buffer\n");
+		test_err("couldn't allocate dummy buffer");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -473,7 +473,7 @@
 
 	cache = btrfs_alloc_dummy_block_group(fs_info, 8 * alignment);
 	if (!cache) {
-		test_msg("Couldn't allocate dummy block group cache\n");
+		test_err("couldn't allocate dummy block group cache");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -482,26 +482,25 @@
 	cache->needs_free_space = 1;
 	cache->fs_info = root->fs_info;
 
-	btrfs_init_dummy_trans(&trans);
+	btrfs_init_dummy_trans(&trans, root->fs_info);
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		test_msg("Couldn't allocate path\n");
+		test_err("couldn't allocate path");
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	ret = add_block_group_free_space(&trans, root->fs_info, cache);
+	ret = add_block_group_free_space(&trans, cache);
 	if (ret) {
-		test_msg("Could not add block group free space\n");
+		test_err("could not add block group free space");
 		goto out;
 	}
 
 	if (bitmaps) {
-		ret = convert_free_space_to_bitmaps(&trans, root->fs_info,
-						    cache, path);
+		ret = convert_free_space_to_bitmaps(&trans, cache, path);
 		if (ret) {
-			test_msg("Could not convert block group to bitmaps\n");
+			test_err("could not convert block group to bitmaps");
 			goto out;
 		}
 	}
@@ -510,14 +509,14 @@
 	if (ret)
 		goto out;
 
-	ret = remove_block_group_free_space(&trans, root->fs_info, cache);
+	ret = remove_block_group_free_space(&trans, cache);
 	if (ret) {
-		test_msg("Could not remove block group free space\n");
+		test_err("could not remove block group free space");
 		goto out;
 	}
 
 	if (btrfs_header_nritems(root->node) != 0) {
-		test_msg("Free space tree has leftover items\n");
+		test_err("free space tree has leftover items");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -539,14 +538,16 @@
 
 	ret = run_test(test_func, 0, sectorsize, nodesize, alignment);
 	if (ret) {
-		test_msg("%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u\n",
+		test_err(
+	"%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u",
 			 test_func, sectorsize, nodesize, alignment);
 		test_ret = ret;
 	}
 
 	ret = run_test(test_func, 1, sectorsize, nodesize, alignment);
 	if (ret) {
-		test_msg("%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u\n",
+		test_err(
+	"%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u",
 			 test_func, sectorsize, nodesize, alignment);
 		test_ret = ret;
 	}
@@ -577,7 +578,7 @@
 	 */
 	bitmap_alignment = BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE;
 
-	test_msg("Running free space tree tests\n");
+	test_msg("running free space tree tests");
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		int ret;
 
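
The free-space-tree hunks make the opposite simplification: helpers like __add_to_free_space_tree(), __remove_from_free_space_tree(), convert_free_space_to_bitmaps()/convert_free_space_to_extents(), and add_block_group_free_space()/remove_block_group_free_space() stop taking an explicit fs_info, while btrfs_init_dummy_trans() starts taking one -- the point being that fs_info is now reachable from the transaction and the block group themselves. A sketch of the slimmed-down setup sequence from run_test(), with the internal derivation of fs_info (presumably trans->fs_info or cache->fs_info) being an assumption rather than something shown in this diff:

  /* the dummy transaction now carries the fs_info the helpers used to take */
  btrfs_init_dummy_trans(&trans, root->fs_info);

  ret = add_block_group_free_space(&trans, cache);
  if (ret) {
  	test_err("could not add block group free space");
  	goto out;
  }
  if (bitmaps) {
  	ret = convert_free_space_to_bitmaps(&trans, cache, path);
  	if (ret)
  		test_err("could not convert block group to bitmaps");
  }
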
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index e0ba799..64043f0 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -228,7 +228,7 @@
 
 	inode = btrfs_new_test_inode();
 	if (!inode) {
-		test_msg("Couldn't allocate inode\n");
+		test_err("couldn't allocate inode");
 		return ret;
 	}
 
@@ -238,19 +238,19 @@
 
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
-		test_msg("Couldn't allocate dummy fs info\n");
+		test_err("couldn't allocate dummy fs info");
 		goto out;
 	}
 
 	root = btrfs_alloc_dummy_root(fs_info);
 	if (IS_ERR(root)) {
-		test_msg("Couldn't allocate root\n");
+		test_err("couldn't allocate root");
 		goto out;
 	}
 
 	root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
 	if (!root->node) {
-		test_msg("Couldn't allocate dummy buffer\n");
+		test_err("couldn't allocate dummy buffer");
 		goto out;
 	}
 
@@ -268,11 +268,11 @@
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0);
 	if (IS_ERR(em)) {
 		em = NULL;
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != EXTENT_MAP_HOLE) {
-		test_msg("Expected a hole, got %llu\n", em->block_start);
+		test_err("expected a hole, got %llu", em->block_start);
 		goto out;
 	}
 	free_extent_map(em);
@@ -287,20 +287,21 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != EXTENT_MAP_HOLE) {
-		test_msg("Expected a hole, got %llu\n", em->block_start);
+		test_err("expected a hole, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != 0 || em->len != 5) {
-		test_msg("Unexpected extent wanted start 0 len 5, got start "
-			 "%llu len %llu\n", em->start, em->len);
+		test_err(
+		"unexpected extent wanted start 0 len 5, got start %llu len %llu",
+			em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	offset = em->start + em->len;
@@ -308,21 +309,22 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != EXTENT_MAP_INLINE) {
-		test_msg("Expected an inline, got %llu\n", em->block_start);
+		test_err("expected an inline, got %llu", em->block_start);
 		goto out;
 	}
 
 	if (em->start != offset || em->len != (sectorsize - 5)) {
-		test_msg("Unexpected extent wanted start %llu len 1, got start "
-			 "%llu len %llu\n", offset, em->start, em->len);
+		test_err(
+	"unexpected extent wanted start %llu len 1, got start %llu len %llu",
+			offset, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	/*
@@ -335,20 +337,21 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != EXTENT_MAP_HOLE) {
-		test_msg("Expected a hole, got %llu\n", em->block_start);
+		test_err("expected a hole, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != 4) {
-		test_msg("Unexpected extent wanted start %llu len 4, got start "
-			 "%llu len %llu\n", offset, em->start, em->len);
+		test_err(
+	"unexpected extent wanted start %llu len 4, got start %llu len %llu",
+			offset, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	offset = em->start + em->len;
@@ -357,24 +360,25 @@
 	/* Regular extent */
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize - 1) {
-		test_msg("Unexpected extent wanted start %llu len 4095, got "
-			 "start %llu len %llu\n", offset, em->start, em->len);
+		test_err(
+	"unexpected extent wanted start %llu len 4095, got start %llu len %llu",
+			offset, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+		test_err("wrong orig offset, want %llu, have %llu", em->start,
 			 em->orig_start);
 		goto out;
 	}
@@ -384,25 +388,25 @@
 	/* The next 3 are split extents */
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+		"unexpected extent start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+		test_err("wrong orig offset, want %llu, have %llu", em->start,
 			 em->orig_start);
 		goto out;
 	}
@@ -413,21 +417,21 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != EXTENT_MAP_HOLE) {
-		test_msg("Expected a hole, got %llu\n", em->block_start);
+		test_err("expected a hole, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	offset = em->start + em->len;
@@ -435,31 +439,31 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != 2 * sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, 2 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	if (em->orig_start != orig_start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n",
+		test_err("wrong orig offset, want %llu, have %llu",
 			 orig_start, em->orig_start);
 		goto out;
 	}
 	disk_bytenr += (em->start - orig_start);
 	if (em->block_start != disk_bytenr) {
-		test_msg("Wrong block start, want %llu, have %llu\n",
+		test_err("wrong block start, want %llu, have %llu",
 			 disk_bytenr, em->block_start);
 		goto out;
 	}
@@ -469,26 +473,26 @@
 	/* Prealloc extent */
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != prealloc_only) {
-		test_msg("Unexpected flags set, want %lu have %lu\n",
+		test_err("unexpected flags set, want %lu have %lu",
 			 prealloc_only, em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+		test_err("wrong orig offset, want %llu, have %llu", em->start,
 			 em->orig_start);
 		goto out;
 	}
@@ -498,26 +502,26 @@
 	/* The next 3 are a half written prealloc extent */
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != prealloc_only) {
-		test_msg("Unexpected flags set, want %lu have %lu\n",
+		test_err("unexpected flags set, want %lu have %lu",
 			 prealloc_only, em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+		test_err("wrong orig offset, want %llu, have %llu", em->start,
 			 em->orig_start);
 		goto out;
 	}
@@ -528,30 +532,30 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_HOLE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	if (em->orig_start != orig_start) {
-		test_msg("Unexpected orig offset, wanted %llu, have %llu\n",
+		test_err("unexpected orig offset, wanted %llu, have %llu",
 			 orig_start, em->orig_start);
 		goto out;
 	}
 	if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
-		test_msg("Unexpected block start, wanted %llu, have %llu\n",
+		test_err("unexpected block start, wanted %llu, have %llu",
 			 disk_bytenr + (em->start - em->orig_start),
 			 em->block_start);
 		goto out;
@@ -561,31 +565,31 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != 2 * sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, 2 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != prealloc_only) {
-		test_msg("Unexpected flags set, want %lu have %lu\n",
+		test_err("unexpected flags set, want %lu have %lu",
 			 prealloc_only, em->flags);
 		goto out;
 	}
 	if (em->orig_start != orig_start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start,
+		test_err("wrong orig offset, want %llu, have %llu", orig_start,
 			 em->orig_start);
 		goto out;
 	}
 	if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
-		test_msg("Unexpected block start, wanted %llu, have %llu\n",
+		test_err("unexpected block start, wanted %llu, have %llu",
 			 disk_bytenr + (em->start - em->orig_start),
 			 em->block_start);
 		goto out;
@@ -596,31 +600,31 @@
 	/* Now for the compressed extent */
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != 2 * sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u,"
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, 2 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != compressed_only) {
-		test_msg("Unexpected flags set, want %lu have %lu\n",
+		test_err("unexpected flags set, want %lu have %lu",
 			 compressed_only, em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n",
+		test_err("wrong orig offset, want %llu, have %llu",
 			 em->start, em->orig_start);
 		goto out;
 	}
 	if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
-		test_msg("Unexpected compress type, wanted %d, got %d\n",
+		test_err("unexpected compress type, wanted %d, got %d",
 			 BTRFS_COMPRESS_ZLIB, em->compress_type);
 		goto out;
 	}
@@ -630,31 +634,31 @@
 	/* Split compressed extent */
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u,"
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != compressed_only) {
-		test_msg("Unexpected flags set, want %lu have %lu\n",
+		test_err("unexpected flags set, want %lu have %lu",
 			 compressed_only, em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n",
+		test_err("wrong orig offset, want %llu, have %llu",
 			 em->start, em->orig_start);
 		goto out;
 	}
 	if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
-		test_msg("Unexpected compress type, wanted %d, got %d\n",
+		test_err("unexpected compress type, wanted %d, got %d",
 			 BTRFS_COMPRESS_ZLIB, em->compress_type);
 		goto out;
 	}
@@ -665,25 +669,25 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+		test_err("wrong orig offset, want %llu, have %llu", em->start,
 			 em->orig_start);
 		goto out;
 	}
@@ -692,32 +696,32 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != disk_bytenr) {
-		test_msg("Block start does not match, want %llu got %llu\n",
+		test_err("block start does not match, want %llu got %llu",
 			 disk_bytenr, em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != 2 * sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, 2 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != compressed_only) {
-		test_msg("Unexpected flags set, want %lu have %lu\n",
+		test_err("unexpected flags set, want %lu have %lu",
 			 compressed_only, em->flags);
 		goto out;
 	}
 	if (em->orig_start != orig_start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n",
+		test_err("wrong orig offset, want %llu, have %llu",
 			 orig_start, em->orig_start);
 		goto out;
 	}
 	if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
-		test_msg("Unexpected compress type, wanted %d, got %d\n",
+		test_err("unexpected compress type, wanted %d, got %d",
 			 BTRFS_COMPRESS_ZLIB, em->compress_type);
 		goto out;
 	}
@@ -728,25 +732,25 @@
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6,
 			sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+		test_err("wrong orig offset, want %llu, have %llu", em->start,
 			 em->orig_start);
 		goto out;
 	}
@@ -755,11 +759,11 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != EXTENT_MAP_HOLE) {
-		test_msg("Expected a hole extent, got %llu\n", em->block_start);
+		test_err("expected a hole extent, got %llu", em->block_start);
 		goto out;
 	}
 	/*
@@ -768,18 +772,18 @@
 	 * test.
 	 */
 	if (em->start != offset || em->len != 3 * sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, 3 * sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != vacancy_only) {
-		test_msg("Unexpected flags set, want %lu have %lu\n",
+		test_err("unexpected flags set, want %lu have %lu",
 			 vacancy_only, em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+		test_err("wrong orig offset, want %llu, have %llu", em->start,
 			 em->orig_start);
 		goto out;
 	}
@@ -788,25 +792,25 @@
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != offset || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %llu len %u,"
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %llu len %u, got start %llu len %llu",
 			offset, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+		test_err("unexpected flags set, want 0 have %lu", em->flags);
 		goto out;
 	}
 	if (em->orig_start != em->start) {
-		test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+		test_err("wrong orig offset, want %llu, have %llu", em->start,
 			 em->orig_start);
 		goto out;
 	}
@@ -830,7 +834,7 @@
 
 	inode = btrfs_new_test_inode();
 	if (!inode) {
-		test_msg("Couldn't allocate inode\n");
+		test_err("couldn't allocate inode");
 		return ret;
 	}
 
@@ -840,19 +844,19 @@
 
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
-		test_msg("Couldn't allocate dummy fs info\n");
+		test_err("couldn't allocate dummy fs info");
 		goto out;
 	}
 
 	root = btrfs_alloc_dummy_root(fs_info);
 	if (IS_ERR(root)) {
-		test_msg("Couldn't allocate root\n");
+		test_err("couldn't allocate root");
 		goto out;
 	}
 
 	root->node = alloc_dummy_extent_buffer(fs_info, nodesize);
 	if (!root->node) {
-		test_msg("Couldn't allocate dummy buffer\n");
+		test_err("couldn't allocate dummy buffer");
 		goto out;
 	}
 
@@ -871,21 +875,21 @@
 		      sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != EXTENT_MAP_HOLE) {
-		test_msg("Expected a hole, got %llu\n", em->block_start);
+		test_err("expected a hole, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != 0 || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start 0 len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start 0 len %u, got start %llu len %llu",
 			sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != vacancy_only) {
-		test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only,
+		test_err("wrong flags, wanted %lu, have %lu", vacancy_only,
 			 em->flags);
 		goto out;
 	}
@@ -894,21 +898,21 @@
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize,
 			2 * sectorsize, 0);
 	if (IS_ERR(em)) {
-		test_msg("Got an error when we shouldn't have\n");
+		test_err("got an error when we shouldn't have");
 		goto out;
 	}
 	if (em->block_start != sectorsize) {
-		test_msg("Expected a real extent, got %llu\n", em->block_start);
+		test_err("expected a real extent, got %llu", em->block_start);
 		goto out;
 	}
 	if (em->start != sectorsize || em->len != sectorsize) {
-		test_msg("Unexpected extent wanted start %u len %u, "
-			"got start %llu len %llu\n",
+		test_err(
+	"unexpected extent wanted start %u len %u, got start %llu len %llu",
 			sectorsize, sectorsize, em->start, em->len);
 		goto out;
 	}
 	if (em->flags != 0) {
-		test_msg("Unexpected flags set, wanted 0 got %lu\n",
+		test_err("unexpected flags set, wanted 0 got %lu",
 			 em->flags);
 		goto out;
 	}
@@ -931,19 +935,19 @@
 
 	inode = btrfs_new_test_inode();
 	if (!inode) {
-		test_msg("Couldn't allocate inode\n");
+		test_err("couldn't allocate inode");
 		return ret;
 	}
 
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
-		test_msg("Couldn't allocate dummy fs info\n");
+		test_err("couldn't allocate dummy fs info");
 		goto out;
 	}
 
 	root = btrfs_alloc_dummy_root(fs_info);
 	if (IS_ERR(root)) {
-		test_msg("Couldn't allocate root\n");
+		test_err("couldn't allocate root");
 		goto out;
 	}
 
@@ -954,12 +958,12 @@
 	ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, 0,
 					NULL, 0);
 	if (ret) {
-		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+		test_err("btrfs_set_extent_delalloc returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents != 1) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 1, got %u\n",
+		test_err("miscount, wanted 1, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -969,12 +973,12 @@
 					BTRFS_MAX_EXTENT_SIZE + sectorsize - 1,
 					0, NULL, 0);
 	if (ret) {
-		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+		test_err("btrfs_set_extent_delalloc returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents != 2) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 2, got %u\n",
+		test_err("miscount, wanted 2, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -986,12 +990,12 @@
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
 			       EXTENT_UPTODATE, 0, 0, NULL);
 	if (ret) {
-		test_msg("clear_extent_bit returned %d\n", ret);
+		test_err("clear_extent_bit returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents != 2) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 2, got %u\n",
+		test_err("miscount, wanted 2, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -1002,12 +1006,12 @@
 					+ sectorsize - 1,
 					0, NULL, 0);
 	if (ret) {
-		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+		test_err("btrfs_set_extent_delalloc returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents != 2) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 2, got %u\n",
+		test_err("miscount, wanted 2, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -1020,12 +1024,12 @@
 			(BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1,
 			0, NULL, 0);
 	if (ret) {
-		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+		test_err("btrfs_set_extent_delalloc returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents != 4) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 4, got %u\n",
+		test_err("miscount, wanted 4, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -1037,12 +1041,12 @@
 			BTRFS_MAX_EXTENT_SIZE + sectorsize,
 			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);
 	if (ret) {
-		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+		test_err("btrfs_set_extent_delalloc returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents != 3) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 3, got %u\n",
+		test_err("miscount, wanted 3, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -1054,12 +1058,12 @@
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_UPTODATE, 0, 0, NULL);
 	if (ret) {
-		test_msg("clear_extent_bit returned %d\n", ret);
+		test_err("clear_extent_bit returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents != 4) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 4, got %u\n",
+		test_err("miscount, wanted 4, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -1072,12 +1076,12 @@
 			BTRFS_MAX_EXTENT_SIZE + sectorsize,
 			BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, 0, NULL, 0);
 	if (ret) {
-		test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+		test_err("btrfs_set_extent_delalloc returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents != 3) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 3, got %u\n",
+		test_err("miscount, wanted 3, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -1087,12 +1091,12 @@
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_UPTODATE, 0, 0, NULL);
 	if (ret) {
-		test_msg("clear_extent_bit returned %d\n", ret);
+		test_err("clear_extent_bit returned %d", ret);
 		goto out;
 	}
 	if (BTRFS_I(inode)->outstanding_extents) {
 		ret = -EINVAL;
-		test_msg("Miscount, wanted 0, got %u\n",
+		test_err("miscount, wanted 0, got %u",
 			 BTRFS_I(inode)->outstanding_extents);
 		goto out;
 	}
@@ -1115,14 +1119,14 @@
 	set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
 	set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
 
-	test_msg("Running btrfs_get_extent tests\n");
+	test_msg("running btrfs_get_extent tests");
 	ret = test_btrfs_get_extent(sectorsize, nodesize);
 	if (ret)
 		return ret;
-	test_msg("Running hole first btrfs_get_extent test\n");
+	test_msg("running hole first btrfs_get_extent test");
 	ret = test_hole_first(sectorsize, nodesize);
 	if (ret)
 		return ret;
-	test_msg("Running outstanding_extents tests\n");
+	test_msg("running outstanding_extents tests");
 	return test_extent_accounting(sectorsize, nodesize);
 }
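
Throughout these test files the conversion drops the trailing "\n" and the
leading capital from each message: the helpers add a common prefix and the
newline themselves, and test_err() logs at error level so failures stand out
from progress messages. A minimal sketch of what such helpers look like,
assuming they wrap the standard printk wrappers (the exact prefix string is an
assumption; the real definitions live in fs/btrfs/tests/btrfs-tests.h):

	#include <linux/printk.h>

	/* Informational test progress, printed at KERN_INFO. */
	#define test_msg(fmt, ...) \
		pr_info("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)

	/* Test failures, printed at KERN_ERR so they stand out in dmesg. */
	#define test_err(fmt, ...) \
		pr_err("BTRFS: selftest: " fmt "\n", ##__VA_ARGS__)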
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 39b9578..ace94db 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -24,7 +24,7 @@
 	u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info);
 	int ret;
 
-	btrfs_init_dummy_trans(&trans);
+	btrfs_init_dummy_trans(&trans, NULL);
 
 	ins.objectid = bytenr;
 	ins.type = BTRFS_EXTENT_ITEM_KEY;
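
btrfs_init_dummy_trans() gains an fs_info parameter: callers that only need a
transaction shell pass NULL, while the qgroup accounting paths below pass the
real dummy fs_info so that code reading trans->fs_info keeps working. A
plausible sketch of the updated helper, where any detail beyond recording
fs_info is an assumption:

	static inline void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
						  struct btrfs_fs_info *fs_info)
	{
		memset(trans, 0, sizeof(*trans));
		trans->transid = 1;		/* assumed: any non-zero id */
		trans->fs_info = fs_info;	/* recorded for trans->fs_info users */
	}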
@@ -32,14 +32,14 @@
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		test_msg("Couldn't allocate path\n");
+		test_err("couldn't allocate path");
 		return -ENOMEM;
 	}
 
 	path->leave_spinning = 1;
 	ret = btrfs_insert_empty_item(&trans, root, path, &ins, size);
 	if (ret) {
-		test_msg("Couldn't insert ref %d\n", ret);
+		test_err("couldn't insert ref %d", ret);
 		btrfs_free_path(path);
 		return ret;
 	}
@@ -74,7 +74,7 @@
 	u64 refs;
 	int ret;
 
-	btrfs_init_dummy_trans(&trans);
+	btrfs_init_dummy_trans(&trans, NULL);
 
 	key.objectid = bytenr;
 	key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -82,14 +82,14 @@
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		test_msg("Couldn't allocate path\n");
+		test_err("couldn't allocate path");
 		return -ENOMEM;
 	}
 
 	path->leave_spinning = 1;
 	ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
 	if (ret) {
-		test_msg("Couldn't find extent ref\n");
+		test_err("couldn't find extent ref");
 		btrfs_free_path(path);
 		return ret;
 	}
@@ -111,7 +111,7 @@
 
 	ret = btrfs_insert_empty_item(&trans, root, path, &key, 0);
 	if (ret)
-		test_msg("Failed to insert backref\n");
+		test_err("failed to insert backref");
 	btrfs_free_path(path);
 	return ret;
 }
@@ -124,7 +124,7 @@
 	struct btrfs_path *path;
 	int ret;
 
-	btrfs_init_dummy_trans(&trans);
+	btrfs_init_dummy_trans(&trans, NULL);
 
 	key.objectid = bytenr;
 	key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -132,14 +132,14 @@
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		test_msg("Couldn't allocate path\n");
+		test_err("couldn't allocate path");
 		return -ENOMEM;
 	}
 	path->leave_spinning = 1;
 
 	ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
 	if (ret) {
-		test_msg("Didn't find our key %d\n", ret);
+		test_err("didn't find our key %d", ret);
 		btrfs_free_path(path);
 		return ret;
 	}
@@ -158,7 +158,7 @@
 	u64 refs;
 	int ret;
 
-	btrfs_init_dummy_trans(&trans);
+	btrfs_init_dummy_trans(&trans, NULL);
 
 	key.objectid = bytenr;
 	key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -166,14 +166,14 @@
 
 	path = btrfs_alloc_path();
 	if (!path) {
-		test_msg("Couldn't allocate path\n");
+		test_err("couldn't allocate path");
 		return -ENOMEM;
 	}
 
 	path->leave_spinning = 1;
 	ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
 	if (ret) {
-		test_msg("Couldn't find extent ref\n");
+		test_err("couldn't find extent ref");
 		btrfs_free_path(path);
 		return ret;
 	}
@@ -195,7 +195,7 @@
 
 	ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
 	if (ret) {
-		test_msg("Couldn't find backref %d\n", ret);
+		test_err("couldn't find backref %d", ret);
 		btrfs_free_path(path);
 		return ret;
 	}
@@ -213,12 +213,12 @@
 	struct ulist *new_roots = NULL;
 	int ret;
 
-	btrfs_init_dummy_trans(&trans);
+	btrfs_init_dummy_trans(&trans, fs_info);
 
-	test_msg("Qgroup basic add\n");
+	test_msg("qgroup basic add");
 	ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FS_TREE_OBJECTID);
 	if (ret) {
-		test_msg("Couldn't create a qgroup %d\n", ret);
+		test_err("couldn't create a qgroup %d", ret);
 		return ret;
 	}
 
@@ -231,7 +231,7 @@
 			false);
 	if (ret) {
 		ulist_free(old_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
@@ -245,20 +245,20 @@
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
 					  nodesize, old_roots, new_roots);
 	if (ret) {
-		test_msg("Couldn't account space for a qgroup %d\n", ret);
+		test_err("couldn't account space for a qgroup %d", ret);
 		return ret;
 	}
 
 	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
 				nodesize, nodesize)) {
-		test_msg("Qgroup counts didn't match expected values\n");
+		test_err("qgroup counts didn't match expected values");
 		return -EINVAL;
 	}
 	old_roots = NULL;
@@ -268,7 +268,7 @@
 			false);
 	if (ret) {
 		ulist_free(old_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
@@ -281,19 +281,19 @@
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
 					  nodesize, old_roots, new_roots);
 	if (ret) {
-		test_msg("Couldn't account space for a qgroup %d\n", ret);
+		test_err("couldn't account space for a qgroup %d", ret);
 		return -EINVAL;
 	}
 
 	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID, 0, 0)) {
-		test_msg("Qgroup counts didn't match expected values\n");
+		test_err("qgroup counts didn't match expected values");
 		return -EINVAL;
 	}
 
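Every scenario in these qgroup tests repeats one rhythm: snapshot the set of
roots referencing an extent, mutate the extent references, snapshot the roots
again, then hand both snapshots to the accounting code and verify the
counters. Condensed into one hypothetical helper (the name is made up; the
calls mirror the ones above):

	static int account_one_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_fs_info *fs_info,
				      u64 bytenr, u64 num_bytes)
	{
		struct ulist *old_roots = NULL;
		struct ulist *new_roots = NULL;
		int ret;

		ret = btrfs_find_all_roots(trans, fs_info, bytenr, 0,
					   &old_roots, false);
		if (ret) {
			ulist_free(old_roots);
			return ret;
		}

		/* ... insert or remove extent refs for bytenr here ... */

		ret = btrfs_find_all_roots(trans, fs_info, bytenr, 0,
					   &new_roots, false);
		if (ret) {
			ulist_free(old_roots);
			ulist_free(new_roots);
			return ret;
		}

		/* Consumes both ulists, so no ulist_free() on this path. */
		return btrfs_qgroup_account_extent(trans, fs_info, bytenr,
						   num_bytes, old_roots,
						   new_roots);
	}
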
@@ -314,9 +314,9 @@
 	struct ulist *new_roots = NULL;
 	int ret;
 
-	btrfs_init_dummy_trans(&trans);
+	btrfs_init_dummy_trans(&trans, fs_info);
 
-	test_msg("Qgroup multiple refs test\n");
+	test_msg("qgroup multiple refs test");
 
 	/*
 	 * We have BTRFS_FS_TREE_OBJECTID created already from the
@@ -324,7 +324,7 @@
 	 */
 	ret = btrfs_create_qgroup(NULL, fs_info, BTRFS_FIRST_FREE_OBJECTID);
 	if (ret) {
-		test_msg("Couldn't create a qgroup %d\n", ret);
+		test_err("couldn't create a qgroup %d", ret);
 		return ret;
 	}
 
@@ -332,7 +332,7 @@
 			false);
 	if (ret) {
 		ulist_free(old_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
@@ -346,20 +346,20 @@
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
 					  nodesize, old_roots, new_roots);
 	if (ret) {
-		test_msg("Couldn't account space for a qgroup %d\n", ret);
+		test_err("couldn't account space for a qgroup %d", ret);
 		return ret;
 	}
 
 	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
 				       nodesize, nodesize)) {
-		test_msg("Qgroup counts didn't match expected values\n");
+		test_err("qgroup counts didn't match expected values");
 		return -EINVAL;
 	}
 
@@ -367,7 +367,7 @@
 			false);
 	if (ret) {
 		ulist_free(old_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
@@ -381,26 +381,26 @@
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
 					  nodesize, old_roots, new_roots);
 	if (ret) {
-		test_msg("Couldn't account space for a qgroup %d\n", ret);
+		test_err("couldn't account space for a qgroup %d", ret);
 		return ret;
 	}
 
 	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
 					nodesize, 0)) {
-		test_msg("Qgroup counts didn't match expected values\n");
+		test_err("qgroup counts didn't match expected values");
 		return -EINVAL;
 	}
 
 	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
 					nodesize, 0)) {
-		test_msg("Qgroup counts didn't match expected values\n");
+		test_err("qgroup counts didn't match expected values");
 		return -EINVAL;
 	}
 
@@ -408,7 +408,7 @@
 			false);
 	if (ret) {
 		ulist_free(old_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
@@ -422,26 +422,26 @@
 	if (ret) {
 		ulist_free(old_roots);
 		ulist_free(new_roots);
-		test_msg("Couldn't find old roots: %d\n", ret);
+		test_err("couldn't find old roots: %d", ret);
 		return ret;
 	}
 
 	ret = btrfs_qgroup_account_extent(&trans, fs_info, nodesize,
 					  nodesize, old_roots, new_roots);
 	if (ret) {
-		test_msg("Couldn't account space for a qgroup %d\n", ret);
+		test_err("couldn't account space for a qgroup %d", ret);
 		return ret;
 	}
 
 	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FIRST_FREE_OBJECTID,
 					0, 0)) {
-		test_msg("Qgroup counts didn't match expected values\n");
+		test_err("qgroup counts didn't match expected values");
 		return -EINVAL;
 	}
 
 	if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
 					nodesize, nodesize)) {
-		test_msg("Qgroup counts didn't match expected values\n");
+		test_err("qgroup counts didn't match expected values");
 		return -EINVAL;
 	}
 
@@ -457,13 +457,13 @@
 
 	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
-		test_msg("Couldn't allocate dummy fs info\n");
+		test_err("couldn't allocate dummy fs info");
 		return -ENOMEM;
 	}
 
 	root = btrfs_alloc_dummy_root(fs_info);
 	if (IS_ERR(root)) {
-		test_msg("Couldn't allocate root\n");
+		test_err("couldn't allocate root");
 		ret = PTR_ERR(root);
 		goto out;
 	}
@@ -485,7 +485,7 @@
 	 */
 	root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
 	if (!root->node) {
-		test_msg("Couldn't allocate dummy buffer\n");
+		test_err("couldn't allocate dummy buffer");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -495,7 +495,7 @@
 
 	tmp_root = btrfs_alloc_dummy_root(fs_info);
 	if (IS_ERR(tmp_root)) {
-		test_msg("Couldn't allocate a fs root\n");
+		test_err("couldn't allocate a fs root");
 		ret = PTR_ERR(tmp_root);
 		goto out;
 	}
@@ -504,13 +504,13 @@
 	root->fs_info->fs_root = tmp_root;
 	ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
 	if (ret) {
-		test_msg("Couldn't insert fs root %d\n", ret);
+		test_err("couldn't insert fs root %d", ret);
 		goto out;
 	}
 
 	tmp_root = btrfs_alloc_dummy_root(fs_info);
 	if (IS_ERR(tmp_root)) {
-		test_msg("Couldn't allocate a fs root\n");
+		test_err("couldn't allocate a fs root");
 		ret = PTR_ERR(tmp_root);
 		goto out;
 	}
@@ -518,11 +518,11 @@
 	tmp_root->root_key.objectid = BTRFS_FIRST_FREE_OBJECTID;
 	ret = btrfs_insert_fs_root(root->fs_info, tmp_root);
 	if (ret) {
-		test_msg("Couldn't insert fs root %d\n", ret);
+		test_err("couldn't insert fs root %d", ret);
 		goto out;
 	}
 
-	test_msg("Running qgroup tests\n");
+	test_msg("running qgroup tests");
 	ret = test_no_shared_qgroup(root, sectorsize, nodesize);
 	if (ret)
 		goto out;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c944b47..4485eae 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -877,12 +877,7 @@
 	atomic_dec(&cur_trans->num_writers);
 	extwriter_counter_dec(cur_trans, trans->type);
 
-	/*
-	 * Make sure counter is updated before we wake up waiters.
-	 */
-	smp_mb();
-	if (waitqueue_active(&cur_trans->writer_wait))
-		wake_up(&cur_trans->writer_wait);
+	cond_wake_up(&cur_trans->writer_wait);
 	btrfs_put_transaction(cur_trans);
 
 	if (current->journal_info == trans)
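
The open-coded "barrier, then waitqueue_active(), then wake_up()" sequence
here and in the tree-log.c hunks below is replaced by two small helpers. A
sketch of their likely shape, assuming they build on wq_has_sleeper() and
waitqueue_active() from <linux/wait.h>:

	/* Wake waiters; wq_has_sleeper() supplies the needed full barrier. */
	static inline void cond_wake_up(struct wait_queue_head *wq)
	{
		if (wq_has_sleeper(wq))
			wake_up(wq);
	}

	/*
	 * Barrier-free variant for callers that already provide ordering,
	 * e.g. via a preceding atomic_dec_and_test() as in
	 * btrfs_end_log_trans() below.
	 */
	static inline void cond_wake_up_nomb(struct wait_queue_head *wq)
	{
		if (waitqueue_active(wq))
			wake_up(wq);
	}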
@@ -1250,7 +1245,6 @@
 
 			btrfs_free_log(trans, root);
 			btrfs_update_reloc_root(trans, root);
-			btrfs_orphan_commit_root(trans, root);
 
 			btrfs_save_ino_cache(root, trans);
 
@@ -1640,15 +1634,14 @@
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
 	}
-	ret = btrfs_uuid_tree_add(trans, fs_info, new_uuid.b,
-				  BTRFS_UUID_KEY_SUBVOL, objectid);
+	ret = btrfs_uuid_tree_add(trans, new_uuid.b, BTRFS_UUID_KEY_SUBVOL,
+				  objectid);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
 		goto fail;
 	}
 	if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
-		ret = btrfs_uuid_tree_add(trans, fs_info,
-					  new_root_item->received_uuid,
+		ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid,
 					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
 					  objectid);
 		if (ret && ret != -EEXIST) {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d8c0826..9443948 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -139,7 +139,6 @@
 	struct btrfs_path *path;
 	/* block reservation for the operation */
 	struct btrfs_block_rsv block_rsv;
-	u64 qgroup_reserved;
 	/* extra metadata reservation for relocation */
 	int error;
 	bool readonly;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 8f23a94..f8220ec 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -222,11 +222,8 @@
 void btrfs_end_log_trans(struct btrfs_root *root)
 {
 	if (atomic_dec_and_test(&root->log_writers)) {
-		/*
-		 * Implicit memory barrier after atomic_dec_and_test
-		 */
-		if (waitqueue_active(&root->log_writer_wait))
-			wake_up(&root->log_writer_wait);
+		/* atomic_dec_and_test implies a barrier */
+		cond_wake_up_nomb(&root->log_writer_wait);
 	}
 }
 
@@ -2988,11 +2985,8 @@
 
 	mutex_lock(&log_root_tree->log_mutex);
 	if (atomic_dec_and_test(&log_root_tree->log_writers)) {
-		/*
-		 * Implicit memory barrier after atomic_dec_and_test
-		 */
-		if (waitqueue_active(&log_root_tree->log_writer_wait))
-			wake_up(&log_root_tree->log_writer_wait);
+		/* atomic_dec_and_test implies a barrier */
+		cond_wake_up_nomb(&log_root_tree->log_writer_wait);
 	}
 
 	if (ret) {
@@ -3116,10 +3110,11 @@
 	mutex_unlock(&log_root_tree->log_mutex);
 
 	/*
-	 * The barrier before waitqueue_active is implied by mutex_unlock
+	 * The barrier before waitqueue_active (in cond_wake_up) is needed so
+	 * all the updates above are seen by the woken threads. It might not be
+	 * necessary, but proving that seems to be hard.
 	 */
-	if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
-		wake_up(&log_root_tree->log_commit_wait[index2]);
+	cond_wake_up(&log_root_tree->log_commit_wait[index2]);
 out:
 	mutex_lock(&root->log_mutex);
 	btrfs_remove_all_log_ctxs(root, index1, ret);
@@ -3128,10 +3123,11 @@
 	mutex_unlock(&root->log_mutex);
 
 	/*
-	 * The barrier before waitqueue_active is implied by mutex_unlock
+	 * The barrier before waitqueue_active (in cond_wake_up) is needed so
+	 * all the updates above are seen by the woken threads. It might not be
+	 * necessary, but proving that seems to be hard.
 	 */
-	if (waitqueue_active(&root->log_commit_wait[index1]))
-		wake_up(&root->log_commit_wait[index1]);
+	cond_wake_up(&root->log_commit_wait[index1]);
 	return ret;
 }
 
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 1ba7ca2..3b2ae34 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -79,10 +79,10 @@
 	return ret;
 }
 
-int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
 			u64 subid_cpu)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *uuid_root = fs_info->uuid_root;
 	int ret;
 	struct btrfs_path *path = NULL;
@@ -144,10 +144,10 @@
 	return ret;
 }
 
-int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
-			struct btrfs_fs_info *fs_info, u8 *uuid, u8 type,
+int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
 			u64 subid)
 {
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *uuid_root = fs_info->uuid_root;
 	int ret;
 	struct btrfs_path *path = NULL;
@@ -239,7 +239,7 @@
 		goto out;
 	}
 
-	ret = btrfs_uuid_tree_rem(trans, uuid_root->fs_info, uuid, type, subid);
+	ret = btrfs_uuid_tree_remove(trans, uuid, type, subid);
 	btrfs_end_transaction(trans);
 
 out:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index be3fc70..e034ad9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -40,6 +40,9 @@
 		.tolerated_failures = 1,
 		.devs_increment	= 2,
 		.ncopies	= 2,
+		.raid_name	= "raid10",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID10,
+		.mindev_error	= BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
 	},
 	[BTRFS_RAID_RAID1] = {
 		.sub_stripes	= 1,
@@ -49,6 +52,9 @@
 		.tolerated_failures = 1,
 		.devs_increment	= 2,
 		.ncopies	= 2,
+		.raid_name	= "raid1",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1,
+		.mindev_error	= BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
 	},
 	[BTRFS_RAID_DUP] = {
 		.sub_stripes	= 1,
@@ -58,6 +64,9 @@
 		.tolerated_failures = 0,
 		.devs_increment	= 1,
 		.ncopies	= 2,
+		.raid_name	= "dup",
+		.bg_flag	= BTRFS_BLOCK_GROUP_DUP,
+		.mindev_error	= 0,
 	},
 	[BTRFS_RAID_RAID0] = {
 		.sub_stripes	= 1,
@@ -67,6 +76,9 @@
 		.tolerated_failures = 0,
 		.devs_increment	= 1,
 		.ncopies	= 1,
+		.raid_name	= "raid0",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID0,
+		.mindev_error	= 0,
 	},
 	[BTRFS_RAID_SINGLE] = {
 		.sub_stripes	= 1,
@@ -76,6 +88,9 @@
 		.tolerated_failures = 0,
 		.devs_increment	= 1,
 		.ncopies	= 1,
+		.raid_name	= "single",
+		.bg_flag	= 0,
+		.mindev_error	= 0,
 	},
 	[BTRFS_RAID_RAID5] = {
 		.sub_stripes	= 1,
@@ -85,6 +100,9 @@
 		.tolerated_failures = 1,
 		.devs_increment	= 1,
 		.ncopies	= 2,
+		.raid_name	= "raid5",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID5,
+		.mindev_error	= BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
 	},
 	[BTRFS_RAID_RAID6] = {
 		.sub_stripes	= 1,
@@ -94,33 +112,19 @@
 		.tolerated_failures = 2,
 		.devs_increment	= 1,
 		.ncopies	= 3,
+		.raid_name	= "raid6",
+		.bg_flag	= BTRFS_BLOCK_GROUP_RAID6,
+		.mindev_error	= BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
 	},
 };
 
-const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
-	[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
-	[BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
-	[BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
-	[BTRFS_RAID_RAID0]  = BTRFS_BLOCK_GROUP_RAID0,
-	[BTRFS_RAID_SINGLE] = 0,
-	[BTRFS_RAID_RAID5]  = BTRFS_BLOCK_GROUP_RAID5,
-	[BTRFS_RAID_RAID6]  = BTRFS_BLOCK_GROUP_RAID6,
-};
+const char *get_raid_name(enum btrfs_raid_types type)
+{
+	if (type >= BTRFS_NR_RAID_TYPES)
+		return NULL;
 
-/*
- * Table to convert BTRFS_RAID_* to the error code if minimum number of devices
- * condition is not met. Zero means there's no corresponding
- * BTRFS_ERROR_DEV_*_NOT_MET value.
- */
-const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
-	[BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
-	[BTRFS_RAID_RAID1]  = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
-	[BTRFS_RAID_DUP]    = 0,
-	[BTRFS_RAID_RAID0]  = 0,
-	[BTRFS_RAID_SINGLE] = 0,
-	[BTRFS_RAID_RAID5]  = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
-	[BTRFS_RAID_RAID6]  = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
-};
+	return btrfs_raid_array[type].raid_name;
+}
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
 				struct btrfs_fs_info *fs_info);
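
With raid_name, bg_flag and mindev_error folded into btrfs_raid_array, the
separate btrfs_raid_group[] and btrfs_raid_mindev_error[] tables go away and
callers index a single table. An illustrative (hypothetical) helper mirroring
the min-devices check converted further down:

	static int check_raid_min_devices(u64 all_avail, u64 num_devices)
	{
		int i;

		for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
			if (!(all_avail & btrfs_raid_array[i].bg_flag))
				continue;

			/* mindev_error == 0 means no minimum to enforce. */
			if (num_devices < btrfs_raid_array[i].devs_min &&
			    btrfs_raid_array[i].mindev_error)
				return btrfs_raid_array[i].mindev_error;
		}

		return 0;
	}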
@@ -167,12 +171,6 @@
  * may be used to exclude some operations from running concurrently without any
  * modifications to the list (see write_all_supers)
  *
- * volume_mutex
- * ------------
- * coarse lock owned by a mounted filesystem; used to exclude some operations
- * that cannot run in parallel and affect the higher-level properties of the
- * filesystem like: device add/deleting/resize/replace, or balance
- *
  * balance_mutex
  * -------------
  * protects balance structures (status, state) and context accessed from
@@ -197,6 +195,41 @@
  *     device_list_mutex
  *       chunk_mutex
  *     balance_mutex
+ *
+ *
+ * Exclusive operations, BTRFS_FS_EXCL_OP
+ * ======================================
+ *
+ * Maintains the exclusivity of the following operations that apply to the
+ * whole filesystem and cannot run in parallel.
+ *
+ * - Balance (*)
+ * - Device add
+ * - Device remove
+ * - Device replace (*)
+ * - Resize
+ *
+ * The device operations (as above) can be in one of the following states:
+ *
+ * - Running state
+ * - Paused state
+ * - Completed state
+ *
+ * Only device operations marked with (*) can go into the Paused state for the
+ * following reasons:
+ *
+ * - ioctl (only Balance can be Paused through ioctl)
+ * - filesystem remounted as read-only
+ * - filesystem unmounted and mounted as read-only
+ * - system power-cycle and filesystem mounted as read-only
+ * - filesystem or device errors leading to forced read-only
+ *
+ * BTRFS_FS_EXCL_OP flag is set and cleared using atomic operations.
+ * While an operation is in the Paused state, BTRFS_FS_EXCL_OP remains set.
+ * A device operation in Paused or Running state can be canceled or resumed
+ * either by ioctl (Balance only) or when remounted as read-write.
+ * BTRFS_FS_EXCL_OP flag is cleared when the device operation is canceled or
+ * completed.
  */
 
 DEFINE_MUTEX(uuid_mutex);
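
The comment block above boils down to a claim/release protocol on one flag
bit. Sketched as hypothetical wrappers (the patch open-codes the same
test_and_set_bit()/clear_bit() calls, e.g. in the balance paths below):

	/* Atomically claim the single exclusive-op slot for this fs. */
	static bool try_start_exclusive_op(struct btrfs_fs_info *fs_info)
	{
		return !test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
	}

	/* Cleared only on cancel or completion; a Paused op keeps it set. */
	static void finish_exclusive_op(struct btrfs_fs_info *fs_info)
	{
		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
	}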
@@ -227,14 +260,14 @@
 	INIT_LIST_HEAD(&fs_devs->devices);
 	INIT_LIST_HEAD(&fs_devs->resized_devices);
 	INIT_LIST_HEAD(&fs_devs->alloc_list);
-	INIT_LIST_HEAD(&fs_devs->list);
+	INIT_LIST_HEAD(&fs_devs->fs_list);
 	if (fsid)
 		memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
 
 	return fs_devs;
 }
 
-static void free_device(struct btrfs_device *device)
+void btrfs_free_device(struct btrfs_device *device)
 {
 	rcu_string_free(device->name);
 	bio_put(device->flush_bio);
@@ -249,7 +282,7 @@
 		device = list_entry(fs_devices->devices.next,
 				    struct btrfs_device, dev_list);
 		list_del(&device->dev_list);
-		free_device(device);
+		btrfs_free_device(device);
 	}
 	kfree(fs_devices);
 }
@@ -273,8 +306,8 @@
 
 	while (!list_empty(&fs_uuids)) {
 		fs_devices = list_entry(fs_uuids.next,
-					struct btrfs_fs_devices, list);
-		list_del(&fs_devices->list);
+					struct btrfs_fs_devices, fs_list);
+		list_del(&fs_devices->fs_list);
 		free_fs_devices(fs_devices);
 	}
 }
@@ -282,7 +315,7 @@
 /*
  * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
  * Returned struct is not linked onto any lists and must be destroyed using
- * free_device.
+ * btrfs_free_device.
  */
 static struct btrfs_device *__alloc_device(void)
 {
@@ -327,10 +360,9 @@
 static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
 		u64 devid, const u8 *uuid)
 {
-	struct list_head *head = &fs_devices->devices;
 	struct btrfs_device *dev;
 
-	list_for_each_entry(dev, head, dev_list) {
+	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
 		if (dev->devid == devid &&
 		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
 			return dev;
@@ -343,7 +375,7 @@
 {
 	struct btrfs_fs_devices *fs_devices;
 
-	list_for_each_entry(fs_devices, &fs_uuids, list) {
+	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
 		if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
 			return fs_devices;
 	}
@@ -607,7 +639,7 @@
 	struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
 	struct btrfs_device *dev, *tmp_dev;
 
-	list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) {
+	list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, fs_list) {
 
 		if (fs_devs->opened)
 			continue;
@@ -632,13 +664,13 @@
 			/* delete the stale device */
 			if (fs_devs->num_devices == 1) {
 				btrfs_sysfs_remove_fsid(fs_devs);
-				list_del(&fs_devs->list);
+				list_del(&fs_devs->fs_list);
 				free_fs_devices(fs_devs);
 				break;
 			} else {
 				fs_devs->num_devices--;
 				list_del(&dev->dev_list);
-				free_device(dev);
+				btrfs_free_device(dev);
 			}
 		}
 	}
@@ -732,7 +764,7 @@
 		if (IS_ERR(fs_devices))
 			return ERR_CAST(fs_devices);
 
-		list_add(&fs_devices->list, &fs_uuids);
+		list_add(&fs_devices->fs_list, &fs_uuids);
 
 		device = NULL;
 	} else {
@@ -753,7 +785,7 @@
 
 		name = rcu_string_strdup(path, GFP_NOFS);
 		if (!name) {
-			free_device(device);
+			btrfs_free_device(device);
 			return ERR_PTR(-ENOMEM);
 		}
 		rcu_assign_pointer(device->name, name);
@@ -866,7 +898,7 @@
 			name = rcu_string_strdup(orig_dev->name->str,
 					GFP_KERNEL);
 			if (!name) {
-				free_device(device);
+				btrfs_free_device(device);
 				goto error;
 			}
 			rcu_assign_pointer(device->name, name);
@@ -938,7 +970,7 @@
 		}
 		list_del_init(&device->dev_list);
 		fs_devices->num_devices--;
-		free_device(device);
+		btrfs_free_device(device);
 	}
 
 	if (fs_devices->seed) {
@@ -956,7 +988,7 @@
 	struct btrfs_device *device;
 
 	device = container_of(head, struct btrfs_device, rcu);
-	free_device(device);
+	btrfs_free_device(device);
 }
 
 static void btrfs_close_bdev(struct btrfs_device *device)
@@ -1005,7 +1037,7 @@
 	new_device->fs_devices = device->fs_devices;
 }
 
-static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
 {
 	struct btrfs_device *device, *tmp;
 	struct list_head pending_put;
@@ -1050,7 +1082,7 @@
 	int ret;
 
 	mutex_lock(&uuid_mutex);
-	ret = __btrfs_close_devices(fs_devices);
+	ret = close_fs_devices(fs_devices);
 	if (!fs_devices->opened) {
 		seed_devices = fs_devices->seed;
 		fs_devices->seed = NULL;
@@ -1060,23 +1092,22 @@
 	while (seed_devices) {
 		fs_devices = seed_devices;
 		seed_devices = fs_devices->seed;
-		__btrfs_close_devices(fs_devices);
+		close_fs_devices(fs_devices);
 		free_fs_devices(fs_devices);
 	}
 	return ret;
 }
 
-static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
+static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
 				fmode_t flags, void *holder)
 {
-	struct list_head *head = &fs_devices->devices;
 	struct btrfs_device *device;
 	struct btrfs_device *latest_dev = NULL;
 	int ret = 0;
 
 	flags |= FMODE_EXCL;
 
-	list_for_each_entry(device, head, dev_list) {
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
 		/* Just open everything we can; ignore failures here */
 		if (btrfs_open_one_device(fs_devices, device, flags, holder))
 			continue;
@@ -1115,15 +1146,16 @@
 {
 	int ret;
 
-	mutex_lock(&uuid_mutex);
+	mutex_lock(&fs_devices->device_list_mutex);
 	if (fs_devices->opened) {
 		fs_devices->opened++;
 		ret = 0;
 	} else {
 		list_sort(NULL, &fs_devices->devices, devid_cmp);
-		ret = __btrfs_open_devices(fs_devices, flags, holder);
+		ret = open_fs_devices(fs_devices, flags, holder);
 	}
-	mutex_unlock(&uuid_mutex);
+	mutex_unlock(&fs_devices->device_list_mutex);
+
 	return ret;
 }
 
@@ -1201,31 +1233,29 @@
 	 */
 	bytenr = btrfs_sb_offset(0);
 	flags |= FMODE_EXCL;
-	mutex_lock(&uuid_mutex);
 
 	bdev = blkdev_get_by_path(path, flags, holder);
-	if (IS_ERR(bdev)) {
-		ret = PTR_ERR(bdev);
-		goto error;
-	}
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
 
 	if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
 		ret = -EINVAL;
 		goto error_bdev_put;
 	}
 
+	mutex_lock(&uuid_mutex);
 	device = device_list_add(path, disk_super);
 	if (IS_ERR(device))
 		ret = PTR_ERR(device);
 	else
 		*fs_devices_ret = device->fs_devices;
+	mutex_unlock(&uuid_mutex);
 
 	btrfs_release_disk_super(page);
 
 error_bdev_put:
 	blkdev_put(bdev, flags);
-error:
-	mutex_unlock(&uuid_mutex);
+
 	return ret;
 }
 
@@ -1857,11 +1887,11 @@
 	} while (read_seqretry(&fs_info->profiles_lock, seq));
 
 	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
-		if (!(all_avail & btrfs_raid_group[i]))
+		if (!(all_avail & btrfs_raid_array[i].bg_flag))
 			continue;
 
 		if (num_devices < btrfs_raid_array[i].devs_min) {
-			int ret = btrfs_raid_mindev_error[i];
+			int ret = btrfs_raid_array[i].mindev_error;
 
 			if (ret)
 				return ret;
@@ -1917,13 +1947,13 @@
 {
 	struct btrfs_device *device;
 	struct btrfs_fs_devices *cur_devices;
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 	u64 num_devices;
 	int ret = 0;
 
-	mutex_lock(&fs_info->volume_mutex);
 	mutex_lock(&uuid_mutex);
 
-	num_devices = fs_info->fs_devices->num_devices;
+	num_devices = fs_devices->num_devices;
 	btrfs_dev_replace_read_lock(&fs_info->dev_replace);
 	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
 		WARN_ON(num_devices < 1);
@@ -1986,27 +2016,32 @@
 	 * (super_copy) should hold the device list mutex.
 	 */
 
+	/*
+	 * In normal cases cur_devices == fs_devices. But when deleting
+	 * a seed device, cur_devices points to the seed's own
+	 * fs_devices, listed under fs_devices->seed.
+	 */
 	cur_devices = device->fs_devices;
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	mutex_lock(&fs_devices->device_list_mutex);
 	list_del_rcu(&device->dev_list);
 
-	device->fs_devices->num_devices--;
-	device->fs_devices->total_devices--;
+	cur_devices->num_devices--;
+	cur_devices->total_devices--;
 
 	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
-		device->fs_devices->missing_devices--;
+		cur_devices->missing_devices--;
 
 	btrfs_assign_next_active_device(fs_info, device, NULL);
 
 	if (device->bdev) {
-		device->fs_devices->open_devices--;
+		cur_devices->open_devices--;
 		/* remove sysfs entry */
-		btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
+		btrfs_sysfs_rm_device_link(fs_devices, device);
 	}
 
 	num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
 	btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+	mutex_unlock(&fs_devices->device_list_mutex);
 
 	/*
 	 * at this point, the device is zero sized and detached from
@@ -2020,8 +2055,6 @@
 	call_rcu(&device->rcu, free_device_rcu);
 
 	if (cur_devices->open_devices == 0) {
-		struct btrfs_fs_devices *fs_devices;
-		fs_devices = fs_info->fs_devices;
 		while (fs_devices) {
 			if (fs_devices->seed == cur_devices) {
 				fs_devices->seed = cur_devices->seed;
@@ -2030,20 +2063,19 @@
 			fs_devices = fs_devices->seed;
 		}
 		cur_devices->seed = NULL;
-		__btrfs_close_devices(cur_devices);
+		close_fs_devices(cur_devices);
 		free_fs_devices(cur_devices);
 	}
 
 out:
 	mutex_unlock(&uuid_mutex);
-	mutex_unlock(&fs_info->volume_mutex);
 	return ret;
 
 error_undo:
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
 		mutex_lock(&fs_info->chunk_mutex);
 		list_add(&device->dev_alloc_list,
-			 &fs_info->fs_devices->alloc_list);
+			 &fs_devices->alloc_list);
 		device->fs_devices->rw_devices++;
 		mutex_unlock(&fs_info->chunk_mutex);
 	}
@@ -2112,7 +2144,7 @@
 			tmp_fs_devices = tmp_fs_devices->seed;
 		}
 		fs_devices->seed = NULL;
-		__btrfs_close_devices(fs_devices);
+		close_fs_devices(fs_devices);
 		free_fs_devices(fs_devices);
 	}
 }
@@ -2120,23 +2152,23 @@
 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 				      struct btrfs_device *tgtdev)
 {
-	mutex_lock(&uuid_mutex);
-	WARN_ON(!tgtdev);
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
 
-	btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
+	WARN_ON(!tgtdev);
+	mutex_lock(&fs_devices->device_list_mutex);
+
+	btrfs_sysfs_rm_device_link(fs_devices, tgtdev);
 
 	if (tgtdev->bdev)
-		fs_info->fs_devices->open_devices--;
+		fs_devices->open_devices--;
 
-	fs_info->fs_devices->num_devices--;
+	fs_devices->num_devices--;
 
 	btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
 
 	list_del_rcu(&tgtdev->dev_list);
 
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-	mutex_unlock(&uuid_mutex);
+	mutex_unlock(&fs_devices->device_list_mutex);
 
 	/*
 	 * The update_dev_time() within btrfs_scratch_superblocks()
@@ -2188,10 +2220,6 @@
 		struct btrfs_device *tmp;
 
 		devices = &fs_info->fs_devices->devices;
-		/*
-		 * It is safe to read the devices since the volume_mutex
-		 * is held by the caller.
-		 */
 		list_for_each_entry(tmp, devices, dev_list) {
 			if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
 					&tmp->dev_state) && !tmp->bdev) {
@@ -2259,7 +2287,7 @@
 		return PTR_ERR(old_devices);
 	}
 
-	list_add(&old_devices->list, &fs_uuids);
+	list_add(&old_devices->fs_list, &fs_uuids);
 
 	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
 	seed_devices->opened = 1;
@@ -2570,7 +2598,7 @@
 	if (trans)
 		btrfs_end_transaction(trans);
 error_free_device:
-	free_device(device);
+	btrfs_free_device(device);
 error:
 	blkdev_put(bdev, FMODE_EXCL);
 	if (seeding_dev && !unlocked) {
@@ -2580,99 +2608,6 @@
 	return ret;
 }
 
-int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
-				  const char *device_path,
-				  struct btrfs_device *srcdev,
-				  struct btrfs_device **device_out)
-{
-	struct btrfs_device *device;
-	struct block_device *bdev;
-	struct list_head *devices;
-	struct rcu_string *name;
-	u64 devid = BTRFS_DEV_REPLACE_DEVID;
-	int ret = 0;
-
-	*device_out = NULL;
-	if (fs_info->fs_devices->seeding) {
-		btrfs_err(fs_info, "the filesystem is a seed filesystem!");
-		return -EINVAL;
-	}
-
-	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
-				  fs_info->bdev_holder);
-	if (IS_ERR(bdev)) {
-		btrfs_err(fs_info, "target device %s is invalid!", device_path);
-		return PTR_ERR(bdev);
-	}
-
-	filemap_write_and_wait(bdev->bd_inode->i_mapping);
-
-	devices = &fs_info->fs_devices->devices;
-	list_for_each_entry(device, devices, dev_list) {
-		if (device->bdev == bdev) {
-			btrfs_err(fs_info,
-				  "target device is in the filesystem!");
-			ret = -EEXIST;
-			goto error;
-		}
-	}
-
-
-	if (i_size_read(bdev->bd_inode) <
-	    btrfs_device_get_total_bytes(srcdev)) {
-		btrfs_err(fs_info,
-			  "target device is smaller than source device!");
-		ret = -EINVAL;
-		goto error;
-	}
-
-
-	device = btrfs_alloc_device(NULL, &devid, NULL);
-	if (IS_ERR(device)) {
-		ret = PTR_ERR(device);
-		goto error;
-	}
-
-	name = rcu_string_strdup(device_path, GFP_KERNEL);
-	if (!name) {
-		free_device(device);
-		ret = -ENOMEM;
-		goto error;
-	}
-	rcu_assign_pointer(device->name, name);
-
-	mutex_lock(&fs_info->fs_devices->device_list_mutex);
-	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
-	device->generation = 0;
-	device->io_width = fs_info->sectorsize;
-	device->io_align = fs_info->sectorsize;
-	device->sector_size = fs_info->sectorsize;
-	device->total_bytes = btrfs_device_get_total_bytes(srcdev);
-	device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
-	device->bytes_used = btrfs_device_get_bytes_used(srcdev);
-	device->commit_total_bytes = srcdev->commit_total_bytes;
-	device->commit_bytes_used = device->bytes_used;
-	device->fs_info = fs_info;
-	device->bdev = bdev;
-	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
-	set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
-	device->mode = FMODE_EXCL;
-	device->dev_stats_valid = 1;
-	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
-	device->fs_devices = fs_info->fs_devices;
-	list_add(&device->dev_list, &fs_info->fs_devices->devices);
-	fs_info->fs_devices->num_devices++;
-	fs_info->fs_devices->open_devices++;
-	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
-	*device_out = device;
-	return ret;
-
-error:
-	blkdev_put(bdev, FMODE_EXCL);
-	return ret;
-}
-
 static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
 					struct btrfs_device *device)
 {
@@ -3273,24 +3208,12 @@
 }
 
 /*
- * Should be called with both balance and volume mutexes held to
- * serialize other volume operations (add_dev/rm_dev/resize) with
- * restriper.  Same goes for unset_balance_control.
+ * Clear the balance status in fs_info and delete the balance item from disk.
  */
-static void set_balance_control(struct btrfs_balance_control *bctl)
-{
-	struct btrfs_fs_info *fs_info = bctl->fs_info;
-
-	BUG_ON(fs_info->balance_ctl);
-
-	spin_lock(&fs_info->balance_lock);
-	fs_info->balance_ctl = bctl;
-	spin_unlock(&fs_info->balance_lock);
-}
-
-static void unset_balance_control(struct btrfs_fs_info *fs_info)
+static void reset_balance_state(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+	int ret;
 
 	BUG_ON(!fs_info->balance_ctl);
 
@@ -3299,6 +3222,9 @@
 	spin_unlock(&fs_info->balance_lock);
 
 	kfree(bctl);
+	ret = del_balance_item(fs_info);
+	if (ret)
+		btrfs_handle_fs_error(fs_info, ret, NULL);
 }
 
 /*
@@ -3835,18 +3761,6 @@
 		 atomic_read(&fs_info->balance_cancel_req) == 0);
 }
 
-static void __cancel_balance(struct btrfs_fs_info *fs_info)
-{
-	int ret;
-
-	unset_balance_control(fs_info);
-	ret = del_balance_item(fs_info);
-	if (ret)
-		btrfs_handle_fs_error(fs_info, ret, NULL);
-
-	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-}
-
 /* Non-zero return value signifies invalidity */
 static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
 		u64 allowed)
@@ -3857,12 +3771,12 @@
 }
 
 /*
- * Should be called with both balance and volume mutexes held
+ * Should be called with the balance mutex held
  */
-int btrfs_balance(struct btrfs_balance_control *bctl,
+int btrfs_balance(struct btrfs_fs_info *fs_info,
+		  struct btrfs_balance_control *bctl,
 		  struct btrfs_ioctl_balance_args *bargs)
 {
-	struct btrfs_fs_info *fs_info = bctl->fs_info;
 	u64 meta_target, data_target;
 	u64 allowed;
 	int mixed = 0;
@@ -3891,7 +3805,7 @@
 		    !(bctl->flags & BTRFS_BALANCE_METADATA) ||
 		    memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
 			btrfs_err(fs_info,
-				  "with mixed groups data and metadata balance options must be the same");
+	  "balance: mixed groups data and metadata options must be the same");
 			ret = -EINVAL;
 			goto out;
 		}
@@ -3913,23 +3827,29 @@
 		allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
 			    BTRFS_BLOCK_GROUP_RAID6);
 	if (validate_convert_profile(&bctl->data, allowed)) {
+		int index = btrfs_bg_flags_to_raid_index(bctl->data.target);
+
 		btrfs_err(fs_info,
-			  "unable to start balance with target data profile %llu",
-			  bctl->data.target);
+			  "balance: invalid convert data profile %s",
+			  get_raid_name(index));
 		ret = -EINVAL;
 		goto out;
 	}
 	if (validate_convert_profile(&bctl->meta, allowed)) {
+		int index = btrfs_bg_flags_to_raid_index(bctl->meta.target);
+
 		btrfs_err(fs_info,
-			  "unable to start balance with target metadata profile %llu",
-			  bctl->meta.target);
+			  "balance: invalid convert metadata profile %s",
+			  get_raid_name(index));
 		ret = -EINVAL;
 		goto out;
 	}
 	if (validate_convert_profile(&bctl->sys, allowed)) {
+		int index = btrfs_bg_flags_to_raid_index(bctl->sys.target);
+
 		btrfs_err(fs_info,
-			  "unable to start balance with target system profile %llu",
-			  bctl->sys.target);
+			  "balance: invalid convert system profile %s",
+			  get_raid_name(index));
 		ret = -EINVAL;
 		goto out;
 	}
@@ -3950,10 +3870,10 @@
 		     !(bctl->meta.target & allowed))) {
 			if (bctl->flags & BTRFS_BALANCE_FORCE) {
 				btrfs_info(fs_info,
-					   "force reducing metadata integrity");
+				"balance: force reducing metadata integrity");
 			} else {
 				btrfs_err(fs_info,
-					  "balance will reduce metadata integrity, use force if you want this");
+	"balance: reduces metadata integrity, use --force if you want this");
 				ret = -EINVAL;
 				goto out;
 			}
@@ -3967,9 +3887,12 @@
 		bctl->data.target : fs_info->avail_data_alloc_bits;
 	if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
 		btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
+		int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
+		int data_index = btrfs_bg_flags_to_raid_index(data_target);
+
 		btrfs_warn(fs_info,
-			   "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
-			   meta_target, data_target);
+	"balance: metadata profile %s has lower redundancy than data profile %s",
+			   get_raid_name(meta_index), get_raid_name(data_index));
 	}
 
 	ret = insert_balance_item(fs_info, bctl);
@@ -3978,7 +3901,10 @@
 
 	if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
 		BUG_ON(ret == -EEXIST);
-		set_balance_control(bctl);
+		BUG_ON(fs_info->balance_ctl);
+		spin_lock(&fs_info->balance_lock);
+		fs_info->balance_ctl = bctl;
+		spin_unlock(&fs_info->balance_lock);
 	} else {
 		BUG_ON(ret != -EEXIST);
 		spin_lock(&fs_info->balance_lock);
@@ -3986,22 +3912,24 @@
 		spin_unlock(&fs_info->balance_lock);
 	}
 
-	atomic_inc(&fs_info->balance_running);
+	ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+	set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
 	mutex_unlock(&fs_info->balance_mutex);
 
 	ret = __btrfs_balance(fs_info);
 
 	mutex_lock(&fs_info->balance_mutex);
-	atomic_dec(&fs_info->balance_running);
+	clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
 
 	if (bargs) {
 		memset(bargs, 0, sizeof(*bargs));
-		update_ioctl_balance_args(fs_info, 0, bargs);
+		btrfs_update_ioctl_balance_args(fs_info, bargs);
 	}
 
 	if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
 	    balance_need_close(fs_info)) {
-		__cancel_balance(fs_info);
+		reset_balance_state(fs_info);
+		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
 	}
 
 	wake_up(&fs_info->balance_wait_q);
@@ -4009,11 +3937,11 @@
 	return ret;
 out:
 	if (bctl->flags & BTRFS_BALANCE_RESUME)
-		__cancel_balance(fs_info);
-	else {
+		reset_balance_state(fs_info);
+	else
 		kfree(bctl);
-		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
-	}
+	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+
 	return ret;
 }
 
@@ -4022,16 +3950,12 @@
 	struct btrfs_fs_info *fs_info = data;
 	int ret = 0;
 
-	mutex_lock(&fs_info->volume_mutex);
 	mutex_lock(&fs_info->balance_mutex);
-
 	if (fs_info->balance_ctl) {
-		btrfs_info(fs_info, "continuing balance");
-		ret = btrfs_balance(fs_info->balance_ctl, NULL);
+		btrfs_info(fs_info, "balance: resuming");
+		ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
 	}
-
 	mutex_unlock(&fs_info->balance_mutex);
-	mutex_unlock(&fs_info->volume_mutex);
 
 	return ret;
 }
@@ -4040,15 +3964,15 @@
 {
 	struct task_struct *tsk;
 
-	spin_lock(&fs_info->balance_lock);
+	mutex_lock(&fs_info->balance_mutex);
 	if (!fs_info->balance_ctl) {
-		spin_unlock(&fs_info->balance_lock);
+		mutex_unlock(&fs_info->balance_mutex);
 		return 0;
 	}
-	spin_unlock(&fs_info->balance_lock);
+	mutex_unlock(&fs_info->balance_mutex);
 
 	if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
-		btrfs_info(fs_info, "force skipping balance");
+		btrfs_info(fs_info, "balance: resume skipped");
 		return 0;
 	}
 
@@ -4100,7 +4024,6 @@
 	leaf = path->nodes[0];
 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
 
-	bctl->fs_info = fs_info;
 	bctl->flags = btrfs_balance_flags(leaf, item);
 	bctl->flags |= BTRFS_BALANCE_RESUME;
 
@@ -4111,15 +4034,26 @@
 	btrfs_balance_sys(leaf, item, &disk_bargs);
 	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
-	WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
+	/*
+	 * This should never happen, as the paused balance state is recovered
+	 * during mount, without any chance for other exclusive ops to collide.
+	 *
+	 * This gives the exclusive op status to balance and keeps in paused
+	 * state until user intervention (cancel or umount). If the ownership
+	 * cannot be assigned, show a message but do not fail. The balance
+	 * is in a paused state and must have fs_info::balance_ctl properly
+	 * set up.
+	 */
+	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
+		btrfs_warn(fs_info,
+	"balance: cannot set exclusive op status, resume manually");
 
-	mutex_lock(&fs_info->volume_mutex);
 	mutex_lock(&fs_info->balance_mutex);
-
-	set_balance_control(bctl);
-
+	BUG_ON(fs_info->balance_ctl);
+	spin_lock(&fs_info->balance_lock);
+	fs_info->balance_ctl = bctl;
+	spin_unlock(&fs_info->balance_lock);
 	mutex_unlock(&fs_info->balance_mutex);
-	mutex_unlock(&fs_info->volume_mutex);
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -4135,16 +4069,16 @@
 		return -ENOTCONN;
 	}
 
-	if (atomic_read(&fs_info->balance_running)) {
+	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
 		atomic_inc(&fs_info->balance_pause_req);
 		mutex_unlock(&fs_info->balance_mutex);
 
 		wait_event(fs_info->balance_wait_q,
-			   atomic_read(&fs_info->balance_running) == 0);
+			   !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
 
 		mutex_lock(&fs_info->balance_mutex);
 		/* we are good with balance_ctl ripped off from under us */
-		BUG_ON(atomic_read(&fs_info->balance_running));
+		BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
 		atomic_dec(&fs_info->balance_pause_req);
 	} else {
 		ret = -ENOTCONN;
@@ -4156,38 +4090,49 @@
 
 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
 {
-	if (sb_rdonly(fs_info->sb))
-		return -EROFS;
-
 	mutex_lock(&fs_info->balance_mutex);
 	if (!fs_info->balance_ctl) {
 		mutex_unlock(&fs_info->balance_mutex);
 		return -ENOTCONN;
 	}
 
+	/*
+	 * A paused balance with the item stored on disk can be resumed at
+	 * mount time if the mount is read-write. Otherwise it's still paused
+	 * and we must not allow cancelling as it deletes the item.
+	 */
+	if (sb_rdonly(fs_info->sb)) {
+		mutex_unlock(&fs_info->balance_mutex);
+		return -EROFS;
+	}
+
 	atomic_inc(&fs_info->balance_cancel_req);
 	/*
 	 * if we are running just wait and return, balance item is
 	 * deleted in btrfs_balance in this case
 	 */
-	if (atomic_read(&fs_info->balance_running)) {
+	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
 		mutex_unlock(&fs_info->balance_mutex);
 		wait_event(fs_info->balance_wait_q,
-			   atomic_read(&fs_info->balance_running) == 0);
+			   !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
 		mutex_lock(&fs_info->balance_mutex);
 	} else {
-		/* __cancel_balance needs volume_mutex */
 		mutex_unlock(&fs_info->balance_mutex);
-		mutex_lock(&fs_info->volume_mutex);
+		/*
+		 * Lock released to allow other waiters to continue; we'll
+		 * reexamine the status.
+		 */
 		mutex_lock(&fs_info->balance_mutex);
 
-		if (fs_info->balance_ctl)
-			__cancel_balance(fs_info);
-
-		mutex_unlock(&fs_info->volume_mutex);
+		if (fs_info->balance_ctl) {
+			reset_balance_state(fs_info);
+			clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
+			btrfs_info(fs_info, "balance: canceled");
+		}
 	}
 
-	BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+	BUG_ON(fs_info->balance_ctl ||
+		test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
 	atomic_dec(&fs_info->balance_cancel_req);
 	mutex_unlock(&fs_info->balance_mutex);
 	return 0;
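
The conversion above trades the old balance_running atomic counter for a single BTRFS_FS_BALANCE_RUNNING bit woken through the existing balance_wait_q, since at most one balance can run per filesystem. A minimal user-space sketch of that wait-for-flag pattern, with pthreads standing in for the kernel's wait_event()/wake_up() (none of the names below are kernel API):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative analogue of the new scheme: a single "running" flag
 * plus a wait queue instead of an atomic counter.
 */
static struct {
	pthread_mutex_t lock;
	pthread_cond_t  wait_q;   /* fs_info->balance_wait_q analogue */
	bool            running;  /* BTRFS_FS_BALANCE_RUNNING analogue */
} bal = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false };

static void balance_set_running(bool on)
{
	pthread_mutex_lock(&bal.lock);
	bal.running = on;                             /* set_bit()/clear_bit() */
	if (!on)
		pthread_cond_broadcast(&bal.wait_q);  /* wake_up() */
	pthread_mutex_unlock(&bal.lock);
}

static void balance_wait_stopped(void)
{
	pthread_mutex_lock(&bal.lock);
	while (bal.running)                 /* re-check on every wakeup */
		pthread_cond_wait(&bal.wait_q, &bal.lock);
	pthread_mutex_unlock(&bal.lock);
}

int main(void)
{
	balance_set_running(true);
	balance_set_running(false);
	balance_wait_stopped();   /* returns at once: flag already clear */
	puts("balance stopped");
	return 0;
}
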
@@ -4264,8 +4209,7 @@
 		}
 update_tree:
 		if (!btrfs_is_empty_uuid(root_item.uuid)) {
-			ret = btrfs_uuid_tree_add(trans, fs_info,
-						  root_item.uuid,
+			ret = btrfs_uuid_tree_add(trans, root_item.uuid,
 						  BTRFS_UUID_KEY_SUBVOL,
 						  key.objectid);
 			if (ret < 0) {
@@ -4276,7 +4220,7 @@
 		}
 
 		if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
-			ret = btrfs_uuid_tree_add(trans, fs_info,
+			ret = btrfs_uuid_tree_add(trans,
 						  root_item.received_uuid,
 						 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
 						  key.objectid);
@@ -4482,7 +4426,7 @@
 	if (!path)
 		return -ENOMEM;
 
-	path->reada = READA_FORWARD;
+	path->reada = READA_BACK;
 
 	mutex_lock(&fs_info->chunk_mutex);
 
@@ -6043,9 +5987,8 @@
 	return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
 }
 
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
-		     u64 chunk_start, u64 physical, u64 devid,
-		     u64 **logical, int *naddrs, int *stripe_len)
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+		     u64 physical, u64 **logical, int *naddrs, int *stripe_len)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
@@ -6077,8 +6020,6 @@
 	BUG_ON(!buf); /* -ENOMEM */
 
 	for (i = 0; i < map->num_stripes; i++) {
-		if (devid && map->stripes[i].dev->devid != devid)
-			continue;
 		if (map->stripes[i].physical > physical ||
 		    map->stripes[i].physical + length <= physical)
 			continue;
@@ -6410,7 +6351,7 @@
  *
  * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
  * on error.  Returned struct is not linked onto any lists and must be
- * destroyed with free_device.
+ * destroyed with btrfs_free_device.
  */
 struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 					const u64 *devid,
@@ -6433,7 +6374,7 @@
 
 		ret = find_next_devid(fs_info, &tmp);
 		if (ret) {
-			free_device(dev);
+			btrfs_free_device(dev);
 			return ERR_PTR(ret);
 		}
 	}
@@ -6684,8 +6625,7 @@
 	if (IS_ERR(fs_devices))
 		return fs_devices;
 
-	ret = __btrfs_open_devices(fs_devices, FMODE_READ,
-				   fs_info->bdev_holder);
+	ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
 	if (ret) {
 		free_fs_devices(fs_devices);
 		fs_devices = ERR_PTR(ret);
@@ -6693,7 +6633,7 @@
 	}
 
 	if (!fs_devices->seeding) {
-		__btrfs_close_devices(fs_devices);
+		close_fs_devices(fs_devices);
 		free_fs_devices(fs_devices);
 		fs_devices = ERR_PTR(-EINVAL);
 		goto out;
@@ -7002,6 +6942,10 @@
 	if (!path)
 		return -ENOMEM;
 
+	/*
+	 * uuid_mutex is needed only when we are mounting a sprout FS;
+	 * otherwise it can be skipped.
+	 */
 	mutex_lock(&uuid_mutex);
 	mutex_lock(&fs_info->chunk_mutex);
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7909688..5139ec8 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -208,6 +208,7 @@
 
 struct btrfs_fs_devices {
 	u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+	struct list_head fs_list;
 
 	u64 num_devices;
 	u64 open_devices;
@@ -229,7 +230,6 @@
 	struct list_head resized_devices;
 	/* devices not currently being allocated */
 	struct list_head alloc_list;
-	struct list_head list;
 
 	struct btrfs_fs_devices *seed;
 	int seeding;
@@ -329,11 +329,12 @@
 	int tolerated_failures; /* max tolerated fail devs */
 	int devs_increment;	/* ndevs has to be a multiple of this */
 	int ncopies;		/* how many copies of the data there are */
+	int mindev_error;	/* error code if the min device count is unmet */
+	const char raid_name[8]; /* name of the raid */
+	u64 bg_flag;		/* block group flag of the raid */
 };
 
 extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
-extern const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES];
-extern const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES];
 
 struct map_lookup {
 	u64 type;
@@ -351,8 +352,6 @@
 struct btrfs_balance_args;
 struct btrfs_balance_progress;
 struct btrfs_balance_control {
-	struct btrfs_fs_info *fs_info;
-
 	struct btrfs_balance_args data;
 	struct btrfs_balance_args meta;
 	struct btrfs_balance_args sys;
@@ -393,9 +392,8 @@
 int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		     u64 logical, u64 *length,
 		     struct btrfs_bio **bbio_ret);
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
-		     u64 chunk_start, u64 physical, u64 devid,
-		     u64 **logical, int *naddrs, int *stripe_len);
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+		     u64 physical, u64 **logical, int *naddrs, int *stripe_len);
 int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -421,6 +419,7 @@
 struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
 					const u64 *devid,
 					const u8 *uuid);
+void btrfs_free_device(struct btrfs_device *device);
 int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 		    const char *device_path, u64 devid);
 void __exit btrfs_cleanup_fs_uuids(void);
@@ -431,11 +430,8 @@
 				       u8 *uuid, u8 *fsid);
 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
-int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
-				  const char *device_path,
-				  struct btrfs_device *srcdev,
-				  struct btrfs_device **device_out);
-int btrfs_balance(struct btrfs_balance_control *bctl,
+int btrfs_balance(struct btrfs_fs_info *fs_info,
+		  struct btrfs_balance_control *bctl,
 		  struct btrfs_ioctl_balance_args *bargs);
 int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
 int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
@@ -553,6 +549,8 @@
 	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
 }
 
+const char *get_raid_name(enum btrfs_raid_types type);
+
 void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info);
 void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans);
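
Folding the separate btrfs_raid_mindev_error and btrfs_raid_group arrays into the new mindev_error, raid_name and bg_flag members keeps all attributes of a RAID profile in one btrfs_raid_array row. A stand-alone sketch of that table-driven layout; the field values are invented for illustration, not the real btrfs constants:

#include <stdio.h>

/* One row per profile; the numbers here are made up, not btrfs's. */
struct raid_attr {
	int devs_min;                 /* minimum devices for the profile */
	int mindev_error;             /* error if devs_min is not met */
	const char *raid_name;        /* printable profile name */
	unsigned long long bg_flag;   /* block group flag bit */
};

enum { RAID_SINGLE, RAID_ONE, NR_RAID_TYPES };

static const struct raid_attr raid_array[NR_RAID_TYPES] = {
	[RAID_SINGLE] = { 1, 0,   "single", 1ULL << 0 },
	[RAID_ONE]    = { 2, -19, "raid1",  1ULL << 1 },
};

int main(void)
{
	/* A single lookup now yields every attribute of a profile. */
	const struct raid_attr *a = &raid_array[RAID_ONE];

	printf("%s: min devs %d, flag 0x%llx\n",
	       a->raid_name, a->devs_min, a->bg_flag);
	return 0;
}
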
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 249b83f..cabc045 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3427,120 +3427,6 @@
 }
 EXPORT_SYMBOL(bh_submit_read);
 
-/*
- * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
- *
- * Returns the offset within the file on success, and -ENOENT otherwise.
- */
-static loff_t
-page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
-{
-	loff_t offset = page_offset(page);
-	struct buffer_head *bh, *head;
-	bool seek_data = whence == SEEK_DATA;
-
-	if (lastoff < offset)
-		lastoff = offset;
-
-	bh = head = page_buffers(page);
-	do {
-		offset += bh->b_size;
-		if (lastoff >= offset)
-			continue;
-
-		/*
-		 * Unwritten extents that have data in the page cache covering
-		 * them can be identified by the BH_Unwritten state flag.
-		 * Pages with multiple buffers might have a mix of holes, data
-		 * and unwritten extents - any buffer with valid data in it
-		 * should have BH_Uptodate flag set on it.
-		 */
-
-		if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
-			return lastoff;
-
-		lastoff = offset;
-	} while ((bh = bh->b_this_page) != head);
-	return -ENOENT;
-}
-
-/*
- * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
- *
- * Within unwritten extents, the page cache determines which parts are holes
- * and which are data: unwritten and uptodate buffer heads count as data;
- * everything else counts as a hole.
- *
- * Returns the resulting offset on success, and -ENOENT otherwise.
- */
-loff_t
-page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
-			  int whence)
-{
-	pgoff_t index = offset >> PAGE_SHIFT;
-	pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
-	loff_t lastoff = offset;
-	struct pagevec pvec;
-
-	if (length <= 0)
-		return -ENOENT;
-
-	pagevec_init(&pvec);
-
-	do {
-		unsigned nr_pages, i;
-
-		nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
-						end - 1);
-		if (nr_pages == 0)
-			break;
-
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-
-			/*
-			 * At this point, the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or
-			 * even swizzled back from swapper_space to tmpfs file
-			 * mapping.  However, page->index will not change
-			 * because we have a reference on the page.
-                         *
-			 * If current page offset is beyond where we've ended,
-			 * we've found a hole.
-                         */
-			if (whence == SEEK_HOLE &&
-			    lastoff < page_offset(page))
-				goto check_range;
-
-			lock_page(page);
-			if (likely(page->mapping == inode->i_mapping) &&
-			    page_has_buffers(page)) {
-				lastoff = page_seek_hole_data(page, lastoff, whence);
-				if (lastoff >= 0) {
-					unlock_page(page);
-					goto check_range;
-				}
-			}
-			unlock_page(page);
-			lastoff = page_offset(page) + PAGE_SIZE;
-		}
-		pagevec_release(&pvec);
-	} while (index < end);
-
-	/* When no page at lastoff and we are not done, we found a hole. */
-	if (whence != SEEK_HOLE)
-		goto not_found;
-
-check_range:
-	if (lastoff < offset + length)
-		goto out;
-not_found:
-	lastoff = -ENOENT;
-out:
-	pagevec_release(&pvec);
-	return lastoff;
-}
-
 void __init buffer_init(void)
 {
 	unsigned long nrpages;
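
The helpers deleted from fs/buffer.c backed lseek(2)'s SEEK_DATA/SEEK_HOLE for buffer-head based filesystems; the user-visible semantics do not change. A small user-space probe of that interface (the file name is a placeholder; it assumes a sparse file to walk):

#define _GNU_SOURCE             /* for SEEK_DATA / SEEK_HOLE on glibc */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Walk the data/hole layout of a (possibly sparse) file. */
int main(int argc, char **argv)
{
	int fd = open(argc > 1 ? argv[1] : "sparse.bin", O_RDONLY);
	off_t pos = 0, end;

	if (fd < 0)
		return 1;
	end = lseek(fd, 0, SEEK_END);

	while (pos < end) {
		off_t data = lseek(fd, pos, SEEK_DATA);
		off_t hole;

		if (data < 0)   /* ENXIO: nothing but hole until EOF */
			break;
		hole = lseek(fd, data, SEEK_HOLE);
		printf("data [%lld, %lld)\n",
		       (long long)data, (long long)hole);
		pos = hole;
	}
	close(fd);
	return 0;
}
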
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c
index 125b90f..0ce1aa5 100644
--- a/fs/cachefiles/proc.c
+++ b/fs/cachefiles/proc.c
@@ -85,21 +85,6 @@
 };
 
 /*
- * open "/proc/fs/cachefiles/XXX" which provide statistics summaries
- */
-static int cachefiles_histogram_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &cachefiles_histogram_ops);
-}
-
-static const struct file_operations cachefiles_histogram_fops = {
-	.open		= cachefiles_histogram_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-/*
  * initialise the /proc/fs/cachefiles/ directory
  */
 int __init cachefiles_proc_init(void)
@@ -109,8 +94,8 @@
 	if (!proc_mkdir("fs/cachefiles", NULL))
 		goto error_dir;
 
-	if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
-			 &cachefiles_histogram_fops))
+	if (!proc_create_seq("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
+			 &cachefiles_histogram_ops))
 		goto error_histogram;
 
 	_leave(" = 0");
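
proc_create_seq() lets procfs supply the open/read/llseek/release boilerplate itself, so the per-file open() wrapper and file_operations table above can go away. A minimal module-style sketch of the new registration; the demo_* names and the /proc/demo_seq path are invented for illustration:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Single-record sequence: start() yields one item, next() ends it. */
static void *demo_start(struct seq_file *m, loff_t *pos)
{
	return *pos == 0 ? pos : NULL;
}

static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return NULL;
}

static void demo_stop(struct seq_file *m, void *v)
{
}

static int demo_show(struct seq_file *m, void *v)
{
	seq_puts(m, "hello from /proc/demo_seq\n");
	return 0;
}

static const struct seq_operations demo_seq_ops = {
	.start = demo_start,
	.next  = demo_next,
	.stop  = demo_stop,
	.show  = demo_show,
};

static int __init demo_init(void)
{
	/* The seq_operations go straight in; no private fops needed. */
	if (!proc_create_seq("demo_seq", 0444, NULL, &demo_seq_ops))
		return -ENOMEM;
	return 0;
}

static void __exit demo_exit(void)
{
	remove_proc_entry("demo_seq", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
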
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 7e4a1e2..8581799 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -1,11 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 #
-# Makefile for Linux CIFS VFS client 
+# Makefile for Linux CIFS/SMB2/SMB3 VFS client
 #
+ccflags-y += -I$(src)		# needed for trace events
 obj-$(CONFIG_CIFS) += cifs.o
 
-cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
-	  link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
+cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
+	  inode.o link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
 	  cifs_unicode.o nterr.o cifsencrypt.o \
 	  readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \
 	  smb2ops.o smb2maperror.o smb2transport.o \
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 9d69ea4..1161460 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -42,7 +42,7 @@
 		       data, length, true);
 }
 
-void cifs_dump_detail(void *buf)
+void cifs_dump_detail(void *buf, struct TCP_Server_Info *server)
 {
 #ifdef CONFIG_CIFS_DEBUG2
 	struct smb_hdr *smb = (struct smb_hdr *)buf;
@@ -50,7 +50,8 @@
 	cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d\n",
 		 smb->Command, smb->Status.CifsError,
 		 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
-	cifs_dbg(VFS, "smb buf %p len %u\n", smb, smbCalcSize(smb));
+	cifs_dbg(VFS, "smb buf %p len %u\n", smb,
+		 server->ops->calc_smb_size(smb, server));
 #endif /* CONFIG_CIFS_DEBUG2 */
 }
 
@@ -83,7 +84,7 @@
 		cifs_dbg(VFS, "IsMult: %d IsEnd: %d\n",
 			 mid_entry->multiRsp, mid_entry->multiEnd);
 		if (mid_entry->resp_buf) {
-			cifs_dump_detail(mid_entry->resp_buf);
+			cifs_dump_detail(mid_entry->resp_buf, server);
 			cifs_dump_mem("existing buf: ",
 				mid_entry->resp_buf, 62);
 		}
@@ -113,6 +114,8 @@
 		seq_printf(m, " type: %d ", dev_type);
 	if (tcon->seal)
 		seq_printf(m, " Encrypted");
+	if (tcon->nocase)
+		seq_printf(m, " nocase");
 	if (tcon->unix_ext)
 		seq_printf(m, " POSIX Extensions");
 	if (tcon->ses->server->ops->dump_share_caps)
@@ -237,6 +240,10 @@
 			server->credits,  server->dialect);
 		if (server->sign)
 			seq_printf(m, " signed");
+#ifdef CONFIG_CIFS_SMB311
+		if (server->posix_ext_supported)
+			seq_printf(m, " posix");
+#endif /* 3.1.1 */
 		i++;
 		list_for_each(tmp2, &server->smb_ses_list) {
 			ses = list_entry(tmp2, struct cifs_ses,
@@ -314,18 +321,6 @@
 	return 0;
 }
 
-static int cifs_debug_data_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cifs_debug_data_proc_show, NULL);
-}
-
-static const struct file_operations cifs_debug_data_proc_fops = {
-	.open		= cifs_debug_data_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 #ifdef CONFIG_CIFS_STATS
 static ssize_t cifs_stats_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
@@ -497,35 +492,36 @@
 	if (proc_fs_cifs == NULL)
 		return;
 
-	proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
+	proc_create_single("DebugData", 0, proc_fs_cifs,
+			cifs_debug_data_proc_show);
 
 #ifdef CONFIG_CIFS_STATS
-	proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops);
+	proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
 #endif /* STATS */
-	proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
-	proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
-	proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
+	proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
+	proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
+	proc_create("LinuxExtensionsEnabled", 0644, proc_fs_cifs,
 		    &cifs_linux_ext_proc_fops);
-	proc_create("SecurityFlags", 0, proc_fs_cifs,
+	proc_create("SecurityFlags", 0644, proc_fs_cifs,
 		    &cifs_security_flags_proc_fops);
-	proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
+	proc_create("LookupCacheEnabled", 0644, proc_fs_cifs,
 		    &cifs_lookup_cache_proc_fops);
 #ifdef CONFIG_CIFS_SMB_DIRECT
-	proc_create("rdma_readwrite_threshold", 0, proc_fs_cifs,
+	proc_create("rdma_readwrite_threshold", 0644, proc_fs_cifs,
 		&cifs_rdma_readwrite_threshold_proc_fops);
-	proc_create("smbd_max_frmr_depth", 0, proc_fs_cifs,
+	proc_create("smbd_max_frmr_depth", 0644, proc_fs_cifs,
 		&cifs_smbd_max_frmr_depth_proc_fops);
-	proc_create("smbd_keep_alive_interval", 0, proc_fs_cifs,
+	proc_create("smbd_keep_alive_interval", 0644, proc_fs_cifs,
 		&cifs_smbd_keep_alive_interval_proc_fops);
-	proc_create("smbd_max_receive_size", 0, proc_fs_cifs,
+	proc_create("smbd_max_receive_size", 0644, proc_fs_cifs,
 		&cifs_smbd_max_receive_size_proc_fops);
-	proc_create("smbd_max_fragmented_recv_size", 0, proc_fs_cifs,
+	proc_create("smbd_max_fragmented_recv_size", 0644, proc_fs_cifs,
 		&cifs_smbd_max_fragmented_recv_size_proc_fops);
-	proc_create("smbd_max_send_size", 0, proc_fs_cifs,
+	proc_create("smbd_max_send_size", 0644, proc_fs_cifs,
 		&cifs_smbd_max_send_size_proc_fops);
-	proc_create("smbd_send_credit_target", 0, proc_fs_cifs,
+	proc_create("smbd_send_credit_target", 0644, proc_fs_cifs,
 		&cifs_smbd_send_credit_target_proc_fops);
-	proc_create("smbd_receive_credit_max", 0, proc_fs_cifs,
+	proc_create("smbd_receive_credit_max", 0644, proc_fs_cifs,
 		&cifs_smbd_receive_credit_max_proc_fops);
 #endif
 }
@@ -583,6 +579,8 @@
 		cifsFYI = bv;
 	else if ((c[0] > '1') && (c[0] <= '9'))
 		cifsFYI = (int) (c[0] - '0'); /* see cifs_debug.h for meanings */
+	else
+		return -EINVAL;
 
 	return count;
 }
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 0e74690..f4f3f08 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -23,7 +23,7 @@
 #define _H_CIFS_DEBUG
 
 void cifs_dump_mem(char *label, void *data, int length);
-void cifs_dump_detail(void *);
+void cifs_dump_detail(void *buf, struct TCP_Server_Info *ptcp_info);
 void cifs_dump_mids(struct TCP_Server_Info *);
 extern bool traceSMB;		/* flag which enables the function below */
 void dump_smb(void *, int);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 350fa55..9731d0d 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -50,6 +50,7 @@
 					      * root mountable
 					      */
 #define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
+#define CIFS_MOUNT_NO_HANDLE_CACHE 0x4000000 /* disable caching dir handles */
 
 struct cifs_sb_info {
 	struct rb_root tlink_tree;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5a5a015..eb7b657 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -58,13 +58,15 @@
 bool enable_oplocks = true;
 bool linuxExtEnabled = true;
 bool lookupCacheEnabled = true;
+bool disable_legacy_dialects; /* false by default */
 unsigned int global_secflags = CIFSSEC_DEF;
 /* unsigned int ntlmv2_support = 0; */
 unsigned int sign_CIFS_PDUs = 1;
 static const struct super_operations cifs_super_ops;
 unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
 module_param(CIFSMaxBufSize, uint, 0444);
-MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). "
+MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header) "
+				 "for CIFS requests. "
 				 "Default: 16384 Range: 8192 to 130048");
 unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL;
 module_param(cifs_min_rcv, uint, 0444);
@@ -76,11 +78,21 @@
 				 "Range: 2 to 256");
 unsigned int cifs_max_pending = CIFS_MAX_REQ;
 module_param(cifs_max_pending, uint, 0444);
-MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. "
+MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
+				   "CIFS/SMB1 dialect (N/A for SMB3) "
 				   "Default: 32767 Range: 2 to 32767.");
 module_param(enable_oplocks, bool, 0644);
 MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
 
+module_param(disable_legacy_dialects, bool, 0644);
+MODULE_PARM_DESC(disable_legacy_dialects, "To improve security it may be "
+				  "helpful to restrict the ability to "
+				  "override the default dialects (SMB2.1, "
+				  "SMB3 and SMB3.02) on mount with old "
+				  "dialects (CIFS/SMB1 and SMB2), since "
+				  "vers=1.0 (CIFS/SMB1) and vers=2.0 are "
+				  "weaker and less secure. Default: n/N/0");
+
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
@@ -469,10 +481,20 @@
 		seq_puts(s, ",persistenthandles");
 	else if (tcon->use_resilient)
 		seq_puts(s, ",resilienthandles");
+
+#ifdef CONFIG_CIFS_SMB311
+	if (tcon->posix_extensions)
+		seq_puts(s, ",posix");
+	else if (tcon->unix_ext)
+		seq_puts(s, ",unix");
+	else
+		seq_puts(s, ",nounix");
+#else
 	if (tcon->unix_ext)
 		seq_puts(s, ",unix");
 	else
 		seq_puts(s, ",nounix");
+#endif /* SMB311 */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
 		seq_puts(s, ",posixpaths");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
@@ -495,6 +517,8 @@
 		seq_puts(s, ",sfu");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
 		seq_puts(s, ",nobrl");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE)
+		seq_puts(s, ",nohandlecache");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
 		seq_puts(s, ",cifsacl");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
@@ -897,6 +921,17 @@
 	/*  .fs_flags */
 };
 MODULE_ALIAS_FS("cifs");
+
+static struct file_system_type smb3_fs_type = {
+	.owner = THIS_MODULE,
+	.name = "smb3",
+	.mount = cifs_do_mount,
+	.kill_sb = cifs_kill_sb,
+	/*  .fs_flags */
+};
+MODULE_ALIAS_FS("smb3");
+MODULE_ALIAS("smb3");
+
 const struct inode_operations cifs_dir_inode_ops = {
 	.create = cifs_create,
 	.atomic_open = cifs_atomic_open,
@@ -1435,6 +1470,12 @@
 	if (rc)
 		goto out_init_cifs_idmap;
 
+	rc = register_filesystem(&smb3_fs_type);
+	if (rc) {
+		unregister_filesystem(&cifs_fs_type);
+		goto out_init_cifs_idmap;
+	}
+
 	return 0;
 
 out_init_cifs_idmap:
@@ -1465,8 +1506,9 @@
 static void __exit
 exit_cifs(void)
 {
-	cifs_dbg(NOISY, "exit_cifs\n");
+	cifs_dbg(NOISY, "exit_smb3\n");
 	unregister_filesystem(&cifs_fs_type);
+	unregister_filesystem(&smb3_fs_type);
 	cifs_dfs_release_automount_timer();
 #ifdef CONFIG_CIFS_ACL
 	exit_cifs_idmap();
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 013ba2a..5f02318 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -149,5 +149,5 @@
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.11"
+#define CIFS_VERSION   "2.12"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cb950a5..08d1cdd 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -176,6 +176,7 @@
 	struct kvec	*rq_iov;	/* array of kvecs */
 	unsigned int	rq_nvec;	/* number of kvecs in array */
 	struct page	**rq_pages;	/* pointer to array of page ptrs */
+	unsigned int	rq_offset;	/* the offset to the 1st page */
 	unsigned int	rq_npages;	/* number pages in array */
 	unsigned int	rq_pagesz;	/* page size to use */
 	unsigned int	rq_tailsz;	/* length of last page */
@@ -244,7 +245,7 @@
 	int (*map_error)(char *, bool);
 	/* find mid corresponding to the response message */
 	struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *);
-	void (*dump_detail)(void *);
+	void (*dump_detail)(void *buf, struct TCP_Server_Info *ptcp_info);
 	void (*clear_stats)(struct cifs_tcon *);
 	void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
 	void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
@@ -372,7 +373,7 @@
 	int (*close_dir)(const unsigned int, struct cifs_tcon *,
 			 struct cifs_fid *);
 	/* calculate a size of SMB message */
-	unsigned int (*calc_smb_size)(void *);
+	unsigned int (*calc_smb_size)(void *buf, struct TCP_Server_Info *ptcpi);
 	/* check for STATUS_PENDING and process it in a positive case */
 	bool (*is_status_pending)(char *, struct TCP_Server_Info *, int);
 	/* check for STATUS_NETWORK_SESSION_EXPIRED */
@@ -417,7 +418,7 @@
 	/* create lease context buffer for CREATE request */
 	char * (*create_lease_buf)(u8 *, u8);
 	/* parse lease context buffer and return oplock/epoch info */
-	__u8 (*parse_lease_buf)(void *, unsigned int *);
+	__u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey);
 	ssize_t (*copychunk_range)(const unsigned int,
 			struct cifsFileInfo *src_file,
 			struct cifsFileInfo *target_file,
@@ -457,7 +458,7 @@
 				 struct mid_q_entry **);
 	enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
 			    enum securityEnum);
-
+	int (*next_header)(char *);
 };
 
 struct smb_version_values {
@@ -521,10 +522,12 @@
 	bool sfu_remap:1;  /* remap seven reserved chars ala SFU */
 	bool posix_paths:1; /* unset to not ask for posix pathnames. */
 	bool no_linux_ext:1;
+	bool linux_ext:1;
 	bool sfu_emul:1;
 	bool nullauth:1;   /* attempt to authenticate with null user */
 	bool nocase:1;     /* request case insensitive filenames */
 	bool nobrl:1;      /* disable sending byte range locks to srv */
+	bool nohandlecache:1; /* disable caching dir handles if server has problems */
 	bool mand_lock:1;  /* send mandatory not posix byte range lock reqs */
 	bool seal:1;       /* request transport encryption on share */
 	bool nodfs:1;      /* Do not request DFS, even if available */
@@ -630,7 +633,7 @@
 	bool oplocks:1; /* enable oplocks */
 	unsigned int maxReq;	/* Clients should submit no more */
 	/* than maxReq distinct unanswered SMBs to the server when using  */
-	/* multiplexed reads or writes */
+	/* multiplexed reads or writes (for SMB1/CIFS only, not SMB2/SMB3) */
 	unsigned int maxBuf;	/* maxBuf specifies the maximum */
 	/* message size the server can send or receive for non-raw SMBs */
 	/* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
@@ -681,6 +684,7 @@
 	__le16	cipher_type;
 	 /* save initital negprot hash */
 	__u8	preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
+	bool	posix_ext_supported;
 #endif /* 3.1.1 */
 	struct delayed_work reconnect; /* reconnect workqueue job */
 	struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
@@ -953,9 +957,13 @@
 	bool print:1; /* set if connection to printer share */
 	bool retry:1;
 	bool nocase:1;
+	bool nohandlecache:1; /* disable dir handle caching on server resource problems */
 	bool seal:1;      /* transport encryption for this mounted share */
 	bool unix_ext:1;  /* if false disable Linux extensions to CIFS protocol
 				for this mount even if server would support */
+#ifdef CONFIG_CIFS_SMB311
+	bool posix_extensions; /* if true SMB3.11 posix extensions enabled */
+#endif /* CIFS_311 */
 	bool local_lease:1; /* check leases (only) on local system not remote */
 	bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
 	bool broken_sparse_sup; /* if server or share does not support sparse */
@@ -979,6 +987,9 @@
 	struct fscache_cookie *fscache;	/* cookie for share */
 #endif
 	struct list_head pending_opens;	/* list of incomplete opens */
+	bool valid_root_fid:1;	/* Do we have a usable root fid */
+	struct mutex prfid_mutex; /* prevents reopen race after dead ses*/
+	struct cifs_fid *prfid;	/* handle to the directory at top of share */
 	/* BB add field for back pointer to sb struct(s)? */
 };
 
@@ -1071,6 +1082,7 @@
 	int create_options;
 	const char *path;
 	struct cifs_fid *fid;
+	umode_t mode;
 	bool reconnect:1;
 };
 
@@ -1169,10 +1181,11 @@
 	struct smbd_mr			*mr;
 #endif
 	unsigned int			pagesz;
+	unsigned int			page_offset;
 	unsigned int			tailsz;
 	unsigned int			credits;
 	unsigned int			nr_pages;
-	struct page			*pages[];
+	struct page			**pages;
 };
 
 struct cifs_writedata;
@@ -1194,10 +1207,11 @@
 	struct smbd_mr			*mr;
 #endif
 	unsigned int			pagesz;
+	unsigned int			page_offset;
 	unsigned int			tailsz;
 	unsigned int			credits;
 	unsigned int			nr_pages;
-	struct page			*pages[];
+	struct page			**pages;
 };
 
 /*
@@ -1692,16 +1706,17 @@
 GLOBAL_EXTERN atomic_t midCount;
 
 /* Misc globals */
-GLOBAL_EXTERN bool enable_oplocks; /* enable or disable oplocks */
-GLOBAL_EXTERN bool lookupCacheEnabled;
-GLOBAL_EXTERN unsigned int global_secflags;	/* if on, session setup sent
+extern bool enable_oplocks; /* enable or disable oplocks */
+extern bool lookupCacheEnabled;
+extern unsigned int global_secflags;	/* if on, session setup sent
 				with more secure ntlmssp2 challenge/resp */
-GLOBAL_EXTERN unsigned int sign_CIFS_PDUs;  /* enable smb packet signing */
-GLOBAL_EXTERN bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
-GLOBAL_EXTERN unsigned int CIFSMaxBufSize;  /* max size not including hdr */
-GLOBAL_EXTERN unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
-GLOBAL_EXTERN unsigned int cifs_min_small;  /* min size of small buf pool */
-GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern unsigned int sign_CIFS_PDUs;  /* enable smb packet signing */
+extern bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
+extern unsigned int CIFSMaxBufSize;  /* max size not including hdr */
+extern unsigned int cifs_min_rcv;    /* min size of big ntwrk buf pool */
+extern unsigned int cifs_min_small;  /* min size of small buf pool */
+extern unsigned int cifs_max_pending; /* MAX requests at once to server*/
+extern bool disable_legacy_dialects;  /* forbid vers=1.0 and vers=2.0 mounts */
 
 #ifdef CONFIG_CIFS_ACL
 GLOBAL_EXTERN struct rb_root uidtree;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 365a414..7933c5f 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -21,6 +21,7 @@
 #ifndef _CIFSPROTO_H
 #define _CIFSPROTO_H
 #include <linux/nls.h>
+#include "trace.h"
 
 struct statfs;
 struct smb_vol;
@@ -47,6 +48,7 @@
 	cifs_dbg(FYI, "CIFS VFS: in %s as Xid: %u with uid: %d\n",	\
 		 __func__, __xid,					\
 		 from_kuid(&init_user_ns, current_fsuid()));		\
+	trace_smb3_enter(__xid, __func__);			\
 	__xid;							\
 })
 
@@ -54,7 +56,11 @@
 do {								\
 	_free_xid(curr_xid);					\
 	cifs_dbg(FYI, "CIFS VFS: leaving %s (xid = %u) rc = %d\n",	\
-		 __func__, curr_xid, (int)rc);				\
+		 __func__, curr_xid, (int)rc);			\
+	if (rc)							\
+		trace_smb3_exit_err(curr_xid, __func__, (int)rc);	\
+	else							\
+		trace_smb3_exit_done(curr_xid, __func__);	\
 } while (0)
 extern int init_cifs_idmap(void);
 extern void exit_cifs_idmap(void);
@@ -124,7 +130,7 @@
 			    unsigned int bytes_written);
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
 extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
-extern unsigned int smbCalcSize(void *buf);
+extern unsigned int smbCalcSize(void *buf, struct TCP_Server_Info *server);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
 			struct TCP_Server_Info *server);
 extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -197,7 +203,9 @@
 extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
 			         unsigned int to_read);
 extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
-				      struct page *page, unsigned int to_read);
+					struct page *page,
+					unsigned int page_offset,
+					unsigned int to_read);
 extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 			       struct cifs_sb_info *cifs_sb);
 extern int cifs_match_super(struct super_block *, void *);
@@ -525,6 +533,8 @@
 void cifs_writev_complete(struct work_struct *work);
 struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
 						work_func_t complete);
+struct cifs_writedata *cifs_writedata_direct_alloc(struct page **pages,
+						work_func_t complete);
 void cifs_writedata_release(struct kref *refcount);
 int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 			  struct cifs_sb_info *cifs_sb,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1529a08..5aca336 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -106,6 +106,12 @@
 		open_file->oplock_break_cancelled = true;
 	}
 	spin_unlock(&tcon->open_file_lock);
+
+	mutex_lock(&tcon->prfid_mutex);
+	tcon->valid_root_fid = false;
+	memset(tcon->prfid, 0, sizeof(struct cifs_fid));
+	mutex_unlock(&tcon->prfid_mutex);
+
 	/*
 	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
 	 * to this tcon.
@@ -1946,6 +1952,7 @@
 	if (wdata->cfile)
 		cifsFileInfo_put(wdata->cfile);
 
+	kvfree(wdata->pages);
 	kfree(wdata);
 }
 
@@ -2069,12 +2076,22 @@
 struct cifs_writedata *
 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
 {
+	struct page **pages =
+		kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+	if (pages)
+		return cifs_writedata_direct_alloc(pages, complete);
+
+	return NULL;
+}
+
+struct cifs_writedata *
+cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
+{
 	struct cifs_writedata *wdata;
 
-	/* writedata + number of page pointers */
-	wdata = kzalloc(sizeof(*wdata) +
-			sizeof(struct page *) * nr_pages, GFP_NOFS);
+	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
 	if (wdata != NULL) {
+		wdata->pages = pages;
 		kref_init(&wdata->refcount);
 		INIT_LIST_HEAD(&wdata->list);
 		init_completion(&wdata->done);
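
cifs_writedata_direct_alloc() exists because the page-pointer array is no longer a flexible array member glued onto the descriptor: callers may now pass in their own array, and the common allocator simply builds one first. A user-space sketch of the split-allocation pattern, with calloc/free standing in for the kernel's kzalloc/kvfree:

#include <stdlib.h>

struct writedata {
	void **pages;             /* now a pointer, not pages[] */
	unsigned int nr_pages;
};

/* Caller supplies the array: the "direct" entry point. */
static struct writedata *writedata_direct_alloc(void **pages,
						unsigned int nr)
{
	struct writedata *wdata = calloc(1, sizeof(*wdata));

	if (wdata) {
		wdata->pages = pages;
		wdata->nr_pages = nr;
	}
	return wdata;
}

/* Common path: allocate the array, then reuse the direct helper. */
static struct writedata *writedata_alloc(unsigned int nr)
{
	void **pages = calloc(nr, sizeof(*pages));
	struct writedata *wdata = NULL;

	if (pages) {
		wdata = writedata_direct_alloc(pages, nr);
		if (!wdata)
			free(pages);  /* don't leak on descriptor failure */
	}
	return wdata;
}

static void writedata_release(struct writedata *wdata)
{
	free(wdata->pages);       /* kvfree() in the kernel version */
	free(wdata);
}

int main(void)
{
	struct writedata *w = writedata_alloc(4);

	if (w)
		writedata_release(w);
	return 0;
}
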
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7a10a5d..e5a2fe7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -61,6 +61,7 @@
 #define RFC1001_PORT 139
 
 extern mempool_t *cifs_req_poolp;
+extern bool disable_legacy_dialects;
 
 /* FIXME: should these be tunable? */
 #define TLINK_ERROR_EXPIRE	(1 * HZ)
@@ -76,9 +77,10 @@
 	Opt_mapposix, Opt_nomapposix,
 	Opt_mapchars, Opt_nomapchars, Opt_sfu,
 	Opt_nosfu, Opt_nodfs, Opt_posixpaths,
-	Opt_noposixpaths, Opt_nounix,
+	Opt_noposixpaths, Opt_nounix, Opt_unix,
 	Opt_nocase,
 	Opt_brl, Opt_nobrl,
+	Opt_handlecache, Opt_nohandlecache,
 	Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids,
 	Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
 	Opt_nohard, Opt_nosoft,
@@ -144,10 +146,16 @@
 	{ Opt_noposixpaths, "noposixpaths" },
 	{ Opt_nounix, "nounix" },
 	{ Opt_nounix, "nolinux" },
+	{ Opt_nounix, "noposix" },
+	{ Opt_unix, "unix" },
+	{ Opt_unix, "linux" },
+	{ Opt_unix, "posix" },
 	{ Opt_nocase, "nocase" },
 	{ Opt_nocase, "ignorecase" },
 	{ Opt_brl, "brl" },
 	{ Opt_nobrl, "nobrl" },
+	{ Opt_handlecache, "handlecache" },
+	{ Opt_nohandlecache, "nohandlecache" },
 	{ Opt_nobrl, "nolock" },
 	{ Opt_forcemandatorylock, "forcemandatorylock" },
 	{ Opt_forcemandatorylock, "forcemand" },
@@ -591,10 +599,11 @@
 
 int
 cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
-		      unsigned int to_read)
+	unsigned int page_offset, unsigned int to_read)
 {
 	struct msghdr smb_msg;
-	struct bio_vec bv = {.bv_page = page, .bv_len = to_read};
+	struct bio_vec bv = {
+		.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
 	iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read);
 	return cifs_readv_from_socket(server, &smb_msg);
 }
@@ -848,6 +857,7 @@
 	int length;
 	struct TCP_Server_Info *server = p;
 	unsigned int pdu_length;
+	unsigned int next_offset;
 	char *buf = NULL;
 	struct task_struct *task_to_wake = NULL;
 	struct mid_q_entry *mid_entry;
@@ -874,24 +884,29 @@
 		length = cifs_read_from_socket(server, buf, pdu_length);
 		if (length < 0)
 			continue;
-		server->total_read = length;
+
+		if (server->vals->header_preamble_size == 0)
+			server->total_read = 0;
+		else
+			server->total_read = length;
 
 		/*
 		 * The right amount was read from socket - 4 bytes,
 		 * so we can now interpret the length field.
 		 */
 		pdu_length = get_rfc1002_length(buf);
-		server->pdu_size = pdu_length;
 
 		cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length);
 		if (!is_smb_response(server, buf[0]))
 			continue;
+next_pdu:
+		server->pdu_size = pdu_length;
 
 		/* make sure we have enough to get to the MID */
-		if (pdu_length < HEADER_SIZE(server) - 1 -
+		if (server->pdu_size < HEADER_SIZE(server) - 1 -
 		    server->vals->header_preamble_size) {
 			cifs_dbg(VFS, "SMB response too short (%u bytes)\n",
-				 pdu_length);
+				 server->pdu_size);
 			cifs_reconnect(server);
 			wake_up(&server->response_q);
 			continue;
@@ -906,6 +921,12 @@
 			continue;
 		server->total_read += length;
 
+		if (server->ops->next_header) {
+			next_offset = server->ops->next_header(buf);
+			if (next_offset)
+				server->pdu_size = next_offset;
+		}
+
 		if (server->ops->is_transform_hdr &&
 		    server->ops->receive_transform &&
 		    server->ops->is_transform_hdr(buf)) {
@@ -948,10 +969,18 @@
 				      HEADER_SIZE(server));
 #ifdef CONFIG_CIFS_DEBUG2
 			if (server->ops->dump_detail)
-				server->ops->dump_detail(buf);
+				server->ops->dump_detail(buf, server);
 			cifs_dump_mids(server);
 #endif /* CIFS_DEBUG2 */
-
+		}
+		if (pdu_length > server->pdu_size) {
+			if (!allocate_buffers(server))
+				continue;
+			pdu_length -= server->pdu_size;
+			server->total_read = 0;
+			server->large_buf = false;
+			buf = server->smallbuf;
+			goto next_pdu;
 		}
 	} /* end while !EXITING */
 
@@ -1143,10 +1172,18 @@
 
 	switch (match_token(value, cifs_smb_version_tokens, args)) {
 	case Smb_1:
+		if (disable_legacy_dialects) {
+			cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+			return 1;
+		}
 		vol->ops = &smb1_operations;
 		vol->vals = &smb1_values;
 		break;
 	case Smb_20:
+		if (disable_legacy_dialects) {
+			cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+			return 1;
+		}
 		vol->ops = &smb20_operations;
 		vol->vals = &smb20_values;
 		break;
@@ -1426,8 +1463,17 @@
 			vol->posix_paths = 0;
 			break;
 		case Opt_nounix:
+			if (vol->linux_ext)
+				cifs_dbg(VFS,
+					"conflicting unix mount options\n");
 			vol->no_linux_ext = 1;
 			break;
+		case Opt_unix:
+			if (vol->no_linux_ext)
+				cifs_dbg(VFS,
+					"conflicting unix mount options\n");
+			vol->linux_ext = 1;
+			break;
 		case Opt_nocase:
 			vol->nocase = 1;
 			break;
@@ -1445,6 +1491,12 @@
 				(S_IALLUGO & ~(S_ISUID | S_IXGRP)))
 				vol->file_mode = S_IALLUGO;
 			break;
+		case Opt_nohandlecache:
+			vol->nohandlecache = 1;
+			break;
+		case Opt_handlecache:
+			vol->nohandlecache = 0;
+			break;
 		case Opt_forcemandatorylock:
 			vol->mand_lock = 1;
 			break;
@@ -2967,6 +3019,13 @@
 		}
 	}
 
+#ifdef CONFIG_CIFS_SMB311
+	if ((volume_info->linux_ext) && (ses->server->posix_ext_supported)) {
+		if (ses->server->vals->protocol_id == SMB311_PROT_ID)
+			tcon->posix_extensions = true;
+	}
+#endif /* 311 */
+
 	/*
 	 * BB Do we need to wrap session_mutex around this TCon call and Unix
 	 * SetFS as we do on SessSetup and reconnect?
@@ -3022,6 +3081,7 @@
 	 */
 	tcon->retry = volume_info->retry;
 	tcon->nocase = volume_info->nocase;
+	tcon->nohandlecache = volume_info->nohandlecache;
 	tcon->local_lease = volume_info->local_lease;
 	INIT_LIST_HEAD(&tcon->pending_opens);
 
@@ -3580,6 +3640,8 @@
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
 	if (pvolume_info->nobrl)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+	if (pvolume_info->nohandlecache)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE;
 	if (pvolume_info->nostrictsync)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
 	if (pvolume_info->mand_lock)
@@ -3922,6 +3984,12 @@
 		goto remote_path_check;
 	}
 
+#ifdef CONFIG_CIFS_SMB311
+	/* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
+	if (tcon->posix_extensions)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+#endif /* SMB3.11 */
+
 	/* tell server which Unix caps we support */
 	if (cap_unix(tcon->ses)) {
 		/* reset of caps checks mount to see if unix extensions
@@ -4353,6 +4421,7 @@
 	vol_info->UNC = master_tcon->treeName;
 	vol_info->retry = master_tcon->retry;
 	vol_info->nocase = master_tcon->nocase;
+	vol_info->nohandlecache = master_tcon->nohandlecache;
 	vol_info->local_lease = master_tcon->local_lease;
 	vol_info->no_linux_ext = !master_tcon->unix_ext;
 	vol_info->sectype = master_tcon->ses->sectype;
@@ -4382,8 +4451,14 @@
 		goto out;
 	}
 
+#ifdef CONFIG_CIFS_SMB311
+	/* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
+	if (tcon->posix_extensions)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
+#endif /* SMB3.11 */
 	if (cap_unix(ses))
 		reset_cifs_unix_caps(0, tcon, NULL, vol_info);
+
 out:
 	kfree(vol_info->username);
 	kzfree(vol_info->password);
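
The new next_header() hook and the next_pdu loop in the demultiplex thread allow a single socket read to carry several compounded PDUs: each header yields the offset of the next PDU, with zero meaning this is the last one. A self-contained sketch of that slicing loop; the 4-byte little-endian offset at the start of each PDU is an assumption of the sketch, not the real SMB2 header layout:

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

struct pdu_view {
	const uint8_t *buf;
	size_t len;
};

/* Sketch-only layout: a 4-byte LE "offset of next PDU" leads each PDU. */
static size_t next_header(const uint8_t *pdu)
{
	return pdu[0] | pdu[1] << 8 | pdu[2] << 16 |
	       ((size_t)pdu[3]) << 24;
}

static int for_each_pdu(const uint8_t *buf, size_t total,
			void (*handle)(struct pdu_view))
{
	size_t off = 0;

	while (off < total) {
		size_t next = next_header(buf + off);
		size_t len = next ? next : total - off;  /* 0 == last PDU */

		if (len == 0 || off + len > total)
			return -1;                       /* malformed chain */
		handle((struct pdu_view){ buf + off, len });
		if (!next)
			break;
		off += next;
	}
	return 0;
}

static void show(struct pdu_view v)
{
	printf("pdu of %zu bytes\n", v.len);
}

int main(void)
{
	/* Two chained PDUs: the first points 8 bytes ahead, the
	 * second is marked last by a zero offset. */
	uint8_t frame[12] = { 8, 0, 0, 0 };

	return for_each_pdu(frame, sizeof(frame), show);
}
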
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 9258443..ddae52b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -369,7 +369,7 @@
 	oparms.path = full_path;
 	oparms.fid = fid;
 	oparms.reconnect = false;
-
+	oparms.mode = mode;
 	rc = server->ops->open(xid, &oparms, oplock, buf);
 	if (rc) {
 		cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc);
@@ -780,21 +780,25 @@
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink)) {
 		free_xid(xid);
-		return (struct dentry *)tlink;
+		return ERR_CAST(tlink);
 	}
 	pTcon = tlink_tcon(tlink);
 
 	rc = check_name(direntry, pTcon);
-	if (rc)
-		goto lookup_out;
+	if (unlikely(rc)) {
+		cifs_put_tlink(tlink);
+		free_xid(xid);
+		return ERR_PTR(rc);
+	}
 
 	/* can not grab the rename sem here since it would
 	deadlock in the cases (beginning of sys_rename itself)
 	in which we already have the sb rename sem */
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
-		rc = -ENOMEM;
-		goto lookup_out;
+		cifs_put_tlink(tlink);
+		free_xid(xid);
+		return ERR_PTR(-ENOMEM);
 	}
 
 	if (d_really_is_positive(direntry)) {
@@ -813,29 +817,25 @@
 				parent_dir_inode->i_sb, xid, NULL);
 	}
 
-	if ((rc == 0) && (newInode != NULL)) {
-		d_add(direntry, newInode);
+	if (rc == 0) {
 		/* since paths are not looked up by component - the parent
 		   directories are presumed to be good here */
 		renew_parental_timestamps(direntry);
-
 	} else if (rc == -ENOENT) {
-		rc = 0;
 		cifs_set_time(direntry, jiffies);
-		d_add(direntry, NULL);
-	/*	if it was once a directory (but how can we tell?) we could do
-		shrink_dcache_parent(direntry); */
-	} else if (rc != -EACCES) {
-		cifs_dbg(FYI, "Unexpected lookup error %d\n", rc);
-		/* We special case check for Access Denied - since that
-		is a common return code */
+		newInode = NULL;
+	} else {
+		if (rc != -EACCES) {
+			cifs_dbg(FYI, "Unexpected lookup error %d\n", rc);
+			/* We special case check for Access Denied - since that
+			is a common return code */
+		}
+		newInode = ERR_PTR(rc);
 	}
-
-lookup_out:
 	kfree(full_path);
 	cifs_put_tlink(tlink);
 	free_xid(xid);
-	return ERR_PTR(rc);
+	return d_splice_alias(newInode, direntry);
 }
 
 static int
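
cifs_lookup() now funnels every outcome through d_splice_alias(): a resolved inode, NULL for a negative entry, or an ERR_PTR() that d_splice_alias() propagates back. A sketch of that canonical ->lookup() shape; demo_resolve() is a fictitious stand-in for the protocol-specific name resolution:

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/err.h>

/* Fictitious resolver standing in for the protocol-specific lookup. */
static int demo_resolve(struct inode *dir, const struct qstr *name,
			struct inode **inode)
{
	*inode = NULL;
	return -ENOENT;           /* pretend nothing exists */
}

static struct dentry *demo_lookup(struct inode *dir,
				  struct dentry *dentry,
				  unsigned int flags)
{
	struct inode *inode;
	int err = demo_resolve(dir, &dentry->d_name, &inode);

	if (err == -ENOENT)
		inode = NULL;             /* negative dentry is fine */
	else if (err)
		inode = ERR_PTR(err);     /* d_splice_alias passes it on */

	/* One exit: attaches the dentry, or returns the error. */
	return d_splice_alias(inode, dentry);
}
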
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 23fd430..87eece6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2880,13 +2880,13 @@
 }
 
 static struct cifs_readdata *
-cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
+cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
 {
 	struct cifs_readdata *rdata;
 
-	rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
-			GFP_KERNEL);
+	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
 	if (rdata != NULL) {
+		rdata->pages = pages;
 		kref_init(&rdata->refcount);
 		INIT_LIST_HEAD(&rdata->list);
 		init_completion(&rdata->done);
@@ -2896,6 +2896,22 @@
 	return rdata;
 }
 
+static struct cifs_readdata *
+cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
+{
+	struct page **pages =
+		kzalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
+	struct cifs_readdata *ret = NULL;
+
+	if (pages) {
+		ret = cifs_readdata_direct_alloc(pages, complete);
+		if (!ret)
+			kfree(pages);
+	}
+
+	return ret;
+}
+
 void
 cifs_readdata_release(struct kref *refcount)
 {
@@ -2910,6 +2926,7 @@
 	if (rdata->cfile)
 		cifsFileInfo_put(rdata->cfile);
 
+	kvfree(rdata->pages);
 	kfree(rdata);
 }
 
@@ -3009,12 +3026,20 @@
 	int result = 0;
 	unsigned int i;
 	unsigned int nr_pages = rdata->nr_pages;
+	unsigned int page_offset = rdata->page_offset;
 
 	rdata->got_bytes = 0;
 	rdata->tailsz = PAGE_SIZE;
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page = rdata->pages[i];
 		size_t n;
+		unsigned int segment_size = rdata->pagesz;
+
+		if (i == 0)
+			segment_size -= page_offset;
+		else
+			page_offset = 0;
 
 		if (len <= 0) {
 			/* no need to hold page hostage */
@@ -3023,24 +3048,25 @@
 			put_page(page);
 			continue;
 		}
+
 		n = len;
-		if (len >= PAGE_SIZE) {
+		if (len >= segment_size)
 			/* enough data to fill the page */
-			n = PAGE_SIZE;
-			len -= n;
-		} else {
-			zero_user(page, len, PAGE_SIZE - len);
+			n = segment_size;
+		else
 			rdata->tailsz = len;
-			len = 0;
-		}
+		len -= n;
+
 		if (iter)
-			result = copy_page_from_iter(page, 0, n, iter);
+			result = copy_page_from_iter(
+					page, page_offset, n, iter);
 #ifdef CONFIG_CIFS_SMB_DIRECT
 		else if (rdata->mr)
 			result = n;
 #endif
 		else
-			result = cifs_read_page_from_socket(server, page, n);
+			result = cifs_read_page_from_socket(
+					server, page, page_offset, n);
 		if (result < 0)
 			break;
 
@@ -3113,6 +3139,7 @@
 		rdata->bytes = cur_len;
 		rdata->pid = pid;
 		rdata->pagesz = PAGE_SIZE;
+		rdata->tailsz = PAGE_SIZE;
 		rdata->read_into_pages = cifs_uncached_read_into_pages;
 		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
 		rdata->credits = credits;
@@ -3557,6 +3584,7 @@
 	u64 eof;
 	pgoff_t eof_index;
 	unsigned int nr_pages = rdata->nr_pages;
+	unsigned int page_offset = rdata->page_offset;
 
 	/* determine the eof that the server (probably) has */
 	eof = CIFS_I(rdata->mapping->host)->server_eof;
@@ -3567,13 +3595,21 @@
 	rdata->tailsz = PAGE_SIZE;
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page = rdata->pages[i];
-		size_t n = PAGE_SIZE;
+		unsigned int to_read = rdata->pagesz;
+		size_t n;
 
-		if (len >= PAGE_SIZE) {
-			len -= PAGE_SIZE;
+		if (i == 0)
+			to_read -= page_offset;
+		else
+			page_offset = 0;
+
+		n = to_read;
+
+		if (len >= to_read) {
+			len -= to_read;
 		} else if (len > 0) {
 			/* enough for partial page, fill and zero the rest */
-			zero_user(page, len, PAGE_SIZE - len);
+			zero_user(page, len + page_offset, to_read - len);
 			n = rdata->tailsz = len;
 			len = 0;
 		} else if (page->index > eof_index) {
@@ -3605,13 +3641,15 @@
 		}
 
 		if (iter)
-			result = copy_page_from_iter(page, 0, n, iter);
+			result = copy_page_from_iter(
+					page, page_offset, n, iter);
 #ifdef CONFIG_CIFS_SMB_DIRECT
 		else if (rdata->mr)
 			result = n;
 #endif
 		else
-			result = cifs_read_page_from_socket(server, page, n);
+			result = cifs_read_page_from_socket(
+					server, page, page_offset, n);
 		if (result < 0)
 			break;
 
@@ -3790,6 +3828,7 @@
 		rdata->bytes = bytes;
 		rdata->pid = pid;
 		rdata->pagesz = PAGE_SIZE;
+		rdata->tailsz = PAGE_SIZE;
 		rdata->read_into_pages = cifs_readpages_read_into_pages;
 		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
 		rdata->credits = credits;
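
With the new page_offset plumbing, only the first page of a read is filled starting mid-page; later pages start at offset zero and the tail segment may be short. A user-space sketch of that offset-aware fill loop (PAGE_SZ and the helper names are illustrative):

#include <string.h>

#define PAGE_SZ 4096u

/* Only page 0 starts at page_offset; later pages start at 0. */
static size_t fill_pages(char *pages[], unsigned int nr_pages,
			 unsigned int page_offset,
			 const char *src, size_t len)
{
	size_t copied = 0;
	unsigned int i;

	for (i = 0; i < nr_pages && len; i++) {
		unsigned int off = i == 0 ? page_offset : 0;
		unsigned int seg = PAGE_SZ - off;   /* usable bytes here */
		size_t n = len < seg ? len : seg;

		memcpy(pages[i] + off, src + copied, n);
		copied += n;
		len -= n;
	}
	return copied;
}

int main(void)
{
	static char p0[PAGE_SZ], p1[PAGE_SZ], src[6000];
	char *pages[] = { p0, p1 };

	/* Page 0 is filled from byte 100 on; page 1 from byte 0. */
	return fill_pages(pages, 2, 100, src, sizeof(src)) == 6000 ? 0 : 1;
}
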
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3c371f7..745fd7f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -746,7 +746,8 @@
 	cifs_dbg(FYI, "Getting info on %s\n", full_path);
 
 	if ((data == NULL) && (*inode != NULL)) {
-		if (CIFS_CACHE_READ(CIFS_I(*inode))) {
+		if (CIFS_CACHE_READ(CIFS_I(*inode)) &&
+		    CIFS_I(*inode)->time != 0) {
 			cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
 			goto cgii_exit;
 		}
@@ -1857,15 +1858,15 @@
 	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 
+	if (cifs_i->time == 0)
+		return true;
+
 	if (CIFS_CACHE_READ(cifs_i))
 		return false;
 
 	if (!lookupCacheEnabled)
 		return true;
 
-	if (cifs_i->time == 0)
-		return true;
-
 	if (!cifs_sb->actimeo)
 		return true;
 
@@ -2104,10 +2105,14 @@
 
 static void cifs_setsize(struct inode *inode, loff_t offset)
 {
+	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
+
 	spin_lock(&inode->i_lock);
 	i_size_write(inode, offset);
 	spin_unlock(&inode->i_lock);
 
+	/* Cached inode must be refreshed on truncate */
+	cifs_i->time = 0;
 	truncate_pagecache(inode, offset);
 }
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 460084a..aba3fc3 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -117,6 +117,8 @@
 		INIT_LIST_HEAD(&ret_buf->openFileList);
 		INIT_LIST_HEAD(&ret_buf->tcon_list);
 		spin_lock_init(&ret_buf->open_file_lock);
+		mutex_init(&ret_buf->prfid_mutex);
+		ret_buf->prfid = kzalloc(sizeof(struct cifs_fid), GFP_KERNEL);
 #ifdef CONFIG_CIFS_STATS
 		spin_lock_init(&ret_buf->stat_lock);
 #endif
@@ -134,6 +136,7 @@
 	atomic_dec(&tconInfoAllocCount);
 	kfree(buf_to_free->nativeFileSystem);
 	kzfree(buf_to_free->password);
+	kfree(buf_to_free->prfid);
 	kfree(buf_to_free);
 }
 
@@ -145,7 +148,7 @@
 	 * SMB2 header is bigger than CIFS one - no problems to clean some
 	 * more bytes for CIFS.
 	 */
-	size_t buf_size = sizeof(struct smb2_hdr);
+	size_t buf_size = sizeof(struct smb2_sync_hdr);
 
 	/*
 	 * We could use negotiated size instead of max_msgsize -
@@ -339,7 +342,7 @@
 	/* otherwise, there is enough to get to the BCC */
 	if (check_smb_hdr(smb))
 		return -EIO;
-	clc_len = smbCalcSize(smb);
+	clc_len = smbCalcSize(smb, server);
 
 	if (4 + rfclen != total_read) {
 		cifs_dbg(VFS, "Length read does not match RFC1001 length %d\n",
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index cc88f4f..d7ad0df 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -903,7 +903,7 @@
  * portion, the number of word parameters and the data portion of the message
  */
 unsigned int
-smbCalcSize(void *buf)
+smbCalcSize(void *buf, struct TCP_Server_Info *server)
 {
 	struct smb_hdr *ptr = (struct smb_hdr *)buf;
 	return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index a27fc87..eeab81c 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -650,7 +650,8 @@
 		char *cur_ent;
 		char *end_of_smb = cfile->srch_inf.ntwrk_buf_start +
 			server->ops->calc_smb_size(
-					cfile->srch_inf.ntwrk_buf_start);
+					cfile->srch_inf.ntwrk_buf_start,
+					server);
 
 		cur_ent = cfile->srch_inf.srch_entries_start;
 		first_entry_in_buffer = cfile->srch_inf.index_of_last_entry
@@ -831,7 +832,8 @@
 	cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n",
 		 num_to_fill, cifsFile->srch_inf.ntwrk_buf_start);
 	max_len = tcon->ses->server->ops->calc_smb_size(
-			cifsFile->srch_inf.ntwrk_buf_start);
+			cifsFile->srch_inf.ntwrk_buf_start,
+			tcon->ses->server);
 	end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
 
 	tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 401a5d8..0ffa180 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -61,9 +61,4 @@
 /* Maximum buffer size value we can send with 1 credit */
 #define SMB2_MAX_BUFFER_SIZE 65536
 
-static inline struct smb2_sync_hdr *get_sync_hdr(void *buf)
-{
-	return &(((struct smb2_hdr *)buf)->sync_hdr);
-}
-
 #endif	/* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1238cd3..a6e786e 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -44,26 +44,38 @@
 		   __u32 create_options, void *data, int command)
 {
 	int rc, tmprc = 0;
-	__le16 *utf16_path;
+	__le16 *utf16_path = NULL;
 	__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 	struct cifs_open_parms oparms;
 	struct cifs_fid fid;
+	bool use_cached_root_handle = false;
 
-	utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
-	if (!utf16_path)
-		return -ENOMEM;
+	if ((strcmp(full_path, "") == 0) && (create_options == 0) &&
+	    (desired_access == FILE_READ_ATTRIBUTES) &&
+	    (create_disposition == FILE_OPEN) &&
+	    (tcon->nohandlecache == false)) {
+		rc = open_shroot(xid, tcon, &fid);
+		if (rc == 0)
+			use_cached_root_handle = true;
+	}
 
-	oparms.tcon = tcon;
-	oparms.desired_access = desired_access;
-	oparms.disposition = create_disposition;
-	oparms.create_options = create_options;
-	oparms.fid = &fid;
-	oparms.reconnect = false;
+	if (use_cached_root_handle == false) {
+		utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+		if (!utf16_path)
+			return -ENOMEM;
 
-	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
-	if (rc) {
-		kfree(utf16_path);
-		return rc;
+		oparms.tcon = tcon;
+		oparms.desired_access = desired_access;
+		oparms.disposition = create_disposition;
+		oparms.create_options = create_options;
+		oparms.fid = &fid;
+		oparms.reconnect = false;
+
+		rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
+		if (rc) {
+			kfree(utf16_path);
+			return rc;
+		}
 	}
 
 	switch (command) {
@@ -107,7 +119,8 @@
 		break;
 	}
 
-	rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+	if (use_cached_root_handle == false)
+		rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
 	if (tmprc)
 		rc = tmprc;
 	kfree(utf16_path);
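
open_shroot(), together with the new prfid_mutex/valid_root_fid/prfid fields, memoizes the handle for the root of the share so that repeated FILE_READ_ATTRIBUTES queries of "" skip an open/close round trip. A user-space sketch of the memoized-handle pattern with a stubbed-out server open (none of these names are the cifs API):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static struct {
	pthread_mutex_t prfid_mutex;
	bool valid_root_fid;
	unsigned long root_fid;
} tcon = { .prfid_mutex = PTHREAD_MUTEX_INITIALIZER };

/* Stub standing in for the network open of the share root. */
static int server_open_root(unsigned long *fid)
{
	*fid = 42;
	return 0;
}

static int open_cached_root(unsigned long *fid)
{
	int rc = 0;

	pthread_mutex_lock(&tcon.prfid_mutex);
	if (!tcon.valid_root_fid) {
		rc = server_open_root(&tcon.root_fid); /* slow path, once */
		if (rc == 0)
			tcon.valid_root_fid = true;
	}
	if (rc == 0)
		*fid = tcon.root_fid;
	pthread_mutex_unlock(&tcon.prfid_mutex);
	return rc;
}

int main(void)
{
	unsigned long fid;

	if (open_cached_root(&fid) == 0 && open_cached_root(&fid) == 0)
		printf("root fid %lu (second call hit the cache)\n", fid);
	return 0;
}
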
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 3bfc9c9..20a2d30 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -27,6 +27,7 @@
 #include "smb2proto.h"
 #include "smb2status.h"
 #include "smb2glob.h"
+#include "trace.h"
 
 struct status_to_posix_error {
 	__le32 smb2_status;
@@ -2450,13 +2451,16 @@
 int
 map_smb2_to_linux_error(char *buf, bool log_err)
 {
-	struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
 	unsigned int i;
 	int rc = -EIO;
 	__le32 smb2err = shdr->Status;
 
-	if (smb2err == 0)
+	if (smb2err == 0) {
+		trace_smb3_cmd_done(shdr->TreeId, shdr->SessionId,
+			le16_to_cpu(shdr->Command), le64_to_cpu(shdr->MessageId));
 		return 0;
+	}
 
 	/* mask facility */
 	if (log_err && (smb2err != STATUS_MORE_PROCESSING_REQUIRED) &&
@@ -2478,5 +2482,8 @@
 	cifs_dbg(FYI, "Mapping SMB2 status code 0x%08x to POSIX err %d\n",
 		 __le32_to_cpu(smb2err), rc);
 
+	trace_smb3_cmd_err(shdr->TreeId, shdr->SessionId,
+			le16_to_cpu(shdr->Command),
+			le64_to_cpu(shdr->MessageId), le32_to_cpu(smb2err), rc);
 	return rc;
 }
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 68ea849..cb5728e 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -94,8 +94,8 @@
 };
 
 #ifdef CONFIG_CIFS_SMB311
-static __u32 get_neg_ctxt_len(struct smb2_hdr *hdr, __u32 len, __u32 non_ctxlen,
-				size_t hdr_preamble_size)
+static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
+			      __u32 non_ctxlen)
 {
 	__u16 neg_count;
 	__u32 nc_offset, size_of_pad_before_neg_ctxts;
@@ -109,12 +109,11 @@
 
 	/* Make sure that negotiate contexts start after gss security blob */
 	nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset);
-	if (nc_offset < non_ctxlen - hdr_preamble_size /* RFC1001 len */) {
+	if (nc_offset < non_ctxlen) {
 		printk_once(KERN_WARNING "invalid negotiate context offset\n");
 		return 0;
 	}
-	size_of_pad_before_neg_ctxts = nc_offset -
-					(non_ctxlen - hdr_preamble_size);
+	size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
 
 	/* Verify that at least minimal negotiate contexts fit within frame */
 	if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) {
@@ -131,25 +130,20 @@
 #endif /* CIFS_SMB311 */
 
 int
-smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
+smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
 {
-	struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
-	struct smb2_hdr *hdr = &pdu->hdr;
-	struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+	struct smb2_sync_pdu *pdu = (struct smb2_sync_pdu *)shdr;
 	__u64 mid;
-	__u32 len = get_rfc1002_length(buf);
 	__u32 clc_len;  /* calculated length */
 	int command;
-
-	/* BB disable following printk later */
-	cifs_dbg(FYI, "%s length: 0x%x, smb_buf_length: 0x%x\n",
-		 __func__, length, len);
+	int pdu_size = sizeof(struct smb2_sync_pdu);
+	int hdr_size = sizeof(struct smb2_sync_hdr);
 
 	/*
 	 * Add function to do table lookup of StructureSize by command
 	 * ie Validate the wct via smb2_struct_sizes table above
 	 */
-
 	if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
 		struct smb2_transform_hdr *thdr =
 			(struct smb2_transform_hdr *)buf;
@@ -173,8 +167,8 @@
 	}
 
 	mid = le64_to_cpu(shdr->MessageId);
-	if (length < sizeof(struct smb2_pdu)) {
-		if ((length >= sizeof(struct smb2_hdr))
+	if (len < pdu_size) {
+		if ((len >= hdr_size)
 		    && (shdr->Status != 0)) {
 			pdu->StructureSize2 = 0;
 			/*
@@ -187,8 +181,7 @@
 		}
 		return 1;
 	}
-	if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE -
-	    srvr->vals->header_preamble_size) {
+	if (len > CIFSMaxBufSize + MAX_SMB2_HDR_SIZE) {
 		cifs_dbg(VFS, "SMB length greater than maximum, mid=%llu\n",
 			 mid);
 		return 1;
@@ -227,44 +220,38 @@
 		}
 	}
 
-	if (srvr->vals->header_preamble_size + len != length) {
-		cifs_dbg(VFS, "Total length %u RFC1002 length %zu mismatch mid %llu\n",
-			 length, srvr->vals->header_preamble_size + len, mid);
-		return 1;
-	}
-
-	clc_len = smb2_calc_size(hdr);
+	clc_len = smb2_calc_size(buf, srvr);
 
 #ifdef CONFIG_CIFS_SMB311
 	if (shdr->Command == SMB2_NEGOTIATE)
-		clc_len += get_neg_ctxt_len(hdr, len, clc_len,
-					srvr->vals->header_preamble_size);
+		clc_len += get_neg_ctxt_len(shdr, len, clc_len);
 #endif /* SMB311 */
-	if (srvr->vals->header_preamble_size + len != clc_len) {
-		cifs_dbg(FYI, "Calculated size %u length %zu mismatch mid %llu\n",
-			 clc_len, srvr->vals->header_preamble_size + len, mid);
+	if (len != clc_len) {
+		cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
+			 clc_len, len, mid);
 		/* create failed on symlink */
 		if (command == SMB2_CREATE_HE &&
 		    shdr->Status == STATUS_STOPPED_ON_SYMLINK)
 			return 0;
 		/* Windows 7 server returns 24 bytes more */
-		if (clc_len + 24 - srvr->vals->header_preamble_size == len && command == SMB2_OPLOCK_BREAK_HE)
+		if (clc_len + 24 == len && command == SMB2_OPLOCK_BREAK_HE)
 			return 0;
 		/* server can return one byte more due to implied bcc[0] */
-		if (clc_len == srvr->vals->header_preamble_size + len + 1)
+		if (clc_len == len + 1)
 			return 0;
 
 		/*
 		 * MacOS server pads after SMB2.1 write response with 3 bytes
 		 * of junk. Other servers match RFC1001 len to actual
 		 * SMB2/SMB3 frame length (header + smb2 response specific data)
+		 * Some Windows servers do too when compounding is used.
 		 * Log the server error (once), but allow it and continue
 		 * since the frame is parseable.
 		 */
-		if (clc_len < srvr->vals->header_preamble_size /* RFC1001 header size */ + len) {
+		if (clc_len < len) {
 			printk_once(KERN_WARNING
-				"SMB2 server sent bad RFC1001 len %d not %zu\n",
-				len, clc_len - srvr->vals->header_preamble_size);
+				"SMB2 server sent bad RFC1001 len %d not %d\n",
+				len, clc_len);
 			return 0;
 		}
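
The revised mismatch handling above boils down to a small set of tolerance rules for known server quirks. A sketch of just that decision, assuming the caller has already computed both lengths (is_oplock_break stands in for the command check; the symlink special case is omitted):

static int length_mismatch_ok(unsigned int clc_len, unsigned int len,
			      int is_oplock_break)
{
	if (clc_len == len)
		return 1;
	/* Windows 7 returns 24 extra bytes on oplock break */
	if (is_oplock_break && clc_len + 24 == len)
		return 1;
	/* server can return one byte more due to implied bcc[0] */
	if (clc_len == len + 1)
		return 1;
	/* padded frames (MacOS write responses, some compounded replies) */
	if (clc_len < len)
		return 1;
	return 0;
}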
 
@@ -305,15 +292,14 @@
 * area and the offset to it (from the beginning of the SMB) are also returned.
  */
 char *
-smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
+smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr)
 {
-	struct smb2_sync_hdr *shdr = get_sync_hdr(hdr);
 	*off = 0;
 	*len = 0;
 
 	/* error responses do not have data area */
 	if (shdr->Status && shdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
-	    (((struct smb2_err_rsp *)hdr)->StructureSize) ==
+	    (((struct smb2_err_rsp *)shdr)->StructureSize) ==
 						SMB2_ERROR_STRUCTURE_SIZE2)
 		return NULL;
 
@@ -325,42 +311,44 @@
 	switch (shdr->Command) {
 	case SMB2_NEGOTIATE:
 		*off = le16_to_cpu(
-		    ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferOffset);
+		  ((struct smb2_negotiate_rsp *)shdr)->SecurityBufferOffset);
 		*len = le16_to_cpu(
-		    ((struct smb2_negotiate_rsp *)hdr)->SecurityBufferLength);
+		  ((struct smb2_negotiate_rsp *)shdr)->SecurityBufferLength);
 		break;
 	case SMB2_SESSION_SETUP:
 		*off = le16_to_cpu(
-		    ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferOffset);
+		  ((struct smb2_sess_setup_rsp *)shdr)->SecurityBufferOffset);
 		*len = le16_to_cpu(
-		    ((struct smb2_sess_setup_rsp *)hdr)->SecurityBufferLength);
+		  ((struct smb2_sess_setup_rsp *)shdr)->SecurityBufferLength);
 		break;
 	case SMB2_CREATE:
 		*off = le32_to_cpu(
-		    ((struct smb2_create_rsp *)hdr)->CreateContextsOffset);
+		    ((struct smb2_create_rsp *)shdr)->CreateContextsOffset);
 		*len = le32_to_cpu(
-		    ((struct smb2_create_rsp *)hdr)->CreateContextsLength);
+		    ((struct smb2_create_rsp *)shdr)->CreateContextsLength);
 		break;
 	case SMB2_QUERY_INFO:
 		*off = le16_to_cpu(
-		    ((struct smb2_query_info_rsp *)hdr)->OutputBufferOffset);
+		    ((struct smb2_query_info_rsp *)shdr)->OutputBufferOffset);
 		*len = le32_to_cpu(
-		    ((struct smb2_query_info_rsp *)hdr)->OutputBufferLength);
+		    ((struct smb2_query_info_rsp *)shdr)->OutputBufferLength);
 		break;
 	case SMB2_READ:
-		*off = ((struct smb2_read_rsp *)hdr)->DataOffset;
-		*len = le32_to_cpu(((struct smb2_read_rsp *)hdr)->DataLength);
+		/* TODO: is this a bug? */
+		*off = ((struct smb2_read_rsp *)shdr)->DataOffset;
+		*len = le32_to_cpu(((struct smb2_read_rsp *)shdr)->DataLength);
 		break;
 	case SMB2_QUERY_DIRECTORY:
 		*off = le16_to_cpu(
-		  ((struct smb2_query_directory_rsp *)hdr)->OutputBufferOffset);
+		  ((struct smb2_query_directory_rsp *)shdr)->OutputBufferOffset);
 		*len = le32_to_cpu(
-		  ((struct smb2_query_directory_rsp *)hdr)->OutputBufferLength);
+		  ((struct smb2_query_directory_rsp *)shdr)->OutputBufferLength);
 		break;
 	case SMB2_IOCTL:
 		*off = le32_to_cpu(
-		  ((struct smb2_ioctl_rsp *)hdr)->OutputOffset);
-		*len = le32_to_cpu(((struct smb2_ioctl_rsp *)hdr)->OutputCount);
+		  ((struct smb2_ioctl_rsp *)shdr)->OutputOffset);
+		*len = le32_to_cpu(
+		  ((struct smb2_ioctl_rsp *)shdr)->OutputCount);
 		break;
 	case SMB2_CHANGE_NOTIFY:
 	default:
@@ -403,15 +391,14 @@
  * portion, the number of word parameters and the data portion of the message.
  */
 unsigned int
-smb2_calc_size(void *buf)
+smb2_calc_size(void *buf, struct TCP_Server_Info *srvr)
 {
-	struct smb2_pdu *pdu = (struct smb2_pdu *)buf;
-	struct smb2_hdr *hdr = &pdu->hdr;
-	struct smb2_sync_hdr *shdr = get_sync_hdr(hdr);
+	struct smb2_sync_pdu *pdu = (struct smb2_sync_pdu *)buf;
+	struct smb2_sync_hdr *shdr = &pdu->sync_hdr;
 	int offset; /* the offset from the beginning of SMB to data area */
 	int data_length; /* the length of the variable length data area */
 	/* Structure Size has already been checked to make sure it is 64 */
-	int len = 4 + le16_to_cpu(shdr->StructureSize);
+	int len = le16_to_cpu(shdr->StructureSize);
 
 	/*
 	 * StructureSize2, ie length of fixed parameter area has already
@@ -422,7 +409,7 @@
 	if (has_smb2_data_area[le16_to_cpu(shdr->Command)] == false)
 		goto calc_size_exit;
 
-	smb2_get_data_area_len(&offset, &data_length, hdr);
+	smb2_get_data_area_len(&offset, &data_length, shdr);
 	cifs_dbg(FYI, "SMB2 data length %d offset %d\n", data_length, offset);
 
 	if (data_length > 0) {
@@ -430,15 +417,14 @@
 		 * Check to make sure that data area begins after fixed area,
 		 * Note that last byte of the fixed area is part of data area
 		 * for some commands, typically those with odd StructureSize,
-		 * so we must add one to the calculation (and 4 to account for
-		 * the size of the RFC1001 hdr.
+		 * so we must add one to the calculation.
 		 */
-		if (offset + 4 + 1 < len) {
+		if (offset + 1 < len) {
 			cifs_dbg(VFS, "data area offset %d overlaps SMB2 header %d\n",
-				 offset + 4 + 1, len);
+				 offset + 1, len);
 			data_length = 0;
 		} else {
-			len = 4 + offset + data_length;
+			len = offset + data_length;
 		}
 	}
 calc_size_exit:
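
With the RFC1001 preamble gone, the size calculation above is purely header-relative. A compact sketch of the same math, assuming the lengths have already been pulled out of the frame:

static unsigned int calc_frame_size(unsigned int structure_size,
				    unsigned int data_offset,
				    unsigned int data_length)
{
	unsigned int len = structure_size;	/* no "+ 4" preamble term */

	if (data_length > 0) {
		/*
		 * The data area may overlap the last byte of the fixed
		 * area (odd StructureSize values), hence the +1.
		 */
		if (data_offset + 1 < len)
			return len;	/* bogus offset: ignore data area */
		len = data_offset + data_length;
	}
	return len;
}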
@@ -465,8 +451,14 @@
 	/* Windows doesn't allow paths beginning with \ */
 	if (from[0] == '\\')
 		start_of_path = from + 1;
+#ifdef CONFIG_CIFS_SMB311
+	/* SMB311 POSIX extensions paths do not include leading slash */
+	else if (cifs_sb_master_tcon(cifs_sb)->posix_extensions)
+		start_of_path = from + 1;
+#endif /* 311 */
 	else
 		start_of_path = from;
+
 	to = cifs_strndup_to_utf16(start_of_path, PATH_MAX, &len,
 				   cifs_sb->local_nls, map_type);
 	return to;
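
A sketch of the prefix rule the hunk above implements; posix_ext stands in for the cifs_sb_master_tcon(cifs_sb)->posix_extensions flag:

static const char *strip_leading_separator(const char *from, int posix_ext)
{
	/* Windows doesn't allow paths beginning with \ */
	if (from[0] == '\\')
		return from + 1;
	/* SMB3.1.1 POSIX extension paths drop the leading slash too */
	if (posix_ext)
		return from + 1;
	return from;
}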
@@ -621,7 +613,7 @@
 bool
 smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 {
-	struct smb2_oplock_break_rsp *rsp = (struct smb2_oplock_break_rsp *)buffer;
+	struct smb2_oplock_break *rsp = (struct smb2_oplock_break *)buffer;
 	struct list_head *tmp, *tmp1, *tmp2;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
@@ -630,7 +622,7 @@
 
 	cifs_dbg(FYI, "Checking for oplock break\n");
 
-	if (rsp->hdr.sync_hdr.Command != SMB2_OPLOCK_BREAK)
+	if (rsp->sync_hdr.Command != SMB2_OPLOCK_BREAK)
 		return false;
 
 	if (rsp->StructureSize !=
@@ -721,7 +713,7 @@
 int
 smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
 {
-	struct smb2_sync_hdr *sync_hdr = get_sync_hdr(buffer);
+	struct smb2_sync_hdr *sync_hdr = (struct smb2_sync_hdr *)buffer;
 	struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
 	struct cifs_tcon *tcon;
 	struct close_cancelled_open *cancelled;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 9c6d95f..950d0ab 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -123,7 +123,7 @@
 static unsigned int
 smb2_get_credits(struct mid_q_entry *mid)
 {
-	struct smb2_sync_hdr *shdr = get_sync_hdr(mid->resp_buf);
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)mid->resp_buf;
 
 	return le16_to_cpu(shdr->CreditRequest);
 }
@@ -190,7 +190,7 @@
 smb2_find_mid(struct TCP_Server_Info *server, char *buf)
 {
 	struct mid_q_entry *mid;
-	struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
 	__u64 wire_mid = le64_to_cpu(shdr->MessageId);
 
 	if (shdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
@@ -212,15 +212,16 @@
 }
 
 static void
-smb2_dump_detail(void *buf)
+smb2_dump_detail(void *buf, struct TCP_Server_Info *server)
 {
 #ifdef CONFIG_CIFS_DEBUG2
-	struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
 
 	cifs_dbg(VFS, "Cmd: %d Err: 0x%x Flags: 0x%x Mid: %llu Pid: %d\n",
 		 shdr->Command, shdr->Status, shdr->Flags, shdr->MessageId,
 		 shdr->ProcessId);
-	cifs_dbg(VFS, "smb buf %p len %u\n", buf, smb2_calc_size(buf));
+	cifs_dbg(VFS, "smb buf %p len %u\n", buf,
+		 server->ops->calc_smb_size(buf, server));
 #endif
 }
 
@@ -322,6 +323,40 @@
 }
 #endif /* STATS2 */
 
+/*
+ * Open the directory at the root of a share
+ */
+int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
+{
+	struct cifs_open_parms oparams;
+	int rc;
+	__le16 srch_path = 0; /* Null - since an open of top of share */
+	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+
+	mutex_lock(&tcon->prfid_mutex);
+	if (tcon->valid_root_fid) {
+		cifs_dbg(FYI, "found a cached root file handle\n");
+		memcpy(pfid, tcon->prfid, sizeof(struct cifs_fid));
+		mutex_unlock(&tcon->prfid_mutex);
+		return 0;
+	}
+
+	oparams.tcon = tcon;
+	oparams.create_options = 0;
+	oparams.desired_access = FILE_READ_ATTRIBUTES;
+	oparams.disposition = FILE_OPEN;
+	oparams.fid = pfid;
+	oparams.reconnect = false;
+
+	rc = SMB2_open(xid, &oparams, &srch_path, &oplock, NULL, NULL);
+	if (rc == 0) {
+		memcpy(tcon->prfid, pfid, sizeof(struct cifs_fid));
+		tcon->valid_root_fid = true;
+	}
+	mutex_unlock(&tcon->prfid_mutex);
+	return rc;
+}
+
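
open_shroot() above is a classic open-once-and-cache pattern. A user-space sketch of the same shape, with pthreads standing in for the kernel mutex and a stub for the SMB2_open round trip:

#include <pthread.h>

struct fid { unsigned long long persistent_fid, volatile_fid; };

static pthread_mutex_t prfid_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct fid cached_root_fid;
static int valid_root_fid;

/* stub standing in for the SMB2_open network round trip */
static int server_open_root(struct fid *pfid)
{
	pfid->persistent_fid = 1;
	pfid->volatile_fid = 2;
	return 0;
}

int open_shroot_sketch(struct fid *pfid)
{
	int rc = 0;

	pthread_mutex_lock(&prfid_mutex);
	if (valid_root_fid) {
		*pfid = cached_root_fid;	/* reuse the cached handle */
	} else {
		rc = server_open_root(pfid);
		if (rc == 0) {
			cached_root_fid = *pfid;	/* populate cache */
			valid_root_fid = 1;
		}
	}
	pthread_mutex_unlock(&prfid_mutex);
	return rc;
}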
 static void
 smb3_qfs_tcon(const unsigned int xid, struct cifs_tcon *tcon)
 {
@@ -330,6 +365,7 @@
 	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
 	struct cifs_open_parms oparms;
 	struct cifs_fid fid;
+	bool no_cached_open = tcon->nohandlecache;
 
 	oparms.tcon = tcon;
 	oparms.desired_access = FILE_READ_ATTRIBUTES;
@@ -338,7 +374,11 @@
 	oparms.fid = &fid;
 	oparms.reconnect = false;
 
-	rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
+	if (no_cached_open)
+		rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
+	else
+		rc = open_shroot(xid, tcon, &fid);
+
 	if (rc)
 		return;
 
@@ -352,7 +392,8 @@
 			FS_DEVICE_INFORMATION);
 	SMB2_QFS_attr(xid, tcon, fid.persistent_fid, fid.volatile_fid,
 			FS_SECTOR_SIZE_INFORMATION); /* SMB3 specific */
-	SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
+	if (no_cached_open)
+		SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
 	return;
 }
 
@@ -394,6 +435,9 @@
 	struct cifs_open_parms oparms;
 	struct cifs_fid fid;
 
+	if ((*full_path == 0) && tcon->valid_root_fid)
+		return 0;
+
 	utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
 	if (!utf16_path)
 		return -ENOMEM;
@@ -704,9 +748,11 @@
 		seq_puts(m, " TRIM-support,");
 
 	seq_printf(m, "\tShare Flags: 0x%x", tcon->share_flags);
+	seq_printf(m, "\n\ttid: 0x%x", tcon->tid);
 	if (tcon->perf_sector_size)
 		seq_printf(m, "\tOptimal sector size: 0x%x",
 			   tcon->perf_sector_size);
+	seq_printf(m, "\tMaximal Access: 0x%x", tcon->maximal_access);
 }
 
 static void
@@ -1257,7 +1303,7 @@
 static bool
 smb2_is_status_pending(char *buf, struct TCP_Server_Info *server, int length)
 {
-	struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
 
 	if (shdr->Status != STATUS_PENDING)
 		return false;
@@ -1275,12 +1321,13 @@
 static bool
 smb2_is_session_expired(char *buf)
 {
-	struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
 
-	if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED)
+	if (shdr->Status != STATUS_NETWORK_SESSION_EXPIRED &&
+	    shdr->Status != STATUS_USER_SESSION_DELETED)
 		return false;
 
-	cifs_dbg(FYI, "Session expired\n");
+	cifs_dbg(FYI, "Session expired or deleted\n");
 	return true;
 }
 
@@ -1474,8 +1521,6 @@
 	unsigned int sub_offset;
 	unsigned int print_len;
 	unsigned int print_offset;
-	struct cifs_ses *ses = tcon->ses;
-	struct TCP_Server_Info *server = ses->server;
 
 	cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
 
@@ -1499,7 +1544,7 @@
 
 	err_buf = err_iov.iov_base;
 	if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) ||
-	    err_iov.iov_len + server->vals->header_preamble_size < SMB2_SYMLINK_STRUCT_SIZE) {
+	    err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE) {
 		kfree(utf16_path);
 		return -ENOENT;
 	}
@@ -1512,14 +1557,13 @@
 	print_len = le16_to_cpu(symlink->PrintNameLength);
 	print_offset = le16_to_cpu(symlink->PrintNameOffset);
 
-	if (err_iov.iov_len + server->vals->header_preamble_size <
-			SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
+	if (err_iov.iov_len < SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
 		kfree(utf16_path);
 		return -ENOENT;
 	}
 
-	if (err_iov.iov_len + server->vals->header_preamble_size <
-			SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
+	if (err_iov.iov_len <
+	    SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
 		kfree(utf16_path);
 		return -ENOENT;
 	}
@@ -1593,8 +1637,11 @@
 		oparms.create_options = 0;
 
 	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
-	if (!utf16_path)
-		return ERR_PTR(-ENOMEM);
+	if (!utf16_path) {
+		rc = -ENOMEM;
+		free_xid(xid);
+		return ERR_PTR(rc);
+	}
 
 	oparms.tcon = tcon;
 	oparms.desired_access = READ_CONTROL;
@@ -1652,8 +1699,11 @@
 		access_flags = WRITE_DAC;
 
 	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
-	if (!utf16_path)
-		return -ENOMEM;
+	if (!utf16_path) {
+		rc = -ENOMEM;
+		free_xid(xid);
+		return rc;
+	}
 
 	oparms.tcon = tcon;
 	oparms.desired_access = access_flags;
@@ -1713,15 +1763,21 @@
 
 	/* if file not oplocked can't be sure whether asking to extend size */
 	if (!CIFS_CACHE_READ(cifsi))
-		if (keep_size == false)
-			return -EOPNOTSUPP;
+		if (keep_size == false) {
+			rc = -EOPNOTSUPP;
+			free_xid(xid);
+			return rc;
+		}
 
 	/*
 	 * Must check if file sparse since fallocate -z (zero range) assumes
 	 * non-sparse allocation
 	 */
-	if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE))
-		return -EOPNOTSUPP;
+	if (!(cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE)) {
+		rc = -EOPNOTSUPP;
+		free_xid(xid);
+		return rc;
+	}
 
 	/*
 	 * need to make sure we are not asked to extend the file since the SMB3
@@ -1730,8 +1786,11 @@
 	 * which for a non sparse file would zero the newly extended range
 	 */
 	if (keep_size == false)
-		if (i_size_read(inode) < offset + len)
-			return -EOPNOTSUPP;
+		if (i_size_read(inode) < offset + len) {
+			rc = -EOPNOTSUPP;
+			free_xid(xid);
+			return rc;
+		}
 
 	cifs_dbg(FYI, "offset %lld len %lld", offset, len);
 
@@ -1764,8 +1823,11 @@
 
 	/* Need to make file sparse, if not already, before freeing range. */
 	/* Consider adding equivalent for compressed since it could also work */
-	if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse))
-		return -EOPNOTSUPP;
+	if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
+		rc = -EOPNOTSUPP;
+		free_xid(xid);
+		return rc;
+	}
 
 	cifs_dbg(FYI, "offset %lld len %lld", offset, len);
 
@@ -1796,8 +1858,10 @@
 
 	/* if file not oplocked can't be sure whether asking to extend size */
 	if (!CIFS_CACHE_READ(cifsi))
-		if (keep_size == false)
-			return -EOPNOTSUPP;
+		if (keep_size == false) {
+			free_xid(xid);
+			return rc;
+		}
 
 	/*
 	 * Files are non-sparse by default so falloc may be a no-op
@@ -1806,14 +1870,16 @@
 	 */
 	if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) {
 		if (keep_size == true)
-			return 0;
+			rc = 0;
 		/* check if extending file */
 		else if (i_size_read(inode) >= off + len)
 			/* not extending file and already not sparse */
-			return 0;
+			rc = 0;
 		/* BB: in future add else clause to extend file */
 		else
-			return -EOPNOTSUPP;
+			rc = -EOPNOTSUPP;
+		free_xid(xid);
+		return rc;
 	}
 
 	if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
@@ -1825,8 +1891,11 @@
 		 * ie potentially making a few extra pages at the beginning
 		 * or end of the file non-sparse via set_sparse is harmless.
 		 */
-		if ((off > 8192) || (off + len + 8192 < i_size_read(inode)))
-			return -EOPNOTSUPP;
+		if ((off > 8192) || (off + len + 8192 < i_size_read(inode))) {
+			rc = -EOPNOTSUPP;
+			free_xid(xid);
+			return rc;
+		}
 
 		rc = smb2_set_sparse(xid, tcon, cfile, inode, false);
 	}
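
The string of smb2ops.c fixes above all enforce one invariant: every early return taken after get_xid() must release the xid. A sketch of the shape these functions converge on, with stand-ins for the cifs helpers:

static unsigned int get_xid(void) { return 1; }		/* stand-in */
static void free_xid(unsigned int xid) { (void)xid; }	/* stand-in */
static int precondition_ok(void) { return 0; }		/* stand-in */
static int perform(unsigned int xid) { (void)xid; return 0; }

int fallocate_like_op(void)
{
	unsigned int xid = get_xid();
	int rc;

	if (!precondition_ok()) {
		rc = -95;	/* -EOPNOTSUPP, as in the fixed paths */
		goto out;
	}
	rc = perform(xid);
out:
	free_xid(xid);	/* released on every return path */
	return rc;
}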
@@ -2035,7 +2104,7 @@
 }
 
 static __u8
-smb2_parse_lease_buf(void *buf, unsigned int *epoch)
+smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
 {
 	struct create_lease *lc = (struct create_lease *)buf;
 
@@ -2046,13 +2115,16 @@
 }
 
 static __u8
-smb3_parse_lease_buf(void *buf, unsigned int *epoch)
+smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
 {
 	struct create_lease_v2 *lc = (struct create_lease_v2 *)buf;
 
 	*epoch = le16_to_cpu(lc->lcontext.Epoch);
 	if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
 		return SMB2_OPLOCK_LEVEL_NOCHANGE;
+	if (lease_key)
+		memcpy(lease_key, &lc->lcontext.LeaseKeyLow,
+		       SMB2_LEASE_KEY_SIZE);
 	return le32_to_cpu(lc->lcontext.LeaseState);
 }
 
@@ -2070,12 +2142,11 @@
 }
 
 static void
-fill_transform_hdr(struct TCP_Server_Info *server,
-		   struct smb2_transform_hdr *tr_hdr, struct smb_rqst *old_rq)
+fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
+		   struct smb_rqst *old_rq)
 {
 	struct smb2_sync_hdr *shdr =
 			(struct smb2_sync_hdr *)old_rq->rq_iov[1].iov_base;
-	unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base);
 
 	memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
 	tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
@@ -2083,8 +2154,6 @@
 	tr_hdr->Flags = cpu_to_le16(0x01);
 	get_random_bytes(&tr_hdr->Nonce, SMB3_AES128CMM_NONCE);
 	memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8);
-	inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - server->vals->header_preamble_size);
-	inc_rfc1001_len(tr_hdr, orig_len);
 }
 
 /* We can not use the normal sg_set_buf() as we will sometimes pass a
@@ -2096,11 +2165,16 @@
 	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
 }
 
+/* Assumes:
+ * rqst->rq_iov[0]  is rfc1002 length
+ * rqst->rq_iov[1]  is transform header
+ * rqst->rq_iov[2+] is data to be encrypted/decrypted
+ */
 static struct scatterlist *
 init_sg(struct smb_rqst *rqst, u8 *sign)
 {
-	unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages + 1;
-	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+	unsigned int sg_len = rqst->rq_nvec + rqst->rq_npages;
+	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
 	struct scatterlist *sg;
 	unsigned int i;
 	unsigned int j;
@@ -2110,10 +2184,10 @@
 		return NULL;
 
 	sg_init_table(sg, sg_len);
-	smb2_sg_set_buf(&sg[0], rqst->rq_iov[0].iov_base + 24, assoc_data_len);
-	for (i = 1; i < rqst->rq_nvec; i++)
-		smb2_sg_set_buf(&sg[i], rqst->rq_iov[i].iov_base,
-						rqst->rq_iov[i].iov_len);
+	smb2_sg_set_buf(&sg[0], rqst->rq_iov[1].iov_base + 20, assoc_data_len);
+	for (i = 1; i < rqst->rq_nvec - 1; i++)
+		smb2_sg_set_buf(&sg[i], rqst->rq_iov[i+1].iov_base,
+						rqst->rq_iov[i+1].iov_len);
 	for (j = 0; i < sg_len - 1; i++, j++) {
 		unsigned int len = (j < rqst->rq_npages - 1) ? rqst->rq_pagesz
 							: rqst->rq_tailsz;
@@ -2145,9 +2219,10 @@
 }
 /*
  * Encrypt or decrypt @rqst message. @rqst has the following format:
- * iov[0] - transform header (associate data),
- * iov[1-N] and pages - data to encrypt.
- * On success return encrypted data in iov[1-N] and pages, leave iov[0]
+ * iov[0] - rfc1002 length
+ * iov[1] - transform header (associated data),
+ * iov[2-N] and pages - data to encrypt.
+ * On success return encrypted data in iov[2-N] and pages, leave iov[0-1]
  * untouched.
  */
 static int
@@ -2155,7 +2230,7 @@
 {
 	struct smb2_transform_hdr *tr_hdr =
 			(struct smb2_transform_hdr *)rqst->rq_iov[0].iov_base;
-	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20 - server->vals->header_preamble_size;
+	unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
 	int rc = 0;
 	struct scatterlist *sg;
 	u8 sign[SMB2_SIGNATURE_SIZE] = {};
@@ -2242,6 +2317,10 @@
 	return rc;
 }
 
+/*
+ * This is called from smb_send_rqst. At this point we have the rfc1002
+ * header as the first element in the vector.
+ */
 static int
 smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
 		       struct smb_rqst *old_rq)
@@ -2250,6 +2329,7 @@
 	struct page **pages;
 	struct smb2_transform_hdr *tr_hdr;
 	unsigned int npages = old_rq->rq_npages;
+	unsigned int orig_len = get_rfc1002_length(old_rq->rq_iov[0].iov_base);
 	int i;
 	int rc = -ENOMEM;
 
@@ -2268,24 +2348,34 @@
 			goto err_free_pages;
 	}
 
-	iov = kmalloc_array(old_rq->rq_nvec, sizeof(struct kvec), GFP_KERNEL);
+	/* Make space for one extra iov to hold the transform header */
+	iov = kmalloc_array(old_rq->rq_nvec + 1, sizeof(struct kvec),
+			    GFP_KERNEL);
 	if (!iov)
 		goto err_free_pages;
 
 	/* copy all iovs from the old except the 1st one (rfc1002 length) */
-	memcpy(&iov[1], &old_rq->rq_iov[1],
+	memcpy(&iov[2], &old_rq->rq_iov[1],
 				sizeof(struct kvec) * (old_rq->rq_nvec - 1));
+	/* copy the rfc1002 iov */
+	iov[0].iov_base = old_rq->rq_iov[0].iov_base;
+	iov[0].iov_len  = old_rq->rq_iov[0].iov_len;
+
 	new_rq->rq_iov = iov;
-	new_rq->rq_nvec = old_rq->rq_nvec;
+	new_rq->rq_nvec = old_rq->rq_nvec + 1;
 
 	tr_hdr = kmalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
 	if (!tr_hdr)
 		goto err_free_iov;
 
-	/* fill the 1st iov with a transform header */
-	fill_transform_hdr(server, tr_hdr, old_rq);
-	new_rq->rq_iov[0].iov_base = tr_hdr;
-	new_rq->rq_iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+	/* fill the 2nd iov with a transform header */
+	fill_transform_hdr(tr_hdr, orig_len, old_rq);
+	new_rq->rq_iov[1].iov_base = tr_hdr;
+	new_rq->rq_iov[1].iov_len = sizeof(struct smb2_transform_hdr);
+
+	/* Update rfc1002 header */
+	inc_rfc1001_len(new_rq->rq_iov[0].iov_base,
+			sizeof(struct smb2_transform_hdr));
 
 	/* copy pages from the old */
 	for (i = 0; i < npages; i++) {
@@ -2325,7 +2415,7 @@
 		put_page(rqst->rq_pages[i]);
 	kfree(rqst->rq_pages);
 	/* free transform header */
-	kfree(rqst->rq_iov[0].iov_base);
+	kfree(rqst->rq_iov[1].iov_base);
 	kfree(rqst->rq_iov);
 }
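
The smb3_init_transform_rq changes above shift the payload down to make room for the transform header while keeping the RFC1002 length first. A user-space sketch of that iov reshuffle, assuming a simplified kvec/rqst model:

#include <stdlib.h>
#include <string.h>

struct kvec { void *iov_base; size_t iov_len; };
struct rqst { struct kvec *rq_iov; unsigned int rq_nvec; };

int init_transform_rq_sketch(struct rqst *new_rq, const struct rqst *old_rq,
			     void *tr_hdr, size_t tr_hdr_len)
{
	/* one extra slot for the transform header */
	struct kvec *iov = calloc(old_rq->rq_nvec + 1, sizeof(*iov));

	if (!iov)
		return -12;	/* -ENOMEM */

	iov[0] = old_rq->rq_iov[0];	/* rfc1002 length stays in iov[0] */
	iov[1].iov_base = tr_hdr;	/* transform header becomes iov[1] */
	iov[1].iov_len = tr_hdr_len;
	/* original payload iovs shift down to iov[2..] */
	memcpy(&iov[2], &old_rq->rq_iov[1],
	       sizeof(*iov) * (old_rq->rq_nvec - 1));

	new_rq->rq_iov = iov;
	new_rq->rq_nvec = old_rq->rq_nvec + 1;
	return 0;
}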
 
@@ -2342,18 +2432,19 @@
 		 unsigned int buf_data_size, struct page **pages,
 		 unsigned int npages, unsigned int page_data_size)
 {
-	struct kvec iov[2];
+	struct kvec iov[3];
 	struct smb_rqst rqst = {NULL};
-	struct smb2_hdr *hdr;
 	int rc;
 
-	iov[0].iov_base = buf;
-	iov[0].iov_len = sizeof(struct smb2_transform_hdr);
-	iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
-	iov[1].iov_len = buf_data_size;
+	iov[0].iov_base = NULL;
+	iov[0].iov_len = 0;
+	iov[1].iov_base = buf;
+	iov[1].iov_len = sizeof(struct smb2_transform_hdr);
+	iov[2].iov_base = buf + sizeof(struct smb2_transform_hdr);
+	iov[2].iov_len = buf_data_size;
 
 	rqst.rq_iov = iov;
-	rqst.rq_nvec = 2;
+	rqst.rq_nvec = 3;
 	rqst.rq_pages = pages;
 	rqst.rq_npages = npages;
 	rqst.rq_pagesz = PAGE_SIZE;
@@ -2365,10 +2456,9 @@
 	if (rc)
 		return rc;
 
-	memmove(buf + server->vals->header_preamble_size, iov[1].iov_base, buf_data_size);
-	hdr = (struct smb2_hdr *)buf;
-	hdr->smb2_buf_length = cpu_to_be32(buf_data_size + page_data_size);
-	server->total_read = buf_data_size + page_data_size + server->vals->header_preamble_size;
+	memmove(buf, iov[2].iov_base, buf_data_size);
+
+	server->total_read = buf_data_size + page_data_size;
 
 	return rc;
 }
@@ -2393,7 +2483,7 @@
 			zero_user(page, len, PAGE_SIZE - len);
 			len = 0;
 		}
-		length = cifs_read_page_from_socket(server, page, n);
+		length = cifs_read_page_from_socket(server, page, 0, n);
 		if (length < 0)
 			return length;
 		server->total_read += length;
@@ -2441,7 +2531,7 @@
 	unsigned int cur_page_idx;
 	unsigned int pad_len;
 	struct cifs_readdata *rdata = mid->callback_data;
-	struct smb2_sync_hdr *shdr = get_sync_hdr(buf);
+	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
 	struct bio_vec *bvec = NULL;
 	struct iov_iter iter;
 	struct kvec iov;
@@ -2472,7 +2562,7 @@
 		return 0;
 	}
 
-	data_offset = server->ops->read_data_offset(buf) + server->vals->header_preamble_size;
+	data_offset = server->ops->read_data_offset(buf);
 #ifdef CONFIG_CIFS_SMB_DIRECT
 	use_rdma_mr = rdata->mr;
 #endif
@@ -2568,12 +2658,11 @@
 	unsigned int npages;
 	struct page **pages;
 	unsigned int len;
-	unsigned int buflen = server->pdu_size + server->vals->header_preamble_size;
+	unsigned int buflen = server->pdu_size;
 	int rc;
 	int i = 0;
 
-	len = min_t(unsigned int, buflen, server->vals->read_rsp_size -
-		server->vals->header_preamble_size +
+	len = min_t(unsigned int, buflen, server->vals->read_rsp_size +
 		sizeof(struct smb2_transform_hdr)) - HEADER_SIZE(server) + 1;
 
 	rc = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1, len);
@@ -2581,8 +2670,7 @@
 		return rc;
 	server->total_read += rc;
 
-	len = le32_to_cpu(tr_hdr->OriginalMessageSize) +
-		server->vals->header_preamble_size -
+	len = le32_to_cpu(tr_hdr->OriginalMessageSize) -
 		server->vals->read_rsp_size;
 	npages = DIV_ROUND_UP(len, PAGE_SIZE);
 
@@ -2609,8 +2697,7 @@
 	if (rc)
 		goto free_pages;
 
-	rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size -
-			      server->vals->header_preamble_size,
+	rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size,
 			      pages, npages, len);
 	if (rc)
 		goto free_pages;
@@ -2647,7 +2734,7 @@
 	struct mid_q_entry *mid_entry;
 
 	/* switch to large buffer if too big for a small one */
-	if (pdu_length + server->vals->header_preamble_size > MAX_CIFS_SMALL_BUFFER_SIZE) {
+	if (pdu_length > MAX_CIFS_SMALL_BUFFER_SIZE) {
 		server->large_buf = true;
 		memcpy(server->bigbuf, buf, server->total_read);
 		buf = server->bigbuf;
@@ -2655,13 +2742,12 @@
 
 	/* now read the rest */
 	length = cifs_read_from_socket(server, buf + HEADER_SIZE(server) - 1,
-				pdu_length - HEADER_SIZE(server) + 1 +
-				server->vals->header_preamble_size);
+				pdu_length - HEADER_SIZE(server) + 1);
 	if (length < 0)
 		return length;
 	server->total_read += length;
 
-	buf_size = pdu_length + server->vals->header_preamble_size - sizeof(struct smb2_transform_hdr);
+	buf_size = pdu_length - sizeof(struct smb2_transform_hdr);
 	length = decrypt_raw_data(server, buf, buf_size, NULL, 0, 0);
 	if (length)
 		return length;
@@ -2690,7 +2776,7 @@
 	struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
 	unsigned int orig_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
 
-	if (pdu_length + server->vals->header_preamble_size < sizeof(struct smb2_transform_hdr) +
+	if (pdu_length < sizeof(struct smb2_transform_hdr) +
 						sizeof(struct smb2_sync_hdr)) {
 		cifs_dbg(VFS, "Transform message is too small (%u)\n",
 			 pdu_length);
@@ -2699,14 +2785,14 @@
 		return -ECONNABORTED;
 	}
 
-	if (pdu_length + server->vals->header_preamble_size < orig_len + sizeof(struct smb2_transform_hdr)) {
+	if (pdu_length < orig_len + sizeof(struct smb2_transform_hdr)) {
 		cifs_dbg(VFS, "Transform message is broken\n");
 		cifs_reconnect(server);
 		wake_up(&server->response_q);
 		return -ECONNABORTED;
 	}
 
-	if (pdu_length + server->vals->header_preamble_size > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
+	if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server))
 		return receive_encrypted_read(server, mid);
 
 	return receive_encrypted_standard(server, mid);
@@ -2717,11 +2803,23 @@
 {
 	char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
 
-	return handle_read_data(server, mid, buf, server->pdu_size +
-				server->vals->header_preamble_size,
+	return handle_read_data(server, mid, buf, server->pdu_size,
 				NULL, 0, 0);
 }
 
+static int
+smb2_next_header(char *buf)
+{
+	struct smb2_sync_hdr *hdr = (struct smb2_sync_hdr *)buf;
+	struct smb2_transform_hdr *t_hdr = (struct smb2_transform_hdr *)buf;
+
+	if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM)
+		return sizeof(struct smb2_transform_hdr) +
+		  le32_to_cpu(t_hdr->OriginalMessageSize);
+
+	return le32_to_cpu(hdr->NextCommand);
+}
+
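
smb2_next_header() above gives the demultiplex thread a uniform way to step through compounded or encrypted frames. A simplified sketch of how such a helper drives the walk (the header layout here is condensed; in the real smb2_sync_hdr, NextCommand lives at a different offset):

#include <stdint.h>
#include <stddef.h>

#define TRANSFORM_PROTO_NUM 0x424d53fdu	/* 0xFD 'S' 'M' 'B' */

struct sync_hdr { uint32_t ProtocolId; uint32_t NextCommand; };
struct transform_hdr { uint32_t ProtocolId; uint32_t OriginalMessageSize; };

static size_t next_header(const char *buf)
{
	const struct sync_hdr *h = (const void *)buf;
	const struct transform_hdr *t = (const void *)buf;

	if (h->ProtocolId == TRANSFORM_PROTO_NUM)
		return sizeof(*t) + t->OriginalMessageSize; /* whole envelope */
	return h->NextCommand;	/* 0 means last frame of the compound */
}

static void walk_frames(const char *buf, size_t total)
{
	size_t off = 0, step;

	while (off < total) {
		/* dispatch the frame at buf + off here ... */
		step = next_header(buf + off);
		if (step == 0)
			break;
		off += step;
	}
}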
 struct smb_version_operations smb20_operations = {
 	.compare_fids = smb2_compare_fids,
 	.setup_request = smb2_setup_request,
@@ -2813,6 +2911,7 @@
 	.get_acl_by_fid = get_smb2_acl_by_fid,
 	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
+	.next_header = smb2_next_header,
 };
 
 struct smb_version_operations smb21_operations = {
@@ -2907,6 +3006,7 @@
 	.get_acl_by_fid = get_smb2_acl_by_fid,
 	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
+	.next_header = smb2_next_header,
 };
 
 struct smb_version_operations smb30_operations = {
@@ -3011,6 +3111,7 @@
 	.get_acl_by_fid = get_smb2_acl_by_fid,
 	.set_acl = set_smb2_acl,
 #endif /* CIFS_ACL */
+	.next_header = smb2_next_header,
 };
 
 #ifdef CONFIG_CIFS_SMB311
@@ -3111,6 +3212,7 @@
 	.query_all_EAs = smb2_query_eas,
 	.set_EA = smb2_set_ea,
 #endif /* CIFS_XATTR */
+	.next_header = smb2_next_header,
 };
 #endif /* CIFS_SMB311 */
 
@@ -3122,8 +3224,8 @@
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
-	.header_size = sizeof(struct smb2_hdr),
-	.header_preamble_size = 4,
+	.header_size = sizeof(struct smb2_sync_hdr),
+	.header_preamble_size = 0,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3143,8 +3245,8 @@
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
-	.header_size = sizeof(struct smb2_hdr),
-	.header_preamble_size = 4,
+	.header_size = sizeof(struct smb2_sync_hdr),
+	.header_preamble_size = 0,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3164,8 +3266,8 @@
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
-	.header_size = sizeof(struct smb2_hdr),
-	.header_preamble_size = 4,
+	.header_size = sizeof(struct smb2_sync_hdr),
+	.header_preamble_size = 0,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3185,8 +3287,8 @@
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
-	.header_size = sizeof(struct smb2_hdr),
-	.header_preamble_size = 4,
+	.header_size = sizeof(struct smb2_sync_hdr),
+	.header_preamble_size = 0,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3206,8 +3308,8 @@
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
-	.header_size = sizeof(struct smb2_hdr),
-	.header_preamble_size = 4,
+	.header_size = sizeof(struct smb2_sync_hdr),
+	.header_preamble_size = 0,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3227,8 +3329,8 @@
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
-	.header_size = sizeof(struct smb2_hdr),
-	.header_preamble_size = 4,
+	.header_size = sizeof(struct smb2_sync_hdr),
+	.header_preamble_size = 0,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
@@ -3249,8 +3351,8 @@
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
 	.unlock_lock_type = SMB2_LOCKFLAG_UNLOCK,
-	.header_size = sizeof(struct smb2_hdr),
-	.header_preamble_size = 4,
+	.header_size = sizeof(struct smb2_sync_hdr),
+	.header_preamble_size = 0,
 	.max_header_size = MAX_SMB2_HDR_SIZE,
 	.read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
 	.lock_cmd = SMB2_LOCK,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0f48741..281fbc1 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -49,6 +49,7 @@
 #include "cifspdu.h"
 #include "cifs_spnego.h"
 #include "smbdirect.h"
+#include "trace.h"
 
 /*
  *  The following table defines the expected "StructureSize" of SMB2 requests
@@ -79,7 +80,7 @@
 	/* SMB2_OPLOCK_BREAK */ 24 /* BB this is 36 for LEASE_BREAK variant */
 };
 
-static int encryption_required(const struct cifs_tcon *tcon)
+static int smb3_encryption_required(const struct cifs_tcon *tcon)
 {
 	if (!tcon)
 		return 0;
@@ -145,7 +146,7 @@
 		shdr->Flags |= SMB2_FLAGS_DFS_OPERATIONS; */
 
 	if (tcon->ses && tcon->ses->server && tcon->ses->server->sign &&
-	    !encryption_required(tcon))
+	    !smb3_encryption_required(tcon))
 		shdr->Flags |= SMB2_FLAGS_SIGNED;
 out:
 	return;
@@ -367,6 +368,7 @@
 
 #define SMB2_PREAUTH_INTEGRITY_CAPABILITIES	cpu_to_le16(1)
 #define SMB2_ENCRYPTION_CAPABILITIES		cpu_to_le16(2)
+#define SMB2_POSIX_EXTENSIONS_AVAILABLE		cpu_to_le16(0x100)
 
 static void
 build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt)
@@ -390,21 +392,35 @@
 }
 
 static void
+build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt)
+{
+	pneg_ctxt->ContextType = SMB2_POSIX_EXTENSIONS_AVAILABLE;
+	pneg_ctxt->DataLength = cpu_to_le16(POSIX_CTXT_DATA_LEN);
+}
+
+static void
 assemble_neg_contexts(struct smb2_negotiate_req *req,
 		      unsigned int *total_len)
 {
 	char *pneg_ctxt = (char *)req + OFFSET_OF_NEG_CONTEXT;
+	unsigned int ctxt_len;
 
+	*total_len += 2; /* add 2 to round to an 8 byte boundary for 1st ctxt */
 	build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt);
-	/* Add 2 to size to round to 8 byte boundary */
+	ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_preauth_neg_context), 8) * 8;
+	*total_len += ctxt_len;
+	pneg_ctxt += ctxt_len;
 
-	pneg_ctxt += 2 + sizeof(struct smb2_preauth_neg_context);
 	build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt);
-	req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
-	req->NegotiateContextCount = cpu_to_le16(2);
+	ctxt_len = DIV_ROUND_UP(sizeof(struct smb2_encryption_neg_context), 8) * 8;
+	*total_len += ctxt_len;
+	pneg_ctxt += ctxt_len;
 
-	*total_len += 4 + sizeof(struct smb2_preauth_neg_context)
-		+ sizeof(struct smb2_encryption_neg_context);
+	build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
+	*total_len += sizeof(struct smb2_posix_neg_context);
+
+	req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
+	req->NegotiateContextCount = cpu_to_le16(3);
 }
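
The reworked assemble_neg_contexts() above replaces hand-counted padding with per-context rounding. The rule it applies, as a one-liner:

static unsigned int pad_to_8(unsigned int len)
{
	return (len + 7) & ~7u;	/* same as DIV_ROUND_UP(len, 8) * 8 */
}

Each context except the last advances the write pointer by its padded size, so every negotiate context starts on an 8-byte boundary.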
 
 static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt)
@@ -449,12 +465,12 @@
 }
 
 static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
-				     struct TCP_Server_Info *server)
+				     struct TCP_Server_Info *server,
+				     unsigned int len_of_smb)
 {
 	struct smb2_neg_context *pctx;
 	unsigned int offset = le32_to_cpu(rsp->NegotiateContextOffset);
 	unsigned int ctxt_cnt = le16_to_cpu(rsp->NegotiateContextCount);
-	unsigned int len_of_smb = be32_to_cpu(rsp->hdr.smb2_buf_length);
 	unsigned int len_of_ctxts, i;
 	int rc = 0;
 
@@ -475,8 +491,7 @@
 		if (len_of_ctxts < sizeof(struct smb2_neg_context))
 			break;
 
-		pctx = (struct smb2_neg_context *)(offset +
-			server->vals->header_preamble_size + (char *)rsp);
+		pctx = (struct smb2_neg_context *)(offset + (char *)rsp);
 		clen = le16_to_cpu(pctx->DataLength);
 		if (clen > len_of_ctxts)
 			break;
@@ -487,6 +502,8 @@
 		else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES)
 			rc = decode_encrypt_ctx(server,
 				(struct smb2_encryption_neg_context *)pctx);
+		else if (pctx->ContextType == SMB2_POSIX_EXTENSIONS_AVAILABLE)
+			server->posix_ext_supported = true;
 		else
 			cifs_dbg(VFS, "unknown negcontext of type %d ignored\n",
 				le16_to_cpu(pctx->ContextType));
@@ -501,6 +518,64 @@
 	return rc;
 }
 
+static struct create_posix *
+create_posix_buf(umode_t mode)
+{
+	struct create_posix *buf;
+
+	buf = kzalloc(sizeof(struct create_posix),
+			GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	buf->ccontext.DataOffset =
+		cpu_to_le16(offsetof(struct create_posix, Mode));
+	buf->ccontext.DataLength = cpu_to_le32(4);
+	buf->ccontext.NameOffset =
+		cpu_to_le16(offsetof(struct create_posix, Name));
+	buf->ccontext.NameLength = cpu_to_le16(16);
+
+	/* SMB2_CREATE_TAG_POSIX is "0x93AD25509CB411E7B42383DE968BCD7C" */
+	buf->Name[0] = 0x93;
+	buf->Name[1] = 0xAD;
+	buf->Name[2] = 0x25;
+	buf->Name[3] = 0x50;
+	buf->Name[4] = 0x9C;
+	buf->Name[5] = 0xB4;
+	buf->Name[6] = 0x11;
+	buf->Name[7] = 0xE7;
+	buf->Name[8] = 0xB4;
+	buf->Name[9] = 0x23;
+	buf->Name[10] = 0x83;
+	buf->Name[11] = 0xDE;
+	buf->Name[12] = 0x96;
+	buf->Name[13] = 0x8B;
+	buf->Name[14] = 0xCD;
+	buf->Name[15] = 0x7C;
+	buf->Mode = cpu_to_le32(mode);
+	cifs_dbg(FYI, "mode on posix create 0%o\n", mode);
+	return buf;
+}
+
+static int
+add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
+{
+	struct smb2_create_req *req = iov[0].iov_base;
+	unsigned int num = *num_iovec;
+
+	iov[num].iov_base = create_posix_buf(mode);
+	if (iov[num].iov_base == NULL)
+		return -ENOMEM;
+	iov[num].iov_len = sizeof(struct create_posix);
+	if (!req->CreateContextsOffset)
+		req->CreateContextsOffset = cpu_to_le32(
+				sizeof(struct smb2_create_req) +
+				iov[num - 1].iov_len);
+	le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_posix));
+	*num_iovec = num + 1;
+	return 0;
+}
+
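
The byte-by-byte Name assignment in create_posix_buf() above spells out SMB2_CREATE_TAG_POSIX. An equivalent table-driven sketch, with the tag value copied from the comment in the hunk:

#include <stdint.h>
#include <string.h>

static const uint8_t posix_create_tag[16] = {
	0x93, 0xAD, 0x25, 0x50, 0x9C, 0xB4, 0x11, 0xE7,
	0xB4, 0x23, 0x83, 0xDE, 0x96, 0x8B, 0xCD, 0x7C,
};

static void fill_posix_name(uint8_t name[16])
{
	memcpy(name, posix_create_tag, sizeof(posix_create_tag));
}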
 #else
 static void assemble_neg_contexts(struct smb2_negotiate_req *req,
 				  unsigned int *total_len)
@@ -691,7 +766,7 @@
 	server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES;
 
 	security_blob = smb2_get_data_area_len(&blob_offset, &blob_length,
-					       &rsp->hdr);
+					       (struct smb2_sync_hdr *)rsp);
 	/*
 	 * See MS-SMB2 section 2.2.4: if no blob, client picks default which
 	 * for us will be
@@ -718,7 +793,8 @@
 #ifdef CONFIG_CIFS_SMB311
 	if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
 		if (rsp->NegotiateContextCount)
-			rc = smb311_decode_neg_context(rsp, server);
+			rc = smb311_decode_neg_context(rsp, server,
+						       rsp_iov.iov_len);
 		else
 			cifs_dbg(VFS, "Missing expected negotiate contexts\n");
 	}
@@ -1054,7 +1130,7 @@
 		goto out_put_spnego_key;
 
 	rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
-	ses->Suid = rsp->hdr.sync_hdr.SessionId;
+	ses->Suid = rsp->sync_hdr.SessionId;
 
 	ses->session_flags = le16_to_cpu(rsp->SessionFlags);
 
@@ -1130,13 +1206,13 @@
 
 	/* If true, rc here is expected and not an error */
 	if (sess_data->buf0_type != CIFS_NO_BUFFER &&
-		rsp->hdr.sync_hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
+		rsp->sync_hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
 		rc = 0;
 
 	if (rc)
 		goto out;
 
-	if (offsetof(struct smb2_sess_setup_rsp, Buffer) - ses->server->vals->header_preamble_size !=
+	if (offsetof(struct smb2_sess_setup_rsp, Buffer) !=
 			le16_to_cpu(rsp->SecurityBufferOffset)) {
 		cifs_dbg(VFS, "Invalid security buffer offset %d\n",
 			le16_to_cpu(rsp->SecurityBufferOffset));
@@ -1151,7 +1227,7 @@
 	cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
 
 
-	ses->Suid = rsp->hdr.sync_hdr.SessionId;
+	ses->Suid = rsp->sync_hdr.SessionId;
 	ses->session_flags = le16_to_cpu(rsp->SessionFlags);
 
 out:
@@ -1209,7 +1285,7 @@
 
 	rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
 
-	ses->Suid = rsp->hdr.sync_hdr.SessionId;
+	ses->Suid = rsp->sync_hdr.SessionId;
 	ses->session_flags = le16_to_cpu(rsp->SessionFlags);
 
 	rc = SMB2_sess_establish_session(sess_data);
@@ -1276,6 +1352,7 @@
 	sess_data->ses = ses;
 	sess_data->buf0_type = CIFS_NO_BUFFER;
 	sess_data->nls_cp = (struct nls_table *) nls_cp;
+	sess_data->previous_session = ses->Suid;
 
 #ifdef CONFIG_CIFS_SMB311
 	/*
@@ -1403,7 +1480,7 @@
 		return rc;
 	}
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	iov[0].iov_base = (char *)req;
@@ -1419,7 +1496,7 @@
 
 	/* 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 */
 	if ((ses->server->dialect == SMB311_PROT_ID) &&
-	    !encryption_required(tcon))
+	    !smb3_encryption_required(tcon))
 		req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED;
 
 	rc = smb2_send_recv(xid, ses, iov, 2, &resp_buftype, flags, &rsp_iov);
@@ -1457,7 +1534,7 @@
 	tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess);
 	tcon->tidStatus = CifsGood;
 	tcon->need_reconnect = false;
-	tcon->tid = rsp->hdr.sync_hdr.TreeId;
+	tcon->tid = rsp->sync_hdr.TreeId;
 	strlcpy(tcon->treeName, tree, sizeof(tcon->treeName));
 
 	if ((rsp->Capabilities & SMB2_SHARE_CAP_DFS) &&
@@ -1477,7 +1554,7 @@
 	return rc;
 
 tcon_error_exit:
-	if (rsp && rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
+	if (rsp && rsp->sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
 		cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
 	}
 	goto tcon_exit;
@@ -1508,7 +1585,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	flags |= CIFS_NO_RESP;
@@ -1575,7 +1652,7 @@
 
 static __u8
 parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
-		  unsigned int *epoch)
+		  unsigned int *epoch, char *lease_key)
 {
 	char *data_offset;
 	struct create_context *cc;
@@ -1583,14 +1660,15 @@
 	unsigned int remaining;
 	char *name;
 
-	data_offset = (char *)rsp + server->vals->header_preamble_size + le32_to_cpu(rsp->CreateContextsOffset);
+	data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
 	remaining = le32_to_cpu(rsp->CreateContextsLength);
 	cc = (struct create_context *)data_offset;
 	while (remaining >= sizeof(struct create_context)) {
 		name = le16_to_cpu(cc->NameOffset) + (char *)cc;
 		if (le16_to_cpu(cc->NameLength) == 4 &&
 		    strncmp(name, "RqLs", 4) == 0)
-			return server->ops->parse_lease_buf(cc, epoch);
+			return server->ops->parse_lease_buf(cc, epoch,
+							    lease_key);
 
 		next = le32_to_cpu(cc->Next);
 		if (!next)
@@ -1818,7 +1896,7 @@
 	struct TCP_Server_Info *server;
 	struct cifs_tcon *tcon = oparms->tcon;
 	struct cifs_ses *ses = tcon->ses;
-	struct kvec iov[4];
+	struct kvec iov[5]; /* make sure at least one iov for each open context */
 	struct kvec rsp_iov = {NULL, 0};
 	int resp_buftype;
 	int uni_path_len;
@@ -1827,7 +1905,7 @@
 	int rc = 0;
 	unsigned int n_iov = 2;
 	__u32 file_attributes = 0;
-	char *dhc_buf = NULL, *lc_buf = NULL;
+	char *dhc_buf = NULL, *lc_buf = NULL, *pc_buf = NULL;
 	int flags = 0;
 	unsigned int total_len;
 
@@ -1843,7 +1921,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	if (oparms->create_options & CREATE_OPTION_READONLY)
@@ -1944,6 +2022,27 @@
 		dhc_buf = iov[n_iov-1].iov_base;
 	}
 
+#ifdef CONFIG_CIFS_SMB311
+	if (tcon->posix_extensions) {
+		if (n_iov > 2) {
+			struct create_context *ccontext =
+			    (struct create_context *)iov[n_iov-1].iov_base;
+			ccontext->Next =
+				cpu_to_le32(iov[n_iov-1].iov_len);
+		}
+
+		rc = add_posix_context(iov, &n_iov, oparms->mode);
+		if (rc) {
+			cifs_small_buf_release(req);
+			kfree(copy_path);
+			kfree(lc_buf);
+			kfree(dhc_buf);
+			return rc;
+		}
+		pc_buf = iov[n_iov-1].iov_base;
+	}
+#endif /* SMB311 */
+
 	rc = smb2_send_recv(xid, ses, iov, n_iov, &resp_buftype, flags,
 			    &rsp_iov);
 	cifs_small_buf_release(req);
@@ -1956,8 +2055,13 @@
 			resp_buftype = CIFS_NO_BUFFER;
 			rsp = NULL;
 		}
+		trace_smb3_open_err(xid, tcon->tid, ses->Suid,
+				    oparms->create_options, oparms->desired_access, rc);
 		goto creat_exit;
-	}
+	} else
+		trace_smb3_open_done(xid, rsp->PersistentFileId, tcon->tid,
+				     ses->Suid, oparms->create_options,
+				     oparms->desired_access);
 
 	oparms->fid->persistent_fid = rsp->PersistentFileId;
 	oparms->fid->volatile_fid = rsp->VolatileFileId;
@@ -1972,13 +2076,15 @@
 	}
 
 	if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
-		*oplock = parse_lease_state(server, rsp, &oparms->fid->epoch);
+		*oplock = parse_lease_state(server, rsp, &oparms->fid->epoch,
+					    oparms->fid->lease_key);
 	else
 		*oplock = rsp->OplockLevel;
 creat_exit:
 	kfree(copy_path);
 	kfree(lc_buf);
 	kfree(dhc_buf);
+	kfree(pc_buf);
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
 }
@@ -1994,7 +2100,6 @@
 {
 	struct smb2_ioctl_req *req;
 	struct smb2_ioctl_rsp *rsp;
-	struct smb2_sync_hdr *shdr;
 	struct cifs_ses *ses;
 	struct kvec iov[2];
 	struct kvec rsp_iov;
@@ -2025,7 +2130,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->CtlCode = cpu_to_le32(opcode);
@@ -2088,6 +2193,10 @@
 	cifs_small_buf_release(req);
 	rsp = (struct smb2_ioctl_rsp *)rsp_iov.iov_base;
 
+	if (rc != 0)
+		trace_smb3_fsctl_err(xid, persistent_fid, tcon->tid,
+				ses->Suid, 0, opcode, rc);
+
 	if ((rc != 0) && (rc != -EINVAL)) {
 		cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
 		goto ioctl_exit;
@@ -2115,7 +2224,7 @@
 		goto ioctl_exit;
 	}
 
-	if (get_rfc1002_length(rsp) < le32_to_cpu(rsp->OutputOffset) + *plen) {
+	if (rsp_iov.iov_len < le32_to_cpu(rsp->OutputOffset) + *plen) {
 		cifs_dbg(VFS, "Malformed ioctl resp: len %d offset %d\n", *plen,
 			le32_to_cpu(rsp->OutputOffset));
 		*plen = 0;
@@ -2129,8 +2238,7 @@
 		goto ioctl_exit;
 	}
 
-	shdr = get_sync_hdr(rsp);
-	memcpy(*out_data, (char *)shdr + le32_to_cpu(rsp->OutputOffset), *plen);
+	memcpy(*out_data, (char *)rsp + le32_to_cpu(rsp->OutputOffset), *plen);
 ioctl_exit:
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
@@ -2162,8 +2270,8 @@
 }
 
 int
-SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
-	   u64 persistent_fid, u64 volatile_fid)
+SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
+		 u64 persistent_fid, u64 volatile_fid, int flags)
 {
 	struct smb2_close_req *req;
 	struct smb2_close_rsp *rsp;
@@ -2172,7 +2280,6 @@
 	struct kvec rsp_iov;
 	int resp_buftype;
 	int rc = 0;
-	int flags = 0;
 	unsigned int total_len;
 
 	cifs_dbg(FYI, "Close\n");
@@ -2184,7 +2291,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->PersistentFileId = persistent_fid;
@@ -2199,6 +2306,8 @@
 
 	if (rc != 0) {
 		cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE);
+		trace_smb3_close_err(xid, persistent_fid, tcon->tid, ses->Suid,
+				     rc);
 		goto close_exit;
 	}
 
@@ -2209,14 +2318,20 @@
 	return rc;
 }
 
+int
+SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
+	   u64 persistent_fid, u64 volatile_fid)
+{
+	return SMB2_close_flags(xid, tcon, persistent_fid, volatile_fid, 0);
+}
+
 static int
-validate_iov(struct TCP_Server_Info *server,
-	     unsigned int offset, unsigned int buffer_length,
+validate_iov(unsigned int offset, unsigned int buffer_length,
 	     struct kvec *iov, unsigned int min_buf_size)
 {
 	unsigned int smb_len = iov->iov_len;
-	char *end_of_smb = smb_len + server->vals->header_preamble_size + (char *)iov->iov_base;
-	char *begin_of_buf = server->vals->header_preamble_size + offset + (char *)iov->iov_base;
+	char *end_of_smb = smb_len + (char *)iov->iov_base;
+	char *begin_of_buf = offset + (char *)iov->iov_base;
 	char *end_of_buf = begin_of_buf + buffer_length;
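
With the preamble offset dropped, validate_iov() reduces to a plain bounds check: the claimed region must lie entirely inside the received iov. A sketch of that check, written to avoid overflow in the addition:

#include <stddef.h>

static int validate_region(size_t offset, size_t length, size_t smb_len)
{
	if (offset > smb_len || length > smb_len - offset)
		return -5;	/* -EIO: region escapes the frame */
	return 0;
}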
 
 
@@ -2246,18 +2361,17 @@
  * Caller must free buffer.
  */
 static int
-validate_and_copy_iov(struct TCP_Server_Info *server,
-		      unsigned int offset, unsigned int buffer_length,
+validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
 		      struct kvec *iov, unsigned int minbufsize,
 		      char *data)
 {
-	char *begin_of_buf = server->vals->header_preamble_size + offset + (char *)(iov->iov_base);
+	char *begin_of_buf = offset + (char *)iov->iov_base;
 	int rc;
 
 	if (!data)
 		return -EINVAL;
 
-	rc = validate_iov(server, offset, buffer_length, iov, minbufsize);
+	rc = validate_iov(offset, buffer_length, iov, minbufsize);
 	if (rc)
 		return rc;
 
@@ -2292,7 +2406,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->InfoType = info_type;
@@ -2318,6 +2432,8 @@
 
 	if (rc) {
 		cifs_stats_fail_inc(tcon, SMB2_QUERY_INFO_HE);
+		trace_smb3_query_info_err(xid, persistent_fid, tcon->tid,
+				ses->Suid, info_class, (__u32)info_type, rc);
 		goto qinf_exit;
 	}
 
@@ -2335,8 +2451,7 @@
 		}
 	}
 
-	rc = validate_and_copy_iov(ses->server,
-				   le16_to_cpu(rsp->OutputBufferOffset),
+	rc = validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
 				   le32_to_cpu(rsp->OutputBufferLength),
 				   &rsp_iov, min_len, *data);
 
@@ -2407,7 +2522,7 @@
 	unsigned int credits_received = 1;
 
 	if (mid->mid_state == MID_RESPONSE_RECEIVED)
-		credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
+		credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
 
 	DeleteMidQEntry(mid);
 	add_credits(server, credits_received, CIFS_ECHO_OP);
@@ -2536,7 +2651,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->PersistentFileId = persistent_fid;
@@ -2548,8 +2663,11 @@
 	rc = smb2_send_recv(xid, ses, iov, 1, &resp_buftype, flags, &rsp_iov);
 	cifs_small_buf_release(req);
 
-	if (rc != 0)
+	if (rc != 0) {
 		cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
+		trace_smb3_flush_err(xid, persistent_fid, tcon->tid, ses->Suid,
+				     rc);
+	}
 
 	free_rsp_buf(resp_buftype, rsp_iov.iov_base);
 	return rc;
@@ -2658,11 +2776,12 @@
 	struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink);
 	struct TCP_Server_Info *server = tcon->ses->server;
 	struct smb2_sync_hdr *shdr =
-				(struct smb2_sync_hdr *)rdata->iov[1].iov_base;
+				(struct smb2_sync_hdr *)rdata->iov[0].iov_base;
 	unsigned int credits_received = 1;
 	struct smb_rqst rqst = { .rq_iov = rdata->iov,
 				 .rq_nvec = 2,
 				 .rq_pages = rdata->pages,
+				 .rq_offset = rdata->page_offset,
 				 .rq_npages = rdata->nr_pages,
 				 .rq_pagesz = rdata->pagesz,
 				 .rq_tailsz = rdata->tailsz };
@@ -2760,7 +2879,7 @@
 		return rc;
 	}
 
-	if (encryption_required(io_parms.tcon))
+	if (smb3_encryption_required(io_parms.tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req_len = cpu_to_be32(total_len);
@@ -2791,7 +2910,13 @@
 	if (rc) {
 		kref_put(&rdata->refcount, cifs_readdata_release);
 		cifs_stats_fail_inc(io_parms.tcon, SMB2_READ_HE);
-	}
+		trace_smb3_read_err(rc, 0 /* xid */, io_parms.persistent_fid,
+				   io_parms.tcon->tid, io_parms.tcon->ses->Suid,
+				   io_parms.offset, io_parms.length);
+	} else
+		trace_smb3_read_done(0 /* xid */, io_parms.persistent_fid,
+				   io_parms.tcon->tid, io_parms.tcon->ses->Suid,
+				   io_parms.offset, io_parms.length);
 
 	cifs_small_buf_release(buf);
 	return rc;
@@ -2804,7 +2929,6 @@
 	int resp_buftype, rc = -EACCES;
 	struct smb2_read_plain_req *req = NULL;
 	struct smb2_read_rsp *rsp = NULL;
-	struct smb2_sync_hdr *shdr;
 	struct kvec iov[1];
 	struct kvec rsp_iov;
 	unsigned int total_len;
@@ -2816,7 +2940,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(io_parms->tcon))
+	if (smb3_encryption_required(io_parms->tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	iov[0].iov_base = (char *)req;
@@ -2832,9 +2956,15 @@
 			cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
 			cifs_dbg(VFS, "Send error in read = %d\n", rc);
 		}
+		trace_smb3_read_err(rc, xid, req->PersistentFileId,
+				    io_parms->tcon->tid, ses->Suid,
+				    io_parms->offset, io_parms->length);
 		free_rsp_buf(resp_buftype, rsp_iov.iov_base);
 		return rc == -ENODATA ? 0 : rc;
-	}
+	} else
+		trace_smb3_read_done(xid, req->PersistentFileId,
+				    io_parms->tcon->tid, ses->Suid,
+				    io_parms->offset, io_parms->length);
 
 	*nbytes = le32_to_cpu(rsp->DataLength);
 	if ((*nbytes > CIFS_MAX_MSGSIZE) ||
@@ -2845,10 +2975,8 @@
 		*nbytes = 0;
 	}
 
-	shdr = get_sync_hdr(rsp);
-
 	if (*buf) {
-		memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
+		memcpy(*buf, (char *)rsp + rsp->DataOffset, *nbytes);
 		free_rsp_buf(resp_buftype, rsp_iov.iov_base);
 	} else if (resp_buftype != CIFS_NO_BUFFER) {
 		*buf = rsp_iov.iov_base;
@@ -2875,7 +3003,7 @@
 
 	switch (mid->mid_state) {
 	case MID_RESPONSE_RECEIVED:
-		credits_received = le16_to_cpu(rsp->hdr.sync_hdr.CreditRequest);
+		credits_received = le16_to_cpu(rsp->sync_hdr.CreditRequest);
 		wdata->result = smb2_check_receive(mid, tcon->ses->server, 0);
 		if (wdata->result != 0)
 			break;
@@ -2952,7 +3080,7 @@
 		goto async_writev_out;
 	}
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	shdr = (struct smb2_sync_hdr *)req;
@@ -3013,6 +3141,7 @@
 	rqst.rq_iov = iov;
 	rqst.rq_nvec = 2;
 	rqst.rq_pages = wdata->pages;
+	rqst.rq_offset = wdata->page_offset;
 	rqst.rq_npages = wdata->nr_pages;
 	rqst.rq_pagesz = wdata->pagesz;
 	rqst.rq_tailsz = wdata->tailsz;
@@ -3050,9 +3179,15 @@
 			     wdata, flags);
 
 	if (rc) {
+		trace_smb3_write_err(0 /* no xid */, req->PersistentFileId,
+				     tcon->tid, tcon->ses->Suid, wdata->offset,
+				     wdata->bytes, rc);
 		kref_put(&wdata->refcount, release);
 		cifs_stats_fail_inc(tcon, SMB2_WRITE_HE);
-	}
+	} else
+		trace_smb3_write_done(0 /* no xid */, req->PersistentFileId,
+				     tcon->tid, tcon->ses->Suid, wdata->offset,
+				     wdata->bytes);
 
 async_writev_out:
 	cifs_small_buf_release(req);
@@ -3090,7 +3225,7 @@
 	if (io_parms->tcon->ses->server == NULL)
 		return -ECONNABORTED;
 
-	if (encryption_required(io_parms->tcon))
+	if (smb3_encryption_required(io_parms->tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->sync_hdr.ProcessId = cpu_to_le32(io_parms->pid);
@@ -3116,10 +3251,19 @@
 	rsp = (struct smb2_write_rsp *)rsp_iov.iov_base;
 
 	if (rc) {
+		trace_smb3_write_err(xid, req->PersistentFileId,
+				     io_parms->tcon->tid,
+				     io_parms->tcon->ses->Suid,
+				     io_parms->offset, io_parms->length, rc);
 		cifs_stats_fail_inc(io_parms->tcon, SMB2_WRITE_HE);
 		cifs_dbg(VFS, "Send error in write = %d\n", rc);
-	} else
+	} else {
 		*nbytes = le32_to_cpu(rsp->DataLength);
+		trace_smb3_write_done(xid, req->PersistentFileId,
+				     io_parms->tcon->tid,
+				     io_parms->tcon->ses->Suid,
+				     io_parms->offset, *nbytes);
+	}
 
 	free_rsp_buf(resp_buftype, rsp);
 	return rc;
@@ -3200,7 +3344,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	switch (srch_inf->info_level) {
@@ -3251,7 +3395,7 @@
 
 	if (rc) {
 		if (rc == -ENODATA &&
-		    rsp->hdr.sync_hdr.Status == STATUS_NO_MORE_FILES) {
+		    rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) {
 			srch_inf->endOfSearch = true;
 			rc = 0;
 		}
@@ -3259,8 +3403,7 @@
 		goto qdir_exit;
 	}
 
-	rc = validate_iov(server,
-			  le16_to_cpu(rsp->OutputBufferOffset),
+	rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
 			  le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
 			  info_buf_size);
 	if (rc)
@@ -3275,10 +3418,9 @@
 			cifs_buf_release(srch_inf->ntwrk_buf_start);
 	}
 	srch_inf->ntwrk_buf_start = (char *)rsp;
-	srch_inf->srch_entries_start = srch_inf->last_entry = 4 /* rfclen */ +
-		(char *)&rsp->hdr + le16_to_cpu(rsp->OutputBufferOffset);
-	/* 4 for rfc1002 length field */
-	end_of_smb = get_rfc1002_length(rsp) + 4 + (char *)&rsp->hdr;
+	srch_inf->srch_entries_start = srch_inf->last_entry =
+		(char *)rsp + le16_to_cpu(rsp->OutputBufferOffset);
+	end_of_smb = rsp_iov.iov_len + (char *)rsp;
 	srch_inf->entries_in_buffer =
 			num_entries(srch_inf->srch_entries_start, end_of_smb,
 				    &srch_inf->last_entry, info_buf_size);
@@ -3333,7 +3475,7 @@
 		return rc;
 	}
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->sync_hdr.ProcessId = cpu_to_le32(pid);
@@ -3366,8 +3508,11 @@
 	cifs_small_buf_release(req);
 	rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
 
-	if (rc != 0)
+	if (rc != 0) {
 		cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE);
+		trace_smb3_set_info_err(xid, persistent_fid, tcon->tid,
+				ses->Suid, info_class, (__u32)info_type, rc);
+	}
 
 	free_rsp_buf(resp_buftype, rsp);
 	kfree(iov);
@@ -3514,7 +3659,7 @@
 		  __u8 oplock_level)
 {
 	int rc;
-	struct smb2_oplock_break_req *req = NULL;
+	struct smb2_oplock_break *req = NULL;
 	struct cifs_ses *ses = tcon->ses;
 	int flags = CIFS_OBREAK_OP;
 	unsigned int total_len;
@@ -3528,7 +3673,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->VolatileFid = volatile_fid;
@@ -3593,7 +3738,7 @@
 	req->InputBufferOffset =
 			cpu_to_le16(sizeof(struct smb2_query_info_req) - 1);
 	req->OutputBufferLength = cpu_to_le32(
-		outbuf_len + sizeof(struct smb2_query_info_rsp) - 1 - server->vals->header_preamble_size);
+		outbuf_len + sizeof(struct smb2_query_info_rsp) - 1);
 
 	iov->iov_base = (char *)req;
 	iov->iov_len = total_len;
@@ -3610,7 +3755,6 @@
 	int rc = 0;
 	int resp_buftype;
 	struct cifs_ses *ses = tcon->ses;
-	struct TCP_Server_Info *server = ses->server;
 	struct smb2_fs_full_size_info *info = NULL;
 	int flags = 0;
 
@@ -3620,7 +3764,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
@@ -3631,10 +3775,9 @@
 	}
 	rsp = (struct smb2_query_info_rsp *)rsp_iov.iov_base;
 
-	info = (struct smb2_fs_full_size_info *)(server->vals->header_preamble_size +
-		le16_to_cpu(rsp->OutputBufferOffset) + (char *)&rsp->hdr);
-	rc = validate_iov(server,
-			  le16_to_cpu(rsp->OutputBufferOffset),
+	info = (struct smb2_fs_full_size_info *)(
+		le16_to_cpu(rsp->OutputBufferOffset) + (char *)rsp);
+	rc = validate_iov(le16_to_cpu(rsp->OutputBufferOffset),
 			  le32_to_cpu(rsp->OutputBufferLength), &rsp_iov,
 			  sizeof(struct smb2_fs_full_size_info));
 	if (!rc)
@@ -3655,7 +3798,6 @@
 	int rc = 0;
 	int resp_buftype, max_len, min_len;
 	struct cifs_ses *ses = tcon->ses;
-	struct TCP_Server_Info *server = ses->server;
 	unsigned int rsp_len, offset;
 	int flags = 0;
 
@@ -3678,7 +3820,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	rc = smb2_send_recv(xid, ses, &iov, 1, &resp_buftype, flags, &rsp_iov);
@@ -3691,20 +3833,20 @@
 
 	rsp_len = le32_to_cpu(rsp->OutputBufferLength);
 	offset = le16_to_cpu(rsp->OutputBufferOffset);
-	rc = validate_iov(server, offset, rsp_len, &rsp_iov, min_len);
+	rc = validate_iov(offset, rsp_len, &rsp_iov, min_len);
 	if (rc)
 		goto qfsattr_exit;
 
 	if (level == FS_ATTRIBUTE_INFORMATION)
-		memcpy(&tcon->fsAttrInfo, server->vals->header_preamble_size + offset
-			+ (char *)&rsp->hdr, min_t(unsigned int,
+		memcpy(&tcon->fsAttrInfo, offset
+			+ (char *)rsp, min_t(unsigned int,
 			rsp_len, max_len));
 	else if (level == FS_DEVICE_INFORMATION)
-		memcpy(&tcon->fsDevInfo, server->vals->header_preamble_size + offset
-			+ (char *)&rsp->hdr, sizeof(FILE_SYSTEM_DEVICE_INFO));
+		memcpy(&tcon->fsDevInfo, offset
+			+ (char *)rsp, sizeof(FILE_SYSTEM_DEVICE_INFO));
 	else if (level == FS_SECTOR_SIZE_INFORMATION) {
 		struct smb3_fs_ss_info *ss_info = (struct smb3_fs_ss_info *)
-			(server->vals->header_preamble_size + offset + (char *)&rsp->hdr);
+			(offset + (char *)rsp);
 		tcon->ss_flags = le32_to_cpu(ss_info->Flags);
 		tcon->perf_sector_size =
 			le32_to_cpu(ss_info->PhysicalBytesPerSectorForPerf);
@@ -3735,7 +3877,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->sync_hdr.ProcessId = cpu_to_le32(pid);
@@ -3758,6 +3900,8 @@
 	if (rc) {
 		cifs_dbg(FYI, "Send error in smb2_lockv = %d\n", rc);
 		cifs_stats_fail_inc(tcon, SMB2_LOCK_HE);
+		trace_smb3_lock_err(xid, persist_fid, tcon->tid,
+				    tcon->ses->Suid, rc);
 	}
 
 	return rc;
@@ -3799,7 +3943,7 @@
 	if (rc)
 		return rc;
 
-	if (encryption_required(tcon))
+	if (smb3_encryption_required(tcon))
 		flags |= CIFS_TRANSFORM_REQ;
 
 	req->sync_hdr.CreditRequest = cpu_to_le16(1);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index d28f358..a345560 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -122,25 +122,10 @@
 	__le16 StructureSize2; /* size of wct area (varies, request specific) */
 } __packed;
 
-struct smb2_hdr {
-	__be32 smb2_buf_length;	/* big endian on wire */
-				/* length is only two or three bytes - with */
-				/* one or two byte type preceding it that MBZ */
-	struct smb2_sync_hdr sync_hdr;
-} __packed;
-
-struct smb2_pdu {
-	struct smb2_hdr hdr;
-	__le16 StructureSize2; /* size of wct area (varies, request specific) */
-} __packed;
-
 #define SMB3_AES128CMM_NONCE 11
 #define SMB3_AES128GCM_NONCE 12
 
 struct smb2_transform_hdr {
-	__be32 smb2_buf_length;	/* big endian on wire */
-				/* length is only two or three bytes - with
-				 one or two byte type preceding it that MBZ */
 	__le32 ProtocolId;	/* 0xFD 'S' 'M' 'B' */
 	__u8   Signature[16];
 	__u8   Nonce[16];
@@ -171,7 +156,7 @@
 #define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
 
 struct smb2_err_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;
 	__le16 Reserved; /* MBZ */
 	__le32 ByteCount;  /* even if zero, at least one byte follows */
@@ -300,8 +285,16 @@
 	__le16	Ciphers[1]; /* Ciphers[0] since only one used now */
 } __packed;
 
+#define POSIX_CTXT_DATA_LEN	8
+struct smb2_posix_neg_context {
+	__le16	ContextType; /* 0x100 */
+	__le16	DataLength;
+	__le32	Reserved;
+	__le64	Reserved1; /* In case needed for future (eg version or caps) */
+} __packed;
+
 struct smb2_negotiate_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;	/* Must be 65 */
 	__le16 SecurityMode;
 	__le16 DialectRevision;
@@ -341,7 +334,7 @@
 #define SMB2_SESSION_FLAG_IS_NULL	0x0002
 #define SMB2_SESSION_FLAG_ENCRYPT_DATA	0x0004
 struct smb2_sess_setup_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 9 */
 	__le16 SessionFlags;
 	__le16 SecurityBufferOffset;
@@ -356,7 +349,7 @@
 } __packed;
 
 struct smb2_logoff_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;	/* Must be 4 */
 	__le16 Reserved;
 } __packed;
@@ -452,7 +445,7 @@
 } __packed;
 
 struct smb2_tree_connect_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;	/* Must be 16 */
 	__u8   ShareType;  /* see below */
 	__u8   Reserved;
@@ -503,7 +496,7 @@
 } __packed;
 
 struct smb2_tree_disconnect_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;	/* Must be 4 */
 	__le16 Reserved;
 } __packed;
@@ -615,7 +608,9 @@
 #define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2	"DH2Q"
 #define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2	"DH2C"
 #define SMB2_CREATE_APP_INSTANCE_ID	0x45BCA66AEFA7F74A9008FA462E144D74
-#define SVHDX_OPEN_DEVICE_CONTEXT	0x83CE6F1AD851E0986E34401CC9BCFCE9
+#define SVHDX_OPEN_DEVICE_CONTEX	0x9CCBCF9E04C1E643980E158DA1F6EC83
+#define SMB2_CREATE_TAG_POSIX		0x93AD25509CB411E7B42383DE968BCD7C
+
 
 struct smb2_create_req {
 	struct smb2_sync_hdr sync_hdr;
@@ -638,7 +633,7 @@
 } __packed;
 
 struct smb2_create_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;	/* Must be 89 */
 	__u8   OplockLevel;
 	__u8   Reserved;
@@ -727,6 +722,13 @@
 	} Data;
 } __packed;
 
+struct create_posix {
+	struct create_context ccontext;
+	__u8	Name[16];
+	__le32  Mode;
+	__u32	Reserved;
+} __packed;
+
 /* See MS-SMB2 2.2.13.2.11 */
 /* Flags */
 #define SMB2_DHANDLE_FLAG_PERSISTENT	0x00000002
@@ -894,7 +896,7 @@
 } __packed;
 
 struct smb2_ioctl_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;	/* Must be 57 */
 	__u16 Reserved;
 	__le32 CtlCode;
@@ -921,7 +923,7 @@
 } __packed;
 
 struct smb2_close_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* 60 */
 	__le16 Flags;
 	__le32 Reserved;
@@ -944,7 +946,7 @@
 } __packed;
 
 struct smb2_flush_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;
 	__le16 Reserved;
 } __packed;
@@ -976,7 +978,7 @@
 } __packed;
 
 struct smb2_read_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 17 */
 	__u8   DataOffset;
 	__u8   Reserved;
@@ -1007,7 +1009,7 @@
 } __packed;
 
 struct smb2_write_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 17 */
 	__u8   DataOffset;
 	__u8   Reserved;
@@ -1041,7 +1043,7 @@
 } __packed;
 
 struct smb2_lock_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 4 */
 	__le16 Reserved;
 } __packed;
@@ -1053,7 +1055,7 @@
 } __packed;
 
 struct smb2_echo_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize;	/* Must be 4 */
 	__u16  Reserved;
 } __packed;
@@ -1079,7 +1081,7 @@
 } __packed;
 
 struct smb2_query_directory_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 9 */
 	__le16 OutputBufferOffset;
 	__le32 OutputBufferLength;
@@ -1128,7 +1130,7 @@
 } __packed;
 
 struct smb2_query_info_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 9 */
 	__le16 OutputBufferOffset;
 	__le32 OutputBufferLength;
@@ -1150,12 +1152,11 @@
 } __packed;
 
 struct smb2_set_info_rsp {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 2 */
 } __packed;
 
-/* oplock break without an rfc1002 header */
-struct smb2_oplock_break_req {
+struct smb2_oplock_break {
 	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 24 */
 	__u8   OplockLevel;
@@ -1165,21 +1166,10 @@
 	__u64  VolatileFid;
 } __packed;
 
-/* oplock break with an rfc1002 header */
-struct smb2_oplock_break_rsp {
-	struct smb2_hdr hdr;
-	__le16 StructureSize; /* Must be 24 */
-	__u8   OplockLevel;
-	__u8   Reserved;
-	__le32 Reserved2;
-	__u64  PersistentFid;
-	__u64  VolatileFid;
-} __packed;
-
 #define SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED cpu_to_le32(0x01)
 
 struct smb2_lease_break {
-	struct smb2_hdr hdr;
+	struct smb2_sync_hdr sync_hdr;
 	__le16 StructureSize; /* Must be 44 */
 	__le16 Reserved;
 	__le32 Flags;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 8ba24a9..908555b 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -36,8 +36,9 @@
 extern int map_smb2_to_linux_error(char *buf, bool log_err);
 extern int smb2_check_message(char *buf, unsigned int length,
 			      struct TCP_Server_Info *server);
-extern unsigned int smb2_calc_size(void *buf);
-extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
+extern unsigned int smb2_calc_size(void *buf, struct TCP_Server_Info *server);
+extern char *smb2_get_data_area_len(int *off, int *len,
+				    struct smb2_sync_hdr *shdr);
 extern __le16 *cifs_convert_path_to_utf16(const char *from,
 					  struct cifs_sb_info *cifs_sb);
 
@@ -65,6 +66,8 @@
 extern int smb3_handle_read_data(struct TCP_Server_Info *server,
 				 struct mid_q_entry *mid);
 
+extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
+			struct cifs_fid *pfid);
 extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
 				   struct smb2_file_all_info *src);
 extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
@@ -129,6 +132,8 @@
 		     char **out_data, u32 *plen /* returned data len */);
 extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
 		      u64 persistent_file_id, u64 volatile_file_id);
+extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
+			    u64 persistent_fid, u64 volatile_fid, int flags);
 extern int SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon,
 		      u64 persistent_file_id, u64 volatile_file_id);
 extern int SMB2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 8806f3f..2c67112 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -480,7 +480,7 @@
 	unsigned int rc;
 	char server_response_sig[16];
 	struct smb2_sync_hdr *shdr =
-			(struct smb2_sync_hdr *)rqst->rq_iov[1].iov_base;
+			(struct smb2_sync_hdr *)rqst->rq_iov[0].iov_base;
 
 	if ((shdr->Command == SMB2_NEGOTIATE) ||
 	    (shdr->Command == SMB2_SESSION_SETUP) ||
@@ -605,14 +605,12 @@
 		   bool log_error)
 {
 	unsigned int len = mid->resp_buf_size;
-	struct kvec iov[2];
+	struct kvec iov[1];
 	struct smb_rqst rqst = { .rq_iov = iov,
-				 .rq_nvec = 2 };
+				 .rq_nvec = 1 };
 
 	iov[0].iov_base = (char *)mid->resp_buf;
-	iov[0].iov_len = 4;
-	iov[1].iov_base = (char *)mid->resp_buf + 4;
-	iov[1].iov_len = len;
+	iov[0].iov_len = len;
 
 	dump_smb(mid->resp_buf, min_t(u32, 80, len));
 	/* convert the length into a more usable form */
diff --git a/fs/cifs/trace.c b/fs/cifs/trace.c
new file mode 100644
index 0000000..bd4a546f
--- /dev/null
+++ b/fs/cifs/trace.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *   Copyright (C) 2018, Microsoft Corporation.
+ *
+ *   Author(s): Steve French <stfrench@microsoft.com>
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ */
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
new file mode 100644
index 0000000..61e74d4
--- /dev/null
+++ b/fs/cifs/trace.h
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *   Copyright (C) 2018, Microsoft Corporation.
+ *
+ *   Author(s): Steve French <stfrench@microsoft.com>
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cifs
+
+#if !defined(_CIFS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _CIFS_TRACE_H
+
+#include <linux/tracepoint.h>
+
+/* For logging errors in read or write */
+DECLARE_EVENT_CLASS(smb3_rw_err_class,
+	TP_PROTO(unsigned int xid,
+		__u64	fid,
+		__u32	tid,
+		__u64	sesid,
+		__u64	offset,
+		__u32	len,
+		int	rc),
+	TP_ARGS(xid, fid, tid, sesid, offset, len, rc),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(__u64, fid)
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(__u64, offset)
+		__field(__u32, len)
+		__field(int, rc)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->fid = fid;
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->offset = offset;
+		__entry->len = len;
+		__entry->rc = rc;
+	),
+	TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d",
+		__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+		__entry->offset, __entry->len, __entry->rc)
+)
+
+#define DEFINE_SMB3_RW_ERR_EVENT(name)          \
+DEFINE_EVENT(smb3_rw_err_class, smb3_##name,    \
+	TP_PROTO(unsigned int xid,		\
+		__u64	fid,			\
+		__u32	tid,			\
+		__u64	sesid,			\
+		__u64	offset,			\
+		__u32	len,			\
+		int	rc),			\
+	TP_ARGS(xid, fid, tid, sesid, offset, len, rc))
+
+DEFINE_SMB3_RW_ERR_EVENT(write_err);
+DEFINE_SMB3_RW_ERR_EVENT(read_err);
+
+
+/* For logging successful read or write */
+DECLARE_EVENT_CLASS(smb3_rw_done_class,
+	TP_PROTO(unsigned int xid,
+		__u64	fid,
+		__u32	tid,
+		__u64	sesid,
+		__u64	offset,
+		__u32	len),
+	TP_ARGS(xid, fid, tid, sesid, offset, len),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(__u64, fid)
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(__u64, offset)
+		__field(__u32, len)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->fid = fid;
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->offset = offset;
+		__entry->len = len;
+	),
+	TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x",
+		__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+		__entry->offset, __entry->len)
+)
+
+#define DEFINE_SMB3_RW_DONE_EVENT(name)         \
+DEFINE_EVENT(smb3_rw_done_class, smb3_##name,   \
+	TP_PROTO(unsigned int xid,		\
+		__u64	fid,			\
+		__u32	tid,			\
+		__u64	sesid,			\
+		__u64	offset,			\
+		__u32	len),			\
+	TP_ARGS(xid, fid, tid, sesid, offset, len))
+
+DEFINE_SMB3_RW_DONE_EVENT(write_done);
+DEFINE_SMB3_RW_DONE_EVENT(read_done);
+
+/*
+ * For handle-based calls other than read and write, and other than
+ * query/set info (which have their own event classes below)
+ */
+DECLARE_EVENT_CLASS(smb3_fd_err_class,
+	TP_PROTO(unsigned int xid,
+		__u64	fid,
+		__u32	tid,
+		__u64	sesid,
+		int	rc),
+	TP_ARGS(xid, fid, tid, sesid, rc),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(__u64, fid)
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(int, rc)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->fid = fid;
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->rc = rc;
+	),
+	TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d",
+		__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+		__entry->rc)
+)
+
+#define DEFINE_SMB3_FD_ERR_EVENT(name)          \
+DEFINE_EVENT(smb3_fd_err_class, smb3_##name,    \
+	TP_PROTO(unsigned int xid,		\
+		__u64	fid,			\
+		__u32	tid,			\
+		__u64	sesid,			\
+		int	rc),			\
+	TP_ARGS(xid, fid, tid, sesid, rc))
+
+DEFINE_SMB3_FD_ERR_EVENT(flush_err);
+DEFINE_SMB3_FD_ERR_EVENT(lock_err);
+DEFINE_SMB3_FD_ERR_EVENT(close_err);
+
+/*
+ * For handle-based query/set info calls
+ */
+DECLARE_EVENT_CLASS(smb3_inf_err_class,
+	TP_PROTO(unsigned int xid,
+		__u64	fid,
+		__u32	tid,
+		__u64	sesid,
+		__u8	infclass,
+		__u32	type,
+		int	rc),
+	TP_ARGS(xid, fid, tid, sesid, infclass, type, rc),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(__u64, fid)
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(__u8, infclass)
+		__field(__u32, type)
+		__field(int, rc)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->fid = fid;
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->infclass = infclass;
+		__entry->type = type;
+		__entry->rc = rc;
+	),
+	TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx class=%u type=0x%x rc=%d",
+		__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+		__entry->infclass, __entry->type, __entry->rc)
+)
+
+#define DEFINE_SMB3_INF_ERR_EVENT(name)          \
+DEFINE_EVENT(smb3_inf_err_class, smb3_##name,    \
+	TP_PROTO(unsigned int xid,		\
+		__u64	fid,			\
+		__u32	tid,			\
+		__u64	sesid,			\
+		__u8	infclass,		\
+		__u32	type,			\
+		int	rc),			\
+	TP_ARGS(xid, fid, tid, sesid, infclass, type, rc))
+
+DEFINE_SMB3_INF_ERR_EVENT(query_info_err);
+DEFINE_SMB3_INF_ERR_EVENT(set_info_err);
+DEFINE_SMB3_INF_ERR_EVENT(fsctl_err);
+
+/*
+ * For logging SMB3 Status code and Command for responses which return errors
+ */
+DECLARE_EVENT_CLASS(smb3_cmd_err_class,
+	TP_PROTO(__u32	tid,
+		__u64	sesid,
+		__u16	cmd,
+		__u64	mid,
+		__u32	status,
+		int	rc),
+	TP_ARGS(tid, sesid, cmd, mid, status, rc),
+	TP_STRUCT__entry(
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(__u16, cmd)
+		__field(__u64, mid)
+		__field(__u32, status)
+		__field(int, rc)
+	),
+	TP_fast_assign(
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->cmd = cmd;
+		__entry->mid = mid;
+		__entry->status = status;
+		__entry->rc = rc;
+	),
+	TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d",
+		__entry->sesid, __entry->tid, __entry->cmd, __entry->mid,
+		__entry->status, __entry->rc)
+)
+
+#define DEFINE_SMB3_CMD_ERR_EVENT(name)          \
+DEFINE_EVENT(smb3_cmd_err_class, smb3_##name,    \
+	TP_PROTO(__u32	tid,			\
+		__u64	sesid,			\
+		__u16	cmd,			\
+		__u64	mid,			\
+		__u32	status,			\
+		int	rc),			\
+	TP_ARGS(tid, sesid, cmd, mid, status, rc))
+
+DEFINE_SMB3_CMD_ERR_EVENT(cmd_err);
+
+DECLARE_EVENT_CLASS(smb3_cmd_done_class,
+	TP_PROTO(__u32	tid,
+		__u64	sesid,
+		__u16	cmd,
+		__u64	mid),
+	TP_ARGS(tid, sesid, cmd, mid),
+	TP_STRUCT__entry(
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(__u16, cmd)
+		__field(__u64, mid)
+	),
+	TP_fast_assign(
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->cmd = cmd;
+		__entry->mid = mid;
+	),
+	TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu",
+		__entry->sesid, __entry->tid,
+		__entry->cmd, __entry->mid)
+)
+
+#define DEFINE_SMB3_CMD_DONE_EVENT(name)          \
+DEFINE_EVENT(smb3_cmd_done_class, smb3_##name,    \
+	TP_PROTO(__u32	tid,			\
+		__u64	sesid,			\
+		__u16	cmd,			\
+		__u64	mid),			\
+	TP_ARGS(tid, sesid, cmd, mid))
+
+DEFINE_SMB3_CMD_DONE_EVENT(cmd_done);
+
+DECLARE_EVENT_CLASS(smb3_exit_err_class,
+	TP_PROTO(unsigned int xid,
+		const char *func_name,
+		int	rc),
+	TP_ARGS(xid, func_name, rc),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(const char *, func_name)
+		__field(int, rc)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->func_name = func_name;
+		__entry->rc = rc;
+	),
+	TP_printk("\t%s: xid=%u rc=%d",
+		__entry->func_name, __entry->xid, __entry->rc)
+)
+
+#define DEFINE_SMB3_EXIT_ERR_EVENT(name)          \
+DEFINE_EVENT(smb3_exit_err_class, smb3_##name,    \
+	TP_PROTO(unsigned int xid,		\
+		const char *func_name,		\
+		int	rc),			\
+	TP_ARGS(xid, func_name, rc))
+
+DEFINE_SMB3_EXIT_ERR_EVENT(exit_err);
+
+DECLARE_EVENT_CLASS(smb3_enter_exit_class,
+	TP_PROTO(unsigned int xid,
+		const char *func_name),
+	TP_ARGS(xid, func_name),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(const char *, func_name)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->func_name = func_name;
+	),
+	TP_printk("\t%s: xid=%u",
+		__entry->func_name, __entry->xid)
+)
+
+#define DEFINE_SMB3_ENTER_EXIT_EVENT(name)        \
+DEFINE_EVENT(smb3_enter_exit_class, smb3_##name,  \
+	TP_PROTO(unsigned int xid,		\
+		const char *func_name),		\
+	TP_ARGS(xid, func_name))
+
+DEFINE_SMB3_ENTER_EXIT_EVENT(enter);
+DEFINE_SMB3_ENTER_EXIT_EVENT(exit_done);
+
+/*
+ * For smb2/smb3 open call
+ */
+DECLARE_EVENT_CLASS(smb3_open_err_class,
+	TP_PROTO(unsigned int xid,
+		__u32	tid,
+		__u64	sesid,
+		int	create_options,
+		int	desired_access,
+		int	rc),
+	TP_ARGS(xid, tid, sesid, create_options, desired_access, rc),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(int,   create_options)
+		__field(int, desired_access)
+		__field(int, rc)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->create_options = create_options;
+		__entry->desired_access = desired_access;
+		__entry->rc = rc;
+	),
+	TP_printk("xid=%u sid=0x%llx tid=0x%x cr_opts=0x%x des_access=0x%x rc=%d",
+		__entry->xid, __entry->sesid, __entry->tid,
+		__entry->create_options, __entry->desired_access, __entry->rc)
+)
+
+#define DEFINE_SMB3_OPEN_ERR_EVENT(name)          \
+DEFINE_EVENT(smb3_open_err_class, smb3_##name,    \
+	TP_PROTO(unsigned int xid,		\
+		__u32	tid,			\
+		__u64	sesid,			\
+		int	create_options,		\
+		int	desired_access,		\
+		int	rc),			\
+	TP_ARGS(xid, tid, sesid, create_options, desired_access, rc))
+
+DEFINE_SMB3_OPEN_ERR_EVENT(open_err);
+
+
+DECLARE_EVENT_CLASS(smb3_open_done_class,
+	TP_PROTO(unsigned int xid,
+		__u64	fid,
+		__u32	tid,
+		__u64	sesid,
+		int	create_options,
+		int	desired_access),
+	TP_ARGS(xid, fid, tid, sesid, create_options, desired_access),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(__u64, fid)
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(int, create_options)
+		__field(int, desired_access)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->fid = fid;
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->create_options = create_options;
+		__entry->desired_access = desired_access;
+	),
+	TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx cr_opts=0x%x des_access=0x%x",
+		__entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+		__entry->create_options, __entry->desired_access)
+)
+
+#define DEFINE_SMB3_OPEN_DONE_EVENT(name)        \
+DEFINE_EVENT(smb3_open_done_class, smb3_##name,  \
+	TP_PROTO(unsigned int xid,		\
+		__u64	fid,			\
+		__u32	tid,			\
+		__u64	sesid,			\
+		int	create_options,		\
+		int	desired_access),	\
+	TP_ARGS(xid, fid, tid, sesid, create_options, desired_access))
+
+DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
+
+#endif /* _CIFS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
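
Each DEFINE_EVENT above generates a trace_smb3_<name>() function with its class's TP_PROTO signature, which is what the smb2pdu.c hunks earlier in this diff call. A minimal sketch of the mapping, using an event defined above:

	/* DEFINE_SMB3_FD_ERR_EVENT(lock_err) expands, via DEFINE_EVENT, into a
	 * tracepoint that is a no-op unless enabled and is invoked like any
	 * other function (as in smb2_lockv() earlier in this diff): */
	trace_smb3_lock_err(xid, persist_fid, tcon->tid, tcon->ses->Suid, rc);
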
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 927226a..e7254e3 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -800,8 +800,8 @@
 #ifdef CONFIG_CIFS_SMB311
 	if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
 		struct kvec iov = {
-			.iov_base = buf + 4,
-			.iov_len = get_rfc1002_length(buf)
+			.iov_base = buf,
+			.iov_len = midQ->resp_buf_size
 		};
 		smb311_update_preauth_hash(ses, &iov, 1);
 	}
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 124b093..c4fb9ad 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -808,10 +808,7 @@
 	}
 out:
 	mutex_unlock(&read_mutex);
-	if (IS_ERR(inode))
-		return ERR_CAST(inode);
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 static int cramfs_readpage(struct file *file, struct page *page)
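
The d_splice_alias() conversion works because that helper already covers the branches being removed; roughly, as a sketch of its contract rather than its implementation:

	/* d_splice_alias(inode, dentry), as relied on above:
	 *   IS_ERR(inode)  -> returns ERR_CAST(inode)
	 *   inode == NULL  -> instantiates a negative dentry, returns NULL
	 *   directory with an existing alias -> returns that alias instead
	 */
	return d_splice_alias(inode, dentry);
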
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index ce65452..243a269 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -156,12 +156,8 @@
 	}
 
 	req = skcipher_request_alloc(tfm, gfp_flags);
-	if (!req) {
-		printk_ratelimited(KERN_ERR
-				"%s: crypto_request_alloc() failed\n",
-				__func__);
+	if (!req)
 		return -ENOMEM;
-	}
 
 	skcipher_request_set_callback(
 		req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
@@ -178,9 +174,10 @@
 		res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
 	skcipher_request_free(req);
 	if (res) {
-		printk_ratelimited(KERN_ERR
-			"%s: crypto_skcipher_encrypt() returned %d\n",
-			__func__, res);
+		fscrypt_err(inode->i_sb,
+			    "%scryption failed for inode %lu, block %llu: %d",
+			    (rw == FS_DECRYPT ? "de" : "en"),
+			    inode->i_ino, lblk_num, res);
 		return res;
 	}
 	return 0;
@@ -326,7 +323,6 @@
 		return 0;
 	}
 
-	/* this should eventually be an flag in d_flags */
 	spin_lock(&dentry->d_lock);
 	cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
 	spin_unlock(&dentry->d_lock);
@@ -353,7 +349,6 @@
 const struct dentry_operations fscrypt_d_ops = {
 	.d_revalidate = fscrypt_d_revalidate,
 };
-EXPORT_SYMBOL(fscrypt_d_ops);
 
 void fscrypt_restore_control_page(struct page *page)
 {
@@ -422,13 +417,43 @@
 	return res;
 }
 
+void fscrypt_msg(struct super_block *sb, const char *level,
+		 const char *fmt, ...)
+{
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+	struct va_format vaf;
+	va_list args;
+
+	if (!__ratelimit(&rs))
+		return;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+	if (sb)
+		printk("%sfscrypt (%s): %pV\n", level, sb->s_id, &vaf);
+	else
+		printk("%sfscrypt: %pV\n", level, &vaf);
+	va_end(args);
+}
+
 /**
  * fscrypt_init() - Set up for fs encryption.
  */
 static int __init fscrypt_init(void)
 {
+	/*
+	 * Use an unbound workqueue to allow bios to be decrypted in parallel
+	 * even when they happen to complete on the same CPU.  This sacrifices
+	 * locality, but it's worthwhile since decryption is CPU-intensive.
+	 *
+	 * Also use a high-priority workqueue to prioritize decryption work,
+	 * which blocks reads from completing, over regular application tasks.
+	 */
 	fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
-							WQ_HIGHPRI, 0);
+						 WQ_UNBOUND | WQ_HIGHPRI,
+						 num_online_cpus());
 	if (!fscrypt_read_workqueue)
 		goto fail;
 
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index e33f3d3..d7a0f68 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -59,11 +59,8 @@
 
 	/* Set up the encryption request */
 	req = skcipher_request_alloc(tfm, GFP_NOFS);
-	if (!req) {
-		printk_ratelimited(KERN_ERR
-			"%s: skcipher_request_alloc() failed\n", __func__);
+	if (!req)
 		return -ENOMEM;
-	}
 	skcipher_request_set_callback(req,
 			CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 			crypto_req_done, &wait);
@@ -74,8 +71,9 @@
 	res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
 	skcipher_request_free(req);
 	if (res < 0) {
-		printk_ratelimited(KERN_ERR
-				"%s: Error (error code %d)\n", __func__, res);
+		fscrypt_err(inode->i_sb,
+			    "Filename encryption failed for inode %lu: %d",
+			    inode->i_ino, res);
 		return res;
 	}
 
@@ -96,23 +94,14 @@
 	struct skcipher_request *req = NULL;
 	DECLARE_CRYPTO_WAIT(wait);
 	struct scatterlist src_sg, dst_sg;
-	struct fscrypt_info *ci = inode->i_crypt_info;
-	struct crypto_skcipher *tfm = ci->ci_ctfm;
+	struct crypto_skcipher *tfm = inode->i_crypt_info->ci_ctfm;
 	int res = 0;
 	char iv[FS_CRYPTO_BLOCK_SIZE];
-	unsigned lim;
-
-	lim = inode->i_sb->s_cop->max_namelen(inode);
-	if (iname->len <= 0 || iname->len > lim)
-		return -EIO;
 
 	/* Allocate request */
 	req = skcipher_request_alloc(tfm, GFP_NOFS);
-	if (!req) {
-		printk_ratelimited(KERN_ERR
-			"%s: crypto_request_alloc() failed\n",  __func__);
+	if (!req)
 		return -ENOMEM;
-	}
 	skcipher_request_set_callback(req,
 		CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 		crypto_req_done, &wait);
@@ -127,8 +116,9 @@
 	res = crypto_wait_req(crypto_skcipher_decrypt(req), &wait);
 	skcipher_request_free(req);
 	if (res < 0) {
-		printk_ratelimited(KERN_ERR
-				"%s: Error (error code %d)\n", __func__, res);
+		fscrypt_err(inode->i_sb,
+			    "Filename decryption failed for inode %lu: %d",
+			    inode->i_ino, res);
 		return res;
 	}
 
@@ -341,12 +331,12 @@
 		return 0;
 	}
 	ret = fscrypt_get_encryption_info(dir);
-	if (ret && ret != -EOPNOTSUPP)
+	if (ret)
 		return ret;
 
 	if (dir->i_crypt_info) {
 		if (!fscrypt_fname_encrypted_size(dir, iname->len,
-						  dir->i_sb->s_cop->max_namelen(dir),
+						  dir->i_sb->s_cop->max_namelen,
 						  &fname->crypto_buf.len))
 			return -ENAMETOOLONG;
 		fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index ad6722b..3756239 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -18,15 +18,7 @@
 
 /* Encryption parameters */
 #define FS_IV_SIZE			16
-#define FS_AES_128_ECB_KEY_SIZE		16
-#define FS_AES_128_CBC_KEY_SIZE		16
-#define FS_AES_128_CTS_KEY_SIZE		16
-#define FS_AES_256_GCM_KEY_SIZE		32
-#define FS_AES_256_CBC_KEY_SIZE		32
-#define FS_AES_256_CTS_KEY_SIZE		32
-#define FS_AES_256_XTS_KEY_SIZE		64
-
-#define FS_KEY_DERIVATION_NONCE_SIZE		16
+#define FS_KEY_DERIVATION_NONCE_SIZE	16
 
 /**
  * Encryption context for inode
@@ -91,6 +83,10 @@
 	    filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
 		return true;
 
+	if (contents_mode == FS_ENCRYPTION_MODE_SPECK128_256_XTS &&
+	    filenames_mode == FS_ENCRYPTION_MODE_SPECK128_256_CTS)
+		return true;
+
 	return false;
 }
 
@@ -106,6 +102,15 @@
 				  gfp_t gfp_flags);
 extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
 					      gfp_t gfp_flags);
+extern const struct dentry_operations fscrypt_d_ops;
+
+extern void __printf(3, 4) __cold
+fscrypt_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+
+#define fscrypt_warn(sb, fmt, ...)		\
+	fscrypt_msg(sb, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define fscrypt_err(sb, fmt, ...)		\
+	fscrypt_msg(sb, KERN_ERR, fmt, ##__VA_ARGS__)
 
 /* fname.c */
 extern int fname_encrypt(struct inode *inode, const struct qstr *iname,
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index bec0649..926e5df 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -39,8 +39,9 @@
 	dir = dget_parent(file_dentry(filp));
 	if (IS_ENCRYPTED(d_inode(dir)) &&
 	    !fscrypt_has_permitted_context(d_inode(dir), inode)) {
-		pr_warn_ratelimited("fscrypt: inconsistent encryption contexts: %lu/%lu",
-				    d_inode(dir)->i_ino, inode->i_ino);
+		fscrypt_warn(inode->i_sb,
+			     "inconsistent encryption contexts: %lu/%lu",
+			     d_inode(dir)->i_ino, inode->i_ino);
 		err = -EPERM;
 	}
 	dput(dir);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 05f5ee1..e997ca5 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -19,17 +19,16 @@
 
 static struct crypto_shash *essiv_hash_tfm;
 
-/**
- * derive_key_aes() - Derive a key using AES-128-ECB
- * @deriving_key: Encryption key used for derivation.
- * @source_key:   Source key to which to apply derivation.
- * @derived_raw_key:  Derived raw key.
+/*
+ * Key derivation function.  This generates the derived key by encrypting the
+ * master key with AES-128-ECB using the inode's nonce as the AES key.
  *
- * Return: Zero on success; non-zero otherwise.
+ * The master key must be at least as long as the derived key.  If the master
+ * key is longer, then only the first 'derived_keysize' bytes are used.
  */
-static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
-				const struct fscrypt_key *source_key,
-				u8 derived_raw_key[FS_MAX_KEY_SIZE])
+static int derive_key_aes(const u8 *master_key,
+			  const struct fscrypt_context *ctx,
+			  u8 *derived_key, unsigned int derived_keysize)
 {
 	int res = 0;
 	struct skcipher_request *req = NULL;
@@ -51,14 +50,13 @@
 	skcipher_request_set_callback(req,
 			CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
 			crypto_req_done, &wait);
-	res = crypto_skcipher_setkey(tfm, deriving_key,
-					FS_AES_128_ECB_KEY_SIZE);
+	res = crypto_skcipher_setkey(tfm, ctx->nonce, sizeof(ctx->nonce));
 	if (res < 0)
 		goto out;
 
-	sg_init_one(&src_sg, source_key->raw, source_key->size);
-	sg_init_one(&dst_sg, derived_raw_key, source_key->size);
-	skcipher_request_set_crypt(req, &src_sg, &dst_sg, source_key->size,
+	sg_init_one(&src_sg, master_key, derived_keysize);
+	sg_init_one(&dst_sg, derived_key, derived_keysize);
+	skcipher_request_set_crypt(req, &src_sg, &dst_sg, derived_keysize,
 				   NULL);
 	res = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);
 out:
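
The reworked helper is driven by find_and_derive_key() in the next hunk; a condensed caller sketch (buffer size and names per the fscrypt_mode table added further below):

	u8 derived_key[64];	/* sized for the largest mode (XTS, 64 bytes) */
	int err;

	/* the AES-128-ECB key is the 16-byte per-inode nonce in ctx; only the
	 * first derived_keysize bytes of the keyring payload are consumed */
	err = derive_key_aes(payload->raw, ctx, derived_key, mode->keysize);
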
@@ -67,101 +65,147 @@
 	return res;
 }
 
-static int validate_user_key(struct fscrypt_info *crypt_info,
-			struct fscrypt_context *ctx, u8 *raw_key,
-			const char *prefix, int min_keysize)
+/*
+ * Search the current task's subscribed keyrings for a "logon" key with
+ * description prefix:descriptor, and if found acquire a read lock on it and
+ * return a pointer to its validated payload in *payload_ret.
+ */
+static struct key *
+find_and_lock_process_key(const char *prefix,
+			  const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE],
+			  unsigned int min_keysize,
+			  const struct fscrypt_key **payload_ret)
 {
 	char *description;
-	struct key *keyring_key;
-	struct fscrypt_key *master_key;
+	struct key *key;
 	const struct user_key_payload *ukp;
-	int res;
+	const struct fscrypt_key *payload;
 
 	description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
-				FS_KEY_DESCRIPTOR_SIZE,
-				ctx->master_key_descriptor);
+				FS_KEY_DESCRIPTOR_SIZE, descriptor);
 	if (!description)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
-	keyring_key = request_key(&key_type_logon, description, NULL);
+	key = request_key(&key_type_logon, description, NULL);
 	kfree(description);
-	if (IS_ERR(keyring_key))
-		return PTR_ERR(keyring_key);
-	down_read(&keyring_key->sem);
+	if (IS_ERR(key))
+		return key;
 
-	if (keyring_key->type != &key_type_logon) {
-		printk_once(KERN_WARNING
-				"%s: key type must be logon\n", __func__);
-		res = -ENOKEY;
-		goto out;
-	}
-	ukp = user_key_payload_locked(keyring_key);
-	if (!ukp) {
-		/* key was revoked before we acquired its semaphore */
-		res = -EKEYREVOKED;
-		goto out;
-	}
-	if (ukp->datalen != sizeof(struct fscrypt_key)) {
-		res = -EINVAL;
-		goto out;
-	}
-	master_key = (struct fscrypt_key *)ukp->data;
-	BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
+	down_read(&key->sem);
+	ukp = user_key_payload_locked(key);
 
-	if (master_key->size < min_keysize || master_key->size > FS_MAX_KEY_SIZE
-	    || master_key->size % AES_BLOCK_SIZE != 0) {
-		printk_once(KERN_WARNING
-				"%s: key size incorrect: %d\n",
-				__func__, master_key->size);
-		res = -ENOKEY;
-		goto out;
+	if (!ukp) /* was the key revoked before we acquired its semaphore? */
+		goto invalid;
+
+	payload = (const struct fscrypt_key *)ukp->data;
+
+	if (ukp->datalen != sizeof(struct fscrypt_key) ||
+	    payload->size < 1 || payload->size > FS_MAX_KEY_SIZE) {
+		fscrypt_warn(NULL,
+			     "key with description '%s' has invalid payload",
+			     key->description);
+		goto invalid;
 	}
-	res = derive_key_aes(ctx->nonce, master_key, raw_key);
-out:
-	up_read(&keyring_key->sem);
-	key_put(keyring_key);
-	return res;
+
+	if (payload->size < min_keysize) {
+		fscrypt_warn(NULL,
+			     "key with description '%s' is too short (got %u bytes, need %u+ bytes)",
+			     key->description, payload->size, min_keysize);
+		goto invalid;
+	}
+
+	*payload_ret = payload;
+	return key;
+
+invalid:
+	up_read(&key->sem);
+	key_put(key);
+	return ERR_PTR(-ENOKEY);
 }
 
-static const struct {
+/* Find the master key, then derive the inode's actual encryption key */
+static int find_and_derive_key(const struct inode *inode,
+			       const struct fscrypt_context *ctx,
+			       u8 *derived_key, unsigned int derived_keysize)
+{
+	struct key *key;
+	const struct fscrypt_key *payload;
+	int err;
+
+	key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
+					ctx->master_key_descriptor,
+					derived_keysize, &payload);
+	if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
+		key = find_and_lock_process_key(inode->i_sb->s_cop->key_prefix,
+						ctx->master_key_descriptor,
+						derived_keysize, &payload);
+	}
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+	err = derive_key_aes(payload->raw, ctx, derived_key, derived_keysize);
+	up_read(&key->sem);
+	key_put(key);
+	return err;
+}
+
+static struct fscrypt_mode {
+	const char *friendly_name;
 	const char *cipher_str;
 	int keysize;
+	bool logged_impl_name;
 } available_modes[] = {
-	[FS_ENCRYPTION_MODE_AES_256_XTS] = { "xts(aes)",
-					     FS_AES_256_XTS_KEY_SIZE },
-	[FS_ENCRYPTION_MODE_AES_256_CTS] = { "cts(cbc(aes))",
-					     FS_AES_256_CTS_KEY_SIZE },
-	[FS_ENCRYPTION_MODE_AES_128_CBC] = { "cbc(aes)",
-					     FS_AES_128_CBC_KEY_SIZE },
-	[FS_ENCRYPTION_MODE_AES_128_CTS] = { "cts(cbc(aes))",
-					     FS_AES_128_CTS_KEY_SIZE },
+	[FS_ENCRYPTION_MODE_AES_256_XTS] = {
+		.friendly_name = "AES-256-XTS",
+		.cipher_str = "xts(aes)",
+		.keysize = 64,
+	},
+	[FS_ENCRYPTION_MODE_AES_256_CTS] = {
+		.friendly_name = "AES-256-CTS-CBC",
+		.cipher_str = "cts(cbc(aes))",
+		.keysize = 32,
+	},
+	[FS_ENCRYPTION_MODE_AES_128_CBC] = {
+		.friendly_name = "AES-128-CBC",
+		.cipher_str = "cbc(aes)",
+		.keysize = 16,
+	},
+	[FS_ENCRYPTION_MODE_AES_128_CTS] = {
+		.friendly_name = "AES-128-CTS-CBC",
+		.cipher_str = "cts(cbc(aes))",
+		.keysize = 16,
+	},
+	[FS_ENCRYPTION_MODE_SPECK128_256_XTS] = {
+		.friendly_name = "Speck128/256-XTS",
+		.cipher_str = "xts(speck128)",
+		.keysize = 64,
+	},
+	[FS_ENCRYPTION_MODE_SPECK128_256_CTS] = {
+		.friendly_name = "Speck128/256-CTS-CBC",
+		.cipher_str = "cts(cbc(speck128))",
+		.keysize = 32,
+	},
 };
 
-static int determine_cipher_type(struct fscrypt_info *ci, struct inode *inode,
-				 const char **cipher_str_ret, int *keysize_ret)
+static struct fscrypt_mode *
+select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
 {
-	u32 mode;
-
 	if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
-		pr_warn_ratelimited("fscrypt: inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)\n",
-				    inode->i_ino,
-				    ci->ci_data_mode, ci->ci_filename_mode);
-		return -EINVAL;
+		fscrypt_warn(inode->i_sb,
+			     "inode %lu uses unsupported encryption modes (contents mode %d, filenames mode %d)",
+			     inode->i_ino, ci->ci_data_mode,
+			     ci->ci_filename_mode);
+		return ERR_PTR(-EINVAL);
 	}
 
-	if (S_ISREG(inode->i_mode)) {
-		mode = ci->ci_data_mode;
-	} else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
-		mode = ci->ci_filename_mode;
-	} else {
-		WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
-			  inode->i_ino, (inode->i_mode & S_IFMT));
-		return -EINVAL;
-	}
+	if (S_ISREG(inode->i_mode))
+		return &available_modes[ci->ci_data_mode];
 
-	*cipher_str_ret = available_modes[mode].cipher_str;
-	*keysize_ret = available_modes[mode].keysize;
-	return 0;
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		return &available_modes[ci->ci_filename_mode];
+
+	WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
+		  inode->i_ino, (inode->i_mode & S_IFMT));
+	return ERR_PTR(-EINVAL);
 }
 
 static void put_crypt_info(struct fscrypt_info *ci)
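
With the table in place, mode selection reduces to an array lookup keyed by file type; for example, under the common policy of AES-256-XTS contents with AES-256-CTS filenames:

	struct fscrypt_mode *mode = select_encryption_mode(ci, inode);
	/* S_ISREG           -> &available_modes[ci->ci_data_mode],     64-byte key */
	/* S_ISDIR, S_ISLNK  -> &available_modes[ci->ci_filename_mode], 32-byte key */
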
@@ -184,8 +228,9 @@
 
 		tfm = crypto_alloc_shash("sha256", 0, 0);
 		if (IS_ERR(tfm)) {
-			pr_warn_ratelimited("fscrypt: error allocating SHA-256 transform: %ld\n",
-					    PTR_ERR(tfm));
+			fscrypt_warn(NULL,
+				     "error allocating SHA-256 transform: %ld",
+				     PTR_ERR(tfm));
 			return PTR_ERR(tfm);
 		}
 		prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
@@ -245,8 +290,7 @@
 	struct fscrypt_info *crypt_info;
 	struct fscrypt_context ctx;
 	struct crypto_skcipher *ctfm;
-	const char *cipher_str;
-	int keysize;
+	struct fscrypt_mode *mode;
 	u8 *raw_key = NULL;
 	int res;
 
@@ -290,57 +334,59 @@
 	memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
 				sizeof(crypt_info->ci_master_key));
 
-	res = determine_cipher_type(crypt_info, inode, &cipher_str, &keysize);
-	if (res)
+	mode = select_encryption_mode(crypt_info, inode);
+	if (IS_ERR(mode)) {
+		res = PTR_ERR(mode);
 		goto out;
+	}
 
 	/*
 	 * This cannot be a stack buffer because it is passed to the scatterlist
 	 * crypto API as part of key derivation.
 	 */
 	res = -ENOMEM;
-	raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS);
+	raw_key = kmalloc(mode->keysize, GFP_NOFS);
 	if (!raw_key)
 		goto out;
 
-	res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX,
-				keysize);
-	if (res && inode->i_sb->s_cop->key_prefix) {
-		int res2 = validate_user_key(crypt_info, &ctx, raw_key,
-					     inode->i_sb->s_cop->key_prefix,
-					     keysize);
-		if (res2) {
-			if (res2 == -ENOKEY)
-				res = -ENOKEY;
-			goto out;
-		}
-	} else if (res) {
+	res = find_and_derive_key(inode, &ctx, raw_key, mode->keysize);
+	if (res)
+		goto out;
+
+	ctfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+	if (IS_ERR(ctfm)) {
+		res = PTR_ERR(ctfm);
+		fscrypt_warn(inode->i_sb,
+			     "error allocating '%s' transform for inode %lu: %d",
+			     mode->cipher_str, inode->i_ino, res);
 		goto out;
 	}
-	ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
-	if (!ctfm || IS_ERR(ctfm)) {
-		res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
-		pr_debug("%s: error %d (inode %lu) allocating crypto tfm\n",
-			 __func__, res, inode->i_ino);
-		goto out;
+	if (unlikely(!mode->logged_impl_name)) {
+		/*
+		 * fscrypt performance can vary greatly depending on which
+		 * crypto algorithm implementation is used.  Help people debug
+		 * performance problems by logging the ->cra_driver_name the
+		 * first time a mode is used.  Note that multiple threads can
+		 * race here, but it doesn't really matter.
+		 */
+		mode->logged_impl_name = true;
+		pr_info("fscrypt: %s using implementation \"%s\"\n",
+			mode->friendly_name,
+			crypto_skcipher_alg(ctfm)->base.cra_driver_name);
 	}
 	crypt_info->ci_ctfm = ctfm;
-	crypto_skcipher_clear_flags(ctfm, ~0);
 	crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
-	/*
-	 * if the provided key is longer than keysize, we use the first
-	 * keysize bytes of the derived key only
-	 */
-	res = crypto_skcipher_setkey(ctfm, raw_key, keysize);
+	res = crypto_skcipher_setkey(ctfm, raw_key, mode->keysize);
 	if (res)
 		goto out;
 
 	if (S_ISREG(inode->i_mode) &&
 	    crypt_info->ci_data_mode == FS_ENCRYPTION_MODE_AES_128_CBC) {
-		res = init_essiv_generator(crypt_info, raw_key, keysize);
+		res = init_essiv_generator(crypt_info, raw_key, mode->keysize);
 		if (res) {
-			pr_debug("%s: error %d (inode %lu) allocating essiv tfm\n",
-				 __func__, res, inode->i_ino);
+			fscrypt_warn(inode->i_sb,
+				     "error initializing ESSIV generator for inode %lu: %d",
+				     inode->i_ino, res);
 			goto out;
 		}
 	}
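
At runtime the one-shot pr_info above produces a line such as the following; the driver name is whatever implementation the crypto API selected, so "xts-aes-aesni" is merely illustrative:

	fscrypt: AES-256-XTS using implementation "xts-aes-aesni"
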
diff --git a/fs/dax.c b/fs/dax.c
index aaec72de..aa86d9f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -677,7 +677,7 @@
 		 * downgrading page table protection not changing it to point
 		 * to a new page.
 		 *
-		 * See Documentation/vm/mmu_notifier.txt
+		 * See Documentation/vm/mmu_notifier.rst
 		 */
 		if (pmdp) {
 #ifdef CONFIG_FS_DAX_PMD
diff --git a/fs/dcache.c b/fs/dcache.c
index 2acfc69..0e8e5de 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -580,6 +580,7 @@
 	spin_unlock(&dentry->d_lock);
 	if (likely(can_free))
 		dentry_free(dentry);
+	cond_resched();
 }
 
 static struct dentry *__lock_parent(struct dentry *dentry)
@@ -827,30 +828,24 @@
  */
 void dput(struct dentry *dentry)
 {
-	if (unlikely(!dentry))
-		return;
+	while (dentry) {
+		might_sleep();
 
-repeat:
-	might_sleep();
+		rcu_read_lock();
+		if (likely(fast_dput(dentry))) {
+			rcu_read_unlock();
+			return;
+		}
 
-	rcu_read_lock();
-	if (likely(fast_dput(dentry))) {
+		/* Slow case: now with the dentry lock held */
 		rcu_read_unlock();
-		return;
-	}
 
-	/* Slow case: now with the dentry lock held */
-	rcu_read_unlock();
+		if (likely(retain_dentry(dentry))) {
+			spin_unlock(&dentry->d_lock);
+			return;
+		}
 
-	if (likely(retain_dentry(dentry))) {
-		spin_unlock(&dentry->d_lock);
-		return;
-	}
-
-	dentry = dentry_kill(dentry);
-	if (dentry) {
-		cond_resched();
-		goto repeat;
+		dentry = dentry_kill(dentry);
 	}
 }
 EXPORT_SYMBOL(dput);
@@ -907,6 +902,35 @@
 }
 EXPORT_SYMBOL(dget_parent);
 
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+	struct dentry *alias;
+
+	if (hlist_empty(&inode->i_dentry))
+		return NULL;
+	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
+	__dget(alias);
+	return alias;
+}
+
+/**
+ * d_find_any_alias - find any alias for a given inode
+ * @inode: inode to find an alias for
+ *
+ * If any aliases exist for the given inode, take and return a
+ * reference for one of them.  If no aliases exist, return %NULL.
+ */
+struct dentry *d_find_any_alias(struct inode *inode)
+{
+	struct dentry *de;
+
+	spin_lock(&inode->i_lock);
+	de = __d_find_any_alias(inode);
+	spin_unlock(&inode->i_lock);
+	return de;
+}
+EXPORT_SYMBOL(d_find_any_alias);
+
 /**
  * d_find_alias - grab a hashed alias of inode
  * @inode: inode in question
@@ -923,34 +947,19 @@
  */
 static struct dentry *__d_find_alias(struct inode *inode)
 {
-	struct dentry *alias, *discon_alias;
+	struct dentry *alias;
 
-again:
-	discon_alias = NULL;
+	if (S_ISDIR(inode->i_mode))
+		return __d_find_any_alias(inode);
+
 	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
 		spin_lock(&alias->d_lock);
- 		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
-			if (IS_ROOT(alias) &&
-			    (alias->d_flags & DCACHE_DISCONNECTED)) {
-				discon_alias = alias;
-			} else {
-				__dget_dlock(alias);
-				spin_unlock(&alias->d_lock);
-				return alias;
-			}
-		}
-		spin_unlock(&alias->d_lock);
-	}
-	if (discon_alias) {
-		alias = discon_alias;
-		spin_lock(&alias->d_lock);
-		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
+ 		if (!d_unhashed(alias)) {
 			__dget_dlock(alias);
 			spin_unlock(&alias->d_lock);
 			return alias;
 		}
 		spin_unlock(&alias->d_lock);
-		goto again;
 	}
 	return NULL;
 }
@@ -1052,8 +1061,6 @@
 	while (!list_empty(list)) {
 		struct dentry *dentry, *parent;
 
-		cond_resched();
-
 		dentry = list_entry(list->prev, struct dentry, d_lru);
 		spin_lock(&dentry->d_lock);
 		rcu_read_lock();
@@ -1230,13 +1237,11 @@
  * @parent:	start of walk
  * @data:	data passed to @enter() and @finish()
  * @enter:	callback when first entering the dentry
- * @finish:	callback when successfully finished the walk
  *
- * The @enter() and @finish() callbacks are called with d_lock held.
+ * The @enter() callbacks are called with d_lock held.
  */
 static void d_walk(struct dentry *parent, void *data,
-		   enum d_walk_ret (*enter)(void *, struct dentry *),
-		   void (*finish)(void *))
+		   enum d_walk_ret (*enter)(void *, struct dentry *))
 {
 	struct dentry *this_parent;
 	struct list_head *next;
@@ -1325,8 +1330,6 @@
 	if (need_seqretry(&rename_lock, seq))
 		goto rename_retry;
 	rcu_read_unlock();
-	if (finish)
-		finish(data);
 
 out_unlock:
 	spin_unlock(&this_parent->d_lock);
@@ -1375,7 +1378,7 @@
 	struct check_mount data = { .mnt = parent->mnt, .mounted = 0 };
 
 	read_seqlock_excl(&mount_lock);
-	d_walk(parent->dentry, &data, path_check_mount, NULL);
+	d_walk(parent->dentry, &data, path_check_mount);
 	read_sequnlock_excl(&mount_lock);
 
 	return data.mounted;
@@ -1483,11 +1486,16 @@
 		data.start = parent;
 		data.found = 0;
 
-		d_walk(parent, &data, select_collect, NULL);
+		d_walk(parent, &data, select_collect);
+
+		if (!list_empty(&data.dispose)) {
+			shrink_dentry_list(&data.dispose);
+			continue;
+		}
+
+		cond_resched();
 		if (!data.found)
 			break;
-
-		shrink_dentry_list(&data.dispose);
 	}
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
@@ -1518,7 +1526,7 @@
 static void do_one_tree(struct dentry *dentry)
 {
 	shrink_dcache_parent(dentry);
-	d_walk(dentry, dentry, umount_check, NULL);
+	d_walk(dentry, dentry, umount_check);
 	d_drop(dentry);
 	dput(dentry);
 }
@@ -1542,78 +1550,48 @@
 	}
 }
 
-struct detach_data {
-	struct select_data select;
-	struct dentry *mountpoint;
-};
-static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry)
+static enum d_walk_ret find_submount(void *_data, struct dentry *dentry)
 {
-	struct detach_data *data = _data;
-
+	struct dentry **victim = _data;
 	if (d_mountpoint(dentry)) {
 		__dget_dlock(dentry);
-		data->mountpoint = dentry;
+		*victim = dentry;
 		return D_WALK_QUIT;
 	}
-
-	return select_collect(&data->select, dentry);
-}
-
-static void check_and_drop(void *_data)
-{
-	struct detach_data *data = _data;
-
-	if (!data->mountpoint && list_empty(&data->select.dispose))
-		__d_drop(data->select.start);
+	return D_WALK_CONTINUE;
 }
 
 /**
  * d_invalidate - detach submounts, prune dcache, and drop
  * @dentry: dentry to invalidate (aka detach, prune and drop)
- *
- * no dcache lock.
- *
- * The final d_drop is done as an atomic operation relative to
- * rename_lock ensuring there are no races with d_set_mounted.  This
- * ensures there are no unhashed dentries on the path to a mountpoint.
  */
 void d_invalidate(struct dentry *dentry)
 {
-	/*
-	 * If it's already been dropped, return OK.
-	 */
+	bool had_submounts = false;
 	spin_lock(&dentry->d_lock);
 	if (d_unhashed(dentry)) {
 		spin_unlock(&dentry->d_lock);
 		return;
 	}
+	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
 
 	/* Negative dentries can be dropped without further checks */
-	if (!dentry->d_inode) {
-		d_drop(dentry);
+	if (!dentry->d_inode)
 		return;
-	}
 
+	shrink_dcache_parent(dentry);
 	for (;;) {
-		struct detach_data data;
-
-		data.mountpoint = NULL;
-		INIT_LIST_HEAD(&data.select.dispose);
-		data.select.start = dentry;
-		data.select.found = 0;
-
-		d_walk(dentry, &data, detach_and_collect, check_and_drop);
-
-		if (!list_empty(&data.select.dispose))
-			shrink_dentry_list(&data.select.dispose);
-		else if (!data.mountpoint)
+		struct dentry *victim = NULL;
+		d_walk(dentry, &victim, find_submount);
+		if (!victim) {
+			if (had_submounts)
+				shrink_dcache_parent(dentry);
 			return;
-
-		if (data.mountpoint) {
-			detach_mounts(data.mountpoint);
-			dput(data.mountpoint);
 		}
+		had_submounts = true;
+		detach_mounts(victim);
+		dput(victim);
 	}
 }
 EXPORT_SYMBOL(d_invalidate);
@@ -1963,35 +1941,6 @@
 }
 EXPORT_SYMBOL(d_make_root);
 
-static struct dentry * __d_find_any_alias(struct inode *inode)
-{
-	struct dentry *alias;
-
-	if (hlist_empty(&inode->i_dentry))
-		return NULL;
-	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
-	__dget(alias);
-	return alias;
-}
-
-/**
- * d_find_any_alias - find any alias for a given inode
- * @inode: inode to find an alias for
- *
- * If any aliases exist for the given inode, take and return a
- * reference for one of them.  If no aliases exist, return %NULL.
- */
-struct dentry *d_find_any_alias(struct inode *inode)
-{
-	struct dentry *de;
-
-	spin_lock(&inode->i_lock);
-	de = __d_find_any_alias(inode);
-	spin_unlock(&inode->i_lock);
-	return de;
-}
-EXPORT_SYMBOL(d_find_any_alias);
-
 static struct dentry *__d_instantiate_anon(struct dentry *dentry,
 					   struct inode *inode,
 					   bool disconnected)
@@ -3134,7 +3083,7 @@
 
 void d_genocide(struct dentry *parent)
 {
-	d_walk(parent, parent, d_genocide_kill, NULL);
+	d_walk(parent, parent, d_genocide_kill);
 }
 
 EXPORT_SYMBOL(d_genocide);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 1f99678..4fce1da 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -796,19 +796,13 @@
 ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
 				size_t count, loff_t *ppos)
 {
-	char buf[32];
-	size_t buf_size;
 	bool bv;
 	int r;
 	bool *val = file->private_data;
 	struct dentry *dentry = F_DENTRY(file);
 
-	buf_size = min(count, (sizeof(buf)-1));
-	if (copy_from_user(buf, user_buf, buf_size))
-		return -EFAULT;
-
-	buf[buf_size] = '\0';
-	if (strtobool(buf, &bv) == 0) {
+	r = kstrtobool_from_user(user_buf, count, &bv);
+	if (!r) {
 		r = debugfs_file_get(dentry);
 		if (unlikely(r))
 			return r;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 13b0135..a913b12 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -512,7 +512,9 @@
 	if (unlikely(!inode))
 		return failed_creating(dentry);
 
-	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+	if (!parent)
+		parent = debugfs_mount->mnt_root;
+	inode->i_mode = S_IFDIR | ((d_inode(parent)->i_mode & 0770));
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 874607b..093fb54 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -432,8 +432,8 @@
 	struct bio *bio;
 
 	/*
-	 * bio_alloc() is guaranteed to return a bio when called with
-	 * __GFP_RECLAIM and we request a valid number of vectors.
+	 * bio_alloc() is guaranteed to return a bio when allowed to sleep and
+	 * we request a valid number of vectors.
 	 */
 	bio = bio_alloc(GFP_KERNEL, nr_vecs);
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5243989..a5e4a22 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1037,6 +1037,7 @@
 	int result;
 	int addr_len;
 	struct socket *sock;
+	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 
 	if (con->nodeid == 0) {
 		log_print("attempt to connect sock 0 foiled");
@@ -1080,11 +1081,22 @@
 	log_print("connecting to %d", con->nodeid);
 
 	/* Turn off Nagle's algorithm */
-	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
+	kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
 			  sizeof(one));
 
+	/*
+	 * Make sock->ops->connect() return within the specified time, since
+	 * the O_NONBLOCK argument to connect() does not work here. Afterwards
+	 * we restore the default value of this attribute.
+	 */
+	kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+			  sizeof(tv));
 	result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
-				   O_NONBLOCK);
+				   0);
+	memset(&tv, 0, sizeof(tv));
+	kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+			  sizeof(tv));
+
 	if (result == -EINPROGRESS)
 		result = 0;
 	if (result == 0)
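
The dlm change wraps the blocking connect() in a temporary SO_SNDTIMEO: on Linux the send timeout also bounds a blocking connect(), and a zeroed timeval afterwards restores the default of blocking indefinitely. The same pattern works from userspace; a minimal sketch (the 5-second value mirrors the patch, everything else is illustrative):

    #include <string.h>
    #include <sys/socket.h>
    #include <sys/time.h>

    /* Bound a blocking connect() to 5 seconds via the send timeout. */
    static int connect_with_timeout(int fd, const struct sockaddr *sa,
                                    socklen_t len)
    {
            struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
            int ret;

            setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
            ret = connect(fd, sa, len);
            /* A zeroed timeval restores the default: block indefinitely. */
            memset(&tv, 0, sizeof(tv));
            setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
            return ret;
    }
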
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 08d3bd6..61c9514 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -101,14 +101,20 @@
 	return 0;
 }
 
-static __poll_t eventfd_poll(struct file *file, poll_table *wait)
+static struct wait_queue_head *
+eventfd_get_poll_head(struct file *file, __poll_t events)
+{
+	struct eventfd_ctx *ctx = file->private_data;
+
+	return &ctx->wqh;
+}
+
+static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask)
 {
 	struct eventfd_ctx *ctx = file->private_data;
 	__poll_t events = 0;
 	u64 count;
 
-	poll_wait(file, &ctx->wqh, wait);
-
 	/*
 	 * All writes to ctx->count occur within ctx->wqh.lock.  This read
 	 * can be done outside ctx->wqh.lock because we know that poll_wait
@@ -305,7 +311,8 @@
 	.show_fdinfo	= eventfd_show_fdinfo,
 #endif
 	.release	= eventfd_release,
-	.poll		= eventfd_poll,
+	.get_poll_head	= eventfd_get_poll_head,
+	.poll_mask	= eventfd_poll_mask,
 	.read		= eventfd_read,
 	.write		= eventfd_write,
 	.llseek		= noop_llseek,
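
eventfd is converted from the classic ->poll() method to the ->get_poll_head()/->poll_mask() pair: the first hook only reports which wait queue to sleep on, the second computes the ready mask without needing a poll_table. Schematically, a caller combines the two hooks like this (an illustration of the contract only, not the kernel's actual glue code):

    /* Sketch: how the two hooks replace one ->poll() call. */
    static __poll_t poll_two_step(struct file *file, __poll_t events,
                                  poll_table *pt)
    {
            struct wait_queue_head *head;

            head = file->f_op->get_poll_head(file, events);
            if (pt)
                    poll_wait(file, head, pt);      /* register the waiter */
            return file->f_op->poll_mask(file, events); /* query readiness */
    }

This is why the poll_wait() call disappears from eventfd_poll_mask(): waiter registration moves out of the per-file method entirely.
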
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 602ca42..67db22f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -884,8 +884,7 @@
 
 	pt->_key = epi->event.events;
 	if (!is_file_epoll(epi->ffd.file))
-		return epi->ffd.file->f_op->poll(epi->ffd.file, pt) &
-		       epi->event.events;
+		return vfs_poll(epi->ffd.file, pt) & epi->event.events;
 
 	ep = epi->ffd.file->private_data;
 	poll_wait(epi->ffd.file, &ep->poll_wait, pt);
@@ -2025,7 +2024,7 @@
 
 	/* The target file descriptor must support poll */
 	error = -EPERM;
-	if (!tf.file->f_op->poll)
+	if (!file_can_poll(tf.file))
 		goto error_tgt_fput;
 
 	/* Check if EPOLLWAKEUP is allowed */
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 3c6a9c1..ddbf872 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -790,7 +790,7 @@
 	for (i = 0; i < ios->oc->numdevs; i++) {
 		struct osd_request *or;
 
-		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, i));
 		if (unlikely(!or)) {
 			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
@@ -815,7 +815,7 @@
 	for (i = 0; i < ios->oc->numdevs; i++) {
 		struct osd_request *or;
 
-		or = osd_start_request(_ios_od(ios, i), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, i));
 		if (unlikely(!or)) {
 			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
@@ -847,7 +847,7 @@
 		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
 		struct osd_request *or;
 
-		or = osd_start_request(_ios_od(ios, dev), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, dev));
 		if (unlikely(!or)) {
 			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			ret = -ENOMEM;
@@ -966,7 +966,7 @@
 		return 0; /* Just an empty slot */
 
 	first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
-	or = osd_start_request(_ios_od(ios, first_dev), GFP_KERNEL);
+	or = osd_start_request(_ios_od(ios, first_dev));
 	if (unlikely(!or)) {
 		ORE_ERR("%s: osd_start_request failed\n", __func__);
 		return -ENOMEM;
@@ -1060,7 +1060,7 @@
 		struct ore_per_dev_state *per_dev = &ios->per_dev[cur_comp];
 		struct osd_request *or;
 
-		or = osd_start_request(_ios_od(ios, cur_comp), GFP_KERNEL);
+		or = osd_start_request(_ios_od(ios, cur_comp));
 		if (unlikely(!or)) {
 			ORE_ERR("%s: osd_start_request failed\n", __func__);
 			return -ENOMEM;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 179cd5c..719a315 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -229,7 +229,7 @@
 static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
 		    u64 offset, void *p, unsigned length)
 {
-	struct osd_request *or = osd_start_request(od, GFP_KERNEL);
+	struct osd_request *or = osd_start_request(od);
 /*	struct osd_sense_info osi = {.key = 0};*/
 	int ret;
 
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index de16945..c09289a 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -961,8 +961,7 @@
 	blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
 	if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
-		err = bdev_dax_supported(sb, blocksize);
-		if (err) {
+		if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
 			ext2_msg(sb, KERN_ERR,
 				"DAX unsupported by block device. Turning off DAX.");
 			sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 508b905..b00481c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -185,25 +185,15 @@
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t start, tmp;
 	int flex_bg = 0;
-	struct ext4_group_info *grp;
 
 	J_ASSERT_BH(bh, buffer_locked(bh));
 
 	/* If checksum is bad mark all blocks used to prevent allocation
 	 * essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-		grp = ext4_get_group_info(sb, block_group);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-			int count;
-			count = ext4_free_inodes_count(sb, gdp);
-			percpu_counter_sub(&sbi->s_freeinodes_counter,
-					   count);
-		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT |
+					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
 		return -EFSBADCRC;
 	}
 	memset(bh->b_data, 0, sb->s_blocksize);
@@ -375,7 +365,6 @@
 {
 	ext4_fsblk_t	blk;
 	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (buffer_verified(bh))
 		return 0;
@@ -387,10 +376,8 @@
 			desc, bh))) {
 		ext4_unlock_group(sb, block_group);
 		ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		return -EFSBADCRC;
 	}
 	blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
@@ -398,10 +385,8 @@
 		ext4_unlock_group(sb, block_group);
 		ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
 			   block_group, blk);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		return -EFSCORRUPTED;
 	}
 	set_buffer_verified(bh);
@@ -436,6 +421,8 @@
 	    (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
 		ext4_error(sb, "Invalid block bitmap block %llu in "
 			   "block_group %u", bitmap_blk, block_group);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		return ERR_PTR(-EFSCORRUPTED);
 	}
 	bh = sb_getblk(sb, bitmap_blk);
@@ -514,6 +501,8 @@
 		ext4_error(sb, "Cannot read block bitmap - "
 			   "block_group = %u, block_bitmap = %llu",
 			   block_group, (unsigned long long) bh->b_blocknr);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		return -EIO;
 	}
 	clear_buffer_new(bh);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a42e712..df95412 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2390,7 +2390,7 @@
 extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
 
 /* mballoc.c */
-extern const struct file_operations ext4_seq_mb_groups_fops;
+extern const struct seq_operations ext4_mb_seq_groups_ops;
 extern long ext4_mb_stats;
 extern long ext4_mb_max_to_scan;
 extern int ext4_mb_init(struct super_block *);
@@ -2530,6 +2530,9 @@
 				    ext4_group_t ngroup);
 extern const char *ext4_decode_error(struct super_block *sb, int errno,
 				     char nbuf[16]);
+extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
+					     ext4_group_t block_group,
+					     unsigned int flags);
 
 extern __printf(4, 5)
 void __ext4_error(struct super_block *, const char *, unsigned int,
@@ -2857,6 +2860,10 @@
 #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT		1
 #define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT	2
 #define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT	3
+#define EXT4_GROUP_INFO_BBITMAP_CORRUPT		\
+	(1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
+#define EXT4_GROUP_INFO_IBITMAP_CORRUPT		\
+	(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
 
 #define EXT4_MB_GRP_NEED_INIT(grp)	\
 	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
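
The two new *_CORRUPT macros are simply the mask form (1 << bit) of the existing *_CORRUPT_BIT numbers, so callers can OR several corruption types into one flags argument for ext4_mark_group_bitmap_corrupted() while set_bit()/test_bit() keep operating on the bit numbers. A standalone illustration of the pattern (names shortened; not ext4 code):

    #include <stdio.h>

    #define BBITMAP_CORRUPT_BIT 2
    #define IBITMAP_CORRUPT_BIT 3
    #define BBITMAP_CORRUPT     (1 << BBITMAP_CORRUPT_BIT)
    #define IBITMAP_CORRUPT     (1 << IBITMAP_CORRUPT_BIT)

    int main(void)
    {
            unsigned int flags = BBITMAP_CORRUPT | IBITMAP_CORRUPT;

            /* Mask form for passing flags around ... */
            if (flags & BBITMAP_CORRUPT)
                    /* ... bit-number form for set_bit()-style calls. */
                    printf("set bit %d in bb_state\n", BBITMAP_CORRUPT_BIT);
            if (flags & IBITMAP_CORRUPT)
                    printf("set bit %d in bb_state\n", IBITMAP_CORRUPT_BIT);
            return 0;
    }
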
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 763ef18..c4e6fb1 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -162,8 +162,7 @@
 
 void ext4_exit_es(void)
 {
-	if (ext4_es_cachep)
-		kmem_cache_destroy(ext4_es_cachep);
+	kmem_cache_destroy(ext4_es_cachep);
 }
 
 void ext4_es_init_tree(struct ext4_es_tree *tree)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index fb6f023..7f80233 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -277,10 +277,11 @@
 }
 
 #ifdef CONFIG_FS_DAX
-static int ext4_dax_huge_fault(struct vm_fault *vmf,
+static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
-	int result, error = 0;
+	int error = 0;
+	vm_fault_t result;
 	int retries = 0;
 	handle_t *handle = NULL;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
@@ -335,7 +336,7 @@
 	return result;
 }
 
-static int ext4_dax_fault(struct vm_fault *vmf)
+static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
 {
 	return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
 }
@@ -380,50 +381,64 @@
 	return 0;
 }
 
-static int ext4_file_open(struct inode * inode, struct file * filp)
+static int ext4_sample_last_mounted(struct super_block *sb,
+				    struct vfsmount *mnt)
 {
-	struct super_block *sb = inode->i_sb;
-	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	struct vfsmount *mnt = filp->f_path.mnt;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct path path;
 	char buf[64], *cp;
+	handle_t *handle;
+	int err;
+
+	if (likely(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED))
+		return 0;
+
+	if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
+		return 0;
+
+	sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
+	/*
+	 * Sample where the filesystem has been mounted and
+	 * store it in the superblock for sysadmin convenience
+	 * when trying to sort through large numbers of block
+	 * devices or filesystem images.
+	 */
+	memset(buf, 0, sizeof(buf));
+	path.mnt = mnt;
+	path.dentry = mnt->mnt_root;
+	cp = d_path(&path, buf, sizeof(buf));
+	err = 0;
+	if (IS_ERR(cp))
+		goto out;
+
+	handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
+	err = PTR_ERR(handle);
+	if (IS_ERR(handle))
+		goto out;
+	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
+	if (err)
+		goto out_journal;
+	strlcpy(sbi->s_es->s_last_mounted, cp,
+		sizeof(sbi->s_es->s_last_mounted));
+	ext4_handle_dirty_super(handle, sb);
+out_journal:
+	ext4_journal_stop(handle);
+out:
+	sb_end_intwrite(sb);
+	return err;
+}
+
+static int ext4_file_open(struct inode * inode, struct file * filp)
+{
 	int ret;
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
 
-	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
-		     !sb_rdonly(sb))) {
-		sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
-		/*
-		 * Sample where the filesystem has been mounted and
-		 * store it in the superblock for sysadmin convenience
-		 * when trying to sort through large numbers of block
-		 * devices or filesystem images.
-		 */
-		memset(buf, 0, sizeof(buf));
-		path.mnt = mnt;
-		path.dentry = mnt->mnt_root;
-		cp = d_path(&path, buf, sizeof(buf));
-		if (!IS_ERR(cp)) {
-			handle_t *handle;
-			int err;
-
-			handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
-			if (IS_ERR(handle))
-				return PTR_ERR(handle);
-			BUFFER_TRACE(sbi->s_sbh, "get_write_access");
-			err = ext4_journal_get_write_access(handle, sbi->s_sbh);
-			if (err) {
-				ext4_journal_stop(handle);
-				return err;
-			}
-			strlcpy(sbi->s_es->s_last_mounted, cp,
-				sizeof(sbi->s_es->s_last_mounted));
-			ext4_handle_dirty_super(handle, sb);
-			ext4_journal_stop(handle);
-		}
-	}
+	ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
+	if (ret)
+		return ret;
 
 	ret = fscrypt_file_open(inode, filp);
 	if (ret)
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index e871c4b..4b99e2d 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -402,8 +402,8 @@
 }
 
 /* Find all the fixed metadata in the filesystem. */
-int ext4_getfsmap_find_fixed_metadata(struct super_block *sb,
-				      struct list_head *meta_list)
+static int ext4_getfsmap_find_fixed_metadata(struct super_block *sb,
+					     struct list_head *meta_list)
 {
 	struct ext4_group_desc *gdp;
 	ext4_group_t agno;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index df92e3e..4d6e007 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -83,7 +83,6 @@
 {
 	ext4_fsblk_t	blk;
 	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (buffer_verified(bh))
 		return 0;
@@ -97,14 +96,8 @@
 		ext4_unlock_group(sb, block_group);
 		ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
 			   "inode_bitmap = %llu", block_group, blk);
-		grp = ext4_get_group_info(sb, block_group);
-		if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-			int count;
-			count = ext4_free_inodes_count(sb, desc);
-			percpu_counter_sub(&sbi->s_freeinodes_counter,
-					   count);
-		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
 		return -EFSBADCRC;
 	}
 	set_buffer_verified(bh);
@@ -136,6 +129,8 @@
 	    (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
 		ext4_error(sb, "Invalid inode bitmap blk %llu in "
 			   "block_group %u", bitmap_blk, block_group);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
 		return ERR_PTR(-EFSCORRUPTED);
 	}
 	bh = sb_getblk(sb, bitmap_blk);
@@ -143,7 +138,7 @@
 		ext4_error(sb, "Cannot read inode bitmap - "
 			    "block_group = %u, inode_bitmap = %llu",
 			    block_group, bitmap_blk);
-		return ERR_PTR(-EIO);
+		return ERR_PTR(-ENOMEM);
 	}
 	if (bitmap_uptodate(bh))
 		goto verify;
@@ -190,6 +185,8 @@
 		ext4_error(sb, "Cannot read inode bitmap - "
 			   "block_group = %u, inode_bitmap = %llu",
 			   block_group, bitmap_blk);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+				EXT4_GROUP_INFO_IBITMAP_CORRUPT);
 		return ERR_PTR(-EIO);
 	}
 
@@ -337,13 +334,8 @@
 			fatal = err;
 	} else {
 		ext4_error(sb, "bit already cleared for inode %lu", ino);
-		if (gdp && !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
-			int count;
-			count = ext4_free_inodes_count(sb, gdp);
-			percpu_counter_sub(&sbi->s_freeinodes_counter,
-					   count);
-		}
-		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_mark_group_bitmap_corrupted(sb, block_group,
+					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
 	}
 
 error_return:
@@ -914,6 +906,8 @@
 		if (group == 0 && (ino + 1) < EXT4_FIRST_INO(sb)) {
 			ext4_error(sb, "reserved inode found cleared - "
 				   "inode=%lu", ino + 1);
+			ext4_mark_group_bitmap_corrupted(sb, group,
+					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
 			goto next_group;
 		}
 
@@ -1105,6 +1099,8 @@
 		err = -EIO;
 		ext4_error(sb, "failed to insert inode %lu: doubly allocated?",
 			   inode->i_ino);
+		ext4_mark_group_bitmap_corrupted(sb, group,
+					EXT4_GROUP_INFO_IBITMAP_CORRUPT);
 		goto out;
 	}
 	inode->i_generation = prandom_u32();
@@ -1206,11 +1202,8 @@
 	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
 	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
 	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
-	if (IS_ERR(bitmap_bh)) {
-		ext4_error(sb, "inode bitmap error %ld for orphan %lu",
-			   ino, PTR_ERR(bitmap_bh));
+	if (IS_ERR(bitmap_bh))
 		return (struct inode *) bitmap_bh;
-	}
 
 	/* Having the inode bit set should be a 100% indicator that this
 	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index c32802c..bf7fa15 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -561,10 +561,16 @@
 		unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
 		int i;
 
-		/* Count number blocks in a subtree under 'partial' */
-		count = 1;
-		for (i = 0; partial + i != chain + depth - 1; i++)
-			count *= epb;
+		/*
+		 * Count the number of blocks in a subtree under 'partial'. At
+		 * each level we count the number of complete empty subtrees
+		 * beyond the current offset and then descend into the subtree
+		 * that is only partially beyond the current offset.
+		 */
+		count = 0;
+		for (i = partial - chain + 1; i < depth; i++)
+			count = count * epb + (epb - offsets[i] - 1);
+		count++;
 		/* Fill in size of a hole we found */
 		map->m_pblk = 0;
 		map->m_len = min_t(unsigned int, map->m_len, count);
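
The old loop sized the hole as a full subtree (count = epb to the power of the remaining levels), overestimating whenever the hole starts partway into a subtree. The new loop adds, at each level, the complete empty subtrees beyond the current offset, then descends, and finally adds one for the block at the deepest level. A standalone model with a worked example (illustration only, not the kernel code):

    #include <stdio.h>

    /* epb = pointers per indirect block; offsets[] = per-level offsets. */
    static unsigned long blocks_to_subtree_end(unsigned epb,
                                               const int *offsets, int levels)
    {
            unsigned long count = 0;
            int i;

            for (i = 0; i < levels; i++)
                    count = count * epb + (epb - offsets[i] - 1);
            return count + 1;       /* the block at the deepest level */
    }

    int main(void)
    {
            int offsets[] = { 2, 1 };       /* two levels below 'partial' */

            /* One full subtree of 4 blocks beyond offset 2, plus 2 blocks
             * beyond offset 1, plus the current block: 4 + 2 + 1 = 7. */
            printf("%lu\n", blocks_to_subtree_end(4, offsets, 2));
            return 0;
    }
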
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 70cf4c7..285ed15 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -144,6 +144,12 @@
 		goto out;
 
 	if (!is.s.not_found) {
+		if (is.s.here->e_value_inum) {
+			EXT4_ERROR_INODE(inode, "inline data xattr refers "
+					 "to an external xattr inode");
+			error = -EFSCORRUPTED;
+			goto out;
+		}
 		EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
 					(void *)ext4_raw_inode(&is.iloc));
 		EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
@@ -1835,8 +1841,8 @@
 	iomap->offset = 0;
 	iomap->length = min_t(loff_t, ext4_get_inline_size(inode),
 			      i_size_read(inode));
-	iomap->type = 0;
-	iomap->flags = IOMAP_F_DATA_INLINE;
+	iomap->type = IOMAP_INLINE;
+	iomap->flags = 0;
 
 out:
 	up_read(&EXT4_I(inode)->xattr_sem);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1e50c5e..2ea07ef 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4298,28 +4298,28 @@
 		EXT4_BLOCK_SIZE_BITS(sb);
 	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
 
-	/* If there are no blocks to remove, return now */
-	if (first_block >= stop_block)
-		goto out_stop;
+	/* If there are blocks to remove, do it */
+	if (stop_block > first_block) {
 
-	down_write(&EXT4_I(inode)->i_data_sem);
-	ext4_discard_preallocations(inode);
+		down_write(&EXT4_I(inode)->i_data_sem);
+		ext4_discard_preallocations(inode);
 
-	ret = ext4_es_remove_extent(inode, first_block,
-				    stop_block - first_block);
-	if (ret) {
+		ret = ext4_es_remove_extent(inode, first_block,
+					    stop_block - first_block);
+		if (ret) {
+			up_write(&EXT4_I(inode)->i_data_sem);
+			goto out_stop;
+		}
+
+		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+			ret = ext4_ext_remove_space(inode, first_block,
+						    stop_block - 1);
+		else
+			ret = ext4_ind_remove_space(handle, inode, first_block,
+						    stop_block);
+
 		up_write(&EXT4_I(inode)->i_data_sem);
-		goto out_stop;
 	}
-
-	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		ret = ext4_ext_remove_space(inode, first_block,
-					    stop_block - 1);
-	else
-		ret = ext4_ind_remove_space(handle, inode, first_block,
-					    stop_block);
-
-	up_write(&EXT4_I(inode)->i_data_sem);
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
 
@@ -4701,19 +4701,21 @@
 	}
 }
 
-static inline void ext4_iget_extra_inode(struct inode *inode,
+static inline int ext4_iget_extra_inode(struct inode *inode,
 					 struct ext4_inode *raw_inode,
 					 struct ext4_inode_info *ei)
 {
 	__le32 *magic = (void *)raw_inode +
 			EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
+
 	if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
 	    EXT4_INODE_SIZE(inode->i_sb) &&
 	    *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
 		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
-		ext4_find_inline_data_nolock(inode);
+		return ext4_find_inline_data_nolock(inode);
 	} else
 		EXT4_I(inode)->i_inline_off = 0;
+	return 0;
 }
 
 int ext4_get_projid(struct inode *inode, kprojid_t *projid)
@@ -4724,6 +4726,26 @@
 	return 0;
 }
 
+/*
+ * ext4 has self-managed i_version for ea inodes; it stores the lower 32 bits
+ * of the refcount in i_version, so use raw values if the inode has the
+ * EXT4_EA_INODE_FL flag set.
+ */
+static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
+{
+	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+		inode_set_iversion_raw(inode, val);
+	else
+		inode_set_iversion_queried(inode, val);
+}
+static inline u64 ext4_inode_peek_iversion(const struct inode *inode)
+{
+	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+		return inode_peek_iversion_raw(inode);
+	else
+		return inode_peek_iversion(inode);
+}
+
 struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 {
 	struct ext4_iloc iloc;
@@ -4893,7 +4915,9 @@
 			ei->i_extra_isize = sizeof(struct ext4_inode) -
 					    EXT4_GOOD_OLD_INODE_SIZE;
 		} else {
-			ext4_iget_extra_inode(inode, raw_inode, ei);
+			ret = ext4_iget_extra_inode(inode, raw_inode, ei);
+			if (ret)
+				goto bad_inode;
 		}
 	}
 
@@ -4910,7 +4934,7 @@
 				ivers |=
 		    (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
 		}
-		inode_set_iversion_queried(inode, ivers);
+		ext4_inode_set_iversion_queried(inode, ivers);
 	}
 
 	ret = 0;
@@ -4945,6 +4969,13 @@
 		inode->i_op = &ext4_dir_inode_operations;
 		inode->i_fop = &ext4_dir_operations;
 	} else if (S_ISLNK(inode->i_mode)) {
+		/* VFS does not allow setting these, so this must be corruption */
+		if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+			EXT4_ERROR_INODE(inode,
+			  "immutable or append flags not allowed on symlinks");
+			ret = -EFSCORRUPTED;
+			goto bad_inode;
+		}
 		if (ext4_encrypted_inode(inode)) {
 			inode->i_op = &ext4_encrypted_symlink_inode_operations;
 			ext4_set_aops(inode);
@@ -5196,7 +5227,7 @@
 	}
 
 	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
-		u64 ivers = inode_peek_iversion(inode);
+		u64 ivers = ext4_inode_peek_iversion(inode);
 
 		raw_inode->i_disk_version = cpu_to_le32(ivers);
 		if (ei->i_extra_isize) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 769a627..6eae2b9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -470,6 +470,8 @@
 					      "freeing block already freed "
 					      "(bit %u)",
 					      first + i);
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		}
 		mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
 	}
@@ -747,10 +749,8 @@
 		 * corrupt and update bb_free using bitmap value
 		 */
 		grp->bb_free = free;
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   grp->bb_free);
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+		ext4_mark_group_bitmap_corrupted(sb, group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 	}
 	mb_set_largest_free_order(sb, grp);
 
@@ -1454,12 +1454,8 @@
 				      "freeing already freed block "
 				      "(bit %u); block bitmap corrupt.",
 				      block);
-		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))
-			percpu_counter_sub(&sbi->s_freeclusters_counter,
-					   e4b->bd_info->bb_free);
-		/* Mark the block group as corrupt. */
-		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
-			&e4b->bd_info->bb_state);
+		ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+				EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		mb_regenerate_buddy(e4b);
 		goto done;
 	}
@@ -1956,6 +1952,8 @@
 					"%d free clusters as per "
 					"group info. But bitmap says 0",
 					free);
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			break;
 		}
 
@@ -1966,6 +1964,8 @@
 					"%d free clusters as per "
 					"group info. But got %d blocks",
 					free, ex.fe_len);
+			ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
+					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 			/*
 			 * The number of free blocks differs. This mostly
 			 * indicate that the bitmap is corrupt. So exit
@@ -2254,7 +2254,7 @@
 
 static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
 {
-	struct super_block *sb = seq->private;
+	struct super_block *sb = PDE_DATA(file_inode(seq->file));
 	ext4_group_t group;
 
 	if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
@@ -2265,7 +2265,7 @@
 
 static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct super_block *sb = seq->private;
+	struct super_block *sb = PDE_DATA(file_inode(seq->file));
 	ext4_group_t group;
 
 	++*pos;
@@ -2277,7 +2277,7 @@
 
 static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
 {
-	struct super_block *sb = seq->private;
+	struct super_block *sb = PDE_DATA(file_inode(seq->file));
 	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
 	int i;
 	int err, buddy_loaded = 0;
@@ -2330,34 +2330,13 @@
 {
 }
 
-static const struct seq_operations ext4_mb_seq_groups_ops = {
+const struct seq_operations ext4_mb_seq_groups_ops = {
 	.start  = ext4_mb_seq_groups_start,
 	.next   = ext4_mb_seq_groups_next,
 	.stop   = ext4_mb_seq_groups_stop,
 	.show   = ext4_mb_seq_groups_show,
 };
 
-static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
-{
-	struct super_block *sb = PDE_DATA(inode);
-	int rc;
-
-	rc = seq_open(file, &ext4_mb_seq_groups_ops);
-	if (rc == 0) {
-		struct seq_file *m = file->private_data;
-		m->private = sb;
-	}
-	return rc;
-
-}
-
-const struct file_operations ext4_seq_mb_groups_fops = {
-	.open		= ext4_mb_seq_groups_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
 {
 	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
@@ -2537,8 +2516,7 @@
 	int i;
 
 	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
-		if (ext4_groupinfo_caches[i])
-			kmem_cache_destroy(ext4_groupinfo_caches[i]);
+		kmem_cache_destroy(ext4_groupinfo_caches[i]);
 		ext4_groupinfo_caches[i] = NULL;
 	}
 }
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b6bec27..d792b76 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1933,7 +1933,7 @@
 		return 0;
 
 	n_group = ext4_get_group_number(sb, n_blocks_count - 1);
-	if (n_group > (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) {
+	if (n_group >= (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) {
 		ext4_warning(sb, "resize would cause inodes_count overflow");
 		return -EINVAL;
 	}
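
The comparison changes from > to >= because the boundary value already overflows: the resulting inode count is (n_group + 1) * EXT4_INODES_PER_GROUP(sb), and at n_group == 0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb) that product can exceed 32 bits. A quick check with an illustrative inodes-per-group value:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long ipg = 8192;  /* illustrative inodes per group */
            unsigned long long n_group = 0xFFFFFFFFULL / ipg;  /* 524287 */

            /* (524287 + 1) * 8192 = 2^32, one past the 32-bit maximum,
             * so the boundary group number itself must be rejected. */
            printf("%llu\n", (n_group + 1) * ipg);  /* prints 4294967296 */
            return 0;
    }
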
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eb104e8..00fe75a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -763,6 +763,36 @@
 	return;
 }
 
+void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
+				     ext4_group_t group,
+				     unsigned int flags)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+
+	if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) &&
+	    !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
+		percpu_counter_sub(&sbi->s_freeclusters_counter,
+					grp->bb_free);
+		set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
+			&grp->bb_state);
+	}
+
+	if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) &&
+	    !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
+		if (gdp) {
+			int count;
+
+			count = ext4_free_inodes_count(sb, gdp);
+			percpu_counter_sub(&sbi->s_freeinodes_counter,
+					   count);
+		}
+		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
+			&grp->bb_state);
+	}
+}
+
 void ext4_update_dynamic_rev(struct super_block *sb)
 {
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
@@ -1237,19 +1267,13 @@
 	return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
 }
 
-static unsigned ext4_max_namelen(struct inode *inode)
-{
-	return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
-		EXT4_NAME_LEN;
-}
-
 static const struct fscrypt_operations ext4_cryptops = {
 	.key_prefix		= "ext4:",
 	.get_context		= ext4_get_context,
 	.set_context		= ext4_set_context,
 	.dummy_context		= ext4_dummy_context,
 	.empty_dir		= ext4_empty_dir,
-	.max_namelen		= ext4_max_namelen,
+	.max_namelen		= EXT4_NAME_LEN,
 };
 #endif
 
@@ -2116,12 +2140,12 @@
 			    int read_only)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	int res = 0;
+	int err = 0;
 
 	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
 		ext4_msg(sb, KERN_ERR, "revision level too high, "
 			 "forcing read-only mode");
-		res = SB_RDONLY;
+		err = -EROFS;
 	}
 	if (read_only)
 		goto done;
@@ -2154,7 +2178,7 @@
 	if (sbi->s_journal)
 		ext4_set_feature_journal_needs_recovery(sb);
 
-	ext4_commit_super(sb, 1);
+	err = ext4_commit_super(sb, 1);
 done:
 	if (test_opt(sb, DEBUG))
 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
@@ -2166,7 +2190,7 @@
 			sbi->s_mount_opt, sbi->s_mount_opt2);
 
 	cleancache_init_fs(sb);
-	return res;
+	return err;
 }
 
 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
@@ -3732,8 +3756,7 @@
 					" that may contain inline data");
 			sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
 		}
-		err = bdev_dax_supported(sb, blocksize);
-		if (err) {
+		if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
 			ext4_msg(sb, KERN_ERR,
 				"DAX unsupported by block device. Turning off DAX.");
 			sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
@@ -4224,8 +4247,12 @@
 		goto failed_mount4;
 	}
 
-	if (ext4_setup_super(sb, es, sb_rdonly(sb)))
+	ret = ext4_setup_super(sb, es, sb_rdonly(sb));
+	if (ret == -EROFS) {
 		sb->s_flags |= SB_RDONLY;
+		ret = 0;
+	} else if (ret)
+		goto failed_mount4a;
 
 	/* determine the minimum size of new large inodes, if present */
 	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
@@ -4760,11 +4787,7 @@
 		unlock_buffer(sbh);
 		error = __sync_dirty_buffer(sbh,
 			REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
-		if (error)
-			return error;
-
-		error = buffer_write_io_error(sbh);
-		if (error) {
+		if (buffer_write_io_error(sbh)) {
 			ext4_msg(sb, KERN_ERR, "I/O error while writing "
 			       "superblock");
 			clear_buffer_write_io_error(sbh);
@@ -5165,8 +5188,12 @@
 			if (sbi->s_journal)
 				ext4_clear_journal_err(sb, es);
 			sbi->s_mount_state = le16_to_cpu(es->s_state);
-			if (!ext4_setup_super(sb, es, 0))
-				sb->s_flags &= ~SB_RDONLY;
+
+			err = ext4_setup_super(sb, es, 0);
+			if (err)
+				goto restore_opts;
+
+			sb->s_flags &= ~SB_RDONLY;
 			if (ext4_has_feature_mmp(sb))
 				if (ext4_multi_mount_protect(sb,
 						le64_to_cpu(es->s_mmp_block))) {
@@ -5190,8 +5217,11 @@
 	}
 
 	ext4_setup_system_zone(sb);
-	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY))
-		ext4_commit_super(sb, 1);
+	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
+		err = ext4_commit_super(sb, 1);
+		if (err)
+			goto restore_opts;
+	}
 
 #ifdef CONFIG_QUOTA
 	/* Release old quota file names */
@@ -5252,7 +5282,8 @@
 		 dquot->dq_dqb.dqb_bsoftlimit :
 		 dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
 	if (limit && buf->f_blocks > limit) {
-		curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+		curblock = (dquot->dq_dqb.dqb_curspace +
+			    dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
 		buf->f_blocks = limit;
 		buf->f_bfree = buf->f_bavail =
 			(buf->f_blocks > curblock) ?
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9ebd26c..f34da0b 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -346,39 +346,9 @@
 
 static struct kobject *ext4_feat;
 
-#define PROC_FILE_SHOW_DEFN(name) \
-static int name##_open(struct inode *inode, struct file *file) \
-{ \
-	return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \
-} \
-\
-static const struct file_operations ext4_seq_##name##_fops = { \
-	.open		= name##_open, \
-	.read		= seq_read, \
-	.llseek		= seq_lseek, \
-	.release	= single_release, \
-}
-
-#define PROC_FILE_LIST(name) \
-	{ __stringify(name), &ext4_seq_##name##_fops }
-
-PROC_FILE_SHOW_DEFN(es_shrinker_info);
-PROC_FILE_SHOW_DEFN(options);
-
-static const struct ext4_proc_files {
-	const char *name;
-	const struct file_operations *fops;
-} proc_files[] = {
-	PROC_FILE_LIST(options),
-	PROC_FILE_LIST(es_shrinker_info),
-	PROC_FILE_LIST(mb_groups),
-	{ NULL, NULL },
-};
-
 int ext4_register_sysfs(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	const struct ext4_proc_files *p;
 	int err;
 
 	init_completion(&sbi->s_kobj_unregister);
@@ -392,11 +362,14 @@
 
 	if (ext4_proc_root)
 		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
-
 	if (sbi->s_proc) {
-		for (p = proc_files; p->name; p++)
-			proc_create_data(p->name, S_IRUGO, sbi->s_proc,
-					 p->fops, sb);
+		proc_create_single_data("options", S_IRUGO, sbi->s_proc,
+				ext4_seq_options_show, sb);
+		proc_create_single_data("es_shrinker_info", S_IRUGO,
+				sbi->s_proc, ext4_seq_es_shrinker_info_show,
+				sb);
+		proc_create_seq_data("mb_groups", S_IRUGO, sbi->s_proc,
+				&ext4_mb_seq_groups_ops, sb);
 	}
 	return 0;
 }
@@ -404,13 +377,9 @@
 void ext4_unregister_sysfs(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	const struct ext4_proc_files *p;
 
-	if (sbi->s_proc) {
-		for (p = proc_files; p->name; p++)
-			remove_proc_entry(p->name, sbi->s_proc);
-		remove_proc_entry(sb->s_id, ext4_proc_root);
-	}
+	if (sbi->s_proc)
+		remove_proc_subtree(sb->s_id, ext4_proc_root);
 	kobject_del(&sbi->s_kobj);
 }
 
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 499cb4b..fc4ced5 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1688,7 +1688,7 @@
 
 	/* No failures allowed past this point. */
 
-	if (!s->not_found && here->e_value_offs) {
+	if (!s->not_found && here->e_value_size && here->e_value_offs) {
 		/* Remove the old value. */
 		void *first_val = s->base + min_offs;
 		size_t offs = le16_to_cpu(here->e_value_offs);
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 629001b..197a9d8 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -43,7 +43,7 @@
 		err = ext4_xattr_set_handle(handle, inode,
 					    EXT4_XATTR_INDEX_SECURITY,
 					    xattr->name, xattr->value,
-					    xattr->value_len, 0);
+					    xattr->value_len, XATTR_CREATE);
 		if (err < 0)
 			break;
 	}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 42d564c..970ae27 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1930,19 +1930,13 @@
 	return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode));
 }
 
-static unsigned f2fs_max_namelen(struct inode *inode)
-{
-	return S_ISLNK(inode->i_mode) ?
-			inode->i_sb->s_blocksize : F2FS_NAME_LEN;
-}
-
 static const struct fscrypt_operations f2fs_cryptops = {
 	.key_prefix	= "f2fs:",
 	.get_context	= f2fs_get_context,
 	.set_context	= f2fs_set_context,
 	.dummy_context	= f2fs_dummy_context,
 	.empty_dir	= f2fs_empty_dir,
-	.max_namelen	= f2fs_max_namelen,
+	.max_namelen	= F2FS_NAME_LEN,
 };
 #endif
 
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f33a56d..4b47ca6 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -572,23 +572,6 @@
 	return 0;
 }
 
-#define F2FS_PROC_FILE_DEF(_name)					\
-static int _name##_open_fs(struct inode *inode, struct file *file)	\
-{									\
-	return single_open(file, _name##_seq_show, PDE_DATA(inode));	\
-}									\
-									\
-static const struct file_operations f2fs_seq_##_name##_fops = {		\
-	.open = _name##_open_fs,					\
-	.read = seq_read,						\
-	.llseek = seq_lseek,						\
-	.release = single_release,					\
-};
-
-F2FS_PROC_FILE_DEF(segment_info);
-F2FS_PROC_FILE_DEF(segment_bits);
-F2FS_PROC_FILE_DEF(iostat_info);
-
 int __init f2fs_init_sysfs(void)
 {
 	int ret;
@@ -632,12 +615,12 @@
 		sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
 
 	if (sbi->s_proc) {
-		proc_create_data("segment_info", S_IRUGO, sbi->s_proc,
-				 &f2fs_seq_segment_info_fops, sb);
-		proc_create_data("segment_bits", S_IRUGO, sbi->s_proc,
-				 &f2fs_seq_segment_bits_fops, sb);
-		proc_create_data("iostat_info", S_IRUGO, sbi->s_proc,
-				&f2fs_seq_iostat_info_fops, sb);
+		proc_create_single_data("segment_info", S_IRUGO, sbi->s_proc,
+				segment_info_seq_show, sb);
+		proc_create_single_data("segment_bits", S_IRUGO, sbi->s_proc,
+				segment_bits_seq_show, sb);
+		proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
+				iostat_info_seq_show, sb);
 	}
 	return 0;
 }
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 582ca73..484ce67 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -314,10 +314,6 @@
 	int err;
 
 	mutex_lock(&MSDOS_SB(sb)->s_lock);
-	/*
-	 * Check whether the directory is not in use, then check
-	 * whether it is empty.
-	 */
 	err = fat_dir_empty(inode);
 	if (err)
 		goto out;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 2649759..4f4362d 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -697,15 +697,6 @@
 	return fat_search_long(dir, qname->name, len, sinfo);
 }
 
-/*
- * (nfsd's) anonymous disconnected dentry?
- * NOTE: !IS_ROOT() is not anonymous (I.e. d_splice_alias() did the job).
- */
-static int vfat_d_anon_disconn(struct dentry *dentry)
-{
-	return IS_ROOT(dentry) && (dentry->d_flags & DCACHE_DISCONNECTED);
-}
-
 static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
 				  unsigned int flags)
 {
@@ -738,8 +729,7 @@
 	 * Checking "alias->d_parent == dentry->d_parent" to make sure
 	 * FS is not corrupted (especially double linked dir).
 	 */
-	if (alias && alias->d_parent == dentry->d_parent &&
-	    !vfat_d_anon_disconn(alias)) {
+	if (alias && alias->d_parent == dentry->d_parent) {
 		/*
 		 * This inode has non anonymous-DCACHE_DISCONNECTED
 		 * dentry. This means, the user did ->lookup() by an
@@ -747,7 +737,6 @@
 		 *
 		 * Switch to new one for reason of locality if possible.
 		 */
-		BUG_ON(d_unhashed(alias));
 		if (!S_ISDIR(inode->i_mode))
 			d_move(alias, dentry);
 		iput(inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index d737ff0..c421694 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -871,9 +871,9 @@
 		if (fa->fa_file != filp)
 			continue;
 
-		spin_lock_irq(&fa->fa_lock);
+		write_lock_irq(&fa->fa_lock);
 		fa->fa_file = NULL;
-		spin_unlock_irq(&fa->fa_lock);
+		write_unlock_irq(&fa->fa_lock);
 
 		*fp = fa->fa_next;
 		call_rcu(&fa->fa_rcu, fasync_free_rcu);
@@ -918,13 +918,13 @@
 		if (fa->fa_file != filp)
 			continue;
 
-		spin_lock_irq(&fa->fa_lock);
+		write_lock_irq(&fa->fa_lock);
 		fa->fa_fd = fd;
-		spin_unlock_irq(&fa->fa_lock);
+		write_unlock_irq(&fa->fa_lock);
 		goto out;
 	}
 
-	spin_lock_init(&new->fa_lock);
+	rwlock_init(&new->fa_lock);
 	new->magic = FASYNC_MAGIC;
 	new->fa_file = filp;
 	new->fa_fd = fd;
@@ -987,14 +987,13 @@
 {
 	while (fa) {
 		struct fown_struct *fown;
-		unsigned long flags;
 
 		if (fa->magic != FASYNC_MAGIC) {
 			printk(KERN_ERR "kill_fasync: bad magic number in "
 			       "fasync_struct!\n");
 			return;
 		}
-		spin_lock_irqsave(&fa->fa_lock, flags);
+		read_lock(&fa->fa_lock);
 		if (fa->fa_file) {
 			fown = &fa->fa_file->f_owner;
 			/* Don't send SIGURG to processes which have not set a
@@ -1003,7 +1002,7 @@
 			if (!(sig == SIGURG && fown->signum == 0))
 				send_sigio(fown, fa->fa_fd, band);
 		}
-		spin_unlock_irqrestore(&fa->fa_lock, flags);
+		read_unlock(&fa->fa_lock);
 		fa = rcu_dereference(fa->fa_next);
 	}
 }
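
The fasync conversion turns fa_lock from a spinlock into an rwlock: kill_fasync(), the hot path, only reads the entry to deliver a signal, so concurrent senders no longer serialize against each other, while the rare insert/remove paths take the lock for writing. A minimal userspace analogue with POSIX rwlocks (illustration only, not kernel code):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t fa_lock = PTHREAD_RWLOCK_INITIALIZER;
    static int fa_fd = -1;

    /* Hot path: many signal senders may hold the read lock at once. */
    static void kill_fasync_like(void)
    {
            pthread_rwlock_rdlock(&fa_lock);
            if (fa_fd >= 0)
                    printf("send SIGIO for fd %d\n", fa_fd);
            pthread_rwlock_unlock(&fa_lock);
    }

    /* Rare path: updates still exclude both readers and other writers. */
    static void fasync_insert_like(int fd)
    {
            pthread_rwlock_wrlock(&fa_lock);
            fa_fd = fd;
            pthread_rwlock_unlock(&fa_lock);
    }

    int main(void)
    {
            fasync_insert_like(3);
            kill_fasync_like();
            return 0;
    }
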
diff --git a/fs/filesystems.c b/fs/filesystems.c
index f2728a4..b03f57b 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -238,21 +238,9 @@
 	return 0;
 }
 
-static int filesystems_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, filesystems_proc_show, NULL);
-}
-
-static const struct file_operations filesystems_proc_fops = {
-	.open		= filesystems_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_filesystems_init(void)
 {
-	proc_create("filesystems", 0, NULL, &filesystems_proc_fops);
+	proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
 	return 0;
 }
 module_init(proc_filesystems_init);
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index ce4785f..a514256 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -193,13 +193,9 @@
 		return ERR_PTR(-ENAMETOOLONG);
 				 
 	ino = vxfs_inode_by_name(dip, dp);
-	if (ino) {
+	if (ino)
 		ip = vxfs_iget(dip->i_sb, ino);
-		if (IS_ERR(ip))
-			return ERR_CAST(ip);
-	}
-	d_add(dp, ip);
-	return NULL;
+	return d_splice_alias(ip, dp);
 }
 
 /**
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c
index 15a3d04..9a13e9e 100644
--- a/fs/fscache/histogram.c
+++ b/fs/fscache/histogram.c
@@ -83,24 +83,9 @@
 {
 }
 
-static const struct seq_operations fscache_histogram_ops = {
+const struct seq_operations fscache_histogram_ops = {
 	.start		= fscache_histogram_start,
 	.stop		= fscache_histogram_stop,
 	.next		= fscache_histogram_next,
 	.show		= fscache_histogram_show,
 };
-
-/*
- * open "/proc/fs/fscache/histogram" to provide latency data
- */
-static int fscache_histogram_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &fscache_histogram_ops);
-}
-
-const struct file_operations fscache_histogram_fops = {
-	.open		= fscache_histogram_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 500650f..f83328a 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -31,6 +31,7 @@
 #include <linux/fscache-cache.h>
 #include <trace/events/fscache.h>
 #include <linux/sched.h>
+#include <linux/seq_file.h>
 
 #define FSCACHE_MIN_THREADS	4
 #define FSCACHE_MAX_THREADS	32
@@ -84,7 +85,7 @@
 	atomic_inc(&histogram[jif]);
 }
 
-extern const struct file_operations fscache_histogram_fops;
+extern const struct seq_operations fscache_histogram_ops;
 
 #else
 #define fscache_hist(hist, start_jif) do {} while (0)
@@ -294,7 +295,7 @@
 
 #define __fscache_stat(stat) (stat)
 
-extern const struct file_operations fscache_stats_fops;
+int fscache_stats_show(struct seq_file *m, void *v);
 #else
 
 #define __fscache_stat(stat) (NULL)
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
index 1d9e495..49a8c90 100644
--- a/fs/fscache/proc.c
+++ b/fs/fscache/proc.c
@@ -26,14 +26,14 @@
 		goto error_dir;
 
 #ifdef CONFIG_FSCACHE_STATS
-	if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL,
-			 &fscache_stats_fops))
+	if (!proc_create_single("fs/fscache/stats", S_IFREG | 0444, NULL,
+			fscache_stats_show))
 		goto error_stats;
 #endif
 
 #ifdef CONFIG_FSCACHE_HISTOGRAM
-	if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL,
-			 &fscache_histogram_fops))
+	if (!proc_create_seq("fs/fscache/histogram", S_IFREG | 0444, NULL,
+			 &fscache_histogram_ops))
 		goto error_histogram;
 #endif
 
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index fcc8c2f..00564a1 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -138,7 +138,7 @@
 /*
  * display the general statistics
  */
-static int fscache_stats_show(struct seq_file *m, void *v)
+int fscache_stats_show(struct seq_file *m, void *v)
 {
 	seq_puts(m, "FS-Cache statistics\n");
 
@@ -284,18 +284,3 @@
 		   atomic_read(&fscache_n_cache_culled_objects));
 	return 0;
 }
-
-/*
- * open "/proc/fs/fscache/stats" allowing provision of a statistical summary
- */
-static int fscache_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, fscache_stats_show, NULL);
-}
-
-const struct file_operations fscache_stats_fops = {
-	.open		= fscache_stats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release        = single_release,
-};
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index f587165..35f5ee2 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -54,8 +54,7 @@
 			continue;
 		if (start >= to)
 			break;
-		if (gfs2_is_jdata(ip))
-			set_buffer_uptodate(bh);
+		set_buffer_uptodate(bh);
 		gfs2_trans_add_data(ip->i_gl, bh);
 	}
 }
@@ -747,18 +746,21 @@
 	put_page(page);
 
 	gfs2_trans_end(sdp);
-	if (pos + len > ip->i_inode.i_size)
-		gfs2_trim_blocks(&ip->i_inode);
-	goto out_trans_fail;
+	if (alloc_required) {
+		gfs2_inplace_release(ip);
+		if (pos + len > ip->i_inode.i_size)
+			gfs2_trim_blocks(&ip->i_inode);
+	}
+	goto out_qunlock;
 
 out_endtrans:
 	gfs2_trans_end(sdp);
 out_trans_fail:
-	if (alloc_required) {
+	if (alloc_required)
 		gfs2_inplace_release(ip);
 out_qunlock:
+	if (alloc_required)
 		gfs2_quota_unlock(ip);
-	}
 out_unlock:
 	if (&ip->i_inode == sdp->sd_rindex) {
 		gfs2_glock_dq(&m_ip->i_gh);
@@ -814,7 +816,6 @@
  * @inode: The inode
  * @dibh: The buffer_head containing the on-disk inode
  * @pos: The file position
- * @len: The length of the write
  * @copied: How much was actually copied by the VFS
  * @page: The page
  *
@@ -824,17 +825,15 @@
  * Returns: errno
  */
 static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
-				  loff_t pos, unsigned len, unsigned copied,
+				  loff_t pos, unsigned copied,
 				  struct page *page)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
 	u64 to = pos + copied;
 	void *kaddr;
 	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
 
-	BUG_ON(pos + len > gfs2_max_stuffed_size(ip));
+	BUG_ON(pos + copied > gfs2_max_stuffed_size(ip));
 
 	kaddr = kmap_atomic(page);
 	memcpy(buf + pos, kaddr + pos, copied);
@@ -850,20 +849,6 @@
 			i_size_write(inode, to);
 		mark_inode_dirty(inode);
 	}
-
-	if (inode == sdp->sd_rindex) {
-		adjust_fs_space(inode);
-		sdp->sd_rindex_uptodate = 0;
-	}
-
-	brelse(dibh);
-	gfs2_trans_end(sdp);
-	if (inode == sdp->sd_rindex) {
-		gfs2_glock_dq(&m_ip->i_gh);
-		gfs2_holder_uninit(&m_ip->i_gh);
-	}
-	gfs2_glock_dq(&ip->i_gh);
-	gfs2_holder_uninit(&ip->i_gh);
 	return copied;
 }
 
@@ -877,9 +862,8 @@
  * @page: The page that has been written
  * @fsdata: The fsdata (unused in GFS2)
  *
- * The main write_end function for GFS2. We have a separate one for
- * stuffed files as they are slightly different, otherwise we just
- * put our locking around the VFS provided functions.
+ * The main write_end function for GFS2. We just put our locking around the
+ * VFS-provided functions.
  *
  * Returns: errno
  */
@@ -900,32 +884,39 @@
 	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
 
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
-	if (unlikely(ret)) {
-		unlock_page(page);
-		put_page(page);
-		goto failed;
+	if (unlikely(ret))
+		goto out;
+
+	if (gfs2_is_stuffed(ip)) {
+		ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page);
+		page = NULL;
+		goto out2;
 	}
 
-	if (gfs2_is_stuffed(ip))
-		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
-
-	if (!gfs2_is_writeback(ip))
+	if (gfs2_is_jdata(ip))
 		gfs2_page_add_databufs(ip, page, pos & ~PAGE_MASK, len);
+	else
+		gfs2_ordered_add_inode(ip);
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	page = NULL;
 	if (tr->tr_num_buf_new)
 		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 	else
 		gfs2_trans_add_meta(ip->i_gl, dibh);
 
-
+out2:
 	if (inode == sdp->sd_rindex) {
 		adjust_fs_space(inode);
 		sdp->sd_rindex_uptodate = 0;
 	}
 
 	brelse(dibh);
-failed:
+out:
+	if (page) {
+		unlock_page(page);
+		put_page(page);
+	}
 	gfs2_trans_end(sdp);
 	gfs2_inplace_release(ip);
 	if (ip->i_qadata && ip->i_qadata->qa_qd_num)
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 278ed08..ed66997 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -89,10 +89,12 @@
 		map_bh(bh, inode->i_sb, block);
 
 	set_buffer_uptodate(bh);
-	if (!gfs2_is_jdata(ip))
-		mark_buffer_dirty(bh);
-	if (!gfs2_is_writeback(ip))
+	if (gfs2_is_jdata(ip))
 		gfs2_trans_add_data(ip->i_gl, bh);
+	else {
+		mark_buffer_dirty(bh);
+		gfs2_ordered_add_inode(ip);
+	}
 
 	if (release) {
 		unlock_page(page);
@@ -176,8 +178,8 @@
 /**
  * find_metapath - Find path through the metadata tree
  * @sdp: The superblock
- * @mp: The metapath to return the result in
  * @block: The disk block to look up
+ * @mp: The metapath to return the result in
  * @height: The pre-calculated height of the metadata tree
  *
  *   This routine returns a struct metapath structure that defines a path
@@ -188,8 +190,7 @@
  *   filesystem with a blocksize of 4096.
  *
  *   find_metapath() would return a struct metapath structure set to:
- *   mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
- *   and mp_list[2] = 165.
+ *   mp_fheight = 3, mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
  *
  *   That means that in order to get to the block containing the byte at
  *   offset 101342453, we would load the indirect block pointed to by pointer
@@ -279,6 +280,21 @@
 	return p + mp->mp_list[height];
 }
 
+static inline const __be64 *metaend(unsigned int height, const struct metapath *mp)
+{
+	const struct buffer_head *bh = mp->mp_bh[height];
+	return (const __be64 *)(bh->b_data + bh->b_size);
+}
+
+static void clone_metapath(struct metapath *clone, struct metapath *mp)
+{
+	unsigned int hgt;
+
+	*clone = *mp;
+	for (hgt = 0; hgt < mp->mp_aheight; hgt++)
+		get_bh(clone->mp_bh[hgt]);
+}
+
 static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end)
 {
 	const __be64 *t;
@@ -420,20 +436,140 @@
 	return (ptr - first);
 }
 
-static inline void bmap_lock(struct gfs2_inode *ip, int create)
+typedef const __be64 *(*gfs2_metadata_walker)(
+		struct metapath *mp,
+		const __be64 *start, const __be64 *end,
+		u64 factor, void *data);
+
+#define WALK_STOP ((__be64 *)0)
+#define WALK_NEXT ((__be64 *)1)
+
+static int gfs2_walk_metadata(struct inode *inode, sector_t lblock,
+		u64 len, struct metapath *mp, gfs2_metadata_walker walker,
+		void *data)
 {
-	if (create)
-		down_write(&ip->i_rw_mutex);
-	else
-		down_read(&ip->i_rw_mutex);
+	struct metapath clone;
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	const __be64 *start, *end, *ptr;
+	u64 factor = 1;
+	unsigned int hgt;
+	int ret = 0;
+
+	for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--)
+		factor *= sdp->sd_inptrs;
+
+	for (;;) {
+		u64 step;
+
+		/* Walk indirect block. */
+		start = metapointer(hgt, mp);
+		end = metaend(hgt, mp);
+
+		step = (end - start) * factor;
+		if (step > len)
+			end = start + DIV_ROUND_UP_ULL(len, factor);
+
+		ptr = walker(mp, start, end, factor, data);
+		if (ptr == WALK_STOP)
+			break;
+		if (step >= len)
+			break;
+		len -= step;
+		if (ptr != WALK_NEXT) {
+			BUG_ON(!*ptr);
+			mp->mp_list[hgt] += ptr - start;
+			goto fill_up_metapath;
+		}
+
+lower_metapath:
+		/* Decrease height of metapath. */
+		if (mp != &clone) {
+			clone_metapath(&clone, mp);
+			mp = &clone;
+		}
+		brelse(mp->mp_bh[hgt]);
+		mp->mp_bh[hgt] = NULL;
+		if (!hgt)
+			break;
+		hgt--;
+		factor *= sdp->sd_inptrs;
+
+		/* Advance in metadata tree. */
+		(mp->mp_list[hgt])++;
+		start = metapointer(hgt, mp);
+		end = metaend(hgt, mp);
+		if (start >= end) {
+			mp->mp_list[hgt] = 0;
+			if (!hgt)
+				break;
+			goto lower_metapath;
+		}
+
+fill_up_metapath:
+		/* Increase height of metapath. */
+		if (mp != &clone) {
+			clone_metapath(&clone, mp);
+			mp = &clone;
+		}
+		ret = fillup_metapath(ip, mp, ip->i_height - 1);
+		if (ret < 0)
+			break;
+		hgt += ret;
+		for (; ret; ret--)
+			do_div(factor, sdp->sd_inptrs);
+		mp->mp_aheight = hgt + 1;
+	}
+	if (mp == &clone)
+		release_metapath(mp);
+	return ret;
 }
 
-static inline void bmap_unlock(struct gfs2_inode *ip, int create)
+struct gfs2_hole_walker_args {
+	u64 blocks;
+};
+
+static const __be64 *gfs2_hole_walker(struct metapath *mp,
+		const __be64 *start, const __be64 *end,
+		u64 factor, void *data)
 {
-	if (create)
-		up_write(&ip->i_rw_mutex);
-	else
-		up_read(&ip->i_rw_mutex);
+	struct gfs2_hole_walker_args *args = data;
+	const __be64 *ptr;
+
+	for (ptr = start; ptr < end; ptr++) {
+		if (*ptr) {
+			args->blocks += (ptr - start) * factor;
+			if (mp->mp_aheight == mp->mp_fheight)
+				return WALK_STOP;
+			return ptr;  /* increase height */
+		}
+	}
+	args->blocks += (end - start) * factor;
+	return WALK_NEXT;
+}
+
+/**
+ * gfs2_hole_size - figure out the size of a hole
+ * @inode: The inode
+ * @lblock: The logical starting block number
+ * @len: How far to look (in blocks)
+ * @mp: The metapath at lblock
+ * @iomap: The iomap to store the hole size in
+ *
+ * This function modifies @mp.
+ *
+ * Returns: errno on error
+ */
+static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
+			  struct metapath *mp, struct iomap *iomap)
+{
+	struct gfs2_hole_walker_args args = { };
+	int ret = 0;
+
+	ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args);
+	if (!ret)
+		iomap->length = args.blocks << inode->i_blkbits;
+	return ret;
 }
 
 static inline __be64 *gfs2_indirect_init(struct metapath *mp,
@@ -462,15 +598,11 @@
 };
 
 /**
- * gfs2_bmap_alloc - Build a metadata tree of the requested height
+ * gfs2_iomap_alloc - Build a metadata tree of the requested height
  * @inode: The GFS2 inode
- * @lblock: The logical starting block of the extent
- * @bh_map: This is used to return the mapping details
- * @zero_new: True if newly allocated blocks should be zeroed
+ * @iomap: The iomap structure
+ * @flags: iomap flags
  * @mp: The metapath, with proper height information calculated
- * @maxlen: The max number of data blocks to alloc
- * @dblock: Pointer to return the resulting new block
- * @dblks: Pointer to return the number of blocks allocated
  *
  * In this routine we may have to alloc:
  *   i) Indirect blocks to grow the metadata tree height
@@ -483,6 +615,13 @@
  * blocks are available, there will only be one request per bmap call)
  * and uses the state machine to initialise the blocks in order.
  *
+ * Right now, this function will allocate at most one indirect block
+ * worth of data -- with a default block size of 4K, that's slightly
+ * less than 2M.  If this limitation is ever removed to allow huge
+ * allocations, we would probably still want to limit the iomap size we
+ * return to avoid stalling other tasks during huge writes; the next
+ * iomap iteration would then find the blocks already allocated.
+ *
  * Returns: errno on error
  */
 
@@ -497,6 +636,7 @@
 	unsigned dblks = 0;
 	unsigned ptrs_per_blk;
 	const unsigned end_of_metadata = mp->mp_fheight - 1;
+	int ret;
 	enum alloc_state state;
 	__be64 *ptr;
 	__be64 zero_bn = 0;
@@ -507,6 +647,8 @@
 
 	gfs2_trans_add_meta(ip->i_gl, dibh);
 
+	down_write(&ip->i_rw_mutex);
+
 	if (mp->mp_fheight == mp->mp_aheight) {
 		struct buffer_head *bh;
 		int eob;
@@ -542,11 +684,10 @@
 	blks = dblks + iblks;
 	i = mp->mp_aheight;
 	do {
-		int error;
 		n = blks - alloced;
-		error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
-		if (error)
-			return error;
+		ret = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
+		if (ret)
+			goto out;
 		alloced += n;
 		if (state != ALLOC_DATA || gfs2_is_jdata(ip))
 			gfs2_trans_add_unrevoke(sdp, bn, n);
@@ -602,7 +743,7 @@
 			dblks = n;
 			ptr = metapointer(end_of_metadata, mp);
 			iomap->addr = bn << inode->i_blkbits;
-			iomap->flags |= IOMAP_F_NEW;
+			iomap->flags |= IOMAP_F_MERGED | IOMAP_F_NEW;
 			while (n-- > 0)
 				*ptr++ = cpu_to_be64(bn++);
 			break;
@@ -612,64 +753,10 @@
 	iomap->length = (u64)dblks << inode->i_blkbits;
 	ip->i_height = mp->mp_fheight;
 	gfs2_add_inode_blocks(&ip->i_inode, alloced);
-	gfs2_dinode_out(ip, mp->mp_bh[0]->b_data);
-	return 0;
-}
-
-/**
- * hole_size - figure out the size of a hole
- * @inode: The inode
- * @lblock: The logical starting block number
- * @mp: The metapath
- *
- * Returns: The hole size in bytes
- *
- */
-static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct metapath mp_eof;
-	u64 factor = 1;
-	int hgt;
-	u64 holesz = 0;
-	const __be64 *first, *end, *ptr;
-	const struct buffer_head *bh;
-	u64 lblock_stop = (i_size_read(inode) - 1) >> inode->i_blkbits;
-	int zeroptrs;
-	bool done = false;
-
-	/* Get another metapath, to the very last byte */
-	find_metapath(sdp, lblock_stop, &mp_eof, ip->i_height);
-	for (hgt = ip->i_height - 1; hgt >= 0 && !done; hgt--) {
-		bh = mp->mp_bh[hgt];
-		if (bh) {
-			zeroptrs = 0;
-			first = metapointer(hgt, mp);
-			end = (const __be64 *)(bh->b_data + bh->b_size);
-
-			for (ptr = first; ptr < end; ptr++) {
-				if (*ptr) {
-					done = true;
-					break;
-				} else {
-					zeroptrs++;
-				}
-			}
-		} else {
-			zeroptrs = sdp->sd_inptrs;
-		}
-		if (factor * zeroptrs >= lblock_stop - lblock + 1) {
-			holesz = lblock_stop - lblock + 1;
-			break;
-		}
-		holesz += factor * zeroptrs;
-
-		factor *= sdp->sd_inptrs;
-		if (hgt && (mp->mp_list[hgt - 1] < mp_eof.mp_list[hgt - 1]))
-			(mp->mp_list[hgt - 1])++;
-	}
-	return holesz << inode->i_blkbits;
+	gfs2_dinode_out(ip, dibh->b_data);
+out:
+	up_write(&ip->i_rw_mutex);
+	return ret;
 }
 
 static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
@@ -680,126 +767,136 @@
 		      sizeof(struct gfs2_dinode);
 	iomap->offset = 0;
 	iomap->length = i_size_read(inode);
-	iomap->type = IOMAP_MAPPED;
-	iomap->flags = IOMAP_F_DATA_INLINE;
+	iomap->type = IOMAP_INLINE;
 }
 
+#define IOMAP_F_GFS2_BOUNDARY IOMAP_F_PRIVATE
+
 /**
- * gfs2_iomap_begin - Map blocks from an inode to disk blocks
+ * gfs2_iomap_get - Map blocks from an inode to disk blocks
  * @inode: The inode
  * @pos: Starting position in bytes
  * @length: Length to map, in bytes
  * @flags: iomap flags
  * @iomap: The iomap structure
+ * @mp: The metapath
  *
  * Returns: errno
  */
-int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
-		     unsigned flags, struct iomap *iomap)
+static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
+			  unsigned flags, struct iomap *iomap,
+			  struct metapath *mp)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct metapath mp = { .mp_aheight = 1, };
-	unsigned int factor = sdp->sd_sb.sb_bsize;
-	const u64 *arr = sdp->sd_heightsize;
 	__be64 *ptr;
 	sector_t lblock;
-	sector_t lend;
-	int ret = 0;
+	sector_t lblock_stop;
+	int ret;
 	int eob;
-	unsigned int len;
+	u64 len;
 	struct buffer_head *bh;
 	u8 height;
 
-	trace_gfs2_iomap_start(ip, pos, length, flags);
-	if (!length) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!length)
+		return -EINVAL;
 
 	if (gfs2_is_stuffed(ip)) {
 		if (flags & IOMAP_REPORT) {
+			if (pos >= i_size_read(inode))
+				return -ENOENT;
 			gfs2_stuffed_iomap(inode, iomap);
-			if (pos >= iomap->length)
-				ret = -ENOENT;
-			goto out;
+			return 0;
 		}
 		BUG_ON(!(flags & IOMAP_WRITE));
 	}
-
 	lblock = pos >> inode->i_blkbits;
-	lend = (pos + length + sdp->sd_sb.sb_bsize - 1) >> inode->i_blkbits;
-
 	iomap->offset = lblock << inode->i_blkbits;
-	iomap->addr = IOMAP_NULL_ADDR;
-	iomap->type = IOMAP_HOLE;
-	iomap->length = (u64)(lend - lblock) << inode->i_blkbits;
-	iomap->flags = IOMAP_F_MERGED;
-	bmap_lock(ip, flags & IOMAP_WRITE);
+	lblock_stop = (pos + length - 1) >> inode->i_blkbits;
+	len = lblock_stop - lblock + 1;
 
-	/*
-	 * Directory data blocks have a struct gfs2_meta_header header, so the
-	 * remaining size is smaller than the filesystem block size.  Logical
-	 * block numbers for directories are in units of this remaining size!
-	 */
-	if (gfs2_is_dir(ip)) {
-		factor = sdp->sd_jbsize;
-		arr = sdp->sd_jheightsize;
-	}
+	down_read(&ip->i_rw_mutex);
 
-	ret = gfs2_meta_inode_buffer(ip, &mp.mp_bh[0]);
+	ret = gfs2_meta_inode_buffer(ip, &mp->mp_bh[0]);
 	if (ret)
-		goto out_release;
+		goto unlock;
 
 	height = ip->i_height;
-	while ((lblock + 1) * factor > arr[height])
+	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
 		height++;
-	find_metapath(sdp, lblock, &mp, height);
+	find_metapath(sdp, lblock, mp, height);
 	if (height > ip->i_height || gfs2_is_stuffed(ip))
 		goto do_alloc;
 
-	ret = lookup_metapath(ip, &mp);
+	ret = lookup_metapath(ip, mp);
 	if (ret)
-		goto out_release;
+		goto unlock;
 
-	if (mp.mp_aheight != ip->i_height)
+	if (mp->mp_aheight != ip->i_height)
 		goto do_alloc;
 
-	ptr = metapointer(ip->i_height - 1, &mp);
+	ptr = metapointer(ip->i_height - 1, mp);
 	if (*ptr == 0)
 		goto do_alloc;
 
-	iomap->type = IOMAP_MAPPED;
+	bh = mp->mp_bh[ip->i_height - 1];
+	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, len, &eob);
+
 	iomap->addr = be64_to_cpu(*ptr) << inode->i_blkbits;
-
-	bh = mp.mp_bh[ip->i_height - 1];
-	len = gfs2_extent_length(bh->b_data, bh->b_size, ptr, lend - lblock, &eob);
+	iomap->length = len << inode->i_blkbits;
+	iomap->type = IOMAP_MAPPED;
+	iomap->flags = IOMAP_F_MERGED;
 	if (eob)
-		iomap->flags |= IOMAP_F_BOUNDARY;
-	iomap->length = (u64)len << inode->i_blkbits;
+		iomap->flags |= IOMAP_F_GFS2_BOUNDARY;
 
-out_release:
-	release_metapath(&mp);
-	bmap_unlock(ip, flags & IOMAP_WRITE);
 out:
-	trace_gfs2_iomap_end(ip, iomap, ret);
+	iomap->bdev = inode->i_sb->s_bdev;
+unlock:
+	up_read(&ip->i_rw_mutex);
 	return ret;
 
 do_alloc:
-	if (flags & IOMAP_WRITE) {
-		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
-	} else if (flags & IOMAP_REPORT) {
+	iomap->addr = IOMAP_NULL_ADDR;
+	iomap->length = len << inode->i_blkbits;
+	iomap->type = IOMAP_HOLE;
+	iomap->flags = 0;
+	if (flags & IOMAP_REPORT) {
 		loff_t size = i_size_read(inode);
 		if (pos >= size)
 			ret = -ENOENT;
-		else if (height <= ip->i_height)
-			iomap->length = hole_size(inode, lblock, &mp);
+		else if (height == ip->i_height)
+			ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
 		else
 			iomap->length = size - pos;
 	}
-	goto out_release;
+	goto out;
 }
 
+static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
+			    unsigned flags, struct iomap *iomap)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct metapath mp = { .mp_aheight = 1, };
+	int ret;
+
+	trace_gfs2_iomap_start(ip, pos, length, flags);
+	if (flags & IOMAP_WRITE) {
+		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+		if (!ret && iomap->type == IOMAP_HOLE)
+			ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+		release_metapath(&mp);
+	} else {
+		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+		release_metapath(&mp);
+	}
+	trace_gfs2_iomap_end(ip, iomap, ret);
+	return ret;
+}
+
+const struct iomap_ops gfs2_iomap_ops = {
+	.iomap_begin = gfs2_iomap_begin,
+};
+
 /**
  * gfs2_block_map - Map one or more blocks of an inode to a disk block
  * @inode: The inode
@@ -825,34 +922,43 @@
 		   struct buffer_head *bh_map, int create)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct iomap iomap;
-	int ret, flags = 0;
+	loff_t pos = (loff_t)lblock << inode->i_blkbits;
+	loff_t length = bh_map->b_size;
+	struct metapath mp = { .mp_aheight = 1, };
+	struct iomap iomap = { };
+	int ret;
 
 	clear_buffer_mapped(bh_map);
 	clear_buffer_new(bh_map);
 	clear_buffer_boundary(bh_map);
 	trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
 
-	if (create)
-		flags |= IOMAP_WRITE;
-	ret = gfs2_iomap_begin(inode, (loff_t)lblock << inode->i_blkbits,
-			       bh_map->b_size, flags, &iomap);
-	if (ret) {
-		if (!create && ret == -ENOENT) {
-			/* Return unmapped buffer beyond the end of file.  */
+	if (create) {
+		ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, &iomap, &mp);
+		if (!ret && iomap.type == IOMAP_HOLE)
+			ret = gfs2_iomap_alloc(inode, &iomap, IOMAP_WRITE, &mp);
+		release_metapath(&mp);
+	} else {
+		ret = gfs2_iomap_get(inode, pos, length, 0, &iomap, &mp);
+		release_metapath(&mp);
+
+		/* Return unmapped buffer beyond the end of file. */
+		if (ret == -ENOENT) {
 			ret = 0;
+			goto out;
 		}
-		goto out;
 	}
+	if (ret)
+		goto out;
 
 	if (iomap.length > bh_map->b_size) {
 		iomap.length = bh_map->b_size;
-		iomap.flags &= ~IOMAP_F_BOUNDARY;
+		iomap.flags &= ~IOMAP_F_GFS2_BOUNDARY;
 	}
 	if (iomap.addr != IOMAP_NULL_ADDR)
 		map_bh(bh_map, inode->i_sb, iomap.addr >> inode->i_blkbits);
 	bh_map->b_size = iomap.length;
-	if (iomap.flags & IOMAP_F_BOUNDARY)
+	if (iomap.flags & IOMAP_F_GFS2_BOUNDARY)
 		set_buffer_boundary(bh_map);
 	if (iomap.flags & IOMAP_F_NEW)
 		set_buffer_new(bh_map);
@@ -945,8 +1051,10 @@
 		err = 0;
 	}
 
-	if (!gfs2_is_writeback(ip))
+	if (gfs2_is_jdata(ip))
 		gfs2_trans_add_data(ip->i_gl, bh);
+	else
+		gfs2_ordered_add_inode(ip);
 
 	zero_user(page, offset, length);
 	mark_buffer_dirty(bh);
@@ -1056,6 +1164,19 @@
 	return error;
 }
 
+int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+			 struct iomap *iomap)
+{
+	struct metapath mp = { .mp_aheight = 1, };
+	int ret;
+
+	ret = gfs2_iomap_get(inode, pos, length, IOMAP_WRITE, iomap, &mp);
+	if (!ret && iomap->type == IOMAP_HOLE)
+		ret = gfs2_iomap_alloc(inode, iomap, IOMAP_WRITE, &mp);
+	release_metapath(&mp);
+	return ret;
+}
+
 /**
  * sweep_bh_for_rgrps - find an rgrp in a meta buffer and free blocks therein
  * @ip: inode
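[Editor's note on the "slightly less than 2M" figure in the gfs2_iomap_alloc() comment above: with 4K blocks, an indirect block holds (block size minus metadata header) / 8 pointers of __be64 each. A minimal userspace sketch of that arithmetic; the 24-byte size of struct gfs2_meta_header is an assumption based on the on-disk format.]

    #include <stdio.h>

    int main(void)
    {
            const unsigned bsize = 4096;              /* default gfs2 block size */
            const unsigned hdr = 24;                  /* sizeof(struct gfs2_meta_header), assumed */
            const unsigned ptrs = (bsize - hdr) / 8;  /* __be64 pointers per indirect block */

            /* 509 pointers * 4096 bytes = 2084864 bytes, just under 2 MiB */
            printf("%u pointers -> %u bytes\n", ptrs, ptrs * bsize);
            return 0;
    }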
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h
index c3402fe..6b18fb3 100644
--- a/fs/gfs2/bmap.h
+++ b/fs/gfs2/bmap.h
@@ -46,11 +46,13 @@
 	}
 }
 
+extern const struct iomap_ops gfs2_iomap_ops;
+
 extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
 extern int gfs2_block_map(struct inode *inode, sector_t lblock,
 			  struct buffer_head *bh, int create);
-extern int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
-			    unsigned flags, struct iomap *iomap);
+extern int gfs2_iomap_get_alloc(struct inode *inode, loff_t pos, loff_t length,
+				struct iomap *iomap);
 extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new,
 			   u64 *dblock, unsigned *extlen);
 extern int gfs2_setattr_size(struct inode *inode, u64 size);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4b71f021..7137db7 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -733,7 +733,7 @@
 	struct gfs2_inode *ip = GFS2_I(inode);
 	loff_t end = offset + len;
 	struct buffer_head *dibh;
-	struct iomap iomap;
+	struct iomap iomap = { };
 	int error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -749,8 +749,8 @@
 	}
 
 	while (offset < end) {
-		error = gfs2_iomap_begin(inode, offset, end - offset,
-					 IOMAP_WRITE, &iomap);
+		error = gfs2_iomap_get_alloc(inode, offset, end - offset,
+					     &iomap);
 		if (error)
 			goto out;
 		offset = iomap.offset + iomap.length;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 1b6b1e3..d2ad817 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -116,6 +116,7 @@
 
 static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
 {
+	BUG_ON(rbm->offset >= rbm->rgd->rd_data);
 	return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
 		rbm->offset;
 }
@@ -696,8 +697,6 @@
 	u32 sd_max_dirres;	/* Max blocks needed to add a directory entry */
 	u32 sd_max_height;	/* Max height of a file's metadata tree */
 	u64 sd_heightsize[GFS2_MAX_META_HEIGHT + 1];
-	u32 sd_max_jheight; /* Max height of journaled file's meta tree */
-	u64 sd_jheightsize[GFS2_MAX_META_HEIGHT + 1];
 	u32 sd_max_dents_per_leaf; /* Max number of dirents in a leaf block */
 
 	struct gfs2_args sd_args;	/* Mount arguments */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8700eb8..feda55f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2006,10 +2006,6 @@
 	return 0;
 }
 
-const struct iomap_ops gfs2_iomap_ops = {
-	.iomap_begin = gfs2_iomap_begin,
-};
-
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len)
 {
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 1862e31..2024143 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -14,6 +14,7 @@
 #include <linux/spinlock.h>
 #include <linux/writeback.h>
 #include "incore.h"
+#include "inode.h"
 
 /**
  * gfs2_log_lock - acquire the right to mess with the log manager
@@ -50,8 +51,12 @@
 
 static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct gfs2_sbd *sdp;
 
+	if (!gfs2_is_ordered(ip))
+		return;
+
+	sdp = GFS2_SB(&ip->i_inode);
 	if (!test_bit(GIF_ORDERED, &ip->i_flags)) {
 		spin_lock(&sdp->sd_ordered_lock);
 		if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags))
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3ba3f16..c2469833b 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -335,25 +335,6 @@
 	sdp->sd_heightsize[x] = ~0;
 	gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
 
-	sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
-				 sizeof(struct gfs2_dinode);
-	sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
-	for (x = 2;; x++) {
-		u64 space, d;
-		u32 m;
-
-		space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
-		d = space;
-		m = do_div(d, sdp->sd_inptrs);
-
-		if (d != sdp->sd_jheightsize[x - 1] || m)
-			break;
-		sdp->sd_jheightsize[x] = space;
-	}
-	sdp->sd_max_jheight = x;
-	sdp->sd_jheightsize[x] = ~0;
-	gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
-
 	sdp->sd_max_dents_per_leaf = (sdp->sd_sb.sb_bsize -
 				      sizeof(struct gfs2_leaf)) /
 				     GFS2_MIN_DIRENT_SIZE;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7a98abd..e8585df 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -735,7 +735,10 @@
 			if (!buffer_uptodate(bh))
 				goto unlock_out;
 		}
-		gfs2_trans_add_data(ip->i_gl, bh);
+		if (gfs2_is_jdata(ip))
+			gfs2_trans_add_data(ip->i_gl, bh);
+		else
+			gfs2_ordered_add_inode(ip);
 
 		/* If we need to write to the next block as well */
 		if (to_write > (bsize - boff)) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8b68391..6bc5cfe 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -372,8 +372,8 @@
 		start = bi->bi_bh->b_data;
 		if (bi->bi_clone)
 			start = bi->bi_clone;
-		end = start + bi->bi_bh->b_size;
 		start += bi->bi_offset;
+		end = start + bi->bi_len;
 		BUG_ON(rbm.offset & 3);
 		start += (rbm.offset / GFS2_NBBY);
 		bytes = min_t(u32, len / GFS2_NBBY, (end - start));
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index c75caca..064c9a0 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -143,32 +143,21 @@
  * @gl: The inode glock associated with the buffer
  * @bh: The buffer to add
  *
- * This is used in two distinct cases:
- * i) In ordered write mode
- *    We put the data buffer on a list so that we can ensure that it's
- *    synced to disk at the right time
- * ii) In journaled data mode
- *    We need to journal the data block in the same way as metadata in
- *    the functions above. The difference is that here we have a tag
- *    which is two __be64's being the block number (as per meta data)
- *    and a flag which says whether the data block needs escaping or
- *    not. This means we need a new log entry for each 251 or so data
- *    blocks, which isn't an enormous overhead but twice as much as
- *    for normal metadata blocks.
+ * This is used in journaled data mode.
+ * We need to journal the data block in the same way as metadata in
+ * the functions above. The difference is that here we have a tag
+ * which is two __be64's being the block number (as per meta data)
+ * and a flag which says whether the data block needs escaping or
+ * not. This means we need a new log entry for each 251 or so data
+ * blocks, which isn't an enormous overhead but twice as much as
+ * for normal metadata blocks.
  */
 void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh)
 {
 	struct gfs2_trans *tr = current->journal_info;
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
-	struct address_space *mapping = bh->b_page->mapping;
-	struct gfs2_inode *ip = GFS2_I(mapping->host);
 	struct gfs2_bufdata *bd;
 
-	if (!gfs2_is_jdata(ip)) {
-		gfs2_ordered_add_inode(ip);
-		return;
-	}
-
 	lock_buffer(bh);
 	if (buffer_pinned(bh)) {
 		set_bit(TR_TOUCHED, &tr->tr_flags);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 75b2542..3bf2ae0 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -31,21 +31,15 @@
 	hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
 	res = hfs_brec_read(&fd, &rec, sizeof(rec));
 	if (res) {
-		hfs_find_exit(&fd);
-		if (res == -ENOENT) {
-			/* No such entry */
-			inode = NULL;
-			goto done;
-		}
-		return ERR_PTR(res);
+		if (res != -ENOENT)
+			inode = ERR_PTR(res);
+	} else {
+		inode = hfs_iget(dir->i_sb, &fd.search_key->cat, &rec);
+		if (!inode)
+			inode = ERR_PTR(-EACCES);
 	}
-	inode = hfs_iget(dir->i_sb, &fd.search_key->cat, &rec);
 	hfs_find_exit(&fd);
-	if (!inode)
-		return ERR_PTR(-EACCES);
-done:
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 /*
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 2538b49..b3309b8 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -543,9 +543,9 @@
 	igrab(dir);
 	hlist_add_fake(&inode->i_hash);
 	mark_inode_dirty(inode);
+	dont_mount(dentry);
 out:
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 void hfs_evict_inode(struct inode *inode)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 15e06fb..b525437 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -122,8 +122,7 @@
 	if (S_ISREG(inode->i_mode))
 		HFSPLUS_I(inode)->linkid = linkid;
 out:
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 fail:
 	hfs_find_exit(&fd);
 	return ERR_PTR(err);
diff --git a/fs/internal.h b/fs/internal.h
index e08972d..980d005 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -125,6 +125,7 @@
 int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
 		int flag);
 
+extern int open_check_o_direct(struct file *f);
 extern int vfs_open(const struct path *, struct file *, const struct cred *);
 extern struct file *filp_clone_open(struct file *);
 
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 4823431..b445b13 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -549,7 +549,7 @@
 {
 	struct super_block *sb = file_inode(filp)->i_sb;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* If filesystem doesn't support freeze feature, return. */
@@ -566,7 +566,7 @@
 {
 	struct super_block *sb = file_inode(filp)->i_sb;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* Thaw */
diff --git a/fs/iomap.c b/fs/iomap.c
index afd1635..206539d3 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/file.h>
 #include <linux/uio.h>
 #include <linux/backing-dev.h>
@@ -27,6 +28,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/dax.h>
 #include <linux/sched/signal.h>
+#include <linux/swap.h>
 
 #include "internal.h"
 
@@ -95,6 +97,12 @@
 	return written ? written : ret;
 }
 
+static sector_t
+iomap_sector(struct iomap *iomap, loff_t pos)
+{
+	return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT;
+}
+
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
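[Editor's note: the new iomap_sector() helper centralizes a byte-to-sector conversion that was previously open-coded at three call sites; SECTOR_SHIFT is 9, i.e. 512-byte sectors. A standalone check of the arithmetic:]

    #include <assert.h>
    #include <stdint.h>

    #define SECTOR_SHIFT 9

    /* mirrors iomap_sector(): map a file position to a 512-byte disk sector */
    static uint64_t sector_for(uint64_t addr, uint64_t offset, uint64_t pos)
    {
            return (addr + pos - offset) >> SECTOR_SHIFT;
    }

    int main(void)
    {
            /* extent whose disk address is 1 MiB, mapping file offset 4096 */
            assert(sector_for(1048576, 4096, 8192) == 2056);
            return 0;
    }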
@@ -352,11 +360,8 @@
 static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
 		struct iomap *iomap)
 {
-	sector_t sector = (iomap->addr +
-			   (pos & PAGE_MASK) - iomap->offset) >> 9;
-
-	return __dax_zero_page_range(iomap->bdev, iomap->dax_dev, sector,
-			offset, bytes);
+	return __dax_zero_page_range(iomap->bdev, iomap->dax_dev,
+			iomap_sector(iomap, pos & PAGE_MASK), offset, bytes);
 }
 
 static loff_t
@@ -501,10 +506,13 @@
 	case IOMAP_DELALLOC:
 		flags |= FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN;
 		break;
+	case IOMAP_MAPPED:
+		break;
 	case IOMAP_UNWRITTEN:
 		flags |= FIEMAP_EXTENT_UNWRITTEN;
 		break;
-	case IOMAP_MAPPED:
+	case IOMAP_INLINE:
+		flags |= FIEMAP_EXTENT_DATA_INLINE;
 		break;
 	}
 
@@ -512,8 +520,6 @@
 		flags |= FIEMAP_EXTENT_MERGED;
 	if (iomap->flags & IOMAP_F_SHARED)
 		flags |= FIEMAP_EXTENT_SHARED;
-	if (iomap->flags & IOMAP_F_DATA_INLINE)
-		flags |= FIEMAP_EXTENT_DATA_INLINE;
 
 	return fiemap_fill_next_extent(fi, iomap->offset,
 			iomap->addr != IOMAP_NULL_ADDR ? iomap->addr : 0,
@@ -587,6 +593,113 @@
 }
 EXPORT_SYMBOL_GPL(iomap_fiemap);
 
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
+ * Returns true if found and updates @lastoff to the offset in file.
+ */
+static bool
+page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff,
+		int whence)
+{
+	const struct address_space_operations *ops = inode->i_mapping->a_ops;
+	unsigned int bsize = i_blocksize(inode), off;
+	bool seek_data = whence == SEEK_DATA;
+	loff_t poff = page_offset(page);
+
+	if (WARN_ON_ONCE(*lastoff >= poff + PAGE_SIZE))
+		return false;
+
+	if (*lastoff < poff) {
+		/*
+		 * Last offset smaller than the start of the page means we found
+		 * a hole:
+		 */
+		if (whence == SEEK_HOLE)
+			return true;
+		*lastoff = poff;
+	}
+
+	/*
+	 * Just check the page unless we can and should check block ranges:
+	 */
+	if (bsize == PAGE_SIZE || !ops->is_partially_uptodate)
+		return PageUptodate(page) == seek_data;
+
+	lock_page(page);
+	if (unlikely(page->mapping != inode->i_mapping))
+		goto out_unlock_not_found;
+
+	for (off = 0; off < PAGE_SIZE; off += bsize) {
+		if ((*lastoff & ~PAGE_MASK) >= off + bsize)
+			continue;
+		if (ops->is_partially_uptodate(page, off, bsize) == seek_data) {
+			unlock_page(page);
+			return true;
+		}
+		*lastoff = poff + off + bsize;
+	}
+
+out_unlock_not_found:
+	unlock_page(page);
+	return false;
+}
+
+/*
+ * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
+ *
+ * Within unwritten extents, the page cache determines which parts are holes
+ * and which are data: uptodate buffer heads count as data; everything else
+ * counts as a hole.
+ *
+ * Returns the resulting offset on success, and -ENOENT otherwise.
+ */
+static loff_t
+page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
+		int whence)
+{
+	pgoff_t index = offset >> PAGE_SHIFT;
+	pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
+	loff_t lastoff = offset;
+	struct pagevec pvec;
+
+	if (length <= 0)
+		return -ENOENT;
+
+	pagevec_init(&pvec);
+
+	do {
+		unsigned nr_pages, i;
+
+		nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
+						end - 1);
+		if (nr_pages == 0)
+			break;
+
+		for (i = 0; i < nr_pages; i++) {
+			struct page *page = pvec.pages[i];
+
+			if (page_seek_hole_data(inode, page, &lastoff, whence))
+				goto check_range;
+			lastoff = page_offset(page) + PAGE_SIZE;
+		}
+		pagevec_release(&pvec);
+	} while (index < end);
+
+	/* If no page was found at lastoff and we are not done, we found a hole. */
+	if (whence != SEEK_HOLE)
+		goto not_found;
+
+check_range:
+	if (lastoff < offset + length)
+		goto out;
+not_found:
+	lastoff = -ENOENT;
+out:
+	pagevec_release(&pvec);
+	return lastoff;
+}
+
 static loff_t
 iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
 		      void *data, struct iomap *iomap)
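[Editor's note: these two helpers back SEEK_HOLE/SEEK_DATA for iomap filesystems; from userspace the semantics are the standard lseek() ones. A minimal probe, assuming a hypothetical sparse test file at ./sparse:]

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("./sparse", O_RDONLY);  /* hypothetical sparse file */
            off_t data, hole;

            if (fd < 0)
                    return 1;
            data = lseek(fd, 0, SEEK_DATA);       /* first data byte at or after 0 */
            if (data < 0)                         /* -1/ENXIO: no data past offset */
                    return 0;
            hole = lseek(fd, data, SEEK_HOLE);    /* first hole at or after the data */
            printf("data at %lld, hole at %lld\n", (long long)data, (long long)hole);
            close(fd);
            return 0;
    }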
@@ -685,6 +798,8 @@
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
  */
+#define IOMAP_DIO_WRITE_FUA	(1 << 28)
+#define IOMAP_DIO_NEED_SYNC	(1 << 29)
 #define IOMAP_DIO_WRITE		(1 << 30)
 #define IOMAP_DIO_DIRTY		(1 << 31)
 
@@ -759,6 +874,13 @@
 			dio_warn_stale_pagecache(iocb->ki_filp);
 	}
 
+	/*
+	 * If this is a DSYNC write, make sure we push it to stable storage now
+	 * that we've written data.
+	 */
+	if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
+		ret = generic_write_sync(iocb, ret);
+
 	inode_dio_end(file_inode(iocb->ki_filp));
 	kfree(dio);
 
@@ -769,13 +891,8 @@
 {
 	struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
 	struct kiocb *iocb = dio->iocb;
-	bool is_write = (dio->flags & IOMAP_DIO_WRITE);
-	ssize_t ret;
 
-	ret = iomap_dio_complete(dio);
-	if (is_write && ret > 0)
-		ret = generic_write_sync(iocb, ret);
-	iocb->ki_complete(iocb, ret, 0);
+	iocb->ki_complete(iocb, iomap_dio_complete(dio), 0);
 }
 
 /*
@@ -833,14 +950,12 @@
 
 	bio = bio_alloc(GFP_KERNEL, 1);
 	bio_set_dev(bio, iomap->bdev);
-	bio->bi_iter.bi_sector =
-		(iomap->addr + pos - iomap->offset) >> 9;
+	bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
 	bio->bi_private = dio;
 	bio->bi_end_io = iomap_dio_bio_end_io;
 
 	get_page(page);
-	if (bio_add_page(bio, page, len, 0) != len)
-		BUG();
+	__bio_add_page(bio, page, len, 0);
 	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
 
 	atomic_inc(&dio->ref);
@@ -858,6 +973,7 @@
 	struct iov_iter iter;
 	struct bio *bio;
 	bool need_zeroout = false;
+	bool use_fua = false;
 	int nr_pages, ret;
 	size_t copied = 0;
 
@@ -881,8 +997,20 @@
 	case IOMAP_MAPPED:
 		if (iomap->flags & IOMAP_F_SHARED)
 			dio->flags |= IOMAP_DIO_COW;
-		if (iomap->flags & IOMAP_F_NEW)
+		if (iomap->flags & IOMAP_F_NEW) {
 			need_zeroout = true;
+		} else {
+			/*
+			 * Use a FUA write if we need datasync semantics: this
+			 * is a pure data IO that doesn't require any metadata
+			 * updates, and the underlying device supports FUA. This
+			 * allows us to avoid cache flushes on IO completion.
+			 */
+			if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
+			    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
+			    blk_queue_fua(bdev_get_queue(iomap->bdev)))
+				use_fua = true;
+		}
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -916,8 +1044,7 @@
 
 		bio = bio_alloc(GFP_KERNEL, nr_pages);
 		bio_set_dev(bio, iomap->bdev);
-		bio->bi_iter.bi_sector =
-			(iomap->addr + pos - iomap->offset) >> 9;
+		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
 		bio->bi_write_hint = dio->iocb->ki_hint;
 		bio->bi_private = dio;
 		bio->bi_end_io = iomap_dio_bio_end_io;
@@ -930,10 +1057,14 @@
 
 		n = bio->bi_iter.bi_size;
 		if (dio->flags & IOMAP_DIO_WRITE) {
-			bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
+			bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
+			if (use_fua)
+				bio->bi_opf |= REQ_FUA;
+			else
+				dio->flags &= ~IOMAP_DIO_WRITE_FUA;
 			task_io_account_write(n);
 		} else {
-			bio_set_op_attrs(bio, REQ_OP_READ, 0);
+			bio->bi_opf = REQ_OP_READ;
 			if (dio->flags & IOMAP_DIO_DIRTY)
 				bio_set_pages_dirty(bio);
 		}
@@ -961,6 +1092,15 @@
 	return copied;
 }
 
+/*
+ * iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO
+ * is being issued as AIO or not.  This allows us to optimise pure data writes
+ * to use REQ_FUA rather than requiring generic_write_sync() to issue a
+ * REQ_FLUSH post write. This is slightly tricky because a single request here
+ * can be mapped into multiple disjoint IOs and only a subset of the IOs issued
+ * may be pure data writes. In that case, we still need to do a full data sync
+ * completion.
+ */
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
@@ -1005,8 +1145,21 @@
 		if (iter->type == ITER_IOVEC)
 			dio->flags |= IOMAP_DIO_DIRTY;
 	} else {
-		dio->flags |= IOMAP_DIO_WRITE;
 		flags |= IOMAP_WRITE;
+		dio->flags |= IOMAP_DIO_WRITE;
+
+		/* for data sync or sync, we need sync completion processing */
+		if (iocb->ki_flags & IOCB_DSYNC)
+			dio->flags |= IOMAP_DIO_NEED_SYNC;
+
+		/*
+		 * For datasync only writes, we optimistically try using FUA for
+		 * this IO.  Any non-FUA write that occurs will clear this flag,
+		 * hence we know before completion whether a cache flush is
+		 * necessary.
+		 */
+		if ((iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) == IOCB_DSYNC)
+			dio->flags |= IOMAP_DIO_WRITE_FUA;
 	}
 
 	if (iocb->ki_flags & IOCB_NOWAIT) {
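[Editor's note: taken together, the FUA hunks above implement a small state machine: optimistically enable FUA for pure O_DSYNC writes, drop it as soon as any bio cannot go out as FUA, and skip the completion-time cache flush only if every bio was FUA. A hedged userspace rendering of just that decision logic; the names are illustrative, not the kernel's.]

    #include <stdio.h>

    struct dio_state { int need_sync; int may_fua; };

    static void dio_setup(struct dio_state *s, int is_dsync, int is_sync)
    {
            s->need_sync = is_dsync;                /* O_DSYNC data must reach stable storage */
            s->may_fua = is_dsync && !is_sync;      /* pure datasync: optimistically try FUA */
    }

    static void dio_submit_one(struct dio_state *s, int dev_fua, int pure_overwrite)
    {
            if (!(s->may_fua && dev_fua && pure_overwrite))
                    s->may_fua = 0;                 /* one non-FUA bio forces the flush path */
    }

    int main(void)
    {
            struct dio_state s;

            dio_setup(&s, 1, 0);                    /* O_DSYNC, not O_SYNC */
            dio_submit_one(&s, 1, 1);               /* FUA-capable overwrite: stays FUA */
            printf("flush needed: %d\n", s.need_sync && !s.may_fua);  /* 0 */

            dio_submit_one(&s, 1, 0);               /* allocating write: FUA dropped */
            printf("flush needed: %d\n", s.need_sync && !s.may_fua);  /* 1 */
            return 0;
    }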
@@ -1062,6 +1215,13 @@
 	if (ret < 0)
 		iomap_dio_set_error(dio, ret);
 
+	/*
+	 * If all the writes we issued were FUA, we don't need to flush the
+	 * cache on IO completion. Clear the sync flag for this case.
+	 */
+	if (dio->flags & IOMAP_DIO_WRITE_FUA)
+		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
+
 	if (!atomic_dec_and_test(&dio->ref)) {
 		if (!is_sync_kiocb(iocb))
 			return -EIOCBQUEUED;
@@ -1089,3 +1249,203 @@
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);
+
+/* Swapfile activation */
+
+#ifdef CONFIG_SWAP
+struct iomap_swapfile_info {
+	struct iomap iomap;		/* accumulated iomap */
+	struct swap_info_struct *sis;
+	uint64_t lowest_ppage;		/* lowest physical addr seen (pages) */
+	uint64_t highest_ppage;		/* highest physical addr seen (pages) */
+	unsigned long nr_pages;		/* number of pages collected */
+	int nr_extents;			/* extent count */
+};
+
+/*
+ * Collect physical extents for this swap file.  Physical extents reported to
+ * the swap code must be trimmed to align to a page boundary.  The logical
+ * offset within the file is irrelevant since the swapfile code maps logical
+ * page numbers of the swap device to the physical page-aligned extents.
+ */
+static int iomap_swapfile_add_extent(struct iomap_swapfile_info *isi)
+{
+	struct iomap *iomap = &isi->iomap;
+	unsigned long nr_pages;
+	uint64_t first_ppage;
+	uint64_t first_ppage_reported;
+	uint64_t next_ppage;
+	int error;
+
+	/*
+	 * Round the start up and the end down so that the physical
+	 * extent aligns to a page boundary.
+	 */
+	first_ppage = ALIGN(iomap->addr, PAGE_SIZE) >> PAGE_SHIFT;
+	next_ppage = ALIGN_DOWN(iomap->addr + iomap->length, PAGE_SIZE) >>
+			PAGE_SHIFT;
+
+	/* Skip too-short physical extents. */
+	if (first_ppage >= next_ppage)
+		return 0;
+	nr_pages = next_ppage - first_ppage;
+
+	/*
+	 * Calculate how much swap space we're adding; the first page contains
+	 * the swap header and doesn't count.  The mm still wants that first
+	 * page fed to add_swap_extent, however.
+	 */
+	first_ppage_reported = first_ppage;
+	if (iomap->offset == 0)
+		first_ppage_reported++;
+	if (isi->lowest_ppage > first_ppage_reported)
+		isi->lowest_ppage = first_ppage_reported;
+	if (isi->highest_ppage < (next_ppage - 1))
+		isi->highest_ppage = next_ppage - 1;
+
+	/* Add extent, set up for the next call. */
+	error = add_swap_extent(isi->sis, isi->nr_pages, nr_pages, first_ppage);
+	if (error < 0)
+		return error;
+	isi->nr_extents += error;
+	isi->nr_pages += nr_pages;
+	return 0;
+}
+
+/*
+ * Accumulate iomaps for this swap file.  We have to accumulate iomaps because
+ * swap only cares about contiguous page-aligned physical extents and makes no
+ * distinction between written and unwritten extents.
+ */
+static loff_t iomap_swapfile_activate_actor(struct inode *inode, loff_t pos,
+		loff_t count, void *data, struct iomap *iomap)
+{
+	struct iomap_swapfile_info *isi = data;
+	int error;
+
+	switch (iomap->type) {
+	case IOMAP_MAPPED:
+	case IOMAP_UNWRITTEN:
+		/* Only real or unwritten extents. */
+		break;
+	case IOMAP_INLINE:
+		/* No inline data. */
+		pr_err("swapon: file is inline\n");
+		return -EINVAL;
+	default:
+		pr_err("swapon: file has unallocated extents\n");
+		return -EINVAL;
+	}
+
+	/* No uncommitted metadata or shared blocks. */
+	if (iomap->flags & IOMAP_F_DIRTY) {
+		pr_err("swapon: file is not committed\n");
+		return -EINVAL;
+	}
+	if (iomap->flags & IOMAP_F_SHARED) {
+		pr_err("swapon: file has shared extents\n");
+		return -EINVAL;
+	}
+
+	/* Only one bdev per swap file. */
+	if (iomap->bdev != isi->sis->bdev) {
+		pr_err("swapon: file is on multiple devices\n");
+		return -EINVAL;
+	}
+
+	if (isi->iomap.length == 0) {
+		/* No accumulated extent, so just store it. */
+		memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
+	} else if (isi->iomap.addr + isi->iomap.length == iomap->addr) {
+		/* Append this to the accumulated extent. */
+		isi->iomap.length += iomap->length;
+	} else {
+		/* Otherwise, add the retained iomap and store this one. */
+		error = iomap_swapfile_add_extent(isi);
+		if (error)
+			return error;
+		memcpy(&isi->iomap, iomap, sizeof(isi->iomap));
+	}
+	return count;
+}
+
+/*
+ * Iterate a swap file's iomaps to construct physical extents that can be
+ * passed to the swapfile subsystem.
+ */
+int iomap_swapfile_activate(struct swap_info_struct *sis,
+		struct file *swap_file, sector_t *pagespan,
+		const struct iomap_ops *ops)
+{
+	struct iomap_swapfile_info isi = {
+		.sis = sis,
+		.lowest_ppage = (sector_t)-1ULL,
+	};
+	struct address_space *mapping = swap_file->f_mapping;
+	struct inode *inode = mapping->host;
+	loff_t pos = 0;
+	loff_t len = ALIGN_DOWN(i_size_read(inode), PAGE_SIZE);
+	loff_t ret;
+
+	ret = filemap_write_and_wait(inode->i_mapping);
+	if (ret)
+		return ret;
+
+	while (len > 0) {
+		ret = iomap_apply(inode, pos, len, IOMAP_REPORT,
+				ops, &isi, iomap_swapfile_activate_actor);
+		if (ret <= 0)
+			return ret;
+
+		pos += ret;
+		len -= ret;
+	}
+
+	if (isi.iomap.length) {
+		ret = iomap_swapfile_add_extent(&isi);
+		if (ret)
+			return ret;
+	}
+
+	*pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
+	sis->max = isi.nr_pages;
+	sis->pages = isi.nr_pages - 1;
+	sis->highest_bit = isi.nr_pages - 1;
+	return isi.nr_extents;
+}
+EXPORT_SYMBOL_GPL(iomap_swapfile_activate);
+#endif /* CONFIG_SWAP */
+
+static loff_t
+iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
+		void *data, struct iomap *iomap)
+{
+	sector_t *bno = data, addr;
+
+	if (iomap->type == IOMAP_MAPPED) {
+		addr = (pos - iomap->offset + iomap->addr) >> inode->i_blkbits;
+		if (addr > INT_MAX)
+			WARN(1, "would truncate bmap result\n");
+		else
+			*bno = addr;
+	}
+	return 0;
+}
+
+/* legacy ->bmap interface.  0 is the error return (!) */
+sector_t
+iomap_bmap(struct address_space *mapping, sector_t bno,
+		const struct iomap_ops *ops)
+{
+	struct inode *inode = mapping->host;
+	loff_t pos = bno << inode->i_blkbits;
+	unsigned blocksize = i_blocksize(inode);
+
+	if (filemap_write_and_wait(mapping))
+		return 0;
+
+	bno = 0;
+	iomap_apply(inode, pos, blocksize, 0, ops, &bno, iomap_bmap_actor);
+	return bno;
+}
+EXPORT_SYMBOL_GPL(iomap_bmap);
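[Editor's note: both new iomap entry points, iomap_bmap() and iomap_swapfile_activate(), are meant to be wired into a filesystem's address_space_operations. A sketch of the glue with foo_* as placeholder names; it mirrors how xfs hooks these up, but is not verbatim from any tree.]

    #include <linux/fs.h>
    #include <linux/iomap.h>
    #include <linux/swap.h>

    /* foo_iomap_ops stands in for the filesystem's real struct iomap_ops */
    extern const struct iomap_ops foo_iomap_ops;

    static sector_t foo_bmap(struct address_space *mapping, sector_t block)
    {
            return iomap_bmap(mapping, block, &foo_iomap_ops);
    }

    static int foo_swap_activate(struct swap_info_struct *sis,
                                 struct file *swap_file, sector_t *span)
    {
            return iomap_swapfile_activate(sis, swap_file, span, &foo_iomap_ops);
    }

    static const struct address_space_operations foo_aops = {
            /* ...read/write paths elided... */
            .bmap           = foo_bmap,
            .swap_activate  = foo_swap_activate,
    };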
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index dfb0579..8ef6b6d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -114,7 +114,7 @@
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
-	printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf);
+	printk(KERN_DEBUG "%s: (%s, %u): %pV", file, func, line, &vaf);
 	va_end(args);
 }
 EXPORT_SYMBOL(__jbd2_debug);
@@ -2302,8 +2302,7 @@
 	int i;
 
 	for (i = 0; i < JBD2_MAX_SLABS; i++) {
-		if (jbd2_slab[i])
-			kmem_cache_destroy(jbd2_slab[i]);
+		kmem_cache_destroy(jbd2_slab[i]);
 		jbd2_slab[i] = NULL;
 	}
 }
@@ -2404,10 +2403,8 @@
 
 static void jbd2_journal_destroy_journal_head_cache(void)
 {
-	if (jbd2_journal_head_cache) {
-		kmem_cache_destroy(jbd2_journal_head_cache);
-		jbd2_journal_head_cache = NULL;
-	}
+	kmem_cache_destroy(jbd2_journal_head_cache);
+	jbd2_journal_head_cache = NULL;
 }
 
 /*
@@ -2665,11 +2662,10 @@
 
 static void jbd2_journal_destroy_handle_cache(void)
 {
-	if (jbd2_handle_cache)
-		kmem_cache_destroy(jbd2_handle_cache);
-	if (jbd2_inode_cache)
-		kmem_cache_destroy(jbd2_inode_cache);
-
+	kmem_cache_destroy(jbd2_handle_cache);
+	jbd2_handle_cache = NULL;
+	kmem_cache_destroy(jbd2_inode_cache);
+	jbd2_inode_cache = NULL;
 }
 
 /*
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 696ef15..240779e 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -180,14 +180,10 @@
 
 void jbd2_journal_destroy_revoke_caches(void)
 {
-	if (jbd2_revoke_record_cache) {
-		kmem_cache_destroy(jbd2_revoke_record_cache);
-		jbd2_revoke_record_cache = NULL;
-	}
-	if (jbd2_revoke_table_cache) {
-		kmem_cache_destroy(jbd2_revoke_table_cache);
-		jbd2_revoke_table_cache = NULL;
-	}
+	kmem_cache_destroy(jbd2_revoke_record_cache);
+	jbd2_revoke_record_cache = NULL;
+	kmem_cache_destroy(jbd2_revoke_table_cache);
+	jbd2_revoke_table_cache = NULL;
 }
 
 int __init jbd2_journal_init_revoke_caches(void)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8aa4537..51dd68e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -49,10 +49,8 @@
 
 void jbd2_journal_destroy_transaction_cache(void)
 {
-	if (transaction_cache) {
-		kmem_cache_destroy(transaction_cache);
-		transaction_cache = NULL;
-	}
+	kmem_cache_destroy(transaction_cache);
+	transaction_cache = NULL;
 }
 
 void jbd2_journal_free_transaction(transaction_t *transaction)
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index a709076..35a5b2a 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -29,7 +29,6 @@
 
 #ifdef PROC_FS_JFS /* see jfs_debug.h */
 
-static struct proc_dir_entry *base;
 #ifdef CONFIG_JFS_DEBUG
 static int jfs_loglevel_proc_show(struct seq_file *m, void *v)
 {
@@ -66,43 +65,29 @@
 };
 #endif
 
-static struct {
-	const char	*name;
-	const struct file_operations *proc_fops;
-} Entries[] = {
-#ifdef CONFIG_JFS_STATISTICS
-	{ "lmstats",	&jfs_lmstats_proc_fops, },
-	{ "txstats",	&jfs_txstats_proc_fops, },
-	{ "xtstat",	&jfs_xtstat_proc_fops, },
-	{ "mpstat",	&jfs_mpstat_proc_fops, },
-#endif
-#ifdef CONFIG_JFS_DEBUG
-	{ "TxAnchor",	&jfs_txanchor_proc_fops, },
-	{ "loglevel",	&jfs_loglevel_proc_fops }
-#endif
-};
-#define NPROCENT	ARRAY_SIZE(Entries)
-
 void jfs_proc_init(void)
 {
-	int i;
+	struct proc_dir_entry *base;
 
-	if (!(base = proc_mkdir("fs/jfs", NULL)))
+	base = proc_mkdir("fs/jfs", NULL);
+	if (!base)
 		return;
 
-	for (i = 0; i < NPROCENT; i++)
-		proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
+#ifdef CONFIG_JFS_STATISTICS
+	proc_create_single("lmstats", 0, base, jfs_lmstats_proc_show);
+	proc_create_single("txstats", 0, base, jfs_txstats_proc_show);
+	proc_create_single("xtstat", 0, base, jfs_xtstat_proc_show);
+	proc_create_single("mpstat", 0, base, jfs_mpstat_proc_show);
+#endif
+#ifdef CONFIG_JFS_DEBUG
+	proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show);
+	proc_create("loglevel", 0, base, &jfs_loglevel_proc_fops);
+#endif
 }
 
 void jfs_proc_clean(void)
 {
-	int i;
-
-	if (base) {
-		for (i = 0; i < NPROCENT; i++)
-			remove_proc_entry(Entries[i].name, base);
-		remove_proc_entry("fs/jfs", NULL);
-	}
+	remove_proc_subtree("fs/jfs", NULL);
 }
 
 #endif /* PROC_FS_JFS */
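[Editor's note: the jfs conversion above (and the locks/nfs ones below) relies on the then-new proc_create_single() helper, which supplies the single_open()/seq_read() boilerplate internally. A minimal, self-contained module sketch of the same pattern; the entry name and show function are invented for illustration.]

    #include <linux/module.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_proc_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "hello from proc_create_single()\n");
            return 0;
    }

    static int __init demo_init(void)
    {
            /* no file_operations, no open wrapper: the helper supplies both */
            if (!proc_create_single("demo_stats", 0, NULL, demo_proc_show))
                    return -ENOMEM;
            return 0;
    }

    static void __exit demo_exit(void)
    {
            remove_proc_entry("demo_stats", NULL);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");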
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index eafd130..0d9e35d 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,7 @@
 
 extern int jfsloglevel;
 
-extern const struct file_operations jfs_txanchor_proc_fops;
+int jfs_txanchor_proc_show(struct seq_file *m, void *v);
 
 /* information message: e.g., configuration, major event */
 #define jfs_info(fmt, arg...) do {			\
@@ -105,10 +105,10 @@
  *	----------
  */
 #ifdef	CONFIG_JFS_STATISTICS
-extern const struct file_operations jfs_lmstats_proc_fops;
-extern const struct file_operations jfs_txstats_proc_fops;
-extern const struct file_operations jfs_mpstat_proc_fops;
-extern const struct file_operations jfs_xtstat_proc_fops;
+int jfs_lmstats_proc_show(struct seq_file *m, void *v);
+int jfs_txstats_proc_show(struct seq_file *m, void *v);
+int jfs_mpstat_proc_show(struct seq_file *m, void *v);
+int jfs_xtstat_proc_show(struct seq_file *m, void *v);
 
 #define	INCREMENT(x)		((x)++)
 #define	DECREMENT(x)		((x)--)
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 0e5d412..6b68df3 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2493,7 +2493,7 @@
 }
 
 #ifdef CONFIG_JFS_STATISTICS
-static int jfs_lmstats_proc_show(struct seq_file *m, void *v)
+int jfs_lmstats_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m,
 		       "JFS Logmgr stats\n"
@@ -2510,16 +2510,4 @@
 		       lmStat.partial_page);
 	return 0;
 }
-
-static int jfs_lmstats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, jfs_lmstats_proc_show, NULL);
-}
-
-const struct file_operations jfs_lmstats_proc_fops = {
-	.open		= jfs_lmstats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* CONFIG_JFS_STATISTICS */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 1a3b0cc..fa2c682 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -815,7 +815,7 @@
 }
 
 #ifdef CONFIG_JFS_STATISTICS
-static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
+int jfs_mpstat_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m,
 		       "JFS Metapage statistics\n"
@@ -828,16 +828,4 @@
 		       mpStat.lockwait);
 	return 0;
 }
-
-static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, jfs_mpstat_proc_show, NULL);
-}
-
-const struct file_operations jfs_mpstat_proc_fops = {
-	.open		= jfs_mpstat_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 4d973524..a5663cb 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2998,7 +2998,7 @@
 }
 
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG)
-static int jfs_txanchor_proc_show(struct seq_file *m, void *v)
+int jfs_txanchor_proc_show(struct seq_file *m, void *v)
 {
 	char *freewait;
 	char *freelockwait;
@@ -3032,22 +3032,10 @@
 		       list_empty(&TxAnchor.unlock_queue) ? "" : "not ");
 	return 0;
 }
-
-static int jfs_txanchor_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, jfs_txanchor_proc_show, NULL);
-}
-
-const struct file_operations jfs_txanchor_proc_fops = {
-	.open		= jfs_txanchor_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS)
-static int jfs_txstats_proc_show(struct seq_file *m, void *v)
+int jfs_txstats_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m,
 		       "JFS TxStats\n"
@@ -3072,16 +3060,4 @@
 		       TxStat.txLockAlloc_freelock);
 	return 0;
 }
-
-static int jfs_txstats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, jfs_txstats_proc_show, NULL);
-}
-
-const struct file_operations jfs_txstats_proc_fops = {
-	.open		= jfs_txstats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 5cde6d2..2c200b5 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -3874,7 +3874,7 @@
 }
 
 #ifdef CONFIG_JFS_STATISTICS
-static int jfs_xtstat_proc_show(struct seq_file *m, void *v)
+int jfs_xtstat_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m,
 		       "JFS Xtree statistics\n"
@@ -3887,16 +3887,4 @@
 		       xtStat.split);
 	return 0;
 }
-
-static int jfs_xtstat_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, jfs_xtstat_proc_show, NULL);
-}
-
-const struct file_operations jfs_xtstat_proc_fops = {
-	.open		= jfs_xtstat_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index fd5ce88..2015d8c 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -348,11 +348,11 @@
 	kernfs_put_active(of->kn);
 }
 
-static int kernfs_vma_fault(struct vm_fault *vmf)
+static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
 {
 	struct file *file = vmf->vma->vm_file;
 	struct kernfs_open_file *of = kernfs_of(file);
-	int ret;
+	vm_fault_t ret;
 
 	if (!of->vm_ops)
 		return VM_FAULT_SIGBUS;
@@ -368,11 +368,11 @@
 	return ret;
 }
 
-static int kernfs_vma_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
 {
 	struct file *file = vmf->vma->vm_file;
 	struct kernfs_open_file *of = kernfs_of(file);
-	int ret;
+	vm_fault_t ret;
 
 	if (!of->vm_ops)
 		return VM_FAULT_SIGBUS;
diff --git a/fs/locks.c b/fs/locks.c
index 62bbe8b..05e211be 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2788,22 +2788,10 @@
 	.show	= locks_show,
 };
 
-static int locks_open(struct inode *inode, struct file *filp)
-{
-	return seq_open_private(filp, &locks_seq_operations,
-					sizeof(struct locks_iterator));
-}
-
-static const struct file_operations proc_locks_operations = {
-	.open		= locks_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-};
-
 static int __init proc_locks_init(void)
 {
-	proc_create("locks", 0, NULL, &proc_locks_operations);
+	proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
+			sizeof(struct locks_iterator), NULL);
 	return 0;
 }
 fs_initcall(proc_locks_init);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ccf0f00..1a6084d 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -28,13 +28,9 @@
 		return ERR_PTR(-ENAMETOOLONG);
 
 	ino = minix_inode_by_name(dentry);
-	if (ino) {
+	if (ino)
 		inode = minix_iget(dir->i_sb, ino);
-		if (IS_ERR(inode))
-			return ERR_CAST(inode);
-	}
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
diff --git a/fs/namei.c b/fs/namei.c
index 186bd24..6df1f61 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -984,13 +984,15 @@
  */
 static int may_linkat(struct path *link)
 {
-	struct inode *inode;
+	struct inode *inode = link->dentry->d_inode;
+
+	/* Inode writeback is not safe when the uid or gid is invalid. */
+	if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+		return -EOVERFLOW;
 
 	if (!sysctl_protected_hardlinks)
 		return 0;
 
-	inode = link->dentry->d_inode;
-
 	/* Source inode owner (or CAP_FOWNER) can hardlink all they like,
 	 * otherwise, it must be a safe source.
 	 */
@@ -1438,10 +1440,8 @@
 static int follow_dotdot(struct nameidata *nd)
 {
 	while(1) {
-		if (nd->path.dentry == nd->root.dentry &&
-		    nd->path.mnt == nd->root.mnt) {
+		if (path_equal(&nd->path, &nd->root))
 			break;
-		}
 		if (nd->path.dentry != nd->path.mnt->mnt_root) {
 			int ret = path_parent_directory(&nd->path);
 			if (ret)
@@ -2749,6 +2749,11 @@
 	BUG_ON(!inode);
 
 	BUG_ON(victim->d_parent->d_inode != dir);
+
+	/* Inode writeback is not safe when the uid or gid is invalid. */
+	if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+		return -EOVERFLOW;
+
 	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
 
 	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
@@ -3367,7 +3372,9 @@
 		goto out;
 	*opened |= FILE_OPENED;
 opened:
-	error = ima_file_check(file, op->acc_mode, *opened);
+	error = open_check_o_direct(file);
+	if (!error)
+		error = ima_file_check(file, op->acc_mode, *opened);
 	if (!error && will_truncate)
 		error = handle_truncate(file);
 out:
@@ -3447,6 +3454,9 @@
 	error = finish_open(file, child, NULL, opened);
 	if (error)
 		goto out2;
+	error = open_check_o_direct(file);
+	if (error)
+		fput(file);
 out2:
 	mnt_drop_write(path.mnt);
 out:
@@ -3672,7 +3682,8 @@
 	if (error)
 		return error;
 
-	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+	if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
+	    !ns_capable(dentry->d_sb->s_user_ns, CAP_MKNOD))
 		return -EPERM;
 
 	if (!dir->i_op->mknod)
@@ -3847,11 +3858,11 @@
 	if (error)
 		goto out;
 
-	shrink_dcache_parent(dentry);
 	error = dir->i_op->rmdir(dir, dentry);
 	if (error)
 		goto out;
 
+	shrink_dcache_parent(dentry);
 	dentry->d_inode->i_flags |= S_DEAD;
 	dont_mount(dentry);
 	detach_mounts(dentry);
@@ -4434,8 +4445,6 @@
 		    old_dir->i_nlink >= max_links)
 			goto out;
 	}
-	if (is_dir && !(flags & RENAME_EXCHANGE) && target)
-		shrink_dcache_parent(new_dentry);
 	if (!is_dir) {
 		error = try_break_deleg(source, delegated_inode);
 		if (error)
@@ -4452,8 +4461,10 @@
 		goto out;
 
 	if (!(flags & RENAME_EXCHANGE) && target) {
-		if (is_dir)
+		if (is_dir) {
+			shrink_dcache_parent(new_dentry);
 			target->i_flags |= S_DEAD;
+		}
 		dont_mount(new_dentry);
 		detach_mounts(new_dentry);
 	}
diff --git a/fs/namespace.c b/fs/namespace.c
index 5f75969..8ddd148 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1590,7 +1590,7 @@
 		 * Special case for "unmounting" root ...
 		 * we just try to remount it readonly.
 		 */
-		if (!capable(CAP_SYS_ADMIN))
+		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 			return -EPERM;
 		down_write(&sb->s_umount);
 		if (!sb_rdonly(sb))
@@ -2333,7 +2333,7 @@
 	down_write(&sb->s_umount);
 	if (ms_flags & MS_BIND)
 		err = change_mount_flags(path->mnt, ms_flags);
-	else if (!capable(CAP_SYS_ADMIN))
+	else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 		err = -EPERM;
 	else
 		err = do_remount_sb(sb, sb_flags, data, 0);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b9129e2..bbc91d7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1067,7 +1067,6 @@
 }
 
 #ifdef CONFIG_PROC_FS
-static int nfs_server_list_open(struct inode *inode, struct file *file);
 static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
 static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
 static void nfs_server_list_stop(struct seq_file *p, void *v);
@@ -1080,14 +1079,6 @@
 	.show	= nfs_server_list_show,
 };
 
-static const struct file_operations nfs_server_list_fops = {
-	.open		= nfs_server_list_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
-static int nfs_volume_list_open(struct inode *inode, struct file *file);
 static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
 static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
 static void nfs_volume_list_stop(struct seq_file *p, void *v);
@@ -1100,23 +1091,6 @@
 	.show	= nfs_volume_list_show,
 };
 
-static const struct file_operations nfs_volume_list_fops = {
-	.open		= nfs_volume_list_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
-/*
- * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
- * we're dealing
- */
-static int nfs_server_list_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &nfs_server_list_ops,
-			   sizeof(struct seq_net_private));
-}
-
 /*
  * set up the iterator to start reading from the server list and return the first item
  */
@@ -1185,15 +1159,6 @@
 }
 
 /*
- * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
- */
-static int nfs_volume_list_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &nfs_volume_list_ops,
-			   sizeof(struct seq_net_private));
-}
-
-/*
  * set up the iterator to start reading from the volume list and return the first item
  */
 static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
@@ -1278,14 +1243,14 @@
 		goto error_0;
 
 	/* a file of servers with which we're dealing */
-	p = proc_create("servers", S_IFREG|S_IRUGO,
-			nn->proc_nfsfs, &nfs_server_list_fops);
+	p = proc_create_net("servers", S_IFREG|S_IRUGO, nn->proc_nfsfs,
+			&nfs_server_list_ops, sizeof(struct seq_net_private));
 	if (!p)
 		goto error_1;
 
 	/* a file of volumes that we have mounted */
-	p = proc_create("volumes", S_IFREG|S_IRUGO,
-			nn->proc_nfsfs, &nfs_volume_list_fops);
+	p = proc_create_net("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs,
+			&nfs_volume_list_ops, sizeof(struct seq_net_private));
 	if (!p)
 		goto error_1;
 	return 0;
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 70b8bf7..a43dfed 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -227,7 +227,7 @@
 	if (!buf)
 		return -ENOMEM;
 
-	rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL);
+	rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
 	if (IS_ERR(rq)) {
 		error = -ENOMEM;
 		goto out_free_buf;
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index b714652..4bee3a7 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -305,11 +305,10 @@
 		ino_t ino = be64_to_cpu(oi->i_head.h_self);
 		brelse(bh);
 		inode = omfs_iget(dir->i_sb, ino);
-		if (IS_ERR(inode))
-			return ERR_CAST(inode);
+	} else if (bh != ERR_PTR(-ENOENT)) {
+		inode = ERR_CAST(bh);
 	}
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 /* sanity check block's self pointer */
diff --git a/fs/open.c b/fs/open.c
index c5ee7cd..d0e955b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -724,6 +724,16 @@
 	return ksys_fchown(fd, user, group);
 }
 
+int open_check_o_direct(struct file *f)
+{
+	/* NB: we're sure to have correct a_ops only after f_op->open */
+	if (f->f_flags & O_DIRECT) {
+		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
+			return -EINVAL;
+	}
+	return 0;
+}
+
 static int do_dentry_open(struct file *f,
 			  struct inode *inode,
 			  int (*open)(struct inode *, struct file *),
@@ -745,7 +755,7 @@
 	if (unlikely(f->f_flags & O_PATH)) {
 		f->f_mode = FMODE_PATH;
 		f->f_op = &empty_fops;
-		goto done;
+		return 0;
 	}
 
 	if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
@@ -798,12 +808,7 @@
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
-done:
-	/* NB: we're sure to have correct a_ops only after f_op->open */
-	error = -EINVAL;
-	if ((f->f_flags & O_DIRECT) &&
-	    (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO))
-	    	goto out_fput;
+
 	return 0;
 
 cleanup_all:
@@ -818,9 +823,6 @@
 	f->f_path.dentry = NULL;
 	f->f_inode = NULL;
 	return error;
-out_fput:
-    	fput(f);
-	return error;
 }
 
 /**
@@ -918,14 +920,20 @@
 	BUG_ON(!path->mnt);
 
 	f = get_empty_filp();
-	if (IS_ERR(f))
-		return f;
-
-	f->f_flags = flags;
-	error = vfs_open(path, f, cred);
-	if (error) {
-		put_filp(f);
-		return ERR_PTR(error);
+	if (!IS_ERR(f)) {
+		f->f_flags = flags;
+		error = vfs_open(path, f, cred);
+		if (!error) {
+			/* from now on we need fput() to dispose of f */
+			error = open_check_o_direct(f);
+			if (error) {
+				fput(f);
+				f = ERR_PTR(error);
+			}
+		} else {
+			put_filp(f);
+			f = ERR_PTR(error);
+		}
 	}
 	return f;
 }
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 2200662..607092f 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -256,8 +256,7 @@
 		break;
 	}
 
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 static int openpromfs_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 1b5707c..365cd73 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -110,7 +110,6 @@
 	struct orangefs_inode_s *parent = ORANGEFS_I(dir);
 	struct orangefs_kernel_op_s *new_op;
 	struct inode *inode;
-	struct dentry *res;
 	int ret = -EINVAL;
 
 	/*
@@ -158,65 +157,18 @@
 		     new_op->downcall.resp.lookup.refn.fs_id,
 		     ret);
 
-	if (ret < 0) {
-		if (ret == -ENOENT) {
-			/*
-			 * if no inode was found, add a negative dentry to
-			 * dcache anyway; if we don't, we don't hold expected
-			 * lookup semantics and we most noticeably break
-			 * during directory renames.
-			 *
-			 * however, if the operation failed or exited, do not
-			 * add the dentry (e.g. in the case that a touch is
-			 * issued on a file that already exists that was
-			 * interrupted during this lookup -- no need to add
-			 * another negative dentry for an existing file)
-			 */
-
-			gossip_debug(GOSSIP_NAME_DEBUG,
-				     "orangefs_lookup: Adding *negative* dentry "
-				     "%p for %pd\n",
-				     dentry,
-				     dentry);
-
-			d_add(dentry, NULL);
-			res = NULL;
-			goto out;
-		}
-
+	if (ret >= 0) {
+		orangefs_set_timeout(dentry);
+		inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
+	} else if (ret == -ENOENT) {
+		inode = NULL;
+	} else {
 		/* must be a non-recoverable error */
-		res = ERR_PTR(ret);
-		goto out;
+		inode = ERR_PTR(ret);
 	}
 
-	orangefs_set_timeout(dentry);
-
-	inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
-	if (IS_ERR(inode)) {
-		gossip_debug(GOSSIP_NAME_DEBUG,
-			"error %ld from iget\n", PTR_ERR(inode));
-		res = ERR_CAST(inode);
-		goto out;
-	}
-
-	gossip_debug(GOSSIP_NAME_DEBUG,
-		     "%s:%s:%d "
-		     "Found good inode [%lu] with count [%d]\n",
-		     __FILE__,
-		     __func__,
-		     __LINE__,
-		     inode->i_ino,
-		     (int)atomic_read(&inode->i_count));
-
-	/* update dentry/inode pair into dcache */
-	res = d_splice_alias(inode, dentry);
-
-	gossip_debug(GOSSIP_NAME_DEBUG,
-		     "Lookup success (inode ct = %d)\n",
-		     (int)atomic_read(&inode->i_count));
-out:
 	op_release(new_op);
-	return res;
+	return d_splice_alias(inode, dentry);
 }
 
 /* return 0 on success; non-zero otherwise */
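[Editor's note: the hfs, hfsplus, minix, omfs, openpromfs, and orangefs hunks above all converge on the same canonical ->lookup() shape, because d_splice_alias() accepts a good inode, NULL, or an ERR_PTR() directly. A hedged template; the foo_* helpers are placeholders.]

    static struct dentry *foo_lookup(struct inode *dir, struct dentry *dentry,
                                     unsigned int flags)
    {
            struct inode *inode = NULL;
            ino_t ino = foo_inode_by_name(dir, &dentry->d_name);

            if (ino)
                    inode = foo_iget(dir->i_sb, ino);  /* may return ERR_PTR() */

            /*
             * d_splice_alias() handles all three cases: a good inode is
             * spliced in, NULL instantiates a negative dentry, and an
             * ERR_PTR() inode is propagated as the return value.
             */
            return d_splice_alias(inode, dentry);
    }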
diff --git a/fs/pipe.c b/fs/pipe.c
index 39d6f43..bb0840e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -509,19 +509,22 @@
 	}
 }
 
-/* No kernel lock held - fine */
-static __poll_t
-pipe_poll(struct file *filp, poll_table *wait)
+static struct wait_queue_head *
+pipe_get_poll_head(struct file *filp, __poll_t events)
 {
-	__poll_t mask;
 	struct pipe_inode_info *pipe = filp->private_data;
-	int nrbufs;
 
-	poll_wait(filp, &pipe->wait, wait);
+	return &pipe->wait;
+}
+
+/* No kernel lock held - fine */
+static __poll_t pipe_poll_mask(struct file *filp, __poll_t events)
+{
+	struct pipe_inode_info *pipe = filp->private_data;
+	int nrbufs = pipe->nrbufs;
+	__poll_t mask = 0;
 
 	/* Reading only -- no need for acquiring the semaphore.  */
-	nrbufs = pipe->nrbufs;
-	mask = 0;
 	if (filp->f_mode & FMODE_READ) {
 		mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
 		if (!pipe->writers && filp->f_version != pipe->w_counter)
@@ -1020,7 +1023,8 @@
 	.llseek		= no_llseek,
 	.read_iter	= pipe_read,
 	.write_iter	= pipe_write,
-	.poll		= pipe_poll,
+	.get_poll_head	= pipe_get_poll_head,
+	.poll_mask	= pipe_poll_mask,
 	.unlocked_ioctl	= pipe_ioctl,
 	.release	= pipe_release,
 	.fasync		= pipe_fasync,
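[Editor's note: the pipe conversion targets the ->get_poll_head()/->poll_mask() split, which separates "which waitqueue to sleep on" from "which events are ready now". Roughly, the VFS composes the two like this; a sketch of the calling convention, not the literal vfs_poll() body.]

    /* sketch only: approximately what the VFS does with the split ops */
    static __poll_t poll_with_mask(struct file *file, struct poll_table_struct *pt,
                                   __poll_t events)
    {
            struct wait_queue_head *head = file->f_op->get_poll_head(file, events);

            if (IS_ERR_OR_NULL(head))
                    return EPOLLERR;
            poll_wait(file, head, pt);                   /* register on the waitqueue */
            return file->f_op->poll_mask(file, events);  /* snapshot current readiness */
    }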
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 72391b3..004077f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -96,22 +96,29 @@
 #include <asm/processor.h>
 #include "internal.h"
 
-static inline void task_name(struct seq_file *m, struct task_struct *p)
+void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape)
 {
 	char *buf;
 	size_t size;
-	char tcomm[sizeof(p->comm)];
+	char tcomm[64];
 	int ret;
 
-	get_task_comm(tcomm, p);
-
-	seq_puts(m, "Name:\t");
+	if (p->flags & PF_WQ_WORKER)
+		wq_worker_comm(tcomm, sizeof(tcomm), p);
+	else
+		__get_task_comm(tcomm, sizeof(tcomm), p);
 
 	size = seq_get_buf(m, &buf);
-	ret = string_escape_str(tcomm, buf, size, ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
-	seq_commit(m, ret < size ? ret : -1);
+	if (escape) {
+		ret = string_escape_str(tcomm, buf, size,
+					ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
+		if (ret >= size)
+			ret = -1;
+	} else {
+		ret = strscpy(buf, tcomm, size);
+	}
 
-	seq_putc(m, '\n');
+	seq_commit(m, ret);
 }
 
 /*
@@ -390,7 +397,10 @@
 {
 	struct mm_struct *mm = get_task_mm(task);
 
-	task_name(m, task);
+	seq_puts(m, "Name:\t");
+	proc_task_name(m, task, true);
+	seq_putc(m, '\n');
+
 	task_state(m, ns, pid, task);
 
 	if (mm) {
@@ -425,7 +435,6 @@
 	u64 cutime, cstime, utime, stime;
 	u64 cgtime, gtime;
 	unsigned long rsslim = 0;
-	char tcomm[sizeof(task->comm)];
 	unsigned long flags;
 
 	state = *get_task_state(task);
@@ -452,8 +461,6 @@
 		}
 	}
 
-	get_task_comm(tcomm, task);
-
 	sigemptyset(&sigign);
 	sigemptyset(&sigcatch);
 	cutime = cstime = utime = stime = 0;
@@ -520,7 +527,7 @@
 
 	seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
 	seq_puts(m, " (");
-	seq_puts(m, tcomm);
+	proc_task_name(m, task, false);
 	seq_puts(m, ") ");
 	seq_putc(m, state);
 	seq_put_decimal_ll(m, " ", ppid);
@@ -702,25 +709,22 @@
 
 static int children_seq_show(struct seq_file *seq, void *v)
 {
-	struct inode *inode = seq->private;
-	pid_t pid;
+	struct inode *inode = file_inode(seq->file);
 
-	pid = pid_nr_ns(v, inode->i_sb->s_fs_info);
-	seq_printf(seq, "%d ", pid);
-
+	seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode)));
 	return 0;
 }
 
 static void *children_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	return get_children_pid(seq->private, NULL, *pos);
+	return get_children_pid(file_inode(seq->file), NULL, *pos);
 }
 
 static void *children_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct pid *pid;
 
-	pid = get_children_pid(seq->private, v, *pos + 1);
+	pid = get_children_pid(file_inode(seq->file), v, *pos + 1);
 	put_pid(v);
 
 	++*pos;
@@ -741,17 +745,7 @@
 
 static int children_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &children_seq_ops);
-	if (ret)
-		return ret;
-
-	m = file->private_data;
-	m->private = inode;
-
-	return ret;
+	return seq_open(file, &children_seq_ops);
 }
 
 const struct file_operations proc_tid_children_operations = {
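
proc_task_name() above keeps the old task_name() trick of writing straight into the seq_file buffer: seq_get_buf() lends out the remaining space, and committing -1 tells seq_file the buffer overflowed so it retries with a larger one. The same borrow/commit idiom in isolation, as a sketch:

    static void emit_escaped(struct seq_file *m, const char *s)
    {
            size_t size;
            char *buf;
            int ret;

            size = seq_get_buf(m, &buf);    /* borrow the remaining buffer */
            ret = string_escape_str(s, buf, size,
                                    ESCAPE_SPACE | ESCAPE_SPECIAL, "\n\\");
            if (ret >= size)
                    ret = -1;               /* overflow: make seq_file retry */
            seq_commit(m, ret);
    }
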
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1a76d75..af128b3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -698,7 +698,7 @@
 
 static int proc_pid_permission(struct inode *inode, int mask)
 {
-	struct pid_namespace *pid = inode->i_sb->s_fs_info;
+	struct pid_namespace *pid = proc_pid_ns(inode);
 	struct task_struct *task;
 	bool has_perms;
 
@@ -733,13 +733,11 @@
 static int proc_single_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = m->private;
-	struct pid_namespace *ns;
-	struct pid *pid;
+	struct pid_namespace *ns = proc_pid_ns(inode);
+	struct pid *pid = proc_pid(inode);
 	struct task_struct *task;
 	int ret;
 
-	ns = inode->i_sb->s_fs_info;
-	pid = proc_pid(inode);
 	task = get_pid_task(pid, PIDTYPE_PID);
 	if (!task)
 		return -ESRCH;
@@ -1410,7 +1408,7 @@
 static int sched_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = m->private;
-	struct pid_namespace *ns = inode->i_sb->s_fs_info;
+	struct pid_namespace *ns = proc_pid_ns(inode);
 	struct task_struct *p;
 
 	p = get_proc_task(inode);
@@ -1565,9 +1563,8 @@
 	if (!p)
 		return -ESRCH;
 
-	task_lock(p);
-	seq_printf(m, "%s\n", p->comm);
-	task_unlock(p);
+	proc_task_name(m, p, false);
+	seq_putc(m, '\n');
 
 	put_task_struct(p);
 
@@ -1782,8 +1779,8 @@
 		u32 request_mask, unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
+	struct pid_namespace *pid = proc_pid_ns(inode);
 	struct task_struct *task;
-	struct pid_namespace *pid = path->dentry->d_sb->s_fs_info;
 
 	generic_fillattr(inode, stat);
 
@@ -1809,15 +1806,22 @@
 /* dentry stuff */
 
 /*
- *	Exceptional case: normally we are not allowed to unhash a busy
- * directory. In this case, however, we can do it - no aliasing problems
- * due to the way we treat inodes.
- *
+ * Set <pid>/... inode ownership (can change due to setuid(), etc.)
+ */
+void pid_update_inode(struct task_struct *task, struct inode *inode)
+{
+	task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+
+	inode->i_mode &= ~(S_ISUID | S_ISGID);
+	security_task_to_inode(task, inode);
+}
+
+/*
  * Rewrite the inode's ownerships here because the owning task may have
  * performed a setuid(), etc.
  *
  */
-int pid_revalidate(struct dentry *dentry, unsigned int flags)
+static int pid_revalidate(struct dentry *dentry, unsigned int flags)
 {
 	struct inode *inode;
 	struct task_struct *task;
@@ -1829,10 +1833,7 @@
 	task = get_proc_task(inode);
 
 	if (task) {
-		task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
-
-		inode->i_mode &= ~(S_ISUID | S_ISGID);
-		security_task_to_inode(task, inode);
+		pid_update_inode(task, inode);
 		put_task_struct(task);
 		return 1;
 	}
@@ -1880,8 +1881,8 @@
 	struct dentry *child, *dir = file->f_path.dentry;
 	struct qstr qname = QSTR_INIT(name, len);
 	struct inode *inode;
-	unsigned type;
-	ino_t ino;
+	unsigned type = DT_UNKNOWN;
+	ino_t ino = 1;
 
 	child = d_hash_and_lookup(dir, &qname);
 	if (!child) {
@@ -1890,22 +1891,23 @@
 		if (IS_ERR(child))
 			goto end_instantiate;
 		if (d_in_lookup(child)) {
-			int err = instantiate(d_inode(dir), child, task, ptr);
+			struct dentry *res;
+			res = instantiate(child, task, ptr);
 			d_lookup_done(child);
-			if (err < 0) {
-				dput(child);
+			if (IS_ERR(res))
 				goto end_instantiate;
+			if (unlikely(res)) {
+				dput(child);
+				child = res;
 			}
 		}
 	}
 	inode = d_inode(child);
 	ino = inode->i_ino;
 	type = inode->i_mode >> 12;
+end_instantiate:
 	dput(child);
 	return dir_emit(ctx, name, len, ino, type);
-
-end_instantiate:
-	return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
 }
 
 /*
@@ -2067,19 +2069,19 @@
 	.setattr	= proc_setattr,
 };
 
-static int
-proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
+static struct dentry *
+proc_map_files_instantiate(struct dentry *dentry,
 			   struct task_struct *task, const void *ptr)
 {
 	fmode_t mode = (fmode_t)(unsigned long)ptr;
 	struct proc_inode *ei;
 	struct inode *inode;
 
-	inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK |
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK |
 				    ((mode & FMODE_READ ) ? S_IRUSR : 0) |
 				    ((mode & FMODE_WRITE) ? S_IWUSR : 0));
 	if (!inode)
-		return -ENOENT;
+		return ERR_PTR(-ENOENT);
 
 	ei = PROC_I(inode);
 	ei->op.proc_get_link = map_files_get_link;
@@ -2088,9 +2090,7 @@
 	inode->i_size = 64;
 
 	d_set_d_op(dentry, &tid_map_files_dentry_operations);
-	d_add(dentry, inode);
-
-	return 0;
+	return d_splice_alias(inode, dentry);
 }
 
 static struct dentry *proc_map_files_lookup(struct inode *dir,
@@ -2099,19 +2099,19 @@
 	unsigned long vm_start, vm_end;
 	struct vm_area_struct *vma;
 	struct task_struct *task;
-	int result;
+	struct dentry *result;
 	struct mm_struct *mm;
 
-	result = -ENOENT;
+	result = ERR_PTR(-ENOENT);
 	task = get_proc_task(dir);
 	if (!task)
 		goto out;
 
-	result = -EACCES;
+	result = ERR_PTR(-EACCES);
 	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
 		goto out_put_task;
 
-	result = -ENOENT;
+	result = ERR_PTR(-ENOENT);
 	if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
 		goto out_put_task;
 
@@ -2125,7 +2125,7 @@
 		goto out_no_vma;
 
 	if (vma->vm_file)
-		result = proc_map_files_instantiate(dir, dentry, task,
+		result = proc_map_files_instantiate(dentry, task,
 				(void *)(unsigned long)vma->vm_file->f_mode);
 
 out_no_vma:
@@ -2134,7 +2134,7 @@
 out_put_task:
 	put_task_struct(task);
 out:
-	return ERR_PTR(result);
+	return result;
 }
 
 static const struct inode_operations proc_map_files_inode_operations = {
@@ -2337,7 +2337,7 @@
 		return -ENOMEM;
 
 	tp->pid = proc_pid(inode);
-	tp->ns = inode->i_sb->s_fs_info;
+	tp->ns = proc_pid_ns(inode);
 	return 0;
 }
 
@@ -2435,16 +2435,16 @@
 	.release	= single_release,
 };
 
-static int proc_pident_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_pident_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr)
 {
 	const struct pid_entry *p = ptr;
 	struct inode *inode;
 	struct proc_inode *ei;
 
-	inode = proc_pid_make_inode(dir->i_sb, task, p->mode);
+	inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
 	if (!inode)
-		goto out;
+		return ERR_PTR(-ENOENT);
 
 	ei = PROC_I(inode);
 	if (S_ISDIR(inode->i_mode))
@@ -2454,13 +2454,9 @@
 	if (p->fop)
 		inode->i_fop = p->fop;
 	ei->op = p->op;
+	pid_update_inode(task, inode);
 	d_set_d_op(dentry, &pid_dentry_operations);
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (pid_revalidate(dentry, 0))
-		return 0;
-out:
-	return -ENOENT;
+	return d_splice_alias(inode, dentry);
 }
 
 static struct dentry *proc_pident_lookup(struct inode *dir, 
@@ -2468,11 +2464,9 @@
 					 const struct pid_entry *ents,
 					 unsigned int nents)
 {
-	int error;
 	struct task_struct *task = get_proc_task(dir);
 	const struct pid_entry *p, *last;
-
-	error = -ENOENT;
+	struct dentry *res = ERR_PTR(-ENOENT);
 
 	if (!task)
 		goto out_no_task;
@@ -2491,11 +2485,11 @@
 	if (p >= last)
 		goto out;
 
-	error = proc_pident_instantiate(dir, dentry, task, p);
+	res = proc_pident_instantiate(dentry, task, p);
 out:
 	put_task_struct(task);
 out_no_task:
-	return ERR_PTR(error);
+	return res;
 }
 
 static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
@@ -3138,38 +3132,32 @@
 	}
 }
 
-static int proc_pid_instantiate(struct inode *dir,
-				   struct dentry * dentry,
+static struct dentry *proc_pid_instantiate(struct dentry * dentry,
 				   struct task_struct *task, const void *ptr)
 {
 	struct inode *inode;
 
-	inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
 	if (!inode)
-		goto out;
+		return ERR_PTR(-ENOENT);
 
 	inode->i_op = &proc_tgid_base_inode_operations;
 	inode->i_fop = &proc_tgid_base_operations;
 	inode->i_flags|=S_IMMUTABLE;
 
 	set_nlink(inode, nlink_tgid);
+	pid_update_inode(task, inode);
 
 	d_set_d_op(dentry, &pid_dentry_operations);
-
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (pid_revalidate(dentry, 0))
-		return 0;
-out:
-	return -ENOENT;
+	return d_splice_alias(inode, dentry);
 }
 
 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
-	int result = -ENOENT;
 	struct task_struct *task;
 	unsigned tgid;
 	struct pid_namespace *ns;
+	struct dentry *result = ERR_PTR(-ENOENT);
 
 	tgid = name_to_int(&dentry->d_name);
 	if (tgid == ~0U)
@@ -3184,10 +3172,10 @@
 	if (!task)
 		goto out;
 
-	result = proc_pid_instantiate(dir, dentry, task, NULL);
+	result = proc_pid_instantiate(dentry, task, NULL);
 	put_task_struct(task);
 out:
-	return ERR_PTR(result);
+	return result;
 }
 
 /*
@@ -3239,7 +3227,7 @@
 int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct tgid_iter iter;
-	struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
+	struct pid_namespace *ns = proc_pid_ns(file_inode(file));
 	loff_t pos = ctx->pos;
 
 	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
@@ -3435,37 +3423,32 @@
 	.setattr	= proc_setattr,
 };
 
-static int proc_task_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_task_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr)
 {
 	struct inode *inode;
-	inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
-
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO);
 	if (!inode)
-		goto out;
+		return ERR_PTR(-ENOENT);
+
 	inode->i_op = &proc_tid_base_inode_operations;
 	inode->i_fop = &proc_tid_base_operations;
-	inode->i_flags|=S_IMMUTABLE;
+	inode->i_flags |= S_IMMUTABLE;
 
 	set_nlink(inode, nlink_tid);
+	pid_update_inode(task, inode);
 
 	d_set_d_op(dentry, &pid_dentry_operations);
-
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (pid_revalidate(dentry, 0))
-		return 0;
-out:
-	return -ENOENT;
+	return d_splice_alias(inode, dentry);
 }
 
 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
-	int result = -ENOENT;
 	struct task_struct *task;
 	struct task_struct *leader = get_proc_task(dir);
 	unsigned tid;
 	struct pid_namespace *ns;
+	struct dentry *result = ERR_PTR(-ENOENT);
 
 	if (!leader)
 		goto out_no_task;
@@ -3485,13 +3468,13 @@
 	if (!same_thread_group(leader, task))
 		goto out_drop_task;
 
-	result = proc_task_instantiate(dir, dentry, task, NULL);
+	result = proc_task_instantiate(dentry, task, NULL);
 out_drop_task:
 	put_task_struct(task);
 out:
 	put_task_struct(leader);
 out_no_task:
-	return ERR_PTR(result);
+	return result;
 }
 
 /*
@@ -3588,7 +3571,7 @@
 	/* f_version caches the tgid value that the last readdir call couldn't
 	 * return. lseek aka telldir automagically resets f_version to 0.
 	 */
-	ns = inode->i_sb->s_fs_info;
+	ns = proc_pid_ns(inode);
 	tid = (int)file->f_version;
 	file->f_version = 0;
 	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
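
This file establishes the shape every instantiate callback in fs/proc now follows: return the result of d_splice_alias() instead of 0/-ENOENT, and call pid_update_inode() up front so no pid_revalidate() pass is needed to close the race with the task dying. Condensed to its skeleton (the i_op/i_fop assignments vary per caller):

    static struct dentry *example_instantiate(struct dentry *dentry,
                                              struct task_struct *task,
                                              const void *ptr)
    {
            struct inode *inode;

            inode = proc_pid_make_inode(dentry->d_sb, task, S_IFREG | S_IRUSR);
            if (!inode)
                    return ERR_PTR(-ENOENT);

            /* ... set inode->i_op, inode->i_fop, PROC_I(inode)->op ... */

            pid_update_inode(task, inode);  /* ownership fixed before exposure */
            d_set_d_op(dentry, &pid_dentry_operations);
            return d_splice_alias(inode, dentry);
    }
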
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 8233e7a..fa762c5 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -11,21 +11,9 @@
 	return 0;
 }
 
-static int cmdline_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cmdline_proc_show, NULL);
-}
-
-static const struct file_operations cmdline_proc_fops = {
-	.open		= cmdline_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_cmdline_init(void)
 {
-	proc_create("cmdline", 0, NULL, &cmdline_proc_fops);
+	proc_create_single("cmdline", 0, NULL, cmdline_proc_show);
 	return 0;
 }
 fs_initcall(proc_cmdline_init);
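
proc_create_single() is the helper this series introduces for the very common "one show function, no per-open state" case; the show function keeps its single_open() signature and the open/read/llseek/release boilerplate disappears. A sketch of a new user:

    static int hello_proc_show(struct seq_file *m, void *v)
    {
            seq_puts(m, "hello\n");
            return 0;
    }

    static int __init hello_proc_init(void)
    {
            /* replaces a private open() plus a file_operations table */
            proc_create_single("hello", 0, NULL, hello_proc_show);
            return 0;
    }
    fs_initcall(hello_proc_init);
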
diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c
index a8ac48a..954caf0 100644
--- a/fs/proc/consoles.c
+++ b/fs/proc/consoles.c
@@ -91,21 +91,9 @@
 	.show	= show_console_dev
 };
 
-static int consoles_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &consoles_op);
-}
-
-static const struct file_operations proc_consoles_operations = {
-	.open		= consoles_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static int __init proc_consoles_init(void)
 {
-	proc_create("consoles", 0, NULL, &proc_consoles_operations);
+	proc_create_seq("consoles", 0, NULL, &consoles_op);
 	return 0;
 }
 fs_initcall(proc_consoles_init);
diff --git a/fs/proc/devices.c b/fs/proc/devices.c
index 2c7f22b..37d3869 100644
--- a/fs/proc/devices.c
+++ b/fs/proc/devices.c
@@ -51,21 +51,9 @@
 	.show  = devinfo_show
 };
 
-static int devinfo_open(struct inode *inode, struct file *filp)
-{
-	return seq_open(filp, &devinfo_ops);
-}
-
-static const struct file_operations proc_devinfo_operations = {
-	.open		= devinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static int __init proc_devices_init(void)
 {
-	proc_create("devices", 0, NULL, &proc_devinfo_operations);
+	proc_create_seq("devices", 0, NULL, &devinfo_ops);
 	return 0;
 }
 fs_initcall(proc_devices_init);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 6b80cd1..05b9893 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -81,9 +81,41 @@
 	.release	= single_release,
 };
 
+static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
+{
+	struct files_struct *files = get_files_struct(task);
+	struct file *file;
+
+	if (!files)
+		return false;
+
+	rcu_read_lock();
+	file = fcheck_files(files, fd);
+	if (file)
+		*mode = file->f_mode;
+	rcu_read_unlock();
+	put_files_struct(files);
+	return !!file;
+}
+
+static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
+				fmode_t f_mode)
+{
+	task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+
+	if (S_ISLNK(inode->i_mode)) {
+		unsigned i_mode = S_IFLNK;
+		if (f_mode & FMODE_READ)
+			i_mode |= S_IRUSR | S_IXUSR;
+		if (f_mode & FMODE_WRITE)
+			i_mode |= S_IWUSR | S_IXUSR;
+		inode->i_mode = i_mode;
+	}
+	security_task_to_inode(task, inode);
+}
+
 static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	struct files_struct *files;
 	struct task_struct *task;
 	struct inode *inode;
 	unsigned int fd;
@@ -96,35 +128,11 @@
 	fd = proc_fd(inode);
 
 	if (task) {
-		files = get_files_struct(task);
-		if (files) {
-			struct file *file;
-
-			rcu_read_lock();
-			file = fcheck_files(files, fd);
-			if (file) {
-				unsigned f_mode = file->f_mode;
-
-				rcu_read_unlock();
-				put_files_struct(files);
-
-				task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
-
-				if (S_ISLNK(inode->i_mode)) {
-					unsigned i_mode = S_IFLNK;
-					if (f_mode & FMODE_READ)
-						i_mode |= S_IRUSR | S_IXUSR;
-					if (f_mode & FMODE_WRITE)
-						i_mode |= S_IWUSR | S_IXUSR;
-					inode->i_mode = i_mode;
-				}
-
-				security_task_to_inode(task, inode);
-				put_task_struct(task);
-				return 1;
-			}
-			rcu_read_unlock();
-			put_files_struct(files);
+		fmode_t f_mode;
+		if (tid_fd_mode(task, fd, &f_mode)) {
+			tid_fd_update_inode(task, inode, f_mode);
+			put_task_struct(task);
+			return 1;
 		}
 		put_task_struct(task);
 	}
@@ -166,34 +174,33 @@
 	return ret;
 }
 
-static int
-proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
-		    struct task_struct *task, const void *ptr)
+struct fd_data {
+	fmode_t mode;
+	unsigned fd;
+};
+
+static struct dentry *proc_fd_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr)
 {
-	unsigned fd = (unsigned long)ptr;
+	const struct fd_data *data = ptr;
 	struct proc_inode *ei;
 	struct inode *inode;
 
-	inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK);
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK);
 	if (!inode)
-		goto out;
+		return ERR_PTR(-ENOENT);
 
 	ei = PROC_I(inode);
-	ei->fd = fd;
+	ei->fd = data->fd;
 
 	inode->i_op = &proc_pid_link_inode_operations;
 	inode->i_size = 64;
 
 	ei->op.proc_get_link = proc_fd_link;
+	tid_fd_update_inode(task, inode, data->mode);
 
 	d_set_d_op(dentry, &tid_fd_dentry_operations);
-	d_add(dentry, inode);
-
-	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, 0))
-		return 0;
- out:
-	return -ENOENT;
+	return d_splice_alias(inode, dentry);
 }
 
 static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -201,19 +208,21 @@
 					   instantiate_t instantiate)
 {
 	struct task_struct *task = get_proc_task(dir);
-	int result = -ENOENT;
-	unsigned fd = name_to_int(&dentry->d_name);
+	struct fd_data data = {.fd = name_to_int(&dentry->d_name)};
+	struct dentry *result = ERR_PTR(-ENOENT);
 
 	if (!task)
 		goto out_no_task;
-	if (fd == ~0U)
+	if (data.fd == ~0U)
+		goto out;
+	if (!tid_fd_mode(task, data.fd, &data.mode))
 		goto out;
 
-	result = instantiate(dir, dentry, task, (void *)(unsigned long)fd);
+	result = instantiate(dentry, task, &data);
 out:
 	put_task_struct(task);
 out_no_task:
-	return ERR_PTR(result);
+	return result;
 }
 
 static int proc_readfd_common(struct file *file, struct dir_context *ctx,
@@ -236,17 +245,22 @@
 	for (fd = ctx->pos - 2;
 	     fd < files_fdtable(files)->max_fds;
 	     fd++, ctx->pos++) {
+		struct file *f;
+		struct fd_data data;
 		char name[10 + 1];
 		int len;
 
-		if (!fcheck_files(files, fd))
+		f = fcheck_files(files, fd);
+		if (!f)
 			continue;
+		data.mode = f->f_mode;
 		rcu_read_unlock();
+		data.fd = fd;
 
 		len = snprintf(name, sizeof(name), "%u", fd);
 		if (!proc_fill_cache(file, ctx,
 				     name, len, instantiate, p,
-				     (void *)(unsigned long)fd))
+				     &data))
 			goto out_fd_loop;
 		cond_resched();
 		rcu_read_lock();
@@ -304,31 +318,25 @@
 	.setattr	= proc_setattr,
 };
 
-static int
-proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
-			struct task_struct *task, const void *ptr)
+static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr)
 {
-	unsigned fd = (unsigned long)ptr;
+	const struct fd_data *data = ptr;
 	struct proc_inode *ei;
 	struct inode *inode;
 
-	inode = proc_pid_make_inode(dir->i_sb, task, S_IFREG | S_IRUSR);
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFREG | S_IRUSR);
 	if (!inode)
-		goto out;
+		return ERR_PTR(-ENOENT);
 
 	ei = PROC_I(inode);
-	ei->fd = fd;
+	ei->fd = data->fd;
 
 	inode->i_fop = &proc_fdinfo_file_operations;
+	tid_fd_update_inode(task, inode, 0);
 
 	d_set_d_op(dentry, &tid_fd_dentry_operations);
-	d_add(dentry, inode);
-
-	/* Close the race of the process dying before we return the dentry */
-	if (tid_fd_revalidate(dentry, 0))
-		return 0;
- out:
-	return -ENOENT;
+	return d_splice_alias(inode, dentry);
 }
 
 static struct dentry *
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2078e70..7b4d971 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -25,6 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/completion.h>
 #include <linux/uaccess.h>
+#include <linux/seq_file.h>
 
 #include "internal.h"
 
@@ -256,8 +257,7 @@
 		if (!inode)
 			return ERR_PTR(-ENOMEM);
 		d_set_d_op(dentry, &proc_misc_dentry_ops);
-		d_add(dentry, inode);
-		return NULL;
+		return d_splice_alias(inode, dentry);
 	}
 	read_unlock(&proc_subdir_lock);
 	return ERR_PTR(-ENOENT);
@@ -346,13 +346,12 @@
 	.setattr	= proc_notify_change,
 };
 
-static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
+/* returns the registered entry, or frees dp and returns NULL on failure */
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+		struct proc_dir_entry *dp)
 {
-	int ret;
-
-	ret = proc_alloc_inum(&dp->low_ino);
-	if (ret)
-		return ret;
+	if (proc_alloc_inum(&dp->low_ino))
+		goto out_free_entry;
 
 	write_lock(&proc_subdir_lock);
 	dp->parent = dir;
@@ -360,12 +359,16 @@
 		WARN(1, "proc_dir_entry '%s/%s' already registered\n",
 		     dir->name, dp->name);
 		write_unlock(&proc_subdir_lock);
-		proc_free_inum(dp->low_ino);
-		return -EEXIST;
+		goto out_free_inum;
 	}
 	write_unlock(&proc_subdir_lock);
 
-	return 0;
+	return dp;
+out_free_inum:
+	proc_free_inum(dp->low_ino);
+out_free_entry:
+	pde_free(dp);
+	return NULL;
 }
 
 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
@@ -443,10 +446,7 @@
 		if (ent->data) {
 			strcpy((char*)ent->data,dest);
 			ent->proc_iops = &proc_link_inode_operations;
-			if (proc_register(parent, ent) < 0) {
-				pde_free(ent);
-				ent = NULL;
-			}
+			ent = proc_register(parent, ent);
 		} else {
 			pde_free(ent);
 			ent = NULL;
@@ -470,11 +470,9 @@
 		ent->proc_fops = &proc_dir_operations;
 		ent->proc_iops = &proc_dir_inode_operations;
 		parent->nlink++;
-		if (proc_register(parent, ent) < 0) {
-			pde_free(ent);
+		ent = proc_register(parent, ent);
+		if (!ent)
 			parent->nlink--;
-			ent = NULL;
-		}
 	}
 	return ent;
 }
@@ -505,47 +503,47 @@
 		ent->proc_fops = NULL;
 		ent->proc_iops = NULL;
 		parent->nlink++;
-		if (proc_register(parent, ent) < 0) {
-			pde_free(ent);
+		ent = proc_register(parent, ent);
+		if (!ent)
 			parent->nlink--;
-			ent = NULL;
-		}
 	}
 	return ent;
 }
 EXPORT_SYMBOL(proc_create_mount_point);
 
-struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
-					struct proc_dir_entry *parent,
-					const struct file_operations *proc_fops,
-					void *data)
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+		struct proc_dir_entry **parent, void *data)
 {
-	struct proc_dir_entry *pde;
+	struct proc_dir_entry *p;
+
 	if ((mode & S_IFMT) == 0)
 		mode |= S_IFREG;
-
-	if (!S_ISREG(mode)) {
-		WARN_ON(1);	/* use proc_mkdir() */
+	if ((mode & S_IALLUGO) == 0)
+		mode |= S_IRUGO;
+	if (WARN_ON_ONCE(!S_ISREG(mode)))
 		return NULL;
+
+	p = __proc_create(parent, name, mode, 1);
+	if (p) {
+		p->proc_iops = &proc_file_inode_operations;
+		p->data = data;
 	}
+	return p;
+}
+
+struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
+		struct proc_dir_entry *parent,
+		const struct file_operations *proc_fops, void *data)
+{
+	struct proc_dir_entry *p;
 
 	BUG_ON(proc_fops == NULL);
 
-	if ((mode & S_IALLUGO) == 0)
-		mode |= S_IRUGO;
-	pde = __proc_create(&parent, name, mode, 1);
-	if (!pde)
-		goto out;
-	pde->proc_fops = proc_fops;
-	pde->data = data;
-	pde->proc_iops = &proc_file_inode_operations;
-	if (proc_register(parent, pde) < 0)
-		goto out_free;
-	return pde;
-out_free:
-	pde_free(pde);
-out:
-	return NULL;
+	p = proc_create_reg(name, mode, &parent, data);
+	if (!p)
+		return NULL;
+	p->proc_fops = proc_fops;
+	return proc_register(parent, p);
 }
 EXPORT_SYMBOL(proc_create_data);
  
@@ -557,6 +555,67 @@
 }
 EXPORT_SYMBOL(proc_create);
 
+static int proc_seq_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *de = PDE(inode);
+
+	if (de->state_size)
+		return seq_open_private(file, de->seq_ops, de->state_size);
+	return seq_open(file, de->seq_ops);
+}
+
+static const struct file_operations proc_seq_fops = {
+	.open		= proc_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
+		struct proc_dir_entry *parent, const struct seq_operations *ops,
+		unsigned int state_size, void *data)
+{
+	struct proc_dir_entry *p;
+
+	p = proc_create_reg(name, mode, &parent, data);
+	if (!p)
+		return NULL;
+	p->proc_fops = &proc_seq_fops;
+	p->seq_ops = ops;
+	p->state_size = state_size;
+	return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_seq_private);
+
+static int proc_single_open(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *de = PDE(inode);
+
+	return single_open(file, de->single_show, de->data);
+}
+
+static const struct file_operations proc_single_fops = {
+	.open		= proc_single_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
+		struct proc_dir_entry *parent,
+		int (*show)(struct seq_file *, void *), void *data)
+{
+	struct proc_dir_entry *p;
+
+	p = proc_create_reg(name, mode, &parent, data);
+	if (!p)
+		return NULL;
+	p->proc_fops = &proc_single_fops;
+	p->single_show = show;
+	return proc_register(parent, p);
+}
+EXPORT_SYMBOL(proc_create_single_data);
+
 void proc_set_size(struct proc_dir_entry *de, loff_t size)
 {
 	de->size = size;
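
Two things make those helpers possible: proc_register() now frees the entry itself on failure, so callers can tail-call it, and the PDE stores either seq_ops or single_show in a union, with state_size selecting seq_open() versus seq_open_private() at open time. A sketch of a caller that wants per-open iterator state; my_seq_ops is a hypothetical seq_operations table:

    struct my_iter {
            int bucket;     /* per-open cursor, zeroed by seq_open_private() */
    };

    static int __init my_table_init(void)
    {
            /*
             * A nonzero state_size routes the open through
             * seq_open_private(); the show callbacks then find
             * the state in seq->private.
             */
            if (!proc_create_seq_private("mytable", 0, NULL, &my_seq_ops,
                                         sizeof(struct my_iter), NULL))
                    return -ENOMEM;
            return 0;
    }
    fs_initcall(my_table_init);
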
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 0f1692e..93eb190 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,7 +44,12 @@
 	struct completion *pde_unload_completion;
 	const struct inode_operations *proc_iops;
 	const struct file_operations *proc_fops;
+	union {
+		const struct seq_operations *seq_ops;
+		int (*single_show)(struct seq_file *, void *);
+	};
 	void *data;
+	unsigned int state_size;
 	unsigned int low_ino;
 	nlink_t nlink;
 	kuid_t uid;
@@ -57,9 +62,9 @@
 	umode_t mode;
 	u8 namelen;
 #ifdef CONFIG_64BIT
-#define SIZEOF_PDE_INLINE_NAME	(192-139)
+#define SIZEOF_PDE_INLINE_NAME	(192-155)
 #else
-#define SIZEOF_PDE_INLINE_NAME	(128-87)
+#define SIZEOF_PDE_INLINE_NAME	(128-95)
 #endif
 	char inline_name[SIZEOF_PDE_INLINE_NAME];
 } __randomize_layout;
@@ -131,6 +136,8 @@
  */
 extern const struct file_operations proc_tid_children_operations;
 
+extern void proc_task_name(struct seq_file *m, struct task_struct *p,
+			   bool escape);
 extern int proc_tid_stat(struct seq_file *, struct pid_namespace *,
 			 struct pid *, struct task_struct *);
 extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *,
@@ -147,14 +154,14 @@
 extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
 extern int proc_setattr(struct dentry *, struct iattr *);
 extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
-extern int pid_revalidate(struct dentry *, unsigned int);
+extern void pid_update_inode(struct task_struct *, struct inode *);
 extern int pid_delete_dentry(const struct dentry *);
 extern int proc_pid_readdir(struct file *, struct dir_context *);
 extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int);
 extern loff_t mem_lseek(struct file *, loff_t, int);
 
 /* Lookups */
-typedef int instantiate_t(struct inode *, struct dentry *,
+typedef struct dentry *instantiate_t(struct dentry *,
 				     struct task_struct *, const void *);
 extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int,
 			   instantiate_t, struct task_struct *, const void *);
@@ -162,6 +169,10 @@
 /*
  * generic.c
  */
+struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode,
+		struct proc_dir_entry **parent, void *data);
+struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
+		struct proc_dir_entry *dp);
 extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
 struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
 extern int proc_readdir(struct file *, struct dir_context *);
diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c
index 6a6bee9..cb0edc7 100644
--- a/fs/proc/interrupts.c
+++ b/fs/proc/interrupts.c
@@ -34,21 +34,9 @@
 	.show  = show_interrupts
 };
 
-static int interrupts_open(struct inode *inode, struct file *filp)
-{
-	return seq_open(filp, &int_seq_ops);
-}
-
-static const struct file_operations proc_interrupts_operations = {
-	.open		= interrupts_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static int __init proc_interrupts_init(void)
 {
-	proc_create("interrupts", 0, NULL, &proc_interrupts_operations);
+	proc_create_seq("interrupts", 0, NULL, &int_seq_ops);
 	return 0;
 }
 fs_initcall(proc_interrupts_init);
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index b572cc8..d066947 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -28,21 +28,9 @@
 	return 0;
 }
 
-static int loadavg_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, loadavg_proc_show, NULL);
-}
-
-static const struct file_operations loadavg_proc_fops = {
-	.open		= loadavg_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_loadavg_init(void)
 {
-	proc_create("loadavg", 0, NULL, &loadavg_proc_fops);
+	proc_create_single("loadavg", 0, NULL, loadavg_proc_show);
 	return 0;
 }
 fs_initcall(proc_loadavg_init);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 65a72ab..2fb0484 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -149,21 +149,9 @@
 	return 0;
 }
 
-static int meminfo_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, meminfo_proc_show, NULL);
-}
-
-static const struct file_operations meminfo_proc_fops = {
-	.open		= meminfo_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_meminfo_init(void)
 {
-	proc_create("meminfo", 0, NULL, &meminfo_proc_fops);
+	proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
 	return 0;
 }
 fs_initcall(proc_meminfo_init);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 59b17e5..dd2b35f 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -87,28 +87,24 @@
 	.setattr	= proc_setattr,
 };
 
-static int proc_ns_instantiate(struct inode *dir,
-	struct dentry *dentry, struct task_struct *task, const void *ptr)
+static struct dentry *proc_ns_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr)
 {
 	const struct proc_ns_operations *ns_ops = ptr;
 	struct inode *inode;
 	struct proc_inode *ei;
 
-	inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | S_IRWXUGO);
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO);
 	if (!inode)
-		goto out;
+		return ERR_PTR(-ENOENT);
 
 	ei = PROC_I(inode);
 	inode->i_op = &proc_ns_link_inode_operations;
 	ei->ns_ops = ns_ops;
+	pid_update_inode(task, inode);
 
 	d_set_d_op(dentry, &pid_dentry_operations);
-	d_add(dentry, inode);
-	/* Close the race of the process dying before we return the dentry */
-	if (pid_revalidate(dentry, 0))
-		return 0;
-out:
-	return -ENOENT;
+	return d_splice_alias(inode, dentry);
 }
 
 static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
@@ -147,12 +143,10 @@
 static struct dentry *proc_ns_dir_lookup(struct inode *dir,
 				struct dentry *dentry, unsigned int flags)
 {
-	int error;
 	struct task_struct *task = get_proc_task(dir);
 	const struct proc_ns_operations **entry, **last;
 	unsigned int len = dentry->d_name.len;
-
-	error = -ENOENT;
+	struct dentry *res = ERR_PTR(-ENOENT);
 
 	if (!task)
 		goto out_no_task;
@@ -167,11 +161,11 @@
 	if (entry == last)
 		goto out;
 
-	error = proc_ns_instantiate(dir, dentry, task, *entry);
+	res = proc_ns_instantiate(dentry, task, *entry);
 out:
 	put_task_struct(task);
 out_no_task:
-	return ERR_PTR(error);
+	return res;
 }
 
 const struct inode_operations proc_ns_dir_inode_operations = {
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 7563437..3b63be6 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -113,21 +113,9 @@
 	.show	= nommu_region_list_show
 };
 
-static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &proc_nommu_region_list_seqop);
-}
-
-static const struct file_operations proc_nommu_region_list_operations = {
-	.open    = proc_nommu_region_list_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
 static int __init proc_nommu_init(void)
 {
-	proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
+	proc_create_seq("maps", S_IRUGO, NULL, &proc_nommu_region_list_seqop);
 	return 0;
 }
 
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 1763f37..7d94fa0 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -38,20 +38,20 @@
 	return maybe_get_net(PDE_NET(PDE(inode)));
 }
 
-int seq_open_net(struct inode *ino, struct file *f,
-		 const struct seq_operations *ops, int size)
+static int seq_open_net(struct inode *inode, struct file *file)
 {
-	struct net *net;
+	unsigned int state_size = PDE(inode)->state_size;
 	struct seq_net_private *p;
+	struct net *net;
 
-	BUG_ON(size < sizeof(*p));
+	WARN_ON_ONCE(state_size < sizeof(*p));
 
-	net = get_proc_net(ino);
-	if (net == NULL)
+	net = get_proc_net(inode);
+	if (!net)
 		return -ENXIO;
 
-	p = __seq_open_private(f, ops, size);
-	if (p == NULL) {
+	p = __seq_open_private(file, PDE(inode)->seq_ops, state_size);
+	if (!p) {
 		put_net(net);
 		return -ENOMEM;
 	}
@@ -60,51 +60,83 @@
 #endif
 	return 0;
 }
-EXPORT_SYMBOL_GPL(seq_open_net);
 
-int single_open_net(struct inode *inode, struct file *file,
-		int (*show)(struct seq_file *, void *))
+static int seq_release_net(struct inode *ino, struct file *f)
 {
-	int err;
-	struct net *net;
-
-	err = -ENXIO;
-	net = get_proc_net(inode);
-	if (net == NULL)
-		goto err_net;
-
-	err = single_open(file, show, net);
-	if (err < 0)
-		goto err_open;
-
-	return 0;
-
-err_open:
-	put_net(net);
-err_net:
-	return err;
-}
-EXPORT_SYMBOL_GPL(single_open_net);
-
-int seq_release_net(struct inode *ino, struct file *f)
-{
-	struct seq_file *seq;
-
-	seq = f->private_data;
+	struct seq_file *seq = f->private_data;
 
 	put_net(seq_file_net(seq));
 	seq_release_private(ino, f);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(seq_release_net);
 
-int single_release_net(struct inode *ino, struct file *f)
+static const struct file_operations proc_net_seq_fops = {
+	.open		= seq_open_net,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
+		struct proc_dir_entry *parent, const struct seq_operations *ops,
+		unsigned int state_size, void *data)
+{
+	struct proc_dir_entry *p;
+
+	p = proc_create_reg(name, mode, &parent, data);
+	if (!p)
+		return NULL;
+	p->proc_fops = &proc_net_seq_fops;
+	p->seq_ops = ops;
+	p->state_size = state_size;
+	return proc_register(parent, p);
+}
+EXPORT_SYMBOL_GPL(proc_create_net_data);
+
+static int single_open_net(struct inode *inode, struct file *file)
+{
+	struct proc_dir_entry *de = PDE(inode);
+	struct net *net;
+	int err;
+
+	net = get_proc_net(inode);
+	if (!net)
+		return -ENXIO;
+
+	err = single_open(file, de->single_show, net);
+	if (err)
+		put_net(net);
+	return err;
+}
+
+static int single_release_net(struct inode *ino, struct file *f)
 {
 	struct seq_file *seq = f->private_data;
 	put_net(seq->private);
 	return single_release(ino, f);
 }
-EXPORT_SYMBOL_GPL(single_release_net);
+
+static const struct file_operations proc_net_single_fops = {
+	.open		= single_open_net,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release_net,
+};
+
+struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
+		struct proc_dir_entry *parent,
+		int (*show)(struct seq_file *, void *), void *data)
+{
+	struct proc_dir_entry *p;
+
+	p = proc_create_reg(name, mode, &parent, data);
+	if (!p)
+		return NULL;
+	p->proc_fops = &proc_net_single_fops;
+	p->single_show = show;
+	return proc_register(parent, p);
+}
+EXPORT_SYMBOL_GPL(proc_create_net_single);
 
 static struct net *get_proc_task_net(struct inode *dir)
 {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 8989936..4d765e5 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -554,9 +554,8 @@
 	if (!inode)
 		goto out;
 
-	err = NULL;
 	d_set_d_op(dentry, &proc_sys_dentry_operations);
-	d_add(dentry, inode);
+	err = d_splice_alias(inode, dentry);
 
 out:
 	if (h)
@@ -684,6 +683,7 @@
 		if (IS_ERR(child))
 			return false;
 		if (d_in_lookup(child)) {
+			struct dentry *res;
 			inode = proc_sys_make_inode(dir->d_sb, head, table);
 			if (!inode) {
 				d_lookup_done(child);
@@ -691,7 +691,16 @@
 				return false;
 			}
 			d_set_d_op(child, &proc_sys_dentry_operations);
-			d_add(child, inode);
+			res = d_splice_alias(inode, child);
+			d_lookup_done(child);
+			if (unlikely(res)) {
+				if (IS_ERR(res)) {
+					dput(child);
+					return false;
+				}
+				dput(child);
+				child = res;
+			}
 		}
 	}
 	inode = d_inode(child);
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index d0cf1c5..c69ff19 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -126,18 +126,6 @@
 	.show	= show_tty_driver
 };
 
-static int tty_drivers_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &tty_drivers_op);
-}
-
-static const struct file_operations proc_tty_drivers_operations = {
-	.open		= tty_drivers_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 /*
  * This function is called by tty_register_driver() to handle
  * registering the driver's /proc handler into /proc/tty/driver/<foo>
@@ -147,11 +135,11 @@
 	struct proc_dir_entry *ent;
 		
 	if (!driver->driver_name || driver->proc_entry ||
-	    !driver->ops->proc_fops)
+	    !driver->ops->proc_show)
 		return;
 
-	ent = proc_create_data(driver->driver_name, 0, proc_tty_driver,
-			       driver->ops->proc_fops, driver);
+	ent = proc_create_single_data(driver->driver_name, 0, proc_tty_driver,
+			       driver->ops->proc_show, driver);
 	driver->proc_entry = ent;
 }
 
@@ -186,6 +174,6 @@
 	 * entry.
 	 */
 	proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR|S_IXUSR, NULL);
-	proc_create("tty/ldiscs", 0, NULL, &tty_ldiscs_proc_fops);
-	proc_create("tty/drivers", 0, NULL, &proc_tty_drivers_operations);
+	proc_create_seq("tty/ldiscs", 0, NULL, &tty_ldiscs_seq_ops);
+	proc_create_seq("tty/drivers", 0, NULL, &tty_drivers_op);
 }
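
For the tty drivers the data-carrying variant matters: proc_create_single_data() stashes both the callback and the driver pointer in the PDE, and single_open() passes the data through as seq->private. A driver's ->proc_show can therefore be as small as this sketch:

    static int mytty_proc_show(struct seq_file *m, void *v)
    {
            struct tty_driver *driver = m->private; /* the PDE's data */

            seq_printf(m, "driver: %s\n", driver->driver_name);
            return 0;
    }
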
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 4d7d061..127265e 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -12,7 +12,7 @@
 				      struct inode *inode,
 				      struct delayed_call *done)
 {
-	struct pid_namespace *ns = inode->i_sb->s_fs_info;
+	struct pid_namespace *ns = proc_pid_ns(inode);
 	pid_t tgid = task_tgid_nr_ns(current, ns);
 	char *name;
 
@@ -36,7 +36,7 @@
 int proc_setup_self(struct super_block *s)
 {
 	struct inode *root_inode = d_inode(s->s_root);
-	struct pid_namespace *ns = s->s_fs_info;
+	struct pid_namespace *ns = proc_pid_ns(root_inode);
 	struct dentry *self;
 	
 	inode_lock(root_inode);
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 24072cc..12901dc 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -25,21 +25,9 @@
 	return 0;
 }
 
-static int softirqs_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, show_softirqs, NULL);
-}
-
-static const struct file_operations proc_softirqs_operations = {
-	.open		= softirqs_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_softirqs_init(void)
 {
-	proc_create("softirqs", 0, NULL, &proc_softirqs_operations);
+	proc_create_single("softirqs", 0, NULL, show_softirqs);
 	return 0;
 }
 fs_initcall(proc_softirqs_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c486ad4..a20c6e4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -937,7 +937,7 @@
 	/*
 	 * The soft-dirty tracker uses #PF-s to catch writes
 	 * to pages, so write-protect the pte as well. See the
-	 * Documentation/vm/soft-dirty.txt for full description
+	 * Documentation/admin-guide/mm/soft-dirty.rst for full description
 	 * of how soft-dirty works.
 	 */
 	pte_t ptent = *pte;
@@ -1421,7 +1421,7 @@
  * Bits 0-54  page frame number (PFN) if present
  * Bits 0-4   swap type if swapped
  * Bits 5-54  swap offset if swapped
- * Bit  55    pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+ * Bit  55    pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
  * Bit  56    page exclusively mapped
  * Bits 57-60 zero
  * Bit  61    page is file-page or shared-anon
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 9d2efac..b905010 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -12,7 +12,7 @@
 					     struct inode *inode,
 					     struct delayed_call *done)
 {
-	struct pid_namespace *ns = inode->i_sb->s_fs_info;
+	struct pid_namespace *ns = proc_pid_ns(inode);
 	pid_t tgid = task_tgid_nr_ns(current, ns);
 	pid_t pid = task_pid_nr_ns(current, ns);
 	char *name;
@@ -36,7 +36,7 @@
 int proc_setup_thread_self(struct super_block *s)
 {
 	struct inode *root_inode = d_inode(s->s_root);
-	struct pid_namespace *ns = s->s_fs_info;
+	struct pid_namespace *ns = proc_pid_ns(root_inode);
 	struct dentry *thread_self;
 
 	inode_lock(root_inode);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 95a708d..3bd12f9 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -30,21 +30,9 @@
 	return 0;
 }
 
-static int uptime_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, uptime_proc_show, NULL);
-}
-
-static const struct file_operations uptime_proc_fops = {
-	.open		= uptime_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_uptime_init(void)
 {
-	proc_create("uptime", 0, NULL, &uptime_proc_fops);
+	proc_create_single("uptime", 0, NULL, uptime_proc_show);
 	return 0;
 }
 fs_initcall(proc_uptime_init);
diff --git a/fs/proc/version.c b/fs/proc/version.c
index 94901e8..b449f18 100644
--- a/fs/proc/version.c
+++ b/fs/proc/version.c
@@ -15,21 +15,9 @@
 	return 0;
 }
 
-static int version_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, version_proc_show, NULL);
-}
-
-static const struct file_operations version_proc_fops = {
-	.open		= version_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_version_init(void)
 {
-	proc_create("version", 0, NULL, &version_proc_fops);
+	proc_create_single("version", 0, NULL, version_proc_show);
 	return 0;
 }
 fs_initcall(proc_version_init);
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index eca2787..8d72221 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -114,13 +114,9 @@
 	brelse(bh);
 
 	foundinode = qnx4_iget(dir->i_sb, ino);
-	if (IS_ERR(foundinode)) {
+	if (IS_ERR(foundinode))
 		QNX4DEBUG((KERN_ERR "qnx4: lookup->iget -> error %ld\n",
 			   PTR_ERR(foundinode)));
-		return ERR_CAST(foundinode);
-	}
 out:
-	d_add(dentry, foundinode);
-
-	return NULL;
+	return d_splice_alias(foundinode, dentry);
 }
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 72c2770..e2e98e6 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -29,15 +29,11 @@
 	if (ino) {
 		foundinode = qnx6_iget(dir->i_sb, ino);
 		qnx6_put_page(page);
-		if (IS_ERR(foundinode)) {
+		if (IS_ERR(foundinode))
 			pr_debug("lookup->iget ->  error %ld\n",
 				 PTR_ERR(foundinode));
-			return ERR_CAST(foundinode);
-		}
 	} else {
 		pr_debug("%s(): not found %s\n", __func__, name);
-		return NULL;
 	}
-	d_add(dentry, foundinode);
-	return NULL;
+	return d_splice_alias(foundinode, dentry);
 }
diff --git a/fs/read_write.c b/fs/read_write.c
index c4eabbf..e83bd97 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2023,7 +2023,7 @@
 		ret = mnt_want_write_file(dst_file);
 		if (ret) {
 			info->status = ret;
-			goto next_loop;
+			goto next_fdput;
 		}
 
 		dst_off = info->dest_offset;
@@ -2058,9 +2058,9 @@
 
 next_file:
 		mnt_drop_write_file(dst_file);
-next_loop:
+next_fdput:
 		fdput(dst_fd);
-
+next_loop:
 		if (fatal_signal_pending(current))
 			goto out;
 	}
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index fe99915..e39b391 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -389,27 +389,13 @@
 	return 0;
 }
 
-static int r_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, PDE_DATA(inode), 
-				proc_get_parent_data(inode));
-}
-
-static const struct file_operations r_file_operations = {
-	.open = r_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
 static struct proc_dir_entry *proc_info_root = NULL;
 static const char proc_info_root_name[] = "fs/reiserfs";
 
 static void add_file(struct super_block *sb, char *name,
 		     int (*func) (struct seq_file *, void *))
 {
-	proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
-			 &r_file_operations, func);
+	proc_create_single_data(name, 0, REISERFS_SB(sb)->procdir, func, sb);
 }
 
 int reiserfs_proc_info_init(struct super_block *sb)
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 8f06fd1..6ccb519 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -213,7 +213,7 @@
 				   unsigned int flags)
 {
 	unsigned long offset, maxoff;
-	struct inode *inode;
+	struct inode *inode = NULL;
 	struct romfs_inode ri;
 	const char *name;		/* got from dentry */
 	int len, ret;
@@ -233,7 +233,7 @@
 
 	for (;;) {
 		if (!offset || offset >= maxoff)
-			goto out0;
+			break;
 
 		ret = romfs_dev_read(dir->i_sb, offset, &ri, sizeof(ri));
 		if (ret < 0)
@@ -244,37 +244,19 @@
 				       len);
 		if (ret < 0)
 			goto error;
-		if (ret == 1)
+		if (ret == 1) {
+			/* Hard link handling */
+			if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD)
+				offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
+			inode = romfs_iget(dir->i_sb, offset);
 			break;
+		}
 
 		/* next entry */
 		offset = be32_to_cpu(ri.next) & ROMFH_MASK;
 	}
 
-	/* Hard link handling */
-	if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD)
-		offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
-
-	inode = romfs_iget(dir->i_sb, offset);
-	if (IS_ERR(inode)) {
-		ret = PTR_ERR(inode);
-		goto error;
-	}
-	goto outi;
-
-	/*
-	 * it's a bit funky, _lookup needs to return an error code
-	 * (negative) or a NULL, both as a dentry.  ENOENT should not
-	 * be returned, instead we need to create a negative dentry by
-	 * d_add(dentry, NULL); and return 0 as no error.
-	 * (Although as I see, it only matters on writable file
-	 * systems).
-	 */
-out0:
-	inode = NULL;
-outi:
-	d_add(dentry, inode);
-	ret = 0;
+	return d_splice_alias(inode, dentry);
 error:
 	return ERR_PTR(ret);
 }
diff --git a/fs/select.c b/fs/select.c
index ba879c5..bc3cc0f 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -34,6 +34,29 @@
 
 #include <linux/uaccess.h>
 
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
+{
+	if (file->f_op->poll) {
+		return file->f_op->poll(file, pt);
+	} else if (file_has_poll_mask(file)) {
+		unsigned int events = poll_requested_events(pt);
+		struct wait_queue_head *head;
+
+		if (pt && pt->_qproc) {
+			head = file->f_op->get_poll_head(file, events);
+			if (!head)
+				return DEFAULT_POLLMASK;
+			if (IS_ERR(head))
+				return EPOLLERR;
+			pt->_qproc(file, head, pt);
+		}
+
+		return file->f_op->poll_mask(file, events);
+	} else {
+		return DEFAULT_POLLMASK;
+	}
+}
+EXPORT_SYMBOL_GPL(vfs_poll);
 
 /*
  * Estimate expected accuracy in ns from a timeval.
@@ -233,7 +256,7 @@
 	add_wait_queue(wait_address, &entry->wait);
 }
 
-int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
 			  ktime_t *expires, unsigned long slack)
 {
 	int rc = -EINTR;
@@ -258,7 +281,6 @@
 
 	return rc;
 }
-EXPORT_SYMBOL(poll_schedule_timeout);
 
 /**
  * poll_select_set_timeout - helper function to setup the timeout value
@@ -503,14 +525,10 @@
 					continue;
 				f = fdget(i);
 				if (f.file) {
-					const struct file_operations *f_op;
-					f_op = f.file->f_op;
-					mask = DEFAULT_POLLMASK;
-					if (f_op->poll) {
-						wait_key_set(wait, in, out,
-							     bit, busy_flag);
-						mask = (*f_op->poll)(f.file, wait);
-					}
+					wait_key_set(wait, in, out, bit,
+						     busy_flag);
+					mask = vfs_poll(f.file, wait);
+
 					fdput(f);
 					if ((mask & POLLIN_SET) && (in & bit)) {
 						res_in |= bit;
@@ -813,34 +831,29 @@
 				     bool *can_busy_poll,
 				     __poll_t busy_flag)
 {
-	__poll_t mask;
-	int fd;
+	int fd = pollfd->fd;
+	__poll_t mask = 0, filter;
+	struct fd f;
 
-	mask = 0;
-	fd = pollfd->fd;
-	if (fd >= 0) {
-		struct fd f = fdget(fd);
-		mask = EPOLLNVAL;
-		if (f.file) {
-			/* userland u16 ->events contains POLL... bitmap */
-			__poll_t filter = demangle_poll(pollfd->events) |
-						EPOLLERR | EPOLLHUP;
-			mask = DEFAULT_POLLMASK;
-			if (f.file->f_op->poll) {
-				pwait->_key = filter;
-				pwait->_key |= busy_flag;
-				mask = f.file->f_op->poll(f.file, pwait);
-				if (mask & busy_flag)
-					*can_busy_poll = true;
-			}
-			/* Mask out unneeded events. */
-			mask &= filter;
-			fdput(f);
-		}
-	}
+	if (fd < 0)
+		goto out;
+	mask = EPOLLNVAL;
+	f = fdget(fd);
+	if (!f.file)
+		goto out;
+
+	/* userland u16 ->events contains POLL... bitmap */
+	filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
+	pwait->_key = filter | busy_flag;
+	mask = vfs_poll(f.file, pwait);
+	if (mask & busy_flag)
+		*can_busy_poll = true;
+	mask &= filter;		/* Mask out unneeded events. */
+	fdput(f);
+
+out:
 	/* ... and so does ->revents */
 	pollfd->revents = mangle_poll(mask);
-
 	return mask;
 }
 
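
vfs_poll() becomes the one entry point that hides whether a file implements the legacy ->poll or the new ->get_poll_head/->poll_mask pair; select and poll above now both go through it. In-kernel callers that only want a readiness snapshot can pass a NULL poll_table, since both branches skip queueing when there is no _qproc. A sketch:

    static bool file_is_readable(struct file *file)
    {
            /* NULL table: query readiness without registering a waiter */
            __poll_t mask = vfs_poll(file, NULL);

            return mask & (EPOLLIN | EPOLLRDNORM);
    }
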
diff --git a/fs/signalfd.c b/fs/signalfd.c
index d2187a8..cbb42f7 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -81,83 +81,86 @@
 static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 			     siginfo_t const *kinfo)
 {
-	long err;
+	struct signalfd_siginfo new;
 
 	BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128);
 
 	/*
 	 * Unused members should be zero ...
 	 */
-	err = __clear_user(uinfo, sizeof(*uinfo));
+	memset(&new, 0, sizeof(new));
 
 	/*
 	 * If you change siginfo_t structure, please be sure
 	 * this code is fixed accordingly.
 	 */
-	err |= __put_user(kinfo->si_signo, &uinfo->ssi_signo);
-	err |= __put_user(kinfo->si_errno, &uinfo->ssi_errno);
-	err |= __put_user(kinfo->si_code, &uinfo->ssi_code);
+	new.ssi_signo = kinfo->si_signo;
+	new.ssi_errno = kinfo->si_errno;
+	new.ssi_code  = kinfo->si_code;
 	switch (siginfo_layout(kinfo->si_signo, kinfo->si_code)) {
 	case SIL_KILL:
-		err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
-		err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
+		new.ssi_pid = kinfo->si_pid;
+		new.ssi_uid = kinfo->si_uid;
 		break;
 	case SIL_TIMER:
-		 err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid);
-		 err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun);
-		 err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
-		 err |= __put_user(kinfo->si_int, &uinfo->ssi_int);
+		new.ssi_tid = kinfo->si_tid;
+		new.ssi_overrun = kinfo->si_overrun;
+		new.ssi_ptr = (long) kinfo->si_ptr;
+		new.ssi_int = kinfo->si_int;
 		break;
 	case SIL_POLL:
-		err |= __put_user(kinfo->si_band, &uinfo->ssi_band);
-		err |= __put_user(kinfo->si_fd, &uinfo->ssi_fd);
+		new.ssi_band = kinfo->si_band;
+		new.ssi_fd   = kinfo->si_fd;
 		break;
+	case SIL_FAULT_BNDERR:
+	case SIL_FAULT_PKUERR:
+		/*
+		 * Fall through to the SIL_FAULT case.  Both SIL_FAULT_BNDERR
+		 * and SIL_FAULT_PKUERR are only generated by faults that
+		 * deliver them synchronously to userspace.  In case someone
+		 * injects one of these signals and signalfd catches it,
+		 * treat it as SIL_FAULT.
+		 */
 	case SIL_FAULT:
-		err |= __put_user((long) kinfo->si_addr, &uinfo->ssi_addr);
+		new.ssi_addr = (long) kinfo->si_addr;
 #ifdef __ARCH_SI_TRAPNO
-		err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
+		new.ssi_trapno = kinfo->si_trapno;
 #endif
-#ifdef BUS_MCEERR_AO
-		/*
-		 * Other callers might not initialize the si_lsb field,
-		 * so check explicitly for the right codes here.
-		 */
-		if (kinfo->si_signo == SIGBUS &&
-		     kinfo->si_code == BUS_MCEERR_AO)
-			err |= __put_user((short) kinfo->si_addr_lsb,
-					  &uinfo->ssi_addr_lsb);
+		break;
+	case SIL_FAULT_MCEERR:
+		new.ssi_addr = (long) kinfo->si_addr;
+#ifdef __ARCH_SI_TRAPNO
+		new.ssi_trapno = kinfo->si_trapno;
 #endif
-#ifdef BUS_MCEERR_AR
-		/*
-		 * Other callers might not initialize the si_lsb field,
-		 * so check explicitly for the right codes here.
-		 */
-		if (kinfo->si_signo == SIGBUS &&
-		    kinfo->si_code == BUS_MCEERR_AR)
-			err |= __put_user((short) kinfo->si_addr_lsb,
-					  &uinfo->ssi_addr_lsb);
-#endif
+		new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
 		break;
 	case SIL_CHLD:
-		err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
-		err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
-		err |= __put_user(kinfo->si_status, &uinfo->ssi_status);
-		err |= __put_user(kinfo->si_utime, &uinfo->ssi_utime);
-		err |= __put_user(kinfo->si_stime, &uinfo->ssi_stime);
+		new.ssi_pid    = kinfo->si_pid;
+		new.ssi_uid    = kinfo->si_uid;
+		new.ssi_status = kinfo->si_status;
+		new.ssi_utime  = kinfo->si_utime;
+		new.ssi_stime  = kinfo->si_stime;
 		break;
 	case SIL_RT:
-	default:
 		/*
 		 * This case catches also the signals queued by sigqueue().
 		 */
-		err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
-		err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
-		err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
-		err |= __put_user(kinfo->si_int, &uinfo->ssi_int);
+		new.ssi_pid = kinfo->si_pid;
+		new.ssi_uid = kinfo->si_uid;
+		new.ssi_ptr = (long) kinfo->si_ptr;
+		new.ssi_int = kinfo->si_int;
+		break;
+	case SIL_SYS:
+		new.ssi_call_addr = (long) kinfo->si_call_addr;
+		new.ssi_syscall   = kinfo->si_syscall;
+		new.ssi_arch      = kinfo->si_arch;
 		break;
 	}
 
-	return err ? -EFAULT: sizeof(*uinfo);
+	if (copy_to_user(uinfo, &new, sizeof(struct signalfd_siginfo)))
+		return -EFAULT;
+
+	return sizeof(*uinfo);
 }
 
 static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
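
The signalfd rewrite is also a worked example of a general pattern: replace a chain of __put_user() calls with one fully initialized local struct and a single copy_to_user(). That shrinks the user-access window to one call, and the memset() guarantees no kernel stack bytes leak through padding or unused members. In generic form (struct user_rec and struct kernel_rec are hypothetical):

    static ssize_t copy_rec_to_user(struct user_rec __user *urec,
                                    const struct kernel_rec *k)
    {
            struct user_rec new;

            memset(&new, 0, sizeof(new));   /* zero padding and unused fields */
            new.a = k->a;
            new.b = k->b;

            if (copy_to_user(urec, &new, sizeof(new)))
                    return -EFAULT;
            return sizeof(new);
    }
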
diff --git a/fs/super.c b/fs/super.c
index 4b5b562..50728d9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -947,7 +947,7 @@
 static void do_thaw_all_callback(struct super_block *sb)
 {
 	down_write(&sb->s_umount);
-	if (sb->s_root && sb->s_flags & MS_BORN) {
+	if (sb->s_root && sb->s_flags & SB_BORN) {
 		emergency_thaw_bdev(sb);
 		thaw_super_locked(sb);
 	} else {
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 250b075..4d5d204 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -51,14 +51,9 @@
 	if (dentry->d_name.len > SYSV_NAMELEN)
 		return ERR_PTR(-ENAMETOOLONG);
 	ino = sysv_inode_by_name(dentry);
-
-	if (ino) {
+	if (ino)
 		inode = sysv_iget(dir->i_sb, ino);
-		if (IS_ERR(inode))
-			return ERR_CAST(inode);
-	}
-	d_add(dentry, inode);
-	return NULL;
+	return d_splice_alias(inode, dentry);
 }
 
 static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
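
This hunk, like the ubifs one below, relies on d_splice_alias() accepting
everything a ->lookup() can produce: NULL (negative dentry), a valid inode,
or an ERR_PTR.  A hedged sketch of the resulting lookup shape, where
example_find_ino() and example_iget() are hypothetical stand-ins for a
filesystem's own helpers:

	static struct dentry *example_lookup(struct inode *dir,
					     struct dentry *dentry, unsigned int flags)
	{
		struct inode *inode = NULL;	/* NULL => negative dentry */
		unsigned long ino = example_find_ino(dir, dentry);	/* hypothetical */

		if (ino)
			inode = example_iget(dir->i_sb, ino);	/* may be ERR_PTR */

		/* One call handles NULL, errors, and alias reuse (NFS export). */
		return d_splice_alias(inode, dentry);
	}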
diff --git a/fs/timerfd.c b/fs/timerfd.c
index cdad49d..d84a2be 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -226,21 +226,20 @@
 	kfree_rcu(ctx, rcu);
 	return 0;
 }
-
-static __poll_t timerfd_poll(struct file *file, poll_table *wait)
+
+static struct wait_queue_head *timerfd_get_poll_head(struct file *file,
+		__poll_t eventmask)
 {
 	struct timerfd_ctx *ctx = file->private_data;
-	__poll_t events = 0;
-	unsigned long flags;
 
-	poll_wait(file, &ctx->wqh, wait);
+	return &ctx->wqh;
+}
 
-	spin_lock_irqsave(&ctx->wqh.lock, flags);
-	if (ctx->ticks)
-		events |= EPOLLIN;
-	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask)
+{
+	struct timerfd_ctx *ctx = file->private_data;
 
-	return events;
+	return ctx->ticks ? EPOLLIN : 0;
 }
 
 static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
@@ -364,7 +363,8 @@
 
 static const struct file_operations timerfd_fops = {
 	.release	= timerfd_release,
-	.poll		= timerfd_poll,
+	.get_poll_head	= timerfd_get_poll_head,
+	.poll_mask	= timerfd_poll_mask,
 	.read		= timerfd_read,
 	.llseek		= noop_llseek,
 	.show_fdinfo	= timerfd_show,
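
This follows the new two-callback poll interface: ->get_poll_head() only
hands back the wait queue to sleep on, and ->poll_mask() recomputes
readiness, so a single-word state read such as ctx->ticks above no longer
needs the wait-queue lock.  A hedged sketch of the same split for a
hypothetical driver (struct foo_ctx and data_ready are stand-ins):

	static struct wait_queue_head *foo_get_poll_head(struct file *file,
			__poll_t eventmask)
	{
		struct foo_ctx *ctx = file->private_data;

		return &ctx->wqh;	/* where the poll core will wait */
	}

	static __poll_t foo_poll_mask(struct file *file, __poll_t eventmask)
	{
		struct foo_ctx *ctx = file->private_data;

		return ctx->data_ready ? EPOLLIN : 0;	/* readiness only */
	}

Both callbacks are then wired up in file_operations in place of .poll,
exactly as the timerfd_fops hunk above does.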
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 616a688..55c508f 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -24,14 +24,6 @@
 	return ubifs_check_dir_empty(inode) == 0;
 }
 
-static unsigned int ubifs_crypt_max_namelen(struct inode *inode)
-{
-	if (S_ISLNK(inode->i_mode))
-		return UBIFS_MAX_INO_DATA;
-	else
-		return UBIFS_MAX_NLEN;
-}
-
 int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
 		  unsigned int in_len, unsigned int *out_len, int block)
 {
@@ -89,5 +81,5 @@
 	.get_context		= ubifs_crypt_get_context,
 	.set_context		= ubifs_crypt_set_context,
 	.empty_dir		= ubifs_crypt_empty_dir,
-	.max_namelen		= ubifs_crypt_max_namelen,
+	.max_namelen		= UBIFS_MAX_NLEN,
 };
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 9d7fb88..4e267cc 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -214,7 +214,7 @@
 	int err;
 	union ubifs_key key;
 	struct inode *inode = NULL;
-	struct ubifs_dent_node *dent;
+	struct ubifs_dent_node *dent = NULL;
 	struct ubifs_info *c = dir->i_sb->s_fs_info;
 	struct fscrypt_name nm;
 
@@ -229,14 +229,14 @@
 		return ERR_PTR(err);
 
 	if (fname_len(&nm) > UBIFS_MAX_NLEN) {
-		err = -ENAMETOOLONG;
-		goto out_fname;
+		inode = ERR_PTR(-ENAMETOOLONG);
+		goto done;
 	}
 
 	dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
 	if (!dent) {
-		err = -ENOMEM;
-		goto out_fname;
+		inode = ERR_PTR(-ENOMEM);
+		goto done;
 	}
 
 	if (nm.hash) {
@@ -250,16 +250,16 @@
 	}
 
 	if (err) {
-		if (err == -ENOENT) {
+		if (err == -ENOENT)
 			dbg_gen("not found");
-			goto done;
-		}
-		goto out_dent;
+		else
+			inode = ERR_PTR(err);
+		goto done;
 	}
 
 	if (dbg_check_name(c, dent, &nm)) {
-		err = -EINVAL;
-		goto out_dent;
+		inode = ERR_PTR(-EINVAL);
+		goto done;
 	}
 
 	inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));
@@ -272,7 +272,7 @@
 		ubifs_err(c, "dead directory entry '%pd', error %d",
 			  dentry, err);
 		ubifs_ro_mode(c, err);
-		goto out_dent;
+		goto done;
 	}
 
 	if (ubifs_crypt_is_encrypted(dir) &&
@@ -280,27 +280,14 @@
 	    !fscrypt_has_permitted_context(dir, inode)) {
 		ubifs_warn(c, "Inconsistent encryption contexts: %lu/%lu",
 			   dir->i_ino, inode->i_ino);
-		err = -EPERM;
-		goto out_inode;
+		iput(inode);
+		inode = ERR_PTR(-EPERM);
 	}
 
 done:
 	kfree(dent);
 	fscrypt_free_filename(&nm);
-	/*
-	 * Note, d_splice_alias() would be required instead if we supported
-	 * NFS.
-	 */
-	d_add(dentry, inode);
-	return NULL;
-
-out_inode:
-	iput(inode);
-out_dent:
-	kfree(dent);
-out_fname:
-	fscrypt_free_filename(&nm);
-	return ERR_PTR(err);
+	return d_splice_alias(inode, dentry);
 }
 
 static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
diff --git a/fs/xattr.c b/fs/xattr.c
index 61cd28b..f9cb1db 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -229,7 +229,7 @@
 }
 EXPORT_SYMBOL_GPL(vfs_setxattr);
 
-ssize_t
+static ssize_t
 xattr_getsecurity(struct inode *inode, const char *name, void *value,
 			size_t size)
 {
@@ -254,7 +254,6 @@
 out_noalloc:
 	return len;
 }
-EXPORT_SYMBOL_GPL(xattr_getsecurity);
 
 /*
  * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
@@ -354,7 +353,6 @@
 	if (error)
 		return error;
 	if (inode->i_op->listxattr && (inode->i_opflags & IOP_XATTR)) {
-		error = -EOPNOTSUPP;
 		error = inode->i_op->listxattr(dentry, list, size);
 	} else {
 		error = security_inode_listsecurity(inode, list, size);
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 46bcf0e6..457ac9f 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -85,6 +85,24 @@
 
 	  If unsure, say N.
 
+config XFS_ONLINE_REPAIR
+	bool "XFS online metadata repair support"
+	default n
+	depends on XFS_FS && XFS_ONLINE_SCRUB
+	help
+	  If you say Y here you will be able to repair metadata on a
+	  mounted XFS filesystem.  This feature is intended to reduce
+	  filesystem downtime by fixing minor problems before they cause the
+	  filesystem to go down.  However, it requires that the filesystem be
+	  formatted with secondary metadata, such as reverse mappings and inode
+	  parent pointers.
+
+	  This feature is considered EXPERIMENTAL.  Use with caution!
+
+	  See the xfs_scrub man page in section 8 for additional information.
+
+	  If unsure, say N.
+
 config XFS_WARN
 	bool "XFS Verbose Warnings"
 	depends on XFS_FS && !XFS_DEBUG
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 7ceb41a..e8d67a4 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -28,6 +28,7 @@
 
 # build the libxfs code first
 xfs-y				+= $(addprefix libxfs/, \
+				   xfs_ag.o \
 				   xfs_alloc.o \
 				   xfs_alloc_btree.o \
 				   xfs_attr.o \
@@ -163,4 +164,12 @@
 
 xfs-$(CONFIG_XFS_RT)		+= scrub/rtbitmap.o
 xfs-$(CONFIG_XFS_QUOTA)		+= scrub/quota.o
+
+# online repair
+ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
+xfs-y				+= $(addprefix scrub/, \
+				   agheader_repair.o \
+				   repair.o \
+				   )
+endif
 endif
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
new file mode 100644
index 0000000..9345802
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -0,0 +1,464 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2018 Red Hat, Inc.
+ * All rights reserved.
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "xfs_ag.h"
+
+static struct xfs_buf *
+xfs_get_aghdr_buf(
+	struct xfs_mount	*mp,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	int			flags,
+	const struct xfs_buf_ops *ops)
+{
+	struct xfs_buf		*bp;
+
+	bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
+	if (!bp)
+		return NULL;
+
+	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+	bp->b_bn = blkno;
+	bp->b_maps[0].bm_bn = blkno;
+	bp->b_ops = ops;
+
+	return bp;
+}
+
+/*
+ * Generic btree root block init function
+ */
+static void
+xfs_btroot_init(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct aghdr_init_data	*id)
+{
+	xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0);
+}
+
+/*
+ * Alloc btree root block init functions
+ */
+static void
+xfs_bnoroot_init(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct aghdr_init_data	*id)
+{
+	struct xfs_alloc_rec	*arec;
+
+	xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0);
+	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
+	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
+	arec->ar_blockcount = cpu_to_be32(id->agsize -
+					  be32_to_cpu(arec->ar_startblock));
+}
+
+static void
+xfs_cntroot_init(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct aghdr_init_data	*id)
+{
+	struct xfs_alloc_rec	*arec;
+
+	xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0);
+	arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
+	arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
+	arec->ar_blockcount = cpu_to_be32(id->agsize -
+					  be32_to_cpu(arec->ar_startblock));
+}
+
+/*
+ * Reverse map root block init
+ */
+static void
+xfs_rmaproot_init(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct aghdr_init_data	*id)
+{
+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
+	struct xfs_rmap_rec	*rrec;
+
+	xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0);
+
+	/*
+	 * Mark the AG header regions as static metadata.  The BNO
+	 * btree block is the first block after the headers, so its
+	 * location defines the size of the region the static
+	 * metadata consumes.
+	 *
+	 * Note: unlike mkfs, we never have to account for log
+	 * space when growing the data regions.
+	 */
+	rrec = XFS_RMAP_REC_ADDR(block, 1);
+	rrec->rm_startblock = 0;
+	rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
+	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
+	rrec->rm_offset = 0;
+
+	/* account freespace btree root blocks */
+	rrec = XFS_RMAP_REC_ADDR(block, 2);
+	rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
+	rrec->rm_blockcount = cpu_to_be32(2);
+	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+	rrec->rm_offset = 0;
+
+	/* account inode btree root blocks */
+	rrec = XFS_RMAP_REC_ADDR(block, 3);
+	rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
+	rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
+					  XFS_IBT_BLOCK(mp));
+	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
+	rrec->rm_offset = 0;
+
+	/* account for rmap btree root */
+	rrec = XFS_RMAP_REC_ADDR(block, 4);
+	rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
+	rrec->rm_blockcount = cpu_to_be32(1);
+	rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
+	rrec->rm_offset = 0;
+
+	/* account for refc btree root */
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		rrec = XFS_RMAP_REC_ADDR(block, 5);
+		rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp));
+		rrec->rm_blockcount = cpu_to_be32(1);
+		rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
+		rrec->rm_offset = 0;
+		be16_add_cpu(&block->bb_numrecs, 1);
+	}
+}
+
+/*
+ * Initialise new secondary superblocks with the pre-grow geometry, but mark
+ * them as "in progress" so we know they haven't yet been activated. This will
+ * get cleared when the update with the new geometry information is done after
+ * changes to the primary are committed. This isn't strictly necessary, but we
+ * get it for free with the delayed buffer write lists and it means we can tell
+ * if a grow operation didn't complete properly after the fact.
+ */
+static void
+xfs_sbblock_init(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct aghdr_init_data	*id)
+{
+	struct xfs_dsb		*dsb = XFS_BUF_TO_SBP(bp);
+
+	xfs_sb_to_disk(dsb, &mp->m_sb);
+	dsb->sb_inprogress = 1;
+}
+
+static void
+xfs_agfblock_init(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct aghdr_init_data	*id)
+{
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(bp);
+	xfs_extlen_t		tmpsize;
+
+	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
+	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
+	agf->agf_seqno = cpu_to_be32(id->agno);
+	agf->agf_length = cpu_to_be32(id->agsize);
+	agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
+	agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
+	agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
+	agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		agf->agf_roots[XFS_BTNUM_RMAPi] =
+					cpu_to_be32(XFS_RMAP_BLOCK(mp));
+		agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+		agf->agf_rmap_blocks = cpu_to_be32(1);
+	}
+
+	agf->agf_flfirst = cpu_to_be32(1);
+	agf->agf_fllast = 0;
+	agf->agf_flcount = 0;
+	tmpsize = id->agsize - mp->m_ag_prealloc_blocks;
+	agf->agf_freeblks = cpu_to_be32(tmpsize);
+	agf->agf_longest = cpu_to_be32(tmpsize);
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
+	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
+		agf->agf_refcount_root = cpu_to_be32(
+				xfs_refc_block(mp));
+		agf->agf_refcount_level = cpu_to_be32(1);
+		agf->agf_refcount_blocks = cpu_to_be32(1);
+	}
+}
+
+static void
+xfs_agflblock_init(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct aghdr_init_data	*id)
+{
+	struct xfs_agfl		*agfl = XFS_BUF_TO_AGFL(bp);
+	__be32			*agfl_bno;
+	int			bucket;
+
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
+		agfl->agfl_seqno = cpu_to_be32(id->agno);
+		uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
+	}
+
+	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
+	for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
+		agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
+}
+
+static void
+xfs_agiblock_init(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp,
+	struct aghdr_init_data	*id)
+{
+	struct xfs_agi		*agi = XFS_BUF_TO_AGI(bp);
+	int			bucket;
+
+	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
+	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
+	agi->agi_seqno = cpu_to_be32(id->agno);
+	agi->agi_length = cpu_to_be32(id->agsize);
+	agi->agi_count = 0;
+	agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
+	agi->agi_level = cpu_to_be32(1);
+	agi->agi_freecount = 0;
+	agi->agi_newino = cpu_to_be32(NULLAGINO);
+	agi->agi_dirino = cpu_to_be32(NULLAGINO);
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
+		agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
+		agi->agi_free_level = cpu_to_be32(1);
+	}
+	for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
+		agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
+}
+
+typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp,
+				  struct aghdr_init_data *id);
+static int
+xfs_ag_init_hdr(
+	struct xfs_mount	*mp,
+	struct aghdr_init_data	*id,
+	aghdr_init_work_f	work,
+	const struct xfs_buf_ops *ops)
+{
+	struct xfs_buf		*bp;
+
+	bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops);
+	if (!bp)
+		return -ENOMEM;
+
+	(*work)(mp, bp, id);
+
+	xfs_buf_delwri_queue(bp, &id->buffer_list);
+	xfs_buf_relse(bp);
+	return 0;
+}
+
+struct xfs_aghdr_grow_data {
+	xfs_daddr_t		daddr;
+	size_t			numblks;
+	const struct xfs_buf_ops *ops;
+	aghdr_init_work_f	work;
+	xfs_btnum_t		type;
+	bool			need_init;
+};
+
+/*
+ * Prepare new AG headers to be written to disk. We use uncached buffers here,
+ * as it is assumed these new AG headers are beyond the currently valid
+ * filesystem address space. Using cached buffers would trip over EOFS
+ * corruption detection algorithms in the buffer cache lookup routines.
+ *
+ * This is a non-transactional function, but the prepared buffers are added to a
+ * delayed write buffer list supplied by the caller so they can submit them to
+ * disk and wait on them as required.
+ */
+int
+xfs_ag_init_headers(
+	struct xfs_mount	*mp,
+	struct aghdr_init_data	*id)
+{
+	struct xfs_aghdr_grow_data aghdr_data[] = {
+	{ /* SB */
+		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR),
+		.numblks = XFS_FSS_TO_BB(mp, 1),
+		.ops = &xfs_sb_buf_ops,
+		.work = &xfs_sbblock_init,
+		.need_init = true
+	},
+	{ /* AGF */
+		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)),
+		.numblks = XFS_FSS_TO_BB(mp, 1),
+		.ops = &xfs_agf_buf_ops,
+		.work = &xfs_agfblock_init,
+		.need_init = true
+	},
+	{ /* AGFL */
+		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)),
+		.numblks = XFS_FSS_TO_BB(mp, 1),
+		.ops = &xfs_agfl_buf_ops,
+		.work = &xfs_agflblock_init,
+		.need_init = true
+	},
+	{ /* AGI */
+		.daddr = XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)),
+		.numblks = XFS_FSS_TO_BB(mp, 1),
+		.ops = &xfs_agi_buf_ops,
+		.work = &xfs_agiblock_init,
+		.need_init = true
+	},
+	{ /* BNO root block */
+		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)),
+		.numblks = BTOBB(mp->m_sb.sb_blocksize),
+		.ops = &xfs_allocbt_buf_ops,
+		.work = &xfs_bnoroot_init,
+		.need_init = true
+	},
+	{ /* CNT root block */
+		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)),
+		.numblks = BTOBB(mp->m_sb.sb_blocksize),
+		.ops = &xfs_allocbt_buf_ops,
+		.work = &xfs_cntroot_init,
+		.need_init = true
+	},
+	{ /* INO root block */
+		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)),
+		.numblks = BTOBB(mp->m_sb.sb_blocksize),
+		.ops = &xfs_inobt_buf_ops,
+		.work = &xfs_btroot_init,
+		.type = XFS_BTNUM_INO,
+		.need_init = true
+	},
+	{ /* FINO root block */
+		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)),
+		.numblks = BTOBB(mp->m_sb.sb_blocksize),
+		.ops = &xfs_inobt_buf_ops,
+		.work = &xfs_btroot_init,
+		.type = XFS_BTNUM_FINO,
+		.need_init = xfs_sb_version_hasfinobt(&mp->m_sb)
+	},
+	{ /* RMAP root block */
+		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)),
+		.numblks = BTOBB(mp->m_sb.sb_blocksize),
+		.ops = &xfs_rmapbt_buf_ops,
+		.work = &xfs_rmaproot_init,
+		.need_init = xfs_sb_version_hasrmapbt(&mp->m_sb)
+	},
+	{ /* REFC root block */
+		.daddr = XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)),
+		.numblks = BTOBB(mp->m_sb.sb_blocksize),
+		.ops = &xfs_refcountbt_buf_ops,
+		.work = &xfs_btroot_init,
+		.type = XFS_BTNUM_REFC,
+		.need_init = xfs_sb_version_hasreflink(&mp->m_sb)
+	},
+	{ /* NULL terminating block */
+		.daddr = XFS_BUF_DADDR_NULL,
+	}
+	};
+	struct xfs_aghdr_grow_data *dp;
+	int			error = 0;
+
+	/* Account for AG free space in new AG */
+	id->nfree += id->agsize - mp->m_ag_prealloc_blocks;
+	for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) {
+		if (!dp->need_init)
+			continue;
+
+		id->daddr = dp->daddr;
+		id->numblks = dp->numblks;
+		id->type = dp->type;
+		error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops);
+		if (error)
+			break;
+	}
+	return error;
+}
+
+/*
+ * Extend the AG indicated by @id by the length passed in.
+ */
+int
+xfs_ag_extend_space(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct aghdr_init_data	*id,
+	xfs_extlen_t		len)
+{
+	struct xfs_owner_info	oinfo;
+	struct xfs_buf		*bp;
+	struct xfs_agi		*agi;
+	struct xfs_agf		*agf;
+	int			error;
+
+	/*
+	 * Change the agi length.
+	 */
+	error = xfs_ialloc_read_agi(mp, tp, id->agno, &bp);
+	if (error)
+		return error;
+
+	agi = XFS_BUF_TO_AGI(bp);
+	be32_add_cpu(&agi->agi_length, len);
+	ASSERT(id->agno == mp->m_sb.sb_agcount - 1 ||
+	       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
+	xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
+
+	/*
+	 * Change agf length.
+	 */
+	error = xfs_alloc_read_agf(mp, tp, id->agno, 0, &bp);
+	if (error)
+		return error;
+
+	agf = XFS_BUF_TO_AGF(bp);
+	be32_add_cpu(&agf->agf_length, len);
+	ASSERT(agf->agf_length == agi->agi_length);
+	xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
+
+	/*
+	 * Free the new space.
+	 *
+	 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
+	 * this doesn't actually exist in the rmap btree.
+	 */
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+	error = xfs_rmap_free(tp, bp, id->agno,
+				be32_to_cpu(agf->agf_length) - len,
+				len, &oinfo);
+	if (error)
+		return error;
+
+	return xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, id->agno,
+					be32_to_cpu(agf->agf_length) - len),
+				len, &oinfo, XFS_AG_RESV_NONE);
+}
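
The header initialisation above is table-driven and leaves buffer
submission to the caller through id->buffer_list.  A hedged caller sketch,
loosely modelled on the growfs path (error handling trimmed; agno and
agsize are assumed computed by the caller):

	struct aghdr_init_data	id = {
		.agno	= agno,		/* AG being initialised */
		.agsize	= agsize,	/* its size in blocks */
	};
	int			error;

	INIT_LIST_HEAD(&id.buffer_list);
	error = xfs_ag_init_headers(mp, &id);
	if (error) {
		xfs_buf_delwri_cancel(&id.buffer_list);
		return error;
	}
	/* The caller owns the delwri list: write the headers and wait. */
	error = xfs_buf_delwri_submit(&id.buffer_list);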
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
new file mode 100644
index 0000000..412702e
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 Red Hat, Inc.
+ * All rights reserved.
+ */
+
+#ifndef __LIBXFS_AG_H
+#define __LIBXFS_AG_H 1
+
+struct xfs_mount;
+struct xfs_trans;
+
+struct aghdr_init_data {
+	/* per ag data */
+	xfs_agblock_t		agno;		/* ag to init */
+	xfs_extlen_t		agsize;		/* new AG size */
+	struct list_head	buffer_list;	/* buffer writeback list */
+	xfs_rfsblock_t		nfree;		/* cumulative new free space */
+
+	/* per header data */
+	xfs_daddr_t		daddr;		/* header location */
+	size_t			numblks;	/* size of header */
+	xfs_btnum_t		type;		/* type of btree root block */
+};
+
+int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id);
+int xfs_ag_extend_space(struct xfs_mount *mp, struct xfs_trans *tp,
+			struct aghdr_init_data *id, xfs_extlen_t len);
+
+#endif /* __LIBXFS_AG_H */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 4bcc095..dc9dd38 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -39,6 +39,9 @@
 #include "xfs_buf_item.h"
 #include "xfs_log.h"
 #include "xfs_ag_resv.h"
+#include "xfs_bmap.h"
+
+extern kmem_zone_t	*xfs_bmap_free_item_zone;
 
 struct workqueue_struct *xfs_alloc_wq;
 
@@ -2060,6 +2063,30 @@
 	return true;
 }
 
+int
+xfs_free_agfl_block(
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		agbno,
+	struct xfs_buf		*agbp,
+	struct xfs_owner_info	*oinfo)
+{
+	int			error;
+	struct xfs_buf		*bp;
+
+	error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
+				   XFS_AG_RESV_AGFL);
+	if (error)
+		return error;
+
+	bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0);
+	if (!bp)
+		return -EFSCORRUPTED;
+	xfs_trans_binval(tp, bp);
+
+	return 0;
+}
+
 /*
  * Check the agfl fields of the agf for inconsistency or corruption. The purpose
  * is to detect an agfl header padding mismatch between current and early v5
@@ -2148,6 +2175,40 @@
 }
 
 /*
+ * Defer an AGFL block free. This is effectively equivalent to
+ * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
+ *
+ * Deferring AGFL frees helps prevent log reservation overruns due to too many
+ * allocation operations in a transaction. AGFL frees are prone to this problem
+ * because for one they are always freed one at a time. Further, an immediate
+ * AGFL block free can cause a btree join and require another block free before
+ * the real allocation can proceed. Deferring the free disconnects freeing up
+ * the AGFL slot from freeing the block.
+ */
+STATIC void
+xfs_defer_agfl_block(
+	struct xfs_mount		*mp,
+	struct xfs_defer_ops		*dfops,
+	xfs_agnumber_t			agno,
+	xfs_fsblock_t			agbno,
+	struct xfs_owner_info		*oinfo)
+{
+	struct xfs_extent_free_item	*new;		/* new element */
+
+	ASSERT(xfs_bmap_free_item_zone != NULL);
+	ASSERT(oinfo != NULL);
+
+	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
+	new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
+	new->xefi_blockcount = 1;
+	new->xefi_oinfo = *oinfo;
+
+	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
+
+	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
+}
+
+/*
  * Decide whether to use this allocation group for this allocation.
  * If so, fix up the btree freelist's size.
  */
@@ -2247,21 +2308,20 @@
 	else
 		xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
 	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
-		struct xfs_buf	*bp;
-
 		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
 		if (error)
 			goto out_agbp_relse;
-		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
-					   &targs.oinfo, XFS_AG_RESV_AGFL);
-		if (error)
-			goto out_agbp_relse;
-		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
-		if (!bp) {
-			error = -EFSCORRUPTED;
-			goto out_agbp_relse;
+
+		/* defer agfl frees if dfops is provided */
+		if (tp->t_agfl_dfops) {
+			xfs_defer_agfl_block(mp, tp->t_agfl_dfops, args->agno,
+					     bno, &targs.oinfo);
+		} else {
+			error = xfs_free_agfl_block(tp, args->agno, bno, agbp,
+						    &targs.oinfo);
+			if (error)
+				goto out_agbp_relse;
 		}
-		xfs_trans_binval(tp, bp);
 	}
 
 	targs.tp = tp;
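
The opt-in knob here is tp->t_agfl_dfops: when the transaction carries a
dfops pointer, freelist shrinking defers the block frees rather than doing
them inline.  A hedged sketch of a caller opting in (the xfs_defer.c hunk
below shows xfs_defer_finish() doing the same thing temporarily on behalf
of legacy callers):

	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		firstfsb;

	xfs_defer_init(&dfops, &firstfsb);
	tp->t_agfl_dfops = &dfops;	/* AGFL frees become deferred ops */

	/* ... allocations that may shrink the AGFL ... */

	error = xfs_defer_finish(&tp, &dfops);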
@@ -2949,18 +3009,20 @@
  * after fixing up the freelist.
  */
 int				/* error */
-xfs_free_extent(
+__xfs_free_extent(
 	struct xfs_trans	*tp,	/* transaction pointer */
 	xfs_fsblock_t		bno,	/* starting block number of extent */
 	xfs_extlen_t		len,	/* length of extent */
 	struct xfs_owner_info	*oinfo,	/* extent owner */
-	enum xfs_ag_resv_type	type)	/* block reservation type */
+	enum xfs_ag_resv_type	type,	/* block reservation type */
+	bool			skip_discard)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
 	struct xfs_buf		*agbp;
 	xfs_agnumber_t		agno = XFS_FSB_TO_AGNO(mp, bno);
 	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(mp, bno);
 	int			error;
+	unsigned int		busy_flags = 0;
 
 	ASSERT(len != 0);
 	ASSERT(type != XFS_AG_RESV_AGFL);
@@ -2984,7 +3046,9 @@
 	if (error)
 		goto err;
 
-	xfs_extent_busy_insert(tp, agno, agbno, len, 0);
+	if (skip_discard)
+		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
+	xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags);
 	return 0;
 
 err:
@@ -3116,3 +3180,40 @@
 
 	return xfs_btree_has_record(cur, &low, &high, exists);
 }
+
+/*
+ * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
+ * error code or XFS_BTREE_QUERY_RANGE_ABORT.
+ */
+int
+xfs_agfl_walk(
+	struct xfs_mount	*mp,
+	struct xfs_agf		*agf,
+	struct xfs_buf		*agflbp,
+	xfs_agfl_walk_fn	walk_fn,
+	void			*priv)
+{
+	__be32			*agfl_bno;
+	unsigned int		i;
+	int			error;
+
+	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
+	i = be32_to_cpu(agf->agf_flfirst);
+
+	/* Nothing to walk in an empty AGFL. */
+	if (agf->agf_flcount == cpu_to_be32(0))
+		return 0;
+
+	/* Otherwise, walk from first to last, wrapping as needed. */
+	for (;;) {
+		error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
+		if (error)
+			return error;
+		if (i == be32_to_cpu(agf->agf_fllast))
+			break;
+		if (++i == xfs_agfl_size(mp))
+			i = 0;
+	}
+
+	return 0;
+}
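
A hedged usage sketch of the new walker; count_agfl_block() is a
hypothetical callback, and agbp/agflbp are assumed already read:

	static int
	count_agfl_block(
		struct xfs_mount	*mp,
		xfs_agblock_t		bno,
		void			*priv)
	{
		unsigned int		*count = priv;

		(*count)++;
		return 0;	/* negative errno or the ABORT code stops the walk */
	}

	/* ... then, with the AGF and AGFL buffers in hand ... */
	unsigned int		count = 0;
	int			error;

	error = xfs_agfl_walk(mp, XFS_BUF_TO_AGF(agbp), agflbp,
			count_agfl_block, &count);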
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index cbf789e..0747adc 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -191,12 +191,24 @@
  * Free an extent.
  */
 int				/* error */
-xfs_free_extent(
+__xfs_free_extent(
 	struct xfs_trans	*tp,	/* transaction pointer */
 	xfs_fsblock_t		bno,	/* starting block number of extent */
 	xfs_extlen_t		len,	/* length of extent */
 	struct xfs_owner_info	*oinfo,	/* extent owner */
-	enum xfs_ag_resv_type	type);	/* block reservation type */
+	enum xfs_ag_resv_type	type,	/* block reservation type */
+	bool			skip_discard);
+
+static inline int
+xfs_free_extent(
+	struct xfs_trans	*tp,
+	xfs_fsblock_t		bno,
+	xfs_extlen_t		len,
+	struct xfs_owner_info	*oinfo,
+	enum xfs_ag_resv_type	type)
+{
+	return __xfs_free_extent(tp, bno, len, oinfo, type, false);
+}
 
 int				/* error */
 xfs_alloc_lookup_le(
@@ -223,6 +235,8 @@
 			xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
 int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp,
 			xfs_agnumber_t agno, struct xfs_buf **bpp);
+int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t,
+			struct xfs_buf *, struct xfs_owner_info *);
 int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
 int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno,
 		struct xfs_buf **agbp);
@@ -248,4 +262,9 @@
 int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 		xfs_extlen_t len, bool *exist);
 
+typedef int (*xfs_agfl_walk_fn)(struct xfs_mount *mp, xfs_agblock_t bno,
+		void *priv);
+int xfs_agfl_walk(struct xfs_mount *mp, struct xfs_agf *agf,
+		struct xfs_buf *agflbp, xfs_agfl_walk_fn walk_fn, void *priv);
+
 #endif	/* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index b451649..18aec7a 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -547,3 +547,12 @@
 		return blocklen / sizeof(xfs_alloc_rec_t);
 	return blocklen / (sizeof(xfs_alloc_key_t) + sizeof(xfs_alloc_ptr_t));
 }
+
+/* Calculate the freespace btree size for some records. */
+xfs_extlen_t
+xfs_allocbt_calc_size(
+	struct xfs_mount	*mp,
+	unsigned long long	len)
+{
+	return xfs_btree_calc_size(mp->m_alloc_mnr, len);
+}
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 45e189e..2fd5472 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -61,5 +61,7 @@
 		struct xfs_trans *, struct xfs_buf *,
 		xfs_agnumber_t, xfs_btnum_t);
 extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
+extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp,
+		unsigned long long len);
 
 #endif	/* __XFS_ALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 35a1244..c3d02a6 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -236,7 +236,7 @@
 	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
 	args.total = xfs_attr_calc_size(&args, &local);
 
-	error = xfs_qm_dqattach(dp, 0);
+	error = xfs_qm_dqattach(dp);
 	if (error)
 		return error;
 
@@ -427,7 +427,7 @@
 	 */
 	args.op_flags = XFS_DA_OP_OKNOENT;
 
-	error = xfs_qm_dqattach(dp, 0);
+	error = xfs_qm_dqattach(dp);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 21be186..83a6d3c 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -620,7 +620,7 @@
 		/*
 		 * If the "remote" value is in the cache, remove it.
 		 */
-		bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
+		bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
 		if (bp) {
 			xfs_buf_stale(bp);
 			xfs_buf_relse(bp);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 040eeda..7b0e2b5 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -246,7 +246,7 @@
 	struct xfs_btree_cur	*cur,
 	xfs_fsblock_t		bno)
 {
-	struct xfs_log_item_desc *lidp;
+	struct xfs_log_item	*lip;
 	int			i;
 
 	if (!cur)
@@ -260,9 +260,9 @@
 	}
 
 	/* Chase down all the log items to see if the bp is there */
-	list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
-		struct xfs_buf_log_item	*bip;
-		bip = (struct xfs_buf_log_item *)lidp->lid_item;
+	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
+		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;
+
 		if (bip->bli_item.li_type == XFS_LI_BUF &&
 		    XFS_BUF_ADDR(bip->bli_buf) == bno)
 			return bip->bli_buf;
@@ -312,8 +312,9 @@
 				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
 					__func__, j, i,
 					(unsigned long long)be64_to_cpu(*thispa));
-				panic("%s: ptrs are equal in node\n",
+				xfs_err(mp, "%s: ptrs are equal in node\n",
 					__func__);
+				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 			}
 		}
 	}
@@ -483,7 +484,8 @@
 error_norelse:
 	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
 		__func__, i);
-	panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
+	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 	return;
 }
 
@@ -542,12 +544,13 @@
  * The list is maintained sorted (by block number).
  */
 void
-xfs_bmap_add_free(
+__xfs_bmap_add_free(
 	struct xfs_mount		*mp,
 	struct xfs_defer_ops		*dfops,
 	xfs_fsblock_t			bno,
 	xfs_filblks_t			len,
-	struct xfs_owner_info		*oinfo)
+	struct xfs_owner_info		*oinfo,
+	bool				skip_discard)
 {
 	struct xfs_extent_free_item	*new;		/* new element */
 #ifdef DEBUG
@@ -574,6 +577,7 @@
 		new->xefi_oinfo = *oinfo;
 	else
 		xfs_rmap_skip_owner_update(&new->xefi_oinfo);
+	new->xefi_skip_discard = skip_discard;
 	trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
 			XFS_FSB_TO_AGBNO(mp, bno), len);
 	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
@@ -2001,10 +2005,13 @@
 		ASSERT(0);
 	}
 
-	/* add reverse mapping */
-	error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
-	if (error)
-		goto done;
+	/* add reverse mapping unless caller opted out */
+	if (!(bma->flags & XFS_BMAPI_NORMAP)) {
+		error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip,
+				whichfork, new);
+		if (error)
+			goto done;
+	}
 
 	/* convert to a btree if necessary */
 	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
@@ -2668,7 +2675,8 @@
 	struct xfs_bmbt_irec	*new,
 	xfs_fsblock_t		*first,
 	struct xfs_defer_ops	*dfops,
-	int			*logflagsp)
+	int			*logflagsp,
+	int			flags)
 {
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	struct xfs_mount	*mp = ip->i_mount;
@@ -2845,10 +2853,12 @@
 		break;
 	}
 
-	/* add reverse mapping */
-	error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new);
-	if (error)
-		goto done;
+	/* add reverse mapping unless caller opted out */
+	if (!(flags & XFS_BMAPI_NORMAP)) {
+		error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new);
+		if (error)
+			goto done;
+	}
 
 	/* convert to a btree if necessary */
 	if (xfs_bmap_needs_btree(ip, whichfork)) {
@@ -4123,7 +4133,8 @@
 	else
 		error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
 				whichfork, &bma->icur, &bma->cur, &bma->got,
-				bma->firstblock, bma->dfops, &bma->logflags);
+				bma->firstblock, bma->dfops, &bma->logflags,
+				bma->flags);
 
 	bma->logflags |= tmp_logflags;
 	if (error)
@@ -4509,30 +4520,37 @@
 	return error;
 }
 
-static int
+int
 xfs_bmapi_remap(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
 	xfs_fileoff_t		bno,
 	xfs_filblks_t		len,
 	xfs_fsblock_t		startblock,
-	struct xfs_defer_ops	*dfops)
+	struct xfs_defer_ops	*dfops,
+	int			flags)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	struct xfs_ifork	*ifp;
 	struct xfs_btree_cur	*cur = NULL;
 	xfs_fsblock_t		firstblock = NULLFSBLOCK;
 	struct xfs_bmbt_irec	got;
 	struct xfs_iext_cursor	icur;
+	int			whichfork = xfs_bmapi_whichfork(flags);
 	int			logflags = 0, error;
 
+	ifp = XFS_IFORK_PTR(ip, whichfork);
 	ASSERT(len > 0);
 	ASSERT(len <= (xfs_filblks_t)MAXEXTLEN);
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
+			   XFS_BMAPI_NORMAP)));
+	ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
+			(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
 
 	if (unlikely(XFS_TEST_ERROR(
-	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
-	     XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
+	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
 	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
 		XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
 		return -EFSCORRUPTED;
@@ -4542,7 +4560,7 @@
 		return -EIO;
 
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
-		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+		error = xfs_iread_extents(tp, ip, whichfork);
 		if (error)
 			return error;
 	}
@@ -4557,7 +4575,7 @@
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
 	if (ifp->if_flags & XFS_IFBROOT) {
-		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
+		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
 		cur->bc_private.b.firstblock = firstblock;
 		cur->bc_private.b.dfops = dfops;
 		cur->bc_private.b.flags = 0;
@@ -4566,18 +4584,21 @@
 	got.br_startoff = bno;
 	got.br_startblock = startblock;
 	got.br_blockcount = len;
-	got.br_state = XFS_EXT_NORM;
+	if (flags & XFS_BMAPI_PREALLOC)
+		got.br_state = XFS_EXT_UNWRITTEN;
+	else
+		got.br_state = XFS_EXT_NORM;
 
-	error = xfs_bmap_add_extent_hole_real(tp, ip, XFS_DATA_FORK, &icur,
-			&cur, &got, &firstblock, dfops, &logflags);
+	error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur,
+			&cur, &got, &firstblock, dfops, &logflags, flags);
 	if (error)
 		goto error0;
 
-	if (xfs_bmap_wants_extents(ip, XFS_DATA_FORK)) {
+	if (xfs_bmap_wants_extents(ip, whichfork)) {
 		int		tmp_logflags = 0;
 
 		error = xfs_bmap_btree_to_extents(tp, ip, cur,
-			&tmp_logflags, XFS_DATA_FORK);
+			&tmp_logflags, whichfork);
 		logflags |= tmp_logflags;
 	}
 
@@ -5104,9 +5125,12 @@
 			error = xfs_refcount_decrease_extent(mp, dfops, del);
 			if (error)
 				goto done;
-		} else
-			xfs_bmap_add_free(mp, dfops, del->br_startblock,
-					del->br_blockcount, NULL);
+		} else {
+			__xfs_bmap_add_free(mp, dfops, del->br_startblock,
+					del->br_blockcount, NULL,
+					(bflags & XFS_BMAPI_NODISCARD) ||
+					del->br_state == XFS_EXT_UNWRITTEN);
+		}
 	}
 
 	/*
@@ -6148,7 +6172,7 @@
 	switch (type) {
 	case XFS_BMAP_MAP:
 		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
-				startblock, dfops);
+				startblock, dfops, 0);
 		*blockcount = 0;
 		break;
 	case XFS_BMAP_UNMAP:
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 2b766b3..2c233f9 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -68,6 +68,7 @@
 	xfs_extlen_t		xefi_blockcount;/* number of blocks in extent */
 	struct list_head	xefi_list;
 	struct xfs_owner_info	xefi_oinfo;	/* extent owner */
+	bool			xefi_skip_discard;
 };
 
 #define	XFS_BMAP_MAX_NMAP	4
@@ -116,6 +117,12 @@
 /* Only convert unwritten extents, don't allocate new blocks */
 #define XFS_BMAPI_CONVERT_ONLY	0x800
 
+/* Skip online discard of freed extents */
+#define XFS_BMAPI_NODISCARD	0x1000
+
+/* Do not update the rmap btree.  Used for reconstructing bmbt from rmapbt. */
+#define XFS_BMAPI_NORMAP	0x2000
+
 #define XFS_BMAPI_FLAGS \
 	{ XFS_BMAPI_ENTIRE,	"ENTIRE" }, \
 	{ XFS_BMAPI_METADATA,	"METADATA" }, \
@@ -128,7 +135,9 @@
 	{ XFS_BMAPI_REMAP,	"REMAP" }, \
 	{ XFS_BMAPI_COWFORK,	"COWFORK" }, \
 	{ XFS_BMAPI_DELALLOC,	"DELALLOC" }, \
-	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }
+	{ XFS_BMAPI_CONVERT_ONLY, "CONVERT_ONLY" }, \
+	{ XFS_BMAPI_NODISCARD,	"NODISCARD" }, \
+	{ XFS_BMAPI_NORMAP,	"NORMAP" }
 
 
 static inline int xfs_bmapi_aflag(int w)
@@ -192,9 +201,9 @@
 void	xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
 int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
-void	xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
+void	__xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
 			  xfs_fsblock_t bno, xfs_filblks_t len,
-			  struct xfs_owner_info *oinfo);
+			  struct xfs_owner_info *oinfo, bool skip_discard);
 void	xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
 int	xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
@@ -240,6 +249,17 @@
 		struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
 		int eof);
 
+static inline void
+xfs_bmap_add_free(
+	struct xfs_mount		*mp,
+	struct xfs_defer_ops		*dfops,
+	xfs_fsblock_t			bno,
+	xfs_filblks_t			len,
+	struct xfs_owner_info		*oinfo)
+{
+	__xfs_bmap_add_free(mp, dfops, bno, len, oinfo, false);
+}
+
 enum xfs_bmap_intent_type {
 	XFS_BMAP_MAP = 1,
 	XFS_BMAP_UNMAP,
@@ -277,4 +297,8 @@
 xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork,
 		struct xfs_bmbt_irec *irec);
 
+int	xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip,
+		xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock,
+		struct xfs_defer_ops *dfops, int flags);
+
 #endif	/* __XFS_BMAP_H__ */
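
The exported xfs_bmapi_remap() prototype pairs with XFS_BMAPI_NORMAP above:
online repair can re-insert a mapping discovered in the rmapbt without
updating the rmapbt again.  A hedged call sketch (tp, ip and dfops are
assumed set up by the caller):

	error = xfs_bmapi_remap(tp, ip, startoff, blockcount, startblock,
			dfops, XFS_BMAPI_NORMAP);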
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index d89d06b..ac9d4ae 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -660,3 +660,12 @@
 	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
 	return error;
 }
+
+/* Calculate the bmap btree size for some records. */
+unsigned long long
+xfs_bmbt_calc_size(
+	struct xfs_mount	*mp,
+	unsigned long long	len)
+{
+	return xfs_btree_calc_size(mp->m_bmap_dmnr, len);
+}
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index e450574..fb3cd2d 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -118,4 +118,7 @@
 extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
 		struct xfs_trans *, struct xfs_inode *, int);
 
+extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp,
+		unsigned long long len);
+
 #endif	/* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index ac7d664..c825c81 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4836,14 +4836,14 @@
  * Calculate the number of blocks needed to store a given number of records
  * in a short-format (per-AG metadata) btree.
  */
-xfs_extlen_t
+unsigned long long
 xfs_btree_calc_size(
 	uint			*limits,
 	unsigned long long	len)
 {
 	int			level;
 	int			maxrecs;
-	xfs_extlen_t		rval;
+	unsigned long long	rval;
 
 	maxrecs = limits[0];
 	for (level = 0, rval = 0; len > 1; level++) {
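
The widened return type matters for very large record counts.  A rough
illustration with made-up numbers:

	/*
	 * With maxrecs = 500 per block, len = 4 * 10^12 records needs
	 * about 4e12/500 = 8e9 blocks at the first level alone, which
	 * already overflows a 32-bit xfs_extlen_t (max ~4.3e9).
	 */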
@@ -4919,3 +4919,24 @@
 	*exists = false;
 	return error;
 }
+
+/* Are there more records in this btree? */
+bool
+xfs_btree_has_more_records(
+	struct xfs_btree_cur	*cur)
+{
+	struct xfs_btree_block	*block;
+	struct xfs_buf		*bp;
+
+	block = xfs_btree_get_block(cur, 0, &bp);
+
+	/* There are still records in this block. */
+	if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block))
+		return true;
+
+	/* There are more record blocks. */
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
+	else
+		return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
+}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 9227159..d7911efe 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -482,7 +482,7 @@
 		unsigned int max_recs);
 
 uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
-xfs_extlen_t xfs_btree_calc_size(uint *limits, unsigned long long len);
+unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
 
 /* return codes */
 #define XFS_BTREE_QUERY_RANGE_CONTINUE	0	/* keep iterating */
@@ -528,5 +528,6 @@
 		union xfs_btree_key *key);
 int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
 		union xfs_btree_irec *high, bool *exists);
+bool xfs_btree_has_more_records(struct xfs_btree_cur *cur);
 
 #endif	/* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 087fea0..3daf175 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -220,7 +220,7 @@
 {
 	struct xfs_defer_pending	*dfp;
 
-	trace_xfs_defer_trans_abort(tp->t_mountp, dop);
+	trace_xfs_defer_trans_abort(tp->t_mountp, dop, _RET_IP_);
 
 	/* Abort intent items that don't have a done item. */
 	list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
@@ -253,7 +253,7 @@
 	for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
 		xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
 
-	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
+	trace_xfs_defer_trans_roll((*tp)->t_mountp, dop, _RET_IP_);
 
 	/* Roll the transaction. */
 	error = xfs_trans_roll(tp);
@@ -352,10 +352,21 @@
 	void				*state;
 	int				error = 0;
 	void				(*cleanup_fn)(struct xfs_trans *, void *, int);
+	struct xfs_defer_ops		*orig_dop;
 
 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
 
-	trace_xfs_defer_finish((*tp)->t_mountp, dop);
+	trace_xfs_defer_finish((*tp)->t_mountp, dop, _RET_IP_);
+
+	/*
+	 * Attach dfops to the transaction during deferred ops processing. This
+	 * explicitly causes calls into the allocator to defer AGFL block frees.
+	 * Note that this code can go away once all dfops users attach to the
+	 * associated tp.
+	 */
+	ASSERT(!(*tp)->t_agfl_dfops || ((*tp)->t_agfl_dfops == dop));
+	orig_dop = (*tp)->t_agfl_dfops;
+	(*tp)->t_agfl_dfops = dop;
 
 	/* Until we run out of pending work to finish... */
 	while (xfs_defer_has_unfinished_work(dop)) {
@@ -428,10 +439,11 @@
 	}
 
 out:
+	(*tp)->t_agfl_dfops = orig_dop;
 	if (error)
 		trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
 	else
-		trace_xfs_defer_finish_done((*tp)->t_mountp, dop);
+		trace_xfs_defer_finish_done((*tp)->t_mountp, dop, _RET_IP_);
 	return error;
 }
 
@@ -447,7 +459,7 @@
 	struct list_head		*pwi;
 	struct list_head		*n;
 
-	trace_xfs_defer_cancel(NULL, dop);
+	trace_xfs_defer_cancel(NULL, dop, _RET_IP_);
 
 	/*
 	 * Free the pending items.  Caller should already have arranged
@@ -532,5 +544,5 @@
 	*fbp = NULLFSBLOCK;
 	INIT_LIST_HEAD(&dop->dop_intake);
 	INIT_LIST_HEAD(&dop->dop_pending);
-	trace_xfs_defer_init(NULL, dop);
+	trace_xfs_defer_init(NULL, dop, _RET_IP_);
 }
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 045beac..e70725b 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -55,6 +55,7 @@
 	XFS_DEFER_OPS_TYPE_REFCOUNT,
 	XFS_DEFER_OPS_TYPE_RMAP,
 	XFS_DEFER_OPS_TYPE_FREE,
+	XFS_DEFER_OPS_TYPE_AGFL_FREE,
 	XFS_DEFER_OPS_TYPE_MAX,
 };
 
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 8b7a6c3..cce520b 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -41,14 +41,18 @@
 
 /*
  * Do some primitive error checking on ondisk dquot data structures.
+ *
+ * The xfs_dqblk structure /contains/ the xfs_disk_dquot structure;
+ * we verify them separately because at some points we have only the
+ * smaller xfs_disk_dquot structure available.
  */
+
 xfs_failaddr_t
 xfs_dquot_verify(
 	struct xfs_mount *mp,
 	xfs_disk_dquot_t *ddq,
 	xfs_dqid_t	 id,
-	uint		 type,	  /* used only when IO_dorepair is true */
-	uint		 flags)
+	uint		 type)	  /* used only during quotacheck */
 {
 	/*
 	 * We can encounter an uninitialized dquot buffer for 2 reasons:
@@ -70,6 +74,8 @@
 	if (ddq->d_version != XFS_DQUOT_VERSION)
 		return __this_address;
 
+	if (type && ddq->d_flags != type)
+		return __this_address;
 	if (ddq->d_flags != XFS_DQ_USER &&
 	    ddq->d_flags != XFS_DQ_PROJ &&
 	    ddq->d_flags != XFS_DQ_GROUP)
@@ -99,33 +105,44 @@
 	return NULL;
 }
 
+xfs_failaddr_t
+xfs_dqblk_verify(
+	struct xfs_mount	*mp,
+	struct xfs_dqblk	*dqb,
+	xfs_dqid_t		id,
+	uint			type)	/* used only during quotacheck */
+{
+	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	    !uuid_equal(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid))
+		return __this_address;
+
+	return xfs_dquot_verify(mp, &dqb->dd_diskdq, id, type);
+}
+
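
For readers without xfs_format.h to hand, the containment the comment above
describes looks roughly like this (a sketch; the CRC fields only exist on
v5 filesystems):

	struct xfs_dqblk {
		struct xfs_disk_dquot	dd_diskdq;	/* xfs_dquot_verify() checks this */
		char			dd_fill[4];
		__be32			dd_crc;		/* v5 only */
		__be64			dd_lsn;
		uuid_t			dd_uuid;	/* xfs_dqblk_verify() adds this check */
	};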
 /*
  * Do some primitive error checking on ondisk dquot data structures.
  */
 int
-xfs_dquot_repair(
+xfs_dqblk_repair(
 	struct xfs_mount	*mp,
-	struct xfs_disk_dquot	*ddq,
+	struct xfs_dqblk	*dqb,
 	xfs_dqid_t		id,
 	uint			type)
 {
-	struct xfs_dqblk	*d = (struct xfs_dqblk *)ddq;
-
-
 	/*
 	 * Typically, a repair is only requested by quotacheck.
 	 */
 	ASSERT(id != -1);
-	memset(d, 0, sizeof(xfs_dqblk_t));
+	memset(dqb, 0, sizeof(xfs_dqblk_t));
 
-	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
-	d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
-	d->dd_diskdq.d_flags = type;
-	d->dd_diskdq.d_id = cpu_to_be32(id);
+	dqb->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
+	dqb->dd_diskdq.d_version = XFS_DQUOT_VERSION;
+	dqb->dd_diskdq.d_flags = type;
+	dqb->dd_diskdq.d_id = cpu_to_be32(id);
 
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
-		uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
-		xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+		uuid_copy(&dqb->dd_uuid, &mp->m_sb.sb_meta_uuid);
+		xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
 				 XFS_DQUOT_CRC_OFF);
 	}
 
@@ -135,7 +152,8 @@
 STATIC bool
 xfs_dquot_buf_verify_crc(
 	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
+	struct xfs_buf		*bp,
+	bool			readahead)
 {
 	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
 	int			ndquots;
@@ -156,10 +174,12 @@
 
 	for (i = 0; i < ndquots; i++, d++) {
 		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
-				 XFS_DQUOT_CRC_OFF))
+				 XFS_DQUOT_CRC_OFF)) {
+			if (!readahead)
+				xfs_buf_verifier_error(bp, -EFSBADCRC, __func__,
+					d, sizeof(*d), __this_address);
 			return false;
-		if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_meta_uuid))
-			return false;
+		}
 	}
 	return true;
 }
@@ -167,9 +187,10 @@
 STATIC xfs_failaddr_t
 xfs_dquot_buf_verify(
 	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
+	struct xfs_buf		*bp,
+	bool			readahead)
 {
-	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
+	struct xfs_dqblk	*dqb = bp->b_addr;
 	xfs_failaddr_t		fa;
 	xfs_dqid_t		id = 0;
 	int			ndquots;
@@ -195,14 +216,19 @@
 	for (i = 0; i < ndquots; i++) {
 		struct xfs_disk_dquot	*ddq;
 
-		ddq = &d[i].dd_diskdq;
+		ddq = &dqb[i].dd_diskdq;
 
 		if (i == 0)
 			id = be32_to_cpu(ddq->d_id);
 
-		fa = xfs_dquot_verify(mp, ddq, id + i, 0, 0);
-		if (fa)
+		fa = xfs_dqblk_verify(mp, &dqb[i], id + i, 0);
+		if (fa) {
+			if (!readahead)
+				xfs_buf_verifier_error(bp, -EFSCORRUPTED,
+					__func__, &dqb[i],
+					sizeof(struct xfs_dqblk), fa);
 			return fa;
+		}
 	}
 
 	return NULL;
@@ -214,7 +240,7 @@
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 
-	return xfs_dquot_buf_verify(mp, bp);
+	return xfs_dquot_buf_verify(mp, bp, false);
 }
 
 static void
@@ -222,15 +248,10 @@
 	struct xfs_buf		*bp)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
-	xfs_failaddr_t		fa;
 
-	if (!xfs_dquot_buf_verify_crc(mp, bp))
-		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
-	else {
-		fa = xfs_dquot_buf_verify(mp, bp);
-		if (fa)
-			xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
-	}
+	if (!xfs_dquot_buf_verify_crc(mp, bp, false))
+		return;
+	xfs_dquot_buf_verify(mp, bp, false);
 }
 
 /*
@@ -245,8 +266,8 @@
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
 
-	if (!xfs_dquot_buf_verify_crc(mp, bp) ||
-	    xfs_dquot_buf_verify(mp, bp) != NULL) {
+	if (!xfs_dquot_buf_verify_crc(mp, bp, true) ||
+	    xfs_dquot_buf_verify(mp, bp, true) != NULL) {
 		xfs_buf_ioerror(bp, -EIO);
 		bp->b_flags &= ~XBF_DONE;
 	}
@@ -262,11 +283,8 @@
 	struct xfs_buf		*bp)
 {
 	struct xfs_mount	*mp = bp->b_target->bt_mount;
-	xfs_failaddr_t		fa;
 
-	fa = xfs_dquot_buf_verify(mp, bp);
-	if (fa)
-		xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
+	xfs_dquot_buf_verify(mp, bp, false);
 }
 
 const struct xfs_buf_ops xfs_dquot_buf_ops = {
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index bc1789d..d47b916 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -65,7 +65,8 @@
 #define XFS_ERRTAG_LOG_BAD_CRC				29
 #define XFS_ERRTAG_LOG_ITEM_PIN				30
 #define XFS_ERRTAG_BUF_LRU_REF				31
-#define XFS_ERRTAG_MAX					32
+#define XFS_ERRTAG_FORCE_SCRUB_REPAIR			32
+#define XFS_ERRTAG_MAX					33
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -102,5 +103,6 @@
 #define XFS_RANDOM_LOG_BAD_CRC				1
 #define XFS_RANDOM_LOG_ITEM_PIN				1
 #define XFS_RANDOM_BUF_LRU_REF				2
+#define XFS_RANDOM_FORCE_SCRUB_REPAIR			1
 
 #endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 42956d8..c1cb29a 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -98,6 +98,9 @@
 	 XFS_SB_VERSION2_PROJID32BIT	| \
 	 XFS_SB_VERSION2_FTYPE)
 
+/* Maximum size of the xfs filesystem label, no terminating NULL */
+#define XFSLABEL_MAX			12
+
 /*
  * Superblock - in core version.  Must match the ondisk version below.
  * Must be padded to 64 bit alignment.
@@ -122,7 +125,7 @@
 	uint16_t	sb_sectsize;	/* volume sector size, bytes */
 	uint16_t	sb_inodesize;	/* inode size, bytes */
 	uint16_t	sb_inopblock;	/* inodes per block */
-	char		sb_fname[12];	/* file system name */
+	char		sb_fname[XFSLABEL_MAX]; /* file system name */
 	uint8_t		sb_blocklog;	/* log2 of sb_blocksize */
 	uint8_t		sb_sectlog;	/* log2 of sb_sectsize */
 	uint8_t		sb_inodelog;	/* log2 of sb_inodesize */
@@ -213,7 +216,7 @@
 	__be16		sb_sectsize;	/* volume sector size, bytes */
 	__be16		sb_inodesize;	/* inode size, bytes */
 	__be16		sb_inopblock;	/* inodes per block */
-	char		sb_fname[12];	/* file system name */
+	char		sb_fname[XFSLABEL_MAX]; /* file system name */
 	__u8		sb_blocklog;	/* log2 of sb_blocksize */
 	__u8		sb_sectlog;	/* log2 of sb_sectsize */
 	__u8		sb_inodelog;	/* log2 of sb_inodesize */
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index faf1a4e..dddc75e 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -542,13 +542,20 @@
 /* o: Metadata object looked funny but isn't corrupt. */
 #define XFS_SCRUB_OFLAG_WARNING		(1 << 6)
 
+/*
+ * o: IFLAG_REPAIR was set but metadata object did not need fixing or
+ *    optimization and has therefore not been altered.
+ */
+#define XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED (1 << 7)
+
 #define XFS_SCRUB_FLAGS_IN	(XFS_SCRUB_IFLAG_REPAIR)
 #define XFS_SCRUB_FLAGS_OUT	(XFS_SCRUB_OFLAG_CORRUPT | \
 				 XFS_SCRUB_OFLAG_PREEN | \
 				 XFS_SCRUB_OFLAG_XFAIL | \
 				 XFS_SCRUB_OFLAG_XCORRUPT | \
 				 XFS_SCRUB_OFLAG_INCOMPLETE | \
-				 XFS_SCRUB_OFLAG_WARNING)
+				 XFS_SCRUB_OFLAG_WARNING | \
+				 XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED)
 #define XFS_SCRUB_FLAGS_ALL	(XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
 
 /*
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index de627fa..4ca4ff7 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -148,7 +148,7 @@
 /*
  * Insert a single inobt record. Cursor must already point to desired location.
  */
-STATIC int
+int
 xfs_inobt_insert_rec(
 	struct xfs_btree_cur	*cur,
 	uint16_t		holemask,
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index c5402bb..77fffce 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -176,6 +176,9 @@
 		xfs_agino_t high, bool *exists);
 int xfs_ialloc_count_inodes(struct xfs_btree_cur *cur, xfs_agino_t *count,
 		xfs_agino_t *freecount);
+int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
+		uint8_t count, int32_t freecount, xfs_inofree_t free,
+		int *stat);
 
 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
 void xfs_ialloc_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 367e9a0..b04c555 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -296,7 +296,7 @@
 	case cpu_to_be32(XFS_FIBT_MAGIC):
 		break;
 	default:
-		return NULL;
+		return __this_address;
 	}
 
 	/* level verification */
@@ -608,3 +608,12 @@
 	*used += tree_len;
 	return 0;
 }
+
+/* Calculate the inobt btree size for some records. */
+xfs_extlen_t
+xfs_iallocbt_calc_size(
+	struct xfs_mount	*mp,
+	unsigned long long	len)
+{
+	return xfs_btree_calc_size(mp->m_inobt_mnr, len);
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index aa81e2e..4acdd54 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -74,5 +74,7 @@
 
 int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno,
 		xfs_extlen_t *ask, xfs_extlen_t *used);
+extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
+		unsigned long long len);
 
 #endif	/* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index bb1b13a..d4af280 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -107,14 +107,12 @@
  * to a single function. None of these XFS_QMOPT_* flags are meant to have
  * persistent values (ie. their values can and will change between versions)
  */
-#define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
 #define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
 #define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
 #define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
 #define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
 #define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
 #define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
-#define XFS_QMOPT_DQNEXT	0x0008000 /* return next dquot >= this ID */
 
 /*
  * flags to xfs_trans_mod_dquot to indicate which field needs to be
@@ -152,10 +150,11 @@
 #define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
 
 extern xfs_failaddr_t xfs_dquot_verify(struct xfs_mount *mp,
-		struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type,
-		uint flags);
+		struct xfs_disk_dquot *ddq, xfs_dqid_t id, uint type);
+extern xfs_failaddr_t xfs_dqblk_verify(struct xfs_mount *mp,
+		struct xfs_dqblk *dqb, xfs_dqid_t id, uint type);
 extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
-extern int xfs_dquot_repair(struct xfs_mount *mp, struct xfs_disk_dquot *ddq,
+extern int xfs_dqblk_repair(struct xfs_mount *mp, struct xfs_dqblk *dqb,
 		xfs_dqid_t id, uint type);
 
 #endif	/* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 560e284..418d532 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -88,8 +88,25 @@
 	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
 }
 
+/*
+ * Look up the first record equal to [bno, len] in the btree
+ * given by cur.
+ */
+int
+xfs_refcount_lookup_eq(
+	struct xfs_btree_cur	*cur,
+	xfs_agblock_t		bno,
+	int			*stat)
+{
+	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
+			XFS_LOOKUP_EQ);
+	cur->bc_rec.rc.rc_startblock = bno;
+	cur->bc_rec.rc.rc_blockcount = 0;
+	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+}
+
 /* Convert on-disk record to in-core format. */
-static inline void
+void
 xfs_refcount_btrec_to_irec(
 	union xfs_btree_rec		*rec,
 	struct xfs_refcount_irec	*irec)
@@ -149,7 +166,7 @@
  * by [bno, len, refcount].
  * This either works (return 0) or gets an EFSCORRUPTED error.
  */
-STATIC int
+int
 xfs_refcount_insert(
 	struct xfs_btree_cur		*cur,
 	struct xfs_refcount_irec	*irec,
@@ -162,7 +179,10 @@
 	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
 	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
 	error = xfs_btree_insert(cur, i);
+	if (error)
+		goto out_error;
 	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
+
 out_error:
 	if (error)
 		trace_xfs_refcount_insert_error(cur->bc_mp,
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 2a731ac..a92ad90 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -24,6 +24,8 @@
 		xfs_agblock_t bno, int *stat);
 extern int xfs_refcount_lookup_ge(struct xfs_btree_cur *cur,
 		xfs_agblock_t bno, int *stat);
+extern int xfs_refcount_lookup_eq(struct xfs_btree_cur *cur,
+		xfs_agblock_t bno, int *stat);
 extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur,
 		struct xfs_refcount_irec *irec, int *stat);
 
@@ -85,5 +87,10 @@
 
 extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
 		xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
+union xfs_btree_rec;
+extern void xfs_refcount_btrec_to_irec(union xfs_btree_rec *rec,
+		struct xfs_refcount_irec *irec);
+extern int xfs_refcount_insert(struct xfs_btree_cur *cur,
+		struct xfs_refcount_irec *irec, int *stat);
 
 #endif	/* __XFS_REFCOUNT_H__ */
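
A minimal sketch (not part of the patch): with xfs_refcount_lookup_eq() and
xfs_refcount_get_rec() both exported above, a repair-style caller could read
the record that starts exactly at a given block.  The helper name below is
hypothetical:

    /* Read the refcount record starting exactly at @bno, if any. */
    STATIC int
    xfs_example_read_refcount_at(
    	struct xfs_btree_cur		*cur,
    	xfs_agblock_t			bno,
    	struct xfs_refcount_irec	*irec)
    {
    	int				found;
    	int				error;

    	error = xfs_refcount_lookup_eq(cur, bno, &found);
    	if (error)
    		return error;
    	if (!found)
    		return -ENOENT;	/* nothing starts at @bno */

    	error = xfs_refcount_get_rec(cur, irec, &found);
    	if (error)
    		return error;
    	return found ? 0 : -ENOENT;
    }
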
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index fba8d27..c0644f1 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1374,6 +1374,8 @@
 	 */
 	error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
 			&PREV, &i);
+	if (error)
+		goto done;
 	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
 
 	ASSERT(PREV.rm_offset <= offset);
@@ -2030,6 +2032,34 @@
 	return error;
 }
 
+/* Insert a raw rmap into the rmapbt. */
+int
+xfs_rmap_map_raw(
+	struct xfs_btree_cur	*cur,
+	struct xfs_rmap_irec	*rmap)
+{
+	struct xfs_owner_info	oinfo;
+
+	oinfo.oi_owner = rmap->rm_owner;
+	oinfo.oi_offset = rmap->rm_offset;
+	oinfo.oi_flags = 0;
+	if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
+		oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
+	if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
+		oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
+
+	if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
+		return xfs_rmap_map(cur, rmap->rm_startblock,
+				rmap->rm_blockcount,
+				rmap->rm_flags & XFS_RMAP_UNWRITTEN,
+				&oinfo);
+
+	return xfs_rmap_map_shared(cur, rmap->rm_startblock,
+			rmap->rm_blockcount,
+			rmap->rm_flags & XFS_RMAP_UNWRITTEN,
+			&oinfo);
+}
+
 struct xfs_rmap_query_range_info {
 	xfs_rmap_query_range_fn	fn;
 	void				*priv;
@@ -2453,3 +2483,56 @@
 		     irec.rm_startblock + irec.rm_blockcount >= bno + len);
 	return 0;
 }
+
+struct xfs_rmap_key_state {
+	uint64_t			owner;
+	uint64_t			offset;
+	unsigned int			flags;
+	bool				has_rmap;
+};
+
+/* For each rmap given, figure out if it doesn't match the key we want. */
+STATIC int
+xfs_rmap_has_other_keys_helper(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_rmap_key_state	*rks = priv;
+
+	if (rks->owner == rec->rm_owner && rks->offset == rec->rm_offset &&
+	    ((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags)
+		return 0;
+	rks->has_rmap = true;
+	return XFS_BTREE_QUERY_RANGE_ABORT;
+}
+
+/*
+ * Given an extent and some owner info, can we find records overlapping
+ * the extent whose owner info does not match the given owner?
+ */
+int
+xfs_rmap_has_other_keys(
+	struct xfs_btree_cur		*cur,
+	xfs_agblock_t			bno,
+	xfs_extlen_t			len,
+	struct xfs_owner_info		*oinfo,
+	bool				*has_rmap)
+{
+	struct xfs_rmap_irec		low = {0};
+	struct xfs_rmap_irec		high;
+	struct xfs_rmap_key_state	rks;
+	int				error;
+
+	xfs_owner_info_unpack(oinfo, &rks.owner, &rks.offset, &rks.flags);
+	rks.has_rmap = false;
+
+	low.rm_startblock = bno;
+	memset(&high, 0xFF, sizeof(high));
+	high.rm_startblock = bno + len - 1;
+
+	error = xfs_rmap_query_range(cur, &low, &high,
+			xfs_rmap_has_other_keys_helper, &rks);
+	*has_rmap = rks.has_rmap;
+	return error;
+}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 380e53b..43e506f 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -238,5 +238,9 @@
 int xfs_rmap_record_exists(struct xfs_btree_cur *cur, xfs_agblock_t bno,
 		xfs_extlen_t len, struct xfs_owner_info *oinfo,
 		bool *has_rmap);
+int xfs_rmap_has_other_keys(struct xfs_btree_cur *cur, xfs_agblock_t bno,
+		xfs_extlen_t len, struct xfs_owner_info *oinfo,
+		bool *has_rmap);
+int xfs_rmap_map_raw(struct xfs_btree_cur *cur, struct xfs_rmap_irec *rmap);
 
 #endif	/* __XFS_RMAP_H__ */
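
A minimal sketch (not part of the patch) of how the new
xfs_rmap_has_other_keys() answers "is @oinfo the sole owner of this
extent?"; the wrapper name is hypothetical:

    STATIC int
    xfs_example_extent_is_solely_owned(
    	struct xfs_btree_cur	*rmap_cur,
    	xfs_agblock_t		bno,
    	xfs_extlen_t		len,
    	struct xfs_owner_info	*oinfo,
    	bool			*sole)
    {
    	bool			has_other;
    	int			error;

    	error = xfs_rmap_has_other_keys(rmap_cur, bno, len, oinfo,
    			&has_other);
    	if (error)
    		return error;
    	*sole = !has_other;
    	return 0;
    }
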
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 106be2d..369eeb7 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -90,6 +90,9 @@
 	if (error)
 		return error;
 
+	if (nmap == 0 || !xfs_bmap_is_real_extent(&map))
+		return -EFSCORRUPTED;
+
 	ASSERT(map.br_startblock != NULLFSBLOCK);
 	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
 				   XFS_FSB_TO_DADDR(mp, map.br_startblock),
@@ -1033,14 +1036,17 @@
 	int				is_free;
 	int				error = 0;
 
-	if (low_rec->ar_startblock > high_rec->ar_startblock)
+	if (low_rec->ar_startext > high_rec->ar_startext)
 		return -EINVAL;
-	else if (low_rec->ar_startblock == high_rec->ar_startblock)
+	if (low_rec->ar_startext >= mp->m_sb.sb_rextents ||
+	    low_rec->ar_startext == high_rec->ar_startext)
 		return 0;
+	if (high_rec->ar_startext >= mp->m_sb.sb_rextents)
+		high_rec->ar_startext = mp->m_sb.sb_rextents - 1;
 
 	/* Iterate the bitmap, looking for discrepancies. */
-	rtstart = low_rec->ar_startblock;
-	rem = high_rec->ar_startblock - rtstart;
+	rtstart = low_rec->ar_startext;
+	rem = high_rec->ar_startext - rtstart;
 	while (rem) {
 		/* Is the first block free? */
 		error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
@@ -1050,13 +1056,13 @@
 
 		/* How long does the extent go for? */
 		error = xfs_rtfind_forw(mp, tp, rtstart,
-				high_rec->ar_startblock - 1, &rtend);
+				high_rec->ar_startext - 1, &rtend);
 		if (error)
 			break;
 
 		if (is_free) {
-			rec.ar_startblock = rtstart;
-			rec.ar_blockcount = rtend - rtstart + 1;
+			rec.ar_startext = rtstart;
+			rec.ar_extcount = rtend - rtstart + 1;
 
 			error = fn(tp, &rec, priv);
 			if (error)
@@ -1079,9 +1085,9 @@
 {
 	struct xfs_rtalloc_rec		keys[2];
 
-	keys[0].ar_startblock = 0;
-	keys[1].ar_startblock = tp->t_mountp->m_sb.sb_rblocks;
-	keys[0].ar_blockcount = keys[1].ar_blockcount = 0;
+	keys[0].ar_startext = 0;
+	keys[1].ar_startext = tp->t_mountp->m_sb.sb_rextents - 1;
+	keys[0].ar_extcount = keys[1].ar_extcount = 0;
 
 	return xfs_rtalloc_query_range(tp, &keys[0], &keys[1], fn, priv);
 }
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index d9b94bd..d485e14 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -888,6 +888,109 @@
 	return xfs_trans_commit(tp);
 }
 
+/*
+ * Update all the secondary superblocks to match the new state of the primary.
+ * Because we are completely overwriting all the existing fields in the
+ * secondary superblock buffers, there is no need to read them in from disk.
+ * Just get a new buffer, stamp it and write it.
+ *
+ * The sb buffers need to be cached here so that we serialise against other
+ * operations that access the secondary superblocks, but we don't want to keep
+ * them in memory once they are written, so we mark them as one-shot buffers.
+ */
+int
+xfs_update_secondary_sbs(
+	struct xfs_mount	*mp)
+{
+	xfs_agnumber_t		agno;
+	int			saved_error = 0;
+	int			error = 0;
+	LIST_HEAD		(buffer_list);
+
+	/* update secondary superblocks. */
+	for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
+		struct xfs_buf		*bp;
+
+		bp = xfs_buf_get(mp->m_ddev_targp,
+				 XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
+				 XFS_FSS_TO_BB(mp, 1), 0);
+		/*
+		 * If we get an error reading or writing alternate superblocks,
+		 * continue.  xfs_repair chooses the "best" superblock based
+		 * on most matches; if we break early, we'll leave more
+		 * superblocks un-updated than updated, and xfs_repair may
+		 * pick them over the properly-updated primary.
+		 */
+		if (!bp) {
+			xfs_warn(mp,
+		"error allocating secondary superblock for ag %d",
+				agno);
+			if (!saved_error)
+				saved_error = -ENOMEM;
+			continue;
+		}
+
+		bp->b_ops = &xfs_sb_buf_ops;
+		xfs_buf_oneshot(bp);
+		xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
+		xfs_buf_delwri_queue(bp, &buffer_list);
+		xfs_buf_relse(bp);
+
+		/* don't hold too many buffers at once */
+		if (agno % 16)
+			continue;
+
+		error = xfs_buf_delwri_submit(&buffer_list);
+		if (error) {
+			xfs_warn(mp,
+		"write error %d updating a secondary superblock near ag %d",
+				error, agno);
+			if (!saved_error)
+				saved_error = error;
+			continue;
+		}
+	}
+	error = xfs_buf_delwri_submit(&buffer_list);
+	if (error) {
+		xfs_warn(mp,
+		"write error %d updating a secondary superblock near ag %d",
+			error, agno);
+	}
+
+	return saved_error ? saved_error : error;
+}
+
+/*
+ * Same behavior as xfs_sync_sb, except that it is always synchronous and it
+ * also writes the superblock buffer to disk sector 0 immediately.
+ */
+int
+xfs_sync_sb_buf(
+	struct xfs_mount	*mp)
+{
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp);
+	if (error)
+		return error;
+
+	xfs_log_sb(tp);
+	xfs_trans_bhold(tp, mp->m_sb_bp);
+	xfs_trans_set_sync(tp);
+	error = xfs_trans_commit(tp);
+	if (error)
+		goto out;
+	/*
+	 * write out the sb buffer to get the changes to disk
+	 */
+	error = xfs_bwrite(mp->m_sb_bp);
+out:
+	xfs_buf_relse(mp->m_sb_bp);
+	return error;
+}
+
 int
 xfs_fs_geometry(
 	struct xfs_sb		*sbp,
@@ -972,3 +1075,47 @@
 
 	return 0;
 }
+
+/* Read a secondary superblock. */
+int
+xfs_sb_read_secondary(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		agno,
+	struct xfs_buf		**bpp)
+{
+	struct xfs_buf		*bp;
+	int			error;
+
+	ASSERT(agno != 0 && agno != NULLAGNUMBER);
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+	if (error)
+		return error;
+	xfs_buf_set_ref(bp, XFS_SSB_REF);
+	*bpp = bp;
+	return 0;
+}
+
+/* Get an uninitialised secondary superblock buffer. */
+int
+xfs_sb_get_secondary(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	xfs_agnumber_t		agno,
+	struct xfs_buf		**bpp)
+{
+	struct xfs_buf		*bp;
+
+	ASSERT(agno != 0 && agno != NULLAGNUMBER);
+	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+			XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
+			XFS_FSS_TO_BB(mp, 1), 0);
+	if (!bp)
+		return -ENOMEM;
+	bp->b_ops = &xfs_sb_buf_ops;
+	xfs_buf_oneshot(bp);
+	*bpp = bp;
+	return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 63dcd2a..244e016 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -18,6 +18,13 @@
 #ifndef __XFS_SB_H__
 #define	__XFS_SB_H__
 
+struct xfs_mount;
+struct xfs_sb;
+struct xfs_dsb;
+struct xfs_trans;
+struct xfs_fsop_geom;
+struct xfs_perag;
+
 /*
  * perag get/put wrappers for ref counting
  */
@@ -29,13 +36,22 @@
 
 extern void	xfs_log_sb(struct xfs_trans *tp);
 extern int	xfs_sync_sb(struct xfs_mount *mp, bool wait);
+extern int	xfs_sync_sb_buf(struct xfs_mount *mp);
 extern void	xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
 extern void	xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
 extern void	xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
 extern void	xfs_sb_quota_from_disk(struct xfs_sb *sbp);
 
+extern int	xfs_update_secondary_sbs(struct xfs_mount *mp);
+
 #define XFS_FS_GEOM_MAX_STRUCT_VER	(4)
 extern int	xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo,
 				int struct_version);
+extern int	xfs_sb_read_secondary(struct xfs_mount *mp,
+				struct xfs_trans *tp, xfs_agnumber_t agno,
+				struct xfs_buf **bpp);
+extern int	xfs_sb_get_secondary(struct xfs_mount *mp,
+				struct xfs_trans *tp, xfs_agnumber_t agno,
+				struct xfs_buf **bpp);
 
 #endif	/* __XFS_SB_H__ */
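
A minimal sketch (not part of the patch) of the xfs_sb_read_secondary()
calling convention, mirroring the scrub call site in agheader.c below;
the helper name is hypothetical:

    STATIC int
    xfs_example_check_secondary_sb(
    	struct xfs_mount	*mp,
    	struct xfs_trans	*tp,
    	xfs_agnumber_t		agno)
    {
    	struct xfs_buf		*bp;
    	int			error;

    	if (agno == 0)		/* AG 0 holds the primary sb */
    		return 0;

    	error = xfs_sb_read_secondary(mp, tp, agno, &bp);
    	if (error)
    		return error;

    	/* ... examine bp->b_addr here ... */
    	xfs_trans_brelse(tp, bp);
    	return 0;
    }
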
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index d0b84da..ae99c26 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -57,21 +57,6 @@
 extern const struct xfs_buf_ops xfs_symlink_buf_ops;
 extern const struct xfs_buf_ops xfs_rtbuf_ops;
 
-/*
- * This structure is used to track log items associated with
- * a transaction.  It points to the log item and keeps some
- * flags to track the state of the log item.  It also tracks
- * the amount of space needed to log the item it describes
- * once we get to commit processing (see xfs_trans_commit()).
- */
-struct xfs_log_item_desc {
-	struct xfs_log_item	*lid_item;
-	struct list_head	lid_trans;
-	unsigned char		lid_flags;
-};
-
-#define XFS_LID_DIRTY		0x1
-
 /* log size calculation functions */
 int	xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
 int	xfs_log_calc_minimum_size(struct xfs_mount *);
@@ -127,6 +112,7 @@
 #define	XFS_ATTR_BTREE_REF	1
 #define	XFS_DQUOT_REF		1
 #define	XFS_REFC_BTREE_REF	1
+#define	XFS_SSB_REF		0
 
 /*
  * Flags for xfs_trans_ichgtime().
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 3c56069..ea18449 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -30,7 +30,7 @@
 typedef uint64_t	xfs_ufsize_t;	/* unsigned bytes in a file */
 
 typedef int32_t		xfs_suminfo_t;	/* type of bitmap summary info */
-typedef int32_t		xfs_rtword_t;	/* word type for bitmap manipulations */
+typedef uint32_t	xfs_rtword_t;	/* word type for bitmap manipulations */
 
 typedef int64_t		xfs_lsn_t;	/* log sequence number */
 typedef int32_t		xfs_tid_t;	/* transaction identifier */
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 018aabbd..1f71793 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -38,68 +38,6 @@
 #include "scrub/common.h"
 #include "scrub/trace.h"
 
-/*
- * Walk all the blocks in the AGFL.  The fn function can return any negative
- * error code or XFS_BTREE_QUERY_RANGE_ABORT.
- */
-int
-xfs_scrub_walk_agfl(
-	struct xfs_scrub_context	*sc,
-	int				(*fn)(struct xfs_scrub_context *,
-					      xfs_agblock_t bno, void *),
-	void				*priv)
-{
-	struct xfs_agf			*agf;
-	__be32				*agfl_bno;
-	struct xfs_mount		*mp = sc->mp;
-	unsigned int			flfirst;
-	unsigned int			fllast;
-	int				i;
-	int				error;
-
-	agf = XFS_BUF_TO_AGF(sc->sa.agf_bp);
-	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, sc->sa.agfl_bp);
-	flfirst = be32_to_cpu(agf->agf_flfirst);
-	fllast = be32_to_cpu(agf->agf_fllast);
-
-	/* Nothing to walk in an empty AGFL. */
-	if (agf->agf_flcount == cpu_to_be32(0))
-		return 0;
-
-	/* first to last is a consecutive list. */
-	if (fllast >= flfirst) {
-		for (i = flfirst; i <= fllast; i++) {
-			error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
-			if (error)
-				return error;
-			if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-				return error;
-		}
-
-		return 0;
-	}
-
-	/* first to the end */
-	for (i = flfirst; i < xfs_agfl_size(mp); i++) {
-		error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
-		if (error)
-			return error;
-		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-			return error;
-	}
-
-	/* the start to last. */
-	for (i = 0; i <= fllast; i++) {
-		error = fn(sc, be32_to_cpu(agfl_bno[i]), priv);
-		if (error)
-			return error;
-		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-			return error;
-	}
-
-	return 0;
-}
-
 /* Superblock */
 
 /* Cross-reference with the other btrees. */
@@ -157,9 +95,7 @@
 	if (agno == 0)
 		return 0;
 
-	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
-		  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
-		  XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+	error = xfs_sb_read_secondary(mp, sc->tp, agno, &bp);
 	/*
 	 * The superblock verifier can return several different error codes
 	 * if it thinks the superblock doesn't look right.  For a mount these
@@ -680,6 +616,7 @@
 	unsigned int			sz_entries;
 	unsigned int			nr_entries;
 	xfs_agblock_t			*entries;
+	struct xfs_scrub_context	*sc;
 };
 
 /* Cross-reference with the other btrees. */
@@ -701,12 +638,12 @@
 /* Scrub an AGFL block. */
 STATIC int
 xfs_scrub_agfl_block(
-	struct xfs_scrub_context	*sc,
+	struct xfs_mount		*mp,
 	xfs_agblock_t			agbno,
 	void				*priv)
 {
-	struct xfs_mount		*mp = sc->mp;
 	struct xfs_scrub_agfl_info	*sai = priv;
+	struct xfs_scrub_context	*sc = sai->sc;
 	xfs_agnumber_t			agno = sc->sa.agno;
 
 	if (xfs_verify_agbno(mp, agno, agbno) &&
@@ -717,6 +654,9 @@
 
 	xfs_scrub_agfl_block_xref(sc, agbno, priv);
 
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		return XFS_BTREE_QUERY_RANGE_ABORT;
+
 	return 0;
 }
 
@@ -796,8 +736,10 @@
 		goto out;
 	}
 	memset(&sai, 0, sizeof(sai));
+	sai.sc = sc;
 	sai.sz_entries = agflcount;
-	sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount, KM_NOFS);
+	sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount,
+			KM_MAYFAIL);
 	if (!sai.entries) {
 		error = -ENOMEM;
 		goto out;
@@ -805,7 +747,12 @@
 
 	/* Check the blocks in the AGFL. */
 	xfs_rmap_ag_owner(&sai.oinfo, XFS_RMAP_OWN_AG);
-	error = xfs_scrub_walk_agfl(sc, xfs_scrub_agfl_block, &sai);
+	error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
+			sc->sa.agfl_bp, xfs_scrub_agfl_block, &sai);
+	if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+		error = 0;
+		goto out_free;
+	}
 	if (error)
 		goto out_free;
 
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
new file mode 100644
index 0000000..8b91e9e
--- /dev/null
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2018 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+
+/* Superblock */
+
+/* Repair the superblock. */
+int
+xfs_repair_superblock(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*bp;
+	xfs_agnumber_t			agno;
+	int				error;
+
+	/* Don't try to repair AG 0's sb; let xfs_repair deal with it. */
+	agno = sc->sm->sm_agno;
+	if (agno == 0)
+		return -EOPNOTSUPP;
+
+	error = xfs_sb_get_secondary(mp, sc->tp, agno, &bp);
+	if (error)
+		return error;
+
+	/* Copy AG 0's superblock to this one. */
+	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
+
+	/* Write this to disk. */
+	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
+	xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
+	return error;
+}
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 517c079..941a0a5 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -70,7 +70,7 @@
 		pcur = &sc->sa.cnt_cur;
 	else
 		pcur = &sc->sa.bno_cur;
-	if (!*pcur)
+	if (!*pcur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	error = xfs_alloc_lookup_le(*pcur, agbno, len, &has_otherrec);
@@ -172,7 +172,7 @@
 	bool				is_freesp;
 	int				error;
 
-	if (!sc->sa.bno_cur)
+	if (!sc->sa.bno_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	error = xfs_alloc_has_record(sc->sa.bno_cur, agbno, len, &is_freesp);
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 127575f..84b6d6b 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -126,8 +126,9 @@
 	if (args.valuelen != valuelen)
 		xfs_scrub_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK,
 					     args.blkno);
-
 fail_xref:
+	if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		context->seen_enough = 1;
 	return;
 }
 
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 639d14b..eeadb33 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -51,7 +51,6 @@
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	struct xfs_mount		*mp = sc->mp;
 	int				error;
 
 	error = xfs_scrub_get_inode(sc, ip);
@@ -75,7 +74,7 @@
 	}
 
 	/* Got the inode, lock it and we're ready to go. */
-	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
+	error = xfs_scrub_trans_alloc(sc, 0);
 	if (error)
 		goto out;
 	sc->ilock_flags |= XFS_ILOCK_EXCL;
@@ -175,7 +174,7 @@
 	unsigned long long		rmap_end;
 	uint64_t			owner;
 
-	if (!info->sc->sa.rmap_cur)
+	if (!info->sc->sa.rmap_cur || xfs_scrub_skip_xref(info->sc->sm))
 		return;
 
 	if (info->whichfork == XFS_COW_FORK)
@@ -684,7 +683,8 @@
 	info.lastoff = 0;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	for_each_xfs_iext(ifp, &icur, &irec) {
-		if (xfs_scrub_should_terminate(sc, &error))
+		if (xfs_scrub_should_terminate(sc, &error) ||
+		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 			break;
 		if (isnullstartblock(irec.br_startblock))
 			continue;
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 5421816..2d29dce 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -442,7 +442,7 @@
 	 */
 	if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
 		co = kmem_alloc(sizeof(struct check_owner),
-				KM_MAYFAIL | KM_NOFS);
+				KM_MAYFAIL);
 		if (!co)
 			return -ENOMEM;
 		co->level = level;
@@ -455,6 +455,44 @@
 }
 
 /*
+ * Check that this btree block has at least minrecs records or is one of the
+ * special blocks that don't require that.
+ */
+STATIC void
+xfs_scrub_btree_check_minrecs(
+	struct xfs_scrub_btree	*bs,
+	int			level,
+	struct xfs_btree_block	*block)
+{
+	unsigned int		numrecs;
+	int			ok_level;
+
+	numrecs = be16_to_cpu(block->bb_numrecs);
+
+	/* More records than minrecs means the block is ok. */
+	if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
+		return;
+
+	/*
+	 * Certain btree blocks /can/ have fewer than minrecs records.  Any
+	 * level greater than or equal to the level of the highest dedicated
+	 * btree block is allowed to violate this constraint.
+	 *
+	 * For a btree rooted in a block, the btree root can have fewer than
+	 * minrecs records.  If the btree is rooted in an inode and does not
+	 * store records in the root, the direct children of the root and the
+	 * root itself can have fewer than minrecs records.
+	 */
+	ok_level = bs->cur->bc_nlevels - 1;
+	if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+		ok_level--;
+	if (level >= ok_level)
+		return;
+
+	xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, level);
+}
+
+/*
  * Grab and scrub a btree block given a btree pointer.  Returns block
  * and buffer pointers (if applicable) if they're ok to use.
  */
@@ -491,6 +529,8 @@
 	if (*pbp)
 		xfs_scrub_buffer_recheck(bs->sc, *pbp);
 
+	xfs_scrub_btree_check_minrecs(bs, level, *pblock);
+
 	/*
 	 * Check the block's owner; this function absorbs error codes
 	 * for us.
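
Reading the new minrecs check above: for a three-level btree rooted in a
block (bc_nlevels == 3), ok_level is 2, so only the root block at level 2
may carry fewer than minrecs records; if the same btree were rooted in an
inode (XFS_BTREE_ROOT_IN_INODE), ok_level drops to 1 and blocks at levels
1 and 2 are both exempt.
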
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 8ed91d5..41198a5 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -44,11 +44,14 @@
 #include "xfs_rmap_btree.h"
 #include "xfs_log.h"
 #include "xfs_trans_priv.h"
+#include "xfs_attr.h"
+#include "xfs_reflink.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
 #include "scrub/btree.h"
+#include "scrub/repair.h"
 
 /* Common code for the metadata scrubbers. */
 
@@ -539,6 +542,10 @@
 		xfs_trans_brelse(sc->tp, sa->agi_bp);
 		sa->agi_bp = NULL;
 	}
+	if (sa->pag) {
+		xfs_perag_put(sa->pag);
+		sa->pag = NULL;
+	}
 	sa->agno = NULLAGNUMBER;
 }
 
@@ -566,15 +573,53 @@
 	return xfs_scrub_ag_btcur_init(sc, sa);
 }
 
+/*
+ * Grab the per-ag structure if we haven't already gotten it.  Teardown of the
+ * xfs_scrub_ag will release it for us.
+ */
+void
+xfs_scrub_perag_get(
+	struct xfs_mount	*mp,
+	struct xfs_scrub_ag	*sa)
+{
+	if (!sa->pag)
+		sa->pag = xfs_perag_get(mp, sa->agno);
+}
+
 /* Per-scrubber setup functions */
 
+/*
+ * Grab an empty transaction so that we can re-grab locked buffers if
+ * one of our btrees turns out to be cyclic.
+ *
+ * If we're going to repair something, we need to ask for the largest possible
+ * log reservation so that we can handle the worst case scenario for metadata
+ * updates while rebuilding a metadata item.  We also need to reserve as many
+ * blocks in the head transaction as we think we're going to need to rebuild
+ * the metadata object.
+ */
+int
+xfs_scrub_trans_alloc(
+	struct xfs_scrub_context	*sc,
+	uint				resblks)
+{
+	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
+				resblks, 0, 0, &sc->tp);
+
+	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
+}
+
 /* Set us up with a transaction and an empty context. */
 int
 xfs_scrub_setup_fs(
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	return xfs_scrub_trans_alloc(sc->sm, sc->mp, &sc->tp);
+	uint				resblks;
+
+	resblks = xfs_repair_calc_ag_resblks(sc);
+	return xfs_scrub_trans_alloc(sc, resblks);
 }
 
 /* Set us up with AG headers and btree cursors. */
@@ -695,7 +740,6 @@
 	struct xfs_inode		*ip,
 	unsigned int			resblks)
 {
-	struct xfs_mount		*mp = sc->mp;
 	int				error;
 
 	error = xfs_scrub_get_inode(sc, ip);
@@ -705,7 +749,7 @@
 	/* Got the inode, lock it and we're ready to go. */
 	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 	xfs_ilock(sc->ip, sc->ilock_flags);
-	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
+	error = xfs_scrub_trans_alloc(sc, resblks);
 	if (error)
 		goto out;
 	sc->ilock_flags |= XFS_ILOCK_EXCL;
@@ -727,6 +771,10 @@
 	int				*error,
 	struct xfs_btree_cur		**curpp)
 {
+	/* No point in xref if we already know we're corrupt. */
+	if (xfs_scrub_skip_xref(sc->sm))
+		return false;
+
 	if (*error == 0)
 		return true;
 
@@ -773,3 +821,80 @@
 	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 	trace_xfs_scrub_block_error(sc, bp->b_bn, fa);
 }
+
+/*
+ * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
+ * pointed to by sc->ip and the ILOCK must be held.
+ */
+int
+xfs_scrub_metadata_inode_forks(
+	struct xfs_scrub_context	*sc)
+{
+	__u32				smtype;
+	bool				shared;
+	int				error;
+
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		return 0;
+
+	/* Metadata inodes don't live on the rt device. */
+	if (sc->ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
+		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+		return 0;
+	}
+
+	/* They should never participate in reflink. */
+	if (xfs_is_reflink_inode(sc->ip)) {
+		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+		return 0;
+	}
+
+	/* They also should never have extended attributes. */
+	if (xfs_inode_hasattr(sc->ip)) {
+		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+		return 0;
+	}
+
+	/* Invoke the data fork scrubber. */
+	smtype = sc->sm->sm_type;
+	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
+	error = xfs_scrub_bmap_data(sc);
+	sc->sm->sm_type = smtype;
+	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+		return error;
+
+	/* Look for incorrect shared blocks. */
+	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
+		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
+				&shared);
+		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0,
+				&error))
+			return error;
+		if (shared)
+			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
+	}
+
+	return error;
+}
+
+/*
+ * Try to lock an inode in violation of the usual locking order rules.  For
+ * example, trying to get the IOLOCK while in transaction context, or just
+ * plain breaking AG-order or inode-order inode locking rules.  Either way,
+ * the only way to avoid an ABBA deadlock is to use trylock and back off if
+ * we can't.
+ */
+int
+xfs_scrub_ilock_inverted(
+	struct xfs_inode	*ip,
+	uint			lock_mode)
+{
+	int			i;
+
+	for (i = 0; i < 20; i++) {
+		if (xfs_ilock_nowait(ip, lock_mode))
+			return 0;
+		delay(1);
+	}
+	return -EDEADLOCK;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index deaf604..76bb2d1 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -38,19 +38,7 @@
 	return false;
 }
 
-/*
- * Grab an empty transaction so that we can re-grab locked buffers if
- * one of our btrees turns out to be cyclic.
- */
-static inline int
-xfs_scrub_trans_alloc(
-	struct xfs_scrub_metadata	*sm,
-	struct xfs_mount		*mp,
-	struct xfs_trans		**tpp)
-{
-	return xfs_trans_alloc_empty(mp, tpp);
-}
-
+int xfs_scrub_trans_alloc(struct xfs_scrub_context *sc, uint resblks);
 bool xfs_scrub_process_error(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
 		xfs_agblock_t bno, int *error);
 bool xfs_scrub_fblock_process_error(struct xfs_scrub_context *sc, int whichfork,
@@ -135,16 +123,13 @@
 void xfs_scrub_ag_free(struct xfs_scrub_context *sc, struct xfs_scrub_ag *sa);
 int xfs_scrub_ag_init(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
 		      struct xfs_scrub_ag *sa);
+void xfs_scrub_perag_get(struct xfs_mount *mp, struct xfs_scrub_ag *sa);
 int xfs_scrub_ag_read_headers(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
 			      struct xfs_buf **agi, struct xfs_buf **agf,
 			      struct xfs_buf **agfl);
 void xfs_scrub_ag_btcur_free(struct xfs_scrub_ag *sa);
 int xfs_scrub_ag_btcur_init(struct xfs_scrub_context *sc,
 			    struct xfs_scrub_ag *sa);
-int xfs_scrub_walk_agfl(struct xfs_scrub_context *sc,
-			int (*fn)(struct xfs_scrub_context *, xfs_agblock_t bno,
-				  void *),
-			void *priv);
 int xfs_scrub_count_rmap_ownedby_ag(struct xfs_scrub_context *sc,
 				    struct xfs_btree_cur *cur,
 				    struct xfs_owner_info *oinfo,
@@ -157,4 +142,17 @@
 				   struct xfs_inode *ip, unsigned int resblks);
 void xfs_scrub_buffer_recheck(struct xfs_scrub_context *sc, struct xfs_buf *bp);
 
+/*
+ * Don't bother cross-referencing if we already found corruption or cross
+ * referencing discrepancies.
+ */
+static inline bool xfs_scrub_skip_xref(struct xfs_scrub_metadata *sm)
+{
+	return sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+			       XFS_SCRUB_OFLAG_XCORRUPT);
+}
+
+int xfs_scrub_metadata_inode_forks(struct xfs_scrub_context *sc);
+int xfs_scrub_ilock_inverted(struct xfs_inode *ip, uint lock_mode);
+
 #endif	/* __XFS_SCRUB_COMMON_H__ */
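
A minimal sketch (not part of the patch) of the guard pattern that the
cross-reference hunks in this series now follow, using the new
xfs_scrub_skip_xref() helper:

    STATIC void
    xfs_example_xref_check(
    	struct xfs_scrub_context	*sc)
    {
    	/* No cursor, or corruption already found: skip the xref. */
    	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
    		return;

    	/* ... cross-reference against the rmapbt here ... */
    }
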
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 38f2980..1a4309b 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -172,7 +172,7 @@
 	error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL);
 	if (!xfs_scrub_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset,
 			&error))
-		goto fail_xref;
+		goto out;
 	if (lookup_ino != ino) {
 		xfs_scrub_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
 		goto out;
@@ -183,8 +183,13 @@
 	if (error)
 		goto out;
 out:
-	return error;
-fail_xref:
+	/*
+	 * A negative error code returned here is supposed to cause the
+	 * dir_emit caller (xfs_readdir) to abort the directory iteration
+	 * and return zero to xfs_scrub_directory.
+	 */
+	if (error == 0 && sdc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		return -EFSCORRUPTED;
 	return error;
 }
 
@@ -240,6 +245,9 @@
 	}
 	xfs_scrub_buffer_recheck(ds->sc, bp);
 
+	if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out_relse;
+
 	dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
 
 	/* Make sure we got a real directory entry. */
@@ -357,6 +365,9 @@
 
 	/* XXX: Check xfs_dir3_data_hdr.pad is zero once we start setting it. */
 
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out_buf;
+
 	/* Do the bestfrees correspond to actual free space? */
 	bf = d_ops->data_bestfree_p(bp->b_addr);
 	smallest_bestfree = UINT_MAX;
@@ -413,14 +424,18 @@
 
 		/* Spot check this free entry */
 		tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
-		if (tag != ((char *)dup - (char *)bp->b_addr))
+		if (tag != ((char *)dup - (char *)bp->b_addr)) {
 			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+			goto out_buf;
+		}
 
 		/*
 		 * Either this entry is a bestfree or it's smaller than
 		 * any of the bestfrees.
 		 */
 		xfs_scrub_directory_check_free_entry(sc, lblk, bf, dup);
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			goto out_buf;
 
 		/* Move on. */
 		newlen = be16_to_cpu(dup->length);
@@ -546,6 +561,8 @@
 	}
 	if (leafhdr.stale != stale)
 		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
 
 	/* Check all the bestfree entries. */
 	for (i = 0; i < bestcount; i++, bestp++) {
@@ -556,9 +573,11 @@
 				i * args->geo->fsbcount, -1, &dbp);
 		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk,
 				&error))
-			continue;
+			break;
 		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
 		xfs_trans_brelse(sc->tp, dbp);
+		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+			goto out;
 	}
 out:
 	return error;
@@ -607,7 +626,7 @@
 				-1, &dbp);
 		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, lblk,
 				&error))
-			continue;
+			break;
 		xfs_scrub_directory_check_freesp(sc, lblk, dbp, best);
 		xfs_trans_brelse(sc->tp, dbp);
 	}
@@ -656,7 +675,7 @@
 
 	/* Iterate all the data extents in the directory... */
 	found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
-	while (found) {
+	while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
 		/* Block directories only have a single block at offset 0. */
 		if (is_block &&
 		    (got.br_startoff > 0 ||
@@ -719,7 +738,7 @@
 	/* Scan for free blocks */
 	lblk = free_lblk;
 	found = xfs_iext_lookup_extent(sc->ip, ifp, lblk, &icur, &got);
-	while (found) {
+	while (found && !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
 		/*
 		 * Dirs can't have blocks mapped above 2^32.
 		 * Single-block dirs shouldn't even be here.
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 106ca4b..00a834d 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -387,7 +387,8 @@
 	int				error;
 
 	if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
-	    (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur))
+	    (xfs_sb_version_hasfinobt(&sc->mp->m_sb) && !sc->sa.fino_cur) ||
+	    xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	/* Check that we saw as many inobt blocks as the rmap says. */
@@ -424,7 +425,7 @@
 	xfs_filblks_t			blocks;
 	int				error;
 
-	if (!sc->sa.rmap_cur)
+	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	/* Check that we saw as many inode blocks as the rmap knows about. */
@@ -496,7 +497,7 @@
 	bool				has_inodes;
 	int				error;
 
-	if (!(*icur))
+	if (!(*icur) || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &has_inodes);
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index df14930..0c696f7 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -55,7 +55,6 @@
 	struct xfs_scrub_context	*sc,
 	struct xfs_inode		*ip)
 {
-	struct xfs_mount		*mp = sc->mp;
 	int				error;
 
 	/*
@@ -68,7 +67,7 @@
 		break;
 	case -EFSCORRUPTED:
 	case -EFSBADCRC:
-		return xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
+		return xfs_scrub_trans_alloc(sc, 0);
 	default:
 		return error;
 	}
@@ -76,7 +75,7 @@
 	/* Got the inode, lock it and we're ready to go. */
 	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
 	xfs_ilock(sc->ip, sc->ilock_flags);
-	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
+	error = xfs_scrub_trans_alloc(sc, 0);
 	if (error)
 		goto out;
 	sc->ilock_flags |= XFS_ILOCK_EXCL;
@@ -449,7 +448,7 @@
 	int				has_record;
 	int				error;
 
-	if (!sc->sa.fino_cur)
+	if (!sc->sa.fino_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	agino = XFS_INO_TO_AGINO(sc->mp, ino);
@@ -492,6 +491,9 @@
 	xfs_filblks_t			acount;
 	int				error;
 
+	if (xfs_scrub_skip_xref(sc->sm))
+		return;
+
 	/* Walk all the extents to check nextents/naextents/nblocks. */
 	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
 			&nextents, &count);
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 1fb88c1..77c6b22 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -147,6 +147,9 @@
 
 	*try_again = false;
 
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+		goto out;
+
 	/* '..' must not point to ourselves. */
 	if (sc->ip->i_ino == dnum) {
 		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
@@ -211,7 +214,9 @@
 	 */
 	xfs_iunlock(sc->ip, sc->ilock_flags);
 	sc->ilock_flags = 0;
-	xfs_ilock(dp, XFS_IOLOCK_SHARED);
+	error = xfs_scrub_ilock_inverted(dp, XFS_IOLOCK_SHARED);
+	if (error)
+		goto out_rele;
 
 	/* Go looking for our dentry. */
 	error = xfs_scrub_parent_count_parent_dentries(sc, dp, &nlink);
@@ -220,8 +225,10 @@
 
 	/* Drop the parent lock, relock this inode. */
 	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
+	error = xfs_scrub_ilock_inverted(sc->ip, XFS_IOLOCK_EXCL);
+	if (error)
+		goto out_rele;
 	sc->ilock_flags = XFS_IOLOCK_EXCL;
-	xfs_ilock(sc->ip, sc->ilock_flags);
 
 	/*
 	 * If we're an unlinked directory, the parent /won't/ have a link
@@ -323,5 +330,13 @@
 	if (try_again && tries == 20)
 		xfs_scrub_set_incomplete(sc);
 out:
+	/*
+	 * If we failed to lock the parent inode even after a retry, just mark
+	 * this scrub incomplete and return.
+	 */
+	if (sc->try_harder && error == -EDEADLOCK) {
+		error = 0;
+		xfs_scrub_set_incomplete(sc);
+	}
 	return error;
 }
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 6ba465e..15ae4d2 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -66,25 +66,43 @@
 	struct xfs_inode		*ip)
 {
 	uint				dqtype;
+	int				error;
+
+	if (!XFS_IS_QUOTA_RUNNING(sc->mp) || !XFS_IS_QUOTA_ON(sc->mp))
+		return -ENOENT;
 
 	dqtype = xfs_scrub_quota_to_dqtype(sc);
 	if (dqtype == 0)
 		return -EINVAL;
+	sc->has_quotaofflock = true;
+	mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
 	if (!xfs_this_quota_on(sc->mp, dqtype))
 		return -ENOENT;
+	error = xfs_scrub_setup_fs(sc, ip);
+	if (error)
+		return error;
+	sc->ip = xfs_quota_inode(sc->mp, dqtype);
+	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
+	sc->ilock_flags = XFS_ILOCK_EXCL;
 	return 0;
 }
 
 /* Quotas. */
 
+struct xfs_scrub_quota_info {
+	struct xfs_scrub_context	*sc;
+	xfs_dqid_t			last_id;
+};
+
 /* Scrub the fields in an individual quota item. */
-STATIC void
+STATIC int
 xfs_scrub_quota_item(
-	struct xfs_scrub_context	*sc,
-	uint				dqtype,
 	struct xfs_dquot		*dq,
-	xfs_dqid_t			id)
+	uint				dqtype,
+	void				*priv)
 {
+	struct xfs_scrub_quota_info	*sqi = priv;
+	struct xfs_scrub_context	*sc = sqi->sc;
 	struct xfs_mount		*mp = sc->mp;
 	struct xfs_disk_dquot		*d = &dq->q_core;
 	struct xfs_quotainfo		*qi = mp->m_quotainfo;
@@ -99,17 +117,18 @@
 	unsigned long long		icount;
 	unsigned long long		rcount;
 	xfs_ino_t			fs_icount;
-
-	offset = id / qi->qi_dqperchunk;
+	xfs_dqid_t			id = be32_to_cpu(d->d_id);
 
 	/*
-	 * We fed $id and DQNEXT into the xfs_qm_dqget call, which means
-	 * that the actual dquot we got must either have the same id or
-	 * the next higher id.
+	 * Except for the root dquot, the actual dquot we got must either have
+	 * the same or higher id as we saw before.
 	 */
-	if (id > be32_to_cpu(d->d_id))
+	offset = id / qi->qi_dqperchunk;
+	if (id && id <= sqi->last_id)
 		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
 
+	sqi->last_id = id;
+
 	/* Did we get the dquot type we wanted? */
 	if (dqtype != (d->d_flags & XFS_DQ_ALLTYPES))
 		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
@@ -183,6 +202,47 @@
 		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
 	if (id != 0 && rhard != 0 && rcount > rhard)
 		xfs_scrub_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+
+	return 0;
+}
+
+/* Check the quota's data fork. */
+STATIC int
+xfs_scrub_quota_data_fork(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_bmbt_irec		irec = { 0 };
+	struct xfs_iext_cursor		icur;
+	struct xfs_quotainfo		*qi = sc->mp->m_quotainfo;
+	struct xfs_ifork		*ifp;
+	xfs_fileoff_t			max_dqid_off;
+	int				error = 0;
+
+	/* Invoke the fork scrubber. */
+	error = xfs_scrub_metadata_inode_forks(sc);
+	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+		return error;
+
+	/* Check for data fork problems that apply only to quota files. */
+	max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;
+	ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK);
+	for_each_xfs_iext(ifp, &icur, &irec) {
+		if (xfs_scrub_should_terminate(sc, &error))
+			break;
+		/*
+		 * delalloc extents or blocks mapped above the highest
+		 * quota id shouldn't happen.
+		 */
+		if (isnullstartblock(irec.br_startblock) ||
+		    irec.br_startoff > max_dqid_off ||
+		    irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) {
+			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
+					irec.br_startoff);
+			break;
+		}
+	}
+
+	return error;
 }
 
 /* Scrub all of a quota type's items. */
@@ -190,108 +250,37 @@
 xfs_scrub_quota(
 	struct xfs_scrub_context	*sc)
 {
-	struct xfs_bmbt_irec		irec = { 0 };
+	struct xfs_scrub_quota_info	sqi;
 	struct xfs_mount		*mp = sc->mp;
-	struct xfs_inode		*ip;
 	struct xfs_quotainfo		*qi = mp->m_quotainfo;
-	struct xfs_dquot		*dq;
-	xfs_fileoff_t			max_dqid_off;
-	xfs_fileoff_t			off = 0;
-	xfs_dqid_t			id = 0;
 	uint				dqtype;
-	int				nimaps;
 	int				error = 0;
 
-	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
-		return -ENOENT;
-
-	mutex_lock(&qi->qi_quotaofflock);
 	dqtype = xfs_scrub_quota_to_dqtype(sc);
-	if (!xfs_this_quota_on(sc->mp, dqtype)) {
-		error = -ENOENT;
-		goto out_unlock_quota;
-	}
-
-	/* Attach to the quota inode and set sc->ip so that reporting works. */
-	ip = xfs_quota_inode(sc->mp, dqtype);
-	sc->ip = ip;
 
 	/* Look for problem extents. */
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
-		xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino);
-		goto out_unlock_inode;
-	}
-	max_dqid_off = ((xfs_dqid_t)-1) / qi->qi_dqperchunk;
-	while (1) {
-		if (xfs_scrub_should_terminate(sc, &error))
-			break;
-
-		off = irec.br_startoff + irec.br_blockcount;
-		nimaps = 1;
-		error = xfs_bmapi_read(ip, off, -1, &irec, &nimaps,
-				XFS_BMAPI_ENTIRE);
-		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, off,
-				&error))
-			goto out_unlock_inode;
-		if (!nimaps)
-			break;
-		if (irec.br_startblock == HOLESTARTBLOCK)
-			continue;
-
-		/* Check the extent record doesn't point to crap. */
-		if (irec.br_startblock + irec.br_blockcount <=
-		    irec.br_startblock)
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
-					irec.br_startoff);
-		if (!xfs_verify_fsbno(mp, irec.br_startblock) ||
-		    !xfs_verify_fsbno(mp, irec.br_startblock +
-					irec.br_blockcount - 1))
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK,
-					irec.br_startoff);
-
-		/*
-		 * Unwritten extents or blocks mapped above the highest
-		 * quota id shouldn't happen.
-		 */
-		if (isnullstartblock(irec.br_startblock) ||
-		    irec.br_startoff > max_dqid_off ||
-		    irec.br_startoff + irec.br_blockcount > max_dqid_off + 1)
-			xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	error = xfs_scrub_quota_data_fork(sc);
+	if (error)
+		goto out;
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		goto out;
 
-	/* Check all the quota items. */
-	while (id < ((xfs_dqid_t)-1ULL)) {
-		if (xfs_scrub_should_terminate(sc, &error))
-			break;
-
-		error = xfs_qm_dqget(mp, NULL, id, dqtype, XFS_QMOPT_DQNEXT,
-				&dq);
-		if (error == -ENOENT)
-			break;
-		if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK,
-				id * qi->qi_dqperchunk, &error))
-			break;
-
-		xfs_scrub_quota_item(sc, dqtype, dq, id);
-
-		id = be32_to_cpu(dq->q_core.d_id) + 1;
-		xfs_qm_dqput(dq);
-		if (!id)
-			break;
-	}
+	/*
+	 * Check all the quota items.  Now that we've checked the quota inode
+	 * data fork we have to drop ILOCK_EXCL to use the regular dquot
+	 * functions.
+	 */
+	xfs_iunlock(sc->ip, sc->ilock_flags);
+	sc->ilock_flags = 0;
+	sqi.sc = sc;
+	sqi.last_id = 0;
+	error = xfs_qm_dqiterate(mp, dqtype, xfs_scrub_quota_item, &sqi);
+	sc->ilock_flags = XFS_ILOCK_EXCL;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK,
+			sqi.last_id * qi->qi_dqperchunk, &error))
+		goto out;
 
 out:
-	/* We set sc->ip earlier, so make sure we clear it now. */
-	sc->ip = NULL;
-out_unlock_quota:
-	mutex_unlock(&qi->qi_quotaofflock);
 	return error;
-
-out_unlock_inode:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	goto out;
 }
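
A minimal sketch (not part of the patch) of the callback shape fed to
xfs_qm_dqiterate() above (see xfs_scrub_quota_item); a nonzero return is
assumed to stop the walk:

    STATIC int
    xfs_example_dqiterate_cb(
    	struct xfs_dquot	*dq,
    	uint			dqtype,
    	void			*priv)
    {
    	unsigned long long	*ndquots = priv;

    	/* e.g. count how many dquots of this type exist */
    	(*ndquots)++;
    	return 0;
    }
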
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 400f156..324a5f1 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -150,7 +150,7 @@
 		 * so we don't need insertion sort here.
 		 */
 		frag = kmem_alloc(sizeof(struct xfs_scrub_refcnt_frag),
-				KM_MAYFAIL | KM_NOFS);
+				KM_MAYFAIL);
 		if (!frag)
 			return -ENOMEM;
 		memcpy(&frag->rm, rec, sizeof(frag->rm));
@@ -310,7 +310,7 @@
 	struct xfs_scrub_refcnt_frag	*n;
 	int				error;
 
-	if (!sc->sa.rmap_cur)
+	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	/* Cross-reference with the rmapbt to confirm the refcount. */
@@ -404,7 +404,7 @@
 	xfs_filblks_t			blocks;
 	int				error;
 
-	if (!sc->sa.rmap_cur)
+	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	/* Check that we saw as many refcbt blocks as the rmap knows about. */
@@ -460,7 +460,7 @@
 	int				has_refcount;
 	int				error;
 
-	if (!sc->sa.refc_cur)
+	if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	/* Find the CoW staging extent. */
@@ -504,7 +504,7 @@
 	bool				shared;
 	int				error;
 
-	if (!sc->sa.refc_cur)
+	if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
new file mode 100644
index 0000000..e3e8fba
--- /dev/null
+++ b/fs/xfs/scrub/repair.c
@@ -0,0 +1,1089 @@
+/*
+ * Copyright (C) 2018 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_alloc.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_refcount.h"
+#include "xfs_refcount_btree.h"
+#include "xfs_extent_busy.h"
+#include "xfs_ag_resv.h"
+#include "xfs_trans_space.h"
+#include "xfs_quota.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/*
+ * Attempt to repair some metadata, if the metadata is corrupt and userspace
+ * told us to fix it.  This function returns -EAGAIN to mean "re-run scrub",
+ * and will set *fixed to true if it thinks it repaired anything.
+ */
+int
+xfs_repair_attempt(
+	struct xfs_inode		*ip,
+	struct xfs_scrub_context	*sc,
+	bool				*fixed)
+{
+	int				error = 0;
+
+	trace_xfs_repair_attempt(ip, sc->sm, error);
+
+	xfs_scrub_ag_btcur_free(&sc->sa);
+
+	/* Repair whatever's broken. */
+	ASSERT(sc->ops->repair);
+	error = sc->ops->repair(sc);
+	trace_xfs_repair_done(ip, sc->sm, error);
+	switch (error) {
+	case 0:
+		/*
+		 * Repair succeeded.  Commit the fixes and perform a second
+		 * scrub so that we can tell userspace if we fixed the problem.
+		 */
+		sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
+		*fixed = true;
+		return -EAGAIN;
+	case -EDEADLOCK:
+	case -EAGAIN:
+		/* Tell the caller to try again, having grabbed all the locks. */
+		if (!sc->try_harder) {
+			sc->try_harder = true;
+			return -EAGAIN;
+		}
+		/*
+		 * We tried harder but still couldn't grab all the resources
+		 * we needed to fix it.  The corruption has not been fixed,
+		 * so report back to userspace.
+		 */
+		return -EFSCORRUPTED;
+	default:
+		return error;
+	}
+}
+
+/*
+ * Complain about unfixable problems in the filesystem.  We don't log
+ * corruptions when IFLAG_REPAIR wasn't set on the assumption that the driver
+ * program is xfs_scrub, which will call back with IFLAG_REPAIR set if the
+ * administrator isn't running xfs_scrub in no-repairs mode.
+ *
+ * Use this helper function because _ratelimited silently declares a static
+ * structure to track rate limiting information.
+ */
+void
+xfs_repair_failure(
+	struct xfs_mount		*mp)
+{
+	xfs_alert_ratelimited(mp,
+"Corruption not fixed during online repair.  Unmount and run xfs_repair.");
+}
+
+/*
+ * Repair probe -- userspace uses this to probe if we're willing to repair a
+ * given mountpoint.
+ */
+int
+xfs_repair_probe(
+	struct xfs_scrub_context	*sc)
+{
+	int				error = 0;
+
+	if (xfs_scrub_should_terminate(sc, &error))
+		return error;
+
+	return 0;
+}
+
+/*
+ * Roll a transaction, keeping the AG headers locked and reinitializing
+ * the btree cursors.
+ */
+int
+xfs_repair_roll_ag_trans(
+	struct xfs_scrub_context	*sc)
+{
+	int				error;
+
+	/* Keep the AG header buffers locked so we can keep going. */
+	xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
+	xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+	xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
+
+	/* Roll the transaction. */
+	error = xfs_trans_roll(&sc->tp);
+	if (error)
+		goto out_release;
+
+	/* Join AG headers to the new transaction. */
+	xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
+	xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
+	xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
+
+	return 0;
+
+out_release:
+	/*
+	 * Rolling failed, so release the hold on the buffers.  The
+	 * buffers will be released during teardown on our way out
+	 * of the kernel.
+	 */
+	xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
+	xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
+	xfs_trans_bhold_release(sc->tp, sc->sa.agfl_bp);
+
+	return error;
+}
+
+/*
+ * Does the given AG have enough space to rebuild a btree?  Neither AG
+ * reservation can be critical, and we must have enough space (factoring
+ * in AG reservations) to construct a whole btree.
+ */
+bool
+xfs_repair_ag_has_space(
+	struct xfs_perag		*pag,
+	xfs_extlen_t			nr_blocks,
+	enum xfs_ag_resv_type		type)
+{
+	return  !xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) &&
+		!xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA) &&
+		pag->pagf_freeblks > xfs_ag_resv_needed(pag, type) + nr_blocks;
+}
+
+/*
+ * Figure out how many blocks to reserve for an AG repair.  We calculate the
+ * worst case estimate for the number of blocks we'd need to rebuild one of
+ * any type of per-AG btree.
+ */
+xfs_extlen_t
+xfs_repair_calc_ag_resblks(
+	struct xfs_scrub_context	*sc)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_scrub_metadata	*sm = sc->sm;
+	struct xfs_perag		*pag;
+	struct xfs_buf			*bp;
+	xfs_agino_t			icount = 0;
+	xfs_extlen_t			aglen = 0;
+	xfs_extlen_t			usedlen;
+	xfs_extlen_t			freelen;
+	xfs_extlen_t			bnobt_sz;
+	xfs_extlen_t			inobt_sz;
+	xfs_extlen_t			rmapbt_sz;
+	xfs_extlen_t			refcbt_sz;
+	int				error;
+
+	if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+		return 0;
+
+	/* Use in-core counters if possible. */
+	pag = xfs_perag_get(mp, sm->sm_agno);
+	if (pag->pagi_init)
+		icount = pag->pagi_count;
+
+	/*
+	 * Otherwise try to get the actual counters from disk; if not, make
+	 * some worst case assumptions.
+	 */
+	if (icount == 0) {
+		error = xfs_ialloc_read_agi(mp, NULL, sm->sm_agno, &bp);
+		if (error) {
+			icount = mp->m_sb.sb_agblocks / mp->m_sb.sb_inopblock;
+		} else {
+			icount = pag->pagi_count;
+			xfs_buf_relse(bp);
+		}
+	}
+
+	/* Now grab the block counters from the AGF. */
+	error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp);
+	if (error) {
+		aglen = mp->m_sb.sb_agblocks;
+		freelen = aglen;
+		usedlen = aglen;
+	} else {
+		aglen = be32_to_cpu(XFS_BUF_TO_AGF(bp)->agf_length);
+		freelen = pag->pagf_freeblks;
+		usedlen = aglen - freelen;
+		xfs_buf_relse(bp);
+	}
+	xfs_perag_put(pag);
+
+	trace_xfs_repair_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
+			freelen, usedlen);
+
+	/*
+	 * Figure out how many blocks we'd need worst case to rebuild
+	 * each type of btree.  Note that we can only rebuild the
+	 * bnobt/cntbt or inobt/finobt as pairs.
+	 */
+	bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
+	if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+		inobt_sz = xfs_iallocbt_calc_size(mp, icount /
+				XFS_INODES_PER_HOLEMASK_BIT);
+	else
+		inobt_sz = xfs_iallocbt_calc_size(mp, icount /
+				XFS_INODES_PER_CHUNK);
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		inobt_sz *= 2;
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
+	else
+		refcbt_sz = 0;
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		/*
+		 * Guess how many blocks we need to rebuild the rmapbt.
+		 * For non-reflink filesystems we can't have more records than
+		 * used blocks.  However, with reflink it's possible to have
+		 * more than one rmap record per AG block.  We don't know how
+		 * many rmaps there could be in the AG, so we start off with
+		 * what we hope is a generous over-estimation.
+		 */
+		if (xfs_sb_version_hasreflink(&mp->m_sb))
+			rmapbt_sz = xfs_rmapbt_calc_size(mp,
+					(unsigned long long)aglen * 2);
+		else
+			rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen);
+	} else {
+		rmapbt_sz = 0;
+	}
+
+	trace_xfs_repair_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
+			inobt_sz, rmapbt_sz, refcbt_sz);
+
+	return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
+}
+
+/* Allocate a block in an AG. */
+int
+xfs_repair_alloc_ag_block(
+	struct xfs_scrub_context	*sc,
+	struct xfs_owner_info		*oinfo,
+	xfs_fsblock_t			*fsbno,
+	enum xfs_ag_resv_type		resv)
+{
+	struct xfs_alloc_arg		args = {0};
+	xfs_agblock_t			bno;
+	int				error;
+
+	switch (resv) {
+	case XFS_AG_RESV_AGFL:
+	case XFS_AG_RESV_RMAPBT:
+		error = xfs_alloc_get_freelist(sc->tp, sc->sa.agf_bp, &bno, 1);
+		if (error)
+			return error;
+		if (bno == NULLAGBLOCK)
+			return -ENOSPC;
+		xfs_extent_busy_reuse(sc->mp, sc->sa.agno, bno,
+				1, false);
+		*fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.agno, bno);
+		if (resv == XFS_AG_RESV_RMAPBT)
+			xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.agno);
+		return 0;
+	default:
+		break;
+	}
+
+	args.tp = sc->tp;
+	args.mp = sc->mp;
+	args.oinfo = *oinfo;
+	args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.agno, 0);
+	args.minlen = 1;
+	args.maxlen = 1;
+	args.prod = 1;
+	args.type = XFS_ALLOCTYPE_THIS_AG;
+	args.resv = resv;
+
+	error = xfs_alloc_vextent(&args);
+	if (error)
+		return error;
+	if (args.fsbno == NULLFSBLOCK)
+		return -ENOSPC;
+	ASSERT(args.len == 1);
+	*fsbno = args.fsbno;
+
+	return 0;
+}
+
+/* Initialize a new AG btree root block with zero entries. */
+int
+xfs_repair_init_btblock(
+	struct xfs_scrub_context	*sc,
+	xfs_fsblock_t			fsb,
+	struct xfs_buf			**bpp,
+	xfs_btnum_t			btnum,
+	const struct xfs_buf_ops	*ops)
+{
+	struct xfs_trans		*tp = sc->tp;
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_buf			*bp;
+
+	trace_xfs_repair_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
+			XFS_FSB_TO_AGBNO(mp, fsb), btnum);
+
+	ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
+	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
+			XFS_FSB_TO_BB(mp, 1), 0);
+	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
+	xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
+	xfs_trans_log_buf(tp, bp, 0, bp->b_length);
+	bp->b_ops = ops;
+	*bpp = bp;
+
+	return 0;
+}
+
+/*
+ * Reconstructing per-AG Btrees
+ *
+ * When a space btree is corrupt, we don't bother trying to fix it.  Instead,
+ * we scan secondary space metadata to derive the records that should be in
+ * the damaged btree, initialize a fresh btree root, and insert the records.
+ * Note that for rebuilding the rmapbt we scan all the primary data to
+ * generate the new records.
+ *
+ * However, that leaves the matter of removing all the metadata describing the
+ * old broken structure.  For primary metadata we use the rmap data to collect
+ * every extent with a matching rmap owner (exlist); we then iterate all other
+ * metadata structures with the same rmap owner to collect the extents that
+ * cannot be removed (sublist).  We then subtract sublist from exlist to
+ * derive the blocks that were used by the old btree.  These blocks can be
+ * reaped.
+ *
+ * For rmapbt reconstructions we must use different tactics for extent
+ * collection.  First we iterate all primary metadata (this excludes the old
+ * rmapbt, obviously) to generate new rmap records.  The gaps in the rmap
+ * records are collected as exlist.  The bnobt records are collected as
+ * sublist.  As with the other btrees we subtract sublist from exlist, and the
+ * result (since the rmapbt lives in the free space) are the blocks from the
+ * old rmapbt.
+ */
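+
+/*
+ * A worked example (illustrative numbers only): if the rmapbt says that
+ * blocks 10-19 of this AG carry the inobt owner, and walking the other
+ * structures with that owner shows the finobt using blocks 12-14, then
+ * exlist is [10-19] and sublist is [12-14]; the subtraction leaves
+ * [10-11] and [15-19] as the reapable blocks of the old inobt.
+ */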
+
+/* Collect a dead btree extent for later disposal. */
+int
+xfs_repair_collect_btree_extent(
+	struct xfs_scrub_context	*sc,
+	struct xfs_repair_extent_list	*exlist,
+	xfs_fsblock_t			fsbno,
+	xfs_extlen_t			len)
+{
+	struct xfs_repair_extent	*rex;
+
+	trace_xfs_repair_collect_btree_extent(sc->mp,
+			XFS_FSB_TO_AGNO(sc->mp, fsbno),
+			XFS_FSB_TO_AGBNO(sc->mp, fsbno), len);
+
+	rex = kmem_alloc(sizeof(struct xfs_repair_extent), KM_MAYFAIL);
+	if (!rex)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&rex->list);
+	rex->fsbno = fsbno;
+	rex->len = len;
+	list_add_tail(&rex->list, &exlist->list);
+
+	return 0;
+}
+
+/*
+ * An error happened during the rebuild, so the transaction will be cancelled.
+ * The fs will shut down, and the administrator has to unmount and run repair.
+ * Therefore, free all the memory associated with the list so we can die.
+ */
+void
+xfs_repair_cancel_btree_extents(
+	struct xfs_scrub_context	*sc,
+	struct xfs_repair_extent_list	*exlist)
+{
+	struct xfs_repair_extent	*rex;
+	struct xfs_repair_extent	*n;
+
+	for_each_xfs_repair_extent_safe(rex, n, exlist) {
+		list_del(&rex->list);
+		kmem_free(rex);
+	}
+}
+
+/* Compare two btree extents. */
+static int
+xfs_repair_btree_extent_cmp(
+	void				*priv,
+	struct list_head		*a,
+	struct list_head		*b)
+{
+	struct xfs_repair_extent	*ap;
+	struct xfs_repair_extent	*bp;
+
+	ap = container_of(a, struct xfs_repair_extent, list);
+	bp = container_of(b, struct xfs_repair_extent, list);
+
+	if (ap->fsbno > bp->fsbno)
+		return 1;
+	if (ap->fsbno < bp->fsbno)
+		return -1;
+	return 0;
+}
+
+/*
+ * Remove all the blocks mentioned in @sublist from the extents in @exlist.
+ *
+ * The intent is that callers will iterate the rmapbt for all of its records
+ * for a given owner to generate @exlist; and iterate all the blocks of the
+ * metadata structures that are not being rebuilt and have the same rmapbt
+ * owner to generate @sublist.  This routine subtracts all the extents
+ * mentioned in sublist from all the extents linked in @exlist, which leaves
+ * @exlist as the list of blocks that are not accounted for, which we assume
+ * are the dead blocks of the old metadata structure.  The blocks mentioned in
+ * @exlist can be reaped.
+ */
+#define LEFT_ALIGNED	(1 << 0)
+#define RIGHT_ALIGNED	(1 << 1)
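+/*
+ * The four overlap cases handled below, with illustrative extents (sub is
+ * the trimmed sublist extent, ex is the exlist extent being shortened):
+ *
+ *	LEFT_ALIGNED:			sub [10-12], ex [10-19] -> ex [13-19]
+ *	RIGHT_ALIGNED:			sub [17-19], ex [10-19] -> ex [10-16]
+ *	LEFT | RIGHT (total overlap):	sub [10-19], ex [10-19] -> ex deleted
+ *	neither (middle):		sub [13-15], ex [10-19] -> ex [10-12]
+ *					plus a new extent [16-19]
+ */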
+int
+xfs_repair_subtract_extents(
+	struct xfs_scrub_context	*sc,
+	struct xfs_repair_extent_list	*exlist,
+	struct xfs_repair_extent_list	*sublist)
+{
+	struct list_head		*lp;
+	struct xfs_repair_extent	*ex;
+	struct xfs_repair_extent	*newex;
+	struct xfs_repair_extent	*subex;
+	xfs_fsblock_t			sub_fsb;
+	xfs_extlen_t			sub_len;
+	int				state;
+	int				error = 0;
+
+	if (list_empty(&exlist->list) || list_empty(&sublist->list))
+		return 0;
+
+	list_sort(NULL, &exlist->list, xfs_repair_btree_extent_cmp);
+	list_sort(NULL, &sublist->list, xfs_repair_btree_extent_cmp);
+
+	/*
+	 * Now that we've sorted both lists, we iterate exlist once, rolling
+	 * forward through sublist and/or exlist as necessary until we find an
+	 * overlap or reach the end of either list.  We do not reset lp to the
+	 * head of exlist nor do we reset subex to the head of sublist.  The
+	 * list traversal is similar to the merge step of merge sort, except
+	 * that we delete extents instead of merging them.  In this manner we
+	 * avoid O(n^2) operations.
+	 */
+	subex = list_first_entry(&sublist->list, struct xfs_repair_extent,
+			list);
+	lp = exlist->list.next;
+	while (lp != &exlist->list) {
+		ex = list_entry(lp, struct xfs_repair_extent, list);
+
+		/*
+		 * Advance subex and/or ex until we find a pair that
+		 * intersect or we run out of extents.
+		 */
+		while (subex->fsbno + subex->len <= ex->fsbno) {
+			if (list_is_last(&subex->list, &sublist->list))
+				goto out;
+			subex = list_next_entry(subex, list);
+		}
+		if (subex->fsbno >= ex->fsbno + ex->len) {
+			lp = lp->next;
+			continue;
+		}
+
+		/* trim subex to fit the extent we have */
+		sub_fsb = subex->fsbno;
+		sub_len = subex->len;
+		if (subex->fsbno < ex->fsbno) {
+			sub_len -= ex->fsbno - subex->fsbno;
+			sub_fsb = ex->fsbno;
+		}
+		if (sub_len > ex->len)
+			sub_len = ex->len;
+
+		state = 0;
+		if (sub_fsb == ex->fsbno)
+			state |= LEFT_ALIGNED;
+		if (sub_fsb + sub_len == ex->fsbno + ex->len)
+			state |= RIGHT_ALIGNED;
+		switch (state) {
+		case LEFT_ALIGNED:
+			/* Coincides with only the left. */
+			ex->fsbno += sub_len;
+			ex->len -= sub_len;
+			break;
+		case RIGHT_ALIGNED:
+			/* Coincides with only the right. */
+			ex->len -= sub_len;
+			lp = lp->next;
+			break;
+		case LEFT_ALIGNED | RIGHT_ALIGNED:
+			/* Total overlap, just delete ex. */
+			lp = lp->next;
+			list_del(&ex->list);
+			kmem_free(ex);
+			break;
+		case 0:
+			/*
+			 * Deleting from the middle: add the new right extent
+			 * and then shrink the left extent.
+			 */
+			newex = kmem_alloc(sizeof(struct xfs_repair_extent),
+					KM_MAYFAIL);
+			if (!newex) {
+				error = -ENOMEM;
+				goto out;
+			}
+			INIT_LIST_HEAD(&newex->list);
+			newex->fsbno = sub_fsb + sub_len;
+			newex->len = ex->fsbno + ex->len - newex->fsbno;
+			list_add(&newex->list, &ex->list);
+			ex->len = sub_fsb - ex->fsbno;
+			lp = lp->next;
+			break;
+		default:
+			ASSERT(0);
+			break;
+		}
+	}
+
+out:
+	return error;
+}
+#undef LEFT_ALIGNED
+#undef RIGHT_ALIGNED
+
+/*
+ * Disposal of Blocks from Old per-AG Btrees
+ *
+ * Now that we've constructed a new btree to replace the damaged one, we want
+ * to dispose of the blocks that (we think) the old btree was using.
+ * Previously, we used the rmapbt to collect the extents (exlist) with the
+ * rmap owner corresponding to the tree we rebuilt, collected extents for any
+ * blocks with the same rmap owner that are owned by another data structure
+ * (sublist), and subtracted sublist from exlist.  In theory the extents
+ * remaining in exlist are the old btree's blocks.
+ *
+ * Unfortunately, it's possible that the btree was crosslinked with other
+ * blocks on disk.  The rmap data can tell us if there are multiple owners, so
+ * if the rmapbt says there is an owner of this block other than @oinfo, then
+ * the block is crosslinked.  Remove the reverse mapping and continue.
+ *
+ * If there is one rmap record, we can free the block, which removes the
+ * reverse mapping but doesn't add the block to the free space.  Our repair
+ * strategy is to hope the other metadata objects crosslinked on this block
+ * will be rebuilt (atop different blocks), thereby removing all the cross
+ * links.
+ *
+ * If there are no rmap records at all, we also free the block.  If the btree
+ * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
+ * supposed to be an rmap record and everything is ok.  For other btrees there
+ * had to have been an rmap entry for the block to have ended up on @exlist,
+ * so if it's gone now there's something wrong and the fs will shut down.
+ *
+ * Note: If a block has multiple rmap records that all bear the same rmap
+ * owner as the btree we're rebuilding, and the block really is owned by
+ * another data structure with that owner, then the block will be in sublist
+ * and therefore doesn't need disposal.  If the block has multiple rmap
+ * records with that owner but is not actually owned by something with that
+ * owner, the block will be freed.
+ *
+ * The caller is responsible for locking the AG headers for the entire rebuild
+ * operation so that nothing else can sneak in and change the AG state while
+ * we're not looking.  We also assume that the caller already invalidated any
+ * buffers associated with @exlist.
+ */
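+
+/*
+ * In short, the per-block decision in xfs_repair_dispose_btree_block
+ * below is (a sketch of the existing logic, not new behavior):
+ *
+ *	rmapbt shows other owners?	remove only our rmap, keep the block
+ *	AGFL-type reservation?		put the block back on the free list
+ *	otherwise			free the extent, which also removes
+ *					the rmap
+ */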
+
+/*
+ * Invalidate buffers for per-AG btree blocks we're dumping.  This function
+ * is not intended for use with file data repairs; we have bunmapi for that.
+ */
+int
+xfs_repair_invalidate_blocks(
+	struct xfs_scrub_context	*sc,
+	struct xfs_repair_extent_list	*exlist)
+{
+	struct xfs_repair_extent	*rex;
+	struct xfs_repair_extent	*n;
+	struct xfs_buf			*bp;
+	xfs_fsblock_t			fsbno;
+	xfs_agblock_t			i;
+
+	/*
+	 * For each block in each extent, see if there's an incore buffer for
+	 * exactly that block; if so, invalidate it.  The buffer cache only
+	 * lets us look for one buffer at a time, so we have to look one block
+	 * at a time.  Avoid invalidating AG headers and post-EOFS blocks
+	 * because we never own those; and if we can't TRYLOCK the buffer we
+	 * assume it's owned by someone else.
+	 */
+	for_each_xfs_repair_extent_safe(rex, n, exlist) {
+		for (fsbno = rex->fsbno, i = rex->len; i > 0; fsbno++, i--) {
+			/* Skip AG headers and post-EOFS blocks */
+			if (!xfs_verify_fsbno(sc->mp, fsbno))
+				continue;
+			bp = xfs_buf_incore(sc->mp->m_ddev_targp,
+					XFS_FSB_TO_DADDR(sc->mp, fsbno),
+					XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK);
+			if (bp) {
+				xfs_trans_bjoin(sc->tp, bp);
+				xfs_trans_binval(sc->tp, bp);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Ensure the freelist is the correct size. */
+int
+xfs_repair_fix_freelist(
+	struct xfs_scrub_context	*sc,
+	bool				can_shrink)
+{
+	struct xfs_alloc_arg		args = {0};
+
+	args.mp = sc->mp;
+	args.tp = sc->tp;
+	args.agno = sc->sa.agno;
+	args.alignment = 1;
+	args.pag = sc->sa.pag;
+
+	return xfs_alloc_fix_freelist(&args,
+			can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK);
+}
+
+/*
+ * Put a block back on the AGFL.
+ */
+STATIC int
+xfs_repair_put_freelist(
+	struct xfs_scrub_context	*sc,
+	xfs_agblock_t			agbno)
+{
+	struct xfs_owner_info		oinfo;
+	int				error;
+
+	/* Make sure there's space on the freelist. */
+	error = xfs_repair_fix_freelist(sc, true);
+	if (error)
+		return error;
+
+	/*
+	 * Since we're "freeing" a lost block onto the AGFL, we have to
+	 * create an rmap for the block prior to merging it or else other
+	 * parts will break.
+	 */
+	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+	error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.agno, agbno, 1,
+			&oinfo);
+	if (error)
+		return error;
+
+	/* Put the block on the AGFL. */
+	error = xfs_alloc_put_freelist(sc->tp, sc->sa.agf_bp, sc->sa.agfl_bp,
+			agbno, 0);
+	if (error)
+		return error;
+	xfs_extent_busy_insert(sc->tp, sc->sa.agno, agbno, 1,
+			XFS_EXTENT_BUSY_SKIP_DISCARD);
+
+	return 0;
+}
+
+/* Dispose of a single metadata block. */
+STATIC int
+xfs_repair_dispose_btree_block(
+	struct xfs_scrub_context	*sc,
+	xfs_fsblock_t			fsbno,
+	struct xfs_owner_info		*oinfo,
+	enum xfs_ag_resv_type		resv)
+{
+	struct xfs_btree_cur		*cur;
+	struct xfs_buf			*agf_bp = NULL;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	bool				has_other_rmap;
+	int				error;
+
+	agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
+	agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
+
+	/*
+	 * If we are repairing per-inode metadata, we need to read in the AGF
+	 * buffer.  Otherwise, we're repairing a per-AG structure, so reuse
+	 * the AGF buffer that the setup functions already grabbed.
+	 */
+	if (sc->ip) {
+		error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf_bp);
+		if (error)
+			return error;
+		if (!agf_bp)
+			return -ENOMEM;
+	} else {
+		agf_bp = sc->sa.agf_bp;
+	}
+	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, agno);
+
+	/* Can we find any other rmappings? */
+	error = xfs_rmap_has_other_keys(cur, agbno, 1, oinfo, &has_other_rmap);
+	if (error)
+		goto out_cur;
+	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+
+	/*
+	 * If there are other rmappings, this block is cross linked and must
+	 * not be freed.  Remove the reverse mapping and move on.  Otherwise,
+	 * we were the only owner of the block, so free the extent, which will
+	 * also remove the rmap.
+	 *
+	 * XXX: XFS doesn't support detecting the case where a single block
+	 * metadata structure is crosslinked with a multi-block structure
+	 * because the buffer cache doesn't detect aliasing problems, so we
+	 * can't fix 100% of crosslinking problems (yet).  The verifiers will
+	 * blow on writeout, the filesystem will shut down, and the admin gets
+	 * to run xfs_repair.
+	 */
+	if (has_other_rmap)
+		error = xfs_rmap_free(sc->tp, agf_bp, agno, agbno, 1, oinfo);
+	else if (resv == XFS_AG_RESV_AGFL)
+		error = xfs_repair_put_freelist(sc, agbno);
+	else
+		error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv);
+	if (agf_bp != sc->sa.agf_bp)
+		xfs_trans_brelse(sc->tp, agf_bp);
+	if (error)
+		return error;
+
+	if (sc->ip)
+		return xfs_trans_roll_inode(&sc->tp, sc->ip);
+	return xfs_repair_roll_ag_trans(sc);
+
+out_cur:
+	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+	if (agf_bp != sc->sa.agf_bp)
+		xfs_trans_brelse(sc->tp, agf_bp);
+	return error;
+}
+
+/* Dispose of btree blocks from an old per-AG btree. */
+int
+xfs_repair_reap_btree_extents(
+	struct xfs_scrub_context	*sc,
+	struct xfs_repair_extent_list	*exlist,
+	struct xfs_owner_info		*oinfo,
+	enum xfs_ag_resv_type		type)
+{
+	struct xfs_repair_extent	*rex;
+	struct xfs_repair_extent	*n;
+	int				error = 0;
+
+	ASSERT(xfs_sb_version_hasrmapbt(&sc->mp->m_sb));
+
+	/* Dispose of every block from the old btree. */
+	for_each_xfs_repair_extent_safe(rex, n, exlist) {
+		ASSERT(sc->ip != NULL ||
+		       XFS_FSB_TO_AGNO(sc->mp, rex->fsbno) == sc->sa.agno);
+
+		trace_xfs_repair_dispose_btree_extent(sc->mp,
+				XFS_FSB_TO_AGNO(sc->mp, rex->fsbno),
+				XFS_FSB_TO_AGBNO(sc->mp, rex->fsbno), rex->len);
+
+		for (; rex->len > 0; rex->len--, rex->fsbno++) {
+			error = xfs_repair_dispose_btree_block(sc, rex->fsbno,
+					oinfo, type);
+			if (error)
+				goto out;
+		}
+		list_del(&rex->list);
+		kmem_free(rex);
+	}
+
+out:
+	xfs_repair_cancel_btree_extents(sc, exlist);
+	return error;
+}
+
+/*
+ * Finding per-AG Btree Roots for AGF/AGI Reconstruction
+ *
+ * If the AGF or AGI become slightly corrupted, it may be necessary to rebuild
+ * the AG headers by using the rmap data to rummage through the AG looking for
+ * btree roots.  This is not guaranteed to work if the AG is heavily damaged
+ * or the rmap data are corrupt.
+ *
+ * Callers of xfs_repair_find_ag_btree_roots must lock the AGF and AGFL
+ * buffers if the AGF is being rebuilt; or the AGF and AGI buffers if the
+ * AGI is being rebuilt.  It must maintain these locks until it's safe for
+ * other threads to change the btrees' shapes.  The caller provides
+ * information about the btrees to look for by passing in an array of
+ * xfs_repair_find_ag_btree with the (rmap owner, buf_ops, magic) fields set.
+ * The (root, height) fields will be set on return if anything is found.  The
+ * last element of the array should have a NULL buf_ops to mark the end of the
+ * array.
+ *
+ * For every rmapbt record matching any of the rmap owners in btree_info,
+ * read each block referenced by the rmap record.  If the block is a btree
+ * block from this filesystem matching any of the magic numbers and has a
+ * level higher than what we've already seen, remember the block and the
+ * height of the tree required to have such a block.  When the call completes,
+ * we return the highest block we've found for each btree description; those
+ * should be the roots.
+ */
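+
+/*
+ * For instance, a hypothetical caller hunting for the bnobt root might
+ * fill in something like the following (sketch only; real callers supply
+ * their own owner/ops/magic triples):
+ *
+ *	struct xfs_repair_find_ag_btree fab[] = {
+ *		{
+ *			.rmap_owner	= XFS_RMAP_OWN_AG,
+ *			.buf_ops	= &xfs_allocbt_buf_ops,
+ *			.magic		= XFS_ABTB_CRC_MAGIC,
+ *		},
+ *		{ .buf_ops = NULL },		(array terminator)
+ *	};
+ *	error = xfs_repair_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp);
+ */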
+
+struct xfs_repair_findroot {
+	struct xfs_scrub_context	*sc;
+	struct xfs_buf			*agfl_bp;
+	struct xfs_agf			*agf;
+	struct xfs_repair_find_ag_btree	*btree_info;
+};
+
+/* See if our block is in the AGFL. */
+STATIC int
+xfs_repair_findroot_agfl_walk(
+	struct xfs_mount		*mp,
+	xfs_agblock_t			bno,
+	void				*priv)
+{
+	xfs_agblock_t			*agbno = priv;
+
+	return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
+}
+
+/* Does this block match the btree information passed in? */
+STATIC int
+xfs_repair_findroot_block(
+	struct xfs_repair_findroot	*ri,
+	struct xfs_repair_find_ag_btree	*fab,
+	uint64_t			owner,
+	xfs_agblock_t			agbno,
+	bool				*found_it)
+{
+	struct xfs_mount		*mp = ri->sc->mp;
+	struct xfs_buf			*bp;
+	struct xfs_btree_block		*btblock;
+	xfs_daddr_t			daddr;
+	int				error;
+
+	daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);
+
+	/*
+	 * Blocks in the AGFL have stale contents that might just happen to
+	 * have a matching magic and uuid.  We don't want to pull these blocks
+	 * in as part of a tree root, so we have to filter out the AGFL stuff
+	 * here.  If the AGFL looks insane we'll just refuse to repair.
+	 */
+	if (owner == XFS_RMAP_OWN_AG) {
+		error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
+				xfs_repair_findroot_agfl_walk, &agbno);
+		if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+			return 0;
+		if (error)
+			return error;
+	}
+
+	error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
+			mp->m_bsize, 0, &bp, NULL);
+	if (error)
+		return error;
+
+	/*
+	 * Does this look like a block matching our fs and higher than any
+	 * other block we've found so far?  If so, reattach buffer verifiers
+	 * so the AIL won't complain if the buffer is also dirty.
+	 */
+	btblock = XFS_BUF_TO_BLOCK(bp);
+	if (be32_to_cpu(btblock->bb_magic) != fab->magic)
+		goto out;
+	if (xfs_sb_version_hascrc(&mp->m_sb) &&
+	    !uuid_equal(&btblock->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
+		goto out;
+	bp->b_ops = fab->buf_ops;
+
+	/* Ignore this block if it's lower in the tree than we've seen. */
+	if (fab->root != NULLAGBLOCK &&
+	    xfs_btree_get_level(btblock) < fab->height)
+		goto out;
+
+	/* Make sure we pass the verifiers. */
+	bp->b_ops->verify_read(bp);
+	if (bp->b_error)
+		goto out;
+	fab->root = agbno;
+	fab->height = xfs_btree_get_level(btblock) + 1;
+	*found_it = true;
+
+	trace_xfs_repair_findroot_block(mp, ri->sc->sa.agno, agbno,
+			be32_to_cpu(btblock->bb_magic), fab->height - 1);
+out:
+	xfs_trans_brelse(ri->sc->tp, bp);
+	return error;
+}
+
+/*
+ * Do any of the blocks in this rmap record match one of the btrees we're
+ * looking for?
+ */
+STATIC int
+xfs_repair_findroot_rmap(
+	struct xfs_btree_cur		*cur,
+	struct xfs_rmap_irec		*rec,
+	void				*priv)
+{
+	struct xfs_repair_findroot	*ri = priv;
+	struct xfs_repair_find_ag_btree	*fab;
+	xfs_agblock_t			b;
+	bool				found_it;
+	int				error = 0;
+
+	/* Ignore anything that isn't AG metadata. */
+	if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
+		return 0;
+
+	/* Otherwise scan each block + btree type. */
+	for (b = 0; b < rec->rm_blockcount; b++) {
+		found_it = false;
+		for (fab = ri->btree_info; fab->buf_ops; fab++) {
+			if (rec->rm_owner != fab->rmap_owner)
+				continue;
+			error = xfs_repair_findroot_block(ri, fab,
+					rec->rm_owner, rec->rm_startblock + b,
+					&found_it);
+			if (error)
+				return error;
+			if (found_it)
+				break;
+		}
+	}
+
+	return 0;
+}
+
+/* Find the roots of the per-AG btrees described in btree_info. */
+int
+xfs_repair_find_ag_btree_roots(
+	struct xfs_scrub_context	*sc,
+	struct xfs_buf			*agf_bp,
+	struct xfs_repair_find_ag_btree	*btree_info,
+	struct xfs_buf			*agfl_bp)
+{
+	struct xfs_mount		*mp = sc->mp;
+	struct xfs_repair_findroot	ri;
+	struct xfs_repair_find_ag_btree	*fab;
+	struct xfs_btree_cur		*cur;
+	int				error;
+
+	ASSERT(xfs_buf_islocked(agf_bp));
+	ASSERT(agfl_bp == NULL || xfs_buf_islocked(agfl_bp));
+
+	ri.sc = sc;
+	ri.btree_info = btree_info;
+	ri.agf = XFS_BUF_TO_AGF(agf_bp);
+	ri.agfl_bp = agfl_bp;
+	for (fab = btree_info; fab->buf_ops; fab++) {
+		ASSERT(agfl_bp || fab->rmap_owner != XFS_RMAP_OWN_AG);
+		ASSERT(XFS_RMAP_NON_INODE_OWNER(fab->rmap_owner));
+		fab->root = NULLAGBLOCK;
+		fab->height = 0;
+	}
+
+	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
+	error = xfs_rmap_query_all(cur, xfs_repair_findroot_rmap, &ri);
+	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+	return error;
+}
+
+/* Force a quotacheck the next time we mount. */
+void
+xfs_repair_force_quotacheck(
+	struct xfs_scrub_context	*sc,
+	uint				dqtype)
+{
+	uint				flag;
+
+	flag = xfs_quota_chkd_flag(dqtype);
+	if (!(flag & sc->mp->m_qflags))
+		return;
+
+	sc->mp->m_qflags &= ~flag;
+	spin_lock(&sc->mp->m_sb_lock);
+	sc->mp->m_sb.sb_qflags &= ~flag;
+	spin_unlock(&sc->mp->m_sb_lock);
+	xfs_log_sb(sc->tp);
+}
+
+/*
+ * Attach dquots to this inode, or schedule quotacheck to fix them.
+ *
+ * This function ensures that the appropriate dquots are attached to an inode.
+ * We cannot allow the dquot code to allocate an on-disk dquot block here
+ * because we're already in transaction context with the inode locked.  The
+ * on-disk dquot should already exist anyway.  If the quota code signals
+ * corruption or missing quota information, schedule quotacheck, which will
+ * repair corruptions in the quota metadata.
+ */
+int
+xfs_repair_ino_dqattach(
+	struct xfs_scrub_context	*sc)
+{
+	int				error;
+
+	error = xfs_qm_dqattach_locked(sc->ip, false);
+	switch (error) {
+	case -EFSBADCRC:
+	case -EFSCORRUPTED:
+	case -ENOENT:
+		xfs_err_ratelimited(sc->mp,
+"inode %llu repair encountered quota error %d, quotacheck forced.",
+				(unsigned long long)sc->ip->i_ino, error);
+		if (XFS_IS_UQUOTA_ON(sc->mp) && !sc->ip->i_udquot)
+			xfs_repair_force_quotacheck(sc, XFS_DQ_USER);
+		if (XFS_IS_GQUOTA_ON(sc->mp) && !sc->ip->i_gdquot)
+			xfs_repair_force_quotacheck(sc, XFS_DQ_GROUP);
+		if (XFS_IS_PQUOTA_ON(sc->mp) && !sc->ip->i_pdquot)
+			xfs_repair_force_quotacheck(sc, XFS_DQ_PROJ);
+		/* fall through */
+	case -ESRCH:
+		error = 0;
+		break;
+	default:
+		break;
+	}
+
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
new file mode 100644
index 0000000..f2b0895
--- /dev/null
+++ b/fs/xfs/scrub/repair.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2018 Oracle.  All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#ifndef __XFS_SCRUB_REPAIR_H__
+#define __XFS_SCRUB_REPAIR_H__
+
+static inline int xfs_repair_notsupported(struct xfs_scrub_context *sc)
+{
+	return -EOPNOTSUPP;
+}
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+
+/* Repair helpers */
+
+int xfs_repair_attempt(struct xfs_inode *ip, struct xfs_scrub_context *sc,
+		bool *fixed);
+void xfs_repair_failure(struct xfs_mount *mp);
+int xfs_repair_roll_ag_trans(struct xfs_scrub_context *sc);
+bool xfs_repair_ag_has_space(struct xfs_perag *pag, xfs_extlen_t nr_blocks,
+		enum xfs_ag_resv_type type);
+xfs_extlen_t xfs_repair_calc_ag_resblks(struct xfs_scrub_context *sc);
+int xfs_repair_alloc_ag_block(struct xfs_scrub_context *sc,
+		struct xfs_owner_info *oinfo, xfs_fsblock_t *fsbno,
+		enum xfs_ag_resv_type resv);
+int xfs_repair_init_btblock(struct xfs_scrub_context *sc, xfs_fsblock_t fsb,
+		struct xfs_buf **bpp, xfs_btnum_t btnum,
+		const struct xfs_buf_ops *ops);
+
+struct xfs_repair_extent {
+	struct list_head		list;
+	xfs_fsblock_t			fsbno;
+	xfs_extlen_t			len;
+};
+
+struct xfs_repair_extent_list {
+	struct list_head		list;
+};
+
+static inline void
+xfs_repair_init_extent_list(
+	struct xfs_repair_extent_list	*exlist)
+{
+	INIT_LIST_HEAD(&exlist->list);
+}
+
+#define for_each_xfs_repair_extent_safe(rbe, n, exlist) \
+	list_for_each_entry_safe((rbe), (n), &(exlist)->list, list)
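+
+/*
+ * Hypothetical usage of the extent list helpers below (sketch only;
+ * error handling elided):
+ *
+ *	struct xfs_repair_extent_list	exlist;
+ *
+ *	xfs_repair_init_extent_list(&exlist);
+ *	error = xfs_repair_collect_btree_extent(sc, &exlist, fsbno, len);
+ *	error = xfs_repair_invalidate_blocks(sc, &exlist);
+ *	error = xfs_repair_reap_btree_extents(sc, &exlist, &oinfo, resv);
+ */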
+int xfs_repair_collect_btree_extent(struct xfs_scrub_context *sc,
+		struct xfs_repair_extent_list *btlist, xfs_fsblock_t fsbno,
+		xfs_extlen_t len);
+void xfs_repair_cancel_btree_extents(struct xfs_scrub_context *sc,
+		struct xfs_repair_extent_list *btlist);
+int xfs_repair_subtract_extents(struct xfs_scrub_context *sc,
+		struct xfs_repair_extent_list *exlist,
+		struct xfs_repair_extent_list *sublist);
+int xfs_repair_fix_freelist(struct xfs_scrub_context *sc, bool can_shrink);
+int xfs_repair_invalidate_blocks(struct xfs_scrub_context *sc,
+		struct xfs_repair_extent_list *btlist);
+int xfs_repair_reap_btree_extents(struct xfs_scrub_context *sc,
+		struct xfs_repair_extent_list *exlist,
+		struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
+
+struct xfs_repair_find_ag_btree {
+	/* in: rmap owner of the btree we're looking for */
+	uint64_t			rmap_owner;
+
+	/* in: buffer ops */
+	const struct xfs_buf_ops	*buf_ops;
+
+	/* in: magic number of the btree */
+	uint32_t			magic;
+
+	/* out: the highest btree block found and the tree height */
+	xfs_agblock_t			root;
+	unsigned int			height;
+};
+
+int xfs_repair_find_ag_btree_roots(struct xfs_scrub_context *sc,
+		struct xfs_buf *agf_bp,
+		struct xfs_repair_find_ag_btree *btree_info,
+		struct xfs_buf *agfl_bp);
+void xfs_repair_force_quotacheck(struct xfs_scrub_context *sc, uint dqtype);
+int xfs_repair_ino_dqattach(struct xfs_scrub_context *sc);
+
+/* Metadata repairers */
+
+int xfs_repair_probe(struct xfs_scrub_context *sc);
+int xfs_repair_superblock(struct xfs_scrub_context *sc);
+
+#else
+
+static inline int xfs_repair_attempt(
+	struct xfs_inode		*ip,
+	struct xfs_scrub_context	*sc,
+	bool				*fixed)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void xfs_repair_failure(struct xfs_mount *mp) {}
+
+static inline xfs_extlen_t
+xfs_repair_calc_ag_resblks(
+	struct xfs_scrub_context	*sc)
+{
+	ASSERT(!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR));
+	return 0;
+}
+
+#define xfs_repair_probe		xfs_repair_notsupported
+#define xfs_repair_superblock		xfs_repair_notsupported
+
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
+
+#endif	/* __XFS_SCRUB_REPAIR_H__ */
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 8f2a7c3..b376a9a 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -66,7 +66,7 @@
 	bool				is_unwritten;
 	int				error;
 
-	if (!sc->sa.refc_cur)
+	if (!sc->sa.refc_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	non_inode = XFS_RMAP_NON_INODE_OWNER(irec->rm_owner);
@@ -207,7 +207,7 @@
 	bool				has_rmap;
 	int				error;
 
-	if (!sc->sa.rmap_cur)
+	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	error = xfs_rmap_record_exists(sc->sa.rmap_cur, bno, len, oinfo,
@@ -250,7 +250,7 @@
 	bool				has_rmap;
 	int				error;
 
-	if (!sc->sa.rmap_cur)
+	if (!sc->sa.rmap_cur || xfs_scrub_skip_xref(sc->sm))
 		return;
 
 	error = xfs_rmap_has_record(sc->sa.rmap_cur, bno, len, &has_rmap);
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 39c41dfe..40f462a 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -66,11 +66,15 @@
 	void				*priv)
 {
 	struct xfs_scrub_context	*sc = priv;
+	xfs_rtblock_t			startblock;
+	xfs_rtblock_t			blockcount;
 
-	if (rec->ar_startblock + rec->ar_blockcount <= rec->ar_startblock ||
-	    !xfs_verify_rtbno(sc->mp, rec->ar_startblock) ||
-	    !xfs_verify_rtbno(sc->mp, rec->ar_startblock +
-			rec->ar_blockcount - 1))
+	startblock = rec->ar_startext * tp->t_mountp->m_sb.sb_rextsize;
+	blockcount = rec->ar_extcount * tp->t_mountp->m_sb.sb_rextsize;
+
+	if (startblock + blockcount <= startblock ||
+	    !xfs_verify_rtbno(sc->mp, startblock) ||
+	    !xfs_verify_rtbno(sc->mp, startblock + blockcount - 1))
 		xfs_scrub_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 	return 0;
 }
@@ -82,6 +86,11 @@
 {
 	int				error;
 
+	/* Invoke the fork scrubber. */
+	error = xfs_scrub_metadata_inode_forks(sc);
+	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+		return error;
+
 	error = xfs_rtalloc_query_all(sc->tp, xfs_scrub_rtbitmap_rec, sc);
 	if (!xfs_scrub_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
 		goto out;
@@ -95,8 +104,35 @@
 xfs_scrub_rtsummary(
 	struct xfs_scrub_context	*sc)
 {
+	struct xfs_inode		*rsumip = sc->mp->m_rsumip;
+	struct xfs_inode		*old_ip = sc->ip;
+	uint				old_ilock_flags = sc->ilock_flags;
+	int				error = 0;
+
+	/*
+	 * We ILOCK'd the rt bitmap ip in the setup routine; now lock the
+	 * rt summary ip in compliance with the rt inode locking rules.
+	 *
+	 * Since we switch sc->ip to rsumip we have to save the old ilock
+	 * flags so that we don't mix up the inode state that @sc tracks.
+	 */
+	sc->ip = rsumip;
+	sc->ilock_flags = XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM;
+	xfs_ilock(sc->ip, sc->ilock_flags);
+
+	/* Invoke the fork scrubber. */
+	error = xfs_scrub_metadata_inode_forks(sc);
+	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+		goto out;
+
 	/* XXX: implement this some day */
-	return -ENOENT;
+	xfs_scrub_set_incomplete(sc);
+out:
+	/* Switch back to the rtbitmap inode and lock flags. */
+	xfs_iunlock(sc->ip, sc->ilock_flags);
+	sc->ilock_flags = old_ilock_flags;
+	sc->ip = old_ip;
+	return error;
 }
 
 
@@ -107,11 +143,23 @@
 	xfs_rtblock_t			fsbno,
 	xfs_extlen_t			len)
 {
+	xfs_rtblock_t			startext;
+	xfs_rtblock_t			endext;
+	xfs_rtblock_t			extcount;
 	bool				is_free;
 	int				error;
 
+	if (xfs_scrub_skip_xref(sc->sm))
+		return;
+
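+	/*
+	 * Convert the rt block range into whole rt extents, rounding the
+	 * end up.  Worked example with made-up numbers: if sb_rextsize = 4,
+	 * then fsbno = 6 and len = 5 cover rtblocks 6-10, so startext = 1,
+	 * endext rounds up to 3, and extcount = 2 (rtextents 1 and 2).
+	 */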
+	startext = fsbno;
+	endext = fsbno + len - 1;
+	do_div(startext, sc->mp->m_sb.sb_rextsize);
+	if (do_div(endext, sc->mp->m_sb.sb_rextsize))
+		endext++;
+	extcount = endext - startext;
 	xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
-	error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, fsbno, len,
+	error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, extcount,
 			&is_free);
 	if (!xfs_scrub_should_check_xref(sc, &error, NULL))
 		goto out_unlock;
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 26c7596..36db098 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -42,11 +42,18 @@
 #include "xfs_refcount_btree.h"
 #include "xfs_rmap.h"
 #include "xfs_rmap_btree.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
+#include "xfs_log.h"
+#include "xfs_trans_priv.h"
 #include "scrub/xfs_scrub.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
 #include "scrub/btree.h"
+#include "scrub/repair.h"
 
 /*
  * Online Scrub and Repair
@@ -120,6 +127,24 @@
  * XCORRUPT flag; btree query function errors are noted by setting the
  * XFAIL flag and deleting the cursor to prevent further attempts to
  * cross-reference with a defective btree.
+ *
+ * If a piece of metadata proves corrupt or suboptimal, the userspace
+ * program can ask the kernel to apply some tender loving care (TLC) to
+ * the metadata object by setting the REPAIR flag and re-calling the
+ * scrub ioctl.  "Corruption" is defined by metadata violating the
+ * on-disk specification; operations cannot continue if the violation is
+ * left untreated.  It is possible for XFS to continue if an object is
+ * "suboptimal", however performance may be degraded.  Repairs are
+ * usually performed by rebuilding the metadata entirely out of
+ * redundant metadata.  Optimizing, on the other hand, can sometimes be
+ * done without rebuilding entire structures.
+ *
+ * Generally speaking, the repair code has the following code structure:
+ * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
+ * The first scrub figures out whether we need to rebuild or simply
+ * optimize the structure, so that the rebuild knows what to do.  The
+ * second scrub evaluates the completeness of the repair; its outcome is
+ * what we report to userspace.
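+ *
+ * From userspace the shape of a repair request is roughly as follows
+ * (hypothetical sketch; the ioctl, scrub type, and flag exist, but the
+ * snippet is illustrative):
+ *
+ *	struct xfs_scrub_metadata sm = {
+ *		.sm_type	= XFS_SCRUB_TYPE_INOBT,
+ *		.sm_agno	= agno,
+ *		.sm_flags	= XFS_SCRUB_IFLAG_REPAIR,
+ *	};
+ *	error = ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm);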
  */
 
 /*
@@ -155,7 +180,10 @@
 {
 	xfs_scrub_ag_free(sc, &sc->sa);
 	if (sc->tp) {
-		xfs_trans_cancel(sc->tp);
+		if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+			error = xfs_trans_commit(sc->tp);
+		else
+			xfs_trans_cancel(sc->tp);
 		sc->tp = NULL;
 	}
 	if (sc->ip) {
@@ -166,6 +194,8 @@
 			iput(VFS_I(sc->ip));
 		sc->ip = NULL;
 	}
+	if (sc->has_quotaofflock)
+		mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
 	if (sc->buf) {
 		kmem_free(sc->buf);
 		sc->buf = NULL;
@@ -180,126 +210,150 @@
 		.type	= ST_NONE,
 		.setup	= xfs_scrub_setup_fs,
 		.scrub	= xfs_scrub_probe,
+		.repair = xfs_repair_probe,
 	},
 	[XFS_SCRUB_TYPE_SB] = {		/* superblock */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_fs,
 		.scrub	= xfs_scrub_superblock,
+		.repair	= xfs_repair_superblock,
 	},
 	[XFS_SCRUB_TYPE_AGF] = {	/* agf */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_fs,
 		.scrub	= xfs_scrub_agf,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_fs,
 		.scrub	= xfs_scrub_agfl,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_AGI] = {	/* agi */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_fs,
 		.scrub	= xfs_scrub_agi,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BNOBT] = {	/* bnobt */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_ag_allocbt,
 		.scrub	= xfs_scrub_bnobt,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_CNTBT] = {	/* cntbt */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_ag_allocbt,
 		.scrub	= xfs_scrub_cntbt,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_INOBT] = {	/* inobt */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_ag_iallocbt,
 		.scrub	= xfs_scrub_inobt,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_FINOBT] = {	/* finobt */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_ag_iallocbt,
 		.scrub	= xfs_scrub_finobt,
 		.has	= xfs_sb_version_hasfinobt,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RMAPBT] = {	/* rmapbt */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_ag_rmapbt,
 		.scrub	= xfs_scrub_rmapbt,
 		.has	= xfs_sb_version_hasrmapbt,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_REFCNTBT] = {	/* refcountbt */
 		.type	= ST_PERAG,
 		.setup	= xfs_scrub_setup_ag_refcountbt,
 		.scrub	= xfs_scrub_refcountbt,
 		.has	= xfs_sb_version_hasreflink,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_INODE] = {	/* inode record */
 		.type	= ST_INODE,
 		.setup	= xfs_scrub_setup_inode,
 		.scrub	= xfs_scrub_inode,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTD] = {	/* inode data fork */
 		.type	= ST_INODE,
 		.setup	= xfs_scrub_setup_inode_bmap,
 		.scrub	= xfs_scrub_bmap_data,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTA] = {	/* inode attr fork */
 		.type	= ST_INODE,
 		.setup	= xfs_scrub_setup_inode_bmap,
 		.scrub	= xfs_scrub_bmap_attr,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_BMBTC] = {	/* inode CoW fork */
 		.type	= ST_INODE,
 		.setup	= xfs_scrub_setup_inode_bmap,
 		.scrub	= xfs_scrub_bmap_cow,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_DIR] = {	/* directory */
 		.type	= ST_INODE,
 		.setup	= xfs_scrub_setup_directory,
 		.scrub	= xfs_scrub_directory,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_XATTR] = {	/* extended attributes */
 		.type	= ST_INODE,
 		.setup	= xfs_scrub_setup_xattr,
 		.scrub	= xfs_scrub_xattr,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_SYMLINK] = {	/* symbolic link */
 		.type	= ST_INODE,
 		.setup	= xfs_scrub_setup_symlink,
 		.scrub	= xfs_scrub_symlink,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_PARENT] = {	/* parent pointers */
 		.type	= ST_INODE,
 		.setup	= xfs_scrub_setup_parent,
 		.scrub	= xfs_scrub_parent,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RTBITMAP] = {	/* realtime bitmap */
 		.type	= ST_FS,
 		.setup	= xfs_scrub_setup_rt,
 		.scrub	= xfs_scrub_rtbitmap,
 		.has	= xfs_sb_version_hasrealtime,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
 		.type	= ST_FS,
 		.setup	= xfs_scrub_setup_rt,
 		.scrub	= xfs_scrub_rtsummary,
 		.has	= xfs_sb_version_hasrealtime,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
 		.type	= ST_FS,
 		.setup	= xfs_scrub_setup_quota,
 		.scrub	= xfs_scrub_quota,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_GQUOTA] = {	/* group quota */
 		.type	= ST_FS,
 		.setup	= xfs_scrub_setup_quota,
 		.scrub	= xfs_scrub_quota,
+		.repair	= xfs_repair_notsupported,
 	},
 	[XFS_SCRUB_TYPE_PQUOTA] = {	/* project quota */
 		.type	= ST_FS,
 		.setup	= xfs_scrub_setup_quota,
 		.scrub	= xfs_scrub_quota,
+		.repair	= xfs_repair_notsupported,
 	},
 };
 
@@ -379,15 +433,54 @@
 	if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
 		goto out;
 
-	/* We don't know how to repair anything yet. */
-	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
-		goto out;
+	/*
+	 * We only want to repair read-write v5+ filesystems.  Defer the check
+	 * for ops->repair until after our scrub confirms that repairs are
+	 * actually needed; this keeps us from failing merely because we
+	 * cannot repair an object that never needed repairing in the first
+	 * place.
+	 */
+	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
+		error = -EOPNOTSUPP;
+		if (!xfs_sb_version_hascrc(&mp->m_sb))
+			goto out;
+
+		error = -EROFS;
+		if (mp->m_flags & XFS_MOUNT_RDONLY)
+			goto out;
+	}
 
 	error = 0;
 out:
 	return error;
 }
 
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc)
+{
+	/*
+	 * Userspace asked us to repair something, we repaired it, rescanned
+	 * it, and the rescan says it's still broken.  Scream about this in
+	 * the system logs.
+	 */
+	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
+	    (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+				 XFS_SCRUB_OFLAG_XCORRUPT)))
+		xfs_repair_failure(sc->mp);
+}
+#else
+static inline void xfs_scrub_postmortem(struct xfs_scrub_context *sc)
+{
+	/*
+	 * Userspace asked us to scrub something, it's broken, and we have no
+	 * way of fixing it.  Scream in the logs.
+	 */
+	if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+				XFS_SCRUB_OFLAG_XCORRUPT))
+		xfs_alert_ratelimited(sc->mp,
+				"Corruption detected during scrub.");
+}
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
+
 /* Dispatch metadata scrubbing. */
 int
 xfs_scrub_metadata(
@@ -397,6 +490,7 @@
 	struct xfs_scrub_context	sc;
 	struct xfs_mount		*mp = ip->i_mount;
 	bool				try_harder = false;
+	bool				already_fixed = false;
 	int				error = 0;
 
 	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
@@ -446,10 +540,44 @@
 	} else if (error)
 		goto out_teardown;
 
-	if (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
-			       XFS_SCRUB_OFLAG_XCORRUPT))
-		xfs_alert_ratelimited(mp, "Corruption detected during scrub.");
+	if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !already_fixed) {
+		bool needs_fix;
 
+		/* Let debug users force us into the repair routines. */
+		if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
+			sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+
+		needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
+						XFS_SCRUB_OFLAG_XCORRUPT |
+						XFS_SCRUB_OFLAG_PREEN));
+		/*
+		 * If userspace asked for a repair but it wasn't necessary,
+		 * report that back to userspace.
+		 */
+		if (!needs_fix) {
+			sc.sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
+			goto out_nofix;
+		}
+
+		/*
+		 * If it's broken, userspace wants us to fix it, and we haven't
+		 * already tried to fix it, then attempt a repair.
+		 */
+		error = xfs_repair_attempt(ip, &sc, &already_fixed);
+		if (error == -EAGAIN) {
+			if (sc.try_harder)
+				try_harder = true;
+			error = xfs_scrub_teardown(&sc, ip, 0);
+			if (error) {
+				xfs_repair_failure(mp);
+				goto out;
+			}
+			goto retry_op;
+		}
+	}
+
+out_nofix:
+	xfs_scrub_postmortem(&sc);
 out_teardown:
 	error = xfs_scrub_teardown(&sc, ip, error);
 out:
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 0d92af8..636424d 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -38,6 +38,9 @@
 	/* Examine metadata for errors. */
 	int		(*scrub)(struct xfs_scrub_context *);
 
+	/* Repair or optimize the metadata. */
+	int		(*repair)(struct xfs_scrub_context *);
+
 	/* Decide if we even have this piece of metadata. */
 	bool		(*has)(struct xfs_sb *);
 
@@ -48,6 +51,7 @@
 /* Buffer pointers and btree cursors for an entire AG. */
 struct xfs_scrub_ag {
 	xfs_agnumber_t			agno;
+	struct xfs_perag		*pag;
 
 	/* AG btree roots */
 	struct xfs_buf			*agf_bp;
@@ -73,6 +77,7 @@
 	void				*buf;
 	uint				ilock_flags;
 	bool				try_harder;
+	bool				has_quotaofflock;
 
 	/* State tracking for single-AG operations. */
 	struct xfs_scrub_ag		sa;
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 5d2b1c2..794d56b 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -69,6 +69,8 @@
 DEFINE_SCRUB_EVENT(xfs_scrub_start);
 DEFINE_SCRUB_EVENT(xfs_scrub_done);
 DEFINE_SCRUB_EVENT(xfs_scrub_deadlock_retry);
+DEFINE_SCRUB_EVENT(xfs_repair_attempt);
+DEFINE_SCRUB_EVENT(xfs_repair_done);
 
 TRACE_EVENT(xfs_scrub_op_error,
 	TP_PROTO(struct xfs_scrub_context *sc, xfs_agnumber_t agno,
@@ -492,6 +494,262 @@
 		  __entry->ret_ip)
 );
 
+/* repair tracepoints */
+#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
+
+DECLARE_EVENT_CLASS(xfs_repair_extent_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len),
+	TP_ARGS(mp, agno, agbno, len),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len)
+);
+#define DEFINE_REPAIR_EXTENT_EVENT(name) \
+DEFINE_EVENT(xfs_repair_extent_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_agblock_t agbno, xfs_extlen_t len), \
+	TP_ARGS(mp, agno, agbno, len))
+DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_dispose_btree_extent);
+DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_collect_btree_extent);
+DEFINE_REPAIR_EXTENT_EVENT(xfs_repair_agfl_insert);
+
+DECLARE_EVENT_CLASS(xfs_repair_rmap_class,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len,
+		 uint64_t owner, uint64_t offset, unsigned int flags),
+	TP_ARGS(mp, agno, agbno, len, owner, offset, flags),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+		__field(uint64_t, owner)
+		__field(uint64_t, offset)
+		__field(unsigned int, flags)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+		__entry->owner = owner;
+		__entry->offset = offset;
+		__entry->flags = flags;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%x",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len,
+		  __entry->owner,
+		  __entry->offset,
+		  __entry->flags)
+);
+#define DEFINE_REPAIR_RMAP_EVENT(name) \
+DEFINE_EVENT(xfs_repair_rmap_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_agblock_t agbno, xfs_extlen_t len, \
+		 uint64_t owner, uint64_t offset, unsigned int flags), \
+	TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
+DEFINE_REPAIR_RMAP_EVENT(xfs_repair_alloc_extent_fn);
+DEFINE_REPAIR_RMAP_EVENT(xfs_repair_ialloc_extent_fn);
+DEFINE_REPAIR_RMAP_EVENT(xfs_repair_rmap_extent_fn);
+DEFINE_REPAIR_RMAP_EVENT(xfs_repair_bmap_extent_fn);
+
+TRACE_EVENT(xfs_repair_refcount_extent_fn,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 struct xfs_refcount_irec *irec),
+	TP_ARGS(mp, agno, irec),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, startblock)
+		__field(xfs_extlen_t, blockcount)
+		__field(xfs_nlink_t, refcount)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->startblock = irec->rc_startblock;
+		__entry->blockcount = irec->rc_blockcount;
+		__entry->refcount = irec->rc_refcount;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u refcount %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->startblock,
+		  __entry->blockcount,
+		  __entry->refcount)
+)
+
+TRACE_EVENT(xfs_repair_init_btblock,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+		 xfs_btnum_t btnum),
+	TP_ARGS(mp, agno, agbno, btnum),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(uint32_t, btnum)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->btnum = btnum;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u btnum %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->btnum)
+)
+TRACE_EVENT(xfs_repair_findroot_block,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+		 uint32_t magic, uint16_t level),
+	TP_ARGS(mp, agno, agbno, magic, level),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(uint32_t, magic)
+		__field(uint16_t, level)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->magic = magic;
+		__entry->level = level;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u magic 0x%x level %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->magic,
+		  __entry->level)
+)
+TRACE_EVENT(xfs_repair_calc_ag_resblks,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agino_t icount, xfs_agblock_t aglen, xfs_agblock_t freelen,
+		 xfs_agblock_t usedlen),
+	TP_ARGS(mp, agno, icount, aglen, freelen, usedlen),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agino_t, icount)
+		__field(xfs_agblock_t, aglen)
+		__field(xfs_agblock_t, freelen)
+		__field(xfs_agblock_t, usedlen)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->icount = icount;
+		__entry->aglen = aglen;
+		__entry->freelen = freelen;
+		__entry->usedlen = usedlen;
+	),
+	TP_printk("dev %d:%d agno %d icount %u aglen %u freelen %u usedlen %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->icount,
+		  __entry->aglen,
+		  __entry->freelen,
+		  __entry->usedlen)
+)
+TRACE_EVENT(xfs_repair_calc_ag_resblks_btsize,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t bnobt_sz, xfs_agblock_t inobt_sz,
+		 xfs_agblock_t rmapbt_sz, xfs_agblock_t refcbt_sz),
+	TP_ARGS(mp, agno, bnobt_sz, inobt_sz, rmapbt_sz, refcbt_sz),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, bnobt_sz)
+		__field(xfs_agblock_t, inobt_sz)
+		__field(xfs_agblock_t, rmapbt_sz)
+		__field(xfs_agblock_t, refcbt_sz)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->bnobt_sz = bnobt_sz;
+		__entry->inobt_sz = inobt_sz;
+		__entry->rmapbt_sz = rmapbt_sz;
+		__entry->refcbt_sz = refcbt_sz;
+	),
+	TP_printk("dev %d:%d agno %d bno %u ino %u rmap %u refcount %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->bnobt_sz,
+		  __entry->inobt_sz,
+		  __entry->rmapbt_sz,
+		  __entry->refcbt_sz)
+)
+TRACE_EVENT(xfs_repair_reset_counters,
+	TP_PROTO(struct xfs_mount *mp),
+	TP_ARGS(mp),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+	),
+	TP_printk("dev %d:%d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev))
+)
+
+TRACE_EVENT(xfs_repair_ialloc_insert,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agino_t startino, uint16_t holemask, uint8_t count,
+		 uint8_t freecount, uint64_t freemask),
+	TP_ARGS(mp, agno, startino, holemask, count, freecount, freemask),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agino_t, startino)
+		__field(uint16_t, holemask)
+		__field(uint8_t, count)
+		__field(uint8_t, freecount)
+		__field(uint64_t, freemask)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->startino = startino;
+		__entry->holemask = holemask;
+		__entry->count = count;
+		__entry->freecount = freecount;
+		__entry->freemask = freemask;
+	),
+	TP_printk("dev %d:%d agno %d startino %u holemask 0x%x count %u freecount %u freemask 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->startino,
+		  __entry->holemask,
+		  __entry->count,
+		  __entry->freecount,
+		  __entry->freemask)
+)
+
+#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
+
 #endif /* _TRACE_XFS_SCRUB_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0ab824f..ca69037 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -594,7 +594,7 @@
 	struct xfs_ioend	*ioend;
 	struct bio		*bio;
 
-	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
+	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
 	xfs_init_bio_from_bh(bio, bh);
 
 	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
@@ -1378,10 +1378,9 @@
 	struct address_space	*mapping,
 	sector_t		block)
 {
-	struct inode		*inode = (struct inode *)mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_inode	*ip = XFS_I(mapping->host);
 
-	trace_xfs_vm_bmap(XFS_I(inode));
+	trace_xfs_vm_bmap(ip);
 
 	/*
 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
@@ -1394,9 +1393,7 @@
 	 */
 	if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
 		return 0;
-
-	filemap_write_and_wait(mapping);
-	return generic_block_bmap(mapping, block, xfs_get_blocks);
+	return iomap_bmap(mapping, block, &xfs_iomap_ops);
 }
 
 STATIC int
@@ -1475,6 +1472,16 @@
 	return newly_dirty;
 }
 
+static int
+xfs_iomap_swapfile_activate(
+	struct swap_info_struct		*sis,
+	struct file			*swap_file,
+	sector_t			*span)
+{
+	sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
+	return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
+}
+
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
 	.readpages		= xfs_vm_readpages,
@@ -1488,6 +1495,7 @@
 	.migratepage		= buffer_migrate_page,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
+	.swap_activate		= xfs_iomap_swapfile_activate,
 };
 
 const struct address_space_operations xfs_dax_aops = {
@@ -1495,4 +1503,5 @@
 	.direct_IO		= noop_direct_IO,
 	.set_page_dirty		= noop_set_page_dirty,
 	.invalidatepage		= noop_invalidatepage,
+	.swap_activate		= xfs_iomap_swapfile_activate,
 };
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 69346d4..694c85b 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -18,7 +18,7 @@
 #ifndef __XFS_AOPS_H__
 #define __XFS_AOPS_H__
 
-extern struct bio_set *xfs_ioend_bioset;
+extern struct bio_set xfs_ioend_bioset;
 
 /*
  * Types of I/O for bmap clustering and I/O completion tracking.
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 2203465..618bb71 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -160,7 +160,7 @@
 xfs_bui_item_unlock(
 	struct xfs_log_item	*lip)
 {
-	if (lip->li_flags & XFS_LI_ABORTED)
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
 		xfs_bui_release(BUI_ITEM(lip));
 }
 
@@ -305,7 +305,7 @@
 {
 	struct xfs_bud_log_item	*budp = BUD_ITEM(lip);
 
-	if (lip->li_flags & XFS_LI_ABORTED) {
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
 		xfs_bui_release(budp->bud_buip);
 		kmem_zone_free(xfs_bud_zone, budp);
 	}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 8cd8c41..06badcb 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -848,7 +848,7 @@
 		/*
 		 * Attach the dquots to the inode up front.
 		 */
-		error = xfs_qm_dqattach(ip, 0);
+		error = xfs_qm_dqattach(ip);
 		if (error)
 			return error;
 
@@ -871,8 +871,8 @@
 		 * contents of the file are flushed to disk then the files
 		 * may be full of holes (ie NULL files bug).
 		 */
-		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
-					      XFS_ISIZE(ip));
+		error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
+					XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
 		if (error) {
 			/*
 			 * If we get an error at this point we simply don't
@@ -918,7 +918,7 @@
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	error = xfs_qm_dqattach(ip, 0);
+	error = xfs_qm_dqattach(ip);
 	if (error)
 		return error;
 
@@ -1169,7 +1169,7 @@
 
 	trace_xfs_free_file_space(ip);
 
-	error = xfs_qm_dqattach(ip, 0);
+	error = xfs_qm_dqattach(ip);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 55661cb..5179ab9 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -549,17 +549,31 @@
 }
 
 /*
- *	Look up, and creates if absent, a lockable buffer for
- *	a given range of an inode.  The buffer is returned
- *	locked.	No I/O is implied by this call.
+ * Look up a buffer in the buffer cache and return it referenced and locked
+ * in @found_bp.
+ *
+ * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the
+ * cache.
+ *
+ * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return
+ * -EAGAIN if we fail to lock it.
+ *
+ * Return values are:
+ *	-EFSCORRUPTED if we have been supplied with an invalid address
+ *	-EAGAIN on trylock failure
+ *	-ENOENT if we fail to find a match and @new_bp was NULL
+ *	0, with @found_bp:
+ *		- @new_bp if we inserted it into the cache
+ *		- the buffer we found and locked.
  */
-xfs_buf_t *
-_xfs_buf_find(
+static int
+xfs_buf_find(
 	struct xfs_buftarg	*btp,
 	struct xfs_buf_map	*map,
 	int			nmaps,
 	xfs_buf_flags_t		flags,
-	xfs_buf_t		*new_bp)
+	struct xfs_buf		*new_bp,
+	struct xfs_buf		**found_bp)
 {
 	struct xfs_perag	*pag;
 	xfs_buf_t		*bp;
@@ -567,6 +581,8 @@
 	xfs_daddr_t		eofs;
 	int			i;
 
+	*found_bp = NULL;
+
 	for (i = 0; i < nmaps; i++)
 		cmap.bm_len += map[i].bm_len;
 
@@ -580,16 +596,11 @@
 	 */
 	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
 	if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
-		/*
-		 * XXX (dgc): we should really be returning -EFSCORRUPTED here,
-		 * but none of the higher level infrastructure supports
-		 * returning a specific error on buffer lookup failures.
-		 */
 		xfs_alert(btp->bt_mount,
 			  "%s: daddr 0x%llx out of range, EOFS 0x%llx",
 			  __func__, cmap.bm_bn, eofs);
 		WARN_ON(1);
-		return NULL;
+		return -EFSCORRUPTED;
 	}
 
 	pag = xfs_perag_get(btp->bt_mount,
@@ -604,19 +615,20 @@
 	}
 
 	/* No match found */
-	if (new_bp) {
-		/* the buffer keeps the perag reference until it is freed */
-		new_bp->b_pag = pag;
-		rhashtable_insert_fast(&pag->pag_buf_hash,
-				       &new_bp->b_rhash_head,
-				       xfs_buf_hash_params);
-		spin_unlock(&pag->pag_buf_lock);
-	} else {
+	if (!new_bp) {
 		XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
 		spin_unlock(&pag->pag_buf_lock);
 		xfs_perag_put(pag);
+		return -ENOENT;
 	}
-	return new_bp;
+
+	/* the buffer keeps the perag reference until it is freed */
+	new_bp->b_pag = pag;
+	rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head,
+			       xfs_buf_hash_params);
+	spin_unlock(&pag->pag_buf_lock);
+	*found_bp = new_bp;
+	return 0;
 
 found:
 	spin_unlock(&pag->pag_buf_lock);
@@ -626,7 +638,7 @@
 		if (flags & XBF_TRYLOCK) {
 			xfs_buf_rele(bp);
 			XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
-			return NULL;
+			return -EAGAIN;
 		}
 		xfs_buf_lock(bp);
 		XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
@@ -646,6 +658,24 @@
 
 	trace_xfs_buf_find(bp, flags, _RET_IP_);
 	XFS_STATS_INC(btp->bt_mount, xb_get_locked);
+	*found_bp = bp;
+	return 0;
+}
+
+struct xfs_buf *
+xfs_buf_incore(
+	struct xfs_buftarg	*target,
+	xfs_daddr_t		blkno,
+	size_t			numblks,
+	xfs_buf_flags_t		flags)
+{
+	struct xfs_buf		*bp;
+	int			error;
+	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+
+	error = xfs_buf_find(target, &map, 1, flags, NULL, &bp);
+	if (error)
+		return NULL;
 	return bp;
 }
 
@@ -665,9 +695,27 @@
 	struct xfs_buf		*new_bp;
 	int			error = 0;
 
-	bp = _xfs_buf_find(target, map, nmaps, flags, NULL);
-	if (likely(bp))
+	error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
+
+	switch (error) {
+	case 0:
+		/* cache hit */
 		goto found;
+	case -EAGAIN:
+		/* cache hit, trylock failure, caller handles failure */
+		ASSERT(flags & XBF_TRYLOCK);
+		return NULL;
+	case -ENOENT:
+		/* cache miss, go for insert */
+		break;
+	case -EFSCORRUPTED:
+	default:
+		/*
+		 * None of the higher layers understand failure types
+		 * yet, so return NULL to signal a fatal lookup error.
+		 */
+		return NULL;
+	}
 
 	new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
 	if (unlikely(!new_bp))
@@ -679,8 +727,8 @@
 		return NULL;
 	}
 
-	bp = _xfs_buf_find(target, map, nmaps, flags, new_bp);
-	if (!bp) {
+	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
+	if (error) {
 		xfs_buf_free(new_bp);
 		return NULL;
 	}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index edced16..f5f2b71 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -218,20 +218,9 @@
 } xfs_buf_t;
 
 /* Finding and Reading Buffers */
-struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target,
-			      struct xfs_buf_map *map, int nmaps,
-			      xfs_buf_flags_t flags, struct xfs_buf *new_bp);
-
-static inline struct xfs_buf *
-xfs_incore(
-	struct xfs_buftarg	*target,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
-	xfs_buf_flags_t		flags)
-{
-	DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
-	return _xfs_buf_find(target, &map, 1, flags, NULL);
-}
+struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
+			   xfs_daddr_t blkno, size_t numblks,
+			   xfs_buf_flags_t flags);
 
 struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target,
 			       struct xfs_buf_map *map, int nmaps,
@@ -358,6 +347,18 @@
 
 void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref);
 
+/*
+ * If the buffer is already on the LRU, do nothing. Otherwise set the buffer
+ * up with a reference count of 0 so it will be tossed from the cache when
+ * released.
+ */
+static inline void xfs_buf_oneshot(struct xfs_buf *bp)
+{
+	if (!list_empty(&bp->b_lru) || atomic_read(&bp->b_lru_ref) > 1)
+		return;
+	atomic_set(&bp->b_lru_ref, 0);
+}
+
 static inline int xfs_buf_ispinned(struct xfs_buf *bp)
 {
 	return atomic_read(&bp->b_pin_count);
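
xfs_buf_oneshot() is a cache-retention hint: a buffer that is not already on
the LRU and holds no extra LRU reference has that reference forced to zero, so
the cache tosses it as soon as it is released. A toy model, with plain ints
standing in for the list_empty() check and the atomic_t reference:

#include <stdio.h>

struct buf {
	int	on_lru;		/* stands in for !list_empty(&bp->b_lru) */
	int	lru_ref;	/* stands in for atomic_t b_lru_ref */
};

static void buf_oneshot(struct buf *bp)
{
	/* Already cached or extra references held: leave the hint alone. */
	if (bp->on_lru || bp->lru_ref > 1)
		return;
	/* No extra references: drop from the cache on release. */
	bp->lru_ref = 0;
}

int main(void)
{
	struct buf once = { .on_lru = 0, .lru_ref = 1 };
	struct buf hot  = { .on_lru = 1, .lru_ref = 4 };

	buf_oneshot(&once);
	buf_oneshot(&hot);
	printf("once: %d, hot: %d\n", once.lru_ref, hot.lru_ref); /* 0, 4 */
	return 0;
}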
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 82ad270..c231137 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -438,7 +438,7 @@
 			 * xfs_trans_uncommit() will try to reference the
 			 * buffer which we no longer have a hold on.
 			 */
-			if (lip->li_desc)
+			if (!list_empty(&lip->li_trans))
 				xfs_trans_del_item(lip);
 
 			/*
@@ -568,13 +568,15 @@
 {
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
 	struct xfs_buf		*bp = bip->bli_buf;
-	bool			aborted = !!(lip->li_flags & XFS_LI_ABORTED);
+	bool			aborted;
 	bool			hold = !!(bip->bli_flags & XFS_BLI_HOLD);
 	bool			dirty = !!(bip->bli_flags & XFS_BLI_DIRTY);
 #if defined(DEBUG) || defined(XFS_WARN)
 	bool			ordered = !!(bip->bli_flags & XFS_BLI_ORDERED);
 #endif
 
+	aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags);
+
 	/* Clear the buffer's association with this transaction. */
 	bp->b_transp = NULL;
 
@@ -743,8 +745,10 @@
 	 * nothing to do here so return.
 	 */
 	ASSERT(bp->b_target->bt_mount == mp);
-	if (bip != NULL) {
+	if (bip) {
 		ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
+		ASSERT(!bp->b_transp);
+		ASSERT(bip->bli_buf == bp);
 		return 0;
 	}
 
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a7daef9..2567391 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -288,49 +288,43 @@
 }
 
 /*
- * Allocate a block and fill it with dquots.
- * This is called when the bmapi finds a hole.
+ * Ensure that the given in-core dquot has a buffer on disk backing it, and
+ * return the buffer. This is called when the bmapi finds a hole.
  */
 STATIC int
-xfs_qm_dqalloc(
-	xfs_trans_t	**tpp,
-	xfs_mount_t	*mp,
-	xfs_dquot_t	*dqp,
-	xfs_inode_t	*quotip,
-	xfs_fileoff_t	offset_fsb,
-	xfs_buf_t	**O_bpp)
+xfs_dquot_disk_alloc(
+	struct xfs_trans	**tpp,
+	struct xfs_dquot	*dqp,
+	struct xfs_buf		**bpp)
 {
-	xfs_fsblock_t	firstblock;
-	struct xfs_defer_ops dfops;
-	xfs_bmbt_irec_t map;
-	int		nmaps, error;
-	xfs_buf_t	*bp;
-	xfs_trans_t	*tp = *tpp;
-
-	ASSERT(tp != NULL);
+	struct xfs_bmbt_irec	map;
+	struct xfs_defer_ops	dfops;
+	struct xfs_mount	*mp = (*tpp)->t_mountp;
+	struct xfs_buf		*bp;
+	struct xfs_inode	*quotip = xfs_quota_inode(mp, dqp->dq_flags);
+	xfs_fsblock_t		firstblock;
+	int			nmaps = 1;
+	int			error;
 
 	trace_xfs_dqalloc(dqp);
 
-	/*
-	 * Initialize the bmap freelist prior to calling bmapi code.
-	 */
 	xfs_defer_init(&dfops, &firstblock);
 	xfs_ilock(quotip, XFS_ILOCK_EXCL);
-	/*
-	 * Return if this type of quotas is turned off while we didn't
-	 * have an inode lock
-	 */
 	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
+		/*
+		 * Return if this quota type was turned off while we didn't
+		 * hold the inode lock.
+		 */
 		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
 		return -ESRCH;
 	}
 
-	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
-	nmaps = 1;
-	error = xfs_bmapi_write(tp, quotip, offset_fsb,
-				XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
-				&firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
-				&map, &nmaps, &dfops);
+	/* Create the block mapping. */
+	xfs_trans_ijoin(*tpp, quotip, XFS_ILOCK_EXCL);
+	error = xfs_bmapi_write(*tpp, quotip, dqp->q_fileoffset,
+			XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
+			&firstblock, XFS_QM_DQALLOC_SPACE_RES(mp),
+			&map, &nmaps, &dfops);
 	if (error)
 		goto error0;
 	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -344,10 +338,8 @@
 	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
 
 	/* now we can just get the buffer (there's nothing to read yet) */
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
-			       dqp->q_blkno,
-			       mp->m_quotainfo->qi_dqchunklen,
-			       0);
+	bp = xfs_trans_get_buf(*tpp, mp->m_ddev_targp, dqp->q_blkno,
+			mp->m_quotainfo->qi_dqchunklen, 0);
 	if (!bp) {
 		error = -ENOMEM;
 		goto error1;
@@ -358,37 +350,45 @@
 	 * Make a chunk of dquots out of this buffer and log
 	 * the entire thing.
 	 */
-	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
+	xfs_qm_init_dquot_blk(*tpp, mp, be32_to_cpu(dqp->q_core.d_id),
 			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
+	xfs_buf_set_ref(bp, XFS_DQUOT_REF);
 
 	/*
-	 * xfs_defer_finish() may commit the current transaction and
-	 * start a second transaction if the freelist is not empty.
+	 * Hold the buffer and join it to the dfops so that we'll still own
+	 * the buffer when we return to the caller.  Buffer disposal on
+	 * error needs careful attention, as it has been
+	 * broken since commit efa092f3d4c6 "[XFS] Fixes a bug in the quota
+	 * code when allocating a new dquot record" in 2005, and the later
+	 * conversion to xfs_defer_ops in commit 310a75a3c6c747 failed to keep
+	 * the buffer locked across the _defer_finish call.  We can now do
+	 * this correctly with xfs_defer_bjoin.
 	 *
-	 * Since we still want to modify this buffer, we need to
-	 * ensure that the buffer is not released on commit of
-	 * the first transaction and ensure the buffer is added to the
-	 * second transaction.
+	 * Above, we allocated a disk block for the dquot information and
+	 * used get_buf to initialize the dquot.  If the _defer_bjoin fails,
+	 * the buffer is still locked to *tpp, so we must _bhold_release and
+	 * then _trans_brelse the buffer.  If the _defer_finish fails, the old
+	 * transaction is gone but the new buffer is not joined or held to any
+	 * transaction, so we must _buf_relse it.
 	 *
-	 * If there is only one transaction then don't stop the buffer
-	 * from being released when it commits later on.
+	 * If everything succeeds, the caller of this function is returned a
+	 * buffer that is locked and held to the transaction.  The caller
+	 * is responsible for unlocking any buffer passed back, either
+	 * manually or by committing the transaction.
 	 */
-
-	xfs_trans_bhold(tp, bp);
-
-	error = xfs_defer_finish(tpp, &dfops);
-	if (error)
+	xfs_trans_bhold(*tpp, bp);
+	error = xfs_defer_bjoin(&dfops, bp);
+	if (error) {
+		xfs_trans_bhold_release(*tpp, bp);
+		xfs_trans_brelse(*tpp, bp);
 		goto error1;
-
-	/* Transaction was committed? */
-	if (*tpp != tp) {
-		tp = *tpp;
-		xfs_trans_bjoin(tp, bp);
-	} else {
-		xfs_trans_bhold_release(tp, bp);
 	}
-
-	*O_bpp = bp;
+	error = xfs_defer_finish(tpp, &dfops);
+	if (error) {
+		xfs_buf_relse(bp);
+		goto error1;
+	}
+	*bpp = bp;
 	return 0;
 
 error1:
@@ -398,32 +398,24 @@
 }
 
 /*
- * Maps a dquot to the buffer containing its on-disk version.
- * This returns a ptr to the buffer containing the on-disk dquot
- * in the bpp param, and a ptr to the on-disk dquot within that buffer
+ * Read in the in-core dquot's on-disk metadata and return the buffer.
+ * Returns ENOENT to signal a hole.
  */
 STATIC int
-xfs_qm_dqtobp(
-	xfs_trans_t		**tpp,
-	xfs_dquot_t		*dqp,
-	xfs_disk_dquot_t	**O_ddpp,
-	xfs_buf_t		**O_bpp,
-	uint			flags)
+xfs_dquot_disk_read(
+	struct xfs_mount	*mp,
+	struct xfs_dquot	*dqp,
+	struct xfs_buf		**bpp)
 {
 	struct xfs_bmbt_irec	map;
-	int			nmaps = 1, error;
 	struct xfs_buf		*bp;
-	struct xfs_inode	*quotip;
-	struct xfs_mount	*mp = dqp->q_mount;
-	xfs_dqid_t		id = be32_to_cpu(dqp->q_core.d_id);
-	struct xfs_trans	*tp = (tpp ? *tpp : NULL);
+	struct xfs_inode	*quotip = xfs_quota_inode(mp, dqp->dq_flags);
 	uint			lock_mode;
-
-	quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
-	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
+	int			nmaps = 1;
+	int			error;
 
 	lock_mode = xfs_ilock_data_map_shared(quotip);
-	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {
+	if (!xfs_this_quota_on(mp, dqp->dq_flags)) {
 		/*
 		 * Return if this quota type was turned off while we
 		 * didn't hold the quota inode lock.
@@ -436,81 +428,48 @@
 	 * Find the block map; no allocations yet
 	 */
 	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,
-			       XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
-
+			XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0);
 	xfs_iunlock(quotip, lock_mode);
 	if (error)
 		return error;
 
 	ASSERT(nmaps == 1);
-	ASSERT(map.br_blockcount == 1);
+	ASSERT(map.br_blockcount >= 1);
+	ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+	if (map.br_startblock == HOLESTARTBLOCK)
+		return -ENOENT;
+
+	trace_xfs_dqtobp_read(dqp);
 
 	/*
-	 * Offset of dquot in the (fixed sized) dquot chunk.
+	 * Store the blkno etc. so that we don't have to do the
+	 * mapping all the time.
 	 */
-	dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
-		sizeof(xfs_dqblk_t);
+	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
 
-	ASSERT(map.br_startblock != DELAYSTARTBLOCK);
-	if (map.br_startblock == HOLESTARTBLOCK) {
-		/*
-		 * We don't allocate unless we're asked to
-		 */
-		if (!(flags & XFS_QMOPT_DQALLOC))
-			return -ENOENT;
-
-		ASSERT(tp);
-		error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
-					dqp->q_fileoffset, &bp);
-		if (error)
-			return error;
-		tp = *tpp;
-	} else {
-		trace_xfs_dqtobp_read(dqp);
-
-		/*
-		 * store the blkno etc so that we don't have to do the
-		 * mapping all the time
-		 */
-		dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
-		error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-					   dqp->q_blkno,
-					   mp->m_quotainfo->qi_dqchunklen,
-					   0, &bp, &xfs_dquot_buf_ops);
-		if (error) {
-			ASSERT(bp == NULL);
-			return error;
-		}
+	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+			mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+			&xfs_dquot_buf_ops);
+	if (error) {
+		ASSERT(bp == NULL);
+		return error;
 	}
 
 	ASSERT(xfs_buf_islocked(bp));
-	*O_bpp = bp;
-	*O_ddpp = bp->b_addr + dqp->q_bufoffset;
+	xfs_buf_set_ref(bp, XFS_DQUOT_REF);
+	*bpp = bp;
 
 	return 0;
 }
 
-
-/*
- * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
- * and release the buffer immediately.
- *
- * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed.
- */
-int
-xfs_qm_dqread(
+/* Allocate and initialize everything we need for an incore dquot. */
+STATIC struct xfs_dquot *
+xfs_dquot_alloc(
 	struct xfs_mount	*mp,
 	xfs_dqid_t		id,
-	uint			type,
-	uint			flags,
-	struct xfs_dquot	**O_dqpp)
+	uint			type)
 {
 	struct xfs_dquot	*dqp;
-	struct xfs_disk_dquot	*ddqp;
-	struct xfs_buf		*bp;
-	struct xfs_trans	*tp = NULL;
-	int			error;
 
 	dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
 
@@ -520,6 +479,12 @@
 	INIT_LIST_HEAD(&dqp->q_lru);
 	mutex_init(&dqp->q_qlock);
 	init_waitqueue_head(&dqp->q_pinwait);
+	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
+	/*
+	 * Offset of dquot in the (fixed sized) dquot chunk.
+	 */
+	dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
+			sizeof(xfs_dqblk_t);
 
 	/*
 	 * Because we want to use a counting completion, complete
@@ -548,35 +513,22 @@
 		break;
 	}
 
+	xfs_qm_dquot_logitem_init(dqp);
+
 	XFS_STATS_INC(mp, xs_qm_dquot);
+	return dqp;
+}
 
-	trace_xfs_dqread(dqp);
-
-	if (flags & XFS_QMOPT_DQALLOC) {
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
-				XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
-		if (error)
-			goto error0;
-	}
-
-	/*
-	 * get a pointer to the on-disk dquot and the buffer containing it
-	 * dqp already knows its own type (GROUP/USER).
-	 */
-	error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
-	if (error) {
-		/*
-		 * This can happen if quotas got turned off (ESRCH),
-		 * or if the dquot didn't exist on disk and we ask to
-		 * allocate (ENOENT).
-		 */
-		trace_xfs_dqread_fail(dqp);
-		goto error1;
-	}
+/* Copy the in-core quota fields in from the on-disk buffer. */
+STATIC void
+xfs_dquot_from_disk(
+	struct xfs_dquot	*dqp,
+	struct xfs_buf		*bp)
+{
+	struct xfs_disk_dquot	*ddqp = bp->b_addr + dqp->q_bufoffset;
 
 	/* copy everything from disk dquot to the incore dquot */
 	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
-	xfs_qm_dquot_logitem_init(dqp);
 
 	/*
 	 * Reservation counters are defined as reservation plus current usage
@@ -588,40 +540,90 @@
 
 	/* initialize the dquot speculative prealloc thresholds */
 	xfs_dquot_set_prealloc_limits(dqp);
+}
 
-	/* Mark the buf so that this will stay incore a little longer */
-	xfs_buf_set_ref(bp, XFS_DQUOT_REF);
+/* Allocate and initialize the dquot buffer for this in-core dquot. */
+static int
+xfs_qm_dqread_alloc(
+	struct xfs_mount	*mp,
+	struct xfs_dquot	*dqp,
+	struct xfs_buf		**bpp)
+{
+	struct xfs_trans	*tp;
+	struct xfs_buf		*bp;
+	int			error;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
+			XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
+	if (error)
+		goto err;
+
+	error = xfs_dquot_disk_alloc(&tp, dqp, &bp);
+	if (error)
+		goto err_cancel;
+
+	error = xfs_trans_commit(tp);
+	if (error) {
+		/*
+		 * Buffer was held to the transaction, so we have to unlock it
+		 * manually here because we're not passing it back.
+		 */
+		xfs_buf_relse(bp);
+		goto err;
+	}
+	*bpp = bp;
+	return 0;
+
+err_cancel:
+	xfs_trans_cancel(tp);
+err:
+	return error;
+}
+
+/*
+ * Read in the on-disk dquot, copy it to an incore version, and release
+ * the buffer immediately.  If @can_alloc is true, fill any
+ * holes in the on-disk metadata.
+ */
+static int
+xfs_qm_dqread(
+	struct xfs_mount	*mp,
+	xfs_dqid_t		id,
+	uint			type,
+	bool			can_alloc,
+	struct xfs_dquot	**dqpp)
+{
+	struct xfs_dquot	*dqp;
+	struct xfs_buf		*bp;
+	int			error;
+
+	dqp = xfs_dquot_alloc(mp, id, type);
+	trace_xfs_dqread(dqp);
+
+	/* Try to read the buffer, allocating if necessary. */
+	error = xfs_dquot_disk_read(mp, dqp, &bp);
+	if (error == -ENOENT && can_alloc)
+		error = xfs_qm_dqread_alloc(mp, dqp, &bp);
+	if (error)
+		goto err;
 
 	/*
-	 * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
-	 * So we need to release with xfs_trans_brelse().
-	 * The strategy here is identical to that of inodes; we lock
-	 * the dquot in xfs_qm_dqget() before making it accessible to
-	 * others. This is because dquots, like inodes, need a good level of
-	 * concurrency, and we don't want to take locks on the entire buffers
-	 * for dquot accesses.
-	 * Note also that the dquot buffer may even be dirty at this point, if
-	 * this particular dquot was repaired. We still aren't afraid to
-	 * brelse it because we have the changes incore.
+	 * At this point we should have a clean locked buffer.  Copy the data
+	 * to the incore dquot and release the buffer since the incore dquot
+	 * has its own locking protocol so we needn't tie up the buffer any
+	 * further.
 	 */
 	ASSERT(xfs_buf_islocked(bp));
-	xfs_trans_brelse(tp, bp);
+	xfs_dquot_from_disk(dqp, bp);
 
-	if (tp) {
-		error = xfs_trans_commit(tp);
-		if (error)
-			goto error0;
-	}
-
-	*O_dqpp = dqp;
+	xfs_buf_relse(bp);
+	*dqpp = dqp;
 	return error;
 
-error1:
-	if (tp)
-		xfs_trans_cancel(tp);
-error0:
+err:
+	trace_xfs_dqread_fail(dqp);
 	xfs_qm_dqdestroy(dqp);
-	*O_dqpp = NULL;
+	*dqpp = NULL;
 	return error;
 }
 
@@ -679,77 +681,230 @@
 }
 
 /*
- * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
- * a locked dquot, doing an allocation (if requested) as needed.
- * When both an inode and an id are given, the inode's id takes precedence.
- * That is, if the id changes while we don't hold the ilock inside this
- * function, the new dquot is returned, not necessarily the one requested
- * in the id argument.
+ * Look up the dquot in the in-core cache.  If found, the dquot is returned
+ * locked and ready to go.
  */
-int
-xfs_qm_dqget(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,	  /* locked inode (optional) */
-	xfs_dqid_t	id,	  /* uid/projid/gid depending on type */
-	uint		type,	  /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
-	uint		flags,	  /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
-	xfs_dquot_t	**O_dqpp) /* OUT : locked incore dquot */
+static struct xfs_dquot *
+xfs_qm_dqget_cache_lookup(
+	struct xfs_mount	*mp,
+	struct xfs_quotainfo	*qi,
+	struct radix_tree_root	*tree,
+	xfs_dqid_t		id)
 {
-	struct xfs_quotainfo	*qi = mp->m_quotainfo;
-	struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
 	struct xfs_dquot	*dqp;
-	int			error;
-
-	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-	if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
-	    (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
-	    (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
-		return -ESRCH;
-	}
-
-	ASSERT(type == XFS_DQ_USER ||
-	       type == XFS_DQ_PROJ ||
-	       type == XFS_DQ_GROUP);
-	if (ip) {
-		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-		ASSERT(xfs_inode_dquot(ip, type) == NULL);
-	}
 
 restart:
 	mutex_lock(&qi->qi_tree_lock);
 	dqp = radix_tree_lookup(tree, id);
-	if (dqp) {
-		xfs_dqlock(dqp);
-		if (dqp->dq_flags & XFS_DQ_FREEING) {
-			xfs_dqunlock(dqp);
-			mutex_unlock(&qi->qi_tree_lock);
-			trace_xfs_dqget_freeing(dqp);
-			delay(1);
-			goto restart;
-		}
-
-		/* uninit / unused quota found in radix tree, keep looking  */
-		if (flags & XFS_QMOPT_DQNEXT) {
-			if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-				xfs_dqunlock(dqp);
-				mutex_unlock(&qi->qi_tree_lock);
-				error = xfs_dq_get_next_id(mp, type, &id);
-				if (error)
-					return error;
-				goto restart;
-			}
-		}
-
-		dqp->q_nrefs++;
+	if (!dqp) {
 		mutex_unlock(&qi->qi_tree_lock);
+		XFS_STATS_INC(mp, xs_qm_dqcachemisses);
+		return NULL;
+	}
 
-		trace_xfs_dqget_hit(dqp);
-		XFS_STATS_INC(mp, xs_qm_dqcachehits);
+	xfs_dqlock(dqp);
+	if (dqp->dq_flags & XFS_DQ_FREEING) {
+		xfs_dqunlock(dqp);
+		mutex_unlock(&qi->qi_tree_lock);
+		trace_xfs_dqget_freeing(dqp);
+		delay(1);
+		goto restart;
+	}
+
+	dqp->q_nrefs++;
+	mutex_unlock(&qi->qi_tree_lock);
+
+	trace_xfs_dqget_hit(dqp);
+	XFS_STATS_INC(mp, xs_qm_dqcachehits);
+	return dqp;
+}
+
+/*
+ * Try to insert a new dquot into the in-core cache.  If an error occurs the
+ * caller should throw away the dquot and start over.  Otherwise, the dquot
+ * is returned locked (and held by the cache) as if there had been a cache
+ * hit.
+ */
+static int
+xfs_qm_dqget_cache_insert(
+	struct xfs_mount	*mp,
+	struct xfs_quotainfo	*qi,
+	struct radix_tree_root	*tree,
+	xfs_dqid_t		id,
+	struct xfs_dquot	*dqp)
+{
+	int			error;
+
+	mutex_lock(&qi->qi_tree_lock);
+	error = radix_tree_insert(tree, id, dqp);
+	if (unlikely(error)) {
+		/* Duplicate found!  Caller must try again. */
+		WARN_ON(error != -EEXIST);
+		mutex_unlock(&qi->qi_tree_lock);
+		trace_xfs_dqget_dup(dqp);
+		return error;
+	}
+
+	/* Return a locked dquot to the caller, with a reference taken. */
+	xfs_dqlock(dqp);
+	dqp->q_nrefs = 1;
+
+	qi->qi_dquots++;
+	mutex_unlock(&qi->qi_tree_lock);
+
+	return 0;
+}
+
+/* Check our input parameters. */
+static int
+xfs_qm_dqget_checks(
+	struct xfs_mount	*mp,
+	uint			type)
+{
+	if (WARN_ON_ONCE(!XFS_IS_QUOTA_RUNNING(mp)))
+		return -ESRCH;
+
+	switch (type) {
+	case XFS_DQ_USER:
+		if (!XFS_IS_UQUOTA_ON(mp))
+			return -ESRCH;
+		return 0;
+	case XFS_DQ_GROUP:
+		if (!XFS_IS_GQUOTA_ON(mp))
+			return -ESRCH;
+		return 0;
+	case XFS_DQ_PROJ:
+		if (!XFS_IS_PQUOTA_ON(mp))
+			return -ESRCH;
+		return 0;
+	default:
+		WARN_ON_ONCE(0);
+		return -EINVAL;
+	}
+}
+
+/*
+ * Given the file system, id, and type (UDQUOT/GDQUOT), return a locked
+ * dquot, doing an allocation (if requested) as needed.
+ */
+int
+xfs_qm_dqget(
+	struct xfs_mount	*mp,
+	xfs_dqid_t		id,
+	uint			type,
+	bool			can_alloc,
+	struct xfs_dquot	**O_dqpp)
+{
+	struct xfs_quotainfo	*qi = mp->m_quotainfo;
+	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
+	struct xfs_dquot	*dqp;
+	int			error;
+
+	error = xfs_qm_dqget_checks(mp, type);
+	if (error)
+		return error;
+
+restart:
+	dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
+	if (dqp) {
 		*O_dqpp = dqp;
 		return 0;
 	}
-	mutex_unlock(&qi->qi_tree_lock);
-	XFS_STATS_INC(mp, xs_qm_dqcachemisses);
+
+	error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
+	if (error)
+		return error;
+
+	error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
+	if (error) {
+		/*
+		 * Duplicate found. Just throw away the new dquot and start
+		 * over.
+		 */
+		xfs_qm_dqdestroy(dqp);
+		XFS_STATS_INC(mp, xs_qm_dquot_dups);
+		goto restart;
+	}
+
+	trace_xfs_dqget_miss(dqp);
+	*O_dqpp = dqp;
+	return 0;
+}
+
+/*
+ * Given a dquot id and type, read and initialize a dquot from the on-disk
+ * metadata.  This function is only for use during quota initialization so
+ * it ignores the dquot cache assuming that the dquot shrinker isn't set up.
+ * The caller is responsible for _qm_dqdestroy'ing the returned dquot.
+ */
+int
+xfs_qm_dqget_uncached(
+	struct xfs_mount	*mp,
+	xfs_dqid_t		id,
+	uint			type,
+	struct xfs_dquot	**dqpp)
+{
+	int			error;
+
+	error = xfs_qm_dqget_checks(mp, type);
+	if (error)
+		return error;
+
+	return xfs_qm_dqread(mp, id, type, false, dqpp);
+}
+
+/* Return the quota id for a given inode and type. */
+xfs_dqid_t
+xfs_qm_id_for_quotatype(
+	struct xfs_inode	*ip,
+	uint			type)
+{
+	switch (type) {
+	case XFS_DQ_USER:
+		return ip->i_d.di_uid;
+	case XFS_DQ_GROUP:
+		return ip->i_d.di_gid;
+	case XFS_DQ_PROJ:
+		return xfs_get_projid(ip);
+	}
+	ASSERT(0);
+	return 0;
+}
+
+/*
+ * Return the dquot for a given inode and type.  If @can_alloc is true, then
+ * allocate blocks if needed.  The inode's ILOCK must be held and it must not
+ * already have a dquot of this type attached.
+ */
+int
+xfs_qm_dqget_inode(
+	struct xfs_inode	*ip,
+	uint			type,
+	bool			can_alloc,
+	struct xfs_dquot	**O_dqpp)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_quotainfo	*qi = mp->m_quotainfo;
+	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
+	struct xfs_dquot	*dqp;
+	xfs_dqid_t		id;
+	int			error;
+
+	error = xfs_qm_dqget_checks(mp, type);
+	if (error)
+		return error;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(xfs_inode_dquot(ip, type) == NULL);
+
+	id = xfs_qm_id_for_quotatype(ip, type);
+
+restart:
+	dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id);
+	if (dqp) {
+		*O_dqpp = dqp;
+		return 0;
+	}
 
 	/*
 	 * Dquot cache miss. We don't want to keep the inode lock across
@@ -758,90 +913,84 @@
 	 * lock here means dealing with a chown that can happen before
 	 * we re-acquire the lock.
 	 */
-	if (ip)
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-	error = xfs_qm_dqread(mp, id, type, flags, &dqp);
-
-	if (ip)
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-	/* If we are asked to find next active id, keep looking */
-	if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
-		error = xfs_dq_get_next_id(mp, type, &id);
-		if (!error)
-			goto restart;
-	}
-
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	if (error)
 		return error;
 
-	if (ip) {
-		/*
-		 * A dquot could be attached to this inode by now, since
-		 * we had dropped the ilock.
-		 */
-		if (xfs_this_quota_on(mp, type)) {
-			struct xfs_dquot	*dqp1;
+	/*
+	 * A dquot could be attached to this inode by now, since we had
+	 * dropped the ilock.
+	 */
+	if (xfs_this_quota_on(mp, type)) {
+		struct xfs_dquot	*dqp1;
 
-			dqp1 = xfs_inode_dquot(ip, type);
-			if (dqp1) {
-				xfs_qm_dqdestroy(dqp);
-				dqp = dqp1;
-				xfs_dqlock(dqp);
-				goto dqret;
-			}
-		} else {
-			/* inode stays locked on return */
+		dqp1 = xfs_inode_dquot(ip, type);
+		if (dqp1) {
 			xfs_qm_dqdestroy(dqp);
-			return -ESRCH;
+			dqp = dqp1;
+			xfs_dqlock(dqp);
+			goto dqret;
 		}
+	} else {
+		/* inode stays locked on return */
+		xfs_qm_dqdestroy(dqp);
+		return -ESRCH;
 	}
 
-	mutex_lock(&qi->qi_tree_lock);
-	error = radix_tree_insert(tree, id, dqp);
-	if (unlikely(error)) {
-		WARN_ON(error != -EEXIST);
-
+	error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp);
+	if (error) {
 		/*
 		 * Duplicate found. Just throw away the new dquot and start
 		 * over.
 		 */
-		mutex_unlock(&qi->qi_tree_lock);
-		trace_xfs_dqget_dup(dqp);
 		xfs_qm_dqdestroy(dqp);
 		XFS_STATS_INC(mp, xs_qm_dquot_dups);
 		goto restart;
 	}
 
-	/*
-	 * We return a locked dquot to the caller, with a reference taken
-	 */
-	xfs_dqlock(dqp);
-	dqp->q_nrefs = 1;
-
-	qi->qi_dquots++;
-	mutex_unlock(&qi->qi_tree_lock);
-
-	/* If we are asked to find next active id, keep looking */
-	if (flags & XFS_QMOPT_DQNEXT) {
-		if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-			xfs_qm_dqput(dqp);
-			error = xfs_dq_get_next_id(mp, type, &id);
-			if (error)
-				return error;
-			goto restart;
-		}
-	}
-
- dqret:
-	ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
+dqret:
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 	trace_xfs_dqget_miss(dqp);
 	*O_dqpp = dqp;
 	return 0;
 }
 
 /*
+ * Starting at @id and progressing upwards, look for an initialized incore
+ * dquot, lock it, and return it.
+ */
+int
+xfs_qm_dqget_next(
+	struct xfs_mount	*mp,
+	xfs_dqid_t		id,
+	uint			type,
+	struct xfs_dquot	**dqpp)
+{
+	struct xfs_dquot	*dqp;
+	int			error = 0;
+
+	*dqpp = NULL;
+	for (; !error; error = xfs_dq_get_next_id(mp, type, &id)) {
+		error = xfs_qm_dqget(mp, id, type, false, &dqp);
+		if (error == -ENOENT)
+			continue;
+		else if (error != 0)
+			break;
+
+		if (!XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+			*dqpp = dqp;
+			return 0;
+		}
+
+		xfs_qm_dqput(dqp);
+	}
+
+	return error;
+}
+
+/*
  * Release a reference to the dquot (decrement ref-count) and unlock it.
  *
  * If there is a group quota attached to this dquot, carefully release that
@@ -913,9 +1062,9 @@
 	 * since it's cheaper, and then we recheck while
 	 * holding the lock before removing the dquot from the AIL.
 	 */
-	if ((lip->li_flags & XFS_LI_IN_AIL) &&
+	if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
 	    ((lip->li_lsn == qip->qli_flush_lsn) ||
-	     (lip->li_flags & XFS_LI_FAILED))) {
+	     test_bit(XFS_LI_FAILED, &lip->li_flags))) {
 
 		/* xfs_trans_ail_delete() drops the AIL lock. */
 		spin_lock(&ailp->ail_lock);
@@ -926,8 +1075,7 @@
 			 * Clear the failed state since we are about to drop the
 			 * flush lock
 			 */
-			if (lip->li_flags & XFS_LI_FAILED)
-				xfs_clear_li_failed(lip);
+			xfs_clear_li_failed(lip);
 			spin_unlock(&ailp->ail_lock);
 		}
 	}
@@ -953,6 +1101,7 @@
 {
 	struct xfs_mount	*mp = dqp->q_mount;
 	struct xfs_buf		*bp;
+	struct xfs_dqblk	*dqb;
 	struct xfs_disk_dquot	*ddqp;
 	xfs_failaddr_t		fa;
 	int			error;
@@ -996,12 +1145,13 @@
 	/*
 	 * Calculate the location of the dquot inside the buffer.
 	 */
-	ddqp = bp->b_addr + dqp->q_bufoffset;
+	dqb = bp->b_addr + dqp->q_bufoffset;
+	ddqp = &dqb->dd_diskdq;
 
 	/*
-	 * A simple sanity check in case we got a corrupted dquot..
+	 * A simple sanity check in case we got a corrupted dquot.
 	 */
-	fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 0);
+	fa = xfs_dqblk_verify(mp, dqb, be32_to_cpu(ddqp->d_id), 0);
 	if (fa) {
 		xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
 				be32_to_cpu(ddqp->d_id), fa);
@@ -1032,8 +1182,6 @@
 	 * of a dquot without an up-to-date CRC getting to disk.
 	 */
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
-		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
-
 		dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
 		xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
 				 XFS_DQUOT_CRC_OFF);
@@ -1119,3 +1267,35 @@
 	kmem_zone_destroy(xfs_qm_dqtrxzone);
 	kmem_zone_destroy(xfs_qm_dqzone);
 }
+
+/*
+ * Iterate every dquot of a particular type.  The caller must ensure that the
+ * particular quota type is active.  iter_fn can return negative error codes,
+ * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating.
+ */
+int
+xfs_qm_dqiterate(
+	struct xfs_mount	*mp,
+	uint			dqtype,
+	xfs_qm_dqiterate_fn	iter_fn,
+	void			*priv)
+{
+	struct xfs_dquot	*dq;
+	xfs_dqid_t		id = 0;
+	int			error;
+
+	do {
+		error = xfs_qm_dqget_next(mp, id, dqtype, &dq);
+		if (error == -ENOENT)
+			return 0;
+		if (error)
+			return error;
+
+		error = iter_fn(dq, dqtype, priv);
+		id = be32_to_cpu(dq->q_core.d_id);
+		xfs_qm_dqput(dq);
+		id++;
+	} while (error == 0 && id != 0);
+
+	return error;
+}
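
xfs_qm_dqiterate() visits every dquot by asking for the next initialized id at
or above a cursor, then bumping the cursor past the id it just visited; the
loop ends when the lookup runs off the end (-ENOENT) or the 32-bit id space
wraps back to zero. A self-contained sketch of that loop shape (toy id list,
hypothetical names):

#include <stdint.h>
#include <stdio.h>

typedef int (*iter_fn)(uint32_t id, void *priv);

/* Toy "find the next id at or above @id"; nonzero means none left. */
static int get_next(uint32_t id, uint32_t *out)
{
	static const uint32_t ids[] = { 3, 7, 0xffffffffu };
	unsigned int i;

	for (i = 0; i < sizeof(ids) / sizeof(ids[0]); i++) {
		if (ids[i] >= id) {
			*out = ids[i];
			return 0;
		}
	}
	return -1;
}

static int iterate(iter_fn fn, void *priv)
{
	uint32_t id = 0, found;
	int error = 0;

	do {
		if (get_next(id, &found))
			return 0;	/* -ENOENT equivalent: all done */
		error = fn(found, priv);
		id = found + 1;	/* restart just above the visited id */
		/*
		 * The "id != 0" test below stops the walk when the 32-bit
		 * id wraps; without it the cursor would rescan forever.
		 */
	} while (error == 0 && id != 0);

	return error;
}

static int print_id(uint32_t id, void *priv)
{
	(void)priv;
	printf("dquot id %u\n", id);
	return 0;
}

int main(void)
{
	return iterate(print_id, NULL);
}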
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 2f536f3..bdd6bd9 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -160,8 +160,6 @@
 #define XFS_QM_ISPDQ(dqp)	((dqp)->dq_flags & XFS_DQ_PROJ)
 #define XFS_QM_ISGDQ(dqp)	((dqp)->dq_flags & XFS_DQ_GROUP)
 
-extern int		xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
-					uint, struct xfs_dquot	**);
 extern void		xfs_qm_dqdestroy(xfs_dquot_t *);
 extern int		xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **);
 extern void		xfs_qm_dqunpin_wait(xfs_dquot_t *);
@@ -169,8 +167,19 @@
 					xfs_disk_dquot_t *);
 extern void		xfs_qm_adjust_dqlimits(struct xfs_mount *,
 					       struct xfs_dquot *);
-extern int		xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
-					xfs_dqid_t, uint, uint, xfs_dquot_t **);
+extern xfs_dqid_t	xfs_qm_id_for_quotatype(struct xfs_inode *ip,
+					uint type);
+extern int		xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id,
+					uint type, bool can_alloc,
+					struct xfs_dquot **dqpp);
+extern int		xfs_qm_dqget_inode(struct xfs_inode *ip, uint type,
+					bool can_alloc,
+					struct xfs_dquot **dqpp);
+extern int		xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id,
+					uint type, struct xfs_dquot **dqpp);
+extern int		xfs_qm_dqget_uncached(struct xfs_mount *mp,
+					xfs_dqid_t id, uint type,
+					struct xfs_dquot **dqpp);
 extern void		xfs_qm_dqput(xfs_dquot_t *);
 
 extern void		xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
@@ -185,4 +194,9 @@
 	return dqp;
 }
 
+typedef int (*xfs_qm_dqiterate_fn)(struct xfs_dquot *dq, uint dqtype,
+		void *priv);
+int xfs_qm_dqiterate(struct xfs_mount *mp, uint dqtype,
+		xfs_qm_dqiterate_fn iter_fn, void *priv);
+
 #endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 4b331e3..8eb7415 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -173,7 +173,7 @@
 	 * The buffer containing this item failed to be written back
 	 * previously. Resubmit the buffer for IO
 	 */
-	if (lip->li_flags & XFS_LI_FAILED) {
+	if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
 		if (!xfs_buf_trylock(bp))
 			return XFS_ITEM_LOCKED;
 
@@ -209,10 +209,7 @@
 	spin_unlock(&lip->li_ailp->ail_lock);
 
 	error = xfs_qm_dqflush(dqp, &bp);
-	if (error) {
-		xfs_warn(dqp->q_mount, "%s: push error %d on dqp "PTR_FMT,
-			__func__, error, dqp);
-	} else {
+	if (!error) {
 		if (!xfs_buf_delwri_queue(bp, buffer_list))
 			rval = XFS_ITEM_FLUSHING;
 		xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index a63f508..7975634 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -61,6 +61,7 @@
 	XFS_RANDOM_LOG_BAD_CRC,
 	XFS_RANDOM_LOG_ITEM_PIN,
 	XFS_RANDOM_BUF_LRU_REF,
+	XFS_RANDOM_FORCE_SCRUB_REPAIR,
 };
 
 struct xfs_errortag_attr {
@@ -167,6 +168,7 @@
 XFS_ERRORTAG_ATTR_RW(log_bad_crc,	XFS_ERRTAG_LOG_BAD_CRC);
 XFS_ERRORTAG_ATTR_RW(log_item_pin,	XFS_ERRTAG_LOG_ITEM_PIN);
 XFS_ERRORTAG_ATTR_RW(buf_lru_ref,	XFS_ERRTAG_BUF_LRU_REF);
+XFS_ERRORTAG_ATTR_RW(force_repair,	XFS_ERRTAG_FORCE_SCRUB_REPAIR);
 
 static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -201,6 +203,7 @@
 	XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
 	XFS_ERRORTAG_ATTR_LIST(log_item_pin),
 	XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
+	XFS_ERRORTAG_ATTR_LIST(force_repair),
 	NULL,
 };
 
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index b5b1e56..a889b55 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -168,7 +168,7 @@
 xfs_efi_item_unlock(
 	struct xfs_log_item	*lip)
 {
-	if (lip->li_flags & XFS_LI_ABORTED)
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
 		xfs_efi_release(EFI_ITEM(lip));
 }
 
@@ -402,7 +402,7 @@
 {
 	struct xfs_efd_log_item	*efdp = EFD_ITEM(lip);
 
-	if (lip->li_flags & XFS_LI_ABORTED) {
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
 		xfs_efi_release(efdp->efd_efip);
 		xfs_efd_item_free(efdp);
 	}
@@ -542,7 +542,7 @@
 	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
 		extp = &efip->efi_format.efi_extents[i];
 		error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
-					      extp->ext_len, &oinfo);
+					      extp->ext_len, &oinfo, false);
 		if (error)
 			goto abort_error;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e70fb8c..0e3fb89 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -414,6 +414,12 @@
 	if (size <= 0)
 		return size;
 
+	/*
+	 * Capture amount written on completion as we can't reliably account
+	 * for it on submission.
+	 */
+	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
+
 	if (flags & IOMAP_DIO_COW) {
 		error = xfs_reflink_end_cow(ip, offset, size);
 		if (error)
@@ -599,7 +605,16 @@
 	}
 out:
 	xfs_iunlock(ip, iolock);
-	return error ? error : ret;
+	if (error)
+		return error;
+
+	if (ret > 0) {
+		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
+
+		/* Handle various SYNC-type writes */
+		ret = generic_write_sync(iocb, ret);
+	}
+	return ret;
 }
 
 STATIC ssize_t
@@ -669,6 +684,12 @@
 out:
 	if (iolock)
 		xfs_iunlock(ip, iolock);
+
+	if (ret > 0) {
+		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
+		/* Handle various SYNC-type writes */
+		ret = generic_write_sync(iocb, ret);
+	}
 	return ret;
 }
 
@@ -693,8 +714,9 @@
 		return -EIO;
 
 	if (IS_DAX(inode))
-		ret = xfs_file_dax_write(iocb, from);
-	else if (iocb->ki_flags & IOCB_DIRECT) {
+		return xfs_file_dax_write(iocb, from);
+
+	if (iocb->ki_flags & IOCB_DIRECT) {
 		/*
 		 * Allow a directio write to fall back to a buffered
 		 * write *only* in the case that we're doing a reflink
@@ -702,20 +724,11 @@
 		 * allow an operation to fall back to buffered mode.
 		 */
 		ret = xfs_file_dio_aio_write(iocb, from);
-		if (ret == -EREMCHG)
-			goto buffered;
-	} else {
-buffered:
-		ret = xfs_file_buffered_aio_write(iocb, from);
+		if (ret != -EREMCHG)
+			return ret;
 	}
 
-	if (ret > 0) {
-		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
-
-		/* Handle various SYNC-type writes */
-		ret = generic_write_sync(iocb, ret);
-	}
-	return ret;
+	return xfs_file_buffered_aio_write(iocb, from);
 }
 
 #define	XFS_FALLOC_FL_SUPPORTED						\
@@ -1007,7 +1020,7 @@
  *       page_lock (MM)
  *         i_lock (XFS - extent map serialisation)
  */
-static int
+static vm_fault_t
 __xfs_filemap_fault(
 	struct vm_fault		*vmf,
 	enum page_entry_size	pe_size,
@@ -1015,7 +1028,7 @@
 {
 	struct inode		*inode = file_inode(vmf->vma->vm_file);
 	struct xfs_inode	*ip = XFS_I(inode);
-	int			ret;
+	vm_fault_t		ret;
 
 	trace_xfs_filemap_fault(ip, pe_size, write_fault);
 
@@ -1044,7 +1057,7 @@
 	return ret;
 }
 
-static int
+static vm_fault_t
 xfs_filemap_fault(
 	struct vm_fault		*vmf)
 {
@@ -1054,7 +1067,7 @@
 			(vmf->flags & FAULT_FLAG_WRITE));
 }
 
-static int
+static vm_fault_t
 xfs_filemap_huge_fault(
 	struct vm_fault		*vmf,
 	enum page_entry_size	pe_size)
@@ -1067,7 +1080,7 @@
 			(vmf->flags & FAULT_FLAG_WRITE));
 }
 
-static int
+static vm_fault_t
 xfs_filemap_page_mkwrite(
 	struct vm_fault		*vmf)
 {
@@ -1079,7 +1092,7 @@
  * on write faults. In reality, it needs to serialise against truncate and
  * prepare memory for writing, so handle it as a standard write fault.
  */
-static int
+static vm_fault_t
 xfs_filemap_pfn_mkwrite(
 	struct vm_fault		*vmf)
 {
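
The rewritten xfs_file_write_iter() (middle hunks above) replaces the old
goto-based fallback with straight-line dispatch: DAX and direct I/O writes
return early, and only a direct write that hits the reflink COW conflict
(-EREMCHG) falls through to the buffered path. A toy model of that control
flow, with stub write functions (the per-path stats and generic_write_sync()
handling are omitted):

#include <errno.h>
#include <stdio.h>

static long dax_write(long n)      { return n; }
static long buffered_write(long n) { return n; }

/* Pretend odd-length direct writes hit a COW conflict. */
static long dio_write(long n)      { return (n % 2) ? -EREMCHG : n; }

static long write_iter(int is_dax, int is_direct, long n)
{
	if (is_dax)
		return dax_write(n);

	if (is_direct) {
		long ret = dio_write(n);

		/* Only the COW conflict falls back to buffered I/O. */
		if (ret != -EREMCHG)
			return ret;
	}
	return buffered_write(n);
}

int main(void)
{
	printf("direct, even: %ld\n", write_iter(0, 1, 4)); /* direct */
	printf("direct, odd:  %ld\n", write_iter(0, 1, 5)); /* falls back */
	return 0;
}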
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 43cfc07..0299feb 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -465,10 +465,9 @@
 	struct xfs_rmap_irec		irec;
 	xfs_daddr_t			rec_daddr;
 
-	rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
-
-	irec.rm_startblock = rec->ar_startblock;
-	irec.rm_blockcount = rec->ar_blockcount;
+	irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize;
+	rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock);
+	irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize;
 	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
 	irec.rm_offset = 0;
 	irec.rm_flags = 0;
@@ -534,8 +533,11 @@
 
 	xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED);
 
-	alow.ar_startblock = info->low.rm_startblock;
-	ahigh.ar_startblock = info->high.rm_startblock;
+	alow.ar_startext = info->low.rm_startblock;
+	ahigh.ar_startext = info->high.rm_startblock;
+	do_div(alow.ar_startext, tp->t_mountp->m_sb.sb_rextsize);
+	if (do_div(ahigh.ar_startext, tp->t_mountp->m_sb.sb_rextsize))
+		ahigh.ar_startext++;
 	error = xfs_rtalloc_query_range(tp, &alow, &ahigh,
 			xfs_getfsmap_rtdev_rtbitmap_helper, info);
 	if (error)
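
The fsmap hunks above convert the realtime query range from blocks to rt
extents: the reverse mapping multiplies ar_startext by sb_rextsize, while the
forward conversion divides the low edge down and rounds the high edge up
whenever do_div() leaves a remainder. A small arithmetic sketch of that
conversion (rextsize is blocks per realtime extent):

#include <stdint.h>
#include <stdio.h>

static void blocks_to_rtextents(uint64_t low, uint64_t high,
				uint64_t rextsize,
				uint64_t *elow, uint64_t *ehigh)
{
	*elow = low / rextsize;		/* round the low edge down */
	*ehigh = high / rextsize;
	if (high % rextsize)		/* do_div() hands back the remainder */
		(*ehigh)++;		/* round the high edge up */
}

int main(void)
{
	uint64_t elow, ehigh;

	/* Blocks 10..25 with 8-block extents cover extents 1..4. */
	blocks_to_rtextents(10, 25, 8, &elow, &ehigh);
	printf("extents %llu..%llu\n",
	       (unsigned long long)elow, (unsigned long long)ehigh);
	return 0;
}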
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 5237927..bc7ef18 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -24,85 +24,42 @@
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_inode.h"
 #include "xfs_trans.h"
-#include "xfs_inode_item.h"
 #include "xfs_error.h"
 #include "xfs_btree.h"
-#include "xfs_alloc_btree.h"
 #include "xfs_alloc.h"
-#include "xfs_rmap_btree.h"
-#include "xfs_ialloc.h"
 #include "xfs_fsops.h"
-#include "xfs_itable.h"
 #include "xfs_trans_space.h"
 #include "xfs_rtalloc.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
-#include "xfs_filestream.h"
-#include "xfs_rmap.h"
+#include "xfs_ag.h"
 #include "xfs_ag_resv.h"
 
 /*
- * File system operations
+ * growfs operations
  */
-
-static struct xfs_buf *
-xfs_growfs_get_hdr_buf(
-	struct xfs_mount	*mp,
-	xfs_daddr_t		blkno,
-	size_t			numblks,
-	int			flags,
-	const struct xfs_buf_ops *ops)
-{
-	struct xfs_buf		*bp;
-
-	bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
-	if (!bp)
-		return NULL;
-
-	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
-	bp->b_bn = blkno;
-	bp->b_maps[0].bm_bn = blkno;
-	bp->b_ops = ops;
-
-	return bp;
-}
-
 static int
 xfs_growfs_data_private(
 	xfs_mount_t		*mp,		/* mount point for filesystem */
 	xfs_growfs_data_t	*in)		/* growfs data input struct */
 {
-	xfs_agf_t		*agf;
-	struct xfs_agfl		*agfl;
-	xfs_agi_t		*agi;
-	xfs_agnumber_t		agno;
-	xfs_extlen_t		agsize;
-	xfs_extlen_t		tmpsize;
-	xfs_alloc_rec_t		*arec;
 	xfs_buf_t		*bp;
-	int			bucket;
-	int			dpct;
-	int			error, saved_error = 0;
+	int			error;
 	xfs_agnumber_t		nagcount;
 	xfs_agnumber_t		nagimax = 0;
 	xfs_rfsblock_t		nb, nb_mod;
 	xfs_rfsblock_t		new;
-	xfs_rfsblock_t		nfree;
 	xfs_agnumber_t		oagcount;
-	int			pct;
 	xfs_trans_t		*tp;
+	LIST_HEAD		(buffer_list);
+	struct aghdr_init_data	id = {};
 
 	nb = in->newblocks;
-	pct = in->imaxpct;
-	if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
+	if (nb < mp->m_sb.sb_dblocks)
 		return -EINVAL;
 	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
 		return error;
-	dpct = pct - mp->m_sb.sb_imax_pct;
 	error = xfs_buf_read_uncached(mp->m_ddev_targp,
 				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
 				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
@@ -135,376 +92,45 @@
 		return error;
 
 	/*
-	 * Write new AG headers to disk. Non-transactional, but written
-	 * synchronously so they are completed prior to the growfs transaction
-	 * being logged.
+	 * Write new AG headers to disk. Non-transactional, but need to be
+	 * written and completed prior to the growfs transaction being logged.
+	 * To do this, we use a delayed write buffer list and wait for
+	 * submission and IO completion of the list as a whole. This allows the
+	 * IO subsystem to merge all the AG headers in a single AG into a single
+	 * IO and hide most of the latency of the IO from us.
+	 *
+	 * This also means that if we get an error whilst building the buffer
+	 * list to write, we can cancel the entire list without having written
+	 * anything.
 	 */
-	nfree = 0;
-	for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
-		__be32	*agfl_bno;
+	INIT_LIST_HEAD(&id.buffer_list);
+	for (id.agno = nagcount - 1;
+	     id.agno >= oagcount;
+	     id.agno--, new -= id.agsize) {
 
-		/*
-		 * AG freespace header block
-		 */
-		bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
-				XFS_FSS_TO_BB(mp, 1), 0,
-				&xfs_agf_buf_ops);
-		if (!bp) {
-			error = -ENOMEM;
-			goto error0;
-		}
-
-		agf = XFS_BUF_TO_AGF(bp);
-		agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
-		agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
-		agf->agf_seqno = cpu_to_be32(agno);
-		if (agno == nagcount - 1)
-			agsize =
-				nb -
-				(agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
+		if (id.agno == nagcount - 1)
+			id.agsize = nb -
+				(id.agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
 		else
-			agsize = mp->m_sb.sb_agblocks;
-		agf->agf_length = cpu_to_be32(agsize);
-		agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
-		agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
-		agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
-		agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
-		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-			agf->agf_roots[XFS_BTNUM_RMAPi] =
-						cpu_to_be32(XFS_RMAP_BLOCK(mp));
-			agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
-			agf->agf_rmap_blocks = cpu_to_be32(1);
-		}
+			id.agsize = mp->m_sb.sb_agblocks;
 
-		agf->agf_flfirst = cpu_to_be32(1);
-		agf->agf_fllast = 0;
-		agf->agf_flcount = 0;
-		tmpsize = agsize - mp->m_ag_prealloc_blocks;
-		agf->agf_freeblks = cpu_to_be32(tmpsize);
-		agf->agf_longest = cpu_to_be32(tmpsize);
-		if (xfs_sb_version_hascrc(&mp->m_sb))
-			uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
-		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
-			agf->agf_refcount_root = cpu_to_be32(
-					xfs_refc_block(mp));
-			agf->agf_refcount_level = cpu_to_be32(1);
-			agf->agf_refcount_blocks = cpu_to_be32(1);
-		}
-
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			goto error0;
-
-		/*
-		 * AG freelist header block
-		 */
-		bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
-				XFS_FSS_TO_BB(mp, 1), 0,
-				&xfs_agfl_buf_ops);
-		if (!bp) {
-			error = -ENOMEM;
-			goto error0;
-		}
-
-		agfl = XFS_BUF_TO_AGFL(bp);
-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
-			agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
-			agfl->agfl_seqno = cpu_to_be32(agno);
-			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
-		}
-
-		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
-		for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++)
-			agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
-
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			goto error0;
-
-		/*
-		 * AG inode header block
-		 */
-		bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
-				XFS_FSS_TO_BB(mp, 1), 0,
-				&xfs_agi_buf_ops);
-		if (!bp) {
-			error = -ENOMEM;
-			goto error0;
-		}
-
-		agi = XFS_BUF_TO_AGI(bp);
-		agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
-		agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
-		agi->agi_seqno = cpu_to_be32(agno);
-		agi->agi_length = cpu_to_be32(agsize);
-		agi->agi_count = 0;
-		agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
-		agi->agi_level = cpu_to_be32(1);
-		agi->agi_freecount = 0;
-		agi->agi_newino = cpu_to_be32(NULLAGINO);
-		agi->agi_dirino = cpu_to_be32(NULLAGINO);
-		if (xfs_sb_version_hascrc(&mp->m_sb))
-			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
-		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-			agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
-			agi->agi_free_level = cpu_to_be32(1);
-		}
-		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
-			agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
-
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			goto error0;
-
-		/*
-		 * BNO btree root block
-		 */
-		bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
-				BTOBB(mp->m_sb.sb_blocksize), 0,
-				&xfs_allocbt_buf_ops);
-
-		if (!bp) {
-			error = -ENOMEM;
-			goto error0;
-		}
-
-		xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, agno, 0);
-
-		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
-		arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
-		arec->ar_blockcount = cpu_to_be32(
-			agsize - be32_to_cpu(arec->ar_startblock));
-
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			goto error0;
-
-		/*
-		 * CNT btree root block
-		 */
-		bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
-				BTOBB(mp->m_sb.sb_blocksize), 0,
-				&xfs_allocbt_buf_ops);
-		if (!bp) {
-			error = -ENOMEM;
-			goto error0;
-		}
-
-		xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, agno, 0);
-
-		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
-		arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
-		arec->ar_blockcount = cpu_to_be32(
-			agsize - be32_to_cpu(arec->ar_startblock));
-		nfree += be32_to_cpu(arec->ar_blockcount);
-
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			goto error0;
-
-		/* RMAP btree root block */
-		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
-			struct xfs_rmap_rec	*rrec;
-			struct xfs_btree_block	*block;
-
-			bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
-				BTOBB(mp->m_sb.sb_blocksize), 0,
-				&xfs_rmapbt_buf_ops);
-			if (!bp) {
-				error = -ENOMEM;
-				goto error0;
-			}
-
-			xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 0,
-						agno, 0);
-			block = XFS_BUF_TO_BLOCK(bp);
-
-
-			/*
-			 * mark the AG header regions as static metadata The BNO
-			 * btree block is the first block after the headers, so
-			 * it's location defines the size of region the static
-			 * metadata consumes.
-			 *
-			 * Note: unlike mkfs, we never have to account for log
-			 * space when growing the data regions
-			 */
-			rrec = XFS_RMAP_REC_ADDR(block, 1);
-			rrec->rm_startblock = 0;
-			rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
-			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
-			rrec->rm_offset = 0;
-			be16_add_cpu(&block->bb_numrecs, 1);
-
-			/* account freespace btree root blocks */
-			rrec = XFS_RMAP_REC_ADDR(block, 2);
-			rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
-			rrec->rm_blockcount = cpu_to_be32(2);
-			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
-			rrec->rm_offset = 0;
-			be16_add_cpu(&block->bb_numrecs, 1);
-
-			/* account inode btree root blocks */
-			rrec = XFS_RMAP_REC_ADDR(block, 3);
-			rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
-			rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
-							XFS_IBT_BLOCK(mp));
-			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
-			rrec->rm_offset = 0;
-			be16_add_cpu(&block->bb_numrecs, 1);
-
-			/* account for rmap btree root */
-			rrec = XFS_RMAP_REC_ADDR(block, 4);
-			rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
-			rrec->rm_blockcount = cpu_to_be32(1);
-			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
-			rrec->rm_offset = 0;
-			be16_add_cpu(&block->bb_numrecs, 1);
-
-			/* account for refc btree root */
-			if (xfs_sb_version_hasreflink(&mp->m_sb)) {
-				rrec = XFS_RMAP_REC_ADDR(block, 5);
-				rrec->rm_startblock = cpu_to_be32(
-						xfs_refc_block(mp));
-				rrec->rm_blockcount = cpu_to_be32(1);
-				rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC);
-				rrec->rm_offset = 0;
-				be16_add_cpu(&block->bb_numrecs, 1);
-			}
-
-			error = xfs_bwrite(bp);
-			xfs_buf_relse(bp);
-			if (error)
-				goto error0;
-		}
-
-		/*
-		 * INO btree root block
-		 */
-		bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
-				BTOBB(mp->m_sb.sb_blocksize), 0,
-				&xfs_inobt_buf_ops);
-		if (!bp) {
-			error = -ENOMEM;
-			goto error0;
-		}
-
-		xfs_btree_init_block(mp, bp, XFS_BTNUM_INO , 0, 0, agno, 0);
-
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			goto error0;
-
-		/*
-		 * FINO btree root block
-		 */
-		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
-			bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
-				BTOBB(mp->m_sb.sb_blocksize), 0,
-				&xfs_inobt_buf_ops);
-			if (!bp) {
-				error = -ENOMEM;
-				goto error0;
-			}
-
-			xfs_btree_init_block(mp, bp, XFS_BTNUM_FINO,
-						     0, 0, agno, 0);
-
-			error = xfs_bwrite(bp);
-			xfs_buf_relse(bp);
-			if (error)
-				goto error0;
-		}
-
-		/*
-		 * refcount btree root block
-		 */
-		if (xfs_sb_version_hasreflink(&mp->m_sb)) {
-			bp = xfs_growfs_get_hdr_buf(mp,
-				XFS_AGB_TO_DADDR(mp, agno, xfs_refc_block(mp)),
-				BTOBB(mp->m_sb.sb_blocksize), 0,
-				&xfs_refcountbt_buf_ops);
-			if (!bp) {
-				error = -ENOMEM;
-				goto error0;
-			}
-
-			xfs_btree_init_block(mp, bp, XFS_BTNUM_REFC,
-					     0, 0, agno, 0);
-
-			error = xfs_bwrite(bp);
-			xfs_buf_relse(bp);
-			if (error)
-				goto error0;
+		error = xfs_ag_init_headers(mp, &id);
+		if (error) {
+			xfs_buf_delwri_cancel(&id.buffer_list);
+			goto out_trans_cancel;
 		}
 	}
-	xfs_trans_agblocks_delta(tp, nfree);
-	/*
-	 * There are new blocks in the old last a.g.
-	 */
+	error = xfs_buf_delwri_submit(&id.buffer_list);
+	if (error)
+		goto out_trans_cancel;
+
+	xfs_trans_agblocks_delta(tp, id.nfree);
+
+	/* If there are new blocks in the old last AG, extend it. */
 	if (new) {
-		struct xfs_owner_info	oinfo;
-
-		/*
-		 * Change the agi length.
-		 */
-		error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
-		if (error) {
-			goto error0;
-		}
-		ASSERT(bp);
-		agi = XFS_BUF_TO_AGI(bp);
-		be32_add_cpu(&agi->agi_length, new);
-		ASSERT(nagcount == oagcount ||
-		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
-		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
-		/*
-		 * Change agf length.
-		 */
-		error = xfs_alloc_read_agf(mp, tp, agno, 0, &bp);
-		if (error) {
-			goto error0;
-		}
-		ASSERT(bp);
-		agf = XFS_BUF_TO_AGF(bp);
-		be32_add_cpu(&agf->agf_length, new);
-		ASSERT(be32_to_cpu(agf->agf_length) ==
-		       be32_to_cpu(agi->agi_length));
-
-		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
-
-		/*
-		 * Free the new space.
-		 *
-		 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
-		 * this doesn't actually exist in the rmap btree.
-		 */
-		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
-		error = xfs_rmap_free(tp, bp, agno,
-				be32_to_cpu(agf->agf_length) - new,
-				new, &oinfo);
+		error = xfs_ag_extend_space(mp, tp, &id, new);
 		if (error)
-			goto error0;
-		error = xfs_free_extent(tp,
-				XFS_AGB_TO_FSB(mp, agno,
-					be32_to_cpu(agf->agf_length) - new),
-				new, &oinfo, XFS_AG_RESV_NONE);
-		if (error)
-			goto error0;
+			goto out_trans_cancel;
 	}
 
 	/*
@@ -517,10 +143,8 @@
 	if (nb > mp->m_sb.sb_dblocks)
 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS,
 				 nb - mp->m_sb.sb_dblocks);
-	if (nfree)
-		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree);
-	if (dpct)
-		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
+	if (id.nfree)
+		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree);
 	xfs_trans_set_sync(tp);
 	error = xfs_trans_commit(tp);
 	if (error)
@@ -529,12 +153,6 @@
 	/* New allocation groups fully initialized, so update mount struct */
 	if (nagimax)
 		mp->m_maxagi = nagimax;
-	if (mp->m_sb.sb_imax_pct) {
-		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
-		do_div(icount, 100);
-		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
-	} else
-		mp->m_maxicount = 0;
 	xfs_set_low_space_thresholds(mp);
 	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
 
@@ -545,73 +163,24 @@
 	if (new) {
 		struct xfs_perag	*pag;
 
-		pag = xfs_perag_get(mp, agno);
+		pag = xfs_perag_get(mp, id.agno);
 		error = xfs_ag_resv_free(pag);
 		xfs_perag_put(pag);
 		if (error)
-			goto out;
+			return error;
 	}
 
-	/* Reserve AG metadata blocks. */
+	/*
+	 * Reserve AG metadata blocks. ENOSPC here does not mean there was a
+	 * growfs failure, just that there still isn't space for new user data
+	 * after the grow has been run.
+	 */
 	error = xfs_fs_reserve_ag_blocks(mp);
-	if (error && error != -ENOSPC)
-		goto out;
-
-	/* update secondary superblocks. */
-	for (agno = 1; agno < nagcount; agno++) {
+	if (error == -ENOSPC)
 		error = 0;
-		/*
-		 * new secondary superblocks need to be zeroed, not read from
-		 * disk as the contents of the new area we are growing into is
-		 * completely unknown.
-		 */
-		if (agno < oagcount) {
-			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
-				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
-				  XFS_FSS_TO_BB(mp, 1), 0, &bp,
-				  &xfs_sb_buf_ops);
-		} else {
-			bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
-				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
-				  XFS_FSS_TO_BB(mp, 1), 0);
-			if (bp) {
-				bp->b_ops = &xfs_sb_buf_ops;
-				xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
-			} else
-				error = -ENOMEM;
-		}
+	return error;
 
-		/*
-		 * If we get an error reading or writing alternate superblocks,
-		 * continue.  xfs_repair chooses the "best" superblock based
-		 * on most matches; if we break early, we'll leave more
-		 * superblocks un-updated than updated, and xfs_repair may
-		 * pick them over the properly-updated primary.
-		 */
-		if (error) {
-			xfs_warn(mp,
-		"error %d reading secondary superblock for ag %d",
-				error, agno);
-			saved_error = error;
-			continue;
-		}
-		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
-
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error) {
-			xfs_warn(mp,
-		"write error %d updating secondary superblock for ag %d",
-				error, agno);
-			saved_error = error;
-			continue;
-		}
-	}
-
- out:
-	return saved_error ? saved_error : error;
-
- error0:
+out_trans_cancel:
 	xfs_trans_cancel(tp);
 	return error;
 }
@@ -638,25 +207,71 @@
 	return -ENOSYS;
 }
 
+static int
+xfs_growfs_imaxpct(
+	struct xfs_mount	*mp,
+	__u32			imaxpct)
+{
+	struct xfs_trans	*tp;
+	int			dpct;
+	int			error;
+
+	if (imaxpct > 100)
+		return -EINVAL;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
+			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
+	if (error)
+		return error;
+
+	dpct = imaxpct - mp->m_sb.sb_imax_pct;
+	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
+	xfs_trans_set_sync(tp);
+	return xfs_trans_commit(tp);
+}
+
 /*
  * protected versions of growfs function acquire and release locks on the mount
  * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG,
  * XFS_IOC_FSGROWFSRT
  */
-
-
 int
 xfs_growfs_data(
-	xfs_mount_t		*mp,
-	xfs_growfs_data_t	*in)
+	struct xfs_mount	*mp,
+	struct xfs_growfs_data	*in)
 {
-	int error;
+	int			error = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	if (!mutex_trylock(&mp->m_growlock))
 		return -EWOULDBLOCK;
-	error = xfs_growfs_data_private(mp, in);
+
+	/* update imaxpct separately to the physical grow of the filesystem */
+	if (in->imaxpct != mp->m_sb.sb_imax_pct) {
+		error = xfs_growfs_imaxpct(mp, in->imaxpct);
+		if (error)
+			goto out_error;
+	}
+
+	if (in->newblocks != mp->m_sb.sb_dblocks) {
+		error = xfs_growfs_data_private(mp, in);
+		if (error)
+			goto out_error;
+	}
+
+	/* Post growfs calculations needed to reflect new state in operations */
+	if (mp->m_sb.sb_imax_pct) {
+		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
+		do_div(icount, 100);
+		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
+	} else
+		mp->m_maxicount = 0;
+
+	/* Update secondary superblocks now the physical grow has completed */
+	error = xfs_update_secondary_sbs(mp);
+
+out_error:
 	/*
 	 * Increment the generation unconditionally, the error could be from
 	 * updating the secondary superblocks, in which case the new size
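
The m_maxicount recalculation moved into xfs_growfs_data() in the hunk above
is the usual 64-bit percentage computation: scale by imaxpct, divide in place
with do_div(), then shift by log2(inodes per block). A standalone sketch of
the same arithmetic (the geometry values are made-up examples; the do_div
stand-in mimics the kernel macro from asm/div64.h):

	#include <stdint.h>
	#include <stdio.h>

	/* stand-in for the kernel's do_div(): divide in place, yield remainder */
	#define do_div(n, base) ({ uint32_t __r = (n) % (base); (n) /= (base); __r; })

	int main(void)
	{
		uint64_t sb_dblocks = 268435456;	/* example: 1TiB of 4k blocks */
		uint8_t  sb_imax_pct = 25;		/* example imaxpct */
		uint8_t  sb_inopblog = 4;		/* log2(inodes per block) */
		uint64_t m_maxicount = 0;

		if (sb_imax_pct) {
			uint64_t icount = sb_dblocks * sb_imax_pct;

			do_div(icount, 100);
			m_maxicount = icount << sb_inopblog;
		}
		printf("max inode count: %llu\n", (unsigned long long)m_maxicount);
		return 0;
	}
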
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index 3e1cc30..fdde17a 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -47,6 +47,7 @@
 
 struct xfs_globals xfs_globals = {
 	.log_recovery_delay	=	0,	/* no delay by default */
+	.mount_delay		=	0,	/* no delay by default */
 #ifdef XFS_ASSERT_FATAL
 	.bug_on_assert		=	true,	/* assert failures BUG() */
 #else
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 9a18f69..164350d 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -107,7 +107,8 @@
 		xfs_idestroy_fork(ip, XFS_COW_FORK);
 
 	if (ip->i_itemp) {
-		ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
+		ASSERT(!test_bit(XFS_LI_IN_AIL,
+				 &ip->i_itemp->ili_item.li_flags));
 		xfs_inode_item_destroy(ip);
 		ip->i_itemp = NULL;
 	}
@@ -309,6 +310,46 @@
 }
 
 /*
+ * If we are allocating a new inode, then check that what was returned is
+ * actually a free, empty inode. If we are not allocating an inode, then
+ * check that we didn't find a free inode.
+ *
+ * Returns:
+ *	0		if the inode free state matches the lookup context
+ *	-ENOENT		if the inode is free and we are not allocating
+ *	-EFSCORRUPTED	if there is any state mismatch at all
+ */
+static int
+xfs_iget_check_free_state(
+	struct xfs_inode	*ip,
+	int			flags)
+{
+	if (flags & XFS_IGET_CREATE) {
+		/* should be a free inode */
+		if (VFS_I(ip)->i_mode != 0) {
+			xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx not marked free! (mode 0x%x)",
+				ip->i_ino, VFS_I(ip)->i_mode);
+			return -EFSCORRUPTED;
+		}
+
+		if (ip->i_d.di_nblocks != 0) {
+			xfs_warn(ip->i_mount,
+"Corruption detected! Free inode 0x%llx has blocks allocated!",
+				ip->i_ino);
+			return -EFSCORRUPTED;
+		}
+		return 0;
+	}
+
+	/* should be an allocated inode */
+	if (VFS_I(ip)->i_mode == 0)
+		return -ENOENT;
+
+	return 0;
+}
+
+/*
  * Check the validity of the inode we just found in the cache
  */
 static int
@@ -357,12 +398,12 @@
 	}
 
 	/*
-	 * If lookup is racing with unlink return an error immediately.
+	 * Check the inode free state is valid. This also detects lookup
+	 * racing with unlinks.
 	 */
-	if (VFS_I(ip)->i_mode == 0 && !(flags & XFS_IGET_CREATE)) {
-		error = -ENOENT;
+	error = xfs_iget_check_free_state(ip, flags);
+	if (error)
 		goto out_error;
-	}
 
 	/*
 	 * If IRECLAIMABLE is set, we've torn down the VFS inode already.
@@ -485,29 +526,12 @@
 
 
 	/*
-	 * If we are allocating a new inode, then check what was returned is
-	 * actually a free, empty inode. If we are not allocating an inode,
-	 * the check we didn't find a free inode.
+	 * Check the inode free state is valid. This also detects lookup
+	 * racing with unlinks.
 	 */
-	if (flags & XFS_IGET_CREATE) {
-		if (VFS_I(ip)->i_mode != 0) {
-			xfs_warn(mp,
-"Corruption detected! Free inode 0x%llx not marked free on disk",
-				ino);
-			error = -EFSCORRUPTED;
-			goto out_destroy;
-		}
-		if (ip->i_d.di_nblocks != 0) {
-			xfs_warn(mp,
-"Corruption detected! Free inode 0x%llx has blocks allocated!",
-				ino);
-			error = -EFSCORRUPTED;
-			goto out_destroy;
-		}
-	} else if (VFS_I(ip)->i_mode == 0) {
-		error = -ENOENT;
+	error = xfs_iget_check_free_state(ip, flags);
+	if (error)
 		goto out_destroy;
-	}
 
 	/*
 	 * Preload the radix tree so we can insert safely under the
@@ -1802,3 +1826,21 @@
 	return __xfs_inode_clear_blocks_tag(ip,
 			trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
+
+/* Disable post-EOF and CoW block auto-reclamation. */
+void
+xfs_icache_disable_reclaim(
+	struct xfs_mount	*mp)
+{
+	cancel_delayed_work_sync(&mp->m_eofblocks_work);
+	cancel_delayed_work_sync(&mp->m_cowblocks_work);
+}
+
+/* Enable post-EOF and CoW block auto-reclamation. */
+void
+xfs_icache_enable_reclaim(
+	struct xfs_mount	*mp)
+{
+	xfs_queue_eofblocks(mp);
+	xfs_queue_cowblocks(mp);
+}
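
The two helpers just added wrap the standard delayed-work quiesce pattern:
cancel_delayed_work_sync() guarantees the scanner is off the queue and not
running before teardown or remount proceeds, and re-enabling is just queueing
the work again. A generic sketch of the pattern (not XFS code; the mydev_*
names are invented):

	#include <linux/workqueue.h>

	struct mydev {
		struct delayed_work	scan_work;
	};

	/* INIT_DELAYED_WORK(&dev->scan_work, mydev_scan) is done at setup */
	static void mydev_scan(struct work_struct *work)
	{
		struct mydev *dev = container_of(to_delayed_work(work),
						 struct mydev, scan_work);

		/* ... scan and reclaim ... */

		/* self re-arm: run again in 5 seconds */
		schedule_delayed_work(&dev->scan_work, 5 * HZ);
	}

	static void mydev_disable_scan(struct mydev *dev)
	{
		/* also cancels a pending self re-arm, and waits if running */
		cancel_delayed_work_sync(&dev->scan_work);
	}

	static void mydev_enable_scan(struct mydev *dev)
	{
		schedule_delayed_work(&dev->scan_work, 5 * HZ);
	}
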
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index d4a7758..d69a0f5 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -131,4 +131,7 @@
 int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp,
 				  xfs_ino_t ino, bool *inuse);
 
+void xfs_icache_disable_reclaim(struct xfs_mount *mp);
+void xfs_icache_enable_reclaim(struct xfs_mount *mp);
+
 #endif
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 865ad13..5da9599 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -91,7 +91,7 @@
 {
 	struct xfs_icreate_item	*icp = ICR_ITEM(lip);
 
-	if (icp->ic_item.li_flags & XFS_LI_ABORTED)
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
 		kmem_zone_free(xfs_icreate_zone, icp);
 	return;
 }
@@ -184,5 +184,5 @@
 
 	xfs_trans_add_item(tp, &icp->ic_item);
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	icp->ic_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &icp->ic_item.li_flags);
 }
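
As elsewhere in this series, li_flags has become an unsigned long driven by
the atomic bitops API, and the XFS_LI_* values are now bit numbers rather
than masks, which is why tests like `flags & XFS_LI_ABORTED` turn into
test_bit(). A minimal sketch of the pattern with a hypothetical item:

	#include <linux/bitops.h>
	#include <linux/types.h>

	#define MY_ITEM_DIRTY	0	/* bit numbers, not bit masks */
	#define MY_ITEM_ABORTED	1

	struct my_item {
		unsigned long	flags;
	};

	static void my_item_mark_dirty(struct my_item *item)
	{
		set_bit(MY_ITEM_DIRTY, &item->flags);	/* atomic RMW */
	}

	static bool my_item_aborted(struct my_item *item)
	{
		/* atomic read; safe against concurrent set/clear of other bits */
		return test_bit(MY_ITEM_ABORTED, &item->flags);
	}
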
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2b70c8b..05207a6 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -498,7 +498,7 @@
 		if (!try_lock) {
 			for (j = (i - 1); j >= 0 && !try_lock; j--) {
 				lp = (xfs_log_item_t *)ips[j]->i_itemp;
-				if (lp && (lp->li_flags & XFS_LI_IN_AIL))
+				if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
 					try_lock++;
 			}
 		}
@@ -598,7 +598,7 @@
 	 * and try again.
 	 */
 	lp = (xfs_log_item_t *)ip0->i_itemp;
-	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+	if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
 		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
 			xfs_iunlock(ip0, ip0_mode);
 			if ((++attempts % 5) == 0)
@@ -791,6 +791,18 @@
 	ASSERT(*ialloc_context == NULL);
 
 	/*
+	 * Protect against obviously corrupt allocation btree records. Later
+	 * xfs_iget checks will catch re-allocation of other active in-memory
+	 * and on-disk inodes. If we don't catch reallocating the parent inode
+	 * here we will deadlock in xfs_iget() so we have to do these checks
+	 * first.
+	 */
+	if ((pip && ino == pip->i_ino) || !xfs_verify_dir_ino(mp, ino)) {
+		xfs_alert(mp, "Allocated a known in-use inode 0x%llx!", ino);
+		return -EFSCORRUPTED;
+	}
+
+	/*
 	 * Get the in-core inode with the lock held exclusively.
 	 * This is because we're setting fields here we need
 	 * to prevent others from looking at until we're done.
@@ -1196,6 +1208,7 @@
 	unlock_dp_on_error = true;
 
 	xfs_defer_init(&dfops, &first_block);
+	tp->t_agfl_dfops = &dfops;
 
 	/*
 	 * Reserve disk quota and the inode.
@@ -1411,11 +1424,11 @@
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	error = xfs_qm_dqattach(sip, 0);
+	error = xfs_qm_dqattach(sip);
 	if (error)
 		goto std_return;
 
-	error = xfs_qm_dqattach(tdp, 0);
+	error = xfs_qm_dqattach(tdp);
 	if (error)
 		goto std_return;
 
@@ -1451,6 +1464,7 @@
 	}
 
 	xfs_defer_init(&dfops, &first_block);
+	tp->t_agfl_dfops = &dfops;
 
 	/*
 	 * Handle initial link state of O_TMPFILE inode
@@ -1534,11 +1548,12 @@
  * dirty on error so that transactions can be easily aborted if possible.
  */
 int
-xfs_itruncate_extents(
+xfs_itruncate_extents_flags(
 	struct xfs_trans	**tpp,
 	struct xfs_inode	*ip,
 	int			whichfork,
-	xfs_fsize_t		new_size)
+	xfs_fsize_t		new_size,
+	int			flags)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp = *tpp;
@@ -1561,6 +1576,8 @@
 
 	trace_xfs_itruncate_extents_start(ip, new_size);
 
+	flags |= xfs_bmapi_aflag(whichfork);
+
 	/*
 	 * Since it is possible for space to become allocated beyond
 	 * the end of the file (in a crash where the space is allocated
@@ -1579,12 +1596,9 @@
 	unmap_len = last_block - first_unmap_block + 1;
 	while (!done) {
 		xfs_defer_init(&dfops, &first_block);
-		error = xfs_bunmapi(tp, ip,
-				    first_unmap_block, unmap_len,
-				    xfs_bmapi_aflag(whichfork),
-				    XFS_ITRUNC_MAX_EXTENTS,
-				    &first_block, &dfops,
-				    &done);
+		error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
+				    XFS_ITRUNC_MAX_EXTENTS, &first_block,
+				    &dfops, &done);
 		if (error)
 			goto out_bmap_cancel;
 
@@ -1811,6 +1825,7 @@
 	xfs_trans_ijoin(tp, ip, 0);
 
 	xfs_defer_init(&dfops, &first_block);
+	tp->t_agfl_dfops = &dfops;
 	error = xfs_ifree(tp, ip, &dfops);
 	if (error) {
 		/*
@@ -1911,7 +1926,7 @@
 	     ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
 		truncate = 1;
 
-	error = xfs_qm_dqattach(ip, 0);
+	error = xfs_qm_dqattach(ip);
 	if (error)
 		return;
 
@@ -2574,11 +2589,11 @@
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	error = xfs_qm_dqattach(dp, 0);
+	error = xfs_qm_dqattach(dp);
 	if (error)
 		goto std_return;
 
-	error = xfs_qm_dqattach(ip, 0);
+	error = xfs_qm_dqattach(ip);
 	if (error)
 		goto std_return;
 
@@ -2647,6 +2662,7 @@
 		goto out_trans_cancel;
 
 	xfs_defer_init(&dfops, &first_block);
+	tp->t_agfl_dfops = &dfops;
 	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
 					&first_block, &dfops, resblks);
 	if (error) {
@@ -3014,6 +3030,7 @@
 	}
 
 	xfs_defer_init(&dfops, &first_block);
+	tp->t_agfl_dfops = &dfops;
 
 	/* RENAME_EXCHANGE is unique from here on. */
 	if (flags & RENAME_EXCHANGE)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1eebc53..00fee68 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -415,8 +415,8 @@
 uint		xfs_ip2xflags(struct xfs_inode *);
 int		xfs_ifree(struct xfs_trans *, xfs_inode_t *,
 			   struct xfs_defer_ops *);
-int		xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
-				      int, xfs_fsize_t);
+int		xfs_itruncate_extents_flags(struct xfs_trans **,
+				struct xfs_inode *, int, xfs_fsize_t, int);
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
 
 void		xfs_iunpin_wait(xfs_inode_t *);
@@ -433,6 +433,16 @@
 			       xfs_nlink_t, dev_t, prid_t,
 			       struct xfs_inode **);
 
+static inline int
+xfs_itruncate_extents(
+	struct xfs_trans	**tpp,
+	struct xfs_inode	*ip,
+	int			whichfork,
+	xfs_fsize_t		new_size)
+{
+	return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0);
+}
+
 /* from xfs_file.c */
 enum xfs_prealloc_flags {
 	XFS_PREALLOC_SET	= (1 << 1),
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 34b91b7..3e5b857 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -518,7 +518,7 @@
 	 * The buffer containing this item failed to be written back
 	 * previously. Resubmit the buffer for IO.
 	 */
-	if (lip->li_flags & XFS_LI_FAILED) {
+	if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
 		if (!xfs_buf_trylock(bp))
 			return XFS_ITEM_LOCKED;
 
@@ -729,14 +729,14 @@
 		 */
 		iip = INODE_ITEM(blip);
 		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
-		    (blip->li_flags & XFS_LI_FAILED))
+		    test_bit(XFS_LI_FAILED, &blip->li_flags))
 			need_ail++;
 	}
 
 	/* make sure we capture the state of the initial inode. */
 	iip = INODE_ITEM(lip);
 	if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
-	    lip->li_flags & XFS_LI_FAILED)
+	    test_bit(XFS_LI_FAILED, &lip->li_flags))
 		need_ail++;
 
 	/*
@@ -803,7 +803,7 @@
 	xfs_inode_log_item_t	*iip = ip->i_itemp;
 
 	if (iip) {
-		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
+		if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) {
 			xfs_trans_ail_remove(&iip->ili_item,
 					     stale ? SHUTDOWN_LOG_IO_ERROR :
 						     SHUTDOWN_CORRUPT_INCORE);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 89fb1eb..5dd9e22 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1103,7 +1103,8 @@
 	if (fa->fsx_xflags & FS_XFLAG_DAX) {
 		if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
 			return -EINVAL;
-		if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
+		if (!bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)),
+				sb->s_blocksize))
 			return -EINVAL;
 	}
 
@@ -1811,6 +1812,88 @@
 	return error;
 }
 
+static int
+xfs_ioc_getlabel(
+	struct xfs_mount	*mp,
+	char			__user *user_label)
+{
+	struct xfs_sb		*sbp = &mp->m_sb;
+	char			label[XFSLABEL_MAX + 1];
+
+	/* Paranoia */
+	BUILD_BUG_ON(sizeof(sbp->sb_fname) > FSLABEL_MAX);
+
+	spin_lock(&mp->m_sb_lock);
+	strncpy(label, sbp->sb_fname, sizeof(sbp->sb_fname));
+	spin_unlock(&mp->m_sb_lock);
+
+	/* xfs on-disk label is 12 chars, be sure we send a null to user */
+	label[XFSLABEL_MAX] = '\0';
+	if (copy_to_user(user_label, label, sizeof(sbp->sb_fname)))
+		return -EFAULT;
+	return 0;
+}
+
+static int
+xfs_ioc_setlabel(
+	struct file		*filp,
+	struct xfs_mount	*mp,
+	char			__user *newlabel)
+{
+	struct xfs_sb		*sbp = &mp->m_sb;
+	char			label[XFSLABEL_MAX + 1];
+	size_t			len;
+	int			error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	/*
+	 * The generic ioctl allows up to FSLABEL_MAX chars, but XFS is much
+	 * smaller, at 12 bytes.  We copy one more to be sure we find the
+	 * (required) NULL character to test the incoming label length.
+	 * NB: The on disk label doesn't need to be null terminated.
+	 */
+	if (copy_from_user(label, newlabel, XFSLABEL_MAX + 1))
+		return -EFAULT;
+	len = strnlen(label, XFSLABEL_MAX + 1);
+	if (len > sizeof(sbp->sb_fname))
+		return -EINVAL;
+
+	error = mnt_want_write_file(filp);
+	if (error)
+		return error;
+
+	spin_lock(&mp->m_sb_lock);
+	memset(sbp->sb_fname, 0, sizeof(sbp->sb_fname));
+	strncpy(sbp->sb_fname, label, sizeof(sbp->sb_fname));
+	spin_unlock(&mp->m_sb_lock);
+
+	/*
+	 * Now we do several things to satisfy userspace.
+	 * In addition to normal logging of the primary superblock, we also
+	 * immediately write these changes to sector zero for the primary, then
+	 * update all backup supers (as xfs_db does for a label change), then
+	 * invalidate the block device page cache.  This is so that any prior
+	 * buffered reads from userspace (i.e. from blkid) are invalidated,
+	 * and userspace will see the newly-written label.
+	 */
+	error = xfs_sync_sb_buf(mp);
+	if (error)
+		goto out;
+	/*
+	 * growfs also updates backup supers so lock against that.
+	 */
+	mutex_lock(&mp->m_growlock);
+	error = xfs_update_secondary_sbs(mp);
+	mutex_unlock(&mp->m_growlock);
+
+	invalidate_bdev(mp->m_ddev_targp->bt_bdev);
+
+out:
+	mnt_drop_write_file(filp);
+	return error;
+}
+
 /*
  * Note: some of the ioctl's return positive numbers as a
  * byte count indicating success, such as readlink_by_handle.
@@ -1834,6 +1917,10 @@
 	switch (cmd) {
 	case FITRIM:
 		return xfs_ioc_trim(mp, arg);
+	case FS_IOC_GETFSLABEL:
+		return xfs_ioc_getlabel(mp, arg);
+	case FS_IOC_SETFSLABEL:
+		return xfs_ioc_setlabel(filp, mp, arg);
 	case XFS_IOC_ALLOCSP:
 	case XFS_IOC_FREESP:
 	case XFS_IOC_RESVSP:
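
FS_IOC_GETFSLABEL/FS_IOC_SETFSLABEL are the filesystem-independent label
ioctls, so the userspace side needs no XFS-specific code at all. A small
usage sketch (FSLABEL_MAX comes from linux/fs.h; any file or directory on
the filesystem works as the target fd):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		char label[FSLABEL_MAX];
		int fd;

		if (argc < 2)
			return 1;
		fd = open(argv[1], O_RDONLY);	/* any path on the filesystem */
		if (fd < 0)
			return 1;

		if (ioctl(fd, FS_IOC_GETFSLABEL, label) == 0)
			printf("old label: %s\n", label);

		if (argc > 2) {
			/* for XFS the new label must fit in 12 characters */
			strncpy(label, argv[2], sizeof(label) - 1);
			label[sizeof(label) - 1] = '\0';
			if (ioctl(fd, FS_IOC_SETFSLABEL, label) != 0)
				perror("FS_IOC_SETFSLABEL");
		}
		close(fd);
		return 0;
	}
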
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 046469f..c6ce6f9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -224,7 +224,7 @@
 	 * necessary and move on to transaction setup.
 	 */
 	xfs_iunlock(ip, lockmode);
-	error = xfs_qm_dqattach(ip, 0);
+	error = xfs_qm_dqattach(ip);
 	if (error)
 		return error;
 
@@ -576,7 +576,7 @@
 		goto done;
 	}
 
-	error = xfs_qm_dqattach_locked(ip, 0);
+	error = xfs_qm_dqattach_locked(ip, false);
 	if (error)
 		goto out_unlock;
 
@@ -692,7 +692,7 @@
 	/*
 	 * Make sure that the dquots are there.
 	 */
-	error = xfs_qm_dqattach(ip, 0);
+	error = xfs_qm_dqattach(ip);
 	if (error)
 		return error;
 
@@ -946,8 +946,11 @@
 	return error;
 }
 
-static inline bool imap_needs_alloc(struct inode *inode,
-		struct xfs_bmbt_irec *imap, int nimaps)
+static inline bool
+imap_needs_alloc(
+	struct inode		*inode,
+	struct xfs_bmbt_irec	*imap,
+	int			nimaps)
 {
 	return !nimaps ||
 		imap->br_startblock == HOLESTARTBLOCK ||
@@ -955,31 +958,58 @@
 		(IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
 }
 
-static inline bool needs_cow_for_zeroing(struct xfs_bmbt_irec *imap, int nimaps)
+static inline bool
+needs_cow_for_zeroing(
+	struct xfs_bmbt_irec	*imap,
+	int			nimaps)
 {
 	return nimaps &&
 		imap->br_startblock != HOLESTARTBLOCK &&
 		imap->br_state != XFS_EXT_UNWRITTEN;
 }
 
-static inline bool need_excl_ilock(struct xfs_inode *ip, unsigned flags)
+static int
+xfs_ilock_for_iomap(
+	struct xfs_inode	*ip,
+	unsigned		flags,
+	unsigned		*lockmode)
 {
+	unsigned		mode = XFS_ILOCK_SHARED;
+
 	/*
 	 * COW writes may allocate delalloc space or convert unwritten COW
 	 * extents, so we need to make sure to take the lock exclusively here.
 	 */
-	if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO)))
-		return true;
+	if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) {
+		/*
+		 * FIXME: It could still overwrite on unshared extents and not
+		 * need allocation.
+		 */
+		if (flags & IOMAP_NOWAIT)
+			return -EAGAIN;
+		mode = XFS_ILOCK_EXCL;
+	}
 
 	/*
-	 * Extents not yet cached requires exclusive access, don't block.
-	 * This is an opencoded xfs_ilock_data_map_shared() to cater for the
+	 * Extents not yet cached require exclusive access; don't block.  This
+	 * is an opencoded xfs_ilock_data_map_shared() call but with
 	 * non-blocking behaviour.
 	 */
-	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
-	    !(ip->i_df.if_flags & XFS_IFEXTENTS))
-		return true;
-	return false;
+	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+		if (flags & IOMAP_NOWAIT)
+			return -EAGAIN;
+		mode = XFS_ILOCK_EXCL;
+	}
+
+	if (flags & IOMAP_NOWAIT) {
+		if (!xfs_ilock_nowait(ip, mode))
+			return -EAGAIN;
+	} else {
+		xfs_ilock(ip, mode);
+	}
+
+	*lockmode = mode;
+	return 0;
 }
 
 static int
@@ -1007,19 +1037,15 @@
 		return xfs_file_iomap_begin_delay(inode, offset, length, iomap);
 	}
 
-	if (need_excl_ilock(ip, flags))
-		lockmode = XFS_ILOCK_EXCL;
-	else
-		lockmode = XFS_ILOCK_SHARED;
-
-	if (flags & IOMAP_NOWAIT) {
-		if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
-			return -EAGAIN;
-		if (!xfs_ilock_nowait(ip, lockmode))
-			return -EAGAIN;
-	} else {
-		xfs_ilock(ip, lockmode);
-	}
+	/*
+	 * Lock the inode in the manner required for the specified operation and
+	 * check for as many conditions as possible that would result in
+	 * blocking. This removes most of the non-blocking checks from the
+	 * mapping code below.
+	 */
+	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
+	if (error)
+		return error;
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if (offset > mp->m_super->s_maxbytes - length)
@@ -1040,19 +1066,21 @@
 			goto out_unlock;
 	}
 
-	if (xfs_is_reflink_inode(ip) &&
-	    ((flags & IOMAP_WRITE) ||
-	     ((flags & IOMAP_ZERO) && needs_cow_for_zeroing(&imap, nimaps)))) {
+	/* Non-modifying mapping requested, so we are done */
+	if (!(flags & (IOMAP_WRITE | IOMAP_ZERO)))
+		goto out_found;
+
+	/*
+	 * Break shared extents if necessary. Checks for non-blocking IO have
+	 * been done up front, so we don't need to do them here.
+	 */
+	if (xfs_is_reflink_inode(ip)) {
+		/* if zeroing doesn't need COW allocation, then we are done. */
+		if ((flags & IOMAP_ZERO) &&
+		    !needs_cow_for_zeroing(&imap, nimaps))
+			goto out_found;
+
 		if (flags & IOMAP_DIRECT) {
-			/*
-			 * A reflinked inode will result in CoW alloc.
-			 * FIXME: It could still overwrite on unshared extents
-			 * and not need allocation.
-			 */
-			if (flags & IOMAP_NOWAIT) {
-				error = -EAGAIN;
-				goto out_unlock;
-			}
 			/* may drop and re-acquire the ilock */
 			error = xfs_reflink_allocate_cow(ip, &imap, &shared,
 					&lockmode);
@@ -1068,46 +1096,45 @@
 		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
 	}
 
-	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
-		/*
-		 * If nowait is set bail since we are going to make
-		 * allocations.
-		 */
-		if (flags & IOMAP_NOWAIT) {
-			error = -EAGAIN;
-			goto out_unlock;
-		}
-		/*
-		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
-		 * pages to keep the chunks of work done where somewhat symmetric
-		 * with the work writeback does. This is a completely arbitrary
-		 * number pulled out of thin air as a best guess for initial
-		 * testing.
-		 *
-		 * Note that the values needs to be less than 32-bits wide until
-		 * the lower level functions are updated.
-		 */
-		length = min_t(loff_t, length, 1024 * PAGE_SIZE);
-		/*
-		 * xfs_iomap_write_direct() expects the shared lock. It
-		 * is unlocked on return.
-		 */
-		if (lockmode == XFS_ILOCK_EXCL)
-			xfs_ilock_demote(ip, lockmode);
-		error = xfs_iomap_write_direct(ip, offset, length, &imap,
-				nimaps);
-		if (error)
-			return error;
+	/* Don't need to allocate over holes when doing zeroing operations. */
+	if (flags & IOMAP_ZERO)
+		goto out_found;
 
-		iomap->flags = IOMAP_F_NEW;
-		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
-	} else {
-		ASSERT(nimaps);
+	if (!imap_needs_alloc(inode, &imap, nimaps))
+		goto out_found;
 
-		xfs_iunlock(ip, lockmode);
-		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+	/* If nowait is set bail since we are going to make allocations. */
+	if (flags & IOMAP_NOWAIT) {
+		error = -EAGAIN;
+		goto out_unlock;
 	}
 
+	/*
+	 * We cap the maximum length we map to a sane size to keep the chunks
+	 * of work done somewhat symmetric with what writeback does. This is a
+	 * completely arbitrary number pulled out of thin air as a best guess
+	 * for initial testing.
+	 *
+	 * Note that the value needs to be less than 32 bits wide until the
+	 * lower level functions are updated.
+	 */
+	length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+
+	/*
+	 * xfs_iomap_write_direct() expects the shared lock. It is unlocked on
+	 * return.
+	 */
+	if (lockmode == XFS_ILOCK_EXCL)
+		xfs_ilock_demote(ip, lockmode);
+	error = xfs_iomap_write_direct(ip, offset, length, &imap,
+			nimaps);
+	if (error)
+		return error;
+
+	iomap->flags = IOMAP_F_NEW;
+	trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+
+out_finish:
 	if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
 				& ~XFS_ILOG_TIMESTAMP))
 		iomap->flags |= IOMAP_F_DIRTY;
@@ -1117,6 +1144,13 @@
 	if (shared)
 		iomap->flags |= IOMAP_F_SHARED;
 	return 0;
+
+out_found:
+	ASSERT(nimaps);
+	xfs_iunlock(ip, lockmode);
+	trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+	goto out_finish;
+
 out_unlock:
 	xfs_iunlock(ip, lockmode);
 	return error;
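
The shape of xfs_ilock_for_iomap() generalizes: pick the lock mode first,
bail out with -EAGAIN for anything known to block under NOWAIT, and only
then acquire the lock, using a trylock in the NOWAIT case. A reduced
userspace sketch of the same control flow with a pthread rwlock (not the
XFS code):

	#include <errno.h>
	#include <pthread.h>
	#include <stdbool.h>

	#define FLAG_NOWAIT	0x1

	static int lock_for_op(pthread_rwlock_t *lk, unsigned int flags,
			       bool needs_excl, bool *got_excl)
	{
		/* exclusive work is assumed to block: fail fast under NOWAIT */
		if (needs_excl && (flags & FLAG_NOWAIT))
			return -EAGAIN;

		if (flags & FLAG_NOWAIT) {
			if (pthread_rwlock_tryrdlock(lk))
				return -EAGAIN;
		} else if (needs_excl) {
			pthread_rwlock_wrlock(lk);
		} else {
			pthread_rwlock_rdlock(lk);
		}

		*got_excl = needs_excl;
		return 0;
	}
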
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index a3ed3c8..b0eb49b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -260,6 +260,7 @@
 	struct dentry	*dentry,
 	unsigned int flags)
 {
+	struct inode *inode;
 	struct xfs_inode *cip;
 	struct xfs_name	name;
 	int		error;
@@ -269,14 +270,13 @@
 
 	xfs_dentry_to_name(&name, dentry);
 	error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
-	if (unlikely(error)) {
-		if (unlikely(error != -ENOENT))
-			return ERR_PTR(error);
-		d_add(dentry, NULL);
-		return NULL;
-	}
-
-	return d_splice_alias(VFS_I(cip), dentry);
+	if (likely(!error))
+		inode = VFS_I(cip);
+	else if (likely(error == -ENOENT))
+		inode = NULL;
+	else
+		inode = ERR_PTR(error);
+	return d_splice_alias(inode, dentry);
 }
 
 STATIC struct dentry *
@@ -855,7 +855,7 @@
 	/*
 	 * Make sure that the dquots are attached to the inode.
 	 */
-	error = xfs_qm_dqattach(ip, 0);
+	error = xfs_qm_dqattach(ip);
 	if (error)
 		return error;
 
@@ -1195,6 +1195,30 @@
 	.update_time		= xfs_vn_update_time,
 };
 
+/* Figure out if this file actually supports DAX. */
+static bool
+xfs_inode_supports_dax(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	/* Only supported on non-reflinked files. */
+	if (!S_ISREG(VFS_I(ip)->i_mode) || xfs_is_reflink_inode(ip))
+		return false;
+
+	/* DAX mount option or DAX iflag must be set. */
+	if (!(mp->m_flags & XFS_MOUNT_DAX) &&
+	    !(ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
+		return false;
+
+	/* Block size must match page size */
+	if (mp->m_sb.sb_blocksize != PAGE_SIZE)
+		return false;
+
+	/* Device has to support DAX too. */
+	return xfs_find_daxdev_for_inode(VFS_I(ip)) != NULL;
+}
+
 STATIC void
 xfs_diflags_to_iflags(
 	struct inode		*inode,
@@ -1213,11 +1237,7 @@
 		inode->i_flags |= S_SYNC;
 	if (flags & XFS_DIFLAG_NOATIME)
 		inode->i_flags |= S_NOATIME;
-	if (S_ISREG(inode->i_mode) &&
-	    ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
-	    !xfs_is_reflink_inode(ip) &&
-	    (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
-	     ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
+	if (xfs_inode_supports_dax(ip))
 		inode->i_flags |= S_DAX;
 }
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2fcd9ed..c21039f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1047,6 +1047,7 @@
 	INIT_LIST_HEAD(&item->li_ail);
 	INIT_LIST_HEAD(&item->li_cil);
 	INIT_LIST_HEAD(&item->li_bio_list);
+	INIT_LIST_HEAD(&item->li_trans);
 }
 
 /*
@@ -2110,10 +2111,10 @@
  */
 void
 xlog_print_trans(
-	struct xfs_trans		*tp)
+	struct xfs_trans	*tp)
 {
-	struct xfs_mount		*mp = tp->t_mountp;
-	struct xfs_log_item_desc	*lidp;
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_log_item	*lip;
 
 	/* dump core transaction and ticket info */
 	xfs_warn(mp, "transaction summary:");
@@ -2124,15 +2125,14 @@
 	xlog_print_tic_res(mp, tp->t_ticket);
 
 	/* dump each log item */
-	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-		struct xfs_log_item	*lip = lidp->lid_item;
+	list_for_each_entry(lip, &tp->t_items, li_trans) {
 		struct xfs_log_vec	*lv = lip->li_lv;
 		struct xfs_log_iovec	*vec;
 		int			i;
 
 		xfs_warn(mp, "log item: ");
 		xfs_warn(mp, "  type	= 0x%x", lip->li_type);
-		xfs_warn(mp, "  flags	= 0x%x", lip->li_flags);
+		xfs_warn(mp, "  flags	= 0x%lx", lip->li_flags);
 		if (!lv)
 			continue;
 		xfs_warn(mp, "  niovecs	= %d", lv->lv_niovecs);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 4668403..c156877 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -141,10 +141,9 @@
 	struct xlog		*log,
 	struct xfs_trans	*tp)
 {
-	struct xfs_log_item_desc *lidp;
+	struct xfs_log_item	*lip;
 
-	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-		struct xfs_log_item *lip = lidp->lid_item;
+	list_for_each_entry(lip, &tp->t_items, li_trans) {
 		struct xfs_log_vec *lv;
 		int	niovecs = 0;
 		int	nbytes = 0;
@@ -152,7 +151,7 @@
 		bool	ordered = false;
 
 		/* Skip items which aren't dirty in this transaction. */
-		if (!(lidp->lid_flags & XFS_LID_DIRTY))
+		if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
 			continue;
 
 		/* get number of vecs and size of data to be stored */
@@ -317,7 +316,7 @@
 	int			*diff_len,
 	int			*diff_iovecs)
 {
-	struct xfs_log_item_desc *lidp;
+	struct xfs_log_item	*lip;
 
 
 	/* Bail out if we didn't find a log item.  */
@@ -326,15 +325,14 @@
 		return;
 	}
 
-	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-		struct xfs_log_item *lip = lidp->lid_item;
+	list_for_each_entry(lip, &tp->t_items, li_trans) {
 		struct xfs_log_vec *lv;
 		struct xfs_log_vec *old_lv = NULL;
 		struct xfs_log_vec *shadow;
 		bool	ordered = false;
 
 		/* Skip items which aren't dirty in this transaction. */
-		if (!(lidp->lid_flags & XFS_LID_DIRTY))
+		if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
 			continue;
 
 		/*
@@ -406,7 +404,7 @@
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
-	struct xfs_log_item_desc *lidp;
+	struct xfs_log_item	*lip;
 	int			len = 0;
 	int			diff_iovecs = 0;
 	int			iclog_space;
@@ -479,11 +477,10 @@
 	 * We do this here so we only need to take the CIL lock once during
 	 * the transaction commit.
 	 */
-	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-		struct xfs_log_item	*lip = lidp->lid_item;
+	list_for_each_entry(lip, &tp->t_items, li_trans) {
 
 		/* Skip items which aren't dirty in this transaction. */
-		if (!(lidp->lid_flags & XFS_LID_DIRTY))
+		if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
 			continue;
 
 		/*
@@ -1013,6 +1010,7 @@
 		*commit_lsn = xc_commit_lsn;
 
 	xfs_log_done(mp, tp->t_ticket, NULL, regrant);
+	tp->t_ticket = NULL;
 	xfs_trans_unreserve_and_mod_sb(tp);
 
 	/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 2b2383f..06a09cb 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2702,7 +2702,7 @@
 				goto next;
 			}
 			fa = xfs_dquot_verify(mp, item->ri_buf[i].i_addr,
-					       -1, 0, 0);
+					       -1, 0);
 			if (fa) {
 				xfs_alert(mp,
 	"dquot corrupt at %pS trying to replay into block 0x%llx",
@@ -3348,7 +3348,7 @@
 	 */
 	dq_f = item->ri_buf[0].i_addr;
 	ASSERT(dq_f);
-	fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0, 0);
+	fa = xfs_dquot_verify(mp, recddq, dq_f->qlf_id, 0);
 	if (fa) {
 		xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
 				dq_f->qlf_id, fa);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a901b86..73ed8fe 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1072,9 +1072,7 @@
 	uint64_t		resblks;
 	int			error;
 
-	cancel_delayed_work_sync(&mp->m_eofblocks_work);
-	cancel_delayed_work_sync(&mp->m_cowblocks_work);
-
+	xfs_icache_disable_reclaim(mp);
 	xfs_fs_unreserve_ag_blocks(mp);
 	xfs_qm_unmount_quotas(mp);
 	xfs_rtunmount_inodes(mp);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index ec39ae2..c3e014b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -161,10 +161,7 @@
 		 * to purge this dquot anyway, so we go ahead regardless.
 		 */
 		error = xfs_qm_dqflush(dqp, &bp);
-		if (error) {
-			xfs_warn(mp, "%s: dquot "PTR_FMT" flush failed",
-				__func__, dqp);
-		} else {
+		if (!error) {
 			error = xfs_bwrite(bp);
 			xfs_buf_relse(bp);
 		}
@@ -173,7 +170,7 @@
 
 	ASSERT(atomic_read(&dqp->q_pincount) == 0);
 	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
-	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
+		!test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags));
 
 	xfs_dqfunlock(dqp);
 	xfs_dqunlock(dqp);
@@ -265,7 +262,7 @@
 	xfs_inode_t	*ip,
 	xfs_dqid_t	id,
 	uint		type,
-	uint		doalloc,
+	bool		doalloc,
 	xfs_dquot_t	**IO_idqpp)
 {
 	xfs_dquot_t	*dqp;
@@ -291,7 +288,7 @@
 	 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got
 	 * turned off suddenly.
 	 */
-	error = xfs_qm_dqget(ip->i_mount, ip, id, type, doalloc, &dqp);
+	error = xfs_qm_dqget_inode(ip, type, doalloc, &dqp);
 	if (error)
 		return error;
 
@@ -326,14 +323,14 @@
 /*
  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
  * into account.
- * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
+ * If @doalloc is true, the dquot(s) will be allocated if needed.
  * Inode may get unlocked and relocked in here, and the caller must deal with
  * the consequences.
  */
 int
 xfs_qm_dqattach_locked(
 	xfs_inode_t	*ip,
-	uint		flags)
+	bool		doalloc)
 {
 	xfs_mount_t	*mp = ip->i_mount;
 	int		error = 0;
@@ -345,8 +342,7 @@
 
 	if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
-						flags & XFS_QMOPT_DQALLOC,
-						&ip->i_udquot);
+				doalloc, &ip->i_udquot);
 		if (error)
 			goto done;
 		ASSERT(ip->i_udquot);
@@ -354,8 +350,7 @@
 
 	if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {
 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
-						flags & XFS_QMOPT_DQALLOC,
-						&ip->i_gdquot);
+				doalloc, &ip->i_gdquot);
 		if (error)
 			goto done;
 		ASSERT(ip->i_gdquot);
@@ -363,8 +358,7 @@
 
 	if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
 		error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
-						flags & XFS_QMOPT_DQALLOC,
-						&ip->i_pdquot);
+				doalloc, &ip->i_pdquot);
 		if (error)
 			goto done;
 		ASSERT(ip->i_pdquot);
@@ -381,8 +375,7 @@
 
 int
 xfs_qm_dqattach(
-	struct xfs_inode	*ip,
-	uint			flags)
+	struct xfs_inode	*ip)
 {
 	int			error;
 
@@ -390,7 +383,7 @@
 		return 0;
 
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_qm_dqattach_locked(ip, flags);
+	error = xfs_qm_dqattach_locked(ip, false);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
 	return error;
@@ -479,11 +472,8 @@
 		spin_unlock(lru_lock);
 
 		error = xfs_qm_dqflush(dqp, &bp);
-		if (error) {
-			xfs_warn(dqp->q_mount, "%s: dquot "PTR_FMT" flush failed",
-				 __func__, dqp);
+		if (error)
 			goto out_unlock_dirty;
-		}
 
 		xfs_buf_delwri_queue(bp, &isol->buffers);
 		xfs_buf_relse(bp);
@@ -571,27 +561,88 @@
 {
 	xfs_dquot_t		*dqp;
 	struct xfs_def_quota    *defq;
+	struct xfs_disk_dquot	*ddqp;
 	int			error;
 
-	error = xfs_qm_dqread(mp, 0, type, 0, &dqp);
+	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
+	if (error)
+		return;
 
-	if (!error) {
-		xfs_disk_dquot_t        *ddqp = &dqp->q_core;
+	ddqp = &dqp->q_core;
+	defq = xfs_get_defquota(dqp, qinf);
 
-		defq = xfs_get_defquota(dqp, qinf);
+	/*
+	 * Timers and warnings have been already set, let's just set the
+	 * default limits for this quota type
+	 */
+	defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+	defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+	defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+	defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+	defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+	defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
+	xfs_qm_dqdestroy(dqp);
+}
 
-		/*
-		 * Timers and warnings have been already set, let's just set the
-		 * default limits for this quota type
-		 */
-		defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
-		defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
-		defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
-		defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
-		defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
-		defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-		xfs_qm_dqdestroy(dqp);
-	}
+/* Initialize quota time limits from the root dquot. */
+static void
+xfs_qm_init_timelimits(
+	struct xfs_mount	*mp,
+	struct xfs_quotainfo	*qinf)
+{
+	struct xfs_disk_dquot	*ddqp;
+	struct xfs_dquot	*dqp;
+	uint			type;
+	int			error;
+
+	qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
+	qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
+	qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
+	qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
+	qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
+	qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
+
+	/*
+	 * We try to get the limits from the superuser's limits fields.
+	 * This is quite hacky, but it is standard quota practice.
+	 *
+	 * Since we may not have done a quotacheck by this point, just read
+	 * the dquot without attaching it to any hashtables or lists.
+	 *
+	 * Timers and warnings are globally set by the first timer found in
+	 * user/group/proj quota types, otherwise a default value is used.
+	 * This should be split into different fields per quota type.
+	 */
+	if (XFS_IS_UQUOTA_RUNNING(mp))
+		type = XFS_DQ_USER;
+	else if (XFS_IS_GQUOTA_RUNNING(mp))
+		type = XFS_DQ_GROUP;
+	else
+		type = XFS_DQ_PROJ;
+	error = xfs_qm_dqget_uncached(mp, 0, type, &dqp);
+	if (error)
+		return;
+
+	ddqp = &dqp->q_core;
+	/*
+	 * The warnings and timers set the grace period given to a user or
+	 * group before any further writes are rejected. If it is zero, a
+	 * default is used.
+	 */
+	if (ddqp->d_btimer)
+		qinf->qi_btimelimit = be32_to_cpu(ddqp->d_btimer);
+	if (ddqp->d_itimer)
+		qinf->qi_itimelimit = be32_to_cpu(ddqp->d_itimer);
+	if (ddqp->d_rtbtimer)
+		qinf->qi_rtbtimelimit = be32_to_cpu(ddqp->d_rtbtimer);
+	if (ddqp->d_bwarns)
+		qinf->qi_bwarnlimit = be16_to_cpu(ddqp->d_bwarns);
+	if (ddqp->d_iwarns)
+		qinf->qi_iwarnlimit = be16_to_cpu(ddqp->d_iwarns);
+	if (ddqp->d_rtbwarns)
+		qinf->qi_rtbwarnlimit = be16_to_cpu(ddqp->d_rtbwarns);
+
+	xfs_qm_dqdestroy(dqp);
 }
 
 /*
@@ -600,11 +651,10 @@
  */
 STATIC int
 xfs_qm_init_quotainfo(
-	xfs_mount_t	*mp)
+	struct xfs_mount	*mp)
 {
-	xfs_quotainfo_t *qinf;
-	int		error;
-	xfs_dquot_t	*dqp;
+	struct xfs_quotainfo	*qinf;
+	int			error;
 
 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
@@ -636,52 +686,7 @@
 
 	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
 
-	/*
-	 * We try to get the limits from the superuser's limits fields.
-	 * This is quite hacky, but it is standard quota practice.
-	 *
-	 * Since we may not have done a quotacheck by this point, just read
-	 * the dquot without attaching it to any hashtables or lists.
-	 *
-	 * Timers and warnings are globally set by the first timer found in
-	 * user/group/proj quota types, otherwise a default value is used.
-	 * This should be split into different fields per quota type.
-	 */
-	error = xfs_qm_dqread(mp, 0,
-			XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
-			 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
-			  XFS_DQ_PROJ),
-			0, &dqp);
-
-	if (!error) {
-		xfs_disk_dquot_t	*ddqp = &dqp->q_core;
-
-		/*
-		 * The warnings and timers set the grace period given to
-		 * a user or group before he or she can not perform any
-		 * more writing. If it is zero, a default is used.
-		 */
-		qinf->qi_btimelimit = ddqp->d_btimer ?
-			be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
-		qinf->qi_itimelimit = ddqp->d_itimer ?
-			be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
-		qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
-			be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
-		qinf->qi_bwarnlimit = ddqp->d_bwarns ?
-			be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
-		qinf->qi_iwarnlimit = ddqp->d_iwarns ?
-			be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
-		qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
-			be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
-		xfs_qm_dqdestroy(dqp);
-	} else {
-		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
-		qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
-		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
-		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
-		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
-		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
-	}
+	xfs_qm_init_timelimits(mp, qinf);
 
 	if (XFS_IS_UQUOTA_RUNNING(mp))
 		xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf);
@@ -865,9 +870,9 @@
 		 * find uninitialised dquot blks. See comment in
 		 * xfs_dquot_verify.
 		 */
-		fa = xfs_dquot_verify(mp, ddq, id + j, type, 0);
+		fa = xfs_dqblk_verify(mp, &dqb[j], id + j, type);
 		if (fa)
-			xfs_dquot_repair(mp, ddq, id + j, type);
+			xfs_dqblk_repair(mp, &dqb[j], id + j, type);
 
 		/*
 		 * Reset type in case we are reusing group quota file for
@@ -893,7 +898,7 @@
 }
 
 STATIC int
-xfs_qm_dqiter_bufs(
+xfs_qm_reset_dqcounts_all(
 	struct xfs_mount	*mp,
 	xfs_dqid_t		firstid,
 	xfs_fsblock_t		bno,
@@ -961,11 +966,11 @@
 }
 
 /*
- * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
- * caller supplied function for every chunk of dquots that we find.
+ * Iterate over all allocated dquot blocks in this quota inode, zeroing all
+ * counters for every chunk of dquots that we find.
  */
 STATIC int
-xfs_qm_dqiterate(
+xfs_qm_reset_dqcounts_buf(
 	struct xfs_mount	*mp,
 	struct xfs_inode	*qip,
 	uint			flags,
@@ -1041,7 +1046,7 @@
 			 * Iterate thru all the blks in the extent and
 			 * reset the counters of all the dquots inside them.
 			 */
-			error = xfs_qm_dqiter_bufs(mp, firstid,
+			error = xfs_qm_reset_dqcounts_all(mp, firstid,
 						   map[i].br_startblock,
 						   map[i].br_blockcount,
 						   flags, buffer_list);
@@ -1066,16 +1071,17 @@
 STATIC int
 xfs_qm_quotacheck_dqadjust(
 	struct xfs_inode	*ip,
-	xfs_dqid_t		id,
 	uint			type,
 	xfs_qcnt_t		nblks,
 	xfs_qcnt_t		rtblks)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_dquot	*dqp;
+	xfs_dqid_t		id;
 	int			error;
 
-	error = xfs_qm_dqget(mp, ip, id, type, XFS_QMOPT_DQALLOC, &dqp);
+	id = xfs_qm_id_for_quotatype(ip, type);
+	error = xfs_qm_dqget(mp, id, type, true, &dqp);
 	if (error) {
 		/*
 		 * Shouldn't be able to turn off quotas here.
@@ -1148,13 +1154,10 @@
 	}
 
 	/*
-	 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
-	 * interface expects the inode to be exclusively locked because that's
-	 * the case in all other instances. It's OK that we do this because
-	 * quotacheck is done only at mount time.
+	 * We don't _need_ to take the ilock EXCL here because quotacheck runs
+	 * at mount time and therefore nobody will be racing chown/chproj.
 	 */
-	error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL,
-			 &ip);
+	error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip);
 	if (error) {
 		*res = BULKSTAT_RV_NOTHING;
 		return error;
@@ -1189,33 +1192,31 @@
 	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
 	 */
 	if (XFS_IS_UQUOTA_ON(mp)) {
-		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
-						   XFS_DQ_USER, nblks, rtblks);
+		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQ_USER, nblks,
+				rtblks);
 		if (error)
 			goto error0;
 	}
 
 	if (XFS_IS_GQUOTA_ON(mp)) {
-		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
-						   XFS_DQ_GROUP, nblks, rtblks);
+		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQ_GROUP, nblks,
+				rtblks);
 		if (error)
 			goto error0;
 	}
 
 	if (XFS_IS_PQUOTA_ON(mp)) {
-		error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
-						   XFS_DQ_PROJ, nblks, rtblks);
+		error = xfs_qm_quotacheck_dqadjust(ip, XFS_DQ_PROJ, nblks,
+				rtblks);
 		if (error)
 			goto error0;
 	}
 
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	IRELE(ip);
 	*res = BULKSTAT_RV_DIDONE;
 	return 0;
 
 error0:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	IRELE(ip);
 	*res = BULKSTAT_RV_GIVEUP;
 	return error;
@@ -1247,9 +1248,8 @@
 	 */
 	if (!xfs_dqflock_nowait(dqp)) {
 		/* buf is pinned in-core by delwri list */
-		DEFINE_SINGLE_BUF_MAP(map, dqp->q_blkno,
-				      mp->m_quotainfo->qi_dqchunklen);
-		bp = _xfs_buf_find(mp->m_ddev_targp, &map, 1, 0, NULL);
+		bp = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
+				mp->m_quotainfo->qi_dqchunklen, 0);
 		if (!bp) {
 			error = -EINVAL;
 			goto out_unlock;
@@ -1307,7 +1307,7 @@
 	 * We don't log our changes till later.
 	 */
 	if (uip) {
-		error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA,
+		error = xfs_qm_reset_dqcounts_buf(mp, uip, XFS_QMOPT_UQUOTA,
 					 &buffer_list);
 		if (error)
 			goto error_return;
@@ -1315,7 +1315,7 @@
 	}
 
 	if (gip) {
-		error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA,
+		error = xfs_qm_reset_dqcounts_buf(mp, gip, XFS_QMOPT_GQUOTA,
 					 &buffer_list);
 		if (error)
 			goto error_return;
@@ -1323,7 +1323,7 @@
 	}
 
 	if (pip) {
-		error = xfs_qm_dqiterate(mp, pip, XFS_QMOPT_PQUOTA,
+		error = xfs_qm_reset_dqcounts_buf(mp, pip, XFS_QMOPT_PQUOTA,
 					 &buffer_list);
 		if (error)
 			goto error_return;
@@ -1675,7 +1675,7 @@
 	 * if necessary. The dquot(s) will not be locked.
 	 */
 	if (XFS_NOT_DQATTACHED(mp, ip)) {
-		error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+		error = xfs_qm_dqattach_locked(ip, true);
 		if (error) {
 			xfs_iunlock(ip, lockflags);
 			return error;
@@ -1694,10 +1694,7 @@
 			 * holding ilock.
 			 */
 			xfs_iunlock(ip, lockflags);
-			error = xfs_qm_dqget(mp, NULL, uid,
-						 XFS_DQ_USER,
-						 XFS_QMOPT_DQALLOC,
-						 &uq);
+			error = xfs_qm_dqget(mp, uid, XFS_DQ_USER, true, &uq);
 			if (error) {
 				ASSERT(error != -ENOENT);
 				return error;
@@ -1720,10 +1717,7 @@
 	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
 		if (ip->i_d.di_gid != gid) {
 			xfs_iunlock(ip, lockflags);
-			error = xfs_qm_dqget(mp, NULL, gid,
-						 XFS_DQ_GROUP,
-						 XFS_QMOPT_DQALLOC,
-						 &gq);
+			error = xfs_qm_dqget(mp, gid, XFS_DQ_GROUP, true, &gq);
 			if (error) {
 				ASSERT(error != -ENOENT);
 				goto error_rele;
@@ -1739,10 +1733,8 @@
 	if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
 		if (xfs_get_projid(ip) != prid) {
 			xfs_iunlock(ip, lockflags);
-			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
-						 XFS_DQ_PROJ,
-						 XFS_QMOPT_DQALLOC,
-						 &pq);
+			error = xfs_qm_dqget(mp, (xfs_dqid_t)prid, XFS_DQ_PROJ,
+					true, &pq);
 			if (error) {
 				ASSERT(error != -ENOENT);
 				goto error_rele;
@@ -1933,7 +1925,7 @@
 		 */
 		if (i == 0 || ip != i_tab[i-1]) {
 			if (XFS_NOT_DQATTACHED(mp, ip)) {
-				error = xfs_qm_dqattach(ip, 0);
+				error = xfs_qm_dqattach(ip);
 				if (error)
 					return error;
 			}
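
xfs_qm_init_timelimits() also inverts the old control flow: the defaults are
assigned unconditionally first, and each on-disk field overrides its default
only when nonzero, which is what makes the early return on a failed dquot
read safe. The same shape in miniature (names and default values are
illustrative only):

	#include <stddef.h>
	#include <stdint.h>

	#define DEFAULT_BTIMELIMIT	(7 * 24 * 60 * 60)	/* example: 7 days */
	#define DEFAULT_ITIMELIMIT	(7 * 24 * 60 * 60)

	struct limits {
		uint32_t	btimelimit;
		uint32_t	itimelimit;
	};

	static void init_limits(struct limits *l, const struct limits *ondisk)
	{
		/* defaults first, so any early return leaves sane values */
		l->btimelimit = DEFAULT_BTIMELIMIT;
		l->itimelimit = DEFAULT_ITIMELIMIT;

		if (!ondisk)
			return;		/* mirrors the failed-dqget early return */

		/* nonzero on-disk values override the defaults */
		if (ondisk->btimelimit)
			l->btimelimit = ondisk->btimelimit;
		if (ondisk->itimelimit)
			l->itimelimit = ondisk->itimelimit;
	}
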
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 2975a82..e3129b2 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -170,8 +170,10 @@
 
 /* quota ops */
 extern int		xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint);
-extern int		xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t *,
-					uint, struct qc_dqblk *, uint);
+extern int		xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t,
+					uint, struct qc_dqblk *);
+extern int		xfs_qm_scall_getquota_next(struct xfs_mount *,
+					xfs_dqid_t *, uint, struct qc_dqblk *);
 extern int		xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
 					struct qc_dqblk *);
 extern int		xfs_qm_scall_quotaon(struct xfs_mount *, uint);
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 2be6d27..36b89e2 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -72,7 +72,7 @@
 	xfs_mount_t		*mp = ip->i_mount;
 	xfs_dquot_t		*dqp;
 
-	if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
+	if (!xfs_qm_dqget(mp, xfs_get_projid(ip), XFS_DQ_PROJ, false, &dqp)) {
 		xfs_fill_statvfs_from_dquot(statp, dqp);
 		xfs_qm_dqput(dqp);
 	}
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 9cb5c38..3e05d30 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -425,7 +425,7 @@
 	 * a reference to the dquot, so it's safe to do this unlock/lock without
 	 * it being reclaimed in the mean time.
 	 */
-	error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
+	error = xfs_qm_dqget(mp, id, type, true, &dqp);
 	if (error) {
 		ASSERT(error != -ENOENT);
 		goto out_unlock;
@@ -622,39 +622,14 @@
 	return error;
 }
 
-
-int
-xfs_qm_scall_getquota(
+/* Fill out the quota context. */
+static void
+xfs_qm_scall_getquota_fill_qc(
 	struct xfs_mount	*mp,
-	xfs_dqid_t		*id,
 	uint			type,
-	struct qc_dqblk		*dst,
-	uint			dqget_flags)
+	const struct xfs_dquot	*dqp,
+	struct qc_dqblk		*dst)
 {
-	struct xfs_dquot	*dqp;
-	int			error;
-
-	/*
-	 * Try to get the dquot. We don't want it allocated on disk, so
-	 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
-	 * exist, we'll get ENOENT back.
-	 */
-	error = xfs_qm_dqget(mp, NULL, *id, type, dqget_flags, &dqp);
-	if (error)
-		return error;
-
-	/*
-	 * If everything's NULL, this dquot doesn't quite exist as far as
-	 * our utility programs are concerned.
-	 */
-	if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
-		error = -ENOENT;
-		goto out_put;
-	}
-
-	/* Fill in the ID we actually read from disk */
-	*id = be32_to_cpu(dqp->q_core.d_id);
-
 	memset(dst, 0, sizeof(*dst));
 	dst->d_spc_hardlimit =
 		XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
@@ -696,7 +671,7 @@
 	if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) ||
 	     (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) ||
 	     (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) &&
-	    *id != 0) {
+	    dqp->q_core.d_id != 0) {
 		if ((dst->d_space > dst->d_spc_softlimit) &&
 		    (dst->d_spc_softlimit > 0)) {
 			ASSERT(dst->d_spc_timer != 0);
@@ -707,11 +682,69 @@
 		}
 	}
 #endif
+}
+
+/* Return the quota information for the dquot matching id. */
+int
+xfs_qm_scall_getquota(
+	struct xfs_mount	*mp,
+	xfs_dqid_t		id,
+	uint			type,
+	struct qc_dqblk		*dst)
+{
+	struct xfs_dquot	*dqp;
+	int			error;
+
+	/*
+	 * Try to get the dquot. We don't want it allocated on disk, so don't
+	 * set doalloc. If it doesn't exist, we'll get ENOENT back.
+	 */
+	error = xfs_qm_dqget(mp, id, type, false, &dqp);
+	if (error)
+		return error;
+
+	/*
+	 * If everything's NULL, this dquot doesn't quite exist as far as
+	 * our utility programs are concerned.
+	 */
+	if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+		error = -ENOENT;
+		goto out_put;
+	}
+
+	xfs_qm_scall_getquota_fill_qc(mp, type, dqp, dst);
+
 out_put:
 	xfs_qm_dqput(dqp);
 	return error;
 }
 
+/*
+ * Return the quota information for the first initialized dquot whose id
+ * is at least as high as id.
+ */
+int
+xfs_qm_scall_getquota_next(
+	struct xfs_mount	*mp,
+	xfs_dqid_t		*id,
+	uint			type,
+	struct qc_dqblk		*dst)
+{
+	struct xfs_dquot	*dqp;
+	int			error;
+
+	error = xfs_qm_dqget_next(mp, *id, type, &dqp);
+	if (error)
+		return error;
+
+	/* Fill in the ID we actually read from disk */
+	*id = be32_to_cpu(dqp->q_core.d_id);
+
+	xfs_qm_scall_getquota_fill_qc(mp, type, dqp, dst);
+
+	xfs_qm_dqput(dqp);
+	return error;
+}
 
 STATIC int
 xfs_dqrele_inode(
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index ce6506a..3edf52b 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -48,6 +48,22 @@
 	 (XFS_IS_PQUOTA_ON(mp) && \
 		(mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0))
 
+static inline uint
+xfs_quota_chkd_flag(
+	uint		dqtype)
+{
+	switch (dqtype) {
+	case XFS_DQ_USER:
+		return XFS_UQUOTA_CHKD;
+	case XFS_DQ_GROUP:
+		return XFS_GQUOTA_CHKD;
+	case XFS_DQ_PROJ:
+		return XFS_PQUOTA_CHKD;
+	default:
+		return 0;
+	}
+}
+
 /*
  * The structure kept inside the xfs_trans_t keep track of dquot changes
  * within a transaction and apply them later.
@@ -90,8 +106,8 @@
 extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *,
 		struct xfs_dquot *, struct xfs_dquot *,
 		struct xfs_dquot *, uint);
-extern int xfs_qm_dqattach(struct xfs_inode *, uint);
-extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
+extern int xfs_qm_dqattach(struct xfs_inode *);
+extern int xfs_qm_dqattach_locked(struct xfs_inode *ip, bool doalloc);
 extern void xfs_qm_dqdetach(struct xfs_inode *);
 extern void xfs_qm_dqrele(struct xfs_dquot *);
 extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
@@ -132,7 +148,7 @@
 #define xfs_qm_vop_rename_dqattach(it)					(0)
 #define xfs_qm_vop_chown(tp, ip, old, new)				(NULL)
 #define xfs_qm_vop_chown_reserve(tp, ip, u, g, p, fl)			(0)
-#define xfs_qm_dqattach(ip, fl)						(0)
+#define xfs_qm_dqattach(ip)						(0)
 #define xfs_qm_dqattach_locked(ip, fl)					(0)
 #define xfs_qm_dqdetach(ip)
 #define xfs_qm_dqrele(d)
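
xfs_quota_chkd_flag() gives enforcement paths a one-liner to ask whether
quotacheck has validated a given quota type. A self-contained sketch of the
helper and a typical check (the bit values here are illustrative, not the
real on-disk flags):

	#include <stdio.h>

	#define DQ_USER		0x01	/* illustrative values */
	#define DQ_GROUP	0x02
	#define DQ_PROJ		0x04

	#define UQUOTA_CHKD	0x10
	#define GQUOTA_CHKD	0x20
	#define PQUOTA_CHKD	0x40

	static unsigned int quota_chkd_flag(unsigned int dqtype)
	{
		switch (dqtype) {
		case DQ_USER:	return UQUOTA_CHKD;
		case DQ_GROUP:	return GQUOTA_CHKD;
		case DQ_PROJ:	return PQUOTA_CHKD;
		default:	return 0;
		}
	}

	int main(void)
	{
		unsigned int sb_qflags = UQUOTA_CHKD;	/* example sb state */

		/* enforcement is skipped until quotacheck marks the type */
		printf("user quota checked: %d\n",
		       !!(sb_qflags & quota_chkd_flag(DQ_USER)));
		return 0;
	}
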
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index a651085..c93fc91 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -239,8 +239,7 @@
 		return -ESRCH;
 
 	id = from_kqid(&init_user_ns, qid);
-	return xfs_qm_scall_getquota(mp, &id,
-				      xfs_quota_type(qid.type), qdq, 0);
+	return xfs_qm_scall_getquota(mp, id, xfs_quota_type(qid.type), qdq);
 }
 
 /* Return quota info for active quota >= this qid */
@@ -260,9 +259,8 @@
 		return -ESRCH;
 
 	id = from_kqid(&init_user_ns, *qid);
-	ret = xfs_qm_scall_getquota(mp, &id,
-				    xfs_quota_type(qid->type), qdq,
-				    XFS_QMOPT_DQNEXT);
+	ret = xfs_qm_scall_getquota_next(mp, &id, xfs_quota_type(qid->type),
+			qdq);
 	if (ret)
 		return ret;
 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 15c9393..e5866b7 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -159,7 +159,7 @@
 xfs_cui_item_unlock(
 	struct xfs_log_item	*lip)
 {
-	if (lip->li_flags & XFS_LI_ABORTED)
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
 		xfs_cui_release(CUI_ITEM(lip));
 }
 
@@ -310,7 +310,7 @@
 {
 	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
 
-	if (lip->li_flags & XFS_LI_ABORTED) {
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
 		xfs_cui_release(cudp->cud_cuip);
 		kmem_zone_free(xfs_cud_zone, cudp);
 	}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cdbd342..713e857 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -305,7 +305,7 @@
 	 * Fork all the shared blocks from our write offset until the end of
 	 * the extent.
 	 */
-	error = xfs_qm_dqattach_locked(ip, 0);
+	error = xfs_qm_dqattach_locked(ip, false);
 	if (error)
 		return error;
 
@@ -431,7 +431,7 @@
 		if (error)
 			return error;
 
-		error = xfs_qm_dqattach_locked(ip, 0);
+		error = xfs_qm_dqattach_locked(ip, false);
 		if (error)
 			goto out;
 		goto retry;
@@ -552,6 +552,9 @@
  *
  * If cancel_real is true this function cancels all COW fork extents for the
  * inode; if cancel_real is false, real extents are not cleared.
+ *
+ * Caller must have already joined the inode to the current transaction. The
+ * inode will be joined to the transaction returned to the caller.
  */
 int
 xfs_reflink_cancel_cow_blocks(
@@ -592,7 +595,6 @@
 			if (error)
 				break;
 		} else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
-			xfs_trans_ijoin(*tpp, ip, 0);
 			xfs_defer_init(&dfops, &firstfsb);
 
 			/* Free the CoW orphan record. */
@@ -1359,7 +1361,7 @@
 		goto out_unlock;
 
 	/* Attach dquots to dest inode before changing block map */
-	ret = xfs_qm_dqattach(dest, 0);
+	ret = xfs_qm_dqattach(dest);
 	if (ret)
 		goto out_unlock;
 
@@ -1551,7 +1553,12 @@
 	return 0;
 }
 
-/* Clear the inode reflink flag if there are no shared extents. */
+/*
+ * Clear the inode reflink flag if there are no shared extents.
+ *
+ * The caller is responsible for joining the inode to the transaction passed in.
+ * The inode will be joined to the transaction that is returned to the caller.
+ */
 int
 xfs_reflink_clear_inode_flag(
 	struct xfs_inode	*ip,
@@ -1578,7 +1585,6 @@
 	trace_xfs_reflink_unset_inode_flag(ip);
 	ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
 	xfs_inode_clear_cowblocks_tag(ip);
-	xfs_trans_ijoin(*tpp, ip, 0);
 	xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
 
 	return error;
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 06a0784..e5b5b3e 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -158,7 +158,7 @@
 xfs_rui_item_unlock(
 	struct xfs_log_item	*lip)
 {
-	if (lip->li_flags & XFS_LI_ABORTED)
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
 		xfs_rui_release(RUI_ITEM(lip));
 }
 
@@ -331,7 +331,7 @@
 {
 	struct xfs_rud_log_item	*rudp = RUD_ITEM(lip);
 
-	if (lip->li_flags & XFS_LI_ABORTED) {
+	if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
 		xfs_rui_release(rudp->rud_ruip);
 		kmem_zone_free(xfs_rud_zone, rudp);
 	}
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index dfee3c9..52632ab 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -23,9 +23,14 @@
 struct xfs_mount;
 struct xfs_trans;
 
+/*
+ * XXX: Most of the realtime allocation functions deal in units of realtime
+ * extents, not realtime blocks.  This looks funny when paired with the type
+ * name and screams for a larger cleanup.
+ */
 struct xfs_rtalloc_rec {
-	xfs_rtblock_t		ar_startblock;
-	xfs_rtblock_t		ar_blockcount;
+	xfs_rtblock_t		ar_startext;
+	xfs_rtblock_t		ar_extcount;
 };
 
 typedef int (*xfs_rtalloc_query_range_fn)(
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index 056e12b..1cc7990 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -113,6 +113,7 @@
 	}
 }
 
+#ifdef CONFIG_PROC_FS
 /* legacy quota interfaces */
 #ifdef CONFIG_XFS_QUOTA
 static int xqm_proc_show(struct seq_file *m, void *v)
@@ -124,18 +125,6 @@
 	return 0;
 }
 
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
-	.open		= xqm_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* legacy quota stats interface no 2 */
 static int xqmstat_proc_show(struct seq_file *m, void *v)
 {
@@ -147,22 +136,8 @@
 	seq_putc(m, '\n');
 	return 0;
 }
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= xqmstat_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* CONFIG_XFS_QUOTA */
 
-#ifdef CONFIG_PROC_FS
 int
 xfs_init_procfs(void)
 {
@@ -174,11 +149,9 @@
 		goto out;
 
 #ifdef CONFIG_XFS_QUOTA
-	if (!proc_create("fs/xfs/xqmstat", 0, NULL,
-			 &xqmstat_proc_fops))
+	if (!proc_create_single("fs/xfs/xqmstat", 0, NULL, xqmstat_proc_show))
 		goto out;
-	if (!proc_create("fs/xfs/xqm", 0, NULL,
-			 &xqm_proc_fops))
+	if (!proc_create_single("fs/xfs/xqm", 0, NULL, xqm_proc_show))
 		goto out;
 #endif
 	return 0;
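
The conversion above is an instance of the new proc_create_single()
helper, which takes the seq_file show routine directly and does the
single_open()/file_operations plumbing internally. A minimal sketch of
the pattern (the "demo" names are hypothetical, not part of this
series):

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int demo_proc_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello\n");
		return 0;
	}

	static int __init demo_init(void)
	{
		if (!proc_create_single("fs/demo", 0, NULL, demo_proc_show))
			return -ENOMEM;
		return 0;
	}
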
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d714240..ed67389 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -63,7 +63,7 @@
 #include <linux/parser.h>
 
 static const struct super_operations xfs_super_operations;
-struct bio_set *xfs_ioend_bioset;
+struct bio_set xfs_ioend_bioset;
 
 static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
 #ifdef DEBUG
@@ -1372,7 +1372,6 @@
 		 */
 		xfs_restore_resvblks(mp);
 		xfs_log_work_queue(mp);
-		xfs_queue_eofblocks(mp);
 
 		/* Recover any CoW blocks that never got remapped. */
 		error = xfs_reflink_recover_cow(mp);
@@ -1382,7 +1381,7 @@
 			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 			return error;
 		}
-		xfs_queue_cowblocks(mp);
+		xfs_icache_enable_reclaim(mp);
 
 		/* Create the per-AG metadata reservation pool. */
 		error = xfs_fs_reserve_ag_blocks(mp);
@@ -1392,8 +1391,13 @@
 
 	/* rw -> ro */
 	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+		/*
+		 * Cancel background eofb scanning so it cannot race with the
+		 * final log force+buftarg wait and deadlock the remount.
+		 */
+		xfs_icache_disable_reclaim(mp);
+
 		/* Get rid of any leftover CoW reservations... */
-		cancel_delayed_work_sync(&mp->m_cowblocks_work);
 		error = xfs_icache_free_cowblocks(mp, NULL);
 		if (error) {
 			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
@@ -1416,12 +1420,6 @@
 		 */
 		xfs_save_resvblks(mp);
 
-		/*
-		 * Cancel background eofb scanning so it cannot race with the
-		 * final log force+buftarg wait and deadlock the remount.
-		 */
-		cancel_delayed_work_sync(&mp->m_eofblocks_work);
-
 		xfs_quiesce_attr(mp);
 		mp->m_flags |= XFS_MOUNT_RDONLY;
 	}
@@ -1441,6 +1439,7 @@
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
+	xfs_icache_disable_reclaim(mp);
 	xfs_save_resvblks(mp);
 	xfs_quiesce_attr(mp);
 	return xfs_sync_sb(mp, true);
@@ -1454,6 +1453,7 @@
 
 	xfs_restore_resvblks(mp);
 	xfs_log_work_queue(mp);
+	xfs_icache_enable_reclaim(mp);
 	return 0;
 }
 
@@ -1635,6 +1635,17 @@
 #endif
 	sb->s_op = &xfs_super_operations;
 
+	/*
+	 * Delay mount work if the debug hook is set. This is debug
+	 * instrumentation to coordinate simulation of xfs mount failures with
+	 * VFS superblock operations.
+	 */
+	if (xfs_globals.mount_delay) {
+		xfs_notice(mp, "Delaying mount for %d seconds.",
+			xfs_globals.mount_delay);
+		msleep(xfs_globals.mount_delay * 1000);
+	}
+
 	if (silent)
 		flags |= XFS_MFSI_QUIET;
 
@@ -1690,11 +1701,17 @@
 		sb->s_flags |= SB_I_VERSION;
 
 	if (mp->m_flags & XFS_MOUNT_DAX) {
+		bool rtdev_is_dax = false, datadev_is_dax;
+
 		xfs_warn(mp,
 		"DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
 
-		error = bdev_dax_supported(sb, sb->s_blocksize);
-		if (error) {
+		datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
+			sb->s_blocksize);
+		if (mp->m_rtdev_targp)
+			rtdev_is_dax = bdev_dax_supported(
+				mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
+		if (!rtdev_is_dax && !datadev_is_dax) {
 			xfs_alert(mp,
 			"DAX unsupported by block device. Turning off DAX.");
 			mp->m_flags &= ~XFS_MOUNT_DAX;
@@ -1761,6 +1778,7 @@
  out_close_devices:
 	xfs_close_devices(mp);
  out_free_fsname:
+	sb->s_fs_info = NULL;
 	xfs_free_fsname(mp);
 	kfree(mp);
  out:
@@ -1778,6 +1796,10 @@
 {
 	struct xfs_mount	*mp = XFS_M(sb);
 
+	/* if ->fill_super failed, we have no mount to tear down */
+	if (!sb->s_fs_info)
+		return;
+
 	xfs_notice(mp, "Unmounting Filesystem");
 	xfs_filestream_unmount(mp);
 	xfs_unmountfs(mp);
@@ -1787,6 +1809,8 @@
 	xfs_destroy_percpu_counters(mp);
 	xfs_destroy_mount_workqueues(mp);
 	xfs_close_devices(mp);
+
+	sb->s_fs_info = NULL;
 	xfs_free_fsname(mp);
 	kfree(mp);
 }
@@ -1806,6 +1830,9 @@
 	struct super_block	*sb,
 	struct shrink_control	*sc)
 {
+	/* Paranoia: catch incorrect calls during mount setup or teardown */
+	if (WARN_ON_ONCE(!sb->s_fs_info))
+		return 0;
 	return xfs_reclaim_inodes_count(XFS_M(sb));
 }
 
@@ -1845,10 +1872,9 @@
 STATIC int __init
 xfs_init_zones(void)
 {
-	xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
+	if (bioset_init(&xfs_ioend_bioset, 4 * MAX_BUF_PER_PAGE,
 			offsetof(struct xfs_ioend, io_inline_bio),
-			BIOSET_NEED_BVECS);
-	if (!xfs_ioend_bioset)
+			BIOSET_NEED_BVECS))
 		goto out;
 
 	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
@@ -1880,11 +1906,6 @@
 	if (!xfs_trans_zone)
 		goto out_destroy_ifork_zone;
 
-	xfs_log_item_desc_zone =
-		kmem_zone_init(sizeof(struct xfs_log_item_desc),
-			       "xfs_log_item_desc");
-	if (!xfs_log_item_desc_zone)
-		goto out_destroy_trans_zone;
 
 	/*
 	 * The size of the zone allocated buf log item is the maximum
@@ -1894,7 +1915,7 @@
 	xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
 					   "xfs_buf_item");
 	if (!xfs_buf_item_zone)
-		goto out_destroy_log_item_desc_zone;
+		goto out_destroy_trans_zone;
 
 	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
 			((XFS_EFD_MAX_FAST_EXTENTS - 1) *
@@ -1982,8 +2003,6 @@
 	kmem_zone_destroy(xfs_efd_zone);
  out_destroy_buf_item_zone:
 	kmem_zone_destroy(xfs_buf_item_zone);
- out_destroy_log_item_desc_zone:
-	kmem_zone_destroy(xfs_log_item_desc_zone);
  out_destroy_trans_zone:
 	kmem_zone_destroy(xfs_trans_zone);
  out_destroy_ifork_zone:
@@ -1997,7 +2016,7 @@
  out_destroy_log_ticket_zone:
 	kmem_zone_destroy(xfs_log_ticket_zone);
  out_free_ioend_bioset:
-	bioset_free(xfs_ioend_bioset);
+	bioset_exit(&xfs_ioend_bioset);
  out:
 	return -ENOMEM;
 }
@@ -2022,14 +2041,13 @@
 	kmem_zone_destroy(xfs_efi_zone);
 	kmem_zone_destroy(xfs_efd_zone);
 	kmem_zone_destroy(xfs_buf_item_zone);
-	kmem_zone_destroy(xfs_log_item_desc_zone);
 	kmem_zone_destroy(xfs_trans_zone);
 	kmem_zone_destroy(xfs_ifork_zone);
 	kmem_zone_destroy(xfs_da_state_zone);
 	kmem_zone_destroy(xfs_btree_cur_zone);
 	kmem_zone_destroy(xfs_bmap_free_item_zone);
 	kmem_zone_destroy(xfs_log_ticket_zone);
-	bioset_free(xfs_ioend_bioset);
+	bioset_exit(&xfs_ioend_bioset);
 }
 
 STATIC int __init
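
The xfs_ioend_bioset change follows the new embedded bio_set API: the
set becomes a plain struct that is set up with bioset_init() and torn
down with bioset_exit(), instead of a pointer allocated by
bioset_create()/bioset_free(). A minimal sketch, with hypothetical
names:

	#include <linux/bio.h>

	static struct bio_set demo_bioset;	/* embedded, not a pointer */

	static int __init demo_init(void)
	{
		/* pool size, front pad and flags as for bioset_create() */
		return bioset_init(&demo_bioset, BIO_POOL_SIZE, 0,
				   BIOSET_NEED_BVECS);
	}

	static void __exit demo_exit(void)
	{
		bioset_exit(&demo_bioset);
	}
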
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 5b66ac1..aed03da 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -259,6 +259,7 @@
 	 * bmapi or the directory create code.
 	 */
 	xfs_defer_init(&dfops, &first_block);
+	tp->t_agfl_dfops = &dfops;
 
 	/*
 	 * Allocate an inode for the symlink.
@@ -488,16 +489,11 @@
 	error = xfs_defer_finish(&tp, &dfops);
 	if (error)
 		goto error_bmap_cancel;
-	/*
-	 * The first xact was committed, so add the inode to the new one.
-	 * Mark it dirty so it will be logged and moved forward in the log as
-	 * part of every commit.
-	 */
-	xfs_trans_ijoin(tp, ip, 0);
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
 	/*
 	 * Commit the transaction containing extent freeing and EFDs.
 	 */
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 	error = xfs_trans_commit(tp);
 	if (error) {
 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index 82afee0..b53a33e 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -95,6 +95,7 @@
 
 struct xfs_globals {
 	int	log_recovery_delay;	/* log recovery delay (secs) */
+	int	mount_delay;		/* mount setup delay (secs) */
 	bool	bug_on_assert;		/* BUG() the kernel on assert failure */
 };
 extern struct xfs_globals	xfs_globals;
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 8b2ccc2..2d5cd25 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -165,9 +165,40 @@
 }
 XFS_SYSFS_ATTR_RW(log_recovery_delay);
 
+STATIC ssize_t
+mount_delay_store(
+	struct kobject	*kobject,
+	const char	*buf,
+	size_t		count)
+{
+	int		ret;
+	int		val;
+
+	ret = kstrtoint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	if (val < 0 || val > 60)
+		return -EINVAL;
+
+	xfs_globals.mount_delay = val;
+
+	return count;
+}
+
+STATIC ssize_t
+mount_delay_show(
+	struct kobject	*kobject,
+	char		*buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.mount_delay);
+}
+XFS_SYSFS_ATTR_RW(mount_delay);
+
 static struct attribute *xfs_dbg_attrs[] = {
 	ATTR_LIST(bug_on_assert),
 	ATTR_LIST(log_recovery_delay),
+	ATTR_LIST(mount_delay),
 	NULL,
 };
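
Like log_recovery_delay next to it, mount_delay is a debug-only knob;
it presumably surfaces as /sys/fs/xfs/debug/mount_delay, and writing an
integer number of seconds in [0, 60] makes the next mount sleep that
long in ->fill_super, so debug harnesses can coordinate simulated mount
failures with concurrent VFS superblock operations.
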
 
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 8955254..9d4c4ca 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -441,8 +441,7 @@
 		__field(unsigned, bli_recur)
 		__field(int, bli_refcount)
 		__field(unsigned, bli_flags)
-		__field(void *, li_desc)
-		__field(unsigned, li_flags)
+		__field(unsigned long, li_flags)
 	),
 	TP_fast_assign(
 		__entry->dev = bip->bli_buf->b_target->bt_dev;
@@ -455,12 +454,11 @@
 		__entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
 		__entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
 		__entry->buf_lockval = bip->bli_buf->b_sema.count;
-		__entry->li_desc = bip->bli_item.li_desc;
 		__entry->li_flags = bip->bli_item.li_flags;
 	),
 	TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
 		  "lock %d flags %s recur %d refcount %d bliflags %s "
-		  "lidesc %p liflags %s",
+		  "liflags %s",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long long)__entry->buf_bno,
 		  __entry->buf_len,
@@ -471,7 +469,6 @@
 		  __entry->bli_recur,
 		  __entry->bli_refcount,
 		  __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
-		  __entry->li_desc,
 		  __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
 )
 
@@ -1018,7 +1015,7 @@
 		__field(dev_t, dev)
 		__field(void *, lip)
 		__field(uint, type)
-		__field(uint, flags)
+		__field(unsigned long, flags)
 		__field(xfs_lsn_t, lsn)
 	),
 	TP_fast_assign(
@@ -1070,7 +1067,7 @@
 		__field(dev_t, dev)
 		__field(void *, lip)
 		__field(uint, type)
-		__field(uint, flags)
+		__field(unsigned long, flags)
 		__field(xfs_lsn_t, old_lsn)
 		__field(xfs_lsn_t, new_lsn)
 	),
@@ -1750,6 +1747,7 @@
 		__field(int, namelen)
 		__field(int, valuelen)
 		__field(xfs_dahash_t, hashval)
+		__field(int, flags)
 		__field(int, op_flags)
 	),
 	TP_fast_assign(
@@ -1760,10 +1758,11 @@
 		__entry->namelen = args->namelen;
 		__entry->valuelen = args->valuelen;
 		__entry->hashval = args->hashval;
+		__entry->flags = args->flags;
 		__entry->op_flags = args->op_flags;
 	),
 	TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d valuelen %d "
-		  "hashval 0x%x op_flags %s",
+		  "hashval 0x%x flags %s op_flags %s",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->namelen,
@@ -1771,6 +1770,7 @@
 		  __entry->namelen,
 		  __entry->valuelen,
 		  __entry->hashval,
+		  __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
 		  __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
 )
 
@@ -2243,30 +2243,35 @@
 struct xfs_defer_ops;
 
 DECLARE_EVENT_CLASS(xfs_defer_class,
-	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop),
-	TP_ARGS(mp, dop),
+	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop,
+		 unsigned long caller_ip),
+	TP_ARGS(mp, dop, caller_ip),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(void *, dop)
 		__field(char, committed)
 		__field(char, low)
+		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
 		__entry->dev = mp ? mp->m_super->s_dev : 0;
 		__entry->dop = dop;
 		__entry->committed = dop->dop_committed;
 		__entry->low = dop->dop_low;
+		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d ops %p committed %d low %d",
+	TP_printk("dev %d:%d ops %p committed %d low %d, caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->dop,
 		  __entry->committed,
-		  __entry->low)
+		  __entry->low,
+		  (char *)__entry->caller_ip)
 )
 #define DEFINE_DEFER_EVENT(name) \
 DEFINE_EVENT(xfs_defer_class, name, \
-	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop), \
-	TP_ARGS(mp, dop))
+	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, \
+		 unsigned long caller_ip), \
+	TP_ARGS(mp, dop, caller_ip))
 
 DECLARE_EVENT_CLASS(xfs_defer_error_class,
 	TP_PROTO(struct xfs_mount *mp, struct xfs_defer_ops *dop, int error),
@@ -2433,6 +2438,8 @@
 #define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
 DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
 DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred);
+DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_agfl_free_defer);
+DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_agfl_free_deferred);
 
 /* rmap tracepoints */
 DECLARE_EVENT_CLASS(xfs_rmap_class,
@@ -3346,6 +3353,43 @@
 		  __entry->logflags)
 );
 
+DECLARE_EVENT_CLASS(xfs_trans_class,
+	TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip),
+	TP_ARGS(tp, caller_ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(uint32_t, tid)
+		__field(uint32_t, flags)
+		__field(unsigned long, caller_ip)
+	),
+	TP_fast_assign(
+		__entry->dev = tp->t_mountp->m_super->s_dev;
+		__entry->tid = 0;
+		if (tp->t_ticket)
+			__entry->tid = tp->t_ticket->t_tid;
+		__entry->flags = tp->t_flags;
+		__entry->caller_ip = caller_ip;
+	),
+	TP_printk("dev %d:%d trans %x flags 0x%x caller %pS",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->tid,
+		  __entry->flags,
+		  (char *)__entry->caller_ip)
+)
+
+#define DEFINE_TRANS_EVENT(name) \
+DEFINE_EVENT(xfs_trans_class, name, \
+	TP_PROTO(struct xfs_trans *tp, unsigned long caller_ip), \
+	TP_ARGS(tp, caller_ip))
+DEFINE_TRANS_EVENT(xfs_trans_alloc);
+DEFINE_TRANS_EVENT(xfs_trans_cancel);
+DEFINE_TRANS_EVENT(xfs_trans_commit);
+DEFINE_TRANS_EVENT(xfs_trans_dup);
+DEFINE_TRANS_EVENT(xfs_trans_free);
+DEFINE_TRANS_EVENT(xfs_trans_roll);
+DEFINE_TRANS_EVENT(xfs_trans_add_item);
+DEFINE_TRANS_EVENT(xfs_trans_free_items);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index d6d8f9d..fc7ba75 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -31,9 +31,9 @@
 #include "xfs_log.h"
 #include "xfs_trace.h"
 #include "xfs_error.h"
+#include "xfs_defer.h"
 
 kmem_zone_t	*xfs_trans_zone;
-kmem_zone_t	*xfs_log_item_desc_zone;
 
 #if defined(CONFIG_TRACEPOINTS)
 static void
@@ -79,6 +79,7 @@
 	xfs_extent_busy_sort(&tp->t_busy);
 	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
 
+	trace_xfs_trans_free(tp, _RET_IP_);
 	atomic_dec(&tp->t_mountp->m_active_trans);
 	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
 		sb_end_intwrite(tp->t_mountp->m_super);
@@ -94,11 +95,13 @@
  * blocks.  Locks and log items, however, are not inherited.  They must
  * be added to the new transaction explicitly.
  */
-STATIC xfs_trans_t *
+STATIC struct xfs_trans *
 xfs_trans_dup(
-	xfs_trans_t	*tp)
+	struct xfs_trans	*tp)
 {
-	xfs_trans_t	*ntp;
+	struct xfs_trans	*ntp;
+
+	trace_xfs_trans_dup(tp, _RET_IP_);
 
 	ntp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
 
@@ -127,6 +130,7 @@
 	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
 	tp->t_rtx_res = tp->t_rtx_res_used;
 	ntp->t_pflags = tp->t_pflags;
+	ntp->t_agfl_dfops = tp->t_agfl_dfops;
 
 	xfs_trans_dup_dqinfo(tp, ntp);
 
@@ -283,6 +287,8 @@
 		return error;
 	}
 
+	trace_xfs_trans_alloc(tp, _RET_IP_);
+
 	*tpp = tp;
 	return 0;
 }
@@ -727,73 +733,52 @@
 	return;
 }
 
-/*
- * Add the given log item to the transaction's list of log items.
- *
- * The log item will now point to its new descriptor with its li_desc field.
- */
+/* Add the given log item to the transaction's list of log items. */
 void
 xfs_trans_add_item(
 	struct xfs_trans	*tp,
 	struct xfs_log_item	*lip)
 {
-	struct xfs_log_item_desc *lidp;
-
 	ASSERT(lip->li_mountp == tp->t_mountp);
 	ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
+	ASSERT(list_empty(&lip->li_trans));
+	ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));
 
-	lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
-
-	lidp->lid_item = lip;
-	lidp->lid_flags = 0;
-	list_add_tail(&lidp->lid_trans, &tp->t_items);
-
-	lip->li_desc = lidp;
-}
-
-STATIC void
-xfs_trans_free_item_desc(
-	struct xfs_log_item_desc *lidp)
-{
-	list_del_init(&lidp->lid_trans);
-	kmem_zone_free(xfs_log_item_desc_zone, lidp);
+	list_add_tail(&lip->li_trans, &tp->t_items);
+	trace_xfs_trans_add_item(tp, _RET_IP_);
 }
 
 /*
- * Unlink and free the given descriptor.
+ * Unlink the log item from the transaction. The log item is no longer
+ * considered dirty in this transaction, as the linked transaction has
+ * finished, either by abort or commit completion.
  */
 void
 xfs_trans_del_item(
 	struct xfs_log_item	*lip)
 {
-	xfs_trans_free_item_desc(lip->li_desc);
-	lip->li_desc = NULL;
+	clear_bit(XFS_LI_DIRTY, &lip->li_flags);
+	list_del_init(&lip->li_trans);
 }
 
-/*
- * Unlock all of the items of a transaction and free all the descriptors
- * of that transaction.
- */
+/* Detach and unlock all of the items in a transaction */
 void
 xfs_trans_free_items(
 	struct xfs_trans	*tp,
 	xfs_lsn_t		commit_lsn,
 	bool			abort)
 {
-	struct xfs_log_item_desc *lidp, *next;
+	struct xfs_log_item	*lip, *next;
 
-	list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
-		struct xfs_log_item	*lip = lidp->lid_item;
+	trace_xfs_trans_free_items(tp, _RET_IP_);
 
-		lip->li_desc = NULL;
-
+	list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+		xfs_trans_del_item(lip);
 		if (commit_lsn != NULLCOMMITLSN)
 			lip->li_ops->iop_committing(lip, commit_lsn);
 		if (abort)
-			lip->li_flags |= XFS_LI_ABORTED;
+			set_bit(XFS_LI_ABORTED, &lip->li_flags);
 		lip->li_ops->iop_unlock(lip);
-
-		xfs_trans_free_item_desc(lidp);
 	}
 }
 
@@ -861,7 +846,7 @@
 		xfs_lsn_t		item_lsn;
 
 		if (aborted)
-			lip->li_flags |= XFS_LI_ABORTED;
+			set_bit(XFS_LI_ABORTED, &lip->li_flags);
 		item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
 
 		/* item_lsn of -1 means the item needs no further processing */
@@ -936,6 +921,11 @@
 	int			error = 0;
 	int			sync = tp->t_flags & XFS_TRANS_SYNC;
 
+	ASSERT(!tp->t_agfl_dfops ||
+	       !xfs_defer_has_unfinished_work(tp->t_agfl_dfops) || regrant);
+
+	trace_xfs_trans_commit(tp, _RET_IP_);
+
 	/*
 	 * If there is nothing to be logged by the transaction,
 	 * then unlock all of the items associated with the
@@ -991,6 +981,7 @@
 		commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant);
 		if (commit_lsn == -1 && !error)
 			error = -EIO;
+		tp->t_ticket = NULL;
 	}
 	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
 	xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
@@ -1022,6 +1013,8 @@
 	struct xfs_mount	*mp = tp->t_mountp;
 	bool			dirty = (tp->t_flags & XFS_TRANS_DIRTY);
 
+	trace_xfs_trans_cancel(tp, _RET_IP_);
+
 	/*
 	 * See if the caller is relying on us to shut down the
 	 * filesystem.  This happens in paths where we detect
@@ -1033,17 +1026,19 @@
 	}
 #ifdef DEBUG
 	if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
-		struct xfs_log_item_desc *lidp;
+		struct xfs_log_item *lip;
 
-		list_for_each_entry(lidp, &tp->t_items, lid_trans)
-			ASSERT(!(lidp->lid_item->li_type == XFS_LI_EFD));
+		list_for_each_entry(lip, &tp->t_items, li_trans)
+			ASSERT(!(lip->li_type == XFS_LI_EFD));
 	}
 #endif
 	xfs_trans_unreserve_and_mod_sb(tp);
 	xfs_trans_unreserve_and_mod_dquots(tp);
 
-	if (tp->t_ticket)
+	if (tp->t_ticket) {
 		xfs_log_done(mp, tp->t_ticket, NULL, false);
+		tp->t_ticket = NULL;
+	}
 
 	/* mark this thread as no longer being in a transaction */
 	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
@@ -1067,6 +1062,8 @@
 	struct xfs_trans_res	tres;
 	int			error;
 
+	trace_xfs_trans_roll(trans, _RET_IP_);
+
 	/*
 	 * Copy the critical parameters from one trans to the next.
 	 */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 9d542df..29706b8 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -27,7 +27,6 @@
 struct xfs_inode;
 struct xfs_item_ops;
 struct xfs_log_iovec;
-struct xfs_log_item_desc;
 struct xfs_mount;
 struct xfs_trans;
 struct xfs_trans_res;
@@ -43,12 +42,12 @@
 
 typedef struct xfs_log_item {
 	struct list_head		li_ail;		/* AIL pointers */
+	struct list_head		li_trans;	/* transaction list */
 	xfs_lsn_t			li_lsn;		/* last on-disk lsn */
-	struct xfs_log_item_desc	*li_desc;	/* ptr to current desc*/
 	struct xfs_mount		*li_mountp;	/* ptr to fs mount */
 	struct xfs_ail			*li_ailp;	/* ptr to AIL */
 	uint				li_type;	/* item type */
-	uint				li_flags;	/* misc flags */
+	unsigned long			li_flags;	/* misc flags */
 	struct xfs_buf			*li_buf;	/* real buffer pointer */
 	struct list_head		li_bio_list;	/* buffer item list */
 	void				(*li_cb)(struct xfs_buf *,
@@ -64,14 +63,21 @@
 	xfs_lsn_t			li_seq;		/* CIL commit seq */
 } xfs_log_item_t;
 
-#define	XFS_LI_IN_AIL	0x1
-#define	XFS_LI_ABORTED	0x2
-#define	XFS_LI_FAILED	0x4
+/*
+ * li_flags use the (set/test/clear)_bit atomic interfaces because updates can
+ * race with each other and we don't want to have to use the AIL lock to
+ * serialise all updates.
+ */
+#define	XFS_LI_IN_AIL	0
+#define	XFS_LI_ABORTED	1
+#define	XFS_LI_FAILED	2
+#define	XFS_LI_DIRTY	3	/* log item dirty in transaction */
 
 #define XFS_LI_FLAGS \
-	{ XFS_LI_IN_AIL,	"IN_AIL" }, \
-	{ XFS_LI_ABORTED,	"ABORTED" }, \
-	{ XFS_LI_FAILED,	"FAILED" }
+	{ (1 << XFS_LI_IN_AIL),		"IN_AIL" }, \
+	{ (1 << XFS_LI_ABORTED),	"ABORTED" }, \
+	{ (1 << XFS_LI_FAILED),		"FAILED" }, \
+	{ (1 << XFS_LI_DIRTY),		"DIRTY" }
 
 struct xfs_item_ops {
 	void (*iop_size)(xfs_log_item_t *, int *, int *);
@@ -111,6 +117,7 @@
 	struct xlog_ticket	*t_ticket;	/* log mgr ticket */
 	struct xfs_mount	*t_mountp;	/* ptr to fs mount struct */
 	struct xfs_dquot_acct   *t_dqinfo;	/* acctg info for dquots */
+	struct xfs_defer_ops	*t_agfl_dfops;	/* optional agfl fixup dfops */
 	unsigned int		t_flags;	/* misc flags */
 	int64_t			t_icount_delta;	/* superblock icount change */
 	int64_t			t_ifree_delta;	/* superblock ifree change */
@@ -228,7 +235,8 @@
 				  uint);
 int		xfs_trans_free_extent(struct xfs_trans *,
 				      struct xfs_efd_log_item *, xfs_fsblock_t,
-				      xfs_extlen_t, struct xfs_owner_info *);
+				      xfs_extlen_t, struct xfs_owner_info *,
+				      bool);
 int		xfs_trans_commit(struct xfs_trans *);
 int		xfs_trans_roll(struct xfs_trans **);
 int		xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
@@ -242,7 +250,6 @@
 					struct xfs_buf *src_bp);
 
 extern kmem_zone_t	*xfs_trans_zone;
-extern kmem_zone_t	*xfs_log_item_desc_zone;
 
 /* rmap updates */
 enum xfs_rmap_intent_type;
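
With li_flags now an unsigned long holding bit numbers, every update
goes through the atomic bitops and the old read-modify-write races on
the flag word go away. A sketch of the resulting idiom, loosely
modelled on xfs_set_li_failed() and simplified (locking asserts
omitted):

	static void demo_set_li_failed(struct xfs_log_item *lip,
				       struct xfs_buf *bp)
	{
		/* atomically replaces "if (!(f & FAILED)) f |= FAILED" */
		if (!test_and_set_bit(XFS_LI_FAILED, &lip->li_flags)) {
			xfs_buf_hold(bp);	/* first setter takes the ref */
			lip->li_buf = bp;
		}
	}
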
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index d4a2445..41e280e 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -32,30 +32,51 @@
 #ifdef DEBUG
 /*
  * Check that the list is sorted as it should be.
+ *
+ * Called with the AIL lock held, but we don't want an assert failure with it
+ * held; otherwise we'll lock everything up and won't be able to debug the
+ * cause. Hence we sample and check the state under the AIL lock and return if
+ * everything is fine; otherwise we drop the lock and run the ASSERT checks.
+ * Asserts may not be fatal, so pick the lock back up and continue onwards.
  */
 STATIC void
 xfs_ail_check(
-	struct xfs_ail	*ailp,
-	xfs_log_item_t	*lip)
+	struct xfs_ail		*ailp,
+	struct xfs_log_item	*lip)
 {
-	xfs_log_item_t	*prev_lip;
+	struct xfs_log_item	*prev_lip;
+	struct xfs_log_item	*next_lip;
+	xfs_lsn_t		prev_lsn = NULLCOMMITLSN;
+	xfs_lsn_t		next_lsn = NULLCOMMITLSN;
+	xfs_lsn_t		lsn;
+	bool			in_ail;
 
 	if (list_empty(&ailp->ail_head))
 		return;
 
 	/*
-	 * Check the next and previous entries are valid.
+	 * Sample then check the next and previous entries are valid.
 	 */
-	ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
-	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
+	in_ail = test_bit(XFS_LI_IN_AIL, &lip->li_flags);
+	prev_lip = list_entry(lip->li_ail.prev, struct xfs_log_item, li_ail);
 	if (&prev_lip->li_ail != &ailp->ail_head)
-		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
+		prev_lsn = prev_lip->li_lsn;
+	next_lip = list_entry(lip->li_ail.next, struct xfs_log_item, li_ail);
+	if (&next_lip->li_ail != &ailp->ail_head)
+		next_lsn = next_lip->li_lsn;
+	lsn = lip->li_lsn;
 
-	prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
-	if (&prev_lip->li_ail != &ailp->ail_head)
-		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
+	if (in_ail &&
+	    (prev_lsn == NULLCOMMITLSN || XFS_LSN_CMP(prev_lsn, lsn) <= 0) &&
+	    (next_lsn == NULLCOMMITLSN || XFS_LSN_CMP(next_lsn, lsn) >= 0))
+		return;
 
-
+	spin_unlock(&ailp->ail_lock);
+	ASSERT(in_ail);
+	ASSERT(prev_lsn == NULLCOMMITLSN || XFS_LSN_CMP(prev_lsn, lsn) <= 0);
+	ASSERT(next_lsn == NULLCOMMITLSN || XFS_LSN_CMP(next_lsn, lsn) >= 0);
+	spin_lock(&ailp->ail_lock);
 }
 #else /* !DEBUG */
 #define	xfs_ail_check(a,l)
@@ -684,7 +705,7 @@
 
 	for (i = 0; i < nr_items; i++) {
 		struct xfs_log_item *lip = log_items[i];
-		if (lip->li_flags & XFS_LI_IN_AIL) {
+		if (test_and_set_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
 			/* check if we really need to move the item */
 			if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)
 				continue;
@@ -694,7 +715,6 @@
 			if (mlip == lip)
 				mlip_changed = 1;
 		} else {
-			lip->li_flags |= XFS_LI_IN_AIL;
 			trace_xfs_ail_insert(lip, 0, lsn);
 		}
 		lip->li_lsn = lsn;
@@ -725,7 +745,7 @@
 	trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
 	xfs_ail_delete(ailp, lip);
 	xfs_clear_li_failed(lip);
-	lip->li_flags &= ~XFS_LI_IN_AIL;
+	clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
 	lip->li_lsn = 0;
 
 	return mlip == lip;
@@ -761,7 +781,7 @@
 	struct xfs_mount	*mp = ailp->ail_mount;
 	bool			mlip_changed;
 
-	if (!(lip->li_flags & XFS_LI_IN_AIL)) {
+	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
 		spin_unlock(&ailp->ail_lock);
 		if (!XFS_FORCED_SHUTDOWN(mp)) {
 			xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
index 14543d9..230a21d 100644
--- a/fs/xfs/xfs_trans_bmap.c
+++ b/fs/xfs/xfs_trans_bmap.c
@@ -79,7 +79,7 @@
 	 * 2.) shuts down the filesystem
 	 */
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	budp->bud_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
 
 	return error;
 }
@@ -158,7 +158,7 @@
 	bmap = container_of(item, struct xfs_bmap_intent, bi_list);
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	buip->bui_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
 
 	/*
 	 * atomic_inc_return gives us the value after the increment;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index a5d9dfc..a8ddb4e 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -40,7 +40,7 @@
 	struct xfs_buf_map	*map,
 	int			nmaps)
 {
-	struct xfs_log_item_desc *lidp;
+	struct xfs_log_item	*lip;
 	struct xfs_buf_log_item	*blip;
 	int			len = 0;
 	int			i;
@@ -48,8 +48,8 @@
 	for (i = 0; i < nmaps; i++)
 		len += map[i].bm_len;
 
-	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-		blip = (struct xfs_buf_log_item *)lidp->lid_item;
+	list_for_each_entry(lip, &tp->t_items, li_trans) {
+		blip = (struct xfs_buf_log_item *)lip;
 		if (blip->bli_item.li_type == XFS_LI_BUF &&
 		    blip->bli_buf->b_target == target &&
 		    XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn &&
@@ -100,14 +100,10 @@
 	atomic_inc(&bip->bli_refcount);
 
 	/*
-	 * Get a log_item_desc to point at the new item.
+	 * Attach the item to the transaction so we can find it in
+	 * xfs_trans_get_buf() and friends.
 	 */
 	xfs_trans_add_item(tp, &bip->bli_item);
-
-	/*
-	 * Initialize b_fsprivate2 so we can find it with incore_match()
-	 * in xfs_trans_get_buf() and friends above.
-	 */
 	bp->b_transp = tp;
 
 }
@@ -391,7 +387,7 @@
 	 * If the buffer is dirty within this transaction, we can't
 	 * release it until we commit.
 	 */
-	if (bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY)
+	if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags))
 		return;
 
 	/*
@@ -442,7 +438,7 @@
 		ASSERT(bp->b_pincount == 0);
 ***/
 		ASSERT(atomic_read(&bip->bli_refcount) == 0);
-		ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
+		ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
 		ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
 		xfs_buf_item_relse(bp);
 	}
@@ -542,7 +538,7 @@
 	bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
 }
 
 /*
@@ -626,7 +622,7 @@
 		ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
 		ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
 		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
-		ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
+		ASSERT(test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags));
 		ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
 		return;
 	}
@@ -642,7 +638,7 @@
 		memset(bip->bli_formats[i].blf_data_map, 0,
 		       (bip->bli_formats[i].blf_map_size * sizeof(uint)));
 	}
-	bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
 	tp->t_flags |= XFS_TRANS_DIRTY;
 }
 
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index c3d5472..c381c02 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -77,7 +77,7 @@
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &dqp->q_logitem.qli_item.li_flags);
 }
 
 /*
@@ -879,7 +879,7 @@
 	xfs_qoff_logitem_t	*qlp)
 {
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &qlp->qql_item.li_flags);
 }
 
 STATIC void
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index ab43864..2f44a08 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -68,7 +68,8 @@
 	struct xfs_efd_log_item	*efdp,
 	xfs_fsblock_t		start_block,
 	xfs_extlen_t		ext_len,
-	struct xfs_owner_info	*oinfo)
+	struct xfs_owner_info	*oinfo,
+	bool			skip_discard)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
 	uint			next_extent;
@@ -79,9 +80,8 @@
 
 	trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
 
-	error = xfs_free_extent(tp, start_block, ext_len, oinfo,
-			XFS_AG_RESV_NONE);
-
+	error = __xfs_free_extent(tp, start_block, ext_len,
+				  oinfo, XFS_AG_RESV_NONE, skip_discard);
 	/*
 	 * Mark the transaction dirty, even on error. This ensures the
 	 * transaction is aborted, which:
@@ -90,7 +90,7 @@
 	 * 2.) shuts down the filesystem
 	 */
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
 
 	next_extent = efdp->efd_next_extent;
 	ASSERT(next_extent < efdp->efd_format.efd_nextents);
@@ -155,7 +155,7 @@
 	free = container_of(item, struct xfs_extent_free_item, xefi_list);
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	efip->efi_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
 
 	/*
 	 * atomic_inc_return gives us the value after the increment;
@@ -195,7 +195,7 @@
 	error = xfs_trans_free_extent(tp, done_item,
 			free->xefi_startblock,
 			free->xefi_blockcount,
-			&free->xefi_oinfo);
+			&free->xefi_oinfo, free->xefi_skip_discard);
 	kmem_free(free);
 	return error;
 }
@@ -231,9 +231,79 @@
 	.cancel_item	= xfs_extent_free_cancel_item,
 };
 
+/*
+ * AGFL blocks are accounted differently in the reserve pools and are not
+ * inserted into the busy extent list.
+ */
+STATIC int
+xfs_agfl_free_finish_item(
+	struct xfs_trans		*tp,
+	struct xfs_defer_ops		*dop,
+	struct list_head		*item,
+	void				*done_item,
+	void				**state)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_efd_log_item		*efdp = done_item;
+	struct xfs_extent_free_item	*free;
+	struct xfs_extent		*extp;
+	struct xfs_buf			*agbp;
+	int				error;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+	uint				next_extent;
+
+	free = container_of(item, struct xfs_extent_free_item, xefi_list);
+	ASSERT(free->xefi_blockcount == 1);
+	agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
+	agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+
+	trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
+
+	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+	if (!error)
+		error = xfs_free_agfl_block(tp, agno, agbno, agbp,
+					    &free->xefi_oinfo);
+
+	/*
+	 * Mark the transaction dirty, even on error. This ensures the
+	 * transaction is aborted, which:
+	 *
+	 * 1.) releases the EFI and frees the EFD
+	 * 2.) shuts down the filesystem
+	 */
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+
+	next_extent = efdp->efd_next_extent;
+	ASSERT(next_extent < efdp->efd_format.efd_nextents);
+	extp = &(efdp->efd_format.efd_extents[next_extent]);
+	extp->ext_start = free->xefi_startblock;
+	extp->ext_len = free->xefi_blockcount;
+	efdp->efd_next_extent++;
+
+	kmem_free(free);
+	return error;
+}
+
+/* sub-type with special handling for AGFL deferred frees */
+static const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
+	.type		= XFS_DEFER_OPS_TYPE_AGFL_FREE,
+	.max_items	= XFS_EFI_MAX_FAST_EXTENTS,
+	.diff_items	= xfs_extent_free_diff_items,
+	.create_intent	= xfs_extent_free_create_intent,
+	.abort_intent	= xfs_extent_free_abort_intent,
+	.log_item	= xfs_extent_free_log_item,
+	.create_done	= xfs_extent_free_create_done,
+	.finish_item	= xfs_agfl_free_finish_item,
+	.cancel_item	= xfs_extent_free_cancel_item,
+};
+
 /* Register the deferred op type. */
 void
 xfs_extent_free_init_defer_op(void)
 {
 	xfs_defer_init_op_type(&xfs_extent_free_defer_type);
+	xfs_defer_init_op_type(&xfs_agfl_free_defer_type);
 }
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 07cea59..f7bd796 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -133,14 +133,13 @@
 	 * set however, then go ahead and bump the i_version counter
 	 * unconditionally.
 	 */
-	if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
+	if (!test_and_set_bit(XFS_LI_DIRTY, &ip->i_itemp->ili_item.li_flags) &&
 	    IS_I_VERSION(VFS_I(ip))) {
 		if (inode_maybe_inc_iversion(VFS_I(ip), flags & XFS_ILOG_CORE))
 			flags |= XFS_ILOG_CORE;
 	}
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	ip->i_itemp->ili_item.li_desc->lid_flags |= XFS_LID_DIRTY;
 
 	/*
 	 * Always OR in the bits from the ili_last_fields field.
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index be24b0c8..9717ae7 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -19,7 +19,6 @@
 #define	__XFS_TRANS_PRIV_H__
 
 struct xfs_log_item;
-struct xfs_log_item_desc;
 struct xfs_mount;
 struct xfs_trans;
 struct xfs_ail;
@@ -119,7 +118,7 @@
 
 	spin_lock(&ailp->ail_lock);
 	/* xfs_trans_ail_delete() drops the AIL lock */
-	if (lip->li_flags & XFS_LI_IN_AIL)
+	if (test_bit(XFS_LI_IN_AIL, &lip->li_flags))
 		xfs_trans_ail_delete(ailp, lip, shutdown_type);
 	else
 		spin_unlock(&ailp->ail_lock);
@@ -171,11 +170,10 @@
 {
 	struct xfs_buf	*bp = lip->li_buf;
 
-	ASSERT(lip->li_flags & XFS_LI_IN_AIL);
+	ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
 	lockdep_assert_held(&lip->li_ailp->ail_lock);
 
-	if (lip->li_flags & XFS_LI_FAILED) {
-		lip->li_flags &= ~XFS_LI_FAILED;
+	if (test_and_clear_bit(XFS_LI_FAILED, &lip->li_flags)) {
 		lip->li_buf = NULL;
 		xfs_buf_rele(bp);
 	}
@@ -188,9 +186,8 @@
 {
 	lockdep_assert_held(&lip->li_ailp->ail_lock);
 
-	if (!(lip->li_flags & XFS_LI_FAILED)) {
+	if (!test_and_set_bit(XFS_LI_FAILED, &lip->li_flags)) {
 		xfs_buf_hold(bp);
-		lip->li_flags |= XFS_LI_FAILED;
 		lip->li_buf = bp;
 	}
 }
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c
index 94c1877..c7f8e82 100644
--- a/fs/xfs/xfs_trans_refcount.c
+++ b/fs/xfs/xfs_trans_refcount.c
@@ -77,7 +77,7 @@
 	 * 2.) shuts down the filesystem
 	 */
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	cudp->cud_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
 
 	return error;
 }
@@ -154,7 +154,7 @@
 	refc = container_of(item, struct xfs_refcount_intent, ri_list);
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	cuip->cui_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
 
 	/*
 	 * atomic_inc_return gives us the value after the increment;
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
index 9b577be..5831ca0 100644
--- a/fs/xfs/xfs_trans_rmap.c
+++ b/fs/xfs/xfs_trans_rmap.c
@@ -117,7 +117,7 @@
 	 * 2.) shuts down the filesystem
 	 */
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	rudp->rud_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
 
 	return error;
 }
@@ -175,7 +175,7 @@
 	rmap = container_of(item, struct xfs_rmap_intent, ri_list);
 
 	tp->t_flags |= XFS_TRANS_DIRTY;
-	ruip->rui_item.li_desc->lid_flags |= XFS_LID_DIRTY;
+	set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
 
 	/*
 	 * atomic_inc_return gives us the value after the increment;
diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h
index 7b289dd..6f69a4f 100644
--- a/include/acpi/acnames.h
+++ b/include/acpi/acnames.h
@@ -49,11 +49,14 @@
 /* Definitions of the predefined namespace names  */
 
 #define ACPI_UNKNOWN_NAME       (u32) 0x3F3F3F3F	/* Unknown name is "????" */
-#define ACPI_ROOT_NAME          (u32) 0x5F5F5F5C	/* Root name is    "\___" */
-
 #define ACPI_PREFIX_MIXED       (u32) 0x69706341	/* "Acpi" */
 #define ACPI_PREFIX_LOWER       (u32) 0x69706361	/* "acpi" */
 
+/* Root name stuff */
+
+#define ACPI_ROOT_NAME          (u32) 0x5F5F5F5C	/* Root name is    "\___" */
+#define ACPI_ROOT_PATHNAME      "\\___"
+#define ACPI_NAMESPACE_ROOT     "Namespace Root"
 #define ACPI_NS_ROOT_PATH       "\\"
 
 #endif				/* __ACNAMES_H__  */
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 540d35f..eb1f21a 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -98,6 +98,27 @@
 #endif
 
 /*
+ * RAW spinlock primitives. If the OS does not provide them, fall back to
+ * the plain spinlock primitives.
+ */
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_raw_lock
+# define acpi_os_create_raw_lock(out_handle)	acpi_os_create_lock(out_handle)
+#endif
+
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_delete_raw_lock
+# define acpi_os_delete_raw_lock(handle)	acpi_os_delete_lock(handle)
+#endif
+
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_raw_lock
+# define acpi_os_acquire_raw_lock(handle)	acpi_os_acquire_lock(handle)
+#endif
+
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_release_raw_lock
+# define acpi_os_release_raw_lock(handle, flags)	\
+	acpi_os_release_lock(handle, flags)
+#endif
+
+/*
  * Semaphore primitives
  */
 #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_semaphore
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index da0215e..77d71bd 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20180313
+#define ACPI_CA_VERSION                 0x20180508
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
@@ -753,6 +753,7 @@
 						     u32 gpe_number,
 						     acpi_event_status
 						     *event_status))
+ACPI_HW_DEPENDENT_RETURN_VOID(void acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void))
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 1c530f9..2b1bafa 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -245,6 +245,10 @@
 #define acpi_spinlock                   void *
 #endif
 
+#ifndef acpi_raw_spinlock
+#define acpi_raw_spinlock		acpi_spinlock
+#endif
+
 #ifndef acpi_semaphore
 #define acpi_semaphore                  void *
 #endif
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 2010c05..8e0b825 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -20,14 +20,16 @@
 #include <acpi/pcc.h>
 #include <acpi/processor.h>
 
-/* Only support CPPCv2 for now. */
-#define CPPC_NUM_ENT	21
-#define CPPC_REV	2
+/* Support CPPCv2 and CPPCv3  */
+#define CPPC_V2_REV	2
+#define CPPC_V3_REV	3
+#define CPPC_V2_NUM_ENT	21
+#define CPPC_V3_NUM_ENT	23
 
 #define PCC_CMD_COMPLETE_MASK	(1 << 0)
 #define PCC_ERROR_MASK		(1 << 2)
 
-#define MAX_CPC_REG_ENT 19
+#define MAX_CPC_REG_ENT 21
 
 /* CPPC specific PCC commands. */
 #define	CMD_READ 0
@@ -91,6 +93,8 @@
 	AUTO_ACT_WINDOW,
 	ENERGY_PERF,
 	REFERENCE_PERF,
+	LOWEST_FREQ,
+	NOMINAL_FREQ,
 };
 
 /*
@@ -104,6 +108,8 @@
 	u32 nominal_perf;
 	u32 lowest_perf;
 	u32 lowest_nonlinear_perf;
+	u32 lowest_freq;
+	u32 nominal_freq;
 };
 
 struct cppc_perf_ctrls {
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 8feb0c8..1624e2b 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -55,22 +55,21 @@
 /* From drivers/edac/ghes_edac.c */
 
 #ifdef CONFIG_EDAC_GHES
-void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
-				struct cper_sec_mem_err *mem_err);
+void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
 
 int ghes_edac_register(struct ghes *ghes, struct device *dev);
 
 void ghes_edac_unregister(struct ghes *ghes);
 
 #else
-static inline void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
+static inline void ghes_edac_report_mem_error(int sev,
 				       struct cper_sec_mem_err *mem_err)
 {
 }
 
 static inline int ghes_edac_register(struct ghes *ghes, struct device *dev)
 {
-	return 0;
+	return -ENODEV;
 }
 
 static inline void ghes_edac_unregister(struct ghes *ghes)
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index a0b2327..7451b3b 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -102,6 +102,7 @@
 
 #define acpi_cache_t                        struct kmem_cache
 #define acpi_spinlock                       spinlock_t *
+#define acpi_raw_spinlock                   raw_spinlock_t *
 #define acpi_cpu_flags                      unsigned long
 
 /* Use native linux version of acpi_os_allocate_zeroed */
@@ -119,6 +120,10 @@
 #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_object
 #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
 #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock
+#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_raw_lock
+#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_delete_raw_lock
+#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_acquire_raw_lock
+#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_release_raw_lock
 
 /*
  * OSL interfaces used by debugger/disassembler
diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h
index 7e81475..d754a1b 100644
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -90,6 +90,36 @@
 		lock ? AE_OK : AE_NO_MEMORY; \
 	})
 
+
+#define acpi_os_create_raw_lock(__handle) \
+	({ \
+		raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
+		if (lock) { \
+			*(__handle) = lock; \
+			raw_spin_lock_init(*(__handle)); \
+		} \
+		lock ? AE_OK : AE_NO_MEMORY; \
+	})
+
+static inline acpi_cpu_flags acpi_os_acquire_raw_lock(acpi_raw_spinlock lockp)
+{
+	acpi_cpu_flags flags;
+
+	raw_spin_lock_irqsave(lockp, flags);
+	return flags;
+}
+
+static inline void acpi_os_release_raw_lock(acpi_raw_spinlock lockp,
+					    acpi_cpu_flags flags)
+{
+	raw_spin_unlock_irqrestore(lockp, flags);
+}
+
+static inline void acpi_os_delete_raw_lock(acpi_raw_spinlock handle)
+{
+	ACPI_FREE(handle);
+}
+
 static inline u8 acpi_os_readable(void *pointer, acpi_size length)
 {
 	return TRUE;
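
On Linux the raw variants map to raw_spinlock_t, which remains a true
spinning lock under PREEMPT_RT; OSes that don't opt in fall back to the
plain lock macros from acpiosxf.h. A usage sketch under those
definitions:

	static void demo(void)
	{
		acpi_raw_spinlock lock;
		acpi_cpu_flags flags;

		if (ACPI_FAILURE(acpi_os_create_raw_lock(&lock)))
			return;
		flags = acpi_os_acquire_raw_lock(lock);
		/* critical section; must not sleep, even on RT */
		acpi_os_release_raw_lock(lock, flags);
		acpi_os_delete_raw_lock(lock);
	}
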
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 34a028a..87d14476 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -25,6 +25,7 @@
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
 #define ATOMIC_LONG_PFX(x)	atomic64 ## x
+#define ATOMIC_LONG_TYPE	s64
 
 #else
 
@@ -32,6 +33,7 @@
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
 #define ATOMIC_LONG_PFX(x)	atomic ## x
+#define ATOMIC_LONG_TYPE	int
 
 #endif
 
@@ -90,6 +92,21 @@
 #define atomic_long_cmpxchg(l, old, new) \
 	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
 
+#define atomic_long_try_cmpxchg_relaxed(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_acquire(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_release(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), \
+				       (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+
 #define atomic_long_xchg_relaxed(v, new) \
 	(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 #define atomic_long_xchg_acquire(v, new) \
@@ -244,6 +261,8 @@
 #define atomic_long_inc_not_zero(l) \
 	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
 
+#define atomic_long_cond_read_relaxed(v, c) \
+	ATOMIC_LONG_PFX(_cond_read_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (c))
 #define atomic_long_cond_read_acquire(v, c) \
 	ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c))
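
As with the underlying atomic{,64}_try_cmpxchg(), these return a
boolean and write the observed value back through the old pointer on
failure, so retry loops don't need a separate re-read. A sketch under
that contract:

	static void add_if_positive(atomic_long_t *v, long delta)
	{
		long old = atomic_long_read(v);

		do {
			if (old <= 0)
				return;
			/* on failure, old is refreshed with *v's value */
		} while (!atomic_long_try_cmpxchg(v, &old, old + delta));
	}
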
 
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 29458bb..2cafdbb 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -221,18 +221,17 @@
 #endif
 
 /**
- * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarantees
  * @ptr: pointer to the variable to wait on
  * @cond: boolean expression to wait for
  *
- * Equivalent to using smp_load_acquire() on the condition variable but employs
- * the control dependency of the wait to reduce the barrier on many platforms.
+ * Equivalent to using READ_ONCE() on the condition variable.
  *
  * Due to C lacking lambda expressions we load the value of *ptr into a
  * pre-named variable @VAL to be used in @cond.
  */
-#ifndef smp_cond_load_acquire
-#define smp_cond_load_acquire(ptr, cond_expr) ({		\
+#ifndef smp_cond_load_relaxed
+#define smp_cond_load_relaxed(ptr, cond_expr) ({		\
 	typeof(ptr) __PTR = (ptr);				\
 	typeof(*ptr) VAL;					\
 	for (;;) {						\
@@ -241,10 +240,26 @@
 			break;					\
 		cpu_relax();					\
 	}							\
-	smp_acquire__after_ctrl_dep();				\
 	VAL;							\
 })
 #endif
 
+/**
+ * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
+ * @ptr: pointer to the variable to wait on
+ * @cond: boolean expression to wait for
+ *
+ * Equivalent to using smp_load_acquire() on the condition variable but employs
+ * the control dependency of the wait to reduce the barrier on many platforms.
+ */
+#ifndef smp_cond_load_acquire
+#define smp_cond_load_acquire(ptr, cond_expr) ({		\
+	typeof(*ptr) _val;					\
+	_val = smp_cond_load_relaxed(ptr, cond_expr);		\
+	smp_acquire__after_ctrl_dep();				\
+	_val;							\
+})
+#endif
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
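
The split lets callers that don't need ordering (for example, spins
whose result is re-checked under a lock) skip the acquire barrier,
while smp_cond_load_acquire() keeps its old semantics by layering
smp_acquire__after_ctrl_dep() on top. A sketch of the common
acquire-side use, with a hypothetical flag:

	static u32 wait_for_data_ready(u32 *flag)
	{
		/* spin until *flag is nonzero, then ACQUIRE-order the
		 * subsequent reads of whatever the flag publishes */
		return smp_cond_load_acquire(flag, VAL != 0);
	}
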
diff --git a/include/asm-generic/compat.h b/include/asm-generic/compat.h
new file mode 100644
index 0000000..2881945
--- /dev/null
+++ b/include/asm-generic/compat.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* This is an empty stub for 32-bit-only architectures */
diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h
index 880a292..ad28682 100644
--- a/include/asm-generic/dma-mapping.h
+++ b/include/asm-generic/dma-mapping.h
@@ -4,7 +4,16 @@
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
+	/*
+	 * Use the non-coherent ops if available.  If an architecture wants a
+	 * more fine-grained selection of operations it will have to implement
+	 * get_arch_dma_ops itself or use the per-device dma_ops.
+	 */
+#ifdef CONFIG_DMA_NONCOHERENT_OPS
+	return &dma_noncoherent_ops;
+#else
 	return &dma_direct_ops;
+#endif
 }
 
 #endif /* _ASM_GENERIC_DMA_MAPPING_H */
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 719db19..68efb95 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -19,42 +19,32 @@
 #define KCRC_ALIGN 4
 #endif
 
-#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
-#define KSYM(name) _##name
-#else
-#define KSYM(name) name
-#endif
-
 /*
  * note on .section use: @progbits vs %progbits nastiness doesn't matter,
  * since we immediately emit into those sections anyway.
  */
 .macro ___EXPORT_SYMBOL name,val,sec
 #ifdef CONFIG_MODULES
-	.globl KSYM(__ksymtab_\name)
+	.globl __ksymtab_\name
 	.section ___ksymtab\sec+\name,"a"
 	.balign KSYM_ALIGN
-KSYM(__ksymtab_\name):
-	__put \val, KSYM(__kstrtab_\name)
+__ksymtab_\name:
+	__put \val, __kstrtab_\name
 	.previous
 	.section __ksymtab_strings,"a"
-KSYM(__kstrtab_\name):
-#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
-	.asciz "_\name"
-#else
+__kstrtab_\name:
 	.asciz "\name"
-#endif
 	.previous
 #ifdef CONFIG_MODVERSIONS
 	.section ___kcrctab\sec+\name,"a"
 	.balign KCRC_ALIGN
-KSYM(__kcrctab_\name):
+__kcrctab_\name:
 #if defined(CONFIG_MODULE_REL_CRCS)
-	.long KSYM(__crc_\name) - .
+	.long __crc_\name - .
 #else
-	.long KSYM(__crc_\name)
+	.long __crc_\name
 #endif
-	.weak KSYM(__crc_\name)
+	.weak __crc_\name
 	.previous
 #endif
 #endif
@@ -84,12 +74,12 @@
 #endif
 
 #define EXPORT_SYMBOL(name)					\
-	__EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)),)
+	__EXPORT_SYMBOL(name, KSYM_FUNC(name),)
 #define EXPORT_SYMBOL_GPL(name) 				\
-	__EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)), _gpl)
+	__EXPORT_SYMBOL(name, KSYM_FUNC(name), _gpl)
 #define EXPORT_DATA_SYMBOL(name)				\
-	__EXPORT_SYMBOL(name, KSYM(name),)
+	__EXPORT_SYMBOL(name, name,)
 #define EXPORT_DATA_SYMBOL_GPL(name)				\
-	__EXPORT_SYMBOL(name, KSYM(name),_gpl)
+	__EXPORT_SYMBOL(name, name,_gpl)
 
 #endif
diff --git a/include/asm-generic/pci.h b/include/asm-generic/pci.h
index 830d765..6bb3cd3 100644
--- a/include/asm-generic/pci.h
+++ b/include/asm-generic/pci.h
@@ -14,12 +14,4 @@
 }
 #endif /* HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ */
 
-/*
- * By default, assume that no iommu is in use and that the PCI
- * space is mapped to address physical 0.
- */
-#ifndef PCI_DMA_BUS_IS_PHYS
-#define PCI_DMA_BUS_IS_PHYS	(1)
-#endif
-
 #endif /* _ASM_GENERIC_PCI_H */
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index b37b4ad..9cc4575 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -26,7 +26,6 @@
  * @lock: Pointer to queued spinlock structure
  * Return: 1 if it is locked, 0 otherwise
  */
-#ifndef queued_spin_is_locked
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	/*
@@ -35,7 +34,6 @@
 	 */
 	return atomic_read(&lock->val);
 }
-#endif
 
 /**
  * queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -100,7 +98,7 @@
 	/*
 	 * unlock() needs release semantics:
 	 */
-	(void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val);
+	smp_store_release(&lock->locked, 0);
 }
 #endif
 
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 034acd0..0763f06 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -29,13 +29,41 @@
 #endif
 
 typedef struct qspinlock {
-	atomic_t	val;
+	union {
+		atomic_t val;
+
+		/*
+		 * By using the whole 2nd least significant byte for the
+		 * pending bit, we can allow better optimization of the lock
+		 * acquisition for the pending bit holder.
+		 */
+#ifdef __LITTLE_ENDIAN
+		struct {
+			u8	locked;
+			u8	pending;
+		};
+		struct {
+			u16	locked_pending;
+			u16	tail;
+		};
+#else
+		struct {
+			u16	tail;
+			u16	locked_pending;
+		};
+		struct {
+			u8	reserved[2];
+			u8	pending;
+			u8	locked;
+		};
+#endif
+	};
 } arch_spinlock_t;
 
 /*
  * Initializer
  */
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ ATOMIC_INIT(0) }
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ .val = ATOMIC_INIT(0) }
 
 /*
  * Bitfields in the atomic value:
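
This union is what makes the byte-wide unlock in qspinlock.h above
legal: the locked byte aliases the least significant byte of val
(offset 0 on little-endian, offset 3 on big-endian), so a release
store of 0 to ->locked clears exactly the lock byte with no
read-modify-write. A compile-time sketch of that aliasing assumption:

	static inline void qspinlock_layout_check(void)
	{
	#ifdef __LITTLE_ENDIAN
		BUILD_BUG_ON(offsetof(struct qspinlock, locked) != 0);
	#else
		BUILD_BUG_ON(offsetof(struct qspinlock, locked) != 3);
	#endif
	}
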
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index af24057..e373e2e 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -64,15 +64,24 @@
  * generates .data.identifier sections, which need to be pulled in with
  * .data. We don't want to pull in .data..other sections, which Linux
  * has defined. Same for text and bss.
+ *
+ * RODATA_MAIN is not used because existing code already defines .rodata.x
+ * sections to be brought in with rodata.
  */
 #ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
 #define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
 #define DATA_MAIN .data .data.[0-9a-zA-Z_]*
+#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
+#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]*
 #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]*
+#define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]*
 #else
 #define TEXT_MAIN .text
 #define DATA_MAIN .data
+#define SDATA_MAIN .sdata
+#define RODATA_MAIN .rodata
 #define BSS_MAIN .bss
+#define SBSS_MAIN .sbss
 #endif
 
 /*
@@ -104,66 +113,66 @@
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 #define MCOUNT_REC()	. = ALIGN(8);				\
-			VMLINUX_SYMBOL(__start_mcount_loc) = .; \
-			*(__mcount_loc)				\
-			VMLINUX_SYMBOL(__stop_mcount_loc) = .;
+			__start_mcount_loc = .;			\
+			KEEP(*(__mcount_loc))			\
+			__stop_mcount_loc = .;
 #else
 #define MCOUNT_REC()
 #endif
 
 #ifdef CONFIG_TRACE_BRANCH_PROFILING
-#define LIKELY_PROFILE()	VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \
-				*(_ftrace_annotated_branch)			      \
-				VMLINUX_SYMBOL(__stop_annotated_branch_profile) = .;
+#define LIKELY_PROFILE()	__start_annotated_branch_profile = .;	\
+				KEEP(*(_ftrace_annotated_branch))	\
+				__stop_annotated_branch_profile = .;
 #else
 #define LIKELY_PROFILE()
 #endif
 
 #ifdef CONFIG_PROFILE_ALL_BRANCHES
-#define BRANCH_PROFILE()	VMLINUX_SYMBOL(__start_branch_profile) = .;   \
-				*(_ftrace_branch)			      \
-				VMLINUX_SYMBOL(__stop_branch_profile) = .;
+#define BRANCH_PROFILE()	__start_branch_profile = .;		\
+				KEEP(*(_ftrace_branch))			\
+				__stop_branch_profile = .;
 #else
 #define BRANCH_PROFILE()
 #endif
 
 #ifdef CONFIG_KPROBES
 #define KPROBE_BLACKLIST()	. = ALIGN(8);				      \
-				VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
+				__start_kprobe_blacklist = .;		      \
 				KEEP(*(_kprobe_blacklist))		      \
-				VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;
+				__stop_kprobe_blacklist = .;
 #else
 #define KPROBE_BLACKLIST()
 #endif
 
 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
 #define ERROR_INJECT_WHITELIST()	STRUCT_ALIGN();			      \
-			VMLINUX_SYMBOL(__start_error_injection_whitelist) = .;\
+			__start_error_injection_whitelist = .;		      \
 			KEEP(*(_error_injection_whitelist))		      \
-			VMLINUX_SYMBOL(__stop_error_injection_whitelist) = .;
+			__stop_error_injection_whitelist = .;
 #else
 #define ERROR_INJECT_WHITELIST()
 #endif
 
 #ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	. = ALIGN(8);					\
-			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
+			__start_ftrace_events = .;			\
 			KEEP(*(_ftrace_events))				\
-			VMLINUX_SYMBOL(__stop_ftrace_events) = .;	\
-			VMLINUX_SYMBOL(__start_ftrace_eval_maps) = .;	\
+			__stop_ftrace_events = .;			\
+			__start_ftrace_eval_maps = .;			\
 			KEEP(*(_ftrace_eval_map))			\
-			VMLINUX_SYMBOL(__stop_ftrace_eval_maps) = .;
+			__stop_ftrace_eval_maps = .;
 #else
 #define FTRACE_EVENTS()
 #endif
 
 #ifdef CONFIG_TRACING
-#define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .;      \
+#define TRACE_PRINTKS()	 __start___trace_bprintk_fmt = .;      \
 			 KEEP(*(__trace_printk_fmt)) /* Trace_printk fmt' pointer */ \
-			 VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;
-#define TRACEPOINT_STR() VMLINUX_SYMBOL(__start___tracepoint_str) = .;	\
+			 __stop___trace_bprintk_fmt = .;
+#define TRACEPOINT_STR() __start___tracepoint_str = .;	\
 			 KEEP(*(__tracepoint_str)) /* Trace_printk fmt' pointer */ \
-			 VMLINUX_SYMBOL(__stop___tracepoint_str) = .;
+			 __stop___tracepoint_str = .;
 #else
 #define TRACE_PRINTKS()
 #define TRACEPOINT_STR()
@@ -171,27 +180,27 @@
 
 #ifdef CONFIG_FTRACE_SYSCALLS
 #define TRACE_SYSCALLS() . = ALIGN(8);					\
-			 VMLINUX_SYMBOL(__start_syscalls_metadata) = .;	\
+			 __start_syscalls_metadata = .;			\
 			 KEEP(*(__syscalls_metadata))			\
-			 VMLINUX_SYMBOL(__stop_syscalls_metadata) = .;
+			 __stop_syscalls_metadata = .;
 #else
 #define TRACE_SYSCALLS()
 #endif
 
 #ifdef CONFIG_BPF_EVENTS
 #define BPF_RAW_TP() STRUCT_ALIGN();					\
-			 VMLINUX_SYMBOL(__start__bpf_raw_tp) = .;	\
+			 __start__bpf_raw_tp = .;			\
 			 KEEP(*(__bpf_raw_tp_map))			\
-			 VMLINUX_SYMBOL(__stop__bpf_raw_tp) = .;
+			 __stop__bpf_raw_tp = .;
 #else
 #define BPF_RAW_TP()
 #endif
 
 #ifdef CONFIG_SERIAL_EARLYCON
 #define EARLYCON_TABLE() . = ALIGN(8);				\
-			 VMLINUX_SYMBOL(__earlycon_table) = .;	\
+			 __earlycon_table = .;			\
 			 KEEP(*(__earlycon_table))		\
-			 VMLINUX_SYMBOL(__earlycon_table_end) = .;
+			 __earlycon_table_end = .;
 #else
 #define EARLYCON_TABLE()
 #endif
@@ -202,7 +211,7 @@
 #define _OF_TABLE_0(name)
 #define _OF_TABLE_1(name)						\
 	. = ALIGN(8);							\
-	VMLINUX_SYMBOL(__##name##_of_table) = .;			\
+	__##name##_of_table = .;					\
 	KEEP(*(__##name##_of_table))					\
 	KEEP(*(__##name##_of_table_end))
 
@@ -217,18 +226,18 @@
 #ifdef CONFIG_ACPI
 #define ACPI_PROBE_TABLE(name)						\
 	. = ALIGN(8);							\
-	VMLINUX_SYMBOL(__##name##_acpi_probe_table) = .;		\
+	__##name##_acpi_probe_table = .;				\
 	KEEP(*(__##name##_acpi_probe_table))				\
-	VMLINUX_SYMBOL(__##name##_acpi_probe_table_end) = .;
+	__##name##_acpi_probe_table_end = .;
 #else
 #define ACPI_PROBE_TABLE(name)
 #endif
 
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
-	VMLINUX_SYMBOL(__dtb_start) = .;				\
+	__dtb_start = .;						\
 	KEEP(*(.dtb.init.rodata))					\
-	VMLINUX_SYMBOL(__dtb_end) = .;
+	__dtb_end = .;
 
 /*
  * .data section
@@ -238,23 +247,23 @@
 	*(DATA_MAIN)							\
 	*(.ref.data)							\
 	*(.data..shared_aligned) /* percpu related */			\
-	MEM_KEEP(init.data)						\
-	MEM_KEEP(exit.data)						\
+	MEM_KEEP(init.data*)						\
+	MEM_KEEP(exit.data*)						\
 	*(.data.unlikely)						\
-	VMLINUX_SYMBOL(__start_once) = .;				\
+	__start_once = .;						\
 	*(.data.once)							\
-	VMLINUX_SYMBOL(__end_once) = .;					\
+	__end_once = .;							\
 	STRUCT_ALIGN();							\
 	*(__tracepoints)						\
 	/* implement dynamic printk debug */				\
 	. = ALIGN(8);                                                   \
-	VMLINUX_SYMBOL(__start___jump_table) = .;                       \
+	__start___jump_table = .;					\
 	KEEP(*(__jump_table))                                           \
-	VMLINUX_SYMBOL(__stop___jump_table) = .;                        \
+	__stop___jump_table = .;					\
 	. = ALIGN(8);							\
-	VMLINUX_SYMBOL(__start___verbose) = .;                          \
+	__start___verbose = .;						\
 	KEEP(*(__verbose))                                              \
-	VMLINUX_SYMBOL(__stop___verbose) = .;				\
+	__stop___verbose = .;						\
 	LIKELY_PROFILE()		       				\
 	BRANCH_PROFILE()						\
 	TRACE_PRINTKS()							\
@@ -266,10 +275,10 @@
  */
 #define NOSAVE_DATA							\
 	. = ALIGN(PAGE_SIZE);						\
-	VMLINUX_SYMBOL(__nosave_begin) = .;				\
+	__nosave_begin = .;						\
 	*(.data..nosave)						\
 	. = ALIGN(PAGE_SIZE);						\
-	VMLINUX_SYMBOL(__nosave_end) = .;
+	__nosave_end = .;
 
 #define PAGE_ALIGNED_DATA(page_align)					\
 	. = ALIGN(page_align);						\
@@ -286,13 +295,13 @@
 
 #define INIT_TASK_DATA(align)						\
 	. = ALIGN(align);						\
-	VMLINUX_SYMBOL(__start_init_task) = .;				\
-	VMLINUX_SYMBOL(init_thread_union) = .;				\
-	VMLINUX_SYMBOL(init_stack) = .;					\
-	*(.data..init_task)						\
-	*(.data..init_thread_info)					\
-	. = VMLINUX_SYMBOL(__start_init_task) + THREAD_SIZE;		\
-	VMLINUX_SYMBOL(__end_init_task) = .;
+	__start_init_task = .;						\
+	init_thread_union = .;						\
+	init_stack = .;							\
+	KEEP(*(.data..init_task))					\
+	KEEP(*(.data..init_thread_info))				\
+	. = __start_init_task + THREAD_SIZE;				\
+	__end_init_task = .;
 
 /*
  * Allow architectures to handle ro_after_init data on their
@@ -300,9 +309,9 @@
  */
 #ifndef RO_AFTER_INIT_DATA
 #define RO_AFTER_INIT_DATA						\
-	VMLINUX_SYMBOL(__start_ro_after_init) = .;			\
+	__start_ro_after_init = .;					\
 	*(.data..ro_after_init)						\
-	VMLINUX_SYMBOL(__end_ro_after_init) = .;
+	__end_ro_after_init = .;
 #endif
 
 /*
@@ -311,14 +320,14 @@
 #define RO_DATA_SECTION(align)						\
 	. = ALIGN((align));						\
 	.rodata           : AT(ADDR(.rodata) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__start_rodata) = .;			\
+		__start_rodata = .;					\
 		*(.rodata) *(.rodata.*)					\
 		RO_AFTER_INIT_DATA	/* Read only after init */	\
 		KEEP(*(__vermagic))	/* Kernel version magic */	\
 		. = ALIGN(8);						\
-		VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .;		\
+		__start___tracepoints_ptrs = .;				\
 		KEEP(*(__tracepoints_ptrs)) /* Tracepoints: pointer array */ \
-		VMLINUX_SYMBOL(__stop___tracepoints_ptrs) = .;		\
+		__stop___tracepoints_ptrs = .;				\
 		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
 									\
@@ -328,109 +337,109 @@
 									\
 	/* PCI quirks */						\
 	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__start_pci_fixups_early) = .;		\
+		__start_pci_fixups_early = .;				\
 		KEEP(*(.pci_fixup_early))				\
-		VMLINUX_SYMBOL(__end_pci_fixups_early) = .;		\
-		VMLINUX_SYMBOL(__start_pci_fixups_header) = .;		\
+		__end_pci_fixups_early = .;				\
+		__start_pci_fixups_header = .;				\
 		KEEP(*(.pci_fixup_header))				\
-		VMLINUX_SYMBOL(__end_pci_fixups_header) = .;		\
-		VMLINUX_SYMBOL(__start_pci_fixups_final) = .;		\
+		__end_pci_fixups_header = .;				\
+		__start_pci_fixups_final = .;				\
 		KEEP(*(.pci_fixup_final))				\
-		VMLINUX_SYMBOL(__end_pci_fixups_final) = .;		\
-		VMLINUX_SYMBOL(__start_pci_fixups_enable) = .;		\
+		__end_pci_fixups_final = .;				\
+		__start_pci_fixups_enable = .;				\
 		KEEP(*(.pci_fixup_enable))				\
-		VMLINUX_SYMBOL(__end_pci_fixups_enable) = .;		\
-		VMLINUX_SYMBOL(__start_pci_fixups_resume) = .;		\
+		__end_pci_fixups_enable = .;				\
+		__start_pci_fixups_resume = .;				\
 		KEEP(*(.pci_fixup_resume))				\
-		VMLINUX_SYMBOL(__end_pci_fixups_resume) = .;		\
-		VMLINUX_SYMBOL(__start_pci_fixups_resume_early) = .;	\
+		__end_pci_fixups_resume = .;				\
+		__start_pci_fixups_resume_early = .;			\
 		KEEP(*(.pci_fixup_resume_early))			\
-		VMLINUX_SYMBOL(__end_pci_fixups_resume_early) = .;	\
-		VMLINUX_SYMBOL(__start_pci_fixups_suspend) = .;		\
+		__end_pci_fixups_resume_early = .;			\
+		__start_pci_fixups_suspend = .;				\
 		KEEP(*(.pci_fixup_suspend))				\
-		VMLINUX_SYMBOL(__end_pci_fixups_suspend) = .;		\
-		VMLINUX_SYMBOL(__start_pci_fixups_suspend_late) = .;	\
+		__end_pci_fixups_suspend = .;				\
+		__start_pci_fixups_suspend_late = .;			\
 		KEEP(*(.pci_fixup_suspend_late))			\
-		VMLINUX_SYMBOL(__end_pci_fixups_suspend_late) = .;	\
+		__end_pci_fixups_suspend_late = .;			\
 	}								\
 									\
 	/* Built-in firmware blobs */					\
 	.builtin_fw        : AT(ADDR(.builtin_fw) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__start_builtin_fw) = .;			\
+		__start_builtin_fw = .;					\
 		KEEP(*(.builtin_fw))					\
-		VMLINUX_SYMBOL(__end_builtin_fw) = .;			\
+		__end_builtin_fw = .;					\
 	}								\
 									\
 	TRACEDATA							\
 									\
 	/* Kernel symbol table: Normal symbols */			\
 	__ksymtab         : AT(ADDR(__ksymtab) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__start___ksymtab) = .;			\
+		__start___ksymtab = .;					\
 		KEEP(*(SORT(___ksymtab+*)))				\
-		VMLINUX_SYMBOL(__stop___ksymtab) = .;			\
+		__stop___ksymtab = .;					\
 	}								\
 									\
 	/* Kernel symbol table: GPL-only symbols */			\
 	__ksymtab_gpl     : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__start___ksymtab_gpl) = .;		\
+		__start___ksymtab_gpl = .;				\
 		KEEP(*(SORT(___ksymtab_gpl+*)))				\
-		VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .;		\
+		__stop___ksymtab_gpl = .;				\
 	}								\
 									\
 	/* Kernel symbol table: Normal unused symbols */		\
 	__ksymtab_unused  : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__start___ksymtab_unused) = .;		\
+		__start___ksymtab_unused = .;				\
 		KEEP(*(SORT(___ksymtab_unused+*)))			\
-		VMLINUX_SYMBOL(__stop___ksymtab_unused) = .;		\
+		__stop___ksymtab_unused = .;				\
 	}								\
 									\
 	/* Kernel symbol table: GPL-only unused symbols */		\
 	__ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
-		VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .;	\
+		__start___ksymtab_unused_gpl = .;			\
 		KEEP(*(SORT(___ksymtab_unused_gpl+*)))			\
-		VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .;	\
+		__stop___ksymtab_unused_gpl = .;			\
 	}								\
 									\
 	/* Kernel symbol table: GPL-future-only symbols */		\
 	__ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
-		VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .;	\
+		__start___ksymtab_gpl_future = .;			\
 		KEEP(*(SORT(___ksymtab_gpl_future+*)))			\
-		VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .;	\
+		__stop___ksymtab_gpl_future = .;			\
 	}								\
 									\
 	/* Kernel symbol table: Normal symbols */			\
 	__kcrctab         : AT(ADDR(__kcrctab) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__start___kcrctab) = .;			\
+		__start___kcrctab = .;					\
 		KEEP(*(SORT(___kcrctab+*)))				\
-		VMLINUX_SYMBOL(__stop___kcrctab) = .;			\
+		__stop___kcrctab = .;					\
 	}								\
 									\
 	/* Kernel symbol table: GPL-only symbols */			\
 	__kcrctab_gpl     : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__start___kcrctab_gpl) = .;		\
+		__start___kcrctab_gpl = .;				\
 		KEEP(*(SORT(___kcrctab_gpl+*)))				\
-		VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .;		\
+		__stop___kcrctab_gpl = .;				\
 	}								\
 									\
 	/* Kernel symbol table: Normal unused symbols */		\
 	__kcrctab_unused  : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__start___kcrctab_unused) = .;		\
+		__start___kcrctab_unused = .;				\
 		KEEP(*(SORT(___kcrctab_unused+*)))			\
-		VMLINUX_SYMBOL(__stop___kcrctab_unused) = .;		\
+		__stop___kcrctab_unused = .;				\
 	}								\
 									\
 	/* Kernel symbol table: GPL-only unused symbols */		\
 	__kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
-		VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .;	\
+		__start___kcrctab_unused_gpl = .;			\
 		KEEP(*(SORT(___kcrctab_unused_gpl+*)))			\
-		VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .;	\
+		__stop___kcrctab_unused_gpl = .;			\
 	}								\
 									\
 	/* Kernel symbol table: GPL-future-only symbols */		\
 	__kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
-		VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .;	\
+		__start___kcrctab_gpl_future = .;			\
 		KEEP(*(SORT(___kcrctab_gpl_future+*)))			\
-		VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .;	\
+		__stop___kcrctab_gpl_future = .;			\
 	}								\
 									\
 	/* Kernel symbol table: strings */				\
@@ -447,18 +456,18 @@
 									\
 	/* Built-in module parameters. */				\
 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
-		VMLINUX_SYMBOL(__start___param) = .;			\
+		__start___param = .;					\
 		KEEP(*(__param))					\
-		VMLINUX_SYMBOL(__stop___param) = .;			\
+		__stop___param = .;					\
 	}								\
 									\
 	/* Built-in module versions. */					\
 	__modver : AT(ADDR(__modver) - LOAD_OFFSET) {			\
-		VMLINUX_SYMBOL(__start___modver) = .;			\
+		__start___modver = .;					\
 		KEEP(*(__modver))					\
-		VMLINUX_SYMBOL(__stop___modver) = .;			\
+		__stop___modver = .;					\
 		. = ALIGN((align));					\
-		VMLINUX_SYMBOL(__end_rodata) = .;			\
+		__end_rodata = .;					\
 	}								\
 	. = ALIGN((align));
 
@@ -469,9 +478,9 @@
 
 #define SECURITY_INIT							\
 	.security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
-		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
+		__security_initcall_start = .;				\
 		KEEP(*(.security_initcall.init))			\
-		VMLINUX_SYMBOL(__security_initcall_end) = .;		\
+		__security_initcall_end = .;				\
 	}
 
 /*
@@ -487,58 +496,58 @@
 		*(.text.hot TEXT_MAIN .text.fixup .text.unlikely)	\
 		*(.text..refcount)					\
 		*(.ref.text)						\
-	MEM_KEEP(init.text)						\
-	MEM_KEEP(exit.text)						\
+	MEM_KEEP(init.text*)						\
+	MEM_KEEP(exit.text*)						\
 
 
 /* sched.text is aligned to function alignment to ensure we have the same
  * address even at the second ld pass when generating System.map */
 #define SCHED_TEXT							\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__sched_text_start) = .;			\
+		__sched_text_start = .;					\
 		*(.sched.text)						\
-		VMLINUX_SYMBOL(__sched_text_end) = .;
+		__sched_text_end = .;
 
 /* spinlock.text is aligned to function alignment to ensure we have the same
  * address even at the second ld pass when generating System.map */
 #define LOCK_TEXT							\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__lock_text_start) = .;			\
+		__lock_text_start = .;					\
 		*(.spinlock.text)					\
-		VMLINUX_SYMBOL(__lock_text_end) = .;
+		__lock_text_end = .;
 
 #define CPUIDLE_TEXT							\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__cpuidle_text_start) = .;		\
+		__cpuidle_text_start = .;				\
 		*(.cpuidle.text)					\
-		VMLINUX_SYMBOL(__cpuidle_text_end) = .;
+		__cpuidle_text_end = .;
 
 #define KPROBES_TEXT							\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__kprobes_text_start) = .;		\
+		__kprobes_text_start = .;				\
 		*(.kprobes.text)					\
-		VMLINUX_SYMBOL(__kprobes_text_end) = .;
+		__kprobes_text_end = .;
 
 #define ENTRY_TEXT							\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__entry_text_start) = .;			\
+		__entry_text_start = .;					\
 		*(.entry.text)						\
-		VMLINUX_SYMBOL(__entry_text_end) = .;
+		__entry_text_end = .;
 
 #define IRQENTRY_TEXT							\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__irqentry_text_start) = .;		\
+		__irqentry_text_start = .;				\
 		*(.irqentry.text)					\
-		VMLINUX_SYMBOL(__irqentry_text_end) = .;
+		__irqentry_text_end = .;
 
 #define SOFTIRQENTRY_TEXT						\
 		ALIGN_FUNCTION();					\
-		VMLINUX_SYMBOL(__softirqentry_text_start) = .;		\
+		__softirqentry_text_start = .;				\
 		*(.softirqentry.text)					\
-		VMLINUX_SYMBOL(__softirqentry_text_end) = .;
+		__softirqentry_text_end = .;
 
 /* Section used for early init (in .S files) */
-#define HEAD_TEXT  *(.head.text)
+#define HEAD_TEXT  KEEP(*(.head.text))
 
 #define HEAD_TEXT_SECTION							\
 	.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {		\
@@ -551,9 +560,9 @@
 #define EXCEPTION_TABLE(align)						\
 	. = ALIGN(align);						\
 	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__start___ex_table) = .;			\
+		__start___ex_table = .;					\
 		KEEP(*(__ex_table))					\
-		VMLINUX_SYMBOL(__stop___ex_table) = .;			\
+		__stop___ex_table = .;					\
 	}
 
 /*
@@ -567,11 +576,11 @@
 
 #ifdef CONFIG_CONSTRUCTORS
 #define KERNEL_CTORS()	. = ALIGN(8);			   \
-			VMLINUX_SYMBOL(__ctors_start) = .; \
+			__ctors_start = .;		   \
 			KEEP(*(.ctors))			   \
 			KEEP(*(SORT(.init_array.*)))	   \
 			KEEP(*(.init_array))		   \
-			VMLINUX_SYMBOL(__ctors_end) = .;
+			__ctors_end = .;
 #else
 #define KERNEL_CTORS()
 #endif
@@ -579,11 +588,11 @@
 /* init and exit section handling */
 #define INIT_DATA							\
 	KEEP(*(SORT(___kentry+*)))					\
-	*(.init.data)							\
-	MEM_DISCARD(init.data)						\
+	*(.init.data init.data.*)					\
+	MEM_DISCARD(init.data*)						\
 	KERNEL_CTORS()							\
 	MCOUNT_REC()							\
-	*(.init.rodata)							\
+	*(.init.rodata .init.rodata.*)					\
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()						\
 	KPROBE_BLACKLIST()						\
@@ -602,16 +611,16 @@
 	EARLYCON_TABLE()
 
 #define INIT_TEXT							\
-	*(.init.text)							\
+	*(.init.text .init.text.*)					\
 	*(.text.startup)						\
-	MEM_DISCARD(init.text)
+	MEM_DISCARD(init.text*)
 
 #define EXIT_DATA							\
-	*(.exit.data)							\
+	*(.exit.data .exit.data.*)					\
 	*(.fini_array)							\
 	*(.dtors)							\
-	MEM_DISCARD(exit.data)						\
-	MEM_DISCARD(exit.rodata)
+	MEM_DISCARD(exit.data*)						\
+	MEM_DISCARD(exit.rodata*)
 
 #define EXIT_TEXT							\
 	*(.exit.text)							\
@@ -629,7 +638,7 @@
 	. = ALIGN(sbss_align);						\
 	.sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {				\
 		*(.dynsbss)						\
-		*(.sbss)						\
+		*(SBSS_MAIN)						\
 		*(.scommon)						\
 	}
 
@@ -706,9 +715,9 @@
 #define BUG_TABLE							\
 	. = ALIGN(8);							\
 	__bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__start___bug_table) = .;		\
+		__start___bug_table = .;				\
 		KEEP(*(__bug_table))					\
-		VMLINUX_SYMBOL(__stop___bug_table) = .;			\
+		__stop___bug_table = .;					\
 	}
 #else
 #define BUG_TABLE
@@ -718,22 +727,22 @@
 #define ORC_UNWIND_TABLE						\
 	. = ALIGN(4);							\
 	.orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__start_orc_unwind_ip) = .;		\
+		__start_orc_unwind_ip = .;				\
 		KEEP(*(.orc_unwind_ip))					\
-		VMLINUX_SYMBOL(__stop_orc_unwind_ip) = .;		\
+		__stop_orc_unwind_ip = .;				\
 	}								\
 	. = ALIGN(6);							\
 	.orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__start_orc_unwind) = .;			\
+		__start_orc_unwind = .;					\
 		KEEP(*(.orc_unwind))					\
-		VMLINUX_SYMBOL(__stop_orc_unwind) = .;			\
+		__stop_orc_unwind = .;					\
 	}								\
 	. = ALIGN(4);							\
 	.orc_lookup : AT(ADDR(.orc_lookup) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(orc_lookup) = .;				\
+		orc_lookup = .;						\
 		. += (((SIZEOF(.text) + LOOKUP_BLOCK_SIZE - 1) /	\
 			LOOKUP_BLOCK_SIZE) + 1) * 4;			\
-		VMLINUX_SYMBOL(orc_lookup_end) = .;			\
+		orc_lookup_end = .;					\
 	}
 #else
 #define ORC_UNWIND_TABLE
@@ -743,9 +752,9 @@
 #define TRACEDATA							\
 	. = ALIGN(4);							\
 	.tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(__tracedata_start) = .;			\
+		__tracedata_start = .;					\
 		KEEP(*(.tracedata))					\
-		VMLINUX_SYMBOL(__tracedata_end) = .;			\
+		__tracedata_end = .;					\
 	}
 #else
 #define TRACEDATA
@@ -753,24 +762,24 @@
 
 #define NOTES								\
 	.notes : AT(ADDR(.notes) - LOAD_OFFSET) {			\
-		VMLINUX_SYMBOL(__start_notes) = .;			\
-		*(.note.*)						\
-		VMLINUX_SYMBOL(__stop_notes) = .;			\
+		__start_notes = .;					\
+		KEEP(*(.note.*))					\
+		__stop_notes = .;					\
 	}
 
 #define INIT_SETUP(initsetup_align)					\
 		. = ALIGN(initsetup_align);				\
-		VMLINUX_SYMBOL(__setup_start) = .;			\
+		__setup_start = .;					\
 		KEEP(*(.init.setup))					\
-		VMLINUX_SYMBOL(__setup_end) = .;
+		__setup_end = .;
 
 #define INIT_CALLS_LEVEL(level)						\
-		VMLINUX_SYMBOL(__initcall##level##_start) = .;		\
+		__initcall##level##_start = .;				\
 		KEEP(*(.initcall##level##.init))			\
 		KEEP(*(.initcall##level##s.init))			\
 
 #define INIT_CALLS							\
-		VMLINUX_SYMBOL(__initcall_start) = .;			\
+		__initcall_start = .;					\
 		KEEP(*(.initcallearly.init))				\
 		INIT_CALLS_LEVEL(0)					\
 		INIT_CALLS_LEVEL(1)					\
@@ -781,22 +790,22 @@
 		INIT_CALLS_LEVEL(rootfs)				\
 		INIT_CALLS_LEVEL(6)					\
 		INIT_CALLS_LEVEL(7)					\
-		VMLINUX_SYMBOL(__initcall_end) = .;
+		__initcall_end = .;
 
 #define CON_INITCALL							\
-		VMLINUX_SYMBOL(__con_initcall_start) = .;		\
+		__con_initcall_start = .;				\
 		KEEP(*(.con_initcall.init))				\
-		VMLINUX_SYMBOL(__con_initcall_end) = .;
+		__con_initcall_end = .;
 
 #define SECURITY_INITCALL						\
-		VMLINUX_SYMBOL(__security_initcall_start) = .;		\
+		__security_initcall_start = .;				\
 		KEEP(*(.security_initcall.init))			\
-		VMLINUX_SYMBOL(__security_initcall_end) = .;
+		__security_initcall_end = .;
 
 #ifdef CONFIG_BLK_DEV_INITRD
 #define INIT_RAM_FS							\
 	. = ALIGN(4);							\
-	VMLINUX_SYMBOL(__initramfs_start) = .;				\
+	__initramfs_start = .;						\
 	KEEP(*(.init.ramfs))						\
 	. = ALIGN(8);							\
 	KEEP(*(.init.ramfs.info))
@@ -851,7 +860,7 @@
  * sharing between subsections for different purposes.
  */
 #define PERCPU_INPUT(cacheline)						\
-	VMLINUX_SYMBOL(__per_cpu_start) = .;				\
+	__per_cpu_start = .;						\
 	*(.data..percpu..first)						\
 	. = ALIGN(PAGE_SIZE);						\
 	*(.data..percpu..page_aligned)					\
@@ -861,7 +870,7 @@
 	*(.data..percpu)						\
 	*(.data..percpu..shared_aligned)				\
 	PERCPU_DECRYPTED_SECTION					\
-	VMLINUX_SYMBOL(__per_cpu_end) = .;
+	__per_cpu_end = .;
 
 /**
  * PERCPU_VADDR - define output section for percpu area
@@ -888,12 +897,11 @@
  * address, use PERCPU_SECTION.
  */
 #define PERCPU_VADDR(cacheline, vaddr, phdr)				\
-	VMLINUX_SYMBOL(__per_cpu_load) = .;				\
-	.data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load)		\
-				- LOAD_OFFSET) {			\
+	__per_cpu_load = .;						\
+	.data..percpu vaddr : AT(__per_cpu_load - LOAD_OFFSET) {	\
 		PERCPU_INPUT(cacheline)					\
 	} phdr								\
-	. = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu);
+	. = __per_cpu_load + SIZEOF(.data..percpu);
 
 /**
  * PERCPU_SECTION - define output section for percpu area, simple version
@@ -910,7 +918,7 @@
 #define PERCPU_SECTION(cacheline)					\
 	. = ALIGN(PAGE_SIZE);						\
 	.data..percpu	: AT(ADDR(.data..percpu) - LOAD_OFFSET) {	\
-		VMLINUX_SYMBOL(__per_cpu_load) = .;			\
+		__per_cpu_load = .;					\
 		PERCPU_INPUT(cacheline)					\
 	}
 
@@ -949,9 +957,9 @@
 #define INIT_TEXT_SECTION(inittext_align)				\
 	. = ALIGN(inittext_align);					\
 	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {		\
-		VMLINUX_SYMBOL(_sinittext) = .;				\
+		_sinittext = .;						\
 		INIT_TEXT						\
-		VMLINUX_SYMBOL(_einittext) = .;				\
+		_einittext = .;						\
 	}
 
 #define INIT_DATA_SECTION(initsetup_align)				\
@@ -966,8 +974,8 @@
 
 #define BSS_SECTION(sbss_align, bss_align, stop_align)			\
 	. = ALIGN(sbss_align);						\
-	VMLINUX_SYMBOL(__bss_start) = .;				\
+	__bss_start = .;						\
 	SBSS(sbss_align)						\
 	BSS(bss_align)							\
 	. = ALIGN(stop_align);						\
-	VMLINUX_SYMBOL(__bss_stop) = .;
+	__bss_stop = .;
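
The hunks above are mechanical: VMLINUX_SYMBOL(sym) = .; becomes a bare
sym = .; (possible now that no supported architecture prefixes C symbols
with an underscore), and table-like input sections gain KEEP() so that
ld --gc-sections cannot discard them. For orientation, a minimal sketch
of how C code consumes one of these linker-delimited tables; the element
type struct my_entry, the section name ___my_table and the handler
handle_entry() are hypothetical:

	/* Symbols defined by the linker script, bracketing the table. */
	extern const struct my_entry __start___my_table[];
	extern const struct my_entry __stop___my_table[];

	static void walk_my_table(void)
	{
		const struct my_entry *e;

		for (e = __start___my_table; e < __stop___my_table; e++)
			handle_entry(e);	/* hypothetical per-entry hook */
	}
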
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 1aba888..bd5e8cc 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -17,6 +17,14 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 
+/*
+ * Maximum values for blocksize and alignmask, used to allocate
+ * static buffers that are big enough for any combination of
+ * ciphers and architectures.
+ */
+#define MAX_CIPHER_BLOCKSIZE		16
+#define MAX_CIPHER_ALIGNMASK		15
+
 struct crypto_aead;
 struct crypto_instance;
 struct module;
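
These two constants exist so callers can size static or on-stack scratch
buffers that are safe for any cipher/architecture combination. A hedged
sketch of the usual pattern (the buffer name and its use are illustrative):

	/* One block's worth of scratch space, aligned for any alignmask. */
	u8 buf[MAX_CIPHER_BLOCKSIZE + MAX_CIPHER_ALIGNMASK];
	u8 *block = PTR_ALIGN(buf, MAX_CIPHER_ALIGNMASK + 1);
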
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 482461d..cc414db 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -245,8 +245,7 @@
 			int offset, size_t size, int flags);
 void af_alg_free_resources(struct af_alg_async_req *areq);
 void af_alg_async_cb(struct crypto_async_request *_req, int err);
-__poll_t af_alg_poll(struct file *file, struct socket *sock,
-			 poll_table *wait);
+__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events);
 struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
 					   unsigned int areqlen);
 int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
diff --git a/include/crypto/morus1280_glue.h b/include/crypto/morus1280_glue.h
new file mode 100644
index 0000000..b26dd70
--- /dev/null
+++ b/include/crypto/morus1280_glue.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The MORUS-1280 Authenticated-Encryption Algorithm
+ *   Common glue skeleton -- header file
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _CRYPTO_MORUS1280_GLUE_H
+#define _CRYPTO_MORUS1280_GLUE_H
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/aead.h>
+#include <crypto/morus_common.h>
+
+#define MORUS1280_WORD_SIZE 8
+#define MORUS1280_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS1280_WORD_SIZE)
+
+struct morus1280_block {
+	u8 bytes[MORUS1280_BLOCK_SIZE];
+};
+
+struct morus1280_glue_ops {
+	void (*init)(void *state, const void *key, const void *iv);
+	void (*ad)(void *state, const void *data, unsigned int length);
+	void (*enc)(void *state, const void *src, void *dst, unsigned int length);
+	void (*dec)(void *state, const void *src, void *dst, unsigned int length);
+	void (*enc_tail)(void *state, const void *src, void *dst, unsigned int length);
+	void (*dec_tail)(void *state, const void *src, void *dst, unsigned int length);
+	void (*final)(void *state, void *tag_xor, u64 assoclen, u64 cryptlen);
+};
+
+struct morus1280_ctx {
+	const struct morus1280_glue_ops *ops;
+	struct morus1280_block key;
+};
+
+void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
+				    const struct morus1280_glue_ops *ops);
+int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
+				 unsigned int keylen);
+int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
+				      unsigned int authsize);
+int crypto_morus1280_glue_encrypt(struct aead_request *req);
+int crypto_morus1280_glue_decrypt(struct aead_request *req);
+
+int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
+				 unsigned int keylen);
+int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead,
+				      unsigned int authsize);
+int cryptd_morus1280_glue_encrypt(struct aead_request *req);
+int cryptd_morus1280_glue_decrypt(struct aead_request *req);
+int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead);
+void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead);
+
+#define MORUS1280_DECLARE_ALGS(id, driver_name, priority) \
+	static const struct morus1280_glue_ops crypto_morus1280_##id##_ops = {\
+		.init = crypto_morus1280_##id##_init, \
+		.ad = crypto_morus1280_##id##_ad, \
+		.enc = crypto_morus1280_##id##_enc, \
+		.enc_tail = crypto_morus1280_##id##_enc_tail, \
+		.dec = crypto_morus1280_##id##_dec, \
+		.dec_tail = crypto_morus1280_##id##_dec_tail, \
+		.final = crypto_morus1280_##id##_final, \
+	}; \
+	\
+	static int crypto_morus1280_##id##_init_tfm(struct crypto_aead *tfm) \
+	{ \
+		crypto_morus1280_glue_init_ops(tfm, &crypto_morus1280_##id##_ops); \
+		return 0; \
+	} \
+	\
+	static void crypto_morus1280_##id##_exit_tfm(struct crypto_aead *tfm) \
+	{ \
+	} \
+	\
+	struct aead_alg crypto_morus1280_##id##_algs[] = {\
+		{ \
+			.setkey = crypto_morus1280_glue_setkey, \
+			.setauthsize = crypto_morus1280_glue_setauthsize, \
+			.encrypt = crypto_morus1280_glue_encrypt, \
+			.decrypt = crypto_morus1280_glue_decrypt, \
+			.init = crypto_morus1280_##id##_init_tfm, \
+			.exit = crypto_morus1280_##id##_exit_tfm, \
+			\
+			.ivsize = MORUS_NONCE_SIZE, \
+			.maxauthsize = MORUS_MAX_AUTH_SIZE, \
+			.chunksize = MORUS1280_BLOCK_SIZE, \
+			\
+			.base = { \
+				.cra_flags = CRYPTO_ALG_INTERNAL, \
+				.cra_blocksize = 1, \
+				.cra_ctxsize = sizeof(struct morus1280_ctx), \
+				.cra_alignmask = 0, \
+				\
+				.cra_name = "__morus1280", \
+				.cra_driver_name = "__"driver_name, \
+				\
+				.cra_module = THIS_MODULE, \
+			} \
+		}, { \
+			.setkey = cryptd_morus1280_glue_setkey, \
+			.setauthsize = cryptd_morus1280_glue_setauthsize, \
+			.encrypt = cryptd_morus1280_glue_encrypt, \
+			.decrypt = cryptd_morus1280_glue_decrypt, \
+			.init = cryptd_morus1280_glue_init_tfm, \
+			.exit = cryptd_morus1280_glue_exit_tfm, \
+			\
+			.ivsize = MORUS_NONCE_SIZE, \
+			.maxauthsize = MORUS_MAX_AUTH_SIZE, \
+			.chunksize = MORUS1280_BLOCK_SIZE, \
+			\
+			.base = { \
+				.cra_flags = CRYPTO_ALG_ASYNC, \
+				.cra_blocksize = 1, \
+				.cra_ctxsize = sizeof(struct crypto_aead *), \
+				.cra_alignmask = 0, \
+				\
+				.cra_priority = priority, \
+				\
+				.cra_name = "morus1280", \
+				.cra_driver_name = driver_name, \
+				\
+				.cra_module = THIS_MODULE, \
+			} \
+		} \
+	}
+
+#endif /* _CRYPTO_MORUS1280_GLUE_H */
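
An architecture plugs into this skeleton by implementing the
crypto_morus1280_<id>_* primitives named by the ops table and then
instantiating both algorithm variants with the macro. A sketch, assuming
SSE2 assembly routines following that naming convention (the id, driver
name and priority value here are illustrative):

	/* Declares crypto_morus1280_sse2_algs[]: the internal algorithm
	 * plus its cryptd-backed async wrapper.
	 */
	MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
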
diff --git a/include/crypto/morus640_glue.h b/include/crypto/morus640_glue.h
new file mode 100644
index 0000000..90c8db0
--- /dev/null
+++ b/include/crypto/morus640_glue.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The MORUS-640 Authenticated-Encryption Algorithm
+ *   Common glue skeleton -- header file
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _CRYPTO_MORUS640_GLUE_H
+#define _CRYPTO_MORUS640_GLUE_H
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/aead.h>
+#include <crypto/morus_common.h>
+
+#define MORUS640_WORD_SIZE 4
+#define MORUS640_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS640_WORD_SIZE)
+
+struct morus640_block {
+	u8 bytes[MORUS640_BLOCK_SIZE];
+};
+
+struct morus640_glue_ops {
+	void (*init)(void *state, const void *key, const void *iv);
+	void (*ad)(void *state, const void *data, unsigned int length);
+	void (*enc)(void *state, const void *src, void *dst, unsigned int length);
+	void (*dec)(void *state, const void *src, void *dst, unsigned int length);
+	void (*enc_tail)(void *state, const void *src, void *dst, unsigned int length);
+	void (*dec_tail)(void *state, const void *src, void *dst, unsigned int length);
+	void (*final)(void *state, void *tag_xor, u64 assoclen, u64 cryptlen);
+};
+
+struct morus640_ctx {
+	const struct morus640_glue_ops *ops;
+	struct morus640_block key;
+};
+
+void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
+				   const struct morus640_glue_ops *ops);
+int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
+				unsigned int keylen);
+int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
+				     unsigned int authsize);
+int crypto_morus640_glue_encrypt(struct aead_request *req);
+int crypto_morus640_glue_decrypt(struct aead_request *req);
+
+int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
+				unsigned int keylen);
+int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead,
+				     unsigned int authsize);
+int cryptd_morus640_glue_encrypt(struct aead_request *req);
+int cryptd_morus640_glue_decrypt(struct aead_request *req);
+int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead);
+void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead);
+
+#define MORUS640_DECLARE_ALGS(id, driver_name, priority) \
+	static const struct morus640_glue_ops crypto_morus640_##id##_ops = {\
+		.init = crypto_morus640_##id##_init, \
+		.ad = crypto_morus640_##id##_ad, \
+		.enc = crypto_morus640_##id##_enc, \
+		.enc_tail = crypto_morus640_##id##_enc_tail, \
+		.dec = crypto_morus640_##id##_dec, \
+		.dec_tail = crypto_morus640_##id##_dec_tail, \
+		.final = crypto_morus640_##id##_final, \
+	}; \
+	\
+	static int crypto_morus640_##id##_init_tfm(struct crypto_aead *tfm) \
+	{ \
+		crypto_morus640_glue_init_ops(tfm, &crypto_morus640_##id##_ops); \
+		return 0; \
+	} \
+	\
+	static void crypto_morus640_##id##_exit_tfm(struct crypto_aead *tfm) \
+	{ \
+	} \
+	\
+	struct aead_alg crypto_morus640_##id##_algs[] = {\
+		{ \
+			.setkey = crypto_morus640_glue_setkey, \
+			.setauthsize = crypto_morus640_glue_setauthsize, \
+			.encrypt = crypto_morus640_glue_encrypt, \
+			.decrypt = crypto_morus640_glue_decrypt, \
+			.init = crypto_morus640_##id##_init_tfm, \
+			.exit = crypto_morus640_##id##_exit_tfm, \
+			\
+			.ivsize = MORUS_NONCE_SIZE, \
+			.maxauthsize = MORUS_MAX_AUTH_SIZE, \
+			.chunksize = MORUS640_BLOCK_SIZE, \
+			\
+			.base = { \
+				.cra_flags = CRYPTO_ALG_INTERNAL, \
+				.cra_blocksize = 1, \
+				.cra_ctxsize = sizeof(struct morus640_ctx), \
+				.cra_alignmask = 0, \
+				\
+				.cra_name = "__morus640", \
+				.cra_driver_name = "__"driver_name, \
+				\
+				.cra_module = THIS_MODULE, \
+			} \
+		}, { \
+			.setkey = cryptd_morus640_glue_setkey, \
+			.setauthsize = cryptd_morus640_glue_setauthsize, \
+			.encrypt = cryptd_morus640_glue_encrypt, \
+			.decrypt = cryptd_morus640_glue_decrypt, \
+			.init = cryptd_morus640_glue_init_tfm, \
+			.exit = cryptd_morus640_glue_exit_tfm, \
+			\
+			.ivsize = MORUS_NONCE_SIZE, \
+			.maxauthsize = MORUS_MAX_AUTH_SIZE, \
+			.chunksize = MORUS640_BLOCK_SIZE, \
+			\
+			.base = { \
+				.cra_flags = CRYPTO_ALG_ASYNC, \
+				.cra_blocksize = 1, \
+				.cra_ctxsize = sizeof(struct crypto_aead *), \
+				.cra_alignmask = 0, \
+				\
+				.cra_priority = priority, \
+				\
+				.cra_name = "morus640", \
+				.cra_driver_name = driver_name, \
+				\
+				.cra_module = THIS_MODULE, \
+			} \
+		} \
+	}
+
+#endif /* _CRYPTO_MORUS640_GLUE_H */
diff --git a/include/crypto/morus_common.h b/include/crypto/morus_common.h
new file mode 100644
index 0000000..39f28c7
--- /dev/null
+++ b/include/crypto/morus_common.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The MORUS Authenticated-Encryption Algorithm
+ *   Common definitions
+ *
+ * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
+ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _CRYPTO_MORUS_COMMON_H
+#define _CRYPTO_MORUS_COMMON_H
+
+#define MORUS_BLOCK_WORDS 4
+#define MORUS_STATE_BLOCKS 5
+#define MORUS_NONCE_SIZE 16
+#define MORUS_MAX_AUTH_SIZE 16
+
+#endif /* _CRYPTO_MORUS_COMMON_H */
diff --git a/include/crypto/salsa20.h b/include/crypto/salsa20.h
deleted file mode 100644
index 19ed48a..0000000
--- a/include/crypto/salsa20.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Common values for the Salsa20 algorithm
- */
-
-#ifndef _CRYPTO_SALSA20_H
-#define _CRYPTO_SALSA20_H
-
-#include <linux/types.h>
-
-#define SALSA20_IV_SIZE		8
-#define SALSA20_MIN_KEY_SIZE	16
-#define SALSA20_MAX_KEY_SIZE	32
-#define SALSA20_BLOCK_SIZE	64
-
-struct crypto_skcipher;
-
-struct salsa20_ctx {
-	u32 initial_state[16];
-};
-
-void crypto_salsa20_init(u32 *state, const struct salsa20_ctx *ctx,
-			 const u8 *iv);
-int crypto_salsa20_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			  unsigned int keysize);
-
-#endif /* _CRYPTO_SALSA20_H */
diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h
index b64e64d..7afd730 100644
--- a/include/crypto/sm4.h
+++ b/include/crypto/sm4.h
@@ -25,4 +25,7 @@
 int crypto_sm4_expand_key(struct crypto_sm4_ctx *ctx, const u8 *in_key,
 			  unsigned int key_len);
 
+void crypto_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+void crypto_sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in);
+
 #endif
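
Exporting the generic single-block routines lets other SM4
implementations reuse them, for example as a fallback when acceleration
is unavailable. A minimal sketch, where accel_available() and
accel_sm4_encrypt() are hypothetical driver helpers:

	static void my_sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
	{
		if (accel_available())
			accel_sm4_encrypt(tfm, out, in);
		else
			crypto_sm4_encrypt(tfm, out, in);	/* generic fallback */
	}
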
diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h
index 6c731c5..dd63d08 100644
--- a/include/drm/amd_asic_type.h
+++ b/include/drm/amd_asic_type.h
@@ -44,8 +44,10 @@
 	CHIP_POLARIS10,
 	CHIP_POLARIS11,
 	CHIP_POLARIS12,
+	CHIP_VEGAM,
 	CHIP_VEGA10,
 	CHIP_VEGA12,
+	CHIP_VEGA20,
 	CHIP_RAVEN,
 	CHIP_LAST,
 };
diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h
index e9a1116..475b706b 100644
--- a/include/drm/bridge/analogix_dp.h
+++ b/include/drm/bridge/analogix_dp.h
@@ -33,7 +33,8 @@
 	struct drm_connector *connector;
 	bool skip_connector;
 
-	int (*power_on)(struct analogix_dp_plat_data *);
+	int (*power_on_start)(struct analogix_dp_plat_data *);
+	int (*power_on_end)(struct analogix_dp_plat_data *);
 	int (*power_off)(struct analogix_dp_plat_data *);
 	int (*attach)(struct analogix_dp_plat_data *, struct drm_bridge *,
 		      struct drm_connector *);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index c6666cd..f5099c1 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -95,14 +95,6 @@
 struct pci_dev;
 struct pci_controller;
 
-/***********************************************************************/
-/** \name DRM template customization defaults */
-/*@{*/
-
-/***********************************************************************/
-/** \name Internal types and structures */
-/*@{*/
-
 #define DRM_IF_VERSION(maj, min) (maj << 16 | min)
 
 /**
@@ -123,27 +115,13 @@
 #define DRM_SWITCH_POWER_CHANGING 2
 #define DRM_SWITCH_POWER_DYNAMIC_OFF 3
 
-static __inline__ int drm_core_check_feature(struct drm_device *dev,
-					     int feature)
+static inline bool drm_core_check_feature(struct drm_device *dev, int feature)
 {
-	return ((dev->driver->driver_features & feature) ? 1 : 0);
+	return dev->driver->driver_features & feature;
 }
 
-/******************************************************************/
-/** \name Internal function definitions */
-/*@{*/
-
-				/* Driver support (drm_drv.h) */
-
-/*
- * These are exported to drivers so that they can implement fencing using
- * DMA quiscent + idle. DMA quiescent usually requires the hardware lock.
- */
-
-/*@}*/
-
 /* returns true if currently okay to sleep */
-static __inline__ bool drm_can_sleep(void)
+static inline bool drm_can_sleep(void)
 {
 	if (in_atomic() || in_dbg_master() || irqs_disabled())
 		return false;
diff --git a/include/drm/drm_blend.h b/include/drm/drm_blend.h
index 1760602..330c561 100644
--- a/include/drm/drm_blend.h
+++ b/include/drm/drm_blend.h
@@ -36,6 +36,9 @@
 	return rotation & (DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270);
 }
 
+#define DRM_BLEND_ALPHA_OPAQUE		0xffff
+
+int drm_plane_create_alpha_property(struct drm_plane *plane);
 int drm_plane_create_rotation_property(struct drm_plane *plane,
 				       unsigned int rotation,
 				       unsigned int supported_rotations);
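
A driver opts a plane in to the generic per-plane alpha by creating the
property at plane-init time; the core then tracks the value in
drm_plane_state.alpha, defaulting to fully opaque. A sketch (error
handling elided, the atomic-update context is assumed):

	drm_plane_create_alpha_property(plane);

	/* Later, in the plane's atomic update: */
	u16 alpha = plane->state->alpha;	/* DRM_BLEND_ALPHA_OPAQUE if opaque */
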
diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
index 7c4fa32..858ba19 100644
--- a/include/drm/drm_device.h
+++ b/include/drm/drm_device.h
@@ -38,7 +38,6 @@
 	struct device *dev;		/**< Device structure of bus-device */
 	struct drm_driver *driver;	/**< DRM driver managing the device */
 	void *dev_private;		/**< DRM driver private data */
-	struct drm_minor *control;		/**< Control node */
 	struct drm_minor *primary;		/**< Primary node */
 	struct drm_minor *render;		/**< Render node */
 	bool registered;
@@ -46,7 +45,14 @@
 	/* currently active master for this device. Protected by master_mutex */
 	struct drm_master *master;
 
-	atomic_t unplugged;			/**< Flag whether dev is dead */
+	/**
+	 * @unplugged:
+	 *
+	 * Flag to tell if the device has been unplugged.
+	 * See drm_dev_enter() and drm_dev_is_unplugged().
+	 */
+	bool unplugged;
+
 	struct inode *anon_inode;		/**< inode for private address-space */
 	char *unique;				/**< unique name of the device */
 	/*@} */
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index 62903ba..c015649 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -64,6 +64,11 @@
 /* AUX CH addresses */
 /* DPCD */
 #define DP_DPCD_REV                         0x000
+# define DP_DPCD_REV_10                     0x10
+# define DP_DPCD_REV_11                     0x11
+# define DP_DPCD_REV_12                     0x12
+# define DP_DPCD_REV_13                     0x13
+# define DP_DPCD_REV_14                     0x14
 
 #define DP_MAX_LINK_RATE                    0x001
 
@@ -119,6 +124,7 @@
 # define DP_DPCD_DISPLAY_CONTROL_CAPABLE     (1 << 3) /* edp v1.2 or higher */
 
 #define DP_TRAINING_AUX_RD_INTERVAL         0x00e   /* XXX 1.2? */
+# define DP_TRAINING_AUX_RD_MASK            0x7F    /* XXX 1.2? */
 
 #define DP_ADAPTER_CAP			    0x00f   /* 1.2 */
 # define DP_FORCE_LOAD_SENSE_CAP	    (1 << 0)
@@ -478,6 +484,7 @@
 # define DP_PSR_FRAME_CAPTURE		    (1 << 3)
 # define DP_PSR_SELECTIVE_UPDATE	    (1 << 4)
 # define DP_PSR_IRQ_HPD_WITH_CRC_ERRORS     (1 << 5)
+# define DP_PSR_ENABLE_PSR2		    (1 << 6) /* eDP 1.4a */
 
 #define DP_ADAPTER_CTRL			    0x1a0
 # define DP_ADAPTER_CTRL_FORCE_LOAD_SENSE   (1 << 0)
@@ -794,6 +801,15 @@
 # define DP_LAST_ACTUAL_SYNCHRONIZATION_LATENCY_MASK	(0xf << 4)
 # define DP_LAST_ACTUAL_SYNCHRONIZATION_LATENCY_SHIFT	4
 
+#define DP_LAST_RECEIVED_PSR_SDP	    0x200a /* eDP 1.2 */
+# define DP_PSR_STATE_BIT		    (1 << 0) /* eDP 1.2 */
+# define DP_UPDATE_RFB_BIT		    (1 << 1) /* eDP 1.2 */
+# define DP_CRC_VALID_BIT		    (1 << 2) /* eDP 1.2 */
+# define DP_SU_VALID			    (1 << 3) /* eDP 1.4 */
+# define DP_FIRST_SCAN_LINE_SU_REGION	    (1 << 4) /* eDP 1.4 */
+# define DP_LAST_SCAN_LINE_SU_REGION	    (1 << 5) /* eDP 1.4 */
+# define DP_Y_COORDINATE_VALID		    (1 << 6) /* eDP 1.4a */
+
 #define DP_RECEIVER_ALPM_STATUS		    0x200b  /* eDP 1.4 */
 # define DP_ALPM_LOCK_TIMEOUT_ERROR	    (1 << 0)
 
@@ -967,18 +983,18 @@
 #define DP_SDP_VSC_EXT_CEA		0x21 /* DP 1.4 */
 /* 0x80+ CEA-861 infoframe types */
 
-struct edp_sdp_header {
+struct dp_sdp_header {
 	u8 HB0; /* Secondary Data Packet ID */
 	u8 HB1; /* Secondary Data Packet Type */
-	u8 HB2; /* 7:5 reserved, 4:0 revision number */
-	u8 HB3; /* 7:5 reserved, 4:0 number of valid data bytes */
+	u8 HB2; /* Secondary Data Packet Specific header, Byte 0 */
+	u8 HB3; /* Secondary Data Packet Specific header, Byte 1 */
 } __packed;
 
 #define EDP_SDP_HEADER_REVISION_MASK		0x1F
 #define EDP_SDP_HEADER_VALID_PAYLOAD_BYTES	0x1F
 
 struct edp_vsc_psr {
-	struct edp_sdp_header sdp_header;
+	struct dp_sdp_header sdp_header;
 	u8 DB0; /* Stereo Interface */
 	u8 DB1; /* 0 - PSR State; 1 - Update RFB; 2 - CRC Valid */
 	u8 DB2; /* CRC value bits 7:0 of the R or Cr component */
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index d23dcdd..7e545f5 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -624,6 +624,8 @@
 void drm_dev_put(struct drm_device *dev);
 void drm_dev_unref(struct drm_device *dev);
 void drm_put_dev(struct drm_device *dev);
+bool drm_dev_enter(struct drm_device *dev, int *idx);
+void drm_dev_exit(int idx);
 void drm_dev_unplug(struct drm_device *dev);
 
 /**
@@ -635,11 +637,16 @@
  * unplugged, these two functions guarantee that any store before calling
  * drm_dev_unplug() is visible to callers of this function after it completes
  */
-static inline int drm_dev_is_unplugged(struct drm_device *dev)
+static inline bool drm_dev_is_unplugged(struct drm_device *dev)
 {
-	int ret = atomic_read(&dev->unplugged);
-	smp_rmb();
-	return ret;
+	int idx;
+
+	if (drm_dev_enter(dev, &idx)) {
+		drm_dev_exit(idx);
+		return false;
+	}
+
+	return true;
 }
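
The atomic unplugged flag becomes an enter/exit pair so that unplug can
wait for in-flight device access to drain. The intended calling pattern,
as a sketch (touch_hardware() stands in for real device access):

	int idx;

	if (!drm_dev_enter(dev, &idx))
		return -ENODEV;		/* device already unplugged */

	touch_hardware(dev);		/* hypothetical hardware access */

	drm_dev_exit(idx);
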
 
 
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index 8d89a9c..b25d12e 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -465,8 +465,6 @@
 struct edid *drm_get_edid_switcheroo(struct drm_connector *connector,
 				     struct i2c_adapter *adapter);
 struct edid *drm_edid_duplicate(const struct edid *edid);
-void drm_reset_display_info(struct drm_connector *connector);
-u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid);
 int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid);
 
 u8 drm_match_cea_mode(const struct drm_display_mode *to_match);
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 5176c37..027ac16 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -47,6 +47,9 @@
  * header include loops we need it here for now.
  */
 
+/* Note that the order of this enum is ABI (it determines
+ * /dev/dri/renderD* numbers).
+ */
 enum drm_minor_type {
 	DRM_MINOR_PRIMARY,
 	DRM_MINOR_CONTROL,
@@ -182,6 +185,14 @@
 	unsigned atomic:1;
 
 	/**
+	 * @aspect_ratio_allowed:
+	 *
+	 * True if the client can handle picture aspect ratios and has
+	 * requested that this information be passed along with the mode.
+	 */
+	unsigned aspect_ratio_allowed:1;
+
+	/**
 	 * @is_master:
 	 *
 	 * This client is the creator of @master. Protected by struct
@@ -348,18 +359,6 @@
 	return file_priv->minor->type == DRM_MINOR_RENDER;
 }
 
-/**
- * drm_is_control_client - is this an open file of the control node
- * @file_priv: DRM file
- *
- * Control nodes are deprecated and in the process of getting removed from the
- * DRM userspace API. Do not ever use!
- */
-static inline bool drm_is_control_client(const struct drm_file *file_priv)
-{
-	return file_priv->minor->type == DRM_MINOR_CONTROL;
-}
-
 int drm_open(struct inode *inode, struct file *filp);
 ssize_t drm_read(struct file *filp, char __user *buffer,
 		 size_t count, loff_t *offset);
diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h
index 5ca7cdc..a38de7e 100644
--- a/include/drm/drm_gem_framebuffer_helper.h
+++ b/include/drm/drm_gem_framebuffer_helper.h
@@ -10,6 +10,7 @@
 struct drm_mode_fb_cmd2;
 struct drm_plane;
 struct drm_plane_state;
+struct drm_simple_display_pipe;
 
 struct drm_gem_object *drm_gem_fb_get_obj(struct drm_framebuffer *fb,
 					  unsigned int plane);
@@ -27,6 +28,8 @@
 
 int drm_gem_fb_prepare_fb(struct drm_plane *plane,
 			  struct drm_plane_state *state);
+int drm_gem_fb_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
+					      struct drm_plane_state *plane_state);
 
 struct drm_framebuffer *
 drm_gem_fbdev_fb_create(struct drm_device *dev,
diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h
index add4280..fafb6f5 100644
--- a/include/drm/drm_ioctl.h
+++ b/include/drm/drm_ioctl.h
@@ -109,13 +109,6 @@
 	 */
 	DRM_ROOT_ONLY		= BIT(2),
 	/**
-	 * @DRM_CONTROL_ALLOW:
-	 *
-	 * Deprecated, do not use. Control nodes are in the process of getting
-	 * removed.
-	 */
-	DRM_CONTROL_ALLOW	= BIT(3),
-	/**
 	 * @DRM_UNLOCKED:
 	 *
 	 * Whether &drm_ioctl_desc.func should be called with the DRM BKL held
diff --git a/include/drm/drm_legacy.h b/include/drm/drm_legacy.h
index cf0e7d8..8fad66f 100644
--- a/include/drm/drm_legacy.h
+++ b/include/drm/drm_legacy.h
@@ -194,8 +194,8 @@
 void drm_legacy_ioremap_wc(struct drm_local_map *map, struct drm_device *dev);
 void drm_legacy_ioremapfree(struct drm_local_map *map, struct drm_device *dev);
 
-static __inline__ struct drm_local_map *drm_legacy_findmap(struct drm_device *dev,
-							   unsigned int token)
+static inline struct drm_local_map *drm_legacy_findmap(struct drm_device *dev,
+						       unsigned int token)
 {
 	struct drm_map_list *_entry;
 	list_for_each_entry(_entry, &dev->maplist, head)
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 7569f22..33b3a96 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -796,6 +796,14 @@
 	bool allow_fb_modifiers;
 
 	/**
+	 * @normalize_zpos:
+	 *
+	 * If true, the drm core will call drm_atomic_normalize_zpos() as part
+	 * of atomic mode checking from drm_atomic_helper_check().
+	 */
+	bool normalize_zpos;
+
+	/**
 	 * @modifiers_property: Plane property to list support modifier/format
 	 * combination.
 	 */
diff --git a/include/drm/drm_modes.h b/include/drm/drm_modes.h
index 0d310be..b159fe0 100644
--- a/include/drm/drm_modes.h
+++ b/include/drm/drm_modes.h
@@ -147,6 +147,12 @@
 
 #define DRM_MODE_FLAG_3D_MAX	DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF
 
+#define DRM_MODE_MATCH_TIMINGS (1 << 0)
+#define DRM_MODE_MATCH_CLOCK (1 << 1)
+#define DRM_MODE_MATCH_FLAGS (1 << 2)
+#define DRM_MODE_MATCH_3D_FLAGS (1 << 3)
+#define DRM_MODE_MATCH_ASPECT_RATIO (1 << 4)
+
 /**
  * struct drm_display_mode - DRM kernel-internal display mode structure
  * @hdisplay: horizontal display size
@@ -405,6 +411,19 @@
 	 * Field for setting the HDMI picture aspect ratio of a mode.
 	 */
 	enum hdmi_picture_aspect picture_aspect_ratio;
+
+	/**
+	 * @export_head:
+	 *
+	 * struct list_head for modes to be exposed to userspace.
+	 * This maintains a list of exposed modes while preparing the
+	 * user-mode list in the drm_mode_getconnector ioctl. The list_head
+	 * is only used inside that ioctl function and is not expected to
+	 * be used outside it.
+	 * Once used, the stale pointers are not reset but left as they are,
+	 * to avoid the overhead of protecting them with mode_config.mutex.
+	 */
+	struct list_head export_head;
 };
 
 /**
@@ -490,6 +509,9 @@
 		   const struct drm_display_mode *src);
 struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
 					    const struct drm_display_mode *mode);
+bool drm_mode_match(const struct drm_display_mode *mode1,
+		    const struct drm_display_mode *mode2,
+		    unsigned int match_flags);
 bool drm_mode_equal(const struct drm_display_mode *mode1,
 		    const struct drm_display_mode *mode2);
 bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1,
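
With drm_mode_match(), drm_mode_equal() becomes one particular flag
combination; callers can now choose which aspects of two modes to
compare. A sketch, with mode1 and mode2 assumed to be existing modes:

	/* Match on timings and pixel clock only, ignoring flags and
	 * aspect ratio.
	 */
	bool same = drm_mode_match(mode1, mode2,
				   DRM_MODE_MATCH_TIMINGS |
				   DRM_MODE_MATCH_CLOCK);
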
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index 3e76ca8..35e2a3a 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -1004,11 +1004,14 @@
 	 * This function must not block for outstanding rendering, since it is
 	 * called in the context of the atomic IOCTL even for async commits to
 	 * be able to return any errors to userspace. Instead the recommended
-	 * way is to fill out the fence member of the passed-in
+	 * way is to fill out the &drm_plane_state.fence of the passed-in
 	 * &drm_plane_state. If the driver doesn't support native fences then
 	 * equivalent functionality should be implemented through private
 	 * members in the plane structure.
 	 *
+	 * Drivers which always have their buffers pinned should use
+	 * drm_gem_fb_prepare_fb() for this hook.
+	 *
 	 * The helpers will call @cleanup_fb with matching arguments for every
 	 * successful call to this hook.
 	 *
diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h
index f7bf4a4..26fa50c 100644
--- a/include/drm/drm_plane.h
+++ b/include/drm/drm_plane.h
@@ -43,6 +43,7 @@
  *	plane (in 16.16)
  * @src_w: width of visible portion of plane (in 16.16)
  * @src_h: height of visible portion of plane (in 16.16)
+ * @alpha: opacity of the plane
  * @rotation: rotation of the plane
  * @zpos: priority of the given plane on crtc (optional)
  *	Note that multiple active planes on the same crtc can have an identical
@@ -51,8 +52,8 @@
  *	plane with a lower ID.
  * @normalized_zpos: normalized value of zpos: unique, range from 0 to N-1
  *	where N is the number of active planes for given crtc. Note that
- *	the driver must call drm_atomic_normalize_zpos() to update this before
- *	it can be trusted.
+ *	the driver must set drm_mode_config.normalize_zpos or call
+ *	drm_atomic_normalize_zpos() to update this before it can be trusted.
  * @src: clipped source coordinates of the plane (in 16.16)
  * @dst: clipped destination coordinates of the plane
  * @state: backpointer to global drm_atomic_state
@@ -79,8 +80,15 @@
 	/**
 	 * @fence:
 	 *
-	 * Optional fence to wait for before scanning out @fb. Do not write this
-	 * directly, use drm_atomic_set_fence_for_plane()
+	 * Optional fence to wait for before scanning out @fb. The core atomic
+	 * code will set this when userspace is using explicit fencing. Do not
+	 * write this directly for a driver's implicit fence, use
+	 * drm_atomic_set_fence_for_plane() to ensure that an explicit fence is
+	 * preserved.
+	 *
+	 * Drivers should store any implicit fence in this from their
+	 * &drm_plane_helper.prepare_fb callback. See drm_gem_fb_prepare_fb()
+	 * and drm_gem_fb_simple_display_pipe_prepare_fb() for suitable helpers.
 	 */
 	struct dma_fence *fence;
 
@@ -106,6 +114,9 @@
 	uint32_t src_x, src_y;
 	uint32_t src_h, src_w;
 
+	/* Plane opacity */
+	u16 alpha;
+
 	/* Plane rotation */
 	unsigned int rotation;
 
@@ -496,6 +507,7 @@
  * @funcs: helper functions
  * @properties: property tracking for this plane
  * @type: type of plane (overlay, primary, cursor)
+ * @alpha_property: alpha property for this plane
  * @zpos_property: zpos property for this plane
  * @rotation_property: rotation property for this plane
  * @helper_private: mid-layer private data
@@ -571,6 +583,7 @@
 	 */
 	struct drm_plane_state *state;
 
+	struct drm_property *alpha_property;
 	struct drm_property *zpos_property;
 	struct drm_property *rotation_property;
 
diff --git a/include/drm/drm_property.h b/include/drm/drm_property.h
index d1423c7f..1d5c0b2 100644
--- a/include/drm/drm_property.h
+++ b/include/drm/drm_property.h
@@ -260,7 +260,7 @@
 						uint32_t type);
 struct drm_property *drm_property_create_bool(struct drm_device *dev,
 					      u32 flags, const char *name);
-int drm_property_add_enum(struct drm_property *property, int index,
+int drm_property_add_enum(struct drm_property *property,
 			  uint64_t value, const char *name);
 void drm_property_destroy(struct drm_device *dev, struct drm_property *property);
 
@@ -281,32 +281,6 @@
 void drm_property_blob_put(struct drm_property_blob *blob);
 
 /**
- * drm_property_reference_blob - acquire a blob property reference
- * @blob: DRM blob property
- *
- * This is a compatibility alias for drm_property_blob_get() and should not be
- * used by new code.
- */
-static inline struct drm_property_blob *
-drm_property_reference_blob(struct drm_property_blob *blob)
-{
-	return drm_property_blob_get(blob);
-}
-
-/**
- * drm_property_unreference_blob - release a blob property reference
- * @blob: DRM blob property
- *
- * This is a compatibility alias for drm_property_blob_put() and should not be
- * used by new code.
- */
-static inline void
-drm_property_unreference_blob(struct drm_property_blob *blob)
-{
-	drm_property_blob_put(blob);
-}
-
-/**
  * drm_property_find - find property object
  * @dev: DRM device
  * @file_priv: drm file to check for lease against.
diff --git a/include/drm/drm_rect.h b/include/drm/drm_rect.h
index 44bc122..6c54544 100644
--- a/include/drm/drm_rect.h
+++ b/include/drm/drm_rect.h
@@ -175,8 +175,7 @@
 
 bool drm_rect_intersect(struct drm_rect *r, const struct drm_rect *clip);
 bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst,
-			  const struct drm_rect *clip,
-			  int hscale, int vscale);
+			  const struct drm_rect *clip);
 int drm_rect_calc_hscale(const struct drm_rect *src,
 			 const struct drm_rect *dst,
 			 int min_hscale, int max_hscale);
diff --git a/include/drm/drm_simple_kms_helper.h b/include/drm/drm_simple_kms_helper.h
index 1b4e352..4519604 100644
--- a/include/drm/drm_simple_kms_helper.h
+++ b/include/drm/drm_simple_kms_helper.h
@@ -64,7 +64,8 @@
 	 * This hook is optional.
 	 */
 	void (*enable)(struct drm_simple_display_pipe *pipe,
-		       struct drm_crtc_state *crtc_state);
+		       struct drm_crtc_state *crtc_state,
+		       struct drm_plane_state *plane_state);
 	/**
 	 * @disable:
 	 *
@@ -115,6 +116,9 @@
 	 * Optional, called by &drm_plane_helper_funcs.prepare_fb.  Please read
 	 * the documentation for the &drm_plane_helper_funcs.prepare_fb hook for
 	 * more details.
+	 *
+	 * Drivers which always have their buffers pinned should use
+	 * drm_gem_fb_simple_display_pipe_prepare_fb() for this hook.
 	 */
 	int (*prepare_fb)(struct drm_simple_display_pipe *pipe,
 			  struct drm_plane_state *plane_state);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index dfd54fb..dec6558 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -43,10 +43,12 @@
 };
 
 /**
- * A scheduler entity is a wrapper around a job queue or a group
- * of other entities. Entities take turns emitting jobs from their
- * job queues to corresponding hardware ring based on scheduling
- * policy.
+ * drm_sched_entity - A wrapper around a job queue (typically attached
+ * to the DRM file_priv).
+ *
+ * Entities will emit jobs in order to their corresponding hardware
+ * ring, and the scheduler will alternate between entities based on
+ * scheduling policy.
 */
 struct drm_sched_entity {
 	struct list_head		list;
@@ -54,7 +56,6 @@
 	spinlock_t			rq_lock;
 	struct drm_gpu_scheduler	*sched;
 
-	spinlock_t			queue_lock;
 	struct spsc_queue		job_queue;
 
 	atomic_t			fence_seq;
@@ -63,6 +64,8 @@
 	struct dma_fence		*dependency;
 	struct dma_fence_cb		cb;
 	atomic_t			*guilty; /* points to ctx's guilty */
+	int            fini_status;
+	struct dma_fence    *last_scheduled;
 };
 
 /**
@@ -78,7 +81,18 @@
 
 struct drm_sched_fence {
 	struct dma_fence		scheduled;
+
+	/* This fence is what will be signaled by the scheduler when
+	 * the job is completed.
+	 *
+	 * When setting up an out fence for the job, you should use
+	 * this, since it's available immediately upon
+	 * drm_sched_job_init(), and the fence returned by the driver
+	 * from run_job() won't be created until the dependencies have
+	 * resolved.
+	 */
 	struct dma_fence		finished;
+
 	struct dma_fence_cb		cb;
 	struct dma_fence		*parent;
 	struct drm_gpu_scheduler	*sched;
@@ -88,6 +102,13 @@
 
 struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
 
+/**
+ * drm_sched_job - A job to be run by an entity.
+ *
+ * A job is created by the driver using drm_sched_job_init(), and
+ * should call drm_sched_entity_push_job() once it wants the scheduler
+ * to schedule the job.
+ */
 struct drm_sched_job {
 	struct spsc_node		queue_node;
 	struct drm_gpu_scheduler	*sched;
@@ -99,6 +120,7 @@
 	uint64_t			id;
 	atomic_t			karma;
 	enum drm_sched_priority		s_priority;
+	struct drm_sched_entity		*entity;
 };
 
 static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
@@ -112,10 +134,28 @@
 * these functions should be implemented on the driver side
 */
 struct drm_sched_backend_ops {
+	/* Called when the scheduler is considering scheduling this
+	 * job next, to get another struct dma_fence for this job to
+	 * block on.  Once it returns NULL, run_job() may be called.
+	 */
 	struct dma_fence *(*dependency)(struct drm_sched_job *sched_job,
 					struct drm_sched_entity *s_entity);
+
+	/* Called to execute the job once all of the dependencies have
+	 * been resolved.  This may be called multiple times, if
+	 * timedout_job() has happened and drm_sched_job_recovery()
+	 * decides to try it again.
+	 */
 	struct dma_fence *(*run_job)(struct drm_sched_job *sched_job);
+
+	/* Called when a job has taken too long to execute, to trigger
+	 * GPU recovery.
+	 */
 	void (*timedout_job)(struct drm_sched_job *sched_job);
+
+	/* Called once the job's finished fence has been signaled and
+	 * it's time to clean it up.
+	 */
 	void (*free_job)(struct drm_sched_job *sched_job);
 };
 
@@ -147,7 +187,11 @@
 int drm_sched_entity_init(struct drm_gpu_scheduler *sched,
 			  struct drm_sched_entity *entity,
 			  struct drm_sched_rq *rq,
-			  uint32_t jobs, atomic_t *guilty);
+			  atomic_t *guilty);
+void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched,
+			   struct drm_sched_entity *entity);
+void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched,
+			   struct drm_sched_entity *entity);
 void drm_sched_entity_fini(struct drm_gpu_scheduler *sched,
 			   struct drm_sched_entity *entity);
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
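
drm_sched_entity_init() loses its jobs argument, and the new kerneldoc
spells out the job lifecycle. A hedged sketch of the documented flow
(error handling trimmed, my_submit() is hypothetical; the owner argument
of drm_sched_job_init() is passed through unchanged):

  /* Hypothetical submission path: per the comments above, the job's
   * finished fence is usable as an out-fence immediately after
   * drm_sched_job_init(), before run_job() produces a hardware fence. */
  static int my_submit(struct drm_gpu_scheduler *sched,
                       struct drm_sched_entity *entity,
                       struct drm_sched_job *job, void *owner)
  {
      int ret;

      ret = drm_sched_job_init(job, sched, entity, owner);
      if (ret)
          return ret;

      /* Documented out-fence: available right away. */
      dma_fence_get(&job->s_fence->finished);

      drm_sched_entity_push_job(job, entity);
      return 0;
  }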
diff --git a/include/drm/gpu_scheduler_trace.h b/include/drm/gpu_scheduler_trace.h
deleted file mode 100644
index 0789e8d..0000000
--- a/include/drm/gpu_scheduler_trace.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#if !defined(_GPU_SCHED_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _GPU_SCHED_TRACE_H_
-
-#include <linux/stringify.h>
-#include <linux/types.h>
-#include <linux/tracepoint.h>
-
-#include <drm/drmP.h>
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM gpu_scheduler
-#define TRACE_INCLUDE_FILE gpu_scheduler_trace
-
-TRACE_EVENT(drm_sched_job,
-	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
-	    TP_ARGS(sched_job, entity),
-	    TP_STRUCT__entry(
-			     __field(struct drm_sched_entity *, entity)
-			     __field(struct dma_fence *, fence)
-			     __field(const char *, name)
-			     __field(uint64_t, id)
-			     __field(u32, job_count)
-			     __field(int, hw_job_count)
-			     ),
-
-	    TP_fast_assign(
-			   __entry->entity = entity;
-			   __entry->id = sched_job->id;
-			   __entry->fence = &sched_job->s_fence->finished;
-			   __entry->name = sched_job->sched->name;
-			   __entry->job_count = spsc_queue_count(&entity->job_queue);
-			   __entry->hw_job_count = atomic_read(
-				   &sched_job->sched->hw_rq_count);
-			   ),
-	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
-		      __entry->entity, __entry->id,
-		      __entry->fence, __entry->name,
-		      __entry->job_count, __entry->hw_job_count)
-);
-
-TRACE_EVENT(drm_sched_process_job,
-	    TP_PROTO(struct drm_sched_fence *fence),
-	    TP_ARGS(fence),
-	    TP_STRUCT__entry(
-		    __field(struct dma_fence *, fence)
-		    ),
-
-	    TP_fast_assign(
-		    __entry->fence = &fence->finished;
-		    ),
-	    TP_printk("fence=%p signaled", __entry->fence)
-);
-
-#endif
-
-/* This part must be outside protection */
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#include <trace/define_trace.h>
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 70f0c25..bab70ff 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -349,6 +349,7 @@
 #define INTEL_KBL_GT2_IDS(info)	\
 	INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \
 	INTEL_VGA_DEVICE(0x5917, info), /* Mobile GT2 */ \
+	INTEL_VGA_DEVICE(0x591C, info), /* ULX GT2 */ \
 	INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \
 	INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \
 	INTEL_VGA_DEVICE(0x5912, info), /* DT  GT2 */ \
diff --git a/include/drm/tinydrm/mipi-dbi.h b/include/drm/tinydrm/mipi-dbi.h
index 44e824a..b8ba588 100644
--- a/include/drm/tinydrm/mipi-dbi.h
+++ b/include/drm/tinydrm/mipi-dbi.h
@@ -67,7 +67,9 @@
 		  const struct drm_simple_display_pipe_funcs *pipe_funcs,
 		  struct drm_driver *driver,
 		  const struct drm_display_mode *mode, unsigned int rotation);
-void mipi_dbi_enable_flush(struct mipi_dbi *mipi);
+void mipi_dbi_enable_flush(struct mipi_dbi *mipi,
+			   struct drm_crtc_state *crtc_state,
+			   struct drm_plane_state *plane_state);
 void mipi_dbi_pipe_disable(struct drm_simple_display_pipe *pipe);
 void mipi_dbi_hw_reset(struct mipi_dbi *mipi);
 bool mipi_dbi_display_is_on(struct mipi_dbi *mipi);
diff --git a/include/drm/tinydrm/tinydrm-helpers.h b/include/drm/tinydrm/tinydrm-helpers.h
index 0a4ddbc..5b96f0b 100644
--- a/include/drm/tinydrm/tinydrm-helpers.h
+++ b/include/drm/tinydrm/tinydrm-helpers.h
@@ -36,6 +36,11 @@
 bool tinydrm_merge_clips(struct drm_clip_rect *dst,
 			 struct drm_clip_rect *src, unsigned int num_clips,
 			 unsigned int flags, u32 max_width, u32 max_height);
+int tinydrm_fb_dirty(struct drm_framebuffer *fb,
+		     struct drm_file *file_priv,
+		     unsigned int flags, unsigned int color,
+		     struct drm_clip_rect *clips,
+		     unsigned int num_clips);
 void tinydrm_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
 		    struct drm_clip_rect *clip);
 void tinydrm_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
diff --git a/include/drm/tinydrm/tinydrm.h b/include/drm/tinydrm/tinydrm.h
index 07a9a11..56e4a91 100644
--- a/include/drm/tinydrm/tinydrm.h
+++ b/include/drm/tinydrm/tinydrm.h
@@ -26,6 +26,10 @@
 	struct drm_simple_display_pipe pipe;
 	struct mutex dirty_lock;
 	const struct drm_framebuffer_funcs *fb_funcs;
+	int (*fb_dirty)(struct drm_framebuffer *framebuffer,
+			struct drm_file *file_priv, unsigned flags,
+			unsigned color, struct drm_clip_rect *clips,
+			unsigned num_clips);
 };
 
 static inline struct tinydrm_device *
@@ -41,7 +45,7 @@
  * the &drm_driver structure.
  */
 #define TINYDRM_GEM_DRIVER_OPS \
-	.gem_free_object	= tinydrm_gem_cma_free_object, \
+	.gem_free_object_unlocked = tinydrm_gem_cma_free_object, \
 	.gem_print_info		= drm_gem_cma_print_info, \
 	.gem_vm_ops		= &drm_gem_cma_vm_ops, \
 	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd, \
@@ -91,8 +95,6 @@
 
 void tinydrm_display_pipe_update(struct drm_simple_display_pipe *pipe,
 				 struct drm_plane_state *old_state);
-int tinydrm_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
-				    struct drm_plane_state *plane_state);
 int
 tinydrm_display_pipe_init(struct tinydrm_device *tdev,
 			  const struct drm_simple_display_pipe_funcs *funcs,
diff --git a/include/dt-bindings/dma/jz4780-dma.h b/include/dt-bindings/dma/jz4780-dma.h
new file mode 100644
index 0000000..df017fd
--- /dev/null
+++ b/include/dt-bindings/dma/jz4780-dma.h
@@ -0,0 +1,49 @@
+#ifndef __DT_BINDINGS_DMA_JZ4780_DMA_H__
+#define __DT_BINDINGS_DMA_JZ4780_DMA_H__
+
+/*
+ * Request type numbers for the JZ4780 DMA controller (written to the DRTn
+ * register for the channel).
+ */
+#define JZ4780_DMA_I2S1_TX	0x4
+#define JZ4780_DMA_I2S1_RX	0x5
+#define JZ4780_DMA_I2S0_TX	0x6
+#define JZ4780_DMA_I2S0_RX	0x7
+#define JZ4780_DMA_AUTO		0x8
+#define JZ4780_DMA_SADC_RX	0x9
+#define JZ4780_DMA_UART4_TX	0xc
+#define JZ4780_DMA_UART4_RX	0xd
+#define JZ4780_DMA_UART3_TX	0xe
+#define JZ4780_DMA_UART3_RX	0xf
+#define JZ4780_DMA_UART2_TX	0x10
+#define JZ4780_DMA_UART2_RX	0x11
+#define JZ4780_DMA_UART1_TX	0x12
+#define JZ4780_DMA_UART1_RX	0x13
+#define JZ4780_DMA_UART0_TX	0x14
+#define JZ4780_DMA_UART0_RX	0x15
+#define JZ4780_DMA_SSI0_TX	0x16
+#define JZ4780_DMA_SSI0_RX	0x17
+#define JZ4780_DMA_SSI1_TX	0x18
+#define JZ4780_DMA_SSI1_RX	0x19
+#define JZ4780_DMA_MSC0_TX	0x1a
+#define JZ4780_DMA_MSC0_RX	0x1b
+#define JZ4780_DMA_MSC1_TX	0x1c
+#define JZ4780_DMA_MSC1_RX	0x1d
+#define JZ4780_DMA_MSC2_TX	0x1e
+#define JZ4780_DMA_MSC2_RX	0x1f
+#define JZ4780_DMA_PCM0_TX	0x20
+#define JZ4780_DMA_PCM0_RX	0x21
+#define JZ4780_DMA_SMB0_TX	0x24
+#define JZ4780_DMA_SMB0_RX	0x25
+#define JZ4780_DMA_SMB1_TX	0x26
+#define JZ4780_DMA_SMB1_RX	0x27
+#define JZ4780_DMA_SMB2_TX	0x28
+#define JZ4780_DMA_SMB2_RX	0x29
+#define JZ4780_DMA_SMB3_TX	0x2a
+#define JZ4780_DMA_SMB3_RX	0x2b
+#define JZ4780_DMA_SMB4_TX	0x2c
+#define JZ4780_DMA_SMB4_RX	0x2d
+#define JZ4780_DMA_DES_TX	0x2e
+#define JZ4780_DMA_DES_RX	0x2f
+
+#endif /* __DT_BINDINGS_DMA_JZ4780_DMA_H__ */
diff --git a/include/dt-bindings/phy/phy-qcom-qusb2.h b/include/dt-bindings/phy/phy-qcom-qusb2.h
new file mode 100644
index 0000000..5c5e4d8
--- /dev/null
+++ b/include/dt-bindings/phy/phy-qcom-qusb2.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_QCOM_PHY_QUSB2_H_
+#define _DT_BINDINGS_QCOM_PHY_QUSB2_H_
+
+/* PHY HSTX TRIM bit values (24mA to 15mA) */
+#define QUSB2_V2_HSTX_TRIM_24_0_MA		0x0
+#define QUSB2_V2_HSTX_TRIM_23_4_MA		0x1
+#define QUSB2_V2_HSTX_TRIM_22_8_MA		0x2
+#define QUSB2_V2_HSTX_TRIM_22_2_MA		0x3
+#define QUSB2_V2_HSTX_TRIM_21_6_MA		0x4
+#define QUSB2_V2_HSTX_TRIM_21_0_MA		0x5
+#define QUSB2_V2_HSTX_TRIM_20_4_MA		0x6
+#define QUSB2_V2_HSTX_TRIM_19_8_MA		0x7
+#define QUSB2_V2_HSTX_TRIM_19_2_MA		0x8
+#define QUSB2_V2_HSTX_TRIM_18_6_MA		0x9
+#define QUSB2_V2_HSTX_TRIM_18_0_MA		0xa
+#define QUSB2_V2_HSTX_TRIM_17_4_MA		0xb
+#define QUSB2_V2_HSTX_TRIM_16_8_MA		0xc
+#define QUSB2_V2_HSTX_TRIM_16_2_MA		0xd
+#define QUSB2_V2_HSTX_TRIM_15_6_MA		0xe
+#define QUSB2_V2_HSTX_TRIM_15_0_MA		0xf
+
+/* PHY PREEMPHASIS bit values */
+#define QUSB2_V2_PREEMPHASIS_NONE		0
+#define QUSB2_V2_PREEMPHASIS_5_PERCENT		1
+#define QUSB2_V2_PREEMPHASIS_10_PERCENT		2
+#define QUSB2_V2_PREEMPHASIS_15_PERCENT		3
+
+/* PHY PREEMPHASIS-WIDTH bit values */
+#define QUSB2_V2_PREEMPHASIS_WIDTH_FULL_BIT	0
+#define QUSB2_V2_PREEMPHASIS_WIDTH_HALF_BIT	1
+
+#endif
diff --git a/include/dt-bindings/soc/qcom,apr.h b/include/dt-bindings/soc/qcom,apr.h
new file mode 100644
index 0000000..0063624
--- /dev/null
+++ b/include/dt-bindings/soc/qcom,apr.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DT_BINDINGS_QCOM_APR_H
+#define __DT_BINDINGS_QCOM_APR_H
+
+/* Domain IDs */
+#define APR_DOMAIN_SIM		0x1
+#define APR_DOMAIN_PC		0x2
+#define APR_DOMAIN_MODEM	0x3
+#define APR_DOMAIN_ADSP		0x4
+#define APR_DOMAIN_APPS		0x5
+#define APR_DOMAIN_MAX		0x6
+
+/* ADSP service IDs */
+#define APR_SVC_ADSP_CORE	0x3
+#define APR_SVC_AFE		0x4
+#define APR_SVC_VSM		0x5
+#define APR_SVC_VPM		0x6
+#define APR_SVC_ASM		0x7
+#define APR_SVC_ADM		0x8
+#define APR_SVC_ADSP_MVM	0x09
+#define APR_SVC_ADSP_CVS	0x0A
+#define APR_SVC_ADSP_CVP	0x0B
+#define APR_SVC_USM		0x0C
+#define APR_SVC_LSM		0x0D
+#define APR_SVC_VIDC		0x16
+#define APR_SVC_MAX		0x17
+
+#endif /* __DT_BINDINGS_QCOM_APR_H */
diff --git a/include/dt-bindings/sound/fsl-imx-audmux.h b/include/dt-bindings/sound/fsl-imx-audmux.h
index 751fe14..15f138b 100644
--- a/include/dt-bindings/sound/fsl-imx-audmux.h
+++ b/include/dt-bindings/sound/fsl-imx-audmux.h
@@ -25,6 +25,13 @@
 #define MX51_AUDMUX_PORT6		5
 #define MX51_AUDMUX_PORT7		6
 
+/*
+ * TFCSEL/RFCSEL (i.MX27) or TFSEL/TCSEL/RFSEL/RCSEL (i.MX31/51/53/6Q)
+ * can be sourced from Rx/Tx.
+ */
+#define IMX_AUDMUX_RXFS			0x8
+#define IMX_AUDMUX_RXCLK		0x8
+
 /* Register definitions for the i.MX21/27 Digital Audio Multiplexer */
 #define IMX_AUDMUX_V1_PCR_INMMASK(x)	((x) & 0xff)
 #define IMX_AUDMUX_V1_PCR_INMEN		(1 << 8)
diff --git a/include/dt-bindings/sound/qcom,q6afe.h b/include/dt-bindings/sound/qcom,q6afe.h
new file mode 100644
index 0000000..e2d3892
--- /dev/null
+++ b/include/dt-bindings/sound/qcom,q6afe.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DT_BINDINGS_Q6_AFE_H__
+#define __DT_BINDINGS_Q6_AFE_H__
+
+/* Audio Front End (AFE) virtual ports IDs */
+#define HDMI_RX		1
+#define SLIMBUS_0_RX    2
+#define SLIMBUS_0_TX    3
+#define SLIMBUS_1_RX    4
+#define SLIMBUS_1_TX    5
+#define SLIMBUS_2_RX    6
+#define SLIMBUS_2_TX    7
+#define SLIMBUS_3_RX    8
+#define SLIMBUS_3_TX    9
+#define SLIMBUS_4_RX    10
+#define SLIMBUS_4_TX    11
+#define SLIMBUS_5_RX    12
+#define SLIMBUS_5_TX    13
+#define SLIMBUS_6_RX    14
+#define SLIMBUS_6_TX    15
+#define PRIMARY_MI2S_RX		16
+#define PRIMARY_MI2S_TX		17
+#define SECONDARY_MI2S_RX	18
+#define SECONDARY_MI2S_TX	19
+#define TERTIARY_MI2S_RX	20
+#define TERTIARY_MI2S_TX	21
+#define QUATERNARY_MI2S_RX	22
+#define QUATERNARY_MI2S_TX	23
+#define PRIMARY_TDM_RX_0	24
+#define PRIMARY_TDM_TX_0	25
+#define PRIMARY_TDM_RX_1	26
+#define PRIMARY_TDM_TX_1	27
+#define PRIMARY_TDM_RX_2	28
+#define PRIMARY_TDM_TX_2	29
+#define PRIMARY_TDM_RX_3	30
+#define PRIMARY_TDM_TX_3	31
+#define PRIMARY_TDM_RX_4	32
+#define PRIMARY_TDM_TX_4	33
+#define PRIMARY_TDM_RX_5	34
+#define PRIMARY_TDM_TX_5	35
+#define PRIMARY_TDM_RX_6	36
+#define PRIMARY_TDM_TX_6	37
+#define PRIMARY_TDM_RX_7	38
+#define PRIMARY_TDM_TX_7	39
+#define SECONDARY_TDM_RX_0	40
+#define SECONDARY_TDM_TX_0	41
+#define SECONDARY_TDM_RX_1	42
+#define SECONDARY_TDM_TX_1	43
+#define SECONDARY_TDM_RX_2	44
+#define SECONDARY_TDM_TX_2	45
+#define SECONDARY_TDM_RX_3	46
+#define SECONDARY_TDM_TX_3	47
+#define SECONDARY_TDM_RX_4	48
+#define SECONDARY_TDM_TX_4	49
+#define SECONDARY_TDM_RX_5	50
+#define SECONDARY_TDM_TX_5	51
+#define SECONDARY_TDM_RX_6	52
+#define SECONDARY_TDM_TX_6	53
+#define SECONDARY_TDM_RX_7	54
+#define SECONDARY_TDM_TX_7	55
+#define TERTIARY_TDM_RX_0	56
+#define TERTIARY_TDM_TX_0	57
+#define TERTIARY_TDM_RX_1	58
+#define TERTIARY_TDM_TX_1	59
+#define TERTIARY_TDM_RX_2	60
+#define TERTIARY_TDM_TX_2	61
+#define TERTIARY_TDM_RX_3	62
+#define TERTIARY_TDM_TX_3	63
+#define TERTIARY_TDM_RX_4	64
+#define TERTIARY_TDM_TX_4	65
+#define TERTIARY_TDM_RX_5	66
+#define TERTIARY_TDM_TX_5	67
+#define TERTIARY_TDM_RX_6	68
+#define TERTIARY_TDM_TX_6	69
+#define TERTIARY_TDM_RX_7	70
+#define TERTIARY_TDM_TX_7	71
+#define QUATERNARY_TDM_RX_0	72
+#define QUATERNARY_TDM_TX_0	73
+#define QUATERNARY_TDM_RX_1	74
+#define QUATERNARY_TDM_TX_1	75
+#define QUATERNARY_TDM_RX_2	76
+#define QUATERNARY_TDM_TX_2	77
+#define QUATERNARY_TDM_RX_3	78
+#define QUATERNARY_TDM_TX_3	79
+#define QUATERNARY_TDM_RX_4	80
+#define QUATERNARY_TDM_TX_4	81
+#define QUATERNARY_TDM_RX_5	82
+#define QUATERNARY_TDM_TX_5	83
+#define QUATERNARY_TDM_RX_6	84
+#define QUATERNARY_TDM_TX_6	85
+#define QUATERNARY_TDM_RX_7	86
+#define QUATERNARY_TDM_TX_7	87
+#define QUINARY_TDM_RX_0	88
+#define QUINARY_TDM_TX_0	89
+#define QUINARY_TDM_RX_1	90
+#define QUINARY_TDM_TX_1	91
+#define QUINARY_TDM_RX_2	92
+#define QUINARY_TDM_TX_2	93
+#define QUINARY_TDM_RX_3	94
+#define QUINARY_TDM_TX_3	95
+#define QUINARY_TDM_RX_4	96
+#define QUINARY_TDM_TX_4	97
+#define QUINARY_TDM_RX_5	98
+#define QUINARY_TDM_TX_5	99
+#define QUINARY_TDM_RX_6	100
+#define QUINARY_TDM_TX_6	101
+#define QUINARY_TDM_RX_7	102
+#define QUINARY_TDM_TX_7	103
+
+#endif /* __DT_BINDINGS_Q6_AFE_H__ */
+
diff --git a/include/dt-bindings/sound/qcom,q6asm.h b/include/dt-bindings/sound/qcom,q6asm.h
new file mode 100644
index 0000000..1eb77d8
--- /dev/null
+++ b/include/dt-bindings/sound/qcom,q6asm.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DT_BINDINGS_Q6_ASM_H__
+#define __DT_BINDINGS_Q6_ASM_H__
+
+#define	MSM_FRONTEND_DAI_MULTIMEDIA1	0
+#define	MSM_FRONTEND_DAI_MULTIMEDIA2	1
+#define	MSM_FRONTEND_DAI_MULTIMEDIA3	2
+#define	MSM_FRONTEND_DAI_MULTIMEDIA4	3
+#define	MSM_FRONTEND_DAI_MULTIMEDIA5	4
+#define	MSM_FRONTEND_DAI_MULTIMEDIA6	5
+#define	MSM_FRONTEND_DAI_MULTIMEDIA7	6
+#define	MSM_FRONTEND_DAI_MULTIMEDIA8	7
+#define	MSM_FRONTEND_DAI_MULTIMEDIA9	8
+#define	MSM_FRONTEND_DAI_MULTIMEDIA10	9
+#define	MSM_FRONTEND_DAI_MULTIMEDIA11	10
+#define	MSM_FRONTEND_DAI_MULTIMEDIA12	11
+#define	MSM_FRONTEND_DAI_MULTIMEDIA13	12
+#define	MSM_FRONTEND_DAI_MULTIMEDIA14	13
+#define	MSM_FRONTEND_DAI_MULTIMEDIA15	14
+#define	MSM_FRONTEND_DAI_MULTIMEDIA16	15
+
+#endif /* __DT_BINDINGS_Q6_ASM_H__ */
diff --git a/include/dt-bindings/sound/rt5640.h b/include/dt-bindings/sound/rt5640.h
new file mode 100644
index 0000000..154c9b4
--- /dev/null
+++ b/include/dt-bindings/sound/rt5640.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DT_RT5640_H
+#define __DT_RT5640_H
+
+#define RT5640_DMIC1_DATA_PIN_NONE	0
+#define RT5640_DMIC1_DATA_PIN_IN1P	1
+#define RT5640_DMIC1_DATA_PIN_GPIO3	2
+
+#define RT5640_DMIC2_DATA_PIN_NONE	0
+#define RT5640_DMIC2_DATA_PIN_IN1N	1
+#define RT5640_DMIC2_DATA_PIN_GPIO4	2
+
+#define RT5640_JD_SRC_GPIO1		1
+#define RT5640_JD_SRC_JD1_IN4P		2
+#define RT5640_JD_SRC_JD2_IN4N		3
+#define RT5640_JD_SRC_GPIO2		4
+#define RT5640_JD_SRC_GPIO3		5
+#define RT5640_JD_SRC_GPIO4		6
+
+#define RT5640_OVCD_SF_0P5		0
+#define RT5640_OVCD_SF_0P75		1
+#define RT5640_OVCD_SF_1P0		2
+#define RT5640_OVCD_SF_1P5		3
+
+#endif /* __DT_RT5640_H */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 15bfb15..fd0ea6a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -578,6 +578,7 @@
 
 extern void acpi_early_init(void);
 extern void acpi_subsystem_init(void);
+extern void arch_post_acpi_subsys_init(void);
 
 extern int acpi_nvs_register(__u64 start, __u64 size);
 
@@ -899,7 +900,7 @@
 static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; }
 static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
-	return -ENODEV;
+	return 0;
 }
 #endif
 
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 9d8aabe..b83e68d 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -8,8 +8,6 @@
 struct kiocb;
 struct mm_struct;
 
-#define KIOCB_KEY		0
-
 typedef int (kiocb_cancel_fn)(struct kiocb *);
 
 /* prototypes */
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 4037392..23f8055 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -145,7 +145,12 @@
 
 extern struct atalk_route atrtr_default;
 
-extern const struct file_operations atalk_seq_arp_fops;
+struct aarp_iter_state {
+	int bucket;
+	struct aarp_entry **table;
+};
+
+extern const struct seq_operations aarp_seq_ops;
 
 extern int sysctl_aarp_expiry_time;
 extern int sysctl_aarp_tick_time;
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 8b276fd..01ce399 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -654,6 +654,7 @@
 }
 #endif
 
+#define atomic_cond_read_relaxed(v, c)	smp_cond_load_relaxed(&(v)->counter, (c))
 #define atomic_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))
 
 #ifdef CONFIG_GENERIC_ATOMIC64
@@ -1075,6 +1076,7 @@
 }
 #endif
 
+#define atomic64_cond_read_relaxed(v, c)	smp_cond_load_relaxed(&(v)->counter, (c))
 #define atomic64_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))
 
 #include <asm-generic/atomic-long.h>
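
atomic_cond_read_relaxed() mirrors the existing acquire variant: it
busy-waits until a condition on the freshly loaded value (named VAL
inside the expression) holds, without the acquire barrier. A small
hedged sketch with a hypothetical flag word:

  /* Spin until bit 0 of @state clears; the caller handles whatever
   * ordering the accesses that follow might need. */
  static void wait_for_idle(atomic_t *state)
  {
      atomic_cond_read_relaxed(state, !(VAL & 1));
  }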
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 75d5b03..69c7847 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -232,12 +232,24 @@
 extern void __audit_inode_child(struct inode *parent,
 				const struct dentry *dentry,
 				const unsigned char type);
-extern void __audit_seccomp(unsigned long syscall, long signr, int code);
+extern void audit_seccomp(unsigned long syscall, long signr, int code);
+extern void audit_seccomp_actions_logged(const char *names,
+					 const char *old_names, int res);
 extern void __audit_ptrace(struct task_struct *t);
 
+static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx)
+{
+	task->audit_context = ctx;
+}
+
+static inline struct audit_context *audit_context(void)
+{
+	return current->audit_context;
+}
+
 static inline bool audit_dummy_context(void)
 {
-	void *p = current->audit_context;
+	void *p = audit_context();
 	return !p || *(int *)p;
 }
 static inline void audit_free(struct task_struct *task)
@@ -249,12 +261,12 @@
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
 {
-	if (unlikely(current->audit_context))
+	if (unlikely(audit_context()))
 		__audit_syscall_entry(major, a0, a1, a2, a3);
 }
 static inline void audit_syscall_exit(void *pt_regs)
 {
-	if (unlikely(current->audit_context)) {
+	if (unlikely(audit_context())) {
 		int success = is_syscall_success(pt_regs);
 		long return_code = regs_return_value(pt_regs);
 
@@ -302,12 +314,6 @@
 }
 void audit_core_dumps(long signr);
 
-static inline void audit_seccomp(unsigned long syscall, long signr, int code)
-{
-	if (audit_enabled && unlikely(!audit_dummy_context()))
-		__audit_seccomp(syscall, signr, code);
-}
-
 static inline void audit_ptrace(struct task_struct *t)
 {
 	if (unlikely(!audit_dummy_context()))
@@ -468,6 +474,12 @@
 {
 	return true;
 }
+static inline void audit_set_context(struct task_struct *task, struct audit_context *ctx)
+{ }
+static inline struct audit_context *audit_context(void)
+{
+	return NULL;
+}
 static inline struct filename *audit_reusename(const __user char *name)
 {
 	return NULL;
@@ -498,10 +510,11 @@
 { }
 static inline void audit_core_dumps(long signr)
 { }
-static inline void __audit_seccomp(unsigned long syscall, long signr, int code)
-{ }
 static inline void audit_seccomp(unsigned long syscall, long signr, int code)
 { }
+static inline void audit_seccomp_actions_logged(const char *names,
+						const char *old_names, int res)
+{ }
 static inline int auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec64 *t, unsigned int *serial)
 {
@@ -513,7 +526,7 @@
 }
 static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
 {
-	return -1;
+	return AUDIT_SID_UNSET;
 }
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 { }
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ce547a2..397a38a 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -67,8 +67,12 @@
 
 #define bio_multiple_segments(bio)				\
 	((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len)
-#define bio_sectors(bio)	((bio)->bi_iter.bi_size >> 9)
-#define bio_end_sector(bio)	((bio)->bi_iter.bi_sector + bio_sectors((bio)))
+
+#define bvec_iter_sectors(iter)	((iter).bi_size >> 9)
+#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter)))
+
+#define bio_sectors(bio)	bvec_iter_sectors((bio)->bi_iter)
+#define bio_end_sector(bio)	bvec_iter_end_sector((bio)->bi_iter)
 
 /*
  * Return the data direction, READ or WRITE.
@@ -123,6 +127,11 @@
 	return NULL;
 }
 
+static inline bool bio_full(struct bio *bio)
+{
+	return bio->bi_vcnt >= bio->bi_max_vecs;
+}
+
 /*
  * will die
  */
@@ -406,13 +415,13 @@
 	return bio_split(bio, sectors, gfp, bs);
 }
 
-extern struct bio_set *bioset_create(unsigned int, unsigned int, int flags);
 enum {
 	BIOSET_NEED_BVECS = BIT(0),
 	BIOSET_NEED_RESCUER = BIT(1),
 };
-extern void bioset_free(struct bio_set *);
-extern mempool_t *biovec_create_pool(int pool_entries);
+extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
+extern void bioset_exit(struct bio_set *);
+extern int biovec_init_pool(mempool_t *pool, int pool_entries);
 
 extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *);
 extern void bio_put(struct bio *);
@@ -421,11 +430,11 @@
 extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
 
-extern struct bio_set *fs_bio_set;
+extern struct bio_set fs_bio_set;
 
 static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
-	return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
+	return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set);
 }
 
 static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
@@ -470,6 +479,10 @@
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
+bool __bio_try_merge_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int off);
+void __bio_add_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int off);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
 struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
@@ -499,7 +512,10 @@
 }
 #endif
 
+extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+			       struct bio *src, struct bvec_iter *src_iter);
 extern void bio_copy_data(struct bio *dst, struct bio *src);
+extern void bio_list_copy_data(struct bio *dst, struct bio *src);
 extern void bio_free_pages(struct bio *bio);
 
 extern struct bio *bio_copy_user_iov(struct request_queue *,
@@ -507,7 +523,13 @@
 				     struct iov_iter *,
 				     gfp_t);
 extern int bio_uncopy_user(struct bio *);
-void zero_fill_bio(struct bio *bio);
+void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
+
+static inline void zero_fill_bio(struct bio *bio)
+{
+	zero_fill_bio_iter(bio, bio->bi_iter);
+}
+
 extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
 extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
 extern unsigned int bvec_nr_vecs(unsigned short idx);
@@ -722,11 +744,11 @@
 	struct kmem_cache *bio_slab;
 	unsigned int front_pad;
 
-	mempool_t *bio_pool;
-	mempool_t *bvec_pool;
+	mempool_t bio_pool;
+	mempool_t bvec_pool;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-	mempool_t *bio_integrity_pool;
-	mempool_t *bvec_integrity_pool;
+	mempool_t bio_integrity_pool;
+	mempool_t bvec_integrity_pool;
 #endif
 
 	/*
@@ -745,6 +767,11 @@
 	struct kmem_cache *slab;
 };
 
+static inline bool bioset_initialized(struct bio_set *bs)
+{
+	return bs->bio_slab != NULL;
+}
+
 /*
  * a small number of entries is fine, not going to be performance critical.
  * basically we just need to survive
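
bioset_create()/bioset_free() give way to bioset_init()/bioset_exit()
on an embedded struct bio_set, saving a pointer chase per allocation.
A sketch of the converted pattern (struct my_dev and the pool size
choice are hypothetical; BIO_POOL_SIZE is the stock mempool depth from
this header):

  struct my_dev {
      struct bio_set bs;    /* embedded, no longer a pointer */
  };

  static int my_dev_init(struct my_dev *dev)
  {
      /* front_pad 0; BIOSET_NEED_BVECS so bio_alloc_bioset() can
       * attach biovecs. Returns 0 or a -errno. */
      return bioset_init(&dev->bs, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
  }

  static void my_dev_exit(struct my_dev *dev)
  {
      if (bioset_initialized(&dev->bs))
          bioset_exit(&dev->bs);
  }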
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ebc34a5..fb35517 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -259,7 +259,8 @@
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 void blk_mq_complete_request(struct request *rq);
-
+bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
+			   struct bio *bio);
 bool blk_mq_queue_stopped(struct request_queue *q);
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 17b18b9..3c4f390 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 #include <linux/bvec.h>
+#include <linux/ktime.h>
 
 struct bio_set;
 struct bio;
@@ -90,10 +91,52 @@
 	return true;
 }
 
-struct blk_issue_stat {
-	u64 stat;
+/*
+ * From most significant bit:
+ * 1 bit: reserved for other usage, see below
+ * 12 bits: original size of bio
+ * 51 bits: issue time of bio
+ */
+#define BIO_ISSUE_RES_BITS      1
+#define BIO_ISSUE_SIZE_BITS     12
+#define BIO_ISSUE_RES_SHIFT     (64 - BIO_ISSUE_RES_BITS)
+#define BIO_ISSUE_SIZE_SHIFT    (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
+#define BIO_ISSUE_TIME_MASK     ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
+#define BIO_ISSUE_SIZE_MASK     \
+	(((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
+#define BIO_ISSUE_RES_MASK      (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))
+
+/* Reserved bit for blk-throtl */
+#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)
+
+struct bio_issue {
+	u64 value;
 };
 
+static inline u64 __bio_issue_time(u64 time)
+{
+	return time & BIO_ISSUE_TIME_MASK;
+}
+
+static inline u64 bio_issue_time(struct bio_issue *issue)
+{
+	return __bio_issue_time(issue->value);
+}
+
+static inline sector_t bio_issue_size(struct bio_issue *issue)
+{
+	return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
+}
+
+static inline void bio_issue_init(struct bio_issue *issue,
+				       sector_t size)
+{
+	size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
+	issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
+			(ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
+			((u64)size << BIO_ISSUE_SIZE_SHIFT));
+}
+
 /*
  * main unit of I/O for the block layer and lower layers (ie drivers and
  * stacking drivers)
@@ -138,7 +181,7 @@
 	struct cgroup_subsys_state *bi_css;
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 	void			*bi_cg_private;
-	struct blk_issue_stat	bi_issue_stat;
+	struct bio_issue	bi_issue;
 #endif
 #endif
 	union {
@@ -186,6 +229,8 @@
 				 * throttling rules. Don't do it again. */
 #define BIO_TRACE_COMPLETION 10	/* bio_endio() should trace the final completion
 				 * of this bio. */
+#define BIO_QUEUE_ENTERED 11	/* can use blk_queue_enter_live() */
+
 /* See BVEC_POOL_OFFSET below before adding new flags */
 
 /*
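
The bio_issue comment above packs three fields into one u64; plugging
in the constants gives BIO_ISSUE_RES_SHIFT = 63 and BIO_ISSUE_SIZE_SHIFT
= 51, so the layout is [63 reserved][62:51 size][50:0 time], and 2^51 ns
of timestamp wraps only after roughly 26 days. A hedged sketch
exercising the new helpers:

  static void demo_bio_issue(void)
  {
      struct bio_issue issue = { .value = 0 };

      bio_issue_init(&issue, 4096 >> 9);    /* 8 sectors */

      pr_info("issued at %llu ns, size %llu sectors\n",
              bio_issue_time(&issue),
              (unsigned long long)bio_issue_size(&issue));
  }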
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5c4eee0..bca3a92 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -125,16 +125,23 @@
 #define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
 /* The per-zone write lock is held for this request */
 #define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
-/* timeout is expired */
-#define RQF_MQ_TIMEOUT_EXPIRED	((__force req_flags_t)(1 << 20))
 /* already slept for hybrid poll */
-#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 21))
+#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 20))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
 	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 
 /*
+ * Request state for blk-mq.
+ */
+enum mq_rq_state {
+	MQ_RQ_IDLE		= 0,
+	MQ_RQ_IN_FLIGHT		= 1,
+	MQ_RQ_COMPLETE		= 2,
+};
+
+/*
  * Try to put the fields that are referenced together in the same cacheline.
  *
  * If you modify this structure, make sure to update blk_rq_init() and
@@ -205,9 +212,20 @@
 
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
-	unsigned long start_time;
-	struct blk_issue_stat issue_stat;
-	/* Number of scatter-gather DMA addr+len pairs after
+	/* Time that I/O was submitted to the kernel. */
+	u64 start_time_ns;
+	/* Time that I/O was submitted to the device. */
+	u64 io_start_time_ns;
+
+#ifdef CONFIG_BLK_WBT
+	unsigned short wbt_flags;
+#endif
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+	unsigned short throtl_size;
+#endif
+
+	/*
+	 * Number of scatter-gather DMA addr+len pairs after
 	 * physical address coalescing is performed.
 	 */
 	unsigned short nr_phys_segments;
@@ -219,32 +237,14 @@
 	unsigned short write_hint;
 	unsigned short ioprio;
 
-	unsigned int timeout;
-
 	void *special;		/* opaque pointer available for LLD use */
 
 	unsigned int extra_len;	/* length of alignment and padding */
 
-	/*
-	 * On blk-mq, the lower bits of ->gstate (generation number and
-	 * state) carry the MQ_RQ_* state value and the upper bits the
-	 * generation number which is monotonically incremented and used to
-	 * distinguish the reuse instances.
-	 *
-	 * ->gstate_seq allows updates to ->gstate and other fields
-	 * (currently ->deadline) during request start to be read
-	 * atomically from the timeout path, so that it can operate on a
-	 * coherent set of information.
-	 */
-	seqcount_t gstate_seq;
-	u64 gstate;
+	enum mq_rq_state state;
+	refcount_t ref;
 
-	/*
-	 * ->aborted_gstate is used by the timeout to claim a specific
-	 * recycle instance of this request.  See blk_mq_timeout_work().
-	 */
-	struct u64_stats_sync aborted_gstate_sync;
-	u64 aborted_gstate;
+	unsigned int timeout;
 
 	/* access through blk_rq_set_deadline, blk_rq_deadline */
 	unsigned long __deadline;
@@ -267,8 +267,6 @@
 
 #ifdef CONFIG_BLK_CGROUP
 	struct request_list *rl;		/* rl this rq is alloced from */
-	unsigned long long start_time_ns;
-	unsigned long long io_start_time_ns;    /* when passed to hardware */
 #endif
 };
 
@@ -328,9 +326,8 @@
 typedef void (exit_rq_fn)(struct request_queue *, struct request *);
 
 enum blk_eh_timer_return {
-	BLK_EH_NOT_HANDLED,
-	BLK_EH_HANDLED,
-	BLK_EH_RESET_TIMER,
+	BLK_EH_DONE,		/* driver has completed the command */
+	BLK_EH_RESET_TIMER,	/* reset timer and try again */
 };
 
 typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
@@ -655,7 +652,7 @@
 
 	struct blk_mq_tag_set	*tag_set;
 	struct list_head	tag_set_list;
-	struct bio_set		*bio_split;
+	struct bio_set		bio_split;
 
 #ifdef CONFIG_BLK_DEBUG_FS
 	struct dentry		*debugfs_dir;
@@ -967,11 +964,8 @@
 extern void blk_init_request_from_bio(struct request *req, struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
-extern struct request *blk_get_request_flags(struct request_queue *,
-					     unsigned int op,
-					     blk_mq_req_flags_t flags);
 extern struct request *blk_get_request(struct request_queue *, unsigned int op,
-				       gfp_t gfp_mask);
+				       blk_mq_req_flags_t flags);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -1788,48 +1782,6 @@
 int kblockd_schedule_work_on(int cpu, struct work_struct *work);
 int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
 
-#ifdef CONFIG_BLK_CGROUP
-/*
- * This should not be using sched_clock(). A real patch is in progress
- * to fix this up, until that is in place we need to disable preemption
- * around sched_clock() in this function and set_io_start_time_ns().
- */
-static inline void set_start_time_ns(struct request *req)
-{
-	preempt_disable();
-	req->start_time_ns = sched_clock();
-	preempt_enable();
-}
-
-static inline void set_io_start_time_ns(struct request *req)
-{
-	preempt_disable();
-	req->io_start_time_ns = sched_clock();
-	preempt_enable();
-}
-
-static inline uint64_t rq_start_time_ns(struct request *req)
-{
-        return req->start_time_ns;
-}
-
-static inline uint64_t rq_io_start_time_ns(struct request *req)
-{
-        return req->io_start_time_ns;
-}
-#else
-static inline void set_start_time_ns(struct request *req) {}
-static inline void set_io_start_time_ns(struct request *req) {}
-static inline uint64_t rq_start_time_ns(struct request *req)
-{
-	return 0;
-}
-static inline uint64_t rq_io_start_time_ns(struct request *req)
-{
-	return 0;
-}
-#endif
-
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
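
Two caller-visible changes above: blk_get_request() now takes
blk_mq_req_flags_t (0 for a plain blocking allocation) instead of
gfp_t, and timeout handlers return BLK_EH_DONE where they previously
returned BLK_EH_NOT_HANDLED or BLK_EH_HANDLED. A hedged conversion
sketch (my_hw_still_busy() is a hypothetical driver helper):

  static struct request *my_alloc_rq(struct request_queue *q)
  {
      return blk_get_request(q, REQ_OP_DRV_IN, 0);
  }

  static enum blk_eh_timer_return my_rq_timed_out(struct request *rq)
  {
      if (my_hw_still_busy(rq))
          return BLK_EH_RESET_TIMER;    /* rearm and wait longer */

      blk_mq_complete_request(rq);      /* finish it ourselves... */
      return BLK_EH_DONE;               /* ...and tell the core so */
  }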
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index 28a7ccc..6aeaf64 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -72,8 +72,7 @@
 void bsg_job_done(struct bsg_job *job, int result,
 		  unsigned int reply_payload_rcv_len);
 struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
-		bsg_job_fn *job_fn, int dd_job_size,
-		void (*release)(struct device *));
+		bsg_job_fn *job_fn, int dd_job_size);
 void bsg_job_put(struct bsg_job *job);
 int __must_check bsg_job_get(struct bsg_job *job);
 
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 0c7dd9c..dac37b6 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -17,17 +17,13 @@
 
 struct bsg_class_device {
 	struct device *class_dev;
-	struct device *parent;
 	int minor;
 	struct request_queue *queue;
-	struct kref ref;
 	const struct bsg_ops *ops;
-	void (*release)(struct device *);
 };
 
 int bsg_register_queue(struct request_queue *q, struct device *parent,
-		const char *name, const struct bsg_ops *ops,
-		void (*release)(struct device *));
+		const char *name, const struct bsg_ops *ops);
 int bsg_scsi_register_queue(struct request_queue *q, struct device *parent);
 void bsg_unregister_queue(struct request_queue *q);
 #else
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 894e5d1..96225a7 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -205,8 +205,6 @@
 			sector_t bblock, unsigned blocksize);
 int bh_uptodate_or_lock(struct buffer_head *bh);
 int bh_submit_read(struct buffer_head *bh);
-loff_t page_cache_seek_hole_data(struct inode *inode, loff_t offset,
-				 loff_t length, int whence);
 
 extern int buffer_heads_over_limit;
 
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index dc5b704..c0e68f9 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -105,6 +105,8 @@
 struct cgroup_file {
 	/* do not access any fields from outside cgroup core */
 	struct kernfs_node *kn;
+	unsigned long notified_at;
+	struct timer_list notify_timer;
 };
 
 /*
@@ -128,6 +130,9 @@
 	struct list_head sibling;
 	struct list_head children;
 
+	/* flush target list anchored at cgrp->rstat_css_list */
+	struct list_head rstat_css_node;
+
 	/*
 	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
 	 * matching css can be looked up using css_from_id().
@@ -256,12 +261,16 @@
 	struct rcu_head rcu_head;
 };
 
+struct cgroup_base_stat {
+	struct task_cputime cputime;
+};
+
 /*
- * cgroup basic resource usage statistics.  Accounting is done per-cpu in
- * cgroup_cpu_stat which is then lazily propagated up the hierarchy on
- * reads.
+ * rstat - cgroup scalable recursive statistics.  Accounting is done
+ * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
+ * hierarchy on reads.
  *
- * When a stat gets updated, the cgroup_cpu_stat and its ancestors are
+ * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are
  * linked into the updated tree.  On the following read, propagation only
  * considers and consumes the updated tree.  This makes reading O(the
  * number of descendants which have been active since last read) instead of
@@ -271,20 +280,24 @@
  * aren't active and stat may be read frequently.  The combination can
  * become very expensive.  By propagating selectively, increasing reading
  * frequency decreases the cost of each read.
+ *
+ * This struct hosts both the fields which implement the above -
+ * updated_children and updated_next - and the fields which track basic
+ * resource statistics on top of it - bsync, bstat and last_bstat.
  */
-struct cgroup_cpu_stat {
+struct cgroup_rstat_cpu {
 	/*
-	 * ->sync protects all the current counters.  These are the only
-	 * fields which get updated in the hot path.
+	 * ->bsync protects ->bstat.  These are the only fields which get
+	 * updated in the hot path.
 	 */
-	struct u64_stats_sync sync;
-	struct task_cputime cputime;
+	struct u64_stats_sync bsync;
+	struct cgroup_base_stat bstat;
 
 	/*
 	 * Snapshots at the last reading.  These are used to calculate the
 	 * deltas to propagate to the global counters.
 	 */
-	struct task_cputime last_cputime;
+	struct cgroup_base_stat last_bstat;
 
 	/*
 	 * Child cgroups with stat updates on this cpu since the last read
@@ -295,18 +308,12 @@
 	 * to the cgroup makes it unnecessary for each per-cpu struct to
 	 * point back to the associated cgroup.
 	 *
-	 * Protected by per-cpu cgroup_cpu_stat_lock.
+	 * Protected by per-cpu cgroup_rstat_cpu_lock.
 	 */
 	struct cgroup *updated_children;	/* terminated by self cgroup */
 	struct cgroup *updated_next;		/* NULL iff not on the list */
 };
 
-struct cgroup_stat {
-	/* per-cpu statistics are collected into the folowing global counters */
-	struct task_cputime cputime;
-	struct prev_cputime prev_cputime;
-};
-
 struct cgroup {
 	/* self css with NULL ->ss, points back to this cgroup */
 	struct cgroup_subsys_state self;
@@ -406,10 +413,14 @@
 	 */
 	struct cgroup *dom_cgrp;
 
+	/* per-cpu recursive resource statistics */
+	struct cgroup_rstat_cpu __percpu *rstat_cpu;
+	struct list_head rstat_css_list;
+
 	/* cgroup basic resource statistics */
-	struct cgroup_cpu_stat __percpu *cpu_stat;
-	struct cgroup_stat pending_stat;	/* pending from children */
-	struct cgroup_stat stat;
+	struct cgroup_base_stat pending_bstat;	/* pending from children */
+	struct cgroup_base_stat bstat;
+	struct prev_cputime prev_cputime;	/* for printing out cputime */
 
 	/*
 	 * list of pidlists, up to two for each namespace (one for procs, one
@@ -570,6 +581,7 @@
 	void (*css_released)(struct cgroup_subsys_state *css);
 	void (*css_free)(struct cgroup_subsys_state *css);
 	void (*css_reset)(struct cgroup_subsys_state *css);
+	void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
 	int (*css_extra_stat_show)(struct seq_file *seq,
 				   struct cgroup_subsys_state *css);
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 473e0c0..c9fdf6f 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -690,11 +690,19 @@
 	char *buf, size_t buflen) {}
 #endif /* !CONFIG_CGROUPS */
 
+#ifdef CONFIG_CGROUPS
+/*
+ * cgroup scalable recursive statistics.
+ */
+void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
+void cgroup_rstat_flush(struct cgroup *cgrp);
+void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
+void cgroup_rstat_flush_hold(struct cgroup *cgrp);
+void cgroup_rstat_flush_release(void);
+
 /*
  * Basic resource stats.
  */
-#ifdef CONFIG_CGROUPS
-
 #ifdef CONFIG_CGROUP_CPUACCT
 void cpuacct_charge(struct task_struct *tsk, u64 cputime);
 void cpuacct_account_field(struct task_struct *tsk, int index, u64 val);
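
The rstat API keeps the update side cheap (just link into the per-cpu
updated tree) and pays the propagation cost on read. A hedged
controller-side sketch of that split (my_* names hypothetical):

  /* Hot path: O(1) per-cpu bookkeeping only. */
  static void my_css_charge(struct cgroup *cgrp, int cpu)
  {
      cgroup_rstat_updated(cgrp, cpu);
  }

  /* Read path: flush pending deltas up the hierarchy first; the
   * subsystem's css_rstat_flush() callback is invoked per cpu. */
  static u64 my_css_read(struct cgroup *cgrp)
  {
      cgroup_rstat_flush(cgrp);
      return 0;    /* then report the aggregated counters */
  }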
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 081281a..b1a5562 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -7,8 +7,7 @@
  */
 
 #include <linux/types.h>
-
-#ifdef CONFIG_COMPAT
+#include <linux/compat_time.h>
 
 #include <linux/stat.h>
 #include <linux/param.h>	/* for HZ */
@@ -21,8 +20,11 @@
 #include <linux/unistd.h>
 
 #include <asm/compat.h>
+
+#ifdef CONFIG_COMPAT
 #include <asm/siginfo.h>
 #include <asm/signal.h>
+#endif
 
 #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
 /*
@@ -83,6 +85,8 @@
 	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 #endif /* COMPAT_SYSCALL_DEFINEx */
 
+#ifdef CONFIG_COMPAT
+
 #ifndef compat_user_stack_pointer
 #define compat_user_stack_pointer() current_user_stack_pointer()
 #endif
@@ -290,8 +294,6 @@
 extern int compat_put_timespec(const struct timespec *, void __user *);
 extern int compat_get_timeval(struct timeval *, const void __user *);
 extern int compat_put_timeval(const struct timeval *, void __user *);
-extern int compat_get_timespec64(struct timespec64 *, const void __user *);
-extern int compat_put_timespec64(const struct timespec64 *, void __user *);
 extern int get_compat_itimerspec64(struct itimerspec64 *its,
 			const struct compat_itimerspec __user *uits);
 extern int put_compat_itimerspec64(const struct itimerspec64 *its,
@@ -330,6 +332,7 @@
 			     struct compat_rusage __user *);
 
 struct compat_siginfo;
+struct __compat_aio_sigset;
 
 struct compat_dirent {
 	u32		d_ino;
@@ -553,6 +556,12 @@
 					compat_long_t nr,
 					struct io_event __user *events,
 					struct compat_timespec __user *timeout);
+asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
+					compat_long_t min_nr,
+					compat_long_t nr,
+					struct io_event __user *events,
+					struct compat_timespec __user *timeout,
+					const struct __compat_aio_sigset __user *usig);
 
 /* fs/cookies.c */
 asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);
@@ -1016,7 +1025,9 @@
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
+#ifndef in_compat_syscall
 static inline bool in_compat_syscall(void) { return false; }
+#endif
 
 #endif /* CONFIG_COMPAT */
 
diff --git a/include/linux/compat_time.h b/include/linux/compat_time.h
new file mode 100644
index 0000000..31f2774
--- /dev/null
+++ b/include/linux/compat_time.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_COMPAT_TIME_H
+#define _LINUX_COMPAT_TIME_H
+
+#include <linux/types.h>
+#include <linux/time64.h>
+
+typedef s32		compat_time_t;
+
+struct compat_timespec {
+	compat_time_t	tv_sec;
+	s32		tv_nsec;
+};
+
+struct compat_timeval {
+	compat_time_t	tv_sec;
+	s32		tv_usec;
+};
+
+extern int compat_get_timespec64(struct timespec64 *, const void __user *);
+extern int compat_put_timespec64(const struct timespec64 *, void __user *);
+
+#endif /* _LINUX_COMPAT_TIME_H */
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 7d98e26..7087446c 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -32,3 +32,17 @@
 #ifdef __noretpoline
 #undef __noretpoline
 #endif
+
+/*
+ * Not all versions of clang implement the type-generic versions
+ * of the builtin overflow checkers. Fortunately, clang implements
+ * __has_builtin allowing us to avoid awkward version
+ * checks. Unfortunately, we don't know which version of gcc clang
+ * pretends to be, so the macro may or may not be defined.
+ */
+#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+#if __has_builtin(__builtin_mul_overflow) && \
+    __has_builtin(__builtin_add_overflow) && \
+    __has_builtin(__builtin_sub_overflow)
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
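
For reference, the probed builtins are type-generic: they return true
on overflow and store the wrapped result through the last argument. A
minimal sketch of what the macro gates (the wrapper name is
hypothetical; the real consumers live in linux/overflow.h):

  #ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
  static inline bool mul_overflows(size_t a, size_t b, size_t *res)
  {
      return __builtin_mul_overflow(a, b, res);
  }
  #endif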
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index b4bf73f..f1a7492 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -343,3 +343,7 @@
  * code
  */
 #define uninitialized_var(x) x = x
+
+#if GCC_VERSION >= 50100
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index bfa0816..547cdc9 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -44,3 +44,7 @@
 #define __builtin_bswap16 _bswap16
 #endif
 
+/*
+ * icc defines __GNUC__, but does not implement the builtin overflow checkers.
+ */
+#undef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ab4711c..42506e4 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -21,7 +21,7 @@
 #define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
 
 #define __branch_check__(x, expect, is_constant) ({			\
-			int ______r;					\
+			long ______r;					\
 			static struct ftrace_likely_data		\
 				__attribute__((__aligned__(4)))		\
 				__attribute__((section("_ftrace_annotated_branch"))) \
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index d950dad..c265e04 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -1,13 +1,6 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
  */
 
 #ifndef _LINUX_CORESIGHT_H
diff --git a/include/linux/cper.h b/include/linux/cper.h
index d14ef4e..9c703a0 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -381,7 +381,7 @@
 /* IA32/X64 Processor Error Section */
 struct cper_sec_proc_ia {
 	__u64	validation_bits;
-	__u8	lapic_id;
+	__u64	lapic_id;
 	__u8	cpuid[48];
 };
 
@@ -551,5 +551,7 @@
 				struct cper_mem_err_compact *);
 void cper_print_proc_arm(const char *pfx,
 			 const struct cper_sec_proc_arm *proc);
+void cper_print_proc_ia(const char *pfx,
+			const struct cper_sec_proc_ia *proc);
 
 #endif
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 87f48dd..882a9b9 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -571,7 +571,7 @@
 			 size_t count);
 };
 
-static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy)
+static inline bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy)
 {
 	/*
 	 * Allow remote callbacks if:
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 1eefabf..4325d6f 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -258,6 +258,7 @@
 
 #ifdef CONFIG_CPU_IDLE
 extern int cpuidle_register_governor(struct cpuidle_governor *gov);
+extern int cpuidle_governor_latency_req(unsigned int cpu);
 #else
 static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 {return 0;}
diff --git a/include/linux/dax.h b/include/linux/dax.h
index f9eb22a..c99692d 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -64,10 +64,10 @@
 struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
-int __bdev_dax_supported(struct super_block *sb, int blocksize);
-static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+bool __bdev_dax_supported(struct block_device *bdev, int blocksize);
+static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize)
 {
-	return __bdev_dax_supported(sb, blocksize);
+	return __bdev_dax_supported(bdev, blocksize);
 }
 
 static inline struct dax_device *fs_dax_get_by_host(const char *host)
@@ -84,9 +84,10 @@
 int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 #else
-static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
+static inline bool bdev_dax_supported(struct block_device *bdev,
+		int blocksize)
 {
-	return -EOPNOTSUPP;
+	return false;
 }
 
 static inline struct dax_device *fs_dax_get_by_host(const char *host)
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 5e335b6..e6c0448 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -29,7 +29,7 @@
 
 #ifdef CONFIG_TASK_DELAY_ACCT
 struct task_delay_info {
-	spinlock_t	lock;
+	raw_spinlock_t	lock;
 	unsigned int	flags;	/* Private per-task flags */
 
 	/* For each stat XXX, add following, aligned appropriately
diff --git a/include/linux/device.h b/include/linux/device.h
index 4779569..055a69d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -25,6 +25,7 @@
 #include <linux/ratelimit.h>
 #include <linux/uidgid.h>
 #include <linux/gfp.h>
+#include <linux/overflow.h>
 #include <asm/device.h>
 
 struct device;
@@ -88,6 +89,8 @@
  * @resume:	Called to bring a device on this bus out of sleep mode.
  * @num_vf:	Called to find out how many virtual functions a device on this
  *		bus supports.
+ * @dma_configure:	Called to set up the DMA configuration on a device on
+ *			this bus.
  * @pm:		Power management operations of this bus, callback the specific
  *		device driver's pm-ops.
  * @iommu_ops:  IOMMU specific operations for this bus, used to attach IOMMU
@@ -96,8 +99,8 @@
  * @p:		The private data of the driver core, only the driver core can
  *		touch this.
  * @lock_key:	Lock class key for use by the lock validator
- * @force_dma:	Assume devices on this bus should be set up by dma_configure()
- * 		even if DMA capability is not explicitly described by firmware.
+ * @need_parent_lock:	When probing or removing a device on this bus, the
+ *			device core should lock the device's parent.
  *
  * A bus is a channel between the processor and one or more devices. For the
  * purposes of the device model, all devices are connected via a bus, even if
@@ -130,6 +133,8 @@
 
 	int (*num_vf)(struct device *dev);
 
+	int (*dma_configure)(struct device *dev);
+
 	const struct dev_pm_ops *pm;
 
 	const struct iommu_ops *iommu_ops;
@@ -137,7 +142,7 @@
 	struct subsys_private *p;
 	struct lock_class_key lock_key;
 
-	bool force_dma;
+	bool need_parent_lock;
 };
 
 extern int __must_check bus_register(struct bus_type *bus);
@@ -668,9 +673,12 @@
 static inline void *devm_kmalloc_array(struct device *dev,
 				       size_t n, size_t size, gfp_t flags)
 {
-	if (size != 0 && n > SIZE_MAX / size)
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
-	return devm_kmalloc(dev, n * size, flags);
+
+	return devm_kmalloc(dev, bytes, flags);
 }
 static inline void *devm_kcalloc(struct device *dev,
 				 size_t n, size_t size, gfp_t flags)
@@ -904,6 +912,8 @@
  * @offline:	Set after successful invocation of bus type's .offline().
  * @of_node_reused: Set if the device-tree node is shared with an ancestor
  *              device.
+ * @dma_32bit_limit: bridge limited to 32bit DMA even if the device itself
+ *		indicates support for a higher limit in the dma_mask field.
  *
  * At the lowest level, every device in a Linux system is represented by an
  * instance of struct device. The device structure contains the information
@@ -992,6 +1002,7 @@
 	bool			offline_disabled:1;
 	bool			offline:1;
 	bool			of_node_reused:1;
+	bool			dma_32bit_limit:1;
 };
 
 static inline struct device *kobj_to_dev(struct kobject *kobj)
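
devm_kmalloc_array() above swaps the open-coded "n > SIZE_MAX / size"
test for check_mul_overflow() from the newly included linux/overflow.h;
the same pattern works in driver code. A hedged sketch (alloc_table()
is hypothetical):

  static void *alloc_table(struct device *dev, size_t nents, size_t entsz)
  {
      size_t bytes;

      /* true means nents * entsz wrapped around SIZE_MAX */
      if (unlikely(check_mul_overflow(nents, entsz, &bytes)))
          return NULL;

      return devm_kmalloc(dev, bytes, GFP_KERNEL);
  }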
diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index c7d844f..a785f25 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -30,8 +30,6 @@
 
 extern void dma_debug_add_bus(struct bus_type *bus);
 
-extern void dma_debug_init(u32 num_entries);
-
 extern int dma_debug_resize_entries(u32 num_entries);
 
 extern void debug_dma_map_page(struct device *dev, struct page *page,
@@ -100,10 +98,6 @@
 {
 }
 
-static inline void dma_debug_init(u32 num_entries)
-{
-}
-
 static inline int dma_debug_resize_entries(u32 num_entries)
 {
 	return 0;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 53ad6a4..8d9f33f 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -59,6 +59,11 @@
 		gfp_t gfp, unsigned long attrs);
 void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs);
+dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
+		unsigned long offset, size_t size, enum dma_data_direction dir,
+		unsigned long attrs);
+int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+		enum dma_data_direction dir, unsigned long attrs);
 int dma_direct_supported(struct device *dev, u64 mask);
-
+int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr);
 #endif /* _LINUX_DMA_DIRECT_H */
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 4c00817..eb9b05a 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -94,11 +94,11 @@
 				 struct dma_fence_cb *cb);
 
 /**
- * struct dma_fence_cb - callback for dma_fence_add_callback
- * @node: used by dma_fence_add_callback to append this struct to fence::cb_list
+ * struct dma_fence_cb - callback for dma_fence_add_callback()
+ * @node: used by dma_fence_add_callback() to append this struct to fence::cb_list
  * @func: dma_fence_func_t to call
  *
- * This struct will be initialized by dma_fence_add_callback, additional
+ * This struct will be initialized by dma_fence_add_callback(), additional
  * data can be passed along by embedding dma_fence_cb in another struct.
  */
 struct dma_fence_cb {
@@ -108,75 +108,143 @@
 
 /**
  * struct dma_fence_ops - operations implemented for fence
- * @get_driver_name: returns the driver name.
- * @get_timeline_name: return the name of the context this fence belongs to.
- * @enable_signaling: enable software signaling of fence.
- * @signaled: [optional] peek whether the fence is signaled, can be null.
- * @wait: custom wait implementation, or dma_fence_default_wait.
- * @release: [optional] called on destruction of fence, can be null
- * @fill_driver_data: [optional] callback to fill in free-form debug info
- * Returns amount of bytes filled, or -errno.
- * @fence_value_str: [optional] fills in the value of the fence as a string
- * @timeline_value_str: [optional] fills in the current value of the timeline
- * as a string
  *
- * Notes on enable_signaling:
- * For fence implementations that have the capability for hw->hw
- * signaling, they can implement this op to enable the necessary
- * irqs, or insert commands into cmdstream, etc.  This is called
- * in the first wait() or add_callback() path to let the fence
- * implementation know that there is another driver waiting on
- * the signal (ie. hw->sw case).
- *
- * This function can be called from atomic context, but not
- * from irq context, so normal spinlocks can be used.
- *
- * A return value of false indicates the fence already passed,
- * or some failure occurred that made it impossible to enable
- * signaling. True indicates successful enabling.
- *
- * fence->error may be set in enable_signaling, but only when false is
- * returned.
- *
- * Calling dma_fence_signal before enable_signaling is called allows
- * for a tiny race window in which enable_signaling is called during,
- * before, or after dma_fence_signal. To fight this, it is recommended
- * that before enable_signaling returns true an extra reference is
- * taken on the fence, to be released when the fence is signaled.
- * This will mean dma_fence_signal will still be called twice, but
- * the second time will be a noop since it was already signaled.
- *
- * Notes on signaled:
- * May set fence->error if returning true.
- *
- * Notes on wait:
- * Must not be NULL, set to dma_fence_default_wait for default implementation.
- * the dma_fence_default_wait implementation should work for any fence, as long
- * as enable_signaling works correctly.
- *
- * Must return -ERESTARTSYS if the wait is intr = true and the wait was
- * interrupted, and remaining jiffies if fence has signaled, or 0 if wait
- * timed out. Can also return other error values on custom implementations,
- * which should be treated as if the fence is signaled. For example a hardware
- * lockup could be reported like that.
- *
- * Notes on release:
- * Can be NULL, this function allows additional commands to run on
- * destruction of the fence. Can be called from irq context.
- * If pointer is set to NULL, kfree will get called instead.
  */
-
 struct dma_fence_ops {
+	/**
+	 * @get_driver_name:
+	 *
+	 * Returns the driver name. This is a callback to allow drivers to
+	 * compute the name at runtime, without having to store it permanently
+	 * for each fence, or build a cache of some sort.
+	 *
+	 * This callback is mandatory.
+	 */
 	const char * (*get_driver_name)(struct dma_fence *fence);
+
+	/**
+	 * @get_timeline_name:
+	 *
+	 * Return the name of the context this fence belongs to. This is a
+	 * callback to allow drivers to compute the name at runtime, without
+	 * having to store it permanently for each fence, or build a cache of
+	 * some sort.
+	 *
+	 * This callback is mandatory.
+	 */
 	const char * (*get_timeline_name)(struct dma_fence *fence);
+
+	/**
+	 * @enable_signaling:
+	 *
+	 * Enable software signaling of fence.
+	 *
+	 * For fence implementations that have the capability for hw->hw
+	 * signaling, they can implement this op to enable the necessary
+	 * interrupts, or insert commands into the cmdstream, etc., to avoid these
+	 * costly operations for the common case where only hw->hw
+	 * synchronization is required.  This is called in the first
+	 * dma_fence_wait() or dma_fence_add_callback() path to let the fence
+	 * implementation know that there is another driver waiting on the
+	 * signal (ie. hw->sw case).
+	 *
+	 * This function can be called from atomic context, but not
+	 * from irq context, so normal spinlocks can be used.
+	 *
+	 * A return value of false indicates the fence already passed,
+	 * or some failure occurred that made it impossible to enable
+	 * signaling. True indicates successful enabling.
+	 *
+	 * &dma_fence.error may be set in enable_signaling, but only when false
+	 * is returned.
+	 *
+	 * Since many implementations can call dma_fence_signal() even before
+	 * @enable_signaling has been called, there's a race window where the
+	 * dma_fence_signal() might result in the final fence reference being
+	 * released and its memory freed. To avoid this, implementations of this
+	 * callback should grab their own reference using dma_fence_get(), to be
+	 * released when the fence is signalled (through e.g. the interrupt
+	 * handler).
+	 *
+	 * This callback is mandatory.
+	 */
 	bool (*enable_signaling)(struct dma_fence *fence);
+
+	/**
+	 * @signaled:
+	 *
+	 * Peek whether the fence is signaled, as a fastpath optimization for
+	 * e.g. dma_fence_wait() or dma_fence_add_callback(). Note that this
+	 * callback does not need to make any guarantees beyond that a fence
+	 * once reported as signalled must keep being reported as signalled
+	 * from then on. This callback may return false even if the fence has
+	 * completed already; in this case the information simply hasn't
+	 * propagated through the system yet. See also dma_fence_is_signaled().
+	 *
+	 * May set &dma_fence.error if returning true.
+	 *
+	 * This callback is optional.
+	 */
 	bool (*signaled)(struct dma_fence *fence);
+
+	/**
+	 * @wait:
+	 *
+	 * Custom wait implementation, or dma_fence_default_wait.
+	 *
+	 * Must not be NULL; set it to dma_fence_default_wait for the default
+	 * implementation. The dma_fence_default_wait implementation works for
+	 * any fence, as long as @enable_signaling works correctly.
+	 *
+	 * Must return -ERESTARTSYS if the wait is intr = true and was
+	 * interrupted, the remaining jiffies if the fence has signaled, or 0
+	 * if the wait timed out. Custom implementations may also return other
+	 * error values, which should be treated as if the fence is signaled;
+	 * a hardware lockup could be reported that way, for example.
+	 *
+	 * This callback is mandatory.
+	 */
 	signed long (*wait)(struct dma_fence *fence,
 			    bool intr, signed long timeout);
+
+	/**
+	 * @release:
+	 *
+	 * Called on destruction of fence to release additional resources.
+	 * Can be called from irq context.  This callback is optional. If it is
+	 * NULL, then dma_fence_free() is instead called as the default
+	 * implementation.
+	 */
 	void (*release)(struct dma_fence *fence);
 
+	/**
+	 * @fill_driver_data:
+	 *
+	 * Callback to fill in free-form debug info.
+	 *
+	 * Returns the number of bytes filled, or a negative error on failure.
+	 *
+	 * This callback is optional.
+	 */
 	int (*fill_driver_data)(struct dma_fence *fence, void *data, int size);
+
+	/**
+	 * @fence_value_str:
+	 *
+	 * Callback to fill in free-form debug info specific to this fence, like
+	 * the sequence number.
+	 *
+	 * This callback is optional.
+	 */
 	void (*fence_value_str)(struct dma_fence *fence, char *str, int size);
+
+	/**
+	 * @timeline_value_str:
+	 *
+	 * Fills in the current value of the timeline as a string, like the
+	 * sequence number. This should match what @fence_value_str prints for
+	 * the most recently signalled fence (assuming no delayed signalling).
+	 *
+	 * This callback is optional.
+	 */
 	void (*timeline_value_str)(struct dma_fence *fence,
 				   char *str, int size);
 };
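To make the @enable_signaling reference rule above concrete, a driver implementation might look roughly like this (a sketch; the foo_* names are hypothetical stand-ins for driver-specific code):

    static bool foo_fence_enable_signaling(struct dma_fence *fence)
    {
            struct foo_fence *f = container_of(fence, struct foo_fence, base);

            if (foo_hw_is_signaled(f))
                    return false;   /* fence already passed */

            /*
             * Hold an extra reference until the IRQ handler calls
             * dma_fence_signal(), so the final put cannot free the
             * fence while signaling is still in flight.
             */
            dma_fence_get(fence);
            foo_hw_enable_completion_irq(f);
            return true;
    }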
@@ -189,7 +257,7 @@
 
 /**
  * dma_fence_put - decreases refcount of the fence
- * @fence:	[in]	fence to reduce refcount of
+ * @fence: fence to reduce refcount of
  */
 static inline void dma_fence_put(struct dma_fence *fence)
 {
@@ -199,7 +267,7 @@
 
 /**
  * dma_fence_get - increases refcount of the fence
- * @fence:	[in]	fence to increase refcount of
+ * @fence: fence to increase refcount of
  *
  * Returns the same fence, with refcount increased by 1.
  */
@@ -213,7 +281,7 @@
 /**
  * dma_fence_get_rcu - get a fence from a reservation_object_list with
  *                     rcu read lock
- * @fence:	[in]	fence to increase refcount of
+ * @fence: fence to increase refcount of
  *
  * Function returns NULL if no refcount could be obtained, or the fence.
  */
@@ -227,7 +295,7 @@
 
 /**
  * dma_fence_get_rcu_safe  - acquire a reference to an RCU tracked fence
- * @fencep:	[in]	pointer to fence to increase refcount of
+ * @fencep: pointer to fence to increase refcount of
  *
  * Function returns NULL if no refcount could be obtained, or the fence.
  * This function handles acquiring a reference to a fence that may be
@@ -289,14 +357,16 @@
 /**
  * dma_fence_is_signaled_locked - Return an indication if the fence
  *                                is signaled yet.
- * @fence:	[in]	the fence to check
+ * @fence: the fence to check
  *
  * Returns true if the fence was already signaled, false if not. Since this
  * function doesn't enable signaling, it is not guaranteed to ever return
- * true if dma_fence_add_callback, dma_fence_wait or
- * dma_fence_enable_sw_signaling haven't been called before.
+ * true if dma_fence_add_callback(), dma_fence_wait() or
+ * dma_fence_enable_sw_signaling() haven't been called before.
  *
- * This function requires fence->lock to be held.
+ * This function requires &dma_fence.lock to be held.
+ *
+ * See also dma_fence_is_signaled().
  */
 static inline bool
 dma_fence_is_signaled_locked(struct dma_fence *fence)
@@ -314,17 +384,19 @@
 
 /**
  * dma_fence_is_signaled - Return an indication if the fence is signaled yet.
- * @fence:	[in]	the fence to check
+ * @fence: the fence to check
  *
  * Returns true if the fence was already signaled, false if not. Since this
  * function doesn't enable signaling, it is not guaranteed to ever return
- * true if dma_fence_add_callback, dma_fence_wait or
- * dma_fence_enable_sw_signaling haven't been called before.
+ * true if dma_fence_add_callback(), dma_fence_wait() or
+ * dma_fence_enable_sw_signaling() haven't been called before.
  *
  * It's recommended for seqno fences to call dma_fence_signal when the
  * operation is complete, it makes it possible to prevent issues from
  * wraparound between time of issue and time of use by checking the return
  * value of this function before calling hardware-specific wait instructions.
+ *
+ * See also dma_fence_is_signaled_locked().
  */
 static inline bool
 dma_fence_is_signaled(struct dma_fence *fence)
@@ -342,8 +414,8 @@
 
 /**
  * __dma_fence_is_later - return if f1 is chronologically later than f2
- * @f1:	[in]	the first fence's seqno
- * @f2:	[in]	the second fence's seqno from the same context
+ * @f1: the first fence's seqno
+ * @f2: the second fence's seqno from the same context
  *
  * Returns true if f1 is chronologically later than f2. Both fences must be
  * from the same context, since a seqno is not common across contexts.
@@ -355,8 +427,8 @@
 
 /**
  * dma_fence_is_later - return if f1 is chronologically later than f2
- * @f1:	[in]	the first fence from the same context
- * @f2:	[in]	the second fence from the same context
+ * @f1: the first fence from the same context
+ * @f2: the second fence from the same context
  *
  * Returns true if f1 is chronologically later than f2. Both fences must be
  * from the same context, since a seqno is not re-used across contexts.
@@ -372,8 +444,8 @@
 
 /**
  * dma_fence_later - return the chronologically later fence
- * @f1:	[in]	the first fence from the same context
- * @f2:	[in]	the second fence from the same context
+ * @f1:	the first fence from the same context
+ * @f2:	the second fence from the same context
  *
  * Returns NULL if both fences are signaled, otherwise the fence that would be
  * signaled last. Both fences must be from the same context, since a seqno is
@@ -398,7 +470,7 @@
 
 /**
  * dma_fence_get_status_locked - returns the status upon completion
- * @fence: [in]	the dma_fence to query
+ * @fence: the dma_fence to query
  *
  * Drivers can supply an optional error status condition before they signal
  * the fence (to indicate whether the fence was completed due to an error
@@ -422,8 +494,8 @@
 
 /**
  * dma_fence_set_error - flag an error condition on the fence
- * @fence: [in]	the dma_fence
- * @error: [in]	the error to store
+ * @fence: the dma_fence
+ * @error: the error to store
  *
  * Drivers can supply an optional error status condition before they signal
  * the fence, to indicate that the fence was completed due to an error
@@ -449,8 +521,8 @@
 
 /**
  * dma_fence_wait - sleep until the fence gets signaled
- * @fence:	[in]	the fence to wait on
- * @intr:	[in]	if true, do an interruptible wait
+ * @fence: the fence to wait on
+ * @intr: if true, do an interruptible wait
  *
  * This function will return -ERESTARTSYS if interrupted by a signal,
  * or 0 if the fence was signaled. Other error values may be
@@ -459,6 +531,8 @@
  * Performs a synchronous wait on this fence. It is assumed the caller
  * directly or indirectly holds a reference to the fence, otherwise the
  * fence might be freed before return, resulting in undefined behavior.
+ *
+ * See also dma_fence_wait_timeout() and dma_fence_wait_any_timeout().
  */
 static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
 {
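Putting the wait and error paths together, a hedged sketch of both sides (the producer flags the error before signaling; the waiter holds its own fence reference, as required above):

    signed long ret;

    /* Producer side, e.g. a reset path: mark the fence as failed. */
    dma_fence_set_error(fence, -EIO);       /* must precede the signal */
    dma_fence_signal(fence);

    /* Consumer side: interruptible wait; our reference keeps the fence alive. */
    ret = dma_fence_wait(fence, true);
    if (ret == -ERESTARTSYS)
            return ret;                     /* interrupted by a signal */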
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 92f2083..e8ca5e6 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -17,6 +17,7 @@
 #define __DMA_IOMMU_H
 
 #ifdef __KERNEL__
+#include <linux/types.h>
 #include <asm/errno.h>
 
 #ifdef CONFIG_IOMMU_DMA
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..f9cc309 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -133,10 +133,10 @@
 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 	u64 (*get_required_mask)(struct device *dev);
 #endif
-	int is_phys;
 };
 
 extern const struct dma_map_ops dma_direct_ops;
+extern const struct dma_map_ops dma_noncoherent_ops;
 extern const struct dma_map_ops dma_virt_ops;
 
 #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
@@ -502,7 +502,7 @@
 #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
 
 #ifndef arch_dma_alloc_attrs
-#define arch_dma_alloc_attrs(dev, flag)	(true)
+#define arch_dma_alloc_attrs(dev)	(true)
 #endif
 
 static inline void *dma_alloc_attrs(struct device *dev, size_t size,
@@ -521,7 +521,7 @@
 	/* let the implementation decide on the zone to allocate from: */
 	flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
 
-	if (!arch_dma_alloc_attrs(&dev, &flag))
+	if (!arch_dma_alloc_attrs(&dev))
 		return NULL;
 	if (!ops->alloc)
 		return NULL;
@@ -572,14 +572,6 @@
 	return 0;
 }
 
-/*
- * This is a hack for the legacy x86 forbid_dac and iommu_sac_force. Please
- * don't use this in new code.
- */
-#ifndef arch_dma_supported
-#define arch_dma_supported(dev, mask)	(1)
-#endif
-
 static inline void dma_check_mask(struct device *dev, u64 mask)
 {
 	if (sme_active() && (mask < (((u64)sme_get_me_mask() << 1) - 1)))
@@ -592,9 +584,6 @@
 
 	if (!ops)
 		return 0;
-	if (!arch_dma_supported(dev, mask))
-		return 0;
-
 	if (!ops->dma_supported)
 		return 1;
 	return ops->dma_supported(dev, mask);
@@ -839,7 +828,7 @@
 #define dma_mmap_writecombine dma_mmap_wc
 #endif
 
-#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG)
+#ifdef CONFIG_NEED_DMA_MAP_STATE
 #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME
 #define DEFINE_DMA_UNMAP_LEN(LEN_NAME)          __u32 LEN_NAME
 #define dma_unmap_addr(PTR, ADDR_NAME)           ((PTR)->ADDR_NAME)
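The DEFINE_DMA_UNMAP_* helpers now compile away whenever CONFIG_NEED_DMA_MAP_STATE is unset, regardless of CONFIG_DMA_API_DEBUG. A typical use, sketched with a hypothetical foo_buffer:

    struct foo_buffer {
            void *vaddr;
            DEFINE_DMA_UNMAP_ADDR(addr);    /* empty on coherent-only configs */
            DEFINE_DMA_UNMAP_LEN(len);
    };

    /* After a successful dma_map_single(): */
    dma_unmap_addr_set(buf, addr, dma_handle);
    dma_unmap_len_set(buf, len, size);

    /* At teardown: */
    dma_unmap_single(dev, dma_unmap_addr(buf, addr),
                     dma_unmap_len(buf, len), DMA_TO_DEVICE);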
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
new file mode 100644
index 0000000..10b2654
--- /dev/null
+++ b/include/linux/dma-noncoherent.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_DMA_NONCOHERENT_H
+#define _LINUX_DMA_NONCOHERENT_H 1
+
+#include <linux/dma-mapping.h>
+
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs);
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t dma_addr, unsigned long attrs);
+
+#ifdef CONFIG_DMA_NONCOHERENT_MMAP
+int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		unsigned long attrs);
+#else
+#define arch_dma_mmap NULL
+#endif /* CONFIG_DMA_NONCOHERENT_MMAP */
+
+#ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC
+void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+		enum dma_data_direction direction);
+#else
+#define arch_dma_cache_sync NULL
+#endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir);
+#else
+static inline void arch_sync_dma_for_device(struct device *dev,
+		phys_addr_t paddr, size_t size, enum dma_data_direction dir)
+{
+}
+#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir);
+#else
+static inline void arch_sync_dma_for_cpu(struct device *dev,
+		phys_addr_t paddr, size_t size, enum dma_data_direction dir)
+{
+}
+#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */
+
+#endif /* _LINUX_DMA_NONCOHERENT_H */
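An architecture opts into this header by selecting the matching CONFIG_ARCH_HAS_SYNC_DMA_FOR_* symbols and supplying the cache maintenance, roughly as below (a sketch; arch_wback_cache()/arch_inv_cache() are hypothetical stand-ins for the real cache primitives):

    /* Illustrative only: arch/<arch>/mm/dma-noncoherent.c */
    void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
                    size_t size, enum dma_data_direction dir)
    {
            /* Write back dirty lines so the device sees up-to-date memory. */
            arch_wback_cache(phys_to_virt(paddr), size);
    }

    void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
                    size_t size, enum dma_data_direction dir)
    {
            /* Discard stale lines before the CPU reads device writes. */
            if (dir != DMA_TO_DEVICE)
                    arch_inv_cache(phys_to_virt(paddr), size);
    }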
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 3016d8c..56add82 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -397,7 +397,7 @@
 	u32 set_bar_attributes;
 	u64 romsize;
 	u32 romimage;
-} efi_pci_io_protocol_32;
+} efi_pci_io_protocol_32_t;
 
 typedef struct {
 	u64 poll_mem;
@@ -417,7 +417,7 @@
 	u64 set_bar_attributes;
 	u64 romsize;
 	u64 romimage;
-} efi_pci_io_protocol_64;
+} efi_pci_io_protocol_64_t;
 
 typedef struct {
 	void *poll_mem;
@@ -437,7 +437,7 @@
 	void *set_bar_attributes;
 	uint64_t romsize;
 	void *romimage;
-} efi_pci_io_protocol;
+} efi_pci_io_protocol_t;
 
 #define EFI_PCI_IO_ATTRIBUTE_ISA_MOTHERBOARD_IO 0x0001
 #define EFI_PCI_IO_ATTRIBUTE_ISA_IO 0x0002
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 6d9e230..a02deea 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -218,8 +218,6 @@
 extern ssize_t elv_iosched_show(struct request_queue *, char *);
 extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
 
-extern int elevator_init(struct request_queue *, char *);
-extern void elevator_exit(struct request_queue *, struct elevator_queue *);
 extern bool elv_bio_merge_ok(struct request *, struct bio *);
 extern struct elevator_queue *elevator_alloc(struct request_queue *,
 					struct elevator_type *);
diff --git a/include/linux/export.h b/include/linux/export.h
index 1a1dfdb..b768d6d 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -10,14 +10,8 @@
  * hackers place grumpy comments in header files.
  */
 
-/* Some toolchains use a `_' prefix for all user symbols. */
-#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
-#define __VMLINUX_SYMBOL(x) _##x
-#define __VMLINUX_SYMBOL_STR(x) "_" #x
-#else
 #define __VMLINUX_SYMBOL(x) x
 #define __VMLINUX_SYMBOL_STR(x) #x
-#endif
 
 /* Indirect, so macros are expanded before pasting. */
 #define VMLINUX_SYMBOL(x) __VMLINUX_SYMBOL(x)
@@ -46,14 +40,14 @@
 #if defined(CONFIG_MODULE_REL_CRCS)
 #define __CRC_SYMBOL(sym, sec)						\
 	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
-	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
-	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) " - .	\n"	\
+	    "	.weak	__crc_" #sym "				\n"	\
+	    "	.long	__crc_" #sym " - .			\n"	\
 	    "	.previous					\n");
 #else
 #define __CRC_SYMBOL(sym, sec)						\
 	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
-	    "	.weak	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
-	    "	.long	" VMLINUX_SYMBOL_STR(__crc_##sym) "	\n"	\
+	    "	.weak	__crc_" #sym "				\n"	\
+	    "	.long	__crc_" #sym "				\n"	\
 	    "	.previous					\n");
 #endif
 #else
@@ -66,7 +60,7 @@
 	__CRC_SYMBOL(sym, sec)						\
 	static const char __kstrtab_##sym[]				\
 	__attribute__((section("__ksymtab_strings"), aligned(1)))	\
-	= VMLINUX_SYMBOL_STR(sym);					\
+	= #sym;								\
 	static const struct kernel_symbol __ksymtab_##sym		\
 	__used								\
 	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index 4105041..2dd566c 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -42,6 +42,8 @@
 #if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE))
 int request_firmware(const struct firmware **fw, const char *name,
 		     struct device *device);
+int firmware_request_nowarn(const struct firmware **fw, const char *name,
+			    struct device *device);
 int request_firmware_nowait(
 	struct module *module, bool uevent,
 	const char *name, struct device *device, gfp_t gfp, void *context,
@@ -59,6 +61,14 @@
 {
 	return -EINVAL;
 }
+
+static inline int firmware_request_nowarn(const struct firmware **fw,
+					  const char *name,
+					  struct device *device)
+{
+	return -EINVAL;
+}
+
 static inline int request_firmware_nowait(
 	struct module *module, bool uevent,
 	const char *name, struct device *device, gfp_t gfp, void *context,
diff --git a/include/linux/fpga/altera-pr-ip-core.h b/include/linux/fpga/altera-pr-ip-core.h
index 3810a90..7d46647 100644
--- a/include/linux/fpga/altera-pr-ip-core.h
+++ b/include/linux/fpga/altera-pr-ip-core.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for Altera Partial Reconfiguration IP Core
  *
@@ -5,18 +6,6 @@
  *
  * Based on socfpga-a10.c Copyright (C) 2015-2016 Altera Corporation
  *  by Alan Tull <atull@opensource.altera.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef _ALT_PR_IP_CORE_H
diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h
index 3694821..ce550fc 100644
--- a/include/linux/fpga/fpga-bridge.h
+++ b/include/linux/fpga/fpga-bridge.h
@@ -62,8 +62,11 @@
 			       struct fpga_image_info *info,
 			       struct list_head *bridge_list);
 
-int fpga_bridge_register(struct device *dev, const char *name,
-			 const struct fpga_bridge_ops *br_ops, void *priv);
-void fpga_bridge_unregister(struct device *dev);
+struct fpga_bridge *fpga_bridge_create(struct device *dev, const char *name,
+				       const struct fpga_bridge_ops *br_ops,
+				       void *priv);
+void fpga_bridge_free(struct fpga_bridge *br);
+int fpga_bridge_register(struct fpga_bridge *br);
+void fpga_bridge_unregister(struct fpga_bridge *br);
 
 #endif /* _LINUX_FPGA_BRIDGE_H */
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 3c6de23..eec7c24 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -1,20 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * FPGA Framework
  *
  *  Copyright (C) 2013-2016 Altera Corporation
  *  Copyright (C) 2017 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #ifndef _LINUX_FPGA_MGR_H
 #define _LINUX_FPGA_MGR_H
@@ -170,9 +159,11 @@
 
 void fpga_mgr_put(struct fpga_manager *mgr);
 
-int fpga_mgr_register(struct device *dev, const char *name,
-		      const struct fpga_manager_ops *mops, void *priv);
-
-void fpga_mgr_unregister(struct device *dev);
+struct fpga_manager *fpga_mgr_create(struct device *dev, const char *name,
+				     const struct fpga_manager_ops *mops,
+				     void *priv);
+void fpga_mgr_free(struct fpga_manager *mgr);
+int fpga_mgr_register(struct fpga_manager *mgr);
+void fpga_mgr_unregister(struct fpga_manager *mgr);
 
 #endif /*_LINUX_FPGA_MGR_H */
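The manager API now separates allocation from registration (mirroring the bridge and region changes nearby), so a driver can finish its own setup between the two steps. A probe-path sketch, with foo_fpga_ops and priv as hypothetical driver state:

    struct fpga_manager *mgr;
    int ret;

    mgr = fpga_mgr_create(dev, "Foo FPGA Manager", &foo_fpga_ops, priv);
    if (!mgr)
            return -ENOMEM;

    ret = fpga_mgr_register(mgr);
    if (ret)
            fpga_mgr_free(mgr);     /* only needed if registration failed */
    return ret;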
diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h
index b652031..d7071cd 100644
--- a/include/linux/fpga/fpga-region.h
+++ b/include/linux/fpga/fpga-region.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
 #ifndef _FPGA_REGION_H
 #define _FPGA_REGION_H
 
@@ -14,7 +16,6 @@
  * @info: FPGA image info
  * @priv: private data
  * @get_bridges: optional function to get bridges to a list
- * @groups: optional attribute groups.
  */
 struct fpga_region {
 	struct device dev;
@@ -24,7 +25,6 @@
 	struct fpga_image_info *info;
 	void *priv;
 	int (*get_bridges)(struct fpga_region *region);
-	const struct attribute_group **groups;
 };
 
 #define to_fpga_region(d) container_of(d, struct fpga_region, dev)
@@ -34,7 +34,12 @@
 	int (*match)(struct device *, const void *));
 
 int fpga_region_program_fpga(struct fpga_region *region);
-int fpga_region_register(struct device *dev, struct fpga_region *region);
-int fpga_region_unregister(struct fpga_region *region);
+
+struct fpga_region
+*fpga_region_create(struct device *dev, struct fpga_manager *mgr,
+		    int (*get_bridges)(struct fpga_region *));
+void fpga_region_free(struct fpga_region *region);
+int fpga_region_register(struct fpga_region *region);
+void fpga_region_unregister(struct fpga_region *region);
 
 #endif /* _FPGA_REGION_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 760d8da..7ef7193 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -94,7 +94,7 @@
 
 /*
  * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
- * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
+ * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open()
  */
 
 /* file is open for reading */
@@ -1250,7 +1250,7 @@
 }
 
 struct fasync_struct {
-	spinlock_t		fa_lock;
+	rwlock_t		fa_lock;
 	int			magic;
 	int			fa_fd;
 	struct fasync_struct	*fa_next; /* singly linked list */
@@ -1364,9 +1364,9 @@
 	void                    *s_security;
 #endif
 	const struct xattr_handler **s_xattr;
-
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
 	const struct fscrypt_operations	*s_cop;
-
+#endif
 	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
 	struct block_device	*s_bdev;
@@ -1597,6 +1597,11 @@
 	__sb_start_write(sb, SB_FREEZE_FS, true);
 }
 
+static inline int sb_start_intwrite_trylock(struct super_block *sb)
+{
+	return __sb_start_write(sb, SB_FREEZE_FS, false);
+}
+
 
 extern bool inode_owner_or_capable(const struct inode *inode);
 
@@ -1711,6 +1716,8 @@
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
 	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+	__poll_t (*poll_mask) (struct file *, __poll_t);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
 	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
@@ -2570,7 +2577,7 @@
 
 #ifdef CONFIG_BLOCK
 extern void check_disk_size_change(struct gendisk *disk,
-				   struct block_device *bdev);
+		struct block_device *bdev, bool verbose);
 extern int revalidate_disk(struct gendisk *);
 extern int check_disk_change(struct block_device *);
 extern int __invalidate_device(struct block_device *, bool);
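The two new file_operations hooks split the classic ->poll() in half: ->get_poll_head() hands the core a wait queue to sleep on, and ->poll_mask() reports readiness without blocking. A hedged driver-side sketch (the foo_* names are hypothetical):

    static struct wait_queue_head *foo_get_poll_head(struct file *file,
                                                     __poll_t events)
    {
            struct foo_dev *fd = file->private_data;

            return &fd->waitq;      /* the core waits here on our behalf */
    }

    static __poll_t foo_poll_mask(struct file *file, __poll_t events)
    {
            struct foo_dev *fd = file->private_data;

            /* Must not sleep: just report the current readiness. */
            return foo_data_pending(fd) ? EPOLLIN | EPOLLRDNORM : 0;
    }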
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h
index 44b50c0..25b6492 100644
--- a/include/linux/fscrypt_notsupp.h
+++ b/include/linux/fscrypt_notsupp.h
@@ -64,16 +64,6 @@
 	return;
 }
 
-static inline void fscrypt_set_d_op(struct dentry *dentry)
-{
-	return;
-}
-
-static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
-{
-	return;
-}
-
 /* policy.c */
 static inline int fscrypt_ioctl_set_policy(struct file *filp,
 					   const void __user *arg)
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
index 477a7a6..5080cb1 100644
--- a/include/linux/fscrypt_supp.h
+++ b/include/linux/fscrypt_supp.h
@@ -29,7 +29,7 @@
 	int (*set_context)(struct inode *, const void *, size_t, void *);
 	bool (*dummy_context)(struct inode *);
 	bool (*empty_dir)(struct inode *);
-	unsigned (*max_namelen)(struct inode *);
+	unsigned int max_namelen;
 };
 
 struct fscrypt_ctx {
@@ -74,20 +74,6 @@
 
 extern void fscrypt_restore_control_page(struct page *);
 
-extern const struct dentry_operations fscrypt_d_ops;
-
-static inline void fscrypt_set_d_op(struct dentry *dentry)
-{
-	d_set_d_op(dentry, &fscrypt_d_ops);
-}
-
-static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
-{
-	spin_lock(&dentry->d_lock);
-	dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
-	spin_unlock(&dentry->d_lock);
-}
-
 /* policy.c */
 extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
 extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 3998892..2f1327c 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -16,7 +16,7 @@
 /*
  * Heterogeneous Memory Management (HMM)
  *
- * See Documentation/vm/hmm.txt for reasons and overview of what HMM is and it
+ * See Documentation/vm/hmm.rst for reasons and overview of what HMM is and what it
  * is for. Here we focus on the HMM API description, with some explanation of
  * the underlying implementation.
  *
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index ddf7f9c..89110d8 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -192,13 +192,6 @@
 	unsigned long shift;
 };
 
-struct host1x_waitchk {
-	struct host1x_bo *bo;
-	u32 offset;
-	u32 syncpt_id;
-	u32 thresh;
-};
-
 struct host1x_job {
 	/* When refcount goes to zero, job can be freed */
 	struct kref ref;
@@ -209,19 +202,15 @@
 	/* Channel where job is submitted to */
 	struct host1x_channel *channel;
 
-	u32 client;
+	/* client where the job originated */
+	struct host1x_client *client;
 
 	/* Gathers and their memory */
 	struct host1x_job_gather *gathers;
 	unsigned int num_gathers;
 
-	/* Wait checks to be processed at submit time */
-	struct host1x_waitchk *waitchk;
-	unsigned int num_waitchk;
-	u32 waitchk_mask;
-
 	/* Array of handles to be pinned & unpinned */
-	struct host1x_reloc *relocarray;
+	struct host1x_reloc *relocs;
 	unsigned int num_relocs;
 	struct host1x_job_unpin_data *unpins;
 	unsigned int num_unpins;
@@ -261,10 +250,9 @@
 };
 
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs,
-				    u32 num_waitchks);
-void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id,
-			   u32 words, u32 offset);
+				    u32 num_cmdbufs, u32 num_relocs);
+void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
+			   unsigned int words, unsigned int offset);
 struct host1x_job *host1x_job_get(struct host1x_job *job);
 void host1x_job_put(struct host1x_job *job);
 int host1x_job_pin(struct host1x_job *job, struct device *dev);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 192ed8f..11b5612 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -163,6 +163,7 @@
  * 2 . 4  (Windows 8)
  * 3 . 0  (Windows 8 R2)
  * 4 . 0  (Windows 10)
+ * 5 . 0  (Newer Windows 10)
  */
 
 #define VERSION_WS2008  ((0 << 16) | (13))
@@ -170,10 +171,11 @@
 #define VERSION_WIN8    ((2 << 16) | (4))
 #define VERSION_WIN8_1    ((3 << 16) | (0))
 #define VERSION_WIN10	((4 << 16) | (0))
+#define VERSION_WIN10_V5 ((5 << 16) | (0))
 
 #define VERSION_INVAL -1
 
-#define VERSION_CURRENT VERSION_WIN10
+#define VERSION_CURRENT VERSION_WIN10_V5
 
 /* Make maximum size of pipe payload of 16K */
 #define MAX_PIPE_DATA_PAYLOAD		(sizeof(u8) * 16384)
@@ -570,7 +572,14 @@
 	struct vmbus_channel_message_header header;
 	u32 vmbus_version_requested;
 	u32 target_vcpu; /* The VCPU the host should respond to */
-	u64 interrupt_page;
+	union {
+		u64 interrupt_page;
+		struct {
+			u8	msg_sint;
+			u8	padding1[3];
+			u32	padding2;
+		};
+	};
 	u64 monitor_page1;
 	u64 monitor_page2;
 } __packed;
@@ -585,6 +594,19 @@
 struct vmbus_channel_version_response {
 	struct vmbus_channel_message_header header;
 	u8 version_supported;
+
+	u8 connection_state;
+	u16 padding;
+
+	/*
+	 * On new hosts that support VMBus protocol 5.0, we must use
+	 * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message,
+	 * and for subsequent messages, we must use the Message Connection ID
+	 * field in the host-returned Version Response Message.
+	 *
+	 * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1).
+	 */
+	u32 msg_conn_id;
 } __packed;
 
 enum vmbus_channel_state {
diff --git a/include/linux/ide.h b/include/linux/ide.h
index ca9d34f..c74b032 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -961,7 +961,7 @@
 typedef struct {
 	const char	*name;
 	umode_t		mode;
-	const struct file_operations *proc_fops;
+	int (*show)(struct seq_file *, void *);
 } ide_proc_entry_t;
 
 void proc_ide_create(void);
@@ -973,8 +973,8 @@
 void ide_proc_register_driver(ide_drive_t *, struct ide_driver *);
 void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *);
 
-extern const struct file_operations ide_capacity_proc_fops;
-extern const struct file_operations ide_geometry_proc_fops;
+int ide_capacity_proc_show(struct seq_file *m, void *v);
+int ide_geometry_proc_show(struct seq_file *m, void *v);
 #else
 static inline void proc_ide_create(void) { ; }
 static inline void proc_ide_destroy(void) { ; }
@@ -1508,8 +1508,6 @@
 	hwif->hwif_data = data;
 }
 
-extern void ide_toggle_bounce(ide_drive_t *drive, int on);
-
 u64 ide_get_lba_addr(struct ide_cmd *, int);
 u8 ide_dump_status(ide_drive_t *, const char *, u8);
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5426627..eeceac3 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -432,11 +432,18 @@
 #define force_irqthreads	(0)
 #endif
 
-#ifndef __ARCH_SET_SOFTIRQ_PENDING
-#define set_softirq_pending(x) (local_softirq_pending() = (x))
-#define or_softirq_pending(x)  (local_softirq_pending() |= (x))
+#ifndef local_softirq_pending
+
+#ifndef local_softirq_pending_ref
+#define local_softirq_pending_ref irq_stat.__softirq_pending
 #endif
 
+#define local_softirq_pending()	(__this_cpu_read(local_softirq_pending_ref))
+#define set_softirq_pending(x)	(__this_cpu_write(local_softirq_pending_ref, (x)))
+#define or_softirq_pending(x)	(__this_cpu_or(local_softirq_pending_ref, (x)))
+
+#endif /* local_softirq_pending */
+
 /* Some architectures might implement lazy enabling/disabling of
  * interrupts. In some cases, such as stop_machine, we might want
  * to ensure that after a local_irq_disable(), interrupts have
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 19a07de..a044a82 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 
+struct address_space;
 struct fiemap_extent_info;
 struct inode;
 struct iov_iter;
@@ -18,6 +19,7 @@
 #define IOMAP_DELALLOC	0x02	/* delayed allocation blocks */
 #define IOMAP_MAPPED	0x03	/* blocks allocated at @addr */
 #define IOMAP_UNWRITTEN	0x04	/* blocks allocated at @addr in unwritten state */
+#define IOMAP_INLINE	0x05	/* data inline in the inode */
 
 /*
  * Flags for all iomap mappings:
@@ -26,15 +28,19 @@
  * written data and requires fdatasync to commit them to persistent storage.
  */
 #define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
-#define IOMAP_F_BOUNDARY	0x02	/* mapping ends at metadata boundary */
-#define IOMAP_F_DIRTY		0x04	/* uncommitted metadata */
+#define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
  */
 #define IOMAP_F_MERGED		0x10	/* contains multiple blocks/extents */
 #define IOMAP_F_SHARED		0x20	/* block shared with another file */
-#define IOMAP_F_DATA_INLINE	0x40	/* data inline in the inode */
+
+/*
+ * Flags from 0x1000 up are for file system specific usage:
+ */
+#define IOMAP_F_PRIVATE		0x1000
+
 
 /*
  * Magic value for addr:
@@ -59,7 +65,7 @@
 #define IOMAP_REPORT		(1 << 2) /* report extent status, e.g. FIEMAP */
 #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
 #define IOMAP_DIRECT		(1 << 4) /* direct I/O */
-#define IOMAP_NOWAIT		(1 << 5) /* Don't wait for writeback */
+#define IOMAP_NOWAIT		(1 << 5) /* do not block */
 
 struct iomap_ops {
 	/*
@@ -95,6 +101,8 @@
 		const struct iomap_ops *ops);
 loff_t iomap_seek_data(struct inode *inode, loff_t offset,
 		const struct iomap_ops *ops);
+sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
+		const struct iomap_ops *ops);
 
 /*
  * Flags for direct I/O ->end_io:
@@ -106,4 +114,15 @@
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
 
+#ifdef CONFIG_SWAP
+struct file;
+struct swap_info_struct;
+
+int iomap_swapfile_activate(struct swap_info_struct *sis,
+		struct file *swap_file, sector_t *pagespan,
+		const struct iomap_ops *ops);
+#else
+# define iomap_swapfile_activate(sis, swapfile, pagespan, ops)	(-EIO)
+#endif /* CONFIG_SWAP */
+
 #endif /* LINUX_IOMAP_H */
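iomap_bmap() gives iomap-converted filesystems a drop-in legacy FIBMAP implementation; wiring it into the address_space_operations is a one-liner (a sketch; foo_iomap_ops stands for whatever iomap_ops the filesystem already uses):

    static sector_t foo_bmap(struct address_space *mapping, sector_t block)
    {
            return iomap_bmap(mapping, block, &foo_iomap_ops);
    }

    /* ... then in the aops: .bmap = foo_bmap, */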
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index cb9a924..70d01ed 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_IOMMU_HELPER_H
 #define _LINUX_IOMMU_HELPER_H
 
+#include <linux/bug.h>
 #include <linux/kernel.h>
 
 static inline unsigned long iommu_device_max_index(unsigned long size,
@@ -14,9 +15,15 @@
 		return size;
 }
 
-extern int iommu_is_span_boundary(unsigned int index, unsigned int nr,
-				  unsigned long shift,
-				  unsigned long boundary_size);
+static inline int iommu_is_span_boundary(unsigned int index, unsigned int nr,
+		unsigned long shift, unsigned long boundary_size)
+{
+	BUG_ON(!is_power_of_2(boundary_size));
+
+	shift = (shift + index) & (boundary_size - 1);
+	return shift + nr > boundary_size;
+}
+
 extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 				      unsigned long start, unsigned int nr,
 				      unsigned long shift,
diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 8b0626c..41f5c08 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -23,8 +23,10 @@
 struct module;
 struct device;
 
-/* Opaque type for a IPMI message user.  One of these is needed to
-   send and receive messages. */
+/*
+ * Opaque type for an IPMI message user.  One of these is needed to
+ * send and receive messages.
+ */
 typedef struct ipmi_user *ipmi_user_t;
 
 /*
@@ -37,28 +39,36 @@
 struct ipmi_recv_msg {
 	struct list_head link;
 
-	/* The type of message as defined in the "Receive Types"
-	   defines above. */
+	/*
+	 * The type of message as defined in the "Receive Types"
+	 * defines above.
+	 */
 	int              recv_type;
 
-	ipmi_user_t      user;
+	struct ipmi_user *user;
 	struct ipmi_addr addr;
 	long             msgid;
 	struct kernel_ipmi_msg  msg;
 
-	/* The user_msg_data is the data supplied when a message was
-	   sent, if this is a response to a sent message.  If this is
-	   not a response to a sent message, then user_msg_data will
-	   be NULL.  If the user above is NULL, then this will be the
-	   intf. */
+	/*
+	 * The user_msg_data is the data supplied when a message was
+	 * sent, if this is a response to a sent message.  If this is
+	 * not a response to a sent message, then user_msg_data will
+	 * be NULL.  If the user above is NULL, then this will be the
+	 * intf.
+	 */
 	void             *user_msg_data;
 
-	/* Call this when done with the message.  It will presumably free
-	   the message and do any other necessary cleanup. */
+	/*
+	 * Call this when done with the message.  It will presumably free
+	 * the message and do any other necessary cleanup.
+	 */
 	void (*done)(struct ipmi_recv_msg *msg);
 
-	/* Place-holder for the data, don't make any assumptions about
-	   the size or existence of this, since it may change. */
+	/*
+	 * Place-holder for the data; don't make any assumptions about
+	 * the size or existence of this, since it may change.
+	 */
 	unsigned char   msg_data[IPMI_MAX_MSG_LENGTH];
 };
 
@@ -66,54 +76,77 @@
 void ipmi_free_recv_msg(struct ipmi_recv_msg *msg);
 
 struct ipmi_user_hndl {
-	/* Routine type to call when a message needs to be routed to
-	   the upper layer.  This will be called with some locks held,
-	   the only IPMI routines that can be called are ipmi_request
-	   and the alloc/free operations.  The handler_data is the
-	   variable supplied when the receive handler was registered. */
+	/*
+	 * Routine type to call when a message needs to be routed to
+	 * the upper layer.  This will be called with some locks held,
+	 * the only IPMI routines that can be called are ipmi_request
+	 * and the alloc/free operations.  The handler_data is the
+	 * variable supplied when the receive handler was registered.
+	 */
 	void (*ipmi_recv_hndl)(struct ipmi_recv_msg *msg,
 			       void                 *user_msg_data);
 
-	/* Called when the interface detects a watchdog pre-timeout.  If
-	   this is NULL, it will be ignored for the user. */
+	/*
+	 * Called when the interface detects a watchdog pre-timeout.  If
+	 * this is NULL, it will be ignored for the user.
+	 */
 	void (*ipmi_watchdog_pretimeout)(void *handler_data);
+
+	/*
+	 * If not NULL, called at panic time after the interface has
+	 * been set up to handle run to completion.
+	 */
+	void (*ipmi_panic_handler)(void *handler_data);
+
+	/*
+	 * Called when the interface has been removed.  After this returns
+	 * the user handle will be invalid.  The interface may or may
+	 * not be usable when this is called, but it will return errors
+	 * if it is not usable.
+	 */
+	void (*shutdown)(void *handler_data);
 };
 
 /* Create a new user of the IPMI layer on the given interface number. */
 int ipmi_create_user(unsigned int          if_num,
 		     const struct ipmi_user_hndl *handler,
 		     void                  *handler_data,
-		     ipmi_user_t           *user);
+		     struct ipmi_user      **user);
 
-/* Destroy the given user of the IPMI layer.  Note that after this
-   function returns, the system is guaranteed to not call any
-   callbacks for the user.  Thus as long as you destroy all the users
-   before you unload a module, you will be safe.  And if you destroy
-   the users before you destroy the callback structures, it should be
-   safe, too. */
-int ipmi_destroy_user(ipmi_user_t user);
+/*
+ * Destroy the given user of the IPMI layer.  Note that after this
+ * function returns, the system is guaranteed to not call any
+ * callbacks for the user.  Thus as long as you destroy all the users
+ * before you unload a module, you will be safe.  And if you destroy
+ * the users before you destroy the callback structures, it should be
+ * safe, too.
+ */
+int ipmi_destroy_user(struct ipmi_user *user);
 
 /* Get the IPMI version of the BMC we are talking to. */
-int ipmi_get_version(ipmi_user_t   user,
+int ipmi_get_version(struct ipmi_user *user,
 		     unsigned char *major,
 		     unsigned char *minor);
 
-/* Set and get the slave address and LUN that we will use for our
-   source messages.  Note that this affects the interface, not just
-   this user, so it will affect all users of this interface.  This is
-   so some initialization code can come in and do the OEM-specific
-   things it takes to determine your address (if not the BMC) and set
-   it for everyone else.  Note that each channel can have its own address. */
-int ipmi_set_my_address(ipmi_user_t   user,
+/*
+ * Set and get the slave address and LUN that we will use for our
+ * source messages.  Note that this affects the interface, not just
+ * this user, so it will affect all users of this interface.  This is
+ * so some initialization code can come in and do the OEM-specific
+ * things it takes to determine your address (if not the BMC) and set
+ * it for everyone else.  Note that each channel can have its own
+ * address.
+ */
+int ipmi_set_my_address(struct ipmi_user *user,
 			unsigned int  channel,
 			unsigned char address);
-int ipmi_get_my_address(ipmi_user_t   user,
+int ipmi_get_my_address(struct ipmi_user *user,
 			unsigned int  channel,
 			unsigned char *address);
-int ipmi_set_my_LUN(ipmi_user_t   user,
+int ipmi_set_my_LUN(struct ipmi_user *user,
 		    unsigned int  channel,
 		    unsigned char LUN);
-int ipmi_get_my_LUN(ipmi_user_t   user,
+int ipmi_get_my_LUN(struct ipmi_user *user,
 		    unsigned int  channel,
 		    unsigned char *LUN);
 
@@ -130,7 +163,7 @@
  * it makes no sense to do it here.  However, this can be used if you
  * have unusual requirements.
  */
-int ipmi_request_settime(ipmi_user_t      user,
+int ipmi_request_settime(struct ipmi_user *user,
 			 struct ipmi_addr *addr,
 			 long             msgid,
 			 struct kernel_ipmi_msg  *msg,
@@ -148,7 +181,7 @@
  * change as the system changes, so don't use it unless you REALLY
  * have to.
  */
-int ipmi_request_supply_msgs(ipmi_user_t          user,
+int ipmi_request_supply_msgs(struct ipmi_user     *user,
 			     struct ipmi_addr     *addr,
 			     long                 msgid,
 			     struct kernel_ipmi_msg *msg,
@@ -164,7 +197,7 @@
  * way.  This is useful if you need to spin waiting for something to
  * happen in the IPMI driver.
  */
-void ipmi_poll_interface(ipmi_user_t user);
+void ipmi_poll_interface(struct ipmi_user *user);
 
 /*
  * When commands come in to the SMS, the user can register to receive
@@ -175,11 +208,11 @@
  * error.  Channels are specified as a bitfield, use IPMI_CHAN_ALL to
  * mean all channels.
  */
-int ipmi_register_for_cmd(ipmi_user_t   user,
+int ipmi_register_for_cmd(struct ipmi_user *user,
 			  unsigned char netfn,
 			  unsigned char cmd,
 			  unsigned int  chans);
-int ipmi_unregister_for_cmd(ipmi_user_t   user,
+int ipmi_unregister_for_cmd(struct ipmi_user *user,
 			    unsigned char netfn,
 			    unsigned char cmd,
 			    unsigned int  chans);
@@ -210,8 +243,8 @@
  *
  * See the IPMI_MAINTENANCE_MODE_xxx defines for what the mode means.
  */
-int ipmi_get_maintenance_mode(ipmi_user_t user);
-int ipmi_set_maintenance_mode(ipmi_user_t user, int mode);
+int ipmi_get_maintenance_mode(struct ipmi_user *user);
+int ipmi_set_maintenance_mode(struct ipmi_user *user, int mode);
 
 /*
  * When the user is created, it will not receive IPMI events by
@@ -219,7 +252,7 @@
  * The first user that sets this to TRUE will receive all events that
  * have been queued while no one was waiting for events.
  */
-int ipmi_set_gets_events(ipmi_user_t user, bool val);
+int ipmi_set_gets_events(struct ipmi_user *user, bool val);
 
 /*
  * Called when a new SMI is registered.  This will also be called on
@@ -229,14 +262,18 @@
 struct ipmi_smi_watcher {
 	struct list_head link;
 
-	/* You must set the owner to the current module, if you are in
-	   a module (generally just set it to "THIS_MODULE"). */
+	/*
+	 * You must set the owner to the current module, if you are in
+	 * a module (generally just set it to "THIS_MODULE").
+	 */
 	struct module *owner;
 
-	/* These two are called with read locks held for the interface
-	   the watcher list.  So you can add and remove users from the
-	   IPMI interface, send messages, etc., but you cannot add
-	   or remove SMI watchers or SMI interfaces. */
+	/*
+	 * These two are called with read locks held for the interface
+	 * and the watcher list.  So you can add and remove users from the
+	 * IPMI interface, send messages, etc., but you cannot add
+	 * or remove SMI watchers or SMI interfaces.
+	 */
 	void (*new_smi)(int if_num, struct device *dev);
 	void (*smi_gone)(int if_num);
 };
@@ -244,8 +281,10 @@
 int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher);
 int ipmi_smi_watcher_unregister(struct ipmi_smi_watcher *watcher);
 
-/* The following are various helper functions for dealing with IPMI
-   addresses. */
+/*
+ * The following are various helper functions for dealing with IPMI
+ * addresses.
+ */
 
 /* Return the maximum length of an IPMI address given its type. */
 unsigned int ipmi_addr_length(int addr_type);
@@ -291,7 +330,7 @@
 	union ipmi_smi_info_union addr_info;
 };
 
-/* This is to get the private info of ipmi_smi_t */
+/* This is to get the private info of struct ipmi_smi */
 extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data);
 
 #endif /* __LINUX_IPMI_H */
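With the ipmi_user_t typedef on its way out, callers work with struct ipmi_user pointers directly; a minimal sketch of creating and tearing down a user on interface 0 (foo_recv() is a hypothetical receive handler):

    static const struct ipmi_user_hndl foo_hndl = {
            .ipmi_recv_hndl = foo_recv,     /* hypothetical callback */
    };

    struct ipmi_user *user;
    int rv;

    rv = ipmi_create_user(0, &foo_hndl, NULL, &user);
    if (rv)
            return rv;
    /* ... ipmi_request_settime(user, ...); responses arrive via foo_recv() ... */
    ipmi_destroy_user(user);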
diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index af457b5..7d5fd38 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -22,8 +22,10 @@
 
 struct device;
 
-/* This files describes the interface for IPMI system management interface
-   drivers to bind into the IPMI message handler. */
+/*
+ * This file describes the interface for IPMI system management interface
+ * drivers to bind into the IPMI message handler.
+ */
 
 /* Structure for the low-level drivers. */
 typedef struct ipmi_smi *ipmi_smi_t;
@@ -61,12 +63,20 @@
 struct ipmi_smi_handlers {
 	struct module *owner;
 
-	/* The low-level interface cannot start sending messages to
-	   the upper layer until this function is called.  This may
-	   not be NULL, the lower layer must take the interface from
-	   this call. */
-	int (*start_processing)(void       *send_info,
-				ipmi_smi_t new_intf);
+	/*
+	 * The low-level interface cannot start sending messages to
+	 * the upper layer until this function is called.  This may
+	 * not be NULL; the lower layer must take the interface from
+	 * this call.
+	 */
+	int (*start_processing)(void            *send_info,
+				struct ipmi_smi *new_intf);
+
+	/*
+	 * When called, the low-level interface should disable all
+	 * processing; it should be completely shut down when it returns.
+	 */
+	void (*shutdown)(void *send_info);
 
 	/*
 	 * Get the detailed private info of the low level interface and store
@@ -75,25 +85,31 @@
 	 */
 	int (*get_smi_info)(void *send_info, struct ipmi_smi_info *data);
 
-	/* Called to enqueue an SMI message to be sent.  This
-	   operation is not allowed to fail.  If an error occurs, it
-	   should report back the error in a received message.  It may
-	   do this in the current call context, since no write locks
-	   are held when this is run.  Message are delivered one at
-	   a time by the message handler, a new message will not be
-	   delivered until the previous message is returned. */
+	/*
+	 * Called to enqueue an SMI message to be sent.  This
+	 * operation is not allowed to fail.  If an error occurs, it
+	 * should report back the error in a received message.  It may
+	 * do this in the current call context, since no write locks
+	 * are held when this is run.  Messages are delivered one at
+	 * a time by the message handler; a new message will not be
+	 * delivered until the previous message is returned.
+	 */
 	void (*sender)(void                *send_info,
 		       struct ipmi_smi_msg *msg);
 
-	/* Called by the upper layer to request that we try to get
-	   events from the BMC we are attached to. */
+	/*
+	 * Called by the upper layer to request that we try to get
+	 * events from the BMC we are attached to.
+	 */
 	void (*request_events)(void *send_info);
 
-	/* Called by the upper layer when some user requires that the
-	   interface watch for events, received messages, watchdog
-	   pretimeouts, or not.  Used by the SMI to know if it should
-	   watch for these.  This may be NULL if the SMI does not
-	   implement it. */
+	/*
+	 * Called by the upper layer when some user requires that the
+	 * interface watch for events, received messages, watchdog
+	 * pretimeouts, or not.  Used by the SMI to know if it should
+	 * watch for these.  This may be NULL if the SMI does not
+	 * implement it.
+	 */
 	void (*set_need_watch)(void *send_info, bool enable);
 
 	/*
@@ -101,30 +117,29 @@
 	 */
 	void (*flush_messages)(void *send_info);
 
-	/* Called when the interface should go into "run to
-	   completion" mode.  If this call sets the value to true, the
-	   interface should make sure that all messages are flushed
-	   out and that none are pending, and any new requests are run
-	   to completion immediately. */
+	/*
+	 * Called when the interface should go into "run to
+	 * completion" mode.  If this call sets the value to true, the
+	 * interface should make sure that all messages are flushed
+	 * out and that none are pending, and any new requests are run
+	 * to completion immediately.
+	 */
 	void (*set_run_to_completion)(void *send_info, bool run_to_completion);
 
-	/* Called to poll for work to do.  This is so upper layers can
-	   poll for operations during things like crash dumps. */
+	/*
+	 * Called to poll for work to do.  This is so upper layers can
+	 * poll for operations during things like crash dumps.
+	 */
 	void (*poll)(void *send_info);
 
-	/* Enable/disable firmware maintenance mode.  Note that this
-	   is *not* the modes defined, this is simply an on/off
-	   setting.  The message handler does the mode handling.  Note
-	   that this is called from interrupt context, so it cannot
-	   block. */
+	/*
+	 * Enable/disable firmware maintenance mode.  Note that this
+	 * is *not* the modes defined; this is simply an on/off
+	 * setting.  The message handler does the mode handling.  Note
+	 * that this is called from interrupt context, so it cannot
+	 * block.
+	 */
 	void (*set_maintenance_mode)(void *send_info, bool enable);
-
-	/* Tell the handler that we are using it/not using it.  The
-	   message handler get the modules that this handler belongs
-	   to; this function lets the SMI claim any modules that it
-	   uses.  These may be NULL if this is not required. */
-	int (*inc_usecount)(void *send_info);
-	void (*dec_usecount)(void *send_info);
 };
 
 struct ipmi_device_id {
@@ -143,7 +158,8 @@
 #define ipmi_version_major(v) ((v)->ipmi_version & 0xf)
 #define ipmi_version_minor(v) ((v)->ipmi_version >> 4)
 
-/* Take a pointer to an IPMI response and extract device id information from
+/*
+ * Take a pointer to an IPMI response and extract device id information from
  * it. @netfn is in the IPMI_NETFN_ format, so may need to be shifted from
  * a SI response.
  */
@@ -187,12 +203,14 @@
 	return 0;
 }
 
-/* Add a low-level interface to the IPMI driver.  Note that if the
-   interface doesn't know its slave address, it should pass in zero.
-   The low-level interface should not deliver any messages to the
-   upper layer until the start_processing() function in the handlers
-   is called, and the lower layer must get the interface from that
-   call. */
+/*
+ * Add a low-level interface to the IPMI driver.  Note that if the
+ * interface doesn't know its slave address, it should pass in zero.
+ * The low-level interface should not deliver any messages to the
+ * upper layer until the start_processing() function in the handlers
+ * is called, and the lower layer must get the interface from that
+ * call.
+ */
 int ipmi_register_smi(const struct ipmi_smi_handlers *handlers,
 		      void                     *send_info,
 		      struct device            *dev,
@@ -202,7 +220,7 @@
  * Remove a low-level interface from the IPMI driver.  This will
  * return an error if the interface is still in use by a user.
  */
-int ipmi_unregister_smi(ipmi_smi_t intf);
+void ipmi_unregister_smi(struct ipmi_smi *intf);
 
 /*
  * The lower layer reports received messages through this interface.
@@ -210,11 +228,11 @@
  * the lower layer gets an error sending a message, it should format
  * an error response in the message response.
  */
-void ipmi_smi_msg_received(ipmi_smi_t          intf,
+void ipmi_smi_msg_received(struct ipmi_smi     *intf,
 			   struct ipmi_smi_msg *msg);
 
 /* The lower layer received a watchdog pre-timeout on interface. */
-void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf);
+void ipmi_smi_watchdog_pretimeout(struct ipmi_smi *intf);
 
 struct ipmi_smi_msg *ipmi_alloc_smi_msg(void);
 static inline void ipmi_free_smi_msg(struct ipmi_smi_msg *msg)
@@ -222,13 +240,4 @@
 	msg->done(msg);
 }
 
-#ifdef CONFIG_IPMI_PROC_INTERFACE
-/* Allow the lower layer to add things to the proc filesystem
-   directory for this interface.  Note that the entry will
-   automatically be dstroyed when the interface is destroyed. */
-int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name,
-			    const struct file_operations *proc_ops,
-			    void *data);
-#endif
-
 #endif /* __LINUX_IPMI_SMI_H */
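
For illustration only (not part of the patch): a minimal lower layer might fill
in the reworked handlers and register as in the sketch below. The frob_* names
are invented, the table omits the remaining mandatory callbacks such as
->sender, and the zero slave address follows the comment above for interfaces
that do not know theirs.

	static void frob_set_run_to_completion(void *send_info, bool run) { }
	static void frob_set_maintenance_mode(void *send_info, bool enable) { }
	static void frob_poll(void *send_info) { }

	static const struct ipmi_smi_handlers frob_handlers = {
		.owner			= THIS_MODULE,
		.set_run_to_completion	= frob_set_run_to_completion,
		.set_maintenance_mode	= frob_set_maintenance_mode,
		.poll			= frob_poll,
	};

	struct frob_ctx {
		struct device *dev;
	};

	static int frob_attach(struct frob_ctx *ctx)
	{
		/* slave address unknown, so pass zero as documented above */
		return ipmi_register_smi(&frob_handlers, ctx, ctx->dev, 0);
	}
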
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 65916a3..b206708 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -512,6 +512,7 @@
 	IRQCHIP_SKIP_SET_WAKE		= (1 <<  4),
 	IRQCHIP_ONESHOT_SAFE		= (1 <<  5),
 	IRQCHIP_EOI_THREADED		= (1 <<  6),
+	IRQCHIP_SUPPORTS_LEVEL_MSI	= (1 <<  7),
 };
 
 #include <linux/irqdesc.h>
diff --git a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h
index 4954948..6e8895c 100644
--- a/include/linux/irq_cpustat.h
+++ b/include/linux/irq_cpustat.h
@@ -18,15 +18,11 @@
  */
 
 #ifndef __ARCH_IRQ_STAT
-extern irq_cpustat_t irq_stat[];		/* defined in asm/hardirq.h */
-#define __IRQ_STAT(cpu, member)	(irq_stat[cpu].member)
+DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);	/* defined in asm/hardirq.h */
+#define __IRQ_STAT(cpu, member)	(per_cpu(irq_stat.member, cpu))
 #endif
 
-  /* arch independent irq_stat fields */
-#define local_softirq_pending() \
-	__IRQ_STAT(smp_processor_id(), __softirq_pending)
-
-  /* arch dependent irq_stat fields */
+/* arch dependent irq_stat fields */
 #define nmi_count(cpu)		__IRQ_STAT((cpu), __nmi_count)	/* i386 */
 
 #endif	/* __irq_cpustat_h */
diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h
index 0380d89..630a57e 100644
--- a/include/linux/irq_sim.h
+++ b/include/linux/irq_sim.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2017-2018 Bartosz Golaszewski <brgl@bgdev.pl>
+ */
+
 #ifndef _LINUX_IRQ_SIM_H
 #define _LINUX_IRQ_SIM_H
-/*
- * Copyright (C) 2017 Bartosz Golaszewski <brgl@bgdev.pl>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
 
 #include <linux/irq_work.h>
 #include <linux/device.h>
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index f5af3b5..cbb872c 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -587,6 +587,7 @@
 int its_cpu_init(void);
 int its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	     struct irq_domain *domain);
+int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent);
 
 static inline bool gic_enable_sre(void)
 {
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 48c7e86..dccfa65 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -301,7 +301,13 @@
 
 static inline struct irq_domain *irq_find_host(struct device_node *node)
 {
-	return irq_find_matching_host(node, DOMAIN_BUS_ANY);
+	struct irq_domain *d;
+
+	d = irq_find_matching_host(node, DOMAIN_BUS_WIRED);
+	if (!d)
+		d = irq_find_matching_host(node, DOMAIN_BUS_ANY);
+
+	return d;
 }
 
 /**
diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h
index 11b57c4..d75e1ad 100644
--- a/include/linux/isdn/capilli.h
+++ b/include/linux/isdn/capilli.h
@@ -50,7 +50,7 @@
 	u16  (*send_message)(struct capi_ctr *, struct sk_buff *skb);
 	
 	char *(*procinfo)(struct capi_ctr *);
-	const struct file_operations *proc_fops;
+	int (*proc_show)(struct seq_file *, void *);
 
 	/* filled in before calling ready callback */
 	u8 manu[CAPI_MANUFACTURER_LEN];		/* CAPI_GET_MANUFACTURER */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 6a1eb0b..7aed926 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -542,6 +542,7 @@
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
 	SYSTEM_RESTART,
+	SYSTEM_SUSPEND,
 } system_state;
 
 /* This cannot be an enum because some may be used in assembly source. */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1795fec..9db9043 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -125,9 +125,8 @@
 	LIBATA_MAX_PRD		= ATA_MAX_PRD / 2,
 	LIBATA_DUMB_MAX_PRD	= ATA_MAX_PRD / 4,	/* Worst case */
 	ATA_DEF_QUEUE		= 1,
-	/* tag ATA_MAX_QUEUE - 1 is reserved for internal commands */
 	ATA_MAX_QUEUE		= 32,
-	ATA_TAG_INTERNAL	= ATA_MAX_QUEUE - 1,
+	ATA_TAG_INTERNAL	= ATA_MAX_QUEUE,
 	ATA_SHORT_PAUSE		= 16,
 
 	ATAPI_MAX_DRAIN		= 16 << 10,
@@ -637,7 +636,8 @@
 	u8			cdb[ATAPI_CDB_LEN];
 
 	unsigned long		flags;		/* ATA_QCFLAG_xxx */
-	unsigned int		tag;
+	unsigned int		tag;		/* libata core tag */
+	unsigned int		hw_tag;		/* driver tag */
 	unsigned int		n_elem;
 	unsigned int		orig_n_elem;
 
@@ -849,9 +849,9 @@
 	unsigned int		udma_mask;
 	unsigned int		cbl;	/* cable type; ATA_CBL_xxx */
 
-	struct ata_queued_cmd	qcmd[ATA_MAX_QUEUE];
+	struct ata_queued_cmd	qcmd[ATA_MAX_QUEUE + 1];
 	unsigned long		sas_tag_allocated; /* for sas tag allocation only */
-	unsigned int		qc_active;
+	u64			qc_active;
 	int			nr_active_links; /* #links with active qcs */
 	unsigned int		sas_last_tag;	/* track next tag hw expects */
 
@@ -1133,7 +1133,6 @@
 extern void ata_sas_port_stop(struct ata_port *ap);
 extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
 extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
-extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
 extern int sata_scr_valid(struct ata_link *link);
 extern int sata_scr_read(struct ata_link *link, int reg, u32 *val);
 extern int sata_scr_write(struct ata_link *link, int reg, u32 val);
@@ -1184,7 +1183,7 @@
 extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
 					struct ata_taskfile *tf, u16 *id);
 extern void ata_qc_complete(struct ata_queued_cmd *qc);
-extern int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active);
+extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active);
 extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
 extern int ata_std_bios_param(struct scsi_device *sdev,
 			      struct block_device *bdev,
@@ -1359,7 +1358,6 @@
 	.proc_name		= drv_name,			\
 	.slave_configure	= ata_scsi_slave_config,	\
 	.slave_destroy		= ata_scsi_slave_destroy,	\
-	.eh_timed_out		= ata_scsi_timed_out,		\
 	.bios_param		= ata_std_bios_param,		\
 	.unlock_native_capacity	= ata_scsi_unlock_native_capacity, \
 	.sdev_attrs		= ata_common_sdev_attrs
@@ -1485,16 +1483,16 @@
 			       const char *name);
 #endif
 
-static inline unsigned int ata_tag_valid(unsigned int tag)
-{
-	return (tag < ATA_MAX_QUEUE) ? 1 : 0;
-}
-
-static inline unsigned int ata_tag_internal(unsigned int tag)
+static inline bool ata_tag_internal(unsigned int tag)
 {
 	return tag == ATA_TAG_INTERNAL;
 }
 
+static inline bool ata_tag_valid(unsigned int tag)
+{
+	return tag < ATA_MAX_QUEUE || ata_tag_internal(tag);
+}
+
 /*
  * device helpers
  */
@@ -1655,7 +1653,7 @@
 static inline struct ata_queued_cmd *__ata_qc_from_tag(struct ata_port *ap,
 						       unsigned int tag)
 {
-	if (likely(ata_tag_valid(tag)))
+	if (ata_tag_valid(tag))
 		return &ap->qcmd[tag];
 	return NULL;
 }
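
Worth spelling out (illustrative sketch, invented name): once ATA_TAG_INTERNAL
equals ATA_MAX_QUEUE (32), an active-command mask that can cover the internal
command needs bit 32, which is why qc_active and ata_qc_complete_multiple()
above switch from 32-bit to u64.

	#include <linux/libata.h>

	static bool frob_internal_active(struct ata_port *ap)
	{
		/* bit 32 of the now 64-bit mask tracks the internal tag */
		return ap->qc_active & (1ULL << ATA_TAG_INTERNAL);
	}
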
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 6e0859b..e9e0d1c 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -489,7 +489,7 @@
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
 				int flags);
-typedef void (nvm_tgt_exit_fn)(void *);
+typedef void (nvm_tgt_exit_fn)(void *, bool);
 typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
 typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
 
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 9d0b286..8f1131c 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -757,6 +757,11 @@
  *	@type contains the requested communications type.
  *	@protocol contains the requested protocol.
  *	@kern set to 1 if a kernel socket.
+ * @socket_socketpair:
+ *	Check permissions before creating a fresh pair of sockets.
+ *	@socka contains the first socket structure.
+ *	@sockb contains the second socket structure.
+ *	Return 0 if permission is granted and the connection was established.
  * @socket_bind:
  *	Check permission before socket protocol layer bind operation is
  *	performed and the socket @sock is bound to the address specified in the
@@ -1656,6 +1661,7 @@
 	int (*socket_create)(int family, int type, int protocol, int kern);
 	int (*socket_post_create)(struct socket *sock, int family, int type,
 					int protocol, int kern);
+	int (*socket_socketpair)(struct socket *socka, struct socket *sockb);
 	int (*socket_bind)(struct socket *sock, struct sockaddr *address,
 				int addrlen);
 	int (*socket_connect)(struct socket *sock, struct sockaddr *address,
@@ -1922,6 +1928,7 @@
 	struct hlist_head unix_may_send;
 	struct hlist_head socket_create;
 	struct hlist_head socket_post_create;
+	struct hlist_head socket_socketpair;
 	struct hlist_head socket_bind;
 	struct hlist_head socket_connect;
 	struct hlist_head socket_listen;
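
A security module wires the new hook up like any existing socket hook; the
sketch below is hypothetical (module name, permissive policy) and assumes the
usual LSM_HOOK_INIT()/security_add_hooks() registration path.

	static int frob_socket_socketpair(struct socket *socka,
					  struct socket *sockb)
	{
		/* grant unconditionally; a real policy would check labels */
		return 0;
	}

	static struct security_hook_list frob_hooks[] __lsm_ro_after_init = {
		LSM_HOOK_INIT(socket_socketpair, frob_socket_socketpair),
	};

	static int __init frob_lsm_init(void)
	{
		security_add_hooks(frob_hooks, ARRAY_SIZE(frob_hooks), "frob");
		return 0;
	}
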
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index b51f5c4..0c964ac 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -25,6 +25,18 @@
 	wait_queue_head_t wait;
 } mempool_t;
 
+static inline bool mempool_initialized(mempool_t *pool)
+{
+	return pool->elements != NULL;
+}
+
+void mempool_exit(mempool_t *pool);
+int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+		      mempool_free_t *free_fn, void *pool_data,
+		      gfp_t gfp_mask, int node_id);
+int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+		 mempool_free_t *free_fn, void *pool_data);
+
 extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
 			mempool_free_t *free_fn, void *pool_data);
 extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
@@ -43,6 +55,14 @@
  */
 void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data);
 void mempool_free_slab(void *element, void *pool_data);
+
+static inline int
+mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc)
+{
+	return mempool_init(pool, min_nr, mempool_alloc_slab,
+			    mempool_free_slab, (void *) kc);
+}
+
 static inline mempool_t *
 mempool_create_slab_pool(int min_nr, struct kmem_cache *kc)
 {
@@ -56,6 +76,13 @@
  */
 void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data);
 void mempool_kfree(void *element, void *pool_data);
+
+static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size)
+{
+	return mempool_init(pool, min_nr, mempool_kmalloc,
+			    mempool_kfree, (void *) size);
+}
+
 static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size)
 {
 	return mempool_create(min_nr, mempool_kmalloc, mempool_kfree,
@@ -68,6 +95,13 @@
  */
 void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data);
 void mempool_free_pages(void *element, void *pool_data);
+
+static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order)
+{
+	return mempool_init(pool, min_nr, mempool_alloc_pages,
+			    mempool_free_pages, (void *)(long)order);
+}
+
 static inline mempool_t *mempool_create_page_pool(int min_nr, int order)
 {
 	return mempool_create(min_nr, mempool_alloc_pages, mempool_free_pages,
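
The point of the mempool_init*() variants is that the pool can now be embedded
in a containing structure instead of being allocated separately, which is
exactly what the pktcdvd change further down in this diff does with its
rb_pool. A sketch with invented names:

	#include <linux/mempool.h>

	struct frob_dev {
		mempool_t pool;		/* embedded, not a mempool_t * */
	};

	static int frob_setup(struct frob_dev *fd, struct kmem_cache *cache)
	{
		/* keep at least 8 objects in reserve; -ENOMEM on failure */
		return mempool_init_slab_pool(&fd->pool, 8, cache);
	}

	static void frob_teardown(struct frob_dev *fd)
	{
		if (mempool_initialized(&fd->pool))
			mempool_exit(&fd->pool);
	}
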
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 7b4899c..74ea5e2 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -45,7 +45,7 @@
  * must be treated as an opaque object, rather than a "normal" struct page.
  *
  * A more complete discussion of unaddressable memory may be found in
- * include/linux/hmm.h and Documentation/vm/hmm.txt.
+ * include/linux/hmm.h and Documentation/vm/hmm.rst.
  *
  * MEMORY_DEVICE_PUBLIC:
  * Device memory that is cache coherent from device and CPU point of view. This
@@ -67,7 +67,7 @@
  *   page_free()
  *
  * Additional notes about MEMORY_DEVICE_PRIVATE may be found in
- * include/linux/hmm.h and Documentation/vm/hmm.txt. There is also a brief
+ * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief
  * explanation in include/linux/memory_hotplug.h.
  *
  * The page_fault() callback must migrate page back, from device memory to
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 2d4e23c..f09e9cf 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -197,6 +197,8 @@
 	u32 features[2];
 };
 
+#define to_cros_ec_dev(dev)  container_of(dev, struct cros_ec_dev, class_dev)
+
 /**
  * cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device
  *
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 638222e..54a3cd8 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -243,6 +243,8 @@
 #define MC13XXX_ADC0_LICELLCON		(1 << 0)
 #define MC13XXX_ADC0_CHRGICON		(1 << 1)
 #define MC13XXX_ADC0_BATICON		(1 << 2)
+#define MC13XXX_ADC0_ADIN7SEL_DIE	(1 << 4)
+#define MC13XXX_ADC0_ADIN7SEL_UID	(2 << 4)
 #define MC13XXX_ADC0_ADREFEN		(1 << 10)
 #define MC13XXX_ADC0_TSMOD0		(1 << 12)
 #define MC13XXX_ADC0_TSMOD1		(1 << 13)
diff --git a/include/linux/mfd/wm8350/audio.h b/include/linux/mfd/wm8350/audio.h
index bd581c6..0bc41c4 100644
--- a/include/linux/mfd/wm8350/audio.h
+++ b/include/linux/mfd/wm8350/audio.h
@@ -617,11 +617,8 @@
 	u32 codec_current_charge:2;	/* codec current @ vmid charge */
 };
 
-struct snd_soc_codec;
-
 struct wm8350_codec {
 	struct platform_device *pdev;
-	struct snd_soc_codec *codec;
 	struct wm8350_audio_platform_data *platform_data;
 };
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 02a616e..611f19d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -25,6 +25,7 @@
 #include <linux/err.h>
 #include <linux/page_ref.h>
 #include <linux/memremap.h>
+#include <linux/overflow.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -560,10 +561,12 @@
 
 static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
 {
-	if (size != 0 && n > SIZE_MAX / size)
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
 
-	return kvmalloc(n * size, flags);
+	return kvmalloc(bytes, flags);
 }
 
 extern void kvfree(const void *addr);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 279b390..de73778 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -156,6 +156,7 @@
 #define UHS_DDR50_MAX_DTR	50000000
 #define UHS_SDR25_MAX_DTR	UHS_DDR50_MAX_DTR
 #define UHS_SDR12_MAX_DTR	25000000
+#define DEFAULT_SPEED_MAX_DTR	UHS_SDR12_MAX_DTR
 	unsigned int		sd3_bus_mode;
 #define UHS_SDR12_BUS_SPEED	0
 #define HIGH_SPEED_BUS_SPEED	1
@@ -252,6 +253,7 @@
 #define MMC_TYPE_SD_COMBO	3		/* SD combo (IO+mem) card */
 	unsigned int		state;		/* (our) card state */
 	unsigned int		quirks; 	/* card quirks */
+	unsigned int		quirk_max_rate;	/* max rate set by quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
 						/* for byte mode */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 9275193..134a648 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -177,6 +177,7 @@
 		int retries);
 
 int mmc_hw_reset(struct mmc_host *host);
+int mmc_sw_reset(struct mmc_host *host);
 void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card);
 
 #endif /* LINUX_MMC_CORE_H */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 8514623..64300a4 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -22,6 +22,7 @@
 struct mmc_ios {
 	unsigned int	clock;			/* clock rate */
 	unsigned short	vdd;
+	unsigned int	power_delay_ms;		/* waiting for stable power */
 
 /* vdd stores the bit number of the selected voltage range from below. */
 
@@ -320,6 +321,9 @@
 #define MMC_CAP_UHS_SDR50	(1 << 18)	/* Host supports UHS SDR50 mode */
 #define MMC_CAP_UHS_SDR104	(1 << 19)	/* Host supports UHS SDR104 mode */
 #define MMC_CAP_UHS_DDR50	(1 << 20)	/* Host supports UHS DDR50 mode */
+#define MMC_CAP_UHS		(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 | \
+				 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | \
+				 MMC_CAP_UHS_DDR50)
 /* (1 << 21) is free for reuse */
 #define MMC_CAP_DRIVER_TYPE_A	(1 << 23)	/* Host supports Driver Type A */
 #define MMC_CAP_DRIVER_TYPE_C	(1 << 24)	/* Host supports Driver Type C */
@@ -345,6 +349,7 @@
 #define MMC_CAP2_HS400_1_2V	(1 << 16)	/* Can support HS400 1.2V */
 #define MMC_CAP2_HS400		(MMC_CAP2_HS400_1_8V | \
 				 MMC_CAP2_HS400_1_2V)
+#define MMC_CAP2_HSX00_1_8V	(MMC_CAP2_HS200_1_8V_SDR | MMC_CAP2_HS400_1_8V)
 #define MMC_CAP2_HSX00_1_2V	(MMC_CAP2_HS200_1_2V_SDR | MMC_CAP2_HS400_1_2V)
 #define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17)
 #define MMC_CAP2_NO_WRITE_PROTECT (1 << 18)	/* No physical write protect pin, assume that card is always read-write */
@@ -354,6 +359,7 @@
 #define MMC_CAP2_NO_MMC		(1 << 22)	/* Do not send (e)MMC commands during initialization */
 #define MMC_CAP2_CQE		(1 << 23)	/* Has eMMC command queue engine */
 #define MMC_CAP2_CQE_DCMD	(1 << 24)	/* CQE can issue a direct command */
+#define MMC_CAP2_AVOID_3_3V	(1 << 25)	/* Host must negotiate down from 3.3V */
 
 	int			fixed_drv_type;	/* fixed driver type for non-removable media */
 
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 0a7abe8..4224902 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -56,6 +56,7 @@
 #define SDIO_DEVICE_ID_MARVELL_8688WLAN		0x9104
 #define SDIO_DEVICE_ID_MARVELL_8688BT		0x9105
 #define SDIO_DEVICE_ID_MARVELL_8797_F0		0x9128
+#define SDIO_DEVICE_ID_MARVELL_8887WLAN	0x9134
 
 #define SDIO_VENDOR_ID_SIANO			0x039a
 #define SDIO_DEVICE_ID_SIANO_NOVA_B0		0x0201
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 2d07a1e..392e6af 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -174,7 +174,7 @@
 	 * invalidate_range_start()/end() notifiers, as
	 * invalidate_range() already catches the points in time when an
 	 * external TLB range needs to be flushed. For more in depth
-	 * discussion on this see Documentation/vm/mmu_notifier.txt
+	 * discussion on this see Documentation/vm/mmu_notifier.rst
 	 *
 	 * Note that this function might be called with just a sub-range
 	 * of what was passed to invalidate_range_start()/end(), if
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 7d361be..2014bd1 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -471,6 +471,17 @@
 	kernel_ulong_t driver_data;
 };
 
+#define APR_NAME_SIZE	32
+#define APR_MODULE_PREFIX "apr:"
+
+struct apr_device_id {
+	char name[APR_NAME_SIZE];
+	__u32 domain_id;
+	__u32 svc_id;
+	__u32 svc_version;
+	kernel_ulong_t driver_data;	/* Data private to the driver */
+};
+
 #define SPMI_NAME_SIZE	32
 #define SPMI_MODULE_PREFIX "spmi:"
 
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 1f1bbb5..5839d80 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -289,6 +289,8 @@
 	 * MSI_FLAG_ACTIVATE_EARLY has been set.
 	 */
 	MSI_FLAG_MUST_REACTIVATE	= (1 << 5),
+	/* Is level-triggered capable, using two messages */
+	MSI_FLAG_LEVEL_CAPABLE		= (1 << 6),
 };
 
 int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 14bc0d5..3093dd1 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -146,9 +146,6 @@
  */
 static inline bool mutex_is_locked(struct mutex *lock)
 {
-	/*
-	 * XXX think about spin_is_locked
-	 */
 	return __mutex_owner(lock) != NULL;
 }
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 6554d3b..08b6eb9 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -147,6 +147,7 @@
 	int		(*getname)   (struct socket *sock,
 				      struct sockaddr *addr,
 				      int peer);
+	__poll_t	(*poll_mask) (struct socket *sock, __poll_t events);
 	__poll_t	(*poll)	     (struct file *file, struct socket *sock,
 				      struct poll_table_struct *wait);
 	int		(*ioctl)     (struct socket *sock, unsigned int cmd,
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 6d73111..f35c7bf 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -43,9 +43,7 @@
  * in srcu_notifier_call_chain(): no cache bounces and no memory barriers.
  * As compensation, srcu_notifier_chain_unregister() is rather expensive.
  * SRCU notifier chains should be used when the chain will be called very
- * often but notifier_blocks will seldom be removed.  Also, SRCU notifier
- * chains are slightly more difficult to use because they require special
- * runtime initialization.
+ * often but notifier_blocks will seldom be removed.
  */
 
 struct notifier_block;
@@ -91,7 +89,7 @@
 		(name)->head = NULL;		\
 	} while (0)
 
-/* srcu_notifier_heads must be initialized and cleaned up dynamically */
+/* srcu_notifier_heads must be cleaned up dynamically */
 extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 #define srcu_cleanup_notifier_head(name)	\
 		cleanup_srcu_struct(&(name)->srcu);
@@ -104,7 +102,13 @@
 		.head = NULL }
 #define RAW_NOTIFIER_INIT(name)	{				\
 		.head = NULL }
-/* srcu_notifier_heads cannot be initialized statically */
+
+#define SRCU_NOTIFIER_INIT(name, pcpu)				\
+	{							\
+		.mutex = __MUTEX_INITIALIZER(name.mutex),	\
+		.head = NULL,					\
+		.srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu),	\
+	}
 
 #define ATOMIC_NOTIFIER_HEAD(name)				\
 	struct atomic_notifier_head name =			\
@@ -116,6 +120,26 @@
 	struct raw_notifier_head name =				\
 		RAW_NOTIFIER_INIT(name)
 
+#ifdef CONFIG_TREE_SRCU
+#define _SRCU_NOTIFIER_HEAD(name, mod)				\
+	static DEFINE_PER_CPU(struct srcu_data,			\
+			name##_head_srcu_data);			\
+	mod struct srcu_notifier_head name =			\
+			SRCU_NOTIFIER_INIT(name, name##_head_srcu_data)
+
+#else
+#define _SRCU_NOTIFIER_HEAD(name, mod)				\
+	mod struct srcu_notifier_head name =			\
+			SRCU_NOTIFIER_INIT(name, name)
+
+#endif
+
+#define SRCU_NOTIFIER_HEAD(name)				\
+	_SRCU_NOTIFIER_HEAD(name, /* not static */)
+
+#define SRCU_NOTIFIER_HEAD_STATIC(name)				\
+	_SRCU_NOTIFIER_HEAD(name, static)
+
 #ifdef __KERNEL__
 
 extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
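
With SRCU_NOTIFIER_INIT and the two head-definition macros, an SRCU notifier
chain can now be defined statically and used right away; only the cleanup
remains dynamic, as the updated comments above say. A hypothetical chain:

	#include <linux/notifier.h>

	/* defines the per-CPU srcu_data and the head in one statement */
	SRCU_NOTIFIER_HEAD_STATIC(frob_chain);

	int frob_notify(unsigned long event, void *data)
	{
		return srcu_notifier_call_chain(&frob_chain, event, data);
	}
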
diff --git a/include/linux/nubus.h b/include/linux/nubus.h
index 6e82002..eba50b0 100644
--- a/include/linux/nubus.h
+++ b/include/linux/nubus.h
@@ -163,7 +163,7 @@
 unsigned char *nubus_dirptr(const struct nubus_dirent *nd);
 
 /* Declarations relating to driver model objects */
-int nubus_bus_register(void);
+int nubus_parent_device_register(void);
 int nubus_device_register(struct nubus_board *board);
 int nubus_driver_register(struct nubus_driver *ndrv);
 void nubus_driver_unregister(struct nubus_driver *ndrv);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 4112e2b..2950ce9 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -436,10 +436,19 @@
 enum {
 	NVME_AER_ERROR			= 0,
 	NVME_AER_SMART			= 1,
+	NVME_AER_NOTICE			= 2,
 	NVME_AER_CSS			= 6,
 	NVME_AER_VS			= 7,
-	NVME_AER_NOTICE_NS_CHANGED	= 0x0002,
-	NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
+};
+
+enum {
+	NVME_AER_NOTICE_NS_CHANGED	= 0x00,
+	NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+};
+
+enum {
+	NVME_AEN_CFG_NS_ATTR		= 1 << 8,
+	NVME_AEN_CFG_FW_ACT		= 1 << 9,
 };
 
 struct nvme_lba_range_type {
@@ -747,6 +756,7 @@
 	NVME_LOG_ERROR		= 0x01,
 	NVME_LOG_SMART		= 0x02,
 	NVME_LOG_FW_SLOT	= 0x03,
+	NVME_LOG_CHANGED_NS	= 0x04,
 	NVME_LOG_CMD_EFFECTS	= 0x05,
 	NVME_LOG_DISC		= 0x70,
 	NVME_LOG_RESERVATION	= 0x80,
@@ -755,6 +765,8 @@
 	NVME_FWACT_ACTV		= (2 << 3),
 };
 
+#define NVME_MAX_CHANGED_NAMESPACES	1024
+
 struct nvme_identify {
 	__u8			opcode;
 	__u8			flags;
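
Splitting the event type from the notice subtype means a consumer decodes the
AER completion result in two steps. A sketch (function name invented; per the
NVMe spec the type sits in bits 2:0 of the result and the notice information
in bits 15:8):

	#include <linux/nvme.h>

	static bool frob_ns_changed_event(u32 result)
	{
		return (result & 0x7) == NVME_AER_NOTICE &&
		       ((result & 0xff00) >> 8) == NVME_AER_NOTICE_NS_CHANGED;
	}
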
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index f89598b..24def6a 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -77,6 +77,9 @@
 
 int devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem);
 
+int nvmem_add_cells(struct nvmem_device *nvmem,
+		    const struct nvmem_cell_info *info,
+		    int ncells);
 #else
 
 static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c)
@@ -99,6 +102,14 @@
 devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem)
 {
 	return nvmem_unregister(nvmem);
+}
+
+static inline int nvmem_add_cells(struct nvmem_device *nvmem,
+				  const struct nvmem_cell_info *info,
+				  int ncells)
+{
+	return -ENOSYS;
 }
 
 #endif /* CONFIG_NVMEM */
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 8da5a1b..165fd30 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -55,7 +55,9 @@
 	return of_node_get(cpu_dev->of_node);
 }
 
-int of_dma_configure(struct device *dev, struct device_node *np);
+int of_dma_configure(struct device *dev,
+		     struct device_node *np,
+		     bool force_dma);
 void of_dma_deconfigure(struct device *dev);
 #else /* CONFIG_OF */
 
@@ -105,7 +107,9 @@
 	return NULL;
 }
 
-static inline int of_dma_configure(struct device *dev, struct device_node *np)
+static inline int of_dma_configure(struct device *dev,
+				   struct device_node *np,
+				   bool force_dma)
 {
 	return 0;
 }
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
new file mode 100644
index 0000000..8712ff7
--- /dev/null
+++ b/include/linux/overflow.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#include <linux/compiler.h>
+
+/*
+ * In the fallback code below, we need to compute the minimum and
+ * maximum values representable in a given type. These macros may also
+ * be useful elsewhere, so we provide them outside the
+ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
+ *
+ * It would seem more obvious to do something like
+ *
+ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
+ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
+ *
+ * Unfortunately, the middle expressions, strictly speaking, have
+ * undefined behaviour, and at least some versions of gcc warn about
+ * the type_max expression (but not if -fsanitize=undefined is in
+ * effect; in that case, the warning is deferred to runtime...).
+ *
+ * The slightly excessive casting in type_min is to make sure the
+ * macros also produce sensible values for the exotic type _Bool. [The
+ * overflow checkers only almost work for _Bool, but that's
+ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on
+ * _Bools. Besides, the gcc builtins don't allow _Bool* as third
+ * argument.]
+ *
+ * Idea stolen from
+ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
+ * credit to Christian Biere.
+ */
+#define is_signed_type(type)       (((type)(-1)) < (type)1)
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+/*
+ * For simplicity and code hygiene, the fallback code below insists on
+ * a, b and *d having the same type (similar to the min() and max()
+ * macros), whereas gcc's type-generic overflow checkers accept
+ * different types. Hence we don't just make check_add_overflow an
+ * alias for __builtin_add_overflow, but add type checks similar to
+ * below.
+ */
+#define check_add_overflow(a, b, d) ({		\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	__builtin_add_overflow(__a, __b, __d);	\
+})
+
+#define check_sub_overflow(a, b, d) ({		\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	__builtin_sub_overflow(__a, __b, __d);	\
+})
+
+#define check_mul_overflow(a, b, d) ({		\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	__builtin_mul_overflow(__a, __b, __d);	\
+})
+
+#else
+
+
+/* Checking for unsigned overflow is relatively easy without causing UB. */
+#define __unsigned_add_overflow(a, b, d) ({	\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	*__d = __a + __b;			\
+	*__d < __a;				\
+})
+#define __unsigned_sub_overflow(a, b, d) ({	\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	*__d = __a - __b;			\
+	__a < __b;				\
+})
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({		\
+	typeof(a) __a = (a);				\
+	typeof(b) __b = (b);				\
+	typeof(d) __d = (d);				\
+	(void) (&__a == &__b);				\
+	(void) (&__a == __d);				\
+	*__d = __a * __b;				\
+	__builtin_constant_p(__b) ?			\
+	  __b > 0 && __a > type_max(typeof(__a)) / __b : \
+	  __a > 0 && __b > type_max(typeof(__b)) / __a;	 \
+})
+
+/*
+ * For signed types, detecting overflow is much harder, especially if
+ * we want to avoid UB. But the interface of these macros is such that
+ * we must provide a result in *d, and in fact we must produce the
+ * result promised by gcc's builtins, which is simply the possibly
+ * wrapped-around value. Fortunately, we can just formally do the
+ * operations in the widest relevant unsigned type (u64) and then
+ * truncate the result - gcc is smart enough to generate the same code
+ * with and without the (u64) casts.
+ */
+
+/*
+ * Adding two signed integers can overflow only if they have the same
+ * sign, and overflow has happened iff the result has the opposite
+ * sign.
+ */
+#define __signed_add_overflow(a, b, d) ({	\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	*__d = (u64)__a + (u64)__b;		\
+	(((~(__a ^ __b)) & (*__d ^ __a))	\
+		& type_min(typeof(__a))) != 0;	\
+})
+
+/*
+ * Subtraction is similar, except that overflow can now happen only
+ * when the signs are opposite. In this case, overflow has happened if
+ * the result has the opposite sign of a.
+ */
+#define __signed_sub_overflow(a, b, d) ({	\
+	typeof(a) __a = (a);			\
+	typeof(b) __b = (b);			\
+	typeof(d) __d = (d);			\
+	(void) (&__a == &__b);			\
+	(void) (&__a == __d);			\
+	*__d = (u64)__a - (u64)__b;		\
+	((((__a ^ __b)) & (*__d ^ __a))		\
+		& type_min(typeof(__a))) != 0;	\
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a)) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({				\
+	typeof(a) __a = (a);						\
+	typeof(b) __b = (b);						\
+	typeof(d) __d = (d);						\
+	typeof(a) __tmax = type_max(typeof(a));				\
+	typeof(a) __tmin = type_min(typeof(a));				\
+	(void) (&__a == &__b);						\
+	(void) (&__a == __d);						\
+	*__d = (u64)__a * (u64)__b;					\
+	(__b > 0   && (__a > __tmax/__b || __a < __tmin/__b)) ||	\
+	(__b < (typeof(__b))-1  && (__a > __tmin/__b || __a < __tmax/__b)) || \
+	(__b == (typeof(__b))-1 && __a == __tmin);			\
+})
+
+
+#define check_add_overflow(a, b, d)					\
+	__builtin_choose_expr(is_signed_type(typeof(a)),		\
+			__signed_add_overflow(a, b, d),			\
+			__unsigned_add_overflow(a, b, d))
+
+#define check_sub_overflow(a, b, d)					\
+	__builtin_choose_expr(is_signed_type(typeof(a)),		\
+			__signed_sub_overflow(a, b, d),			\
+			__unsigned_sub_overflow(a, b, d))
+
+#define check_mul_overflow(a, b, d)					\
+	__builtin_choose_expr(is_signed_type(typeof(a)),		\
+			__signed_mul_overflow(a, b, d),			\
+			__unsigned_mul_overflow(a, b, d))
+
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
+
+/**
+ * array_size() - Calculate size of 2-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ *
+ * Calculates size of 2-dimensional array: @a * @b.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array_size(size_t a, size_t b)
+{
+	size_t bytes;
+
+	if (check_mul_overflow(a, b, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+{
+	size_t bytes;
+
+	if (check_mul_overflow(a, b, &bytes))
+		return SIZE_MAX;
+	if (check_mul_overflow(bytes, c, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
+static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c)
+{
+	size_t bytes;
+
+	if (check_mul_overflow(n, size, &bytes))
+		return SIZE_MAX;
+	if (check_add_overflow(bytes, c, &bytes))
+		return SIZE_MAX;
+
+	return bytes;
+}
+
+/**
+ * struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, n)					\
+	__ab_c_size(n,							\
+		    sizeof(*(p)->member) + __must_be_array((p)->member),\
+		    sizeof(*(p)))
+
+#endif /* __LINUX_OVERFLOW_H */
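
Taken together with the kvmalloc_array() change earlier in this diff, the
intended pattern is to feed untrusted sizes through check_*_overflow() or the
sizing helpers and let the saturated SIZE_MAX poison the allocation. A sketch
with invented names:

	#include <linux/errno.h>
	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct frob_table {
		size_t nr;
		u64 slot[];		/* trailing flexible array */
	};

	static struct frob_table *frob_table_alloc(size_t nr)
	{
		struct frob_table *t;

		/* struct_size() returns SIZE_MAX on overflow; kmalloc rejects it */
		t = kmalloc(struct_size(t, slot, nr), GFP_KERNEL);
		if (t)
			t->nr = nr;
		return t;
	}

	static int frob_scale(size_t n, size_t size, size_t *out)
	{
		/* true means n * size overflowed; *out holds the wrapped value */
		if (check_mul_overflow(n, size, out))
			return -EOVERFLOW;
		return 0;
	}
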
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 73178a2..55371cb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -670,7 +670,7 @@
 int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
 		  int reg, int len, u32 val);
 
-#ifdef CONFIG_PCI_BUS_ADDR_T_64BIT
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 typedef u64 pci_bus_addr_t;
 #else
 typedef u32 pci_bus_addr_t;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index eec302b..1fa1288 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -467,7 +467,7 @@
  */
 struct perf_addr_filter {
 	struct list_head	entry;
-	struct inode		*inode;
+	struct path		path;
 	unsigned long		offset;
 	unsigned long		size;
 	enum perf_addr_filter_action_t	action;
@@ -1017,6 +1017,14 @@
 	return event->event_caps & PERF_EV_CAP_SOFTWARE;
 }
 
+/*
+ * Return 1 for event in sw context, 0 for event in hw context
+ */
+static inline int in_software_context(struct perf_event *event)
+{
+	return event->ctx->pmu->task_ctx_nr == perf_sw_context;
+}
+
 extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
 
 extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
index 93d142a..1746015 100644
--- a/include/linux/pktcdvd.h
+++ b/include/linux/pktcdvd.h
@@ -186,7 +186,7 @@
 	sector_t		current_sector;	/* Keep track of where the elevator is */
 	atomic_t		scan_queue;	/* Set to non-zero when pkt_handle_queue */
 						/* needs to be run. */
-	mempool_t		*rb_pool;	/* mempool for pkt_rb_node allocations */
+	mempool_t		rb_pool;	/* mempool for pkt_rb_node allocations */
 
 	struct packet_iosched   iosched;
 	struct gendisk		*disk;
diff --git a/include/linux/platform_data/clk-st.h b/include/linux/platform_data/clk-st.h
new file mode 100644
index 0000000..7cdb6a4
--- /dev/null
+++ b/include/linux/platform_data/clk-st.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * clock framework for AMD Stoney based clock
+ *
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ */
+
+#ifndef __CLK_ST_H
+#define __CLK_ST_H
+
+#include <linux/compiler.h>
+
+struct st_clk_data {
+	void __iomem *base;
+};
+
+#endif /* __CLK_ST_H */
diff --git a/include/linux/platform_data/tda9950.h b/include/linux/platform_data/tda9950.h
new file mode 100644
index 0000000..c65efd4
--- /dev/null
+++ b/include/linux/platform_data/tda9950.h
@@ -0,0 +1,16 @@
+#ifndef LINUX_PLATFORM_DATA_TDA9950_H
+#define LINUX_PLATFORM_DATA_TDA9950_H
+
+struct device;
+
+struct tda9950_glue {
+	struct device *parent;
+	unsigned long irq_flags;
+	void *data;
+	int (*init)(void *);
+	void (*exit)(void *);
+	int (*open)(void *);
+	void (*release)(void *);
+};
+
+#endif
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 49f634d..3097c94 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -356,6 +356,8 @@
 #define platform_pm_restore		NULL
 #endif
 
+extern int platform_dma_configure(struct device *dev);
+
 #ifdef CONFIG_PM_SLEEP
 #define USE_PLATFORM_PM_SLEEP_OPS \
 	.suspend = platform_pm_suspend, \
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 04dbef9..42e0d64 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -47,8 +47,10 @@
 };
 
 struct genpd_lock_ops;
+struct dev_pm_opp;
 
 struct generic_pm_domain {
+	struct device dev;
 	struct dev_pm_domain domain;	/* PM domain operations */
 	struct list_head gpd_list_node;	/* Node in the global PM domains list */
 	struct list_head master_links;	/* Links with PM domain as a master */
@@ -67,6 +69,8 @@
 	unsigned int performance_state;	/* Aggregated max performance state */
 	int (*power_off)(struct generic_pm_domain *domain);
 	int (*power_on)(struct generic_pm_domain *domain);
+	unsigned int (*opp_to_performance_state)(struct generic_pm_domain *genpd,
+						 struct dev_pm_opp *opp);
 	int (*set_performance_state)(struct generic_pm_domain *genpd,
 				     unsigned int state);
 	struct gpd_dev_ops dev_ops;
@@ -139,21 +143,16 @@
 	return to_gpd_data(dev->power.subsys_data->domain_data);
 }
 
-extern int __pm_genpd_add_device(struct generic_pm_domain *genpd,
-				 struct device *dev,
-				 struct gpd_timing_data *td);
-
-extern int pm_genpd_remove_device(struct generic_pm_domain *genpd,
-				  struct device *dev);
-extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
-				  struct generic_pm_domain *new_subdomain);
-extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
-				     struct generic_pm_domain *target);
-extern int pm_genpd_init(struct generic_pm_domain *genpd,
-			 struct dev_power_governor *gov, bool is_off);
-extern int pm_genpd_remove(struct generic_pm_domain *genpd);
-extern int dev_pm_genpd_set_performance_state(struct device *dev,
-					      unsigned int state);
+int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev);
+int pm_genpd_remove_device(struct device *dev);
+int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
+			   struct generic_pm_domain *new_subdomain);
+int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
+			      struct generic_pm_domain *target);
+int pm_genpd_init(struct generic_pm_domain *genpd,
+		  struct dev_power_governor *gov, bool is_off);
+int pm_genpd_remove(struct generic_pm_domain *genpd);
+int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state);
 
 extern struct dev_power_governor simple_qos_governor;
 extern struct dev_power_governor pm_domain_always_on_gov;
@@ -163,14 +162,12 @@
 {
 	return ERR_PTR(-ENOSYS);
 }
-static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd,
-					struct device *dev,
-					struct gpd_timing_data *td)
+static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
+				      struct device *dev)
 {
 	return -ENOSYS;
 }
-static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd,
-					 struct device *dev)
+static inline int pm_genpd_remove_device(struct device *dev)
 {
 	return -ENOSYS;
 }
@@ -204,15 +201,9 @@
 #define pm_domain_always_on_gov		(*(struct dev_power_governor *)(NULL))
 #endif
 
-static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
-				      struct device *dev)
-{
-	return __pm_genpd_add_device(genpd, dev, NULL);
-}
-
 #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP
-extern void pm_genpd_syscore_poweroff(struct device *dev);
-extern void pm_genpd_syscore_poweron(struct device *dev);
+void pm_genpd_syscore_poweroff(struct device *dev);
+void pm_genpd_syscore_poweron(struct device *dev);
 #else
 static inline void pm_genpd_syscore_poweroff(struct device *dev) {}
 static inline void pm_genpd_syscore_poweron(struct device *dev) {}
@@ -236,13 +227,14 @@
 int of_genpd_add_provider_onecell(struct device_node *np,
 				  struct genpd_onecell_data *data);
 void of_genpd_del_provider(struct device_node *np);
-extern int of_genpd_add_device(struct of_phandle_args *args,
-			       struct device *dev);
-extern int of_genpd_add_subdomain(struct of_phandle_args *parent,
-				  struct of_phandle_args *new_subdomain);
-extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np);
-extern int of_genpd_parse_idle_states(struct device_node *dn,
-			struct genpd_power_state **states, int *n);
+int of_genpd_add_device(struct of_phandle_args *args, struct device *dev);
+int of_genpd_add_subdomain(struct of_phandle_args *parent,
+			   struct of_phandle_args *new_subdomain);
+struct generic_pm_domain *of_genpd_remove_last(struct device_node *np);
+int of_genpd_parse_idle_states(struct device_node *dn,
+			       struct genpd_power_state **states, int *n);
+unsigned int of_genpd_opp_to_performance_state(struct device *dev,
+				struct device_node *opp_node);
 
 int genpd_dev_pm_attach(struct device *dev);
 #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
@@ -278,11 +270,18 @@
 	return -ENODEV;
 }
 
-static inline int genpd_dev_pm_attach(struct device *dev)
+static inline unsigned int
+of_genpd_opp_to_performance_state(struct device *dev,
+				  struct device_node *opp_node)
 {
 	return -ENODEV;
 }
 
+static inline int genpd_dev_pm_attach(struct device *dev)
+{
+	return 0;
+}
+
 static inline
 struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 {
@@ -291,13 +290,13 @@
 #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
 
 #ifdef CONFIG_PM
-extern int dev_pm_domain_attach(struct device *dev, bool power_on);
-extern void dev_pm_domain_detach(struct device *dev, bool power_off);
-extern void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
+int dev_pm_domain_attach(struct device *dev, bool power_on);
+void dev_pm_domain_detach(struct device *dev, bool power_off);
+void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
 #else
 static inline int dev_pm_domain_attach(struct device *dev, bool power_on)
 {
-	return -ENODEV;
+	return 0;
 }
 static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
 static inline void dev_pm_domain_set(struct device *dev,
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 6c2d2e8..099b319 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -125,8 +125,6 @@
 void dev_pm_opp_put_clkname(struct opp_table *opp_table);
 struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
 void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table);
-struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, int (*get_pstate)(struct device *dev, unsigned long rate));
-void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
 int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask);
 int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
@@ -247,14 +245,6 @@
 
 static inline void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) {}
 
-static inline struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev,
-		int (*get_pstate)(struct device *dev, unsigned long rate))
-{
-	return ERR_PTR(-ENOTSUPP);
-}
-
-static inline void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table) {}
-
 static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name)
 {
 	return ERR_PTR(-ENOTSUPP);
@@ -303,17 +293,25 @@
 
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
 int dev_pm_opp_of_add_table(struct device *dev);
+int dev_pm_opp_of_add_table_indexed(struct device *dev, int index);
 void dev_pm_opp_of_remove_table(struct device *dev);
 int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask);
 void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask);
 int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask);
 struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev);
+struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np);
+struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
 	return -ENOTSUPP;
 }
 
+static inline int dev_pm_opp_of_add_table_indexed(struct device *dev, int index)
+{
+	return -ENOTSUPP;
+}
+
 static inline void dev_pm_opp_of_remove_table(struct device *dev)
 {
 }
@@ -336,6 +334,15 @@
 {
 	return NULL;
 }
+
+static inline struct dev_pm_opp *of_dev_pm_opp_find_required_opp(struct device *dev, struct device_node *np)
+{
+	return NULL;
+}
+static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp)
+{
+	return NULL;
+}
 #endif
 
 #endif		/* __LINUX_OPP_H__ */
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index f0fc470..db5dbbf 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -56,8 +56,7 @@
 						 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 extern void pm_runtime_clean_up_links(struct device *dev);
-extern void pm_runtime_get_suppliers(struct device *dev);
-extern void pm_runtime_put_suppliers(struct device *dev);
+extern void pm_runtime_resume_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
 extern void pm_runtime_drop_link(struct device *dev);
 
@@ -173,8 +172,7 @@
 static inline void pm_runtime_set_memalloc_noio(struct device *dev,
 						bool enable){}
 static inline void pm_runtime_clean_up_links(struct device *dev) {}
-static inline void pm_runtime_get_suppliers(struct device *dev) {}
-static inline void pm_runtime_put_suppliers(struct device *dev) {}
+static inline void pm_runtime_resume_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
 static inline void pm_runtime_drop_link(struct device *dev) {}
 
diff --git a/include/linux/poll.h b/include/linux/poll.h
index f45ebd0..fdf86b4 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -74,6 +74,18 @@
 	pt->_key   = ~(__poll_t)0; /* all events enabled */
 }
 
+static inline bool file_has_poll_mask(struct file *file)
+{
+	return file->f_op->get_poll_head && file->f_op->poll_mask;
+}
+
+static inline bool file_can_poll(struct file *file)
+{
+	return file->f_op->poll || file_has_poll_mask(file);
+}
+
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt);
+
 struct poll_table_entry {
 	struct file *filp;
 	__poll_t key;
@@ -96,8 +108,6 @@
 
 extern void poll_initwait(struct poll_wqueues *pwq);
 extern void poll_freewait(struct poll_wqueues *pwq);
-extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
-				 ktime_t *expires, unsigned long slack);
 extern u64 select_estimate_accuracy(struct timespec64 *tv);
 
 #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
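
Together with the ->poll_mask addition to proto_ops earlier in this diff, the
direction is that callers stop dereferencing f_op->poll themselves: vfs_poll()
dispatches to whichever of the two interfaces the file implements. A
hypothetical caller:

	#include <linux/poll.h>

	static __poll_t frob_poll_file(struct file *filp,
				       struct poll_table_struct *pt)
	{
		/* files with neither ->poll nor ->poll_mask are always ready */
		if (!file_can_poll(filp))
			return DEFAULT_POLLMASK;
		return vfs_poll(filp, pt);
	}
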
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index f0139b4..b21c4bd9 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -145,6 +145,7 @@
 	POWER_SUPPLY_PROP_TIME_TO_FULL_NOW,
 	POWER_SUPPLY_PROP_TIME_TO_FULL_AVG,
 	POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */
+	POWER_SUPPLY_PROP_USB_TYPE,
 	POWER_SUPPLY_PROP_SCOPE,
 	POWER_SUPPLY_PROP_PRECHARGE_CURRENT,
 	POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT,
@@ -170,6 +171,19 @@
 	POWER_SUPPLY_TYPE_APPLE_BRICK_ID,	/* Apple Charging Method */
 };
 
+enum power_supply_usb_type {
+	POWER_SUPPLY_USB_TYPE_UNKNOWN = 0,
+	POWER_SUPPLY_USB_TYPE_SDP,		/* Standard Downstream Port */
+	POWER_SUPPLY_USB_TYPE_DCP,		/* Dedicated Charging Port */
+	POWER_SUPPLY_USB_TYPE_CDP,		/* Charging Downstream Port */
+	POWER_SUPPLY_USB_TYPE_ACA,		/* Accessory Charger Adapters */
+	POWER_SUPPLY_USB_TYPE_C,		/* Type C Port */
+	POWER_SUPPLY_USB_TYPE_PD,		/* Power Delivery Port */
+	POWER_SUPPLY_USB_TYPE_PD_DRP,		/* PD Dual Role Port */
+	POWER_SUPPLY_USB_TYPE_PD_PPS,		/* PD Programmable Power Supply */
+	POWER_SUPPLY_USB_TYPE_APPLE_BRICK_ID,	/* Apple Charging Method */
+};
+
 enum power_supply_notifier_events {
 	PSY_EVENT_PROP_CHANGED,
 };
@@ -185,6 +199,8 @@
 /* Run-time specific power supply configuration */
 struct power_supply_config {
 	struct device_node *of_node;
+	struct fwnode_handle *fwnode;
+
 	/* Driver private data */
 	void *drv_data;
 
@@ -196,6 +212,8 @@
 struct power_supply_desc {
 	const char *name;
 	enum power_supply_type type;
+	enum power_supply_usb_type *usb_types;
+	size_t num_usb_types;
 	enum power_supply_property *properties;
 	size_t num_properties;
 
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 928ef9e..e518352 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -9,6 +9,8 @@
 #include <linux/fs.h>
 
 struct proc_dir_entry;
+struct seq_file;
+struct seq_operations;
 
 #ifdef CONFIG_PROC_FS
 
@@ -23,6 +25,19 @@
 extern struct proc_dir_entry *proc_mkdir_mode(const char *, umode_t,
 					      struct proc_dir_entry *);
 struct proc_dir_entry *proc_create_mount_point(const char *name);
+
+struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
+		struct proc_dir_entry *parent, const struct seq_operations *ops,
+		unsigned int state_size, void *data);
+#define proc_create_seq_data(name, mode, parent, ops, data) \
+	proc_create_seq_private(name, mode, parent, ops, 0, data)
+#define proc_create_seq(name, mode, parent, ops) \
+	proc_create_seq_private(name, mode, parent, ops, 0, NULL)
+struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
+		struct proc_dir_entry *parent,
+		int (*show)(struct seq_file *, void *), void *data);
+#define proc_create_single(name, mode, parent, show) \
+	proc_create_single_data(name, mode, parent, show, NULL)
  
 extern struct proc_dir_entry *proc_create_data(const char *, umode_t,
 					       struct proc_dir_entry *,
@@ -38,6 +53,15 @@
 extern void remove_proc_entry(const char *, struct proc_dir_entry *);
 extern int remove_proc_subtree(const char *, struct proc_dir_entry *);
 
+struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
+		struct proc_dir_entry *parent, const struct seq_operations *ops,
+		unsigned int state_size, void *data);
+#define proc_create_net(name, mode, parent, state_size, ops) \
+	proc_create_net_data(name, mode, parent, state_size, ops, NULL)
+struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
+		struct proc_dir_entry *parent,
+		int (*show)(struct seq_file *, void *), void *data);
+
 #else /* CONFIG_PROC_FS */
 
 static inline void proc_root_init(void)
@@ -57,6 +81,11 @@
 	umode_t mode, struct proc_dir_entry *parent, void *data) { return NULL; }
 static inline struct proc_dir_entry *proc_mkdir_mode(const char *name,
 	umode_t mode, struct proc_dir_entry *parent) { return NULL; }
+#define proc_create_seq_private(name, mode, parent, ops, size, data) ({NULL;})
+#define proc_create_seq_data(name, mode, parent, ops, data) ({NULL;})
+#define proc_create_seq(name, mode, parent, ops) ({NULL;})
+#define proc_create_single(name, mode, parent, show) ({NULL;})
+#define proc_create_single_data(name, mode, parent, show, data) ({NULL;})
 #define proc_create(name, mode, parent, proc_fops) ({NULL;})
 #define proc_create_data(name, mode, parent, proc_fops, data) ({NULL;})
 
@@ -69,6 +98,10 @@
 #define remove_proc_entry(name, parent) do {} while (0)
 static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) { return 0; }
 
+#define proc_create_net_data(name, mode, parent, ops, state_size, data) ({NULL;})
+#define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
+#define proc_create_net_single(name, mode, parent, show, data) ({NULL;})
+
 #endif /* CONFIG_PROC_FS */
 
 struct net;
@@ -83,4 +116,10 @@
 int open_related_ns(struct ns_common *ns,
 		   struct ns_common *(*get_ns)(struct ns_common *ns));
 
+/* get the associated pid namespace for a file in procfs */
+static inline struct pid_namespace *proc_pid_ns(struct inode *inode)
+{
+	return inode->i_sb->s_fs_info;
+}
+
 #endif /* _LINUX_PROC_FS_H */
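
The new helpers fold the usual single_open()/seq_open() file_operations
boilerplate into one call. A sketch of the simplest case, with invented names:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int frob_stats_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "frobs 0\n");
		return 0;
	}

	static int __init frob_proc_init(void)
	{
		/* one call replaces the open() stub and file_operations */
		if (!proc_create_single("frob_stats", 0444, NULL,
					frob_stats_show))
			return -ENOMEM;
		return 0;
	}
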
diff --git a/include/linux/property.h b/include/linux/property.h
index 2eea4b3..ac8a1ebc 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -178,7 +178,7 @@
  * @name: Name of the property.
  * @length: Length of data making up the value.
  * @is_array: True when the property is an array.
- * @is_string: True when property is a string.
+ * @type: Type of the data in unions.
  * @pointer: Pointer to the property (an array of items of the given type).
  * @value: Value of the property (when it is a single item of the given type).
  */
@@ -186,10 +186,9 @@
 	const char *name;
 	size_t length;
 	bool is_array;
-	bool is_string;
+	enum dev_prop_type type;
 	union {
 		union {
-			const void *raw_data;
 			const u8 *u8_data;
 			const u16 *u16_data;
 			const u32 *u32_data;
@@ -197,7 +196,6 @@
 			const char * const *str;
 		} pointer;
 		union {
-			unsigned long long raw_data;
 			u8 u8_data;
 			u16 u16_data;
 			u32 u32_data;
@@ -213,55 +211,55 @@
  * and structs.
  */
 
-#define PROPERTY_ENTRY_INTEGER_ARRAY(_name_, _type_, _val_)	\
-(struct property_entry) {					\
-	.name = _name_,						\
-	.length = ARRAY_SIZE(_val_) * sizeof(_type_),		\
-	.is_array = true,					\
-	.is_string = false,					\
-	{ .pointer = { ._type_##_data = _val_ } },		\
+#define PROPERTY_ENTRY_INTEGER_ARRAY(_name_, _type_, _Type_, _val_)	\
+(struct property_entry) {						\
+	.name = _name_,							\
+	.length = ARRAY_SIZE(_val_) * sizeof(_type_),			\
+	.is_array = true,						\
+	.type = DEV_PROP_##_Type_,					\
+	{ .pointer = { ._type_##_data = _val_ } },			\
 }
 
 #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_)			\
-	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u8, _val_)
+	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u8, U8, _val_)
 #define PROPERTY_ENTRY_U16_ARRAY(_name_, _val_)			\
-	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u16, _val_)
+	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u16, U16, _val_)
 #define PROPERTY_ENTRY_U32_ARRAY(_name_, _val_)			\
-	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u32, _val_)
+	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u32, U32, _val_)
 #define PROPERTY_ENTRY_U64_ARRAY(_name_, _val_)			\
-	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u64, _val_)
+	PROPERTY_ENTRY_INTEGER_ARRAY(_name_, u64, U64, _val_)
 
 #define PROPERTY_ENTRY_STRING_ARRAY(_name_, _val_)		\
 (struct property_entry) {					\
 	.name = _name_,						\
 	.length = ARRAY_SIZE(_val_) * sizeof(const char *),	\
 	.is_array = true,					\
-	.is_string = true,					\
+	.type = DEV_PROP_STRING,				\
 	{ .pointer = { .str = _val_ } },			\
 }
 
-#define PROPERTY_ENTRY_INTEGER(_name_, _type_, _val_)	\
-(struct property_entry) {				\
-	.name = _name_,					\
-	.length = sizeof(_type_),			\
-	.is_string = false,				\
-	{ .value = { ._type_##_data = _val_ } },	\
+#define PROPERTY_ENTRY_INTEGER(_name_, _type_, _Type_, _val_)	\
+(struct property_entry) {					\
+	.name = _name_,						\
+	.length = sizeof(_type_),				\
+	.type = DEV_PROP_##_Type_,				\
+	{ .value = { ._type_##_data = _val_ } },		\
 }
 
 #define PROPERTY_ENTRY_U8(_name_, _val_)		\
-	PROPERTY_ENTRY_INTEGER(_name_, u8, _val_)
+	PROPERTY_ENTRY_INTEGER(_name_, u8, U8, _val_)
 #define PROPERTY_ENTRY_U16(_name_, _val_)		\
-	PROPERTY_ENTRY_INTEGER(_name_, u16, _val_)
+	PROPERTY_ENTRY_INTEGER(_name_, u16, U16, _val_)
 #define PROPERTY_ENTRY_U32(_name_, _val_)		\
-	PROPERTY_ENTRY_INTEGER(_name_, u32, _val_)
+	PROPERTY_ENTRY_INTEGER(_name_, u32, U32, _val_)
 #define PROPERTY_ENTRY_U64(_name_, _val_)		\
-	PROPERTY_ENTRY_INTEGER(_name_, u64, _val_)
+	PROPERTY_ENTRY_INTEGER(_name_, u64, U64, _val_)
 
 #define PROPERTY_ENTRY_STRING(_name_, _val_)		\
 (struct property_entry) {				\
 	.name = _name_,					\
 	.length = sizeof(_val_),			\
-	.is_string = true,				\
+	.type = DEV_PROP_STRING,			\
 	{ .value = { .str = _val_ } },			\
 }
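
As a usage sketch (not part of this patch; the property names and values
below are hypothetical), the reworked macros still compose into the same
kind of build-time property tables, only now tagged with DEV_PROP_* types:

	static const u32 hyp_rates[] = { 44100, 48000 };

	static const struct property_entry hyp_props[] = {
		PROPERTY_ENTRY_U32("hyp,sample-rate", 48000),	/* .type = DEV_PROP_U32 */
		PROPERTY_ENTRY_U32_ARRAY("hyp,rates", hyp_rates),
		PROPERTY_ENTRY_STRING("hyp,label", "codec0"),	/* .type = DEV_PROP_STRING */
		{ }						/* sentinel */
	};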
 
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 93addfa..827c601 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -54,6 +54,8 @@
 	SEV_CMD_PDH_CERT_EXPORT		= 0x008,
 	SEV_CMD_PDH_GEN			= 0x009,
 	SEV_CMD_DF_FLUSH		= 0x00A,
+	SEV_CMD_DOWNLOAD_FIRMWARE	= 0x00B,
+	SEV_CMD_GET_ID			= 0x00C,
 
 	/* Guest commands */
 	SEV_CMD_DECOMMISSION		= 0x020,
@@ -130,6 +132,27 @@
 } __packed;
 
 /**
+ * struct sev_data_download_firmware - DOWNLOAD_FIRMWARE command parameters
+ *
+ * @address: physical address of firmware image
+ * @len: len of the firmware image
+ */
+struct sev_data_download_firmware {
+	u64 address;				/* In */
+	u32 len;				/* In */
+} __packed;
+
+/**
+ * struct sev_data_get_id - GET_ID command parameters
+ *
+ * @address: physical address of region to place unique CPU ID(s)
+ * @len: len of the region
+ */
+struct sev_data_get_id {
+	u64 address;				/* In */
+	u32 len;				/* In/Out */
+} __packed;
+
+/**
  * struct sev_data_pdh_cert_export - PDH_CERT_EXPORT command parameters
  *
  * @pdh_address: PDH certificate address
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 919b2a0..037bf0e 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -345,7 +345,6 @@
 static inline void user_single_step_siginfo(struct task_struct *tsk,
 				struct pt_regs *regs, siginfo_t *info)
 {
-	memset(info, 0, sizeof(*info));
 	info->si_signo = SIGTRAP;
 }
 #endif
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 36360d0..e679b17 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -108,7 +108,6 @@
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
-void rcu_cpu_starting(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
 
 #ifdef CONFIG_RCU_STALL_COMMON
@@ -188,13 +187,13 @@
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
 /**
- * cond_resched_rcu_qs - Report potential quiescent states to RCU
+ * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
  *
  * This macro resembles cond_resched(), except that it is defined to
  * report potential quiescent states to RCU-tasks even if the cond_resched()
  * machinery were to be shut off, as some advocate for PREEMPT kernels.
  */
-#define cond_resched_rcu_qs() \
+#define cond_resched_tasks_rcu_qs() \
 do { \
 	if (!cond_resched()) \
 		rcu_note_voluntary_context_switch_lite(current); \
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ce9beec..7b3c82e 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -132,5 +132,6 @@
 #define rcutree_offline_cpu      NULL
 #define rcutree_dead_cpu         NULL
 #define rcutree_dying_cpu        NULL
+static inline void rcu_cpu_starting(unsigned int cpu) { }
 
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index fd996cd..9146558 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -74,6 +74,7 @@
 void rcu_barrier(void);
 void rcu_barrier_bh(void);
 void rcu_barrier_sched(void);
+bool rcu_eqs_special_set(int cpu);
 unsigned long get_state_synchronize_rcu(void);
 void cond_synchronize_rcu(unsigned long oldstate);
 unsigned long get_state_synchronize_sched(void);
@@ -100,5 +101,6 @@
 int rcutree_offline_cpu(unsigned int cpu);
 int rcutree_dead_cpu(unsigned int cpu);
 int rcutree_dying_cpu(unsigned int cpu);
+void rcu_cpu_starting(unsigned int cpu);
 
 #endif /* __LINUX_RCUTREE_H */
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 5f7ad05..4f38068 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -15,6 +15,7 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <linux/ktime.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/bug.h>
@@ -587,7 +588,10 @@
 				 const struct regmap_config *config,
 				 struct lock_class_key *lock_key,
 				 const char *lock_name);
-
+struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus,
+				 const struct regmap_config *config,
+				 struct lock_class_key *lock_key,
+				 const char *lock_name);
 /*
  * Wrapper for regmap_init macros to include a unique lockdep key and name
  * for each call. No-op if CONFIG_LOCKDEP is not set.
@@ -906,6 +910,19 @@
 	__regmap_lockdep_wrapper(__devm_regmap_init_sdw, #config,	\
 				sdw, config)
 
+/**
+ * devm_regmap_init_slimbus() - Initialise managed register map
+ *
+ * @slimbus: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap. The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_slimbus(slimbus, config)			\
+	__regmap_lockdep_wrapper(__devm_regmap_init_slimbus, #config,	\
+				slimbus, config)
 int regmap_mmio_attach_clk(struct regmap *map, struct clk *clk);
 void regmap_mmio_detach_clk(struct regmap *map);
 void regmap_exit(struct regmap *map);
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index bcfdb91..5d83d0c 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -7,6 +7,7 @@
 
 #include <linux/compiler.h>
 #include <linux/types.h>
+#include <linux/time64.h>
 
 struct timespec;
 struct compat_timespec;
@@ -15,9 +16,7 @@
 enum timespec_type {
 	TT_NONE		= 0,
 	TT_NATIVE	= 1,
-#ifdef CONFIG_COMPAT
 	TT_COMPAT	= 2,
-#endif
 };
 
 /*
@@ -40,10 +39,8 @@
 			clockid_t clockid;
 			enum timespec_type type;
 			union {
-				struct timespec __user *rmtp;
-#ifdef CONFIG_COMPAT
+				struct __kernel_timespec __user *rmtp;
 				struct compat_timespec __user *compat_rmtp;
-#endif
 			};
 			u64 expires;
 		} nanosleep;
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index a0233ed..b72ebdf 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -65,7 +65,7 @@
 
 /*
  * ring_buffer_discard_commit will remove an event that has not
- *   ben committed yet. If this is used, then ring_buffer_unlock_commit
+ *   been committed yet. If this is used, then ring_buffer_unlock_commit
  *   must not be called on the discarded event. This function
  *   will try to remove the event from the ring buffer completely
  *   if another event has not been written after it.
diff --git a/include/linux/rslib.h b/include/linux/rslib.h
index 746580c..5974ced 100644
--- a/include/linux/rslib.h
+++ b/include/linux/rslib.h
@@ -1,28 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * include/linux/rslib.h
- *
- * Overview:
- *   Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
  *
  * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de)
  *
  * RS code lifted from reed solomon library written by Phil Karn
  * Copyright 2002 Phil Karn, KA9Q
- *
- * $Id: rslib.h,v 1.4 2005/11/07 11:14:52 gleixner Exp $
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
-
 #ifndef _RSLIB_H_
 #define _RSLIB_H_
 
 #include <linux/list.h>
+#include <linux/types.h>	/* for gfp_t */
+#include <linux/gfp.h>		/* for GFP_KERNEL */
 
 /**
- * struct rs_control - rs control structure
+ * struct rs_codec - rs codec data
  *
  * @mm:		Bits per symbol
  * @nn:		Symbols per block (= (1<<mm)-1)
@@ -36,24 +29,34 @@
 * @gfpoly:	The primitive generator polynomial
  * @gffunc:	Function to generate the field, if non-canonical representation
  * @users:	Users of this structure
- * @list:	List entry for the rs control list
+ * @list:	List entry for the rs codec list
 */
-struct rs_control {
-	int 		mm;
-	int 		nn;
+struct rs_codec {
+	int		mm;
+	int		nn;
 	uint16_t	*alpha_to;
 	uint16_t	*index_of;
 	uint16_t	*genpoly;
-	int 		nroots;
-	int 		fcr;
-	int 		prim;
-	int 		iprim;
+	int		nroots;
+	int		fcr;
+	int		prim;
+	int		iprim;
 	int		gfpoly;
 	int		(*gffunc)(int);
 	int		users;
 	struct list_head list;
 };
 
+/**
+ * struct rs_control - rs control structure per instance
+ * @codec:	The codec used for this instance
+ * @buffers:	Internal scratch buffers used in calls to decode_rs()
+ */
+struct rs_control {
+	struct rs_codec	*codec;
+	uint16_t	buffers[0];
+};
+
 /* General purpose RS codec, 8-bit data width, symbol width 1-15 bit  */
 #ifdef CONFIG_REED_SOLOMON_ENC8
 int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par,
@@ -76,18 +79,37 @@
 		uint16_t *corr);
 #endif
 
-/* Create or get a matching rs control structure */
-struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
-			   int nroots);
+struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim,
+			       int nroots, gfp_t gfp);
+
+/**
+ * init_rs - Create a RS control struct and initialize it
+ *  @symsize:	the symbol size (number of bits)
+ *  @gfpoly:	the extended Galois field generator polynomial coefficients,
+ *		with the 0th coefficient in the low order bit. The polynomial
+ *		must be primitive;
+ *  @fcr:	the first consecutive root of the rs code generator polynomial
+ *		in index form
+ *  @prim:	primitive element to generate polynomial roots
+ *  @nroots:	RS code generator polynomial degree (number of roots)
+ *
+ * Allocations use GFP_KERNEL.
+ */
+static inline struct rs_control *init_rs(int symsize, int gfpoly, int fcr,
+					 int prim, int nroots)
+{
+	return init_rs_gfp(symsize, gfpoly, fcr, prim, nroots, GFP_KERNEL);
+}
+
 struct rs_control *init_rs_non_canonical(int symsize, int (*func)(int),
-                                         int fcr, int prim, int nroots);
+					 int fcr, int prim, int nroots);
 
 /* Release a rs control structure */
 void free_rs(struct rs_control *rs);
 
 /** modulo replacement for galois field arithmetic
  *
- *  @rs:	the rs control structure
+ *  @rs:	Pointer to the RS codec
  *  @x:		the value to reduce
  *
  *  where
@@ -97,7 +119,7 @@
  *  Simple arithmetic modulo would return a wrong result for values
  *  >= 3 * rs->nn
 */
-static inline int rs_modnn(struct rs_control *rs, int x)
+static inline int rs_modnn(struct rs_codec *rs, int x)
 {
 	while (x >= rs->nn) {
 		x -= rs->nn;
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 841585f..e653953 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -127,6 +127,12 @@
 	 * @round_robin: Allocate bits in strict round-robin order.
 	 */
 	bool round_robin;
+
+	/**
+	 * @min_shallow_depth: The minimum shallow depth which may be passed to
+	 * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
+	 */
+	unsigned int min_shallow_depth;
 };
 
 /**
@@ -390,6 +396,9 @@
  * @shallow_depth: The maximum number of bits to allocate from a single word.
  * See sbitmap_get_shallow().
  *
+ * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
+ * initializing @sbq.
+ *
  * Return: Non-negative allocated bit number if successful, -1 otherwise.
  */
 int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
@@ -424,6 +433,9 @@
  * @shallow_depth: The maximum number of bits to allocate from a single word.
  * See sbitmap_get_shallow().
  *
+ * If you call this, make sure to call sbitmap_queue_min_shallow_depth() after
+ * initializing @sbq.
+ *
  * Return: Non-negative allocated bit number if successful, -1 otherwise.
  */
 static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
@@ -439,6 +451,23 @@
 }
 
 /**
+ * sbitmap_queue_min_shallow_depth() - Inform a &struct sbitmap_queue of the
+ * minimum shallow depth that will be used.
+ * @sbq: Bitmap queue in question.
+ * @min_shallow_depth: The minimum shallow depth that will be passed to
+ * sbitmap_queue_get_shallow() or __sbitmap_queue_get_shallow().
+ *
+ * sbitmap_queue_clear() batches wakeups as an optimization. The batch size
+ * depends on the depth of the bitmap. Since the shallow allocation functions
+ * effectively operate with a different depth, the shallow depth must be taken
+ * into account when calculating the batch size. This function must be called
+ * with the minimum shallow depth that will be used. Failure to do so can result
+ * in missed wakeups.
+ */
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+				     unsigned int min_shallow_depth);
+
+/**
  * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a
  * &struct sbitmap_queue.
  * @sbq: Bitmap to free from.
@@ -484,6 +513,13 @@
 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq);
 
 /**
+ * sbitmap_queue_wake_up() - Wake up some of waiters in one waitqueue
+ * on a &struct sbitmap_queue.
+ * @sbq: Bitmap queue to wake up.
+ */
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
+
+/**
  * sbitmap_queue_show() - Dump &struct sbitmap_queue information to a &struct
  * seq_file.
  * @sbq: Bitmap queue to show.
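
The ordering contract added above, as a short sketch (the queue itself and the
depth of 4 are hypothetical; sbitmap_queue_clear() is the existing free path):

	static int hyp_get_bit(struct sbitmap_queue *sbq, unsigned int cpu)
	{
		int nr;

		/* Normally done once at init time, before any shallow get,
		 * so the wakeup batch is sized for the smallest depth. */
		sbitmap_queue_min_shallow_depth(sbq, 4);

		nr = sbitmap_queue_get_shallow(sbq, 4);	/* >= the declared minimum */
		if (nr < 0)
			return -EBUSY;			/* nothing free right now */
		/* ... use bit nr ... */
		sbitmap_queue_clear(sbq, nr, cpu);	/* wakeups are batched */
		return 0;
	}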
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ca3f3ea..14e4f9c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1512,6 +1512,7 @@
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
+extern int available_idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
 extern int sched_setattr(struct task_struct *, const struct sched_attr *);
@@ -1661,7 +1662,6 @@
  * explicit rescheduling in places that are safe. The return
  * value indicates whether a reschedule was done in fact.
  * cond_resched_lock() will drop the spinlock before scheduling,
- * cond_resched_softirq() will enable bhs before scheduling.
  */
 #ifndef CONFIG_PREEMPT
 extern int _cond_resched(void);
@@ -1681,13 +1681,6 @@
 	__cond_resched_lock(lock);				\
 })
 
-extern int __cond_resched_softirq(void);
-
-#define cond_resched_softirq() ({					\
-	___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
-	__cond_resched_softirq();					\
-})
-
 static inline void cond_resched_rcu(void)
 {
 #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 2c570cd9..76a8cb4 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -28,7 +28,7 @@
  *
  * Use mmdrop() to release the reference acquired by mmgrab().
  *
- * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
  * of &mm_struct.mm_count vs &mm_struct.mm_users.
  */
 static inline void mmgrab(struct mm_struct *mm)
@@ -62,7 +62,7 @@
  *
  * Use mmput() to release the reference acquired by mmget().
  *
- * See also <Documentation/vm/active_mm.txt> for an in-depth explanation
+ * See also <Documentation/vm/active_mm.rst> for an in-depth explanation
  * of &mm_struct.mm_count vs &mm_struct.mm_users.
  */
 static inline void mmget(struct mm_struct *mm)
@@ -170,6 +170,17 @@
 static inline void fs_reclaim_release(gfp_t gfp_mask) { }
 #endif
 
+/**
+ * memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
+ *
+ * This function marks the beginning of the GFP_NOIO allocation scope.
+ * All further allocations will implicitly drop the __GFP_IO flag and so
+ * they are safe for the IO critical section from the allocation recursion
+ * point of view. Use memalloc_noio_restore to end the scope with flags
+ * returned by this function.
+ *
+ * This function is safe to be used from any context.
+ */
 static inline unsigned int memalloc_noio_save(void)
 {
 	unsigned int flags = current->flags & PF_MEMALLOC_NOIO;
@@ -177,11 +188,30 @@
 	return flags;
 }
 
+/**
+ * memalloc_noio_restore - Ends the implicit GFP_NOIO scope.
+ * @flags: Flags to restore.
+ *
+ * Ends the implicit GFP_NOIO scope started by memalloc_noio_save function.
+ * Always make sure that the given flags value is the return value from the
+ * pairing memalloc_noio_save call.
+ */
 static inline void memalloc_noio_restore(unsigned int flags)
 {
 	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
 }
 
+/**
+ * memalloc_nofs_save - Marks implicit GFP_NOFS allocation scope.
+ *
+ * This function marks the beginning of the GFP_NOFS allocation scope.
+ * All further allocations will implicitly drop the __GFP_FS flag and so
+ * they are safe for the FS critical section from the allocation recursion
+ * point of view. Use memalloc_nofs_restore to end the scope with flags
+ * returned by this function.
+ *
+ * This function is safe to be used from any context.
+ */
 static inline unsigned int memalloc_nofs_save(void)
 {
 	unsigned int flags = current->flags & PF_MEMALLOC_NOFS;
@@ -189,6 +219,14 @@
 	return flags;
 }
 
+/**
+ * memalloc_nofs_restore - Ends the implicit GFP_NOFS scope.
+ * @flags: Flags to restore.
+ *
+ * Ends the implicit GFP_NOFS scope started by memalloc_nofs_save function.
+ * Always make sure that the given flags value is the return value from the
+ * pairing memalloc_nofs_save call.
+ */
 static inline void memalloc_nofs_restore(unsigned int flags)
 {
 	current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags;
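
The save/restore pairing the new kernel-doc describes, sketched for the NOIO
scope (the caller and the allocation in the middle are hypothetical; the NOFS
pair works the same way):

	static void *hyp_alloc_in_io_path(size_t len)
	{
		unsigned int noio_flags;
		void *buf;

		noio_flags = memalloc_noio_save();	/* allocations below drop __GFP_IO */
		buf = kmalloc(len, GFP_KERNEL);		/* effectively GFP_NOIO here */
		memalloc_noio_restore(noio_flags);	/* must be the paired return value */
		return buf;
	}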
diff --git a/include/linux/security.h b/include/linux/security.h
index 200920f..63030c8 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -220,12 +220,6 @@
 int security_quota_on(struct dentry *dentry);
 int security_syslog(int type);
 int security_settime64(const struct timespec64 *ts, const struct timezone *tz);
-static inline int security_settime(const struct timespec *ts, const struct timezone *tz)
-{
-	struct timespec64 ts64 = timespec_to_timespec64(*ts);
-
-	return security_settime64(&ts64, tz);
-}
 int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
 int security_bprm_set_creds(struct linux_binprm *bprm);
 int security_bprm_check(struct linux_binprm *bprm);
@@ -508,14 +502,6 @@
 	return cap_settime(ts, tz);
 }
 
-static inline int security_settime(const struct timespec *ts,
-				   const struct timezone *tz)
-{
-	struct timespec64 ts64 = timespec_to_timespec64(*ts);
-
-	return cap_settime(&ts64, tz);
-}
-
 static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
 {
 	return __vm_enough_memory(mm, pages, cap_vm_enough_memory(mm, pages));
@@ -1191,6 +1177,7 @@
 int security_socket_create(int family, int type, int protocol, int kern);
 int security_socket_post_create(struct socket *sock, int family,
 				int type, int protocol, int kern);
+int security_socket_socketpair(struct socket *socka, struct socket *sockb);
 int security_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen);
 int security_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen);
 int security_socket_listen(struct socket *sock, int backlog);
@@ -1262,6 +1249,12 @@
 	return 0;
 }
 
+static inline int security_socket_socketpair(struct socket *socka,
+					     struct socket *sockb)
+{
+	return 0;
+}
+
 static inline int security_socket_bind(struct socket *sock,
 				       struct sockaddr *address,
 				       int addrlen)
diff --git a/include/linux/seq_file_net.h b/include/linux/seq_file_net.h
index 43ccd84..0fdbe1d 100644
--- a/include/linux/seq_file_net.h
+++ b/include/linux/seq_file_net.h
@@ -13,12 +13,6 @@
 #endif
 };
 
-int seq_open_net(struct inode *, struct file *,
-		 const struct seq_operations *, int);
-int single_open_net(struct inode *, struct file *file,
-		int (*show)(struct seq_file *, void *));
-int seq_release_net(struct inode *, struct file *);
-int single_release_net(struct inode *, struct file *);
 static inline struct net *seq_file_net(struct seq_file *seq)
 {
 #ifdef CONFIG_NET_NS
@@ -28,4 +22,17 @@
 #endif
 }
 
+/*
+ * This one is needed for proc_create_net_single since net is stored directly
+ * in private, not inside a struct, i.e. seq_file_net() can't be used.
+ */
+static inline struct net *seq_file_single_net(struct seq_file *seq)
+{
+#ifdef CONFIG_NET_NS
+	return (struct net *)seq->private;
+#else
+	return &init_net;
+#endif
+}
+
 #endif
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index a27ef5f..76b9db7 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -163,6 +163,7 @@
 extern int fsl8250_handle_irq(struct uart_port *port);
 int serial8250_handle_irq(struct uart_port *port, unsigned int iir);
 unsigned char serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr);
+void serial8250_read_char(struct uart_8250_port *up, unsigned char lsr);
 void serial8250_tx_chars(struct uart_8250_port *up);
 unsigned int serial8250_modem_status(struct uart_8250_port *up);
 void serial8250_init_port(struct uart_8250_port *up);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index b4c9fda..06ea4ee 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -233,6 +233,7 @@
 #define UPSTAT_AUTORTS		((__force upstat_t) (1 << 2))
 #define UPSTAT_AUTOCTS		((__force upstat_t) (1 << 3))
 #define UPSTAT_AUTOXOFF		((__force upstat_t) (1 << 4))
+#define UPSTAT_SYNC_FIFO	((__force upstat_t) (1 << 5))
 
 	int			hw_stopped;		/* sw-assisted CTS flow state */
 	unsigned int		mctrl;			/* current modem ctrl settings */
@@ -348,7 +349,8 @@
 };
 
 struct earlycon_id {
-	char	name[16];
+	char	name[15];
+	char	name_term;	/* In case compiler didn't '\0' term name */
 	char	compatible[128];
 	int	(*setup)(struct earlycon_device *, const char *options);
 };
diff --git a/include/linux/signal.h b/include/linux/signal.h
index a9bc7e1..3c52001 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -28,6 +28,9 @@
 	SIL_TIMER,
 	SIL_POLL,
 	SIL_FAULT,
+	SIL_FAULT_MCEERR,
+	SIL_FAULT_BNDERR,
+	SIL_FAULT_PKUERR,
 	SIL_CHLD,
 	SIL_RT,
 	SIL_SYS,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 164cded..c868859 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3252,8 +3252,7 @@
 				    int *peeked, int *off, int *err);
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
 				  int *err);
-__poll_t datagram_poll(struct file *file, struct socket *sock,
-			   struct poll_table_struct *wait);
+__poll_t datagram_poll_mask(struct socket *sock, __poll_t events);
 int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
 			   struct iov_iter *to, int size);
 static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 81ebd71..4d759e1 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -13,6 +13,7 @@
 #define	_LINUX_SLAB_H
 
 #include <linux/gfp.h>
+#include <linux/overflow.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
@@ -624,11 +625,13 @@
  */
 static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 {
-	if (size != 0 && n > SIZE_MAX / size)
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
 	if (__builtin_constant_p(n) && __builtin_constant_p(size))
-		return kmalloc(n * size, flags);
-	return __kmalloc(n * size, flags);
+		return kmalloc(bytes, flags);
+	return __kmalloc(bytes, flags);
 }
 
 /**
@@ -657,11 +660,13 @@
 static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
 				       int node)
 {
-	if (size != 0 && n > SIZE_MAX / size)
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
 	if (__builtin_constant_p(n) && __builtin_constant_p(size))
-		return kmalloc_node(n * size, flags, node);
-	return __kmalloc_node(n * size, flags, node);
+		return kmalloc_node(bytes, flags, node);
+	return __kmalloc_node(bytes, flags, node);
 }
 
 static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
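
The same check_mul_overflow() idiom from <linux/overflow.h> can be used
directly by callers that size their own buffers; a sketch (the helper name,
n and size are hypothetical):

	static void *hyp_alloc_array(size_t n, size_t size)
	{
		size_t bytes;

		if (unlikely(check_mul_overflow(n, size, &bytes)))
			return NULL;			/* n * size would wrap */
		return kmalloc(bytes, GFP_KERNEL);	/* bytes == n * size, checked */
	}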
diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h
new file mode 100644
index 0000000..c5d52e2
--- /dev/null
+++ b/include/linux/soc/qcom/apr.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __QCOM_APR_H_
+#define __QCOM_APR_H_
+
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <dt-bindings/soc/qcom,apr.h>
+
+extern struct bus_type aprbus;
+
+#define APR_HDR_LEN(hdr_len) ((hdr_len)/4)
+
+/*
+ * HEADER field
+ * version: 0:3
+ * header_size: 4:7
+ * message_type: 8:9
+ * reserved: 10:15
+ */
+#define APR_HDR_FIELD(msg_type, hdr_len, ver)\
+	(((msg_type & 0x3) << 8) | ((hdr_len & 0xF) << 4) | (ver & 0xF))
+
+#define APR_HDR_SIZE sizeof(struct apr_hdr)
+#define APR_SEQ_CMD_HDR_FIELD APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD, \
+					    APR_HDR_LEN(APR_HDR_SIZE), \
+					    APR_PKT_VER)
+/* Version */
+#define APR_PKT_VER		0x0
+
+/* Command and Response Types */
+#define APR_MSG_TYPE_EVENT	0x0
+#define APR_MSG_TYPE_CMD_RSP	0x1
+#define APR_MSG_TYPE_SEQ_CMD	0x2
+#define APR_MSG_TYPE_NSEQ_CMD	0x3
+#define APR_MSG_TYPE_MAX	0x04
+
+/* APR Basic Response Message */
+#define APR_BASIC_RSP_RESULT 0x000110E8
+#define APR_RSP_ACCEPTED     0x000100BE
+
+struct aprv2_ibasic_rsp_result_t {
+	uint32_t opcode;
+	uint32_t status;
+};
+
+/* hdr field Ver [0:3], Size [4:7], Message type [8:9] */
+#define APR_HDR_FIELD_VER(h)		(h & 0x000F)
+#define APR_HDR_FIELD_SIZE(h)		((h & 0x00F0) >> 4)
+#define APR_HDR_FIELD_SIZE_BYTES(h)	(((h & 0x00F0) >> 4) * 4)
+#define APR_HDR_FIELD_MT(h)		((h & 0x0300) >> 8)
+
+struct apr_hdr {
+	uint16_t hdr_field;
+	uint16_t pkt_size;
+	uint8_t src_svc;
+	uint8_t src_domain;
+	uint16_t src_port;
+	uint8_t dest_svc;
+	uint8_t dest_domain;
+	uint16_t dest_port;
+	uint32_t token;
+	uint32_t opcode;
+} __packed;
+
+struct apr_pkt {
+	struct apr_hdr hdr;
+	uint8_t payload[];
+};
+
+struct apr_resp_pkt {
+	struct apr_hdr hdr;
+	void *payload;
+	int payload_size;
+};
+
+/* Bits 0 to 15 -- Minor version,  Bits 16 to 31 -- Major version */
+#define APR_SVC_MAJOR_VERSION(v)	((v >> 16) & 0xFF)
+#define APR_SVC_MINOR_VERSION(v)	(v & 0xFF)
+
+struct apr_device {
+	struct device	dev;
+	uint16_t	svc_id;
+	uint16_t	domain_id;
+	uint32_t	version;
+	char name[APR_NAME_SIZE];
+	spinlock_t	lock;
+	struct list_head node;
+};
+
+#define to_apr_device(d) container_of(d, struct apr_device, dev)
+
+struct apr_driver {
+	int	(*probe)(struct apr_device *sl);
+	int	(*remove)(struct apr_device *sl);
+	int	(*callback)(struct apr_device *a,
+			    struct apr_resp_pkt *d);
+	struct device_driver		driver;
+	const struct apr_device_id	*id_table;
+};
+
+#define to_apr_driver(d) container_of(d, struct apr_driver, driver)
+
+/*
+ * use a macro to avoid include chaining to get THIS_MODULE
+ */
+#define apr_driver_register(drv) __apr_driver_register(drv, THIS_MODULE)
+
+int __apr_driver_register(struct apr_driver *drv, struct module *owner);
+void apr_driver_unregister(struct apr_driver *drv);
+
+/**
+ * module_apr_driver() - Helper macro for registering an aprbus driver
+ * @__apr_driver: apr_driver struct
+ *
+ * Helper macro for aprbus drivers which do not do anything special in
+ * module init/exit. This eliminates a lot of boilerplate. Each module
+ * may only use this macro once, and calling it replaces module_init()
+ * and module_exit().
+ */
+#define module_apr_driver(__apr_driver) \
+	module_driver(__apr_driver, apr_driver_register, \
+			apr_driver_unregister)
+
+int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt);
+
+#endif /* __QCOM_APR_H_ */
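
A minimal aprbus client, sketched against the ops above (all "hyp" names are
hypothetical; .remove and .id_table are omitted for brevity):

	static int hyp_probe(struct apr_device *adev)
	{
		dev_info(&adev->dev, "service %u came up\n", adev->svc_id);
		return 0;
	}

	static int hyp_callback(struct apr_device *adev, struct apr_resp_pkt *rsp)
	{
		struct aprv2_ibasic_rsp_result_t *res = rsp->payload;

		if (rsp->hdr.opcode == APR_BASIC_RSP_RESULT)
			dev_dbg(&adev->dev, "cmd 0x%x status %u\n",
				res->opcode, res->status);
		return 0;
	}

	static struct apr_driver hyp_driver = {
		.probe = hyp_probe,
		.callback = hyp_callback,
		.driver = { .name = "hyp-apr-client" },
	};
	module_apr_driver(hyp_driver);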
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index e91fdcf..962971e 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -23,9 +23,24 @@
 #define SDW_MASTER_DEV_NUM		14
 
 #define SDW_NUM_DEV_ID_REGISTERS	6
+/* frame shape defines */
 
+/*
+ * Note: The maximum row defined in the SoundWire 1.1 spec is 23. In order
+ * to fill the hole with 0, one more dummy entry is added.
+ */
+#define SDW_FRAME_ROWS		24
+#define SDW_FRAME_COLS		8
+#define SDW_FRAME_ROW_COLS		(SDW_FRAME_ROWS * SDW_FRAME_COLS)
+
+#define SDW_FRAME_CTRL_BITS		48
 #define SDW_MAX_DEVICES			11
 
+#define SDW_VALID_PORT_RANGE(n)		(n <= 14 && n >= 1)
+
+#define SDW_DAI_ID_RANGE_START		100
+#define SDW_DAI_ID_RANGE_END		200
+
 /**
  * enum sdw_slave_status - Slave status
  * @SDW_SLAVE_UNATTACHED: Slave is not attached with the bus.
@@ -61,6 +76,30 @@
 	SDW_CMD_FAIL_OTHER = 4,
 };
 
+/**
+ * enum sdw_stream_type: data stream type
+ *
+ * @SDW_STREAM_PCM: PCM data stream
+ * @SDW_STREAM_PDM: PDM data stream
+ *
+ * The spec doesn't define this, but it is used in the implementation
+ */
+enum sdw_stream_type {
+	SDW_STREAM_PCM = 0,
+	SDW_STREAM_PDM = 1,
+};
+
+/**
+ * enum sdw_data_direction: Data direction
+ *
+ * @SDW_DATA_DIR_RX: Data into Port
+ * @SDW_DATA_DIR_TX: Data out of Port
+ */
+enum sdw_data_direction {
+	SDW_DATA_DIR_RX = 0,
+	SDW_DATA_DIR_TX = 1,
+};
+
 /*
  * SDW properties, defined in MIPI DisCo spec v1.0
  */
@@ -341,11 +380,92 @@
 };
 
 /**
- * struct sdw_slave_ops - Slave driver callback ops
+ * enum sdw_reg_bank - SoundWire register banks
+ * @SDW_BANK0: SoundWire register bank 0
+ * @SDW_BANK1: SoundWire register bank 1
+ */
+enum sdw_reg_bank {
+	SDW_BANK0,
+	SDW_BANK1,
+};
+
+/**
+ * struct sdw_bus_conf: Bus configuration
+ *
+ * @clk_freq: Clock frequency, in Hz
+ * @num_rows: Number of rows in frame
+ * @num_cols: Number of columns in frame
+ * @bank: Next register bank
+ */
+struct sdw_bus_conf {
+	unsigned int clk_freq;
+	unsigned int num_rows;
+	unsigned int num_cols;
+	unsigned int bank;
+};
+
+/**
+ * struct sdw_prepare_ch: Prepare/De-prepare Data Port channel
+ *
+ * @num: Port number
+ * @ch_mask: Active channel mask
+ * @prepare: Prepare (true) / de-prepare (false) channel
+ * @bank: Register bank, i.e. which bank the Slave/Master driver should
+ * program for implementation-defined registers. This is always updated
+ * to the next_bank value read from the bus params.
+ *
+ */
+struct sdw_prepare_ch {
+	unsigned int num;
+	unsigned int ch_mask;
+	bool prepare;
+	unsigned int bank;
+};
+
+/**
+ * enum sdw_port_prep_ops: Prepare operations for Data Port
+ *
+ * @SDW_OPS_PORT_PRE_PREP: Pre prepare operation for the Port
+ * @SDW_OPS_PORT_PREP: Prepare operation for the Port
+ * @SDW_OPS_PORT_POST_PREP: Post prepare operation for the Port
+ */
+enum sdw_port_prep_ops {
+	SDW_OPS_PORT_PRE_PREP = 0,
+	SDW_OPS_PORT_PREP = 1,
+	SDW_OPS_PORT_POST_PREP = 2,
+};
+
+/**
+ * struct sdw_bus_params: Structure holding bus configuration
+ *
+ * @curr_bank: Current bank in use (BANK0/BANK1)
+ * @next_bank: Next bank to use (BANK0/BANK1). next_bank will always be
+ * set to !curr_bank
+ * @max_dr_freq: Maximum double rate clock frequency supported, in Hz
+ * @curr_dr_freq: Current double rate clock frequency, in Hz
+ * @bandwidth: Current bandwidth
+ * @col: Active columns
+ * @row: Active rows
+ */
+struct sdw_bus_params {
+	enum sdw_reg_bank curr_bank;
+	enum sdw_reg_bank next_bank;
+	unsigned int max_dr_freq;
+	unsigned int curr_dr_freq;
+	unsigned int bandwidth;
+	unsigned int col;
+	unsigned int row;
+};
+
+/**
+ * struct sdw_slave_ops: Slave driver callback ops
+ *
  * @read_prop: Read Slave properties
  * @interrupt_callback: Device interrupt notification (invoked in thread
  * context)
  * @update_status: Update Slave status
+ * @bus_config: Update the bus config for Slave
+ * @port_prep: Prepare the port with parameters
  */
 struct sdw_slave_ops {
 	int (*read_prop)(struct sdw_slave *sdw);
@@ -353,6 +473,11 @@
 			struct sdw_slave_intr_status *status);
 	int (*update_status)(struct sdw_slave *slave,
 			enum sdw_slave_status status);
+	int (*bus_config)(struct sdw_slave *slave,
+			struct sdw_bus_params *params);
+	int (*port_prep)(struct sdw_slave *slave,
+			struct sdw_prepare_ch *prepare_ch,
+			enum sdw_port_prep_ops pre_ops);
 };
 
 /**
@@ -406,6 +531,93 @@
  * SDW master structures and APIs
  */
 
+/**
+ * struct sdw_port_params: Data Port parameters
+ *
+ * @num: Port number
+ * @bps: Word length of the Port
+ * @flow_mode: Port Data flow mode
+ * @data_mode: Test modes or normal mode
+ *
+ * This is used to program the Data Port based on Data Port stream
+ * parameters.
+ */
+struct sdw_port_params {
+	unsigned int num;
+	unsigned int bps;
+	unsigned int flow_mode;
+	unsigned int data_mode;
+};
+
+/**
+ * struct sdw_transport_params: Data Port Transport Parameters
+ *
+ * @blk_grp_ctrl_valid: Port implements block group control
+ * @port_num: Port number
+ * @blk_grp_ctrl: Block group control value
+ * @sample_interval: Sample interval
+ * @offset1: Blockoffset of the payload data
+ * @offset2: Blockoffset of the payload data
+ * @hstart: Horizontal start of the payload data
+ * @hstop: Horizontal stop of the payload data
+ * @blk_pkg_mode: Block per channel or block per port
+ * @lane_ctrl: Data lane the Port uses for data transfer. Currently only a
+ * single data lane is supported on the bus
+ *
+ * This is used to program the Data Port based on Data Port transport
+ * parameters. All these parameters are banked and can be modified
+ * during a bank switch without any artifacts in audio stream.
+ */
+struct sdw_transport_params {
+	bool blk_grp_ctrl_valid;
+	unsigned int port_num;
+	unsigned int blk_grp_ctrl;
+	unsigned int sample_interval;
+	unsigned int offset1;
+	unsigned int offset2;
+	unsigned int hstart;
+	unsigned int hstop;
+	unsigned int blk_pkg_mode;
+	unsigned int lane_ctrl;
+};
+
+/**
+ * struct sdw_enable_ch: Enable/disable Data Port channel
+ *
+ * @port_num: Port number
+ * @ch_mask: Active channel mask
+ * @enable: Enable (true) / disable (false) channel
+ */
+struct sdw_enable_ch {
+	unsigned int port_num;
+	unsigned int ch_mask;
+	bool enable;
+};
+
+/**
+ * struct sdw_master_port_ops: Callback functions from bus to Master
+ * driver to set Master Data ports.
+ *
+ * @dpn_set_port_params: Set the Port parameters for the Master Port.
+ * Mandatory callback
+ * @dpn_set_port_transport_params: Set transport parameters for the Master
+ * Port. Mandatory callback
+ * @dpn_port_prep: Port prepare operations for the Master Data Port.
+ * @dpn_port_enable_ch: Enable the channels of Master Port.
+ */
+struct sdw_master_port_ops {
+	int (*dpn_set_port_params)(struct sdw_bus *bus,
+			struct sdw_port_params *port_params,
+			unsigned int bank);
+	int (*dpn_set_port_transport_params)(struct sdw_bus *bus,
+			struct sdw_transport_params *transport_params,
+			enum sdw_reg_bank bank);
+	int (*dpn_port_prep)(struct sdw_bus *bus,
+			struct sdw_prepare_ch *prepare_ch);
+	int (*dpn_port_enable_ch)(struct sdw_bus *bus,
+			struct sdw_enable_ch *enable_ch, unsigned int bank);
+};
+
 struct sdw_msg;
 
 /**
@@ -426,6 +638,9 @@
  * @xfer_msg: Transfer message callback
  * @xfer_msg_defer: Defer version of transfer message callback
  * @reset_page_addr: Reset the SCP page address registers
+ * @set_bus_conf: Set the bus configuration
+ * @pre_bank_switch: Callback for pre bank switch
+ * @post_bank_switch: Callback for post bank switch
  */
 struct sdw_master_ops {
 	int (*read_prop)(struct sdw_bus *bus);
@@ -437,6 +652,11 @@
 			struct sdw_defer *defer);
 	enum sdw_command_response (*reset_page_addr)
 			(struct sdw_bus *bus, unsigned int dev_num);
+	int (*set_bus_conf)(struct sdw_bus *bus,
+			struct sdw_bus_params *params);
+	int (*pre_bank_switch)(struct sdw_bus *bus);
+	int (*post_bank_switch)(struct sdw_bus *bus);
+
 };
 
 /**
@@ -449,9 +669,15 @@
  * @bus_lock: bus lock
  * @msg_lock: message lock
  * @ops: Master callback ops
+ * @port_ops: Master port callback ops
+ * @params: Current bus parameters
  * @prop: Master properties
+ * @m_rt_list: List of Master instance of all stream(s) running on Bus. This
+ * is used to compute and program bus bandwidth, clock, frame shape,
+				slimbus, config)
+
  * @defer_msg: Defer message
  * @clk_stop_timeout: Clock stop timeout computed
+ * @bank_switch_timeout: Bank switch timeout computed
  */
 struct sdw_bus {
 	struct device *dev;
@@ -461,14 +687,118 @@
 	struct mutex bus_lock;
 	struct mutex msg_lock;
 	const struct sdw_master_ops *ops;
+	const struct sdw_master_port_ops *port_ops;
+	struct sdw_bus_params params;
 	struct sdw_master_prop prop;
+	struct list_head m_rt_list;
 	struct sdw_defer defer_msg;
 	unsigned int clk_stop_timeout;
+	u32 bank_switch_timeout;
 };
 
 int sdw_add_bus_master(struct sdw_bus *bus);
 void sdw_delete_bus_master(struct sdw_bus *bus);
 
+/**
+ * sdw_port_config: Master or Slave Port configuration
+ *
+ * @num: Port number
+ * @ch_mask: channels mask for port
+ */
+struct sdw_port_config {
+	unsigned int num;
+	unsigned int ch_mask;
+};
+
+/**
+ * sdw_stream_config: Master or Slave stream configuration
+ *
+ * @frame_rate: Audio frame rate of the stream, in Hz
+ * @ch_count: Channel count of the stream
+ * @bps: Number of bits per audio sample
+ * @direction: Data direction
+ * @type: Stream type PCM or PDM
+ */
+struct sdw_stream_config {
+	unsigned int frame_rate;
+	unsigned int ch_count;
+	unsigned int bps;
+	enum sdw_data_direction direction;
+	enum sdw_stream_type type;
+};
+
+/**
+ * sdw_stream_state: Stream states
+ *
+ * @SDW_STREAM_ALLOCATED: New stream allocated.
+ * @SDW_STREAM_CONFIGURED: Stream configured
+ * @SDW_STREAM_PREPARED: Stream prepared
+ * @SDW_STREAM_ENABLED: Stream enabled
+ * @SDW_STREAM_DISABLED: Stream disabled
+ * @SDW_STREAM_DEPREPARED: Stream de-prepared
+ * @SDW_STREAM_RELEASED: Stream released
+ */
+enum sdw_stream_state {
+	SDW_STREAM_ALLOCATED = 0,
+	SDW_STREAM_CONFIGURED = 1,
+	SDW_STREAM_PREPARED = 2,
+	SDW_STREAM_ENABLED = 3,
+	SDW_STREAM_DISABLED = 4,
+	SDW_STREAM_DEPREPARED = 5,
+	SDW_STREAM_RELEASED = 6,
+};
+
+/**
+ * sdw_stream_params: Stream parameters
+ *
+ * @rate: Sampling frequency, in Hz
+ * @ch_count: Number of channels
+ * @bps: bits per channel sample
+ */
+struct sdw_stream_params {
+	unsigned int rate;
+	unsigned int ch_count;
+	unsigned int bps;
+};
+
+/**
+ * sdw_stream_runtime: Runtime stream parameters
+ *
+ * @name: SoundWire stream name
+ * @params: Stream parameters
+ * @state: Current state of the stream
+ * @type: Stream type PCM or PDM
+ * @m_rt: Master runtime
+ */
+struct sdw_stream_runtime {
+	char *name;
+	struct sdw_stream_params params;
+	enum sdw_stream_state state;
+	enum sdw_stream_type type;
+	struct sdw_master_runtime *m_rt;
+};
+
+struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name);
+void sdw_release_stream(struct sdw_stream_runtime *stream);
+int sdw_stream_add_master(struct sdw_bus *bus,
+		struct sdw_stream_config *stream_config,
+		struct sdw_port_config *port_config,
+		unsigned int num_ports,
+		struct sdw_stream_runtime *stream);
+int sdw_stream_add_slave(struct sdw_slave *slave,
+		struct sdw_stream_config *stream_config,
+		struct sdw_port_config *port_config,
+		unsigned int num_ports,
+		struct sdw_stream_runtime *stream);
+int sdw_stream_remove_master(struct sdw_bus *bus,
+		struct sdw_stream_runtime *stream);
+int sdw_stream_remove_slave(struct sdw_slave *slave,
+		struct sdw_stream_runtime *stream);
+int sdw_prepare_stream(struct sdw_stream_runtime *stream);
+int sdw_enable_stream(struct sdw_stream_runtime *stream);
+int sdw_disable_stream(struct sdw_stream_runtime *stream);
+int sdw_deprepare_stream(struct sdw_stream_runtime *stream);
+
 /* messaging and data APIs */
 
 int sdw_read(struct sdw_slave *slave, u32 addr);
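
The full stream lifecycle implied by enum sdw_stream_state, sketched with the
new API (error handling is elided; the bus pointer, port number and channel
mask are hypothetical):

	static int hyp_start_playback(struct sdw_bus *bus)
	{
		struct sdw_stream_config scfg = {
			.frame_rate = 48000,
			.ch_count = 2,
			.bps = 16,
			.direction = SDW_DATA_DIR_TX,
			.type = SDW_STREAM_PCM,
		};
		struct sdw_port_config pcfg = { .num = 1, .ch_mask = 0x3 };
		struct sdw_stream_runtime *stream;

		stream = sdw_alloc_stream("hyp-playback");		/* ALLOCATED */
		if (!stream)
			return -ENOMEM;
		sdw_stream_add_master(bus, &scfg, &pcfg, 1, stream);	/* CONFIGURED */
		sdw_prepare_stream(stream);				/* PREPARED */
		sdw_enable_stream(stream);				/* ENABLED */
		/* ... stream runs; teardown mirrors the enum order ... */
		sdw_disable_stream(stream);				/* DISABLED */
		sdw_deprepare_stream(stream);				/* DEPREPARED */
		sdw_stream_remove_master(bus, stream);
		sdw_release_stream(stream);				/* RELEASED */
		return 0;
	}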
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 4b37528..2b9573b 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -5,17 +5,31 @@
 #define __SDW_INTEL_H
 
 /**
+ * struct sdw_intel_ops: Intel audio driver callback ops
+ *
+ * @config_stream: configure the stream with the hw_params
+ */
+struct sdw_intel_ops {
+	int (*config_stream)(void *arg, void *substream,
+			void *dai, void *hw_params, int stream_num);
+};
+
+/**
  * struct sdw_intel_res - Soundwire Intel resource structure
  * @mmio_base: mmio base of SoundWire registers
  * @irq: interrupt number
  * @handle: ACPI parent handle
  * @parent: parent device
+ * @ops: callback ops
+ * @arg: callback arg
  */
 struct sdw_intel_res {
 	void __iomem *mmio_base;
 	int irq;
 	acpi_handle handle;
 	struct device *parent;
+	const struct sdw_intel_ops *ops;
+	void *arg;
 };
 
 void *sdw_intel_init(acpi_handle *parent_handle, struct sdw_intel_res *res);
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
new file mode 100644
index 0000000..bb4bd15
--- /dev/null
+++ b/include/linux/spi/spi-mem.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 Exceet Electronics GmbH
+ * Copyright (C) 2018 Bootlin
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ */
+
+#ifndef __LINUX_SPI_MEM_H
+#define __LINUX_SPI_MEM_H
+
+#include <linux/spi/spi.h>
+
+#define SPI_MEM_OP_CMD(__opcode, __buswidth)			\
+	{							\
+		.buswidth = __buswidth,				\
+		.opcode = __opcode,				\
+	}
+
+#define SPI_MEM_OP_ADDR(__nbytes, __val, __buswidth)		\
+	{							\
+		.nbytes = __nbytes,				\
+		.val = __val,					\
+		.buswidth = __buswidth,				\
+	}
+
+#define SPI_MEM_OP_NO_ADDR	{ }
+
+#define SPI_MEM_OP_DUMMY(__nbytes, __buswidth)			\
+	{							\
+		.nbytes = __nbytes,				\
+		.buswidth = __buswidth,				\
+	}
+
+#define SPI_MEM_OP_NO_DUMMY	{ }
+
+#define SPI_MEM_OP_DATA_IN(__nbytes, __buf, __buswidth)		\
+	{							\
+		.dir = SPI_MEM_DATA_IN,				\
+		.nbytes = __nbytes,				\
+		.buf.in = __buf,				\
+		.buswidth = __buswidth,				\
+	}
+
+#define SPI_MEM_OP_DATA_OUT(__nbytes, __buf, __buswidth)	\
+	{							\
+		.dir = SPI_MEM_DATA_OUT,			\
+		.nbytes = __nbytes,				\
+		.buf.out = __buf,				\
+		.buswidth = __buswidth,				\
+	}
+
+#define SPI_MEM_OP_NO_DATA	{ }
+
+/**
+ * enum spi_mem_data_dir - describes the direction of a SPI memory data
+ *			   transfer from the controller perspective
+ * @SPI_MEM_DATA_IN: data coming from the SPI memory
+ * @SPI_MEM_DATA_OUT: data sent to the SPI memory
+ */
+enum spi_mem_data_dir {
+	SPI_MEM_DATA_IN,
+	SPI_MEM_DATA_OUT,
+};
+
+/**
+ * struct spi_mem_op - describes a SPI memory operation
+ * @cmd.buswidth: number of IO lines used to transmit the command
+ * @cmd.opcode: operation opcode
+ * @addr.nbytes: number of address bytes to send. Can be zero if the operation
+ *		 does not need to send an address
+ * @addr.buswidth: number of IO lines used to transmit the address cycles
+ * @addr.val: address value. This value is always sent MSB first on the bus.
+ *	      Note that only @addr.nbytes are taken into account in this
+ *	      address value, so users should make sure the value fits in the
+ *	      assigned number of bytes.
+ * @dummy.nbytes: number of dummy bytes to send after an opcode or address. Can
+ *		  be zero if the operation does not require dummy bytes
+ * @dummy.buswidth: number of IO lines used to transmit the dummy bytes
+ * @data.buswidth: number of IO lines used to send/receive the data
+ * @data.dir: direction of the transfer
+ * @data.buf.in: input buffer
+ * @data.buf.out: output buffer
+ */
+struct spi_mem_op {
+	struct {
+		u8 buswidth;
+		u8 opcode;
+	} cmd;
+
+	struct {
+		u8 nbytes;
+		u8 buswidth;
+		u64 val;
+	} addr;
+
+	struct {
+		u8 nbytes;
+		u8 buswidth;
+	} dummy;
+
+	struct {
+		u8 buswidth;
+		enum spi_mem_data_dir dir;
+		unsigned int nbytes;
+		/* buf.{in,out} must be DMA-able. */
+		union {
+			void *in;
+			const void *out;
+		} buf;
+	} data;
+};
+
+#define SPI_MEM_OP(__cmd, __addr, __dummy, __data)		\
+	{							\
+		.cmd = __cmd,					\
+		.addr = __addr,					\
+		.dummy = __dummy,				\
+		.data = __data,					\
+	}
+
+/**
+ * struct spi_mem - describes a SPI memory device
+ * @spi: the underlying SPI device
+ * @drvpriv: spi_mem_driver private data
+ *
+ * Extra information that describes the SPI memory device and may be needed by
+ * the controller to properly handle this device should be placed here.
+ *
+ * One example would be the device size, since some controllers expose their
+ * SPI mem devices through an io-mapped region.
+ */
+struct spi_mem {
+	struct spi_device *spi;
+	void *drvpriv;
+};
+
+/**
+ * spi_mem_set_drvdata() - attach driver private data to a SPI mem
+ *				  device
+ * @mem: memory device
+ * @data: data to attach to the memory device
+ */
+static inline void spi_mem_set_drvdata(struct spi_mem *mem, void *data)
+{
+	mem->drvpriv = data;
+}
+
+/**
+ * spi_mem_get_drvdata() - get driver private data attached to a SPI mem
+ *				  device
+ * @mem: memory device
+ *
+ * Return: the data attached to the mem device.
+ */
+static inline void *spi_mem_get_drvdata(struct spi_mem *mem)
+{
+	return mem->drvpriv;
+}
+
+/**
+ * struct spi_controller_mem_ops - SPI memory operations
+ * @adjust_op_size: shrink the data xfer of an operation to match the
+ *		    controller's limitations (can be alignment or max RX/TX
+ *		    size limitations)
+ * @supports_op: check if an operation is supported by the controller
+ * @exec_op: execute a SPI memory operation
+ *
+ * This interface should be implemented by SPI controllers providing a
+ * high-level interface to execute SPI memory operations, which is usually
+ * the case for QSPI controllers.
+ */
+struct spi_controller_mem_ops {
+	int (*adjust_op_size)(struct spi_mem *mem, struct spi_mem_op *op);
+	bool (*supports_op)(struct spi_mem *mem,
+			    const struct spi_mem_op *op);
+	int (*exec_op)(struct spi_mem *mem,
+		       const struct spi_mem_op *op);
+};
+
+/**
+ * struct spi_mem_driver - SPI memory driver
+ * @spidrv: inherit from a SPI driver
+ * @probe: probe a SPI memory. Usually where detection/initialization takes
+ *	   place
+ * @remove: remove a SPI memory
+ * @shutdown: take appropriate action when the system is shut down
+ *
+ * This is just a thin wrapper around a spi_driver. The core takes care of
+ * allocating the spi_mem object and forwarding the probe/remove/shutdown
+ * request to the spi_mem_driver. The reason we use this wrapper is that
+ * we might have to stuff more information into the spi_mem struct to let
+ * SPI controllers know more about the SPI memory they interact with, and
+ * having this intermediate layer allows us to do that without adding more
+ * useless fields to the spi_device object.
+ */
+struct spi_mem_driver {
+	struct spi_driver spidrv;
+	int (*probe)(struct spi_mem *mem);
+	int (*remove)(struct spi_mem *mem);
+	void (*shutdown)(struct spi_mem *mem);
+};
+
+#if IS_ENABLED(CONFIG_SPI_MEM)
+int spi_controller_dma_map_mem_op_data(struct spi_controller *ctlr,
+				       const struct spi_mem_op *op,
+				       struct sg_table *sg);
+
+void spi_controller_dma_unmap_mem_op_data(struct spi_controller *ctlr,
+					  const struct spi_mem_op *op,
+					  struct sg_table *sg);
+#else
+static inline int
+spi_controller_dma_map_mem_op_data(struct spi_controller *ctlr,
+				   const struct spi_mem_op *op,
+				   struct sg_table *sg)
+{
+	return -ENOTSUPP;
+}
+
+static inline void
+spi_controller_dma_unmap_mem_op_data(struct spi_controller *ctlr,
+				     const struct spi_mem_op *op,
+				     struct sg_table *sg)
+{
+}
+#endif /* CONFIG_SPI_MEM */
+
+int spi_mem_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op);
+
+bool spi_mem_supports_op(struct spi_mem *mem,
+			 const struct spi_mem_op *op);
+
+int spi_mem_exec_op(struct spi_mem *mem,
+		    const struct spi_mem_op *op);
+
+int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv,
+				       struct module *owner);
+
+void spi_mem_driver_unregister(struct spi_mem_driver *drv);
+
+#define spi_mem_driver_register(__drv)                                  \
+	spi_mem_driver_register_with_owner(__drv, THIS_MODULE)
+
+#define module_spi_mem_driver(__drv)                                    \
+	module_driver(__drv, spi_mem_driver_register,                   \
+		      spi_mem_driver_unregister)
+
+#endif /* __LINUX_SPI_MEM_H */
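
Composing an operation from the macros above, sketched as a JEDEC-ID style
read (opcode 0x9f is the conventional read-ID opcode; the helper is
hypothetical, and the buffer is kmalloc'd because buf.{in,out} must be
DMA-able):

	static int hyp_read_id(struct spi_mem *mem)
	{
		u8 *id = kmalloc(3, GFP_KERNEL);	/* no stack buffers here */
		struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(0x9f, 1),
						  SPI_MEM_OP_NO_ADDR,
						  SPI_MEM_OP_NO_DUMMY,
						  SPI_MEM_OP_DATA_IN(3, id, 1));
		int ret;

		if (!id)
			return -ENOMEM;
		ret = spi_mem_adjust_op_size(mem, &op);	/* clamp to controller limits */
		if (!ret && !spi_mem_supports_op(mem, &op))
			ret = -ENOTSUPP;
		if (!ret)
			ret = spi_mem_exec_op(mem, &op);
		kfree(id);
		return ret;
	}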
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index bc6bb32..a64235e 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -26,7 +26,7 @@
 struct property_entry;
 struct spi_controller;
 struct spi_transfer;
-struct spi_flash_read_message;
+struct spi_controller_mem_ops;
 
 /*
  * INTERFACES between SPI master-side drivers and SPI slave protocol handlers,
@@ -376,13 +376,11 @@
  *                    transfer_one callback.
  * @handle_err: the subsystem calls the driver to handle an error that occurs
  *		in the generic implementation of transfer_one_message().
+ * @mem_ops: optimized/dedicated operations for interactions with SPI memory.
+ *	     This field is optional and should only be implemented if the
+ *	     controller has native support for memory-like operations.
  * @unprepare_message: undo any work done by prepare_message().
  * @slave_abort: abort the ongoing transfer request on an SPI slave controller
- * @spi_flash_read: to support spi-controller hardwares that provide
- *                  accelerated interface to read from flash devices.
- * @spi_flash_can_dma: analogous to can_dma() interface, but for
- *		       controllers implementing spi_flash_read.
- * @flash_read_supported: spi device supports flash read
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
@@ -548,11 +546,6 @@
 	int (*unprepare_message)(struct spi_controller *ctlr,
 				 struct spi_message *message);
 	int (*slave_abort)(struct spi_controller *ctlr);
-	int (*spi_flash_read)(struct  spi_device *spi,
-			      struct spi_flash_read_message *msg);
-	bool (*spi_flash_can_dma)(struct spi_device *spi,
-				  struct spi_flash_read_message *msg);
-	bool (*flash_read_supported)(struct spi_device *spi);
 
 	/*
 	 * These hooks are for drivers that use a generic implementation
@@ -564,6 +557,9 @@
 	void (*handle_err)(struct spi_controller *ctlr,
 			   struct spi_message *message);
 
+	/* Optimized handlers for SPI memory-like operations. */
+	const struct spi_controller_mem_ops *mem_ops;
+
 	/* gpio chip select */
 	int			*cs_gpios;
 
@@ -1183,48 +1179,6 @@
 	return be16_to_cpu(result);
 }
 
-/**
- * struct spi_flash_read_message - flash specific information for
- * spi-masters that provide accelerated flash read interfaces
- * @buf: buffer to read data
- * @from: offset within the flash from where data is to be read
- * @len: length of data to be read
- * @retlen: actual length of data read
- * @read_opcode: read_opcode to be used to communicate with flash
- * @addr_width: number of address bytes
- * @dummy_bytes: number of dummy bytes
- * @opcode_nbits: number of lines to send opcode
- * @addr_nbits: number of lines to send address
- * @data_nbits: number of lines for data
- * @rx_sg: Scatterlist for receive data read from flash
- * @cur_msg_mapped: message has been mapped for DMA
- */
-struct spi_flash_read_message {
-	void *buf;
-	loff_t from;
-	size_t len;
-	size_t retlen;
-	u8 read_opcode;
-	u8 addr_width;
-	u8 dummy_bytes;
-	u8 opcode_nbits;
-	u8 addr_nbits;
-	u8 data_nbits;
-	struct sg_table rx_sg;
-	bool cur_msg_mapped;
-};
-
-/* SPI core interface for flash read support */
-static inline bool spi_flash_read_supported(struct spi_device *spi)
-{
-	return spi->controller->spi_flash_read &&
-	       (!spi->controller->flash_read_supported ||
-	       spi->controller->flash_read_supported(spi));
-}
-
-int spi_flash_read(struct spi_device *spi,
-		   struct spi_flash_read_message *msg);
-
 /*---------------------------------------------------------------------------*/
 
 /*
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 4894d32..1e8a464 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -380,6 +380,24 @@
 	raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
 })
 
+/**
+ * spin_is_locked() - Check whether a spinlock is locked.
+ * @lock: Pointer to the spinlock.
+ *
+ * This function is NOT required to provide any memory ordering
+ * guarantees; it could be used for debugging purposes or, when
+ * additional synchronization is needed, accompanied with other
+ * constructs (memory barriers) enforcing the synchronization.
+ *
+ * Returns: 1 if @lock is locked, 0 otherwise.
+ *
+ * Note that the function only tells you that the spinlock is
+ * seen to be locked, not that it is locked on your CPU.
+ *
+ * Further, on CONFIG_SMP=n builds with CONFIG_DEBUG_SPINLOCK=n,
+ * the return value is always 0 (see include/linux/spinlock_up.h).
+ * Therefore you should not rely heavily on the return value.
+ */
 static __always_inline int spin_is_locked(spinlock_t *lock)
 {
 	return raw_spin_is_locked(&lock->rlock);
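
Given the caveats in the kernel-doc above, spin_is_locked() is only suited to
best-effort diagnostics; where ownership must actually be asserted, the usual
tool is lockdep_assert_held(). A sketch (the helper and lock are hypothetical):

	static void hyp_debug_check(spinlock_t *lock)
	{
		/* Always 0 on CONFIG_SMP=n && CONFIG_DEBUG_SPINLOCK=n builds,
		 * so this is diagnostics only, never a correctness check. */
		if (!spin_is_locked(lock))
			pr_warn("hyp: lock expected to be held here\n");
	}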
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 33c1c69..91494d7 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -69,11 +69,45 @@
 
 void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 		void (*func)(struct rcu_head *head));
-void cleanup_srcu_struct(struct srcu_struct *sp);
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced);
 int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
 
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+static inline void cleanup_srcu_struct(struct srcu_struct *sp)
+{
+	_cleanup_srcu_struct(sp, false);
+}
+
+/**
+ * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.  Also,
+ * all grace-period processing must have completed.
+ *
+ * "Completed" means that the last synchronize_srcu() and
+ * synchronize_srcu_expedited() calls must have returned before the call
+ * to cleanup_srcu_struct_quiesced().  It also means that the callback
+ * from the last call_srcu() must have been invoked before the call to
+ * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
+ * with this last.  Violating these rules will get you a WARN_ON() splat
+ * (with high probability, anyway), and will also cause the srcu_struct
+ * to be leaked.
+ */
+static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp)
+{
+	_cleanup_srcu_struct(sp, true);
+}
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 /**
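
The teardown ordering the new kernel-doc demands, sketched (the helper is
hypothetical and assumes an srcu_struct set up with init_srcu_struct()):

	static void hyp_teardown(struct srcu_struct *sp)
	{
		/* All synchronize_srcu()/_expedited() calls have returned. */
		srcu_barrier(sp);			/* flush outstanding call_srcu() callbacks */
		cleanup_srcu_struct_quiesced(sp);	/* GP machinery must now be idle */
	}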
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 261471f..f41d2fb 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -43,7 +43,7 @@
 
 void srcu_drive_gp(struct work_struct *wp);
 
-#define __SRCU_STRUCT_INIT(name)					\
+#define __SRCU_STRUCT_INIT(name, __ignored)				\
 {									\
 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
 	.srcu_cb_tail = &name.srcu_cb_head,				\
@@ -56,9 +56,9 @@
  * Tree SRCU, which needs some per-CPU data.
  */
 #define DEFINE_SRCU(name) \
-	struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+	struct srcu_struct name = __SRCU_STRUCT_INIT(name, name)
 #define DEFINE_STATIC_SRCU(name) \
-	static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+	static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name)
 
 void synchronize_srcu(struct srcu_struct *sp);
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 4eda108..745d4ca 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -104,9 +104,9 @@
 #define SRCU_STATE_SCAN1	1
 #define SRCU_STATE_SCAN2	2
 
-#define __SRCU_STRUCT_INIT(name)					\
+#define __SRCU_STRUCT_INIT(name, pcpu_name)				\
 	{								\
-		.sda = &name##_srcu_data,				\
+		.sda = &pcpu_name,					\
 		.lock = __SPIN_LOCK_UNLOCKED(name.lock),		\
 		.srcu_gp_seq_needed = 0 - 1,				\
 		__SRCU_DEP_MAP_INIT(name)				\
@@ -133,7 +133,7 @@
  */
 #define __DEFINE_SRCU(name, is_static)					\
 	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\
-	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
+	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data)
 #define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
 #define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
 
diff --git a/include/linux/string.h b/include/linux/string.h
index dd39a69..4a5a0eb 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -147,8 +147,8 @@
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
 #ifndef __HAVE_ARCH_MEMCPY_MCSAFE
-static inline __must_check int memcpy_mcsafe(void *dst, const void *src,
-		size_t cnt)
+static inline __must_check unsigned long memcpy_mcsafe(void *dst,
+		const void *src, size_t cnt)
 {
 	memcpy(dst, src, cnt);
 	return 0;
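
The switch from int to unsigned long matters to callers: the return value is now the number of bytes left uncopied rather than a simple error code. A hedged caller sketch (the function name is invented):

/* Illustrative: treat a partial machine-check-safe copy as -EIO. */
static int pmem_copy(void *dst, const void *src, size_t len)
{
	unsigned long rem = memcpy_mcsafe(dst, src, len);

	if (rem)	/* the trailing 'rem' bytes were not copied */
		return -EIO;
	return 0;
}
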
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index a5704da..e90b9bd 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -122,8 +122,6 @@
 					   struct cache_detail *);
 extern void rpc_remove_cache_dir(struct dentry *);
 
-extern int rpc_rmdir(struct dentry *dentry);
-
 struct rpc_pipe *rpc_mkpipe_data(const struct rpc_pipe_ops *ops, int flags);
 void rpc_destroy_pipe_data(struct rpc_pipe *pipe);
 extern struct dentry *rpc_mkpipe_dentry(struct dentry *, const char *, void *,
diff --git a/include/linux/swait.h b/include/linux/swait.h
index c98aaf6..bf8cb0d 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -5,10 +5,23 @@
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
+#include <linux/wait.h>
 #include <asm/current.h>
 
 /*
- * Simple wait queues
+ * BROKEN wait-queues.
+ *
+ * These "simple" wait-queues are broken garbage, and should never be
+ * used. The comments below claim that they are "similar" to regular
+ * wait-queues, but the semantics are actually completely different, and
+ * every single user we have ever had has been buggy (or pointless).
+ *
+ * A "swake_up()" only wakes up _one_ waiter, which is not at all what
+ * "wake_up()" does, and has led to problems. In other cases, it has
+ * been fine, because there's only ever one waiter (kvm), but in that
+ * case the whole "simple" wait-queue is just pointless to begin with,
+ * since there is no "queue". Use "wake_up_process()" with a direct
+ * pointer instead.
  *
  * While these are very similar to regular wait queues (wait.h) the most
  * important difference is that the simple waitqueue allows for deterministic
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2417d28..c063443 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -53,7 +53,7 @@
 
 /*
  * Unaddressable device memory support. See include/linux/hmm.h and
- * Documentation/vm/hmm.txt. Short description is we need struct pages for
+ * Documentation/vm/hmm.rst. Short description is we need struct pages for
  * device memory that is unaddressable (inaccessible) by CPU, so that we can
  * migrate part of a process memory to device memory.
  *
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 70fcda1..390e814 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -290,6 +290,12 @@
 				long nr,
 				struct io_event __user *events,
 				struct timespec __user *timeout);
+asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
+				long min_nr,
+				long nr,
+				struct io_event __user *events,
+				struct timespec __user *timeout,
+				const struct __aio_sigset *sig);
 
 /* fs/xattr.c */
 asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
@@ -536,7 +542,8 @@
 				    size_t len);
 
 /* kernel/hrtimer.c */
-asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp);
+asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
+			      struct __kernel_timespec __user *rmtp);
 
 /* kernel/itimer.c */
 asmlinkage long sys_getitimer(int which, struct itimerval __user *value);
@@ -567,14 +574,14 @@
 				struct itimerspec __user *old_setting);
 asmlinkage long sys_timer_delete(timer_t timer_id);
 asmlinkage long sys_clock_settime(clockid_t which_clock,
-				const struct timespec __user *tp);
+				const struct __kernel_timespec __user *tp);
 asmlinkage long sys_clock_gettime(clockid_t which_clock,
-				struct timespec __user *tp);
+				struct __kernel_timespec __user *tp);
 asmlinkage long sys_clock_getres(clockid_t which_clock,
-				struct timespec __user *tp);
+				struct __kernel_timespec __user *tp);
 asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
-				const struct timespec __user *rqtp,
-				struct timespec __user *rmtp);
+				const struct __kernel_timespec __user *rqtp,
+				struct __kernel_timespec __user *rmtp);
 
 /* kernel/printk.c */
 asmlinkage long sys_syslog(int type, char __user *buf, int len);
@@ -679,8 +686,8 @@
 /* ipc/mqueue.c */
 asmlinkage long sys_mq_open(const char __user *name, int oflag, umode_t mode, struct mq_attr __user *attr);
 asmlinkage long sys_mq_unlink(const char __user *name);
-asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout);
-asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout);
+asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct __kernel_timespec __user *abs_timeout);
+asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct __kernel_timespec __user *abs_timeout);
 asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification);
 asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat);
 
@@ -697,7 +704,7 @@
 asmlinkage long sys_semctl(int semid, int semnum, int cmd, unsigned long arg);
 asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops,
 				unsigned nsops,
-				const struct timespec __user *timeout);
+				const struct __kernel_timespec __user *timeout);
 asmlinkage long sys_semop(int semid, struct sembuf __user *sops,
 				unsigned nsops);
 
diff --git a/include/linux/time.h b/include/linux/time.h
index 4b62a2c..aed7446 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -10,9 +10,9 @@
 extern struct timezone sys_tz;
 
 int get_timespec64(struct timespec64 *ts,
-		const struct timespec __user *uts);
+		const struct __kernel_timespec __user *uts);
 int put_timespec64(const struct timespec64 *ts,
-		struct timespec __user *uts);
+		struct __kernel_timespec __user *uts);
 int get_itimerspec64(struct itimerspec64 *it,
 			const struct itimerspec __user *uit);
 int put_itimerspec64(const struct itimerspec64 *it,
diff --git a/include/linux/time32.h b/include/linux/time32.h
index d2bcd43..0b14f93 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -18,25 +18,14 @@
 /* timespec64 is defined as timespec here */
 static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64)
 {
-	return ts64;
+	return *(const struct timespec *)&ts64;
 }
 
 static inline struct timespec64 timespec_to_timespec64(const struct timespec ts)
 {
-	return ts;
+	return *(const struct timespec64 *)&ts;
 }
 
-# define timespec_equal			timespec64_equal
-# define timespec_compare		timespec64_compare
-# define set_normalized_timespec	set_normalized_timespec64
-# define timespec_add			timespec64_add
-# define timespec_sub			timespec64_sub
-# define timespec_valid			timespec64_valid
-# define timespec_valid_strict		timespec64_valid_strict
-# define timespec_to_ns			timespec64_to_ns
-# define ns_to_timespec			ns_to_timespec64
-# define timespec_add_ns		timespec64_add_ns
-
 #else
 static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64)
 {
@@ -55,6 +44,7 @@
 	ret.tv_nsec = ts.tv_nsec;
 	return ret;
 }
+#endif
 
 static inline int timespec_equal(const struct timespec *a,
 				 const struct timespec *b)
@@ -159,8 +149,6 @@
 	a->tv_nsec = ns;
 }
 
-#endif
-
 /**
  * time_to_tm - converts the calendar time to local broken-down time
  *
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 93d3949..0a7b2f7 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -2,17 +2,20 @@
 #ifndef _LINUX_TIME64_H
 #define _LINUX_TIME64_H
 
-#include <uapi/linux/time.h>
 #include <linux/math64.h>
 
 typedef __s64 time64_t;
 typedef __u64 timeu64_t;
 
-#if __BITS_PER_LONG == 64
-/* this trick allows us to optimize out timespec64_to_timespec */
-# define timespec64 timespec
-#define itimerspec64 itimerspec
-#else
+/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path
+ * and 32-bit emulation.
+ */
+#ifndef CONFIG_64BIT_TIME
+#define __kernel_timespec timespec
+#endif
+
+#include <uapi/linux/time.h>
+
 struct timespec64 {
 	time64_t	tv_sec;			/* seconds */
 	long		tv_nsec;		/* nanoseconds */
@@ -23,8 +26,6 @@
 	struct timespec64 it_value;
 };
 
-#endif
-
 /* Parameters used to convert the timespec values: */
 #define MSEC_PER_SEC	1000L
 #define USEC_PER_MSEC	1000L
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 588a0e4..86bc202 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -19,27 +19,25 @@
 extern int do_settimeofday64(const struct timespec64 *ts);
 extern int do_sys_settimeofday64(const struct timespec64 *tv,
 				 const struct timezone *tz);
-/*
- * Kernel time accessors
- */
-struct timespec64 current_kernel_time64(void);
 
 /*
  * timespec64 based interfaces
  */
-struct timespec64 get_monotonic_coarse64(void);
-extern void getrawmonotonic64(struct timespec64 *ts);
+extern void ktime_get_raw_ts64(struct timespec64 *ts);
 extern void ktime_get_ts64(struct timespec64 *ts);
+extern void ktime_get_real_ts64(struct timespec64 *tv);
+extern void ktime_get_coarse_ts64(struct timespec64 *ts);
+extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
+
+void getboottime64(struct timespec64 *ts);
+
+/*
+ * time64_t base interfaces
+ */
 extern time64_t ktime_get_seconds(void);
 extern time64_t __ktime_get_real_seconds(void);
 extern time64_t ktime_get_real_seconds(void);
 
-extern int __getnstimeofday64(struct timespec64 *tv);
-extern void getnstimeofday64(struct timespec64 *tv);
-extern void getboottime64(struct timespec64 *ts);
-
-#define ktime_get_real_ts64(ts)	getnstimeofday64(ts)
-
 /*
  * ktime_t based interfaces
  */
@@ -53,6 +51,7 @@
 
 extern ktime_t ktime_get(void);
 extern ktime_t ktime_get_with_offset(enum tk_offsets offs);
+extern ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs);
 extern ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs);
 extern ktime_t ktime_get_raw(void);
 extern u32 ktime_get_resolution_ns(void);
@@ -65,6 +64,11 @@
 	return ktime_get_with_offset(TK_OFFS_REAL);
 }
 
+static inline ktime_t ktime_get_coarse_real(void)
+{
+	return ktime_get_coarse_with_offset(TK_OFFS_REAL);
+}
+
 /**
  * ktime_get_boottime - Returns monotonic time since boot in ktime_t format
  *
@@ -76,6 +80,11 @@
 	return ktime_get_with_offset(TK_OFFS_BOOT);
 }
 
+static inline ktime_t ktime_get_coarse_boottime(void)
+{
+	return ktime_get_coarse_with_offset(TK_OFFS_BOOT);
+}
+
 /**
  * ktime_get_clocktai - Returns the TAI time of day in ktime_t format
  */
@@ -84,6 +93,11 @@
 	return ktime_get_with_offset(TK_OFFS_TAI);
 }
 
+static inline ktime_t ktime_get_coarse_clocktai(void)
+{
+	return ktime_get_coarse_with_offset(TK_OFFS_TAI);
+}
+
 /**
  * ktime_mono_to_real - Convert monotonic time to clock realtime
  */
@@ -123,18 +137,40 @@
 extern u64 ktime_get_real_fast_ns(void);
 
 /*
- * timespec64 interfaces utilizing the ktime based ones
+ * timespec64/time64_t interfaces utilizing the ktime based ones.
+ * These exist for API completeness and could be implemented more
+ * efficiently if needed.
  */
-static inline void get_monotonic_boottime64(struct timespec64 *ts)
+static inline void ktime_get_boottime_ts64(struct timespec64 *ts)
 {
 	*ts = ktime_to_timespec64(ktime_get_boottime());
 }
 
-static inline void timekeeping_clocktai64(struct timespec64 *ts)
+static inline void ktime_get_coarse_boottime_ts64(struct timespec64 *ts)
+{
+	*ts = ktime_to_timespec64(ktime_get_coarse_boottime());
+}
+
+static inline time64_t ktime_get_boottime_seconds(void)
+{
+	return ktime_divns(ktime_get_coarse_boottime(), NSEC_PER_SEC);
+}
+
+static inline void ktime_get_clocktai_ts64(struct timespec64 *ts)
 {
 	*ts = ktime_to_timespec64(ktime_get_clocktai());
 }
 
+static inline void ktime_get_coarse_clocktai_ts64(struct timespec64 *ts)
+{
+	*ts = ktime_to_timespec64(ktime_get_coarse_clocktai());
+}
+
+static inline time64_t ktime_get_clocktai_seconds(void)
+{
+	return ktime_divns(ktime_get_coarse_clocktai(), NSEC_PER_SEC);
+}
+
 /*
  * RTC specific
  */
@@ -210,5 +246,30 @@
 extern void read_boot_clock64(struct timespec64 *ts);
 extern int update_persistent_clock64(struct timespec64 now);
 
+/*
+ * deprecated aliases, don't use in new code
+ */
+#define getnstimeofday64(ts)		ktime_get_real_ts64(ts)
+#define get_monotonic_boottime64(ts)	ktime_get_boottime_ts64(ts)
+#define getrawmonotonic64(ts)		ktime_get_raw_ts64(ts)
+#define timekeeping_clocktai64(ts)	ktime_get_clocktai_ts64(ts)
+
+static inline struct timespec64 current_kernel_time64(void)
+{
+	struct timespec64 ts;
+
+	ktime_get_coarse_real_ts64(&ts);
+
+	return ts;
+}
+
+static inline struct timespec64 get_monotonic_coarse64(void)
+{
+	struct timespec64 ts;
+
+	ktime_get_coarse_ts64(&ts);
+
+	return ts;
+}
 
 #endif
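
A quick migration sketch for callers of the renamed accessors; the wrapper function is invented, and the old spellings are the now-deprecated aliases defined above:

/* Illustrative: old call sites and their preferred new spellings. */
static void sample_clocks(void)
{
	struct timespec64 real, raw, boot;

	ktime_get_real_ts64(&real);	/* was getnstimeofday64(&real) */
	ktime_get_raw_ts64(&raw);	/* was getrawmonotonic64(&raw) */
	ktime_get_boottime_ts64(&boot);	/* was get_monotonic_boottime64(&boot) */

	pr_info("real=%lld.%09ld\n",
		(long long)real.tv_sec, real.tv_nsec);
}
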
diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h
index 3616b4b..8762c2f 100644
--- a/include/linux/timekeeping32.h
+++ b/include/linux/timekeeping32.h
@@ -11,55 +11,13 @@
 
 static inline struct timespec current_kernel_time(void)
 {
-	struct timespec64 now = current_kernel_time64();
+	struct timespec64 ts64;
 
-	return timespec64_to_timespec(now);
+	ktime_get_coarse_real_ts64(&ts64);
+
+	return timespec64_to_timespec(ts64);
 }
 
-#if BITS_PER_LONG == 64
-/**
- * Deprecated. Use do_settimeofday64().
- */
-static inline int do_settimeofday(const struct timespec *ts)
-{
-	return do_settimeofday64(ts);
-}
-
-static inline int __getnstimeofday(struct timespec *ts)
-{
-	return __getnstimeofday64(ts);
-}
-
-static inline void getnstimeofday(struct timespec *ts)
-{
-	getnstimeofday64(ts);
-}
-
-static inline void ktime_get_ts(struct timespec *ts)
-{
-	ktime_get_ts64(ts);
-}
-
-static inline void ktime_get_real_ts(struct timespec *ts)
-{
-	getnstimeofday64(ts);
-}
-
-static inline void getrawmonotonic(struct timespec *ts)
-{
-	getrawmonotonic64(ts);
-}
-
-static inline struct timespec get_monotonic_coarse(void)
-{
-	return get_monotonic_coarse64();
-}
-
-static inline void getboottime(struct timespec *ts)
-{
-	return getboottime64(ts);
-}
-#else
 /**
  * Deprecated. Use do_settimeofday64().
  */
@@ -71,20 +29,11 @@
 	return do_settimeofday64(&ts64);
 }
 
-static inline int __getnstimeofday(struct timespec *ts)
-{
-	struct timespec64 ts64;
-	int ret = __getnstimeofday64(&ts64);
-
-	*ts = timespec64_to_timespec(ts64);
-	return ret;
-}
-
 static inline void getnstimeofday(struct timespec *ts)
 {
 	struct timespec64 ts64;
 
-	getnstimeofday64(&ts64);
+	ktime_get_real_ts64(&ts64);
 	*ts = timespec64_to_timespec(ts64);
 }
 
@@ -100,7 +49,7 @@
 {
 	struct timespec64 ts64;
 
-	getnstimeofday64(&ts64);
+	ktime_get_real_ts64(&ts64);
 	*ts = timespec64_to_timespec(ts64);
 }
 
@@ -108,13 +57,17 @@
 {
 	struct timespec64 ts64;
 
-	getrawmonotonic64(&ts64);
+	ktime_get_raw_ts64(&ts64);
 	*ts = timespec64_to_timespec(ts64);
 }
 
 static inline struct timespec get_monotonic_coarse(void)
 {
-	return timespec64_to_timespec(get_monotonic_coarse64());
+	struct timespec64 ts64;
+
+	ktime_get_coarse_ts64(&ts64);
+
+	return timespec64_to_timespec(ts64);
 }
 
 static inline void getboottime(struct timespec *ts)
@@ -124,7 +77,6 @@
 	getboottime64(&ts64);
 	*ts = timespec64_to_timespec(ts64);
 }
-#endif
 
 /*
  * Timespec interfaces utilizing the ktime based ones
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index d34144a..78a010e 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -435,8 +435,7 @@
 		    struct ring_buffer_event *event);
 extern void
 event_triggers_post_call(struct trace_event_file *file,
-			 enum event_trigger_type tt,
-			 void *rec, struct ring_buffer_event *event);
+			 enum event_trigger_type tt);
 
 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
 
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 26c1521..4a88419 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -124,6 +124,7 @@
 {
 	if (step) {
 		siginfo_t info;
+		clear_siginfo(&info);
 		user_single_step_siginfo(current, regs, &info);
 		force_sig_info(SIGTRAP, &info, current);
 		return;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 1dd587ba..c56e397 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -10,6 +10,7 @@
 #include <linux/tty_ldisc.h>
 #include <linux/mutex.h>
 #include <linux/tty_flags.h>
+#include <linux/seq_file.h>
 #include <uapi/linux/tty.h>
 #include <linux/rwsem.h>
 #include <linux/llist.h>
@@ -527,7 +528,7 @@
 }
 
 extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old);
-extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b);
+extern int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b);
 extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt);
 
 extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *);
@@ -535,7 +536,7 @@
 extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
 extern void tty_ldisc_hangup(struct tty_struct *tty, bool reset);
 extern int tty_ldisc_reinit(struct tty_struct *tty, int disc);
-extern const struct file_operations tty_ldiscs_proc_fops;
+extern const struct seq_operations tty_ldiscs_seq_ops;
 
 extern void tty_wakeup(struct tty_struct *tty);
 extern void tty_ldisc_flush(struct tty_struct *tty);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 31c2b5b..71dbc89 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -293,7 +293,7 @@
 	int (*poll_get_char)(struct tty_driver *driver, int line);
 	void (*poll_put_char)(struct tty_driver *driver, int line, char ch);
 #endif
-	const struct file_operations *proc_fops;
+	int (*proc_show)(struct seq_file *, void *);
 } __randomize_layout;
 
 struct tty_driver {
diff --git a/include/linux/uio.h b/include/linux/uio.h
index e67e12a..f5766e8 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -154,6 +154,12 @@
 #define _copy_from_iter_flushcache _copy_from_iter_nocache
 #endif
 
+#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
+size_t _copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i);
+#else
+#define _copy_to_iter_mcsafe _copy_to_iter
+#endif
+
 static __always_inline __must_check
 size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
@@ -163,6 +169,15 @@
 		return _copy_from_iter_flushcache(addr, bytes, i);
 }
 
+static __always_inline __must_check
+size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
+{
+	if (unlikely(!check_copy_size(addr, bytes, false)))
+		return 0;
+	else
+		return _copy_to_iter_mcsafe(addr, bytes, i);
+}
+
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
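
A hedged sketch of how a consumer might use the new mcsafe iterator copy; without CONFIG_ARCH_HAS_UACCESS_MCSAFE it silently degrades to _copy_to_iter(), and the function name here is invented:

/* Illustrative: MCE-aware copy of pmem contents into an iov_iter. */
static ssize_t dax_copy_out(void *kaddr, size_t len, struct iov_iter *i)
{
	size_t copied = copy_to_iter_mcsafe(kaddr, len, i);

	if (copied != len)	/* poisoned cachelines cut the copy short */
		return -EIO;
	return copied;
}
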
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 3c85c81..6c5f207 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -14,6 +14,7 @@
 #ifndef _UIO_DRIVER_H_
 #define _UIO_DRIVER_H_
 
+#include <linux/device.h>
 #include <linux/fs.h>
 #include <linux/interrupt.h>
 
@@ -68,12 +69,13 @@
 
 struct uio_device {
         struct module           *owner;
-        struct device           *dev;
+	struct device		dev;
         int                     minor;
         atomic_t                event;
         struct fasync_struct    *async_queue;
         wait_queue_head_t       wait;
         struct uio_info         *info;
+	spinlock_t		info_lock;
         struct kobject          *map_dir;
         struct kobject          *portio_dir;
 };
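
Embedding struct device (instead of pointing at one) changes how the owning structure is recovered; a minimal sketch, assuming only what the struct above shows:

/* Illustrative: with an embedded device, container_of() replaces the
 * old idev->dev pointer chase.
 */
static inline struct uio_device *to_uio_device(struct device *dev)
{
	return container_of(dev, struct uio_device, dev);
}
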
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 0173597..4cdd515 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -490,6 +490,16 @@
 };
 
 /*
+ * USB port quirks.
+ */
+
+/* For the given port, prefer the old (faster) enumeration scheme. */
+#define USB_PORT_QUIRK_OLD_SCHEME	BIT(0)
+
+/* Decrease TRSTRCY to 10ms during device enumeration. */
+#define USB_PORT_QUIRK_FAST_ENUM	BIT(1)
+
+/*
  * USB 2.0 Link Power Management (LPM) parameters.
  */
 struct usb2_lpm_parameters {
@@ -551,6 +561,8 @@
  * @route: tree topology hex string for use with xHCI
  * @state: device state: configured, not attached, etc.
  * @speed: device speed: high/full/low (or error)
+ * @rx_lanes: number of rx lanes in use, USB 3.2 adds dual-lane support
+ * @tx_lanes: number of tx lanes in use, USB 3.2 adds dual-lane support
  * @tt: Transaction Translator info; used with low/full speed dev, highspeed hub
  * @ttport: device port on that tt hub
  * @toggle: one bit for each endpoint, with ([0] = IN, [1] = OUT) endpoints
@@ -624,6 +636,8 @@
 	u32		route;
 	enum usb_device_state	state;
 	enum usb_device_speed	speed;
+	unsigned int		rx_lanes;
+	unsigned int		tx_lanes;
 
 	struct usb_tt	*tt;
 	int		ttport;
diff --git a/include/linux/usb/atmel_usba_udc.h b/include/linux/usb/atmel_usba_udc.h
deleted file mode 100644
index 9bb00df..0000000
--- a/include/linux/usb/atmel_usba_udc.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Platform data definitions for Atmel USBA gadget driver.
- */
-#ifndef __LINUX_USB_USBA_H
-#define __LINUX_USB_USBA_H
-
-struct usba_ep_data {
-	char	*name;
-	int	index;
-	int	fifo_size;
-	int	nr_banks;
-	int	can_dma;
-	int	can_isoc;
-};
-
-struct usba_platform_data {
-	int			vbus_pin;
-	int			vbus_pin_inverted;
-	int			num_ep;
-	struct usba_ep_data	ep[0];
-};
-
-#endif /* __LINUX_USB_USBA_H */
diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
index aaafecf..ba4b3e3 100644
--- a/include/linux/usb/audio-v2.h
+++ b/include/linux/usb/audio-v2.h
@@ -94,7 +94,7 @@
 	__u8 bClockID;
 	__u8 bNrInPins;
 	__u8 baCSourceID[];
-	/* bmControls, bAssocTerminal and iClockSource omitted */
+	/* bmControls and iClockSource omitted */
 } __attribute__((packed));
 
 /* 4.7.2.3 Clock Multiplier Descriptor */
@@ -189,6 +189,13 @@
 #define UAC2_CONTROL_DATA_OVERRUN	(3 << 2)
 #define UAC2_CONTROL_DATA_UNDERRUN	(3 << 4)
 
+/* 5.2.5.4.2 Connector Control Parameter Block */
+struct uac2_connectors_ctl_blk {
+	__u8 bNrChannels;
+	__le32 bmChannelConfig;
+	__u8 iChannelNames;
+} __attribute__((packed));
+
 /* 6.1 Interrupt Data Message */
 
 #define UAC2_INTERRUPT_DATA_MSG_VENDOR	(1 << 0)
diff --git a/include/linux/usb/audio-v3.h b/include/linux/usb/audio-v3.h
index a8959aa..a710e28 100644
--- a/include/linux/usb/audio-v3.h
+++ b/include/linux/usb/audio-v3.h
@@ -221,6 +221,12 @@
 	__le16 wLockDelay;
 } __attribute__((packed));
 
+/* 5.2.1.6.1 INSERTION CONTROL PARAMETER BLOCK */
+struct uac3_insertion_ctl_blk {
+	__u8 bSize;
+	__u8 bmConInserted;
+} __attribute__ ((packed));
+
 /* 6.1 INTERRUPT DATA MESSAGE */
 struct uac3_interrupt_data_msg {
 	__u8 bInfo;
@@ -392,4 +398,38 @@
 #define UAC3_AC_ACTIVE_INTERFACE_CONTROL	0x01
 #define UAC3_AC_POWER_DOMAIN_CONTROL		0x02
 
+/* A.23.5 TERMINAL CONTROL SELECTORS */
+#define UAC3_TE_UNDEFINED			0x00
+#define UAC3_TE_INSERTION			0x01
+#define UAC3_TE_OVERLOAD			0x02
+#define UAC3_TE_UNDERFLOW			0x03
+#define UAC3_TE_OVERFLOW			0x04
+#define UAC3_TE_LATENCY			0x05
+
+/* BADD predefined Unit/Terminal values */
+#define UAC3_BADD_IT_ID1	1  /* Input Terminal ID1: bTerminalID = 1 */
+#define UAC3_BADD_FU_ID2	2  /* Feature Unit ID2: bUnitID = 2 */
+#define UAC3_BADD_OT_ID3	3  /* Output Terminal ID3: bTerminalID = 3 */
+#define UAC3_BADD_IT_ID4	4  /* Input Terminal ID4: bTerminalID = 4 */
+#define UAC3_BADD_FU_ID5	5  /* Feature Unit ID5: bUnitID = 5 */
+#define UAC3_BADD_OT_ID6	6  /* Output Terminal ID6: bTerminalID = 6 */
+#define UAC3_BADD_FU_ID7	7  /* Feature Unit ID7: bUnitID = 7 */
+#define UAC3_BADD_MU_ID8	8  /* Mixer Unit ID8: bUnitID = 8 */
+#define UAC3_BADD_CS_ID9	9  /* Clock Source Entity ID9: bClockID = 9 */
+#define UAC3_BADD_PD_ID10	10 /* Power Domain ID10: bPowerDomainID = 10 */
+#define UAC3_BADD_PD_ID11	11 /* Power Domain ID11: bPowerDomainID = 11 */
+
+/* BADD wMaxPacketSize of AS endpoints */
+#define UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_16		0x0060
+#define UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_16		0x0062
+#define UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_24		0x0090
+#define UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_24		0x0093
+#define UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_16		0x00C0
+#define UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_16		0x00C4
+#define UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_24		0x0120
+#define UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_24		0x0126
+
+/* BADD sample rate is always fixed to 48kHz */
+#define UAC3_BADD_SAMPLING_RATE				48000
+
 #endif /* __LINUX_USB_AUDIO_V3_H */
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 847f423..e5cd84a 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -763,7 +763,7 @@
 };
 
 /* put descriptor for string with that id into buf (buflen >= 256) */
-int usb_gadget_get_string(struct usb_gadget_strings *table, int id, u8 *buf);
+int usb_gadget_get_string(const struct usb_gadget_strings *table, int id, u8 *buf);
 
 /*-------------------------------------------------------------------------*/
 
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index aef50cb..34a6ded 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -150,7 +150,6 @@
 	unsigned		rh_pollable:1;	/* may we poll the root hub? */
 	unsigned		msix_enabled:1;	/* driver has MSI-X enabled? */
 	unsigned		msi_enabled:1;	/* driver has MSI enabled? */
-	unsigned		remove_phy:1;	/* auto-remove USB phy */
 	/*
 	 * do not manage the PHY state in the HCD core, instead let the driver
 	 * handle this (for example if the PHY can only be turned on after a
@@ -261,6 +260,7 @@
 #define	HCD_USB25	0x0030		/* Wireless USB 1.0 (USB 2.5)*/
 #define	HCD_USB3	0x0040		/* USB 3.0 */
 #define	HCD_USB31	0x0050		/* USB 3.1 */
+#define	HCD_USB32	0x0060		/* USB 3.2 */
 #define	HCD_MASK	0x0070
 #define	HCD_BH		0x0100		/* URB complete in BH context */
 
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 9eb908a..fc6c779 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -67,28 +67,13 @@
 	/* MUSB configuration-specific details */
 	unsigned	multipoint:1;	/* multipoint device */
 	unsigned	dyn_fifo:1 __deprecated; /* supports dynamic fifo sizing */
-	unsigned	soft_con:1 __deprecated; /* soft connect required */
-	unsigned	utm_16:1 __deprecated; /* utm data witdh is 16 bits */
-	unsigned	big_endian:1;	/* true if CPU uses big-endian */
-	unsigned	mult_bulk_tx:1;	/* Tx ep required for multbulk pkts */
-	unsigned	mult_bulk_rx:1;	/* Rx ep required for multbulk pkts */
-	unsigned	high_iso_tx:1;	/* Tx ep required for HB iso */
-	unsigned	high_iso_rx:1;	/* Rx ep required for HD iso */
-	unsigned	dma:1 __deprecated; /* supports DMA */
-	unsigned	vendor_req:1 __deprecated; /* vendor registers required */
 
 	/* need to explicitly de-assert the port reset after resume? */
 	unsigned	host_port_deassert_reset_at_resume:1;
 
 	u8		num_eps;	/* number of endpoints _with_ ep0 */
-	u8		dma_channels __deprecated; /* number of dma channels */
-	u8		dyn_fifo_size;	/* dynamic size in bytes */
-	u8		vendor_ctrl __deprecated; /* vendor control reg width */
-	u8		vendor_stat __deprecated; /* vendor status reg witdh */
-	u8		dma_req_chan __deprecated; /* bitmask for required dma channels */
 	u8		ram_bits;	/* ram address size */
 
-	struct musb_hdrc_eps_bits *eps_bits __deprecated;
 	u32		maximum_speed;
 };
 
diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
index ff359bdf..09b570f 100644
--- a/include/linux/usb/pd.h
+++ b/include/linux/usb/pd.h
@@ -103,8 +103,8 @@
 	 (((cnt) & PD_HEADER_CNT_MASK) << PD_HEADER_CNT_SHIFT) |	\
 	 ((ext_hdr) ? PD_HEADER_EXT_HDR : 0))
 
-#define PD_HEADER_LE(type, pwr, data, id, cnt) \
-	cpu_to_le16(PD_HEADER((type), (pwr), (data), PD_REV20, (id), (cnt), (0)))
+#define PD_HEADER_LE(type, pwr, data, rev, id, cnt) \
+	cpu_to_le16(PD_HEADER((type), (pwr), (data), (rev), (id), (cnt), (0)))
 
 static inline unsigned int pd_header_cnt(u16 header)
 {
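
A hedged sketch of the caller-side impact: the negotiated revision is now threaded through instead of being hardwired to PD_REV20. The helper and its arguments are illustrative; PD_CTRL_ACCEPT and the typec role enums are assumed to be available from the usual headers:

/* Illustrative: build an Accept header at the negotiated spec revision. */
static __le16 build_accept_header(enum typec_role pwr,
				  enum typec_data_role data,
				  unsigned int negotiated_rev,
				  unsigned int msg_id)
{
	return PD_HEADER_LE(PD_CTRL_ACCEPT, pwr, data,
			    negotiated_rev, msg_id, 0);
}
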
diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h
index b7a2625..e4de6bc 100644
--- a/include/linux/usb/phy.h
+++ b/include/linux/usb/phy.h
@@ -157,22 +157,6 @@
 	enum usb_charger_type (*charger_detect)(struct usb_phy *x);
 };
 
-/**
- * struct usb_phy_bind - represent the binding for the phy
- * @dev_name: the device name of the device that will bind to the phy
- * @phy_dev_name: the device name of the phy
- * @index: used if a single controller uses multiple phys
- * @phy: reference to the phy
- * @list: to maintain a linked list of the binding information
- */
-struct usb_phy_bind {
-	const char	*dev_name;
-	const char	*phy_dev_name;
-	u8		index;
-	struct usb_phy	*phy;
-	struct list_head list;
-};
-
 /* for board-specific init logic */
 extern int usb_add_phy(struct usb_phy *, enum usb_phy_type type);
 extern int usb_add_phy_dev(struct usb_phy *);
@@ -234,16 +218,12 @@
 extern struct usb_phy *usb_get_phy(enum usb_phy_type type);
 extern struct usb_phy *devm_usb_get_phy(struct device *dev,
 	enum usb_phy_type type);
-extern struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index);
-extern struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index);
 extern struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev,
 	const char *phandle, u8 index);
 extern struct usb_phy *devm_usb_get_phy_by_node(struct device *dev,
 	struct device_node *node, struct notifier_block *nb);
 extern void usb_put_phy(struct usb_phy *);
 extern void devm_usb_put_phy(struct device *dev, struct usb_phy *x);
-extern int usb_bind_phy(const char *dev_name, u8 index,
-				const char *phy_dev_name);
 extern void usb_phy_set_event(struct usb_phy *x, unsigned long event);
 extern void usb_phy_set_charger_current(struct usb_phy *usb_phy,
 					unsigned int mA);
@@ -263,16 +243,6 @@
 	return ERR_PTR(-ENXIO);
 }
 
-static inline struct usb_phy *usb_get_phy_dev(struct device *dev, u8 index)
-{
-	return ERR_PTR(-ENXIO);
-}
-
-static inline struct usb_phy *devm_usb_get_phy_dev(struct device *dev, u8 index)
-{
-	return ERR_PTR(-ENXIO);
-}
-
 static inline struct usb_phy *devm_usb_get_phy_by_phandle(struct device *dev,
 	const char *phandle, u8 index)
 {
@@ -293,12 +263,6 @@
 {
 }
 
-static inline int usb_bind_phy(const char *dev_name, u8 index,
-				const char *phy_dev_name)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline void usb_phy_set_event(struct usb_phy *x, unsigned long event)
 {
 }
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index f0d839d..b231b93 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -36,6 +36,7 @@
 /* Time to wait for TCPC to complete transmit */
 #define PD_T_TCPC_TX_TIMEOUT	100		/* in ms	*/
 #define PD_ROLE_SWAP_TIMEOUT	(MSEC_PER_SEC * 10)
+#define PD_PPS_CTRL_TIMEOUT	(MSEC_PER_SEC * 10)
 
 enum tcpm_transmit_status {
 	TCPC_TX_SUCCESS = 0,
@@ -62,9 +63,6 @@
  * @snk_pdo:	PDO parameters sent to partner as response to
  *		PD_CTRL_GET_SINK_CAP message
  * @nr_snk_pdo:	Number of entries in @snk_pdo
- * @max_snk_mv:	Maximum acceptable sink voltage in mV
- * @max_snk_ma:	Maximum sink current in mA
- * @max_snk_mw:	Maximum required sink power in mW
  * @operating_snk_mw:
  *		Required operating sink power in mW
  * @type:	Port type (TYPEC_PORT_DFP, TYPEC_PORT_UFP, or
@@ -85,9 +83,6 @@
 	const u32 *snk_vdo;
 	unsigned int nr_snk_vdo;
 
-	unsigned int max_snk_mv;
-	unsigned int max_snk_ma;
-	unsigned int max_snk_mw;
 	unsigned int operating_snk_mw;
 
 	enum typec_port_type type;
@@ -174,9 +169,6 @@
 				    unsigned int nr_pdo);
 int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo,
 				  unsigned int nr_pdo,
-				  unsigned int max_snk_mv,
-				  unsigned int max_snk_ma,
-				  unsigned int max_snk_mw,
 				  unsigned int operating_snk_mw);
 
 void tcpm_vbus_change(struct tcpm_port *port);
diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h
index d641ea1..0c5c3ea 100644
--- a/include/linux/usb/tegra_usb_phy.h
+++ b/include/linux/usb/tegra_usb_phy.h
@@ -17,6 +17,7 @@
 #define __TEGRA_USB_PHY_H
 
 #include <linux/clk.h>
+#include <linux/reset.h>
 #include <linux/usb/otg.h>
 
 /*
@@ -76,6 +77,7 @@
 	bool is_legacy_phy;
 	bool is_ulpi_phy;
 	int reset_gpio;
+	struct reset_control *pad_rst;
 };
 
 void tegra_usb_phy_preresume(struct usb_phy *phy);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 1e5d8c3..398e9c9 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -8,6 +8,7 @@
 #include <linux/llist.h>
 #include <asm/page.h>		/* pgprot_t */
 #include <linux/rbtree.h>
+#include <linux/overflow.h>
 
 struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
 struct notifier_block;		/* in notifier.h */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 39a0e21..60d673e 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -494,6 +494,7 @@
 extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
 extern void print_worker_info(const char *log_lvl, struct task_struct *task);
 extern void show_workqueue_state(void);
+extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);
 
 /**
  * queue_work - queue work on a workqueue
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d70f77a4..6dad031 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -46,7 +46,6 @@
 	size_t value_len;
 };
 
-ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t);
 ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t);
 ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
 ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
diff --git a/include/net/ax25.h b/include/net/ax25.h
index c91bc87..3f9aea8 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -15,6 +15,7 @@
 #include <linux/refcount.h>
 #include <net/neighbour.h>
 #include <net/sock.h>
+#include <linux/seq_file.h>
 
 #define	AX25_T1CLAMPLO  		1
 #define	AX25_T1CLAMPHI 			(30 * HZ)
@@ -399,7 +400,7 @@
 /* ax25_route.c */
 void ax25_rt_device_down(struct net_device *);
 int ax25_rt_ioctl(unsigned int, void __user *);
-extern const struct file_operations ax25_route_fops;
+extern const struct seq_operations ax25_rt_seqops;
 ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev);
 int ax25_rt_autobind(ax25_cb *, ax25_address *);
 struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *,
@@ -455,7 +456,7 @@
 extern int  ax25_uid_policy;
 ax25_uid_assoc *ax25_findbyuid(kuid_t);
 int __must_check ax25_uid_ioctl(int, struct sockaddr_ax25 *);
-extern const struct file_operations ax25_uid_fops;
+extern const struct seq_operations ax25_uid_seqops;
 void ax25_uid_free(void);
 
 /* sysctl_net_ax25.c */
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index ec9d6bc..53ce817 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -271,7 +271,7 @@
 		     int flags);
 int  bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
 			    size_t len, int flags);
-__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events);
 int  bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int  bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
 int  bt_sock_wait_ready(struct sock *sk, unsigned long flags);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 71c72a9..c518743 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,6 +121,21 @@
 #endif
 }
 
+static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events)
+{
+	if (sk_can_busy_loop(sock->sk) &&
+	    events && (events & POLL_BUSY_LOOP)) {
+		/* once, only if requested by syscall */
+		sk_busy_loop(sock->sk, 1);
+	}
+}
+
+/* if this socket can poll_ll, tell the system call */
+static inline __poll_t sock_poll_busy_flag(struct socket *sock)
+{
+	return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0;
+}
+
 /* used in the NIC receive handler to mark the skb */
 static inline void skb_mark_napi_id(struct sk_buff *skb,
 				    struct napi_struct *napi)
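
A sketch of where the two helpers are meant to sit in the new ->poll_mask() call path; the wrapper below is an assumption based on the helper names, not code from this patch:

/* Illustrative: core poll glue around a protocol's poll_mask(). */
static __poll_t sock_do_poll_mask(struct socket *sock, __poll_t events)
{
	__poll_t mask;

	sock_poll_busy_loop(sock, events);	 /* spin once if requested */
	mask = sock->ops->poll_mask(sock, events);
	return mask | sock_poll_busy_flag(sock); /* advertise busy-poll */
}
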
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 7897efe..5cba71d 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -427,7 +427,15 @@
 
 int fib6_init(void);
 
-int ipv6_route_open(struct inode *inode, struct file *file);
+struct ipv6_route_iter {
+	struct seq_net_private p;
+	struct fib6_walker w;
+	loff_t skip;
+	struct fib6_table *tbl;
+	int sernum;
+};
+
+extern const struct seq_operations ipv6_route_seq_ops;
 
 int call_fib6_notifier(struct notifier_block *nb, struct net *net,
 		       enum fib_event_type event_type,
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 03f567e..6d6e21d 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -41,18 +41,6 @@
 	return net->ipvs;
 }
 
-/* This one needed for single_open_net since net is stored directly in
- * private not as a struct i.e. seq_file_net can't be used.
- */
-static inline struct net *seq_file_single_net(struct seq_file *seq)
-{
-#ifdef CONFIG_NET_NS
-	return (struct net *)seq->private;
-#else
-	return &init_net;
-#endif
-}
-
 /* Connections' size value needed by ip_vs_ctl.c */
 extern int ip_vs_conn_tab_size;
 
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index f4c21b5..b0eaeb0 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -153,8 +153,6 @@
 	atomic_t	  autobind_name;
 };
 
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
-			    poll_table *wait);
 void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
 void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
 void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
diff --git a/include/net/netrom.h b/include/net/netrom.h
index 0dad2dd..5a0714f 100644
--- a/include/net/netrom.h
+++ b/include/net/netrom.h
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <net/sock.h>
 #include <linux/refcount.h>
+#include <linux/seq_file.h>
 
 #define	NR_NETWORK_LEN			15
 #define	NR_TRANSPORT_LEN		5
@@ -216,8 +217,8 @@
 int nr_rt_ioctl(unsigned int, void __user *);
 void nr_link_failed(ax25_cb *, int);
 int nr_route_frame(struct sk_buff *, ax25_cb *);
-extern const struct file_operations nr_nodes_fops;
-extern const struct file_operations nr_neigh_fops;
+extern const struct seq_operations nr_node_seqops;
+extern const struct seq_operations nr_neigh_seqops;
 void nr_rt_free(void);
 
 /* nr_subr.c */
diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index 8639de5..cbee32b 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -56,7 +56,7 @@
 
 #define PN_NO_ADDR	0xff
 
-extern const struct file_operations pn_sock_seq_fops;
-extern const struct file_operations pn_res_seq_fops;
+extern const struct seq_operations pn_sock_seq_ops;
+extern const struct seq_operations pn_res_seq_ops;
 
 #endif
diff --git a/include/net/ping.h b/include/net/ping.h
index 4cd90d6..fd080e0 100644
--- a/include/net/ping.h
+++ b/include/net/ping.h
@@ -83,20 +83,9 @@
 bool ping_rcv(struct sk_buff *skb);
 
 #ifdef CONFIG_PROC_FS
-struct ping_seq_afinfo {
-	char				*name;
-	sa_family_t			family;
-	const struct file_operations	*seq_fops;
-	const struct seq_operations	seq_ops;
-};
-
-extern const struct file_operations ping_seq_fops;
-
 void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family);
 void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 void ping_seq_stop(struct seq_file *seq, void *v);
-int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo);
-void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo);
 
 int __init ping_proc_init(void);
 void ping_proc_exit(void);
diff --git a/include/net/raw.h b/include/net/raw.h
index 99d26d0..9c9fa98 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -48,7 +48,6 @@
 struct raw_iter_state {
 	struct seq_net_private p;
 	int bucket;
-	struct raw_hashinfo *h;
 };
 
 static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq)
@@ -58,9 +57,6 @@
 void *raw_seq_start(struct seq_file *seq, loff_t *pos);
 void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos);
 void raw_seq_stop(struct seq_file *seq, void *v);
-int raw_seq_open(struct inode *ino, struct file *file,
-		 struct raw_hashinfo *h, const struct seq_operations *ops);
-
 #endif
 
 int raw_hash_sk(struct sock *sk);
diff --git a/include/net/rose.h b/include/net/rose.h
index 04b7268..cf517d3 100644
--- a/include/net/rose.h
+++ b/include/net/rose.h
@@ -200,9 +200,9 @@
 
 /* rose_route.c */
 extern struct rose_neigh *rose_loopback_neigh;
-extern const struct file_operations rose_neigh_fops;
-extern const struct file_operations rose_nodes_fops;
-extern const struct file_operations rose_routes_fops;
+extern const struct seq_operations rose_neigh_seqops;
+extern const struct seq_operations rose_node_seqops;
+extern struct seq_operations rose_route_seqops;
 
 void rose_add_loopback_neigh(void);
 int __must_check rose_add_loopback_node(rose_address *);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 8c2caa3..30b3e2f 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -109,8 +109,7 @@
 int sctp_inet_listen(struct socket *sock, int backlog);
 void sctp_write_space(struct sock *sk);
 void sctp_data_ready(struct sock *sk);
-__poll_t sctp_poll(struct file *file, struct socket *sock,
-		poll_table *wait);
+__poll_t sctp_poll_mask(struct socket *sock, __poll_t events);
 void sctp_sock_rfree(struct sk_buff *skb);
 void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 		    struct sctp_association *asoc);
diff --git a/include/net/sock.h b/include/net/sock.h
index 4f7c584..b3b7541 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1596,8 +1596,6 @@
 int sock_no_socketpair(struct socket *, struct socket *);
 int sock_no_accept(struct socket *, struct socket *, int, bool);
 int sock_no_getname(struct socket *, struct sockaddr *, int);
-__poll_t sock_no_poll(struct file *, struct socket *,
-			  struct poll_table_struct *);
 int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
 int sock_no_listen(struct socket *, int);
 int sock_no_shutdown(struct socket *, int);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 029a51b..0448e7c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -388,8 +388,7 @@
 void tcp_close(struct sock *sk, long timeout);
 void tcp_init_sock(struct sock *sk);
 void tcp_init_transfer(struct sock *sk, int bpf_op);
-__poll_t tcp_poll(struct file *file, struct socket *sock,
-		      struct poll_table_struct *wait);
+__poll_t tcp_poll_mask(struct socket *sock, __poll_t events);
 int tcp_getsockopt(struct sock *sk, int level, int optname,
 		   char __user *optval, int __user *optlen);
 int tcp_setsockopt(struct sock *sk, int level, int optname,
@@ -1755,27 +1754,22 @@
 	TCP_SEQ_STATE_ESTABLISHED,
 };
 
-int tcp_seq_open(struct inode *inode, struct file *file);
+void *tcp_seq_start(struct seq_file *seq, loff_t *pos);
+void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+void tcp_seq_stop(struct seq_file *seq, void *v);
 
 struct tcp_seq_afinfo {
-	char				*name;
 	sa_family_t			family;
-	const struct file_operations	*seq_fops;
-	struct seq_operations		seq_ops;
 };
 
 struct tcp_iter_state {
 	struct seq_net_private	p;
-	sa_family_t		family;
 	enum tcp_seq_states	state;
 	struct sock		*syn_wait_sk;
 	int			bucket, offset, sbucket, num;
 	loff_t			last_pos;
 };
 
-int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
-void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);
-
 extern struct request_sock_ops tcp_request_sock_ops;
 extern struct request_sock_ops tcp6_request_sock_ops;
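
With tcp_seq_open() gone, each address family now assembles its own seq_operations from the exported start/next/stop helpers; a hedged sketch of the IPv4 side (the show callback body lives elsewhere and is only declared here):

/* Illustrative: per-family /proc table built on the shared iterators. */
static int tcp4_seq_show(struct seq_file *seq, void *v);

static const struct seq_operations tcp4_seq_ops = {
	.show	= tcp4_seq_show,
	.start	= tcp_seq_start,
	.next	= tcp_seq_next,
	.stop	= tcp_seq_stop,
};
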
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 9289b64..7ba0ed2 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -280,7 +280,7 @@
 int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int __udp_disconnect(struct sock *sk, int flags);
 int udp_disconnect(struct sock *sk, int flags);
-__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t udp_poll_mask(struct socket *sock, __poll_t events);
 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 				       netdev_features_t features,
 				       bool is_ipv6);
@@ -412,31 +412,27 @@
 #define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0)
 #endif
 
-/* /proc */
-int udp_seq_open(struct inode *inode, struct file *file);
-
+#ifdef CONFIG_PROC_FS
 struct udp_seq_afinfo {
-	char				*name;
 	sa_family_t			family;
 	struct udp_table		*udp_table;
-	const struct file_operations	*seq_fops;
-	struct seq_operations		seq_ops;
 };
 
 struct udp_iter_state {
 	struct seq_net_private  p;
-	sa_family_t		family;
 	int			bucket;
-	struct udp_table	*udp_table;
 };
 
-#ifdef CONFIG_PROC_FS
-int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo);
-void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo);
+void *udp_seq_start(struct seq_file *seq, loff_t *pos);
+void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+void udp_seq_stop(struct seq_file *seq, void *v);
+
+extern const struct seq_operations udp_seq_ops;
+extern const struct seq_operations udp6_seq_ops;
 
 int udp4_proc_init(void);
 void udp4_proc_exit(void);
-#endif
+#endif /* CONFIG_PROC_FS */
 
 int udpv4_offload_init(void);
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 45e75c3..5571228 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -737,7 +737,7 @@
 
 	if (audit_enabled == 0)
 		return NULL;
-	audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC,
+	audit_buf = audit_log_start(audit_context(), GFP_ATOMIC,
 				    AUDIT_MAC_IPSEC_EVENT);
 	if (audit_buf == NULL)
 		return NULL;
@@ -752,7 +752,7 @@
 					    audit_get_loginuid(current) :
 					    INVALID_UID);
 	const unsigned int ses = task_valid ? audit_get_sessionid(current) :
-		(unsigned int) -1;
+		AUDIT_SID_UNSET;
 
 	audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses);
 	audit_log_task_context(audit_buf);
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index a29d308..86a569d 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -148,7 +148,6 @@
 		u8 *pad_buff;
 	} out, in;
 
-	gfp_t alloc_flags;
 	unsigned timeout;
 	unsigned retries;
 	unsigned sense_len;
@@ -202,14 +201,11 @@
  *
  * @osd_dev:    OSD device that holds the scsi-device and default values
  *              that the request is associated with.
- * @gfp:        The allocation flags to use for request allocation, and all
- *              subsequent allocations. This will be stored at
- *              osd_request->alloc_flags, can be changed by user later
  *
  * Allocate osd_request and initialize all members to the
  * default/initial state.
  */
-struct osd_request *osd_start_request(struct osd_dev *od, gfp_t gfp);
+struct osd_request *osd_start_request(struct osd_dev *od);
 
 enum osd_req_options {
 	OSD_REQ_FUA = 0x08,	/* Force Unit Access */
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 12f454c..53b485f 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -307,7 +307,7 @@
 	 * EH_HANDLED:		I fixed the error, please complete the command
 	 * EH_RESET_TIMER:	I need more time, reset the timer and
 	 *			begin counting again
-	 * EH_NOT_HANDLED	Begin normal error recovery
+	 * EH_DONE:		Begin normal error recovery
 	 *
 	 * Status: OPTIONAL
 	 */
diff --git a/include/sound/core.h b/include/sound/core.h
index 5f181b8..36a5934 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -51,7 +51,6 @@
  */
 enum snd_device_type {
 	SNDRV_DEV_LOWLEVEL,
-	SNDRV_DEV_CONTROL,
 	SNDRV_DEV_INFO,
 	SNDRV_DEV_BUS,
 	SNDRV_DEV_CODEC,
@@ -62,6 +61,7 @@
 	SNDRV_DEV_SEQUENCER,
 	SNDRV_DEV_HWDEP,
 	SNDRV_DEV_JACK,
+	SNDRV_DEV_CONTROL,	/* NOTE: this must be the last one */
 };
 
 enum snd_device_state {
diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 5ebcc51..8c1572d 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -1610,7 +1610,7 @@
 	struct snd_pcm_indirect pcm_rec;
 	unsigned int tram_pos;
 	unsigned int tram_shift;
-	struct snd_emu10k1_fx8010_irq *irq;
+	struct snd_emu10k1_fx8010_irq irq;
 };
 
 struct snd_emu10k1_fx8010 {
@@ -1902,7 +1902,7 @@
 					    snd_fx8010_irq_handler_t *handler,
 					    unsigned char gpr_running,
 					    void *private_data,
-					    struct snd_emu10k1_fx8010_irq **r_irq);
+					    struct snd_emu10k1_fx8010_irq *irq);
 int snd_emu10k1_fx8010_unregister_irq_handler(struct snd_emu10k1 *emu,
 					      struct snd_emu10k1_fx8010_irq *irq);
 
diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index 06536e0..c052afc 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -571,4 +571,9 @@
 	return (unsigned long)(ptr - array->list) / array->elem_size;
 }
 
+/* a helper macro to iterate for each snd_array element */
+#define snd_array_for_each(array, idx, ptr) \
+	for ((idx) = 0, (ptr) = (array)->list; (idx) < (array)->used; \
+	     (ptr) = snd_array_elem(array, ++(idx)))
+
 #endif /* __SOUND_HDAUDIO_H */
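
A minimal usage sketch for the new iterator; the element type and the pr_debug() body are assumptions:

/* Illustrative: walk a snd_array of driver-private elements. */
struct pin_entry { int nid; };

static void dump_pins(struct snd_array *array)
{
	struct pin_entry *pin;
	int i;

	snd_array_for_each(array, i, pin)
		pr_debug("pin %d: nid=0x%x\n", i, pin->nid);
}
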
diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h
index 782d1df..9c3db3d 100644
--- a/include/sound/memalloc.h
+++ b/include/sound/memalloc.h
@@ -34,11 +34,9 @@
 	struct device *dev;		/* generic device */
 };
 
-#ifndef snd_dma_pci_data
 #define snd_dma_pci_data(pci)	(&(pci)->dev)
 #define snd_dma_isa_data()	NULL
 #define snd_dma_continuous_data(x)	((struct device *)(__force unsigned long)(x))
-#endif
 
 
 /*
diff --git a/include/sound/omap-pcm.h b/include/sound/omap-pcm.h
deleted file mode 100644
index c1d2f31..0000000
--- a/include/sound/omap-pcm.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * omap-pcm.h - OMAP PCM driver
- *
- * Copyright (C) 2014 Texas Instruments, Inc.
- *
- * Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#ifndef __OMAP_PCM_H__
-#define __OMAP_PCM_H__
-
-#if IS_ENABLED(CONFIG_SND_OMAP_SOC)
-int omap_pcm_platform_register(struct device *dev);
-#else
-static inline int omap_pcm_platform_register(struct device *dev)
-{
-	return 0;
-}
-#endif /* CONFIG_SND_OMAP_SOC */
-
-#endif /* __OMAP_PCM_H__ */
diff --git a/include/sound/rt5640.h b/include/sound/rt5640.h
deleted file mode 100644
index e3c84b9..0000000
--- a/include/sound/rt5640.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * linux/sound/rt5640.h -- Platform data for RT5640
- *
- * Copyright 2011 Realtek Microelectronics
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __LINUX_SND_RT5640_H
-#define __LINUX_SND_RT5640_H
-
-struct rt5640_platform_data {
-	/* IN1 & IN2 & IN3 can optionally be differential */
-	bool in1_diff;
-	bool in2_diff;
-	bool in3_diff;
-
-	bool dmic_en;
-	bool dmic1_data_pin; /* 0 = IN1P; 1 = GPIO3 */
-	bool dmic2_data_pin; /* 0 = IN1N; 1 = GPIO4 */
-
-	int ldo1_en; /* GPIO for LDO1_EN */
-};
-
-#endif
diff --git a/include/sound/rt5668.h b/include/sound/rt5668.h
new file mode 100644
index 0000000..f907b78
--- /dev/null
+++ b/include/sound/rt5668.h
@@ -0,0 +1,40 @@
+/*
+ * linux/sound/rt5668.h -- Platform data for RT5668
+ *
+ * Copyright 2018 Realtek Microelectronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_SND_RT5668_H
+#define __LINUX_SND_RT5668_H
+
+enum rt5668_dmic1_data_pin {
+	RT5668_DMIC1_NULL,
+	RT5668_DMIC1_DATA_GPIO2,
+	RT5668_DMIC1_DATA_GPIO5,
+};
+
+enum rt5668_dmic1_clk_pin {
+	RT5668_DMIC1_CLK_GPIO1,
+	RT5668_DMIC1_CLK_GPIO3,
+};
+
+enum rt5668_jd_src {
+	RT5668_JD_NULL,
+	RT5668_JD1,
+};
+
+struct rt5668_platform_data {
+
+	int ldo1_en; /* GPIO for LDO1_EN */
+
+	enum rt5668_dmic1_data_pin dmic1_data_pin;
+	enum rt5668_dmic1_clk_pin dmic1_clk_pin;
+	enum rt5668_jd_src jd_src;
+};
+
+#endif
+
diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 8ad1166..e6f8c40 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -170,6 +170,8 @@
 		unsigned int rx_num, unsigned int *rx_slot);
 	int (*set_tristate)(struct snd_soc_dai *dai, int tristate);
 
+	int (*set_sdw_stream)(struct snd_soc_dai *dai,
+			void *stream, int direction);
 	/*
 	 * DAI digital mute - optional.
 	 * Called by soc-core to minimise any pops.
@@ -294,8 +296,8 @@
 	struct snd_soc_dai_driver *driver;
 
 	/* DAI runtime info */
-	unsigned int capture_active:1;		/* stream is in use */
-	unsigned int playback_active:1;		/* stream is in use */
+	unsigned int capture_active;		/* stream usage count */
+	unsigned int playback_active;		/* stream usage count */
 	unsigned int probed:1;
 
 	unsigned int active;
@@ -313,7 +315,6 @@
 	unsigned int sample_bits;
 
 	/* parent platform/codec */
-	struct snd_soc_codec *codec;
 	struct snd_soc_component *component;
 
 	/* CODEC TDM slot masks and params (for fixup) */
@@ -358,4 +359,25 @@
 	return dev_get_drvdata(dai->dev);
 }
 
+/**
+ * snd_soc_dai_set_sdw_stream() - Configures a DAI for SDW stream operation
+ * @dai: DAI
+ * @stream: stream pointer
+ * @direction: Stream direction (Playback/Capture)
+ *
+ * The SoundWire subsystem doesn't have a notion of direction, so we reuse
+ * the ASoC stream direction to configure sink/source ports: Playback maps
+ * to source ports and Capture to sink ports.
+ *
+ * Invoke with a NULL @stream to clear a previously set stream.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static inline int snd_soc_dai_set_sdw_stream(struct snd_soc_dai *dai,
+				void *stream, int direction)
+{
+	if (dai->driver->ops->set_sdw_stream)
+		return dai->driver->ops->set_sdw_stream(dai, stream, direction);
+	else
+		return -ENOTSUPP;
+}
+
 #endif
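
A hedged machine-driver sketch for the new hook; the surrounding context is invented, and -ENOTSUPP is treated as non-fatal because not every DAI speaks SoundWire:

/* Illustrative: attach a SoundWire stream runtime to a codec DAI. */
static int machine_set_stream(struct snd_soc_dai *codec_dai,
			      void *sdw_stream, int direction)
{
	int ret = snd_soc_dai_set_sdw_stream(codec_dai, sdw_stream,
					     direction);

	if (ret == -ENOTSUPP)	/* DAI without SoundWire support */
		ret = 0;
	return ret;
}
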
diff --git a/include/sound/soc.h b/include/sound/soc.h
index ad266d7..1378dcd 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -401,11 +401,7 @@
 struct snd_soc_pcm_runtime;
 struct snd_soc_dai;
 struct snd_soc_dai_driver;
-struct snd_soc_platform;
 struct snd_soc_dai_link;
-struct snd_soc_platform_driver;
-struct snd_soc_codec;
-struct snd_soc_codec_driver;
 struct snd_soc_component;
 struct snd_soc_component_driver;
 struct soc_enum;
@@ -430,13 +426,6 @@
 	SND_SOC_CARD_CLASS_RUNTIME	= 1,
 };
 
-int snd_soc_codec_set_sysclk(struct snd_soc_codec *codec, int clk_id,
-			     int source, unsigned int freq, int dir);
-int snd_soc_codec_set_pll(struct snd_soc_codec *codec, int pll_id, int source,
-			  unsigned int freq_in, unsigned int freq_out);
-int snd_soc_codec_set_jack(struct snd_soc_codec *codec,
-			   struct snd_soc_jack *jack, void *data);
-
 int snd_soc_register_card(struct snd_soc_card *card);
 int snd_soc_unregister_card(struct snd_soc_card *card);
 int devm_snd_soc_register_card(struct device *dev, struct snd_soc_card *card);
@@ -455,19 +444,6 @@
 }
 #endif
 int snd_soc_poweroff(struct device *dev);
-int snd_soc_register_platform(struct device *dev,
-		const struct snd_soc_platform_driver *platform_drv);
-int devm_snd_soc_register_platform(struct device *dev,
-		const struct snd_soc_platform_driver *platform_drv);
-void snd_soc_unregister_platform(struct device *dev);
-int snd_soc_add_platform(struct device *dev, struct snd_soc_platform *platform,
-		const struct snd_soc_platform_driver *platform_drv);
-void snd_soc_remove_platform(struct snd_soc_platform *platform);
-struct snd_soc_platform *snd_soc_lookup_platform(struct device *dev);
-int snd_soc_register_codec(struct device *dev,
-		const struct snd_soc_codec_driver *codec_drv,
-		struct snd_soc_dai_driver *dai_drv, int num_dai);
-void snd_soc_unregister_codec(struct device *dev);
 int snd_soc_add_component(struct device *dev,
 		struct snd_soc_component *component,
 		const struct snd_soc_component_driver *component_driver,
@@ -482,16 +458,15 @@
 void snd_soc_unregister_component(struct device *dev);
 struct snd_soc_component *snd_soc_lookup_component(struct device *dev,
 						   const char *driver_name);
-int snd_soc_cache_init(struct snd_soc_codec *codec);
-int snd_soc_cache_exit(struct snd_soc_codec *codec);
 
-int snd_soc_platform_read(struct snd_soc_platform *platform,
-					unsigned int reg);
-int snd_soc_platform_write(struct snd_soc_platform *platform,
-					unsigned int reg, unsigned int val);
 int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num);
 #ifdef CONFIG_SND_SOC_COMPRESS
 int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num);
+#else
+static inline int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
+{
+	return 0;
+}
 #endif
 
 void snd_soc_disconnect_sync(struct device *dev);
@@ -576,23 +551,7 @@
 }
 #endif
 
-/* codec register bit access */
-int snd_soc_update_bits(struct snd_soc_codec *codec, unsigned int reg,
-				unsigned int mask, unsigned int value);
-int snd_soc_update_bits_locked(struct snd_soc_codec *codec,
-			       unsigned int reg, unsigned int mask,
-			       unsigned int value);
-int snd_soc_test_bits(struct snd_soc_codec *codec, unsigned int reg,
-				unsigned int mask, unsigned int value);
-
 #ifdef CONFIG_SND_SOC_AC97_BUS
-#define snd_soc_alloc_ac97_codec(codec) \
-	snd_soc_alloc_ac97_component(&codec->component)
-#define snd_soc_new_ac97_codec(codec, id, id_mask) \
-	snd_soc_new_ac97_component(&codec->component, id, id_mask)
-#define snd_soc_free_ac97_codec(ac97) \
-	snd_soc_free_ac97_component(ac97)
-
 struct snd_ac97 *snd_soc_alloc_ac97_component(struct snd_soc_component *component);
 struct snd_ac97 *snd_soc_new_ac97_component(struct snd_soc_component *component,
 	unsigned int id, unsigned int id_mask);
@@ -626,10 +585,6 @@
 					       const char *name);
 int snd_soc_add_component_controls(struct snd_soc_component *component,
 	const struct snd_kcontrol_new *controls, unsigned int num_controls);
-int snd_soc_add_codec_controls(struct snd_soc_codec *codec,
-	const struct snd_kcontrol_new *controls, unsigned int num_controls);
-int snd_soc_add_platform_controls(struct snd_soc_platform *platform,
-	const struct snd_kcontrol_new *controls, unsigned int num_controls);
 int snd_soc_add_card_controls(struct snd_soc_card *soc_card,
 	const struct snd_kcontrol_new *controls, int num_controls);
 int snd_soc_add_dai_controls(struct snd_soc_dai *dai,
@@ -862,8 +817,6 @@
 
 	unsigned int active;
 
-	unsigned int ignore_pmdown_time:1; /* pmdown_time is ignored at stop */
-	unsigned int registered_as_component:1;
 	unsigned int suspended:1; /* is in suspend PM state */
 
 	struct list_head list;
@@ -875,9 +828,6 @@
 	struct list_head dai_list;
 	int num_dai;
 
-	int (*read)(struct snd_soc_component *, unsigned int, unsigned int *);
-	int (*write)(struct snd_soc_component *, unsigned int, unsigned int);
-
 	struct regmap *regmap;
 	int val_bytes;
 
@@ -886,10 +836,6 @@
 	/* attached dynamic objects */
 	struct list_head dobj_list;
 
-#ifdef CONFIG_DEBUG_FS
-	struct dentry *debugfs_root;
-#endif
-
 	/*
 	* DO NOT use any of the fields below in drivers, they are temporary and
 	* are going to be removed again soon. If you use them in driver code the
@@ -899,29 +845,11 @@
 	/* Don't use these, use snd_soc_component_get_dapm() */
 	struct snd_soc_dapm_context dapm;
 
-	struct snd_soc_codec *codec;
-
-	int (*probe)(struct snd_soc_component *);
-	void (*remove)(struct snd_soc_component *);
-	int (*suspend)(struct snd_soc_component *);
-	int (*resume)(struct snd_soc_component *);
-	int (*pcm_new)(struct snd_soc_component *, struct snd_soc_pcm_runtime *);
-	void (*pcm_free)(struct snd_soc_component *, struct snd_pcm *);
-
-	int (*set_sysclk)(struct snd_soc_component *component,
-			  int clk_id, int source, unsigned int freq, int dir);
-	int (*set_pll)(struct snd_soc_component *component, int pll_id,
-		       int source, unsigned int freq_in, unsigned int freq_out);
-	int (*set_jack)(struct snd_soc_component *component,
-			struct snd_soc_jack *jack,  void *data);
-	int (*set_bias_level)(struct snd_soc_component *component,
-			      enum snd_soc_bias_level level);
-
 	/* machine specific init */
 	int (*init)(struct snd_soc_component *component);
 
 #ifdef CONFIG_DEBUG_FS
-	void (*init_debugfs)(struct snd_soc_component *component);
+	struct dentry *debugfs_root;
 	const char *debugfs_prefix;
 #endif
 };
@@ -938,97 +866,12 @@
 #define for_each_rtdcom_safe(rtd, rtdcom1, rtdcom2) \
 	list_for_each_entry_safe(rtdcom1, rtdcom2, &(rtd)->component_list, list)
 
-/* SoC Audio Codec device */
-struct snd_soc_codec {
-	struct device *dev;
-	const struct snd_soc_codec_driver *driver;
-
-	struct list_head list;
-
-	/* runtime */
-	unsigned int cache_init:1; /* codec cache has been initialized */
-
-	/* codec IO */
-	void *control_data; /* codec control (i2c/3wire) data */
-	hw_write_t hw_write;
-	void *reg_cache;
-
-	/* component */
-	struct snd_soc_component component;
-};
-
-/* codec driver */
-struct snd_soc_codec_driver {
-
-	/* driver ops */
-	int (*probe)(struct snd_soc_codec *);
-	int (*remove)(struct snd_soc_codec *);
-	int (*suspend)(struct snd_soc_codec *);
-	int (*resume)(struct snd_soc_codec *);
-	struct snd_soc_component_driver component_driver;
-
-	/* codec wide operations */
-	int (*set_sysclk)(struct snd_soc_codec *codec,
-			  int clk_id, int source, unsigned int freq, int dir);
-	int (*set_pll)(struct snd_soc_codec *codec, int pll_id, int source,
-		unsigned int freq_in, unsigned int freq_out);
-	int (*set_jack)(struct snd_soc_codec *codec,
-			struct snd_soc_jack *jack,  void *data);
-
-	/* codec IO */
-	struct regmap *(*get_regmap)(struct device *);
-	unsigned int (*read)(struct snd_soc_codec *, unsigned int);
-	int (*write)(struct snd_soc_codec *, unsigned int, unsigned int);
-	unsigned int reg_cache_size;
-	short reg_cache_step;
-	short reg_word_size;
-	const void *reg_cache_default;
-
-	/* codec bias level */
-	int (*set_bias_level)(struct snd_soc_codec *,
-			      enum snd_soc_bias_level level);
-	bool idle_bias_off;
-	bool suspend_bias_off;
-
-	void (*seq_notifier)(struct snd_soc_dapm_context *,
-			     enum snd_soc_dapm_type, int);
-
-	bool ignore_pmdown_time;  /* Doesn't benefit from pmdown delay */
-};
-
-/* SoC platform interface */
-struct snd_soc_platform_driver {
-
-	int (*probe)(struct snd_soc_platform *);
-	int (*remove)(struct snd_soc_platform *);
-	struct snd_soc_component_driver component_driver;
-
-	/* pcm creation and destruction */
-	int (*pcm_new)(struct snd_soc_pcm_runtime *);
-	void (*pcm_free)(struct snd_pcm *);
-
-	/* platform stream pcm ops */
-	const struct snd_pcm_ops *ops;
-
-	/* platform stream compress ops */
-	const struct snd_compr_ops *compr_ops;
-};
-
 struct snd_soc_dai_link_component {
 	const char *name;
 	struct device_node *of_node;
 	const char *dai_name;
 };
 
-struct snd_soc_platform {
-	struct device *dev;
-	const struct snd_soc_platform_driver *driver;
-
-	struct list_head list;
-
-	struct snd_soc_component component;
-};
-
 struct snd_soc_dai_link {
 	/* config - must be set by machine driver */
 	const char *name;			/* Codec name */
@@ -1276,8 +1119,6 @@
 	/* runtime devices */
 	struct snd_pcm *pcm;
 	struct snd_compr *compr;
-	struct snd_soc_codec *codec;
-	struct snd_soc_platform *platform; /* will be removed */
 	struct snd_soc_dai *codec_dai;
 	struct snd_soc_dai *cpu_dai;
 
@@ -1346,32 +1187,6 @@
 };
 
 /**
- * snd_soc_component_to_codec() - Casts a component to the CODEC it is embedded in
- * @component: The component to cast to a CODEC
- *
- * This function must only be used on components that are known to be CODECs.
- * Otherwise the behavior is undefined.
- */
-static inline struct snd_soc_codec *snd_soc_component_to_codec(
-	struct snd_soc_component *component)
-{
-	return container_of(component, struct snd_soc_codec, component);
-}
-
-/**
- * snd_soc_component_to_platform() - Casts a component to the platform it is embedded in
- * @component: The component to cast to a platform
- *
- * This function must only be used on components that are known to be platforms.
- * Otherwise the behavior is undefined.
- */
-static inline struct snd_soc_platform *snd_soc_component_to_platform(
-	struct snd_soc_component *component)
-{
-	return container_of(component, struct snd_soc_platform, component);
-}
-
-/**
  * snd_soc_dapm_to_component() - Casts a DAPM context to the component it is
  *  embedded in
  * @dapm: The DAPM context to cast to the component
@@ -1387,33 +1202,6 @@
 }
 
 /**
- * snd_soc_dapm_to_codec() - Casts a DAPM context to the CODEC it is embedded in
- * @dapm: The DAPM context to cast to the CODEC
- *
- * This function must only be used on DAPM contexts that are known to be part of
- * a CODEC (e.g. in a CODEC driver). Otherwise the behavior is undefined.
- */
-static inline struct snd_soc_codec *snd_soc_dapm_to_codec(
-	struct snd_soc_dapm_context *dapm)
-{
-	return snd_soc_component_to_codec(snd_soc_dapm_to_component(dapm));
-}
-
-/**
- * snd_soc_dapm_to_platform() - Casts a DAPM context to the platform it is
- *  embedded in
- * @dapm: The DAPM context to cast to the platform.
- *
- * This function must only be used on DAPM contexts that are known to be part of
- * a platform (e.g. in a platform driver). Otherwise the behavior is undefined.
- */
-static inline struct snd_soc_platform *snd_soc_dapm_to_platform(
-	struct snd_soc_dapm_context *dapm)
-{
-	return snd_soc_component_to_platform(snd_soc_dapm_to_component(dapm));
-}
-
-/**
  * snd_soc_component_get_dapm() - Returns the DAPM context associated with a
  *  component
  * @component: The component for which to get the DAPM context
@@ -1425,31 +1213,6 @@
 }
 
 /**
- * snd_soc_codec_get_dapm() - Returns the DAPM context for the CODEC
- * @codec: The CODEC for which to get the DAPM context
- *
- * Note: Use this function instead of directly accessing the CODEC's dapm field
- */
-static inline struct snd_soc_dapm_context *snd_soc_codec_get_dapm(
-	struct snd_soc_codec *codec)
-{
-	return snd_soc_component_get_dapm(&codec->component);
-}
-
-/**
- * snd_soc_dapm_init_bias_level() - Initialize CODEC DAPM bias level
- * @codec: The CODEC for which to initialize the DAPM bias level
- * @level: The DAPM level to initialize to
- *
- * Initializes the CODEC DAPM bias level. See snd_soc_dapm_init_bias_level().
- */
-static inline void snd_soc_codec_init_bias_level(struct snd_soc_codec *codec,
-	enum snd_soc_bias_level level)
-{
-	snd_soc_dapm_init_bias_level(snd_soc_codec_get_dapm(codec), level);
-}
-
-/**
  * snd_soc_component_init_bias_level() - Initialize COMPONENT DAPM bias level
  * @component: The COMPONENT for which to initialize the DAPM bias level
  * @level: The DAPM level to initialize to
@@ -1465,18 +1228,6 @@
 }
 
 /**
- * snd_soc_dapm_get_bias_level() - Get current CODEC DAPM bias level
- * @codec: The CODEC for which to get the DAPM bias level
- *
- * Returns: The current DAPM bias level of the CODEC.
- */
-static inline enum snd_soc_bias_level snd_soc_codec_get_bias_level(
-	struct snd_soc_codec *codec)
-{
-	return snd_soc_dapm_get_bias_level(snd_soc_codec_get_dapm(codec));
-}
-
-/**
  * snd_soc_component_get_bias_level() - Get current COMPONENT DAPM bias level
  * @component: The COMPONENT for which to get the DAPM bias level
  *
@@ -1490,21 +1241,6 @@
 }
 
 /**
- * snd_soc_codec_force_bias_level() - Set the CODEC DAPM bias level
- * @codec: The CODEC for which to set the level
- * @level: The level to set to
- *
- * Forces the CODEC bias level to a specific state. See
- * snd_soc_dapm_force_bias_level().
- */
-static inline int snd_soc_codec_force_bias_level(struct snd_soc_codec *codec,
-	enum snd_soc_bias_level level)
-{
-	return snd_soc_dapm_force_bias_level(snd_soc_codec_get_dapm(codec),
-		level);
-}
-
-/**
  * snd_soc_component_force_bias_level() - Set the COMPONENT DAPM bias level
  * @component: The COMPONENT for which to set the level
  * @level: The level to set to
@@ -1522,19 +1258,6 @@
 }
 
 /**
- * snd_soc_dapm_kcontrol_codec() - Returns the codec associated to a kcontrol
- * @kcontrol: The kcontrol
- *
- * This function must only be used on DAPM contexts that are known to be part of
- * a CODEC (e.g. in a CODEC driver). Otherwise the behavior is undefined.
- */
-static inline struct snd_soc_codec *snd_soc_dapm_kcontrol_codec(
-	struct snd_kcontrol *kcontrol)
-{
-	return snd_soc_dapm_to_codec(snd_soc_dapm_kcontrol_dapm(kcontrol));
-}
-
-/**
  * snd_soc_dapm_kcontrol_component() - Returns the component associated to a kcontrol
  * @kcontrol: The kcontrol
  *
@@ -1547,22 +1270,6 @@
 	return snd_soc_dapm_to_component(snd_soc_dapm_kcontrol_dapm(kcontrol));
 }
 
-/* codec IO */
-unsigned int snd_soc_read(struct snd_soc_codec *codec, unsigned int reg);
-int snd_soc_write(struct snd_soc_codec *codec, unsigned int reg,
-	unsigned int val);
-
-/**
- * snd_soc_cache_sync() - Sync the register cache with the hardware
- * @codec: CODEC to sync
- *
- * Note: This function will call regcache_sync()
- */
-static inline int snd_soc_cache_sync(struct snd_soc_codec *codec)
-{
-	return regcache_sync(codec->component.regmap);
-}
-
 /**
  * snd_soc_component_cache_sync() - Sync the register cache with the hardware
  * @component: COMPONENT to sync
@@ -1605,37 +1312,6 @@
 	struct regmap *regmap);
 void snd_soc_component_exit_regmap(struct snd_soc_component *component);
 
-/**
- * snd_soc_codec_init_regmap() - Initialize regmap instance for the CODEC
- * @codec: The CODEC for which to initialize the regmap instance
- * @regmap: The regmap instance that should be used by the CODEC
- *
- * This function allows deferred assignment of the regmap instance that is
- * associated with the CODEC. Only use this if the regmap instance is not yet
- * ready when the CODEC is registered. The function must also be called before
- * the first IO attempt of the CODEC.
- */
-static inline void snd_soc_codec_init_regmap(struct snd_soc_codec *codec,
-	struct regmap *regmap)
-{
-	snd_soc_component_init_regmap(&codec->component, regmap);
-}
-
-/**
- * snd_soc_codec_exit_regmap() - De-initialize regmap instance for the CODEC
- * @codec: The CODEC for which to de-initialize the regmap instance
- *
- * Calls regmap_exit() on the regmap instance associated to the CODEC and
- * removes the regmap instance from the CODEC.
- *
- * This function should only be used if snd_soc_codec_init_regmap() was used to
- * initialize the regmap instance.
- */
-static inline void snd_soc_codec_exit_regmap(struct snd_soc_codec *codec)
-{
-	snd_soc_component_exit_regmap(&codec->component);
-}
-
 #endif
 
 /* device driver data */
@@ -1662,28 +1338,6 @@
 	return dev_get_drvdata(c->dev);
 }
 
-static inline void snd_soc_codec_set_drvdata(struct snd_soc_codec *codec,
-		void *data)
-{
-	snd_soc_component_set_drvdata(&codec->component, data);
-}
-
-static inline void *snd_soc_codec_get_drvdata(struct snd_soc_codec *codec)
-{
-	return snd_soc_component_get_drvdata(&codec->component);
-}
-
-static inline void snd_soc_platform_set_drvdata(struct snd_soc_platform *platform,
-		void *data)
-{
-	snd_soc_component_set_drvdata(&platform->component, data);
-}
-
-static inline void *snd_soc_platform_get_drvdata(struct snd_soc_platform *platform)
-{
-	return snd_soc_component_get_drvdata(&platform->component);
-}
-
 static inline void snd_soc_initialize_card_lists(struct snd_soc_card *card)
 {
 	INIT_LIST_HEAD(&card->widgets);
@@ -1735,20 +1389,15 @@
 	return component->active != 0;
 }
 
-static inline bool snd_soc_codec_is_active(struct snd_soc_codec *codec)
-{
-	return snd_soc_component_is_active(&codec->component);
-}
-
 /**
  * snd_soc_kcontrol_component() - Returns the component that registered the
  *  control
  * @kcontrol: The control for which to get the component
  *
  * Note: This function will work correctly if the control has been registered
- * for a component. Either with snd_soc_add_codec_controls() or
- * snd_soc_add_platform_controls() or via  table based setup for either a
- * CODEC, a platform or component driver. Otherwise the behavior is undefined.
+ * for a component, either with snd_soc_add_component_controls() or via
+ * table based setup for a component driver. Otherwise the behavior is
+ * undefined.
  */
 static inline struct snd_soc_component *snd_soc_kcontrol_component(
 	struct snd_kcontrol *kcontrol)
@@ -1756,34 +1405,6 @@
 	return snd_kcontrol_chip(kcontrol);
 }
 
-/**
- * snd_soc_kcontrol_codec() - Returns the CODEC that registered the control
- * @kcontrol: The control for which to get the CODEC
- *
- * Note: This function will only work correctly if the control has been
- * registered with snd_soc_add_codec_controls() or via table based setup of
- * snd_soc_codec_driver. Otherwise the behavior is undefined.
- */
-static inline struct snd_soc_codec *snd_soc_kcontrol_codec(
-	struct snd_kcontrol *kcontrol)
-{
-	return snd_soc_component_to_codec(snd_soc_kcontrol_component(kcontrol));
-}
-
-/**
- * snd_soc_kcontrol_platform() - Returns the platform that registered the control
- * @kcontrol: The control for which to get the platform
- *
- * Note: This function will only work correctly if the control has been
- * registered with snd_soc_add_platform_controls() or via table based setup of
- * a snd_soc_platform_driver. Otherwise the behavior is undefined.
- */
-static inline struct snd_soc_platform *snd_soc_kcontrol_platform(
-	struct snd_kcontrol *kcontrol)
-{
-	return snd_soc_component_to_platform(snd_soc_kcontrol_component(kcontrol));
-}
-
 int snd_soc_util_init(void);
 void snd_soc_util_exit(void);
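
For reference, the component-level kcontrol helpers retained above follow
the usual ASoC idiom; a hedged sketch (example_priv and some_setting are
hypothetical, the two helpers are from this header):

    static int example_ctl_get(struct snd_kcontrol *kcontrol,
                               struct snd_ctl_elem_value *ucontrol)
    {
        struct snd_soc_component *component =
            snd_soc_kcontrol_component(kcontrol);
        struct example_priv *priv = snd_soc_component_get_drvdata(component);

        ucontrol->value.integer.value[0] = priv->some_setting;
        return 0;
    }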
 
diff --git a/include/trace/events/asoc.h b/include/trace/events/asoc.h
index ccd1a3b..40c300f 100644
--- a/include/trace/events/asoc.h
+++ b/include/trace/events/asoc.h
@@ -12,7 +12,6 @@
 #define DAPM_ARROW(dir) (((dir) == SND_SOC_DAPM_DIR_OUT) ? "->" : "<-")
 
 struct snd_soc_jack;
-struct snd_soc_codec;
 struct snd_soc_card;
 struct snd_soc_dapm_widget;
 struct snd_soc_dapm_path;
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 965c650..39b94ec 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -121,9 +121,9 @@
 		__entry->root_objectid	= root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root = %llu(%s), gen = %llu",
+	TP_printk_btrfs("root=%llu(%s) gen=%llu",
 		  show_root_type(__entry->root_objectid),
-		  (unsigned long long)__entry->generation)
+		  __entry->generation)
 );
 
 DECLARE_EVENT_CLASS(btrfs__inode,
@@ -133,7 +133,7 @@
 	TP_ARGS(inode),
 
 	TP_STRUCT__entry_btrfs(
-		__field(	ino_t,  ino			)
+		__field(	u64,  ino			)
 		__field(	blkcnt_t,  blocks		)
 		__field(	u64,  disk_i_size		)
 		__field(	u64,  generation		)
@@ -143,7 +143,7 @@
 	),
 
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
-		__entry->ino	= inode->i_ino;
+		__entry->ino	= btrfs_ino(BTRFS_I(inode));
 		__entry->blocks	= inode->i_blocks;
 		__entry->disk_i_size  = BTRFS_I(inode)->disk_i_size;
 		__entry->generation = BTRFS_I(inode)->generation;
@@ -153,15 +153,15 @@
 				BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%lu blocks=%llu "
+	TP_printk_btrfs("root=%llu(%s) gen=%llu ino=%llu blocks=%llu "
 		  "disk_i_size=%llu last_trans=%llu logged_trans=%llu",
 		  show_root_type(__entry->root_objectid),
-		  (unsigned long long)__entry->generation,
-		  (unsigned long)__entry->ino,
+		  __entry->generation,
+		  __entry->ino,
 		  (unsigned long long)__entry->blocks,
-		  (unsigned long long)__entry->disk_i_size,
-		  (unsigned long long)__entry->last_trans,
-		  (unsigned long long)__entry->logged_trans)
+		  __entry->disk_i_size,
+		  __entry->last_trans,
+		  __entry->logged_trans)
 );
 
 DEFINE_EVENT(btrfs__inode, btrfs_inode_new,
@@ -244,23 +244,25 @@
 		  "block_len=%llu flags=%s refs=%u "
 		  "compress_type=%u",
 		  show_root_type(__entry->root_objectid),
-		  (unsigned long long)__entry->ino,
-		  (unsigned long long)__entry->start,
-		  (unsigned long long)__entry->len,
-		  (unsigned long long)__entry->orig_start,
+		  __entry->ino,
+		  __entry->start,
+		  __entry->len,
+		  __entry->orig_start,
 		  show_map_type(__entry->block_start),
-		  (unsigned long long)__entry->block_len,
+		  __entry->block_len,
 		  show_map_flags(__entry->flags),
 		  __entry->refs, __entry->compress_type)
 );
 
 TRACE_EVENT(btrfs_handle_em_exist,
 
-	TP_PROTO(const struct extent_map *existing, const struct extent_map *map, u64 start, u64 len),
+	TP_PROTO(struct btrfs_fs_info *fs_info,
+		const struct extent_map *existing, const struct extent_map *map,
+		u64 start, u64 len),
 
-	TP_ARGS(existing, map, start, len),
+	TP_ARGS(fs_info, existing, map, start, len),
 
-	TP_STRUCT__entry(
+	TP_STRUCT__entry_btrfs(
 		__field(	u64,  e_start		)
 		__field(	u64,  e_len		)
 		__field(	u64,  map_start		)
@@ -269,7 +271,7 @@
 		__field(	u64,  len		)
 	),
 
-	TP_fast_assign(
+	TP_fast_assign_btrfs(fs_info,
 		__entry->e_start	= existing->start;
 		__entry->e_len		= existing->len;
 		__entry->map_start	= map->start;
@@ -278,15 +280,15 @@
 		__entry->len		= len;
 	),
 
-	TP_printk("start=%llu len=%llu "
+	TP_printk_btrfs("start=%llu len=%llu "
 		  "existing(start=%llu len=%llu) "
 		  "em(start=%llu len=%llu)",
-		  (unsigned long long)__entry->start,
-		  (unsigned long long)__entry->len,
-		  (unsigned long long)__entry->e_start,
-		  (unsigned long long)__entry->e_len,
-		  (unsigned long long)__entry->map_start,
-		  (unsigned long long)__entry->map_len)
+		  __entry->start,
+		  __entry->len,
+		  __entry->e_start,
+		  __entry->e_len,
+		  __entry->map_start,
+		  __entry->map_len)
 );
 
 /* file extent item */
@@ -443,7 +445,7 @@
 	TP_ARGS(inode, ordered),
 
 	TP_STRUCT__entry_btrfs(
-		__field(	ino_t,  ino		)
+		__field(	u64,  ino		)
 		__field(	u64,  file_offset	)
 		__field(	u64,  start		)
 		__field(	u64,  len		)
@@ -457,7 +459,7 @@
 	),
 
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
-		__entry->ino 		= inode->i_ino;
+		__entry->ino 		= btrfs_ino(BTRFS_I(inode));
 		__entry->file_offset	= ordered->file_offset;
 		__entry->start		= ordered->start;
 		__entry->len		= ordered->len;
@@ -477,13 +479,13 @@
 		  "bytes_left=%llu flags=%s compress_type=%d "
 		  "refs=%d",
 		  show_root_type(__entry->root_objectid),
-		  (unsigned long long)__entry->ino,
-		  (unsigned long long)__entry->file_offset,
-		  (unsigned long long)__entry->start,
-		  (unsigned long long)__entry->len,
-		  (unsigned long long)__entry->disk_len,
-		  (unsigned long long)__entry->truncated_len,
-		  (unsigned long long)__entry->bytes_left,
+		  __entry->ino,
+		  __entry->file_offset,
+		  __entry->start,
+		  __entry->len,
+		  __entry->disk_len,
+		  __entry->truncated_len,
+		  __entry->bytes_left,
 		  show_ordered_flags(__entry->flags),
 		  __entry->compress_type, __entry->refs)
 );
@@ -528,7 +530,7 @@
 	TP_ARGS(page, inode, wbc),
 
 	TP_STRUCT__entry_btrfs(
-		__field(	ino_t,  ino			)
+		__field(	u64,	ino			)
 		__field(	pgoff_t,  index			)
 		__field(	long,   nr_to_write		)
 		__field(	long,   pages_skipped		)
@@ -542,7 +544,7 @@
 	),
 
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
-		__entry->ino		= inode->i_ino;
+		__entry->ino		= btrfs_ino(BTRFS_I(inode));
 		__entry->index		= page->index;
 		__entry->nr_to_write	= wbc->nr_to_write;
 		__entry->pages_skipped	= wbc->pages_skipped;
@@ -556,12 +558,12 @@
 				 BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu "
+	TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu "
 		  "nr_to_write=%ld pages_skipped=%ld range_start=%llu "
 		  "range_end=%llu for_kupdate=%d "
 		  "for_reclaim=%d range_cyclic=%d writeback_index=%lu",
 		  show_root_type(__entry->root_objectid),
-		  (unsigned long)__entry->ino, __entry->index,
+		  __entry->ino, __entry->index,
 		  __entry->nr_to_write, __entry->pages_skipped,
 		  __entry->range_start, __entry->range_end,
 		  __entry->for_kupdate,
@@ -584,7 +586,7 @@
 	TP_ARGS(page, start, end, uptodate),
 
 	TP_STRUCT__entry_btrfs(
-		__field(	ino_t,	 ino		)
+		__field(	u64,	 ino		)
 		__field(	pgoff_t, index		)
 		__field(	u64,	 start		)
 		__field(	u64,	 end		)
@@ -593,7 +595,7 @@
 	),
 
 	TP_fast_assign_btrfs(btrfs_sb(page->mapping->host->i_sb),
-		__entry->ino	= page->mapping->host->i_ino;
+		__entry->ino	= btrfs_ino(BTRFS_I(page->mapping->host));
 		__entry->index	= page->index;
 		__entry->start	= start;
 		__entry->end	= end;
@@ -602,12 +604,12 @@
 			 BTRFS_I(page->mapping->host)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root=%llu(%s) ino=%lu page_index=%lu start=%llu "
+	TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu start=%llu "
 		  "end=%llu uptodate=%d",
 		  show_root_type(__entry->root_objectid),
-		  (unsigned long)__entry->ino, (unsigned long)__entry->index,
-		  (unsigned long long)__entry->start,
-		  (unsigned long long)__entry->end, __entry->uptodate)
+		  __entry->ino, (unsigned long)__entry->index,
+		  __entry->start,
+		  __entry->end, __entry->uptodate)
 );
 
 TRACE_EVENT(btrfs_sync_file,
@@ -617,8 +619,8 @@
 	TP_ARGS(file, datasync),
 
 	TP_STRUCT__entry_btrfs(
-		__field(	ino_t,  ino		)
-		__field(	ino_t,  parent		)
+		__field(	u64,	ino		)
+		__field(	u64,	parent		)
 		__field(	int,    datasync	)
 		__field(	u64,    root_objectid	)
 	),
@@ -628,16 +630,17 @@
 		const struct inode *inode = d_inode(dentry);
 
 		TP_fast_assign_fsid(btrfs_sb(file->f_path.dentry->d_sb));
-		__entry->ino		= inode->i_ino;
-		__entry->parent		= d_inode(dentry->d_parent)->i_ino;
+		__entry->ino		= btrfs_ino(BTRFS_I(inode));
+		__entry->parent		= btrfs_ino(BTRFS_I(d_inode(dentry->d_parent)));
 		__entry->datasync	= datasync;
 		__entry->root_objectid	=
 				 BTRFS_I(inode)->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root=%llu(%s) ino=%ld parent=%ld datasync=%d",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu parent=%llu datasync=%d",
 		  show_root_type(__entry->root_objectid),
-		  (unsigned long)__entry->ino, (unsigned long)__entry->parent,
+		  __entry->ino,
+		  __entry->parent,
 		  __entry->datasync)
 );
 
@@ -655,7 +658,7 @@
 		__entry->wait	= wait;
 	),
 
-	TP_printk_btrfs("wait = %d", __entry->wait)
+	TP_printk_btrfs("wait=%d", __entry->wait)
 );
 
 TRACE_EVENT(btrfs_add_block_group,
@@ -665,8 +668,7 @@
 
 	TP_ARGS(fs_info, block_group, create),
 
-	TP_STRUCT__entry(
-		__array(	u8,	fsid,	BTRFS_FSID_SIZE	)
+	TP_STRUCT__entry_btrfs(
 		__field(	u64,	offset			)
 		__field(	u64,	size			)
 		__field(	u64,	flags			)
@@ -675,8 +677,7 @@
 		__field(	int,	create			)
 	),
 
-	TP_fast_assign(
-		memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE);
+	TP_fast_assign_btrfs(fs_info,
 		__entry->offset		= block_group->key.objectid;
 		__entry->size		= block_group->key.offset;
 		__entry->flags		= block_group->flags;
@@ -686,16 +687,16 @@
 		__entry->create		= create;
 	),
 
-	TP_printk("%pU: block_group offset=%llu size=%llu "
+	TP_printk_btrfs("block_group offset=%llu size=%llu "
 		  "flags=%llu(%s) bytes_used=%llu bytes_super=%llu "
-		  "create=%d", __entry->fsid,
-		  (unsigned long long)__entry->offset,
-		  (unsigned long long)__entry->size,
-		  (unsigned long long)__entry->flags,
+		  "create=%d",
+		  __entry->offset,
+		  __entry->size,
+		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS),
-		  (unsigned long long)__entry->bytes_used,
-		  (unsigned long long)__entry->bytes_super, __entry->create)
+		  __entry->bytes_used,
+		  __entry->bytes_super, __entry->create)
 );
 
 #define show_ref_action(action)						\
@@ -740,13 +741,13 @@
 	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
 		  "parent=%llu(%s) ref_root=%llu(%s) level=%d "
 		  "type=%s seq=%llu",
-		  (unsigned long long)__entry->bytenr,
-		  (unsigned long long)__entry->num_bytes,
+		  __entry->bytenr,
+		  __entry->num_bytes,
 		  show_ref_action(__entry->action),
 		  show_root_type(__entry->parent),
 		  show_root_type(__entry->ref_root),
 		  __entry->level, show_ref_type(__entry->type),
-		  (unsigned long long)__entry->seq)
+		  __entry->seq)
 );
 
 DEFINE_EVENT(btrfs_delayed_tree_ref,  add_delayed_tree_ref,
@@ -805,15 +806,15 @@
 	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s "
 		  "parent=%llu(%s) ref_root=%llu(%s) owner=%llu "
 		  "offset=%llu type=%s seq=%llu",
-		  (unsigned long long)__entry->bytenr,
-		  (unsigned long long)__entry->num_bytes,
+		  __entry->bytenr,
+		  __entry->num_bytes,
 		  show_ref_action(__entry->action),
 		  show_root_type(__entry->parent),
 		  show_root_type(__entry->ref_root),
-		  (unsigned long long)__entry->owner,
-		  (unsigned long long)__entry->offset,
+		  __entry->owner,
+		  __entry->offset,
 		  show_ref_type(__entry->type),
-		  (unsigned long long)__entry->seq)
+		  __entry->seq)
 );
 
 DEFINE_EVENT(btrfs_delayed_data_ref,  add_delayed_data_ref,
@@ -859,8 +860,8 @@
 	),
 
 	TP_printk_btrfs("bytenr=%llu num_bytes=%llu action=%s is_data=%d",
-		  (unsigned long long)__entry->bytenr,
-		  (unsigned long long)__entry->num_bytes,
+		  __entry->bytenr,
+		  __entry->num_bytes,
 		  show_ref_action(__entry->action),
 		  __entry->is_data)
 );
@@ -923,8 +924,8 @@
 	TP_printk_btrfs("root=%llu(%s) offset=%llu size=%llu "
 		  "num_stripes=%d sub_stripes=%d type=%s",
 		  show_root_type(__entry->root_objectid),
-		  (unsigned long long)__entry->offset,
-		  (unsigned long long)__entry->size,
+		  __entry->offset,
+		  __entry->size,
 		  __entry->num_stripes, __entry->sub_stripes,
 		  show_chunk_type(__entry->type))
 );
@@ -974,9 +975,9 @@
 		  "(orig_level=%d) cow_buf=%llu (cow_level=%d)",
 		  show_root_type(__entry->root_objectid),
 		  __entry->refs,
-		  (unsigned long long)__entry->buf_start,
+		  __entry->buf_start,
 		  __entry->buf_level,
-		  (unsigned long long)__entry->cow_start,
+		  __entry->cow_start,
 		  __entry->cow_level)
 );
 
@@ -1001,7 +1002,7 @@
 		__entry->reserve	= reserve;
 	),
 
-	TP_printk_btrfs("%s: %Lu %s %Lu", __get_str(type), __entry->val,
+	TP_printk_btrfs("%s: %llu %s %llu", __get_str(type), __entry->val,
 			__entry->reserve ? "reserve" : "release",
 			__entry->bytes)
 );
@@ -1019,29 +1020,27 @@
 
 	TP_ARGS(fs_info, flags, bytes, flush, reason),
 
-	TP_STRUCT__entry(
-		__array(	u8,	fsid,	BTRFS_FSID_SIZE	)
+	TP_STRUCT__entry_btrfs(
 		__field(	u64,	flags			)
 		__field(	u64,	bytes			)
 		__field(	int,	flush			)
 		__string(	reason,	reason			)
 	),
 
-	TP_fast_assign(
-		memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE);
+	TP_fast_assign_btrfs(fs_info,
 		__entry->flags	= flags;
 		__entry->bytes	= bytes;
 		__entry->flush	= flush;
 		__assign_str(reason, reason)
 	),
 
-	TP_printk("%pU: %s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
-		  __entry->fsid, __get_str(reason), __entry->flush,
+	TP_printk_btrfs("%s: flush=%d(%s) flags=%llu(%s) bytes=%llu",
+		  __get_str(reason), __entry->flush,
 		  show_flush_action(__entry->flush),
-		  (unsigned long long)__entry->flags,
+		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS),
-		  (unsigned long long)__entry->bytes)
+		  __entry->bytes)
 );
 
 #define show_flush_state(state)							\
@@ -1060,29 +1059,27 @@
 
 	TP_ARGS(fs_info, flags, num_bytes, state, ret),
 
-	TP_STRUCT__entry(
-		__array(	u8,	fsid,	BTRFS_FSID_SIZE	)
+	TP_STRUCT__entry_btrfs(
 		__field(	u64,	flags			)
 		__field(	u64,	num_bytes		)
 		__field(	int,	state			)
 		__field(	int,	ret			)
 	),
 
-	TP_fast_assign(
-		memcpy(__entry->fsid, fs_info->fsid, BTRFS_FSID_SIZE);
+	TP_fast_assign_btrfs(fs_info,
 		__entry->flags		=	flags;
 		__entry->num_bytes	=	num_bytes;
 		__entry->state		=	state;
 		__entry->ret		=	ret;
 	),
 
-	TP_printk("%pU: state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
-		  __entry->fsid, __entry->state,
+	TP_printk_btrfs("state=%d(%s) flags=%llu(%s) num_bytes=%llu ret=%d",
+		  __entry->state,
 		  show_flush_state(__entry->state),
-		  (unsigned long long)__entry->flags,
+		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS),
-		  (unsigned long long)__entry->num_bytes, __entry->ret)
+		  __entry->num_bytes, __entry->ret)
 );
 
 DECLARE_EVENT_CLASS(btrfs__reserved_extent,
@@ -1103,8 +1100,8 @@
 
 	TP_printk_btrfs("root=%llu(%s) start=%llu len=%llu",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
-		  (unsigned long long)__entry->start,
-		  (unsigned long long)__entry->len)
+		  __entry->start,
+		  __entry->len)
 );
 
 DEFINE_EVENT(btrfs__reserved_extent,  btrfs_reserved_extent_alloc,
@@ -1140,7 +1137,7 @@
 		__entry->data		= data;
 	),
 
-	TP_printk_btrfs("root=%Lu(%s) len=%Lu empty_size=%Lu flags=%Lu(%s)",
+	TP_printk_btrfs("root=%llu(%s) len=%llu empty_size=%llu flags=%llu(%s)",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  __entry->num_bytes, __entry->empty_size, __entry->data,
 		  __print_flags((unsigned long)__entry->data, "|",
@@ -1149,11 +1146,10 @@
 
 DECLARE_EVENT_CLASS(btrfs__reserve_extent,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_block_group_cache *block_group, u64 start,
+	TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
 		 u64 len),
 
-	TP_ARGS(fs_info, block_group, start, len),
+	TP_ARGS(block_group, start, len),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,	bg_objectid		)
@@ -1162,15 +1158,15 @@
 		__field(	u64,	len			)
 	),
 
-	TP_fast_assign_btrfs(fs_info,
+	TP_fast_assign_btrfs(block_group->fs_info,
 		__entry->bg_objectid	= block_group->key.objectid;
 		__entry->flags		= block_group->flags;
 		__entry->start		= start;
 		__entry->len		= len;
 	),
 
-	TP_printk_btrfs("root=%Lu(%s) block_group=%Lu flags=%Lu(%s) "
-		  "start=%Lu len=%Lu",
+	TP_printk_btrfs("root=%llu(%s) block_group=%llu flags=%llu(%s) "
+		  "start=%llu len=%llu",
 		  show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
 		  __entry->bg_objectid,
 		  __entry->flags, __print_flags((unsigned long)__entry->flags,
@@ -1180,20 +1176,18 @@
 
 DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_block_group_cache *block_group, u64 start,
+	TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
 		 u64 len),
 
-	TP_ARGS(fs_info, block_group, start, len)
+	TP_ARGS(block_group, start, len)
 );
 
 DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 const struct btrfs_block_group_cache *block_group, u64 start,
+	TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
 		 u64 len),
 
-	TP_ARGS(fs_info, block_group, start, len)
+	TP_ARGS(block_group, start, len)
 );
 
 TRACE_EVENT(btrfs_find_cluster,
@@ -1221,8 +1215,8 @@
 		__entry->min_bytes	= min_bytes;
 	),
 
-	TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) start=%Lu len=%Lu "
-		  "empty_size=%Lu min_bytes=%Lu", __entry->bg_objectid,
+	TP_printk_btrfs("block_group=%llu flags=%llu(%s) start=%llu len=%llu "
+		  "empty_size=%llu min_bytes=%llu", __entry->bg_objectid,
 		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
 				BTRFS_GROUP_FLAGS), __entry->start,
@@ -1243,7 +1237,7 @@
 		__entry->bg_objectid	= block_group->key.objectid;
 	),
 
-	TP_printk_btrfs("block_group=%Lu", __entry->bg_objectid)
+	TP_printk_btrfs("block_group=%llu", __entry->bg_objectid)
 );
 
 TRACE_EVENT(btrfs_setup_cluster,
@@ -1272,8 +1266,8 @@
 		__entry->bitmap		= bitmap;
 	),
 
-	TP_printk_btrfs("block_group=%Lu flags=%Lu(%s) window_start=%Lu "
-		  "size=%Lu max_size=%Lu bitmap=%d",
+	TP_printk_btrfs("block_group=%llu flags=%llu(%s) window_start=%llu "
+		  "size=%llu max_size=%llu bitmap=%d",
 		  __entry->bg_objectid,
 		  __entry->flags,
 		  __print_flags((unsigned long)__entry->flags, "|",
@@ -1476,7 +1470,7 @@
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,		rootid		)
-		__field(	unsigned long,	ino		)
+		__field(	u64,		ino		)
 		__field(	u64,		start		)
 		__field(	u64,		len		)
 		__field(	u64,		reserved	)
@@ -1485,14 +1479,14 @@
 
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
 		__entry->rootid		= BTRFS_I(inode)->root->objectid;
-		__entry->ino		= inode->i_ino;
+		__entry->ino		= btrfs_ino(BTRFS_I(inode));
 		__entry->start		= start;
 		__entry->len		= len;
 		__entry->reserved	= reserved;
 		__entry->op		= op;
 	),
 
-	TP_printk_btrfs("root=%llu ino=%lu start=%llu len=%llu reserved=%llu op=%s",
+	TP_printk_btrfs("root=%llu ino=%llu start=%llu len=%llu reserved=%llu op=%s",
 		  __entry->rootid, __entry->ino, __entry->start, __entry->len,
 		  __entry->reserved,
 		  __print_flags((unsigned long)__entry->op, "",
@@ -1584,12 +1578,14 @@
 
 TRACE_EVENT(btrfs_qgroup_account_extent,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info, u64 bytenr,
+	TP_PROTO(const struct btrfs_fs_info *fs_info, u64 transid, u64 bytenr,
 		 u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots),
 
-	TP_ARGS(fs_info, bytenr, num_bytes, nr_old_roots, nr_new_roots),
+	TP_ARGS(fs_info, transid, bytenr, num_bytes, nr_old_roots,
+		nr_new_roots),
 
 	TP_STRUCT__entry_btrfs(
+		__field(	u64,  transid			)
 		__field(	u64,  bytenr			)
 		__field(	u64,  num_bytes			)
 		__field(	u64,  nr_old_roots		)
@@ -1597,43 +1593,49 @@
 	),
 
 	TP_fast_assign_btrfs(fs_info,
+		__entry->transid	= transid;
 		__entry->bytenr		= bytenr;
 		__entry->num_bytes	= num_bytes;
 		__entry->nr_old_roots	= nr_old_roots;
 		__entry->nr_new_roots	= nr_new_roots;
 	),
 
-	TP_printk_btrfs("bytenr=%llu num_bytes=%llu nr_old_roots=%llu "
-		  "nr_new_roots=%llu",
-		  __entry->bytenr,
-		  __entry->num_bytes,
-		  __entry->nr_old_roots,
-		  __entry->nr_new_roots)
+	TP_printk_btrfs(
+"transid=%llu bytenr=%llu num_bytes=%llu nr_old_roots=%llu nr_new_roots=%llu",
+		__entry->transid,
+		__entry->bytenr,
+		__entry->num_bytes,
+		__entry->nr_old_roots,
+		__entry->nr_new_roots)
 );
 
 TRACE_EVENT(qgroup_update_counters,
 
-	TP_PROTO(const struct btrfs_fs_info *fs_info, u64 qgid,
+	TP_PROTO(const struct btrfs_fs_info *fs_info,
+		 struct btrfs_qgroup *qgroup,
 		 u64 cur_old_count, u64 cur_new_count),
 
-	TP_ARGS(fs_info, qgid, cur_old_count, cur_new_count),
+	TP_ARGS(fs_info, qgroup, cur_old_count, cur_new_count),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,  qgid			)
+		__field(	u64,  old_rfer			)
+		__field(	u64,  old_excl			)
 		__field(	u64,  cur_old_count		)
 		__field(	u64,  cur_new_count		)
 	),
 
 	TP_fast_assign_btrfs(fs_info,
-		__entry->qgid		= qgid;
+		__entry->qgid		= qgroup->qgroupid;
+		__entry->old_rfer	= qgroup->rfer;
+		__entry->old_excl	= qgroup->excl;
 		__entry->cur_old_count	= cur_old_count;
 		__entry->cur_new_count	= cur_new_count;
 	),
 
-	TP_printk_btrfs("qgid=%llu cur_old_count=%llu cur_new_count=%llu",
-		  __entry->qgid,
-		  __entry->cur_old_count,
-		  __entry->cur_new_count)
+	TP_printk_btrfs("qgid=%llu old_rfer=%llu old_excl=%llu cur_old_count=%llu cur_new_count=%llu",
+		  __entry->qgid, __entry->old_rfer, __entry->old_excl,
+		  __entry->cur_old_count, __entry->cur_new_count)
 );
 
 TRACE_EVENT(qgroup_update_reserve,
@@ -1765,14 +1767,14 @@
 	),
 
 	TP_printk_btrfs("root_id=%llu key=[%llu,%u,%llu] level=%d count=[%d+%d=%d] parent=%llu wanted_disk_byte=%llu nodes=%llu",
-			(unsigned long long)__entry->root_id,
-			(unsigned long long)__entry->objectid, __entry->type,
-			(unsigned long long)__entry->offset, __entry->level,
+			__entry->root_id,
+			__entry->objectid, __entry->type,
+			__entry->offset, __entry->level,
 			__entry->old_count, __entry->mod_count,
 			__entry->old_count + __entry->mod_count,
-			(unsigned long long)__entry->parent,
-			(unsigned long long)__entry->bytenr,
-			(unsigned long long)__entry->tree_size)
+			__entry->parent,
+			__entry->bytenr,
+			__entry->tree_size)
 );
 
 DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_merge,
@@ -1808,8 +1810,51 @@
 
 	TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d",
 			show_root_type(__entry->root_objectid),
-			(unsigned long long)__entry->ino, __entry->mod)
+			__entry->ino, __entry->mod)
 );
+
+DECLARE_EVENT_CLASS(btrfs__block_group,
+	TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+
+	TP_ARGS(bg_cache),
+
+	TP_STRUCT__entry_btrfs(
+		__field(	u64,	bytenr		)
+		__field(	u64,	len		)
+		__field(	u64,	used		)
+		__field(	u64,	flags		)
+	),
+
+	TP_fast_assign_btrfs(bg_cache->fs_info,
+		__entry->bytenr = bg_cache->key.objectid;
+		__entry->len	= bg_cache->key.offset;
+		__entry->used	= btrfs_block_group_used(&bg_cache->item);
+		__entry->flags	= bg_cache->flags;
+	),
+
+	TP_printk_btrfs("bg bytenr=%llu len=%llu used=%llu flags=%llu(%s)",
+		__entry->bytenr, __entry->len, __entry->used, __entry->flags,
+		__print_flags(__entry->flags, "|", BTRFS_GROUP_FLAGS))
+);
+
+DEFINE_EVENT(btrfs__block_group, btrfs_remove_block_group,
+	TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+
+	TP_ARGS(bg_cache)
+);
+
+DEFINE_EVENT(btrfs__block_group, btrfs_add_unused_block_group,
+	TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+
+	TP_ARGS(bg_cache)
+);
+
+DEFINE_EVENT(btrfs__block_group, btrfs_skip_unused_block_group,
+	TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+
+	TP_ARGS(bg_cache)
+);
+
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/host1x.h b/include/trace/events/host1x.h
index 6311636..a37ef73 100644
--- a/include/trace/events/host1x.h
+++ b/include/trace/events/host1x.h
@@ -115,16 +115,15 @@
 );
 
 TRACE_EVENT(host1x_channel_submit,
-	TP_PROTO(const char *name, u32 cmdbufs, u32 relocs, u32 waitchks,
-			u32 syncpt_id, u32 syncpt_incrs),
+	TP_PROTO(const char *name, u32 cmdbufs, u32 relocs, u32 syncpt_id,
+		 u32 syncpt_incrs),
 
-	TP_ARGS(name, cmdbufs, relocs, waitchks, syncpt_id, syncpt_incrs),
+	TP_ARGS(name, cmdbufs, relocs, syncpt_id, syncpt_incrs),
 
 	TP_STRUCT__entry(
 		__field(const char *, name)
 		__field(u32, cmdbufs)
 		__field(u32, relocs)
-		__field(u32, waitchks)
 		__field(u32, syncpt_id)
 		__field(u32, syncpt_incrs)
 	),
@@ -133,15 +132,14 @@
 		__entry->name = name;
 		__entry->cmdbufs = cmdbufs;
 		__entry->relocs = relocs;
-		__entry->waitchks = waitchks;
 		__entry->syncpt_id = syncpt_id;
 		__entry->syncpt_incrs = syncpt_incrs;
 	),
 
-	TP_printk("name=%s, cmdbufs=%u, relocs=%u, waitchks=%d,"
-		"syncpt_id=%u, syncpt_incrs=%u",
-	  __entry->name, __entry->cmdbufs, __entry->relocs, __entry->waitchks,
-	  __entry->syncpt_id, __entry->syncpt_incrs)
+	TP_printk("name=%s, cmdbufs=%u, relocs=%u, syncpt_id=%u, "
+		  "syncpt_incrs=%u",
+		  __entry->name, __entry->cmdbufs, __entry->relocs,
+		  __entry->syncpt_id, __entry->syncpt_incrs)
 );
 
 TRACE_EVENT(host1x_channel_submitted,
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d8c3329..5936aac 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -84,20 +84,21 @@
 );
 
 /*
- * Tracepoint for future grace-period events, including those for no-callbacks
- * CPUs.  The caller should pull the data from the rcu_node structure,
- * other than rcuname, which comes from the rcu_state structure, and event,
- * which is one of the following:
+ * Tracepoint for future grace-period events.  The caller should pull
+ * the data from the rcu_node structure, other than rcuname, which comes
+ * from the rcu_state structure, and event, which is one of the following:
  *
- * "Startleaf": Request a nocb grace period based on leaf-node data.
+ * "Startleaf": Request a grace period based on leaf-node data.
+ * "Prestarted": Someone beat us to the request
  * "Startedleaf": Leaf-node start proved sufficient.
  * "Startedleafroot": Leaf-node start proved sufficient after checking root.
  * "Startedroot": Requested a nocb grace period based on root-node data.
+ * "NoGPkthread": The RCU grace-period kthread has not yet started.
  * "StartWait": Start waiting for the requested grace period.
  * "ResumeWait": Resume waiting after signal.
  * "EndWait": Complete wait.
  * "Cleanup": Clean up rcu_node structure after previous GP.
- * "CleanupMore": Clean up, and another no-CB GP is needed.
+ * "CleanupMore": Clean up, and another GP is needed.
  */
 TRACE_EVENT(rcu_future_grace_period,
 
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index bfda803..4ecdfe2 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -422,6 +422,7 @@
 	do {								\
 		char *type_str = #type"["__stringify(len)"]";		\
 		BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);			\
+		BUILD_BUG_ON(len <= 0);					\
 		ret = trace_define_field(event_call, type_str, #item,	\
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
diff --git a/include/uapi/asm-generic/msgbuf.h b/include/uapi/asm-generic/msgbuf.h
index fb306eb..9fe4881 100644
--- a/include/uapi/asm-generic/msgbuf.h
+++ b/include/uapi/asm-generic/msgbuf.h
@@ -18,31 +18,30 @@
  * On big-endian systems, the padding is in the wrong place.
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
 
 struct msqid64_ds {
 	struct ipc64_perm msg_perm;
+#if __BITS_PER_LONG == 64
 	__kernel_time_t msg_stime;	/* last msgsnd time */
-#if __BITS_PER_LONG != 64
-	unsigned long	__unused1;
-#endif
 	__kernel_time_t msg_rtime;	/* last msgrcv time */
-#if __BITS_PER_LONG != 64
-	unsigned long	__unused2;
-#endif
 	__kernel_time_t msg_ctime;	/* last change time */
-#if __BITS_PER_LONG != 64
-	unsigned long	__unused3;
+#else
+	unsigned long	msg_stime;	/* last msgsnd time */
+	unsigned long	msg_stime_high;
+	unsigned long	msg_rtime;	/* last msgrcv time */
+	unsigned long	msg_rtime_high;
+	unsigned long	msg_ctime;	/* last change time */
+	unsigned long	msg_ctime_high;
 #endif
-	__kernel_ulong_t msg_cbytes;	/* current number of bytes on queue */
-	__kernel_ulong_t msg_qnum;	/* number of messages in queue */
-	__kernel_ulong_t msg_qbytes;	/* max number of bytes on queue */
+	unsigned long	msg_cbytes;	/* current number of bytes on queue */
+	unsigned long	msg_qnum;	/* number of messages in queue */
+	unsigned long	msg_qbytes;	/* max number of bytes on queue */
 	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
 	__kernel_pid_t msg_lrpid;	/* last receive pid */
-	__kernel_ulong_t __unused4;
-	__kernel_ulong_t __unused5;
+	unsigned long	__unused4;
+	unsigned long	__unused5;
 };
 
 #endif /* __ASM_GENERIC_MSGBUF_H */
diff --git a/include/uapi/asm-generic/posix_types.h b/include/uapi/asm-generic/posix_types.h
index 5e6ea22..f0733a2 100644
--- a/include/uapi/asm-generic/posix_types.h
+++ b/include/uapi/asm-generic/posix_types.h
@@ -87,6 +87,7 @@
 typedef __kernel_long_t	__kernel_off_t;
 typedef long long	__kernel_loff_t;
 typedef __kernel_long_t	__kernel_time_t;
+typedef long long __kernel_time64_t;
 typedef __kernel_long_t	__kernel_clock_t;
 typedef int		__kernel_timer_t;
 typedef int		__kernel_clockid_t;
diff --git a/include/uapi/asm-generic/sembuf.h b/include/uapi/asm-generic/sembuf.h
index cbf9cfe..0bae010 100644
--- a/include/uapi/asm-generic/sembuf.h
+++ b/include/uapi/asm-generic/sembuf.h
@@ -13,23 +13,29 @@
  * everyone just ended up making identical copies without specific
  * optimizations, so we may just as well all use the same one.
  *
- * 64 bit architectures typically define a 64 bit __kernel_time_t,
+ * 64 bit architectures use a 64-bit __kernel_time_t here, while
+ * 32 bit architectures have a pair of unsigned long values,
  * so they do not need the first two padding words.
- * On big-endian systems, the padding is in the wrong place.
  *
- * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
+ * On big-endian systems, the padding is in the wrong place for
+ * historic reasons, so user space has to reconstruct a time_t
+ * value using
+ *
+ * user_semid_ds.sem_otime = kernel_semid64_ds.sem_otime +
+ *		((long long)kernel_semid64_ds.sem_otime_high << 32)
+ *
+ * Pad space is left for 2 miscellaneous 32-bit values
  */
 struct semid64_ds {
 	struct ipc64_perm sem_perm;	/* permissions .. see ipc.h */
+#if __BITS_PER_LONG == 64
 	__kernel_time_t	sem_otime;	/* last semop time */
-#if __BITS_PER_LONG != 64
-	unsigned long	__unused1;
-#endif
 	__kernel_time_t	sem_ctime;	/* last change time */
-#if __BITS_PER_LONG != 64
-	unsigned long	__unused2;
+#else
+	unsigned long	sem_otime;	/* last semop time */
+	unsigned long	sem_otime_high;
+	unsigned long	sem_ctime;	/* last change time */
+	unsigned long	sem_ctime_high;
 #endif
 	unsigned long	sem_nsems;	/* no. of semaphores in array */
 	unsigned long	__unused3;
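
A sketch of the user-space reconstruction the comment above describes,
using the new 32-bit field layout (only the struct fields come from this
patch; the surrounding code is assumed):

    struct semid64_ds ds;
    /* ... semctl(IPC_STAT) fills ds ... */
    long long sem_otime64 =
        ((long long)ds.sem_otime_high << 32) | ds.sem_otime;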
diff --git a/include/uapi/asm-generic/shmbuf.h b/include/uapi/asm-generic/shmbuf.h
index 2b6c3bb9..e504422 100644
--- a/include/uapi/asm-generic/shmbuf.h
+++ b/include/uapi/asm-generic/shmbuf.h
@@ -19,42 +19,41 @@
  *
  *
  * Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
  * - 2 miscellaneous 32-bit values
  */
 
 struct shmid64_ds {
 	struct ipc64_perm	shm_perm;	/* operation perms */
 	size_t			shm_segsz;	/* size of segment (bytes) */
+#if __BITS_PER_LONG == 64
 	__kernel_time_t		shm_atime;	/* last attach time */
-#if __BITS_PER_LONG != 64
-	unsigned long		__unused1;
-#endif
 	__kernel_time_t		shm_dtime;	/* last detach time */
-#if __BITS_PER_LONG != 64
-	unsigned long		__unused2;
-#endif
 	__kernel_time_t		shm_ctime;	/* last change time */
-#if __BITS_PER_LONG != 64
-	unsigned long		__unused3;
+#else
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_atime_high;
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_dtime_high;
+	unsigned long		shm_ctime;	/* last change time */
+	unsigned long		shm_ctime_high;
 #endif
 	__kernel_pid_t		shm_cpid;	/* pid of creator */
 	__kernel_pid_t		shm_lpid;	/* pid of last operator */
-	__kernel_ulong_t	shm_nattch;	/* no. of current attaches */
-	__kernel_ulong_t	__unused4;
-	__kernel_ulong_t	__unused5;
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused4;
+	unsigned long		__unused5;
 };
 
 struct shminfo64 {
-	__kernel_ulong_t	shmmax;
-	__kernel_ulong_t	shmmin;
-	__kernel_ulong_t	shmmni;
-	__kernel_ulong_t	shmseg;
-	__kernel_ulong_t	shmall;
-	__kernel_ulong_t	__unused1;
-	__kernel_ulong_t	__unused2;
-	__kernel_ulong_t	__unused3;
-	__kernel_ulong_t	__unused4;
+	unsigned long		shmmax;
+	unsigned long		shmmin;
+	unsigned long		shmmni;
+	unsigned long		shmseg;
+	unsigned long		shmall;
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+	unsigned long		__unused3;
+	unsigned long		__unused4;
 };
 
 #endif /* __ASM_GENERIC_SHMBUF_H */
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 558b902..80e2a72 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -249,7 +249,8 @@
 #define TRAP_TRACE	2	/* process trace trap */
 #define TRAP_BRANCH     3	/* process taken branch trap */
 #define TRAP_HWBKPT     4	/* hardware breakpoint/watchpoint */
-#define NSIGTRAP	4
+#define TRAP_UNK	5	/* undiagnosed trap */
+#define NSIGTRAP	5
 
 /*
  * There is an additional set of SIGTRAP si_codes used by ptrace
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 8bcb186..4299067 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -732,9 +732,11 @@
 __SYSCALL(__NR_pkey_free,     sys_pkey_free)
 #define __NR_statx 291
 __SYSCALL(__NR_statx,     sys_statx)
+#define __NR_io_pgetevents 292
+__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
 
 #undef __NR_syscalls
-#define __NR_syscalls 292
+#define __NR_syscalls 293
 
 /*
  * 32 bit systems traditionally used different
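
A hedged sketch of invoking the new syscall directly on a libc that has
no wrapper yet; the prototype is taken from the aio implementation,
which is not part of this hunk, so treat it as an assumption:

    #include <unistd.h>
    #include <sys/syscall.h>

    /* ctx, events, ts and sigset set up as for io_getevents() */
    ret = syscall(__NR_io_pgetevents, ctx, min_nr, nr, events, &ts, &sigset);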
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index c363b67..78b4dd8 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -78,6 +78,12 @@
 #define AMDGPU_GEM_DOMAIN_GDS		0x8
 #define AMDGPU_GEM_DOMAIN_GWS		0x10
 #define AMDGPU_GEM_DOMAIN_OA		0x20
+#define AMDGPU_GEM_DOMAIN_MASK		(AMDGPU_GEM_DOMAIN_CPU | \
+					 AMDGPU_GEM_DOMAIN_GTT | \
+					 AMDGPU_GEM_DOMAIN_VRAM | \
+					 AMDGPU_GEM_DOMAIN_GDS | \
+					 AMDGPU_GEM_DOMAIN_GWS | \
+					 AMDGPU_GEM_DOMAIN_OA)
 
 /* Flag that CPU access will be required for the case of VRAM domain */
 #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
@@ -95,6 +101,10 @@
 #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
 /* Flag that BO sharing will be explicitly synchronized */
 #define AMDGPU_GEM_CREATE_EXPLICIT_SYNC		(1 << 7)
+/* Flag that indicates allocating MQD gart on GFX9, where the mtype
+ * for the second page onward should be set to NC.
+ */
+#define AMDGPU_GEM_CREATE_MQD_GFX9		(1 << 8)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
@@ -520,6 +530,10 @@
 /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
 #define AMDGPU_IB_FLAG_PREEMPT (1<<2)
 
+/* The IB fence should do the L2 writeback but not invalidate any shader
+ * caches (L2/vL1/sL1/I$). */
+#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+
 struct drm_amdgpu_cs_chunk_ib {
 	__u32 _pad;
 	/** AMDGPU_IB_FLAG_* */
@@ -620,6 +634,12 @@
 	#define AMDGPU_INFO_FW_ASD		0x0d
 	/* Subquery id: Query VCN firmware version */
 	#define AMDGPU_INFO_FW_VCN		0x0e
+	/* Subquery id: Query GFX RLC SRLC firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f
+	/* Subquery id: Query GFX RLC SRLG firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10
+	/* Subquery id: Query GFX RLC SRLS firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11
 /* number of bytes moved for TTM migration */
 #define AMDGPU_INFO_NUM_BYTES_MOVED		0x0f
 /* the used VRAM size */
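
A hedged sketch of the kind of sanity check the new domain mask enables
(the function name is hypothetical; the mask is from this patch):

    static bool example_gem_domains_valid(u32 domains)
    {
        /* reject requests with bits outside the defined GEM domains */
        return domains && !(domains & ~AMDGPU_GEM_DOMAIN_MASK);
    }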
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 6fdff59..9c660e1 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -680,6 +680,13 @@
  */
 #define DRM_CLIENT_CAP_ATOMIC	3
 
+/**
+ * DRM_CLIENT_CAP_ASPECT_RATIO
+ *
+ * If set to 1, the DRM core will provide aspect ratio information in modes.
+ */
+#define DRM_CLIENT_CAP_ASPECT_RATIO    4
+
 /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
 	__u64 capability;
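
A hedged user-space sketch of opting in to the new cap through libdrm's
drmSetClientCap() wrapper for DRM_IOCTL_SET_CLIENT_CAP; after this call
the core may report modes carrying aspect ratio flags:

    if (drmSetClientCap(fd, DRM_CLIENT_CAP_ASPECT_RATIO, 1))
        /* kernel too old: aspect ratio information unavailable */;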
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 50bcf42..4b3a1bb 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -93,6 +93,8 @@
 #define DRM_MODE_PICTURE_ASPECT_NONE		0
 #define DRM_MODE_PICTURE_ASPECT_4_3		1
 #define DRM_MODE_PICTURE_ASPECT_16_9		2
+#define DRM_MODE_PICTURE_ASPECT_64_27		3
+#define DRM_MODE_PICTURE_ASPECT_256_135		4
 
 /* Aspect ratio flag bitmask (4 bits 22:19) */
 #define DRM_MODE_FLAG_PIC_AR_MASK		(0x0F<<19)
@@ -102,6 +104,10 @@
 			(DRM_MODE_PICTURE_ASPECT_4_3<<19)
 #define  DRM_MODE_FLAG_PIC_AR_16_9 \
 			(DRM_MODE_PICTURE_ASPECT_16_9<<19)
+#define  DRM_MODE_FLAG_PIC_AR_64_27 \
+			(DRM_MODE_PICTURE_ASPECT_64_27<<19)
+#define  DRM_MODE_FLAG_PIC_AR_256_135 \
+			(DRM_MODE_PICTURE_ASPECT_256_135<<19)
 
 #define  DRM_MODE_FLAG_ALL	(DRM_MODE_FLAG_PHSYNC |		\
 				 DRM_MODE_FLAG_NHSYNC |		\
diff --git a/include/uapi/drm/exynos_drm.h b/include/uapi/drm/exynos_drm.h
index 4a54305..3e59b83 100644
--- a/include/uapi/drm/exynos_drm.h
+++ b/include/uapi/drm/exynos_drm.h
@@ -135,6 +135,219 @@
 	__u64					async;
 };
 
+/* Exynos DRM IPP v2 API */
+
+/**
+ * Enumerate available IPP hardware modules.
+ *
+ * @count_ipps: size of ipp_id array / number of ipp modules (set by driver)
+ * @reserved: padding
+ * @ipp_id_ptr: pointer to ipp_id array or NULL
+ */
+struct drm_exynos_ioctl_ipp_get_res {
+	__u32 count_ipps;
+	__u32 reserved;
+	__u64 ipp_id_ptr;
+};
+
+enum drm_exynos_ipp_format_type {
+	DRM_EXYNOS_IPP_FORMAT_SOURCE		= 0x01,
+	DRM_EXYNOS_IPP_FORMAT_DESTINATION	= 0x02,
+};
+
+struct drm_exynos_ipp_format {
+	__u32 fourcc;
+	__u32 type;
+	__u64 modifier;
+};
+
+enum drm_exynos_ipp_capability {
+	DRM_EXYNOS_IPP_CAP_CROP		= 0x01,
+	DRM_EXYNOS_IPP_CAP_ROTATE	= 0x02,
+	DRM_EXYNOS_IPP_CAP_SCALE	= 0x04,
+	DRM_EXYNOS_IPP_CAP_CONVERT	= 0x08,
+};
+
+/**
+ * Get IPP hardware capabilities and supported image formats.
+ *
+ * @ipp_id: id of IPP module to query
+ * @capabilities: bitmask of drm_exynos_ipp_capability (set by driver)
+ * @reserved: padding
+ * @formats_count: size of formats array (in entries) / number of filled
+ *		   formats (set by driver)
+ * @formats_ptr: pointer to formats array or NULL
+ */
+struct drm_exynos_ioctl_ipp_get_caps {
+	__u32 ipp_id;
+	__u32 capabilities;
+	__u32 reserved;
+	__u32 formats_count;
+	__u64 formats_ptr;
+};
+
+enum drm_exynos_ipp_limit_type {
+	/* size (horizontal/vertical) limits, in pixels (min, max, alignment) */
+	DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE		= 0x0001,
+	/* scale ratio (horizontal/vertical), 16.16 fixed point (min, max) */
+	DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE		= 0x0002,
+
+	/* image buffer area */
+	DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER	= 0x0001 << 16,
+	/* src/dst rectangle area */
+	DRM_EXYNOS_IPP_LIMIT_SIZE_AREA		= 0x0002 << 16,
+	/* src/dst rectangle area when rotation enabled */
+	DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED	= 0x0003 << 16,
+
+	DRM_EXYNOS_IPP_LIMIT_TYPE_MASK		= 0x000f,
+	DRM_EXYNOS_IPP_LIMIT_SIZE_MASK		= 0x000f << 16,
+};
+
+struct drm_exynos_ipp_limit_val {
+	__u32 min;
+	__u32 max;
+	__u32 align;
+	__u32 reserved;
+};
+
+/**
+ * IPP module limitation.
+ *
+ * @type: limit type (see drm_exynos_ipp_limit_type enum)
+ * @reserved: padding
+ * @h: horizontal limits
+ * @v: vertical limits
+ */
+struct drm_exynos_ipp_limit {
+	__u32 type;
+	__u32 reserved;
+	struct drm_exynos_ipp_limit_val h;
+	struct drm_exynos_ipp_limit_val v;
+};
+
+/**
+ * Get IPP limits for given image format.
+ *
+ * @ipp_id: id of IPP module to query
+ * @fourcc: image format code (see DRM_FORMAT_* in drm_fourcc.h)
+ * @modifier: image format modifier (see DRM_FORMAT_MOD_* in drm_fourcc.h)
+ * @type: source/destination identifier (drm_exynos_ipp_format_type enum)
+ * @limits_count: size of limits array (in entries) / number of filled entries
+ *		 (set by driver)
+ * @limits_ptr: pointer to limits array or NULL
+ */
+struct drm_exynos_ioctl_ipp_get_limits {
+	__u32 ipp_id;
+	__u32 fourcc;
+	__u64 modifier;
+	__u32 type;
+	__u32 limits_count;
+	__u64 limits_ptr;
+};
+
+enum drm_exynos_ipp_task_id {
+	/* buffer described by struct drm_exynos_ipp_task_buffer */
+	DRM_EXYNOS_IPP_TASK_BUFFER		= 0x0001,
+	/* rectangle described by struct drm_exynos_ipp_task_rect */
+	DRM_EXYNOS_IPP_TASK_RECTANGLE		= 0x0002,
+	/* transformation described by struct drm_exynos_ipp_task_transform */
+	DRM_EXYNOS_IPP_TASK_TRANSFORM		= 0x0003,
+	/* alpha configuration described by struct drm_exynos_ipp_task_alpha */
+	DRM_EXYNOS_IPP_TASK_ALPHA		= 0x0004,
+
+	/* source image data (for buffer and rectangle chunks) */
+	DRM_EXYNOS_IPP_TASK_TYPE_SOURCE		= 0x0001 << 16,
+	/* destination image data (for buffer and rectangle chunks) */
+	DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION	= 0x0002 << 16,
+};
+
+/**
+ * Memory buffer with image data.
+ *
+ * @id: must be DRM_EXYNOS_IPP_TASK_BUFFER
+ * Other parameters are the same as for the AddFB2 generic DRM ioctl.
+ */
+struct drm_exynos_ipp_task_buffer {
+	__u32	id;
+	__u32	fourcc;
+	__u32	width, height;
+	__u32	gem_id[4];
+	__u32	offset[4];
+	__u32	pitch[4];
+	__u64	modifier;
+};
+
+/**
+ * Rectangle for processing.
+ *
+ * @id: must be DRM_EXYNOS_IPP_TASK_RECTANGLE
+ * @reserved: padding
+ * @x,@y: left corner in pixels
+ * @w,@h: width/height in pixels
+ */
+struct drm_exynos_ipp_task_rect {
+	__u32	id;
+	__u32	reserved;
+	__u32	x;
+	__u32	y;
+	__u32	w;
+	__u32	h;
+};
+
+/**
+ * Image transformation description.
+ *
+ * @id: must be DRM_EXYNOS_IPP_TASK_TRANSFORM
+ * @rotation: DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* values
+ */
+struct drm_exynos_ipp_task_transform {
+	__u32	id;
+	__u32	rotation;
+};
+
+/**
+ * Image global alpha configuration for formats without alpha values.
+ *
+ * @id: must be DRM_EXYNOS_IPP_TASK_ALPHA
+ * @value: global alpha value (0-255)
+ */
+struct drm_exynos_ipp_task_alpha {
+	__u32	id;
+	__u32	value;
+};
+
+enum drm_exynos_ipp_flag {
+	/* generate DRM event after processing */
+	DRM_EXYNOS_IPP_FLAG_EVENT	= 0x01,
+	/* dry run, only check task parameters */
+	DRM_EXYNOS_IPP_FLAG_TEST_ONLY	= 0x02,
+	/* non-blocking processing */
+	DRM_EXYNOS_IPP_FLAG_NONBLOCK	= 0x04,
+};
+
+#define DRM_EXYNOS_IPP_FLAGS (DRM_EXYNOS_IPP_FLAG_EVENT |\
+		DRM_EXYNOS_IPP_FLAG_TEST_ONLY | DRM_EXYNOS_IPP_FLAG_NONBLOCK)
+
+/**
+ * Perform image processing described by array of drm_exynos_ipp_task_*
+ * structures (parameters array).
+ *
+ * @ipp_id: id of IPP module to run the task
+ * @flags: bitmask of drm_exynos_ipp_flag values
+ * @reserved: padding
+ * @params_size: size of parameters array (in bytes)
+ * @params_ptr: pointer to parameters array or NULL
+ * @user_data: (optional) data for drm event
+ */
+struct drm_exynos_ioctl_ipp_commit {
+	__u32 ipp_id;
+	__u32 flags;
+	__u32 reserved;
+	__u32 params_size;
+	__u64 params_ptr;
+	__u64 user_data;
+};
+
 #define DRM_EXYNOS_GEM_CREATE		0x00
 #define DRM_EXYNOS_GEM_MAP		0x01
 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */
@@ -147,6 +360,11 @@
 #define DRM_EXYNOS_G2D_EXEC		0x22
 
 /* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */
+/* IPP - Image Post Processing */
+#define DRM_EXYNOS_IPP_GET_RESOURCES	0x40
+#define DRM_EXYNOS_IPP_GET_CAPS		0x41
+#define DRM_EXYNOS_IPP_GET_LIMITS	0x42
+#define DRM_EXYNOS_IPP_COMMIT		0x43
 
 #define DRM_IOCTL_EXYNOS_GEM_CREATE		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create)
@@ -165,8 +383,20 @@
 #define DRM_IOCTL_EXYNOS_G2D_EXEC		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec)
 
+#define DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES	DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_GET_RESOURCES, \
+		struct drm_exynos_ioctl_ipp_get_res)
+#define DRM_IOCTL_EXYNOS_IPP_GET_CAPS		DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_GET_CAPS, struct drm_exynos_ioctl_ipp_get_caps)
+#define DRM_IOCTL_EXYNOS_IPP_GET_LIMITS		DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_GET_LIMITS, \
+		struct drm_exynos_ioctl_ipp_get_limits)
+#define DRM_IOCTL_EXYNOS_IPP_COMMIT		DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_COMMIT, struct drm_exynos_ioctl_ipp_commit)
+
 /* EXYNOS specific events */
 #define DRM_EXYNOS_G2D_EVENT		0x80000000
+#define DRM_EXYNOS_IPP_EVENT		0x80000002
 
 struct drm_exynos_g2d_event {
 	struct drm_event	base;
@@ -177,6 +407,16 @@
 	__u32			reserved;
 };
 
+struct drm_exynos_ipp_event {
+	struct drm_event	base;
+	__u64			user_data;
+	__u32			tv_sec;
+	__u32			tv_usec;
+	__u32			ipp_id;
+	__u32			sequence;
+	__u64			reserved;
+};
+
 #if defined(__cplusplus)
 }
 #endif
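
The count/ptr convention documented for drm_exynos_ioctl_ipp_get_res suggests the usual two-pass query. A minimal sketch, assuming the driver fills count_ipps when ipp_id_ptr is NULL (the helper name is ours):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/exynos_drm.h>

/* Enumerate IPP modules: first learn the count, then fetch the ids. */
static uint32_t *ipp_enumerate(int fd, uint32_t *count)
{
	struct drm_exynos_ioctl_ipp_get_res res = { 0 };
	uint32_t *ids;

	if (ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES, &res) < 0)
		return NULL;			/* count query failed */

	ids = calloc(res.count_ipps, sizeof(*ids));
	if (!ids)
		return NULL;

	res.ipp_id_ptr = (uintptr_t)ids;	/* count_ipps already set */
	if (ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES, &res) < 0) {
		free(ids);
		return NULL;
	}

	*count = res.count_ipps;
	return ids;
}
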
diff --git a/include/uapi/drm/tegra_drm.h b/include/uapi/drm/tegra_drm.h
index d954f8c..c4df3c3 100644
--- a/include/uapi/drm/tegra_drm.h
+++ b/include/uapi/drm/tegra_drm.h
@@ -32,143 +32,615 @@
 #define DRM_TEGRA_GEM_CREATE_TILED     (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
 
+/**
+ * struct drm_tegra_gem_create - parameters for the GEM object creation IOCTL
+ */
 struct drm_tegra_gem_create {
+	/**
+	 * @size:
+	 *
+	 * The size, in bytes, of the buffer object to be created.
+	 */
 	__u64 size;
+
+	/**
+	 * @flags:
+	 *
+	 * A bitmask of flags that influence the creation of GEM objects:
+	 *
+	 * DRM_TEGRA_GEM_CREATE_TILED
+	 *   Use the 16x16 tiling format for this buffer.
+	 *
+	 * DRM_TEGRA_GEM_CREATE_BOTTOM_UP
+	 *   The buffer has a bottom-up layout.
+	 */
 	__u32 flags;
+
+	/**
+	 * @handle:
+	 *
+	 * The handle of the created GEM object. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u32 handle;
 };
 
+/**
+ * struct drm_tegra_gem_mmap - parameters for the GEM mmap IOCTL
+ */
 struct drm_tegra_gem_mmap {
+	/**
+	 * @handle:
+	 *
+	 * Handle of the GEM object to obtain an mmap offset for.
+	 */
 	__u32 handle;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
+
+	/**
+	 * @offset:
+	 *
+	 * The mmap offset for the given GEM object. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u64 offset;
 };
 
+/**
+ * struct drm_tegra_syncpt_read - parameters for the read syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_read {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to read the current value from.
+	 */
 	__u32 id;
+
+	/**
+	 * @value:
+	 *
+	 * The current syncpoint value. Set by the kernel upon successful
+	 * completion of the IOCTL.
+	 */
 	__u32 value;
 };
 
+/**
+ * struct drm_tegra_syncpt_incr - parameters for the increment syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_incr {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to increment.
+	 */
 	__u32 id;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_syncpt_wait - parameters for the wait syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_wait {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to wait on.
+	 */
 	__u32 id;
+
+	/**
+	 * @thresh:
+	 *
+	 * Threshold value for which to wait.
+	 */
 	__u32 thresh;
+
+	/**
+	 * @timeout:
+	 *
+	 * Timeout, in milliseconds, to wait.
+	 */
 	__u32 timeout;
+
+	/**
+	 * @value:
+	 *
+	 * The new syncpoint value after the wait. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u32 value;
 };
 
 #define DRM_TEGRA_NO_TIMEOUT	(0xffffffff)
 
+/**
+ * struct drm_tegra_open_channel - parameters for the open channel IOCTL
+ */
 struct drm_tegra_open_channel {
+	/**
+	 * @client:
+	 *
+	 * The client ID for this channel.
+	 */
 	__u32 client;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
+
+	/**
+	 * @context:
+	 *
+	 * The application context of this channel. Set by the kernel upon
+	 * successful completion of the IOCTL. This context needs to be passed
+	 * to the DRM_TEGRA_CLOSE_CHANNEL or the DRM_TEGRA_SUBMIT IOCTLs.
+	 */
 	__u64 context;
 };
 
+/**
+ * struct drm_tegra_close_channel - parameters for the close channel IOCTL
+ */
 struct drm_tegra_close_channel {
+	/**
+	 * @context:
+	 *
+	 * The application context of this channel. This is obtained from the
+	 * DRM_TEGRA_OPEN_CHANNEL IOCTL.
+	 */
 	__u64 context;
 };
 
+/**
+ * struct drm_tegra_get_syncpt - parameters for the get syncpoint IOCTL
+ */
 struct drm_tegra_get_syncpt {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel for which to obtain
+	 * the syncpoint ID.
+	 */
 	__u64 context;
+
+	/**
+	 * @index:
+	 *
+	 * Index of the client syncpoint for which to obtain the ID.
+	 */
 	__u32 index;
+
+	/**
+	 * @id:
+	 *
+	 * The ID of the given syncpoint. Set by the kernel upon successful
+	 * completion of the IOCTL.
+	 */
 	__u32 id;
 };
 
+/**
+ * struct drm_tegra_get_syncpt_base - parameters for the get wait base IOCTL
+ */
 struct drm_tegra_get_syncpt_base {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel for which to
+	 * obtain the wait base.
+	 */
 	__u64 context;
+
+	/**
+	 * @syncpt:
+	 *
+	 * ID of the syncpoint for which to obtain the wait base.
+	 */
 	__u32 syncpt;
+
+	/**
+	 * @id:
+	 *
+	 * The ID of the wait base corresponding to the client syncpoint. Set
+	 * by the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 id;
 };
 
+/**
+ * struct drm_tegra_syncpt - syncpoint increment operation
+ */
 struct drm_tegra_syncpt {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to operate on.
+	 */
 	__u32 id;
+
+	/**
+	 * @incrs:
+	 *
+	 * Number of increments to perform for the syncpoint.
+	 */
 	__u32 incrs;
 };
 
+/**
+ * struct drm_tegra_cmdbuf - structure describing a command buffer
+ */
 struct drm_tegra_cmdbuf {
+	/**
+	 * @handle:
+	 *
+	 * Handle to a GEM object containing the command buffer.
+	 */
 	__u32 handle;
+
+	/**
+	 * @offset:
+	 *
+	 * Offset, in bytes, into the GEM object identified by @handle at
+	 * which the command buffer starts.
+	 */
 	__u32 offset;
+
+	/**
+	 * @words:
+	 *
+	 * Number of 32-bit words in this command buffer.
+	 */
 	__u32 words;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_reloc - GEM object relocation structure
+ */
 struct drm_tegra_reloc {
 	struct {
+		/**
+		 * @cmdbuf.handle:
+		 *
+		 * Handle to the GEM object containing the command buffer for
+		 * which to perform this GEM object relocation.
+		 */
 		__u32 handle;
+
+		/**
+		 * @cmdbuf.offset:
+		 *
+		 * Offset, in bytes, into the command buffer at which to
+		 * insert the relocated address.
+		 */
 		__u32 offset;
 	} cmdbuf;
 	struct {
+		/**
+		 * @target.handle:
+		 *
+		 * Handle to the GEM object to be relocated.
+		 */
 		__u32 handle;
+
+		/**
+		 * @target.offset:
+		 *
+		 * Offset, in bytes, into the target GEM object at which the
+		 * relocated data starts.
+		 */
 		__u32 offset;
 	} target;
+
+	/**
+	 * @shift:
+	 *
+	 * The number of bits by which to shift relocated addresses.
+	 */
 	__u32 shift;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_waitchk - wait check structure
+ */
 struct drm_tegra_waitchk {
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object containing a command stream on which to
+	 * perform the wait check.
+	 */
 	__u32 handle;
+
+	/**
+	 * @offset:
+	 *
+	 * Offset, in bytes, of the location in the command stream to perform
+	 * the wait check on.
+	 */
 	__u32 offset;
+
+	/**
+	 * @syncpt:
+	 *
+	 * ID of the syncpoint to wait check.
+	 */
 	__u32 syncpt;
+
+	/**
+	 * @thresh:
+	 *
+	 * Threshold value for which to check.
+	 */
 	__u32 thresh;
 };
 
+/**
+ * struct drm_tegra_submit - job submission structure
+ */
 struct drm_tegra_submit {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel to use for the
+	 * execution of this job.
+	 */
 	__u64 context;
-	__u32 num_syncpts;
-	__u32 num_cmdbufs;
-	__u32 num_relocs;
-	__u32 num_waitchks;
-	__u32 waitchk_mask;
-	__u32 timeout;
-	__u64 syncpts;
-	__u64 cmdbufs;
-	__u64 relocs;
-	__u64 waitchks;
-	__u32 fence;		/* Return value */
 
-	__u32 reserved[5];	/* future expansion */
+	/**
+	 * @num_syncpts:
+	 *
+	 * The number of syncpoints operated on by this job. This defines the
+	 * length of the array pointed to by @syncpts.
+	 */
+	__u32 num_syncpts;
+
+	/**
+	 * @num_cmdbufs:
+	 *
+	 * The number of command buffers to execute as part of this job. This
+	 * defines the length of the array pointed to by @cmdbufs.
+	 */
+	__u32 num_cmdbufs;
+
+	/**
+	 * @num_relocs:
+	 *
+	 * The number of relocations to perform before executing this job.
+	 * This defines the length of the array pointed to by @relocs.
+	 */
+	__u32 num_relocs;
+
+	/**
+	 * @num_waitchks:
+	 *
+	 * The number of wait checks to perform as part of this job. This
+	 * defines the length of the array pointed to by @waitchks.
+	 */
+	__u32 num_waitchks;
+
+	/**
+	 * @waitchk_mask:
+	 *
+	 * Bitmask of valid wait checks.
+	 */
+	__u32 waitchk_mask;
+
+	/**
+	 * @timeout:
+	 *
+	 * Timeout, in milliseconds, before this job is cancelled.
+	 */
+	__u32 timeout;
+
+	/**
+	 * @syncpts:
+	 *
+	 * A pointer to an array of &struct drm_tegra_syncpt structures that
+	 * specify the syncpoint operations performed as part of this job.
+	 * The number of elements in the array must be equal to the value
+	 * given by @num_syncpts.
+	 */
+	__u64 syncpts;
+
+	/**
+	 * @cmdbufs:
+	 *
+	 * A pointer to an array of &struct drm_tegra_cmdbuf structures that
+	 * define the command buffers to execute as part of this job. The
+	 * number of elements in the array must be equal to the value given
+	 * by @num_cmdbufs.
+	 */
+	__u64 cmdbufs;
+
+	/**
+	 * @relocs:
+	 *
+	 * A pointer to an array of &struct drm_tegra_reloc structures that
+	 * specify the relocations that need to be performed before executing
+	 * this job. The number of elements in the array must be equal to the
+	 * value given by @num_relocs.
+	 */
+	__u64 relocs;
+
+	/**
+	 * @waitchks:
+	 *
+	 * A pointer to an array of &struct drm_tegra_waitchk structures that
+	 * specify the wait checks to be performed while executing this job.
+	 * The number of elements in the array must be equal to the value
+	 * given by @num_waitchks.
+	 */
+	__u64 waitchks;
+
+	/**
+	 * @fence:
+	 *
+	 * The threshold of the syncpoint associated with this job after it
+	 * has been completed. Set by the kernel upon successful completion of
+	 * the IOCTL. This can be used with the DRM_TEGRA_SYNCPT_WAIT IOCTL to
+	 * wait for this job to be finished.
+	 */
+	__u32 fence;
+
+	/**
+	 * @reserved:
+	 *
+	 * This field is reserved for future use. Must be 0.
+	 */
+	__u32 reserved[5];
 };
 
 #define DRM_TEGRA_GEM_TILING_MODE_PITCH 0
 #define DRM_TEGRA_GEM_TILING_MODE_TILED 1
 #define DRM_TEGRA_GEM_TILING_MODE_BLOCK 2
 
+/**
+ * struct drm_tegra_gem_set_tiling - parameters for the set tiling IOCTL
+ */
 struct drm_tegra_gem_set_tiling {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to set the tiling parameters.
+	 */
 	__u32 handle;
+
+	/**
+	 * @mode:
+	 *
+	 * The tiling mode to set. Must be one of:
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_PITCH
+	 *   pitch linear format
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_TILED
+	 *   16x16 tiling format
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_BLOCK
+	 *   16Bx2 tiling format
+	 */
 	__u32 mode;
+
+	/**
+	 * @value:
+	 *
+	 * The value to set for the tiling mode parameter.
+	 */
 	__u32 value;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_gem_get_tiling - parameters for the get tiling IOCTL
+ */
 struct drm_tegra_gem_get_tiling {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to query the tiling parameters.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @mode:
+	 *
+	 * The tiling mode currently associated with the GEM object. Set by
+	 * the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 mode;
+
+	/**
+	 * @value:
+	 *
+	 * The tiling mode parameter currently associated with the GEM object.
+	 * Set by the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 value;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
 #define DRM_TEGRA_GEM_BOTTOM_UP		(1 << 0)
 #define DRM_TEGRA_GEM_FLAGS		(DRM_TEGRA_GEM_BOTTOM_UP)
 
+/**
+ * struct drm_tegra_gem_set_flags - parameters for the set flags IOCTL
+ */
 struct drm_tegra_gem_set_flags {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to set the flags.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @flags:
+	 *
+	 * The flags to set for the GEM object.
+	 */
 	__u32 flags;
 };
 
+/**
+ * struct drm_tegra_gem_get_flags - parameters for the get flags IOCTL
+ */
 struct drm_tegra_gem_get_flags {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to query the flags.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @flags:
+	 *
+	 * The flags currently associated with the GEM object. Set by the
+	 * kernel upon successful completion of the IOCTL.
+	 */
 	__u32 flags;
 };
 
@@ -193,7 +665,7 @@
 #define DRM_IOCTL_TEGRA_SYNCPT_INCR DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_INCR, struct drm_tegra_syncpt_incr)
 #define DRM_IOCTL_TEGRA_SYNCPT_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_WAIT, struct drm_tegra_syncpt_wait)
 #define DRM_IOCTL_TEGRA_OPEN_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_OPEN_CHANNEL, struct drm_tegra_open_channel)
-#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_open_channel)
+#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_close_channel)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT, struct drm_tegra_get_syncpt)
 #define DRM_IOCTL_TEGRA_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SUBMIT, struct drm_tegra_submit)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT_BASE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT_BASE, struct drm_tegra_get_syncpt_base)
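
A minimal sketch of the open-channel flow these kernel-doc comments describe; the returned context cookie is what the CLOSE_CHANNEL and SUBMIT ioctls expect, and the client ID passed in is hardware specific:

#include <sys/ioctl.h>
#include <drm/tegra_drm.h>

/* Open a channel; the returned context identifies it from then on. */
static int tegra_open_channel(int fd, __u32 client, __u64 *context)
{
	struct drm_tegra_open_channel args = {
		.client = client,	/* hardware-specific client ID */
	};

	if (ioctl(fd, DRM_IOCTL_TEGRA_OPEN_CHANNEL, &args) < 0)
		return -1;

	*context = args.context;	/* set by the kernel */
	return 0;
}
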
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
new file mode 100644
index 0000000..7b66277
--- /dev/null
+++ b/include/uapi/drm/v3d_drm.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright © 2014-2018 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _V3D_DRM_H_
+#define _V3D_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_V3D_SUBMIT_CL                         0x00
+#define DRM_V3D_WAIT_BO                           0x01
+#define DRM_V3D_CREATE_BO                         0x02
+#define DRM_V3D_MMAP_BO                           0x03
+#define DRM_V3D_GET_PARAM                         0x04
+#define DRM_V3D_GET_BO_OFFSET                     0x05
+
+#define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
+#define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
+#define DRM_IOCTL_V3D_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo)
+#define DRM_IOCTL_V3D_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
+#define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
+#define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
+
+/**
+ * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * This asks the kernel to have the GPU execute an optional binner
+ * command list, and a render command list.
+ */
+struct drm_v3d_submit_cl {
+	/* Pointer to the binner command list.
+	 *
+	 * This is the first set of commands executed, which runs the
+	 * coordinate shader to determine where primitives land on the screen,
+	 * then writes out the state updates and draw calls necessary per tile
+	 * to the tile allocation BO.
+	 */
+	__u32 bcl_start;
+
+	/** End address of the BCL (first byte after the BCL) */
+	__u32 bcl_end;
+
+	/* Offset of the render command list.
+	 *
+	 * This is the second set of commands executed, which will either
+	 * execute the tiles that have been set up by the BCL, or a fixed set
+	 * of tiles (in the case of RCL-only blits).
+	 */
+	__u32 rcl_start;
+
+	/** End address of the RCL (first byte after the RCL) */
+	__u32 rcl_end;
+
+	/** An optional sync object to wait on before starting the BCL. */
+	__u32 in_sync_bcl;
+	/** An optional sync object to wait on before starting the RCL. */
+	__u32 in_sync_rcl;
+	/** An optional sync object to place the completion fence in. */
+	__u32 out_sync;
+
+	/* Offset of the tile alloc memory
+	 *
+	 * This is optional on V3D 3.3 (where the CL can set the value) but
+	 * required on V3D 4.1.
+	 */
+	__u32 qma;
+
+	/** Size of the tile alloc memory. */
+	__u32 qms;
+
+	/** Offset of the tile state data array. */
+	__u32 qts;
+
+	/* Pointer to a u32 array of the BOs that are referenced by the job.
+	 */
+	__u64 bo_handles;
+
+	/* Number of BO handles passed in (size is that times 4). */
+	__u32 bo_handle_count;
+
+	/* Pad, must be zero-filled. */
+	__u32 pad;
+};
+
+/**
+ * struct drm_v3d_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_V3D_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_v3d_wait_bo {
+	__u32 handle;
+	__u32 pad;
+	__u64 timeout_ns;
+};
+
+/**
+ * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_v3d_create_bo {
+	__u32 size;
+	__u32 flags;
+	/** Returned GEM handle for the BO. */
+	__u32 handle;
+	/**
+	 * Returned offset for the BO in the V3D address space.  This offset
+	 * is private to the DRM fd and is valid for the lifetime of the GEM
+	 * handle.
+	 *
+	 * This offset value will always be nonzero, since various HW
+	 * units treat 0 specially.
+	 */
+	__u32 offset;
+};
+
+/**
+ * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs.
+ *
+ * This doesn't actually perform an mmap.  Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node.  This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_v3d_mmap_bo {
+	/** Handle for the object being mapped. */
+	__u32 handle;
+	__u32 flags;
+	/** offset into the drm node to use for subsequent mmap call. */
+	__u64 offset;
+};
+
+enum drm_v3d_param {
+	DRM_V3D_PARAM_V3D_UIFCFG,
+	DRM_V3D_PARAM_V3D_HUB_IDENT1,
+	DRM_V3D_PARAM_V3D_HUB_IDENT2,
+	DRM_V3D_PARAM_V3D_HUB_IDENT3,
+	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
+	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
+	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
+};
+
+struct drm_v3d_get_param {
+	__u32 param;
+	__u32 pad;
+	__u64 value;
+};
+
+/**
+ * Returns the offset for the BO in the V3D address space for this DRM fd.
+ * This is the same value returned by drm_v3d_create_bo, if that was called
+ * from this DRM fd.
+ */
+struct drm_v3d_get_bo_offset {
+	__u32 handle;
+	__u32 offset;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _V3D_DRM_H_ */
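
Putting CREATE_BO and MMAP_BO together as the comments above describe: MMAP_BO only hands back a fake offset for a later mmap() on the DRM fd. A sketch (the helper name is ours):

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/v3d_drm.h>

/* Allocate a BO and map it into this process. */
static void *v3d_create_and_map(int fd, uint32_t size, uint32_t *handle)
{
	struct drm_v3d_create_bo create = { .size = size };
	struct drm_v3d_mmap_bo map = { 0 };

	if (ioctl(fd, DRM_IOCTL_V3D_CREATE_BO, &create) < 0)
		return MAP_FAILED;

	map.handle = create.handle;
	if (ioctl(fd, DRM_IOCTL_V3D_MMAP_BO, &map) < 0)
		return MAP_FAILED;

	*handle = create.handle;
	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    fd, map.offset);
}
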
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index b95a0e1..2cac627 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -183,10 +183,17 @@
 	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
 	__u32 perfmonid;
 
-	/* Unused field to align this struct on 64 bits. Must be set to 0.
-	 * If one ever needs to add an u32 field to this struct, this field
-	 * can be used.
+	/* Syncobj handle to wait on. If set, processing of this render job
+	 * will not start until the syncobj is signaled. 0 means ignore.
 	 */
+	__u32 in_sync;
+
+	/* Syncobj handle to export fence to. If set, the fence in the syncobj
+	 * will be replaced with a fence that signals upon completion of this
+	 * render job. 0 means ignore.
+	 */
+	__u32 out_sync;
+
 	__u32 pad2;
 };
 
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index a04adbc..ed01859 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -29,6 +29,7 @@
 
 #include <linux/types.h>
 #include <linux/fs.h>
+#include <linux/signal.h>
 #include <asm/byteorder.h>
 
 typedef __kernel_ulong_t aio_context_t;
@@ -38,10 +39,8 @@
 	IOCB_CMD_PWRITE = 1,
 	IOCB_CMD_FSYNC = 2,
 	IOCB_CMD_FDSYNC = 3,
-	/* These two are experimental.
-	 * IOCB_CMD_PREADX = 4,
-	 * IOCB_CMD_POLL = 5,
-	 */
+	/* 4 was the experimental IOCB_CMD_PREADX */
+	IOCB_CMD_POLL = 5,
 	IOCB_CMD_NOOP = 6,
 	IOCB_CMD_PREADV = 7,
 	IOCB_CMD_PWRITEV = 8,
@@ -108,5 +107,10 @@
 #undef IFBIG
 #undef IFLITTLE
 
+struct __aio_sigset {
+	sigset_t __user	*sigmask;
+	size_t		sigsetsize;
+};
+
 #endif /* __LINUX__AIO_ABI_H */
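
A hedged sketch of the newly enabled IOCB_CMD_POLL from userspace, using the raw io_submit syscall; ctx is assumed to come from a prior io_setup, and libaio would normally wrap this:

#include <poll.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

/* Queue a one-shot poll for readability on fd. */
static long aio_poll_readable(aio_context_t ctx, int fd)
{
	struct iocb cb;
	struct iocb *cbs[1] = { &cb };

	memset(&cb, 0, sizeof(cb));
	cb.aio_lio_opcode = IOCB_CMD_POLL;
	cb.aio_fildes = fd;
	cb.aio_buf = POLLIN;	/* requested events, as in poll(2) */

	return syscall(__NR_io_submit, ctx, 1, cbs);
}
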
 
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..04f9bd2 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -465,6 +465,7 @@
 };
 
 #define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_SID_UNSET ((unsigned int)-1)
 
 /* audit_rule_data supports filter rules with both integer and string
  * fields.  It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index c8d99b9..5ca1d21 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -422,6 +422,21 @@
 	char name[BTRFS_INO_LOOKUP_PATH_MAX];
 };
 
+#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
+struct btrfs_ioctl_ino_lookup_user_args {
+	/* in, inode number containing the subvolume of 'subvolid' */
+	__u64 dirid;
+	/* in */
+	__u64 treeid;
+	/* out, name of the subvolume of 'treeid' */
+	char name[BTRFS_VOL_NAME_MAX + 1];
+	/*
+	 * out, constructed path from the directory with which the ioctl is
+	 * called to dirid
+	 */
+	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
+};
+
 /* Search criteria for the btrfs SEARCH ioctl family. */
 struct btrfs_ioctl_search_key {
 	/*
@@ -725,6 +740,82 @@
 	__u64 reserved[4];		/* in */
 };
 
+/*
+ * Information about a fs tree root.
+ *
+ * All items are filled by the ioctl
+ */
+struct btrfs_ioctl_get_subvol_info_args {
+	/* Id of this subvolume */
+	__u64 treeid;
+
+	/* Name of this subvolume, used to get the real name at mount point */
+	char name[BTRFS_VOL_NAME_MAX + 1];
+
+	/*
+	 * Id of the subvolume which contains this subvolume.
+	 * Zero for top-level subvolume or a deleted subvolume.
+	 */
+	__u64 parent_id;
+
+	/*
+	 * Inode number of the directory which contains this subvolume.
+	 * Zero for top-level subvolume or a deleted subvolume
+	 */
+	__u64 dirid;
+
+	/* Latest transaction id of this subvolume */
+	__u64 generation;
+
+	/* Flags of this subvolume */
+	__u64 flags;
+
+	/* UUID of this subvolume */
+	__u8 uuid[BTRFS_UUID_SIZE];
+
+	/*
+	 * UUID of the subvolume of which this subvolume is a snapshot.
+	 * All zero for a non-snapshot subvolume.
+	 */
+	__u8 parent_uuid[BTRFS_UUID_SIZE];
+
+	/*
+	 * UUID of the subvolume from which this subvolume was received.
+	 * All zero for non-received subvolume.
+	 */
+	__u8 received_uuid[BTRFS_UUID_SIZE];
+
+	/* Transaction id indicating when change/create/send/receive happened */
+	__u64 ctransid;
+	__u64 otransid;
+	__u64 stransid;
+	__u64 rtransid;
+	/* Time corresponding to c/o/s/rtransid */
+	struct btrfs_ioctl_timespec ctime;
+	struct btrfs_ioctl_timespec otime;
+	struct btrfs_ioctl_timespec stime;
+	struct btrfs_ioctl_timespec rtime;
+
+	/* Must be zero */
+	__u64 reserved[8];
+};
+
+#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
+struct btrfs_ioctl_get_subvol_rootref_args {
+	/* in/out, minimum id of rootref's treeid to be searched */
+	__u64 min_treeid;
+
+	/* out */
+	struct {
+		__u64 treeid;
+		__u64 dirid;
+	} rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
+
+	/* out, number of found items */
+	__u8 num_items;
+	__u8 align[7];
+};
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -843,5 +934,11 @@
 				   struct btrfs_ioctl_vol_args_v2)
 #define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \
 					struct btrfs_ioctl_logical_ino_args)
+#define BTRFS_IOC_GET_SUBVOL_INFO _IOR(BTRFS_IOCTL_MAGIC, 60, \
+				struct btrfs_ioctl_get_subvol_info_args)
+#define BTRFS_IOC_GET_SUBVOL_ROOTREF _IOWR(BTRFS_IOCTL_MAGIC, 61, \
+				struct btrfs_ioctl_get_subvol_rootref_args)
+#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
+				struct btrfs_ioctl_ino_lookup_user_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
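
A sketch of the new unprivileged subvolume query: BTRFS_IOC_GET_SUBVOL_INFO is issued on a file descriptor opened on the subvolume itself, and every field is filled by the ioctl:

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Print basic information about the subvolume behind subvol_fd. */
static int print_subvol_info(int subvol_fd)
{
	struct btrfs_ioctl_get_subvol_info_args info;

	if (ioctl(subvol_fd, BTRFS_IOC_GET_SUBVOL_INFO, &info) < 0)
		return -1;

	printf("subvol %llu (%s), parent %llu, generation %llu\n",
	       (unsigned long long)info.treeid, info.name,
	       (unsigned long long)info.parent_id,
	       (unsigned long long)info.generation);
	return 0;
}
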
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index d2a8313..73e0191 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -242,6 +242,8 @@
 #define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)
 #define FIDEDUPERANGE	_IOWR(0x94, 54, struct file_dedupe_range)
 
+#define FSLABEL_MAX 256	/* Max chars for the interface; each fs may differ */
+
 #define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
 #define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
 #define	FS_IOC_GETVERSION		_IOR('v', 1, long)
@@ -251,8 +253,10 @@
 #define FS_IOC32_SETFLAGS		_IOW('f', 2, int)
 #define FS_IOC32_GETVERSION		_IOR('v', 1, int)
 #define FS_IOC32_SETVERSION		_IOW('v', 2, int)
-#define FS_IOC_FSGETXATTR		_IOR ('X', 31, struct fsxattr)
-#define FS_IOC_FSSETXATTR		_IOW ('X', 32, struct fsxattr)
+#define FS_IOC_FSGETXATTR		_IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR		_IOW('X', 32, struct fsxattr)
+#define FS_IOC_GETFSLABEL		_IOR(0x94, 49, char[FSLABEL_MAX])
+#define FS_IOC_SETFSLABEL		_IOW(0x94, 50, char[FSLABEL_MAX])
 
 /*
  * File system encryption support
@@ -275,6 +279,8 @@
 #define FS_ENCRYPTION_MODE_AES_256_CTS		4
 #define FS_ENCRYPTION_MODE_AES_128_CBC		5
 #define FS_ENCRYPTION_MODE_AES_128_CTS		6
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8
 
 struct fscrypt_policy {
 	__u8 version;
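
The new label ioctls are filesystem-generic; a sketch reading a label through any fd on the filesystem (FSLABEL_MAX bounds the buffer, and labels shorter than that are NUL-terminated in practice):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

/* Read the filesystem label through any fd on that filesystem. */
static int show_fs_label(int fd)
{
	char label[FSLABEL_MAX];

	if (ioctl(fd, FS_IOC_GETFSLABEL, label) < 0)
		return -1;

	printf("label: %.*s\n", FSLABEL_MAX, label);
	return 0;
}
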
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
index 9008f31..ac8c60b 100644
--- a/include/uapi/linux/psp-sev.h
+++ b/include/uapi/linux/psp-sev.h
@@ -30,6 +30,7 @@
 	SEV_PDH_GEN,
 	SEV_PDH_CERT_EXPORT,
 	SEV_PEK_CERT_IMPORT,
+	SEV_GET_ID,
 
 	SEV_MAX,
 };
@@ -124,6 +125,17 @@
 } __packed;
 
 /**
+ * struct sev_user_data_get_id - GET_ID command parameters
+ *
+ * @socket1: Buffer to pass unique ID of first socket
+ * @socket2: Buffer to pass unique ID of second socket
+ */
+struct sev_user_data_get_id {
+	__u8 socket1[64];			/* Out */
+	__u8 socket2[64];			/* Out */
+} __packed;
+
+/**
  * struct sev_issue_cmd - SEV ioctl parameters
  *
  * @cmd: SEV commands to execute
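
A sketch of driving the new command: SEV_GET_ID goes through the existing SEV_ISSUE_CMD ioctl on /dev/sev, with the struct above as the data payload:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/psp-sev.h>

/* Fetch the per-socket unique IDs from the SEV firmware. */
static int sev_get_id(struct sev_user_data_get_id *id)
{
	struct sev_issue_cmd cmd = {
		.cmd = SEV_GET_ID,
		.data = (__u64)(unsigned long)id,
	};
	int fd, ret;

	fd = open("/dev/sev", O_RDWR);
	if (fd < 0)
		return -1;

	ret = ioctl(fd, SEV_ISSUE_CMD, &cmd);	/* cmd.error on failure */
	close(fd);
	return ret;
}
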
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index 6f0da42..83429a0 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -35,6 +35,10 @@
 	__u64 ssi_stime;
 	__u64 ssi_addr;
 	__u16 ssi_addr_lsb;
+	__u16 __pad2;
+	__s32 ssi_syscall;
+	__u64 ssi_call_addr;
+	__u32 ssi_arch;
 
 	/*
	 * Pad structure to 128 bytes. Remember to update the
@@ -45,7 +49,7 @@
 	 * comes out of a read(2) and we really don't want to have
 	 * a compat on read(2).
 	 */
-	__u8 __pad[46];
+	__u8 __pad[28];
 };
 
 
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 4c0338e..fcf9366 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -42,6 +42,13 @@
 	struct timeval it_value;	/* current value */
 };
 
+#ifndef __kernel_timespec
+struct __kernel_timespec {
+	__kernel_time64_t       tv_sec;                 /* seconds */
+	long long               tv_nsec;                /* nanoseconds */
+};
+#endif
+
 /*
  * legacy timeval structure, only embedded in structures that
  * traditionally used 'timeval' to pass time intervals (not absolute
diff --git a/include/uapi/linux/tty_flags.h b/include/uapi/linux/tty_flags.h
index 6ac609a..900a32e 100644
--- a/include/uapi/linux/tty_flags.h
+++ b/include/uapi/linux/tty_flags.h
@@ -13,7 +13,7 @@
  */
 #define ASYNCB_HUP_NOTIFY	 0 /* Notify getty on hangups and closes
 				    * on the callout port */
-#define ASYNCB_FOURPORT		 1 /* Set OU1, OUT2 per AST Fourport settings */
+#define ASYNCB_FOURPORT		 1 /* Set OUT1, OUT2 per AST Fourport settings */
 #define ASYNCB_SAK		 2 /* Secure Attention Key (Orange book) */
 #define ASYNCB_SPLIT_TERMIOS	 3 /* [x] Separate termios for dialin/callout */
 #define ASYNCB_SPD_HI		 4 /* Use 57600 instead of 38400 bps */
diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h
index cd4f0b8..2fce8b6 100644
--- a/include/uapi/linux/types.h
+++ b/include/uapi/linux/types.h
@@ -49,11 +49,7 @@
 #define __aligned_be64 __be64 __attribute__((aligned(8)))
 #define __aligned_le64 __le64 __attribute__((aligned(8)))
 
-#ifdef __CHECK_POLL
 typedef unsigned __bitwise __poll_t;
-#else
-typedef unsigned __poll_t;
-#endif
 
 #endif /*  __ASSEMBLY__ */
 #endif /* _UAPI_LINUX_TYPES_H */
diff --git a/include/uapi/linux/usb/audio.h b/include/uapi/linux/usb/audio.h
index 3a78e71..13d98e6 100644
--- a/include/uapi/linux/usb/audio.h
+++ b/include/uapi/linux/usb/audio.h
@@ -285,9 +285,22 @@
 static inline __u8 *uac_mixer_unit_bmControls(struct uac_mixer_unit_descriptor *desc,
 					      int protocol)
 {
-	return (protocol == UAC_VERSION_1) ?
-		&desc->baSourceID[desc->bNrInPins + 4] :
-		&desc->baSourceID[desc->bNrInPins + 6];
+	switch (protocol) {
+	case UAC_VERSION_1:
+		return &desc->baSourceID[desc->bNrInPins + 4];
+	case UAC_VERSION_2:
+		return &desc->baSourceID[desc->bNrInPins + 6];
+	case UAC_VERSION_3:
+		return &desc->baSourceID[desc->bNrInPins + 2];
+	default:
+		return NULL;
+	}
+}
+
+static inline __u16 uac3_mixer_unit_wClusterDescrID(struct uac_mixer_unit_descriptor *desc)
+{
+	return (desc->baSourceID[desc->bNrInPins + 1] << 8) |
+		desc->baSourceID[desc->bNrInPins];
 }
 
 static inline __u8 uac_mixer_unit_iMixer(struct uac_mixer_unit_descriptor *desc)
diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h
index 29c120c..fb0cd24 100644
--- a/include/uapi/linux/usb/ch11.h
+++ b/include/uapi/linux/usb/ch11.h
@@ -197,6 +197,11 @@
 #define USB_EXT_PORT_STAT_RX_LANES	0x00000f00
 #define USB_EXT_PORT_STAT_TX_LANES	0x0000f000
 
+#define USB_EXT_PORT_RX_LANES(p) \
+			(((p) & USB_EXT_PORT_STAT_RX_LANES) >> 8)
+#define USB_EXT_PORT_TX_LANES(p) \
+			(((p) & USB_EXT_PORT_STAT_TX_LANES) >> 12)
+
 /*
  * wHubCharacteristics (masks)
  * See USB 2.0 spec Table 11-13, offset 3
diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 4b04ead..f43c3c6 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -260,6 +260,7 @@
 };
 
 #define VIRTIO_GPU_CAPSET_VIRGL 1
+#define VIRTIO_GPU_CAPSET_VIRGL2 2
 
 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */
 struct virtio_gpu_get_capset_info {
diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h
index 69c37ec..a74ca23 100644
--- a/include/uapi/sound/asoc.h
+++ b/include/uapi/sound/asoc.h
@@ -139,6 +139,15 @@
 #define SND_SOC_TPLG_DAI_FLGBIT_SYMMETRIC_CHANNELS      (1 << 1)
 #define SND_SOC_TPLG_DAI_FLGBIT_SYMMETRIC_SAMPLEBITS    (1 << 2)
 
+/* DAI clock gating */
+#define SND_SOC_TPLG_DAI_CLK_GATE_UNDEFINED	0
+#define SND_SOC_TPLG_DAI_CLK_GATE_GATED	1
+#define SND_SOC_TPLG_DAI_CLK_GATE_CONT		2
+
+/* DAI mclk_direction */
+#define SND_SOC_TPLG_MCLK_CO            0 /* for codec, mclk is output */
+#define SND_SOC_TPLG_MCLK_CI            1 /* for codec, mclk is input */
+
 /* DAI physical PCM data formats.
  * Add new formats to the end of the list.
  */
@@ -160,6 +169,18 @@
 #define SND_SOC_TPLG_LNK_FLGBIT_SYMMETRIC_SAMPLEBITS    (1 << 2)
 #define SND_SOC_TPLG_LNK_FLGBIT_VOICE_WAKEUP            (1 << 3)
 
+/* DAI topology BCLK parameter
+ * For backward compatibility, the codec is bclk master by default.
+ */
+#define SND_SOC_TPLG_BCLK_CM         0 /* codec is bclk master */
+#define SND_SOC_TPLG_BCLK_CS         1 /* codec is bclk slave */
+
+/* DAI topology FSYNC parameter
+ * For backward compatibility, the codec is fsync master by default.
+ */
+#define SND_SOC_TPLG_FSYNC_CM         0 /* codec is fsync master */
+#define SND_SOC_TPLG_FSYNC_CS         1 /* codec is fsync slave */
+
 /*
  * Block Header.
  * This header precedes all object and object arrays below.
@@ -312,12 +333,12 @@
 	__le32 size;            /* in bytes of this structure */
 	__le32 id;		/* unique ID - - used to match */
 	__le32 fmt;		/* SND_SOC_DAI_FORMAT_ format value */
-	__u8 clock_gated;	/* 1 if clock can be gated to save power */
+	__u8 clock_gated;	/* SND_SOC_TPLG_DAI_CLK_GATE_ value */
 	__u8 invert_bclk;	/* 1 for inverted BCLK, 0 for normal */
 	__u8 invert_fsync;	/* 1 for inverted frame clock, 0 for normal */
-	__u8 bclk_master;	/* 1 for master of BCLK, 0 for slave */
-	__u8 fsync_master;	/* 1 for master of FSYNC, 0 for slave */
-	__u8 mclk_direction;    /* 0 for input, 1 for output */
+	__u8 bclk_master;	/* SND_SOC_TPLG_BCLK_ value */
+	__u8 fsync_master;	/* SND_SOC_TPLG_FSYNC_ value */
+	__u8 mclk_direction;    /* SND_SOC_TPLG_MCLK_ value */
 	__le16 reserved;	/* for 32bit alignment */
	__le32 mclk_rate;	/* MCLK or SYSCLK frequency in Hz */
	__le32 bclk_rate;	/* BCLK frequency in Hz */
@@ -552,4 +573,61 @@
 	__le32 flags;           /* SND_SOC_TPLG_DAI_FLGBIT_* */
 	struct snd_soc_tplg_private priv;
 } __attribute__((packed));
+
+/*
+ * Old version of ABI structs, supported for backward compatibility.
+ */
+
+/* Manifest v4 */
+struct snd_soc_tplg_manifest_v4 {
+	__le32 size;		/* in bytes of this structure */
+	__le32 control_elems;	/* number of control elements */
+	__le32 widget_elems;	/* number of widget elements */
+	__le32 graph_elems;	/* number of graph elements */
+	__le32 pcm_elems;	/* number of PCM elements */
+	__le32 dai_link_elems;	/* number of DAI link elements */
+	struct snd_soc_tplg_private priv;
+} __packed;
+
+/* Stream Capabilities v4 */
+struct snd_soc_tplg_stream_caps_v4 {
+	__le32 size;		/* in bytes of this structure */
+	char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
+	__le64 formats;	/* supported formats SNDRV_PCM_FMTBIT_* */
+	__le32 rates;		/* supported rates SNDRV_PCM_RATE_* */
+	__le32 rate_min;	/* min rate */
+	__le32 rate_max;	/* max rate */
+	__le32 channels_min;	/* min channels */
+	__le32 channels_max;	/* max channels */
+	__le32 periods_min;	/* min number of periods */
+	__le32 periods_max;	/* max number of periods */
+	__le32 period_size_min;	/* min period size bytes */
+	__le32 period_size_max;	/* max period size bytes */
+	__le32 buffer_size_min;	/* min buffer size bytes */
+	__le32 buffer_size_max;	/* max buffer size bytes */
+} __packed;
+
+/* PCM v4 */
+struct snd_soc_tplg_pcm_v4 {
+	__le32 size;		/* in bytes of this structure */
+	char pcm_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
+	char dai_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
+	__le32 pcm_id;		/* unique ID - used to match with DAI link */
+	__le32 dai_id;		/* unique ID - used to match */
+	__le32 playback;	/* supports playback mode */
+	__le32 capture;		/* supports capture mode */
+	__le32 compress;	/* 1 = compressed; 0 = PCM */
+	struct snd_soc_tplg_stream stream[SND_SOC_TPLG_STREAM_CONFIG_MAX]; /* for DAI link */
+	__le32 num_streams;	/* number of streams */
+	struct snd_soc_tplg_stream_caps_v4 caps[2]; /* playback and capture for DAI */
+} __packed;
+
+/* Physical link config v4 */
+struct snd_soc_tplg_link_config_v4 {
+	__le32 size;            /* in bytes of this structure */
+	__le32 id;              /* unique ID - used to match */
+	struct snd_soc_tplg_stream stream[SND_SOC_TPLG_STREAM_CONFIG_MAX]; /* supported playback and capture configs */
+	__le32 num_streams;     /* number of streams */
+} __packed;
+
 #endif
diff --git a/include/uapi/sound/skl-tplg-interface.h b/include/uapi/sound/skl-tplg-interface.h
new file mode 100644
index 0000000..f58cafa
--- /dev/null
+++ b/include/uapi/sound/skl-tplg-interface.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * skl-tplg-interface.h - Intel DSP FW private data interface
+ *
+ * Copyright (C) 2015 Intel Corp
+ * Author: Jeeja KP <jeeja.kp@intel.com>
+ *	    Nilofer, Samreen <samreen.nilofer@intel.com>
+ */
+
+#ifndef __HDA_TPLG_INTERFACE_H__
+#define __HDA_TPLG_INTERFACE_H__
+
+/*
+ * Default types range from 0 to 12; a type can range from 0 to 0xff.
+ * SST types start higher to avoid any future overlap.
+ */
+#define SKL_CONTROL_TYPE_BYTE_TLV	0x100
+#define SKL_CONTROL_TYPE_MIC_SELECT	0x102
+
+#define HDA_SST_CFG_MAX	900 /* size of copier cfg */
+#define MAX_IN_QUEUE 8
+#define MAX_OUT_QUEUE 8
+
+#define SKL_UUID_STR_SZ 40
+/* Event types go here */
+/* Reserve event type 0 for no event handlers */
+enum skl_event_types {
+	SKL_EVENT_NONE = 0,
+	SKL_MIXER_EVENT,
+	SKL_MUX_EVENT,
+	SKL_VMIXER_EVENT,
+	SKL_PGA_EVENT
+};
+
+/**
+ * enum skl_ch_cfg - channel configuration
+ *
+ * @SKL_CH_CFG_MONO:	One channel only
+ * @SKL_CH_CFG_STEREO:	L & R
+ * @SKL_CH_CFG_2_1:	L, R & LFE
+ * @SKL_CH_CFG_3_0:	L, C & R
+ * @SKL_CH_CFG_3_1:	L, C, R & LFE
+ * @SKL_CH_CFG_QUATRO:	L, R, Ls & Rs
+ * @SKL_CH_CFG_4_0:	L, C, R & Cs
+ * @SKL_CH_CFG_5_0:	L, C, R, Ls & Rs
+ * @SKL_CH_CFG_5_1:	L, C, R, Ls, Rs & LFE
+ * @SKL_CH_CFG_DUAL_MONO: One channel replicated in two
+ * @SKL_CH_CFG_I2S_DUAL_STEREO_0: Stereo(L,R) in 4 slots, 1st stream:[ L, R, -, - ]
+ * @SKL_CH_CFG_I2S_DUAL_STEREO_1: Stereo(L,R) in 4 slots, 2nd stream:[ -, -, L, R ]
+ * @SKL_CH_CFG_INVALID:	Invalid
+ */
+enum skl_ch_cfg {
+	SKL_CH_CFG_MONO = 0,
+	SKL_CH_CFG_STEREO = 1,
+	SKL_CH_CFG_2_1 = 2,
+	SKL_CH_CFG_3_0 = 3,
+	SKL_CH_CFG_3_1 = 4,
+	SKL_CH_CFG_QUATRO = 5,
+	SKL_CH_CFG_4_0 = 6,
+	SKL_CH_CFG_5_0 = 7,
+	SKL_CH_CFG_5_1 = 8,
+	SKL_CH_CFG_DUAL_MONO = 9,
+	SKL_CH_CFG_I2S_DUAL_STEREO_0 = 10,
+	SKL_CH_CFG_I2S_DUAL_STEREO_1 = 11,
+	SKL_CH_CFG_4_CHANNEL = 12,
+	SKL_CH_CFG_INVALID
+};
+
+enum skl_module_type {
+	SKL_MODULE_TYPE_MIXER = 0,
+	SKL_MODULE_TYPE_COPIER,
+	SKL_MODULE_TYPE_UPDWMIX,
+	SKL_MODULE_TYPE_SRCINT,
+	SKL_MODULE_TYPE_ALGO,
+	SKL_MODULE_TYPE_BASE_OUTFMT,
+	SKL_MODULE_TYPE_KPB,
+	SKL_MODULE_TYPE_MIC_SELECT,
+};
+
+enum skl_core_affinity {
+	SKL_AFFINITY_CORE_0 = 0,
+	SKL_AFFINITY_CORE_1,
+	SKL_AFFINITY_CORE_MAX
+};
+
+enum skl_pipe_conn_type {
+	SKL_PIPE_CONN_TYPE_NONE = 0,
+	SKL_PIPE_CONN_TYPE_FE,
+	SKL_PIPE_CONN_TYPE_BE
+};
+
+enum skl_hw_conn_type {
+	SKL_CONN_NONE = 0,
+	SKL_CONN_SOURCE = 1,
+	SKL_CONN_SINK = 2
+};
+
+enum skl_dev_type {
+	SKL_DEVICE_BT = 0x0,
+	SKL_DEVICE_DMIC = 0x1,
+	SKL_DEVICE_I2S = 0x2,
+	SKL_DEVICE_SLIMBUS = 0x3,
+	SKL_DEVICE_HDALINK = 0x4,
+	SKL_DEVICE_HDAHOST = 0x5,
+	SKL_DEVICE_NONE
+};
+
+/**
+ * enum skl_interleaving - interleaving style
+ *
+ * @SKL_INTERLEAVING_PER_CHANNEL: [s1_ch1...s1_chN,...,sM_ch1...sM_chN]
+ * @SKL_INTERLEAVING_PER_SAMPLE: [s1_ch1...sM_ch1,...,s1_chN...sM_chN]
+ */
+enum skl_interleaving {
+	SKL_INTERLEAVING_PER_CHANNEL = 0,
+	SKL_INTERLEAVING_PER_SAMPLE = 1,
+};
+
+enum skl_sample_type {
+	SKL_SAMPLE_TYPE_INT_MSB = 0,
+	SKL_SAMPLE_TYPE_INT_LSB = 1,
+	SKL_SAMPLE_TYPE_INT_SIGNED = 2,
+	SKL_SAMPLE_TYPE_INT_UNSIGNED = 3,
+	SKL_SAMPLE_TYPE_FLOAT = 4
+};
+
+enum module_pin_type {
+	/* All pins of the module take the same PCM inputs or outputs,
+	 * e.g. mixout
+	 */
+	SKL_PIN_TYPE_HOMOGENEOUS,
+	/* All pins of the module take different PCM inputs or outputs,
+	 * e.g. mux
+	 */
+	SKL_PIN_TYPE_HETEROGENEOUS,
+};
+
+enum skl_module_param_type {
+	SKL_PARAM_DEFAULT = 0,
+	SKL_PARAM_INIT,
+	SKL_PARAM_SET,
+	SKL_PARAM_BIND
+};
+
+struct skl_dfw_algo_data {
+	u32 set_params:2;
+	u32 rsvd:30;
+	u32 param_id;
+	u32 max;
+	char params[0];
+} __packed;
+
+enum skl_tkn_dir {
+	SKL_DIR_IN,
+	SKL_DIR_OUT
+};
+
+enum skl_tuple_type {
+	SKL_TYPE_TUPLE,
+	SKL_TYPE_DATA
+};
+
+/* v4 configuration data */
+
+struct skl_dfw_v4_module_pin {
+	u16 module_id;
+	u16 instance_id;
+} __packed;
+
+struct skl_dfw_v4_module_fmt {
+	u32 channels;
+	u32 freq;
+	u32 bit_depth;
+	u32 valid_bit_depth;
+	u32 ch_cfg;
+	u32 interleaving_style;
+	u32 sample_type;
+	u32 ch_map;
+} __packed;
+
+struct skl_dfw_v4_module_caps {
+	u32 set_params:2;
+	u32 rsvd:30;
+	u32 param_id;
+	u32 caps_size;
+	u32 caps[HDA_SST_CFG_MAX];
+} __packed;
+
+struct skl_dfw_v4_pipe {
+	u8 pipe_id;
+	u8 pipe_priority;
+	u16 conn_type:4;
+	u16 rsvd:4;
+	u16 memory_pages:8;
+} __packed;
+
+struct skl_dfw_v4_module {
+	char uuid[SKL_UUID_STR_SZ];
+
+	u16 module_id;
+	u16 instance_id;
+	u32 max_mcps;
+	u32 mem_pages;
+	u32 obs;
+	u32 ibs;
+	u32 vbus_id;
+
+	u32 max_in_queue:8;
+	u32 max_out_queue:8;
+	u32 time_slot:8;
+	u32 core_id:4;
+	u32 rsvd1:4;
+
+	u32 module_type:8;
+	u32 conn_type:4;
+	u32 dev_type:4;
+	u32 hw_conn_type:4;
+	u32 rsvd2:12;
+
+	u32 params_fixup:8;
+	u32 converter:8;
+	u32 input_pin_type:1;
+	u32 output_pin_type:1;
+	u32 is_dynamic_in_pin:1;
+	u32 is_dynamic_out_pin:1;
+	u32 is_loadable:1;
+	u32 rsvd3:11;
+
+	struct skl_dfw_v4_pipe pipe;
+	struct skl_dfw_v4_module_fmt in_fmt[MAX_IN_QUEUE];
+	struct skl_dfw_v4_module_fmt out_fmt[MAX_OUT_QUEUE];
+	struct skl_dfw_v4_module_pin in_pin[MAX_IN_QUEUE];
+	struct skl_dfw_v4_module_pin out_pin[MAX_OUT_QUEUE];
+	struct skl_dfw_v4_module_caps caps;
+} __packed;
+
+#endif
diff --git a/include/uapi/sound/tlv.h b/include/uapi/sound/tlv.h
index be5371f..7d6d65f 100644
--- a/include/uapi/sound/tlv.h
+++ b/include/uapi/sound/tlv.h
@@ -42,6 +42,10 @@
 #define SNDRV_CTL_TLVD_LENGTH(...) \
 	((unsigned int)sizeof((const unsigned int[]) { __VA_ARGS__ }))
 
+/* Accessor offsets for TLV data items */
+#define SNDRV_CTL_TLVO_TYPE		0
+#define SNDRV_CTL_TLVO_LEN		1
+
 #define SNDRV_CTL_TLVD_CONTAINER_ITEM(...) \
 	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_CONTAINER, __VA_ARGS__)
 #define SNDRV_CTL_TLVD_DECLARE_CONTAINER(name, ...) \
@@ -61,6 +65,10 @@
 		SNDRV_CTL_TLVD_DB_SCALE_ITEM(min, step, mute) \
 	}
 
+/* Accessor offsets for min, mute and step items in dB scale type TLV */
+#define SNDRV_CTL_TLVO_DB_SCALE_MIN		2
+#define SNDRV_CTL_TLVO_DB_SCALE_MUTE_AND_STEP	3
+
 /* dB scale specified with min/max values instead of step */
 #define SNDRV_CTL_TLVD_DB_MINMAX_ITEM(min_dB, max_dB) \
 	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_DB_MINMAX, (min_dB), (max_dB))
@@ -75,6 +83,10 @@
 		SNDRV_CTL_TLVD_DB_MINMAX_MUTE_ITEM(min_dB, max_dB) \
 	}
 
+/* Accessor offsets for min, max items in db-minmax types of TLV. */
+#define SNDRV_CTL_TLVO_DB_MINMAX_MIN	2
+#define SNDRV_CTL_TLVO_DB_MINMAX_MAX	3
+
 /* linear volume between min_dB and max_dB (.01dB unit) */
 #define SNDRV_CTL_TLVD_DB_LINEAR_ITEM(min_dB, max_dB) \
 	SNDRV_CTL_TLVD_ITEM(SNDRV_CTL_TLVT_DB_LINEAR, (min_dB), (max_dB))
@@ -83,6 +95,10 @@
 		SNDRV_CTL_TLVD_DB_LINEAR_ITEM(min_dB, max_dB) \
 	}
 
+/* Accessor offsets for min, max items in db-linear type of TLV. */
+#define SNDRV_CTL_TLVO_DB_LINEAR_MIN	2
+#define SNDRV_CTL_TLVO_DB_LINEAR_MAX	3
+
 /* dB range container:
  * Items in dB range container must be ordered by their values and by their
  * dB values. This implies that larger values must correspond with larger
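
These offsets let TLV consumers replace magic array indices; e.g., for a dB-scale TLV read back from a mixer control (a small sketch, helper name is ours):

#include <sound/tlv.h>

/* Pick the minimum dB value out of a dB-scale TLV blob. */
static int tlv_db_scale_min(const unsigned int *tlv)
{
	/* tlv[0] = type, tlv[1] = length, then the payload items */
	return (int)tlv[SNDRV_CTL_TLVO_DB_SCALE_MIN];
}
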
diff --git a/init/Kconfig b/init/Kconfig
index 1fecd5b..22ca30f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1,20 +1,12 @@
-config ARCH
-	string
-	option env="ARCH"
-
-config KERNELVERSION
-	string
-	option env="KERNELVERSION"
-
 config DEFCONFIG_LIST
 	string
 	depends on !UML
 	option defconfig_list
-	default "/lib/modules/$UNAME_RELEASE/.config"
+	default "/lib/modules/$(shell,uname --release)/.config"
 	default "/etc/kernel-config"
-	default "/boot/config-$UNAME_RELEASE"
-	default "$ARCH_DEFCONFIG"
-	default "arch/$ARCH/defconfig"
+	default "/boot/config-$(shell,uname --release)"
+	default ARCH_DEFCONFIG
+	default "arch/$(ARCH)/defconfig"
 
 config CONSTRUCTORS
 	bool
@@ -54,15 +46,6 @@
 	  Maximum of each of the number of arguments and environment
 	  variables passed to init from the kernel command line.
 
-
-config CROSS_COMPILE
-	string "Cross-compiler tool prefix"
-	help
-	  Same as running 'make CROSS_COMPILE=prefix-' but stored for
-	  default make runs in this kernel build directory.  You don't
-	  need to set this unless you want the configured kernel build
-	  directory to select the cross-compiler automatically.
-
 config COMPILE_TEST
 	bool "Compile also drivers which will not load"
 	depends on !UML
@@ -1038,6 +1021,33 @@
 
 endchoice
 
+config HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+	bool
+	help
+	  This requires that the arch annotates or otherwise protects
+	  its external entry points from being discarded. Linker scripts
+	  must also merge .text.*, .data.*, and .bss.* correctly into
+	  output sections. Care must be taken not to pull in unrelated
+	  sections (e.g., '.text.init'). Typically '.' in section names
+	  is used to distinguish them from label names / C identifiers.
+
+config LD_DEAD_CODE_DATA_ELIMINATION
+	bool "Dead code and data elimination (EXPERIMENTAL)"
+	depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION
+	depends on EXPERT
+	help
+	  Select this if the architecture wants to do dead code and
+	  data elimination with the linker by compiling with
+	  -ffunction-sections -fdata-sections, and linking with
+	  --gc-sections.
+
+	  This can reduce the on-disk and in-memory size of the kernel
+	  code and static data, particularly for small configs and
+	  on small systems. It also has the potential to introduce a
+	  silently broken kernel if the required annotations are not
+	  present. This option is not well tested yet, so use at your
+	  own risk.
+
 config SYSCTL
 	bool
 
diff --git a/init/init_task.c b/init/init_task.c
index 3ac6e75..74f60ba 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/audit.h>
 
 #include <asm/pgtable.h>
 #include <linux/uaccess.h>
@@ -119,7 +120,7 @@
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDITSYSCALL
 	.loginuid	= INVALID_UID,
-	.sessionid	= (unsigned int)-1,
+	.sessionid	= AUDIT_SID_UNSET,
 #endif
 #ifdef CONFIG_PERF_EVENTS
 	.perf_event_mutex = __MUTEX_INITIALIZER(init_task.perf_event_mutex),
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index a808f29..c0d58f3 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -691,7 +691,7 @@
 	wake_up(&info->wait_q);
 }
 
-static int prepare_timeout(const struct timespec __user *u_abs_timeout,
+static int prepare_timeout(const struct __kernel_timespec __user *u_abs_timeout,
 			   struct timespec64 *ts)
 {
 	if (get_timespec64(ts, u_abs_timeout))
@@ -1128,7 +1128,7 @@
 
 SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
 		size_t, msg_len, unsigned int, msg_prio,
-		const struct timespec __user *, u_abs_timeout)
+		const struct __kernel_timespec __user *, u_abs_timeout)
 {
 	struct timespec64 ts, *p = NULL;
 	if (u_abs_timeout) {
@@ -1142,7 +1142,7 @@
 
 SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
 		size_t, msg_len, unsigned int __user *, u_msg_prio,
-		const struct timespec __user *, u_abs_timeout)
+		const struct __kernel_timespec __user *, u_abs_timeout)
 {
 	struct timespec64 ts, *p = NULL;
 	if (u_abs_timeout) {
@@ -1420,6 +1420,47 @@
 	return do_mq_open(u_name, oflag, mode, p);
 }
 
+COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
+		       const struct compat_sigevent __user *, u_notification)
+{
+	struct sigevent n, *p = NULL;
+	if (u_notification) {
+		if (get_compat_sigevent(&n, u_notification))
+			return -EFAULT;
+		if (n.sigev_notify == SIGEV_THREAD)
+			n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
+		p = &n;
+	}
+	return do_mq_notify(mqdes, p);
+}
+
+COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
+		       const struct compat_mq_attr __user *, u_mqstat,
+		       struct compat_mq_attr __user *, u_omqstat)
+{
+	int ret;
+	struct mq_attr mqstat, omqstat;
+	struct mq_attr *new = NULL, *old = NULL;
+
+	if (u_mqstat) {
+		new = &mqstat;
+		if (get_compat_mq_attr(new, u_mqstat))
+			return -EFAULT;
+	}
+	if (u_omqstat)
+		old = &omqstat;
+
+	ret = do_mq_getsetattr(mqdes, new, old);
+	if (ret || !old)
+		return ret;
+
+	if (put_compat_mq_attr(old, u_omqstat))
+		return -EFAULT;
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
 static int compat_prepare_timeout(const struct compat_timespec __user *p,
 				   struct timespec64 *ts)
 {
@@ -1459,45 +1500,6 @@
 	}
 	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
 }
-
-COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
-		       const struct compat_sigevent __user *, u_notification)
-{
-	struct sigevent n, *p = NULL;
-	if (u_notification) {
-		if (get_compat_sigevent(&n, u_notification))
-			return -EFAULT;
-		if (n.sigev_notify == SIGEV_THREAD)
-			n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
-		p = &n;
-	}
-	return do_mq_notify(mqdes, p);
-}
-
-COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
-		       const struct compat_mq_attr __user *, u_mqstat,
-		       struct compat_mq_attr __user *, u_omqstat)
-{
-	int ret;
-	struct mq_attr mqstat, omqstat;
-	struct mq_attr *new = NULL, *old = NULL;
-
-	if (u_mqstat) {
-		new = &mqstat;
-		if (get_compat_mq_attr(new, u_mqstat))
-			return -EFAULT;
-	}
-	if (u_omqstat)
-		old = &omqstat;
-
-	ret = do_mq_getsetattr(mqdes, new, old);
-	if (ret || !old)
-		return ret;
-
-	if (put_compat_mq_attr(old, u_omqstat))
-		return -EFAULT;
-	return 0;
-}
 #endif
 
 static const struct inode_operations mqueue_dir_inode_operations = {
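
From userspace nothing visible changes here: with a 64-bit time_t libc, struct timespec already matches struct __kernel_timespec, so existing code like the sketch below keeps working across the y2038 boundary (error handling omitted):

#include <mqueue.h>
#include <time.h>

/* Send with a five second deadline; struct timespec here matches
 * struct __kernel_timespec when time_t is 64-bit. */
static int send_with_deadline(mqd_t q, const char *msg, size_t len)
{
	struct timespec abs;

	clock_gettime(CLOCK_REALTIME, &abs);
	abs.tv_sec += 5;
	return mq_timedsend(q, msg, len, 0, &abs);
}
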
diff --git a/ipc/msg.c b/ipc/msg.c
index 56fd1c7..3b65453 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -537,6 +537,11 @@
 	p->msg_stime  = msq->q_stime;
 	p->msg_rtime  = msq->q_rtime;
 	p->msg_ctime  = msq->q_ctime;
+#ifndef CONFIG_64BIT
+	p->msg_stime_high = msq->q_stime >> 32;
+	p->msg_rtime_high = msq->q_rtime >> 32;
+	p->msg_ctime_high = msq->q_ctime >> 32;
+#endif
 	p->msg_cbytes = msq->q_cbytes;
 	p->msg_qnum   = msq->q_qnum;
 	p->msg_qbytes = msq->q_qbytes;
@@ -646,9 +651,12 @@
 		struct compat_msqid64_ds v;
 		memset(&v, 0, sizeof(v));
 		to_compat_ipc64_perm(&v.msg_perm, &in->msg_perm);
-		v.msg_stime = in->msg_stime;
-		v.msg_rtime = in->msg_rtime;
-		v.msg_ctime = in->msg_ctime;
+		v.msg_stime	 = lower_32_bits(in->msg_stime);
+		v.msg_stime_high = upper_32_bits(in->msg_stime);
+		v.msg_rtime	 = lower_32_bits(in->msg_rtime);
+		v.msg_rtime_high = upper_32_bits(in->msg_rtime);
+		v.msg_ctime	 = lower_32_bits(in->msg_ctime);
+		v.msg_ctime_high = upper_32_bits(in->msg_ctime);
 		v.msg_cbytes = in->msg_cbytes;
 		v.msg_qnum = in->msg_qnum;
 		v.msg_qbytes = in->msg_qbytes;
@@ -758,7 +766,7 @@
 				WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
 			} else {
 				ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk));
-				msq->q_rtime = get_seconds();
+				msq->q_rtime = ktime_get_real_seconds();
 
 				wake_q_add(wake_q, msr->r_tsk);
 				WRITE_ONCE(msr->r_msg, msg);
@@ -859,7 +867,7 @@
 	}
 
 	ipc_update_pid(&msq->q_lspid, task_tgid(current));
-	msq->q_stime = get_seconds();
+	msq->q_stime = ktime_get_real_seconds();
 
 	if (!pipelined_send(msq, msg, &wake_q)) {
 		/* no one is waiting for this message, enqueue it */
@@ -1087,7 +1095,7 @@
 
 			list_del(&msg->m_list);
 			msq->q_qnum--;
-			msq->q_rtime = get_seconds();
+			msq->q_rtime = ktime_get_real_seconds();
 			ipc_update_pid(&msq->q_lrpid, task_tgid(current));
 			msq->q_cbytes -= msg->m_ts;
 			atomic_sub(msg->m_ts, &ns->msg_bytes);
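The msqid64_ds handling above splits each 64-bit timestamp across the
existing 32-bit field and a new *_high field, since the legacy ABI has
no room for a wider time_t. A sketch of both directions (variable names
are illustrative):

    /* kernel -> user: split the 64-bit value */
    v.msg_stime      = lower_32_bits(in->msg_stime);
    v.msg_stime_high = upper_32_bits(in->msg_stime);

    /* consumer side: reassemble a y2038-safe timestamp */
    time64_t stime = ((time64_t)v.msg_stime_high << 32) | v.msg_stime;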
diff --git a/ipc/sem.c b/ipc/sem.c
index 06be75d..cfd94d4 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -70,6 +70,7 @@
  *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
  */
 
+#include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
@@ -104,7 +105,7 @@
 					/* that alter the semaphore */
 	struct list_head pending_const; /* pending single-sop operations */
 					/* that do not alter the semaphore*/
-	time_t	sem_otime;	/* candidate for sem_otime */
+	time64_t	 sem_otime;	/* candidate for sem_otime */
 } ____cacheline_aligned_in_smp;
 
 /* One sem_array data structure for each set of semaphores in the system. */
@@ -984,10 +985,10 @@
 static void set_semotime(struct sem_array *sma, struct sembuf *sops)
 {
 	if (sops == NULL) {
-		sma->sems[0].sem_otime = get_seconds();
+		sma->sems[0].sem_otime = ktime_get_real_seconds();
 	} else {
 		sma->sems[sops[0].sem_num].sem_otime =
-							get_seconds();
+						ktime_get_real_seconds();
 	}
 }
 
@@ -1214,6 +1215,7 @@
 			 int cmd, struct semid64_ds *semid64)
 {
 	struct sem_array *sma;
+	time64_t semotime;
 	int id = 0;
 	int err;
 
@@ -1257,8 +1259,13 @@
 	}
 
 	kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
-	semid64->sem_otime = get_semotime(sma);
+	semotime = get_semotime(sma);
+	semid64->sem_otime = semotime;
 	semid64->sem_ctime = sma->sem_ctime;
+#ifndef CONFIG_64BIT
+	semid64->sem_otime_high = semotime >> 32;
+	semid64->sem_ctime_high = sma->sem_ctime >> 32;
+#endif
 	semid64->sem_nsems = sma->sem_nsems;
 
 	ipc_unlock_object(&sma->sem_perm);
@@ -1704,8 +1711,10 @@
 		struct compat_semid64_ds v;
 		memset(&v, 0, sizeof(v));
 		to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
-		v.sem_otime = in->sem_otime;
-		v.sem_ctime = in->sem_ctime;
+		v.sem_otime	 = lower_32_bits(in->sem_otime);
+		v.sem_otime_high = upper_32_bits(in->sem_otime);
+		v.sem_ctime	 = lower_32_bits(in->sem_ctime);
+		v.sem_ctime_high = upper_32_bits(in->sem_ctime);
 		v.sem_nsems = in->sem_nsems;
 		return copy_to_user(buf, &v, sizeof(v));
 	} else {
@@ -2168,7 +2177,7 @@
 }
 
 long ksys_semtimedop(int semid, struct sembuf __user *tsops,
-		     unsigned int nsops, const struct timespec __user *timeout)
+		     unsigned int nsops, const struct __kernel_timespec __user *timeout)
 {
 	if (timeout) {
 		struct timespec64 ts;
@@ -2180,12 +2189,12 @@
 }
 
 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
-		unsigned int, nsops, const struct timespec __user *, timeout)
+		unsigned int, nsops, const struct __kernel_timespec __user *, timeout)
 {
 	return ksys_semtimedop(semid, tsops, nsops, timeout);
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 			    unsigned int nsops,
 			    const struct compat_timespec __user *timeout)
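The get_seconds() to ktime_get_real_seconds() conversions matter on
32-bit builds: get_seconds() returned an unsigned long, so the value was
truncated to 32 bits there, while ktime_get_real_seconds() returns
time64_t on every architecture, matching the widened sem_otime field.
A minimal sketch:

    time64_t now = ktime_get_real_seconds();	/* 64-bit seconds everywhere */
    sma->sems[0].sem_otime = now;		/* no truncation on 32-bit */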
diff --git a/ipc/shm.c b/ipc/shm.c
index d732693..29978ee 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1002,6 +1002,11 @@
 	tbuf->shm_atime	= shp->shm_atim;
 	tbuf->shm_dtime	= shp->shm_dtim;
 	tbuf->shm_ctime	= shp->shm_ctim;
+#ifndef CONFIG_64BIT
+	tbuf->shm_atime_high = shp->shm_atim >> 32;
+	tbuf->shm_dtime_high = shp->shm_dtim >> 32;
+	tbuf->shm_ctime_high = shp->shm_ctim >> 32;
+#endif
 	tbuf->shm_cpid	= pid_vnr(shp->shm_cprid);
 	tbuf->shm_lpid	= pid_vnr(shp->shm_lprid);
 	tbuf->shm_nattch = shp->shm_nattch;
@@ -1233,9 +1238,12 @@
 		struct compat_shmid64_ds v;
 		memset(&v, 0, sizeof(v));
 		to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
-		v.shm_atime = in->shm_atime;
-		v.shm_dtime = in->shm_dtime;
-		v.shm_ctime = in->shm_ctime;
+		v.shm_atime	 = lower_32_bits(in->shm_atime);
+		v.shm_atime_high = upper_32_bits(in->shm_atime);
+		v.shm_dtime	 = lower_32_bits(in->shm_dtime);
+		v.shm_dtime_high = upper_32_bits(in->shm_dtime);
+		v.shm_ctime	 = lower_32_bits(in->shm_ctime);
+		v.shm_ctime_high = upper_32_bits(in->shm_ctime);
 		v.shm_segsz = in->shm_segsz;
 		v.shm_nattch = in->shm_nattch;
 		v.shm_cpid = in->shm_cpid;
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 77a883e..65d405f 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -30,9 +30,14 @@
 		return ksys_semtimedop(first, (struct sembuf __user *)ptr,
 				       second, NULL);
 	case SEMTIMEDOP:
-		return ksys_semtimedop(first, (struct sembuf __user *)ptr,
-				       second,
-				       (const struct timespec __user *)fifth);
+		if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
+			return ksys_semtimedop(first, ptr, second,
+			        (const struct __kernel_timespec __user *)fifth);
+		else if (IS_ENABLED(CONFIG_COMPAT_32BIT_TIME))
+			return compat_ksys_semtimedop(first, ptr, second,
+			        (const struct compat_timespec __user *)fifth);
+		else
+			return -ENOSYS;
 
 	case SEMGET:
 		return ksys_semget(first, second, third);
@@ -130,6 +135,8 @@
 		/* struct sembuf is the same on 32 and 64bit :)) */
 		return ksys_semtimedop(first, compat_ptr(ptr), second, NULL);
 	case SEMTIMEDOP:
+		if (!IS_ENABLED(CONFIG_COMPAT_32BIT_TIME))
+			return -ENOSYS;
 		return compat_ksys_semtimedop(first, compat_ptr(ptr), second,
 						compat_ptr(fifth));
 	case SEMGET:
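The SEMTIMEDOP dispatch above leans on IS_ENABLED(), which expands to a
compile-time 0 or 1, so untaken branches are discarded by the compiler
while still being parsed and type-checked. A self-contained sketch of
the pattern, with hypothetical handlers f_native() and f_compat():

    static long f_native(void) { return 0; }	/* hypothetical */
    static long f_compat(void) { return 0; }	/* hypothetical */

    static long dispatch(void)
    {
    	if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
    		return f_native();	/* compiled out when false */
    	else if (IS_ENABLED(CONFIG_COMPAT_32BIT_TIME))
    		return f_compat();
    	else
    		return -ENOSYS;
    }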
diff --git a/ipc/util.h b/ipc/util.h
index acc5159..0aba323 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -251,7 +251,7 @@
 /* for __ARCH_WANT_SYS_IPC */
 long ksys_semtimedop(int semid, struct sembuf __user *tsops,
 		     unsigned int nsops,
-		     const struct timespec __user *timeout);
+		     const struct __kernel_timespec __user *timeout);
 long ksys_semget(key_t key, int nsems, int semflg);
 long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg);
 long ksys_msgget(key_t key, int msgflg);
@@ -265,10 +265,10 @@
 long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 
 /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */
-#ifdef CONFIG_COMPAT
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 			    unsigned int nsops,
 			    const struct compat_timespec __user *timeout);
+#ifdef CONFIG_COMPAT
 long compat_ksys_semctl(int semid, int semnum, int cmd, int arg);
 long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr);
 long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz,
diff --git a/kernel/audit.c b/kernel/audit.c
index 670665c..e7478cb 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1099,8 +1099,7 @@
 
 	if (audit_enabled == AUDIT_OFF)
 		return;
-
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+	ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_FEATURE_CHANGE);
 	if (!ab)
 		return;
 	audit_log_task_info(ab, current);
@@ -2317,8 +2316,7 @@
 		return;
 
 	/* Generate AUDIT_ANOM_LINK with subject, operation, outcome. */
-	ab = audit_log_start(current->audit_context, GFP_KERNEL,
-			     AUDIT_ANOM_LINK);
+	ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_ANOM_LINK);
 	if (!ab)
 		return;
 	audit_log_format(ab, "op=%s", operation);
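The audit changes below repeatedly replace open-coded
current->audit_context dereferences with audit_context(). As a sketch of
what such an accessor amounts to (the real helper lives in
include/linux/audit.h), it is a thin inline wrapper:

    static inline struct audit_context *audit_context(void)
    {
    	return current->audit_context;
    }

Funneling every access through one helper makes a later change to the
underlying storage a one-line edit rather than a tree-wide sweep.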
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 9eb8b35..f1ba889 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -274,7 +274,7 @@
 		/* If the update involves invalidating rules, do the inode-based
 		 * filtering now, so we don't omit records. */
 		if (invalidating && !audit_dummy_context())
-			audit_filter_inodes(current, current->audit_context);
+			audit_filter_inodes(current, audit_context());
 
 		/* updating ino will likely change which audit_hash_list we
 		 * are on so we need a new watch for the new list */
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index d7a807e..eaa3201 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -426,7 +426,7 @@
 			return -EINVAL;
 		break;
 	case AUDIT_EXE:
-		if (f->op != Audit_equal)
+		if (f->op != Audit_not_equal && f->op != Audit_equal)
 			return -EINVAL;
 		if (entry->rule.listnr != AUDIT_FILTER_EXIT)
 			return -EINVAL;
@@ -1089,8 +1089,6 @@
 static void audit_log_rule_change(char *action, struct audit_krule *rule, int res)
 {
 	struct audit_buffer *ab;
-	uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current));
-	unsigned int sessionid = audit_get_sessionid(current);
 
 	if (!audit_enabled)
 		return;
@@ -1098,7 +1096,7 @@
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
 	if (!ab)
 		return;
-	audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
+	audit_log_session_info(ab);
 	audit_log_task_context(ab);
 	audit_log_format(ab, " op=%s", action);
 	audit_log_key(ab, rule->filterkey);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..ceb1c45 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -374,7 +374,7 @@
 	case AUDIT_COMPARE_EGID_TO_OBJ_GID:
 		return audit_compare_gid(cred->egid, name, f, ctx);
 	case AUDIT_COMPARE_AUID_TO_OBJ_UID:
-		return audit_compare_uid(tsk->loginuid, name, f, ctx);
+		return audit_compare_uid(audit_get_loginuid(tsk), name, f, ctx);
 	case AUDIT_COMPARE_SUID_TO_OBJ_UID:
 		return audit_compare_uid(cred->suid, name, f, ctx);
 	case AUDIT_COMPARE_SGID_TO_OBJ_GID:
@@ -385,7 +385,8 @@
 		return audit_compare_gid(cred->fsgid, name, f, ctx);
 	/* uid comparisons */
 	case AUDIT_COMPARE_UID_TO_AUID:
-		return audit_uid_comparator(cred->uid, f->op, tsk->loginuid);
+		return audit_uid_comparator(cred->uid, f->op,
+					    audit_get_loginuid(tsk));
 	case AUDIT_COMPARE_UID_TO_EUID:
 		return audit_uid_comparator(cred->uid, f->op, cred->euid);
 	case AUDIT_COMPARE_UID_TO_SUID:
@@ -394,11 +395,14 @@
 		return audit_uid_comparator(cred->uid, f->op, cred->fsuid);
 	/* auid comparisons */
 	case AUDIT_COMPARE_AUID_TO_EUID:
-		return audit_uid_comparator(tsk->loginuid, f->op, cred->euid);
+		return audit_uid_comparator(audit_get_loginuid(tsk), f->op,
+					    cred->euid);
 	case AUDIT_COMPARE_AUID_TO_SUID:
-		return audit_uid_comparator(tsk->loginuid, f->op, cred->suid);
+		return audit_uid_comparator(audit_get_loginuid(tsk), f->op,
+					    cred->suid);
 	case AUDIT_COMPARE_AUID_TO_FSUID:
-		return audit_uid_comparator(tsk->loginuid, f->op, cred->fsuid);
+		return audit_uid_comparator(audit_get_loginuid(tsk), f->op,
+					    cred->fsuid);
 	/* euid comparisons */
 	case AUDIT_COMPARE_EUID_TO_SUID:
 		return audit_uid_comparator(cred->euid, f->op, cred->suid);
@@ -471,6 +475,8 @@
 			break;
 		case AUDIT_EXE:
 			result = audit_exe_compare(tsk, rule->exe);
+			if (f->op == Audit_not_equal)
+				result = !result;
 			break;
 		case AUDIT_UID:
 			result = audit_uid_comparator(cred->uid, f->op, f->uid);
@@ -511,7 +517,7 @@
 			result = audit_gid_comparator(cred->fsgid, f->op, f->gid);
 			break;
 		case AUDIT_SESSIONID:
-			sessionid = audit_get_sessionid(current);
+			sessionid = audit_get_sessionid(tsk);
 			result = audit_comparator(sessionid, f->op, f->val);
 			break;
 		case AUDIT_PERS:
@@ -609,7 +615,8 @@
 				result = match_tree_refs(ctx, rule->tree);
 			break;
 		case AUDIT_LOGINUID:
-			result = audit_uid_comparator(tsk->loginuid, f->op, f->uid);
+			result = audit_uid_comparator(audit_get_loginuid(tsk),
+						      f->op, f->uid);
 			break;
 		case AUDIT_LOGINUID_SET:
 			result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
@@ -863,7 +870,7 @@
 		audit_filter_inodes(tsk, context);
 	}
 
-	tsk->audit_context = NULL;
+	audit_set_context(tsk, NULL);
 	return context;
 }
 
@@ -950,7 +957,7 @@
 	}
 	context->filterkey = key;
 
-	tsk->audit_context  = context;
+	audit_set_context(tsk, context);
 	set_tsk_thread_flag(tsk, TIF_SYSCALL_AUDIT);
 	return 0;
 }
@@ -1507,8 +1514,7 @@
 void __audit_syscall_entry(int major, unsigned long a1, unsigned long a2,
 			   unsigned long a3, unsigned long a4)
 {
-	struct task_struct *tsk = current;
-	struct audit_context *context = tsk->audit_context;
+	struct audit_context *context = audit_context();
 	enum audit_state     state;
 
 	if (!audit_enabled || !context)
@@ -1523,7 +1529,7 @@
 	context->dummy = !audit_n_rules;
 	if (!context->dummy && state == AUDIT_BUILD_CONTEXT) {
 		context->prio = 0;
-		if (auditd_test_task(tsk))
+		if (auditd_test_task(current))
 			return;
 	}
 
@@ -1553,7 +1559,6 @@
  */
 void __audit_syscall_exit(int success, long return_code)
 {
-	struct task_struct *tsk = current;
 	struct audit_context *context;
 
 	if (success)
@@ -1561,12 +1566,12 @@
 	else
 		success = AUDITSC_FAILURE;
 
-	context = audit_take_context(tsk, success, return_code);
+	context = audit_take_context(current, success, return_code);
 	if (!context)
 		return;
 
 	if (context->in_syscall && context->current_state == AUDIT_RECORD_CONTEXT)
-		audit_log_exit(context, tsk);
+		audit_log_exit(context, current);
 
 	context->in_syscall = 0;
 	context->prio = context->state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
@@ -1588,7 +1593,7 @@
 		kfree(context->filterkey);
 		context->filterkey = NULL;
 	}
-	tsk->audit_context = context;
+	audit_set_context(current, context);
 }
 
 static inline void handle_one(const struct inode *inode)
@@ -1600,7 +1605,7 @@
 	int count;
 	if (likely(!inode->i_fsnotify_marks))
 		return;
-	context = current->audit_context;
+	context = audit_context();
 	p = context->trees;
 	count = context->tree_count;
 	rcu_read_lock();
@@ -1631,7 +1636,7 @@
 	unsigned long seq;
 	int count;
 
-	context = current->audit_context;
+	context = audit_context();
 	p = context->trees;
 	count = context->tree_count;
 retry:
@@ -1713,7 +1718,7 @@
 struct filename *
 __audit_reusename(const __user char *uptr)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	struct audit_names *n;
 
 	list_for_each_entry(n, &context->names_list, list) {
@@ -1736,7 +1741,7 @@
  */
 void __audit_getname(struct filename *name)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	struct audit_names *n;
 
 	if (!context->in_syscall)
@@ -1764,7 +1769,7 @@
 void __audit_inode(struct filename *name, const struct dentry *dentry,
 		   unsigned int flags)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	struct inode *inode = d_backing_inode(dentry);
 	struct audit_names *n;
 	bool parent = flags & AUDIT_INODE_PARENT;
@@ -1863,7 +1868,7 @@
 			 const struct dentry *dentry,
 			 const unsigned char type)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	struct inode *inode = d_backing_inode(dentry);
 	const char *dname = dentry->d_name.name;
 	struct audit_names *n, *found_parent = NULL, *found_child = NULL;
@@ -2048,7 +2053,7 @@
 int audit_set_loginuid(kuid_t loginuid)
 {
 	struct task_struct *task = current;
-	unsigned int oldsessionid, sessionid = (unsigned int)-1;
+	unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET;
 	kuid_t oldloginuid;
 	int rc;
 
@@ -2062,7 +2067,7 @@
 	/* are we setting or clearing? */
 	if (uid_valid(loginuid)) {
 		sessionid = (unsigned int)atomic_inc_return(&session_id);
-		if (unlikely(sessionid == (unsigned int)-1))
+		if (unlikely(sessionid == AUDIT_SID_UNSET))
 			sessionid = (unsigned int)atomic_inc_return(&session_id);
 	}
 
@@ -2082,7 +2087,7 @@
  */
 void __audit_mq_open(int oflag, umode_t mode, struct mq_attr *attr)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 
 	if (attr)
 		memcpy(&context->mq_open.attr, attr, sizeof(struct mq_attr));
@@ -2106,7 +2111,7 @@
 void __audit_mq_sendrecv(mqd_t mqdes, size_t msg_len, unsigned int msg_prio,
 			const struct timespec64 *abs_timeout)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	struct timespec64 *p = &context->mq_sendrecv.abs_timeout;
 
 	if (abs_timeout)
@@ -2130,7 +2135,7 @@
 
 void __audit_mq_notify(mqd_t mqdes, const struct sigevent *notification)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 
 	if (notification)
 		context->mq_notify.sigev_signo = notification->sigev_signo;
@@ -2149,7 +2154,7 @@
  */
 void __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	context->mq_getsetattr.mqdes = mqdes;
 	context->mq_getsetattr.mqstat = *mqstat;
 	context->type = AUDIT_MQ_GETSETATTR;
@@ -2162,7 +2167,7 @@
  */
 void __audit_ipc_obj(struct kern_ipc_perm *ipcp)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	context->ipc.uid = ipcp->uid;
 	context->ipc.gid = ipcp->gid;
 	context->ipc.mode = ipcp->mode;
@@ -2182,7 +2187,7 @@
  */
 void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 
 	context->ipc.qbytes = qbytes;
 	context->ipc.perm_uid = uid;
@@ -2193,7 +2198,7 @@
 
 void __audit_bprm(struct linux_binprm *bprm)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 
 	context->type = AUDIT_EXECVE;
 	context->execve.argc = bprm->argc;
@@ -2208,7 +2213,7 @@
  */
 int __audit_socketcall(int nargs, unsigned long *args)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 
 	if (nargs <= 0 || nargs > AUDITSC_ARGS || !args)
 		return -EINVAL;
@@ -2226,7 +2231,7 @@
  */
 void __audit_fd_pair(int fd1, int fd2)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	context->fds[0] = fd1;
 	context->fds[1] = fd2;
 }
@@ -2240,7 +2245,7 @@
  */
 int __audit_sockaddr(int len, void *a)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 
 	if (!context->sockaddr) {
 		void *p = kmalloc(sizeof(struct sockaddr_storage), GFP_KERNEL);
@@ -2256,7 +2261,7 @@
 
 void __audit_ptrace(struct task_struct *t)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 
 	context->target_pid = task_tgid_nr(t);
 	context->target_auid = audit_get_loginuid(t);
@@ -2277,19 +2282,19 @@
 int audit_signal_info(int sig, struct task_struct *t)
 {
 	struct audit_aux_data_pids *axp;
-	struct task_struct *tsk = current;
-	struct audit_context *ctx = tsk->audit_context;
-	kuid_t uid = current_uid(), t_uid = task_uid(t);
+	struct audit_context *ctx = audit_context();
+	kuid_t uid = current_uid(), auid, t_uid = task_uid(t);
 
 	if (auditd_test_task(t) &&
 	    (sig == SIGTERM || sig == SIGHUP ||
 	     sig == SIGUSR1 || sig == SIGUSR2)) {
-		audit_sig_pid = task_tgid_nr(tsk);
-		if (uid_valid(tsk->loginuid))
-			audit_sig_uid = tsk->loginuid;
+		audit_sig_pid = task_tgid_nr(current);
+		auid = audit_get_loginuid(current);
+		if (uid_valid(auid))
+			audit_sig_uid = auid;
 		else
 			audit_sig_uid = uid;
-		security_task_getsecid(tsk, &audit_sig_sid);
+		security_task_getsecid(current, &audit_sig_sid);
 	}
 
 	if (!audit_signals || audit_dummy_context())
@@ -2345,7 +2350,7 @@
 			   const struct cred *new, const struct cred *old)
 {
 	struct audit_aux_data_bprm_fcaps *ax;
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	struct cpu_vfs_cap_data vcaps;
 
 	ax = kmalloc(sizeof(*ax), GFP_KERNEL);
@@ -2385,7 +2390,7 @@
  */
 void __audit_log_capset(const struct cred *new, const struct cred *old)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	context->capset.pid = task_tgid_nr(current);
 	context->capset.cap.effective   = new->cap_effective;
 	context->capset.cap.inheritable = new->cap_effective;
@@ -2396,7 +2401,7 @@
 
 void __audit_mmap_fd(int fd, int flags)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 	context->mmap.fd = fd;
 	context->mmap.flags = flags;
 	context->type = AUDIT_MMAP;
@@ -2404,7 +2409,7 @@
 
 void __audit_log_kern_module(char *name)
 {
-	struct audit_context *context = current->audit_context;
+	struct audit_context *context = audit_context();
 
 	context->module.name = kmalloc(strlen(name) + 1, GFP_KERNEL);
 	strcpy(context->module.name, name);
@@ -2413,7 +2418,7 @@
 
 void __audit_fanotify(unsigned int response)
 {
-	audit_log(current->audit_context, GFP_KERNEL,
+	audit_log(audit_context(), GFP_KERNEL,
 		AUDIT_FANOTIFY,	"resp=%u", response);
 }
 
@@ -2464,7 +2469,19 @@
 	audit_log_end(ab);
 }
 
-void __audit_seccomp(unsigned long syscall, long signr, int code)
+/**
+ * audit_seccomp - record information about a seccomp action
+ * @syscall: syscall number
+ * @signr: signal value
+ * @code: the seccomp action
+ *
+ * Record the information associated with a seccomp action. Event filtering for
+ * seccomp actions that are not to be logged is done in seccomp_log().
+ * Therefore, this function forces auditing independent of the audit_enabled
+ * and dummy context state because seccomp actions should be logged even when
+ * audit is not in use.
+ */
+void audit_seccomp(unsigned long syscall, long signr, int code)
 {
 	struct audit_buffer *ab;
 
@@ -2478,9 +2495,29 @@
 	audit_log_end(ab);
 }
 
+void audit_seccomp_actions_logged(const char *names, const char *old_names,
+				  int res)
+{
+	struct audit_buffer *ab;
+
+	if (!audit_enabled)
+		return;
+
+	ab = audit_log_start(audit_context(), GFP_KERNEL,
+			     AUDIT_CONFIG_CHANGE);
+	if (unlikely(!ab))
+		return;
+
+	audit_log_format(ab, "op=seccomp-logging");
+	audit_log_format(ab, " actions=%s", names);
+	audit_log_format(ab, " old-actions=%s", old_names);
+	audit_log_format(ab, " res=%d", res);
+	audit_log_end(ab);
+}
+
 struct list_head *audit_killed_trees(void)
 {
-	struct audit_context *ctx = current->audit_context;
+	struct audit_context *ctx = audit_context();
 	if (likely(!ctx || !ctx->in_syscall))
 		return NULL;
 	return &ctx->killed_trees;
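Given the format strings above, a call to audit_seccomp_actions_logged()
emits a CONFIG_CHANGE record along these lines (the action lists and
result below are illustrative, not taken from a real log):

    op=seccomp-logging actions=kill_process,kill_thread,errno old-actions=kill_thread,errno res=1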
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index 2be89a00..bfcdae8 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y := cgroup.o stat.o namespace.o cgroup-v1.o
+obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
 
 obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index b928b27..77ff1cd 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -201,13 +201,12 @@
 int cgroup_task_count(const struct cgroup *cgrp);
 
 /*
- * stat.c
+ * rstat.c
  */
-void cgroup_stat_flush(struct cgroup *cgrp);
-int cgroup_stat_init(struct cgroup *cgrp);
-void cgroup_stat_exit(struct cgroup *cgrp);
-void cgroup_stat_show_cputime(struct seq_file *seq);
-void cgroup_stat_boot(void);
+int cgroup_rstat_init(struct cgroup *cgrp);
+void cgroup_rstat_exit(struct cgroup *cgrp);
+void cgroup_rstat_boot(void);
+void cgroup_base_stat_cputime_show(struct seq_file *seq);
 
 /*
  * namespace.c
@@ -218,9 +217,9 @@
  * cgroup-v1.c
  */
 extern struct cftype cgroup1_base_files[];
-extern const struct file_operations proc_cgroupstats_operations;
 extern struct kernfs_syscall_ops cgroup1_kf_syscall_ops;
 
+int proc_cgroupstats_show(struct seq_file *m, void *v);
 bool cgroup1_ssid_disabled(int ssid);
 void cgroup1_pidlist_destroy_all(struct cgroup *cgrp);
 void cgroup1_release_agent(struct work_struct *work);
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index a2c05d2..e06c97f 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -682,7 +682,7 @@
 };
 
 /* Display information about each subsystem and each hierarchy */
-static int proc_cgroupstats_show(struct seq_file *m, void *v)
+int proc_cgroupstats_show(struct seq_file *m, void *v)
 {
 	struct cgroup_subsys *ss;
 	int i;
@@ -705,18 +705,6 @@
 	return 0;
 }
 
-static int cgroupstats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_cgroupstats_show, NULL);
-}
-
-const struct file_operations proc_cgroupstats_operations = {
-	.open = cgroupstats_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
 /**
  * cgroupstats_build - build and fill cgroupstats
  * @stats: cgroupstats to fill information into
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a662bfc..077370b 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -54,6 +54,7 @@
 #include <linux/proc_ns.h>
 #include <linux/nsproxy.h>
 #include <linux/file.h>
+#include <linux/sched/cputime.h>
 #include <net/sock.h>
 
 #define CREATE_TRACE_POINTS
@@ -61,6 +62,8 @@
 
 #define CGROUP_FILE_NAME_MAX		(MAX_CGROUP_TYPE_NAMELEN +	\
 					 MAX_CFTYPE_NAME + 2)
+/* let's not notify more than 100 times per second */
+#define CGROUP_FILE_NOTIFY_MIN_INTV	DIV_ROUND_UP(HZ, 100)
 
 /*
  * cgroup_mutex is the master lock.  Any modification to cgroup or its
@@ -142,14 +145,14 @@
 };
 #undef SUBSYS
 
-static DEFINE_PER_CPU(struct cgroup_cpu_stat, cgrp_dfl_root_cpu_stat);
+static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
 
 /*
  * The default hierarchy, reserved for the subsystems that are otherwise
  * unattached - it never has more than a single cgroup, and all tasks are
  * part of that cgroup.
  */
-struct cgroup_root cgrp_dfl_root = { .cgrp.cpu_stat = &cgrp_dfl_root_cpu_stat };
+struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
 EXPORT_SYMBOL_GPL(cgrp_dfl_root);
 
 /*
@@ -1554,6 +1557,8 @@
 		spin_lock_irq(&cgroup_file_kn_lock);
 		cfile->kn = NULL;
 		spin_unlock_irq(&cgroup_file_kn_lock);
+
+		del_timer_sync(&cfile->notify_timer);
 	}
 
 	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
@@ -1573,8 +1578,17 @@
 
 	css->flags &= ~CSS_VISIBLE;
 
-	list_for_each_entry(cfts, &css->ss->cfts, node)
+	if (!css->ss) {
+		if (cgroup_on_dfl(cgrp))
+			cfts = cgroup_base_files;
+		else
+			cfts = cgroup1_base_files;
+
 		cgroup_addrm_files(css, cgrp, cfts, false);
+	} else {
+		list_for_each_entry(cfts, &css->ss->cfts, node)
+			cgroup_addrm_files(css, cgrp, cfts, false);
+	}
 }
 
 /**
@@ -1598,14 +1612,16 @@
 		else
 			cfts = cgroup1_base_files;
 
-		return cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
-	}
-
-	list_for_each_entry(cfts, &css->ss->cfts, node) {
-		ret = cgroup_addrm_files(css, cgrp, cfts, true);
-		if (ret < 0) {
-			failed_cfts = cfts;
-			goto err;
+		ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
+		if (ret < 0)
+			return ret;
+	} else {
+		list_for_each_entry(cfts, &css->ss->cfts, node) {
+			ret = cgroup_addrm_files(css, cgrp, cfts, true);
+			if (ret < 0) {
+				failed_cfts = cfts;
+				goto err;
+			}
 		}
 	}
 
@@ -1782,13 +1798,6 @@
 {
 	struct task_struct *p, *g;
 
-	spin_lock_irq(&css_set_lock);
-
-	if (use_task_css_set_links)
-		goto out_unlock;
-
-	use_task_css_set_links = true;
-
 	/*
 	 * We need tasklist_lock because RCU is not safe against
 	 * while_each_thread(). Besides, a forking task that has passed
@@ -1797,6 +1806,13 @@
 	 * tasklist if we walk through it with RCU.
 	 */
 	read_lock(&tasklist_lock);
+	spin_lock_irq(&css_set_lock);
+
+	if (use_task_css_set_links)
+		goto out_unlock;
+
+	use_task_css_set_links = true;
+
 	do_each_thread(g, p) {
 		WARN_ON_ONCE(!list_empty(&p->cg_list) ||
 			     task_css_set(p) != &init_css_set);
@@ -1824,9 +1840,9 @@
 		}
 		spin_unlock(&p->sighand->siglock);
 	} while_each_thread(g, p);
-	read_unlock(&tasklist_lock);
 out_unlock:
 	spin_unlock_irq(&css_set_lock);
+	read_unlock(&tasklist_lock);
 }
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -1844,6 +1860,8 @@
 	cgrp->dom_cgrp = cgrp;
 	cgrp->max_descendants = INT_MAX;
 	cgrp->max_depth = INT_MAX;
+	INIT_LIST_HEAD(&cgrp->rstat_css_list);
+	prev_cputime_init(&cgrp->prev_cputime);
 
 	for_each_subsys(ss, ssid)
 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -3381,7 +3399,7 @@
 	struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
 	int ret = 0;
 
-	cgroup_stat_show_cputime(seq);
+	cgroup_base_stat_cputime_show(seq);
 #ifdef CONFIG_CGROUP_SCHED
 	ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
 #endif
@@ -3521,6 +3539,12 @@
 	return kernfs_setattr(kn, &iattr);
 }
 
+static void cgroup_file_notify_timer(struct timer_list *timer)
+{
+	cgroup_file_notify(container_of(timer, struct cgroup_file,
+					notify_timer));
+}
+
 static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 			   struct cftype *cft)
 {
@@ -3547,6 +3571,8 @@
 	if (cft->file_offset) {
 		struct cgroup_file *cfile = (void *)css + cft->file_offset;
 
+		timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);
+
 		spin_lock_irq(&cgroup_file_kn_lock);
 		cfile->kn = kn;
 		spin_unlock_irq(&cgroup_file_kn_lock);
@@ -3796,8 +3822,17 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&cgroup_file_kn_lock, flags);
-	if (cfile->kn)
-		kernfs_notify(cfile->kn);
+	if (cfile->kn) {
+		unsigned long last = cfile->notified_at;
+		unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
+
+		if (time_in_range(jiffies, last, next)) {
+			timer_reduce(&cfile->notify_timer, next);
+		} else {
+			kernfs_notify(cfile->kn);
+			cfile->notified_at = jiffies;
+		}
+	}
 	spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
 }
 
@@ -4560,7 +4595,7 @@
 			cgroup_put(cgroup_parent(cgrp));
 			kernfs_put(cgrp->kn);
 			if (cgroup_on_dfl(cgrp))
-				cgroup_stat_exit(cgrp);
+				cgroup_rstat_exit(cgrp);
 			kfree(cgrp);
 		} else {
 			/*
@@ -4587,6 +4622,11 @@
 
 	if (ss) {
 		/* css release path */
+		if (!list_empty(&css->rstat_css_node)) {
+			cgroup_rstat_flush(cgrp);
+			list_del_rcu(&css->rstat_css_node);
+		}
+
 		cgroup_idr_replace(&ss->css_idr, NULL, css->id);
 		if (ss->css_released)
 			ss->css_released(css);
@@ -4597,7 +4637,7 @@
 		trace_cgroup_release(cgrp);
 
 		if (cgroup_on_dfl(cgrp))
-			cgroup_stat_flush(cgrp);
+			cgroup_rstat_flush(cgrp);
 
 		for (tcgrp = cgroup_parent(cgrp); tcgrp;
 		     tcgrp = cgroup_parent(tcgrp))
@@ -4648,6 +4688,7 @@
 	css->id = -1;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
+	INIT_LIST_HEAD(&css->rstat_css_node);
 	css->serial_nr = css_serial_nr_next++;
 	atomic_set(&css->online_cnt, 0);
 
@@ -4656,6 +4697,9 @@
 		css_get(css->parent);
 	}
 
+	if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
+		list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
+
 	BUG_ON(cgroup_css(cgrp, ss));
 }
 
@@ -4757,6 +4801,7 @@
 err_list_del:
 	list_del_rcu(&css->sibling);
 err_free_css:
+	list_del_rcu(&css->rstat_css_node);
 	INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
 	queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
 	return ERR_PTR(err);
@@ -4775,8 +4820,8 @@
 	int ret;
 
 	/* allocate the cgroup and its ID, 0 is reserved for the root */
-	cgrp = kzalloc(sizeof(*cgrp) +
-		       sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL);
+	cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
+		       GFP_KERNEL);
 	if (!cgrp)
 		return ERR_PTR(-ENOMEM);
 
@@ -4785,7 +4830,7 @@
 		goto out_free_cgrp;
 
 	if (cgroup_on_dfl(parent)) {
-		ret = cgroup_stat_init(cgrp);
+		ret = cgroup_rstat_init(cgrp);
 		if (ret)
 			goto out_cancel_ref;
 	}
@@ -4850,7 +4895,7 @@
 	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
 out_stat_exit:
 	if (cgroup_on_dfl(parent))
-		cgroup_stat_exit(cgrp);
+		cgroup_rstat_exit(cgrp);
 out_cancel_ref:
 	percpu_ref_exit(&cgrp->self.refcnt);
 out_free_cgrp:
@@ -5090,10 +5135,8 @@
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);
 
-	/*
-	 * Remove @cgrp directory along with the base files.  @cgrp has an
-	 * extra ref on its kn.
-	 */
+	/* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */
+	css_clear_dir(&cgrp->self);
 	kernfs_remove(cgrp->kn);
 
 	if (parent && cgroup_is_threaded(cgrp))
@@ -5245,7 +5288,7 @@
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
 
-	cgroup_stat_boot();
+	cgroup_rstat_boot();
 
 	/*
 	 * The latency of the synchronize_sched() is too high for cgroups,
@@ -5335,7 +5378,7 @@
 	WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
 	WARN_ON(register_filesystem(&cgroup_fs_type));
 	WARN_ON(register_filesystem(&cgroup2_fs_type));
-	WARN_ON(!proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations));
+	WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
 
 	return 0;
 }
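The allocation in the hunk above now uses struct_size() from
<linux/overflow.h>, which sizes a structure with a trailing
variable-length array and saturates to SIZE_MAX on arithmetic overflow,
so the allocation fails cleanly instead of being undersized. Roughly:

    cgrp = kzalloc(struct_size(cgrp, ancestor_ids, level + 1), GFP_KERNEL);
    /* overflow-checked form of:
     * kzalloc(sizeof(*cgrp) +
     *         (level + 1) * sizeof(cgrp->ancestor_ids[0]), GFP_KERNEL)
     */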
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index defad3c..d3bbb75 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -362,35 +362,32 @@
 static int parse_resource(char *c, int *intval)
 {
 	substring_t argstr;
-	const char **table = &rdmacg_resource_names[0];
 	char *name, *value = c;
 	size_t len;
-	int ret, i = 0;
+	int ret, i;
 
 	name = strsep(&value, "=");
 	if (!name || !value)
 		return -EINVAL;
 
+	i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name);
+	if (i < 0)
+		return i;
+
 	len = strlen(value);
 
-	for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
-		if (strcmp(table[i], name))
-			continue;
+	argstr.from = value;
+	argstr.to = value + len;
 
-		argstr.from = value;
-		argstr.to = value + len;
-
-		ret = match_int(&argstr, intval);
-		if (ret >= 0) {
-			if (*intval < 0)
-				break;
-			return i;
-		}
-		if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
-			*intval = S32_MAX;
-			return i;
-		}
-		break;
+	ret = match_int(&argstr, intval);
+	if (ret >= 0) {
+		if (*intval < 0)
+			return -EINVAL;
+		return i;
+	}
+	if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
+		*intval = S32_MAX;
+		return i;
 	}
 	return -EINVAL;
 }
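parse_resource() now defers the name lookup to match_string(), which
scans a string array and returns the matching index or -EINVAL. A
sketch, assuming the rdma controller's two resource names:

    static const char * const names[] = { "hca_handle", "hca_object" };

    int idx = match_string(names, ARRAY_SIZE(names), "hca_object");
    /* idx == 1; an unknown name yields -EINVAL */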
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
new file mode 100644
index 0000000..d503d1a
--- /dev/null
+++ b/kernel/cgroup/rstat.c
@@ -0,0 +1,416 @@
+#include "cgroup-internal.h"
+
+#include <linux/sched/cputime.h>
+
+static DEFINE_SPINLOCK(cgroup_rstat_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
+
+static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
+
+static struct cgroup_rstat_cpu *cgroup_rstat_cpu(struct cgroup *cgrp, int cpu)
+{
+	return per_cpu_ptr(cgrp->rstat_cpu, cpu);
+}
+
+/**
+ * cgroup_rstat_updated - keep track of updated rstat_cpu
+ * @cgrp: target cgroup
+ * @cpu: cpu on which rstat_cpu was updated
+ *
+ * @cgrp's rstat_cpu on @cpu was updated.  Put it on the parent's matching
+ * rstat_cpu->updated_children list.  See the comment on top of
+ * cgroup_rstat_cpu definition for details.
+ */
+void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
+{
+	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
+	struct cgroup *parent;
+	unsigned long flags;
+
+	/* nothing to do for root */
+	if (!cgroup_parent(cgrp))
+		return;
+
+	/*
+	 * Paired with the one in cgroup_rstat_cpu_pop_updated().  Either we
+	 * see NULL updated_next or they see our updated stat.
+	 */
+	smp_mb();
+
+	/*
+	 * Because @parent's updated_children is terminated with @parent
+	 * instead of NULL, we can tell whether @cgrp is on the list by
+	 * testing the next pointer for NULL.
+	 */
+	if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
+		return;
+
+	raw_spin_lock_irqsave(cpu_lock, flags);
+
+	/* put @cgrp and all ancestors on the corresponding updated lists */
+	for (parent = cgroup_parent(cgrp); parent;
+	     cgrp = parent, parent = cgroup_parent(cgrp)) {
+		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+		struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
+
+		/*
+		 * Both additions and removals are bottom-up.  If a cgroup
+		 * is already in the tree, all ancestors are.
+		 */
+		if (rstatc->updated_next)
+			break;
+
+		rstatc->updated_next = prstatc->updated_children;
+		prstatc->updated_children = cgrp;
+	}
+
+	raw_spin_unlock_irqrestore(cpu_lock, flags);
+}
+EXPORT_SYMBOL_GPL(cgroup_rstat_updated);
+
+/**
+ * cgroup_rstat_cpu_pop_updated - iterate and dismantle rstat_cpu updated tree
+ * @pos: current position
+ * @root: root of the tree to traverse
+ * @cpu: target cpu
+ *
+ * Walks the updated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
+ * the traversal and %NULL return indicates the end.  During traversal,
+ * each returned cgroup is unlinked from the tree.  Must be called with the
+ * matching cgroup_rstat_cpu_lock held.
+ *
+ * The only ordering guarantee is that, for a parent and a child pair
+ * covered by a given traversal, if a child is visited, its parent is
+ * guaranteed to be visited afterwards.
+ */
+static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
+						   struct cgroup *root, int cpu)
+{
+	struct cgroup_rstat_cpu *rstatc;
+	struct cgroup *parent;
+
+	if (pos == root)
+		return NULL;
+
+	/*
+	 * We're gonna walk down to the first leaf and visit/remove it.  We
+	 * can pick whatever unvisited node as the starting point.
+	 */
+	if (!pos)
+		pos = root;
+	else
+		pos = cgroup_parent(pos);
+
+	/* walk down to the first leaf */
+	while (true) {
+		rstatc = cgroup_rstat_cpu(pos, cpu);
+		if (rstatc->updated_children == pos)
+			break;
+		pos = rstatc->updated_children;
+	}
+
+	/*
+	 * Unlink @pos from the tree.  As the updated_children list is
+	 * singly linked, we have to walk it to find the removal point.
+	 * However, due to the way we traverse, @pos will be the first
+	 * child in most cases. The only exception is @root.
+	 */
+	parent = cgroup_parent(pos);
+	if (parent && rstatc->updated_next) {
+		struct cgroup_rstat_cpu *prstatc = cgroup_rstat_cpu(parent, cpu);
+		struct cgroup_rstat_cpu *nrstatc;
+		struct cgroup **nextp;
+
+		nextp = &prstatc->updated_children;
+		while (true) {
+			nrstatc = cgroup_rstat_cpu(*nextp, cpu);
+			if (*nextp == pos)
+				break;
+
+			WARN_ON_ONCE(*nextp == parent);
+			nextp = &nrstatc->updated_next;
+		}
+
+		*nextp = rstatc->updated_next;
+		rstatc->updated_next = NULL;
+
+		/*
+		 * Paired with the one in cgroup_rstat_updated().
+		 * Either they see NULL updated_next or we see their
+		 * updated stat.
+		 */
+		smp_mb();
+	}
+
+	return pos;
+}
+
+/* see cgroup_rstat_flush() */
+static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
+	__releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
+{
+	int cpu;
+
+	lockdep_assert_held(&cgroup_rstat_lock);
+
+	for_each_possible_cpu(cpu) {
+		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
+						       cpu);
+		struct cgroup *pos = NULL;
+
+		raw_spin_lock(cpu_lock);
+		while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
+			struct cgroup_subsys_state *css;
+
+			cgroup_base_stat_flush(pos, cpu);
+
+			rcu_read_lock();
+			list_for_each_entry_rcu(css, &pos->rstat_css_list,
+						rstat_css_node)
+				css->ss->css_rstat_flush(css, cpu);
+			rcu_read_unlock();
+		}
+		raw_spin_unlock(cpu_lock);
+
+		/* if @may_sleep, play nice and yield if necessary */
+		if (may_sleep && (need_resched() ||
+				  spin_needbreak(&cgroup_rstat_lock))) {
+			spin_unlock_irq(&cgroup_rstat_lock);
+			if (!cond_resched())
+				cpu_relax();
+			spin_lock_irq(&cgroup_rstat_lock);
+		}
+	}
+}
+
+/**
+ * cgroup_rstat_flush - flush stats in @cgrp's subtree
+ * @cgrp: target cgroup
+ *
+ * Collect all per-cpu stats in @cgrp's subtree into the global counters
+ * and propagate them upwards.  After this function returns, all cgroups in
+ * the subtree have up-to-date ->stat.
+ *
+ * This also gets all cgroups in the subtree including @cgrp off the
+ * ->updated_children lists.
+ *
+ * This function may block.
+ */
+void cgroup_rstat_flush(struct cgroup *cgrp)
+{
+	might_sleep();
+
+	spin_lock_irq(&cgroup_rstat_lock);
+	cgroup_rstat_flush_locked(cgrp, true);
+	spin_unlock_irq(&cgroup_rstat_lock);
+}
+
+/**
+ * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
+ * @cgrp: target cgroup
+ *
+ * This function can be called from any context.
+ */
+void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cgroup_rstat_lock, flags);
+	cgroup_rstat_flush_locked(cgrp, false);
+	spin_unlock_irqrestore(&cgroup_rstat_lock, flags);
+}
+
+/**
+ * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
+ * @cgrp: target cgroup
+ *
+ * Flush stats in @cgrp's subtree and prevent further flushes.  Must be
+ * paired with cgroup_rstat_flush_release().
+ *
+ * This function may block.
+ */
+void cgroup_rstat_flush_hold(struct cgroup *cgrp)
+	__acquires(&cgroup_rstat_lock)
+{
+	might_sleep();
+	spin_lock_irq(&cgroup_rstat_lock);
+	cgroup_rstat_flush_locked(cgrp, true);
+}
+
+/**
+ * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
+ */
+void cgroup_rstat_flush_release(void)
+	__releases(&cgroup_rstat_lock)
+{
+	spin_unlock_irq(&cgroup_rstat_lock);
+}
+
+int cgroup_rstat_init(struct cgroup *cgrp)
+{
+	int cpu;
+
+	/* the root cgrp has rstat_cpu preallocated */
+	if (!cgrp->rstat_cpu) {
+		cgrp->rstat_cpu = alloc_percpu(struct cgroup_rstat_cpu);
+		if (!cgrp->rstat_cpu)
+			return -ENOMEM;
+	}
+
+	/* ->updated_children list is self terminated */
+	for_each_possible_cpu(cpu) {
+		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+
+		rstatc->updated_children = cgrp;
+		u64_stats_init(&rstatc->bsync);
+	}
+
+	return 0;
+}
+
+void cgroup_rstat_exit(struct cgroup *cgrp)
+{
+	int cpu;
+
+	cgroup_rstat_flush(cgrp);
+
+	/* sanity check */
+	for_each_possible_cpu(cpu) {
+		struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+
+		if (WARN_ON_ONCE(rstatc->updated_children != cgrp) ||
+		    WARN_ON_ONCE(rstatc->updated_next))
+			return;
+	}
+
+	free_percpu(cgrp->rstat_cpu);
+	cgrp->rstat_cpu = NULL;
+}
+
+void __init cgroup_rstat_boot(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
+
+	BUG_ON(cgroup_rstat_init(&cgrp_dfl_root.cgrp));
+}
+
+/*
+ * Functions for cgroup basic resource statistics implemented on top of
+ * rstat.
+ */
+static void cgroup_base_stat_accumulate(struct cgroup_base_stat *dst_bstat,
+					struct cgroup_base_stat *src_bstat)
+{
+	dst_bstat->cputime.utime += src_bstat->cputime.utime;
+	dst_bstat->cputime.stime += src_bstat->cputime.stime;
+	dst_bstat->cputime.sum_exec_runtime += src_bstat->cputime.sum_exec_runtime;
+}
+
+static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
+{
+	struct cgroup *parent = cgroup_parent(cgrp);
+	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
+	struct task_cputime *last_cputime = &rstatc->last_bstat.cputime;
+	struct task_cputime cputime;
+	struct cgroup_base_stat delta;
+	unsigned seq;
+
+	/* fetch the current per-cpu values */
+	do {
+		seq = __u64_stats_fetch_begin(&rstatc->bsync);
+		cputime = rstatc->bstat.cputime;
+	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));
+
+	/* calculate the delta to propagate */
+	delta.cputime.utime = cputime.utime - last_cputime->utime;
+	delta.cputime.stime = cputime.stime - last_cputime->stime;
+	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
+					 last_cputime->sum_exec_runtime;
+	*last_cputime = cputime;
+
+	/* transfer the pending stat into delta */
+	cgroup_base_stat_accumulate(&delta, &cgrp->pending_bstat);
+	memset(&cgrp->pending_bstat, 0, sizeof(cgrp->pending_bstat));
+
+	/* propagate delta into the global stat and the parent's pending */
+	cgroup_base_stat_accumulate(&cgrp->bstat, &delta);
+	if (parent)
+		cgroup_base_stat_accumulate(&parent->pending_bstat, &delta);
+}
+
+static struct cgroup_rstat_cpu *
+cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp)
+{
+	struct cgroup_rstat_cpu *rstatc;
+
+	rstatc = get_cpu_ptr(cgrp->rstat_cpu);
+	u64_stats_update_begin(&rstatc->bsync);
+	return rstatc;
+}
+
+static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
+						 struct cgroup_rstat_cpu *rstatc)
+{
+	u64_stats_update_end(&rstatc->bsync);
+	cgroup_rstat_updated(cgrp, smp_processor_id());
+	put_cpu_ptr(rstatc);
+}
+
+void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
+{
+	struct cgroup_rstat_cpu *rstatc;
+
+	rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
+	rstatc->bstat.cputime.sum_exec_runtime += delta_exec;
+	cgroup_base_stat_cputime_account_end(cgrp, rstatc);
+}
+
+void __cgroup_account_cputime_field(struct cgroup *cgrp,
+				    enum cpu_usage_stat index, u64 delta_exec)
+{
+	struct cgroup_rstat_cpu *rstatc;
+
+	rstatc = cgroup_base_stat_cputime_account_begin(cgrp);
+
+	switch (index) {
+	case CPUTIME_USER:
+	case CPUTIME_NICE:
+		rstatc->bstat.cputime.utime += delta_exec;
+		break;
+	case CPUTIME_SYSTEM:
+	case CPUTIME_IRQ:
+	case CPUTIME_SOFTIRQ:
+		rstatc->bstat.cputime.stime += delta_exec;
+		break;
+	default:
+		break;
+	}
+
+	cgroup_base_stat_cputime_account_end(cgrp, rstatc);
+}
+
+void cgroup_base_stat_cputime_show(struct seq_file *seq)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+	u64 usage, utime, stime;
+
+	if (!cgroup_parent(cgrp))
+		return;
+
+	cgroup_rstat_flush_hold(cgrp);
+	usage = cgrp->bstat.cputime.sum_exec_runtime;
+	cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime, &utime, &stime);
+	cgroup_rstat_flush_release();
+
+	do_div(usage, NSEC_PER_USEC);
+	do_div(utime, NSEC_PER_USEC);
+	do_div(stime, NSEC_PER_USEC);
+
+	seq_printf(seq, "usage_usec %llu\n"
+		   "user_usec %llu\n"
+		   "system_usec %llu\n",
+		   usage, utime, stime);
+}
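The bsync field added above is a u64_stats seqcount: writers bracket
updates with u64_stats_update_begin()/end(), and readers retry their
snapshot if a writer raced with them; on 64-bit kernels the begin/retry
pair compiles away. The reader side, sketched with the non-underscored
API:

    struct task_cputime snap;
    unsigned int seq;

    do {
    	seq = u64_stats_fetch_begin(&rstatc->bsync);
    	snap = rstatc->bstat.cputime;	/* consistent 64-bit snapshot */
    } while (u64_stats_fetch_retry(&rstatc->bsync, seq));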
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c
deleted file mode 100644
index 1e111dd..0000000
--- a/kernel/cgroup/stat.c
+++ /dev/null
@@ -1,338 +0,0 @@
-#include "cgroup-internal.h"
-
-#include <linux/sched/cputime.h>
-
-static DEFINE_MUTEX(cgroup_stat_mutex);
-static DEFINE_PER_CPU(raw_spinlock_t, cgroup_cpu_stat_lock);
-
-static struct cgroup_cpu_stat *cgroup_cpu_stat(struct cgroup *cgrp, int cpu)
-{
-	return per_cpu_ptr(cgrp->cpu_stat, cpu);
-}
-
-/**
- * cgroup_cpu_stat_updated - keep track of updated cpu_stat
- * @cgrp: target cgroup
- * @cpu: cpu on which cpu_stat was updated
- *
- * @cgrp's cpu_stat on @cpu was updated.  Put it on the parent's matching
- * cpu_stat->updated_children list.  See the comment on top of
- * cgroup_cpu_stat definition for details.
- */
-static void cgroup_cpu_stat_updated(struct cgroup *cgrp, int cpu)
-{
-	raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
-	struct cgroup *parent;
-	unsigned long flags;
-
-	/*
-	 * Speculative already-on-list test.  This may race leading to
-	 * temporary inaccuracies, which is fine.
-	 *
-	 * Because @parent's updated_children is terminated with @parent
-	 * instead of NULL, we can tell whether @cgrp is on the list by
-	 * testing the next pointer for NULL.
-	 */
-	if (cgroup_cpu_stat(cgrp, cpu)->updated_next)
-		return;
-
-	raw_spin_lock_irqsave(cpu_lock, flags);
-
-	/* put @cgrp and all ancestors on the corresponding updated lists */
-	for (parent = cgroup_parent(cgrp); parent;
-	     cgrp = parent, parent = cgroup_parent(cgrp)) {
-		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
-		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
-
-		/*
-		 * Both additions and removals are bottom-up.  If a cgroup
-		 * is already in the tree, all ancestors are.
-		 */
-		if (cstat->updated_next)
-			break;
-
-		cstat->updated_next = pcstat->updated_children;
-		pcstat->updated_children = cgrp;
-	}
-
-	raw_spin_unlock_irqrestore(cpu_lock, flags);
-}
-
-/**
- * cgroup_cpu_stat_pop_updated - iterate and dismantle cpu_stat updated tree
- * @pos: current position
- * @root: root of the tree to traversal
- * @cpu: target cpu
- *
- * Walks the udpated cpu_stat tree on @cpu from @root.  %NULL @pos starts
- * the traversal and %NULL return indicates the end.  During traversal,
- * each returned cgroup is unlinked from the tree.  Must be called with the
- * matching cgroup_cpu_stat_lock held.
- *
- * The only ordering guarantee is that, for a parent and a child pair
- * covered by a given traversal, if a child is visited, its parent is
- * guaranteed to be visited afterwards.
- */
-static struct cgroup *cgroup_cpu_stat_pop_updated(struct cgroup *pos,
-						  struct cgroup *root, int cpu)
-{
-	struct cgroup_cpu_stat *cstat;
-	struct cgroup *parent;
-
-	if (pos == root)
-		return NULL;
-
-	/*
-	 * We're gonna walk down to the first leaf and visit/remove it.  We
-	 * can pick whatever unvisited node as the starting point.
-	 */
-	if (!pos)
-		pos = root;
-	else
-		pos = cgroup_parent(pos);
-
-	/* walk down to the first leaf */
-	while (true) {
-		cstat = cgroup_cpu_stat(pos, cpu);
-		if (cstat->updated_children == pos)
-			break;
-		pos = cstat->updated_children;
-	}
-
-	/*
-	 * Unlink @pos from the tree.  As the updated_children list is
-	 * singly linked, we have to walk it to find the removal point.
-	 * However, due to the way we traverse, @pos will be the first
-	 * child in most cases. The only exception is @root.
-	 */
-	parent = cgroup_parent(pos);
-	if (parent && cstat->updated_next) {
-		struct cgroup_cpu_stat *pcstat = cgroup_cpu_stat(parent, cpu);
-		struct cgroup_cpu_stat *ncstat;
-		struct cgroup **nextp;
-
-		nextp = &pcstat->updated_children;
-		while (true) {
-			ncstat = cgroup_cpu_stat(*nextp, cpu);
-			if (*nextp == pos)
-				break;
-
-			WARN_ON_ONCE(*nextp == parent);
-			nextp = &ncstat->updated_next;
-		}
-
-		*nextp = cstat->updated_next;
-		cstat->updated_next = NULL;
-	}
-
-	return pos;
-}
-
-static void cgroup_stat_accumulate(struct cgroup_stat *dst_stat,
-				   struct cgroup_stat *src_stat)
-{
-	dst_stat->cputime.utime += src_stat->cputime.utime;
-	dst_stat->cputime.stime += src_stat->cputime.stime;
-	dst_stat->cputime.sum_exec_runtime += src_stat->cputime.sum_exec_runtime;
-}
-
-static void cgroup_cpu_stat_flush_one(struct cgroup *cgrp, int cpu)
-{
-	struct cgroup *parent = cgroup_parent(cgrp);
-	struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
-	struct task_cputime *last_cputime = &cstat->last_cputime;
-	struct task_cputime cputime;
-	struct cgroup_stat delta;
-	unsigned seq;
-
-	lockdep_assert_held(&cgroup_stat_mutex);
-
-	/* fetch the current per-cpu values */
-	do {
-		seq = __u64_stats_fetch_begin(&cstat->sync);
-		cputime = cstat->cputime;
-	} while (__u64_stats_fetch_retry(&cstat->sync, seq));
-
-	/* accumulate the deltas to propgate */
-	delta.cputime.utime = cputime.utime - last_cputime->utime;
-	delta.cputime.stime = cputime.stime - last_cputime->stime;
-	delta.cputime.sum_exec_runtime = cputime.sum_exec_runtime -
-					 last_cputime->sum_exec_runtime;
-	*last_cputime = cputime;
-
-	/* transfer the pending stat into delta */
-	cgroup_stat_accumulate(&delta, &cgrp->pending_stat);
-	memset(&cgrp->pending_stat, 0, sizeof(cgrp->pending_stat));
-
-	/* propagate delta into the global stat and the parent's pending */
-	cgroup_stat_accumulate(&cgrp->stat, &delta);
-	if (parent)
-		cgroup_stat_accumulate(&parent->pending_stat, &delta);
-}
-
-/* see cgroup_stat_flush() */
-static void cgroup_stat_flush_locked(struct cgroup *cgrp)
-{
-	int cpu;
-
-	lockdep_assert_held(&cgroup_stat_mutex);
-
-	for_each_possible_cpu(cpu) {
-		raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_cpu_stat_lock, cpu);
-		struct cgroup *pos = NULL;
-
-		raw_spin_lock_irq(cpu_lock);
-		while ((pos = cgroup_cpu_stat_pop_updated(pos, cgrp, cpu)))
-			cgroup_cpu_stat_flush_one(pos, cpu);
-		raw_spin_unlock_irq(cpu_lock);
-	}
-}
-
-/**
- * cgroup_stat_flush - flush stats in @cgrp's subtree
- * @cgrp: target cgroup
- *
- * Collect all per-cpu stats in @cgrp's subtree into the global counters
- * and propagate them upwards.  After this function returns, all cgroups in
- * the subtree have up-to-date ->stat.
- *
- * This also gets all cgroups in the subtree including @cgrp off the
- * ->updated_children lists.
- */
-void cgroup_stat_flush(struct cgroup *cgrp)
-{
-	mutex_lock(&cgroup_stat_mutex);
-	cgroup_stat_flush_locked(cgrp);
-	mutex_unlock(&cgroup_stat_mutex);
-}
-
-static struct cgroup_cpu_stat *cgroup_cpu_stat_account_begin(struct cgroup *cgrp)
-{
-	struct cgroup_cpu_stat *cstat;
-
-	cstat = get_cpu_ptr(cgrp->cpu_stat);
-	u64_stats_update_begin(&cstat->sync);
-	return cstat;
-}
-
-static void cgroup_cpu_stat_account_end(struct cgroup *cgrp,
-					struct cgroup_cpu_stat *cstat)
-{
-	u64_stats_update_end(&cstat->sync);
-	cgroup_cpu_stat_updated(cgrp, smp_processor_id());
-	put_cpu_ptr(cstat);
-}
-
-void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec)
-{
-	struct cgroup_cpu_stat *cstat;
-
-	cstat = cgroup_cpu_stat_account_begin(cgrp);
-	cstat->cputime.sum_exec_runtime += delta_exec;
-	cgroup_cpu_stat_account_end(cgrp, cstat);
-}
-
-void __cgroup_account_cputime_field(struct cgroup *cgrp,
-				    enum cpu_usage_stat index, u64 delta_exec)
-{
-	struct cgroup_cpu_stat *cstat;
-
-	cstat = cgroup_cpu_stat_account_begin(cgrp);
-
-	switch (index) {
-	case CPUTIME_USER:
-	case CPUTIME_NICE:
-		cstat->cputime.utime += delta_exec;
-		break;
-	case CPUTIME_SYSTEM:
-	case CPUTIME_IRQ:
-	case CPUTIME_SOFTIRQ:
-		cstat->cputime.stime += delta_exec;
-		break;
-	default:
-		break;
-	}
-
-	cgroup_cpu_stat_account_end(cgrp, cstat);
-}
-
-void cgroup_stat_show_cputime(struct seq_file *seq)
-{
-	struct cgroup *cgrp = seq_css(seq)->cgroup;
-	u64 usage, utime, stime;
-
-	if (!cgroup_parent(cgrp))
-		return;
-
-	mutex_lock(&cgroup_stat_mutex);
-
-	cgroup_stat_flush_locked(cgrp);
-
-	usage = cgrp->stat.cputime.sum_exec_runtime;
-	cputime_adjust(&cgrp->stat.cputime, &cgrp->stat.prev_cputime,
-		       &utime, &stime);
-
-	mutex_unlock(&cgroup_stat_mutex);
-
-	do_div(usage, NSEC_PER_USEC);
-	do_div(utime, NSEC_PER_USEC);
-	do_div(stime, NSEC_PER_USEC);
-
-	seq_printf(seq, "usage_usec %llu\n"
-		   "user_usec %llu\n"
-		   "system_usec %llu\n",
-		   usage, utime, stime);
-}
-
-int cgroup_stat_init(struct cgroup *cgrp)
-{
-	int cpu;
-
-	/* the root cgrp has cpu_stat preallocated */
-	if (!cgrp->cpu_stat) {
-		cgrp->cpu_stat = alloc_percpu(struct cgroup_cpu_stat);
-		if (!cgrp->cpu_stat)
-			return -ENOMEM;
-	}
-
-	/* ->updated_children list is self terminated */
-	for_each_possible_cpu(cpu) {
-		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
-
-		cstat->updated_children = cgrp;
-		u64_stats_init(&cstat->sync);
-	}
-
-	prev_cputime_init(&cgrp->stat.prev_cputime);
-
-	return 0;
-}
-
-void cgroup_stat_exit(struct cgroup *cgrp)
-{
-	int cpu;
-
-	cgroup_stat_flush(cgrp);
-
-	/* sanity check */
-	for_each_possible_cpu(cpu) {
-		struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
-
-		if (WARN_ON_ONCE(cstat->updated_children != cgrp) ||
-		    WARN_ON_ONCE(cstat->updated_next))
-			return;
-	}
-
-	free_percpu(cgrp->cpu_stat);
-	cgrp->cpu_stat = NULL;
-}
-
-void __init cgroup_stat_boot(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		raw_spin_lock_init(per_cpu_ptr(&cgroup_cpu_stat_lock, cpu));
-
-	BUG_ON(cgroup_stat_init(&cgrp_dfl_root.cgrp));
-}
diff --git a/kernel/compat.c b/kernel/compat.c
index 92d8c98..702aa84 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -121,50 +121,6 @@
 			__put_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
 
-static int __compat_get_timespec64(struct timespec64 *ts64,
-				   const struct compat_timespec __user *cts)
-{
-	struct compat_timespec ts;
-	int ret;
-
-	ret = copy_from_user(&ts, cts, sizeof(ts));
-	if (ret)
-		return -EFAULT;
-
-	ts64->tv_sec = ts.tv_sec;
-	ts64->tv_nsec = ts.tv_nsec;
-
-	return 0;
-}
-
-static int __compat_put_timespec64(const struct timespec64 *ts64,
-				   struct compat_timespec __user *cts)
-{
-	struct compat_timespec ts = {
-		.tv_sec = ts64->tv_sec,
-		.tv_nsec = ts64->tv_nsec
-	};
-	return copy_to_user(cts, &ts, sizeof(ts)) ? -EFAULT : 0;
-}
-
-int compat_get_timespec64(struct timespec64 *ts, const void __user *uts)
-{
-	if (COMPAT_USE_64BIT_TIME)
-		return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
-	else
-		return __compat_get_timespec64(ts, uts);
-}
-EXPORT_SYMBOL_GPL(compat_get_timespec64);
-
-int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
-{
-	if (COMPAT_USE_64BIT_TIME)
-		return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
-	else
-		return __compat_put_timespec64(ts, uts);
-}
-EXPORT_SYMBOL_GPL(compat_put_timespec64);
-
 int compat_get_timeval(struct timeval *tv, const void __user *utv)
 {
 	if (COMPAT_USE_64BIT_TIME)
@@ -368,6 +324,14 @@
 	return ret;
 }
 
+/* TODO: Delete these extern declarations when get/put_compat_itimerspec64()
+ * are moved to kernel/time/time.c.
+ */
+extern int __compat_get_timespec64(struct timespec64 *ts64,
+				   const struct compat_timespec __user *cts);
+extern int __compat_put_timespec64(const struct timespec64 *ts64,
+				   struct compat_timespec __user *cts);
+
 int get_compat_itimerspec64(struct itimerspec64 *its,
 			const struct compat_itimerspec __user *uits)
 {
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index e2764d7..ca8ac28 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -44,23 +44,24 @@
 {
 	tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
 	if (tsk->delays)
-		spin_lock_init(&tsk->delays->lock);
+		raw_spin_lock_init(&tsk->delays->lock);
 }
 
 /*
  * Finish delay accounting for a statistic using its timestamps (@start),
  * accumalator (@total) and @count
  */
-static void delayacct_end(spinlock_t *lock, u64 *start, u64 *total, u32 *count)
+static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total,
+			  u32 *count)
 {
 	s64 ns = ktime_get_ns() - *start;
 	unsigned long flags;
 
 	if (ns > 0) {
-		spin_lock_irqsave(lock, flags);
+		raw_spin_lock_irqsave(lock, flags);
 		*total += ns;
 		(*count)++;
-		spin_unlock_irqrestore(lock, flags);
+		raw_spin_unlock_irqrestore(lock, flags);
 	}
 }
 
@@ -127,7 +128,7 @@
 
 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
 
-	spin_lock_irqsave(&tsk->delays->lock, flags);
+	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
 	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
 	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
 	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
@@ -137,7 +138,7 @@
 	d->blkio_count += tsk->delays->blkio_count;
 	d->swapin_count += tsk->delays->swapin_count;
 	d->freepages_count += tsk->delays->freepages_count;
-	spin_unlock_irqrestore(&tsk->delays->lock, flags);
+	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
 	return 0;
 }
@@ -147,10 +148,10 @@
 	__u64 ret;
 	unsigned long flags;
 
-	spin_lock_irqsave(&tsk->delays->lock, flags);
+	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
 	ret = nsec_to_clock_t(tsk->delays->blkio_delay +
 				tsk->delays->swapin_delay);
-	spin_unlock_irqrestore(&tsk->delays->lock, flags);
+	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 	return ret;
 }
 
diff --git a/kernel/dma.c b/kernel/dma.c
index 3506fc3..40f1529 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -135,21 +135,9 @@
 }
 #endif /* MAX_DMA_CHANNELS */
 
-static int proc_dma_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, proc_dma_show, NULL);
-}
-
-static const struct file_operations proc_dma_operations = {
-	.open		= proc_dma_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_dma_init(void)
 {
-	proc_create("dma", 0, NULL, &proc_dma_operations);
+	proc_create_single("dma", 0, NULL, proc_dma_show);
 	return 0;
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6eeab86..80cca2b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5120,6 +5120,8 @@
 	switch (_IOC_NR(cmd)) {
 	case _IOC_NR(PERF_EVENT_IOC_SET_FILTER):
 	case _IOC_NR(PERF_EVENT_IOC_ID):
+	case _IOC_NR(PERF_EVENT_IOC_QUERY_BPF):
+	case _IOC_NR(PERF_EVENT_IOC_MODIFY_ATTRIBUTES):
 		/* Fix up pointer size (usually 4 -> 8 in 32-on-64-bit case) */
 		if (_IOC_SIZE(cmd) == sizeof(compat_uptr_t)) {
 			cmd &= ~IOCSIZE_MASK;
@@ -6668,7 +6670,7 @@
 
 	raw_spin_lock_irqsave(&ifh->lock, flags);
 	list_for_each_entry(filter, &ifh->list, entry) {
-		if (filter->inode) {
+		if (filter->path.dentry) {
 			event->addr_filters_offs[count] = 0;
 			restart++;
 		}
@@ -7333,7 +7335,7 @@
 				     struct file *file, unsigned long offset,
 				     unsigned long size)
 {
-	if (filter->inode != file_inode(file))
+	if (d_inode(filter->path.dentry) != file_inode(file))
 		return false;
 
 	if (filter->offset > offset + size)
@@ -8686,8 +8688,7 @@
 	struct perf_addr_filter *filter, *iter;
 
 	list_for_each_entry_safe(filter, iter, filters, entry) {
-		if (filter->inode)
-			iput(filter->inode);
+		path_put(&filter->path);
 		list_del(&filter->entry);
 		kfree(filter);
 	}
@@ -8784,7 +8785,7 @@
 		 * Adjust base offset if the filter is associated to a binary
 		 * that needs to be mapped:
 		 */
-		if (filter->inode)
+		if (filter->path.dentry)
 			event->addr_filters_offs[count] =
 				perf_addr_filter_apply(filter, mm);
 
@@ -8858,7 +8859,6 @@
 {
 	struct perf_addr_filter *filter = NULL;
 	char *start, *orig, *filename = NULL;
-	struct path path;
 	substring_t args[MAX_OPT_ARGS];
 	int state = IF_STATE_ACTION, token;
 	unsigned int kernel = 0;
@@ -8971,19 +8971,18 @@
 					goto fail_free_name;
 
 				/* look up the path and grab its inode */
-				ret = kern_path(filename, LOOKUP_FOLLOW, &path);
+				ret = kern_path(filename, LOOKUP_FOLLOW,
+						&filter->path);
 				if (ret)
 					goto fail_free_name;
 
-				filter->inode = igrab(d_inode(path.dentry));
-				path_put(&path);
 				kfree(filename);
 				filename = NULL;
 
 				ret = -EINVAL;
-				if (!filter->inode ||
-				    !S_ISREG(filter->inode->i_mode))
-					/* free_filters_list() will iput() */
+				if (!filter->path.dentry ||
+				    !S_ISREG(d_inode(filter->path.dentry)
+					     ->i_mode))
 					goto fail;
 
 				event->addr_filters.nr_file_filters++;
@@ -10521,19 +10520,20 @@
 	if (pmu->task_ctx_nr == perf_sw_context)
 		event->event_caps |= PERF_EV_CAP_SOFTWARE;
 
-	if (group_leader &&
-	    (is_software_event(event) != is_software_event(group_leader))) {
-		if (is_software_event(event)) {
+	if (group_leader) {
+		if (is_software_event(event) &&
+		    !in_software_context(group_leader)) {
 			/*
-			 * If event and group_leader are not both a software
-			 * event, and event is, then group leader is not.
+			 * If the event is a sw event, but the group_leader
+			 * is in a hw context.
 			 *
-			 * Allow the addition of software events to !software
-			 * groups, this is safe because software events never
-			 * fail to schedule.
+			 * Allow the addition of software events to hw
+			 * groups; this is safe because software events
+			 * never fail to schedule.
 			 */
-			pmu = group_leader->pmu;
-		} else if (is_software_event(group_leader) &&
+			pmu = group_leader->ctx->pmu;
+		} else if (!is_software_event(event) &&
+			   is_software_event(group_leader) &&
 			   (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
 			/*
 			 * In case the group is a pure software group, and we
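
The address-filter change above pins a full struct path instead of a bare
inode, so the comparison uses d_inode(filter->path.dentry) and teardown is a
single path_put(). The kern_path()/path_put() reference pattern, as a sketch
(path name hypothetical):

    struct path p;
    int err = kern_path("/some/file", LOOKUP_FOLLOW, &p);

    if (!err) {
            struct inode *inode = d_inode(p.dentry); /* pinned via p */
            /* ... use inode ... */
            path_put(&p);  /* drops the dentry and vfsmount references */
    }
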
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index a569711..33f07c5 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -27,21 +27,9 @@
 	return 0;
 }
 
-static int execdomains_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, execdomains_proc_show, NULL);
-}
-
-static const struct file_operations execdomains_proc_fops = {
-	.open		= execdomains_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int __init proc_execdomains_init(void)
 {
-	proc_create("execdomains", 0, NULL, &execdomains_proc_fops);
+	proc_create_single("execdomains", 0, NULL, execdomains_proc_show);
 	return 0;
 }
 module_init(proc_execdomains_init);
diff --git a/kernel/fork.c b/kernel/fork.c
index a5d21c4..80b48a8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1712,7 +1712,7 @@
 	p->start_time = ktime_get_ns();
 	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
-	p->audit_context = NULL;
+	audit_set_context(p, NULL);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index fc4f361..dd20d0d 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -1,11 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * Copyright (C) 2017 Bartosz Golaszewski <brgl@bgdev.pl>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
+ * Copyright (C) 2017-2018 Bartosz Golaszewski <brgl@bgdev.pl>
  */
 
 #include <linux/slab.h>
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 2a8571f..4ca2fd4 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -76,6 +76,19 @@
 	data->chip->irq_write_msi_msg(data, msg);
 }
 
+static void msi_check_level(struct irq_domain *domain, struct msi_msg *msg)
+{
+	struct msi_domain_info *info = domain->host_data;
+
+	/*
+	 * If the MSI provider has messed with the second message and
	 * not advertised that it is level-capable, signal the breakage.
+	 */
+	WARN_ON(!((info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
+		  (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI)) &&
+		(msg[1].address_lo || msg[1].address_hi || msg[1].data));
+}
+
 /**
  * msi_domain_set_affinity - Generic affinity setter function for MSI domains
  * @irq_data:	The irq data associated to the interrupt
@@ -89,13 +102,14 @@
 			    const struct cpumask *mask, bool force)
 {
 	struct irq_data *parent = irq_data->parent_data;
-	struct msi_msg msg;
+	struct msi_msg msg[2] = { [1] = { }, };
 	int ret;
 
 	ret = parent->chip->irq_set_affinity(parent, mask, force);
 	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
-		BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg));
-		irq_chip_write_msi_msg(irq_data, &msg);
+		BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
+		msi_check_level(irq_data->domain, msg);
+		irq_chip_write_msi_msg(irq_data, msg);
 	}
 
 	return ret;
@@ -104,20 +118,21 @@
 static int msi_domain_activate(struct irq_domain *domain,
 			       struct irq_data *irq_data, bool early)
 {
-	struct msi_msg msg;
+	struct msi_msg msg[2] = { [1] = { }, };
 
-	BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg));
-	irq_chip_write_msi_msg(irq_data, &msg);
+	BUG_ON(irq_chip_compose_msi_msg(irq_data, msg));
+	msi_check_level(irq_data->domain, msg);
+	irq_chip_write_msi_msg(irq_data, msg);
 	return 0;
 }
 
 static void msi_domain_deactivate(struct irq_domain *domain,
 				  struct irq_data *irq_data)
 {
-	struct msi_msg msg;
+	struct msi_msg msg[2];
 
-	memset(&msg, 0, sizeof(msg));
-	irq_chip_write_msi_msg(irq_data, &msg);
+	memset(msg, 0, sizeof(msg));
+	irq_chip_write_msi_msg(irq_data, msg);
 }
 
 static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
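
Spelled out, the msi_check_level() warning above fires only when a composer
filled in the second (level-deassert) message without both the domain and the
irqchip advertising level-MSI support. Roughly equivalent, as a sketch:

    bool level_capable = (info->flags & MSI_FLAG_LEVEL_CAPABLE) &&
                         (info->chip->flags & IRQCHIP_SUPPORTS_LEVEL_MSI);
    bool msg1_nonzero = msg[1].address_lo || msg[1].address_hi || msg[1].data;

    WARN_ON(!level_capable && msg1_nonzero);
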
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 7cb091d..37eda10 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -185,11 +185,6 @@
 	return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode));
 }
 
-static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, irq_affinity_hint_proc_show, PDE_DATA(inode));
-}
-
 static const struct file_operations irq_affinity_proc_fops = {
 	.open		= irq_affinity_proc_open,
 	.read		= seq_read,
@@ -198,13 +193,6 @@
 	.write		= irq_affinity_proc_write,
 };
 
-static const struct file_operations irq_affinity_hint_proc_fops = {
-	.open		= irq_affinity_hint_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static const struct file_operations irq_affinity_list_proc_fops = {
 	.open		= irq_affinity_list_proc_open,
 	.read		= seq_read,
@@ -223,32 +211,6 @@
 {
 	return show_irq_affinity(EFFECTIVE_LIST, m);
 }
-
-static int irq_effective_aff_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode));
-}
-
-static int irq_effective_aff_list_proc_open(struct inode *inode,
-					    struct file *file)
-{
-	return single_open(file, irq_effective_aff_list_proc_show,
-			   PDE_DATA(inode));
-}
-
-static const struct file_operations irq_effective_aff_proc_fops = {
-	.open		= irq_effective_aff_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static const struct file_operations irq_effective_aff_list_proc_fops = {
-	.open		= irq_effective_aff_list_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 static int default_affinity_show(struct seq_file *m, void *v)
@@ -313,18 +275,6 @@
 	seq_printf(m, "%d\n", irq_desc_get_node(desc));
 	return 0;
 }
-
-static int irq_node_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, irq_node_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations irq_node_proc_fops = {
-	.open		= irq_node_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 static int irq_spurious_proc_show(struct seq_file *m, void *v)
@@ -337,18 +287,6 @@
 	return 0;
 }
 
-static int irq_spurious_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, irq_spurious_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations irq_spurious_proc_fops = {
-	.open		= irq_spurious_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 #define MAX_NAMELEN 128
 
 static int name_unique(unsigned int irq, struct irqaction *new_action)
@@ -421,24 +359,24 @@
 			 &irq_affinity_proc_fops, irqp);
 
 	/* create /proc/irq/<irq>/affinity_hint */
-	proc_create_data("affinity_hint", 0444, desc->dir,
-			 &irq_affinity_hint_proc_fops, irqp);
+	proc_create_single_data("affinity_hint", 0444, desc->dir,
+			irq_affinity_hint_proc_show, irqp);
 
 	/* create /proc/irq/<irq>/smp_affinity_list */
 	proc_create_data("smp_affinity_list", 0644, desc->dir,
 			 &irq_affinity_list_proc_fops, irqp);
 
-	proc_create_data("node", 0444, desc->dir,
-			 &irq_node_proc_fops, irqp);
+	proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show,
+			irqp);
 # ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
-	proc_create_data("effective_affinity", 0444, desc->dir,
-			 &irq_effective_aff_proc_fops, irqp);
-	proc_create_data("effective_affinity_list", 0444, desc->dir,
-			 &irq_effective_aff_list_proc_fops, irqp);
+	proc_create_single_data("effective_affinity", 0444, desc->dir,
+			irq_effective_aff_proc_show, irqp);
+	proc_create_single_data("effective_affinity_list", 0444, desc->dir,
+			irq_effective_aff_list_proc_show, irqp);
 # endif
 #endif
-	proc_create_data("spurious", 0444, desc->dir,
-			 &irq_spurious_proc_fops, (void *)(long)irq);
+	proc_create_single_data("spurious", 0444, desc->dir,
+			irq_spurious_proc_show, (void *)(long)irq);
 
 out_unlock:
 	mutex_unlock(&register_lock);
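
The _data variants used above thread a cookie through to the show callback,
which picks it up from the seq_file's private pointer. A sketch with
hypothetical names:

    static int foo_show(struct seq_file *m, void *v)
    {
            void *cookie = m->private; /* the proc_create_single_data() arg */
            /* ... format output based on cookie ... */
            return 0;
    }

    proc_create_single_data("foo", 0444, parent, foo_show, cookie);
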
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0233863..edcac5d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -561,20 +561,24 @@
 	printk(KERN_CONT ", at: %pS\n", (void *)hlock->acquire_ip);
 }
 
-static void lockdep_print_held_locks(struct task_struct *curr)
+static void lockdep_print_held_locks(struct task_struct *p)
 {
-	int i, depth = curr->lockdep_depth;
+	int i, depth = READ_ONCE(p->lockdep_depth);
 
-	if (!depth) {
-		printk("no locks held by %s/%d.\n", curr->comm, task_pid_nr(curr));
+	if (!depth)
+		printk("no locks held by %s/%d.\n", p->comm, task_pid_nr(p));
+	else
+		printk("%d lock%s held by %s/%d:\n", depth,
+		       depth > 1 ? "s" : "", p->comm, task_pid_nr(p));
+	/*
+	 * It's not reliable to print a task's held locks if it's not sleeping
+	 * and it's not the current task.
+	 */
+	if (p->state == TASK_RUNNING && p != current)
 		return;
-	}
-	printk("%d lock%s held by %s/%d:\n",
-		depth, depth > 1 ? "s" : "", curr->comm, task_pid_nr(curr));
-
 	for (i = 0; i < depth; i++) {
 		printk(" #%d: ", i);
-		print_lock(curr->held_locks + i);
+		print_lock(p->held_locks + i);
 	}
 }
 
@@ -4451,8 +4455,6 @@
 void debug_show_all_locks(void)
 {
 	struct task_struct *g, *p;
-	int count = 10;
-	int unlock = 1;
 
 	if (unlikely(!debug_locks)) {
 		pr_warn("INFO: lockdep is turned off.\n");
@@ -4460,50 +4462,18 @@
 	}
 	pr_warn("\nShowing all locks held in the system:\n");
 
-	/*
-	 * Here we try to get the tasklist_lock as hard as possible,
-	 * if not successful after 2 seconds we ignore it (but keep
-	 * trying). This is to enable a debug printout even if a
-	 * tasklist_lock-holding task deadlocks or crashes.
-	 */
-retry:
-	if (!read_trylock(&tasklist_lock)) {
-		if (count == 10)
-			pr_warn("hm, tasklist_lock locked, retrying... ");
-		if (count) {
-			count--;
-			pr_cont(" #%d", 10-count);
-			mdelay(200);
-			goto retry;
-		}
-		pr_cont(" ignoring it.\n");
-		unlock = 0;
-	} else {
-		if (count != 10)
-			pr_cont(" locked it.\n");
-	}
-
-	do_each_thread(g, p) {
-		/*
-		 * It's not reliable to print a task's held locks
-		 * if it's not sleeping (or if it's not the current
-		 * task):
-		 */
-		if (p->state == TASK_RUNNING && p != current)
+	rcu_read_lock();
+	for_each_process_thread(g, p) {
+		if (!p->lockdep_depth)
 			continue;
-		if (p->lockdep_depth)
-			lockdep_print_held_locks(p);
-		if (!unlock)
-			if (read_trylock(&tasklist_lock))
-				unlock = 1;
+		lockdep_print_held_locks(p);
 		touch_nmi_watchdog();
-	} while_each_thread(g, p);
+		touch_all_softlockup_watchdogs();
+	}
+	rcu_read_unlock();
 
 	pr_warn("\n");
 	pr_warn("=============================================\n\n");
-
-	if (unlock)
-		read_unlock(&tasklist_lock);
 }
 EXPORT_SYMBOL_GPL(debug_show_all_locks);
 #endif
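
The rework above drops the tasklist_lock retry dance in favor of an RCU walk,
trading an exact snapshot for guaranteed forward progress. The general shape
of the new traversal:

    rcu_read_lock();
    for_each_process_thread(g, p) {
            /* inspect p; must not sleep inside the RCU read-side
             * section, and p's state may be slightly stale */
    }
    rcu_read_unlock();
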
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index ad69bbc..3dd980d 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -101,18 +101,6 @@
 	.show	= l_show,
 };
 
-static int lockdep_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &lockdep_ops);
-}
-
-static const struct file_operations proc_lockdep_operations = {
-	.open		= lockdep_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 #ifdef CONFIG_PROVE_LOCKING
 static void *lc_start(struct seq_file *m, loff_t *pos)
 {
@@ -170,18 +158,6 @@
 	.stop	= lc_stop,
 	.show	= lc_show,
 };
-
-static int lockdep_chains_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &lockdep_chains_ops);
-}
-
-static const struct file_operations proc_lockdep_chains_operations = {
-	.open		= lockdep_chains_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
 #endif /* CONFIG_PROVE_LOCKING */
 
 static void lockdep_stats_debug_show(struct seq_file *m)
@@ -355,18 +331,6 @@
 	return 0;
 }
 
-static int lockdep_stats_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, lockdep_stats_show, NULL);
-}
-
-static const struct file_operations proc_lockdep_stats_operations = {
-	.open		= lockdep_stats_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 #ifdef CONFIG_LOCK_STAT
 
 struct lock_stat_data {
@@ -682,14 +646,11 @@
 
 static int __init lockdep_proc_init(void)
 {
-	proc_create("lockdep", S_IRUSR, NULL, &proc_lockdep_operations);
+	proc_create_seq("lockdep", S_IRUSR, NULL, &lockdep_ops);
 #ifdef CONFIG_PROVE_LOCKING
-	proc_create("lockdep_chains", S_IRUSR, NULL,
-		    &proc_lockdep_chains_operations);
+	proc_create_seq("lockdep_chains", S_IRUSR, NULL, &lockdep_chains_ops);
 #endif
-	proc_create("lockdep_stats", S_IRUSR, NULL,
-		    &proc_lockdep_stats_operations);
-
+	proc_create_single("lockdep_stats", S_IRUSR, NULL, lockdep_stats_show);
 #ifdef CONFIG_LOCK_STAT
 	proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
 		    &proc_lock_stat_operations);
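
Same cleanup family as the proc_create_single() conversions earlier:
proc_create_seq() accepts a full seq_operations table directly. A sketch
(names hypothetical):

    static const struct seq_operations foo_ops = {
            .start = foo_start,
            .next  = foo_next,
            .stop  = foo_stop,
            .show  = foo_show,
    };

    /* replaces foo_open() plus a const file_operations wrapper */
    proc_create_seq("foo", 0444, NULL, &foo_ops);
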
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index f046b7c..5e10153 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -23,13 +23,15 @@
 
 #ifndef arch_mcs_spin_lock_contended
 /*
- * Using smp_load_acquire() provides a memory barrier that ensures
- * subsequent operations happen after the lock is acquired.
+ * Using smp_cond_load_acquire() provides the acquire semantics
+ * required so that subsequent operations happen after the
+ * lock is acquired. Additionally, some architectures such as
+ * ARM64 would like to do spin-waiting instead of purely
+ * spinning, and smp_cond_load_acquire() provides that behavior.
  */
 #define arch_mcs_spin_lock_contended(l)					\
 do {									\
-	while (!(smp_load_acquire(l)))					\
-		cpu_relax();						\
+	smp_cond_load_acquire(l, VAL);					\
 } while (0)
 #endif
 
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2048359..f44f658 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -139,8 +139,9 @@
 static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
 {
 	unsigned long curr = (unsigned long)current;
+	unsigned long zero = 0UL;
 
-	if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
+	if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
 		return true;
 
 	return false;
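
atomic_long_try_cmpxchg_acquire() returns a success boolean and, on failure,
writes the observed value back through its second argument, letting x86 reuse
the flags set by the cmpxchg instruction itself. It behaves roughly like this
sketch:

    static inline bool try_cmpxchg_sketch(atomic_long_t *v, long *old, long new)
    {
            long seen = atomic_long_cmpxchg_acquire(v, *old, new);
            bool ok = (seen == *old);

            if (!ok)
                    *old = seen; /* caller sees what the cmpxchg saw */
            return ok;
    }
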
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index d880296..bfaeb05 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -12,11 +12,11 @@
  * GNU General Public License for more details.
  *
  * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
- * (C) Copyright 2013-2014 Red Hat, Inc.
+ * (C) Copyright 2013-2014,2018 Red Hat, Inc.
  * (C) Copyright 2015 Intel Corp.
  * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
  *
- * Authors: Waiman Long <waiman.long@hpe.com>
+ * Authors: Waiman Long <longman@redhat.com>
  *          Peter Zijlstra <peterz@infradead.org>
  */
 
@@ -33,6 +33,11 @@
 #include <asm/qspinlock.h>
 
 /*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
+/*
  * The basic principle of a queue-based spinlock can best be understood
  * by studying a classic queue-based spinlock implementation called the
  * MCS lock. The paper below provides a good description for this kind
@@ -77,6 +82,18 @@
 #endif
 
 /*
+ * The pending bit spinning loop count.
+ * This heuristic is used to limit the number of lockword accesses
+ * made by atomic_cond_read_relaxed when waiting for the lock to
+ * transition out of the "== _Q_PENDING_VAL" state. We don't spin
+ * indefinitely because there's no guarantee that we'll make forward
+ * progress.
+ */
+#ifndef _Q_PENDING_LOOPS
+#define _Q_PENDING_LOOPS	1
+#endif
+
+/*
  * Per-CPU queue node structures; we can never have more than 4 nested
  * contexts: task, softirq, hardirq, nmi.
  *
@@ -114,42 +131,19 @@
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
 
-/*
- * By using the whole 2nd least significant byte for the pending bit, we
- * can allow better optimization of the lock acquisition for the pending
- * bit holder.
- *
- * This internal structure is also used by the set_locked function which
- * is not restricted to _Q_PENDING_BITS == 8.
- */
-struct __qspinlock {
-	union {
-		atomic_t val;
-#ifdef __LITTLE_ENDIAN
-		struct {
-			u8	locked;
-			u8	pending;
-		};
-		struct {
-			u16	locked_pending;
-			u16	tail;
-		};
-#else
-		struct {
-			u16	tail;
-			u16	locked_pending;
-		};
-		struct {
-			u8	reserved[2];
-			u8	pending;
-			u8	locked;
-		};
-#endif
-	};
-};
-
 #if _Q_PENDING_BITS == 8
 /**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+	WRITE_ONCE(lock->pending, 0);
+}
+
+/**
  * clear_pending_set_locked - take ownership and clear the pending bit.
  * @lock: Pointer to queued spinlock structure
  *
@@ -159,9 +153,7 @@
  */
 static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
 {
-	struct __qspinlock *l = (void *)lock;
-
-	WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
+	WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
 }
 
 /*
@@ -176,19 +168,28 @@
  */
 static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 {
-	struct __qspinlock *l = (void *)lock;
-
 	/*
-	 * Use release semantics to make sure that the MCS node is properly
-	 * initialized before changing the tail code.
+	 * We can use relaxed semantics since the caller ensures that the
+	 * MCS node is properly initialized before updating the tail.
 	 */
-	return (u32)xchg_release(&l->tail,
+	return (u32)xchg_relaxed(&lock->tail,
 				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
 }
 
 #else /* _Q_PENDING_BITS == 8 */
 
 /**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+	atomic_andnot(_Q_PENDING_VAL, &lock->val);
+}
+
+/**
  * clear_pending_set_locked - take ownership and clear the pending bit.
  * @lock: Pointer to queued spinlock structure
  *
@@ -216,10 +217,11 @@
 	for (;;) {
 		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
 		/*
-		 * Use release semantics to make sure that the MCS node is
-		 * properly initialized before changing the tail code.
+		 * We can use relaxed semantics since the caller ensures that
+		 * the MCS node is properly initialized before updating the
+		 * tail.
 		 */
-		old = atomic_cmpxchg_release(&lock->val, val, new);
+		old = atomic_cmpxchg_relaxed(&lock->val, val, new);
 		if (old == val)
 			break;
 
@@ -237,9 +239,7 @@
  */
 static __always_inline void set_locked(struct qspinlock *lock)
 {
-	struct __qspinlock *l = (void *)lock;
-
-	WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
 }
 
 
@@ -294,86 +294,83 @@
 void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
 	struct mcs_spinlock *prev, *next, *node;
-	u32 new, old, tail;
+	u32 old, tail;
 	int idx;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
 	if (pv_enabled())
-		goto queue;
+		goto pv_queue;
 
 	if (virt_spin_lock(lock))
 		return;
 
 	/*
-	 * wait for in-progress pending->locked hand-overs
+	 * Wait for in-progress pending->locked hand-overs with a bounded
+	 * number of spins so that we guarantee forward progress.
 	 *
 	 * 0,1,0 -> 0,0,1
 	 */
 	if (val == _Q_PENDING_VAL) {
-		while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
-			cpu_relax();
+		int cnt = _Q_PENDING_LOOPS;
+		val = atomic_cond_read_relaxed(&lock->val,
+					       (VAL != _Q_PENDING_VAL) || !cnt--);
 	}
 
 	/*
+	 * If we observe any contention; queue.
+	 */
+	if (val & ~_Q_LOCKED_MASK)
+		goto queue;
+
+	/*
 	 * trylock || pending
 	 *
 	 * 0,0,0 -> 0,0,1 ; trylock
 	 * 0,0,1 -> 0,1,1 ; pending
 	 */
-	for (;;) {
+	val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+	if (!(val & ~_Q_LOCKED_MASK)) {
 		/*
-		 * If we observe any contention; queue.
+		 * We're pending, wait for the owner to go away.
+		 *
+		 * *,1,1 -> *,1,0
+		 *
+		 * this wait loop must be a load-acquire such that we match the
+		 * store-release that clears the locked bit and create lock
+		 * sequentiality; this is because not all
+		 * clear_pending_set_locked() implementations imply full
+		 * barriers.
 		 */
-		if (val & ~_Q_LOCKED_MASK)
-			goto queue;
-
-		new = _Q_LOCKED_VAL;
-		if (val == new)
-			new |= _Q_PENDING_VAL;
+		if (val & _Q_LOCKED_MASK) {
+			atomic_cond_read_acquire(&lock->val,
+						 !(VAL & _Q_LOCKED_MASK));
+		}
 
 		/*
-		 * Acquire semantic is required here as the function may
-		 * return immediately if the lock was free.
+		 * take ownership and clear the pending bit.
+		 *
+		 * *,1,0 -> *,0,1
 		 */
-		old = atomic_cmpxchg_acquire(&lock->val, val, new);
-		if (old == val)
-			break;
-
-		val = old;
+		clear_pending_set_locked(lock);
+		qstat_inc(qstat_lock_pending, true);
+		return;
 	}
 
 	/*
-	 * we won the trylock
+	 * If pending was clear but there are waiters in the queue, then
+	 * we need to undo our setting of pending before we queue ourselves.
 	 */
-	if (new == _Q_LOCKED_VAL)
-		return;
-
-	/*
-	 * we're pending, wait for the owner to go away.
-	 *
-	 * *,1,1 -> *,1,0
-	 *
-	 * this wait loop must be a load-acquire such that we match the
-	 * store-release that clears the locked bit and create lock
-	 * sequentiality; this is because not all clear_pending_set_locked()
-	 * implementations imply full barriers.
-	 */
-	smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
-
-	/*
-	 * take ownership and clear the pending bit.
-	 *
-	 * *,1,0 -> *,0,1
-	 */
-	clear_pending_set_locked(lock);
-	return;
+	if (!(val & _Q_PENDING_MASK))
+		clear_pending(lock);
 
 	/*
 	 * End of pending bit optimistic spinning and beginning of MCS
 	 * queuing.
 	 */
 queue:
+	qstat_inc(qstat_lock_slowpath, true);
+pv_queue:
 	node = this_cpu_ptr(&mcs_nodes[0]);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);
@@ -400,12 +397,18 @@
 		goto release;
 
 	/*
+	 * Ensure that the initialisation of @node is complete before we
+	 * publish the updated tail via xchg_tail() and potentially link
+	 * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+	 */
+	smp_wmb();
+
+	/*
+	 * Publish the updated tail.
 	 * We have already touched the queueing cacheline; don't bother with
 	 * pending stuff.
 	 *
 	 * p,*,* -> n,*,*
-	 *
-	 * RELEASE, such that the stores to @node must be complete.
 	 */
 	old = xchg_tail(lock, tail);
 	next = NULL;
@@ -417,14 +420,8 @@
 	if (old & _Q_TAIL_MASK) {
 		prev = decode_tail(old);
 
-		/*
-		 * We must ensure that the stores to @node are observed before
-		 * the write to prev->next. The address dependency from
-		 * xchg_tail is not sufficient to ensure this because the read
-		 * component of xchg_tail is unordered with respect to the
-		 * initialisation of @node.
-		 */
-		smp_store_release(&prev->next, node);
+		/* Link @node into the waitqueue. */
+		WRITE_ONCE(prev->next, node);
 
 		pv_wait_node(node, prev);
 		arch_mcs_spin_lock_contended(&node->locked);
@@ -453,8 +450,8 @@
 	 *
 	 * The PV pv_wait_head_or_lock function, if active, will acquire
 	 * the lock and return a non-zero value. So we have to skip the
-	 * smp_cond_load_acquire() call. As the next PV queue head hasn't been
-	 * designated yet, there is no way for the locked value to become
+	 * atomic_cond_read_acquire() call. As the next PV queue head hasn't
+	 * been designated yet, there is no way for the locked value to become
 	 * _Q_SLOW_VAL. So both the set_locked() and the
 	 * atomic_cmpxchg_relaxed() calls will be safe.
 	 *
@@ -464,44 +461,38 @@
 	if ((val = pv_wait_head_or_lock(lock, node)))
 		goto locked;
 
-	val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
+	val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
 
 locked:
 	/*
 	 * claim the lock:
 	 *
 	 * n,0,0 -> 0,0,1 : lock, uncontended
-	 * *,0,0 -> *,0,1 : lock, contended
+	 * *,*,0 -> *,*,1 : lock, contended
 	 *
-	 * If the queue head is the only one in the queue (lock value == tail),
-	 * clear the tail code and grab the lock. Otherwise, we only need
-	 * to grab the lock.
+	 * If the queue head is the only one in the queue (lock value == tail)
+	 * and nobody is pending, clear the tail code and grab the lock.
+	 * Otherwise, we only need to grab the lock.
 	 */
-	for (;;) {
-		/* In the PV case we might already have _Q_LOCKED_VAL set */
-		if ((val & _Q_TAIL_MASK) != tail) {
-			set_locked(lock);
-			break;
-		}
-		/*
-		 * The smp_cond_load_acquire() call above has provided the
-		 * necessary acquire semantics required for locking. At most
-		 * two iterations of this loop may be ran.
-		 */
-		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
-		if (old == val)
-			goto release;	/* No contention */
 
-		val = old;
-	}
+	/*
+	 * In the PV case we might already have _Q_LOCKED_VAL set.
+	 *
+	 * The atomic_cond_read_acquire() call above has provided the
+	 * necessary acquire semantics required for locking.
+	 */
+	if (((val & _Q_TAIL_MASK) == tail) &&
+	    atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+		goto release; /* No contention */
+
+	/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+	set_locked(lock);
 
 	/*
 	 * contended path; wait for next if not observed yet, release.
 	 */
-	if (!next) {
-		while (!(next = READ_ONCE(node->next)))
-			cpu_relax();
-	}
+	if (!next)
+		next = smp_cond_load_relaxed(&node->next, (VAL));
 
 	arch_mcs_spin_unlock_contended(&next->locked);
 	pv_kick_node(lock, next);
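
The (n,p,l) triples in the comments above name the three fields packed into
the 32-bit lock word: queue tail, pending bit, locked byte. With the 8-bit
pending layout (_Q_PENDING_BITS == 8) they can be pulled apart as in this
illustrative helper:

    static inline void decode_qspinlock_val(u32 val)
    {
            u32 locked  = val & 0xff;        /* owner byte, _Q_LOCKED_MASK */
            u32 pending = (val >> 8) & 0xff; /* pending byte */
            u32 tail    = val >> 16;         /* encoded (cpu, idx) of tail */

            pr_info("tail=%u pending=%u locked=%u\n", tail, pending, locked);
    }
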
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 6ee4777..5a0cf5f 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -56,11 +56,6 @@
 };
 
 /*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
-/*
  * Hybrid PV queued/unfair lock
  *
  * By replacing the regular queued_spin_trylock() with the function below,
@@ -87,8 +82,6 @@
 #define queued_spin_trylock(l)	pv_hybrid_queued_unfair_trylock(l)
 static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
 {
-	struct __qspinlock *l = (void *)lock;
-
 	/*
 	 * Stay in unfair lock mode as long as queued mode waiters are
 	 * present in the MCS wait queue but the pending bit isn't set.
@@ -97,7 +90,7 @@
 		int val = atomic_read(&lock->val);
 
 		if (!(val & _Q_LOCKED_PENDING_MASK) &&
-		   (cmpxchg_acquire(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
+		   (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
 			qstat_inc(qstat_pv_lock_stealing, true);
 			return true;
 		}
@@ -117,16 +110,7 @@
 #if _Q_PENDING_BITS == 8
 static __always_inline void set_pending(struct qspinlock *lock)
 {
-	struct __qspinlock *l = (void *)lock;
-
-	WRITE_ONCE(l->pending, 1);
-}
-
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
-	struct __qspinlock *l = (void *)lock;
-
-	WRITE_ONCE(l->pending, 0);
+	WRITE_ONCE(lock->pending, 1);
 }
 
 /*
@@ -136,10 +120,8 @@
  */
 static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 {
-	struct __qspinlock *l = (void *)lock;
-
-	return !READ_ONCE(l->locked) &&
-	       (cmpxchg_acquire(&l->locked_pending, _Q_PENDING_VAL,
+	return !READ_ONCE(lock->locked) &&
+	       (cmpxchg_acquire(&lock->locked_pending, _Q_PENDING_VAL,
 				_Q_LOCKED_VAL) == _Q_PENDING_VAL);
 }
 #else /* _Q_PENDING_BITS == 8 */
@@ -148,11 +130,6 @@
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
-	atomic_andnot(_Q_PENDING_VAL, &lock->val);
-}
-
 static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 {
 	int val = atomic_read(&lock->val);
@@ -384,7 +361,6 @@
 static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
-	struct __qspinlock *l = (void *)lock;
 
 	/*
 	 * If the vCPU is indeed halted, advance its state to match that of
@@ -413,7 +389,7 @@
 	 * the hash table later on at unlock time, no atomic instruction is
 	 * needed.
 	 */
-	WRITE_ONCE(l->locked, _Q_SLOW_VAL);
+	WRITE_ONCE(lock->locked, _Q_SLOW_VAL);
 	(void)pv_hash(lock, pn);
 }
 
@@ -428,7 +404,6 @@
 pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
-	struct __qspinlock *l = (void *)lock;
 	struct qspinlock **lp = NULL;
 	int waitcnt = 0;
 	int loop;
@@ -443,7 +418,7 @@
 	/*
 	 * Tracking # of slowpath locking operations
 	 */
-	qstat_inc(qstat_pv_lock_slowpath, true);
+	qstat_inc(qstat_lock_slowpath, true);
 
 	for (;; waitcnt++) {
 		/*
@@ -479,13 +454,13 @@
 			 *
 			 * Matches the smp_rmb() in __pv_queued_spin_unlock().
 			 */
-			if (xchg(&l->locked, _Q_SLOW_VAL) == 0) {
+			if (xchg(&lock->locked, _Q_SLOW_VAL) == 0) {
 				/*
 				 * The lock was free and now we own the lock.
 				 * Change the lock value back to _Q_LOCKED_VAL
 				 * and unhash the table.
 				 */
-				WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+				WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
 				WRITE_ONCE(*lp, NULL);
 				goto gotlock;
 			}
@@ -493,7 +468,7 @@
 		WRITE_ONCE(pn->state, vcpu_hashed);
 		qstat_inc(qstat_pv_wait_head, true);
 		qstat_inc(qstat_pv_wait_again, waitcnt);
-		pv_wait(&l->locked, _Q_SLOW_VAL);
+		pv_wait(&lock->locked, _Q_SLOW_VAL);
 
 		/*
 		 * Because of lock stealing, the queue head vCPU may not be
@@ -518,7 +493,6 @@
 __visible void
 __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
 {
-	struct __qspinlock *l = (void *)lock;
 	struct pv_node *node;
 
 	if (unlikely(locked != _Q_SLOW_VAL)) {
@@ -547,7 +521,7 @@
 	 * Now that we have a reference to the (likely) blocked pv_node,
 	 * release the lock.
 	 */
-	smp_store_release(&l->locked, 0);
+	smp_store_release(&lock->locked, 0);
 
 	/*
 	 * At this point the memory pointed at by lock can be freed/reused,
@@ -573,7 +547,6 @@
 #ifndef __pv_queued_spin_unlock
 __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
-	struct __qspinlock *l = (void *)lock;
 	u8 locked;
 
 	/*
@@ -581,7 +554,7 @@
 	 * unhash. Otherwise it would be possible to have multiple @lock
 	 * entries, which would be BAD.
 	 */
-	locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
+	locked = cmpxchg_release(&lock->locked, _Q_LOCKED_VAL, 0);
 	if (likely(locked == _Q_LOCKED_VAL))
 		return;
 
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 4a30ef6..6bd78c0 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -22,13 +22,14 @@
  *   pv_kick_wake	- # of vCPU kicks used for computing pv_latency_wake
  *   pv_latency_kick	- average latency (ns) of vCPU kick operation
  *   pv_latency_wake	- average latency (ns) from vCPU kick to wakeup
- *   pv_lock_slowpath	- # of locking operations via the slowpath
  *   pv_lock_stealing	- # of lock stealing operations
  *   pv_spurious_wakeup	- # of spurious wakeups in non-head vCPUs
  *   pv_wait_again	- # of wait's after a queue head vCPU kick
  *   pv_wait_early	- # of early vCPU wait's
  *   pv_wait_head	- # of vCPU wait's at the queue head
  *   pv_wait_node	- # of vCPU wait's at a non-head queue node
+ *   lock_pending	- # of locking operations via pending code
+ *   lock_slowpath	- # of locking operations via MCS lock queue
  *
  * Writing to the "reset_counters" file will reset all the above counter
  * values.
@@ -46,13 +47,14 @@
 	qstat_pv_kick_wake,
 	qstat_pv_latency_kick,
 	qstat_pv_latency_wake,
-	qstat_pv_lock_slowpath,
 	qstat_pv_lock_stealing,
 	qstat_pv_spurious_wakeup,
 	qstat_pv_wait_again,
 	qstat_pv_wait_early,
 	qstat_pv_wait_head,
 	qstat_pv_wait_node,
+	qstat_lock_pending,
+	qstat_lock_slowpath,
 	qstat_num,	/* Total number of statistical counters */
 	qstat_reset_cnts = qstat_num,
 };
@@ -73,12 +75,13 @@
 	[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
 	[qstat_pv_latency_kick]	   = "pv_latency_kick",
 	[qstat_pv_latency_wake]    = "pv_latency_wake",
-	[qstat_pv_lock_slowpath]   = "pv_lock_slowpath",
 	[qstat_pv_lock_stealing]   = "pv_lock_stealing",
 	[qstat_pv_wait_again]      = "pv_wait_again",
 	[qstat_pv_wait_early]      = "pv_wait_early",
 	[qstat_pv_wait_head]       = "pv_wait_head",
 	[qstat_pv_wait_node]       = "pv_wait_node",
+	[qstat_lock_pending]       = "lock_pending",
+	[qstat_lock_slowpath]      = "lock_slowpath",
 	[qstat_reset_cnts]         = "reset_counters",
 };
 
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a903367..3064c50 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -347,6 +347,15 @@
 	}
 }
 
+static inline bool owner_on_cpu(struct task_struct *owner)
+{
+	/*
	 * Due to the lock holder preemption problem, skip spinning if the
	 * task is not on a CPU or its CPU is preempted.
+	 */
+	return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
+}
+
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
 	struct task_struct *owner;
@@ -359,17 +368,10 @@
 
 	rcu_read_lock();
 	owner = READ_ONCE(sem->owner);
-	if (!owner || !is_rwsem_owner_spinnable(owner)) {
-		ret = !owner;	/* !owner is spinnable */
-		goto done;
+	if (owner) {
+		ret = is_rwsem_owner_spinnable(owner) &&
+		      owner_on_cpu(owner);
 	}
-
-	/*
-	 * As lock holder preemption issue, we both skip spinning if task is not
-	 * on cpu or its cpu is preempted
-	 */
-	ret = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner));
-done:
 	rcu_read_unlock();
 	return ret;
 }
@@ -398,8 +400,7 @@
 		 * abort spinning when need_resched or owner is not running or
 		 * owner's cpu is preempted.
 		 */
-		if (!owner->on_cpu || need_resched() ||
-				vcpu_is_preempted(task_cpu(owner))) {
+		if (need_resched() || !owner_on_cpu(owner)) {
 			rcu_read_unlock();
 			return false;
 		}
diff --git a/kernel/module.c b/kernel/module.c
index c9bea7f..68469b3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1604,8 +1604,7 @@
 	if (notes == 0)
 		return;
 
-	notes_attrs = kzalloc(sizeof(*notes_attrs)
-			      + notes * sizeof(notes_attrs->attrs[0]),
+	notes_attrs = kzalloc(struct_size(notes_attrs, attrs, notes),
 			      GFP_KERNEL);
 	if (notes_attrs == NULL)
 		return;
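
struct_size(p, member, n) computes sizeof(*p) + n * sizeof(p->member[0]) and
saturates to SIZE_MAX on overflow, so the allocation fails cleanly instead of
silently truncating. Applied to a flexible-array struct (a sketch, types
hypothetical):

    struct notes_like {
            unsigned int count;
            struct bin_attribute attrs[]; /* flexible array member */
    };

    struct notes_like *p = kzalloc(struct_size(p, attrs, notes), GFP_KERNEL);
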
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 5454cc6..9c85c78 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -287,6 +287,8 @@
 
 	local_irq_disable();
 
+	system_state = SYSTEM_SUSPEND;
+
 	error = syscore_suspend();
 	if (error) {
 		pr_err("Some system devices failed to power down, aborting hibernation\n");
@@ -317,6 +319,7 @@
 	syscore_resume();
 
  Enable_irqs:
+	system_state = SYSTEM_RUNNING;
 	local_irq_enable();
 
  Enable_cpus:
@@ -445,6 +448,7 @@
 		goto Enable_cpus;
 
 	local_irq_disable();
+	system_state = SYSTEM_SUSPEND;
 
 	error = syscore_suspend();
 	if (error)
@@ -478,6 +482,7 @@
 	syscore_resume();
 
  Enable_irqs:
+	system_state = SYSTEM_RUNNING;
 	local_irq_enable();
 
  Enable_cpus:
@@ -563,6 +568,7 @@
 		goto Enable_cpus;
 
 	local_irq_disable();
+	system_state = SYSTEM_SUSPEND;
 	syscore_suspend();
 	if (pm_wakeup_pending()) {
 		error = -EAGAIN;
@@ -575,6 +581,7 @@
 
  Power_up:
 	syscore_resume();
+	system_state = SYSTEM_RUNNING;
 	local_irq_enable();
 
  Enable_cpus:
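
The new SYSTEM_SUSPEND state brackets the window in which a single CPU runs
with interrupts disabled during suspend/hibernation, so code elsewhere can
detect that window and skip work that must not run there. A hypothetical
consumer:

    if (system_state == SYSTEM_SUSPEND)
            return; /* single CPU, IRQs off: defer this work */
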
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index fa39092..86d72ffb 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -184,7 +184,6 @@
 	c->target_value = value;
 }
 
-static inline int pm_qos_get_value(struct pm_qos_constraints *c);
 static int pm_qos_dbg_show_requests(struct seq_file *s, void *unused)
 {
 	struct pm_qos_object *qos = (struct pm_qos_object *)s->private;
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4c10be0..8733156 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -27,6 +27,7 @@
 #include <linux/export.h>
 #include <linux/suspend.h>
 #include <linux/syscore_ops.h>
+#include <linux/swait.h>
 #include <linux/ftrace.h>
 #include <trace/events/power.h>
 #include <linux/compiler.h>
@@ -57,10 +58,10 @@
 
 static const struct platform_suspend_ops *suspend_ops;
 static const struct platform_s2idle_ops *s2idle_ops;
-static DECLARE_WAIT_QUEUE_HEAD(s2idle_wait_head);
+static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head);
 
 enum s2idle_states __read_mostly s2idle_state;
-static DEFINE_SPINLOCK(s2idle_lock);
+static DEFINE_RAW_SPINLOCK(s2idle_lock);
 
 void s2idle_set_ops(const struct platform_s2idle_ops *ops)
 {
@@ -78,12 +79,12 @@
 {
 	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, true);
 
-	spin_lock_irq(&s2idle_lock);
+	raw_spin_lock_irq(&s2idle_lock);
 	if (pm_wakeup_pending())
 		goto out;
 
 	s2idle_state = S2IDLE_STATE_ENTER;
-	spin_unlock_irq(&s2idle_lock);
+	raw_spin_unlock_irq(&s2idle_lock);
 
 	get_online_cpus();
 	cpuidle_resume();
@@ -91,17 +92,17 @@
 	/* Push all the CPUs into the idle loop. */
 	wake_up_all_idle_cpus();
 	/* Make the current CPU wait so it can enter the idle loop too. */
-	wait_event(s2idle_wait_head,
-		   s2idle_state == S2IDLE_STATE_WAKE);
+	swait_event(s2idle_wait_head,
+		    s2idle_state == S2IDLE_STATE_WAKE);
 
 	cpuidle_pause();
 	put_online_cpus();
 
-	spin_lock_irq(&s2idle_lock);
+	raw_spin_lock_irq(&s2idle_lock);
 
  out:
 	s2idle_state = S2IDLE_STATE_NONE;
-	spin_unlock_irq(&s2idle_lock);
+	raw_spin_unlock_irq(&s2idle_lock);
 
 	trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_TO_IDLE, false);
 }
@@ -156,12 +157,12 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&s2idle_lock, flags);
+	raw_spin_lock_irqsave(&s2idle_lock, flags);
 	if (s2idle_state > S2IDLE_STATE_NONE) {
 		s2idle_state = S2IDLE_STATE_WAKE;
-		wake_up(&s2idle_wait_head);
+		swake_up(&s2idle_wait_head);
 	}
-	spin_unlock_irqrestore(&s2idle_lock, flags);
+	raw_spin_unlock_irqrestore(&s2idle_lock, flags);
 }
 EXPORT_SYMBOL_GPL(s2idle_wake);
 
@@ -428,6 +429,8 @@
 	arch_suspend_disable_irqs();
 	BUG_ON(!irqs_disabled());
 
+	system_state = SYSTEM_SUSPEND;
+
 	error = syscore_suspend();
 	if (!error) {
 		*wakeup = pm_wakeup_pending();
@@ -443,6 +446,8 @@
 		syscore_resume();
 	}
 
+	system_state = SYSTEM_RUNNING;
+
 	arch_suspend_enable_irqs();
 	BUG_ON(irqs_disabled());
 
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 11b4282..1efcb5b 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -269,7 +269,7 @@
 	struct bio *bio;
 	int error = 0;
 
-	bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1);
+	bio = bio_alloc(GFP_NOIO | __GFP_HIGH, 1);
 	bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
 	bio_set_dev(bio, hib_resume_bdev);
 	bio_set_op_attrs(bio, op, op_flags);
@@ -376,7 +376,7 @@
 		return -ENOSPC;
 
 	if (hb) {
-		src = (void *)__get_free_page(__GFP_RECLAIM | __GFP_NOWARN |
+		src = (void *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
 		                              __GFP_NORETRY);
 		if (src) {
 			copy_page(src, buf);
@@ -384,7 +384,7 @@
 			ret = hib_wait_io(hb); /* Free pages */
 			if (ret)
 				return ret;
-			src = (void *)__get_free_page(__GFP_RECLAIM |
+			src = (void *)__get_free_page(GFP_NOIO |
 			                              __GFP_NOWARN |
 			                              __GFP_NORETRY);
 			if (src) {
@@ -691,7 +691,7 @@
 	nr_threads = num_online_cpus() - 1;
 	nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
 
-	page = (void *)__get_free_page(__GFP_RECLAIM | __GFP_HIGH);
+	page = (void *)__get_free_page(GFP_NOIO | __GFP_HIGH);
 	if (!page) {
 		pr_err("Failed to allocate LZO page\n");
 		ret = -ENOMEM;
@@ -989,7 +989,7 @@
 		last = tmp;
 
 		tmp->map = (struct swap_map_page *)
-			   __get_free_page(__GFP_RECLAIM | __GFP_HIGH);
+			   __get_free_page(GFP_NOIO | __GFP_HIGH);
 		if (!tmp->map) {
 			release_swap_reader(handle);
 			return -ENOMEM;
@@ -1261,8 +1261,8 @@
 
 	for (i = 0; i < read_pages; i++) {
 		page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
-						  __GFP_RECLAIM | __GFP_HIGH :
-						  __GFP_RECLAIM | __GFP_NOWARN |
+						  GFP_NOIO | __GFP_HIGH :
+						  GFP_NOIO | __GFP_NOWARN |
 						  __GFP_NORETRY);
 
 		if (!page[i]) {
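
These GFP conversions should be cosmetic: in this kernel GFP_NOIO is defined
as exactly the reclaim bits, so the allocator may still reclaim but will not
recurse into I/O, which is what the hibernation image path needs. From
include/linux/gfp.h:

    #define GFP_NOIO        (__GFP_RECLAIM)
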
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 75c959d..abd2255 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -186,6 +186,11 @@
 		res = PAGE_SIZE - pg_offp;
 	}
 
+	if (!data_of(data->handle)) {
+		res = -EINVAL;
+		goto unlock;
+	}
+
 	res = simple_write_to_buffer(data_of(data->handle), res, &pg_offp,
 			buf, count);
 	if (res > 0)
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index dfba59b..4210152 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -188,6 +188,7 @@
 		return ERR_PTR(-ENOMEM);
 	}
 	wl->ws.name = wl->name;
+	wl->ws.last_time = ktime_get();
 	wakeup_source_add(&wl->ws);
 	rb_link_node(&wl->node, parent, node);
 	rb_insert_color(&wl->node, &wakelocks_tree);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2f4af21..2478083 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1908,6 +1908,7 @@
 		preempt_enable();
 	}
 
+	wake_up_klogd();
 	return printed_len;
 }
 EXPORT_SYMBOL(vprintk_emit);
@@ -2289,9 +2290,7 @@
 {
 	static char ext_text[CONSOLE_EXT_LOG_MAX];
 	static char text[LOG_LINE_MAX + PREFIX_MAX];
-	static u64 seen_seq;
 	unsigned long flags;
-	bool wake_klogd = false;
 	bool do_cond_resched, retry;
 
 	if (console_suspended) {
@@ -2335,11 +2334,6 @@
 
 		printk_safe_enter_irqsave(flags);
 		raw_spin_lock(&logbuf_lock);
-		if (seen_seq != log_next_seq) {
-			wake_klogd = true;
-			seen_seq = log_next_seq;
-		}
-
 		if (console_seq < log_first_seq) {
 			len = sprintf(text, "** %u printk messages dropped **\n",
 				      (unsigned)(log_first_seq - console_seq));
@@ -2397,7 +2391,7 @@
 
 		if (console_lock_spinning_disable_and_check()) {
 			printk_safe_exit_irqrestore(flags);
-			goto out;
+			return;
 		}
 
 		printk_safe_exit_irqrestore(flags);
@@ -2429,10 +2423,6 @@
 
 	if (retry && console_trylock())
 		goto again;
-
-out:
-	if (wake_klogd)
-		wake_up_klogd();
 }
 EXPORT_SYMBOL(console_unlock);
 
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index 3e3c200..d7d0913 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -82,6 +82,7 @@
 {
 	int add;
 	size_t len;
+	va_list ap;
 
 again:
 	len = atomic_read(&s->len);
@@ -100,7 +101,9 @@
 	if (!len)
 		smp_rmb();
 
-	add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
+	va_copy(ap, args);
+	add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, ap);
+	va_end(ap);
 	if (!add)
 		return 0;
 
@@ -278,7 +281,7 @@
 	 * Make sure that we can access the main ring buffer.
 	 * Do not risk a double release when more CPUs are up.
 	 */
-	if (in_nmi() && raw_spin_is_locked(&logbuf_lock)) {
+	if (raw_spin_is_locked(&logbuf_lock)) {
 		if (num_online_cpus() > 1)
 			return;
 
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 7a693e3..40cea67 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -270,6 +270,12 @@
 	}
 }
 
+/* Returns first leaf rcu_node of the specified RCU flavor. */
+#define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1])
+
+/* Is this rcu_node a leaf? */
+#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
+
 /*
  * Do a full breadth-first scan of the rcu_node structures for the
  * specified rcu_state structure.
@@ -284,8 +290,7 @@
  * rcu_node tree with but one rcu_node structure, this loop is a no-op.
  */
 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
-	for ((rnp) = &(rsp)->node[0]; \
-	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
+	for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rnp); (rnp)++)
 
 /*
  * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -294,7 +299,7 @@
  * It is still a leaf node, even if it is also the root node.
  */
 #define rcu_for_each_leaf_node(rsp, rnp) \
-	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
+	for ((rnp) = rcu_first_leaf_node(rsp); \
 	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
 
 /*
@@ -486,6 +491,7 @@
 void rcu_bh_force_quiescent_state(void);
 void rcu_sched_force_quiescent_state(void);
 extern struct workqueue_struct *rcu_gp_wq;
+extern struct workqueue_struct *rcu_par_gp_wq;
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_RCU_NOCB_CPU
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 88cba7c..5aff271 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -404,24 +404,6 @@
 }
 
 /*
- * Scan the specified rcu_segcblist structure for callbacks that need
- * a grace period later than the one specified by "seq".  We don't look
- * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
- * have a grace-period sequence number.
- */
-bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
-				    unsigned long seq)
-{
-	int i;
-
-	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
-		if (rsclp->tails[i - 1] != rsclp->tails[i] &&
-		    ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
-			return true;
-	return false;
-}
-
-/*
  * Merge the source rcu_segcblist structure into the destination
  * rcu_segcblist structure, then initialize the source.  Any pending
  * callbacks from the source get to start over.  It is best to
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 581c12b..948470c 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -134,7 +134,5 @@
 				   struct rcu_cblist *rclp);
 void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
 bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
-bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
-				    unsigned long seq);
 void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
 			 struct rcu_segcblist *src_rsclp);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 777e7a6..e232846 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -369,7 +369,7 @@
  */
 static void rcu_perf_wait_shutdown(void)
 {
-	cond_resched_rcu_qs();
+	cond_resched_tasks_rcu_qs();
 	if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
 		return;
 	while (!torture_must_stop())
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 680c96d..e628fcf 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -593,7 +593,12 @@
 
 static void srcu_torture_cleanup(void)
 {
-	cleanup_srcu_struct(&srcu_ctld);
+	static DEFINE_TORTURE_RANDOM(rand);
+
+	if (torture_random(&rand) & 0x800)
+		cleanup_srcu_struct(&srcu_ctld);
+	else
+		cleanup_srcu_struct_quiesced(&srcu_ctld);
 	srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
 }
 
@@ -1609,6 +1614,9 @@
 static void
 rcu_torture_cleanup(void)
 {
+	int flags = 0;
+	unsigned long gpnum = 0;
+	unsigned long completed = 0;
 	int i;
 
 	rcutorture_record_test_transition();
@@ -1639,6 +1647,11 @@
 		fakewriter_tasks = NULL;
 	}
 
+	rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed);
+	srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
+				&flags, &gpnum, &completed);
+	pr_alert("%s:  End-test grace-period state: g%lu c%lu f%#x\n",
+		 cur_ops->name, gpnum, completed, flags);
 	torture_stop_kthread(rcu_torture_stats, stats_task);
 	torture_stop_kthread(rcu_torture_fqs, fqs_task);
 	for (i = 0; i < ncbflooders; i++)
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 76ac5f5..622792a 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -86,16 +86,19 @@
  * Must invoke this after you are finished using a given srcu_struct that
  * was initialized via init_srcu_struct(), else you leak memory.
  */
-void cleanup_srcu_struct(struct srcu_struct *sp)
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
 {
 	WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
-	flush_work(&sp->srcu_work);
+	if (quiesced)
+		WARN_ON(work_pending(&sp->srcu_work));
+	else
+		flush_work(&sp->srcu_work);
 	WARN_ON(sp->srcu_gp_running);
 	WARN_ON(sp->srcu_gp_waiting);
 	WARN_ON(sp->srcu_cb_head);
 	WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
 }
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
 
 /*
  * Removes the count for the old reader from the appropriate element of
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index fb560fc..b4123d7 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -366,24 +366,28 @@
 	return SRCU_INTERVAL;
 }
 
-/**
- * cleanup_srcu_struct - deconstruct a sleep-RCU structure
- * @sp: structure to clean up.
- *
- * Must invoke this after you are finished using a given srcu_struct that
- * was initialized via init_srcu_struct(), else you leak memory.
- */
-void cleanup_srcu_struct(struct srcu_struct *sp)
+/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */
+void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
 {
 	int cpu;
 
 	if (WARN_ON(!srcu_get_delay(sp)))
-		return; /* Leakage unless caller handles error. */
+		return; /* Just leak it! */
 	if (WARN_ON(srcu_readers_active(sp)))
-		return; /* Leakage unless caller handles error. */
-	flush_delayed_work(&sp->work);
+		return; /* Just leak it! */
+	if (quiesced) {
+		if (WARN_ON(delayed_work_pending(&sp->work)))
+			return; /* Just leak it! */
+	} else {
+		flush_delayed_work(&sp->work);
+	}
 	for_each_possible_cpu(cpu)
-		flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+		if (quiesced) {
+			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work)))
+				return; /* Just leak it! */
+		} else {
+			flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
+		}
 	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
 	    WARN_ON(srcu_readers_active(sp))) {
 		pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
@@ -392,7 +396,7 @@
 	free_percpu(sp->sda);
 	sp->sda = NULL;
 }
-EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
 
 /*
  * Counts the new reader in the appropriate per-CPU element of the
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2a73469..aa7cade 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -524,8 +524,6 @@
 static ulong jiffies_till_sched_qs = HZ / 10;
 module_param(jiffies_till_sched_qs, ulong, 0444);
 
-static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
-				  struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
 static void force_quiescent_state(struct rcu_state *rsp);
 static int rcu_pending(void);
@@ -711,44 +709,6 @@
 }
 
 /*
- * Is there any need for future grace periods?
- * Interrupts must be disabled.  If the caller does not hold the root
- * rnp_node structure's ->lock, the results are advisory only.
- */
-static int rcu_future_needs_gp(struct rcu_state *rsp)
-{
-	struct rcu_node *rnp = rcu_get_root(rsp);
-	int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
-	int *fp = &rnp->need_future_gp[idx];
-
-	lockdep_assert_irqs_disabled();
-	return READ_ONCE(*fp);
-}
-
-/*
- * Does the current CPU require a not-yet-started grace period?
- * The caller must have disabled interrupts to prevent races with
- * normal callback registry.
- */
-static bool
-cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
-{
-	lockdep_assert_irqs_disabled();
-	if (rcu_gp_in_progress(rsp))
-		return false;  /* No, a grace period is already in progress. */
-	if (rcu_future_needs_gp(rsp))
-		return true;  /* Yes, a no-CBs CPU needs one. */
-	if (!rcu_segcblist_is_enabled(&rdp->cblist))
-		return false;  /* No, this is a no-CBs (or offline) CPU. */
-	if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
-		return true;  /* Yes, CPU has newly registered callbacks. */
-	if (rcu_segcblist_future_gp_needed(&rdp->cblist,
-					   READ_ONCE(rsp->completed)))
-		return true;  /* Yes, CBs for future grace period. */
-	return false; /* No grace period needed. */
-}
-
-/*
  * Enter an RCU extended quiescent state, which can be either the
  * idle loop or adaptive-tickless usermode execution.
  *
@@ -1234,10 +1194,10 @@
 	}
 
 	/*
-	 * Has this CPU encountered a cond_resched_rcu_qs() since the
-	 * beginning of the grace period?  For this to be the case,
-	 * the CPU has to have noticed the current grace period.  This
-	 * might not be the case for nohz_full CPUs looping in the kernel.
+	 * Has this CPU encountered a cond_resched() since the beginning
+	 * of the grace period?  For this to be the case, the CPU has to
+	 * have noticed the current grace period.  This might not be the
+	 * case for nohz_full CPUs looping in the kernel.
 	 */
 	jtsq = jiffies_till_sched_qs;
 	ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
@@ -1642,18 +1602,30 @@
 		return rnp->completed + 1;
 
 	/*
+	 * If the current rcu_node structure believes that RCU is
+	 * idle, and if the rcu_state structure does not yet reflect
+	 * the start of a new grace period, then the next grace period
+	 * will suffice.  The memory barrier is needed to accurately
+	 * sample the rsp->gpnum, and pairs with the second lock
+	 * acquisition in rcu_gp_init(), which is augmented with
+	 * smp_mb__after_unlock_lock() for this purpose.
+	 */
+	if (rnp->gpnum == rnp->completed) {
+		smp_mb(); /* See above block comment. */
+		if (READ_ONCE(rsp->gpnum) == rnp->completed)
+			return rnp->completed + 1;
+	}
+
+	/*
 	 * Otherwise, wait for a possible partial grace period and
 	 * then the subsequent full grace period.
 	 */
 	return rnp->completed + 2;
 }
 
-/*
- * Trace-event helper function for rcu_start_future_gp() and
- * rcu_nocb_wait_gp().
- */
-static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-				unsigned long c, const char *s)
+/* Trace-event wrapper function for trace_rcu_future_grace_period.  */
+static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+			      unsigned long c, const char *s)
 {
 	trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
 				      rnp->completed, c, rnp->level,
@@ -1661,96 +1633,67 @@
 }
 
 /*
- * Start some future grace period, as needed to handle newly arrived
+ * Start the specified grace period, as needed to handle newly arrived
  * callbacks.  The required future grace periods are recorded in each
- * rcu_node structure's ->need_future_gp field.  Returns true if there
+ * rcu_node structure's ->need_future_gp[] field.  Returns true if there
  * is reason to awaken the grace-period kthread.
  *
- * The caller must hold the specified rcu_node structure's ->lock.
+ * The caller must hold the specified rcu_node structure's ->lock, which
+ * is why the caller is responsible for waking the grace-period kthread.
  */
-static bool __maybe_unused
-rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
-		    unsigned long *c_out)
+static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+			      unsigned long c)
 {
-	unsigned long c;
 	bool ret = false;
-	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+	struct rcu_state *rsp = rdp->rsp;
+	struct rcu_node *rnp_root;
 
+	/*
+	 * Use funnel locking to either acquire the root rcu_node
+	 * structure's lock or bail out if the need for this grace period
+	 * has already been recorded -- or has already started.  If there
+	 * is already a grace period in progress in a non-leaf node, no
+	 * recording is needed because the end of the grace period will
+	 * scan the leaf rcu_node structures.  Note that rnp->lock must
+	 * not be released.
+	 */
 	raw_lockdep_assert_held_rcu_node(rnp);
-
-	/*
-	 * Pick up grace-period number for new callbacks.  If this
-	 * grace period is already marked as needed, return to the caller.
-	 */
-	c = rcu_cbs_completed(rdp->rsp, rnp);
-	trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
-	if (rnp->need_future_gp[c & 0x1]) {
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
-		goto out;
+	trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
+	for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
+		if (rnp_root != rnp)
+			raw_spin_lock_rcu_node(rnp_root);
+		WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum +
+					  need_future_gp_mask(), c));
+		if (need_future_gp_element(rnp_root, c) ||
+		    ULONG_CMP_GE(rnp_root->gpnum, c) ||
+		    (rnp != rnp_root &&
+		     rnp_root->gpnum != rnp_root->completed)) {
+			trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
+			goto unlock_out;
+		}
+		need_future_gp_element(rnp_root, c) = true;
+		if (rnp_root != rnp && rnp_root->parent != NULL)
+			raw_spin_unlock_rcu_node(rnp_root);
+		if (!rnp_root->parent)
+			break;  /* At root, and perhaps also leaf. */
 	}
 
-	/*
-	 * If either this rcu_node structure or the root rcu_node structure
-	 * believe that a grace period is in progress, then we must wait
-	 * for the one following, which is in "c".  Because our request
-	 * will be noticed at the end of the current grace period, we don't
-	 * need to explicitly start one.  We only do the lockless check
-	 * of rnp_root's fields if the current rcu_node structure thinks
-	 * there is no grace period in flight, and because we hold rnp->lock,
-	 * the only possible change is when rnp_root's two fields are
-	 * equal, in which case rnp_root->gpnum might be concurrently
-	 * incremented.  But that is OK, as it will just result in our
-	 * doing some extra useless work.
-	 */
-	if (rnp->gpnum != rnp->completed ||
-	    READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) {
-		rnp->need_future_gp[c & 0x1]++;
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
-		goto out;
-	}
-
-	/*
-	 * There might be no grace period in progress.  If we don't already
-	 * hold it, acquire the root rcu_node structure's lock in order to
-	 * start one (if needed).
-	 */
-	if (rnp != rnp_root)
-		raw_spin_lock_rcu_node(rnp_root);
-
-	/*
-	 * Get a new grace-period number.  If there really is no grace
-	 * period in progress, it will be smaller than the one we obtained
-	 * earlier.  Adjust callbacks as needed.
-	 */
-	c = rcu_cbs_completed(rdp->rsp, rnp_root);
-	if (!rcu_is_nocb_cpu(rdp->cpu))
-		(void)rcu_segcblist_accelerate(&rdp->cblist, c);
-
-	/*
-	 * If the needed for the required grace period is already
-	 * recorded, trace and leave.
-	 */
-	if (rnp_root->need_future_gp[c & 0x1]) {
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
+	/* If GP already in progress, just leave, otherwise start one. */
+	if (rnp_root->gpnum != rnp_root->completed) {
+		trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
 		goto unlock_out;
 	}
-
-	/* Record the need for the future grace period. */
-	rnp_root->need_future_gp[c & 0x1]++;
-
-	/* If a grace period is not already in progress, start one. */
-	if (rnp_root->gpnum != rnp_root->completed) {
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
-	} else {
-		trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
-		ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+	trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
+	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
+	if (!rsp->gp_kthread) {
+		trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
+		goto unlock_out;
 	}
+	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
+	ret = true;  /* Caller must wake GP kthread. */
 unlock_out:
 	if (rnp != rnp_root)
 		raw_spin_unlock_rcu_node(rnp_root);
-out:
-	if (c_out != NULL)
-		*c_out = c;
 	return ret;
 }
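In outline, the funnel locking in rcu_start_this_gp() reduces to the following walk (illustrative pseudocode; gp_already_covered() and record_gp_request() are invented names standing in for the checks and the need_future_gp_element() assignment):

	for (node = leaf; ; node = node->parent) {
		if (node != leaf)
			raw_spin_lock_rcu_node(node);	/* at most one non-leaf lock held */
		if (gp_already_covered(node, c)) {	/* recorded, or GP already running */
			if (node != leaf)
				raw_spin_unlock_rcu_node(node);
			break;				/* someone beat us to it */
		}
		record_gp_request(node, c);	/* need_future_gp_element(node, c) = true */
		if (!node->parent)
			break;			/* at the root: set RCU_GP_FLAG_INIT */
		if (node != leaf)
			raw_spin_unlock_rcu_node(node);
	}

Because each contending CPU drops out at the first level where its request is already recorded, contention on the root rcu_node lock stays bounded even when many CPUs post callbacks at once.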
 
@@ -1758,16 +1701,16 @@
  * Clean up any old requests for the just-ended grace period.  Also return
  * whether any additional grace periods have been requested.
  */
-static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-	int c = rnp->completed;
-	int needmore;
+	unsigned long c = rnp->completed;
+	bool needmore;
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
-	rnp->need_future_gp[c & 0x1] = 0;
-	needmore = rnp->need_future_gp[(c + 1) & 0x1];
-	trace_rcu_future_gp(rnp, rdp, c,
-			    needmore ? TPS("CleanupMore") : TPS("Cleanup"));
+	need_future_gp_element(rnp, c) = false;
+	needmore = need_any_future_gp(rnp);
+	trace_rcu_this_gp(rnp, rdp, c,
+			  needmore ? TPS("CleanupMore") : TPS("Cleanup"));
 	return needmore;
 }
 
@@ -1802,6 +1745,7 @@
 static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
 			       struct rcu_data *rdp)
 {
+	unsigned long c;
 	bool ret = false;
 
 	raw_lockdep_assert_held_rcu_node(rnp);
@@ -1820,8 +1764,9 @@
 	 * accelerating callback invocation to an earlier grace-period
 	 * number.
 	 */
-	if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
-		ret = rcu_start_future_gp(rnp, rdp, NULL);
+	c = rcu_cbs_completed(rsp, rnp);
+	if (rcu_segcblist_accelerate(&rdp->cblist, c))
+		ret = rcu_start_this_gp(rnp, rdp, c);
 
 	/* Trace depending on how much we were able to accelerate. */
 	if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
@@ -2049,7 +1994,7 @@
 					    rnp->level, rnp->grplo,
 					    rnp->grphi, rnp->qsmask);
 		raw_spin_unlock_irq_rcu_node(rnp);
-		cond_resched_rcu_qs();
+		cond_resched_tasks_rcu_qs();
 		WRITE_ONCE(rsp->gp_activity, jiffies);
 	}
 
@@ -2108,7 +2053,6 @@
 {
 	unsigned long gp_duration;
 	bool needgp = false;
-	int nocb = 0;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 	struct swait_queue_head *sq;
@@ -2147,31 +2091,35 @@
 		if (rnp == rdp->mynode)
 			needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
 		/* smp_mb() provided by prior unlock-lock pair. */
-		nocb += rcu_future_gp_cleanup(rsp, rnp);
+		needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;
 		sq = rcu_nocb_gp_get(rnp);
 		raw_spin_unlock_irq_rcu_node(rnp);
 		rcu_nocb_gp_cleanup(sq);
-		cond_resched_rcu_qs();
+		cond_resched_tasks_rcu_qs();
 		WRITE_ONCE(rsp->gp_activity, jiffies);
 		rcu_gp_slow(rsp, gp_cleanup_delay);
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
-	rcu_nocb_gp_set(rnp, nocb);
 
 	/* Declare grace period done. */
 	WRITE_ONCE(rsp->completed, rsp->gpnum);
 	trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
 	rsp->gp_state = RCU_GP_IDLE;
+	/* Check for GP requests since above loop. */
 	rdp = this_cpu_ptr(rsp->rda);
+	if (need_any_future_gp(rnp)) {
+		trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
+				  TPS("CleanupMore"));
+		needgp = true;
+	}
 	/* Advance CBs to reduce false positives below. */
-	needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp;
-	if (needgp || cpu_needs_another_gp(rsp, rdp)) {
+	if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
 		WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
-		trace_rcu_grace_period(rsp->name,
-				       READ_ONCE(rsp->gpnum),
+		trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
 				       TPS("newreq"));
 	}
+	WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
 	raw_spin_unlock_irq_rcu_node(rnp);
 }
 
@@ -2202,7 +2150,7 @@
 			/* Locking provides needed memory barrier. */
 			if (rcu_gp_init(rsp))
 				break;
-			cond_resched_rcu_qs();
+			cond_resched_tasks_rcu_qs();
 			WRITE_ONCE(rsp->gp_activity, jiffies);
 			WARN_ON(signal_pending(current));
 			trace_rcu_grace_period(rsp->name,
@@ -2247,7 +2195,7 @@
 				trace_rcu_grace_period(rsp->name,
 						       READ_ONCE(rsp->gpnum),
 						       TPS("fqsend"));
-				cond_resched_rcu_qs();
+				cond_resched_tasks_rcu_qs();
 				WRITE_ONCE(rsp->gp_activity, jiffies);
 				ret = 0; /* Force full wait till next FQS. */
 				j = jiffies_till_next_fqs;
@@ -2260,7 +2208,7 @@
 				}
 			} else {
 				/* Deal with stray signal. */
-				cond_resched_rcu_qs();
+				cond_resched_tasks_rcu_qs();
 				WRITE_ONCE(rsp->gp_activity, jiffies);
 				WARN_ON(signal_pending(current));
 				trace_rcu_grace_period(rsp->name,
@@ -2283,71 +2231,6 @@
 }
 
 /*
- * Start a new RCU grace period if warranted, re-initializing the hierarchy
- * in preparation for detecting the next grace period.  The caller must hold
- * the root node's ->lock and hard irqs must be disabled.
- *
- * Note that it is legal for a dying CPU (which is marked as offline) to
- * invoke this function.  This can happen when the dying CPU reports its
- * quiescent state.
- *
- * Returns true if the grace-period kthread must be awakened.
- */
-static bool
-rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
-		      struct rcu_data *rdp)
-{
-	raw_lockdep_assert_held_rcu_node(rnp);
-	if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
-		/*
-		 * Either we have not yet spawned the grace-period
-		 * task, this CPU does not need another grace period,
-		 * or a grace period is already in progress.
-		 * Either way, don't start a new grace period.
-		 */
-		return false;
-	}
-	WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
-	trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
-			       TPS("newreq"));
-
-	/*
-	 * We can't do wakeups while holding the rnp->lock, as that
-	 * could cause possible deadlocks with the rq->lock. Defer
-	 * the wakeup to our caller.
-	 */
-	return true;
-}
-
-/*
- * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
- * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
- * is invoked indirectly from rcu_advance_cbs(), which would result in
- * endless recursion -- or would do so if it wasn't for the self-deadlock
- * that is encountered beforehand.
- *
- * Returns true if the grace-period kthread needs to be awakened.
- */
-static bool rcu_start_gp(struct rcu_state *rsp)
-{
-	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-	struct rcu_node *rnp = rcu_get_root(rsp);
-	bool ret = false;
-
-	/*
-	 * If there is no grace period in progress right now, any
-	 * callbacks we have up to this point will be satisfied by the
-	 * next grace period.  Also, advancing the callbacks reduces the
-	 * probability of false positives from cpu_needs_another_gp()
-	 * resulting in pointless grace periods.  So, advance callbacks
-	 * then start the grace period!
-	 */
-	ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
-	ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
-	return ret;
-}
-
-/*
  * Report a full set of quiescent states to the specified rcu_state data
  * structure.  Invoke rcu_gp_kthread_wake() to awaken the grace-period
  * kthread if another grace period is required.  Whether we wake
@@ -2398,7 +2281,7 @@
 			return;
 		}
 		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
-		WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
+		WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
 			     rcu_preempt_blocked_readers_cgp(rnp));
 		rnp->qsmask &= ~mask;
 		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
@@ -2782,7 +2665,7 @@
 	struct rcu_node *rnp;
 
 	rcu_for_each_leaf_node(rsp, rnp) {
-		cond_resched_rcu_qs();
+		cond_resched_tasks_rcu_qs();
 		mask = 0;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (rnp->qsmask == 0) {
@@ -2874,22 +2757,27 @@
 	unsigned long flags;
 	bool needwake;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
+	struct rcu_node *rnp;
 
 	WARN_ON_ONCE(!rdp->beenonline);
 
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
 
-	/* Does this CPU require a not-yet-started grace period? */
-	local_irq_save(flags);
-	if (cpu_needs_another_gp(rsp, rdp)) {
-		raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
-		needwake = rcu_start_gp(rsp);
-		raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
-		if (needwake)
-			rcu_gp_kthread_wake(rsp);
-	} else {
-		local_irq_restore(flags);
+	/* No grace period and unregistered callbacks? */
+	if (!rcu_gp_in_progress(rsp) &&
+	    rcu_segcblist_is_enabled(&rdp->cblist)) {
+		local_irq_save(flags);
+		if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
+			local_irq_restore(flags);
+		} else {
+			rnp = rdp->mynode;
+			raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
+			needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+			if (needwake)
+				rcu_gp_kthread_wake(rsp);
+		}
 	}
 
 	/* If there are callbacks ready, invoke them. */
@@ -2973,11 +2861,11 @@
 
 		/* Start a new grace period if one not already started. */
 		if (!rcu_gp_in_progress(rsp)) {
-			struct rcu_node *rnp_root = rcu_get_root(rsp);
+			struct rcu_node *rnp = rdp->mynode;
 
-			raw_spin_lock_rcu_node(rnp_root);
-			needwake = rcu_start_gp(rsp);
-			raw_spin_unlock_rcu_node(rnp_root);
+			raw_spin_lock_rcu_node(rnp);
+			needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
+			raw_spin_unlock_rcu_node(rnp);
 			if (needwake)
 				rcu_gp_kthread_wake(rsp);
 		} else {
@@ -3368,7 +3256,9 @@
 		return 1;
 
 	/* Has RCU gone idle with this CPU needing another grace period? */
-	if (cpu_needs_another_gp(rsp, rdp))
+	if (!rcu_gp_in_progress(rsp) &&
+	    rcu_segcblist_is_enabled(&rdp->cblist) &&
+	    !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
 		return 1;
 
 	/* Has another RCU grace period completed?  */
@@ -3775,6 +3665,8 @@
 	return 0;
 }
 
+static DEFINE_PER_CPU(int, rcu_cpu_started);
+
 /*
  * Mark the specified CPU as being online so that subsequent grace periods
  * (both expedited and normal) will wait on it.  Note that this means that
@@ -3796,6 +3688,11 @@
 	struct rcu_node *rnp;
 	struct rcu_state *rsp;
 
+	if (per_cpu(rcu_cpu_started, cpu))
+		return;
+
+	per_cpu(rcu_cpu_started, cpu) = 1;
+
 	for_each_rcu_flavor(rsp) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
 		rnp = rdp->mynode;
@@ -3852,6 +3749,8 @@
 	preempt_enable();
 	for_each_rcu_flavor(rsp)
 		rcu_cleanup_dying_idle_cpu(cpu, rsp);
+
+	per_cpu(rcu_cpu_started, cpu) = 0;
 }
 
 /* Migrate the dead CPU's callbacks to the current CPU. */
@@ -3861,6 +3760,7 @@
 	struct rcu_data *my_rdp;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+	bool needwake;
 
 	if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
 		return;  /* No callbacks to migrate. */
@@ -3872,12 +3772,15 @@
 		return;
 	}
 	raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
-	rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
-	rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
+	/* Leverage recent GPs and set GP for new callbacks. */
+	needwake = rcu_advance_cbs(rsp, rnp_root, rdp) ||
+		   rcu_advance_cbs(rsp, rnp_root, my_rdp);
 	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
 	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
 		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
 	raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
+	if (needwake)
+		rcu_gp_kthread_wake(rsp);
 	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
 		  !rcu_segcblist_empty(&rdp->cblist),
 		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
@@ -4056,7 +3959,7 @@
 
 	init_swait_queue_head(&rsp->gp_wq);
 	init_swait_queue_head(&rsp->expedited_wq);
-	rnp = rsp->level[rcu_num_lvls - 1];
+	rnp = rcu_first_leaf_node(rsp);
 	for_each_possible_cpu(i) {
 		while (i > rnp->grphi)
 			rnp++;
@@ -4168,6 +4071,7 @@
 }
 
 struct workqueue_struct *rcu_gp_wq;
+struct workqueue_struct *rcu_par_gp_wq;
 
 void __init rcu_init(void)
 {
@@ -4199,6 +4103,8 @@
 	/* Create workqueue for expedited GPs and for Tree SRCU. */
 	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
 	WARN_ON(!rcu_gp_wq);
+	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
+	WARN_ON(!rcu_par_gp_wq);
 }
 
 #include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f491ab4..78e051d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -58,6 +58,14 @@
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 };
 
+/* Communicate arguments to a workqueue handler. */
+struct rcu_exp_work {
+	smp_call_func_t rew_func;
+	struct rcu_state *rew_rsp;
+	unsigned long rew_s;
+	struct work_struct rew_work;
+};
+
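This structure is embedded in each rcu_node (see the rew member added further down in this file's diff), and the workqueue handler recovers it with container_of(). A minimal userspace model of that retrieval, with stand-in struct definitions:

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct work_struct { int pending; };
	struct rcu_exp_work { unsigned long rew_s; struct work_struct rew_work; };

	int main(void)
	{
		struct rcu_exp_work rew = { .rew_s = 42 };
		struct work_struct *wp = &rew.rew_work;	/* all the handler receives */
		struct rcu_exp_work *rewp =
			container_of(wp, struct rcu_exp_work, rew_work);

		printf("rew_s = %lu\n", rewp->rew_s);	/* prints 42 */
		return 0;
	}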
 /* RCU's kthread states for tracing. */
 #define RCU_KTHREAD_STOPPED  0
 #define RCU_KTHREAD_RUNNING  1
@@ -150,15 +158,32 @@
 	struct swait_queue_head nocb_gp_wq[2];
 				/* Place for rcu_nocb_kthread() to wait GP. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
-	int need_future_gp[2];
-				/* Counts of upcoming no-CB GP requests. */
+	u8 need_future_gp[4];	/* Counts of upcoming GP requests. */
 	raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
 
 	spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
 	unsigned long exp_seq_rq;
 	wait_queue_head_t exp_wq[4];
+	struct rcu_exp_work rew;
+	bool exp_need_flush;	/* Need to flush workitem? */
 } ____cacheline_internodealigned_in_smp;
 
+/* Accessors for ->need_future_gp[] array. */
+#define need_future_gp_mask() \
+	(ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
+#define need_future_gp_element(rnp, c) \
+	((rnp)->need_future_gp[(c) & need_future_gp_mask()])
+#define need_any_future_gp(rnp)						\
+({									\
+	int __i;							\
+	bool __nonzero = false;						\
+									\
+	for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++)	\
+		__nonzero = __nonzero ||				\
+			    READ_ONCE((rnp)->need_future_gp[__i]);	\
+	__nonzero;							\
+})
+
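Because the array length is a power of two, the mask computed by need_future_gp_mask() lets a free-running grace-period number index the array modulo its size. A standalone demonstration of the indexing (toy values, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned char need_future_gp[4] = { 0 };	 /* mirrors struct rcu_node */
		unsigned long mask = sizeof(need_future_gp) - 1; /* 0x3 */
		unsigned long c;

		/* Consecutive grace-period numbers cycle through the slots. */
		for (c = 100; c < 106; c++)
			printf("GP %lu -> slot %lu\n", c, c & mask);
		return 0;
	}

Four slots (up from the previous two-element ->need_future_gp[]) give headroom for requests that run several grace periods ahead, which the WARN_ON_ONCE() in rcu_start_this_gp() cross-checks.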
 /*
  * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
  * are indexed relative to this interval rather than the global CPU ID space.
@@ -224,10 +249,6 @@
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-	atomic_long_t exp_workdone0;	/* # done by workqueue. */
-	atomic_long_t exp_workdone1;	/* # done by others #1. */
-	atomic_long_t exp_workdone2;	/* # done by others #2. */
-	atomic_long_t exp_workdone3;	/* # done by others #3. */
 	int exp_dynticks_snap;		/* Double-check need for IPI. */
 
 	/* 6) Callback offloading. */
@@ -408,7 +429,6 @@
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
-bool rcu_eqs_special_set(int cpu);
 
 #ifdef CONFIG_RCU_BOOST
 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
@@ -438,7 +458,6 @@
 static void invoke_rcu_callbacks_kthread(void);
 static bool rcu_is_callbacks_kthread(void);
 #ifdef CONFIG_RCU_BOOST
-static void rcu_preempt_do_callbacks(void);
 static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 						 struct rcu_node *rnp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
@@ -454,7 +473,6 @@
 static void zero_cpu_stall_ticks(struct rcu_data *rdp);
 static void increment_cpu_stall_ticks(void);
 static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
 static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
 static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index f72eefa..d40708e 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -20,6 +20,8 @@
  * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
+#include <linux/lockdep.h>
+
 /*
  * Record the start of an expedited grace period.
  */
@@ -154,15 +156,35 @@
  * for the current expedited grace period.  Works only for preemptible
  * RCU -- other RCU implementation use other means.
  *
- * Caller must hold the rcu_state's exp_mutex.
+ * Caller must hold the specified rcu_node structure's ->lock.
  */
 static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
 {
+	raw_lockdep_assert_held_rcu_node(rnp);
+
 	return rnp->exp_tasks == NULL &&
 	       READ_ONCE(rnp->expmask) == 0;
 }
 
 /*
+ * Like sync_rcu_preempt_exp_done(), but this function assumes the caller
+ * doesn't hold the rcu_node's ->lock, and will acquire and release the lock
+ * itself.
+ */
+static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	bool ret;
+
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	ret = sync_rcu_preempt_exp_done(rnp);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
+	return ret;
+}
+
+/*
  * Report the exit from RCU read-side critical section for the last task
  * that queued itself during or before the current expedited preemptible-RCU
  * grace period.  This event is reported either to the rcu_node structure on
@@ -170,8 +192,7 @@
  * recursively up the tree.  (Calm down, calm down, we do the recursion
  * iteratively!)
  *
- * Caller must hold the rcu_state's exp_mutex and the specified rcu_node
- * structure's ->lock.
+ * Caller must hold the specified rcu_node structure's ->lock.
  */
 static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 				 bool wake, unsigned long flags)
@@ -207,8 +228,6 @@
 /*
  * Report expedited quiescent state for specified node.  This is a
  * lock-acquisition wrapper function for __rcu_report_exp_rnp().
- *
- * Caller must hold the rcu_state's exp_mutex.
  */
 static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
 					      struct rcu_node *rnp, bool wake)
@@ -221,8 +240,7 @@
 
 /*
  * Report expedited quiescent state for multiple CPUs, all covered by the
- * specified leaf rcu_node structure.  Caller must hold the rcu_state's
- * exp_mutex.
+ * specified leaf rcu_node structure.
  */
 static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
 				    unsigned long mask, bool wake)
@@ -248,14 +266,12 @@
 }
 
 /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
-static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
-			       unsigned long s)
+static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
 {
 	if (rcu_exp_gp_seq_done(rsp, s)) {
 		trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
 		/* Ensure test happens before caller kfree(). */
 		smp_mb__before_atomic(); /* ^^^ */
-		atomic_long_inc(stat);
 		return true;
 	}
 	return false;
@@ -289,7 +305,7 @@
 	 * promoting locality and is not strictly needed for correctness.
 	 */
 	for (; rnp != NULL; rnp = rnp->parent) {
-		if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
+		if (sync_exp_work_done(rsp, s))
 			return true;
 
 		/* Work not done, either wait here or go up. */
@@ -302,8 +318,7 @@
 						  rnp->grplo, rnp->grphi,
 						  TPS("wait"));
 			wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
-				   sync_exp_work_done(rsp,
-						      &rdp->exp_workdone2, s));
+				   sync_exp_work_done(rsp, s));
 			return true;
 		}
 		rnp->exp_seq_rq = s; /* Followers can wait on us. */
@@ -313,7 +328,7 @@
 	}
 	mutex_lock(&rsp->exp_mutex);
 fastpath:
-	if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
+	if (sync_exp_work_done(rsp, s)) {
 		mutex_unlock(&rsp->exp_mutex);
 		return true;
 	}
@@ -362,93 +377,129 @@
 }
 
 /*
+ * Select the CPUs within the specified rcu_node that the upcoming
+ * expedited grace period needs to wait for.
+ */
+static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
+{
+	int cpu;
+	unsigned long flags;
+	smp_call_func_t func;
+	unsigned long mask_ofl_test;
+	unsigned long mask_ofl_ipi;
+	int ret;
+	struct rcu_exp_work *rewp =
+		container_of(wp, struct rcu_exp_work, rew_work);
+	struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
+	struct rcu_state *rsp = rewp->rew_rsp;
+
+	func = rewp->rew_func;
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+
+	/* Each pass checks a CPU for identity, offline, and idle. */
+	mask_ofl_test = 0;
+	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+		unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+		struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
+		int snap;
+
+		if (raw_smp_processor_id() == cpu ||
+		    !(rnp->qsmaskinitnext & mask)) {
+			mask_ofl_test |= mask;
+		} else {
+			snap = rcu_dynticks_snap(rdtp);
+			if (rcu_dynticks_in_eqs(snap))
+				mask_ofl_test |= mask;
+			else
+				rdp->exp_dynticks_snap = snap;
+		}
+	}
+	mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
+
+	/*
+	 * Need to wait for any blocked tasks as well.  Note that
+	 * additional blocking tasks will also block the expedited GP
+	 * until such time as the ->expmask bits are cleared.
+	 */
+	if (rcu_preempt_has_tasks(rnp))
+		rnp->exp_tasks = rnp->blkd_tasks.next;
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
+	/* IPI the remaining CPUs for expedited quiescent state. */
+	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
+		unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
+		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+
+		if (!(mask_ofl_ipi & mask))
+			continue;
+retry_ipi:
+		if (rcu_dynticks_in_eqs_since(rdp->dynticks,
+					      rdp->exp_dynticks_snap)) {
+			mask_ofl_test |= mask;
+			continue;
+		}
+		ret = smp_call_function_single(cpu, func, rsp, 0);
+		if (!ret) {
+			mask_ofl_ipi &= ~mask;
+			continue;
+		}
+		/* Failed, raced with CPU hotplug operation. */
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		if ((rnp->qsmaskinitnext & mask) &&
+		    (rnp->expmask & mask)) {
+			/* Online, so delay for a bit and try again. */
+			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+			trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
+			schedule_timeout_uninterruptible(1);
+			goto retry_ipi;
+		}
+		/* CPU really is offline, so we can ignore it. */
+		if (!(rnp->expmask & mask))
+			mask_ofl_ipi &= ~mask;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+	/* Report quiescent states for those that went offline. */
+	mask_ofl_test |= mask_ofl_ipi;
+	if (mask_ofl_test)
+		rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+}
+
+/*
  * Select the nodes that the upcoming expedited grace period needs
  * to wait for.
  */
 static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 				     smp_call_func_t func)
 {
-	int cpu;
-	unsigned long flags;
-	unsigned long mask_ofl_test;
-	unsigned long mask_ofl_ipi;
-	int ret;
 	struct rcu_node *rnp;
 
 	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
 	sync_exp_reset_tree(rsp);
 	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
+
+	/* Schedule work for each leaf rcu_node structure. */
 	rcu_for_each_leaf_node(rsp, rnp) {
-		raw_spin_lock_irqsave_rcu_node(rnp, flags);
-
-		/* Each pass checks a CPU for identity, offline, and idle. */
-		mask_ofl_test = 0;
-		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
-			unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
-			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-			struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
-			int snap;
-
-			if (raw_smp_processor_id() == cpu ||
-			    !(rnp->qsmaskinitnext & mask)) {
-				mask_ofl_test |= mask;
-			} else {
-				snap = rcu_dynticks_snap(rdtp);
-				if (rcu_dynticks_in_eqs(snap))
-					mask_ofl_test |= mask;
-				else
-					rdp->exp_dynticks_snap = snap;
-			}
+		rnp->exp_need_flush = false;
+		if (!READ_ONCE(rnp->expmask))
+			continue; /* Avoid early boot non-existent wq. */
+		rnp->rew.rew_func = func;
+		rnp->rew.rew_rsp = rsp;
+		if (!READ_ONCE(rcu_par_gp_wq) ||
+		    rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
+			/* No workqueues yet. */
+			sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
+			continue;
 		}
-		mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
-
-		/*
-		 * Need to wait for any blocked tasks as well.  Note that
-		 * additional blocking tasks will also block the expedited
-		 * GP until such time as the ->expmask bits are cleared.
-		 */
-		if (rcu_preempt_has_tasks(rnp))
-			rnp->exp_tasks = rnp->blkd_tasks.next;
-		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-
-		/* IPI the remaining CPUs for expedited quiescent state. */
-		for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
-			unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
-			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-
-			if (!(mask_ofl_ipi & mask))
-				continue;
-retry_ipi:
-			if (rcu_dynticks_in_eqs_since(rdp->dynticks,
-						      rdp->exp_dynticks_snap)) {
-				mask_ofl_test |= mask;
-				continue;
-			}
-			ret = smp_call_function_single(cpu, func, rsp, 0);
-			if (!ret) {
-				mask_ofl_ipi &= ~mask;
-				continue;
-			}
-			/* Failed, raced with CPU hotplug operation. */
-			raw_spin_lock_irqsave_rcu_node(rnp, flags);
-			if ((rnp->qsmaskinitnext & mask) &&
-			    (rnp->expmask & mask)) {
-				/* Online, so delay for a bit and try again. */
-				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-				trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
-				schedule_timeout_uninterruptible(1);
-				goto retry_ipi;
-			}
-			/* CPU really is offline, so we can ignore it. */
-			if (!(rnp->expmask & mask))
-				mask_ofl_ipi &= ~mask;
-			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-		}
-		/* Report quiescent states for those that went offline. */
-		mask_ofl_test |= mask_ofl_ipi;
-		if (mask_ofl_test)
-			rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
+		INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
+		queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
+		rnp->exp_need_flush = true;
 	}
+
+	/* Wait for workqueue jobs (if any) to complete. */
+	rcu_for_each_leaf_node(rsp, rnp)
+		if (rnp->exp_need_flush)
+			flush_work(&rnp->rew.rew_work);
 }
 
 static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
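The rewrite above turns a serial per-leaf scan into a fan-out/fan-in: one work item per leaf rcu_node, queued near that leaf's CPUs via queue_work_on(rnp->grplo, ...), followed by a flush pass to join them all. A userspace analogue of that shape, with pthreads standing in for work items (illustrative only):

	#include <pthread.h>
	#include <stdio.h>

	#define NLEAVES 4	/* stand-in for the number of leaf rcu_nodes */

	static void *select_node_cpus(void *arg)
	{
		printf("scanning leaf %ld\n", (long)arg);	/* per-leaf CPU scan */
		return NULL;
	}

	int main(void)
	{
		pthread_t tid[NLEAVES];
		long i;

		for (i = 0; i < NLEAVES; i++)	/* like queue_work_on() */
			pthread_create(&tid[i], NULL, select_node_cpus, (void *)i);
		for (i = 0; i < NLEAVES; i++)	/* like the flush_work() pass */
			pthread_join(tid[i], NULL);
		return 0;
	}

The early-boot fallback in the kernel code (calling sync_rcu_exp_select_node_cpus() directly when no workqueue exists yet) has no analogue here; it simply degenerates to the old serial behavior.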
@@ -469,9 +520,9 @@
 	for (;;) {
 		ret = swait_event_timeout(
 				rsp->expedited_wq,
-				sync_rcu_preempt_exp_done(rnp_root),
+				sync_rcu_preempt_exp_done_unlocked(rnp_root),
 				jiffies_stall);
-		if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
+		if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
 			return;
 		WARN_ON(ret < 0);  /* workqueues should not be signaled. */
 		if (rcu_cpu_stall_suppress)
@@ -504,7 +555,7 @@
 			rcu_for_each_node_breadth_first(rsp, rnp) {
 				if (rnp == rnp_root)
 					continue; /* printed unconditionally */
-				if (sync_rcu_preempt_exp_done(rnp))
+				if (sync_rcu_preempt_exp_done_unlocked(rnp))
 					continue;
 				pr_cont(" l=%u:%d-%d:%#lx/%c",
 					rnp->level, rnp->grplo, rnp->grphi,
@@ -560,14 +611,6 @@
 	mutex_unlock(&rsp->exp_wake_mutex);
 }
 
-/* Let the workqueue handler know what it is supposed to do. */
-struct rcu_exp_work {
-	smp_call_func_t rew_func;
-	struct rcu_state *rew_rsp;
-	unsigned long rew_s;
-	struct work_struct rew_work;
-};
-
 /*
  * Common code to drive an expedited grace period forward, used by
  * workqueues and mid-boot-time tasks.
@@ -633,7 +676,7 @@
 	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
 	rnp = rcu_get_root(rsp);
 	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
-		   sync_exp_work_done(rsp, &rdp->exp_workdone0, s));
+		   sync_exp_work_done(rsp, s));
 	smp_mb(); /* Workqueue actions happen before return. */
 
 	/* Let the next expedited grace period start. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 84fbee4..7fd1203 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -182,7 +182,7 @@
 
 	raw_lockdep_assert_held_rcu_node(rnp);
 	WARN_ON_ONCE(rdp->mynode != rnp);
-	WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
+	WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
 
 	/*
 	 * Decide where to queue the newly blocked task.  In theory,
@@ -384,6 +384,50 @@
 }
 
 /*
+ * Preemptible RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+	current->rcu_read_lock_nesting++;
+	barrier();  /* critical section after entry code. */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+/*
+ * Preemptible RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+	struct task_struct *t = current;
+
+	if (t->rcu_read_lock_nesting != 1) {
+		--t->rcu_read_lock_nesting;
+	} else {
+		barrier();  /* critical section before exit code. */
+		t->rcu_read_lock_nesting = INT_MIN;
+		barrier();  /* assign before ->rcu_read_unlock_special load */
+		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
+			rcu_read_unlock_special(t);
+		barrier();  /* ->rcu_read_unlock_special load before assign */
+		t->rcu_read_lock_nesting = 0;
+	}
+#ifdef CONFIG_PROVE_LOCKING
+	{
+		int rrln = READ_ONCE(t->rcu_read_lock_nesting);
+
+		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
+	}
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
+/*
  * Advance a ->blkd_tasks-list pointer to the next entry, instead
  * returning NULL if at the end of the list.
  */
@@ -489,7 +533,7 @@
 		rnp = t->rcu_blocked_node;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
 		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
-		WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
+		WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
 		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
 		empty_exp = sync_rcu_preempt_exp_done(rnp);
 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -685,15 +729,6 @@
 		t->rcu_read_unlock_special.b.need_qs = true;
 }
 
-#ifdef CONFIG_RCU_BOOST
-
-static void rcu_preempt_do_callbacks(void)
-{
-	rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
-}
-
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
 /**
  * call_rcu() - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -1140,7 +1175,7 @@
 {
 	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
 	rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
-	rcu_preempt_do_callbacks();
+	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
 }
 
 static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1607,7 +1642,7 @@
 
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
-		cond_resched_rcu_qs();
+		cond_resched_tasks_rcu_qs();
 	}
 
 	/* Unconditionally decrement: no need to wake ourselves up. */
@@ -1780,19 +1815,6 @@
 	swake_up_all(sq);
 }
 
-/*
- * Set the root rcu_node structure's ->need_future_gp field
- * based on the sum of those of all rcu_node structures.  This does
- * double-count the root rcu_node structure's requests, but this
- * is necessary to handle the possibility of a rcu_nocb_kthread()
- * having awakened during the time that the rcu_node structures
- * were being updated for the end of the previous grace period.
- */
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
-{
-	rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
-}
-
 static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
 {
 	return &rnp->nocb_gp_wq[rnp->completed & 0x1];
@@ -1966,7 +1988,7 @@
 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
 					    TPS("WakeOvf"));
 		} else {
-			wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
+			wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,
 					       TPS("WakeOvfIsDeferred"));
 		}
 		rdp->qlen_last_fqs_check = LONG_MAX / 2;
@@ -2048,7 +2070,8 @@
 	struct rcu_node *rnp = rdp->mynode;
 
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-	needwake = rcu_start_future_gp(rnp, rdp, &c);
+	c = rcu_cbs_completed(rdp->rsp, rnp);
+	needwake = rcu_start_this_gp(rnp, rdp, c);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	if (needwake)
 		rcu_gp_kthread_wake(rdp->rsp);
@@ -2057,7 +2080,7 @@
 	 * Wait for the grace period.  Do so interruptibly to avoid messing
 	 * up the load average.
 	 */
-	trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
+	trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
 	for (;;) {
 		swait_event_interruptible(
 			rnp->nocb_gp_wq[c & 0x1],
@@ -2065,9 +2088,9 @@
 		if (likely(d))
 			break;
 		WARN_ON(signal_pending(current));
-		trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
+		trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));
 	}
-	trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
+	trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));
 	smp_mb(); /* Ensure that CB invocation happens after GP end. */
 }
 
@@ -2236,7 +2259,7 @@
 				cl++;
 			c++;
 			local_bh_enable();
-			cond_resched_rcu_qs();
+			cond_resched_tasks_rcu_qs();
 			list = next;
 		}
 		trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
@@ -2292,7 +2315,7 @@
 void __init rcu_init_nohz(void)
 {
 	int cpu;
-	bool need_rcu_nocb_mask = true;
+	bool need_rcu_nocb_mask = false;
 	struct rcu_state *rsp;
 
 #if defined(CONFIG_NO_HZ_FULL)
@@ -2315,7 +2338,7 @@
 #endif /* #if defined(CONFIG_NO_HZ_FULL) */
 
 	if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
-		pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
+		pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
 		cpumask_and(rcu_nocb_mask, cpu_possible_mask,
 			    rcu_nocb_mask);
 	}
@@ -2495,10 +2518,6 @@
 {
 }
 
-static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
-{
-}
-
 static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
 {
 	return NULL;
@@ -2587,8 +2606,7 @@
 }
 
 /*
- * Bind the grace-period kthread for the sysidle flavor of RCU to the
- * timekeeping CPU.
+ * Bind the RCU grace-period kthreads to the housekeeping CPU.
  */
 static void rcu_bind_gp_kthread(void)
 {
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 68fa19a..4c230a6 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -226,54 +226,6 @@
 
 #endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
 
-#ifdef CONFIG_PREEMPT_RCU
-
-/*
- * Preemptible RCU implementation for rcu_read_lock().
- * Just increment ->rcu_read_lock_nesting, shared state will be updated
- * if we block.
- */
-void __rcu_read_lock(void)
-{
-	current->rcu_read_lock_nesting++;
-	barrier();  /* critical section after entry code. */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_lock);
-
-/*
- * Preemptible RCU implementation for rcu_read_unlock().
- * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
- * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
- * invoke rcu_read_unlock_special() to clean up after a context switch
- * in an RCU read-side critical section and other special cases.
- */
-void __rcu_read_unlock(void)
-{
-	struct task_struct *t = current;
-
-	if (t->rcu_read_lock_nesting != 1) {
-		--t->rcu_read_lock_nesting;
-	} else {
-		barrier();  /* critical section before exit code. */
-		t->rcu_read_lock_nesting = INT_MIN;
-		barrier();  /* assign before ->rcu_read_unlock_special load */
-		if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
-			rcu_read_unlock_special(t);
-		barrier();  /* ->rcu_read_unlock_special load before assign */
-		t->rcu_read_lock_nesting = 0;
-	}
-#ifdef CONFIG_PROVE_LOCKING
-	{
-		int rrln = READ_ONCE(t->rcu_read_lock_nesting);
-
-		WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
-	}
-#endif /* #ifdef CONFIG_PROVE_LOCKING */
-}
-EXPORT_SYMBOL_GPL(__rcu_read_unlock);
-
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
 struct lockdep_map rcu_lock_map =
@@ -624,7 +576,7 @@
  * grace period has elapsed, in other words after all currently
  * executing rcu-tasks read-side critical sections have elapsed.  These
  * read-side critical sections are delimited by calls to schedule(),
- * cond_resched_rcu_qs(), idle execution, userspace execution, calls
+ * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
  * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
  *
  * This is a very specialized primitive, intended only for a few uses in
diff --git a/kernel/resource.c b/kernel/resource.c
index 2af6c03..b589dda 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -87,7 +87,7 @@
 static void *r_start(struct seq_file *m, loff_t *pos)
 	__acquires(resource_lock)
 {
-	struct resource *p = m->private;
+	struct resource *p = PDE_DATA(file_inode(m->file));
 	loff_t l = 0;
 	read_lock(&resource_lock);
 	for (p = p->child; p && l < *pos; p = r_next(m, p, &l))
@@ -103,7 +103,7 @@
 
 static int r_show(struct seq_file *m, void *v)
 {
-	struct resource *root = m->private;
+	struct resource *root = PDE_DATA(file_inode(m->file));
 	struct resource *r = v, *p;
 	unsigned long long start, end;
 	int width = root->end < 0x10000 ? 4 : 8;
@@ -135,44 +135,11 @@
 	.show	= r_show,
 };
 
-static int ioports_open(struct inode *inode, struct file *file)
-{
-	int res = seq_open(file, &resource_op);
-	if (!res) {
-		struct seq_file *m = file->private_data;
-		m->private = &ioport_resource;
-	}
-	return res;
-}
-
-static int iomem_open(struct inode *inode, struct file *file)
-{
-	int res = seq_open(file, &resource_op);
-	if (!res) {
-		struct seq_file *m = file->private_data;
-		m->private = &iomem_resource;
-	}
-	return res;
-}
-
-static const struct file_operations proc_ioports_operations = {
-	.open		= ioports_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const struct file_operations proc_iomem_operations = {
-	.open		= iomem_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static int __init ioresources_init(void)
 {
-	proc_create("ioports", 0, NULL, &proc_ioports_operations);
-	proc_create("iomem", 0, NULL, &proc_iomem_operations);
+	proc_create_seq_data("ioports", 0, NULL, &resource_op,
+			&ioport_resource);
+	proc_create_seq_data("iomem", 0, NULL, &resource_op, &iomem_resource);
 	return 0;
 }
 __initcall(ioresources_init);
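The same conversion applies to any seq_file user whose open() existed only to stash a pointer in ->private; sketched for a hypothetical "foo" file (foo_seq_ops and foo_root are invented names):

	/* Before: custom open() boilerplate just to set seq_file->private. */
	proc_create("foo", 0, NULL, &proc_foo_operations);

	/* After: the proc core stores the pointer, and the iterators fetch
	 * it with PDE_DATA(file_inode(m->file)), as r_start()/r_show() now do. */
	proc_create_seq_data("foo", 0, NULL, &foo_seq_ops, &foo_root);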
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 092f7c4..e9866f8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -881,6 +881,33 @@
 }
 
 #ifdef CONFIG_SMP
+
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+	if (!(p->flags & PF_KTHREAD))
+		return false;
+
+	if (p->nr_cpus_allowed != 1)
+		return false;
+
+	return true;
+}
+
+/*
+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see
+ * __set_cpus_allowed_ptr() and select_fallback_rq().
+ */
+static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+{
+	if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+		return false;
+
+	if (is_per_cpu_kthread(p))
+		return cpu_online(cpu);
+
+	return cpu_active(cpu);
+}
+
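Spelled out, the rule this predicate encodes is:

	per-CPU kthread (PF_KTHREAD, exactly one allowed CPU):  may run on any online CPU in its mask
	all other tasks:                                        may run only on active CPUs in their mask

The hunks below then replace the scattered open-coded online/active checks with single is_cpu_allowed() calls.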
 /*
  * This is how migration works:
  *
@@ -938,16 +965,8 @@
 static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
 				 struct task_struct *p, int dest_cpu)
 {
-	if (p->flags & PF_KTHREAD) {
-		if (unlikely(!cpu_online(dest_cpu)))
-			return rq;
-	} else {
-		if (unlikely(!cpu_active(dest_cpu)))
-			return rq;
-	}
-
 	/* Affinity changed (again). */
-	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+	if (!is_cpu_allowed(p, dest_cpu))
 		return rq;
 
 	update_rq_clock(rq);
@@ -1476,10 +1495,9 @@
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, &p->cpus_allowed) {
-			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
+			if (!is_cpu_allowed(p, dest_cpu))
 				continue;
-			if (!cpu_online(dest_cpu))
-				continue;
+
 			goto out;
 		}
 
@@ -1542,8 +1560,7 @@
 	 * [ this allows ->select_task() to simply return task_cpu(p) and
 	 *   not worry about this generic constraint ]
 	 */
-	if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
-		     !cpu_online(cpu)))
+	if (unlikely(!is_cpu_allowed(p, cpu)))
 		cpu = select_fallback_rq(task_cpu(p), p);
 
 	return cpu;
@@ -2177,27 +2194,7 @@
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
 
-#ifdef CONFIG_NUMA_BALANCING
-	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
-		p->mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
-		p->mm->numa_scan_seq = 0;
-	}
-
-	if (clone_flags & CLONE_VM)
-		p->numa_preferred_nid = current->numa_preferred_nid;
-	else
-		p->numa_preferred_nid = -1;
-
-	p->node_stamp = 0ULL;
-	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
-	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
-	p->numa_work.next = &p->numa_work;
-	p->numa_faults = NULL;
-	p->last_task_numa_placement = 0;
-	p->last_sum_exec_runtime = 0;
-
-	p->numa_group = NULL;
-#endif /* CONFIG_NUMA_BALANCING */
+	init_numa_balancing(clone_flags, p);
 }
 
 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -4033,6 +4030,23 @@
 }
 
 /**
+ * available_idle_cpu - is a given CPU idle for enqueuing work.
+ * @cpu: the CPU in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
+ */
+int available_idle_cpu(int cpu)
+{
+	if (!idle_cpu(cpu))
+		return 0;
+
+	if (vcpu_is_preempted(cpu))
+		return 0;
+
+	return 1;
+}
+
+/**
  * idle_task - return the idle task for a given CPU.
  * @cpu: the processor in question.
  *
@@ -5008,20 +5022,6 @@
 }
 EXPORT_SYMBOL(__cond_resched_lock);
 
-int __sched __cond_resched_softirq(void)
-{
-	BUG_ON(!in_softirq());
-
-	if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
-		local_bh_enable();
-		preempt_schedule_common();
-		local_bh_disable();
-		return 1;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(__cond_resched_softirq);
-
 /**
  * yield - yield the current processor to other threads.
  *
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e13df95..3cde464 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -51,7 +51,7 @@
 	bool			iowait_boost_pending;
 	unsigned int		iowait_boost;
 	unsigned int		iowait_boost_max;
-	u64 last_update;
+	u64			last_update;
 
 	/* The fields below are only needed when sharing a policy: */
 	unsigned long		util_cfs;
@@ -89,46 +89,52 @@
 	 * schedule the kthread.
 	 */
 	if (sg_policy->policy->fast_switch_enabled &&
-	    !cpufreq_can_do_remote_dvfs(sg_policy->policy))
+	    !cpufreq_this_cpu_can_update(sg_policy->policy))
 		return false;
 
-	if (sg_policy->work_in_progress)
-		return false;
-
-	if (unlikely(sg_policy->need_freq_update)) {
-		sg_policy->need_freq_update = false;
-		/*
-		 * This happens when limits change, so forget the previous
-		 * next_freq value and force an update.
-		 */
-		sg_policy->next_freq = UINT_MAX;
+	if (unlikely(sg_policy->need_freq_update))
 		return true;
-	}
 
 	delta_ns = time - sg_policy->last_freq_update_time;
 
 	return delta_ns >= sg_policy->freq_update_delay_ns;
 }
 
-static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
-				unsigned int next_freq)
+static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time,
+				   unsigned int next_freq)
 {
-	struct cpufreq_policy *policy = sg_policy->policy;
-
 	if (sg_policy->next_freq == next_freq)
-		return;
+		return false;
 
 	sg_policy->next_freq = next_freq;
 	sg_policy->last_freq_update_time = time;
 
-	if (policy->fast_switch_enabled) {
-		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
-		if (!next_freq)
-			return;
+	return true;
+}
 
-		policy->cur = next_freq;
-		trace_cpu_frequency(next_freq, smp_processor_id());
-	} else {
+static void sugov_fast_switch(struct sugov_policy *sg_policy, u64 time,
+			      unsigned int next_freq)
+{
+	struct cpufreq_policy *policy = sg_policy->policy;
+
+	if (!sugov_update_next_freq(sg_policy, time, next_freq))
+		return;
+
+	next_freq = cpufreq_driver_fast_switch(policy, next_freq);
+	if (!next_freq)
+		return;
+
+	policy->cur = next_freq;
+	trace_cpu_frequency(next_freq, smp_processor_id());
+}
+
+static void sugov_deferred_update(struct sugov_policy *sg_policy, u64 time,
+				  unsigned int next_freq)
+{
+	if (!sugov_update_next_freq(sg_policy, time, next_freq))
+		return;
+
+	if (!sg_policy->work_in_progress) {
 		sg_policy->work_in_progress = true;
 		irq_work_queue(&sg_policy->irq_work);
 	}
@@ -165,8 +171,10 @@
 
 	freq = (freq + (freq >> 2)) * util / max;
 
-	if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
+	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
 		return sg_policy->next_freq;
+
+	sg_policy->need_freq_update = false;
 	sg_policy->cached_raw_freq = freq;
 	return cpufreq_driver_resolve_freq(policy, freq);
 }
@@ -183,61 +191,137 @@
 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 {
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
-	unsigned long util;
 
-	if (rq->rt.rt_nr_running) {
-		util = sg_cpu->max;
-	} else {
-		util = sg_cpu->util_dl;
-		if (rq->cfs.h_nr_running)
-			util += sg_cpu->util_cfs;
-	}
+	if (rq->rt.rt_nr_running)
+		return sg_cpu->max;
 
 	/*
+	 * Utilization required by DEADLINE must always be granted while, for
+	 * FAIR, we use blocked utilization of IDLE CPUs as a mechanism to
+	 * gracefully reduce the frequency when no tasks show up for longer
+	 * periods of time.
+	 *
 	 * Ideally we would like to set util_dl as min/guaranteed freq and
 	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
 	 * ready for such an interface. So, we only do the latter for now.
 	 */
-	return min(util, sg_cpu->max);
+	return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
 }
 
-static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
+/**
+ * sugov_iowait_reset() - Reset the IO boost status of a CPU.
+ * @sg_cpu: the sugov data for the CPU to boost
+ * @time: the update time from the caller
+ * @set_iowait_boost: true if an IO boost has been requested
+ *
+ * The IO wait boost of a task is disabled after a tick since the last update
+ * of a CPU. If a new IO wait boost is requested after more than a tick, then
+ * we enable the boost starting from the minimum frequency, which improves
+ * energy efficiency by ignoring sporadic wakeups from IO.
+ */
+static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
+			       bool set_iowait_boost)
 {
-	if (flags & SCHED_CPUFREQ_IOWAIT) {
-		if (sg_cpu->iowait_boost_pending)
-			return;
+	s64 delta_ns = time - sg_cpu->last_update;
 
-		sg_cpu->iowait_boost_pending = true;
+	/* Reset boost only if a tick has elapsed since last request */
+	if (delta_ns <= TICK_NSEC)
+		return false;
 
-		if (sg_cpu->iowait_boost) {
-			sg_cpu->iowait_boost <<= 1;
-			if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
-				sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
-		} else {
-			sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
-		}
-	} else if (sg_cpu->iowait_boost) {
-		s64 delta_ns = time - sg_cpu->last_update;
+	sg_cpu->iowait_boost = set_iowait_boost
+		? sg_cpu->sg_policy->policy->min : 0;
+	sg_cpu->iowait_boost_pending = set_iowait_boost;
 
-		/* Clear iowait_boost if the CPU apprears to have been idle. */
-		if (delta_ns > TICK_NSEC) {
-			sg_cpu->iowait_boost = 0;
-			sg_cpu->iowait_boost_pending = false;
-		}
-	}
+	return true;
 }
 
-static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
-			       unsigned long *max)
+/**
+ * sugov_iowait_boost() - Updates the IO boost status of a CPU.
+ * @sg_cpu: the sugov data for the CPU to boost
+ * @time: the update time from the caller
+ * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
+ *
+ * Each time a task wakes up after an IO operation, the CPU utilization can be
+ * boosted to a certain utilization which doubles at each "frequent and
+ * successive" wakeup from IO, ranging from the utilization of the minimum
+ * OPP to the utilization of the maximum OPP.
+ * To keep doubling, an IO boost has to be requested at least once per tick;
+ * otherwise we restart from the utilization of the minimum OPP.
+ */
+static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
+			       unsigned int flags)
+{
+	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;
+
+	/* Reset boost if the CPU appears to have been idle enough */
+	if (sg_cpu->iowait_boost &&
+	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
+		return;
+
+	/* Boost only tasks waking up after IO */
+	if (!set_iowait_boost)
+		return;
+
+	/* Ensure the boost only doubles once per request */
+	if (sg_cpu->iowait_boost_pending)
+		return;
+	sg_cpu->iowait_boost_pending = true;
+
+	/* Double the boost at each request */
+	if (sg_cpu->iowait_boost) {
+		sg_cpu->iowait_boost <<= 1;
+		if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
+			sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+		return;
+	}
+
+	/* First wakeup after IO: start with minimum boost */
+	sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+}
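
The doubling policy documented above is easiest to see with concrete numbers. A standalone sketch (the OPP values are made up, and this is not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int boost = 0, min_opp = 200, max_opp = 1600, i;

	for (i = 0; i < 5; i++) {
		if (!boost)
			boost = min_opp;	/* first wakeup after IO */
		else
			boost <<= 1;		/* back-to-back wakeup: double */
		if (boost > max_opp)
			boost = max_opp;	/* clamp at the maximum OPP */
		printf("wakeup %u -> boost %u\n", i, boost);
	}
	return 0;
}

Five back-to-back wakeups print 200, 400, 800, 1600, 1600; a gap longer than a tick restarts the sequence from the minimum via sugov_iowait_reset().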
+
+/**
+ * sugov_iowait_apply() - Apply the IO boost to a CPU.
+ * @sg_cpu: the sugov data for the cpu to boost
+ * @time: the update time from the caller
+ * @util: the utilization to (eventually) boost
+ * @max: the maximum value the utilization can be boosted to
+ *
+ * A CPU running a task which has woken up after an IO operation can have
+ * its utilization boosted to speed up the completion of those IO
+ * operations. The IO boost value is increased each time a task wakes up
+ * from IO, in sugov_iowait_boost(), and decreased by this function each
+ * time an increase has not been requested (!iowait_boost_pending).
+ *
+ * A CPU which appears to have been idle for at least one tick also has
+ * its IO boost utilization reset.
+ *
+ * This mechanism is designed to boost tasks doing frequent IO waits, while
+ * being more conservative on tasks which do only sporadic IO operations.
+ */
+static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+			       unsigned long *util, unsigned long *max)
 {
 	unsigned int boost_util, boost_max;
 
+	/* No boost currently required */
 	if (!sg_cpu->iowait_boost)
 		return;
 
+	/* Reset boost if the CPU appears to have been idle enough */
+	if (sugov_iowait_reset(sg_cpu, time, false))
+		return;
+
+	/*
+	 * An IO waiting task has just woken up:
+	 * allow the boost value to be doubled again.
+	 */
 	if (sg_cpu->iowait_boost_pending) {
 		sg_cpu->iowait_boost_pending = false;
 	} else {
+		/*
+		 * Otherwise: reduce the boost value and disable it when we
+		 * reach the minimum.
+		 */
 		sg_cpu->iowait_boost >>= 1;
 		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
 			sg_cpu->iowait_boost = 0;
@@ -245,9 +329,12 @@
 		}
 	}
 
+	/*
+	 * Apply the current boost value: a CPU is boosted only if its current
+	 * utilization is smaller than the current IO boost level.
+	 */
 	boost_util = sg_cpu->iowait_boost;
 	boost_max = sg_cpu->iowait_boost_max;
-
 	if (*util * boost_max < *max * boost_util) {
 		*util = boost_util;
 		*max = boost_max;
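
The test "*util * boost_max < *max * boost_util" above compares the ratios util/max and boost_util/boost_max by cross-multiplying, avoiding integer division. A sketch of the same predicate (values are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool boost_is_higher(unsigned long util, unsigned long max,
			    unsigned long boost_util, unsigned long boost_max)
{
	/* util/max < boost_util/boost_max, without dividing */
	return util * boost_max < max * boost_util;
}

int main(void)
{
	/* ~40% current utilization vs. a 50% boost level -> boost wins */
	printf("%d\n", boost_is_higher(410, 1024, 800, 1600));
	return 0;
}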
@@ -286,7 +373,7 @@
 	unsigned int next_f;
 	bool busy;
 
-	sugov_set_iowait_boost(sg_cpu, time, flags);
+	sugov_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
 
 	ignore_dl_rate_limit(sg_cpu, sg_policy);
@@ -299,21 +386,31 @@
 	sugov_get_util(sg_cpu);
 	max = sg_cpu->max;
 	util = sugov_aggregate_util(sg_cpu);
-	sugov_iowait_boost(sg_cpu, &util, &max);
+	sugov_iowait_apply(sg_cpu, time, &util, &max);
 	next_f = get_next_freq(sg_policy, util, max);
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
 	 */
-	if (busy && next_f < sg_policy->next_freq &&
-	    sg_policy->next_freq != UINT_MAX) {
+	if (busy && next_f < sg_policy->next_freq) {
 		next_f = sg_policy->next_freq;
 
 		/* Reset cached freq as next_freq has changed */
 		sg_policy->cached_raw_freq = 0;
 	}
 
-	sugov_update_commit(sg_policy, time, next_f);
+	/*
+	 * This code runs under rq->lock for the target CPU, so it won't run
+	 * concurrently on two different CPUs for the same target and it is not
+	 * necessary to acquire the lock in the fast switch case.
+	 */
+	if (sg_policy->policy->fast_switch_enabled) {
+		sugov_fast_switch(sg_policy, time, next_f);
+	} else {
+		raw_spin_lock(&sg_policy->update_lock);
+		sugov_deferred_update(sg_policy, time, next_f);
+		raw_spin_unlock(&sg_policy->update_lock);
+	}
 }
 
 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
@@ -326,28 +423,12 @@
 	for_each_cpu(j, policy->cpus) {
 		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
 		unsigned long j_util, j_max;
-		s64 delta_ns;
 
 		sugov_get_util(j_sg_cpu);
-
-		/*
-		 * If the CFS CPU utilization was last updated before the
-		 * previous frequency update and the time elapsed between the
-		 * last update of the CPU utilization and the last frequency
-		 * update is long enough, reset iowait_boost and util_cfs, as
-		 * they are now probably stale. However, still consider the
-		 * CPU contribution if it has some DEADLINE utilization
-		 * (util_dl).
-		 */
-		delta_ns = time - j_sg_cpu->last_update;
-		if (delta_ns > TICK_NSEC) {
-			j_sg_cpu->iowait_boost = 0;
-			j_sg_cpu->iowait_boost_pending = false;
-		}
-
 		j_max = j_sg_cpu->max;
 		j_util = sugov_aggregate_util(j_sg_cpu);
-		sugov_iowait_boost(j_sg_cpu, &j_util, &j_max);
+		sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+
 		if (j_util * max > j_max * util) {
 			util = j_util;
 			max = j_max;
@@ -366,14 +447,18 @@
 
 	raw_spin_lock(&sg_policy->update_lock);
 
-	sugov_set_iowait_boost(sg_cpu, time, flags);
+	sugov_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
 
 	ignore_dl_rate_limit(sg_cpu, sg_policy);
 
 	if (sugov_should_update_freq(sg_policy, time)) {
 		next_f = sugov_next_freq_shared(sg_cpu, time);
-		sugov_update_commit(sg_policy, time, next_f);
+
+		if (sg_policy->policy->fast_switch_enabled)
+			sugov_fast_switch(sg_policy, time, next_f);
+		else
+			sugov_deferred_update(sg_policy, time, next_f);
 	}
 
 	raw_spin_unlock(&sg_policy->update_lock);
@@ -382,13 +467,27 @@
 static void sugov_work(struct kthread_work *work)
 {
 	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
+	unsigned int freq;
+	unsigned long flags;
+
+	/*
+	 * Hold sg_policy->update_lock briefly to handle the case where
+	 * sg_policy->next_freq is read here and then updated by
+	 * sugov_deferred_update() just before work_in_progress is set to false
+	 * here; otherwise we may miss queueing the new update.
+	 *
+	 * Note: If work was queued after the update_lock was released,
+	 * sugov_work() will just be called again by the kthread_work code, and
+	 * the request will be processed before the sugov thread sleeps.
+	 */
+	raw_spin_lock_irqsave(&sg_policy->update_lock, flags);
+	freq = sg_policy->next_freq;
+	sg_policy->work_in_progress = false;
+	raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags);
 
 	mutex_lock(&sg_policy->work_lock);
-	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
-				CPUFREQ_RELATION_L);
+	__cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L);
 	mutex_unlock(&sg_policy->work_lock);
-
-	sg_policy->work_in_progress = false;
 }
 
 static void sugov_irq_work(struct irq_work *irq_work)
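
The sugov_work() rework above follows a common pattern: snapshot the shared value and clear the in-progress flag inside one short critical section, then perform the slow operation unlocked. A pthread-based sketch of the shape (types and names are hypothetical, not kernel code):

#include <pthread.h>

struct policy {
	pthread_mutex_t update_lock;
	unsigned int next_freq;
	int work_in_progress;
};

static void set_hardware_freq(unsigned int freq)
{
	(void)freq;	/* stands in for the slow driver transition */
}

static void work_fn(struct policy *p)
{
	unsigned int freq;

	/*
	 * Snapshot next_freq and clear work_in_progress together: an
	 * update landing after this point sees the flag cleared and
	 * re-queues the work instead of being silently lost.
	 */
	pthread_mutex_lock(&p->update_lock);
	freq = p->next_freq;
	p->work_in_progress = 0;
	pthread_mutex_unlock(&p->update_lock);

	set_hardware_freq(freq);	/* slow part, done unlocked */
}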
@@ -511,11 +610,7 @@
 	}
 
 	sg_policy->thread = thread;
-
-	/* Kthread is bound to all CPUs by default */
-	if (!policy->dvfs_possible_from_any_cpu)
-		kthread_bind_mask(thread, policy->related_cpus);
-
+	kthread_bind_mask(thread, policy->related_cpus);
 	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
 	mutex_init(&sg_policy->work_lock);
 
@@ -658,7 +753,7 @@
 
 	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
 	sg_policy->last_freq_update_time	= 0;
-	sg_policy->next_freq			= UINT_MAX;
+	sg_policy->next_freq			= 0;
 	sg_policy->work_in_progress		= false;
 	sg_policy->need_freq_update		= false;
 	sg_policy->cached_raw_freq		= 0;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1356afd..fbfc3f1 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1259,6 +1259,9 @@
 
 	rq = task_rq_lock(p, &rf);
 
+	sched_clock_tick();
+	update_rq_clock(rq);
+
 	if (!dl_task(p) || p->state == TASK_DEAD) {
 		struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
@@ -1278,9 +1281,6 @@
 	if (dl_se->dl_non_contending == 0)
 		goto unlock;
 
-	sched_clock_tick();
-	update_rq_clock(rq);
-
 	sub_running_bw(dl_se, &rq->dl);
 	dl_se->dl_non_contending = 0;
 unlock:
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 15b10e2..e593b41 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -823,35 +823,9 @@
 	.show		= sched_debug_show,
 };
 
-static int sched_debug_release(struct inode *inode, struct file *file)
-{
-	seq_release(inode, file);
-
-	return 0;
-}
-
-static int sched_debug_open(struct inode *inode, struct file *filp)
-{
-	int ret = 0;
-
-	ret = seq_open(filp, &sched_debug_sops);
-
-	return ret;
-}
-
-static const struct file_operations sched_debug_fops = {
-	.open		= sched_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= sched_debug_release,
-};
-
 static int __init init_sched_debug_procfs(void)
 {
-	struct proc_dir_entry *pe;
-
-	pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
-	if (!pe)
+	if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops))
 		return -ENOMEM;
 	return 0;
 }
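
proc_create_seq(), used above, registers a read-only seq_file interface without the open/read/llseek/release file_operations glue the hunk deletes. A hedged sketch of the usual pattern (the foo_* callbacks are hypothetical):

static const struct seq_operations foo_sops = {
	.start	= foo_start,	/* hypothetical iterator callbacks */
	.next	= foo_next,
	.stop	= foo_stop,
	.show	= foo_show,
};

static int __init foo_init(void)
{
	/* one call replaces the old boilerplate fops */
	if (!proc_create_seq("foo", 0444, NULL, &foo_sops))
		return -ENOMEM;
	return 0;
}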
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 79f574d..e497c05 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1139,6 +1139,47 @@
 	return max(smin, smax);
 }
 
+void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
+{
+	int mm_users = 0;
+	struct mm_struct *mm = p->mm;
+
+	if (mm) {
+		mm_users = atomic_read(&mm->mm_users);
+		if (mm_users == 1) {
+			mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
+			mm->numa_scan_seq = 0;
+		}
+	}
+	p->node_stamp			= 0;
+	p->numa_scan_seq		= mm ? mm->numa_scan_seq : 0;
+	p->numa_scan_period		= sysctl_numa_balancing_scan_delay;
+	p->numa_work.next		= &p->numa_work;
+	p->numa_faults			= NULL;
+	p->numa_group			= NULL;
+	p->last_task_numa_placement	= 0;
+	p->last_sum_exec_runtime	= 0;
+
+	/* New address space, reset the preferred nid */
+	if (!(clone_flags & CLONE_VM)) {
+		p->numa_preferred_nid = -1;
+		return;
+	}
+
+	/*
+	 * New thread: keep the existing numa_preferred_nid, which should have
+	 * been copied already by arch_dup_task_struct(), but stagger when scans start.
+	 */
+	if (mm) {
+		unsigned int delay;
+
+		delay = min_t(unsigned int, task_scan_max(current),
+			current->numa_scan_period * mm_users * NSEC_PER_MSEC);
+		delay += 2 * TICK_NSEC;
+		p->node_stamp = delay;
+	}
+}
+
 static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
 {
 	rq->nr_numa_running += (p->numa_preferred_nid != -1);
@@ -5345,6 +5386,14 @@
 	struct sched_entity *se = &p->se;
 
 	/*
+	 * The code below (indirectly) updates schedutil which looks at
+	 * the cfs_rq utilization to select a frequency.
+	 * Let's add the task's estimated utilization to the cfs_rq's
+	 * estimated utilization, before we update schedutil.
+	 */
+	util_est_enqueue(&rq->cfs, p);
+
+	/*
 	 * If in_iowait is set, the code below may not trigger any cpufreq
 	 * utilization updates, so do it here explicitly with the IOWAIT flag
 	 * passed.
@@ -5385,7 +5434,6 @@
 	if (!se)
 		add_nr_running(rq, 1);
 
-	util_est_enqueue(&rq->cfs, p);
 	hrtick_update(rq);
 }
 
@@ -5858,8 +5906,8 @@
 	 * a cpufreq perspective, it's better to have higher utilisation
 	 * on one CPU.
 	 */
-	if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
-		return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
+	if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
+		return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
 
 	if (sync && cpu_rq(this_cpu)->nr_running == 1)
 		return this_cpu;
@@ -6102,7 +6150,7 @@
 
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
-		if (idle_cpu(i)) {
+		if (available_idle_cpu(i)) {
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
 			if (idle && idle->exit_latency < min_exit_latency) {
@@ -6144,6 +6192,13 @@
 	if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
 		return prev_cpu;
 
+	/*
+	 * We need the task's util for capacity_spare_wake(); sync it up to
+	 * prev_cpu's last_update_time.
+	 */
+	if (!(sd_flag & SD_BALANCE_FORK))
+		sync_entity_load_avg(&p->se);
+
 	while (sd) {
 		struct sched_group *group;
 		struct sched_domain *tmp;
@@ -6224,7 +6279,7 @@
 		if (cpu == core)
 			continue;
 
-		if (!idle_cpu(cpu))
+		if (!available_idle_cpu(cpu))
 			goto unlock;
 	}
 
@@ -6256,7 +6311,7 @@
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
 			cpumask_clear_cpu(cpu, cpus);
-			if (!idle_cpu(cpu))
+			if (!available_idle_cpu(cpu))
 				idle = false;
 		}
 
@@ -6285,7 +6340,7 @@
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
+		if (available_idle_cpu(cpu))
 			return cpu;
 	}
 
@@ -6348,7 +6403,7 @@
 			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
+		if (available_idle_cpu(cpu))
 			break;
 	}
 
@@ -6368,13 +6423,13 @@
 	struct sched_domain *sd;
 	int i, recent_used_cpu;
 
-	if (idle_cpu(target))
+	if (available_idle_cpu(target))
 		return target;
 
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
-	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+	if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev))
 		return prev;
 
 	/* Check a recently used CPU as a potential idle candidate: */
@@ -6382,7 +6437,7 @@
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
-	    idle_cpu(recent_used_cpu) &&
+	    available_idle_cpu(recent_used_cpu) &&
 	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
 		/*
 		 * Replace recent_used_cpu with prev as it is a potential
@@ -6558,7 +6613,7 @@
 static int
 select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
 {
-	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+	struct sched_domain *tmp, *sd = NULL;
 	int cpu = smp_processor_id();
 	int new_cpu = prev_cpu;
 	int want_affine = 0;
@@ -6581,7 +6636,10 @@
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
-			affine_sd = tmp;
+			if (cpu != prev_cpu)
+				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
+
+			sd = NULL; /* Prefer wake_affine over balance flags */
 			break;
 		}
 
@@ -6591,33 +6649,16 @@
 			break;
 	}
 
-	if (affine_sd) {
-		sd = NULL; /* Prefer wake_affine over balance flags */
-		if (cpu == prev_cpu)
-			goto pick_cpu;
-
-		new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);
-	}
-
-	if (sd && !(sd_flag & SD_BALANCE_FORK)) {
-		/*
-		 * We're going to need the task's util for capacity_spare_wake
-		 * in find_idlest_group. Sync it up to prev_cpu's
-		 * last_update_time.
-		 */
-		sync_entity_load_avg(&p->se);
-	}
-
-	if (!sd) {
-pick_cpu:
-		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
-			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
-
-			if (want_affine)
-				current->recent_used_cpu = cpu;
-		}
-	} else {
+	if (unlikely(sd)) {
+		/* Slow path */
 		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+	} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
+		/* Fast path */
+
+		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+
+		if (want_affine)
+			current->recent_used_cpu = cpu;
 	}
 	rcu_read_unlock();
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1f0a4bc..6601baf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -983,7 +983,7 @@
 }
 
 /*
- * See rt task throttoling, which is the only time a skip
+ * See rt task throttling, which is the only time a skip
  * request is cancelled.
  */
 static inline void rq_clock_cancel_skipupdate(struct rq *rq)
@@ -1069,6 +1069,12 @@
 extern void sched_setnuma(struct task_struct *p, int node);
 extern int migrate_task_to(struct task_struct *p, int cpu);
 extern int migrate_swap(struct task_struct *, struct task_struct *);
+extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
+#else
+static inline void
+init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
+{
+}
 #endif /* CONFIG_NUMA_BALANCING */
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index ab112cb..750fb3c 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -120,22 +120,9 @@
 	.show  = show_schedstat,
 };
 
-static int schedstat_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &schedstat_sops);
-}
-
-static const struct file_operations proc_schedstat_operations = {
-	.open    = schedstat_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
 static int __init proc_schedstat_init(void)
 {
-	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
-
+	proc_create_seq("schedstat", 0, NULL, &schedstat_sops);
 	return 0;
 }
 subsys_initcall(proc_schedstat_init);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index e691d9a..fd023ac 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -593,18 +593,15 @@
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL_*,
-	 * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
-	 * allowed to be logged by the admin.
+	 * Emit an audit message when the action is RET_KILL_*, RET_LOG, or the
+	 * FILTER_FLAG_LOG bit was set. The admin has the ability to silence
+	 * any action from being logged by removing the action name from the
+	 * seccomp_actions_logged sysctl.
 	 */
-	if (log)
-		return __audit_seccomp(syscall, signr, action);
+	if (!log)
+		return;
 
-	/*
-	 * Let the audit subsystem decide if the action should be audited based
-	 * on whether the current task itself is being audited.
-	 */
-	return audit_seccomp(syscall, signr, action);
+	audit_seccomp(syscall, signr, action);
 }
 
 /*
@@ -1144,10 +1141,11 @@
 };
 
 static bool seccomp_names_from_actions_logged(char *names, size_t size,
-					      u32 actions_logged)
+					      u32 actions_logged,
+					      const char *sep)
 {
 	const struct seccomp_log_name *cur;
-	bool append_space = false;
+	bool append_sep = false;
 
 	for (cur = seccomp_log_names; cur->name && size; cur++) {
 		ssize_t ret;
@@ -1155,15 +1153,15 @@
 		if (!(actions_logged & cur->log))
 			continue;
 
-		if (append_space) {
-			ret = strscpy(names, " ", size);
+		if (append_sep) {
+			ret = strscpy(names, sep, size);
 			if (ret < 0)
 				return false;
 
 			names += ret;
 			size -= ret;
 		} else
-			append_space = true;
+			append_sep = true;
 
 		ret = strscpy(names, cur->name, size);
 		if (ret < 0)
@@ -1208,48 +1206,104 @@
 	return true;
 }
 
-static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
-					  void __user *buffer, size_t *lenp,
-					  loff_t *ppos)
+static int read_actions_logged(struct ctl_table *ro_table, void __user *buffer,
+			       size_t *lenp, loff_t *ppos)
+{
+	char names[sizeof(seccomp_actions_avail)];
+	struct ctl_table table;
+
+	memset(names, 0, sizeof(names));
+
+	if (!seccomp_names_from_actions_logged(names, sizeof(names),
+					       seccomp_actions_logged, " "))
+		return -EINVAL;
+
+	table = *ro_table;
+	table.data = names;
+	table.maxlen = sizeof(names);
+	return proc_dostring(&table, 0, buffer, lenp, ppos);
+}
+
+static int write_actions_logged(struct ctl_table *ro_table, void __user *buffer,
+				size_t *lenp, loff_t *ppos, u32 *actions_logged)
 {
 	char names[sizeof(seccomp_actions_avail)];
 	struct ctl_table table;
 	int ret;
 
-	if (write && !capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 	memset(names, 0, sizeof(names));
 
-	if (!write) {
-		if (!seccomp_names_from_actions_logged(names, sizeof(names),
-						       seccomp_actions_logged))
-			return -EINVAL;
-	}
-
 	table = *ro_table;
 	table.data = names;
 	table.maxlen = sizeof(names);
-	ret = proc_dostring(&table, write, buffer, lenp, ppos);
+	ret = proc_dostring(&table, 1, buffer, lenp, ppos);
 	if (ret)
 		return ret;
 
-	if (write) {
-		u32 actions_logged;
+	if (!seccomp_actions_logged_from_names(actions_logged, table.data))
+		return -EINVAL;
 
-		if (!seccomp_actions_logged_from_names(&actions_logged,
-						       table.data))
-			return -EINVAL;
+	if (*actions_logged & SECCOMP_LOG_ALLOW)
+		return -EINVAL;
 
-		if (actions_logged & SECCOMP_LOG_ALLOW)
-			return -EINVAL;
-
-		seccomp_actions_logged = actions_logged;
-	}
-
+	seccomp_actions_logged = *actions_logged;
 	return 0;
 }
 
+static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
+				 int ret)
+{
+	char names[sizeof(seccomp_actions_avail)];
+	char old_names[sizeof(seccomp_actions_avail)];
+	const char *new = names;
+	const char *old = old_names;
+
+	if (!audit_enabled)
+		return;
+
+	memset(names, 0, sizeof(names));
+	memset(old_names, 0, sizeof(old_names));
+
+	if (ret)
+		new = "?";
+	else if (!actions_logged)
+		new = "(none)";
+	else if (!seccomp_names_from_actions_logged(names, sizeof(names),
+						    actions_logged, ","))
+		new = "?";
+
+	if (!old_actions_logged)
+		old = "(none)";
+	else if (!seccomp_names_from_actions_logged(old_names,
+						    sizeof(old_names),
+						    old_actions_logged, ","))
+		old = "?";
+
+	return audit_seccomp_actions_logged(new, old, !ret);
+}
+
+static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	int ret;
+
+	if (write) {
+		u32 actions_logged = 0;
+		u32 old_actions_logged = seccomp_actions_logged;
+
+		ret = write_actions_logged(ro_table, buffer, lenp, ppos,
+					   &actions_logged);
+		audit_actions_logged(actions_logged, old_actions_logged, ret);
+	} else
+		ret = read_actions_logged(ro_table, buffer, lenp, ppos);
+
+	return ret;
+}
+
 static struct ctl_path seccomp_sysctl_path[] = {
 	{ .procname = "kernel", },
 	{ .procname = "seccomp", },
diff --git a/kernel/signal.c b/kernel/signal.c
index 9c33163..0f865d6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1539,7 +1539,6 @@
 	return send_sig_info(info.si_signo, &info, t);
 }
 
-#if defined(BUS_MCEERR_AO) && defined(BUS_MCEERR_AR)
 int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t)
 {
 	struct siginfo info;
@@ -1568,9 +1567,7 @@
 	return send_sig_info(info.si_signo, &info, t);
 }
 EXPORT_SYMBOL(send_sig_mceerr);
-#endif
 
-#ifdef SEGV_BNDERR
 int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper)
 {
 	struct siginfo info;
@@ -1584,7 +1581,6 @@
 	info.si_upper = upper;
 	return force_sig_info(info.si_signo, &info, current);
 }
-#endif
 
 #ifdef SEGV_PKUERR
 int force_sig_pkuerr(void __user *addr, u32 pkey)
@@ -2837,8 +2833,19 @@
 			[SIGPOLL] = { NSIGPOLL, SIL_POLL },
 			[SIGSYS]  = { NSIGSYS,  SIL_SYS },
 		};
-		if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit))
+		if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit)) {
 			layout = filter[sig].layout;
+			/* Handle the exceptions */
+			if ((sig == SIGBUS) &&
+			    (si_code >= BUS_MCEERR_AR) && (si_code <= BUS_MCEERR_AO))
+				layout = SIL_FAULT_MCEERR;
+			else if ((sig == SIGSEGV) && (si_code == SEGV_BNDERR))
+				layout = SIL_FAULT_BNDERR;
+#ifdef SEGV_PKUERR
+			else if ((sig == SIGSEGV) && (si_code == SEGV_PKUERR))
+				layout = SIL_FAULT_PKUERR;
+#endif
+		}
 		else if (si_code <= NSIGPOLL)
 			layout = SIL_POLL;
 	} else {
@@ -2848,104 +2855,15 @@
 			layout = SIL_POLL;
 		else if (si_code < 0)
 			layout = SIL_RT;
-		/* Tests to support buggy kernel ABIs */
-#ifdef TRAP_FIXME
-		if ((sig == SIGTRAP) && (si_code == TRAP_FIXME))
-			layout = SIL_FAULT;
-#endif
-#ifdef FPE_FIXME
-		if ((sig == SIGFPE) && (si_code == FPE_FIXME))
-			layout = SIL_FAULT;
-#endif
 	}
 	return layout;
 }
 
 int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
 {
-	int err;
-
-	if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
+	if (copy_to_user(to, from, sizeof(struct siginfo)))
 		return -EFAULT;
-	if (from->si_code < 0)
-		return __copy_to_user(to, from, sizeof(siginfo_t))
-			? -EFAULT : 0;
-	/*
-	 * If you change siginfo_t structure, please be sure
-	 * this code is fixed accordingly.
-	 * Please remember to update the signalfd_copyinfo() function
-	 * inside fs/signalfd.c too, in case siginfo_t changes.
-	 * It should never copy any pad contained in the structure
-	 * to avoid security leaks, but must copy the generic
-	 * 3 ints plus the relevant union member.
-	 */
-	err = __put_user(from->si_signo, &to->si_signo);
-	err |= __put_user(from->si_errno, &to->si_errno);
-	err |= __put_user(from->si_code, &to->si_code);
-	switch (siginfo_layout(from->si_signo, from->si_code)) {
-	case SIL_KILL:
-		err |= __put_user(from->si_pid, &to->si_pid);
-		err |= __put_user(from->si_uid, &to->si_uid);
-		break;
-	case SIL_TIMER:
-		/* Unreached SI_TIMER is negative */
-		break;
-	case SIL_POLL:
-		err |= __put_user(from->si_band, &to->si_band);
-		err |= __put_user(from->si_fd, &to->si_fd);
-		break;
-	case SIL_FAULT:
-		err |= __put_user(from->si_addr, &to->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		err |= __put_user(from->si_trapno, &to->si_trapno);
-#endif
-#ifdef __ia64__
-		err |= __put_user(from->si_imm, &to->si_imm);
-		err |= __put_user(from->si_flags, &to->si_flags);
-		err |= __put_user(from->si_isr, &to->si_isr);
-#endif
-		/*
-		 * Other callers might not initialize the si_lsb field,
-		 * so check explicitly for the right codes here.
-		 */
-#ifdef BUS_MCEERR_AR
-		if (from->si_signo == SIGBUS && from->si_code == BUS_MCEERR_AR)
-			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
-#endif
-#ifdef BUS_MCEERR_AO
-		if (from->si_signo == SIGBUS && from->si_code == BUS_MCEERR_AO)
-			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
-#endif
-#ifdef SEGV_BNDERR
-		if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) {
-			err |= __put_user(from->si_lower, &to->si_lower);
-			err |= __put_user(from->si_upper, &to->si_upper);
-		}
-#endif
-#ifdef SEGV_PKUERR
-		if (from->si_signo == SIGSEGV && from->si_code == SEGV_PKUERR)
-			err |= __put_user(from->si_pkey, &to->si_pkey);
-#endif
-		break;
-	case SIL_CHLD:
-		err |= __put_user(from->si_pid, &to->si_pid);
-		err |= __put_user(from->si_uid, &to->si_uid);
-		err |= __put_user(from->si_status, &to->si_status);
-		err |= __put_user(from->si_utime, &to->si_utime);
-		err |= __put_user(from->si_stime, &to->si_stime);
-		break;
-	case SIL_RT:
-		err |= __put_user(from->si_pid, &to->si_pid);
-		err |= __put_user(from->si_uid, &to->si_uid);
-		err |= __put_user(from->si_ptr, &to->si_ptr);
-		break;
-	case SIL_SYS:
-		err |= __put_user(from->si_call_addr, &to->si_call_addr);
-		err |= __put_user(from->si_syscall, &to->si_syscall);
-		err |= __put_user(from->si_arch, &to->si_arch);
-		break;
-	}
-	return err;
+	return 0;
 }
 
 #ifdef CONFIG_COMPAT
@@ -2984,27 +2902,28 @@
 #ifdef __ARCH_SI_TRAPNO
 		new.si_trapno = from->si_trapno;
 #endif
-#ifdef BUS_MCEERR_AR
-		if ((from->si_signo == SIGBUS) && (from->si_code == BUS_MCEERR_AR))
-			new.si_addr_lsb = from->si_addr_lsb;
+		break;
+	case SIL_FAULT_MCEERR:
+		new.si_addr = ptr_to_compat(from->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		new.si_trapno = from->si_trapno;
 #endif
-#ifdef BUS_MCEERR_AO
-		if ((from->si_signo == SIGBUS) && (from->si_code == BUS_MCEERR_AO))
-			new.si_addr_lsb = from->si_addr_lsb;
+		new.si_addr_lsb = from->si_addr_lsb;
+		break;
+	case SIL_FAULT_BNDERR:
+		new.si_addr = ptr_to_compat(from->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		new.si_trapno = from->si_trapno;
 #endif
-#ifdef SEGV_BNDERR
-		if ((from->si_signo == SIGSEGV) &&
-		    (from->si_code == SEGV_BNDERR)) {
-			new.si_lower = ptr_to_compat(from->si_lower);
-			new.si_upper = ptr_to_compat(from->si_upper);
-		}
+		new.si_lower = ptr_to_compat(from->si_lower);
+		new.si_upper = ptr_to_compat(from->si_upper);
+		break;
+	case SIL_FAULT_PKUERR:
+		new.si_addr = ptr_to_compat(from->si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		new.si_trapno = from->si_trapno;
 #endif
-#ifdef SEGV_PKUERR
-		if ((from->si_signo == SIGSEGV) &&
-		    (from->si_code == SEGV_PKUERR))
-			new.si_pkey = from->si_pkey;
-#endif
-
+		new.si_pkey = from->si_pkey;
 		break;
 	case SIL_CHLD:
 		new.si_pid    = from->si_pid;
@@ -3070,24 +2989,28 @@
 #ifdef __ARCH_SI_TRAPNO
 		to->si_trapno = from.si_trapno;
 #endif
-#ifdef BUS_MCEERR_AR
-		if ((from.si_signo == SIGBUS) && (from.si_code == BUS_MCEERR_AR))
-			to->si_addr_lsb = from.si_addr_lsb;
+		break;
+	case SIL_FAULT_MCEERR:
+		to->si_addr = compat_ptr(from.si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		to->si_trapno = from.si_trapno;
 #endif
-#ifdef BUS_MCEER_AO
-		if ((from.si_signo == SIGBUS) && (from.si_code == BUS_MCEERR_AO))
-			to->si_addr_lsb = from.si_addr_lsb;
+		to->si_addr_lsb = from.si_addr_lsb;
+		break;
+	case SIL_FAULT_BNDERR:
+		to->si_addr = compat_ptr(from.si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		to->si_trapno = from.si_trapno;
 #endif
-#ifdef SEGV_BNDERR
-		if ((from.si_signo == SIGSEGV) && (from.si_code == SEGV_BNDERR)) {
-			to->si_lower = compat_ptr(from.si_lower);
-			to->si_upper = compat_ptr(from.si_upper);
-		}
+		to->si_lower = compat_ptr(from.si_lower);
+		to->si_upper = compat_ptr(from.si_upper);
+		break;
+	case SIL_FAULT_PKUERR:
+		to->si_addr = compat_ptr(from.si_addr);
+#ifdef __ARCH_SI_TRAPNO
+		to->si_trapno = from.si_trapno;
 #endif
-#ifdef SEGV_PKUERR
-		if ((from.si_signo == SIGSEGV) && (from.si_code == SEGV_PKUERR))
-			to->si_pkey = from.si_pkey;
-#endif
+		to->si_pkey = from.si_pkey;
 		break;
 	case SIL_CHLD:
 		to->si_pid    = from.si_pid;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 177de36..de2f57f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -49,8 +49,8 @@
  */
 
 #ifndef __ARCH_IRQ_STAT
-irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
-EXPORT_SYMBOL(irq_stat);
+DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
 #endif
 
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
@@ -145,8 +145,7 @@
 }
 
 /*
- * Special-case - softirqs can safely be enabled in
- * cond_resched_softirq(), or by __do_softirq(),
+ * Special-case - softirqs can safely be enabled by __do_softirq(),
  * without processing still-pending softirqs:
  */
 void _local_bh_enable(void)
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 64c0291..f89014a 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -37,7 +37,7 @@
 struct cpu_stopper {
 	struct task_struct	*thread;
 
-	spinlock_t		lock;
+	raw_spinlock_t		lock;
 	bool			enabled;	/* is this stopper enabled? */
 	struct list_head	works;		/* list of pending works */
 
@@ -81,13 +81,13 @@
 	unsigned long flags;
 	bool enabled;
 
-	spin_lock_irqsave(&stopper->lock, flags);
+	raw_spin_lock_irqsave(&stopper->lock, flags);
 	enabled = stopper->enabled;
 	if (enabled)
 		__cpu_stop_queue_work(stopper, work, &wakeq);
 	else if (work->done)
 		cpu_stop_signal_done(work->done);
-	spin_unlock_irqrestore(&stopper->lock, flags);
+	raw_spin_unlock_irqrestore(&stopper->lock, flags);
 
 	wake_up_q(&wakeq);
 
@@ -237,8 +237,8 @@
 	DEFINE_WAKE_Q(wakeq);
 	int err;
 retry:
-	spin_lock_irq(&stopper1->lock);
-	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock_irq(&stopper1->lock);
+	raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
 
 	err = -ENOENT;
 	if (!stopper1->enabled || !stopper2->enabled)
@@ -261,8 +261,8 @@
 	__cpu_stop_queue_work(stopper1, work1, &wakeq);
 	__cpu_stop_queue_work(stopper2, work2, &wakeq);
 unlock:
-	spin_unlock(&stopper2->lock);
-	spin_unlock_irq(&stopper1->lock);
+	raw_spin_unlock(&stopper2->lock);
+	raw_spin_unlock_irq(&stopper1->lock);
 
 	if (unlikely(err == -EDEADLK)) {
 		while (stop_cpus_in_progress)
@@ -457,9 +457,9 @@
 	unsigned long flags;
 	int run;
 
-	spin_lock_irqsave(&stopper->lock, flags);
+	raw_spin_lock_irqsave(&stopper->lock, flags);
 	run = !list_empty(&stopper->works);
-	spin_unlock_irqrestore(&stopper->lock, flags);
+	raw_spin_unlock_irqrestore(&stopper->lock, flags);
 	return run;
 }
 
@@ -470,13 +470,13 @@
 
 repeat:
 	work = NULL;
-	spin_lock_irq(&stopper->lock);
+	raw_spin_lock_irq(&stopper->lock);
 	if (!list_empty(&stopper->works)) {
 		work = list_first_entry(&stopper->works,
 					struct cpu_stop_work, list);
 		list_del_init(&work->list);
 	}
-	spin_unlock_irq(&stopper->lock);
+	raw_spin_unlock_irq(&stopper->lock);
 
 	if (work) {
 		cpu_stop_fn_t fn = work->fn;
@@ -550,7 +550,7 @@
 	for_each_possible_cpu(cpu) {
 		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
 
-		spin_lock_init(&stopper->lock);
+		raw_spin_lock_init(&stopper->lock);
 		INIT_LIST_HEAD(&stopper->works);
 	}
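
The spinlock_t to raw_spinlock_t conversion above matters mostly on PREEMPT_RT, where spinlock_t becomes a sleeping lock; the stopper lock is taken from wakeup paths that must not sleep. A minimal sketch of the raw variant (names are hypothetical):

static DEFINE_RAW_SPINLOCK(example_lock);

static void example(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_lock, flags);
	/* wakeup-path work that must not sleep, even on PREEMPT_RT */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}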
 
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 9791364..183169c 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -43,7 +43,9 @@
 COND_SYSCALL_COMPAT(io_submit);
 COND_SYSCALL(io_cancel);
 COND_SYSCALL(io_getevents);
+COND_SYSCALL(io_pgetevents);
 COND_SYSCALL_COMPAT(io_getevents);
+COND_SYSCALL_COMPAT(io_pgetevents);
 
 /* fs/xattr.c */
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 84f3742..f89a78e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -129,31 +129,19 @@
 	spin_unlock_irqrestore(&watchdog_lock, *flags);
 }
 
-static int clocksource_watchdog_kthread(void *data);
-static void __clocksource_change_rating(struct clocksource *cs, int rating);
-
 /*
  * Interval: 0.5sec Threshold: 0.0625s
  */
 #define WATCHDOG_INTERVAL (HZ >> 1)
 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
 
-static void clocksource_watchdog_work(struct work_struct *work)
-{
-	/*
-	 * If kthread_run fails the next watchdog scan over the
-	 * watchdog_list will find the unstable clock again.
-	 */
-	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
-}
-
 static void __clocksource_unstable(struct clocksource *cs)
 {
 	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
 	cs->flags |= CLOCK_SOURCE_UNSTABLE;
 
 	/*
-	 * If the clocksource is registered clocksource_watchdog_kthread() will
+	 * If the clocksource is registered clocksource_watchdog_work() will
 	 * re-rate and re-select.
 	 */
 	if (list_empty(&cs->list)) {
@@ -164,7 +152,7 @@
 	if (cs->mark_unstable)
 		cs->mark_unstable(cs);
 
-	/* kick clocksource_watchdog_kthread() */
+	/* kick clocksource_watchdog_work() */
 	if (finished_booting)
 		schedule_work(&watchdog_work);
 }
@@ -174,7 +162,7 @@
  * @cs:		clocksource to be marked unstable
  *
  * This function is called by the x86 TSC code to mark clocksources as unstable;
- * it defers demotion and re-selection to a kthread.
+ * it defers demotion and re-selection to a work item.
  */
 void clocksource_mark_unstable(struct clocksource *cs)
 {
@@ -399,7 +387,9 @@
 	}
 }
 
-static int __clocksource_watchdog_kthread(void)
+static void __clocksource_change_rating(struct clocksource *cs, int rating);
+
+static int __clocksource_watchdog_work(void)
 {
 	struct clocksource *cs, *tmp;
 	unsigned long flags;
@@ -424,13 +414,12 @@
 	return select;
 }
 
-static int clocksource_watchdog_kthread(void *data)
+static void clocksource_watchdog_work(struct work_struct *work)
 {
 	mutex_lock(&clocksource_mutex);
-	if (__clocksource_watchdog_kthread())
+	if (__clocksource_watchdog_work())
 		clocksource_select();
 	mutex_unlock(&clocksource_mutex);
-	return 0;
 }
 
 static bool clocksource_is_watchdog(struct clocksource *cs)
@@ -449,12 +438,12 @@
 static void clocksource_select_watchdog(bool fallback) { }
 static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
 static inline void clocksource_resume_watchdog(void) { }
-static inline int __clocksource_watchdog_kthread(void) { return 0; }
+static inline int __clocksource_watchdog_work(void) { return 0; }
 static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
 void clocksource_mark_unstable(struct clocksource *cs) { }
 
-static void inline clocksource_watchdog_lock(unsigned long *flags) { }
-static void inline clocksource_watchdog_unlock(unsigned long *flags) { }
+static inline void clocksource_watchdog_lock(unsigned long *flags) { }
+static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
 
 #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
 
@@ -683,7 +672,7 @@
 	/*
 	 * Run the watchdog first to eliminate unstable clock sources
 	 */
-	__clocksource_watchdog_kthread();
+	__clocksource_watchdog_work();
 	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 14e8587..055a4a7 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1759,8 +1759,10 @@
 	return ret;
 }
 
-SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
-		struct timespec __user *, rmtp)
+#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+
+SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
+		struct __kernel_timespec __user *, rmtp)
 {
 	struct timespec64 tu;
 
@@ -1775,7 +1777,9 @@
 	return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
 }
 
-#ifdef CONFIG_COMPAT
+#endif
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
 		       struct compat_timespec __user *, rmtp)
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 69a937c..26aa956 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -59,7 +59,7 @@
  */
 
 SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
-		const struct timespec __user *, tp)
+		const struct __kernel_timespec __user *, tp)
 {
 	struct timespec64 new_tp;
 
@@ -90,7 +90,7 @@
 	return 0;
 }
 SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
-		struct timespec __user *, tp)
+		struct __kernel_timespec __user *, tp)
 {
 	int ret;
 	struct timespec64 kernel_tp;
@@ -104,7 +104,7 @@
 	return 0;
 }
 
-SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp)
+SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct __kernel_timespec __user *, tp)
 {
 	struct timespec64 rtn_tp = {
 		.tv_sec = 0,
@@ -124,8 +124,8 @@
 }
 
 SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
-		const struct timespec __user *, rqtp,
-		struct timespec __user *, rmtp)
+		const struct __kernel_timespec __user *, rqtp,
+		struct __kernel_timespec __user *, rmtp)
 {
 	struct timespec64 t;
 
@@ -158,7 +158,9 @@
 COMPAT_SYS_NI(timer_gettime);
 COMPAT_SYS_NI(getitimer);
 COMPAT_SYS_NI(setitimer);
+#endif
 
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
 		       struct compat_timespec __user *, tp)
 {
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 10b7186..e08ce3f 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1040,7 +1040,7 @@
 }
 
 SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
-		const struct timespec __user *, tp)
+		const struct __kernel_timespec __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 new_tp;
@@ -1055,7 +1055,7 @@
 }
 
 SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
-		struct timespec __user *,tp)
+		struct __kernel_timespec __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 kernel_tp;
@@ -1096,7 +1096,7 @@
 }
 
 SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
-		struct timespec __user *, tp)
+		struct __kernel_timespec __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 rtn_tp;
@@ -1113,7 +1113,7 @@
 	return error;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
 		       struct compat_timespec __user *, tp)
@@ -1148,6 +1148,10 @@
 	return err;
 }
 
+#endif
+
+#ifdef CONFIG_COMPAT
+
 COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
 		       struct compat_timex __user *, utp)
 {
@@ -1172,6 +1176,10 @@
 	return err;
 }
 
+#endif
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+
 COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 		       struct compat_timespec __user *, tp)
 {
@@ -1203,8 +1211,8 @@
 }
 
 SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
-		const struct timespec __user *, rqtp,
-		struct timespec __user *, rmtp)
+		const struct __kernel_timespec __user *, rqtp,
+		struct __kernel_timespec __user *, rmtp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 t;
@@ -1227,7 +1235,8 @@
 	return kc->nsleep(which_clock, flags, &t);
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
+
 COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
 		       struct compat_timespec __user *, rqtp,
 		       struct compat_timespec __user *, rmtp)
@@ -1252,6 +1261,7 @@
 
 	return kc->nsleep(which_clock, flags, &t);
 }
+
 #endif
 
 static const struct k_clock clock_realtime = {
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 49edc1c..b7005dd 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -277,7 +277,8 @@
 	 */
 	return !curdev ||
 		newdev->rating > curdev->rating ||
-	       !cpumask_equal(curdev->cpumask, newdev->cpumask);
+	       (!cpumask_equal(curdev->cpumask, newdev->cpumask) &&
+	        !tick_check_percpu(curdev, newdev, smp_processor_id()));
 }
 
 /*
@@ -490,6 +491,7 @@
 	if (tick_freeze_depth == num_online_cpus()) {
 		trace_suspend_resume(TPS("timekeeping_freeze"),
 				     smp_processor_id(), true);
+		system_state = SYSTEM_SUSPEND;
 		timekeeping_suspend();
 	} else {
 		tick_suspend_local();
@@ -513,6 +515,7 @@
 
 	if (tick_freeze_depth == num_online_cpus()) {
 		timekeeping_resume();
+		system_state = SYSTEM_RUNNING;
 		trace_suspend_resume(TPS("timekeeping_freeze"),
 				     smp_processor_id(), false);
 	} else {
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 3044d48..6fa9921 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -407,7 +407,6 @@
 }
 EXPORT_SYMBOL(mktime64);
 
-#if __BITS_PER_LONG == 32
 /**
  * set_normalized_timespec - set timespec sec and nsec parts and normalize
  *
@@ -468,7 +467,6 @@
 	return ts;
 }
 EXPORT_SYMBOL(ns_to_timespec);
-#endif
 
 /**
  * ns_to_timeval - Convert nanoseconds to timeval
@@ -853,9 +851,9 @@
 }
 
 int get_timespec64(struct timespec64 *ts,
-		   const struct timespec __user *uts)
+		   const struct __kernel_timespec __user *uts)
 {
-	struct timespec kts;
+	struct __kernel_timespec kts;
 	int ret;
 
 	ret = copy_from_user(&kts, uts, sizeof(kts));
@@ -863,6 +861,11 @@
 		return -EFAULT;
 
 	ts->tv_sec = kts.tv_sec;
+
+	/* Zero out the padding for 32 bit systems or in compat mode */
+	if (IS_ENABLED(CONFIG_64BIT_TIME) && (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()))
+		kts.tv_nsec &= 0xFFFFFFFFUL;
+
 	ts->tv_nsec = kts.tv_nsec;
 
 	return 0;
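
The masking above exists because __kernel_timespec gives tv_nsec a 64-bit slot even for 32-bit callers, and only the low 32 bits are defined in that case; the kernel must ignore whatever the caller left in the upper half. A small userspace illustration (values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* garbage in the upper 32 bits, as a 32-bit caller might leave it */
	int64_t tv_nsec = (int64_t)0xdeadbeef00000001ULL;

	tv_nsec &= 0xFFFFFFFF;			/* keep the defined low word */
	printf("%lld\n", (long long)tv_nsec);	/* prints 1 */
	return 0;
}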
@@ -870,16 +873,61 @@
 EXPORT_SYMBOL_GPL(get_timespec64);
 
 int put_timespec64(const struct timespec64 *ts,
-		   struct timespec __user *uts)
+		   struct __kernel_timespec __user *uts)
 {
-	struct timespec kts = {
+	struct __kernel_timespec kts = {
 		.tv_sec = ts->tv_sec,
 		.tv_nsec = ts->tv_nsec
 	};
+
 	return copy_to_user(uts, &kts, sizeof(kts)) ? -EFAULT : 0;
 }
 EXPORT_SYMBOL_GPL(put_timespec64);
 
+int __compat_get_timespec64(struct timespec64 *ts64,
+				   const struct compat_timespec __user *cts)
+{
+	struct compat_timespec ts;
+	int ret;
+
+	ret = copy_from_user(&ts, cts, sizeof(ts));
+	if (ret)
+		return -EFAULT;
+
+	ts64->tv_sec = ts.tv_sec;
+	ts64->tv_nsec = ts.tv_nsec;
+
+	return 0;
+}
+
+int __compat_put_timespec64(const struct timespec64 *ts64,
+				   struct compat_timespec __user *cts)
+{
+	struct compat_timespec ts = {
+		.tv_sec = ts64->tv_sec,
+		.tv_nsec = ts64->tv_nsec
+	};
+	return copy_to_user(cts, &ts, sizeof(ts)) ? -EFAULT : 0;
+}
+
+int compat_get_timespec64(struct timespec64 *ts, const void __user *uts)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
+	else
+		return __compat_get_timespec64(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_get_timespec64);
+
+int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
+{
+	if (COMPAT_USE_64BIT_TIME)
+		return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
+	else
+		return __compat_put_timespec64(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_put_timespec64);
+
 int get_itimerspec64(struct itimerspec64 *it,
 			const struct itimerspec __user *uit)
 {
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 49cbcee..4786df9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -705,18 +705,19 @@
 }
 
 /**
- * __getnstimeofday64 - Returns the time of day in a timespec64.
+ * ktime_get_real_ts64 - Returns the time of day in a timespec64.
  * @ts:		pointer to the timespec to be set
  *
- * Updates the time of day in the timespec.
- * Returns 0 on success, or -ve when suspended (timespec will be undefined).
+ * Returns the time of day in a timespec64 (WARN if suspended).
  */
-int __getnstimeofday64(struct timespec64 *ts)
+void ktime_get_real_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long seq;
 	u64 nsecs;
 
+	WARN_ON(timekeeping_suspended);
+
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
@@ -727,28 +728,8 @@
 
 	ts->tv_nsec = 0;
 	timespec64_add_ns(ts, nsecs);
-
-	/*
-	 * Do not bail out early, in case there were callers still using
-	 * the value, even in the face of the WARN_ON.
-	 */
-	if (unlikely(timekeeping_suspended))
-		return -EAGAIN;
-	return 0;
 }
-EXPORT_SYMBOL(__getnstimeofday64);
-
-/**
- * getnstimeofday64 - Returns the time of day in a timespec64.
- * @ts:		pointer to the timespec64 to be set
- *
- * Returns the time of day in a timespec64 (WARN if suspended).
- */
-void getnstimeofday64(struct timespec64 *ts)
-{
-	WARN_ON(__getnstimeofday64(ts));
-}
-EXPORT_SYMBOL(getnstimeofday64);
+EXPORT_SYMBOL(ktime_get_real_ts64);
 
 ktime_t ktime_get(void)
 {
@@ -814,6 +795,25 @@
 }
 EXPORT_SYMBOL_GPL(ktime_get_with_offset);
 
+ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
+{
+	struct timekeeper *tk = &tk_core.timekeeper;
+	unsigned int seq;
+	ktime_t base, *offset = offsets[offs];
+
+	WARN_ON(timekeeping_suspended);
+
+	do {
+		seq = read_seqcount_begin(&tk_core.seq);
+		base = ktime_add(tk->tkr_mono.base, *offset);
+
+	} while (read_seqcount_retry(&tk_core.seq, seq));
+
+	return base;
+
+}
+EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
+
 /**
  * ktime_mono_to_any() - convert monotonic time to any other time
  * @tmono:	time to convert.
@@ -1410,12 +1410,12 @@
 }
 
 /**
- * getrawmonotonic64 - Returns the raw monotonic time in a timespec
+ * ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec
  * @ts:		pointer to the timespec64 to be set
  *
  * Returns the raw monotonic time (completely un-modified by ntp)
  */
-void getrawmonotonic64(struct timespec64 *ts)
+void ktime_get_raw_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long seq;
@@ -1431,7 +1431,7 @@
 	ts->tv_nsec = 0;
 	timespec64_add_ns(ts, nsecs);
 }
-EXPORT_SYMBOL(getrawmonotonic64);
+EXPORT_SYMBOL(ktime_get_raw_ts64);
 
 
 /**
@@ -2133,23 +2133,20 @@
 }
 EXPORT_SYMBOL(get_seconds);
 
-struct timespec64 current_kernel_time64(void)
+void ktime_get_coarse_real_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	struct timespec64 now;
 	unsigned long seq;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		now = tk_xtime(tk);
+		*ts = tk_xtime(tk);
 	} while (read_seqcount_retry(&tk_core.seq, seq));
-
-	return now;
 }
-EXPORT_SYMBOL(current_kernel_time64);
+EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
 
-struct timespec64 get_monotonic_coarse64(void)
+void ktime_get_coarse_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct timespec64 now, mono;
@@ -2162,12 +2159,10 @@
 		mono = tk->wall_to_monotonic;
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	set_normalized_timespec64(&now, now.tv_sec + mono.tv_sec,
+	set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
 				now.tv_nsec + mono.tv_nsec);
-
-	return now;
 }
-EXPORT_SYMBOL(get_monotonic_coarse64);
+EXPORT_SYMBOL(ktime_get_coarse_ts64);
 
 /*
  * Must hold jiffies_lock
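
The renames above map one-to-one onto the new ktime_get_* accessors; a summary sketch drawn from the hunks in this file (all fill a struct timespec64):

/*
 *  getnstimeofday64(&ts)          ->  ktime_get_real_ts64(&ts)
 *  getrawmonotonic64(&ts)         ->  ktime_get_raw_ts64(&ts)
 *  ts = current_kernel_time64()   ->  ktime_get_coarse_real_ts64(&ts)
 *  ts = get_monotonic_coarse64()  ->  ktime_get_coarse_ts64(&ts)
 */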
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 4a4fd56..cc2d23e 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1251,18 +1251,18 @@
  *
  * Note: For !irqsafe timers, you must not hold locks that are held in
  *   interrupt context while calling this function. Even if the lock has
- *   nothing to do with the timer in question.  Here's why:
+ *   nothing to do with the timer in question.  Here's why::
  *
  *    CPU0                             CPU1
  *    ----                             ----
- *                                   <SOFTIRQ>
- *                                   call_timer_fn();
- *                                     base->running_timer = mytimer;
- *  spin_lock_irq(somelock);
+ *                                     <SOFTIRQ>
+ *                                       call_timer_fn();
+ *                                       base->running_timer = mytimer;
+ *    spin_lock_irq(somelock);
  *                                     <IRQ>
  *                                        spin_lock(somelock);
- *  del_timer_sync(mytimer);
- *   while (base->running_timer == mytimer);
+ *    del_timer_sync(mytimer);
+ *    while (base->running_timer == mytimer);
  *
  * Now del_timer_sync() will never return and never release somelock.
  * The interrupt on the other CPU is waiting to grab somelock but
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 0ed768b..d647dab 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -28,8 +28,6 @@
 	u64 now;
 };
 
-typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
-
 /*
  * This allows printing both to /proc/timer_list and
  * to the console (on SysRq-Q):
@@ -372,24 +370,12 @@
 	.show = timer_list_show,
 };
 
-static int timer_list_open(struct inode *inode, struct file *filp)
-{
-	return seq_open_private(filp, &timer_list_sops,
-			sizeof(struct timer_list_iter));
-}
-
-static const struct file_operations timer_list_fops = {
-	.open		= timer_list_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-};
-
 static int __init init_timer_list_procfs(void)
 {
 	struct proc_dir_entry *pe;
 
-	pe = proc_create("timer_list", 0400, NULL, &timer_list_fops);
+	pe = proc_create_seq_private("timer_list", 0400, NULL, &timer_list_sops,
+			sizeof(struct timer_list_iter), NULL);
 	if (!pe)
 		return -ENOMEM;
 	return 0;
diff --git a/kernel/torture.c b/kernel/torture.c
index 37b9401..3de1efb 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -574,7 +574,7 @@
 {
 	int spt;
 
-	cond_resched_rcu_qs();
+	cond_resched_tasks_rcu_qs();
 	spt = READ_ONCE(stutter_pause_test);
 	for (; spt; spt = READ_ONCE(stutter_pause_test)) {
 		if (spt == 1) {
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c9cb976..6a46af2 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -809,7 +809,7 @@
  *
  *  You can see, it is legitimate for the previous pointer of
  *  the head (or any page) not to point back to itself. But only
- *  temporarially.
+ *  temporarily.
  */
 
 #define RB_PAGE_NORMAL		0UL
@@ -906,7 +906,7 @@
 }
 
 /*
- * rb_head_page_dactivate - clears head page ptr (for free list)
+ * rb_head_page_deactivate - clears head page ptr (for free list)
  */
 static void
 rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer)
@@ -1780,7 +1780,7 @@
 
 		put_online_cpus();
 	} else {
-		/* Make sure this CPU has been intitialized */
+		/* Make sure this CPU has been initialized */
 		if (!cpumask_test_cpu(cpu_id, buffer->cpumask))
 			goto out;
 
@@ -2325,7 +2325,7 @@
 
 	/*
 	 * If we need to add a timestamp, then we
-	 * add it to the start of the resevered space.
+	 * add it to the start of the reserved space.
 	 */
 	if (unlikely(info->add_timestamp)) {
 		bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
@@ -2681,7 +2681,7 @@
  * ring_buffer_nest_start - Allow to trace while nested
  * @buffer: The ring buffer to modify
  *
- * The ring buffer has a safty mechanism to prevent recursion.
+ * The ring buffer has a safety mechanism to prevent recursion.
  * But there may be a case where a trace needs to be done while
  * tracing something else. In this case, calling this function
  * will allow this function to nest within a currently active
@@ -2699,7 +2699,7 @@
 	preempt_disable_notrace();
 	cpu = raw_smp_processor_id();
 	cpu_buffer = buffer->buffers[cpu];
-	/* This is the shift value for the above recusive locking */
+	/* This is the shift value for the above recursive locking */
 	cpu_buffer->nest += NESTED_BITS;
 }
 
@@ -2718,7 +2718,7 @@
 	/* disabled by ring_buffer_nest_start() */
 	cpu = raw_smp_processor_id();
 	cpu_buffer = buffer->buffers[cpu];
-	/* This is the shift value for the above recusive locking */
+	/* This is the shift value for the above recursive locking */
 	cpu_buffer->nest -= NESTED_BITS;
 	preempt_enable_notrace();
 }
@@ -2907,7 +2907,7 @@
  * @buffer: the ring buffer to reserve from
  * @length: the length of the data to reserve (excluding event header)
  *
- * Returns a reseverd event on the ring buffer to copy directly to.
+ * Returns a reserved event on the ring buffer to copy directly to.
  * The user of this interface will need to get the body to write into
  * and can use the ring_buffer_event_data() interface.
  *
@@ -3009,7 +3009,7 @@
  * This function lets the user discard an event in the ring buffer
  * and then that event will not be read later.
  *
- * This function only works if it is called before the the item has been
+ * This function only works if it is called before the item has been
  * committed. It will try to free the event from the ring buffer
  * if another event has not been added behind it.
  *
@@ -4127,7 +4127,7 @@
  * through the buffer.  Memory is allocated, buffer recording
  * is disabled, and the iterator pointer is returned to the caller.
  *
- * Disabling buffer recordng prevents the reading from being
+ * Disabling buffer recording prevents the reading from being
  * corrupted. This is not a consuming read, so a producer is not
  * expected.
  *
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bcd9303..108ce3e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4395,8 +4395,7 @@
 {
 	char *cmp;
 	int neg = 0;
-	int ret = -ENODEV;
-	int i;
+	int ret;
 	size_t orig_len = strlen(option);
 
 	cmp = strstrip(option);
@@ -4408,16 +4407,12 @@
 
 	mutex_lock(&trace_types_lock);
 
-	for (i = 0; trace_options[i]; i++) {
-		if (strcmp(cmp, trace_options[i]) == 0) {
-			ret = set_tracer_flag(tr, 1 << i, !neg);
-			break;
-		}
-	}
-
+	ret = match_string(trace_options, -1, cmp);
 	/* If no option could be set, test the specific tracer options */
-	if (!trace_options[i])
+	if (ret < 0)
 		ret = set_tracer_option(tr, cmp, neg);
+	else
+		ret = set_tracer_flag(tr, 1 << ret, !neg);
 
 	mutex_unlock(&trace_types_lock);
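
match_string(), adopted above, returns the index of the first matching entry or -EINVAL; passing n == -1 makes it stop at the first NULL entry of a NULL-terminated array, which is why the sentinel-terminated trace_options[] can drop the open-coded loop. A userspace sketch of the same contract:

#include <errno.h>
#include <string.h>

static int match_string(const char * const *array, size_t n, const char *string)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (!array[i])
			break;		/* n == (size_t)-1: NULL-terminated */
		if (!strcmp(array[i], string))
			return i;
	}
	return -EINVAL;
}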
 
@@ -6074,6 +6069,7 @@
 {
 	struct trace_array *tr = filp->private_data;
 	struct ring_buffer_event *event;
+	enum event_trigger_type tt = ETT_NONE;
 	struct ring_buffer *buffer;
 	struct print_entry *entry;
 	unsigned long irq_flags;
@@ -6122,6 +6118,12 @@
 		written = cnt;
 	len = cnt;
 
+	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
+		/* do not add \n before testing triggers, but add \0 */
+		entry->buf[cnt] = '\0';
+		tt = event_triggers_call(tr->trace_marker_file, entry, event);
+	}
+
 	if (entry->buf[cnt - 1] != '\n') {
 		entry->buf[cnt] = '\n';
 		entry->buf[cnt + 1] = '\0';
@@ -6130,6 +6132,9 @@
 
 	__buffer_unlock_commit(buffer, event);
 
+	if (tt)
+		event_triggers_post_call(tr->trace_marker_file, tt);
+
 	if (written > 0)
 		*fpos += written;
 
@@ -7896,6 +7901,7 @@
 static void
 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 {
+	struct trace_event_file *file;
 	int cpu;
 
 	trace_create_file("available_tracers", 0444, d_tracer,
@@ -7928,6 +7934,12 @@
 	trace_create_file("trace_marker", 0220, d_tracer,
 			  tr, &tracing_mark_fops);
 
+	file = __find_event_file(tr, "ftrace", "print");
+	if (file && file->dir)
+		trace_create_file("trigger", 0644, file->dir, file,
+				  &event_trigger_fops);
+	tr->trace_marker_file = file;
+
 	trace_create_file("trace_marker_raw", 0220, d_tracer,
 			  tr, &tracing_mark_raw_fops);
 
@@ -8111,6 +8123,8 @@
 	if (IS_ERR(d_tracer))
 		return 0;
 
+	event_trace_init();
+
 	init_tracer_tracefs(&global_trace, d_tracer);
 	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
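
The hunks above route trace_marker writes through event triggers: the
"ftrace/print" event gains a tracefs "trigger" file, and triggers are
called before the trailing '\n' is appended (a '\0' is substituted) so
trigger filters see a clean string. A userspace sketch of the resulting
interface (illustrative only; assumes tracefs is mounted at
/sys/kernel/tracing and CONFIG_TRACER_SNAPSHOT is enabled; the paths and
the "snapshot" command are standard trigger syntax, not new in this diff):

	/* arm a snapshot trigger on trace_marker writes, then fire it */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/kernel/tracing/events/ftrace/print/trigger",
			      O_WRONLY);

		if (fd < 0 || write(fd, "snapshot\n", 9) < 0) {
			perror("arming trigger");
			return 1;
		}
		close(fd);

		fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
		if (fd < 0 || write(fd, "hello\n", 6) < 0) {
			perror("trace_marker");
			return 1;
		}
		close(fd);	/* this write should have taken the snapshot */
		return 0;
	}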
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 507954b..630c5a2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -259,6 +259,7 @@
 	struct trace_options	*topts;
 	struct list_head	systems;
 	struct list_head	events;
+	struct trace_event_file *trace_marker_file;
 	cpumask_var_t		tracing_cpumask; /* only trace on set CPUs */
 	int			ref;
 #ifdef CONFIG_FUNCTION_TRACER
@@ -1334,7 +1335,7 @@
 		trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
 
 	if (tt)
-		event_triggers_post_call(file, tt, entry, event);
+		event_triggers_post_call(file, tt);
 }
 
 /**
@@ -1367,7 +1368,7 @@
 						irq_flags, pc, regs);
 
 	if (tt)
-		event_triggers_post_call(file, tt, entry, event);
+		event_triggers_post_call(file, tt);
 }
 
 #define FILTER_PRED_INVALID	((unsigned short)-1)
@@ -1451,9 +1452,13 @@
 extern void trace_event_enable_cmd_record(bool enable);
 extern void trace_event_enable_tgid_record(bool enable);
 
+extern int event_trace_init(void);
 extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
 extern int event_trace_del_tracer(struct trace_array *tr);
 
+extern struct trace_event_file *__find_event_file(struct trace_array *tr,
+						  const char *system,
+						  const char *event);
 extern struct trace_event_file *find_event_file(struct trace_array *tr,
 						const char *system,
 						const char *event);
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 22fee76..80e0b2a 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -159,13 +159,13 @@
 		 * wants to run, schedule in, but if the CPU is idle,
 		 * we'll keep burning cycles.
 		 *
-		 * Note the _rcu_qs() version of cond_resched() will
+		 * Note the tasks_rcu_qs() version of cond_resched() will
 		 * notify synchronize_rcu_tasks() that this thread has
 		 * passed a quiescent state for rcu_tasks. Otherwise
 		 * this thread will never voluntarily schedule which would
 		 * block synchronize_rcu_tasks() indefinitely.
 		 */
-		cond_resched();
+		cond_resched_tasks_rcu_qs();
 	}
 
 	return 0;
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e3a658b..1d67464 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -230,7 +230,7 @@
 	FILTER_OTHER
 );
 
-FTRACE_ENTRY(print, print_entry,
+FTRACE_ENTRY_REG(print, print_entry,
 
 	TRACE_PRINT,
 
@@ -242,7 +242,9 @@
 	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->buf),
 
-	FILTER_OTHER
+	FILTER_OTHER,
+
+	ftrace_event_register
 );
 
 FTRACE_ENTRY(raw_data, raw_data_entry,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 05c7172..14ff4ff 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2007,16 +2007,18 @@
 			return -1;
 		}
 	}
-	trace_create_file("filter", 0644, file->dir, file,
-			  &ftrace_event_filter_fops);
 
 	/*
 	 * Only event directories that can be enabled should have
-	 * triggers.
+	 * triggers or filters.
 	 */
-	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
+		trace_create_file("filter", 0644, file->dir, file,
+				  &ftrace_event_filter_fops);
+
 		trace_create_file("trigger", 0644, file->dir, file,
 				  &event_trigger_fops);
+	}
 
 #ifdef CONFIG_HIST_TRIGGERS
 	trace_create_file("hist", 0444, file->dir, file,
@@ -2473,8 +2475,9 @@
 	}
 }
 
+/* Returns any file that matches the system and event */
 struct trace_event_file *
-find_event_file(struct trace_array *tr, const char *system,  const char *event)
+__find_event_file(struct trace_array *tr, const char *system, const char *event)
 {
 	struct trace_event_file *file;
 	struct trace_event_call *call;
@@ -2485,10 +2488,7 @@
 		call = file->event_call;
 		name = trace_event_name(call);
 
-		if (!name || !call->class || !call->class->reg)
-			continue;
-
-		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
+		if (!name || !call->class)
 			continue;
 
 		if (strcmp(event, name) == 0 &&
@@ -2498,6 +2498,20 @@
 	return NULL;
 }
 
+/* Returns a valid trace event file that matches the system and event */
+struct trace_event_file *
+find_event_file(struct trace_array *tr, const char *system, const char *event)
+{
+	struct trace_event_file *file;
+
+	file = __find_event_file(tr, system, event);
+	if (!file || !file->event_call->class->reg ||
+	    file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
+		return NULL;
+
+	return file;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 /* Avoid typos */
@@ -3132,7 +3146,7 @@
 
 early_initcall(event_trace_enable_again);
 
-static __init int event_trace_init(void)
+__init int event_trace_init(void)
 {
 	struct trace_array *tr;
 	struct dentry *d_tracer;
@@ -3177,8 +3191,6 @@
 	event_trace_enable();
 }
 
-fs_initcall(event_trace_init);
-
 #ifdef CONFIG_FTRACE_STARTUP_TEST
 
 static DEFINE_SPINLOCK(test_spinlock);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 7d306b7..0171407 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -750,31 +750,32 @@
  *
  * Note:
  * - @str might not be NULL-terminated if it's of type DYN_STRING
- *   or STATIC_STRING
+ *   or STATIC_STRING, unless @len is zero.
  */
 
 static int regex_match_full(char *str, struct regex *r, int len)
 {
-	if (strncmp(str, r->pattern, len) == 0)
-		return 1;
-	return 0;
+	/* len of zero means str is dynamic and ends with '\0' */
+	if (!len)
+		return strcmp(str, r->pattern) == 0;
+
+	return strncmp(str, r->pattern, len) == 0;
 }
 
 static int regex_match_front(char *str, struct regex *r, int len)
 {
-	if (len < r->len)
+	if (len && len < r->len)
 		return 0;
 
-	if (strncmp(str, r->pattern, r->len) == 0)
-		return 1;
-	return 0;
+	return strncmp(str, r->pattern, r->len) == 0;
 }
 
 static int regex_match_middle(char *str, struct regex *r, int len)
 {
-	if (strnstr(str, r->pattern, len))
-		return 1;
-	return 0;
+	if (!len)
+		return strstr(str, r->pattern) != NULL;
+
+	return strnstr(str, r->pattern, len) != NULL;
 }
 
 static int regex_match_end(char *str, struct regex *r, int len)
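
For reference, the rewritten matchers above all follow one convention: a
@len of zero marks a '\0'-terminated candidate (dynamic strings), so the
unbounded strcmp()/strstr() variants are safe, while a non-zero @len
bounds the comparison for static buffers. A minimal userspace sketch of
the same dispatch (illustrative, not kernel code):

	#include <assert.h>
	#include <string.h>

	static int match_full(const char *str, const char *pattern, int len)
	{
		if (!len)	/* dynamic string: '\0'-terminated */
			return strcmp(str, pattern) == 0;
		return strncmp(str, pattern, len) == 0;	/* bounded compare */
	}

	int main(void)
	{
		assert(match_full("comm", "comm", 0));	/* dynamic */
		assert(match_full("commX", "comm", 4));	/* bounded */
		return 0;
	}
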
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index b9061ed..046c716 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -2865,7 +2865,7 @@
 {
 	struct trace_event_file *file;
 
-	file = find_event_file(tr, system, event_name);
+	file = __find_event_file(tr, system, event_name);
 	if (!file)
 		return ERR_PTR(-EINVAL);
 
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 8b5bdcf..d182496 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -97,7 +97,6 @@
  * event_triggers_post_call - Call 'post_triggers' for a trace event
  * @file: The trace_event_file associated with the event
  * @tt: enum event_trigger_type containing a set bit for each trigger to invoke
- * @rec: The trace entry for the event
  *
  * For each trigger associated with an event, invoke the trigger
  * function registered with the associated trigger command, if the
@@ -108,8 +107,7 @@
  */
 void
 event_triggers_post_call(struct trace_event_file *file,
-			 enum event_trigger_type tt,
-			 void *rec, struct ring_buffer_event *event)
+			 enum event_trigger_type tt)
 {
 	struct event_trigger_data *data;
 
@@ -117,7 +115,7 @@
 		if (data->paused)
 			continue;
 		if (data->cmd_ops->trigger_type & tt)
-			data->ops->func(data, rec, event);
+			data->ops->func(data, NULL, NULL);
 	}
 }
 EXPORT_SYMBOL_GPL(event_triggers_post_call);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 548e62e..45630a7 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -14,6 +14,13 @@
 
 #include "trace_output.h"
 
+/* Stub function for events with triggers */
+static int ftrace_event_register(struct trace_event_call *call,
+				 enum trace_reg type, void *data)
+{
+	return 0;
+}
+
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM	ftrace
 
@@ -117,7 +124,7 @@
 
 #undef __dynamic_array
 #define __dynamic_array(type, item)					\
-	ret = trace_define_field(event_call, #type, #item,		\
+	ret = trace_define_field(event_call, #type "[]", #item,  \
 				 offsetof(typeof(field), item),		\
 				 0, is_signed_type(type), filter_type);\
 	if (ret)							\
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 1e37da2..6dc6356 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -257,7 +257,7 @@
 }
 
 /**
- * tracepoint_probe_register -  Connect a probe to a tracepoint
+ * tracepoint_probe_register_prio -  Connect a probe to a tracepoint with priority
  * @tp: tracepoint
  * @probe: probe handler
  * @data: tracepoint data
@@ -290,7 +290,6 @@
  * @tp: tracepoint
  * @probe: probe handler
  * @data: tracepoint data
- * @prio: priority of this function over other registered functions
  *
  * Returns 0 if ok, error value on error.
  * Note: if @tp is within a module, the caller is responsible for
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca7959b..9f9983b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -66,7 +66,7 @@
 	 * be executing on any CPU.  The pool behaves as an unbound one.
 	 *
 	 * Note that DISASSOCIATED should be flipped only while holding
-	 * attach_mutex to avoid changing binding state while
+	 * wq_pool_attach_mutex to avoid changing binding state while
 	 * worker_attach_to_pool() is in progress.
 	 */
 	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
@@ -123,7 +123,7 @@
  *    cpu or grabbing pool->lock is enough for read access.  If
  *    POOL_DISASSOCIATED is set, it's identical to L.
  *
- * A: pool->attach_mutex protected.
+ * A: wq_pool_attach_mutex protected.
  *
  * PL: wq_pool_mutex protected.
  *
@@ -166,7 +166,6 @@
 						/* L: hash of busy workers */
 
 	struct worker		*manager;	/* L: purely informational */
-	struct mutex		attach_mutex;	/* attach/detach exclusion */
 	struct list_head	workers;	/* A: attached workers */
 	struct completion	*detach_completion; /* all workers detached */
 
@@ -297,6 +296,7 @@
 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
+static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
 
@@ -399,14 +399,14 @@
  * @worker: iteration cursor
  * @pool: worker_pool to iterate workers of
  *
- * This must be called with @pool->attach_mutex.
+ * This must be called with wq_pool_attach_mutex.
  *
  * The if/else clause exists only for the lockdep assertion and can be
  * ignored.
  */
 #define for_each_pool_worker(worker, pool)				\
 	list_for_each_entry((worker), &(pool)->workers, node)		\
-		if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
+		if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
 		else
 
 /**
@@ -1724,7 +1724,7 @@
 static void worker_attach_to_pool(struct worker *worker,
 				   struct worker_pool *pool)
 {
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
 
 	/*
 	 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
@@ -1733,37 +1733,40 @@
 	set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
 
 	/*
-	 * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
-	 * stable across this function.  See the comments above the
-	 * flag definition for details.
+	 * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
+	 * stable across this function.  See the comments above the flag
+	 * definition for details.
 	 */
 	if (pool->flags & POOL_DISASSOCIATED)
 		worker->flags |= WORKER_UNBOUND;
 
 	list_add_tail(&worker->node, &pool->workers);
+	worker->pool = pool;
 
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 }
 
 /**
  * worker_detach_from_pool() - detach a worker from its pool
  * @worker: worker which is attached to its pool
- * @pool: the pool @worker is attached to
  *
  * Undo the attaching which had been done in worker_attach_to_pool().  The
  * caller worker shouldn't access the pool after it is detached unless it
  * holds another reference to the pool.
  */
-static void worker_detach_from_pool(struct worker *worker,
-				    struct worker_pool *pool)
+static void worker_detach_from_pool(struct worker *worker)
 {
+	struct worker_pool *pool = worker->pool;
 	struct completion *detach_completion = NULL;
 
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
+
 	list_del(&worker->node);
+	worker->pool = NULL;
+
 	if (list_empty(&pool->workers))
 		detach_completion = pool->detach_completion;
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 
 	/* clear leftover flags without pool->lock after it is detached */
 	worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
@@ -1799,7 +1802,6 @@
 	if (!worker)
 		goto fail;
 
-	worker->pool = pool;
 	worker->id = id;
 
 	if (pool->cpu >= 0)
@@ -2086,6 +2088,12 @@
 	worker->current_pwq = pwq;
 	work_color = get_work_color(work);
 
+	/*
+	 * Record wq name for cmdline and debug reporting, may get
+	 * overridden through set_worker_desc().
+	 */
+	strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
+
 	list_del_init(&work->entry);
 
 	/*
@@ -2181,7 +2189,6 @@
 	worker->current_work = NULL;
 	worker->current_func = NULL;
 	worker->current_pwq = NULL;
-	worker->desc_valid = false;
 	pwq_dec_nr_in_flight(pwq, work_color);
 }
 
@@ -2206,6 +2213,16 @@
 	}
 }
 
+static void set_pf_worker(bool val)
+{
+	mutex_lock(&wq_pool_attach_mutex);
+	if (val)
+		current->flags |= PF_WQ_WORKER;
+	else
+		current->flags &= ~PF_WQ_WORKER;
+	mutex_unlock(&wq_pool_attach_mutex);
+}
+
 /**
  * worker_thread - the worker thread function
  * @__worker: self
@@ -2224,7 +2241,7 @@
 	struct worker_pool *pool = worker->pool;
 
 	/* tell the scheduler that this is a workqueue worker */
-	worker->task->flags |= PF_WQ_WORKER;
+	set_pf_worker(true);
 woke_up:
 	spin_lock_irq(&pool->lock);
 
@@ -2232,11 +2249,11 @@
 	if (unlikely(worker->flags & WORKER_DIE)) {
 		spin_unlock_irq(&pool->lock);
 		WARN_ON_ONCE(!list_empty(&worker->entry));
-		worker->task->flags &= ~PF_WQ_WORKER;
+		set_pf_worker(false);
 
 		set_task_comm(worker->task, "kworker/dying");
 		ida_simple_remove(&pool->worker_ida, worker->id);
-		worker_detach_from_pool(worker, pool);
+		worker_detach_from_pool(worker);
 		kfree(worker);
 		return 0;
 	}
@@ -2335,7 +2352,7 @@
 	 * Mark rescuer as worker too.  As WORKER_PREP is never cleared, it
 	 * doesn't participate in concurrency management.
 	 */
-	rescuer->task->flags |= PF_WQ_WORKER;
+	set_pf_worker(true);
 repeat:
 	set_current_state(TASK_IDLE);
 
@@ -2367,7 +2384,6 @@
 		worker_attach_to_pool(rescuer, pool);
 
 		spin_lock_irq(&pool->lock);
-		rescuer->pool = pool;
 
 		/*
 		 * Slurp in all works issued via this workqueue and
@@ -2417,10 +2433,9 @@
 		if (need_more_worker(pool))
 			wake_up_worker(pool);
 
-		rescuer->pool = NULL;
 		spin_unlock_irq(&pool->lock);
 
-		worker_detach_from_pool(rescuer, pool);
+		worker_detach_from_pool(rescuer);
 
 		spin_lock_irq(&wq_mayday_lock);
 	}
@@ -2429,7 +2444,7 @@
 
 	if (should_stop) {
 		__set_current_state(TASK_RUNNING);
-		rescuer->task->flags &= ~PF_WQ_WORKER;
+		set_pf_worker(false);
 		return 0;
 	}
 
@@ -3271,7 +3286,6 @@
 
 	timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
 
-	mutex_init(&pool->attach_mutex);
 	INIT_LIST_HEAD(&pool->workers);
 
 	ida_init(&pool->worker_ida);
@@ -3354,10 +3368,10 @@
 	WARN_ON(pool->nr_workers || pool->nr_idle);
 	spin_unlock_irq(&pool->lock);
 
-	mutex_lock(&pool->attach_mutex);
+	mutex_lock(&wq_pool_attach_mutex);
 	if (!list_empty(&pool->workers))
 		pool->detach_completion = &detach_completion;
-	mutex_unlock(&pool->attach_mutex);
+	mutex_unlock(&wq_pool_attach_mutex);
 
 	if (pool->detach_completion)
 		wait_for_completion(pool->detach_completion);
@@ -3700,8 +3714,7 @@
 
 	lockdep_assert_held(&wq_pool_mutex);
 
-	ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]),
-		      GFP_KERNEL);
+	ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
 
 	new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
 	tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
@@ -4347,7 +4360,6 @@
 		va_start(args, fmt);
 		vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
 		va_end(args);
-		worker->desc_valid = true;
 	}
 }
 
@@ -4371,7 +4383,6 @@
 	char desc[WORKER_DESC_LEN] = { };
 	struct pool_workqueue *pwq = NULL;
 	struct workqueue_struct *wq = NULL;
-	bool desc_valid = false;
 	struct worker *worker;
 
 	if (!(task->flags & PF_WQ_WORKER))
@@ -4384,22 +4395,18 @@
 	worker = kthread_probe_data(task);
 
 	/*
-	 * Carefully copy the associated workqueue's workfn and name.  Keep
-	 * the original last '\0' in case the original contains garbage.
+	 * Carefully copy the associated workqueue's workfn, name and desc.
+	 * Keep the original last '\0' in case the original is garbage.
 	 */
 	probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
 	probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
 	probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
 	probe_kernel_read(name, wq->name, sizeof(name) - 1);
-
-	/* copy worker description */
-	probe_kernel_read(&desc_valid, &worker->desc_valid, sizeof(desc_valid));
-	if (desc_valid)
-		probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
+	probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
 
 	if (fn || name[0] || desc[0]) {
 		printk("%sWorkqueue: %s %pf", log_lvl, name, fn);
-		if (desc[0])
+		if (strcmp(name, desc))
 			pr_cont(" (%s)", desc);
 		pr_cont("\n");
 	}
@@ -4579,6 +4586,47 @@
 	rcu_read_unlock_sched();
 }
 
+/* used to show worker information through /proc/PID/{comm,stat,status} */
+void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
+{
+	int off;
+
+	/* always show the actual comm */
+	off = strscpy(buf, task->comm, size);
+	if (off < 0)
+		return;
+
+	/* stabilize PF_WQ_WORKER and worker pool association */
+	mutex_lock(&wq_pool_attach_mutex);
+
+	if (task->flags & PF_WQ_WORKER) {
+		struct worker *worker = kthread_data(task);
+		struct worker_pool *pool = worker->pool;
+
+		if (pool) {
+			spin_lock_irq(&pool->lock);
+			/*
+			 * ->desc tracks information (wq name or
+			 * set_worker_desc()) for the latest execution.  If
+			 * current, prepend '+', otherwise '-'.
+			 */
+			if (worker->desc[0] != '\0') {
+				if (worker->current_work)
+					scnprintf(buf + off, size - off, "+%s",
+						  worker->desc);
+				else
+					scnprintf(buf + off, size - off, "-%s",
+						  worker->desc);
+			}
+			spin_unlock_irq(&pool->lock);
+		}
+	}
+
+	mutex_unlock(&wq_pool_attach_mutex);
+}
+
+#ifdef CONFIG_SMP
+
 /*
  * CPU hotplug.
  *
@@ -4600,7 +4648,7 @@
 	struct worker *worker;
 
 	for_each_cpu_worker_pool(pool, cpu) {
-		mutex_lock(&pool->attach_mutex);
+		mutex_lock(&wq_pool_attach_mutex);
 		spin_lock_irq(&pool->lock);
 
 		/*
@@ -4616,7 +4664,7 @@
 		pool->flags |= POOL_DISASSOCIATED;
 
 		spin_unlock_irq(&pool->lock);
-		mutex_unlock(&pool->attach_mutex);
+		mutex_unlock(&wq_pool_attach_mutex);
 
 		/*
 		 * Call schedule() so that we cross rq->lock and thus can
@@ -4657,7 +4705,7 @@
 {
 	struct worker *worker;
 
-	lockdep_assert_held(&pool->attach_mutex);
+	lockdep_assert_held(&wq_pool_attach_mutex);
 
 	/*
 	 * Restore CPU affinity of all workers.  As all idle workers should
@@ -4727,7 +4775,7 @@
 	static cpumask_t cpumask;
 	struct worker *worker;
 
-	lockdep_assert_held(&pool->attach_mutex);
+	lockdep_assert_held(&wq_pool_attach_mutex);
 
 	/* is @cpu allowed for @pool? */
 	if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
@@ -4762,14 +4810,14 @@
 	mutex_lock(&wq_pool_mutex);
 
 	for_each_pool(pool, pi) {
-		mutex_lock(&pool->attach_mutex);
+		mutex_lock(&wq_pool_attach_mutex);
 
 		if (pool->cpu == cpu)
 			rebind_workers(pool);
 		else if (pool->cpu < 0)
 			restore_unbound_workers_cpumask(pool, cpu);
 
-		mutex_unlock(&pool->attach_mutex);
+		mutex_unlock(&wq_pool_attach_mutex);
 	}
 
 	/* update NUMA affinity of unbound workqueues */
@@ -4799,8 +4847,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_SMP
-
 struct work_for_cpu {
 	struct work_struct work;
 	long (*fn)(void *);
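
With ->desc now always seeded from the workqueue name and exposed via
wq_worker_comm(), worker threads report names such as "kworker/1:2+events"
through /proc/PID/comm (example output). A work item can still override
the label; a hedged kernel-side sketch using the existing
set_worker_desc() helper ("mydrv-flush" is a made-up name):

	#include <linux/workqueue.h>

	static void mydrv_flush_fn(struct work_struct *work)
	{
		/* reported as e.g. "kworker/0:1+mydrv-flush" while running */
		set_worker_desc("mydrv-flush");

		/* ... actual flush work ... */
	}
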
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index d390d1b..66fbb5a 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -31,13 +31,12 @@
 	struct work_struct	*current_work;	/* L: work being processed */
 	work_func_t		current_func;	/* L: current_work's fn */
 	struct pool_workqueue	*current_pwq; /* L: current_work's pwq */
-	bool			desc_valid;	/* ->desc is valid */
 	struct list_head	scheduled;	/* L: scheduled works */
 
 	/* 64 bytes boundary on 64bit, 32 on 32bit */
 
 	struct task_struct	*task;		/* I: worker task */
-	struct worker_pool	*pool;		/* I: the associated pool */
+	struct worker_pool	*pool;		/* A: the associated pool */
 						/* L: for rescuers */
 	struct list_head	node;		/* A: anchored at pool->workers */
 						/* A: runs through worker->node */
diff --git a/lib/Kconfig b/lib/Kconfig
index 5fe5776..7a91393 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -429,15 +429,50 @@
 	bool
 	default n
 
+config NEED_SG_DMA_LENGTH
+	bool
+
+config NEED_DMA_MAP_STATE
+	bool
+
+config ARCH_DMA_ADDR_T_64BIT
+	def_bool 64BIT || PHYS_ADDR_T_64BIT
+
+config IOMMU_HELPER
+	bool
+
+config ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	bool
+
+config ARCH_HAS_SYNC_DMA_FOR_CPU
+	bool
+	select NEED_DMA_MAP_STATE
+
 config DMA_DIRECT_OPS
 	bool
-	depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
-	default n
+	depends on HAS_DMA
+
+config DMA_NONCOHERENT_OPS
+	bool
+	depends on HAS_DMA
+	select DMA_DIRECT_OPS
+
+config DMA_NONCOHERENT_MMAP
+	bool
+	depends on DMA_NONCOHERENT_OPS
+
+config DMA_NONCOHERENT_CACHE_SYNC
+	bool
+	depends on DMA_NONCOHERENT_OPS
 
 config DMA_VIRT_OPS
 	bool
-	depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
-	default n
+	depends on HAS_DMA
+
+config SWIOTLB
+	bool
+	select DMA_DIRECT_OPS
+	select NEED_DMA_MAP_STATE
 
 config CHECK_SIGNATURE
 	bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c40c7b7..eb885942 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1634,7 +1634,7 @@
 
 config DMA_API_DEBUG
 	bool "Enable debugging of DMA-API usage"
-	depends on HAVE_DMA_API_DEBUG
+	select NEED_DMA_MAP_STATE
 	help
 	  Enable this option to debug the use of the DMA API by device drivers.
 	  With this option you will be able to detect common bugs in device
@@ -1651,6 +1651,23 @@
 
 	  If unsure, say N.
 
+config DMA_API_DEBUG_SG
+	bool "Debug DMA scatter-gather usage"
+	default y
+	depends on DMA_API_DEBUG
+	help
+	  Perform extra checking that callers of dma_map_sg() have respected the
+	  appropriate segment length/boundary limits for the given device when
+	  preparing DMA scatterlists.
+
+	  This is particularly likely to have been overlooked in cases where the
+	  dma_map_sg() API is used for general bulk mapping of pages rather than
+	  preparing literal scatter-gather descriptors, where there is a risk of
+	  unexpected behaviour from DMA API implementations if the scatterlist
+	  is technically out-of-spec.
+
+	  If unsure, say N.
+
 menuconfig RUNTIME_TESTING_MENU
 	bool "Runtime Testing"
 	def_bool y
@@ -1785,6 +1802,9 @@
 config TEST_UUID
 	tristate "Test functions located in the uuid module at runtime"
 
+config TEST_OVERFLOW
+	tristate "Test check_*_overflow() functions at runtime"
+
 config TEST_RHASHTABLE
 	tristate "Perform selftest on resizable hash table"
 	default n
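
The new DMA_API_DEBUG_SG checking only has teeth when a driver publishes
its limits through dev->dma_parms. A hedged driver-side sketch (the
limits are made-up example values; assumes the bus core, e.g. the PCI
core, has allocated dev->dma_parms):

	#include <linux/dma-mapping.h>
	#include <linux/sizes.h>

	static int mydrv_setup_dma(struct device *dev)
	{
		int ret;

		/* hypothetical hardware limit: 64K per sg segment */
		ret = dma_set_max_seg_size(dev, SZ_64K);
		if (ret)
			return ret;

		/* hypothetical: segments must not cross a 4 GiB boundary */
		return dma_set_seg_boundary(dev, DMA_BIT_MASK(32));
	}
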
diff --git a/lib/Makefile b/lib/Makefile
index ce20696..84c6dcb 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -30,6 +30,7 @@
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
+lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
 lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y	+= kobject.o klist.o
@@ -59,6 +60,7 @@
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
+obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o
 obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 obj-$(CONFIG_TEST_SORT) += test_sort.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
@@ -147,7 +149,7 @@
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
-obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 7f5cdc1..c007d25 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -41,6 +41,11 @@
 #define HASH_FN_SHIFT   13
 #define HASH_FN_MASK    (HASH_SIZE - 1)
 
+/* allow architectures to override this if absolutely required */
+#ifndef PREALLOC_DMA_DEBUG_ENTRIES
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+#endif
+
 enum {
 	dma_debug_single,
 	dma_debug_page,
@@ -127,7 +132,7 @@
 static u32 nr_total_entries;
 
 /* number of preallocated entries requested by kernel cmdline */
-static u32 req_entries;
+static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
 
 /* debugfs dentry's for the stuff above */
 static struct dentry *dma_debug_dent        __read_mostly;
@@ -439,7 +444,6 @@
 		spin_unlock_irqrestore(&bucket->lock, flags);
 	}
 }
-EXPORT_SYMBOL(debug_dma_dump_mappings);
 
 /*
  * For each mapping (initial cacheline in the case of
@@ -748,7 +752,6 @@
 
 	return ret;
 }
-EXPORT_SYMBOL(dma_debug_resize_entries);
 
 /*
  * DMA-API debugging init code
@@ -1004,10 +1007,7 @@
 	bus_register_notifier(bus, nb);
 }
 
-/*
- * Let the architectures decide how many entries should be preallocated.
- */
-void dma_debug_init(u32 num_entries)
+static int dma_debug_init(void)
 {
 	int i;
 
@@ -1015,7 +1015,7 @@
 	 * called to set dma_debug_initialized
 	 */
 	if (global_disable)
-		return;
+		return 0;
 
 	for (i = 0; i < HASH_SIZE; ++i) {
 		INIT_LIST_HEAD(&dma_entry_hash[i].list);
@@ -1026,17 +1026,14 @@
 		pr_err("DMA-API: error creating debugfs entries - disabling\n");
 		global_disable = true;
 
-		return;
+		return 0;
 	}
 
-	if (req_entries)
-		num_entries = req_entries;
-
-	if (prealloc_memory(num_entries) != 0) {
+	if (prealloc_memory(nr_prealloc_entries) != 0) {
 		pr_err("DMA-API: debugging out of memory error - disabled\n");
 		global_disable = true;
 
-		return;
+		return 0;
 	}
 
 	nr_total_entries = num_free_entries;
@@ -1044,7 +1041,9 @@
 	dma_debug_initialized = true;
 
 	pr_info("DMA-API: debugging enabled by kernel config\n");
+	return 0;
 }
+core_initcall(dma_debug_init);
 
 static __init int dma_debug_cmdline(char *str)
 {
@@ -1061,16 +1060,10 @@
 
 static __init int dma_debug_entries_cmdline(char *str)
 {
-	int res;
-
 	if (!str)
 		return -EINVAL;
-
-	res = get_option(&str, &req_entries);
-
-	if (!res)
-		req_entries = 0;
-
+	if (!get_option(&str, &nr_prealloc_entries))
+		nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
 	return 0;
 }
 
@@ -1293,6 +1286,32 @@
 	put_hash_bucket(bucket, &flags);
 }
 
+static void check_sg_segment(struct device *dev, struct scatterlist *sg)
+{
+#ifdef CONFIG_DMA_API_DEBUG_SG
+	unsigned int max_seg = dma_get_max_seg_size(dev);
+	u64 start, end, boundary = dma_get_seg_boundary(dev);
+
+	/*
+	 * Either the driver forgot to set dma_parms appropriately, or
+	 * whoever generated the list forgot to check them.
+	 */
+	if (sg->length > max_seg)
+		err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
+			   sg->length, max_seg);
+	/*
+	 * In some cases this could potentially be the DMA API
+	 * implementation's fault, but it would usually imply that
+	 * the scatterlist was built inappropriately to begin with.
+	 */
+	start = sg_dma_address(sg);
+	end = start + sg_dma_len(sg) - 1;
+	if ((start ^ end) & ~boundary)
+		err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
+			   start, end, boundary);
+#endif
+}
+
 void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 			size_t size, int direction, dma_addr_t dma_addr,
 			bool map_single)
@@ -1423,6 +1442,8 @@
 			check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
 		}
 
+		check_sg_segment(dev, s);
+
 		add_dma_entry(entry);
 	}
 }
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index bbfb229..8be8106 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -34,6 +34,13 @@
 		const char *caller)
 {
 	if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
+		if (!dev->dma_mask) {
+			dev_err(dev,
+				"%s: call on device without dma_mask\n",
+				caller);
+			return false;
+		}
+
 		if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
 			dev_err(dev,
 				"%s: overflow %pad+%zu of device mask %llx\n",
@@ -84,6 +91,13 @@
 		__free_pages(page, page_order);
 		page = NULL;
 
+		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+		    dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
+		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
+			gfp |= GFP_DMA32;
+			goto again;
+		}
+
 		if (IS_ENABLED(CONFIG_ZONE_DMA) &&
 		    dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
 		    !(gfp & GFP_DMA)) {
@@ -121,7 +135,7 @@
 		free_pages((unsigned long)cpu_addr, page_order);
 }
 
-static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
+dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
 		unsigned long attrs)
 {
@@ -132,8 +146,8 @@
 	return dma_addr;
 }
 
-static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
+int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+		enum dma_data_direction dir, unsigned long attrs)
 {
 	int i;
 	struct scatterlist *sg;
@@ -165,10 +179,16 @@
 	if (mask < DMA_BIT_MASK(32))
 		return 0;
 #endif
+	/*
+	 * Various PCI/PCIe bridges have broken support for > 32bit DMA even
+	 * if the device itself might support it.
+	 */
+	if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
+		return 0;
 	return 1;
 }
 
-static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
+int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
 	return dma_addr == DIRECT_MAPPING_ERROR;
 }
@@ -180,6 +200,5 @@
 	.map_sg			= dma_direct_map_sg,
 	.dma_supported		= dma_direct_supported,
 	.mapping_error		= dma_direct_mapping_error,
-	.is_phys		= 1,
 };
 EXPORT_SYMBOL(dma_direct_ops);
diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c
new file mode 100644
index 0000000..79e9a75
--- /dev/null
+++ b/lib/dma-noncoherent.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Christoph Hellwig.
+ *
+ * DMA operations that map physical memory directly without providing cache
+ * coherence.
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/scatterlist.h>
+
+static void dma_noncoherent_sync_single_for_device(struct device *dev,
+		dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_noncoherent_sync_sg_for_device(struct device *dev,
+		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+}
+
+static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
+		unsigned long offset, size_t size, enum dma_data_direction dir,
+		unsigned long attrs)
+{
+	dma_addr_t addr;
+
+	addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+	if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
+				size, dir);
+	return addr;
+}
+
+static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
+	if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
+	return nents;
+}
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
+		dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
+		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+}
+
+static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
+}
+#endif
+
+const struct dma_map_ops dma_noncoherent_ops = {
+	.alloc			= arch_dma_alloc,
+	.free			= arch_dma_free,
+	.mmap			= arch_dma_mmap,
+	.sync_single_for_device	= dma_noncoherent_sync_single_for_device,
+	.sync_sg_for_device	= dma_noncoherent_sync_sg_for_device,
+	.map_page		= dma_noncoherent_map_page,
+	.map_sg			= dma_noncoherent_map_sg,
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+	.sync_single_for_cpu	= dma_noncoherent_sync_single_for_cpu,
+	.sync_sg_for_cpu	= dma_noncoherent_sync_sg_for_cpu,
+	.unmap_page		= dma_noncoherent_unmap_page,
+	.unmap_sg		= dma_noncoherent_unmap_sg,
+#endif
+	.dma_supported		= dma_direct_supported,
+	.mapping_error		= dma_direct_mapping_error,
+	.cache_sync		= arch_dma_cache_sync,
+};
+EXPORT_SYMBOL(dma_noncoherent_ops);
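
An architecture opting into DMA_NONCOHERENT_OPS supplies
arch_dma_alloc()/arch_dma_free() plus the arch_sync_dma_for_*() cache
maintenance hooks declared in <linux/dma-noncoherent.h>. A minimal sketch
of the device-direction hook (my_dcache_wb_inv() stands in for whatever
cache primitive the port actually has):

	#include <linux/dma-noncoherent.h>
	#include <linux/mm.h>

	void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
			size_t size, enum dma_data_direction dir)
	{
		/* make CPU-side writes visible before the device DMAs */
		my_dcache_wb_inv(phys_to_virt(paddr), size);
	}
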
diff --git a/lib/iommu-common.c b/lib/iommu-common.c
deleted file mode 100644
index 55b00de..0000000
--- a/lib/iommu-common.c
+++ /dev/null
@@ -1,267 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * IOMMU mmap management and range allocation functions.
- * Based almost entirely upon the powerpc iommu allocator.
- */
-
-#include <linux/export.h>
-#include <linux/bitmap.h>
-#include <linux/bug.h>
-#include <linux/iommu-helper.h>
-#include <linux/iommu-common.h>
-#include <linux/dma-mapping.h>
-#include <linux/hash.h>
-
-static unsigned long iommu_large_alloc = 15;
-
-static	DEFINE_PER_CPU(unsigned int, iommu_hash_common);
-
-static inline bool need_flush(struct iommu_map_table *iommu)
-{
-	return ((iommu->flags & IOMMU_NEED_FLUSH) != 0);
-}
-
-static inline void set_flush(struct iommu_map_table *iommu)
-{
-	iommu->flags |= IOMMU_NEED_FLUSH;
-}
-
-static inline void clear_flush(struct iommu_map_table *iommu)
-{
-	iommu->flags &= ~IOMMU_NEED_FLUSH;
-}
-
-static void setup_iommu_pool_hash(void)
-{
-	unsigned int i;
-	static bool do_once;
-
-	if (do_once)
-		return;
-	do_once = true;
-	for_each_possible_cpu(i)
-		per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
-}
-
-/*
- * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
- * is the number of table entries. If `large_pool' is set to true,
- * the top 1/4 of the table will be set aside for pool allocations
- * of more than iommu_large_alloc pages.
- */
-void iommu_tbl_pool_init(struct iommu_map_table *iommu,
-			 unsigned long num_entries,
-			 u32 table_shift,
-			 void (*lazy_flush)(struct iommu_map_table *),
-			 bool large_pool, u32 npools,
-			 bool skip_span_boundary_check)
-{
-	unsigned int start, i;
-	struct iommu_pool *p = &(iommu->large_pool);
-
-	setup_iommu_pool_hash();
-	if (npools == 0)
-		iommu->nr_pools = IOMMU_NR_POOLS;
-	else
-		iommu->nr_pools = npools;
-	BUG_ON(npools > IOMMU_NR_POOLS);
-
-	iommu->table_shift = table_shift;
-	iommu->lazy_flush = lazy_flush;
-	start = 0;
-	if (skip_span_boundary_check)
-		iommu->flags |= IOMMU_NO_SPAN_BOUND;
-	if (large_pool)
-		iommu->flags |= IOMMU_HAS_LARGE_POOL;
-
-	if (!large_pool)
-		iommu->poolsize = num_entries/iommu->nr_pools;
-	else
-		iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
-	for (i = 0; i < iommu->nr_pools; i++) {
-		spin_lock_init(&(iommu->pools[i].lock));
-		iommu->pools[i].start = start;
-		iommu->pools[i].hint = start;
-		start += iommu->poolsize; /* start for next pool */
-		iommu->pools[i].end = start - 1;
-	}
-	if (!large_pool)
-		return;
-	/* initialize large_pool */
-	spin_lock_init(&(p->lock));
-	p->start = start;
-	p->hint = p->start;
-	p->end = num_entries;
-}
-EXPORT_SYMBOL(iommu_tbl_pool_init);
-
-unsigned long iommu_tbl_range_alloc(struct device *dev,
-				struct iommu_map_table *iommu,
-				unsigned long npages,
-				unsigned long *handle,
-				unsigned long mask,
-				unsigned int align_order)
-{
-	unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
-	unsigned long n, end, start, limit, boundary_size;
-	struct iommu_pool *pool;
-	int pass = 0;
-	unsigned int pool_nr;
-	unsigned int npools = iommu->nr_pools;
-	unsigned long flags;
-	bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
-	bool largealloc = (large_pool && npages > iommu_large_alloc);
-	unsigned long shift;
-	unsigned long align_mask = 0;
-
-	if (align_order > 0)
-		align_mask = ~0ul >> (BITS_PER_LONG - align_order);
-
-	/* Sanity check */
-	if (unlikely(npages == 0)) {
-		WARN_ON_ONCE(1);
-		return IOMMU_ERROR_CODE;
-	}
-
-	if (largealloc) {
-		pool = &(iommu->large_pool);
-		pool_nr = 0; /* to keep compiler happy */
-	} else {
-		/* pick out pool_nr */
-		pool_nr =  pool_hash & (npools - 1);
-		pool = &(iommu->pools[pool_nr]);
-	}
-	spin_lock_irqsave(&pool->lock, flags);
-
- again:
-	if (pass == 0 && handle && *handle &&
-	    (*handle >= pool->start) && (*handle < pool->end))
-		start = *handle;
-	else
-		start = pool->hint;
-
-	limit = pool->end;
-
-	/* The case below can happen if we have a small segment appended
-	 * to a large, or when the previous alloc was at the very end of
-	 * the available space. If so, go back to the beginning. If a
-	 * flush is needed, it will get done based on the return value
-	 * from iommu_area_alloc() below.
-	 */
-	if (start >= limit)
-		start = pool->start;
-	shift = iommu->table_map_base >> iommu->table_shift;
-	if (limit + shift > mask) {
-		limit = mask - shift + 1;
-		/* If we're constrained on address range, first try
-		 * at the masked hint to avoid O(n) search complexity,
-		 * but on second pass, start at 0 in pool 0.
-		 */
-		if ((start & mask) >= limit || pass > 0) {
-			spin_unlock(&(pool->lock));
-			pool = &(iommu->pools[0]);
-			spin_lock(&(pool->lock));
-			start = pool->start;
-		} else {
-			start &= mask;
-		}
-	}
-
-	if (dev)
-		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-				      1 << iommu->table_shift);
-	else
-		boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
-
-	boundary_size = boundary_size >> iommu->table_shift;
-	/*
-	 * if the skip_span_boundary_check had been set during init, we set
-	 * things up so that iommu_is_span_boundary() merely checks if the
-	 * (index + npages) < num_tsb_entries
-	 */
-	if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
-		shift = 0;
-		boundary_size = iommu->poolsize * iommu->nr_pools;
-	}
-	n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
-			     boundary_size, align_mask);
-	if (n == -1) {
-		if (likely(pass == 0)) {
-			/* First failure, rescan from the beginning.  */
-			pool->hint = pool->start;
-			set_flush(iommu);
-			pass++;
-			goto again;
-		} else if (!largealloc && pass <= iommu->nr_pools) {
-			spin_unlock(&(pool->lock));
-			pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
-			pool = &(iommu->pools[pool_nr]);
-			spin_lock(&(pool->lock));
-			pool->hint = pool->start;
-			set_flush(iommu);
-			pass++;
-			goto again;
-		} else {
-			/* give up */
-			n = IOMMU_ERROR_CODE;
-			goto bail;
-		}
-	}
-	if (iommu->lazy_flush &&
-	    (n < pool->hint || need_flush(iommu))) {
-		clear_flush(iommu);
-		iommu->lazy_flush(iommu);
-	}
-
-	end = n + npages;
-	pool->hint = end;
-
-	/* Update handle for SG allocations */
-	if (handle)
-		*handle = end;
-bail:
-	spin_unlock_irqrestore(&(pool->lock), flags);
-
-	return n;
-}
-EXPORT_SYMBOL(iommu_tbl_range_alloc);
-
-static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
-				   unsigned long entry)
-{
-	struct iommu_pool *p;
-	unsigned long largepool_start = tbl->large_pool.start;
-	bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
-
-	/* The large pool is the last pool at the top of the table */
-	if (large_pool && entry >= largepool_start) {
-		p = &tbl->large_pool;
-	} else {
-		unsigned int pool_nr = entry / tbl->poolsize;
-
-		BUG_ON(pool_nr >= tbl->nr_pools);
-		p = &tbl->pools[pool_nr];
-	}
-	return p;
-}
-
-/* Caller supplies the index of the entry into the iommu map table
- * itself when the mapping from dma_addr to the entry is not the
- * default addr->entry mapping below.
- */
-void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
-			  unsigned long npages, unsigned long entry)
-{
-	struct iommu_pool *pool;
-	unsigned long flags;
-	unsigned long shift = iommu->table_shift;
-
-	if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */
-		entry = (dma_addr - iommu->table_map_base) >> shift;
-	pool = get_pool(iommu, entry);
-
-	spin_lock_irqsave(&(pool->lock), flags);
-	bitmap_clear(iommu->map, entry, npages);
-	spin_unlock_irqrestore(&(pool->lock), flags);
-}
-EXPORT_SYMBOL(iommu_tbl_range_free);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 23633c0..92a9f24 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -3,19 +3,8 @@
  * IOMMU helper functions for the free area management
  */
 
-#include <linux/export.h>
 #include <linux/bitmap.h>
-#include <linux/bug.h>
-
-int iommu_is_span_boundary(unsigned int index, unsigned int nr,
-			   unsigned long shift,
-			   unsigned long boundary_size)
-{
-	BUG_ON(!is_power_of_2(boundary_size));
-
-	shift = (shift + index) & (boundary_size - 1);
-	return shift + nr > boundary_size;
-}
+#include <linux/iommu-helper.h>
 
 unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 			       unsigned long start, unsigned int nr,
@@ -38,4 +27,3 @@
 	}
 	return -1;
 }
-EXPORT_SYMBOL(iommu_area_alloc);
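
The helper itself is not lost: iommu_is_span_boundary() appears to move
into <linux/iommu-helper.h> as a static inline, avoiding the exported
out-of-line call. An equivalent inline, reconstructed from the deleted
body above:

	#include <linux/bug.h>
	#include <linux/log2.h>

	static inline int iommu_is_span_boundary(unsigned int index,
						 unsigned int nr,
						 unsigned long shift,
						 unsigned long boundary_size)
	{
		BUG_ON(!is_power_of_2(boundary_size));

		shift = (shift + index) & (boundary_size - 1);
		return shift + nr > boundary_size;
	}
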
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index fdae394..7e43cd5 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -573,6 +573,67 @@
 }
 EXPORT_SYMBOL(_copy_to_iter);
 
+#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
+static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+{
+	if (access_ok(VERIFY_WRITE, to, n)) {
+		kasan_check_read(from, n);
+		n = copy_to_user_mcsafe((__force void *) to, from, n);
+	}
+	return n;
+}
+
+static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+		const char *from, size_t len)
+{
+	unsigned long ret;
+	char *to;
+
+	to = kmap_atomic(page);
+	ret = memcpy_mcsafe(to + offset, from, len);
+	kunmap_atomic(to);
+
+	return ret;
+}
+
+size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+{
+	const char *from = addr;
+	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
+
+	if (unlikely(i->type & ITER_PIPE)) {
+		WARN_ON(1);
+		return 0;
+	}
+	if (iter_is_iovec(i))
+		might_fault();
+	iterate_and_advance(i, bytes, v,
+		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+		({
+		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
+				(from += v.bv_len) - v.bv_len, v.bv_len);
+		if (rem) {
+			curr_addr = (unsigned long) from;
+			bytes = curr_addr - s_addr - rem;
+			return bytes;
+		}
+		}),
+		({
+		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
+				v.iov_len);
+		if (rem) {
+			curr_addr = (unsigned long) from;
+			bytes = curr_addr - s_addr - rem;
+			return bytes;
+		}
+		})
+	)
+
+	return bytes;
+}
+EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
+#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
+
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
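
_copy_to_iter_mcsafe() returns the number of bytes actually copied, so a
short return tells the caller that memcpy_mcsafe() consumed a
machine-check (poisoned memory) event mid-copy. A hedged caller sketch
(read_from_pmem() and the -EIO policy are illustrative, not from this
diff):

	#include <linux/uio.h>

	static ssize_t read_from_pmem(void *kaddr, size_t len,
				      struct iov_iter *i)
	{
		size_t copied = _copy_to_iter_mcsafe(kaddr, len, i);

		/* short copy: memcpy_mcsafe() hit poisoned memory */
		if (copied != len)
			return -EIO;
		return copied;
	}
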
diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c
index 0ec3f25..1db74eb 100644
--- a/lib/reed_solomon/decode_rs.c
+++ b/lib/reed_solomon/decode_rs.c
@@ -1,22 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * lib/reed_solomon/decode_rs.c
- *
- * Overview:
- *   Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
  *
  * Copyright 2002, Phil Karn, KA9Q
  * May be used under the terms of the GNU General Public License (GPL)
  *
  * Adaptation to the kernel by Thomas Gleixner (tglx@linutronix.de)
  *
- * $Id: decode_rs.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
- *
- */
-
-/* Generic data width independent code which is included by the
- * wrappers.
+ * Generic data width independent code which is included by the wrappers.
  */
 {
+	struct rs_codec *rs = rsc->codec;
 	int deg_lambda, el, deg_omega;
 	int i, j, r, k, pad;
 	int nn = rs->nn;
@@ -27,16 +21,22 @@
 	uint16_t *alpha_to = rs->alpha_to;
 	uint16_t *index_of = rs->index_of;
 	uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error;
-	/* Err+Eras Locator poly and syndrome poly The maximum value
-	 * of nroots is 8. So the necessary stack size will be about
-	 * 220 bytes max.
-	 */
-	uint16_t lambda[nroots + 1], syn[nroots];
-	uint16_t b[nroots + 1], t[nroots + 1], omega[nroots + 1];
-	uint16_t root[nroots], reg[nroots + 1], loc[nroots];
 	int count = 0;
 	uint16_t msk = (uint16_t) rs->nn;
 
+	/*
+	 * The decoder buffers are in the rs control struct. They are
+	 * arrays sized [nroots + 1]
+	 */
+	uint16_t *lambda = rsc->buffers + RS_DECODE_LAMBDA * (nroots + 1);
+	uint16_t *syn = rsc->buffers + RS_DECODE_SYN * (nroots + 1);
+	uint16_t *b = rsc->buffers + RS_DECODE_B * (nroots + 1);
+	uint16_t *t = rsc->buffers + RS_DECODE_T * (nroots + 1);
+	uint16_t *omega = rsc->buffers + RS_DECODE_OMEGA * (nroots + 1);
+	uint16_t *root = rsc->buffers + RS_DECODE_ROOT * (nroots + 1);
+	uint16_t *reg = rsc->buffers + RS_DECODE_REG * (nroots + 1);
+	uint16_t *loc = rsc->buffers + RS_DECODE_LOC * (nroots + 1);
+
 	/* Check length parameter for validity */
 	pad = nn - nroots - len;
 	BUG_ON(pad < 0 || pad >= nn);
diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c
index 0b5b1a6..9112d46 100644
--- a/lib/reed_solomon/encode_rs.c
+++ b/lib/reed_solomon/encode_rs.c
@@ -1,23 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * lib/reed_solomon/encode_rs.c
- *
- * Overview:
- *   Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
  *
  * Copyright 2002, Phil Karn, KA9Q
  * May be used under the terms of the GNU General Public License (GPL)
  *
  * Adaptation to the kernel by Thomas Gleixner (tglx@linutronix.de)
  *
- * $Id: encode_rs.c,v 1.5 2005/11/07 11:14:59 gleixner Exp $
- *
- */
-
-/* Generic data width independent code which is included by the
- * wrappers.
- * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par)
+ * Generic data width independent code which is included by the wrappers.
  */
 {
+	struct rs_codec *rs = rsc->codec;
 	int i, j, pad;
 	int nn = rs->nn;
 	int nroots = rs->nroots;
diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c
index 06d04cf..dfcf542 100644
--- a/lib/reed_solomon/reed_solomon.c
+++ b/lib/reed_solomon/reed_solomon.c
@@ -1,43 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * lib/reed_solomon/reed_solomon.c
- *
- * Overview:
- *   Generic Reed Solomon encoder / decoder library
+ * Generic Reed Solomon encoder / decoder library
  *
  * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de)
  *
  * Reed Solomon code lifted from reed solomon library written by Phil Karn
  * Copyright 2002 Phil Karn, KA9Q
  *
- * $Id: rslib.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
  * Description:
  *
  * The generic Reed Solomon library provides runtime configurable
  * encoding / decoding of RS codes.
- * Each user must call init_rs to get a pointer to a rs_control
- * structure for the given rs parameters. This structure is either
- * generated or a already available matching control structure is used.
- * If a structure is generated then the polynomial arrays for
- * fast encoding / decoding are built. This can take some time so
- * make sure not to call this function from a time critical path.
- * Usually a module / driver should initialize the necessary
- * rs_control structure on module / driver init and release it
- * on exit.
- * The encoding puts the calculated syndrome into a given syndrome
- * buffer.
- * The decoding is a two step process. The first step calculates
- * the syndrome over the received (data + syndrome) and calls the
- * second stage, which does the decoding / error correction itself.
- * Many hw encoders provide a syndrome calculation over the received
- * data + syndrome and can call the second stage directly.
  *
+ * Each user must call init_rs to get a pointer to a rs_control structure
+ * for the given rs parameters. The control struct is unique per instance.
+ * It points to a codec which can be shared by multiple control structures.
+ * If a codec is newly allocated then the polynomial arrays for fast
+ * encoding / decoding are built. This can take some time so make sure not
+ * to call this function from a time critical path.  Usually a module /
+ * driver should initialize the necessary rs_control structure on module /
+ * driver init and release it on exit.
+ *
+ * The encoding puts the calculated syndrome into a given syndrome buffer.
+ *
+ * The decoding is a two step process. The first step calculates the
+ * syndrome over the received (data + syndrome) and calls the second stage,
+ * which does the decoding / error correction itself.  Many hw encoders
+ * provide a syndrome calculation over the received data + syndrome and can
+ * call the second stage directly.
  */
-
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -46,32 +37,44 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 
-/* This list holds all currently allocated rs control structures */
-static LIST_HEAD (rslist);
+enum {
+	RS_DECODE_LAMBDA,
+	RS_DECODE_SYN,
+	RS_DECODE_B,
+	RS_DECODE_T,
+	RS_DECODE_OMEGA,
+	RS_DECODE_ROOT,
+	RS_DECODE_REG,
+	RS_DECODE_LOC,
+	RS_DECODE_NUM_BUFFERS
+};
+
+/* This list holds all currently allocated rs codec structures */
+static LIST_HEAD(codec_list);
 /* Protection for the list */
 static DEFINE_MUTEX(rslistlock);
 
 /**
- * rs_init - Initialize a Reed-Solomon codec
+ * codec_init - Initialize a Reed-Solomon codec
  * @symsize:	symbol size, bits (1-8)
  * @gfpoly:	Field generator polynomial coefficients
  * @gffunc:	Field generator function
  * @fcr:	first root of RS code generator polynomial, index form
  * @prim:	primitive element to generate polynomial roots
  * @nroots:	RS code generator polynomial degree (number of roots)
+ * @gfp:	GFP_ flags for allocations
  *
- * Allocate a control structure and the polynom arrays for faster
+ * Allocate a codec structure and the polynom arrays for faster
  * en/decoding. Fill the arrays according to the given parameters.
  */
-static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
-                                  int fcr, int prim, int nroots)
+static struct rs_codec *codec_init(int symsize, int gfpoly, int (*gffunc)(int),
+				   int fcr, int prim, int nroots, gfp_t gfp)
 {
-	struct rs_control *rs;
 	int i, j, sr, root, iprim;
+	struct rs_codec *rs;
 
-	/* Allocate the control structure */
-	rs = kmalloc(sizeof (struct rs_control), GFP_KERNEL);
-	if (rs == NULL)
+	rs = kzalloc(sizeof(*rs), gfp);
+	if (!rs)
 		return NULL;
 
 	INIT_LIST_HEAD(&rs->list);
@@ -85,17 +88,17 @@
 	rs->gffunc = gffunc;
 
 	/* Allocate the arrays */
-	rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL);
+	rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp);
 	if (rs->alpha_to == NULL)
-		goto errrs;
+		goto err;
 
-	rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL);
+	rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), gfp);
 	if (rs->index_of == NULL)
-		goto erralp;
+		goto err;
 
-	rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), GFP_KERNEL);
+	rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), gfp);
 	if(rs->genpoly == NULL)
-		goto erridx;
+		goto err;
 
 	/* Generate Galois field lookup tables */
 	rs->index_of[0] = rs->nn;	/* log(zero) = -inf */
@@ -120,7 +123,7 @@
 	}
 	/* If it's not primitive, exit */
 	if(sr != rs->alpha_to[0])
-		goto errpol;
+		goto err;
 
 	/* Find prim-th root of 1, used in decoding */
 	for(iprim = 1; (iprim % prim) != 0; iprim += rs->nn);
@@ -148,42 +151,52 @@
 	/* convert rs->genpoly[] to index form for quicker encoding */
 	for (i = 0; i <= nroots; i++)
 		rs->genpoly[i] = rs->index_of[rs->genpoly[i]];
+
+	rs->users = 1;
+	list_add(&rs->list, &codec_list);
 	return rs;
 
-	/* Error exit */
-errpol:
+err:
 	kfree(rs->genpoly);
-erridx:
 	kfree(rs->index_of);
-erralp:
 	kfree(rs->alpha_to);
-errrs:
 	kfree(rs);
 	return NULL;
 }
 
 
 /**
- *  free_rs - Free the rs control structure, if it is no longer used
- *  @rs:	the control structure which is not longer used by the
+ *  free_rs - Free the rs control structure
+ *  @rs:	The control structure which is no longer used by the
  *		caller
+ *
+ * Free the control structure. If @rs is the last user of the associated
+ * codec, free the codec as well.
  */
 void free_rs(struct rs_control *rs)
 {
+	struct rs_codec *cd;
+
+	if (!rs)
+		return;
+
+	cd = rs->codec;
 	mutex_lock(&rslistlock);
-	rs->users--;
-	if(!rs->users) {
-		list_del(&rs->list);
-		kfree(rs->alpha_to);
-		kfree(rs->index_of);
-		kfree(rs->genpoly);
-		kfree(rs);
+	cd->users--;
+	if(!cd->users) {
+		list_del(&cd->list);
+		kfree(cd->alpha_to);
+		kfree(cd->index_of);
+		kfree(cd->genpoly);
+		kfree(cd);
 	}
 	mutex_unlock(&rslistlock);
+	kfree(rs);
 }
+EXPORT_SYMBOL_GPL(free_rs);
 
 /**
- * init_rs_internal - Find a matching or allocate a new rs control structure
+ * init_rs_internal - Allocate rs control, find a matching codec or allocate a new one
  *  @symsize:	the symbol size (number of bits)
  *  @gfpoly:	the extended Galois field generator polynomial coefficients,
  *		with the 0th coefficient in the low order bit. The polynomial
@@ -191,55 +204,69 @@
  *  @gffunc:	pointer to function to generate the next field element,
  *		or the multiplicative identity element if given 0.  Used
  *		instead of gfpoly if gfpoly is 0
- *  @fcr:  	the first consecutive root of the rs code generator polynomial
+ *  @fcr:	the first consecutive root of the rs code generator polynomial
  *		in index form
  *  @prim:	primitive element to generate polynomial roots
  *  @nroots:	RS code generator polynomial degree (number of roots)
+ *  @gfp:	GFP_ flags for allocations
  */
 static struct rs_control *init_rs_internal(int symsize, int gfpoly,
-                                           int (*gffunc)(int), int fcr,
-                                           int prim, int nroots)
+					   int (*gffunc)(int), int fcr,
+					   int prim, int nroots, gfp_t gfp)
 {
-	struct list_head	*tmp;
-	struct rs_control	*rs;
+	struct list_head *tmp;
+	struct rs_control *rs;
+	unsigned int bsize;
 
 	/* Sanity checks */
 	if (symsize < 1)
 		return NULL;
 	if (fcr < 0 || fcr >= (1<<symsize))
-    		return NULL;
+		return NULL;
 	if (prim <= 0 || prim >= (1<<symsize))
-    		return NULL;
+		return NULL;
 	if (nroots < 0 || nroots >= (1<<symsize))
 		return NULL;
 
+	/*
+	 * The decoder needs buffers in each control struct instance to
+	 * avoid variable-size or large fixed-size allocations on the
+	 * stack. Size the buffers as arrays of [nroots + 1] elements.
+	 */
+	bsize = sizeof(uint16_t) * RS_DECODE_NUM_BUFFERS * (nroots + 1);
+	rs = kzalloc(sizeof(*rs) + bsize, gfp);
+	if (!rs)
+		return NULL;
+
 	mutex_lock(&rslistlock);
 
 	/* Walk through the list and look for a matching entry */
-	list_for_each(tmp, &rslist) {
-		rs = list_entry(tmp, struct rs_control, list);
-		if (symsize != rs->mm)
+	list_for_each(tmp, &codec_list) {
+		struct rs_codec *cd = list_entry(tmp, struct rs_codec, list);
+
+		if (symsize != cd->mm)
 			continue;
-		if (gfpoly != rs->gfpoly)
+		if (gfpoly != cd->gfpoly)
 			continue;
-		if (gffunc != rs->gffunc)
+		if (gffunc != cd->gffunc)
 			continue;
-		if (fcr != rs->fcr)
+		if (fcr != cd->fcr)
 			continue;
-		if (prim != rs->prim)
+		if (prim != cd->prim)
 			continue;
-		if (nroots != rs->nroots)
+		if (nroots != cd->nroots)
 			continue;
 		/* We have a matching one already */
-		rs->users++;
+		cd->users++;
+		rs->codec = cd;
 		goto out;
 	}
 
 	/* Create a new one */
-	rs = rs_init(symsize, gfpoly, gffunc, fcr, prim, nroots);
-	if (rs) {
-		rs->users = 1;
-		list_add(&rs->list, &rslist);
+	rs->codec = codec_init(symsize, gfpoly, gffunc, fcr, prim, nroots, gfp);
+	if (!rs->codec) {
+		kfree(rs);
+		rs = NULL;
 	}
 out:
 	mutex_unlock(&rslistlock);
@@ -247,45 +274,48 @@
 }
 
 /**
- * init_rs - Find a matching or allocate a new rs control structure
+ * init_rs_gfp - Create a RS control struct and initialize it
  *  @symsize:	the symbol size (number of bits)
  *  @gfpoly:	the extended Galois field generator polynomial coefficients,
  *		with the 0th coefficient in the low order bit. The polynomial
  *		must be primitive;
- *  @fcr:  	the first consecutive root of the rs code generator polynomial
+ *  @fcr:	the first consecutive root of the rs code generator polynomial
  *		in index form
  *  @prim:	primitive element to generate polynomial roots
  *  @nroots:	RS code generator polynomial degree (number of roots)
+ *  @gfp:	GFP_ flags for allocations
  */
-struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
-                           int nroots)
+struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim,
+			       int nroots, gfp_t gfp)
 {
-	return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots);
+	return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots, gfp);
 }
+EXPORT_SYMBOL_GPL(init_rs_gfp);
 
 /**
- * init_rs_non_canonical - Find a matching or allocate a new rs control
- *                         structure, for fields with non-canonical
- *                         representation
+ * init_rs_non_canonical - Allocate rs control struct for fields with
+ *                         non-canonical representation
  *  @symsize:	the symbol size (number of bits)
  *  @gffunc:	pointer to function to generate the next field element,
  *		or the multiplicative identity element if given 0.  Used
  *		instead of gfpoly if gfpoly is 0
- *  @fcr:  	the first consecutive root of the rs code generator polynomial
+ *  @fcr:	the first consecutive root of the rs code generator polynomial
  *		in index form
  *  @prim:	primitive element to generate polynomial roots
  *  @nroots:	RS code generator polynomial degree (number of roots)
  */
 struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int),
-                                         int fcr, int prim, int nroots)
+					 int fcr, int prim, int nroots)
 {
-	return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots);
+	return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots,
+				GFP_KERNEL);
 }
+EXPORT_SYMBOL_GPL(init_rs_non_canonical);
 
 #ifdef CONFIG_REED_SOLOMON_ENC8
 /**
  *  encode_rs8 - Calculate the parity for data values (8bit data width)
- *  @rs:	the rs control structure
+ *  @rsc:	the rs control structure
  *  @data:	data field of a given type
  *  @len:	data length
  *  @par:	parity data, must be initialized by caller (usually all 0)
@@ -295,7 +325,7 @@
  *  symbol size > 8. The calling code must take care of encoding of the
  *  syndrome result for storage itself.
  */
-int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par,
+int encode_rs8(struct rs_control *rsc, uint8_t *data, int len, uint16_t *par,
 	       uint16_t invmsk)
 {
 #include "encode_rs.c"
@@ -306,7 +336,7 @@
 #ifdef CONFIG_REED_SOLOMON_DEC8
 /**
  *  decode_rs8 - Decode codeword (8bit data width)
- *  @rs:	the rs control structure
+ *  @rsc:	the rs control structure
  *  @data:	data field of a given type
  *  @par:	received parity data field
  *  @len:	data length
@@ -319,9 +349,14 @@
  *  The syndrome and parity uses a uint16_t data type to enable
  *  symbol size > 8. The calling code must take care of decoding of the
  *  syndrome result and the received parity before calling this code.
+ *
+ *  Note: The rs_control struct @rsc contains buffers which are used for
+ *  decoding, so the caller has to ensure that decoder invocations are
+ *  serialized.
+ *
  *  Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
  */
-int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len,
+int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len,
 	       uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
 	       uint16_t *corr)
 {
@@ -333,7 +368,7 @@
 #ifdef CONFIG_REED_SOLOMON_ENC16
 /**
  *  encode_rs16 - Calculate the parity for data values (16bit data width)
- *  @rs:	the rs control structure
+ *  @rsc:	the rs control structure
  *  @data:	data field of a given type
  *  @len:	data length
  *  @par:	parity data, must be initialized by caller (usually all 0)
@@ -341,7 +376,7 @@
  *
  *  Each field in the data array contains up to symbol size bits of valid data.
  */
-int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par,
+int encode_rs16(struct rs_control *rsc, uint16_t *data, int len, uint16_t *par,
 	uint16_t invmsk)
 {
 #include "encode_rs.c"
@@ -352,7 +387,7 @@
 #ifdef CONFIG_REED_SOLOMON_DEC16
 /**
  *  decode_rs16 - Decode codeword (16bit data width)
- *  @rs:	the rs control structure
+ *  @rsc:	the rs control structure
  *  @data:	data field of a given type
  *  @par:	received parity data field
  *  @len:	data length
@@ -363,9 +398,14 @@
  *  @corr:	buffer to store correction bitmask on eras_pos
  *
  *  Each field in the data array contains up to symbol size bits of valid data.
+ *
+ *  Note: The rs_control struct @rsc contains buffers which are used for
+ *  decoding, so the caller has to ensure that decoder invocations are
+ *  serialized.
+ *
  *  Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
  */
-int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len,
+int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len,
 		uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
 		uint16_t *corr)
 {
@@ -374,10 +414,6 @@
 EXPORT_SYMBOL_GPL(decode_rs16);
 #endif
 
-EXPORT_SYMBOL_GPL(init_rs);
-EXPORT_SYMBOL_GPL(init_rs_non_canonical);
-EXPORT_SYMBOL_GPL(free_rs);
-
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Reed Solomon encoder/decoder");
 MODULE_AUTHOR("Phil Karn, Thomas Gleixner");
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index e6a9c06..6fdc626 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -270,18 +270,33 @@
 }
 EXPORT_SYMBOL_GPL(sbitmap_bitmap_show);
 
-static unsigned int sbq_calc_wake_batch(unsigned int depth)
+static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq,
+					unsigned int depth)
 {
 	unsigned int wake_batch;
+	unsigned int shallow_depth;
 
 	/*
 	 * For each batch, we wake up one queue. We need to make sure that our
-	 * batch size is small enough that the full depth of the bitmap is
-	 * enough to wake up all of the queues.
+	 * batch size is small enough that the full depth of the bitmap,
+	 * potentially limited by a shallow depth, is enough to wake up all of
+	 * the queues.
+	 *
+	 * Each full word of the bitmap has bits_per_word bits, and there might
+	 * be a partial word. There are depth / bits_per_word full words and
+	 * depth % bits_per_word bits left over. In bitwise arithmetic:
+	 *
+	 * bits_per_word = 1 << shift
+	 * depth / bits_per_word = depth >> shift
+	 * depth % bits_per_word = depth & ((1 << shift) - 1)
+	 *
+	 * Each word can be limited to sbq->min_shallow_depth bits.
 	 */
-	wake_batch = SBQ_WAKE_BATCH;
-	if (wake_batch > depth / SBQ_WAIT_QUEUES)
-		wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
+	shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth);
+	depth = ((depth >> sbq->sb.shift) * shallow_depth +
+		 min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth));
+	wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1,
+			     SBQ_WAKE_BATCH);
 
 	return wake_batch;
 }
@@ -307,7 +322,8 @@
 			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
 	}
 
-	sbq->wake_batch = sbq_calc_wake_batch(depth);
+	sbq->min_shallow_depth = UINT_MAX;
+	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
 	atomic_set(&sbq->wake_index, 0);
 
 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
@@ -327,21 +343,28 @@
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
 
-void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
+					    unsigned int depth)
 {
-	unsigned int wake_batch = sbq_calc_wake_batch(depth);
+	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
 	int i;
 
 	if (sbq->wake_batch != wake_batch) {
 		WRITE_ONCE(sbq->wake_batch, wake_batch);
 		/*
-		 * Pairs with the memory barrier in sbq_wake_up() to ensure that
-		 * the batch size is updated before the wait counts.
+		 * Pairs with the memory barrier in sbitmap_queue_wake_up()
+		 * to ensure that the batch size is updated before the wait
+		 * counts.
 		 */
 		smp_mb__before_atomic();
 		for (i = 0; i < SBQ_WAIT_QUEUES; i++)
 			atomic_set(&sbq->ws[i].wait_cnt, 1);
 	}
+}
+
+void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
+{
+	sbitmap_queue_update_wake_batch(sbq, depth);
 	sbitmap_resize(&sbq->sb, depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
@@ -380,6 +403,8 @@
 	unsigned int hint, depth;
 	int nr;
 
+	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
+
 	hint = this_cpu_read(*sbq->alloc_hint);
 	depth = READ_ONCE(sbq->sb.depth);
 	if (unlikely(hint >= depth)) {
@@ -403,6 +428,14 @@
 }
 EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
 
+void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
+				     unsigned int min_shallow_depth)
+{
+	sbq->min_shallow_depth = min_shallow_depth;
+	sbitmap_queue_update_wake_batch(sbq, sbq->sb.depth);
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
+
 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
 	int i, wake_index;
@@ -425,52 +458,67 @@
 	return NULL;
 }
 
-static void sbq_wake_up(struct sbitmap_queue *sbq)
+static bool __sbq_wake_up(struct sbitmap_queue *sbq)
 {
 	struct sbq_wait_state *ws;
 	unsigned int wake_batch;
 	int wait_cnt;
 
-	/*
-	 * Pairs with the memory barrier in set_current_state() to ensure the
-	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
-	 * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
-	 * waiter. See the comment on waitqueue_active(). This is __after_atomic
-	 * because we just did clear_bit_unlock() in the caller.
-	 */
-	smp_mb__after_atomic();
-
 	ws = sbq_wake_ptr(sbq);
 	if (!ws)
-		return;
+		return false;
 
 	wait_cnt = atomic_dec_return(&ws->wait_cnt);
 	if (wait_cnt <= 0) {
+		int ret;
+
 		wake_batch = READ_ONCE(sbq->wake_batch);
+
 		/*
 		 * Pairs with the memory barrier in sbitmap_queue_resize() to
 		 * ensure that we see the batch size update before the wait
 		 * count is reset.
 		 */
 		smp_mb__before_atomic();
+
 		/*
-		 * If there are concurrent callers to sbq_wake_up(), the last
-		 * one to decrement the wait count below zero will bump it back
-		 * up. If there is a concurrent resize, the count reset will
-		 * either cause the cmpxchg to fail or overwrite after the
-		 * cmpxchg.
+		 * For concurrent callers of this function, the one that
+		 * lost the atomic_cmpxchg() race should call this function
+		 * again to wake up a new batch on a different 'ws'.
 		 */
-		atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch);
-		sbq_index_atomic_inc(&sbq->wake_index);
-		wake_up_nr(&ws->wait, wake_batch);
+		ret = atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wake_batch);
+		if (ret == wait_cnt) {
+			sbq_index_atomic_inc(&sbq->wake_index);
+			wake_up_nr(&ws->wait, wake_batch);
+			return false;
+		}
+
+		return true;
 	}
+
+	return false;
 }
 
+void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
+{
+	while (__sbq_wake_up(sbq))
+		;
+}
+EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
+
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
 	sbitmap_clear_bit_unlock(&sbq->sb, nr);
-	sbq_wake_up(sbq);
+	/*
+	 * Pairs with the memory barrier in set_current_state() to ensure the
+	 * proper ordering of clear_bit_unlock()/waitqueue_active() in the waker
+	 * and test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+	 * waiter. See the comment on waitqueue_active().
+	 */
+	smp_mb__after_atomic();
+	sbitmap_queue_wake_up(sbq);
+
 	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
 		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
 }
@@ -482,7 +530,7 @@
 
 	/*
 	 * Pairs with the memory barrier in set_current_state() like in
-	 * sbq_wake_up().
+	 * sbitmap_queue_wake_up().
 	 */
 	smp_mb();
 	wake_index = atomic_read(&sbq->wake_index);
@@ -528,5 +576,6 @@
 	seq_puts(m, "}\n");
 
 	seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_show);
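
The new sbitmap_queue_min_shallow_depth() hook exists because the wake batch now depends on the shallowest allocation depth a user will ever request. For example, with shift = 6 (64-bit words), depth = 256 and min_shallow_depth = 8, the effective depth is (256 >> 6) * 8 + min(256 & 63, 8) = 32, and with SBQ_WAIT_QUEUES == 8 the batch clamps to 32 / 8 = 4. A hedged usage sketch; the depths and names are hypothetical:

	#define MY_SHALLOW_LIMIT	8	/* hypothetical limit */

	static int my_tags_init(struct sbitmap_queue *sbq)
	{
		int ret;

		ret = sbitmap_queue_init_node(sbq, 256, -1, false,
					      GFP_KERNEL, NUMA_NO_NODE);
		if (ret)
			return ret;

		/* Smallest depth ever passed to __sbitmap_queue_get_shallow() */
		sbitmap_queue_min_shallow_depth(sbq, MY_SHALLOW_LIMIT);
		return 0;
	}

Passing anything shallower to __sbitmap_queue_get_shallow() afterwards would trip the WARN_ON_ONCE() added above.
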
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index cc64058..04b68d9 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -593,9 +593,8 @@
 }
 
 /*
- * Allocates bounce buffer and returns its kernel virtual address.
+ * Allocates bounce buffer and returns its physical address.
  */
-
 static phys_addr_t
 map_single(struct device *hwdev, phys_addr_t phys, size_t size,
 	   enum dma_data_direction dir, unsigned long attrs)
@@ -614,7 +613,7 @@
 }
 
 /*
- * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ * tlb_addr is the physical address of the bounce buffer to unmap.
  */
 void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 			      size_t size, enum dma_data_direction dir,
@@ -692,7 +691,6 @@
 	}
 }
 
-#ifdef CONFIG_DMA_DIRECT_OPS
 static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
 		size_t size)
 {
@@ -727,7 +725,7 @@
 
 out_unmap:
 	dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-		(unsigned long long)(dev ? dev->coherent_dma_mask : 0),
+		(unsigned long long)dev->coherent_dma_mask,
 		(unsigned long long)*dma_handle);
 
 	/*
@@ -764,7 +762,6 @@
 				 DMA_ATTR_SKIP_CPU_SYNC);
 	return true;
 }
-#endif
 
 static void
 swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
@@ -1045,7 +1042,6 @@
 	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 
-#ifdef CONFIG_DMA_DIRECT_OPS
 void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs)
 {
@@ -1089,4 +1085,3 @@
 	.unmap_page		= swiotlb_unmap_page,
 	.dma_supported		= dma_direct_supported,
 };
-#endif /* CONFIG_DMA_DIRECT_OPS */
diff --git a/lib/test_overflow.c b/lib/test_overflow.c
new file mode 100644
index 0000000..aecbbb2
--- /dev/null
+++ b/lib/test_overflow.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Test cases for arithmetic overflow checks.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+
+#define DEFINE_TEST_ARRAY(t)			\
+	static const struct test_ ## t {	\
+		t a, b;				\
+		t sum, diff, prod;		\
+		bool s_of, d_of, p_of;		\
+	} t ## _tests[] __initconst
+
+DEFINE_TEST_ARRAY(u8) = {
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U8_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U8_MAX, U8_MAX, 1, 0, false, true, false},
+	{U8_MAX, 0, U8_MAX, U8_MAX, 0, false, false, false},
+	{1, U8_MAX, 0, 2, U8_MAX, true, true, false},
+	{U8_MAX, 1, 0, U8_MAX-1, U8_MAX, true, false, false},
+	{U8_MAX, U8_MAX, U8_MAX-1, 0, 1, true, false, true},
+
+	{U8_MAX, U8_MAX-1, U8_MAX-2, 1, 2, true, false, true},
+	{U8_MAX-1, U8_MAX, U8_MAX-2, U8_MAX, 2, true, true, true},
+
+	{1U << 3, 1U << 3, 1U << 4, 0, 1U << 6, false, false, false},
+	{1U << 4, 1U << 4, 1U << 5, 0, 0, false, false, true},
+	{1U << 4, 1U << 3, 3*(1U << 3), 1U << 3, 1U << 7, false, false, false},
+	{1U << 7, 1U << 7, 0, 0, 0, true, false, true},
+
+	{48, 32, 80, 16, 0, false, false, true},
+	{128, 128, 0, 0, 0, true, false, true},
+	{123, 234, 101, 145, 110, true, true, true},
+};
+DEFINE_TEST_ARRAY(u16) = {
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U16_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U16_MAX, U16_MAX, 1, 0, false, true, false},
+	{U16_MAX, 0, U16_MAX, U16_MAX, 0, false, false, false},
+	{1, U16_MAX, 0, 2, U16_MAX, true, true, false},
+	{U16_MAX, 1, 0, U16_MAX-1, U16_MAX, true, false, false},
+	{U16_MAX, U16_MAX, U16_MAX-1, 0, 1, true, false, true},
+
+	{U16_MAX, U16_MAX-1, U16_MAX-2, 1, 2, true, false, true},
+	{U16_MAX-1, U16_MAX, U16_MAX-2, U16_MAX, 2, true, true, true},
+
+	{1U << 7, 1U << 7, 1U << 8, 0, 1U << 14, false, false, false},
+	{1U << 8, 1U << 8, 1U << 9, 0, 0, false, false, true},
+	{1U << 8, 1U << 7, 3*(1U << 7), 1U << 7, 1U << 15, false, false, false},
+	{1U << 15, 1U << 15, 0, 0, 0, true, false, true},
+
+	{123, 234, 357, 65425, 28782, false, true, false},
+	{1234, 2345, 3579, 64425, 10146, false, true, true},
+};
+DEFINE_TEST_ARRAY(u32) = {
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U32_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U32_MAX, U32_MAX, 1, 0, false, true, false},
+	{U32_MAX, 0, U32_MAX, U32_MAX, 0, false, false, false},
+	{1, U32_MAX, 0, 2, U32_MAX, true, true, false},
+	{U32_MAX, 1, 0, U32_MAX-1, U32_MAX, true, false, false},
+	{U32_MAX, U32_MAX, U32_MAX-1, 0, 1, true, false, true},
+
+	{U32_MAX, U32_MAX-1, U32_MAX-2, 1, 2, true, false, true},
+	{U32_MAX-1, U32_MAX, U32_MAX-2, U32_MAX, 2, true, true, true},
+
+	{1U << 15, 1U << 15, 1U << 16, 0, 1U << 30, false, false, false},
+	{1U << 16, 1U << 16, 1U << 17, 0, 0, false, false, true},
+	{1U << 16, 1U << 15, 3*(1U << 15), 1U << 15, 1U << 31, false, false, false},
+	{1U << 31, 1U << 31, 0, 0, 0, true, false, true},
+
+	{-2U, 1U, -1U, -3U, -2U, false, false, false},
+	{-4U, 5U, 1U, -9U, -20U, true, false, true},
+};
+
+DEFINE_TEST_ARRAY(u64) = {
+	{0, 0, 0, 0, 0, false, false, false},
+	{1, 1, 2, 0, 1, false, false, false},
+	{0, 1, 1, U64_MAX, 0, false, true, false},
+	{1, 0, 1, 1, 0, false, false, false},
+	{0, U64_MAX, U64_MAX, 1, 0, false, true, false},
+	{U64_MAX, 0, U64_MAX, U64_MAX, 0, false, false, false},
+	{1, U64_MAX, 0, 2, U64_MAX, true, true, false},
+	{U64_MAX, 1, 0, U64_MAX-1, U64_MAX, true, false, false},
+	{U64_MAX, U64_MAX, U64_MAX-1, 0, 1, true, false, true},
+
+	{U64_MAX, U64_MAX-1, U64_MAX-2, 1, 2, true, false, true},
+	{U64_MAX-1, U64_MAX, U64_MAX-2, U64_MAX, 2, true, true, true},
+
+	{1ULL << 31, 1ULL << 31, 1ULL << 32, 0, 1ULL << 62, false, false, false},
+	{1ULL << 32, 1ULL << 32, 1ULL << 33, 0, 0, false, false, true},
+	{1ULL << 32, 1ULL << 31, 3*(1ULL << 31), 1ULL << 31, 1ULL << 63, false, false, false},
+	{1ULL << 63, 1ULL << 63, 0, 0, 0, true, false, true},
+	{1000000000ULL /* 10^9 */, 10000000000ULL /* 10^10 */,
+	 11000000000ULL, 18446744064709551616ULL, 10000000000000000000ULL,
+	 false, true, false},
+	{-15ULL, 10ULL, -5ULL, -25ULL, -150ULL, false, false, true},
+};
+
+DEFINE_TEST_ARRAY(s8) = {
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S8_MAX, S8_MAX, -S8_MAX, 0, false, false, false},
+	{S8_MAX, 0, S8_MAX, S8_MAX, 0, false, false, false},
+	{0, S8_MIN, S8_MIN, S8_MIN, 0, false, true, false},
+	{S8_MIN, 0, S8_MIN, S8_MIN, 0, false, false, false},
+
+	{-1, S8_MIN, S8_MAX, S8_MAX, S8_MIN, true, false, true},
+	{S8_MIN, -1, S8_MAX, -S8_MAX, S8_MIN, true, false, true},
+	{-1, S8_MAX, S8_MAX-1, S8_MIN, -S8_MAX, false, false, false},
+	{S8_MAX, -1, S8_MAX-1, S8_MIN, -S8_MAX, false, true, false},
+	{-1, -S8_MAX, S8_MIN, S8_MAX-1, S8_MAX, false, false, false},
+	{-S8_MAX, -1, S8_MIN, S8_MIN+2, S8_MAX, false, false, false},
+
+	{1, S8_MIN, -S8_MAX, -S8_MAX, S8_MIN, false, true, false},
+	{S8_MIN, 1, -S8_MAX, S8_MAX, S8_MIN, false, true, false},
+	{1, S8_MAX, S8_MIN, S8_MIN+2, S8_MAX, true, false, false},
+	{S8_MAX, 1, S8_MIN, S8_MAX-1, S8_MAX, true, false, false},
+
+	{S8_MIN, S8_MIN, 0, 0, 0, true, false, true},
+	{S8_MAX, S8_MAX, -2, 0, 1, true, false, true},
+
+	{-4, -32, -36, 28, -128, false, false, true},
+	{-4, 32, 28, -36, -128, false, false, false},
+};
+
+DEFINE_TEST_ARRAY(s16) = {
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S16_MAX, S16_MAX, -S16_MAX, 0, false, false, false},
+	{S16_MAX, 0, S16_MAX, S16_MAX, 0, false, false, false},
+	{0, S16_MIN, S16_MIN, S16_MIN, 0, false, true, false},
+	{S16_MIN, 0, S16_MIN, S16_MIN, 0, false, false, false},
+
+	{-1, S16_MIN, S16_MAX, S16_MAX, S16_MIN, true, false, true},
+	{S16_MIN, -1, S16_MAX, -S16_MAX, S16_MIN, true, false, true},
+	{-1, S16_MAX, S16_MAX-1, S16_MIN, -S16_MAX, false, false, false},
+	{S16_MAX, -1, S16_MAX-1, S16_MIN, -S16_MAX, false, true, false},
+	{-1, -S16_MAX, S16_MIN, S16_MAX-1, S16_MAX, false, false, false},
+	{-S16_MAX, -1, S16_MIN, S16_MIN+2, S16_MAX, false, false, false},
+
+	{1, S16_MIN, -S16_MAX, -S16_MAX, S16_MIN, false, true, false},
+	{S16_MIN, 1, -S16_MAX, S16_MAX, S16_MIN, false, true, false},
+	{1, S16_MAX, S16_MIN, S16_MIN+2, S16_MAX, true, false, false},
+	{S16_MAX, 1, S16_MIN, S16_MAX-1, S16_MAX, true, false, false},
+
+	{S16_MIN, S16_MIN, 0, 0, 0, true, false, true},
+	{S16_MAX, S16_MAX, -2, 0, 1, true, false, true},
+};
+DEFINE_TEST_ARRAY(s32) = {
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S32_MAX, S32_MAX, -S32_MAX, 0, false, false, false},
+	{S32_MAX, 0, S32_MAX, S32_MAX, 0, false, false, false},
+	{0, S32_MIN, S32_MIN, S32_MIN, 0, false, true, false},
+	{S32_MIN, 0, S32_MIN, S32_MIN, 0, false, false, false},
+
+	{-1, S32_MIN, S32_MAX, S32_MAX, S32_MIN, true, false, true},
+	{S32_MIN, -1, S32_MAX, -S32_MAX, S32_MIN, true, false, true},
+	{-1, S32_MAX, S32_MAX-1, S32_MIN, -S32_MAX, false, false, false},
+	{S32_MAX, -1, S32_MAX-1, S32_MIN, -S32_MAX, false, true, false},
+	{-1, -S32_MAX, S32_MIN, S32_MAX-1, S32_MAX, false, false, false},
+	{-S32_MAX, -1, S32_MIN, S32_MIN+2, S32_MAX, false, false, false},
+
+	{1, S32_MIN, -S32_MAX, -S32_MAX, S32_MIN, false, true, false},
+	{S32_MIN, 1, -S32_MAX, S32_MAX, S32_MIN, false, true, false},
+	{1, S32_MAX, S32_MIN, S32_MIN+2, S32_MAX, true, false, false},
+	{S32_MAX, 1, S32_MIN, S32_MAX-1, S32_MAX, true, false, false},
+
+	{S32_MIN, S32_MIN, 0, 0, 0, true, false, true},
+	{S32_MAX, S32_MAX, -2, 0, 1, true, false, true},
+};
+DEFINE_TEST_ARRAY(s64) = {
+	{0, 0, 0, 0, 0, false, false, false},
+
+	{0, S64_MAX, S64_MAX, -S64_MAX, 0, false, false, false},
+	{S64_MAX, 0, S64_MAX, S64_MAX, 0, false, false, false},
+	{0, S64_MIN, S64_MIN, S64_MIN, 0, false, true, false},
+	{S64_MIN, 0, S64_MIN, S64_MIN, 0, false, false, false},
+
+	{-1, S64_MIN, S64_MAX, S64_MAX, S64_MIN, true, false, true},
+	{S64_MIN, -1, S64_MAX, -S64_MAX, S64_MIN, true, false, true},
+	{-1, S64_MAX, S64_MAX-1, S64_MIN, -S64_MAX, false, false, false},
+	{S64_MAX, -1, S64_MAX-1, S64_MIN, -S64_MAX, false, true, false},
+	{-1, -S64_MAX, S64_MIN, S64_MAX-1, S64_MAX, false, false, false},
+	{-S64_MAX, -1, S64_MIN, S64_MIN+2, S64_MAX, false, false, false},
+
+	{1, S64_MIN, -S64_MAX, -S64_MAX, S64_MIN, false, true, false},
+	{S64_MIN, 1, -S64_MAX, S64_MAX, S64_MIN, false, true, false},
+	{1, S64_MAX, S64_MIN, S64_MIN+2, S64_MAX, true, false, false},
+	{S64_MAX, 1, S64_MIN, S64_MAX-1, S64_MAX, true, false, false},
+
+	{S64_MIN, S64_MIN, 0, 0, 0, true, false, true},
+	{S64_MAX, S64_MAX, -2, 0, 1, true, false, true},
+
+	{-1, -1, -2, 0, 1, false, false, false},
+	{-1, -128, -129, 127, 128, false, false, false},
+	{-128, -1, -129, -127, 128, false, false, false},
+	{0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false},
+};
+
+#define check_one_op(t, fmt, op, sym, a, b, r, of) do {		\
+	t _r;							\
+	bool _of;						\
+								\
+	_of = check_ ## op ## _overflow(a, b, &_r);		\
+	if (_of != of) {					\
+		pr_warn("expected "fmt" "sym" "fmt		\
+			" to%s overflow (type %s)\n",		\
+			a, b, of ? "" : " not", #t);		\
+		err = 1;					\
+	}							\
+	if (_r != r) {						\
+		pr_warn("expected "fmt" "sym" "fmt" == "	\
+			fmt", got "fmt" (type %s)\n",		\
+			a, b, r, _r, #t);			\
+		err = 1;					\
+	}							\
+} while (0)
+
+#define DEFINE_TEST_FUNC(t, fmt)					\
+static int __init do_test_ ## t(const struct test_ ## t *p)		\
+{							   		\
+	int err = 0;							\
+									\
+	check_one_op(t, fmt, add, "+", p->a, p->b, p->sum, p->s_of);	\
+	check_one_op(t, fmt, add, "+", p->b, p->a, p->sum, p->s_of);	\
+	check_one_op(t, fmt, sub, "-", p->a, p->b, p->diff, p->d_of);	\
+	check_one_op(t, fmt, mul, "*", p->a, p->b, p->prod, p->p_of);	\
+	check_one_op(t, fmt, mul, "*", p->b, p->a, p->prod, p->p_of);	\
+									\
+	return err;							\
+}									\
+									\
+static int __init test_ ## t ## _overflow(void) {			\
+	int err = 0;							\
+	unsigned i;							\
+									\
+	pr_info("%-3s: %zu tests\n", #t, ARRAY_SIZE(t ## _tests));	\
+	for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i)			\
+		err |= do_test_ ## t(&t ## _tests[i]);			\
+	return err;							\
+}
+
+DEFINE_TEST_FUNC(u8, "%d");
+DEFINE_TEST_FUNC(s8, "%d");
+DEFINE_TEST_FUNC(u16, "%d");
+DEFINE_TEST_FUNC(s16, "%d");
+DEFINE_TEST_FUNC(u32, "%u");
+DEFINE_TEST_FUNC(s32, "%d");
+#if BITS_PER_LONG == 64
+DEFINE_TEST_FUNC(u64, "%llu");
+DEFINE_TEST_FUNC(s64, "%lld");
+#endif
+
+static int __init test_overflow_calculation(void)
+{
+	int err = 0;
+
+	err |= test_u8_overflow();
+	err |= test_s8_overflow();
+	err |= test_u16_overflow();
+	err |= test_s16_overflow();
+	err |= test_u32_overflow();
+	err |= test_s32_overflow();
+#if BITS_PER_LONG == 64
+	err |= test_u64_overflow();
+	err |= test_s64_overflow();
+#endif
+
+	return err;
+}
+
+/*
+ * Deal with the various forms of allocator arguments. See comments above
+ * the DEFINE_TEST_ALLOC() instances for mapping of the "bits".
+ */
+#define alloc010(alloc, arg, sz) alloc(sz, GFP_KERNEL)
+#define alloc011(alloc, arg, sz) alloc(sz, GFP_KERNEL, NUMA_NO_NODE)
+#define alloc000(alloc, arg, sz) alloc(sz)
+#define alloc001(alloc, arg, sz) alloc(sz, NUMA_NO_NODE)
+#define alloc110(alloc, arg, sz) alloc(arg, sz, GFP_KERNEL)
+#define free0(free, arg, ptr)	 free(ptr)
+#define free1(free, arg, ptr)	 free(arg, ptr)
+
+/* Wrap around to 8K */
+#define TEST_SIZE		(9 << PAGE_SHIFT)
+
+#define DEFINE_TEST_ALLOC(func, free_func, want_arg, want_gfp, want_node)\
+static int __init test_ ## func (void *arg)				\
+{									\
+	volatile size_t a = TEST_SIZE;					\
+	volatile size_t b = (SIZE_MAX / TEST_SIZE) + 1;			\
+	void *ptr;							\
+									\
+	/* Tiny allocation test. */					\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg, 1);\
+	if (!ptr) {							\
+		pr_warn(#func " failed regular allocation?!\n");	\
+		return 1;						\
+	}								\
+	free ## want_arg (free_func, arg, ptr);				\
+									\
+	/* Wrapped allocation test. */					\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg,	\
+							  a * b);	\
+	if (!ptr) {							\
+		pr_warn(#func " unexpectedly failed bad wrapping?!\n");	\
+		return 1;						\
+	}								\
+	free ## want_arg (free_func, arg, ptr);				\
+									\
+	/* Saturated allocation test. */				\
+	ptr = alloc ## want_arg ## want_gfp ## want_node (func, arg,	\
+						   array_size(a, b));	\
+	if (ptr) {							\
+		pr_warn(#func " missed saturation!\n");			\
+		free ## want_arg (free_func, arg, ptr);			\
+		return 1;						\
+	}								\
+	pr_info(#func " detected saturation\n");			\
+	return 0;							\
+}
+
+/*
+ * Allocator uses a trailing node argument --------+  (e.g. kmalloc_node())
+ * Allocator uses the gfp_t argument -----------+  |  (e.g. kmalloc())
+ * Allocator uses a special leading argument +  |  |  (e.g. devm_kmalloc())
+ *                                           |  |  |
+ */
+DEFINE_TEST_ALLOC(kmalloc,	 kfree,	     0, 1, 0);
+DEFINE_TEST_ALLOC(kmalloc_node,	 kfree,	     0, 1, 1);
+DEFINE_TEST_ALLOC(kzalloc,	 kfree,	     0, 1, 0);
+DEFINE_TEST_ALLOC(kzalloc_node,  kfree,	     0, 1, 1);
+DEFINE_TEST_ALLOC(vmalloc,	 vfree,	     0, 0, 0);
+DEFINE_TEST_ALLOC(vmalloc_node,  vfree,	     0, 0, 1);
+DEFINE_TEST_ALLOC(vzalloc,	 vfree,	     0, 0, 0);
+DEFINE_TEST_ALLOC(vzalloc_node,  vfree,	     0, 0, 1);
+DEFINE_TEST_ALLOC(kvmalloc,	 kvfree,     0, 1, 0);
+DEFINE_TEST_ALLOC(kvmalloc_node, kvfree,     0, 1, 1);
+DEFINE_TEST_ALLOC(kvzalloc,	 kvfree,     0, 1, 0);
+DEFINE_TEST_ALLOC(kvzalloc_node, kvfree,     0, 1, 1);
+DEFINE_TEST_ALLOC(devm_kmalloc,  devm_kfree, 1, 1, 0);
+DEFINE_TEST_ALLOC(devm_kzalloc,  devm_kfree, 1, 1, 0);
+
+static int __init test_overflow_allocation(void)
+{
+	const char device_name[] = "overflow-test";
+	struct device *dev;
+	int err = 0;
+
+	/* Create dummy device for devm_kmalloc()-family tests. */
+	dev = root_device_register(device_name);
+	if (IS_ERR(dev)) {
+		pr_warn("Cannot register test device\n");
+		return 1;
+	}
+
+	err |= test_kmalloc(NULL);
+	err |= test_kmalloc_node(NULL);
+	err |= test_kzalloc(NULL);
+	err |= test_kzalloc_node(NULL);
+	err |= test_kvmalloc(NULL);
+	err |= test_kvmalloc_node(NULL);
+	err |= test_kvzalloc(NULL);
+	err |= test_kvzalloc_node(NULL);
+	err |= test_vmalloc(NULL);
+	err |= test_vmalloc_node(NULL);
+	err |= test_vzalloc(NULL);
+	err |= test_vzalloc_node(NULL);
+	err |= test_devm_kmalloc(dev);
+	err |= test_devm_kzalloc(dev);
+
+	device_unregister(dev);
+
+	return err;
+}
+
+static int __init test_module_init(void)
+{
+	int err = 0;
+
+	err |= test_overflow_calculation();
+	err |= test_overflow_allocation();
+
+	if (err) {
+		pr_warn("FAIL!\n");
+		err = -EINVAL;
+	} else {
+		pr_info("all tests passed\n");
+	}
+
+	return err;
+}
+
+static void __exit test_module_exit(void)
+{ }
+
+module_init(test_module_init);
+module_exit(test_module_exit);
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 71ebfa4..cea592f 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -204,7 +204,7 @@
 #if BITS_PER_LONG == 64
 
 #define PTR_WIDTH 16
-#define PTR ((void *)0xffff0123456789ab)
+#define PTR ((void *)0xffff0123456789abUL)
 #define PTR_STR "ffff0123456789ab"
 #define ZEROS "00000000"	/* hex 32 zero bits */
 
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 23920c5..a48aaa7 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -703,6 +703,22 @@
 #endif
 }
 
+static const struct printf_spec default_str_spec = {
+	.field_width = -1,
+	.precision = -1,
+};
+
+static const struct printf_spec default_flag_spec = {
+	.base = 16,
+	.precision = -1,
+	.flags = SPECIAL | SMALL,
+};
+
+static const struct printf_spec default_dec_spec = {
+	.base = 10,
+	.precision = -1,
+};
+
 static noinline_for_stack
 char *resource_string(char *buf, char *end, struct resource *res,
 		      struct printf_spec spec, const char *fmt)
@@ -732,21 +748,11 @@
 		.precision = -1,
 		.flags = SMALL | ZEROPAD,
 	};
-	static const struct printf_spec dec_spec = {
-		.base = 10,
-		.precision = -1,
-		.flags = 0,
-	};
 	static const struct printf_spec str_spec = {
 		.field_width = -1,
 		.precision = 10,
 		.flags = LEFT,
 	};
-	static const struct printf_spec flag_spec = {
-		.base = 16,
-		.precision = -1,
-		.flags = SPECIAL | SMALL,
-	};
 
 	/* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8)
 	 * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
@@ -770,10 +776,10 @@
 		specp = &mem_spec;
 	} else if (res->flags & IORESOURCE_IRQ) {
 		p = string(p, pend, "irq ", str_spec);
-		specp = &dec_spec;
+		specp = &default_dec_spec;
 	} else if (res->flags & IORESOURCE_DMA) {
 		p = string(p, pend, "dma ", str_spec);
-		specp = &dec_spec;
+		specp = &default_dec_spec;
 	} else if (res->flags & IORESOURCE_BUS) {
 		p = string(p, pend, "bus ", str_spec);
 		specp = &bus_spec;
@@ -803,7 +809,7 @@
 			p = string(p, pend, " disabled", str_spec);
 	} else {
 		p = string(p, pend, " flags ", str_spec);
-		p = number(p, pend, res->flags, flag_spec);
+		p = number(p, pend, res->flags, default_flag_spec);
 	}
 	*p++ = ']';
 	*p = '\0';
@@ -913,9 +919,6 @@
 	int cur, rbot, rtop;
 	bool first = true;
 
-	/* reused to print numbers */
-	spec = (struct printf_spec){ .base = 10 };
-
 	rbot = cur = find_first_bit(bitmap, nr_bits);
 	while (cur < nr_bits) {
 		rtop = cur;
@@ -930,13 +933,13 @@
 		}
 		first = false;
 
-		buf = number(buf, end, rbot, spec);
+		buf = number(buf, end, rbot, default_dec_spec);
 		if (rbot < rtop) {
 			if (buf < end)
 				*buf = '-';
 			buf++;
 
-			buf = number(buf, end, rtop, spec);
+			buf = number(buf, end, rtop, default_dec_spec);
 		}
 
 		rbot = cur;
@@ -1354,11 +1357,9 @@
 	return string(buf, end, uuid, spec);
 }
 
-int kptr_restrict __read_mostly;
-
 static noinline_for_stack
-char *restricted_pointer(char *buf, char *end, const void *ptr,
-			 struct printf_spec spec)
+char *pointer_string(char *buf, char *end, const void *ptr,
+		     struct printf_spec spec)
 {
 	spec.base = 16;
 	spec.flags |= SMALL;
@@ -1367,6 +1368,15 @@
 		spec.flags |= ZEROPAD;
 	}
 
+	return number(buf, end, (unsigned long int)ptr, spec);
+}
+
+int kptr_restrict __read_mostly;
+
+static noinline_for_stack
+char *restricted_pointer(char *buf, char *end, const void *ptr,
+			 struct printf_spec spec)
+{
 	switch (kptr_restrict) {
 	case 0:
 		/* Always print %pK values */
@@ -1378,8 +1388,11 @@
 		 * kptr_restrict==1 cannot be used in IRQ context
 		 * because its test for CAP_SYSLOG would be meaningless.
 		 */
-		if (in_irq() || in_serving_softirq() || in_nmi())
+		if (in_irq() || in_serving_softirq() || in_nmi()) {
+			if (spec.field_width == -1)
+				spec.field_width = 2 * sizeof(ptr);
 			return string(buf, end, "pK-error", spec);
+		}
 
 		/*
 		 * Only print the real pointer value if the current
@@ -1404,7 +1417,7 @@
 		break;
 	}
 
-	return number(buf, end, (unsigned long)ptr, spec);
+	return pointer_string(buf, end, ptr, spec);
 }
 
 static noinline_for_stack
@@ -1456,9 +1469,6 @@
 		return string(buf, end, NULL, spec);
 
 	switch (fmt[1]) {
-	case 'r':
-		return number(buf, end, clk_get_rate(clk), spec);
-
 	case 'n':
 	default:
 #ifdef CONFIG_COMMON_CLK
@@ -1474,23 +1484,13 @@
 					const struct trace_print_flags *names)
 {
 	unsigned long mask;
-	const struct printf_spec strspec = {
-		.field_width = -1,
-		.precision = -1,
-	};
-	const struct printf_spec numspec = {
-		.flags = SPECIAL|SMALL,
-		.field_width = -1,
-		.precision = -1,
-		.base = 16,
-	};
 
 	for ( ; flags && names->name; names++) {
 		mask = names->mask;
 		if ((flags & mask) != mask)
 			continue;
 
-		buf = string(buf, end, names->name, strspec);
+		buf = string(buf, end, names->name, default_str_spec);
 
 		flags &= ~mask;
 		if (flags) {
@@ -1501,7 +1501,7 @@
 	}
 
 	if (flags)
-		buf = number(buf, end, flags, numspec);
+		buf = number(buf, end, flags, default_flag_spec);
 
 	return buf;
 }
@@ -1548,22 +1548,18 @@
 {
 	int depth;
 	const struct device_node *parent = np->parent;
-	static const struct printf_spec strspec = {
-		.field_width = -1,
-		.precision = -1,
-	};
 
 	/* special case for root node */
 	if (!parent)
-		return string(buf, end, "/", strspec);
+		return string(buf, end, "/", default_str_spec);
 
 	for (depth = 0; parent->parent; depth++)
 		parent = parent->parent;
 
 	for ( ; depth >= 0; depth--) {
-		buf = string(buf, end, "/", strspec);
+		buf = string(buf, end, "/", default_str_spec);
 		buf = string(buf, end, device_node_name_for_depth(np, depth),
-			     strspec);
+			     default_str_spec);
 	}
 	return buf;
 }
@@ -1655,20 +1651,6 @@
 	return widen_string(buf, buf - buf_start, end, spec);
 }
 
-static noinline_for_stack
-char *pointer_string(char *buf, char *end, const void *ptr,
-		     struct printf_spec spec)
-{
-	spec.base = 16;
-	spec.flags |= SMALL;
-	if (spec.field_width == -1) {
-		spec.field_width = 2 * sizeof(ptr);
-		spec.flags |= ZEROPAD;
-	}
-
-	return number(buf, end, (unsigned long int)ptr, spec);
-}
-
 static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
 static siphash_key_t ptr_key __read_mostly;
 
@@ -1710,13 +1692,13 @@
 /* Maps a pointer to a 32 bit unique identifier. */
 static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
 {
+	const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
 	unsigned long hashval;
-	const int default_width = 2 * sizeof(ptr);
 
 	if (static_branch_unlikely(&not_filled_random_ptr_key)) {
-		spec.field_width = default_width;
+		spec.field_width = 2 * sizeof(ptr);
 		/* string length must be less than the field width */
-		return string(buf, end, "(ptrval)", spec);
+		return string(buf, end, str, spec);
 	}
 
 #ifdef CONFIG_64BIT
@@ -1729,15 +1711,7 @@
 #else
 	hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key);
 #endif
-
-	spec.flags |= SMALL;
-	if (spec.field_width == -1) {
-		spec.field_width = default_width;
-		spec.flags |= ZEROPAD;
-	}
-	spec.base = 16;
-
-	return number(buf, end, hashval, spec);
+	return pointer_string(buf, end, (const void *)hashval, spec);
 }
 
 /*
@@ -1750,10 +1724,10 @@
  *
  * Right now we handle:
  *
- * - 'F' For symbolic function descriptor pointers with offset
- * - 'f' For simple symbolic function names without offset
- * - 'S' For symbolic direct pointers with offset
- * - 's' For symbolic direct pointers without offset
+ * - 'S' For symbolic direct pointers (or function descriptors) with offset
+ * - 's' For symbolic direct pointers (or function descriptors) without offset
+ * - 'F' Same as 'S'
+ * - 'f' Same as 's'
  * - '[FfSs]R' as above with __builtin_extract_return_addr() translation
  * - 'B' For backtraced symbolic direct pointers with offset
  * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
@@ -1850,10 +1824,6 @@
  * ** When making changes please also update:
  *	Documentation/core-api/printk-formats.rst
  *
- * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
- * function pointers are really function descriptors, which contain a
- * pointer to the real address.
- *
  * Note: The default behaviour (unadorned %p) is to hash the address,
  * rendering it useful as a unique identifier.
  */
@@ -2129,6 +2099,7 @@
 
 	case 'x':
 		spec->flags |= SMALL;
+		/* fall through */
 
 	case 'X':
 		spec->base = 16;
@@ -3087,8 +3058,10 @@
 			break;
 		case 'i':
 			base = 0;
+			/* fall through */
 		case 'd':
 			is_sign = true;
+			/* fall through */
 		case 'u':
 			break;
 		case '%':
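
With restricted_pointer() and ptr_to_id() both funneling into the shared pointer_string() helper, hashed, restricted and raw pointers all print zero-padded to the full pointer width. Illustrative output (hash values differ per boot):

	pr_info("%p\n",  ptr);	/* hashed id, e.g. 00000000c0ffee12 */
	pr_info("%px\n", ptr);	/* the raw pointer value */
	pr_info("%pK\n", ptr);	/* raw or masked, per kptr_restrict */
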
diff --git a/mm/Kconfig b/mm/Kconfig
index e14c015..3e0b6e8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -266,7 +266,7 @@
 	bool
 
 config PHYS_ADDR_T_64BIT
-	def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
+	def_bool 64BIT
 
 config BOUNCE
 	bool "Enable bounce buffers"
@@ -305,7 +305,7 @@
 	  the many instances by a single page with that content, so
 	  saving memory until one or another app needs to modify the content.
 	  Recommended for use with KVM, or with other duplicative applications.
-	  See Documentation/vm/ksm.txt for more information: KSM is inactive
+	  See Documentation/vm/ksm.rst for more information: KSM is inactive
 	  until a program has madvised that an area is MADV_MERGEABLE, and
 	  root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
 
@@ -530,7 +530,7 @@
 	  into a page just as regular dirty bit, but unlike the latter
 	  it can be cleared by hands.
 
-	  See Documentation/vm/soft-dirty.txt for more details.
+	  See Documentation/admin-guide/mm/soft-dirty.rst for more details.
 
 config ZSWAP
 	bool "Compressed cache for swap pages (EXPERIMENTAL)"
@@ -657,7 +657,8 @@
 	  be useful to tune memory cgroup limits and/or for job placement
 	  within a compute cluster.
 
-	  See Documentation/vm/idle_page_tracking.txt for more details.
+	  See Documentation/admin-guide/mm/idle_page_tracking.rst for
+	  more details.
 
 # arch_add_memory() comprehends device memory
 config ARCH_HAS_ZONE_DEVICE
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 7441bd9..8fe3ebd 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -412,6 +412,7 @@
  * protected.
  */
 static DEFINE_SPINLOCK(cgwb_lock);
+static struct workqueue_struct *cgwb_release_wq;
 
 /**
  * wb_congested_get_create - get or create a wb_congested
@@ -522,7 +523,7 @@
 {
 	struct bdi_writeback *wb = container_of(refcnt, struct bdi_writeback,
 						refcnt);
-	schedule_work(&wb->release_work);
+	queue_work(cgwb_release_wq, &wb->release_work);
 }
 
 static void cgwb_kill(struct bdi_writeback *wb)
@@ -784,6 +785,21 @@
 	spin_unlock_irq(&cgwb_lock);
 }
 
+static int __init cgwb_init(void)
+{
+	/*
+	 * There can be many concurrent release work items overwhelming
+	 * system_wq.  Put them in a separate wq and limit concurrency.
+	 * There's no point in executing many of these in parallel.
+	 */
+	cgwb_release_wq = alloc_workqueue("cgwb_release", 0, 1);
+	if (!cgwb_release_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+subsys_initcall(cgwb_init);
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static int cgwb_bdi_init(struct backing_dev_info *bdi)
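
The third argument of alloc_workqueue() is max_active, so the release path above is capped at one in-flight work item instead of fanning out across system_wq. The same pattern, sketched generically (the names are hypothetical):

	static struct workqueue_struct *my_release_wq;

	static int __init my_init(void)
	{
		/* flags == 0, max_active == 1: releases run fully serialized */
		my_release_wq = alloc_workqueue("my_release", 0, 1);
		if (!my_release_wq)
			return -ENOMEM;
		return 0;
	}
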
diff --git a/mm/cleancache.c b/mm/cleancache.c
index f7b9fdc..126548b 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -3,7 +3,7 @@
  *
  * This code provides the generic "frontend" layer to call a matching
  * "backend" driver implementation of cleancache.  See
- * Documentation/vm/cleancache.txt for more information.
+ * Documentation/vm/cleancache.rst for more information.
  *
  * Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
  * Author: Dan Magenheimer
diff --git a/mm/frontswap.c b/mm/frontswap.c
index fec8b50..4f5476a 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -3,7 +3,7 @@
  *
  * This code provides the generic "frontend" layer to call a matching
  * "backend" driver implementation of frontswap.  See
- * Documentation/vm/frontswap.txt for more information.
+ * Documentation/vm/frontswap.rst for more information.
  *
  * Copyright (C) 2009-2012 Oracle Corp.  All rights reserved.
  * Author: Dan Magenheimer
diff --git a/mm/hmm.c b/mm/hmm.c
index 486dc39..e63e353 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -37,7 +37,7 @@
 
 #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
 /*
- * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h
+ * Device private memory see HMM (Documentation/vm/hmm.rst) or hmm.h
  */
 DEFINE_STATIC_KEY_FALSE(device_private_key);
 EXPORT_SYMBOL(device_private_key);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b9f3dbd..ac5591d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1185,7 +1185,7 @@
 	 * mmu_notifier_invalidate_range_end() happens which can lead to a
 	 * device seeing memory write in different order than CPU.
 	 *
-	 * See Documentation/vm/mmu_notifier.txt
+	 * See Documentation/vm/mmu_notifier.rst
 	 */
 	pmdp_huge_clear_flush_notify(vma, haddr, vmf->pmd);
 
@@ -2037,7 +2037,7 @@
 	 * replacing a zero pmd write protected page with a zero pte write
 	 * protected page.
 	 *
-	 * See Documentation/vm/mmu_notifier.txt
+	 * See Documentation/vm/mmu_notifier.rst
 	 */
 	pmdp_huge_clear_flush(vma, haddr, pmd);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2186791..1290887 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3291,7 +3291,7 @@
 				 * table protection not changing it to point
 				 * to a new page.
 				 *
-				 * See Documentation/vm/mmu_notifier.txt
+				 * See Documentation/vm/mmu_notifier.rst
 				 */
 				huge_ptep_set_wrprotect(src, addr, src_pte);
 			}
@@ -4357,7 +4357,7 @@
 	 * No need to call mmu_notifier_invalidate_range() we are downgrading
 	 * page table protection not changing it to point to a new page.
 	 *
-	 * See Documentation/vm/mmu_notifier.txt
+	 * See Documentation/vm/mmu_notifier.rst
 	 */
 	i_mmap_unlock_write(vma->vm_file->f_mapping);
 	mmu_notifier_invalidate_range_end(mm, start, end);
diff --git a/mm/internal.h b/mm/internal.h
index 502d141..9e3654d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -53,7 +53,7 @@
 			     unsigned long addr, unsigned long end,
 			     struct zap_details *details);
 
-extern int __do_page_cache_readahead(struct address_space *mapping,
+extern unsigned int __do_page_cache_readahead(struct address_space *mapping,
 		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
 		unsigned long lookahead_size);
 
diff --git a/mm/ksm.c b/mm/ksm.c
index e3cbf9a..7d6558f 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -51,7 +51,9 @@
 #define DO_NUMA(x)	do { } while (0)
 #endif
 
-/*
+/**
+ * DOC: Overview
+ *
  * A few notes about the KSM scanning process,
  * to make it easier to understand the data structures below:
  *
@@ -67,6 +69,21 @@
  * this tree is fully assured to be working (except when pages are unmapped),
  * and therefore this tree is called the stable tree.
  *
+ * The stable tree node includes information required for reverse
+ * mapping from a KSM page to virtual addresses that map this page.
+ *
+ * In order to avoid large latencies of the rmap walks on KSM pages,
+ * KSM maintains two types of nodes in the stable tree:
+ *
+ * * the regular nodes that keep the reverse mapping structures in a
+ *   linked list
+ * * the "chains" that link nodes ("dups") that represent the same
+ *   write protected memory content, but each "dup" corresponds to a
+ *   different KSM page copy of that content
+ *
+ * Internally, the regular nodes, "dups" and "chains" are represented
+ * using the same :c:type:`struct stable_node` structure.
+ *
  * In addition to the stable tree, KSM uses a second data structure called the
  * unstable tree: this tree holds pointers to pages which have been found to
  * be "unchanged for a period of time".  The unstable tree sorts these pages
@@ -1049,7 +1066,7 @@
 		 * No need to notify as we are downgrading page table to read
 		 * only not changing it to point to a new page.
 		 *
-		 * See Documentation/vm/mmu_notifier.txt
+		 * See Documentation/vm/mmu_notifier.rst
 		 */
 		entry = ptep_clear_flush(vma, pvmw.address, pvmw.pte);
 		/*
@@ -1145,7 +1162,7 @@
 	 * No need to notify as we are replacing a read only page with another
 	 * read only page with the same content.
 	 *
-	 * See Documentation/vm/mmu_notifier.txt
+	 * See Documentation/vm/mmu_notifier.rst
 	 */
 	ptep_clear_flush(vma, addr, ptep);
 	set_pte_at_notify(mm, addr, ptep, newpte);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2bd3df3..1695f38 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3849,7 +3849,7 @@
 	if (ret)
 		goto out_put_css;
 
-	efile.file->f_op->poll(efile.file, &event->pt);
+	vfs_poll(efile.file, &event->pt);
 
 	spin_lock(&memcg->event_list_lock);
 	list_add(&event->list, &memcg->event_list);
diff --git a/mm/mempool.c b/mm/mempool.c
index 5c9dce3..b54f2c2 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -138,6 +138,28 @@
 }
 
 /**
+ * mempool_exit - exit a mempool initialized with mempool_init()
+ * @pool:      pointer to the memory pool which was initialized with
+ *             mempool_init().
+ *
+ * Free all reserved elements in @pool.  @pool itself is not freed, so it
+ * may be embedded in another structure.  This function only sleeps if the
+ * free_fn() callback sleeps.
+ *
+ * May be called on a zeroed but uninitialized mempool (i.e. allocated with
+ * kzalloc()).
+ */
+void mempool_exit(mempool_t *pool)
+{
+	while (pool->curr_nr) {
+		void *element = remove_element(pool, GFP_KERNEL);
+		pool->free(element, pool->pool_data);
+	}
+	kfree(pool->elements);
+	pool->elements = NULL;
+}
+EXPORT_SYMBOL(mempool_exit);
+
+/**
  * mempool_destroy - deallocate a memory pool
  * @pool:      pointer to the memory pool which was allocated via
  *             mempool_create().
@@ -150,15 +172,65 @@
 	if (unlikely(!pool))
 		return;
 
-	while (pool->curr_nr) {
-		void *element = remove_element(pool, GFP_KERNEL);
-		pool->free(element, pool->pool_data);
-	}
-	kfree(pool->elements);
+	mempool_exit(pool);
 	kfree(pool);
 }
 EXPORT_SYMBOL(mempool_destroy);
 
+int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+		      mempool_free_t *free_fn, void *pool_data,
+		      gfp_t gfp_mask, int node_id)
+{
+	spin_lock_init(&pool->lock);
+	pool->min_nr	= min_nr;
+	pool->pool_data = pool_data;
+	pool->alloc	= alloc_fn;
+	pool->free	= free_fn;
+	init_waitqueue_head(&pool->wait);
+
+	pool->elements = kmalloc_array_node(min_nr, sizeof(void *),
+					    gfp_mask, node_id);
+	if (!pool->elements)
+		return -ENOMEM;
+
+	/*
+	 * First pre-allocate the guaranteed number of buffers.
+	 */
+	while (pool->curr_nr < pool->min_nr) {
+		void *element;
+
+		element = pool->alloc(gfp_mask, pool->pool_data);
+		if (unlikely(!element)) {
+			mempool_exit(pool);
+			return -ENOMEM;
+		}
+		add_element(pool, element);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mempool_init_node);
+
+/**
+ * mempool_init - initialize a memory pool
+ * @pool:      pointer to the memory pool that should be initialized
+ * @min_nr:    the minimum number of elements guaranteed to be
+ *             allocated for this pool.
+ * @alloc_fn:  user-defined element-allocation function.
+ * @free_fn:   user-defined element-freeing function.
+ * @pool_data: optional private data available to the user-defined functions.
+ *
+ * Like mempool_create(), but initializes the pool in place, i.e. embedded
+ * in another structure.
+ */
+int mempool_init(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn,
+		 mempool_free_t *free_fn, void *pool_data)
+{
+	return mempool_init_node(pool, min_nr, alloc_fn, free_fn,
+				 pool_data, GFP_KERNEL, NUMA_NO_NODE);
+}
+EXPORT_SYMBOL(mempool_init);
+
 /**
  * mempool_create - create a memory pool
  * @min_nr:    the minimum number of elements guaranteed to be
@@ -186,35 +258,17 @@
 			       gfp_t gfp_mask, int node_id)
 {
 	mempool_t *pool;
+
 	pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
 	if (!pool)
 		return NULL;
-	pool->elements = kmalloc_array_node(min_nr, sizeof(void *),
-				      gfp_mask, node_id);
-	if (!pool->elements) {
+
+	if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data,
+			      gfp_mask, node_id)) {
 		kfree(pool);
 		return NULL;
 	}
-	spin_lock_init(&pool->lock);
-	pool->min_nr = min_nr;
-	pool->pool_data = pool_data;
-	init_waitqueue_head(&pool->wait);
-	pool->alloc = alloc_fn;
-	pool->free = free_fn;
 
-	/*
-	 * First pre-allocate the guaranteed number of buffers.
-	 */
-	while (pool->curr_nr < pool->min_nr) {
-		void *element;
-
-		element = pool->alloc(gfp_mask, pool->pool_data);
-		if (unlikely(!element)) {
-			mempool_destroy(pool);
-			return NULL;
-		}
-		add_element(pool, element);
-	}
 	return pool;
 }
 EXPORT_SYMBOL(mempool_create_node);
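
mempool_init() and mempool_exit() let a pool be embedded in a longer-lived object instead of living in its own kzalloc()'d allocation. A hedged sketch using the stock slab helpers; my_dev and my_cache are hypothetical:

	struct my_dev {
		mempool_t pool;
		/* other members */
	};

	static int my_dev_init(struct my_dev *dev, struct kmem_cache *my_cache)
	{
		return mempool_init(&dev->pool, 16, mempool_alloc_slab,
				    mempool_free_slab, my_cache);
	}

	static void my_dev_exit(struct my_dev *dev)
	{
		/* Frees the reserved elements; the embedding object owns dev. */
		mempool_exit(&dev->pool);
	}
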
diff --git a/mm/mmap.c b/mm/mmap.c
index fc41c05..d817764 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2828,7 +2828,7 @@
 	unsigned long ret = -EINVAL;
 	struct file *file;
 
-	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.txt.\n",
+	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n",
 		     current->comm, current->pid);
 
 	if (prot)
diff --git a/mm/readahead.c b/mm/readahead.c
index 539bbb6..e273f0d 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -140,23 +140,23 @@
 }
 
 /*
- * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates all
- * the pages first, then submits them all for I/O. This avoids the very bad
+ * __do_page_cache_readahead() actually reads a chunk of disk.  It allocates
+ * the pages first, then submits them for I/O. This avoids the very bad
  * behaviour which would occur if page allocations are causing VM writeback.
  * We really don't want to intermingle reads and writes like that.
  *
  * Returns the number of pages requested, or the maximum amount of I/O allowed.
  */
-int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
-			pgoff_t offset, unsigned long nr_to_read,
-			unsigned long lookahead_size)
+unsigned int __do_page_cache_readahead(struct address_space *mapping,
+		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
+		unsigned long lookahead_size)
 {
 	struct inode *inode = mapping->host;
 	struct page *page;
 	unsigned long end_index;	/* The last page we want to read */
 	LIST_HEAD(page_pool);
 	int page_idx;
-	int ret = 0;
+	unsigned int nr_pages = 0;
 	loff_t isize = i_size_read(inode);
 	gfp_t gfp_mask = readahead_gfp_mask(mapping);
 
@@ -177,8 +177,18 @@
 		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->i_pages, page_offset);
 		rcu_read_unlock();
-		if (page && !radix_tree_exceptional_entry(page))
+		if (page && !radix_tree_exceptional_entry(page)) {
+			/*
+			 * Page already present?  Kick off the current batch of
+			 * contiguous pages before continuing with the next
+			 * batch.
+			 */
+			if (nr_pages)
+				read_pages(mapping, filp, &page_pool, nr_pages,
+						gfp_mask);
+			nr_pages = 0;
 			continue;
+		}
 
 		page = __page_cache_alloc(gfp_mask);
 		if (!page)
@@ -187,7 +197,7 @@
 		list_add(&page->lru, &page_pool);
 		if (page_idx == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
-		ret++;
+		nr_pages++;
 	}
 
 	/*
@@ -195,11 +205,11 @@
 	 * uptodate then the caller will launch readpage again, and
 	 * will then handle the error.
 	 */
-	if (ret)
-		read_pages(mapping, filp, &page_pool, ret, gfp_mask);
+	if (nr_pages)
+		read_pages(mapping, filp, &page_pool, nr_pages, gfp_mask);
 	BUG_ON(!list_empty(&page_pool));
 out:
-	return ret;
+	return nr_pages;
 }
 
 /*
@@ -223,16 +233,11 @@
 	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
 	nr_to_read = min(nr_to_read, max_pages);
 	while (nr_to_read) {
-		int err;
-
 		unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
 
 		if (this_chunk > nr_to_read)
 			this_chunk = nr_to_read;
-		err = __do_page_cache_readahead(mapping, filp,
-						offset, this_chunk, 0);
-		if (err < 0)
-			return err;
+		__do_page_cache_readahead(mapping, filp, offset, this_chunk, 0);
 
 		offset += this_chunk;
 		nr_to_read -= this_chunk;
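
Concretely, if pages 0-2 are absent, page 3 is already cached and pages 4-5 are absent, the readahead loop above now submits {0, 1, 2} as one contiguous batch when it hits page 3, resets the count, and submits {4, 5} at the end, rather than handing the block layer a single list with a hole in it.
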
diff --git a/mm/rmap.c b/mm/rmap.c
index 8d5337f..6db729d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -942,7 +942,7 @@
 		 * downgrading page table protection not changing it to point
 		 * to a new page.
 		 *
-		 * See Documentation/vm/mmu_notifier.txt
+		 * See Documentation/vm/mmu_notifier.rst
 		 */
 		if (ret)
 			(*cleaned)++;
@@ -1599,7 +1599,7 @@
 			 * point at new page while a device still is using this
 			 * page.
 			 *
-			 * See Documentation/vm/mmu_notifier.txt
+			 * See Documentation/vm/mmu_notifier.rst
 			 */
 			dec_mm_counter(mm, mm_counter_file(page));
 		}
@@ -1609,7 +1609,7 @@
 		 * done above for all cases requiring it to happen under page
 		 * table lock before mmu_notifier_invalidate_range_end()
 		 *
-		 * See Documentation/vm/mmu_notifier.txt
+		 * See Documentation/vm/mmu_notifier.rst
 		 */
 		page_remove_rmap(subpage, PageHuge(page));
 		put_page(page);
diff --git a/mm/util.c b/mm/util.c
index 45fc316..c2d0a7c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -621,7 +621,7 @@
  * succeed and -ENOMEM implies there is not.
  *
  * We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting
+ * vm.overcommit_memory sysctl.  See Documentation/vm/overcommit-accounting.rst
  *
  * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
  * Additional code 2002 Jul 20 by Robert Love.
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ebff729..63a5f50 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2751,25 +2751,14 @@
 	.show = s_show,
 };
 
-static int vmalloc_open(struct inode *inode, struct file *file)
-{
-	if (IS_ENABLED(CONFIG_NUMA))
-		return seq_open_private(file, &vmalloc_op,
-					nr_node_ids * sizeof(unsigned int));
-	else
-		return seq_open(file, &vmalloc_op);
-}
-
-static const struct file_operations proc_vmalloc_operations = {
-	.open		= vmalloc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-};
-
 static int __init proc_vmalloc_init(void)
 {
-	proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations);
+	if (IS_ENABLED(CONFIG_NUMA))
+		proc_create_seq_private("vmallocinfo", S_IRUSR, NULL,
+				&vmalloc_op,
+				nr_node_ids * sizeof(unsigned int), NULL);
+	else
+		proc_create_seq("vmallocinfo", S_IRUSR, NULL, &vmalloc_op);
 	return 0;
 }
 module_init(proc_vmalloc_init);
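
The mm/vmalloc.c hunk above is the first of many instances of the same conversion in this series: a file_operations whose ->open merely wrapped seq_open() (or a variant) is deleted, and the seq_operations are handed straight to one of the new proc_create_seq*() helpers. The shape of the conversion, using a hypothetical entry "foo" rather than any real one:

	/* Before: ~15 lines of boilerplate per /proc file. */
	static int foo_open(struct inode *inode, struct file *file)
	{
		return seq_open(file, &foo_seq_ops);
	}

	static const struct file_operations foo_fops = {
		.open    = foo_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = seq_release,
	};

	proc_create("foo", 0444, parent, &foo_fops);

	/* After: one call, no file_operations at all. */
	proc_create_seq("foo", 0444, parent, &foo_seq_ops);

As used at the call sites in this diff, the _private variant additionally allocates per-open iterator state, the _data and _net variants stash a data pointer or netns-aware state in the proc entry, and _single/_net_single replace single_open()-style show functions.
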
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a2b9518..75eda9c2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1516,18 +1516,6 @@
 	.show	= frag_show,
 };
 
-static int fragmentation_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &fragmentation_op);
-}
-
-static const struct file_operations buddyinfo_file_operations = {
-	.open		= fragmentation_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static const struct seq_operations pagetypeinfo_op = {
 	.start	= frag_start,
 	.next	= frag_next,
@@ -1535,18 +1523,6 @@
 	.show	= pagetypeinfo_show,
 };
 
-static int pagetypeinfo_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &pagetypeinfo_op);
-}
-
-static const struct file_operations pagetypeinfo_file_operations = {
-	.open		= pagetypeinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
 {
 	int zid;
@@ -1663,18 +1639,6 @@
 	.show	= zoneinfo_show,
 };
 
-static int zoneinfo_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &zoneinfo_op);
-}
-
-static const struct file_operations zoneinfo_file_operations = {
-	.open		= zoneinfo_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 enum writeback_stat_item {
 	NR_DIRTY_THRESHOLD,
 	NR_DIRTY_BG_THRESHOLD,
@@ -1762,18 +1726,6 @@
 	.stop	= vmstat_stop,
 	.show	= vmstat_show,
 };
-
-static int vmstat_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &vmstat_op);
-}
-
-static const struct file_operations vmstat_file_operations = {
-	.open		= vmstat_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SMP
@@ -2020,10 +1972,10 @@
 	start_shepherd_timer();
 #endif
 #ifdef CONFIG_PROC_FS
-	proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations);
-	proc_create("pagetypeinfo", 0444, NULL, &pagetypeinfo_file_operations);
-	proc_create("vmstat", 0444, NULL, &vmstat_file_operations);
-	proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations);
+	proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
+	proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op);
+	proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
+	proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
 #endif
 }
 
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index a627a5d..d36e8c4 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -73,35 +73,6 @@
 	.show = vlan_seq_show,
 };
 
-static int vlan_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &vlan_seq_ops,
-			sizeof(struct seq_net_private));
-}
-
-static const struct file_operations vlan_fops = {
-	.open    = vlan_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-/*
- *	/proc/net/vlan/<device> file and inode operations
- */
-
-static int vlandev_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, vlandev_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations vlandev_fops = {
-	.open    = vlandev_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release,
-};
-
 /*
  * Proc filesystem directory entries.
  */
@@ -148,8 +119,9 @@
 	if (!vn->proc_vlan_dir)
 		goto err;
 
-	vn->proc_vlan_conf = proc_create(name_conf, S_IFREG | 0600,
-					 vn->proc_vlan_dir, &vlan_fops);
+	vn->proc_vlan_conf = proc_create_net(name_conf, S_IFREG | 0600,
+			vn->proc_vlan_dir, &vlan_seq_ops,
+			sizeof(struct seq_net_private));
 	if (!vn->proc_vlan_conf)
 		goto err;
 	return 0;
@@ -171,9 +143,8 @@
 
 	if (!strcmp(vlandev->name, name_conf))
 		return -EINVAL;
-	vlan->dent =
-		proc_create_data(vlandev->name, S_IFREG | 0600,
-				 vn->proc_vlan_dir, &vlandev_fops, vlandev);
+	vlan->dent = proc_create_single_data(vlandev->name, S_IFREG | 0600,
+			vn->proc_vlan_dir, vlandev_seq_show, vlandev);
 	if (!vlan->dent)
 		return -ENOBUFS;
 	return 0;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 848969f..588bf88 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -231,7 +231,7 @@
 static __poll_t
 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
 {
-	__poll_t ret, n;
+	__poll_t ret;
 	struct p9_trans_fd *ts = NULL;
 
 	if (client && client->status == Connected)
@@ -243,19 +243,9 @@
 		return EPOLLERR;
 	}
 
-	if (!ts->rd->f_op->poll)
-		ret = DEFAULT_POLLMASK;
-	else
-		ret = ts->rd->f_op->poll(ts->rd, pt);
-
-	if (ts->rd != ts->wr) {
-		if (!ts->wr->f_op->poll)
-			n = DEFAULT_POLLMASK;
-		else
-			n = ts->wr->f_op->poll(ts->wr, pt);
-		ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN);
-	}
-
+	ret = vfs_poll(ts->rd, pt);
+	if (ts->rd != ts->wr)
+		ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
 	return ret;
 }
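
The p9_fd_poll() hunk above replaces the open-coded "call ->poll if the file has one, otherwise assume DEFAULT_POLLMASK" dance with the new vfs_poll() helper. For reference, vfs_poll() at this point is roughly the following inline from include/linux/poll.h:

	static inline __poll_t vfs_poll(struct file *file,
					struct poll_table_struct *pt)
	{
		if (unlikely(!file->f_op->poll))
			return DEFAULT_POLLMASK;
		return file->f_op->poll(file, pt);
	}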
 
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d4c1021..49a16ce 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -907,11 +907,6 @@
 }
 
 #ifdef CONFIG_PROC_FS
-struct aarp_iter_state {
-	int bucket;
-	struct aarp_entry **table;
-};
-
 /*
  * Get the aarp entry that is in the chain described
  * by the iterator.
@@ -1033,25 +1028,12 @@
 	return 0;
 }
 
-static const struct seq_operations aarp_seq_ops = {
+const struct seq_operations aarp_seq_ops = {
 	.start  = aarp_seq_start,
 	.next   = aarp_seq_next,
 	.stop   = aarp_seq_stop,
 	.show   = aarp_seq_show,
 };
-
-static int aarp_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_private(file, &aarp_seq_ops,
-			sizeof(struct aarp_iter_state));
-}
-
-const struct file_operations atalk_seq_arp_fops = {
-	.open           = aarp_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release_private,
-};
 #endif
 
 /* General module cleanup. Called from cleanup_module() in ddp.c. */
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index 7214aea..8006295 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -210,42 +210,6 @@
 	.show   = atalk_seq_socket_show,
 };
 
-static int atalk_seq_interface_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &atalk_seq_interface_ops);
-}
-
-static int atalk_seq_route_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &atalk_seq_route_ops);
-}
-
-static int atalk_seq_socket_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &atalk_seq_socket_ops);
-}
-
-static const struct file_operations atalk_seq_interface_fops = {
-	.open		= atalk_seq_interface_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const struct file_operations atalk_seq_route_fops = {
-	.open		= atalk_seq_route_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const struct file_operations atalk_seq_socket_fops = {
-	.open		= atalk_seq_socket_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static struct proc_dir_entry *atalk_proc_dir;
 
 int __init atalk_proc_init(void)
@@ -257,22 +221,23 @@
 	if (!atalk_proc_dir)
 		goto out;
 
-	p = proc_create("interface", 0444, atalk_proc_dir,
-			&atalk_seq_interface_fops);
+	p = proc_create_seq("interface", 0444, atalk_proc_dir,
+			&atalk_seq_interface_ops);
 	if (!p)
 		goto out_interface;
 
-	p = proc_create("route", 0444, atalk_proc_dir,
-			&atalk_seq_route_fops);
+	p = proc_create_seq("route", 0444, atalk_proc_dir,
+			&atalk_seq_route_ops);
 	if (!p)
 		goto out_route;
 
-	p = proc_create("socket", 0444, atalk_proc_dir,
-			&atalk_seq_socket_fops);
+	p = proc_create_seq("socket", 0444, atalk_proc_dir,
+			&atalk_seq_socket_ops);
 	if (!p)
 		goto out_socket;
 
-	p = proc_create("arp", 0444, atalk_proc_dir, &atalk_seq_arp_fops);
+	p = proc_create_seq_private("arp", 0444, atalk_proc_dir, &aarp_seq_ops,
+			sizeof(struct aarp_iter_state), NULL);
 	if (!p)
 		goto out_arp;
 
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9b6bc5a..55fdba0 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1869,7 +1869,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= atalk_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.ioctl		= atalk_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= atalk_compat_ioctl,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index fd94bea..36b3ada 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -818,18 +818,6 @@
 	.show = br2684_seq_show,
 };
 
-static int br2684_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &br2684_seq_ops);
-}
-
-static const struct file_operations br2684_proc_ops = {
-	.open = br2684_proc_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 extern struct proc_dir_entry *atm_proc_root;	/* from proc.c */
 #endif /* CONFIG_PROC_FS */
 
@@ -837,7 +825,7 @@
 {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *p;
-	p = proc_create("br2684", 0, atm_proc_root, &br2684_proc_ops);
+	p = proc_create_seq("br2684", 0, atm_proc_root, &br2684_seq_ops);
 	if (p == NULL)
 		return -ENOMEM;
 #endif
diff --git a/net/atm/clip.c b/net/atm/clip.c
index f07dbc6..66caa48 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -863,20 +863,6 @@
 	.stop	= neigh_seq_stop,
 	.show	= clip_seq_show,
 };
-
-static int arp_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &arp_seq_ops,
-			    sizeof(struct clip_seq_state));
-}
-
-static const struct file_operations arp_seq_fops = {
-	.open		= arp_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-	.owner		= THIS_MODULE
-};
 #endif
 
 static void atm_clip_exit_noproc(void);
@@ -893,7 +879,8 @@
 	{
 		struct proc_dir_entry *p;
 
-		p = proc_create("arp", 0444, atm_proc_root, &arp_seq_fops);
+		p = proc_create_net("arp", 0444, atm_proc_root, &arp_seq_ops,
+				sizeof(struct clip_seq_state));
 		if (!p) {
 			pr_err("Unable to initialize /proc/net/atm/arp\n");
 			atm_clip_exit_noproc();
diff --git a/net/atm/common.c b/net/atm/common.c
index fc78a05..1f2af59 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -648,16 +648,11 @@
 	return error;
 }
 
-__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t vcc_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
-	struct atm_vcc *vcc;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
-
-	vcc = ATM_SD(sock);
+	struct atm_vcc *vcc = ATM_SD(sock);
+	__poll_t mask = 0;
 
 	/* exceptional events */
 	if (sk->sk_err)
diff --git a/net/atm/common.h b/net/atm/common.h
index 58506490..526796a 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@
 int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		int flags);
 int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t vcc_poll_mask(struct socket *sock, __poll_t events);
 int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int vcc_setsockopt(struct socket *sock, int level, int optname,
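
vcc_poll() to vcc_poll_mask() is the template for the many ->poll to ->poll_mask conversions in this series: the socket core now registers the waitqueue (the old sock_poll_wait()/poll_wait() call) before invoking the method, so each protocol only has to translate socket state into an EPOLL* mask for the requested events. A converted method reduces to this shape (foo_poll_mask is a hypothetical example modelled on vcc_poll_mask above and datagram_poll_mask later in this diff):

	static __poll_t foo_poll_mask(struct socket *sock, __poll_t events)
	{
		struct sock *sk = sock->sk;
		__poll_t mask = 0;

		/* No poll_wait() here: the caller has already registered
		 * the waitqueue before calling ->poll_mask. */
		if (sk->sk_err)
			mask |= EPOLLERR;
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= EPOLLIN | EPOLLRDNORM;
		if (sock_writeable(sk))
			mask |= EPOLLOUT | EPOLLWRNORM;
		return mask;
	}
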
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 3138a86..5a95fcf 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -990,18 +990,6 @@
 	.stop = lec_seq_stop,
 	.show = lec_seq_show,
 };
-
-static int lec_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_private(file, &lec_seq_ops, sizeof(struct lec_state));
-}
-
-static const struct file_operations lec_seq_fops = {
-	.open = lec_seq_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_private,
-};
 #endif
 
 static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
@@ -1047,7 +1035,8 @@
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *p;
 
-	p = proc_create("lec", 0444, atm_proc_root, &lec_seq_fops);
+	p = proc_create_seq_private("lec", 0444, atm_proc_root, &lec_seq_ops,
+			sizeof(struct lec_state), NULL);
 	if (!p) {
 		pr_err("Unable to initialize /proc/net/atm/lec\n");
 		return -ENOMEM;
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 55410c0..0b0495a 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -68,7 +68,6 @@
 struct vcc_state {
 	int bucket;
 	struct sock *sk;
-	int family;
 };
 
 static inline int compare_family(struct sock *sk, int family)
@@ -106,25 +105,15 @@
 	return (l < 0);
 }
 
-static inline void *vcc_walk(struct vcc_state *state, loff_t l)
+static inline void *vcc_walk(struct seq_file *seq, loff_t l)
 {
-	return __vcc_walk(&state->sk, state->family, &state->bucket, l) ?
+	struct vcc_state *state = seq->private;
+	int family = (uintptr_t)(PDE_DATA(file_inode(seq->file)));
+
+	return __vcc_walk(&state->sk, family, &state->bucket, l) ?
 	       state : NULL;
 }
 
-static int __vcc_seq_open(struct inode *inode, struct file *file,
-	int family, const struct seq_operations *ops)
-{
-	struct vcc_state *state;
-
-	state = __seq_open_private(file, ops, sizeof(*state));
-	if (state == NULL)
-		return -ENOMEM;
-
-	state->family = family;
-	return 0;
-}
-
 static void *vcc_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(vcc_sklist_lock)
 {
@@ -133,7 +122,7 @@
 
 	read_lock(&vcc_sklist_lock);
 	state->sk = SEQ_START_TOKEN;
-	return left ? vcc_walk(state, left) : SEQ_START_TOKEN;
+	return left ? vcc_walk(seq, left) : SEQ_START_TOKEN;
 }
 
 static void vcc_seq_stop(struct seq_file *seq, void *v)
@@ -144,9 +133,7 @@
 
 static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct vcc_state *state = seq->private;
-
-	v = vcc_walk(state, 1);
+	v = vcc_walk(seq, 1);
 	*pos += !!PTR_ERR(v);
 	return v;
 }
@@ -257,18 +244,6 @@
 	.show	= atm_dev_seq_show,
 };
 
-static int atm_dev_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &atm_dev_seq_ops);
-}
-
-static const struct file_operations devices_seq_fops = {
-	.open		= atm_dev_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static int pvc_seq_show(struct seq_file *seq, void *v)
 {
 	static char atm_pvc_banner[] =
@@ -292,18 +267,6 @@
 	.show	= pvc_seq_show,
 };
 
-static int pvc_seq_open(struct inode *inode, struct file *file)
-{
-	return __vcc_seq_open(inode, file, PF_ATMPVC, &pvc_seq_ops);
-}
-
-static const struct file_operations pvc_seq_fops = {
-	.open		= pvc_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-};
-
 static int vcc_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN) {
@@ -326,18 +289,6 @@
 	.show	= vcc_seq_show,
 };
 
-static int vcc_seq_open(struct inode *inode, struct file *file)
-{
-	return __vcc_seq_open(inode, file, 0, &vcc_seq_ops);
-}
-
-static const struct file_operations vcc_seq_fops = {
-	.open		= vcc_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-};
-
 static int svc_seq_show(struct seq_file *seq, void *v)
 {
 	static const char atm_svc_banner[] =
@@ -361,18 +312,6 @@
 	.show	= svc_seq_show,
 };
 
-static int svc_seq_open(struct inode *inode, struct file *file)
-{
-	return __vcc_seq_open(inode, file, PF_ATMSVC, &svc_seq_ops);
-}
-
-static const struct file_operations svc_seq_fops = {
-	.open		= svc_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-};
-
 static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
 				 size_t count, loff_t *pos)
 {
@@ -440,58 +379,22 @@
 	kfree(dev->proc_name);
 }
 
-static struct atm_proc_entry {
-	char *name;
-	const struct file_operations *proc_fops;
-	struct proc_dir_entry *dirent;
-} atm_proc_ents[] = {
-	{ .name = "devices",	.proc_fops = &devices_seq_fops },
-	{ .name = "pvc",	.proc_fops = &pvc_seq_fops },
-	{ .name = "svc",	.proc_fops = &svc_seq_fops },
-	{ .name = "vc",		.proc_fops = &vcc_seq_fops },
-	{ .name = NULL,		.proc_fops = NULL }
-};
-
-static void atm_proc_dirs_remove(void)
-{
-	static struct atm_proc_entry *e;
-
-	for (e = atm_proc_ents; e->name; e++) {
-		if (e->dirent)
-			remove_proc_entry(e->name, atm_proc_root);
-	}
-	remove_proc_entry("atm", init_net.proc_net);
-}
-
 int __init atm_proc_init(void)
 {
-	static struct atm_proc_entry *e;
-	int ret;
-
 	atm_proc_root = proc_net_mkdir(&init_net, "atm", init_net.proc_net);
 	if (!atm_proc_root)
-		goto err_out;
-	for (e = atm_proc_ents; e->name; e++) {
-		struct proc_dir_entry *dirent;
-
-		dirent = proc_create(e->name, 0444,
-				     atm_proc_root, e->proc_fops);
-		if (!dirent)
-			goto err_out_remove;
-		e->dirent = dirent;
-	}
-	ret = 0;
-out:
-	return ret;
-
-err_out_remove:
-	atm_proc_dirs_remove();
-err_out:
-	ret = -ENOMEM;
-	goto out;
+		return -ENOMEM;
+	proc_create_seq("devices", 0444, atm_proc_root, &atm_dev_seq_ops);
+	proc_create_seq_private("pvc", 0444, atm_proc_root, &pvc_seq_ops,
+			sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMPVC);
+	proc_create_seq_private("svc", 0444, atm_proc_root, &svc_seq_ops,
+			sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMSVC);
+	proc_create_seq_private("vc", 0444, atm_proc_root, &vcc_seq_ops,
+			sizeof(struct vcc_state), NULL);
+	return 0;
 }
 
 void atm_proc_exit(void)
 {
-	atm_proc_dirs_remove();
+	remove_proc_subtree("atm", init_net.proc_net);
 }
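
Two details of the atm/proc.c rewrite above are worth calling out. First, the per-family state that __vcc_seq_open() used to copy into each open file is now carried by the proc entry itself: the address family is cast to a pointer at registration time and recovered in vcc_walk() with PDE_DATA(file_inode(seq->file)). Second, teardown no longer needs the atm_proc_ents[] bookkeeping table, because remove_proc_subtree() removes "atm" and every entry beneath it in one call. The scalar-through-data-pointer round trip, as it appears above:

	/* registration: smuggle the family in the entry's data pointer */
	proc_create_seq_private("pvc", 0444, atm_proc_root, &pvc_seq_ops,
			sizeof(struct vcc_state), (void *)(uintptr_t)PF_ATMPVC);

	/* lookup, usable from any seq_operations callback */
	int family = (uintptr_t)PDE_DATA(file_inode(seq->file));
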
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 2cb10af..9f75092 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -113,7 +113,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	pvc_getname,
-	.poll =		vcc_poll,
+	.poll_mask =	vcc_poll_mask,
 	.ioctl =	vcc_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = vcc_compat_ioctl,
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 2f91b76..53f4ad7 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -636,7 +636,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	svc_accept,
 	.getname =	svc_getname,
-	.poll =		vcc_poll,
+	.poll_mask =	vcc_poll_mask,
 	.ioctl =	svc_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl =	svc_compat_ioctl,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 2b41366..d1d2442 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1924,19 +1924,6 @@
 	.stop = ax25_info_stop,
 	.show = ax25_info_show,
 };
-
-static int ax25_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ax25_info_seqops);
-}
-
-static const struct file_operations ax25_info_fops = {
-	.open = ax25_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 #endif
 
 static const struct net_proto_family ax25_family_ops = {
@@ -1954,7 +1941,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= ax25_accept,
 	.getname	= ax25_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.ioctl		= ax25_ioctl,
 	.listen		= ax25_listen,
 	.shutdown	= ax25_shutdown,
@@ -1989,10 +1976,10 @@
 	dev_add_pack(&ax25_packet_type);
 	register_netdevice_notifier(&ax25_dev_notifier);
 
-	proc_create("ax25_route", 0444, init_net.proc_net,
-		    &ax25_route_fops);
-	proc_create("ax25", 0444, init_net.proc_net, &ax25_info_fops);
-	proc_create("ax25_calls", 0444, init_net.proc_net, &ax25_uid_fops);
+	proc_create_seq("ax25_route", 0444, init_net.proc_net, &ax25_rt_seqops);
+	proc_create_seq("ax25", 0444, init_net.proc_net, &ax25_info_seqops);
+	proc_create_seq("ax25_calls", 0444, init_net.proc_net,
+			&ax25_uid_seqops);
 out:
 	return rc;
 }
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 5255589..a0eff32 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -323,25 +323,12 @@
 	return 0;
 }
 
-static const struct seq_operations ax25_rt_seqops = {
+const struct seq_operations ax25_rt_seqops = {
 	.start = ax25_rt_seq_start,
 	.next = ax25_rt_seq_next,
 	.stop = ax25_rt_seq_stop,
 	.show = ax25_rt_seq_show,
 };
-
-static int ax25_rt_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ax25_rt_seqops);
-}
-
-const struct file_operations ax25_route_fops = {
-	.open = ax25_rt_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 #endif
 
 /*
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index 4ebe91b..99d02e3 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -181,25 +181,12 @@
 	return 0;
 }
 
-static const struct seq_operations ax25_uid_seqops = {
+const struct seq_operations ax25_uid_seqops = {
 	.start = ax25_uid_seq_start,
 	.next = ax25_uid_seq_next,
 	.stop = ax25_uid_seq_stop,
 	.show = ax25_uid_seq_show,
 };
-
-static int ax25_uid_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ax25_uid_seqops);
-}
-
-const struct file_operations ax25_uid_fops = {
-	.open = ax25_uid_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 #endif
 
 /*
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 84d92a0..510ab4f 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -437,16 +437,13 @@
 	return 0;
 }
 
-__poll_t bt_sock_poll(struct file *file, struct socket *sock,
-			  poll_table *wait)
+__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
-	poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_state == BT_LISTEN)
 		return bt_accept_poll(sk);
 
@@ -478,7 +475,7 @@
 
 	return mask;
 }
-EXPORT_SYMBOL(bt_sock_poll);
+EXPORT_SYMBOL(bt_sock_poll_mask);
 
 int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
@@ -605,15 +602,10 @@
 EXPORT_SYMBOL(bt_sock_wait_ready);
 
 #ifdef CONFIG_PROC_FS
-struct bt_seq_state {
-	struct bt_sock_list *l;
-};
-
 static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(seq->private->l->lock)
 {
-	struct bt_seq_state *s = seq->private;
-	struct bt_sock_list *l = s->l;
+	struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
 
 	read_lock(&l->lock);
 	return seq_hlist_start_head(&l->head, *pos);
@@ -621,8 +613,7 @@
 
 static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct bt_seq_state *s = seq->private;
-	struct bt_sock_list *l = s->l;
+	struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
 
 	return seq_hlist_next(v, &l->head, pos);
 }
@@ -630,16 +621,14 @@
 static void bt_seq_stop(struct seq_file *seq, void *v)
 	__releases(seq->private->l->lock)
 {
-	struct bt_seq_state *s = seq->private;
-	struct bt_sock_list *l = s->l;
+	struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
 
 	read_unlock(&l->lock);
 }
 
 static int bt_seq_show(struct seq_file *seq, void *v)
 {
-	struct bt_seq_state *s = seq->private;
-	struct bt_sock_list *l = s->l;
+	struct bt_sock_list *l = PDE_DATA(file_inode(seq->file));
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq ,"sk               RefCnt Rmem   Wmem   User   Inode  Parent");
@@ -681,35 +670,13 @@
 	.show  = bt_seq_show,
 };
 
-static int bt_seq_open(struct inode *inode, struct file *file)
-{
-	struct bt_sock_list *sk_list;
-	struct bt_seq_state *s;
-
-	sk_list = PDE_DATA(inode);
-	s = __seq_open_private(file, &bt_seq_ops,
-			       sizeof(struct bt_seq_state));
-	if (!s)
-		return -ENOMEM;
-
-	s->l = sk_list;
-	return 0;
-}
-
-static const struct file_operations bt_fops = {
-	.open = bt_seq_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_private
-};
-
 int bt_procfs_init(struct net *net, const char *name,
 		   struct bt_sock_list *sk_list,
 		   int (* seq_show)(struct seq_file *, void *))
 {
 	sk_list->custom_seq_show = seq_show;
 
-	if (!proc_create_data(name, 0, net->proc_net, &bt_fops, sk_list))
+	if (!proc_create_seq_data(name, 0, net->proc_net, &bt_seq_ops, sk_list))
 		return -ENOMEM;
 	return 0;
 }
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index b5116fa..00deacd 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -175,7 +175,6 @@
 	.getname	= sock_no_getname,
 	.sendmsg	= sock_no_sendmsg,
 	.recvmsg	= sock_no_recvmsg,
-	.poll		= sock_no_poll,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 426a92f..eb415560 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -521,18 +521,6 @@
 	return 0;
 }
 
-static int cmtp_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cmtp_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations cmtp_proc_fops = {
-	.open		= cmtp_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 int cmtp_attach_device(struct cmtp_session *session)
 {
 	unsigned char buf[4];
@@ -571,7 +559,7 @@
 	session->ctrl.send_message  = cmtp_send_message;
 
 	session->ctrl.procinfo      = cmtp_procinfo;
-	session->ctrl.proc_fops = &cmtp_proc_fops;
+	session->ctrl.proc_show     = cmtp_proc_show;
 
 	if (attach_capi_ctr(&session->ctrl) < 0) {
 		BT_ERR("Can't attach new controller");
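
The cmtp/capi.c hunk tracks an interface change in the CAPI core: struct capi_ctr now takes a bare show callback (->proc_show) instead of a full ->proc_fops, with the core creating the /proc entry itself, presumably via proc_create_single_data() with the controller as the data pointer (the core-side call is outside this hunk). For a controller driver the conversion is a one-liner:

	session->ctrl.proc_show = cmtp_proc_show;	/* was: .proc_fops = &cmtp_proc_fops */
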
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index ce86a7b..e08f28fa 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -178,7 +178,6 @@
 	.getname	= sock_no_getname,
 	.sendmsg	= sock_no_sendmsg,
 	.recvmsg	= sock_no_recvmsg,
-	.poll		= sock_no_poll,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1506e16..d6c0998 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1975,7 +1975,7 @@
 	.sendmsg	= hci_sock_sendmsg,
 	.recvmsg	= hci_sock_recvmsg,
 	.ioctl		= hci_sock_ioctl,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= hci_sock_setsockopt,
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 008ba43..1eaac01 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -208,7 +208,6 @@
 	.getname	= sock_no_getname,
 	.sendmsg	= sock_no_sendmsg,
 	.recvmsg	= sock_no_recvmsg,
-	.poll		= sock_no_poll,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 686bdc6..742a190 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1653,7 +1653,7 @@
 	.getname	= l2cap_sock_getname,
 	.sendmsg	= l2cap_sock_sendmsg,
 	.recvmsg	= l2cap_sock_recvmsg,
-	.poll		= bt_sock_poll,
+	.poll_mask	= bt_sock_poll_mask,
 	.ioctl		= bt_sock_ioctl,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d606e92..1cf5762 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1049,7 +1049,7 @@
 	.setsockopt	= rfcomm_sock_setsockopt,
 	.getsockopt	= rfcomm_sock_getsockopt,
 	.ioctl		= rfcomm_sock_ioctl,
-	.poll		= bt_sock_poll,
+	.poll_mask	= bt_sock_poll_mask,
 	.socketpair	= sock_no_socketpair,
 	.mmap		= sock_no_mmap
 };
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 413b8ee..d60dbc6 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1197,7 +1197,7 @@
 	.getname	= sco_sock_getname,
 	.sendmsg	= sco_sock_sendmsg,
 	.recvmsg	= sco_sock_recvmsg,
-	.poll		= bt_sock_poll,
+	.poll_mask	= bt_sock_poll_mask,
 	.ioctl		= bt_sock_ioctl,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index c2138e7..28f68a2 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1071,7 +1071,7 @@
 
 #ifdef CONFIG_AUDIT
 	if (audit_enabled) {
-		audit_log(current->audit_context, GFP_KERNEL,
+		audit_log(audit_context(), GFP_KERNEL,
 			  AUDIT_NETFILTER_CFG,
 			  "table=%s family=%u entries=%u",
 			  repl->name, AF_BRIDGE, repl->nentries);
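
The audit_log(current->audit_context, ...) to audit_log(audit_context(), ...) change here, and again in net/core/dev.c below, follows an audit-subsystem cleanup that hides the task field behind an accessor. At this point in the tree the accessor is essentially the following inline from include/linux/audit.h (shown for orientation; the real definition is conditional on CONFIG_AUDITSYSCALL):

	static inline struct audit_context *audit_context(void)
	{
		return current->audit_context;
	}
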
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index a6fb1b3..c799186 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,15 +934,11 @@
 }
 
 /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static __poll_t caif_poll(struct file *file,
-			      struct socket *sock, poll_table *wait)
+static __poll_t caif_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
-	__poll_t mask;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;
 
 	/* exceptional events? */
 	if (sk->sk_err)
@@ -976,7 +972,7 @@
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.getname = sock_no_getname,
-	.poll = caif_poll,
+	.poll_mask = caif_poll_mask,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
 	.shutdown = sock_no_shutdown,
@@ -997,7 +993,7 @@
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.getname = sock_no_getname,
-	.poll = caif_poll,
+	.poll_mask = caif_poll_mask,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
 	.shutdown = sock_no_shutdown,
diff --git a/net/can/bcm.c b/net/can/bcm.c
index ac5e5e3..97fedff 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -239,18 +239,6 @@
 	seq_putc(m, '\n');
 	return 0;
 }
-
-static int bcm_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, bcm_proc_show);
-}
-
-static const struct file_operations bcm_proc_fops = {
-	.open		= bcm_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif /* CONFIG_PROC_FS */
 
 /*
@@ -1606,9 +1594,9 @@
 	if (net->can.bcmproc_dir) {
 		/* unique socket address as filename */
 		sprintf(bo->procname, "%lu", sock_i_ino(sk));
-		bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
+		bo->bcm_proc_read = proc_create_net_single(bo->procname, 0644,
 						     net->can.bcmproc_dir,
-						     &bcm_proc_fops, sk);
+						     bcm_proc_show, sk);
 		if (!bo->bcm_proc_read) {
 			ret = -ENOMEM;
 			goto fail;
@@ -1669,7 +1657,7 @@
 	.socketpair    = sock_no_socketpair,
 	.accept        = sock_no_accept,
 	.getname       = sock_no_getname,
-	.poll          = datagram_poll,
+	.poll_mask     = datagram_poll_mask,
 	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
diff --git a/net/can/proc.c b/net/can/proc.c
index fdf704e..70fea17 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -270,18 +270,6 @@
 	return 0;
 }
 
-static int can_stats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, can_stats_proc_show);
-}
-
-static const struct file_operations can_stats_proc_fops = {
-	.open		= can_stats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int can_reset_stats_proc_show(struct seq_file *m, void *v)
 {
 	struct net *net = m->private;
@@ -303,36 +291,12 @@
 	return 0;
 }
 
-static int can_reset_stats_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, can_reset_stats_proc_show);
-}
-
-static const struct file_operations can_reset_stats_proc_fops = {
-	.open		= can_reset_stats_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static int can_version_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "%s\n", CAN_VERSION_STRING);
 	return 0;
 }
 
-static int can_version_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, can_version_proc_show);
-}
-
-static const struct file_operations can_version_proc_fops = {
-	.open		= can_version_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx,
 					     struct net_device *dev,
 					     struct can_dev_rcv_lists *d)
@@ -373,18 +337,6 @@
 	return 0;
 }
 
-static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, can_rcvlist_proc_show);
-}
-
-static const struct file_operations can_rcvlist_proc_fops = {
-	.open		= can_rcvlist_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 static inline void can_rcvlist_proc_show_array(struct seq_file *m,
 					       struct net_device *dev,
 					       struct hlist_head *rcv_array,
@@ -440,19 +392,6 @@
 	return 0;
 }
 
-static int can_rcvlist_sff_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, can_rcvlist_sff_proc_show);
-}
-
-static const struct file_operations can_rcvlist_sff_proc_fops = {
-	.open		= can_rcvlist_sff_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-
 static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
 {
 	struct net_device *dev;
@@ -483,18 +422,6 @@
 	return 0;
 }
 
-static int can_rcvlist_eff_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, can_rcvlist_eff_proc_show);
-}
-
-static const struct file_operations can_rcvlist_eff_proc_fops = {
-	.open		= can_rcvlist_eff_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /*
  * can_init_proc - create main CAN proc directory and procfs entries
  */
@@ -510,37 +437,29 @@
 	}
 
 	/* own procfs entries from the AF_CAN core */
-	net->can.pde_version     = proc_create(CAN_PROC_VERSION, 0644,
-					       net->can.proc_dir,
-					       &can_version_proc_fops);
-	net->can.pde_stats       = proc_create(CAN_PROC_STATS, 0644,
-					       net->can.proc_dir,
-					       &can_stats_proc_fops);
-	net->can.pde_reset_stats = proc_create(CAN_PROC_RESET_STATS, 0644,
-					       net->can.proc_dir,
-					       &can_reset_stats_proc_fops);
-	net->can.pde_rcvlist_err = proc_create_data(CAN_PROC_RCVLIST_ERR, 0644,
-						    net->can.proc_dir,
-						    &can_rcvlist_proc_fops,
-						    (void *)RX_ERR);
-	net->can.pde_rcvlist_all = proc_create_data(CAN_PROC_RCVLIST_ALL, 0644,
-						    net->can.proc_dir,
-						    &can_rcvlist_proc_fops,
-						    (void *)RX_ALL);
-	net->can.pde_rcvlist_fil = proc_create_data(CAN_PROC_RCVLIST_FIL, 0644,
-						    net->can.proc_dir,
-						    &can_rcvlist_proc_fops,
-						    (void *)RX_FIL);
-	net->can.pde_rcvlist_inv = proc_create_data(CAN_PROC_RCVLIST_INV, 0644,
-						    net->can.proc_dir,
-						    &can_rcvlist_proc_fops,
-						    (void *)RX_INV);
-	net->can.pde_rcvlist_eff = proc_create(CAN_PROC_RCVLIST_EFF, 0644,
-					       net->can.proc_dir,
-					       &can_rcvlist_eff_proc_fops);
-	net->can.pde_rcvlist_sff = proc_create(CAN_PROC_RCVLIST_SFF, 0644,
-					       net->can.proc_dir,
-					       &can_rcvlist_sff_proc_fops);
+	net->can.pde_version = proc_create_net_single(CAN_PROC_VERSION, 0644,
+			net->can.proc_dir, can_version_proc_show, NULL);
+	net->can.pde_stats = proc_create_net_single(CAN_PROC_STATS, 0644,
+			net->can.proc_dir, can_stats_proc_show, NULL);
+	net->can.pde_reset_stats = proc_create_net_single(CAN_PROC_RESET_STATS,
+			0644, net->can.proc_dir, can_reset_stats_proc_show,
+			NULL);
+	net->can.pde_rcvlist_err = proc_create_net_single(CAN_PROC_RCVLIST_ERR,
+			0644, net->can.proc_dir, can_rcvlist_proc_show,
+			(void *)RX_ERR);
+	net->can.pde_rcvlist_all = proc_create_net_single(CAN_PROC_RCVLIST_ALL,
+			0644, net->can.proc_dir, can_rcvlist_proc_show,
+			(void *)RX_ALL);
+	net->can.pde_rcvlist_fil = proc_create_net_single(CAN_PROC_RCVLIST_FIL,
+			0644, net->can.proc_dir, can_rcvlist_proc_show,
+			(void *)RX_FIL);
+	net->can.pde_rcvlist_inv = proc_create_net_single(CAN_PROC_RCVLIST_INV,
+			0644, net->can.proc_dir, can_rcvlist_proc_show,
+			(void *)RX_INV);
+	net->can.pde_rcvlist_eff = proc_create_net_single(CAN_PROC_RCVLIST_EFF,
+			0644, net->can.proc_dir, can_rcvlist_eff_proc_show, NULL);
+	net->can.pde_rcvlist_sff = proc_create_net_single(CAN_PROC_RCVLIST_SFF,
+			0644, net->can.proc_dir, can_rcvlist_sff_proc_show, NULL);
 }
 
 /*
diff --git a/net/can/raw.c b/net/can/raw.c
index 1051eee..fd7e2f4 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -843,7 +843,7 @@
 	.socketpair    = sock_no_socketpair,
 	.accept        = sock_no_accept,
 	.getname       = raw_getname,
-	.poll          = datagram_poll,
+	.poll_mask     = datagram_poll_mask,
 	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 21ac6e3..d7a7a23 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -62,7 +62,7 @@
 
 	if (num_mon > CEPH_MAX_MON)
 		goto bad;
-	m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS);
+	m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS);
 	if (m == NULL)
 		return ERR_PTR(-ENOMEM);
 	m->fsid = fsid;
@@ -1000,8 +1000,7 @@
 	int i;
 
 	/* build initial monmap */
-	monc->monmap = kzalloc(sizeof(*monc->monmap) +
-			       num_mon*sizeof(monc->monmap->mon_inst[0]),
+	monc->monmap = kzalloc(struct_size(monc->monmap, mon_inst, num_mon),
 			       GFP_KERNEL);
 	if (!monc->monmap)
 		return -ENOMEM;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d2667e5..69a2581 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -584,8 +584,7 @@
 		req = kmem_cache_alloc(ceph_osd_request_cache, gfp_flags);
 	} else {
 		BUG_ON(num_ops > CEPH_OSD_MAX_OPS);
-		req = kmalloc(sizeof(*req) + num_ops * sizeof(req->r_ops[0]),
-			      gfp_flags);
+		req = kmalloc(struct_size(req, r_ops, num_ops), gfp_flags);
 	}
 	if (unlikely(!req))
 		return NULL;
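
The two ceph hunks above adopt struct_size() from <linux/overflow.h> for trailing-array allocations. The macro computes sizeof(*p) plus n times the size of the trailing member, with the multiply and add checked for overflow (saturating to SIZE_MAX, so the allocation fails cleanly instead of wrapping and under-allocating). The conversion shape, matching the call sites above:

	/* before: unchecked arithmetic */
	m = kmalloc(sizeof(*m) + num_mon * sizeof(m->mon_inst[0]), GFP_NOFS);

	/* after: overflow-checked equivalent */
	m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS);
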
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9938952..f19bf3d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -819,9 +819,8 @@
 
 /**
  * 	datagram_poll - generic datagram poll
- *	@file: file struct
  *	@sock: socket
- *	@wait: poll table
+ *	@events: events to wait for
  *
  *	Datagram poll: Again totally generic. This also handles
  *	sequenced packet sockets providing the socket receive queue
@@ -831,14 +830,10 @@
  *	and you use a different write policy from sock_writeable()
  *	then please supply your own write_space callback.
  */
-__poll_t datagram_poll(struct file *file, struct socket *sock,
-			   poll_table *wait)
+__poll_t datagram_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;
 
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -871,4 +866,4 @@
 
 	return mask;
 }
-EXPORT_SYMBOL(datagram_poll);
+EXPORT_SYMBOL(datagram_poll_mask);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1844d9b..6e18242 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2883,11 +2883,7 @@
 EXPORT_SYMBOL(netdev_rx_csum_fault);
 #endif
 
-/* Actually, we should eliminate this check as soon as we know, that:
- * 1. IOMMU is present and allows to map all the memory.
- * 2. No high memory really exists on this machine.
- */
-
+/* XXX: check that highmem exists at all on the given machine. */
 static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
@@ -2901,20 +2897,6 @@
 				return 1;
 		}
 	}
-
-	if (PCI_DMA_BUS_IS_PHYS) {
-		struct device *pdev = dev->dev.parent;
-
-		if (!pdev)
-			return 0;
-		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
-
-			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
-				return 1;
-		}
-	}
 #endif
 	return 0;
 }
@@ -6803,15 +6785,15 @@
 			dev->flags & IFF_PROMISC ? "entered" : "left");
 		if (audit_enabled) {
 			current_uid_gid(&uid, &gid);
-			audit_log(current->audit_context, GFP_ATOMIC,
-				AUDIT_ANOM_PROMISCUOUS,
-				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
-				dev->name, (dev->flags & IFF_PROMISC),
-				(old_flags & IFF_PROMISC),
-				from_kuid(&init_user_ns, audit_get_loginuid(current)),
-				from_kuid(&init_user_ns, uid),
-				from_kgid(&init_user_ns, gid),
-				audit_get_sessionid(current));
+			audit_log(audit_context(), GFP_ATOMIC,
+				  AUDIT_ANOM_PROMISCUOUS,
+				  "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
+				  dev->name, (dev->flags & IFF_PROMISC),
+				  (old_flags & IFF_PROMISC),
+				  from_kuid(&init_user_ns, audit_get_loginuid(current)),
+				  from_kuid(&init_user_ns, uid),
+				  from_kgid(&init_user_ns, gid),
+				  audit_get_sessionid(current));
 		}
 
 		dev_change_rx_flags(dev, IFF_PROMISC);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5afae29..a7a9c3d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -59,7 +59,7 @@
 				    struct net_device *dev);
 
 #ifdef CONFIG_PROC_FS
-static const struct file_operations neigh_stat_seq_fops;
+static const struct seq_operations neigh_stat_seq_ops;
 #endif
 
 /*
@@ -1561,8 +1561,8 @@
 		panic("cannot create neighbour cache statistics");
 
 #ifdef CONFIG_PROC_FS
-	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
-			      &neigh_stat_seq_fops, tbl))
+	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
+			      &neigh_stat_seq_ops, tbl))
 		panic("cannot create neighbour proc dir entry");
 #endif
 
@@ -2792,7 +2792,7 @@
 
 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct neigh_table *tbl = seq->private;
+	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
 	int cpu;
 
 	if (*pos == 0)
@@ -2809,7 +2809,7 @@
 
 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct neigh_table *tbl = seq->private;
+	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
 	int cpu;
 
 	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
@@ -2828,7 +2828,7 @@
 
 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 {
-	struct neigh_table *tbl = seq->private;
+	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
 	struct neigh_statistics *st = v;
 
 	if (v == SEQ_START_TOKEN) {
@@ -2867,25 +2867,6 @@
 	.stop	= neigh_stat_seq_stop,
 	.show	= neigh_stat_seq_show,
 };
-
-static int neigh_stat_seq_open(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &neigh_stat_seq_ops);
-
-	if (!ret) {
-		struct seq_file *sf = file->private_data;
-		sf->private = PDE_DATA(inode);
-	}
-	return ret;
-};
-
-static const struct file_operations neigh_stat_seq_fops = {
-	.open 	 = neigh_stat_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
 #endif /* CONFIG_PROC_FS */
 
 static inline size_t neigh_nlmsg_size(void)
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 9737302..63881f7 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -175,19 +175,6 @@
 	.show  = dev_seq_show,
 };
 
-static int dev_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_seq_fops = {
-	.open    = dev_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 static const struct seq_operations softnet_seq_ops = {
 	.start = softnet_seq_start,
 	.next  = softnet_seq_next,
@@ -195,18 +182,6 @@
 	.show  = softnet_seq_show,
 };
 
-static int softnet_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &softnet_seq_ops);
-}
-
-static const struct file_operations softnet_seq_fops = {
-	.open    = softnet_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
 static void *ptype_get_idx(loff_t pos)
 {
 	struct packet_type *pt = NULL;
@@ -297,30 +272,18 @@
 	.show  = ptype_seq_show,
 };
 
-static int ptype_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ptype_seq_ops,
-			sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ptype_seq_fops = {
-	.open    = ptype_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-
 static int __net_init dev_proc_net_init(struct net *net)
 {
 	int rc = -ENOMEM;
 
-	if (!proc_create("dev", 0444, net->proc_net, &dev_seq_fops))
+	if (!proc_create_net("dev", 0444, net->proc_net, &dev_seq_ops,
+			sizeof(struct seq_net_private)))
 		goto out;
-	if (!proc_create("softnet_stat", 0444, net->proc_net,
-			 &softnet_seq_fops))
+	if (!proc_create_seq("softnet_stat", 0444, net->proc_net,
+			 &softnet_seq_ops))
 		goto out_dev;
-	if (!proc_create("ptype", 0444, net->proc_net, &ptype_seq_fops))
+	if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops,
+			sizeof(struct seq_net_private)))
 		goto out_softnet;
 
 	if (wext_proc_init(net))
@@ -377,22 +340,10 @@
 	.show  = dev_mc_seq_show,
 };
 
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
-	.open    = dev_mc_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 static int __net_init dev_mc_net_init(struct net *net)
 {
-	if (!proc_create("dev_mcast", 0, net->proc_net, &dev_mc_seq_fops))
+	if (!proc_create_net("dev_mcast", 0, net->proc_net, &dev_mc_seq_ops,
+			sizeof(struct seq_net_private)))
 		return -ENOMEM;
 	return 0;
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index feca4c9..f333d75 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2587,12 +2587,6 @@
 }
 EXPORT_SYMBOL(sock_no_getname);
 
-__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
-{
-	return 0;
-}
-EXPORT_SYMBOL(sock_no_poll);
-
 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	return -EOPNOTSUPP;
@@ -3459,22 +3453,10 @@
 	.show   = proto_seq_show,
 };
 
-static int proto_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &proto_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations proto_seq_fops = {
-	.open		= proto_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 static __net_init int proto_init_net(struct net *net)
 {
-	if (!proc_create("protocols", 0444, net->proc_net, &proto_seq_fops))
+	if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
+			sizeof(struct seq_net_private)))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f91e381..0ea2ee5 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,8 +316,7 @@
 		 int flags, int *addr_len);
 void dccp_shutdown(struct sock *sk, int how);
 int inet_dccp_listen(struct socket *sock, int backlog);
-__poll_t dccp_poll(struct file *file, struct socket *sock,
-		       poll_table *wait);
+__poll_t dccp_poll_mask(struct socket *sock, __poll_t events);
 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 void dccp_req_err(struct sock *sk, u64 seq);
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b08feb2..a9e478c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -984,7 +984,7 @@
 	.accept		   = inet_accept,
 	.getname	   = inet_getname,
 	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
-	.poll		   = dccp_poll,
+	.poll_mask	   = dccp_poll_mask,
 	.ioctl		   = inet_ioctl,
 	/* FIXME: work on inet_listen to rename it to sock_common_listen */
 	.listen		   = inet_dccp_listen,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6344f1b..17fc4e0 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1070,7 +1070,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = inet_accept,
 	.getname	   = inet6_getname,
-	.poll		   = dccp_poll,
+	.poll_mask	   = dccp_poll_mask,
 	.ioctl		   = inet6_ioctl,
 	.listen		   = inet_dccp_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 0d56e36..ca21c1c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -312,20 +312,11 @@
 
 EXPORT_SYMBOL_GPL(dccp_disconnect);
 
-/*
- *	Wait for a DCCP event.
- *
- *	Note that we don't need to lock the socket, as the upper poll layers
- *	take care of normal races (between the test and the event) and we don't
- *	go look at any of the socket buffers directly.
- */
-__poll_t dccp_poll(struct file *file, struct socket *sock,
-		       poll_table *wait)
+__poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
 {
 	__poll_t mask;
 	struct sock *sk = sock->sk;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
 	if (sk->sk_state == DCCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
@@ -367,7 +358,7 @@
 	return mask;
 }
 
-EXPORT_SYMBOL_GPL(dccp_poll);
+EXPORT_SYMBOL_GPL(dccp_poll_mask);
 
 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 3275160..9a686d8 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1207,11 +1207,11 @@
 }
 
 
-static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table  *wait)
+static __poll_t dn_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct dn_scp *scp = DN_SK(sk);
-	__poll_t mask = datagram_poll(file, sock, wait);
+	__poll_t mask = datagram_poll_mask(sock, events);
 
 	if (!skb_queue_empty(&scp->other_receive_queue))
 		mask |= EPOLLRDBAND;
@@ -2314,19 +2314,6 @@
 	.stop	= dn_socket_seq_stop,
 	.show	= dn_socket_seq_show,
 };
-
-static int dn_socket_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_private(file, &dn_socket_seq_ops,
-			sizeof(struct dn_iter_state));
-}
-
-static const struct file_operations dn_socket_seq_fops = {
-	.open		= dn_socket_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_private,
-};
 #endif
 
 static const struct net_proto_family	dn_family_ops = {
@@ -2344,7 +2331,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	dn_accept,
 	.getname =	dn_getname,
-	.poll =		dn_poll,
+	.poll_mask =	dn_poll_mask,
 	.ioctl =	dn_ioctl,
 	.listen =	dn_listen,
 	.shutdown =	dn_shutdown,
@@ -2383,7 +2370,9 @@
 	dev_add_pack(&dn_dix_packet_type);
 	register_netdevice_notifier(&dn_dev_notifier);
 
-	proc_create("decnet", 0444, init_net.proc_net, &dn_socket_seq_fops);
+	proc_create_seq_private("decnet", 0444, init_net.proc_net,
+			&dn_socket_seq_ops, sizeof(struct dn_iter_state),
+			NULL);
 	dn_register_sysctl();
 out:
 	return rc;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c03b046..bfd43e8 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1382,19 +1382,6 @@
 	.stop	= dn_dev_seq_stop,
 	.show	= dn_dev_seq_show,
 };
-
-static int dn_dev_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &dn_dev_seq_ops);
-}
-
-static const struct file_operations dn_dev_seq_fops = {
-	.open	 = dn_dev_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
 #endif /* CONFIG_PROC_FS */
 
 static int addr[2];
@@ -1424,7 +1411,7 @@
 	rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
 			     NULL, dn_nl_dump_ifaddr, 0);
 
-	proc_create("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_fops);
+	proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops);
 
 #ifdef CONFIG_SYSCTL
 	{
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 1315616..94b306f 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -589,27 +589,13 @@
 	.stop  = neigh_seq_stop,
 	.show  = dn_neigh_seq_show,
 };
-
-static int dn_neigh_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dn_neigh_seq_ops,
-			    sizeof(struct neigh_seq_state));
-}
-
-static const struct file_operations dn_neigh_seq_fops = {
-	.open		= dn_neigh_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 #endif
 
 void __init dn_neigh_init(void)
 {
 	neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
-	proc_create("decnet_neigh", 0444, init_net.proc_net,
-		    &dn_neigh_seq_fops);
+	proc_create_net("decnet_neigh", 0444, init_net.proc_net,
+			&dn_neigh_seq_ops, sizeof(struct neigh_seq_state));
 }
 
 void __exit dn_neigh_cleanup(void)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index eca0cc6..e747650 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1852,20 +1852,6 @@
 	.stop	= dn_rt_cache_seq_stop,
 	.show	= dn_rt_cache_seq_show,
 };
-
-static int dn_rt_cache_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_private(file, &dn_rt_cache_seq_ops,
-			sizeof(struct dn_rt_cache_iter_state));
-}
-
-static const struct file_operations dn_rt_cache_seq_fops = {
-	.open	 = dn_rt_cache_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_private,
-};
-
 #endif /* CONFIG_PROC_FS */
 
 void __init dn_route_init(void)
@@ -1918,8 +1904,9 @@
 
 	dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
 
-	proc_create("decnet_cache", 0444, init_net.proc_net,
-		    &dn_rt_cache_seq_fops);
+	proc_create_seq_private("decnet_cache", 0444, init_net.proc_net,
+			&dn_rt_cache_seq_ops,
+			sizeof(struct dn_rt_cache_iter_state), NULL);
 
 #ifdef CONFIG_DECNET_ROUTER
 	rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a60658c..a0768d2 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -423,7 +423,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.getname	   = sock_no_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = ieee802154_sock_ioctl,
 	.listen		   = sock_no_listen,
 	.shutdown	   = sock_no_shutdown,
@@ -969,7 +969,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.getname	   = sock_no_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = ieee802154_sock_ioctl,
 	.listen		   = sock_no_listen,
 	.shutdown	   = sock_no_shutdown,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b403499..15e1255 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -986,7 +986,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = inet_accept,
 	.getname	   = inet_getname,
-	.poll		   = tcp_poll,
+	.poll_mask	   = tcp_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.listen		   = inet_listen,
 	.shutdown	   = inet_shutdown,
@@ -1021,7 +1021,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.getname	   = inet_getname,
-	.poll		   = udp_poll,
+	.poll_mask	   = udp_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
@@ -1042,7 +1042,7 @@
 
 /*
  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
- * udp_poll
+ * udp_poll_mask
  */
 static const struct proto_ops inet_sockraw_ops = {
 	.family		   = PF_INET,
@@ -1053,7 +1053,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.getname	   = inet_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index bf6c2d4..e90c89e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1418,23 +1418,12 @@
 	.show	= arp_seq_show,
 };
 
-static int arp_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &arp_seq_ops,
-			    sizeof(struct neigh_seq_state));
-}
-
-static const struct file_operations arp_seq_fops = {
-	.open           = arp_seq_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release	= seq_release_net,
-};
-
+/* ------------------------------------------------------------------------ */
 
 static int __net_init arp_net_init(struct net *net)
 {
-	if (!proc_create("arp", 0444, net->proc_net, &arp_seq_fops))
+	if (!proc_create_net("arp", 0444, net->proc_net, &arp_seq_ops,
+			sizeof(struct neigh_seq_state)))
 		return -ENOMEM;
 	return 0;
 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 65c340f..5bc0c89 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2352,18 +2352,6 @@
 	return 0;
 }
 
-static int fib_triestat_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, fib_triestat_seq_show);
-}
-
-static const struct file_operations fib_triestat_fops = {
-	.open	= fib_triestat_seq_open,
-	.read	= seq_read,
-	.llseek	= seq_lseek,
-	.release = single_release_net,
-};
-
 static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
 {
 	struct fib_trie_iter *iter = seq->private;
@@ -2537,19 +2525,6 @@
 	.show   = fib_trie_seq_show,
 };
 
-static int fib_trie_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &fib_trie_seq_ops,
-			    sizeof(struct fib_trie_iter));
-}
-
-static const struct file_operations fib_trie_fops = {
-	.open   = fib_trie_seq_open,
-	.read   = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_net,
-};
-
 struct fib_route_iter {
 	struct seq_net_private p;
 	struct fib_table *main_tb;
@@ -2730,29 +2705,18 @@
 	.show   = fib_route_seq_show,
 };
 
-static int fib_route_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &fib_route_seq_ops,
-			    sizeof(struct fib_route_iter));
-}
-
-static const struct file_operations fib_route_fops = {
-	.open   = fib_route_seq_open,
-	.read   = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_net,
-};
-
 int __net_init fib_proc_init(struct net *net)
 {
-	if (!proc_create("fib_trie", 0444, net->proc_net, &fib_trie_fops))
+	if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
+			sizeof(struct fib_trie_iter)))
 		goto out1;
 
-	if (!proc_create("fib_triestat", 0444, net->proc_net,
-			 &fib_triestat_fops))
+	if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
+			fib_triestat_seq_show, NULL))
 		goto out2;
 
-	if (!proc_create("route", 0444, net->proc_net, &fib_route_fops))
+	if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
+			sizeof(struct fib_route_iter)))
 		goto out3;
 
 	return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b26a81a..85b617b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2829,19 +2829,6 @@
 	.show	=	igmp_mc_seq_show,
 };
 
-static int igmp_mc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &igmp_mc_seq_ops,
-			sizeof(struct igmp_mc_iter_state));
-}
-
-static const struct file_operations igmp_mc_seq_fops = {
-	.open		=	igmp_mc_seq_open,
-	.read		=	seq_read,
-	.llseek		=	seq_lseek,
-	.release	=	seq_release_net,
-};
-
 struct igmp_mcf_iter_state {
 	struct seq_net_private p;
 	struct net_device *dev;
@@ -2975,29 +2962,17 @@
 	.show	=	igmp_mcf_seq_show,
 };
 
-static int igmp_mcf_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &igmp_mcf_seq_ops,
-			sizeof(struct igmp_mcf_iter_state));
-}
-
-static const struct file_operations igmp_mcf_seq_fops = {
-	.open		=	igmp_mcf_seq_open,
-	.read		=	seq_read,
-	.llseek		=	seq_lseek,
-	.release	=	seq_release_net,
-};
-
 static int __net_init igmp_net_init(struct net *net)
 {
 	struct proc_dir_entry *pde;
 	int err;
 
-	pde = proc_create("igmp", 0444, net->proc_net, &igmp_mc_seq_fops);
+	pde = proc_create_net("igmp", 0444, net->proc_net, &igmp_mc_seq_ops,
+			sizeof(struct igmp_mc_iter_state));
 	if (!pde)
 		goto out_igmp;
-	pde = proc_create("mcfilter", 0444, net->proc_net,
-			  &igmp_mcf_seq_fops);
+	pde = proc_create_net("mcfilter", 0444, net->proc_net,
+			&igmp_mcf_seq_ops, sizeof(struct igmp_mcf_iter_state));
 	if (!pde)
 		goto out_mcfilter;
 	err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET,
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 86c9f75..8821261 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1318,18 +1318,6 @@
 	return 0;
 }
 
-static int pnp_seq_open(struct inode *indoe, struct file *file)
-{
-	return single_open(file, pnp_seq_show, NULL);
-}
-
-static const struct file_operations pnp_seq_fops = {
-	.open		= pnp_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
 /* Create the /proc/net/ipconfig directory */
 static int __init ipconfig_proc_net_init(void)
 {
@@ -1469,7 +1457,7 @@
 	}
 
 #ifdef CONFIG_PROC_FS
-	proc_create("pnp", 0444, init_net.proc_net, &pnp_seq_fops);
+	proc_create_single("pnp", 0444, init_net.proc_net, pnp_seq_show);
 
 	if (ipconfig_proc_net_init() == 0)
 		ipconfig_proc_net_create("ntp_servers", &ntp_servers_seq_fops);
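
pnp above is the plain single_open() case with no per-netns state; its
replacement helper is proc_create_single(). A one-line sketch with a
hypothetical quux_seq_show:

	/* The open/read/llseek/release plumbing of a single_open()
	 * file is built internally; the show callback is unchanged.
	 */
	proc_create_single("quux", 0444, init_net.proc_net, quux_seq_show);

proc_create_single_data() is the data-carrying variant; it appears later
in this series for the per-device snmp6 entries.
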
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 38e092e..9f79b98 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2829,19 +2829,6 @@
 	.show  = ipmr_vif_seq_show,
 };
 
-static int ipmr_vif_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
-			    sizeof(struct mr_vif_iter));
-}
-
-static const struct file_operations ipmr_vif_fops = {
-	.open    = ipmr_vif_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct net *net = seq_file_net(seq);
@@ -2901,19 +2888,6 @@
 	.stop  = mr_mfc_seq_stop,
 	.show  = ipmr_mfc_seq_show,
 };
-
-static int ipmr_mfc_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-			    sizeof(struct mr_mfc_iter));
-}
-
-static const struct file_operations ipmr_mfc_fops = {
-	.open    = ipmr_mfc_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
 #endif
 
 #ifdef CONFIG_IP_PIMSM_V2
@@ -2978,9 +2952,11 @@
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
-	if (!proc_create("ip_mr_vif", 0, net->proc_net, &ipmr_vif_fops))
+	if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops,
+			sizeof(struct mr_vif_iter)))
 		goto proc_vif_fail;
-	if (!proc_create("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_fops))
+	if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
+			sizeof(struct mr_mfc_iter)))
 		goto proc_cache_fail;
 #endif
 	return 0;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 56a0106..2ed64bc 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1150,58 +1150,24 @@
 	return 0;
 }
 
-static int ping_seq_open(struct inode *inode, struct file *file)
-{
-	struct ping_seq_afinfo *afinfo = PDE_DATA(inode);
-	return seq_open_net(inode, file, &afinfo->seq_ops,
-			   sizeof(struct ping_iter_state));
-}
-
-const struct file_operations ping_seq_fops = {
-	.open		= ping_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
+static const struct seq_operations ping_v4_seq_ops = {
+	.start		= ping_v4_seq_start,
+	.show		= ping_v4_seq_show,
+	.next		= ping_seq_next,
+	.stop		= ping_seq_stop,
 };
-EXPORT_SYMBOL_GPL(ping_seq_fops);
-
-static struct ping_seq_afinfo ping_v4_seq_afinfo = {
-	.name		= "icmp",
-	.family		= AF_INET,
-	.seq_fops	= &ping_seq_fops,
-	.seq_ops	= {
-		.start		= ping_v4_seq_start,
-		.show		= ping_v4_seq_show,
-		.next		= ping_seq_next,
-		.stop		= ping_seq_stop,
-	},
-};
-
-int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo)
-{
-	struct proc_dir_entry *p;
-	p = proc_create_data(afinfo->name, 0444, net->proc_net,
-			     afinfo->seq_fops, afinfo);
-	if (!p)
-		return -ENOMEM;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ping_proc_register);
-
-void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo)
-{
-	remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL_GPL(ping_proc_unregister);
 
 static int __net_init ping_v4_proc_init_net(struct net *net)
 {
-	return ping_proc_register(net, &ping_v4_seq_afinfo);
+	if (!proc_create_net("icmp", 0444, net->proc_net, &ping_v4_seq_ops,
+			sizeof(struct ping_iter_state)))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit ping_v4_proc_exit_net(struct net *net)
 {
-	ping_proc_unregister(net, &ping_v4_seq_afinfo);
+	remove_proc_entry("icmp", net->proc_net);
 }
 
 static struct pernet_operations ping_v4_net_ops = {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6c1ff89..77350c1 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -77,18 +77,6 @@
 	return 0;
 }
 
-static int sockstat_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, sockstat_seq_show);
-}
-
-static const struct file_operations sockstat_seq_fops = {
-	.open	 = sockstat_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
-
 /* snmp items */
 static const struct snmp_mib snmp4_ipstats_list[] = {
 	SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INPKTS),
@@ -463,20 +451,6 @@
 	return 0;
 }
 
-static int snmp_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, snmp_seq_show);
-}
-
-static const struct file_operations snmp_seq_fops = {
-	.open	 = snmp_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
-
-
-
 /*
  *	Output /proc/net/netstat
  */
@@ -510,26 +484,16 @@
 	return 0;
 }
 
-static int netstat_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, netstat_seq_show);
-}
-
-static const struct file_operations netstat_seq_fops = {
-	.open	 = netstat_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
-
 static __net_init int ip_proc_init_net(struct net *net)
 {
-	if (!proc_create("sockstat", 0444, net->proc_net,
-			 &sockstat_seq_fops))
+	if (!proc_create_net_single("sockstat", 0444, net->proc_net,
+			sockstat_seq_show, NULL))
 		goto out_sockstat;
-	if (!proc_create("netstat", 0444, net->proc_net, &netstat_seq_fops))
+	if (!proc_create_net_single("netstat", 0444, net->proc_net,
+			netstat_seq_show, NULL))
 		goto out_netstat;
-	if (!proc_create("snmp", 0444, net->proc_net, &snmp_seq_fops))
+	if (!proc_create_net_single("snmp", 0444, net->proc_net, snmp_seq_show,
+			NULL))
 		goto out_snmp;
 
 	return 0;
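
The sockstat/netstat/snmp conversion above is the single_open_net()
analogue: proc_create_net_single() keeps the per-netns show and release
semantics while dropping the same fops boilerplate. Sketched with a
hypothetical quuxstat_seq_show:

	if (!proc_create_net_single("quuxstat", 0444, net->proc_net,
			quuxstat_seq_show, NULL))
		return -ENOMEM;

The trailing argument is optional per-entry data; every
proc_create_net_single() caller converted in this merge passes NULL.
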
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 1b4d335..abb3c94 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1003,11 +1003,12 @@
 static struct sock *raw_get_first(struct seq_file *seq)
 {
 	struct sock *sk;
+	struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
 	struct raw_iter_state *state = raw_seq_private(seq);
 
 	for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE;
 			++state->bucket) {
-		sk_for_each(sk, &state->h->ht[state->bucket])
+		sk_for_each(sk, &h->ht[state->bucket])
 			if (sock_net(sk) == seq_file_net(seq))
 				goto found;
 	}
@@ -1018,6 +1019,7 @@
 
 static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk)
 {
+	struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
 	struct raw_iter_state *state = raw_seq_private(seq);
 
 	do {
@@ -1027,7 +1029,7 @@
 	} while (sk && sock_net(sk) != seq_file_net(seq));
 
 	if (!sk && ++state->bucket < RAW_HTABLE_SIZE) {
-		sk = sk_head(&state->h->ht[state->bucket]);
+		sk = sk_head(&h->ht[state->bucket]);
 		goto try_again;
 	}
 	return sk;
@@ -1045,9 +1047,9 @@
 
 void *raw_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct raw_iter_state *state = raw_seq_private(seq);
+	struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
 
-	read_lock(&state->h->lock);
+	read_lock(&h->lock);
 	return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 EXPORT_SYMBOL_GPL(raw_seq_start);
@@ -1067,9 +1069,9 @@
 
 void raw_seq_stop(struct seq_file *seq, void *v)
 {
-	struct raw_iter_state *state = raw_seq_private(seq);
+	struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file));
 
-	read_unlock(&state->h->lock);
+	read_unlock(&h->lock);
 }
 EXPORT_SYMBOL_GPL(raw_seq_stop);
 
@@ -1110,37 +1112,10 @@
 	.show  = raw_seq_show,
 };
 
-int raw_seq_open(struct inode *ino, struct file *file,
-		 struct raw_hashinfo *h, const struct seq_operations *ops)
-{
-	int err;
-	struct raw_iter_state *i;
-
-	err = seq_open_net(ino, file, ops, sizeof(struct raw_iter_state));
-	if (err < 0)
-		return err;
-
-	i = raw_seq_private((struct seq_file *)file->private_data);
-	i->h = h;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(raw_seq_open);
-
-static int raw_v4_seq_open(struct inode *inode, struct file *file)
-{
-	return raw_seq_open(inode, file, &raw_v4_hashinfo, &raw_seq_ops);
-}
-
-static const struct file_operations raw_seq_fops = {
-	.open	 = raw_v4_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
 static __net_init int raw_init_net(struct net *net)
 {
-	if (!proc_create("raw", 0444, net->proc_net, &raw_seq_fops))
+	if (!proc_create_net_data("raw", 0444, net->proc_net, &raw_seq_ops,
+			sizeof(struct raw_iter_state), &raw_v4_hashinfo))
 		return -ENOMEM;
 
 	return 0;
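
raw.c shows the data-carrying variant of the same conversion: where
raw_seq_open() used to copy the hashinfo pointer into the iterator state,
proc_create_net_data() now attaches it to the /proc entry, and each seq
callback recovers it with PDE_DATA(file_inode(seq->file)). A sketch of the
idiom under hypothetical baz_* names:

	/* Registration: hand the hash table to the proc entry itself. */
	proc_create_net_data("baz", 0444, net->proc_net, &baz_seq_ops,
			sizeof(struct baz_iter_state), &baz_hashinfo);

	/* Callbacks fetch it back from the backing inode, so the
	 * iterator state no longer needs to carry the pointer.
	 */
	static void *baz_seq_start(struct seq_file *seq, loff_t *pos)
	{
		struct baz_hashinfo *h = PDE_DATA(file_inode(seq->file));

		read_lock(&h->lock);
		return *pos ? baz_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
	}

tcp, udp and udplite below use the same mechanism to share one set of
iterator callbacks across address families, passing only the family (and,
for udp, the hash table) in the slimmed-down afinfo structure.
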
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 45ad258..bf4e4ad 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -360,18 +360,6 @@
 	kfree(dst);
 	return 0;
 }
-
-static int rt_acct_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, rt_acct_proc_show, NULL);
-}
-
-static const struct file_operations rt_acct_proc_fops = {
-	.open		= rt_acct_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
 #endif
 
 static int __net_init ip_rt_do_proc_init(struct net *net)
@@ -389,7 +377,8 @@
 		goto err2;
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
-	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
+	pde = proc_create_single("rt_acct", 0, net->proc_net,
+			rt_acct_proc_show);
 	if (!pde)
 		goto err3;
 #endif
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0a2ea0b..2741953 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -494,32 +494,21 @@
 }
 
 /*
- *	Wait for a TCP event.
- *
- *	Note that we don't need to lock the socket, as the upper poll layers
- *	take care of normal races (between the test and the event) and we don't
- *	go look at any of the socket buffers directly.
+ * Socket is not locked. We are protected from async events by poll logic and
+ * correct handling of state changes made by other threads is impossible in
+ * any case.
  */
-__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
 {
-	__poll_t mask;
 	struct sock *sk = sock->sk;
 	const struct tcp_sock *tp = tcp_sk(sk);
+	__poll_t mask = 0;
 	int state;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
-
 	state = inet_sk_state_load(sk);
 	if (state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
-	/* Socket is not locked. We are protected from async events
-	 * by poll logic and correct handling of state changes
-	 * made by other threads is impossible in any case.
-	 */
-
-	mask = 0;
-
 	/*
 	 * EPOLLHUP is certainly not done right. But poll() doesn't
 	 * have a notion of HUP in just one direction, and for a
@@ -600,7 +589,7 @@
 
 	return mask;
 }
-EXPORT_SYMBOL(tcp_poll);
+EXPORT_SYMBOL(tcp_poll_mask);
 
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
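
Every .poll to .poll_mask switch in this merge has the shape visible in
tcp_poll_mask() above: the sock_poll_wait() call drops out because the
socket core now performs the wait-queue registration before invoking the
method, which is left with only the readiness computation. A side-by-side
sketch under a hypothetical qux protocol, not a symbol from this series:

	/* Old shape: the method registered itself on the wait queue. */
	static __poll_t qux_poll(struct file *file, struct socket *sock,
				 poll_table *wait)
	{
		struct sock *sk = sock->sk;
		__poll_t mask = 0;

		sock_poll_wait(file, sk_sleep(sk), wait);
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= EPOLLIN | EPOLLRDNORM;
		return mask;
	}

	/* New shape: no file or poll_table arguments; the requested
	 * events arrive directly and only the ready subset matters.
	 */
	static __poll_t qux_poll_mask(struct socket *sock, __poll_t events)
	{
		struct sock *sk = sock->sk;
		__poll_t mask = 0;

		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= EPOLLIN | EPOLLRDNORM;
		return mask;
	}
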
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 633963e..fed3f1c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2002,6 +2002,7 @@
  */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
+	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	struct inet_listen_hashbucket *ilb;
@@ -2024,7 +2025,7 @@
 	sk_for_each_from(sk) {
 		if (!net_eq(sock_net(sk), net))
 			continue;
-		if (sk->sk_family == st->family)
+		if (sk->sk_family == afinfo->family)
 			return sk;
 	}
 	spin_unlock(&ilb->lock);
@@ -2061,6 +2062,7 @@
  */
 static void *established_get_first(struct seq_file *seq)
 {
+	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	void *rc = NULL;
@@ -2077,7 +2079,7 @@
 
 		spin_lock_bh(lock);
 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-			if (sk->sk_family != st->family ||
+			if (sk->sk_family != afinfo->family ||
 			    !net_eq(sock_net(sk), net)) {
 				continue;
 			}
@@ -2092,6 +2094,7 @@
 
 static void *established_get_next(struct seq_file *seq, void *cur)
 {
+	struct tcp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct sock *sk = cur;
 	struct hlist_nulls_node *node;
 	struct tcp_iter_state *st = seq->private;
@@ -2103,7 +2106,8 @@
 	sk = sk_nulls_next(sk);
 
 	sk_nulls_for_each_from(sk, node) {
-		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
+		if (sk->sk_family == afinfo->family &&
+		    net_eq(sock_net(sk), net))
 			return sk;
 	}
 
@@ -2176,7 +2180,7 @@
 	return rc;
 }
 
-static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
+void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct tcp_iter_state *st = seq->private;
 	void *rc;
@@ -2197,8 +2201,9 @@
 	st->last_pos = *pos;
 	return rc;
 }
+EXPORT_SYMBOL(tcp_seq_start);
 
-static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct tcp_iter_state *st = seq->private;
 	void *rc = NULL;
@@ -2227,8 +2232,9 @@
 	st->last_pos = *pos;
 	return rc;
 }
+EXPORT_SYMBOL(tcp_seq_next);
 
-static void tcp_seq_stop(struct seq_file *seq, void *v)
+void tcp_seq_stop(struct seq_file *seq, void *v)
 {
 	struct tcp_iter_state *st = seq->private;
 
@@ -2243,47 +2249,7 @@
 		break;
 	}
 }
-
-int tcp_seq_open(struct inode *inode, struct file *file)
-{
-	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
-	struct tcp_iter_state *s;
-	int err;
-
-	err = seq_open_net(inode, file, &afinfo->seq_ops,
-			  sizeof(struct tcp_iter_state));
-	if (err < 0)
-		return err;
-
-	s = ((struct seq_file *)file->private_data)->private;
-	s->family		= afinfo->family;
-	s->last_pos		= 0;
-	return 0;
-}
-EXPORT_SYMBOL(tcp_seq_open);
-
-int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
-{
-	int rc = 0;
-	struct proc_dir_entry *p;
-
-	afinfo->seq_ops.start		= tcp_seq_start;
-	afinfo->seq_ops.next		= tcp_seq_next;
-	afinfo->seq_ops.stop		= tcp_seq_stop;
-
-	p = proc_create_data(afinfo->name, 0444, net->proc_net,
-			     afinfo->seq_fops, afinfo);
-	if (!p)
-		rc = -ENOMEM;
-	return rc;
-}
-EXPORT_SYMBOL(tcp_proc_register);
-
-void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
-{
-	remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL(tcp_proc_unregister);
+EXPORT_SYMBOL(tcp_seq_stop);
 
 static void get_openreq4(const struct request_sock *req,
 			 struct seq_file *f, int i)
@@ -2418,30 +2384,28 @@
 	return 0;
 }
 
-static const struct file_operations tcp_afinfo_seq_fops = {
-	.open    = tcp_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net
+static const struct seq_operations tcp4_seq_ops = {
+	.show		= tcp4_seq_show,
+	.start		= tcp_seq_start,
+	.next		= tcp_seq_next,
+	.stop		= tcp_seq_stop,
 };
 
 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
-	.name		= "tcp",
 	.family		= AF_INET,
-	.seq_fops	= &tcp_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= tcp4_seq_show,
-	},
 };
 
 static int __net_init tcp4_proc_init_net(struct net *net)
 {
-	return tcp_proc_register(net, &tcp4_seq_afinfo);
+	if (!proc_create_net_data("tcp", 0444, net->proc_net, &tcp4_seq_ops,
+			sizeof(struct tcp_iter_state), &tcp4_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit tcp4_proc_exit_net(struct net *net)
 {
-	tcp_proc_unregister(net, &tcp4_seq_afinfo);
+	remove_proc_entry("tcp", net->proc_net);
 }
 
 static struct pernet_operations tcp4_net_ops = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9c41a45..3365362 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2591,7 +2591,7 @@
  * 	udp_poll - wait for a UDP event.
  *	@file - file struct
  *	@sock - socket
- *	@wait - poll table
+ *	@events - events to wait for
  *
  *	This is same as datagram poll, except for the special case of
  *	blocking sockets. If application is using a blocking fd
@@ -2600,23 +2600,23 @@
  *	but then block when reading it. Add special case code
  *	to work around these arguably broken applications.
  */
-__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t udp_poll_mask(struct socket *sock, __poll_t events)
 {
-	__poll_t mask = datagram_poll(file, sock, wait);
+	__poll_t mask = datagram_poll_mask(sock, events);
 	struct sock *sk = sock->sk;
 
 	if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Check for false positives due to checksum errors */
-	if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+	if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) &&
 	    !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
 		mask &= ~(EPOLLIN | EPOLLRDNORM);
 
 	return mask;
 
 }
-EXPORT_SYMBOL(udp_poll);
+EXPORT_SYMBOL(udp_poll_mask);
 
 int udp_abort(struct sock *sk, int err)
 {
@@ -2672,12 +2672,13 @@
 static struct sock *udp_get_first(struct seq_file *seq, int start)
 {
 	struct sock *sk;
+	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct udp_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
-	for (state->bucket = start; state->bucket <= state->udp_table->mask;
+	for (state->bucket = start; state->bucket <= afinfo->udp_table->mask;
 	     ++state->bucket) {
-		struct udp_hslot *hslot = &state->udp_table->hash[state->bucket];
+		struct udp_hslot *hslot = &afinfo->udp_table->hash[state->bucket];
 
 		if (hlist_empty(&hslot->head))
 			continue;
@@ -2686,7 +2687,7 @@
 		sk_for_each(sk, &hslot->head) {
 			if (!net_eq(sock_net(sk), net))
 				continue;
-			if (sk->sk_family == state->family)
+			if (sk->sk_family == afinfo->family)
 				goto found;
 		}
 		spin_unlock_bh(&hslot->lock);
@@ -2698,16 +2699,17 @@
 
 static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
 {
+	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct udp_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
 
 	do {
 		sk = sk_next(sk);
-	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
+	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != afinfo->family));
 
 	if (!sk) {
-		if (state->bucket <= state->udp_table->mask)
-			spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
+		if (state->bucket <= afinfo->udp_table->mask)
+			spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
 		return udp_get_first(seq, state->bucket + 1);
 	}
 	return sk;
@@ -2723,15 +2725,16 @@
 	return pos ? NULL : sk;
 }
 
-static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
+void *udp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct udp_iter_state *state = seq->private;
 	state->bucket = MAX_UDP_PORTS;
 
 	return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
 }
+EXPORT_SYMBOL(udp_seq_start);
 
-static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct sock *sk;
 
@@ -2743,56 +2746,17 @@
 	++*pos;
 	return sk;
 }
+EXPORT_SYMBOL(udp_seq_next);
 
-static void udp_seq_stop(struct seq_file *seq, void *v)
+void udp_seq_stop(struct seq_file *seq, void *v)
 {
+	struct udp_seq_afinfo *afinfo = PDE_DATA(file_inode(seq->file));
 	struct udp_iter_state *state = seq->private;
 
-	if (state->bucket <= state->udp_table->mask)
-		spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
+	if (state->bucket <= afinfo->udp_table->mask)
+		spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock);
 }
-
-int udp_seq_open(struct inode *inode, struct file *file)
-{
-	struct udp_seq_afinfo *afinfo = PDE_DATA(inode);
-	struct udp_iter_state *s;
-	int err;
-
-	err = seq_open_net(inode, file, &afinfo->seq_ops,
-			   sizeof(struct udp_iter_state));
-	if (err < 0)
-		return err;
-
-	s = ((struct seq_file *)file->private_data)->private;
-	s->family		= afinfo->family;
-	s->udp_table		= afinfo->udp_table;
-	return err;
-}
-EXPORT_SYMBOL(udp_seq_open);
-
-/* ------------------------------------------------------------------------ */
-int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
-{
-	struct proc_dir_entry *p;
-	int rc = 0;
-
-	afinfo->seq_ops.start		= udp_seq_start;
-	afinfo->seq_ops.next		= udp_seq_next;
-	afinfo->seq_ops.stop		= udp_seq_stop;
-
-	p = proc_create_data(afinfo->name, 0444, net->proc_net,
-			     afinfo->seq_fops, afinfo);
-	if (!p)
-		rc = -ENOMEM;
-	return rc;
-}
-EXPORT_SYMBOL(udp_proc_register);
-
-void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
-{
-	remove_proc_entry(afinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL(udp_proc_unregister);
+EXPORT_SYMBOL(udp_seq_stop);
 
 /* ------------------------------------------------------------------------ */
 static void udp4_format_sock(struct sock *sp, struct seq_file *f,
@@ -2832,32 +2796,30 @@
 	return 0;
 }
 
-static const struct file_operations udp_afinfo_seq_fops = {
-	.open     = udp_seq_open,
-	.read     = seq_read,
-	.llseek   = seq_lseek,
-	.release  = seq_release_net
+const struct seq_operations udp_seq_ops = {
+	.start		= udp_seq_start,
+	.next		= udp_seq_next,
+	.stop		= udp_seq_stop,
+	.show		= udp4_seq_show,
 };
+EXPORT_SYMBOL(udp_seq_ops);
 
-/* ------------------------------------------------------------------------ */
 static struct udp_seq_afinfo udp4_seq_afinfo = {
-	.name		= "udp",
 	.family		= AF_INET,
 	.udp_table	= &udp_table,
-	.seq_fops	= &udp_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= udp4_seq_show,
-	},
 };
 
 static int __net_init udp4_proc_init_net(struct net *net)
 {
-	return udp_proc_register(net, &udp4_seq_afinfo);
+	if (!proc_create_net_data("udp", 0444, net->proc_net, &udp_seq_ops,
+			sizeof(struct udp_iter_state), &udp4_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit udp4_proc_exit_net(struct net *net)
 {
-	udp_proc_unregister(net, &udp4_seq_afinfo);
+	remove_proc_entry("udp", net->proc_net);
 }
 
 static struct pernet_operations udp4_net_ops = {
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index f96614e..8545457 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "UDPLite: " fmt
 
 #include <linux/export.h>
+#include <linux/proc_fs.h>
 #include "udp_impl.h"
 
 struct udp_table 	udplite_table __read_mostly;
@@ -73,32 +74,22 @@
 };
 
 #ifdef CONFIG_PROC_FS
-
-static const struct file_operations udplite_afinfo_seq_fops = {
-	.open     = udp_seq_open,
-	.read     = seq_read,
-	.llseek   = seq_lseek,
-	.release  = seq_release_net
-};
-
 static struct udp_seq_afinfo udplite4_seq_afinfo = {
-	.name		= "udplite",
 	.family		= AF_INET,
 	.udp_table 	= &udplite_table,
-	.seq_fops	= &udplite_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= udp4_seq_show,
-	},
 };
 
 static int __net_init udplite4_proc_init_net(struct net *net)
 {
-	return udp_proc_register(net, &udplite4_seq_afinfo);
+	if (!proc_create_net_data("udplite", 0444, net->proc_net, &udp_seq_ops,
+			sizeof(struct udp_iter_state), &udplite4_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit udplite4_proc_exit_net(struct net *net)
 {
-	udp_proc_unregister(net, &udplite4_seq_afinfo);
+	remove_proc_entry("udplite", net->proc_net);
 }
 
 static struct pernet_operations udplite4_net_ops = {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5596d87..89019bf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4282,22 +4282,10 @@
 	.stop	= if6_seq_stop,
 };
 
-static int if6_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &if6_seq_ops,
-			    sizeof(struct if6_iter_state));
-}
-
-static const struct file_operations if6_fops = {
-	.open		= if6_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 static int __net_init if6_proc_net_init(struct net *net)
 {
-	if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
+	if (!proc_create_net("if_inet6", 0444, net->proc_net, &if6_seq_ops,
+			sizeof(struct if6_iter_state)))
 		return -ENOMEM;
 	return 0;
 }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9ed0eae..74f2a26 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -570,7 +570,7 @@
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.accept		   = inet_accept,		/* ok		*/
 	.getname	   = inet6_getname,
-	.poll		   = tcp_poll,			/* ok		*/
+	.poll_mask	   = tcp_poll_mask,		/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.listen		   = inet_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
@@ -603,7 +603,7 @@
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.accept		   = sock_no_accept,		/* a do nothing	*/
 	.getname	   = inet6_getname,
-	.poll		   = udp_poll,			/* ok		*/
+	.poll_mask	   = udp_poll_mask,		/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.listen		   = sock_no_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0250d19..4e0ff70 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -526,22 +526,10 @@
 	.show	=	ac6_seq_show,
 };
 
-static int ac6_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ac6_seq_ops,
-			    sizeof(struct ac6_iter_state));
-}
-
-static const struct file_operations ac6_seq_fops = {
-	.open		=	ac6_seq_open,
-	.read		=	seq_read,
-	.llseek		=	seq_lseek,
-	.release	=	seq_release_net,
-};
-
 int __net_init ac6_proc_init(struct net *net)
 {
-	if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
+	if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops,
+			sizeof(struct ac6_iter_state)))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f9132a6..7aa4c41 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2229,15 +2229,6 @@
 }
 
 #ifdef CONFIG_PROC_FS
-
-struct ipv6_route_iter {
-	struct seq_net_private p;
-	struct fib6_walker w;
-	loff_t skip;
-	struct fib6_table *tbl;
-	int sernum;
-};
-
 static int ipv6_route_seq_show(struct seq_file *seq, void *v)
 {
 	struct fib6_info *rt = v;
@@ -2404,17 +2395,10 @@
 	rcu_read_unlock_bh();
 }
 
-static const struct seq_operations ipv6_route_seq_ops = {
+const struct seq_operations ipv6_route_seq_ops = {
 	.start	= ipv6_route_seq_start,
 	.next	= ipv6_route_seq_next,
 	.stop	= ipv6_route_seq_stop,
 	.show	= ipv6_route_seq_show
 };
-
-int ipv6_route_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ipv6_route_seq_ops,
-			    sizeof(struct ipv6_route_iter));
-}
-
 #endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c05c4e8..3eee763 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -754,6 +754,10 @@
 static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
+	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+
+	state->pid_ns = proc_pid_ns(file_inode(seq->file));
+
 	rcu_read_lock_bh();
 	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
@@ -808,44 +812,10 @@
 	.show	=	ip6fl_seq_show,
 };
 
-static int ip6fl_seq_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq;
-	struct ip6fl_iter_state *state;
-	int err;
-
-	err = seq_open_net(inode, file, &ip6fl_seq_ops,
-			   sizeof(struct ip6fl_iter_state));
-
-	if (!err) {
-		seq = file->private_data;
-		state = ip6fl_seq_private(seq);
-		rcu_read_lock();
-		state->pid_ns = get_pid_ns(task_active_pid_ns(current));
-		rcu_read_unlock();
-	}
-	return err;
-}
-
-static int ip6fl_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
-	put_pid_ns(state->pid_ns);
-	return seq_release_net(inode, file);
-}
-
-static const struct file_operations ip6fl_seq_fops = {
-	.open		=	ip6fl_seq_open,
-	.read		=	seq_read,
-	.llseek		=	seq_lseek,
-	.release	=	ip6fl_seq_release,
-};
-
 static int __net_init ip6_flowlabel_proc_init(struct net *net)
 {
-	if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
-			 &ip6fl_seq_fops))
+	if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
+			&ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
 		return -ENOMEM;
 	return 0;
 }
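
One behavioural detail in the ip6_flowlabel conversion above: the pid
namespace is no longer pinned with get_pid_ns(task_active_pid_ns(current))
at open() time. ip6fl_seq_start() now reads it from the procfs mount on
each iteration via proc_pid_ns(), and the custom release hook that dropped
the reference disappears, since procfs keeps its own namespace alive. The
resulting start routine, consolidated from the hunk above:

	static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
		__acquires(RCU)
	{
		struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

		/* No pid_ns refcounting: the namespace is owned by the
		 * procfs instance backing this file.
		 */
		state->pid_ns = proc_pid_ns(file_inode(seq->file));

		rcu_read_lock_bh();
		return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
	}
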
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 058fc05..0d0f005 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -445,19 +445,6 @@
 	.show  = ip6mr_vif_seq_show,
 };
 
-static int ip6mr_vif_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
-			    sizeof(struct mr_vif_iter));
-}
-
-static const struct file_operations ip6mr_vif_fops = {
-	.open    = ip6mr_vif_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct net *net = seq_file_net(seq);
@@ -518,19 +505,6 @@
 	.stop  = mr_mfc_seq_stop,
 	.show  = ipmr_mfc_seq_show,
 };
-
-static int ipmr_mfc_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
-			    sizeof(struct mr_mfc_iter));
-}
-
-static const struct file_operations ip6mr_mfc_fops = {
-	.open    = ipmr_mfc_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
 #endif
 
 #ifdef CONFIG_IPV6_PIMSM_V2
@@ -1322,9 +1296,11 @@
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
-	if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
+	if (!proc_create_net("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_seq_ops,
+			sizeof(struct mr_vif_iter)))
 		goto proc_vif_fail;
-	if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
+	if (!proc_create_net("ip6_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
+			sizeof(struct mr_mfc_iter)))
 		goto proc_cache_fail;
 #endif
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 793159d..975021d 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2749,19 +2749,6 @@
 	.show	=	igmp6_mc_seq_show,
 };
 
-static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &igmp6_mc_seq_ops,
-			    sizeof(struct igmp6_mc_iter_state));
-}
-
-static const struct file_operations igmp6_mc_seq_fops = {
-	.open		=	igmp6_mc_seq_open,
-	.read		=	seq_read,
-	.llseek		=	seq_lseek,
-	.release	=	seq_release_net,
-};
-
 struct igmp6_mcf_iter_state {
 	struct seq_net_private p;
 	struct net_device *dev;
@@ -2903,28 +2890,17 @@
 	.show	=	igmp6_mcf_seq_show,
 };
 
-static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &igmp6_mcf_seq_ops,
-			    sizeof(struct igmp6_mcf_iter_state));
-}
-
-static const struct file_operations igmp6_mcf_seq_fops = {
-	.open		=	igmp6_mcf_seq_open,
-	.read		=	seq_read,
-	.llseek		=	seq_lseek,
-	.release	=	seq_release_net,
-};
-
 static int __net_init igmp6_proc_init(struct net *net)
 {
 	int err;
 
 	err = -ENOMEM;
-	if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
+	if (!proc_create_net("igmp6", 0444, net->proc_net, &igmp6_mc_seq_ops,
+			sizeof(struct igmp6_mc_iter_state)))
 		goto out;
-	if (!proc_create("mcfilter6", 0444, net->proc_net,
-			 &igmp6_mcf_seq_fops))
+	if (!proc_create_net("mcfilter6", 0444, net->proc_net,
+			&igmp6_mcf_seq_ops,
+			sizeof(struct igmp6_mcf_iter_state)))
 		goto out_proc_net_igmp6;
 
 	err = 0;
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 746eeae..96f56bf 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -24,6 +24,7 @@
 #include <net/protocol.h>
 #include <net/udp.h>
 #include <net/transp_v6.h>
+#include <linux/proc_fs.h>
 #include <net/ping.h>
 
 /* Compatibility glue so we can support IPv6 when it's compiled as a module */
@@ -215,26 +216,24 @@
 	return 0;
 }
 
-static struct ping_seq_afinfo ping_v6_seq_afinfo = {
-	.name		= "icmp6",
-	.family		= AF_INET6,
-	.seq_fops       = &ping_seq_fops,
-	.seq_ops	= {
-		.start		= ping_v6_seq_start,
-		.show		= ping_v6_seq_show,
-		.next		= ping_seq_next,
-		.stop		= ping_seq_stop,
-	},
+static const struct seq_operations ping_v6_seq_ops = {
+	.start		= ping_v6_seq_start,
+	.show		= ping_v6_seq_show,
+	.next		= ping_seq_next,
+	.stop		= ping_seq_stop,
 };
 
 static int __net_init ping_v6_proc_init_net(struct net *net)
 {
-	return ping_proc_register(net, &ping_v6_seq_afinfo);
+	if (!proc_create_net("icmp6", 0444, net->proc_net, &ping_v6_seq_ops,
+			sizeof(struct ping_iter_state)))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_init ping_v6_proc_exit_net(struct net *net)
 {
-	return ping_proc_unregister(net, &ping_v6_seq_afinfo);
+	remove_proc_entry("icmp6", net->proc_net);
 }
 
 static struct pernet_operations ping_v6_net_ops = {
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index a85f7e0..2356b4a 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -53,18 +53,6 @@
 	return 0;
 }
 
-static int sockstat6_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, sockstat6_seq_show);
-}
-
-static const struct file_operations sockstat6_seq_fops = {
-	.open	 = sockstat6_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
-
 static const struct snmp_mib snmp6_ipstats_list[] = {
 /* ipv6 mib according to RFC 2465 */
 	SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS),
@@ -242,18 +230,6 @@
 	return 0;
 }
 
-static int snmp6_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, snmp6_seq_show);
-}
-
-static const struct file_operations snmp6_seq_fops = {
-	.open	 = snmp6_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
-
 static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
 {
 	struct inet6_dev *idev = (struct inet6_dev *)seq->private;
@@ -267,18 +243,6 @@
 	return 0;
 }
 
-static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode));
-}
-
-static const struct file_operations snmp6_dev_seq_fops = {
-	.open	 = snmp6_dev_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release,
-};
-
 int snmp6_register_dev(struct inet6_dev *idev)
 {
 	struct proc_dir_entry *p;
@@ -291,9 +255,8 @@
 	if (!net->mib.proc_net_devsnmp6)
 		return -ENOENT;
 
-	p = proc_create_data(idev->dev->name, 0444,
-			     net->mib.proc_net_devsnmp6,
-			     &snmp6_dev_seq_fops, idev);
+	p = proc_create_single_data(idev->dev->name, 0444,
+			net->mib.proc_net_devsnmp6, snmp6_dev_seq_show, idev);
 	if (!p)
 		return -ENOMEM;
 
@@ -315,11 +278,12 @@
 
 static int __net_init ipv6_proc_init_net(struct net *net)
 {
-	if (!proc_create("sockstat6", 0444, net->proc_net,
-			 &sockstat6_seq_fops))
+	if (!proc_create_net_single("sockstat6", 0444, net->proc_net,
+			sockstat6_seq_show, NULL))
 		return -ENOMEM;
 
-	if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
+	if (!proc_create_net_single("snmp6", 0444, net->proc_net,
+			snmp6_seq_show, NULL))
 		goto proc_snmp6_fail;
 
 	net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5eb9b08..ce6f0d1 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1304,21 +1304,10 @@
 	.show =		raw6_seq_show,
 };
 
-static int raw6_seq_open(struct inode *inode, struct file *file)
-{
-	return raw_seq_open(inode, file, &raw_v6_hashinfo, &raw6_seq_ops);
-}
-
-static const struct file_operations raw6_seq_fops = {
-	.open =		raw6_seq_open,
-	.read =		seq_read,
-	.llseek =	seq_lseek,
-	.release =	seq_release_net,
-};
-
 static int __net_init raw6_init_net(struct net *net)
 {
-	if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
+	if (!proc_create_net_data("raw6", 0444, net->proc_net, &raw6_seq_ops,
+			sizeof(struct raw_iter_state), &raw_v6_hashinfo))
 		return -ENOMEM;
 
 	return 0;
@@ -1345,7 +1334,7 @@
 }
 #endif	/* CONFIG_PROC_FS */
 
-/* Same as inet6_dgram_ops, sans udp_poll.  */
+/* Same as inet6_dgram_ops, sans udp_poll_mask.  */
 const struct proto_ops inet6_sockraw_ops = {
 	.family		   = PF_INET6,
 	.owner		   = THIS_MODULE,
@@ -1355,7 +1344,7 @@
 	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
 	.accept		   = sock_no_accept,		/* a do nothing	*/
 	.getname	   = inet6_getname,
-	.poll		   = datagram_poll,		/* ok		*/
+	.poll_mask	   = datagram_poll_mask,	/* ok		*/
 	.ioctl		   = inet6_ioctl,		/* must change  */
 	.listen		   = sock_no_listen,		/* ok		*/
 	.shutdown	   = inet_shutdown,		/* ok		*/
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1dc9871..fb95698 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5013,14 +5013,6 @@
  */
 
 #ifdef CONFIG_PROC_FS
-
-static const struct file_operations ipv6_route_proc_fops = {
-	.open		= ipv6_route_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = (struct net *)seq->private;
@@ -5035,18 +5027,6 @@
 
 	return 0;
 }
-
-static int rt6_stats_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, rt6_stats_seq_show);
-}
-
-static const struct file_operations rt6_stats_seq_fops = {
-	.open	 = rt6_stats_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
 #endif	/* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SYSCTL
@@ -5260,8 +5240,10 @@
 static int __net_init ip6_route_net_init_late(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
-	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
-	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
+	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
+			sizeof(struct ipv6_route_iter));
+	proc_create_net_single("rt6_stats", 0444, net->proc_net,
+			rt6_stats_seq_show, NULL);
 #endif
 	return 0;
 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8764a63..b620d9b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1913,30 +1913,28 @@
 	return 0;
 }
 
-static const struct file_operations tcp6_afinfo_seq_fops = {
-	.open    = tcp_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net
+static const struct seq_operations tcp6_seq_ops = {
+	.show		= tcp6_seq_show,
+	.start		= tcp_seq_start,
+	.next		= tcp_seq_next,
+	.stop		= tcp_seq_stop,
 };
 
 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
-	.name		= "tcp6",
 	.family		= AF_INET6,
-	.seq_fops	= &tcp6_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= tcp6_seq_show,
-	},
 };
 
 int __net_init tcp6_proc_init(struct net *net)
 {
-	return tcp_proc_register(net, &tcp6_seq_afinfo);
+	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
+			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 void tcp6_proc_exit(struct net *net)
 {
-	tcp_proc_unregister(net, &tcp6_seq_afinfo);
+	remove_proc_entry("tcp6", net->proc_net);
 }
 #endif
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 19d7d4c..164afd3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1528,31 +1528,30 @@
 	return 0;
 }
 
-static const struct file_operations udp6_afinfo_seq_fops = {
-	.open     = udp_seq_open,
-	.read     = seq_read,
-	.llseek   = seq_lseek,
-	.release  = seq_release_net
+const struct seq_operations udp6_seq_ops = {
+	.start		= udp_seq_start,
+	.next		= udp_seq_next,
+	.stop		= udp_seq_stop,
+	.show		= udp6_seq_show,
 };
+EXPORT_SYMBOL(udp6_seq_ops);
 
 static struct udp_seq_afinfo udp6_seq_afinfo = {
-	.name		= "udp6",
 	.family		= AF_INET6,
 	.udp_table	= &udp_table,
-	.seq_fops	= &udp6_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= udp6_seq_show,
-	},
 };
 
 int __net_init udp6_proc_init(struct net *net)
 {
-	return udp_proc_register(net, &udp6_seq_afinfo);
+	if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops,
+			sizeof(struct udp_iter_state), &udp6_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 void udp6_proc_exit(struct net *net)
 {
-	udp_proc_unregister(net, &udp6_seq_afinfo);
+	remove_proc_entry("udp6", net->proc_net);
 }
 #endif /* CONFIG_PROC_FS */
 
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 14ae32b..5000ad6 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -12,6 +12,7 @@
  *		2 of the License, or (at your option) any later version.
  */
 #include <linux/export.h>
+#include <linux/proc_fs.h>
 #include "udp_impl.h"
 
 static int udplitev6_rcv(struct sk_buff *skb)
@@ -92,32 +93,23 @@
 }
 
 #ifdef CONFIG_PROC_FS
-
-static const struct file_operations udplite6_afinfo_seq_fops = {
-	.open     = udp_seq_open,
-	.read     = seq_read,
-	.llseek   = seq_lseek,
-	.release  = seq_release_net
-};
-
 static struct udp_seq_afinfo udplite6_seq_afinfo = {
-	.name		= "udplite6",
 	.family		= AF_INET6,
 	.udp_table	= &udplite_table,
-	.seq_fops	= &udplite6_afinfo_seq_fops,
-	.seq_ops	= {
-		.show		= udp6_seq_show,
-	},
 };
 
 static int __net_init udplite6_proc_init_net(struct net *net)
 {
-	return udp_proc_register(net, &udplite6_seq_afinfo);
+	if (!proc_create_net_data("udplite6", 0444, net->proc_net,
+			&udp6_seq_ops, sizeof(struct udp_iter_state),
+			&udplite6_seq_afinfo))
+		return -ENOMEM;
+	return 0;
 }
 
 static void __net_exit udplite6_proc_exit_net(struct net *net)
 {
-	udp_proc_unregister(net, &udplite6_seq_afinfo);
+	remove_proc_entry("udplite6", net->proc_net);
 }
 
 static struct pernet_operations udplite6_net_ops = {
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 893a022..68e8625 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1488,14 +1488,11 @@
 	return 0;
 }
 
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
-			    poll_table *wait)
+static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_state == IUCV_LISTEN)
 		return iucv_accept_poll(sk);
 
@@ -2388,7 +2385,7 @@
 	.getname	= iucv_sock_getname,
 	.sendmsg	= iucv_sock_sendmsg,
 	.recvmsg	= iucv_sock_recvmsg,
-	.poll		= iucv_sock_poll,
+	.poll_mask	= iucv_sock_poll_mask,
 	.ioctl		= sock_no_ioctl,
 	.mmap		= sock_no_mmap,
 	.socketpair	= sock_no_socketpair,
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 1fac925..370da2f 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -15,12 +15,6 @@
 #include <net/tcp.h>
 
 #ifdef CONFIG_PROC_FS
-struct kcm_seq_muxinfo {
-	char				*name;
-	const struct file_operations	*seq_fops;
-	const struct seq_operations	seq_ops;
-};
-
 static struct kcm_mux *kcm_get_first(struct seq_file *seq)
 {
 	struct net *net = seq_file_net(seq);
@@ -86,14 +80,6 @@
 	int idx;
 };
 
-static int kcm_seq_open(struct inode *inode, struct file *file)
-{
-	struct kcm_seq_muxinfo *muxinfo = PDE_DATA(inode);
-
-	return seq_open_net(inode, file, &muxinfo->seq_ops,
-			   sizeof(struct kcm_proc_mux_state));
-}
-
 static void kcm_format_mux_header(struct seq_file *seq)
 {
 	struct net *net = seq_file_net(seq);
@@ -246,44 +232,13 @@
 	return 0;
 }
 
-static const struct file_operations kcm_seq_fops = {
-	.open		= kcm_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
+static const struct seq_operations kcm_seq_ops = {
+	.show	= kcm_seq_show,
+	.start	= kcm_seq_start,
+	.next	= kcm_seq_next,
+	.stop	= kcm_seq_stop,
 };
 
-static struct kcm_seq_muxinfo kcm_seq_muxinfo = {
-	.name		= "kcm",
-	.seq_fops	= &kcm_seq_fops,
-	.seq_ops	= {
-		.show	= kcm_seq_show,
-		.start	= kcm_seq_start,
-		.next	= kcm_seq_next,
-		.stop	= kcm_seq_stop,
-	}
-};
-
-static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo)
-{
-	struct proc_dir_entry *p;
-	int rc = 0;
-
-	p = proc_create_data(muxinfo->name, 0444, net->proc_net,
-			     muxinfo->seq_fops, muxinfo);
-	if (!p)
-		rc = -ENOMEM;
-	return rc;
-}
-EXPORT_SYMBOL(kcm_proc_register);
-
-static void kcm_proc_unregister(struct net *net,
-				struct kcm_seq_muxinfo *muxinfo)
-{
-	remove_proc_entry(muxinfo->name, net->proc_net);
-}
-EXPORT_SYMBOL(kcm_proc_unregister);
-
 static int kcm_stats_seq_show(struct seq_file *seq, void *v)
 {
 	struct kcm_psock_stats psock_stats;
@@ -390,30 +345,14 @@
 	return 0;
 }
 
-static int kcm_stats_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, kcm_stats_seq_show);
-}
-
-static const struct file_operations kcm_stats_seq_fops = {
-	.open    = kcm_stats_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = single_release_net,
-};
-
 static int kcm_proc_init_net(struct net *net)
 {
-	int err;
-
-	if (!proc_create("kcm_stats", 0444, net->proc_net,
-			 &kcm_stats_seq_fops)) {
-		err = -ENOMEM;
+	if (!proc_create_net_single("kcm_stats", 0444, net->proc_net,
+			 kcm_stats_seq_show, NULL))
 		goto out_kcm_stats;
-	}
 
-	err = kcm_proc_register(net, &kcm_seq_muxinfo);
-	if (err)
+	if (!proc_create_net("kcm", 0444, net->proc_net, &kcm_seq_ops,
+			sizeof(struct kcm_proc_mux_state)))
 		goto out_kcm;
 
 	return 0;
@@ -421,12 +360,12 @@
 out_kcm:
 	remove_proc_entry("kcm_stats", net->proc_net);
 out_kcm_stats:
-	return err;
+	return -ENOMEM;
 }
 
 static void kcm_proc_exit_net(struct net *net)
 {
-	kcm_proc_unregister(net, &kcm_seq_muxinfo);
+	remove_proc_entry("kcm", net->proc_net);
 	remove_proc_entry("kcm_stats", net->proc_net);
 }
 
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d3601d4..84b7d5c 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1336,9 +1336,9 @@
 	struct list_head *head;
 	int index = 0;
 
-	/* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
-	 * we set sk_state, otherwise epoll_wait always returns right away with
-	 * EPOLLHUP
+	/* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state,
+	 * so we set sk_state, otherwise epoll_wait always returns right away
+	 * with EPOLLHUP
 	 */
 	kcm->sk.sk_state = TCP_ESTABLISHED;
 
@@ -1903,7 +1903,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	sock_no_getname,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	kcm_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
@@ -1924,7 +1924,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	sock_no_getname,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	kcm_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index e62e52e..8bdc1cb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3751,7 +3751,7 @@
 
 	/* Now the operations that really occur. */
 	.release	=	pfkey_release,
-	.poll		=	datagram_poll,
+	.poll_mask	=	datagram_poll_mask,
 	.sendmsg	=	pfkey_sendmsg,
 	.recvmsg	=	pfkey_recvmsg,
 };
@@ -3812,24 +3812,12 @@
 	.show	= pfkey_seq_show,
 };
 
-static int pfkey_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &pfkey_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations pfkey_proc_ops = {
-	.open	 = pfkey_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
 static int __net_init pfkey_init_proc(struct net *net)
 {
 	struct proc_dir_entry *e;
 
-	e = proc_create("pfkey", 0, net->proc_net, &pfkey_proc_ops);
+	e = proc_create_net("pfkey", 0, net->proc_net, &pfkey_seq_ops,
+			sizeof(struct seq_net_private));
 	if (e == NULL)
 		return -ENOMEM;
 
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index a9c05b2..181073b 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -613,7 +613,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.getname	   = l2tp_ip_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 9573691..336e4c0 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -754,7 +754,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = sock_no_accept,
 	.getname	   = l2tp_ip6_getname,
-	.poll		   = datagram_poll,
+	.poll_mask	   = datagram_poll_mask,
 	.ioctl		   = inet6_ioctl,
 	.listen		   = sock_no_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f1c9c32..b56cb1d 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1746,24 +1746,6 @@
 	.stop		= pppol2tp_seq_stop,
 	.show		= pppol2tp_seq_show,
 };
-
-/* Called when our /proc file is opened. We allocate data for use when
- * iterating our tunnel / session contexts and store it in the private
- * data of the seq_file.
- */
-static int pppol2tp_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &pppol2tp_seq_ops,
-			    sizeof(struct pppol2tp_seq_data));
-}
-
-static const struct file_operations pppol2tp_proc_fops = {
-	.open		= pppol2tp_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 #endif /* CONFIG_PROC_FS */
 
 /*****************************************************************************
@@ -1775,8 +1757,8 @@
 	struct proc_dir_entry *pde;
 	int err = 0;
 
-	pde = proc_create("pppol2tp", 0444, net->proc_net,
-			  &pppol2tp_proc_fops);
+	pde = proc_create_net("pppol2tp", 0444, net->proc_net,
+			&pppol2tp_seq_ops, sizeof(struct pppol2tp_seq_data));
 	if (!pde) {
 		err = -ENOMEM;
 		goto out;
@@ -1810,7 +1792,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= pppol2tp_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= pppol2tp_setsockopt,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 1beeea9..804de84 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1192,7 +1192,7 @@
 	.socketpair  = sock_no_socketpair,
 	.accept      = llc_ui_accept,
 	.getname     = llc_ui_getname,
-	.poll	     = datagram_poll,
+	.poll_mask   = datagram_poll_mask,
 	.ioctl       = llc_ui_ioctl,
 	.listen      = llc_ui_listen,
 	.shutdown    = llc_ui_shutdown,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 62ea0ae..f3a36c1 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -214,30 +214,6 @@
 	.show   = llc_seq_core_show,
 };
 
-static int llc_seq_socket_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &llc_seq_socket_ops);
-}
-
-static int llc_seq_core_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &llc_seq_core_ops);
-}
-
-static const struct file_operations llc_seq_socket_fops = {
-	.open		= llc_seq_socket_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const struct file_operations llc_seq_core_fops = {
-	.open		= llc_seq_core_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 static struct proc_dir_entry *llc_proc_dir;
 
 int __init llc_proc_init(void)
@@ -249,11 +225,11 @@
 	if (!llc_proc_dir)
 		goto out;
 
-	p = proc_create("socket", 0444, llc_proc_dir, &llc_seq_socket_fops);
+	p = proc_create_seq("socket", 0444, llc_proc_dir, &llc_seq_socket_ops);
 	if (!p)
 		goto out_socket;
 
-	p = proc_create("core", 0444, llc_proc_dir, &llc_seq_core_fops);
+	p = proc_create_seq("core", 0444, llc_proc_dir, &llc_seq_core_ops);
 	if (!p)
 		goto out_core;
 
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 12d7489..7588aea 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -591,25 +591,13 @@
 	.stop  = ip_vs_app_seq_stop,
 	.show  = ip_vs_app_seq_show,
 };
-
-static int ip_vs_app_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ip_vs_app_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ip_vs_app_fops = {
-	.open	 = ip_vs_app_open,
-	.read	 = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
 #endif
 
 int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
 {
 	INIT_LIST_HEAD(&ipvs->app_list);
-	proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops);
+	proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
+			sizeof(struct seq_net_private));
 	return 0;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 75de465..61c3a38 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1131,19 +1131,6 @@
 	.show  = ip_vs_conn_seq_show,
 };
 
-static int ip_vs_conn_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
-			    sizeof(struct ip_vs_iter_state));
-}
-
-static const struct file_operations ip_vs_conn_fops = {
-	.open    = ip_vs_conn_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 static const char *ip_vs_origin_name(unsigned int flags)
 {
 	if (flags & IP_VS_CONN_F_SYNC)
@@ -1207,20 +1194,6 @@
 	.stop  = ip_vs_conn_seq_stop,
 	.show  = ip_vs_conn_sync_seq_show,
 };
-
-static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
-			    sizeof(struct ip_vs_iter_state));
-}
-
-static const struct file_operations ip_vs_conn_sync_fops = {
-	.open    = ip_vs_conn_sync_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 #endif
 
 
@@ -1380,9 +1353,11 @@
 {
 	atomic_set(&ipvs->conn_count, 0);
 
-	proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops);
-	proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net,
-		    &ip_vs_conn_sync_fops);
+	proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
+			&ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state));
+	proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+			&ip_vs_conn_sync_seq_ops,
+			sizeof(struct ip_vs_iter_state));
 	return 0;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index af89bb5..0c03c0e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2113,19 +2113,6 @@
 	.show  = ip_vs_info_seq_show,
 };
 
-static int ip_vs_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ip_vs_info_seq_ops,
-			sizeof(struct ip_vs_iter));
-}
-
-static const struct file_operations ip_vs_info_fops = {
-	.open    = ip_vs_info_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_single_net(seq);
@@ -2158,18 +2145,6 @@
 	return 0;
 }
 
-static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, ip_vs_stats_show);
-}
-
-static const struct file_operations ip_vs_stats_fops = {
-	.open = ip_vs_stats_seq_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release_net,
-};
-
 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_single_net(seq);
@@ -2225,18 +2200,6 @@
 
 	return 0;
 }
-
-static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, ip_vs_stats_percpu_show);
-}
-
-static const struct file_operations ip_vs_stats_percpu_fops = {
-	.open = ip_vs_stats_percpu_seq_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release_net,
-};
 #endif
 
 /*
@@ -4043,10 +4006,12 @@
 
 	spin_lock_init(&ipvs->tot_stats.lock);
 
-	proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
-	proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
-	proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
-		    &ip_vs_stats_percpu_fops);
+	proc_create_net("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_seq_ops,
+			sizeof(struct ip_vs_iter));
+	proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net,
+			ip_vs_stats_show, NULL);
+	proc_create_net_single("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
+			ip_vs_stats_percpu_show, NULL);
 
 	if (ip_vs_control_net_init_sysctl(ipvs))
 		goto err;
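
All of the IPVS conversions above are instances of one pattern from the new procfs API: the per-file open handler and its file_operations table are deleted, and the entry is registered straight from the seq_operations (or from a bare show callback), letting procfs supply the boilerplate internally. A minimal before/after sketch, using a hypothetical foo module (all foo_* names are illustrative):

	/* Before: ~15 lines of boilerplate for every proc file. */
	static int foo_open(struct inode *inode, struct file *file)
	{
		return seq_open_net(inode, file, &foo_seq_ops,
				    sizeof(struct seq_net_private));
	}

	static const struct file_operations foo_fops = {
		.open    = foo_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = seq_release_net,
	};

	proc_create("foo", 0444, net->proc_net, &foo_fops);

	/* After: one call, no file_operations in the caller at all. */
	proc_create_net("foo", 0444, net->proc_net, &foo_seq_ops,
			sizeof(struct seq_net_private));

	/* Single-show files shed single_open_net() the same way: */
	proc_create_net_single("foo_stats", 0444, net->proc_net,
			       foo_stats_show, NULL);
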
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4b2b3d5..853b232 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -644,19 +644,6 @@
 	.stop = exp_seq_stop,
 	.show = exp_seq_show
 };
-
-static int exp_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &exp_seq_ops,
-			sizeof(struct ct_expect_iter_state));
-}
-
-static const struct file_operations exp_file_ops = {
-	.open    = exp_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
 
 static int exp_proc_init(struct net *net)
@@ -666,8 +653,8 @@
 	kuid_t root_uid;
 	kgid_t root_gid;
 
-	proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
-			   &exp_file_ops);
+	proc = proc_create_net("nf_conntrack_expect", 0440, net->proc_net,
+			&exp_seq_ops, sizeof(struct ct_expect_iter_state));
 	if (!proc)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 037fec5..b642c0b 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -375,19 +375,6 @@
 	.show  = ct_seq_show
 };
 
-static int ct_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ct_seq_ops,
-			sizeof(struct ct_iter_state));
-}
-
-static const struct file_operations ct_file_ops = {
-	.open    = ct_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct net *net = seq_file_net(seq);
@@ -467,26 +454,14 @@
 	.show	= ct_cpu_seq_show,
 };
 
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ct_cpu_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
-	.open	 = ct_cpu_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
 static int nf_conntrack_standalone_init_proc(struct net *net)
 {
 	struct proc_dir_entry *pde;
 	kuid_t root_uid;
 	kgid_t root_gid;
 
-	pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
+	pde = proc_create_net("nf_conntrack", 0440, net->proc_net, &ct_seq_ops,
+			sizeof(struct ct_iter_state));
 	if (!pde)
 		goto out_nf_conntrack;
 
@@ -495,8 +470,8 @@
 	if (uid_valid(root_uid) && gid_valid(root_gid))
 		proc_set_user(pde, root_uid, root_gid);
 
-	pde = proc_create("nf_conntrack", 0444, net->proc_net_stat,
-			  &ct_cpu_seq_fops);
+	pde = proc_create_net("nf_conntrack", 0444, net->proc_net_stat,
+			&ct_cpu_seq_ops, sizeof(struct seq_net_private));
 	if (!pde)
 		goto out_stat_nf_conntrack;
 	return 0;
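
The state_size argument is doing real work here: when an iterator needs more context than a bare struct seq_net_private (struct ct_iter_state above, struct ip_vs_iter_state earlier), the larger state struct is passed instead, and it must embed seq_net_private as its first member so that seq_file_net() still finds the namespace. A sketch with a hypothetical iterator (foo_lookup is illustrative):

	struct foo_iter_state {
		struct seq_net_private p;	/* must come first */
		unsigned int bucket;		/* private cursor */
	};

	static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
	{
		struct foo_iter_state *st = seq->private;

		st->bucket = *pos;
		/* seq_file_net() resolves through st->p.net */
		return foo_lookup(seq_file_net(seq), st->bucket);
	}
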
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 6d03578..4264570 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -394,21 +394,6 @@
 	.stop	= seq_stop,
 	.show	= seq_show,
 };
-
-static int nflog_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &nflog_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations nflog_file_ops = {
-	.open	 = nflog_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
-
 #endif /* PROC_FS */
 
 #ifdef CONFIG_SYSCTL
@@ -549,8 +534,8 @@
 	int ret = -ENOMEM;
 
 #ifdef CONFIG_PROC_FS
-	if (!proc_create("nf_log", 0444,
-			 net->nf.proc_netfilter, &nflog_file_ops))
+	if (!proc_create_net("nf_log", 0444, net->nf.proc_netfilter,
+			&nflog_seq_ops, sizeof(struct seq_net_private)))
 		return ret;
 #endif
 	ret = netfilter_log_sysctl_init(net);
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 6039b35..8ff4d22 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -310,23 +310,10 @@
 	.show		= synproxy_cpu_seq_show,
 };
 
-static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &synproxy_cpu_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations synproxy_cpu_seq_fops = {
-	.open		= synproxy_cpu_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 static int __net_init synproxy_proc_init(struct net *net)
 {
-	if (!proc_create("synproxy", 0444, net->proc_net_stat,
-			 &synproxy_cpu_seq_fops))
+	if (!proc_create_net("synproxy", 0444, net->proc_net_stat,
+			&synproxy_cpu_seq_ops, sizeof(struct seq_net_private)))
 		return -ENOMEM;
 	return 0;
 }
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e5cc4d9..332c69d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1044,20 +1044,6 @@
 	.stop	= seq_stop,
 	.show	= seq_show,
 };
-
-static int nful_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &nful_seq_ops,
-			    sizeof(struct iter_state));
-}
-
-static const struct file_operations nful_file_ops = {
-	.open	 = nful_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
 #endif /* PROC_FS */
 
 static int __net_init nfnl_log_net_init(struct net *net)
@@ -1075,8 +1061,8 @@
 	spin_lock_init(&log->instances_lock);
 
 #ifdef CONFIG_PROC_FS
-	proc = proc_create("nfnetlink_log", 0440,
-			   net->nf.proc_netfilter, &nful_file_ops);
+	proc = proc_create_net("nfnetlink_log", 0440, net->nf.proc_netfilter,
+			&nful_seq_ops, sizeof(struct iter_state));
 	if (!proc)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 2c17304..4ccd298 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1489,20 +1489,6 @@
 	.stop	= seq_stop,
 	.show	= seq_show,
 };
-
-static int nfqnl_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &nfqnl_seq_ops,
-			sizeof(struct iter_state));
-}
-
-static const struct file_operations nfqnl_file_ops = {
-	.open	 = nfqnl_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
 #endif /* PROC_FS */
 
 static int __net_init nfnl_queue_net_init(struct net *net)
@@ -1516,8 +1502,8 @@
 	spin_lock_init(&q->instances_lock);
 
 #ifdef CONFIG_PROC_FS
-	if (!proc_create("nfnetlink_queue", 0440,
-			 net->nf.proc_netfilter, &nfqnl_file_ops))
+	if (!proc_create_net("nfnetlink_queue", 0440, net->nf.proc_netfilter,
+			&nfqnl_seq_ops, sizeof(struct iter_state)))
 		return -ENOMEM;
 #endif
 	nf_register_queue_handler(net, &nfqh);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index cb7cb30..df9ab71 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1420,7 +1420,7 @@
 
 #ifdef CONFIG_AUDIT
 	if (audit_enabled) {
-		audit_log(current->audit_context, GFP_KERNEL,
+		audit_log(audit_context(), GFP_KERNEL,
 			  AUDIT_NETFILTER_CFG,
 			  "table=%s family=%u entries=%u",
 			  table->name, table->af, private->number);
@@ -1495,15 +1495,10 @@
 EXPORT_SYMBOL_GPL(xt_unregister_table);
 
 #ifdef CONFIG_PROC_FS
-struct xt_names_priv {
-	struct seq_net_private p;
-	u_int8_t af;
-};
 static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct xt_names_priv *priv = seq->private;
 	struct net *net = seq_file_net(seq);
-	u_int8_t af = priv->af;
+	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
 
 	mutex_lock(&xt[af].mutex);
 	return seq_list_start(&net->xt.tables[af], *pos);
@@ -1511,17 +1506,15 @@
 
 static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct xt_names_priv *priv = seq->private;
 	struct net *net = seq_file_net(seq);
-	u_int8_t af = priv->af;
+	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
 
 	return seq_list_next(v, &net->xt.tables[af], pos);
 }
 
 static void xt_table_seq_stop(struct seq_file *seq, void *v)
 {
-	struct xt_names_priv *priv = seq->private;
-	u_int8_t af = priv->af;
+	u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));
 
 	mutex_unlock(&xt[af].mutex);
 }
@@ -1542,34 +1535,13 @@
 	.show	= xt_table_seq_show,
 };
 
-static int xt_table_open(struct inode *inode, struct file *file)
-{
-	int ret;
-	struct xt_names_priv *priv;
-
-	ret = seq_open_net(inode, file, &xt_table_seq_ops,
-			   sizeof(struct xt_names_priv));
-	if (!ret) {
-		priv = ((struct seq_file *)file->private_data)->private;
-		priv->af = (unsigned long)PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static const struct file_operations xt_table_ops = {
-	.open	 = xt_table_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
 /*
  * Traverse state for ip{,6}_{tables,matches} for helping crossing
  * the multi-AF mutexes.
  */
 struct nf_mttg_trav {
 	struct list_head *head, *curr;
-	uint8_t class, nfproto;
+	uint8_t class;
 };
 
 enum {
@@ -1586,6 +1558,7 @@
 		[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
 		[MTTG_TRAV_NFP_SPEC]   = MTTG_TRAV_DONE,
 	};
+	uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
 	struct nf_mttg_trav *trav = seq->private;
 
 	switch (trav->class) {
@@ -1600,9 +1573,9 @@
 		if (trav->curr != trav->head)
 			break;
 		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
-		mutex_lock(&xt[trav->nfproto].mutex);
+		mutex_lock(&xt[nfproto].mutex);
 		trav->head = trav->curr = is_target ?
-			&xt[trav->nfproto].target : &xt[trav->nfproto].match;
+			&xt[nfproto].target : &xt[nfproto].match;
 		trav->class = next_class[trav->class];
 		break;
 	case MTTG_TRAV_NFP_SPEC:
@@ -1634,6 +1607,7 @@
 
 static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
 {
+	uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
 	struct nf_mttg_trav *trav = seq->private;
 
 	switch (trav->class) {
@@ -1641,7 +1615,7 @@
 		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
 		break;
 	case MTTG_TRAV_NFP_SPEC:
-		mutex_unlock(&xt[trav->nfproto].mutex);
+		mutex_unlock(&xt[nfproto].mutex);
 		break;
 	}
 }
@@ -1680,24 +1654,6 @@
 	.show	= xt_match_seq_show,
 };
 
-static int xt_match_open(struct inode *inode, struct file *file)
-{
-	struct nf_mttg_trav *trav;
-	trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav));
-	if (!trav)
-		return -ENOMEM;
-
-	trav->nfproto = (unsigned long)PDE_DATA(inode);
-	return 0;
-}
-
-static const struct file_operations xt_match_ops = {
-	.open	 = xt_match_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_private,
-};
-
 static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	return xt_mttg_seq_start(seq, pos, true);
@@ -1732,24 +1688,6 @@
 	.show	= xt_target_seq_show,
 };
 
-static int xt_target_open(struct inode *inode, struct file *file)
-{
-	struct nf_mttg_trav *trav;
-	trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav));
-	if (!trav)
-		return -ENOMEM;
-
-	trav->nfproto = (unsigned long)PDE_DATA(inode);
-	return 0;
-}
-
-static const struct file_operations xt_target_ops = {
-	.open	 = xt_target_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_private,
-};
-
 #define FORMAT_TABLES	"_tables_names"
 #define	FORMAT_MATCHES	"_tables_matches"
 #define FORMAT_TARGETS 	"_tables_targets"
@@ -1813,8 +1751,9 @@
 
 	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
-	proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops,
-				(void *)(unsigned long)af);
+	proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops,
+			sizeof(struct seq_net_private),
+			(void *)(unsigned long)af);
 	if (!proc)
 		goto out;
 	if (uid_valid(root_uid) && gid_valid(root_gid))
@@ -1822,8 +1761,9 @@
 
 	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
-	proc = proc_create_data(buf, 0440, net->proc_net, &xt_match_ops,
-				(void *)(unsigned long)af);
+	proc = proc_create_seq_private(buf, 0440, net->proc_net,
+			&xt_match_seq_ops, sizeof(struct nf_mttg_trav),
+			(void *)(unsigned long)af);
 	if (!proc)
 		goto out_remove_tables;
 	if (uid_valid(root_uid) && gid_valid(root_gid))
@@ -1831,8 +1771,9 @@
 
 	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
-	proc = proc_create_data(buf, 0440, net->proc_net, &xt_target_ops,
-				(void *)(unsigned long)af);
+	proc = proc_create_seq_private(buf, 0440, net->proc_net,
+			 &xt_target_seq_ops, sizeof(struct nf_mttg_trav),
+			 (void *)(unsigned long)af);
 	if (!proc)
 		goto out_remove_matches;
 	if (uid_valid(root_uid) && gid_valid(root_gid))
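
Beyond the boilerplate removal, two things change in x_tables. The audit_log() call switches to the audit_context() accessor instead of dereferencing current->audit_context. More interesting, the old open handlers existed only to stash PDE_DATA(inode), the address family, into private state; since struct seq_file now carries a back-pointer to its file, every callback can recover the creation-time data on demand, and struct xt_names_priv plus the trav->nfproto field become dead weight. A sketch of the idiom, with hypothetical foo_* tables and locks:

	proc = proc_create_net_data("foo_tables", 0440, net->proc_net,
			&foo_seq_ops, sizeof(struct seq_net_private),
			(void *)(unsigned long)af);

	static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
	{
		u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file));

		mutex_lock(&foo_mutex[af]);
		return seq_list_start(&foo_tables[af], *pos);
	}
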
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 0cd7356..9b16402 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -57,9 +57,9 @@
 }
 
 /* need to declare this at the top */
-static const struct file_operations dl_file_ops_v2;
-static const struct file_operations dl_file_ops_v1;
-static const struct file_operations dl_file_ops;
+static const struct seq_operations dl_seq_ops_v2;
+static const struct seq_operations dl_seq_ops_v1;
+static const struct seq_operations dl_seq_ops;
 
 /* hash table crap */
 struct dsthash_dst {
@@ -272,7 +272,7 @@
 {
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
-	const struct file_operations *fops;
+	const struct seq_operations *ops;
 	unsigned int size, i;
 	int ret;
 
@@ -321,19 +321,19 @@
 
 	switch (revision) {
 	case 1:
-		fops = &dl_file_ops_v1;
+		ops = &dl_seq_ops_v1;
 		break;
 	case 2:
-		fops = &dl_file_ops_v2;
+		ops = &dl_seq_ops_v2;
 		break;
 	default:
-		fops = &dl_file_ops;
+		ops = &dl_seq_ops;
 	}
 
-	hinfo->pde = proc_create_data(name, 0,
+	hinfo->pde = proc_create_seq_data(name, 0,
 		(family == NFPROTO_IPV4) ?
 		hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
-		fops, hinfo);
+		ops, hinfo);
 	if (hinfo->pde == NULL) {
 		kfree(hinfo->name);
 		vfree(hinfo);
@@ -1057,7 +1057,7 @@
 static void *dl_seq_start(struct seq_file *s, loff_t *pos)
 	__acquires(htable->lock)
 {
-	struct xt_hashlimit_htable *htable = s->private;
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket;
 
 	spin_lock_bh(&htable->lock);
@@ -1074,7 +1074,7 @@
 
 static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
-	struct xt_hashlimit_htable *htable = s->private;
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = v;
 
 	*pos = ++(*bucket);
@@ -1088,7 +1088,7 @@
 static void dl_seq_stop(struct seq_file *s, void *v)
 	__releases(htable->lock)
 {
-	struct xt_hashlimit_htable *htable = s->private;
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = v;
 
 	if (!IS_ERR(bucket))
@@ -1130,7 +1130,7 @@
 static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family,
 			       struct seq_file *s)
 {
-	const struct xt_hashlimit_htable *ht = s->private;
+	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
@@ -1145,7 +1145,7 @@
 static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
 			       struct seq_file *s)
 {
-	const struct xt_hashlimit_htable *ht = s->private;
+	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
@@ -1160,7 +1160,7 @@
 static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 			    struct seq_file *s)
 {
-	const struct xt_hashlimit_htable *ht = s->private;
+	struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file));
 
 	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
@@ -1174,7 +1174,7 @@
 
 static int dl_seq_show_v2(struct seq_file *s, void *v)
 {
-	struct xt_hashlimit_htable *htable = s->private;
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = (unsigned int *)v;
 	struct dsthash_ent *ent;
 
@@ -1188,7 +1188,7 @@
 
 static int dl_seq_show_v1(struct seq_file *s, void *v)
 {
-	struct xt_hashlimit_htable *htable = s->private;
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = v;
 	struct dsthash_ent *ent;
 
@@ -1202,7 +1202,7 @@
 
 static int dl_seq_show(struct seq_file *s, void *v)
 {
-	struct xt_hashlimit_htable *htable = s->private;
+	struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file));
 	unsigned int *bucket = v;
 	struct dsthash_ent *ent;
 
@@ -1235,62 +1235,6 @@
 	.show  = dl_seq_show
 };
 
-static int dl_proc_open_v2(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &dl_seq_ops_v2);
-
-	if (!ret) {
-		struct seq_file *sf = file->private_data;
-
-		sf->private = PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static int dl_proc_open_v1(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &dl_seq_ops_v1);
-
-	if (!ret) {
-		struct seq_file *sf = file->private_data;
-		sf->private = PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static int dl_proc_open(struct inode *inode, struct file *file)
-{
-	int ret = seq_open(file, &dl_seq_ops);
-
-	if (!ret) {
-		struct seq_file *sf = file->private_data;
-
-		sf->private = PDE_DATA(inode);
-	}
-	return ret;
-}
-
-static const struct file_operations dl_file_ops_v2 = {
-	.open    = dl_proc_open_v2,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release
-};
-
-static const struct file_operations dl_file_ops_v1 = {
-	.open    = dl_proc_open_v1,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release
-};
-
-static const struct file_operations dl_file_ops = {
-	.open    = dl_proc_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release
-};
-
 static int __net_init hashlimit_proc_net_init(struct net *net)
 {
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
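
hashlimit binds each proc entry to its hashtable object via proc_create_seq_data(), and the seq callbacks fetch the object back through the entry rather than through a private pointer populated at open time. Note the access chain: the data lives in the proc_dir_entry, reached from the seq_file's backing file, so the lookup is PDE_DATA(file_inode(s->file)) in every callback. A sketch with a hypothetical table type:

	hinfo->pde = proc_create_seq_data(name, 0, parent,
					  &foo_seq_ops, hinfo);

	static void *foo_seq_start(struct seq_file *s, loff_t *pos)
	{
		struct foo_htable *ht = PDE_DATA(file_inode(s->file));

		spin_lock_bh(&ht->lock);
		return *pos >= ht->size ? NULL : pos;
	}
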
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 9bbfc17..07085c2 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -184,8 +184,7 @@
 	}
 
 	nstamps_max += 1;
-	e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * nstamps_max,
-		    GFP_ATOMIC);
+	e = kmalloc(struct_size(e, stamps, nstamps_max), GFP_ATOMIC);
 	if (e == NULL)
 		return NULL;
 	memcpy(&e->addr, addr, sizeof(e->addr));
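
The xt_recent hunk replaces an open-coded flexible-array size with struct_size(), which computes sizeof(*e) + n * sizeof(e->stamps[0]) with overflow checking: on overflow it saturates to SIZE_MAX, so the allocation fails cleanly instead of returning an undersized buffer. The sctp/endpointola.c hunk further down uses the same helper. An illustrative standalone use:

	struct foo_entry {
		unsigned int	nstamps;
		unsigned long	stamps[];	/* flexible array member */
	};

	struct foo_entry *e;

	e = kmalloc(struct_size(e, stamps, nstamps_max), GFP_ATOMIC);
	if (!e)
		return NULL;
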
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 58495f4..2f328af 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -104,7 +104,7 @@
 	if (audit_enabled == 0)
 		return NULL;
 
-	audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, type);
+	audit_buf = audit_log_start(audit_context(), GFP_ATOMIC, type);
 	if (audit_buf == NULL)
 		return NULL;
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2e2dd88..1189b84 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2635,21 +2635,6 @@
 	.stop   = netlink_seq_stop,
 	.show   = netlink_seq_show,
 };
-
-
-static int netlink_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &netlink_seq_ops,
-				sizeof(struct nl_seq_iter));
-}
-
-static const struct file_operations netlink_seq_fops = {
-	.open		= netlink_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 #endif
 
 int netlink_register_notifier(struct notifier_block *nb)
@@ -2673,7 +2658,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	netlink_getname,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	netlink_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
@@ -2694,7 +2679,8 @@
 static int __net_init netlink_net_init(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
-	if (!proc_create("netlink", 0, net->proc_net, &netlink_seq_fops))
+	if (!proc_create_net("netlink", 0, net->proc_net, &netlink_seq_ops,
+			sizeof(struct nl_seq_iter)))
 		return -ENOMEM;
 #endif
 	return 0;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 4221d98..b97eb76 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1338,18 +1338,6 @@
 	.stop = nr_info_stop,
 	.show = nr_info_show,
 };
-
-static int nr_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &nr_info_seqops);
-}
-
-static const struct file_operations nr_info_fops = {
-	.open = nr_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
 #endif	/* CONFIG_PROC_FS */
 
 static const struct net_proto_family nr_family_ops = {
@@ -1367,7 +1355,7 @@
 	.socketpair	=	sock_no_socketpair,
 	.accept		=	nr_accept,
 	.getname	=	nr_getname,
-	.poll		=	datagram_poll,
+	.poll_mask	=	datagram_poll_mask,
 	.ioctl		=	nr_ioctl,
 	.listen		=	nr_listen,
 	.shutdown	=	sock_no_shutdown,
@@ -1450,9 +1438,9 @@
 
 	nr_loopback_init();
 
-	proc_create("nr", 0444, init_net.proc_net, &nr_info_fops);
-	proc_create("nr_neigh", 0444, init_net.proc_net, &nr_neigh_fops);
-	proc_create("nr_nodes", 0444, init_net.proc_net, &nr_nodes_fops);
+	proc_create_seq("nr", 0444, init_net.proc_net, &nr_info_seqops);
+	proc_create_seq("nr_neigh", 0444, init_net.proc_net, &nr_neigh_seqops);
+	proc_create_seq("nr_nodes", 0444, init_net.proc_net, &nr_node_seqops);
 out:
 	return rc;
 fail:
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index b5a7dcb..6485f59 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -888,25 +888,13 @@
 	return 0;
 }
 
-static const struct seq_operations nr_node_seqops = {
+const struct seq_operations nr_node_seqops = {
 	.start = nr_node_start,
 	.next = nr_node_next,
 	.stop = nr_node_stop,
 	.show = nr_node_show,
 };
 
-static int nr_node_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &nr_node_seqops);
-}
-
-const struct file_operations nr_nodes_fops = {
-	.open = nr_node_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 static void *nr_neigh_start(struct seq_file *seq, loff_t *pos)
 {
 	spin_lock_bh(&nr_neigh_list_lock);
@@ -954,25 +942,12 @@
 	return 0;
 }
 
-static const struct seq_operations nr_neigh_seqops = {
+const struct seq_operations nr_neigh_seqops = {
 	.start = nr_neigh_start,
 	.next = nr_neigh_next,
 	.stop = nr_neigh_stop,
 	.show = nr_neigh_show,
 };
-
-static int nr_neigh_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &nr_neigh_seqops);
-}
-
-const struct file_operations nr_neigh_fops = {
-	.open = nr_neigh_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 #endif
 
 /*
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ea0c0c6..ab5bb14 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -548,16 +548,13 @@
 	return 0;
 }
 
-static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
-				   poll_table *wait)
+static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;
 
 	pr_debug("%p\n", sk);
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_state == LLCP_LISTEN)
 		return llcp_accept_poll(sk);
 
@@ -899,7 +896,7 @@
 	.socketpair     = sock_no_socketpair,
 	.accept         = llcp_sock_accept,
 	.getname        = llcp_sock_getname,
-	.poll           = llcp_sock_poll,
+	.poll_mask      = llcp_sock_poll_mask,
 	.ioctl          = sock_no_ioctl,
 	.listen         = llcp_sock_listen,
 	.shutdown       = sock_no_shutdown,
@@ -919,7 +916,7 @@
 	.socketpair     = sock_no_socketpair,
 	.accept         = sock_no_accept,
 	.getname        = llcp_sock_getname,
-	.poll           = llcp_sock_poll,
+	.poll_mask      = llcp_sock_poll_mask,
 	.ioctl          = sock_no_ioctl,
 	.listen         = sock_no_listen,
 	.shutdown       = sock_no_shutdown,
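
The llcp conversion shows the socket-side half of the new polling model: a ->poll_mask() handler only computes readiness from the requested events and never touches a poll_table, because the VFS registers on the wait queue once through the socket layer's ->get_poll_head() (see the net/socket.c hunk below). Each conversion in this merge therefore deletes its sock_poll_wait()/poll_wait() call and keeps everything else. A side-by-side sketch for a hypothetical protocol:

	/* Before: compute readiness *and* register on the wait queue. */
	static __poll_t foo_poll(struct file *file, struct socket *sock,
				 poll_table *wait)
	{
		struct sock *sk = sock->sk;
		__poll_t mask = 0;

		sock_poll_wait(file, sk_sleep(sk), wait);
		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= EPOLLIN | EPOLLRDNORM;
		return mask;
	}

	/* After: registration is the caller's job; only readiness remains. */
	static __poll_t foo_poll_mask(struct socket *sock, __poll_t events)
	{
		struct sock *sk = sock->sk;
		__poll_t mask = 0;

		if (!skb_queue_empty(&sk->sk_receive_queue))
			mask |= EPOLLIN | EPOLLRDNORM;
		return mask;
	}
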
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index e2188de..60c3225 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -284,7 +284,7 @@
 	.socketpair     = sock_no_socketpair,
 	.accept         = sock_no_accept,
 	.getname        = sock_no_getname,
-	.poll           = datagram_poll,
+	.poll_mask      = datagram_poll_mask,
 	.ioctl          = sock_no_ioctl,
 	.listen         = sock_no_listen,
 	.shutdown       = sock_no_shutdown,
@@ -304,7 +304,7 @@
 	.socketpair     = sock_no_socketpair,
 	.accept         = sock_no_accept,
 	.getname        = sock_no_getname,
-	.poll           = datagram_poll,
+	.poll_mask      = datagram_poll_mask,
 	.ioctl          = sock_no_ioctl,
 	.listen         = sock_no_listen,
 	.shutdown       = sock_no_shutdown,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 082f981..54ce66f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4078,12 +4078,11 @@
 	return 0;
 }
 
-static __poll_t packet_poll(struct file *file, struct socket *sock,
-				poll_table *wait)
+static __poll_t packet_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct packet_sock *po = pkt_sk(sk);
-	__poll_t mask = datagram_poll(file, sock, wait);
+	__poll_t mask = datagram_poll_mask(sock, events);
 
 	spin_lock_bh(&sk->sk_receive_queue.lock);
 	if (po->rx_ring.pg_vec) {
@@ -4425,7 +4424,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	packet_getname_spkt,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	packet_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
@@ -4446,7 +4445,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	packet_getname,
-	.poll =		packet_poll,
+	.poll_mask =	packet_poll_mask,
 	.ioctl =	packet_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	sock_no_shutdown,
@@ -4524,20 +4523,6 @@
 	.stop	= packet_seq_stop,
 	.show	= packet_seq_show,
 };
-
-static int packet_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &packet_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations packet_seq_fops = {
-	.open		= packet_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 #endif
 
 static int __net_init packet_net_init(struct net *net)
@@ -4545,7 +4530,8 @@
 	mutex_init(&net->packet.sklist_lock);
 	INIT_HLIST_HEAD(&net->packet.sklist);
 
-	if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops))
+	if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
+			sizeof(struct seq_net_private)))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 7778751..6cb4f60 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -320,7 +320,8 @@
 {
 	struct phonet_net *pnn = phonet_pernet(net);
 
-	if (!proc_create("phonet", 0, net->proc_net, &pn_sock_seq_fops))
+	if (!proc_create_net("phonet", 0, net->proc_net, &pn_sock_seq_ops,
+			sizeof(struct seq_net_private)))
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&pnn->pndevs.list);
@@ -351,7 +352,8 @@
 	if (err)
 		return err;
 
-	proc_create("pnresource", 0, init_net.proc_net, &pn_res_seq_fops);
+	proc_create_net("pnresource", 0, init_net.proc_net, &pn_res_seq_ops,
+			sizeof(struct seq_net_private));
 	register_netdevice_notifier(&phonet_device_notifier);
 	err = phonet_netlink_register();
 	if (err)
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index f9b40e6..c295c4e 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -340,15 +340,12 @@
 	return sizeof(struct sockaddr_pn);
 }
 
-static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
-					poll_table *wait)
+static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct pep_sock *pn = pep_sk(sk);
 	__poll_t mask = 0;
 
-	poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_state == TCP_CLOSE)
 		return EPOLLERR;
 	if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -448,7 +445,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= pn_socket_getname,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.ioctl		= pn_socket_ioctl,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
@@ -473,7 +470,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= pn_socket_accept,
 	.getname	= pn_socket_getname,
-	.poll		= pn_socket_poll,
+	.poll_mask	= pn_socket_poll_mask,
 	.ioctl		= pn_socket_ioctl,
 	.listen		= pn_socket_listen,
 	.shutdown	= sock_no_shutdown,
@@ -620,25 +617,12 @@
 	return 0;
 }
 
-static const struct seq_operations pn_sock_seq_ops = {
+const struct seq_operations pn_sock_seq_ops = {
 	.start = pn_sock_seq_start,
 	.next = pn_sock_seq_next,
 	.stop = pn_sock_seq_stop,
 	.show = pn_sock_seq_show,
 };
-
-static int pn_sock_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &pn_sock_seq_ops,
-				sizeof(struct seq_net_private));
-}
-
-const struct file_operations pn_sock_seq_fops = {
-	.open = pn_sock_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_net,
-};
 #endif
 
 static struct  {
@@ -802,23 +786,10 @@
 	return 0;
 }
 
-static const struct seq_operations pn_res_seq_ops = {
+const struct seq_operations pn_res_seq_ops = {
 	.start = pn_res_seq_start,
 	.next = pn_res_seq_next,
 	.stop = pn_res_seq_stop,
 	.show = pn_res_seq_show,
 };
-
-static int pn_res_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &pn_res_seq_ops,
-				sizeof(struct seq_net_private));
-}
-
-const struct file_operations pn_res_seq_fops = {
-	.open = pn_res_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_net,
-};
 #endif
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 2aa07b5..1b5025e 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1023,7 +1023,7 @@
 	.recvmsg	= qrtr_recvmsg,
 	.getname	= qrtr_getname,
 	.ioctl		= qrtr_ioctl,
-	.poll		= datagram_poll,
+	.poll_mask	= datagram_poll_mask,
 	.shutdown	= sock_no_shutdown,
 	.setsockopt	= sock_no_setsockopt,
 	.getsockopt	= sock_no_getsockopt,
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 9ff5e0a..5b73fea 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1453,18 +1453,6 @@
 	.stop = rose_info_stop,
 	.show = rose_info_show,
 };
-
-static int rose_info_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rose_info_seqops);
-}
-
-static const struct file_operations rose_info_fops = {
-	.open = rose_info_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
 #endif	/* CONFIG_PROC_FS */
 
 static const struct net_proto_family rose_family_ops = {
@@ -1482,7 +1470,7 @@
 	.socketpair	=	sock_no_socketpair,
 	.accept		=	rose_accept,
 	.getname	=	rose_getname,
-	.poll		=	datagram_poll,
+	.poll_mask	=	datagram_poll_mask,
 	.ioctl		=	rose_ioctl,
 	.listen		=	rose_listen,
 	.shutdown	=	sock_no_shutdown,
@@ -1567,13 +1555,13 @@
 
 	rose_add_loopback_neigh();
 
-	proc_create("rose", 0444, init_net.proc_net, &rose_info_fops);
-	proc_create("rose_neigh", 0444, init_net.proc_net,
-		    &rose_neigh_fops);
-	proc_create("rose_nodes", 0444, init_net.proc_net,
-		    &rose_nodes_fops);
-	proc_create("rose_routes", 0444, init_net.proc_net,
-		    &rose_routes_fops);
+	proc_create_seq("rose", 0444, init_net.proc_net, &rose_info_seqops);
+	proc_create_seq("rose_neigh", 0444, init_net.proc_net,
+		    &rose_neigh_seqops);
+	proc_create_seq("rose_nodes", 0444, init_net.proc_net,
+		    &rose_node_seqops);
+	proc_create_seq("rose_routes", 0444, init_net.proc_net,
+		    &rose_route_seqops);
 out:
 	return rc;
 fail:
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 178619d..77e9f85 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -1143,25 +1143,13 @@
 	return 0;
 }
 
-static const struct seq_operations rose_node_seqops = {
+const struct seq_operations rose_node_seqops = {
 	.start = rose_node_start,
 	.next = rose_node_next,
 	.stop = rose_node_stop,
 	.show = rose_node_show,
 };
 
-static int rose_nodes_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rose_node_seqops);
-}
-
-const struct file_operations rose_nodes_fops = {
-	.open = rose_nodes_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 static void *rose_neigh_start(struct seq_file *seq, loff_t *pos)
 	__acquires(rose_neigh_list_lock)
 {
@@ -1226,26 +1214,13 @@
 }
 
 
-static const struct seq_operations rose_neigh_seqops = {
+const struct seq_operations rose_neigh_seqops = {
 	.start = rose_neigh_start,
 	.next = rose_neigh_next,
 	.stop = rose_neigh_stop,
 	.show = rose_neigh_show,
 };
 
-static int rose_neigh_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rose_neigh_seqops);
-}
-
-const struct file_operations rose_neigh_fops = {
-	.open = rose_neigh_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
-
 static void *rose_route_start(struct seq_file *seq, loff_t *pos)
 	__acquires(rose_route_list_lock)
 {
@@ -1311,25 +1286,12 @@
 	return 0;
 }
 
-static const struct seq_operations rose_route_seqops = {
+const struct seq_operations rose_route_seqops = {
 	.start = rose_route_start,
 	.next = rose_route_next,
 	.stop = rose_route_stop,
 	.show = rose_route_show,
 };
-
-static int rose_route_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &rose_route_seqops);
-}
-
-const struct file_operations rose_routes_fops = {
-	.open = rose_route_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release,
-};
-
 #endif /* CONFIG_PROC_FS */
 
 /*
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 2b46304..3b1ac93 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -734,15 +734,11 @@
 /*
  * permit an RxRPC socket to be polled
  */
-static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
-			       poll_table *wait)
+static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;
 
 	/* the socket is readable if there are any messages waiting on the Rx
 	 * queue */
@@ -949,7 +945,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= sock_no_getname,
-	.poll		= rxrpc_poll,
+	.poll_mask	= rxrpc_poll_mask,
 	.ioctl		= sock_no_ioctl,
 	.listen		= rxrpc_listen,
 	.shutdown	= rxrpc_shutdown,
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index b239311..5fb7d32 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1053,8 +1053,8 @@
 /*
  * proc.c
  */
-extern const struct file_operations rxrpc_call_seq_fops;
-extern const struct file_operations rxrpc_connection_seq_fops;
+extern const struct seq_operations rxrpc_call_seq_ops;
+extern const struct seq_operations rxrpc_connection_seq_ops;
 
 /*
  * recvmsg.c
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index c7a023f..5d6a773 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -97,8 +97,11 @@
 	if (!rxnet->proc_net)
 		goto err_proc;
 
-	proc_create("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_fops);
-	proc_create("conns", 0444, rxnet->proc_net, &rxrpc_connection_seq_fops);
+	proc_create_net("calls", 0444, rxnet->proc_net, &rxrpc_call_seq_ops,
+			sizeof(struct seq_net_private));
+	proc_create_net("conns", 0444, rxnet->proc_net,
+			&rxrpc_connection_seq_ops,
+			sizeof(struct seq_net_private));
 	return 0;
 
 err_proc:
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 7e45db0..d9fca8c 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -115,26 +115,13 @@
 	return 0;
 }
 
-static const struct seq_operations rxrpc_call_seq_ops = {
+const struct seq_operations rxrpc_call_seq_ops = {
 	.start  = rxrpc_call_seq_start,
 	.next   = rxrpc_call_seq_next,
 	.stop   = rxrpc_call_seq_stop,
 	.show   = rxrpc_call_seq_show,
 };
 
-static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &rxrpc_call_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-const struct file_operations rxrpc_call_seq_fops = {
-	.open		= rxrpc_call_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 /*
  * generate a list of extant virtual connections in /proc/net/rxrpc_conns
  */
@@ -207,23 +194,9 @@
 	return 0;
 }
 
-static const struct seq_operations rxrpc_connection_seq_ops = {
+const struct seq_operations rxrpc_connection_seq_ops = {
 	.start  = rxrpc_connection_seq_start,
 	.next   = rxrpc_connection_seq_next,
 	.stop   = rxrpc_connection_seq_stop,
 	.show   = rxrpc_connection_seq_show,
 };
-
-
-static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &rxrpc_connection_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-const struct file_operations rxrpc_connection_seq_fops = {
-	.open		= rxrpc_connection_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
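
rxrpc, netrom, rose and phonet all wire their proc files up from another translation unit the same way: the seq_operations tables lose static and the internal header exports them (rxrpc_call_seq_ops and rxrpc_connection_seq_ops in ar-internal.h above), replacing the old exported file_operations. A sketch:

	/* foo_internal.h */
	extern const struct seq_operations foo_call_seq_ops;

	/* foo_proc.c */
	const struct seq_operations foo_call_seq_ops = {
		.start = foo_call_seq_start,
		.next  = foo_call_seq_next,
		.stop  = foo_call_seq_stop,
		.show  = foo_call_seq_show,
	};

	/* foo_net.c */
	proc_create_net("calls", 0444, foonet->proc_net, &foo_call_seq_ops,
			sizeof(struct seq_net_private));
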
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 106dae7e..54eca68 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2092,23 +2092,11 @@
 	return 0;
 }
 
-static int psched_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, psched_show, NULL);
-}
-
-static const struct file_operations psched_fops = {
-	.open = psched_open,
-	.read  = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
 static int __net_init psched_net_init(struct net *net)
 {
 	struct proc_dir_entry *e;
 
-	e = proc_create("psched", 0, net->proc_net, &psched_fops);
+	e = proc_create_single("psched", 0, net->proc_net, psched_show);
 	if (e == NULL)
 		return -ENOMEM;
 
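
psched uses the last member of the helper family. The four creation variants, by scope and shape, all of which appear in this merge:

	proc_create_seq("foo", 0444, parent, &foo_seq_ops);	/* global, iterator */
	proc_create_net("foo", 0444, parent, &foo_seq_ops,
			sizeof(struct seq_net_private));	/* per-netns, iterator */
	proc_create_single("foo", 0444, parent, foo_show);	/* global, one show */
	proc_create_net_single("foo", 0444, parent,
			       foo_show, NULL);			/* per-netns, one show */
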
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index e2f5a3e..40c7eb9 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -73,8 +73,8 @@
 		 * variables.  There are arrays that we encode directly
 		 * into parameters to make the rest of the operations easier.
 		 */
-		auth_hmacs = kzalloc(sizeof(*auth_hmacs) +
-				     sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp);
+		auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids,
+						 SCTP_AUTH_NUM_HMACS), gfp);
 		if (!auth_hmacs)
 			goto nomem;
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0cd2e76..7339918 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1010,7 +1010,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = inet_accept,
 	.getname	   = sctp_getname,
-	.poll		   = sctp_poll,
+	.poll_mask	   = sctp_poll_mask,
 	.ioctl		   = inet6_ioctl,
 	.listen		   = sctp_inet_listen,
 	.shutdown	   = inet_shutdown,
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index fd2684a..a6179b2 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -108,25 +108,13 @@
 	.show  = sctp_objcnt_seq_show,
 };
 
-static int sctp_objcnt_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &sctp_objcnt_seq_ops);
-}
-
-static const struct file_operations sctp_objcnt_ops = {
-	.open	 = sctp_objcnt_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release,
-};
-
 /* Initialize the objcount in the proc filesystem.  */
 void sctp_dbg_objcnt_init(struct net *net)
 {
 	struct proc_dir_entry *ent;
 
-	ent = proc_create("sctp_dbg_objcnt", 0,
-			  net->sctp.proc_net_sctp, &sctp_objcnt_ops);
+	ent = proc_create_seq("sctp_dbg_objcnt", 0,
+			  net->sctp.proc_net_sctp, &sctp_objcnt_seq_ops);
 	if (!ent)
 		pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
 }
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 1d9ccc6..ef5c9a8 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -88,19 +88,6 @@
 	return 0;
 }
 
-/* Initialize the seq file operations for 'snmp' object. */
-static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, sctp_snmp_seq_show);
-}
-
-static const struct file_operations sctp_snmp_seq_fops = {
-	.open	 = sctp_snmp_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
-
 /* Dump local addresses of an association/endpoint. */
 static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb)
 {
@@ -225,21 +212,6 @@
 	.show  = sctp_eps_seq_show,
 };
 
-
-/* Initialize the seq file operations for 'eps' object. */
-static int sctp_eps_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &sctp_eps_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations sctp_eps_seq_fops = {
-	.open	 = sctp_eps_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
 struct sctp_ht_iter {
 	struct seq_net_private p;
 	struct rhashtable_iter hti;
@@ -338,20 +310,6 @@
 	.show  = sctp_assocs_seq_show,
 };
 
-/* Initialize the seq file operations for 'assocs' object. */
-static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &sctp_assoc_ops,
-			    sizeof(struct sctp_ht_iter));
-}
-
-static const struct file_operations sctp_assocs_seq_fops = {
-	.open	 = sctp_assocs_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = seq_release_net,
-};
-
 static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 {
 	struct sctp_association *assoc;
@@ -431,36 +389,23 @@
 	.show  = sctp_remaddr_seq_show,
 };
 
-static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &sctp_remaddr_ops,
-			    sizeof(struct sctp_ht_iter));
-}
-
-static const struct file_operations sctp_remaddr_seq_fops = {
-	.open = sctp_remaddr_seq_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_net,
-};
-
 /* Set up the proc fs entry for the SCTP protocol. */
 int __net_init sctp_proc_init(struct net *net)
 {
 	net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
 	if (!net->sctp.proc_net_sctp)
 		return -ENOMEM;
-	if (!proc_create("snmp", 0444, net->sctp.proc_net_sctp,
-			 &sctp_snmp_seq_fops))
+	if (!proc_create_net_single("snmp", 0444, net->sctp.proc_net_sctp,
+			 sctp_snmp_seq_show, NULL))
 		goto cleanup;
-	if (!proc_create("eps", 0444, net->sctp.proc_net_sctp,
-			 &sctp_eps_seq_fops))
+	if (!proc_create_net("eps", 0444, net->sctp.proc_net_sctp,
+			&sctp_eps_ops, sizeof(struct seq_net_private)))
 		goto cleanup;
-	if (!proc_create("assocs", 0444, net->sctp.proc_net_sctp,
-			 &sctp_assocs_seq_fops))
+	if (!proc_create_net("assocs", 0444, net->sctp.proc_net_sctp,
+			&sctp_assoc_ops, sizeof(struct sctp_ht_iter)))
 		goto cleanup;
-	if (!proc_create("remaddr", 0444, net->sctp.proc_net_sctp,
-			 &sctp_remaddr_seq_fops))
+	if (!proc_create_net("remaddr", 0444, net->sctp.proc_net_sctp,
+			&sctp_remaddr_ops, sizeof(struct sctp_ht_iter)))
 		goto cleanup;
 	return 0;
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6bf0a99..11d9337 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1016,7 +1016,7 @@
 	.socketpair	   = sock_no_socketpair,
 	.accept		   = inet_accept,
 	.getname	   = inet_getname,	/* Semantics are different.  */
-	.poll		   = sctp_poll,
+	.poll_mask	   = sctp_poll_mask,
 	.ioctl		   = inet_ioctl,
 	.listen		   = sctp_inet_listen,
 	.shutdown	   = inet_shutdown,	/* Looks harmless.  */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ce620e8..d20f7ad 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7717,14 +7717,12 @@
  * here, again, by modeling the current TCP/UDP code.  We don't have
  * a good way to test with it yet.
  */
-__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t sctp_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct sctp_sock *sp = sctp_sk(sk);
 	__poll_t mask;
 
-	poll_wait(file, sk_sleep(sk), wait);
-
 	sock_rps_record_flow(sk);
 
 	/* A TCP-style listening socket becomes readable when the accept queue
diff --git a/net/socket.c b/net/socket.c
index f10f1d9..af57d85 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -117,8 +117,10 @@
 static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 
 static int sock_close(struct inode *inode, struct file *file);
-static __poll_t sock_poll(struct file *file,
-			      struct poll_table_struct *wait);
+static struct wait_queue_head *sock_get_poll_head(struct file *file,
+		__poll_t events);
+static __poll_t sock_poll_mask(struct file *file, __poll_t events);
+static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait);
 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_COMPAT
 static long compat_sock_ioctl(struct file *file,
@@ -141,6 +143,8 @@
 	.llseek =	no_llseek,
 	.read_iter =	sock_read_iter,
 	.write_iter =	sock_write_iter,
+	.get_poll_head = sock_get_poll_head,
+	.poll_mask =	sock_poll_mask,
 	.poll =		sock_poll,
 	.unlocked_ioctl = sock_ioctl,
 #ifdef CONFIG_COMPAT
@@ -1114,27 +1118,48 @@
 }
 EXPORT_SYMBOL(sock_create_lite);
 
+static struct wait_queue_head *sock_get_poll_head(struct file *file,
+		__poll_t events)
+{
+	struct socket *sock = file->private_data;
+
+	if (!sock->ops->poll_mask)
+		return NULL;
+	sock_poll_busy_loop(sock, events);
+	return sk_sleep(sock->sk);
+}
+
+static __poll_t sock_poll_mask(struct file *file, __poll_t events)
+{
+	struct socket *sock = file->private_data;
+
+	/*
+	 * We need to be sure we are in sync with the socket flags modification.
+	 *
+	 * This memory barrier is paired with the one in wq_has_sleeper().
+	 */
+	smp_mb();
+
+	/* tell the system call if this socket can busy-loop */
+	return sock->ops->poll_mask(sock, events) |
+		(sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0);
+}
+
 /* No kernel lock held - perfect */
 static __poll_t sock_poll(struct file *file, poll_table *wait)
 {
-	__poll_t busy_flag = 0;
-	struct socket *sock;
+	struct socket *sock = file->private_data;
+	__poll_t events = poll_requested_events(wait), mask = 0;
 
-	/*
-	 *      We can't return errors to poll, so it's either yes or no.
-	 */
-	sock = file->private_data;
-
-	if (sk_can_busy_loop(sock->sk)) {
-		/* this socket can poll_ll so tell the system call */
-		busy_flag = POLL_BUSY_LOOP;
-
-		/* once, only if requested by syscall */
-		if (wait && (wait->_key & POLL_BUSY_LOOP))
-			sk_busy_loop(sock->sk, 1);
+	if (sock->ops->poll) {
+		sock_poll_busy_loop(sock, events);
+		mask = sock->ops->poll(file, sock, wait);
+	} else if (sock->ops->poll_mask) {
+		sock_poll_wait(file, sock_get_poll_head(file, events), wait);
+		mask = sock->ops->poll_mask(sock, events);
 	}
 
-	return busy_flag | sock->ops->poll(file, sock, wait);
+	return mask | sock_poll_busy_flag(sock);
 }
 
 static int sock_mmap(struct file *file, struct vm_area_struct *vma)
@@ -1420,6 +1445,13 @@
 		goto out;
 	}
 
+	err = security_socket_socketpair(sock1, sock2);
+	if (unlikely(err)) {
+		sock_release(sock2);
+		sock_release(sock1);
+		goto out;
+	}
+
 	err = sock1->ops->socketpair(sock1, sock2);
 	if (unlikely(err < 0)) {
 		sock_release(sock2);
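
Two independent core changes land in net/socket.c. The socketpair() path gains a security_socket_socketpair() LSM hook before the protocol's own ->socketpair() runs. And sock_poll() becomes a compatibility shim: sockets still providing a legacy ->poll are dispatched as before, while converted sockets go through the ->get_poll_head()/->poll_mask() pair, which the socket file_operations also export so poll/select/epoll can bypass the poll_table machinery. An illustrative caller-side flow (a sketch, not the exact fs/select.c code):

	static __poll_t do_poll_mask(struct file *file, __poll_t events,
				     poll_table *pt)
	{
		struct wait_queue_head *head;

		if (!file->f_op->get_poll_head)
			return file->f_op->poll(file, pt);	/* legacy path */

		head = file->f_op->get_poll_head(file, events);
		if (!head)
			return DEFAULT_POLLMASK;

		poll_wait(file, head, pt);	/* register exactly once */
		return file->f_op->poll_mask(file, events);
	}
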
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index c81ef5e..4fda18d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -609,22 +609,6 @@
 	return ret;
 }
 
-int rpc_rmdir(struct dentry *dentry)
-{
-	struct dentry *parent;
-	struct inode *dir;
-	int error;
-
-	parent = dget_parent(dentry);
-	dir = d_inode(parent);
-	inode_lock_nested(dir, I_MUTEX_PARENT);
-	error = __rpc_rmdir(dir, dentry);
-	inode_unlock(dir);
-	dput(parent);
-	return error;
-}
-EXPORT_SYMBOL_GPL(rpc_rmdir);
-
 static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
 {
 	int ret;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 930852c..14a5d05 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -692,10 +692,9 @@
 }
 
 /**
- * tipc_poll - read and possibly block on pollmask
+ * tipc_poll_mask - read pollmask
- * @file: file structure associated with the socket
  * @sock: socket for which to calculate the poll bits
+ * @events: the requested poll events
- * @wait: ???
  *
  * Returns pollmask value
  *
@@ -709,15 +708,12 @@
  * imply that the operation will succeed, merely that it should be performed
  * and will not block.
  */
-static __poll_t tipc_poll(struct file *file, struct socket *sock,
-			      poll_table *wait)
+static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
 	struct tipc_sock *tsk = tipc_sk(sk);
 	__poll_t revents = 0;
 
-	sock_poll_wait(file, sk_sleep(sk), wait);
-
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
@@ -3037,7 +3033,7 @@
 	.socketpair	= tipc_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= tipc_getname,
-	.poll		= tipc_poll,
+	.poll_mask	= tipc_poll_mask,
 	.ioctl		= tipc_ioctl,
 	.listen		= sock_no_listen,
 	.shutdown	= tipc_shutdown,
@@ -3058,7 +3054,7 @@
 	.socketpair	= tipc_socketpair,
 	.accept		= tipc_accept,
 	.getname	= tipc_getname,
-	.poll		= tipc_poll,
+	.poll_mask	= tipc_poll_mask,
 	.ioctl		= tipc_ioctl,
 	.listen		= tipc_listen,
 	.shutdown	= tipc_shutdown,
@@ -3079,7 +3075,7 @@
 	.socketpair	= tipc_socketpair,
 	.accept		= tipc_accept,
 	.getname	= tipc_getname,
-	.poll		= tipc_poll,
+	.poll_mask	= tipc_poll_mask,
 	.ioctl		= tipc_ioctl,
 	.listen		= tipc_listen,
 	.shutdown	= tipc_shutdown,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 68bb70a..95b02a7 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -638,9 +638,8 @@
 static int unix_socketpair(struct socket *, struct socket *);
 static int unix_accept(struct socket *, struct socket *, int, bool);
 static int unix_getname(struct socket *, struct sockaddr *, int);
-static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
-static __poll_t unix_dgram_poll(struct file *, struct socket *,
-				    poll_table *);
+static __poll_t unix_poll_mask(struct socket *, __poll_t);
+static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t);
 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 static int unix_shutdown(struct socket *, int);
 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -681,7 +680,7 @@
 	.socketpair =	unix_socketpair,
 	.accept =	unix_accept,
 	.getname =	unix_getname,
-	.poll =		unix_poll,
+	.poll_mask =	unix_poll_mask,
 	.ioctl =	unix_ioctl,
 	.listen =	unix_listen,
 	.shutdown =	unix_shutdown,
@@ -704,7 +703,7 @@
 	.socketpair =	unix_socketpair,
 	.accept =	sock_no_accept,
 	.getname =	unix_getname,
-	.poll =		unix_dgram_poll,
+	.poll_mask =	unix_dgram_poll_mask,
 	.ioctl =	unix_ioctl,
 	.listen =	sock_no_listen,
 	.shutdown =	unix_shutdown,
@@ -726,7 +725,7 @@
 	.socketpair =	unix_socketpair,
 	.accept =	unix_accept,
 	.getname =	unix_getname,
-	.poll =		unix_dgram_poll,
+	.poll_mask =	unix_dgram_poll_mask,
 	.ioctl =	unix_ioctl,
 	.listen =	unix_listen,
 	.shutdown =	unix_shutdown,
@@ -2630,13 +2629,10 @@
 	return err;
 }
 
-static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	__poll_t mask = 0;
 
 	/* exceptional events? */
 	if (sk->sk_err)
@@ -2665,15 +2661,11 @@
 	return mask;
 }
 
-static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
-				    poll_table *wait)
+static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
 {
 	struct sock *sk = sock->sk, *other;
-	unsigned int writable;
-	__poll_t mask;
-
-	sock_poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	int writable;
+	__poll_t mask = 0;
 
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -2699,7 +2691,7 @@
 	}
 
 	/* No write status requested, avoid expensive OUT tests. */
-	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
+	if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
 		return mask;
 
 	writable = unix_writable(sk);
@@ -2852,20 +2844,6 @@
 	.stop   = unix_seq_stop,
 	.show   = unix_seq_show,
 };
-
-static int unix_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &unix_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations unix_seq_fops = {
-	.open		= unix_seq_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
 #endif
 
 static const struct net_proto_family unix_family_ops = {
@@ -2884,7 +2862,8 @@
 		goto out;
 
 #ifdef CONFIG_PROC_FS
-	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
+	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
+			sizeof(struct seq_net_private))) {
 		unix_sysctl_unregister(net);
 		goto out;
 	}
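
unix_dgram_poll_mask also shows what happens to handlers that inspected the requested events: poll_requested_events(wait) disappears because the mask now arrives as the argument. The delta, in isolation:

	/* Before: dig the requested events out of the poll_table. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT)))
		return mask;

	/* After: they are simply the second argument. */
	if (!(events & (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT)))
		return mask;
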
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c1076c1..bb5d5fa 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,18 +850,11 @@
 	return err;
 }
 
-static __poll_t vsock_poll(struct file *file, struct socket *sock,
-			       poll_table *wait)
+static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events)
 {
-	struct sock *sk;
-	__poll_t mask;
-	struct vsock_sock *vsk;
-
-	sk = sock->sk;
-	vsk = vsock_sk(sk);
-
-	poll_wait(file, sk_sleep(sk), wait);
-	mask = 0;
+	struct sock *sk = sock->sk;
+	struct vsock_sock *vsk = vsock_sk(sk);
+	__poll_t mask = 0;
 
 	if (sk->sk_err)
 		/* Signify that there has been an error on this socket. */
@@ -1091,7 +1084,7 @@
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.getname = vsock_getname,
-	.poll = vsock_poll,
+	.poll_mask = vsock_poll_mask,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
 	.shutdown = vsock_shutdown,
@@ -1849,7 +1842,7 @@
 	.socketpair = sock_no_socketpair,
 	.accept = vsock_accept,
 	.getname = vsock_getname,
-	.poll = vsock_poll,
+	.poll_mask = vsock_poll_mask,
 	.ioctl = sock_no_ioctl,
 	.listen = vsock_listen,
 	.shutdown = vsock_shutdown,
diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c
index b4c4645..cadcf86 100644
--- a/net/wireless/wext-proc.c
+++ b/net/wireless/wext-proc.c
@@ -126,24 +126,11 @@
 	.show  = wireless_dev_seq_show,
 };
 
-static int seq_open_wireless(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &wireless_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations wireless_seq_fops = {
-	.open    = seq_open_wireless,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
 int __net_init wext_proc_init(struct net *net)
 {
 	/* Create /proc/net/wireless entry */
-	if (!proc_create("wireless", 0444, net->proc_net,
-			 &wireless_seq_fops))
+	if (!proc_create_net("wireless", 0444, net->proc_net,
+			&wireless_seq_ops, sizeof(struct seq_net_private)))
 		return -ENOMEM;
 
 	return 0;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d49aa79..f93365a 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1750,7 +1750,7 @@
 	.socketpair =	sock_no_socketpair,
 	.accept =	x25_accept,
 	.getname =	x25_getname,
-	.poll =		datagram_poll,
+	.poll_mask =	datagram_poll_mask,
 	.ioctl =	x25_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = compat_x25_ioctl,
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 64b415e..da52c9d 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -171,57 +171,21 @@
 	.show   = x25_seq_forward_show,
 };
 
-static int x25_seq_socket_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &x25_seq_socket_ops);
-}
-
-static int x25_seq_route_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &x25_seq_route_ops);
-}
-
-static int x25_seq_forward_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &x25_seq_forward_ops);
-}
-
-static const struct file_operations x25_seq_socket_fops = {
-	.open		= x25_seq_socket_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const struct file_operations x25_seq_route_fops = {
-	.open		= x25_seq_route_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const struct file_operations x25_seq_forward_fops = {
-	.open		= x25_seq_forward_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 int __init x25_proc_init(void)
 {
 	if (!proc_mkdir("x25", init_net.proc_net))
 		return -ENOMEM;
 
-	if (!proc_create("x25/route", 0444, init_net.proc_net,
-			 &x25_seq_route_fops))
+	if (!proc_create_seq("x25/route", 0444, init_net.proc_net,
+			 &x25_seq_route_ops))
 		goto out;
 
-	if (!proc_create("x25/socket", 0444, init_net.proc_net,
-			 &x25_seq_socket_fops))
+	if (!proc_create_seq("x25/socket", 0444, init_net.proc_net,
+			 &x25_seq_socket_ops))
 		goto out;
 
-	if (!proc_create("x25/forward", 0444, init_net.proc_net,
-			 &x25_seq_forward_fops))
+	if (!proc_create_seq("x25/forward", 0444, init_net.proc_net,
+			 &x25_seq_forward_ops))
 		goto out;
 	return 0;
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ddca4bf..c6ed245 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -300,10 +300,9 @@
 	return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
 }
 
-static unsigned int xsk_poll(struct file *file, struct socket *sock,
-			     struct poll_table_struct *wait)
+static __poll_t xsk_poll_mask(struct socket *sock, __poll_t events)
 {
-	unsigned int mask = datagram_poll(file, sock, wait);
+	__poll_t mask = datagram_poll_mask(sock, events);
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
 
@@ -694,7 +693,7 @@
 	.socketpair	= sock_no_socketpair,
 	.accept		= sock_no_accept,
 	.getname	= sock_no_getname,
-	.poll		= xsk_poll,
+	.poll_mask	= xsk_poll_mask,
 	.ioctl		= sock_no_ioctl,
 	.listen		= sock_no_listen,
 	.shutdown	= sock_no_shutdown,
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index ed06903..178318d 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -65,22 +65,10 @@
 	return 0;
 }
 
-static int xfrm_statistics_seq_open(struct inode *inode, struct file *file)
-{
-	return single_open_net(inode, file, xfrm_statistics_seq_show);
-}
-
-static const struct file_operations xfrm_statistics_seq_fops = {
-	.open	 = xfrm_statistics_seq_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release_net,
-};
-
 int __net_init xfrm_proc_init(struct net *net)
 {
-	if (!proc_create("xfrm_stat", 0444, net->proc_net,
-			 &xfrm_statistics_seq_fops))
+	if (!proc_create_net_single("xfrm_stat", 0444, net->proc_net,
+			 xfrm_statistics_seq_show, NULL))
 		return -ENOMEM;
 	return 0;
 }
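
The proc conversions in this run (af_unix, wext-proc, x25, xfrm above) all replace a hand-rolled seq_open wrapper plus file_operations table with one of three new helpers. A hedged side-by-side sketch follows; the example_* names and the example_seq_ops table are placeholders, not from this series:

/* Assumed kernel context (<linux/proc_fs.h>, <linux/seq_file.h>);
 * example_seq_ops is a hypothetical, already-defined seq_operations table.
 */
static int example_single_show(struct seq_file *m, void *v)
{
	seq_puts(m, "stats go here\n");
	return 0;
}

static int __net_init example_proc_init(struct net *net)
{
	/* per-net iterator: state_size replaces the seq_open_net() wrapper */
	if (!proc_create_net("example", 0444, net->proc_net,
			     &example_seq_ops, sizeof(struct seq_net_private)))
		return -ENOMEM;

	/* plain iterator with no private state (cf. the x25 conversion) */
	if (!proc_create_seq("example_seq", 0444, init_net.proc_net,
			     &example_seq_ops))
		return -ENOMEM;

	/* one-shot show() routine, per-net (cf. the xfrm_stat conversion) */
	if (!proc_create_net_single("example_single", 0444, net->proc_net,
				    example_single_show, NULL))
		return -ENOMEM;
	return 0;
}
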
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 50cee53..c8156d6 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -8,8 +8,6 @@
 empty   :=
 space   := $(empty) $(empty)
 space_escape := _-_SPACE_-_
-right_paren := )
-left_paren := (
 pound := \#
 
 ###
@@ -57,7 +55,6 @@
 #   to specify a valid file as first prerequisite (often the kbuild file)
 define filechk
 	$(Q)set -e;				\
-	$(kecho) '  CHK     $@';		\
 	mkdir -p $(dir $@);			\
 	$(filechk_$(1)) < $< > $@.tmp;		\
 	if [ -r $@ ] && cmp -s $@ $@.tmp; then	\
@@ -83,71 +80,6 @@
 			echo $(c);                                    \
 		fi)))
 
-# Tools for caching Makefile variables that are "expensive" to compute.
-#
-# Here we want to help deal with variables that take a long time to compute
-# by making it easy to store these variables in a cache.
-#
-# The canonical example here is testing for compiler flags.  On a simple system
-# each call to the compiler takes 10 ms, but on a system with a compiler that's
-# called through various wrappers it can take upwards of 100 ms.  If we have
-# 100 calls to the compiler this can take 1 second (on a simple system) or 10
-# seconds (on a complicated system).
-#
-# The "cache" will be in Makefile syntax and can be directly included.
-# Any time we try to reference a variable that's not in the cache we'll
-# calculate it and store it in the cache for next time.
-
-# Include values from last time
-make-cache := $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/,$(if $(obj),$(obj)/)).cache.mk
-$(make-cache): ;
--include $(make-cache)
-
-cached-data := $(filter __cached_%, $(.VARIABLES))
-
-# If cache exceeds 1000 lines, shrink it down to 500.
-ifneq ($(word 1000,$(cached-data)),)
-$(shell tail -n 500 $(make-cache) > $(make-cache).tmp; \
-	mv $(make-cache).tmp $(make-cache))
-endif
-
-create-cache-dir := $(if $(KBUILD_SRC),$(if $(cache-data),,1))
-
-# Usage: $(call __sanitize-opt,Hello=Hola$(comma)Goodbye Adios)
-#
-# Convert all '$', ')', '(', '\', '=', ' ', ',', ':' to '_'
-__sanitize-opt = $(subst $$,_,$(subst $(right_paren),_,$(subst $(left_paren),_,$(subst \,_,$(subst =,_,$(subst $(space),_,$(subst $(comma),_,$(subst :,_,$(1)))))))))
-
-# Usage:   $(call shell-cached,shell_command)
-# Example: $(call shell-cached,md5sum /usr/bin/gcc)
-#
-# If we've already seen a call to this exact shell command (even in a
-# previous invocation of make!) we'll return the value.  If not, we'll
-# compute it and store the result for future runs.
-#
-# This is a bit of voodoo, but basic explanation is that if the variable
-# was undefined then we'll evaluate the shell command and store the result
-# into the variable.  We'll then store that value in the cache and finally
-# output the value.
-#
-# NOTE: The $$(2) here isn't actually a parameter to __run-and-store.  We
-# happen to know that the caller will have their shell command in $(2) so the
-# result of "call"ing this will produce a reference to that $(2).  The reason
-# for this strangeness is to avoid an extra level of eval (and escaping) of
-# $(2).
-define __run-and-store
-ifeq ($(origin $(1)),undefined)
-  $$(eval $(1) := $$(shell $$(2)))
-ifeq ($(create-cache-dir),1)
-  $$(shell mkdir -p $(dir $(make-cache)))
-  $$(eval create-cache-dir :=)
-endif
-  $$(shell echo '$(1) := $$($(1))' >> $(make-cache))
-endif
-endef
-__shell-cached = $(eval $(call __run-and-store,$(1)))$($(1))
-shell-cached = $(call __shell-cached,__cached_$(call __sanitize-opt,$(1)),$(1))
-
 # output directory for tests below
 TMPOUT := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/)
 
@@ -155,36 +87,30 @@
 # Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
 # Exit code chooses option. "$$TMP" serves as a temporary file and is
 # automatically cleaned up.
-__try-run = set -e;			\
+try-run = $(shell set -e;		\
 	TMP="$(TMPOUT).$$$$.tmp";	\
 	TMPO="$(TMPOUT).$$$$.o";	\
 	if ($(1)) >/dev/null 2>&1;	\
 	then echo "$(2)";		\
 	else echo "$(3)";		\
 	fi;				\
-	rm -f "$$TMP" "$$TMPO"
-
-try-run = $(shell $(__try-run))
-
-# try-run-cached
-# This works like try-run, but the result is cached.
-try-run-cached = $(call shell-cached,$(__try-run))
+	rm -f "$$TMP" "$$TMPO")
 
 # as-option
 # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
 
-as-option = $(call try-run-cached,\
+as-option = $(call try-run,\
 	$(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2))
 
 # as-instr
 # Usage: cflags-y += $(call as-instr,instr,option1,option2)
 
-as-instr = $(call try-run-cached,\
+as-instr = $(call try-run,\
 	printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
 
 # __cc-option
 # Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
-__cc-option = $(call try-run-cached,\
+__cc-option = $(call try-run,\
 	$(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4))
 
 # Do not attempt to build with gcc plugins during cc-option tests.
@@ -204,23 +130,23 @@
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
-cc-option-yn = $(call try-run-cached,\
+cc-option-yn = $(call try-run,\
 	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
 
 # cc-disable-warning
 # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
-cc-disable-warning = $(call try-run-cached,\
+cc-disable-warning = $(call try-run,\
 	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
 # cc-name
 # Expands to either gcc or clang
-cc-name = $(call shell-cached,$(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo gcc)
+cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo gcc)
 
 # cc-version
-cc-version = $(call shell-cached,$(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
+cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
 
 # cc-fullversion
-cc-fullversion = $(call shell-cached,$(CONFIG_SHELL) \
+cc-fullversion = $(shell $(CONFIG_SHELL) \
 	$(srctree)/scripts/gcc-version.sh -p $(CC))
 
 # cc-ifversion
@@ -233,21 +159,21 @@
 
 # cc-ldoption
 # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
-cc-ldoption = $(call try-run-cached,\
+cc-ldoption = $(call try-run,\
 	$(CC) $(1) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # ld-option
 # Usage: LDFLAGS += $(call ld-option, -X)
-ld-option = $(call try-run-cached, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2))
+ld-option = $(call try-run, $(LD) $(LDFLAGS) $(1) -v,$(1),$(2))
 
 # ar-option
 # Usage: KBUILD_ARFLAGS := $(call ar-option,D)
 # Important: no spaces around options
-ar-option = $(call try-run-cached, $(AR) rc$(1) "$$TMP",$(1),$(2))
+ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2))
 
 # ld-version
 # Note this is mainly for HJ Lu's 3 number binutil versions
-ld-version = $(call shell-cached,$(LD) --version | $(srctree)/scripts/ld-version.sh)
+ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
 
 # ld-ifversion
 # Usage:  $(call ld-ifversion, -ge, 22252, y)
diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include
new file mode 100644
index 0000000..bf7c0c9
--- /dev/null
+++ b/scripts/Kconfig.include
@@ -0,0 +1,27 @@
+# Kconfig helper macros
+
+# Convenient variables
+comma       := ,
+quote       := "
+squote      := '
+empty       :=
+space       := $(empty) $(empty)
+dollar      := $
+right_paren := )
+left_paren  := (
+
+# $(if-success,<command>,<then>,<else>)
+# Return <then> if <command> exits with 0, <else> otherwise.
+if-success = $(shell,{ $(1); } >/dev/null 2>&1 && echo "$(2)" || echo "$(3)")
+
+# $(success,<command>)
+# Return y if <command> exits with 0, n otherwise
+success = $(if-success,$(1),y,n)
+
+# $(cc-option,<flag>)
+# Return y if the compiler supports <flag>, n otherwise
+cc-option = $(success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null)
+
+# $(ld-option,<flag>)
+# Return y if the linker supports <flag>, n otherwise
+ld-option = $(success,$(LD) -v $(1))
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 8bdb1dc..34d9e9c 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -147,7 +147,6 @@
 cmd_gensymtypes_c =                                                         \
     $(CPP) -D__GENKSYMS__ $(c_flags) $< |                                   \
     $(GENKSYMS) $(if $(1), -T $(2))                                         \
-     $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))             \
      $(patsubst y,-R,$(CONFIG_MODULE_REL_CRCS))                             \
      $(if $(KBUILD_PRESERVE),-p)                                            \
      -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null))
@@ -207,6 +206,11 @@
 endif
 
 ifdef CONFIG_FTRACE_MCOUNT_RECORD
+# gcc 5 supports generating the mcount tables directly
+ifneq ($(call cc-option,-mrecord-mcount,y),y)
+KBUILD_CFLAGS += -mrecord-mcount
+else
+# else do it all manually
 ifdef BUILD_C_RECORDMCOUNT
 ifeq ("$(origin RECORDMCOUNT_WARN)", "command line")
   RECORDMCOUNT_FLAGS = -w
@@ -259,6 +263,7 @@
   objtool_args += --retpoline
 endif
 endif
+endif
 
 
 ifdef CONFIG_MODVERSIONS
@@ -355,7 +360,6 @@
      sed 's/.*___EXPORT_SYMBOL[[:space:]]*\([a-zA-Z0-9_]*\)[[:space:]]*,.*/EXPORT_SYMBOL(\1);/' ) | \
     $(CPP) -D__GENKSYMS__ $(c_flags) -xc - |                                \
     $(GENKSYMS) $(if $(1), -T $(2))                                         \
-     $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))             \
      $(patsubst y,-R,$(CONFIG_MODULE_REL_CRCS))                             \
      $(if $(KBUILD_PRESERVE),-p)                                            \
      -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null))
@@ -487,15 +491,10 @@
 
 dummy-object = $(obj)/.lib_exports.o
 ksyms-lds = $(dot-target).lds
-ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
-ref_prefix = EXTERN(_
-else
-ref_prefix = EXTERN(
-endif
 
 quiet_cmd_export_list = EXPORTS $@
 cmd_export_list = $(OBJDUMP) -h $< | \
-	sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/$(ref_prefix)\1)/p' >$(ksyms-lds);\
+	sed -ne '/___ksymtab/s/.*+\([^ ]*\).*/EXTERN(\1)/p' >$(ksyms-lds);\
 	rm -f $(dummy-object);\
 	echo | $(CC) $(a_flags) -c -o $(dummy-object) -x assembler -;\
 	$(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\
diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh
index 016b3c4..6e6d639 100755
--- a/scripts/adjust_autoksyms.sh
+++ b/scripts/adjust_autoksyms.sh
@@ -61,9 +61,6 @@
 	sed -n -e '3{s/ /\n/g;/^$/!p;}' "$mod"
 done | sort -u |
 while read sym; do
-	if [ -n "$CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX" ]; then
-		sym="${sym#_}"
-	fi
 	echo "#define __KSYM_${sym} 1"
 done >> "$new_ksyms_file"
 
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index f387538..850966f3 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -115,7 +115,7 @@
  */
 static void print_dep(const char *m, int slen, const char *dir)
 {
-	int c, i;
+	int c, prev_c = '/', i;
 
 	printf("    $(wildcard %s/", dir);
 	for (i = 0; i < slen; i++) {
@@ -124,7 +124,9 @@
 			c = '/';
 		else
 			c = tolower(c);
-		putchar(c);
+		if (c != '/' || prev_c != '/')
+			putchar(c);
+		prev_c = c;
 	}
 	printf(".h) \\\n");
 }
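
The fixdep change above is easiest to see in isolation: the CONFIG_-to-path mapping turns every '_' into '/', so a doubled underscore used to produce a malformed $(wildcard .../foo//bar.h) entry; tracking prev_c collapses the duplicates. A standalone userspace sketch of the same loop (print_dep_demo is illustrative, not the in-tree function):

#include <ctype.h>
#include <stdio.h>

/* Mirror of the patched loop: never emit two consecutive slashes. */
static void print_dep_demo(const char *config_name)
{
	int prev_c = '/';	/* also swallows a leading '_' after the dir */

	printf("    $(wildcard include/config/");
	for (; *config_name; config_name++) {
		int c = *config_name;

		if (c == '_')
			c = '/';
		else
			c = tolower((unsigned char)c);
		if (c != '/' || prev_c != '/')
			putchar(c);
		prev_c = c;
	}
	printf(".h)\n");
}

int main(void)
{
	print_dep_demo("USB__STORAGE");	/* -> include/config/usb/storage.h */
	return 0;
}
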
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 2d42eb9..e6033d3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5121,16 +5121,6 @@
 			}
 		}
 
-# make sure symbols are always wrapped with VMLINUX_SYMBOL() ...
-# all assignments may have only one of the following with an assignment:
-#	.
-#	ALIGN(...)
-#	VMLINUX_SYMBOL(...)
-		if ($realfile eq 'vmlinux.lds.h' && $line =~ /(?:(?:^|\s)$Ident\s*=|=\s*$Ident(?:\s|$))/) {
-			WARN("MISSING_VMLINUX_SYMBOL",
-			     "vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr);
-		}
-
 # check for redundant bracing round if etc
 		if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) {
 			my ($level, $endln, @chunks) =
diff --git a/scripts/coccinelle/api/drm-get-put.cocci b/scripts/coccinelle/api/drm-get-put.cocci
index ceb71ea..3a09c97 100644
--- a/scripts/coccinelle/api/drm-get-put.cocci
+++ b/scripts/coccinelle/api/drm-get-put.cocci
@@ -40,12 +40,6 @@
 - drm_gem_object_unreference_unlocked(object)
 + drm_gem_object_put_unlocked(object)
 |
-- drm_property_reference_blob(object)
-+ drm_property_blob_get(object)
-|
-- drm_property_unreference_blob(object)
-+ drm_property_blob_put(object)
-|
 - drm_dev_unref(object)
 + drm_dev_put(object)
 )
@@ -72,10 +66,6 @@
 |
 drm_gem_object_unreference_unlocked(object)
 |
-drm_property_unreference_blob@p(object)
-|
-drm_property_reference_blob@p(object)
-|
 drm_dev_unref@p(object)
 )
 
diff --git a/scripts/coccinelle/locks/mini_lock.cocci b/scripts/coccinelle/locks/mini_lock.cocci
index 47f649b..19c6ee5 100644
--- a/scripts/coccinelle/locks/mini_lock.cocci
+++ b/scripts/coccinelle/locks/mini_lock.cocci
@@ -67,12 +67,14 @@
 @@
 
 *lock(E1@p,...);
-<+... when != E1
+... when != E1
+    when any
 if (...) {
   ... when != E1
 *  return@r ...;
 }
-...+>
+... when != E1
+    when any
 *unlock@up(E1,...);
 
 @script:python depends on org@
diff --git a/scripts/coccinelle/null/deref_null.cocci b/scripts/coccinelle/null/deref_null.cocci
index b16ccb7..cbc6184 100644
--- a/scripts/coccinelle/null/deref_null.cocci
+++ b/scripts/coccinelle/null/deref_null.cocci
@@ -14,18 +14,10 @@
 virtual org
 virtual report
 
-@ifm@
-expression *E;
-statement S1,S2;
-position p1;
-@@
-
-if@p1 ((E == NULL && ...) || ...) S1 else S2
-
 // The following two rules are separate, because both can match a single
 // expression in different ways
 @pr1 expression@
-expression *ifm.E;
+expression E;
 identifier f;
 position p1;
 @@
@@ -33,7 +25,7 @@
  (E != NULL && ...) ? <+...E->f@p1...+> : ...
 
 @pr2 expression@
-expression *ifm.E;
+expression E;
 identifier f;
 position p2;
 @@
@@ -46,6 +38,14 @@
  sizeof(<+...E->f@p2...+>)
 )
 
+@ifm@
+expression *E;
+statement S1,S2;
+position p1;
+@@
+
+if@p1 ((E == NULL && ...) || ...) S1 else S2
+
 // For org and report modes
 
 @r depends on !context && (org || report) exists@
@@ -212,16 +212,8 @@
 // The following three rules are duplicates of ifm, pr1 and pr2 respectively.
 // It is needed because the previous rule has already made a "change".
 
-@ifm1 depends on context && !org && !report@
-expression *E;
-statement S1,S2;
-position p1;
-@@
-
-if@p1 ((E == NULL && ...) || ...) S1 else S2
-
 @pr11 depends on context && !org && !report expression@
-expression *ifm1.E;
+expression E;
 identifier f;
 position p1;
 @@
@@ -229,7 +221,7 @@
  (E != NULL && ...) ? <+...E->f@p1...+> : ...
 
 @pr12 depends on context && !org && !report expression@
-expression *ifm1.E;
+expression E;
 identifier f;
 position p2;
 @@
@@ -242,6 +234,14 @@
  sizeof(<+...E->f@p2...+>)
 )
 
+@ifm1 depends on context && !org && !report@
+expression *E;
+statement S1,S2;
+position p1;
+@@
+
+if@p1 ((E == NULL && ...) || ...) S1 else S2
+
 @depends on context && !org && !report exists@
 expression subE <= ifm1.E;
 expression *ifm1.E;
diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index 9831cca..1a6f85e 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -3,36 +3,17 @@
 #
 # A depmod wrapper used by the toplevel Makefile
 
-if test $# -ne 3; then
-	echo "Usage: $0 /sbin/depmod <kernelrelease> <symbolprefix>" >&2
+if test $# -ne 2; then
+	echo "Usage: $0 /sbin/depmod <kernelrelease>" >&2
 	exit 1
 fi
 DEPMOD=$1
 KERNELRELEASE=$2
-SYMBOL_PREFIX=$3
 
 if ! test -r System.map -a -x "$DEPMOD"; then
 	exit 0
 fi
 
-# older versions of depmod don't support -P <symbol-prefix>
-# support was added in module-init-tools 3.13
-if test -n "$SYMBOL_PREFIX"; then
-	release=$("$DEPMOD" --version)
-	package=$(echo "$release" | cut -d' ' -f 1)
-	if test "$package" = "module-init-tools"; then
-		version=$(echo "$release" | cut -d' ' -f 2)
-		later=$(printf '%s\n' "$version" "3.13" | sort -V | tail -n 1)
-		if test "$later" != "$version"; then
-			# module-init-tools < 3.13, drop the symbol prefix
-			SYMBOL_PREFIX=""
-		fi
-	fi
-	if test -n "$SYMBOL_PREFIX"; then
-		SYMBOL_PREFIX="-P $SYMBOL_PREFIX"
-	fi
-fi
-
 # older versions of depmod require the version string to start with three
 # numbers, so we cheat with a symlink here
 depmod_hack_needed=true
@@ -55,7 +36,7 @@
 if test -n "$INSTALL_MOD_PATH"; then
 	set -- "$@" -b "$INSTALL_MOD_PATH"
 fi
-"$DEPMOD" "$@" "$KERNELRELEASE" $SYMBOL_PREFIX
+"$DEPMOD" "$@" "$KERNELRELEASE"
 ret=$?
 
 if $depmod_hack_needed; then
diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check
index bc16599..2520bc1 100755
--- a/scripts/documentation-file-ref-check
+++ b/scripts/documentation-file-ref-check
@@ -1,15 +1,116 @@
-#!/bin/sh
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+#
 # Treewide grep for references to files under Documentation, and report
 # non-existing files in stderr.
 
-for f in $(git ls-files); do
-	for ref in $(grep -ho "Documentation/[A-Za-z0-9_.,~/*+-]*" "$f"); do
-		# presume trailing . and , are not part of the name
-		ref=${ref%%[.,]}
+use warnings;
+use strict;
+use Getopt::Long qw(:config no_auto_abbrev);
 
-		# use ls to handle wildcards
-		if ! ls $ref >/dev/null 2>&1; then
-			echo "$f: $ref" >&2
-		fi
-	done
-done
+my $scriptname = $0;
+$scriptname =~ s,.*/([^/]+/),$1,;
+
+# Parse arguments
+my $help = 0;
+my $fix = 0;
+
+GetOptions(
+	'fix' => \$fix,
+	'h|help|usage' => \$help,
+);
+
+if ($help != 0) {
+    print "$scriptname [--help] [--fix]\n";
+    exit -1;
+}
+
+# Step 1: find broken references
+print "Finding broken references. This may take a while...  " if ($fix);
+
+my %broken_ref;
+
+open IN, "git grep 'Documentation/'|"
+     or die "Failed to run git grep";
+while (<IN>) {
+	next if (!m/^([^:]+):(.*)/);
+
+	my $f = $1;
+	my $ln = $2;
+
+	# Makefiles contain nasty expressions to parse docs
+	next if ($f =~ m/Makefile/);
+	# Skip this script
+	next if ($f eq $scriptname);
+
+	if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*+-]*),) {
+		my $prefix = $1;
+		my $ref = $2;
+		my $base = $2;
+
+		$ref =~ s/[\,\.]+$//;
+
+		my $fulref = "$prefix$ref";
+
+		$fulref =~ s/^(\<file|ref)://;
+		$fulref =~ s/^[\'\`]+//;
+		$fulref =~ s,^\$\(.*\)/,,;
+		$base =~ s,.*/,,;
+
+		# Remove URL false-positives
+		next if ($fulref =~ m/^http/);
+
+		# Check if exists, evaluating wildcards
+		next if (grep -e, glob("$ref $fulref"));
+
+		if ($fix) {
+			if (!($ref =~ m/(devicetree|scripts|Kconfig|Kbuild)/)) {
+				$broken_ref{$ref}++;
+			}
+		} else {
+			print STDERR "$f: $fulref\n";
+		}
+	}
+}
+
+exit 0 if (!$fix);
+
+# Step 2: Search for file name alternatives
+print "Auto-fixing broken references. Please double-check the results\n";
+
+foreach my $ref (keys %broken_ref) {
+	my $new = $ref;
+
+	# get just the basename
+	$new =~ s,.*/,,;
+
+	# Search for the same name in another place, as it may have been moved
+	my $f = "";
+
+	$f = qx(find . -iname $new) if ($new);
+
+	# usual reason for breakage: file renamed to .rst
+	if (!$f) {
+		$new =~ s/\.txt$/.rst/;
+		$f = qx(find . -iname $new) if ($new);
+	}
+
+	my @find = split /\s+/, $f;
+
+	if (!$f) {
+		print STDERR "ERROR: Didn't find a replacement for $ref\n";
+	} elsif (scalar(@find) > 1) {
+		print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
+		foreach my $j (@find) {
+			$j =~ s,^./,,;
+			print STDERR "    $j\n";
+		}
+	} else {
+		$f = $find[0];
+		$f =~ s,^./,,;
+		print "INFO: Replacing $ref with $f\n";
+		foreach my $j (qx(git grep -l $ref)) {
+			qx(sed "s\@$ref\@$f\@g" -i $j);
+		}
+	}
+}
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 1876a74..a0149db 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -56,7 +56,7 @@
 command -v ${NM} >/dev/null 2>&1 || die "nm isn't installed"
 
 usage() {
-	echo "usage: faddr2line <object file> <func+offset> <func+offset>..." >&2
+	echo "usage: faddr2line [--list] <object file> <func+offset> <func+offset>..." >&2
 	exit 1
 }
 
@@ -166,15 +166,25 @@
 		local file_lines=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;")
 		[[ -z $file_lines ]] && return
 
+		if [[ $LIST = 0 ]]; then
+			echo "$file_lines" | while read -r line
+			do
+				echo $line
+			done
+			DONE=1;
+			return
+		fi
+
 		# show each line with context
 		echo "$file_lines" | while read -r line
 		do
+			echo
 			echo $line
 			n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
 			n1=$[$n-5]
 			n2=$[$n+5]
 			f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
-			awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
+			awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") { if (NR=='$n') printf(">%d<", NR); else printf(" %d ", NR); printf("\t%s\n", $0)}' $f
 		done
 
 		DONE=1
@@ -185,6 +195,10 @@
 [[ $# -lt 2 ]] && usage
 
 objfile=$1
+
+LIST=0
+[[ "$objfile" == "--list" ]] && LIST=1 && shift && objfile=$1
+
 [[ ! -f $objfile ]] && die "can't find objfile $objfile"
 shift
 
diff --git a/scripts/genksyms/genksyms.c b/scripts/genksyms/genksyms.c
index c9235d8..e007840 100644
--- a/scripts/genksyms/genksyms.c
+++ b/scripts/genksyms/genksyms.c
@@ -45,7 +45,6 @@
 
 static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types,
 	   flag_preserve, flag_warnings, flag_rel_crcs;
-static const char *mod_prefix = "";
 
 static int errors;
 static int nsyms;
@@ -693,10 +692,10 @@
 			fputs(">\n", debugfile);
 
 		/* Used as a linker script. */
-		printf(!flag_rel_crcs ? "%s__crc_%s = 0x%08lx;\n" :
+		printf(!flag_rel_crcs ? "__crc_%s = 0x%08lx;\n" :
 		       "SECTIONS { .rodata : ALIGN(4) { "
-		       "%s__crc_%s = .; LONG(0x%08lx); } }\n",
-		       mod_prefix, name, crc);
+		       "__crc_%s = .; LONG(0x%08lx); } }\n",
+		       name, crc);
 	}
 }
 
@@ -769,7 +768,6 @@
 
 #ifdef __GNU_LIBRARY__
 	struct option long_opts[] = {
-		{"symbol-prefix", 1, 0, 's'},
 		{"debug", 0, 0, 'd'},
 		{"warnings", 0, 0, 'w'},
 		{"quiet", 0, 0, 'q'},
@@ -789,9 +787,6 @@
 	while ((o = getopt(argc, argv, "s:dwqVDr:T:phR")) != EOF)
 #endif				/* __GNU_LIBRARY__ */
 		switch (o) {
-		case 's':
-			mod_prefix = optarg;
-			break;
 		case 'd':
 			flag_debug++;
 			break;
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 5abfbf1..a9186a9 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -62,7 +62,6 @@
 static unsigned int table_size, table_cnt;
 static int all_symbols = 0;
 static int absolute_percpu = 0;
-static char symbol_prefix_char = '\0';
 static int base_relative = 0;
 
 int token_profit[0x10000];
@@ -75,7 +74,6 @@
 static void usage(void)
 {
 	fprintf(stderr, "Usage: kallsyms [--all-symbols] "
-			"[--symbol-prefix=<prefix char>] "
 			"[--base-relative] < in.map > out.S\n");
 	exit(1);
 }
@@ -113,28 +111,22 @@
 
 static int read_symbol(FILE *in, struct sym_entry *s)
 {
-	char str[500];
-	char *sym, stype;
+	char sym[500], stype;
 	int rc;
 
-	rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
+	rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, sym);
 	if (rc != 3) {
-		if (rc != EOF && fgets(str, 500, in) == NULL)
+		if (rc != EOF && fgets(sym, 500, in) == NULL)
 			fprintf(stderr, "Read error or end of file.\n");
 		return -1;
 	}
-	if (strlen(str) > KSYM_NAME_LEN) {
+	if (strlen(sym) > KSYM_NAME_LEN) {
 		fprintf(stderr, "Symbol %s too long for kallsyms (%zu vs %d).\n"
 				"Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
-			str, strlen(str), KSYM_NAME_LEN);
+			sym, strlen(sym), KSYM_NAME_LEN);
 		return -1;
 	}
 
-	sym = str;
-	/* skip prefix char */
-	if (symbol_prefix_char && str[0] == symbol_prefix_char)
-		sym++;
-
 	/* Ignore most absolute/undefined (?) symbols. */
 	if (strcmp(sym, "_text") == 0)
 		_text = s->addr;
@@ -155,7 +147,7 @@
 		 is_arm_mapping_symbol(sym))
 		return -1;
 	/* exclude also MIPS ELF local symbols ($L123 instead of .L123) */
-	else if (str[0] == '$')
+	else if (sym[0] == '$')
 		return -1;
 	/* exclude debugging symbols */
 	else if (stype == 'N' || stype == 'n')
@@ -163,14 +155,14 @@
 
 	/* include the type field in the symbol name, so that it gets
 	 * compressed together */
-	s->len = strlen(str) + 1;
+	s->len = strlen(sym) + 1;
 	s->sym = malloc(s->len + 1);
 	if (!s->sym) {
 		fprintf(stderr, "kallsyms failure: "
 			"unable to allocate required amount of memory\n");
 		exit(EXIT_FAILURE);
 	}
-	strcpy((char *)s->sym + 1, str);
+	strcpy((char *)s->sym + 1, sym);
 	s->sym[0] = stype;
 
 	s->percpu_absolute = 0;
@@ -233,11 +225,6 @@
 	int i;
 	char *sym_name = (char *)s->sym + 1;
 
-	/* skip prefix char */
-	if (symbol_prefix_char && *sym_name == symbol_prefix_char)
-		sym_name++;
-
-
 	/* if --all-symbols is not specified, then symbols outside the text
 	 * and inittext sections are discarded */
 	if (!all_symbols) {
@@ -302,15 +289,9 @@
 
 static void output_label(char *label)
 {
-	if (symbol_prefix_char)
-		printf(".globl %c%s\n", symbol_prefix_char, label);
-	else
-		printf(".globl %s\n", label);
+	printf(".globl %s\n", label);
 	printf("\tALGN\n");
-	if (symbol_prefix_char)
-		printf("%c%s:\n", symbol_prefix_char, label);
-	else
-		printf("%s:\n", label);
+	printf("%s:\n", label);
 }
 
 /* uncompress a compressed symbol. When this function is called, the best table
@@ -424,7 +405,7 @@
 	}
 
 	output_label("kallsyms_num_syms");
-	printf("\tPTR\t%d\n", table_cnt);
+	printf("\tPTR\t%u\n", table_cnt);
 	printf("\n");
 
 	/* table of offset markers, that give the offset in the compressed stream
@@ -768,13 +749,7 @@
 				all_symbols = 1;
 			else if (strcmp(argv[i], "--absolute-percpu") == 0)
 				absolute_percpu = 1;
-			else if (strncmp(argv[i], "--symbol-prefix=", 16) == 0) {
-				char *p = &argv[i][16];
-				/* skip quote */
-				if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\''))
-					p++;
-				symbol_prefix_char = *p;
-			} else if (strcmp(argv[i], "--base-relative") == 0)
+			else if (strcmp(argv[i], "--base-relative") == 0)
 				base_relative = 1;
 			else
 				usage();
diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore
index 2da579e..0aabc1d 100644
--- a/scripts/kconfig/.gitignore
+++ b/scripts/kconfig/.gitignore
@@ -2,9 +2,6 @@
 # Generated files
 #
 *.moc
-gconf.glade.h
-*.pot
-*.mo
 
 #
 # configuration programs
@@ -14,4 +11,3 @@
 nconf
 qconf
 gconf
-kxgettext
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 5def877..a3ac2c9 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -3,7 +3,7 @@
 # Kernel configuration targets
 # These targets are used from top-level makefile
 
-PHONY += xconfig gconfig menuconfig config syncconfig update-po-config \
+PHONY += xconfig gconfig menuconfig config syncconfig \
 	localmodconfig localyesconfig
 
 ifdef KBUILD_KCONFIG
@@ -55,29 +55,6 @@
 	fi
 	$(Q)rm -f .tmp.config
 
-# Create new linux.pot file
-# Adjust charset to UTF-8 in .po file to accept UTF-8 in Kconfig files
-update-po-config: $(obj)/kxgettext $(obj)/gconf.glade.h
-	$(Q)$(kecho) "  GEN     config.pot"
-	$(Q)xgettext --default-domain=linux                         \
-	    --add-comments --keyword=_ --keyword=N_                 \
-	    --from-code=UTF-8                                       \
-	    --files-from=$(srctree)/scripts/kconfig/POTFILES.in     \
-	    --directory=$(srctree) --directory=$(objtree)           \
-	    --output $(obj)/config.pot
-	$(Q)sed -i s/CHARSET/UTF-8/ $(obj)/config.pot
-	$(Q)(for i in `ls $(srctree)/arch/*/Kconfig      \
-	    $(srctree)/arch/*/um/Kconfig`;               \
-	    do                                           \
-		$(kecho) "  GEN     $$i";                    \
-		$(obj)/kxgettext $$i                     \
-		     >> $(obj)/config.pot;               \
-	    done )
-	$(Q)$(kecho) "  GEN     linux.pot"
-	$(Q)msguniq --sort-by-file --to-code=UTF-8 $(obj)/config.pot \
-	    --output $(obj)/linux.pot
-	$(Q)rm -f $(obj)/config.pot
-
 # These targets map 1:1 to the commandline options of 'conf'
 simple-targets := oldconfig allnoconfig allyesconfig allmodconfig \
 	alldefconfig randconfig listnewconfig olddefconfig
@@ -151,8 +128,7 @@
 # Help text used by make help
 help:
 	@echo  '  config	  - Update current config utilising a line-oriented program'
-	@echo  '  nconfig         - Update current config utilising a ncurses menu based'
-	@echo  '                    program'
+	@echo  '  nconfig         - Update current config utilising an ncurses menu based program'
 	@echo  '  menuconfig	  - Update current config utilising a menu based program'
 	@echo  '  xconfig	  - Update current config utilising a Qt based front-end'
 	@echo  '  gconfig	  - Update current config utilising a GTK+ based front-end'
@@ -172,141 +148,77 @@
 	@echo  '  kvmconfig	  - Enable additional options for kvm guest kernel support'
 	@echo  '  xenconfig       - Enable additional options for xen dom0 and guest kernel support'
 	@echo  '  tinyconfig	  - Configure the tiniest possible kernel'
-
-# lxdialog stuff
-check-lxdialog  := $(srctree)/$(src)/lxdialog/check-lxdialog.sh
-
-# Use recursively expanded variables so we do not call gcc unless
-# we really need to do so. (Do not call gcc as part of make mrproper)
-HOST_EXTRACFLAGS += $(shell $(CONFIG_SHELL) $(check-lxdialog) -ccflags) \
-                    -DLOCALE
+	@echo  '  testconfig	  - Run Kconfig unit tests (requires python3 and pytest)'
 
 # ===========================================================================
 # Shared Makefile for the various kconfig executables:
 # conf:	  Used for defconfig, oldconfig and related targets
-# nconf:  Used for the nconfig target.
-#         Utilizes ncurses
-# mconf:  Used for the menuconfig target
-#         Utilizes the lxdialog package
-# qconf:  Used for the xconfig target
-#         Based on Qt which needs to be installed to compile it
-# gconf:  Used for the gconfig target
-#         Based on GTK+ which needs to be installed to compile it
 # object files used by all kconfig flavours
 
-lxdialog := lxdialog/checklist.o lxdialog/util.o lxdialog/inputbox.o
-lxdialog += lxdialog/textbox.o lxdialog/yesno.o lxdialog/menubox.o
-
 conf-objs	:= conf.o  zconf.tab.o
-mconf-objs     := mconf.o zconf.tab.o $(lxdialog)
-nconf-objs     := nconf.o zconf.tab.o nconf.gui.o
-kxgettext-objs	:= kxgettext.o zconf.tab.o
-qconf-cxxobjs	:= qconf.o
-qconf-objs	:= zconf.tab.o
-gconf-objs	:= gconf.o zconf.tab.o
 
-hostprogs-y := conf nconf mconf kxgettext qconf gconf
+hostprogs-y := conf
 
 targets		+= zconf.lex.c
-clean-files	:= qconf.moc .tmp_qtcheck .tmp_gtkcheck
-clean-files	+= gconf.glade.h
-clean-files     += config.pot linux.pot
-
-# Check that we have the required ncurses stuff installed for lxdialog (menuconfig)
-PHONY += $(obj)/dochecklxdialog
-$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/dochecklxdialog
-$(obj)/dochecklxdialog:
-	$(Q)$(CONFIG_SHELL) $(check-lxdialog) -check $(HOSTCC) $(HOST_EXTRACFLAGS) $(HOSTLOADLIBES_mconf)
-
-always := dochecklxdialog
-
-# Add environment specific flags
-HOST_EXTRACFLAGS += $(shell $(CONFIG_SHELL) $(srctree)/$(src)/check.sh $(HOSTCC) $(HOSTCFLAGS))
-HOST_EXTRACXXFLAGS += $(shell $(CONFIG_SHELL) $(srctree)/$(src)/check.sh $(HOSTCXX) $(HOSTCXXFLAGS))
 
 # generated files seem to need this to find local include files
 HOSTCFLAGS_zconf.lex.o	:= -I$(src)
 HOSTCFLAGS_zconf.tab.o	:= -I$(src)
 
-HOSTLOADLIBES_qconf	= $(KC_QT_LIBS)
-HOSTCXXFLAGS_qconf.o	= $(KC_QT_CFLAGS)
+# nconf: Used for the nconfig target based on ncurses
+hostprogs-y	+= nconf
+nconf-objs	:= nconf.o zconf.tab.o nconf.gui.o
 
-HOSTLOADLIBES_gconf	= `pkg-config --libs gtk+-2.0 gmodule-2.0 libglade-2.0`
-HOSTCFLAGS_gconf.o	= `pkg-config --cflags gtk+-2.0 gmodule-2.0 libglade-2.0` \
-                          -Wno-missing-prototypes
+HOSTLOADLIBES_nconf	= $(shell . $(obj)/.nconf-cfg && echo $$libs)
+HOSTCFLAGS_nconf.o	= $(shell . $(obj)/.nconf-cfg && echo $$cflags)
+HOSTCFLAGS_nconf.gui.o	= $(shell . $(obj)/.nconf-cfg && echo $$cflags)
 
-HOSTLOADLIBES_mconf   = $(shell $(CONFIG_SHELL) $(check-lxdialog) -ldflags $(HOSTCC))
+$(obj)/nconf.o: $(obj)/.nconf-cfg
 
-HOSTLOADLIBES_nconf	= $(shell \
-				pkg-config --libs menuw panelw ncursesw 2>/dev/null \
-				|| pkg-config --libs menu panel ncurses 2>/dev/null \
-				|| echo "-lmenu -lpanel -lncurses"  )
-$(obj)/qconf.o: $(obj)/.tmp_qtcheck
+# mconf: Used for the menuconfig target based on lxdialog
+hostprogs-y	+= mconf
+lxdialog	:= checklist.o inputbox.o menubox.o textbox.o util.o yesno.o
+mconf-objs	:= mconf.o zconf.tab.o $(addprefix lxdialog/, $(lxdialog))
 
-ifeq ($(MAKECMDGOALS),xconfig)
-$(obj)/.tmp_qtcheck: $(src)/Makefile
--include $(obj)/.tmp_qtcheck
+HOSTLOADLIBES_mconf = $(shell . $(obj)/.mconf-cfg && echo $$libs)
+$(foreach f, mconf.o $(lxdialog), \
+  $(eval HOSTCFLAGS_$f = $$(shell . $(obj)/.mconf-cfg && echo $$$$cflags)))
 
-# Qt needs some extra effort...
-$(obj)/.tmp_qtcheck:
-	@set -e; $(kecho) "  CHECK   qt"; \
-	if pkg-config --exists Qt5Core; then \
-	    cflags="-std=c++11 -fPIC `pkg-config --cflags Qt5Core Qt5Gui Qt5Widgets`"; \
-	    libs=`pkg-config --libs Qt5Core Qt5Gui Qt5Widgets`; \
-	    moc=`pkg-config --variable=host_bins Qt5Core`/moc; \
-	elif pkg-config --exists QtCore; then \
-	    cflags=`pkg-config --cflags QtCore QtGui`; \
-	    libs=`pkg-config --libs QtCore QtGui`; \
-	    moc=`pkg-config --variable=moc_location QtCore`; \
-	else \
-	    echo >&2 "*"; \
-	    echo >&2 "* Could not find Qt via pkg-config."; \
-	    echo >&2 "* Please install either Qt 4.8 or 5.x. and make sure it's in PKG_CONFIG_PATH"; \
-	    echo >&2 "*"; \
-	    exit 1; \
-	fi; \
-	echo "KC_QT_CFLAGS=$$cflags" > $@; \
-	echo "KC_QT_LIBS=$$libs" >> $@; \
-	echo "KC_QT_MOC=$$moc" >> $@
-endif
+$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/.mconf-cfg
 
-$(obj)/gconf.o: $(obj)/.tmp_gtkcheck
+# qconf: Used for the xconfig target based on Qt
+hostprogs-y	+= qconf
+qconf-cxxobjs	:= qconf.o
+qconf-objs	:= zconf.tab.o
 
-ifeq ($(MAKECMDGOALS),gconfig)
--include $(obj)/.tmp_gtkcheck
+HOSTLOADLIBES_qconf	= $(shell . $(obj)/.qconf-cfg && echo $$libs)
+HOSTCXXFLAGS_qconf.o	= $(shell . $(obj)/.qconf-cfg && echo $$cflags)
 
-# GTK+ needs some extra effort, too...
-$(obj)/.tmp_gtkcheck:
-	@if `pkg-config --exists gtk+-2.0 gmodule-2.0 libglade-2.0`; then		\
-		if `pkg-config --atleast-version=2.0.0 gtk+-2.0`; then			\
-			touch $@;								\
-		else									\
-			echo >&2 "*"; 							\
-			echo >&2 "* GTK+ is present but version >= 2.0.0 is required.";	\
-			echo >&2 "*";							\
-			false;								\
-		fi									\
-	else										\
-		echo >&2 "*"; 								\
-		echo >&2 "* Unable to find the GTK+ installation. Please make sure that"; 	\
-		echo >&2 "* the GTK+ 2.0 development package is correctly installed..."; 	\
-		echo >&2 "* You need gtk+-2.0, glib-2.0 and libglade-2.0."; 		\
-		echo >&2 "*"; 								\
-		false;									\
-	fi
-endif
+$(obj)/qconf.o: $(obj)/.qconf-cfg $(obj)/qconf.moc
+
+quiet_cmd_moc = MOC     $@
+      cmd_moc = $(shell . $(obj)/.qconf-cfg && echo $$moc) -i $< -o $@
+
+$(obj)/%.moc: $(src)/%.h $(obj)/.qconf-cfg
+	$(call cmd,moc)
+
+# gconf: Used for the gconfig target based on GTK+
+hostprogs-y	+= gconf
+gconf-objs	:= gconf.o zconf.tab.o
+
+HOSTLOADLIBES_gconf = $(shell . $(obj)/.gconf-cfg && echo $$libs)
+HOSTCFLAGS_gconf.o  = $(shell . $(obj)/.gconf-cfg && echo $$cflags)
+
+$(obj)/gconf.o: $(obj)/.gconf-cfg
 
 $(obj)/zconf.tab.o: $(obj)/zconf.lex.c
 
-$(obj)/qconf.o: $(obj)/qconf.moc
+# check if necessary packages are available, and configure build flags
+define filechk_conf_cfg
+	$(CONFIG_SHELL) $<
+endef
 
-quiet_cmd_moc = MOC     $@
-      cmd_moc = $(KC_QT_MOC) -i $< -o $@
+$(obj)/.%conf-cfg: $(src)/%conf-cfg.sh FORCE
+	$(call filechk,conf_cfg)
 
-$(obj)/%.moc: $(src)/%.h $(obj)/.tmp_qtcheck
-	$(call cmd,moc)
-
-# Extract gconf menu items for i18n support
-$(obj)/gconf.glade.h: $(obj)/gconf.glade
-	$(Q)intltool-extract --type=gettext/glade --srcdir=$(srctree) \
-	$(obj)/gconf.glade
+clean-files += .*conf-cfg
diff --git a/scripts/kconfig/POTFILES.in b/scripts/kconfig/POTFILES.in
deleted file mode 100644
index 9674573..0000000
--- a/scripts/kconfig/POTFILES.in
+++ /dev/null
@@ -1,12 +0,0 @@
-scripts/kconfig/lxdialog/checklist.c
-scripts/kconfig/lxdialog/inputbox.c
-scripts/kconfig/lxdialog/menubox.c
-scripts/kconfig/lxdialog/textbox.c
-scripts/kconfig/lxdialog/util.c
-scripts/kconfig/lxdialog/yesno.c
-scripts/kconfig/mconf.c
-scripts/kconfig/conf.c
-scripts/kconfig/confdata.c
-scripts/kconfig/gconf.c
-scripts/kconfig/gconf.glade.h
-scripts/kconfig/qconf.cc
diff --git a/scripts/kconfig/check.sh b/scripts/kconfig/check.sh
deleted file mode 100755
index 97f0fee..0000000
--- a/scripts/kconfig/check.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Needed for systems without gettext
-$* -x c -o /dev/null - > /dev/null 2>&1 << EOF
-#include <libintl.h>
-int main()
-{
-	gettext("");
-	return 0;
-}
-EOF
-if [ ! "$?" -eq "0"  ]; then
-	echo -DKBUILD_NO_NLS;
-fi
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index 283eeed..671ff53 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -3,7 +3,6 @@
  * Released under the terms of the GNU GPL v2.0.
  */
 
-#include <locale.h>
 #include <ctype.h>
 #include <limits.h>
 #include <stdio.h>
@@ -86,7 +85,7 @@
 	enum symbol_type type = sym_get_type(sym);
 
 	if (!sym_has_value(sym))
-		printf(_("(NEW) "));
+		printf("(NEW) ");
 
 	line[0] = '\n';
 	line[1] = 0;
@@ -133,7 +132,7 @@
 	const char *def;
 
 	while (1) {
-		printf("%*s%s ", indent - 1, "", _(menu->prompt->text));
+		printf("%*s%s ", indent - 1, "", menu->prompt->text);
 		printf("(%s) ", sym->name);
 		def = sym_get_string_value(sym);
 		if (sym_get_string_value(sym))
@@ -166,7 +165,7 @@
 	tristate oldval, newval;
 
 	while (1) {
-		printf("%*s%s ", indent - 1, "", _(menu->prompt->text));
+		printf("%*s%s ", indent - 1, "", menu->prompt->text);
 		if (sym->name)
 			printf("(%s) ", sym->name);
 		putchar('[');
@@ -251,7 +250,7 @@
 		case no:
 			return 1;
 		case mod:
-			printf("%*s%s\n", indent - 1, "", _(menu_get_prompt(menu)));
+			printf("%*s%s\n", indent - 1, "", menu_get_prompt(menu));
 			return 0;
 		case yes:
 			break;
@@ -261,7 +260,7 @@
 	while (1) {
 		int cnt, def;
 
-		printf("%*s%s\n", indent - 1, "", _(menu_get_prompt(menu)));
+		printf("%*s%s\n", indent - 1, "", menu_get_prompt(menu));
 		def_sym = sym_get_choice_value(sym);
 		cnt = def = 0;
 		line[0] = 0;
@@ -269,7 +268,7 @@
 			if (!menu_is_visible(child))
 				continue;
 			if (!child->sym) {
-				printf("%*c %s\n", indent, '*', _(menu_get_prompt(child)));
+				printf("%*c %s\n", indent, '*', menu_get_prompt(child));
 				continue;
 			}
 			cnt++;
@@ -278,14 +277,14 @@
 				printf("%*c", indent, '>');
 			} else
 				printf("%*c", indent, ' ');
-			printf(" %d. %s", cnt, _(menu_get_prompt(child)));
+			printf(" %d. %s", cnt, menu_get_prompt(child));
 			if (child->sym->name)
 				printf(" (%s)", child->sym->name);
 			if (!sym_has_value(child->sym))
-				printf(_(" (NEW)"));
+				printf(" (NEW)");
 			printf("\n");
 		}
-		printf(_("%*schoice"), indent - 1, "");
+		printf("%*schoice", indent - 1, "");
 		if (cnt == 1) {
 			printf("[1]: 1\n");
 			goto conf_childs;
@@ -372,7 +371,7 @@
 			if (prompt)
 				printf("%*c\n%*c %s\n%*c\n",
 					indent, '*',
-					indent, '*', _(prompt),
+					indent, '*', prompt,
 					indent, '*');
 		default:
 			;
@@ -437,7 +436,7 @@
 				}
 			} else {
 				if (!conf_cnt++)
-					printf(_("*\n* Restart config...\n*\n"));
+					printf("*\n* Restart config...\n*\n");
 				rootEntry = menu_get_parent_menu(menu);
 				conf(rootEntry);
 			}
@@ -498,10 +497,6 @@
 	const char *name, *defconfig_file = NULL /* gcc uninit */;
 	struct stat tmpstat;
 
-	setlocale(LC_ALL, "");
-	bindtextdomain(PACKAGE, LOCALEDIR);
-	textdomain(PACKAGE);
-
 	tty_stdio = isatty(0) && isatty(1);
 
 	while ((opt = getopt_long(ac, av, "s", long_opts, NULL)) != -1) {
@@ -559,7 +554,7 @@
 		}
 	}
 	if (ac == optind) {
-		fprintf(stderr, _("%s: Kconfig file missing\n"), av[0]);
+		fprintf(stderr, "%s: Kconfig file missing\n", av[0]);
 		conf_usage(progname);
 		exit(1);
 	}
@@ -569,12 +564,12 @@
 	if (sync_kconfig) {
 		name = conf_get_configname();
 		if (stat(name, &tmpstat)) {
-			fprintf(stderr, _("***\n"
+			fprintf(stderr, "***\n"
 				"*** Configuration file \"%s\" not found!\n"
 				"***\n"
 				"*** Please run some configurator (e.g. \"make oldconfig\" or\n"
 				"*** \"make menuconfig\" or \"make xconfig\").\n"
-				"***\n"), name);
+				"***\n", name);
 			exit(1);
 		}
 	}
@@ -585,9 +580,9 @@
 			defconfig_file = conf_get_default_confname();
 		if (conf_read(defconfig_file)) {
 			fprintf(stderr,
-				_("***\n"
+				"***\n"
 				  "*** Can't find default configuration \"%s\"!\n"
-				  "***\n"),
+				  "***\n",
 				defconfig_file);
 			exit(1);
 		}
@@ -611,7 +606,7 @@
 		if ((strcmp(name, "") != 0) && (strcmp(name, "1") != 0)) {
 			if (conf_read_simple(name, S_DEF_USER)) {
 				fprintf(stderr,
-					_("*** Can't read seed configuration \"%s\"!\n"),
+					"*** Can't read seed configuration \"%s\"!\n",
 					name);
 				exit(1);
 			}
@@ -628,7 +623,7 @@
 		if (conf_read_simple(name, S_DEF_USER) &&
 		    conf_read_simple("all.config", S_DEF_USER)) {
 			fprintf(stderr,
-				_("*** KCONFIG_ALLCONFIG set, but no \"%s\" or \"all.config\" file found\n"),
+				"*** KCONFIG_ALLCONFIG set, but no \"%s\" or \"all.config\" file found\n",
 				name);
 			exit(1);
 		}
@@ -642,7 +637,7 @@
 			name = getenv("KCONFIG_NOSILENTUPDATE");
 			if (name && *name) {
 				fprintf(stderr,
-					_("\n*** The configuration requires explicit update.\n\n"));
+					"\n*** The configuration requires explicit update.\n\n");
 				return 1;
 			}
 		}
@@ -694,22 +689,22 @@
 		 * All other commands are only used to generate a config.
 		 */
 		if (conf_get_changed() && conf_write(NULL)) {
-			fprintf(stderr, _("\n*** Error during writing of the configuration.\n\n"));
+			fprintf(stderr, "\n*** Error during writing of the configuration.\n\n");
 			exit(1);
 		}
 		if (conf_write_autoconf()) {
-			fprintf(stderr, _("\n*** Error during update of the configuration.\n\n"));
+			fprintf(stderr, "\n*** Error during update of the configuration.\n\n");
 			return 1;
 		}
 	} else if (input_mode == savedefconfig) {
 		if (conf_write_defconfig(defconfig_file)) {
-			fprintf(stderr, _("n*** Error while saving defconfig to: %s\n\n"),
+			fprintf(stderr, "\n*** Error while saving defconfig to: %s\n\n",
 				defconfig_file);
 			return 1;
 		}
 	} else if (input_mode != listnewconfig) {
 		if (conf_write(NULL)) {
-			fprintf(stderr, _("\n*** Error during writing of the configuration.\n\n"));
+			fprintf(stderr, "\n*** Error during writing of the configuration.\n\n");
 			exit(1);
 		}
 	}
diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index df26c7b..39e2097 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -30,7 +30,7 @@
 static const char *conf_filename;
 static int conf_lineno, conf_warnings;
 
-const char conf_defname[] = "arch/$ARCH/defconfig";
+const char conf_defname[] = "arch/$(ARCH)/defconfig";
 
 static void conf_warning(const char *fmt, ...)
 {
@@ -81,39 +81,13 @@
 	return name ? name : "include/config/auto.conf";
 }
 
-static char *conf_expand_value(const char *in)
-{
-	struct symbol *sym;
-	const char *src;
-	static char res_value[SYMBOL_MAXLENGTH];
-	char *dst, name[SYMBOL_MAXLENGTH];
-
-	res_value[0] = 0;
-	dst = name;
-	while ((src = strchr(in, '$'))) {
-		strncat(res_value, in, src - in);
-		src++;
-		dst = name;
-		while (isalnum(*src) || *src == '_')
-			*dst++ = *src++;
-		*dst = 0;
-		sym = sym_lookup(name, 0);
-		sym_calc_value(sym);
-		strcat(res_value, sym_get_string_value(sym));
-		in = src;
-	}
-	strcat(res_value, in);
-
-	return res_value;
-}
-
 char *conf_get_default_confname(void)
 {
 	struct stat buf;
 	static char fullname[PATH_MAX+1];
 	char *env, *name;
 
-	name = conf_expand_value(conf_defname);
+	name = expand_string(conf_defname);
 	env = getenv(SRCTREE);
 	if (env) {
 		sprintf(fullname, "%s/%s", env, name);
@@ -274,10 +248,11 @@
 			if (expr_calc_value(prop->visible.expr) == no ||
 			    prop->expr->type != E_SYMBOL)
 				continue;
-			name = conf_expand_value(prop->expr->left.sym->name);
+			sym_calc_value(prop->expr->left.sym);
+			name = sym_get_string_value(prop->expr->left.sym);
 			in = zconf_fopen(name);
 			if (in) {
-				conf_message(_("using defaults found in %s"),
+				conf_message("using defaults found in %s",
 					 name);
 				goto load;
 			}
@@ -745,7 +720,7 @@
 	struct menu *menu;
 	const char *basename;
 	const char *str;
-	char dirname[PATH_MAX+1], tmpname[PATH_MAX+1], newname[PATH_MAX+1];
+	char dirname[PATH_MAX+1], tmpname[PATH_MAX+22], newname[PATH_MAX+8];
 	char *env;
 
 	dirname[0] = 0;
@@ -831,7 +806,7 @@
 			return 1;
 	}
 
-	conf_message(_("configuration written to %s"), newname);
+	conf_message("configuration written to %s", newname);
 
 	sym_set_change_count(0);
 
diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh
new file mode 100755
index 0000000..533b3d8
--- /dev/null
+++ b/scripts/kconfig/gconf-cfg.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+PKG="gtk+-2.0 gmodule-2.0 libglade-2.0"
+
+if ! pkg-config --exists $PKG; then
+	echo >&2 "*"
+	echo >&2 "* Unable to find the GTK+ installation. Please make sure that"
+	echo >&2 "* the GTK+ 2.0 development package is correctly installed."
+	echo >&2 "* You need $PKG"
+	echo >&2 "*"
+	exit 1
+fi
+
+if ! pkg-config --atleast-version=2.0.0 gtk+-2.0; then
+	echo >&2 "*"
+	echo >&2 "* GTK+ is present but version >= 2.0.0 is required."
+	echo >&2 "*"
+	exit 1
+fi
+
+echo cflags=\"$(pkg-config --cflags $PKG)\"
+echo libs=\"$(pkg-config --libs $PKG)\"
diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c
index cfddddb..610c4ab 100644
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c
@@ -137,7 +137,7 @@
 
 	xml = glade_xml_new(glade_file, "window1", NULL);
 	if (!xml)
-		g_error(_("GUI loading failed !\n"));
+		g_error("GUI loading failed !\n");
 	glade_xml_signal_autoconnect(xml);
 
 	main_wnd = glade_xml_get_widget(xml, "window1");
@@ -233,7 +233,7 @@
 
 	column = gtk_tree_view_column_new();
 	gtk_tree_view_append_column(view, column);
-	gtk_tree_view_column_set_title(column, _("Options"));
+	gtk_tree_view_column_set_title(column, "Options");
 
 	renderer = gtk_cell_renderer_toggle_new();
 	gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column),
@@ -276,7 +276,7 @@
 
 	column = gtk_tree_view_column_new();
 	gtk_tree_view_append_column(view, column);
-	gtk_tree_view_column_set_title(column, _("Options"));
+	gtk_tree_view_column_set_title(column, "Options");
 
 	renderer = gtk_cell_renderer_pixbuf_new();
 	gtk_tree_view_column_pack_start(GTK_TREE_VIEW_COLUMN(column),
@@ -305,7 +305,7 @@
 
 	renderer = gtk_cell_renderer_text_new();
 	gtk_tree_view_insert_column_with_attributes(view, -1,
-						    _("Name"), renderer,
+						    "Name", renderer,
 						    "text", COL_NAME,
 						    "foreground-gdk",
 						    COL_COLOR, NULL);
@@ -329,7 +329,7 @@
 						    COL_COLOR, NULL);
 	renderer = gtk_cell_renderer_text_new();
 	gtk_tree_view_insert_column_with_attributes(view, -1,
-						    _("Value"), renderer,
+						    "Value", renderer,
 						    "text", COL_VALUE,
 						    "editable",
 						    COL_EDIT,
@@ -368,7 +368,7 @@
 {
 	GtkTextBuffer *buffer;
 	GtkTextIter start, end;
-	const char *prompt = _(menu_get_prompt(menu));
+	const char *prompt = menu_get_prompt(menu);
 	struct gstr help = str_new();
 
 	menu_get_ext_help(menu, &help);
@@ -422,7 +422,7 @@
 	if (!conf_get_changed())
 		return FALSE;
 
-	dialog = gtk_dialog_new_with_buttons(_("Warning !"),
+	dialog = gtk_dialog_new_with_buttons("Warning !",
 					     GTK_WINDOW(main_wnd),
 					     (GtkDialogFlags)
 					     (GTK_DIALOG_MODAL |
@@ -436,7 +436,7 @@
 	gtk_dialog_set_default_response(GTK_DIALOG(dialog),
 					GTK_RESPONSE_CANCEL);
 
-	label = gtk_label_new(_("\nSave configuration ?\n"));
+	label = gtk_label_new("\nSave configuration ?\n");
 	gtk_container_add(GTK_CONTAINER(GTK_DIALOG(dialog)->vbox), label);
 	gtk_widget_show(label);
 
@@ -496,7 +496,7 @@
 					     (user_data));
 
 	if (conf_read(fn))
-		text_insert_msg(_("Error"), _("Unable to load configuration !"));
+		text_insert_msg("Error", "Unable to load configuration !");
 	else
 		display_tree(&rootmenu);
 }
@@ -505,7 +505,7 @@
 {
 	GtkWidget *fs;
 
-	fs = gtk_file_selection_new(_("Load file..."));
+	fs = gtk_file_selection_new("Load file...");
 	g_signal_connect(GTK_OBJECT(GTK_FILE_SELECTION(fs)->ok_button),
 			 "clicked",
 			 G_CALLBACK(load_filename), (gpointer) fs);
@@ -524,7 +524,7 @@
 void on_save_activate(GtkMenuItem * menuitem, gpointer user_data)
 {
 	if (conf_write(NULL))
-		text_insert_msg(_("Error"), _("Unable to save configuration !"));
+		text_insert_msg("Error", "Unable to save configuration !");
 }
 
 
@@ -537,7 +537,7 @@
 					     (user_data));
 
 	if (conf_write(fn))
-		text_insert_msg(_("Error"), _("Unable to save configuration !"));
+		text_insert_msg("Error", "Unable to save configuration !");
 
 	gtk_widget_destroy(GTK_WIDGET(user_data));
 }
@@ -546,7 +546,7 @@
 {
 	GtkWidget *fs;
 
-	fs = gtk_file_selection_new(_("Save file as..."));
+	fs = gtk_file_selection_new("Save file as...");
 	g_signal_connect(GTK_OBJECT(GTK_FILE_SELECTION(fs)->ok_button),
 			 "clicked",
 			 G_CALLBACK(store_filename), (gpointer) fs);
@@ -639,7 +639,7 @@
 void on_introduction1_activate(GtkMenuItem * menuitem, gpointer user_data)
 {
 	GtkWidget *dialog;
-	const gchar *intro_text = _(
+	    const gchar *intro_text =
 	    "Welcome to gkc, the GTK+ graphical configuration tool\n"
 	    "For each option, a blank box indicates the feature is disabled, a\n"
 	    "check indicates it is enabled, and a dot indicates that it is to\n"
@@ -654,7 +654,7 @@
 	    "option.\n"
 	    "\n"
 	    "Toggling Show Debug Info under the Options menu will show \n"
-	    "the dependencies, which you can then match by examining other options.");
+	    "the dependencies, which you can then match by examining other options.";
 
 	dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd),
 					GTK_DIALOG_DESTROY_WITH_PARENT,
@@ -671,8 +671,8 @@
 {
 	GtkWidget *dialog;
 	const gchar *about_text =
-	    _("gkc is copyright (c) 2002 Romain Lievin <roms@lpg.ticalc.org>.\n"
-	      "Based on the source code from Roman Zippel.\n");
+	    "gkc is copyright (c) 2002 Romain Lievin <roms@lpg.ticalc.org>.\n"
+	      "Based on the source code from Roman Zippel.\n";
 
 	dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd),
 					GTK_DIALOG_DESTROY_WITH_PARENT,
@@ -689,9 +689,9 @@
 {
 	GtkWidget *dialog;
 	const gchar *license_text =
-	    _("gkc is released under the terms of the GNU GPL v2.\n"
+	    "gkc is released under the terms of the GNU GPL v2.\n"
 	      "For more information, please see the source code or\n"
-	      "visit http://www.fsf.org/licenses/licenses.html\n");
+	      "visit http://www.fsf.org/licenses/licenses.html\n";
 
 	dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd),
 					GTK_DIALOG_DESTROY_WITH_PARENT,
@@ -1049,7 +1049,7 @@
 	bzero(row, sizeof(row));
 
 	row[COL_OPTION] =
-	    g_strdup_printf("%s %s", _(menu_get_prompt(menu)),
+	    g_strdup_printf("%s %s", menu_get_prompt(menu),
 			    sym && !sym_has_value(sym) ? "(NEW)" : "");
 
 	if (opt_mode == OPT_ALL && !menu_is_visible(menu))
@@ -1102,7 +1102,7 @@
 
 		if (def_menu)
 			row[COL_VALUE] =
-			    g_strdup(_(menu_get_prompt(def_menu)));
+			    g_strdup(menu_get_prompt(def_menu));
 	}
 	if (sym->flags & SYMBOL_CHOICEVAL)
 		row[COL_BTNRAD] = GINT_TO_POINTER(TRUE);
@@ -1447,10 +1447,6 @@
 	char *env;
 	gchar *glade_file;
 
-	bindtextdomain(PACKAGE, LOCALEDIR);
-	bind_textdomain_codeset(PACKAGE, "UTF-8");
-	textdomain(PACKAGE);
-
 	/* GTK stuffs */
 	gtk_set_locale();
 	gtk_init(&ac, &av);
diff --git a/scripts/kconfig/kconf_id.c b/scripts/kconfig/kconf_id.c
index 3ea9c5f..b3e0ea0 100644
--- a/scripts/kconfig/kconf_id.c
+++ b/scripts/kconfig/kconf_id.c
@@ -32,7 +32,6 @@
 	{ "on",			T_ON,			TF_PARAM },
 	{ "modules",		T_OPT_MODULES,		TF_OPTION },
 	{ "defconfig_list",	T_OPT_DEFCONFIG_LIST,	TF_OPTION },
-	{ "env",		T_OPT_ENV,		TF_OPTION },
 	{ "allnoconfig_y",	T_OPT_ALLNOCONFIG_Y,	TF_OPTION },
 };
 
diff --git a/scripts/kconfig/kxgettext.c b/scripts/kconfig/kxgettext.c
deleted file mode 100644
index 240880a..0000000
--- a/scripts/kconfig/kxgettext.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>, 2005
- *
- * Released under the terms of the GNU GPL v2.0
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "lkc.h"
-
-static char *escape(const char* text, char *bf, int len)
-{
-	char *bfp = bf;
-	int multiline = strchr(text, '\n') != NULL;
-	int eol = 0;
-	int textlen = strlen(text);
-
-	if ((textlen > 0) && (text[textlen-1] == '\n'))
-		eol = 1;
-
-	*bfp++ = '"';
-	--len;
-
-	if (multiline) {
-		*bfp++ = '"';
-		*bfp++ = '\n';
-		*bfp++ = '"';
-		len -= 3;
-	}
-
-	while (*text != '\0' && len > 1) {
-		if (*text == '"')
-			*bfp++ = '\\';
-		else if (*text == '\n') {
-			*bfp++ = '\\';
-			*bfp++ = 'n';
-			*bfp++ = '"';
-			*bfp++ = '\n';
-			*bfp++ = '"';
-			len -= 5;
-			++text;
-			goto next;
-		}
-		else if (*text == '\\') {
-			*bfp++ = '\\';
-			len--;
-		}
-		*bfp++ = *text++;
-next:
-		--len;
-	}
-
-	if (multiline && eol)
-		bfp -= 3;
-
-	*bfp++ = '"';
-	*bfp = '\0';
-
-	return bf;
-}
-
-struct file_line {
-	struct file_line *next;
-	const char *file;
-	int lineno;
-};
-
-static struct file_line *file_line__new(const char *file, int lineno)
-{
-	struct file_line *self = malloc(sizeof(*self));
-
-	if (self == NULL)
-		goto out;
-
-	self->file   = file;
-	self->lineno = lineno;
-	self->next   = NULL;
-out:
-	return self;
-}
-
-struct message {
-	const char	 *msg;
-	const char	 *option;
-	struct message	 *next;
-	struct file_line *files;
-};
-
-static struct message *message__list;
-
-static struct message *message__new(const char *msg, char *option,
-				    const char *file, int lineno)
-{
-	struct message *self = malloc(sizeof(*self));
-
-	if (self == NULL)
-		goto out;
-
-	self->files = file_line__new(file, lineno);
-	if (self->files == NULL)
-		goto out_fail;
-
-	self->msg = xstrdup(msg);
-	if (self->msg == NULL)
-		goto out_fail_msg;
-
-	self->option = option;
-	self->next = NULL;
-out:
-	return self;
-out_fail_msg:
-	free(self->files);
-out_fail:
-	free(self);
-	self = NULL;
-	goto out;
-}
-
-static struct message *mesage__find(const char *msg)
-{
-	struct message *m = message__list;
-
-	while (m != NULL) {
-		if (strcmp(m->msg, msg) == 0)
-			break;
-		m = m->next;
-	}
-
-	return m;
-}
-
-static int message__add_file_line(struct message *self, const char *file,
-				  int lineno)
-{
-	int rc = -1;
-	struct file_line *fl = file_line__new(file, lineno);
-
-	if (fl == NULL)
-		goto out;
-
-	fl->next    = self->files;
-	self->files = fl;
-	rc = 0;
-out:
-	return rc;
-}
-
-static int message__add(const char *msg, char *option, const char *file,
-			int lineno)
-{
-	int rc = 0;
-	char bf[16384];
-	char *escaped = escape(msg, bf, sizeof(bf));
-	struct message *m = mesage__find(escaped);
-
-	if (m != NULL)
-		rc = message__add_file_line(m, file, lineno);
-	else {
-		m = message__new(escaped, option, file, lineno);
-
-		if (m != NULL) {
-			m->next	      = message__list;
-			message__list = m;
-		} else
-			rc = -1;
-	}
-	return rc;
-}
-
-static void menu_build_message_list(struct menu *menu)
-{
-	struct menu *child;
-
-	message__add(menu_get_prompt(menu), NULL,
-		     menu->file == NULL ? "Root Menu" : menu->file->name,
-		     menu->lineno);
-
-	if (menu->sym != NULL && menu_has_help(menu))
-		message__add(menu_get_help(menu), menu->sym->name,
-			     menu->file == NULL ? "Root Menu" : menu->file->name,
-			     menu->lineno);
-
-	for (child = menu->list; child != NULL; child = child->next)
-		if (child->prompt != NULL)
-			menu_build_message_list(child);
-}
-
-static void message__print_file_lineno(struct message *self)
-{
-	struct file_line *fl = self->files;
-
-	putchar('\n');
-	if (self->option != NULL)
-		printf("# %s:00000\n", self->option);
-
-	printf("#: %s:%d", fl->file, fl->lineno);
-	fl = fl->next;
-
-	while (fl != NULL) {
-		printf(", %s:%d", fl->file, fl->lineno);
-		fl = fl->next;
-	}
-
-	putchar('\n');
-}
-
-static void message__print_gettext_msgid_msgstr(struct message *self)
-{
-	message__print_file_lineno(self);
-
-	printf("msgid %s\n"
-	       "msgstr \"\"\n", self->msg);
-}
-
-static void menu__xgettext(void)
-{
-	struct message *m = message__list;
-
-	while (m != NULL) {
-		/* skip empty lines ("") */
-		if (strlen(m->msg) > sizeof("\"\""))
-			message__print_gettext_msgid_msgstr(m);
-		m = m->next;
-	}
-}
-
-int main(int ac, char **av)
-{
-	conf_parse(av[1]);
-
-	menu_build_message_list(menu_get_root_menu(NULL));
-	menu__xgettext();
-	return 0;
-}
diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index f4394af..ed3ff88 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -8,15 +8,6 @@
 
 #include "expr.h"
 
-#ifndef KBUILD_NO_NLS
-# include <libintl.h>
-#else
-static inline const char *gettext(const char *txt) { return txt; }
-static inline void textdomain(const char *domainname) {}
-static inline void bindtextdomain(const char *name, const char *dir) {}
-static inline char *bind_textdomain_codeset(const char *dn, char *c) { return c; }
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -29,11 +20,6 @@
 #define PACKAGE "linux"
 #endif
 
-#define LOCALEDIR "/usr/share/locale"
-
-#define _(text) gettext(text)
-#define N_(text) (text)
-
 #ifndef CONFIG_
 #define CONFIG_ "CONFIG_"
 #endif
@@ -58,7 +44,6 @@
 
 #define T_OPT_MODULES		1
 #define T_OPT_DEFCONFIG_LIST	2
-#define T_OPT_ENV		3
 #define T_OPT_ALLNOCONFIG_Y	4
 
 struct kconf_id {
@@ -117,6 +102,7 @@
 void *xcalloc(size_t nmemb, size_t size);
 void *xrealloc(void *p, size_t size);
 char *xstrdup(const char *s);
+char *xstrndup(const char *s, size_t n);
 
 struct gstr {
 	size_t len;
@@ -134,9 +120,6 @@
 const char *str_get(struct gstr *gs);
 
 /* symbol.c */
-extern struct expr *sym_env_list;
-
-void sym_init(void);
 void sym_clear_all_valid(void);
 struct symbol *sym_choice_default(struct symbol *sym);
 const char *sym_get_string_default(struct symbol *sym);
diff --git a/scripts/kconfig/lkc_proto.h b/scripts/kconfig/lkc_proto.h
index 9dc8abf..a8b7a33 100644
--- a/scripts/kconfig/lkc_proto.h
+++ b/scripts/kconfig/lkc_proto.h
@@ -31,7 +31,6 @@
 
 struct symbol * sym_lookup(const char *name, int flags);
 struct symbol * sym_find(const char *name);
-char *sym_expand_string_value(const char *in);
 const char * sym_escape_string_value(const char *in);
 struct symbol ** sym_re_search(const char *pattern);
 const char * sym_type_name(enum symbol_type type);
@@ -49,5 +48,19 @@
 
 const char * prop_get_type_name(enum prop_type type);
 
+/* preprocess.c */
+enum variable_flavor {
+	VAR_SIMPLE,
+	VAR_RECURSIVE,
+	VAR_APPEND,
+};
+void env_write_dep(FILE *f, const char *auto_conf_name);
+void variable_add(const char *name, const char *value,
+		  enum variable_flavor flavor);
+void variable_all_del(void);
+char *expand_string(const char *in);
+char *expand_dollar(const char **str);
+char *expand_one_token(const char **str);
+
 /* expr.c */
 void expr_print(struct expr *e, void (*fn)(void *, struct symbol *, const char *), void *data, int prevtoken);
diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh
deleted file mode 100755
index 6c0bcd9..0000000
--- a/scripts/kconfig/lxdialog/check-lxdialog.sh
+++ /dev/null
@@ -1,93 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Check ncurses compatibility
-
-# What library to link
-ldflags()
-{
-	pkg-config --libs ncursesw 2>/dev/null && exit
-	pkg-config --libs ncurses 2>/dev/null && exit
-	for ext in so a dll.a dylib ; do
-		for lib in ncursesw ncurses curses ; do
-			$cc -print-file-name=lib${lib}.${ext} | grep -q /
-			if [ $? -eq 0 ]; then
-				echo "-l${lib}"
-				exit
-			fi
-		done
-	done
-	exit 1
-}
-
-# Where is ncurses.h?
-ccflags()
-{
-	if pkg-config --cflags ncursesw 2>/dev/null; then
-		echo '-DCURSES_LOC="<ncurses.h>" -DNCURSES_WIDECHAR=1'
-	elif pkg-config --cflags ncurses 2>/dev/null; then
-		echo '-DCURSES_LOC="<ncurses.h>"'
-	elif [ -f /usr/include/ncursesw/curses.h ]; then
-		echo '-I/usr/include/ncursesw -DCURSES_LOC="<curses.h>"'
-		echo ' -DNCURSES_WIDECHAR=1'
-	elif [ -f /usr/include/ncurses/ncurses.h ]; then
-		echo '-I/usr/include/ncurses -DCURSES_LOC="<ncurses.h>"'
-	elif [ -f /usr/include/ncurses/curses.h ]; then
-		echo '-I/usr/include/ncurses -DCURSES_LOC="<curses.h>"'
-	elif [ -f /usr/include/ncurses.h ]; then
-		echo '-DCURSES_LOC="<ncurses.h>"'
-	else
-		echo '-DCURSES_LOC="<curses.h>"'
-	fi
-}
-
-# Temp file, try to clean up after us
-tmp=.lxdialog.tmp
-trap "rm -f $tmp" 0 1 2 3 15
-
-# Check if we can link to ncurses
-check() {
-        $cc -x c - -o $tmp 2>/dev/null <<'EOF'
-#include CURSES_LOC
-main() {}
-EOF
-	if [ $? != 0 ]; then
-	    echo " *** Unable to find the ncurses libraries or the"       1>&2
-	    echo " *** required header files."                            1>&2
-	    echo " *** 'make menuconfig' requires the ncurses libraries." 1>&2
-	    echo " *** "                                                  1>&2
-	    echo " *** Install ncurses (ncurses-devel or libncurses-dev " 1>&2
-	    echo " *** depending on your distribution) and try again."    1>&2
-	    echo " *** "                                                  1>&2
-	    exit 1
-	fi
-}
-
-usage() {
-	printf "Usage: $0 [-check compiler options|-ccflags|-ldflags compiler options]\n"
-}
-
-if [ $# -eq 0 ]; then
-	usage
-	exit 1
-fi
-
-cc=""
-case "$1" in
-	"-check")
-		shift
-		cc="$@"
-		check
-		;;
-	"-ccflags")
-		ccflags
-		;;
-	"-ldflags")
-		shift
-		cc="$@"
-		ldflags
-		;;
-	"*")
-		usage
-		exit 1
-		;;
-esac
diff --git a/scripts/kconfig/lxdialog/checklist.c b/scripts/kconfig/lxdialog/checklist.c
index 8d016fa..2e96323 100644
--- a/scripts/kconfig/lxdialog/checklist.c
+++ b/scripts/kconfig/lxdialog/checklist.c
@@ -103,8 +103,8 @@
 	int x = width / 2 - 11;
 	int y = height - 2;
 
-	print_button(dialog, gettext("Select"), y, x, selected == 0);
-	print_button(dialog, gettext(" Help "), y, x + 14, selected == 1);
+	print_button(dialog, "Select", y, x, selected == 0);
+	print_button(dialog, " Help ", y, x + 14, selected == 1);
 
 	wmove(dialog, y, x + 1 + 14 * selected);
 	wrefresh(dialog);
diff --git a/scripts/kconfig/lxdialog/dialog.h b/scripts/kconfig/lxdialog/dialog.h
index fcffd5b..0b00be5 100644
--- a/scripts/kconfig/lxdialog/dialog.h
+++ b/scripts/kconfig/lxdialog/dialog.h
@@ -26,16 +26,10 @@
 #include <string.h>
 #include <stdbool.h>
 
-#ifndef KBUILD_NO_NLS
-# include <libintl.h>
-#else
-# define gettext(Msgid) ((const char *) (Msgid))
-#endif
-
 #ifdef __sun__
 #define CURS_MACROS
 #endif
-#include CURSES_LOC
+#include <ncurses.h>
 
 /*
  * Colors in ncurses 1.9.9e do not work properly since foreground and
diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c
index d58de1d..fe82ff6 100644
--- a/scripts/kconfig/lxdialog/inputbox.c
+++ b/scripts/kconfig/lxdialog/inputbox.c
@@ -31,8 +31,8 @@
 	int x = width / 2 - 11;
 	int y = height - 2;
 
-	print_button(dialog, gettext("  Ok  "), y, x, selected == 0);
-	print_button(dialog, gettext(" Help "), y, x + 14, selected == 1);
+	print_button(dialog, "  Ok  ", y, x, selected == 0);
+	print_button(dialog, " Help ", y, x + 14, selected == 1);
 
 	wmove(dialog, y, x + 1 + 14 * selected);
 	wrefresh(dialog);
diff --git a/scripts/kconfig/lxdialog/menubox.c b/scripts/kconfig/lxdialog/menubox.c
index 11ae9ad7..d70cab3 100644
--- a/scripts/kconfig/lxdialog/menubox.c
+++ b/scripts/kconfig/lxdialog/menubox.c
@@ -157,11 +157,11 @@
 	int x = width / 2 - 28;
 	int y = height - 2;
 
-	print_button(win, gettext("Select"), y, x, selected == 0);
-	print_button(win, gettext(" Exit "), y, x + 12, selected == 1);
-	print_button(win, gettext(" Help "), y, x + 24, selected == 2);
-	print_button(win, gettext(" Save "), y, x + 36, selected == 3);
-	print_button(win, gettext(" Load "), y, x + 48, selected == 4);
+	print_button(win, "Select", y, x, selected == 0);
+	print_button(win, " Exit ", y, x + 12, selected == 1);
+	print_button(win, " Help ", y, x + 24, selected == 2);
+	print_button(win, " Save ", y, x + 36, selected == 3);
+	print_button(win, " Load ", y, x + 48, selected == 4);
 
 	wmove(win, y, x + 1 + 12 * selected);
 	wrefresh(win);
diff --git a/scripts/kconfig/lxdialog/textbox.c b/scripts/kconfig/lxdialog/textbox.c
index 1773319..88d2818 100644
--- a/scripts/kconfig/lxdialog/textbox.c
+++ b/scripts/kconfig/lxdialog/textbox.c
@@ -129,7 +129,7 @@
 
 	print_title(dialog, title, width);
 
-	print_button(dialog, gettext(" Exit "), height - 2, width / 2 - 4, TRUE);
+	print_button(dialog, " Exit ", height - 2, width / 2 - 4, TRUE);
 	wnoutrefresh(dialog);
 	getyx(dialog, cur_y, cur_x);	/* Save cursor position */
 
diff --git a/scripts/kconfig/lxdialog/yesno.c b/scripts/kconfig/lxdialog/yesno.c
index 676fb2f8..cd1223c 100644
--- a/scripts/kconfig/lxdialog/yesno.c
+++ b/scripts/kconfig/lxdialog/yesno.c
@@ -29,8 +29,8 @@
 	int x = width / 2 - 10;
 	int y = height - 2;
 
-	print_button(dialog, gettext(" Yes "), y, x, selected == 0);
-	print_button(dialog, gettext("  No  "), y, x + 13, selected == 1);
+	print_button(dialog, " Yes ", y, x, selected == 0);
+	print_button(dialog, "  No  ", y, x + 13, selected == 1);
 
 	wmove(dialog, y, x + 1 + 13 * selected);
 	wrefresh(dialog);
diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh
new file mode 100755
index 0000000..e6f9fac
--- /dev/null
+++ b/scripts/kconfig/mconf-cfg.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
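+#
+# Probe how this system provides ncurses and print shell variable
+# assignments (cflags=..., libs=...) on stdout for the caller to source.
+# The wide-character flavor is preferred over plain ncurses.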
+
+PKG="ncursesw"
+PKG2="ncurses"
+
+if pkg-config --exists $PKG; then
+	echo cflags=\"$(pkg-config --cflags $PKG)\"
+	echo libs=\"$(pkg-config --libs $PKG)\"
+	exit 0
+fi
+
+if pkg-config --exists $PKG2; then
+	echo cflags=\"$(pkg-config --cflags $PKG2)\"
+	echo libs=\"$(pkg-config --libs $PKG2)\"
+	exit 0
+fi
+
+# Unfortunately, some distributions (e.g. openSUSE) cannot find ncurses
+# by pkg-config.
+if [ -f /usr/include/ncursesw/ncurses.h ]; then
+	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\"
+	echo libs=\"-lncursesw\"
+	exit 0
+fi
+
+if [ -f /usr/include/ncurses/ncurses.h ]; then
+	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncurses\"
+	echo libs=\"-lncurses\"
+	exit 0
+fi
+
+if [ -f /usr/include/ncurses.h ]; then
+	echo cflags=\"-D_GNU_SOURCE\"
+	echo libs=\"-lncurses\"
+	exit 0
+fi
+
+echo >&2 "*"
+echo >&2 "* Unable to find the ncurses package."
+echo >&2 "* Install ncurses (ncurses-devel or libncurses-dev"
+echo >&2 "* depending on your distribution)."
+echo >&2 "*"
+exit 1
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index c829be8b..5294ed1 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -17,12 +17,11 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#include <locale.h>
 
 #include "lkc.h"
 #include "lxdialog/dialog.h"
 
-static const char mconf_readme[] = N_(
+static const char mconf_readme[] =
 "Overview\n"
 "--------\n"
 "This interface lets you select features and parameters for the build.\n"
@@ -171,37 +170,37 @@
 " blackbg    => selects a color scheme with black background\n"
 " classic    => theme with blue background. The classic look\n"
 " bluetitle  => an LCD friendly version of classic. (default)\n"
-"\n"),
-menu_instructions[] = N_(
+"\n",
+menu_instructions[] =
 	"Arrow keys navigate the menu.  "
 	"<Enter> selects submenus ---> (or empty submenus ----).  "
 	"Highlighted letters are hotkeys.  "
 	"Pressing <Y> includes, <N> excludes, <M> modularizes features.  "
 	"Press <Esc><Esc> to exit, <?> for Help, </> for Search.  "
-	"Legend: [*] built-in  [ ] excluded  <M> module  < > module capable"),
-radiolist_instructions[] = N_(
+	"Legend: [*] built-in  [ ] excluded  <M> module  < > module capable",
+radiolist_instructions[] =
 	"Use the arrow keys to navigate this window or "
 	"press the hotkey of the item you wish to select "
 	"followed by the <SPACE BAR>. "
-	"Press <?> for additional information about this option."),
-inputbox_instructions_int[] = N_(
+	"Press <?> for additional information about this option.",
+inputbox_instructions_int[] =
 	"Please enter a decimal value. "
 	"Fractions will not be accepted.  "
-	"Use the <TAB> key to move from the input field to the buttons below it."),
-inputbox_instructions_hex[] = N_(
+	"Use the <TAB> key to move from the input field to the buttons below it.",
+inputbox_instructions_hex[] =
 	"Please enter a hexadecimal value. "
-	"Use the <TAB> key to move from the input field to the buttons below it."),
-inputbox_instructions_string[] = N_(
+	"Use the <TAB> key to move from the input field to the buttons below it.",
+inputbox_instructions_string[] =
 	"Please enter a string value. "
-	"Use the <TAB> key to move from the input field to the buttons below it."),
-setmod_text[] = N_(
+	"Use the <TAB> key to move from the input field to the buttons below it.",
+setmod_text[] =
 	"This feature depends on another which has been configured as a module.\n"
-	"As a result, this feature will be built as a module."),
-load_config_text[] = N_(
+	"As a result, this feature will be built as a module.",
+load_config_text[] =
 	"Enter the name of the configuration file you wish to load.  "
 	"Accept the name shown to restore the configuration you "
-	"last retrieved.  Leave blank to abort."),
-load_config_help[] = N_(
+	"last retrieved.  Leave blank to abort.",
+load_config_help[] =
 	"\n"
 	"For various reasons, one may wish to keep several different\n"
 	"configurations available on a single machine.\n"
@@ -211,11 +210,11 @@
 	"configuration.\n"
 	"\n"
 	"If you are uncertain, then you have probably never used alternate\n"
-	"configuration files. You should therefore leave this blank to abort.\n"),
-save_config_text[] = N_(
+	"configuration files. You should therefore leave this blank to abort.\n",
+save_config_text[] =
 	"Enter a filename to which this configuration should be saved "
-	"as an alternate.  Leave blank to abort."),
-save_config_help[] = N_(
+	"as an alternate.  Leave blank to abort.",
+save_config_help[] =
 	"\n"
 	"For various reasons, one may wish to keep different configurations\n"
 	"available on a single machine.\n"
@@ -225,8 +224,8 @@
 	"configuration options you have selected at that time.\n"
 	"\n"
 	"If you are uncertain what all this means then you should probably\n"
-	"leave this blank.\n"),
-search_help[] = N_(
+	"leave this blank.\n",
+search_help[] =
 	"\n"
 	"Search for symbols and display their relations.\n"
 	"Regular expressions are allowed.\n"
@@ -271,7 +270,7 @@
 	"Examples: USB	=> find all symbols containing USB\n"
 	"          ^USB => find all symbols starting with USB\n"
 	"          USB$ => find all symbols ending with USB\n"
-	"\n");
+	"\n";
 
 static int indent;
 static struct menu *current_menu;
@@ -400,19 +399,19 @@
 	struct subtitle_part stpart;
 
 	title = str_new();
-	str_printf( &title, _("Enter (sub)string or regexp to search for "
-			      "(with or without \"%s\")"), CONFIG_);
+	str_printf( &title, "Enter (sub)string or regexp to search for "
+			      "(with or without \"%s\")", CONFIG_);
 
 again:
 	dialog_clear();
-	dres = dialog_inputbox(_("Search Configuration Parameter"),
+	dres = dialog_inputbox("Search Configuration Parameter",
 			      str_get(&title),
 			      10, 75, "");
 	switch (dres) {
 	case 0:
 		break;
 	case 1:
-		show_helptext(_("Search Configuration"), search_help);
+		show_helptext("Search Configuration", search_help);
 		goto again;
 	default:
 		str_free(&title);
@@ -443,7 +442,7 @@
 
 		res = get_relations_str(sym_arr, &head);
 		set_subtitle();
-		dres = show_textbox_ext(_("Search Results"), (char *)
+		dres = show_textbox_ext("Search Results", (char *)
 					str_get(&res), 0, 0, keys, &vscroll,
 					&hscroll, &update_text, (void *)
 					&data);
@@ -491,7 +490,7 @@
 			switch (prop->type) {
 			case P_MENU:
 				child_count++;
-				prompt = _(prompt);
 				if (single_menu_mode) {
 					item_make("%s%*c%s",
 						  menu->data ? "-->" : "++>",
@@ -508,7 +507,7 @@
 			case P_COMMENT:
 				if (prompt) {
 					child_count++;
-					item_make("   %*c*** %s ***", indent + 1, ' ', _(prompt));
+					item_make("   %*c*** %s ***", indent + 1, ' ', prompt);
 					item_set_tag(':');
 					item_set_data(menu);
 				}
@@ -516,7 +515,7 @@
 			default:
 				if (prompt) {
 					child_count++;
-					item_make("---%*c%s", indent + 1, ' ', _(prompt));
+					item_make("---%*c%s", indent + 1, ' ', prompt);
 					item_set_tag(':');
 					item_set_data(menu);
 				}
@@ -560,10 +559,10 @@
 			item_set_data(menu);
 		}
 
-		item_add_str("%*c%s", indent + 1, ' ', _(menu_get_prompt(menu)));
+		item_add_str("%*c%s", indent + 1, ' ', menu_get_prompt(menu));
 		if (val == yes) {
 			if (def_menu) {
-				item_add_str(" (%s)", _(menu_get_prompt(def_menu)));
+				item_add_str(" (%s)", menu_get_prompt(def_menu));
 				item_add_str("  --->");
 				if (def_menu->list) {
 					indent += 2;
@@ -575,7 +574,7 @@
 		}
 	} else {
 		if (menu == current_menu) {
-			item_make("---%*c%s", indent + 1, ' ', _(menu_get_prompt(menu)));
+			item_make("---%*c%s", indent + 1, ' ', menu_get_prompt(menu));
 			item_set_tag(':');
 			item_set_data(menu);
 			goto conf_childs;
@@ -618,17 +617,17 @@
 				tmp = indent - tmp + 4;
 				if (tmp < 0)
 					tmp = 0;
-				item_add_str("%*c%s%s", tmp, ' ', _(menu_get_prompt(menu)),
+				item_add_str("%*c%s%s", tmp, ' ', menu_get_prompt(menu),
 					     (sym_has_value(sym) || !sym_is_changable(sym)) ?
-					     "" : _(" (NEW)"));
+					     "" : " (NEW)");
 				item_set_tag('s');
 				item_set_data(menu);
 				goto conf_childs;
 			}
 		}
-		item_add_str("%*c%s%s", indent + 1, ' ', _(menu_get_prompt(menu)),
+		item_add_str("%*c%s%s", indent + 1, ' ', menu_get_prompt(menu),
 			  (sym_has_value(sym) || !sym_is_changable(sym)) ?
-			  "" : _(" (NEW)"));
+			  "" : " (NEW)");
 		if (menu->prompt->type == P_MENU) {
 			item_add_str("  %s", menu_is_empty(menu) ? "----" : "--->");
 			return;
@@ -665,8 +664,8 @@
 			break;
 		set_subtitle();
 		dialog_clear();
-		res = dialog_menu(prompt ? _(prompt) : _("Main Menu"),
-				  _(menu_instructions),
+		res = dialog_menu(prompt ? prompt : "Main Menu",
+				  menu_instructions,
 				  active_menu, &s_scroll);
 		if (res == 1 || res == KEY_ESC || res == -ERRDISPLAYTOOSMALL)
 			break;
@@ -708,7 +707,7 @@
 				show_help(submenu);
 			else {
 				reset_subtitle();
-				show_helptext(_("README"), _(mconf_readme));
+				show_helptext("README", mconf_readme);
 			}
 			break;
 		case 3:
@@ -793,13 +792,13 @@
 	help.max_width = getmaxx(stdscr) - 10;
 	menu_get_ext_help(menu, &help);
 
-	show_helptext(_(menu_get_prompt(menu)), str_get(&help));
+	show_helptext(menu_get_prompt(menu), str_get(&help));
 	str_free(&help);
 }
 
 static void conf_choice(struct menu *menu)
 {
-	const char *prompt = _(menu_get_prompt(menu));
+	const char *prompt = menu_get_prompt(menu);
 	struct menu *child;
 	struct symbol *active;
 
@@ -814,9 +813,9 @@
 			if (!menu_is_visible(child))
 				continue;
 			if (child->sym)
-				item_make("%s", _(menu_get_prompt(child)));
+				item_make("%s", menu_get_prompt(child));
 			else {
-				item_make("*** %s ***", _(menu_get_prompt(child)));
+				item_make("*** %s ***", menu_get_prompt(child));
 				item_set_tag(':');
 			}
 			item_set_data(child);
@@ -826,8 +825,8 @@
 				item_set_tag('X');
 		}
 		dialog_clear();
-		res = dialog_checklist(prompt ? _(prompt) : _("Main Menu"),
-					_(radiolist_instructions),
+		res = dialog_checklist(prompt ? prompt : "Main Menu",
+					radiolist_instructions,
 					MENUBOX_HEIGTH_MIN,
 					MENUBOX_WIDTH_MIN,
 					CHECKLIST_HEIGTH_MIN);
@@ -868,26 +867,26 @@
 
 		switch (sym_get_type(menu->sym)) {
 		case S_INT:
-			heading = _(inputbox_instructions_int);
+			heading = inputbox_instructions_int;
 			break;
 		case S_HEX:
-			heading = _(inputbox_instructions_hex);
+			heading = inputbox_instructions_hex;
 			break;
 		case S_STRING:
-			heading = _(inputbox_instructions_string);
+			heading = inputbox_instructions_string;
 			break;
 		default:
-			heading = _("Internal mconf error!");
+			heading = "Internal mconf error!";
 		}
 		dialog_clear();
-		res = dialog_inputbox(prompt ? _(prompt) : _("Main Menu"),
+		res = dialog_inputbox(prompt ? prompt : "Main Menu",
 				      heading, 10, 75,
 				      sym_get_string_value(menu->sym));
 		switch (res) {
 		case 0:
 			if (sym_set_string_value(menu->sym, dialog_input_result))
 				return;
-			show_textbox(NULL, _("You have made an invalid entry."), 5, 43);
+			show_textbox(NULL, "You have made an invalid entry.", 5, 43);
 			break;
 		case 1:
 			show_help(menu);
@@ -915,10 +914,10 @@
 				sym_set_change_count(1);
 				return;
 			}
-			show_textbox(NULL, _("File does not exist!"), 5, 38);
+			show_textbox(NULL, "File does not exist!", 5, 38);
 			break;
 		case 1:
-			show_helptext(_("Load Alternate Configuration"), load_config_help);
+			show_helptext("Load Alternate Configuration", load_config_help);
 			break;
 		case KEY_ESC:
 			return;
@@ -941,10 +940,10 @@
 				set_config_filename(dialog_input_result);
 				return;
 			}
-			show_textbox(NULL, _("Can't create file!  Probably a nonexistent directory."), 5, 60);
+			show_textbox(NULL, "Can't create file!  Probably a nonexistent directory.", 5, 60);
 			break;
 		case 1:
-			show_helptext(_("Save Alternate Configuration"), save_config_help);
+			show_helptext("Save Alternate Configuration", save_config_help);
 			break;
 		case KEY_ESC:
 			return;
@@ -961,8 +960,8 @@
 	dialog_clear();
 	if (conf_get_changed())
 		res = dialog_yesno(NULL,
-				   _("Do you wish to save your new configuration?\n"
-				     "(Press <ESC><ESC> to continue kernel configuration.)"),
+				   "Do you wish to save your new configuration?\n"
+				     "(Press <ESC><ESC> to continue kernel configuration.)",
 				   6, 60);
 	else
 		res = -1;
@@ -972,26 +971,26 @@
 	switch (res) {
 	case 0:
 		if (conf_write(filename)) {
-			fprintf(stderr, _("\n\n"
+			fprintf(stderr, "\n\n"
 					  "Error while writing of the configuration.\n"
 					  "Your configuration changes were NOT saved."
-					  "\n\n"));
+					  "\n\n");
 			return 1;
 		}
 		/* fall through */
 	case -1:
 		if (!silent)
-			printf(_("\n\n"
+			printf("\n\n"
 				 "*** End of the configuration.\n"
 				 "*** Execute 'make' to start the build or try 'make help'."
-				 "\n\n"));
+				 "\n\n");
 		res = 0;
 		break;
 	default:
 		if (!silent)
-			fprintf(stderr, _("\n\n"
+			fprintf(stderr, "\n\n"
 					  "Your configuration changes were NOT saved."
-					  "\n\n"));
+					  "\n\n");
 		if (res != KEY_ESC)
 			res = 0;
 	}
@@ -1009,10 +1008,6 @@
 	char *mode;
 	int res;
 
-	setlocale(LC_ALL, "");
-	bindtextdomain(PACKAGE, LOCALEDIR);
-	textdomain(PACKAGE);
-
 	signal(SIGINT, sig_handler);
 
 	if (ac > 1 && strcmp(av[1], "-s") == 0) {
@@ -1031,8 +1026,8 @@
 	}
 
 	if (init_dialog(NULL)) {
-		fprintf(stderr, N_("Your display is too small to run Menuconfig!\n"));
-		fprintf(stderr, N_("It must be at least 19 lines by 80 columns.\n"));
+		fprintf(stderr, "Your display is too small to run Menuconfig!\n");
+		fprintf(stderr, "It must be at least 19 lines by 80 columns.\n");
 		return 1;
 	}
 
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 5c5c137..379a119 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -214,9 +214,6 @@
 			zconf_error("trying to redefine defconfig symbol");
 		sym_defconfig_list->flags |= SYMBOL_AUTO;
 		break;
-	case T_OPT_ENV:
-		prop_add_env(arg);
-		break;
 	case T_OPT_ALLNOCONFIG_Y:
 		current_entry->sym->flags |= SYMBOL_ALLNOCONFIG_Y;
 		break;
@@ -711,7 +708,7 @@
 	struct menu *submenu[8], *menu, *location = NULL;
 	struct jump_key *jump = NULL;
 
-	str_printf(r, _("Prompt: %s\n"), _(prop->text));
+	str_printf(r, "Prompt: %s\n", prop->text);
 	menu = prop->menu->parent;
 	for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) {
 		bool accessible = menu_is_visible(menu);
@@ -744,16 +741,16 @@
 	}
 
 	if (i > 0) {
-		str_printf(r, _("  Location:\n"));
+		str_printf(r, "  Location:\n");
 		for (j = 4; --i >= 0; j += 2) {
 			menu = submenu[i];
 			if (jump && menu == location)
 				jump->offset = strlen(r->s);
 			str_printf(r, "%*c-> %s", j, ' ',
-				   _(menu_get_prompt(menu)));
+				   menu_get_prompt(menu));
 			if (menu->sym) {
 				str_printf(r, " (%s [=%s])", menu->sym->name ?
-					menu->sym->name : _("<choice>"),
+					menu->sym->name : "<choice>",
 					sym_get_string_value(menu->sym));
 			}
 			str_append(r, "\n");
@@ -817,23 +814,23 @@
 
 	prop = get_symbol_prop(sym);
 	if (prop) {
-		str_printf(r, _("  Defined at %s:%d\n"), prop->menu->file->name,
+		str_printf(r, "  Defined at %s:%d\n", prop->menu->file->name,
 			prop->menu->lineno);
 		if (!expr_is_yes(prop->visible.expr)) {
-			str_append(r, _("  Depends on: "));
+			str_append(r, "  Depends on: ");
 			expr_gstr_print(prop->visible.expr, r);
 			str_append(r, "\n");
 		}
 	}
 
-	get_symbol_props_str(r, sym, P_SELECT, _("  Selects: "));
+	get_symbol_props_str(r, sym, P_SELECT, "  Selects: ");
 	if (sym->rev_dep.expr) {
 		expr_gstr_print_revdep(sym->rev_dep.expr, r, yes, "  Selected by [y]:\n");
 		expr_gstr_print_revdep(sym->rev_dep.expr, r, mod, "  Selected by [m]:\n");
 		expr_gstr_print_revdep(sym->rev_dep.expr, r, no, "  Selected by [n]:\n");
 	}
 
-	get_symbol_props_str(r, sym, P_IMPLY, _("  Implies: "));
+	get_symbol_props_str(r, sym, P_IMPLY, "  Implies: ");
 	if (sym->implied.expr) {
 		expr_gstr_print_revdep(sym->implied.expr, r, yes, "  Implied by [y]:\n");
 		expr_gstr_print_revdep(sym->implied.expr, r, mod, "  Implied by [m]:\n");
@@ -852,7 +849,7 @@
 	for (i = 0; sym_arr && (sym = sym_arr[i]); i++)
 		get_symbol_str(&res, sym, head);
 	if (!i)
-		str_append(&res, _("No matches found.\n"));
+		str_append(&res, "No matches found.\n");
 	return res;
 }
 
@@ -867,7 +864,7 @@
 			str_printf(help, "%s%s:\n\n", CONFIG_, sym->name);
 		help_text = menu_get_help(menu);
 	}
-	str_printf(help, "%s\n", _(help_text));
+	str_printf(help, "%s\n", help_text);
 	if (sym)
 		get_symbol_str(help, sym, NULL);
 }
diff --git a/scripts/kconfig/nconf-cfg.sh b/scripts/kconfig/nconf-cfg.sh
new file mode 100644
index 0000000..42f5ac7
--- /dev/null
+++ b/scripts/kconfig/nconf-cfg.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+PKG="ncursesw menuw panelw"
+PKG2="ncurses menu panel"
+
+if pkg-config --exists $PKG; then
+	echo cflags=\"$(pkg-config --cflags $PKG)\"
+	echo libs=\"$(pkg-config --libs $PKG)\"
+	exit 0
+fi
+
+if pkg-config --exists $PKG2; then
+	echo cflags=\"$(pkg-config --cflags $PKG2)\"
+	echo libs=\"$(pkg-config --libs $PKG2)\"
+	exit 0
+fi
+
+# Unfortunately, some distributions (e.g. openSUSE) cannot find ncurses
+# by pkg-config.
+if [ -f /usr/include/ncursesw/ncurses.h ]; then
+	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\"
+	echo libs=\"-lncursesw -lmenuw -lpanelw\"
+	exit 0
+fi
+
+if [ -f /usr/include/ncurses/ncurses.h ]; then
+	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncurses\"
+	echo libs=\"-lncurses -lmenu -lpanel\"
+	exit 0
+fi
+
+if [ -f /usr/include/ncurses.h ]; then
+	echo cflags=\"-D_GNU_SOURCE\"
+	echo libs=\"-lncurses -lmenu -lpanel\"
+	exit 0
+fi
+
+echo >&2 "*"
+echo >&2 "* Unable to find the ncurses package."
+echo >&2 "* Install ncurses (ncurses-devel or libncurses-dev"
+echo >&2 "* depending on your distribution)."
+echo >&2 "*"
+exit 1
diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c
index 0031147..97b7844 100644
--- a/scripts/kconfig/nconf.c
+++ b/scripts/kconfig/nconf.c
@@ -15,7 +15,7 @@
 #include "nconf.h"
 #include <ctype.h>
 
-static const char nconf_global_help[] = N_(
+static const char nconf_global_help[] =
 "Help windows\n"
 "------------\n"
 "o  Global help:  Unless in a data entry window, pressing <F1> will give \n"
@@ -130,8 +130,8 @@
 "\n"
 "Note that this mode can eventually be a little more CPU expensive than\n"
 "the default mode, especially with a larger number of unfolded submenus.\n"
-"\n"),
-menu_no_f_instructions[] = N_(
+"\n",
+menu_no_f_instructions[] =
 "Legend:  [*] built-in  [ ] excluded  <M> module  < > module capable.\n"
 "Submenus are designated by a trailing \"--->\", empty ones by \"----\".\n"
 "\n"
@@ -147,8 +147,8 @@
 "You do not have function keys support.\n"
 "Press <1> instead of <F1>, <2> instead of <F2>, etc.\n"
 "For verbose global help use key <1>.\n"
-"For help related to the current menu entry press <?> or <h>.\n"),
-menu_instructions[] = N_(
+"For help related to the current menu entry press <?> or <h>.\n",
+menu_instructions[] =
 "Legend:  [*] built-in  [ ] excluded  <M> module  < > module capable.\n"
 "Submenus are designated by a trailing \"--->\", empty ones by \"----\".\n"
 "\n"
@@ -163,30 +163,30 @@
 "\n"
 "Pressing <1> may be used instead of <F1>, <2> instead of <F2>, etc.\n"
 "For verbose global help press <F1>.\n"
-"For help related to the current menu entry press <?> or <h>.\n"),
-radiolist_instructions[] = N_(
+"For help related to the current menu entry press <?> or <h>.\n",
+radiolist_instructions[] =
 "Press <Up>, <Down>, <Home> or <End> to navigate a radiolist, select\n"
 "with <Space>.\n"
 "For help related to the current entry press <?> or <h>.\n"
-"For global help press <F1>.\n"),
-inputbox_instructions_int[] = N_(
+"For global help press <F1>.\n",
+inputbox_instructions_int[] =
 "Please enter a decimal value.\n"
 "Fractions will not be accepted.\n"
-"Press <Enter> to apply, <Esc> to cancel."),
-inputbox_instructions_hex[] = N_(
+"Press <Enter> to apply, <Esc> to cancel.",
+inputbox_instructions_hex[] =
 "Please enter a hexadecimal value.\n"
-"Press <Enter> to apply, <Esc> to cancel."),
-inputbox_instructions_string[] = N_(
+"Press <Enter> to apply, <Esc> to cancel.",
+inputbox_instructions_string[] =
 "Please enter a string value.\n"
-"Press <Enter> to apply, <Esc> to cancel."),
-setmod_text[] = N_(
+"Press <Enter> to apply, <Esc> to cancel.",
+setmod_text[] =
 "This feature depends on another feature which has been configured as a\n"
-"module.  As a result, the current feature will be built as a module too."),
-load_config_text[] = N_(
+"module.  As a result, the current feature will be built as a module too.",
+load_config_text[] =
 "Enter the name of the configuration file you wish to load.\n"
 "Accept the name shown to restore the configuration you last\n"
-"retrieved.  Leave empty to abort."),
-load_config_help[] = N_(
+"retrieved.  Leave empty to abort.",
+load_config_help[] =
 "For various reasons, one may wish to keep several different\n"
 "configurations available on a single machine.\n"
 "\n"
@@ -194,11 +194,11 @@
 "default one, entering its name here will allow you to load and modify\n"
 "that configuration.\n"
 "\n"
-"Leave empty to abort.\n"),
-save_config_text[] = N_(
+"Leave empty to abort.\n",
+save_config_text[] =
 "Enter a filename to which this configuration should be saved\n"
-"as an alternate.  Leave empty to abort."),
-save_config_help[] = N_(
+"as an alternate.  Leave empty to abort.",
+save_config_help[] =
 "For various reasons, one may wish to keep several different\n"
 "configurations available on a single machine.\n"
 "\n"
@@ -206,8 +206,8 @@
 "and use the current configuration as an alternate to whatever\n"
 "configuration options you have selected at that time.\n"
 "\n"
-"Leave empty to abort.\n"),
-search_help[] = N_(
+"Leave empty to abort.\n",
+search_help[] =
 "Search for symbols (configuration variable names CONFIG_*) and display\n"
 "their relations.  Regular expressions are supported.\n"
 "Example:  Search for \"^FOO\".\n"
@@ -244,7 +244,7 @@
 "USB  => find all symbols containing USB\n"
 "^USB => find all symbols starting with USB\n"
 "USB$ => find all symbols ending with USB\n"
-"\n");
+"\n";
 
 struct mitem {
 	char str[256];
@@ -388,7 +388,7 @@
 static void handle_f1(int *key, struct menu *current_item)
 {
 	show_scroll_win(main_window,
-			_("Global help"), _(nconf_global_help));
+			"Global help", nconf_global_help);
 	return;
 }
 
@@ -403,8 +403,8 @@
 static void handle_f3(int *key, struct menu *current_item)
 {
 	show_scroll_win(main_window,
-			_("Short help"),
-			_(current_instructions));
+			"Short help",
+			current_instructions);
 	return;
 }
 
@@ -412,7 +412,7 @@
 static void handle_f4(int *key, struct menu *current_item)
 {
 	int res = btn_dialog(main_window,
-			_("Show all symbols?"),
+			"Show all symbols?",
 			2,
 			"   <Show All>   ",
 			"<Don't show all>");
@@ -653,8 +653,8 @@
 		return 0;
 	}
 	res = btn_dialog(main_window,
-			_("Do you wish to save your new configuration?\n"
-				"<ESC> to cancel and resume nconfig."),
+			"Do you wish to save your new configuration?\n"
+				"<ESC> to cancel and resume nconfig.",
 			2,
 			"   <save>   ",
 			"<don't save>");
@@ -670,15 +670,15 @@
 		if (res)
 			btn_dialog(
 				main_window,
-				_("Error during writing of configuration.\n"
-				  "Your configuration changes were NOT saved."),
+				"Error during writing of configuration.\n"
+				  "Your configuration changes were NOT saved.",
 				  1,
 				  "<OK>");
 		break;
 	default:
 		btn_dialog(
 			main_window,
-			_("Your configuration changes were NOT saved."),
+			"Your configuration changes were NOT saved.",
 			1,
 			"<OK>");
 		break;
@@ -697,12 +697,12 @@
 	int dres;
 
 	title = str_new();
-	str_printf( &title, _("Enter (sub)string or regexp to search for "
-			      "(with or without \"%s\")"), CONFIG_);
+	str_printf( &title, "Enter (sub)string or regexp to search for "
+			      "(with or without \"%s\")", CONFIG_);
 
 again:
 	dres = dialog_inputbox(main_window,
-			_("Search Configuration Parameter"),
+			"Search Configuration Parameter",
 			str_get(&title),
 			"", &dialog_input_result, &dialog_input_result_len);
 	switch (dres) {
@@ -710,7 +710,7 @@
 		break;
 	case 1:
 		show_scroll_win(main_window,
-				_("Search Configuration"), search_help);
+				"Search Configuration", search_help);
 		goto again;
 	default:
 		str_free(&title);
@@ -726,7 +726,7 @@
 	res = get_relations_str(sym_arr, NULL);
 	free(sym_arr);
 	show_scroll_win(main_window,
-			_("Search Results"), str_get(&res));
+			"Search Results", str_get(&res));
 	str_free(&res);
 	str_free(&title);
 }
@@ -754,7 +754,7 @@
 			switch (ptype) {
 			case P_MENU:
 				child_count++;
-				prompt = _(prompt);
 				if (single_menu_mode) {
 					item_make(menu, 'm',
 						"%s%*c%s",
@@ -775,7 +775,7 @@
 					item_make(menu, ':',
 						"   %*c*** %s ***",
 						indent + 1, ' ',
-						_(prompt));
+						prompt);
 				}
 				break;
 			default:
@@ -783,7 +783,7 @@
 					child_count++;
 					item_make(menu, ':', "---%*c%s",
 						indent + 1, ' ',
-						_(prompt));
+						prompt);
 				}
 			}
 		} else
@@ -829,11 +829,11 @@
 		}
 
 		item_add_str("%*c%s", indent + 1,
-				' ', _(menu_get_prompt(menu)));
+				' ', menu_get_prompt(menu));
 		if (val == yes) {
 			if (def_menu) {
 				item_add_str(" (%s)",
-					_(menu_get_prompt(def_menu)));
+					menu_get_prompt(def_menu));
 				item_add_str("  --->");
 				if (def_menu->list) {
 					indent += 2;
@@ -847,7 +847,7 @@
 		if (menu == current_menu) {
 			item_make(menu, ':',
 				"---%*c%s", indent + 1,
-				' ', _(menu_get_prompt(menu)));
+				' ', menu_get_prompt(menu));
 			goto conf_childs;
 		}
 		child_count++;
@@ -894,17 +894,17 @@
 				if (tmp < 0)
 					tmp = 0;
 				item_add_str("%*c%s%s", tmp, ' ',
-						_(menu_get_prompt(menu)),
+						menu_get_prompt(menu),
 						(sym_has_value(sym) ||
 						 !sym_is_changable(sym)) ? "" :
-						_(" (NEW)"));
+						" (NEW)");
 				goto conf_childs;
 			}
 		}
 		item_add_str("%*c%s%s", indent + 1, ' ',
-				_(menu_get_prompt(menu)),
+				menu_get_prompt(menu),
 				(sym_has_value(sym) || !sym_is_changable(sym)) ?
-				"" : _(" (NEW)"));
+				"" : " (NEW)");
 		if (menu->prompt && menu->prompt->type == P_MENU) {
 			item_add_str("  %s", menu_is_empty(menu) ? "----" : "--->");
 			return;
@@ -1086,8 +1086,8 @@
 		if (!child_count)
 			break;
 
-		show_menu(prompt ? _(prompt) : _("Main Menu"),
-				_(menu_instructions),
+		show_menu(prompt ? prompt : "Main Menu",
+				menu_instructions,
 				current_index, &last_top_row);
 		keypad((menu_win(curses_menu)), TRUE);
 		while (!global_exit) {
@@ -1227,13 +1227,13 @@
 
 	help = str_new();
 	menu_get_ext_help(menu, &help);
-	show_scroll_win(main_window, _(menu_get_prompt(menu)), str_get(&help));
+	show_scroll_win(main_window, menu_get_prompt(menu), str_get(&help));
 	str_free(&help);
 }
 
 static void conf_choice(struct menu *menu)
 {
-	const char *prompt = _(menu_get_prompt(menu));
+	const char *prompt = menu_get_prompt(menu);
 	struct menu *child = NULL;
 	struct symbol *active;
 	int selected_index = 0;
@@ -1256,13 +1256,13 @@
 
 			if (child->sym == sym_get_choice_value(menu->sym))
 				item_make(child, ':', "<X> %s",
-						_(menu_get_prompt(child)));
+						menu_get_prompt(child));
 			else if (child->sym)
 				item_make(child, ':', "    %s",
-						_(menu_get_prompt(child)));
+						menu_get_prompt(child));
 			else
 				item_make(child, ':', "*** %s ***",
-						_(menu_get_prompt(child)));
+						menu_get_prompt(child));
 
 			if (child->sym == active){
 				last_top_row = top_row(curses_menu);
@@ -1270,8 +1270,8 @@
 			}
 			i++;
 		}
-		show_menu(prompt ? _(prompt) : _("Choice Menu"),
-				_(radiolist_instructions),
+		show_menu(prompt ? prompt : "Choice Menu",
+				radiolist_instructions,
 				selected_index,
 				&last_top_row);
 		while (!global_exit) {
@@ -1358,19 +1358,19 @@
 
 		switch (sym_get_type(menu->sym)) {
 		case S_INT:
-			heading = _(inputbox_instructions_int);
+			heading = inputbox_instructions_int;
 			break;
 		case S_HEX:
-			heading = _(inputbox_instructions_hex);
+			heading = inputbox_instructions_hex;
 			break;
 		case S_STRING:
-			heading = _(inputbox_instructions_string);
+			heading = inputbox_instructions_string;
 			break;
 		default:
-			heading = _("Internal nconf error!");
+			heading = "Internal nconf error!";
 		}
 		res = dialog_inputbox(main_window,
-				prompt ? _(prompt) : _("Main Menu"),
+				prompt ? prompt : "Main Menu",
 				heading,
 				sym_get_string_value(menu->sym),
 				&dialog_input_result,
@@ -1381,7 +1381,7 @@
 						dialog_input_result))
 				return;
 			btn_dialog(main_window,
-				_("You have made an invalid entry."), 0);
+				"You have made an invalid entry.", 0);
 			break;
 		case 1:
 			show_help(menu);
@@ -1410,11 +1410,11 @@
 				sym_set_change_count(1);
 				return;
 			}
-			btn_dialog(main_window, _("File does not exist!"), 0);
+			btn_dialog(main_window, "File does not exist!", 0);
 			break;
 		case 1:
 			show_scroll_win(main_window,
-					_("Load Alternate Configuration"),
+					"Load Alternate Configuration",
 					load_config_help);
 			break;
 		case KEY_EXIT:
@@ -1441,13 +1441,13 @@
 				set_config_filename(dialog_input_result);
 				return;
 			}
-			btn_dialog(main_window, _("Can't create file! "
-				"Probably a nonexistent directory."),
+			btn_dialog(main_window, "Can't create file! "
+				"Probably a nonexistent directory.",
 				1, "<OK>");
 			break;
 		case 1:
 			show_scroll_win(main_window,
-				_("Save Alternate Configuration"),
+				"Save Alternate Configuration",
 				save_config_help);
 			break;
 		case KEY_EXIT:
@@ -1480,10 +1480,6 @@
 	int lines, columns;
 	char *mode;
 
-	setlocale(LC_ALL, "");
-	bindtextdomain(PACKAGE, LOCALEDIR);
-	textdomain(PACKAGE);
-
 	if (ac > 1 && strcmp(av[1], "-s") == 0) {
 		/* Silence conf_read() until the real callback is set up */
 		conf_set_message_callback(NULL);
@@ -1541,8 +1537,8 @@
 	/* check for KEY_FUNC(1) */
 	if (has_key(KEY_F(1)) == FALSE) {
 		show_scroll_win(main_window,
-				_("Instructions"),
-				_(menu_no_f_instructions));
+				"Instructions",
+				menu_no_f_instructions);
 	}
 
 	conf_set_message_callback(conf_message_callback);
diff --git a/scripts/kconfig/nconf.h b/scripts/kconfig/nconf.h
index 9f6f21d..2b9e19f 100644
--- a/scripts/kconfig/nconf.h
+++ b/scripts/kconfig/nconf.h
@@ -14,7 +14,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <locale.h>
 #include <ncurses.h>
 #include <menu.h>
 #include <panel.h>
diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c
new file mode 100644
index 0000000..65da87f
--- /dev/null
+++ b/scripts/kconfig/preprocess.c
@@ -0,0 +1,572 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2018 Masahiro Yamada <yamada.masahiro@socionext.com>
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "list.h"
+
+#define ARRAY_SIZE(arr)		(sizeof(arr) / sizeof((arr)[0]))
+
+static char *expand_string_with_args(const char *in, int argc, char *argv[]);
+
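+/* Report a preprocessor error at the current file/line, then exit */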
+static void __attribute__((noreturn)) pperror(const char *format, ...)
+{
+	va_list ap;
+
+	fprintf(stderr, "%s:%d: ", current_file->name, yylineno);
+	va_start(ap, format);
+	vfprintf(stderr, format, ap);
+	va_end(ap);
+	fprintf(stderr, "\n");
+
+	exit(1);
+}
+
+/*
+ * Environment variables
+ */
+static LIST_HEAD(env_list);
+
+struct env {
+	char *name;
+	char *value;
+	struct list_head node;
+};
+
+static void env_add(const char *name, const char *value)
+{
+	struct env *e;
+
+	e = xmalloc(sizeof(*e));
+	e->name = xstrdup(name);
+	e->value = xstrdup(value);
+
+	list_add_tail(&e->node, &env_list);
+}
+
+static void env_del(struct env *e)
+{
+	list_del(&e->node);
+	free(e->name);
+	free(e->value);
+	free(e);
+}
+
+/* The returned pointer must be freed when done */
+static char *env_expand(const char *name)
+{
+	struct env *e;
+	const char *value;
+
+	if (!*name)
+		return NULL;
+
+	list_for_each_entry(e, &env_list, node) {
+		if (!strcmp(name, e->name))
+			return xstrdup(e->value);
+	}
+
+	value = getenv(name);
+	if (!value)
+		return NULL;
+
+	/*
+	 * We need to remember all referenced environment variables.
+	 * They will be written out to include/config/auto.conf.cmd
+	 */
+	env_add(name, value);
+
+	return xstrdup(value);
+}
+
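+/*
+ * Emit Makefile snippets recording the value each referenced environment
+ * variable had while the Kconfig files were parsed, so that the generated
+ * config is considered stale whenever one of those values changes:
+ *
+ *   ifneq "$(NAME)" "value seen during parsing"
+ *   <autoconfig_name>: FORCE
+ *   endif
+ *
+ * The env list is consumed (freed) as it is written out.
+ */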
+void env_write_dep(FILE *f, const char *autoconfig_name)
+{
+	struct env *e, *tmp;
+
+	list_for_each_entry_safe(e, tmp, &env_list, node) {
+		fprintf(f, "ifneq \"$(%s)\" \"%s\"\n", e->name, e->value);
+		fprintf(f, "%s: FORCE\n", autoconfig_name);
+		fprintf(f, "endif\n");
+		env_del(e);
+	}
+}
+
+/*
+ * Built-in functions
+ */
+struct function {
+	const char *name;
+	unsigned int min_args;
+	unsigned int max_args;
+	char *(*func)(int argc, char *argv[]);
+};
+
+static char *do_error_if(int argc, char *argv[])
+{
+	if (!strcmp(argv[0], "y"))
+		pperror("%s", argv[1]);
+
+	return NULL;
+}
+
+static char *do_filename(int argc, char *argv[])
+{
+	return xstrdup(current_file->name);
+}
+
+static char *do_info(int argc, char *argv[])
+{
+	printf("%s\n", argv[0]);
+
+	return xstrdup("");
+}
+
+static char *do_lineno(int argc, char *argv[])
+{
+	char buf[16];
+
+	sprintf(buf, "%d", yylineno);
+
+	return xstrdup(buf);
+}
+
+static char *do_shell(int argc, char *argv[])
+{
+	FILE *p;
+	char buf[256];
+	char *cmd;
+	size_t nread;
+	int i;
+
+	cmd = argv[0];
+
+	p = popen(cmd, "r");
+	if (!p) {
+		perror(cmd);
+		exit(1);
+	}
+
+	nread = fread(buf, 1, sizeof(buf), p);
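+	/* keep at most sizeof(buf) - 1 bytes, leaving room for the '\0' */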
+	if (nread == sizeof(buf))
+		nread--;
+
+	/* remove trailing newlines; guard against empty output */
+	while (nread > 0 && buf[nread - 1] == '\n')
+		nread--;
+
+	buf[nread] = 0;
+
+	/* replace a new line with a space */
+	for (i = 0; i < nread; i++) {
+		if (buf[i] == '\n')
+			buf[i] = ' ';
+	}
+
+	if (pclose(p) == -1) {
+		perror(cmd);
+		exit(1);
+	}
+
+	return xstrdup(buf);
+}
+
+static char *do_warning_if(int argc, char *argv[])
+{
+	if (!strcmp(argv[0], "y"))
+		fprintf(stderr, "%s:%d: %s\n",
+			current_file->name, yylineno, argv[1]);
+
+	return xstrdup("");
+}
+
+static const struct function function_table[] = {
+	/* Name		MIN	MAX	Function */
+	{ "error-if",	2,	2,	do_error_if },
+	{ "filename",	0,	0,	do_filename },
+	{ "info",	1,	1,	do_info },
+	{ "lineno",	0,	0,	do_lineno },
+	{ "shell",	1,	1,	do_shell },
+	{ "warning-if",	2,	2,	do_warning_if },
+};
+
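+/* Hard limit on the number of comma-separated arguments in one $( ) call */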
+#define FUNCTION_MAX_ARGS		16
+
+static char *function_expand(const char *name, int argc, char *argv[])
+{
+	const struct function *f;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(function_table); i++) {
+		f = &function_table[i];
+		if (strcmp(f->name, name))
+			continue;
+
+		if (argc < f->min_args)
+			pperror("too few function arguments passed to '%s'",
+				name);
+
+		if (argc > f->max_args)
+			pperror("too many function arguments passed to '%s'",
+				name);
+
+		return f->func(argc, argv);
+	}
+
+	return NULL;
+}
+
+/*
+ * Variables (and user-defined functions)
+ */
+static LIST_HEAD(variable_list);
+
+struct variable {
+	char *name;
+	char *value;
+	enum variable_flavor flavor;
+	int exp_count;
+	struct list_head node;
+};
+
+static struct variable *variable_lookup(const char *name)
+{
+	struct variable *v;
+
+	list_for_each_entry(v, &variable_list, node) {
+		if (!strcmp(name, v->name))
+			return v;
+	}
+
+	return NULL;
+}
+
+static char *variable_expand(const char *name, int argc, char *argv[])
+{
+	struct variable *v;
+	char *res;
+
+	v = variable_lookup(name);
+	if (!v)
+		return NULL;
+
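+	/*
+	 * A variable that references itself with no arguments can never
+	 * finish expanding; catch that here.  The depth cap below guards
+	 * against indirect loops through user-defined function calls.
+	 */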
+	if (argc == 0 && v->exp_count)
+		pperror("Recursive variable '%s' references itself (eventually)",
+			name);
+
+	if (v->exp_count > 1000)
+		pperror("Too deep recursive expansion");
+
+	v->exp_count++;
+
+	if (v->flavor == VAR_RECURSIVE)
+		res = expand_string_with_args(v->value, argc, argv);
+	else
+		res = xstrdup(v->value);
+
+	v->exp_count--;
+
+	return res;
+}
+
+void variable_add(const char *name, const char *value,
+		  enum variable_flavor flavor)
+{
+	struct variable *v;
+	char *new_value;
+	bool append = false;
+
+	v = variable_lookup(name);
+	if (v) {
+		/* For defined variables, += inherits the existing flavor */
+		if (flavor == VAR_APPEND) {
+			flavor = v->flavor;
+			append = true;
+		} else {
+			free(v->value);
+		}
+	} else {
+		/* For undefined variables, += assumes the recursive flavor */
+		if (flavor == VAR_APPEND)
+			flavor = VAR_RECURSIVE;
+
+		v = xmalloc(sizeof(*v));
+		v->name = xstrdup(name);
+		v->exp_count = 0;
+		list_add_tail(&v->node, &variable_list);
+	}
+
+	v->flavor = flavor;
+
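+	/* Simple (:=) variables are expanded once, at definition time */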
+	if (flavor == VAR_SIMPLE)
+		new_value = expand_string(value);
+	else
+		new_value = xstrdup(value);
+
+	if (append) {
+		v->value = xrealloc(v->value,
+				    strlen(v->value) + strlen(new_value) + 2);
+		strcat(v->value, " ");
+		strcat(v->value, new_value);
+		free(new_value);
+	} else {
+		v->value = new_value;
+	}
+}
+
+static void variable_del(struct variable *v)
+{
+	list_del(&v->node);
+	free(v->name);
+	free(v->value);
+	free(v);
+}
+
+void variable_all_del(void)
+{
+	struct variable *v, *tmp;
+
+	list_for_each_entry_safe(v, tmp, &variable_list, node)
+		variable_del(v);
+}
+
+/*
+ * Evaluate a clause with arguments.  argc/argv are arguments from the upper
+ * function call.
+ *
+ * Returned string must be freed when done
+ */
+static char *eval_clause(const char *str, size_t len, int argc, char *argv[])
+{
+	char *tmp, *name, *res, *endptr, *prev, *p;
+	int new_argc = 0;
+	char *new_argv[FUNCTION_MAX_ARGS];
+	int nest = 0;
+	int i;
+	unsigned long n;
+
+	tmp = xstrndup(str, len);
+
+	/*
+	 * If the variable name is '1', '2', etc., it is generally an argument
+	 * from a user-function call (i.e. a local-scope variable).  If no such
+	 * argument is available, fall back to looking up global-scope variables.
+	 */
+	n = strtoul(tmp, &endptr, 10);
+	if (!*endptr && n > 0 && n <= argc) {
+		res = xstrdup(argv[n - 1]);
+		goto free_tmp;
+	}
+
+	prev = p = tmp;
+
+	/*
+	 * Split into tokens
+	 * The function name and arguments are separated by a comma.
+	 * For example, if the function call is like this:
+	 *   $(foo,$(x),$(y))
+	 *
+	 * The input string for this helper should be:
+	 *   foo,$(x),$(y)
+	 *
+	 * and split into:
+	 *   new_argv[0] = 'foo'
+	 *   new_argv[1] = '$(x)'
+	 *   new_argv[2] = '$(y)'
+	 */
+	while (*p) {
+		if (nest == 0 && *p == ',') {
+			*p = 0;
+			if (new_argc >= FUNCTION_MAX_ARGS)
+				pperror("too many function arguments");
+			new_argv[new_argc++] = prev;
+			prev = p + 1;
+		} else if (*p == '(') {
+			nest++;
+		} else if (*p == ')') {
+			nest--;
+		}
+
+		p++;
+	}
+	new_argv[new_argc++] = prev;
+
+	/*
+	 * Shift arguments
+	 * new_argv[0] represents a function name or a variable name.  Put it
+	 * into 'name', then shift the rest of the arguments.  This simplifies
+	 * 'const' handling.
+	 */
+	name = expand_string_with_args(new_argv[0], argc, argv);
+	new_argc--;
+	for (i = 0; i < new_argc; i++)
+		new_argv[i] = expand_string_with_args(new_argv[i + 1],
+						      argc, argv);
+
+	/* Search for variables */
+	res = variable_expand(name, new_argc, new_argv);
+	if (res)
+		goto free;
+
+	/* Look for built-in functions */
+	res = function_expand(name, new_argc, new_argv);
+	if (res)
+		goto free;
+
+	/* Last, try environment variable */
+	if (new_argc == 0) {
+		res = env_expand(name);
+		if (res)
+			goto free;
+	}
+
+	res = xstrdup("");
+free:
+	for (i = 0; i < new_argc; i++)
+		free(new_argv[i]);
+	free(name);
+free_tmp:
+	free(tmp);
+
+	return res;
+}
+
+/*
+ * Expand a string that follows '$'
+ *
+ * For example, if the input string is
+ *     ($(FOO)$($(BAR)))$(BAZ)
+ * this helper evaluates
+ *     $($(FOO)$($(BAR)))
+ * and returns a new string containing the expansion (note that the string is
+ * recursively expanded), also advancing 'str' to point to the next character
+ * after the corresponding closing parenthesis, in this case, *str will be
+ *     $(BAZ)
+ */
+static char *expand_dollar_with_args(const char **str, int argc, char *argv[])
+{
+	const char *p = *str;
+	const char *q;
+	int nest = 0;
+
+	/*
+	 * In Kconfig, variable/function references always start with "$(".
+	 * Neither single-letter variables as in $A nor curly braces as in ${CC}
+	 * are supported.  '$' not followed by '(' loses its special meaning.
+	 */
+	if (*p != '(') {
+		*str = p;
+		return xstrdup("$");
+	}
+
+	p++;
+	q = p;
+	while (*q) {
+		if (*q == '(') {
+			nest++;
+		} else if (*q == ')') {
+			if (nest-- == 0)
+				break;
+		}
+		q++;
+	}
+
+	if (!*q)
+		pperror("unterminated reference to '%s': missing ')'", p);
+
+	/* Advance 'str' to after the expanded initial portion of the string */
+	*str = q + 1;
+
+	return eval_clause(p, q - p, argc, argv);
+}
+
+char *expand_dollar(const char **str)
+{
+	return expand_dollar_with_args(str, 0, NULL);
+}
+
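+/*
+ * Core expansion loop: copy 'str' into a growing buffer, splicing in the
+ * expansion of every $(...) encountered, until is_end() matches the
+ * current character.  '*str' is left pointing at that end character.
+ */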
+static char *__expand_string(const char **str, bool (*is_end)(char c),
+			     int argc, char *argv[])
+{
+	const char *in, *p;
+	char *expansion, *out;
+	size_t in_len, out_len;
+
+	out = xmalloc(1);
+	*out = 0;
+	out_len = 1;
+
+	p = in = *str;
+
+	while (1) {
+		if (*p == '$') {
+			in_len = p - in;
+			p++;
+			expansion = expand_dollar_with_args(&p, argc, argv);
+			out_len += in_len + strlen(expansion);
+			out = xrealloc(out, out_len);
+			strncat(out, in, in_len);
+			strcat(out, expansion);
+			free(expansion);
+			in = p;
+			continue;
+		}
+
+		if (is_end(*p))
+			break;
+
+		p++;
+	}
+
+	in_len = p - in;
+	out_len += in_len;
+	out = xrealloc(out, out_len);
+	strncat(out, in, in_len);
+
+	/* Advance 'str' to the end character */
+	*str = p;
+
+	return out;
+}
+
+static bool is_end_of_str(char c)
+{
+	return !c;
+}
+
+/*
+ * Expand variables and functions in the given string.  Undefined variables
+ * expand to an empty string.
+ * The returned string must be freed when done.
+ */
+static char *expand_string_with_args(const char *in, int argc, char *argv[])
+{
+	return __expand_string(&in, is_end_of_str, argc, argv);
+}
+
+char *expand_string(const char *in)
+{
+	return expand_string_with_args(in, 0, NULL);
+}
+
+static bool is_end_of_token(char c)
+{
+	/* Why are '.' and '/' valid characters for symbols? */
+	return !(isalnum(c) || c == '_' || c == '-' || c == '.' || c == '/');
+}
+
+/*
+ * Expand variables in a token.  Parsing stops when a token separator
+ * (in most cases, whitespace) is encountered.  'str' is updated to
+ * point to the next character.
+ *
+ * The returned string must be freed when done.
+ */
+char *expand_one_token(const char **str)
+{
+	return __expand_string(str, is_end_of_token, 0, NULL);
+}
diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh
new file mode 100755
index 0000000..0862e15
--- /dev/null
+++ b/scripts/kconfig/qconf-cfg.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+PKG="Qt5Core Qt5Gui Qt5Widgets"
+PKG2="QtCore QtGui"
+
+if pkg-config --exists $PKG; then
+	echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags Qt5Core Qt5Gui Qt5Widgets)\"
+	echo libs=\"$(pkg-config --libs $PKG)\"
+	echo moc=\"$(pkg-config --variable=host_bins Qt5Core)/moc\"
+	exit 0
+fi
+
+if pkg-config --exists $PKG2; then
+	echo cflags=\"$(pkg-config --cflags $PKG2)\"
+	echo libs=\"$(pkg-config --libs $PKG2)\"
+	echo moc=\"$(pkg-config --variable=moc_location QtCore)\"
+	exit 0
+fi
+
+echo >&2 "*"
+echo >&2 "* Could not find Qt via pkg-config."
+echo >&2 "* Please install either Qt 4.8 or 5.x and make sure it's in PKG_CONFIG_PATH"
+echo >&2 "*"
+exit 1
diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index ae6c725..ad9c22d 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -34,10 +34,6 @@
 #include "qconf.moc"
 #include "images.c"
 
-#ifdef _
-# undef _
-# define _ qgettext
-#endif
 
 static QApplication *configApp;
 static ConfigSettings *configSettings;
@@ -46,12 +42,7 @@
 
 static inline QString qgettext(const char* str)
 {
-	return QString::fromLocal8Bit(gettext(str));
-}
-
-static inline QString qgettext(const QString& str)
-{
-	return QString::fromLocal8Bit(gettext(str.toLatin1()));
+	return QString::fromLocal8Bit(str);
 }
 
 ConfigSettings::ConfigSettings()
@@ -127,7 +118,7 @@
 
 	sym = menu->sym;
 	prop = menu->prompt;
-	prompt = _(menu_get_prompt(menu));
+	prompt = qgettext(menu_get_prompt(menu));
 
 	if (prop) switch (prop->type) {
 	case P_MENU:
@@ -216,7 +207,7 @@
 		break;
 	}
 	if (!sym_has_value(sym) && visible)
-		prompt += _(" (NEW)");
+		prompt += " (NEW)";
 set_prompt:
 	setText(promptColIdx, prompt);
 }
@@ -327,7 +318,7 @@
 	setVerticalScrollMode(ScrollPerPixel);
 	setHorizontalScrollMode(ScrollPerPixel);
 
-	setHeaderLabels(QStringList() << _("Option") << _("Name") << "N" << "M" << "Y" << _("Value"));
+	setHeaderLabels(QStringList() << "Option" << "Name" << "N" << "M" << "Y" << "Value");
 
 	connect(this, SIGNAL(itemSelectionChanged(void)),
 		SLOT(updateSelection(void)));
@@ -883,7 +874,7 @@
 			QAction *action;
 
 			headerPopup = new QMenu(this);
-			action = new QAction(_("Show Name"), this);
+			action = new QAction("Show Name", this);
 			  action->setCheckable(true);
 			  connect(action, SIGNAL(toggled(bool)),
 				  parent(), SLOT(setShowName(bool)));
@@ -891,7 +882,7 @@
 				  action, SLOT(setOn(bool)));
 			  action->setChecked(showName);
 			  headerPopup->addAction(action);
-			action = new QAction(_("Show Range"), this);
+			action = new QAction("Show Range", this);
 			  action->setCheckable(true);
 			  connect(action, SIGNAL(toggled(bool)),
 				  parent(), SLOT(setShowRange(bool)));
@@ -899,7 +890,7 @@
 				  action, SLOT(setOn(bool)));
 			  action->setChecked(showRange);
 			  headerPopup->addAction(action);
-			action = new QAction(_("Show Data"), this);
+			action = new QAction("Show Data", this);
 			  action->setCheckable(true);
 			  connect(action, SIGNAL(toggled(bool)),
 				  parent(), SLOT(setShowData(bool)));
@@ -1086,7 +1077,7 @@
 	if (sym) {
 		if (_menu->prompt) {
 			head += "<big><b>";
-			head += print_filter(_(_menu->prompt->text));
+			head += print_filter(_menu->prompt->text);
 			head += "</b></big>";
 			if (sym->name) {
 				head += " (";
@@ -1117,7 +1108,7 @@
 		str_free(&help_gstr);
 	} else if (_menu->prompt) {
 		head += "<big><b>";
-		head += print_filter(_(_menu->prompt->text));
+		head += print_filter(_menu->prompt->text);
 		head += "</b></big><br><br>";
 		if (showDebug()) {
 			if (_menu->prompt->visible.expr) {
@@ -1152,7 +1143,7 @@
 		case P_PROMPT:
 		case P_MENU:
 			debug += QString().sprintf("prompt: <a href=\"m%p\">", prop->menu);
-			debug += print_filter(_(prop->text));
+			debug += print_filter(prop->text);
 			debug += "</a><br>";
 			break;
 		case P_DEFAULT:
@@ -1234,7 +1225,7 @@
 QMenu* ConfigInfoView::createStandardContextMenu(const QPoint & pos)
 {
 	QMenu* popup = Parent::createStandardContextMenu(pos);
-	QAction* action = new QAction(_("Show Debug Info"), popup);
+	QAction* action = new QAction("Show Debug Info", popup);
 	  action->setCheckable(true);
 	  connect(action, SIGNAL(toggled(bool)), SLOT(setShowDebug(bool)));
 	  connect(this, SIGNAL(showDebugChanged(bool)), action, SLOT(setOn(bool)));
@@ -1261,11 +1252,11 @@
 	QHBoxLayout* layout2 = new QHBoxLayout(0);
 	layout2->setContentsMargins(0, 0, 0, 0);
 	layout2->setSpacing(6);
-	layout2->addWidget(new QLabel(_("Find:"), this));
+	layout2->addWidget(new QLabel("Find:", this));
 	editField = new QLineEdit(this);
 	connect(editField, SIGNAL(returnPressed()), SLOT(search()));
 	layout2->addWidget(editField);
-	searchButton = new QPushButton(_("Search"), this);
+	searchButton = new QPushButton("Search", this);
 	searchButton->setAutoDefault(false);
 	connect(searchButton, SIGNAL(clicked()), SLOT(search()));
 	layout2->addWidget(searchButton);
@@ -1387,44 +1378,44 @@
 	toolBar = new QToolBar("Tools", this);
 	addToolBar(toolBar);
 
-	backAction = new QAction(QPixmap(xpm_back), _("Back"), this);
+	backAction = new QAction(QPixmap(xpm_back), "Back", this);
 	  connect(backAction, SIGNAL(triggered(bool)), SLOT(goBack()));
 	  backAction->setEnabled(false);
-	QAction *quitAction = new QAction(_("&Quit"), this);
+	QAction *quitAction = new QAction("&Quit", this);
 	quitAction->setShortcut(Qt::CTRL + Qt::Key_Q);
 	  connect(quitAction, SIGNAL(triggered(bool)), SLOT(close()));
-	QAction *loadAction = new QAction(QPixmap(xpm_load), _("&Load"), this);
+	QAction *loadAction = new QAction(QPixmap(xpm_load), "&Load", this);
 	loadAction->setShortcut(Qt::CTRL + Qt::Key_L);
 	  connect(loadAction, SIGNAL(triggered(bool)), SLOT(loadConfig()));
-	saveAction = new QAction(QPixmap(xpm_save), _("&Save"), this);
+	saveAction = new QAction(QPixmap(xpm_save), "&Save", this);
 	saveAction->setShortcut(Qt::CTRL + Qt::Key_S);
 	  connect(saveAction, SIGNAL(triggered(bool)), SLOT(saveConfig()));
 	conf_set_changed_callback(conf_changed);
 	// Set saveAction's initial state
 	conf_changed();
-	QAction *saveAsAction = new QAction(_("Save &As..."), this);
+	QAction *saveAsAction = new QAction("Save &As...", this);
 	  connect(saveAsAction, SIGNAL(triggered(bool)), SLOT(saveConfigAs()));
-	QAction *searchAction = new QAction(_("&Find"), this);
+	QAction *searchAction = new QAction("&Find", this);
 	searchAction->setShortcut(Qt::CTRL + Qt::Key_F);
 	  connect(searchAction, SIGNAL(triggered(bool)), SLOT(searchConfig()));
-	singleViewAction = new QAction(QPixmap(xpm_single_view), _("Single View"), this);
+	singleViewAction = new QAction(QPixmap(xpm_single_view), "Single View", this);
 	singleViewAction->setCheckable(true);
 	  connect(singleViewAction, SIGNAL(triggered(bool)), SLOT(showSingleView()));
-	splitViewAction = new QAction(QPixmap(xpm_split_view), _("Split View"), this);
+	splitViewAction = new QAction(QPixmap(xpm_split_view), "Split View", this);
 	splitViewAction->setCheckable(true);
 	  connect(splitViewAction, SIGNAL(triggered(bool)), SLOT(showSplitView()));
-	fullViewAction = new QAction(QPixmap(xpm_tree_view), _("Full View"), this);
+	fullViewAction = new QAction(QPixmap(xpm_tree_view), "Full View", this);
 	fullViewAction->setCheckable(true);
 	  connect(fullViewAction, SIGNAL(triggered(bool)), SLOT(showFullView()));
 
-	QAction *showNameAction = new QAction(_("Show Name"), this);
+	QAction *showNameAction = new QAction("Show Name", this);
 	  showNameAction->setCheckable(true);
 	  connect(showNameAction, SIGNAL(toggled(bool)), configView, SLOT(setShowName(bool)));
 	  showNameAction->setChecked(configView->showName());
-	QAction *showRangeAction = new QAction(_("Show Range"), this);
+	QAction *showRangeAction = new QAction("Show Range", this);
 	  showRangeAction->setCheckable(true);
 	  connect(showRangeAction, SIGNAL(toggled(bool)), configView, SLOT(setShowRange(bool)));
-	QAction *showDataAction = new QAction(_("Show Data"), this);
+	QAction *showDataAction = new QAction("Show Data", this);
 	  showDataAction->setCheckable(true);
 	  connect(showDataAction, SIGNAL(toggled(bool)), configView, SLOT(setShowData(bool)));
 
@@ -1435,21 +1426,21 @@
 	connect(optGroup, SIGNAL(triggered(QAction *)), menuView,
 		SLOT(setOptionMode(QAction *)));
 
-	configView->showNormalAction = new QAction(_("Show Normal Options"), optGroup);
-	configView->showAllAction = new QAction(_("Show All Options"), optGroup);
-	configView->showPromptAction = new QAction(_("Show Prompt Options"), optGroup);
+	configView->showNormalAction = new QAction("Show Normal Options", optGroup);
+	configView->showAllAction = new QAction("Show All Options", optGroup);
+	configView->showPromptAction = new QAction("Show Prompt Options", optGroup);
 	configView->showNormalAction->setCheckable(true);
 	configView->showAllAction->setCheckable(true);
 	configView->showPromptAction->setCheckable(true);
 
-	QAction *showDebugAction = new QAction( _("Show Debug Info"), this);
+	QAction *showDebugAction = new QAction("Show Debug Info", this);
 	  showDebugAction->setCheckable(true);
 	  connect(showDebugAction, SIGNAL(toggled(bool)), helpText, SLOT(setShowDebug(bool)));
 	  showDebugAction->setChecked(helpText->showDebug());
 
-	QAction *showIntroAction = new QAction( _("Introduction"), this);
+	QAction *showIntroAction = new QAction("Introduction", this);
 	  connect(showIntroAction, SIGNAL(triggered(bool)), SLOT(showIntro()));
-	QAction *showAboutAction = new QAction( _("About"), this);
+	QAction *showAboutAction = new QAction("About", this);
 	  connect(showAboutAction, SIGNAL(triggered(bool)), SLOT(showAbout()));
 
 	// init tool bar
@@ -1463,7 +1454,7 @@
 	toolBar->addAction(fullViewAction);
 
 	// create config menu
-	QMenu* config = menu->addMenu(_("&File"));
+	QMenu* config = menu->addMenu("&File");
 	config->addAction(loadAction);
 	config->addAction(saveAction);
 	config->addAction(saveAsAction);
@@ -1471,11 +1462,11 @@
 	config->addAction(quitAction);
 
 	// create edit menu
-	QMenu* editMenu = menu->addMenu(_("&Edit"));
+	QMenu* editMenu = menu->addMenu("&Edit");
 	editMenu->addAction(searchAction);
 
 	// create options menu
-	QMenu* optionMenu = menu->addMenu(_("&Option"));
+	QMenu* optionMenu = menu->addMenu("&Option");
 	optionMenu->addAction(showNameAction);
 	optionMenu->addAction(showRangeAction);
 	optionMenu->addAction(showDataAction);
@@ -1486,7 +1477,7 @@
 
 	// create help menu
 	menu->addSeparator();
-	QMenu* helpMenu = menu->addMenu(_("&Help"));
+	QMenu* helpMenu = menu->addMenu("&Help");
 	helpMenu->addAction(showIntroAction);
 	helpMenu->addAction(showAboutAction);
 
@@ -1534,14 +1525,14 @@
 	if (s.isNull())
 		return;
 	if (conf_read(QFile::encodeName(s)))
-		QMessageBox::information(this, "qconf", _("Unable to load configuration!"));
+		QMessageBox::information(this, "qconf", "Unable to load configuration!");
 	ConfigView::updateListAll();
 }
 
 bool ConfigMainWindow::saveConfig(void)
 {
 	if (conf_write(NULL)) {
-		QMessageBox::information(this, "qconf", _("Unable to save configuration!"));
+		QMessageBox::information(this, "qconf", "Unable to save configuration!");
 		return false;
 	}
 	return true;
@@ -1723,11 +1714,11 @@
 		e->accept();
 		return;
 	}
-	QMessageBox mb("qconf", _("Save configuration?"), QMessageBox::Warning,
+	QMessageBox mb("qconf", "Save configuration?", QMessageBox::Warning,
 			QMessageBox::Yes | QMessageBox::Default, QMessageBox::No, QMessageBox::Cancel | QMessageBox::Escape);
-	mb.setButtonText(QMessageBox::Yes, _("&Save Changes"));
-	mb.setButtonText(QMessageBox::No, _("&Discard Changes"));
-	mb.setButtonText(QMessageBox::Cancel, _("Cancel Exit"));
+	mb.setButtonText(QMessageBox::Yes, "&Save Changes");
+	mb.setButtonText(QMessageBox::No, "&Discard Changes");
+	mb.setButtonText(QMessageBox::Cancel, "Cancel Exit");
 	switch (mb.exec()) {
 	case QMessageBox::Yes:
 		if (saveConfig())
@@ -1746,7 +1737,7 @@
 
 void ConfigMainWindow::showIntro(void)
 {
-	static const QString str = _("Welcome to the qconf graphical configuration tool.\n\n"
+	static const QString str = "Welcome to the qconf graphical configuration tool.\n\n"
 		"For each option, a blank box indicates the feature is disabled, a check\n"
 		"indicates it is enabled, and a dot indicates that it is to be compiled\n"
 		"as a module.  Clicking on the box will cycle through the three states.\n\n"
@@ -1756,16 +1747,16 @@
 		"options must be enabled to support the option you are interested in, you can\n"
 		"still view the help of a grayed-out option.\n\n"
 		"Toggling Show Debug Info under the Options menu will show the dependencies,\n"
-		"which you can then match by examining other options.\n\n");
+		"which you can then match by examining other options.\n\n";
 
 	QMessageBox::information(this, "qconf", str);
 }
 
 void ConfigMainWindow::showAbout(void)
 {
-	static const QString str = _("qconf is Copyright (C) 2002 Roman Zippel <zippel@linux-m68k.org>.\n"
+	static const QString str = "qconf is Copyright (C) 2002 Roman Zippel <zippel@linux-m68k.org>.\n"
 		"Copyright (C) 2015 Boris Barbulovski <bbarbulovski@gmail.com>.\n\n"
-		"Bug reports and feature request can also be entered at http://bugzilla.kernel.org/\n");
+		"Bug reports and feature requests can also be entered at http://bugzilla.kernel.org/\n";
 
 	QMessageBox::information(this, "qconf", str);
 }
@@ -1826,7 +1817,7 @@
 
 static void usage(void)
 {
-	printf(_("%s [-s] <config>\n").toLatin1().constData(), progname);
+	printf("%s [-s] <config>\n", progname);
 	exit(0);
 }
 
@@ -1835,9 +1826,6 @@
 	ConfigMainWindow* v;
 	const char *name;
 
-	bindtextdomain(PACKAGE, LOCALEDIR);
-	textdomain(PACKAGE);
-
 	progname = av[0];
 	configApp = new QApplication(ac, av);
 	if (ac > 1 && av[1][0] == '-') {
diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index f0b2e3b..7c9a88e 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -33,33 +33,6 @@
 struct symbol *modules_sym;
 tristate modules_val;
 
-struct expr *sym_env_list;
-
-static void sym_add_default(struct symbol *sym, const char *def)
-{
-	struct property *prop = prop_alloc(P_DEFAULT, sym);
-
-	prop->expr = expr_alloc_symbol(sym_lookup(def, SYMBOL_CONST));
-}
-
-void sym_init(void)
-{
-	struct symbol *sym;
-	struct utsname uts;
-	static bool inited = false;
-
-	if (inited)
-		return;
-	inited = true;
-
-	uname(&uts);
-
-	sym = sym_lookup("UNAME_RELEASE", 0);
-	sym->type = S_STRING;
-	sym->flags |= SYMBOL_AUTO;
-	sym_add_default(sym, uts.release);
-}
-
 enum symbol_type sym_get_type(struct symbol *sym)
 {
 	enum symbol_type type = sym->type;
@@ -906,59 +879,6 @@
 	return symbol;
 }
 
-/*
- * Expand symbol's names embedded in the string given in argument. Symbols'
- * name to be expanded shall be prefixed by a '$'. Unknown symbol expands to
- * the empty string.
- */
-char *sym_expand_string_value(const char *in)
-{
-	const char *src;
-	char *res;
-	size_t reslen;
-
-	/*
-	 * Note: 'in' might come from a token that's about to be
-	 * freed, so make sure to always allocate a new string
-	 */
-	reslen = strlen(in) + 1;
-	res = xmalloc(reslen);
-	res[0] = '\0';
-
-	while ((src = strchr(in, '$'))) {
-		char *p, name[SYMBOL_MAXLENGTH];
-		const char *symval = "";
-		struct symbol *sym;
-		size_t newlen;
-
-		strncat(res, in, src - in);
-		src++;
-
-		p = name;
-		while (isalnum(*src) || *src == '_')
-			*p++ = *src++;
-		*p = '\0';
-
-		sym = sym_find(name);
-		if (sym != NULL) {
-			sym_calc_value(sym);
-			symval = sym_get_string_value(sym);
-		}
-
-		newlen = strlen(res) + strlen(symval) + strlen(src) + 1;
-		if (newlen > reslen) {
-			reslen = newlen;
-			res = xrealloc(res, reslen);
-		}
-
-		strcat(res, symval);
-		in = src;
-	}
-	strcat(res, in);
-
-	return res;
-}
-
 const char *sym_escape_string_value(const char *in)
 {
 	const char *p;
@@ -1401,32 +1321,3 @@
 	}
 	return "unknown";
 }
-
-static void prop_add_env(const char *env)
-{
-	struct symbol *sym, *sym2;
-	struct property *prop;
-	char *p;
-
-	sym = current_entry->sym;
-	sym->flags |= SYMBOL_AUTO;
-	for_all_properties(sym, prop, P_ENV) {
-		sym2 = prop_get_symbol(prop);
-		if (strcmp(sym2->name, env))
-			menu_warn(current_entry, "redefining environment symbol from %s",
-				  sym2->name);
-		return;
-	}
-
-	prop = prop_alloc(P_ENV, sym);
-	prop->expr = expr_alloc_symbol(sym_lookup(env, SYMBOL_CONST));
-
-	sym_env_list = expr_alloc_one(E_LIST, sym_env_list);
-	sym_env_list->right.sym = sym;
-
-	p = getenv(env);
-	if (p)
-		sym_add_default(sym, p);
-	else
-		menu_warn(current_entry, "environment variable %s undefined", env);
-}
diff --git a/scripts/kconfig/tests/no_write_if_dep_unmet/expected_config b/scripts/kconfig/tests/no_write_if_dep_unmet/expected_config
index 0d15e41..4732288 100644
--- a/scripts/kconfig/tests/no_write_if_dep_unmet/expected_config
+++ b/scripts/kconfig/tests/no_write_if_dep_unmet/expected_config
@@ -1,5 +1,5 @@
 #
 # Automatically generated file; DO NOT EDIT.
-# Linux Kernel Configuration
+# Main menu
 #
 # CONFIG_A is not set
diff --git a/scripts/kconfig/tests/preprocess/builtin_func/Kconfig b/scripts/kconfig/tests/preprocess/builtin_func/Kconfig
new file mode 100644
index 0000000..baa3288
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/builtin_func/Kconfig
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# 'info' prints the argument to stdout.
+$(info,hello world 0)
+
+# If the first argument is y, 'warning-if' sends the second argument to stderr,
+# prefixed with the current file name and line number.
+$(warning-if,y,hello world 1)
+
+# 'error-if' is similar, but it terminates the parsing immediately.
+# The following is just a no-op since the first argument is not y.
+$(error-if,n,this should not be printed)
+
+# Shorthand
+warning = $(warning-if,y,$(1))
+
+# 'shell' executes a command, and returns its stdout.
+$(warning,$(shell,echo hello world 3))
+
+# Every newline in the output is replaced with a space,
+# but any trailing newlines are deleted.
+$(warning,$(shell,printf 'hello\nworld\n\n4\n\n\n'))
+
+# 'filename' is expanded to the currently parsed file name,
+# 'lineno' to the line number.
+$(warning,filename=$(filename))
+$(warning,lineno=$(lineno))
diff --git a/scripts/kconfig/tests/preprocess/builtin_func/__init__.py b/scripts/kconfig/tests/preprocess/builtin_func/__init__.py
new file mode 100644
index 0000000..2e53ba0
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/builtin_func/__init__.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+"""
+Built-in function tests.
+"""
+
+def test(conf):
+    assert conf.oldaskconfig() == 0
+    assert conf.stdout_contains('expected_stdout')
+    assert conf.stderr_matches('expected_stderr')
diff --git a/scripts/kconfig/tests/preprocess/builtin_func/expected_stderr b/scripts/kconfig/tests/preprocess/builtin_func/expected_stderr
new file mode 100644
index 0000000..33ea9ca
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/builtin_func/expected_stderr
@@ -0,0 +1,5 @@
+Kconfig:8: hello world 1
+Kconfig:18: hello world 3
+Kconfig:22: hello world  4
+Kconfig:26: filename=Kconfig
+Kconfig:27: lineno=27
diff --git a/scripts/kconfig/tests/preprocess/builtin_func/expected_stdout b/scripts/kconfig/tests/preprocess/builtin_func/expected_stdout
new file mode 100644
index 0000000..82de3a7
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/builtin_func/expected_stdout
@@ -0,0 +1 @@
+hello world 0
diff --git a/scripts/kconfig/tests/preprocess/circular_expansion/Kconfig b/scripts/kconfig/tests/preprocess/circular_expansion/Kconfig
new file mode 100644
index 0000000..6838997
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/circular_expansion/Kconfig
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+X = $(Y)
+Y = $(X)
+$(info $(X))
diff --git a/scripts/kconfig/tests/preprocess/circular_expansion/__init__.py b/scripts/kconfig/tests/preprocess/circular_expansion/__init__.py
new file mode 100644
index 0000000..419bda3
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/circular_expansion/__init__.py
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+"""
+Detect circular variable expansion.
+
+If a recursively expanded variable references itself (eventually),
+it should fail with an error message.
+"""
+
+def test(conf):
+    assert conf.oldaskconfig() != 0
+    assert conf.stderr_matches('expected_stderr')
diff --git a/scripts/kconfig/tests/preprocess/circular_expansion/expected_stderr b/scripts/kconfig/tests/preprocess/circular_expansion/expected_stderr
new file mode 100644
index 0000000..cde68fa
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/circular_expansion/expected_stderr
@@ -0,0 +1 @@
+Kconfig:5: Recursive variable 'X' references itself (eventually)
diff --git a/scripts/kconfig/tests/preprocess/escape/Kconfig b/scripts/kconfig/tests/preprocess/escape/Kconfig
new file mode 100644
index 0000000..4e3f444
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/escape/Kconfig
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Shorthand
+warning = $(warning-if,y,$(1))
+
+# You can not pass commas directly to a function since they are treated as
+# delimiters. You can use the following trick to do so.
+comma   := ,
+$(warning,hello$(comma) world)
+
+# As in Make, single quotes, double quotes, and spaces are treated verbatim.
+# The following prints the text as-is.
+$(warning,  ' " '"   ' ''' "'")
+
+# Unlike Make, '$' has special meaning only when it is followed by '('.
+# No need to escape '$' itself.
+$(warning,$)
+$(warning,$$)
+$ := 1
+$(warning,$($))
+
+# You need a trick to escape '$' followed by '('
+# The following should print "$(X)". It should not be expanded further.
+dollar := $
+$(warning,$(dollar)(X))
+
+# You need a trick to handle unbalanced parentheses.
+# The following should print "(".
+left_paren := (
+$(warning,$(left_paren))
+
+# A simply expanded variable should not be expanded multiple times.
+# The following should print "$(X)". It should not be expanded further.
+Y := $(dollar)(X)
+$(warning,$(Y))
+
+# The following should print "$(X)" as well.
+Y = $(dollar)(X)
+$(warning,$(Y))
+
+# The following should print "$(".
+# It should not emit an "unterminated reference" error.
+unterminated := $(dollar)(
+$(warning,$(unterminated))
diff --git a/scripts/kconfig/tests/preprocess/escape/__init__.py b/scripts/kconfig/tests/preprocess/escape/__init__.py
new file mode 100644
index 0000000..7ee8e74
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/escape/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+"""
+Escape sequence tests.
+"""
+
+def test(conf):
+    assert conf.oldaskconfig() == 0
+    assert conf.stderr_matches('expected_stderr')
diff --git a/scripts/kconfig/tests/preprocess/escape/expected_stderr b/scripts/kconfig/tests/preprocess/escape/expected_stderr
new file mode 100644
index 0000000..1c00957
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/escape/expected_stderr
@@ -0,0 +1,10 @@
+Kconfig:9: hello, world
+Kconfig:13:   ' " '"   ' ''' "'"
+Kconfig:17: $
+Kconfig:18: $$
+Kconfig:20: 1
+Kconfig:25: $(X)
+Kconfig:30: (
+Kconfig:35: $(X)
+Kconfig:39: $(X)
+Kconfig:44: $(
diff --git a/scripts/kconfig/tests/preprocess/variable/Kconfig b/scripts/kconfig/tests/preprocess/variable/Kconfig
new file mode 100644
index 0000000..9ce2f95
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/variable/Kconfig
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Shorthand
+warning = $(warning-if,y,$(1))
+
+# Simply expanded variable.
+X := 1
+SIMPLE := $(X)
+X := 2
+$(warning,SIMPLE = $(SIMPLE))
+
+# Recursively expanded variable.
+X := 1
+RECURSIVE = $(X)
+X := 2
+$(warning,RECURSIVE = $(RECURSIVE))
+
+# Append something to a simply expanded variable.
+Y := 3
+SIMPLE += $(Y)
+Y := 4
+$(warning,SIMPLE = $(SIMPLE))
+
+# Append something to a recursively expanded variable.
+Y := 3
+RECURSIVE += $(Y)
+Y := 4
+$(warning,RECURSIVE = $(RECURSIVE))
+
+# Use += operator to an undefined variable.
+# This works as a recursively expanded variable.
+Y := 3
+UNDEFINED_VARIABLE += $(Y)
+Y := 4
+$(warning,UNDEFINED_VARIABLE = $(UNDEFINED_VARIABLE))
+
+# You can use variable references on the left-hand side of an assignment statement.
+X := A
+Y := B
+$(X)$(Y) := 5
+$(warning,AB = $(AB))
+
+# User-defined function.
+greeting = $(1), my name is $(2).
+$(warning,$(greeting,Hello,John))
+
+# The number of arguments is not checked for user-defined functions.
+# If some arguments are optional, you can pass fewer parameters;
+# $(2) will expand to the empty string in this case.
+$(warning,$(greeting,Hello))
+
+# Unreferenced parameters are just ignored.
+$(warning,$(greeting,Hello,John,ignored,ignored))
diff --git a/scripts/kconfig/tests/preprocess/variable/__init__.py b/scripts/kconfig/tests/preprocess/variable/__init__.py
new file mode 100644
index 0000000..e88b170
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/variable/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+"""
+Variable and user-defined function tests.
+"""
+
+def test(conf):
+    assert conf.oldaskconfig() == 0
+    assert conf.stderr_matches('expected_stderr')
diff --git a/scripts/kconfig/tests/preprocess/variable/expected_stderr b/scripts/kconfig/tests/preprocess/variable/expected_stderr
new file mode 100644
index 0000000..a4841c3
--- /dev/null
+++ b/scripts/kconfig/tests/preprocess/variable/expected_stderr
@@ -0,0 +1,9 @@
+Kconfig:10: SIMPLE = 1
+Kconfig:16: RECURSIVE = 2
+Kconfig:22: SIMPLE = 1 3
+Kconfig:28: RECURSIVE = 2 4
+Kconfig:35: UNDEFINED_VARIABLE = 4
+Kconfig:41: AB = 5
+Kconfig:45: Hello, my name is John.
+Kconfig:50: Hello, my name is .
+Kconfig:53: Hello, my name is John.
diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c
index c6f6e21..a365594 100644
--- a/scripts/kconfig/util.c
+++ b/scripts/kconfig/util.c
@@ -14,18 +14,16 @@
 struct file *file_lookup(const char *name)
 {
 	struct file *file;
-	char *file_name = sym_expand_string_value(name);
 
 	for (file = file_list; file; file = file->next) {
 		if (!strcmp(name, file->name)) {
-			free(file_name);
 			return file;
 		}
 	}
 
 	file = xmalloc(sizeof(*file));
 	memset(file, 0, sizeof(*file));
-	file->name = file_name;
+	file->name = xstrdup(name);
 	file->next = file_list;
 	file_list = file;
 	return file;
@@ -34,8 +32,6 @@
 /* write a dependency file as used by kbuild to track dependencies */
 int file_write_dep(const char *name)
 {
-	struct symbol *sym, *env_sym;
-	struct expr *e;
 	struct file *file;
 	FILE *out;
 
@@ -54,21 +50,7 @@
 	fprintf(out, "\n%s: \\\n"
 		     "\t$(deps_config)\n\n", conf_get_autoconfig_name());
 
-	expr_list_for_each_sym(sym_env_list, e, sym) {
-		struct property *prop;
-		const char *value;
-
-		prop = sym_get_env_prop(sym);
-		env_sym = prop_get_symbol(prop);
-		if (!env_sym)
-			continue;
-		value = getenv(env_sym->name);
-		if (!value)
-			value = "";
-		fprintf(out, "ifneq \"$(%s)\" \"%s\"\n", env_sym->name, value);
-		fprintf(out, "%s: FORCE\n", conf_get_autoconfig_name());
-		fprintf(out, "endif\n");
-	}
+	env_write_dep(out, conf_get_autoconfig_name());
 
 	fprintf(out, "\n$(deps_config): ;\n");
 	fclose(out);
@@ -165,3 +147,14 @@
 	fprintf(stderr, "Out of memory.\n");
 	exit(1);
 }
+
+char *xstrndup(const char *s, size_t n)
+{
+	char *p;
+
+	p = strndup(s, n);
+	if (p)
+		return p;
+	fprintf(stderr, "Out of memory.\n");
+	exit(1);
+}
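
xstrndup() follows the same convention as xmalloc() and xstrdup(): it never
returns NULL, exiting on allocation failure instead, so callers need no error
handling. A small usage sketch (first_token() is a hypothetical helper, not
part of this change):

    #include <string.h>

    char *xstrndup(const char *s, size_t n);    /* scripts/kconfig/util.c */

    /* Copy the first whitespace-delimited token of a line.
     * No NULL check is needed: xstrndup() exits on OOM.
     * The caller must free() the result. */
    static char *first_token(const char *line)
    {
            return xstrndup(line, strcspn(line, " \t\n"));
    }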
diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l
index 045093d..25bd2b8 100644
--- a/scripts/kconfig/zconf.l
+++ b/scripts/kconfig/zconf.l
@@ -1,13 +1,13 @@
 %option nostdinit noyywrap never-interactive full ecs
 %option 8bit nodefault yylineno
-%option noinput
-%x COMMAND HELP STRING PARAM
+%x COMMAND HELP STRING PARAM ASSIGN_VAL
 %{
 /*
  * Copyright (C) 2002 Roman Zippel <zippel@linux-m68k.org>
  * Released under the terms of the GNU GPL v2.0.
  */
 
+#include <assert.h>
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -35,6 +35,8 @@
 
 static int last_ts, first_ts;
 
+static char *expand_token(const char *in, size_t n);
+static void append_expanded_string(const char *in);
 static void zconf_endhelp(void);
 static void zconf_endfile(void);
 
@@ -101,17 +103,28 @@
 <COMMAND>{
 	{n}+	{
 		const struct kconf_id *id = kconf_id_lookup(yytext, yyleng);
-		BEGIN(PARAM);
 		current_pos.file = current_file;
 		current_pos.lineno = yylineno;
 		if (id && id->flags & TF_COMMAND) {
+			BEGIN(PARAM);
 			yylval.id = id;
 			return id->token;
 		}
 		alloc_string(yytext, yyleng);
 		yylval.string = text;
-		return T_WORD;
+		return T_VARIABLE;
 	}
+	({n}|$)+	{
+		/* this token includes at least one '$' */
+		yylval.string = expand_token(yytext, yyleng);
+		if (strlen(yylval.string))
+			return T_VARIABLE;
+		free(yylval.string);
+	}
+	"="	{ BEGIN(ASSIGN_VAL); yylval.flavor = VAR_RECURSIVE; return T_ASSIGN; }
+	":="	{ BEGIN(ASSIGN_VAL); yylval.flavor = VAR_SIMPLE; return T_ASSIGN; }
+	"+="	{ BEGIN(ASSIGN_VAL); yylval.flavor = VAR_APPEND; return T_ASSIGN; }
+	[[:blank:]]+
 	.	warn_ignored_character(*yytext);
 	\n	{
 		BEGIN(INITIAL);
@@ -119,6 +132,16 @@
 	}
 }
 
+<ASSIGN_VAL>{
+	[^[:blank:]\n]+.*	{
+		alloc_string(yytext, yyleng);
+		yylval.string = text;
+		return T_ASSIGN_VAL;
+	}
+	\n	{ BEGIN(INITIAL); return T_EOL; }
+	.
+}
+
 <PARAM>{
 	"&&"	return T_AND;
 	"||"	return T_OR;
@@ -147,6 +170,13 @@
 		yylval.string = text;
 		return T_WORD;
 	}
+	({n}|[/.$])+	{
+		/* this token includes at least one '$' */
+		yylval.string = expand_token(yytext, yyleng);
+		if (strlen(yylval.string))
+			return T_WORD;
+		free(yylval.string);
+	}
 	#.*	/* comment */
 	\\\n	;
 	[[:blank:]]+
@@ -157,12 +187,13 @@
 }
 
 <STRING>{
-	[^'"\\\n]+/\n	{
+	"$".*	append_expanded_string(yytext);
+	[^$'"\\\n]+/\n	{
 		append_string(yytext, yyleng);
 		yylval.string = text;
 		return T_WORD_QUOTE;
 	}
-	[^'"\\\n]+	{
+	[^$'"\\\n]+	{
 		append_string(yytext, yyleng);
 	}
 	\\.?/\n	{
@@ -249,6 +280,58 @@
 }
 
 %%
+static char *expand_token(const char *in, size_t n)
+{
+	char *out;
+	int c;
+	char c2;
+	const char *rest, *end;
+
+	new_string();
+	append_string(in, n);
+
+	/* Get the whole line because we do not know the end of the token. */
+	while ((c = input()) != EOF) {
+		if (c == '\n') {
+			unput(c);
+			break;
+		}
+		c2 = c;
+		append_string(&c2, 1);
+	}
+
+	rest = text;
+	out = expand_one_token(&rest);
+
+	/* push back unused characters to the input stream */
+	end = rest + strlen(rest);
+	while (end > rest)
+		unput(*--end);
+
+	free(text);
+
+	return out;
+}
+
+static void append_expanded_string(const char *str)
+{
+	const char *end;
+	char *res;
+
+	str++;
+
+	res = expand_dollar(&str);
+
+	/* push back unused characters to the input stream */
+	end = str + strlen(str);
+	while (end > str)
+		unput(*--end);
+
+	append_string(res, strlen(res));
+
+	free(res);
+}
+
 void zconf_starthelp(void)
 {
 	new_string();
diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y
index ad6305b..6f9b0aa 100644
--- a/scripts/kconfig/zconf.y
+++ b/scripts/kconfig/zconf.y
@@ -41,6 +41,7 @@
 	struct expr *expr;
 	struct menu *menu;
 	const struct kconf_id *id;
+	enum variable_flavor flavor;
 }
 
 %token <id>T_MAINMENU
@@ -77,6 +78,9 @@
 %token T_CLOSE_PAREN
 %token T_OPEN_PAREN
 %token T_EOL
+%token <string> T_VARIABLE
+%token <flavor> T_ASSIGN
+%token <string> T_ASSIGN_VAL
 
 %left T_OR
 %left T_AND
@@ -92,7 +96,7 @@
 %type <id> end
 %type <id> option_name
 %type <menu> if_entry menu_entry choice_entry
-%type <string> symbol_option_arg word_opt
+%type <string> symbol_option_arg word_opt assign_val
 
 %destructor {
 	fprintf(stderr, "%s:%d: missing end statement for this entry\n",
@@ -109,7 +113,7 @@
 %%
 input: nl start | start;
 
-start: mainmenu_stmt stmt_list | no_mainmenu_stmt stmt_list;
+start: mainmenu_stmt stmt_list | stmt_list;
 
 /* mainmenu entry */
 
@@ -118,19 +122,6 @@
 	menu_add_prompt(P_MENU, $2, NULL);
 };
 
-/* Default main menu, if there's no mainmenu entry */
-
-no_mainmenu_stmt: /* empty */
-{
-	/*
-	 * Hack: Keep the main menu title on the heap so we can safely free it
-	 * later regardless of whether it comes from the 'prompt' in
-	 * mainmenu_stmt or here
-	 */
-	menu_add_prompt(P_MENU, xstrdup("Linux Kernel Configuration"), NULL);
-};
-
-
 stmt_list:
 	  /* empty */
 	| stmt_list common_stmt
@@ -156,6 +147,7 @@
 	| config_stmt
 	| menuconfig_stmt
 	| source_stmt
+	| assignment_stmt
 ;
 
 option_error:
@@ -524,31 +516,42 @@
 word_opt: /* empty */			{ $$ = NULL; }
 	| T_WORD
 
+/* assignment statement */
+
+assignment_stmt:  T_VARIABLE T_ASSIGN assign_val T_EOL	{ variable_add($1, $3, $2); free($1); free($3); }
+
+assign_val:
+	/* empty */		{ $$ = xstrdup(""); };
+	| T_ASSIGN_VAL
+;
+
 %%
 
 void conf_parse(const char *name)
 {
-	const char *tmp;
 	struct symbol *sym;
 	int i;
 
 	zconf_initscan(name);
 
-	sym_init();
 	_menu_init();
 
 	if (getenv("ZCONF_DEBUG"))
 		yydebug = 1;
 	yyparse();
+
+	/* Variables are expanded in the parse phase. We can free them here. */
+	variable_all_del();
+
 	if (yynerrs)
 		exit(1);
 	if (!modules_sym)
 		modules_sym = sym_find( "n" );
 
-	tmp = rootmenu.prompt->text;
-	rootmenu.prompt->text = _(rootmenu.prompt->text);
-	rootmenu.prompt->text = sym_expand_string_value(rootmenu.prompt->text);
-	free((char*)tmp);
+	if (!menu_has_prompt(&rootmenu)) {
+		current_entry = &rootmenu;
+		menu_add_prompt(P_MENU, "Main menu", NULL);
+	}
 
 	menu_finalize(&rootmenu);
 	for_all_symbols(i, sym) {
@@ -780,3 +783,4 @@
 #include "expr.c"
 #include "symbol.c"
 #include "menu.c"
+#include "preprocess.c"
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 9045823..4bf811c 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -121,10 +121,6 @@
 	info KSYM ${2}
 	local kallsymopt;
 
-	if [ -n "${CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX}" ]; then
-		kallsymopt="${kallsymopt} --symbol-prefix=_"
-	fi
-
 	if [ -n "${CONFIG_KALLSYMS_ALL}" ]; then
 		kallsymopt="${kallsymopt} --all-symbols"
 	fi
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 9fad6af..6667f7b 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -139,6 +139,9 @@
 	DEVID(hv_vmbus_device_id);
 	DEVID_FIELD(hv_vmbus_device_id, guid);
 
+	DEVID(rpmsg_device_id);
+	DEVID_FIELD(rpmsg_device_id, name);
+
 	DEVID(i2c_device_id);
 	DEVID_FIELD(i2c_device_id, name);
 
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index b9beeaa..52fd54a 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -944,6 +944,17 @@
 }
 ADD_TO_DEVTABLE("vmbus", hv_vmbus_device_id, do_vmbus_entry);
 
+/* Looks like: rpmsg:S */
+static int do_rpmsg_entry(const char *filename, void *symval,
+			  char *alias)
+{
+	DEF_FIELD_ADDR(symval, rpmsg_device_id, name);
+	sprintf(alias, RPMSG_DEVICE_MODALIAS_FMT, *name);
+
+	return 1;
+}
+ADD_TO_DEVTABLE("rpmsg", rpmsg_device_id, do_rpmsg_entry);
+
 /* Looks like: i2c:S */
 static int do_i2c_entry(const char *filename, void *symval,
 			char *alias)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 4ff08a0..1663fb1 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -19,9 +19,7 @@
 #include <stdbool.h>
 #include <errno.h>
 #include "modpost.h"
-#include "../../include/generated/autoconf.h"
 #include "../../include/linux/license.h"
-#include "../../include/linux/export.h"
 
 /* Are we using CONFIG_MODVERSIONS? */
 static int modversions = 0;
@@ -123,7 +121,7 @@
 /* A list of all modules we processed */
 static struct module *modules;
 
-static struct module *find_module(char *modname)
+static struct module *find_module(const char *modname)
 {
 	struct module *mod;
 
@@ -591,35 +589,32 @@
 static int ignore_undef_symbol(struct elf_info *info, const char *symname)
 {
 	/* ignore __this_module, it will be resolved shortly */
-	if (strcmp(symname, VMLINUX_SYMBOL_STR(__this_module)) == 0)
+	if (strcmp(symname, "__this_module") == 0)
 		return 1;
 	/* ignore global offset table */
 	if (strcmp(symname, "_GLOBAL_OFFSET_TABLE_") == 0)
 		return 1;
 	if (info->hdr->e_machine == EM_PPC)
 		/* Special register function linked on all modules during final link of .ko */
-		if (strncmp(symname, "_restgpr_", sizeof("_restgpr_") - 1) == 0 ||
-		    strncmp(symname, "_savegpr_", sizeof("_savegpr_") - 1) == 0 ||
-		    strncmp(symname, "_rest32gpr_", sizeof("_rest32gpr_") - 1) == 0 ||
-		    strncmp(symname, "_save32gpr_", sizeof("_save32gpr_") - 1) == 0 ||
-		    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-		    strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0)
+		if (strstarts(symname, "_restgpr_") ||
+		    strstarts(symname, "_savegpr_") ||
+		    strstarts(symname, "_rest32gpr_") ||
+		    strstarts(symname, "_save32gpr_") ||
+		    strstarts(symname, "_restvr_") ||
+		    strstarts(symname, "_savevr_"))
 			return 1;
 	if (info->hdr->e_machine == EM_PPC64)
 		/* Special register function linked on all modules during final link of .ko */
-		if (strncmp(symname, "_restgpr0_", sizeof("_restgpr0_") - 1) == 0 ||
-		    strncmp(symname, "_savegpr0_", sizeof("_savegpr0_") - 1) == 0 ||
-		    strncmp(symname, "_restvr_", sizeof("_restvr_") - 1) == 0 ||
-		    strncmp(symname, "_savevr_", sizeof("_savevr_") - 1) == 0 ||
+		if (strstarts(symname, "_restgpr0_") ||
+		    strstarts(symname, "_savegpr0_") ||
+		    strstarts(symname, "_restvr_") ||
+		    strstarts(symname, "_savevr_") ||
 		    strcmp(symname, ".TOC.") == 0)
 			return 1;
 	/* Do not ignore this symbol */
 	return 0;
 }
 
-#define CRC_PFX     VMLINUX_SYMBOL_STR(__crc_)
-#define KSYMTAB_PFX VMLINUX_SYMBOL_STR(__ksymtab_)
-
 static void handle_modversions(struct module *mod, struct elf_info *info,
 			       Elf_Sym *sym, const char *symname)
 {
@@ -628,13 +623,13 @@
 	bool is_crc = false;
 
 	if ((!is_vmlinux(mod->name) || mod->is_dot_o) &&
-	    strncmp(symname, "__ksymtab", 9) == 0)
+	    strstarts(symname, "__ksymtab"))
 		export = export_from_secname(info, get_secindex(info, sym));
 	else
 		export = export_from_sec(info, get_secindex(info, sym));
 
 	/* CRC'd symbol */
-	if (strncmp(symname, CRC_PFX, strlen(CRC_PFX)) == 0) {
+	if (strstarts(symname, "__crc_")) {
 		is_crc = true;
 		crc = (unsigned int) sym->st_value;
 		if (sym->st_shndx != SHN_UNDEF && sym->st_shndx != SHN_ABS) {
@@ -647,13 +642,13 @@
 				info->sechdrs[sym->st_shndx].sh_addr : 0);
 			crc = *crcp;
 		}
-		sym_update_crc(symname + strlen(CRC_PFX), mod, crc,
+		sym_update_crc(symname + strlen("__crc_"), mod, crc,
 				export);
 	}
 
 	switch (sym->st_shndx) {
 	case SHN_COMMON:
-		if (!strncmp(symname, "__gnu_lto_", sizeof("__gnu_lto_")-1)) {
+		if (strstarts(symname, "__gnu_lto_")) {
 			/* Should warn here, but modpost runs before the linker */
 		} else
 			warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name);
@@ -685,15 +680,10 @@
 		}
 #endif
 
-#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
-		if (symname[0] != '_')
-			break;
-		else
-			symname++;
-#endif
 		if (is_crc) {
 			const char *e = is_vmlinux(mod->name) ?"":".ko";
-			warn("EXPORT symbol \"%s\" [%s%s] version generation failed, symbol will not be versioned.\n", symname + strlen(CRC_PFX), mod->name, e);
+			warn("EXPORT symbol \"%s\" [%s%s] version generation failed, symbol will not be versioned.\n",
+			     symname + strlen("__crc_"), mod->name, e);
 		}
 		mod->unres = alloc_symbol(symname,
 					  ELF_ST_BIND(sym->st_info) == STB_WEAK,
@@ -701,13 +691,13 @@
 		break;
 	default:
 		/* All exported symbols */
-		if (strncmp(symname, KSYMTAB_PFX, strlen(KSYMTAB_PFX)) == 0) {
-			sym_add_exported(symname + strlen(KSYMTAB_PFX), mod,
+		if (strstarts(symname, "__ksymtab_")) {
+			sym_add_exported(symname + strlen("__ksymtab_"), mod,
 					export);
 		}
-		if (strcmp(symname, VMLINUX_SYMBOL_STR(init_module)) == 0)
+		if (strcmp(symname, "init_module") == 0)
 			mod->has_init = 1;
-		if (strcmp(symname, VMLINUX_SYMBOL_STR(cleanup_module)) == 0)
+		if (strcmp(symname, "cleanup_module") == 0)
 			mod->has_cleanup = 1;
 		break;
 	}
@@ -734,16 +724,17 @@
 	return string;
 }
 
-static char *get_next_modinfo(void *modinfo, unsigned long modinfo_len,
-			      const char *tag, char *info)
+static char *get_next_modinfo(struct elf_info *info, const char *tag,
+			      char *prev)
 {
 	char *p;
 	unsigned int taglen = strlen(tag);
-	unsigned long size = modinfo_len;
+	char *modinfo = info->modinfo;
+	unsigned long size = info->modinfo_len;
 
-	if (info) {
-		size -= info - (char *)modinfo;
-		modinfo = next_string(info, &size);
+	if (prev) {
+		size -= prev - modinfo;
+		modinfo = next_string(prev, &size);
 	}
 
 	for (p = modinfo; p; p = next_string(p, &size)) {
@@ -753,11 +744,10 @@
 	return NULL;
 }
 
-static char *get_modinfo(void *modinfo, unsigned long modinfo_len,
-			 const char *tag)
+static char *get_modinfo(struct elf_info *info, const char *tag)
 
 {
-	return get_next_modinfo(modinfo, modinfo_len, tag, NULL);
+	return get_next_modinfo(info, tag, NULL);
 }
 
 /**
@@ -1181,13 +1171,13 @@
 	/* Check for pattern 1 */
 	if (match(tosec, init_data_sections) &&
 	    match(fromsec, data_sections) &&
-	    (strncmp(fromsym, "__param", strlen("__param")) == 0))
+	    strstarts(fromsym, "__param"))
 		return 0;
 
 	/* Check for pattern 1a */
 	if (strcmp(tosec, ".init.text") == 0 &&
 	    match(fromsec, data_sections) &&
-	    (strncmp(fromsym, "__param_ops_", strlen("__param_ops_")) == 0))
+	    strstarts(fromsym, "__param_ops_"))
 		return 0;
 
 	/* Check for pattern 2 */
@@ -1542,8 +1532,7 @@
 	from = find_elf_symbol2(elf, r->r_offset, fromsec);
 	fromsym = sym_name(elf, from);
 
-	if (!strncmp(fromsym, "reference___initcall",
-		     sizeof("reference___initcall")-1))
+	if (strstarts(fromsym, "reference___initcall"))
 		return;
 
 	tosec = sec_name(elf, get_secindex(elf, sym));
@@ -1940,7 +1929,7 @@
 	return s;
 }
 
-static void read_symbols(char *modname)
+static void read_symbols(const char *modname)
 {
 	const char *symname;
 	char *version;
@@ -1961,7 +1950,7 @@
 		mod->skip = 1;
 	}
 
-	license = get_modinfo(info.modinfo, info.modinfo_len, "license");
+	license = get_modinfo(&info, "license");
 	if (!license && !is_vmlinux(modname))
 		warn("modpost: missing MODULE_LICENSE() in %s\n"
 		     "see include/linux/module.h for "
@@ -1973,8 +1962,7 @@
 			mod->gpl_compatible = 0;
 			break;
 		}
-		license = get_next_modinfo(info.modinfo, info.modinfo_len,
-					   "license", license);
+		license = get_next_modinfo(&info, "license", license);
 	}
 
 	for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
@@ -1983,11 +1971,10 @@
 		handle_modversions(mod, &info, sym, symname);
 		handle_moddevtable(mod, &info, sym, symname);
 	}
-	if (!is_vmlinux(modname) ||
-	     (is_vmlinux(modname) && vmlinux_section_warnings))
+	if (!is_vmlinux(modname) || vmlinux_section_warnings)
 		check_sec_ref(mod, modname, &info);
 
-	version = get_modinfo(info.modinfo, info.modinfo_len, "version");
+	version = get_modinfo(&info, "version");
 	if (version)
 		maybe_frob_rcs_version(modname, version, info.modinfo,
 				       version - (char *)info.hdr);
@@ -2174,9 +2161,7 @@
 
 static void add_staging_flag(struct buffer *b, const char *name)
 {
-	static const char *staging_dir = "drivers/staging";
-
-	if (strncmp(staging_dir, name, strlen(staging_dir)) == 0)
+	if (strstarts(name, "drivers/staging"))
 		buf_printf(b, "\nMODULE_INFO(staging, \"Y\");\n");
 }
 
@@ -2230,7 +2215,7 @@
 			err = 1;
 			break;
 		}
-		buf_printf(b, "\t{ %#8x, __VMLINUX_SYMBOL_STR(%s) },\n",
+		buf_printf(b, "\t{ %#8x, \"%s\" },\n",
 			   s->crc, s->name);
 	}
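
The strstarts() conversions above rely on the kernel's prefix-test helper;
for reference, include/linux/string.h defines it as:

    #include <stdbool.h>
    #include <string.h>

    static inline bool strstarts(const char *str, const char *prefix)
    {
            return strncmp(str, prefix, strlen(prefix)) == 0;
    }

Compared with the open-coded strncmp(sym, "pfx", sizeof("pfx") - 1) pattern
it replaces, there is no length constant to keep in sync with the prefix.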
 
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 6adb3a1..985d72d 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -71,22 +71,21 @@
 	packagename=user-mode-linux-$version
 fi
 
-# Try to determine maintainer and email values
-if [ -n "$DEBEMAIL" ]; then
-       email=$DEBEMAIL
-elif [ -n "$EMAIL" ]; then
-       email=$EMAIL
+email=${DEBEMAIL-$EMAIL}
+
+# Use the email string directly if it contains <email>
+if echo $email | grep -q '<.*>'; then
+	maintainer=$email
 else
-       email=$(id -nu)@$(hostname -f 2>/dev/null || hostname)
+	# or construct the maintainer string
+	user=${KBUILD_BUILD_USER-$(id -nu)}
+	name=${DEBFULLNAME-$user}
+	if [ -z "$email" ]; then
+		buildhost=${KBUILD_BUILD_HOST-$(hostname -f 2>/dev/null || hostname)}
+		email="$user@$buildhost"
+	fi
+	maintainer="$name <$email>"
 fi
-if [ -n "$DEBFULLNAME" ]; then
-       name=$DEBFULLNAME
-elif [ -n "$NAME" ]; then
-       name=$NAME
-else
-       name="Anonymous"
-fi
-maintainer="$name <$email>"
 
 # Try to determine distribution
 if [ -n "$KDEB_CHANGELOG_DIST" ]; then
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 8c9691c..895c40e 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -500,7 +500,7 @@
 	gpfx = 0;
 	switch (w2(ehdr->e_machine)) {
 	default:
-		fprintf(stderr, "unrecognized e_machine %d %s\n",
+		fprintf(stderr, "unrecognized e_machine %u %s\n",
 			w2(ehdr->e_machine), fname);
 		fail_file();
 		break;
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index b9897e2..2e77937 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -441,7 +441,7 @@
 			return symp - sym0;
 		}
 	}
-	fprintf(stderr, "Cannot find symbol for section %d: %s.\n",
+	fprintf(stderr, "Cannot find symbol for section %u: %s.\n",
 		txtndx, txtname);
 	fail_file();
 }
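
The %d -> %u fixes above match the conversion specifier to the argument's
signedness: txtndx is an unsigned int, and printing an unsigned value with
%d is undefined once the value exceeds INT_MAX. A trivial illustration:

    #include <stdio.h>

    int main(void)
    {
            unsigned int ndx = 3000000000u;     /* > INT_MAX */

            printf("section %u\n", ndx);        /* well-defined */
            /* printf("section %d\n", ndx) would be undefined */
            return 0;
    }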
diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py
new file mode 100755
index 0000000..7deaef2
--- /dev/null
+++ b/scripts/spdxcheck.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+# Copyright Thomas Gleixner <tglx@linutronix.de>
+
+from argparse import ArgumentParser
+from ply import lex, yacc
+import traceback
+import sys
+import git
+import re
+import os
+
+class ParserException(Exception):
+    def __init__(self, tok, txt):
+        self.tok = tok
+        self.txt = txt
+
+class SPDXException(Exception):
+    def __init__(self, el, txt):
+        self.el = el
+        self.txt = txt
+
+class SPDXdata(object):
+    def __init__(self):
+        self.license_files = 0
+        self.exception_files = 0
+        self.licenses = [ ]
+        self.exceptions = { }
+
+# Read the spdx data from the LICENSES directory
+def read_spdxdata(repo):
+
+    # The subdirectories of LICENSES in the kernel source
+    license_dirs = [ "preferred", "other", "exceptions" ]
+    lictree = repo.heads.master.commit.tree['LICENSES']
+
+    spdx = SPDXdata()
+
+    for d in license_dirs:
+        for el in lictree[d].traverse():
+            if not os.path.isfile(el.path):
+                continue
+
+            exception = None
+            for l in open(el.path).readlines():
+                if l.startswith('Valid-License-Identifier:'):
+                    lid = l.split(':')[1].strip().upper()
+                    if lid in spdx.licenses:
+                        raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
+                    else:
+                        spdx.licenses.append(lid)
+
+                elif l.startswith('SPDX-Exception-Identifier:'):
+                    exception = l.split(':')[1].strip().upper()
+                    spdx.exceptions[exception] = []
+
+                elif l.startswith('SPDX-Licenses:'):
+                    for lic in l.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
+                        if not lic in spdx.licenses:
+                            raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
+                        spdx.exceptions[exception].append(lic)
+
+                elif l.startswith("License-Text:"):
+                    if exception:
+                        if not len(spdx.exceptions[exception]):
+                            raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
+                        spdx.exception_files += 1
+                    else:
+                        spdx.license_files += 1
+                    break
+    return spdx
+
+class id_parser(object):
+
+    reserved = [ 'AND', 'OR', 'WITH' ]
+    tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved
+
+    precedence = ( ('nonassoc', 'AND', 'OR'), )
+
+    t_ignore = ' \t'
+
+    def __init__(self, spdx):
+        self.spdx = spdx
+        self.lasttok = None
+        self.lastid = None
+        self.lexer = lex.lex(module = self, reflags = re.UNICODE)
+        # Initialize the parser. No debug file and no parser rules stored on
+        # disk; the rules are small enough to be generated on the fly.
+        self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
+        self.lines_checked = 0
+        self.checked = 0
+        self.spdx_valid = 0
+        self.spdx_errors = 0
+        self.curline = 0
+        self.deepest = 0
+
+    # Validate License and Exception IDs
+    def validate(self, tok):
+        id = tok.value.upper()
+        if tok.type == 'ID':
+            if not id in self.spdx.licenses:
+                raise ParserException(tok, 'Invalid License ID')
+            self.lastid = id
+        elif tok.type == 'EXC':
+            if id not in self.spdx.exceptions:
+                raise ParserException(tok, 'Invalid Exception ID')
+            if self.lastid not in self.spdx.exceptions[id]:
+                raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
+            self.lastid = None
+        elif tok.type != 'WITH':
+            self.lastid = None
+
+    # Lexer functions
+    def t_RPAR(self, tok):
+        r'\)'
+        self.lasttok = tok.type
+        return tok
+
+    def t_LPAR(self, tok):
+        r'\('
+        self.lasttok = tok.type
+        return tok
+
+    def t_ID(self, tok):
+        r'[A-Za-z.0-9\-+]+'
+
+        if self.lasttok == 'EXC':
+            raise ParserException(tok, 'Missing parentheses')
+
+        tok.value = tok.value.strip()
+        val = tok.value.upper()
+
+        if val in self.reserved:
+            tok.type = val
+        elif self.lasttok == 'WITH':
+            tok.type = 'EXC'
+
+        self.lasttok = tok.type
+        self.validate(tok)
+        return tok
+
+    def t_error(self, tok):
+        raise ParserException(tok, 'Invalid token')
+
+    def p_expr(self, p):
+        '''expr : ID
+                | ID WITH EXC
+                | expr AND expr
+                | expr OR expr
+                | LPAR expr RPAR'''
+        pass
+
+    def p_error(self, p):
+        if not p:
+            raise ParserException(None, 'Unfinished license expression')
+        else:
+            raise ParserException(p, 'Syntax error')
+
+    def parse(self, expr):
+        self.lasttok = None
+        self.lastid = None
+        self.parser.parse(expr, lexer = self.lexer)
+
+    def parse_lines(self, fd, maxlines, fname):
+        self.checked += 1
+        self.curline = 0
+        try:
+            for line in fd:
+                self.curline += 1
+                if self.curline > maxlines:
+                    break
+                self.lines_checked += 1
+                if line.find("SPDX-License-Identifier:") < 0:
+                    continue
+                expr = line.split(':')[1].replace('*/', '').strip()
+                self.parse(expr)
+                self.spdx_valid += 1
+                #
+                # Should we check for more SPDX ids in the same file and
+                # complain if there are any?
+                #
+                break
+
+        except ParserException as pe:
+            if pe.tok:
+                col = line.find(expr) + pe.tok.lexpos
+                tok = pe.tok.value
+                sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
+            else:
+                sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
+            self.spdx_errors += 1
+
+def scan_git_tree(tree):
+    for el in tree.traverse():
+        # Exclude stuff which would make pointless noise
+        # FIXME: Put this somewhere more sensible
+        if el.path.startswith("LICENSES"):
+            continue
+        if el.path.find("license-rules.rst") >= 0:
+            continue
+        if el.path == 'scripts/checkpatch.pl':
+            continue
+        if not os.path.isfile(el.path):
+            continue
+        parser.parse_lines(open(el.path), args.maxlines, el.path)
+
+def scan_git_subtree(tree, path):
+    for p in path.strip('/').split('/'):
+        tree = tree[p]
+    scan_git_tree(tree)
+
+if __name__ == '__main__':
+
+    ap = ArgumentParser(description='SPDX expression checker')
+    ap.add_argument('path', nargs='*', help='Check path or file. If not given, scan the full git tree. For stdin use "-"')
+    ap.add_argument('-m', '--maxlines', type=int, default=15,
+                    help='Maximum number of lines to scan in a file. Default 15')
+    ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
+    args = ap.parse_args()
+
+    # Sanity check path arguments
+    if '-' in args.path and len(args.path) > 1:
+        sys.stderr.write('stdin input "-" must be the only path argument\n')
+        sys.exit(1)
+
+    try:
+        # Use git to get the valid license expressions
+        repo = git.Repo(os.getcwd())
+        assert not repo.bare
+
+        # Initialize SPDX data
+        spdx = read_spdxdata(repo)
+
+        # Initialize the parser
+        parser = id_parser(spdx)
+
+    except SPDXException as se:
+        if se.el:
+            sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
+        else:
+            sys.stderr.write('%s\n' %se.txt)
+        sys.exit(1)
+
+    except Exception as ex:
+        sys.stderr.write('FAIL: %s\n' %ex)
+        sys.stderr.write('%s\n' %traceback.format_exc())
+        sys.exit(1)
+
+    try:
+        if len(args.path) and args.path[0] == '-':
+            parser.parse_lines(sys.stdin, args.maxlines, '-')
+        else:
+            if args.path:
+                for p in args.path:
+                    if os.path.isfile(p):
+                        parser.parse_lines(open(p), args.maxlines, p)
+                    elif os.path.isdir(p):
+                        scan_git_subtree(repo.head.reference.commit.tree, p)
+                    else:
+                        sys.stderr.write('path %s does not exist\n' %p)
+                        sys.exit(1)
+            else:
+                # Full git tree scan
+                scan_git_tree(repo.head.commit.tree)
+
+            if args.verbose:
+                sys.stderr.write('\n')
+                sys.stderr.write('License files:     %12d\n' %spdx.license_files)
+                sys.stderr.write('Exception files:   %12d\n' %spdx.exception_files)
+                sys.stderr.write('License IDs:       %12d\n' %len(spdx.licenses))
+                sys.stderr.write('Exception IDs:     %12d\n' %len(spdx.exceptions))
+                sys.stderr.write('\n')
+                sys.stderr.write('Files checked:     %12d\n' %parser.checked)
+                sys.stderr.write('Lines checked:     %12d\n' %parser.lines_checked)
+                sys.stderr.write('Files with SPDX:   %12d\n' %parser.spdx_valid)
+                sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)
+
+            sys.exit(0)
+
+    except Exception as ex:
+        sys.stderr.write('FAIL: %s\n' %ex)
+        sys.stderr.write('%s\n' %traceback.format_exc())
+        sys.exit(1)
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 78e546f..e587610 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -28,20 +28,11 @@
 # ignore userspace tools
 ignore="$ignore ( -path ${tree}tools ) -prune -o"
 
-# Find all available archs
-find_all_archs()
-{
-	ALLSOURCE_ARCHS=""
-	for arch in `ls ${tree}arch`; do
-		ALLSOURCE_ARCHS="${ALLSOURCE_ARCHS} "${arch##\/}
-	done
-}
-
 # Detect if ALLSOURCE_ARCHS is set. If not, we assume SRCARCH
 if [ "${ALLSOURCE_ARCHS}" = "" ]; then
 	ALLSOURCE_ARCHS=${SRCARCH}
 elif [ "${ALLSOURCE_ARCHS}" = "all" ]; then
-	find_all_archs
+	ALLSOURCE_ARCHS=$(find ${tree}arch/ -mindepth 1 -maxdepth 1 -type d -printf '%f ')
 fi
 
 # find sources in arch/$ARCH
diff --git a/scripts/ver_linux b/scripts/ver_linux
index 545ec73..7227994 100755
--- a/scripts/ver_linux
+++ b/scripts/ver_linux
@@ -13,36 +13,34 @@
 	system("uname -a")
 	printf("\n")
 
-	printversion("GNU C", version("gcc -dumpversion 2>&1"))
-	printversion("GNU Make", version("make --version 2>&1"))
-	printversion("Binutils", version("ld -v 2>&1"))
-	printversion("Util-linux", version("mount --version 2>&1"))
-	printversion("Mount", version("mount --version 2>&1"))
-	printversion("Module-init-tools", version("depmod -V  2>&1"))
-	printversion("E2fsprogs", version("tune2fs 2>&1"))
-	printversion("Jfsutils", version("fsck.jfs -V 2>&1"))
-	printversion("Reiserfsprogs", version("reiserfsck -V 2>&1"))
-	printversion("Reiser4fsprogs", version("fsck.reiser4 -V 2>&1"))
-	printversion("Xfsprogs", version("xfs_db -V 2>&1"))
-	printversion("Pcmciautils", version("pccardctl -V 2>&1"))
-	printversion("Pcmcia-cs", version("cardmgr -V 2>&1"))
-	printversion("Quota-tools", version("quota -V 2>&1"))
-	printversion("PPP", version("pppd --version 2>&1"))
-	printversion("Isdn4k-utils", version("isdnctrl 2>&1"))
-	printversion("Nfs-utils", version("showmount --version 2>&1"))
+	printversion("GNU C", version("gcc -dumpversion"))
+	printversion("GNU Make", version("make --version"))
+	printversion("Binutils", version("ld -v"))
+	printversion("Util-linux", version("mount --version"))
+	printversion("Mount", version("mount --version"))
+	printversion("Module-init-tools", version("depmod -V"))
+	printversion("E2fsprogs", version("tune2fs"))
+	printversion("Jfsutils", version("fsck.jfs -V"))
+	printversion("Reiserfsprogs", version("reiserfsck -V"))
+	printversion("Reiser4fsprogs", version("fsck.reiser4 -V"))
+	printversion("Xfsprogs", version("xfs_db -V"))
+	printversion("Pcmciautils", version("pccardctl -V"))
+	printversion("Pcmcia-cs", version("cardmgr -V"))
+	printversion("Quota-tools", version("quota -V"))
+	printversion("PPP", version("pppd --version"))
+	printversion("Isdn4k-utils", version("isdnctrl"))
+	printversion("Nfs-utils", version("showmount --version"))
 
-	if (system("test -r /proc/self/maps") == 0) {
-		while (getline <"/proc/self/maps" > 0) {
-			n = split($0, procmaps, "/")
-			if (/libc.*so$/ && match(procmaps[n], /[0-9]+([.]?[0-9]+)+/)) {
-				ver = substr(procmaps[n], RSTART, RLENGTH)
-				printversion("Linux C Library", ver)
-				break
-			}
+	while (getline <"/proc/self/maps" > 0) {
+		n = split($0, procmaps, "/")
+		if (/libc.*so$/ && match(procmaps[n], /[0-9]+([.]?[0-9]+)+/)) {
+			ver = substr(procmaps[n], RSTART, RLENGTH)
+			printversion("Linux C Library", ver)
+			break
 		}
 	}
 
-	printversion("Dynamic linker (ldd)", version("ldd --version 2>&1"))
+	printversion("Dynamic linker (ldd)", version("ldd --version"))
 
 	while ("ldconfig -p 2>/dev/null" | getline > 0) {
 		if (/(libg|stdc)[+]+\.so/) {
@@ -50,28 +48,25 @@
 			break
 		}
 	}
-	if (system("test -r " libcpp) == 0)
-		printversion("Linux C++ Library", version("readlink " libcpp))
+	printversion("Linux C++ Library", version("readlink " libcpp))
+	printversion("Procps", version("ps --version"))
+	printversion("Net-tools", version("ifconfig --version"))
+	printversion("Kbd", version("loadkeys -V"))
+	printversion("Console-tools", version("loadkeys -V"))
+	printversion("Oprofile", version("oprofiled --version"))
+	printversion("Sh-utils", version("expr --v"))
+	printversion("Udev", version("udevadm --version"))
+	printversion("Wireless-tools", version("iwconfig --version"))
 
-	printversion("Procps", version("ps --version 2>&1"))
-	printversion("Net-tools", version("ifconfig --version 2>&1"))
-	printversion("Kbd", version("loadkeys -V 2>&1"))
-	printversion("Console-tools", version("loadkeys -V 2>&1"))
-	printversion("Oprofile", version("oprofiled --version 2>&1"))
-	printversion("Sh-utils", version("expr --v 2>&1"))
-	printversion("Udev", version("udevadm --version 2>&1"))
-	printversion("Wireless-tools", version("iwconfig --version 2>&1"))
-
-	if (system("test -r /proc/modules") == 0) {
-		while ("sort /proc/modules" | getline > 0) {
-			mods = mods sep $1
-			sep = " "
-		}
-		printversion("Modules Loaded", mods)
+	while ("sort /proc/modules" | getline > 0) {
+		mods = mods sep $1
+		sep = " "
 	}
+	printversion("Modules Loaded", mods)
 }
 
 function version(cmd,    ver) {
+	cmd = cmd " 2>&1"
 	while (cmd | getline > 0) {
 		if (!/ver_linux/ && match($0, /[0-9]+([.]?[0-9]+)+/)) {
 			ver = substr($0, RSTART, RLENGTH)
diff --git a/security/commoncap.c b/security/commoncap.c
index 1ce701f..f4c33ab 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -919,6 +919,8 @@
 int cap_inode_setxattr(struct dentry *dentry, const char *name,
 		       const void *value, size_t size, int flags)
 {
+	struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
+
 	/* Ignore non-security xattrs */
 	if (strncmp(name, XATTR_SECURITY_PREFIX,
 			sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
@@ -931,7 +933,7 @@
 	if (strcmp(name, XATTR_NAME_CAPS) == 0)
 		return 0;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
@@ -949,6 +951,8 @@
  */
 int cap_inode_removexattr(struct dentry *dentry, const char *name)
 {
+	struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
+
 	/* Ignore non-security xattrs */
 	if (strncmp(name, XATTR_SECURITY_PREFIX,
 			sizeof(XATTR_SECURITY_PREFIX) - 1) != 0)
@@ -964,7 +968,7 @@
 		return 0;
 	}
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
 		return -EPERM;
 	return 0;
 }
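
The two commoncap hunks above replace a check against the initial user
namespace with one against the namespace that owns the filesystem, so a
user privileged over the superblock's s_user_ns can manage security.*
xattrs on it. A minimal sketch of the pattern (not the exact helper used
above; capable(X) is shorthand for ns_capable(&init_user_ns, X)):

	/* Sketch: gate an fs-object operation on privilege over the
	 * namespace that owns the filesystem, not over init_user_ns.
	 */
	static int may_manage_security_xattr(struct dentry *dentry)
	{
		struct user_namespace *fs_ns = dentry->d_sb->s_user_ns;

		if (!ns_capable(fs_ns, CAP_SYS_ADMIN))
			return -EPERM;	/* not privileged over this sb */
		return 0;
	}
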
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index a46fba3..facf9cd 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -200,7 +200,8 @@
 	int size;
 	bool ima_present = false;
 
-	if (!(inode->i_opflags & IOP_XATTR))
+	if (!(inode->i_opflags & IOP_XATTR) ||
+	    inode->i_sb->s_user_ns != &init_user_ns)
 		return -EOPNOTSUPP;
 
 	desc = init_desc(type);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index bf88236..a02c5ac 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -326,7 +326,7 @@
 		hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]);
 	hash[i * 2] = '\0';
 
-	ab = audit_log_start(current->audit_context, GFP_KERNEL,
+	ab = audit_log_start(audit_context(), GFP_KERNEL,
 			     AUDIT_INTEGRITY_RULE);
 	if (!ab)
 		goto out;
diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c
index 90987d1..ab10a25 100644
--- a/security/integrity/integrity_audit.c
+++ b/security/integrity/integrity_audit.c
@@ -38,7 +38,7 @@
 	if (!integrity_audit_info && audit_info == 1)	/* Skip info messages */
 		return;
 
-	ab = audit_log_start(current->audit_context, GFP_KERNEL, audit_msgno);
+	ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno);
 	audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u",
 			 task_pid_nr(current),
 			 from_kuid(&init_user_ns, current_cred()->uid),
diff --git a/security/keys/big_key.c b/security/keys/big_key.c
index 9336237..2806e70 100644
--- a/security/keys/big_key.c
+++ b/security/keys/big_key.c
@@ -22,6 +22,7 @@
 #include <keys/user-type.h>
 #include <keys/big_key-type.h>
 #include <crypto/aead.h>
+#include <crypto/gcm.h>
 
 struct big_key_buf {
 	unsigned int		nr_pages;
@@ -85,6 +86,7 @@
  * Crypto names for big_key data authenticated encryption
  */
 static const char big_key_alg_name[] = "gcm(aes)";
+#define BIG_KEY_IV_SIZE		GCM_AES_IV_SIZE
 
 /*
  * Crypto algorithms for big_key data authenticated encryption
@@ -109,7 +111,7 @@
 	 * an .update function, so there's no chance we'll wind up reusing the
 	 * key to encrypt updated data. Simply put: one key, one encryption.
 	 */
-	u8 zero_nonce[crypto_aead_ivsize(big_key_aead)];
+	u8 zero_nonce[BIG_KEY_IV_SIZE];
 
 	aead_req = aead_request_alloc(big_key_aead, GFP_KERNEL);
 	if (!aead_req)
@@ -425,6 +427,13 @@
 		pr_err("Can't alloc crypto: %d\n", ret);
 		return ret;
 	}
+
+	if (unlikely(crypto_aead_ivsize(big_key_aead) != BIG_KEY_IV_SIZE)) {
+		WARN(1, "big key algorithm changed?");
+		ret = -EINVAL;
+		goto free_aead;
+	}
+
 	ret = crypto_aead_setauthsize(big_key_aead, ENC_AUTHTAG_SIZE);
 	if (ret < 0) {
 		pr_err("Can't set crypto auth tag len: %d\n", ret);
diff --git a/security/keys/dh.c b/security/keys/dh.c
index d1ea9f3..f740382 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -162,8 +162,8 @@
 			goto err;
 
 		if (zlen && h) {
-			u8 tmpbuffer[h];
-			size_t chunk = min_t(size_t, zlen, h);
+			u8 tmpbuffer[32];
+			size_t chunk = min_t(size_t, zlen, sizeof(tmpbuffer));
 			memset(tmpbuffer, 0, chunk);
 
 			do {
@@ -173,7 +173,7 @@
 					goto err;
 
 				zlen -= chunk;
-				chunk = min_t(size_t, zlen, h);
+				chunk = min_t(size_t, zlen, sizeof(tmpbuffer));
 			} while (zlen);
 		}
 
@@ -183,24 +183,13 @@
 				goto err;
 		}
 
-		if (dlen < h) {
-			u8 tmpbuffer[h];
+		err = crypto_shash_final(desc, dst);
+		if (err)
+			goto err;
 
-			err = crypto_shash_final(desc, tmpbuffer);
-			if (err)
-				goto err;
-			memcpy(dst, tmpbuffer, dlen);
-			memzero_explicit(tmpbuffer, h);
-			return 0;
-		} else {
-			err = crypto_shash_final(desc, dst);
-			if (err)
-				goto err;
-
-			dlen -= h;
-			dst += h;
-			counter = cpu_to_be32(be32_to_cpu(counter) + 1);
-		}
+		dlen -= h;
+		dst += h;
+		counter = cpu_to_be32(be32_to_cpu(counter) + 1);
 	}
 
 	return 0;
@@ -216,14 +205,16 @@
 {
 	uint8_t *outbuf = NULL;
 	int ret;
+	size_t outbuf_len = round_up(buflen,
+			             crypto_shash_digestsize(sdesc->shash.tfm));
 
-	outbuf = kmalloc(buflen, GFP_KERNEL);
+	outbuf = kmalloc(outbuf_len, GFP_KERNEL);
 	if (!outbuf) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
-	ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, buflen, lzero);
+	ret = kdf_ctr(sdesc, kbuf, kbuflen, outbuf, outbuf_len, lzero);
 	if (ret)
 		goto err;
 
diff --git a/security/keys/proc.c b/security/keys/proc.c
index fbc4af5..5af2934 100644
--- a/security/keys/proc.c
+++ b/security/keys/proc.c
@@ -18,7 +18,6 @@
 #include <asm/errno.h>
 #include "internal.h"
 
-static int proc_keys_open(struct inode *inode, struct file *file);
 static void *proc_keys_start(struct seq_file *p, loff_t *_pos);
 static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos);
 static void proc_keys_stop(struct seq_file *p, void *v);
@@ -31,14 +30,6 @@
 	.show	= proc_keys_show,
 };
 
-static const struct file_operations proc_keys_fops = {
-	.open		= proc_keys_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int proc_key_users_open(struct inode *inode, struct file *file);
 static void *proc_key_users_start(struct seq_file *p, loff_t *_pos);
 static void *proc_key_users_next(struct seq_file *p, void *v, loff_t *_pos);
 static void proc_key_users_stop(struct seq_file *p, void *v);
@@ -51,13 +42,6 @@
 	.show	= proc_key_users_show,
 };
 
-static const struct file_operations proc_key_users_fops = {
-	.open		= proc_key_users_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
 /*
  * Declare the /proc files.
  */
@@ -65,11 +49,11 @@
 {
 	struct proc_dir_entry *p;
 
-	p = proc_create("keys", 0, NULL, &proc_keys_fops);
+	p = proc_create_seq("keys", 0, NULL, &proc_keys_ops);
 	if (!p)
 		panic("Cannot create /proc/keys\n");
 
-	p = proc_create("key-users", 0, NULL, &proc_key_users_fops);
+	p = proc_create_seq("key-users", 0, NULL, &proc_key_users_ops);
 	if (!p)
 		panic("Cannot create /proc/key-users\n");
 
@@ -96,11 +80,6 @@
 	return n;
 }
 
-static int proc_keys_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &proc_keys_ops);
-}
-
 static struct key *find_ge_key(struct seq_file *p, key_serial_t id)
 {
 	struct user_namespace *user_ns = seq_user_ns(p);
@@ -293,15 +272,6 @@
 	return __key_user_next(user_ns, n);
 }
 
-/*
- * Implement "/proc/key-users" to provides a list of the key users and their
- * quotas.
- */
-static int proc_key_users_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &proc_key_users_ops);
-}
-
 static void *proc_key_users_start(struct seq_file *p, loff_t *_pos)
 	__acquires(key_user_lock)
 {
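
This is one instance of the tree-wide proc_create_seq() conversion: when
a /proc file is a plain seq_file, the open handler and file_operations
boilerplate collapse into a single registration call. A minimal sketch,
assuming an existing struct seq_operations my_seq_ops:

	static int __init my_proc_init(void)
	{
		/* proc layer supplies seq_read/seq_lseek/seq_release. */
		if (!proc_create_seq("my-stats", 0444, NULL, &my_seq_ops))
			return -ENOMEM;
		return 0;
	}
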
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 67703db..f840010 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -447,7 +447,7 @@
 	if (a == NULL)
 		return;
 	/* we use GFP_ATOMIC so we won't sleep */
-	ab = audit_log_start(current->audit_context, GFP_ATOMIC | __GFP_NOWARN,
+	ab = audit_log_start(audit_context(), GFP_ATOMIC | __GFP_NOWARN,
 			     AUDIT_AVC);
 
 	if (ab == NULL)
diff --git a/security/security.c b/security/security.c
index 7bc2fde..68f46d8 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1358,6 +1358,12 @@
 						protocol, kern);
 }
 
+int security_socket_socketpair(struct socket *socka, struct socket *sockb)
+{
+	return call_int_hook(socket_socketpair, 0, socka, sockb);
+}
+EXPORT_SYMBOL(security_socket_socketpair);
+
 int security_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
 {
 	return call_int_hook(socket_bind, 0, sock, address, addrlen);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ae86724..9a46dc2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -274,11 +274,10 @@
  * Try reloading inode security labels that have been marked as invalid.  The
  * @may_sleep parameter indicates when sleeping and thus reloading labels is
  * allowed; when set to false, returns -ECHILD when the label is
- * invalid.  The @opt_dentry parameter should be set to a dentry of the inode;
- * when no dentry is available, set it to NULL instead.
+ * invalid.  The @dentry parameter should be set to a dentry of the inode.
  */
 static int __inode_security_revalidate(struct inode *inode,
-				       struct dentry *opt_dentry,
+				       struct dentry *dentry,
 				       bool may_sleep)
 {
 	struct inode_security_struct *isec = inode->i_security;
@@ -295,7 +294,7 @@
 		 * @opt_dentry is NULL and no dentry for this inode can be
 		 * found; in that case, continue using the old label.
 		 */
-		inode_doinit_with_dentry(inode, opt_dentry);
+		inode_doinit_with_dentry(inode, dentry);
 	}
 	return 0;
 }
@@ -3308,7 +3307,8 @@
 			} else {
 				audit_size = 0;
 			}
-			ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR);
+			ab = audit_log_start(audit_context(),
+					     GFP_ATOMIC, AUDIT_SELINUX_ERR);
 			audit_log_format(ab, "op=setxattr invalid_context=");
 			audit_log_n_untrustedstring(ab, value, audit_size);
 			audit_log_end(ab);
@@ -4583,6 +4583,18 @@
 	return err;
 }
 
+static int selinux_socket_socketpair(struct socket *socka,
+				     struct socket *sockb)
+{
+	struct sk_security_struct *sksec_a = socka->sk->sk_security;
+	struct sk_security_struct *sksec_b = sockb->sk->sk_security;
+
+	sksec_a->peer_sid = sksec_b->sid;
+	sksec_b->peer_sid = sksec_a->sid;
+
+	return 0;
+}
+
 /* Range of port numbers used to automatically bind.
    Need to determine whether we should perform a name_bind
    permission check between the socket and the port number. */
@@ -6451,7 +6463,9 @@
 					audit_size = size - 1;
 				else
 					audit_size = size;
-				ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR);
+				ab = audit_log_start(audit_context(),
+						     GFP_ATOMIC,
+						     AUDIT_SELINUX_ERR);
 				audit_log_format(ab, "op=fscreate invalid_context=");
 				audit_log_n_untrustedstring(ab, value, audit_size);
 				audit_log_end(ab);
@@ -7019,6 +7033,7 @@
 
 	LSM_HOOK_INIT(socket_create, selinux_socket_create),
 	LSM_HOOK_INIT(socket_post_create, selinux_socket_post_create),
+	LSM_HOOK_INIT(socket_socketpair, selinux_socket_socketpair),
 	LSM_HOOK_INIT(socket_bind, selinux_socket_bind),
 	LSM_HOOK_INIT(socket_connect, selinux_socket_connect),
 	LSM_HOOK_INIT(socket_listen, selinux_socket_listen),
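
The effect of the new socket_socketpair hook is visible from userspace:
both ends of a socketpair now carry each other's label, so SO_PEERSEC
returns something meaningful instead of the unlabelled default. A hedged
usage sketch (SO_PEERSEC is 31 per asm-generic/socket.h, defined here in
case the libc headers lack it):

	#include <stdio.h>
	#include <sys/socket.h>

	#ifndef SO_PEERSEC
	#define SO_PEERSEC 31		/* asm-generic/socket.h */
	#endif

	int main(void)
	{
		int sv[2];
		char ctx[256];
		socklen_t len = sizeof(ctx);

		if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
			return 1;
		/* With the hook wired up, this reports the peer label. */
		if (getsockopt(sv[0], SOL_SOCKET, SO_PEERSEC, ctx, &len))
			return 1;
		printf("peer context: %.*s\n", (int)len, ctx);
		return 0;
	}
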
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 2451603..f3d374d 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -167,11 +167,13 @@
 				      NULL);
 		if (length)
 			goto out;
-		audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS,
-			"enforcing=%d old_enforcing=%d auid=%u ses=%u",
+		audit_log(audit_context(), GFP_KERNEL, AUDIT_MAC_STATUS,
+			"enforcing=%d old_enforcing=%d auid=%u ses=%u"
+			" enabled=%d old-enabled=%d lsm=selinux res=1",
 			new_value, old_value,
 			from_kuid(&init_user_ns, audit_get_loginuid(current)),
-			audit_get_sessionid(current));
+			audit_get_sessionid(current),
+			selinux_enabled, selinux_enabled);
 		enforcing_set(state, new_value);
 		if (new_value)
 			avc_ss_reset(state->avc, 0);
@@ -279,6 +281,7 @@
 	char *page;
 	ssize_t length;
 	int new_value;
+	int enforcing;
 
 	if (count >= PAGE_SIZE)
 		return -ENOMEM;
@@ -296,13 +299,16 @@
 		goto out;
 
 	if (new_value) {
+		enforcing = enforcing_enabled(fsi->state);
 		length = selinux_disable(fsi->state);
 		if (length)
 			goto out;
-		audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS,
-			"selinux=0 auid=%u ses=%u",
+		audit_log(audit_context(), GFP_KERNEL, AUDIT_MAC_STATUS,
+			"enforcing=%d old_enforcing=%d auid=%u ses=%u"
+			" enabled=%d old-enabled=%d lsm=selinux res=1",
+			enforcing, enforcing,
 			from_kuid(&init_user_ns, audit_get_loginuid(current)),
-			audit_get_sessionid(current));
+			audit_get_sessionid(current), 0, 1);
 	}
 
 	length = count;
@@ -453,7 +459,7 @@
 	return ret;
 }
 
-static int sel_mmap_policy_fault(struct vm_fault *vmf)
+static vm_fault_t sel_mmap_policy_fault(struct vm_fault *vmf)
 {
 	struct policy_load_memory *plm = vmf->vma->vm_file->private_data;
 	unsigned long offset;
@@ -575,8 +581,8 @@
 	length = count;
 
 out1:
-	audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
-		"policy loaded auid=%u ses=%u",
+	audit_log(audit_context(), GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
+		"auid=%u ses=%u lsm=selinux res=1",
 		from_kuid(&init_user_ns, audit_get_loginuid(current)),
 		audit_get_sessionid(current));
 out:
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 3ce225e..a2d4482 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -501,7 +501,7 @@
 		goto out;
 
 	/* audit a message */
-	ab = audit_log_start(current->audit_context,
+	ab = audit_log_start(audit_context(),
 			     GFP_ATOMIC, AUDIT_SELINUX_ERR);
 	if (!ab)
 		goto out;
@@ -743,7 +743,7 @@
 		goto out;
 	if (context_struct_to_string(p, tcontext, &t, &tlen))
 		goto out;
-	audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+	audit_log(audit_context(), GFP_ATOMIC, AUDIT_SELINUX_ERR,
 		  "op=security_validate_transition seresult=denied"
 		  " oldcontext=%s newcontext=%s taskcontext=%s tclass=%s",
 		  o, n, t, sym_name(p, SYM_CLASSES, tclass-1));
@@ -929,7 +929,7 @@
 					      &old_name, &length) &&
 		    !context_struct_to_string(policydb, new_context,
 					      &new_name, &length)) {
-			audit_log(current->audit_context,
+			audit_log(audit_context(),
 				  GFP_ATOMIC, AUDIT_SELINUX_ERR,
 				  "op=security_bounded_transition "
 				  "seresult=denied "
@@ -1586,7 +1586,7 @@
 		goto out;
 	if (context_struct_to_string(policydb, newcontext, &n, &nlen))
 		goto out;
-	audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR,
+	audit_log(audit_context(), GFP_ATOMIC, AUDIT_SELINUX_ERR,
 		  "op=security_compute_sid invalid_context=%s"
 		  " scontext=%s"
 		  " tcontext=%s"
@@ -2882,7 +2882,7 @@
 
 	for (i = 0; i < len; i++) {
 		if (!!values[i] != policydb->bool_val_to_struct[i]->state) {
-			audit_log(current->audit_context, GFP_ATOMIC,
+			audit_log(audit_context(), GFP_ATOMIC,
 				AUDIT_MAC_CONFIG_CHANGE,
 				"bool=%s val=%d old_val=%d auid=%u ses=%u",
 				sym_name(policydb, SYM_BOOLS, i),
@@ -3025,7 +3025,7 @@
 		if (rc) {
 			if (!context_struct_to_string(policydb, &newcon, &s,
 						      &len)) {
-				audit_log(current->audit_context,
+				audit_log(audit_context(),
 					  GFP_ATOMIC, AUDIT_SELINUX_ERR,
 					  "op=security_sid_mls_copy "
 					  "invalid_context=%s", s);
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0b41483..dcb976f 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2842,6 +2842,27 @@
 	return smack_netlabel(sock->sk, SMACK_CIPSO_SOCKET);
 }
 
+/**
+ * smack_socket_socketpair - create socket pair
+ * @socka: one socket
+ * @sockb: another socket
+ *
+ * Cross reference the peer labels for SO_PEERSEC
+ *
+ * Returns 0 on success, an error code otherwise
+ */
+static int smack_socket_socketpair(struct socket *socka,
+		                   struct socket *sockb)
+{
+	struct socket_smack *asp = socka->sk->sk_security;
+	struct socket_smack *bsp = sockb->sk->sk_security;
+
+	asp->smk_packet = bsp->smk_out;
+	bsp->smk_packet = asp->smk_out;
+
+	return 0;
+}
+
 #ifdef SMACK_IPV6_PORT_LABELING
 /**
  * smack_socket_bind - record port binding information.
@@ -4724,6 +4745,7 @@
 	LSM_HOOK_INIT(unix_may_send, smack_unix_may_send),
 
 	LSM_HOOK_INIT(socket_post_create, smack_socket_post_create),
+	LSM_HOOK_INIT(socket_socketpair, smack_socket_socketpair),
 #ifdef SMACK_IPV6_PORT_LABELING
 	LSM_HOOK_INIT(socket_bind, smack_socket_bind),
 #endif
diff --git a/sound/Kconfig b/sound/Kconfig
index 6833db9..1140e99 100644
--- a/sound/Kconfig
+++ b/sound/Kconfig
@@ -96,6 +96,8 @@
 
 source "sound/synth/Kconfig"
 
+source "sound/xen/Kconfig"
+
 endif # SND
 
 endif # !UML
diff --git a/sound/Makefile b/sound/Makefile
index 99d8c31..797ecdc 100644
--- a/sound/Makefile
+++ b/sound/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_SOUND) += soundcore.o
 obj-$(CONFIG_DMASOUND) += oss/dmasound/
 obj-$(CONFIG_SND) += core/ i2c/ drivers/ isa/ pci/ ppc/ arm/ sh/ synth/ usb/ \
-	firewire/ sparc/ spi/ parisc/ pcmcia/ mips/ soc/ atmel/ hda/ x86/
+	firewire/ sparc/ spi/ parisc/ pcmcia/ mips/ soc/ atmel/ hda/ x86/ xen/
 obj-$(CONFIG_SND_AOA) += aoa/
 
 # This one must be compilable even if sound is configured out
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 4563432..4b01a37 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -1001,7 +1001,7 @@
 					   compr->card->proc_root);
 	if (!entry)
 		return -ENOMEM;
-	entry->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	entry->mode = S_IFDIR | 0555;
 	if (snd_info_register(entry) < 0) {
 		snd_info_free_entry(entry);
 		return -ENOMEM;
diff --git a/sound/core/device.c b/sound/core/device.c
index cb0e46f..535102d 100644
--- a/sound/core/device.c
+++ b/sound/core/device.c
@@ -240,6 +240,15 @@
 
 	if (snd_BUG_ON(!card))
 		return;
+	list_for_each_entry_safe_reverse(dev, next, &card->devices, list) {
+		/* exception: free ctl and lowlevel stuff later */
+		if (dev->type == SNDRV_DEV_CONTROL ||
+		    dev->type == SNDRV_DEV_LOWLEVEL)
+			continue;
+		__snd_device_free(dev);
+	}
+
+	/* free all */
 	list_for_each_entry_safe_reverse(dev, next, &card->devices, list)
 		__snd_device_free(dev);
 }
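
The teardown above becomes two passes over the same list: first
everything except the control and lowlevel devices, then the stragglers,
so destructors that still poke the control interface run before it
disappears. The shape of the idiom, with a hypothetical is_core_device()
predicate standing in for the type check:

	/* Pass 1: free ordinary devices while core ones still exist. */
	list_for_each_entry_safe_reverse(dev, next, &card->devices, list)
		if (!is_core_device(dev))
			__snd_device_free(dev);

	/* Pass 2: nothing references the core devices now; free them. */
	list_for_each_entry_safe_reverse(dev, next, &card->devices, list)
		__snd_device_free(dev);
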
diff --git a/sound/core/info.c b/sound/core/info.c
index 4b36767..fe502bc5e 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -454,7 +454,7 @@
 	entry = snd_info_create_module_entry(mod, name, NULL);
 	if (!entry)
 		return NULL;
-	entry->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	entry->mode = S_IFDIR | 0555;
 	if (snd_info_register(entry) < 0) {
 		snd_info_free_entry(entry);
 		return NULL;
@@ -470,7 +470,7 @@
 	snd_proc_root = snd_info_create_entry("asound", NULL);
 	if (!snd_proc_root)
 		return -ENOMEM;
-	snd_proc_root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	snd_proc_root->mode = S_IFDIR | 0555;
 	snd_proc_root->p = proc_mkdir("asound", NULL);
 	if (!snd_proc_root->p)
 		goto error;
@@ -716,7 +716,7 @@
 		kfree(entry);
 		return NULL;
 	}
-	entry->mode = S_IFREG | S_IRUGO;
+	entry->mode = S_IFREG | 0444;
 	entry->content = SNDRV_INFO_CONTENT_TEXT;
 	mutex_init(&entry->access);
 	INIT_LIST_HEAD(&entry->children);
diff --git a/sound/core/init.c b/sound/core/init.c
index 79b4df1c..4849c61 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -703,7 +703,7 @@
 	return count;
 }
 
-static DEVICE_ATTR(id, S_IRUGO | S_IWUSR, card_id_show_attr, card_id_store_attr);
+static DEVICE_ATTR(id, 0644, card_id_show_attr, card_id_store_attr);
 
 static ssize_t
 card_number_show_attr(struct device *dev,
@@ -713,7 +713,7 @@
 	return scnprintf(buf, PAGE_SIZE, "%i\n", card->number);
 }
 
-static DEVICE_ATTR(number, S_IRUGO, card_number_show_attr, NULL);
+static DEVICE_ATTR(number, 0444, card_number_show_attr, NULL);
 
 static struct attribute *card_dev_attrs[] = {
 	&dev_attr_id.attr,
diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c
index 379bf48..64d904b 100644
--- a/sound/core/oss/mixer_oss.c
+++ b/sound/core/oss/mixer_oss.c
@@ -1247,7 +1247,7 @@
 	if (! entry)
 		return;
 	entry->content = SNDRV_INFO_CONTENT_TEXT;
-	entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+	entry->mode = S_IFREG | 0644;
 	entry->c.text.read = snd_mixer_oss_proc_read;
 	entry->c.text.write = snd_mixer_oss_proc_write;
 	entry->private_data = mixer;
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 1980f68..905a53c 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -3045,7 +3045,7 @@
 			continue;
 		if ((entry = snd_info_create_card_entry(pcm->card, "oss", pstr->proc_root)) != NULL) {
 			entry->content = SNDRV_INFO_CONTENT_TEXT;
-			entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+			entry->mode = S_IFREG | 0644;
 			entry->c.text.read = snd_pcm_oss_proc_read;
 			entry->c.text.write = snd_pcm_oss_proc_write;
 			entry->private_data = pstr;
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 66ac89a..c352bfb 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -530,7 +530,7 @@
 					   pcm->card->proc_root);
 	if (!entry)
 		return -ENOMEM;
-	entry->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	entry->mode = S_IFDIR | 0555;
 	if (snd_info_register(entry) < 0) {
 		snd_info_free_entry(entry);
 		return -ENOMEM;
@@ -552,7 +552,7 @@
 	if (entry) {
 		entry->c.text.read = snd_pcm_xrun_debug_read;
 		entry->c.text.write = snd_pcm_xrun_debug_write;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 		entry->private_data = pstr;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
@@ -590,7 +590,7 @@
 					   substream->pstr->proc_root);
 	if (!entry)
 		return -ENOMEM;
-	entry->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	entry->mode = S_IFDIR | 0555;
 	if (snd_info_register(entry) < 0) {
 		snd_info_free_entry(entry);
 		return -ENOMEM;
@@ -647,7 +647,7 @@
 		entry->private_data = substream;
 		entry->c.text.read = NULL;
 		entry->c.text.write = snd_pcm_xrun_injection_write;
-		entry->mode = S_IFREG | S_IWUSR;
+		entry->mode = S_IFREG | 0200;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
 			entry = NULL;
@@ -1087,7 +1087,7 @@
         return snprintf(buf, PAGE_SIZE, "%s\n", str);
 }
 
-static DEVICE_ATTR(pcm_class, S_IRUGO, show_pcm_class, NULL);
+static DEVICE_ATTR(pcm_class, 0444, show_pcm_class, NULL);
 static struct attribute *pcm_dev_attrs[] = {
 	&dev_attr_pcm_class.attr,
 	NULL
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 6491afb..39d853b 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -45,10 +45,7 @@
 
 	if (get_user(frames, src))
 		return -EFAULT;
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		err = snd_pcm_playback_rewind(substream, frames);
-	else
-		err = snd_pcm_capture_rewind(substream, frames);
+	err = snd_pcm_rewind(substream, frames);
 	if (put_user(err, src))
 		return -EFAULT;
 	return err < 0 ? err : 0;
@@ -62,10 +59,7 @@
 
 	if (get_user(frames, src))
 		return -EFAULT;
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		err = snd_pcm_playback_forward(substream, frames);
-	else
-		err = snd_pcm_capture_forward(substream, frames);
+	err = snd_pcm_forward(substream, frames);
 	if (put_user(err, src))
 		return -EFAULT;
 	return err < 0 ? err : 0;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index f4a1950..44b5ae8 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -191,10 +191,7 @@
 {
 	snd_pcm_uframes_t avail;
 
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		avail = snd_pcm_playback_avail(runtime);
-	else
-		avail = snd_pcm_capture_avail(runtime);
+	avail = snd_pcm_avail(substream);
 	if (avail > runtime->avail_max)
 		runtime->avail_max = avail;
 	if (runtime->status->state == SNDRV_PCM_STATE_DRAINING) {
@@ -1856,10 +1853,7 @@
 		 * This check must happen after been added to the waitqueue
 		 * and having current state be INTERRUPTIBLE.
 		 */
-		if (is_playback)
-			avail = snd_pcm_playback_avail(runtime);
-		else
-			avail = snd_pcm_capture_avail(runtime);
+		avail = snd_pcm_avail(substream);
 		if (avail >= runtime->twake)
 			break;
 		snd_pcm_stream_unlock_irq(substream);
@@ -2175,10 +2169,7 @@
 	runtime->twake = runtime->control->avail_min ? : 1;
 	if (runtime->status->state == SNDRV_PCM_STATE_RUNNING)
 		snd_pcm_update_hw_ptr(substream);
-	if (is_playback)
-		avail = snd_pcm_playback_avail(runtime);
-	else
-		avail = snd_pcm_capture_avail(runtime);
+	avail = snd_pcm_avail(substream);
 	while (size > 0) {
 		snd_pcm_uframes_t frames, appl_ptr, appl_ofs;
 		snd_pcm_uframes_t cont;
diff --git a/sound/core/pcm_local.h b/sound/core/pcm_local.h
index 16f2547..7a499d0 100644
--- a/sound/core/pcm_local.h
+++ b/sound/core/pcm_local.h
@@ -36,6 +36,24 @@
 void snd_pcm_playback_silence(struct snd_pcm_substream *substream,
 			      snd_pcm_uframes_t new_hw_ptr);
 
+static inline snd_pcm_uframes_t
+snd_pcm_avail(struct snd_pcm_substream *substream)
+{
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		return snd_pcm_playback_avail(substream->runtime);
+	else
+		return snd_pcm_capture_avail(substream->runtime);
+}
+
+static inline snd_pcm_uframes_t
+snd_pcm_hw_avail(struct snd_pcm_substream *substream)
+{
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		return snd_pcm_playback_hw_avail(substream->runtime);
+	else
+		return snd_pcm_capture_hw_avail(substream->runtime);
+}
+
 #ifdef CONFIG_SND_PCM_TIMER
 void snd_pcm_timer_resolution_change(struct snd_pcm_substream *substream);
 void snd_pcm_timer_init(struct snd_pcm_substream *substream);
diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c
index ae33e45..4b5356a 100644
--- a/sound/core/pcm_memory.c
+++ b/sound/core/pcm_memory.c
@@ -201,7 +201,7 @@
 	if ((entry = snd_info_create_card_entry(substream->pcm->card, "prealloc", substream->proc_root)) != NULL) {
 		entry->c.text.read = snd_pcm_lib_preallocate_proc_read;
 		entry->c.text.write = snd_pcm_lib_preallocate_proc_write;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 		entry->private_data = substream;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 0e875d5..04c6301 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -99,6 +99,57 @@
 		cond_resched();
 }
 
+#define PCM_LOCK_DEFAULT	0
+#define PCM_LOCK_IRQ	1
+#define PCM_LOCK_IRQSAVE	2
+
+static unsigned long __snd_pcm_stream_lock_mode(struct snd_pcm_substream *substream,
+						unsigned int mode)
+{
+	unsigned long flags = 0;
+	if (substream->pcm->nonatomic) {
+		down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING);
+		mutex_lock(&substream->self_group.mutex);
+	} else {
+		switch (mode) {
+		case PCM_LOCK_DEFAULT:
+			read_lock(&snd_pcm_link_rwlock);
+			break;
+		case PCM_LOCK_IRQ:
+			read_lock_irq(&snd_pcm_link_rwlock);
+			break;
+		case PCM_LOCK_IRQSAVE:
+			read_lock_irqsave(&snd_pcm_link_rwlock, flags);
+			break;
+		}
+		spin_lock(&substream->self_group.lock);
+	}
+	return flags;
+}
+
+static void __snd_pcm_stream_unlock_mode(struct snd_pcm_substream *substream,
+					 unsigned int mode, unsigned long flags)
+{
+	if (substream->pcm->nonatomic) {
+		mutex_unlock(&substream->self_group.mutex);
+		up_read(&snd_pcm_link_rwsem);
+	} else {
+		spin_unlock(&substream->self_group.lock);
+
+		switch (mode) {
+		case PCM_LOCK_DEFAULT:
+			read_unlock(&snd_pcm_link_rwlock);
+			break;
+		case PCM_LOCK_IRQ:
+			read_unlock_irq(&snd_pcm_link_rwlock);
+			break;
+		case PCM_LOCK_IRQSAVE:
+			read_unlock_irqrestore(&snd_pcm_link_rwlock, flags);
+			break;
+		}
+	}
+}
+
 /**
  * snd_pcm_stream_lock - Lock the PCM stream
  * @substream: PCM substream
@@ -109,13 +160,7 @@
  */
 void snd_pcm_stream_lock(struct snd_pcm_substream *substream)
 {
-	if (substream->pcm->nonatomic) {
-		down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING);
-		mutex_lock(&substream->self_group.mutex);
-	} else {
-		read_lock(&snd_pcm_link_rwlock);
-		spin_lock(&substream->self_group.lock);
-	}
+	__snd_pcm_stream_lock_mode(substream, PCM_LOCK_DEFAULT);
 }
 EXPORT_SYMBOL_GPL(snd_pcm_stream_lock);
 
@@ -127,13 +172,7 @@
  */
 void snd_pcm_stream_unlock(struct snd_pcm_substream *substream)
 {
-	if (substream->pcm->nonatomic) {
-		mutex_unlock(&substream->self_group.mutex);
-		up_read(&snd_pcm_link_rwsem);
-	} else {
-		spin_unlock(&substream->self_group.lock);
-		read_unlock(&snd_pcm_link_rwlock);
-	}
+	__snd_pcm_stream_unlock_mode(substream, PCM_LOCK_DEFAULT, 0);
 }
 EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock);
 
@@ -147,9 +186,7 @@
  */
 void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
 {
-	if (!substream->pcm->nonatomic)
-		local_irq_disable();
-	snd_pcm_stream_lock(substream);
+	__snd_pcm_stream_lock_mode(substream, PCM_LOCK_IRQ);
 }
 EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
 
@@ -161,19 +198,13 @@
  */
 void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream)
 {
-	snd_pcm_stream_unlock(substream);
-	if (!substream->pcm->nonatomic)
-		local_irq_enable();
+	__snd_pcm_stream_unlock_mode(substream, PCM_LOCK_IRQ, 0);
 }
 EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
 
 unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
 {
-	unsigned long flags = 0;
-	if (!substream->pcm->nonatomic)
-		local_irq_save(flags);
-	snd_pcm_stream_lock(substream);
-	return flags;
+	return __snd_pcm_stream_lock_mode(substream, PCM_LOCK_IRQSAVE);
 }
 EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave);
 
@@ -187,9 +218,7 @@
 void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
 				      unsigned long flags)
 {
-	snd_pcm_stream_unlock(substream);
-	if (!substream->pcm->nonatomic)
-		local_irq_restore(flags);
+	__snd_pcm_stream_unlock_mode(substream, PCM_LOCK_IRQSAVE, flags);
 }
 EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
 
@@ -857,6 +886,18 @@
 	return err;
 }
 
+static inline snd_pcm_uframes_t
+snd_pcm_calc_delay(struct snd_pcm_substream *substream)
+{
+	snd_pcm_uframes_t delay;
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		delay = snd_pcm_playback_hw_avail(substream->runtime);
+	else
+		delay = snd_pcm_capture_avail(substream->runtime);
+	return delay + substream->runtime->delay;
+}
+
 int snd_pcm_status(struct snd_pcm_substream *substream,
 		   struct snd_pcm_status *status)
 {
@@ -908,21 +949,9 @@
  _tstamp_end:
 	status->appl_ptr = runtime->control->appl_ptr;
 	status->hw_ptr = runtime->status->hw_ptr;
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		status->avail = snd_pcm_playback_avail(runtime);
-		if (runtime->status->state == SNDRV_PCM_STATE_RUNNING ||
-		    runtime->status->state == SNDRV_PCM_STATE_DRAINING) {
-			status->delay = runtime->buffer_size - status->avail;
-			status->delay += runtime->delay;
-		} else
-			status->delay = 0;
-	} else {
-		status->avail = snd_pcm_capture_avail(runtime);
-		if (runtime->status->state == SNDRV_PCM_STATE_RUNNING)
-			status->delay = status->avail + runtime->delay;
-		else
-			status->delay = 0;
-	}
+	status->avail = snd_pcm_avail(substream);
+	status->delay = snd_pcm_running(substream) ?
+		snd_pcm_calc_delay(substream) : 0;
 	status->avail_max = runtime->avail_max;
 	status->overrange = runtime->overrange;
 	runtime->avail_max = 0;
@@ -2610,10 +2639,9 @@
 	return ret < 0 ? 0 : frames;
 }
 
-static snd_pcm_sframes_t snd_pcm_playback_rewind(struct snd_pcm_substream *substream,
-						 snd_pcm_uframes_t frames)
+static snd_pcm_sframes_t snd_pcm_rewind(struct snd_pcm_substream *substream,
+					snd_pcm_uframes_t frames)
 {
-	struct snd_pcm_runtime *runtime = substream->runtime;
 	snd_pcm_sframes_t ret;
 
 	if (frames == 0)
@@ -2623,33 +2651,14 @@
 	ret = do_pcm_hwsync(substream);
 	if (!ret)
 		ret = rewind_appl_ptr(substream, frames,
-				      snd_pcm_playback_hw_avail(runtime));
+				      snd_pcm_hw_avail(substream));
 	snd_pcm_stream_unlock_irq(substream);
 	return ret;
 }
 
-static snd_pcm_sframes_t snd_pcm_capture_rewind(struct snd_pcm_substream *substream,
-						snd_pcm_uframes_t frames)
+static snd_pcm_sframes_t snd_pcm_forward(struct snd_pcm_substream *substream,
+					 snd_pcm_uframes_t frames)
 {
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	snd_pcm_sframes_t ret;
-
-	if (frames == 0)
-		return 0;
-
-	snd_pcm_stream_lock_irq(substream);
-	ret = do_pcm_hwsync(substream);
-	if (!ret)
-		ret = rewind_appl_ptr(substream, frames,
-				      snd_pcm_capture_hw_avail(runtime));
-	snd_pcm_stream_unlock_irq(substream);
-	return ret;
-}
-
-static snd_pcm_sframes_t snd_pcm_playback_forward(struct snd_pcm_substream *substream,
-						  snd_pcm_uframes_t frames)
-{
-	struct snd_pcm_runtime *runtime = substream->runtime;
 	snd_pcm_sframes_t ret;
 
 	if (frames == 0)
@@ -2659,25 +2668,7 @@
 	ret = do_pcm_hwsync(substream);
 	if (!ret)
 		ret = forward_appl_ptr(substream, frames,
-				       snd_pcm_playback_avail(runtime));
-	snd_pcm_stream_unlock_irq(substream);
-	return ret;
-}
-
-static snd_pcm_sframes_t snd_pcm_capture_forward(struct snd_pcm_substream *substream,
-						 snd_pcm_uframes_t frames)
-{
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	snd_pcm_sframes_t ret;
-
-	if (frames == 0)
-		return 0;
-
-	snd_pcm_stream_lock_irq(substream);
-	ret = do_pcm_hwsync(substream);
-	if (!ret)
-		ret = forward_appl_ptr(substream, frames,
-				       snd_pcm_capture_avail(runtime));
+				       snd_pcm_avail(substream));
 	snd_pcm_stream_unlock_irq(substream);
 	return ret;
 }
@@ -2695,19 +2686,13 @@
 static int snd_pcm_delay(struct snd_pcm_substream *substream,
 			 snd_pcm_sframes_t *delay)
 {
-	struct snd_pcm_runtime *runtime = substream->runtime;
 	int err;
 	snd_pcm_sframes_t n = 0;
 
 	snd_pcm_stream_lock_irq(substream);
 	err = do_pcm_hwsync(substream);
-	if (!err) {
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			n = snd_pcm_playback_hw_avail(runtime);
-		else
-			n = snd_pcm_capture_avail(runtime);
-		n += runtime->delay;
-	}
+	if (!err)
+		n = snd_pcm_calc_delay(substream);
 	snd_pcm_stream_unlock_irq(substream);
 	if (!err)
 		*delay = n;
@@ -2834,10 +2819,7 @@
 		return -EFAULT;
 	if (put_user(0, _frames))
 		return -EFAULT;
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		result = snd_pcm_playback_rewind(substream, frames);
-	else
-		result = snd_pcm_capture_rewind(substream, frames);
+	result = snd_pcm_rewind(substream, frames);
 	__put_user(result, _frames);
 	return result < 0 ? result : 0;
 }
@@ -2852,10 +2834,7 @@
 		return -EFAULT;
 	if (put_user(0, _frames))
 		return -EFAULT;
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-		result = snd_pcm_playback_forward(substream, frames);
-	else
-		result = snd_pcm_capture_forward(substream, frames);
+	result = snd_pcm_forward(substream, frames);
 	__put_user(result, _frames);
 	return result < 0 ? result : 0;
 }
@@ -2998,7 +2977,7 @@
 		/* provided only for OSS; capture-only and no value returned */
 		if (substream->stream != SNDRV_PCM_STREAM_CAPTURE)
 			return -EINVAL;
-		result = snd_pcm_capture_forward(substream, *frames);
+		result = snd_pcm_forward(substream, *frames);
 		return result < 0 ? result : 0;
 	}
 	case SNDRV_PCM_IOCTL_HW_PARAMS:
@@ -3140,82 +3119,46 @@
 	return result;
 }
 
-static __poll_t snd_pcm_playback_poll(struct file *file, poll_table * wait)
+static __poll_t snd_pcm_poll(struct file *file, poll_table *wait)
 {
 	struct snd_pcm_file *pcm_file;
 	struct snd_pcm_substream *substream;
 	struct snd_pcm_runtime *runtime;
-        __poll_t mask;
+	__poll_t mask, ok;
 	snd_pcm_uframes_t avail;
 
 	pcm_file = file->private_data;
 
 	substream = pcm_file->substream;
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		ok = EPOLLOUT | EPOLLWRNORM;
+	else
+		ok = EPOLLIN | EPOLLRDNORM;
 	if (PCM_RUNTIME_CHECK(substream))
-		return EPOLLOUT | EPOLLWRNORM | EPOLLERR;
-	runtime = substream->runtime;
+		return ok | EPOLLERR;
 
+	runtime = substream->runtime;
 	poll_wait(file, &runtime->sleep, wait);
 
+	mask = 0;
 	snd_pcm_stream_lock_irq(substream);
-	avail = snd_pcm_playback_avail(runtime);
+	avail = snd_pcm_avail(substream);
 	switch (runtime->status->state) {
 	case SNDRV_PCM_STATE_RUNNING:
 	case SNDRV_PCM_STATE_PREPARED:
 	case SNDRV_PCM_STATE_PAUSED:
-		if (avail >= runtime->control->avail_min) {
-			mask = EPOLLOUT | EPOLLWRNORM;
-			break;
-		}
-		/* Fall through */
-	case SNDRV_PCM_STATE_DRAINING:
-		mask = 0;
-		break;
-	default:
-		mask = EPOLLOUT | EPOLLWRNORM | EPOLLERR;
-		break;
-	}
-	snd_pcm_stream_unlock_irq(substream);
-	return mask;
-}
-
-static __poll_t snd_pcm_capture_poll(struct file *file, poll_table * wait)
-{
-	struct snd_pcm_file *pcm_file;
-	struct snd_pcm_substream *substream;
-	struct snd_pcm_runtime *runtime;
-        __poll_t mask;
-	snd_pcm_uframes_t avail;
-
-	pcm_file = file->private_data;
-
-	substream = pcm_file->substream;
-	if (PCM_RUNTIME_CHECK(substream))
-		return EPOLLIN | EPOLLRDNORM | EPOLLERR;
-	runtime = substream->runtime;
-
-	poll_wait(file, &runtime->sleep, wait);
-
-	snd_pcm_stream_lock_irq(substream);
-	avail = snd_pcm_capture_avail(runtime);
-	switch (runtime->status->state) {
-	case SNDRV_PCM_STATE_RUNNING:
-	case SNDRV_PCM_STATE_PREPARED:
-	case SNDRV_PCM_STATE_PAUSED:
-		if (avail >= runtime->control->avail_min) {
-			mask = EPOLLIN | EPOLLRDNORM;
-			break;
-		}
-		mask = 0;
+		if (avail >= runtime->control->avail_min)
+			mask = ok;
 		break;
 	case SNDRV_PCM_STATE_DRAINING:
-		if (avail > 0) {
-			mask = EPOLLIN | EPOLLRDNORM;
-			break;
+		if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
+			mask = ok;
+			if (!avail)
+				mask |= EPOLLERR;
 		}
-		/* Fall through */
+		break;
 	default:
-		mask = EPOLLIN | EPOLLRDNORM | EPOLLERR;
+		mask = ok | EPOLLERR;
 		break;
 	}
 	snd_pcm_stream_unlock_irq(substream);
@@ -3707,7 +3650,7 @@
 		.open =			snd_pcm_playback_open,
 		.release =		snd_pcm_release,
 		.llseek =		no_llseek,
-		.poll =			snd_pcm_playback_poll,
+		.poll =			snd_pcm_poll,
 		.unlocked_ioctl =	snd_pcm_ioctl,
 		.compat_ioctl = 	snd_pcm_ioctl_compat,
 		.mmap =			snd_pcm_mmap,
@@ -3721,7 +3664,7 @@
 		.open =			snd_pcm_capture_open,
 		.release =		snd_pcm_release,
 		.llseek =		no_llseek,
-		.poll =			snd_pcm_capture_poll,
+		.poll =			snd_pcm_poll,
 		.unlocked_ioctl =	snd_pcm_ioctl,
 		.compat_ioctl = 	snd_pcm_ioctl_compat,
 		.mmap =			snd_pcm_mmap,
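
The merged snd_pcm_poll() keeps one copy of the state machine and varies
only the direction-specific ready bits; DRAINING stays special because a
draining capture stream can still deliver data while a draining playback
stream cannot accept any. The selection logic in isolation, as a hedged
sketch (the three-value state enum is an illustration, not the driver's
actual state set):

	enum pcm_poll_state { PCM_ACTIVE, PCM_DRAINING, PCM_BROKEN };

	static __poll_t pcm_poll_mask(bool playback,
				      enum pcm_poll_state state,
				      unsigned long avail,
				      unsigned long avail_min)
	{
		__poll_t ok = playback ? (EPOLLOUT | EPOLLWRNORM)
				       : (EPOLLIN | EPOLLRDNORM);

		switch (state) {
		case PCM_ACTIVE:	/* RUNNING/PREPARED/PAUSED */
			return avail >= avail_min ? ok : 0;
		case PCM_DRAINING:	/* capture may drain what is left */
			if (!playback)
				return avail ? ok : (ok | EPOLLERR);
			return 0;
		default:		/* anything else wakes with error */
			return ok | EPOLLERR;
		}
	}
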
diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c
index d21ece9..24d90ab 100644
--- a/sound/core/seq/seq_ports.c
+++ b/sound/core/seq/seq_ports.c
@@ -669,7 +669,7 @@
 	/* Set up the port */
 	memset(&portinfo, 0, sizeof(portinfo));
 	portinfo.addr.client = client;
-	strlcpy(portinfo.name, portname ? portname : "Unamed port",
+	strlcpy(portinfo.name, portname ? portname : "Unnamed port",
 		sizeof(portinfo.name));
 
 	portinfo.capability = cap;
diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
index 2316757..f587d0e 100644
--- a/sound/core/seq/seq_timer.c
+++ b/sound/core/seq/seq_timer.c
@@ -371,9 +371,7 @@
 
 	tmr->ticks = 1;
 	if (!(t->hw.flags & SNDRV_TIMER_HW_SLAVE)) {
-		unsigned long r = t->hw.resolution;
-		if (! r && t->hw.c_resolution)
-			r = t->hw.c_resolution(t);
+		unsigned long r = snd_timer_resolution(tmr->timeri);
 		if (r) {
 			tmr->ticks = (unsigned int)(1000000000uL / (r * freq));
 			if (! tmr->ticks)
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 0ddcae4..665089c 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -427,25 +427,35 @@
 }
 EXPORT_SYMBOL(snd_timer_close);
 
+static unsigned long snd_timer_hw_resolution(struct snd_timer *timer)
+{
+	if (timer->hw.c_resolution)
+		return timer->hw.c_resolution(timer);
+	else
+		return timer->hw.resolution;
+}
+
 unsigned long snd_timer_resolution(struct snd_timer_instance *timeri)
 {
 	struct snd_timer * timer;
+	unsigned long ret = 0;
+	unsigned long flags;
 
 	if (timeri == NULL)
 		return 0;
 	timer = timeri->timer;
 	if (timer) {
-		if (timer->hw.c_resolution)
-			return timer->hw.c_resolution(timer);
-		return timer->hw.resolution;
+		spin_lock_irqsave(&timer->lock, flags);
+		ret = snd_timer_hw_resolution(timer);
+		spin_unlock_irqrestore(&timer->lock, flags);
 	}
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(snd_timer_resolution);
 
 static void snd_timer_notify1(struct snd_timer_instance *ti, int event)
 {
-	struct snd_timer *timer;
+	struct snd_timer *timer = ti->timer;
 	unsigned long resolution = 0;
 	struct snd_timer_instance *ts;
 	struct timespec tstamp;
@@ -457,14 +467,14 @@
 	if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_START ||
 		       event > SNDRV_TIMER_EVENT_PAUSE))
 		return;
-	if (event == SNDRV_TIMER_EVENT_START ||
-	    event == SNDRV_TIMER_EVENT_CONTINUE)
-		resolution = snd_timer_resolution(ti);
+	if (timer &&
+	    (event == SNDRV_TIMER_EVENT_START ||
+	     event == SNDRV_TIMER_EVENT_CONTINUE))
+		resolution = snd_timer_hw_resolution(timer);
 	if (ti->ccallback)
 		ti->ccallback(ti, event, &tstamp, resolution);
 	if (ti->flags & SNDRV_TIMER_IFLG_SLAVE)
 		return;
-	timer = ti->timer;
 	if (timer == NULL)
 		return;
 	if (timer->hw.flags & SNDRV_TIMER_HW_SLAVE)
@@ -771,10 +781,7 @@
 	spin_lock_irqsave(&timer->lock, flags);
 
 	/* remember the current resolution */
-	if (timer->hw.c_resolution)
-		resolution = timer->hw.c_resolution(timer);
-	else
-		resolution = timer->hw.resolution;
+	resolution = snd_timer_hw_resolution(timer);
 
 	/* loop for all active instances
 	 * Here we cannot use list_for_each_entry because the active_list of a
@@ -1014,12 +1021,8 @@
 	spin_lock_irqsave(&timer->lock, flags);
 	if (event == SNDRV_TIMER_EVENT_MSTART ||
 	    event == SNDRV_TIMER_EVENT_MCONTINUE ||
-	    event == SNDRV_TIMER_EVENT_MRESUME) {
-		if (timer->hw.c_resolution)
-			resolution = timer->hw.c_resolution(timer);
-		else
-			resolution = timer->hw.resolution;
-	}
+	    event == SNDRV_TIMER_EVENT_MRESUME)
+		resolution = snd_timer_hw_resolution(timer);
 	list_for_each_entry(ti, &timer->active_list_head, active_list) {
 		if (ti->ccallback)
 			ti->ccallback(ti, event, tstamp, resolution);
@@ -1656,10 +1659,8 @@
 	mutex_lock(&register_mutex);
 	t = snd_timer_find(&tid);
 	if (t != NULL) {
-		if (t->hw.c_resolution)
-			gstatus.resolution = t->hw.c_resolution(t);
-		else
-			gstatus.resolution = t->hw.resolution;
+		spin_lock_irq(&t->lock);
+		gstatus.resolution = snd_timer_hw_resolution(t);
 		if (t->hw.precise_resolution) {
 			t->hw.precise_resolution(t, &gstatus.resolution_num,
 						 &gstatus.resolution_den);
@@ -1667,6 +1668,7 @@
 			gstatus.resolution_num = gstatus.resolution;
 			gstatus.resolution_den = 1000000000uL;
 		}
+		spin_unlock_irq(&t->lock);
 	} else {
 		err = -ENODEV;
 	}
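
The timer change is the usual locked/lockless split: the new
snd_timer_hw_resolution() assumes timer->lock is already held (interrupt
and notify paths), while the exported snd_timer_resolution() takes the
lock around it. A sketch of the convention, with a lockdep annotation
added for illustration (the patch itself does not add one):

	/* __variant: caller holds the lock; plain variant takes it. */
	static unsigned long __get_resolution(struct snd_timer *t)
	{
		lockdep_assert_held(&t->lock);
		return t->hw.c_resolution ? t->hw.c_resolution(t)
					  : t->hw.resolution;
	}

	static unsigned long get_resolution(struct snd_timer *t)
	{
		unsigned long flags, r;

		spin_lock_irqsave(&t->lock, flags);
		r = __get_resolution(t);
		spin_unlock_irqrestore(&t->lock, flags);
		return r;
	}
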
diff --git a/sound/core/vmaster.c b/sound/core/vmaster.c
index 9e96186..fd99d8a 100644
--- a/sound/core/vmaster.c
+++ b/sound/core/vmaster.c
@@ -259,8 +259,8 @@
 	struct link_master *master_link = snd_kcontrol_chip(master);
 	struct link_slave *srec;
 
-	srec = kzalloc(sizeof(*srec) +
-		       slave->count * sizeof(*slave->vd), GFP_KERNEL);
+	srec = kzalloc(struct_size(srec, slave.vd, slave->count),
+		       GFP_KERNEL);
 	if (!srec)
 		return -ENOMEM;
 	srec->kctl = slave;
@@ -421,13 +421,15 @@
 	kctl->private_free = master_free;
 
 	/* additional (constant) TLV read */
-	if (tlv &&
-	    (tlv[0] == SNDRV_CTL_TLVT_DB_SCALE ||
-	     tlv[0] == SNDRV_CTL_TLVT_DB_MINMAX ||
-	     tlv[0] == SNDRV_CTL_TLVT_DB_MINMAX_MUTE)) {
-		kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
-		memcpy(master->tlv, tlv, sizeof(master->tlv));
-		kctl->tlv.p = master->tlv;
+	if (tlv) {
+		unsigned int type = tlv[SNDRV_CTL_TLVO_TYPE];
+		if (type == SNDRV_CTL_TLVT_DB_SCALE ||
+		    type == SNDRV_CTL_TLVT_DB_MINMAX ||
+		    type == SNDRV_CTL_TLVT_DB_MINMAX_MUTE) {
+			kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ;
+			memcpy(master->tlv, tlv, sizeof(master->tlv));
+			kctl->tlv.p = master->tlv;
+		}
 	}
 
 	return kctl;
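
struct_size() above computes sizeof(*srec) plus slave->count trailing
elements with overflow checking; on overflow it saturates to SIZE_MAX so
the allocation simply fails rather than returning a short buffer. The
general shape for a flexible-array struct:

	struct pkt {
		unsigned int len;
		u8 data[];		/* flexible array member */
	};

	/* struct_size(p, data, n) ==
	 *	sizeof(*p) + n * sizeof(p->data[0]), overflow-checked.
	 */
	struct pkt *p = kzalloc(struct_size(p, data, n), GFP_KERNEL);
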
diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c
index eab7f59..78a2fdc 100644
--- a/sound/drivers/aloop.c
+++ b/sound/drivers/aloop.c
@@ -768,20 +768,7 @@
 	return 0;
 }
 
-static const struct snd_pcm_ops loopback_playback_ops = {
-	.open =		loopback_open,
-	.close =	loopback_close,
-	.ioctl =	snd_pcm_lib_ioctl,
-	.hw_params =	loopback_hw_params,
-	.hw_free =	loopback_hw_free,
-	.prepare =	loopback_prepare,
-	.trigger =	loopback_trigger,
-	.pointer =	loopback_pointer,
-	.page =		snd_pcm_lib_get_vmalloc_page,
-	.mmap =		snd_pcm_lib_mmap_vmalloc,
-};
-
-static const struct snd_pcm_ops loopback_capture_ops = {
+static const struct snd_pcm_ops loopback_pcm_ops = {
 	.open =		loopback_open,
 	.close =	loopback_close,
 	.ioctl =	snd_pcm_lib_ioctl,
@@ -804,8 +791,8 @@
 			  substreams, substreams, &pcm);
 	if (err < 0)
 		return err;
-	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &loopback_playback_ops);
-	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &loopback_capture_ops);
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &loopback_pcm_ops);
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &loopback_pcm_ops);
 
 	pcm->private_data = loopback;
 	pcm->info_flags = 0;
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index 8fb9a54..9af154d 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -1042,7 +1042,7 @@
 	if (!snd_card_proc_new(chip->card, "dummy_pcm", &entry)) {
 		snd_info_set_text_ops(entry, chip, dummy_proc_read);
 		entry->c.text.write = dummy_proc_write;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 		entry->private_data = chip;
 	}
 }
diff --git a/sound/drivers/mts64.c b/sound/drivers/mts64.c
index f32e813..b68e71c 100644
--- a/sound/drivers/mts64.c
+++ b/sound/drivers/mts64.c
@@ -41,11 +41,11 @@
 static struct platform_device *platform_devices[SNDRV_CARDS]; 
 static int device_count;
 
-module_param_array(index, int, NULL, S_IRUGO);
+module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for " CARD_NAME " soundcard.");
-module_param_array(id, charp, NULL, S_IRUGO);
+module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CARD_NAME " soundcard.");
-module_param_array(enable, bool, NULL, S_IRUGO);
+module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard.");
 
 MODULE_AUTHOR("Matthias Koenig <mk@phasorlab.de>");
diff --git a/sound/drivers/opl4/opl4_proc.c b/sound/drivers/opl4/opl4_proc.c
index cd2c07f..16b2409 100644
--- a/sound/drivers/opl4/opl4_proc.c
+++ b/sound/drivers/opl4/opl4_proc.c
@@ -104,7 +104,7 @@
 	if (entry) {
 		if (opl4->hardware < OPL3_HW_OPL4_ML) {
 			/* OPL4 can access 4 MB external ROM/SRAM */
-			entry->mode |= S_IWUSR;
+			entry->mode |= 0200;
 			entry->size = 4 * 1024 * 1024;
 		} else {
 			/* OPL4-ML has 1 MB internal ROM */
diff --git a/sound/drivers/portman2x4.c b/sound/drivers/portman2x4.c
index ec8a943..3cdf0a88 100644
--- a/sound/drivers/portman2x4.c
+++ b/sound/drivers/portman2x4.c
@@ -60,11 +60,11 @@
 static struct platform_device *platform_devices[SNDRV_CARDS]; 
 static int device_count;
 
-module_param_array(index, int, NULL, S_IRUGO);
+module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for " CARD_NAME " soundcard.");
-module_param_array(id, charp, NULL, S_IRUGO);
+module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for " CARD_NAME " soundcard.");
-module_param_array(enable, bool, NULL, S_IRUGO);
+module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard.");
 
 MODULE_AUTHOR("Levent Guendogdu, Tobias Gehrig, Matthias Koenig");
diff --git a/sound/firewire/bebob/bebob_proc.c b/sound/firewire/bebob/bebob_proc.c
index ec24f96..8096891 100644
--- a/sound/firewire/bebob/bebob_proc.c
+++ b/sound/firewire/bebob/bebob_proc.c
@@ -183,7 +183,7 @@
 					  bebob->card->proc_root);
 	if (root == NULL)
 		return;
-	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	root->mode = S_IFDIR | 0555;
 	if (snd_info_register(root) < 0) {
 		snd_info_free_entry(root);
 		return;
diff --git a/sound/firewire/dice/Makefile b/sound/firewire/dice/Makefile
index 55b4be9..37062a2 100644
--- a/sound/firewire/dice/Makefile
+++ b/sound/firewire/dice/Makefile
@@ -1,3 +1,4 @@
 snd-dice-objs := dice-transaction.o dice-stream.o dice-proc.o dice-midi.o \
-		 dice-pcm.o dice-hwdep.o dice.o
+		 dice-pcm.o dice-hwdep.o dice.o dice-tcelectronic.o \
+		 dice-alesis.o dice-extension.o dice-mytek.o
 obj-$(CONFIG_SND_DICE) += snd-dice.o
diff --git a/sound/firewire/dice/dice-alesis.c b/sound/firewire/dice/dice-alesis.c
new file mode 100644
index 0000000..b2efb1c
--- /dev/null
+++ b/sound/firewire/dice/dice-alesis.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dice-alesis.c - a part of driver for DICE based devices
+ *
+ * Copyright (c) 2018 Takashi Sakamoto
+ */
+
+#include "dice.h"
+
+static const unsigned int
+alesis_io14_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
+	{6, 6, 4},	/* Tx0 = Analog + S/PDIF. */
+	{8, 4, 0},	/* Tx1 = ADAT1. */
+};
+
+static const unsigned int
+alesis_io26_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
+	{10, 10, 8},	/* Tx0 = Analog + S/PDIF. */
+	{16, 8, 0},	/* Tx1 = ADAT1 + ADAT2. */
+};
+
+int snd_dice_detect_alesis_formats(struct snd_dice *dice)
+{
+	__be32 reg;
+	u32 data;
+	int i;
+	int err;
+
+	err = snd_dice_transaction_read_tx(dice, TX_NUMBER_AUDIO, &reg,
+					   sizeof(reg));
+	if (err < 0)
+		return err;
+	data = be32_to_cpu(reg);
+
+	if (data == 4 || data == 6) {
+		memcpy(dice->tx_pcm_chs, alesis_io14_tx_pcm_chs,
+				MAX_STREAMS * SND_DICE_RATE_MODE_COUNT *
+				sizeof(unsigned int));
+	} else {
+		memcpy(dice->tx_pcm_chs, alesis_io26_tx_pcm_chs,
+				MAX_STREAMS * SND_DICE_RATE_MODE_COUNT *
+				sizeof(unsigned int));
+	}
+
+	for (i = 0; i < SND_DICE_RATE_MODE_COUNT; ++i)
+		dice->rx_pcm_chs[0][i] = 8;
+
+	dice->tx_midi_ports[0] = 1;
+	dice->rx_midi_ports[0] = 1;
+
+	return 0;
+}
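
One nit on the copies above: the length is spelled out as
MAX_STREAMS * SND_DICE_RATE_MODE_COUNT * sizeof(unsigned int), which has
to be kept in sync with the table definitions by hand. A sizeof() on the
source table says the same thing and cannot drift, e.g.:

	memcpy(dice->tx_pcm_chs, alesis_io26_tx_pcm_chs,
	       sizeof(alesis_io26_tx_pcm_chs));
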
diff --git a/sound/firewire/dice/dice-extension.c b/sound/firewire/dice/dice-extension.c
new file mode 100644
index 0000000..a63fcbc
--- /dev/null
+++ b/sound/firewire/dice/dice-extension.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dice-extension.c - a part of driver for DICE based devices
+ *
+ * Copyright (c) 2018 Takashi Sakamoto
+ */
+
+#include "dice.h"
+
+/* For TCD2210/2220, TCAT defines an extension of the application protocol. */
+
+#define DICE_EXT_APP_SPACE		0xffffe0200000uLL
+
+#define DICE_EXT_APP_CAPS_OFFSET	0x00
+#define DICE_EXT_APP_CAPS_SIZE		0x04
+#define DICE_EXT_APP_CMD_OFFSET		0x08
+#define DICE_EXT_APP_CMD_SIZE		0x0c
+#define DICE_EXT_APP_MIXER_OFFSET	0x10
+#define DICE_EXT_APP_MIXER_SIZE		0x14
+#define DICE_EXT_APP_PEAK_OFFSET	0x18
+#define DICE_EXT_APP_PEAK_SIZE		0x1c
+#define DICE_EXT_APP_ROUTER_OFFSET	0x20
+#define DICE_EXT_APP_ROUTER_SIZE	0x24
+#define DICE_EXT_APP_STREAM_OFFSET	0x28
+#define DICE_EXT_APP_STREAM_SIZE	0x2c
+#define DICE_EXT_APP_CURRENT_OFFSET	0x30
+#define DICE_EXT_APP_CURRENT_SIZE	0x34
+#define DICE_EXT_APP_STANDALONE_OFFSET	0x38
+#define DICE_EXT_APP_STANDALONE_SIZE	0x3c
+#define DICE_EXT_APP_APPLICATION_OFFSET	0x40
+#define DICE_EXT_APP_APPLICATION_SIZE	0x44
+
+#define EXT_APP_STREAM_TX_NUMBER	0x0000
+#define EXT_APP_STREAM_RX_NUMBER	0x0004
+#define EXT_APP_STREAM_ENTRIES		0x0008
+#define EXT_APP_STREAM_ENTRY_SIZE	0x010c
+#define  EXT_APP_NUMBER_AUDIO		0x0000
+#define  EXT_APP_NUMBER_MIDI		0x0004
+#define  EXT_APP_NAMES			0x0008
+#define   EXT_APP_NAMES_SIZE		256
+#define  EXT_APP_AC3			0x0108
+
+#define EXT_APP_CONFIG_LOW_ROUTER	0x0000
+#define EXT_APP_CONFIG_LOW_STREAM	0x1000
+#define EXT_APP_CONFIG_MIDDLE_ROUTER	0x2000
+#define EXT_APP_CONFIG_MIDDLE_STREAM	0x3000
+#define EXT_APP_CONFIG_HIGH_ROUTER	0x4000
+#define EXT_APP_CONFIG_HIGH_STREAM	0x5000
+
+static inline int read_transaction(struct snd_dice *dice, u64 section_addr,
+				   u32 offset, void *buf, size_t len)
+{
+	return snd_fw_transaction(dice->unit,
+				  len == 4 ? TCODE_READ_QUADLET_REQUEST :
+					     TCODE_READ_BLOCK_REQUEST,
+				  section_addr + offset, buf, len, 0);
+}
+
+static int read_stream_entries(struct snd_dice *dice, u64 section_addr,
+			       u32 base_offset, unsigned int stream_count,
+			       unsigned int mode,
+			       unsigned int pcm_channels[MAX_STREAMS][3],
+			       unsigned int midi_ports[MAX_STREAMS])
+{
+	u32 entry_offset;
+	__be32 reg[2];
+	int err;
+	int i;
+
+	for (i = 0; i < stream_count; ++i) {
+		entry_offset = base_offset + i * EXT_APP_STREAM_ENTRY_SIZE;
+		err = read_transaction(dice, section_addr,
+				    entry_offset + EXT_APP_NUMBER_AUDIO,
+				    reg, sizeof(reg));
+		if (err < 0)
+			return err;
+		pcm_channels[i][mode] = be32_to_cpu(reg[0]);
+		midi_ports[i] = max(midi_ports[i], be32_to_cpu(reg[1]));
+	}
+
+	return 0;
+}
+
+static int detect_stream_formats(struct snd_dice *dice, u64 section_addr)
+{
+	u32 base_offset;
+	__be32 reg[2];
+	unsigned int stream_count;
+	int mode;
+	int err = 0;
+
+	for (mode = 0; mode < SND_DICE_RATE_MODE_COUNT; ++mode) {
+		unsigned int cap;
+
+		/*
+		 * Some models report stream formats for the highest rate mode
+		 * even though they don't support it; check clock capabilities.
+		 */
+		if (mode == 2) {
+			cap = CLOCK_CAP_RATE_176400 | CLOCK_CAP_RATE_192000;
+		} else if (mode == 1) {
+			cap = CLOCK_CAP_RATE_88200 | CLOCK_CAP_RATE_96000;
+		} else {
+			cap = CLOCK_CAP_RATE_32000 | CLOCK_CAP_RATE_44100 |
+			      CLOCK_CAP_RATE_48000;
+		}
+		if (!(cap & dice->clock_caps))
+			continue;
+
+		base_offset = 0x2000 * mode + 0x1000;
+
+		err = read_transaction(dice, section_addr,
+				       base_offset + EXT_APP_STREAM_TX_NUMBER,
+				       &reg, sizeof(reg));
+		if (err < 0)
+			break;
+
+		base_offset += EXT_APP_STREAM_ENTRIES;
+		stream_count = be32_to_cpu(reg[0]);
+		err = read_stream_entries(dice, section_addr, base_offset,
+					  stream_count, mode,
+					  dice->tx_pcm_chs,
+					  dice->tx_midi_ports);
+		if (err < 0)
+			break;
+
+		base_offset += stream_count * EXT_APP_STREAM_ENTRY_SIZE;
+		stream_count = be32_to_cpu(reg[1]);
+		err = read_stream_entries(dice, section_addr, base_offset,
+					  stream_count,
+					  mode, dice->rx_pcm_chs,
+					  dice->rx_midi_ports);
+		if (err < 0)
+			break;
+	}
+
+	return err;
+}
+
+int snd_dice_detect_extension_formats(struct snd_dice *dice)
+{
+	__be32 *pointers;
+	unsigned int i;
+	u64 section_addr;
+	int err;
+
+	pointers = kmalloc_array(9, sizeof(__be32) * 2, GFP_KERNEL);
+	if (pointers == NULL)
+		return -ENOMEM;
+
+	err = snd_fw_transaction(dice->unit, TCODE_READ_BLOCK_REQUEST,
+				 DICE_EXT_APP_SPACE, pointers,
+				 9 * sizeof(__be32) * 2, 0);
+	if (err < 0)
+		goto end;
+
+	/*
+	 * Bail out if any two sections share the same offset; such a
+	 * layout is bogus.
+	 */
+	for (i = 0; i < 9; ++i) {
+		int j;
+
+		for (j = i + 1; j < 9; ++j) {
+			if (pointers[i * 2] == pointers[j * 2])
+				goto end;
+		}
+	}
+
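+	/*
+	 * The seventh pair of quadlets points to the 'current' section
+	 * (DICE_EXT_APP_CURRENT_OFFSET), hence index 12. The offset is in
+	 * quadlet units.
+	 */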
+	section_addr = DICE_EXT_APP_SPACE + be32_to_cpu(pointers[12]) * 4;
+	err = detect_stream_formats(dice, section_addr);
+end:
+	kfree(pointers);
+	return err;
+}
diff --git a/sound/firewire/dice/dice-interface.h b/sound/firewire/dice/dice-interface.h
index 15a484b..9cad3d6 100644
--- a/sound/firewire/dice/dice-interface.h
+++ b/sound/firewire/dice/dice-interface.h
@@ -175,13 +175,18 @@
 #define GLOBAL_SAMPLE_RATE		0x05c
 
 /*
+ * Some old firmware versions do not have the following global registers.
+ * Early releases of the Windows driver produced by TCAT lost backward
+ * compatibility because they could only handle firmware which supports
+ * these registers.
+ */
+
+/*
  * The version of the DICE driver specification that this device conforms to;
  * read-only.
  */
 #define GLOBAL_VERSION			0x060
 
-/* Some old firmware versions do not have the following global registers: */
-
 /*
  * Supported sample rates and clock sources; read-only.
  */
diff --git a/sound/firewire/dice/dice-midi.c b/sound/firewire/dice/dice-midi.c
index 8ff6da3..84eca8a 100644
--- a/sound/firewire/dice/dice-midi.c
+++ b/sound/firewire/dice/dice-midi.c
@@ -101,27 +101,18 @@
 		.close		= midi_close,
 		.trigger	= midi_playback_trigger,
 	};
-	__be32 reg;
 	struct snd_rawmidi *rmidi;
 	struct snd_rawmidi_str *str;
 	unsigned int midi_in_ports, midi_out_ports;
+	int i;
 	int err;
 
-	/*
-	 * Use the number of MIDI conformant data channel at current sampling
-	 * transfer frequency.
-	 */
-	err = snd_dice_transaction_read_tx(dice, TX_NUMBER_MIDI,
-					   &reg, sizeof(reg));
-	if (err < 0)
-		return err;
-	midi_in_ports = be32_to_cpu(reg);
-
-	err = snd_dice_transaction_read_rx(dice, RX_NUMBER_MIDI,
-					   &reg, sizeof(reg));
-	if (err < 0)
-		return err;
-	midi_out_ports = be32_to_cpu(reg);
+	midi_in_ports = 0;
+	midi_out_ports = 0;
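+	/* Use the maximum number of cached MIDI ports over all streams. */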
+	for (i = 0; i < MAX_STREAMS; ++i) {
+		midi_in_ports = max(midi_in_ports, dice->tx_midi_ports[i]);
+		midi_out_ports = max(midi_out_ports, dice->rx_midi_ports[i]);
+	}
 
 	if (midi_in_ports + midi_out_ports == 0)
 		return 0;
diff --git a/sound/firewire/dice/dice-mytek.c b/sound/firewire/dice/dice-mytek.c
new file mode 100644
index 0000000..eb7d549
--- /dev/null
+++ b/sound/firewire/dice/dice-mytek.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dice-mytek.c - a part of driver for DICE based devices
+ *
+ * Copyright (c) 2018 Melvin Vermeeren
+ */
+
+#include "dice.h"
+
+struct dice_mytek_spec {
+	unsigned int tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT];
+	unsigned int rx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT];
+};
+
+static const struct dice_mytek_spec stereo_192_dsd_dac = {
+	/* AES, TOSLINK, SPDIF, ADAT inputs on device */
+	.tx_pcm_chs = {{8, 8, 8}, {0, 0, 0} },
+	/* PCM 44.1-192, native DSD64/DSD128 to device */
+	.rx_pcm_chs = {{4, 4, 4}, {0, 0, 0} }
+};
+
+/*
+ * Mytek has a few other FireWire-capable devices, though newer models tend
+ * to lack the port. Since I don't have access to any of them, they are
+ * missing here. An example is the Mytek 8x192 ADDA, which is DICE based.
+ */
+
+int snd_dice_detect_mytek_formats(struct snd_dice *dice)
+{
+	int i;
+	const struct dice_mytek_spec *dev;
+
+	dev = &stereo_192_dsd_dac;
+
+	memcpy(dice->tx_pcm_chs, dev->tx_pcm_chs,
+	       MAX_STREAMS * SND_DICE_RATE_MODE_COUNT * sizeof(unsigned int));
+	memcpy(dice->rx_pcm_chs, dev->rx_pcm_chs,
+	       MAX_STREAMS * SND_DICE_RATE_MODE_COUNT * sizeof(unsigned int));
+
+	for (i = 0; i < MAX_STREAMS; ++i) {
+		dice->tx_midi_ports[i] = 0;
+		dice->rx_midi_ports[i] = 0;
+	}
+
+	return 0;
+}
diff --git a/sound/firewire/dice/dice-pcm.c b/sound/firewire/dice/dice-pcm.c
index 7cb9e97..80351b2 100644
--- a/sound/firewire/dice/dice-pcm.c
+++ b/sound/firewire/dice/dice-pcm.c
@@ -9,43 +9,115 @@
 
 #include "dice.h"
 
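+/*
+ * hw_rule callback: refine the rate interval to rates whose rate mode
+ * offers a channel count within the channels interval.
+ */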
+static int dice_rate_constraint(struct snd_pcm_hw_params *params,
+				struct snd_pcm_hw_rule *rule)
+{
+	struct snd_pcm_substream *substream = rule->private;
+	struct snd_dice *dice = substream->private_data;
+	unsigned int index = substream->pcm->device;
+
+	const struct snd_interval *c =
+		hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_CHANNELS);
+	struct snd_interval *r =
+		hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
+	struct snd_interval rates = {
+		.min = UINT_MAX, .max = 0, .integer = 1
+	};
+	unsigned int *pcm_channels;
+	enum snd_dice_rate_mode mode;
+	unsigned int i, rate;
+
+	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+		pcm_channels = dice->tx_pcm_chs[index];
+	else
+		pcm_channels = dice->rx_pcm_chs[index];
+
+	for (i = 0; i < ARRAY_SIZE(snd_dice_rates); ++i) {
+		rate = snd_dice_rates[i];
+		if (snd_dice_stream_get_rate_mode(dice, rate, &mode) < 0)
+			continue;
+
+		if (!snd_interval_test(c, pcm_channels[mode]))
+			continue;
+
+		rates.min = min(rates.min, rate);
+		rates.max = max(rates.max, rate);
+	}
+
+	return snd_interval_refine(r, &rates);
+}
+
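+/*
+ * The mirror hw_rule: refine the channels interval to channel counts
+ * available at some rate within the rate interval.
+ */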
+static int dice_channels_constraint(struct snd_pcm_hw_params *params,
+				    struct snd_pcm_hw_rule *rule)
+{
+	struct snd_pcm_substream *substream = rule->private;
+	struct snd_dice *dice = substream->private_data;
+	unsigned int index = substream->pcm->device;
+
+	const struct snd_interval *r =
+		hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE);
+	struct snd_interval *c =
+		hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
+	struct snd_interval channels = {
+		.min = UINT_MAX, .max = 0, .integer = 1
+	};
+	unsigned int *pcm_channels;
+	enum snd_dice_rate_mode mode;
+	unsigned int i, rate;
+
+	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+		pcm_channels = dice->tx_pcm_chs[index];
+	else
+		pcm_channels = dice->rx_pcm_chs[index];
+
+	for (i = 0; i < ARRAY_SIZE(snd_dice_rates); ++i) {
+		rate = snd_dice_rates[i];
+		if (snd_dice_stream_get_rate_mode(dice, rate, &mode) < 0)
+			continue;
+
+		if (!snd_interval_test(r, rate))
+			continue;
+
+		channels.min = min(channels.min, pcm_channels[mode]);
+		channels.max = max(channels.max, pcm_channels[mode]);
+	}
+
+	return snd_interval_refine(c, &channels);
+}
+
 static int limit_channels_and_rates(struct snd_dice *dice,
 				    struct snd_pcm_runtime *runtime,
 				    enum amdtp_stream_direction dir,
-				    unsigned int index, unsigned int size)
+				    unsigned int index)
 {
 	struct snd_pcm_hardware *hw = &runtime->hw;
-	struct amdtp_stream *stream;
-	unsigned int rate;
-	__be32 reg;
-	int err;
+	unsigned int *pcm_channels;
+	unsigned int i;
 
-	/*
-	 * Retrieve current Multi Bit Linear Audio data channel and limit to
-	 * it.
-	 */
-	if (dir == AMDTP_IN_STREAM) {
-		stream = &dice->tx_stream[index];
-		err = snd_dice_transaction_read_tx(dice,
-				size * index + TX_NUMBER_AUDIO,
-				&reg, sizeof(reg));
-	} else {
-		stream = &dice->rx_stream[index];
-		err = snd_dice_transaction_read_rx(dice,
-				size * index + RX_NUMBER_AUDIO,
-				&reg, sizeof(reg));
+	if (dir == AMDTP_IN_STREAM)
+		pcm_channels = dice->tx_pcm_chs[index];
+	else
+		pcm_channels = dice->rx_pcm_chs[index];
+
+	hw->channels_min = UINT_MAX;
+	hw->channels_max = 0;
+
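+	/*
+	 * Accumulate the rate bits and the channel range over every rate
+	 * mode the unit supports, instead of the single current rate.
+	 */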
+	for (i = 0; i < ARRAY_SIZE(snd_dice_rates); ++i) {
+		enum snd_dice_rate_mode mode;
+		unsigned int rate, channels;
+
+		rate = snd_dice_rates[i];
+		if (snd_dice_stream_get_rate_mode(dice, rate, &mode) < 0)
+			continue;
+		hw->rates |= snd_pcm_rate_to_rate_bit(rate);
+
+		channels = pcm_channels[mode];
+		if (channels == 0)
+			continue;
+		hw->channels_min = min(hw->channels_min, channels);
+		hw->channels_max = max(hw->channels_max, channels);
 	}
-	if (err < 0)
-		return err;
 
-	hw->channels_min = hw->channels_max = be32_to_cpu(reg);
-
-	/* Retrieve current sampling transfer frequency and limit to it. */
-	err = snd_dice_transaction_get_rate(dice, &rate);
-	if (err < 0)
-		return err;
-
-	hw->rates = snd_pcm_rate_to_rate_bit(rate);
 	snd_pcm_limit_hw_rates(runtime);
 
 	return 0;
@@ -56,36 +128,34 @@
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_pcm_hardware *hw = &runtime->hw;
+	unsigned int index = substream->pcm->device;
 	enum amdtp_stream_direction dir;
 	struct amdtp_stream *stream;
-	__be32 reg[2];
-	unsigned int count, size;
 	int err;
 
 	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
 		hw->formats = AM824_IN_PCM_FORMAT_BITS;
 		dir = AMDTP_IN_STREAM;
-		stream = &dice->tx_stream[substream->pcm->device];
-		err = snd_dice_transaction_read_tx(dice, TX_NUMBER, reg,
-						   sizeof(reg));
+		stream = &dice->tx_stream[index];
 	} else {
 		hw->formats = AM824_OUT_PCM_FORMAT_BITS;
 		dir = AMDTP_OUT_STREAM;
-		stream = &dice->rx_stream[substream->pcm->device];
-		err = snd_dice_transaction_read_rx(dice, RX_NUMBER, reg,
-						   sizeof(reg));
+		stream = &dice->rx_stream[index];
 	}
 
+	err = limit_channels_and_rates(dice, substream->runtime, dir,
+				       index);
 	if (err < 0)
 		return err;
 
-	count = min_t(unsigned int, be32_to_cpu(reg[0]), MAX_STREAMS);
-	if (substream->pcm->device >= count)
-		return -ENXIO;
-
-	size = be32_to_cpu(reg[1]) * 4;
-	err = limit_channels_and_rates(dice, substream->runtime, dir,
-				       substream->pcm->device, size);
+	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
+				  dice_rate_constraint, substream,
+				  SNDRV_PCM_HW_PARAM_CHANNELS, -1);
+	if (err < 0)
+		return err;
+	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS,
+				  dice_channels_constraint, substream,
+				  SNDRV_PCM_HW_PARAM_RATE, -1);
 	if (err < 0)
 		return err;
 
@@ -95,6 +165,8 @@
 static int pcm_open(struct snd_pcm_substream *substream)
 {
 	struct snd_dice *dice = substream->private_data;
+	unsigned int source;
+	bool internal;
 	int err;
 
 	err = snd_dice_stream_lock_try(dice);
@@ -105,6 +177,43 @@
 	if (err < 0)
 		goto err_locked;
 
+	err = snd_dice_transaction_get_clock_source(dice, &source);
+	if (err < 0)
+		goto err_locked;
+	switch (source) {
+	case CLOCK_SOURCE_AES1:
+	case CLOCK_SOURCE_AES2:
+	case CLOCK_SOURCE_AES3:
+	case CLOCK_SOURCE_AES4:
+	case CLOCK_SOURCE_AES_ANY:
+	case CLOCK_SOURCE_ADAT:
+	case CLOCK_SOURCE_TDIF:
+	case CLOCK_SOURCE_WC:
+		internal = false;
+		break;
+	default:
+		internal = true;
+		break;
+	}
+
+	/*
+	 * When the clock source is not internal, or when any PCM stream is
+	 * running, the available sampling rate is limited to the current one.
+	 */
+	if (!internal ||
+	    amdtp_stream_pcm_running(&dice->tx_stream[0]) ||
+	    amdtp_stream_pcm_running(&dice->tx_stream[1]) ||
+	    amdtp_stream_pcm_running(&dice->rx_stream[0]) ||
+	    amdtp_stream_pcm_running(&dice->rx_stream[1])) {
+		unsigned int rate;
+
+		err = snd_dice_transaction_get_rate(dice, &rate);
+		if (err < 0)
+			goto err_locked;
+		substream->runtime->hw.rate_min = rate;
+		substream->runtime->hw.rate_max = rate;
+	}
+
 	snd_pcm_set_sync(substream);
 end:
 	return err;
@@ -318,37 +427,19 @@
 		.page      = snd_pcm_lib_get_vmalloc_page,
 		.mmap      = snd_pcm_lib_mmap_vmalloc,
 	};
-	__be32 reg;
 	struct snd_pcm *pcm;
-	unsigned int i, max_capture, max_playback, capture, playback;
+	unsigned int capture, playback;
+	int i, j;
 	int err;
 
-	/* Check whether PCM substreams are required. */
-	if (dice->force_two_pcms) {
-		max_capture = max_playback = 2;
-	} else {
-		max_capture = max_playback = 0;
-		err = snd_dice_transaction_read_tx(dice, TX_NUMBER, &reg,
-						   sizeof(reg));
-		if (err < 0)
-			return err;
-		max_capture = min_t(unsigned int, be32_to_cpu(reg), MAX_STREAMS);
-
-		err = snd_dice_transaction_read_rx(dice, RX_NUMBER, &reg,
-						   sizeof(reg));
-		if (err < 0)
-			return err;
-		max_playback = min_t(unsigned int, be32_to_cpu(reg), MAX_STREAMS);
-	}
-
 	for (i = 0; i < MAX_STREAMS; i++) {
 		capture = playback = 0;
-		if (i < max_capture)
-			capture = 1;
-		if (i < max_playback)
-			playback = 1;
-		if (capture == 0 && playback == 0)
-			break;
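+		/*
+		 * Create substreams only for streams which have PCM
+		 * channels in some rate mode.
+		 */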
+		for (j = 0; j < SND_DICE_RATE_MODE_COUNT; ++j) {
+			if (dice->tx_pcm_chs[i][j] > 0)
+				capture = 1;
+			if (dice->rx_pcm_chs[i][j] > 0)
+				playback = 1;
+		}
 
 		err = snd_pcm_new(dice->card, "DICE", i, playback, capture,
 				  &pcm);
diff --git a/sound/firewire/dice/dice-proc.c b/sound/firewire/dice/dice-proc.c
index f5c1d1b..bb870fc7 100644
--- a/sound/firewire/dice/dice-proc.c
+++ b/sound/firewire/dice/dice-proc.c
@@ -148,12 +148,12 @@
 				   >> CLOCK_RATE_SHIFT));
 	snd_iprintf(buffer, "  ext status: %08x\n", buf.global.extended_status);
 	snd_iprintf(buffer, "  sample rate: %u\n", buf.global.sample_rate);
-	snd_iprintf(buffer, "  version: %u.%u.%u.%u\n",
-		    (buf.global.version >> 24) & 0xff,
-		    (buf.global.version >> 16) & 0xff,
-		    (buf.global.version >>  8) & 0xff,
-		    (buf.global.version >>  0) & 0xff);
 	if (quadlets >= 90) {
+		snd_iprintf(buffer, "  version: %u.%u.%u.%u\n",
+			    (buf.global.version >> 24) & 0xff,
+			    (buf.global.version >> 16) & 0xff,
+			    (buf.global.version >>  8) & 0xff,
+			    (buf.global.version >>  0) & 0xff);
 		snd_iprintf(buffer, "  clock caps:");
 		for (i = 0; i <= 6; ++i)
 			if (buf.global.clock_caps & (1 << i))
@@ -243,10 +243,74 @@
 	}
 }
 
-void snd_dice_create_proc(struct snd_dice *dice)
+static void dice_proc_read_formation(struct snd_info_entry *entry,
+				     struct snd_info_buffer *buffer)
+{
+	static const char *const rate_labels[] = {
+		[SND_DICE_RATE_MODE_LOW]	= "low",
+		[SND_DICE_RATE_MODE_MIDDLE]	= "middle",
+		[SND_DICE_RATE_MODE_HIGH]	= "high",
+	};
+	struct snd_dice *dice = entry->private_data;
+	int i, j;
+
+	snd_iprintf(buffer, "Output stream from unit:\n");
+	for (i = 0; i < SND_DICE_RATE_MODE_COUNT; ++i)
+		snd_iprintf(buffer, "\t%s", rate_labels[i]);
+	snd_iprintf(buffer, "\tMIDI\n");
+	for (i = 0; i < MAX_STREAMS; ++i) {
+		snd_iprintf(buffer, "Tx %u:", i);
+		for (j = 0; j < SND_DICE_RATE_MODE_COUNT; ++j)
+			snd_iprintf(buffer, "\t%u", dice->tx_pcm_chs[i][j]);
+		snd_iprintf(buffer, "\t%u\n", dice->tx_midi_ports[i]);
+	}
+
+	snd_iprintf(buffer, "Input stream to unit:\n");
+	for (i = 0; i < SND_DICE_RATE_MODE_COUNT; ++i)
+		snd_iprintf(buffer, "\t%s", rate_labels[i]);
+	snd_iprintf(buffer, "\n");
+	for (i = 0; i < MAX_STREAMS; ++i) {
+		snd_iprintf(buffer, "Rx %u:", i);
+		for (j = 0; j < SND_DICE_RATE_MODE_COUNT; ++j)
+			snd_iprintf(buffer, "\t%u", dice->rx_pcm_chs[i][j]);
+		snd_iprintf(buffer, "\t%u\n", dice->rx_midi_ports[i]);
+	}
+}
+
+static void add_node(struct snd_dice *dice, struct snd_info_entry *root,
+		     const char *name,
+		     void (*op)(struct snd_info_entry *entry,
+				struct snd_info_buffer *buffer))
 {
 	struct snd_info_entry *entry;
 
-	if (!snd_card_proc_new(dice->card, "dice", &entry))
-		snd_info_set_text_ops(entry, dice, dice_proc_read);
+	entry = snd_info_create_card_entry(dice->card, name, root);
+	if (!entry)
+		return;
+
+	snd_info_set_text_ops(entry, dice, op);
+	if (snd_info_register(entry) < 0)
+		snd_info_free_entry(entry);
+}
+
+void snd_dice_create_proc(struct snd_dice *dice)
+{
+	struct snd_info_entry *root;
+
+	/*
+	 * All of the nodes are removed automatically at
+	 * snd_card_disconnect() by following the linked list.
+	 */
+	root = snd_info_create_card_entry(dice->card, "firewire",
+					  dice->card->proc_root);
+	if (!root)
+		return;
+	root->mode = S_IFDIR | 0555;
+	if (snd_info_register(root) < 0) {
+		snd_info_free_entry(root);
+		return;
+	}
+
+	add_node(dice, root, "dice", dice_proc_read);
+	add_node(dice, root, "formation", dice_proc_read_formation);
 }
diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c
index 928a255..c3c892c 100644
--- a/sound/firewire/dice/dice-stream.c
+++ b/sound/firewire/dice/dice-stream.c
@@ -30,13 +30,43 @@
 	[6] = 192000,
 };
 
+int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate,
+				  enum snd_dice_rate_mode *mode)
+{
+	/* Each entry maps to the same index in snd_dice_rates. */
+	static const enum snd_dice_rate_mode modes[] = {
+		[0] = SND_DICE_RATE_MODE_LOW,
+		[1] = SND_DICE_RATE_MODE_LOW,
+		[2] = SND_DICE_RATE_MODE_LOW,
+		[3] = SND_DICE_RATE_MODE_MIDDLE,
+		[4] = SND_DICE_RATE_MODE_MIDDLE,
+		[5] = SND_DICE_RATE_MODE_HIGH,
+		[6] = SND_DICE_RATE_MODE_HIGH,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(snd_dice_rates); i++) {
+		if (!(dice->clock_caps & BIT(i)))
+			continue;
+		if (snd_dice_rates[i] != rate)
+			continue;
+
+		*mode = modes[i];
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
 /*
  * This operation has an effect to synchronize GLOBAL_STATUS/GLOBAL_SAMPLE_RATE
  * to GLOBAL_STATUS. Especially, just after powering on, these are different.
  */
-static int ensure_phase_lock(struct snd_dice *dice)
+static int ensure_phase_lock(struct snd_dice *dice, unsigned int rate)
 {
 	__be32 reg, nominal;
+	u32 data;
+	int i;
 	int err;
 
 	err = snd_dice_transaction_read_global(dice, GLOBAL_CLOCK_SELECT,
@@ -44,9 +74,21 @@
 	if (err < 0)
 		return err;
 
+	data = be32_to_cpu(reg);
+
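+	/*
+	 * Encode the requested rate as its index in snd_dice_rates, then
+	 * program it into the rate field of GLOBAL_CLOCK_SELECT.
+	 */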
+	data &= ~CLOCK_RATE_MASK;
+	for (i = 0; i < ARRAY_SIZE(snd_dice_rates); ++i) {
+		if (snd_dice_rates[i] == rate)
+			break;
+	}
+	if (i == ARRAY_SIZE(snd_dice_rates))
+		return -EINVAL;
+	data |= i << CLOCK_RATE_SHIFT;
+
 	if (completion_done(&dice->clock_accepted))
 		reinit_completion(&dice->clock_accepted);
 
+	reg = cpu_to_be32(data);
 	err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT,
 						&reg, sizeof(reg));
 	if (err < 0)
@@ -192,6 +234,7 @@
 			 unsigned int rate, struct reg_params *params)
 {
 	__be32 reg[2];
+	enum snd_dice_rate_mode mode;
 	unsigned int i, pcm_chs, midi_ports;
 	struct amdtp_stream *streams;
 	struct fw_iso_resources *resources;
@@ -206,12 +249,23 @@
 		resources = dice->rx_resources;
 	}
 
+	err = snd_dice_stream_get_rate_mode(dice, rate, &mode);
+	if (err < 0)
+		return err;
+
 	for (i = 0; i < params->count; i++) {
+		unsigned int pcm_cache;
+		unsigned int midi_cache;
+
 		if (dir == AMDTP_IN_STREAM) {
+			pcm_cache = dice->tx_pcm_chs[i][mode];
+			midi_cache = dice->tx_midi_ports[i];
 			err = snd_dice_transaction_read_tx(dice,
 					params->size * i + TX_NUMBER_AUDIO,
 					reg, sizeof(reg));
 		} else {
+			pcm_cache = dice->rx_pcm_chs[i][mode];
+			midi_cache = dice->rx_midi_ports[i];
 			err = snd_dice_transaction_read_rx(dice,
 					params->size * i + RX_NUMBER_AUDIO,
 					reg, sizeof(reg));
@@ -221,6 +275,14 @@
 		pcm_chs = be32_to_cpu(reg[0]);
 		midi_ports = be32_to_cpu(reg[1]);
 
+		/*
+		 * A mismatch with the cached formats is a bug worth
+		 * reporting to developers of this driver.
+		 */
+		if (pcm_chs != pcm_cache || midi_ports != midi_cache) {
+			dev_info(&dice->unit->device,
+				 "cache mismatch: pcm: %u:%u, midi: %u:%u\n",
+				 pcm_chs, pcm_cache, midi_ports, midi_cache);
+			return -EPROTO;
+		}
+
 		err = keep_resources(dice, dir, i, rate, pcm_chs, midi_ports);
 		if (err < 0)
 			return err;
@@ -256,6 +318,68 @@
 	return err;
 }
 
+static int start_duplex_streams(struct snd_dice *dice, unsigned int rate)
+{
+	struct reg_params tx_params, rx_params;
+	int i;
+	int err;
+
+	err = get_register_params(dice, &tx_params, &rx_params);
+	if (err < 0)
+		return err;
+
+	/* Stop transmission. */
+	stop_streams(dice, AMDTP_IN_STREAM, &tx_params);
+	stop_streams(dice, AMDTP_OUT_STREAM, &rx_params);
+	snd_dice_transaction_clear_enable(dice);
+	release_resources(dice);
+
+	err = ensure_phase_lock(dice, rate);
+	if (err < 0) {
+		dev_err(&dice->unit->device, "fail to ensure phase lock\n");
+		return err;
+	}
+
+	/* Stream formats are likely to have changed; re-read the register params. */
+	err = get_register_params(dice, &tx_params, &rx_params);
+	if (err < 0)
+		return err;
+
+	/* Start both streams. */
+	err = start_streams(dice, AMDTP_IN_STREAM, rate, &tx_params);
+	if (err < 0)
+		goto error;
+	err = start_streams(dice, AMDTP_OUT_STREAM, rate, &rx_params);
+	if (err < 0)
+		goto error;
+
+	err = snd_dice_transaction_set_enable(dice);
+	if (err < 0) {
+		dev_err(&dice->unit->device, "fail to enable interface\n");
+		goto error;
+	}
+
+	for (i = 0; i < MAX_STREAMS; i++) {
+		if ((i < tx_params.count &&
+		    !amdtp_stream_wait_callback(&dice->tx_stream[i],
+						CALLBACK_TIMEOUT)) ||
+		    (i < rx_params.count &&
+		     !amdtp_stream_wait_callback(&dice->rx_stream[i],
+						 CALLBACK_TIMEOUT))) {
+			err = -ETIMEDOUT;
+			goto error;
+		}
+	}
+
+	return 0;
+error:
+	stop_streams(dice, AMDTP_IN_STREAM, &tx_params);
+	stop_streams(dice, AMDTP_OUT_STREAM, &rx_params);
+	snd_dice_transaction_clear_enable(dice);
+	release_resources(dice);
+	return err;
+}
+
 /*
  * MEMO: After this function, there're two states of streams:
  *  - None streams are running.
@@ -265,17 +389,13 @@
 {
 	unsigned int curr_rate;
 	unsigned int i;
-	struct reg_params tx_params, rx_params;
-	bool need_to_start;
+	enum snd_dice_rate_mode mode;
 	int err;
 
 	if (dice->substreams_counter == 0)
 		return -EIO;
 
-	err = get_register_params(dice, &tx_params, &rx_params);
-	if (err < 0)
-		return err;
-
+	/* Check the current sampling transfer frequency. */
 	err = snd_dice_transaction_get_rate(dice, &curr_rate);
 	if (err < 0) {
 		dev_err(&dice->unit->device,
@@ -285,72 +405,36 @@
 	if (rate == 0)
 		rate = curr_rate;
 	if (rate != curr_rate)
-		return -EINVAL;
+		goto restart;
 
-	/* Judge to need to restart streams. */
-	for (i = 0; i < MAX_STREAMS; i++) {
-		if (i < tx_params.count) {
-			if (amdtp_streaming_error(&dice->tx_stream[i]) ||
-			    !amdtp_stream_running(&dice->tx_stream[i]))
-				break;
-		}
-		if (i < rx_params.count) {
-			if (amdtp_streaming_error(&dice->rx_stream[i]) ||
-			    !amdtp_stream_running(&dice->rx_stream[i]))
-				break;
-		}
+	/* Check for errors in packet streaming. */
+	for (i = 0; i < MAX_STREAMS; ++i) {
+		if (amdtp_streaming_error(&dice->tx_stream[i]))
+			break;
+		if (amdtp_streaming_error(&dice->rx_stream[i]))
+			break;
 	}
-	need_to_start = (i < MAX_STREAMS);
+	if (i < MAX_STREAMS)
+		goto restart;
 
-	if (need_to_start) {
-		/* Stop transmission. */
-		snd_dice_transaction_clear_enable(dice);
-		stop_streams(dice, AMDTP_IN_STREAM, &tx_params);
-		stop_streams(dice, AMDTP_OUT_STREAM, &rx_params);
-		release_resources(dice);
-
-		err = ensure_phase_lock(dice);
-		if (err < 0) {
-			dev_err(&dice->unit->device,
-				"fail to ensure phase lock\n");
-			return err;
-		}
-
-		/* Start both streams. */
-		err = start_streams(dice, AMDTP_IN_STREAM, rate, &tx_params);
-		if (err < 0)
-			goto error;
-		err = start_streams(dice, AMDTP_OUT_STREAM, rate, &rx_params);
-		if (err < 0)
-			goto error;
-
-		err = snd_dice_transaction_set_enable(dice);
-		if (err < 0) {
-			dev_err(&dice->unit->device,
-				"fail to enable interface\n");
-			goto error;
-		}
-
-		for (i = 0; i < MAX_STREAMS; i++) {
-			if ((i < tx_params.count &&
-			    !amdtp_stream_wait_callback(&dice->tx_stream[i],
-							CALLBACK_TIMEOUT)) ||
-			    (i < rx_params.count &&
-			     !amdtp_stream_wait_callback(&dice->rx_stream[i],
-							 CALLBACK_TIMEOUT))) {
-				err = -ETIMEDOUT;
-				goto error;
-			}
-		}
+	/* Check whether all required streams are running. */
+	err = snd_dice_stream_get_rate_mode(dice, rate, &mode);
+	if (err < 0)
+		return err;
+	for (i = 0; i < MAX_STREAMS; ++i) {
+		if (dice->tx_pcm_chs[i][mode] > 0 &&
+		    !amdtp_stream_running(&dice->tx_stream[i]))
+			break;
+		if (dice->rx_pcm_chs[i][mode] > 0 &&
+		    !amdtp_stream_running(&dice->rx_stream[i]))
+			break;
 	}
+	if (i < MAX_STREAMS)
+		goto restart;
 
-	return err;
-error:
-	snd_dice_transaction_clear_enable(dice);
-	stop_streams(dice, AMDTP_IN_STREAM, &tx_params);
-	stop_streams(dice, AMDTP_OUT_STREAM, &rx_params);
-	release_resources(dice);
-	return err;
+	return 0;
+restart:
+	return start_duplex_streams(dice, rate);
 }
 
 /*
@@ -484,6 +568,69 @@
 	}
 }
 
+int snd_dice_stream_detect_current_formats(struct snd_dice *dice)
+{
+	unsigned int rate;
+	enum snd_dice_rate_mode mode;
+	__be32 reg[2];
+	struct reg_params tx_params, rx_params;
+	int i;
+	int err;
+
+	/* If the extended protocol is available, detect the detailed spec with it. */
+	err = snd_dice_detect_extension_formats(dice);
+	if (err >= 0)
+		return err;
+
+	/*
+	 * The available stream formats are restricted by the current mode
+	 * of the sampling clock.
+	 */
+	err = snd_dice_transaction_get_rate(dice, &rate);
+	if (err < 0)
+		return err;
+
+	err = snd_dice_stream_get_rate_mode(dice, rate, &mode);
+	if (err < 0)
+		return err;
+
+	/*
+	 * Just after being owned (GLOBAL_OWNER), the unit can return
+	 * invalid stream formats. Selecting the clock parameters prompts
+	 * the unit to refine them.
+	 */
+	err = ensure_phase_lock(dice, rate);
+	if (err < 0)
+		return err;
+
+	err = get_register_params(dice, &tx_params, &rx_params);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < tx_params.count; ++i) {
+		err = snd_dice_transaction_read_tx(dice,
+				tx_params.size * i + TX_NUMBER_AUDIO,
+				reg, sizeof(reg));
+		if (err < 0)
+			return err;
+		dice->tx_pcm_chs[i][mode] = be32_to_cpu(reg[0]);
+		dice->tx_midi_ports[i] = max_t(unsigned int,
+				be32_to_cpu(reg[1]), dice->tx_midi_ports[i]);
+	}
+	for (i = 0; i < rx_params.count; ++i) {
+		err = snd_dice_transaction_read_rx(dice,
+				rx_params.size * i + RX_NUMBER_AUDIO,
+				reg, sizeof(reg));
+		if (err < 0)
+			return err;
+		dice->rx_pcm_chs[i][mode] = be32_to_cpu(reg[0]);
+		dice->rx_midi_ports[i] = max_t(unsigned int,
+				be32_to_cpu(reg[1]), dice->rx_midi_ports[i]);
+	}
+
+	return 0;
+}
+
 static void dice_lock_changed(struct snd_dice *dice)
 {
 	dice->dev_lock_changed = true;
diff --git a/sound/firewire/dice/dice-tcelectronic.c b/sound/firewire/dice/dice-tcelectronic.c
new file mode 100644
index 0000000..a8875d2
--- /dev/null
+++ b/sound/firewire/dice/dice-tcelectronic.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dice-tcelectronic.c - a part of driver for DICE based devices
+ *
+ * Copyright (c) 2018 Takashi Sakamoto
+ */
+
+#include "dice.h"
+
+struct dice_tc_spec {
+	unsigned int tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT];
+	unsigned int rx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT];
+	bool has_midi;
+};
+
+static const struct dice_tc_spec desktop_konnekt6 = {
+	.tx_pcm_chs = {{6, 6, 2}, {0, 0, 0} },
+	.rx_pcm_chs = {{6, 6, 4}, {0, 0, 0} },
+	.has_midi = false,
+};
+
+static const struct dice_tc_spec impact_twin = {
+	.tx_pcm_chs = {{14, 10, 6}, {0, 0, 0} },
+	.rx_pcm_chs = {{14, 10, 6}, {0, 0, 0} },
+	.has_midi = true,
+};
+
+static const struct dice_tc_spec konnekt_8 = {
+	.tx_pcm_chs = {{4, 4, 3}, {0, 0, 0} },
+	.rx_pcm_chs = {{4, 4, 3}, {0, 0, 0} },
+	.has_midi = true,
+};
+
+static const struct dice_tc_spec konnekt_24d = {
+	.tx_pcm_chs = {{16, 16, 6}, {0, 0, 0} },
+	.rx_pcm_chs = {{16, 16, 6}, {0, 0, 0} },
+	.has_midi = true,
+};
+
+static const struct dice_tc_spec konnekt_live = {
+	.tx_pcm_chs = {{16, 16, 16}, {0, 0, 0} },
+	.rx_pcm_chs = {{16, 16, 16}, {0, 0, 0} },
+	.has_midi = true,
+};
+
+static const struct dice_tc_spec studio_konnekt_48 = {
+	.tx_pcm_chs = {{16, 16, 8}, {16, 16, 7} },
+	.rx_pcm_chs = {{16, 16, 8}, {14, 14, 7} },
+	.has_midi = true,
+};
+
+static const struct dice_tc_spec digital_konnekt_x32 = {
+	.tx_pcm_chs = {{16, 16, 4}, {0, 0, 0} },
+	.rx_pcm_chs = {{16, 16, 4}, {0, 0, 0} },
+	.has_midi = false,
+};
+
+int snd_dice_detect_tcelectronic_formats(struct snd_dice *dice)
+{
+	static const struct {
+		u32 model_id;
+		const struct dice_tc_spec *spec;
+	} *entry, entries[] = {
+		{0x00000020, &konnekt_24d},
+		{0x00000021, &konnekt_8},
+		{0x00000022, &studio_konnekt_48},
+		{0x00000023, &konnekt_live},
+		{0x00000024, &desktop_konnekt6},
+		{0x00000027, &impact_twin},
+		{0x00000030, &digital_konnekt_x32},
+	};
+	struct fw_csr_iterator it;
+	int key, val, model_id;
+	int i;
+
+	model_id = 0;
+	fw_csr_iterator_init(&it, dice->unit->directory);
+	while (fw_csr_iterator_next(&it, &key, &val)) {
+		if (key == CSR_MODEL) {
+			model_id = val;
+			break;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(entries); ++i) {
+		entry = entries + i;
+		if (entry->model_id == model_id)
+			break;
+	}
+	if (i == ARRAY_SIZE(entries))
+		return -ENODEV;
+
+	memcpy(dice->tx_pcm_chs, entry->spec->tx_pcm_chs,
+	       MAX_STREAMS * SND_DICE_RATE_MODE_COUNT * sizeof(unsigned int));
+	memcpy(dice->rx_pcm_chs, entry->spec->rx_pcm_chs,
+	       MAX_STREAMS * SND_DICE_RATE_MODE_COUNT * sizeof(unsigned int));
+
+	if (entry->spec->has_midi) {
+		dice->tx_midi_ports[0] = 1;
+		dice->rx_midi_ports[0] = 1;
+	}
+
+	return 0;
+}
diff --git a/sound/firewire/dice/dice-transaction.c b/sound/firewire/dice/dice-transaction.c
index 0f03503..b7e138b 100644
--- a/sound/firewire/dice/dice-transaction.c
+++ b/sound/firewire/dice/dice-transaction.c
@@ -265,7 +265,7 @@
 static int get_subaddrs(struct snd_dice *dice)
 {
 	static const int min_values[10] = {
-		10, 0x64 / 4,
+		10, 0x60 / 4,
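+		/* Old firmware may end the global section right after
+		 * GLOBAL_SAMPLE_RATE, i.e. at 0x60 bytes. */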
 		10, 0x18 / 4,
 		10, 0x18 / 4,
 		0, 0,
@@ -301,33 +301,40 @@
 		}
 	}
 
-	/*
-	 * Check that the implemented DICE driver specification major version
-	 * number matches.
-	 */
-	err = snd_fw_transaction(dice->unit, TCODE_READ_QUADLET_REQUEST,
-				 DICE_PRIVATE_SPACE +
-				 be32_to_cpu(pointers[0]) * 4 + GLOBAL_VERSION,
-				 &version, sizeof(version), 0);
-	if (err < 0)
-		goto end;
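+	/*
+	 * pointers[1] is the size of the global section in quadlets; the
+	 * version register exists only when the section is larger than
+	 * the minimum.
+	 */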
+	if (be32_to_cpu(pointers[1]) > 0x18) {
+		/*
+		 * Check that the implemented DICE driver specification major
+		 * version number matches.
+		 */
+		err = snd_fw_transaction(dice->unit, TCODE_READ_QUADLET_REQUEST,
+				DICE_PRIVATE_SPACE +
+				be32_to_cpu(pointers[0]) * 4 + GLOBAL_VERSION,
+				&version, sizeof(version), 0);
+		if (err < 0)
+			goto end;
 
-	if ((version & cpu_to_be32(0xff000000)) != cpu_to_be32(0x01000000)) {
-		dev_err(&dice->unit->device,
-			"unknown DICE version: 0x%08x\n", be32_to_cpu(version));
-		err = -ENODEV;
-		goto end;
+		if ((version & cpu_to_be32(0xff000000)) !=
+						cpu_to_be32(0x01000000)) {
+			dev_err(&dice->unit->device,
+				"unknown DICE version: 0x%08x\n",
+				be32_to_cpu(version));
+			err = -ENODEV;
+			goto end;
+		}
+
+		/* Set up later. */
+		dice->clock_caps = 1;
 	}
 
 	dice->global_offset = be32_to_cpu(pointers[0]) * 4;
 	dice->tx_offset = be32_to_cpu(pointers[2]) * 4;
 	dice->rx_offset = be32_to_cpu(pointers[4]) * 4;
-	dice->sync_offset = be32_to_cpu(pointers[6]) * 4;
-	dice->rsrv_offset = be32_to_cpu(pointers[8]) * 4;
 
-	/* Set up later. */
-	if (be32_to_cpu(pointers[1]) * 4 >= GLOBAL_CLOCK_CAPABILITIES + 4)
-		dice->clock_caps = 1;
+	/* Old firmware doesn't support these fields. */
+	if (pointers[7])
+		dice->sync_offset = be32_to_cpu(pointers[6]) * 4;
+	if (pointers[9])
+		dice->rsrv_offset = be32_to_cpu(pointers[8]) * 4;
 end:
 	kfree(pointers);
 	return err;
diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 96bb01b..774eb22 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -15,40 +15,15 @@
 #define OUI_LOUD		0x000ff2
 #define OUI_FOCUSRITE		0x00130e
 #define OUI_TCELECTRONIC	0x000166
+#define OUI_ALESIS		0x000595
+#define OUI_MAUDIO		0x000d6c
+#define OUI_MYTEK		0x001ee8
 
 #define DICE_CATEGORY_ID	0x04
 #define WEISS_CATEGORY_ID	0x00
 #define LOUD_CATEGORY_ID	0x10
 
-/*
- * Some models support several isochronous channels, while these streams are not
- * always available. In this case, add the model name to this list.
- */
-static bool force_two_pcm_support(struct fw_unit *unit)
-{
-	static const char *const models[] = {
-		/* TC Electronic models. */
-		"StudioKonnekt48",
-		/* Focusrite models. */
-		"SAFFIRE_PRO_40",
-		"LIQUID_SAFFIRE_56",
-		"SAFFIRE_PRO_40_1",
-	};
-	char model[32];
-	unsigned int i;
-	int err;
-
-	err = fw_csr_string(unit->directory, CSR_MODEL, model, sizeof(model));
-	if (err < 0)
-		return false;
-
-	for (i = 0; i < ARRAY_SIZE(models); i++) {
-		if (strcmp(models[i], model) == 0)
-			break;
-	}
-
-	return i < ARRAY_SIZE(models);
-}
+#define MODEL_ALESIS_IO_BOTH	0x000001
 
 static int check_dice_category(struct fw_unit *unit)
 {
@@ -75,11 +50,6 @@
 		}
 	}
 
-	if (vendor == OUI_FOCUSRITE || vendor == OUI_TCELECTRONIC) {
-		if (force_two_pcm_support(unit))
-			return 0;
-	}
-
 	if (vendor == OUI_WEISS)
 		category = WEISS_CATEGORY_ID;
 	else if (vendor == OUI_LOUD)
@@ -186,9 +156,6 @@
 	if (err < 0)
 		return;
 
-	if (force_two_pcm_support(dice->unit))
-		dice->force_two_pcms = true;
-
 	err = snd_dice_transaction_init(dice);
 	if (err < 0)
 		goto error;
@@ -199,6 +166,10 @@
 
 	dice_card_strings(dice);
 
+	err = dice->detect_formats(dice);
+	if (err < 0)
+		goto error;
+
 	err = snd_dice_stream_init_duplex(dice);
 	if (err < 0)
 		goto error;
@@ -239,14 +210,17 @@
 		 "Sound card registration failed: %d\n", err);
 }
 
-static int dice_probe(struct fw_unit *unit, const struct ieee1394_device_id *id)
+static int dice_probe(struct fw_unit *unit,
+		      const struct ieee1394_device_id *entry)
 {
 	struct snd_dice *dice;
 	int err;
 
-	err = check_dice_category(unit);
-	if (err < 0)
-		return -ENODEV;
+	if (!entry->driver_data) {
+		err = check_dice_category(unit);
+		if (err < 0)
+			return -ENODEV;
+	}
 
 	/* Allocate this independent of sound card instance. */
 	dice = kzalloc(sizeof(struct snd_dice), GFP_KERNEL);
@@ -256,6 +230,13 @@
 	dice->unit = fw_unit_get(unit);
 	dev_set_drvdata(&unit->device, dice);
 
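+	/*
+	 * Model-specific entries in dice_id_table pass a format-detection
+	 * callback via driver_data; without it, read the current formats
+	 * from the unit.
+	 */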
+	if (!entry->driver_data) {
+		dice->detect_formats = snd_dice_stream_detect_current_formats;
+	} else {
+		dice->detect_formats =
+				(snd_dice_detect_formats_t)entry->driver_data;
+	}
+
 	spin_lock_init(&dice->lock);
 	mutex_init(&dice->mutex);
 	init_completion(&dice->clock_accepted);
@@ -313,17 +294,98 @@
 #define DICE_INTERFACE	0x000001
 
 static const struct ieee1394_device_id dice_id_table[] = {
+	/* M-Audio Profire 2626 has a different value in the version field. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_MAUDIO,
+		.model_id	= 0x000010,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_extension_formats,
+	},
+	/* M-Audio Profire 610 has a different value in the version field. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_MAUDIO,
+		.model_id	= 0x000011,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_extension_formats,
+	},
+	/* TC Electronic Konnekt 24D. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_TCELECTRONIC,
+		.model_id	= 0x000020,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_tcelectronic_formats,
+	},
+	/* TC Electronic Konnekt 8. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_TCELECTRONIC,
+		.model_id	= 0x000021,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_tcelectronic_formats,
+	},
+	/* TC Electronic Studio Konnekt 48. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_TCELECTRONIC,
+		.model_id	= 0x000022,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_tcelectronic_formats,
+	},
+	/* TC Electronic Konnekt Live. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_TCELECTRONIC,
+		.model_id	= 0x000023,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_tcelectronic_formats,
+	},
+	/* TC Electronic Desktop Konnekt 6. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_TCELECTRONIC,
+		.model_id	= 0x000024,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_tcelectronic_formats,
+	},
+	/* TC Electronic Impact Twin. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_TCELECTRONIC,
+		.model_id	= 0x000027,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_tcelectronic_formats,
+	},
+	/* TC Electronic Digital Konnekt x32. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_TCELECTRONIC,
+		.model_id	= 0x000030,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_tcelectronic_formats,
+	},
+	/* Alesis iO14/iO26. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_ALESIS,
+		.model_id	= MODEL_ALESIS_IO_BOTH,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_alesis_formats,
+	},
+	/* Mytek Stereo 192 DSD-DAC. */
+	{
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
+				  IEEE1394_MATCH_MODEL_ID,
+		.vendor_id	= OUI_MYTEK,
+		.model_id	= 0x000002,
+		.driver_data = (kernel_ulong_t)snd_dice_detect_mytek_formats,
+	},
 	{
 		.match_flags = IEEE1394_MATCH_VERSION,
 		.version     = DICE_INTERFACE,
 	},
-	/* M-Audio Profire 610/2626 has a different value in version field. */
-	{
-		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
-				  IEEE1394_MATCH_SPECIFIER_ID,
-		.vendor_id	= 0x000d6c,
-		.specifier_id	= 0x000d6c,
-	},
 	{ }
 };
 MODULE_DEVICE_TABLE(ieee1394, dice_id_table);
diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h
index da00e75..83353a3 100644
--- a/sound/firewire/dice/dice.h
+++ b/sound/firewire/dice/dice.h
@@ -63,6 +63,16 @@
  */
 #define MAX_STREAMS	2
 
+enum snd_dice_rate_mode {
+	SND_DICE_RATE_MODE_LOW = 0,
+	SND_DICE_RATE_MODE_MIDDLE,
+	SND_DICE_RATE_MODE_HIGH,
+	SND_DICE_RATE_MODE_COUNT,
+};
+
+struct snd_dice;
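+/* Per-model callback to detect and cache available stream formats. */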
+typedef int (*snd_dice_detect_formats_t)(struct snd_dice *dice);
+
 struct snd_dice {
 	struct snd_card *card;
 	struct fw_unit *unit;
@@ -80,6 +90,11 @@
 	unsigned int rsrv_offset;
 
 	unsigned int clock_caps;
+	unsigned int tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT];
+	unsigned int rx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT];
+	unsigned int tx_midi_ports[MAX_STREAMS];
+	unsigned int rx_midi_ports[MAX_STREAMS];
+	snd_dice_detect_formats_t detect_formats;
 
 	struct fw_address_handler notification_handler;
 	int owner_generation;
@@ -98,8 +113,6 @@
 	bool global_enabled;
 	struct completion clock_accepted;
 	unsigned int substreams_counter;
-
-	bool force_two_pcms;
 };
 
 enum snd_dice_addr_type {
@@ -190,11 +203,14 @@
 #define SND_DICE_RATES_COUNT	7
 extern const unsigned int snd_dice_rates[SND_DICE_RATES_COUNT];
 
+int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate,
+				  enum snd_dice_rate_mode *mode);
 int snd_dice_stream_start_duplex(struct snd_dice *dice, unsigned int rate);
 void snd_dice_stream_stop_duplex(struct snd_dice *dice);
 int snd_dice_stream_init_duplex(struct snd_dice *dice);
 void snd_dice_stream_destroy_duplex(struct snd_dice *dice);
 void snd_dice_stream_update_duplex(struct snd_dice *dice);
+int snd_dice_stream_detect_current_formats(struct snd_dice *dice);
 
 int snd_dice_stream_lock_try(struct snd_dice *dice);
 void snd_dice_stream_lock_release(struct snd_dice *dice);
@@ -207,4 +223,9 @@
 
 int snd_dice_create_midi(struct snd_dice *dice);
 
+int snd_dice_detect_tcelectronic_formats(struct snd_dice *dice);
+int snd_dice_detect_alesis_formats(struct snd_dice *dice);
+int snd_dice_detect_extension_formats(struct snd_dice *dice);
+int snd_dice_detect_mytek_formats(struct snd_dice *dice);
+
 #endif
diff --git a/sound/firewire/digi00x/digi00x-proc.c b/sound/firewire/digi00x/digi00x-proc.c
index a1d601f..6996d5a 100644
--- a/sound/firewire/digi00x/digi00x-proc.c
+++ b/sound/firewire/digi00x/digi00x-proc.c
@@ -79,7 +79,7 @@
 	if (root == NULL)
 		return;
 
-	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	root->mode = S_IFDIR | 0555;
 	if (snd_info_register(root) < 0) {
 		snd_info_free_entry(root);
 		return;
diff --git a/sound/firewire/fireface/ff-proc.c b/sound/firewire/fireface/ff-proc.c
index 69441d1..40ccbfd 100644
--- a/sound/firewire/fireface/ff-proc.c
+++ b/sound/firewire/fireface/ff-proc.c
@@ -52,7 +52,7 @@
 					  ff->card->proc_root);
 	if (root == NULL)
 		return;
-	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	root->mode = S_IFDIR | 0555;
 	if (snd_info_register(root) < 0) {
 		snd_info_free_entry(root);
 		return;
diff --git a/sound/firewire/fireworks/fireworks_proc.c b/sound/firewire/fireworks/fireworks_proc.c
index 9c21f31..779ecec 100644
--- a/sound/firewire/fireworks/fireworks_proc.c
+++ b/sound/firewire/fireworks/fireworks_proc.c
@@ -219,7 +219,7 @@
 					  efw->card->proc_root);
 	if (root == NULL)
 		return;
-	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	root->mode = S_IFDIR | 0555;
 	if (snd_info_register(root) < 0) {
 		snd_info_free_entry(root);
 		return;
diff --git a/sound/firewire/isight.c b/sound/firewire/isight.c
index 46092fa..3919e18 100644
--- a/sound/firewire/isight.c
+++ b/sound/firewire/isight.c
@@ -569,18 +569,20 @@
 		return err;
 	isight->gain_max = be32_to_cpu(value);
 
-	isight->gain_tlv[0] = SNDRV_CTL_TLVT_DB_MINMAX;
-	isight->gain_tlv[1] = 2 * sizeof(unsigned int);
+	isight->gain_tlv[SNDRV_CTL_TLVO_TYPE] = SNDRV_CTL_TLVT_DB_MINMAX;
+	isight->gain_tlv[SNDRV_CTL_TLVO_LEN] = 2 * sizeof(unsigned int);
 
 	err = reg_read(isight, REG_GAIN_DB_START, &value);
 	if (err < 0)
 		return err;
-	isight->gain_tlv[2] = (s32)be32_to_cpu(value) * 100;
+	isight->gain_tlv[SNDRV_CTL_TLVO_DB_MINMAX_MIN] =
+						(s32)be32_to_cpu(value) * 100;
 
 	err = reg_read(isight, REG_GAIN_DB_END, &value);
 	if (err < 0)
 		return err;
-	isight->gain_tlv[3] = (s32)be32_to_cpu(value) * 100;
+	isight->gain_tlv[SNDRV_CTL_TLVO_DB_MINMAX_MAX] =
+						(s32)be32_to_cpu(value) * 100;
 
 	ctl = snd_ctl_new1(&gain_control, isight);
 	if (ctl)
diff --git a/sound/firewire/motu/motu-proc.c b/sound/firewire/motu/motu-proc.c
index 4edc064..ab6830a 100644
--- a/sound/firewire/motu/motu-proc.c
+++ b/sound/firewire/motu/motu-proc.c
@@ -107,7 +107,7 @@
 					  motu->card->proc_root);
 	if (root == NULL)
 		return;
-	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	root->mode = S_IFDIR | 0555;
 	if (snd_info_register(root) < 0) {
 		snd_info_free_entry(root);
 		return;
diff --git a/sound/firewire/oxfw/oxfw-proc.c b/sound/firewire/oxfw/oxfw-proc.c
index 8ba4f9f..27dac07 100644
--- a/sound/firewire/oxfw/oxfw-proc.c
+++ b/sound/firewire/oxfw/oxfw-proc.c
@@ -103,7 +103,7 @@
 					  oxfw->card->proc_root);
 	if (root == NULL)
 		return;
-	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	root->mode = S_IFDIR | 0555;
 	if (snd_info_register(root) < 0) {
 		snd_info_free_entry(root);
 		return;
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index 413ab63..1e5b2c8 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -49,7 +49,6 @@
 		"Tapco LINK.firewire 4x6",
 		"U.420"};
 	char model[32];
-	unsigned int i;
 	int err;
 
 	err = fw_csr_string(unit->directory, CSR_MODEL,
@@ -57,12 +56,7 @@
 	if (err < 0)
 		return false;
 
-	for (i = 0; i < ARRAY_SIZE(models); i++) {
-		if (strcmp(models[i], model) == 0)
-			break;
-	}
-
-	return (i < ARRAY_SIZE(models));
+	return match_string(models, ARRAY_SIZE(models), model) >= 0;
 }
 
 static int name_card(struct snd_oxfw *oxfw)
diff --git a/sound/firewire/tascam/tascam-proc.c b/sound/firewire/tascam/tascam-proc.c
index bfd4a4c..fee3bf3 100644
--- a/sound/firewire/tascam/tascam-proc.c
+++ b/sound/firewire/tascam/tascam-proc.c
@@ -78,7 +78,7 @@
 					  tscm->card->proc_root);
 	if (root == NULL)
 		return;
-	root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+	root->mode = S_IFDIR | 0555;
 	if (snd_info_register(root) < 0) {
 		snd_info_free_entry(root);
 		return;
diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
index 47a358f..419e285 100644
--- a/sound/hda/hdac_regmap.c
+++ b/sound/hda/hdac_regmap.c
@@ -65,10 +65,10 @@
 {
 	struct hdac_device *codec = dev_to_hdac_dev(dev);
 	unsigned int verb = get_verb(reg);
+	const unsigned int *v;
 	int i;
 
-	for (i = 0; i < codec->vendor_verbs.used; i++) {
-		unsigned int *v = snd_array_elem(&codec->vendor_verbs, i);
+	snd_array_for_each(&codec->vendor_verbs, i, v) {
 		if (verb == *v)
 			return true;
 	}
diff --git a/sound/isa/cmi8328.c b/sound/isa/cmi8328.c
index d09e456..de6ef1b 100644
--- a/sound/isa/cmi8328.c
+++ b/sound/isa/cmi8328.c
@@ -192,7 +192,7 @@
 }
 
 /* find index of an item in "-1"-ended array */
-int array_find(int array[], int item)
+static int array_find(int array[], int item)
 {
 	int i;
 
@@ -203,7 +203,7 @@
 	return -1;
 }
 /* the same for long */
-int array_find_l(long array[], long item)
+static int array_find_l(long array[], long item)
 {
 	int i;
 
diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c
index 45e561c..6c584d9b 100644
--- a/sound/isa/msnd/msnd_pinnacle.c
+++ b/sound/isa/msnd/msnd_pinnacle.c
@@ -757,9 +757,9 @@
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
 
-module_param_array(index, int, NULL, S_IRUGO);
+module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for msnd_pinnacle soundcard.");
-module_param_array(id, charp, NULL, S_IRUGO);
+module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ID string for msnd_pinnacle soundcard.");
 
 static long io[SNDRV_CARDS] = SNDRV_DEFAULT_PORT;
@@ -801,22 +801,22 @@
 MODULE_FIRMWARE(INITCODEFILE);
 MODULE_FIRMWARE(PERMCODEFILE);
 
-module_param_hw_array(io, long, ioport, NULL, S_IRUGO);
+module_param_hw_array(io, long, ioport, NULL, 0444);
 MODULE_PARM_DESC(io, "IO port #");
-module_param_hw_array(irq, int, irq, NULL, S_IRUGO);
-module_param_hw_array(mem, long, iomem, NULL, S_IRUGO);
-module_param_array(write_ndelay, int, NULL, S_IRUGO);
-module_param(calibrate_signal, int, S_IRUGO);
+module_param_hw_array(irq, int, irq, NULL, 0444);
+module_param_hw_array(mem, long, iomem, NULL, 0444);
+module_param_array(write_ndelay, int, NULL, 0444);
+module_param(calibrate_signal, int, 0444);
 #ifndef MSND_CLASSIC
-module_param_array(digital, int, NULL, S_IRUGO);
-module_param_hw_array(cfg, long, ioport, NULL, S_IRUGO);
-module_param_array(reset, int, 0, S_IRUGO);
-module_param_hw_array(mpu_io, long, ioport, NULL, S_IRUGO);
-module_param_hw_array(mpu_irq, int, irq, NULL, S_IRUGO);
-module_param_hw_array(ide_io0, long, ioport, NULL, S_IRUGO);
-module_param_hw_array(ide_io1, long, ioport, NULL, S_IRUGO);
-module_param_hw_array(ide_irq, int, irq, NULL, S_IRUGO);
-module_param_hw_array(joystick_io, long, ioport, NULL, S_IRUGO);
+module_param_array(digital, int, NULL, 0444);
+module_param_hw_array(cfg, long, ioport, NULL, 0444);
+module_param_array(reset, int, 0, 0444);
+module_param_hw_array(mpu_io, long, ioport, NULL, 0444);
+module_param_hw_array(mpu_irq, int, irq, NULL, 0444);
+module_param_hw_array(ide_io0, long, ioport, NULL, 0444);
+module_param_hw_array(ide_io1, long, ioport, NULL, 0444);
+module_param_hw_array(ide_irq, int, irq, NULL, 0444);
+module_param_hw_array(joystick_io, long, ioport, NULL, 0444);
 #endif
 
 
diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c
index c09d9b9..a985e91 100644
--- a/sound/isa/sc6000.c
+++ b/sound/isa/sc6000.c
@@ -592,7 +592,7 @@
 	*vport = devm_ioport_map(devptr, port[dev], 0x10);
 	if (*vport == NULL) {
 		snd_printk(KERN_ERR PFX
-			   "I/O port cannot be iomaped.\n");
+			   "I/O port cannot be iomapped.\n");
 		err = -EBUSY;
 		goto err_unmap1;
 	}
@@ -607,7 +607,7 @@
 	vmss_port = devm_ioport_map(devptr, mss_port[dev], 4);
 	if (!vmss_port) {
 		snd_printk(KERN_ERR PFX
-			   "MSS port I/O cannot be iomaped.\n");
+			   "MSS port I/O cannot be iomapped.\n");
 		err = -EBUSY;
 		goto err_unmap2;
 	}
diff --git a/sound/pci/ac97/ac97_proc.c b/sound/pci/ac97/ac97_proc.c
index 6320bf0..e120a11 100644
--- a/sound/pci/ac97/ac97_proc.c
+++ b/sound/pci/ac97/ac97_proc.c
@@ -448,7 +448,7 @@
 	if ((entry = snd_info_create_card_entry(ac97->bus->card, name, ac97->bus->proc)) != NULL) {
 		snd_info_set_text_ops(entry, ac97, snd_ac97_proc_regs_read);
 #ifdef CONFIG_SND_DEBUG
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 		entry->c.text.write = snd_ac97_proc_regs_write;
 #endif
 		if (snd_info_register(entry) < 0) {
@@ -474,7 +474,7 @@
 
 	sprintf(name, "codec97#%d", bus->num);
 	if ((entry = snd_info_create_card_entry(bus->card, name, bus->card->proc_root)) != NULL) {
-		entry->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+		entry->mode = S_IFDIR | 0555;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
 			entry = NULL;
diff --git a/sound/pci/ad1889.c b/sound/pci/ad1889.c
index 0bf2c04..d9c54c0 100644
--- a/sound/pci/ad1889.c
+++ b/sound/pci/ad1889.c
@@ -258,7 +258,7 @@
 	
 	while (!(ad1889_readw(chip, AD_AC97_ACIC) & AD_AC97_ACIC_ACRDY) 
 			&& --retry)
-		mdelay(1);
+		usleep_range(1000, 2000);
 	if (!retry) {
 		dev_err(chip->card->dev, "[%s] Link is not ready.\n",
 			__func__);
@@ -872,7 +872,7 @@
 	ad1889_writew(chip, AD_DS_CCS, AD_DS_CCS_CLKEN); /* turn on clock */
 	ad1889_readw(chip, AD_DS_CCS);	/* flush posted write */
 
-	mdelay(10);
+	usleep_range(10000, 11000);
 
 	/* enable Master and Target abort interrupts */
 	ad1889_writel(chip, AD_DMA_DISR, AD_DMA_DISR_PMAE | AD_DMA_DISR_PTAE);
diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c
index 7203614..64e0961 100644
--- a/sound/pci/asihpi/asihpi.c
+++ b/sound/pci/asihpi/asihpi.c
@@ -69,27 +69,27 @@
 static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
 static bool enable_hpi_hwdep = 1;
 
-module_param_array(index, int, NULL, S_IRUGO);
+module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "ALSA index value for AudioScience soundcard.");
 
-module_param_array(id, charp, NULL, S_IRUGO);
+module_param_array(id, charp, NULL, 0444);
 MODULE_PARM_DESC(id, "ALSA ID string for AudioScience soundcard.");
 
-module_param_array(enable, bool, NULL, S_IRUGO);
+module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "ALSA enable AudioScience soundcard.");
 
-module_param(enable_hpi_hwdep, bool, S_IRUGO|S_IWUSR);
+module_param(enable_hpi_hwdep, bool, 0644);
 MODULE_PARM_DESC(enable_hpi_hwdep,
 		"ALSA enable HPI hwdep for AudioScience soundcard ");
 
 /* identify driver */
 #ifdef KERNEL_ALSA_BUILD
 static char *build_info = "Built using headers from kernel source";
-module_param(build_info, charp, S_IRUGO);
+module_param(build_info, charp, 0444);
 MODULE_PARM_DESC(build_info, "Built using headers from kernel source");
 #else
 static char *build_info = "Built within ALSA source";
-module_param(build_info, charp, S_IRUGO);
+module_param(build_info, charp, 0444);
 MODULE_PARM_DESC(build_info, "Built within ALSA source");
 #endif
 
diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c
index b1a2a7e..7d04956 100644
--- a/sound/pci/asihpi/hpioctl.c
+++ b/sound/pci/asihpi/hpioctl.c
@@ -46,14 +46,14 @@
 #endif
 
 static int prealloc_stream_buf;
-module_param(prealloc_stream_buf, int, S_IRUGO);
+module_param(prealloc_stream_buf, int, 0444);
 MODULE_PARM_DESC(prealloc_stream_buf,
 	"Preallocate size for per-adapter stream buffer");
 
 /* Allow the debug level to be changed after module load.
  E.g.   echo 2 > /sys/module/asihpi/parameters/hpiDebugLevel
 */
-module_param(hpi_debug_level, int, S_IRUGO | S_IWUSR);
+module_param(hpi_debug_level, int, 0644);
 MODULE_PARM_DESC(hpi_debug_level, "debug verbosity 0..5");
 
 /* List of adapters found */
diff --git a/sound/pci/ca0106/ca0106_proc.c b/sound/pci/ca0106/ca0106_proc.c
index 9b2b8b3..a2c85cc 100644
--- a/sound/pci/ca0106/ca0106_proc.c
+++ b/sound/pci/ca0106/ca0106_proc.c
@@ -431,7 +431,7 @@
 	if(! snd_card_proc_new(emu->card, "ca0106_reg32", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_ca0106_proc_reg_read32);
 		entry->c.text.write = snd_ca0106_proc_reg_write32;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if(! snd_card_proc_new(emu->card, "ca0106_reg16", &entry))
 		snd_info_set_text_ops(entry, emu, snd_ca0106_proc_reg_read16);
@@ -440,12 +440,12 @@
 	if(! snd_card_proc_new(emu->card, "ca0106_regs1", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_ca0106_proc_reg_read1);
 		entry->c.text.write = snd_ca0106_proc_reg_write;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if(! snd_card_proc_new(emu->card, "ca0106_i2c", &entry)) {
 		entry->c.text.write = snd_ca0106_proc_i2c_write;
 		entry->private_data = emu;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if(! snd_card_proc_new(emu->card, "ca0106_regs2", &entry)) 
 		snd_info_set_text_ops(entry, emu, snd_ca0106_proc_reg_read2);
diff --git a/sound/pci/cmipci.c b/sound/pci/cmipci.c
index 26a6578..452cc79 100644
--- a/sound/pci/cmipci.c
+++ b/sound/pci/cmipci.c
@@ -1139,7 +1139,7 @@
 		struct snd_ctl_elem_value *val;
 		unsigned int i;
 
-		val = kmalloc(sizeof(*val), GFP_ATOMIC);
+		val = kmalloc(sizeof(*val), GFP_KERNEL);
 		if (!val)
 			return -ENOMEM;
 		for (i = 0; i < CM_SAVED_MIXERS; i++) {
diff --git a/sound/pci/cs46xx/cs46xx.c b/sound/pci/cs46xx/cs46xx.c
index 655fbea..4910d3f 100644
--- a/sound/pci/cs46xx/cs46xx.c
+++ b/sound/pci/cs46xx/cs46xx.c
@@ -58,7 +58,7 @@
 module_param_array(enable, bool, NULL, 0444);
 MODULE_PARM_DESC(enable, "Enable CS46xx soundcard.");
 module_param_array(external_amp, bool, NULL, 0444);
-MODULE_PARM_DESC(external_amp, "Force to enable external amplifer.");
+MODULE_PARM_DESC(external_amp, "Force to enable external amplifier.");
 module_param_array(thinkpad, bool, NULL, 0444);
 MODULE_PARM_DESC(thinkpad, "Force to enable Thinkpad's CLKRUN control.");
 module_param_array(mmap_valid, bool, NULL, 0444);
diff --git a/sound/pci/cs46xx/cs46xx_lib.c b/sound/pci/cs46xx/cs46xx_lib.c
index 0020fd0..ed1251c 100644
--- a/sound/pci/cs46xx/cs46xx_lib.c
+++ b/sound/pci/cs46xx/cs46xx_lib.c
@@ -2849,7 +2849,7 @@
 			entry->private_data = chip;
 			entry->c.ops = &snd_cs46xx_proc_io_ops;
 			entry->size = region->size;
-			entry->mode = S_IFREG | S_IRUSR;
+			entry->mode = S_IFREG | 0400;
 		}
 	}
 #ifdef CONFIG_SND_CS46XX_NEW_DSP
diff --git a/sound/pci/cs46xx/dsp_spos.c b/sound/pci/cs46xx/dsp_spos.c
index aa61615..c44eade 100644
--- a/sound/pci/cs46xx/dsp_spos.c
+++ b/sound/pci/cs46xx/dsp_spos.c
@@ -798,7 +798,7 @@
 
 	if ((entry = snd_info_create_card_entry(card, "dsp", card->proc_root)) != NULL) {
 		entry->content = SNDRV_INFO_CONTENT_TEXT;
-		entry->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+		entry->mode = S_IFDIR | 0555;
       
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
@@ -814,7 +814,7 @@
 	if ((entry = snd_info_create_card_entry(card, "spos_symbols", ins->proc_dsp_dir)) != NULL) {
 		entry->content = SNDRV_INFO_CONTENT_TEXT;
 		entry->private_data = chip;
-		entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+		entry->mode = S_IFREG | 0644;
 		entry->c.text.read = cs46xx_dsp_proc_symbol_table_read;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
@@ -826,7 +826,7 @@
 	if ((entry = snd_info_create_card_entry(card, "spos_modules", ins->proc_dsp_dir)) != NULL) {
 		entry->content = SNDRV_INFO_CONTENT_TEXT;
 		entry->private_data = chip;
-		entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+		entry->mode = S_IFREG | 0644;
 		entry->c.text.read = cs46xx_dsp_proc_modules_read;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
@@ -838,7 +838,7 @@
 	if ((entry = snd_info_create_card_entry(card, "parameter", ins->proc_dsp_dir)) != NULL) {
 		entry->content = SNDRV_INFO_CONTENT_TEXT;
 		entry->private_data = chip;
-		entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+		entry->mode = S_IFREG | 0644;
 		entry->c.text.read = cs46xx_dsp_proc_parameter_dump_read;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
@@ -850,7 +850,7 @@
 	if ((entry = snd_info_create_card_entry(card, "sample", ins->proc_dsp_dir)) != NULL) {
 		entry->content = SNDRV_INFO_CONTENT_TEXT;
 		entry->private_data = chip;
-		entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+		entry->mode = S_IFREG | 0644;
 		entry->c.text.read = cs46xx_dsp_proc_sample_dump_read;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
@@ -862,7 +862,7 @@
 	if ((entry = snd_info_create_card_entry(card, "task_tree", ins->proc_dsp_dir)) != NULL) {
 		entry->content = SNDRV_INFO_CONTENT_TEXT;
 		entry->private_data = chip;
-		entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+		entry->mode = S_IFREG | 0644;
 		entry->c.text.read = cs46xx_dsp_proc_task_tree_read;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
@@ -874,7 +874,7 @@
 	if ((entry = snd_info_create_card_entry(card, "scb_info", ins->proc_dsp_dir)) != NULL) {
 		entry->content = SNDRV_INFO_CONTENT_TEXT;
 		entry->private_data = chip;
-		entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+		entry->mode = S_IFREG | 0644;
 		entry->c.text.read = cs46xx_dsp_proc_scb_read;
 		if (snd_info_register(entry) < 0) {
 			snd_info_free_entry(entry);
diff --git a/sound/pci/cs46xx/dsp_spos_scb_lib.c b/sound/pci/cs46xx/dsp_spos_scb_lib.c
index 7488e1b..abb01ce 100644
--- a/sound/pci/cs46xx/dsp_spos_scb_lib.c
+++ b/sound/pci/cs46xx/dsp_spos_scb_lib.c
@@ -271,7 +271,7 @@
       
 			entry->content = SNDRV_INFO_CONTENT_TEXT;
 			entry->private_data = scb_info;
-			entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+			entry->mode = S_IFREG | 0644;
       
 			entry->c.text.read = cs46xx_dsp_proc_scb_info_read;
       
diff --git a/sound/pci/ctxfi/cttimer.c b/sound/pci/ctxfi/cttimer.c
index 08e874e..2099e9c 100644
--- a/sound/pci/ctxfi/cttimer.c
+++ b/sound/pci/ctxfi/cttimer.c
@@ -17,7 +17,7 @@
 
 static bool use_system_timer;
 MODULE_PARM_DESC(use_system_timer, "Force to use system-timer");
-module_param(use_system_timer, bool, S_IRUGO);
+module_param(use_system_timer, bool, 0444);
 
 struct ct_timer_ops {
 	void (*init)(struct ct_timer_instance *);
diff --git a/sound/pci/ctxfi/xfi.c b/sound/pci/ctxfi/xfi.c
index f2f3277..b287422 100644
--- a/sound/pci/ctxfi/xfi.c
+++ b/sound/pci/ctxfi/xfi.c
@@ -26,9 +26,9 @@
 static unsigned int reference_rate = 48000;
 static unsigned int multiple = 2;
 MODULE_PARM_DESC(reference_rate, "Reference rate (default=48000)");
-module_param(reference_rate, uint, S_IRUGO);
+module_param(reference_rate, uint, 0444);
 MODULE_PARM_DESC(multiple, "Rate multiplier (default=2)");
-module_param(multiple, uint, S_IRUGO);
+module_param(multiple, uint, 0444);
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c
index 0935a5c..358ef7d 100644
--- a/sound/pci/echoaudio/echoaudio.c
+++ b/sound/pci/echoaudio/echoaudio.c
@@ -59,7 +59,7 @@
 	dev_dbg(chip->card->dev,
 		"firmware requested: %s\n", card_fw[fw_index].data);
 	snprintf(name, sizeof(name), "ea/%s", card_fw[fw_index].data);
-	err = request_firmware(fw_entry, name, pci_device(chip));
+	err = request_firmware(fw_entry, name, &chip->pci->dev);
 	if (err < 0)
 		dev_err(chip->card->dev,
 			"get_firmware(): Firmware not available (%d)\n", err);
diff --git a/sound/pci/echoaudio/echoaudio.h b/sound/pci/echoaudio/echoaudio.h
index 152ce15..44b390a 100644
--- a/sound/pci/echoaudio/echoaudio.h
+++ b/sound/pci/echoaudio/echoaudio.h
@@ -559,10 +559,4 @@
 	return out * num_busses_in(chip) + in;
 }
 
-
-#ifndef pci_device
-#define pci_device(chip) (&chip->pci->dev)
-#endif
-
-
 #endif /* _ECHOAUDIO_H_ */
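
The echoaudio pair above is one cleanup: the call site of the driver-local pci_device() macro now passes the generic struct device directly, so the header wrapper can be dropped. A sketch of the call, with the driver struct abbreviated to the one field used here:

	/* Sketch only: struct echoaudio is reduced to the relevant field. */
	struct echoaudio {
		struct pci_dev *pci;
	};

	static int get_firmware_sketch(const struct firmware **fw_entry,
				       const char *name,
				       struct echoaudio *chip)
	{
		/* request_firmware() takes a struct device; the pci_dev
		 * already embeds one, so no wrapper macro is needed. */
		return request_firmware(fw_entry, name, &chip->pci->dev);
	}
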
diff --git a/sound/pci/emu10k1/emu10k1x.c b/sound/pci/emu10k1/emu10k1x.c
index 2c2b12a..611589c 100644
--- a/sound/pci/emu10k1/emu10k1x.c
+++ b/sound/pci/emu10k1/emu10k1x.c
@@ -1070,7 +1070,7 @@
 	if(! snd_card_proc_new(emu->card, "emu10k1x_regs", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_emu10k1x_proc_reg_read);
 		entry->c.text.write = snd_emu10k1x_proc_reg_write;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 		entry->private_data = emu;
 	}
 	
diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
index a2b56b1..b45a01b 100644
--- a/sound/pci/emu10k1/emufx.c
+++ b/sound/pci/emu10k1/emufx.c
@@ -170,7 +170,7 @@
 	/* 0x0f */ "Rear Right",
 	/* 0x10 */ "AC97 Front Left",
 	/* 0x11 */ "AC97 Front Right",
-	/* 0x12 */ "ADC Caputre Left",
+	/* 0x12 */ "ADC Capture Left",
 	/* 0x13 */ "ADC Capture Right",
 	/* 0x14 */ NULL,
 	/* 0x15 */ NULL,
@@ -421,14 +421,10 @@
 					    snd_fx8010_irq_handler_t *handler,
 					    unsigned char gpr_running,
 					    void *private_data,
-					    struct snd_emu10k1_fx8010_irq **r_irq)
+					    struct snd_emu10k1_fx8010_irq *irq)
 {
-	struct snd_emu10k1_fx8010_irq *irq;
 	unsigned long flags;
 	
-	irq = kmalloc(sizeof(*irq), GFP_ATOMIC);
-	if (irq == NULL)
-		return -ENOMEM;
 	irq->handler = handler;
 	irq->gpr_running = gpr_running;
 	irq->private_data = private_data;
@@ -443,8 +439,6 @@
 		emu->fx8010.irq_handlers = irq;
 	}
 	spin_unlock_irqrestore(&emu->fx8010.irq_lock, flags);
-	if (r_irq)
-		*r_irq = irq;
 	return 0;
 }
 
@@ -468,7 +462,6 @@
 			tmp->next = tmp->next->next;
 	}
 	spin_unlock_irqrestore(&emu->fx8010.irq_lock, flags);
-	kfree(irq);
 	return 0;
 }
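
These emufx.c hunks and the emupcm.c one below form one logical change: snd_emu10k1_fx8010_register_irq_handler() used to kmalloc the irq descriptor with GFP_ATOMIC and hand it back through a double pointer (with a matching kfree() on unregister); now the caller owns the storage and the function only links it into the handler list. The calling convention before and after, as a sketch with abbreviated argument names, using the pcm object from emupcm.c:

	/* Old: descriptor allocated inside, returned via double pointer. */
	struct snd_emu10k1_fx8010_irq *irq;
	err = snd_emu10k1_fx8010_register_irq_handler(emu, handler,
						      gpr_running, data, &irq);

	/* New: descriptor is embedded in its owner (pcm->irq is now a
	 * struct, not a pointer), so no atomic allocation is needed. */
	err = snd_emu10k1_fx8010_register_irq_handler(emu, handler,
						      gpr_running, data,
						      &pcm->irq);
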
 
diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index cefe613..d39458a 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -1724,7 +1724,7 @@
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
-		snd_emu10k1_fx8010_unregister_irq_handler(emu, pcm->irq); pcm->irq = NULL;
+		snd_emu10k1_fx8010_unregister_irq_handler(emu, &pcm->irq);
 		snd_emu10k1_ptr_write(emu, emu->gpr_base + pcm->gpr_trigger, 0, 0);
 		pcm->tram_pos = INITIAL_TRAM_POS(pcm->buffer_size);
 		pcm->tram_shift = 0;
diff --git a/sound/pci/emu10k1/emuproc.c b/sound/pci/emu10k1/emuproc.c
index bde0d19..b570080 100644
--- a/sound/pci/emu10k1/emuproc.c
+++ b/sound/pci/emu10k1/emuproc.c
@@ -135,7 +135,7 @@
 		/* 15 */ "Rear Right",
 		/* 16 */ "AC97 Front Left",
 		/* 17 */ "AC97 Front Right",
-		/* 18 */ "ADC Caputre Left",
+		/* 18 */ "ADC Capture Left",
 		/* 19 */ "ADC Capture Right",
 		/* 20 */ "???",
 		/* 21 */ "???",
@@ -574,32 +574,32 @@
 	if (! snd_card_proc_new(emu->card, "io_regs", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_emu_proc_io_reg_read);
 		entry->c.text.write = snd_emu_proc_io_reg_write;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if (! snd_card_proc_new(emu->card, "ptr_regs00a", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_emu_proc_ptr_reg_read00a);
 		entry->c.text.write = snd_emu_proc_ptr_reg_write00;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if (! snd_card_proc_new(emu->card, "ptr_regs00b", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_emu_proc_ptr_reg_read00b);
 		entry->c.text.write = snd_emu_proc_ptr_reg_write00;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if (! snd_card_proc_new(emu->card, "ptr_regs20a", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_emu_proc_ptr_reg_read20a);
 		entry->c.text.write = snd_emu_proc_ptr_reg_write20;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if (! snd_card_proc_new(emu->card, "ptr_regs20b", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_emu_proc_ptr_reg_read20b);
 		entry->c.text.write = snd_emu_proc_ptr_reg_write20;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if (! snd_card_proc_new(emu->card, "ptr_regs20c", &entry)) {
 		snd_info_set_text_ops(entry, emu, snd_emu_proc_ptr_reg_read20c);
 		entry->c.text.write = snd_emu_proc_ptr_reg_write20;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 #endif
 	
@@ -621,35 +621,35 @@
 	if (! snd_card_proc_new(emu->card, "fx8010_gpr", &entry)) {
 		entry->content = SNDRV_INFO_CONTENT_DATA;
 		entry->private_data = emu;
-		entry->mode = S_IFREG | S_IRUGO /*| S_IWUSR*/;
+		entry->mode = S_IFREG | 0444 /*| S_IWUSR*/;
 		entry->size = emu->audigy ? A_TOTAL_SIZE_GPR : TOTAL_SIZE_GPR;
 		entry->c.ops = &snd_emu10k1_proc_ops_fx8010;
 	}
 	if (! snd_card_proc_new(emu->card, "fx8010_tram_data", &entry)) {
 		entry->content = SNDRV_INFO_CONTENT_DATA;
 		entry->private_data = emu;
-		entry->mode = S_IFREG | S_IRUGO /*| S_IWUSR*/;
+		entry->mode = S_IFREG | 0444 /*| S_IWUSR*/;
 		entry->size = emu->audigy ? A_TOTAL_SIZE_TANKMEM_DATA : TOTAL_SIZE_TANKMEM_DATA ;
 		entry->c.ops = &snd_emu10k1_proc_ops_fx8010;
 	}
 	if (! snd_card_proc_new(emu->card, "fx8010_tram_addr", &entry)) {
 		entry->content = SNDRV_INFO_CONTENT_DATA;
 		entry->private_data = emu;
-		entry->mode = S_IFREG | S_IRUGO /*| S_IWUSR*/;
+		entry->mode = S_IFREG | 0444 /*| S_IWUSR*/;
 		entry->size = emu->audigy ? A_TOTAL_SIZE_TANKMEM_ADDR : TOTAL_SIZE_TANKMEM_ADDR ;
 		entry->c.ops = &snd_emu10k1_proc_ops_fx8010;
 	}
 	if (! snd_card_proc_new(emu->card, "fx8010_code", &entry)) {
 		entry->content = SNDRV_INFO_CONTENT_DATA;
 		entry->private_data = emu;
-		entry->mode = S_IFREG | S_IRUGO /*| S_IWUSR*/;
+		entry->mode = S_IFREG | 0444 /*| S_IWUSR*/;
 		entry->size = emu->audigy ? A_TOTAL_SIZE_CODE : TOTAL_SIZE_CODE;
 		entry->c.ops = &snd_emu10k1_proc_ops_fx8010;
 	}
 	if (! snd_card_proc_new(emu->card, "fx8010_acode", &entry)) {
 		entry->content = SNDRV_INFO_CONTENT_TEXT;
 		entry->private_data = emu;
-		entry->mode = S_IFREG | S_IRUGO /*| S_IWUSR*/;
+		entry->mode = S_IFREG | 0444 /*| S_IWUSR*/;
 		entry->c.text.read = snd_emu10k1_proc_acode_read;
 	}
 	return 0;
diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c
index 5865f3b..dbc7d8d 100644
--- a/sound/pci/emu10k1/memory.c
+++ b/sound/pci/emu10k1/memory.c
@@ -248,13 +248,13 @@
 static int is_valid_page(struct snd_emu10k1 *emu, dma_addr_t addr)
 {
 	if (addr & ~emu->dma_mask) {
-		dev_err(emu->card->dev,
+		dev_err_ratelimited(emu->card->dev,
 			"max memory size is 0x%lx (addr = 0x%lx)!!\n",
 			emu->dma_mask, (unsigned long)addr);
 		return 0;
 	}
 	if (addr & (EMUPAGESIZE-1)) {
-		dev_err(emu->card->dev, "page is not aligned\n");
+		dev_err_ratelimited(emu->card->dev, "page is not aligned\n");
 		return 0;
 	}
 	return 1;
@@ -345,7 +345,7 @@
 		else
 			addr = snd_pcm_sgbuf_get_addr(substream, ofs);
 		if (! is_valid_page(emu, addr)) {
-			dev_err(emu->card->dev,
+			dev_err_ratelimited(emu->card->dev,
 				"emu: failure page = %d\n", idx);
 			mutex_unlock(&hdr->block_mutex);
 			return NULL;
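
The memory.c hunks switch the page-validation errors to the ratelimited variant: is_valid_page() runs for every mapped page, so a buffer with a bad address could otherwise emit hundreds of identical log lines. dev_err_ratelimited() is a drop-in replacement taking the same arguments:

	/* Same format/args as dev_err(); output is merely rate-limited. */
	dev_err_ratelimited(emu->card->dev, "page is not aligned\n");
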
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index f7a492c..4235907 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -127,11 +127,15 @@
 
 config SND_HDA_CODEC_HDMI
 	tristate "Build HDMI/DisplayPort HD-audio codec support"
+	select SND_DYNAMIC_MINORS
 	help
 	  Say Y or M here to include HDMI and DisplayPort HD-audio codec
 	  support in snd-hda-intel driver.  This includes all AMD/ATI,
 	  Intel and Nvidia HDMI/DisplayPort codecs.
 
+	  Note that this option forcibly enables CONFIG_SND_DYNAMIC_MINORS
+	  to ensure the multiple streams needed for DP-MST support.
+
 comment "Set to Y if you want auto-loading the codec driver"
 	depends on SND_HDA=y && SND_HDA_CODEC_HDMI=m
 
diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c
index d3ea731..b9a6b66 100644
--- a/sound/pci/hda/hda_auto_parser.c
+++ b/sound/pci/hda/hda_auto_parser.c
@@ -793,11 +793,11 @@
  */
 void snd_hda_apply_verbs(struct hda_codec *codec)
 {
+	const struct hda_verb **v;
 	int i;
-	for (i = 0; i < codec->verbs.used; i++) {
-		struct hda_verb **v = snd_array_elem(&codec->verbs, i);
+
+	snd_array_for_each(&codec->verbs, i, v)
 		snd_hda_sequence_write(codec, *v);
-	}
 }
 EXPORT_SYMBOL_GPL(snd_hda_apply_verbs);
 
@@ -890,10 +890,10 @@
 static bool pin_config_match(struct hda_codec *codec,
 			     const struct hda_pintbl *pins)
 {
+	const struct hda_pincfg *pin;
 	int i;
 
-	for (i = 0; i < codec->init_pins.used; i++) {
-		struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
+	snd_array_for_each(&codec->init_pins, i, pin) {
 		hda_nid_t nid = pin->nid;
 		u32 cfg = pin->cfg;
 		const struct hda_pintbl *t_pins;
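
Many of the HDA hunks from here on are one mechanical conversion: open-coded index loops over a struct snd_array become snd_array_for_each(), which also lets the element pointer be declared once (and often const). The iterator is presumably shaped like the sketch below, not necessarily the verbatim kernel macro; note that snd_array_elem() only computes an address, so evaluating it one slot past the end is harmless:

	/* Presumed shape of the helper (sketch). */
	#define snd_array_for_each(array, idx, ptr) \
		for ((idx) = 0, (ptr) = (array)->list; (idx) < (array)->used; \
		     (ptr) = snd_array_elem(array, ++(idx)))

	/* Before: */
	for (i = 0; i < codec->init_pins.used; i++) {
		struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
		/* ... use pin ... */
	}

	/* After: */
	snd_array_for_each(&codec->init_pins, i, pin) {
		/* ... use pin ... */
	}
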
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 5bc3a74..08151f3 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -481,9 +481,10 @@
 					 struct snd_array *array,
 					 hda_nid_t nid)
 {
+	struct hda_pincfg *pin;
 	int i;
-	for (i = 0; i < array->used; i++) {
-		struct hda_pincfg *pin = snd_array_elem(array, i);
+
+	snd_array_for_each(array, i, pin) {
 		if (pin->nid == nid)
 			return pin;
 	}
@@ -618,14 +619,15 @@
  */
 void snd_hda_shutup_pins(struct hda_codec *codec)
 {
+	const struct hda_pincfg *pin;
 	int i;
+
 	/* don't shut up pins when unloading the driver; otherwise it breaks
 	 * the default pin setup at the next load of the driver
 	 */
 	if (codec->bus->shutdown)
 		return;
-	for (i = 0; i < codec->init_pins.used; i++) {
-		struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
+	snd_array_for_each(&codec->init_pins, i, pin) {
 		/* use read here for syncing after issuing each verb */
 		snd_hda_codec_read(codec, pin->nid, 0,
 				   AC_VERB_SET_PIN_WIDGET_CONTROL, 0);
@@ -638,13 +640,14 @@
 /* Restore the pin controls cleared previously via snd_hda_shutup_pins() */
 static void restore_shutup_pins(struct hda_codec *codec)
 {
+	const struct hda_pincfg *pin;
 	int i;
+
 	if (!codec->pins_shutup)
 		return;
 	if (codec->bus->shutdown)
 		return;
-	for (i = 0; i < codec->init_pins.used; i++) {
-		struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
+	snd_array_for_each(&codec->init_pins, i, pin) {
 		snd_hda_codec_write(codec, pin->nid, 0,
 				    AC_VERB_SET_PIN_WIDGET_CONTROL,
 				    pin->ctrl);
@@ -697,8 +700,7 @@
 	struct hda_cvt_setup *p;
 	int i;
 
-	for (i = 0; i < codec->cvt_setups.used; i++) {
-		p = snd_array_elem(&codec->cvt_setups, i);
+	snd_array_for_each(&codec->cvt_setups, i, p) {
 		if (p->nid == nid)
 			return p;
 	}
@@ -1076,8 +1078,7 @@
 	/* make other inactive cvts with the same stream-tag dirty */
 	type = get_wcaps_type(get_wcaps(codec, nid));
 	list_for_each_codec(c, codec->bus) {
-		for (i = 0; i < c->cvt_setups.used; i++) {
-			p = snd_array_elem(&c->cvt_setups, i);
+		snd_array_for_each(&c->cvt_setups, i, p) {
 			if (!p->active && p->stream_tag == stream_tag &&
 			    get_wcaps_type(get_wcaps(c, p->nid)) == type)
 				p->dirty = 1;
@@ -1140,12 +1141,11 @@
 static void purify_inactive_streams(struct hda_codec *codec)
 {
 	struct hda_codec *c;
+	struct hda_cvt_setup *p;
 	int i;
 
 	list_for_each_codec(c, codec->bus) {
-		for (i = 0; i < c->cvt_setups.used; i++) {
-			struct hda_cvt_setup *p;
-			p = snd_array_elem(&c->cvt_setups, i);
+		snd_array_for_each(&c->cvt_setups, i, p) {
 			if (p->dirty)
 				really_cleanup_stream(c, p);
 		}
@@ -1156,10 +1156,10 @@
 /* clean up all streams; called from suspend */
 static void hda_cleanup_all_streams(struct hda_codec *codec)
 {
+	struct hda_cvt_setup *p;
 	int i;
 
-	for (i = 0; i < codec->cvt_setups.used; i++) {
-		struct hda_cvt_setup *p = snd_array_elem(&codec->cvt_setups, i);
+	snd_array_for_each(&codec->cvt_setups, i, p) {
 		if (p->stream_tag)
 			really_cleanup_stream(codec, p);
 	}
@@ -1493,10 +1493,10 @@
 	val1 = ((int)val1) * ((int)val2);
 	if (min_mute || (caps & AC_AMPCAP_MIN_MUTE))
 		val2 |= TLV_DB_SCALE_MUTE;
-	tlv[0] = SNDRV_CTL_TLVT_DB_SCALE;
-	tlv[1] = 2 * sizeof(unsigned int);
-	tlv[2] = val1;
-	tlv[3] = val2;
+	tlv[SNDRV_CTL_TLVO_TYPE] = SNDRV_CTL_TLVT_DB_SCALE;
+	tlv[SNDRV_CTL_TLVO_LEN] = 2 * sizeof(unsigned int);
+	tlv[SNDRV_CTL_TLVO_DB_SCALE_MIN] = val1;
+	tlv[SNDRV_CTL_TLVO_DB_SCALE_MUTE_AND_STEP] = val2;
 }
 
 /**
@@ -1544,10 +1544,10 @@
 	nums = (caps & AC_AMPCAP_NUM_STEPS) >> AC_AMPCAP_NUM_STEPS_SHIFT;
 	step = (caps & AC_AMPCAP_STEP_SIZE) >> AC_AMPCAP_STEP_SIZE_SHIFT;
 	step = (step + 1) * 25;
-	tlv[0] = SNDRV_CTL_TLVT_DB_SCALE;
-	tlv[1] = 2 * sizeof(unsigned int);
-	tlv[2] = -nums * step;
-	tlv[3] = step;
+	tlv[SNDRV_CTL_TLVO_TYPE] = SNDRV_CTL_TLVT_DB_SCALE;
+	tlv[SNDRV_CTL_TLVO_LEN] = 2 * sizeof(unsigned int);
+	tlv[SNDRV_CTL_TLVO_DB_SCALE_MIN] = -nums * step;
+	tlv[SNDRV_CTL_TLVO_DB_SCALE_MUTE_AND_STEP] = step;
 }
 EXPORT_SYMBOL_GPL(snd_hda_set_vmaster_tlv);
 
@@ -1845,10 +1845,10 @@
 	} else if (kctl->vd[0].access & SNDRV_CTL_ELEM_ACCESS_TLV_READ)
 		tlv = kctl->tlv.p;
 
-	if (!tlv || tlv[0] != SNDRV_CTL_TLVT_DB_SCALE)
+	if (!tlv || tlv[SNDRV_CTL_TLVO_TYPE] != SNDRV_CTL_TLVT_DB_SCALE)
 		return 0;
 
-	step = tlv[3];
+	step = tlv[SNDRV_CTL_TLVO_DB_SCALE_MUTE_AND_STEP];
 	step &= ~TLV_DB_SCALE_MUTE;
 	if (!step)
 		return 0;
@@ -1860,7 +1860,7 @@
 	}
 
 	arg->step = step;
-	val = -tlv[2] / step;
+	val = -tlv[SNDRV_CTL_TLVO_DB_SCALE_MIN] / step;
 	if (val > 0) {
 		put_kctl_with_value(slave, val);
 		return val;
@@ -2175,6 +2175,8 @@
 	int idx = kcontrol->private_value;
 	struct hda_spdif_out *spdif;
 
+	if (WARN_ON(codec->spdif_out.used <= idx))
+		return -EINVAL;
 	mutex_lock(&codec->spdif_mutex);
 	spdif = snd_array_elem(&codec->spdif_out, idx);
 	ucontrol->value.iec958.status[0] = spdif->status & 0xff;
@@ -2282,6 +2284,8 @@
 	unsigned short val;
 	int change;
 
+	if (WARN_ON(codec->spdif_out.used <= idx))
+		return -EINVAL;
 	mutex_lock(&codec->spdif_mutex);
 	spdif = snd_array_elem(&codec->spdif_out, idx);
 	nid = spdif->nid;
@@ -2308,6 +2312,8 @@
 	int idx = kcontrol->private_value;
 	struct hda_spdif_out *spdif;
 
+	if (WARN_ON(codec->spdif_out.used <= idx))
+		return -EINVAL;
 	mutex_lock(&codec->spdif_mutex);
 	spdif = snd_array_elem(&codec->spdif_out, idx);
 	ucontrol->value.integer.value[0] = spdif->ctls & AC_DIG1_ENABLE;
@@ -2336,6 +2342,8 @@
 	unsigned short val;
 	int change;
 
+	if (WARN_ON(codec->spdif_out.used <= idx))
+		return -EINVAL;
 	mutex_lock(&codec->spdif_mutex);
 	spdif = snd_array_elem(&codec->spdif_out, idx);
 	nid = spdif->nid;
@@ -2461,10 +2469,10 @@
 struct hda_spdif_out *snd_hda_spdif_out_of_nid(struct hda_codec *codec,
 					       hda_nid_t nid)
 {
+	struct hda_spdif_out *spdif;
 	int i;
-	for (i = 0; i < codec->spdif_out.used; i++) {
-		struct hda_spdif_out *spdif =
-				snd_array_elem(&codec->spdif_out, i);
+
+	snd_array_for_each(&codec->spdif_out, i, spdif) {
 		if (spdif->nid == nid)
 			return spdif;
 	}
@@ -2483,6 +2491,8 @@
 {
 	struct hda_spdif_out *spdif;
 
+	if (WARN_ON(codec->spdif_out.used <= idx))
+		return;
 	mutex_lock(&codec->spdif_mutex);
 	spdif = snd_array_elem(&codec->spdif_out, idx);
 	spdif->nid = (u16)-1;
@@ -2503,6 +2513,8 @@
 	struct hda_spdif_out *spdif;
 	unsigned short val;
 
+	if (WARN_ON(codec->spdif_out.used <= idx))
+		return;
 	mutex_lock(&codec->spdif_mutex);
 	spdif = snd_array_elem(&codec->spdif_out, idx);
 	if (spdif->nid != nid) {
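
Two independent cleanups run through the hda_codec.c diff. The dB-scale TLV blocks stop using bare indices 0-3 in favor of the SNDRV_CTL_TLVO_* names for the same four slots (type, length, minimum, mute-and-step), and each SPDIF kcontrol callback gains a WARN_ON bounds check before indexing codec->spdif_out, turning a silent out-of-range access into -EINVAL. The TLV renaming side by side (min_val and step_val stand in for the values computed in the hunks above):

	unsigned int tlv[4];

	tlv[0] = SNDRV_CTL_TLVT_DB_SCALE;  /* tlv[SNDRV_CTL_TLVO_TYPE] */
	tlv[1] = 2 * sizeof(unsigned int); /* tlv[SNDRV_CTL_TLVO_LEN] */
	tlv[2] = min_val;                  /* tlv[SNDRV_CTL_TLVO_DB_SCALE_MIN] */
	tlv[3] = step_val;                 /* tlv[SNDRV_CTL_TLVO_DB_SCALE_MUTE_AND_STEP] */
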
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index d1eb148..a12e594 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -748,8 +748,10 @@
 		return err;
 	strlcpy(pcm->name, cpcm->name, sizeof(pcm->name));
 	apcm = kzalloc(sizeof(*apcm), GFP_KERNEL);
-	if (apcm == NULL)
+	if (apcm == NULL) {
+		snd_device_free(chip->card, pcm);
 		return -ENOMEM;
+	}
 	apcm->chip = chip;
 	apcm->pcm = pcm;
 	apcm->codec = codec;
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 5cc6509..db773e2 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -264,10 +264,10 @@
 				     int anchor_nid)
 {
 	struct hda_gen_spec *spec = codec->spec;
+	struct nid_path *path;
 	int i;
 
-	for (i = 0; i < spec->paths.used; i++) {
-		struct nid_path *path = snd_array_elem(&spec->paths, i);
+	snd_array_for_each(&spec->paths, i, path) {
 		if (path->depth <= 0)
 			continue;
 		if ((!from_nid || path->path[0] == from_nid) &&
@@ -325,10 +325,10 @@
 static bool is_dac_already_used(struct hda_codec *codec, hda_nid_t nid)
 {
 	struct hda_gen_spec *spec = codec->spec;
+	const struct nid_path *path;
 	int i;
 
-	for (i = 0; i < spec->paths.used; i++) {
-		struct nid_path *path = snd_array_elem(&spec->paths, i);
+	snd_array_for_each(&spec->paths, i, path) {
 		if (path->path[0] == nid)
 			return true;
 	}
@@ -351,11 +351,11 @@
 static bool is_ctl_used(struct hda_codec *codec, unsigned int val, int type)
 {
 	struct hda_gen_spec *spec = codec->spec;
+	const struct nid_path *path;
 	int i;
 
 	val &= AMP_VAL_COMPARE_MASK;
-	for (i = 0; i < spec->paths.used; i++) {
-		struct nid_path *path = snd_array_elem(&spec->paths, i);
+	snd_array_for_each(&spec->paths, i, path) {
 		if ((path->ctls[type] & AMP_VAL_COMPARE_MASK) == val)
 			return true;
 	}
@@ -638,13 +638,13 @@
 {
 	struct hda_gen_spec *spec = codec->spec;
 	int type = get_wcaps_type(get_wcaps(codec, nid));
+	const struct nid_path *path;
 	int i, n;
 
 	if (nid == codec->core.afg)
 		return true;
 
-	for (n = 0; n < spec->paths.used; n++) {
-		struct nid_path *path = snd_array_elem(&spec->paths, n);
+	snd_array_for_each(&spec->paths, n, path) {
 		if (!path->active)
 			continue;
 		if (codec->power_save_node) {
@@ -2065,7 +2065,7 @@
 			snd_hda_set_vmaster_tlv(codec, spec->vmaster_nid,
 						HDA_OUTPUT, spec->vmaster_tlv);
 			if (spec->dac_min_mute)
-				spec->vmaster_tlv[3] |= TLV_DB_SCALE_MUTE;
+				spec->vmaster_tlv[SNDRV_CTL_TLVO_DB_SCALE_MUTE_AND_STEP] |= TLV_DB_SCALE_MUTE;
 		}
 	}
 
@@ -2696,10 +2696,10 @@
 static bool find_kctl_name(struct hda_codec *codec, const char *name, int idx)
 {
 	struct hda_gen_spec *spec = codec->spec;
+	const struct snd_kcontrol_new *kctl;
 	int i;
 
-	for (i = 0; i < spec->kctls.used; i++) {
-		struct snd_kcontrol_new *kctl = snd_array_elem(&spec->kctls, i);
+	snd_array_for_each(&spec->kctls, i, kctl) {
 		if (!strcmp(kctl->name, name) && kctl->index == idx)
 			return true;
 	}
@@ -4021,8 +4021,7 @@
 	struct nid_path *path;
 	int n;
 
-	for (n = 0; n < spec->paths.used; n++) {
-		path = snd_array_elem(&spec->paths, n);
+	snd_array_for_each(&spec->paths, n, path) {
 		if (!path->depth)
 			continue;
 		if (path->path[0] == nid ||
@@ -5831,10 +5830,10 @@
  */
 static void clear_unsol_on_unused_pins(struct hda_codec *codec)
 {
+	const struct hda_pincfg *pin;
 	int i;
 
-	for (i = 0; i < codec->init_pins.used; i++) {
-		struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
+	snd_array_for_each(&codec->init_pins, i, pin) {
 		hda_nid_t nid = pin->nid;
 		if (is_jack_detectable(codec, nid) &&
 		    !snd_hda_jack_tbl_get(codec, nid))
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index a0c93b9..1ae1850 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2209,7 +2209,18 @@
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+	SND_PCI_QUIRK(0x1849, 0x7662, "Asrock H81M-HDS", 0),
+	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
+	/* https://bugzilla.redhat.com/show_bug.cgi?id=1581607 */
+	SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", 0),
+	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+	/* Note the P55A-UD3 and Z87-D3HP share the subsys id for the HDA dev */
+	SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte P55A-UD3 / Z87-D3HP", 0),
+	/* https://bugzilla.kernel.org/show_bug.cgi?id=199607 */
+	SND_PCI_QUIRK(0x8086, 0x2057, "Intel NUC5i7RYB", 0),
+	/* https://bugzilla.redhat.com/show_bug.cgi?id=1520902 */
+	SND_PCI_QUIRK(0x8086, 0x2068, "Intel NUC7i3BNB", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */
 	SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0),
 	/* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */
diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
index 9b7efec..6ec79c5 100644
--- a/sound/pci/hda/hda_sysfs.c
+++ b/sound/pci/hda/hda_sysfs.c
@@ -80,10 +80,10 @@
 				struct snd_array *list,
 				char *buf)
 {
+	const struct hda_pincfg *pin;
 	int i, len = 0;
 	mutex_lock(&codec->user_mutex);
-	for (i = 0; i < list->used; i++) {
-		struct hda_pincfg *pin = snd_array_elem(list, i);
+	snd_array_for_each(list, i, pin) {
 		len += sprintf(buf + len, "0x%02x 0x%08x\n",
 			       pin->nid, pin->cfg);
 	}
@@ -217,10 +217,10 @@
 			       char *buf)
 {
 	struct hda_codec *codec = dev_get_drvdata(dev);
+	const struct hda_verb *v;
 	int i, len = 0;
 	mutex_lock(&codec->user_mutex);
-	for (i = 0; i < codec->init_verbs.used; i++) {
-		struct hda_verb *v = snd_array_elem(&codec->init_verbs, i);
+	snd_array_for_each(&codec->init_verbs, i, v) {
 		len += snprintf(buf + len, PAGE_SIZE - len,
 				"0x%02x 0x%03x 0x%04x\n",
 				v->nid, v->verb, v->param);
@@ -267,10 +267,10 @@
 			  char *buf)
 {
 	struct hda_codec *codec = dev_get_drvdata(dev);
+	const struct hda_hint *hint;
 	int i, len = 0;
 	mutex_lock(&codec->user_mutex);
-	for (i = 0; i < codec->hints.used; i++) {
-		struct hda_hint *hint = snd_array_elem(&codec->hints, i);
+	snd_array_for_each(&codec->hints, i, hint) {
 		len += snprintf(buf + len, PAGE_SIZE - len,
 				"%s = %s\n", hint->key, hint->val);
 	}
@@ -280,10 +280,10 @@
 
 static struct hda_hint *get_hint(struct hda_codec *codec, const char *key)
 {
+	struct hda_hint *hint;
 	int i;
 
-	for (i = 0; i < codec->hints.used; i++) {
-		struct hda_hint *hint = snd_array_elem(&codec->hints, i);
+	snd_array_for_each(&codec->hints, i, hint) {
 		if (!strcmp(hint->key, key))
 			return hint;
 	}
@@ -783,13 +783,13 @@
 void snd_hda_sysfs_clear(struct hda_codec *codec)
 {
 #ifdef CONFIG_SND_HDA_RECONFIG
+	struct hda_hint *hint;
 	int i;
 
 	/* clear init verbs */
 	snd_array_free(&codec->init_verbs);
 	/* clear hints */
-	for (i = 0; i < codec->hints.used; i++) {
-		struct hda_hint *hint = snd_array_elem(&codec->hints, i);
+	snd_array_for_each(&codec->hints, i, hint) {
 		kfree(hint->key); /* we don't need to free hint->val */
 	}
 	snd_array_free(&codec->hints);
diff --git a/sound/pci/hda/hp_x360_helper.c b/sound/pci/hda/hp_x360_helper.c
new file mode 100644
index 0000000..969542c
--- /dev/null
+++ b/sound/pci/hda/hp_x360_helper.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Fixes for HP X360 laptops with top B&O speakers
+ * to be included from codec driver
+ */
+
+static void alc295_fixup_hp_top_speakers(struct hda_codec *codec,
+		const struct hda_fixup *fix, int action)
+{
+	static const struct hda_pintbl pincfgs[] = {
+		{ 0x17, 0x90170110 },
+		{ }
+	};
+	static const struct coef_fw alc295_hp_speakers_coefs[] = {
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0000), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x003f), WRITE_COEF(0x28, 0x1000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0004), WRITE_COEF(0x28, 0x0600), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006a), WRITE_COEF(0x28, 0x0006), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006c), WRITE_COEF(0x28, 0xc0c0), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0008), WRITE_COEF(0x28, 0xb000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x002e), WRITE_COEF(0x28, 0x0800), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006a), WRITE_COEF(0x28, 0x00c1), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006c), WRITE_COEF(0x28, 0x0320), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0039), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x003b), WRITE_COEF(0x28, 0xffff), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x003c), WRITE_COEF(0x28, 0xffd0), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x003a), WRITE_COEF(0x28, 0x1dfe), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0080), WRITE_COEF(0x28, 0x0880), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x003a), WRITE_COEF(0x28, 0x0dfe), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0018), WRITE_COEF(0x28, 0x0219), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006a), WRITE_COEF(0x28, 0x005d), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006c), WRITE_COEF(0x28, 0x9142), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c0), WRITE_COEF(0x28, 0x01ce), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c1), WRITE_COEF(0x28, 0xed0c), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c2), WRITE_COEF(0x28, 0x1c00), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c3), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c4), WRITE_COEF(0x28, 0x0200), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c5), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c6), WRITE_COEF(0x28, 0x0399), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c7), WRITE_COEF(0x28, 0x2330), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c8), WRITE_COEF(0x28, 0x1e5d), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00c9), WRITE_COEF(0x28, 0x6eff), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00ca), WRITE_COEF(0x28, 0x01c0), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00cb), WRITE_COEF(0x28, 0xed0c), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00cc), WRITE_COEF(0x28, 0x1c00), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00cd), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00ce), WRITE_COEF(0x28, 0x0200), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00cf), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00d0), WRITE_COEF(0x28, 0x0399), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00d1), WRITE_COEF(0x28, 0x2330), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00d2), WRITE_COEF(0x28, 0x1e5d), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x00d3), WRITE_COEF(0x28, 0x6eff), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0062), WRITE_COEF(0x28, 0x8000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0063), WRITE_COEF(0x28, 0x5f5f), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0064), WRITE_COEF(0x28, 0x1000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0065), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0066), WRITE_COEF(0x28, 0x4004), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0067), WRITE_COEF(0x28, 0x0802), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0068), WRITE_COEF(0x28, 0x890f), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0069), WRITE_COEF(0x28, 0xe021), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0070), WRITE_COEF(0x28, 0x8012), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0071), WRITE_COEF(0x28, 0x3450), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0072), WRITE_COEF(0x28, 0x0123), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0073), WRITE_COEF(0x28, 0x4543), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0074), WRITE_COEF(0x28, 0x2100), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0075), WRITE_COEF(0x28, 0x4321), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0076), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0050), WRITE_COEF(0x28, 0x8200), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x003a), WRITE_COEF(0x28, 0x1dfe), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0051), WRITE_COEF(0x28, 0x0707), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0052), WRITE_COEF(0x28, 0x4090), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006a), WRITE_COEF(0x28, 0x0090), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006c), WRITE_COEF(0x28, 0x721f), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0012), WRITE_COEF(0x28, 0xebeb), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x009e), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0060), WRITE_COEF(0x28, 0x2213), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006a), WRITE_COEF(0x28, 0x0006), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x006c), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x003f), WRITE_COEF(0x28, 0x3000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0004), WRITE_COEF(0x28, 0x0500), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0040), WRITE_COEF(0x28, 0x800c), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0046), WRITE_COEF(0x28, 0xc22e), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x004b), WRITE_COEF(0x28, 0x0000), WRITE_COEF(0x29, 0xb024),
+		WRITE_COEF(0x24, 0x0012), WRITE_COEF(0x26, 0x0050), WRITE_COEF(0x28, 0x82ec), WRITE_COEF(0x29, 0xb024),
+	};
+
+	switch (action) {
+	case HDA_FIXUP_ACT_PRE_PROBE:
+		snd_hda_apply_pincfgs(codec, pincfgs);
+		alc295_fixup_disable_dac3(codec, fix, action);
+		break;
+	case HDA_FIXUP_ACT_INIT:
+		alc_process_coef_fw(codec, alc295_hp_speakers_coefs);
+		break;
+	}
+}
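
The new hp_x360_helper.c is meant to be textually #included from the codec driver (per its header comment), so it can call static helpers such as alc_process_coef_fw(). Note the two-phase shape: pin configs are applied once at HDA_FIXUP_ACT_PRE_PROBE, while the COEF sequence is replayed at every HDA_FIXUP_ACT_INIT, e.g. after resume. Wiring it into the codec driver's fixup table would look roughly like the following sketch (the enum entry name here is hypothetical):

	#include "hp_x360_helper.c"

	/* In the fixup table (hypothetical entry name): */
	[ALC295_FIXUP_HP_X360] = {
		.type = HDA_FIXUP_FUNC,
		.v.func = alc295_fixup_hp_top_speakers,
	},
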
diff --git a/sound/pci/hda/local.h b/sound/pci/hda/local.h
deleted file mode 100644
index 3b8b7d7..0000000
--- a/sound/pci/hda/local.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- */
-
-#ifndef __HDAC_LOCAL_H
-#define __HDAC_LOCAL_H
-
-int hdac_read_parm(struct hdac_device *codec, hda_nid_t nid, int parm);
-
-#define get_wcaps(codec, nid) \
-	hdac_read_parm(codec, nid, AC_PAR_AUDIO_WIDGET_CAP)
-/* get the widget type from widget capability bits */
-static inline int get_wcaps_type(unsigned int wcaps)
-{
-	if (!wcaps)
-		return -1; /* invalid type */
-	return (wcaps & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT;
-}
-
-#define get_pin_caps(codec, nid) \
-	hdac_read_parm(codec, nid, AC_PAR_PIN_CAP)
-
-static inline
-unsigned int get_pin_cfg(struct hdac_device *codec, hda_nid_t nid)
-{
-	unsigned int val;
-
-	if (snd_hdac_read(codec, nid, AC_VERB_GET_CONFIG_DEFAULT, 0, &val))
-		return -1;
-	return val;
-}
-
-#define get_amp_caps(codec, nid, dir) \
-	hdac_read_parm(codec, nid, (dir) == HDA_OUTPUT ? \
-		       AC_PAR_AMP_OUT_CAP : AC_PAR_AMP_IN_CAP)
-
-#define get_power_caps(codec, nid) \
-	hdac_read_parm(codec, nid, AC_PAR_POWER_STATE)
-
-#endif /* __HDAC_LOCAL_H */
diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 768ea86..292e2c5 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -28,6 +28,9 @@
 #include <linux/module.h>
 #include <linux/firmware.h>
 #include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/pci.h>
 #include <sound/core.h>
 #include "hda_codec.h"
 #include "hda_local.h"
@@ -39,9 +42,15 @@
 /* Enable this to see controls for tuning purpose. */
 /*#define ENABLE_TUNING_CONTROLS*/
 
+#ifdef ENABLE_TUNING_CONTROLS
+#include <sound/tlv.h>
+#endif
+
 #define FLOAT_ZERO	0x00000000
 #define FLOAT_ONE	0x3f800000
 #define FLOAT_TWO	0x40000000
+#define FLOAT_THREE     0x40400000
+#define FLOAT_EIGHT     0x41000000
 #define FLOAT_MINUS_5	0xc0a00000
 
 #define UNSOL_TAG_DSP	0x16
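
The FLOAT_* constants are IEEE-754 single-precision bit patterns passed to the DSP verbatim, since the DSP works in floats while the kernel avoids floating point. Checking the two new values:

	/* 3.0f = 1.5 * 2^1: sign 0, biased exponent 0x80, fraction 0x400000 */
	#define FLOAT_THREE	0x40400000
	/* 8.0f = 1.0 * 2^3: sign 0, biased exponent 0x82, fraction 0 */
	#define FLOAT_EIGHT	0x41000000
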
@@ -72,16 +81,22 @@
 #define SCP_GET    1
 
 #define EFX_FILE   "ctefx.bin"
+#define SBZ_EFX_FILE   "ctefx-sbz.bin"
+#define R3DI_EFX_FILE  "ctefx-r3di.bin"
 
 #ifdef CONFIG_SND_HDA_CODEC_CA0132_DSP
 MODULE_FIRMWARE(EFX_FILE);
+MODULE_FIRMWARE(SBZ_EFX_FILE);
+MODULE_FIRMWARE(R3DI_EFX_FILE);
 #endif
 
-static char *dirstr[2] = { "Playback", "Capture" };
+static const char *const dirstr[2] = { "Playback", "Capture" };
 
+#define NUM_OF_OUTPUTS 3
 enum {
 	SPEAKER_OUT,
-	HEADPHONE_OUT
+	HEADPHONE_OUT,
+	SURROUND_OUT
 };
 
 enum {
@@ -89,6 +104,15 @@
 	LINE_MIC_IN
 };
 
+/* Strings for Input Source Enum Control */
+static const char *const in_src_str[3] = {"Rear Mic", "Line", "Front Mic" };
+#define IN_SRC_NUM_OF_INPUTS 3
+enum {
+	REAR_MIC,
+	REAR_LINE_IN,
+	FRONT_MIC,
+};
+
 enum {
 #define VNODE_START_NID    0x80
 	VNID_SPK = VNODE_START_NID,			/* Speaker vnid */
@@ -122,13 +146,28 @@
 	VOICEFX = IN_EFFECT_END_NID,
 	PLAY_ENHANCEMENT,
 	CRYSTAL_VOICE,
-	EFFECT_END_NID
+	EFFECT_END_NID,
+	OUTPUT_SOURCE_ENUM,
+	INPUT_SOURCE_ENUM,
+	XBASS_XOVER,
+	EQ_PRESET_ENUM,
+	SMART_VOLUME_ENUM,
+	MIC_BOOST_ENUM
 #define EFFECTS_COUNT  (EFFECT_END_NID - EFFECT_START_NID)
 };
 
 /* Effects values size*/
 #define EFFECT_VALS_MAX_COUNT 12
 
+/*
+ * Default values for the effect slider controls; they are in order of their
+ * effect NIDs: Surround, Crystalizer, Dialog Plus, Smart Volume, and then
+ * X-bass.
+ */
+static const unsigned int effect_slider_defaults[] = {67, 65, 50, 74, 50};
+/* Number of effect level sliders for ca0132_alt controls. */
+#define EFFECT_LEVEL_SLIDERS 5
+
 /* Latency introduced by DSP blocks in milliseconds. */
 #define DSP_CAPTURE_INIT_LATENCY        0
 #define DSP_CRYSTAL_VOICE_LATENCY       124
@@ -150,7 +189,7 @@
 #define EFX_DIR_OUT 0
 #define EFX_DIR_IN  1
 
-static struct ct_effect ca0132_effects[EFFECTS_COUNT] = {
+static const struct ct_effect ca0132_effects[EFFECTS_COUNT] = {
 	{ .name = "Surround",
 	  .nid = SURROUND,
 	  .mid = 0x96,
@@ -277,7 +316,7 @@
 	unsigned int def_val;/*effect default values*/
 };
 
-static struct ct_tuning_ctl ca0132_tuning_ctls[] = {
+static const struct ct_tuning_ctl ca0132_tuning_ctls[] = {
 	{ .name = "Wedge Angle",
 	  .parent_nid = VOICE_FOCUS,
 	  .nid = WEDGE_ANGLE,
@@ -392,14 +431,14 @@
 	unsigned int vals[VOICEFX_MAX_PARAM_COUNT];
 };
 
-static struct ct_voicefx ca0132_voicefx = {
+static const struct ct_voicefx ca0132_voicefx = {
 	.name = "VoiceFX Capture Switch",
 	.nid = VOICEFX,
 	.mid = 0x95,
 	.reqs = {10, 11, 12, 13, 14, 15, 16, 17, 18}
 };
 
-static struct ct_voicefx_preset ca0132_voicefx_presets[] = {
+static const struct ct_voicefx_preset ca0132_voicefx_presets[] = {
 	{ .name = "Neutral",
 	  .vals = { 0x00000000, 0x43C80000, 0x44AF0000,
 		    0x44FA0000, 0x3F800000, 0x3F800000,
@@ -472,6 +511,161 @@
 	}
 };
 
+/* ca0132 EQ presets, taken from Windows Sound Blaster Z Driver */
+
+#define EQ_PRESET_MAX_PARAM_COUNT 11
+
+struct ct_eq {
+	char *name;
+	hda_nid_t nid;
+	int mid;
+	int reqs[EQ_PRESET_MAX_PARAM_COUNT]; /*effect module request*/
+};
+
+struct ct_eq_preset {
+	char *name; /*preset name*/
+	unsigned int vals[EQ_PRESET_MAX_PARAM_COUNT];
+};
+
+static const struct ct_eq ca0132_alt_eq_enum = {
+	.name = "FX: Equalizer Preset Switch",
+	.nid = EQ_PRESET_ENUM,
+	.mid = 0x96,
+	.reqs = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}
+};
+
+
+static const struct ct_eq_preset ca0132_alt_eq_presets[] = {
+	{ .name = "Flat",
+	 .vals = { 0x00000000, 0x00000000, 0x00000000,
+		   0x00000000, 0x00000000, 0x00000000,
+		   0x00000000, 0x00000000, 0x00000000,
+		   0x00000000, 0x00000000	     }
+	},
+	{ .name = "Acoustic",
+	 .vals = { 0x00000000, 0x00000000, 0x3F8CCCCD,
+		   0x40000000, 0x00000000, 0x00000000,
+		   0x00000000, 0x00000000, 0x40000000,
+		   0x40000000, 0x40000000	     }
+	},
+	{ .name = "Classical",
+	 .vals = { 0x00000000, 0x00000000, 0x40C00000,
+		   0x40C00000, 0x40466666, 0x00000000,
+		   0x00000000, 0x00000000, 0x00000000,
+		   0x40466666, 0x40466666	     }
+	},
+	{ .name = "Country",
+	 .vals = { 0x00000000, 0xBF99999A, 0x00000000,
+		   0x3FA66666, 0x3FA66666, 0x3F8CCCCD,
+		   0x00000000, 0x00000000, 0x40000000,
+		   0x40466666, 0x40800000	     }
+	},
+	{ .name = "Dance",
+	 .vals = { 0x00000000, 0xBF99999A, 0x40000000,
+		   0x40466666, 0x40866666, 0xBF99999A,
+		   0xBF99999A, 0x00000000, 0x00000000,
+		   0x40800000, 0x40800000	     }
+	},
+	{ .name = "Jazz",
+	 .vals = { 0x00000000, 0x00000000, 0x00000000,
+		   0x3F8CCCCD, 0x40800000, 0x40800000,
+		   0x40800000, 0x00000000, 0x3F8CCCCD,
+		   0x40466666, 0x40466666	     }
+	},
+	{ .name = "New Age",
+	 .vals = { 0x00000000, 0x00000000, 0x40000000,
+		   0x40000000, 0x00000000, 0x00000000,
+		   0x00000000, 0x3F8CCCCD, 0x40000000,
+		   0x40000000, 0x40000000	     }
+	},
+	{ .name = "Pop",
+	 .vals = { 0x00000000, 0xBFCCCCCD, 0x00000000,
+		   0x40000000, 0x40000000, 0x00000000,
+		   0xBF99999A, 0xBF99999A, 0x00000000,
+		   0x40466666, 0x40C00000	     }
+	},
+	{ .name = "Rock",
+	 .vals = { 0x00000000, 0xBF99999A, 0xBF99999A,
+		   0x3F8CCCCD, 0x40000000, 0xBF99999A,
+		   0xBF99999A, 0x00000000, 0x00000000,
+		   0x40800000, 0x40800000	     }
+	},
+	{ .name = "Vocal",
+	 .vals = { 0x00000000, 0xC0000000, 0xBF99999A,
+		   0xBF99999A, 0x00000000, 0x40466666,
+		   0x40800000, 0x40466666, 0x00000000,
+		   0x00000000, 0x3F8CCCCD	     }
+	}
+};
+
+/* DSP command sequences for ca0132_alt_select_out */
+#define ALT_OUT_SET_MAX_COMMANDS 9 /* Max number of commands in sequence */
+struct ca0132_alt_out_set {
+	char *name; /*preset name*/
+	unsigned char commands;
+	unsigned int mids[ALT_OUT_SET_MAX_COMMANDS];
+	unsigned int reqs[ALT_OUT_SET_MAX_COMMANDS];
+	unsigned int vals[ALT_OUT_SET_MAX_COMMANDS];
+};
+
+static const struct ca0132_alt_out_set alt_out_presets[] = {
+	{ .name = "Line Out",
+	  .commands = 7,
+	  .mids = { 0x96, 0x96, 0x96, 0x8F,
+		    0x96, 0x96, 0x96 },
+	  .reqs = { 0x19, 0x17, 0x18, 0x01,
+		    0x1F, 0x15, 0x3A },
+	  .vals = { 0x3F000000, 0x42A00000, 0x00000000,
+		    0x00000000, 0x00000000, 0x00000000,
+		    0x00000000 }
+	},
+	{ .name = "Headphone",
+	  .commands = 7,
+	  .mids = { 0x96, 0x96, 0x96, 0x8F,
+		    0x96, 0x96, 0x96 },
+	  .reqs = { 0x19, 0x17, 0x18, 0x01,
+		    0x1F, 0x15, 0x3A },
+	  .vals = { 0x3F000000, 0x42A00000, 0x00000000,
+		    0x00000000, 0x00000000, 0x00000000,
+		    0x00000000 }
+	},
+	{ .name = "Surround",
+	  .commands = 8,
+	  .mids = { 0x96, 0x8F, 0x96, 0x96,
+		    0x96, 0x96, 0x96, 0x96 },
+	  .reqs = { 0x18, 0x01, 0x1F, 0x15,
+		    0x3A, 0x1A, 0x1B, 0x1C },
+	  .vals = { 0x00000000, 0x00000000, 0x00000000,
+		    0x00000000, 0x00000000, 0x00000000,
+		    0x00000000, 0x00000000 }
+	}
+};
+
+/*
+ * DSP volume setting structs. Req 1 is left volume, req 2 is right volume,
+ * and I don't know what the third req is, but it's always zero. I assume it's
+ * some sort of update or set command to tell the DSP there's new volume info.
+ */
+#define DSP_VOL_OUT 0
+#define DSP_VOL_IN  1
+
+struct ct_dsp_volume_ctl {
+	hda_nid_t vnid;
+	int mid; /* module ID*/
+	unsigned int reqs[3]; /* scp req ID */
+};
+
+static const struct ct_dsp_volume_ctl ca0132_alt_vol_ctls[] = {
+	{ .vnid = VNID_SPK,
+	  .mid = 0x32,
+	  .reqs = {3, 4, 2}
+	},
+	{ .vnid = VNID_MIC,
+	  .mid = 0x37,
+	  .reqs = {2, 3, 1}
+	}
+};
+
 enum hda_cmd_vendor_io {
 	/* for DspIO node */
 	VENDOR_DSPIO_SCP_WRITE_DATA_LOW      = 0x000,
@@ -698,11 +892,12 @@
  */
 
 struct ca0132_spec {
-	struct snd_kcontrol_new *mixers[5];
+	const struct snd_kcontrol_new *mixers[5];
 	unsigned int num_mixers;
 	const struct hda_verb *base_init_verbs;
 	const struct hda_verb *base_exit_verbs;
 	const struct hda_verb *chip_init_verbs;
+	const struct hda_verb *sbz_init_verbs;
 	struct hda_verb *spec_init_verbs;
 	struct auto_pin_cfg autocfg;
 
@@ -719,6 +914,7 @@
 	hda_nid_t shared_mic_nid;
 	hda_nid_t shared_out_nid;
 	hda_nid_t unsol_tag_hp;
+	hda_nid_t unsol_tag_front_hp; /* for desktop ca0132 codecs */
 	hda_nid_t unsol_tag_amic1;
 
 	/* chip access */
@@ -734,6 +930,9 @@
 	unsigned int scp_resp_header;
 	unsigned int scp_resp_data[4];
 	unsigned int scp_resp_count;
+	bool alt_firmware_present;
+	bool startup_check_entered;
+	bool dsp_reload;
 
 	/* mixer and effects related */
 	unsigned char dmic_ctl;
@@ -746,6 +945,17 @@
 	long effects_switch[EFFECTS_COUNT];
 	long voicefx_val;
 	long cur_mic_boost;
+	/* ca0132_alt control related values */
+	unsigned char in_enum_val;
+	unsigned char out_enum_val;
+	unsigned char mic_boost_enum_val;
+	unsigned char smart_volume_setting;
+	long fx_ctl_val[EFFECT_LEVEL_SLIDERS];
+	long xbass_xover_freq;
+	long eq_preset_val;
+	unsigned int tlv[4];
+	struct hda_vmaster_mute_hook vmaster_mute;
+
 
 	struct hda_codec *codec;
 	struct delayed_work unsol_hp_work;
@@ -754,6 +964,25 @@
 #ifdef ENABLE_TUNING_CONTROLS
 	long cur_ctl_vals[TUNING_CTLS_COUNT];
 #endif
+	/*
+	 * Sound Blaster Z PCI region 2 iomem, used for input and output
+	 * switching, and other unknown commands.
+	 */
+	void __iomem *mem_base;
+
+	/*
+	 * Whether or not to use the alt functions like alt_select_out,
+	 * alt_select_in, etc. Only used on desktop codecs for now, because of
+	 * surround sound support.
+	 */
+	bool use_alt_functions;
+
+	/*
+	 * Whether or not to use alt controls:	volume effect sliders, EQ
+	 * presets, smart volume presets, and new control names with FX prefix.
+	 * Renames PlayEnhancement and CrystalVoice too.
+	 */
+	bool use_alt_controls;
 };
 
 /*
@@ -762,6 +991,8 @@
 enum {
 	QUIRK_NONE,
 	QUIRK_ALIENWARE,
+	QUIRK_SBZ,
+	QUIRK_R3DI,
 };
 
 static const struct hda_pintbl alienware_pincfgs[] = {
@@ -778,10 +1009,44 @@
 	{}
 };
 
+/* Sound Blaster Z pin configs taken from Windows Driver */
+static const struct hda_pintbl sbz_pincfgs[] = {
+	{ 0x0b, 0x01017010 }, /* Port G -- Lineout FRONT L/R */
+	{ 0x0c, 0x014510f0 }, /* SPDIF Out 1 */
+	{ 0x0d, 0x014510f0 }, /* Digital Out */
+	{ 0x0e, 0x01c510f0 }, /* SPDIF In */
+	{ 0x0f, 0x0221701f }, /* Port A -- BackPanel HP */
+	{ 0x10, 0x01017012 }, /* Port D -- Center/LFE or FP Hp */
+	{ 0x11, 0x01017014 }, /* Port B -- LineMicIn2 / Rear L/R */
+	{ 0x12, 0x01a170f0 }, /* Port C -- LineIn1 */
+	{ 0x13, 0x908700f0 }, /* What U Hear In*/
+	{ 0x18, 0x50d000f0 }, /* N/A */
+	{}
+};
+
+/* Recon3D integrated pin configs taken from Windows Driver */
+static const struct hda_pintbl r3di_pincfgs[] = {
+	{ 0x0b, 0x01014110 }, /* Port G -- Lineout FRONT L/R */
+	{ 0x0c, 0x014510f0 }, /* SPDIF Out 1 */
+	{ 0x0d, 0x014510f0 }, /* Digital Out */
+	{ 0x0e, 0x41c520f0 }, /* SPDIF In */
+	{ 0x0f, 0x0221401f }, /* Port A -- BackPanel HP */
+	{ 0x10, 0x01016011 }, /* Port D -- Center/LFE or FP Hp */
+	{ 0x11, 0x01011014 }, /* Port B -- LineMicIn2 / Rear L/R */
+	{ 0x12, 0x02a090f0 }, /* Port C -- LineIn1 */
+	{ 0x13, 0x908700f0 }, /* What U Hear In*/
+	{ 0x18, 0x500000f0 }, /* N/A */
+	{}
+};
+
 static const struct snd_pci_quirk ca0132_quirks[] = {
 	SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE),
 	SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE),
 	SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE),
+	SND_PCI_QUIRK(0x1102, 0x0010, "Sound Blaster Z", QUIRK_SBZ),
+	SND_PCI_QUIRK(0x1102, 0x0023, "Sound Blaster Z", QUIRK_SBZ),
+	SND_PCI_QUIRK(0x1458, 0xA016, "Recon3Di", QUIRK_R3DI),
+	SND_PCI_QUIRK(0x1458, 0xA036, "Recon3Di", QUIRK_R3DI),
 	{}
 };
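
The quirk table keys off PCI subsystem vendor/device IDs; at probe time the driver can resolve the current board with the standard lookup helper, falling back to QUIRK_NONE. A hedged sketch of that resolution:

	const struct snd_pci_quirk *quirk;

	quirk = snd_pci_quirk_lookup(codec->bus->pci, ca0132_quirks);
	spec->quirk = quirk ? quirk->value : QUIRK_NONE;
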
 
@@ -965,6 +1230,29 @@
 }
 
 /*
+ * Write given value to the given address through the chip I/O widget.
+ * Not protected by the Mutex.
+ */
+static int chipio_write_no_mutex(struct hda_codec *codec,
+		unsigned int chip_addx, const unsigned int data)
+{
+	int err;
+
+
+	/* write the address, and if successful proceed to write data */
+	err = chipio_write_address(codec, chip_addx);
+	if (err < 0)
+		goto exit;
+
+	err = chipio_write_data(codec, data);
+	if (err < 0)
+		goto exit;
+
+exit:
+	return err;
+}
+
+/*
  * Write multiple values to the given address through the chip I/O widget.
  * protected by the Mutex
  */
@@ -1058,6 +1346,81 @@
 }
 
 /*
+ * Set chip parameters through the chip I/O widget. NO MUTEX.
+ */
+static void chipio_set_control_param_no_mutex(struct hda_codec *codec,
+		enum control_param_id param_id, int param_val)
+{
+	int val;
+
+	if ((param_id < 32) && (param_val < 8)) {
+		val = (param_val << 5) | (param_id);
+		snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+				    VENDOR_CHIPIO_PARAM_SET, val);
+	} else {
+		if (chipio_send(codec, VENDOR_CHIPIO_STATUS, 0) == 0) {
+			snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+					    VENDOR_CHIPIO_PARAM_EX_ID_SET,
+					    param_id);
+			snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+					    VENDOR_CHIPIO_PARAM_EX_VALUE_SET,
+					    param_val);
+		}
+	}
+}
+/*
+ * Connect stream to a source point, and then connect
+ * that source point to a destination point.
+ */
+static void chipio_set_stream_source_dest(struct hda_codec *codec,
+				int streamid, int source_point, int dest_point)
+{
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_STREAM_ID, streamid);
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_STREAM_SOURCE_CONN_POINT, source_point);
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_STREAM_DEST_CONN_POINT, dest_point);
+}
+
+/*
+ * Set number of channels in the selected stream.
+ */
+static void chipio_set_stream_channels(struct hda_codec *codec,
+				int streamid, unsigned int channels)
+{
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_STREAM_ID, streamid);
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_STREAMS_CHANNELS, channels);
+}
+
+/*
+ * Enable/Disable audio stream.
+ */
+static void chipio_set_stream_control(struct hda_codec *codec,
+				int streamid, int enable)
+{
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_STREAM_ID, streamid);
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_STREAM_CONTROL, enable);
+}
+
+
+/*
+ * Set sampling rate of the connection point. NO MUTEX.
+ */
+static void chipio_set_conn_rate_no_mutex(struct hda_codec *codec,
+				int connid, enum ca0132_sample_rate rate)
+{
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_CONN_POINT_ID, connid);
+	chipio_set_control_param_no_mutex(codec,
+			CONTROL_PARAM_CONN_POINT_SAMPLE_RATE, rate);
+}
+
+/*
  * Set sampling rate of the connection point.
  */
 static void chipio_set_conn_rate(struct hda_codec *codec,
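
The new no-mutex chipio helpers are building blocks for routing a stream through the chip: point a stream at source and destination connection points, set its channel count, pick a sample rate, then enable it. Composed, a hypothetical two-channel route might read as follows (stream_id, src_point, and dst_point are placeholders; SR_48_000 is assumed to be a member of enum ca0132_sample_rate):

	chipio_set_stream_source_dest(codec, stream_id, src_point, dst_point);
	chipio_set_stream_channels(codec, stream_id, 2);
	chipio_set_conn_rate_no_mutex(codec, dst_point, SR_48_000);
	chipio_set_stream_control(codec, stream_id, 1);	/* enable */
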
@@ -1420,8 +1783,8 @@
  * Returns zero or a negative error code.
  */
 static int dspio_scp(struct hda_codec *codec,
-		int mod_id, int req, int dir, void *data, unsigned int len,
-		void *reply, unsigned int *reply_len)
+		int mod_id, int src_id, int req, int dir, const void *data,
+		unsigned int len, void *reply, unsigned int *reply_len)
 {
 	int status = 0;
 	struct scp_msg scp_send, scp_reply;
@@ -1445,7 +1808,7 @@
 		return -EINVAL;
 	}
 
-	scp_send.hdr = make_scp_header(mod_id, 0x20, (dir == SCP_GET), req,
+	scp_send.hdr = make_scp_header(mod_id, src_id, (dir == SCP_GET), req,
 				       0, 0, 0, len/sizeof(unsigned int));
 	if (data != NULL && len > 0) {
 		len = min((unsigned int)(sizeof(scp_send.data)), len);
@@ -1502,15 +1865,24 @@
  * Set DSP parameters
  */
 static int dspio_set_param(struct hda_codec *codec, int mod_id,
-			int req, void *data, unsigned int len)
+			int src_id, int req, const void *data, unsigned int len)
 {
-	return dspio_scp(codec, mod_id, req, SCP_SET, data, len, NULL, NULL);
+	return dspio_scp(codec, mod_id, src_id, req, SCP_SET, data, len, NULL,
+			NULL);
 }
 
 static int dspio_set_uint_param(struct hda_codec *codec, int mod_id,
-			int req, unsigned int data)
+			int req, const unsigned int data)
 {
-	return dspio_set_param(codec, mod_id, req, &data, sizeof(unsigned int));
+	return dspio_set_param(codec, mod_id, 0x20, req, &data,
+			sizeof(unsigned int));
+}
+
+static int dspio_set_uint_param_no_source(struct hda_codec *codec, int mod_id,
+			int req, const unsigned int data)
+{
+	return dspio_set_param(codec, mod_id, 0x00, req, &data,
+			sizeof(unsigned int));
 }
 
 /*
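
The dspio refactor threads an explicit SCP source ID through the stack: dspio_scp() previously hard-coded 0x20 into make_scp_header(), and the new src_id argument lets callers choose. dspio_set_uint_param() keeps the old 0x20 behavior, while the new dspio_set_uint_param_no_source() sends 0x00. Call sites change mechanically:

	/* Old, source 0x20 implied: */
	status = dspio_scp(codec, mod_id, req, SCP_SET,
			   &data, sizeof(data), NULL, NULL);

	/* New, source explicit: */
	status = dspio_scp(codec, mod_id, 0x20, req, SCP_SET,
			   &data, sizeof(data), NULL, NULL);
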
@@ -1522,8 +1894,9 @@
 	unsigned int size = sizeof(dma_chan);
 
 	codec_dbg(codec, "     dspio_alloc_dma_chan() -- begin\n");
-	status = dspio_scp(codec, MASTERCONTROL, MASTERCONTROL_ALLOC_DMA_CHAN,
-			SCP_GET, NULL, 0, dma_chan, &size);
+	status = dspio_scp(codec, MASTERCONTROL, 0x20,
+			MASTERCONTROL_ALLOC_DMA_CHAN, SCP_GET, NULL, 0,
+			dma_chan, &size);
 
 	if (status < 0) {
 		codec_dbg(codec, "dspio_alloc_dma_chan: SCP Failed\n");
@@ -1552,8 +1925,9 @@
 	codec_dbg(codec, "     dspio_free_dma_chan() -- begin\n");
 	codec_dbg(codec, "dspio_free_dma_chan: chan=%d\n", dma_chan);
 
-	status = dspio_scp(codec, MASTERCONTROL, MASTERCONTROL_ALLOC_DMA_CHAN,
-			   SCP_SET, &dma_chan, sizeof(dma_chan), NULL, &dummy);
+	status = dspio_scp(codec, MASTERCONTROL, 0x20,
+			MASTERCONTROL_ALLOC_DMA_CHAN, SCP_SET, &dma_chan,
+			sizeof(dma_chan), NULL, &dummy);
 
 	if (status < 0) {
 		codec_dbg(codec, "dspio_free_dma_chan: SCP Failed\n");
@@ -2575,14 +2949,16 @@
  */
 static void dspload_post_setup(struct hda_codec *codec)
 {
+	struct ca0132_spec *spec = codec->spec;
 	codec_dbg(codec, "---- dspload_post_setup ------\n");
+	if (!spec->use_alt_functions) {
+		/*set DSP speaker to 2.0 configuration*/
+		chipio_write(codec, XRAM_XRAM_INST_OFFSET(0x18), 0x08080080);
+		chipio_write(codec, XRAM_XRAM_INST_OFFSET(0x19), 0x3f800000);
 
-	/*set DSP speaker to 2.0 configuration*/
-	chipio_write(codec, XRAM_XRAM_INST_OFFSET(0x18), 0x08080080);
-	chipio_write(codec, XRAM_XRAM_INST_OFFSET(0x19), 0x3f800000);
-
-	/*update write pointer*/
-	chipio_write(codec, XRAM_XRAM_INST_OFFSET(0x29), 0x00000002);
+		/*update write pointer*/
+		chipio_write(codec, XRAM_XRAM_INST_OFFSET(0x29), 0x00000002);
+	}
 }
 
 /**
@@ -2690,6 +3066,170 @@
 }
 
 /*
+ * Setup GPIO for the other variants of Core3D.
+ */
+
+/*
+ * Sets up the GPIO pins so that they are discoverable. If this isn't done,
+ * the card shows as having no GPIO pins.
+ */
+static void ca0132_gpio_init(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+
+	switch (spec->quirk) {
+	case QUIRK_SBZ:
+		snd_hda_codec_write(codec, 0x01, 0, 0x793, 0x00);
+		snd_hda_codec_write(codec, 0x01, 0, 0x794, 0x53);
+		snd_hda_codec_write(codec, 0x01, 0, 0x790, 0x23);
+		break;
+	case QUIRK_R3DI:
+		snd_hda_codec_write(codec, 0x01, 0, 0x793, 0x00);
+		snd_hda_codec_write(codec, 0x01, 0, 0x794, 0x5B);
+		break;
+	}
+
+}
+
+/* Sets the GPIO for audio output. */
+static void ca0132_gpio_setup(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+
+	switch (spec->quirk) {
+	case QUIRK_SBZ:
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_DIRECTION, 0x07);
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_MASK, 0x07);
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_DATA, 0x04);
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_DATA, 0x06);
+		break;
+	case QUIRK_R3DI:
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_DIRECTION, 0x1E);
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_MASK, 0x1F);
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_DATA, 0x0C);
+		break;
+	}
+}
+
+/*
+ * GPIO control functions for the Recon3D integrated.
+ */
+
+enum r3di_gpio_bit {
+	/* Bit 1 - Switch between front/rear mic. 0 = rear, 1 = front */
+	R3DI_MIC_SELECT_BIT = 1,
+	/* Bit 2 - Switch between headphone/line out. 0 = Headphone, 1 = Line */
+	R3DI_OUT_SELECT_BIT = 2,
+	/*
+	 * I dunno what this actually does, but it stays on until the dsp
+	 * is downloaded.
+	 */
+	R3DI_GPIO_DSP_DOWNLOADING = 3,
+	/*
+	 * Same as above, no clue what it does, but it comes on after the dsp
+	 * is downloaded.
+	 */
+	R3DI_GPIO_DSP_DOWNLOADED = 4
+};
+
+enum r3di_mic_select {
+	/* Set GPIO bit 1 to 0 for rear mic */
+	R3DI_REAR_MIC = 0,
+	/* Set GPIO bit 1 to 1 for front microphone*/
+	R3DI_FRONT_MIC = 1
+};
+
+enum r3di_out_select {
+	/* Set GPIO bit 2 to 0 for headphone */
+	R3DI_HEADPHONE_OUT = 0,
+	/* Set GPIO bit 2 to 1 for speaker */
+	R3DI_LINE_OUT = 1
+};
+enum r3di_dsp_status {
+	/* Set GPIO bit 3 to 1 until DSP is downloaded */
+	R3DI_DSP_DOWNLOADING = 0,
+	/* Set GPIO bit 4 to 1 once DSP is downloaded */
+	R3DI_DSP_DOWNLOADED = 1
+};
+
+
+static void r3di_gpio_mic_set(struct hda_codec *codec,
+		enum r3di_mic_select cur_mic)
+{
+	unsigned int cur_gpio;
+
+	/* Get the current GPIO Data setup */
+	cur_gpio = snd_hda_codec_read(codec, 0x01, 0, AC_VERB_GET_GPIO_DATA, 0);
+
+	switch (cur_mic) {
+	case R3DI_REAR_MIC:
+		cur_gpio &= ~(1 << R3DI_MIC_SELECT_BIT);
+		break;
+	case R3DI_FRONT_MIC:
+		cur_gpio |= (1 << R3DI_MIC_SELECT_BIT);
+		break;
+	}
+	snd_hda_codec_write(codec, codec->core.afg, 0,
+			    AC_VERB_SET_GPIO_DATA, cur_gpio);
+}
+
+static void r3di_gpio_out_set(struct hda_codec *codec,
+		enum r3di_out_select cur_out)
+{
+	unsigned int cur_gpio;
+
+	/* Get the current GPIO Data setup */
+	cur_gpio = snd_hda_codec_read(codec, 0x01, 0, AC_VERB_GET_GPIO_DATA, 0);
+
+	switch (cur_out) {
+	case R3DI_HEADPHONE_OUT:
+		cur_gpio &= ~(1 << R3DI_OUT_SELECT_BIT);
+		break;
+	case R3DI_LINE_OUT:
+		cur_gpio |= (1 << R3DI_OUT_SELECT_BIT);
+		break;
+	}
+	snd_hda_codec_write(codec, codec->core.afg, 0,
+			    AC_VERB_SET_GPIO_DATA, cur_gpio);
+}
+
+static void r3di_gpio_dsp_status_set(struct hda_codec *codec,
+		enum r3di_dsp_status dsp_status)
+{
+	unsigned int cur_gpio;
+
+	/* Get the current GPIO Data setup */
+	cur_gpio = snd_hda_codec_read(codec, 0x01, 0, AC_VERB_GET_GPIO_DATA, 0);
+
+	switch (dsp_status) {
+	case R3DI_DSP_DOWNLOADING:
+		cur_gpio |= (1 << R3DI_GPIO_DSP_DOWNLOADING);
+		snd_hda_codec_write(codec, codec->core.afg, 0,
+				AC_VERB_SET_GPIO_DATA, cur_gpio);
+		break;
+	case R3DI_DSP_DOWNLOADED:
+		/* Set DOWNLOADING bit to 0. */
+		cur_gpio &= ~(1 << R3DI_GPIO_DSP_DOWNLOADING);
+
+		snd_hda_codec_write(codec, codec->core.afg, 0,
+				AC_VERB_SET_GPIO_DATA, cur_gpio);
+
+		cur_gpio |= (1 << R3DI_GPIO_DSP_DOWNLOADED);
+		break;
+	}
+
+	snd_hda_codec_write(codec, codec->core.afg, 0,
+			    AC_VERB_SET_GPIO_DATA, cur_gpio);
+}
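+
+/*
+ * Note the DOWNLOADED case performs two writes: one inside the switch that
+ * clears the DOWNLOADING bit, and the common write at the end that then
+ * raises the DOWNLOADED bit.
+ */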
+
+/*
  * PCM callbacks
  */
 static int ca0132_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
@@ -2852,6 +3392,24 @@
 	  .tlv = { .c = ca0132_volume_tlv }, \
 	  .private_value = HDA_COMPOSE_AMP_VAL(nid, channel, 0, dir) }
 
+/*
+ * Creates a mixer control that uses defaults of HDA_CODEC_VOL except for the
+ * volume put, which is used for setting the DSP volume. This is done because
+ * the standard ca0132 volume functions were too slow, causing audible lag.
+ */
+#define CA0132_ALT_CODEC_VOL_MONO(xname, nid, channel, dir) \
+	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
+	  .name = xname, \
+	  .subdevice = HDA_SUBDEV_AMP_FLAG, \
+	  .access = SNDRV_CTL_ELEM_ACCESS_READWRITE | \
+			SNDRV_CTL_ELEM_ACCESS_TLV_READ | \
+			SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK, \
+	  .info = snd_hda_mixer_amp_volume_info, \
+	  .get = snd_hda_mixer_amp_volume_get, \
+	  .put = ca0132_alt_volume_put, \
+	  .tlv = { .c = snd_hda_mixer_amp_tlv }, \
+	  .private_value = HDA_COMPOSE_AMP_VAL(nid, channel, 0, dir) }
+
 #define CA0132_CODEC_MUTE_MONO(xname, nid, channel, dir) \
 	{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, \
 	  .name = xname, \
@@ -2864,9 +3422,88 @@
 /* stereo */
 #define CA0132_CODEC_VOL(xname, nid, dir) \
 	CA0132_CODEC_VOL_MONO(xname, nid, 3, dir)
+#define CA0132_ALT_CODEC_VOL(xname, nid, dir) \
+	CA0132_ALT_CODEC_VOL_MONO(xname, nid, 3, dir)
 #define CA0132_CODEC_MUTE(xname, nid, dir) \
 	CA0132_CODEC_MUTE_MONO(xname, nid, 3, dir)
 
+/* lookup tables */
+/*
+ * Lookup table with decibel values for the DSP. When volume is changed in
+ * Windows, the DSP is also sent the dB value in floating point. In Windows,
+ * these values have decimal points, probably because the Windows driver
+ * actually uses floating point. We can't use floating point in the kernel,
+ * so this is a lookup table of values from -90 to 9. -90 is the lowest
+ * decibel value for both the ADCs and the DACs, and 9 is the maximum.
+ */
+static const unsigned int float_vol_db_lookup[] = {
+0xC2B40000, 0xC2B20000, 0xC2B00000, 0xC2AE0000, 0xC2AC0000, 0xC2AA0000,
+0xC2A80000, 0xC2A60000, 0xC2A40000, 0xC2A20000, 0xC2A00000, 0xC29E0000,
+0xC29C0000, 0xC29A0000, 0xC2980000, 0xC2960000, 0xC2940000, 0xC2920000,
+0xC2900000, 0xC28E0000, 0xC28C0000, 0xC28A0000, 0xC2880000, 0xC2860000,
+0xC2840000, 0xC2820000, 0xC2800000, 0xC27C0000, 0xC2780000, 0xC2740000,
+0xC2700000, 0xC26C0000, 0xC2680000, 0xC2640000, 0xC2600000, 0xC25C0000,
+0xC2580000, 0xC2540000, 0xC2500000, 0xC24C0000, 0xC2480000, 0xC2440000,
+0xC2400000, 0xC23C0000, 0xC2380000, 0xC2340000, 0xC2300000, 0xC22C0000,
+0xC2280000, 0xC2240000, 0xC2200000, 0xC21C0000, 0xC2180000, 0xC2140000,
+0xC2100000, 0xC20C0000, 0xC2080000, 0xC2040000, 0xC2000000, 0xC1F80000,
+0xC1F00000, 0xC1E80000, 0xC1E00000, 0xC1D80000, 0xC1D00000, 0xC1C80000,
+0xC1C00000, 0xC1B80000, 0xC1B00000, 0xC1A80000, 0xC1A00000, 0xC1980000,
+0xC1900000, 0xC1880000, 0xC1800000, 0xC1700000, 0xC1600000, 0xC1500000,
+0xC1400000, 0xC1300000, 0xC1200000, 0xC1100000, 0xC1000000, 0xC0E00000,
+0xC0C00000, 0xC0A00000, 0xC0800000, 0xC0400000, 0xC0000000, 0xBF800000,
+0x00000000, 0x3F800000, 0x40000000, 0x40400000, 0x40800000, 0x40A00000,
+0x40C00000, 0x40E00000, 0x41000000, 0x41100000
+};
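+
+/*
+ * For reference, each entry above is just the IEEE 754 bit pattern of the
+ * corresponding dB value as a 32-bit float. A minimal userspace sketch
+ * (hypothetical, not part of this driver) that regenerates the table:
+ *
+ *	union { float f; unsigned int i; } u;
+ *	int db;
+ *
+ *	for (db = -90; db <= 9; db++) {
+ *		u.f = (float)db;
+ *		printf("0x%08X,\n", u.i);
+ *	}
+ */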
+
+/*
+ * This table counts from float 0 to 1 in increments of .01, which is
+ * useful for a few different sliders.
+ */
+static const unsigned int float_zero_to_one_lookup[] = {
+0x00000000, 0x3C23D70A, 0x3CA3D70A, 0x3CF5C28F, 0x3D23D70A, 0x3D4CCCCD,
+0x3D75C28F, 0x3D8F5C29, 0x3DA3D70A, 0x3DB851EC, 0x3DCCCCCD, 0x3DE147AE,
+0x3DF5C28F, 0x3E051EB8, 0x3E0F5C29, 0x3E19999A, 0x3E23D70A, 0x3E2E147B,
+0x3E3851EC, 0x3E428F5C, 0x3E4CCCCD, 0x3E570A3D, 0x3E6147AE, 0x3E6B851F,
+0x3E75C28F, 0x3E800000, 0x3E851EB8, 0x3E8A3D71, 0x3E8F5C29, 0x3E947AE1,
+0x3E99999A, 0x3E9EB852, 0x3EA3D70A, 0x3EA8F5C3, 0x3EAE147B, 0x3EB33333,
+0x3EB851EC, 0x3EBD70A4, 0x3EC28F5C, 0x3EC7AE14, 0x3ECCCCCD, 0x3ED1EB85,
+0x3ED70A3D, 0x3EDC28F6, 0x3EE147AE, 0x3EE66666, 0x3EEB851F, 0x3EF0A3D7,
+0x3EF5C28F, 0x3EFAE148, 0x3F000000, 0x3F028F5C, 0x3F051EB8, 0x3F07AE14,
+0x3F0A3D71, 0x3F0CCCCD, 0x3F0F5C29, 0x3F11EB85, 0x3F147AE1, 0x3F170A3D,
+0x3F19999A, 0x3F1C28F6, 0x3F1EB852, 0x3F2147AE, 0x3F23D70A, 0x3F266666,
+0x3F28F5C3, 0x3F2B851F, 0x3F2E147B, 0x3F30A3D7, 0x3F333333, 0x3F35C28F,
+0x3F3851EC, 0x3F3AE148, 0x3F3D70A4, 0x3F400000, 0x3F428F5C, 0x3F451EB8,
+0x3F47AE14, 0x3F4A3D71, 0x3F4CCCCD, 0x3F4F5C29, 0x3F51EB85, 0x3F547AE1,
+0x3F570A3D, 0x3F59999A, 0x3F5C28F6, 0x3F5EB852, 0x3F6147AE, 0x3F63D70A,
+0x3F666666, 0x3F68F5C3, 0x3F6B851F, 0x3F6E147B, 0x3F70A3D7, 0x3F733333,
+0x3F75C28F, 0x3F7851EC, 0x3F7AE148, 0x3F7D70A4, 0x3F800000
+};
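+
+/*
+ * Equivalently, entry i above is the 32-bit float bit pattern of i / 100.0f,
+ * for 0 <= i <= 100.
+ */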
+
+/*
+ * This table counts from 10 to 1000 in steps of 10 (as 32-bit floats), which
+ * is the range of the X-Bass crossover slider in Windows.
+ */
+static const unsigned int float_xbass_xover_lookup[] = {
+0x41200000, 0x41A00000, 0x41F00000, 0x42200000, 0x42480000, 0x42700000,
+0x428C0000, 0x42A00000, 0x42B40000, 0x42C80000, 0x42DC0000, 0x42F00000,
+0x43020000, 0x430C0000, 0x43160000, 0x43200000, 0x432A0000, 0x43340000,
+0x433E0000, 0x43480000, 0x43520000, 0x435C0000, 0x43660000, 0x43700000,
+0x437A0000, 0x43820000, 0x43870000, 0x438C0000, 0x43910000, 0x43960000,
+0x439B0000, 0x43A00000, 0x43A50000, 0x43AA0000, 0x43AF0000, 0x43B40000,
+0x43B90000, 0x43BE0000, 0x43C30000, 0x43C80000, 0x43CD0000, 0x43D20000,
+0x43D70000, 0x43DC0000, 0x43E10000, 0x43E60000, 0x43EB0000, 0x43F00000,
+0x43F50000, 0x43FA0000, 0x43FF0000, 0x44020000, 0x44048000, 0x44070000,
+0x44098000, 0x440C0000, 0x440E8000, 0x44110000, 0x44138000, 0x44160000,
+0x44188000, 0x441B0000, 0x441D8000, 0x44200000, 0x44228000, 0x44250000,
+0x44278000, 0x442A0000, 0x442C8000, 0x442F0000, 0x44318000, 0x44340000,
+0x44368000, 0x44390000, 0x443B8000, 0x443E0000, 0x44408000, 0x44430000,
+0x44458000, 0x44480000, 0x444A8000, 0x444D0000, 0x444F8000, 0x44520000,
+0x44548000, 0x44570000, 0x44598000, 0x445C0000, 0x445E8000, 0x44610000,
+0x44638000, 0x44660000, 0x44688000, 0x446B0000, 0x446D8000, 0x44700000,
+0x44728000, 0x44750000, 0x44778000, 0x447A0000
+};
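+
+/*
+ * Equivalently, entry i above is the 32-bit float bit pattern of
+ * 10.0f * (i + 1), i.e. 10 Hz up to 1000 Hz in 10 Hz steps.
+ */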
+
 /* The following are for tuning of products */
 #ifdef ENABLE_TUNING_CONTROLS
 
@@ -2942,7 +3579,7 @@
 			break;
 
 	snd_hda_power_up(codec);
-	dspio_set_param(codec, ca0132_tuning_ctls[i].mid,
+	dspio_set_param(codec, ca0132_tuning_ctls[i].mid, 0x20,
 			ca0132_tuning_ctls[i].req,
 			&(lookup[idx]), sizeof(unsigned int));
 	snd_hda_power_down(codec);
@@ -3068,8 +3705,8 @@
 	return 1;
 }
 
-static const DECLARE_TLV_DB_SCALE(voice_focus_db_scale, 2000, 100, 0);
-static const DECLARE_TLV_DB_SCALE(eq_db_scale, -2400, 100, 0);
+static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(voice_focus_db_scale, 2000, 100, 0);
+static const SNDRV_CTL_TLVD_DECLARE_DB_SCALE(eq_db_scale, -2400, 100, 0);
 
 static int add_tuning_control(struct hda_codec *codec,
 				hda_nid_t pnid, hda_nid_t nid,
@@ -3207,7 +3844,7 @@
 				    pin_ctl & ~PIN_HP);
 		/* enable speaker node */
 		pin_ctl = snd_hda_codec_read(codec, spec->out_pins[0], 0,
-					     AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+				AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
 		snd_hda_set_pin_ctl(codec, spec->out_pins[0],
 				    pin_ctl | PIN_OUT);
 	} else {
@@ -3251,13 +3888,209 @@
 	return err < 0 ? err : 0;
 }
 
+/*
+ * This function behaves similarly to the ca0132_select_out function above,
+ * with a few differences. It adds the ability to select the current
+ * output with the enumerated control "Output Select" when the auto detect
+ * mute switch is set to off. If the auto detect mute switch is enabled, it
+ * will detect either headphone or line out (SPEAKER_OUT) from jack detection.
+ * It also adds the ability to auto-detect the front headphone port. The only
+ * way to select surround is to disable auto detect, and set Surround with the
+ * enumerated control.
+ */
+static int ca0132_alt_select_out(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+	unsigned int pin_ctl;
+	int jack_present;
+	int auto_jack;
+	unsigned int i;
+	unsigned int tmp;
+	int err;
+	/* Default Headphone is rear headphone */
+	hda_nid_t headphone_nid = spec->out_pins[1];
+
+	codec_dbg(codec, "%s\n", __func__);
+
+	snd_hda_power_up_pm(codec);
+
+	auto_jack = spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID];
+
+	/*
+	 * If headphone rear or front is plugged in, set to headphone.
+	 * If neither is plugged in, set to rear line out. Only if
+	 * hp/speaker auto detect is enabled.
+	 */
+	if (auto_jack) {
+		jack_present = snd_hda_jack_detect(codec, spec->unsol_tag_hp) ||
+			   snd_hda_jack_detect(codec, spec->unsol_tag_front_hp);
+
+		if (jack_present)
+			spec->cur_out_type = HEADPHONE_OUT;
+		else
+			spec->cur_out_type = SPEAKER_OUT;
+	} else {
+		spec->cur_out_type = spec->out_enum_val;
+	}
+
+	/* Begin DSP output switch */
+	tmp = FLOAT_ONE;
+	err = dspio_set_uint_param(codec, 0x96, 0x3A, tmp);
+	if (err < 0)
+		goto exit;
+
+	switch (spec->cur_out_type) {
+	case SPEAKER_OUT:
+		codec_dbg(codec, "%s speaker\n", __func__);
+		/*speaker out config*/
+		switch (spec->quirk) {
+		case QUIRK_SBZ:
+			writew(0x0007, spec->mem_base + 0x320);
+			writew(0x0104, spec->mem_base + 0x320);
+			writew(0x0101, spec->mem_base + 0x320);
+			chipio_set_control_param(codec, 0x0D, 0x18);
+			break;
+		case QUIRK_R3DI:
+			chipio_set_control_param(codec, 0x0D, 0x24);
+			r3di_gpio_out_set(codec, R3DI_LINE_OUT);
+			break;
+		}
+
+		/* disable headphone node */
+		pin_ctl = snd_hda_codec_read(codec, spec->out_pins[1], 0,
+					AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+		snd_hda_set_pin_ctl(codec, spec->out_pins[1],
+				    pin_ctl & ~PIN_HP);
+		/* enable line-out node */
+		pin_ctl = snd_hda_codec_read(codec, spec->out_pins[0], 0,
+				AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+		snd_hda_set_pin_ctl(codec, spec->out_pins[0],
+				    pin_ctl | PIN_OUT);
+		/* Enable EAPD */
+		snd_hda_codec_write(codec, spec->out_pins[0], 0,
+			AC_VERB_SET_EAPD_BTLENABLE, 0x01);
+
+		/* If PlayEnhancement is enabled, set different source */
+		if (spec->effects_switch[PLAY_ENHANCEMENT - EFFECT_START_NID])
+			dspio_set_uint_param(codec, 0x80, 0x04, FLOAT_ONE);
+		else
+			dspio_set_uint_param(codec, 0x80, 0x04, FLOAT_EIGHT);
+		break;
+	case HEADPHONE_OUT:
+		codec_dbg(codec, "%s hp\n", __func__);
+		/* Headphone out config*/
+		switch (spec->quirk) {
+		case QUIRK_SBZ:
+			writew(0x0107, spec->mem_base + 0x320);
+			writew(0x0104, spec->mem_base + 0x320);
+			writew(0x0001, spec->mem_base + 0x320);
+			chipio_set_control_param(codec, 0x0D, 0x12);
+			break;
+		case QUIRK_R3DI:
+			chipio_set_control_param(codec, 0x0D, 0x21);
+			r3di_gpio_out_set(codec, R3DI_HEADPHONE_OUT);
+			break;
+		}
+
+		snd_hda_codec_write(codec, spec->out_pins[0], 0,
+			AC_VERB_SET_EAPD_BTLENABLE, 0x00);
+
+		/* disable speaker*/
+		pin_ctl = snd_hda_codec_read(codec, spec->out_pins[0], 0,
+					AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+		snd_hda_set_pin_ctl(codec, spec->out_pins[0],
+				pin_ctl & ~PIN_HP);
+
+		/* enable headphone, either front or rear */
+		if (snd_hda_jack_detect(codec, spec->unsol_tag_front_hp))
+			headphone_nid = spec->out_pins[2];
+		else if (snd_hda_jack_detect(codec, spec->unsol_tag_hp))
+			headphone_nid = spec->out_pins[1];
+
+		pin_ctl = snd_hda_codec_read(codec, headphone_nid, 0,
+					AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+		snd_hda_set_pin_ctl(codec, headphone_nid,
+				    pin_ctl | PIN_HP);
+
+		if (spec->effects_switch[PLAY_ENHANCEMENT - EFFECT_START_NID])
+			dspio_set_uint_param(codec, 0x80, 0x04, FLOAT_ONE);
+		else
+			dspio_set_uint_param(codec, 0x80, 0x04, FLOAT_ZERO);
+		break;
+	case SURROUND_OUT:
+		codec_dbg(codec, "%s surround\n", __func__);
+		/* Surround out config*/
+		switch (spec->quirk) {
+		case QUIRK_SBZ:
+			writew(0x0007, spec->mem_base + 0x320);
+			writew(0x0104, spec->mem_base + 0x320);
+			writew(0x0101, spec->mem_base + 0x320);
+			chipio_set_control_param(codec, 0x0D, 0x18);
+			break;
+		case QUIRK_R3DI:
+			chipio_set_control_param(codec, 0x0D, 0x24);
+			r3di_gpio_out_set(codec, R3DI_LINE_OUT);
+			break;
+		}
+		/* enable line out node */
+		pin_ctl = snd_hda_codec_read(codec, spec->out_pins[0], 0,
+				AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+		snd_hda_set_pin_ctl(codec, spec->out_pins[0],
+						pin_ctl | PIN_OUT);
+		/* Disable headphone out */
+		pin_ctl = snd_hda_codec_read(codec, spec->out_pins[1], 0,
+					AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+		snd_hda_set_pin_ctl(codec, spec->out_pins[1],
+				    pin_ctl & ~PIN_HP);
+		/* Enable EAPD on line out */
+		snd_hda_codec_write(codec, spec->out_pins[0], 0,
+			AC_VERB_SET_EAPD_BTLENABLE, 0x01);
+		/* enable center/lfe out node */
+		pin_ctl = snd_hda_codec_read(codec, spec->out_pins[2], 0,
+					AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+		snd_hda_set_pin_ctl(codec, spec->out_pins[2],
+				    pin_ctl | PIN_OUT);
+		/* Now set rear surround node as out. */
+		pin_ctl = snd_hda_codec_read(codec, spec->out_pins[3], 0,
+					AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+		snd_hda_set_pin_ctl(codec, spec->out_pins[3],
+				    pin_ctl | PIN_OUT);
+
+		if (spec->effects_switch[PLAY_ENHANCEMENT - EFFECT_START_NID])
+			dspio_set_uint_param(codec, 0x80, 0x04, FLOAT_ONE);
+		else
+			dspio_set_uint_param(codec, 0x80, 0x04, FLOAT_EIGHT);
+		break;
+	}
+
+	/* Run through the output DSP commands for the selected output. */
+	for (i = 0; i < alt_out_presets[spec->cur_out_type].commands; i++) {
+		err = dspio_set_uint_param(codec,
+				alt_out_presets[spec->cur_out_type].mids[i],
+				alt_out_presets[spec->cur_out_type].reqs[i],
+				alt_out_presets[spec->cur_out_type].vals[i]);
+
+		if (err < 0)
+			goto exit;
+	}
+
+exit:
+	snd_hda_power_down_pm(codec);
+
+	return err < 0 ? err : 0;
+}
+
 static void ca0132_unsol_hp_delayed(struct work_struct *work)
 {
 	struct ca0132_spec *spec = container_of(
 		to_delayed_work(work), struct ca0132_spec, unsol_hp_work);
 	struct hda_jack_tbl *jack;
 
-	ca0132_select_out(spec->codec);
+	if (spec->use_alt_functions)
+		ca0132_alt_select_out(spec->codec);
+	else
+		ca0132_select_out(spec->codec);
+
 	jack = snd_hda_jack_tbl_get(spec->codec, spec->unsol_tag_hp);
 	if (jack) {
 		jack->block_report = 0;
@@ -3268,6 +4101,10 @@
 static void ca0132_set_dmic(struct hda_codec *codec, int enable);
 static int ca0132_mic_boost_set(struct hda_codec *codec, long val);
 static int ca0132_effects_set(struct hda_codec *codec, hda_nid_t nid, long val);
+static void resume_mic1(struct hda_codec *codec, unsigned int oldval);
+static int stop_mic1(struct hda_codec *codec);
+static int ca0132_cvoice_switch_set(struct hda_codec *codec);
+static int ca0132_alt_mic_boost_set(struct hda_codec *codec, long val);
 
 /*
  * Select the active VIP source
@@ -3310,6 +4147,71 @@
 	return 1;
 }
 
+static int ca0132_alt_set_vipsource(struct hda_codec *codec, int val)
+{
+	struct ca0132_spec *spec = codec->spec;
+	unsigned int tmp;
+
+	if (spec->dsp_state != DSP_DOWNLOADED)
+		return 0;
+
+	codec_dbg(codec, "%s\n", __func__);
+
+	chipio_set_stream_control(codec, 0x03, 0);
+	chipio_set_stream_control(codec, 0x04, 0);
+
+	/* if CrystalVoice is off, vipsource should be 0 */
+	if (!spec->effects_switch[CRYSTAL_VOICE - EFFECT_START_NID] ||
+	    (val == 0) || spec->in_enum_val == REAR_LINE_IN) {
+		codec_dbg(codec, "%s: off.\n", __func__);
+		chipio_set_control_param(codec, CONTROL_PARAM_VIP_SOURCE, 0);
+
+		tmp = FLOAT_ZERO;
+		dspio_set_uint_param(codec, 0x80, 0x05, tmp);
+
+		chipio_set_conn_rate(codec, MEM_CONNID_MICIN1, SR_96_000);
+		chipio_set_conn_rate(codec, MEM_CONNID_MICOUT1, SR_96_000);
+		if (spec->quirk == QUIRK_R3DI)
+			chipio_set_conn_rate(codec, 0x0F, SR_96_000);
+
+		if (spec->in_enum_val == REAR_LINE_IN) {
+			tmp = FLOAT_ZERO;
+		} else {
+			if (spec->quirk == QUIRK_SBZ)
+				tmp = FLOAT_THREE;
+			else
+				tmp = FLOAT_ONE;
+		}
+
+		dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+
+	} else {
+		codec_dbg(codec, "%s: on.\n", __func__);
+		chipio_set_conn_rate(codec, MEM_CONNID_MICIN1, SR_16_000);
+		chipio_set_conn_rate(codec, MEM_CONNID_MICOUT1, SR_16_000);
+		if (spec->quirk == QUIRK_R3DI)
+			chipio_set_conn_rate(codec, 0x0F, SR_16_000);
+
+		if (spec->effects_switch[VOICE_FOCUS - EFFECT_START_NID])
+			tmp = FLOAT_TWO;
+		else
+			tmp = FLOAT_ONE;
+		dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+
+		tmp = FLOAT_ONE;
+		dspio_set_uint_param(codec, 0x80, 0x05, tmp);
+
+		msleep(20);
+		chipio_set_control_param(codec, CONTROL_PARAM_VIP_SOURCE, val);
+	}
+
+	chipio_set_stream_control(codec, 0x03, 1);
+	chipio_set_stream_control(codec, 0x04, 1);
+
+	return 1;
+}
+
 /*
  * Select the active microphone.
  * If autodetect is enabled, mic will be selected based on jack detection.
@@ -3363,6 +4265,125 @@
 }
 
 /*
+ * Select the active input.
+ * Mic jack detection isn't used, because it's of little use on the SBZ.
+ * The front mic has no jack-detection, so the only way to switch to it
+ * is to do it manually in alsamixer.
+ */
+static int ca0132_alt_select_in(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+	unsigned int tmp;
+
+	codec_dbg(codec, "%s\n", __func__);
+
+	snd_hda_power_up_pm(codec);
+
+	chipio_set_stream_control(codec, 0x03, 0);
+	chipio_set_stream_control(codec, 0x04, 0);
+
+	spec->cur_mic_type = spec->in_enum_val;
+
+	switch (spec->cur_mic_type) {
+	case REAR_MIC:
+		switch (spec->quirk) {
+		case QUIRK_SBZ:
+			writew(0x0000, spec->mem_base + 0x320);
+			tmp = FLOAT_THREE;
+			break;
+		case QUIRK_R3DI:
+			r3di_gpio_mic_set(codec, R3DI_REAR_MIC);
+			tmp = FLOAT_ONE;
+			break;
+		default:
+			tmp = FLOAT_ONE;
+			break;
+		}
+
+		chipio_set_conn_rate(codec, MEM_CONNID_MICIN1, SR_96_000);
+		chipio_set_conn_rate(codec, MEM_CONNID_MICOUT1, SR_96_000);
+		if (spec->quirk == QUIRK_R3DI)
+			chipio_set_conn_rate(codec, 0x0F, SR_96_000);
+
+		dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+
+		chipio_set_stream_control(codec, 0x03, 1);
+		chipio_set_stream_control(codec, 0x04, 1);
+
+		if (spec->quirk == QUIRK_SBZ) {
+			chipio_write(codec, 0x18B098, 0x0000000C);
+			chipio_write(codec, 0x18B09C, 0x0000000C);
+		}
+		ca0132_alt_mic_boost_set(codec, spec->mic_boost_enum_val);
+		break;
+	case REAR_LINE_IN:
+		ca0132_mic_boost_set(codec, 0);
+		switch (spec->quirk) {
+		case QUIRK_SBZ:
+			writew(0x0000, spec->mem_base + 0x320);
+			break;
+		case QUIRK_R3DI:
+			r3di_gpio_mic_set(codec, R3DI_REAR_MIC);
+			break;
+		}
+
+		chipio_set_conn_rate(codec, MEM_CONNID_MICIN1, SR_96_000);
+		chipio_set_conn_rate(codec, MEM_CONNID_MICOUT1, SR_96_000);
+		if (spec->quirk == QUIRK_R3DI)
+			chipio_set_conn_rate(codec, 0x0F, SR_96_000);
+
+		tmp = FLOAT_ZERO;
+		dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+
+		if (spec->quirk == QUIRK_SBZ) {
+			chipio_write(codec, 0x18B098, 0x00000000);
+			chipio_write(codec, 0x18B09C, 0x00000000);
+		}
+
+		chipio_set_stream_control(codec, 0x03, 1);
+		chipio_set_stream_control(codec, 0x04, 1);
+		break;
+	case FRONT_MIC:
+		switch (spec->quirk) {
+		case QUIRK_SBZ:
+			writew(0x0100, spec->mem_base + 0x320);
+			writew(0x0005, spec->mem_base + 0x320);
+			tmp = FLOAT_THREE;
+			break;
+		case QUIRK_R3DI:
+			r3di_gpio_mic_set(codec, R3DI_FRONT_MIC);
+			tmp = FLOAT_ONE;
+			break;
+		default:
+			tmp = FLOAT_ONE;
+			break;
+		}
+
+		chipio_set_conn_rate(codec, MEM_CONNID_MICIN1, SR_96_000);
+		chipio_set_conn_rate(codec, MEM_CONNID_MICOUT1, SR_96_000);
+		if (spec->quirk == QUIRK_R3DI)
+			chipio_set_conn_rate(codec, 0x0F, SR_96_000);
+
+		dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+
+		chipio_set_stream_control(codec, 0x03, 1);
+		chipio_set_stream_control(codec, 0x04, 1);
+
+		if (spec->quirk == QUIRK_SBZ) {
+			chipio_write(codec, 0x18B098, 0x0000000C);
+			chipio_write(codec, 0x18B09C, 0x000000CC);
+		}
+		ca0132_alt_mic_boost_set(codec, spec->mic_boost_enum_val);
+		break;
+	}
+	ca0132_cvoice_switch_set(codec);
+
+	snd_hda_power_down_pm(codec);
+	return 0;
+}
+
+/*
  * Check if VNODE settings take effect immediately.
  */
 static bool ca0132_is_vnode_effective(struct hda_codec *codec,
@@ -3418,7 +4439,7 @@
 static int ca0132_effects_set(struct hda_codec *codec, hda_nid_t nid, long val)
 {
 	struct ca0132_spec *spec = codec->spec;
-	unsigned int on;
+	unsigned int on, tmp;
 	int num_fx = OUT_EFFECTS_COUNT + IN_EFFECTS_COUNT;
 	int err = 0;
 	int idx = nid - EFFECT_START_NID;
@@ -3442,6 +4463,46 @@
 		/* Voice Focus applies to 2-ch Mic, Digital Mic */
 		if ((nid == VOICE_FOCUS) && (spec->cur_mic_type != DIGITAL_MIC))
 			val = 0;
+
+		/* If Voice Focus is enabled on the SBZ, set two-channel mode. */
+		if ((nid == VOICE_FOCUS) && (spec->quirk == QUIRK_SBZ)
+				&& (spec->cur_mic_type != REAR_LINE_IN)) {
+			if (spec->effects_switch[CRYSTAL_VOICE -
+						 EFFECT_START_NID]) {
+				if (spec->effects_switch[VOICE_FOCUS -
+							 EFFECT_START_NID]) {
+					tmp = FLOAT_TWO;
+					val = 1;
+				} else {
+					tmp = FLOAT_ONE;
+				}
+
+				dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+			}
+		}
+		/*
+		 * For SBZ noise reduction, there's an extra command sent
+		 * to module ID 0x47; the reason for it is unknown.
+		 */
+		if ((nid == NOISE_REDUCTION) && (spec->quirk == QUIRK_SBZ)
+				&& (spec->cur_mic_type != REAR_LINE_IN)) {
+			if (spec->effects_switch[CRYSTAL_VOICE -
+						 EFFECT_START_NID]) {
+				if (spec->effects_switch[NOISE_REDUCTION -
+							 EFFECT_START_NID])
+					tmp = FLOAT_ONE;
+				else
+					tmp = FLOAT_ZERO;
+			} else {
+				tmp = FLOAT_ZERO;
+			}
+
+			dspio_set_uint_param(codec, 0x47, 0x00, tmp);
+		}
+
+		/* If rear line-in is selected, disable effects. */
+		if (spec->use_alt_functions &&
+				spec->in_enum_val == REAR_LINE_IN)
+			val = 0;
 	}
 
 	codec_dbg(codec, "ca0132_effect_set: nid=0x%x, val=%ld\n",
@@ -3469,6 +4530,9 @@
 	codec_dbg(codec, "ca0132_pe_switch_set: val=%ld\n",
 		    spec->effects_switch[PLAY_ENHANCEMENT - EFFECT_START_NID]);
 
+	if (spec->use_alt_functions)
+		ca0132_alt_select_out(codec);
+
 	i = OUT_EFFECT_START_NID - EFFECT_START_NID;
 	nid = OUT_EFFECT_START_NID;
 	/* PE affects all out effects */
@@ -3526,7 +4590,10 @@
 
 	/* set correct vipsource */
 	oldval = stop_mic1(codec);
-	ret |= ca0132_set_vipsource(codec, 1);
+	if (spec->use_alt_functions)
+		ret |= ca0132_alt_set_vipsource(codec, 1);
+	else
+		ret |= ca0132_set_vipsource(codec, 1);
 	resume_mic1(codec, oldval);
 	return ret;
 }
@@ -3546,6 +4613,16 @@
 	return ret;
 }
 
+static int ca0132_alt_mic_boost_set(struct hda_codec *codec, long val)
+{
+	struct ca0132_spec *spec = codec->spec;
+	int ret = 0;
+
+	ret = snd_hda_codec_amp_update(codec, spec->input_pins[0], 0,
+				HDA_INPUT, 0, HDA_AMP_VOLMASK, val);
+	return ret;
+}
+
 static int ca0132_vnode_switch_set(struct snd_kcontrol *kcontrol,
 				struct snd_ctl_elem_value *ucontrol)
 {
@@ -3560,8 +4637,12 @@
 	if (nid == VNID_HP_SEL) {
 		auto_jack =
 			spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID];
-		if (!auto_jack)
-			ca0132_select_out(codec);
+		if (!auto_jack) {
+			if (spec->use_alt_functions)
+				ca0132_alt_select_out(codec);
+			else
+				ca0132_select_out(codec);
+		}
 		return 1;
 	}
 
@@ -3574,7 +4655,10 @@
 	}
 
 	if (nid == VNID_HP_ASEL) {
-		ca0132_select_out(codec);
+		if (spec->use_alt_functions)
+			ca0132_alt_select_out(codec);
+		else
+			ca0132_select_out(codec);
 		return 1;
 	}
 
@@ -3602,6 +4686,432 @@
 	return ret;
 }
 /* End of control change helpers. */
+/*
+ * Below are controls for adjusting the effect levels. They are only enabled
+ * on the Sound Blaster Z, but they would probably also work on the
+ * Chromebook. The effects were presumably tuned specifically for it and
+ * left out for a reason.
+ */
+
+/* Sets DSP effect level from the sliders above the controls */
+static int ca0132_alt_slider_ctl_set(struct hda_codec *codec, hda_nid_t nid,
+			  const unsigned int *lookup, int idx)
+{
+	int i = 0;
+	unsigned int y;
+	/*
+	 * For X_BASS, req 2 is actually crossover freq instead of
+	 * effect level.
+	 */
+	if (nid == X_BASS)
+		y = 2;
+	else
+		y = 1;
+
+	snd_hda_power_up(codec);
+	if (nid == XBASS_XOVER) {
+		for (i = 0; i < OUT_EFFECTS_COUNT; i++)
+			if (ca0132_effects[i].nid == X_BASS)
+				break;
+
+		dspio_set_param(codec, ca0132_effects[i].mid, 0x20,
+				ca0132_effects[i].reqs[1],
+				&(lookup[idx - 1]), sizeof(unsigned int));
+	} else {
+		/* Find the actual effect structure */
+		for (i = 0; i < OUT_EFFECTS_COUNT; i++)
+			if (nid == ca0132_effects[i].nid)
+				break;
+
+		dspio_set_param(codec, ca0132_effects[i].mid, 0x20,
+				ca0132_effects[i].reqs[y],
+				&(lookup[idx]), sizeof(unsigned int));
+	}
+
+	snd_hda_power_down(codec);
+
+	return 0;
+}
+
+static int ca0132_alt_xbass_xover_slider_ctl_get(struct snd_kcontrol *kcontrol,
+			  struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	long *valp = ucontrol->value.integer.value;
+
+	*valp = spec->xbass_xover_freq;
+	return 0;
+}
+
+static int ca0132_alt_slider_ctl_get(struct snd_kcontrol *kcontrol,
+			  struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	hda_nid_t nid = get_amp_nid(kcontrol);
+	long *valp = ucontrol->value.integer.value;
+	int idx = nid - OUT_EFFECT_START_NID;
+
+	*valp = spec->fx_ctl_val[idx];
+	return 0;
+}
+
+/*
+ * The X-Bass crossover starts at 10 Hz, so the minimum is 1. The
+ * frequency is set in multiples of 10.
+ */
+static int ca0132_alt_xbass_xover_slider_info(struct snd_kcontrol *kcontrol,
+		struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 1;
+	uinfo->value.integer.max = 100;
+	uinfo->value.integer.step = 1;
+
+	return 0;
+}
+
+static int ca0132_alt_effect_slider_info(struct snd_kcontrol *kcontrol,
+		struct snd_ctl_elem_info *uinfo)
+{
+	int chs = get_amp_channels(kcontrol);
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = chs == 3 ? 2 : 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = 100;
+	uinfo->value.integer.step = 1;
+
+	return 0;
+}
+
+static int ca0132_alt_xbass_xover_slider_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	hda_nid_t nid = get_amp_nid(kcontrol);
+	long *valp = ucontrol->value.integer.value;
+	int idx;
+
+	/* any change? */
+	if (spec->xbass_xover_freq == *valp)
+		return 0;
+
+	spec->xbass_xover_freq = *valp;
+
+	idx = *valp;
+	ca0132_alt_slider_ctl_set(codec, nid, float_xbass_xover_lookup, idx);
+
+	return 0;
+}
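+
+/*
+ * For example, a slider value of 25 stores xbass_xover_freq = 25 and sends
+ * float_xbass_xover_lookup[24] (0x437A0000 == 250.0f) to the DSP, i.e. a
+ * crossover frequency of 250 Hz.
+ */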
+
+static int ca0132_alt_effect_slider_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	hda_nid_t nid = get_amp_nid(kcontrol);
+	long *valp = ucontrol->value.integer.value;
+	int idx;
+
+	idx = nid - EFFECT_START_NID;
+	/* any change? */
+	if (spec->fx_ctl_val[idx] == *valp)
+		return 0;
+
+	spec->fx_ctl_val[idx] = *valp;
+
+	idx = *valp;
+	ca0132_alt_slider_ctl_set(codec, nid, float_zero_to_one_lookup, idx);
+
+	return 0;
+}
+
+/*
+ * Mic Boost Enum for alternative ca0132 codecs. The original control only
+ * offers off or the full 30 dB, and a traditional 0-100 alsamixer volume
+ * slider would move in large jumps, so an enum fits better here.
+ */
+#define MIC_BOOST_NUM_OF_STEPS 4
+#define MIC_BOOST_ENUM_MAX_STRLEN 10
+
+static int ca0132_alt_mic_boost_info(struct snd_kcontrol *kcontrol,
+				 struct snd_ctl_elem_info *uinfo)
+{
+	char *sfx = "dB";
+	char namestr[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = MIC_BOOST_NUM_OF_STEPS;
+	if (uinfo->value.enumerated.item >= MIC_BOOST_NUM_OF_STEPS)
+		uinfo->value.enumerated.item = MIC_BOOST_NUM_OF_STEPS - 1;
+	sprintf(namestr, "%d %s", (uinfo->value.enumerated.item * 10), sfx);
+	strcpy(uinfo->value.enumerated.name, namestr);
+	return 0;
+}
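+
+/*
+ * e.g. item 2 is presented as "20 dB" and, when selected via the put handler
+ * below, is written by ca0132_alt_mic_boost_set() as raw amp value 2
+ * (presumably one 10 dB step per amp index on this boost amp).
+ */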
+
+static int ca0132_alt_mic_boost_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+
+	ucontrol->value.enumerated.item[0] = spec->mic_boost_enum_val;
+	return 0;
+}
+
+static int ca0132_alt_mic_boost_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	int sel = ucontrol->value.enumerated.item[0];
+	unsigned int items = MIC_BOOST_NUM_OF_STEPS;
+
+	if (sel >= items)
+		return 0;
+
+	codec_dbg(codec, "ca0132_alt_mic_boost: boost=%d\n",
+		    sel);
+
+	spec->mic_boost_enum_val = sel;
+
+	if (spec->in_enum_val != REAR_LINE_IN)
+		ca0132_alt_mic_boost_set(codec, spec->mic_boost_enum_val);
+
+	return 1;
+}
+
+/*
+ * Input Select Control for alternative ca0132 codecs. This exists because
+ * the front microphone has no auto-detect, and we need a way to set the
+ * rear port as line-in.
+ */
+static int ca0132_alt_input_source_info(struct snd_kcontrol *kcontrol,
+				 struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = IN_SRC_NUM_OF_INPUTS;
+	if (uinfo->value.enumerated.item >= IN_SRC_NUM_OF_INPUTS)
+		uinfo->value.enumerated.item = IN_SRC_NUM_OF_INPUTS - 1;
+	strcpy(uinfo->value.enumerated.name,
+			in_src_str[uinfo->value.enumerated.item]);
+	return 0;
+}
+
+static int ca0132_alt_input_source_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+
+	ucontrol->value.enumerated.item[0] = spec->in_enum_val;
+	return 0;
+}
+
+static int ca0132_alt_input_source_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	int sel = ucontrol->value.enumerated.item[0];
+	unsigned int items = IN_SRC_NUM_OF_INPUTS;
+
+	if (sel >= items)
+		return 0;
+
+	codec_dbg(codec, "ca0132_alt_input_select: sel=%d, preset=%s\n",
+		    sel, in_src_str[sel]);
+
+	spec->in_enum_val = sel;
+
+	ca0132_alt_select_in(codec);
+
+	return 1;
+}
+
+/* Sound Blaster Z Output Select Control */
+static int ca0132_alt_output_select_get_info(struct snd_kcontrol *kcontrol,
+				 struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = NUM_OF_OUTPUTS;
+	if (uinfo->value.enumerated.item >= NUM_OF_OUTPUTS)
+		uinfo->value.enumerated.item = NUM_OF_OUTPUTS - 1;
+	strcpy(uinfo->value.enumerated.name,
+			alt_out_presets[uinfo->value.enumerated.item].name);
+	return 0;
+}
+
+static int ca0132_alt_output_select_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+
+	ucontrol->value.enumerated.item[0] = spec->out_enum_val;
+	return 0;
+}
+
+static int ca0132_alt_output_select_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	int sel = ucontrol->value.enumerated.item[0];
+	unsigned int items = NUM_OF_OUTPUTS;
+	unsigned int auto_jack;
+
+	if (sel >= items)
+		return 0;
+
+	codec_dbg(codec, "ca0132_alt_output_select: sel=%d, preset=%s\n",
+		    sel, alt_out_presets[sel].name);
+
+	spec->out_enum_val = sel;
+
+	auto_jack = spec->vnode_lswitch[VNID_HP_ASEL - VNODE_START_NID];
+
+	if (!auto_jack)
+		ca0132_alt_select_out(codec);
+
+	return 1;
+}
+
+/*
+ * Smart Volume output setting control. There are three settings: Normal,
+ * which takes its value from the smart volume slider, and Loud and Night,
+ * which disregard the slider value and use fixed, uneditable values.
+ */
+#define NUM_OF_SVM_SETTINGS 3
+static const char *const out_svm_set_enum_str[3] = {"Normal", "Loud", "Night" };
+
+static int ca0132_alt_svm_setting_info(struct snd_kcontrol *kcontrol,
+				 struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = NUM_OF_SVM_SETTINGS;
+	if (uinfo->value.enumerated.item >= NUM_OF_SVM_SETTINGS)
+		uinfo->value.enumerated.item = NUM_OF_SVM_SETTINGS - 1;
+	strcpy(uinfo->value.enumerated.name,
+			out_svm_set_enum_str[uinfo->value.enumerated.item]);
+	return 0;
+}
+
+static int ca0132_alt_svm_setting_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+
+	ucontrol->value.enumerated.item[0] = spec->smart_volume_setting;
+	return 0;
+}
+
+static int ca0132_alt_svm_setting_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	int sel = ucontrol->value.enumerated.item[0];
+	unsigned int items = NUM_OF_SVM_SETTINGS;
+	unsigned int idx = SMART_VOLUME - EFFECT_START_NID;
+	unsigned int tmp;
+
+	if (sel >= items)
+		return 0;
+
+	codec_dbg(codec, "ca0132_alt_svm_setting: sel=%d, preset=%s\n",
+		    sel, out_svm_set_enum_str[sel]);
+
+	spec->smart_volume_setting = sel;
+
+	switch (sel) {
+	case 0:
+		tmp = FLOAT_ZERO;
+		break;
+	case 1:
+		tmp = FLOAT_ONE;
+		break;
+	case 2:
+		tmp = FLOAT_TWO;
+		break;
+	default:
+		tmp = FLOAT_ZERO;
+		break;
+	}
+	/* Req 2 is the Smart Volume Setting req. */
+	dspio_set_uint_param(codec, ca0132_effects[idx].mid,
+			ca0132_effects[idx].reqs[2], tmp);
+	return 1;
+}
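+
+/*
+ * e.g. selecting "Night" (sel == 2) sends FLOAT_TWO to the Smart Volume
+ * module's reqs[2], overriding the slider-derived level.
+ */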
+
+/* Sound Blaster Z EQ preset controls */
+static int ca0132_alt_eq_preset_info(struct snd_kcontrol *kcontrol,
+				 struct snd_ctl_elem_info *uinfo)
+{
+	unsigned int items = ARRAY_SIZE(ca0132_alt_eq_presets);
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+	uinfo->count = 1;
+	uinfo->value.enumerated.items = items;
+	if (uinfo->value.enumerated.item >= items)
+		uinfo->value.enumerated.item = items - 1;
+	strcpy(uinfo->value.enumerated.name,
+		ca0132_alt_eq_presets[uinfo->value.enumerated.item].name);
+	return 0;
+}
+
+static int ca0132_alt_eq_preset_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+
+	ucontrol->value.enumerated.item[0] = spec->eq_preset_val;
+	return 0;
+}
+
+static int ca0132_alt_eq_preset_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	int i, err = 0;
+	int sel = ucontrol->value.enumerated.item[0];
+	unsigned int items = ARRAY_SIZE(ca0132_alt_eq_presets);
+
+	if (sel >= items)
+		return 0;
+
+	codec_dbg(codec, "%s: sel=%d, preset=%s\n", __func__, sel,
+			ca0132_alt_eq_presets[sel].name);
+	/*
+	 * Idx 0 is default.
+	 * Default needs to qualify with CrystalVoice state.
+	 */
+	for (i = 0; i < EQ_PRESET_MAX_PARAM_COUNT; i++) {
+		err = dspio_set_uint_param(codec, ca0132_alt_eq_enum.mid,
+				ca0132_alt_eq_enum.reqs[i],
+				ca0132_alt_eq_presets[sel].vals[i]);
+		if (err < 0)
+			break;
+	}
+
+	if (err >= 0)
+		spec->eq_preset_val = sel;
+
+	return 1;
+}
 
 static int ca0132_voicefx_info(struct snd_kcontrol *kcontrol,
 				 struct snd_ctl_elem_info *uinfo)
@@ -3753,10 +5263,15 @@
 	/* mic boost */
 	if (nid == spec->input_pins[0]) {
 		spec->cur_mic_boost = *valp;
+		if (spec->use_alt_functions) {
+			if (spec->in_enum_val != REAR_LINE_IN)
+				changed = ca0132_mic_boost_set(codec, *valp);
+		} else {
+			/* Mic boost does not apply to Digital Mic */
+			if (spec->cur_mic_type != DIGITAL_MIC)
+				changed = ca0132_mic_boost_set(codec, *valp);
+		}
 
-		/* Mic boost does not apply to Digital Mic */
-		if (spec->cur_mic_type != DIGITAL_MIC)
-			changed = ca0132_mic_boost_set(codec, *valp);
 		goto exit;
 	}
 
@@ -3768,6 +5283,41 @@
 /*
  * Volume related
  */
+/*
+ * Sets the internal DSP decibel level to match the DAC for output, and the
+ * ADC for input. Currently only the SBZ sets the DSP capture volume level,
+ * and all alternative codecs set the DSP playback volume.
+ */
+static void ca0132_alt_dsp_volume_put(struct hda_codec *codec, hda_nid_t nid)
+{
+	struct ca0132_spec *spec = codec->spec;
+	unsigned int dsp_dir;
+	unsigned int lookup_val;
+
+	if (nid == VNID_SPK)
+		dsp_dir = DSP_VOL_OUT;
+	else
+		dsp_dir = DSP_VOL_IN;
+
+	lookup_val = spec->vnode_lvol[nid - VNODE_START_NID];
+
+	dspio_set_uint_param(codec,
+		ca0132_alt_vol_ctls[dsp_dir].mid,
+		ca0132_alt_vol_ctls[dsp_dir].reqs[0],
+		float_vol_db_lookup[lookup_val]);
+
+	lookup_val = spec->vnode_rvol[nid - VNODE_START_NID];
+
+	dspio_set_uint_param(codec,
+		ca0132_alt_vol_ctls[dsp_dir].mid,
+		ca0132_alt_vol_ctls[dsp_dir].reqs[1],
+		float_vol_db_lookup[lookup_val]);
+
+	dspio_set_uint_param(codec,
+		ca0132_alt_vol_ctls[dsp_dir].mid,
+		ca0132_alt_vol_ctls[dsp_dir].reqs[2], FLOAT_ZERO);
+}
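+
+/*
+ * For example, a stored slider value of 0 maps to float_vol_db_lookup[0]
+ * (0xC2B40000 == -90.0f dB); reqs[0] and reqs[1] carry the left and right
+ * levels respectively.
+ */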
+
 static int ca0132_volume_info(struct snd_kcontrol *kcontrol,
 			      struct snd_ctl_elem_info *uinfo)
 {
@@ -3869,6 +5419,51 @@
 	return changed;
 }
 
+/*
+ * This function duplicates the one above rather than adding an if statement
+ * to it, because checking for the DSP volume inside the generic volume
+ * control would cause too much lag. A separate put handler is much smoother.
+ */
+static int ca0132_alt_volume_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+	struct ca0132_spec *spec = codec->spec;
+	hda_nid_t nid = get_amp_nid(kcontrol);
+	int ch = get_amp_channels(kcontrol);
+	long *valp = ucontrol->value.integer.value;
+	hda_nid_t vnid = 0;
+	int changed = 1;
+
+	switch (nid) {
+	case 0x02:
+		vnid = VNID_SPK;
+		break;
+	case 0x07:
+		vnid = VNID_MIC;
+		break;
+	}
+
+	/* store the left and right volume */
+	if (ch & 1) {
+		spec->vnode_lvol[vnid - VNODE_START_NID] = *valp;
+		valp++;
+	}
+	if (ch & 2) {
+		spec->vnode_rvol[vnid - VNODE_START_NID] = *valp;
+		valp++;
+	}
+
+	snd_hda_power_up(codec);
+	ca0132_alt_dsp_volume_put(codec, vnid);
+	mutex_lock(&codec->control_mutex);
+	changed = snd_hda_mixer_amp_volume_put(kcontrol, ucontrol);
+	mutex_unlock(&codec->control_mutex);
+	snd_hda_power_down(codec);
+
+	return changed;
+}
+
 static int ca0132_volume_tlv(struct snd_kcontrol *kcontrol, int op_flag,
 			     unsigned int size, unsigned int __user *tlv)
 {
@@ -3907,14 +5502,59 @@
 	return err;
 }
 
-static int add_fx_switch(struct hda_codec *codec, hda_nid_t nid,
-			 const char *pfx, int dir)
+/* Add volume slider control for effect level */
+static int ca0132_alt_add_effect_slider(struct hda_codec *codec, hda_nid_t nid,
+					const char *pfx, int dir)
 {
 	char namestr[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
 	int type = dir ? HDA_INPUT : HDA_OUTPUT;
 	struct snd_kcontrol_new knew =
+		HDA_CODEC_VOLUME_MONO(namestr, nid, 1, 0, type);
+
+	sprintf(namestr, "FX: %s %s Volume", pfx, dirstr[dir]);
+
+	knew.tlv.c = 0;
+	knew.tlv.p = 0;
+
+	switch (nid) {
+	case XBASS_XOVER:
+		knew.info = ca0132_alt_xbass_xover_slider_info;
+		knew.get = ca0132_alt_xbass_xover_slider_ctl_get;
+		knew.put = ca0132_alt_xbass_xover_slider_put;
+		break;
+	default:
+		knew.info = ca0132_alt_effect_slider_info;
+		knew.get = ca0132_alt_slider_ctl_get;
+		knew.put = ca0132_alt_effect_slider_put;
+		knew.private_value =
+			HDA_COMPOSE_AMP_VAL(nid, 1, 0, type);
+		break;
+	}
+
+	return snd_hda_ctl_add(codec, nid, snd_ctl_new1(&knew, codec));
+}
+
+/*
+ * An "FX:" prefix is added for the alternative codecs, because otherwise the
+ * Surround effect switch would conflict with the Surround sound volume
+ * control. It also makes clearer what the switches do. Other codecs are
+ * left alone.
+ */
+static int add_fx_switch(struct hda_codec *codec, hda_nid_t nid,
+			 const char *pfx, int dir)
+{
+	struct ca0132_spec *spec = codec->spec;
+	char namestr[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
+	int type = dir ? HDA_INPUT : HDA_OUTPUT;
+	struct snd_kcontrol_new knew =
 		CA0132_CODEC_MUTE_MONO(namestr, nid, 1, type);
-	sprintf(namestr, "%s %s Switch", pfx, dirstr[dir]);
+	/*
+	 * If using alt_controls, add the "FX:" prefix, but don't add it to
+	 * the OutFX or InFX enable controls.
+	 */
+	if ((spec->use_alt_controls) && (nid <= IN_EFFECT_END_NID))
+		sprintf(namestr, "FX: %s %s Switch", pfx, dirstr[dir]);
+	else
+		sprintf(namestr, "%s %s Switch", pfx, dirstr[dir]);
+
 	return snd_hda_ctl_add(codec, nid, snd_ctl_new1(&knew, codec));
 }
 
@@ -3929,11 +5569,141 @@
 	return snd_hda_ctl_add(codec, VOICEFX, snd_ctl_new1(&knew, codec));
 }
 
+/* Create the EQ Preset control */
+static int add_ca0132_alt_eq_presets(struct hda_codec *codec)
+{
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_MUTE_MONO(ca0132_alt_eq_enum.name,
+				    EQ_PRESET_ENUM, 1, 0, HDA_OUTPUT);
+	knew.info = ca0132_alt_eq_preset_info;
+	knew.get = ca0132_alt_eq_preset_get;
+	knew.put = ca0132_alt_eq_preset_put;
+	return snd_hda_ctl_add(codec, EQ_PRESET_ENUM,
+				snd_ctl_new1(&knew, codec));
+}
+
+/*
+ * Add an enumerated control for the three different settings of the smart
+ * volume output effect. Normal uses the slider value; Loud and Night are
+ * fixed presets that ignore it.
+ */
+static int ca0132_alt_add_svm_enum(struct hda_codec *codec)
+{
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_MUTE_MONO("FX: Smart Volume Setting",
+				    SMART_VOLUME_ENUM, 1, 0, HDA_OUTPUT);
+	knew.info = ca0132_alt_svm_setting_info;
+	knew.get = ca0132_alt_svm_setting_get;
+	knew.put = ca0132_alt_svm_setting_put;
+	return snd_hda_ctl_add(codec, SMART_VOLUME_ENUM,
+				snd_ctl_new1(&knew, codec));
+}
+
+/*
+ * Create an Output Select enumerated control for codecs with surround
+ * out capabilities.
+ */
+static int ca0132_alt_add_output_enum(struct hda_codec *codec)
+{
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_MUTE_MONO("Output Select",
+				    OUTPUT_SOURCE_ENUM, 1, 0, HDA_OUTPUT);
+	knew.info = ca0132_alt_output_select_get_info;
+	knew.get = ca0132_alt_output_select_get;
+	knew.put = ca0132_alt_output_select_put;
+	return snd_hda_ctl_add(codec, OUTPUT_SOURCE_ENUM,
+				snd_ctl_new1(&knew, codec));
+}
+
+/*
+ * Create an Input Source enumerated control for the alternate ca0132 codecs
+ * because the front microphone has no auto-detect, and Line-in has to be set
+ * somehow.
+ */
+static int ca0132_alt_add_input_enum(struct hda_codec *codec)
+{
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_MUTE_MONO("Input Source",
+				    INPUT_SOURCE_ENUM, 1, 0, HDA_INPUT);
+	knew.info = ca0132_alt_input_source_info;
+	knew.get = ca0132_alt_input_source_get;
+	knew.put = ca0132_alt_input_source_put;
+	return snd_hda_ctl_add(codec, INPUT_SOURCE_ENUM,
+				snd_ctl_new1(&knew, codec));
+}
+
+/*
+ * Add the mic boost enumerated control. It steps from 0 dB to 30 dB, giving
+ * more control than the original mic boost, which is either the full 30 dB
+ * or off.
+ */
+static int ca0132_alt_add_mic_boost_enum(struct hda_codec *codec)
+{
+	struct snd_kcontrol_new knew =
+		HDA_CODEC_MUTE_MONO("Mic Boost Capture Switch",
+				    MIC_BOOST_ENUM, 1, 0, HDA_INPUT);
+	knew.info = ca0132_alt_mic_boost_info;
+	knew.get = ca0132_alt_mic_boost_get;
+	knew.put = ca0132_alt_mic_boost_put;
+	return snd_hda_ctl_add(codec, MIC_BOOST_ENUM,
+				snd_ctl_new1(&knew, codec));
+}
+
+/*
+ * Need to create slave controls for the alternate codecs that have surround
+ * capabilities.
+ */
+static const char * const ca0132_alt_slave_pfxs[] = {
+	"Front", "Surround", "Center", "LFE", NULL,
+};
+
+/*
+ * A special channel map is also needed, because the default one is incorrect.
+ * This likely has to do with the pin for rear surround being 0x11 and the
+ * center/LFE being 0x10; usually the pin order is the opposite.
+ */
+static const struct snd_pcm_chmap_elem ca0132_alt_chmaps[] = {
+	{ .channels = 2,
+	  .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR } },
+	{ .channels = 4,
+	  .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR,
+		   SNDRV_CHMAP_RL, SNDRV_CHMAP_RR } },
+	{ .channels = 6,
+	  .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR,
+		   SNDRV_CHMAP_FC, SNDRV_CHMAP_LFE,
+		   SNDRV_CHMAP_RL, SNDRV_CHMAP_RR } },
+	{ }
+};
+
+/* Add the correct chmap for streams with 6 channels. */
+static void ca0132_alt_add_chmap_ctls(struct hda_codec *codec)
+{
+	int err = 0;
+	struct hda_pcm *pcm;
+
+	list_for_each_entry(pcm, &codec->pcm_list_head, list) {
+		struct hda_pcm_stream *hinfo =
+			&pcm->stream[SNDRV_PCM_STREAM_PLAYBACK];
+		struct snd_pcm_chmap *chmap;
+		const struct snd_pcm_chmap_elem *elem;
+
+		elem = ca0132_alt_chmaps;
+		if (hinfo->channels_max == 6) {
+			err = snd_pcm_add_chmap_ctls(pcm->pcm,
+					SNDRV_PCM_STREAM_PLAYBACK,
+					elem, hinfo->channels_max, 0, &chmap);
+			if (err < 0)
+				codec_dbg(codec, "snd_pcm_add_chmap_ctls failed!\n");
+		}
+	}
+}
+
 /*
  * When changing Node IDs for Mixer Controls below, make sure to update
  * Node IDs in ca0132_config() as well.
  */
-static struct snd_kcontrol_new ca0132_mixer[] = {
+static const struct snd_kcontrol_new ca0132_mixer[] = {
 	CA0132_CODEC_VOL("Master Playback Volume", VNID_SPK, HDA_OUTPUT),
 	CA0132_CODEC_MUTE("Master Playback Switch", VNID_SPK, HDA_OUTPUT),
 	CA0132_CODEC_VOL("Capture Volume", VNID_MIC, HDA_INPUT),
@@ -3955,10 +5725,55 @@
 	{ } /* end */
 };
 
+/*
+ * SBZ specific control mixer. Removes auto-detect for mic, and adds surround
+ * controls. Also sets both the Front Playback and Capture Volume controls to
+ * alt so they set the DSP's decibel level.
+ */
+static const struct snd_kcontrol_new sbz_mixer[] = {
+	CA0132_ALT_CODEC_VOL("Front Playback Volume", 0x02, HDA_OUTPUT),
+	CA0132_CODEC_MUTE("Front Playback Switch", VNID_SPK, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Surround Playback Volume", 0x04, 0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Surround Playback Switch", 0x04, 0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x03, 1, 0, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x03, 1, 0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x03, 2, 0, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x03, 2, 0, HDA_OUTPUT),
+	CA0132_ALT_CODEC_VOL("Capture Volume", 0x07, HDA_INPUT),
+	CA0132_CODEC_MUTE("Capture Switch", VNID_MIC, HDA_INPUT),
+	HDA_CODEC_VOLUME("What U Hear Capture Volume", 0x0a, 0, HDA_INPUT),
+	HDA_CODEC_MUTE("What U Hear Capture Switch", 0x0a, 0, HDA_INPUT),
+	CA0132_CODEC_MUTE_MONO("HP/Speaker Auto Detect Playback Switch",
+				VNID_HP_ASEL, 1, HDA_OUTPUT),
+	{ } /* end */
+};
+
+/*
+ * Same as the Sound Blaster Z, except doesn't use the alt volume for capture
+ * because it doesn't set decibel levels for the DSP for capture.
+ */
+static const struct snd_kcontrol_new r3di_mixer[] = {
+	CA0132_ALT_CODEC_VOL("Front Playback Volume", 0x02, HDA_OUTPUT),
+	CA0132_CODEC_MUTE("Front Playback Switch", VNID_SPK, HDA_OUTPUT),
+	HDA_CODEC_VOLUME("Surround Playback Volume", 0x04, 0, HDA_OUTPUT),
+	HDA_CODEC_MUTE("Surround Playback Switch", 0x04, 0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x03, 1, 0, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x03, 1, 0, HDA_OUTPUT),
+	HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x03, 2, 0, HDA_OUTPUT),
+	HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x03, 2, 0, HDA_OUTPUT),
+	CA0132_CODEC_VOL("Capture Volume", VNID_MIC, HDA_INPUT),
+	CA0132_CODEC_MUTE("Capture Switch", VNID_MIC, HDA_INPUT),
+	HDA_CODEC_VOLUME("What U Hear Capture Volume", 0x0a, 0, HDA_INPUT),
+	HDA_CODEC_MUTE("What U Hear Capture Switch", 0x0a, 0, HDA_INPUT),
+	CA0132_CODEC_MUTE_MONO("HP/Speaker Auto Detect Playback Switch",
+				VNID_HP_ASEL, 1, HDA_OUTPUT),
+	{ } /* end */
+};
+
 static int ca0132_build_controls(struct hda_codec *codec)
 {
 	struct ca0132_spec *spec = codec->spec;
-	int i, num_fx;
+	int i, num_fx, num_sliders;
 	int err = 0;
 
 	/* Add Mixer controls */
@@ -3967,29 +5782,94 @@
 		if (err < 0)
 			return err;
 	}
+	/* Setup vmaster with surround slaves for desktop ca0132 devices */
+	if (spec->use_alt_functions) {
+		snd_hda_set_vmaster_tlv(codec, spec->dacs[0], HDA_OUTPUT,
+					spec->tlv);
+		snd_hda_add_vmaster(codec, "Master Playback Volume",
+					spec->tlv, ca0132_alt_slave_pfxs,
+					"Playback Volume");
+		err = __snd_hda_add_vmaster(codec, "Master Playback Switch",
+					    NULL, ca0132_alt_slave_pfxs,
+					    "Playback Switch",
+					    true, &spec->vmaster_mute.sw_kctl);
+	}
 
 	/* Add in and out effects controls.
 	 * VoiceFX, PE and CrystalVoice are added separately.
 	 */
 	num_fx = OUT_EFFECTS_COUNT + IN_EFFECTS_COUNT;
 	for (i = 0; i < num_fx; i++) {
+		/* SBZ breaks if Echo Cancellation is used */
+		if (spec->quirk == QUIRK_SBZ) {
+			if (i == (ECHO_CANCELLATION - IN_EFFECT_START_NID +
+						OUT_EFFECTS_COUNT))
+				continue;
+		}
+
 		err = add_fx_switch(codec, ca0132_effects[i].nid,
 				    ca0132_effects[i].name,
 				    ca0132_effects[i].direct);
 		if (err < 0)
 			return err;
 	}
+	/*
+	 * If codec has use_alt_controls set to true, add effect level sliders,
+	 * EQ presets, and Smart Volume presets. Also, change names to add FX
+	 * prefix, and change PlayEnhancement and CrystalVoice to match.
+	 */
+	if (spec->use_alt_controls) {
+		ca0132_alt_add_svm_enum(codec);
+		add_ca0132_alt_eq_presets(codec);
+		err = add_fx_switch(codec, PLAY_ENHANCEMENT,
+					"Enable OutFX", 0);
+		if (err < 0)
+			return err;
 
-	err = add_fx_switch(codec, PLAY_ENHANCEMENT, "PlayEnhancement", 0);
-	if (err < 0)
-		return err;
+		err = add_fx_switch(codec, CRYSTAL_VOICE,
+					"Enable InFX", 1);
+		if (err < 0)
+			return err;
 
-	err = add_fx_switch(codec, CRYSTAL_VOICE, "CrystalVoice", 1);
-	if (err < 0)
-		return err;
+		num_sliders = OUT_EFFECTS_COUNT - 1;
+		for (i = 0; i < num_sliders; i++) {
+			err = ca0132_alt_add_effect_slider(codec,
+					    ca0132_effects[i].nid,
+					    ca0132_effects[i].name,
+					    ca0132_effects[i].direct);
+			if (err < 0)
+				return err;
+		}
 
+		err = ca0132_alt_add_effect_slider(codec, XBASS_XOVER,
+					"X-Bass Crossover", EFX_DIR_OUT);
+
+		if (err < 0)
+			return err;
+	} else {
+		err = add_fx_switch(codec, PLAY_ENHANCEMENT,
+					"PlayEnhancement", 0);
+		if (err < 0)
+			return err;
+
+		err = add_fx_switch(codec, CRYSTAL_VOICE,
+					"CrystalVoice", 1);
+		if (err < 0)
+			return err;
+	}
 	add_voicefx(codec);
 
+	/*
+	 * If the codec uses alt_functions, add the enumerated controls needed
+	 * to select the new outputs and inputs, plus the new mic boost
+	 * setting control.
+	 */
+	if (spec->use_alt_functions) {
+		ca0132_alt_add_output_enum(codec);
+		ca0132_alt_add_input_enum(codec);
+		ca0132_alt_add_mic_boost_enum(codec);
+	}
 #ifdef ENABLE_TUNING_CONTROLS
 	add_tuning_ctls(codec);
 #endif
@@ -4014,6 +5894,10 @@
 		if (err < 0)
 			return err;
 	}
+
+	if (spec->use_alt_functions)
+		ca0132_alt_add_chmap_ctls(codec);
+
 	return 0;
 }
 
@@ -4068,6 +5952,11 @@
 	info = snd_hda_codec_pcm_new(codec, "CA0132 Analog");
 	if (!info)
 		return -ENOMEM;
+	if (spec->use_alt_functions) {
+		info->own_chmap = true;
+		info->stream[SNDRV_PCM_STREAM_PLAYBACK].chmap
+			= ca0132_alt_chmaps;
+	}
 	info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ca0132_pcm_analog_playback;
 	info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dacs[0];
 	info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max =
@@ -4076,12 +5965,16 @@
 	info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = 1;
 	info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[0];
 
-	info = snd_hda_codec_pcm_new(codec, "CA0132 Analog Mic-In2");
-	if (!info)
-		return -ENOMEM;
-	info->stream[SNDRV_PCM_STREAM_CAPTURE] = ca0132_pcm_analog_capture;
-	info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = 1;
-	info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[1];
+	/* With the DSP enabled, desktops don't use this ADC. */
+	if (spec->use_alt_functions) {
+		info = snd_hda_codec_pcm_new(codec, "CA0132 Analog Mic-In2");
+		if (!info)
+			return -ENOMEM;
+		info->stream[SNDRV_PCM_STREAM_CAPTURE] =
+			ca0132_pcm_analog_capture;
+		info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = 1;
+		info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[1];
+	}
 
 	info = snd_hda_codec_pcm_new(codec, "CA0132 What U Hear");
 	if (!info)
@@ -4288,6 +6181,196 @@
 }
 
 /*
+ * Recon3Di r3di_setup_defaults sub-functions.
+ */
+
+static void r3di_dsp_scp_startup(struct hda_codec *codec)
+{
+	unsigned int tmp;
+
+	tmp = 0x00000000;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0A, tmp);
+
+	tmp = 0x00000001;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0B, tmp);
+
+	tmp = 0x00000004;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0C, tmp);
+
+	tmp = 0x00000005;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0C, tmp);
+
+	tmp = 0x00000000;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0C, tmp);
+}
+
+static void r3di_dsp_initial_mic_setup(struct hda_codec *codec)
+{
+	unsigned int tmp;
+
+	/* Mic 1 Setup */
+	chipio_set_conn_rate(codec, MEM_CONNID_MICIN1, SR_96_000);
+	chipio_set_conn_rate(codec, MEM_CONNID_MICOUT1, SR_96_000);
+	/* This ConnPointID is unique to Recon3Di. Haven't seen it elsewhere */
+	chipio_set_conn_rate(codec, 0x0F, SR_96_000);
+	tmp = FLOAT_ONE;
+	dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+
+	/* Mic 2 Setup, even though it isn't connected on SBZ */
+	chipio_set_conn_rate(codec, MEM_CONNID_MICIN2, SR_96_000);
+	chipio_set_conn_rate(codec, MEM_CONNID_MICOUT2, SR_96_000);
+	chipio_set_conn_rate(codec, 0x0F, SR_96_000);
+	tmp = FLOAT_ZERO;
+	dspio_set_uint_param(codec, 0x80, 0x01, tmp);
+}
+
+/*
+ * Initialize Sound Blaster Z analog microphones.
+ */
+static void sbz_init_analog_mics(struct hda_codec *codec)
+{
+	unsigned int tmp;
+
+	/* Mic 1 Setup */
+	chipio_set_conn_rate(codec, MEM_CONNID_MICIN1, SR_96_000);
+	chipio_set_conn_rate(codec, MEM_CONNID_MICOUT1, SR_96_000);
+	tmp = FLOAT_THREE;
+	dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+
+	/* Mic 2 Setup, even though it isn't connected on SBZ */
+	chipio_set_conn_rate(codec, MEM_CONNID_MICIN2, SR_96_000);
+	chipio_set_conn_rate(codec, MEM_CONNID_MICOUT2, SR_96_000);
+	tmp = FLOAT_ZERO;
+	dspio_set_uint_param(codec, 0x80, 0x01, tmp);
+}
+
+/*
+ * Sets the source of stream 0x14 to ConnPointID 0x48, and the destination
+ * ConnPointID to 0x91. If this isn't done, the destination is 0x71, and
+ * you get no sound. I'm guessing this has to do with the Sound Blaster Z
+ * having an updated DAC, which changes the destination to that DAC.
+ */
+static void sbz_connect_streams(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+
+	mutex_lock(&spec->chipio_mutex);
+
+	codec_dbg(codec, "Connect Streams entered, mutex locked and loaded.\n");
+
+	chipio_set_stream_channels(codec, 0x0C, 6);
+	chipio_set_stream_control(codec, 0x0C, 1);
+
+	/* This value is 0x43 for 96 kHz, and 0x83 for 192 kHz. */
+	chipio_write_no_mutex(codec, 0x18a020, 0x00000043);
+
+	/* Set up stream 0x14 with its source and destination points */
+	chipio_set_stream_source_dest(codec, 0x14, 0x48, 0x91);
+	chipio_set_conn_rate_no_mutex(codec, 0x48, SR_96_000);
+	chipio_set_conn_rate_no_mutex(codec, 0x91, SR_96_000);
+	chipio_set_stream_channels(codec, 0x14, 2);
+	chipio_set_stream_control(codec, 0x14, 1);
+
+	codec_dbg(codec, "Connect Streams exited, mutex released.\n");
+
+	mutex_unlock(&spec->chipio_mutex);
+}
+
+/*
+ * Write data through ChipIO to setup proper stream destinations.
+ * Not sure exactly how it works, but it seems to direct data
+ * to different destinations. Example is f8 to c0, e0 to c0.
+ * All I know is, if you don't set these, you get no sound.
+ */
+static void sbz_chipio_startup_data(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+
+	mutex_lock(&spec->chipio_mutex);
+	codec_dbg(codec, "Startup Data entered, mutex locked and loaded.\n");
+
+	/* These control audio output */
+	chipio_write_no_mutex(codec, 0x190060, 0x0001f8c0);
+	chipio_write_no_mutex(codec, 0x190064, 0x0001f9c1);
+	chipio_write_no_mutex(codec, 0x190068, 0x0001fac6);
+	chipio_write_no_mutex(codec, 0x19006c, 0x0001fbc7);
+	/* Signal to update, I think */
+	chipio_write_no_mutex(codec, 0x19042c, 0x00000001);
+
+	chipio_set_stream_channels(codec, 0x0C, 6);
+	chipio_set_stream_control(codec, 0x0C, 1);
+	/* No clue what these control */
+	chipio_write_no_mutex(codec, 0x190030, 0x0001e0c0);
+	chipio_write_no_mutex(codec, 0x190034, 0x0001e1c1);
+	chipio_write_no_mutex(codec, 0x190038, 0x0001e4c2);
+	chipio_write_no_mutex(codec, 0x19003c, 0x0001e5c3);
+	chipio_write_no_mutex(codec, 0x190040, 0x0001e2c4);
+	chipio_write_no_mutex(codec, 0x190044, 0x0001e3c5);
+	chipio_write_no_mutex(codec, 0x190048, 0x0001e8c6);
+	chipio_write_no_mutex(codec, 0x19004c, 0x0001e9c7);
+	chipio_write_no_mutex(codec, 0x190050, 0x0001ecc8);
+	chipio_write_no_mutex(codec, 0x190054, 0x0001edc9);
+	chipio_write_no_mutex(codec, 0x190058, 0x0001eaca);
+	chipio_write_no_mutex(codec, 0x19005c, 0x0001ebcb);
+
+	chipio_write_no_mutex(codec, 0x19042c, 0x00000001);
+
+	codec_dbg(codec, "Startup Data exited, mutex released.\n");
+	mutex_unlock(&spec->chipio_mutex);
+}
+
+/*
+ * Sound Blaster Z uses these after the DSP is loaded. Odd SCP commands,
+ * sent without the usual 0x20 source.
+ */
+static void sbz_dsp_scp_startup(struct hda_codec *codec)
+{
+	unsigned int tmp;
+
+	tmp = 0x00000003;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0C, tmp);
+
+	tmp = 0x00000000;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0A, tmp);
+
+	tmp = 0x00000001;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0B, tmp);
+
+	tmp = 0x00000004;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0C, tmp);
+
+	tmp = 0x00000005;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0C, tmp);
+
+	tmp = 0x00000000;
+	dspio_set_uint_param_no_source(codec, 0x80, 0x0C, tmp);
+}
+
+static void sbz_dsp_initial_mic_setup(struct hda_codec *codec)
+{
+	unsigned int tmp;
+
+	chipio_set_stream_control(codec, 0x03, 0);
+	chipio_set_stream_control(codec, 0x04, 0);
+
+	chipio_set_conn_rate(codec, MEM_CONNID_MICIN1, SR_96_000);
+	chipio_set_conn_rate(codec, MEM_CONNID_MICOUT1, SR_96_000);
+
+	tmp = FLOAT_THREE;
+	dspio_set_uint_param(codec, 0x80, 0x00, tmp);
+
+	chipio_set_stream_control(codec, 0x03, 1);
+	chipio_set_stream_control(codec, 0x04, 1);
+
+	chipio_write(codec, 0x18b098, 0x0000000c);
+	chipio_write(codec, 0x18b09C, 0x0000000c);
+}
+
+/*
  * Setup default parameters for DSP
  */
 static void ca0132_setup_defaults(struct hda_codec *codec)
@@ -4332,16 +6415,159 @@
 }
 
 /*
+ * Setup default parameters for Recon3Di DSP.
+ */
+static void r3di_setup_defaults(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+	unsigned int tmp;
+	int num_fx;
+	int idx, i;
+
+	if (spec->dsp_state != DSP_DOWNLOADED)
+		return;
+
+	r3di_dsp_scp_startup(codec);
+
+	r3di_dsp_initial_mic_setup(codec);
+
+	/* Remove DSP headroom */
+	tmp = FLOAT_ZERO;
+	dspio_set_uint_param(codec, 0x96, 0x3C, tmp);
+
+	/* set WUH source */
+	tmp = FLOAT_TWO;
+	dspio_set_uint_param(codec, 0x31, 0x00, tmp);
+	chipio_set_conn_rate(codec, MEM_CONNID_WUH, SR_48_000);
+
+	/* Set speaker source? */
+	dspio_set_uint_param(codec, 0x32, 0x00, tmp);
+
+	r3di_gpio_dsp_status_set(codec, R3DI_DSP_DOWNLOADED);
+
+	/* Setup effect defaults */
+	num_fx = OUT_EFFECTS_COUNT + IN_EFFECTS_COUNT + 1;
+	for (idx = 0; idx < num_fx; idx++) {
+		for (i = 0; i <= ca0132_effects[idx].params; i++) {
+			dspio_set_uint_param(codec,
+					ca0132_effects[idx].mid,
+					ca0132_effects[idx].reqs[i],
+					ca0132_effects[idx].def_vals[i]);
+		}
+	}
+}
+
+/*
+ * Setup default parameters for the Sound Blaster Z DSP. There is a lot
+ * more going on here than in the Chromebook setup.
+ */
+static void sbz_setup_defaults(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+	unsigned int tmp, stream_format;
+	int num_fx;
+	int idx, i;
+
+	if (spec->dsp_state != DSP_DOWNLOADED)
+		return;
+
+	sbz_dsp_scp_startup(codec);
+
+	sbz_init_analog_mics(codec);
+
+	sbz_connect_streams(codec);
+
+	sbz_chipio_startup_data(codec);
+
+	chipio_set_stream_control(codec, 0x03, 1);
+	chipio_set_stream_control(codec, 0x04, 1);
+
+	/*
+	 * Set internal input loopback to off. There used to be a switch to
+	 * enable input loopback, but it turned out to be far too buggy.
+	 */
+	tmp = FLOAT_ONE;
+	dspio_set_uint_param(codec, 0x37, 0x08, tmp);
+	dspio_set_uint_param(codec, 0x37, 0x10, tmp);
+
+	/* Remove DSP headroom */
+	tmp = FLOAT_ZERO;
+	dspio_set_uint_param(codec, 0x96, 0x3C, tmp);
+
+	/* set WUH source */
+	tmp = FLOAT_TWO;
+	dspio_set_uint_param(codec, 0x31, 0x00, tmp);
+	chipio_set_conn_rate(codec, MEM_CONNID_WUH, SR_48_000);
+
+	/* Set speaker source? */
+	dspio_set_uint_param(codec, 0x32, 0x00, tmp);
+
+	sbz_dsp_initial_mic_setup(codec);
+
+	/* out, in effects + voicefx */
+	num_fx = OUT_EFFECTS_COUNT + IN_EFFECTS_COUNT + 1;
+	for (idx = 0; idx < num_fx; idx++) {
+		for (i = 0; i <= ca0132_effects[idx].params; i++) {
+			dspio_set_uint_param(codec,
+					ca0132_effects[idx].mid,
+					ca0132_effects[idx].reqs[i],
+					ca0132_effects[idx].def_vals[i]);
+		}
+	}
+
+	/*
+	 * Have to make a stream to bind the sound output to, otherwise
+	 * you'll get dead audio. Before this was done, it would bind to an
+	 * audio input and would never work.
+	 */
+	stream_format = snd_hdac_calc_stream_format(48000, 2,
+			SNDRV_PCM_FORMAT_S32_LE, 32, 0);
+
+	snd_hda_codec_setup_stream(codec, spec->dacs[0], spec->dsp_stream_id,
+					0, stream_format);
+
+	snd_hda_codec_cleanup_stream(codec, spec->dacs[0]);
+
+	snd_hda_codec_setup_stream(codec, spec->dacs[0], spec->dsp_stream_id,
+					0, stream_format);
+
+	snd_hda_codec_cleanup_stream(codec, spec->dacs[0]);
+}
+
+/*
  * Initialization of flags in chip
  */
 static void ca0132_init_flags(struct hda_codec *codec)
 {
-	chipio_set_control_flag(codec, CONTROL_FLAG_IDLE_ENABLE, 0);
-	chipio_set_control_flag(codec, CONTROL_FLAG_PORT_A_COMMON_MODE, 0);
-	chipio_set_control_flag(codec, CONTROL_FLAG_PORT_D_COMMON_MODE, 0);
-	chipio_set_control_flag(codec, CONTROL_FLAG_PORT_A_10KOHM_LOAD, 0);
-	chipio_set_control_flag(codec, CONTROL_FLAG_PORT_D_10KOHM_LOAD, 0);
-	chipio_set_control_flag(codec, CONTROL_FLAG_ADC_C_HIGH_PASS, 1);
+	struct ca0132_spec *spec = codec->spec;
+
+	if (spec->use_alt_functions) {
+		chipio_set_control_flag(codec, CONTROL_FLAG_DSP_96KHZ, 1);
+		chipio_set_control_flag(codec, CONTROL_FLAG_DAC_96KHZ, 1);
+		chipio_set_control_flag(codec, CONTROL_FLAG_ADC_B_96KHZ, 1);
+		chipio_set_control_flag(codec, CONTROL_FLAG_ADC_C_96KHZ, 1);
+		chipio_set_control_flag(codec, CONTROL_FLAG_SRC_RATE_96KHZ, 1);
+		chipio_set_control_flag(codec, CONTROL_FLAG_IDLE_ENABLE, 0);
+		chipio_set_control_flag(codec, CONTROL_FLAG_SPDIF2OUT, 0);
+		chipio_set_control_flag(codec,
+				CONTROL_FLAG_PORT_D_10KOHM_LOAD, 0);
+		chipio_set_control_flag(codec,
+				CONTROL_FLAG_PORT_A_10KOHM_LOAD, 1);
+	} else {
+		chipio_set_control_flag(codec, CONTROL_FLAG_IDLE_ENABLE, 0);
+		chipio_set_control_flag(codec,
+				CONTROL_FLAG_PORT_A_COMMON_MODE, 0);
+		chipio_set_control_flag(codec,
+				CONTROL_FLAG_PORT_D_COMMON_MODE, 0);
+		chipio_set_control_flag(codec,
+				CONTROL_FLAG_PORT_A_10KOHM_LOAD, 0);
+		chipio_set_control_flag(codec,
+				CONTROL_FLAG_PORT_D_10KOHM_LOAD, 0);
+		chipio_set_control_flag(codec, CONTROL_FLAG_ADC_C_HIGH_PASS, 1);
+	}
 }
 
 /*
@@ -4349,6 +6575,16 @@
  */
 static void ca0132_init_params(struct hda_codec *codec)
 {
+	struct ca0132_spec *spec = codec->spec;
+
+	if (spec->use_alt_functions) {
+		chipio_set_conn_rate(codec, MEM_CONNID_WUH, SR_48_000);
+		chipio_set_conn_rate(codec, 0x0B, SR_48_000);
+		chipio_set_control_param(codec, CONTROL_PARAM_SPDIF1_SOURCE, 0);
+		chipio_set_control_param(codec, 0, 0);
+		chipio_set_control_param(codec, CONTROL_PARAM_VIP_SOURCE, 0);
+	}
+
 	chipio_set_control_param(codec, CONTROL_PARAM_PORTA_160OHM_GAIN, 6);
 	chipio_set_control_param(codec, CONTROL_PARAM_PORTD_160OHM_GAIN, 6);
 }
@@ -4370,11 +6606,49 @@
 static bool ca0132_download_dsp_images(struct hda_codec *codec)
 {
 	bool dsp_loaded = false;
+	struct ca0132_spec *spec = codec->spec;
 	const struct dsp_image_seg *dsp_os_image;
 	const struct firmware *fw_entry;
-
-	if (request_firmware(&fw_entry, EFX_FILE, codec->card->dev) != 0)
-		return false;
+	/*
+	 * Alternate firmwares for different variants. The Recon3Di apparently
+	 * can use the default firmware, but I'll leave the option in case
+	 * it needs it again.
+	 */
+	switch (spec->quirk) {
+	case QUIRK_SBZ:
+		if (request_firmware(&fw_entry, SBZ_EFX_FILE,
+					codec->card->dev) != 0) {
+			codec_dbg(codec, "SBZ alt firmware not detected. ");
+			spec->alt_firmware_present = false;
+		} else {
+			codec_dbg(codec, "Sound Blaster Z firmware selected.");
+			spec->alt_firmware_present = true;
+		}
+		break;
+	case QUIRK_R3DI:
+		if (request_firmware(&fw_entry, R3DI_EFX_FILE,
+					codec->card->dev) != 0) {
+			codec_dbg(codec, "Recon3Di alt firmware not detected.");
+			spec->alt_firmware_present = false;
+		} else {
+			codec_dbg(codec, "Recon3Di firmware selected.");
+			spec->alt_firmware_present = true;
+		}
+		break;
+	default:
+		spec->alt_firmware_present = false;
+		break;
+	}
+	/*
+	 * Use default ctefx.bin if no alt firmware is detected, or if none
+	 * exists for your particular codec.
+	 */
+	if (!spec->alt_firmware_present) {
+		codec_dbg(codec, "Default firmware selected.");
+		if (request_firmware(&fw_entry, EFX_FILE,
+					codec->card->dev) != 0)
+			return false;
+	}
 
 	dsp_os_image = (struct dsp_image_seg *)(fw_entry->data);
 	if (dspload_image(codec, dsp_os_image, 0, 0, true, 0)) {
@@ -4402,13 +6676,17 @@
 		return; /* don't retry failures */
 
 	chipio_enable_clocks(codec);
-	spec->dsp_state = DSP_DOWNLOADING;
-	if (!ca0132_download_dsp_images(codec))
-		spec->dsp_state = DSP_DOWNLOAD_FAILED;
-	else
-		spec->dsp_state = DSP_DOWNLOADED;
+	if (spec->dsp_state != DSP_DOWNLOADED) {
+		spec->dsp_state = DSP_DOWNLOADING;
 
-	if (spec->dsp_state == DSP_DOWNLOADED)
+		if (!ca0132_download_dsp_images(codec))
+			spec->dsp_state = DSP_DOWNLOAD_FAILED;
+		else
+			spec->dsp_state = DSP_DOWNLOADED;
+	}
+
+	/* For codecs using alt functions, this is already done earlier */
+	if (spec->dsp_state == DSP_DOWNLOADED && (!spec->use_alt_functions))
 		ca0132_set_dsp_msr(codec, true);
 }
 
@@ -4454,6 +6732,10 @@
 					    amic_callback);
 	snd_hda_jack_detect_enable_callback(codec, UNSOL_TAG_DSP,
 					    ca0132_process_dsp_response);
+	/* Front headphone jack detection */
+	if (spec->use_alt_functions)
+		snd_hda_jack_detect_enable_callback(codec,
+			spec->unsol_tag_front_hp, hp_callback);
 }
 
 /*
@@ -4476,7 +6758,8 @@
 	{}
 };
 
-/* Other verbs tables.  Sends after DSP download. */
+/* Other verbs tables. Sent after DSP download. */
+
 static struct hda_verb ca0132_init_verbs0[] = {
 	/* chip init verbs */
 	{0x15, 0x70D, 0xF0},
@@ -4506,8 +6789,27 @@
 	{0x15, 0x546, 0xC9},
 	{0x15, 0x53B, 0xCE},
 	{0x15, 0x5E8, 0xC9},
-	{0x15, 0x717, 0x0D},
-	{0x15, 0x718, 0x20},
+	{}
+};
+
+/* Extra init verbs for SBZ */
+static struct hda_verb sbz_init_verbs[] = {
+	{0x15, 0x70D, 0x20},
+	{0x15, 0x70E, 0x19},
+	{0x15, 0x707, 0x00},
+	{0x15, 0x539, 0xCE},
+	{0x15, 0x546, 0xC9},
+	{0x15, 0x70D, 0xB7},
+	{0x15, 0x70E, 0x09},
+	{0x15, 0x707, 0x10},
+	{0x15, 0x70D, 0xAF},
+	{0x15, 0x70E, 0x09},
+	{0x15, 0x707, 0x01},
+	{0x15, 0x707, 0x05},
+	{0x15, 0x70D, 0x73},
+	{0x15, 0x70E, 0x09},
+	{0x15, 0x707, 0x14},
+	{0x15, 0x6FF, 0xC4},
 	{}
 };
 
@@ -4521,7 +6823,11 @@
 	mutex_init(&spec->chipio_mutex);
 
 	spec->cur_out_type = SPEAKER_OUT;
-	spec->cur_mic_type = DIGITAL_MIC;
+	if (!spec->use_alt_functions)
+		spec->cur_mic_type = DIGITAL_MIC;
+	else
+		spec->cur_mic_type = REAR_MIC;
+
 	spec->cur_mic_boost = 0;
 
 	for (i = 0; i < VNODES_COUNT; i++) {
@@ -4539,6 +6845,15 @@
 		on = (unsigned int)ca0132_effects[i].reqs[0];
 		spec->effects_switch[i] = on ? 1 : 0;
 	}
+	/*
+	 * Set defaults for the effect slider controls, only for alternative
+	 * ca0132 codecs. Also set the X-Bass crossover frequency to 80 Hz.
+	 */
+	if (spec->use_alt_controls) {
+		spec->xbass_xover_freq = 8;
+		for (i = 0; i < EFFECT_LEVEL_SLIDERS; i++)
+			spec->fx_ctl_val[i] = effect_slider_defaults[i];
+	}
 
 	spec->voicefx_val = 0;
 	spec->effects_switch[PLAY_ENHANCEMENT - EFFECT_START_NID] = 1;
@@ -4549,6 +6864,120 @@
 #endif
 }
 
+/*
+ * Recon3Di-specific exit commands.
+ */
+/* prevents popping noise on shutdown */
+static void r3di_gpio_shutdown(struct hda_codec *codec)
+{
+	snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, 0x00);
+}
+
+/*
+ * Sound Blaster Z-specific exit commands.
+ */
+static void sbz_region2_exit(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++)
+		writeb(0x0, spec->mem_base + 0x100);
+	for (i = 0; i < 8; i++)
+		writeb(0xb3, spec->mem_base + 0x304);
+	/*
+	 * I believe these are GPIO, with the rightmost hex digit being the
+	 * GPIO pin, and the second digit being on or off. We see this more in
+	 * the input/output select functions.
+	 */
+	writew(0x0000, spec->mem_base + 0x320);
+	writew(0x0001, spec->mem_base + 0x320);
+	writew(0x0104, spec->mem_base + 0x320);
+	writew(0x0005, spec->mem_base + 0x320);
+	writew(0x0007, spec->mem_base + 0x320);
+}
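+
+/*
+ * Illustrative sketch, not part of the original patch: one way the suspected
+ * GPIO word format described above could be composed, assuming the lowest
+ * nibble selects the pin and the next nibble is the on/off state. The helper
+ * name is hypothetical and nothing here calls it.
+ */
+static inline u16 sbz_gpio_word(unsigned int pin, bool on)
+{
+	/* e.g. pin 4 on -> 0x0104, pin 7 off -> 0x0007 */
+	return (on ? 0x0100 : 0x0000) | (pin & 0xf);
+}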
+
+static void sbz_set_pin_ctl_default(struct hda_codec *codec)
+{
+	hda_nid_t pins[5] = {0x0B, 0x0C, 0x0E, 0x12, 0x13};
+	unsigned int i;
+
+	snd_hda_codec_write(codec, 0x11, 0,
+			AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40);
+
+	for (i = 0; i < 5; i++)
+		snd_hda_codec_write(codec, pins[i], 0,
+				AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00);
+}
+
+static void sbz_clear_unsolicited(struct hda_codec *codec)
+{
+	hda_nid_t pins[7] = {0x0B, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13};
+	unsigned int i;
+
+	for (i = 0; i < 7; i++) {
+		snd_hda_codec_write(codec, pins[i], 0,
+				AC_VERB_SET_UNSOLICITED_ENABLE, 0x00);
+	}
+}
+
+/* On shutdown, sends commands in sets of three */
+static void sbz_gpio_shutdown_commands(struct hda_codec *codec, int dir,
+							int mask, int data)
+{
+	if (dir >= 0)
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_DIRECTION, dir);
+	if (mask >= 0)
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_MASK, mask);
+
+	if (data >= 0)
+		snd_hda_codec_write(codec, 0x01, 0,
+				AC_VERB_SET_GPIO_DATA, data);
+}
+
+static void sbz_exit_chip(struct hda_codec *codec)
+{
+	chipio_set_stream_control(codec, 0x03, 0);
+	chipio_set_stream_control(codec, 0x04, 0);
+
+	/* Mess with GPIO */
+	sbz_gpio_shutdown_commands(codec, 0x07, 0x07, -1);
+	sbz_gpio_shutdown_commands(codec, 0x07, 0x07, 0x05);
+	sbz_gpio_shutdown_commands(codec, 0x07, 0x07, 0x01);
+
+	chipio_set_stream_control(codec, 0x14, 0);
+	chipio_set_stream_control(codec, 0x0C, 0);
+
+	chipio_set_conn_rate(codec, 0x41, SR_192_000);
+	chipio_set_conn_rate(codec, 0x91, SR_192_000);
+
+	chipio_write(codec, 0x18a020, 0x00000083);
+
+	sbz_gpio_shutdown_commands(codec, 0x07, 0x07, 0x03);
+	sbz_gpio_shutdown_commands(codec, 0x07, 0x07, 0x07);
+	sbz_gpio_shutdown_commands(codec, 0x07, 0x07, 0x06);
+
+	chipio_set_stream_control(codec, 0x0C, 0);
+
+	chipio_set_control_param(codec, 0x0D, 0x24);
+
+	sbz_clear_unsolicited(codec);
+	sbz_set_pin_ctl_default(codec);
+
+	snd_hda_codec_write(codec, 0x0B, 0,
+		AC_VERB_SET_EAPD_BTLENABLE, 0x00);
+
+	if (dspload_is_loaded(codec))
+		dsp_reset(codec);
+
+	snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+		VENDOR_CHIPIO_CT_EXTENSIONS_ENABLE, 0x00);
+
+	sbz_region2_exit(codec);
+}
+
 static void ca0132_exit_chip(struct hda_codec *codec)
 {
 	/* put any chip cleanup stuff here. */
@@ -4557,28 +6986,264 @@
 		dsp_reset(codec);
 }
 
+/*
+ * This fixes a problem that was hard to reproduce. Very rarely, I would
+ * boot up and there would be no sound, even though the DSP indicated it
+ * had loaded properly. I did a few memory dumps to see if anything was
+ * different, and a few areas of memory were still filled with the
+ * uninitialized pattern a1a2a3a4. This function checks whether those areas
+ * are uninitialized and, if they are, attempts to reload the card up to
+ * 3 times. It is usually fixed by the second attempt.
+ */
+static void sbz_dsp_startup_check(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+	unsigned int dsp_data_check[4];
+	unsigned int cur_address = 0x390;
+	unsigned int i;
+	unsigned int failure = 0;
+	unsigned int reload = 3;
+
+	if (spec->startup_check_entered)
+		return;
+
+	spec->startup_check_entered = true;
+
+	for (i = 0; i < 4; i++) {
+		chipio_read(codec, cur_address, &dsp_data_check[i]);
+		cur_address += 0x4;
+	}
+	for (i = 0; i < 4; i++) {
+		if (dsp_data_check[i] == 0xa1a2a3a4)
+			failure = 1;
+	}
+
+	codec_dbg(codec, "Startup Check: %d ", failure);
+	if (failure)
+		codec_info(codec, "DSP not initialized properly. Attempting to fix.");
+	/*
+	 * While the failure condition is true, and we haven't reached our
+	 * three reload limit, continue trying to reload the driver and
+	 * fix the issue.
+	 */
+	while (failure && (reload != 0)) {
+		codec_info(codec, "Reloading... Tries left: %d", reload);
+		sbz_exit_chip(codec);
+		spec->dsp_state = DSP_DOWNLOAD_INIT;
+		codec->patch_ops.init(codec);
+		failure = 0;
+		for (i = 0; i < 4; i++) {
+			chipio_read(codec, cur_address, &dsp_data_check[i]);
+			cur_address += 0x4;
+		}
+		for (i = 0; i < 4; i++) {
+			if (dsp_data_check[i] == 0xa1a2a3a4)
+				failure = 1;
+		}
+		reload--;
+	}
+
+	if (!failure && reload < 3)
+		codec_info(codec, "DSP fixed.");
+
+	if (!failure)
+		return;
+
+	codec_info(codec, "DSP failed to initialize properly. Either try a full shutdown or a suspend to clear the internal memory.");
+}
+
+/*
+ * This is for the extra volume verbs 0x797 (left) and 0x798 (right). These
+ * add extra precision for decibel values. If you had the dB value in
+ * floating point, you would take the digits after the decimal point,
+ * multiply by 64, and divide by 100. So for 8.59, it's (59 * 64) / 100.
+ * Useful if someone wanted to implement fixed-point or floating-point dB
+ * volumes. For now, I'll set them to 0 just in case a value has lingered
+ * from a boot into Windows.
+ */
+static void ca0132_alt_vol_setup(struct hda_codec *codec)
+{
+	snd_hda_codec_write(codec, 0x02, 0, 0x797, 0x00);
+	snd_hda_codec_write(codec, 0x02, 0, 0x798, 0x00);
+	snd_hda_codec_write(codec, 0x03, 0, 0x797, 0x00);
+	snd_hda_codec_write(codec, 0x03, 0, 0x798, 0x00);
+	snd_hda_codec_write(codec, 0x04, 0, 0x797, 0x00);
+	snd_hda_codec_write(codec, 0x04, 0, 0x798, 0x00);
+	snd_hda_codec_write(codec, 0x07, 0, 0x797, 0x00);
+	snd_hda_codec_write(codec, 0x07, 0, 0x798, 0x00);
+}
+
+/*
+ * Extra commands that don't really fit anywhere else.
+ */
+static void sbz_pre_dsp_setup(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+
+	writel(0x00820680, spec->mem_base + 0x01C);
+	writel(0x00820680, spec->mem_base + 0x01C);
+
+	snd_hda_codec_write(codec, 0x15, 0, 0xd00, 0xfc);
+	snd_hda_codec_write(codec, 0x15, 0, 0xd00, 0xfd);
+	snd_hda_codec_write(codec, 0x15, 0, 0xd00, 0xfe);
+	snd_hda_codec_write(codec, 0x15, 0, 0xd00, 0xff);
+
+	chipio_write(codec, 0x18b0a4, 0x000000c2);
+
+	snd_hda_codec_write(codec, 0x11, 0,
+			AC_VERB_SET_PIN_WIDGET_CONTROL, 0x44);
+}
+
+/*
+ * Extra commands that don't really fit anywhere else.
+ */
+static void r3di_pre_dsp_setup(struct hda_codec *codec)
+{
+	chipio_write(codec, 0x18b0a4, 0x000000c2);
+
+	snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_8051_ADDRESS_LOW, 0x1E);
+	snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_8051_ADDRESS_HIGH, 0x1C);
+	snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_8051_DATA_WRITE, 0x5B);
+
+	snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_8051_ADDRESS_LOW, 0x20);
+	snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_8051_ADDRESS_HIGH, 0x19);
+	snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_8051_DATA_WRITE, 0x00);
+	snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_8051_DATA_WRITE, 0x40);
+
+	snd_hda_codec_write(codec, 0x11, 0,
+			AC_VERB_SET_PIN_WIDGET_CONTROL, 0x04);
+}
+
+/*
+ * These are sent before the DSP is downloaded. Not sure
+ * what they do, or if they're necessary. They could possibly
+ * be removed, but they seem safer to leave in.
+ */
+static void sbz_region2_startup(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+
+	writel(0x00000000, spec->mem_base + 0x400);
+	writel(0x00000000, spec->mem_base + 0x408);
+	writel(0x00000000, spec->mem_base + 0x40C);
+	writel(0x00880680, spec->mem_base + 0x01C);
+	writel(0x00000083, spec->mem_base + 0xC0C);
+	writel(0x00000030, spec->mem_base + 0xC00);
+	writel(0x00000000, spec->mem_base + 0xC04);
+	writel(0x00000003, spec->mem_base + 0xC0C);
+	writel(0x00000003, spec->mem_base + 0xC0C);
+	writel(0x00000003, spec->mem_base + 0xC0C);
+	writel(0x00000003, spec->mem_base + 0xC0C);
+	writel(0x000000C1, spec->mem_base + 0xC08);
+	writel(0x000000F1, spec->mem_base + 0xC08);
+	writel(0x00000001, spec->mem_base + 0xC08);
+	writel(0x000000C7, spec->mem_base + 0xC08);
+	writel(0x000000C1, spec->mem_base + 0xC08);
+	writel(0x00000080, spec->mem_base + 0xC04);
+}
+
+/*
+ * Extra init functions for alternative ca0132 codecs. Done
+ * here so they don't clutter up the main ca0132_init function
+ * any more than they have to.
+ */
+static void ca0132_alt_init(struct hda_codec *codec)
+{
+	struct ca0132_spec *spec = codec->spec;
+
+	ca0132_alt_vol_setup(codec);
+
+	switch (spec->quirk) {
+	case QUIRK_SBZ:
+		codec_dbg(codec, "SBZ alt_init");
+		ca0132_gpio_init(codec);
+		sbz_pre_dsp_setup(codec);
+		snd_hda_sequence_write(codec, spec->chip_init_verbs);
+		snd_hda_sequence_write(codec, spec->sbz_init_verbs);
+		break;
+	case QUIRK_R3DI:
+		codec_dbg(codec, "R3DI alt_init");
+		ca0132_gpio_init(codec);
+		ca0132_gpio_setup(codec);
+		r3di_gpio_dsp_status_set(codec, R3DI_DSP_DOWNLOADING);
+		r3di_pre_dsp_setup(codec);
+		snd_hda_sequence_write(codec, spec->chip_init_verbs);
+		snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0, 0x6FF, 0xC4);
+		break;
+	}
+}
+
 static int ca0132_init(struct hda_codec *codec)
 {
 	struct ca0132_spec *spec = codec->spec;
 	struct auto_pin_cfg *cfg = &spec->autocfg;
 	int i;
+	bool dsp_loaded;
+
+	/*
+	 * If the DSP is already downloaded and init is entered again, there
+	 * are only two reasons for it. One, the codec has woken from a
+	 * suspended state; in that case dspload_is_loaded will return false,
+	 * and init will be run again. The other reason it gets re-entered is
+	 * that, on startup, a suspend/resume cycle is somehow triggered. In
+	 * this case, it checks whether the DSP is downloaded and does not run
+	 * the init function again. For codecs using alt_functions, it also
+	 * checks that the DSP is loaded properly.
+	 */
+	if (spec->dsp_state == DSP_DOWNLOADED) {
+		dsp_loaded = dspload_is_loaded(codec);
+		if (!dsp_loaded) {
+			spec->dsp_reload = true;
+			spec->dsp_state = DSP_DOWNLOAD_INIT;
+		} else {
+			if (spec->quirk == QUIRK_SBZ)
+				sbz_dsp_startup_check(codec);
+			return 0;
+		}
+	}
 
 	if (spec->dsp_state != DSP_DOWNLOAD_FAILED)
 		spec->dsp_state = DSP_DOWNLOAD_INIT;
 	spec->curr_chip_addx = INVALID_CHIP_ADDRESS;
 
+	if (spec->quirk == QUIRK_SBZ)
+		sbz_region2_startup(codec);
+
 	snd_hda_power_up_pm(codec);
 
 	ca0132_init_unsol(codec);
-
 	ca0132_init_params(codec);
 	ca0132_init_flags(codec);
+
 	snd_hda_sequence_write(codec, spec->base_init_verbs);
+
+	if (spec->quirk != QUIRK_NONE)
+		ca0132_alt_init(codec);
+
 	ca0132_download_dsp(codec);
+
 	ca0132_refresh_widget_caps(codec);
-	ca0132_setup_defaults(codec);
-	ca0132_init_analog_mic2(codec);
-	ca0132_init_dmic(codec);
+
+	if (spec->quirk == QUIRK_SBZ)
+		writew(0x0107, spec->mem_base + 0x320);
+
+	switch (spec->quirk) {
+	case QUIRK_R3DI:
+		r3di_setup_defaults(codec);
+		break;
+	case QUIRK_NONE:
+	case QUIRK_ALIENWARE:
+		ca0132_setup_defaults(codec);
+		ca0132_init_analog_mic2(codec);
+		ca0132_init_dmic(codec);
+		break;
+	}
 
 	for (i = 0; i < spec->num_outputs; i++)
 		init_output(codec, spec->out_pins[i], spec->dacs[0]);
@@ -4590,14 +7255,45 @@
 
 	init_input(codec, cfg->dig_in_pin, spec->dig_in);
 
-	snd_hda_sequence_write(codec, spec->chip_init_verbs);
-	snd_hda_sequence_write(codec, spec->spec_init_verbs);
+	if (!spec->use_alt_functions) {
+		snd_hda_sequence_write(codec, spec->chip_init_verbs);
+		snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_PARAM_EX_ID_SET, 0x0D);
+		snd_hda_codec_write(codec, WIDGET_CHIP_CTRL, 0,
+			    VENDOR_CHIPIO_PARAM_EX_VALUE_SET, 0x20);
+	}
 
-	ca0132_select_out(codec);
-	ca0132_select_mic(codec);
+	if (spec->quirk == QUIRK_SBZ)
+		ca0132_gpio_setup(codec);
+
+	snd_hda_sequence_write(codec, spec->spec_init_verbs);
+	switch (spec->quirk) {
+	case QUIRK_SBZ:
+		sbz_setup_defaults(codec);
+		ca0132_alt_select_out(codec);
+		ca0132_alt_select_in(codec);
+		break;
+	case QUIRK_R3DI:
+		ca0132_alt_select_out(codec);
+		ca0132_alt_select_in(codec);
+		break;
+	default:
+		ca0132_select_out(codec);
+		ca0132_select_mic(codec);
+		break;
+	}
 
 	snd_hda_jack_report_sync(codec);
 
+	/*
+	 * Re-set the PlayEnhancement switch on a resume event, because the
+	 * controls will not be reloaded.
+	 */
+	if (spec->dsp_reload) {
+		spec->dsp_reload = false;
+		ca0132_pe_switch_set(codec);
+	}
+
 	snd_hda_power_down_pm(codec);
 
 	return 0;
@@ -4609,19 +7305,39 @@
 
 	cancel_delayed_work_sync(&spec->unsol_hp_work);
 	snd_hda_power_up(codec);
-	snd_hda_sequence_write(codec, spec->base_exit_verbs);
-	ca0132_exit_chip(codec);
+	switch (spec->quirk) {
+	case QUIRK_SBZ:
+		sbz_exit_chip(codec);
+		break;
+	case QUIRK_R3DI:
+		r3di_gpio_shutdown(codec);
+		snd_hda_sequence_write(codec, spec->base_exit_verbs);
+		ca0132_exit_chip(codec);
+		break;
+	default:
+		snd_hda_sequence_write(codec, spec->base_exit_verbs);
+		ca0132_exit_chip(codec);
+		break;
+	}
 	snd_hda_power_down(codec);
+	if (spec->mem_base)
+		iounmap(spec->mem_base);
 	kfree(spec->spec_init_verbs);
 	kfree(codec->spec);
 }
 
+static void ca0132_reboot_notify(struct hda_codec *codec)
+{
+	codec->patch_ops.free(codec);
+}
+
 static const struct hda_codec_ops ca0132_patch_ops = {
 	.build_controls = ca0132_build_controls,
 	.build_pcms = ca0132_build_pcms,
 	.init = ca0132_init,
 	.free = ca0132_free,
 	.unsol_event = snd_hda_jack_unsol_event,
+	.reboot_notify = ca0132_reboot_notify,
 };
 
 static void ca0132_config(struct hda_codec *codec)
@@ -4635,9 +7351,14 @@
 
 	spec->multiout.dac_nids = spec->dacs;
 	spec->multiout.num_dacs = 3;
-	spec->multiout.max_channels = 2;
 
-	if (spec->quirk == QUIRK_ALIENWARE) {
+	if (!spec->use_alt_functions)
+		spec->multiout.max_channels = 2;
+	else
+		spec->multiout.max_channels = 6;
+
+	switch (spec->quirk) {
+	case QUIRK_ALIENWARE:
 		codec_dbg(codec, "ca0132_config: QUIRK_ALIENWARE applied.\n");
 		snd_hda_apply_pincfgs(codec, alienware_pincfgs);
 
@@ -4657,7 +7378,71 @@
 		spec->input_pins[2] = 0x13;
 		spec->shared_mic_nid = 0x7;
 		spec->unsol_tag_amic1 = 0x11;
-	} else {
+		break;
+	case QUIRK_SBZ:
+		codec_dbg(codec, "%s: QUIRK_SBZ applied.\n", __func__);
+		snd_hda_apply_pincfgs(codec, sbz_pincfgs);
+
+		spec->num_outputs = 2;
+		spec->out_pins[0] = 0x0B; /* Line out */
+		spec->out_pins[1] = 0x0F; /* Rear headphone out */
+		spec->out_pins[2] = 0x10; /* Front Headphone / Center/LFE */
+		spec->out_pins[3] = 0x11; /* Rear surround */
+		spec->shared_out_nid = 0x2;
+		spec->unsol_tag_hp = spec->out_pins[1];
+		spec->unsol_tag_front_hp = spec->out_pins[2];
+
+		spec->adcs[0] = 0x7; /* Rear Mic / Line-in */
+		spec->adcs[1] = 0x8; /* Front Mic, but only if no DSP */
+		spec->adcs[2] = 0xa; /* what u hear */
+
+		spec->num_inputs = 2;
+		spec->input_pins[0] = 0x12; /* Rear Mic / Line-in */
+		spec->input_pins[1] = 0x13; /* What U Hear */
+		spec->shared_mic_nid = 0x7;
+		spec->unsol_tag_amic1 = spec->input_pins[0];
+
+		/* SPDIF I/O */
+		spec->dig_out = 0x05;
+		spec->multiout.dig_out_nid = spec->dig_out;
+		cfg->dig_out_pins[0] = 0x0c;
+		cfg->dig_outs = 1;
+		cfg->dig_out_type[0] = HDA_PCM_TYPE_SPDIF;
+		spec->dig_in = 0x09;
+		cfg->dig_in_pin = 0x0e;
+		cfg->dig_in_type = HDA_PCM_TYPE_SPDIF;
+		break;
+	case QUIRK_R3DI:
+		codec_dbg(codec, "%s: QUIRK_R3DI applied.\n", __func__);
+		snd_hda_apply_pincfgs(codec, r3di_pincfgs);
+
+		spec->num_outputs = 2;
+		spec->out_pins[0] = 0x0B; /* Line out */
+		spec->out_pins[1] = 0x0F; /* Rear headphone out */
+		spec->out_pins[2] = 0x10; /* Front Headphone / Center/LFE */
+		spec->out_pins[3] = 0x11; /* Rear surround */
+		spec->shared_out_nid = 0x2;
+		spec->unsol_tag_hp = spec->out_pins[1];
+		spec->unsol_tag_front_hp = spec->out_pins[2];
+
+		spec->adcs[0] = 0x07; /* Rear Mic / Line-in */
+		spec->adcs[1] = 0x08; /* Front Mic, but only if no DSP */
+		spec->adcs[2] = 0x0a; /* what u hear */
+
+		spec->num_inputs = 2;
+		spec->input_pins[0] = 0x12; /* Rear Mic / Line-in */
+		spec->input_pins[1] = 0x13; /* What U Hear */
+		spec->shared_mic_nid = 0x7;
+		spec->unsol_tag_amic1 = spec->input_pins[0];
+
+		/* SPDIF I/O */
+		spec->dig_out = 0x05;
+		spec->multiout.dig_out_nid = spec->dig_out;
+		cfg->dig_out_pins[0] = 0x0c;
+		cfg->dig_outs = 1;
+		cfg->dig_out_type[0] = HDA_PCM_TYPE_SPDIF;
+		break;
+	default:
 		spec->num_outputs = 2;
 		spec->out_pins[0] = 0x0b; /* speaker out */
 		spec->out_pins[1] = 0x10; /* headphone out */
@@ -4684,6 +7469,7 @@
 		spec->dig_in = 0x09;
 		cfg->dig_in_pin = 0x0e;
 		cfg->dig_in_type = HDA_PCM_TYPE_SPDIF;
+		break;
 	}
 }
 
@@ -4694,6 +7480,8 @@
 	struct ca0132_spec *spec = codec->spec;
 
 	spec->chip_init_verbs = ca0132_init_verbs0;
+	if (spec->quirk == QUIRK_SBZ)
+		spec->sbz_init_verbs = sbz_init_verbs;
 	spec->spec_init_verbs = kzalloc(sizeof(struct hda_verb) * NUM_SPEC_VERBS, GFP_KERNEL);
 	if (!spec->spec_init_verbs)
 		return -ENOMEM;
@@ -4757,9 +7545,46 @@
 	else
 		spec->quirk = QUIRK_NONE;
 
+	/* Setup BAR Region 2 for Sound Blaster Z */
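+	/*
+	 * Editorial note, not part of the original patch: the 0xC20 maxlen
+	 * argument caps the pci_iomap() mapping at the first 0xC20 bytes of
+	 * BAR 2; passing 0 would map the whole BAR.
+	 */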
+	if (spec->quirk == QUIRK_SBZ) {
+		spec->mem_base = pci_iomap(codec->bus->pci, 2, 0xC20);
+		if (spec->mem_base == NULL) {
+			codec_warn(codec, "pci_iomap failed!");
+			codec_info(codec, "perhaps this is not an SBZ?");
+			spec->quirk = QUIRK_NONE;
+		}
+	}
+
 	spec->dsp_state = DSP_DOWNLOAD_INIT;
 	spec->num_mixers = 1;
-	spec->mixers[0] = ca0132_mixer;
+
+	/* Set which mixers each quirk uses. */
+	switch (spec->quirk) {
+	case QUIRK_SBZ:
+		spec->mixers[0] = sbz_mixer;
+		snd_hda_codec_set_name(codec, "Sound Blaster Z");
+		break;
+	case QUIRK_R3DI:
+		spec->mixers[0] = r3di_mixer;
+		snd_hda_codec_set_name(codec, "Recon3Di");
+		break;
+	default:
+		spec->mixers[0] = ca0132_mixer;
+		break;
+	}
+
+	/* Setup whether or not to use alt functions/controls */
+	switch (spec->quirk) {
+	case QUIRK_SBZ:
+	case QUIRK_R3DI:
+		spec->use_alt_controls = true;
+		spec->use_alt_functions = true;
+		break;
+	default:
+		spec->use_alt_controls = false;
+		spec->use_alt_functions = false;
+		break;
+	}
 
 	spec->base_init_verbs = ca0132_base_init_verbs;
 	spec->base_exit_verbs = ca0132_base_exit_verbs;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 5b4dbce..dbf9910 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -588,6 +588,7 @@
 				    const struct hda_fixup *fix, int action)
 {
 	struct conexant_spec *spec = codec->spec;
+	struct snd_kcontrol_new *kctl;
 	int i;
 
 	if (action != HDA_FIXUP_ACT_PROBE)
@@ -606,9 +607,7 @@
 	snd_hda_codec_set_pin_target(codec, 0x1a, PIN_VREF50);
 
 	/* override mic boost control */
-	for (i = 0; i < spec->gen.kctls.used; i++) {
-		struct snd_kcontrol_new *kctl =
-			snd_array_elem(&spec->gen.kctls, i);
+	snd_array_for_each(&spec->gen.kctls, i, kctl) {
 		if (!strcmp(kctl->name, "Mic Boost Volume")) {
 			kctl->put = olpc_xo_mic_boost_put;
 			break;
@@ -965,6 +964,7 @@
 	SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO),
 	SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
 	SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
@@ -998,6 +998,7 @@
 	{ .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" },
 	{ .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" },
 	{ .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" },
+	{ .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" },
 	{}
 };
 
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 7d7eb13..8840daf 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -510,7 +510,7 @@
 
 	snd_info_set_text_ops(entry, per_pin, print_eld_info);
 	entry->c.text.write = write_eld_info;
-	entry->mode |= S_IWUSR;
+	entry->mode |= 0200;
 	per_pin->proc_entry = entry;
 
 	return 0;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 01a6643..d64dcb9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2830,6 +2830,7 @@
 
 static void alc286_shutup(struct hda_codec *codec)
 {
+	const struct hda_pincfg *pin;
 	int i;
 	int mic_pin = find_ext_mic_pin(codec);
 	/* don't shut up pins when unloading the driver; otherwise it breaks
@@ -2837,8 +2838,7 @@
 	 */
 	if (codec->bus->shutdown)
 		return;
-	for (i = 0; i < codec->init_pins.used; i++) {
-		struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i);
+	snd_array_for_each(&codec->init_pins, i, pin) {
 		/* use read here for syncing after issuing each verb */
 		if (pin->nid != mic_pin)
 			snd_hda_codec_read(codec, pin->nid, 0,
@@ -3653,30 +3653,37 @@
 	}
 }
 
-static void alc269_fixup_hp_mute_led_mic1(struct hda_codec *codec,
-				const struct hda_fixup *fix, int action)
+static void alc269_fixup_hp_mute_led_micx(struct hda_codec *codec,
+					  const struct hda_fixup *fix,
+					  int action, hda_nid_t pin)
 {
 	struct alc_spec *spec = codec->spec;
+
 	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
 		spec->mute_led_polarity = 0;
-		spec->mute_led_nid = 0x18;
+		spec->mute_led_nid = pin;
 		spec->gen.vmaster_mute.hook = alc269_fixup_mic_mute_hook;
 		spec->gen.vmaster_mute_enum = 1;
 		codec->power_filter = led_power_filter;
 	}
 }
 
+static void alc269_fixup_hp_mute_led_mic1(struct hda_codec *codec,
+				const struct hda_fixup *fix, int action)
+{
+	alc269_fixup_hp_mute_led_micx(codec, fix, action, 0x18);
+}
+
 static void alc269_fixup_hp_mute_led_mic2(struct hda_codec *codec,
 				const struct hda_fixup *fix, int action)
 {
-	struct alc_spec *spec = codec->spec;
-	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
-		spec->mute_led_polarity = 0;
-		spec->mute_led_nid = 0x19;
-		spec->gen.vmaster_mute.hook = alc269_fixup_mic_mute_hook;
-		spec->gen.vmaster_mute_enum = 1;
-		codec->power_filter = led_power_filter;
-	}
+	alc269_fixup_hp_mute_led_micx(codec, fix, action, 0x19);
+}
+
+static void alc269_fixup_hp_mute_led_mic3(struct hda_codec *codec,
+				const struct hda_fixup *fix, int action)
+{
+	alc269_fixup_hp_mute_led_micx(codec, fix, action, 0x1b);
 }
 
 /* update LED status via GPIO */
@@ -5387,6 +5394,9 @@
 /* for dell wmi mic mute led */
 #include "dell_wmi_helper.c"
 
+/* for alc295_fixup_hp_top_speakers */
+#include "hp_x360_helper.c"
+
 enum {
 	ALC269_FIXUP_SONY_VAIO,
 	ALC275_FIXUP_SONY_VAIO_GPIO2,
@@ -5413,6 +5423,7 @@
 	ALC269_FIXUP_HP_MUTE_LED,
 	ALC269_FIXUP_HP_MUTE_LED_MIC1,
 	ALC269_FIXUP_HP_MUTE_LED_MIC2,
+	ALC269_FIXUP_HP_MUTE_LED_MIC3,
 	ALC269_FIXUP_HP_GPIO_LED,
 	ALC269_FIXUP_HP_GPIO_MIC1_LED,
 	ALC269_FIXUP_HP_LINE1_MIC1_LED,
@@ -5506,6 +5517,7 @@
 	ALC298_FIXUP_TPT470_DOCK,
 	ALC255_FIXUP_DUMMY_LINEOUT_VERB,
 	ALC255_FIXUP_DELL_HEADSET_MIC,
+	ALC295_FIXUP_HP_X360,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5672,6 +5684,10 @@
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc269_fixup_hp_mute_led_mic2,
 	},
+	[ALC269_FIXUP_HP_MUTE_LED_MIC3] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc269_fixup_hp_mute_led_mic3,
+	},
 	[ALC269_FIXUP_HP_GPIO_LED] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc269_fixup_hp_gpio_led,
@@ -6375,6 +6391,12 @@
 		.chained = true,
 		.chain_id = ALC269_FIXUP_HEADSET_MIC
 	},
+	[ALC295_FIXUP_HP_X360] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc295_fixup_hp_top_speakers,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HP_MUTE_LED_MIC3
+	}
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6494,6 +6516,7 @@
 	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC),
+	SND_PCI_QUIRK(0x103c, 0x827e, "HP x360", ALC295_FIXUP_HP_X360),
 	SND_PCI_QUIRK(0x103c, 0x82bf, "HP", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x82c0, "HP", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
@@ -6580,7 +6603,6 @@
 	SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x3138, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
-	SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP),
@@ -6752,6 +6774,11 @@
 		{0x1b, 0x01111010},
 		{0x1e, 0x01451130},
 		{0x21, 0x02211020}),
+	SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
+		{0x12, 0x90a60140},
+		{0x14, 0x90170110},
+		{0x19, 0x02a11030},
+		{0x21, 0x02211020}),
 	SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		{0x12, 0x90a60140},
 		{0x14, 0x90170110},
diff --git a/sound/pci/ice1712/pontis.c b/sound/pci/ice1712/pontis.c
index 5101f40..93b8cfc 100644
--- a/sound/pci/ice1712/pontis.c
+++ b/sound/pci/ice1712/pontis.c
@@ -662,7 +662,7 @@
 	struct snd_info_entry *entry;
 	if (! snd_card_proc_new(ice->card, "wm_codec", &entry)) {
 		snd_info_set_text_ops(entry, ice, wm_proc_regs_read);
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 		entry->c.text.write = wm_proc_regs_write;
 	}
 }
diff --git a/sound/pci/ice1712/prodigy_hifi.c b/sound/pci/ice1712/prodigy_hifi.c
index 8dabd4d..d7366ad 100644
--- a/sound/pci/ice1712/prodigy_hifi.c
+++ b/sound/pci/ice1712/prodigy_hifi.c
@@ -926,7 +926,7 @@
 	struct snd_info_entry *entry;
 	if (!snd_card_proc_new(ice->card, "wm_codec", &entry)) {
 		snd_info_set_text_ops(entry, ice, wm_proc_regs_read);
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 		entry->c.text.write = wm_proc_regs_write;
 	}
 }
diff --git a/sound/pci/lola/lola_proc.c b/sound/pci/lola/lola_proc.c
index c241dc0..904e3c4 100644
--- a/sound/pci/lola/lola_proc.c
+++ b/sound/pci/lola/lola_proc.c
@@ -214,7 +214,7 @@
 		snd_info_set_text_ops(entry, chip, lola_proc_codec_read);
 	if (!snd_card_proc_new(chip->card, "codec_rw", &entry)) {
 		snd_info_set_text_ops(entry, chip, lola_proc_codec_rw_read);
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 		entry->c.text.write = lola_proc_codec_rw_write;
 	}
 	if (!snd_card_proc_new(chip->card, "regs", &entry))
diff --git a/sound/pci/oxygen/oxygen_mixer.c b/sound/pci/oxygen/oxygen_mixer.c
index 4ca1266..81af21a 100644
--- a/sound/pci/oxygen/oxygen_mixer.c
+++ b/sound/pci/oxygen/oxygen_mixer.c
@@ -1052,10 +1052,10 @@
 		[CONTROL_CD_CAPTURE_SWITCH] = "CD Capture Switch",
 		[CONTROL_AUX_CAPTURE_SWITCH] = "Aux Capture Switch",
 	};
-	unsigned int i, j;
+	unsigned int i;
 	struct snd_kcontrol_new template;
 	struct snd_kcontrol *ctl;
-	int err;
+	int j, err;
 
 	for (i = 0; i < count; ++i) {
 		template = controls[i];
@@ -1086,11 +1086,11 @@
 		err = snd_ctl_add(chip->card, ctl);
 		if (err < 0)
 			return err;
-		for (j = 0; j < CONTROL_COUNT; ++j)
-			if (!strcmp(ctl->id.name, known_ctl_names[j])) {
-				chip->controls[j] = ctl;
-				ctl->private_free = oxygen_any_ctl_free;
-			}
+		j = match_string(known_ctl_names, CONTROL_COUNT, ctl->id.name);
+		if (j >= 0) {
+			chip->controls[j] = ctl;
+			ctl->private_free = oxygen_any_ctl_free;
+		}
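+		/*
+		 * Editorial note: match_string() returns the index of the
+		 * first exact match in known_ctl_names, or -EINVAL if there
+		 * is none, so the "j >= 0" check above replaces the
+		 * open-coded strcmp loop.
+		 */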
 	}
 	return 0;
 }
diff --git a/sound/pci/pcxhr/pcxhr.c b/sound/pci/pcxhr/pcxhr.c
index f9ae72f..e57da40 100644
--- a/sound/pci/pcxhr/pcxhr.c
+++ b/sound/pci/pcxhr/pcxhr.c
@@ -1465,7 +1465,7 @@
 	    !snd_card_proc_new(chip->card, "gpio", &entry)) {
 		snd_info_set_text_ops(entry, chip, pcxhr_proc_gpio_read);
 		entry->c.text.write = pcxhr_proc_gpo_write;
-		entry->mode |= S_IWUSR;
+		entry->mode |= 0200;
 	}
 	if (!snd_card_proc_new(chip->card, "ltc", &entry))
 		snd_info_set_text_ops(entry, chip, pcxhr_proc_ltc);
diff --git a/sound/soc/Makefile b/sound/soc/Makefile
index 8d92492..06389a5 100644
--- a/sound/soc/Makefile
+++ b/sound/soc/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-snd-soc-core-objs := soc-core.o soc-dapm.o soc-jack.o soc-cache.o soc-utils.o
+snd-soc-core-objs := soc-core.o soc-dapm.o soc-jack.o soc-utils.o
 snd-soc-core-objs += soc-pcm.o soc-io.o soc-devres.o soc-ops.o
 snd-soc-core-$(CONFIG_SND_SOC_COMPRESS) += soc-compress.o
 
diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c
index f41560e..ccddc66 100644
--- a/sound/soc/amd/acp-da7219-max98357a.c
+++ b/sound/soc/amd/acp-da7219-max98357a.c
@@ -33,17 +33,19 @@
 #include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
+#include <linux/input.h>
 #include <linux/acpi.h>
 
+#include "acp.h"
 #include "../codecs/da7219.h"
 #include "../codecs/da7219-aad.h"
 
-#define CZ_PLAT_CLK 24000000
-#define MCLK_RATE 24576000
+#define CZ_PLAT_CLK 25000000
 #define DUAL_CHANNEL		2
 
 static struct snd_soc_jack cz_jack;
 static struct clk *da7219_dai_clk;
+extern bool bt_uart_enable;
 
 static int cz_da7219_init(struct snd_soc_pcm_runtime *rtd)
 {
@@ -62,7 +64,7 @@
 	}
 
 	ret = snd_soc_dai_set_pll(codec_dai, 0, DA7219_SYSCLK_PLL,
-				  CZ_PLAT_CLK, MCLK_RATE);
+				  CZ_PLAT_CLK, DA7219_PLL_FREQ_OUT_98304);
 	if (ret < 0) {
 		dev_err(rtd->dev, "can't set codec pll: %d\n", ret);
 		return ret;
@@ -80,13 +82,17 @@
 		return ret;
 	}
 
+	snd_jack_set_key(cz_jack.jack, SND_JACK_BTN_0, KEY_PLAYPAUSE);
+	snd_jack_set_key(cz_jack.jack, SND_JACK_BTN_1, KEY_VOLUMEUP);
+	snd_jack_set_key(cz_jack.jack, SND_JACK_BTN_2, KEY_VOLUMEDOWN);
+	snd_jack_set_key(cz_jack.jack, SND_JACK_BTN_3, KEY_VOICECOMMAND);
+
 	da7219_aad_jack_det(component, &cz_jack);
 
 	return 0;
 }
 
-static int cz_da7219_hw_params(struct snd_pcm_substream *substream,
-			     struct snd_pcm_hw_params *params)
+static int da7219_clk_enable(struct snd_pcm_substream *substream)
 {
 	int ret = 0;
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
@@ -100,11 +106,9 @@
 	return ret;
 }
 
-static int cz_da7219_hw_free(struct snd_pcm_substream *substream)
+static void da7219_clk_disable(void)
 {
 	clk_disable_unprepare(da7219_dai_clk);
-
-	return 0;
 }
 
 static const unsigned int channels[] = {
@@ -127,9 +131,12 @@
 	.mask = 0,
 };
 
-static int cz_fe_startup(struct snd_pcm_substream *substream)
+static int cz_da7219_startup(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_card *card = rtd->card;
+	struct acp_platform_info *machine = snd_soc_card_get_drvdata(card);
 
 	/*
 	 * On this platform for PCM device we support stereo
@@ -141,23 +148,58 @@
 	snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
 				   &constraints_rates);
 
-	return 0;
+	machine->i2s_instance = I2S_BT_INSTANCE;
+	return da7219_clk_enable(substream);
 }
 
-static struct snd_soc_ops cz_da7219_cap_ops = {
-	.hw_params = cz_da7219_hw_params,
-	.hw_free = cz_da7219_hw_free,
-	.startup = cz_fe_startup,
+static void cz_da7219_shutdown(struct snd_pcm_substream *substream)
+{
+	da7219_clk_disable();
+}
+
+static int cz_max_startup(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_card *card = rtd->card;
+	struct acp_platform_info *machine = snd_soc_card_get_drvdata(card);
+
+	machine->i2s_instance = I2S_SP_INSTANCE;
+	return da7219_clk_enable(substream);
+}
+
+static void cz_max_shutdown(struct snd_pcm_substream *substream)
+{
+	da7219_clk_disable();
+}
+
+static int cz_dmic_startup(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_card *card = rtd->card;
+	struct acp_platform_info *machine = snd_soc_card_get_drvdata(card);
+
+	machine->i2s_instance = I2S_SP_INSTANCE;
+	return da7219_clk_enable(substream);
+}
+
+static void cz_dmic_shutdown(struct snd_pcm_substream *substream)
+{
+	da7219_clk_disable();
+}
+
+static const struct snd_soc_ops cz_da7219_cap_ops = {
+	.startup = cz_da7219_startup,
+	.shutdown = cz_da7219_shutdown,
 };
 
-static struct snd_soc_ops cz_max_play_ops = {
-	.hw_params = cz_da7219_hw_params,
-	.hw_free = cz_da7219_hw_free,
+static const struct snd_soc_ops cz_max_play_ops = {
+	.startup = cz_max_startup,
+	.shutdown = cz_max_shutdown,
 };
 
-static struct snd_soc_ops cz_dmic_cap_ops = {
-	.hw_params = cz_da7219_hw_params,
-	.hw_free = cz_da7219_hw_free,
+static const struct snd_soc_ops cz_dmic_cap_ops = {
+	.startup = cz_dmic_startup,
+	.shutdown = cz_dmic_shutdown,
 };
 
 static struct snd_soc_dai_link cz_dai_7219_98357[] = {
@@ -240,10 +282,16 @@
 {
 	int ret;
 	struct snd_soc_card *card;
+	struct acp_platform_info *machine;
 
+	machine = devm_kzalloc(&pdev->dev, sizeof(struct acp_platform_info),
+			       GFP_KERNEL);
+	if (!machine)
+		return -ENOMEM;
 	card = &cz_card;
 	cz_card.dev = &pdev->dev;
 	platform_set_drvdata(pdev, card);
+	snd_soc_card_set_drvdata(card, machine);
 	ret = devm_snd_soc_register_card(&pdev->dev, &cz_card);
 	if (ret) {
 		dev_err(&pdev->dev,
@@ -251,6 +299,8 @@
 				cz_card.name, ret);
 		return ret;
 	}
+	bt_uart_enable = !device_property_read_bool(&pdev->dev,
+						    "bt-pad-enable");
 	return 0;
 }
 
diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c
index 540088d..7720384 100644
--- a/sound/soc/amd/acp-pcm-dma.c
+++ b/sound/soc/amd/acp-pcm-dma.c
@@ -37,12 +37,14 @@
 #define MAX_BUFFER (PLAYBACK_MAX_PERIOD_SIZE * PLAYBACK_MAX_NUM_PERIODS)
 #define MIN_BUFFER MAX_BUFFER
 
-#define ST_PLAYBACK_MAX_PERIOD_SIZE 8192
+#define ST_PLAYBACK_MAX_PERIOD_SIZE 4096
 #define ST_CAPTURE_MAX_PERIOD_SIZE  ST_PLAYBACK_MAX_PERIOD_SIZE
 #define ST_MAX_BUFFER (ST_PLAYBACK_MAX_PERIOD_SIZE * PLAYBACK_MAX_NUM_PERIODS)
 #define ST_MIN_BUFFER ST_MAX_BUFFER
 
 #define DRV_NAME "acp_audio_dma"
+bool bt_uart_enable = true;
+EXPORT_SYMBOL(bt_uart_enable);
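+/*
+ * Editorial note, not part of the original patch: bt_uart_enable defaults to
+ * true and is exported so the machine driver can set it from the
+ * "bt-pad-enable" device property; acp_init() then switches the pads from
+ * UART to BT when it is false.
+ */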
 
 static const struct snd_pcm_hardware acp_pcm_hardware_playback = {
 	.info = SNDRV_PCM_INFO_INTERLEAVED |
@@ -130,7 +132,8 @@
 	writel(val, acp_mmio + (reg * 4));
 }
 
-/* Configure a given dma channel parameters - enable/disable,
+/*
+ * Configure the parameters of a given DMA channel - enable/disable,
  * number of descriptors, priority
  */
 static void config_acp_dma_channel(void __iomem *acp_mmio, u8 ch_num,
@@ -149,11 +152,12 @@
 			& dscr_strt_idx),
 			acp_mmio, mmACP_DMA_DSCR_STRT_IDX_0 + ch_num);
 
-	/* program a DMA channel with the number of descriptors to be
+	/*
+	 * program a DMA channel with the number of descriptors to be
 	 * processed in the transfer
-	*/
+	 */
 	acp_reg_write(ACP_DMA_DSCR_CNT_0__DMAChDscrCnt_MASK & num_dscrs,
-		acp_mmio, mmACP_DMA_DSCR_CNT_0 + ch_num);
+		      acp_mmio, mmACP_DMA_DSCR_CNT_0 + ch_num);
 
 	/* set DMA channel priority */
 	acp_reg_write(priority_level, acp_mmio, mmACP_DMA_PRIO_0 + ch_num);
@@ -180,13 +184,15 @@
 	acp_reg_write(descr_info->xfer_val, acp_mmio, mmACP_SRBM_Targ_Idx_Data);
 }
 
-/* Initialize the DMA descriptor information for transfer between
+/*
+ * Initialize the DMA descriptor information for transfer between
  * system memory <-> ACP SRAM
  */
 static void set_acp_sysmem_dma_descriptors(void __iomem *acp_mmio,
-					u32 size, int direction, u32 pte_offset,
-					u16 ch, u32 sram_bank,
-					u16 dma_dscr_idx, u32 asic_type)
+					   u32 size, int direction,
+					   u32 pte_offset, u16 ch,
+					   u32 sram_bank, u16 dma_dscr_idx,
+					   u32 asic_type)
 {
 	u16 i;
 	acp_dma_dscr_transfer_t dmadscr[NUM_DSCRS_PER_CHANNEL];
@@ -195,58 +201,58 @@
 		dmadscr[i].xfer_val = 0;
 		if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
 			dma_dscr_idx = dma_dscr_idx + i;
-			dmadscr[i].dest = sram_bank + (i * (size/2));
+			dmadscr[i].dest = sram_bank + (i * (size / 2));
 			dmadscr[i].src = ACP_INTERNAL_APERTURE_WINDOW_0_ADDRESS
-				+ (pte_offset * SZ_4K) + (i * (size/2));
+				+ (pte_offset * SZ_4K) + (i * (size / 2));
 			switch (asic_type) {
 			case CHIP_STONEY:
 				dmadscr[i].xfer_val |=
-				(ACP_DMA_ATTRIBUTES_DAGB_GARLIC_TO_SHAREDMEM  << 16) |
+				(ACP_DMA_ATTR_DAGB_GARLIC_TO_SHAREDMEM  << 16) |
 				(size / 2);
 				break;
 			default:
 				dmadscr[i].xfer_val |=
-				(ACP_DMA_ATTRIBUTES_DAGB_ONION_TO_SHAREDMEM  << 16) |
+				(ACP_DMA_ATTR_DAGB_ONION_TO_SHAREDMEM  << 16) |
 				(size / 2);
 			}
 		} else {
 			dma_dscr_idx = dma_dscr_idx + i;
-			dmadscr[i].src = sram_bank + (i * (size/2));
+			dmadscr[i].src = sram_bank + (i * (size / 2));
 			dmadscr[i].dest =
 			ACP_INTERNAL_APERTURE_WINDOW_0_ADDRESS +
-			(pte_offset * SZ_4K) + (i * (size/2));
+			(pte_offset * SZ_4K) + (i * (size / 2));
 			switch (asic_type) {
 			case CHIP_STONEY:
 				dmadscr[i].xfer_val |=
 				BIT(22) |
-				(ACP_DMA_ATTRIBUTES_SHARED_MEM_TO_DAGB_GARLIC << 16) |
+				(ACP_DMA_ATTR_SHARED_MEM_TO_DAGB_GARLIC << 16) |
 				(size / 2);
 				break;
 			default:
 				dmadscr[i].xfer_val |=
 				BIT(22) |
-				(ACP_DMA_ATTRIBUTES_SHAREDMEM_TO_DAGB_ONION << 16) |
+				(ACP_DMA_ATTR_SHAREDMEM_TO_DAGB_ONION << 16) |
 				(size / 2);
 			}
 		}
 		config_dma_descriptor_in_sram(acp_mmio, dma_dscr_idx,
-						&dmadscr[i]);
+					      &dmadscr[i]);
 	}
 	config_acp_dma_channel(acp_mmio, ch,
-				dma_dscr_idx - 1,
-				NUM_DSCRS_PER_CHANNEL,
-				ACP_DMA_PRIORITY_LEVEL_NORMAL);
+			       dma_dscr_idx - 1,
+			       NUM_DSCRS_PER_CHANNEL,
+			       ACP_DMA_PRIORITY_LEVEL_NORMAL);
 }
 
-/* Initialize the DMA descriptor information for transfer between
+/*
+ * Initialize the DMA descriptor information for transfer between
  * ACP SRAM <-> I2S
  */
 static void set_acp_to_i2s_dma_descriptors(void __iomem *acp_mmio, u32 size,
-						int direction, u32 sram_bank,
-						u16 destination, u16 ch,
-						u16 dma_dscr_idx, u32 asic_type)
+					   int direction, u32 sram_bank,
+					   u16 destination, u16 ch,
+					   u16 dma_dscr_idx, u32 asic_type)
 {
-
 	u16 i;
 	acp_dma_dscr_transfer_t dmadscr[NUM_DSCRS_PER_CHANNEL];
 
@@ -254,7 +260,7 @@
 		dmadscr[i].xfer_val = 0;
 		if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
 			dma_dscr_idx = dma_dscr_idx + i;
-			dmadscr[i].src = sram_bank  + (i * (size/2));
+			dmadscr[i].src = sram_bank  + (i * (size / 2));
 			/* dmadscr[i].dest is unused by hardware. */
 			dmadscr[i].dest = 0;
 			dmadscr[i].xfer_val |= BIT(22) | (destination << 16) |
@@ -269,12 +275,12 @@
 				(destination << 16) | (size / 2);
 		}
 		config_dma_descriptor_in_sram(acp_mmio, dma_dscr_idx,
-						&dmadscr[i]);
+					      &dmadscr[i]);
 	}
 	/* Configure the DMA channel with the above descriptors */
 	config_acp_dma_channel(acp_mmio, ch, dma_dscr_idx - 1,
-				NUM_DSCRS_PER_CHANNEL,
-				ACP_DMA_PRIORITY_LEVEL_NORMAL);
+			       NUM_DSCRS_PER_CHANNEL,
+			       ACP_DMA_PRIORITY_LEVEL_NORMAL);
 }
 
 /* Create page table entries in ACP SRAM for the allocated memory */
@@ -291,7 +297,7 @@
 	for (page_idx = 0; page_idx < (num_of_pages); page_idx++) {
 		/* Load the low address of page int ACP SRAM through SRBM */
 		acp_reg_write((offset + (page_idx * 8)),
-			acp_mmio, mmACP_SRBM_Targ_Idx_Addr);
+			      acp_mmio, mmACP_SRBM_Targ_Idx_Addr);
 		addr = page_to_phys(pg);
 
 		low = lower_32_bits(addr);
@@ -301,7 +307,7 @@
 
 		/* Load the High address of page int ACP SRAM through SRBM */
 		acp_reg_write((offset + (page_idx * 8) + 4),
-			acp_mmio, mmACP_SRBM_Targ_Idx_Addr);
+			      acp_mmio, mmACP_SRBM_Targ_Idx_Addr);
 
 		/* page enable in ACP */
 		high |= BIT(31);
@@ -313,59 +319,25 @@
 }
 
 static void config_acp_dma(void __iomem *acp_mmio,
-			struct audio_substream_data *audio_config,
-			u32 asic_type)
+			   struct audio_substream_data *rtd,
+			   u32 asic_type)
 {
-	u32 pte_offset, sram_bank;
-	u16 ch1, ch2, destination, dma_dscr_idx;
-
-	if (audio_config->direction == SNDRV_PCM_STREAM_PLAYBACK) {
-		pte_offset = ACP_PLAYBACK_PTE_OFFSET;
-		ch1 = SYSRAM_TO_ACP_CH_NUM;
-		ch2 = ACP_TO_I2S_DMA_CH_NUM;
-		sram_bank = ACP_SHARED_RAM_BANK_1_ADDRESS;
-		destination = TO_ACP_I2S_1;
-
-	} else {
-		pte_offset = ACP_CAPTURE_PTE_OFFSET;
-		ch1 = SYSRAM_TO_ACP_CH_NUM;
-		ch2 = ACP_TO_I2S_DMA_CH_NUM;
-		switch (asic_type) {
-		case CHIP_STONEY:
-			sram_bank = ACP_SHARED_RAM_BANK_3_ADDRESS;
-			break;
-		default:
-			sram_bank = ACP_SHARED_RAM_BANK_5_ADDRESS;
-		}
-		destination = FROM_ACP_I2S_1;
-	}
-
-	acp_pte_config(acp_mmio, audio_config->pg, audio_config->num_of_pages,
-			pte_offset);
-	if (audio_config->direction == SNDRV_PCM_STREAM_PLAYBACK)
-		dma_dscr_idx = PLAYBACK_START_DMA_DESCR_CH12;
-	else
-		dma_dscr_idx = CAPTURE_START_DMA_DESCR_CH14;
-
+	acp_pte_config(acp_mmio, rtd->pg, rtd->num_of_pages,
+		       rtd->pte_offset);
 	/* Configure System memory <-> ACP SRAM DMA descriptors */
-	set_acp_sysmem_dma_descriptors(acp_mmio, audio_config->size,
-				       audio_config->direction, pte_offset,
-					ch1, sram_bank, dma_dscr_idx, asic_type);
-
-	if (audio_config->direction == SNDRV_PCM_STREAM_PLAYBACK)
-		dma_dscr_idx = PLAYBACK_START_DMA_DESCR_CH13;
-	else
-		dma_dscr_idx = CAPTURE_START_DMA_DESCR_CH15;
+	set_acp_sysmem_dma_descriptors(acp_mmio, rtd->size,
+				       rtd->direction, rtd->pte_offset,
+				       rtd->ch1, rtd->sram_bank,
+				       rtd->dma_dscr_idx_1, asic_type);
 	/* Configure ACP SRAM <-> I2S DMA descriptors */
-	set_acp_to_i2s_dma_descriptors(acp_mmio, audio_config->size,
-					audio_config->direction, sram_bank,
-					destination, ch2, dma_dscr_idx,
-					asic_type);
+	set_acp_to_i2s_dma_descriptors(acp_mmio, rtd->size,
+				       rtd->direction, rtd->sram_bank,
+				       rtd->destination, rtd->ch2,
+				       rtd->dma_dscr_idx_2, asic_type);
 }
 
 /* Start a given DMA channel transfer */
-static void acp_dma_start(void __iomem *acp_mmio,
-			 u16 ch_num, bool is_circular)
+static void acp_dma_start(void __iomem *acp_mmio, u16 ch_num)
 {
 	u32 dma_ctrl;
 
@@ -375,7 +347,8 @@
 	/* Invalidating the DAGB cache */
 	acp_reg_write(1, acp_mmio, mmACP_DAGB_ATU_CTRL);
 
-	/* configure the DMA channel and start the DMA transfer
+	/*
+	 * configure the DMA channel and start the DMA transfer
 	 * set dmachrun bit to start the transfer and enable the
 	 * interrupt on completion of the dma transfer
 	 */
@@ -385,6 +358,9 @@
 	case ACP_TO_I2S_DMA_CH_NUM:
 	case ACP_TO_SYSRAM_CH_NUM:
 	case I2S_TO_ACP_DMA_CH_NUM:
+	case ACP_TO_I2S_DMA_BT_INSTANCE_CH_NUM:
+	case ACP_TO_SYSRAM_BT_INSTANCE_CH_NUM:
+	case I2S_TO_ACP_DMA_BT_INSTANCE_CH_NUM:
 		dma_ctrl |= ACP_DMA_CNTL_0__DMAChIOCEn_MASK;
 		break;
 	default:
@@ -392,11 +368,8 @@
 		break;
 	}
 
-	/* enable  for ACP SRAM to/from I2S DMA channel */
-	if (is_circular == true)
-		dma_ctrl |= ACP_DMA_CNTL_0__Circular_DMA_En_MASK;
-	else
-		dma_ctrl &= ~ACP_DMA_CNTL_0__Circular_DMA_En_MASK;
+	/* circular for both DMA channels */
+	dma_ctrl |= ACP_DMA_CNTL_0__Circular_DMA_En_MASK;
 
 	acp_reg_write(dma_ctrl, acp_mmio, mmACP_DMA_CNTL_0 + ch_num);
 }
@@ -410,9 +383,10 @@
 
 	dma_ctrl = acp_reg_read(acp_mmio, mmACP_DMA_CNTL_0 + ch_num);
 
-	/* clear the dma control register fields before writing zero
+	/*
+	 * clear the dma control register fields before writing zero
 	 * in reset bit
-	*/
+	 */
 	dma_ctrl &= ~ACP_DMA_CNTL_0__DMAChRun_MASK;
 	dma_ctrl &= ~ACP_DMA_CNTL_0__DMAChIOCEn_MASK;
 
@@ -420,9 +394,10 @@
 	dma_ch_sts = acp_reg_read(acp_mmio, mmACP_DMA_CH_STS);
 
 	if (dma_ch_sts & BIT(ch_num)) {
-		/* set the reset bit for this channel to stop the dma
-		*  transfer
-		*/
+		/*
+		 * set the reset bit for this channel to stop the dma
+		 *  transfer
+		 */
 		dma_ctrl |= ACP_DMA_CNTL_0__DMAChRst_MASK;
 		acp_reg_write(dma_ctrl, acp_mmio, mmACP_DMA_CNTL_0 + ch_num);
 	}
@@ -431,13 +406,14 @@
 	while (true) {
 		dma_ch_sts = acp_reg_read(acp_mmio, mmACP_DMA_CH_STS);
 		if (!(dma_ch_sts & BIT(ch_num))) {
-			/* clear the reset flag after successfully stopping
-			* the dma transfer and break from the loop
-			*/
+			/*
+			 * clear the reset flag after successfully stopping
+			 * the dma transfer and break from the loop
+			 */
 			dma_ctrl &= ~ACP_DMA_CNTL_0__DMAChRst_MASK;
 
 			acp_reg_write(dma_ctrl, acp_mmio, mmACP_DMA_CNTL_0
-								+ ch_num);
+				      + ch_num);
 			break;
 		}
 		if (--count == 0) {
@@ -450,7 +426,7 @@
 }
 
 static void acp_set_sram_bank_state(void __iomem *acp_mmio, u16 bank,
-					bool power_on)
+				    bool power_on)
 {
 	u32 val, req_reg, sts_reg, sts_reg_mask;
 	u32 loops = 1000;
@@ -530,7 +506,7 @@
 
 	while (true) {
 		val = acp_reg_read(acp_mmio, mmACP_STATUS);
-		if (val & (u32) 0x1)
+		if (val & (u32)0x1)
 			break;
 		if (--count == 0) {
 			pr_err("Failed to reset ACP\n");
@@ -544,13 +520,20 @@
 	val &= ~ACP_SOFT_RESET__SoftResetAud_MASK;
 	acp_reg_write(val, acp_mmio, mmACP_SOFT_RESET);
 
+	/* For the BT instance, change pins from UART to BT */
+	if (!bt_uart_enable) {
+		val = acp_reg_read(acp_mmio, mmACP_BT_UART_PAD_SEL);
+		val |= ACP_BT_UART_PAD_SELECT_MASK;
+		acp_reg_write(val, acp_mmio, mmACP_BT_UART_PAD_SEL);
+	}
+
 	/* initialize Onion control DAGB register */
 	acp_reg_write(ACP_ONION_CNTL_DEFAULT, acp_mmio,
-			mmACP_AXI2DAGB_ONION_CNTL);
+		      mmACP_AXI2DAGB_ONION_CNTL);
 
 	/* initialize Garlic control DAGB registers */
 	acp_reg_write(ACP_GARLIC_CNTL_DEFAULT, acp_mmio,
-			mmACP_AXI2DAGB_GARLIC_CNTL);
+		      mmACP_AXI2DAGB_GARLIC_CNTL);
 
 	sram_pte_offset = ACP_DAGB_GRP_SRAM_BASE_ADDRESS |
 			ACP_DAGB_BASE_ADDR_GRP_1__AXI2DAGBSnoopSel_MASK |
@@ -558,17 +541,18 @@
 			ACP_DAGB_BASE_ADDR_GRP_1__AXI2DAGBGrpEnable_MASK;
 	acp_reg_write(sram_pte_offset,  acp_mmio, mmACP_DAGB_BASE_ADDR_GRP_1);
 	acp_reg_write(ACP_PAGE_SIZE_4K_ENABLE, acp_mmio,
-			mmACP_DAGB_PAGE_SIZE_GRP_1);
+		      mmACP_DAGB_PAGE_SIZE_GRP_1);
 
 	acp_reg_write(ACP_SRAM_BASE_ADDRESS, acp_mmio,
-			mmACP_DMA_DESC_BASE_ADDR);
+		      mmACP_DMA_DESC_BASE_ADDR);
 
 	/* Num of descriptors in SRAM 0x4 means 256 descriptors (64 * 4) */
 	acp_reg_write(0x4, acp_mmio, mmACP_DMA_DESC_MAX_NUM_DSCR);
 	acp_reg_write(ACP_EXTERNAL_INTR_CNTL__DMAIOCMask_MASK,
-		acp_mmio, mmACP_EXTERNAL_INTR_CNTL);
+		      acp_mmio, mmACP_EXTERNAL_INTR_CNTL);
 
-       /* When ACP_TILE_P1 is turned on, all SRAM banks get turned on.
+       /*
+	* When ACP_TILE_P1 is turned on, all SRAM banks get turned on.
 	* Now, turn off all of them. This can't be done in 'poweron' of
 	* ACP pm domain, as this requires ACP to be initialized.
	* For Stoney, Memory gating is disabled, i.e. SRAM Banks
@@ -606,7 +590,7 @@
 		}
 		udelay(100);
 	}
-	/** Disable ACP clock */
+	/* Disable ACP clock */
 	val = acp_reg_read(acp_mmio, mmACP_CONTROL);
 	val &= ~ACP_CONTROL__ClkEn_MASK;
 	acp_reg_write(val, acp_mmio, mmACP_CONTROL);
@@ -615,7 +599,7 @@
 
 	while (true) {
 		val = acp_reg_read(acp_mmio, mmACP_STATUS);
-		if (!(val & (u32) 0x1))
+		if (!(val & (u32)0x1))
 			break;
 		if (--count == 0) {
 			pr_err("Failed to reset ACP\n");
@@ -629,7 +613,6 @@
 /* ACP DMA irq handler routine for playback, capture usecases */
 static irqreturn_t dma_irq_handler(int irq, void *arg)
 {
-	u16 dscr_idx;
 	u32 intr_flag, ext_intr_status;
 	struct audio_drv_data *irq_data;
 	void __iomem *acp_mmio;
@@ -646,41 +629,45 @@
 
 	if ((intr_flag & BIT(ACP_TO_I2S_DMA_CH_NUM)) != 0) {
 		valid_irq = true;
-		if (acp_reg_read(acp_mmio, mmACP_DMA_CUR_DSCR_13) ==
-				PLAYBACK_START_DMA_DESCR_CH13)
-			dscr_idx = PLAYBACK_END_DMA_DESCR_CH12;
-		else
-			dscr_idx = PLAYBACK_START_DMA_DESCR_CH12;
-		config_acp_dma_channel(acp_mmio, SYSRAM_TO_ACP_CH_NUM, dscr_idx,
-				       1, 0);
-		acp_dma_start(acp_mmio, SYSRAM_TO_ACP_CH_NUM, false);
-
 		snd_pcm_period_elapsed(irq_data->play_i2ssp_stream);
-
 		acp_reg_write((intr_flag & BIT(ACP_TO_I2S_DMA_CH_NUM)) << 16,
-				acp_mmio, mmACP_EXTERNAL_INTR_STAT);
+			      acp_mmio, mmACP_EXTERNAL_INTR_STAT);
+	}
+
+	if ((intr_flag & BIT(ACP_TO_I2S_DMA_BT_INSTANCE_CH_NUM)) != 0) {
+		valid_irq = true;
+		snd_pcm_period_elapsed(irq_data->play_i2sbt_stream);
+		acp_reg_write((intr_flag &
+			      BIT(ACP_TO_I2S_DMA_BT_INSTANCE_CH_NUM)) << 16,
+			      acp_mmio, mmACP_EXTERNAL_INTR_STAT);
 	}
 
 	if ((intr_flag & BIT(I2S_TO_ACP_DMA_CH_NUM)) != 0) {
 		valid_irq = true;
-		if (acp_reg_read(acp_mmio, mmACP_DMA_CUR_DSCR_15) ==
-				CAPTURE_START_DMA_DESCR_CH15)
-			dscr_idx = CAPTURE_END_DMA_DESCR_CH14;
-		else
-			dscr_idx = CAPTURE_START_DMA_DESCR_CH14;
-		config_acp_dma_channel(acp_mmio, ACP_TO_SYSRAM_CH_NUM, dscr_idx,
-				       1, 0);
-		acp_dma_start(acp_mmio, ACP_TO_SYSRAM_CH_NUM, false);
-
+		snd_pcm_period_elapsed(irq_data->capture_i2ssp_stream);
 		acp_reg_write((intr_flag & BIT(I2S_TO_ACP_DMA_CH_NUM)) << 16,
-				acp_mmio, mmACP_EXTERNAL_INTR_STAT);
+			      acp_mmio, mmACP_EXTERNAL_INTR_STAT);
 	}
 
 	if ((intr_flag & BIT(ACP_TO_SYSRAM_CH_NUM)) != 0) {
 		valid_irq = true;
-		snd_pcm_period_elapsed(irq_data->capture_i2ssp_stream);
 		acp_reg_write((intr_flag & BIT(ACP_TO_SYSRAM_CH_NUM)) << 16,
-				acp_mmio, mmACP_EXTERNAL_INTR_STAT);
+			      acp_mmio, mmACP_EXTERNAL_INTR_STAT);
+	}
+
+	if ((intr_flag & BIT(I2S_TO_ACP_DMA_BT_INSTANCE_CH_NUM)) != 0) {
+		valid_irq = true;
+		snd_pcm_period_elapsed(irq_data->capture_i2sbt_stream);
+		acp_reg_write((intr_flag &
+			      BIT(I2S_TO_ACP_DMA_BT_INSTANCE_CH_NUM)) << 16,
+			      acp_mmio, mmACP_EXTERNAL_INTR_STAT);
+	}
+
+	if ((intr_flag & BIT(ACP_TO_SYSRAM_BT_INSTANCE_CH_NUM)) != 0) {
+		valid_irq = true;
+		acp_reg_write((intr_flag &
+			      BIT(ACP_TO_SYSRAM_BT_INSTANCE_CH_NUM)) << 16,
+			      acp_mmio, mmACP_EXTERNAL_INTR_STAT);
 	}
 
 	if (valid_irq)
@@ -695,11 +682,12 @@
 	int ret = 0;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_soc_pcm_runtime *prtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(prtd, DRV_NAME);
+	struct snd_soc_component *component = snd_soc_rtdcom_lookup(prtd,
+								    DRV_NAME);
 	struct audio_drv_data *intr_data = dev_get_drvdata(component->dev);
 	struct audio_substream_data *adata =
 		kzalloc(sizeof(struct audio_substream_data), GFP_KERNEL);
-	if (adata == NULL)
+	if (!adata)
 		return -ENOMEM;
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
@@ -731,17 +719,19 @@
 	adata->acp_mmio = intr_data->acp_mmio;
 	runtime->private_data = adata;
 
-	/* Enable ACP irq, when neither playback or capture streams are
+	/*
+	 * Enable ACP irq, when neither playback nor capture streams are
 	 * active by the time when a new stream is being opened.
 	 * This enablement is not required for another stream, if current
 	 * stream is not closed
-	*/
-	if (!intr_data->play_i2ssp_stream && !intr_data->capture_i2ssp_stream)
+	 */
+	if (!intr_data->play_i2ssp_stream && !intr_data->capture_i2ssp_stream &&
+	    !intr_data->play_i2sbt_stream && !intr_data->capture_i2sbt_stream)
 		acp_reg_write(1, adata->acp_mmio, mmACP_EXTERNAL_INTR_ENB);
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		intr_data->play_i2ssp_stream = substream;
-		/* For Stoney, Memory gating is disabled,i.e SRAM Banks
+		/*
+		 * For Stoney, Memory gating is disabled, i.e. SRAM Banks
 		 * won't be turned off. The default state for SRAM banks is ON.
 		 * Setting SRAM bank state code skipped for STONEY platform.
 		 */
@@ -751,7 +741,6 @@
 							bank, true);
 		}
 	} else {
-		intr_data->capture_i2ssp_stream = substream;
 		if (intr_data->asic_type != CHIP_STONEY) {
 			for (bank = 5; bank <= 8; bank++)
 				acp_set_sram_bank_state(intr_data->acp_mmio,
@@ -772,8 +761,11 @@
 	struct snd_pcm_runtime *runtime;
 	struct audio_substream_data *rtd;
 	struct snd_soc_pcm_runtime *prtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(prtd, DRV_NAME);
+	struct snd_soc_component *component = snd_soc_rtdcom_lookup(prtd,
+								    DRV_NAME);
 	struct audio_drv_data *adata = dev_get_drvdata(component->dev);
+	struct snd_soc_card *card = prtd->card;
+	struct acp_platform_info *pinfo = snd_soc_card_get_drvdata(card);
 
 	runtime = substream->runtime;
 	rtd = runtime->private_data;
@@ -781,14 +773,111 @@
 	if (WARN_ON(!rtd))
 		return -EINVAL;
 
+	rtd->i2s_instance = pinfo->i2s_instance;
 	if (adata->asic_type == CHIP_STONEY) {
-		val = acp_reg_read(adata->acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
-		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-			val |= ACP_I2S_SP_16BIT_RESOLUTION_EN;
-		else
-			val |= ACP_I2S_MIC_16BIT_RESOLUTION_EN;
-		acp_reg_write(val, adata->acp_mmio, mmACP_I2S_16BIT_RESOLUTION_EN);
+		val = acp_reg_read(adata->acp_mmio,
+				   mmACP_I2S_16BIT_RESOLUTION_EN);
+		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+			switch (rtd->i2s_instance) {
+			case I2S_BT_INSTANCE:
+				val |= ACP_I2S_BT_16BIT_RESOLUTION_EN;
+				break;
+			case I2S_SP_INSTANCE:
+			default:
+				val |= ACP_I2S_SP_16BIT_RESOLUTION_EN;
+			}
+		} else {
+			switch (rtd->i2s_instance) {
+			case I2S_BT_INSTANCE:
+				val |= ACP_I2S_BT_16BIT_RESOLUTION_EN;
+				break;
+			case I2S_SP_INSTANCE:
+			default:
+				val |= ACP_I2S_MIC_16BIT_RESOLUTION_EN;
+			}
+		}
+		acp_reg_write(val, adata->acp_mmio,
+			      mmACP_I2S_16BIT_RESOLUTION_EN);
 	}
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		switch (rtd->i2s_instance) {
+		case I2S_BT_INSTANCE:
+			rtd->pte_offset = ACP_ST_BT_PLAYBACK_PTE_OFFSET;
+			rtd->ch1 = SYSRAM_TO_ACP_BT_INSTANCE_CH_NUM;
+			rtd->ch2 = ACP_TO_I2S_DMA_BT_INSTANCE_CH_NUM;
+			rtd->sram_bank = ACP_SRAM_BANK_3_ADDRESS;
+			rtd->destination = TO_BLUETOOTH;
+			rtd->dma_dscr_idx_1 = PLAYBACK_START_DMA_DESCR_CH8;
+			rtd->dma_dscr_idx_2 = PLAYBACK_START_DMA_DESCR_CH9;
+			rtd->byte_cnt_high_reg_offset =
+					mmACP_I2S_BT_TRANSMIT_BYTE_CNT_HIGH;
+			rtd->byte_cnt_low_reg_offset =
+					mmACP_I2S_BT_TRANSMIT_BYTE_CNT_LOW;
+			adata->play_i2sbt_stream = substream;
+			break;
+		case I2S_SP_INSTANCE:
+		default:
+			switch (adata->asic_type) {
+			case CHIP_STONEY:
+				rtd->pte_offset = ACP_ST_PLAYBACK_PTE_OFFSET;
+				break;
+			default:
+				rtd->pte_offset = ACP_PLAYBACK_PTE_OFFSET;
+			}
+			rtd->ch1 = SYSRAM_TO_ACP_CH_NUM;
+			rtd->ch2 = ACP_TO_I2S_DMA_CH_NUM;
+			rtd->sram_bank = ACP_SRAM_BANK_1_ADDRESS;
+			rtd->destination = TO_ACP_I2S_1;
+			rtd->dma_dscr_idx_1 = PLAYBACK_START_DMA_DESCR_CH12;
+			rtd->dma_dscr_idx_2 = PLAYBACK_START_DMA_DESCR_CH13;
+			rtd->byte_cnt_high_reg_offset =
+					mmACP_I2S_TRANSMIT_BYTE_CNT_HIGH;
+			rtd->byte_cnt_low_reg_offset =
+					mmACP_I2S_TRANSMIT_BYTE_CNT_LOW;
+			adata->play_i2ssp_stream = substream;
+		}
+	} else {
+		switch (rtd->i2s_instance) {
+		case I2S_BT_INSTANCE:
+			rtd->pte_offset = ACP_ST_BT_CAPTURE_PTE_OFFSET;
+			rtd->ch1 = ACP_TO_SYSRAM_BT_INSTANCE_CH_NUM;
+			rtd->ch2 = I2S_TO_ACP_DMA_BT_INSTANCE_CH_NUM;
+			rtd->sram_bank = ACP_SRAM_BANK_4_ADDRESS;
+			rtd->destination = FROM_BLUETOOTH;
+			rtd->dma_dscr_idx_1 = CAPTURE_START_DMA_DESCR_CH10;
+			rtd->dma_dscr_idx_2 = CAPTURE_START_DMA_DESCR_CH11;
+			rtd->byte_cnt_high_reg_offset =
+					mmACP_I2S_BT_RECEIVE_BYTE_CNT_HIGH;
+			rtd->byte_cnt_low_reg_offset =
+					mmACP_I2S_BT_RECEIVE_BYTE_CNT_LOW;
+			adata->capture_i2sbt_stream = substream;
+			break;
+		case I2S_SP_INSTANCE:
+		default:
+			rtd->pte_offset = ACP_CAPTURE_PTE_OFFSET;
+			rtd->ch1 = ACP_TO_SYSRAM_CH_NUM;
+			rtd->ch2 = I2S_TO_ACP_DMA_CH_NUM;
+			switch (adata->asic_type) {
+			case CHIP_STONEY:
+				rtd->pte_offset = ACP_ST_CAPTURE_PTE_OFFSET;
+				rtd->sram_bank = ACP_SRAM_BANK_2_ADDRESS;
+				break;
+			default:
+				rtd->pte_offset = ACP_CAPTURE_PTE_OFFSET;
+				rtd->sram_bank = ACP_SRAM_BANK_5_ADDRESS;
+			}
+			rtd->destination = FROM_ACP_I2S_1;
+			rtd->dma_dscr_idx_1 = CAPTURE_START_DMA_DESCR_CH14;
+			rtd->dma_dscr_idx_2 = CAPTURE_START_DMA_DESCR_CH15;
+			rtd->byte_cnt_high_reg_offset =
+					mmACP_I2S_RECEIVED_BYTE_CNT_HIGH;
+			rtd->byte_cnt_low_reg_offset =
+					mmACP_I2S_RECEIVED_BYTE_CNT_LOW;
+			adata->capture_i2ssp_stream = substream;
+		}
+	}
+
 	size = params_buffer_bytes(params);
 	status = snd_pcm_lib_malloc_pages(substream, size);
 	if (status < 0)
@@ -797,7 +886,7 @@
 	memset(substream->runtime->dma_area, 0, params_buffer_bytes(params));
 	pg = virt_to_page(substream->dma_buffer.area);
 
-	if (pg != NULL) {
+	if (pg) {
 		acp_set_sram_bank_state(rtd->acp_mmio, 0, true);
 		/* Save for runtime private data */
 		rtd->pg = pg;
@@ -822,26 +911,15 @@
 	return snd_pcm_lib_free_pages(substream);
 }
 
-static u64 acp_get_byte_count(void __iomem *acp_mmio, int stream)
+static u64 acp_get_byte_count(struct audio_substream_data *rtd)
 {
-	union acp_dma_count playback_dma_count;
-	union acp_dma_count capture_dma_count;
-	u64 bytescount = 0;
+	union acp_dma_count byte_count;
 
-	if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		playback_dma_count.bcount.high = acp_reg_read(acp_mmio,
-					mmACP_I2S_TRANSMIT_BYTE_CNT_HIGH);
-		playback_dma_count.bcount.low  = acp_reg_read(acp_mmio,
-					mmACP_I2S_TRANSMIT_BYTE_CNT_LOW);
-		bytescount = playback_dma_count.bytescount;
-	} else {
-		capture_dma_count.bcount.high = acp_reg_read(acp_mmio,
-					mmACP_I2S_RECEIVED_BYTE_CNT_HIGH);
-		capture_dma_count.bcount.low  = acp_reg_read(acp_mmio,
-					mmACP_I2S_RECEIVED_BYTE_CNT_LOW);
-		bytescount = capture_dma_count.bytescount;
-	}
-	return bytescount;
+	byte_count.bcount.high = acp_reg_read(rtd->acp_mmio,
+					      rtd->byte_cnt_high_reg_offset);
+	byte_count.bcount.low  = acp_reg_read(rtd->acp_mmio,
+					      rtd->byte_cnt_low_reg_offset);
+	return byte_count.bytescount;
 }
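
The helper now picks up the byte-count register pair through per-stream offsets and splices the two 32-bit reads with the acp_dma_count union. For reference, a minimal host-side sketch of that splice, assuming a little-endian CPU; byte_count and the literal values are illustrative stand-ins, not driver API:

	#include <stdint.h>
	#include <stdio.h>

	/* Two 32-bit halves aliased to one 64-bit value, with "low" first,
	 * mirroring the declaration order of union acp_dma_count. */
	union byte_count {
		struct {
			uint32_t low;
			uint32_t high;
		} bcount;
		uint64_t bytescount;
	};

	int main(void)
	{
		union byte_count bc;

		bc.bcount.high = 0x1;	/* as if read from *_BYTE_CNT_HIGH */
		bc.bcount.low = 0x10;	/* as if read from *_BYTE_CNT_LOW */
		printf("0x%llx\n", (unsigned long long)bc.bytescount);
		return 0;		/* prints 0x100000010 */
	}
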
 
 static snd_pcm_uframes_t acp_dma_pointer(struct snd_pcm_substream *substream)
@@ -857,15 +935,10 @@
 		return -EINVAL;
 
 	buffersize = frames_to_bytes(runtime, runtime->buffer_size);
-	bytescount = acp_get_byte_count(rtd->acp_mmio, substream->stream);
+	bytescount = acp_get_byte_count(rtd);
 
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		if (bytescount > rtd->i2ssp_renderbytescount)
-			bytescount = bytescount - rtd->i2ssp_renderbytescount;
-	} else {
-		if (bytescount > rtd->i2ssp_capturebytescount)
-			bytescount = bytescount - rtd->i2ssp_capturebytescount;
-	}
+	if (bytescount > rtd->bytescount)
+		bytescount -= rtd->bytescount;
 	pos = do_div(bytescount, buffersize);
 	return bytes_to_frames(runtime, pos);
 }
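
The pointer callback is now plain modular arithmetic on the monotonic hardware byte counter. A small sketch of the math under assumed values; ring_pos is a hypothetical stand-in, and the kernel uses do_div() for the modulo:

	#include <stdint.h>
	#include <stdio.h>

	/* Ring-buffer offset = delta from the count latched at trigger
	 * time, modulo the buffer size. */
	static uint64_t ring_pos(uint64_t bytescount, uint64_t base,
				 uint64_t bufsz)
	{
		if (bytescount > base)
			bytescount -= base;
		return bytescount % bufsz;
	}

	int main(void)
	{
		/* counter started at 4096; one full 16 KiB lap plus 100 bytes */
		printf("%llu\n",
		       (unsigned long long)ring_pos(4096 + 16384 + 100,
						    4096, 16384));
		return 0;	/* prints 100 */
	}
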
@@ -883,34 +956,25 @@
 
 	if (!rtd)
 		return -EINVAL;
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		config_acp_dma_channel(rtd->acp_mmio, SYSRAM_TO_ACP_CH_NUM,
-					PLAYBACK_START_DMA_DESCR_CH12,
-					NUM_DSCRS_PER_CHANNEL, 0);
-		config_acp_dma_channel(rtd->acp_mmio, ACP_TO_I2S_DMA_CH_NUM,
-					PLAYBACK_START_DMA_DESCR_CH13,
-					NUM_DSCRS_PER_CHANNEL, 0);
-	} else {
-		config_acp_dma_channel(rtd->acp_mmio, ACP_TO_SYSRAM_CH_NUM,
-					CAPTURE_START_DMA_DESCR_CH14,
-					NUM_DSCRS_PER_CHANNEL, 0);
-		config_acp_dma_channel(rtd->acp_mmio, I2S_TO_ACP_DMA_CH_NUM,
-					CAPTURE_START_DMA_DESCR_CH15,
-					NUM_DSCRS_PER_CHANNEL, 0);
-	}
+
+	config_acp_dma_channel(rtd->acp_mmio,
+			       rtd->ch1,
+			       rtd->dma_dscr_idx_1,
+			       NUM_DSCRS_PER_CHANNEL, 0);
+	config_acp_dma_channel(rtd->acp_mmio,
+			       rtd->ch2,
+			       rtd->dma_dscr_idx_2,
+			       NUM_DSCRS_PER_CHANNEL, 0);
 	return 0;
 }
 
 static int acp_dma_trigger(struct snd_pcm_substream *substream, int cmd)
 {
 	int ret;
-	u32 loops = 4000;
 	u64 bytescount = 0;
 
 	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct snd_soc_pcm_runtime *prtd = substream->private_data;
 	struct audio_substream_data *rtd = runtime->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(prtd, DRV_NAME);
 
 	if (!rtd)
 		return -EINVAL;
@@ -918,59 +982,40 @@
 	case SNDRV_PCM_TRIGGER_START:
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
 	case SNDRV_PCM_TRIGGER_RESUME:
-		bytescount = acp_get_byte_count(rtd->acp_mmio,
-						substream->stream);
+		bytescount = acp_get_byte_count(rtd);
+		if (rtd->bytescount == 0)
+			rtd->bytescount = bytescount;
 		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-			if (rtd->i2ssp_renderbytescount == 0)
-				rtd->i2ssp_renderbytescount = bytescount;
-			acp_dma_start(rtd->acp_mmio,
-						SYSRAM_TO_ACP_CH_NUM, false);
-			while (acp_reg_read(rtd->acp_mmio, mmACP_DMA_CH_STS) &
-						BIT(SYSRAM_TO_ACP_CH_NUM)) {
-				if (!loops--) {
-					dev_err(component->dev,
-						"acp dma start timeout\n");
-					return -ETIMEDOUT;
-				}
-				cpu_relax();
-			}
-
-			acp_dma_start(rtd->acp_mmio,
-					ACP_TO_I2S_DMA_CH_NUM, true);
-
+			acp_dma_start(rtd->acp_mmio, rtd->ch1);
+			acp_dma_start(rtd->acp_mmio, rtd->ch2);
 		} else {
-			if (rtd->i2ssp_capturebytescount == 0)
-				rtd->i2ssp_capturebytescount = bytescount;
-			acp_dma_start(rtd->acp_mmio,
-					    I2S_TO_ACP_DMA_CH_NUM, true);
+			acp_dma_start(rtd->acp_mmio, rtd->ch2);
+			acp_dma_start(rtd->acp_mmio, rtd->ch1);
 		}
 		ret = 0;
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
-		/* Need to stop only circular DMA channels :
-		 * ACP_TO_I2S_DMA_CH_NUM / I2S_TO_ACP_DMA_CH_NUM. Non-circular
-		 * channels will stopped automatically after its transfer
-		 * completes : SYSRAM_TO_ACP_CH_NUM / ACP_TO_SYSRAM_CH_NUM
+		/* For playback, the non-circular DMA should be stopped first,
+		 * i.e. the SysRAM to ACP SRAM transfer channel (rtd->ch1) is
+		 * stopped before the circular ACP SRAM to I2S FIFO channel
+		 * (rtd->ch2). In the capture case, the I2S FIFO to ACP SRAM
+		 * channel (rtd->ch2) is stopped first, before the circular
+		 * ACP SRAM to SysRAM channel (rtd->ch1).
 		 */
 		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-			ret = acp_dma_stop(rtd->acp_mmio,
-						SYSRAM_TO_ACP_CH_NUM);
-			ret = acp_dma_stop(rtd->acp_mmio,
-					ACP_TO_I2S_DMA_CH_NUM);
-			rtd->i2ssp_renderbytescount = 0;
+			acp_dma_stop(rtd->acp_mmio, rtd->ch1);
+			ret = acp_dma_stop(rtd->acp_mmio, rtd->ch2);
 		} else {
-			ret = acp_dma_stop(rtd->acp_mmio,
-					I2S_TO_ACP_DMA_CH_NUM);
-			ret = acp_dma_stop(rtd->acp_mmio,
-						ACP_TO_SYSRAM_CH_NUM);
-			rtd->i2ssp_capturebytescount = 0;
+			acp_dma_stop(rtd->acp_mmio, rtd->ch2);
+			ret = acp_dma_stop(rtd->acp_mmio, rtd->ch1);
 		}
+		rtd->bytescount = 0;
 		break;
 	default:
 		ret = -EINVAL;
-
 	}
 	return ret;
 }
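
Per the comment in the STOP/SUSPEND branch, the two channels are torn down in opposite orders for playback and capture. A sketch encoding just that ordering with the SP-instance channel numbers from acp.h; stop_dma() and stop_stream() are hypothetical stand-ins for acp_dma_stop():

	#include <stdio.h>

	static void stop_dma(int ch)
	{
		printf("stop channel %d\n", ch);
	}

	/* Playback: ch1 (SysRAM -> ACP SRAM) first, then ch2 (SRAM -> I2S).
	 * Capture: ch2 (I2S -> ACP SRAM) first, then ch1 (SRAM -> SysRAM). */
	static void stop_stream(int is_playback, int ch1, int ch2)
	{
		if (is_playback) {
			stop_dma(ch1);
			stop_dma(ch2);
		} else {
			stop_dma(ch2);
			stop_dma(ch1);
		}
	}

	int main(void)
	{
		stop_stream(1, 12, 13);	/* SP playback: 12 then 13 */
		stop_stream(0, 14, 15);	/* SP capture: 15 then 14 */
		return 0;
	}
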
@@ -978,26 +1023,27 @@
 static int acp_dma_new(struct snd_soc_pcm_runtime *rtd)
 {
 	int ret;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, DRV_NAME);
+	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd,
+								    DRV_NAME);
 	struct audio_drv_data *adata = dev_get_drvdata(component->dev);
 
 	switch (adata->asic_type) {
 	case CHIP_STONEY:
 		ret = snd_pcm_lib_preallocate_pages_for_all(rtd->pcm,
-							SNDRV_DMA_TYPE_DEV,
-							NULL, ST_MIN_BUFFER,
-							ST_MAX_BUFFER);
+							    SNDRV_DMA_TYPE_DEV,
+							    NULL, ST_MIN_BUFFER,
+							    ST_MAX_BUFFER);
 		break;
 	default:
 		ret = snd_pcm_lib_preallocate_pages_for_all(rtd->pcm,
-							SNDRV_DMA_TYPE_DEV,
-							NULL, MIN_BUFFER,
-							MAX_BUFFER);
+							    SNDRV_DMA_TYPE_DEV,
+							    NULL, MIN_BUFFER,
+							    MAX_BUFFER);
 		break;
 	}
 	if (ret < 0)
 		dev_err(component->dev,
-				"buffer preallocation failer error:%d\n", ret);
+			"buffer preallocation failure: error %d\n", ret);
 	return ret;
 }
 
@@ -1007,38 +1053,55 @@
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct audio_substream_data *rtd = runtime->private_data;
 	struct snd_soc_pcm_runtime *prtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(prtd, DRV_NAME);
+	struct snd_soc_component *component = snd_soc_rtdcom_lookup(prtd,
+								    DRV_NAME);
 	struct audio_drv_data *adata = dev_get_drvdata(component->dev);
 
-	kfree(rtd);
-
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		adata->play_i2ssp_stream = NULL;
-		/* For Stoney, Memory gating is disabled,i.e SRAM Banks
-		 * won't be turned off. The default state for SRAM banks is ON.
-		 * Setting SRAM bank state code skipped for STONEY platform.
-		 * added condition checks for Carrizo platform only
-		 */
-		if (adata->asic_type != CHIP_STONEY) {
-			for (bank = 1; bank <= 4; bank++)
-				acp_set_sram_bank_state(adata->acp_mmio, bank,
-				false);
+		switch (rtd->i2s_instance) {
+		case I2S_BT_INSTANCE:
+			adata->play_i2sbt_stream = NULL;
+			break;
+		case I2S_SP_INSTANCE:
+		default:
+			adata->play_i2ssp_stream = NULL;
+			/*
+			 * For Stoney, Memory gating is disabled, i.e. SRAM
+			 * Banks won't be turned off. The default state for
+			 * SRAM banks is ON. Setting the SRAM bank state is
+			 * skipped for the STONEY platform; condition checks
+			 * added for the Carrizo platform only.
+			 */
+			if (adata->asic_type != CHIP_STONEY) {
+				for (bank = 1; bank <= 4; bank++)
+					acp_set_sram_bank_state(adata->acp_mmio,
+								bank, false);
+			}
 		}
 	} else  {
-		adata->capture_i2ssp_stream = NULL;
-		if (adata->asic_type != CHIP_STONEY) {
-			for (bank = 5; bank <= 8; bank++)
-				acp_set_sram_bank_state(adata->acp_mmio, bank,
-						     false);
+		switch (rtd->i2s_instance) {
+		case I2S_BT_INSTANCE:
+			adata->capture_i2sbt_stream = NULL;
+			break;
+		case I2S_SP_INSTANCE:
+		default:
+			adata->capture_i2ssp_stream = NULL;
+			if (adata->asic_type != CHIP_STONEY) {
+				for (bank = 5; bank <= 8; bank++)
+					acp_set_sram_bank_state(adata->acp_mmio,
+								bank, false);
+			}
 		}
 	}
 
-	/* Disable ACP irq, when the current stream is being closed and
+	/*
+	 * Disable ACP irq, when the current stream is being closed and
 	 * another stream is also not active.
-	*/
-	if (!adata->play_i2ssp_stream && !adata->capture_i2ssp_stream)
+	 */
+	if (!adata->play_i2ssp_stream && !adata->capture_i2ssp_stream &&
+	    !adata->play_i2sbt_stream && !adata->capture_i2sbt_stream)
 		acp_reg_write(0, adata->acp_mmio, mmACP_EXTERNAL_INTR_ENB);
-
+	kfree(rtd);
 	return 0;
 }
 
@@ -1054,7 +1117,7 @@
 	.prepare = acp_dma_prepare,
 };
 
-static struct snd_soc_component_driver acp_asoc_platform = {
+static const struct snd_soc_component_driver acp_asoc_platform = {
 	.name = DRV_NAME,
 	.ops = &acp_dma_ops,
 	.pcm_new = acp_dma_new,
@@ -1073,8 +1136,8 @@
 	}
 
 	audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data),
-					GFP_KERNEL);
-	if (audio_drv_data == NULL)
+				      GFP_KERNEL);
+	if (!audio_drv_data)
 		return -ENOMEM;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1082,13 +1145,16 @@
 	if (IS_ERR(audio_drv_data->acp_mmio))
 		return PTR_ERR(audio_drv_data->acp_mmio);
 
-	/* The following members gets populated in device 'open'
+	/*
+	 * The following members get populated in device 'open'
 	 * function. Till then interrupts are disabled in 'acp_init'
 	 * and device doesn't generate any interrupts.
 	 */
 
 	audio_drv_data->play_i2ssp_stream = NULL;
 	audio_drv_data->capture_i2ssp_stream = NULL;
+	audio_drv_data->play_i2sbt_stream = NULL;
+	audio_drv_data->capture_i2sbt_stream = NULL;
 
 	audio_drv_data->asic_type =  *pdata;
 
@@ -1099,7 +1165,7 @@
 	}
 
 	status = devm_request_irq(&pdev->dev, res->start, dma_irq_handler,
-					0, "ACP_IRQ", &pdev->dev);
+				  0, "ACP_IRQ", &pdev->dev);
 	if (status) {
 		dev_err(&pdev->dev, "ACP IRQ request failed\n");
 		return status;
@@ -1115,7 +1181,7 @@
 	}
 
 	status = devm_snd_soc_register_component(&pdev->dev,
-						&acp_asoc_platform, NULL, 0);
+						 &acp_asoc_platform, NULL, 0);
 	if (status != 0) {
 		dev_err(&pdev->dev, "Fail to register ALSA platform device\n");
 		return status;
@@ -1145,6 +1211,7 @@
 {
 	u16 bank;
 	int status;
+	struct audio_substream_data *rtd;
 	struct audio_drv_data *adata = dev_get_drvdata(dev);
 
 	status = acp_init(adata->acp_mmio, adata->asic_type);
@@ -1154,28 +1221,40 @@
 	}
 
 	if (adata->play_i2ssp_stream && adata->play_i2ssp_stream->runtime) {
-		/* For Stoney, Memory gating is disabled,i.e SRAM Banks
+		/*
+		 * For Stoney, Memory gating is disabled,i.e SRAM Banks
 		 * won't be turned off. The default state for SRAM banks is ON.
 		 * Setting SRAM bank state code skipped for STONEY platform.
 		 */
 		if (adata->asic_type != CHIP_STONEY) {
 			for (bank = 1; bank <= 4; bank++)
 				acp_set_sram_bank_state(adata->acp_mmio, bank,
-						true);
+							true);
 		}
-		config_acp_dma(adata->acp_mmio,
-			adata->play_i2ssp_stream->runtime->private_data,
-			adata->asic_type);
+		rtd = adata->play_i2ssp_stream->runtime->private_data;
+		config_acp_dma(adata->acp_mmio, rtd, adata->asic_type);
 	}
-	if (adata->capture_i2ssp_stream && adata->capture_i2ssp_stream->runtime) {
+	if (adata->capture_i2ssp_stream &&
+	    adata->capture_i2ssp_stream->runtime) {
 		if (adata->asic_type != CHIP_STONEY) {
 			for (bank = 5; bank <= 8; bank++)
 				acp_set_sram_bank_state(adata->acp_mmio, bank,
-						true);
+							true);
 		}
-		config_acp_dma(adata->acp_mmio,
-			adata->capture_i2ssp_stream->runtime->private_data,
-			adata->asic_type);
+		rtd =  adata->capture_i2ssp_stream->runtime->private_data;
+		config_acp_dma(adata->acp_mmio, rtd, adata->asic_type);
+	}
+	if (adata->asic_type != CHIP_CARRIZO) {
+		if (adata->play_i2sbt_stream &&
+		    adata->play_i2sbt_stream->runtime) {
+			rtd = adata->play_i2sbt_stream->runtime->private_data;
+			config_acp_dma(adata->acp_mmio, rtd, adata->asic_type);
+		}
+		if (adata->capture_i2sbt_stream &&
+		    adata->capture_i2sbt_stream->runtime) {
+			rtd = adata->capture_i2sbt_stream->runtime->private_data;
+			config_acp_dma(adata->acp_mmio, rtd, adata->asic_type);
+		}
 	}
 	acp_reg_write(1, adata->acp_mmio, mmACP_EXTERNAL_INTR_ENB);
 	return 0;
diff --git a/sound/soc/amd/acp.h b/sound/soc/amd/acp.h
index ba01510..9cd3e96 100644
--- a/sound/soc/amd/acp.h
+++ b/sound/soc/amd/acp.h
@@ -10,17 +10,30 @@
 #define ACP_PLAYBACK_PTE_OFFSET			10
 #define ACP_CAPTURE_PTE_OFFSET			0
 
+/* Playback and Capture Offsets for Stoney */
+#define ACP_ST_PLAYBACK_PTE_OFFSET	0x04
+#define ACP_ST_CAPTURE_PTE_OFFSET	0x00
+#define ACP_ST_BT_PLAYBACK_PTE_OFFSET	0x08
+#define ACP_ST_BT_CAPTURE_PTE_OFFSET	0x0c
+
 #define ACP_GARLIC_CNTL_DEFAULT			0x00000FB4
 #define ACP_ONION_CNTL_DEFAULT			0x00000FB4
 
 #define ACP_PHYSICAL_BASE			0x14000
 
-/* Playback SRAM address (as a destination in dma descriptor) */
-#define ACP_SHARED_RAM_BANK_1_ADDRESS		0x4002000
-
-/* Capture SRAM address (as a source in dma descriptor) */
-#define ACP_SHARED_RAM_BANK_5_ADDRESS		0x400A000
-#define ACP_SHARED_RAM_BANK_3_ADDRESS		0x4006000
+/*
+ * For the I2S SP controller instance, Stoney uses SRAM Bank 1 for
+ * playback and SRAM Bank 2 for capture, whereas for the BT I2S
+ * instance, Stoney uses SRAM Bank 3 for playback and SRAM Bank 4
+ * for capture. Carrizo uses the I2S SP controller instance: SRAM
+ * Banks 1, 2, 3, 4 are used for playback and SRAM Banks 5, 6, 7, 8
+ * for capture.
+ */
+#define ACP_SRAM_BANK_1_ADDRESS		0x4002000
+#define ACP_SRAM_BANK_2_ADDRESS		0x4004000
+#define ACP_SRAM_BANK_3_ADDRESS		0x4006000
+#define ACP_SRAM_BANK_4_ADDRESS		0x4008000
+#define ACP_SRAM_BANK_5_ADDRESS		0x400A000
 
 #define ACP_DMA_RESET_TIME			10000
 #define ACP_CLOCK_EN_TIME_OUT_VALUE		0x000000FF
@@ -35,8 +48,13 @@
 
 #define TO_ACP_I2S_1   0x2
 #define TO_ACP_I2S_2   0x4
+#define TO_BLUETOOTH   0x3
 #define FROM_ACP_I2S_1 0xa
 #define FROM_ACP_I2S_2 0xb
+#define FROM_BLUETOOTH 0xb
+
+#define I2S_SP_INSTANCE                 0x01
+#define I2S_BT_INSTANCE                 0x02
 
 #define ACP_TILE_ON_MASK                0x03
 #define ACP_TILE_OFF_MASK               0x02
@@ -57,6 +75,14 @@
 #define ACP_TO_SYSRAM_CH_NUM 14
 #define I2S_TO_ACP_DMA_CH_NUM 15
 
+/* Playback DMA Channels for I2S BT instance */
+#define SYSRAM_TO_ACP_BT_INSTANCE_CH_NUM  8
+#define ACP_TO_I2S_DMA_BT_INSTANCE_CH_NUM 9
+
+/* Capture DMA Channels for I2S BT Instance */
+#define ACP_TO_SYSRAM_BT_INSTANCE_CH_NUM 10
+#define I2S_TO_ACP_DMA_BT_INSTANCE_CH_NUM 11
+
 #define NUM_DSCRS_PER_CHANNEL 2
 
 #define PLAYBACK_START_DMA_DESCR_CH12 0
@@ -69,9 +95,23 @@
 #define CAPTURE_START_DMA_DESCR_CH15 6
 #define CAPTURE_END_DMA_DESCR_CH15 7
 
+/* I2S BT Instance DMA Descriptors */
+#define PLAYBACK_START_DMA_DESCR_CH8 8
+#define PLAYBACK_END_DMA_DESCR_CH8 9
+#define PLAYBACK_START_DMA_DESCR_CH9 10
+#define PLAYBACK_END_DMA_DESCR_CH9 11
+
+#define CAPTURE_START_DMA_DESCR_CH10 12
+#define CAPTURE_END_DMA_DESCR_CH10 13
+#define CAPTURE_START_DMA_DESCR_CH11 14
+#define CAPTURE_END_DMA_DESCR_CH11 15
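
Reading the defines above, each DMA channel appears to own NUM_DSCRS_PER_CHANNEL (= 2) consecutive descriptor slots handed out in sequence; a sketch of that assumed layout:

	#include <stdio.h>

	#define NUM_DSCRS_PER_CHANNEL 2

	int main(void)
	{
		/* order in which the defines hand out descriptor pairs */
		int channels[] = { 12, 13, 14, 15, 8, 9, 10, 11 };
		int i;

		for (i = 0; i < 8; i++)
			printf("CH%d: start %d, end %d\n", channels[i],
			       i * NUM_DSCRS_PER_CHANNEL,
			       i * NUM_DSCRS_PER_CHANNEL + 1);
		return 0;	/* CH12: 0/1 ... CH11: 14/15 */
	}
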
+
 #define mmACP_I2S_16BIT_RESOLUTION_EN       0x5209
 #define ACP_I2S_MIC_16BIT_RESOLUTION_EN 0x01
 #define ACP_I2S_SP_16BIT_RESOLUTION_EN	0x02
+#define ACP_I2S_BT_16BIT_RESOLUTION_EN	0x04
+#define ACP_BT_UART_PAD_SELECT_MASK	0x1
+
 enum acp_dma_priority_level {
 	/* 0x0 Specifies the DMA channel is given normal priority */
 	ACP_DMA_PRIORITY_LEVEL_NORMAL = 0x0,
@@ -84,20 +124,39 @@
 	struct page *pg;
 	unsigned int order;
 	u16 num_of_pages;
+	u16 i2s_instance;
 	u16 direction;
+	u16 ch1;
+	u16 ch2;
+	u16 destination;
+	u16 dma_dscr_idx_1;
+	u16 dma_dscr_idx_2;
+	u32 pte_offset;
+	u32 sram_bank;
+	u32 byte_cnt_high_reg_offset;
+	u32 byte_cnt_low_reg_offset;
 	uint64_t size;
-	u64 i2ssp_renderbytescount;
-	u64 i2ssp_capturebytescount;
+	u64 bytescount;
 	void __iomem *acp_mmio;
 };
 
 struct audio_drv_data {
 	struct snd_pcm_substream *play_i2ssp_stream;
 	struct snd_pcm_substream *capture_i2ssp_stream;
+	struct snd_pcm_substream *play_i2sbt_stream;
+	struct snd_pcm_substream *capture_i2sbt_stream;
 	void __iomem *acp_mmio;
 	u32 asic_type;
 };
 
+/*
+ * This structure is used for platform data transfer between the
+ * machine driver and the DMA driver.
+ */
+struct acp_platform_info {
+	u16 i2s_instance;
+};
+
 union acp_dma_count {
 	struct {
 	u32 low;
@@ -115,23 +174,25 @@
 };
 
 enum {
-	ACP_DMA_ATTRIBUTES_SHAREDMEM_TO_DAGB_ONION = 0x0,
-	ACP_DMA_ATTRIBUTES_SHARED_MEM_TO_DAGB_GARLIC = 0x1,
-	ACP_DMA_ATTRIBUTES_DAGB_ONION_TO_SHAREDMEM = 0x8,
-	ACP_DMA_ATTRIBUTES_DAGB_GARLIC_TO_SHAREDMEM = 0x9,
-	ACP_DMA_ATTRIBUTES_FORCE_SIZE = 0xF
+	ACP_DMA_ATTR_SHAREDMEM_TO_DAGB_ONION = 0x0,
+	ACP_DMA_ATTR_SHARED_MEM_TO_DAGB_GARLIC = 0x1,
+	ACP_DMA_ATTR_DAGB_ONION_TO_SHAREDMEM = 0x8,
+	ACP_DMA_ATTR_DAGB_GARLIC_TO_SHAREDMEM = 0x9,
+	ACP_DMA_ATTR_FORCE_SIZE = 0xF
 };
 
 typedef struct acp_dma_dscr_transfer {
 	/* Specifies the source memory location for the DMA data transfer. */
 	u32 src;
-	/* Specifies the destination memory location to where the data will
+	/*
+	 * Specifies the destination memory location to which the data will
 	 * be transferred.
-	*/
+	 */
 	u32 dest;
-	/* Specifies the number of bytes need to be transferred
-	* from source to destination memory.Transfer direction & IOC enable
-	*/
+	/*
+	 * Specifies the number of bytes that need to be transferred from
+	 * source to destination memory. Transfer direction & IOC enable.
+	 */
 	u32 xfer_val;
 	/* Reserved for future use */
 	u32 reserved;
diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig
index dcee145..64b784e 100644
--- a/sound/soc/atmel/Kconfig
+++ b/sound/soc/atmel/Kconfig
@@ -88,4 +88,13 @@
 	help
 	  Say Y if you want to add support for the ASoC driver for the
 	  Axentia TSE-850 with a PCM5142 codec.
+
+config SND_ATMEL_SOC_I2S
+	tristate "Atmel ASoC driver for boards using I2S"
+	depends on OF && (ARCH_AT91 || COMPILE_TEST)
+	select SND_SOC_GENERIC_DMAENGINE_PCM
+	select REGMAP_MMIO
+	help
+	  Say Y or M if you want to add support for the Atmel ASoC driver
+	  for boards using I2S.
 endif
diff --git a/sound/soc/atmel/Makefile b/sound/soc/atmel/Makefile
index 4440646..cd87cb4 100644
--- a/sound/soc/atmel/Makefile
+++ b/sound/soc/atmel/Makefile
@@ -3,10 +3,12 @@
 snd-soc-atmel-pcm-pdc-objs := atmel-pcm-pdc.o
 snd-soc-atmel-pcm-dma-objs := atmel-pcm-dma.o
 snd-soc-atmel_ssc_dai-objs := atmel_ssc_dai.o
+snd-soc-atmel-i2s-objs := atmel-i2s.o
 
 obj-$(CONFIG_SND_ATMEL_SOC_PDC) += snd-soc-atmel-pcm-pdc.o
 obj-$(CONFIG_SND_ATMEL_SOC_DMA) += snd-soc-atmel-pcm-dma.o
 obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel_ssc_dai.o
+obj-$(CONFIG_SND_ATMEL_SOC_I2S) += snd-soc-atmel-i2s.o
 
 # AT91 Machine Support
 snd-soc-sam9g20-wm8731-objs := sam9g20_wm8731.o
diff --git a/sound/soc/atmel/atmel-i2s.c b/sound/soc/atmel/atmel-i2s.c
new file mode 100644
index 0000000..5d3b5af
--- /dev/null
+++ b/sound/soc/atmel/atmel-i2s.c
@@ -0,0 +1,765 @@
+/*
+ * Driver for Atmel I2S controller
+ *
+ * Copyright (C) 2015 Atmel Corporation
+ *
+ * Author: Cyrille Pitchen <cyrille.pitchen@atmel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/mfd/syscon.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/initval.h>
+#include <sound/soc.h>
+#include <sound/dmaengine_pcm.h>
+
+#define ATMEL_I2SC_MAX_TDM_CHANNELS	8
+
+/*
+ * ---- I2S Controller Register map ----
+ */
+#define ATMEL_I2SC_CR		0x0000	/* Control Register */
+#define ATMEL_I2SC_MR		0x0004	/* Mode Register */
+#define ATMEL_I2SC_SR		0x0008	/* Status Register */
+#define ATMEL_I2SC_SCR		0x000c	/* Status Clear Register */
+#define ATMEL_I2SC_SSR		0x0010	/* Status Set Register */
+#define ATMEL_I2SC_IER		0x0014	/* Interrupt Enable Register */
+#define ATMEL_I2SC_IDR		0x0018	/* Interrupt Disable Register */
+#define ATMEL_I2SC_IMR		0x001c	/* Interrupt Mask Register */
+#define ATMEL_I2SC_RHR		0x0020	/* Receiver Holding Register */
+#define ATMEL_I2SC_THR		0x0024	/* Transmitter Holding Register */
+#define ATMEL_I2SC_VERSION	0x0028	/* Version Register */
+
+/*
+ * ---- Control Register (Write-only) ----
+ */
+#define ATMEL_I2SC_CR_RXEN	BIT(0)	/* Receiver Enable */
+#define ATMEL_I2SC_CR_RXDIS	BIT(1)	/* Receiver Disable */
+#define ATMEL_I2SC_CR_CKEN	BIT(2)	/* Clock Enable */
+#define ATMEL_I2SC_CR_CKDIS	BIT(3)	/* Clock Disable */
+#define ATMEL_I2SC_CR_TXEN	BIT(4)	/* Transmitter Enable */
+#define ATMEL_I2SC_CR_TXDIS	BIT(5)	/* Transmitter Disable */
+#define ATMEL_I2SC_CR_SWRST	BIT(7)	/* Software Reset */
+
+/*
+ * ---- Mode Register (Read/Write) ----
+ */
+#define ATMEL_I2SC_MR_MODE_MASK		GENMASK(0, 0)
+#define ATMEL_I2SC_MR_MODE_SLAVE	(0 << 0)
+#define ATMEL_I2SC_MR_MODE_MASTER	(1 << 0)
+
+#define ATMEL_I2SC_MR_DATALENGTH_MASK		GENMASK(4, 2)
+#define ATMEL_I2SC_MR_DATALENGTH_32_BITS	(0 << 2)
+#define ATMEL_I2SC_MR_DATALENGTH_24_BITS	(1 << 2)
+#define ATMEL_I2SC_MR_DATALENGTH_20_BITS	(2 << 2)
+#define ATMEL_I2SC_MR_DATALENGTH_18_BITS	(3 << 2)
+#define ATMEL_I2SC_MR_DATALENGTH_16_BITS	(4 << 2)
+#define ATMEL_I2SC_MR_DATALENGTH_16_BITS_COMPACT	(5 << 2)
+#define ATMEL_I2SC_MR_DATALENGTH_8_BITS		(6 << 2)
+#define ATMEL_I2SC_MR_DATALENGTH_8_BITS_COMPACT	(7 << 2)
+
+#define ATMEL_I2SC_MR_FORMAT_MASK	GENMASK(7, 6)
+#define ATMEL_I2SC_MR_FORMAT_I2S	(0 << 6)
+#define ATMEL_I2SC_MR_FORMAT_LJ		(1 << 6)  /* Left Justified */
+#define ATMEL_I2SC_MR_FORMAT_TDM	(2 << 6)
+#define ATMEL_I2SC_MR_FORMAT_TDMLJ	(3 << 6)
+
+/* Left audio samples duplicated to right audio channel */
+#define ATMEL_I2SC_MR_RXMONO		BIT(8)
+
+/* Receiver uses one DMA channel ... */
+#define ATMEL_I2SC_MR_RXDMA_MASK	GENMASK(9, 9)
+#define ATMEL_I2SC_MR_RXDMA_SINGLE	(0 << 9)  /* for all audio channels */
+#define ATMEL_I2SC_MR_RXDMA_MULTIPLE	(1 << 9)  /* per audio channel */
+
+/* I2SDO output of I2SC is internally connected to I2SDI input */
+#define ATMEL_I2SC_MR_RXLOOP		BIT(10)
+
+/* Left audio samples duplicated to right audio channel */
+#define ATMEL_I2SC_MR_TXMONO		BIT(12)
+
+/* Transmitter uses one DMA channel ... */
+#define ATMEL_I2SC_MR_TXDMA_MASK	GENMASK(13, 13)
+#define ATMEL_I2SC_MR_TXDMA_SINGLE	(0 << 13)  /* for all audio channels */
+#define ATMEL_I2SC_MR_TXDME_MULTIPLE	(1 << 13)  /* per audio channel */
+
+/* Which sample is transmitted on underrun */
+#define ATMEL_I2SC_MR_TXSAME_MASK	GENMASK(14, 14)
+#define ATMEL_I2SC_MR_TXSAME_ZERO	(0 << 14)  /* Zero sample */
+#define ATMEL_I2SC_MR_TXSAME_PREVIOUS	(1 << 14)  /* Previous sample */
+
+/* Audio Clock to I2SC Master Clock ratio */
+#define ATMEL_I2SC_MR_IMCKDIV_MASK	GENMASK(21, 16)
+#define ATMEL_I2SC_MR_IMCKDIV(div) \
+	(((div) << 16) & ATMEL_I2SC_MR_IMCKDIV_MASK)
+
+/* Master Clock to fs ratio */
+#define ATMEL_I2SC_MR_IMCKFS_MASK	GENMASK(29, 24)
+#define ATMEL_I2SC_MR_IMCKFS(fs) \
+	(((fs) << 24) & ATMEL_I2SC_MR_IMCKFS_MASK)
+
+/* Master Clock mode */
+#define ATMEL_I2SC_MR_IMCKMODE_MASK	GENMASK(30, 30)
+/* 0: No master clock generated (selected clock drives I2SCK pin) */
+#define ATMEL_I2SC_MR_IMCKMODE_I2SCK	(0 << 30)
+/* 1: master clock generated (internally generated clock drives I2SMCK pin) */
+#define ATMEL_I2SC_MR_IMCKMODE_I2SMCK	(1 << 30)
+
+/* Slot Width */
+/* 0: slot is 32 bits wide for DATALENGTH = 18/20/24 bits. */
+/* 1: slot is 24 bits wide for DATALENGTH = 18/20/24 bits. */
+#define ATMEL_I2SC_MR_IWS		BIT(31)
+
+/*
+ * ---- Status Registers ----
+ */
+#define ATMEL_I2SC_SR_RXEN	BIT(0)	/* Receiver Enabled */
+#define ATMEL_I2SC_SR_RXRDY	BIT(1)	/* Receive Ready */
+#define ATMEL_I2SC_SR_RXOR	BIT(2)	/* Receive Overrun */
+
+#define ATMEL_I2SC_SR_TXEN	BIT(4)	/* Transmitter Enabled */
+#define ATMEL_I2SC_SR_TXRDY	BIT(5)	/* Transmit Ready */
+#define ATMEL_I2SC_SR_TXUR	BIT(6)	/* Transmit Underrun */
+
+/* Receive Overrun Channel */
+#define ATMEL_I2SC_SR_RXORCH_MASK	GENMASK(15, 8)
+#define ATMEL_I2SC_SR_RXORCH(ch)	(1 << (((ch) & 0x7) + 8))
+
+/* Transmit Underrun Channel */
+#define ATMEL_I2SC_SR_TXURCH_MASK	GENMASK(27, 20)
+#define ATMEL_I2SC_SR_TXURCH(ch)	(1 << (((ch) & 0x7) + 20))
+
+/*
+ * ---- Interrupt Enable/Disable/Mask Registers ----
+ */
+#define ATMEL_I2SC_INT_RXRDY	ATMEL_I2SC_SR_RXRDY
+#define ATMEL_I2SC_INT_RXOR	ATMEL_I2SC_SR_RXOR
+#define ATMEL_I2SC_INT_TXRDY	ATMEL_I2SC_SR_TXRDY
+#define ATMEL_I2SC_INT_TXUR	ATMEL_I2SC_SR_TXUR
+
+static const struct regmap_config atmel_i2s_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = ATMEL_I2SC_VERSION,
+};
+
+struct atmel_i2s_gck_param {
+	int		fs;
+	unsigned long	mck;
+	int		imckdiv;
+	int		imckfs;
+};
+
+#define I2S_MCK_12M288		12288000UL
+#define I2S_MCK_11M2896		11289600UL
+
+/* mck = (32 * (imckfs+1) / (imckdiv+1)) * fs */
+static const struct atmel_i2s_gck_param gck_params[] = {
+	/* mck = 12.288MHz */
+	{  8000, I2S_MCK_12M288, 0, 47},	/* mck = 1536 fs */
+	{ 16000, I2S_MCK_12M288, 1, 47},	/* mck =  768 fs */
+	{ 24000, I2S_MCK_12M288, 3, 63},	/* mck =  512 fs */
+	{ 32000, I2S_MCK_12M288, 3, 47},	/* mck =  384 fs */
+	{ 48000, I2S_MCK_12M288, 7, 63},	/* mck =  256 fs */
+	{ 64000, I2S_MCK_12M288, 7, 47},	/* mck =  192 fs */
+	{ 96000, I2S_MCK_12M288, 7, 31},	/* mck =  128 fs */
+	{192000, I2S_MCK_12M288, 7, 15},	/* mck =   64 fs */
+
+	/* mck = 11.2896MHz */
+	{ 11025, I2S_MCK_11M2896, 1, 63},	/* mck = 1024 fs */
+	{ 22050, I2S_MCK_11M2896, 3, 63},	/* mck =  512 fs */
+	{ 44100, I2S_MCK_11M2896, 7, 63},	/* mck =  256 fs */
+	{ 88200, I2S_MCK_11M2896, 7, 31},	/* mck =  128 fs */
+	{176400, I2S_MCK_11M2896, 7, 15},	/* mck =   64 fs */
+};
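
A quick arithmetic check of the comment's formula, mck = (32 * (imckfs + 1) / (imckdiv + 1)) * fs, against two rows of the table above:

	#include <stdio.h>

	int main(void)
	{
		/* {48000, I2S_MCK_12M288, 7, 63}: 32 * 64 / 8 * 48000 */
		printf("%lu\n", 32UL * (63 + 1) / (7 + 1) * 48000);	/* 12288000 */
		/* {44100, I2S_MCK_11M2896, 7, 63}: 32 * 64 / 8 * 44100 */
		printf("%lu\n", 32UL * (63 + 1) / (7 + 1) * 44100);	/* 11289600 */
		return 0;
	}
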
+
+struct atmel_i2s_dev;
+
+struct atmel_i2s_caps {
+	int	(*mck_init)(struct atmel_i2s_dev *, struct device_node *np);
+};
+
+struct atmel_i2s_dev {
+	struct device				*dev;
+	struct regmap				*regmap;
+	struct clk				*pclk;
+	struct clk				*gclk;
+	struct clk				*aclk;
+	struct snd_dmaengine_dai_dma_data	playback;
+	struct snd_dmaengine_dai_dma_data	capture;
+	unsigned int				fmt;
+	const struct atmel_i2s_gck_param	*gck_param;
+	const struct atmel_i2s_caps		*caps;
+};
+
+static irqreturn_t atmel_i2s_interrupt(int irq, void *dev_id)
+{
+	struct atmel_i2s_dev *dev = dev_id;
+	unsigned int sr, imr, pending, ch, mask;
+	irqreturn_t ret = IRQ_NONE;
+
+	regmap_read(dev->regmap, ATMEL_I2SC_SR, &sr);
+	regmap_read(dev->regmap, ATMEL_I2SC_IMR, &imr);
+	pending = sr & imr;
+
+	if (!pending)
+		return IRQ_NONE;
+
+	if (pending & ATMEL_I2SC_INT_RXOR) {
+		mask = ATMEL_I2SC_SR_RXOR;
+
+		for (ch = 0; ch < ATMEL_I2SC_MAX_TDM_CHANNELS; ++ch) {
+			if (sr & ATMEL_I2SC_SR_RXORCH(ch)) {
+				mask |= ATMEL_I2SC_SR_RXORCH(ch);
+				dev_err(dev->dev,
+					"RX overrun on channel %d\n", ch);
+			}
+		}
+		regmap_write(dev->regmap, ATMEL_I2SC_SCR, mask);
+		ret = IRQ_HANDLED;
+	}
+
+	if (pending & ATMEL_I2SC_INT_TXUR) {
+		mask = ATMEL_I2SC_SR_TXUR;
+
+		for (ch = 0; ch < ATMEL_I2SC_MAX_TDM_CHANNELS; ++ch) {
+			if (sr & ATMEL_I2SC_SR_TXURCH(ch)) {
+				mask |= ATMEL_I2SC_SR_TXURCH(ch);
+				dev_err(dev->dev,
+					"TX underrun on channel %d\n", ch);
+			}
+		}
+		regmap_write(dev->regmap, ATMEL_I2SC_SCR, mask);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
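
The handler scans all eight TDM channels for per-channel error flags. A sketch of the same decode; SR_RXORCH/SR_TXURCH mirror the bit layout of the ATMEL_I2SC_SR_* macros above, and the sr value is fabricated:

	#include <stdio.h>

	/* RX overrun flags occupy SR bits 15:8, TX underrun flags bits
	 * 27:20, one bit per TDM channel. */
	#define SR_RXORCH(ch)	(1u << (((ch) & 0x7) + 8))
	#define SR_TXURCH(ch)	(1u << (((ch) & 0x7) + 20))

	int main(void)
	{
		unsigned int sr = SR_RXORCH(0) | SR_TXURCH(3);
		int ch;

		for (ch = 0; ch < 8; ch++) {
			if (sr & SR_RXORCH(ch))
				printf("RX overrun on channel %d\n", ch);
			if (sr & SR_TXURCH(ch))
				printf("TX underrun on channel %d\n", ch);
		}
		return 0;
	}
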
+
+#define ATMEL_I2S_RATES		SNDRV_PCM_RATE_8000_192000
+
+#define ATMEL_I2S_FORMATS	(SNDRV_PCM_FMTBIT_S8 |		\
+				 SNDRV_PCM_FMTBIT_S16_LE |	\
+				 SNDRV_PCM_FMTBIT_S18_3LE |	\
+				 SNDRV_PCM_FMTBIT_S20_3LE |	\
+				 SNDRV_PCM_FMTBIT_S24_3LE |	\
+				 SNDRV_PCM_FMTBIT_S24_LE |	\
+				 SNDRV_PCM_FMTBIT_S32_LE)
+
+static int atmel_i2s_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+	struct atmel_i2s_dev *dev = snd_soc_dai_get_drvdata(dai);
+
+	dev->fmt = fmt;
+	return 0;
+}
+
+static int atmel_i2s_prepare(struct snd_pcm_substream *substream,
+			     struct snd_soc_dai *dai)
+{
+	struct atmel_i2s_dev *dev = snd_soc_dai_get_drvdata(dai);
+	bool is_playback = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+	unsigned int rhr, sr = 0;
+
+	if (is_playback) {
+		regmap_read(dev->regmap, ATMEL_I2SC_SR, &sr);
+		if (sr & ATMEL_I2SC_SR_RXRDY) {
+			/*
+			 * The RX Ready flag should not be set. However, if it
+			 * is, flush (read) the Receive Holding Register to
+			 * start from a clean state.
+			 */
+			dev_dbg(dev->dev, "RXRDY is set\n");
+			regmap_read(dev->regmap, ATMEL_I2SC_RHR, &rhr);
+		}
+	}
+
+	return 0;
+}
+
+static int atmel_i2s_get_gck_param(struct atmel_i2s_dev *dev, int fs)
+{
+	int i, best;
+
+	if (!dev->gclk || !dev->aclk) {
+		dev_err(dev->dev, "cannot generate the I2S Master Clock\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Find the best possible settings to generate the I2S Master Clock
+	 * from the PLL Audio.
+	 */
+	dev->gck_param = NULL;
+	best = INT_MAX;
+	for (i = 0; i < ARRAY_SIZE(gck_params); ++i) {
+		const struct atmel_i2s_gck_param *gck_param = &gck_params[i];
+		int val = abs(fs - gck_param->fs);
+
+		if (val < best) {
+			best = val;
+			dev->gck_param = gck_param;
+		}
+	}
+
+	return 0;
+}
+
+static int atmel_i2s_hw_params(struct snd_pcm_substream *substream,
+			       struct snd_pcm_hw_params *params,
+			       struct snd_soc_dai *dai)
+{
+	struct atmel_i2s_dev *dev = snd_soc_dai_get_drvdata(dai);
+	bool is_playback = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+	unsigned int mr = 0;
+	int ret;
+
+	switch (dev->fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		mr |= ATMEL_I2SC_MR_FORMAT_I2S;
+		break;
+
+	default:
+		dev_err(dev->dev, "unsupported bus format\n");
+		return -EINVAL;
+	}
+
+	switch (dev->fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBS_CFS:
+		/* codec is slave, so cpu is master */
+		mr |= ATMEL_I2SC_MR_MODE_MASTER;
+		ret = atmel_i2s_get_gck_param(dev, params_rate(params));
+		if (ret)
+			return ret;
+		break;
+
+	case SND_SOC_DAIFMT_CBM_CFM:
+		/* codec is master, so cpu is slave */
+		mr |= ATMEL_I2SC_MR_MODE_SLAVE;
+		dev->gck_param = NULL;
+		break;
+
+	default:
+		dev_err(dev->dev, "unsupported master/slave mode\n");
+		return -EINVAL;
+	}
+
+	switch (params_channels(params)) {
+	case 1:
+		if (is_playback)
+			mr |= ATMEL_I2SC_MR_TXMONO;
+		else
+			mr |= ATMEL_I2SC_MR_RXMONO;
+		break;
+	case 2:
+		break;
+	default:
+		dev_err(dev->dev, "unsupported number of audio channels\n");
+		return -EINVAL;
+	}
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S8:
+		mr |= ATMEL_I2SC_MR_DATALENGTH_8_BITS;
+		break;
+
+	case SNDRV_PCM_FORMAT_S16_LE:
+		mr |= ATMEL_I2SC_MR_DATALENGTH_16_BITS;
+		break;
+
+	case SNDRV_PCM_FORMAT_S18_3LE:
+		mr |= ATMEL_I2SC_MR_DATALENGTH_18_BITS | ATMEL_I2SC_MR_IWS;
+		break;
+
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		mr |= ATMEL_I2SC_MR_DATALENGTH_20_BITS | ATMEL_I2SC_MR_IWS;
+		break;
+
+	case SNDRV_PCM_FORMAT_S24_3LE:
+		mr |= ATMEL_I2SC_MR_DATALENGTH_24_BITS | ATMEL_I2SC_MR_IWS;
+		break;
+
+	case SNDRV_PCM_FORMAT_S24_LE:
+		mr |= ATMEL_I2SC_MR_DATALENGTH_24_BITS;
+		break;
+
+	case SNDRV_PCM_FORMAT_S32_LE:
+		mr |= ATMEL_I2SC_MR_DATALENGTH_32_BITS;
+		break;
+
+	default:
+		dev_err(dev->dev, "unsupported size/endianness for audio samples\n");
+		return -EINVAL;
+	}
+
+	return regmap_write(dev->regmap, ATMEL_I2SC_MR, mr);
+}
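
hw_params ORs mode, format and data length into a single Mode Register value. A worked example for an assumed master-mode, stereo, S16_LE stream, using the shift/value pairs from the ATMEL_I2SC_MR_* defines:

	#include <stdio.h>

	int main(void)
	{
		unsigned int mr = 0;

		mr |= (0u << 6);	/* ATMEL_I2SC_MR_FORMAT_I2S */
		mr |= (1u << 0);	/* ATMEL_I2SC_MR_MODE_MASTER */
		mr |= (4u << 2);	/* ATMEL_I2SC_MR_DATALENGTH_16_BITS */
		printf("MR = 0x%08x\n", mr);	/* 0x00000011 */
		return 0;
	}
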
+
+static int atmel_i2s_switch_mck_generator(struct atmel_i2s_dev *dev,
+					  bool enabled)
+{
+	unsigned int mr, mr_mask;
+	unsigned long aclk_rate;
+	int ret;
+
+	mr = 0;
+	mr_mask = (ATMEL_I2SC_MR_IMCKDIV_MASK |
+		   ATMEL_I2SC_MR_IMCKFS_MASK |
+		   ATMEL_I2SC_MR_IMCKMODE_MASK);
+
+	if (!enabled) {
+		/* Disable the I2S Master Clock generator. */
+		ret = regmap_write(dev->regmap, ATMEL_I2SC_CR,
+				   ATMEL_I2SC_CR_CKDIS);
+		if (ret)
+			return ret;
+
+		/* Reset the I2S Master Clock generator settings. */
+		ret = regmap_update_bits(dev->regmap, ATMEL_I2SC_MR,
+					 mr_mask, mr);
+		if (ret)
+			return ret;
+
+		/* Disable/unprepare the PMC generated clock. */
+		clk_disable_unprepare(dev->gclk);
+
+		/* Disable/unprepare the PLL audio clock. */
+		clk_disable_unprepare(dev->aclk);
+		return 0;
+	}
+
+	if (!dev->gck_param)
+		return -EINVAL;
+
+	aclk_rate = dev->gck_param->mck * (dev->gck_param->imckdiv + 1);
+
+	/* First change the PLL audio clock frequency ... */
+	ret = clk_set_rate(dev->aclk, aclk_rate);
+	if (ret)
+		return ret;
+
+	/*
+	 * ... then set the PMC generated clock rate to the very same frequency
+	 * to set the gclk parent to aclk.
+	 */
+	ret = clk_set_rate(dev->gclk, aclk_rate);
+	if (ret)
+		return ret;
+
+	/* Prepare and enable the PLL audio clock first ... */
+	ret = clk_prepare_enable(dev->aclk);
+	if (ret)
+		return ret;
+
+	/* ... then prepare and enable the PMC generated clock. */
+	ret = clk_prepare_enable(dev->gclk);
+	if (ret)
+		return ret;
+
+	/* Update the Mode Register to generate the I2S Master Clock. */
+	mr |= ATMEL_I2SC_MR_IMCKDIV(dev->gck_param->imckdiv);
+	mr |= ATMEL_I2SC_MR_IMCKFS(dev->gck_param->imckfs);
+	mr |= ATMEL_I2SC_MR_IMCKMODE_I2SMCK;
+	ret = regmap_update_bits(dev->regmap, ATMEL_I2SC_MR, mr_mask, mr);
+	if (ret)
+		return ret;
+
+	/* Finally enable the I2S Master Clock generator. */
+	return regmap_write(dev->regmap, ATMEL_I2SC_CR,
+			    ATMEL_I2SC_CR_CKEN);
+}
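
On enable, the generator first asks the PLL audio clock for mck * (imckdiv + 1) so that gclk can then be set to the same rate. The resulting request for the assumed 48 kHz table entry:

	#include <stdio.h>

	int main(void)
	{
		unsigned long mck = 12288000UL;	/* I2S_MCK_12M288 */
		int imckdiv = 7;		/* from {48000, ..., 7, 63} */

		printf("%lu\n", mck * (imckdiv + 1));	/* 98304000 */
		return 0;
	}
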
+
+static int atmel_i2s_trigger(struct snd_pcm_substream *substream, int cmd,
+			     struct snd_soc_dai *dai)
+{
+	struct atmel_i2s_dev *dev = snd_soc_dai_get_drvdata(dai);
+	bool is_playback = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+	bool is_master, mck_enabled;
+	unsigned int cr, mr;
+	int err;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		cr = is_playback ? ATMEL_I2SC_CR_TXEN : ATMEL_I2SC_CR_RXEN;
+		mck_enabled = true;
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		cr = is_playback ? ATMEL_I2SC_CR_TXDIS : ATMEL_I2SC_CR_RXDIS;
+		mck_enabled = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Read the Mode Register to retrieve the master/slave state. */
+	err = regmap_read(dev->regmap, ATMEL_I2SC_MR, &mr);
+	if (err)
+		return err;
+	is_master = (mr & ATMEL_I2SC_MR_MODE_MASK) == ATMEL_I2SC_MR_MODE_MASTER;
+
+	/* If master starts, enable the audio clock. */
+	if (is_master && mck_enabled)
+		err = atmel_i2s_switch_mck_generator(dev, true);
+	if (err)
+		return err;
+
+	err = regmap_write(dev->regmap, ATMEL_I2SC_CR, cr);
+	if (err)
+		return err;
+
+	/* If master stops, disable the audio clock. */
+	if (is_master && !mck_enabled)
+		err = atmel_i2s_switch_mck_generator(dev, false);
+
+	return err;
+}
+
+static const struct snd_soc_dai_ops atmel_i2s_dai_ops = {
+	.prepare	= atmel_i2s_prepare,
+	.trigger	= atmel_i2s_trigger,
+	.hw_params	= atmel_i2s_hw_params,
+	.set_fmt	= atmel_i2s_set_dai_fmt,
+};
+
+static int atmel_i2s_dai_probe(struct snd_soc_dai *dai)
+{
+	struct atmel_i2s_dev *dev = snd_soc_dai_get_drvdata(dai);
+
+	snd_soc_dai_init_dma_data(dai, &dev->playback, &dev->capture);
+	return 0;
+}
+
+static struct snd_soc_dai_driver atmel_i2s_dai = {
+	.probe	= atmel_i2s_dai_probe,
+	.playback = {
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = ATMEL_I2S_RATES,
+		.formats = ATMEL_I2S_FORMATS,
+	},
+	.capture = {
+		.channels_min = 1,
+		.channels_max = 2,
+		.rates = ATMEL_I2S_RATES,
+		.formats = ATMEL_I2S_FORMATS,
+	},
+	.ops = &atmel_i2s_dai_ops,
+	.symmetric_rates = 1,
+};
+
+static const struct snd_soc_component_driver atmel_i2s_component = {
+	.name	= "atmel-i2s",
+};
+
+static int atmel_i2s_sama5d2_mck_init(struct atmel_i2s_dev *dev,
+				      struct device_node *np)
+{
+	struct clk *muxclk;
+	int err;
+
+	if (!dev->gclk)
+		return 0;
+
+	/* muxclk is optional, so we return error for probe defer only */
+	muxclk = devm_clk_get(dev->dev, "muxclk");
+	if (IS_ERR(muxclk)) {
+		err = PTR_ERR(muxclk);
+		if (err == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_warn(dev->dev,
+			 "failed to get the I2S clock control: %d\n", err);
+		return 0;
+	}
+
+	return clk_set_parent(muxclk, dev->gclk);
+}
+
+static const struct atmel_i2s_caps atmel_i2s_sama5d2_caps = {
+	.mck_init = atmel_i2s_sama5d2_mck_init,
+};
+
+static const struct of_device_id atmel_i2s_dt_ids[] = {
+	{
+		.compatible = "atmel,sama5d2-i2s",
+		.data = (void *)&atmel_i2s_sama5d2_caps,
+	},
+
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, atmel_i2s_dt_ids);
+
+static int atmel_i2s_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	const struct of_device_id *match;
+	struct atmel_i2s_dev *dev;
+	struct resource *mem;
+	struct regmap *regmap;
+	void __iomem *base;
+	int irq;
+	int err = -ENXIO;
+	unsigned int pcm_flags = 0;
+	unsigned int version;
+
+	/* Get memory for driver data. */
+	dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	/* Get hardware capabilities. */
+	match = of_match_node(atmel_i2s_dt_ids, np);
+	if (match)
+		dev->caps = match->data;
+
+	/* Map I/O registers. */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	regmap = devm_regmap_init_mmio(&pdev->dev, base,
+				       &atmel_i2s_regmap_config);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	/* Request IRQ. */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(&pdev->dev, irq, atmel_i2s_interrupt, 0,
+			       dev_name(&pdev->dev), dev);
+	if (err)
+		return err;
+
+	/* Get the peripheral clock. */
+	dev->pclk = devm_clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(dev->pclk)) {
+		err = PTR_ERR(dev->pclk);
+		dev_err(&pdev->dev,
+			"failed to get the peripheral clock: %d\n", err);
+		return err;
+	}
+
+	/* Get audio clocks to generate the I2S Master Clock (I2S_MCK) */
+	dev->aclk = devm_clk_get(&pdev->dev, "aclk");
+	dev->gclk = devm_clk_get(&pdev->dev, "gclk");
+	if (IS_ERR(dev->aclk) && IS_ERR(dev->gclk)) {
+		if (PTR_ERR(dev->aclk) == -EPROBE_DEFER ||
+		    PTR_ERR(dev->gclk) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		/* Master Mode not supported */
+		dev->aclk = NULL;
+		dev->gclk = NULL;
+	} else if (IS_ERR(dev->gclk)) {
+		err = PTR_ERR(dev->gclk);
+		dev_err(&pdev->dev,
+			"failed to get the PMC generated clock: %d\n", err);
+		return err;
+	} else if (IS_ERR(dev->aclk)) {
+		err = PTR_ERR(dev->aclk);
+		dev_err(&pdev->dev,
+			"failed to get the PLL audio clock: %d\n", err);
+		return err;
+	}
+
+	dev->dev = &pdev->dev;
+	dev->regmap = regmap;
+	platform_set_drvdata(pdev, dev);
+
+	/* Do hardware specific settings to initialize I2S_MCK generator */
+	if (dev->caps && dev->caps->mck_init) {
+		err = dev->caps->mck_init(dev, np);
+		if (err)
+			return err;
+	}
+
+	/* Enable the peripheral clock. */
+	err = clk_prepare_enable(dev->pclk);
+	if (err)
+		return err;
+
+	/* Get IP version. */
+	regmap_read(dev->regmap, ATMEL_I2SC_VERSION, &version);
+	dev_info(&pdev->dev, "hw version: %#x\n", version);
+
+	/* Enable error interrupts. */
+	regmap_write(dev->regmap, ATMEL_I2SC_IER,
+		     ATMEL_I2SC_INT_RXOR | ATMEL_I2SC_INT_TXUR);
+
+	err = devm_snd_soc_register_component(&pdev->dev,
+					      &atmel_i2s_component,
+					      &atmel_i2s_dai, 1);
+	if (err) {
+		dev_err(&pdev->dev, "failed to register DAI: %d\n", err);
+		clk_disable_unprepare(dev->pclk);
+		return err;
+	}
+
+	/* Prepare DMA config. */
+	dev->playback.addr	= (dma_addr_t)mem->start + ATMEL_I2SC_THR;
+	dev->playback.maxburst	= 1;
+	dev->capture.addr	= (dma_addr_t)mem->start + ATMEL_I2SC_RHR;
+	dev->capture.maxburst	= 1;
+
+	if (of_property_match_string(np, "dma-names", "rx-tx") == 0)
+		pcm_flags |= SND_DMAENGINE_PCM_FLAG_HALF_DUPLEX;
+	err = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, pcm_flags);
+	if (err) {
+		dev_err(&pdev->dev, "failed to register PCM: %d\n", err);
+		clk_disable_unprepare(dev->pclk);
+		return err;
+	}
+
+	return 0;
+}
+
+static int atmel_i2s_remove(struct platform_device *pdev)
+{
+	struct atmel_i2s_dev *dev = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(dev->pclk);
+
+	return 0;
+}
+
+static struct platform_driver atmel_i2s_driver = {
+	.driver		= {
+		.name	= "atmel_i2s",
+		.of_match_table	= of_match_ptr(atmel_i2s_dt_ids),
+	},
+	.probe		= atmel_i2s_probe,
+	.remove		= atmel_i2s_remove,
+};
+module_platform_driver(atmel_i2s_driver);
+
+MODULE_DESCRIPTION("Atmel I2S Controller driver");
+MODULE_AUTHOR("Cyrille Pitchen <cyrille.pitchen@atmel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index a1e2c56..d3b6968 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c
@@ -820,7 +820,7 @@
 		if (ret < 0) {
 			printk(KERN_WARNING
 					"atmel_ssc_dai: request_irq failure\n");
-			pr_debug("Atmel_ssc_dai: Stoping clock\n");
+			pr_debug("Atmel_ssc_dai: Stopping clock\n");
 			clk_disable(ssc_p->ssc->clk);
 			return ret;
 		}
@@ -1002,8 +1002,7 @@
 
 static int asoc_ssc_init(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ssc_device *ssc = platform_get_drvdata(pdev);
+	struct ssc_device *ssc = dev_get_drvdata(dev);
 	int ret;
 
 	ret = snd_soc_register_component(dev, &atmel_ssc_component,
@@ -1033,8 +1032,7 @@
 
 static void asoc_ssc_exit(struct device *dev)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct ssc_device *ssc = platform_get_drvdata(pdev);
+	struct ssc_device *ssc = dev_get_drvdata(dev);
 
 	if (ssc->pdata->use_dma)
 		atmel_pcm_dma_platform_unregister(dev);
diff --git a/sound/soc/bcm/Kconfig b/sound/soc/bcm/Kconfig
index edf3671..02f50b7 100644
--- a/sound/soc/bcm/Kconfig
+++ b/sound/soc/bcm/Kconfig
@@ -11,9 +11,8 @@
 config SND_SOC_CYGNUS
 	tristate "SoC platform audio for Broadcom Cygnus chips"
 	depends on ARCH_BCM_CYGNUS || COMPILE_TEST
-	depends on HAS_DMA
 	help
 	  Say Y if you want to add support for ASoC audio on Broadcom
 	  Cygnus chips (bcm958300, bcm958305, bcm911360)
 
-	  If you don't know what to do here, say N.
\ No newline at end of file
+	  If you don't know what to do here, say N.
diff --git a/sound/soc/cirrus/Kconfig b/sound/soc/cirrus/Kconfig
index c7cd60f..e091991 100644
--- a/sound/soc/cirrus/Kconfig
+++ b/sound/soc/cirrus/Kconfig
@@ -9,6 +9,23 @@
 config SND_EP93XX_SOC_I2S
 	tristate
 
+if SND_EP93XX_SOC_I2S
+
+config SND_EP93XX_SOC_I2S_WATCHDOG
+	bool "IRQ based underflow watchdog workaround"
+	default y
+	help
+	  The I2S controller on EP93xx seems to have an undocumented HW
+	  issue: an underflow of the internal I2S controller FIFO can
+	  confuse the state machine so that the whole stream is shifted
+	  by one byte until I2S is disabled. This option enables an IRQ
+	  based watchdog which disables and re-enables I2S on underflow
+	  and fills the FIFO with zeroes.
+
+	  If you are unsure how to answer this question, answer Y.
+
+endif # if SND_EP93XX_SOC_I2S
+
 config SND_EP93XX_SOC_AC97
 	tristate
 	select AC97_BUS
diff --git a/sound/soc/cirrus/edb93xx.c b/sound/soc/cirrus/edb93xx.c
index c53bd6f..3d011ab 100644
--- a/sound/soc/cirrus/edb93xx.c
+++ b/sound/soc/cirrus/edb93xx.c
@@ -67,7 +67,7 @@
 	.cpu_dai_name	= "ep93xx-i2s",
 	.codec_name	= "spi0.0",
 	.codec_dai_name	= "cs4271-hifi",
-	.dai_fmt	= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_IF |
+	.dai_fmt	= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF |
 			  SND_SOC_DAIFMT_CBS_CFS,
 	.ops		= &edb93xx_ops,
 };
diff --git a/sound/soc/cirrus/ep93xx-i2s.c b/sound/soc/cirrus/ep93xx-i2s.c
index 934f8ae..0918c5d 100644
--- a/sound/soc/cirrus/ep93xx-i2s.c
+++ b/sound/soc/cirrus/ep93xx-i2s.c
@@ -35,8 +35,12 @@
 
 #define EP93XX_I2S_TXCLKCFG		0x00
 #define EP93XX_I2S_RXCLKCFG		0x04
+#define EP93XX_I2S_GLSTS		0x08
 #define EP93XX_I2S_GLCTRL		0x0C
 
+#define EP93XX_I2S_I2STX0LFT		0x10
+#define EP93XX_I2S_I2STX0RT		0x14
+
 #define EP93XX_I2S_TXLINCTRLDATA	0x28
 #define EP93XX_I2S_TXCTRL		0x2C
 #define EP93XX_I2S_TXWRDLEN		0x30
@@ -51,7 +55,17 @@
 #define EP93XX_I2S_WRDLEN_24		(1 << 0)
 #define EP93XX_I2S_WRDLEN_32		(2 << 0)
 
-#define EP93XX_I2S_LINCTRLDATA_R_JUST	(1 << 2) /* Right justify */
+#define EP93XX_I2S_RXLINCTRLDATA_R_JUST	BIT(1) /* Right justify */
+
+#define EP93XX_I2S_TXLINCTRLDATA_R_JUST	BIT(2) /* Right justify */
+
+/*
+ * Transmit empty interrupt level select:
+ * 0 - Generate interrupt when FIFO is half empty
+ * 1 - Generate interrupt when FIFO is empty
+ */
+#define EP93XX_I2S_TXCTRL_TXEMPTY_LVL	BIT(0)
+#define EP93XX_I2S_TXCTRL_TXUFIE	BIT(1) /* TX underflow interrupt enable */
 
 #define EP93XX_I2S_CLKCFG_LRS		(1 << 0) /* lrclk polarity */
 #define EP93XX_I2S_CLKCFG_CKP		(1 << 1) /* Bit clock polarity */
@@ -59,6 +73,8 @@
 #define EP93XX_I2S_CLKCFG_MASTER	(1 << 3) /* Master mode */
 #define EP93XX_I2S_CLKCFG_NBCG		(1 << 4) /* Not bit clock gating */
 
+#define EP93XX_I2S_GLSTS_TX0_FIFO_FULL	BIT(12)
+
 struct ep93xx_i2s_info {
 	struct clk			*mclk;
 	struct clk			*sclk;
@@ -96,7 +112,6 @@
 static void ep93xx_i2s_enable(struct ep93xx_i2s_info *info, int stream)
 {
 	unsigned base_reg;
-	int i;
 
 	if ((ep93xx_i2s_read_reg(info, EP93XX_I2S_TX0EN) & 0x1) == 0 &&
 	    (ep93xx_i2s_read_reg(info, EP93XX_I2S_RX0EN) & 0x1) == 0) {
@@ -109,27 +124,36 @@
 		ep93xx_i2s_write_reg(info, EP93XX_I2S_GLCTRL, 1);
 	}
 
-	/* Enable fifos */
+	/* Enable fifo */
 	if (stream == SNDRV_PCM_STREAM_PLAYBACK)
 		base_reg = EP93XX_I2S_TX0EN;
 	else
 		base_reg = EP93XX_I2S_RX0EN;
-	for (i = 0; i < 3; i++)
-		ep93xx_i2s_write_reg(info, base_reg + (i * 4), 1);
+	ep93xx_i2s_write_reg(info, base_reg, 1);
+
+	/* Enable TX IRQs (FIFO empty or underflow) */
+	if (IS_ENABLED(CONFIG_SND_EP93XX_SOC_I2S_WATCHDOG) &&
+	    stream == SNDRV_PCM_STREAM_PLAYBACK)
+		ep93xx_i2s_write_reg(info, EP93XX_I2S_TXCTRL,
+				     EP93XX_I2S_TXCTRL_TXEMPTY_LVL |
+				     EP93XX_I2S_TXCTRL_TXUFIE);
 }
 
 static void ep93xx_i2s_disable(struct ep93xx_i2s_info *info, int stream)
 {
 	unsigned base_reg;
-	int i;
 
-	/* Disable fifos */
+	/* Disable IRQs */
+	if (IS_ENABLED(CONFIG_SND_EP93XX_SOC_I2S_WATCHDOG) &&
+	    stream == SNDRV_PCM_STREAM_PLAYBACK)
+		ep93xx_i2s_write_reg(info, EP93XX_I2S_TXCTRL, 0);
+
+	/* Disable fifo */
 	if (stream == SNDRV_PCM_STREAM_PLAYBACK)
 		base_reg = EP93XX_I2S_TX0EN;
 	else
 		base_reg = EP93XX_I2S_RX0EN;
-	for (i = 0; i < 3; i++)
-		ep93xx_i2s_write_reg(info, base_reg + (i * 4), 0);
+	ep93xx_i2s_write_reg(info, base_reg, 0);
 
 	if ((ep93xx_i2s_read_reg(info, EP93XX_I2S_TX0EN) & 0x1) == 0 &&
 	    (ep93xx_i2s_read_reg(info, EP93XX_I2S_RX0EN) & 0x1) == 0) {
@@ -143,6 +167,37 @@
 	}
 }
 
+/*
+ * According to the documentation the I2S controller can handle underflow
+ * conditions just fine, but in reality the state machine is sometimes
+ * confused so that the whole stream is shifted by one byte. The watchdog
+ * below disables the TX FIFO, fills the FIFO with zeroes and re-enables
+ * it. This resets the state machine, and by filling the FIFO we gain some
+ * time before the next underflow happens.
+ */
+static irqreturn_t ep93xx_i2s_interrupt(int irq, void *dev_id)
+{
+	struct ep93xx_i2s_info *info = dev_id;
+
+	/* Disable FIFO */
+	ep93xx_i2s_write_reg(info, EP93XX_I2S_TX0EN, 0);
+	/*
+	 * Fill the TX FIFO with zeroes; this way we can defer the next IRQs
+	 * as much as possible and give DMA more time to catch up. There are
+	 * only 8 samples in this FIFO, so even at 8 kHz the maximum deferral
+	 * here is 1 ms.
+	 */
+	while (!(ep93xx_i2s_read_reg(info, EP93XX_I2S_GLSTS) &
+		 EP93XX_I2S_GLSTS_TX0_FIFO_FULL)) {
+		ep93xx_i2s_write_reg(info, EP93XX_I2S_I2STX0LFT, 0);
+		ep93xx_i2s_write_reg(info, EP93XX_I2S_I2STX0RT, 0);
+	}
+	/* Re-enable FIFO */
+	ep93xx_i2s_write_reg(info, EP93XX_I2S_TX0EN, 1);
+
+	return IRQ_HANDLED;
+}
+
 static int ep93xx_i2s_dai_probe(struct snd_soc_dai *dai)
 {
 	struct ep93xx_i2s_info *info = snd_soc_dai_get_drvdata(dai);
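
The 1 ms figure in the watchdog comment above follows directly from the FIFO geometry: the deferral bought by refilling the FIFO is its depth divided by the sample rate. A minimal standalone sketch of that arithmetic (the helper name is invented for illustration and is not part of the patch):

    static unsigned int watchdog_deferral_us(unsigned int fifo_samples,
                                             unsigned int rate_hz)
    {
            /* e.g. 8 samples at 8000 Hz -> 1000 us */
            return fifo_samples * 1000000U / rate_hz;
    }
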
@@ -170,25 +225,25 @@
 				  unsigned int fmt)
 {
 	struct ep93xx_i2s_info *info = snd_soc_dai_get_drvdata(cpu_dai);
-	unsigned int clk_cfg, lin_ctrl;
+	unsigned int clk_cfg;
+	unsigned int txlin_ctrl = 0;
+	unsigned int rxlin_ctrl = 0;
 
 	clk_cfg  = ep93xx_i2s_read_reg(info, EP93XX_I2S_RXCLKCFG);
-	lin_ctrl = ep93xx_i2s_read_reg(info, EP93XX_I2S_RXLINCTRLDATA);
 
 	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
 	case SND_SOC_DAIFMT_I2S:
 		clk_cfg |= EP93XX_I2S_CLKCFG_REL;
-		lin_ctrl &= ~EP93XX_I2S_LINCTRLDATA_R_JUST;
 		break;
 
 	case SND_SOC_DAIFMT_LEFT_J:
 		clk_cfg &= ~EP93XX_I2S_CLKCFG_REL;
-		lin_ctrl &= ~EP93XX_I2S_LINCTRLDATA_R_JUST;
 		break;
 
 	case SND_SOC_DAIFMT_RIGHT_J:
 		clk_cfg &= ~EP93XX_I2S_CLKCFG_REL;
-		lin_ctrl |= EP93XX_I2S_LINCTRLDATA_R_JUST;
+		rxlin_ctrl |= EP93XX_I2S_RXLINCTRLDATA_R_JUST;
+		txlin_ctrl |= EP93XX_I2S_TXLINCTRLDATA_R_JUST;
 		break;
 
 	default:
@@ -213,32 +268,32 @@
 	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
 	case SND_SOC_DAIFMT_NB_NF:
 		/* Negative bit clock, lrclk low on left word */
-		clk_cfg &= ~(EP93XX_I2S_CLKCFG_CKP | EP93XX_I2S_CLKCFG_REL);
+		clk_cfg &= ~(EP93XX_I2S_CLKCFG_CKP | EP93XX_I2S_CLKCFG_LRS);
 		break;
 
 	case SND_SOC_DAIFMT_NB_IF:
 		/* Negative bit clock, lrclk low on right word */
 		clk_cfg &= ~EP93XX_I2S_CLKCFG_CKP;
-		clk_cfg |= EP93XX_I2S_CLKCFG_REL;
+		clk_cfg |= EP93XX_I2S_CLKCFG_LRS;
 		break;
 
 	case SND_SOC_DAIFMT_IB_NF:
 		/* Positive bit clock, lrclk low on left word */
 		clk_cfg |= EP93XX_I2S_CLKCFG_CKP;
-		clk_cfg &= ~EP93XX_I2S_CLKCFG_REL;
+		clk_cfg &= ~EP93XX_I2S_CLKCFG_LRS;
 		break;
 
 	case SND_SOC_DAIFMT_IB_IF:
 		/* Positive bit clock, lrclk low on right word */
-		clk_cfg |= EP93XX_I2S_CLKCFG_CKP | EP93XX_I2S_CLKCFG_REL;
+		clk_cfg |= EP93XX_I2S_CLKCFG_CKP | EP93XX_I2S_CLKCFG_LRS;
 		break;
 	}
 
 	/* Write new register values */
 	ep93xx_i2s_write_reg(info, EP93XX_I2S_RXCLKCFG, clk_cfg);
 	ep93xx_i2s_write_reg(info, EP93XX_I2S_TXCLKCFG, clk_cfg);
-	ep93xx_i2s_write_reg(info, EP93XX_I2S_RXLINCTRLDATA, lin_ctrl);
-	ep93xx_i2s_write_reg(info, EP93XX_I2S_TXLINCTRLDATA, lin_ctrl);
+	ep93xx_i2s_write_reg(info, EP93XX_I2S_RXLINCTRLDATA, rxlin_ctrl);
+	ep93xx_i2s_write_reg(info, EP93XX_I2S_TXLINCTRLDATA, txlin_ctrl);
 	return 0;
 }
 
@@ -392,6 +447,17 @@
 	if (IS_ERR(info->regs))
 		return PTR_ERR(info->regs);
 
+	if (IS_ENABLED(CONFIG_SND_EP93XX_SOC_I2S_WATCHDOG)) {
+		int irq = platform_get_irq(pdev, 0);
+		if (irq <= 0)
+			return irq < 0 ? irq : -ENODEV;
+
+		err = devm_request_irq(&pdev->dev, irq, ep93xx_i2s_interrupt, 0,
+				       pdev->name, info);
+		if (err)
+			return err;
+	}
+
 	info->mclk = clk_get(&pdev->dev, "mclk");
 	if (IS_ERR(info->mclk)) {
 		err = PTR_ERR(info->mclk);
diff --git a/sound/soc/cirrus/snappercl15.c b/sound/soc/cirrus/snappercl15.c
index 2334ec1..11ff7b2 100644
--- a/sound/soc/cirrus/snappercl15.c
+++ b/sound/soc/cirrus/snappercl15.c
@@ -72,7 +72,7 @@
 	.codec_dai_name	= "tlv320aic23-hifi",
 	.codec_name	= "tlv320aic23-codec.0-001a",
 	.platform_name	= "ep93xx-i2s",
-	.dai_fmt	= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_IF |
+	.dai_fmt	= SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF |
 			  SND_SOC_DAIFMT_CBS_CFS,
 	.ops		= &snappercl15_ops,
 };
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 9548f63..63cf62e 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -106,6 +106,7 @@
 	select SND_SOC_MAX9877 if I2C
 	select SND_SOC_MC13783 if MFD_MC13XXX
 	select SND_SOC_ML26124 if I2C
+	select SND_SOC_MT6351 if MTK_PMIC_WRAP
 	select SND_SOC_NAU8540 if I2C
 	select SND_SOC_NAU8810 if I2C
 	select SND_SOC_NAU8824 if I2C
@@ -126,6 +127,7 @@
 	select SND_SOC_RT274 if I2C
 	select SND_SOC_RT286 if I2C
 	select SND_SOC_RT298 if I2C
+	select SND_SOC_RT1305 if I2C
 	select SND_SOC_RT5514 if I2C
 	select SND_SOC_RT5616 if I2C
 	select SND_SOC_RT5631 if I2C
@@ -136,12 +138,14 @@
 	select SND_SOC_RT5660 if I2C
 	select SND_SOC_RT5663 if I2C
 	select SND_SOC_RT5665 if I2C
+	select SND_SOC_RT5668 if I2C
 	select SND_SOC_RT5670 if I2C
 	select SND_SOC_RT5677 if I2C && SPI_MASTER
 	select SND_SOC_SGTL5000 if I2C
 	select SND_SOC_SI476X if MFD_SI476X_CORE
 	select SND_SOC_SIRF_AUDIO_CODEC
 	select SND_SOC_SPDIF
+	select SND_SOC_SSM2305
 	select SND_SOC_SSM2518 if I2C
 	select SND_SOC_SSM2602_SPI if SPI_MASTER
 	select SND_SOC_SSM2602_I2C if I2C
@@ -168,6 +172,7 @@
 	select SND_SOC_TPA6130A2 if I2C
 	select SND_SOC_TLV320DAC33 if I2C
 	select SND_SOC_TSCS42XX if I2C
+	select SND_SOC_TSCS454 if I2C
 	select SND_SOC_TS3A227E if I2C
 	select SND_SOC_TWL4030 if TWL4030_CORE
 	select SND_SOC_TWL6040 if TWL6040_CORE
@@ -770,8 +775,10 @@
 	default y if SND_SOC_RT5660=y
 	default y if SND_SOC_RT5663=y
 	default y if SND_SOC_RT5665=y
+	default y if SND_SOC_RT5668=y
 	default y if SND_SOC_RT5670=y
 	default y if SND_SOC_RT5677=y
+	default y if SND_SOC_RT1305=y
 	default m if SND_SOC_RT5514=m
 	default m if SND_SOC_RT5616=m
 	default m if SND_SOC_RT5640=m
@@ -781,8 +788,10 @@
 	default m if SND_SOC_RT5660=m
 	default m if SND_SOC_RT5663=m
 	default m if SND_SOC_RT5665=m
+	default m if SND_SOC_RT5668=m
 	default m if SND_SOC_RT5670=m
 	default m if SND_SOC_RT5677=m
+	default m if SND_SOC_RT1305=m
 
 config SND_SOC_RL6347A
 	tristate
@@ -805,6 +814,9 @@
 	tristate
 	depends on I2C
 
+config SND_SOC_RT1305
+	tristate
+
 config SND_SOC_RT5514
 	tristate
 
@@ -844,6 +856,9 @@
 config SND_SOC_RT5665
 	tristate
 
+config SND_SOC_RT5668
+	tristate
+
 config SND_SOC_RT5670
 	tristate
 
@@ -883,6 +898,12 @@
 config SND_SOC_SPDIF
 	tristate "S/PDIF CODEC"
 
+config SND_SOC_SSM2305
+	tristate "Analog Devices SSM2305 Class-D Amplifier"
+	help
+	  Enable support for Analog Devices SSM2305 filterless
+	  high-efficiency mono Class-D audio power amplifiers.
+
 config SND_SOC_SSM2518
 	tristate
 
@@ -1011,6 +1032,13 @@
 	help
 	  Add support for Tempo Semiconductor's TSCS42xx audio CODEC.
 
+config SND_SOC_TSCS454
+	tristate "Tempo Semiconductor TSCS454 CODEC"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  Add support for Tempo Semiconductor's TSCS454 audio CODEC.
+
 config SND_SOC_TWL4030
 	select MFD_TWL4030_AUDIO
 	tristate
@@ -1111,7 +1139,7 @@
 	depends on SND_SOC_I2C_AND_SPI
 
 config SND_SOC_WM8782
-	tristate
+	tristate "Wolfson Microelectronics WM8782 ADC"
 
 config SND_SOC_WM8804
 	tristate
@@ -1247,6 +1275,9 @@
 config SND_SOC_ML26124
 	tristate
 
+config SND_SOC_MT6351
+	tristate "MediaTek MT6351 Codec"
+
 config SND_SOC_NAU8540
        tristate "Nuvoton Technology Corporation NAU85L40 CODEC"
        depends on I2C
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index e849d14..e023fdf 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -102,6 +102,7 @@
 snd-soc-ml26124-objs := ml26124.o
 snd-soc-msm8916-analog-objs := msm8916-wcd-analog.o
 snd-soc-msm8916-digital-objs := msm8916-wcd-digital.o
+snd-soc-mt6351-objs := mt6351.o
 snd-soc-nau8540-objs := nau8540.o
 snd-soc-nau8810-objs := nau8810.o
 snd-soc-nau8824-objs := nau8824.o
@@ -126,6 +127,7 @@
 snd-soc-pcm512x-spi-objs := pcm512x-spi.o
 snd-soc-rl6231-objs := rl6231.o
 snd-soc-rl6347a-objs := rl6347a.o
+snd-soc-rt1305-objs := rt1305.o
 snd-soc-rt274-objs := rt274.o
 snd-soc-rt286-objs := rt286.o
 snd-soc-rt298-objs := rt298.o
@@ -140,6 +142,7 @@
 snd-soc-rt5660-objs := rt5660.o
 snd-soc-rt5663-objs := rt5663.o
 snd-soc-rt5665-objs := rt5665.o
+snd-soc-rt5668-objs := rt5668.o
 snd-soc-rt5670-objs := rt5670.o
 snd-soc-rt5677-objs := rt5677.o
 snd-soc-rt5677-spi-objs := rt5677-spi.o
@@ -153,6 +156,7 @@
 snd-soc-sirf-audio-codec-objs := sirf-audio-codec.o
 snd-soc-spdif-tx-objs := spdif_transmitter.o
 snd-soc-spdif-rx-objs := spdif_receiver.o
+snd-soc-ssm2305-objs := ssm2305.o
 snd-soc-ssm2518-objs := ssm2518.o
 snd-soc-ssm2602-objs := ssm2602.o
 snd-soc-ssm2602-spi-objs := ssm2602-spi.o
@@ -180,6 +184,7 @@
 snd-soc-tlv320aic3x-objs := tlv320aic3x.o
 snd-soc-tlv320dac33-objs := tlv320dac33.o
 snd-soc-tscs42xx-objs := tscs42xx.o
+snd-soc-tscs454-objs := tscs454.o
 snd-soc-ts3a227e-objs := ts3a227e.o
 snd-soc-twl4030-objs := twl4030.o
 snd-soc-twl6040-objs := twl6040.o
@@ -355,6 +360,7 @@
 obj-$(CONFIG_SND_SOC_ML26124)	+= snd-soc-ml26124.o
 obj-$(CONFIG_SND_SOC_MSM8916_WCD_ANALOG) +=snd-soc-msm8916-analog.o
 obj-$(CONFIG_SND_SOC_MSM8916_WCD_DIGITAL) +=snd-soc-msm8916-digital.o
+obj-$(CONFIG_SND_SOC_MT6351)	+= snd-soc-mt6351.o
 obj-$(CONFIG_SND_SOC_NAU8540)   += snd-soc-nau8540.o
 obj-$(CONFIG_SND_SOC_NAU8810)   += snd-soc-nau8810.o
 obj-$(CONFIG_SND_SOC_NAU8824)   += snd-soc-nau8824.o
@@ -379,6 +385,7 @@
 obj-$(CONFIG_SND_SOC_PCM512x_SPI)	+= snd-soc-pcm512x-spi.o
 obj-$(CONFIG_SND_SOC_RL6231)	+= snd-soc-rl6231.o
 obj-$(CONFIG_SND_SOC_RL6347A)	+= snd-soc-rl6347a.o
+obj-$(CONFIG_SND_SOC_RT1305)	+= snd-soc-rt1305.o
 obj-$(CONFIG_SND_SOC_RT274)	+= snd-soc-rt274.o
 obj-$(CONFIG_SND_SOC_RT286)	+= snd-soc-rt286.o
 obj-$(CONFIG_SND_SOC_RT298)	+= snd-soc-rt298.o
@@ -394,6 +401,7 @@
 obj-$(CONFIG_SND_SOC_RT5660)	+= snd-soc-rt5660.o
 obj-$(CONFIG_SND_SOC_RT5663)	+= snd-soc-rt5663.o
 obj-$(CONFIG_SND_SOC_RT5665)	+= snd-soc-rt5665.o
+obj-$(CONFIG_SND_SOC_RT5668)	+= snd-soc-rt5668.o
 obj-$(CONFIG_SND_SOC_RT5670)	+= snd-soc-rt5670.o
 obj-$(CONFIG_SND_SOC_RT5677)	+= snd-soc-rt5677.o
 obj-$(CONFIG_SND_SOC_RT5677_SPI)	+= snd-soc-rt5677-spi.o
@@ -404,6 +412,7 @@
 obj-$(CONFIG_SND_SOC_SI476X)	+= snd-soc-si476x.o
 obj-$(CONFIG_SND_SOC_SPDIF)	+= snd-soc-spdif-rx.o snd-soc-spdif-tx.o
 obj-$(CONFIG_SND_SOC_SIRF_AUDIO_CODEC) += sirf-audio-codec.o
+obj-$(CONFIG_SND_SOC_SSM2305)	+= snd-soc-ssm2305.o
 obj-$(CONFIG_SND_SOC_SSM2518)	+= snd-soc-ssm2518.o
 obj-$(CONFIG_SND_SOC_SSM2602)	+= snd-soc-ssm2602.o
 obj-$(CONFIG_SND_SOC_SSM2602_SPI)	+= snd-soc-ssm2602-spi.o
@@ -432,6 +441,7 @@
 obj-$(CONFIG_SND_SOC_TLV320AIC3X)	+= snd-soc-tlv320aic3x.o
 obj-$(CONFIG_SND_SOC_TLV320DAC33)	+= snd-soc-tlv320dac33.o
 obj-$(CONFIG_SND_SOC_TSCS42XX)	+= snd-soc-tscs42xx.o
+obj-$(CONFIG_SND_SOC_TSCS454)	+= snd-soc-tscs454.o
 obj-$(CONFIG_SND_SOC_TS3A227E)	+= snd-soc-ts3a227e.o
 obj-$(CONFIG_SND_SOC_TWL4030)	+= snd-soc-twl4030.o
 obj-$(CONFIG_SND_SOC_TWL6040)	+= snd-soc-twl6040.o
diff --git a/sound/soc/codecs/adau17x1.c b/sound/soc/codecs/adau17x1.c
index 12bf24c..ae41edd 100644
--- a/sound/soc/codecs/adau17x1.c
+++ b/sound/soc/codecs/adau17x1.c
@@ -843,6 +843,15 @@
 	struct adau *adau = snd_soc_component_get_drvdata(component);
 	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 
+	/* Check if the sample rate is the same as before. If it is, there is
+	 * no point in performing the steps below, as the call to
+	 * sigmadsp_setup(...) will return directly when it finds the sample
+	 * rate to be the same as before. By checking this we can prevent an
+	 * audible popping noise which occurs when toggling DSP_RUN.
+	 */
+	if (adau->sigmadsp->current_samplerate == rate)
+		return 0;
+
 	snd_soc_dapm_mutex_lock(dapm);
 
 	ret = regmap_read(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, &dspsr);
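
The guard added above is a cached-state early return: compare the stored sample rate against the requested one before touching the hardware at all. A standalone sketch of the pattern, with struct and function names invented for the example:

    struct demo_priv {
            unsigned int current_rate;
    };

    static int demo_set_rate(struct demo_priv *p, unsigned int rate)
    {
            if (p->current_rate == rate)
                    return 0;       /* nothing to reprogram, no audible pop */

            /* ... reconfigure the DSP here ... */
            p->current_rate = rate;
            return 0;
    }
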
diff --git a/sound/soc/codecs/cs35l35.c b/sound/soc/codecs/cs35l35.c
index a4a2cb1..bd6226b 100644
--- a/sound/soc/codecs/cs35l35.c
+++ b/sound/soc/codecs/cs35l35.c
@@ -1105,6 +1105,7 @@
 	.readable_reg = cs35l35_readable_register,
 	.precious_reg = cs35l35_precious_register,
 	.cache_type = REGCACHE_RBTREE,
+	.use_single_rw = true,
 };
 
 static irqreturn_t cs35l35_irq(int irq, void *data)
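
The use_single_rw flag added above tells the regmap core that the device cannot handle multi-register bulk transfers, so bulk reads and writes get split into single-register operations. A minimal illustrative config (the register and value widths below are placeholders, not the cs35l35 settings):

    static const struct regmap_config demo_regmap_config = {
            .reg_bits      = 8,
            .val_bits      = 8,
            .use_single_rw = true,  /* split bulk ops into single R/W */
    };
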
diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c
index feca0a6..80dc421 100644
--- a/sound/soc/codecs/cs43130.c
+++ b/sound/soc/codecs/cs43130.c
@@ -1733,10 +1733,10 @@
 	return cs43130_show_ac(dev, buf, HP_RIGHT);
 }
 
-static DEVICE_ATTR(hpload_dc_l, S_IRUGO, cs43130_show_dc_l, NULL);
-static DEVICE_ATTR(hpload_dc_r, S_IRUGO, cs43130_show_dc_r, NULL);
-static DEVICE_ATTR(hpload_ac_l, S_IRUGO, cs43130_show_ac_l, NULL);
-static DEVICE_ATTR(hpload_ac_r, S_IRUGO, cs43130_show_ac_r, NULL);
+static DEVICE_ATTR(hpload_dc_l, 0444, cs43130_show_dc_l, NULL);
+static DEVICE_ATTR(hpload_dc_r, 0444, cs43130_show_dc_r, NULL);
+static DEVICE_ATTR(hpload_ac_l, 0444, cs43130_show_ac_l, NULL);
+static DEVICE_ATTR(hpload_ac_r, 0444, cs43130_show_ac_r, NULL);
 
 static struct reg_sequence hp_en_cal_seq[] = {
 	{CS43130_INT_MASK_4, CS43130_INT_MASK_ALL},
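
The hunk above is the usual checkpatch-driven move from symbolic permission macros to octal literals; the two spellings denote the same bits. A tiny userspace check of the equivalence (a standalone demo, not driver code):

    #include <sys/stat.h>
    #include <assert.h>

    int main(void)
    {
            /* S_IRUGO in the kernel expands to S_IRUSR | S_IRGRP | S_IROTH */
            assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444);
            return 0;
    }
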
diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c
index 865f64c..fb515aa 100644
--- a/sound/soc/codecs/max98088.c
+++ b/sound/soc/codecs/max98088.c
@@ -1382,15 +1382,12 @@
 
 static int max98088_get_channel(struct snd_soc_component *component, const char *name)
 {
-	int i;
+	int ret;
 
-	for (i = 0; i < ARRAY_SIZE(eq_mode_name); i++)
-		if (strcmp(name, eq_mode_name[i]) == 0)
-			return i;
-
-	/* Shouldn't happen */
-	dev_err(component->dev, "Bad EQ channel name '%s'\n", name);
-	return -EINVAL;
+	ret = match_string(eq_mode_name, ARRAY_SIZE(eq_mode_name), name);
+	if (ret < 0)
+		dev_err(component->dev, "Bad EQ channel name '%s'\n", name);
+	return ret;
 }
 
 static void max98088_setup_eq1(struct snd_soc_component *component)
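
This conversion, like the identical one in max98095 below, leans on match_string() from lib/string.c, which returns the index of the matching array entry or -EINVAL when nothing matches, so the open-coded loop and its error path collapse into one call. A standalone sketch with invented array contents:

    static const char * const demo_names[] = { "EQ1 Mode", "EQ2 Mode" };

    static int demo_lookup(const char *name)
    {
            /* index of the match on success, -EINVAL otherwise */
            return match_string(demo_names, ARRAY_SIZE(demo_names), name);
    }
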
diff --git a/sound/soc/codecs/max98095.c b/sound/soc/codecs/max98095.c
index 6bf2d0b..3b3a10d 100644
--- a/sound/soc/codecs/max98095.c
+++ b/sound/soc/codecs/max98095.c
@@ -1634,15 +1634,12 @@
 static int max98095_get_bq_channel(struct snd_soc_component *component,
 				   const char *name)
 {
-	int i;
+	int ret;
 
-	for (i = 0; i < ARRAY_SIZE(bq_mode_name); i++)
-		if (strcmp(name, bq_mode_name[i]) == 0)
-			return i;
-
-	/* Shouldn't happen */
-	dev_err(component->dev, "Bad biquad channel name '%s'\n", name);
-	return -EINVAL;
+	ret = match_string(bq_mode_name, ARRAY_SIZE(bq_mode_name), name);
+	if (ret < 0)
+		dev_err(component->dev, "Bad biquad channel name '%s'\n", name);
+	return ret;
 }
 
 static int max98095_put_bq_enum(struct snd_kcontrol *kcontrol,
diff --git a/sound/soc/codecs/max9860.c b/sound/soc/codecs/max9860.c
index 5bbf889..de3d44e 100644
--- a/sound/soc/codecs/max9860.c
+++ b/sound/soc/codecs/max9860.c
@@ -1,23 +1,14 @@
-/*
- * Driver for the MAX9860 Mono Audio Voice Codec
- *
- * https://datasheets.maximintegrated.com/en/ds/MAX9860.pdf
- *
- * The driver does not support sidetone since the DVST register field is
- * backwards with the mute near the maximum level instead of the minimum.
- *
- * Author: Peter Rosin <peda@axentia.s>
- *         Copyright 2016 Axentia Technologies
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// Driver for the MAX9860 Mono Audio Voice Codec
+//
+// https://datasheets.maximintegrated.com/en/ds/MAX9860.pdf
+//
+// The driver does not support sidetone since the DVST register field is
+// backwards with the mute near the maximum level instead of the minimum.
+//
+// Author: Peter Rosin <peda@axentia.se>
+//         Copyright 2016 Axentia Technologies
 
 #include <linux/init.h>
 #include <linux/module.h>
@@ -443,7 +434,8 @@
 		ret = regmap_update_bits(max9860->regmap, MAX9860_AUDIOCLKHIGH,
 					 MAX9860_PLL, MAX9860_PLL);
 		if (ret) {
-			dev_err(component->dev, "Failed to enable PLL: %d\n", ret);
+			dev_err(component->dev, "Failed to enable PLL: %d\n",
+				ret);
 			return ret;
 		}
 	}
@@ -515,7 +507,8 @@
 		ret = regmap_update_bits(max9860->regmap, MAX9860_PWRMAN,
 					 MAX9860_SHDN, MAX9860_SHDN);
 		if (ret) {
-			dev_err(component->dev, "Failed to remove SHDN: %d\n", ret);
+			dev_err(component->dev, "Failed to remove SHDN: %d\n",
+				ret);
 			return ret;
 		}
 		break;
@@ -598,8 +591,7 @@
 	SET_RUNTIME_PM_OPS(max9860_suspend, max9860_resume, NULL)
 };
 
-static int max9860_probe(struct i2c_client *i2c,
-			 const struct i2c_device_id *id)
+static int max9860_probe(struct i2c_client *i2c)
 {
 	struct device *dev = &i2c->dev;
 	struct max9860_priv *max9860;
@@ -698,7 +690,7 @@
 	pm_runtime_idle(dev);
 
 	ret = devm_snd_soc_register_component(dev, &max9860_component_driver,
-				     &max9860_dai, 1);
+					      &max9860_dai, 1);
 	if (ret) {
 		dev_err(dev, "Failed to register CODEC: %d\n", ret);
 		goto err_pm;
@@ -736,7 +728,7 @@
 MODULE_DEVICE_TABLE(of, max9860_of_match);
 
 static struct i2c_driver max9860_i2c_driver = {
-	.probe	        = max9860_probe,
+	.probe_new      = max9860_probe,
 	.remove         = max9860_remove,
 	.id_table       = max9860_i2c_id,
 	.driver         = {
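
probe_new is the i2c_device_id-less probe variant: the callback receives only the client, and a driver that still needs the matched id can recover it with i2c_match_id(). A minimal sketch of the shape, with driver and function names invented:

    static int demo_probe(struct i2c_client *client)
    {
            dev_info(&client->dev, "probed at 0x%02x\n", client->addr);
            return 0;
    }

    static struct i2c_driver demo_i2c_driver = {
            .probe_new = demo_probe,
            .driver    = { .name = "demo" },
    };
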
diff --git a/sound/soc/codecs/max9860.h b/sound/soc/codecs/max9860.h
index 22041bd..e07b905 100644
--- a/sound/soc/codecs/max9860.h
+++ b/sound/soc/codecs/max9860.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the MAX9860 Mono Audio Voice Codec
  *
 * Author: Peter Rosin <peda@axentia.se>
  *         Copyright 2016 Axentia Technologies
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
  */
 
 #ifndef _SND_SOC_MAX9860
diff --git a/sound/soc/codecs/mt6351.c b/sound/soc/codecs/mt6351.c
new file mode 100644
index 0000000..f73dcd7
--- /dev/null
+++ b/sound/soc/codecs/mt6351.c
@@ -0,0 +1,1505 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mt6351.c  --  mt6351 ALSA SoC audio codec driver
+//
+// Copyright (c) 2018 MediaTek Inc.
+// Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/delay.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/tlv.h>
+
+#include "mt6351.h"
+
+/* MT6351_TOP_CLKSQ */
+#define RG_CLKSQ_EN_AUD_BIT (0)
+
+/* MT6351_TOP_CKPDN_CON0 */
+#define RG_AUDNCP_CK_PDN_BIT (12)
+#define RG_AUDIF_CK_PDN_BIT (13)
+#define RG_AUD_CK_PDN_BIT (14)
+#define RG_ZCD13M_CK_PDN_BIT (15)
+
+/* MT6351_AUDDEC_ANA_CON0 */
+#define RG_AUDDACLPWRUP_VAUDP32_BIT (0)
+#define RG_AUDDACRPWRUP_VAUDP32_BIT (1)
+#define RG_AUD_DAC_PWR_UP_VA32_BIT (2)
+#define RG_AUD_DAC_PWL_UP_VA32_BIT (3)
+
+#define RG_AUDHSPWRUP_VAUDP32_BIT (4)
+
+#define RG_AUDHPLPWRUP_VAUDP32_BIT (5)
+#define RG_AUDHPRPWRUP_VAUDP32_BIT (6)
+
+#define RG_AUDHSMUXINPUTSEL_VAUDP32_SFT (7)
+#define RG_AUDHSMUXINPUTSEL_VAUDP32_MASK (0x3)
+
+#define RG_AUDHPLMUXINPUTSEL_VAUDP32_SFT (9)
+#define RG_AUDHPLMUXINPUTSEL_VAUDP32_MASK (0x3)
+
+#define RG_AUDHPRMUXINPUTSEL_VAUDP32_SFT (11)
+#define RG_AUDHPRMUXINPUTSEL_VAUDP32_MASK (0x3)
+
+#define RG_AUDHSSCDISABLE_VAUDP32 (13)
+#define RG_AUDHPLSCDISABLE_VAUDP32_BIT (14)
+#define RG_AUDHPRSCDISABLE_VAUDP32_BIT (15)
+
+/* MT6351_AUDDEC_ANA_CON1 */
+#define RG_HSOUTPUTSTBENH_VAUDP32_BIT (8)
+
+/* MT6351_AUDDEC_ANA_CON3 */
+#define RG_AUDLOLPWRUP_VAUDP32_BIT (2)
+
+#define RG_AUDLOLMUXINPUTSEL_VAUDP32_SFT (3)
+#define RG_AUDLOLMUXINPUTSEL_VAUDP32_MASK (0x3)
+
+#define RG_AUDLOLSCDISABLE_VAUDP32_BIT (5)
+#define RG_LOOUTPUTSTBENH_VAUDP32_BIT (9)
+
+/* MT6351_AUDDEC_ANA_CON6 */
+#define RG_ABIDEC_RSVD0_VAUDP32_HPL_BIT (8)
+#define RG_ABIDEC_RSVD0_VAUDP32_HPR_BIT (9)
+#define RG_ABIDEC_RSVD0_VAUDP32_HS_BIT (10)
+#define RG_ABIDEC_RSVD0_VAUDP32_LOL_BIT (11)
+
+/* MT6351_AUDDEC_ANA_CON9 */
+#define RG_AUDIBIASPWRDN_VAUDP32_BIT (8)
+#define RG_RSTB_DECODER_VA32_BIT (9)
+#define RG_AUDGLB_PWRDN_VA32_BIT (12)
+
+#define RG_LCLDO_DEC_EN_VA32_BIT (13)
+#define RG_LCLDO_DEC_REMOTE_SENSE_VA18_BIT (15)
+/* MT6351_AUDDEC_ANA_CON10 */
+#define RG_NVREG_EN_VAUDP32_BIT (8)
+
+#define RG_AUDGLB_LP2_VOW_EN_VA32 10
+
+/* MT6351_AFE_UL_DL_CON0 */
+#define RG_AFE_ON_BIT (0)
+
+/* MT6351_AFE_DL_SRC2_CON0_L */
+#define RG_DL_2_SRC_ON_TMP_CTL_PRE_BIT (0)
+
+/* MT6351_AFE_UL_SRC_CON0_L */
+#define UL_SRC_ON_TMP_CTL (0)
+
+/* MT6351_AFE_TOP_CON0 */
+#define RG_DL_SINE_ON_SFT (0)
+#define RG_DL_SINE_ON_MASK (0x1)
+
+#define RG_UL_SINE_ON_SFT (1)
+#define RG_UL_SINE_ON_MASK (0x1)
+
+/* MT6351_AUDIO_TOP_CON0 */
+#define AUD_TOP_PDN_RESERVED_BIT 0
+#define AUD_TOP_PWR_CLK_DIS_CTL_BIT 2
+#define AUD_TOP_PDN_ADC_CTL_BIT 5
+#define AUD_TOP_PDN_DAC_CTL_BIT 6
+#define AUD_TOP_PDN_AFE_CTL_BIT 7
+
+/* MT6351_AFE_SGEN_CFG0 */
+#define SGEN_C_MUTE_SW_CTL_BIT 6
+#define SGEN_C_DAC_EN_CTL_BIT 7
+
+/* MT6351_AFE_NCP_CFG0 */
+#define RG_NCP_ON_BIT 0
+
+/* MT6351_LDO_VUSB33_CON0 */
+#define RG_VUSB33_EN 1
+#define RG_VUSB33_ON_CTRL 3
+
+/* MT6351_LDO_VA18_CON0 */
+#define RG_VA18_EN 1
+#define RG_VA18_ON_CTRL 3
+
+/* MT6351_AUDENC_ANA_CON0 */
+#define RG_AUDPREAMPLON 0
+#define RG_AUDPREAMPLDCCEN 1
+#define RG_AUDPREAMPLDCPRECHARGE 2
+
+#define RG_AUDPREAMPLINPUTSEL_SFT (4)
+#define RG_AUDPREAMPLINPUTSEL_MASK (0x3)
+
+#define RG_AUDADCLPWRUP 12
+
+#define RG_AUDADCLINPUTSEL_SFT (13)
+#define RG_AUDADCLINPUTSEL_MASK (0x3)
+
+/* MT6351_AUDENC_ANA_CON1 */
+#define RG_AUDPREAMPRON 0
+#define RG_AUDPREAMPRDCCEN 1
+#define RG_AUDPREAMPRDCPRECHARGE 2
+
+#define RG_AUDPREAMPRINPUTSEL_SFT (4)
+#define RG_AUDPREAMPRINPUTSEL_MASK (0x3)
+
+#define RG_AUDADCRPWRUP 12
+
+#define RG_AUDADCRINPUTSEL_SFT (13)
+#define RG_AUDADCRINPUTSEL_MASK (0x3)
+
+/* MT6351_AUDENC_ANA_CON3 */
+#define RG_AUDADCCLKRSTB 6
+
+/* MT6351_AUDENC_ANA_CON9 */
+#define RG_AUDPWDBMICBIAS0 0
+#define RG_AUDMICBIAS0VREF 4
+#define RG_AUDMICBIAS0LOWPEN 7
+
+#define RG_AUDPWDBMICBIAS2 8
+#define RG_AUDMICBIAS2VREF 12
+#define RG_AUDMICBIAS2LOWPEN 15
+
+/* MT6351_AUDENC_ANA_CON10 */
+#define RG_AUDPWDBMICBIAS1 0
+#define RG_AUDMICBIAS1DCSW1NEN 2
+#define RG_AUDMICBIAS1VREF 4
+#define RG_AUDMICBIAS1LOWPEN 7
+
+enum {
+	AUDIO_ANALOG_VOLUME_HSOUTL,
+	AUDIO_ANALOG_VOLUME_HSOUTR,
+	AUDIO_ANALOG_VOLUME_HPOUTL,
+	AUDIO_ANALOG_VOLUME_HPOUTR,
+	AUDIO_ANALOG_VOLUME_LINEOUTL,
+	AUDIO_ANALOG_VOLUME_LINEOUTR,
+	AUDIO_ANALOG_VOLUME_MICAMP1,
+	AUDIO_ANALOG_VOLUME_MICAMP2,
+	AUDIO_ANALOG_VOLUME_TYPE_MAX
+};
+
+/* Supply subseq */
+enum {
+	SUPPLY_SUBSEQ_SETTING,
+	SUPPLY_SUBSEQ_ENABLE,
+	SUPPLY_SUBSEQ_MICBIAS,
+};
+
+#define REG_STRIDE 2
+
+struct mt6351_priv {
+	struct device *dev;
+	struct regmap *regmap;
+
+	unsigned int dl_rate;
+	unsigned int ul_rate;
+
+	int ana_gain[AUDIO_ANALOG_VOLUME_TYPE_MAX];
+
+	int hp_en_counter;
+};
+
+static void set_hp_gain_zero(struct snd_soc_component *cmpnt)
+{
+	regmap_update_bits(cmpnt->regmap, MT6351_ZCD_CON2,
+			   0x1f << 7, 0x8 << 7);
+	regmap_update_bits(cmpnt->regmap, MT6351_ZCD_CON2,
+			   0x1f << 0, 0x8 << 0);
+}
+
+static unsigned int get_cap_reg_val(struct snd_soc_component *cmpnt,
+				    unsigned int rate)
+{
+	switch (rate) {
+	case 8000:
+		return 0;
+	case 16000:
+		return 1;
+	case 32000:
+		return 2;
+	case 48000:
+		return 3;
+	case 96000:
+		return 4;
+	case 192000:
+		return 5;
+	default:
+		dev_warn(cmpnt->dev, "%s(), error rate %d, return 3",
+			 __func__, rate);
+		return 3;
+	}
+}
+
+static unsigned int get_play_reg_val(struct snd_soc_component *cmpnt,
+				     unsigned int rate)
+{
+	switch (rate) {
+	case 8000:
+		return 0;
+	case 11025:
+		return 1;
+	case 12000:
+		return 2;
+	case 16000:
+		return 3;
+	case 22050:
+		return 4;
+	case 24000:
+		return 5;
+	case 32000:
+		return 6;
+	case 44100:
+		return 7;
+	case 48000:
+	case 96000:
+	case 192000:
+		return 8;
+	default:
+		dev_warn(cmpnt->dev, "%s(), error rate %d, return 8",
+			 __func__, rate);
+		return 8;
+	}
+}
+
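
get_play_reg_val() collapses the supported playback rates into the 4-bit field the AFE expects; mt_aif_in_event() further down shifts that value into bits [15:12] of the DL configuration registers. A usage sketch under that assumption (the wrapper name is invented):

    static void demo_set_dl_rate(struct snd_soc_component *cmpnt,
                                 unsigned int rate)
    {
            /* e.g. 44100 Hz -> 7 from the table above */
            unsigned int val = get_play_reg_val(cmpnt, rate);

            regmap_write(cmpnt->regmap, MT6351_AFE_PMIC_NEWIF_CFG0,
                         (val << 12) | 0x330);
    }
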
+static int mt6351_codec_dai_hw_params(struct snd_pcm_substream *substream,
+				      struct snd_pcm_hw_params *params,
+				      struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *cmpnt = dai->component;
+	struct mt6351_priv *priv = snd_soc_component_get_drvdata(cmpnt);
+	unsigned int rate = params_rate(params);
+
+	dev_dbg(priv->dev, "%s(), substream->stream %d, rate %d\n",
+		__func__, substream->stream, rate);
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		priv->dl_rate = rate;
+	else if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+		priv->ul_rate = rate;
+
+	return 0;
+}
+
+static const struct snd_soc_dai_ops mt6351_codec_dai_ops = {
+	.hw_params = mt6351_codec_dai_hw_params,
+};
+
+#define MT6351_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S16_BE |\
+			SNDRV_PCM_FMTBIT_U16_LE | SNDRV_PCM_FMTBIT_U16_BE |\
+			SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S24_BE |\
+			SNDRV_PCM_FMTBIT_U24_LE | SNDRV_PCM_FMTBIT_U24_BE |\
+			SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_S32_BE |\
+			SNDRV_PCM_FMTBIT_U32_LE | SNDRV_PCM_FMTBIT_U32_BE)
+
+static struct snd_soc_dai_driver mt6351_dai_driver[] = {
+	{
+		.name = "mt6351-snd-codec-aif1",
+		.playback = {
+			.stream_name = "AIF1 Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = SNDRV_PCM_RATE_8000_48000 |
+				 SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = MT6351_FORMATS,
+		},
+		.capture = {
+			.stream_name = "AIF1 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000 |
+				 SNDRV_PCM_RATE_32000 |
+				 SNDRV_PCM_RATE_48000 |
+				 SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = MT6351_FORMATS,
+		},
+		.ops = &mt6351_codec_dai_ops,
+	},
+};
+
+enum {
+	HP_GAIN_SET_ZERO,
+	HP_GAIN_RESTORE,
+};
+
+static void hp_gain_ramp_set(struct snd_soc_component *cmpnt, int hp_gain_ctl)
+{
+	struct mt6351_priv *priv = snd_soc_component_get_drvdata(cmpnt);
+	int idx, old_idx, offset, reg_idx;
+
+	if (hp_gain_ctl == HP_GAIN_SET_ZERO) {
+		idx = 8;	/* 0dB */
+		old_idx = priv->ana_gain[AUDIO_ANALOG_VOLUME_HPOUTL];
+	} else {
+		idx = priv->ana_gain[AUDIO_ANALOG_VOLUME_HPOUTL];
+		old_idx = 8;	/* 0dB */
+	}
+	dev_dbg(priv->dev, "%s(), idx %d, old_idx %d\n",
+		__func__, idx, old_idx);
+
+	if (idx > old_idx)
+		offset = idx - old_idx;
+	else
+		offset = old_idx - idx;
+
+	reg_idx = old_idx;
+
+	while (offset > 0) {
+		reg_idx = idx > old_idx ? reg_idx + 1 : reg_idx - 1;
+
+		/* check valid range, and set value */
+		if ((reg_idx >= 0 && reg_idx <= 0x12) || reg_idx == 0x1f) {
+			regmap_update_bits(cmpnt->regmap,
+					   MT6351_ZCD_CON2,
+					   0xf9f,
+					   (reg_idx << 7) | reg_idx);
+			usleep_range(100, 120);
+		}
+		offset--;
+	}
+}
+
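
The ramp above moves the gain index one step at a time with a ~100 us sleep per step, so a pop-free restore costs roughly steps x 100 us. A back-of-the-envelope helper (name invented, not part of the driver):

    static unsigned int hp_ramp_cost_us(int from_idx, int to_idx)
    {
            int steps = abs(to_idx - from_idx);

            return steps * 100;     /* one usleep_range(100, 120) per step */
    }
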
+static void hp_zcd_enable(struct snd_soc_component *cmpnt)
+{
+	/*
+	 * Enable ZCD to minimize pop noise when adjusting gain while the
+	 * HP buffer is on.
+	 */
+	regmap_update_bits(cmpnt->regmap, MT6351_ZCD_CON0, 0x7 << 8, 0x1 << 8);
+	regmap_update_bits(cmpnt->regmap, MT6351_ZCD_CON0, 0x1 << 7, 0x0 << 7);
+
+	/* timeout, 1=5ms, 0=30ms */
+	regmap_update_bits(cmpnt->regmap, MT6351_ZCD_CON0, 0x1 << 6, 0x1 << 6);
+
+	regmap_update_bits(cmpnt->regmap, MT6351_ZCD_CON0, 0x3 << 4, 0x0 << 4);
+	regmap_update_bits(cmpnt->regmap, MT6351_ZCD_CON0, 0x7 << 1, 0x5 << 1);
+	regmap_update_bits(cmpnt->regmap, MT6351_ZCD_CON0, 0x1 << 0, 0x1 << 0);
+}
+
+static void hp_zcd_disable(struct snd_soc_component *cmpnt)
+{
+	regmap_write(cmpnt->regmap, MT6351_ZCD_CON0, 0x0000);
+}
+
+static const DECLARE_TLV_DB_SCALE(playback_tlv, -1000, 100, 0);
+static const DECLARE_TLV_DB_SCALE(pga_tlv, 0, 600, 0);
+
+static const struct snd_kcontrol_new mt6351_snd_controls[] = {
+	/* dl pga gain */
+	SOC_DOUBLE_TLV("Headphone Volume",
+		       MT6351_ZCD_CON2, 0, 7, 0x12, 1,
+		       playback_tlv),
+	SOC_DOUBLE_TLV("Lineout Volume",
+		       MT6351_ZCD_CON1, 0, 7, 0x12, 1,
+		       playback_tlv),
+	SOC_SINGLE_TLV("Handset Volume",
+		       MT6351_ZCD_CON3, 0, 0x12, 1,
+		       playback_tlv),
+	/* ul pga gain */
+	SOC_DOUBLE_R_TLV("PGA Volume",
+			 MT6351_AUDENC_ANA_CON0, MT6351_AUDENC_ANA_CON1,
+			 8, 4, 0,
+			 pga_tlv),
+};
+
+/* MUX */
+
+/* LOL MUX */
+static const char *const lo_in_mux_map[] = {
+	"Open", "Mute", "Playback", "Test Mode",
+};
+
+static int lo_in_mux_map_value[] = {
+	0x0, 0x1, 0x2, 0x3,
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(lo_in_mux_map_enum,
+				  MT6351_AUDDEC_ANA_CON3,
+				  RG_AUDLOLMUXINPUTSEL_VAUDP32_SFT,
+				  RG_AUDLOLMUXINPUTSEL_VAUDP32_MASK,
+				  lo_in_mux_map,
+				  lo_in_mux_map_value);
+
+static const struct snd_kcontrol_new lo_in_mux_control =
+	SOC_DAPM_ENUM("In Select", lo_in_mux_map_enum);
+
+/* HP MUX */
+static const char *const hp_in_mux_map[] = {
+	"Open", "LoudSPK Playback", "Audio Playback", "Test Mode",
+};
+
+static int hp_in_mux_map_value[] = {
+	0x0, 0x1, 0x2, 0x3,
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(hpl_in_mux_map_enum,
+				  MT6351_AUDDEC_ANA_CON0,
+				  RG_AUDHPLMUXINPUTSEL_VAUDP32_SFT,
+				  RG_AUDHPLMUXINPUTSEL_VAUDP32_MASK,
+				  hp_in_mux_map,
+				  hp_in_mux_map_value);
+
+static const struct snd_kcontrol_new hpl_in_mux_control =
+	SOC_DAPM_ENUM("HPL Select", hpl_in_mux_map_enum);
+
+static SOC_VALUE_ENUM_SINGLE_DECL(hpr_in_mux_map_enum,
+				  MT6351_AUDDEC_ANA_CON0,
+				  RG_AUDHPRMUXINPUTSEL_VAUDP32_SFT,
+				  RG_AUDHPRMUXINPUTSEL_VAUDP32_MASK,
+				  hp_in_mux_map,
+				  hp_in_mux_map_value);
+
+static const struct snd_kcontrol_new hpr_in_mux_control =
+	SOC_DAPM_ENUM("HPR Select", hpr_in_mux_map_enum);
+
+/* RCV MUX */
+static const char *const rcv_in_mux_map[] = {
+	"Open", "Mute", "Voice Playback", "Test Mode",
+};
+
+static int rcv_in_mux_map_value[] = {
+	0x0, 0x1, 0x2, 0x3,
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(rcv_in_mux_map_enum,
+				  MT6351_AUDDEC_ANA_CON0,
+				  RG_AUDHSMUXINPUTSEL_VAUDP32_SFT,
+				  RG_AUDHSMUXINPUTSEL_VAUDP32_MASK,
+				  rcv_in_mux_map,
+				  rcv_in_mux_map_value);
+
+static const struct snd_kcontrol_new rcv_in_mux_control =
+	SOC_DAPM_ENUM("RCV Select", rcv_in_mux_map_enum);
+
+/* DAC In MUX */
+static const char *const dac_in_mux_map[] = {
+	"Normal Path", "Sgen",
+};
+
+static int dac_in_mux_map_value[] = {
+	0x0, 0x1,
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(dac_in_mux_map_enum,
+				  MT6351_AFE_TOP_CON0,
+				  RG_DL_SINE_ON_SFT,
+				  RG_DL_SINE_ON_MASK,
+				  dac_in_mux_map,
+				  dac_in_mux_map_value);
+
+static const struct snd_kcontrol_new dac_in_mux_control =
+	SOC_DAPM_ENUM("DAC Select", dac_in_mux_map_enum);
+
+/* AIF Out MUX */
+static SOC_VALUE_ENUM_SINGLE_DECL(aif_out_mux_map_enum,
+				  MT6351_AFE_TOP_CON0,
+				  RG_UL_SINE_ON_SFT,
+				  RG_UL_SINE_ON_MASK,
+				  dac_in_mux_map,
+				  dac_in_mux_map_value);
+
+static const struct snd_kcontrol_new aif_out_mux_control =
+	SOC_DAPM_ENUM("AIF Out Select", aif_out_mux_map_enum);
+
+/* ADC L MUX */
+static const char *const adc_left_mux_map[] = {
+	"Idle", "AIN0", "Left Preamplifier", "Idle_1",
+};
+
+static int adc_left_mux_map_value[] = {
+	0x0, 0x1, 0x2, 0x3,
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(adc_left_mux_map_enum,
+				  MT6351_AUDENC_ANA_CON0,
+				  RG_AUDADCLINPUTSEL_SFT,
+				  RG_AUDADCLINPUTSEL_MASK,
+				  adc_left_mux_map,
+				  adc_left_mux_map_value);
+
+static const struct snd_kcontrol_new adc_left_mux_control =
+	SOC_DAPM_ENUM("ADC L Select", adc_left_mux_map_enum);
+
+/* ADC R MUX */
+static const char *const adc_right_mux_map[] = {
+	"Idle", "AIN0", "Right Preamplifier", "Idle_1",
+};
+
+static int adc_right_mux_map_value[] = {
+	0x0, 0x1, 0x2, 0x3,
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(adc_right_mux_map_enum,
+				  MT6351_AUDENC_ANA_CON1,
+				  RG_AUDADCRINPUTSEL_SFT,
+				  RG_AUDADCRINPUTSEL_MASK,
+				  adc_right_mux_map,
+				  adc_right_mux_map_value);
+
+static const struct snd_kcontrol_new adc_right_mux_control =
+	SOC_DAPM_ENUM("ADC R Select", adc_right_mux_map_enum);
+
+/* PGA L MUX */
+static const char *const pga_left_mux_map[] = {
+	"None", "AIN0", "AIN1", "AIN2",
+};
+
+static int pga_left_mux_map_value[] = {
+	0x0, 0x1, 0x2, 0x3,
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(pga_left_mux_map_enum,
+				  MT6351_AUDENC_ANA_CON0,
+				  RG_AUDPREAMPLINPUTSEL_SFT,
+				  RG_AUDPREAMPLINPUTSEL_MASK,
+				  pga_left_mux_map,
+				  pga_left_mux_map_value);
+
+static const struct snd_kcontrol_new pga_left_mux_control =
+	SOC_DAPM_ENUM("PGA L Select", pga_left_mux_map_enum);
+
+/* PGA R MUX */
+static const char *const pga_right_mux_map[] = {
+	"None", "AIN0", "AIN3", "AIN2",
+};
+
+static int pga_right_mux_map_value[] = {
+	0x0, 0x1, 0x2, 0x3,
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(pga_right_mux_map_enum,
+				  MT6351_AUDENC_ANA_CON1,
+				  RG_AUDPREAMPRINPUTSEL_SFT,
+				  RG_AUDPREAMPRINPUTSEL_MASK,
+				  pga_right_mux_map,
+				  pga_right_mux_map_value);
+
+static const struct snd_kcontrol_new pga_right_mux_control =
+	SOC_DAPM_ENUM("PGA R Select", pga_right_mux_map_enum);
+
+static int mt_reg_set_clr_event(struct snd_soc_dapm_widget *w,
+				struct snd_kcontrol *kcontrol,
+				int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		if (w->on_val) {
+			/* SET REG */
+			regmap_update_bits(cmpnt->regmap,
+					   w->reg + REG_STRIDE,
+					   0x1 << w->shift,
+					   0x1 << w->shift);
+		} else {
+			/* CLR REG */
+			regmap_update_bits(cmpnt->regmap,
+					   w->reg + REG_STRIDE * 2,
+					   0x1 << w->shift,
+					   0x1 << w->shift);
+		}
+		break;
+	case SND_SOC_DAPM_PRE_PMD:
+		if (w->off_val) {
+			/* SET REG */
+			regmap_update_bits(cmpnt->regmap,
+					   w->reg + REG_STRIDE,
+					   0x1 << w->shift,
+					   0x1 << w->shift);
+		} else {
+			/* CLR REG */
+			regmap_update_bits(cmpnt->regmap,
+					   w->reg + REG_STRIDE * 2,
+					   0x1 << w->shift,
+					   0x1 << w->shift);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
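
mt_reg_set_clr_event() relies on the MT6351 register layout in which every control register REG is followed by a SET companion at REG + REG_STRIDE and a CLR companion at REG + REG_STRIDE * 2; writing a 1 bit to a companion sets or clears that bit without a read-modify-write of REG itself. A condensed sketch of the convention (helper name invented):

    static void demo_set_clr_bit(struct regmap *map, unsigned int reg,
                                 unsigned int shift, bool set)
    {
            /* REG + 2 is the SET companion, REG + 4 the CLR companion */
            unsigned int companion = set ? reg + REG_STRIDE :
                                           reg + REG_STRIDE * 2;

            regmap_update_bits(map, companion, 0x1 << shift, 0x1 << shift);
    }
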
+static int mt_ncp_event(struct snd_soc_dapm_widget *w,
+			struct snd_kcontrol *kcontrol,
+			int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_NCP_CFG1,
+				   0xffff, 0x1515);
+		/* NCP: ck1 and ck2 clock frequency configuration */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_NCP_CFG0,
+				   0xfffe, 0x8C00);
+		break;
+	case SND_SOC_DAPM_POST_PMU:
+		usleep_range(250, 270);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int mt_sgen_event(struct snd_soc_dapm_widget *w,
+			 struct snd_kcontrol *kcontrol,
+			 int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_SGEN_CFG0,
+				   0xffef, 0x0008);
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_SGEN_CFG1,
+				   0xffff, 0x0101);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int mt_aif_in_event(struct snd_soc_dapm_widget *w,
+			   struct snd_kcontrol *kcontrol,
+			   int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+	struct mt6351_priv *priv = snd_soc_component_get_drvdata(cmpnt);
+
+	dev_dbg(priv->dev, "%s(), event 0x%x, rate %d\n",
+		__func__, event, priv->dl_rate);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* sdm audio fifo clock power on */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFUNC_AUD_CON2,
+				   0xffff, 0x0006);
+		/* scrambler clock on enable */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFUNC_AUD_CON0,
+				   0xffff, 0xC3A1);
+		/* sdm power on */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFUNC_AUD_CON2,
+				   0xffff, 0x0003);
+		/* sdm fifo enable */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFUNC_AUD_CON2,
+				   0xffff, 0x000B);
+		/* set attenuation gain */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_DL_SDM_CON1,
+				   0xffff, 0x001E);
+
+		regmap_write(cmpnt->regmap, MT6351_AFE_PMIC_NEWIF_CFG0,
+			     (get_play_reg_val(cmpnt, priv->dl_rate) << 12) |
+			     0x330);
+		regmap_write(cmpnt->regmap, MT6351_AFE_DL_SRC2_CON0_H,
+			     (get_play_reg_val(cmpnt, priv->dl_rate) << 12) |
+			     0x300);
+
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_PMIC_NEWIF_CFG2,
+				   0x8000, 0x8000);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int mt_hp_event(struct snd_soc_dapm_widget *w,
+		       struct snd_kcontrol *kcontrol,
+		       int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+	struct mt6351_priv *priv = snd_soc_component_get_drvdata(cmpnt);
+	int reg;
+
+	dev_dbg(priv->dev, "%s(), event 0x%x, hp_en_counter %d\n",
+		__func__, event, priv->hp_en_counter);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		priv->hp_en_counter++;
+		if (priv->hp_en_counter > 1)
+			break;	/* already enabled, do nothing */
+		else if (priv->hp_en_counter <= 0)
+			dev_err(priv->dev, "%s(), hp_en_counter %d <= 0\n",
+				__func__,
+				priv->hp_en_counter);
+
+		hp_zcd_disable(cmpnt);
+
+		/* from yoyo HQA script */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON6,
+				   0x0700, 0x0700);
+
+		/* save target gain to restore after hardware open complete */
+		regmap_read(cmpnt->regmap, MT6351_ZCD_CON2, &reg);
+		priv->ana_gain[AUDIO_ANALOG_VOLUME_HPOUTL] = reg & 0x1f;
+		priv->ana_gain[AUDIO_ANALOG_VOLUME_HPOUTR] = (reg >> 7) & 0x1f;
+
+		/* Set HPR/HPL gain as minimum (~ -40dB) */
+		regmap_update_bits(cmpnt->regmap,
+				   MT6351_ZCD_CON2, 0xffff, 0x0F9F);
+		/* Set HS gain as minimum (~ -40dB) */
+		regmap_update_bits(cmpnt->regmap,
+				   MT6351_ZCD_CON3, 0xffff, 0x001F);
+		/* De_OSC of HP */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON2,
+				   0x0001, 0x0001);
+		/* enable output STBENH */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON1,
+				   0xffff, 0x2000);
+		/* De_OSC of voice, enable output STBENH */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON1,
+				   0xffff, 0x2100);
+		/* Enable voice driver */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON0,
+				   0x0010, 0xE090);
+		/* Enable pre-charge buffer  */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON1,
+				   0xffff, 0x2140);
+
+		usleep_range(50, 60);
+
+		/* Apply digital DC compensation value to DAC */
+		set_hp_gain_zero(cmpnt);
+
+		/* Enable HPR/HPL */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON1,
+				   0xffff, 0x2100);
+		/* Disable pre-charge buffer */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON1,
+				   0xffff, 0x2000);
+		/* Disable De_OSC of voice */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON0,
+				   0x0010, 0xF4EF);
+		/* Disable voice buffer */
+
+		/* from yoyo HQ */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON6,
+				   0x0700, 0x0300);
+
+		/*
+		 * Enable ZCD to minimize pop noise when adjusting gain
+		 * while the HP buffer is on.
+		 */
+		hp_zcd_enable(cmpnt);
+
+		/* apply volume setting */
+		hp_gain_ramp_set(cmpnt, HP_GAIN_RESTORE);
+
+		break;
+	case SND_SOC_DAPM_PRE_PMD:
+		priv->hp_en_counter--;
+		if (priv->hp_en_counter > 0)
+			break;	/* still being used, don't close */
+		else if (priv->hp_en_counter < 0)
+			dev_err(priv->dev, "%s(), hp_en_counter %d <= 0\n",
+				__func__,
+				priv->hp_en_counter);
+
+		/* Disable AUD_ZCD */
+		hp_zcd_disable(cmpnt);
+
+		/* Set HPR/HPL gain as -1dB, step by step */
+		hp_gain_ramp_set(cmpnt, HP_GAIN_SET_ZERO);
+
+		set_hp_gain_zero(cmpnt);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		if (priv->hp_en_counter > 0)
+			break;	/* still being used, don't close */
+		else if (priv->hp_en_counter < 0)
+			dev_err(priv->dev, "%s(), hp_en_counter %d <= 0\n",
+				__func__,
+				priv->hp_en_counter);
+
+		/* reset */
+		regmap_update_bits(cmpnt->regmap,
+				   MT6351_AUDDEC_ANA_CON6,
+				   0x0700,
+				   0x0000);
+		/* De_OSC of HP */
+		regmap_update_bits(cmpnt->regmap,
+				   MT6351_AUDDEC_ANA_CON2,
+				   0x0001,
+				   0x0000);
+
+		/* apply volume setting */
+		hp_gain_ramp_set(cmpnt, HP_GAIN_RESTORE);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int mt_aif_out_event(struct snd_soc_dapm_widget *w,
+			    struct snd_kcontrol *kcontrol,
+			    int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+	struct mt6351_priv *priv = snd_soc_component_get_drvdata(cmpnt);
+
+	dev_dbg(priv->dev, "%s(), event 0x%x, rate %d\n",
+		__func__, event, priv->ul_rate);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* dcclk_div=11'b00100000011, dcclk_ref_ck_sel=2'b00 */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_DCCLK_CFG0,
+				   0xffff, 0x2062);
+		/* dcclk_pdn=1'b0 */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_DCCLK_CFG0,
+				   0xffff, 0x2060);
+		/* dcclk_gen_on=1'b1 */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_DCCLK_CFG0,
+				   0xffff, 0x2061);
+
+		/* UL sample rate and mode configure */
+		regmap_update_bits(cmpnt->regmap, MT6351_AFE_UL_SRC_CON0_H,
+				   0x000E,
+				   get_cap_reg_val(cmpnt, priv->ul_rate) << 1);
+
+		/* fixed 260k path for 8/16/32/48 */
+		if (priv->ul_rate <= 48000) {
+			/* anc ul path src on */
+			regmap_update_bits(cmpnt->regmap,
+					   MT6351_AFE_HPANC_CFG0,
+					   0x1 << 1,
+					   0x1 << 1);
+			/* ANC clk pdn release */
+			regmap_update_bits(cmpnt->regmap,
+					   MT6351_AFE_HPANC_CFG0,
+					   0x1 << 0,
+					   0x0 << 0);
+		}
+		break;
+	case SND_SOC_DAPM_PRE_PMD:
+		/* fixed 260k path for 8/16/32/48 */
+		if (priv->ul_rate <= 48000) {
+			/* anc ul path src on */
+			regmap_update_bits(cmpnt->regmap,
+					   MT6351_AFE_HPANC_CFG0,
+					   0x1 << 1,
+					   0x0 << 1);
+			/* ANC clk pdn release */
+			regmap_update_bits(cmpnt->regmap,
+					   MT6351_AFE_HPANC_CFG0,
+					   0x1 << 0,
+					   0x1 << 0);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int mt_adc_clkgen_event(struct snd_soc_dapm_widget *w,
+			       struct snd_kcontrol *kcontrol,
+			       int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* Audio ADC clock gen. mode: 00_divided by 2 (Normal) */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON3,
+				   0x3 << 4, 0x0);
+		break;
+	case SND_SOC_DAPM_POST_PMU:
+		/* ADC CLK from: 00_13MHz from CLKSQ (Default) */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON3,
+				   0x3 << 2, 0x0);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int mt_pga_left_event(struct snd_soc_dapm_widget *w,
+			     struct snd_kcontrol *kcontrol,
+			     int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* Audio L PGA precharge on */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON0,
+				   0x3 << RG_AUDPREAMPLDCPRECHARGE,
+				   0x1 << RG_AUDPREAMPLDCPRECHARGE);
+		/* Audio L PGA mode: 1_DCC */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON0,
+				   0x3 << RG_AUDPREAMPLDCCEN,
+				   0x1 << RG_AUDPREAMPLDCCEN);
+		break;
+	case SND_SOC_DAPM_POST_PMU:
+		usleep_range(100, 120);
+		/* Audio L PGA precharge off */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON0,
+				   0x3 << RG_AUDPREAMPLDCPRECHARGE,
+				   0x0 << RG_AUDPREAMPLDCPRECHARGE);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int mt_pga_right_event(struct snd_soc_dapm_widget *w,
+			      struct snd_kcontrol *kcontrol,
+			      int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* Audio R PGA precharge on */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON1,
+				   0x3 << RG_AUDPREAMPRDCPRECHARGE,
+				   0x1 << RG_AUDPREAMPRDCPRECHARGE);
+		/* Audio R PGA mode: 1_DCC */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON1,
+				   0x3 << RG_AUDPREAMPRDCCEN,
+				   0x1 << RG_AUDPREAMPRDCCEN);
+		break;
+	case SND_SOC_DAPM_POST_PMU:
+		usleep_range(100, 120);
+		/* Audio R PGA precharge off */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON1,
+				   0x3 << RG_AUDPREAMPRDCPRECHARGE,
+				   0x0 << RG_AUDPREAMPRDCPRECHARGE);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int mt_mic_bias_0_event(struct snd_soc_dapm_widget *w,
+			       struct snd_kcontrol *kcontrol,
+			       int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* MIC Bias 0 LowPower: 0_Normal */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON9,
+				   0x3 << RG_AUDMICBIAS0LOWPEN, 0x0);
+		/* MISBIAS0 = 1P9V */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON9,
+				   0x7 << RG_AUDMICBIAS0VREF,
+				   0x2 << RG_AUDMICBIAS0VREF);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		/* MISBIAS0 = 1P7V */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON9,
+				   0x7 << RG_AUDMICBIAS0VREF,
+				   0x0 << RG_AUDMICBIAS0VREF);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int mt_mic_bias_1_event(struct snd_soc_dapm_widget *w,
+			       struct snd_kcontrol *kcontrol,
+			       int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* MIC Bias 1 LowPower: 0_Normal */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON10,
+				   0x3 << RG_AUDMICBIAS1LOWPEN, 0x0);
+		/* MISBIAS1 = 2P7V */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON10,
+				   0x7 << RG_AUDMICBIAS1VREF,
+				   0x7 << RG_AUDMICBIAS1VREF);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		/* MISBIAS1 = 1P7V */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON10,
+				   0x7 << RG_AUDMICBIAS1VREF,
+				   0x0 << RG_AUDMICBIAS1VREF);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int mt_mic_bias_2_event(struct snd_soc_dapm_widget *w,
+			       struct snd_kcontrol *kcontrol,
+			       int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		/* MIC Bias 2 LowPower: 0_Normal */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON9,
+				   0x3 << RG_AUDMICBIAS2LOWPEN, 0x0);
+		/* MISBIAS2 = 1P9V */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON9,
+				   0x7 << RG_AUDMICBIAS2VREF,
+				   0x2 << RG_AUDMICBIAS2VREF);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		/* MISBIAS2 = 1P7V */
+		regmap_update_bits(cmpnt->regmap, MT6351_AUDENC_ANA_CON9,
+				   0x7 << RG_AUDMICBIAS2VREF,
+				   0x0 << RG_AUDMICBIAS2VREF);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+/* DAPM Kcontrols */
+static const struct snd_kcontrol_new mt_lineout_control =
+	SOC_DAPM_SINGLE("Switch", MT6351_AUDDEC_ANA_CON3,
+			RG_AUDLOLPWRUP_VAUDP32_BIT, 1, 0);
+
+/* DAPM Widgets */
+static const struct snd_soc_dapm_widget mt6351_dapm_widgets[] = {
+	/* Digital Clock */
+	SND_SOC_DAPM_SUPPLY("AUDIO_TOP_AFE_CTL", MT6351_AUDIO_TOP_CON0,
+			    AUD_TOP_PDN_AFE_CTL_BIT, 1, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("AUDIO_TOP_DAC_CTL", MT6351_AUDIO_TOP_CON0,
+			    AUD_TOP_PDN_DAC_CTL_BIT, 1, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("AUDIO_TOP_ADC_CTL", MT6351_AUDIO_TOP_CON0,
+			    AUD_TOP_PDN_ADC_CTL_BIT, 1, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("AUDIO_TOP_PWR_CLK", MT6351_AUDIO_TOP_CON0,
+			    AUD_TOP_PWR_CLK_DIS_CTL_BIT, 1, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("AUDIO_TOP_PDN_RESERVED", MT6351_AUDIO_TOP_CON0,
+			    AUD_TOP_PDN_RESERVED_BIT, 1, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("NCP", MT6351_AFE_NCP_CFG0,
+			    RG_NCP_ON_BIT, 0,
+			    mt_ncp_event,
+			    SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+
+	SND_SOC_DAPM_SUPPLY("DL Digital Clock", SND_SOC_NOPM,
+			    0, 0, NULL, 0),
+
+	/* Global Supply*/
+	SND_SOC_DAPM_SUPPLY("AUDGLB", MT6351_AUDDEC_ANA_CON9,
+			    RG_AUDGLB_PWRDN_VA32_BIT, 1, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CLKSQ Audio", MT6351_TOP_CLKSQ,
+			    RG_CLKSQ_EN_AUD_BIT, 0,
+			    mt_reg_set_clr_event,
+			    SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+	SND_SOC_DAPM_SUPPLY("ZCD13M_CK", MT6351_TOP_CKPDN_CON0,
+			    RG_ZCD13M_CK_PDN_BIT, 1,
+			    mt_reg_set_clr_event,
+			    SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+	SND_SOC_DAPM_SUPPLY("AUD_CK", MT6351_TOP_CKPDN_CON0,
+			    RG_AUD_CK_PDN_BIT, 1,
+			    mt_reg_set_clr_event,
+			    SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+	SND_SOC_DAPM_SUPPLY("AUDIF_CK", MT6351_TOP_CKPDN_CON0,
+			    RG_AUDIF_CK_PDN_BIT, 1,
+			    mt_reg_set_clr_event,
+			    SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+	SND_SOC_DAPM_SUPPLY("AUDNCP_CK", MT6351_TOP_CKPDN_CON0,
+			    RG_AUDNCP_CK_PDN_BIT, 1,
+			    mt_reg_set_clr_event,
+			    SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+
+	SND_SOC_DAPM_SUPPLY("AFE_ON", MT6351_AFE_UL_DL_CON0, RG_AFE_ON_BIT, 0,
+			    NULL, 0),
+
+	/* AIF Rx*/
+	SND_SOC_DAPM_AIF_IN_E("AIF_RX", "AIF1 Playback", 0,
+			      MT6351_AFE_DL_SRC2_CON0_L,
+			      RG_DL_2_SRC_ON_TMP_CTL_PRE_BIT, 0,
+			      mt_aif_in_event, SND_SOC_DAPM_PRE_PMU),
+
+	/* DL Supply */
+	SND_SOC_DAPM_SUPPLY("DL Power Supply", SND_SOC_NOPM,
+			    0, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("NV Regulator", MT6351_AUDDEC_ANA_CON10,
+			    RG_NVREG_EN_VAUDP32_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("AUD_CLK", MT6351_AUDDEC_ANA_CON9,
+			    RG_RSTB_DECODER_VA32_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("IBIST", MT6351_AUDDEC_ANA_CON9,
+			    RG_AUDIBIASPWRDN_VAUDP32_BIT, 1, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("LDO", MT6351_AUDDEC_ANA_CON9,
+			    RG_LCLDO_DEC_EN_VA32_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("LDO_REMOTE_SENSE", MT6351_AUDDEC_ANA_CON9,
+			    RG_LCLDO_DEC_REMOTE_SENSE_VA18_BIT, 0, NULL, 0),
+
+	/* DAC */
+	SND_SOC_DAPM_MUX("DAC In Mux", SND_SOC_NOPM, 0, 0, &dac_in_mux_control),
+
+	SND_SOC_DAPM_DAC("DACL", NULL, MT6351_AUDDEC_ANA_CON0,
+			 RG_AUDDACLPWRUP_VAUDP32_BIT, 0),
+	SND_SOC_DAPM_SUPPLY("DACL_BIASGEN", MT6351_AUDDEC_ANA_CON0,
+			    RG_AUD_DAC_PWL_UP_VA32_BIT, 0, NULL, 0),
+
+	SND_SOC_DAPM_DAC("DACR", NULL, MT6351_AUDDEC_ANA_CON0,
+			 RG_AUDDACRPWRUP_VAUDP32_BIT, 0),
+	SND_SOC_DAPM_SUPPLY("DACR_BIASGEN", MT6351_AUDDEC_ANA_CON0,
+			    RG_AUD_DAC_PWR_UP_VA32_BIT, 0, NULL, 0),
+	/* LOL */
+	SND_SOC_DAPM_MUX("LOL Mux", SND_SOC_NOPM, 0, 0, &lo_in_mux_control),
+
+	SND_SOC_DAPM_SUPPLY("LO Stability Enh", MT6351_AUDDEC_ANA_CON3,
+			    RG_LOOUTPUTSTBENH_VAUDP32_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("LOL Bias Gen", MT6351_AUDDEC_ANA_CON6,
+			    RG_ABIDEC_RSVD0_VAUDP32_LOL_BIT, 0, NULL, 0),
+
+	SND_SOC_DAPM_OUT_DRV("LOL Buffer", MT6351_AUDDEC_ANA_CON3,
+			     RG_AUDLOLPWRUP_VAUDP32_BIT, 0, NULL, 0),
+
+	/* Headphone */
+	SND_SOC_DAPM_MUX("HPL Mux", SND_SOC_NOPM, 0, 0, &hpl_in_mux_control),
+	SND_SOC_DAPM_MUX("HPR Mux", SND_SOC_NOPM, 0, 0, &hpr_in_mux_control),
+
+	SND_SOC_DAPM_OUT_DRV_E("HPL Power", MT6351_AUDDEC_ANA_CON0,
+			       RG_AUDHPLPWRUP_VAUDP32_BIT, 0, NULL, 0,
+			       mt_hp_event,
+			       SND_SOC_DAPM_PRE_PMU |
+			       SND_SOC_DAPM_PRE_PMD |
+			       SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_OUT_DRV_E("HPR Power", MT6351_AUDDEC_ANA_CON0,
+			       RG_AUDHPRPWRUP_VAUDP32_BIT, 0, NULL, 0,
+			       mt_hp_event,
+			       SND_SOC_DAPM_PRE_PMU |
+			       SND_SOC_DAPM_PRE_PMD |
+			       SND_SOC_DAPM_POST_PMD),
+
+	/* Receiver */
+	SND_SOC_DAPM_MUX("RCV Mux", SND_SOC_NOPM, 0, 0, &rcv_in_mux_control),
+
+	SND_SOC_DAPM_SUPPLY("RCV Stability Enh", MT6351_AUDDEC_ANA_CON1,
+			    RG_HSOUTPUTSTBENH_VAUDP32_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("RCV Bias Gen", MT6351_AUDDEC_ANA_CON6,
+			    RG_ABIDEC_RSVD0_VAUDP32_HS_BIT, 0, NULL, 0),
+
+	SND_SOC_DAPM_OUT_DRV("RCV Buffer", MT6351_AUDDEC_ANA_CON0,
+			     RG_AUDHSPWRUP_VAUDP32_BIT, 0, NULL, 0),
+
+	/* Outputs */
+	SND_SOC_DAPM_OUTPUT("Receiver"),
+	SND_SOC_DAPM_OUTPUT("Headphone L"),
+	SND_SOC_DAPM_OUTPUT("Headphone R"),
+	SND_SOC_DAPM_OUTPUT("LINEOUT L"),
+
+	/* SGEN */
+	SND_SOC_DAPM_SUPPLY("SGEN DL Enable", MT6351_AFE_SGEN_CFG0,
+			    SGEN_C_DAC_EN_CTL_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("SGEN MUTE", MT6351_AFE_SGEN_CFG0,
+			    SGEN_C_MUTE_SW_CTL_BIT, 1,
+			    mt_sgen_event, SND_SOC_DAPM_PRE_PMU),
+	SND_SOC_DAPM_SUPPLY("SGEN DL SRC", MT6351_AFE_DL_SRC2_CON0_L,
+			    RG_DL_2_SRC_ON_TMP_CTL_PRE_BIT, 0, NULL, 0),
+
+	SND_SOC_DAPM_INPUT("SGEN DL"),
+
+	/* Uplinks */
+	SND_SOC_DAPM_AIF_OUT_E("AIF1TX", "AIF1 Capture", 0,
+			       MT6351_AFE_UL_SRC_CON0_L,
+			       UL_SRC_ON_TMP_CTL, 0,
+			       mt_aif_out_event,
+			       SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_PRE_PMD),
+
+	SND_SOC_DAPM_SUPPLY_S("VUSB33_LDO", SUPPLY_SUBSEQ_ENABLE,
+			      MT6351_LDO_VUSB33_CON0, RG_VUSB33_EN, 0,
+			      NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("VUSB33_LDO_CTRL", SUPPLY_SUBSEQ_SETTING,
+			      MT6351_LDO_VUSB33_CON0, RG_VUSB33_ON_CTRL, 1,
+			      NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY_S("VA18_LDO", SUPPLY_SUBSEQ_ENABLE,
+			      MT6351_LDO_VA18_CON0, RG_VA18_EN, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("VA18_LDO_CTRL", SUPPLY_SUBSEQ_SETTING,
+			      MT6351_LDO_VA18_CON0, RG_VA18_ON_CTRL, 1,
+			      NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY_S("ADC CLKGEN", SUPPLY_SUBSEQ_ENABLE,
+			      MT6351_AUDENC_ANA_CON3, RG_AUDADCCLKRSTB, 0,
+			      mt_adc_clkgen_event,
+			      SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+
+	/* Uplinks MUX */
+	SND_SOC_DAPM_MUX("AIF Out Mux", SND_SOC_NOPM, 0, 0,
+			 &aif_out_mux_control),
+
+	SND_SOC_DAPM_MUX("ADC L Mux", SND_SOC_NOPM, 0, 0,
+			 &adc_left_mux_control),
+	SND_SOC_DAPM_MUX("ADC R Mux", SND_SOC_NOPM, 0, 0,
+			 &adc_right_mux_control),
+
+	SND_SOC_DAPM_ADC("ADC L", NULL,
+			 MT6351_AUDENC_ANA_CON0, RG_AUDADCLPWRUP, 0),
+	SND_SOC_DAPM_ADC("ADC R", NULL,
+			 MT6351_AUDENC_ANA_CON1, RG_AUDADCRPWRUP, 0),
+
+	SND_SOC_DAPM_MUX("PGA L Mux", SND_SOC_NOPM, 0, 0,
+			 &pga_left_mux_control),
+	SND_SOC_DAPM_MUX("PGA R Mux", SND_SOC_NOPM, 0, 0,
+			 &pga_right_mux_control),
+
+	SND_SOC_DAPM_PGA_E("PGA L", MT6351_AUDENC_ANA_CON0, RG_AUDPREAMPLON, 0,
+			   NULL, 0,
+			   mt_pga_left_event,
+			   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+	SND_SOC_DAPM_PGA_E("PGA R", MT6351_AUDENC_ANA_CON1, RG_AUDPREAMPRON, 0,
+			   NULL, 0,
+			   mt_pga_right_event,
+			   SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+
+	/* mic bias for the main mic */
+	SND_SOC_DAPM_SUPPLY_S("Mic Bias 0", SUPPLY_SUBSEQ_MICBIAS,
+			      MT6351_AUDENC_ANA_CON9, RG_AUDPWDBMICBIAS0, 0,
+			      mt_mic_bias_0_event,
+			      SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+	/* mic bias for the reference mic */
+	SND_SOC_DAPM_SUPPLY_S("Mic Bias 2", SUPPLY_SUBSEQ_MICBIAS,
+			      MT6351_AUDENC_ANA_CON9, RG_AUDPWDBMICBIAS2, 0,
+			      mt_mic_bias_2_event,
+			      SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+	/* mic bias for headset mic 1/2 */
+	SND_SOC_DAPM_SUPPLY_S("Mic Bias 1", SUPPLY_SUBSEQ_MICBIAS,
+			      MT6351_AUDENC_ANA_CON10, RG_AUDPWDBMICBIAS1, 0,
+			      mt_mic_bias_1_event,
+			      SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+	SND_SOC_DAPM_SUPPLY_S("Mic Bias 1 DCC pull high", SUPPLY_SUBSEQ_MICBIAS,
+			      MT6351_AUDENC_ANA_CON10,
+			      RG_AUDMICBIAS1DCSW1NEN, 0,
+			      NULL, 0),
+
+	/* UL input */
+	SND_SOC_DAPM_INPUT("AIN0"),
+	SND_SOC_DAPM_INPUT("AIN1"),
+	SND_SOC_DAPM_INPUT("AIN2"),
+	SND_SOC_DAPM_INPUT("AIN3"),
+};
+
+static const struct snd_soc_dapm_route mt6351_dapm_routes[] = {
+	/* Capture */
+	{"AIF1TX", NULL, "AIF Out Mux"},
+	{"AIF1TX", NULL, "VUSB33_LDO"},
+	{"VUSB33_LDO", NULL, "VUSB33_LDO_CTRL"},
+	{"AIF1TX", NULL, "VA18_LDO"},
+	{"VA18_LDO", NULL, "VA18_LDO_CTRL"},
+
+	{"AIF1TX", NULL, "AUDGLB"},
+	{"AIF1TX", NULL, "CLKSQ Audio"},
+
+	{"AIF1TX", NULL, "AFE_ON"},
+
+	{"AIF1TX", NULL, "AUDIO_TOP_AFE_CTL"},
+	{"AIF1TX", NULL, "AUDIO_TOP_ADC_CTL"},
+	{"AIF1TX", NULL, "AUDIO_TOP_PWR_CLK"},
+	{"AIF1TX", NULL, "AUDIO_TOP_PDN_RESERVED"},
+
+	{"AIF Out Mux", "Normal Path", "ADC L"},
+	{"AIF Out Mux", "Normal Path", "ADC R"},
+
+	{"ADC L", NULL, "ADC L Mux"},
+	{"ADC L", NULL, "AUD_CK"},
+	{"ADC L", NULL, "AUDIF_CK"},
+	{"ADC L", NULL, "ADC CLKGEN"},
+	{"ADC R", NULL, "ADC R Mux"},
+	{"ADC R", NULL, "AUD_CK"},
+	{"ADC R", NULL, "AUDIF_CK"},
+	{"ADC R", NULL, "ADC CLKGEN"},
+
+	{"ADC L Mux", "AIN0", "AIN0"},
+	{"ADC L Mux", "Left Preamplifier", "PGA L"},
+
+	{"ADC R Mux", "AIN0", "AIN0"},
+	{"ADC R Mux", "Right Preamplifier", "PGA R"},
+
+	{"PGA L", NULL, "PGA L Mux"},
+	{"PGA R", NULL, "PGA R Mux"},
+
+	{"PGA L Mux", "AIN0", "AIN0"},
+	{"PGA L Mux", "AIN1", "AIN1"},
+	{"PGA L Mux", "AIN2", "AIN2"},
+
+	{"PGA R Mux", "AIN0", "AIN0"},
+	{"PGA R Mux", "AIN3", "AIN3"},
+	{"PGA R Mux", "AIN2", "AIN2"},
+
+	{"AIN0", NULL, "Mic Bias 0"},
+	{"AIN2", NULL, "Mic Bias 2"},
+
+	{"AIN1", NULL, "Mic Bias 1"},
+	{"AIN1", NULL, "Mic Bias 1 DCC pull high"},
+
+	/* DL Supply */
+	{"DL Power Supply", NULL, "AUDGLB"},
+	{"DL Power Supply", NULL, "CLKSQ Audio"},
+	{"DL Power Supply", NULL, "ZCD13M_CK"},
+	{"DL Power Supply", NULL, "AUD_CK"},
+	{"DL Power Supply", NULL, "AUDIF_CK"},
+	{"DL Power Supply", NULL, "AUDNCP_CK"},
+
+	{"DL Power Supply", NULL, "NV Regulator"},
+	{"DL Power Supply", NULL, "AUD_CLK"},
+	{"DL Power Supply", NULL, "IBIST"},
+	{"DL Power Supply", NULL, "LDO"},
+	{"LDO", NULL, "LDO_REMOTE_SENSE"},
+
+	/* DL Digital Supply */
+	{"DL Digital Clock", NULL, "AUDIO_TOP_AFE_CTL"},
+	{"DL Digital Clock", NULL, "AUDIO_TOP_DAC_CTL"},
+	{"DL Digital Clock", NULL, "AUDIO_TOP_PWR_CLK"},
+	{"DL Digital Clock", NULL, "AUDIO_TOP_PDN_RESERVED"},
+	{"DL Digital Clock", NULL, "NCP"},
+	{"DL Digital Clock", NULL, "AFE_ON"},
+
+	{"AIF_RX", NULL, "DL Digital Clock"},
+
+	/* DL Path */
+	{"DAC In Mux", "Normal Path", "AIF_RX"},
+
+	{"DAC In Mux", "Sgen", "SGEN DL"},
+	{"SGEN DL", NULL, "SGEN DL SRC"},
+	{"SGEN DL", NULL, "SGEN MUTE"},
+	{"SGEN DL", NULL, "SGEN DL Enable"},
+	{"SGEN DL", NULL, "DL Digital Clock"},
+
+	{"DACL", NULL, "DAC In Mux"},
+	{"DACL", NULL, "DL Power Supply"},
+	{"DACL", NULL, "DACL_BIASGEN"},
+
+	{"DACR", NULL, "DAC In Mux"},
+	{"DACR", NULL, "DL Power Supply"},
+	{"DACR", NULL, "DACR_BIASGEN"},
+
+	{"LOL Mux", "Playback", "DACL"},
+
+	{"LOL Buffer", NULL, "LOL Mux"},
+	{"LOL Buffer", NULL, "LO Stability Enh"},
+	{"LOL Buffer", NULL, "LOL Bias Gen"},
+
+	{"LINEOUT L", NULL, "LOL Buffer"},
+
+	/* Headphone Path */
+	{"HPL Mux", "Audio Playback", "DACL"},
+	{"HPR Mux", "Audio Playback", "DACR"},
+
+	{"HPL Mux", "LoudSPK Playback", "DACL"},
+	{"HPR Mux", "LoudSPK Playback", "DACR"},
+
+	{"HPL Power", NULL, "HPL Mux"},
+	{"HPR Power", NULL, "HPR Mux"},
+
+	{"Headphone L", NULL, "HPL Power"},
+	{"Headphone R", NULL, "HPR Power"},
+
+	/* Receiver Path */
+	{"RCV Mux", "Voice Playback", "DACL"},
+
+	{"RCV Buffer", NULL, "RCV Mux"},
+	{"RCV Buffer", NULL, "RCV Stability Enh"},
+	{"RCV Buffer", NULL, "RCV Bias Gen"},
+
+	{"Receiver", NULL, "RCV Buffer"},
+};
+
+static int mt6351_codec_init_reg(struct snd_soc_component *cmpnt)
+{
+	int ret = 0;
+
+	/* Disable CLKSQ 26MHz */
+	regmap_update_bits(cmpnt->regmap, MT6351_TOP_CLKSQ, 0x0001, 0x0);
+	/* Disable AUDGLB */
+	regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON9,
+			   0x1000, 0x1000);
+	/* Turn off AUDNCP_CLKDIV engine clock, turn off AUD 26M */
+	regmap_update_bits(cmpnt->regmap, MT6351_TOP_CKPDN_CON0_SET,
+			   0x3800, 0x3800);
+	/* Disable HeadphoneL/HeadphoneR/voice short circuit protection */
+	regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON0,
+			   0xe000, 0xe000);
+	/* [5] = 1, disable LO buffer left short circuit protection */
+	regmap_update_bits(cmpnt->regmap, MT6351_AUDDEC_ANA_CON3,
+			   0x20, 0x20);
+	/* Reverse the PMIC clock */
+	regmap_update_bits(cmpnt->regmap, MT6351_AFE_PMIC_NEWIF_CFG2,
+			   0x8000, 0x8000);
+	return ret;
+}
+
+static int mt6351_codec_probe(struct snd_soc_component *cmpnt)
+{
+	struct mt6351_priv *priv = snd_soc_component_get_drvdata(cmpnt);
+
+	snd_soc_component_init_regmap(cmpnt, priv->regmap);
+
+	mt6351_codec_init_reg(cmpnt);
+	return 0;
+}
+
+static const struct snd_soc_component_driver mt6351_soc_component_driver = {
+	.probe = mt6351_codec_probe,
+	.controls = mt6351_snd_controls,
+	.num_controls = ARRAY_SIZE(mt6351_snd_controls),
+	.dapm_widgets = mt6351_dapm_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(mt6351_dapm_widgets),
+	.dapm_routes = mt6351_dapm_routes,
+	.num_dapm_routes = ARRAY_SIZE(mt6351_dapm_routes),
+};
+
+static int mt6351_codec_driver_probe(struct platform_device *pdev)
+{
+	struct mt6351_priv *priv;
+
+	priv = devm_kzalloc(&pdev->dev,
+			    sizeof(struct mt6351_priv),
+			    GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	dev_set_drvdata(&pdev->dev, priv);
+
+	priv->dev = &pdev->dev;
+
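+	/* the codec has no regmap of its own; it shares the one exposed
+	 * by its parent PMIC MFD device
+	 */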
+	priv->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!priv->regmap)
+		return -ENODEV;
+
+	dev_dbg(priv->dev, "%s(), dev name %s\n",
+		__func__, dev_name(&pdev->dev));
+
+	return devm_snd_soc_register_component(&pdev->dev,
+					       &mt6351_soc_component_driver,
+					       mt6351_dai_driver,
+					       ARRAY_SIZE(mt6351_dai_driver));
+}
+
+static const struct of_device_id mt6351_of_match[] = {
+	{.compatible = "mediatek,mt6351-sound",},
+	{}
+};
+
+static struct platform_driver mt6351_codec_driver = {
+	.driver = {
+		.name = "mt6351-sound",
+		.of_match_table = mt6351_of_match,
+	},
+	.probe = mt6351_codec_driver_probe,
+};
+
+module_platform_driver(mt6351_codec_driver);
+
+/* Module information */
+MODULE_DESCRIPTION("MT6351 ALSA SoC codec driver");
+MODULE_AUTHOR("KaiChieh Chuang <kaichieh.chuang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/codecs/mt6351.h b/sound/soc/codecs/mt6351.h
new file mode 100644
index 0000000..04b2ab6
--- /dev/null
+++ b/sound/soc/codecs/mt6351.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mt6351.h  --  mt6351 ALSA SoC audio codec driver
+ *
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+ */
+
+#ifndef __MT6351_H__
+#define __MT6351_H__
+
+#define MT6351_AFE_UL_DL_CON0               (0x2000 + 0x0000)
+#define MT6351_AFE_DL_SRC2_CON0_H           (0x2000 + 0x0002)
+#define MT6351_AFE_DL_SRC2_CON0_L           (0x2000 + 0x0004)
+#define MT6351_AFE_DL_SDM_CON0              (0x2000 + 0x0006)
+#define MT6351_AFE_DL_SDM_CON1              (0x2000 + 0x0008)
+#define MT6351_AFE_UL_SRC_CON0_H            (0x2000 + 0x000a)
+#define MT6351_AFE_UL_SRC_CON0_L            (0x2000 + 0x000c)
+#define MT6351_AFE_UL_SRC_CON1_H            (0x2000 + 0x000e)
+#define MT6351_AFE_UL_SRC_CON1_L            (0x2000 + 0x0010)
+#define MT6351_AFE_TOP_CON0                 (0x2000 + 0x0012)
+#define MT6351_AUDIO_TOP_CON0               (0x2000 + 0x0014)
+#define MT6351_AFE_DL_SRC_MON0              (0x2000 + 0x0016)
+#define MT6351_AFE_DL_SDM_TEST0             (0x2000 + 0x0018)
+#define MT6351_AFE_MON_DEBUG0               (0x2000 + 0x001a)
+#define MT6351_AFUNC_AUD_CON0               (0x2000 + 0x001c)
+#define MT6351_AFUNC_AUD_CON1               (0x2000 + 0x001e)
+#define MT6351_AFUNC_AUD_CON2               (0x2000 + 0x0020)
+#define MT6351_AFUNC_AUD_CON3               (0x2000 + 0x0022)
+#define MT6351_AFUNC_AUD_CON4               (0x2000 + 0x0024)
+#define MT6351_AFUNC_AUD_MON0               (0x2000 + 0x0026)
+#define MT6351_AFUNC_AUD_MON1               (0x2000 + 0x0028)
+#define MT6351_AFE_UP8X_FIFO_CFG0           (0x2000 + 0x002c)
+#define MT6351_AFE_UP8X_FIFO_LOG_MON0       (0x2000 + 0x002e)
+#define MT6351_AFE_UP8X_FIFO_LOG_MON1       (0x2000 + 0x0030)
+#define MT6351_AFE_DL_DC_COMP_CFG0          (0x2000 + 0x0032)
+#define MT6351_AFE_DL_DC_COMP_CFG1          (0x2000 + 0x0034)
+#define MT6351_AFE_DL_DC_COMP_CFG2          (0x2000 + 0x0036)
+#define MT6351_AFE_PMIC_NEWIF_CFG0          (0x2000 + 0x0038)
+#define MT6351_AFE_PMIC_NEWIF_CFG1          (0x2000 + 0x003a)
+#define MT6351_AFE_PMIC_NEWIF_CFG2          (0x2000 + 0x003c)
+#define MT6351_AFE_PMIC_NEWIF_CFG3          (0x2000 + 0x003e)
+#define MT6351_AFE_SGEN_CFG0                (0x2000 + 0x0040)
+#define MT6351_AFE_SGEN_CFG1                (0x2000 + 0x0042)
+#define MT6351_AFE_ADDA2_UP8X_FIFO_LOG_MON0 (0x2000 + 0x004c)
+#define MT6351_AFE_ADDA2_UP8X_FIFO_LOG_MON1 (0x2000 + 0x004e)
+#define MT6351_AFE_ADDA2_PMIC_NEWIF_CFG0    (0x2000 + 0x0050)
+#define MT6351_AFE_ADDA2_PMIC_NEWIF_CFG1    (0x2000 + 0x0052)
+#define MT6351_AFE_ADDA2_PMIC_NEWIF_CFG2    (0x2000 + 0x0054)
+#define MT6351_AFE_DCCLK_CFG0               (0x2000 + 0x0090)
+#define MT6351_AFE_DCCLK_CFG1               (0x2000 + 0x0092)
+#define MT6351_AFE_HPANC_CFG0               (0x2000 + 0x0094)
+#define MT6351_AFE_NCP_CFG0                 (0x2000 + 0x0096)
+#define MT6351_AFE_NCP_CFG1                 (0x2000 + 0x0098)
+
+#define MT6351_TOP_CKPDN_CON0      0x023A
+#define MT6351_TOP_CKPDN_CON0_SET  0x023C
+#define MT6351_TOP_CKPDN_CON0_CLR  0x023E
+
+#define MT6351_TOP_CLKSQ           0x029A
+#define MT6351_TOP_CLKSQ_SET       0x029C
+#define MT6351_TOP_CLKSQ_CLR       0x029E
+
+#define MT6351_ZCD_CON0            0x0800
+#define MT6351_ZCD_CON1            0x0802
+#define MT6351_ZCD_CON2            0x0804
+#define MT6351_ZCD_CON3            0x0806
+#define MT6351_ZCD_CON4            0x0808
+#define MT6351_ZCD_CON5            0x080A
+
+#define MT6351_LDO_VA18_CON0       0x0A00
+#define MT6351_LDO_VA18_CON1       0x0A02
+#define MT6351_LDO_VUSB33_CON0     0x0A16
+#define MT6351_LDO_VUSB33_CON1     0x0A18
+
+#define MT6351_AUDDEC_ANA_CON0     0x0CF2
+#define MT6351_AUDDEC_ANA_CON1     0x0CF4
+#define MT6351_AUDDEC_ANA_CON2     0x0CF6
+#define MT6351_AUDDEC_ANA_CON3     0x0CF8
+#define MT6351_AUDDEC_ANA_CON4     0x0CFA
+#define MT6351_AUDDEC_ANA_CON5     0x0CFC
+#define MT6351_AUDDEC_ANA_CON6     0x0CFE
+#define MT6351_AUDDEC_ANA_CON7     0x0D00
+#define MT6351_AUDDEC_ANA_CON8     0x0D02
+#define MT6351_AUDDEC_ANA_CON9     0x0D04
+#define MT6351_AUDDEC_ANA_CON10    0x0D06
+
+#define MT6351_AUDENC_ANA_CON0     0x0D08
+#define MT6351_AUDENC_ANA_CON1     0x0D0A
+#define MT6351_AUDENC_ANA_CON2     0x0D0C
+#define MT6351_AUDENC_ANA_CON3     0x0D0E
+#define MT6351_AUDENC_ANA_CON4     0x0D10
+#define MT6351_AUDENC_ANA_CON5     0x0D12
+#define MT6351_AUDENC_ANA_CON6     0x0D14
+#define MT6351_AUDENC_ANA_CON7     0x0D16
+#define MT6351_AUDENC_ANA_CON8     0x0D18
+#define MT6351_AUDENC_ANA_CON9     0x0D1A
+#define MT6351_AUDENC_ANA_CON10    0x0D1C
+#define MT6351_AUDENC_ANA_CON11    0x0D1E
+#define MT6351_AUDENC_ANA_CON12    0x0D20
+#define MT6351_AUDENC_ANA_CON13    0x0D22
+#define MT6351_AUDENC_ANA_CON14    0x0D24
+#define MT6351_AUDENC_ANA_CON15    0x0D26
+#define MT6351_AUDENC_ANA_CON16    0x0D28
+
+#endif
diff --git a/sound/soc/codecs/nau8810.c b/sound/soc/codecs/nau8810.c
index ca2ba1c..bfd74b8 100644
--- a/sound/soc/codecs/nau8810.c
+++ b/sound/soc/codecs/nau8810.c
@@ -373,9 +373,11 @@
 };
 
 /* PGA Mute */
-static const struct snd_kcontrol_new nau8810_inpga_mute[] = {
+static const struct snd_kcontrol_new nau8810_pgaboost_mixer_controls[] = {
 	SOC_DAPM_SINGLE("PGA Mute Switch", NAU8810_REG_PGAGAIN,
-		NAU8810_PGAMT_SFT, 1, 0),
+		NAU8810_PGAMT_SFT, 1, 1),
+	SOC_DAPM_SINGLE("PMIC PGA Switch", NAU8810_REG_ADCBOOST,
+		NAU8810_PMICBSTGAIN_SFT, 0x7, 0),
 };
 
 /* Input PGA */
@@ -386,11 +388,6 @@
 		NAU8810_PMICPGA_SFT, 1, 0),
 };
 
-/* Mic Input boost vol */
-static const struct snd_kcontrol_new nau8810_mic_boost_controls =
-	SOC_DAPM_SINGLE("Mic Volume", NAU8810_REG_ADCBOOST,
-		NAU8810_PMICBSTGAIN_SFT, 0x7, 0);
-
 /* Loopback Switch */
 static const struct snd_kcontrol_new nau8810_loopback =
 	SOC_DAPM_SINGLE("Switch", NAU8810_REG_COMP,
@@ -429,8 +426,8 @@
 		NAU8810_PGA_EN_SFT, 0, nau8810_inpga,
 		ARRAY_SIZE(nau8810_inpga)),
 	SND_SOC_DAPM_MIXER("Input Boost Stage", NAU8810_REG_POWER2,
-		NAU8810_BST_EN_SFT, 0, nau8810_inpga_mute,
-		ARRAY_SIZE(nau8810_inpga_mute)),
+		NAU8810_BST_EN_SFT, 0, nau8810_pgaboost_mixer_controls,
+		ARRAY_SIZE(nau8810_pgaboost_mixer_controls)),
 
 	SND_SOC_DAPM_SUPPLY("Mic Bias", NAU8810_REG_POWER1,
 		NAU8810_MICBIAS_EN_SFT, 0, NULL, 0),
@@ -469,8 +466,8 @@
 	/* Input Boost Stage */
 	{"ADC", NULL, "Input Boost Stage"},
 	{"ADC", NULL, "PLL", check_mclk_select_pll},
-	{"Input Boost Stage", NULL, "Input PGA"},
-	{"Input Boost Stage", NULL, "MICP"},
+	{"Input Boost Stage", "PGA Mute Switch", "Input PGA"},
+	{"Input Boost Stage", "PMIC PGA Switch", "MICP"},
 
 	/* Input PGA */
 	{"Input PGA", NULL, "Mic Bias"},
diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c
index 637e952..6bd1445 100644
--- a/sound/soc/codecs/nau8824.c
+++ b/sound/soc/codecs/nau8824.c
@@ -205,11 +205,11 @@
 	if (timeout) {
 		ret = down_timeout(&nau8824->jd_sem, timeout);
 		if (ret < 0)
-			dev_warn(nau8824->dev, "Acquire semaphone timeout\n");
+			dev_warn(nau8824->dev, "Acquire semaphore timeout\n");
 	} else {
 		ret = down_interruptible(&nau8824->jd_sem);
 		if (ret < 0)
-			dev_warn(nau8824->dev, "Acquire semaphone fail\n");
+			dev_warn(nau8824->dev, "Acquire semaphore fail\n");
 	}
 
 	return ret;
@@ -409,6 +409,15 @@
 
 	SOC_SINGLE("DACL LR Mix", NAU8824_REG_DAC_MUTE_CTRL, 0, 1, 0),
 	SOC_SINGLE("DACR LR Mix", NAU8824_REG_DAC_MUTE_CTRL, 1, 1, 0),
+
+	SOC_SINGLE("THD for key media",
+		NAU8824_REG_VDET_THRESHOLD_1, 8, 0xff, 0),
+	SOC_SINGLE("THD for key voice command",
+		NAU8824_REG_VDET_THRESHOLD_1, 0, 0xff, 0),
+	SOC_SINGLE("THD for key volume up",
+		NAU8824_REG_VDET_THRESHOLD_2, 8, 0xff, 0),
+	SOC_SINGLE("THD for key volume down",
+		NAU8824_REG_VDET_THRESHOLD_2, 0, 0xff, 0),
 };
 
 static int nau8824_output_dac_event(struct snd_soc_dapm_widget *w,
diff --git a/sound/soc/codecs/pcm1789.c b/sound/soc/codecs/pcm1789.c
index 507ac94..21f1521 100644
--- a/sound/soc/codecs/pcm1789.c
+++ b/sound/soc/codecs/pcm1789.c
@@ -3,7 +3,7 @@
 // Copyright (C) 2018 Bootlin
 // Mylène Josserand <mylene.josserand@bootlin.com>
 
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/workqueue.h>
 
diff --git a/sound/soc/codecs/pcm512x-i2c.c b/sound/soc/codecs/pcm512x-i2c.c
index 5f9c069..0fe5ced 100644
--- a/sound/soc/codecs/pcm512x-i2c.c
+++ b/sound/soc/codecs/pcm512x-i2c.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
+#include <linux/acpi.h>
 
 #include "pcm512x.h"
 
@@ -52,6 +53,7 @@
 };
 MODULE_DEVICE_TABLE(i2c, pcm512x_i2c_id);
 
+#if defined(CONFIG_OF)
 static const struct of_device_id pcm512x_of_match[] = {
 	{ .compatible = "ti,pcm5121", },
 	{ .compatible = "ti,pcm5122", },
@@ -60,6 +62,18 @@
 	{ }
 };
 MODULE_DEVICE_TABLE(of, pcm512x_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id pcm512x_acpi_match[] = {
+	{ "104C5121", 0 },
+	{ "104C5122", 0 },
+	{ "104C5141", 0 },
+	{ "104C5142", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, pcm512x_acpi_match);
+#endif
 
 static struct i2c_driver pcm512x_i2c_driver = {
 	.probe 		= pcm512x_i2c_probe,
@@ -67,7 +81,8 @@
 	.id_table	= pcm512x_i2c_id,
 	.driver		= {
 		.name	= "pcm512x",
-		.of_match_table = pcm512x_of_match,
+		.of_match_table = of_match_ptr(pcm512x_of_match),
+		.acpi_match_table = ACPI_PTR(pcm512x_acpi_match),
 		.pm     = &pcm512x_pm_ops,
 	},
 };
diff --git a/sound/soc/codecs/rt1305.c b/sound/soc/codecs/rt1305.c
new file mode 100644
index 0000000..f4c8c45
--- /dev/null
+++ b/sound/soc/codecs/rt1305.c
@@ -0,0 +1,1191 @@
+/*
+ * rt1305.c  --  RT1305 ALSA SoC amplifier component driver
+ *
+ * Copyright 2018 Realtek Semiconductor Corp.
+ * Author: Shuming Fan <shumingf@realtek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/acpi.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <linux/of_gpio.h>
+#include <linux/platform_device.h>
+#include <linux/firmware.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+
+#include "rl6231.h"
+#include "rt1305.h"
+
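+/*
+ * Private "PR" registers are accessed indirectly through an index/data
+ * register pair; expose them to regmap as a virtual range placed above
+ * the directly addressable register space.
+ */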
+#define RT1305_PR_RANGE_BASE (0xff + 1)
+#define RT1305_PR_SPACING 0x100
+
+#define RT1305_PR_BASE (RT1305_PR_RANGE_BASE + (0 * RT1305_PR_SPACING))
+
+
+static const struct regmap_range_cfg rt1305_ranges[] = {
+	{
+		.name = "PR",
+		.range_min = RT1305_PR_BASE,
+		.range_max = RT1305_PR_BASE + 0xff,
+		.selector_reg = RT1305_PRIV_INDEX,
+		.selector_mask = 0xff,
+		.selector_shift = 0x0,
+		.window_start = RT1305_PRIV_DATA,
+		.window_len = 0x1,
+	},
+};
+
+static const struct reg_sequence init_list[] = {
+	{ RT1305_PR_BASE + 0xcf, 0x5548 },
+	{ RT1305_PR_BASE + 0x5d, 0x0442 },
+	{ RT1305_PR_BASE + 0xc1, 0x0320 },
+
+	{ RT1305_POWER_STATUS, 0x0000 },
+
+	{ RT1305_SPK_TEMP_PROTECTION_1, 0xd6de },
+	{ RT1305_SPK_TEMP_PROTECTION_2, 0x0707 },
+	{ RT1305_SPK_TEMP_PROTECTION_3, 0x4090 },
+
+	{ RT1305_DAC_SET_1, 0xdfdf },	/* 4 ohm 2W */
+	{ RT1305_ADC_SET_3, 0x0219 },
+	{ RT1305_ADC_SET_1, 0x170f },	/* 0.2 ohm RSense */
+};
+#define RT1305_INIT_REG_LEN ARRAY_SIZE(init_list)
+
+struct rt1305_priv {
+	struct snd_soc_component *component;
+	struct regmap *regmap;
+
+	int sysclk;
+	int sysclk_src;
+	int lrck;
+	int bclk;
+	int master;
+
+	int pll_src;
+	int pll_in;
+	int pll_out;
+};
+
+static const struct reg_default rt1305_reg[] = {
+	{ 0x04, 0x0400 },
+	{ 0x05, 0x0880 },
+	{ 0x06, 0x0000 },
+	{ 0x07, 0x3100 },
+	{ 0x08, 0x8000 },
+	{ 0x09, 0x0000 },
+	{ 0x0a, 0x087e },
+	{ 0x0b, 0x0020 },
+	{ 0x0c, 0x0802 },
+	{ 0x0d, 0x0020 },
+	{ 0x10, 0x1d1d },
+	{ 0x11, 0x1d1d },
+	{ 0x12, 0xffff },
+	{ 0x14, 0x000c },
+	{ 0x16, 0x1717 },
+	{ 0x17, 0x4000 },
+	{ 0x18, 0x0019 },
+	{ 0x20, 0x0000 },
+	{ 0x22, 0x0000 },
+	{ 0x24, 0x0000 },
+	{ 0x26, 0x0000 },
+	{ 0x28, 0x0000 },
+	{ 0x2a, 0x4000 },
+	{ 0x2b, 0x3000 },
+	{ 0x2d, 0x6000 },
+	{ 0x2e, 0x0000 },
+	{ 0x2f, 0x8000 },
+	{ 0x32, 0x0000 },
+	{ 0x39, 0x0001 },
+	{ 0x3a, 0x0000 },
+	{ 0x3b, 0x1020 },
+	{ 0x3c, 0x0000 },
+	{ 0x3d, 0x0000 },
+	{ 0x3e, 0x4c00 },
+	{ 0x3f, 0x3000 },
+	{ 0x40, 0x000c },
+	{ 0x42, 0x0400 },
+	{ 0x46, 0xc22c },
+	{ 0x47, 0x0000 },
+	{ 0x4b, 0x0000 },
+	{ 0x4c, 0x0300 },
+	{ 0x4f, 0xf000 },
+	{ 0x50, 0xc200 },
+	{ 0x51, 0x1f1f },
+	{ 0x52, 0x01f0 },
+	{ 0x53, 0x407f },
+	{ 0x54, 0xffff },
+	{ 0x58, 0x4005 },
+	{ 0x5e, 0x0000 },
+	{ 0x5f, 0x0000 },
+	{ 0x60, 0xee13 },
+	{ 0x62, 0x0000 },
+	{ 0x63, 0x5f5f },
+	{ 0x64, 0x0040 },
+	{ 0x65, 0x4000 },
+	{ 0x66, 0x4004 },
+	{ 0x67, 0x0306 },
+	{ 0x68, 0x8c04 },
+	{ 0x69, 0xe021 },
+	{ 0x6a, 0x0000 },
+	{ 0x6c, 0xaaaa },
+	{ 0x70, 0x0333 },
+	{ 0x71, 0x3330 },
+	{ 0x72, 0x3333 },
+	{ 0x73, 0x3300 },
+	{ 0x74, 0x0000 },
+	{ 0x75, 0x0000 },
+	{ 0x76, 0x0000 },
+	{ 0x7a, 0x0003 },
+	{ 0x7c, 0x10ec },
+	{ 0x7e, 0x6251 },
+	{ 0x80, 0x0800 },
+	{ 0x81, 0x4000 },
+	{ 0x82, 0x0000 },
+	{ 0x90, 0x7a01 },
+	{ 0x91, 0x8431 },
+	{ 0x92, 0x0180 },
+	{ 0x93, 0x0000 },
+	{ 0x94, 0x0000 },
+	{ 0x95, 0x0000 },
+	{ 0x96, 0x0000 },
+	{ 0x97, 0x0000 },
+	{ 0x98, 0x0000 },
+	{ 0x99, 0x0000 },
+	{ 0x9a, 0x0000 },
+	{ 0x9b, 0x0000 },
+	{ 0x9c, 0x0000 },
+	{ 0x9d, 0x0000 },
+	{ 0x9e, 0x0000 },
+	{ 0x9f, 0x0000 },
+	{ 0xa0, 0x0000 },
+	{ 0xb0, 0x8200 },
+	{ 0xb1, 0x00ff },
+	{ 0xb2, 0x0008 },
+	{ 0xc0, 0x0200 },
+	{ 0xc1, 0x0000 },
+	{ 0xc2, 0x0000 },
+	{ 0xc3, 0x0000 },
+	{ 0xc4, 0x0000 },
+	{ 0xc5, 0x0000 },
+	{ 0xc6, 0x0000 },
+	{ 0xc7, 0x0000 },
+	{ 0xc8, 0x0000 },
+	{ 0xc9, 0x0000 },
+	{ 0xca, 0x0200 },
+	{ 0xcb, 0x0000 },
+	{ 0xcc, 0x0000 },
+	{ 0xcd, 0x0000 },
+	{ 0xce, 0x0000 },
+	{ 0xcf, 0x0000 },
+	{ 0xd0, 0x0000 },
+	{ 0xd1, 0x0000 },
+	{ 0xd2, 0x0000 },
+	{ 0xd3, 0x0000 },
+	{ 0xd4, 0x0200 },
+	{ 0xd5, 0x0000 },
+	{ 0xd6, 0x0000 },
+	{ 0xd7, 0x0000 },
+	{ 0xd8, 0x0000 },
+	{ 0xd9, 0x0000 },
+	{ 0xda, 0x0000 },
+	{ 0xdb, 0x0000 },
+	{ 0xdc, 0x0000 },
+	{ 0xdd, 0x0000 },
+	{ 0xde, 0x0200 },
+	{ 0xdf, 0x0000 },
+	{ 0xe0, 0x0000 },
+	{ 0xe1, 0x0000 },
+	{ 0xe2, 0x0000 },
+	{ 0xe3, 0x0000 },
+	{ 0xe4, 0x0000 },
+	{ 0xe5, 0x0000 },
+	{ 0xe6, 0x0000 },
+	{ 0xe7, 0x0000 },
+	{ 0xe8, 0x0200 },
+	{ 0xe9, 0x0000 },
+	{ 0xea, 0x0000 },
+	{ 0xeb, 0x0000 },
+	{ 0xec, 0x0000 },
+	{ 0xed, 0x0000 },
+	{ 0xee, 0x0000 },
+	{ 0xef, 0x0000 },
+	{ 0xf0, 0x0000 },
+	{ 0xf1, 0x0000 },
+	{ 0xf2, 0x0200 },
+	{ 0xf3, 0x0000 },
+	{ 0xf4, 0x0000 },
+	{ 0xf5, 0x0000 },
+	{ 0xf6, 0x0000 },
+	{ 0xf7, 0x0000 },
+	{ 0xf8, 0x0000 },
+	{ 0xf9, 0x0000 },
+	{ 0xfa, 0x0000 },
+	{ 0xfb, 0x0000 },
+};
+
+static int rt1305_reg_init(struct snd_soc_component *component)
+{
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+
+	regmap_multi_reg_write(rt1305->regmap, init_list, RT1305_INIT_REG_LEN);
+	return 0;
+}
+
+static bool rt1305_volatile_register(struct device *dev, unsigned int reg)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(rt1305_ranges); i++) {
+		if (reg >= rt1305_ranges[i].range_min &&
+			reg <= rt1305_ranges[i].range_max) {
+			return true;
+		}
+	}
+
+	switch (reg) {
+	case RT1305_RESET:
+	case RT1305_SPDIF_IN_SET_1:
+	case RT1305_SPDIF_IN_SET_2:
+	case RT1305_SPDIF_IN_SET_3:
+	case RT1305_POWER_CTRL_2:
+	case RT1305_CLOCK_DETECT:
+	case RT1305_BIQUAD_SET_1:
+	case RT1305_BIQUAD_SET_2:
+	case RT1305_EQ_SET_2:
+	case RT1305_SPK_TEMP_PROTECTION_0:
+	case RT1305_SPK_TEMP_PROTECTION_2:
+	case RT1305_SPK_DC_DETECT_1:
+	case RT1305_SILENCE_DETECT:
+	case RT1305_VERSION_ID:
+	case RT1305_VENDOR_ID:
+	case RT1305_DEVICE_ID:
+	case RT1305_EFUSE_1:
+	case RT1305_EFUSE_3:
+	case RT1305_DC_CALIB_1:
+	case RT1305_DC_CALIB_3:
+	case RT1305_DAC_OFFSET_1:
+	case RT1305_DAC_OFFSET_2:
+	case RT1305_DAC_OFFSET_3:
+	case RT1305_DAC_OFFSET_4:
+	case RT1305_DAC_OFFSET_5:
+	case RT1305_DAC_OFFSET_6:
+	case RT1305_DAC_OFFSET_7:
+	case RT1305_DAC_OFFSET_8:
+	case RT1305_DAC_OFFSET_9:
+	case RT1305_DAC_OFFSET_10:
+	case RT1305_DAC_OFFSET_11:
+	case RT1305_TRIM_1:
+	case RT1305_TRIM_2:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool rt1305_readable_register(struct device *dev, unsigned int reg)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(rt1305_ranges); i++) {
+		if (reg >= rt1305_ranges[i].range_min &&
+			reg <= rt1305_ranges[i].range_max) {
+			return true;
+		}
+	}
+
+	switch (reg) {
+	case RT1305_RESET:
+	case RT1305_CLK_1 ... RT1305_CAL_EFUSE_CLOCK:
+	case RT1305_PLL0_1 ... RT1305_PLL1_2:
+	case RT1305_MIXER_CTRL_1:
+	case RT1305_MIXER_CTRL_2:
+	case RT1305_DAC_SET_1:
+	case RT1305_DAC_SET_2:
+	case RT1305_ADC_SET_1:
+	case RT1305_ADC_SET_2:
+	case RT1305_ADC_SET_3:
+	case RT1305_PATH_SET:
+	case RT1305_SPDIF_IN_SET_1:
+	case RT1305_SPDIF_IN_SET_2:
+	case RT1305_SPDIF_IN_SET_3:
+	case RT1305_SPDIF_OUT_SET_1:
+	case RT1305_SPDIF_OUT_SET_2:
+	case RT1305_SPDIF_OUT_SET_3:
+	case RT1305_I2S_SET_1:
+	case RT1305_I2S_SET_2:
+	case RT1305_PBTL_MONO_MODE_SRC:
+	case RT1305_MANUALLY_I2C_DEVICE:
+	case RT1305_POWER_STATUS:
+	case RT1305_POWER_CTRL_1:
+	case RT1305_POWER_CTRL_2:
+	case RT1305_POWER_CTRL_3:
+	case RT1305_POWER_CTRL_4:
+	case RT1305_POWER_CTRL_5:
+	case RT1305_CLOCK_DETECT:
+	case RT1305_BIQUAD_SET_1:
+	case RT1305_BIQUAD_SET_2:
+	case RT1305_ADJUSTED_HPF_1:
+	case RT1305_ADJUSTED_HPF_2:
+	case RT1305_EQ_SET_1:
+	case RT1305_EQ_SET_2:
+	case RT1305_SPK_TEMP_PROTECTION_0:
+	case RT1305_SPK_TEMP_PROTECTION_1:
+	case RT1305_SPK_TEMP_PROTECTION_2:
+	case RT1305_SPK_TEMP_PROTECTION_3:
+	case RT1305_SPK_DC_DETECT_1:
+	case RT1305_SPK_DC_DETECT_2:
+	case RT1305_LOUDNESS:
+	case RT1305_THERMAL_FOLD_BACK_1:
+	case RT1305_THERMAL_FOLD_BACK_2:
+	case RT1305_SILENCE_DETECT ... RT1305_SPK_EXCURSION_LIMITER_7:
+	case RT1305_VERSION_ID:
+	case RT1305_VENDOR_ID:
+	case RT1305_DEVICE_ID:
+	case RT1305_EFUSE_1:
+	case RT1305_EFUSE_2:
+	case RT1305_EFUSE_3:
+	case RT1305_DC_CALIB_1:
+	case RT1305_DC_CALIB_2:
+	case RT1305_DC_CALIB_3:
+	case RT1305_DAC_OFFSET_1 ... RT1305_DAC_OFFSET_14:
+	case RT1305_TRIM_1:
+	case RT1305_TRIM_2:
+	case RT1305_TUNE_INTERNAL_OSC:
+	case RT1305_BIQUAD1_H0_L_28_16 ... RT1305_BIQUAD3_A2_R_15_0:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -9435, 37, 0);
+
+static const char * const rt1305_rx_data_ch_select[] = {
+	"LR",
+	"RL",
+	"Copy L",
+	"Copy R",
+};
+
+static SOC_ENUM_SINGLE_DECL(rt1305_rx_data_ch_enum, RT1305_I2S_SET_2, 2,
+	rt1305_rx_data_ch_select);
+
+static void rt1305_reset(struct regmap *regmap)
+{
+	regmap_write(regmap, RT1305_RESET, 0);
+}
+
+static const struct snd_kcontrol_new rt1305_snd_controls[] = {
+	SOC_DOUBLE_TLV("DAC Playback Volume", RT1305_DAC_SET_1,
+			8, 0, 0xff, 0, dac_vol_tlv),
+
+	/* I2S Data Channel Selection */
+	SOC_ENUM("RX Channel Select", rt1305_rx_data_ch_enum),
+};
+
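+/*
+ * Connection checks for the DAPM routes below: PLL0 is only needed when
+ * the PLL chain runs from the internal RC clock, and PLL1 only when the
+ * system clock is derived from the PLL.
+ */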
+static int rt1305_is_rc_clk_from_pll(struct snd_soc_dapm_widget *source,
+			 struct snd_soc_dapm_widget *sink)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(source->dapm);
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+	unsigned int val;
+
+	snd_soc_component_read(component, RT1305_CLK_1, &val);
+
+	if (rt1305->sysclk_src == RT1305_FS_SYS_PRE_S_PLL1 &&
+		(val & RT1305_SEL_PLL_SRC_2_RCCLK))
+		return 1;
+	else
+		return 0;
+}
+
+static int rt1305_is_sys_clk_from_pll(struct snd_soc_dapm_widget *source,
+			 struct snd_soc_dapm_widget *sink)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(source->dapm);
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+
+	if (rt1305->sysclk_src == RT1305_FS_SYS_PRE_S_PLL1)
+		return 1;
+	else
+		return 0;
+}
+
+static int rt1305_classd_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		snd_soc_component_update_bits(component, RT1305_POWER_CTRL_1,
+			RT1305_POW_PDB_JD_MASK, RT1305_POW_PDB_JD);
+		break;
+	case SND_SOC_DAPM_PRE_PMD:
+		snd_soc_component_update_bits(component, RT1305_POWER_CTRL_1,
+			RT1305_POW_PDB_JD_MASK, 0);
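+		/* delay 150-200 ms, presumably to let the output stage
+		 * settle before the supplies are removed
+		 */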
+		usleep_range(150000, 200000);
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+static const struct snd_kcontrol_new rt1305_sto_dac_l =
+	SOC_DAPM_SINGLE("Switch", RT1305_DAC_SET_2,
+		RT1305_DVOL_MUTE_L_EN_SFT, 1, 1);
+
+static const struct snd_kcontrol_new rt1305_sto_dac_r =
+	SOC_DAPM_SINGLE("Switch", RT1305_DAC_SET_2,
+		RT1305_DVOL_MUTE_R_EN_SFT, 1, 1);
+
+static const struct snd_soc_dapm_widget rt1305_dapm_widgets[] = {
+	SND_SOC_DAPM_SUPPLY("PLL0", RT1305_POWER_CTRL_1,
+		RT1305_POW_PLL0_EN_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("PLL1", RT1305_POWER_CTRL_1,
+		RT1305_POW_PLL1_EN_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("MBIAS", RT1305_POWER_CTRL_1,
+		RT1305_POW_MBIAS_LV_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("BG MBIAS", RT1305_POWER_CTRL_1,
+		RT1305_POW_BG_MBIAS_LV_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("LDO2", RT1305_POWER_CTRL_1,
+		RT1305_POW_LDO2_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("BG2", RT1305_POWER_CTRL_1,
+		RT1305_POW_BG2_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("LDO2 IB2", RT1305_POWER_CTRL_1,
+		RT1305_POW_LDO2_IB2_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("VREF", RT1305_POWER_CTRL_1,
+		RT1305_POW_VREF_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("VREF1", RT1305_POWER_CTRL_1,
+		RT1305_POW_VREF1_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("VREF2", RT1305_POWER_CTRL_1,
+		RT1305_POW_VREF2_BIT, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("DISC VREF", RT1305_POWER_CTRL_2,
+		RT1305_POW_DISC_VREF_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("FASTB VREF", RT1305_POWER_CTRL_2,
+		RT1305_POW_FASTB_VREF_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ULTRA FAST VREF", RT1305_POWER_CTRL_2,
+		RT1305_POW_ULTRA_FAST_VREF_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CHOP DAC", RT1305_POWER_CTRL_2,
+		RT1305_POW_CKXEN_DAC_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CKGEN DAC", RT1305_POWER_CTRL_2,
+		RT1305_POW_EN_CKGEN_DAC_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CLAMP", RT1305_POWER_CTRL_2,
+		RT1305_POW_CLAMP_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("BUFL", RT1305_POWER_CTRL_2,
+		RT1305_POW_BUFL_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("BUFR", RT1305_POWER_CTRL_2,
+		RT1305_POW_BUFR_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CKGEN ADC", RT1305_POWER_CTRL_2,
+		RT1305_POW_EN_CKGEN_ADC_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ADC3 L", RT1305_POWER_CTRL_2,
+		RT1305_POW_ADC3_L_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ADC3 R", RT1305_POWER_CTRL_2,
+		RT1305_POW_ADC3_R_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("TRIOSC", RT1305_POWER_CTRL_2,
+		RT1305_POW_TRIOSC_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("AVDD1", RT1305_POWER_CTRL_2,
+		RT1305_POR_AVDD1_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("AVDD2", RT1305_POWER_CTRL_2,
+		RT1305_POR_AVDD2_BIT, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("VSENSE R", RT1305_POWER_CTRL_3,
+		RT1305_POW_VSENSE_RCH_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("VSENSE L", RT1305_POWER_CTRL_3,
+		RT1305_POW_VSENSE_LCH_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ISENSE R", RT1305_POWER_CTRL_3,
+		RT1305_POW_ISENSE_RCH_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ISENSE L", RT1305_POWER_CTRL_3,
+		RT1305_POW_ISENSE_LCH_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("POR AVDD1", RT1305_POWER_CTRL_3,
+		RT1305_POW_POR_AVDD1_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("POR AVDD2", RT1305_POWER_CTRL_3,
+		RT1305_POW_POR_AVDD2_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("VCM 6172", RT1305_POWER_CTRL_3,
+		RT1305_EN_VCM_6172_BIT, 0, NULL, 0),
+
+	/* Audio Interface */
+	SND_SOC_DAPM_AIF_IN("AIF1RX", "AIF1 Playback", 0, SND_SOC_NOPM, 0, 0),
+
+	/* Digital Interface */
+	SND_SOC_DAPM_SUPPLY("DAC L Power", RT1305_POWER_CTRL_2,
+		RT1305_POW_DAC1_L_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("DAC R Power", RT1305_POWER_CTRL_2,
+		RT1305_POW_DAC1_R_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_DAC("DAC", NULL, SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_SWITCH("DAC L", SND_SOC_NOPM, 0, 0, &rt1305_sto_dac_l),
+	SND_SOC_DAPM_SWITCH("DAC R", SND_SOC_NOPM, 0, 0, &rt1305_sto_dac_r),
+
+	/* Output Lines */
+	SND_SOC_DAPM_PGA_E("CLASS D", SND_SOC_NOPM, 0, 0, NULL, 0,
+		rt1305_classd_event,
+		SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMU),
+	SND_SOC_DAPM_OUTPUT("SPOL"),
+	SND_SOC_DAPM_OUTPUT("SPOR"),
+};
+
+static const struct snd_soc_dapm_route rt1305_dapm_routes[] = {
+	{ "DAC", NULL, "AIF1RX" },
+
+	{ "DAC", NULL, "PLL0", rt1305_is_rc_clk_from_pll },
+	{ "DAC", NULL, "PLL1", rt1305_is_sys_clk_from_pll },
+
+	{ "DAC", NULL, "MBIAS" },
+	{ "DAC", NULL, "BG MBIAS" },
+	{ "DAC", NULL, "LDO2" },
+	{ "DAC", NULL, "BG2" },
+	{ "DAC", NULL, "LDO2 IB2" },
+	{ "DAC", NULL, "VREF" },
+	{ "DAC", NULL, "VREF1" },
+	{ "DAC", NULL, "VREF2" },
+
+	{ "DAC", NULL, "DISC VREF" },
+	{ "DAC", NULL, "FASTB VREF" },
+	{ "DAC", NULL, "ULTRA FAST VREF" },
+	{ "DAC", NULL, "CHOP DAC" },
+	{ "DAC", NULL, "CKGEN DAC" },
+	{ "DAC", NULL, "CLAMP" },
+	{ "DAC", NULL, "CKGEN ADC" },
+	{ "DAC", NULL, "TRIOSC" },
+	{ "DAC", NULL, "AVDD1" },
+	{ "DAC", NULL, "AVDD2" },
+
+	{ "DAC", NULL, "POR AVDD1" },
+	{ "DAC", NULL, "POR AVDD2" },
+	{ "DAC", NULL, "VCM 6172" },
+
+	{ "DAC L", "Switch", "DAC" },
+	{ "DAC R", "Switch", "DAC" },
+
+	{ "DAC R", NULL, "VSENSE R" },
+	{ "DAC L", NULL, "VSENSE L" },
+	{ "DAC R", NULL, "ISENSE R" },
+	{ "DAC L", NULL, "ISENSE L" },
+	{ "DAC L", NULL, "ADC3 L" },
+	{ "DAC R", NULL, "ADC3 R" },
+	{ "DAC L", NULL, "BUFL" },
+	{ "DAC R", NULL, "BUFR" },
+	{ "DAC L", NULL, "DAC L Power" },
+	{ "DAC R", NULL, "DAC R Power" },
+
+	{ "CLASS D", NULL, "DAC L" },
+	{ "CLASS D", NULL, "DAC R" },
+
+	{ "SPOL", NULL, "CLASS D" },
+	{ "SPOR", NULL, "CLASS D" },
+};
+
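+/*
+ * Find the pre-divider index i such that sysclk == rate * 256 * pd[i],
+ * i.e. the system clock is an exact supported multiple of 256 * fs.
+ */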
+static int rt1305_get_clk_info(int sclk, int rate)
+{
+	int i, pd[] = {1, 2, 3, 4, 6, 8, 12, 16};
+
+	if (sclk <= 0 || rate <= 0)
+		return -EINVAL;
+
+	rate = rate << 8;
+	for (i = 0; i < ARRAY_SIZE(pd); i++)
+		if (sclk == rate * pd[i])
+			return i;
+
+	return -EINVAL;
+}
+
+static int rt1305_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *component = dai->component;
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+	unsigned int val_len = 0, val_clk, mask_clk;
+	int pre_div, bclk_ms, frame_size;
+
+	rt1305->lrck = params_rate(params);
+	pre_div = rt1305_get_clk_info(rt1305->sysclk, rt1305->lrck);
+	if (pre_div < 0) {
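+		/* sysclk is not a usable multiple of 256 * fs; derive
+		 * 256 * fs from the 64 * fs bit clock via PLL1 instead
+		 */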
+		dev_warn(component->dev, "Force using PLL\n");
+		snd_soc_dai_set_pll(dai, 0, RT1305_PLL1_S_BCLK,
+			rt1305->lrck * 64, rt1305->lrck * 256);
+		snd_soc_dai_set_sysclk(dai, RT1305_FS_SYS_PRE_S_PLL1,
+			rt1305->lrck * 256, SND_SOC_CLOCK_IN);
+		pre_div = 0;
+	}
+	frame_size = snd_soc_params_to_frame_size(params);
+	if (frame_size < 0) {
+		dev_err(component->dev, "Unsupported frame size: %d\n",
+			frame_size);
+		return -EINVAL;
+	}
+
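+	/* frames wider than 32 bits need a 64 * fs bit clock */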
+	bclk_ms = frame_size > 32;
+	rt1305->bclk = rt1305->lrck * (32 << bclk_ms);
+
+	dev_dbg(component->dev, "bclk_ms is %d and pre_div is %d for iis %d\n",
+				bclk_ms, pre_div, dai->id);
+
+	dev_dbg(component->dev, "lrck is %dHz and pre_div is %d for iis %d\n",
+				rt1305->lrck, pre_div, dai->id);
+
+	switch (params_width(params)) {
+	case 16:
+		val_len |= RT1305_I2S_DL_SEL_16B;
+		break;
+	case 20:
+		val_len |= RT1305_I2S_DL_SEL_20B;
+		break;
+	case 24:
+		val_len |= RT1305_I2S_DL_SEL_24B;
+		break;
+	case 8:
+		val_len |= RT1305_I2S_DL_SEL_8B;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dai->id) {
+	case RT1305_AIF1:
+		mask_clk = RT1305_DIV_FS_SYS_MASK;
+		val_clk = pre_div << RT1305_DIV_FS_SYS_SFT;
+		snd_soc_component_update_bits(component, RT1305_I2S_SET_2,
+			RT1305_I2S_DL_SEL_MASK,
+			val_len);
+		break;
+	default:
+		dev_err(component->dev, "Invalid dai->id: %d\n", dai->id);
+		return -EINVAL;
+	}
+
+	snd_soc_component_update_bits(component, RT1305_CLK_2,
+		mask_clk, val_clk);
+
+	return 0;
+}
+
+static int rt1305_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+	struct snd_soc_component *component = dai->component;
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+	unsigned int reg_val = 0, reg1_val = 0;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		reg_val |= RT1305_SEL_I2S_OUT_MODE_M;
+		rt1305->master = 1;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		reg_val |= RT1305_SEL_I2S_OUT_MODE_S;
+		rt1305->master = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		reg1_val |= RT1305_I2S_BCLK_INV;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		reg1_val |= RT1305_I2S_DF_SEL_LEFT;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		reg1_val |= RT1305_I2S_DF_SEL_PCM_A;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		reg1_val |= RT1305_I2S_DF_SEL_PCM_B;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dai->id) {
+	case RT1305_AIF1:
+		snd_soc_component_update_bits(component, RT1305_I2S_SET_1,
+			RT1305_SEL_I2S_OUT_MODE_MASK, reg_val);
+		snd_soc_component_update_bits(component, RT1305_I2S_SET_2,
+			RT1305_I2S_DF_SEL_MASK | RT1305_I2S_BCLK_MASK,
+			reg1_val);
+		break;
+	default:
+		dev_err(component->dev, "Invalid dai->id: %d\n", dai->id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int rt1305_set_component_sysclk(struct snd_soc_component *component,
+		int clk_id, int source, unsigned int freq, int dir)
+{
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+	unsigned int reg_val = 0;
+
+	if (freq == rt1305->sysclk && clk_id == rt1305->sysclk_src)
+		return 0;
+
+	switch (clk_id) {
+	case RT1305_FS_SYS_PRE_S_MCLK:
+		reg_val |= RT1305_SEL_FS_SYS_PRE_MCLK;
+		snd_soc_component_update_bits(component,
+			RT1305_CLOCK_DETECT, RT1305_SEL_CLK_DET_SRC_MASK,
+			RT1305_SEL_CLK_DET_SRC_MCLK);
+		break;
+	case RT1305_FS_SYS_PRE_S_PLL1:
+		reg_val |= RT1305_SEL_FS_SYS_PRE_PLL;
+		break;
+	case RT1305_FS_SYS_PRE_S_RCCLK:
+		reg_val |= RT1305_SEL_FS_SYS_PRE_RCCLK;
+		break;
+	default:
+		dev_err(component->dev, "Invalid clock id (%d)\n", clk_id);
+		return -EINVAL;
+	}
+	snd_soc_component_update_bits(component, RT1305_CLK_1,
+		RT1305_SEL_FS_SYS_PRE_MASK, reg_val);
+	rt1305->sysclk = freq;
+	rt1305->sysclk_src = clk_id;
+
+	dev_dbg(component->dev, "Sysclk is %dHz and clock id is %d\n",
+		freq, clk_id);
+
+	return 0;
+}
+
+static int rt1305_set_component_pll(struct snd_soc_component *component,
+		int pll_id, int source, unsigned int freq_in,
+		unsigned int freq_out)
+{
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+	struct rl6231_pll_code pll_code;
+	int ret;
+
+	if (source == rt1305->pll_src && freq_in == rt1305->pll_in &&
+	    freq_out == rt1305->pll_out)
+		return 0;
+
+	if (!freq_in || !freq_out) {
+		dev_dbg(component->dev, "PLL disabled\n");
+
+		rt1305->pll_in = 0;
+		rt1305->pll_out = 0;
+		snd_soc_component_update_bits(component, RT1305_CLK_1,
+			RT1305_SEL_FS_SYS_PRE_MASK | RT1305_SEL_PLL_SRC_1_MASK,
+			RT1305_SEL_FS_SYS_PRE_PLL | RT1305_SEL_PLL_SRC_1_BCLK);
+		return 0;
+	}
+
+	switch (source) {
+	case RT1305_PLL2_S_MCLK:
+		snd_soc_component_update_bits(component, RT1305_CLK_1,
+			RT1305_SEL_PLL_SRC_2_MASK | RT1305_SEL_PLL_SRC_1_MASK |
+			RT1305_DIV_PLL_SRC_2_MASK,
+			RT1305_SEL_PLL_SRC_2_MCLK | RT1305_SEL_PLL_SRC_1_PLL2);
+		snd_soc_component_update_bits(component,
+			RT1305_CLOCK_DETECT, RT1305_SEL_CLK_DET_SRC_MASK,
+			RT1305_SEL_CLK_DET_SRC_MCLK);
+		break;
+	case RT1305_PLL1_S_BCLK:
+		snd_soc_component_update_bits(component,
+			RT1305_CLK_1, RT1305_SEL_PLL_SRC_1_MASK,
+			RT1305_SEL_PLL_SRC_1_BCLK);
+		break;
+	case RT1305_PLL2_S_RCCLK:
+		snd_soc_component_update_bits(component, RT1305_CLK_1,
+			RT1305_SEL_PLL_SRC_2_MASK | RT1305_SEL_PLL_SRC_1_MASK |
+			RT1305_DIV_PLL_SRC_2_MASK,
+			RT1305_SEL_PLL_SRC_2_RCCLK | RT1305_SEL_PLL_SRC_1_PLL2);
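+		/* the internal RC oscillator feeding the PLL runs at
+		 * 98.304 MHz
+		 */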
+		freq_in = 98304000;
+		break;
+	default:
+		dev_err(component->dev, "Unknown PLL Source %d\n", source);
+		return -EINVAL;
+	}
+
+	ret = rl6231_pll_calc(freq_in, freq_out, &pll_code);
+	if (ret < 0) {
+		dev_err(component->dev, "Unsupported input clock %d\n", freq_in);
+		return ret;
+	}
+
+	dev_dbg(component->dev, "bypass=%d m=%d n=%d k=%d\n",
+		pll_code.m_bp, (pll_code.m_bp ? 0 : pll_code.m_code),
+		pll_code.n_code, pll_code.k_code);
+
+	snd_soc_component_write(component, RT1305_PLL1_1,
+		(pll_code.m_bp ? 0 : pll_code.m_code) << RT1305_PLL_1_M_SFT |
+		pll_code.m_bp << RT1305_PLL_1_M_BYPASS_SFT |
+		pll_code.n_code);
+	snd_soc_component_write(component, RT1305_PLL1_2,
+		pll_code.k_code);
+
+	rt1305->pll_in = freq_in;
+	rt1305->pll_out = freq_out;
+	rt1305->pll_src = source;
+
+	return 0;
+}
+
+static int rt1305_probe(struct snd_soc_component *component)
+{
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+
+	rt1305->component = component;
+
+	/* initial settings */
+	rt1305_reg_init(component);
+
+	return 0;
+}
+
+static void rt1305_remove(struct snd_soc_component *component)
+{
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+
+	rt1305_reset(rt1305->regmap);
+}
+
+#ifdef CONFIG_PM
+static int rt1305_suspend(struct snd_soc_component *component)
+{
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+
+	regcache_cache_only(rt1305->regmap, true);
+	regcache_mark_dirty(rt1305->regmap);
+
+	return 0;
+}
+
+static int rt1305_resume(struct snd_soc_component *component)
+{
+	struct rt1305_priv *rt1305 = snd_soc_component_get_drvdata(component);
+
+	regcache_cache_only(rt1305->regmap, false);
+	regcache_sync(rt1305->regmap);
+
+	return 0;
+}
+#else
+#define rt1305_suspend NULL
+#define rt1305_resume NULL
+#endif
+
+#define RT1305_STEREO_RATES SNDRV_PCM_RATE_8000_192000
+#define RT1305_FORMATS (SNDRV_PCM_FMTBIT_S8 | \
+			SNDRV_PCM_FMTBIT_S20_3LE | SNDRV_PCM_FMTBIT_S16_LE | \
+			SNDRV_PCM_FMTBIT_S24_LE)
+
+static const struct snd_soc_dai_ops rt1305_aif_dai_ops = {
+	.hw_params = rt1305_hw_params,
+	.set_fmt = rt1305_set_dai_fmt,
+};
+
+static struct snd_soc_dai_driver rt1305_dai[] = {
+	{
+		.name = "rt1305-aif",
+		.playback = {
+			.stream_name = "AIF1 Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = RT1305_STEREO_RATES,
+			.formats = RT1305_FORMATS,
+		},
+		.ops = &rt1305_aif_dai_ops,
+	},
+};
+
+static const struct snd_soc_component_driver soc_component_dev_rt1305 = {
+	.probe = rt1305_probe,
+	.remove = rt1305_remove,
+	.suspend = rt1305_suspend,
+	.resume = rt1305_resume,
+	.controls = rt1305_snd_controls,
+	.num_controls = ARRAY_SIZE(rt1305_snd_controls),
+	.dapm_widgets = rt1305_dapm_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(rt1305_dapm_widgets),
+	.dapm_routes = rt1305_dapm_routes,
+	.num_dapm_routes = ARRAY_SIZE(rt1305_dapm_routes),
+	.set_sysclk = rt1305_set_component_sysclk,
+	.set_pll = rt1305_set_component_pll,
+	.use_pmdown_time	= 1,
+	.endianness		= 1,
+	.non_legacy_dai_naming	= 1,
+};
+
+static const struct regmap_config rt1305_regmap = {
+	.reg_bits = 8,
+	.val_bits = 16,
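+	/* cover the virtual PR window mapped above the real registers */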
+	.max_register = RT1305_MAX_REG + 1 + (ARRAY_SIZE(rt1305_ranges) *
+					       RT1305_PR_SPACING),
+	.volatile_reg = rt1305_volatile_register,
+	.readable_reg = rt1305_readable_register,
+	.cache_type = REGCACHE_RBTREE,
+	.reg_defaults = rt1305_reg,
+	.num_reg_defaults = ARRAY_SIZE(rt1305_reg),
+	.ranges = rt1305_ranges,
+	.num_ranges = ARRAY_SIZE(rt1305_ranges),
+	.use_single_rw = true,
+};
+
+#if defined(CONFIG_OF)
+static const struct of_device_id rt1305_of_match[] = {
+	{ .compatible = "realtek,rt1305", },
+	{ .compatible = "realtek,rt1306", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, rt1305_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id rt1305_acpi_match[] = {
+	{"10EC1305", 0,},
+	{"10EC1306", 0,},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, rt1305_acpi_match);
+#endif
+
+static const struct i2c_device_id rt1305_i2c_id[] = {
+	{ "rt1305", 0 },
+	{ "rt1306", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, rt1305_i2c_id);
+
+static void rt1305_calibrate(struct rt1305_priv *rt1305)
+{
+	unsigned int valmsb, vallsb, offsetl, offsetr;
+	unsigned int rh, rl, rhl, r0ohm;
+	u64 r0l, r0r;
+
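+	/* Calibration sequence of largely undocumented vendor register
+	 * writes, run with the regmap cache bypassed: measure the DAC DC
+	 * offsets, then the DC resistance (R0) of each speaker channel,
+	 * and program the results back if they fall within the valid
+	 * range.
+	 */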
+	regcache_cache_bypass(rt1305->regmap, true);
+
+	rt1305_reset(rt1305->regmap);
+	regmap_write(rt1305->regmap, RT1305_ADC_SET_3, 0x0219);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xcf, 0x5548);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xc1, 0x0320);
+	regmap_write(rt1305->regmap, RT1305_CLOCK_DETECT, 0x1000);
+	regmap_write(rt1305->regmap, RT1305_CLK_1, 0x0600);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xffd0);
+	regmap_write(rt1305->regmap, RT1305_EFUSE_1, 0x0080);
+	regmap_write(rt1305->regmap, RT1305_EFUSE_1, 0x0880);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_1, 0x0dfe);
+
+	/* Sin Gen */
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x5d, 0x0442);
+
+	regmap_write(rt1305->regmap, RT1305_CAL_EFUSE_CLOCK, 0xb000);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xc3, 0xd4a0);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xcc, 0x00cc);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xc1, 0x0320);
+	regmap_write(rt1305->regmap, RT1305_POWER_STATUS, 0x0000);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_2, 0xffff);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xfc20);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x06, 0x00c0);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xfca0);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xfce0);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xfcf0);
+
+	/* EFUSE read */
+	regmap_write(rt1305->regmap, RT1305_EFUSE_1, 0x0080);
+	regmap_write(rt1305->regmap, RT1305_EFUSE_1, 0x0880);
+	regmap_write(rt1305->regmap, RT1305_EFUSE_1, 0x0880);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xfce0);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xfca0);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xfc20);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x06, 0x0000);
+	regmap_write(rt1305->regmap, RT1305_EFUSE_1, 0x0000);
+
+	regmap_read(rt1305->regmap, RT1305_DAC_OFFSET_5, &valmsb);
+	regmap_read(rt1305->regmap, RT1305_DAC_OFFSET_6, &vallsb);
+	offsetl = valmsb << 16 | vallsb;
+	regmap_read(rt1305->regmap, RT1305_DAC_OFFSET_7, &valmsb);
+	regmap_read(rt1305->regmap, RT1305_DAC_OFFSET_8, &vallsb);
+	offsetr = valmsb << 16 | vallsb;
+	pr_info("DC offsetl=0x%x, offsetr=0x%x\n", offsetl, offsetr);
+
+	/* R0 calibration */
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x5d, 0x9542);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0xfcf0);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_2, 0xffff);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_1, 0x1dfe);
+	regmap_write(rt1305->regmap, RT1305_SILENCE_DETECT, 0x0e13);
+	regmap_write(rt1305->regmap, RT1305_CLK_1, 0x0650);
+
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x50, 0x0064);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x51, 0x0770);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x52, 0xc30c);
+	regmap_write(rt1305->regmap, RT1305_SPK_TEMP_PROTECTION_1, 0x8200);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xd4, 0xfb00);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xd4, 0xff80);
+	msleep(2000);
+	regmap_read(rt1305->regmap, RT1305_PR_BASE + 0x55, &rh);
+	regmap_read(rt1305->regmap, RT1305_PR_BASE + 0x56, &rl);
+	rhl = (rh << 16) | rl;
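+	/* rhl / 2^25 is the measured resistance in ohms; scale by 10
+	 * first to keep one decimal place
+	 */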
+	r0ohm = (rhl*10) / 33554432;
+
+	pr_debug("Left_rhl = 0x%x rh=0x%x rl=0x%x\n", rhl, rh, rl);
+	pr_info("Left channel %d.%dohm\n", (r0ohm/10), (r0ohm%10));
+
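+	/* 2^49 / rhl: reciprocal of the measured resistance, in the
+	 * fixed-point form programmed into the R0 registers below
+	 */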
+	r0l = 562949953421312;
+	if (rhl != 0)
+		do_div(r0l, rhl);
+	pr_debug("Left_r0 = 0x%llx\n", r0l);
+
+	regmap_write(rt1305->regmap, RT1305_SPK_TEMP_PROTECTION_1, 0x9200);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xd4, 0xfb00);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xd4, 0xff80);
+	msleep(2000);
+	regmap_read(rt1305->regmap, RT1305_PR_BASE + 0x55, &rh);
+	regmap_read(rt1305->regmap, RT1305_PR_BASE + 0x56, &rl);
+	rhl = (rh << 16) | rl;
+	r0ohm = (rhl*10) / 33554432;
+
+	pr_debug("Right_rhl = 0x%x rh=0x%x rl=0x%x\n", rhl, rh, rl);
+	pr_info("Right channel %d.%dohm\n", (r0ohm/10), (r0ohm%10));
+
+	r0r = 562949953421312;
+	if (rhl != 0)
+		do_div(r0r, rhl);
+	pr_debug("Right_r0 = 0x%llx\n", r0r);
+
+	regmap_write(rt1305->regmap, RT1305_SPK_TEMP_PROTECTION_1, 0xc2ec);
+
+	if ((r0l > R0_UPPER) && (r0l < R0_LOWER) &&
+		(r0r > R0_UPPER) && (r0r < R0_LOWER)) {
+		regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x4e,
+			(r0l >> 16) & 0xffff);
+		regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x4f,
+			r0l & 0xffff);
+		regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xfe,
+			((r0r >> 16) & 0xffff) | 0xf800);
+		regmap_write(rt1305->regmap, RT1305_PR_BASE + 0xfd,
+			r0r & 0xffff);
+	} else {
+		pr_err("R0 calibration failed\n");
+	}
+
+	/* restore some registers */
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_1, 0x0dfe);
+	usleep_range(200000, 400000);
+	regmap_write(rt1305->regmap, RT1305_PR_BASE + 0x5d, 0x0442);
+	regmap_write(rt1305->regmap, RT1305_CLOCK_DETECT, 0x3000);
+	regmap_write(rt1305->regmap, RT1305_CLK_1, 0x0400);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_1, 0x0000);
+	regmap_write(rt1305->regmap, RT1305_CAL_EFUSE_CLOCK, 0x8000);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_2, 0x1020);
+	regmap_write(rt1305->regmap, RT1305_POWER_CTRL_3, 0x0000);
+
+	regcache_cache_bypass(rt1305->regmap, false);
+}
+
+static int rt1305_i2c_probe(struct i2c_client *i2c,
+		    const struct i2c_device_id *id)
+{
+	struct rt1305_priv *rt1305;
+	int ret;
+	unsigned int val;
+
+	rt1305 = devm_kzalloc(&i2c->dev, sizeof(struct rt1305_priv),
+				GFP_KERNEL);
+	if (rt1305 == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, rt1305);
+
+	rt1305->regmap = devm_regmap_init_i2c(i2c, &rt1305_regmap);
+	if (IS_ERR(rt1305->regmap)) {
+		ret = PTR_ERR(rt1305->regmap);
+		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+			ret);
+		return ret;
+	}
+
+	regmap_read(rt1305->regmap, RT1305_DEVICE_ID, &val);
+	if (val != RT1305_DEVICE_ID_NUM) {
+		dev_err(&i2c->dev,
+			"Device with ID register %x is not rt1305\n", val);
+		return -ENODEV;
+	}
+
+	rt1305_reset(rt1305->regmap);
+	rt1305_calibrate(rt1305);
+
+	return snd_soc_register_component(&i2c->dev, &soc_component_dev_rt1305,
+			rt1305_dai, ARRAY_SIZE(rt1305_dai));
+}
+
+static int rt1305_i2c_remove(struct i2c_client *i2c)
+{
+	snd_soc_unregister_component(&i2c->dev);
+
+	return 0;
+}
+
+static void rt1305_i2c_shutdown(struct i2c_client *client)
+{
+	struct rt1305_priv *rt1305 = i2c_get_clientdata(client);
+
+	rt1305_reset(rt1305->regmap);
+}
+
+static struct i2c_driver rt1305_i2c_driver = {
+	.driver = {
+		.name = "rt1305",
+#if defined(CONFIG_OF)
+		.of_match_table = rt1305_of_match,
+#endif
+#if defined(CONFIG_ACPI)
+		.acpi_match_table = ACPI_PTR(rt1305_acpi_match)
+#endif
+	},
+	.probe = rt1305_i2c_probe,
+	.remove   = rt1305_i2c_remove,
+	.shutdown = rt1305_i2c_shutdown,
+	.id_table = rt1305_i2c_id,
+};
+module_i2c_driver(rt1305_i2c_driver);
+
+MODULE_DESCRIPTION("ASoC RT1305 amplifier driver");
+MODULE_AUTHOR("Shuming Fan <shumingf@realtek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/codecs/rt1305.h b/sound/soc/codecs/rt1305.h
new file mode 100644
index 0000000..bde86f9
--- /dev/null
+++ b/sound/soc/codecs/rt1305.h
@@ -0,0 +1,276 @@
+/*
+ * rt1305.h  --  RT1305 ALSA SoC amplifier component driver
+ *
+ * Copyright 2018 Realtek Semiconductor Corp.
+ * Author: Shuming Fan <shumingf@realtek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _RT1305_H_
+#define _RT1305_H_
+
+#define RT1305_DEVICE_ID_NUM 0x6251
+
+#define RT1305_RESET			0x00
+#define RT1305_CLK_1			0x04
+#define RT1305_CLK_2			0x05
+#define RT1305_CLK_3			0x06
+#define RT1305_DFLL_REG			0x07
+#define RT1305_CAL_EFUSE_CLOCK		0x08
+#define RT1305_PLL0_1			0x0a
+#define RT1305_PLL0_2			0x0b
+#define RT1305_PLL1_1			0x0c
+#define RT1305_PLL1_2			0x0d
+#define RT1305_MIXER_CTRL_1		0x10
+#define RT1305_MIXER_CTRL_2		0x11
+#define RT1305_DAC_SET_1		0x12
+#define RT1305_DAC_SET_2		0x14
+#define RT1305_ADC_SET_1		0x16
+#define RT1305_ADC_SET_2		0x17
+#define RT1305_ADC_SET_3		0x18
+#define RT1305_PATH_SET			0x20
+#define RT1305_SPDIF_IN_SET_1		0x22
+#define RT1305_SPDIF_IN_SET_2		0x24
+#define RT1305_SPDIF_IN_SET_3		0x26
+#define RT1305_SPDIF_OUT_SET_1		0x28
+#define RT1305_SPDIF_OUT_SET_2		0x2a
+#define RT1305_SPDIF_OUT_SET_3		0x2b
+#define RT1305_I2S_SET_1		0x2d
+#define RT1305_I2S_SET_2		0x2e
+#define RT1305_PBTL_MONO_MODE_SRC	0x2f
+#define RT1305_MANUALLY_I2C_DEVICE	0x32
+#define RT1305_POWER_STATUS		0x39
+#define RT1305_POWER_CTRL_1		0x3a
+#define RT1305_POWER_CTRL_2		0x3b
+#define RT1305_POWER_CTRL_3		0x3c
+#define RT1305_POWER_CTRL_4		0x3d
+#define RT1305_POWER_CTRL_5		0x3e
+#define RT1305_CLOCK_DETECT		0x3f
+#define RT1305_BIQUAD_SET_1		0x40
+#define RT1305_BIQUAD_SET_2		0x42
+#define RT1305_ADJUSTED_HPF_1		0x46
+#define RT1305_ADJUSTED_HPF_2		0x47
+#define RT1305_EQ_SET_1			0x4b
+#define RT1305_EQ_SET_2			0x4c
+#define RT1305_SPK_TEMP_PROTECTION_0	0x4f
+#define RT1305_SPK_TEMP_PROTECTION_1	0x50
+#define RT1305_SPK_TEMP_PROTECTION_2	0x51
+#define RT1305_SPK_TEMP_PROTECTION_3	0x52
+#define RT1305_SPK_DC_DETECT_1		0x53
+#define RT1305_SPK_DC_DETECT_2		0x54
+#define RT1305_LOUDNESS			0x58
+#define RT1305_THERMAL_FOLD_BACK_1	0x5e
+#define RT1305_THERMAL_FOLD_BACK_2	0x5f
+#define RT1305_SILENCE_DETECT		0x60
+#define RT1305_ALC_DRC_1		0x62
+#define RT1305_ALC_DRC_2		0x63
+#define RT1305_ALC_DRC_3		0x64
+#define RT1305_ALC_DRC_4		0x65
+#define RT1305_PRIV_INDEX		0x6a
+#define RT1305_PRIV_DATA		0x6c
+#define RT1305_SPK_EXCURSION_LIMITER_7	0x76
+#define RT1305_VERSION_ID		0x7a
+#define RT1305_VENDOR_ID		0x7c
+#define RT1305_DEVICE_ID		0x7e
+#define RT1305_EFUSE_1			0x80
+#define RT1305_EFUSE_2			0x81
+#define RT1305_EFUSE_3			0x82
+#define RT1305_DC_CALIB_1		0x90
+#define RT1305_DC_CALIB_2		0x91
+#define RT1305_DC_CALIB_3		0x92
+#define RT1305_DAC_OFFSET_1		0x93
+#define RT1305_DAC_OFFSET_2		0x94
+#define RT1305_DAC_OFFSET_3		0x95
+#define RT1305_DAC_OFFSET_4		0x96
+#define RT1305_DAC_OFFSET_5		0x97
+#define RT1305_DAC_OFFSET_6		0x98
+#define RT1305_DAC_OFFSET_7		0x99
+#define RT1305_DAC_OFFSET_8		0x9a
+#define RT1305_DAC_OFFSET_9		0x9b
+#define RT1305_DAC_OFFSET_10		0x9c
+#define RT1305_DAC_OFFSET_11		0x9d
+#define RT1305_DAC_OFFSET_12		0x9e
+#define RT1305_DAC_OFFSET_13		0x9f
+#define RT1305_DAC_OFFSET_14		0xa0
+#define RT1305_TRIM_1			0xb0
+#define RT1305_TRIM_2			0xb1
+#define RT1305_TUNE_INTERNAL_OSC	0xb2
+#define RT1305_BIQUAD1_H0_L_28_16	0xc0
+#define RT1305_BIQUAD3_A2_R_15_0	0xfb
+#define RT1305_MAX_REG			0xff
+
+/* CLOCK-1 (0x04) */
+#define RT1305_SEL_PLL_SRC_2_MASK			(0x1 << 15)
+#define RT1305_SEL_PLL_SRC_2_SFT			15
+#define RT1305_SEL_PLL_SRC_2_MCLK			(0x0 << 15)
+#define RT1305_SEL_PLL_SRC_2_RCCLK			(0x1 << 15)
+#define RT1305_DIV_PLL_SRC_2_MASK			(0x3 << 13)
+#define RT1305_DIV_PLL_SRC_2_SFT			13
+#define RT1305_SEL_PLL_SRC_1_MASK			(0x3 << 10)
+#define RT1305_SEL_PLL_SRC_1_SFT			10
+#define RT1305_SEL_PLL_SRC_1_PLL2			(0x0 << 10)
+#define RT1305_SEL_PLL_SRC_1_BCLK			(0x1 << 10)
+#define RT1305_SEL_PLL_SRC_1_DFLL			(0x2 << 10)
+#define RT1305_SEL_FS_SYS_PRE_MASK			(0x3 << 8)
+#define RT1305_SEL_FS_SYS_PRE_SFT			8
+#define RT1305_SEL_FS_SYS_PRE_MCLK			(0x0 << 8)
+#define RT1305_SEL_FS_SYS_PRE_PLL			(0x1 << 8)
+#define RT1305_SEL_FS_SYS_PRE_RCCLK			(0x2 << 8)
+#define RT1305_DIV_FS_SYS_MASK				(0x7 << 4)
+#define RT1305_DIV_FS_SYS_SFT				4
+
+/* PLL1M/N/K Code-1 (0x0c) */
+#define RT1305_PLL_1_M_SFT		12
+#define RT1305_PLL_1_M_BYPASS_MASK			(0x1 << 11)
+#define RT1305_PLL_1_M_BYPASS_SFT		11
+#define RT1305_PLL_1_M_BYPASS			(0x1 << 11)
+#define RT1305_PLL_1_N_MASK			(0x1ff << 0)
+
+/* DAC Setting (0x14) */
+#define RT1305_DVOL_MUTE_L_EN_SFT		15
+#define RT1305_DVOL_MUTE_R_EN_SFT		14
+
+/* I2S Setting-1 (0x2d) */
+#define RT1305_SEL_I2S_OUT_MODE_MASK		(0x1 << 15)
+#define RT1305_SEL_I2S_OUT_MODE_SFT			15
+#define RT1305_SEL_I2S_OUT_MODE_S			(0x0 << 15)
+#define RT1305_SEL_I2S_OUT_MODE_M			(0x1 << 15)
+
+/* I2S Setting-2 (0x2e) */
+#define RT1305_I2S_DF_SEL_MASK			(0x3 << 12)
+#define RT1305_I2S_DF_SEL_SFT			12
+#define RT1305_I2S_DF_SEL_I2S			(0x0 << 12)
+#define RT1305_I2S_DF_SEL_LEFT			(0x1 << 12)
+#define RT1305_I2S_DF_SEL_PCM_A			(0x2 << 12)
+#define RT1305_I2S_DF_SEL_PCM_B			(0x3 << 12)
+#define RT1305_I2S_DL_SEL_MASK			(0x3 << 10)
+#define RT1305_I2S_DL_SEL_SFT			10
+#define RT1305_I2S_DL_SEL_16B			(0x0 << 10)
+#define RT1305_I2S_DL_SEL_20B			(0x1 << 10)
+#define RT1305_I2S_DL_SEL_24B			(0x2 << 10)
+#define RT1305_I2S_DL_SEL_8B			(0x3 << 10)
+#define RT1305_I2S_BCLK_MASK		(0x1 << 9)
+#define RT1305_I2S_BCLK_SFT			9
+#define RT1305_I2S_BCLK_NORMAL		(0x0 << 9)
+#define RT1305_I2S_BCLK_INV			(0x1 << 9)
+
+/* Power Control-1 (0x3a) */
+#define RT1305_POW_PDB_JD_MASK				(0x1 << 12)
+#define RT1305_POW_PDB_JD				(0x1 << 12)
+#define RT1305_POW_PDB_JD_BIT			12
+#define RT1305_POW_PLL0_EN				(0x1 << 11)
+#define RT1305_POW_PLL0_EN_BIT			11
+#define RT1305_POW_PLL1_EN				(0x1 << 10)
+#define RT1305_POW_PLL1_EN_BIT			10
+#define RT1305_POW_PDB_JD_POLARITY				(0x1 << 9)
+#define RT1305_POW_PDB_JD_POLARITY_BIT			9
+#define RT1305_POW_MBIAS_LV				(0x1 << 8)
+#define RT1305_POW_MBIAS_LV_BIT			8
+#define RT1305_POW_BG_MBIAS_LV				(0x1 << 7)
+#define RT1305_POW_BG_MBIAS_LV_BIT			7
+#define RT1305_POW_LDO2				(0x1 << 6)
+#define RT1305_POW_LDO2_BIT			6
+#define RT1305_POW_BG2				(0x1 << 5)
+#define RT1305_POW_BG2_BIT			5
+#define RT1305_POW_LDO2_IB2				(0x1 << 4)
+#define RT1305_POW_LDO2_IB2_BIT			4
+#define RT1305_POW_VREF				(0x1 << 3)
+#define RT1305_POW_VREF_BIT			3
+#define RT1305_POW_VREF1				(0x1 << 2)
+#define RT1305_POW_VREF1_BIT			2
+#define RT1305_POW_VREF2				(0x1 << 1)
+#define RT1305_POW_VREF2_BIT			1
+
+/* Power Control-2 (0x3b) */
+#define RT1305_POW_DISC_VREF		(1 << 15)
+#define RT1305_POW_DISC_VREF_BIT	15
+#define RT1305_POW_FASTB_VREF		(1 << 14)
+#define RT1305_POW_FASTB_VREF_BIT	14
+#define RT1305_POW_ULTRA_FAST_VREF	(1 << 13)
+#define RT1305_POW_ULTRA_FAST_VREF_BIT	13
+#define RT1305_POW_CKXEN_DAC		(1 << 12)
+#define RT1305_POW_CKXEN_DAC_BIT	12
+#define RT1305_POW_EN_CKGEN_DAC		(1 << 11)
+#define RT1305_POW_EN_CKGEN_DAC_BIT	11
+#define RT1305_POW_DAC1_L		(1 << 10)
+#define RT1305_POW_DAC1_L_BIT		10
+#define RT1305_POW_DAC1_R		(1 << 9)
+#define RT1305_POW_DAC1_R_BIT		9
+#define RT1305_POW_CLAMP		(1 << 8)
+#define RT1305_POW_CLAMP_BIT		8
+#define RT1305_POW_BUFL			(1 << 7)
+#define RT1305_POW_BUFL_BIT		7
+#define RT1305_POW_BUFR			(1 << 6)
+#define RT1305_POW_BUFR_BIT		6
+#define RT1305_POW_EN_CKGEN_ADC		(1 << 5)
+#define RT1305_POW_EN_CKGEN_ADC_BIT	5
+#define RT1305_POW_ADC3_L		(1 << 4)
+#define RT1305_POW_ADC3_L_BIT		4
+#define RT1305_POW_ADC3_R		(1 << 3)
+#define RT1305_POW_ADC3_R_BIT		3
+#define RT1305_POW_TRIOSC		(1 << 2)
+#define RT1305_POW_TRIOSC_BIT		2
+#define RT1305_POR_AVDD1		(1 << 1)
+#define RT1305_POR_AVDD1_BIT		1
+#define RT1305_POR_AVDD2		(1 << 0)
+#define RT1305_POR_AVDD2_BIT		0
+
+/* Power Control-3 (0x3c) */
+#define RT1305_POW_VSENSE_RCH		(1 << 15)
+#define RT1305_POW_VSENSE_RCH_BIT	15
+#define RT1305_POW_VSENSE_LCH		(1 << 14)
+#define RT1305_POW_VSENSE_LCH_BIT	14
+#define RT1305_POW_ISENSE_RCH		(1 << 13)
+#define RT1305_POW_ISENSE_RCH_BIT	13
+#define RT1305_POW_ISENSE_LCH		(1 << 12)
+#define RT1305_POW_ISENSE_LCH_BIT	12
+#define RT1305_POW_POR_AVDD1		(1 << 11)
+#define RT1305_POW_POR_AVDD1_BIT	11
+#define RT1305_POW_POR_AVDD2		(1 << 10)
+#define RT1305_POW_POR_AVDD2_BIT	10
+#define RT1305_EN_K_HV			(1 << 9)
+#define RT1305_EN_K_HV_BIT		9
+#define RT1305_EN_PRE_K_HV		(1 << 8)
+#define RT1305_EN_PRE_K_HV_BIT		8
+#define RT1305_EN_EFUSE_1P8V		(1 << 7)
+#define RT1305_EN_EFUSE_1P8V_BIT	7
+#define RT1305_EN_EFUSE_5V		(1 << 6)
+#define RT1305_EN_EFUSE_5V_BIT		6
+#define RT1305_EN_VCM_6172		(1 << 5)
+#define RT1305_EN_VCM_6172_BIT		5
+#define RT1305_POR_EFUSE		(1 << 4)
+#define RT1305_POR_EFUSE_BIT		4
+
+/* Clock Detect (0x3f) */
+#define RT1305_SEL_CLK_DET_SRC_MASK			(0x1 << 12)
+#define RT1305_SEL_CLK_DET_SRC_SFT			12
+#define RT1305_SEL_CLK_DET_SRC_MCLK			(0x0 << 12)
+#define RT1305_SEL_CLK_DET_SRC_BCLK			(0x1 << 12)
+
+
+/* System Clock Source */
+enum {
+	RT1305_FS_SYS_PRE_S_MCLK,
+	RT1305_FS_SYS_PRE_S_PLL1,
+	RT1305_FS_SYS_PRE_S_RCCLK,	/* 98.304 MHz */
+};
+
+/* PLL Source 1/2 */
+enum {
+	RT1305_PLL1_S_BCLK,
+	RT1305_PLL2_S_MCLK,
+	RT1305_PLL2_S_RCCLK,	/* 98.304 MHz */
+};
+
+enum {
+	RT1305_AIF1,
+	RT1305_AIFS
+};
+
+#define R0_UPPER			0x2E8BA2	/* 5.5 ohm */
+#define R0_LOWER			0x666666	/* 2.5 ohm */
+
+#endif		/* end of _RT1305_H_ */
diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c
index 0556742..8bf8d36 100644
--- a/sound/soc/codecs/rt5640.c
+++ b/sound/soc/codecs/rt5640.c
@@ -24,6 +24,7 @@
 #include <linux/spi/spi.h>
 #include <linux/acpi.h>
 #include <sound/core.h>
+#include <sound/jack.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
 #include <sound/soc.h>
@@ -476,20 +477,6 @@
 	return idx;
 }
 
-static int is_sys_clk_from_pll(struct snd_soc_dapm_widget *source,
-			 struct snd_soc_dapm_widget *sink)
-{
-	struct snd_soc_component *component = snd_soc_dapm_to_component(source->dapm);
-	unsigned int val;
-
-	val = snd_soc_component_read32(component, RT5640_GLB_CLK);
-	val &= RT5640_SCLK_SRC_MASK;
-	if (val == RT5640_SCLK_SRC_PLL1)
-		return 1;
-	else
-		return 0;
-}
-
 static int is_using_asrc(struct snd_soc_dapm_widget *source,
 			 struct snd_soc_dapm_widget *sink)
 {
@@ -1071,9 +1058,6 @@
 }
 
 static const struct snd_soc_dapm_widget rt5640_dapm_widgets[] = {
-	SND_SOC_DAPM_SUPPLY("PLL1", RT5640_PWR_ANLG2,
-			RT5640_PWR_PLL_BIT, 0, NULL, 0),
-
 	/* ASRC */
 	SND_SOC_DAPM_SUPPLY_S("Stereo Filter ASRC", 1, RT5640_ASRC_1,
 			 15, 0, NULL, 0),
@@ -1427,22 +1411,18 @@
 	{"Stereo ADC MIXL", "ADC1 Switch", "Stereo ADC L1 Mux"},
 	{"Stereo ADC MIXL", "ADC2 Switch", "Stereo ADC L2 Mux"},
 	{"Stereo ADC MIXL", NULL, "Stereo Filter"},
-	{"Stereo Filter", NULL, "PLL1", is_sys_clk_from_pll},
 
 	{"Stereo ADC MIXR", "ADC1 Switch", "Stereo ADC R1 Mux"},
 	{"Stereo ADC MIXR", "ADC2 Switch", "Stereo ADC R2 Mux"},
 	{"Stereo ADC MIXR", NULL, "Stereo Filter"},
-	{"Stereo Filter", NULL, "PLL1", is_sys_clk_from_pll},
 
 	{"Mono ADC MIXL", "ADC1 Switch", "Mono ADC L1 Mux"},
 	{"Mono ADC MIXL", "ADC2 Switch", "Mono ADC L2 Mux"},
 	{"Mono ADC MIXL", NULL, "Mono Left Filter"},
-	{"Mono Left Filter", NULL, "PLL1", is_sys_clk_from_pll},
 
 	{"Mono ADC MIXR", "ADC1 Switch", "Mono ADC R1 Mux"},
 	{"Mono ADC MIXR", "ADC2 Switch", "Mono ADC R2 Mux"},
 	{"Mono ADC MIXR", NULL, "Mono Right Filter"},
-	{"Mono Right Filter", NULL, "PLL1", is_sys_clk_from_pll},
 
 	{"IF2 ADC L", NULL, "Mono ADC MIXL"},
 	{"IF2 ADC R", NULL, "Mono ADC MIXR"},
@@ -1512,10 +1492,8 @@
 	{"DIG MIXR", "DAC R1 Switch", "DAC MIXR"},
 
 	{"DAC L1", NULL, "Stereo DAC MIXL"},
-	{"DAC L1", NULL, "PLL1", is_sys_clk_from_pll},
 	{"DAC L1", NULL, "DAC L1 Power"},
 	{"DAC R1", NULL, "Stereo DAC MIXR"},
-	{"DAC R1", NULL, "PLL1", is_sys_clk_from_pll},
 	{"DAC R1", NULL, "DAC R1 Power"},
 
 	{"SPK MIXL", "REC MIXL Switch", "RECMIXL"},
@@ -1622,10 +1600,8 @@
 	{"DIG MIXL", "DAC L2 Switch", "DAC L2 Mux"},
 
 	{"DAC L2", NULL, "Mono DAC MIXL"},
-	{"DAC L2", NULL, "PLL1", is_sys_clk_from_pll},
 	{"DAC L2", NULL, "DAC L2 Power"},
 	{"DAC R2", NULL, "Mono DAC MIXR"},
-	{"DAC R2", NULL, "PLL1", is_sys_clk_from_pll},
 	{"DAC R2", NULL, "DAC R2 Power"},
 
 	{"SPK MIXL", "DAC L2 Switch", "DAC L2"},
@@ -1861,6 +1837,7 @@
 	struct snd_soc_component *component = dai->component;
 	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
 	unsigned int reg_val = 0;
+	unsigned int pll_bit = 0;
 
 	if (freq == rt5640->sysclk && clk_id == rt5640->sysclk_src)
 		return 0;
@@ -1871,6 +1848,7 @@
 		break;
 	case RT5640_SCLK_S_PLL1:
 		reg_val |= RT5640_SCLK_SRC_PLL1;
+		pll_bit |= RT5640_PWR_PLL;
 		break;
 	case RT5640_SCLK_S_RCCLK:
 		reg_val |= RT5640_SCLK_SRC_RCCLK;
@@ -1879,6 +1857,8 @@
 		dev_err(component->dev, "Invalid clock id (%d)\n", clk_id);
 		return -EINVAL;
 	}
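+	/*
+	 * Power the PLL up or down to match the selected sysclk source; this
+	 * replaces the "PLL1" DAPM supply widget removed earlier in this
+	 * patch, so the PLL is only powered while it actually drives sysclk.
+	 */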
+	snd_soc_component_update_bits(component, RT5640_PWR_ANLG2,
+		RT5640_PWR_PLL, pll_bit);
 	snd_soc_component_update_bits(component, RT5640_GLB_CLK,
 		RT5640_SCLK_SRC_MASK, reg_val);
 	rt5640->sysclk = freq;
@@ -2114,10 +2094,376 @@
 }
 EXPORT_SYMBOL_GPL(rt5640_sel_asrc_clk_src);
 
+static void rt5640_enable_micbias1_for_ovcd(struct snd_soc_component *component)
+{
+	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+
+	snd_soc_dapm_mutex_lock(dapm);
+	snd_soc_dapm_force_enable_pin_unlocked(dapm, "LDO2");
+	snd_soc_dapm_force_enable_pin_unlocked(dapm, "MICBIAS1");
+	/* OVCD is unreliable when used with RCCLK as sysclk-source */
+	snd_soc_dapm_force_enable_pin_unlocked(dapm, "Platform Clock");
+	snd_soc_dapm_sync_unlocked(dapm);
+	snd_soc_dapm_mutex_unlock(dapm);
+}
+
+static void rt5640_disable_micbias1_for_ovcd(struct snd_soc_component *component)
+{
+	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
+
+	snd_soc_dapm_mutex_lock(dapm);
+	snd_soc_dapm_disable_pin_unlocked(dapm, "Platform Clock");
+	snd_soc_dapm_disable_pin_unlocked(dapm, "MICBIAS1");
+	snd_soc_dapm_disable_pin_unlocked(dapm, "LDO2");
+	snd_soc_dapm_sync_unlocked(dapm);
+	snd_soc_dapm_mutex_unlock(dapm);
+}
+
+static void rt5640_enable_micbias1_ovcd_irq(struct snd_soc_component *component)
+{
+	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
+
+	snd_soc_component_update_bits(component, RT5640_IRQ_CTRL2,
+		RT5640_IRQ_MB1_OC_MASK, RT5640_IRQ_MB1_OC_NOR);
+	rt5640->ovcd_irq_enabled = true;
+}
+
+static void rt5640_disable_micbias1_ovcd_irq(struct snd_soc_component *component)
+{
+	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
+
+	snd_soc_component_update_bits(component, RT5640_IRQ_CTRL2,
+		RT5640_IRQ_MB1_OC_MASK, RT5640_IRQ_MB1_OC_BP);
+	rt5640->ovcd_irq_enabled = false;
+}
+
+static void rt5640_clear_micbias1_ovcd(struct snd_soc_component *component)
+{
+	snd_soc_component_update_bits(component, RT5640_IRQ_CTRL2,
+		RT5640_MB1_OC_STATUS, 0);
+}
+
+static bool rt5640_micbias1_ovcd(struct snd_soc_component *component)
+{
+	int val;
+
+	val = snd_soc_component_read32(component, RT5640_IRQ_CTRL2);
+	dev_dbg(component->dev, "irq ctrl2 %#04x\n", val);
+
+	return (val & RT5640_MB1_OC_STATUS);
+}
+
+static bool rt5640_jack_inserted(struct snd_soc_component *component)
+{
+	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
+	int val;
+
+	val = snd_soc_component_read32(component, RT5640_INT_IRQ_ST);
+	dev_dbg(component->dev, "irq status %#04x\n", val);
+
+	if (rt5640->jd_inverted)
+		return !(val & RT5640_JD_STATUS);
+	else
+		return (val & RT5640_JD_STATUS);
+}
+
+/* Jack detect and button-press timings */
+#define JACK_SETTLE_TIME	100 /* milliseconds */
+#define JACK_DETECT_COUNT	5
+#define JACK_DETECT_MAXCOUNT	20  /* Approx. 2 seconds' worth of tries */
+#define JACK_UNPLUG_TIME	80  /* milliseconds */
+#define BP_POLL_TIME		10  /* milliseconds */
+#define BP_POLL_MAXCOUNT	200 /* assume something is wrong after this */
+#define BP_THRESHOLD		3
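+
+/*
+ * Note on the numbers above: with BP_POLL_TIME = 10 ms, a button press must
+ * be seen in at least BP_THRESHOLD consecutive polls (~30 ms) before it
+ * counts, and it is only reported after we have polled for JACK_UNPLUG_TIME
+ * ms, since the contacts get briefly shorted while a jack is being unplugged.
+ */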
+
+static void rt5640_start_button_press_work(struct snd_soc_component *component)
+{
+	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
+
+	rt5640->poll_count = 0;
+	rt5640->press_count = 0;
+	rt5640->release_count = 0;
+	rt5640->pressed = false;
+	rt5640->press_reported = false;
+	rt5640_clear_micbias1_ovcd(component);
+	schedule_delayed_work(&rt5640->bp_work, msecs_to_jiffies(BP_POLL_TIME));
+}
+
+static void rt5640_button_press_work(struct work_struct *work)
+{
+	struct rt5640_priv *rt5640 =
+		container_of(work, struct rt5640_priv, bp_work.work);
+	struct snd_soc_component *component = rt5640->component;
+
+	/* Check the jack was not removed underneath us */
+	if (!rt5640_jack_inserted(component))
+		return;
+
+	if (rt5640_micbias1_ovcd(component)) {
+		rt5640->release_count = 0;
+		rt5640->press_count++;
+		/* Remember this until after the JACK_UNPLUG_TIME wait */
+		if (rt5640->press_count >= BP_THRESHOLD)
+			rt5640->pressed = true;
+		rt5640_clear_micbias1_ovcd(component);
+	} else {
+		rt5640->press_count = 0;
+		rt5640->release_count++;
+	}
+
+	/*
+	 * The pins get temporarily shorted on jack unplug, so we poll for
+	 * at least JACK_UNPLUG_TIME milliseconds before reporting a press.
+	 */
+	rt5640->poll_count++;
+	if (rt5640->poll_count < (JACK_UNPLUG_TIME / BP_POLL_TIME)) {
+		schedule_delayed_work(&rt5640->bp_work,
+				      msecs_to_jiffies(BP_POLL_TIME));
+		return;
+	}
+
+	if (rt5640->pressed && !rt5640->press_reported) {
+		dev_dbg(component->dev, "headset button press\n");
+		snd_soc_jack_report(rt5640->jack, SND_JACK_BTN_0,
+				    SND_JACK_BTN_0);
+		rt5640->press_reported = true;
+	}
+
+	if (rt5640->release_count >= BP_THRESHOLD) {
+		if (rt5640->press_reported) {
+			dev_dbg(component->dev, "headset button release\n");
+			snd_soc_jack_report(rt5640->jack, 0, SND_JACK_BTN_0);
+		}
+		/* Re-enable OVCD IRQ to detect next press */
+		rt5640_enable_micbias1_ovcd_irq(component);
+		return; /* Stop polling */
+	}
+
+	schedule_delayed_work(&rt5640->bp_work, msecs_to_jiffies(BP_POLL_TIME));
+}
+
+static int rt5640_detect_headset(struct snd_soc_component *component)
+{
+	int i, headset_count = 0, headphone_count = 0;
+
+	/*
+	 * We get the insertion event before the jack is fully inserted, at
+	 * which point the second ring on a TRRS connector may short the 2nd
+	 * ring and sleeve contacts; the overcurrent detection is also not
+	 * entirely reliable. So we try several times with a wait in between
+	 * until we detect the same type JACK_DETECT_COUNT times in a row.
+	 */
+	for (i = 0; i < JACK_DETECT_MAXCOUNT; i++) {
+		/* Clear any previous over-current status flag */
+		rt5640_clear_micbias1_ovcd(component);
+
+		msleep(JACK_SETTLE_TIME);
+
+		/* Check the jack is still connected before checking ovcd */
+		if (!rt5640_jack_inserted(component))
+			return 0;
+
+		if (rt5640_micbias1_ovcd(component)) {
+			/*
+			 * Over-current detected: there is a short between the
+			 * 2nd ring contact and ground, so this is a TRS
+			 * connector without a mic contact, i.e. plain
+			 * headphones.
+			 */
+			dev_dbg(component->dev, "jack mic-gnd shorted\n");
+			headset_count = 0;
+			headphone_count++;
+			if (headphone_count == JACK_DETECT_COUNT)
+				return SND_JACK_HEADPHONE;
+		} else {
+			dev_dbg(component->dev, "jack mic-gnd open\n");
+			headphone_count = 0;
+			headset_count++;
+			if (headset_count == JACK_DETECT_COUNT)
+				return SND_JACK_HEADSET;
+		}
+	}
+
+	dev_err(component->dev, "Error detecting headset vs headphones, bad contact? Assuming headphones\n");
+	return SND_JACK_HEADPHONE;
+}
+
+static void rt5640_jack_work(struct work_struct *work)
+{
+	struct rt5640_priv *rt5640 =
+		container_of(work, struct rt5640_priv, jack_work);
+	struct snd_soc_component *component = rt5640->component;
+	int status;
+
+	if (!rt5640_jack_inserted(component)) {
+		/* Jack removed, or spurious IRQ? */
+		if (rt5640->jack->status & SND_JACK_HEADPHONE) {
+			if (rt5640->jack->status & SND_JACK_MICROPHONE) {
+				cancel_delayed_work_sync(&rt5640->bp_work);
+				rt5640_disable_micbias1_ovcd_irq(component);
+				rt5640_disable_micbias1_for_ovcd(component);
+			}
+			snd_soc_jack_report(rt5640->jack, 0,
+					    SND_JACK_HEADSET | SND_JACK_BTN_0);
+			dev_dbg(component->dev, "jack unplugged\n");
+		}
+	} else if (!(rt5640->jack->status & SND_JACK_HEADPHONE)) {
+		/* Jack inserted */
+		WARN_ON(rt5640->ovcd_irq_enabled);
+		rt5640_enable_micbias1_for_ovcd(component);
+		status = rt5640_detect_headset(component);
+		if (status == SND_JACK_HEADSET) {
+			/* Enable ovcd IRQ for button press detect. */
+			rt5640_enable_micbias1_ovcd_irq(component);
+		} else {
+			/* No more need for overcurrent detect. */
+			rt5640_disable_micbias1_for_ovcd(component);
+		}
+		dev_dbg(component->dev, "detect status %#02x\n", status);
+		snd_soc_jack_report(rt5640->jack, status, SND_JACK_HEADSET);
+	} else if (rt5640->ovcd_irq_enabled && rt5640_micbias1_ovcd(component)) {
+		dev_dbg(component->dev, "OVCD IRQ\n");
+
+		/*
+		 * The ovcd IRQ keeps firing while the button is pressed, so
+		 * we disable it and start polling the button until released.
+		 *
+		 * The disable will make the IRQ pin 0 again, and since we get
+		 * IRQs on both edges (so as to detect both jack plug-in and
+		 * unplug) this means we will immediately get another IRQ.
+		 * The ovcd_irq_enabled check above makes the 2nd IRQ a no-op.
+		 */
+		rt5640_disable_micbias1_ovcd_irq(component);
+		rt5640_start_button_press_work(component);
+
+		/*
+		 * If the jack-detect IRQ flag goes high (unplug) after our
+		 * above rt5640_jack_inserted() check and before we have
+		 * disabled the OVCD IRQ, the IRQ pin will stay high and as
+		 * we react to edges, we miss the unplug event -> recheck.
+		 */
+		queue_work(system_long_wq, &rt5640->jack_work);
+	}
+}
+
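+/*
+ * The hard IRQ handler only kicks off the jack work; all register access is
+ * done from the workqueue, since I2C regmap accesses may sleep.
+ */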
+static irqreturn_t rt5640_irq(int irq, void *data)
+{
+	struct rt5640_priv *rt5640 = data;
+
+	if (rt5640->jack)
+		queue_work(system_long_wq, &rt5640->jack_work);
+
+	return IRQ_HANDLED;
+}
+
+static void rt5640_cancel_work(void *data)
+{
+	struct rt5640_priv *rt5640 = data;
+
+	cancel_work_sync(&rt5640->jack_work);
+	cancel_delayed_work_sync(&rt5640->bp_work);
+}
+
+static void rt5640_enable_jack_detect(struct snd_soc_component *component,
+				      struct snd_soc_jack *jack)
+{
+	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
+
+	/* Select JD-source */
+	snd_soc_component_update_bits(component, RT5640_JD_CTRL,
+		RT5640_JD_MASK, rt5640->jd_src);
+
+	/* Selecting GPIO1 as an interrupt */
+	snd_soc_component_update_bits(component, RT5640_GPIO_CTRL1,
+		RT5640_GP1_PIN_MASK, RT5640_GP1_PIN_IRQ);
+
+	/* Set GPIO1 output */
+	snd_soc_component_update_bits(component, RT5640_GPIO_CTRL3,
+		RT5640_GP1_PF_MASK, RT5640_GP1_PF_OUT);
+
+	/* Enabling jd2 in general control 1 */
+	snd_soc_component_write(component, RT5640_DUMMY1, 0x3f41);
+
+	/* Enabling jd2 in general control 2 */
+	snd_soc_component_write(component, RT5640_DUMMY2, 0x4001);
+
+	snd_soc_component_write(component, RT5640_PR_BASE + RT5640_BIAS_CUR4,
+		0xa800 | rt5640->ovcd_sf);
+
+	snd_soc_component_update_bits(component, RT5640_MICBIAS,
+		RT5640_MIC1_OVTH_MASK | RT5640_MIC1_OVCD_MASK,
+		rt5640->ovcd_th | RT5640_MIC1_OVCD_EN);
+
+	/*
+	 * The over-current-detect is only reliable in detecting the absence
+	 * of over-current. When the mic contact in the jack is
+	 * short-circuited, the hardware periodically retries applying the
+	 * bias current, which makes the ovcd status flip-flop 1-0-1, with it
+	 * being 0 about 10% of the time. As we poll the ovcd status bit, we
+	 * might hit that 10%, so we enable sticky mode; when checking OVCD we
+	 * clear the status, msleep() a bit and then check again to get a
+	 * reliable reading.
+	 */
+	snd_soc_component_update_bits(component, RT5640_IRQ_CTRL2,
+		RT5640_MB1_OC_STKY_MASK, RT5640_MB1_OC_STKY_EN);
+
+	/*
+	 * All IRQs get or-ed together, so we need the jack IRQ to report 0
+	 * when a jack is inserted so that the OVCD IRQ then toggles the IRQ
+	 * pin 0/1 instead of it being stuck to 1. So we invert the JD polarity
+	 * on systems where the hardware does not already do this.
+	 */
+	if (rt5640->jd_inverted)
+		snd_soc_component_write(component, RT5640_IRQ_CTRL1,
+					RT5640_IRQ_JD_NOR);
+	else
+		snd_soc_component_write(component, RT5640_IRQ_CTRL1,
+					RT5640_IRQ_JD_NOR | RT5640_JD_P_INV);
+
+	rt5640->jack = jack;
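+	/*
+	 * If a headset (jack with mic) was already reported, re-enable
+	 * micbias1 and the OVCD IRQ so button-press detection keeps working.
+	 */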
+	if (rt5640->jack->status & SND_JACK_MICROPHONE) {
+		rt5640_enable_micbias1_for_ovcd(component);
+		rt5640_enable_micbias1_ovcd_irq(component);
+	}
+
+	enable_irq(rt5640->irq);
+	/* sync initial jack state */
+	queue_work(system_long_wq, &rt5640->jack_work);
+}
+
+static void rt5640_disable_jack_detect(struct snd_soc_component *component)
+{
+	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
+
+	disable_irq(rt5640->irq);
+	rt5640_cancel_work(rt5640);
+
+	if (rt5640->jack->status & SND_JACK_MICROPHONE) {
+		rt5640_disable_micbias1_ovcd_irq(component);
+		rt5640_disable_micbias1_for_ovcd(component);
+		snd_soc_jack_report(rt5640->jack, 0, SND_JACK_BTN_0);
+	}
+
+	rt5640->jack = NULL;
+}
+
+static int rt5640_set_jack(struct snd_soc_component *component,
+			   struct snd_soc_jack *jack, void *data)
+{
+	if (jack)
+		rt5640_enable_jack_detect(component, jack);
+	else
+		rt5640_disable_jack_detect(component);
+
+	return 0;
+}
+
 static int rt5640_probe(struct snd_soc_component *component)
 {
 	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
+	u32 dmic1_data_pin = 0;
+	u32 dmic2_data_pin = 0;
+	bool dmic_en = false;
+	u32 val;
 
 	/* Check if MCLK provided */
 	rt5640->mclk = devm_clk_get(component->dev, "mclk");
@@ -2159,9 +2505,86 @@
 		return -ENODEV;
 	}
 
-	if (rt5640->pdata.dmic_en)
-		rt5640_dmic_enable(component, rt5640->pdata.dmic1_data_pin,
-					  rt5640->pdata.dmic2_data_pin);
+	/*
+	 * Note: on some platforms the platform code may need to add
+	 * device-props rather than relying only on properties set by the
+	 * firmware. Therefore the property parsing MUST be done here, rather
+	 * than from rt5640_i2c_probe(), so that the platform code can attach
+	 * extra properties before calling snd_soc_register_card().
+	 */
+	if (device_property_read_bool(component->dev, "realtek,in1-differential"))
+		snd_soc_component_update_bits(component, RT5640_IN1_IN2,
+					      RT5640_IN_DF1, RT5640_IN_DF1);
+
+	if (device_property_read_bool(component->dev, "realtek,in2-differential"))
+		snd_soc_component_update_bits(component, RT5640_IN3_IN4,
+					      RT5640_IN_DF2, RT5640_IN_DF2);
+
+	if (device_property_read_bool(component->dev, "realtek,in3-differential"))
+		snd_soc_component_update_bits(component, RT5640_IN1_IN2,
+					      RT5640_IN_DF2, RT5640_IN_DF2);
+
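+	/*
+	 * For the dmic data-pin properties a value of 0 means the pin is not
+	 * used, so non-zero values are shifted down by one to get the 0-based
+	 * pin encoding which rt5640_dmic_enable() expects.
+	 */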
+	if (device_property_read_u32(component->dev, "realtek,dmic1-data-pin",
+				     &val) == 0 && val) {
+		dmic1_data_pin = val - 1;
+		dmic_en = true;
+	}
+
+	if (device_property_read_u32(component->dev, "realtek,dmic2-data-pin",
+				     &val) == 0 && val) {
+		dmic2_data_pin = val - 1;
+		dmic_en = true;
+	}
+
+	if (dmic_en)
+		rt5640_dmic_enable(component, dmic1_data_pin, dmic2_data_pin);
+
+	if (device_property_read_u32(component->dev,
+				     "realtek,jack-detect-source", &val) == 0) {
+		if (val <= RT5640_JD_SRC_GPIO4)
+			rt5640->jd_src = val << RT5640_JD_SFT;
+		else
+			dev_warn(component->dev, "Warning: Invalid jack-detect-source value: %d, leaving jack-detect disabled\n",
+				 val);
+	}
+
+	if (!device_property_read_bool(component->dev, "realtek,jack-detect-not-inverted"))
+		rt5640->jd_inverted = true;
+
+	/*
+	 * Testing on various boards has shown that good defaults for the OVCD
+	 * threshold and scale-factor are 2000µA and 0.75. For an effective
+	 * limit of 1500µA, this seems to be more reliable than 1500µA and 1.0.
+	 */
+	rt5640->ovcd_th = RT5640_MIC1_OVTH_2000UA;
+	rt5640->ovcd_sf = RT5640_MIC_OVCD_SF_0P75;
+
+	if (device_property_read_u32(component->dev,
+			"realtek,over-current-threshold-microamp", &val) == 0) {
+		switch (val) {
+		case 600:
+			rt5640->ovcd_th = RT5640_MIC1_OVTH_600UA;
+			break;
+		case 1500:
+			rt5640->ovcd_th = RT5640_MIC1_OVTH_1500UA;
+			break;
+		case 2000:
+			rt5640->ovcd_th = RT5640_MIC1_OVTH_2000UA;
+			break;
+		default:
+			dev_warn(component->dev, "Warning: Invalid over-current-threshold-microamp value: %d, defaulting to 2000uA\n",
+				 val);
+		}
+	}
+
+	if (device_property_read_u32(component->dev,
+			"realtek,over-current-scale-factor", &val) == 0) {
+		if (val <= RT5640_OVCD_SF_1P5)
+			rt5640->ovcd_sf = val << RT5640_MIC_OVCD_SF_SFT;
+		else
+			dev_warn(component->dev, "Warning: Invalid over-current-scale-factor value: %d, defaulting to 0.75\n",
+				 val);
+	}
 
 	return 0;
 }
@@ -2180,8 +2603,8 @@
 	rt5640_reset(component);
 	regcache_cache_only(rt5640->regmap, true);
 	regcache_mark_dirty(rt5640->regmap);
-	if (gpio_is_valid(rt5640->pdata.ldo1_en))
-		gpio_set_value_cansleep(rt5640->pdata.ldo1_en, 0);
+	if (gpio_is_valid(rt5640->ldo1_en))
+		gpio_set_value_cansleep(rt5640->ldo1_en, 0);
 
 	return 0;
 }
@@ -2190,8 +2613,8 @@
 {
 	struct rt5640_priv *rt5640 = snd_soc_component_get_drvdata(component);
 
-	if (gpio_is_valid(rt5640->pdata.ldo1_en)) {
-		gpio_set_value_cansleep(rt5640->pdata.ldo1_en, 1);
+	if (gpio_is_valid(rt5640->ldo1_en)) {
+		gpio_set_value_cansleep(rt5640->ldo1_en, 1);
 		msleep(400);
 	}
 
@@ -2263,6 +2686,7 @@
 	.suspend		= rt5640_suspend,
 	.resume			= rt5640_resume,
 	.set_bias_level		= rt5640_set_bias_level,
+	.set_jack		= rt5640_set_jack,
 	.controls		= rt5640_snd_controls,
 	.num_controls		= ARRAY_SIZE(rt5640_snd_controls),
 	.dapm_widgets		= rt5640_dapm_widgets,
@@ -2323,22 +2747,16 @@
 
 static int rt5640_parse_dt(struct rt5640_priv *rt5640, struct device_node *np)
 {
-	rt5640->pdata.in1_diff = of_property_read_bool(np,
-					"realtek,in1-differential");
-	rt5640->pdata.in2_diff = of_property_read_bool(np,
-					"realtek,in2-differential");
-
-	rt5640->pdata.ldo1_en = of_get_named_gpio(np,
-					"realtek,ldo1-en-gpios", 0);
+	rt5640->ldo1_en = of_get_named_gpio(np, "realtek,ldo1-en-gpios", 0);
 	/*
 	 * LDO1_EN is optional (it may be statically tied on the board).
 	 * -ENOENT means that the property doesn't exist, i.e. there is no
 	 * GPIO, so is not an error. Any other error code means the property
 	 * exists, but could not be parsed.
 	 */
-	if (!gpio_is_valid(rt5640->pdata.ldo1_en) &&
-			(rt5640->pdata.ldo1_en != -ENOENT))
-		return rt5640->pdata.ldo1_en;
+	if (!gpio_is_valid(rt5640->ldo1_en) &&
+			(rt5640->ldo1_en != -ENOENT))
+		return rt5640->ldo1_en;
 
 	return 0;
 }
@@ -2346,7 +2764,6 @@
 static int rt5640_i2c_probe(struct i2c_client *i2c,
 		    const struct i2c_device_id *id)
 {
-	struct rt5640_platform_data *pdata = dev_get_platdata(&i2c->dev);
 	struct rt5640_priv *rt5640;
 	int ret;
 	unsigned int val;
@@ -2358,22 +2775,12 @@
 		return -ENOMEM;
 	i2c_set_clientdata(i2c, rt5640);
 
-	if (pdata) {
-		rt5640->pdata = *pdata;
-		/*
-		 * Translate zero'd out (default) pdata value to an invalid
-		 * GPIO ID. This makes the pdata and DT paths consistent in
-		 * terms of the value left in this field when no GPIO is
-		 * specified, but means we can't actually use GPIO 0.
-		 */
-		if (!rt5640->pdata.ldo1_en)
-			rt5640->pdata.ldo1_en = -EINVAL;
-	} else if (i2c->dev.of_node) {
+	if (i2c->dev.of_node) {
 		ret = rt5640_parse_dt(rt5640, i2c->dev.of_node);
 		if (ret)
 			return ret;
 	} else
-		rt5640->pdata.ldo1_en = -EINVAL;
+		rt5640->ldo1_en = -EINVAL;
 
 	rt5640->regmap = devm_regmap_init_i2c(i2c, &rt5640_regmap);
 	if (IS_ERR(rt5640->regmap)) {
@@ -2383,13 +2790,13 @@
 		return ret;
 	}
 
-	if (gpio_is_valid(rt5640->pdata.ldo1_en)) {
-		ret = devm_gpio_request_one(&i2c->dev, rt5640->pdata.ldo1_en,
+	if (gpio_is_valid(rt5640->ldo1_en)) {
+		ret = devm_gpio_request_one(&i2c->dev, rt5640->ldo1_en,
 					    GPIOF_OUT_INIT_HIGH,
 					    "RT5640 LDO1_EN");
 		if (ret < 0) {
 			dev_err(&i2c->dev, "Failed to request LDO1_EN %d: %d\n",
-				rt5640->pdata.ldo1_en, ret);
+				rt5640->ldo1_en, ret);
 			return ret;
 		}
 		msleep(400);
@@ -2412,19 +2819,27 @@
 	regmap_update_bits(rt5640->regmap, RT5640_DUMMY1,
 				RT5640_MCLK_DET, RT5640_MCLK_DET);
 
-	if (rt5640->pdata.in1_diff)
-		regmap_update_bits(rt5640->regmap, RT5640_IN1_IN2,
-					RT5640_IN_DF1, RT5640_IN_DF1);
-
-	if (rt5640->pdata.in2_diff)
-		regmap_update_bits(rt5640->regmap, RT5640_IN3_IN4,
-					RT5640_IN_DF2, RT5640_IN_DF2);
-
-	if (rt5640->pdata.in3_diff)
-		regmap_update_bits(rt5640->regmap, RT5640_IN1_IN2,
-					RT5640_IN_DF2, RT5640_IN_DF2);
-
 	rt5640->hp_mute = 1;
+	rt5640->irq = i2c->irq;
+	INIT_DELAYED_WORK(&rt5640->bp_work, rt5640_button_press_work);
+	INIT_WORK(&rt5640->jack_work, rt5640_jack_work);
+
+	/* Make sure work is stopped on probe-error / remove */
+	ret = devm_add_action_or_reset(&i2c->dev, rt5640_cancel_work, rt5640);
+	if (ret)
+		return ret;
+
+	ret = devm_request_irq(&i2c->dev, rt5640->irq, rt5640_irq,
+			       IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
+			       | IRQF_ONESHOT, "rt5640", rt5640);
+	if (ret == 0) {
+		/* Gets re-enabled by rt5640_set_jack() */
+		disable_irq(rt5640->irq);
+	} else {
+		dev_warn(&i2c->dev, "Failed to request IRQ %d: %d\n",
+			 rt5640->irq, ret);
+		rt5640->irq = -ENXIO;
+	}
 
 	return devm_snd_soc_register_component(&i2c->dev,
 				      &soc_component_dev_rt5640,
diff --git a/sound/soc/codecs/rt5640.h b/sound/soc/codecs/rt5640.h
index c473e8a..e29e3e7 100644
--- a/sound/soc/codecs/rt5640.h
+++ b/sound/soc/codecs/rt5640.h
@@ -13,7 +13,8 @@
 #define _RT5640_H
 
 #include <linux/clk.h>
-#include <sound/rt5640.h>
+#include <linux/workqueue.h>
+#include <dt-bindings/sound/rt5640.h>
 
 /* Info */
 #define RT5640_RESET				0x00
@@ -146,6 +147,7 @@
 
 
 /* Index of Codec Private Register definition */
+#define RT5640_BIAS_CUR4			0x15
 #define RT5640_CHPUMP_INT_REG1			0x24
 #define RT5640_MAMP_INT_REG2			0x37
 #define RT5640_3D_SPK				0x63
@@ -1607,10 +1609,17 @@
 #define RT5640_MB2_OC_P_SFT			6
 #define RT5640_MB2_OC_P_NOR			(0x0 << 6)
 #define RT5640_MB2_OC_P_INV			(0x1 << 6)
-#define RT5640_MB1_OC_CLR			(0x1 << 3)
-#define RT5640_MB1_OC_CLR_SFT			3
-#define RT5640_MB2_OC_CLR			(0x1 << 2)
-#define RT5640_MB2_OC_CLR_SFT			2
+#define RT5640_MB1_OC_STATUS			(0x1 << 3)
+#define RT5640_MB1_OC_STATUS_SFT		3
+#define RT5640_MB2_OC_STATUS			(0x1 << 2)
+#define RT5640_MB2_OC_STATUS_SFT		2
+
+/* GPIO and Internal Status (0xbf) */
+#define RT5640_GPIO1_STATUS			(0x1 << 8)
+#define RT5640_GPIO2_STATUS			(0x1 << 7)
+#define RT5640_JD_STATUS			(0x1 << 4)
+#define RT5640_OVT_STATUS			(0x1 << 3)
+#define RT5640_CLS_D_OVCD_STATUS		(0x1 << 0)
 
 /* GPIO Control 1 (0xc0) */
 #define RT5640_GP1_PIN_MASK			(0x1 << 15)
@@ -1978,6 +1987,15 @@
 #define RT5640_MCLK_DET				(0x1 << 11)
 
 /* Codec Private Register definition */
+
+/* MIC Over current threshold scale factor (0x15) */
+#define RT5640_MIC_OVCD_SF_MASK			(0x3 << 8)
+#define RT5640_MIC_OVCD_SF_SFT			8
+#define RT5640_MIC_OVCD_SF_0P5			(0x0 << 8)
+#define RT5640_MIC_OVCD_SF_0P75			(0x1 << 8)
+#define RT5640_MIC_OVCD_SF_1P0			(0x2 << 8)
+#define RT5640_MIC_OVCD_SF_1P5			(0x3 << 8)
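+/*
+ * The effective over-current limit is the configured threshold multiplied by
+ * the scale-factor, e.g. a 2000µA threshold with a 0.75 scale-factor gives
+ * an effective limit of 1500µA.
+ */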
+
 /* 3D Speaker Control (0x63) */
 #define RT5640_3D_SPK_MASK			(0x1 << 15)
 #define RT5640_3D_SPK_SFT			15
@@ -2103,10 +2121,11 @@
 
 struct rt5640_priv {
 	struct snd_soc_component *component;
-	struct rt5640_platform_data pdata;
 	struct regmap *regmap;
 	struct clk *mclk;
 
+	int ldo1_en; /* GPIO for LDO1_EN */
+	int irq;
 	int sysclk;
 	int sysclk_src;
 	int lrck[RT5640_AIFS];
@@ -2119,6 +2138,21 @@
 
 	bool hp_mute;
 	bool asrc_en;
+
+	/* Jack and button detect data */
+	bool ovcd_irq_enabled;
+	bool pressed;
+	bool press_reported;
+	int press_count;
+	int release_count;
+	int poll_count;
+	struct delayed_work bp_work;
+	struct work_struct jack_work;
+	struct snd_soc_jack *jack;
+	unsigned int jd_src;
+	bool jd_inverted;
+	unsigned int ovcd_th;
+	unsigned int ovcd_sf;
 };
 
 int rt5640_dmic_enable(struct snd_soc_component *component,
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index bc8d829..7123845 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3652,6 +3652,11 @@
 	.inv_jd1_1 = true,
 };
 
+static const struct rt5645_platform_data lenovo_ideapad_miix_310_pdata = {
+	.jd_mode = 3,
+	.in2_diff = true,
+};
+
 static const struct rt5645_platform_data jd_mode3_platform_data = {
 	.jd_mode = 3,
 };
@@ -3735,6 +3740,24 @@
 		},
 		.driver_data = (void *)&jd_mode3_platform_data,
 	},
+	{
+		.ident = "Lenovo Ideapad Miix 310",
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80SG"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"),
+		},
+		.driver_data = (void *)&lenovo_ideapad_miix_310_pdata,
+	},
+	{
+		.ident = "Lenovo Ideapad Miix 320",
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80XF"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"),
+		},
+		.driver_data = (void *)&intel_braswell_platform_data,
+	},
 	{ }
 };
 
diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c
index 20c0aee..9bd24ad 100644
--- a/sound/soc/codecs/rt5663.c
+++ b/sound/soc/codecs/rt5663.c
@@ -72,6 +72,8 @@
 static const struct reg_sequence rt5663_patch_list[] = {
 	{ 0x002a, 0x8020 },
 	{ 0x0086, 0x0028 },
+	{ 0x0117, 0x0f28 },
+	{ 0x02fb, 0x8089 },
 };
 
 static const struct reg_default rt5663_v2_reg[] = {
@@ -593,7 +595,7 @@
 	{ 0x0113, 0x2000 },
 	{ 0x0114, 0x0000 },
 	{ 0x0116, 0x0000 },
-	{ 0x0117, 0x0f00 },
+	{ 0x0117, 0x0f28 },
 	{ 0x0118, 0x0006 },
 	{ 0x0125, 0x2424 },
 	{ 0x0126, 0x5550 },
@@ -693,7 +695,7 @@
 	{ 0x0251, 0x0000 },
 	{ 0x0252, 0x028a },
 	{ 0x02fa, 0x0000 },
-	{ 0x02fb, 0x00a4 },
+	{ 0x02fb, 0x8089 },
 	{ 0x02fc, 0x0300 },
 	{ 0x0300, 0x0000 },
 	{ 0x03d0, 0x0000 },
@@ -1556,6 +1558,14 @@
 			RT5663_PWR_MB_MASK | RT5663_LDO1_DVO_MASK |
 			RT5663_AMP_HP_MASK, RT5663_PWR_MB |
 			RT5663_LDO1_DVO_0_9V | RT5663_AMP_HP_3X);
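+		/*
+		 * Power up VREF1/2 first with the FV bits cleared, then set
+		 * FV1/FV2 once the references have had 20 ms to settle.
+		 */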
+		snd_soc_component_update_bits(component, RT5663_PWR_ANLG_1,
+			RT5663_PWR_VREF1_MASK | RT5663_PWR_VREF2_MASK |
+			RT5663_PWR_FV1_MASK | RT5663_PWR_FV2_MASK,
+			RT5663_PWR_VREF1 | RT5663_PWR_VREF2);
+		msleep(20);
+		snd_soc_component_update_bits(component, RT5663_PWR_ANLG_1,
+			RT5663_PWR_FV1_MASK | RT5663_PWR_FV2_MASK,
+			RT5663_PWR_FV1 | RT5663_PWR_FV2);
 		snd_soc_component_update_bits(component, RT5663_AUTO_1MRC_CLK,
 			RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN);
 		snd_soc_component_update_bits(component, RT5663_IRQ_1,
@@ -1613,7 +1623,10 @@
 			break;
 		default:
 			rt5663->jack_type = SND_JACK_HEADPHONE;
-
+			snd_soc_component_update_bits(component,
+				RT5663_PWR_ANLG_1,
+				RT5663_PWR_MB_MASK | RT5663_PWR_VREF1_MASK |
+				RT5663_PWR_VREF2_MASK, 0);
 			if (rt5663->pdata.impedance_sensing_num)
 				break;
 
@@ -1638,6 +1651,9 @@
 		if (rt5663->jack_type == SND_JACK_HEADSET)
 			rt5663_enable_push_button_irq(component, false);
 		rt5663->jack_type = 0;
+		snd_soc_component_update_bits(component, RT5663_PWR_ANLG_1,
+			RT5663_PWR_MB_MASK | RT5663_PWR_VREF1_MASK |
+			RT5663_PWR_VREF2_MASK, 0);
 	}
 
 	dev_dbg(component->dev, "jack_type = %d\n", rt5663->jack_type);
@@ -1840,8 +1856,8 @@
 	return IRQ_HANDLED;
 }
 
-int rt5663_set_jack_detect(struct snd_soc_component *component,
-	struct snd_soc_jack *hs_jack)
+static int rt5663_set_jack_detect(struct snd_soc_component *component,
+	struct snd_soc_jack *hs_jack, void *data)
 {
 	struct rt5663_priv *rt5663 = snd_soc_component_get_drvdata(component);
 
@@ -1851,7 +1867,6 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(rt5663_set_jack_detect);
 
 static bool rt5663_check_jd_status(struct snd_soc_component *component)
 {
@@ -2307,6 +2322,8 @@
 				RT5663_HP_SIG_SRC1_MASK,
 				RT5663_HP_SIG_SRC1_SILENCE);
 		} else {
+			snd_soc_component_update_bits(component,
+				RT5663_DACREF_LDO, 0x3e0e, 0x3a0a);
 			snd_soc_component_write(component, RT5663_DEPOP_2, 0x3003);
 			snd_soc_component_update_bits(component, RT5663_HP_CHARGE_PUMP_1,
 				RT5663_OVCD_HP_MASK, RT5663_OVCD_HP_DIS);
@@ -2332,6 +2349,8 @@
 			snd_soc_component_update_bits(component, RT5663_DEPOP_1, 0x3000, 0x0);
 			snd_soc_component_update_bits(component, RT5663_HP_CHARGE_PUMP_1,
 				RT5663_OVCD_HP_MASK, RT5663_OVCD_HP_EN);
+			snd_soc_component_update_bits(component,
+				RT5663_DACREF_LDO, 0x3e0e, 0);
 		}
 		break;
 
@@ -3086,9 +3105,17 @@
 		break;
 
 	case SND_SOC_BIAS_OFF:
-		snd_soc_component_update_bits(component, RT5663_PWR_ANLG_1,
-			RT5663_PWR_VREF1_MASK | RT5663_PWR_VREF2_MASK |
-			RT5663_PWR_FV1 | RT5663_PWR_FV2, 0x0);
+		if (rt5663->jack_type != SND_JACK_HEADSET)
+			snd_soc_component_update_bits(component,
+				RT5663_PWR_ANLG_1,
+				RT5663_PWR_VREF1_MASK | RT5663_PWR_VREF2_MASK |
+				RT5663_PWR_FV1 | RT5663_PWR_FV2 |
+				RT5663_PWR_MB_MASK, 0);
+		else
+			snd_soc_component_update_bits(component,
+				RT5663_PWR_ANLG_1,
+				RT5663_PWR_FV1_MASK | RT5663_PWR_FV2_MASK,
+				RT5663_PWR_FV1 | RT5663_PWR_FV2);
 		break;
 
 	default:
@@ -3216,10 +3243,10 @@
 	.num_dapm_widgets	= ARRAY_SIZE(rt5663_dapm_widgets),
 	.dapm_routes		= rt5663_dapm_routes,
 	.num_dapm_routes	= ARRAY_SIZE(rt5663_dapm_routes),
+	.set_jack		= rt5663_set_jack_detect,
 	.use_pmdown_time	= 1,
 	.endianness		= 1,
 	.non_legacy_dai_naming	= 1,
-
 };
 
 static const struct regmap_config rt5663_v2_regmap = {
@@ -3310,6 +3337,7 @@
 	regmap_write(rt5663->regmap, RT5663_HP_IMP_SEN_19, 0x000c);
 	regmap_write(rt5663->regmap, RT5663_DUMMY_1, 0x0324);
 	regmap_write(rt5663->regmap, RT5663_DIG_MISC, 0x8001);
+	regmap_write(rt5663->regmap, RT5663_VREFADJ_OP, 0x0f28);
 	regmap_write(rt5663->regmap, RT5663_PWR_ANLG_1, 0xa23b);
 	msleep(30);
 	regmap_write(rt5663->regmap, RT5663_PWR_ANLG_1, 0xf23b);
@@ -3344,6 +3372,7 @@
 	regmap_write(rt5663->regmap, RT5663_PWR_ANLG_2, 0x8003);
 	regmap_write(rt5663->regmap, RT5663_PWR_ANLG_3, 0x018c);
 	regmap_write(rt5663->regmap, RT5663_HP_CHARGE_PUMP_1, 0x1e32);
+	regmap_write(rt5663->regmap, RT5663_DUMMY_2, 0x8089);
 	regmap_write(rt5663->regmap, RT5663_DACREF_LDO, 0x3b0b);
 	msleep(40);
 	regmap_write(rt5663->regmap, RT5663_STO_DAC_MIXER, 0x0000);
@@ -3578,15 +3607,9 @@
 		regmap_update_bits(rt5663->regmap, RT5663_GPIO_1,
 			RT5663_GPIO1_TYPE_MASK, RT5663_GPIO1_TYPE_EN);
 		regmap_write(rt5663->regmap, RT5663_VREF_RECMIX, 0x0032);
-		regmap_write(rt5663->regmap, RT5663_PWR_ANLG_1, 0xa2be);
-		msleep(20);
-		regmap_write(rt5663->regmap, RT5663_PWR_ANLG_1, 0xf2be);
 		regmap_update_bits(rt5663->regmap, RT5663_GPIO_2,
 			RT5663_GP1_PIN_CONF_MASK | RT5663_SEL_GPIO1_MASK,
 			RT5663_GP1_PIN_CONF_OUTPUT | RT5663_SEL_GPIO1_EN);
-		/* DACREF LDO control */
-		regmap_update_bits(rt5663->regmap, RT5663_DACREF_LDO, 0x3e0e,
-			0x3a0a);
 		regmap_update_bits(rt5663->regmap, RT5663_RECMIX,
 			RT5663_RECMIX1_BST1_MASK, RT5663_RECMIX1_BST1_ON);
 		regmap_update_bits(rt5663->regmap, RT5663_TDM_2,
diff --git a/sound/soc/codecs/rt5663.h b/sound/soc/codecs/rt5663.h
index 865203c..794cf3f 100644
--- a/sound/soc/codecs/rt5663.h
+++ b/sound/soc/codecs/rt5663.h
@@ -1125,8 +1125,6 @@
 	RT5663_AD_STEREO_FILTER = 0x2,
 };
 
-int rt5663_set_jack_detect(struct snd_soc_component *component,
-	struct snd_soc_jack *hs_jack);
 int rt5663_sel_asrc_clk_src(struct snd_soc_component *component,
 	unsigned int filter_mask, unsigned int clk_src);
 
diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c
new file mode 100644
index 0000000..3c19d03
--- /dev/null
+++ b/sound/soc/codecs/rt5668.c
@@ -0,0 +1,2639 @@
+/*
+ * rt5668.c  --  RT5668B ALSA SoC audio component driver
+ *
+ * Copyright 2018 Realtek Semiconductor Corp.
+ * Author: Bard Liao <bardliao@realtek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/i2c.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/acpi.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+#include <linux/regulator/consumer.h>
+#include <linux/mutex.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/jack.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+#include <sound/rt5668.h>
+
+#include "rl6231.h"
+#include "rt5668.h"
+
+#define RT5668_NUM_SUPPLIES 3
+
+static const char *rt5668_supply_names[RT5668_NUM_SUPPLIES] = {
+	"AVDD",
+	"MICVDD",
+	"VBAT",
+};
+
+struct rt5668_priv {
+	struct snd_soc_component *component;
+	struct rt5668_platform_data pdata;
+	struct regmap *regmap;
+	struct snd_soc_jack *hs_jack;
+	struct regulator_bulk_data supplies[RT5668_NUM_SUPPLIES];
+	struct delayed_work jack_detect_work;
+	struct delayed_work jd_check_work;
+	struct mutex calibrate_mutex;
+
+	int sysclk;
+	int sysclk_src;
+	int lrck[RT5668_AIFS];
+	int bclk[RT5668_AIFS];
+	int master[RT5668_AIFS];
+
+	int pll_src;
+	int pll_in;
+	int pll_out;
+
+	int jack_type;
+};
+
+static const struct reg_default rt5668_reg[] = {
+	{0x0002, 0x8080},
+	{0x0003, 0x8000},
+	{0x0005, 0x0000},
+	{0x0006, 0x0000},
+	{0x0008, 0x800f},
+	{0x000b, 0x0000},
+	{0x0010, 0x4040},
+	{0x0011, 0x0000},
+	{0x0012, 0x1404},
+	{0x0013, 0x1000},
+	{0x0014, 0xa00a},
+	{0x0015, 0x0404},
+	{0x0016, 0x0404},
+	{0x0019, 0xafaf},
+	{0x001c, 0x2f2f},
+	{0x001f, 0x0000},
+	{0x0022, 0x5757},
+	{0x0023, 0x0039},
+	{0x0024, 0x000b},
+	{0x0026, 0xc0c4},
+	{0x0029, 0x8080},
+	{0x002a, 0xa0a0},
+	{0x002b, 0x0300},
+	{0x0030, 0x0000},
+	{0x003c, 0x0080},
+	{0x0044, 0x0c0c},
+	{0x0049, 0x0000},
+	{0x0061, 0x0000},
+	{0x0062, 0x0000},
+	{0x0063, 0x003f},
+	{0x0064, 0x0000},
+	{0x0065, 0x0000},
+	{0x0066, 0x0030},
+	{0x0067, 0x0000},
+	{0x006b, 0x0000},
+	{0x006c, 0x0000},
+	{0x006d, 0x2200},
+	{0x006e, 0x0a10},
+	{0x0070, 0x8000},
+	{0x0071, 0x8000},
+	{0x0073, 0x0000},
+	{0x0074, 0x0000},
+	{0x0075, 0x0002},
+	{0x0076, 0x0001},
+	{0x0079, 0x0000},
+	{0x007a, 0x0000},
+	{0x007b, 0x0000},
+	{0x007c, 0x0100},
+	{0x007e, 0x0000},
+	{0x0080, 0x0000},
+	{0x0081, 0x0000},
+	{0x0082, 0x0000},
+	{0x0083, 0x0000},
+	{0x0084, 0x0000},
+	{0x0085, 0x0000},
+	{0x0086, 0x0005},
+	{0x0087, 0x0000},
+	{0x0088, 0x0000},
+	{0x008c, 0x0003},
+	{0x008d, 0x0000},
+	{0x008e, 0x0060},
+	{0x008f, 0x1000},
+	{0x0091, 0x0c26},
+	{0x0092, 0x0073},
+	{0x0093, 0x0000},
+	{0x0094, 0x0080},
+	{0x0098, 0x0000},
+	{0x009a, 0x0000},
+	{0x009b, 0x0000},
+	{0x009c, 0x0000},
+	{0x009d, 0x0000},
+	{0x009e, 0x100c},
+	{0x009f, 0x0000},
+	{0x00a0, 0x0000},
+	{0x00a3, 0x0002},
+	{0x00a4, 0x0001},
+	{0x00ae, 0x2040},
+	{0x00af, 0x0000},
+	{0x00b6, 0x0000},
+	{0x00b7, 0x0000},
+	{0x00b8, 0x0000},
+	{0x00b9, 0x0002},
+	{0x00be, 0x0000},
+	{0x00c0, 0x0160},
+	{0x00c1, 0x82a0},
+	{0x00c2, 0x0000},
+	{0x00d0, 0x0000},
+	{0x00d1, 0x2244},
+	{0x00d2, 0x3300},
+	{0x00d3, 0x2200},
+	{0x00d4, 0x0000},
+	{0x00d9, 0x0009},
+	{0x00da, 0x0000},
+	{0x00db, 0x0000},
+	{0x00dc, 0x00c0},
+	{0x00dd, 0x2220},
+	{0x00de, 0x3131},
+	{0x00df, 0x3131},
+	{0x00e0, 0x3131},
+	{0x00e2, 0x0000},
+	{0x00e3, 0x4000},
+	{0x00e4, 0x0aa0},
+	{0x00e5, 0x3131},
+	{0x00e6, 0x3131},
+	{0x00e7, 0x3131},
+	{0x00e8, 0x3131},
+	{0x00ea, 0xb320},
+	{0x00eb, 0x0000},
+	{0x00f0, 0x0000},
+	{0x00f1, 0x00d0},
+	{0x00f2, 0x00d0},
+	{0x00f6, 0x0000},
+	{0x00fa, 0x0000},
+	{0x00fb, 0x0000},
+	{0x00fc, 0x0000},
+	{0x00fd, 0x0000},
+	{0x00fe, 0x10ec},
+	{0x00ff, 0x6530},
+	{0x0100, 0xa0a0},
+	{0x010b, 0x0000},
+	{0x010c, 0xae00},
+	{0x010d, 0xaaa0},
+	{0x010e, 0x8aa2},
+	{0x010f, 0x02a2},
+	{0x0110, 0xc000},
+	{0x0111, 0x04a2},
+	{0x0112, 0x2800},
+	{0x0113, 0x0000},
+	{0x0117, 0x0100},
+	{0x0125, 0x0410},
+	{0x0132, 0x6026},
+	{0x0136, 0x5555},
+	{0x0138, 0x3700},
+	{0x013a, 0x2000},
+	{0x013b, 0x2000},
+	{0x013c, 0x2005},
+	{0x013f, 0x0000},
+	{0x0142, 0x0000},
+	{0x0145, 0x0002},
+	{0x0146, 0x0000},
+	{0x0147, 0x0000},
+	{0x0148, 0x0000},
+	{0x0149, 0x0000},
+	{0x0150, 0x79a1},
+	{0x0151, 0x0000},
+	{0x0160, 0x4ec0},
+	{0x0161, 0x0080},
+	{0x0162, 0x0200},
+	{0x0163, 0x0800},
+	{0x0164, 0x0000},
+	{0x0165, 0x0000},
+	{0x0166, 0x0000},
+	{0x0167, 0x000f},
+	{0x0168, 0x000f},
+	{0x0169, 0x0021},
+	{0x0190, 0x413d},
+	{0x0194, 0x0000},
+	{0x0195, 0x0000},
+	{0x0197, 0x0022},
+	{0x0198, 0x0000},
+	{0x0199, 0x0000},
+	{0x01af, 0x0000},
+	{0x01b0, 0x0400},
+	{0x01b1, 0x0000},
+	{0x01b2, 0x0000},
+	{0x01b3, 0x0000},
+	{0x01b4, 0x0000},
+	{0x01b5, 0x0000},
+	{0x01b6, 0x01c3},
+	{0x01b7, 0x02a0},
+	{0x01b8, 0x03e9},
+	{0x01b9, 0x1389},
+	{0x01ba, 0xc351},
+	{0x01bb, 0x0009},
+	{0x01bc, 0x0018},
+	{0x01bd, 0x002a},
+	{0x01be, 0x004c},
+	{0x01bf, 0x0097},
+	{0x01c0, 0x433d},
+	{0x01c1, 0x2800},
+	{0x01c2, 0x0000},
+	{0x01c3, 0x0000},
+	{0x01c4, 0x0000},
+	{0x01c5, 0x0000},
+	{0x01c6, 0x0000},
+	{0x01c7, 0x0000},
+	{0x01c8, 0x40af},
+	{0x01c9, 0x0702},
+	{0x01ca, 0x0000},
+	{0x01cb, 0x0000},
+	{0x01cc, 0x5757},
+	{0x01cd, 0x5757},
+	{0x01ce, 0x5757},
+	{0x01cf, 0x5757},
+	{0x01d0, 0x5757},
+	{0x01d1, 0x5757},
+	{0x01d2, 0x5757},
+	{0x01d3, 0x5757},
+	{0x01d4, 0x5757},
+	{0x01d5, 0x5757},
+	{0x01d6, 0x0000},
+	{0x01d7, 0x0008},
+	{0x01d8, 0x0029},
+	{0x01d9, 0x3333},
+	{0x01da, 0x0000},
+	{0x01db, 0x0004},
+	{0x01dc, 0x0000},
+	{0x01de, 0x7c00},
+	{0x01df, 0x0320},
+	{0x01e0, 0x06a1},
+	{0x01e1, 0x0000},
+	{0x01e2, 0x0000},
+	{0x01e3, 0x0000},
+	{0x01e4, 0x0000},
+	{0x01e6, 0x0001},
+	{0x01e7, 0x0000},
+	{0x01e8, 0x0000},
+	{0x01ea, 0x0000},
+	{0x01eb, 0x0000},
+	{0x01ec, 0x0000},
+	{0x01ed, 0x0000},
+	{0x01ee, 0x0000},
+	{0x01ef, 0x0000},
+	{0x01f0, 0x0000},
+	{0x01f1, 0x0000},
+	{0x01f2, 0x0000},
+	{0x01f3, 0x0000},
+	{0x01f4, 0x0000},
+	{0x0210, 0x6297},
+	{0x0211, 0xa005},
+	{0x0212, 0x824c},
+	{0x0213, 0xf7ff},
+	{0x0214, 0xf24c},
+	{0x0215, 0x0102},
+	{0x0216, 0x00a3},
+	{0x0217, 0x0048},
+	{0x0218, 0xa2c0},
+	{0x0219, 0x0400},
+	{0x021a, 0x00c8},
+	{0x021b, 0x00c0},
+	{0x021c, 0x0000},
+	{0x0250, 0x4500},
+	{0x0251, 0x40b3},
+	{0x0252, 0x0000},
+	{0x0253, 0x0000},
+	{0x0254, 0x0000},
+	{0x0255, 0x0000},
+	{0x0256, 0x0000},
+	{0x0257, 0x0000},
+	{0x0258, 0x0000},
+	{0x0259, 0x0000},
+	{0x025a, 0x0005},
+	{0x0270, 0x0000},
+	{0x02ff, 0x0110},
+	{0x0300, 0x001f},
+	{0x0301, 0x032c},
+	{0x0302, 0x5f21},
+	{0x0303, 0x4000},
+	{0x0304, 0x4000},
+	{0x0305, 0x06d5},
+	{0x0306, 0x8000},
+	{0x0307, 0x0700},
+	{0x0310, 0x4560},
+	{0x0311, 0xa4a8},
+	{0x0312, 0x7418},
+	{0x0313, 0x0000},
+	{0x0314, 0x0006},
+	{0x0315, 0xffff},
+	{0x0316, 0xc400},
+	{0x0317, 0x0000},
+	{0x03c0, 0x7e00},
+	{0x03c1, 0x8000},
+	{0x03c2, 0x8000},
+	{0x03c3, 0x8000},
+	{0x03c4, 0x8000},
+	{0x03c5, 0x8000},
+	{0x03c6, 0x8000},
+	{0x03c7, 0x8000},
+	{0x03c8, 0x8000},
+	{0x03c9, 0x8000},
+	{0x03ca, 0x8000},
+	{0x03cb, 0x8000},
+	{0x03cc, 0x8000},
+	{0x03d0, 0x0000},
+	{0x03d1, 0x0000},
+	{0x03d2, 0x0000},
+	{0x03d3, 0x0000},
+	{0x03d4, 0x2000},
+	{0x03d5, 0x2000},
+	{0x03d6, 0x0000},
+	{0x03d7, 0x0000},
+	{0x03d8, 0x2000},
+	{0x03d9, 0x2000},
+	{0x03da, 0x2000},
+	{0x03db, 0x2000},
+	{0x03dc, 0x0000},
+	{0x03dd, 0x0000},
+	{0x03de, 0x0000},
+	{0x03df, 0x2000},
+	{0x03e0, 0x0000},
+	{0x03e1, 0x0000},
+	{0x03e2, 0x0000},
+	{0x03e3, 0x0000},
+	{0x03e4, 0x0000},
+	{0x03e5, 0x0000},
+	{0x03e6, 0x0000},
+	{0x03e7, 0x0000},
+	{0x03e8, 0x0000},
+	{0x03e9, 0x0000},
+	{0x03ea, 0x0000},
+	{0x03eb, 0x0000},
+	{0x03ec, 0x0000},
+	{0x03ed, 0x0000},
+	{0x03ee, 0x0000},
+	{0x03ef, 0x0000},
+	{0x03f0, 0x0800},
+	{0x03f1, 0x0800},
+	{0x03f2, 0x0800},
+	{0x03f3, 0x0800},
+};
+
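+/*
+ * Registers marked volatile here are status/command registers which must
+ * always be read from the hardware rather than from the regmap cache.
+ */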
+static bool rt5668_volatile_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case RT5668_RESET:
+	case RT5668_CBJ_CTRL_2:
+	case RT5668_INT_ST_1:
+	case RT5668_4BTN_IL_CMD_1:
+	case RT5668_AJD1_CTRL:
+	case RT5668_HP_CALIB_CTRL_1:
+	case RT5668_DEVICE_ID:
+	case RT5668_I2C_MODE:
+	case RT5668_HP_CALIB_CTRL_10:
+	case RT5668_EFUSE_CTRL_2:
+	case RT5668_JD_TOP_VC_VTRL:
+	case RT5668_HP_IMP_SENS_CTRL_19:
+	case RT5668_IL_CMD_1:
+	case RT5668_SAR_IL_CMD_2:
+	case RT5668_SAR_IL_CMD_4:
+	case RT5668_SAR_IL_CMD_10:
+	case RT5668_SAR_IL_CMD_11:
+	case RT5668_EFUSE_CTRL_6...RT5668_EFUSE_CTRL_11:
+	case RT5668_HP_CALIB_STA_1...RT5668_HP_CALIB_STA_11:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool rt5668_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case RT5668_RESET:
+	case RT5668_VERSION_ID:
+	case RT5668_VENDOR_ID:
+	case RT5668_DEVICE_ID:
+	case RT5668_HP_CTRL_1:
+	case RT5668_HP_CTRL_2:
+	case RT5668_HPL_GAIN:
+	case RT5668_HPR_GAIN:
+	case RT5668_I2C_CTRL:
+	case RT5668_CBJ_BST_CTRL:
+	case RT5668_CBJ_CTRL_1:
+	case RT5668_CBJ_CTRL_2:
+	case RT5668_CBJ_CTRL_3:
+	case RT5668_CBJ_CTRL_4:
+	case RT5668_CBJ_CTRL_5:
+	case RT5668_CBJ_CTRL_6:
+	case RT5668_CBJ_CTRL_7:
+	case RT5668_DAC1_DIG_VOL:
+	case RT5668_STO1_ADC_DIG_VOL:
+	case RT5668_STO1_ADC_BOOST:
+	case RT5668_HP_IMP_GAIN_1:
+	case RT5668_HP_IMP_GAIN_2:
+	case RT5668_SIDETONE_CTRL:
+	case RT5668_STO1_ADC_MIXER:
+	case RT5668_AD_DA_MIXER:
+	case RT5668_STO1_DAC_MIXER:
+	case RT5668_A_DAC1_MUX:
+	case RT5668_DIG_INF2_DATA:
+	case RT5668_REC_MIXER:
+	case RT5668_CAL_REC:
+	case RT5668_ALC_BACK_GAIN:
+	case RT5668_PWR_DIG_1:
+	case RT5668_PWR_DIG_2:
+	case RT5668_PWR_ANLG_1:
+	case RT5668_PWR_ANLG_2:
+	case RT5668_PWR_ANLG_3:
+	case RT5668_PWR_MIXER:
+	case RT5668_PWR_VOL:
+	case RT5668_CLK_DET:
+	case RT5668_RESET_LPF_CTRL:
+	case RT5668_RESET_HPF_CTRL:
+	case RT5668_DMIC_CTRL_1:
+	case RT5668_I2S1_SDP:
+	case RT5668_I2S2_SDP:
+	case RT5668_ADDA_CLK_1:
+	case RT5668_ADDA_CLK_2:
+	case RT5668_I2S1_F_DIV_CTRL_1:
+	case RT5668_I2S1_F_DIV_CTRL_2:
+	case RT5668_TDM_CTRL:
+	case RT5668_TDM_ADDA_CTRL_1:
+	case RT5668_TDM_ADDA_CTRL_2:
+	case RT5668_DATA_SEL_CTRL_1:
+	case RT5668_TDM_TCON_CTRL:
+	case RT5668_GLB_CLK:
+	case RT5668_PLL_CTRL_1:
+	case RT5668_PLL_CTRL_2:
+	case RT5668_PLL_TRACK_1:
+	case RT5668_PLL_TRACK_2:
+	case RT5668_PLL_TRACK_3:
+	case RT5668_PLL_TRACK_4:
+	case RT5668_PLL_TRACK_5:
+	case RT5668_PLL_TRACK_6:
+	case RT5668_PLL_TRACK_11:
+	case RT5668_SDW_REF_CLK:
+	case RT5668_DEPOP_1:
+	case RT5668_DEPOP_2:
+	case RT5668_HP_CHARGE_PUMP_1:
+	case RT5668_HP_CHARGE_PUMP_2:
+	case RT5668_MICBIAS_1:
+	case RT5668_MICBIAS_2:
+	case RT5668_PLL_TRACK_12:
+	case RT5668_PLL_TRACK_14:
+	case RT5668_PLL2_CTRL_1:
+	case RT5668_PLL2_CTRL_2:
+	case RT5668_PLL2_CTRL_3:
+	case RT5668_PLL2_CTRL_4:
+	case RT5668_RC_CLK_CTRL:
+	case RT5668_I2S_M_CLK_CTRL_1:
+	case RT5668_I2S2_F_DIV_CTRL_1:
+	case RT5668_I2S2_F_DIV_CTRL_2:
+	case RT5668_EQ_CTRL_1:
+	case RT5668_EQ_CTRL_2:
+	case RT5668_IRQ_CTRL_1:
+	case RT5668_IRQ_CTRL_2:
+	case RT5668_IRQ_CTRL_3:
+	case RT5668_IRQ_CTRL_4:
+	case RT5668_INT_ST_1:
+	case RT5668_GPIO_CTRL_1:
+	case RT5668_GPIO_CTRL_2:
+	case RT5668_GPIO_CTRL_3:
+	case RT5668_HP_AMP_DET_CTRL_1:
+	case RT5668_HP_AMP_DET_CTRL_2:
+	case RT5668_MID_HP_AMP_DET:
+	case RT5668_LOW_HP_AMP_DET:
+	case RT5668_DELAY_BUF_CTRL:
+	case RT5668_SV_ZCD_1:
+	case RT5668_SV_ZCD_2:
+	case RT5668_IL_CMD_1:
+	case RT5668_IL_CMD_2:
+	case RT5668_IL_CMD_3:
+	case RT5668_IL_CMD_4:
+	case RT5668_IL_CMD_5:
+	case RT5668_IL_CMD_6:
+	case RT5668_4BTN_IL_CMD_1:
+	case RT5668_4BTN_IL_CMD_2:
+	case RT5668_4BTN_IL_CMD_3:
+	case RT5668_4BTN_IL_CMD_4:
+	case RT5668_4BTN_IL_CMD_5:
+	case RT5668_4BTN_IL_CMD_6:
+	case RT5668_4BTN_IL_CMD_7:
+	case RT5668_ADC_STO1_HP_CTRL_1:
+	case RT5668_ADC_STO1_HP_CTRL_2:
+	case RT5668_AJD1_CTRL:
+	case RT5668_JD1_THD:
+	case RT5668_JD2_THD:
+	case RT5668_JD_CTRL_1:
+	case RT5668_DUMMY_1:
+	case RT5668_DUMMY_2:
+	case RT5668_DUMMY_3:
+	case RT5668_DAC_ADC_DIG_VOL1:
+	case RT5668_BIAS_CUR_CTRL_2:
+	case RT5668_BIAS_CUR_CTRL_3:
+	case RT5668_BIAS_CUR_CTRL_4:
+	case RT5668_BIAS_CUR_CTRL_5:
+	case RT5668_BIAS_CUR_CTRL_6:
+	case RT5668_BIAS_CUR_CTRL_7:
+	case RT5668_BIAS_CUR_CTRL_8:
+	case RT5668_BIAS_CUR_CTRL_9:
+	case RT5668_BIAS_CUR_CTRL_10:
+	case RT5668_VREF_REC_OP_FB_CAP_CTRL:
+	case RT5668_CHARGE_PUMP_1:
+	case RT5668_DIG_IN_CTRL_1:
+	case RT5668_PAD_DRIVING_CTRL:
+	case RT5668_SOFT_RAMP_DEPOP:
+	case RT5668_CHOP_DAC:
+	case RT5668_CHOP_ADC:
+	case RT5668_CALIB_ADC_CTRL:
+	case RT5668_VOL_TEST:
+	case RT5668_SPKVDD_DET_STA:
+	case RT5668_TEST_MODE_CTRL_1:
+	case RT5668_TEST_MODE_CTRL_2:
+	case RT5668_TEST_MODE_CTRL_3:
+	case RT5668_TEST_MODE_CTRL_4:
+	case RT5668_TEST_MODE_CTRL_5:
+	case RT5668_PLL1_INTERNAL:
+	case RT5668_PLL2_INTERNAL:
+	case RT5668_STO_NG2_CTRL_1:
+	case RT5668_STO_NG2_CTRL_2:
+	case RT5668_STO_NG2_CTRL_3:
+	case RT5668_STO_NG2_CTRL_4:
+	case RT5668_STO_NG2_CTRL_5:
+	case RT5668_STO_NG2_CTRL_6:
+	case RT5668_STO_NG2_CTRL_7:
+	case RT5668_STO_NG2_CTRL_8:
+	case RT5668_STO_NG2_CTRL_9:
+	case RT5668_STO_NG2_CTRL_10:
+	case RT5668_STO1_DAC_SIL_DET:
+	case RT5668_SIL_PSV_CTRL1:
+	case RT5668_SIL_PSV_CTRL2:
+	case RT5668_SIL_PSV_CTRL3:
+	case RT5668_SIL_PSV_CTRL4:
+	case RT5668_SIL_PSV_CTRL5:
+	case RT5668_HP_IMP_SENS_CTRL_01:
+	case RT5668_HP_IMP_SENS_CTRL_02:
+	case RT5668_HP_IMP_SENS_CTRL_03:
+	case RT5668_HP_IMP_SENS_CTRL_04:
+	case RT5668_HP_IMP_SENS_CTRL_05:
+	case RT5668_HP_IMP_SENS_CTRL_06:
+	case RT5668_HP_IMP_SENS_CTRL_07:
+	case RT5668_HP_IMP_SENS_CTRL_08:
+	case RT5668_HP_IMP_SENS_CTRL_09:
+	case RT5668_HP_IMP_SENS_CTRL_10:
+	case RT5668_HP_IMP_SENS_CTRL_11:
+	case RT5668_HP_IMP_SENS_CTRL_12:
+	case RT5668_HP_IMP_SENS_CTRL_13:
+	case RT5668_HP_IMP_SENS_CTRL_14:
+	case RT5668_HP_IMP_SENS_CTRL_15:
+	case RT5668_HP_IMP_SENS_CTRL_16:
+	case RT5668_HP_IMP_SENS_CTRL_17:
+	case RT5668_HP_IMP_SENS_CTRL_18:
+	case RT5668_HP_IMP_SENS_CTRL_19:
+	case RT5668_HP_IMP_SENS_CTRL_20:
+	case RT5668_HP_IMP_SENS_CTRL_21:
+	case RT5668_HP_IMP_SENS_CTRL_22:
+	case RT5668_HP_IMP_SENS_CTRL_23:
+	case RT5668_HP_IMP_SENS_CTRL_24:
+	case RT5668_HP_IMP_SENS_CTRL_25:
+	case RT5668_HP_IMP_SENS_CTRL_26:
+	case RT5668_HP_IMP_SENS_CTRL_27:
+	case RT5668_HP_IMP_SENS_CTRL_28:
+	case RT5668_HP_IMP_SENS_CTRL_29:
+	case RT5668_HP_IMP_SENS_CTRL_30:
+	case RT5668_HP_IMP_SENS_CTRL_31:
+	case RT5668_HP_IMP_SENS_CTRL_32:
+	case RT5668_HP_IMP_SENS_CTRL_33:
+	case RT5668_HP_IMP_SENS_CTRL_34:
+	case RT5668_HP_IMP_SENS_CTRL_35:
+	case RT5668_HP_IMP_SENS_CTRL_36:
+	case RT5668_HP_IMP_SENS_CTRL_37:
+	case RT5668_HP_IMP_SENS_CTRL_38:
+	case RT5668_HP_IMP_SENS_CTRL_39:
+	case RT5668_HP_IMP_SENS_CTRL_40:
+	case RT5668_HP_IMP_SENS_CTRL_41:
+	case RT5668_HP_IMP_SENS_CTRL_42:
+	case RT5668_HP_IMP_SENS_CTRL_43:
+	case RT5668_HP_LOGIC_CTRL_1:
+	case RT5668_HP_LOGIC_CTRL_2:
+	case RT5668_HP_LOGIC_CTRL_3:
+	case RT5668_HP_CALIB_CTRL_1:
+	case RT5668_HP_CALIB_CTRL_2:
+	case RT5668_HP_CALIB_CTRL_3:
+	case RT5668_HP_CALIB_CTRL_4:
+	case RT5668_HP_CALIB_CTRL_5:
+	case RT5668_HP_CALIB_CTRL_6:
+	case RT5668_HP_CALIB_CTRL_7:
+	case RT5668_HP_CALIB_CTRL_9:
+	case RT5668_HP_CALIB_CTRL_10:
+	case RT5668_HP_CALIB_CTRL_11:
+	case RT5668_HP_CALIB_STA_1:
+	case RT5668_HP_CALIB_STA_2:
+	case RT5668_HP_CALIB_STA_3:
+	case RT5668_HP_CALIB_STA_4:
+	case RT5668_HP_CALIB_STA_5:
+	case RT5668_HP_CALIB_STA_6:
+	case RT5668_HP_CALIB_STA_7:
+	case RT5668_HP_CALIB_STA_8:
+	case RT5668_HP_CALIB_STA_9:
+	case RT5668_HP_CALIB_STA_10:
+	case RT5668_HP_CALIB_STA_11:
+	case RT5668_SAR_IL_CMD_1:
+	case RT5668_SAR_IL_CMD_2:
+	case RT5668_SAR_IL_CMD_3:
+	case RT5668_SAR_IL_CMD_4:
+	case RT5668_SAR_IL_CMD_5:
+	case RT5668_SAR_IL_CMD_6:
+	case RT5668_SAR_IL_CMD_7:
+	case RT5668_SAR_IL_CMD_8:
+	case RT5668_SAR_IL_CMD_9:
+	case RT5668_SAR_IL_CMD_10:
+	case RT5668_SAR_IL_CMD_11:
+	case RT5668_SAR_IL_CMD_12:
+	case RT5668_SAR_IL_CMD_13:
+	case RT5668_EFUSE_CTRL_1:
+	case RT5668_EFUSE_CTRL_2:
+	case RT5668_EFUSE_CTRL_3:
+	case RT5668_EFUSE_CTRL_4:
+	case RT5668_EFUSE_CTRL_5:
+	case RT5668_EFUSE_CTRL_6:
+	case RT5668_EFUSE_CTRL_7:
+	case RT5668_EFUSE_CTRL_8:
+	case RT5668_EFUSE_CTRL_9:
+	case RT5668_EFUSE_CTRL_10:
+	case RT5668_EFUSE_CTRL_11:
+	case RT5668_JD_TOP_VC_VTRL:
+	case RT5668_DRC1_CTRL_0:
+	case RT5668_DRC1_CTRL_1:
+	case RT5668_DRC1_CTRL_2:
+	case RT5668_DRC1_CTRL_3:
+	case RT5668_DRC1_CTRL_4:
+	case RT5668_DRC1_CTRL_5:
+	case RT5668_DRC1_CTRL_6:
+	case RT5668_DRC1_HARD_LMT_CTRL_1:
+	case RT5668_DRC1_HARD_LMT_CTRL_2:
+	case RT5668_DRC1_PRIV_1:
+	case RT5668_DRC1_PRIV_2:
+	case RT5668_DRC1_PRIV_3:
+	case RT5668_DRC1_PRIV_4:
+	case RT5668_DRC1_PRIV_5:
+	case RT5668_DRC1_PRIV_6:
+	case RT5668_DRC1_PRIV_7:
+	case RT5668_DRC1_PRIV_8:
+	case RT5668_EQ_AUTO_RCV_CTRL1:
+	case RT5668_EQ_AUTO_RCV_CTRL2:
+	case RT5668_EQ_AUTO_RCV_CTRL3:
+	case RT5668_EQ_AUTO_RCV_CTRL4:
+	case RT5668_EQ_AUTO_RCV_CTRL5:
+	case RT5668_EQ_AUTO_RCV_CTRL6:
+	case RT5668_EQ_AUTO_RCV_CTRL7:
+	case RT5668_EQ_AUTO_RCV_CTRL8:
+	case RT5668_EQ_AUTO_RCV_CTRL9:
+	case RT5668_EQ_AUTO_RCV_CTRL10:
+	case RT5668_EQ_AUTO_RCV_CTRL11:
+	case RT5668_EQ_AUTO_RCV_CTRL12:
+	case RT5668_EQ_AUTO_RCV_CTRL13:
+	case RT5668_ADC_L_EQ_LPF1_A1:
+	case RT5668_R_EQ_LPF1_A1:
+	case RT5668_L_EQ_LPF1_H0:
+	case RT5668_R_EQ_LPF1_H0:
+	case RT5668_L_EQ_BPF1_A1:
+	case RT5668_R_EQ_BPF1_A1:
+	case RT5668_L_EQ_BPF1_A2:
+	case RT5668_R_EQ_BPF1_A2:
+	case RT5668_L_EQ_BPF1_H0:
+	case RT5668_R_EQ_BPF1_H0:
+	case RT5668_L_EQ_BPF2_A1:
+	case RT5668_R_EQ_BPF2_A1:
+	case RT5668_L_EQ_BPF2_A2:
+	case RT5668_R_EQ_BPF2_A2:
+	case RT5668_L_EQ_BPF2_H0:
+	case RT5668_R_EQ_BPF2_H0:
+	case RT5668_L_EQ_BPF3_A1:
+	case RT5668_R_EQ_BPF3_A1:
+	case RT5668_L_EQ_BPF3_A2:
+	case RT5668_R_EQ_BPF3_A2:
+	case RT5668_L_EQ_BPF3_H0:
+	case RT5668_R_EQ_BPF3_H0:
+	case RT5668_L_EQ_BPF4_A1:
+	case RT5668_R_EQ_BPF4_A1:
+	case RT5668_L_EQ_BPF4_A2:
+	case RT5668_R_EQ_BPF4_A2:
+	case RT5668_L_EQ_BPF4_H0:
+	case RT5668_R_EQ_BPF4_H0:
+	case RT5668_L_EQ_HPF1_A1:
+	case RT5668_R_EQ_HPF1_A1:
+	case RT5668_L_EQ_HPF1_H0:
+	case RT5668_R_EQ_HPF1_H0:
+	case RT5668_L_EQ_PRE_VOL:
+	case RT5668_R_EQ_PRE_VOL:
+	case RT5668_L_EQ_POST_VOL:
+	case RT5668_R_EQ_POST_VOL:
+	case RT5668_I2C_MODE:
+		return true;
+	default:
+		return false;
+	}
+}
+
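+/*
+ * TLV steps below are in 0.01 dB units; e.g. hp_vol_tlv describes a range
+ * from -22.5 dB to 0 dB in 1.5 dB steps.
+ */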
+static const DECLARE_TLV_DB_SCALE(hp_vol_tlv, -2250, 150, 0);
+static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -65625, 375, 0);
+static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -17625, 375, 0);
+static const DECLARE_TLV_DB_SCALE(adc_bst_tlv, 0, 1200, 0);
+
+/* {0, +20, +24, +30, +35, +40, +44, +50, +52} dB */
+static const DECLARE_TLV_DB_RANGE(bst_tlv,
+	0, 0, TLV_DB_SCALE_ITEM(0, 0, 0),
+	1, 1, TLV_DB_SCALE_ITEM(2000, 0, 0),
+	2, 2, TLV_DB_SCALE_ITEM(2400, 0, 0),
+	3, 5, TLV_DB_SCALE_ITEM(3000, 500, 0),
+	6, 6, TLV_DB_SCALE_ITEM(4400, 0, 0),
+	7, 7, TLV_DB_SCALE_ITEM(5000, 0, 0),
+	8, 8, TLV_DB_SCALE_ITEM(5200, 0, 0)
+);
+
+/* Interface data select */
+static const char * const rt5668_data_select[] = {
+	"L/R", "R/L", "L/L", "R/R"
+};
+
+static SOC_ENUM_SINGLE_DECL(rt5668_if2_adc_enum,
+	RT5668_DIG_INF2_DATA, RT5668_IF2_ADC_SEL_SFT, rt5668_data_select);
+
+static SOC_ENUM_SINGLE_DECL(rt5668_if1_01_adc_enum,
+	RT5668_TDM_ADDA_CTRL_1, RT5668_IF1_ADC1_SEL_SFT, rt5668_data_select);
+
+static SOC_ENUM_SINGLE_DECL(rt5668_if1_23_adc_enum,
+	RT5668_TDM_ADDA_CTRL_1, RT5668_IF1_ADC2_SEL_SFT, rt5668_data_select);
+
+static SOC_ENUM_SINGLE_DECL(rt5668_if1_45_adc_enum,
+	RT5668_TDM_ADDA_CTRL_1, RT5668_IF1_ADC3_SEL_SFT, rt5668_data_select);
+
+static SOC_ENUM_SINGLE_DECL(rt5668_if1_67_adc_enum,
+	RT5668_TDM_ADDA_CTRL_1, RT5668_IF1_ADC4_SEL_SFT, rt5668_data_select);
+
+static const struct snd_kcontrol_new rt5668_if2_adc_swap_mux =
+	SOC_DAPM_ENUM("IF2 ADC Swap Mux", rt5668_if2_adc_enum);
+
+static const struct snd_kcontrol_new rt5668_if1_01_adc_swap_mux =
+	SOC_DAPM_ENUM("IF1 01 ADC Swap Mux", rt5668_if1_01_adc_enum);
+
+static const struct snd_kcontrol_new rt5668_if1_23_adc_swap_mux =
+	SOC_DAPM_ENUM("IF1 23 ADC Swap Mux", rt5668_if1_23_adc_enum);
+
+static const struct snd_kcontrol_new rt5668_if1_45_adc_swap_mux =
+	SOC_DAPM_ENUM("IF1 45 ADC Swap Mux", rt5668_if1_45_adc_enum);
+
+static const struct snd_kcontrol_new rt5668_if1_67_adc_swap_mux =
+	SOC_DAPM_ENUM("IF1 67 ADC Swap Mux", rt5668_if1_67_adc_enum);
+
+static void rt5668_reset(struct regmap *regmap)
+{
+	regmap_write(regmap, RT5668_RESET, 0);
+	regmap_write(regmap, RT5668_I2C_MODE, 1);
+}
+
+/**
+ * rt5668_sel_asrc_clk_src - select ASRC clock source for a set of filters
+ * @component: SoC audio component device.
+ * @filter_mask: mask of filters.
+ * @clk_src: clock source.
+ *
+ * The ASRC function handles asynchronous MCLK and LRCK. In addition, since the
+ * RT5668 only supports the standard 32fs and 64fs I2S formats, ASRC must be
+ * enabled to support special I2S clock formats such as Intel's 100fs
+ * (100 * sampling rate). The ASRC tracks the I2S clock and generates a
+ * corresponding system clock for the codec. This function selects the clock
+ * source for the set of filters specified by @filter_mask, and the component
+ * driver will turn on ASRC for those filters if ASRC is selected as their
+ * clock source.
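+ *
+ * Example (illustrative): a machine driver could route both stereo1 filters
+ * to the I2S1 ASRC, e.g. from its init or hw_params callback:
+ *
+ *	rt5668_sel_asrc_clk_src(component,
+ *				RT5668_DA_STEREO1_FILTER |
+ *				RT5668_AD_STEREO1_FILTER,
+ *				RT5668_CLK_SEL_I2S1_ASRC);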
+ */
+int rt5668_sel_asrc_clk_src(struct snd_soc_component *component,
+		unsigned int filter_mask, unsigned int clk_src)
+{
+	switch (clk_src) {
+	case RT5668_CLK_SEL_SYS:
+	case RT5668_CLK_SEL_I2S1_ASRC:
+	case RT5668_CLK_SEL_I2S2_ASRC:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (filter_mask & RT5668_DA_STEREO1_FILTER) {
+		snd_soc_component_update_bits(component, RT5668_PLL_TRACK_2,
+			RT5668_FILTER_CLK_SEL_MASK,
+			clk_src << RT5668_FILTER_CLK_SEL_SFT);
+	}
+
+	if (filter_mask & RT5668_AD_STEREO1_FILTER) {
+		snd_soc_component_update_bits(component, RT5668_PLL_TRACK_3,
+			RT5668_FILTER_CLK_SEL_MASK,
+			clk_src << RT5668_FILTER_CLK_SEL_SFT);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rt5668_sel_asrc_clk_src);
+
+static int rt5668_button_detect(struct snd_soc_component *component)
+{
+	int btn_type, val;
+
+	val = snd_soc_component_read32(component, RT5668_4BTN_IL_CMD_1);
+	btn_type = val & 0xfff0;
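+	/* Write the value back, presumably to clear the latched status bits */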
+	snd_soc_component_write(component, RT5668_4BTN_IL_CMD_1, val);
+	pr_debug("%s btn_type=%x\n", __func__, btn_type);
+
+	return btn_type;
+}
+
+static void rt5668_enable_push_button_irq(struct snd_soc_component *component,
+		bool enable)
+{
+	if (enable) {
+		snd_soc_component_update_bits(component, RT5668_SAR_IL_CMD_1,
+			RT5668_SAR_BUTT_DET_MASK, RT5668_SAR_BUTT_DET_EN);
+		snd_soc_component_update_bits(component, RT5668_SAR_IL_CMD_13,
+			RT5668_SAR_SOUR_MASK, RT5668_SAR_SOUR_BTN);
+		snd_soc_component_write(component, RT5668_IL_CMD_1, 0x0040);
+		snd_soc_component_update_bits(component, RT5668_4BTN_IL_CMD_2,
+			RT5668_4BTN_IL_MASK | RT5668_4BTN_IL_RST_MASK,
+			RT5668_4BTN_IL_EN | RT5668_4BTN_IL_NOR);
+		snd_soc_component_update_bits(component, RT5668_IRQ_CTRL_3,
+			RT5668_IL_IRQ_MASK, RT5668_IL_IRQ_EN);
+	} else {
+		snd_soc_component_update_bits(component, RT5668_IRQ_CTRL_3,
+			RT5668_IL_IRQ_MASK, RT5668_IL_IRQ_DIS);
+		snd_soc_component_update_bits(component, RT5668_SAR_IL_CMD_1,
+			RT5668_SAR_BUTT_DET_MASK, RT5668_SAR_BUTT_DET_DIS);
+		snd_soc_component_update_bits(component, RT5668_4BTN_IL_CMD_2,
+			RT5668_4BTN_IL_MASK, RT5668_4BTN_IL_DIS);
+		snd_soc_component_update_bits(component, RT5668_4BTN_IL_CMD_2,
+			RT5668_4BTN_IL_RST_MASK, RT5668_4BTN_IL_RST);
+		snd_soc_component_update_bits(component, RT5668_SAR_IL_CMD_13,
+			RT5668_SAR_SOUR_MASK, RT5668_SAR_SOUR_TYPE);
+	}
+}
+
+/**
+ * rt5668_headset_detect - Detect headset.
+ * @component: SoC audio component device.
+ * @jack_insert: Jack insert or not.
+ *
+ * Detect whether a headset or headphone is present when the jack is inserted.
+ *
+ * Returns the detected jack type.
+ */
+static int rt5668_headset_detect(struct snd_soc_component *component,
+		int jack_insert)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	unsigned int val, count;
+
+	if (jack_insert) {
+		snd_soc_dapm_force_enable_pin(dapm, "CBJ Power");
+		snd_soc_dapm_sync(dapm);
+		snd_soc_component_update_bits(component, RT5668_CBJ_CTRL_1,
+			RT5668_TRIG_JD_MASK, RT5668_TRIG_JD_HIGH);
+
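+		/* Poll the detected jack type for up to ~500-750 ms */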
+		count = 0;
+		val = snd_soc_component_read32(component, RT5668_CBJ_CTRL_2)
+			& RT5668_JACK_TYPE_MASK;
+		while (val == 0 && count < 50) {
+			usleep_range(10000, 15000);
+			val = snd_soc_component_read32(component,
+				RT5668_CBJ_CTRL_2) & RT5668_JACK_TYPE_MASK;
+			count++;
+		}
+
+		switch (val) {
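+		/* 0x1 and 0x2 are the two 4-pole jack variants (likely CTIA/OMTP) */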
+		case 0x1:
+		case 0x2:
+			rt5668->jack_type = SND_JACK_HEADSET;
+			rt5668_enable_push_button_irq(component, true);
+			break;
+		default:
+			rt5668->jack_type = SND_JACK_HEADPHONE;
+		}
+
+	} else {
+		rt5668_enable_push_button_irq(component, false);
+		snd_soc_component_update_bits(component, RT5668_CBJ_CTRL_1,
+			RT5668_TRIG_JD_MASK, RT5668_TRIG_JD_LOW);
+		snd_soc_dapm_disable_pin(dapm, "CBJ Power");
+		snd_soc_dapm_sync(dapm);
+
+		rt5668->jack_type = 0;
+	}
+
+	dev_dbg(component->dev, "jack_type = %d\n", rt5668->jack_type);
+	return rt5668->jack_type;
+}
+
+static irqreturn_t rt5668_irq(int irq, void *data)
+{
+	struct rt5668_priv *rt5668 = data;
+
+	mod_delayed_work(system_power_efficient_wq,
+			&rt5668->jack_detect_work, msecs_to_jiffies(250));
+
+	return IRQ_HANDLED;
+}
+
+static void rt5668_jd_check_handler(struct work_struct *work)
+{
+	struct rt5668_priv *rt5668 = container_of(work, struct rt5668_priv,
+		jd_check_work.work);
+
+	if (snd_soc_component_read32(rt5668->component, RT5668_AJD1_CTRL)
+		& RT5668_JDH_RS_MASK) {
+		/* jack out */
+		rt5668->jack_type = rt5668_headset_detect(rt5668->component, 0);
+
+		snd_soc_jack_report(rt5668->hs_jack, rt5668->jack_type,
+				SND_JACK_HEADSET |
+				SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+				SND_JACK_BTN_2 | SND_JACK_BTN_3);
+	} else {
+		schedule_delayed_work(&rt5668->jd_check_work,
+				msecs_to_jiffies(500));
+	}
+}
+
+static int rt5668_set_jack_detect(struct snd_soc_component *component,
+	struct snd_soc_jack *hs_jack, void *data)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+
+	switch (rt5668->pdata.jd_src) {
+	case RT5668_JD1:
+		snd_soc_component_update_bits(component, RT5668_CBJ_CTRL_2,
+			RT5668_EXT_JD_SRC, RT5668_EXT_JD_SRC_MANUAL);
+		snd_soc_component_write(component, RT5668_CBJ_CTRL_1, 0xd002);
+		snd_soc_component_update_bits(component, RT5668_CBJ_CTRL_3,
+			RT5668_CBJ_IN_BUF_EN, RT5668_CBJ_IN_BUF_EN);
+		snd_soc_component_update_bits(component, RT5668_SAR_IL_CMD_1,
+			RT5668_SAR_POW_MASK, RT5668_SAR_POW_EN);
+		regmap_update_bits(rt5668->regmap, RT5668_GPIO_CTRL_1,
+			RT5668_GP1_PIN_MASK, RT5668_GP1_PIN_IRQ);
+		regmap_update_bits(rt5668->regmap, RT5668_RC_CLK_CTRL,
+				RT5668_POW_IRQ | RT5668_POW_JDH |
+				RT5668_POW_ANA, RT5668_POW_IRQ |
+				RT5668_POW_JDH | RT5668_POW_ANA);
+		regmap_update_bits(rt5668->regmap, RT5668_PWR_ANLG_2,
+			RT5668_PWR_JDH | RT5668_PWR_JDL,
+			RT5668_PWR_JDH | RT5668_PWR_JDL);
+		regmap_update_bits(rt5668->regmap, RT5668_IRQ_CTRL_2,
+			RT5668_JD1_EN_MASK | RT5668_JD1_POL_MASK,
+			RT5668_JD1_EN | RT5668_JD1_POL_NOR);
+		mod_delayed_work(system_power_efficient_wq,
+			   &rt5668->jack_detect_work, msecs_to_jiffies(250));
+		break;
+
+	case RT5668_JD_NULL:
+		regmap_update_bits(rt5668->regmap, RT5668_IRQ_CTRL_2,
+			RT5668_JD1_EN_MASK, RT5668_JD1_DIS);
+		regmap_update_bits(rt5668->regmap, RT5668_RC_CLK_CTRL,
+				RT5668_POW_JDH | RT5668_POW_JDL, 0);
+		break;
+
+	default:
+		dev_warn(component->dev, "Wrong JD source\n");
+		break;
+	}
+
+	rt5668->hs_jack = hs_jack;
+
+	return 0;
+}
+
+static void rt5668_jack_detect_handler(struct work_struct *work)
+{
+	struct rt5668_priv *rt5668 =
+		container_of(work, struct rt5668_priv, jack_detect_work.work);
+	int val, btn_type;
+
+	while (!rt5668->component)
+		usleep_range(10000, 15000);
+
+	while (!rt5668->component->card->instantiated)
+		usleep_range(10000, 15000);
+
+	mutex_lock(&rt5668->calibrate_mutex);
+
+	val = snd_soc_component_read32(rt5668->component, RT5668_AJD1_CTRL)
+		& RT5668_JDH_RS_MASK;
+	if (!val) {
+		/* jack in */
+		if (rt5668->jack_type == 0) {
+			/* jack was out, report jack type */
+			rt5668->jack_type =
+				rt5668_headset_detect(rt5668->component, 1);
+		} else {
+			/* jack is already in, report button event */
+			rt5668->jack_type = SND_JACK_HEADSET;
+			btn_type = rt5668_button_detect(rt5668->component);
+			/*
+			 * The rt5668 can report three kinds of button
+			 * behavior: single click, double click and hold.
+			 * However, we currently only report button
+			 * pressed/released events, so all three behaviors
+			 * are treated as button pressed.
+			 */
+			switch (btn_type) {
+			case 0x8000:
+			case 0x4000:
+			case 0x2000:
+				rt5668->jack_type |= SND_JACK_BTN_0;
+				break;
+			case 0x1000:
+			case 0x0800:
+			case 0x0400:
+				rt5668->jack_type |= SND_JACK_BTN_1;
+				break;
+			case 0x0200:
+			case 0x0100:
+			case 0x0080:
+				rt5668->jack_type |= SND_JACK_BTN_2;
+				break;
+			case 0x0040:
+			case 0x0020:
+			case 0x0010:
+				rt5668->jack_type |= SND_JACK_BTN_3;
+				break;
+			case 0x0000: /* unpressed */
+				break;
+			default:
+				dev_err(rt5668->component->dev,
+					"Unexpected button code 0x%04x\n",
+					btn_type);
+				btn_type = 0;
+				break;
+			}
+		}
+	} else {
+		/* jack out */
+		rt5668->jack_type = rt5668_headset_detect(rt5668->component, 0);
+	}
+
+	snd_soc_jack_report(rt5668->hs_jack, rt5668->jack_type,
+			SND_JACK_HEADSET |
+			    SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+			    SND_JACK_BTN_2 | SND_JACK_BTN_3);
+
+	if (rt5668->jack_type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+		SND_JACK_BTN_2 | SND_JACK_BTN_3))
+		schedule_delayed_work(&rt5668->jd_check_work, 0);
+	else
+		cancel_delayed_work_sync(&rt5668->jd_check_work);
+
+	mutex_unlock(&rt5668->calibrate_mutex);
+}
+
+static const struct snd_kcontrol_new rt5668_snd_controls[] = {
+	/* Headphone Output Volume */
+	SOC_DOUBLE_R_TLV("Headphone Playback Volume", RT5668_HPL_GAIN,
+		RT5668_HPR_GAIN, RT5668_G_HP_SFT, 15, 1, hp_vol_tlv),
+
+	/* DAC Digital Volume */
+	SOC_DOUBLE_TLV("DAC1 Playback Volume", RT5668_DAC1_DIG_VOL,
+		RT5668_L_VOL_SFT, RT5668_R_VOL_SFT, 175, 0, dac_vol_tlv),
+
+	/* IN Boost Volume */
+	SOC_SINGLE_TLV("CBJ Boost Volume", RT5668_CBJ_BST_CTRL,
+		RT5668_BST_CBJ_SFT, 8, 0, bst_tlv),
+
+	/* ADC Digital Volume Control */
+	SOC_DOUBLE("STO1 ADC Capture Switch", RT5668_STO1_ADC_DIG_VOL,
+		RT5668_L_MUTE_SFT, RT5668_R_MUTE_SFT, 1, 1),
+	SOC_DOUBLE_TLV("STO1 ADC Capture Volume", RT5668_STO1_ADC_DIG_VOL,
+		RT5668_L_VOL_SFT, RT5668_R_VOL_SFT, 127, 0, adc_vol_tlv),
+
+	/* ADC Boost Volume Control */
+	SOC_DOUBLE_TLV("STO1 ADC Boost Gain Volume", RT5668_STO1_ADC_BOOST,
+		RT5668_STO1_ADC_L_BST_SFT, RT5668_STO1_ADC_R_BST_SFT,
+		3, 0, adc_bst_tlv),
+};
+
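+/*
+ * Select an index into @div such that target * div[i] matches rt5668->sysclk.
+ * If there is no exact match, the largest divider that does not overshoot
+ * sysclk is returned and an error is logged; out-of-range sysclk values
+ * return a boundary index.
+ */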
+static int rt5668_div_sel(struct rt5668_priv *rt5668,
+			  int target, const int div[], int size)
+{
+	int i;
+
+	if (rt5668->sysclk < target) {
+		pr_err("sysclk rate %d is too low\n",
+			rt5668->sysclk);
+		return 0;
+	}
+
+	for (i = 0; i < size - 1; i++) {
+		pr_debug("div[%d]=%d\n", i, div[i]);
+		if (target * div[i] == rt5668->sysclk)
+			return i;
+		if (target * div[i + 1] > rt5668->sysclk) {
+			pr_err("can't find div for sysclk %d\n",
+				rt5668->sysclk);
+			return i;
+		}
+	}
+
+	if (target * div[i] < rt5668->sysclk)
+		pr_err("sysclk rate %d is too high\n",
+			rt5668->sysclk);
+
+	return size - 1;
+}
+
+/**
+ * set_dmic_clk - Set the DMIC clock divider.
+ *
+ * @w: DAPM widget.
+ * @kcontrol: The kcontrol of this widget.
+ * @event: Event id.
+ *
+ * Choose a DMIC clock between 1 MHz and 3 MHz; a clock close to 3 MHz is
+ * preferred.
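+ *
+ * For example, assuming a 24.576 MHz system clock, rt5668_div_sel() (called
+ * below with a 1.5 MHz target) selects div = 16, giving a 1.536 MHz DMIC
+ * clock.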
+ */
+static int set_dmic_clk(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+	int idx = -EINVAL;
+	static const int div[] = {2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128};
+
+	idx = rt5668_div_sel(rt5668, 1500000, div, ARRAY_SIZE(div));
+
+	snd_soc_component_update_bits(component, RT5668_DMIC_CTRL_1,
+		RT5668_DMIC_CLK_MASK, idx << RT5668_DMIC_CLK_SFT);
+
+	return 0;
+}
+
+static int set_filter_clk(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+	int ref, val, reg, idx = -EINVAL;
+	static const int div[] = {1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48};
+
+	val = snd_soc_component_read32(component, RT5668_GPIO_CTRL_1) &
+		RT5668_GP4_PIN_MASK;
+	if (w->shift == RT5668_PWR_ADC_S1F_BIT &&
+		val == RT5668_GP4_PIN_ADCDAT2)
+		ref = 256 * rt5668->lrck[RT5668_AIF2];
+	else
+		ref = 256 * rt5668->lrck[RT5668_AIF1];
+
+	idx = rt5668_div_sel(rt5668, ref, div, ARRAY_SIZE(div));
+
+	if (w->shift == RT5668_PWR_ADC_S1F_BIT)
+		reg = RT5668_PLL_TRACK_3;
+	else
+		reg = RT5668_PLL_TRACK_2;
+
+	snd_soc_component_update_bits(component, reg,
+		RT5668_FILTER_CLK_SEL_MASK, idx << RT5668_FILTER_CLK_SEL_SFT);
+
+	return 0;
+}
+
+static int is_sys_clk_from_pll1(struct snd_soc_dapm_widget *w,
+			 struct snd_soc_dapm_widget *sink)
+{
+	unsigned int val;
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	val = snd_soc_component_read32(component, RT5668_GLB_CLK);
+	val &= RT5668_SCLK_SRC_MASK;
+	if (val == RT5668_SCLK_SRC_PLL1)
+		return 1;
+	else
+		return 0;
+}
+
+static int is_using_asrc(struct snd_soc_dapm_widget *w,
+			 struct snd_soc_dapm_widget *sink)
+{
+	unsigned int reg, shift, val;
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	switch (w->shift) {
+	case RT5668_ADC_STO1_ASRC_SFT:
+		reg = RT5668_PLL_TRACK_3;
+		shift = RT5668_FILTER_CLK_SEL_SFT;
+		break;
+	case RT5668_DAC_STO1_ASRC_SFT:
+		reg = RT5668_PLL_TRACK_2;
+		shift = RT5668_FILTER_CLK_SEL_SFT;
+		break;
+	default:
+		return 0;
+	}
+
+	val = (snd_soc_component_read32(component, reg) >> shift) & 0xf;
+	switch (val) {
+	case RT5668_CLK_SEL_I2S1_ASRC:
+	case RT5668_CLK_SEL_I2S2_ASRC:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Digital Mixer */
+static const struct snd_kcontrol_new rt5668_sto1_adc_l_mix[] = {
+	SOC_DAPM_SINGLE("ADC1 Switch", RT5668_STO1_ADC_MIXER,
+			RT5668_M_STO1_ADC_L1_SFT, 1, 1),
+	SOC_DAPM_SINGLE("ADC2 Switch", RT5668_STO1_ADC_MIXER,
+			RT5668_M_STO1_ADC_L2_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5668_sto1_adc_r_mix[] = {
+	SOC_DAPM_SINGLE("ADC1 Switch", RT5668_STO1_ADC_MIXER,
+			RT5668_M_STO1_ADC_R1_SFT, 1, 1),
+	SOC_DAPM_SINGLE("ADC2 Switch", RT5668_STO1_ADC_MIXER,
+			RT5668_M_STO1_ADC_R2_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5668_dac_l_mix[] = {
+	SOC_DAPM_SINGLE("Stereo ADC Switch", RT5668_AD_DA_MIXER,
+			RT5668_M_ADCMIX_L_SFT, 1, 1),
+	SOC_DAPM_SINGLE("DAC1 Switch", RT5668_AD_DA_MIXER,
+			RT5668_M_DAC1_L_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5668_dac_r_mix[] = {
+	SOC_DAPM_SINGLE("Stereo ADC Switch", RT5668_AD_DA_MIXER,
+			RT5668_M_ADCMIX_R_SFT, 1, 1),
+	SOC_DAPM_SINGLE("DAC1 Switch", RT5668_AD_DA_MIXER,
+			RT5668_M_DAC1_R_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5668_sto1_dac_l_mix[] = {
+	SOC_DAPM_SINGLE("DAC L1 Switch", RT5668_STO1_DAC_MIXER,
+			RT5668_M_DAC_L1_STO_L_SFT, 1, 1),
+	SOC_DAPM_SINGLE("DAC R1 Switch", RT5668_STO1_DAC_MIXER,
+			RT5668_M_DAC_R1_STO_L_SFT, 1, 1),
+};
+
+static const struct snd_kcontrol_new rt5668_sto1_dac_r_mix[] = {
+	SOC_DAPM_SINGLE("DAC L1 Switch", RT5668_STO1_DAC_MIXER,
+			RT5668_M_DAC_L1_STO_R_SFT, 1, 1),
+	SOC_DAPM_SINGLE("DAC R1 Switch", RT5668_STO1_DAC_MIXER,
+			RT5668_M_DAC_R1_STO_R_SFT, 1, 1),
+};
+
+/* Analog Input Mixer */
+static const struct snd_kcontrol_new rt5668_rec1_l_mix[] = {
+	SOC_DAPM_SINGLE("CBJ Switch", RT5668_REC_MIXER,
+			RT5668_M_CBJ_RM1_L_SFT, 1, 1),
+};
+
+/* STO1 ADC1 Source */
+/* MX-26 [13] [5] */
+static const char * const rt5668_sto1_adc1_src[] = {
+	"DAC MIX", "ADC"
+};
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5668_sto1_adc1l_enum, RT5668_STO1_ADC_MIXER,
+	RT5668_STO1_ADC1L_SRC_SFT, rt5668_sto1_adc1_src);
+
+static const struct snd_kcontrol_new rt5668_sto1_adc1l_mux =
+	SOC_DAPM_ENUM("Stereo1 ADC1L Source", rt5668_sto1_adc1l_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5668_sto1_adc1r_enum, RT5668_STO1_ADC_MIXER,
+	RT5668_STO1_ADC1R_SRC_SFT, rt5668_sto1_adc1_src);
+
+static const struct snd_kcontrol_new rt5668_sto1_adc1r_mux =
+	SOC_DAPM_ENUM("Stereo1 ADC1R Source", rt5668_sto1_adc1r_enum);
+
+/* STO1 ADC Source */
+/* MX-26 [11:10] [3:2] */
+static const char * const rt5668_sto1_adc_src[] = {
+	"ADC1 L", "ADC1 R"
+};
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5668_sto1_adcl_enum, RT5668_STO1_ADC_MIXER,
+	RT5668_STO1_ADCL_SRC_SFT, rt5668_sto1_adc_src);
+
+static const struct snd_kcontrol_new rt5668_sto1_adcl_mux =
+	SOC_DAPM_ENUM("Stereo1 ADCL Source", rt5668_sto1_adcl_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5668_sto1_adcr_enum, RT5668_STO1_ADC_MIXER,
+	RT5668_STO1_ADCR_SRC_SFT, rt5668_sto1_adc_src);
+
+static const struct snd_kcontrol_new rt5668_sto1_adcr_mux =
+	SOC_DAPM_ENUM("Stereo1 ADCR Source", rt5668_sto1_adcr_enum);
+
+/* STO1 ADC2 Source */
+/* MX-26 [12] [4] */
+static const char * const rt5668_sto1_adc2_src[] = {
+	"DAC MIX", "DMIC"
+};
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5668_sto1_adc2l_enum, RT5668_STO1_ADC_MIXER,
+	RT5668_STO1_ADC2L_SRC_SFT, rt5668_sto1_adc2_src);
+
+static const struct snd_kcontrol_new rt5668_sto1_adc2l_mux =
+	SOC_DAPM_ENUM("Stereo1 ADC2L Source", rt5668_sto1_adc2l_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5668_sto1_adc2r_enum, RT5668_STO1_ADC_MIXER,
+	RT5668_STO1_ADC2R_SRC_SFT, rt5668_sto1_adc2_src);
+
+static const struct snd_kcontrol_new rt5668_sto1_adc2r_mux =
+	SOC_DAPM_ENUM("Stereo1 ADC2R Source", rt5668_sto1_adc2r_enum);
+
+/* MX-79 [6:4] I2S1 ADC data location */
+static const unsigned int rt5668_if1_adc_slot_values[] = {
+	0,
+	2,
+	4,
+	6,
+};
+
+static const char * const rt5668_if1_adc_slot_src[] = {
+	"Slot 0", "Slot 2", "Slot 4", "Slot 6"
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(rt5668_if1_adc_slot_enum,
+	RT5668_TDM_CTRL, RT5668_TDM_ADC_LCA_SFT, RT5668_TDM_ADC_LCA_MASK,
+	rt5668_if1_adc_slot_src, rt5668_if1_adc_slot_values);
+
+static const struct snd_kcontrol_new rt5668_if1_adc_slot_mux =
+	SOC_DAPM_ENUM("IF1 ADC Slot location", rt5668_if1_adc_slot_enum);
+
+/* Analog DAC L1 Source, Analog DAC R1 Source*/
+/* MX-2B [4], MX-2B [0]*/
+static const char * const rt5668_alg_dac1_src[] = {
+	"Stereo1 DAC Mixer", "DAC1"
+};
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5668_alg_dac_l1_enum, RT5668_A_DAC1_MUX,
+	RT5668_A_DACL1_SFT, rt5668_alg_dac1_src);
+
+static const struct snd_kcontrol_new rt5668_alg_dac_l1_mux =
+	SOC_DAPM_ENUM("Analog DAC L1 Source", rt5668_alg_dac_l1_enum);
+
+static SOC_ENUM_SINGLE_DECL(
+	rt5668_alg_dac_r1_enum, RT5668_A_DAC1_MUX,
+	RT5668_A_DACR1_SFT, rt5668_alg_dac1_src);
+
+static const struct snd_kcontrol_new rt5668_alg_dac_r1_mux =
+	SOC_DAPM_ENUM("Analog DAC R1 Source", rt5668_alg_dac_r1_enum);
+
+/* Out Switch */
+static const struct snd_kcontrol_new hpol_switch =
+	SOC_DAPM_SINGLE_AUTODISABLE("Switch", RT5668_HP_CTRL_1,
+					RT5668_L_MUTE_SFT, 1, 1);
+static const struct snd_kcontrol_new hpor_switch =
+	SOC_DAPM_SINGLE_AUTODISABLE("Switch", RT5668_HP_CTRL_1,
+					RT5668_R_MUTE_SFT, 1, 1);
+
+static int rt5668_hp_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		snd_soc_component_write(component,
+			RT5668_HP_LOGIC_CTRL_2, 0x0012);
+		snd_soc_component_write(component,
+			RT5668_HP_CTRL_2, 0x6000);
+		snd_soc_component_update_bits(component, RT5668_STO_NG2_CTRL_1,
+			RT5668_NG2_EN_MASK, RT5668_NG2_EN);
+		snd_soc_component_update_bits(component,
+			RT5668_DEPOP_1, 0x60, 0x60);
+		break;
+
+	case SND_SOC_DAPM_POST_PMD:
+		snd_soc_component_update_bits(component,
+			RT5668_DEPOP_1, 0x60, 0x0);
+		snd_soc_component_write(component,
+			RT5668_HP_CTRL_2, 0x0000);
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+static int set_dmic_power(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		/*Add delay to avoid pop noise*/
+		msleep(150);
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+static int rt5668_set_vref(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+
+	switch (event) {
+	case SND_SOC_DAPM_PRE_PMU:
+		switch (w->shift) {
+		case RT5668_PWR_VREF1_BIT:
+			snd_soc_component_update_bits(component,
+				RT5668_PWR_ANLG_1, RT5668_PWR_FV1, 0);
+			break;
+
+		case RT5668_PWR_VREF2_BIT:
+			snd_soc_component_update_bits(component,
+				RT5668_PWR_ANLG_1, RT5668_PWR_FV2, 0);
+			break;
+
+		default:
+			break;
+		}
+		break;
+
+	case SND_SOC_DAPM_POST_PMU:
+		usleep_range(15000, 20000);
+		switch (w->shift) {
+		case RT5668_PWR_VREF1_BIT:
+			snd_soc_component_update_bits(component,
+				RT5668_PWR_ANLG_1, RT5668_PWR_FV1,
+				RT5668_PWR_FV1);
+			break;
+
+		case RT5668_PWR_VREF2_BIT:
+			snd_soc_component_update_bits(component,
+				RT5668_PWR_ANLG_1, RT5668_PWR_FV2,
+				RT5668_PWR_FV2);
+			break;
+
+		default:
+			break;
+		}
+		break;
+
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+static const unsigned int rt5668_adcdat_pin_values[] = {
+	1,
+	3,
+};
+
+static const char * const rt5668_adcdat_pin_select[] = {
+	"ADCDAT1",
+	"ADCDAT2",
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(rt5668_adcdat_pin_enum,
+	RT5668_GPIO_CTRL_1, RT5668_GP4_PIN_SFT, RT5668_GP4_PIN_MASK,
+	rt5668_adcdat_pin_select, rt5668_adcdat_pin_values);
+
+static const struct snd_kcontrol_new rt5668_adcdat_pin_ctrl =
+	SOC_DAPM_ENUM("ADCDAT", rt5668_adcdat_pin_enum);
+
+static const struct snd_soc_dapm_widget rt5668_dapm_widgets[] = {
+	SND_SOC_DAPM_SUPPLY("LDO2", RT5668_PWR_ANLG_3, RT5668_PWR_LDO2_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("PLL1", RT5668_PWR_ANLG_3, RT5668_PWR_PLL_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("PLL2B", RT5668_PWR_ANLG_3, RT5668_PWR_PLL2B_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("PLL2F", RT5668_PWR_ANLG_3, RT5668_PWR_PLL2F_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Vref1", RT5668_PWR_ANLG_1, RT5668_PWR_VREF1_BIT, 0,
+		rt5668_set_vref, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+	SND_SOC_DAPM_SUPPLY("Vref2", RT5668_PWR_ANLG_1, RT5668_PWR_VREF2_BIT, 0,
+		rt5668_set_vref, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU),
+
+	/* ASRC */
+	SND_SOC_DAPM_SUPPLY_S("DAC STO1 ASRC", 1, RT5668_PLL_TRACK_1,
+		RT5668_DAC_STO1_ASRC_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("ADC STO1 ASRC", 1, RT5668_PLL_TRACK_1,
+		RT5668_ADC_STO1_ASRC_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("AD ASRC", 1, RT5668_PLL_TRACK_1,
+		RT5668_AD_ASRC_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("DA ASRC", 1, RT5668_PLL_TRACK_1,
+		RT5668_DA_ASRC_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("DMIC ASRC", 1, RT5668_PLL_TRACK_1,
+		RT5668_DMIC_ASRC_SFT, 0, NULL, 0),
+
+	/* Input Side */
+	SND_SOC_DAPM_SUPPLY("MICBIAS1", RT5668_PWR_ANLG_2, RT5668_PWR_MB1_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("MICBIAS2", RT5668_PWR_ANLG_2, RT5668_PWR_MB2_BIT,
+		0, NULL, 0),
+
+	/* Input Lines */
+	SND_SOC_DAPM_INPUT("DMIC L1"),
+	SND_SOC_DAPM_INPUT("DMIC R1"),
+
+	SND_SOC_DAPM_INPUT("IN1P"),
+
+	SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0,
+		set_dmic_clk, SND_SOC_DAPM_PRE_PMU),
+	SND_SOC_DAPM_SUPPLY("DMIC1 Power", RT5668_DMIC_CTRL_1,
+		RT5668_DMIC_1_EN_SFT, 0, set_dmic_power, SND_SOC_DAPM_POST_PMU),
+
+	/* Boost */
+	SND_SOC_DAPM_PGA("BST1 CBJ", SND_SOC_NOPM,
+		0, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("CBJ Power", RT5668_PWR_ANLG_3,
+		RT5668_PWR_CBJ_BIT, 0, NULL, 0),
+
+	/* REC Mixer */
+	SND_SOC_DAPM_MIXER("RECMIX1L", SND_SOC_NOPM, 0, 0, rt5668_rec1_l_mix,
+		ARRAY_SIZE(rt5668_rec1_l_mix)),
+	SND_SOC_DAPM_SUPPLY("RECMIX1L Power", RT5668_PWR_ANLG_2,
+		RT5668_PWR_RM1_L_BIT, 0, NULL, 0),
+
+	/* ADCs */
+	SND_SOC_DAPM_ADC("ADC1 L", NULL, SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_ADC("ADC1 R", NULL, SND_SOC_NOPM, 0, 0),
+
+	SND_SOC_DAPM_SUPPLY("ADC1 L Power", RT5668_PWR_DIG_1,
+		RT5668_PWR_ADC_L1_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ADC1 R Power", RT5668_PWR_DIG_1,
+		RT5668_PWR_ADC_R1_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ADC1 clock", RT5668_CHOP_ADC,
+		RT5668_CKGEN_ADC1_SFT, 0, NULL, 0),
+
+	/* ADC Mux */
+	SND_SOC_DAPM_MUX("Stereo1 ADC L1 Mux", SND_SOC_NOPM, 0, 0,
+		&rt5668_sto1_adc1l_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC R1 Mux", SND_SOC_NOPM, 0, 0,
+		&rt5668_sto1_adc1r_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC L2 Mux", SND_SOC_NOPM, 0, 0,
+		&rt5668_sto1_adc2l_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC R2 Mux", SND_SOC_NOPM, 0, 0,
+		&rt5668_sto1_adc2r_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC L Mux", SND_SOC_NOPM, 0, 0,
+		&rt5668_sto1_adcl_mux),
+	SND_SOC_DAPM_MUX("Stereo1 ADC R Mux", SND_SOC_NOPM, 0, 0,
+		&rt5668_sto1_adcr_mux),
+	SND_SOC_DAPM_MUX("IF1_ADC Mux", SND_SOC_NOPM, 0, 0,
+		&rt5668_if1_adc_slot_mux),
+
+	/* ADC Mixer */
+	SND_SOC_DAPM_SUPPLY("ADC Stereo1 Filter", RT5668_PWR_DIG_2,
+		RT5668_PWR_ADC_S1F_BIT, 0, set_filter_clk,
+		SND_SOC_DAPM_PRE_PMU),
+	SND_SOC_DAPM_MIXER("Stereo1 ADC MIXL", RT5668_STO1_ADC_DIG_VOL,
+		RT5668_L_MUTE_SFT, 1, rt5668_sto1_adc_l_mix,
+		ARRAY_SIZE(rt5668_sto1_adc_l_mix)),
+	SND_SOC_DAPM_MIXER("Stereo1 ADC MIXR", RT5668_STO1_ADC_DIG_VOL,
+		RT5668_R_MUTE_SFT, 1, rt5668_sto1_adc_r_mix,
+		ARRAY_SIZE(rt5668_sto1_adc_r_mix)),
+
+	/* ADC PGA */
+	SND_SOC_DAPM_PGA("Stereo1 ADC MIX", SND_SOC_NOPM, 0, 0, NULL, 0),
+
+	/* Digital Interface */
+	SND_SOC_DAPM_SUPPLY("I2S1", RT5668_PWR_DIG_1, RT5668_PWR_I2S1_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("I2S2", RT5668_PWR_DIG_1, RT5668_PWR_I2S2_BIT,
+		0, NULL, 0),
+	SND_SOC_DAPM_PGA("IF1 DAC1", SND_SOC_NOPM, 0, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("IF1 DAC1 L", SND_SOC_NOPM, 0, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("IF1 DAC1 R", SND_SOC_NOPM, 0, 0, NULL, 0),
+
+	/* Digital Interface Select */
+	SND_SOC_DAPM_MUX("IF1 01 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5668_if1_01_adc_swap_mux),
+	SND_SOC_DAPM_MUX("IF1 23 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5668_if1_23_adc_swap_mux),
+	SND_SOC_DAPM_MUX("IF1 45 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5668_if1_45_adc_swap_mux),
+	SND_SOC_DAPM_MUX("IF1 67 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5668_if1_67_adc_swap_mux),
+	SND_SOC_DAPM_MUX("IF2 ADC Swap Mux", SND_SOC_NOPM, 0, 0,
+			&rt5668_if2_adc_swap_mux),
+
+	SND_SOC_DAPM_MUX("ADCDAT Mux", SND_SOC_NOPM, 0, 0,
+			&rt5668_adcdat_pin_ctrl),
+
+	/* Audio Interface */
+	SND_SOC_DAPM_AIF_OUT("AIF1TX", "AIF1 Capture", 0,
+		RT5668_I2S1_SDP, RT5668_SEL_ADCDAT_SFT, 1),
+	SND_SOC_DAPM_AIF_OUT("AIF2TX", "AIF2 Capture", 0,
+		RT5668_I2S2_SDP, RT5668_I2S2_PIN_CFG_SFT, 1),
+	SND_SOC_DAPM_AIF_IN("AIF1RX", "AIF1 Playback", 0, SND_SOC_NOPM, 0, 0),
+
+	/* Output Side */
+	/* DAC mixer before sound effect  */
+	SND_SOC_DAPM_MIXER("DAC1 MIXL", SND_SOC_NOPM, 0, 0,
+		rt5668_dac_l_mix, ARRAY_SIZE(rt5668_dac_l_mix)),
+	SND_SOC_DAPM_MIXER("DAC1 MIXR", SND_SOC_NOPM, 0, 0,
+		rt5668_dac_r_mix, ARRAY_SIZE(rt5668_dac_r_mix)),
+
+	/* DAC channel Mux */
+	SND_SOC_DAPM_MUX("DAC L1 Source", SND_SOC_NOPM, 0, 0,
+		&rt5668_alg_dac_l1_mux),
+	SND_SOC_DAPM_MUX("DAC R1 Source", SND_SOC_NOPM, 0, 0,
+		&rt5668_alg_dac_r1_mux),
+
+	/* DAC Mixer */
+	SND_SOC_DAPM_SUPPLY("DAC Stereo1 Filter", RT5668_PWR_DIG_2,
+		RT5668_PWR_DAC_S1F_BIT, 0, set_filter_clk,
+		SND_SOC_DAPM_PRE_PMU),
+	SND_SOC_DAPM_MIXER("Stereo1 DAC MIXL", SND_SOC_NOPM, 0, 0,
+		rt5668_sto1_dac_l_mix, ARRAY_SIZE(rt5668_sto1_dac_l_mix)),
+	SND_SOC_DAPM_MIXER("Stereo1 DAC MIXR", SND_SOC_NOPM, 0, 0,
+		rt5668_sto1_dac_r_mix, ARRAY_SIZE(rt5668_sto1_dac_r_mix)),
+
+	/* DACs */
+	SND_SOC_DAPM_DAC("DAC L1", NULL, RT5668_PWR_DIG_1,
+		RT5668_PWR_DAC_L1_BIT, 0),
+	SND_SOC_DAPM_DAC("DAC R1", NULL, RT5668_PWR_DIG_1,
+		RT5668_PWR_DAC_R1_BIT, 0),
+	SND_SOC_DAPM_SUPPLY_S("DAC 1 Clock", 3, RT5668_CHOP_DAC,
+		RT5668_CKGEN_DAC1_SFT, 0, NULL, 0),
+
+	/* HPO */
+	SND_SOC_DAPM_PGA_S("HP Amp", 1, SND_SOC_NOPM, 0, 0, rt5668_hp_event,
+		SND_SOC_DAPM_POST_PMD | SND_SOC_DAPM_PRE_PMU),
+
+	SND_SOC_DAPM_SUPPLY("HP Amp L", RT5668_PWR_ANLG_1,
+		RT5668_PWR_HA_L_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("HP Amp R", RT5668_PWR_ANLG_1,
+		RT5668_PWR_HA_R_BIT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("Charge Pump", 1, RT5668_DEPOP_1,
+		RT5668_PUMP_EN_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("Capless", 2, RT5668_DEPOP_1,
+		RT5668_CAPLESS_EN_SFT, 0, NULL, 0),
+
+	SND_SOC_DAPM_SWITCH("HPOL Playback", SND_SOC_NOPM, 0, 0,
+		&hpol_switch),
+	SND_SOC_DAPM_SWITCH("HPOR Playback", SND_SOC_NOPM, 0, 0,
+		&hpor_switch),
+
+	/* CLK DET */
+	SND_SOC_DAPM_SUPPLY("CLKDET SYS", RT5668_CLK_DET,
+		RT5668_SYS_CLK_DET_SFT,	0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CLKDET PLL1", RT5668_CLK_DET,
+		RT5668_PLL1_CLK_DET_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CLKDET PLL2", RT5668_CLK_DET,
+		RT5668_PLL2_CLK_DET_SFT, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("CLKDET", RT5668_CLK_DET,
+		RT5668_POW_CLK_DET_SFT, 0, NULL, 0),
+
+	/* Output Lines */
+	SND_SOC_DAPM_OUTPUT("HPOL"),
+	SND_SOC_DAPM_OUTPUT("HPOR"),
+
+};
+
+static const struct snd_soc_dapm_route rt5668_dapm_routes[] = {
+	/*PLL*/
+	{"ADC Stereo1 Filter", NULL, "PLL1", is_sys_clk_from_pll1},
+	{"DAC Stereo1 Filter", NULL, "PLL1", is_sys_clk_from_pll1},
+
+	/*ASRC*/
+	{"ADC Stereo1 Filter", NULL, "ADC STO1 ASRC", is_using_asrc},
+	{"DAC Stereo1 Filter", NULL, "DAC STO1 ASRC", is_using_asrc},
+	{"ADC STO1 ASRC", NULL, "AD ASRC"},
+	{"DAC STO1 ASRC", NULL, "DA ASRC"},
+
+	/*Vref*/
+	{"MICBIAS1", NULL, "Vref1"},
+	{"MICBIAS1", NULL, "Vref2"},
+	{"MICBIAS2", NULL, "Vref1"},
+	{"MICBIAS2", NULL, "Vref2"},
+
+	{"CLKDET SYS", NULL, "CLKDET"},
+
+	{"IN1P", NULL, "LDO2"},
+
+	{"BST1 CBJ", NULL, "IN1P"},
+	{"BST1 CBJ", NULL, "CBJ Power"},
+	{"CBJ Power", NULL, "Vref2"},
+
+	{"RECMIX1L", "CBJ Switch", "BST1 CBJ"},
+	{"RECMIX1L", NULL, "RECMIX1L Power"},
+
+	{"ADC1 L", NULL, "RECMIX1L"},
+	{"ADC1 L", NULL, "ADC1 L Power"},
+	{"ADC1 L", NULL, "ADC1 clock"},
+
+	{"DMIC L1", NULL, "DMIC CLK"},
+	{"DMIC L1", NULL, "DMIC1 Power"},
+	{"DMIC R1", NULL, "DMIC CLK"},
+	{"DMIC R1", NULL, "DMIC1 Power"},
+	{"DMIC CLK", NULL, "DMIC ASRC"},
+
+	{"Stereo1 ADC L Mux", "ADC1 L", "ADC1 L"},
+	{"Stereo1 ADC L Mux", "ADC1 R", "ADC1 R"},
+	{"Stereo1 ADC R Mux", "ADC1 L", "ADC1 L"},
+	{"Stereo1 ADC R Mux", "ADC1 R", "ADC1 R"},
+
+	{"Stereo1 ADC L1 Mux", "ADC", "Stereo1 ADC L Mux"},
+	{"Stereo1 ADC L1 Mux", "DAC MIX", "Stereo1 DAC MIXL"},
+	{"Stereo1 ADC L2 Mux", "DMIC", "DMIC L1"},
+	{"Stereo1 ADC L2 Mux", "DAC MIX", "Stereo1 DAC MIXL"},
+
+	{"Stereo1 ADC R1 Mux", "ADC", "Stereo1 ADC R Mux"},
+	{"Stereo1 ADC R1 Mux", "DAC MIX", "Stereo1 DAC MIXR"},
+	{"Stereo1 ADC R2 Mux", "DMIC", "DMIC R1"},
+	{"Stereo1 ADC R2 Mux", "DAC MIX", "Stereo1 DAC MIXR"},
+
+	{"Stereo1 ADC MIXL", "ADC1 Switch", "Stereo1 ADC L1 Mux"},
+	{"Stereo1 ADC MIXL", "ADC2 Switch", "Stereo1 ADC L2 Mux"},
+	{"Stereo1 ADC MIXL", NULL, "ADC Stereo1 Filter"},
+
+	{"Stereo1 ADC MIXR", "ADC1 Switch", "Stereo1 ADC R1 Mux"},
+	{"Stereo1 ADC MIXR", "ADC2 Switch", "Stereo1 ADC R2 Mux"},
+	{"Stereo1 ADC MIXR", NULL, "ADC Stereo1 Filter"},
+
+	{"Stereo1 ADC MIX", NULL, "Stereo1 ADC MIXL"},
+	{"Stereo1 ADC MIX", NULL, "Stereo1 ADC MIXR"},
+
+	{"IF1 01 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF1 01 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF1 01 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF1 01 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+	{"IF1 23 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF1 23 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF1 23 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF1 23 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+	{"IF1 45 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF1 45 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF1 45 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF1 45 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+	{"IF1 67 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF1 67 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF1 67 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF1 67 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+
+	{"IF1_ADC Mux", "Slot 0", "IF1 01 ADC Swap Mux"},
+	{"IF1_ADC Mux", "Slot 2", "IF1 23 ADC Swap Mux"},
+	{"IF1_ADC Mux", "Slot 4", "IF1 45 ADC Swap Mux"},
+	{"IF1_ADC Mux", "Slot 6", "IF1 67 ADC Swap Mux"},
+	{"IF1_ADC Mux", NULL, "I2S1"},
+	{"ADCDAT Mux", "ADCDAT1", "IF1_ADC Mux"},
+	{"AIF1TX", NULL, "ADCDAT Mux"},
+	{"IF2 ADC Swap Mux", "L/R", "Stereo1 ADC MIX"},
+	{"IF2 ADC Swap Mux", "R/L", "Stereo1 ADC MIX"},
+	{"IF2 ADC Swap Mux", "L/L", "Stereo1 ADC MIX"},
+	{"IF2 ADC Swap Mux", "R/R", "Stereo1 ADC MIX"},
+	{"ADCDAT Mux", "ADCDAT2", "IF2 ADC Swap Mux"},
+	{"AIF2TX", NULL, "ADCDAT Mux"},
+
+	{"IF1 DAC1 L", NULL, "AIF1RX"},
+	{"IF1 DAC1 L", NULL, "I2S1"},
+	{"IF1 DAC1 L", NULL, "DAC Stereo1 Filter"},
+	{"IF1 DAC1 R", NULL, "AIF1RX"},
+	{"IF1 DAC1 R", NULL, "I2S1"},
+	{"IF1 DAC1 R", NULL, "DAC Stereo1 Filter"},
+
+	{"DAC1 MIXL", "Stereo ADC Switch", "Stereo1 ADC MIXL"},
+	{"DAC1 MIXL", "DAC1 Switch", "IF1 DAC1 L"},
+	{"DAC1 MIXR", "Stereo ADC Switch", "Stereo1 ADC MIXR"},
+	{"DAC1 MIXR", "DAC1 Switch", "IF1 DAC1 R"},
+
+	{"Stereo1 DAC MIXL", "DAC L1 Switch", "DAC1 MIXL"},
+	{"Stereo1 DAC MIXL", "DAC R1 Switch", "DAC1 MIXR"},
+
+	{"Stereo1 DAC MIXR", "DAC R1 Switch", "DAC1 MIXR"},
+	{"Stereo1 DAC MIXR", "DAC L1 Switch", "DAC1 MIXL"},
+
+	{"DAC L1 Source", "DAC1", "DAC1 MIXL"},
+	{"DAC L1 Source", "Stereo1 DAC Mixer", "Stereo1 DAC MIXL"},
+	{"DAC R1 Source", "DAC1", "DAC1 MIXR"},
+	{"DAC R1 Source", "Stereo1 DAC Mixer", "Stereo1 DAC MIXR"},
+
+	{"DAC L1", NULL, "DAC L1 Source"},
+	{"DAC R1", NULL, "DAC R1 Source"},
+
+	{"DAC L1", NULL, "DAC 1 Clock"},
+	{"DAC R1", NULL, "DAC 1 Clock"},
+
+	{"HP Amp", NULL, "DAC L1"},
+	{"HP Amp", NULL, "DAC R1"},
+	{"HP Amp", NULL, "HP Amp L"},
+	{"HP Amp", NULL, "HP Amp R"},
+	{"HP Amp", NULL, "Capless"},
+	{"HP Amp", NULL, "Charge Pump"},
+	{"HP Amp", NULL, "CLKDET SYS"},
+	{"HP Amp", NULL, "CBJ Power"},
+	{"HP Amp", NULL, "Vref2"},
+	{"HPOL Playback", "Switch", "HP Amp"},
+	{"HPOR Playback", "Switch", "HP Amp"},
+	{"HPOL", NULL, "HPOL Playback"},
+	{"HPOR", NULL, "HPOR Playback"},
+};
+
+static int rt5668_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask,
+			unsigned int rx_mask, int slots, int slot_width)
+{
+	struct snd_soc_component *component = dai->component;
+	unsigned int val = 0;
+
+	switch (slots) {
+	case 4:
+		val |= RT5668_TDM_TX_CH_4;
+		val |= RT5668_TDM_RX_CH_4;
+		break;
+	case 6:
+		val |= RT5668_TDM_TX_CH_6;
+		val |= RT5668_TDM_RX_CH_6;
+		break;
+	case 8:
+		val |= RT5668_TDM_TX_CH_8;
+		val |= RT5668_TDM_RX_CH_8;
+		break;
+	case 2:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	snd_soc_component_update_bits(component, RT5668_TDM_CTRL,
+		RT5668_TDM_TX_CH_MASK | RT5668_TDM_RX_CH_MASK, val);
+
+	switch (slot_width) {
+	case 16:
+		val = RT5668_TDM_CL_16;
+		break;
+	case 20:
+		val = RT5668_TDM_CL_20;
+		break;
+	case 24:
+		val = RT5668_TDM_CL_24;
+		break;
+	case 32:
+		val = RT5668_TDM_CL_32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	snd_soc_component_update_bits(component, RT5668_TDM_TCON_CTRL,
+		RT5668_TDM_CL_MASK, val);
+
+	return 0;
+}
+
+static int rt5668_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *component = dai->component;
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+	unsigned int len_1 = 0, len_2 = 0;
+	int pre_div, frame_size;
+
+	rt5668->lrck[dai->id] = params_rate(params);
+	pre_div = rl6231_get_clk_info(rt5668->sysclk, rt5668->lrck[dai->id]);
+
+	frame_size = snd_soc_params_to_frame_size(params);
+	if (frame_size < 0) {
+		dev_err(component->dev, "Unsupported frame size: %d\n",
+			frame_size);
+		return -EINVAL;
+	}
+
+	dev_dbg(dai->dev, "lrck is %dHz and pre_div is %d for iis %d\n",
+				rt5668->lrck[dai->id], pre_div, dai->id);
+
+	switch (params_width(params)) {
+	case 16:
+		break;
+	case 20:
+		len_1 |= RT5668_I2S1_DL_20;
+		len_2 |= RT5668_I2S2_DL_20;
+		break;
+	case 24:
+		len_1 |= RT5668_I2S1_DL_24;
+		len_2 |= RT5668_I2S2_DL_24;
+		break;
+	case 32:
+		len_1 |= RT5668_I2S1_DL_32;
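+		/* I2S2 data length is presumably capped at 24 bits */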
+		len_2 |= RT5668_I2S2_DL_24;
+		break;
+	case 8:
+		len_1 |= RT5668_I2S2_DL_8;
+		len_2 |= RT5668_I2S2_DL_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dai->id) {
+	case RT5668_AIF1:
+		snd_soc_component_update_bits(component, RT5668_I2S1_SDP,
+			RT5668_I2S1_DL_MASK, len_1);
+		if (rt5668->master[RT5668_AIF1]) {
+			snd_soc_component_update_bits(component,
+				RT5668_ADDA_CLK_1, RT5668_I2S_M_DIV_MASK,
+				pre_div << RT5668_I2S_M_DIV_SFT);
+		}
+		if (params_channels(params) == 1) /* mono mode */
+			snd_soc_component_update_bits(component,
+				RT5668_I2S1_SDP, RT5668_I2S1_MONO_MASK,
+				RT5668_I2S1_MONO_EN);
+		else
+			snd_soc_component_update_bits(component,
+				RT5668_I2S1_SDP, RT5668_I2S1_MONO_MASK,
+				RT5668_I2S1_MONO_DIS);
+		break;
+	case RT5668_AIF2:
+		snd_soc_component_update_bits(component, RT5668_I2S2_SDP,
+			RT5668_I2S2_DL_MASK, len_2);
+		if (rt5668->master[RT5668_AIF2]) {
+			snd_soc_component_update_bits(component,
+				RT5668_I2S_M_CLK_CTRL_1, RT5668_I2S2_M_PD_MASK,
+				pre_div << RT5668_I2S2_M_PD_SFT);
+		}
+		if (params_channels(params) == 1) /* mono mode */
+			snd_soc_component_update_bits(component,
+				RT5668_I2S2_SDP, RT5668_I2S2_MONO_MASK,
+				RT5668_I2S2_MONO_EN);
+		else
+			snd_soc_component_update_bits(component,
+				RT5668_I2S2_SDP, RT5668_I2S2_MONO_MASK,
+				RT5668_I2S2_MONO_DIS);
+		break;
+	default:
+		dev_err(component->dev, "Invalid dai->id: %d\n", dai->id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rt5668_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+	struct snd_soc_component *component = dai->component;
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+	unsigned int reg_val = 0, tdm_ctrl = 0;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		rt5668->master[dai->id] = 1;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		rt5668->master[dai->id] = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		reg_val |= RT5668_I2S_BP_INV;
+		tdm_ctrl |= RT5668_TDM_S_BP_INV;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		if (dai->id == RT5668_AIF1)
+			tdm_ctrl |= RT5668_TDM_S_LP_INV | RT5668_TDM_M_BP_INV;
+		else
+			return -EINVAL;
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		if (dai->id == RT5668_AIF1)
+			tdm_ctrl |= RT5668_TDM_S_BP_INV | RT5668_TDM_S_LP_INV |
+				    RT5668_TDM_M_BP_INV | RT5668_TDM_M_LP_INV;
+		else
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		reg_val |= RT5668_I2S_DF_LEFT;
+		tdm_ctrl |= RT5668_TDM_DF_LEFT;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		reg_val |= RT5668_I2S_DF_PCM_A;
+		tdm_ctrl |= RT5668_TDM_DF_PCM_A;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		reg_val |= RT5668_I2S_DF_PCM_B;
+		tdm_ctrl |= RT5668_TDM_DF_PCM_B;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (dai->id) {
+	case RT5668_AIF1:
+		snd_soc_component_update_bits(component, RT5668_I2S1_SDP,
+			RT5668_I2S_DF_MASK, reg_val);
+		snd_soc_component_update_bits(component, RT5668_TDM_TCON_CTRL,
+			RT5668_TDM_MS_MASK | RT5668_TDM_S_BP_MASK |
+			RT5668_TDM_DF_MASK | RT5668_TDM_M_BP_MASK |
+			RT5668_TDM_M_LP_MASK | RT5668_TDM_S_LP_MASK,
+			tdm_ctrl | rt5668->master[dai->id]);
+		break;
+	case RT5668_AIF2:
+		if (rt5668->master[dai->id] == 0)
+			reg_val |= RT5668_I2S2_MS_S;
+		snd_soc_component_update_bits(component, RT5668_I2S2_SDP,
+			RT5668_I2S2_MS_MASK | RT5668_I2S_BP_MASK |
+			RT5668_I2S_DF_MASK, reg_val);
+		break;
+	default:
+		dev_err(component->dev, "Invalid dai->id: %d\n", dai->id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int rt5668_set_component_sysclk(struct snd_soc_component *component,
+		int clk_id, int source, unsigned int freq, int dir)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+	unsigned int reg_val = 0, src = 0;
+
+	if (freq == rt5668->sysclk && clk_id == rt5668->sysclk_src)
+		return 0;
+
+	switch (clk_id) {
+	case RT5668_SCLK_S_MCLK:
+		reg_val |= RT5668_SCLK_SRC_MCLK;
+		src = RT5668_CLK_SRC_MCLK;
+		break;
+	case RT5668_SCLK_S_PLL1:
+		reg_val |= RT5668_SCLK_SRC_PLL1;
+		src = RT5668_CLK_SRC_PLL1;
+		break;
+	case RT5668_SCLK_S_PLL2:
+		reg_val |= RT5668_SCLK_SRC_PLL2;
+		src = RT5668_CLK_SRC_PLL2;
+		break;
+	case RT5668_SCLK_S_RCCLK:
+		reg_val |= RT5668_SCLK_SRC_RCCLK;
+		src = RT5668_CLK_SRC_RCCLK;
+		break;
+	default:
+		dev_err(component->dev, "Invalid clock id (%d)\n", clk_id);
+		return -EINVAL;
+	}
+	snd_soc_component_update_bits(component, RT5668_GLB_CLK,
+		RT5668_SCLK_SRC_MASK, reg_val);
+
+	if (rt5668->master[RT5668_AIF2]) {
+		snd_soc_component_update_bits(component,
+			RT5668_I2S_M_CLK_CTRL_1, RT5668_I2S2_SRC_MASK,
+			src << RT5668_I2S2_SRC_SFT);
+	}
+
+	rt5668->sysclk = freq;
+	rt5668->sysclk_src = clk_id;
+
+	dev_dbg(component->dev, "Sysclk is %dHz and clock id is %d\n",
+		freq, clk_id);
+
+	return 0;
+}
+
+static int rt5668_set_component_pll(struct snd_soc_component *component,
+		int pll_id, int source, unsigned int freq_in,
+		unsigned int freq_out)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+	struct rl6231_pll_code pll_code;
+	int ret;
+
+	if (source == rt5668->pll_src && freq_in == rt5668->pll_in &&
+	    freq_out == rt5668->pll_out)
+		return 0;
+
+	if (!freq_in || !freq_out) {
+		dev_dbg(component->dev, "PLL disabled\n");
+
+		rt5668->pll_in = 0;
+		rt5668->pll_out = 0;
+		snd_soc_component_update_bits(component, RT5668_GLB_CLK,
+			RT5668_SCLK_SRC_MASK, RT5668_SCLK_SRC_MCLK);
+		return 0;
+	}
+
+	switch (source) {
+	case RT5668_PLL1_S_MCLK:
+		snd_soc_component_update_bits(component, RT5668_GLB_CLK,
+			RT5668_PLL1_SRC_MASK, RT5668_PLL1_SRC_MCLK);
+		break;
+	case RT5668_PLL1_S_BCLK1:
+		snd_soc_component_update_bits(component, RT5668_GLB_CLK,
+				RT5668_PLL1_SRC_MASK, RT5668_PLL1_SRC_BCLK1);
+		break;
+	default:
+		dev_err(component->dev, "Unknown PLL Source %d\n", source);
+		return -EINVAL;
+	}
+
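+	/*
+	 * rl6231_pll_calc() solves freq_out = freq_in * (N + 2) /
+	 * ((M + 2) * (K + 2)) for the divider codes; M is bypassed
+	 * (treated as a factor of 1) when m_bp is set.
+	 */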
+	ret = rl6231_pll_calc(freq_in, freq_out, &pll_code);
+	if (ret < 0) {
+		dev_err(component->dev, "Unsupported input clock %d\n", freq_in);
+		return ret;
+	}
+
+	dev_dbg(component->dev, "bypass=%d m=%d n=%d k=%d\n",
+		pll_code.m_bp, (pll_code.m_bp ? 0 : pll_code.m_code),
+		pll_code.n_code, pll_code.k_code);
+
+	snd_soc_component_write(component, RT5668_PLL_CTRL_1,
+		pll_code.n_code << RT5668_PLL_N_SFT | pll_code.k_code);
+	snd_soc_component_write(component, RT5668_PLL_CTRL_2,
+		(pll_code.m_bp ? 0 : pll_code.m_code) << RT5668_PLL_M_SFT |
+		pll_code.m_bp << RT5668_PLL_M_BP_SFT);
+
+	rt5668->pll_in = freq_in;
+	rt5668->pll_out = freq_out;
+	rt5668->pll_src = source;
+
+	return 0;
+}
+
+static int rt5668_set_bclk_ratio(struct snd_soc_dai *dai, unsigned int ratio)
+{
+	struct snd_soc_component *component = dai->component;
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+
+	rt5668->bclk[dai->id] = ratio;
+
+	switch (ratio) {
+	case 64:
+		snd_soc_component_update_bits(component, RT5668_ADDA_CLK_2,
+			RT5668_I2S2_BCLK_MS2_MASK,
+			RT5668_I2S2_BCLK_MS2_64);
+		break;
+	case 32:
+		snd_soc_component_update_bits(component, RT5668_ADDA_CLK_2,
+			RT5668_I2S2_BCLK_MS2_MASK,
+			RT5668_I2S2_BCLK_MS2_32);
+		break;
+	default:
+		dev_err(dai->dev, "Invalid bclk ratio %d\n", ratio);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rt5668_set_bias_level(struct snd_soc_component *component,
+			enum snd_soc_bias_level level)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+
+	switch (level) {
+	case SND_SOC_BIAS_PREPARE:
+		regmap_update_bits(rt5668->regmap, RT5668_PWR_ANLG_1,
+			RT5668_PWR_MB | RT5668_PWR_BG,
+			RT5668_PWR_MB | RT5668_PWR_BG);
+		regmap_update_bits(rt5668->regmap, RT5668_PWR_DIG_1,
+			RT5668_DIG_GATE_CTRL | RT5668_PWR_LDO,
+			RT5668_DIG_GATE_CTRL | RT5668_PWR_LDO);
+		break;
+
+	case SND_SOC_BIAS_STANDBY:
+		regmap_update_bits(rt5668->regmap, RT5668_PWR_ANLG_1,
+			RT5668_PWR_MB, RT5668_PWR_MB);
+		regmap_update_bits(rt5668->regmap, RT5668_PWR_DIG_1,
+			RT5668_DIG_GATE_CTRL, RT5668_DIG_GATE_CTRL);
+		break;
+	case SND_SOC_BIAS_OFF:
+		regmap_update_bits(rt5668->regmap, RT5668_PWR_DIG_1,
+			RT5668_DIG_GATE_CTRL | RT5668_PWR_LDO, 0);
+		regmap_update_bits(rt5668->regmap, RT5668_PWR_ANLG_1,
+			RT5668_PWR_MB | RT5668_PWR_BG, 0);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int rt5668_probe(struct snd_soc_component *component)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+
+	rt5668->component = component;
+
+	return 0;
+}
+
+static void rt5668_remove(struct snd_soc_component *component)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+
+	rt5668_reset(rt5668->regmap);
+}
+
+#ifdef CONFIG_PM
+static int rt5668_suspend(struct snd_soc_component *component)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+
+	regcache_cache_only(rt5668->regmap, true);
+	regcache_mark_dirty(rt5668->regmap);
+	return 0;
+}
+
+static int rt5668_resume(struct snd_soc_component *component)
+{
+	struct rt5668_priv *rt5668 = snd_soc_component_get_drvdata(component);
+
+	regcache_cache_only(rt5668->regmap, false);
+	regcache_sync(rt5668->regmap);
+
+	return 0;
+}
+#else
+#define rt5668_suspend NULL
+#define rt5668_resume NULL
+#endif
+
+#define RT5668_STEREO_RATES SNDRV_PCM_RATE_8000_192000
+#define RT5668_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \
+		SNDRV_PCM_FMTBIT_S24_LE | SNDRV_PCM_FMTBIT_S8)
+
+static const struct snd_soc_dai_ops rt5668_aif1_dai_ops = {
+	.hw_params = rt5668_hw_params,
+	.set_fmt = rt5668_set_dai_fmt,
+	.set_tdm_slot = rt5668_set_tdm_slot,
+};
+
+static const struct snd_soc_dai_ops rt5668_aif2_dai_ops = {
+	.hw_params = rt5668_hw_params,
+	.set_fmt = rt5668_set_dai_fmt,
+	.set_bclk_ratio = rt5668_set_bclk_ratio,
+};
+
+static struct snd_soc_dai_driver rt5668_dai[] = {
+	{
+		.name = "rt5668-aif1",
+		.id = RT5668_AIF1,
+		.playback = {
+			.stream_name = "AIF1 Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = RT5668_STEREO_RATES,
+			.formats = RT5668_FORMATS,
+		},
+		.capture = {
+			.stream_name = "AIF1 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = RT5668_STEREO_RATES,
+			.formats = RT5668_FORMATS,
+		},
+		.ops = &rt5668_aif1_dai_ops,
+	},
+	{
+		.name = "rt5668-aif2",
+		.id = RT5668_AIF2,
+		.capture = {
+			.stream_name = "AIF2 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = RT5668_STEREO_RATES,
+			.formats = RT5668_FORMATS,
+		},
+		.ops = &rt5668_aif2_dai_ops,
+	},
+};
+
+static const struct snd_soc_component_driver soc_component_dev_rt5668 = {
+	.probe = rt5668_probe,
+	.remove = rt5668_remove,
+	.suspend = rt5668_suspend,
+	.resume = rt5668_resume,
+	.set_bias_level = rt5668_set_bias_level,
+	.controls = rt5668_snd_controls,
+	.num_controls = ARRAY_SIZE(rt5668_snd_controls),
+	.dapm_widgets = rt5668_dapm_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(rt5668_dapm_widgets),
+	.dapm_routes = rt5668_dapm_routes,
+	.num_dapm_routes = ARRAY_SIZE(rt5668_dapm_routes),
+	.set_sysclk = rt5668_set_component_sysclk,
+	.set_pll = rt5668_set_component_pll,
+	.set_jack = rt5668_set_jack_detect,
+	.use_pmdown_time	= 1,
+	.endianness		= 1,
+	.non_legacy_dai_naming	= 1,
+};
+
+static const struct regmap_config rt5668_regmap = {
+	.reg_bits = 16,
+	.val_bits = 16,
+	.max_register = RT5668_I2C_MODE,
+	.volatile_reg = rt5668_volatile_register,
+	.readable_reg = rt5668_readable_register,
+	.cache_type = REGCACHE_RBTREE,
+	.reg_defaults = rt5668_reg,
+	.num_reg_defaults = ARRAY_SIZE(rt5668_reg),
+	.use_single_rw = true,
+};
+
+static const struct i2c_device_id rt5668_i2c_id[] = {
+	{"rt5668b", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, rt5668_i2c_id);
+
+static int rt5668_parse_dt(struct rt5668_priv *rt5668, struct device *dev)
+{
+	of_property_read_u32(dev->of_node, "realtek,dmic1-data-pin",
+		&rt5668->pdata.dmic1_data_pin);
+	of_property_read_u32(dev->of_node, "realtek,dmic1-clk-pin",
+		&rt5668->pdata.dmic1_clk_pin);
+	of_property_read_u32(dev->of_node, "realtek,jd-src",
+		&rt5668->pdata.jd_src);
+
+	rt5668->pdata.ldo1_en = of_get_named_gpio(dev->of_node,
+		"realtek,ldo1-en-gpios", 0);
+
+	return 0;
+}
+
+static void rt5668_calibrate(struct rt5668_priv *rt5668)
+{
+	int value, count;
+
+	mutex_lock(&rt5668->calibrate_mutex);
+
+	rt5668_reset(rt5668->regmap);
+	regmap_write(rt5668->regmap, RT5668_PWR_ANLG_1, 0xa2bf);
+	usleep_range(15000, 20000);
+	regmap_write(rt5668->regmap, RT5668_PWR_ANLG_1, 0xf2bf);
+	regmap_write(rt5668->regmap, RT5668_MICBIAS_2, 0x0380);
+	regmap_write(rt5668->regmap, RT5668_PWR_DIG_1, 0x8001);
+	regmap_write(rt5668->regmap, RT5668_TEST_MODE_CTRL_1, 0x0000);
+	regmap_write(rt5668->regmap, RT5668_STO1_DAC_MIXER, 0x2080);
+	regmap_write(rt5668->regmap, RT5668_STO1_ADC_MIXER, 0x4040);
+	regmap_write(rt5668->regmap, RT5668_DEPOP_1, 0x0069);
+	regmap_write(rt5668->regmap, RT5668_CHOP_DAC, 0x3000);
+	regmap_write(rt5668->regmap, RT5668_HP_CTRL_2, 0x6000);
+	regmap_write(rt5668->regmap, RT5668_HP_CHARGE_PUMP_1, 0x0f26);
+	regmap_write(rt5668->regmap, RT5668_CALIB_ADC_CTRL, 0x7f05);
+	regmap_write(rt5668->regmap, RT5668_STO1_ADC_MIXER, 0x686c);
+	regmap_write(rt5668->regmap, RT5668_CAL_REC, 0x0d0d);
+	regmap_write(rt5668->regmap, RT5668_HP_CALIB_CTRL_9, 0x000f);
+	regmap_write(rt5668->regmap, RT5668_PWR_DIG_1, 0x8d01);
+	regmap_write(rt5668->regmap, RT5668_HP_CALIB_CTRL_2, 0x0321);
+	regmap_write(rt5668->regmap, RT5668_HP_LOGIC_CTRL_2, 0x0004);
+	regmap_write(rt5668->regmap, RT5668_HP_CALIB_CTRL_1, 0x7c00);
+	regmap_write(rt5668->regmap, RT5668_HP_CALIB_CTRL_3, 0x06a1);
+	regmap_write(rt5668->regmap, RT5668_A_DAC1_MUX, 0x0311);
+	regmap_write(rt5668->regmap, RT5668_RESET_HPF_CTRL, 0x0000);
+	regmap_write(rt5668->regmap, RT5668_ADC_STO1_HP_CTRL_1, 0x3320);
+
+	regmap_write(rt5668->regmap, RT5668_HP_CALIB_CTRL_1, 0xfc00);
+
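+	/* Wait up to ~600 ms for the hardware to clear the calibration busy bit */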
+	for (count = 0; count < 60; count++) {
+		regmap_read(rt5668->regmap, RT5668_HP_CALIB_STA_1, &value);
+		if (!(value & 0x8000))
+			break;
+
+		usleep_range(10000, 10005);
+	}
+
+	if (count >= 60)
+		pr_err("HP Calibration Failure\n");
+
+	/* restore settings */
+	regmap_write(rt5668->regmap, RT5668_STO1_ADC_MIXER, 0xc0c4);
+	regmap_write(rt5668->regmap, RT5668_PWR_DIG_1, 0x0000);
+
+	mutex_unlock(&rt5668->calibrate_mutex);
+}
+
+static int rt5668_i2c_probe(struct i2c_client *i2c,
+		    const struct i2c_device_id *id)
+{
+	struct rt5668_platform_data *pdata = dev_get_platdata(&i2c->dev);
+	struct rt5668_priv *rt5668;
+	int i, ret;
+	unsigned int val;
+
+	rt5668 = devm_kzalloc(&i2c->dev, sizeof(struct rt5668_priv),
+		GFP_KERNEL);
+
+	if (rt5668 == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, rt5668);
+
+	if (pdata)
+		rt5668->pdata = *pdata;
+	else
+		rt5668_parse_dt(rt5668, &i2c->dev);
+
+	rt5668->regmap = devm_regmap_init_i2c(i2c, &rt5668_regmap);
+	if (IS_ERR(rt5668->regmap)) {
+		ret = PTR_ERR(rt5668->regmap);
+		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+			ret);
+		return ret;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(rt5668->supplies); i++)
+		rt5668->supplies[i].supply = rt5668_supply_names[i];
+
+	ret = devm_regulator_bulk_get(&i2c->dev, ARRAY_SIZE(rt5668->supplies),
+				      rt5668->supplies);
+	if (ret != 0) {
+		dev_err(&i2c->dev, "Failed to request supplies: %d\n", ret);
+		return ret;
+	}
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(rt5668->supplies),
+				    rt5668->supplies);
+	if (ret != 0) {
+		dev_err(&i2c->dev, "Failed to enable supplies: %d\n", ret);
+		return ret;
+	}
+
+	if (gpio_is_valid(rt5668->pdata.ldo1_en)) {
+		if (devm_gpio_request_one(&i2c->dev, rt5668->pdata.ldo1_en,
+					  GPIOF_OUT_INIT_HIGH, "rt5668"))
+			dev_err(&i2c->dev, "Failed to request ldo1-en GPIO\n");
+	}
+
+	/* Sleep for 300 ms minimum */
+	usleep_range(300000, 350000);
+
+	regmap_write(rt5668->regmap, RT5668_I2C_MODE, 0x1);
+	usleep_range(10000, 15000);
+
+	regmap_read(rt5668->regmap, RT5668_DEVICE_ID, &val);
+	if (val != DEVICE_ID) {
+		pr_err("Device with ID register %x is not rt5668\n", val);
+		return -ENODEV;
+	}
+
+	rt5668_reset(rt5668->regmap);
+
+	rt5668_calibrate(rt5668);
+
+	regmap_write(rt5668->regmap, RT5668_DEPOP_1, 0x0000);
+
+	/* DMIC pin*/
+	if (rt5668->pdata.dmic1_data_pin != RT5668_DMIC1_NULL) {
+		switch (rt5668->pdata.dmic1_data_pin) {
+		case RT5668_DMIC1_DATA_GPIO2: /* share with LRCK2 */
+			regmap_update_bits(rt5668->regmap, RT5668_DMIC_CTRL_1,
+				RT5668_DMIC_1_DP_MASK, RT5668_DMIC_1_DP_GPIO2);
+			regmap_update_bits(rt5668->regmap, RT5668_GPIO_CTRL_1,
+				RT5668_GP2_PIN_MASK, RT5668_GP2_PIN_DMIC_SDA);
+			break;
+
+		case RT5668_DMIC1_DATA_GPIO5: /* share with DACDAT1 */
+			regmap_update_bits(rt5668->regmap, RT5668_DMIC_CTRL_1,
+				RT5668_DMIC_1_DP_MASK, RT5668_DMIC_1_DP_GPIO5);
+			regmap_update_bits(rt5668->regmap, RT5668_GPIO_CTRL_1,
+				RT5668_GP5_PIN_MASK, RT5668_GP5_PIN_DMIC_SDA);
+			break;
+
+		default:
+			dev_dbg(&i2c->dev, "invalid DMIC_DAT pin\n");
+			break;
+		}
+
+		switch (rt5668->pdata.dmic1_clk_pin) {
+		case RT5668_DMIC1_CLK_GPIO1: /* share with IRQ */
+			regmap_update_bits(rt5668->regmap, RT5668_GPIO_CTRL_1,
+				RT5668_GP1_PIN_MASK, RT5668_GP1_PIN_DMIC_CLK);
+			break;
+
+		case RT5668_DMIC1_CLK_GPIO3: /* share with BCLK2 */
+			regmap_update_bits(rt5668->regmap, RT5668_GPIO_CTRL_1,
+				RT5668_GP3_PIN_MASK, RT5668_GP3_PIN_DMIC_CLK);
+			break;
+
+		default:
+			dev_dbg(&i2c->dev, "invalid DMIC_CLK pin\n");
+			break;
+		}
+	}
+
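+	/*
+	 * Default analog setup: highest LDO1 output level (DVO_14) and the
+	 * 5x headphone driver setting.
+	 */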
+	regmap_update_bits(rt5668->regmap, RT5668_PWR_ANLG_1,
+			RT5668_LDO1_DVO_MASK | RT5668_HP_DRIVER_MASK,
+			RT5668_LDO1_DVO_14 | RT5668_HP_DRIVER_5X);
+	regmap_write(rt5668->regmap, RT5668_MICBIAS_2, 0x0380);
+	regmap_update_bits(rt5668->regmap, RT5668_GPIO_CTRL_1,
+			RT5668_GP4_PIN_MASK | RT5668_GP5_PIN_MASK,
+			RT5668_GP4_PIN_ADCDAT1 | RT5668_GP5_PIN_DACDAT1);
+	regmap_write(rt5668->regmap, RT5668_TEST_MODE_CTRL_1, 0x0000);
+
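+	/* Deferred work for jack detection and the periodic jack re-check */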
+	INIT_DELAYED_WORK(&rt5668->jack_detect_work,
+				rt5668_jack_detect_handler);
+	INIT_DELAYED_WORK(&rt5668->jd_check_work,
+				rt5668_jd_check_handler);
+
+	mutex_init(&rt5668->calibrate_mutex);
+
+	if (i2c->irq) {
+		ret = devm_request_threaded_irq(&i2c->dev, i2c->irq, NULL,
+			rt5668_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING
+			| IRQF_ONESHOT, "rt5668", rt5668);
+		if (ret)
+			dev_err(&i2c->dev, "Failed to reguest IRQ: %d\n", ret);
+
+	}
+
+	return snd_soc_register_component(&i2c->dev, &soc_component_dev_rt5668,
+			rt5668_dai, ARRAY_SIZE(rt5668_dai));
+}
+
+static int rt5668_i2c_remove(struct i2c_client *i2c)
+{
+	snd_soc_unregister_component(&i2c->dev);
+
+	return 0;
+}
+
+static void rt5668_i2c_shutdown(struct i2c_client *client)
+{
+	struct rt5668_priv *rt5668 = i2c_get_clientdata(client);
+
+	rt5668_reset(rt5668->regmap);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id rt5668_of_match[] = {
+	{.compatible = "realtek,rt5668b"},
+	{},
+};
+MODULE_DEVICE_TABLE(of, rt5668_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id rt5668_acpi_match[] = {
+	{"10EC5668", 0,},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, rt5668_acpi_match);
+#endif
+
+static struct i2c_driver rt5668_i2c_driver = {
+	.driver = {
+		.name = "rt5668b",
+		.of_match_table = of_match_ptr(rt5668_of_match),
+		.acpi_match_table = ACPI_PTR(rt5668_acpi_match),
+	},
+	.probe = rt5668_i2c_probe,
+	.remove = rt5668_i2c_remove,
+	.shutdown = rt5668_i2c_shutdown,
+	.id_table = rt5668_i2c_id,
+};
+module_i2c_driver(rt5668_i2c_driver);
+
+MODULE_DESCRIPTION("ASoC RT5668B driver");
+MODULE_AUTHOR("Bard Liao <bardliao@realtek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/codecs/rt5668.h b/sound/soc/codecs/rt5668.h
new file mode 100644
index 0000000..3e7bcfd
--- /dev/null
+++ b/sound/soc/codecs/rt5668.h
@@ -0,0 +1,1318 @@
+/*
+ * rt5668.h  --  RT5668/RT5668B ALSA SoC audio driver
+ *
+ * Copyright 2018 Realtek Microelectronics
+ * Author: Bard Liao <bardliao@realtek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __RT5668_H__
+#define __RT5668_H__
+
+#include <sound/rt5668.h>
+
+#define DEVICE_ID 0x6530
+
+/* Info */
+#define RT5668_RESET				0x0000
+#define RT5668_VERSION_ID			0x00fd
+#define RT5668_VENDOR_ID			0x00fe
+#define RT5668_DEVICE_ID			0x00ff
+/*  I/O - Output */
+#define RT5668_HP_CTRL_1			0x0002
+#define RT5668_HP_CTRL_2			0x0003
+#define RT5668_HPL_GAIN				0x0005
+#define RT5668_HPR_GAIN				0x0006
+
+#define RT5668_I2C_CTRL				0x0008
+
+/* I/O - Input */
+#define RT5668_CBJ_BST_CTRL			0x000b
+#define RT5668_CBJ_CTRL_1			0x0010
+#define RT5668_CBJ_CTRL_2			0x0011
+#define RT5668_CBJ_CTRL_3			0x0012
+#define RT5668_CBJ_CTRL_4			0x0013
+#define RT5668_CBJ_CTRL_5			0x0014
+#define RT5668_CBJ_CTRL_6			0x0015
+#define RT5668_CBJ_CTRL_7			0x0016
+/* I/O - ADC/DAC/DMIC */
+#define RT5668_DAC1_DIG_VOL			0x0019
+#define RT5668_STO1_ADC_DIG_VOL			0x001c
+#define RT5668_STO1_ADC_BOOST			0x001f
+#define RT5668_HP_IMP_GAIN_1			0x0022
+#define RT5668_HP_IMP_GAIN_2			0x0023
+/* Mixer - D-D */
+#define RT5668_SIDETONE_CTRL			0x0024
+#define RT5668_STO1_ADC_MIXER			0x0026
+#define RT5668_AD_DA_MIXER			0x0029
+#define RT5668_STO1_DAC_MIXER			0x002a
+#define RT5668_A_DAC1_MUX			0x002b
+#define RT5668_DIG_INF2_DATA			0x0030
+/* Mixer - ADC */
+#define RT5668_REC_MIXER			0x003c
+#define RT5668_CAL_REC				0x0044
+#define RT5668_ALC_BACK_GAIN			0x0049
+/* Power */
+#define RT5668_PWR_DIG_1			0x0061
+#define RT5668_PWR_DIG_2			0x0062
+#define RT5668_PWR_ANLG_1			0x0063
+#define RT5668_PWR_ANLG_2			0x0064
+#define RT5668_PWR_ANLG_3			0x0065
+#define RT5668_PWR_MIXER			0x0066
+#define RT5668_PWR_VOL				0x0067
+/* Clock Detect */
+#define RT5668_CLK_DET				0x006b
+/* Filter Auto Reset */
+#define RT5668_RESET_LPF_CTRL			0x006c
+#define RT5668_RESET_HPF_CTRL			0x006d
+/* DMIC */
+#define RT5668_DMIC_CTRL_1			0x006e
+/* Format - ADC/DAC */
+#define RT5668_I2S1_SDP				0x0070
+#define RT5668_I2S2_SDP				0x0071
+#define RT5668_ADDA_CLK_1			0x0073
+#define RT5668_ADDA_CLK_2			0x0074
+#define RT5668_I2S1_F_DIV_CTRL_1		0x0075
+#define RT5668_I2S1_F_DIV_CTRL_2		0x0076
+/* Format - TDM Control */
+#define RT5668_TDM_CTRL				0x0079
+#define RT5668_TDM_ADDA_CTRL_1			0x007a
+#define RT5668_TDM_ADDA_CTRL_2			0x007b
+#define RT5668_DATA_SEL_CTRL_1			0x007c
+#define RT5668_TDM_TCON_CTRL			0x007e
+/* Function - Analog */
+#define RT5668_GLB_CLK				0x0080
+#define RT5668_PLL_CTRL_1			0x0081
+#define RT5668_PLL_CTRL_2			0x0082
+#define RT5668_PLL_TRACK_1			0x0083
+#define RT5668_PLL_TRACK_2			0x0084
+#define RT5668_PLL_TRACK_3			0x0085
+#define RT5668_PLL_TRACK_4			0x0086
+#define RT5668_PLL_TRACK_5			0x0087
+#define RT5668_PLL_TRACK_6			0x0088
+#define RT5668_PLL_TRACK_11			0x008c
+#define RT5668_SDW_REF_CLK			0x008d
+#define RT5668_DEPOP_1				0x008e
+#define RT5668_DEPOP_2				0x008f
+#define RT5668_HP_CHARGE_PUMP_1			0x0091
+#define RT5668_HP_CHARGE_PUMP_2			0x0092
+#define RT5668_MICBIAS_1			0x0093
+#define RT5668_MICBIAS_2			0x0094
+#define RT5668_PLL_TRACK_12			0x0098
+#define RT5668_PLL_TRACK_14			0x009a
+#define RT5668_PLL2_CTRL_1			0x009b
+#define RT5668_PLL2_CTRL_2			0x009c
+#define RT5668_PLL2_CTRL_3			0x009d
+#define RT5668_PLL2_CTRL_4			0x009e
+#define RT5668_RC_CLK_CTRL			0x009f
+#define RT5668_I2S_M_CLK_CTRL_1			0x00a0
+#define RT5668_I2S2_F_DIV_CTRL_1		0x00a3
+#define RT5668_I2S2_F_DIV_CTRL_2		0x00a4
+/* Function - Digital */
+#define RT5668_EQ_CTRL_1			0x00ae
+#define RT5668_EQ_CTRL_2			0x00af
+#define RT5668_IRQ_CTRL_1			0x00b6
+#define RT5668_IRQ_CTRL_2			0x00b7
+#define RT5668_IRQ_CTRL_3			0x00b8
+#define RT5668_IRQ_CTRL_4			0x00b9
+#define RT5668_INT_ST_1				0x00be
+#define RT5668_GPIO_CTRL_1			0x00c0
+#define RT5668_GPIO_CTRL_2			0x00c1
+#define RT5668_GPIO_CTRL_3			0x00c2
+#define RT5668_HP_AMP_DET_CTRL_1		0x00d0
+#define RT5668_HP_AMP_DET_CTRL_2		0x00d1
+#define RT5668_MID_HP_AMP_DET			0x00d2
+#define RT5668_LOW_HP_AMP_DET			0x00d3
+#define RT5668_DELAY_BUF_CTRL			0x00d4
+#define RT5668_SV_ZCD_1				0x00d9
+#define RT5668_SV_ZCD_2				0x00da
+#define RT5668_IL_CMD_1				0x00db
+#define RT5668_IL_CMD_2				0x00dc
+#define RT5668_IL_CMD_3				0x00dd
+#define RT5668_IL_CMD_4				0x00de
+#define RT5668_IL_CMD_5				0x00df
+#define RT5668_IL_CMD_6				0x00e0
+#define RT5668_4BTN_IL_CMD_1			0x00e2
+#define RT5668_4BTN_IL_CMD_2			0x00e3
+#define RT5668_4BTN_IL_CMD_3			0x00e4
+#define RT5668_4BTN_IL_CMD_4			0x00e5
+#define RT5668_4BTN_IL_CMD_5			0x00e6
+#define RT5668_4BTN_IL_CMD_6			0x00e7
+#define RT5668_4BTN_IL_CMD_7			0x00e8
+
+#define RT5668_ADC_STO1_HP_CTRL_1		0x00ea
+#define RT5668_ADC_STO1_HP_CTRL_2		0x00eb
+#define RT5668_AJD1_CTRL			0x00f0
+#define RT5668_JD1_THD				0x00f1
+#define RT5668_JD2_THD				0x00f2
+#define RT5668_JD_CTRL_1			0x00f6
+/* General Control */
+#define RT5668_DUMMY_1				0x00fa
+#define RT5668_DUMMY_2				0x00fb
+#define RT5668_DUMMY_3				0x00fc
+
+#define RT5668_DAC_ADC_DIG_VOL1			0x0100
+#define RT5668_BIAS_CUR_CTRL_2			0x010b
+#define RT5668_BIAS_CUR_CTRL_3			0x010c
+#define RT5668_BIAS_CUR_CTRL_4			0x010d
+#define RT5668_BIAS_CUR_CTRL_5			0x010e
+#define RT5668_BIAS_CUR_CTRL_6			0x010f
+#define RT5668_BIAS_CUR_CTRL_7			0x0110
+#define RT5668_BIAS_CUR_CTRL_8			0x0111
+#define RT5668_BIAS_CUR_CTRL_9			0x0112
+#define RT5668_BIAS_CUR_CTRL_10			0x0113
+#define RT5668_VREF_REC_OP_FB_CAP_CTRL		0x0117
+#define RT5668_CHARGE_PUMP_1			0x0125
+#define RT5668_DIG_IN_CTRL_1			0x0132
+#define RT5668_PAD_DRIVING_CTRL			0x0136
+#define RT5668_SOFT_RAMP_DEPOP			0x0138
+#define RT5668_CHOP_DAC				0x013a
+#define RT5668_CHOP_ADC				0x013b
+#define RT5668_CALIB_ADC_CTRL			0x013c
+#define RT5668_VOL_TEST				0x013f
+#define RT5668_SPKVDD_DET_STA			0x0142
+#define RT5668_TEST_MODE_CTRL_1			0x0145
+#define RT5668_TEST_MODE_CTRL_2			0x0146
+#define RT5668_TEST_MODE_CTRL_3			0x0147
+#define RT5668_TEST_MODE_CTRL_4			0x0148
+#define RT5668_TEST_MODE_CTRL_5			0x0149
+#define RT5668_PLL1_INTERNAL			0x0150
+#define RT5668_PLL2_INTERNAL			0x0151
+#define RT5668_STO_NG2_CTRL_1			0x0160
+#define RT5668_STO_NG2_CTRL_2			0x0161
+#define RT5668_STO_NG2_CTRL_3			0x0162
+#define RT5668_STO_NG2_CTRL_4			0x0163
+#define RT5668_STO_NG2_CTRL_5			0x0164
+#define RT5668_STO_NG2_CTRL_6			0x0165
+#define RT5668_STO_NG2_CTRL_7			0x0166
+#define RT5668_STO_NG2_CTRL_8			0x0167
+#define RT5668_STO_NG2_CTRL_9			0x0168
+#define RT5668_STO_NG2_CTRL_10			0x0169
+#define RT5668_STO1_DAC_SIL_DET			0x0190
+#define RT5668_SIL_PSV_CTRL1			0x0194
+#define RT5668_SIL_PSV_CTRL2			0x0195
+#define RT5668_SIL_PSV_CTRL3			0x0197
+#define RT5668_SIL_PSV_CTRL4			0x0198
+#define RT5668_SIL_PSV_CTRL5			0x0199
+#define RT5668_HP_IMP_SENS_CTRL_01		0x01af
+#define RT5668_HP_IMP_SENS_CTRL_02		0x01b0
+#define RT5668_HP_IMP_SENS_CTRL_03		0x01b1
+#define RT5668_HP_IMP_SENS_CTRL_04		0x01b2
+#define RT5668_HP_IMP_SENS_CTRL_05		0x01b3
+#define RT5668_HP_IMP_SENS_CTRL_06		0x01b4
+#define RT5668_HP_IMP_SENS_CTRL_07		0x01b5
+#define RT5668_HP_IMP_SENS_CTRL_08		0x01b6
+#define RT5668_HP_IMP_SENS_CTRL_09		0x01b7
+#define RT5668_HP_IMP_SENS_CTRL_10		0x01b8
+#define RT5668_HP_IMP_SENS_CTRL_11		0x01b9
+#define RT5668_HP_IMP_SENS_CTRL_12		0x01ba
+#define RT5668_HP_IMP_SENS_CTRL_13		0x01bb
+#define RT5668_HP_IMP_SENS_CTRL_14		0x01bc
+#define RT5668_HP_IMP_SENS_CTRL_15		0x01bd
+#define RT5668_HP_IMP_SENS_CTRL_16		0x01be
+#define RT5668_HP_IMP_SENS_CTRL_17		0x01bf
+#define RT5668_HP_IMP_SENS_CTRL_18		0x01c0
+#define RT5668_HP_IMP_SENS_CTRL_19		0x01c1
+#define RT5668_HP_IMP_SENS_CTRL_20		0x01c2
+#define RT5668_HP_IMP_SENS_CTRL_21		0x01c3
+#define RT5668_HP_IMP_SENS_CTRL_22		0x01c4
+#define RT5668_HP_IMP_SENS_CTRL_23		0x01c5
+#define RT5668_HP_IMP_SENS_CTRL_24		0x01c6
+#define RT5668_HP_IMP_SENS_CTRL_25		0x01c7
+#define RT5668_HP_IMP_SENS_CTRL_26		0x01c8
+#define RT5668_HP_IMP_SENS_CTRL_27		0x01c9
+#define RT5668_HP_IMP_SENS_CTRL_28		0x01ca
+#define RT5668_HP_IMP_SENS_CTRL_29		0x01cb
+#define RT5668_HP_IMP_SENS_CTRL_30		0x01cc
+#define RT5668_HP_IMP_SENS_CTRL_31		0x01cd
+#define RT5668_HP_IMP_SENS_CTRL_32		0x01ce
+#define RT5668_HP_IMP_SENS_CTRL_33		0x01cf
+#define RT5668_HP_IMP_SENS_CTRL_34		0x01d0
+#define RT5668_HP_IMP_SENS_CTRL_35		0x01d1
+#define RT5668_HP_IMP_SENS_CTRL_36		0x01d2
+#define RT5668_HP_IMP_SENS_CTRL_37		0x01d3
+#define RT5668_HP_IMP_SENS_CTRL_38		0x01d4
+#define RT5668_HP_IMP_SENS_CTRL_39		0x01d5
+#define RT5668_HP_IMP_SENS_CTRL_40		0x01d6
+#define RT5668_HP_IMP_SENS_CTRL_41		0x01d7
+#define RT5668_HP_IMP_SENS_CTRL_42		0x01d8
+#define RT5668_HP_IMP_SENS_CTRL_43		0x01d9
+#define RT5668_HP_LOGIC_CTRL_1			0x01da
+#define RT5668_HP_LOGIC_CTRL_2			0x01db
+#define RT5668_HP_LOGIC_CTRL_3			0x01dc
+#define RT5668_HP_CALIB_CTRL_1			0x01de
+#define RT5668_HP_CALIB_CTRL_2			0x01df
+#define RT5668_HP_CALIB_CTRL_3			0x01e0
+#define RT5668_HP_CALIB_CTRL_4			0x01e1
+#define RT5668_HP_CALIB_CTRL_5			0x01e2
+#define RT5668_HP_CALIB_CTRL_6			0x01e3
+#define RT5668_HP_CALIB_CTRL_7			0x01e4
+#define RT5668_HP_CALIB_CTRL_9			0x01e6
+#define RT5668_HP_CALIB_CTRL_10			0x01e7
+#define RT5668_HP_CALIB_CTRL_11			0x01e8
+#define RT5668_HP_CALIB_STA_1			0x01ea
+#define RT5668_HP_CALIB_STA_2			0x01eb
+#define RT5668_HP_CALIB_STA_3			0x01ec
+#define RT5668_HP_CALIB_STA_4			0x01ed
+#define RT5668_HP_CALIB_STA_5			0x01ee
+#define RT5668_HP_CALIB_STA_6			0x01ef
+#define RT5668_HP_CALIB_STA_7			0x01f0
+#define RT5668_HP_CALIB_STA_8			0x01f1
+#define RT5668_HP_CALIB_STA_9			0x01f2
+#define RT5668_HP_CALIB_STA_10			0x01f3
+#define RT5668_HP_CALIB_STA_11			0x01f4
+#define RT5668_SAR_IL_CMD_1			0x0210
+#define RT5668_SAR_IL_CMD_2			0x0211
+#define RT5668_SAR_IL_CMD_3			0x0212
+#define RT5668_SAR_IL_CMD_4			0x0213
+#define RT5668_SAR_IL_CMD_5			0x0214
+#define RT5668_SAR_IL_CMD_6			0x0215
+#define RT5668_SAR_IL_CMD_7			0x0216
+#define RT5668_SAR_IL_CMD_8			0x0217
+#define RT5668_SAR_IL_CMD_9			0x0218
+#define RT5668_SAR_IL_CMD_10			0x0219
+#define RT5668_SAR_IL_CMD_11			0x021a
+#define RT5668_SAR_IL_CMD_12			0x021b
+#define RT5668_SAR_IL_CMD_13			0x021c
+#define RT5668_EFUSE_CTRL_1			0x0250
+#define RT5668_EFUSE_CTRL_2			0x0251
+#define RT5668_EFUSE_CTRL_3			0x0252
+#define RT5668_EFUSE_CTRL_4			0x0253
+#define RT5668_EFUSE_CTRL_5			0x0254
+#define RT5668_EFUSE_CTRL_6			0x0255
+#define RT5668_EFUSE_CTRL_7			0x0256
+#define RT5668_EFUSE_CTRL_8			0x0257
+#define RT5668_EFUSE_CTRL_9			0x0258
+#define RT5668_EFUSE_CTRL_10			0x0259
+#define RT5668_EFUSE_CTRL_11			0x025a
+#define RT5668_JD_TOP_VC_VTRL			0x0270
+#define RT5668_DRC1_CTRL_0			0x02ff
+#define RT5668_DRC1_CTRL_1			0x0300
+#define RT5668_DRC1_CTRL_2			0x0301
+#define RT5668_DRC1_CTRL_3			0x0302
+#define RT5668_DRC1_CTRL_4			0x0303
+#define RT5668_DRC1_CTRL_5			0x0304
+#define RT5668_DRC1_CTRL_6			0x0305
+#define RT5668_DRC1_HARD_LMT_CTRL_1		0x0306
+#define RT5668_DRC1_HARD_LMT_CTRL_2		0x0307
+#define RT5668_DRC1_PRIV_1			0x0310
+#define RT5668_DRC1_PRIV_2			0x0311
+#define RT5668_DRC1_PRIV_3			0x0312
+#define RT5668_DRC1_PRIV_4			0x0313
+#define RT5668_DRC1_PRIV_5			0x0314
+#define RT5668_DRC1_PRIV_6			0x0315
+#define RT5668_DRC1_PRIV_7			0x0316
+#define RT5668_DRC1_PRIV_8			0x0317
+#define RT5668_EQ_AUTO_RCV_CTRL1		0x03c0
+#define RT5668_EQ_AUTO_RCV_CTRL2		0x03c1
+#define RT5668_EQ_AUTO_RCV_CTRL3		0x03c2
+#define RT5668_EQ_AUTO_RCV_CTRL4		0x03c3
+#define RT5668_EQ_AUTO_RCV_CTRL5		0x03c4
+#define RT5668_EQ_AUTO_RCV_CTRL6		0x03c5
+#define RT5668_EQ_AUTO_RCV_CTRL7		0x03c6
+#define RT5668_EQ_AUTO_RCV_CTRL8		0x03c7
+#define RT5668_EQ_AUTO_RCV_CTRL9		0x03c8
+#define RT5668_EQ_AUTO_RCV_CTRL10		0x03c9
+#define RT5668_EQ_AUTO_RCV_CTRL11		0x03ca
+#define RT5668_EQ_AUTO_RCV_CTRL12		0x03cb
+#define RT5668_EQ_AUTO_RCV_CTRL13		0x03cc
+#define RT5668_ADC_L_EQ_LPF1_A1			0x03d0
+#define RT5668_R_EQ_LPF1_A1			0x03d1
+#define RT5668_L_EQ_LPF1_H0			0x03d2
+#define RT5668_R_EQ_LPF1_H0			0x03d3
+#define RT5668_L_EQ_BPF1_A1			0x03d4
+#define RT5668_R_EQ_BPF1_A1			0x03d5
+#define RT5668_L_EQ_BPF1_A2			0x03d6
+#define RT5668_R_EQ_BPF1_A2			0x03d7
+#define RT5668_L_EQ_BPF1_H0			0x03d8
+#define RT5668_R_EQ_BPF1_H0			0x03d9
+#define RT5668_L_EQ_BPF2_A1			0x03da
+#define RT5668_R_EQ_BPF2_A1			0x03db
+#define RT5668_L_EQ_BPF2_A2			0x03dc
+#define RT5668_R_EQ_BPF2_A2			0x03dd
+#define RT5668_L_EQ_BPF2_H0			0x03de
+#define RT5668_R_EQ_BPF2_H0			0x03df
+#define RT5668_L_EQ_BPF3_A1			0x03e0
+#define RT5668_R_EQ_BPF3_A1			0x03e1
+#define RT5668_L_EQ_BPF3_A2			0x03e2
+#define RT5668_R_EQ_BPF3_A2			0x03e3
+#define RT5668_L_EQ_BPF3_H0			0x03e4
+#define RT5668_R_EQ_BPF3_H0			0x03e5
+#define RT5668_L_EQ_BPF4_A1			0x03e6
+#define RT5668_R_EQ_BPF4_A1			0x03e7
+#define RT5668_L_EQ_BPF4_A2			0x03e8
+#define RT5668_R_EQ_BPF4_A2			0x03e9
+#define RT5668_L_EQ_BPF4_H0			0x03ea
+#define RT5668_R_EQ_BPF4_H0			0x03eb
+#define RT5668_L_EQ_HPF1_A1			0x03ec
+#define RT5668_R_EQ_HPF1_A1			0x03ed
+#define RT5668_L_EQ_HPF1_H0			0x03ee
+#define RT5668_R_EQ_HPF1_H0			0x03ef
+#define RT5668_L_EQ_PRE_VOL			0x03f0
+#define RT5668_R_EQ_PRE_VOL			0x03f1
+#define RT5668_L_EQ_POST_VOL			0x03f2
+#define RT5668_R_EQ_POST_VOL			0x03f3
+#define RT5668_I2C_MODE				0xffff
+
+/* global definition */
+#define RT5668_L_MUTE				(0x1 << 15)
+#define RT5668_L_MUTE_SFT			15
+#define RT5668_VOL_L_MUTE			(0x1 << 14)
+#define RT5668_VOL_L_SFT			14
+#define RT5668_R_MUTE				(0x1 << 7)
+#define RT5668_R_MUTE_SFT			7
+#define RT5668_VOL_R_MUTE			(0x1 << 6)
+#define RT5668_VOL_R_SFT			6
+#define RT5668_L_VOL_MASK			(0x3f << 8)
+#define RT5668_L_VOL_SFT			8
+#define RT5668_R_VOL_MASK			(0x3f)
+#define RT5668_R_VOL_SFT			0
+
+/* Headphone Amp L/R Analog Gain and Digital NG2 Gain Control (0x0005 0x0006) */
+#define RT5668_G_HP				(0xf << 8)
+#define RT5668_G_HP_SFT				8
+#define RT5668_G_STO_DA_DMIX			(0xf)
+#define RT5668_G_STO_DA_SFT			0
+
+/* CBJ Control (0x000b) */
+#define RT5668_BST_CBJ_MASK			(0xf << 8)
+#define RT5668_BST_CBJ_SFT			8
+
+/* Embedded Jack and Type Detection Control 1 (0x0010) */
+#define RT5668_EMB_JD_EN			(0x1 << 15)
+#define RT5668_EMB_JD_EN_SFT			15
+#define RT5668_EMB_JD_RST			(0x1 << 14)
+#define RT5668_JD_MODE				(0x1 << 13)
+#define RT5668_JD_MODE_SFT			13
+#define RT5668_DET_TYPE				(0x1 << 12)
+#define RT5668_DET_TYPE_SFT			12
+#define RT5668_POLA_EXT_JD_MASK			(0x1 << 11)
+#define RT5668_POLA_EXT_JD_LOW			(0x1 << 11)
+#define RT5668_POLA_EXT_JD_HIGH			(0x0 << 11)
+#define RT5668_EXT_JD_DIG			(0x1 << 9)
+#define RT5668_POL_FAST_OFF_MASK		(0x1 << 8)
+#define RT5668_POL_FAST_OFF_HIGH		(0x1 << 8)
+#define RT5668_POL_FAST_OFF_LOW			(0x0 << 8)
+#define RT5668_FAST_OFF_MASK			(0x1 << 7)
+#define RT5668_FAST_OFF_EN			(0x1 << 7)
+#define RT5668_FAST_OFF_DIS			(0x0 << 7)
+#define RT5668_VREF_POW_MASK			(0x1 << 6)
+#define RT5668_VREF_POW_FSM			(0x0 << 6)
+#define RT5668_VREF_POW_REG			(0x1 << 6)
+#define RT5668_MB1_PATH_MASK			(0x1 << 5)
+#define RT5668_CTRL_MB1_REG			(0x1 << 5)
+#define RT5668_CTRL_MB1_FSM			(0x0 << 5)
+#define RT5668_MB2_PATH_MASK			(0x1 << 4)
+#define RT5668_CTRL_MB2_REG			(0x1 << 4)
+#define RT5668_CTRL_MB2_FSM			(0x0 << 4)
+#define RT5668_TRIG_JD_MASK			(0x1 << 3)
+#define RT5668_TRIG_JD_HIGH			(0x1 << 3)
+#define RT5668_TRIG_JD_LOW			(0x0 << 3)
+#define RT5668_MIC_CAP_MASK			(0x1 << 1)
+#define RT5668_MIC_CAP_HS			(0x1 << 1)
+#define RT5668_MIC_CAP_HP			(0x0 << 1)
+#define RT5668_MIC_CAP_SRC_MASK			(0x1)
+#define RT5668_MIC_CAP_SRC_REG			(0x1)
+#define RT5668_MIC_CAP_SRC_ANA			(0x0)
+
+/* Embedded Jack and Type Detection Control 2 (0x0011) */
+#define RT5668_EXT_JD_SRC			(0x7 << 4)
+#define RT5668_EXT_JD_SRC_SFT			4
+#define RT5668_EXT_JD_SRC_GPIO_JD1		(0x0 << 4)
+#define RT5668_EXT_JD_SRC_GPIO_JD2		(0x1 << 4)
+#define RT5668_EXT_JD_SRC_JDH			(0x2 << 4)
+#define RT5668_EXT_JD_SRC_JDL			(0x3 << 4)
+#define RT5668_EXT_JD_SRC_MANUAL		(0x4 << 4)
+#define RT5668_JACK_TYPE_MASK			(0x3)
+
+/* Combo Jack and Type Detection Control 3 (0x0012) */
+#define RT5668_CBJ_IN_BUF_EN			(0x1 << 7)
+
+/* Combo Jack and Type Detection Control 4 (0x0013) */
+#define RT5668_SEL_SHT_MID_TON_MASK		(0x3 << 12)
+#define RT5668_SEL_SHT_MID_TON_2		(0x0 << 12)
+#define RT5668_SEL_SHT_MID_TON_3		(0x1 << 12)
+#define RT5668_CBJ_JD_TEST_MASK			(0x1 << 6)
+#define RT5668_CBJ_JD_TEST_NORM			(0x0 << 6)
+#define RT5668_CBJ_JD_TEST_MODE			(0x1 << 6)
+
+/* DAC1 Digital Volume (0x0019) */
+#define RT5668_DAC_L1_VOL_MASK			(0xff << 8)
+#define RT5668_DAC_L1_VOL_SFT			8
+#define RT5668_DAC_R1_VOL_MASK			(0xff)
+#define RT5668_DAC_R1_VOL_SFT			0
+
+/* ADC Digital Volume Control (0x001c) */
+#define RT5668_ADC_L_VOL_MASK			(0x7f << 8)
+#define RT5668_ADC_L_VOL_SFT			8
+#define RT5668_ADC_R_VOL_MASK			(0x7f)
+#define RT5668_ADC_R_VOL_SFT			0
+
+/* Stereo1 ADC Boost Gain Control (0x001f) */
+#define RT5668_STO1_ADC_L_BST_MASK		(0x3 << 14)
+#define RT5668_STO1_ADC_L_BST_SFT		14
+#define RT5668_STO1_ADC_R_BST_MASK		(0x3 << 12)
+#define RT5668_STO1_ADC_R_BST_SFT		12
+
+/* Sidetone Control (0x0024) */
+#define RT5668_ST_SRC_SEL			(0x1 << 8)
+#define RT5668_ST_SRC_SFT			8
+#define RT5668_ST_EN_MASK			(0x1 << 6)
+#define RT5668_ST_DIS				(0x0 << 6)
+#define RT5668_ST_EN				(0x1 << 6)
+#define RT5668_ST_EN_SFT			6
+
+/* Stereo1 ADC Mixer Control (0x0026) */
+#define RT5668_M_STO1_ADC_L1			(0x1 << 15)
+#define RT5668_M_STO1_ADC_L1_SFT		15
+#define RT5668_M_STO1_ADC_L2			(0x1 << 14)
+#define RT5668_M_STO1_ADC_L2_SFT		14
+#define RT5668_STO1_ADC1L_SRC_MASK		(0x1 << 13)
+#define RT5668_STO1_ADC1L_SRC_SFT		13
+#define RT5668_STO1_ADC1_SRC_ADC		(0x1 << 13)
+#define RT5668_STO1_ADC1_SRC_DACMIX		(0x0 << 13)
+#define RT5668_STO1_ADC2L_SRC_MASK		(0x1 << 12)
+#define RT5668_STO1_ADC2L_SRC_SFT		12
+#define RT5668_STO1_ADCL_SRC_MASK		(0x3 << 10)
+#define RT5668_STO1_ADCL_SRC_SFT		10
+#define RT5668_STO1_DD_L_SRC_MASK		(0x1 << 9)
+#define RT5668_STO1_DD_L_SRC_SFT		9
+#define RT5668_STO1_DMIC_SRC_MASK		(0x1 << 8)
+#define RT5668_STO1_DMIC_SRC_SFT		8
+#define RT5668_STO1_DMIC_SRC_DMIC2		(0x1 << 8)
+#define RT5668_STO1_DMIC_SRC_DMIC1		(0x0 << 8)
+#define RT5668_M_STO1_ADC_R1			(0x1 << 7)
+#define RT5668_M_STO1_ADC_R1_SFT		7
+#define RT5668_M_STO1_ADC_R2			(0x1 << 6)
+#define RT5668_M_STO1_ADC_R2_SFT		6
+#define RT5668_STO1_ADC1R_SRC_MASK		(0x1 << 5)
+#define RT5668_STO1_ADC1R_SRC_SFT		5
+#define RT5668_STO1_ADC2R_SRC_MASK		(0x1 << 4)
+#define RT5668_STO1_ADC2R_SRC_SFT		4
+#define RT5668_STO1_ADCR_SRC_MASK		(0x3 << 2)
+#define RT5668_STO1_ADCR_SRC_SFT		2
+
+/* ADC Mixer to DAC Mixer Control (0x0029) */
+#define RT5668_M_ADCMIX_L			(0x1 << 15)
+#define RT5668_M_ADCMIX_L_SFT			15
+#define RT5668_M_DAC1_L				(0x1 << 14)
+#define RT5668_M_DAC1_L_SFT			14
+#define RT5668_DAC1_R_SEL_MASK			(0x1 << 10)
+#define RT5668_DAC1_R_SEL_SFT			10
+#define RT5668_DAC1_L_SEL_MASK			(0x1 << 8)
+#define RT5668_DAC1_L_SEL_SFT			8
+#define RT5668_M_ADCMIX_R			(0x1 << 7)
+#define RT5668_M_ADCMIX_R_SFT			7
+#define RT5668_M_DAC1_R				(0x1 << 6)
+#define RT5668_M_DAC1_R_SFT			6
+
+/* Stereo1 DAC Mixer Control (0x002a) */
+#define RT5668_M_DAC_L1_STO_L			(0x1 << 15)
+#define RT5668_M_DAC_L1_STO_L_SFT		15
+#define RT5668_G_DAC_L1_STO_L_MASK		(0x1 << 14)
+#define RT5668_G_DAC_L1_STO_L_SFT		14
+#define RT5668_M_DAC_R1_STO_L			(0x1 << 13)
+#define RT5668_M_DAC_R1_STO_L_SFT		13
+#define RT5668_G_DAC_R1_STO_L_MASK		(0x1 << 12)
+#define RT5668_G_DAC_R1_STO_L_SFT		12
+#define RT5668_M_DAC_L1_STO_R			(0x1 << 7)
+#define RT5668_M_DAC_L1_STO_R_SFT		7
+#define RT5668_G_DAC_L1_STO_R_MASK		(0x1 << 6)
+#define RT5668_G_DAC_L1_STO_R_SFT		6
+#define RT5668_M_DAC_R1_STO_R			(0x1 << 5)
+#define RT5668_M_DAC_R1_STO_R_SFT		5
+#define RT5668_G_DAC_R1_STO_R_MASK		(0x1 << 4)
+#define RT5668_G_DAC_R1_STO_R_SFT		4
+
+/* Analog DAC1 Input Source Control (0x002b) */
+#define RT5668_M_ST_STO_L			(0x1 << 9)
+#define RT5668_M_ST_STO_L_SFT			9
+#define RT5668_M_ST_STO_R			(0x1 << 8)
+#define RT5668_M_ST_STO_R_SFT			8
+#define RT5668_DAC_L1_SRC_MASK			(0x3 << 4)
+#define RT5668_A_DACL1_SFT			4
+#define RT5668_DAC_R1_SRC_MASK			(0x3)
+#define RT5668_A_DACR1_SFT			0
+
+/* Digital Interface Data Control (0x0030) */
+#define RT5668_IF2_ADC_SEL_MASK			(0x3 << 0)
+#define RT5668_IF2_ADC_SEL_SFT			0
+
+/* REC Left Mixer Control 2 (0x003c) */
+#define RT5668_G_CBJ_RM1_L			(0x7 << 10)
+#define RT5668_G_CBJ_RM1_L_SFT			10
+#define RT5668_M_CBJ_RM1_L			(0x1 << 7)
+#define RT5668_M_CBJ_RM1_L_SFT			7
+
+/* Power Management for Digital 1 (0x0061) */
+#define RT5668_PWR_I2S1				(0x1 << 15)
+#define RT5668_PWR_I2S1_BIT			15
+#define RT5668_PWR_I2S2				(0x1 << 14)
+#define RT5668_PWR_I2S2_BIT			14
+#define RT5668_PWR_DAC_L1			(0x1 << 11)
+#define RT5668_PWR_DAC_L1_BIT			11
+#define RT5668_PWR_DAC_R1			(0x1 << 10)
+#define RT5668_PWR_DAC_R1_BIT			10
+#define RT5668_PWR_LDO				(0x1 << 8)
+#define RT5668_PWR_LDO_BIT			8
+#define RT5668_PWR_ADC_L1			(0x1 << 4)
+#define RT5668_PWR_ADC_L1_BIT			4
+#define RT5668_PWR_ADC_R1			(0x1 << 3)
+#define RT5668_PWR_ADC_R1_BIT			3
+#define RT5668_DIG_GATE_CTRL			(0x1 << 0)
+#define RT5668_DIG_GATE_CTRL_SFT		0
+
+/* Power Management for Digital 2 (0x0062) */
+#define RT5668_PWR_ADC_S1F			(0x1 << 15)
+#define RT5668_PWR_ADC_S1F_BIT			15
+#define RT5668_PWR_DAC_S1F			(0x1 << 10)
+#define RT5668_PWR_DAC_S1F_BIT			10
+
+/* Power Management for Analog 1 (0x0063) */
+#define RT5668_PWR_VREF1			(0x1 << 15)
+#define RT5668_PWR_VREF1_BIT			15
+#define RT5668_PWR_FV1				(0x1 << 14)
+#define RT5668_PWR_FV1_BIT			14
+#define RT5668_PWR_VREF2			(0x1 << 13)
+#define RT5668_PWR_VREF2_BIT			13
+#define RT5668_PWR_FV2				(0x1 << 12)
+#define RT5668_PWR_FV2_BIT			12
+#define RT5668_LDO1_DBG_MASK			(0x3 << 10)
+#define RT5668_PWR_MB				(0x1 << 9)
+#define RT5668_PWR_MB_BIT			9
+#define RT5668_PWR_BG				(0x1 << 7)
+#define RT5668_PWR_BG_BIT			7
+#define RT5668_LDO1_BYPASS_MASK			(0x1 << 6)
+#define RT5668_LDO1_BYPASS			(0x1 << 6)
+#define RT5668_LDO1_NOT_BYPASS			(0x0 << 6)
+#define RT5668_PWR_MA_BIT			6
+#define RT5668_LDO1_DVO_MASK			(0x3 << 4)
+#define RT5668_LDO1_DVO_09			(0x0 << 4)
+#define RT5668_LDO1_DVO_10			(0x1 << 4)
+#define RT5668_LDO1_DVO_12			(0x2 << 4)
+#define RT5668_LDO1_DVO_14			(0x3 << 4)
+#define RT5668_HP_DRIVER_MASK			(0x3 << 2)
+#define RT5668_HP_DRIVER_1X			(0x0 << 2)
+#define RT5668_HP_DRIVER_3X			(0x1 << 2)
+#define RT5668_HP_DRIVER_5X			(0x3 << 2)
+#define RT5668_PWR_HA_L				(0x1 << 1)
+#define RT5668_PWR_HA_L_BIT			1
+#define RT5668_PWR_HA_R				(0x1 << 0)
+#define RT5668_PWR_HA_R_BIT			0
+
+/* Power Management for Analog 2 (0x0064) */
+#define RT5668_PWR_MB1				(0x1 << 11)
+#define RT5668_PWR_MB1_PWR_DOWN			(0x0 << 11)
+#define RT5668_PWR_MB1_BIT			11
+#define RT5668_PWR_MB2				(0x1 << 10)
+#define RT5668_PWR_MB2_PWR_DOWN			(0x0 << 10)
+#define RT5668_PWR_MB2_BIT			10
+#define RT5668_PWR_JDH				(0x1 << 3)
+#define RT5668_PWR_JDH_BIT			3
+#define RT5668_PWR_JDL				(0x1 << 2)
+#define RT5668_PWR_JDL_BIT			2
+#define RT5668_PWR_RM1_L			(0x1 << 1)
+#define RT5668_PWR_RM1_L_BIT			1
+
+/* Power Management for Analog 3 (0x0065) */
+#define RT5668_PWR_CBJ				(0x1 << 9)
+#define RT5668_PWR_CBJ_BIT			9
+#define RT5668_PWR_PLL				(0x1 << 6)
+#define RT5668_PWR_PLL_BIT			6
+#define RT5668_PWR_PLL2B			(0x1 << 5)
+#define RT5668_PWR_PLL2B_BIT			5
+#define RT5668_PWR_PLL2F			(0x1 << 4)
+#define RT5668_PWR_PLL2F_BIT			4
+#define RT5668_PWR_LDO2				(0x1 << 2)
+#define RT5668_PWR_LDO2_BIT			2
+#define RT5668_PWR_DET_SPKVDD			(0x1 << 1)
+#define RT5668_PWR_DET_SPKVDD_BIT		1
+
+/* Power Management for Mixer (0x0066) */
+#define RT5668_PWR_STO1_DAC_L			(0x1 << 5)
+#define RT5668_PWR_STO1_DAC_L_BIT		5
+#define RT5668_PWR_STO1_DAC_R			(0x1 << 4)
+#define RT5668_PWR_STO1_DAC_R_BIT		4
+
+/* MCLK and System Clock Detection Control (0x006b) */
+#define RT5668_SYS_CLK_DET			(0x1 << 15)
+#define RT5668_SYS_CLK_DET_SFT			15
+#define RT5668_PLL1_CLK_DET			(0x1 << 14)
+#define RT5668_PLL1_CLK_DET_SFT			14
+#define RT5668_PLL2_CLK_DET			(0x1 << 13)
+#define RT5668_PLL2_CLK_DET_SFT			13
+#define RT5668_POW_CLK_DET2_SFT			8
+#define RT5668_POW_CLK_DET_SFT			0
+
+/* Digital Microphone Control 1 (0x006e) */
+#define RT5668_DMIC_1_EN_MASK			(0x1 << 15)
+#define RT5668_DMIC_1_EN_SFT			15
+#define RT5668_DMIC_1_DIS			(0x0 << 15)
+#define RT5668_DMIC_1_EN			(0x1 << 15)
+#define RT5668_DMIC_1_DP_MASK			(0x3 << 4)
+#define RT5668_DMIC_1_DP_SFT			4
+#define RT5668_DMIC_1_DP_GPIO2			(0x0 << 4)
+#define RT5668_DMIC_1_DP_GPIO5			(0x1 << 4)
+#define RT5668_DMIC_CLK_MASK			(0xf << 0)
+#define RT5668_DMIC_CLK_SFT			0
+
+/* I2S1 Audio Serial Data Port Control (0x0070) */
+#define RT5668_SEL_ADCDAT_MASK			(0x1 << 15)
+#define RT5668_SEL_ADCDAT_OUT			(0x0 << 15)
+#define RT5668_SEL_ADCDAT_IN			(0x1 << 15)
+#define RT5668_SEL_ADCDAT_SFT			15
+#define RT5668_I2S1_TX_CHL_MASK			(0x7 << 12)
+#define RT5668_I2S1_TX_CHL_SFT			12
+#define RT5668_I2S1_TX_CHL_16			(0x0 << 12)
+#define RT5668_I2S1_TX_CHL_20			(0x1 << 12)
+#define RT5668_I2S1_TX_CHL_24			(0x2 << 12)
+#define RT5668_I2S1_TX_CHL_32			(0x3 << 12)
+#define RT5668_I2S1_TX_CHL_8			(0x4 << 12)
+#define RT5668_I2S1_RX_CHL_MASK			(0x7 << 8)
+#define RT5668_I2S1_RX_CHL_SFT			8
+#define RT5668_I2S1_RX_CHL_16			(0x0 << 8)
+#define RT5668_I2S1_RX_CHL_20			(0x1 << 8)
+#define RT5668_I2S1_RX_CHL_24			(0x2 << 8)
+#define RT5668_I2S1_RX_CHL_32			(0x3 << 8)
+#define RT5668_I2S1_RX_CHL_8			(0x4 << 8)
+#define RT5668_I2S1_MONO_MASK			(0x1 << 7)
+#define RT5668_I2S1_MONO_EN			(0x1 << 7)
+#define RT5668_I2S1_MONO_DIS			(0x0 << 7)
+#define RT5668_I2S2_MONO_MASK			(0x1 << 6)
+#define RT5668_I2S2_MONO_EN			(0x1 << 6)
+#define RT5668_I2S2_MONO_DIS			(0x0 << 6)
+#define RT5668_I2S1_DL_MASK			(0x7 << 4)
+#define RT5668_I2S1_DL_SFT			4
+#define RT5668_I2S1_DL_16			(0x0 << 4)
+#define RT5668_I2S1_DL_20			(0x1 << 4)
+#define RT5668_I2S1_DL_24			(0x2 << 4)
+#define RT5668_I2S1_DL_32			(0x3 << 4)
+#define RT5668_I2S1_DL_8			(0x4 << 4)
+
+/* I2S1/2 Audio Serial Data Port Control (0x0070)(0x0071) */
+#define RT5668_I2S2_MS_MASK			(0x1 << 15)
+#define RT5668_I2S2_MS_SFT			15
+#define RT5668_I2S2_MS_M			(0x0 << 15)
+#define RT5668_I2S2_MS_S			(0x1 << 15)
+#define RT5668_I2S2_PIN_CFG_MASK		(0x1 << 14)
+#define RT5668_I2S2_PIN_CFG_SFT			14
+#define RT5668_I2S2_CLK_SEL_MASK		(0x1 << 11)
+#define RT5668_I2S2_CLK_SEL_SFT			11
+#define RT5668_I2S2_OUT_MASK			(0x1 << 9)
+#define RT5668_I2S2_OUT_SFT			9
+#define RT5668_I2S2_OUT_UM			(0x0 << 9)
+#define RT5668_I2S2_OUT_M			(0x1 << 9)
+#define RT5668_I2S_BP_MASK			(0x1 << 8)
+#define RT5668_I2S_BP_SFT			8
+#define RT5668_I2S_BP_NOR			(0x0 << 8)
+#define RT5668_I2S_BP_INV			(0x1 << 8)
+#define RT5668_I2S2_DL_MASK			(0x3 << 4)
+#define RT5668_I2S2_DL_SFT			4
+#define RT5668_I2S2_DL_16			(0x0 << 4)
+#define RT5668_I2S2_DL_20			(0x1 << 4)
+#define RT5668_I2S2_DL_24			(0x2 << 4)
+#define RT5668_I2S2_DL_8			(0x3 << 4)
+#define RT5668_I2S_DF_MASK			(0x7)
+#define RT5668_I2S_DF_SFT			0
+#define RT5668_I2S_DF_I2S			(0x0)
+#define RT5668_I2S_DF_LEFT			(0x1)
+#define RT5668_I2S_DF_PCM_A			(0x2)
+#define RT5668_I2S_DF_PCM_B			(0x3)
+#define RT5668_I2S_DF_PCM_A_N			(0x6)
+#define RT5668_I2S_DF_PCM_B_N			(0x7)
+
+/* ADC/DAC Clock Control 1 (0x0073) */
+#define RT5668_ADC_OSR_MASK			(0xf << 12)
+#define RT5668_ADC_OSR_SFT			12
+#define RT5668_ADC_OSR_D_1			(0x0 << 12)
+#define RT5668_ADC_OSR_D_2			(0x1 << 12)
+#define RT5668_ADC_OSR_D_4			(0x2 << 12)
+#define RT5668_ADC_OSR_D_6			(0x3 << 12)
+#define RT5668_ADC_OSR_D_8			(0x4 << 12)
+#define RT5668_ADC_OSR_D_12			(0x5 << 12)
+#define RT5668_ADC_OSR_D_16			(0x6 << 12)
+#define RT5668_ADC_OSR_D_24			(0x7 << 12)
+#define RT5668_ADC_OSR_D_32			(0x8 << 12)
+#define RT5668_ADC_OSR_D_48			(0x9 << 12)
+#define RT5668_I2S_M_DIV_MASK			(0xf << 8)
+#define RT5668_I2S_M_DIV_SFT			8
+#define RT5668_I2S_M_D_1			(0x0 << 8)
+#define RT5668_I2S_M_D_2			(0x1 << 8)
+#define RT5668_I2S_M_D_3			(0x2 << 8)
+#define RT5668_I2S_M_D_4			(0x3 << 8)
+#define RT5668_I2S_M_D_6			(0x4 << 8)
+#define RT5668_I2S_M_D_8			(0x5 << 8)
+#define RT5668_I2S_M_D_12			(0x6 << 8)
+#define RT5668_I2S_M_D_16			(0x7 << 8)
+#define RT5668_I2S_M_D_24			(0x8 << 8)
+#define RT5668_I2S_M_D_32			(0x9 << 8)
+#define RT5668_I2S_M_D_48			(0xa << 8)
+#define RT5668_I2S_CLK_SRC_MASK			(0x7 << 4)
+#define RT5668_I2S_CLK_SRC_SFT			4
+#define RT5668_I2S_CLK_SRC_MCLK			(0x0 << 4)
+#define RT5668_I2S_CLK_SRC_PLL1			(0x1 << 4)
+#define RT5668_I2S_CLK_SRC_PLL2			(0x2 << 4)
+#define RT5668_I2S_CLK_SRC_SDW			(0x3 << 4)
+#define RT5668_I2S_CLK_SRC_RCCLK		(0x4 << 4) /* 25M */
+#define RT5668_DAC_OSR_MASK			(0xf << 0)
+#define RT5668_DAC_OSR_SFT			0
+#define RT5668_DAC_OSR_D_1			(0x0 << 0)
+#define RT5668_DAC_OSR_D_2			(0x1 << 0)
+#define RT5668_DAC_OSR_D_4			(0x2 << 0)
+#define RT5668_DAC_OSR_D_6			(0x3 << 0)
+#define RT5668_DAC_OSR_D_8			(0x4 << 0)
+#define RT5668_DAC_OSR_D_12			(0x5 << 0)
+#define RT5668_DAC_OSR_D_16			(0x6 << 0)
+#define RT5668_DAC_OSR_D_24			(0x7 << 0)
+#define RT5668_DAC_OSR_D_32			(0x8 << 0)
+#define RT5668_DAC_OSR_D_48			(0x9 << 0)
+
+/* ADC/DAC Clock Control 2 (0x0074) */
+#define RT5668_I2S2_BCLK_MS2_MASK		(0x1 << 11)
+#define RT5668_I2S2_BCLK_MS2_SFT		11
+#define RT5668_I2S2_BCLK_MS2_32			(0x0 << 11)
+#define RT5668_I2S2_BCLK_MS2_64			(0x1 << 11)
+
+/* TDM control 1 (0x0079) */
+#define RT5668_TDM_TX_CH_MASK			(0x3 << 12)
+#define RT5668_TDM_TX_CH_2			(0x0 << 12)
+#define RT5668_TDM_TX_CH_4			(0x1 << 12)
+#define RT5668_TDM_TX_CH_6			(0x2 << 12)
+#define RT5668_TDM_TX_CH_8			(0x3 << 12)
+#define RT5668_TDM_RX_CH_MASK			(0x3 << 8)
+#define RT5668_TDM_RX_CH_2			(0x0 << 8)
+#define RT5668_TDM_RX_CH_4			(0x1 << 8)
+#define RT5668_TDM_RX_CH_6			(0x2 << 8)
+#define RT5668_TDM_RX_CH_8			(0x3 << 8)
+#define RT5668_TDM_ADC_LCA_MASK			(0xf << 4)
+#define RT5668_TDM_ADC_LCA_SFT			4
+#define RT5668_TDM_ADC_DL_SFT			0
+
+/* TDM control 3 (0x007a) */
+#define RT5668_IF1_ADC1_SEL_SFT			14
+#define RT5668_IF1_ADC2_SEL_SFT			12
+#define RT5668_IF1_ADC3_SEL_SFT			10
+#define RT5668_IF1_ADC4_SEL_SFT			8
+#define RT5668_TDM_ADC_SEL_SFT			4
+
+/* TDM/I2S control (0x007e) */
+#define RT5668_TDM_S_BP_MASK			(0x1 << 15)
+#define RT5668_TDM_S_BP_SFT			15
+#define RT5668_TDM_S_BP_NOR			(0x0 << 15)
+#define RT5668_TDM_S_BP_INV			(0x1 << 15)
+#define RT5668_TDM_S_LP_MASK			(0x1 << 14)
+#define RT5668_TDM_S_LP_SFT			14
+#define RT5668_TDM_S_LP_NOR			(0x0 << 14)
+#define RT5668_TDM_S_LP_INV			(0x1 << 14)
+#define RT5668_TDM_DF_MASK			(0x7 << 11)
+#define RT5668_TDM_DF_SFT			11
+#define RT5668_TDM_DF_I2S			(0x0 << 11)
+#define RT5668_TDM_DF_LEFT			(0x1 << 11)
+#define RT5668_TDM_DF_PCM_A			(0x2 << 11)
+#define RT5668_TDM_DF_PCM_B			(0x3 << 11)
+#define RT5668_TDM_DF_PCM_A_N			(0x6 << 11)
+#define RT5668_TDM_DF_PCM_B_N			(0x7 << 11)
+#define RT5668_TDM_CL_MASK			(0x3 << 4)
+#define RT5668_TDM_CL_16			(0x0 << 4)
+#define RT5668_TDM_CL_20			(0x1 << 4)
+#define RT5668_TDM_CL_24			(0x2 << 4)
+#define RT5668_TDM_CL_32			(0x3 << 4)
+#define RT5668_TDM_M_BP_MASK			(0x1 << 2)
+#define RT5668_TDM_M_BP_SFT			2
+#define RT5668_TDM_M_BP_NOR			(0x0 << 2)
+#define RT5668_TDM_M_BP_INV			(0x1 << 2)
+#define RT5668_TDM_M_LP_MASK			(0x1 << 1)
+#define RT5668_TDM_M_LP_SFT			1
+#define RT5668_TDM_M_LP_NOR			(0x0 << 1)
+#define RT5668_TDM_M_LP_INV			(0x1 << 1)
+#define RT5668_TDM_MS_MASK			(0x1 << 0)
+#define RT5668_TDM_MS_SFT			0
+#define RT5668_TDM_MS_M				(0x0 << 0)
+#define RT5668_TDM_MS_S				(0x1 << 0)
+
+/* Global Clock Control (0x0080) */
+#define RT5668_SCLK_SRC_MASK			(0x7 << 13)
+#define RT5668_SCLK_SRC_SFT			13
+#define RT5668_SCLK_SRC_MCLK			(0x0 << 13)
+#define RT5668_SCLK_SRC_PLL1			(0x1 << 13)
+#define RT5668_SCLK_SRC_PLL2			(0x2 << 13)
+#define RT5668_SCLK_SRC_SDW			(0x3 << 13)
+#define RT5668_SCLK_SRC_RCCLK			(0x4 << 13)
+#define RT5668_PLL1_SRC_MASK			(0x3 << 10)
+#define RT5668_PLL1_SRC_SFT			10
+#define RT5668_PLL1_SRC_MCLK			(0x0 << 10)
+#define RT5668_PLL1_SRC_BCLK1			(0x1 << 10)
+#define RT5668_PLL1_SRC_SDW			(0x2 << 10)
+#define RT5668_PLL1_SRC_RC			(0x3 << 10)
+#define RT5668_PLL2_SRC_MASK			(0x3 << 8)
+#define RT5668_PLL2_SRC_SFT			8
+#define RT5668_PLL2_SRC_MCLK			(0x0 << 8)
+#define RT5668_PLL2_SRC_BCLK1			(0x1 << 8)
+#define RT5668_PLL2_SRC_SDW			(0x2 << 8)
+#define RT5668_PLL2_SRC_RC			(0x3 << 8)
+
+#define RT5668_PLL_INP_MAX			40000000
+#define RT5668_PLL_INP_MIN			256000
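+/*
+ * As on other Realtek codecs (cf. rl6231_pll_calc()), the PLL output is
+ * presumably Fout = Fin * (N + 2) / ((M + 2) * (K + 2)), with the M and K
+ * dividers individually bypassable via the _BP bits below.
+ */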
+/* PLL M/N/K Code Control 1 (0x0081) */
+#define RT5668_PLL_N_MAX			0x001ff
+#define RT5668_PLL_N_MASK			(RT5668_PLL_N_MAX << 7)
+#define RT5668_PLL_N_SFT			7
+#define RT5668_PLL_K_MAX			0x001f
+#define RT5668_PLL_K_MASK			(RT5668_PLL_K_MAX)
+#define RT5668_PLL_K_SFT			0
+
+/* PLL M/N/K Code Control 2 (0x0082) */
+#define RT5668_PLL_M_MAX			0x00f
+#define RT5668_PLL_M_MASK			(RT5668_PLL_M_MAX << 12)
+#define RT5668_PLL_M_SFT			12
+#define RT5668_PLL_M_BP				(0x1 << 11)
+#define RT5668_PLL_M_BP_SFT			11
+#define RT5668_PLL_K_BP				(0x1 << 10)
+#define RT5668_PLL_K_BP_SFT			10
+
+/* PLL tracking mode 1 (0x0083) */
+#define RT5668_DA_ASRC_MASK			(0x1 << 13)
+#define RT5668_DA_ASRC_SFT			13
+#define RT5668_DAC_STO1_ASRC_MASK		(0x1 << 12)
+#define RT5668_DAC_STO1_ASRC_SFT		12
+#define RT5668_AD_ASRC_MASK			(0x1 << 8)
+#define RT5668_AD_ASRC_SFT			8
+#define RT5668_AD_ASRC_SEL_MASK			(0x1 << 4)
+#define RT5668_AD_ASRC_SEL_SFT			4
+#define RT5668_DMIC_ASRC_MASK			(0x1 << 3)
+#define RT5668_DMIC_ASRC_SFT			3
+#define RT5668_ADC_STO1_ASRC_MASK		(0x1 << 2)
+#define RT5668_ADC_STO1_ASRC_SFT		2
+#define RT5668_DA_ASRC_SEL_MASK			(0x1 << 0)
+#define RT5668_DA_ASRC_SEL_SFT			0
+
+/* PLL tracking mode 2/3 (0x0084, 0x0085) */
+#define RT5668_FILTER_CLK_SEL_MASK		(0x7 << 12)
+#define RT5668_FILTER_CLK_SEL_SFT		12
+
+/* ASRC Control 4 (0x0086) */
+#define RT5668_ASRCIN_FTK_N1_MASK		(0x3 << 14)
+#define RT5668_ASRCIN_FTK_N1_SFT		14
+#define RT5668_ASRCIN_FTK_N2_MASK		(0x3 << 12)
+#define RT5668_ASRCIN_FTK_N2_SFT		12
+#define RT5668_ASRCIN_FTK_M1_MASK		(0x7 << 8)
+#define RT5668_ASRCIN_FTK_M1_SFT		8
+#define RT5668_ASRCIN_FTK_M2_MASK		(0x7 << 4)
+#define RT5668_ASRCIN_FTK_M2_SFT		4
+
+/* SoundWire reference clk (0x008d) */
+#define RT5668_PLL2_OUT_MASK			(0x1 << 8)
+#define RT5668_PLL2_OUT_98M			(0x0 << 8)
+#define RT5668_PLL2_OUT_49M			(0x1 << 8)
+#define RT5668_SDW_REF_2_MASK			(0xf << 4)
+#define RT5668_SDW_REF_2_SFT			4
+#define RT5668_SDW_REF_2_48K			(0x0 << 4)
+#define RT5668_SDW_REF_2_96K			(0x1 << 4)
+#define RT5668_SDW_REF_2_192K			(0x2 << 4)
+#define RT5668_SDW_REF_2_32K			(0x3 << 4)
+#define RT5668_SDW_REF_2_24K			(0x4 << 4)
+#define RT5668_SDW_REF_2_16K			(0x5 << 4)
+#define RT5668_SDW_REF_2_12K			(0x6 << 4)
+#define RT5668_SDW_REF_2_8K			(0x7 << 4)
+#define RT5668_SDW_REF_2_44K			(0x8 << 4)
+#define RT5668_SDW_REF_2_88K			(0x9 << 4)
+#define RT5668_SDW_REF_2_176K			(0xa << 4)
+#define RT5668_SDW_REF_2_353K			(0xb << 4)
+#define RT5668_SDW_REF_2_22K			(0xc << 4)
+#define RT5668_SDW_REF_2_384K			(0xd << 4)
+#define RT5668_SDW_REF_2_11K			(0xe << 4)
+#define RT5668_SDW_REF_1_MASK			(0xf << 0)
+#define RT5668_SDW_REF_1_SFT			0
+#define RT5668_SDW_REF_1_48K			(0x0 << 0)
+#define RT5668_SDW_REF_1_96K			(0x1 << 0)
+#define RT5668_SDW_REF_1_192K			(0x2 << 0)
+#define RT5668_SDW_REF_1_32K			(0x3 << 0)
+#define RT5668_SDW_REF_1_24K			(0x4 << 0)
+#define RT5668_SDW_REF_1_16K			(0x5 << 0)
+#define RT5668_SDW_REF_1_12K			(0x6 << 0)
+#define RT5668_SDW_REF_1_8K			(0x7 << 0)
+#define RT5668_SDW_REF_1_44K			(0x8 << 0)
+#define RT5668_SDW_REF_1_88K			(0x9 << 0)
+#define RT5668_SDW_REF_1_176K			(0xa << 0)
+#define RT5668_SDW_REF_1_353K			(0xb << 0)
+#define RT5668_SDW_REF_1_22K			(0xc << 0)
+#define RT5668_SDW_REF_1_384K			(0xd << 0)
+#define RT5668_SDW_REF_1_11K			(0xe << 0)
+
+/* Depop Mode Control 1 (0x008e) */
+#define RT5668_PUMP_EN				(0x1 << 3)
+#define RT5668_PUMP_EN_SFT				3
+#define RT5668_CAPLESS_EN			(0x1 << 0)
+#define RT5668_CAPLESS_EN_SFT			0
+
+/* Depop Mode Control 2 (0x008f) */
+#define RT5668_RAMP_MASK			(0x1 << 12)
+#define RT5668_RAMP_SFT				12
+#define RT5668_RAMP_DIS				(0x0 << 12)
+#define RT5668_RAMP_EN				(0x1 << 12)
+#define RT5668_BPS_MASK				(0x1 << 11)
+#define RT5668_BPS_SFT				11
+#define RT5668_BPS_DIS				(0x0 << 11)
+#define RT5668_BPS_EN				(0x1 << 11)
+#define RT5668_FAST_UPDN_MASK			(0x1 << 10)
+#define RT5668_FAST_UPDN_SFT			10
+#define RT5668_FAST_UPDN_DIS			(0x0 << 10)
+#define RT5668_FAST_UPDN_EN			(0x1 << 10)
+#define RT5668_VLO_MASK				(0x1 << 7)
+#define RT5668_VLO_SFT				7
+#define RT5668_VLO_3V				(0x0 << 7)
+#define RT5668_VLO_33V				(0x1 << 7)
+
+/* HPOUT charge pump 1 (0x0091) */
+#define RT5668_OSW_L_MASK			(0x1 << 11)
+#define RT5668_OSW_L_SFT			11
+#define RT5668_OSW_L_DIS			(0x0 << 11)
+#define RT5668_OSW_L_EN				(0x1 << 11)
+#define RT5668_OSW_R_MASK			(0x1 << 10)
+#define RT5668_OSW_R_SFT			10
+#define RT5668_OSW_R_DIS			(0x0 << 10)
+#define RT5668_OSW_R_EN				(0x1 << 10)
+#define RT5668_PM_HP_MASK			(0x3 << 8)
+#define RT5668_PM_HP_SFT			8
+#define RT5668_PM_HP_LV				(0x0 << 8)
+#define RT5668_PM_HP_MV				(0x1 << 8)
+#define RT5668_PM_HP_HV				(0x2 << 8)
+#define RT5668_IB_HP_MASK			(0x3 << 6)
+#define RT5668_IB_HP_SFT			6
+#define RT5668_IB_HP_125IL			(0x0 << 6)
+#define RT5668_IB_HP_25IL			(0x1 << 6)
+#define RT5668_IB_HP_5IL			(0x2 << 6)
+#define RT5668_IB_HP_1IL			(0x3 << 6)
+
+/* Micbias Control1 (0x0093) */
+#define RT5668_MIC1_OV_MASK			(0x3 << 14)
+#define RT5668_MIC1_OV_SFT			14
+#define RT5668_MIC1_OV_2V7			(0x0 << 14)
+#define RT5668_MIC1_OV_2V4			(0x1 << 14)
+#define RT5668_MIC1_OV_2V25			(0x3 << 14)
+#define RT5668_MIC1_OV_1V8			(0x4 << 14)
+#define RT5668_MIC1_CLK_MASK			(0x1 << 13)
+#define RT5668_MIC1_CLK_SFT			13
+#define RT5668_MIC1_CLK_DIS			(0x0 << 13)
+#define RT5668_MIC1_CLK_EN			(0x1 << 13)
+#define RT5668_MIC1_OVCD_MASK			(0x1 << 12)
+#define RT5668_MIC1_OVCD_SFT			12
+#define RT5668_MIC1_OVCD_DIS			(0x0 << 12)
+#define RT5668_MIC1_OVCD_EN			(0x1 << 12)
+#define RT5668_MIC1_OVTH_MASK			(0x3 << 10)
+#define RT5668_MIC1_OVTH_SFT			10
+#define RT5668_MIC1_OVTH_768UA			(0x0 << 10)
+#define RT5668_MIC1_OVTH_960UA			(0x1 << 10)
+#define RT5668_MIC1_OVTH_1152UA			(0x2 << 10)
+#define RT5668_MIC1_OVTH_1960UA			(0x3 << 10)
+#define RT5668_MIC2_OV_MASK			(0x3 << 8)
+#define RT5668_MIC2_OV_SFT			8
+#define RT5668_MIC2_OV_2V7			(0x0 << 8)
+#define RT5668_MIC2_OV_2V4			(0x1 << 8)
+#define RT5668_MIC2_OV_2V25			(0x3 << 8)
+#define RT5668_MIC2_OV_1V8			(0x4 << 8)
+#define RT5668_MIC2_CLK_MASK			(0x1 << 7)
+#define RT5668_MIC2_CLK_SFT			7
+#define RT5668_MIC2_CLK_DIS			(0x0 << 7)
+#define RT5668_MIC2_CLK_EN			(0x1 << 7)
+#define RT5668_MIC2_OVTH_MASK			(0x3 << 4)
+#define RT5668_MIC2_OVTH_SFT			4
+#define RT5668_MIC2_OVTH_768UA			(0x0 << 4)
+#define RT5668_MIC2_OVTH_960UA			(0x1 << 4)
+#define RT5668_MIC2_OVTH_1152UA			(0x2 << 4)
+#define RT5668_MIC2_OVTH_1960UA			(0x3 << 4)
+#define RT5668_PWR_MB_MASK			(0x1 << 3)
+#define RT5668_PWR_MB_SFT			3
+#define RT5668_PWR_MB_PD			(0x0 << 3)
+#define RT5668_PWR_MB_PU			(0x1 << 3)
+
+/* Micbias Control2 (0x0094) */
+#define RT5668_PWR_CLK25M_MASK			(0x1 << 9)
+#define RT5668_PWR_CLK25M_SFT			9
+#define RT5668_PWR_CLK25M_PD			(0x0 << 9)
+#define RT5668_PWR_CLK25M_PU			(0x1 << 9)
+#define RT5668_PWR_CLK1M_MASK			(0x1 << 8)
+#define RT5668_PWR_CLK1M_SFT			8
+#define RT5668_PWR_CLK1M_PD			(0x0 << 8)
+#define RT5668_PWR_CLK1M_PU			(0x1 << 8)
+
+/* RC Clock Control (0x009f) */
+#define RT5668_POW_IRQ				(0x1 << 15)
+#define RT5668_POW_JDH				(0x1 << 14)
+#define RT5668_POW_JDL				(0x1 << 13)
+#define RT5668_POW_ANA				(0x1 << 12)
+
+/* I2S Master Mode Clock Control 1 (0x00a0) */
+#define RT5668_CLK_SRC_MCLK			(0x0)
+#define RT5668_CLK_SRC_PLL1			(0x1)
+#define RT5668_CLK_SRC_PLL2			(0x2)
+#define RT5668_CLK_SRC_SDW			(0x3)
+#define RT5668_CLK_SRC_RCCLK			(0x4)
+#define RT5668_I2S_PD_1				(0x0)
+#define RT5668_I2S_PD_2				(0x1)
+#define RT5668_I2S_PD_3				(0x2)
+#define RT5668_I2S_PD_4				(0x3)
+#define RT5668_I2S_PD_6				(0x4)
+#define RT5668_I2S_PD_8				(0x5)
+#define RT5668_I2S_PD_12			(0x6)
+#define RT5668_I2S_PD_16			(0x7)
+#define RT5668_I2S_PD_24			(0x8)
+#define RT5668_I2S_PD_32			(0x9)
+#define RT5668_I2S_PD_48			(0xa)
+#define RT5668_I2S2_SRC_MASK			(0x3 << 4)
+#define RT5668_I2S2_SRC_SFT			4
+#define RT5668_I2S2_M_PD_MASK			(0xf << 0)
+#define RT5668_I2S2_M_PD_SFT			0
+
+/* IRQ Control 1 (0x00b6) */
+#define RT5668_JD1_PULSE_EN_MASK		(0x1 << 10)
+#define RT5668_JD1_PULSE_EN_SFT			10
+#define RT5668_JD1_PULSE_DIS			(0x0 << 10)
+#define RT5668_JD1_PULSE_EN			(0x1 << 10)
+
+/* IRQ Control 2 (0x00b7) */
+#define RT5668_JD1_EN_MASK			(0x1 << 15)
+#define RT5668_JD1_EN_SFT			15
+#define RT5668_JD1_DIS				(0x0 << 15)
+#define RT5668_JD1_EN				(0x1 << 15)
+#define RT5668_JD1_POL_MASK			(0x1 << 13)
+#define RT5668_JD1_POL_NOR			(0x0 << 13)
+#define RT5668_JD1_POL_INV			(0x1 << 13)
+
+/* IRQ Control 3 (0x00b8) */
+#define RT5668_IL_IRQ_MASK			(0x1 << 7)
+#define RT5668_IL_IRQ_DIS			(0x0 << 7)
+#define RT5668_IL_IRQ_EN			(0x1 << 7)
+
+/* GPIO Control 1 (0x00c0) */
+#define RT5668_GP1_PIN_MASK			(0x3 << 14)
+#define RT5668_GP1_PIN_SFT			14
+#define RT5668_GP1_PIN_GPIO1			(0x0 << 14)
+#define RT5668_GP1_PIN_IRQ			(0x1 << 14)
+#define RT5668_GP1_PIN_DMIC_CLK			(0x2 << 14)
+#define RT5668_GP2_PIN_MASK			(0x3 << 12)
+#define RT5668_GP2_PIN_SFT			12
+#define RT5668_GP2_PIN_GPIO2			(0x0 << 12)
+#define RT5668_GP2_PIN_LRCK2			(0x1 << 12)
+#define RT5668_GP2_PIN_DMIC_SDA			(0x2 << 12)
+#define RT5668_GP3_PIN_MASK			(0x3 << 10)
+#define RT5668_GP3_PIN_SFT			10
+#define RT5668_GP3_PIN_GPIO3			(0x0 << 10)
+#define RT5668_GP3_PIN_BCLK2			(0x1 << 10)
+#define RT5668_GP3_PIN_DMIC_CLK			(0x2 << 10)
+#define RT5668_GP4_PIN_MASK			(0x3 << 8)
+#define RT5668_GP4_PIN_SFT			8
+#define RT5668_GP4_PIN_GPIO4			(0x0 << 8)
+#define RT5668_GP4_PIN_ADCDAT1			(0x1 << 8)
+#define RT5668_GP4_PIN_DMIC_CLK			(0x2 << 8)
+#define RT5668_GP4_PIN_ADCDAT2			(0x3 << 8)
+#define RT5668_GP5_PIN_MASK			(0x3 << 6)
+#define RT5668_GP5_PIN_SFT			6
+#define RT5668_GP5_PIN_GPIO5			(0x0 << 6)
+#define RT5668_GP5_PIN_DACDAT1			(0x1 << 6)
+#define RT5668_GP5_PIN_DMIC_SDA			(0x2 << 6)
+#define RT5668_GP6_PIN_MASK			(0x1 << 5)
+#define RT5668_GP6_PIN_SFT			5
+#define RT5668_GP6_PIN_GPIO6			(0x0 << 5)
+#define RT5668_GP6_PIN_LRCK1			(0x1 << 5)
+
+/* GPIO Control 2 (0x00c1) */
+#define RT5668_GP1_PF_MASK			(0x1 << 15)
+#define RT5668_GP1_PF_IN			(0x0 << 15)
+#define RT5668_GP1_PF_OUT			(0x1 << 15)
+#define RT5668_GP1_OUT_MASK			(0x1 << 14)
+#define RT5668_GP1_OUT_L			(0x0 << 14)
+#define RT5668_GP1_OUT_H			(0x1 << 14)
+#define RT5668_GP2_PF_MASK			(0x1 << 13)
+#define RT5668_GP2_PF_IN			(0x0 << 13)
+#define RT5668_GP2_PF_OUT			(0x1 << 13)
+#define RT5668_GP2_OUT_MASK			(0x1 << 12)
+#define RT5668_GP2_OUT_L			(0x0 << 12)
+#define RT5668_GP2_OUT_H			(0x1 << 12)
+#define RT5668_GP3_PF_MASK			(0x1 << 11)
+#define RT5668_GP3_PF_IN			(0x0 << 11)
+#define RT5668_GP3_PF_OUT			(0x1 << 11)
+#define RT5668_GP3_OUT_MASK			(0x1 << 10)
+#define RT5668_GP3_OUT_L			(0x0 << 10)
+#define RT5668_GP3_OUT_H			(0x1 << 10)
+#define RT5668_GP4_PF_MASK			(0x1 << 9)
+#define RT5668_GP4_PF_IN			(0x0 << 9)
+#define RT5668_GP4_PF_OUT			(0x1 << 9)
+#define RT5668_GP4_OUT_MASK			(0x1 << 8)
+#define RT5668_GP4_OUT_L			(0x0 << 8)
+#define RT5668_GP4_OUT_H			(0x1 << 8)
+#define RT5668_GP5_PF_MASK			(0x1 << 7)
+#define RT5668_GP5_PF_IN			(0x0 << 7)
+#define RT5668_GP5_PF_OUT			(0x1 << 7)
+#define RT5668_GP5_OUT_MASK			(0x1 << 6)
+#define RT5668_GP5_OUT_L			(0x0 << 6)
+#define RT5668_GP5_OUT_H			(0x1 << 6)
+#define RT5668_GP6_PF_MASK			(0x1 << 5)
+#define RT5668_GP6_PF_IN			(0x0 << 5)
+#define RT5668_GP6_PF_OUT			(0x1 << 5)
+#define RT5668_GP6_OUT_MASK			(0x1 << 4)
+#define RT5668_GP6_OUT_L			(0x0 << 4)
+#define RT5668_GP6_OUT_H			(0x1 << 4)
+
+/* GPIO Status (0x00c2) */
+#define RT5668_GP6_STA				(0x1 << 6)
+#define RT5668_GP5_STA				(0x1 << 5)
+#define RT5668_GP4_STA				(0x1 << 4)
+#define RT5668_GP3_STA				(0x1 << 3)
+#define RT5668_GP2_STA				(0x1 << 2)
+#define RT5668_GP1_STA				(0x1 << 1)
+
+/* Soft volume and zero cross control 1 (0x00d9) */
+#define RT5668_SV_MASK				(0x1 << 15)
+#define RT5668_SV_SFT				15
+#define RT5668_SV_DIS				(0x0 << 15)
+#define RT5668_SV_EN				(0x1 << 15)
+#define RT5668_ZCD_MASK				(0x1 << 10)
+#define RT5668_ZCD_SFT				10
+#define RT5668_ZCD_PD				(0x0 << 10)
+#define RT5668_ZCD_PU				(0x1 << 10)
+#define RT5668_SV_DLY_MASK			(0xf)
+#define RT5668_SV_DLY_SFT			0
+
+/* Soft volume and zero cross control 2 (0x00da) */
+#define RT5668_ZCD_BST1_CBJ_MASK		(0x1 << 7)
+#define RT5668_ZCD_BST1_CBJ_SFT			7
+#define RT5668_ZCD_BST1_CBJ_DIS			(0x0 << 7)
+#define RT5668_ZCD_BST1_CBJ_EN			(0x1 << 7)
+#define RT5668_ZCD_RECMIX_MASK			(0x1)
+#define RT5668_ZCD_RECMIX_SFT			0
+#define RT5668_ZCD_RECMIX_DIS			(0x0)
+#define RT5668_ZCD_RECMIX_EN			(0x1)
+
+/* 4 Button Inline Command Control 2 (0x00e3) */
+#define RT5668_4BTN_IL_MASK			(0x1 << 15)
+#define RT5668_4BTN_IL_EN			(0x1 << 15)
+#define RT5668_4BTN_IL_DIS			(0x0 << 15)
+#define RT5668_4BTN_IL_RST_MASK			(0x1 << 14)
+#define RT5668_4BTN_IL_NOR			(0x1 << 14)
+#define RT5668_4BTN_IL_RST			(0x0 << 14)
+
+/* Analog JD Control (0x00f0) */
+#define RT5668_JDH_RS_MASK			(0x1 << 4)
+#define RT5668_JDH_NO_PLUG			(0x1 << 4)
+#define RT5668_JDH_PLUG				(0x0 << 4)
+
+/* Chopper and Clock control for DAC (0x013a) */
+#define RT5668_CKXEN_DAC1_MASK			(0x1 << 13)
+#define RT5668_CKXEN_DAC1_SFT			13
+#define RT5668_CKGEN_DAC1_MASK			(0x1 << 12)
+#define RT5668_CKGEN_DAC1_SFT			12
+
+/* Chopper and Clock control for ADC (0x013b) */
+#define RT5668_CKXEN_ADC1_MASK			(0x1 << 13)
+#define RT5668_CKXEN_ADC1_SFT			13
+#define RT5668_CKGEN_ADC1_MASK			(0x1 << 12)
+#define RT5668_CKGEN_ADC1_SFT			12
+
+/* Volume test (0x013f) */
+#define RT5668_SEL_CLK_VOL_MASK			(0x1 << 15)
+#define RT5668_SEL_CLK_VOL_EN			(0x1 << 15)
+#define RT5668_SEL_CLK_VOL_DIS			(0x0 << 15)
+
+/* Test Mode Control 1 (0x0145) */
+#define RT5668_AD2DA_LB_MASK			(0x1 << 10)
+#define RT5668_AD2DA_LB_SFT			10
+
+/* Stereo Noise Gate Control 1 (0x0160) */
+#define RT5668_NG2_EN_MASK			(0x1 << 15)
+#define RT5668_NG2_EN				(0x1 << 15)
+#define RT5668_NG2_DIS				(0x0 << 15)
+
+/* Stereo1 DAC Silence Detection Control (0x0190) */
+#define RT5668_DEB_STO_DAC_MASK			(0x7 << 4)
+#define RT5668_DEB_80_MS			(0x0 << 4)
+
+/* SAR ADC Inline Command Control 1 (0x0210) */
+#define RT5668_SAR_BUTT_DET_MASK		(0x1 << 15)
+#define RT5668_SAR_BUTT_DET_EN			(0x1 << 15)
+#define RT5668_SAR_BUTT_DET_DIS			(0x0 << 15)
+#define RT5668_SAR_BUTDET_MODE_MASK		(0x1 << 14)
+#define RT5668_SAR_BUTDET_POW_SAV		(0x1 << 14)
+#define RT5668_SAR_BUTDET_POW_NORM		(0x0 << 14)
+#define RT5668_SAR_BUTDET_RST_MASK		(0x1 << 13)
+#define RT5668_SAR_BUTDET_RST_NORMAL		(0x1 << 13)
+#define RT5668_SAR_BUTDET_RST			(0x0 << 13)
+#define RT5668_SAR_POW_MASK			(0x1 << 12)
+#define RT5668_SAR_POW_EN			(0x1 << 12)
+#define RT5668_SAR_POW_DIS			(0x0 << 12)
+#define RT5668_SAR_RST_MASK			(0x1 << 11)
+#define RT5668_SAR_RST_NORMAL			(0x1 << 11)
+#define RT5668_SAR_RST				(0x0 << 11)
+#define RT5668_SAR_BYPASS_MASK			(0x1 << 10)
+#define RT5668_SAR_BYPASS_EN			(0x1 << 10)
+#define RT5668_SAR_BYPASS_DIS			(0x0 << 10)
+#define RT5668_SAR_SEL_MB1_MASK			(0x1 << 9)
+#define RT5668_SAR_SEL_MB1_SEL			(0x1 << 9)
+#define RT5668_SAR_SEL_MB1_NOSEL		(0x0 << 9)
+#define RT5668_SAR_SEL_MB2_MASK			(0x1 << 8)
+#define RT5668_SAR_SEL_MB2_SEL			(0x1 << 8)
+#define RT5668_SAR_SEL_MB2_NOSEL		(0x0 << 8)
+#define RT5668_SAR_SEL_MODE_MASK		(0x1 << 7)
+#define RT5668_SAR_SEL_MODE_CMP			(0x1 << 7)
+#define RT5668_SAR_SEL_MODE_ADC			(0x0 << 7)
+#define RT5668_SAR_SEL_MB1_MB2_MASK		(0x1 << 5)
+#define RT5668_SAR_SEL_MB1_MB2_AUTO		(0x1 << 5)
+#define RT5668_SAR_SEL_MB1_MB2_MANU		(0x0 << 5)
+#define RT5668_SAR_SEL_SIGNAL_MASK		(0x1 << 4)
+#define RT5668_SAR_SEL_SIGNAL_AUTO		(0x1 << 4)
+#define RT5668_SAR_SEL_SIGNAL_MANU		(0x0 << 4)
+
+/* SAR ADC Inline Command Control 13 (0x021c) */
+#define RT5668_SAR_SOUR_MASK			(0x3f)
+#define RT5668_SAR_SOUR_BTN			(0x3f)
+#define RT5668_SAR_SOUR_TYPE			(0x0)
+
+/* System Clock Source */
+enum {
+	RT5668_SCLK_S_MCLK,
+	RT5668_SCLK_S_PLL1,
+	RT5668_SCLK_S_PLL2,
+	RT5668_SCLK_S_RCCLK,
+};
+
+/* PLL Source */
+enum {
+	RT5668_PLL1_S_MCLK,
+	RT5668_PLL1_S_BCLK1,
+	RT5668_PLL1_S_RCCLK,
+};
+
+enum {
+	RT5668_AIF1,
+	RT5668_AIF2,
+	RT5668_AIFS
+};
+
+/* filter mask */
+enum {
+	RT5668_DA_STEREO1_FILTER = 0x1,
+	RT5668_AD_STEREO1_FILTER = (0x1 << 1),
+};
+
+enum {
+	RT5668_CLK_SEL_SYS,
+	RT5668_CLK_SEL_I2S1_ASRC,
+	RT5668_CLK_SEL_I2S2_ASRC,
+};
+
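+/*
+ * For example, a machine driver could tie both stereo1 filters to the
+ * I2S1 ASRC clock:
+ *
+ *	rt5668_sel_asrc_clk_src(component,
+ *				RT5668_DA_STEREO1_FILTER |
+ *				RT5668_AD_STEREO1_FILTER,
+ *				RT5668_CLK_SEL_I2S1_ASRC);
+ */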
+int rt5668_sel_asrc_clk_src(struct snd_soc_component *component,
+		unsigned int filter_mask, unsigned int clk_src);
+
+#endif /* __RT5668_H__ */
diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c
index dc7df33..732ef92 100644
--- a/sound/soc/codecs/rt5670.c
+++ b/sound/soc/codecs/rt5670.c
@@ -71,7 +71,7 @@
 
 static const struct reg_sequence init_list[] = {
 	{ RT5670_PR_BASE + 0x14, 0x9a8a },
-	{ RT5670_PR_BASE + 0x38, 0x3ba1 },
+	{ RT5670_PR_BASE + 0x38, 0x1fe1 },
 	{ RT5670_PR_BASE + 0x3d, 0x3640 },
 	{ 0x8a, 0x0123 },
 };
diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c
index bc1a23d..8a0181a 100644
--- a/sound/soc/codecs/rt5677.c
+++ b/sound/soc/codecs/rt5677.c
@@ -5006,13 +5006,6 @@
 	.num_ranges = ARRAY_SIZE(rt5677_ranges),
 };
 
-static const struct i2c_device_id rt5677_i2c_id[] = {
-	{ "rt5677", RT5677 },
-	{ "rt5676", RT5676 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, rt5677_i2c_id);
-
 static const struct of_device_id rt5677_of_match[] = {
 	{ .compatible = "realtek,rt5677", RT5677 },
 	{ }
@@ -5130,8 +5123,7 @@
 		regmap_del_irq_chip(i2c->irq, rt5677->irq_data);
 }
 
-static int rt5677_i2c_probe(struct i2c_client *i2c,
-		    const struct i2c_device_id *id)
+static int rt5677_i2c_probe(struct i2c_client *i2c)
 {
 	struct rt5677_priv *rt5677;
 	int ret;
@@ -5278,9 +5270,8 @@
 		.of_match_table = rt5677_of_match,
 		.acpi_match_table = ACPI_PTR(rt5677_acpi_match),
 	},
-	.probe = rt5677_i2c_probe,
+	.probe_new = rt5677_i2c_probe,
 	.remove   = rt5677_i2c_remove,
-	.id_table = rt5677_i2c_id,
 };
 module_i2c_driver(rt5677_i2c_driver);
 
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index 7c1d658..60764f6 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -1,12 +1,8 @@
-/*
- * sgtl5000.c  --  SGTL5000 ALSA SoC Audio driver
- *
- * Copyright 2010-2011 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// sgtl5000.c  --  SGTL5000 ALSA SoC Audio driver
+//
+// Copyright 2010-2011 Freescale Semiconductor, Inc. All Rights Reserved.
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -457,7 +453,7 @@
  * avc_put_threshold function: register_value = 10^(dB/20) * 0.636 * 2^15 ==>
  * dB = ( fls(register_value) - 14.347 ) * 6.02
  *
- * As this calculation is expensive and the threshold dB values may not exeed
+ * As this calculation is expensive and the threshold dB values may not exceed
  * 0 to 96 we use pre-calculated values.
  */
 static int avc_get_threshold(struct snd_kcontrol *kcontrol,
@@ -490,7 +486,7 @@
  *
  * The register value is calculated by following formula:
  *                                    register_value = 10^(dB/20) * 0.636 * 2^15
- * As this calculation is expensive and the threshold dB values may not exeed
+ * As this calculation is expensive and the threshold dB values may not exceed
  * 0 to 96 we use pre-calculated values.
  */
 static int avc_put_threshold(struct snd_kcontrol *kcontrol,
diff --git a/sound/soc/codecs/sgtl5000.h b/sound/soc/codecs/sgtl5000.h
index 28cf637..18cae08 100644
--- a/sound/soc/codecs/sgtl5000.h
+++ b/sound/soc/codecs/sgtl5000.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * sgtl5000.h - SGTL5000 audio codec interface
  *
  * Copyright 2010-2011 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _SGTL5000_H
diff --git a/sound/soc/codecs/ssm2305.c b/sound/soc/codecs/ssm2305.c
new file mode 100644
index 0000000..2968959
--- /dev/null
+++ b/sound/soc/codecs/ssm2305.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Analog Devices SSM2305 Amplifier Driver
+//
+// Copyright (C) 2018 Pengutronix, Marco Felsch <kernel@pengutronix.de>
+//
+
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <sound/soc.h>
+
+#define DRV_NAME "ssm2305"
+
+struct ssm2305 {
+	/* shutdown gpio */
+	struct gpio_desc *gpiod_shutdown;
+};
+
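+/*
+ * DAPM supply widget event: toggle the shutdown GPIO with the power state
+ * of the amplifier path (polarity is handled by the gpiod layer).
+ */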
+static int ssm2305_power_event(struct snd_soc_dapm_widget *w,
+			       struct snd_kcontrol *kctrl, int event)
+{
+	struct snd_soc_component *c = snd_soc_dapm_to_component(w->dapm);
+	struct ssm2305 *data = snd_soc_component_get_drvdata(c);
+
+	gpiod_set_value_cansleep(data->gpiod_shutdown,
+				 SND_SOC_DAPM_EVENT_ON(event));
+
+	return 0;
+}
+
+static const struct snd_soc_dapm_widget ssm2305_dapm_widgets[] = {
+	/* Stereo input/output */
+	SND_SOC_DAPM_INPUT("L_IN"),
+	SND_SOC_DAPM_INPUT("R_IN"),
+	SND_SOC_DAPM_OUTPUT("L_OUT"),
+	SND_SOC_DAPM_OUTPUT("R_OUT"),
+
+	SND_SOC_DAPM_SUPPLY("Power", SND_SOC_NOPM, 0, 0, ssm2305_power_event,
+			    SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+};
+
+static const struct snd_soc_dapm_route ssm2305_dapm_routes[] = {
+	{ "L_OUT", NULL, "L_IN" },
+	{ "R_OUT", NULL, "R_IN" },
+	{ "L_IN", NULL, "Power" },
+	{ "R_IN", NULL, "Power" },
+};
+
+static const struct snd_soc_component_driver ssm2305_component_driver = {
+	.dapm_widgets		= ssm2305_dapm_widgets,
+	.num_dapm_widgets	= ARRAY_SIZE(ssm2305_dapm_widgets),
+	.dapm_routes		= ssm2305_dapm_routes,
+	.num_dapm_routes	= ARRAY_SIZE(ssm2305_dapm_routes),
+};
+
+static int ssm2305_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ssm2305 *priv;
+	int err;
+
+	/* Allocate the private data */
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+
+	/* Get shutdown gpio */
+	priv->gpiod_shutdown = devm_gpiod_get(dev, "shutdown",
+					      GPIOD_OUT_LOW);
+	if (IS_ERR(priv->gpiod_shutdown)) {
+		err = PTR_ERR(priv->gpiod_shutdown);
+		if (err != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get 'shutdown' gpio: %d\n",
+				err);
+		return err;
+	}
+
+	return devm_snd_soc_register_component(dev, &ssm2305_component_driver,
+					       NULL, 0);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id ssm2305_of_match[] = {
+	{ .compatible = "adi,ssm2305", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ssm2305_of_match);
+#endif
+
+static struct platform_driver ssm2305_driver = {
+	.driver = {
+		.name = DRV_NAME,
+		.of_match_table = of_match_ptr(ssm2305_of_match),
+	},
+	.probe = ssm2305_probe,
+};
+
+module_platform_driver(ssm2305_driver);
+
+MODULE_DESCRIPTION("ASoC SSM2305 amplifier driver");
+MODULE_AUTHOR("Marco Felsch <m.felsch@pengutronix.de>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/codecs/tas6424.c b/sound/soc/codecs/tas6424.c
index 4f3a16c..14999b9 100644
--- a/sound/soc/codecs/tas6424.c
+++ b/sound/soc/codecs/tas6424.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/regulator/consumer.h>
 #include <linux/delay.h>
+#include <linux/gpio/consumer.h>
 
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
@@ -43,6 +44,8 @@
 	unsigned int last_fault1;
 	unsigned int last_fault2;
 	unsigned int last_warn;
+	struct gpio_desc *standby_gpio;
+	struct gpio_desc *mute_gpio;
 };
 
 /*
@@ -61,6 +64,8 @@
 		       TAS6424_CH3_VOL_CTRL, 0, 0xff, 0, dac_tlv),
 	SOC_SINGLE_TLV("Speaker Driver CH4 Playback Volume",
 		       TAS6424_CH4_VOL_CTRL, 0, 0xff, 0, dac_tlv),
+	SOC_SINGLE_STROBE("Auto Diagnostics Switch", TAS6424_DC_DIAG_CTRL1,
+			  TAS6424_LDGBYPASS_SHIFT, 1),
 };
 
 static int tas6424_dac_event(struct snd_soc_dapm_widget *w,
@@ -249,10 +254,16 @@
 static int tas6424_mute(struct snd_soc_dai *dai, int mute)
 {
 	struct snd_soc_component *component = dai->component;
+	struct tas6424_data *tas6424 = snd_soc_component_get_drvdata(component);
 	unsigned int val;
 
 	dev_dbg(component->dev, "%s() mute=%d\n", __func__, mute);
 
+	if (tas6424->mute_gpio) {
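+		/* A dedicated mute pin spares the I2C register round-trip */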
+		gpiod_set_value_cansleep(tas6424->mute_gpio, mute);
+		return 0;
+	}
+
 	if (mute)
 		val = TAS6424_ALL_STATE_MUTE;
 	else
@@ -287,6 +298,12 @@
 {
 	struct tas6424_data *tas6424 = snd_soc_component_get_drvdata(component);
 	int ret;
+	u8 chan_states;
+	int no_auto_diags = 0;
+	unsigned int reg_val;
+
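+	/* A set LDGBYPASS bit means the DC load diagnostics are bypassed */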
+	if (!regmap_read(tas6424->regmap, TAS6424_DC_DIAG_CTRL1, &reg_val))
+		no_auto_diags = reg_val & TAS6424_LDGBYPASS_MASK;
 
 	ret = regulator_bulk_enable(ARRAY_SIZE(tas6424->supplies),
 				    tas6424->supplies);
@@ -303,12 +320,25 @@
 		return ret;
 	}
 
-	snd_soc_component_write(component, TAS6424_CH_STATE_CTRL, TAS6424_ALL_STATE_MUTE);
+	if (tas6424->mute_gpio) {
+		gpiod_set_value_cansleep(tas6424->mute_gpio, 0);
+		/*
+		 * channels are muted via the mute pin.  Don't also mute
+		 * them via the registers so that subsequent register
+		 * access is not necessary to un-mute the channels
+		 */
+		chan_states = TAS6424_ALL_STATE_PLAY;
+	} else {
+		chan_states = TAS6424_ALL_STATE_MUTE;
+	}
+	snd_soc_component_write(component, TAS6424_CH_STATE_CTRL, chan_states);
 
 	/* any time we come out of HIZ, the output channels automatically run DC
-	 * load diagnostics, wait here until this completes
+	 * load diagnostics if auto diagnostics are enabled. Wait here until
+	 * this completes.
 	 */
-	msleep(230);
+	if (!no_auto_diags)
+		msleep(230);
 
 	return 0;
 }
@@ -627,6 +657,38 @@
 		return ret;
 	}
 
+	/*
+	 * Get control of the standby pin and set it LOW to take the codec
+	 * out of the stand-by mode.
+	 * Note: The actual pin polarity is taken care of in the GPIO lib
+	 * according to the polarity specified in the DTS.
+	 */
+	tas6424->standby_gpio = devm_gpiod_get_optional(dev, "standby",
+						      GPIOD_OUT_LOW);
+	if (IS_ERR(tas6424->standby_gpio)) {
+		if (PTR_ERR(tas6424->standby_gpio) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "failed to get standby GPIO: %ld\n",
+			PTR_ERR(tas6424->standby_gpio));
+		tas6424->standby_gpio = NULL;
+	}
+
+	/*
+	 * Get control of the mute pin and set it HIGH in order to start with
+	 * all the outputs muted.
+	 * Note: The actual pin polarity is taken care of in the GPIO lib
+	 * according to the polarity specified in the DTS.
+	 */
+	tas6424->mute_gpio = devm_gpiod_get_optional(dev, "mute",
+						      GPIOD_OUT_HIGH);
+	if (IS_ERR(tas6424->mute_gpio)) {
+		if (PTR_ERR(tas6424->mute_gpio) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "failed to get nmute GPIO: %ld\n",
+			PTR_ERR(tas6424->mute_gpio));
+		tas6424->mute_gpio = NULL;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(tas6424->supplies); i++)
 		tas6424->supplies[i].supply = tas6424_supply_names[i];
 	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(tas6424->supplies),
@@ -671,6 +733,10 @@
 
 	cancel_delayed_work_sync(&tas6424->fault_check_work);
 
+	/* put the codec in stand-by */
+	if (tas6424->standby_gpio)
+		gpiod_set_value_cansleep(tas6424->standby_gpio, 1);
+
 	ret = regulator_bulk_disable(ARRAY_SIZE(tas6424->supplies),
 				     tas6424->supplies);
 	if (ret < 0) {
diff --git a/sound/soc/codecs/tas6424.h b/sound/soc/codecs/tas6424.h
index 4305883..b5958c4 100644
--- a/sound/soc/codecs/tas6424.h
+++ b/sound/soc/codecs/tas6424.h
@@ -111,6 +111,10 @@
 					 TAS6424_CH3_STATE_DIAG | \
 					 TAS6424_CH4_STATE_DIAG)
 
+/* TAS6424_DC_DIAG_CTRL1 */
+#define TAS6424_LDGBYPASS_SHIFT		0
+#define TAS6424_LDGBYPASS_MASK		BIT(TAS6424_LDGBYPASS_SHIFT)
+
 /* TAS6424_GLOB_FAULT1_REG */
 #define TAS6424_FAULT_CLOCK		BIT(4)
 #define TAS6424_FAULT_PVDD_OV		BIT(3)
diff --git a/sound/soc/codecs/tfa9879.c b/sound/soc/codecs/tfa9879.c
index 6d213c6..abc114a 100644
--- a/sound/soc/codecs/tfa9879.c
+++ b/sound/soc/codecs/tfa9879.c
@@ -1,15 +1,9 @@
-/*
- * tfa9879.c  --  driver for NXP Semiconductors TFA9879
- *
- * Copyright (C) 2014 Axentia Technologies AB
- * Author: Peter Rosin <peda@axentia.se>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// tfa9879.c  --  driver for NXP Semiconductors TFA9879
+//
+// Copyright (C) 2014 Axentia Technologies AB
+// Author: Peter Rosin <peda@axentia.se>
 
 #include <linux/module.h>
 #include <linux/init.h>
@@ -88,13 +82,14 @@
 	}
 
 	if (tfa9879->lsb_justified)
-		snd_soc_component_update_bits(component, TFA9879_SERIAL_INTERFACE_1,
-				    TFA9879_I2S_SET_MASK,
-				    i2s_set << TFA9879_I2S_SET_SHIFT);
+		snd_soc_component_update_bits(component,
+					      TFA9879_SERIAL_INTERFACE_1,
+					      TFA9879_I2S_SET_MASK,
+					      i2s_set << TFA9879_I2S_SET_SHIFT);
 
 	snd_soc_component_update_bits(component, TFA9879_SERIAL_INTERFACE_1,
-			    TFA9879_I2S_FS_MASK,
-			    fs << TFA9879_I2S_FS_SHIFT);
+				      TFA9879_I2S_FS_MASK,
+				      fs << TFA9879_I2S_FS_SHIFT);
 	return 0;
 }
 
@@ -103,8 +98,8 @@
 	struct snd_soc_component *component = dai->component;
 
 	snd_soc_component_update_bits(component, TFA9879_MISC_CONTROL,
-			    TFA9879_S_MUTE_MASK,
-			    !!mute << TFA9879_S_MUTE_SHIFT);
+				      TFA9879_S_MUTE_MASK,
+				      !!mute << TFA9879_S_MUTE_SHIFT);
 
 	return 0;
 }
@@ -152,11 +147,11 @@
 	}
 
 	snd_soc_component_update_bits(component, TFA9879_SERIAL_INTERFACE_1,
-			    TFA9879_SCK_POL_MASK,
-			    sck_pol << TFA9879_SCK_POL_SHIFT);
+				      TFA9879_SCK_POL_MASK,
+				      sck_pol << TFA9879_SCK_POL_SHIFT);
 	snd_soc_component_update_bits(component, TFA9879_SERIAL_INTERFACE_1,
-			    TFA9879_I2S_SET_MASK,
-			    i2s_set << TFA9879_I2S_SET_SHIFT);
+				      TFA9879_I2S_SET_MASK,
+				      i2s_set << TFA9879_I2S_SET_SHIFT);
 	return 0;
 }
 
@@ -276,8 +271,7 @@
 	.ops = &tfa9879_dai_ops,
 };
 
-static int tfa9879_i2c_probe(struct i2c_client *i2c,
-			     const struct i2c_device_id *id)
+static int tfa9879_i2c_probe(struct i2c_client *i2c)
 {
 	struct tfa9879_priv *tfa9879;
 	int i;
@@ -298,7 +292,7 @@
 			     tfa9879_regs[i].reg, tfa9879_regs[i].def);
 
 	return devm_snd_soc_register_component(&i2c->dev, &tfa9879_component,
-				      &tfa9879_dai, 1);
+					       &tfa9879_dai, 1);
 }
 
 static const struct i2c_device_id tfa9879_i2c_id[] = {
@@ -318,7 +312,7 @@
 		.name = "tfa9879",
 		.of_match_table = tfa9879_of_match,
 	},
-	.probe = tfa9879_i2c_probe,
+	.probe_new = tfa9879_i2c_probe,
 	.id_table = tfa9879_i2c_id,
 };
 
diff --git a/sound/soc/codecs/tfa9879.h b/sound/soc/codecs/tfa9879.h
index 3408c90..66c88d0 100644
--- a/sound/soc/codecs/tfa9879.h
+++ b/sound/soc/codecs/tfa9879.h
@@ -1,14 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * tfa9879.h  --  driver for NXP Semiconductors TFA9879
  *
  * Copyright (C) 2014 Axentia Technologies AB
  * Author: Peter Rosin <peda@axentia.se>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
  */
 
 #ifndef _TFA9879_H
diff --git a/sound/soc/codecs/tscs42xx.c b/sound/soc/codecs/tscs42xx.c
index bbfc73a..d18ff17 100644
--- a/sound/soc/codecs/tscs42xx.c
+++ b/sound/soc/codecs/tscs42xx.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/clk.h>
 #include <sound/tlv.h>
 #include <sound/pcm_params.h>
 #include <sound/soc.h>
@@ -31,7 +32,6 @@
 
 	int bclk_ratio;
 	int samplerate;
-	unsigned int blrcm;
 	struct mutex audio_params_lock;
 
 	u8 coeff_ram[COEFF_RAM_SIZE];
@@ -42,7 +42,8 @@
 
 	struct regmap *regmap;
 
-	struct device *dev;
+	struct clk *sysclk;
+	int sysclk_src_id;
 };
 
 struct coeff_ram_ctl {
@@ -204,7 +205,8 @@
 		break;
 	default:
 		ret = -EINVAL;
-		dev_err(component->dev, "Unrecognized PLL output freq (%d)\n", ret);
+		dev_err(component->dev,
+				"Unrecognized PLL output freq (%d)\n", ret);
 		return ret;
 	}
 
@@ -261,7 +263,8 @@
 static int coeff_ram_get(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+	struct snd_soc_component *component =
+		snd_soc_kcontrol_component(kcontrol);
 	struct tscs42xx *tscs42xx = snd_soc_component_get_drvdata(component);
 	struct coeff_ram_ctl *ctl =
 		(struct coeff_ram_ctl *)kcontrol->private_value;
@@ -280,7 +283,8 @@
 static int coeff_ram_put(struct snd_kcontrol *kcontrol,
 	struct snd_ctl_elem_value *ucontrol)
 {
-	struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+	struct snd_soc_component *component =
+		snd_soc_kcontrol_component(kcontrol);
 	struct tscs42xx *tscs42xx = snd_soc_component_get_drvdata(component);
 	struct coeff_ram_ctl *ctl =
 		(struct coeff_ram_ctl *)kcontrol->private_value;
@@ -363,7 +367,8 @@
 static int pll_event(struct snd_soc_dapm_widget *w,
 		     struct snd_kcontrol *kcontrol, int event)
 {
-	struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
 	int ret;
 
 	if (SND_SOC_DAPM_EVENT_ON(event))
@@ -377,7 +382,8 @@
 static int dac_event(struct snd_soc_dapm_widget *w,
 		     struct snd_kcontrol *kcontrol, int event)
 {
-	struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
 	struct tscs42xx *tscs42xx = snd_soc_component_get_drvdata(component);
 	int ret;
 
@@ -819,16 +825,19 @@
 		dev_err(component->dev, "Unsupported format width (%d)\n", ret);
 		return ret;
 	}
-	ret = snd_soc_component_update_bits(component, R_AIC1, RM_AIC1_WL, width);
+	ret = snd_soc_component_update_bits(component,
+			R_AIC1, RM_AIC1_WL, width);
 	if (ret < 0) {
-		dev_err(component->dev, "Failed to set sample width (%d)\n", ret);
+		dev_err(component->dev,
+				"Failed to set sample width (%d)\n", ret);
 		return ret;
 	}
 
 	return 0;
 }
 
-static int setup_sample_rate(struct snd_soc_component *component, unsigned int rate)
+static int setup_sample_rate(struct snd_soc_component *component,
+		unsigned int rate)
 {
 	struct tscs42xx *tscs42xx = snd_soc_component_get_drvdata(component);
 	unsigned int br, bm;
@@ -881,24 +890,32 @@
 	}
 
 	/* DAC and ADC share bit and frame clock */
-	ret = snd_soc_component_update_bits(component, R_DACSR, RM_DACSR_DBR, br);
+	ret = snd_soc_component_update_bits(component,
+			R_DACSR, RM_DACSR_DBR, br);
 	if (ret < 0) {
-		dev_err(component->dev, "Failed to update register (%d)\n", ret);
+		dev_err(component->dev,
+				"Failed to update register (%d)\n", ret);
 		return ret;
 	}
-	ret = snd_soc_component_update_bits(component, R_DACSR, RM_DACSR_DBM, bm);
+	ret = snd_soc_component_update_bits(component,
+			R_DACSR, RM_DACSR_DBM, bm);
 	if (ret < 0) {
-		dev_err(component->dev, "Failed to update register (%d)\n", ret);
+		dev_err(component->dev,
+				"Failed to update register (%d)\n", ret);
 		return ret;
 	}
-	ret = snd_soc_component_update_bits(component, R_ADCSR, RM_DACSR_DBR, br);
+	ret = snd_soc_component_update_bits(component,
+			R_ADCSR, RM_DACSR_DBR, br);
 	if (ret < 0) {
-		dev_err(component->dev, "Failed to update register (%d)\n", ret);
+		dev_err(component->dev,
+				"Failed to update register (%d)\n", ret);
 		return ret;
 	}
-	ret = snd_soc_component_update_bits(component, R_ADCSR, RM_DACSR_DBM, bm);
+	ret = snd_soc_component_update_bits(component,
+			R_ADCSR, RM_DACSR_DBM, bm);
 	if (ret < 0) {
-		dev_err(component->dev, "Failed to update register (%d)\n", ret);
+		dev_err(component->dev,
+				"Failed to update register (%d)\n", ret);
 		return ret;
 	}
 
@@ -1076,7 +1093,8 @@
 
 	ret = setup_sample_rate(component, params_rate(params));
 	if (ret < 0) {
-		dev_err(component->dev, "Failed to setup sample rate (%d)\n", ret);
+		dev_err(component->dev,
+				"Failed to setup sample rate (%d)\n", ret);
 		return ret;
 	}
 
@@ -1087,7 +1105,8 @@
 {
 	int ret;
 
-	ret = snd_soc_component_update_bits(component, R_CNVRTR1, RM_CNVRTR1_DACMU,
+	ret = snd_soc_component_update_bits(component,
+			R_CNVRTR1, RM_CNVRTR1_DACMU,
 		RV_CNVRTR1_DACMU_ENABLE);
 	if (ret < 0) {
 		dev_err(component->dev, "Failed to mute DAC (%d)\n",
@@ -1102,7 +1121,8 @@
 {
 	int ret;
 
-	ret = snd_soc_component_update_bits(component, R_CNVRTR1, RM_CNVRTR1_DACMU,
+	ret = snd_soc_component_update_bits(component,
+			R_CNVRTR1, RM_CNVRTR1_DACMU,
 		RV_CNVRTR1_DACMU_DISABLE);
 	if (ret < 0) {
 		dev_err(component->dev, "Failed to unmute DAC (%d)\n",
@@ -1117,8 +1137,8 @@
 {
 	int ret;
 
-	ret = snd_soc_component_update_bits(component, R_CNVRTR0, RM_CNVRTR0_ADCMU,
-		RV_CNVRTR0_ADCMU_ENABLE);
+	ret = snd_soc_component_update_bits(component,
+			R_CNVRTR0, RM_CNVRTR0_ADCMU, RV_CNVRTR0_ADCMU_ENABLE);
 	if (ret < 0) {
 		dev_err(component->dev, "Failed to mute ADC (%d)\n",
 				ret);
@@ -1132,8 +1152,8 @@
 {
 	int ret;
 
-	ret = snd_soc_component_update_bits(component, R_CNVRTR0, RM_CNVRTR0_ADCMU,
-		RV_CNVRTR0_ADCMU_DISABLE);
+	ret = snd_soc_component_update_bits(component,
+			R_CNVRTR0, RM_CNVRTR0_ADCMU, RV_CNVRTR0_ADCMU_DISABLE);
 	if (ret < 0) {
 		dev_err(component->dev, "Failed to unmute ADC (%d)\n",
 				ret);
@@ -1171,8 +1191,8 @@
 	/* Slave mode not supported since it needs always-on frame clock */
 	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
 	case SND_SOC_DAIFMT_CBM_CFM:
-		ret = snd_soc_component_update_bits(component, R_AIC1, RM_AIC1_MS,
-				RV_AIC1_MS_MASTER);
+		ret = snd_soc_component_update_bits(component,
+				R_AIC1, RM_AIC1_MS, RV_AIC1_MS_MASTER);
 		if (ret < 0) {
 			dev_err(component->dev,
 				"Failed to set codec DAI master (%d)\n", ret);
@@ -1211,14 +1231,18 @@
 		return -EINVAL;
 	}
 
-	ret = snd_soc_component_update_bits(component, R_DACSR, RM_DACSR_DBCM, value);
+	ret = snd_soc_component_update_bits(component,
+			R_DACSR, RM_DACSR_DBCM, value);
 	if (ret < 0) {
-		dev_err(component->dev, "Failed to set DAC BCLK ratio (%d)\n", ret);
+		dev_err(component->dev,
+				"Failed to set DAC BCLK ratio (%d)\n", ret);
 		return ret;
 	}
-	ret = snd_soc_component_update_bits(component, R_ADCSR, RM_ADCSR_ABCM, value);
+	ret = snd_soc_component_update_bits(component,
+			R_ADCSR, RM_ADCSR_ABCM, value);
 	if (ret < 0) {
-		dev_err(component->dev, "Failed to set ADC BCLK ratio (%d)\n", ret);
+		dev_err(component->dev,
+				"Failed to set ADC BCLK ratio (%d)\n", ret);
 		return ret;
 	}
 
@@ -1231,56 +1255,11 @@
 	return 0;
 }
 
-static int tscs42xx_set_dai_sysclk(struct snd_soc_dai *codec_dai,
-	int clk_id, unsigned int freq, int dir)
-{
-	struct snd_soc_component *component = codec_dai->component;
-	int ret;
-
-	switch (clk_id) {
-	case TSCS42XX_PLL_SRC_XTAL:
-	case TSCS42XX_PLL_SRC_MCLK1:
-		ret = snd_soc_component_write(component, R_PLLREFSEL,
-				RV_PLLREFSEL_PLL1_REF_SEL_XTAL_MCLK1 |
-				RV_PLLREFSEL_PLL2_REF_SEL_XTAL_MCLK1);
-		if (ret < 0) {
-			dev_err(component->dev,
-				"Failed to set pll reference input (%d)\n",
-				ret);
-			return ret;
-		}
-		break;
-	case TSCS42XX_PLL_SRC_MCLK2:
-		ret = snd_soc_component_write(component, R_PLLREFSEL,
-				RV_PLLREFSEL_PLL1_REF_SEL_MCLK2 |
-				RV_PLLREFSEL_PLL2_REF_SEL_MCLK2);
-		if (ret < 0) {
-			dev_err(component->dev,
-				"Failed to set PLL reference (%d)\n", ret);
-			return ret;
-		}
-		break;
-	default:
-		dev_err(component->dev, "pll src is unsupported\n");
-		return -EINVAL;
-	}
-
-	ret = set_pll_ctl_from_input_freq(component, freq);
-	if (ret < 0) {
-		dev_err(component->dev,
-			"Failed to setup PLL input freq (%d)\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
 static const struct snd_soc_dai_ops tscs42xx_dai_ops = {
 	.hw_params	= tscs42xx_hw_params,
 	.mute_stream	= tscs42xx_mute_stream,
 	.set_fmt	= tscs42xx_set_dai_fmt,
 	.set_bclk_ratio = tscs42xx_set_dai_bclk_ratio,
-	.set_sysclk	= tscs42xx_set_dai_sysclk,
 };
 
 static int part_is_valid(struct tscs42xx *tscs42xx)
@@ -1309,7 +1288,58 @@
 	};
 }
 
-static struct snd_soc_component_driver soc_codec_dev_tscs42xx = {
+static int set_sysclk(struct snd_soc_component *component)
+{
+	struct tscs42xx *tscs42xx = snd_soc_component_get_drvdata(component);
+	unsigned long freq;
+	int ret;
+
+	switch (tscs42xx->sysclk_src_id) {
+	case TSCS42XX_PLL_SRC_XTAL:
+	case TSCS42XX_PLL_SRC_MCLK1:
+		ret = snd_soc_component_write(component, R_PLLREFSEL,
+				RV_PLLREFSEL_PLL1_REF_SEL_XTAL_MCLK1 |
+				RV_PLLREFSEL_PLL2_REF_SEL_XTAL_MCLK1);
+		if (ret < 0) {
+			dev_err(component->dev,
+				"Failed to set pll reference input (%d)\n",
+				ret);
+			return ret;
+		}
+		break;
+	case TSCS42XX_PLL_SRC_MCLK2:
+		ret = snd_soc_component_write(component, R_PLLREFSEL,
+				RV_PLLREFSEL_PLL1_REF_SEL_MCLK2 |
+				RV_PLLREFSEL_PLL2_REF_SEL_MCLK2);
+		if (ret < 0) {
+			dev_err(component->dev,
+				"Failed to set PLL reference (%d)\n", ret);
+			return ret;
+		}
+		break;
+	default:
+		dev_err(component->dev, "pll src is unsupported\n");
+		return -EINVAL;
+	}
+
+	freq = clk_get_rate(tscs42xx->sysclk);
+	ret = set_pll_ctl_from_input_freq(component, freq);
+	if (ret < 0) {
+		dev_err(component->dev,
+			"Failed to setup PLL input freq (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
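+/*
+ * With the PLL reference clock now taken from the device tree, the
+ * reference can be programmed once at component probe instead of via
+ * the removed set_sysclk() DAI op.
+ */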
+static int tscs42xx_probe(struct snd_soc_component *component)
+{
+	return set_sysclk(component);
+}
+
+static const struct snd_soc_component_driver soc_codec_dev_tscs42xx = {
+	.probe			= tscs42xx_probe,
 	.dapm_widgets		= tscs42xx_dapm_widgets,
 	.num_dapm_widgets	= ARRAY_SIZE(tscs42xx_dapm_widgets),
 	.dapm_routes		= tscs42xx_intercon,
@@ -1367,11 +1397,15 @@
 	{ R_AIC2, RV_AIC2_BLRCM_DAC_BCLK_LRCLK_SHARED },
 };
 
+static char const * const src_names[TSCS42XX_PLL_SRC_CNT] = {
+	"xtal", "mclk1", "mclk2"};
+
 static int tscs42xx_i2c_probe(struct i2c_client *i2c,
 		const struct i2c_device_id *id)
 {
 	struct tscs42xx *tscs42xx;
-	int ret = 0;
+	int src;
+	int ret;
 
 	tscs42xx = devm_kzalloc(&i2c->dev, sizeof(*tscs42xx), GFP_KERNEL);
 	if (!tscs42xx) {
@@ -1381,12 +1415,29 @@
 		return ret;
 	}
 	i2c_set_clientdata(i2c, tscs42xx);
-	tscs42xx->dev = &i2c->dev;
+
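+	/*
+	 * Try each supported sysclk source name in turn; -ENOENT only
+	 * means that name is not described, any other error is fatal.
+	 */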
+	for (src = TSCS42XX_PLL_SRC_XTAL; src < TSCS42XX_PLL_SRC_CNT; src++) {
+		tscs42xx->sysclk = devm_clk_get(&i2c->dev, src_names[src]);
+		if (!IS_ERR(tscs42xx->sysclk)) {
+			break;
+		} else if (PTR_ERR(tscs42xx->sysclk) != -ENOENT) {
+			ret = PTR_ERR(tscs42xx->sysclk);
+			dev_err(&i2c->dev, "Failed to get sysclk (%d)\n", ret);
+			return ret;
+		}
+	}
+	if (src == TSCS42XX_PLL_SRC_CNT) {
+		ret = -EINVAL;
+		dev_err(&i2c->dev, "Failed to get a valid clock name (%d)\n",
+				ret);
+		return ret;
+	}
+	tscs42xx->sysclk_src_id = src;
 
 	tscs42xx->regmap = devm_regmap_init_i2c(i2c, &tscs42xx_regmap);
 	if (IS_ERR(tscs42xx->regmap)) {
 		ret = PTR_ERR(tscs42xx->regmap);
-		dev_err(tscs42xx->dev, "Failed to allocate regmap (%d)\n", ret);
+		dev_err(&i2c->dev, "Failed to allocate regmap (%d)\n", ret);
 		return ret;
 	}
 
@@ -1394,21 +1445,21 @@
 
 	ret = part_is_valid(tscs42xx);
 	if (ret <= 0) {
-		dev_err(tscs42xx->dev, "No valid part (%d)\n", ret);
+		dev_err(&i2c->dev, "No valid part (%d)\n", ret);
 		ret = -ENODEV;
 		return ret;
 	}
 
 	ret = regmap_write(tscs42xx->regmap, R_RESET, RV_RESET_ENABLE);
 	if (ret < 0) {
-		dev_err(tscs42xx->dev, "Failed to reset device (%d)\n", ret);
+		dev_err(&i2c->dev, "Failed to reset device (%d)\n", ret);
 		return ret;
 	}
 
 	ret = regmap_register_patch(tscs42xx->regmap, tscs42xx_patch,
 			ARRAY_SIZE(tscs42xx_patch));
 	if (ret < 0) {
-		dev_err(tscs42xx->dev, "Failed to apply patch (%d)\n", ret);
+		dev_err(&i2c->dev, "Failed to apply patch (%d)\n", ret);
 		return ret;
 	}
 
@@ -1416,10 +1467,10 @@
 	mutex_init(&tscs42xx->coeff_ram_lock);
 	mutex_init(&tscs42xx->pll_lock);
 
-	ret = devm_snd_soc_register_component(tscs42xx->dev, &soc_codec_dev_tscs42xx,
-			&tscs42xx_dai, 1);
+	ret = devm_snd_soc_register_component(&i2c->dev,
+			&soc_codec_dev_tscs42xx, &tscs42xx_dai, 1);
 	if (ret) {
-		dev_err(tscs42xx->dev, "Failed to register codec (%d)\n", ret);
+		dev_err(&i2c->dev, "Failed to register codec (%d)\n", ret);
 		return ret;
 	}
 
diff --git a/sound/soc/codecs/tscs42xx.h b/sound/soc/codecs/tscs42xx.h
index d4a30bc..814c8f3 100644
--- a/sound/soc/codecs/tscs42xx.h
+++ b/sound/soc/codecs/tscs42xx.h
@@ -7,10 +7,10 @@
 #define __WOOKIE_H__
 
 enum {
-	TSCS42XX_PLL_SRC_NONE,
 	TSCS42XX_PLL_SRC_XTAL,
 	TSCS42XX_PLL_SRC_MCLK1,
 	TSCS42XX_PLL_SRC_MCLK2,
+	TSCS42XX_PLL_SRC_CNT,
 };
 
 #define R_HPVOLL        0x0
diff --git a/sound/soc/codecs/tscs454.c b/sound/soc/codecs/tscs454.c
new file mode 100644
index 0000000..ff85a0b
--- /dev/null
+++ b/sound/soc/codecs/tscs454.c
@@ -0,0 +1,3497 @@
+// SPDX-License-Identifier: GPL-2.0
+// tscs454.c -- TSCS454 ALSA SoC Audio driver
+// Copyright 2018 Tempo Semiconductor, Inc.
+// Author: Steven Eckhoff <steven.eckhoff.opensource@gmail.com>
+
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/regmap.h>
+#include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+
+#include <sound/tlv.h>
+#include <sound/pcm_params.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+
+#include "tscs454.h"
+
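+/* The internal rate is 256 * fs for each of the two base sample rates */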
+static const unsigned int PLL_48K_RATE = (48000 * 256);
+static const unsigned int PLL_44_1K_RATE = (44100 * 256);
+
+#define COEFF_SIZE 3
+#define BIQUAD_COEFF_COUNT 5
+#define BIQUAD_SIZE (COEFF_SIZE * BIQUAD_COEFF_COUNT)
+
+#define COEFF_RAM_MAX_ADDR 0xcd
+#define COEFF_RAM_COEFF_COUNT (COEFF_RAM_MAX_ADDR + 1)
+#define COEFF_RAM_SIZE (COEFF_SIZE * COEFF_RAM_COEFF_COUNT)
+
+enum {
+	TSCS454_DAI1_ID,
+	TSCS454_DAI2_ID,
+	TSCS454_DAI3_ID,
+	TSCS454_DAI_COUNT,
+};
+
+struct pll {
+	int id;
+	unsigned int users;
+	struct mutex lock;
+};
+
+static inline void pll_init(struct pll *pll, int id)
+{
+	pll->id = id;
+	mutex_init(&pll->lock);
+}
+
+struct internal_rate {
+	struct pll *pll;
+};
+
+struct aif {
+	unsigned int id;
+	bool master;
+	struct pll *pll;
+};
+
+static inline void aif_init(struct aif *aif, unsigned int id)
+{
+	aif->id = id;
+}
+
+struct coeff_ram {
+	u8 cache[COEFF_RAM_SIZE];
+	bool synced;
+	struct mutex lock;
+};
+
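+/*
+ * Seed the high byte of each normalization coefficient with 0x40
+ * (0x400000 in the 3-byte coefficient format, presumably unity gain)
+ * so untouched filters default to pass-through.
+ */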
+static inline void init_coeff_ram_cache(u8 *cache)
+{
+	static const u8 norm_addrs[] = { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x19,
+		0x1f, 0x20, 0x25, 0x2a, 0x2f, 0x34, 0x39, 0x3f, 0x40, 0x45,
+		0x4a, 0x4f, 0x54, 0x59, 0x5f, 0x60, 0x65, 0x6a, 0x6f, 0x74,
+		0x79, 0x7f, 0x80, 0x85, 0x8c, 0x91, 0x96, 0x97, 0x9c, 0xa3,
+		0xa8, 0xad, 0xaf, 0xb0, 0xb5, 0xba, 0xbf, 0xc4, 0xc9};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(norm_addrs); i++)
+		cache[((norm_addrs[i] + 1) * COEFF_SIZE) - 1] = 0x40;
+}
+
+static inline void coeff_ram_init(struct coeff_ram *ram)
+{
+	init_coeff_ram_cache(ram->cache);
+	mutex_init(&ram->lock);
+}
+
+struct aifs_status {
+	u8 streams;
+};
+
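+/*
+ * Two bits of 'streams' per AIF: bit (2 * aif_id) tracks the playback
+ * stream and bit (2 * aif_id + 1) the capture stream, so aif_active()
+ * can test a whole port against a 0x03 mask.
+ */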
+static inline void set_aif_status_active(struct aifs_status *status,
+		int aif_id, bool playback)
+{
+	u8 mask = 0x01 << (aif_id * 2 + !playback);
+
+	status->streams |= mask;
+}
+
+static inline void set_aif_status_inactive(struct aifs_status *status,
+		int aif_id, bool playback)
+{
+	u8 mask = ~(0x01 << (aif_id * 2 + !playback));
+
+	status->streams &= mask;
+}
+
+static bool aifs_active(struct aifs_status *status)
+{
+	return status->streams;
+}
+
+static bool aif_active(struct aifs_status *status, int aif_id)
+{
+	return (0x03 << aif_id * 2) & status->streams;
+}
+
+struct tscs454 {
+	struct regmap *regmap;
+	struct aif aifs[TSCS454_DAI_COUNT];
+
+	struct aifs_status aifs_status;
+	struct mutex aifs_status_lock;
+
+	struct pll pll1;
+	struct pll pll2;
+	struct internal_rate internal_rate;
+
+	struct coeff_ram dac_ram;
+	struct coeff_ram spk_ram;
+	struct coeff_ram sub_ram;
+
+	struct clk *sysclk;
+	int sysclk_src_id;
+	unsigned int bclk_freq;
+};
+
+struct coeff_ram_ctl {
+	unsigned int addr;
+	struct soc_bytes_ext bytes_ext;
+};
+
+static const struct reg_sequence tscs454_patch[] = {
+	/* Assign ASRC out of the box so DAI 1 just works */
+	{ R_AUDIOMUX1, FV_ASRCIMUX_I2S1 | FV_I2S2MUX_I2S2 },
+	{ R_AUDIOMUX2, FV_ASRCOMUX_I2S1 | FV_DACMUX_I2S1 | FV_I2S3MUX_I2S3 },
+	{ R_AUDIOMUX3, FV_CLSSDMUX_I2S1 | FV_SUBMUX_I2S1_LR },
+	{ R_TDMCTL0, FV_TDMMD_256 },
+	{ VIRT_ADDR(0x0A, 0x13), 1 << 3 },
+};
+
+static bool tscs454_volatile(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case R_PLLSTAT:
+
+	case R_SPKCRRDL:
+	case R_SPKCRRDM:
+	case R_SPKCRRDH:
+	case R_SPKCRS:
+
+	case R_DACCRRDL:
+	case R_DACCRRDM:
+	case R_DACCRRDH:
+	case R_DACCRS:
+
+	case R_SUBCRRDL:
+	case R_SUBCRRDM:
+	case R_SUBCRRDH:
+	case R_SUBCRS:
+		return true;
+	default:
+		return false;
+	};
+}
+
+static bool tscs454_writable(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case R_SPKCRRDL:
+	case R_SPKCRRDM:
+	case R_SPKCRRDH:
+
+	case R_DACCRRDL:
+	case R_DACCRRDM:
+	case R_DACCRRDH:
+
+	case R_SUBCRRDL:
+	case R_SUBCRRDM:
+	case R_SUBCRRDH:
+		return false;
+	default:
+		return true;
+	};
+}
+
+static bool tscs454_readable(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case R_SPKCRWDL:
+	case R_SPKCRWDM:
+	case R_SPKCRWDH:
+
+	case R_DACCRWDL:
+	case R_DACCRWDM:
+	case R_DACCRWDH:
+
+	case R_SUBCRWDL:
+	case R_SUBCRWDM:
+	case R_SUBCRWDH:
+		return false;
+	default:
+		return true;
+	};
+}
+
+static bool tscs454_precious(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case R_SPKCRWDL:
+	case R_SPKCRWDM:
+	case R_SPKCRWDH:
+	case R_SPKCRRDL:
+	case R_SPKCRRDM:
+	case R_SPKCRRDH:
+
+	case R_DACCRWDL:
+	case R_DACCRWDM:
+	case R_DACCRWDH:
+	case R_DACCRRDL:
+	case R_DACCRRDM:
+	case R_DACCRRDH:
+
+	case R_SUBCRWDL:
+	case R_SUBCRWDM:
+	case R_SUBCRWDH:
+	case R_SUBCRRDL:
+	case R_SUBCRRDM:
+	case R_SUBCRRDH:
+		return true;
+	default:
+		return false;
+	};
+}
+
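+/*
+ * The register map is paged: writes to R_PAGESEL pick the page, and the
+ * virtual range below windows each 256-register page through it.
+ */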
+static const struct regmap_range_cfg tscs454_regmap_range_cfg = {
+	.name = "Pages",
+	.range_min = VIRT_BASE,
+	.range_max = VIRT_ADDR(0xFE, 0x02),
+	.selector_reg = R_PAGESEL,
+	.selector_mask = 0xff,
+	.selector_shift = 0,
+	.window_start = 0,
+	.window_len = 0x100,
+};
+
+static struct regmap_config const tscs454_regmap_cfg = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.writeable_reg = tscs454_writable,
+	.readable_reg = tscs454_readable,
+	.volatile_reg = tscs454_volatile,
+	.precious_reg = tscs454_precious,
+	.ranges = &tscs454_regmap_range_cfg,
+	.num_ranges = 1,
+	.max_register = VIRT_ADDR(0xFE, 0x02),
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static inline int tscs454_data_init(struct tscs454 *tscs454,
+		struct i2c_client *i2c)
+{
+	int i;
+	int ret;
+
+	tscs454->regmap = devm_regmap_init_i2c(i2c, &tscs454_regmap_cfg);
+	if (IS_ERR(tscs454->regmap)) {
+		ret = PTR_ERR(tscs454->regmap);
+		return ret;
+	}
+
+	for (i = 0; i < TSCS454_DAI_COUNT; i++)
+		aif_init(&tscs454->aifs[i], i);
+
+	mutex_init(&tscs454->aifs_status_lock);
+	pll_init(&tscs454->pll1, 1);
+	pll_init(&tscs454->pll2, 2);
+
+	coeff_ram_init(&tscs454->dac_ram);
+	coeff_ram_init(&tscs454->spk_ram);
+	coeff_ram_init(&tscs454->sub_ram);
+
+	return 0;
+}
+
+struct reg_setting {
+	unsigned int addr;
+	unsigned int val;
+};
+
+static int coeff_ram_get(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *component =
+		snd_soc_kcontrol_component(kcontrol);
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	struct coeff_ram_ctl *ctl =
+		(struct coeff_ram_ctl *)kcontrol->private_value;
+	struct soc_bytes_ext *params = &ctl->bytes_ext;
+	u8 *coeff_ram;
+	struct mutex *coeff_ram_lock;
+
+	if (strstr(kcontrol->id.name, "DAC")) {
+		coeff_ram = tscs454->dac_ram.cache;
+		coeff_ram_lock = &tscs454->dac_ram.lock;
+	} else if (strstr(kcontrol->id.name, "Speaker")) {
+		coeff_ram = tscs454->spk_ram.cache;
+		coeff_ram_lock = &tscs454->spk_ram.lock;
+	} else if (strstr(kcontrol->id.name, "Sub")) {
+		coeff_ram = tscs454->sub_ram.cache;
+		coeff_ram_lock = &tscs454->sub_ram.lock;
+	} else {
+		return -EINVAL;
+	}
+
+	mutex_lock(coeff_ram_lock);
+
+	memcpy(ucontrol->value.bytes.data,
+		&coeff_ram[ctl->addr * COEFF_SIZE], params->max);
+
+	mutex_unlock(coeff_ram_lock);
+
+	return 0;
+}
+
+#define DACCRSTAT_MAX_TRYS 10
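+/*
+ * Stream 'coeff_cnt' coefficients from the cache into a coefficient
+ * RAM: poll the status register until the previous write has drained,
+ * set the RAM address, then bulk-write one COEFF_SIZE-byte value.
+ */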
+static int write_coeff_ram(struct snd_soc_component *component, u8 *coeff_ram,
+		unsigned int r_stat, unsigned int r_addr, unsigned int r_wr,
+		unsigned int coeff_addr, unsigned int coeff_cnt)
+{
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	unsigned int val;
+	int cnt;
+	int trys;
+	int ret;
+
+	for (cnt = 0; cnt < coeff_cnt; cnt++, coeff_addr++) {
+
+		for (trys = 0; trys < DACCRSTAT_MAX_TRYS; trys++) {
+			ret = snd_soc_component_read(component, r_stat, &val);
+			if (ret < 0) {
+				dev_err(component->dev,
+					"Failed to read stat (%d)\n", ret);
+				return ret;
+			}
+			if (!val)
+				break;
+		}
+
+		if (trys == DACCRSTAT_MAX_TRYS) {
+			ret = -EIO;
+			dev_err(component->dev,
+				"Coefficient write error (%d)\n", ret);
+			return ret;
+		}
+
+		ret = regmap_write(tscs454->regmap, r_addr, coeff_addr);
+		if (ret < 0) {
+			dev_err(component->dev,
+				"Failed to write dac ram address (%d)\n", ret);
+			return ret;
+		}
+
+		ret = regmap_bulk_write(tscs454->regmap, r_wr,
+			&coeff_ram[coeff_addr * COEFF_SIZE],
+			COEFF_SIZE);
+		if (ret < 0) {
+			dev_err(component->dev,
+				"Failed to write dac ram (%d)\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int coeff_ram_put(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *component =
+		snd_soc_kcontrol_component(kcontrol);
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	struct coeff_ram_ctl *ctl =
+		(struct coeff_ram_ctl *)kcontrol->private_value;
+	struct soc_bytes_ext *params = &ctl->bytes_ext;
+	unsigned int coeff_cnt = params->max / COEFF_SIZE;
+	u8 *coeff_ram;
+	struct mutex *coeff_ram_lock;
+	bool *coeff_ram_synced;
+	unsigned int r_stat;
+	unsigned int r_addr;
+	unsigned int r_wr;
+	unsigned int val;
+	int ret;
+
+	if (strstr(kcontrol->id.name, "DAC")) {
+		coeff_ram = tscs454->dac_ram.cache;
+		coeff_ram_lock = &tscs454->dac_ram.lock;
+		coeff_ram_synced = &tscs454->dac_ram.synced;
+		r_stat = R_DACCRS;
+		r_addr = R_DACCRADD;
+		r_wr = R_DACCRWDL;
+	} else if (strstr(kcontrol->id.name, "Speaker")) {
+		coeff_ram = tscs454->spk_ram.cache;
+		coeff_ram_lock = &tscs454->spk_ram.lock;
+		coeff_ram_synced = &tscs454->spk_ram.synced;
+		r_stat = R_SPKCRS;
+		r_addr = R_SPKCRADD;
+		r_wr = R_SPKCRWDL;
+	} else if (strstr(kcontrol->id.name, "Sub")) {
+		coeff_ram = tscs454->sub_ram.cache;
+		coeff_ram_lock = &tscs454->sub_ram.lock;
+		coeff_ram_synced = &tscs454->sub_ram.synced;
+		r_stat = R_SUBCRS;
+		r_addr = R_SUBCRADD;
+		r_wr = R_SUBCRWDL;
+	} else {
+		return -EINVAL;
+	}
+
+	mutex_lock(coeff_ram_lock);
+
+	*coeff_ram_synced = false;
+
+	memcpy(&coeff_ram[ctl->addr * COEFF_SIZE],
+		ucontrol->value.bytes.data, params->max);
+
+	mutex_lock(&tscs454->pll1.lock);
+	mutex_lock(&tscs454->pll2.lock);
+
+	ret = snd_soc_component_read(component, R_PLLSTAT, &val);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to read PLL status (%d)\n",
+				ret);
+		goto exit;
+	}
+	if (val) { /* PLLs locked */
+		ret = write_coeff_ram(component, coeff_ram,
+			r_stat, r_addr, r_wr,
+			ctl->addr, coeff_cnt);
+		if (ret < 0) {
+			dev_err(component->dev,
+				"Failed to flush coeff ram cache (%d)\n", ret);
+			goto exit;
+		}
+		*coeff_ram_synced = true;
+	}
+
+	ret = 0;
+exit:
+	mutex_unlock(&tscs454->pll2.lock);
+	mutex_unlock(&tscs454->pll1.lock);
+	mutex_unlock(coeff_ram_lock);
+
+	return ret;
+}
+
+static inline int coeff_ram_sync(struct snd_soc_component *component,
+		struct tscs454 *tscs454)
+{
+	int ret;
+
+	mutex_lock(&tscs454->dac_ram.lock);
+	if (!tscs454->dac_ram.synced) {
+		ret = write_coeff_ram(component, tscs454->dac_ram.cache,
+				R_DACCRS, R_DACCRADD, R_DACCRWDL,
+				0x00, COEFF_RAM_COEFF_COUNT);
+		if (ret < 0) {
+			mutex_unlock(&tscs454->dac_ram.lock);
+			return ret;
+		}
+	}
+	mutex_unlock(&tscs454->dac_ram.lock);
+
+	mutex_lock(&tscs454->spk_ram.lock);
+	if (!tscs454->spk_ram.synced) {
+		ret = write_coeff_ram(component, tscs454->spk_ram.cache,
+				R_SPKCRS, R_SPKCRADD, R_SPKCRWDL,
+				0x00, COEFF_RAM_COEFF_COUNT);
+		if (ret < 0) {
+			mutex_unlock(&tscs454->spk_ram.lock);
+			return ret;
+		}
+	}
+	mutex_unlock(&tscs454->spk_ram.lock);
+
+	mutex_lock(&tscs454->sub_ram.lock);
+	if (!tscs454->sub_ram.synced) {
+		ret = write_coeff_ram(component, tscs454->sub_ram.cache,
+				R_SUBCRS, R_SUBCRADD, R_SUBCRWDL,
+				0x00, COEFF_RAM_COEFF_COUNT);
+		if (ret < 0) {
+			mutex_unlock(&tscs454->sub_ram.lock);
+			return ret;
+		}
+	}
+	mutex_unlock(&tscs454->sub_ram.lock);
+
+	return 0;
+}
+
+#define PLL_REG_SETTINGS_COUNT 11
+struct pll_ctl {
+	int freq_in;
+	struct reg_setting settings[PLL_REG_SETTINGS_COUNT];
+};
+
+#define PLL_CTL(f, t, c1, r1, o1, f1l, f1h, c2, r2, o2, f2l, f2h)	\
+	{								\
+		.freq_in = f,						\
+		.settings = {						\
+			{R_PLL1CTL,	c1},				\
+			{R_PLL1RDIV,	r1},				\
+			{R_PLL1ODIV,	o1},				\
+			{R_PLL1FDIVL,	f1l},				\
+			{R_PLL1FDIVH,	f1h},				\
+			{R_PLL2CTL,	c2},				\
+			{R_PLL2RDIV,	r2},				\
+			{R_PLL2ODIV,	o2},				\
+			{R_PLL2FDIVL,	f2l},				\
+			{R_PLL2FDIVH,	f2h},				\
+			{R_TIMEBASE,	t},				\
+		},							\
+	}
+
+static const struct pll_ctl pll_ctls[] = {
+	PLL_CTL(1411200, 0x05,
+		0xB9, 0x07, 0x02, 0xC3, 0x04,
+		0x5A, 0x02, 0x03, 0xE0, 0x01),
+	PLL_CTL(1536000, 0x05,
+		0x5A, 0x02, 0x03, 0xE0, 0x01,
+		0x5A, 0x02, 0x03, 0xB9, 0x01),
+	PLL_CTL(2822400, 0x0A,
+		0x63, 0x07, 0x04, 0xC3, 0x04,
+		0x62, 0x07, 0x03, 0x48, 0x03),
+	PLL_CTL(3072000, 0x0B,
+		0x62, 0x07, 0x03, 0x48, 0x03,
+		0x5A, 0x04, 0x03, 0xB9, 0x01),
+	PLL_CTL(5644800, 0x15,
+		0x63, 0x0E, 0x04, 0xC3, 0x04,
+		0x5A, 0x08, 0x03, 0xE0, 0x01),
+	PLL_CTL(6144000, 0x17,
+		0x5A, 0x08, 0x03, 0xE0, 0x01,
+		0x5A, 0x08, 0x03, 0xB9, 0x01),
+	PLL_CTL(12000000, 0x2E,
+		0x5B, 0x19, 0x03, 0x00, 0x03,
+		0x6A, 0x19, 0x05, 0x98, 0x04),
+	PLL_CTL(19200000, 0x4A,
+		0x53, 0x14, 0x03, 0x80, 0x01,
+		0x5A, 0x19, 0x03, 0xB9, 0x01),
+	PLL_CTL(22000000, 0x55,
+		0x6A, 0x37, 0x05, 0x00, 0x06,
+		0x62, 0x26, 0x03, 0x49, 0x02),
+	PLL_CTL(22579200, 0x57,
+		0x62, 0x31, 0x03, 0x20, 0x03,
+		0x53, 0x1D, 0x03, 0xB3, 0x01),
+	PLL_CTL(24000000, 0x5D,
+		0x53, 0x19, 0x03, 0x80, 0x01,
+		0x5B, 0x19, 0x05, 0x4C, 0x02),
+	PLL_CTL(24576000, 0x5F,
+		0x53, 0x1D, 0x03, 0xB3, 0x01,
+		0x62, 0x40, 0x03, 0x72, 0x03),
+	PLL_CTL(27000000, 0x68,
+		0x62, 0x4B, 0x03, 0x00, 0x04,
+		0x6A, 0x7D, 0x03, 0x20, 0x06),
+	PLL_CTL(36000000, 0x8C,
+		0x5B, 0x4B, 0x03, 0x00, 0x03,
+		0x6A, 0x7D, 0x03, 0x98, 0x04),
+	PLL_CTL(11289600, 0x2B,
+		0x6A, 0x31, 0x03, 0x40, 0x06,
+		0x5A, 0x12, 0x03, 0x1C, 0x02),
+	PLL_CTL(26000000, 0x65,
+		0x63, 0x41, 0x05, 0x00, 0x06,
+		0x5A, 0x26, 0x03, 0xEF, 0x01),
+	PLL_CTL(12288000, 0x2F,
+		0x5A, 0x12, 0x03, 0x1C, 0x02,
+		0x62, 0x20, 0x03, 0x72, 0x03),
+	PLL_CTL(40000000, 0x9B,
+		0xA2, 0x7D, 0x03, 0x80, 0x04,
+		0x63, 0x7D, 0x05, 0xE4, 0x06),
+	PLL_CTL(512000, 0x01,
+		0x62, 0x01, 0x03, 0xD0, 0x02,
+		0x5B, 0x01, 0x04, 0x72, 0x03),
+	PLL_CTL(705600, 0x02,
+		0x62, 0x02, 0x03, 0x15, 0x04,
+		0x62, 0x01, 0x04, 0x80, 0x02),
+	PLL_CTL(1024000, 0x03,
+		0x62, 0x02, 0x03, 0xD0, 0x02,
+		0x5B, 0x02, 0x04, 0x72, 0x03),
+	PLL_CTL(2048000, 0x07,
+		0x62, 0x04, 0x03, 0xD0, 0x02,
+		0x5B, 0x04, 0x04, 0x72, 0x03),
+	PLL_CTL(2400000, 0x08,
+		0x62, 0x05, 0x03, 0x00, 0x03,
+		0x63, 0x05, 0x05, 0x98, 0x04),
+};
+
+static inline const struct pll_ctl *get_pll_ctl(unsigned long freq_in)
+{
+	int i;
+	struct pll_ctl const *pll_ctl = NULL;
+
+	for (i = 0; i < ARRAY_SIZE(pll_ctls); ++i)
+		if (pll_ctls[i].freq_in == freq_in) {
+			pll_ctl = &pll_ctls[i];
+			break;
+		}
+
+	return pll_ctl;
+}
+
+enum {
+	PLL_INPUT_XTAL = 0,
+	PLL_INPUT_MCLK1,
+	PLL_INPUT_MCLK2,
+	PLL_INPUT_BCLK,
+};
+
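+/*
+ * Program both PLLs from the table above, keyed on the reference
+ * frequency: the external sysclk rate or, for PLL_INPUT_BCLK, the
+ * stored bit clock rate.
+ */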
+static int set_sysclk(struct snd_soc_component *component)
+{
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	struct pll_ctl const *pll_ctl;
+	unsigned long freq;
+	int i;
+	int ret;
+
+	if (tscs454->sysclk_src_id < PLL_INPUT_BCLK)
+		freq = clk_get_rate(tscs454->sysclk);
+	else
+		freq = tscs454->bclk_freq;
+	pll_ctl = get_pll_ctl(freq);
+	if (!pll_ctl) {
+		ret = -EINVAL;
+		dev_err(component->dev,
+				"Invalid PLL input %lu (%d)\n", freq, ret);
+		return ret;
+	}
+
+	for (i = 0; i < PLL_REG_SETTINGS_COUNT; ++i) {
+		ret = snd_soc_component_write(component,
+				pll_ctl->settings[i].addr,
+				pll_ctl->settings[i].val);
+		if (ret < 0) {
+			dev_err(component->dev,
+					"Failed to set pll setting (%d)\n",
+					ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static inline void reserve_pll(struct pll *pll)
+{
+	mutex_lock(&pll->lock);
+	pll->users++;
+	mutex_unlock(&pll->lock);
+}
+
+static inline void free_pll(struct pll *pll)
+{
+	mutex_lock(&pll->lock);
+	pll->users--;
+	mutex_unlock(&pll->lock);
+}
+
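+/*
+ * DAPM 'connected' callback: a PLL-fed path is walked only while the
+ * PLL has at least one user, per the reserve_pll()/free_pll() counts.
+ */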
+static int pll_connected(struct snd_soc_dapm_widget *source,
+		struct snd_soc_dapm_widget *sink)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(source->dapm);
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	int users;
+
+	if (strstr(source->name, "PLL 1")) {
+		mutex_lock(&tscs454->pll1.lock);
+		users = tscs454->pll1.users;
+		mutex_unlock(&tscs454->pll1.lock);
+		dev_dbg(component->dev, "%s(): PLL 1 users = %d\n", __func__,
+				users);
+	} else {
+		mutex_lock(&tscs454->pll2.lock);
+		users = tscs454->pll2.users;
+		mutex_unlock(&tscs454->pll2.lock);
+		dev_dbg(component->dev, "%s(): PLL 2 users = %d\n", __func__,
+				users);
+	}
+
+	return users;
+}
+
+/*
+ * PLL must be enabled after power up and must be disabled before power down
+ * for proper clock switching.
+ */
+static int pll_power_event(struct snd_soc_dapm_widget *w,
+		struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+		snd_soc_dapm_to_component(w->dapm);
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	bool enable;
+	bool pll1;
+	unsigned int msk;
+	unsigned int val;
+	int ret;
+
+	if (strstr(w->name, "PLL 1"))
+		pll1 = true;
+	else
+		pll1 = false;
+
+	msk = pll1 ? FM_PLLCTL_PLL1CLKEN : FM_PLLCTL_PLL2CLKEN;
+
+	if (event == SND_SOC_DAPM_POST_PMU)
+		enable = true;
+	else
+		enable = false;
+
+	if (enable)
+		val = pll1 ? FV_PLL1CLKEN_ENABLE : FV_PLL2CLKEN_ENABLE;
+	else
+		val = pll1 ? FV_PLL1CLKEN_DISABLE : FV_PLL2CLKEN_DISABLE;
+
+	ret = snd_soc_component_update_bits(component, R_PLLCTL, msk, val);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to %s PLL %d  (%d)\n",
+				enable ? "enable" : "disable",
+				pll1 ? 1 : 2,
+				ret);
+		return ret;
+	}
+
+	if (enable) {
+		msleep(20); /* Wait for lock */
+		ret = coeff_ram_sync(component, tscs454);
+		if (ret < 0) {
+			dev_err(component->dev,
+					"Failed to sync coeff ram (%d)\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static inline int aif_set_master(struct snd_soc_component *component,
+		unsigned int aif_id, bool master)
+{
+	unsigned int reg;
+	unsigned int mask;
+	unsigned int val;
+	int ret;
+
+	switch (aif_id) {
+	case TSCS454_DAI1_ID:
+		reg = R_I2SP1CTL;
+		break;
+	case TSCS454_DAI2_ID:
+		reg = R_I2SP2CTL;
+		break;
+	case TSCS454_DAI3_ID:
+		reg = R_I2SP3CTL;
+		break;
+	default:
+		ret = -ENODEV;
+		dev_err(component->dev, "Unknown DAI %d (%d)\n", aif_id, ret);
+		return ret;
+	}
+	mask = FM_I2SPCTL_PORTMS;
+	val = master ? FV_PORTMS_MASTER : FV_PORTMS_SLAVE;
+
+	ret = snd_soc_component_update_bits(component, reg, mask, val);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to set DAI %d to %s (%d)\n",
+			aif_id, master ? "master" : "slave", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static inline
+int aif_prepare(struct snd_soc_component *component, struct aif *aif)
+{
+	int ret;
+
+	ret = aif_set_master(component, aif->id, aif->master);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static inline int aif_free(struct snd_soc_component *component,
+		struct aif *aif, bool playback)
+{
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+
+	mutex_lock(&tscs454->aifs_status_lock);
+
+	dev_dbg(component->dev, "%s(): aif %d\n", __func__, aif->id);
+
+	set_aif_status_inactive(&tscs454->aifs_status, aif->id, playback);
+
+	dev_dbg(component->dev, "Set aif %d inactive. Streams status is 0x%x\n",
+		aif->id, tscs454->aifs_status.streams);
+
+	if (!aif_active(&tscs454->aifs_status, aif->id)) {
+		/* Do config in slave mode */
+		aif_set_master(component, aif->id, false);
+		dev_dbg(component->dev, "Freeing pll %d from aif %d\n",
+				aif->pll->id, aif->id);
+		free_pll(aif->pll);
+	}
+
+	if (!aifs_active(&tscs454->aifs_status)) {
+		dev_dbg(component->dev, "Freeing pll %d from ir\n",
+				tscs454->internal_rate.pll->id);
+		free_pll(tscs454->internal_rate.pll);
+	}
+
+	mutex_unlock(&tscs454->aifs_status_lock);
+
+	return 0;
+}
+
+/* R_PLLCTL PG 0 ADDR 0x15 */
+static char const * const bclk_sel_txt[] = {
+		"BCLK 1", "BCLK 2", "BCLK 3"};
+
+static struct soc_enum const bclk_sel_enum =
+		SOC_ENUM_SINGLE(R_PLLCTL, FB_PLLCTL_BCLKSEL,
+				ARRAY_SIZE(bclk_sel_txt), bclk_sel_txt);
+
+/* R_ISRC PG 0 ADDR 0x16 */
+static char const * const isrc_br_txt[] = {
+		"44.1kHz", "48kHz"};
+
+static struct soc_enum const isrc_br_enum =
+		SOC_ENUM_SINGLE(R_ISRC, FB_ISRC_IBR,
+				ARRAY_SIZE(isrc_br_txt), isrc_br_txt);
+
+static char const * const isrc_bm_txt[] = {
+		"0.25x", "0.5x", "1.0x", "2.0x"};
+
+static struct soc_enum const isrc_bm_enum =
+		SOC_ENUM_SINGLE(R_ISRC, FB_ISRC_IBM,
+				ARRAY_SIZE(isrc_bm_txt), isrc_bm_txt);
+
+/* R_SCLKCTL PG 0 ADDR 0x18 */
+static char const * const modular_rate_txt[] = {
+	"Reserved", "Half", "Full", "Auto",};
+
+static struct soc_enum const adc_modular_rate_enum =
+	SOC_ENUM_SINGLE(R_SCLKCTL, FB_SCLKCTL_ASDM,
+			ARRAY_SIZE(modular_rate_txt), modular_rate_txt);
+
+static struct soc_enum const dac_modular_rate_enum =
+	SOC_ENUM_SINGLE(R_SCLKCTL, FB_SCLKCTL_DSDM,
+			ARRAY_SIZE(modular_rate_txt), modular_rate_txt);
+
+/* R_I2SIDCTL PG 0 ADDR 0x38 */
+static char const * const data_ctrl_txt[] = {
+	"L/R", "L/L", "R/R", "R/L"};
+
+static struct soc_enum const data_in_ctrl_enums[] = {
+	SOC_ENUM_SINGLE(R_I2SIDCTL, FB_I2SIDCTL_I2SI1DCTL,
+			ARRAY_SIZE(data_ctrl_txt), data_ctrl_txt),
+	SOC_ENUM_SINGLE(R_I2SIDCTL, FB_I2SIDCTL_I2SI2DCTL,
+			ARRAY_SIZE(data_ctrl_txt), data_ctrl_txt),
+	SOC_ENUM_SINGLE(R_I2SIDCTL, FB_I2SIDCTL_I2SI3DCTL,
+			ARRAY_SIZE(data_ctrl_txt), data_ctrl_txt),
+};
+
+/* R_I2SODCTL PG 0 ADDR 0x39 */
+static struct soc_enum const data_out_ctrl_enums[] = {
+	SOC_ENUM_SINGLE(R_I2SODCTL, FB_I2SODCTL_I2SO1DCTL,
+			ARRAY_SIZE(data_ctrl_txt), data_ctrl_txt),
+	SOC_ENUM_SINGLE(R_I2SODCTL, FB_I2SODCTL_I2SO2DCTL,
+			ARRAY_SIZE(data_ctrl_txt), data_ctrl_txt),
+	SOC_ENUM_SINGLE(R_I2SODCTL, FB_I2SODCTL_I2SO3DCTL,
+			ARRAY_SIZE(data_ctrl_txt), data_ctrl_txt),
+};
+
+/* R_AUDIOMUX1 PG 0 ADDR 0x3A */
+static char const * const asrc_mux_txt[] = {
+		"None", "DAI 1", "DAI 2", "DAI 3"};
+
+static struct soc_enum const asrc_in_mux_enum =
+		SOC_ENUM_SINGLE(R_AUDIOMUX1, FB_AUDIOMUX1_ASRCIMUX,
+				ARRAY_SIZE(asrc_mux_txt), asrc_mux_txt);
+
+static char const * const dai_mux_txt[] = {
+		"CH 0_1", "CH 2_3", "CH 4_5", "ADC/DMic 1",
+		"DMic 2", "ClassD", "DAC", "Sub"};
+
+static struct soc_enum const dai2_mux_enum =
+		SOC_ENUM_SINGLE(R_AUDIOMUX1, FB_AUDIOMUX1_I2S2MUX,
+				ARRAY_SIZE(dai_mux_txt), dai_mux_txt);
+
+static struct snd_kcontrol_new const dai2_mux_dapm_enum =
+		SOC_DAPM_ENUM("DAI 2 Mux",  dai2_mux_enum);
+
+static struct soc_enum const dai1_mux_enum =
+		SOC_ENUM_SINGLE(R_AUDIOMUX1, FB_AUDIOMUX1_I2S1MUX,
+				ARRAY_SIZE(dai_mux_txt), dai_mux_txt);
+
+static struct snd_kcontrol_new const dai1_mux_dapm_enum =
+		SOC_DAPM_ENUM("DAI 1 Mux", dai1_mux_enum);
+
+/* R_AUDIOMUX2 PG 0 ADDR 0x3B */
+static struct soc_enum const asrc_out_mux_enum =
+		SOC_ENUM_SINGLE(R_AUDIOMUX2, FB_AUDIOMUX2_ASRCOMUX,
+				ARRAY_SIZE(asrc_mux_txt), asrc_mux_txt);
+
+static struct soc_enum const dac_mux_enum =
+		SOC_ENUM_SINGLE(R_AUDIOMUX2, FB_AUDIOMUX2_DACMUX,
+				ARRAY_SIZE(dai_mux_txt), dai_mux_txt);
+
+static struct snd_kcontrol_new const dac_mux_dapm_enum =
+		SOC_DAPM_ENUM("DAC Mux", dac_mux_enum);
+
+static struct soc_enum const dai3_mux_enum =
+		SOC_ENUM_SINGLE(R_AUDIOMUX2, FB_AUDIOMUX2_I2S3MUX,
+				ARRAY_SIZE(dai_mux_txt), dai_mux_txt);
+
+static struct snd_kcontrol_new const dai3_mux_dapm_enum =
+		SOC_DAPM_ENUM("DAI 3 Mux", dai3_mux_enum);
+
+/* R_AUDIOMUX3 PG 0 ADDR 0x3C */
+static char const * const sub_mux_txt[] = {
+		"CH 0", "CH 1", "CH 0 + 1",
+		"CH 2", "CH 3", "CH 2 + 3",
+		"CH 4", "CH 5", "CH 4 + 5",
+		"ADC/DMic 1 Left", "ADC/DMic 1 Right",
+		"ADC/DMic 1 Left Plus Right",
+		"DMic 2 Left", "DMic 2 Right", "DMic 2 Left Plus Right",
+		"ClassD Left", "ClassD Right", "ClassD Left Plus Right"};
+
+static struct soc_enum const sub_mux_enum =
+		SOC_ENUM_SINGLE(R_AUDIOMUX3, FB_AUDIOMUX3_SUBMUX,
+				ARRAY_SIZE(sub_mux_txt), sub_mux_txt);
+
+static struct snd_kcontrol_new const sub_mux_dapm_enum =
+		SOC_DAPM_ENUM("Sub Mux", sub_mux_enum);
+
+static struct soc_enum const classd_mux_enum =
+		SOC_ENUM_SINGLE(R_AUDIOMUX3, FB_AUDIOMUX3_CLSSDMUX,
+				ARRAY_SIZE(dai_mux_txt), dai_mux_txt);
+
+static struct snd_kcontrol_new const classd_mux_dapm_enum =
+		SOC_DAPM_ENUM("ClassD Mux", classd_mux_enum);
+
+/* R_HSDCTL1 PG 1 ADDR 0x01 */
+static char const * const jack_type_txt[] = {
+		"3 Terminal", "4 Terminal"};
+
+static struct soc_enum const hp_jack_type_enum =
+		SOC_ENUM_SINGLE(R_HSDCTL1, FB_HSDCTL1_HPJKTYPE,
+				ARRAY_SIZE(jack_type_txt), jack_type_txt);
+
+static char const * const hs_det_pol_txt[] = {
+		"Rising", "Falling"};
+
+static struct soc_enum const hs_det_pol_enum =
+		SOC_ENUM_SINGLE(R_HSDCTL1, FB_HSDCTL1_HSDETPOL,
+				ARRAY_SIZE(hs_det_pol_txt), hs_det_pol_txt);
+
+/* R_HSDCTL1 PG 1 ADDR 0x02 */
+static char const * const hs_mic_bias_force_txt[] = {
+		"Off", "Ring", "Sleeve"};
+
+static struct soc_enum const hs_mic_bias_force_enum =
+		SOC_ENUM_SINGLE(R_HSDCTL2, FB_HSDCTL2_FMICBIAS1,
+				ARRAY_SIZE(hs_mic_bias_force_txt),
+				hs_mic_bias_force_txt);
+
+static char const * const plug_type_txt[] = {
+		"OMTP", "CTIA", "Reserved", "Headphone"};
+
+static struct soc_enum const plug_type_force_enum =
+		SOC_ENUM_SINGLE(R_HSDCTL2, FB_HSDCTL2_FPLUGTYPE,
+		ARRAY_SIZE(plug_type_txt), plug_type_txt);
+
+
+/* R_CH0AIC PG 1 ADDR 0x06 */
+static char const * const in_bst_mux_txt[] = {
+		"Input 1", "Input 2", "Input 3", "D2S"};
+
+static struct soc_enum const in_bst_mux_ch0_enum =
+		SOC_ENUM_SINGLE(R_CH0AIC, FB_CH0AIC_INSELL,
+				ARRAY_SIZE(in_bst_mux_txt),
+				in_bst_mux_txt);
+static struct snd_kcontrol_new const in_bst_mux_ch0_dapm_enum =
+		SOC_DAPM_ENUM("Input Boost Channel 0 Enum",
+				in_bst_mux_ch0_enum);
+
+static DECLARE_TLV_DB_SCALE(in_bst_vol_tlv_arr, 0, 1000, 0);
+
+static char const * const adc_mux_txt[] = {
+		"Input 1 Boost Bypass", "Input 2 Boost Bypass",
+		"Input 3 Boost Bypass", "Input Boost"};
+
+static struct soc_enum const adc_mux_ch0_enum =
+		SOC_ENUM_SINGLE(R_CH0AIC, FB_CH0AIC_LADCIN,
+				ARRAY_SIZE(adc_mux_txt), adc_mux_txt);
+static struct snd_kcontrol_new const adc_mux_ch0_dapm_enum =
+		SOC_DAPM_ENUM("ADC Channel 0 Enum", adc_mux_ch0_enum);
+
+static char const * const in_proc_mux_txt[] = {
+		"ADC", "DMic"};
+
+static struct soc_enum const in_proc_ch0_enum =
+		SOC_ENUM_SINGLE(R_CH0AIC, FB_CH0AIC_IPCH0S,
+				ARRAY_SIZE(in_proc_mux_txt), in_proc_mux_txt);
+static struct snd_kcontrol_new const in_proc_mux_ch0_dapm_enum =
+		SOC_DAPM_ENUM("Input Processor Channel 0 Enum",
+				in_proc_ch0_enum);
+
+/* R_CH1AIC PG 1 ADDR 0x07 */
+static struct soc_enum const in_bst_mux_ch1_enum =
+		SOC_ENUM_SINGLE(R_CH1AIC, FB_CH1AIC_INSELR,
+				ARRAY_SIZE(in_bst_mux_txt),
+				in_bst_mux_txt);
+static struct snd_kcontrol_new const in_bst_mux_ch1_dapm_enum =
+		SOC_DAPM_ENUM("Input Boost Channel 1 Enum",
+				in_bst_mux_ch1_enum);
+
+static struct soc_enum const adc_mux_ch1_enum =
+		SOC_ENUM_SINGLE(R_CH1AIC, FB_CH1AIC_RADCIN,
+				ARRAY_SIZE(adc_mux_txt), adc_mux_txt);
+static struct snd_kcontrol_new const adc_mux_ch1_dapm_enum =
+		SOC_DAPM_ENUM("ADC Channel 1 Enum", adc_mux_ch1_enum);
+
+static struct soc_enum const in_proc_ch1_enum =
+		SOC_ENUM_SINGLE(R_CH1AIC, FB_CH1AIC_IPCH1S,
+				ARRAY_SIZE(in_proc_mux_txt), in_proc_mux_txt);
+static struct snd_kcontrol_new const in_proc_mux_ch1_dapm_enum =
+		SOC_DAPM_ENUM("Input Processor Channel 1 Enum",
+				in_proc_ch1_enum);
+
+/* R_ICTL0 PG 1 ADDR 0x0A */
+static char const * const pol_txt[] = {
+		"Normal", "Invert"};
+
+static struct soc_enum const in_pol_ch1_enum =
+		SOC_ENUM_SINGLE(R_ICTL0, FB_ICTL0_IN0POL,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+static struct soc_enum const in_pol_ch0_enum =
+		SOC_ENUM_SINGLE(R_ICTL0, FB_ICTL0_IN1POL,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+static char const * const in_proc_ch_sel_txt[] = {
+		"Normal", "Mono Mix to Channel 0",
+		"Mono Mix to Channel 1", "Add"};
+
+static struct soc_enum const in_proc_ch01_sel_enum =
+		SOC_ENUM_SINGLE(R_ICTL0, FB_ICTL0_INPCH10SEL,
+				ARRAY_SIZE(in_proc_ch_sel_txt),
+				in_proc_ch_sel_txt);
+
+/* R_ICTL1 PG 1 ADDR 0x0B */
+static struct soc_enum const in_pol_ch3_enum =
+		SOC_ENUM_SINGLE(R_ICTL1, FB_ICTL1_IN2POL,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+static struct soc_enum const in_pol_ch2_enum =
+		SOC_ENUM_SINGLE(R_ICTL1, FB_ICTL1_IN3POL,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+static struct soc_enum const in_proc_ch23_sel_enum =
+		SOC_ENUM_SINGLE(R_ICTL1, FB_ICTL1_INPCH32SEL,
+				ARRAY_SIZE(in_proc_ch_sel_txt),
+				in_proc_ch_sel_txt);
+
+/* R_MICBIAS PG 1 ADDR 0x0C */
+static char const * const mic_bias_txt[] = {
+		"2.5V", "2.1V", "1.8V", "Vdd"};
+
+static struct soc_enum const mic_bias_2_enum =
+		SOC_ENUM_SINGLE(R_MICBIAS, FB_MICBIAS_MICBOV2,
+				ARRAY_SIZE(mic_bias_txt), mic_bias_txt);
+
+static struct soc_enum const mic_bias_1_enum =
+		SOC_ENUM_SINGLE(R_MICBIAS, FB_MICBIAS_MICBOV1,
+				ARRAY_SIZE(mic_bias_txt), mic_bias_txt);
+
+/* R_PGACTL0 PG 1 ADDR 0x0D */
+/* R_PGACTL1 PG 1 ADDR 0x0E */
+/* R_PGACTL2 PG 1 ADDR 0x0F */
+/* R_PGACTL3 PG 1 ADDR 0x10 */
+static DECLARE_TLV_DB_SCALE(in_pga_vol_tlv_arr, -1725, 75, 0);
+
+/* R_ICH0VOL PG1 ADDR 0x12 */
+/* R_ICH1VOL PG1 ADDR 0x13 */
+/* R_ICH2VOL PG1 ADDR 0x14 */
+/* R_ICH3VOL PG1 ADDR 0x15 */
+static DECLARE_TLV_DB_MINMAX(in_vol_tlv_arr, -7125, 2400);
+
+/* R_ASRCILVOL PG1 ADDR 0x16 */
+/* R_ASRCIRVOL PG1 ADDR 0x17 */
+/* R_ASRCOLVOL PG1 ADDR 0x18 */
+/* R_ASRCORVOL PG1 ADDR 0x19 */
+static DECLARE_TLV_DB_MINMAX(asrc_vol_tlv_arr, -9562, 600);
+
+/* R_ALCCTL0 PG1 ADDR 0x1D */
+static char const * const alc_mode_txt[] = {
+		"ALC", "Limiter"};
+
+static struct soc_enum const alc_mode_enum =
+		SOC_ENUM_SINGLE(R_ALCCTL0, FB_ALCCTL0_ALCMODE,
+				ARRAY_SIZE(alc_mode_txt), alc_mode_txt);
+
+static char const * const alc_ref_text[] = {
+		"Channel 0", "Channel 1", "Channel 2", "Channel 3", "Peak"};
+
+static struct soc_enum const alc_ref_enum =
+		SOC_ENUM_SINGLE(R_ALCCTL0, FB_ALCCTL0_ALCREF,
+				ARRAY_SIZE(alc_ref_text), alc_ref_text);
+
+/* R_ALCCTL1 PG 1 ADDR 0x1E */
+static DECLARE_TLV_DB_SCALE(alc_max_gain_tlv_arr, -1200, 600, 0);
+static DECLARE_TLV_DB_SCALE(alc_target_tlv_arr, -2850, 150, 0);
+
+/* R_ALCCTL2 PG 1 ADDR 0x1F */
+static DECLARE_TLV_DB_SCALE(alc_min_gain_tlv_arr, -1725, 600, 0);
+
+/* R_NGATE PG 1 ADDR 0x21 */
+static DECLARE_TLV_DB_SCALE(ngth_tlv_arr, -7650, 150, 0);
+
+static char const * const ngate_type_txt[] = {
+		"PGA Constant", "ADC Mute"};
+
+static struct soc_enum const ngate_type_enum =
+		SOC_ENUM_SINGLE(R_NGATE, FB_NGATE_NGG,
+				ARRAY_SIZE(ngate_type_txt), ngate_type_txt);
+
+/* R_DMICCTL PG 1 ADDR 0x22 */
+static char const * const dmic_mono_sel_txt[] = {
+		"Stereo", "Mono"};
+
+static struct soc_enum const dmic_mono_sel_enum =
+		SOC_ENUM_SINGLE(R_DMICCTL, FB_DMICCTL_DMONO,
+			ARRAY_SIZE(dmic_mono_sel_txt), dmic_mono_sel_txt);
+
+/* R_DACCTL PG 2 ADDR 0x01 */
+static struct soc_enum const dac_pol_r_enum =
+		SOC_ENUM_SINGLE(R_DACCTL, FB_DACCTL_DACPOLR,
+			ARRAY_SIZE(pol_txt), pol_txt);
+
+static struct soc_enum const dac_pol_l_enum =
+		SOC_ENUM_SINGLE(R_DACCTL, FB_DACCTL_DACPOLL,
+			ARRAY_SIZE(pol_txt), pol_txt);
+
+static char const * const dac_dith_txt[] = {
+		"Half", "Full", "Disabled", "Static"};
+
+static struct soc_enum const dac_dith_enum =
+		SOC_ENUM_SINGLE(R_DACCTL, FB_DACCTL_DACDITH,
+			ARRAY_SIZE(dac_dith_txt), dac_dith_txt);
+
+/* R_SPKCTL PG 2 ADDR 0x02 */
+static struct soc_enum const spk_pol_r_enum =
+		SOC_ENUM_SINGLE(R_SPKCTL, FB_SPKCTL_SPKPOLR,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+static struct soc_enum const spk_pol_l_enum =
+		SOC_ENUM_SINGLE(R_SPKCTL, FB_SPKCTL_SPKPOLL,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_SUBCTL PG 2 ADDR 0x03 */
+static struct soc_enum const sub_pol_enum =
+		SOC_ENUM_SINGLE(R_SUBCTL, FB_SUBCTL_SUBPOL,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_MVOLL PG 2 ADDR 0x08 */
+/* R_MVOLR PG 2 ADDR 0x09 */
+static DECLARE_TLV_DB_MINMAX(mvol_tlv_arr, -9562, 0);
+
+/* R_HPVOLL PG 2 ADDR 0x0A */
+/* R_HPVOLR PG 2 ADDR 0x0B */
+static DECLARE_TLV_DB_SCALE(hp_vol_tlv_arr, -8850, 75, 0);
+
+/* R_SPKVOLL PG 2 ADDR 0x0C */
+/* R_SPKVOLR PG 2 ADDR 0x0D */
+static DECLARE_TLV_DB_SCALE(spk_vol_tlv_arr, -7725, 75, 0);
+
+/* R_SPKEQFILT PG 3 ADDR 0x01 */
+static char const * const eq_txt[] = {
+	"Pre Scale",
+	"Pre Scale + EQ Band 0",
+	"Pre Scale + EQ Band 0 - 1",
+	"Pre Scale + EQ Band 0 - 2",
+	"Pre Scale + EQ Band 0 - 3",
+	"Pre Scale + EQ Band 0 - 4",
+	"Pre Scale + EQ Band 0 - 5",
+};
+
+static struct soc_enum const spk_eq_enums[] = {
+	SOC_ENUM_SINGLE(R_SPKEQFILT, FB_SPKEQFILT_EQ2BE,
+		ARRAY_SIZE(eq_txt), eq_txt),
+	SOC_ENUM_SINGLE(R_SPKEQFILT, FB_SPKEQFILT_EQ1BE,
+		ARRAY_SIZE(eq_txt), eq_txt),
+};
+
+/* R_SPKMBCCTL PG 3 ADDR 0x0B */
+static char const * const lvl_mode_txt[] = {
+		"Average", "Peak"};
+
+static struct soc_enum const spk_mbc3_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCCTL, FB_SPKMBCCTL_LVLMODE3,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static char const * const win_sel_txt[] = {
+		"512", "64"};
+
+static struct soc_enum const spk_mbc3_win_sel_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCCTL, FB_SPKMBCCTL_WINSEL3,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+static struct soc_enum const spk_mbc2_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCCTL, FB_SPKMBCCTL_LVLMODE2,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const spk_mbc2_win_sel_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCCTL, FB_SPKMBCCTL_WINSEL2,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+static struct soc_enum const spk_mbc1_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCCTL, FB_SPKMBCCTL_LVLMODE1,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const spk_mbc1_win_sel_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCCTL, FB_SPKMBCCTL_WINSEL1,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+/* R_SPKMBCMUG1 PG 3 ADDR 0x0C */
+static struct soc_enum const spk_mbc1_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCMUG1, FB_SPKMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+static DECLARE_TLV_DB_MINMAX(mbc_mug_tlv_arr, -4650, 0);
+
+/* R_SPKMBCTHR1 PG 3 ADDR 0x0D */
+static DECLARE_TLV_DB_MINMAX(thr_tlv_arr, -9562, 0);
+
+/* R_SPKMBCRAT1 PG 3 ADDR 0x0E */
+static char const * const comp_rat_txt[] = {
+		"Reserved", "1.5:1", "2:1", "3:1", "4:1", "5:1", "6:1",
+		"7:1", "8:1", "9:1", "10:1", "11:1", "12:1", "13:1", "14:1",
+		"15:1", "16:1", "17:1", "18:1", "19:1", "20:1"};
+
+static struct soc_enum const spk_mbc1_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCRAT1, FB_SPKMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_SPKMBCMUG2 PG 3 ADDR 0x13 */
+static struct soc_enum const spk_mbc2_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCMUG2, FB_SPKMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_SPKMBCRAT2 PG 3 ADDR 0x15 */
+static struct soc_enum const spk_mbc2_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCRAT2, FB_SPKMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_SPKMBCMUG3 PG 3 ADDR 0x1A */
+static struct soc_enum const spk_mbc3_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCMUG3, FB_SPKMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_SPKMBCRAT3 PG 3 ADDR 0x1C */
+static struct soc_enum const spk_mbc3_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_SPKMBCRAT3, FB_SPKMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_SPKCLECTL PG 3 ADDR 0x21 */
+static struct soc_enum const spk_cle_lvl_mode_enum =
+		SOC_ENUM_SINGLE(R_SPKCLECTL, FB_SPKCLECTL_LVLMODE,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const spk_cle_win_sel_enum =
+		SOC_ENUM_SINGLE(R_SPKCLECTL, FB_SPKCLECTL_WINSEL,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+/* R_SPKCLEMUG PG 3 ADDR 0x22 */
+static DECLARE_TLV_DB_MINMAX(cle_mug_tlv_arr, 0, 4650);
+
+/* R_SPKCOMPRAT PG 3 ADDR 0x24 */
+static struct soc_enum const spk_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_SPKCOMPRAT, FB_SPKCOMPRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_SPKEXPRAT PG 3 ADDR 0x30 */
+static char const * const exp_rat_txt[] = {
+		"Reserved", "Reserved", "1:2", "1:3",
+		"1:4", "1:5", "1:6", "1:7"};
+
+static struct soc_enum const spk_exp_rat_enum =
+		SOC_ENUM_SINGLE(R_SPKEXPRAT, FB_SPKEXPRAT_RATIO,
+				ARRAY_SIZE(exp_rat_txt), exp_rat_txt);
+
+/* R_DACEQFILT PG 4 ADDR 0x01 */
+static struct soc_enum const dac_eq_enums[] = {
+	SOC_ENUM_SINGLE(R_DACEQFILT, FB_DACEQFILT_EQ2BE,
+		ARRAY_SIZE(eq_txt), eq_txt),
+	SOC_ENUM_SINGLE(R_DACEQFILT, FB_DACEQFILT_EQ1BE,
+		ARRAY_SIZE(eq_txt), eq_txt),
+};
+
+/* R_DACMBCCTL PG 4 ADDR 0x0B */
+static struct soc_enum const dac_mbc3_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_LVLMODE3,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const dac_mbc3_win_sel_enum =
+		SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_WINSEL3,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+static struct soc_enum const dac_mbc2_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_LVLMODE2,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const dac_mbc2_win_sel_enum =
+		SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_WINSEL2,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+static struct soc_enum const dac_mbc1_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_LVLMODE1,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const dac_mbc1_win_sel_enum =
+		SOC_ENUM_SINGLE(R_DACMBCCTL, FB_DACMBCCTL_WINSEL1,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+/* R_DACMBCMUG1 PG 4 ADDR 0x0C */
+static struct soc_enum const dac_mbc1_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_DACMBCMUG1, FB_DACMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_DACMBCRAT1 PG 4 ADDR 0x0E */
+static struct soc_enum const dac_mbc1_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_DACMBCRAT1, FB_DACMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_DACMBCMUG2 PG 4 ADDR 0x13 */
+static struct soc_enum const dac_mbc2_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_DACMBCMUG2, FB_DACMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_DACMBCRAT2 PG 4 ADDR 0x15 */
+static struct soc_enum const dac_mbc2_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_DACMBCRAT2, FB_DACMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_DACMBCMUG3 PG 4 ADDR 0x1A */
+static struct soc_enum const dac_mbc3_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_DACMBCMUG3, FB_DACMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_DACMBCRAT3 PG 4 ADDR 0x1C */
+static struct soc_enum const dac_mbc3_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_DACMBCRAT3, FB_DACMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_DACCLECTL PG 4 ADDR 0x21 */
+static struct soc_enum const dac_cle_lvl_mode_enum =
+		SOC_ENUM_SINGLE(R_DACCLECTL, FB_DACCLECTL_LVLMODE,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const dac_cle_win_sel_enum =
+		SOC_ENUM_SINGLE(R_DACCLECTL, FB_DACCLECTL_WINSEL,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+/* R_DACCOMPRAT PG 4 ADDR 0x24 */
+static struct soc_enum const dac_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_DACCOMPRAT, FB_DACCOMPRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_DACEXPRAT PG 4 ADDR 0x30 */
+static struct soc_enum const dac_exp_rat_enum =
+		SOC_ENUM_SINGLE(R_DACEXPRAT, FB_DACEXPRAT_RATIO,
+				ARRAY_SIZE(exp_rat_txt), exp_rat_txt);
+
+/* R_SUBEQFILT PG 5 ADDR 0x01 */
+static struct soc_enum const sub_eq_enums[] = {
+	SOC_ENUM_SINGLE(R_SUBEQFILT, FB_SUBEQFILT_EQ2BE,
+		ARRAY_SIZE(eq_txt), eq_txt),
+	SOC_ENUM_SINGLE(R_SUBEQFILT, FB_SUBEQFILT_EQ1BE,
+		ARRAY_SIZE(eq_txt), eq_txt),
+};
+
+/* R_SUBMBCCTL PG 5 ADDR 0x0B */
+static struct soc_enum const sub_mbc3_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCCTL, FB_SUBMBCCTL_LVLMODE3,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const sub_mbc3_win_sel_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCCTL, FB_SUBMBCCTL_WINSEL3,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+static struct soc_enum const sub_mbc2_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCCTL, FB_SUBMBCCTL_LVLMODE2,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const sub_mbc2_win_sel_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCCTL, FB_SUBMBCCTL_WINSEL2,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+static struct soc_enum const sub_mbc1_lvl_det_mode_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCCTL, FB_SUBMBCCTL_LVLMODE1,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+
+static struct soc_enum const sub_mbc1_win_sel_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCCTL, FB_SUBMBCCTL_WINSEL1,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+/* R_SUBMBCMUG1 PG 5 ADDR 0x0C */
+static struct soc_enum const sub_mbc1_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCMUG1, FB_SUBMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_SUBMBCRAT1 PG 5 ADDR 0x0E */
+static struct soc_enum const sub_mbc1_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCRAT1, FB_SUBMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_SUBMBCMUG2 PG 5 ADDR 0x13 */
+static struct soc_enum const sub_mbc2_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCMUG2, FB_SUBMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_SUBMBCRAT2 PG 5 ADDR 0x15 */
+static struct soc_enum const sub_mbc2_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCRAT2, FB_SUBMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_SUBMBCMUG3 PG 5 ADDR 0x1A */
+static struct soc_enum const sub_mbc3_phase_pol_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCMUG3, FB_SUBMBCMUG_PHASE,
+				ARRAY_SIZE(pol_txt), pol_txt);
+
+/* R_SUBMBCRAT3 PG 5 ADDR 0x1C */
+static struct soc_enum const sub_mbc3_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_SUBMBCRAT3, FB_SUBMBCRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_SUBCLECTL PG 5 ADDR 0x21 */
+static struct soc_enum const sub_cle_lvl_mode_enum =
+		SOC_ENUM_SINGLE(R_SUBCLECTL, FB_SUBCLECTL_LVLMODE,
+				ARRAY_SIZE(lvl_mode_txt), lvl_mode_txt);
+static struct soc_enum const sub_cle_win_sel_enum =
+		SOC_ENUM_SINGLE(R_SUBCLECTL, FB_SUBCLECTL_WINSEL,
+				ARRAY_SIZE(win_sel_txt), win_sel_txt);
+
+/* R_SUBCOMPRAT PG 5 ADDR 0x24 */
+static struct soc_enum const sub_comp_rat_enum =
+		SOC_ENUM_SINGLE(R_SUBCOMPRAT, FB_SUBCOMPRAT_RATIO,
+				ARRAY_SIZE(comp_rat_txt), comp_rat_txt);
+
+/* R_SUBEXPRAT PG 5 ADDR 0x30 */
+static struct soc_enum const sub_exp_rat_enum =
+		SOC_ENUM_SINGLE(R_SUBEXPRAT, FB_SUBEXPRAT_RATIO,
+				ARRAY_SIZE(exp_rat_txt), exp_rat_txt);
+
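+/*
+ * Info callback shared by the coefficient RAM bytes controls: report the
+ * element type and the byte count recorded in the control's soc_bytes_ext
+ * params (set up by COEFF_RAM_CTL() below).
+ */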
+static int bytes_info_ext(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_info *uinfo)
+{
+	struct coeff_ram_ctl *ctl =
+		(struct coeff_ram_ctl *)kcontrol->private_value;
+	struct soc_bytes_ext *params = &ctl->bytes_ext;
+
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES;
+	uinfo->count = params->max;
+
+	return 0;
+}
+
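+/*
+ * The channel input muxes below have no backing register (SND_SOC_NOPM);
+ * they exist purely to steer DAPM routing between the DAIs and the TDM
+ * channel pairs.
+ */
+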
+/* CH 0_1 Input Mux */
+static char const * const ch_0_1_mux_txt[] = {"DAI 1", "TDM 0_1"};
+
+static struct soc_enum const ch_0_1_mux_enum =
+		SOC_ENUM_SINGLE(SND_SOC_NOPM, 0,
+				ARRAY_SIZE(ch_0_1_mux_txt), ch_0_1_mux_txt);
+
+static struct snd_kcontrol_new const ch_0_1_mux_dapm_enum =
+		SOC_DAPM_ENUM("CH 0_1 Input Mux", ch_0_1_mux_enum);
+
+/* CH 2_3 Input Mux */
+static char const * const ch_2_3_mux_txt[] = {"DAI 2", "TDM 2_3"};
+
+static struct soc_enum const ch_2_3_mux_enum =
+		SOC_ENUM_SINGLE(SND_SOC_NOPM, 0,
+				ARRAY_SIZE(ch_2_3_mux_txt), ch_2_3_mux_txt);
+
+static struct snd_kcontrol_new const ch_2_3_mux_dapm_enum =
+		SOC_DAPM_ENUM("CH 2_3 Input Mux", ch_2_3_mux_enum);
+
+/* CH 4_5 Input Mux */
+static char const * const ch_4_5_mux_txt[] = {"DAI 3", "TDM 4_5"};
+
+static struct soc_enum const ch_4_5_mux_enum =
+		SOC_ENUM_SINGLE(SND_SOC_NOPM, 0,
+				ARRAY_SIZE(ch_4_5_mux_txt), ch_4_5_mux_txt);
+
+static struct snd_kcontrol_new const ch_4_5_mux_dapm_enum =
+		SOC_DAPM_ENUM("CH 4_5 Input Mux", ch_4_5_mux_enum);
+
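+/*
+ * Declare a bytes control backed by coefficient RAM: xcount bytes of
+ * coefficient data at RAM address xaddr, accessed through the
+ * coeff_ram_get()/coeff_ram_put() handlers.
+ */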
+#define COEFF_RAM_CTL(xname, xcount, xaddr) \
+{	.iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \
+	.info = bytes_info_ext, \
+	.get = coeff_ram_get, .put = coeff_ram_put, \
+	.private_value = (unsigned long)&(struct coeff_ram_ctl) { \
+		.addr = xaddr, \
+		.bytes_ext = {.max = xcount, }, \
+	} \
+}
+
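+/* Mixer controls, grouped by register (page/address noted above each group) */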
+static struct snd_kcontrol_new const tscs454_snd_controls[] = {
+	/* R_PLLCTL PG 0 ADDR 0x15 */
+	SOC_ENUM("PLL BCLK Input", bclk_sel_enum),
+	/* R_ISRC PG 0 ADDR 0x16 */
+	SOC_ENUM("Internal Rate", isrc_br_enum),
+	SOC_ENUM("Internal Rate Multiple", isrc_bm_enum),
+	/* R_SCLKCTL PG 0 ADDR 0x18 */
+	SOC_ENUM("ADC Modular Rate", adc_modular_rate_enum),
+	SOC_ENUM("DAC Modular Rate", dac_modular_rate_enum),
+	/* R_ASRC PG 0 ADDR 0x28 */
+	SOC_SINGLE("ASRC Out High Bandwidth Switch",
+			R_ASRC, FB_ASRC_ASRCOBW, 1, 0),
+	SOC_SINGLE("ASRC In High Bandwidth Switch",
+			R_ASRC, FB_ASRC_ASRCIBW, 1, 0),
+	/* R_I2SIDCTL PG 0 ADDR 0x38 */
+	SOC_ENUM("I2S 1 Data In Control", data_in_ctrl_enums[0]),
+	SOC_ENUM("I2S 2 Data In Control", data_in_ctrl_enums[1]),
+	SOC_ENUM("I2S 3 Data In Control", data_in_ctrl_enums[2]),
+	/* R_I2SODCTL PG 0 ADDR 0x39 */
+	SOC_ENUM("I2S 1 Data Out Control", data_out_ctrl_enums[0]),
+	SOC_ENUM("I2S 2 Data Out Control", data_out_ctrl_enums[1]),
+	SOC_ENUM("I2S 3 Data Out Control", data_out_ctrl_enums[2]),
+	/* R_AUDIOMUX1 PG 0 ADDR 0x3A */
+	SOC_ENUM("ASRC In", asrc_in_mux_enum),
+	/* R_AUDIOMUX2 PG 0 ADDR 0x3B */
+	SOC_ENUM("ASRC Out", asrc_out_mux_enum),
+	/* R_HSDCTL1 PG 1 ADDR 0x01 */
+	SOC_ENUM("Headphone Jack Type", hp_jack_type_enum),
+	SOC_ENUM("Headset Detection Polarity", hs_det_pol_enum),
+	SOC_SINGLE("Headphone Detection Switch",
+			R_HSDCTL1, FB_HSDCTL1_HPID_EN, 1, 0),
+	SOC_SINGLE("Headset OMTP/CTIA Switch",
+			R_HSDCTL1, FB_HSDCTL1_GBLHS_EN, 1, 0),
+	/* R_HSDCTL2 PG 1 ADDR 0x02 */
+	SOC_ENUM("Headset Mic Bias Force", hs_mic_bias_force_enum),
+	SOC_SINGLE("Manual Mic Bias Switch",
+			R_HSDCTL2, FB_HSDCTL2_MB1MODE, 1, 0),
+	SOC_SINGLE("Ring/Sleeve Auto Switch",
+			R_HSDCTL2, FB_HSDCTL2_SWMODE, 1, 0),
+	SOC_ENUM("Manual Mode Plug Type", plug_type_force_enum),
+	/* R_CH0AIC PG 1 ADDR 0x06 */
+	SOC_SINGLE_TLV("Input Boost Channel 0 Volume", R_CH0AIC,
+			FB_CHAIC_MICBST, 0x3, 0, in_bst_vol_tlv_arr),
+	/* R_CH1AIC PG 1 ADDR 0x07 */
+	SOC_SINGLE_TLV("Input Boost Channel 1 Volume", R_CH1AIC,
+			FB_CHAIC_MICBST, 0x3, 0, in_bst_vol_tlv_arr),
+	/* R_CH2AIC PG 1 ADDR 0x08 */
+	SOC_SINGLE_TLV("Input Boost Channel 2 Volume", R_CH2AIC,
+			FB_CHAIC_MICBST, 0x3, 0, in_bst_vol_tlv_arr),
+	/* R_CH3AIC PG 1 ADDR 0x09 */
+	SOC_SINGLE_TLV("Input Boost Channel 3 Volume", R_CH3AIC,
+			FB_CHAIC_MICBST, 0x3, 0, in_bst_vol_tlv_arr),
+	/* R_ICTL0 PG 1 ADDR 0x0A */
+	SOC_ENUM("Input Channel 1 Polarity", in_pol_ch1_enum),
+	SOC_ENUM("Input Channel 0 Polarity", in_pol_ch0_enum),
+	SOC_ENUM("Input Processor Channel 0/1 Operation",
+			in_proc_ch01_sel_enum),
+	SOC_SINGLE("Input Channel 1 Mute Switch",
+			R_ICTL0, FB_ICTL0_IN1MUTE, 1, 0),
+	SOC_SINGLE("Input Channel 0 Mute Switch",
+			R_ICTL0, FB_ICTL0_IN0MUTE, 1, 0),
+	SOC_SINGLE("Input Channel 1 HPF Disable Switch",
+			R_ICTL0, FB_ICTL0_IN1HP, 1, 0),
+	SOC_SINGLE("Input Channel 0 HPF Disable Switch",
+			R_ICTL0, FB_ICTL0_IN0HP, 1, 0),
+	/* R_ICTL1 PG 1 ADDR 0x0B */
+	SOC_ENUM("Input Channel 3 Polarity", in_pol_ch3_enum),
+	SOC_ENUM("Input Channel 2 Polarity", in_pol_ch2_enum),
+	SOC_ENUM("Input Processor Channel 2/3 Operation",
+			in_proc_ch23_sel_enum),
+	SOC_SINGLE("Input Channel 3 Mute Switch",
+			R_ICTL1, FB_ICTL1_IN3MUTE, 1, 0),
+	SOC_SINGLE("Input Channel 2 Mute Switch",
+			R_ICTL1, FB_ICTL1_IN2MUTE, 1, 0),
+	SOC_SINGLE("Input Channel 3 HPF Disable Switch",
+			R_ICTL1, FB_ICTL1_IN3HP, 1, 0),
+	SOC_SINGLE("Input Channel 2 HPF Disable Switch",
+			R_ICTL1, FB_ICTL1_IN2HP, 1, 0),
+	/* R_MICBIAS PG 1 ADDR 0x0C */
+	SOC_ENUM("Mic Bias 2 Voltage", mic_bias_2_enum),
+	SOC_ENUM("Mic Bias 1 Voltage", mic_bias_1_enum),
+	/* R_PGACTL0 PG 1 ADDR 0x0D */
+	SOC_SINGLE("Input Channel 0 PGA Mute Switch",
+			R_PGACTL0, FB_PGACTL_PGAMUTE, 1, 0),
+	SOC_SINGLE_TLV("Input Channel 0 PGA Volume", R_PGACTL0,
+			FB_PGACTL_PGAVOL,
+			FM_PGACTL_PGAVOL, 0, in_pga_vol_tlv_arr),
+	/* R_PGACTL1 PG 1 ADDR 0x0E */
+	SOC_SINGLE("Input Channel 1 PGA Mute Switch",
+			R_PGACTL1, FB_PGACTL_PGAMUTE, 1, 0),
+	SOC_SINGLE_TLV("Input Channel 1 PGA Volume", R_PGACTL1,
+			FB_PGACTL_PGAVOL,
+			FM_PGACTL_PGAVOL, 0, in_pga_vol_tlv_arr),
+	/* R_PGACTL2 PG 1 ADDR 0x0F */
+	SOC_SINGLE("Input Channel 2 PGA Mute Switch",
+			R_PGACTL2, FB_PGACTL_PGAMUTE, 1, 0),
+	SOC_SINGLE_TLV("Input Channel 2 PGA Volume", R_PGACTL2,
+			FB_PGACTL_PGAVOL,
+			FM_PGACTL_PGAVOL, 0, in_pga_vol_tlv_arr),
+	/* R_PGACTL3 PG 1 ADDR 0x10 */
+	SOC_SINGLE("Input Channel 3 PGA Mute Switch",
+			R_PGACTL3, FB_PGACTL_PGAMUTE, 1, 0),
+	SOC_SINGLE_TLV("Input Channel 3 PGA Volume", R_PGACTL3,
+			FB_PGACTL_PGAVOL,
+			FM_PGACTL_PGAVOL, 0, in_pga_vol_tlv_arr),
+	/* R_ICH0VOL PG 1 ADDR 0x12 */
+	SOC_SINGLE_TLV("Input Channel 0 Volume", R_ICH0VOL,
+			FB_ICHVOL_ICHVOL, FM_ICHVOL_ICHVOL, 0, in_vol_tlv_arr),
+	/* R_ICH1VOL PG 1 ADDR 0x13 */
+	SOC_SINGLE_TLV("Input Channel 1 Volume", R_ICH1VOL,
+			FB_ICHVOL_ICHVOL, FM_ICHVOL_ICHVOL, 0, in_vol_tlv_arr),
+	/* R_ICH2VOL PG 1 ADDR 0x14 */
+	SOC_SINGLE_TLV("Input Channel 2 Volume", R_ICH2VOL,
+			FB_ICHVOL_ICHVOL, FM_ICHVOL_ICHVOL, 0, in_vol_tlv_arr),
+	/* R_ICH3VOL PG 1 ADDR 0x15 */
+	SOC_SINGLE_TLV("Input Channel 3 Volume", R_ICH3VOL,
+			FB_ICHVOL_ICHVOL, FM_ICHVOL_ICHVOL, 0, in_vol_tlv_arr),
+	/* R_ASRCILVOL PG 1 ADDR 0x16 */
+	SOC_SINGLE_TLV("ASRC Input Left Volume", R_ASRCILVOL,
+			FB_ASRCILVOL_ASRCILVOL, FM_ASRCILVOL_ASRCILVOL,
+			0, asrc_vol_tlv_arr),
+	/* R_ASRCIRVOL PG 1 ADDR 0x17 */
+	SOC_SINGLE_TLV("ASRC Input Right Volume", R_ASRCIRVOL,
+			FB_ASRCIRVOL_ASRCIRVOL, FM_ASRCIRVOL_ASRCIRVOL,
+			0, asrc_vol_tlv_arr),
+	/* R_ASRCOLVOL PG 1 ADDR 0x18 */
+	SOC_SINGLE_TLV("ASRC Output Left Volume", R_ASRCOLVOL,
+			FB_ASRCOLVOL_ASRCOLVOL, FM_ASRCOLVOL_ASRCOLVOL,
+			0, asrc_vol_tlv_arr),
+	/* R_ASRCORVOL PG 1 ADDR 0x19 */
+	SOC_SINGLE_TLV("ASRC Output Right Volume", R_ASRCORVOL,
+			FB_ASRCORVOL_ASRCOLVOL, FM_ASRCORVOL_ASRCOLVOL,
+			0, asrc_vol_tlv_arr),
+	/* R_IVOLCTLU PG 1 ADDR 0x1C */
+	/* R_ALCCTL0 PG 1 ADDR 0x1D */
+	SOC_ENUM("ALC Mode", alc_mode_enum),
+	SOC_ENUM("ALC Reference", alc_ref_enum),
+	SOC_SINGLE("Input Channel 3 ALC Switch",
+			R_ALCCTL0, FB_ALCCTL0_ALCEN3, 1, 0),
+	SOC_SINGLE("Input Channel 2 ALC Switch",
+			R_ALCCTL0, FB_ALCCTL0_ALCEN2, 1, 0),
+	SOC_SINGLE("Input Channel 1 ALC Switch",
+			R_ALCCTL0, FB_ALCCTL0_ALCEN1, 1, 0),
+	SOC_SINGLE("Input Channel 0 ALC Switch",
+			R_ALCCTL0, FB_ALCCTL0_ALCEN0, 1, 0),
+	/* R_ALCCTL1 PG 1 ADDR 0x1E */
+	SOC_SINGLE_TLV("ALC Max Gain Volume", R_ALCCTL1,
+			FB_ALCCTL1_MAXGAIN, FM_ALCCTL1_MAXGAIN,
+			0, alc_max_gain_tlv_arr),
+	SOC_SINGLE_TLV("ALC Target Volume", R_ALCCTL1,
+			FB_ALCCTL1_ALCL, FM_ALCCTL1_ALCL,
+			0, alc_target_tlv_arr),
+	/* R_ALCCTL2 PG 1 ADDR 0x1F */
+	SOC_SINGLE("ALC Zero Cross Switch",
+			R_ALCCTL2, FB_ALCCTL2_ALCZC, 1, 0),
+	SOC_SINGLE_TLV("ALC Min Gain Volume", R_ALCCTL2,
+			FB_ALCCTL2_MINGAIN, FM_ALCCTL2_MINGAIN,
+			0, alc_min_gain_tlv_arr),
+	SOC_SINGLE_RANGE("ALC Hold", R_ALCCTL2,
+			FB_ALCCTL2_HLD, 0, FM_ALCCTL2_HLD, 0),
+	/* R_ALCCTL3 PG 1 ADDR 0x20 */
+	SOC_SINGLE_RANGE("ALC Decay", R_ALCCTL3,
+			FB_ALCCTL3_DCY, 0, FM_ALCCTL3_DCY, 0),
+	SOC_SINGLE_RANGE("ALC Attack", R_ALCCTL3,
+			FB_ALCCTL3_ATK, 0, FM_ALCCTL3_ATK, 0),
+	/* R_NGATE PG 1 ADDR 0x21 */
+	SOC_SINGLE_TLV("Noise Gate Threshold Volume", R_NGATE,
+			FB_NGATE_NGTH, FM_NGATE_NGTH, 0, ngth_tlv_arr),
+	SOC_ENUM("Noise Gate Type", ngate_type_enum),
+	SOC_SINGLE("Noise Gate Switch", R_NGATE, FB_NGATE_NGAT, 1, 0),
+	/* R_DMICCTL PG 1 ADDR 0x22 */
+	SOC_SINGLE("Digital Mic 2 Switch", R_DMICCTL, FB_DMICCTL_DMIC2EN, 1, 0),
+	SOC_SINGLE("Digital Mic 1 Switch", R_DMICCTL, FB_DMICCTL_DMIC1EN, 1, 0),
+	SOC_ENUM("Digital Mic Mono Select", dmic_mono_sel_enum),
+	/* R_DACCTL PG 2 ADDR 0x01 */
+	SOC_ENUM("DAC Polarity Left", dac_pol_r_enum),
+	SOC_ENUM("DAC Polarity Right", dac_pol_l_enum),
+	SOC_ENUM("DAC Dither", dac_dith_enum),
+	SOC_SINGLE("DAC Mute Switch", R_DACCTL, FB_DACCTL_DACMUTE, 1, 0),
+	SOC_SINGLE("DAC De-Emphasis Switch", R_DACCTL, FB_DACCTL_DACDEM, 1, 0),
+	/* R_SPKCTL PG 2 ADDR 0x02 */
+	SOC_ENUM("Speaker Polarity Right", spk_pol_r_enum),
+	SOC_ENUM("Speaker Polarity Left", spk_pol_l_enum),
+	SOC_SINGLE("Speaker Mute Switch", R_SPKCTL, FB_SPKCTL_SPKMUTE, 1, 0),
+	SOC_SINGLE("Speaker De-Emphasis Switch",
+			R_SPKCTL, FB_SPKCTL_SPKDEM, 1, 0),
+	/* R_SUBCTL PG 2 ADDR 0x03 */
+	SOC_ENUM("Sub Polarity", sub_pol_enum),
+	SOC_SINGLE("SUB Mute Switch", R_SUBCTL, FB_SUBCTL_SUBMUTE, 1, 0),
+	SOC_SINGLE("Sub De-Emphasis Switch", R_SUBCTL, FB_SUBCTL_SUBDEM, 1, 0),
+	/* R_DCCTL PG 2 ADDR 0x04 */
+	SOC_SINGLE("Sub DC Removal Switch", R_DCCTL, FB_DCCTL_SUBDCBYP, 1, 1),
+	SOC_SINGLE("DAC DC Removal Switch", R_DCCTL, FB_DCCTL_DACDCBYP, 1, 1),
+	SOC_SINGLE("Speaker DC Removal Switch",
+			R_DCCTL, FB_DCCTL_SPKDCBYP, 1, 1),
+	SOC_SINGLE("DC Removal Coefficient Switch", R_DCCTL, FB_DCCTL_DCCOEFSEL,
+			FM_DCCTL_DCCOEFSEL, 0),
+	/* R_OVOLCTLU PG 2 ADDR 0x06 */
+	SOC_SINGLE("Output Fade Switch", R_OVOLCTLU, FB_OVOLCTLU_OFADE, 1, 0),
+	/* R_MVOLL PG 2 ADDR 0x08 */
+	/* R_MVOLR PG 2 ADDR 0x09 */
+	SOC_DOUBLE_R_TLV("Master Volume", R_MVOLL, R_MVOLR,
+			FB_MVOLL_MVOL_L, FM_MVOLL_MVOL_L, 0, mvol_tlv_arr),
+	/* R_HPVOLL PG 2 ADDR 0x0A */
+	/* R_HPVOLR PG 2 ADDR 0x0B */
+	SOC_DOUBLE_R_TLV("Headphone Volume", R_HPVOLL, R_HPVOLR,
+			FB_HPVOLL_HPVOL_L, FM_HPVOLL_HPVOL_L, 0,
+			hp_vol_tlv_arr),
+	/* R_SPKVOLL PG 2 ADDR 0x0C */
+	/* R_SPKVOLR PG 2 ADDR 0x0D */
+	SOC_DOUBLE_R_TLV("Speaker Volume", R_SPKVOLL, R_SPKVOLR,
+			FB_SPKVOLL_SPKVOL_L, FM_SPKVOLL_SPKVOL_L, 0,
+			spk_vol_tlv_arr),
+	/* R_SUBVOL PG 2 ADDR 0x10 */
+	SOC_SINGLE_TLV("Sub Volume", R_SUBVOL,
+			FB_SUBVOL_SUBVOL, FM_SUBVOL_SUBVOL, 0, spk_vol_tlv_arr),
+	/* R_SPKEQFILT PG 3 ADDR 0x01 */
+	SOC_SINGLE("Speaker EQ 2 Switch",
+			R_SPKEQFILT, FB_SPKEQFILT_EQ2EN, 1, 0),
+	SOC_ENUM("Speaker EQ 2 Band", spk_eq_enums[0]),
+	SOC_SINGLE("Speaker EQ 1 Switch",
+			R_SPKEQFILT, FB_SPKEQFILT_EQ1EN, 1, 0),
+	SOC_ENUM("Speaker EQ 1 Band", spk_eq_enums[1]),
+	/* R_SPKMBCEN PG 3 ADDR 0x0A */
+	SOC_SINGLE("Speaker MBC 3 Switch",
+			R_SPKMBCEN, FB_SPKMBCEN_MBCEN3, 1, 0),
+	SOC_SINGLE("Speaker MBC 2 Switch",
+			R_SPKMBCEN, FB_SPKMBCEN_MBCEN2, 1, 0),
+	SOC_SINGLE("Speaker MBC 1 Switch",
+			R_SPKMBCEN, FB_SPKMBCEN_MBCEN1, 1, 0),
+	/* R_SPKMBCCTL PG 3 ADDR 0x0B */
+	SOC_ENUM("Speaker MBC 3 Mode", spk_mbc3_lvl_det_mode_enum),
+	SOC_ENUM("Speaker MBC 3 Window", spk_mbc3_win_sel_enum),
+	SOC_ENUM("Speaker MBC 2 Mode", spk_mbc2_lvl_det_mode_enum),
+	SOC_ENUM("Speaker MBC 2 Window", spk_mbc2_win_sel_enum),
+	SOC_ENUM("Speaker MBC 1 Mode", spk_mbc1_lvl_det_mode_enum),
+	SOC_ENUM("Speaker MBC 1 Window", spk_mbc1_win_sel_enum),
+	/* R_SPKMBCMUG1 PG 3 ADDR 0x0C */
+	SOC_ENUM("Speaker MBC 1 Phase Polarity", spk_mbc1_phase_pol_enum),
+	SOC_SINGLE_TLV("Speaker MBC1 Make-Up Gain Volume", R_SPKMBCMUG1,
+			FB_SPKMBCMUG_MUGAIN, FM_SPKMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_SPKMBCTHR1 PG 3 ADDR 0x0D */
+	SOC_SINGLE_TLV("Speaker MBC 1 Compressor Threshold Volume",
+			R_SPKMBCTHR1, FB_SPKMBCTHR_THRESH, FM_SPKMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SPKMBCRAT1 PG 3 ADDR 0x0E */
+	SOC_ENUM("Speaker MBC 1 Compressor Ratio", spk_mbc1_comp_rat_enum),
+	/* R_SPKMBCATK1L PG 3 ADDR 0x0F */
+	/* R_SPKMBCATK1H PG 3 ADDR 0x10 */
+	SND_SOC_BYTES("Speaker MBC 1 Attack", R_SPKMBCATK1L, 2),
+	/* R_SPKMBCREL1L PG 3 ADDR 0x11 */
+	/* R_SPKMBCREL1H PG 3 ADDR 0x12 */
+	SND_SOC_BYTES("Speaker MBC 1 Release", R_SPKMBCREL1L, 2),
+	/* R_SPKMBCMUG2 PG 3 ADDR 0x13 */
+	SOC_ENUM("Speaker MBC 2 Phase Polarity", spk_mbc2_phase_pol_enum),
+	SOC_SINGLE_TLV("Speaker MBC2 Make-Up Gain Volume", R_SPKMBCMUG2,
+			FB_SPKMBCMUG_MUGAIN, FM_SPKMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_SPKMBCTHR2 PG 3 ADDR 0x14 */
+	SOC_SINGLE_TLV("Speaker MBC 2 Compressor Threshold Volume",
+			R_SPKMBCTHR2, FB_SPKMBCTHR_THRESH, FM_SPKMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SPKMBCRAT2 PG 3 ADDR 0x15 */
+	SOC_ENUM("Speaker MBC 2 Compressor Ratio", spk_mbc2_comp_rat_enum),
+	/* R_SPKMBCATK2L PG 3 ADDR 0x16 */
+	/* R_SPKMBCATK2H PG 3 ADDR 0x17 */
+	SND_SOC_BYTES("Speaker MBC 2 Attack", R_SPKMBCATK2L, 2),
+	/* R_SPKMBCREL2L PG 3 ADDR 0x18 */
+	/* R_SPKMBCREL2H PG 3 ADDR 0x19 */
+	SND_SOC_BYTES("Speaker MBC 2 Release", R_SPKMBCREL2L, 2),
+	/* R_SPKMBCMUG3 PG 3 ADDR 0x1A */
+	SOC_ENUM("Speaker MBC 3 Phase Polarity", spk_mbc3_phase_pol_enum),
+	SOC_SINGLE_TLV("Speaker MBC 3 Make-Up Gain Volume", R_SPKMBCMUG3,
+			FB_SPKMBCMUG_MUGAIN, FM_SPKMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_SPKMBCTHR3 PG 3 ADDR 0x1B */
+	SOC_SINGLE_TLV("Speaker MBC 3 Threshold Volume", R_SPKMBCTHR3,
+			FB_SPKMBCTHR_THRESH, FM_SPKMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SPKMBCRAT3 PG 3 ADDR 0x1C */
+	SOC_ENUM("Speaker MBC 3 Compressor Ratio", spk_mbc3_comp_rat_enum),
+	/* R_SPKMBCATK3L PG 3 ADDR 0x1D */
+	/* R_SPKMBCATK3H PG 3 ADDR 0x1E */
+	SND_SOC_BYTES("Speaker MBC 3 Attack", R_SPKMBCATK3L, 3),
+	/* R_SPKMBCREL3L PG 3 ADDR 0x1F */
+	/* R_SPKMBCREL3H PG 3 ADDR 0x20 */
+	SND_SOC_BYTES("Speaker MBC 3 Release", R_SPKMBCREL3L, 3),
+	/* R_SPKCLECTL PG 3 ADDR 0x21 */
+	SOC_ENUM("Speaker CLE Level Mode", spk_cle_lvl_mode_enum),
+	SOC_ENUM("Speaker CLE Window", spk_cle_win_sel_enum),
+	SOC_SINGLE("Speaker CLE Expander Switch",
+			R_SPKCLECTL, FB_SPKCLECTL_EXPEN, 1, 0),
+	SOC_SINGLE("Speaker CLE Limiter Switch",
+			R_SPKCLECTL, FB_SPKCLECTL_LIMEN, 1, 0),
+	SOC_SINGLE("Speaker CLE Compressor Switch",
+			R_SPKCLECTL, FB_SPKCLECTL_COMPEN, 1, 0),
+	/* R_SPKCLEMUG PG 3 ADDR 0x22 */
+	SOC_SINGLE_TLV("Speaker CLE Make-Up Gain Volume", R_SPKCLEMUG,
+			FB_SPKCLEMUG_MUGAIN, FM_SPKCLEMUG_MUGAIN,
+			0, cle_mug_tlv_arr),
+	/* R_SPKCOMPTHR PG 3 ADDR 0x23 */
+	SOC_SINGLE_TLV("Speaker Compressor Threshold Volume", R_SPKCOMPTHR,
+			FB_SPKCOMPTHR_THRESH, FM_SPKCOMPTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SPKCOMPRAT PG 3 ADDR 0x24 */
+	SOC_ENUM("Speaker Compressor Ratio", spk_comp_rat_enum),
+	/* R_SPKCOMPATKL PG 3 ADDR 0x25 */
+	/* R_SPKCOMPATKH PG 3 ADDR 0x26 */
+	SND_SOC_BYTES("Speaker Compressor Attack", R_SPKCOMPATKL, 2),
+	/* R_SPKCOMPRELL PG 3 ADDR 0x27 */
+	/* R_SPKCOMPRELH PG 3 ADDR 0x28 */
+	SND_SOC_BYTES("Speaker Compressor Release", R_SPKCOMPRELL, 2),
+	/* R_SPKLIMTHR PG 3 ADDR 0x29 */
+	SOC_SINGLE_TLV("Speaker Limiter Threshold Volume", R_SPKLIMTHR,
+			FB_SPKLIMTHR_THRESH, FM_SPKLIMTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SPKLIMTGT PG 3 ADDR 0x2A */
+	SOC_SINGLE_TLV("Speaker Limiter Target Volume", R_SPKLIMTGT,
+			FB_SPKLIMTGT_TARGET, FM_SPKLIMTGT_TARGET,
+			0, thr_tlv_arr),
+	/* R_SPKLIMATKL PG 3 ADDR 0x2B */
+	/* R_SPKLIMATKH PG 3 ADDR 0x2C */
+	SND_SOC_BYTES("Speaker Limiter Attack", R_SPKLIMATKL, 2),
+	/* R_SPKLIMRELL PG 3 ADDR 0x2D */
+	/* R_SPKLIMRELR PG 3 ADDR 0x2E */
+	SND_SOC_BYTES("Speaker Limiter Release", R_SPKLIMRELL, 2),
+	/* R_SPKEXPTHR PG 3 ADDR 0x2F */
+	SOC_SINGLE_TLV("Speaker Expander Threshold Volume", R_SPKEXPTHR,
+			FB_SPKEXPTHR_THRESH, FM_SPKEXPTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SPKEXPRAT PG 3 ADDR 0x30 */
+	SOC_ENUM("Speaker Expander Ratio", spk_exp_rat_enum),
+	/* R_SPKEXPATKL PG 3 ADDR 0x31 */
+	/* R_SPKEXPATKR PG 3 ADDR 0x32 */
+	SND_SOC_BYTES("Speaker Expander Attack", R_SPKEXPATKL, 2),
+	/* R_SPKEXPRELL PG 3 ADDR 0x33 */
+	/* R_SPKEXPRELR PG 3 ADDR 0x34 */
+	SND_SOC_BYTES("Speaker Expander Release", R_SPKEXPRELL, 2),
+	/* R_SPKFXCTL PG 3 ADDR 0x35 */
+	SOC_SINGLE("Speaker 3D Switch", R_SPKFXCTL, FB_SPKFXCTL_3DEN, 1, 0),
+	SOC_SINGLE("Speaker Treble Enhancement Switch",
+			R_SPKFXCTL, FB_SPKFXCTL_TEEN, 1, 0),
+	SOC_SINGLE("Speaker Treble NLF Switch",
+			R_SPKFXCTL, FB_SPKFXCTL_TNLFBYP, 1, 1),
+	SOC_SINGLE("Speaker Bass Enhancement Switch",
+			R_SPKFXCTL, FB_SPKFXCTL_BEEN, 1, 0),
+	SOC_SINGLE("Speaker Bass NLF Switch",
+			R_SPKFXCTL, FB_SPKFXCTL_BNLFBYP, 1, 1),
+	/* R_DACEQFILT PG 4 ADDR 0x01 */
+	SOC_SINGLE("DAC EQ 2 Switch",
+			R_DACEQFILT, FB_DACEQFILT_EQ2EN, 1, 0),
+	SOC_ENUM("DAC EQ 2 Band", dac_eq_enums[0]),
+	SOC_SINGLE("DAC EQ 1 Switch", R_DACEQFILT, FB_DACEQFILT_EQ1EN, 1, 0),
+	SOC_ENUM("DAC EQ 1 Band", dac_eq_enums[1]),
+	/* R_DACMBCEN PG 4 ADDR 0x0A */
+	SOC_SINGLE("DAC MBC 3 Switch", R_DACMBCEN, FB_DACMBCEN_MBCEN3, 1, 0),
+	SOC_SINGLE("DAC MBC 2 Switch", R_DACMBCEN, FB_DACMBCEN_MBCEN2, 1, 0),
+	SOC_SINGLE("DAC MBC 1 Switch", R_DACMBCEN, FB_DACMBCEN_MBCEN1, 1, 0),
+	/* R_DACMBCCTL PG 4 ADDR 0x0B */
+	SOC_ENUM("DAC MBC 3 Mode", dac_mbc3_lvl_det_mode_enum),
+	SOC_ENUM("DAC MBC 3 Window", dac_mbc3_win_sel_enum),
+	SOC_ENUM("DAC MBC 2 Mode", dac_mbc2_lvl_det_mode_enum),
+	SOC_ENUM("DAC MBC 2 Window", dac_mbc2_win_sel_enum),
+	SOC_ENUM("DAC MBC 1 Mode", dac_mbc1_lvl_det_mode_enum),
+	SOC_ENUM("DAC MBC 1 Window", dac_mbc1_win_sel_enum),
+	/* R_DACMBCMUG1 PG 4 ADDR 0x0C */
+	SOC_ENUM("DAC MBC 1 Phase Polarity", dac_mbc1_phase_pol_enum),
+	SOC_SINGLE_TLV("DAC MBC 1 Make-Up Gain Volume", R_DACMBCMUG1,
+			FB_DACMBCMUG_MUGAIN, FM_DACMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_DACMBCTHR1 PG 4 ADDR 0x0D */
+	SOC_SINGLE_TLV("DAC MBC 1 Compressor Threshold Volume", R_DACMBCTHR1,
+			FB_DACMBCTHR_THRESH, FM_DACMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_DACMBCRAT1 PG 4 ADDR 0x0E */
+	SOC_ENUM("DAC MBC 1 Compressor Ratio", dac_mbc1_comp_rat_enum),
+	/* R_DACMBCATK1L PG 4 ADDR 0x0F */
+	/* R_DACMBCATK1H PG 4 ADDR 0x10 */
+	SND_SOC_BYTES("DAC MBC 1 Attack", R_DACMBCATK1L, 2),
+	/* R_DACMBCREL1L PG 4 ADDR 0x11 */
+	/* R_DACMBCREL1H PG 4 ADDR 0x12 */
+	SND_SOC_BYTES("DAC MBC 1 Release", R_DACMBCREL1L, 2),
+	/* R_DACMBCMUG2 PG 4 ADDR 0x13 */
+	SOC_ENUM("DAC MBC 2 Phase Polarity", dac_mbc2_phase_pol_enum),
+	SOC_SINGLE_TLV("DAC MBC 2 Make-Up Gain Volume", R_DACMBCMUG2,
+			FB_DACMBCMUG_MUGAIN, FM_DACMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_DACMBCTHR2 PG 4 ADDR 0x14 */
+	SOC_SINGLE_TLV("DAC MBC 2 Compressor Threshold Volume", R_DACMBCTHR2,
+			FB_DACMBCTHR_THRESH, FM_DACMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_DACMBCRAT2 PG 4 ADDR 0x15 */
+	SOC_ENUM("DAC MBC 2 Compressor Ratio", dac_mbc2_comp_rat_enum),
+	/* R_DACMBCATK2L PG 4 ADDR 0x16 */
+	/* R_DACMBCATK2H PG 4 ADDR 0x17 */
+	SND_SOC_BYTES("DAC MBC 2 Attack", R_DACMBCATK2L, 2),
+	/* R_DACMBCREL2L PG 4 ADDR 0x18 */
+	/* R_DACMBCREL2H PG 4 ADDR 0x19 */
+	SND_SOC_BYTES("DAC MBC 2 Release", R_DACMBCREL2L, 2),
+	/* R_DACMBCMUG3 PG 4 ADDR 0x1A */
+	SOC_ENUM("DAC MBC 3 Phase Polarity", dac_mbc3_phase_pol_enum),
+	SOC_SINGLE_TLV("DAC MBC 3 Make-Up Gain Volume", R_DACMBCMUG3,
+			FB_DACMBCMUG_MUGAIN, FM_DACMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_DACMBCTHR3 PG 4 ADDR 0x1B */
+	SOC_SINGLE_TLV("DAC MBC 3 Threshold Volume", R_DACMBCTHR3,
+			FB_DACMBCTHR_THRESH, FM_DACMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_DACMBCRAT3 PG 4 ADDR 0x1C */
+	SOC_ENUM("DAC MBC 3 Compressor Ratio", dac_mbc3_comp_rat_enum),
+	/* R_DACMBCATK3L PG 4 ADDR 0x1D */
+	/* R_DACMBCATK3H PG 4 ADDR 0x1E */
+	SND_SOC_BYTES("DAC MBC 3 Attack", R_DACMBCATK3L, 3),
+	/* R_DACMBCREL3L PG 4 ADDR 0x1F */
+	/* R_DACMBCREL3H PG 4 ADDR 0x20 */
+	SND_SOC_BYTES("DAC MBC 3 Release", R_DACMBCREL3L, 3),
+	/* R_DACCLECTL PG 4 ADDR 0x21 */
+	SOC_ENUM("DAC CLE Level Mode", dac_cle_lvl_mode_enum),
+	SOC_ENUM("DAC CLE Window", dac_cle_win_sel_enum),
+	SOC_SINGLE("DAC CLE Expander Switch",
+			R_DACCLECTL, FB_DACCLECTL_EXPEN, 1, 0),
+	SOC_SINGLE("DAC CLE Limiter Switch",
+			R_DACCLECTL, FB_DACCLECTL_LIMEN, 1, 0),
+	SOC_SINGLE("DAC CLE Compressor Switch",
+			R_DACCLECTL, FB_DACCLECTL_COMPEN, 1, 0),
+	/* R_DACCLEMUG PG 4 ADDR 0x22 */
+	SOC_SINGLE_TLV("DAC CLE Make-Up Gain Volume", R_DACCLEMUG,
+			FB_DACCLEMUG_MUGAIN, FM_DACCLEMUG_MUGAIN,
+			0, cle_mug_tlv_arr),
+	/* R_DACCOMPTHR PG 4 ADDR 0x23 */
+	SOC_SINGLE_TLV("DAC Compressor Threshold Volume", R_DACCOMPTHR,
+			FB_DACCOMPTHR_THRESH, FM_DACCOMPTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_DACCOMPRAT PG 4 ADDR 0x24 */
+	SOC_ENUM("DAC Compressor Ratio", dac_comp_rat_enum),
+	/* R_DACCOMPATKL PG 4 ADDR 0x25 */
+	/* R_DACCOMPATKH PG 4 ADDR 0x26 */
+	SND_SOC_BYTES("DAC Compressor Attack", R_DACCOMPATKL, 2),
+	/* R_DACCOMPRELL PG 4 ADDR 0x27 */
+	/* R_DACCOMPRELH PG 4 ADDR 0x28 */
+	SND_SOC_BYTES("DAC Compressor Release", R_DACCOMPRELL, 2),
+	/* R_DACLIMTHR PG 4 ADDR 0x29 */
+	SOC_SINGLE_TLV("DAC Limiter Threshold Volume", R_DACLIMTHR,
+			FB_DACLIMTHR_THRESH, FM_DACLIMTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_DACLIMTGT PG 4 ADDR 0x2A */
+	SOC_SINGLE_TLV("DAC Limiter Target Volume", R_DACLIMTGT,
+			FB_DACLIMTGT_TARGET, FM_DACLIMTGT_TARGET,
+			0, thr_tlv_arr),
+	/* R_DACLIMATKL PG 4 ADDR 0x2B */
+	/* R_DACLIMATKH PG 4 ADDR 0x2C */
+	SND_SOC_BYTES("DAC Limiter Attack", R_DACLIMATKL, 2),
+	/* R_DACLIMRELL PG 4 ADDR 0x2D */
+	/* R_DACLIMRELR PG 4 ADDR 0x2E */
+	SND_SOC_BYTES("DAC Limiter Release", R_DACLIMRELL, 2),
+	/* R_DACEXPTHR PG 4 ADDR 0x2F */
+	SOC_SINGLE_TLV("DAC Expander Threshold Volume", R_DACEXPTHR,
+			FB_DACEXPTHR_THRESH, FM_DACEXPTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_DACEXPRAT PG 4 ADDR 0x30 */
+	SOC_ENUM("DAC Expander Ratio", dac_exp_rat_enum),
+	/* R_DACEXPATKL PG 4 ADDR 0x31 */
+	/* R_DACEXPATKR PG 4 ADDR 0x32 */
+	SND_SOC_BYTES("DAC Expander Attack", R_DACEXPATKL, 2),
+	/* R_DACEXPRELL PG 4 ADDR 0x33 */
+	/* R_DACEXPRELR PG 4 ADDR 0x34 */
+	SND_SOC_BYTES("DAC Expander Release", R_DACEXPRELL, 2),
+	/* R_DACFXCTL PG 4 ADDR 0x35 */
+	SOC_SINGLE("DAC 3D Switch", R_DACFXCTL, FB_DACFXCTL_3DEN, 1, 0),
+	SOC_SINGLE("DAC Treble Enhancement Switch",
+			R_DACFXCTL, FB_DACFXCTL_TEEN, 1, 0),
+	SOC_SINGLE("DAC Treble NLF Switch",
+			R_DACFXCTL, FB_DACFXCTL_TNLFBYP, 1, 1),
+	SOC_SINGLE("DAC Bass Enhancement Switch",
+			R_DACFXCTL, FB_DACFXCTL_BEEN, 1, 0),
+	SOC_SINGLE("DAC Bass NLF Switch",
+			R_DACFXCTL, FB_DACFXCTL_BNLFBYP, 1, 1),
+	/* R_SUBEQFILT PG 5 ADDR 0x01 */
+	SOC_SINGLE("Sub EQ 2 Switch",
+			R_SUBEQFILT, FB_SUBEQFILT_EQ2EN, 1, 0),
+	SOC_ENUM("Sub EQ 2 Band", sub_eq_enums[0]),
+	SOC_SINGLE("Sub EQ 1 Switch", R_SUBEQFILT, FB_SUBEQFILT_EQ1EN, 1, 0),
+	SOC_ENUM("Sub EQ 1 Band", sub_eq_enums[1]),
+	/* R_SUBMBCEN PG 5 ADDR 0x0A */
+	SOC_SINGLE("Sub MBC 3 Switch", R_SUBMBCEN, FB_SUBMBCEN_MBCEN3, 1, 0),
+	SOC_SINGLE("Sub MBC 2 Switch", R_SUBMBCEN, FB_SUBMBCEN_MBCEN2, 1, 0),
+	SOC_SINGLE("Sub MBC 1 Switch", R_SUBMBCEN, FB_SUBMBCEN_MBCEN1, 1, 0),
+	/* R_SUBMBCCTL PG 5 ADDR 0x0B */
+	SOC_ENUM("Sub MBC 3 Mode", sub_mbc3_lvl_det_mode_enum),
+	SOC_ENUM("Sub MBC 3 Window", sub_mbc3_win_sel_enum),
+	SOC_ENUM("Sub MBC 2 Mode", sub_mbc2_lvl_det_mode_enum),
+	SOC_ENUM("Sub MBC 2 Window", sub_mbc2_win_sel_enum),
+	SOC_ENUM("Sub MBC 1 Mode", sub_mbc1_lvl_det_mode_enum),
+	SOC_ENUM("Sub MBC 1 Window", sub_mbc1_win_sel_enum),
+	/* R_SUBMBCMUG1 PG 5 ADDR 0x0C */
+	SOC_ENUM("Sub MBC 1 Phase Polarity", sub_mbc1_phase_pol_enum),
+	SOC_SINGLE_TLV("Sub MBC 1 Make-Up Gain Volume", R_SUBMBCMUG1,
+			FB_SUBMBCMUG_MUGAIN, FM_SUBMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_SUBMBCTHR1 PG 5 ADDR 0x0D */
+	SOC_SINGLE_TLV("Sub MBC 1 Compressor Threshold Volume", R_SUBMBCTHR1,
+			FB_SUBMBCTHR_THRESH, FM_SUBMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SUBMBCRAT1 PG 5 ADDR 0x0E */
+	SOC_ENUM("Sub MBC 1 Compressor Ratio", sub_mbc1_comp_rat_enum),
+	/* R_SUBMBCATK1L PG 5 ADDR 0x0F */
+	/* R_SUBMBCATK1H PG 5 ADDR 0x10 */
+	SND_SOC_BYTES("Sub MBC 1 Attack", R_SUBMBCATK1L, 2),
+	/* R_SUBMBCREL1L PG 5 ADDR 0x11 */
+	/* R_SUBMBCREL1H PG 5 ADDR 0x12 */
+	SND_SOC_BYTES("Sub MBC 1 Release", R_SUBMBCREL1L, 2),
+	/* R_SUBMBCMUG2 PG 5 ADDR 0x13 */
+	SOC_ENUM("Sub MBC 2 Phase Polarity", sub_mbc2_phase_pol_enum),
+	SOC_SINGLE_TLV("Sub MBC 2 Make-Up Gain Volume", R_SUBMBCMUG2,
+			FB_SUBMBCMUG_MUGAIN, FM_SUBMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_SUBMBCTHR2 PG 5 ADDR 0x14 */
+	SOC_SINGLE_TLV("Sub MBC 2 Compressor Threshold Volume", R_SUBMBCTHR2,
+			FB_SUBMBCTHR_THRESH, FM_SUBMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SUBMBCRAT2 PG 5 ADDR 0x15 */
+	SOC_ENUM("Sub MBC 2 Compressor Ratio", sub_mbc2_comp_rat_enum),
+	/* R_SUBMBCATK2L PG 5 ADDR 0x16 */
+	/* R_SUBMBCATK2H PG 5 ADDR 0x17 */
+	SND_SOC_BYTES("Sub MBC 2 Attack", R_SUBMBCATK2L, 2),
+	/* R_SUBMBCREL2L PG 5 ADDR 0x18 */
+	/* R_SUBMBCREL2H PG 5 ADDR 0x19 */
+	SND_SOC_BYTES("Sub MBC 2 Release", R_SUBMBCREL2L, 2),
+	/* R_SUBMBCMUG3 PG 5 ADDR 0x1A */
+	SOC_ENUM("Sub MBC 3 Phase Polarity", sub_mbc3_phase_pol_enum),
+	SOC_SINGLE_TLV("Sub MBC 3 Make-Up Gain Volume", R_SUBMBCMUG3,
+			FB_SUBMBCMUG_MUGAIN, FM_SUBMBCMUG_MUGAIN,
+			0, mbc_mug_tlv_arr),
+	/* R_SUBMBCTHR3 PG 5 ADDR 0x1B */
+	SOC_SINGLE_TLV("Sub MBC 3 Threshold Volume", R_SUBMBCTHR3,
+			FB_SUBMBCTHR_THRESH, FM_SUBMBCTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SUBMBCRAT3 PG 5 ADDR 0x1C */
+	SOC_ENUM("Sub MBC 3 Compressor Ratio", sub_mbc3_comp_rat_enum),
+	/* R_SUBMBCATK3L PG 5 ADDR 0x1D */
+	/* R_SUBMBCATK3H PG 5 ADDR 0x1E */
+	SND_SOC_BYTES("Sub MBC 3 Attack", R_SUBMBCATK3L, 3),
+	/* R_SUBMBCREL3L PG 5 ADDR 0x1F */
+	/* R_SUBMBCREL3H PG 5 ADDR 0x20 */
+	SND_SOC_BYTES("Sub MBC 3 Release", R_SUBMBCREL3L, 3),
+	/* R_SUBCLECTL PG 5 ADDR 0x21 */
+	SOC_ENUM("Sub CLE Level Mode", sub_cle_lvl_mode_enum),
+	SOC_ENUM("Sub CLE Window", sub_cle_win_sel_enum),
+	SOC_SINGLE("Sub CLE Expander Switch",
+			R_SUBCLECTL, FB_SUBCLECTL_EXPEN, 1, 0),
+	SOC_SINGLE("Sub CLE Limiter Switch",
+			R_SUBCLECTL, FB_SUBCLECTL_LIMEN, 1, 0),
+	SOC_SINGLE("Sub CLE Compressor Switch",
+			R_SUBCLECTL, FB_SUBCLECTL_COMPEN, 1, 0),
+	/* R_SUBCLEMUG PG 5 ADDR 0x22 */
+	SOC_SINGLE_TLV("Sub CLE Make-Up Gain Volume", R_SUBCLEMUG,
+			FB_SUBCLEMUG_MUGAIN, FM_SUBCLEMUG_MUGAIN,
+			0, cle_mug_tlv_arr),
+	/* R_SUBCOMPTHR PG 5 ADDR 0x23 */
+	SOC_SINGLE_TLV("Sub Compressor Threshold Volume", R_SUBCOMPTHR,
+			FB_SUBCOMPTHR_THRESH, FM_SUBCOMPTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SUBCOMPRAT PG 5 ADDR 0x24 */
+	SOC_ENUM("Sub Compressor Ratio", sub_comp_rat_enum),
+	/* R_SUBCOMPATKL PG 5 ADDR 0x25 */
+	/* R_SUBCOMPATKH PG 5 ADDR 0x26 */
+	SND_SOC_BYTES("Sub Compressor Attack", R_SUBCOMPATKL, 2),
+	/* R_SUBCOMPRELL PG 5 ADDR 0x27 */
+	/* R_SUBCOMPRELH PG 5 ADDR 0x28 */
+	SND_SOC_BYTES("Sub Compressor Release", R_SUBCOMPRELL, 2),
+	/* R_SUBLIMTHR PG 5 ADDR 0x29 */
+	SOC_SINGLE_TLV("Sub Limiter Threshold Volume", R_SUBLIMTHR,
+			FB_SUBLIMTHR_THRESH, FM_SUBLIMTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SUBLIMTGT PG 5 ADDR 0x2A */
+	SOC_SINGLE_TLV("Sub Limiter Target Volume", R_SUBLIMTGT,
+			FB_SUBLIMTGT_TARGET, FM_SUBLIMTGT_TARGET,
+			0, thr_tlv_arr),
+	/* R_SUBLIMATKL PG 5 ADDR 0x2B */
+	/* R_SUBLIMATKH PG 5 ADDR 0x2C */
+	SND_SOC_BYTES("Sub Limiter Attack", R_SUBLIMATKL, 2),
+	/* R_SUBLIMRELL PG 5 ADDR 0x2D */
+	/* R_SUBLIMRELR PG 5 ADDR 0x2E */
+	SND_SOC_BYTES("Sub Limiter Release", R_SUBLIMRELL, 2),
+	/* R_SUBEXPTHR PG 5 ADDR 0x2F */
+	SOC_SINGLE_TLV("Sub Expander Threshold Volume", R_SUBEXPTHR,
+			FB_SUBEXPTHR_THRESH, FM_SUBEXPTHR_THRESH,
+			0, thr_tlv_arr),
+	/* R_SUBEXPRAT PG 5 ADDR 0x30 */
+	SOC_ENUM("Sub Expander Ratio", sub_exp_rat_enum),
+	/* R_SUBEXPATKL PG 5 ADDR 0x31 */
+	/* R_SUBEXPATKR PG 5 ADDR 0x32 */
+	SND_SOC_BYTES("Sub Expander Attack", R_SUBEXPATKL, 2),
+	/* R_SUBEXPRELL PG 5 ADDR 0x33 */
+	/* R_SUBEXPRELR PG 5 ADDR 0x34 */
+	SND_SOC_BYTES("Sub Expander Release", R_SUBEXPRELL, 2),
+	/* R_SUBFXCTL PG 5 ADDR 0x35 */
+	SOC_SINGLE("Sub Treble Enhancement Switch",
+			R_SUBFXCTL, FB_SUBFXCTL_TEEN, 1, 0),
+	SOC_SINGLE("Sub Treble NLF Switch",
+			R_SUBFXCTL, FB_SUBFXCTL_TNLFBYP, 1, 1),
+	SOC_SINGLE("Sub Bass Enhancement Switch",
+			R_SUBFXCTL, FB_SUBFXCTL_BEEN, 1, 0),
+	SOC_SINGLE("Sub Bass NLF Switch",
+			R_SUBFXCTL, FB_SUBFXCTL_BNLFBYP, 1, 1),
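+
+	/*
+	 * Coefficient RAM controls; the address space restarts for each of
+	 * the DAC, Speaker, and Sub processing blocks.
+	 */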
+	COEFF_RAM_CTL("DAC Cascade 1 Left BiQuad 1", BIQUAD_SIZE, 0x00),
+	COEFF_RAM_CTL("DAC Cascade 1 Left BiQuad 2", BIQUAD_SIZE, 0x05),
+	COEFF_RAM_CTL("DAC Cascade 1 Left BiQuad 3", BIQUAD_SIZE, 0x0a),
+	COEFF_RAM_CTL("DAC Cascade 1 Left BiQuad 4", BIQUAD_SIZE, 0x0f),
+	COEFF_RAM_CTL("DAC Cascade 1 Left BiQuad 5", BIQUAD_SIZE, 0x14),
+	COEFF_RAM_CTL("DAC Cascade 1 Left BiQuad 6", BIQUAD_SIZE, 0x19),
+
+	COEFF_RAM_CTL("DAC Cascade 1 Right BiQuad 1", BIQUAD_SIZE, 0x20),
+	COEFF_RAM_CTL("DAC Cascade 1 Right BiQuad 2", BIQUAD_SIZE, 0x25),
+	COEFF_RAM_CTL("DAC Cascade 1 Right BiQuad 3", BIQUAD_SIZE, 0x2a),
+	COEFF_RAM_CTL("DAC Cascade 1 Right BiQuad 4", BIQUAD_SIZE, 0x2f),
+	COEFF_RAM_CTL("DAC Cascade 1 Right BiQuad 5", BIQUAD_SIZE, 0x34),
+	COEFF_RAM_CTL("DAC Cascade 1 Right BiQuad 6", BIQUAD_SIZE, 0x39),
+
+	COEFF_RAM_CTL("DAC Cascade 1 Left Prescale", COEFF_SIZE, 0x1f),
+	COEFF_RAM_CTL("DAC Cascade 1 Right Prescale", COEFF_SIZE, 0x3f),
+
+	COEFF_RAM_CTL("DAC Cascade 2 Left BiQuad 1", BIQUAD_SIZE, 0x40),
+	COEFF_RAM_CTL("DAC Cascade 2 Left BiQuad 2", BIQUAD_SIZE, 0x45),
+	COEFF_RAM_CTL("DAC Cascade 2 Left BiQuad 3", BIQUAD_SIZE, 0x4a),
+	COEFF_RAM_CTL("DAC Cascade 2 Left BiQuad 4", BIQUAD_SIZE, 0x4f),
+	COEFF_RAM_CTL("DAC Cascade 2 Left BiQuad 5", BIQUAD_SIZE, 0x54),
+	COEFF_RAM_CTL("DAC Cascade 2 Left BiQuad 6", BIQUAD_SIZE, 0x59),
+
+	COEFF_RAM_CTL("DAC Cascade 2 Right BiQuad 1", BIQUAD_SIZE, 0x60),
+	COEFF_RAM_CTL("DAC Cascade 2 Right BiQuad 2", BIQUAD_SIZE, 0x65),
+	COEFF_RAM_CTL("DAC Cascade 2 Right BiQuad 3", BIQUAD_SIZE, 0x6a),
+	COEFF_RAM_CTL("DAC Cascade 2 Right BiQuad 4", BIQUAD_SIZE, 0x6f),
+	COEFF_RAM_CTL("DAC Cascade 2 Right BiQuad 5", BIQUAD_SIZE, 0x74),
+	COEFF_RAM_CTL("DAC Cascade 2 Right BiQuad 6", BIQUAD_SIZE, 0x79),
+
+	COEFF_RAM_CTL("DAC Cascade 2 Left Prescale", COEFF_SIZE, 0x5f),
+	COEFF_RAM_CTL("DAC Cascade 2 Right Prescale", COEFF_SIZE, 0x7f),
+
+	COEFF_RAM_CTL("DAC Bass Extraction BiQuad 1", BIQUAD_SIZE, 0x80),
+	COEFF_RAM_CTL("DAC Bass Extraction BiQuad 2", BIQUAD_SIZE, 0x85),
+
+	COEFF_RAM_CTL("DAC Bass Non Linear Function 1", COEFF_SIZE, 0x8a),
+	COEFF_RAM_CTL("DAC Bass Non Linear Function 2", COEFF_SIZE, 0x8b),
+
+	COEFF_RAM_CTL("DAC Bass Limiter BiQuad", BIQUAD_SIZE, 0x8c),
+
+	COEFF_RAM_CTL("DAC Bass Cut Off BiQuad", BIQUAD_SIZE, 0x91),
+
+	COEFF_RAM_CTL("DAC Bass Mix", COEFF_SIZE, 0x96),
+
+	COEFF_RAM_CTL("DAC Treb Extraction BiQuad 1", BIQUAD_SIZE, 0x97),
+	COEFF_RAM_CTL("DAC Treb Extraction BiQuad 2", BIQUAD_SIZE, 0x9c),
+
+	COEFF_RAM_CTL("DAC Treb Non Linear Function 1", COEFF_SIZE, 0xa1),
+	COEFF_RAM_CTL("DAC Treb Non Linear Function 2", COEFF_SIZE, 0xa2),
+
+	COEFF_RAM_CTL("DAC Treb Limiter BiQuad", BIQUAD_SIZE, 0xa3),
+
+	COEFF_RAM_CTL("DAC Treb Cut Off BiQuad", BIQUAD_SIZE, 0xa8),
+
+	COEFF_RAM_CTL("DAC Treb Mix", COEFF_SIZE, 0xad),
+
+	COEFF_RAM_CTL("DAC 3D", COEFF_SIZE, 0xae),
+
+	COEFF_RAM_CTL("DAC 3D Mix", COEFF_SIZE, 0xaf),
+
+	COEFF_RAM_CTL("DAC MBC 1 BiQuad 1", BIQUAD_SIZE, 0xb0),
+	COEFF_RAM_CTL("DAC MBC 1 BiQuad 2", BIQUAD_SIZE, 0xb5),
+
+	COEFF_RAM_CTL("DAC MBC 2 BiQuad 1", BIQUAD_SIZE, 0xba),
+	COEFF_RAM_CTL("DAC MBC 2 BiQuad 2", BIQUAD_SIZE, 0xbf),
+
+	COEFF_RAM_CTL("DAC MBC 3 BiQuad 1", BIQUAD_SIZE, 0xc4),
+	COEFF_RAM_CTL("DAC MBC 3 BiQuad 2", BIQUAD_SIZE, 0xc9),
+
+	COEFF_RAM_CTL("Speaker Cascade 1 Left BiQuad 1", BIQUAD_SIZE, 0x00),
+	COEFF_RAM_CTL("Speaker Cascade 1 Left BiQuad 2", BIQUAD_SIZE, 0x05),
+	COEFF_RAM_CTL("Speaker Cascade 1 Left BiQuad 3", BIQUAD_SIZE, 0x0a),
+	COEFF_RAM_CTL("Speaker Cascade 1 Left BiQuad 4", BIQUAD_SIZE, 0x0f),
+	COEFF_RAM_CTL("Speaker Cascade 1 Left BiQuad 5", BIQUAD_SIZE, 0x14),
+	COEFF_RAM_CTL("Speaker Cascade 1 Left BiQuad 6", BIQUAD_SIZE, 0x19),
+
+	COEFF_RAM_CTL("Speaker Cascade 1 Right BiQuad 1", BIQUAD_SIZE, 0x20),
+	COEFF_RAM_CTL("Speaker Cascade 1 Right BiQuad 2", BIQUAD_SIZE, 0x25),
+	COEFF_RAM_CTL("Speaker Cascade 1 Right BiQuad 3", BIQUAD_SIZE, 0x2a),
+	COEFF_RAM_CTL("Speaker Cascade 1 Right BiQuad 4", BIQUAD_SIZE, 0x2f),
+	COEFF_RAM_CTL("Speaker Cascade 1 Right BiQuad 5", BIQUAD_SIZE, 0x34),
+	COEFF_RAM_CTL("Speaker Cascade 1 Right BiQuad 6", BIQUAD_SIZE, 0x39),
+
+	COEFF_RAM_CTL("Speaker Cascade 1 Left Prescale", COEFF_SIZE, 0x1f),
+	COEFF_RAM_CTL("Speaker Cascade 1 Right Prescale", COEFF_SIZE, 0x3f),
+
+	COEFF_RAM_CTL("Speaker Cascade 2 Left BiQuad 1", BIQUAD_SIZE, 0x40),
+	COEFF_RAM_CTL("Speaker Cascade 2 Left BiQuad 2", BIQUAD_SIZE, 0x45),
+	COEFF_RAM_CTL("Speaker Cascade 2 Left BiQuad 3", BIQUAD_SIZE, 0x4a),
+	COEFF_RAM_CTL("Speaker Cascade 2 Left BiQuad 4", BIQUAD_SIZE, 0x4f),
+	COEFF_RAM_CTL("Speaker Cascade 2 Left BiQuad 5", BIQUAD_SIZE, 0x54),
+	COEFF_RAM_CTL("Speaker Cascade 2 Left BiQuad 6", BIQUAD_SIZE, 0x59),
+
+	COEFF_RAM_CTL("Speaker Cascade 2 Right BiQuad 1", BIQUAD_SIZE, 0x60),
+	COEFF_RAM_CTL("Speaker Cascade 2 Right BiQuad 2", BIQUAD_SIZE, 0x65),
+	COEFF_RAM_CTL("Speaker Cascade 2 Right BiQuad 3", BIQUAD_SIZE, 0x6a),
+	COEFF_RAM_CTL("Speaker Cascade 2 Right BiQuad 4", BIQUAD_SIZE, 0x6f),
+	COEFF_RAM_CTL("Speaker Cascade 2 Right BiQuad 5", BIQUAD_SIZE, 0x74),
+	COEFF_RAM_CTL("Speaker Cascade 2 Right BiQuad 6", BIQUAD_SIZE, 0x79),
+
+	COEFF_RAM_CTL("Speaker Cascade 2 Left Prescale", COEFF_SIZE, 0x5f),
+	COEFF_RAM_CTL("Speaker Cascade 2 Right Prescale", COEFF_SIZE, 0x7f),
+
+	COEFF_RAM_CTL("Speaker Bass Extraction BiQuad 1", BIQUAD_SIZE, 0x80),
+	COEFF_RAM_CTL("Speaker Bass Extraction BiQuad 2", BIQUAD_SIZE, 0x85),
+
+	COEFF_RAM_CTL("Speaker Bass Non Linear Function 1", COEFF_SIZE, 0x8a),
+	COEFF_RAM_CTL("Speaker Bass Non Linear Function 2", COEFF_SIZE, 0x8b),
+
+	COEFF_RAM_CTL("Speaker Bass Limiter BiQuad", BIQUAD_SIZE, 0x8c),
+
+	COEFF_RAM_CTL("Speaker Bass Cut Off BiQuad", BIQUAD_SIZE, 0x91),
+
+	COEFF_RAM_CTL("Speaker Bass Mix", COEFF_SIZE, 0x96),
+
+	COEFF_RAM_CTL("Speaker Treb Extraction BiQuad 1", BIQUAD_SIZE, 0x97),
+	COEFF_RAM_CTL("Speaker Treb Extraction BiQuad 2", BIQUAD_SIZE, 0x9c),
+
+	COEFF_RAM_CTL("Speaker Treb Non Linear Function 1", COEFF_SIZE, 0xa1),
+	COEFF_RAM_CTL("Speaker Treb Non Linear Function 2", COEFF_SIZE, 0xa2),
+
+	COEFF_RAM_CTL("Speaker Treb Limiter BiQuad", BIQUAD_SIZE, 0xa3),
+
+	COEFF_RAM_CTL("Speaker Treb Cut Off BiQuad", BIQUAD_SIZE, 0xa8),
+
+	COEFF_RAM_CTL("Speaker Treb Mix", COEFF_SIZE, 0xad),
+
+	COEFF_RAM_CTL("Speaker 3D", COEFF_SIZE, 0xae),
+
+	COEFF_RAM_CTL("Speaker 3D Mix", COEFF_SIZE, 0xaf),
+
+	COEFF_RAM_CTL("Speaker MBC 1 BiQuad 1", BIQUAD_SIZE, 0xb0),
+	COEFF_RAM_CTL("Speaker MBC 1 BiQuad 2", BIQUAD_SIZE, 0xb5),
+
+	COEFF_RAM_CTL("Speaker MBC 2 BiQuad 1", BIQUAD_SIZE, 0xba),
+	COEFF_RAM_CTL("Speaker MBC 2 BiQuad 2", BIQUAD_SIZE, 0xbf),
+
+	COEFF_RAM_CTL("Speaker MBC 3 BiQuad 1", BIQUAD_SIZE, 0xc4),
+	COEFF_RAM_CTL("Speaker MBC 3 BiQuad 2", BIQUAD_SIZE, 0xc9),
+
+	COEFF_RAM_CTL("Sub Cascade 1 Left BiQuad 1", BIQUAD_SIZE, 0x00),
+	COEFF_RAM_CTL("Sub Cascade 1 Left BiQuad 2", BIQUAD_SIZE, 0x05),
+	COEFF_RAM_CTL("Sub Cascade 1 Left BiQuad 3", BIQUAD_SIZE, 0x0a),
+	COEFF_RAM_CTL("Sub Cascade 1 Left BiQuad 4", BIQUAD_SIZE, 0x0f),
+	COEFF_RAM_CTL("Sub Cascade 1 Left BiQuad 5", BIQUAD_SIZE, 0x14),
+	COEFF_RAM_CTL("Sub Cascade 1 Left BiQuad 6", BIQUAD_SIZE, 0x19),
+
+	COEFF_RAM_CTL("Sub Cascade 1 Right BiQuad 1", BIQUAD_SIZE, 0x20),
+	COEFF_RAM_CTL("Sub Cascade 1 Right BiQuad 2", BIQUAD_SIZE, 0x25),
+	COEFF_RAM_CTL("Sub Cascade 1 Right BiQuad 3", BIQUAD_SIZE, 0x2a),
+	COEFF_RAM_CTL("Sub Cascade 1 Right BiQuad 4", BIQUAD_SIZE, 0x2f),
+	COEFF_RAM_CTL("Sub Cascade 1 Right BiQuad 5", BIQUAD_SIZE, 0x34),
+	COEFF_RAM_CTL("Sub Cascade 1 Right BiQuad 6", BIQUAD_SIZE, 0x39),
+
+	COEFF_RAM_CTL("Sub Cascade 1 Left Prescale", COEFF_SIZE, 0x1f),
+	COEFF_RAM_CTL("Sub Cascade 1 Right Prescale", COEFF_SIZE, 0x3f),
+
+	COEFF_RAM_CTL("Sub Cascade 2 Left BiQuad 1", BIQUAD_SIZE, 0x40),
+	COEFF_RAM_CTL("Sub Cascade 2 Left BiQuad 2", BIQUAD_SIZE, 0x45),
+	COEFF_RAM_CTL("Sub Cascade 2 Left BiQuad 3", BIQUAD_SIZE, 0x4a),
+	COEFF_RAM_CTL("Sub Cascade 2 Left BiQuad 4", BIQUAD_SIZE, 0x4f),
+	COEFF_RAM_CTL("Sub Cascade 2 Left BiQuad 5", BIQUAD_SIZE, 0x54),
+	COEFF_RAM_CTL("Sub Cascade 2 Left BiQuad 6", BIQUAD_SIZE, 0x59),
+
+	COEFF_RAM_CTL("Sub Cascade 2 Right BiQuad 1", BIQUAD_SIZE, 0x60),
+	COEFF_RAM_CTL("Sub Cascade 2 Right BiQuad 2", BIQUAD_SIZE, 0x65),
+	COEFF_RAM_CTL("Sub Cascade 2 Right BiQuad 3", BIQUAD_SIZE, 0x6a),
+	COEFF_RAM_CTL("Sub Cascade 2 Right BiQuad 4", BIQUAD_SIZE, 0x6f),
+	COEFF_RAM_CTL("Sub Cascade 2 Right BiQuad 5", BIQUAD_SIZE, 0x74),
+	COEFF_RAM_CTL("Sub Cascade 2 Right BiQuad 6", BIQUAD_SIZE, 0x79),
+
+	COEFF_RAM_CTL("Sub Cascade 2 Left Prescale", COEFF_SIZE, 0x5f),
+	COEFF_RAM_CTL("Sub Cascade 2 Right Prescale", COEFF_SIZE, 0x7f),
+
+	COEFF_RAM_CTL("Sub Bass Extraction BiQuad 1", BIQUAD_SIZE, 0x80),
+	COEFF_RAM_CTL("Sub Bass Extraction BiQuad 2", BIQUAD_SIZE, 0x85),
+
+	COEFF_RAM_CTL("Sub Bass Non Linear Function 1", COEFF_SIZE, 0x8a),
+	COEFF_RAM_CTL("Sub Bass Non Linear Function 2", COEFF_SIZE, 0x8b),
+
+	COEFF_RAM_CTL("Sub Bass Limiter BiQuad", BIQUAD_SIZE, 0x8c),
+
+	COEFF_RAM_CTL("Sub Bass Cut Off BiQuad", BIQUAD_SIZE, 0x91),
+
+	COEFF_RAM_CTL("Sub Bass Mix", COEFF_SIZE, 0x96),
+
+	COEFF_RAM_CTL("Sub Treb Extraction BiQuad 1", BIQUAD_SIZE, 0x97),
+	COEFF_RAM_CTL("Sub Treb Extraction BiQuad 2", BIQUAD_SIZE, 0x9c),
+
+	COEFF_RAM_CTL("Sub Treb Non Linear Function 1", COEFF_SIZE, 0xa1),
+	COEFF_RAM_CTL("Sub Treb Non Linear Function 2", COEFF_SIZE, 0xa2),
+
+	COEFF_RAM_CTL("Sub Treb Limiter BiQuad", BIQUAD_SIZE, 0xa3),
+
+	COEFF_RAM_CTL("Sub Treb Cut Off BiQuad", BIQUAD_SIZE, 0xa8),
+
+	COEFF_RAM_CTL("Sub Treb Mix", COEFF_SIZE, 0xad),
+
+	COEFF_RAM_CTL("Sub 3D", COEFF_SIZE, 0xae),
+
+	COEFF_RAM_CTL("Sub 3D Mix", COEFF_SIZE, 0xaf),
+
+	COEFF_RAM_CTL("Sub MBC 1 BiQuad 1", BIQUAD_SIZE, 0xb0),
+	COEFF_RAM_CTL("Sub MBC 1 BiQuad 2", BIQUAD_SIZE, 0xb5),
+
+	COEFF_RAM_CTL("Sub MBC 2 BiQuad 1", BIQUAD_SIZE, 0xba),
+	COEFF_RAM_CTL("Sub MBC 2 BiQuad 2", BIQUAD_SIZE, 0xbf),
+
+	COEFF_RAM_CTL("Sub MBC 3 BiQuad 1", BIQUAD_SIZE, 0xc4),
+	COEFF_RAM_CTL("Sub MBC 3 BiQuad 2", BIQUAD_SIZE, 0xc9),
+};
+
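+/* DAPM widgets: power supplies, stream interfaces, converters, and muxes */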
+static struct snd_soc_dapm_widget const tscs454_dapm_widgets[] = {
+	/* R_PLLCTL PG 0 ADDR 0x15 */
+	SND_SOC_DAPM_SUPPLY("PLL 1 Power", R_PLLCTL, FB_PLLCTL_PU_PLL1, 0,
+			pll_power_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_PRE_PMD),
+	SND_SOC_DAPM_SUPPLY("PLL 2 Power", R_PLLCTL, FB_PLLCTL_PU_PLL2, 0,
+			pll_power_event,
+			SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_PRE_PMD),
+	/* R_I2SPINC0 PG 0 ADDR 0x22 */
+	SND_SOC_DAPM_AIF_OUT("DAI 3 Out", "DAI 3 Capture", 0,
+			R_I2SPINC0, FB_I2SPINC0_SDO3TRI, 1),
+	SND_SOC_DAPM_AIF_OUT("DAI 2 Out", "DAI 2 Capture", 0,
+			R_I2SPINC0, FB_I2SPINC0_SDO2TRI, 1),
+	SND_SOC_DAPM_AIF_OUT("DAI 1 Out", "DAI 1 Capture", 0,
+			R_I2SPINC0, FB_I2SPINC0_SDO1TRI, 1),
+	/* R_PWRM0 PG 0 ADDR 0x33 */
+	SND_SOC_DAPM_ADC("Input Processor Channel 3", NULL,
+			R_PWRM0, FB_PWRM0_INPROC3PU, 0),
+	SND_SOC_DAPM_ADC("Input Processor Channel 2", NULL,
+			R_PWRM0, FB_PWRM0_INPROC2PU, 0),
+	SND_SOC_DAPM_ADC("Input Processor Channel 1", NULL,
+			R_PWRM0, FB_PWRM0_INPROC1PU, 0),
+	SND_SOC_DAPM_ADC("Input Processor Channel 0", NULL,
+			R_PWRM0, FB_PWRM0_INPROC0PU, 0),
+	SND_SOC_DAPM_SUPPLY("Mic Bias 2",
+			R_PWRM0, FB_PWRM0_MICB2PU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Mic Bias 1", R_PWRM0,
+			FB_PWRM0_MICB1PU, 0, NULL, 0),
+	/* R_PWRM1 PG 0 ADDR 0x34 */
+	SND_SOC_DAPM_SUPPLY("Sub Power", R_PWRM1, FB_PWRM1_SUBPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Headphone Left Power",
+			R_PWRM1, FB_PWRM1_HPLPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Headphone Right Power",
+			R_PWRM1, FB_PWRM1_HPRPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Speaker Left Power",
+			R_PWRM1, FB_PWRM1_SPKLPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Speaker Right Power",
+			R_PWRM1, FB_PWRM1_SPKRPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Differential Input 2 Power",
+			R_PWRM1, FB_PWRM1_D2S2PU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Differential Input 1 Power",
+			R_PWRM1, FB_PWRM1_D2S1PU, 0, NULL, 0),
+	/* R_PWRM2 PG 0 ADDR 0x35 */
+	SND_SOC_DAPM_SUPPLY("DAI 3 Out Power",
+			R_PWRM2, FB_PWRM2_I2S3OPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("DAI 2 Out Power",
+			R_PWRM2, FB_PWRM2_I2S2OPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("DAI 1 Out Power",
+			R_PWRM2, FB_PWRM2_I2S1OPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("DAI 3 In Power",
+			R_PWRM2, FB_PWRM2_I2S3IPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("DAI 2 In Power",
+			R_PWRM2, FB_PWRM2_I2S2IPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("DAI 1 In Power",
+			R_PWRM2, FB_PWRM2_I2S1IPU, 0, NULL, 0),
+	/* R_PWRM3 PG 0 ADDR 0x36 */
+	SND_SOC_DAPM_SUPPLY("Line Out Left Power",
+			R_PWRM3, FB_PWRM3_LLINEPU, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Line Out Right Power",
+			R_PWRM3, FB_PWRM3_RLINEPU, 0, NULL, 0),
+	/* R_PWRM4 PG 0 ADDR 0x37 */
+	SND_SOC_DAPM_DAC("Sub", NULL, R_PWRM4, FB_PWRM4_OPSUBPU, 0),
+	SND_SOC_DAPM_DAC("DAC Left", NULL, R_PWRM4, FB_PWRM4_OPDACLPU, 0),
+	SND_SOC_DAPM_DAC("DAC Right", NULL, R_PWRM4, FB_PWRM4_OPDACRPU, 0),
+	SND_SOC_DAPM_DAC("ClassD Left", NULL, R_PWRM4, FB_PWRM4_OPSPKLPU, 0),
+	SND_SOC_DAPM_DAC("ClassD Right", NULL, R_PWRM4, FB_PWRM4_OPSPKRPU, 0),
+	/* R_AUDIOMUX1 PG 0 ADDR 0x3A */
+	SND_SOC_DAPM_MUX("DAI 2 Out Mux", SND_SOC_NOPM, 0, 0,
+			&dai2_mux_dapm_enum),
+	SND_SOC_DAPM_MUX("DAI 1 Out Mux", SND_SOC_NOPM, 0, 0,
+			&dai1_mux_dapm_enum),
+	/* R_AUDIOMUX2 PG 0 ADDR 0x3B */
+	SND_SOC_DAPM_MUX("DAC Mux", SND_SOC_NOPM, 0, 0,
+			&dac_mux_dapm_enum),
+	SND_SOC_DAPM_MUX("DAI 3 Out Mux", SND_SOC_NOPM, 0, 0,
+			&dai3_mux_dapm_enum),
+	/* R_AUDIOMUX3 PG 0 ADDR 0x3C */
+	SND_SOC_DAPM_MUX("Sub Mux", SND_SOC_NOPM, 0, 0,
+			&sub_mux_dapm_enum),
+	SND_SOC_DAPM_MUX("Speaker Mux", SND_SOC_NOPM, 0, 0,
+			&classd_mux_dapm_enum),
+	/* R_HSDCTL1 PG 1 ADDR 0x01 */
+	SND_SOC_DAPM_SUPPLY("GHS Detect Power", R_HSDCTL1,
+			FB_HSDCTL1_CON_DET_PWD, 1, NULL, 0),
+	/* R_CH0AIC PG 1 ADDR 0x06 */
+	SND_SOC_DAPM_MUX("Input Boost Channel 0 Mux", SND_SOC_NOPM, 0, 0,
+			&in_bst_mux_ch0_dapm_enum),
+	SND_SOC_DAPM_MUX("ADC Channel 0 Mux", SND_SOC_NOPM, 0, 0,
+			&adc_mux_ch0_dapm_enum),
+	SND_SOC_DAPM_MUX("Input Processor Channel 0 Mux", SND_SOC_NOPM, 0, 0,
+			&in_proc_mux_ch0_dapm_enum),
+	/* R_CH1AIC PG 1 ADDR 0x07 */
+	SND_SOC_DAPM_MUX("Input Boost Channel 1 Mux", SND_SOC_NOPM, 0, 0,
+			&in_bst_mux_ch1_dapm_enum),
+	SND_SOC_DAPM_MUX("ADC Channel 1 Mux", SND_SOC_NOPM, 0, 0,
+			&adc_mux_ch1_dapm_enum),
+	SND_SOC_DAPM_MUX("Input Processor Channel 1 Mux", SND_SOC_NOPM, 0, 0,
+			&in_proc_mux_ch1_dapm_enum),
+	/* Virtual */
+	SND_SOC_DAPM_AIF_IN("DAI 3 In", "DAI 3 Playback", 0,
+			SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_AIF_IN("DAI 2 In", "DAI 2 Playback", 0,
+			SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_AIF_IN("DAI 1 In", "DAI 1 Playback", 0,
+			SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_SUPPLY("PLLs", SND_SOC_NOPM, 0, 0, NULL, 0),
+	SND_SOC_DAPM_OUTPUT("Sub Out"),
+	SND_SOC_DAPM_OUTPUT("Headphone Left"),
+	SND_SOC_DAPM_OUTPUT("Headphone Right"),
+	SND_SOC_DAPM_OUTPUT("Speaker Left"),
+	SND_SOC_DAPM_OUTPUT("Speaker Right"),
+	SND_SOC_DAPM_OUTPUT("Line Out Left"),
+	SND_SOC_DAPM_OUTPUT("Line Out Right"),
+	SND_SOC_DAPM_INPUT("D2S 2"),
+	SND_SOC_DAPM_INPUT("D2S 1"),
+	SND_SOC_DAPM_INPUT("Line In 1 Left"),
+	SND_SOC_DAPM_INPUT("Line In 1 Right"),
+	SND_SOC_DAPM_INPUT("Line In 2 Left"),
+	SND_SOC_DAPM_INPUT("Line In 2 Right"),
+	SND_SOC_DAPM_INPUT("Line In 3 Left"),
+	SND_SOC_DAPM_INPUT("Line In 3 Right"),
+	SND_SOC_DAPM_INPUT("DMic 1"),
+	SND_SOC_DAPM_INPUT("DMic 2"),
+
+	SND_SOC_DAPM_MUX("CH 0_1 Mux", SND_SOC_NOPM, 0, 0,
+			&ch_0_1_mux_dapm_enum),
+	SND_SOC_DAPM_MUX("CH 2_3 Mux", SND_SOC_NOPM, 0, 0,
+			&ch_2_3_mux_dapm_enum),
+	SND_SOC_DAPM_MUX("CH 4_5 Mux", SND_SOC_NOPM, 0, 0,
+			&ch_4_5_mux_dapm_enum),
+};
+
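+/* Routes are { sink, control (NULL for a direct path), source[, connected] } */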
+static struct snd_soc_dapm_route const tscs454_intercon[] = {
+	/* PLLs */
+	{"PLLs", NULL, "PLL 1 Power", pll_connected},
+	{"PLLs", NULL, "PLL 2 Power", pll_connected},
+	/* Inputs */
+	{"DAI 3 In", NULL, "DAI 3 In Power"},
+	{"DAI 2 In", NULL, "DAI 2 In Power"},
+	{"DAI 1 In", NULL, "DAI 1 In Power"},
+	/* Outputs */
+	{"DAI 3 Out", NULL, "DAI 3 Out Power"},
+	{"DAI 2 Out", NULL, "DAI 2 Out Power"},
+	{"DAI 1 Out", NULL, "DAI 1 Out Power"},
+	/* Ch Muxing */
+	{"CH 0_1 Mux", "DAI 1", "DAI 1 In"},
+	{"CH 0_1 Mux", "TDM 0_1", "DAI 1 In"},
+	{"CH 2_3 Mux", "DAI 2", "DAI 2 In"},
+	{"CH 2_3 Mux", "TDM 2_3", "DAI 1 In"},
+	{"CH 4_5 Mux", "DAI 3", "DAI 2 In"},
+	{"CH 4_5 Mux", "TDM 4_5", "DAI 1 In"},
+	/* In/Out Muxing */
+	{"DAI 1 Out Mux", "CH 0_1", "CH 0_1 Mux"},
+	{"DAI 1 Out Mux", "CH 2_3", "CH 2_3 Mux"},
+	{"DAI 1 Out Mux", "CH 4_5", "CH 4_5 Mux"},
+	{"DAI 2 Out Mux", "CH 0_1", "CH 0_1 Mux"},
+	{"DAI 2 Out Mux", "CH 2_3", "CH 2_3 Mux"},
+	{"DAI 2 Out Mux", "CH 4_5", "CH 4_5 Mux"},
+	{"DAI 3 Out Mux", "CH 0_1", "CH 0_1 Mux"},
+	{"DAI 3 Out Mux", "CH 2_3", "CH 2_3 Mux"},
+	{"DAI 3 Out Mux", "CH 4_5", "CH 4_5 Mux"},
+	/******************
+	 * Playback Paths *
+	 ******************/
+	/* DAC Path */
+	{"DAC Mux", "CH 4_5", "CH 4_5 Mux"},
+	{"DAC Mux", "CH 2_3", "CH 2_3 Mux"},
+	{"DAC Mux", "CH 0_1", "CH 0_1 Mux"},
+	{"DAC Left", NULL, "DAC Mux"},
+	{"DAC Right", NULL, "DAC Mux"},
+	{"DAC Left", NULL, "PLLs"},
+	{"DAC Right", NULL, "PLLs"},
+	{"Headphone Left", NULL, "Headphone Left Power"},
+	{"Headphone Right", NULL, "Headphone Right Power"},
+	{"Headphone Left", NULL, "DAC Left"},
+	{"Headphone Right", NULL, "DAC Right"},
+	/* Line Out */
+	{"Line Out Left", NULL, "Line Out Left Power"},
+	{"Line Out Right", NULL, "Line Out Right Power"},
+	{"Line Out Left", NULL, "DAC Left"},
+	{"Line Out Right", NULL, "DAC Right"},
+	/* ClassD Path */
+	{"Speaker Mux", "CH 4_5", "CH 4_5 Mux"},
+	{"Speaker Mux", "CH 2_3", "CH 2_3 Mux"},
+	{"Speaker Mux", "CH 0_1", "CH 0_1 Mux"},
+	{"ClassD Left", NULL, "Speaker Mux"},
+	{"ClassD Right", NULL, "Speaker Mux"},
+	{"ClassD Left", NULL, "PLLs"},
+	{"ClassD Right", NULL, "PLLs"},
+	{"Speaker Left", NULL, "Speaker Left Power"},
+	{"Speaker Right", NULL, "Speaker Right Power"},
+	{"Speaker Left", NULL, "ClassD Left"},
+	{"Speaker Right", NULL, "ClassD Right"},
+	/* Sub Path */
+	{"Sub Mux", "CH 4", "CH 4_5 Mux"},
+	{"Sub Mux", "CH 5", "CH 4_5 Mux"},
+	{"Sub Mux", "CH 4 + 5", "CH 4_5 Mux"},
+	{"Sub Mux", "CH 2", "CH 2_3 Mux"},
+	{"Sub Mux", "CH 3", "CH 2_3 Mux"},
+	{"Sub Mux", "CH 2 + 3", "CH 2_3 Mux"},
+	{"Sub Mux", "CH 0", "CH 0_1 Mux"},
+	{"Sub Mux", "CH 1", "CH 0_1 Mux"},
+	{"Sub Mux", "CH 0 + 1", "CH 0_1 Mux"},
+	{"Sub Mux", "ADC/DMic 1 Left", "Input Processor Channel 0"},
+	{"Sub Mux", "ADC/DMic 1 Right", "Input Processor Channel 1"},
+	{"Sub Mux", "ADC/DMic 1 Left Plus Right", "Input Processor Channel 0"},
+	{"Sub Mux", "ADC/DMic 1 Left Plus Right", "Input Processor Channel 1"},
+	{"Sub Mux", "DMic 2 Left", "DMic 2"},
+	{"Sub Mux", "DMic 2 Right", "DMic 2"},
+	{"Sub Mux", "DMic 2 Left Plus Right", "DMic 2"},
+	{"Sub Mux", "ClassD Left", "ClassD Left"},
+	{"Sub Mux", "ClassD Right", "ClassD Right"},
+	{"Sub Mux", "ClassD Left Plus Right", "ClassD Left"},
+	{"Sub Mux", "ClassD Left Plus Right", "ClassD Right"},
+	{"Sub", NULL, "Sub Mux"},
+	{"Sub", NULL, "PLLs"},
+	{"Sub Out", NULL, "Sub Power"},
+	{"Sub Out", NULL, "Sub"},
+	/*****************
+	 * Capture Paths *
+	 *****************/
+	{"Input Boost Channel 0 Mux", "Input 3", "Line In 3 Left"},
+	{"Input Boost Channel 0 Mux", "Input 2", "Line In 2 Left"},
+	{"Input Boost Channel 0 Mux", "Input 1", "Line In 1 Left"},
+	{"Input Boost Channel 0 Mux", "D2S", "D2S 1"},
+
+	{"Input Boost Channel 1 Mux", "Input 3", "Line In 3 Right"},
+	{"Input Boost Channel 1 Mux", "Input 2", "Line In 2 Right"},
+	{"Input Boost Channel 1 Mux", "Input 1", "Line In 1 Right"},
+	{"Input Boost Channel 1 Mux", "D2S", "D2S 2"},
+
+	{"ADC Channel 0 Mux", "Input 3 Boost Bypass", "Line In 3 Left"},
+	{"ADC Channel 0 Mux", "Input 2 Boost Bypass", "Line In 2 Left"},
+	{"ADC Channel 0 Mux", "Input 1 Boost Bypass", "Line In 1 Left"},
+	{"ADC Channel 0 Mux", "Input Boost", "Input Boost Channel 0 Mux"},
+
+	{"ADC Channel 1 Mux", "Input 3 Boost Bypass", "Line In 3 Right"},
+	{"ADC Channel 1 Mux", "Input 2 Boost Bypass", "Line In 2 Right"},
+	{"ADC Channel 1 Mux", "Input 1 Boost Bypass", "Line In 1 Right"},
+	{"ADC Channel 1 Mux", "Input Boost", "Input Boost Channel 1 Mux"},
+
+	{"Input Processor Channel 0 Mux", "ADC", "ADC Channel 0 Mux"},
+	{"Input Processor Channel 0 Mux", "DMic", "DMic 1"},
+
+	{"Input Processor Channel 0", NULL, "PLLs"},
+	{"Input Processor Channel 0", NULL, "Input Processor Channel 0 Mux"},
+
+	{"Input Processor Channel 1 Mux", "ADC", "ADC Channel 1 Mux"},
+	{"Input Processor Channel 1 Mux", "DMic", "DMic 1"},
+
+	{"Input Processor Channel 1", NULL, "PLLs"},
+	{"Input Processor Channel 1", NULL, "Input Processor Channel 1 Mux"},
+
+	{"Input Processor Channel 2", NULL, "PLLs"},
+	{"Input Processor Channel 2", NULL, "DMic 2"},
+
+	{"Input Processor Channel 3", NULL, "PLLs"},
+	{"Input Processor Channel 3", NULL, "DMic 2"},
+
+	{"DAI 1 Out Mux", "ADC/DMic 1", "Input Processor Channel 0"},
+	{"DAI 1 Out Mux", "ADC/DMic 1", "Input Processor Channel 1"},
+	{"DAI 1 Out Mux", "DMic 2", "Input Processor Channel 2"},
+	{"DAI 1 Out Mux", "DMic 2", "Input Processor Channel 3"},
+
+	{"DAI 2 Out Mux", "ADC/DMic 1", "Input Processor Channel 0"},
+	{"DAI 2 Out Mux", "ADC/DMic 1", "Input Processor Channel 1"},
+	{"DAI 2 Out Mux", "DMic 2", "Input Processor Channel 2"},
+	{"DAI 2 Out Mux", "DMic 2", "Input Processor Channel 3"},
+
+	{"DAI 3 Out Mux", "ADC/DMic 1", "Input Processor Channel 0"},
+	{"DAI 3 Out Mux", "ADC/DMic 1", "Input Processor Channel 1"},
+	{"DAI 3 Out Mux", "DMic 2", "Input Processor Channel 2"},
+	{"DAI 3 Out Mux", "DMic 2", "Input Processor Channel 3"},
+
+	{"DAI 1 Out", NULL, "DAI 1 Out Mux"},
+	{"DAI 2 Out", NULL, "DAI 2 Out Mux"},
+	{"DAI 3 Out", NULL, "DAI 3 Out Mux"},
+};
+
+/* This is used when BCLK is sourcing the PLLs */
+static int tscs454_set_sysclk(struct snd_soc_dai *dai,
+		int clk_id, unsigned int freq, int dir)
+{
+	struct snd_soc_component *component = dai->component;
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	unsigned int val;
+	int bclk_dai;
+	int ret;
+
+	dev_dbg(component->dev, "%s(): freq = %u\n", __func__, freq);
+
+	ret = snd_soc_component_read(component, R_PLLCTL, &val);
+	if (ret < 0)
+		return ret;
+
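+	/* Only the DAI currently selected as the BCLK source may retune PLLs */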
+	bclk_dai = (val & FM_PLLCTL_BCLKSEL) >> FB_PLLCTL_BCLKSEL;
+	if (bclk_dai != dai->id)
+		return 0;
+
+	tscs454->bclk_freq = freq;
+	return set_sysclk(component);
+}
+
+static int tscs454_set_bclk_ratio(struct snd_soc_dai *dai,
+		unsigned int ratio)
+{
+	unsigned int mask;
+	int ret;
+	struct snd_soc_component *component = dai->component;
+	unsigned int val;
+	int shift;
+
+	dev_dbg(component->dev, "set_bclk_ratio() id = %d ratio = %u\n",
+			dai->id, ratio);
+
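+	/* Each DAI has its own BCLK-to-LRCLK multiple (BCMP) field in R_I2SCMC */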
+	switch (dai->id) {
+	case TSCS454_DAI1_ID:
+		mask = FM_I2SCMC_BCMP1;
+		shift = FB_I2SCMC_BCMP1;
+		break;
+	case TSCS454_DAI2_ID:
+		mask = FM_I2SCMC_BCMP2;
+		shift = FB_I2SCMC_BCMP2;
+		break;
+	case TSCS454_DAI3_ID:
+		mask = FM_I2SCMC_BCMP3;
+		shift = FB_I2SCMC_BCMP3;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Unknown audio interface (%d)\n", ret);
+		return ret;
+	}
+
+	switch (ratio) {
+	case 32:
+		val = I2SCMC_BCMP_32X;
+		break;
+	case 40:
+		val = I2SCMC_BCMP_40X;
+		break;
+	case 64:
+		val = I2SCMC_BCMP_64X;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Unsupported bclk ratio (%d)\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_component_update_bits(component,
+			R_I2SCMC, mask, val << shift);
+	if (ret < 0) {
+		dev_err(component->dev,
+				"Failed to set DAI BCLK ratio (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static inline int set_aif_master_from_fmt(struct snd_soc_component *component,
+		struct aif *aif, unsigned int fmt)
+{
+	int ret;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		aif->master = true;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		aif->master = false;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Unsupported format (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
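+/*
+ * DSP A frames carry data one BCLK after frame sync; DSP B starts
+ * immediately. set_aif_format_from_fmt() passes delay accordingly.
+ */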
+static inline int set_aif_tdm_delay(struct snd_soc_component *component,
+		unsigned int dai_id, bool delay)
+{
+	unsigned int reg;
+	int ret;
+
+	switch (dai_id) {
+	case TSCS454_DAI1_ID:
+		reg = R_TDMCTL0;
+		break;
+	case TSCS454_DAI2_ID:
+		reg = R_PCMP2CTL0;
+		break;
+	case TSCS454_DAI3_ID:
+		reg = R_PCMP3CTL0;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev,
+				"DAI %d unknown (%d)\n", dai_id + 1, ret);
+		return ret;
+	}
+	ret = snd_soc_component_update_bits(component,
+			reg, FM_TDMCTL0_BDELAY, delay);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to setup tdm format (%d)\n",
+				ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static inline int set_aif_format_from_fmt(struct snd_soc_component *component,
+		unsigned int dai_id, unsigned int fmt)
+{
+	unsigned int reg;
+	unsigned int val;
+	int ret;
+
+	switch (dai_id) {
+	case TSCS454_DAI1_ID:
+		reg = R_I2SP1CTL;
+		break;
+	case TSCS454_DAI2_ID:
+		reg = R_I2SP2CTL;
+		break;
+	case TSCS454_DAI3_ID:
+		reg = R_I2SP3CTL;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev,
+				"DAI %d unknown (%d)\n", dai_id + 1, ret);
+		return ret;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_RIGHT_J:
+		val = FV_FORMAT_RIGHT;
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+		val = FV_FORMAT_LEFT;
+		break;
+	case SND_SOC_DAIFMT_I2S:
+		val = FV_FORMAT_I2S;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		ret = set_aif_tdm_delay(component, dai_id, true);
+		if (ret < 0)
+			return ret;
+		val = FV_FORMAT_TDM;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		ret = set_aif_tdm_delay(component, dai_id, false);
+		if (ret < 0)
+			return ret;
+		val = FV_FORMAT_TDM;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Format unsupported (%d)\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_component_update_bits(component,
+			reg, FM_I2SPCTL_FORMAT, val);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to set DAI %d format (%d)\n",
+				dai_id + 1, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static inline int
+set_aif_clock_format_from_fmt(struct snd_soc_component *component,
+		unsigned int dai_id, unsigned int fmt)
+{
+	unsigned int reg;
+	unsigned int val;
+	int ret;
+
+	switch (dai_id) {
+	case TSCS454_DAI1_ID:
+		reg = R_I2SP1CTL;
+		break;
+	case TSCS454_DAI2_ID:
+		reg = R_I2SP2CTL;
+		break;
+	case TSCS454_DAI3_ID:
+		reg = R_I2SP3CTL;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev,
+				"DAI %d unknown (%d)\n", dai_id + 1, ret);
+		return ret;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		val = FV_BCLKP_NOT_INVERTED | FV_LRCLKP_NOT_INVERTED;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		val = FV_BCLKP_NOT_INVERTED | FV_LRCLKP_INVERTED;
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		val = FV_BCLKP_INVERTED | FV_LRCLKP_NOT_INVERTED;
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		val = FV_BCLKP_INVERTED | FV_LRCLKP_INVERTED;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Format unknown (%d)\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_component_update_bits(component, reg,
+			FM_I2SPCTL_BCLKP | FM_I2SPCTL_LRCLKP, val);
+	if (ret < 0) {
+		dev_err(component->dev,
+				"Failed to set clock polarity for DAI%d (%d)\n",
+				dai_id + 1, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int tscs454_set_dai_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+	struct snd_soc_component *component = dai->component;
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	struct aif *aif = &tscs454->aifs[dai->id];
+	int ret;
+
+	ret = set_aif_master_from_fmt(component, aif, fmt);
+	if (ret < 0)
+		return ret;
+
+	ret = set_aif_format_from_fmt(component, dai->id, fmt);
+	if (ret < 0)
+		return ret;
+
+	ret = set_aif_clock_format_from_fmt(component, dai->id, fmt);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int tscs454_dai1_set_tdm_slot(struct snd_soc_dai *dai,
+		unsigned int tx_mask, unsigned int rx_mask, int slots,
+		int slot_width)
+{
+	struct snd_soc_component *component = dai->component;
+	unsigned int val;
+	int ret;
+
+	if (!slots)
+		return 0;
+
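+	/* A valid mask may only have bits below the configured slot count */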
+	if (tx_mask >= (1 << slots) || rx_mask >= (1 << slots)) {
+		ret = -EINVAL;
+		dev_err(component->dev, "Invalid TDM slot mask (%d)\n", ret);
+		return ret;
+	}
+
+	switch (slots) {
+	case 2:
+		val = FV_TDMSO_2 | FV_TDMSI_2;
+		break;
+	case 4:
+		val = FV_TDMSO_4 | FV_TDMSI_4;
+		break;
+	case 6:
+		val = FV_TDMSO_6 | FV_TDMSI_6;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Invalid number of slots (%d)\n", ret);
+		return ret;
+	}
+
+	switch (slot_width) {
+	case 16:
+		val = val | FV_TDMDSS_16;
+		break;
+	case 24:
+		val = val | FV_TDMDSS_24;
+		break;
+	case 32:
+		val = val | FV_TDMDSS_32;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Invalid TDM slot width (%d)\n", ret);
+		return ret;
+	}
+	ret = snd_soc_component_write(component, R_TDMCTL1, val);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to set slots (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int tscs454_dai23_set_tdm_slot(struct snd_soc_dai *dai,
+		unsigned int tx_mask, unsigned int rx_mask, int slots,
+		int slot_width)
+{
+	struct snd_soc_component *component = dai->component;
+	unsigned int reg;
+	unsigned int val;
+	int ret;
+
+	if (!slots)
+		return 0;
+
+	if (tx_mask >= (1 << slots) || rx_mask >= (1 << slots)) {
+		ret = -EINVAL;
+		dev_err(component->dev, "Invalid TDM slot mask (%d)\n", ret);
+		return ret;
+	}
+
+	switch (dai->id) {
+	case TSCS454_DAI2_ID:
+		reg = R_PCMP2CTL1;
+		break;
+	case TSCS454_DAI3_ID:
+		reg = R_PCMP3CTL1;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Unrecognized interface %d (%d)\n",
+				dai->id, ret);
+		return ret;
+	}
+
+	switch (slots) {
+	case 1:
+		val = FV_PCMSOP_1 | FV_PCMSIP_1;
+		break;
+	case 2:
+		val = FV_PCMSOP_2 | FV_PCMSIP_2;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Invalid number of slots (%d)\n", ret);
+		return ret;
+	}
+
+	switch (slot_width) {
+	case 16:
+		val = val | FV_PCMDSSP_16;
+		break;
+	case 24:
+		val = val | FV_PCMDSSP_24;
+		break;
+	case 32:
+		val = val | FV_PCMDSSP_32;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Invalid TDM slot width (%d)\n", ret);
+		return ret;
+	}
+	ret = snd_soc_component_write(component, reg, val);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to set slots (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int set_aif_fs(struct snd_soc_component *component,
+		unsigned int id,
+		unsigned int rate)
+{
+	unsigned int reg;
+	unsigned int br;
+	unsigned int bm;
+	int ret;
+
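+	/*
+	 * Decompose the rate into a base rate (32/44.1/48 kHz) and a
+	 * multiplier (0.25x - 2x) for the I2S master rate register.
+	 */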
+	switch (rate) {
+	case 8000:
+		br = FV_I2SMBR_32;
+		bm = FV_I2SMBM_0PT25;
+		break;
+	case 16000:
+		br = FV_I2SMBR_32;
+		bm = FV_I2SMBM_0PT5;
+		break;
+	case 24000:
+		br = FV_I2SMBR_48;
+		bm = FV_I2SMBM_0PT5;
+		break;
+	case 32000:
+		br = FV_I2SMBR_32;
+		bm = FV_I2SMBM_1;
+		break;
+	case 48000:
+		br = FV_I2SMBR_48;
+		bm = FV_I2SMBM_1;
+		break;
+	case 96000:
+		br = FV_I2SMBR_48;
+		bm = FV_I2SMBM_2;
+		break;
+	case 11025:
+		br = FV_I2SMBR_44PT1;
+		bm = FV_I2SMBM_0PT25;
+		break;
+	case 22050:
+		br = FV_I2SMBR_44PT1;
+		bm = FV_I2SMBM_0PT5;
+		break;
+	case 44100:
+		br = FV_I2SMBR_44PT1;
+		bm = FV_I2SMBM_1;
+		break;
+	case 88200:
+		br = FV_I2SMBR_44PT1;
+		bm = FV_I2SMBM_2;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Unsupported sample rate (%d)\n", ret);
+		return ret;
+	}
+
+	switch (id) {
+	case TSCS454_DAI1_ID:
+		reg = R_I2S1MRATE;
+		break;
+	case TSCS454_DAI2_ID:
+		reg = R_I2S2MRATE;
+		break;
+	case TSCS454_DAI3_ID:
+		reg = R_I2S3MRATE;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "DAI ID not recognized (%d)\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_component_update_bits(component, reg,
+			FM_I2SMRATE_I2SMBR | FM_I2SMRATE_I2SMBM, br|bm);
+	if (ret < 0) {
+		dev_err(component->dev,
+				"Failed to update register (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int set_aif_sample_format(struct snd_soc_component *component,
+		snd_pcm_format_t format,
+		int aif_id)
+{
+	unsigned int reg;
+	unsigned int width;
+	int ret;
+
+	switch (format) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		width = FV_WL_16;
+		break;
+	case SNDRV_PCM_FORMAT_S20_3LE:
+		width = FV_WL_20;
+		break;
+	case SNDRV_PCM_FORMAT_S24_3LE:
+		width = FV_WL_24;
+		break;
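+	/* S24_LE is padded to a 32-bit container, so it uses the 32-bit width */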
+	case SNDRV_PCM_FORMAT_S24_LE:
+	case SNDRV_PCM_FORMAT_S32_LE:
+		width = FV_WL_32;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Unsupported format width (%d)\n", ret);
+		return ret;
+	}
+
+	switch (aif_id) {
+	case TSCS454_DAI1_ID:
+		reg = R_I2SP1CTL;
+		break;
+	case TSCS454_DAI2_ID:
+		reg = R_I2SP2CTL;
+		break;
+	case TSCS454_DAI3_ID:
+		reg = R_I2SP3CTL;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "AIF ID not recognized (%d)\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_component_update_bits(component,
+			reg, FM_I2SPCTL_WL, width);
+	if (ret < 0) {
+		dev_err(component->dev,
+				"Failed to set sample width (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int tscs454_hw_params(struct snd_pcm_substream *substream,
+		struct snd_pcm_hw_params *params,
+		struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *component = dai->component;
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	unsigned int fs = params_rate(params);
+	struct aif *aif = &tscs454->aifs[dai->id];
+	unsigned int val;
+	int ret;
+
+	mutex_lock(&tscs454->aifs_status_lock);
+
+	dev_dbg(component->dev, "%s(): aif %d fs = %u\n", __func__,
+			aif->id, fs);
+
+	if (!aif_active(&tscs454->aifs_status, aif->id)) {
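+		/*
+		 * Rates in the 44.1 kHz family divide PLL_44_1K_RATE evenly
+		 * and take PLL 2; the 48 kHz family takes PLL 1.
+		 */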
+		if (PLL_44_1K_RATE % fs)
+			aif->pll = &tscs454->pll1;
+		else
+			aif->pll = &tscs454->pll2;
+
+		dev_dbg(component->dev, "Reserving pll %d for aif %d\n",
+				aif->pll->id, aif->id);
+
+		reserve_pll(aif->pll);
+	}
+
+	if (!aifs_active(&tscs454->aifs_status)) { /* First active aif */
+		ret = snd_soc_component_read(component, R_ISRC, &val);
+		if (ret < 0)
+			goto exit;
+
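+		/* Pair the internal rate with the PLL serving its base-rate family */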
+		if ((val & FM_ISRC_IBR) == FV_IBR_48)
+			tscs454->internal_rate.pll = &tscs454->pll1;
+		else
+			tscs454->internal_rate.pll = &tscs454->pll2;
+
+		dev_dbg(component->dev, "Reserving pll %d for ir\n",
+				tscs454->internal_rate.pll->id);
+
+		reserve_pll(tscs454->internal_rate.pll);
+	}
+
+	ret = set_aif_fs(component, aif->id, fs);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to set aif fs (%d)\n", ret);
+		goto exit;
+	}
+
+	ret = set_aif_sample_format(component, params_format(params), aif->id);
+	if (ret < 0) {
+		dev_err(component->dev,
+				"Failed to set aif sample format (%d)\n", ret);
+		goto exit;
+	}
+
+	set_aif_status_active(&tscs454->aifs_status, aif->id,
+			substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+
+	dev_dbg(component->dev, "Set aif %d active. Streams status is 0x%x\n",
+		aif->id, tscs454->aifs_status.streams);
+
+	ret = 0;
+exit:
+	mutex_unlock(&tscs454->aifs_status_lock);
+
+	return ret;
+}
+
+static int tscs454_hw_free(struct snd_pcm_substream *substream,
+		struct snd_soc_dai *dai)
+{
+	struct snd_soc_component *component = dai->component;
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	struct aif *aif = &tscs454->aifs[dai->id];
+
+	return aif_free(component, aif,
+			substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+}
+
+static int tscs454_prepare(struct snd_pcm_substream *substream,
+		struct snd_soc_dai *dai)
+{
+	int ret;
+	struct snd_soc_component *component = dai->component;
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	struct aif *aif = &tscs454->aifs[dai->id];
+
+	ret = aif_prepare(component, aif);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static struct snd_soc_dai_ops const tscs454_dai1_ops = {
+	.set_sysclk	= tscs454_set_sysclk,
+	.set_bclk_ratio = tscs454_set_bclk_ratio,
+	.set_fmt	= tscs454_set_dai_fmt,
+	.set_tdm_slot	= tscs454_dai1_set_tdm_slot,
+	.hw_params	= tscs454_hw_params,
+	.hw_free	= tscs454_hw_free,
+	.prepare	= tscs454_prepare,
+};
+
+static struct snd_soc_dai_ops const tscs454_dai23_ops = {
+	.set_sysclk	= tscs454_set_sysclk,
+	.set_bclk_ratio = tscs454_set_bclk_ratio,
+	.set_fmt	= tscs454_set_dai_fmt,
+	.set_tdm_slot	= tscs454_dai23_set_tdm_slot,
+	.hw_params	= tscs454_hw_params,
+	.hw_free	= tscs454_hw_free,
+	.prepare	= tscs454_prepare,
+};
+
+static int tscs454_probe(struct snd_soc_component *component)
+{
+	struct tscs454 *tscs454 = snd_soc_component_get_drvdata(component);
+	unsigned int val;
+	int ret = 0;
+
+	switch (tscs454->sysclk_src_id) {
+	case PLL_INPUT_XTAL:
+		val = FV_PLLISEL_XTAL;
+		break;
+	case PLL_INPUT_MCLK1:
+		val = FV_PLLISEL_MCLK1;
+		break;
+	case PLL_INPUT_MCLK2:
+		val = FV_PLLISEL_MCLK2;
+		break;
+	case PLL_INPUT_BCLK:
+		val = FV_PLLISEL_BCLK;
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(component->dev, "Invalid sysclk src id (%d)\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_component_update_bits(component, R_PLLCTL,
+			FM_PLLCTL_PLLISEL, val);
+	if (ret < 0) {
+		dev_err(component->dev, "Failed to set PLL input (%d)\n", ret);
+		return ret;
+	}
+
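+	/*
+	 * A fixed-rate source (xtal/mclk) can drive the PLLs now; with BCLK
+	 * the rate is only known once the machine driver calls set_sysclk().
+	 */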
+	if (tscs454->sysclk_src_id < PLL_INPUT_BCLK)
+		ret = set_sysclk(component);
+
+	return ret;
+}
+
+static const struct snd_soc_component_driver soc_component_dev_tscs454 = {
+	.probe =	tscs454_probe,
+	.dapm_widgets = tscs454_dapm_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(tscs454_dapm_widgets),
+	.dapm_routes = tscs454_intercon,
+	.num_dapm_routes = ARRAY_SIZE(tscs454_intercon),
+	.controls =	tscs454_snd_controls,
+	.num_controls = ARRAY_SIZE(tscs454_snd_controls),
+};
+
+#define TSCS454_RATES SNDRV_PCM_RATE_8000_96000
+
+#define TSCS454_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE \
+	| SNDRV_PCM_FMTBIT_S24_3LE | SNDRV_PCM_FMTBIT_S24_LE \
+	| SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_driver tscs454_dais[] = {
+	{
+		.name = "tscs454-dai1",
+		.id = TSCS454_DAI1_ID,
+		.playback = {
+			.stream_name = "DAI 1 Playback",
+			.channels_min = 1,
+			.channels_max = 6,
+			.rates = TSCS454_RATES,
+			.formats = TSCS454_FORMATS,},
+		.capture = {
+			.stream_name = "DAI 1 Capture",
+			.channels_min = 1,
+			.channels_max = 6,
+			.rates = TSCS454_RATES,
+			.formats = TSCS454_FORMATS,},
+		.ops = &tscs454_dai1_ops,
+		.symmetric_rates = 1,
+		.symmetric_channels = 1,
+		.symmetric_samplebits = 1,
+	},
+	{
+		.name = "tscs454-dai2",
+		.id = TSCS454_DAI2_ID,
+		.playback = {
+			.stream_name = "DAI 2 Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = TSCS454_RATES,
+			.formats = TSCS454_FORMATS,},
+		.capture = {
+			.stream_name = "DAI 2 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = TSCS454_RATES,
+			.formats = TSCS454_FORMATS,},
+		.ops = &tscs454_dai23_ops,
+		.symmetric_rates = 1,
+		.symmetric_channels = 1,
+		.symmetric_samplebits = 1,
+	},
+	{
+		.name = "tscs454-dai3",
+		.id = TSCS454_DAI3_ID,
+		.playback = {
+			.stream_name = "DAI 3 Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = TSCS454_RATES,
+			.formats = TSCS454_FORMATS,},
+		.capture = {
+			.stream_name = "DAI 3 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = TSCS454_RATES,
+			.formats = TSCS454_FORMATS,},
+		.ops = &tscs454_dai23_ops,
+		.symmetric_rates = 1,
+		.symmetric_channels = 1,
+		.symmetric_samplebits = 1,
+	},
+};
+
+static char const * const src_names[] = {
+	"xtal", "mclk1", "mclk2", "bclk"};
+
+static int tscs454_i2c_probe(struct i2c_client *i2c,
+		const struct i2c_device_id *id)
+{
+	struct tscs454 *tscs454;
+	int src;
+	int ret;
+
+	tscs454 = devm_kzalloc(&i2c->dev, sizeof(*tscs454), GFP_KERNEL);
+	if (!tscs454)
+		return -ENOMEM;
+
+	ret = tscs454_data_init(tscs454, i2c);
+	if (ret < 0)
+		return ret;
+
+	i2c_set_clientdata(i2c, tscs454);
+
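+	/* Look for a fixed-rate clock; if none is described, fall back to BCLK */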
+	for (src = PLL_INPUT_XTAL; src < PLL_INPUT_BCLK; src++) {
+		tscs454->sysclk = devm_clk_get(&i2c->dev, src_names[src]);
+		if (!IS_ERR(tscs454->sysclk)) {
+			break;
+		} else if (PTR_ERR(tscs454->sysclk) != -ENOENT) {
+			ret = PTR_ERR(tscs454->sysclk);
+			dev_err(&i2c->dev, "Failed to get sysclk (%d)\n", ret);
+			return ret;
+		}
+	}
+	dev_dbg(&i2c->dev, "PLL input is %s\n", src_names[src]);
+	tscs454->sysclk_src_id = src;
+
+	ret = regmap_write(tscs454->regmap,
+			R_RESET, FV_RESET_PWR_ON_DEFAULTS);
+	if (ret < 0) {
+		dev_err(&i2c->dev, "Failed to reset the component (%d)\n", ret);
+		return ret;
+	}
+	regcache_mark_dirty(tscs454->regmap);
+
+	ret = regmap_register_patch(tscs454->regmap, tscs454_patch,
+			ARRAY_SIZE(tscs454_patch));
+	if (ret < 0) {
+		dev_err(&i2c->dev, "Failed to apply patch (%d)\n", ret);
+		return ret;
+	}
+	/* Keep the page select register in sync with the regmap cache */
+	regmap_write(tscs454->regmap, R_PAGESEL, 0x00);
+
+	ret = snd_soc_register_component(&i2c->dev, &soc_component_dev_tscs454,
+			tscs454_dais, ARRAY_SIZE(tscs454_dais));
+	if (ret) {
+		dev_err(&i2c->dev, "Failed to register component (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct i2c_device_id tscs454_i2c_id[] = {
+	{ "tscs454", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tscs454_i2c_id);
+
+static const struct of_device_id tscs454_of_match[] = {
+	{ .compatible = "tempo,tscs454", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tscs454_of_match);
+
+static struct i2c_driver tscs454_i2c_driver = {
+	.driver = {
+		.name = "tscs454",
+		.of_match_table = tscs454_of_match,
+	},
+	.probe =    tscs454_i2c_probe,
+	.id_table = tscs454_i2c_id,
+};
+
+module_i2c_driver(tscs454_i2c_driver);
+
+MODULE_AUTHOR("Tempo Semiconductor <steven.eckhoff.opensource@gmail.com");
+MODULE_DESCRIPTION("ASoC TSCS454 driver");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/codecs/tscs454.h b/sound/soc/codecs/tscs454.h
new file mode 100644
index 0000000..1142d73
--- /dev/null
+++ b/sound/soc/codecs/tscs454.h
@@ -0,0 +1,2323 @@
+// SPDX-License-Identifier: GPL-2.0
+// tscs454.h -- TSCS454 ALSA SoC Audio driver
+// Copyright 2018 Tempo Semiconductor, Inc.
+// Author: Steven Eckhoff <steven.eckhoff.opensource@gmail.com>
+
+#ifndef __REDWOODPUBLIC_H__
+#define __REDWOODPUBLIC_H__
+
+#define VIRT_BASE 0x00
+#define PAGE_LEN 0x100
+#define VIRT_PAGE_BASE(page) (VIRT_BASE + (PAGE_LEN * (page)))
+#define VIRT_ADDR(page, address) (VIRT_PAGE_BASE(page) + (address))
+#define ADDR(page, virt_address) ((virt_address) - VIRT_PAGE_BASE(page))
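+/* e.g. VIRT_ADDR(0x1, 0x6) expands to 0x106: page 1, register 0x6 */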
+
+#define R_PAGESEL                       0x0
+#define R_RESET                         VIRT_ADDR(0x0, 0x1)
+#define R_IRQEN                         VIRT_ADDR(0x0, 0x2)
+#define R_IRQMASK                       VIRT_ADDR(0x0, 0x3)
+#define R_IRQSTAT                       VIRT_ADDR(0x0, 0x4)
+#define R_DEVADD0                       VIRT_ADDR(0x0, 0x6)
+#define R_DEVID                         VIRT_ADDR(0x0, 0x8)
+#define R_DEVREV                        VIRT_ADDR(0x0, 0x9)
+#define R_PLLSTAT                       VIRT_ADDR(0x0, 0x0A)
+#define R_PLL1CTL                       VIRT_ADDR(0x0, 0x0B)
+#define R_PLL1RDIV                      VIRT_ADDR(0x0, 0x0C)
+#define R_PLL1ODIV                      VIRT_ADDR(0x0, 0x0D)
+#define R_PLL1FDIVL                     VIRT_ADDR(0x0, 0x0E)
+#define R_PLL1FDIVH                     VIRT_ADDR(0x0, 0x0F)
+#define R_PLL2CTL                       VIRT_ADDR(0x0, 0x10)
+#define R_PLL2RDIV                      VIRT_ADDR(0x0, 0x11)
+#define R_PLL2ODIV                      VIRT_ADDR(0x0, 0x12)
+#define R_PLL2FDIVL                     VIRT_ADDR(0x0, 0x13)
+#define R_PLL2FDIVH                     VIRT_ADDR(0x0, 0x14)
+#define R_PLLCTL                        VIRT_ADDR(0x0, 0x15)
+#define R_ISRC                          VIRT_ADDR(0x0, 0x16)
+#define R_SCLKCTL                       VIRT_ADDR(0x0, 0x18)
+#define R_TIMEBASE                      VIRT_ADDR(0x0, 0x19)
+#define R_I2SP1CTL                      VIRT_ADDR(0x0, 0x1A)
+#define R_I2SP2CTL                      VIRT_ADDR(0x0, 0x1B)
+#define R_I2SP3CTL                      VIRT_ADDR(0x0, 0x1C)
+#define R_I2S1MRATE                     VIRT_ADDR(0x0, 0x1D)
+#define R_I2S2MRATE                     VIRT_ADDR(0x0, 0x1E)
+#define R_I2S3MRATE                     VIRT_ADDR(0x0, 0x1F)
+#define R_I2SCMC                        VIRT_ADDR(0x0, 0x20)
+#define R_MCLK2PINC                     VIRT_ADDR(0x0, 0x21)
+#define R_I2SPINC0                      VIRT_ADDR(0x0, 0x22)
+#define R_I2SPINC1                      VIRT_ADDR(0x0, 0x23)
+#define R_I2SPINC2                      VIRT_ADDR(0x0, 0x24)
+#define R_GPIOCTL0                      VIRT_ADDR(0x0, 0x25)
+#define R_GPIOCTL1                      VIRT_ADDR(0x0, 0x26)
+#define R_ASRC                          VIRT_ADDR(0x0, 0x28)
+#define R_TDMCTL0                       VIRT_ADDR(0x0, 0x2D)
+#define R_TDMCTL1                       VIRT_ADDR(0x0, 0x2E)
+#define R_PCMP2CTL0                     VIRT_ADDR(0x0, 0x2F)
+#define R_PCMP2CTL1                     VIRT_ADDR(0x0, 0x30)
+#define R_PCMP3CTL0                     VIRT_ADDR(0x0, 0x31)
+#define R_PCMP3CTL1                     VIRT_ADDR(0x0, 0x32)
+#define R_PWRM0                         VIRT_ADDR(0x0, 0x33)
+#define R_PWRM1                         VIRT_ADDR(0x0, 0x34)
+#define R_PWRM2                         VIRT_ADDR(0x0, 0x35)
+#define R_PWRM3                         VIRT_ADDR(0x0, 0x36)
+#define R_PWRM4                         VIRT_ADDR(0x0, 0x37)
+#define R_I2SIDCTL                      VIRT_ADDR(0x0, 0x38)
+#define R_I2SODCTL                      VIRT_ADDR(0x0, 0x39)
+#define R_AUDIOMUX1                     VIRT_ADDR(0x0, 0x3A)
+#define R_AUDIOMUX2                     VIRT_ADDR(0x0, 0x3B)
+#define R_AUDIOMUX3                     VIRT_ADDR(0x0, 0x3C)
+#define R_HSDCTL1                       VIRT_ADDR(0x1, 0x1)
+#define R_HSDCTL2                       VIRT_ADDR(0x1, 0x2)
+#define R_HSDSTAT                       VIRT_ADDR(0x1, 0x3)
+#define R_HSDDELAY                      VIRT_ADDR(0x1, 0x4)
+#define R_BUTCTL                        VIRT_ADDR(0x1, 0x5)
+#define R_CH0AIC                        VIRT_ADDR(0x1, 0x6)
+#define R_CH1AIC                        VIRT_ADDR(0x1, 0x7)
+#define R_CH2AIC                        VIRT_ADDR(0x1, 0x8)
+#define R_CH3AIC                        VIRT_ADDR(0x1, 0x9)
+#define R_ICTL0                         VIRT_ADDR(0x1, 0x0A)
+#define R_ICTL1                         VIRT_ADDR(0x1, 0x0B)
+#define R_MICBIAS                       VIRT_ADDR(0x1, 0x0C)
+#define R_PGACTL0                       VIRT_ADDR(0x1, 0x0D)
+#define R_PGACTL1                       VIRT_ADDR(0x1, 0x0E)
+#define R_PGACTL2                       VIRT_ADDR(0x1, 0x0F)
+#define R_PGACTL3                       VIRT_ADDR(0x1, 0x10)
+#define R_PGAZ                          VIRT_ADDR(0x1, 0x11)
+#define R_ICH0VOL                       VIRT_ADDR(0x1, 0x12)
+#define R_ICH1VOL                       VIRT_ADDR(0x1, 0x13)
+#define R_ICH2VOL                       VIRT_ADDR(0x1, 0x14)
+#define R_ICH3VOL                       VIRT_ADDR(0x1, 0x15)
+#define R_ASRCILVOL                     VIRT_ADDR(0x1, 0x16)
+#define R_ASRCIRVOL                     VIRT_ADDR(0x1, 0x17)
+#define R_ASRCOLVOL                     VIRT_ADDR(0x1, 0x18)
+#define R_ASRCORVOL                     VIRT_ADDR(0x1, 0x19)
+#define R_IVOLCTLU                      VIRT_ADDR(0x1, 0x1C)
+#define R_ALCCTL0                       VIRT_ADDR(0x1, 0x1D)
+#define R_ALCCTL1                       VIRT_ADDR(0x1, 0x1E)
+#define R_ALCCTL2                       VIRT_ADDR(0x1, 0x1F)
+#define R_ALCCTL3                       VIRT_ADDR(0x1, 0x20)
+#define R_NGATE                         VIRT_ADDR(0x1, 0x21)
+#define R_DMICCTL                       VIRT_ADDR(0x1, 0x22)
+#define R_DACCTL                        VIRT_ADDR(0x2, 0x1)
+#define R_SPKCTL                        VIRT_ADDR(0x2, 0x2)
+#define R_SUBCTL                        VIRT_ADDR(0x2, 0x3)
+#define R_DCCTL                         VIRT_ADDR(0x2, 0x4)
+#define R_OVOLCTLU                      VIRT_ADDR(0x2, 0x6)
+#define R_MUTEC                         VIRT_ADDR(0x2, 0x7)
+#define R_MVOLL                         VIRT_ADDR(0x2, 0x8)
+#define R_MVOLR                         VIRT_ADDR(0x2, 0x9)
+#define R_HPVOLL                        VIRT_ADDR(0x2, 0x0A)
+#define R_HPVOLR                        VIRT_ADDR(0x2, 0x0B)
+#define R_SPKVOLL                       VIRT_ADDR(0x2, 0x0C)
+#define R_SPKVOLR                       VIRT_ADDR(0x2, 0x0D)
+#define R_SUBVOL                        VIRT_ADDR(0x2, 0x10)
+#define R_COP0                          VIRT_ADDR(0x2, 0x11)
+#define R_COP1                          VIRT_ADDR(0x2, 0x12)
+#define R_COPSTAT                       VIRT_ADDR(0x2, 0x13)
+#define R_PWM0                          VIRT_ADDR(0x2, 0x14)
+#define R_PWM1                          VIRT_ADDR(0x2, 0x15)
+#define R_PWM2                          VIRT_ADDR(0x2, 0x16)
+#define R_PWM3                          VIRT_ADDR(0x2, 0x17)
+#define R_HPSW                          VIRT_ADDR(0x2, 0x18)
+#define R_THERMTS                       VIRT_ADDR(0x2, 0x19)
+#define R_THERMSPK1                     VIRT_ADDR(0x2, 0x1A)
+#define R_THERMSTAT                     VIRT_ADDR(0x2, 0x1B)
+#define R_SCSTAT                        VIRT_ADDR(0x2, 0x1C)
+#define R_SDMON                         VIRT_ADDR(0x2, 0x1D)
+#define R_SPKEQFILT                     VIRT_ADDR(0x3, 0x1)
+#define R_SPKCRWDL                      VIRT_ADDR(0x3, 0x2)
+#define R_SPKCRWDM                      VIRT_ADDR(0x3, 0x3)
+#define R_SPKCRWDH                      VIRT_ADDR(0x3, 0x4)
+#define R_SPKCRRDL                      VIRT_ADDR(0x3, 0x5)
+#define R_SPKCRRDM                      VIRT_ADDR(0x3, 0x6)
+#define R_SPKCRRDH                      VIRT_ADDR(0x3, 0x7)
+#define R_SPKCRADD                      VIRT_ADDR(0x3, 0x8)
+#define R_SPKCRS                        VIRT_ADDR(0x3, 0x9)
+#define R_SPKMBCEN                      VIRT_ADDR(0x3, 0x0A)
+#define R_SPKMBCCTL                     VIRT_ADDR(0x3, 0x0B)
+#define R_SPKMBCMUG1                    VIRT_ADDR(0x3, 0x0C)
+#define R_SPKMBCTHR1                    VIRT_ADDR(0x3, 0x0D)
+#define R_SPKMBCRAT1                    VIRT_ADDR(0x3, 0x0E)
+#define R_SPKMBCATK1L                   VIRT_ADDR(0x3, 0x0F)
+#define R_SPKMBCATK1H                   VIRT_ADDR(0x3, 0x10)
+#define R_SPKMBCREL1L                   VIRT_ADDR(0x3, 0x11)
+#define R_SPKMBCREL1H                   VIRT_ADDR(0x3, 0x12)
+#define R_SPKMBCMUG2                    VIRT_ADDR(0x3, 0x13)
+#define R_SPKMBCTHR2                    VIRT_ADDR(0x3, 0x14)
+#define R_SPKMBCRAT2                    VIRT_ADDR(0x3, 0x15)
+#define R_SPKMBCATK2L                   VIRT_ADDR(0x3, 0x16)
+#define R_SPKMBCATK2H                   VIRT_ADDR(0x3, 0x17)
+#define R_SPKMBCREL2L                   VIRT_ADDR(0x3, 0x18)
+#define R_SPKMBCREL2H                   VIRT_ADDR(0x3, 0x19)
+#define R_SPKMBCMUG3                    VIRT_ADDR(0x3, 0x1A)
+#define R_SPKMBCTHR3                    VIRT_ADDR(0x3, 0x1B)
+#define R_SPKMBCRAT3                    VIRT_ADDR(0x3, 0x1C)
+#define R_SPKMBCATK3L                   VIRT_ADDR(0x3, 0x1D)
+#define R_SPKMBCATK3H                   VIRT_ADDR(0x3, 0x1E)
+#define R_SPKMBCREL3L                   VIRT_ADDR(0x3, 0x1F)
+#define R_SPKMBCREL3H                   VIRT_ADDR(0x3, 0x20)
+#define R_SPKCLECTL                     VIRT_ADDR(0x3, 0x21)
+#define R_SPKCLEMUG                     VIRT_ADDR(0x3, 0x22)
+#define R_SPKCOMPTHR                    VIRT_ADDR(0x3, 0x23)
+#define R_SPKCOMPRAT                    VIRT_ADDR(0x3, 0x24)
+#define R_SPKCOMPATKL                   VIRT_ADDR(0x3, 0x25)
+#define R_SPKCOMPATKH                   VIRT_ADDR(0x3, 0x26)
+#define R_SPKCOMPRELL                   VIRT_ADDR(0x3, 0x27)
+#define R_SPKCOMPRELH                   VIRT_ADDR(0x3, 0x28)
+#define R_SPKLIMTHR                     VIRT_ADDR(0x3, 0x29)
+#define R_SPKLIMTGT                     VIRT_ADDR(0x3, 0x2A)
+#define R_SPKLIMATKL                    VIRT_ADDR(0x3, 0x2B)
+#define R_SPKLIMATKH                    VIRT_ADDR(0x3, 0x2C)
+#define R_SPKLIMRELL                    VIRT_ADDR(0x3, 0x2D)
+#define R_SPKLIMRELH                    VIRT_ADDR(0x3, 0x2E)
+#define R_SPKEXPTHR                     VIRT_ADDR(0x3, 0x2F)
+#define R_SPKEXPRAT                     VIRT_ADDR(0x3, 0x30)
+#define R_SPKEXPATKL                    VIRT_ADDR(0x3, 0x31)
+#define R_SPKEXPATKH                    VIRT_ADDR(0x3, 0x32)
+#define R_SPKEXPRELL                    VIRT_ADDR(0x3, 0x33)
+#define R_SPKEXPRELH                    VIRT_ADDR(0x3, 0x34)
+#define R_SPKFXCTL                      VIRT_ADDR(0x3, 0x35)
+#define R_DACEQFILT                     VIRT_ADDR(0x4, 0x1)
+#define R_DACCRWDL                      VIRT_ADDR(0x4, 0x2)
+#define R_DACCRWDM                      VIRT_ADDR(0x4, 0x3)
+#define R_DACCRWDH                      VIRT_ADDR(0x4, 0x4)
+#define R_DACCRRDL                      VIRT_ADDR(0x4, 0x5)
+#define R_DACCRRDM                      VIRT_ADDR(0x4, 0x6)
+#define R_DACCRRDH                      VIRT_ADDR(0x4, 0x7)
+#define R_DACCRADD                      VIRT_ADDR(0x4, 0x8)
+#define R_DACCRS                        VIRT_ADDR(0x4, 0x9)
+#define R_DACMBCEN                      VIRT_ADDR(0x4, 0x0A)
+#define R_DACMBCCTL                     VIRT_ADDR(0x4, 0x0B)
+#define R_DACMBCMUG1                    VIRT_ADDR(0x4, 0x0C)
+#define R_DACMBCTHR1                    VIRT_ADDR(0x4, 0x0D)
+#define R_DACMBCRAT1                    VIRT_ADDR(0x4, 0x0E)
+#define R_DACMBCATK1L                   VIRT_ADDR(0x4, 0x0F)
+#define R_DACMBCATK1H                   VIRT_ADDR(0x4, 0x10)
+#define R_DACMBCREL1L                   VIRT_ADDR(0x4, 0x11)
+#define R_DACMBCREL1H                   VIRT_ADDR(0x4, 0x12)
+#define R_DACMBCMUG2                    VIRT_ADDR(0x4, 0x13)
+#define R_DACMBCTHR2                    VIRT_ADDR(0x4, 0x14)
+#define R_DACMBCRAT2                    VIRT_ADDR(0x4, 0x15)
+#define R_DACMBCATK2L                   VIRT_ADDR(0x4, 0x16)
+#define R_DACMBCATK2H                   VIRT_ADDR(0x4, 0x17)
+#define R_DACMBCREL2L                   VIRT_ADDR(0x4, 0x18)
+#define R_DACMBCREL2H                   VIRT_ADDR(0x4, 0x19)
+#define R_DACMBCMUG3                    VIRT_ADDR(0x4, 0x1A)
+#define R_DACMBCTHR3                    VIRT_ADDR(0x4, 0x1B)
+#define R_DACMBCRAT3                    VIRT_ADDR(0x4, 0x1C)
+#define R_DACMBCATK3L                   VIRT_ADDR(0x4, 0x1D)
+#define R_DACMBCATK3H                   VIRT_ADDR(0x4, 0x1E)
+#define R_DACMBCREL3L                   VIRT_ADDR(0x4, 0x1F)
+#define R_DACMBCREL3H                   VIRT_ADDR(0x4, 0x20)
+#define R_DACCLECTL                     VIRT_ADDR(0x4, 0x21)
+#define R_DACCLEMUG                     VIRT_ADDR(0x4, 0x22)
+#define R_DACCOMPTHR                    VIRT_ADDR(0x4, 0x23)
+#define R_DACCOMPRAT                    VIRT_ADDR(0x4, 0x24)
+#define R_DACCOMPATKL                   VIRT_ADDR(0x4, 0x25)
+#define R_DACCOMPATKH                   VIRT_ADDR(0x4, 0x26)
+#define R_DACCOMPRELL                   VIRT_ADDR(0x4, 0x27)
+#define R_DACCOMPRELH                   VIRT_ADDR(0x4, 0x28)
+#define R_DACLIMTHR                     VIRT_ADDR(0x4, 0x29)
+#define R_DACLIMTGT                     VIRT_ADDR(0x4, 0x2A)
+#define R_DACLIMATKL                    VIRT_ADDR(0x4, 0x2B)
+#define R_DACLIMATKH                    VIRT_ADDR(0x4, 0x2C)
+#define R_DACLIMRELL                    VIRT_ADDR(0x4, 0x2D)
+#define R_DACLIMRELH                    VIRT_ADDR(0x4, 0x2E)
+#define R_DACEXPTHR                     VIRT_ADDR(0x4, 0x2F)
+#define R_DACEXPRAT                     VIRT_ADDR(0x4, 0x30)
+#define R_DACEXPATKL                    VIRT_ADDR(0x4, 0x31)
+#define R_DACEXPATKH                    VIRT_ADDR(0x4, 0x32)
+#define R_DACEXPRELL                    VIRT_ADDR(0x4, 0x33)
+#define R_DACEXPRELH                    VIRT_ADDR(0x4, 0x34)
+#define R_DACFXCTL                      VIRT_ADDR(0x4, 0x35)
+#define R_SUBEQFILT                     VIRT_ADDR(0x5, 0x1)
+#define R_SUBCRWDL                      VIRT_ADDR(0x5, 0x2)
+#define R_SUBCRWDM                      VIRT_ADDR(0x5, 0x3)
+#define R_SUBCRWDH                      VIRT_ADDR(0x5, 0x4)
+#define R_SUBCRRDL                      VIRT_ADDR(0x5, 0x5)
+#define R_SUBCRRDM                      VIRT_ADDR(0x5, 0x6)
+#define R_SUBCRRDH                      VIRT_ADDR(0x5, 0x7)
+#define R_SUBCRADD                      VIRT_ADDR(0x5, 0x8)
+#define R_SUBCRS                        VIRT_ADDR(0x5, 0x9)
+#define R_SUBMBCEN                      VIRT_ADDR(0x5, 0x0A)
+#define R_SUBMBCCTL                     VIRT_ADDR(0x5, 0x0B)
+#define R_SUBMBCMUG1                    VIRT_ADDR(0x5, 0x0C)
+#define R_SUBMBCTHR1                    VIRT_ADDR(0x5, 0x0D)
+#define R_SUBMBCRAT1                    VIRT_ADDR(0x5, 0x0E)
+#define R_SUBMBCATK1L                   VIRT_ADDR(0x5, 0x0F)
+#define R_SUBMBCATK1H                   VIRT_ADDR(0x5, 0x10)
+#define R_SUBMBCREL1L                   VIRT_ADDR(0x5, 0x11)
+#define R_SUBMBCREL1H                   VIRT_ADDR(0x5, 0x12)
+#define R_SUBMBCMUG2                    VIRT_ADDR(0x5, 0x13)
+#define R_SUBMBCTHR2                    VIRT_ADDR(0x5, 0x14)
+#define R_SUBMBCRAT2                    VIRT_ADDR(0x5, 0x15)
+#define R_SUBMBCATK2L                   VIRT_ADDR(0x5, 0x16)
+#define R_SUBMBCATK2H                   VIRT_ADDR(0x5, 0x17)
+#define R_SUBMBCREL2L                   VIRT_ADDR(0x5, 0x18)
+#define R_SUBMBCREL2H                   VIRT_ADDR(0x5, 0x19)
+#define R_SUBMBCMUG3                    VIRT_ADDR(0x5, 0x1A)
+#define R_SUBMBCTHR3                    VIRT_ADDR(0x5, 0x1B)
+#define R_SUBMBCRAT3                    VIRT_ADDR(0x5, 0x1C)
+#define R_SUBMBCATK3L                   VIRT_ADDR(0x5, 0x1D)
+#define R_SUBMBCATK3H                   VIRT_ADDR(0x5, 0x1E)
+#define R_SUBMBCREL3L                   VIRT_ADDR(0x5, 0x1F)
+#define R_SUBMBCREL3H                   VIRT_ADDR(0x5, 0x20)
+#define R_SUBCLECTL                     VIRT_ADDR(0x5, 0x21)
+#define R_SUBCLEMUG                     VIRT_ADDR(0x5, 0x22)
+#define R_SUBCOMPTHR                    VIRT_ADDR(0x5, 0x23)
+#define R_SUBCOMPRAT                    VIRT_ADDR(0x5, 0x24)
+#define R_SUBCOMPATKL                   VIRT_ADDR(0x5, 0x25)
+#define R_SUBCOMPATKH                   VIRT_ADDR(0x5, 0x26)
+#define R_SUBCOMPRELL                   VIRT_ADDR(0x5, 0x27)
+#define R_SUBCOMPRELH                   VIRT_ADDR(0x5, 0x28)
+#define R_SUBLIMTHR                     VIRT_ADDR(0x5, 0x29)
+#define R_SUBLIMTGT                     VIRT_ADDR(0x5, 0x2A)
+#define R_SUBLIMATKL                    VIRT_ADDR(0x5, 0x2B)
+#define R_SUBLIMATKH                    VIRT_ADDR(0x5, 0x2C)
+#define R_SUBLIMRELL                    VIRT_ADDR(0x5, 0x2D)
+#define R_SUBLIMRELH                    VIRT_ADDR(0x5, 0x2E)
+#define R_SUBEXPTHR                     VIRT_ADDR(0x5, 0x2F)
+#define R_SUBEXPRAT                     VIRT_ADDR(0x5, 0x30)
+#define R_SUBEXPATKL                    VIRT_ADDR(0x5, 0x31)
+#define R_SUBEXPATKH                    VIRT_ADDR(0x5, 0x32)
+#define R_SUBEXPRELL                    VIRT_ADDR(0x5, 0x33)
+#define R_SUBEXPRELH                    VIRT_ADDR(0x5, 0x34)
+#define R_SUBFXCTL                      VIRT_ADDR(0x5, 0x35)
+
+// *** PLLCTL ***
+#define FB_PLLCTL_VCCI_PLL                                  6
+#define FM_PLLCTL_VCCI_PLL                                  0xC0
+
+#define FB_PLLCTL_RZ_PLL                                    3
+#define FM_PLLCTL_RZ_PLL                                    0x38
+
+#define FB_PLLCTL_CP_PLL                                    0
+#define FM_PLLCTL_CP_PLL                                    0x7
+
+// *** PLLRDIV ***
+#define FB_PLLRDIV_REFDIV_PLL                               0
+#define FM_PLLRDIV_REFDIV_PLL                               0xFF
+
+// *** PLLODIV ***
+#define FB_PLLODIV_OUTDIV_PLL                               0
+#define FM_PLLODIV_OUTDIV_PLL                               0xFF
+
+// *** PLLFDIVL ***
+#define FB_PLLFDIVL_FBDIVL_PLL                              0
+#define FM_PLLFDIVL_FBDIVL_PLL                              0xFF
+
+// *** PLLFDIVH ***
+#define FB_PLLFDIVH_FBDIVH_PLL                              0
+#define FM_PLLFDIVH_FBDIVH_PLL                              0xF
+
+// *** I2SPCTL ***
+#define FB_I2SPCTL_BCLKSTAT                                 7
+#define FM_I2SPCTL_BCLKSTAT                                 0x80
+#define FV_BCLKSTAT_LOST                                    0x80
+#define FV_BCLKSTAT_NOT_LOST                                0x0
+
+#define FB_I2SPCTL_BCLKP                                    6
+#define FM_I2SPCTL_BCLKP                                    0x40
+#define FV_BCLKP_NOT_INVERTED                               0x0
+#define FV_BCLKP_INVERTED                                   0x40
+
+#define FB_I2SPCTL_PORTMS                                   5
+#define FM_I2SPCTL_PORTMS                                   0x20
+#define FV_PORTMS_SLAVE                                     0x0
+#define FV_PORTMS_MASTER                                    0x20
+
+#define FB_I2SPCTL_LRCLKP                                   4
+#define FM_I2SPCTL_LRCLKP                                   0x10
+#define FV_LRCLKP_NOT_INVERTED                              0x0
+#define FV_LRCLKP_INVERTED                                  0x10
+
+#define FB_I2SPCTL_WL                                       2
+#define FM_I2SPCTL_WL                                       0xC
+#define FV_WL_16                                            0x0
+#define FV_WL_20                                            0x4
+#define FV_WL_24                                            0x8
+#define FV_WL_32                                            0xC
+
+#define FB_I2SPCTL_FORMAT                                   0
+#define FM_I2SPCTL_FORMAT                                   0x3
+#define FV_FORMAT_RIGHT                                     0x0
+#define FV_FORMAT_LEFT                                      0x1
+#define FV_FORMAT_I2S                                       0x2
+#define FV_FORMAT_TDM                                       0x3
+
+// *** I2SMRATE ***
+#define FB_I2SMRATE_I2SMCLKHALF                             7
+#define FM_I2SMRATE_I2SMCLKHALF                             0x80
+#define FV_I2SMCLKHALF_I2S1MCLKDIV_DIV_2                    0x0
+#define FV_I2SMCLKHALF_I2S1MCLKDIV_ONLY                     0x80
+
+#define FB_I2SMRATE_I2SMCLKDIV                              5
+#define FM_I2SMRATE_I2SMCLKDIV                              0x60
+#define FV_I2SMCLKDIV_125                                   0x0
+#define FV_I2SMCLKDIV_128                                   0x20
+#define FV_I2SMCLKDIV_136                                   0x40
+#define FV_I2SMCLKDIV_192                                   0x60
+
+#define FB_I2SMRATE_I2SMBR                                  3
+#define FM_I2SMRATE_I2SMBR                                  0x18
+#define FV_I2SMBR_32                                        0x0
+#define FV_I2SMBR_44PT1                                     0x8
+#define FV_I2SMBR_48                                        0x10
+#define FV_I2SMBR_MCLK_MODE                                 0x18
+
+#define FB_I2SMRATE_I2SMBM                                  0
+#define FM_I2SMRATE_I2SMBM                                  0x3
+#define FV_I2SMBM_0PT25                                     0x0
+#define FV_I2SMBM_0PT5                                      0x1
+#define FV_I2SMBM_1                                         0x2
+#define FV_I2SMBM_2                                         0x3
+
+// *** PCMPCTL0 ***
+#define FB_PCMPCTL0_PCMFLENP                                2
+#define FM_PCMPCTL0_PCMFLENP                                0x4
+#define FV_PCMFLENP_128                                     0x0
+#define FV_PCMFLENP_256                                     0x4
+
+#define FB_PCMPCTL0_SLSYNCP                                 1
+#define FM_PCMPCTL0_SLSYNCP                                 0x2
+#define FV_SLSYNCP_SHORT                                    0x0
+#define FV_SLSYNCP_LONG                                     0x2
+
+#define FB_PCMPCTL0_BDELAYP                                 0
+#define FM_PCMPCTL0_BDELAYP                                 0x1
+#define FV_BDELAYP_NO_DELAY                                 0x0
+#define FV_BDELAYP_1BCLK_DELAY                              0x1
+
+// *** PCMPCTL1 ***
+#define FB_PCMPCTL1_PCMMOMP                                 6
+#define FM_PCMPCTL1_PCMMOMP                                 0x40
+
+#define FB_PCMPCTL1_PCMSOP                                  5
+#define FM_PCMPCTL1_PCMSOP                                  0x20
+#define FV_PCMSOP_1                                         0x0
+#define FV_PCMSOP_2                                         0x20
+
+#define FB_PCMPCTL1_PCMDSSP                                 3
+#define FM_PCMPCTL1_PCMDSSP                                 0x18
+#define FV_PCMDSSP_16                                       0x0
+#define FV_PCMDSSP_24                                       0x8
+#define FV_PCMDSSP_32                                       0x10
+
+#define FB_PCMPCTL1_PCMMIMP                                 1
+#define FM_PCMPCTL1_PCMMIMP                                 0x2
+
+#define FB_PCMPCTL1_PCMSIP                                  0
+#define FM_PCMPCTL1_PCMSIP                                  0x1
+#define FV_PCMSIP_1                                         0x0
+#define FV_PCMSIP_2                                         0x1
+
+// *** CHAIC ***
+#define FB_CHAIC_MICBST                                     4
+#define FM_CHAIC_MICBST                                     0x30
+
+// *** PGACTL ***
+#define FB_PGACTL_PGAMUTE                                   7
+#define FM_PGACTL_PGAMUTE                                   0x80
+
+#define FB_PGACTL_PGAVOL                                    0
+#define FM_PGACTL_PGAVOL                                    0x3F
+
+// *** ICHVOL ***
+#define FB_ICHVOL_ICHVOL                                    0
+#define FM_ICHVOL_ICHVOL                                    0xFF
+
+// *** SPKMBCMUG ***
+#define FB_SPKMBCMUG_PHASE                                  5
+#define FM_SPKMBCMUG_PHASE                                  0x20
+
+#define FB_SPKMBCMUG_MUGAIN                                 0
+#define FM_SPKMBCMUG_MUGAIN                                 0x1F
+
+// *** SPKMBCTHR ***
+#define FB_SPKMBCTHR_THRESH                                 0
+#define FM_SPKMBCTHR_THRESH                                 0xFF
+
+// *** SPKMBCRAT ***
+#define FB_SPKMBCRAT_RATIO                                  0
+#define FM_SPKMBCRAT_RATIO                                  0x1F
+
+// *** SPKMBCATKL ***
+#define FB_SPKMBCATKL_TCATKL                                0
+#define FM_SPKMBCATKL_TCATKL                                0xFF
+
+// *** SPKMBCATKH ***
+#define FB_SPKMBCATKH_TCATKH                                0
+#define FM_SPKMBCATKH_TCATKH                                0xFF
+
+// *** SPKMBCRELL ***
+#define FB_SPKMBCRELL_TCRELL                                0
+#define FM_SPKMBCRELL_TCRELL                                0xFF
+
+// *** SPKMBCRELH ***
+#define FB_SPKMBCRELH_TCRELH                                0
+#define FM_SPKMBCRELH_TCRELH                                0xFF
+
+// *** DACMBCMUG ***
+#define FB_DACMBCMUG_PHASE                                  5
+#define FM_DACMBCMUG_PHASE                                  0x20
+
+#define FB_DACMBCMUG_MUGAIN                                 0
+#define FM_DACMBCMUG_MUGAIN                                 0x1F
+
+// *** DACMBCTHR ***
+#define FB_DACMBCTHR_THRESH                                 0
+#define FM_DACMBCTHR_THRESH                                 0xFF
+
+// *** DACMBCRAT ***
+#define FB_DACMBCRAT_RATIO                                  0
+#define FM_DACMBCRAT_RATIO                                  0x1F
+
+// *** DACMBCATKL ***
+#define FB_DACMBCATKL_TCATKL                                0
+#define FM_DACMBCATKL_TCATKL                                0xFF
+
+// *** DACMBCATKH ***
+#define FB_DACMBCATKH_TCATKH                                0
+#define FM_DACMBCATKH_TCATKH                                0xFF
+
+// *** DACMBCRELL ***
+#define FB_DACMBCRELL_TCRELL                                0
+#define FM_DACMBCRELL_TCRELL                                0xFF
+
+// *** DACMBCRELH ***
+#define FB_DACMBCRELH_TCRELH                                0
+#define FM_DACMBCRELH_TCRELH                                0xFF
+
+// *** SUBMBCMUG ***
+#define FB_SUBMBCMUG_PHASE                                  5
+#define FM_SUBMBCMUG_PHASE                                  0x20
+
+#define FB_SUBMBCMUG_MUGAIN                                 0
+#define FM_SUBMBCMUG_MUGAIN                                 0x1F
+
+// *** SUBMBCTHR ***
+#define FB_SUBMBCTHR_THRESH                                 0
+#define FM_SUBMBCTHR_THRESH                                 0xFF
+
+// *** SUBMBCRAT ***
+#define FB_SUBMBCRAT_RATIO                                  0
+#define FM_SUBMBCRAT_RATIO                                  0x1F
+
+// *** SUBMBCATKL ***
+#define FB_SUBMBCATKL_TCATKL                                0
+#define FM_SUBMBCATKL_TCATKL                                0xFF
+
+// *** SUBMBCATKH ***
+#define FB_SUBMBCATKH_TCATKH                                0
+#define FM_SUBMBCATKH_TCATKH                                0xFF
+
+// *** SUBMBCRELL ***
+#define FB_SUBMBCRELL_TCRELL                                0
+#define FM_SUBMBCRELL_TCRELL                                0xFF
+
+// *** SUBMBCRELH ***
+#define FB_SUBMBCRELH_TCRELH                                0
+#define FM_SUBMBCRELH_TCRELH                                0xFF
+
+// *** PAGESEL ***
+#define FB_PAGESEL_PAGESEL                                  0
+#define FM_PAGESEL_PAGESEL                                  0xFF
+
+// *** RESET ***
+#define FB_RESET_RESET                                      0
+#define FM_RESET_RESET                                      0xFF
+#define FV_RESET_PWR_ON_DEFAULTS                            0x85
+
+// *** IRQEN ***
+#define FB_IRQEN_THRMINTEN                                  6
+#define FM_IRQEN_THRMINTEN                                  0x40
+#define FV_THRMINTEN_ENABLED                                0x40
+#define FV_THRMINTEN_DISABLED                               0x0
+
+#define FB_IRQEN_HBPINTEN                                   5
+#define FM_IRQEN_HBPINTEN                                   0x20
+#define FV_HBPINTEN_ENABLED                                 0x20
+#define FV_HBPINTEN_DISABLED                                0x0
+
+#define FB_IRQEN_HSDINTEN                                   4
+#define FM_IRQEN_HSDINTEN                                   0x10
+#define FV_HSDINTEN_ENABLED                                 0x10
+#define FV_HSDINTEN_DISABLED                                0x0
+
+#define FB_IRQEN_HPDINTEN                                   3
+#define FM_IRQEN_HPDINTEN                                   0x8
+#define FV_HPDINTEN_ENABLED                                 0x8
+#define FV_HPDINTEN_DISABLED                                0x0
+
+#define FB_IRQEN_GPIO3INTEN                                 1
+#define FM_IRQEN_GPIO3INTEN                                 0x2
+#define FV_GPIO3INTEN_ENABLED                               0x2
+#define FV_GPIO3INTEN_DISABLED                              0x0
+
+#define FB_IRQEN_GPIO2INTEN                                 0
+#define FM_IRQEN_GPIO2INTEN                                 0x1
+#define FV_GPIO2INTEN_ENABLED                               0x1
+#define FV_GPIO2INTEN_DISABLED                              0x0
+
+#define IRQEN_GPIOINTEN_ENABLED                             0x1
+#define IRQEN_GPIOINTEN_DISABLED                            0x0
+
+// *** IRQMASK ***
+#define FB_IRQMASK_THRMIM                                   6
+#define FM_IRQMASK_THRMIM                                   0x40
+#define FV_THRMIM_MASKED                                    0x0
+#define FV_THRMIM_NOT_MASKED                                0x40
+
+#define FB_IRQMASK_HBPIM                                    5
+#define FM_IRQMASK_HBPIM                                    0x20
+#define FV_HBPIM_MASKED                                     0x0
+#define FV_HBPIM_NOT_MASKED                                 0x20
+
+#define FB_IRQMASK_HSDIM                                    4
+#define FM_IRQMASK_HSDIM                                    0x10
+#define FV_HSDIM_MASKED                                     0x0
+#define FV_HSDIM_NOT_MASKED                                 0x10
+
+#define FB_IRQMASK_HPDIM                                    3
+#define FM_IRQMASK_HPDIM                                    0x8
+#define FV_HPDIM_MASKED                                     0x0
+#define FV_HPDIM_NOT_MASKED                                 0x8
+
+#define FB_IRQMASK_GPIO3M                                   1
+#define FM_IRQMASK_GPIO3M                                   0x2
+#define FV_GPIO3M_MASKED                                    0x0
+#define FV_GPIO3M_NOT_MASKED                                0x2
+
+#define FB_IRQMASK_GPIO2M                                   0
+#define FM_IRQMASK_GPIO2M                                   0x1
+#define FV_GPIO2M_MASKED                                    0x0
+#define FV_GPIO2M_NOT_MASKED                                0x1
+
+#define IRQMASK_GPIOM_MASKED                                0x0
+#define IRQMASK_GPIOM_NOT_MASKED                            0x1
+
+// *** IRQSTAT ***
+#define FB_IRQSTAT_THRMINT                                  6
+#define FM_IRQSTAT_THRMINT                                  0x40
+#define FV_THRMINT_INTERRUPTED                              0x40
+#define FV_THRMINT_NOT_INTERRUPTED                          0x0
+
+#define FB_IRQSTAT_HBPINT                                   5
+#define FM_IRQSTAT_HBPINT                                   0x20
+#define FV_HBPINT_INTERRUPTED                               0x20
+#define FV_HBPINT_NOT_INTERRUPTED                           0x0
+
+#define FB_IRQSTAT_HSDINT                                   4
+#define FM_IRQSTAT_HSDINT                                   0x10
+#define FV_HSDINT_INTERRUPTED                               0x10
+#define FV_HSDINT_NOT_INTERRUPTED                           0x0
+
+#define FB_IRQSTAT_HPDINT                                   3
+#define FM_IRQSTAT_HPDINT                                   0x8
+#define FV_HPDINT_INTERRUPTED                               0x8
+#define FV_HPDINT_NOT_INTERRUPTED                           0x0
+
+#define FB_IRQSTAT_GPIO3INT                                 1
+#define FM_IRQSTAT_GPIO3INT                                 0x2
+#define FV_GPIO3INT_INTERRUPTED                             0x2
+#define FV_GPIO3INT_NOT_INTERRUPTED                         0x0
+
+#define FB_IRQSTAT_GPIO2INT                                 0
+#define FM_IRQSTAT_GPIO2INT                                 0x1
+#define FV_GPIO2INT_INTERRUPTED                             0x1
+#define FV_GPIO2INT_NOT_INTERRUPTED                         0x0
+
+#define IRQSTAT_GPIOINT_INTERRUPTED                         0x1
+#define IRQSTAT_GPIOINT_NOT_INTERRUPTED                     0x0
+
+// *** DEVADD0 ***
+#define FB_DEVADD0_DEVADD0                                  1
+#define FM_DEVADD0_DEVADD0                                  0xFE
+
+#define FB_DEVADD0_I2C_ADDRLK                               0
+#define FM_DEVADD0_I2C_ADDRLK                               0x1
+#define FV_I2C_ADDRLK_LOCK                                  0x1
+
+// *** DEVID ***
+#define FB_DEVID_DEV_ID                                     0
+#define FM_DEVID_DEV_ID                                     0xFF
+
+// *** DEVREV ***
+#define FB_DEVREV_MAJ_REV                                   4
+#define FM_DEVREV_MAJ_REV                                   0xF0
+
+#define FB_DEVREV_MIN_REV                                   0
+#define FM_DEVREV_MIN_REV                                   0xF
+
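+// Multi-bit fields extract by masking first and then shifting down by
+// the FB_ offset. A minimal sketch, again with a hypothetical val read
+// from DEVREV, splitting the revision into its two nibbles:
+//
+//   maj_rev = (val & FM_DEVREV_MAJ_REV) >> FB_DEVREV_MAJ_REV;
+//   min_rev = (val & FM_DEVREV_MIN_REV) >> FB_DEVREV_MIN_REV;
+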
+// *** PLLSTAT ***
+#define FB_PLLSTAT_PLL2LK                                   1
+#define FM_PLLSTAT_PLL2LK                                   0x2
+#define FV_PLL2LK_LOCKED                                    0x2
+#define FV_PLL2LK_UNLOCKED                                  0x0
+
+#define FB_PLLSTAT_PLL1LK                                   0
+#define FM_PLLSTAT_PLL1LK                                   0x1
+#define FV_PLL1LK_LOCKED                                    0x1
+#define FV_PLL1LK_UNLOCKED                                  0x0
+
+#define PLLSTAT_PLLLK_LOCKED                                0x1
+#define PLLSTAT_PLLLK_UNLOCKED                              0x0
+
+// *** PLLCTL ***
+#define FB_PLLCTL_PU_PLL2                                   7
+#define FM_PLLCTL_PU_PLL2                                   0x80
+#define FV_PU_PLL2_PWR_UP                                   0x80
+#define FV_PU_PLL2_PWR_DWN                                  0x0
+
+#define FB_PLLCTL_PU_PLL1                                   6
+#define FM_PLLCTL_PU_PLL1                                   0x40
+#define FV_PU_PLL1_PWR_UP                                   0x40
+#define FV_PU_PLL1_PWR_DWN                                  0x0
+
+#define FB_PLLCTL_PLL2CLKEN                                 5
+#define FM_PLLCTL_PLL2CLKEN                                 0x20
+#define FV_PLL2CLKEN_ENABLE                                 0x20
+#define FV_PLL2CLKEN_DISABLE                                0x0
+
+#define FB_PLLCTL_PLL1CLKEN                                 4
+#define FM_PLLCTL_PLL1CLKEN                                 0x10
+#define FV_PLL1CLKEN_ENABLE                                 0x10
+#define FV_PLL1CLKEN_DISABLE                                0x0
+
+#define FB_PLLCTL_BCLKSEL                                   2
+#define FM_PLLCTL_BCLKSEL                                   0xC
+#define FV_BCLKSEL_BCLK1                                    0x0
+#define FV_BCLKSEL_BCLK2                                    0x4
+#define FV_BCLKSEL_BCLK3                                    0x8
+
+#define FB_PLLCTL_PLLISEL                                   0
+#define FM_PLLCTL_PLLISEL                                   0x3
+#define FV_PLLISEL_XTAL                                     0x0
+#define FV_PLLISEL_MCLK1                                    0x1
+#define FV_PLLISEL_MCLK2                                    0x2
+#define FV_PLLISEL_BCLK                                     0x3
+
+#define PLLCTL_PU_PLL_PWR_UP                                0x1
+#define PLLCTL_PU_PLL_PWR_DWN                               0x0
+#define PLLCTL_PLLCLKEN_ENABLE                              0x1
+#define PLLCTL_PLLCLKEN_DISABLE                             0x0
+
+// *** ISRC ***
+#define FB_ISRC_IBR                                         2
+#define FM_ISRC_IBR                                         0x4
+#define FV_IBR_44PT1                                        0x0
+#define FV_IBR_48                                           0x4
+
+#define FB_ISRC_IBM                                         0
+#define FM_ISRC_IBM                                         0x3
+#define FV_IBM_0PT25                                        0x0
+#define FV_IBM_0PT5                                         0x1
+#define FV_IBM_1                                            0x2
+#define FV_IBM_2                                            0x3
+
+// *** SCLKCTL ***
+#define FB_SCLKCTL_ASDM                                     6
+#define FM_SCLKCTL_ASDM                                     0xC0
+#define FV_ASDM_HALF                                        0x40
+#define FV_ASDM_FULL                                        0x80
+#define FV_ASDM_AUTO                                        0xC0
+
+#define FB_SCLKCTL_DSDM                                     4
+#define FM_SCLKCTL_DSDM                                     0x30
+#define FV_DSDM_HALF                                        0x10
+#define FV_DSDM_FULL                                        0x20
+#define FV_DSDM_AUTO                                        0x30
+
+// *** TIMEBASE ***
+#define FB_TIMEBASE_TIMEBASE                                0
+#define FM_TIMEBASE_TIMEBASE                                0xFF
+
+// *** I2SCMC ***
+#define FB_I2SCMC_BCMP3                                     4
+#define FM_I2SCMC_BCMP3                                     0x30
+#define FV_BCMP3_AUTO                                       0x0
+#define FV_BCMP3_32X                                        0x10
+#define FV_BCMP3_40X                                        0x20
+#define FV_BCMP3_64X                                        0x30
+
+#define FB_I2SCMC_BCMP2                                     2
+#define FM_I2SCMC_BCMP2                                     0xC
+#define FV_BCMP2_AUTO                                       0x0
+#define FV_BCMP2_32X                                        0x4
+#define FV_BCMP2_40X                                        0x8
+#define FV_BCMP2_64X                                        0xC
+
+#define FB_I2SCMC_BCMP1                                     0
+#define FM_I2SCMC_BCMP1                                     0x3
+#define FV_BCMP1_AUTO                                       0x0
+#define FV_BCMP1_32X                                        0x1
+#define FV_BCMP1_40X                                        0x2
+#define FV_BCMP1_64X                                        0x3
+
+#define I2SCMC_BCMP_AUTO                                    0x0
+#define I2SCMC_BCMP_32X                                     0x1
+#define I2SCMC_BCMP_40X                                     0x2
+#define I2SCMC_BCMP_64X                                     0x3
+
+// *** MCLK2PINC ***
+#define FB_MCLK2PINC_SLEWOUT                                4
+#define FM_MCLK2PINC_SLEWOUT                                0xF0
+
+#define FB_MCLK2PINC_MCLK2IO                                2
+#define FM_MCLK2PINC_MCLK2IO                                0x4
+#define FV_MCLK2IO_INPUT                                    0x0
+#define FV_MCLK2IO_OUTPUT                                   0x4
+
+#define FB_MCLK2PINC_MCLK2OS                                0
+#define FM_MCLK2PINC_MCLK2OS                                0x3
+#define FV_MCLK2OS_24PT576                                  0x0
+#define FV_MCLK2OS_22PT5792                                 0x1
+#define FV_MCLK2OS_PLL2                                     0x2
+
+// *** I2SPINC0 ***
+#define FB_I2SPINC0_SDO3TRI                                 7
+#define FM_I2SPINC0_SDO3TRI                                 0x80
+
+#define FB_I2SPINC0_SDO2TRI                                 6
+#define FM_I2SPINC0_SDO2TRI                                 0x40
+
+#define FB_I2SPINC0_SDO1TRI                                 5
+#define FM_I2SPINC0_SDO1TRI                                 0x20
+
+#define FB_I2SPINC0_PCM3TRI                                 2
+#define FM_I2SPINC0_PCM3TRI                                 0x4
+
+#define FB_I2SPINC0_PCM2TRI                                 1
+#define FM_I2SPINC0_PCM2TRI                                 0x2
+
+#define FB_I2SPINC0_PCM1TRI                                 0
+#define FM_I2SPINC0_PCM1TRI                                 0x1
+
+// *** I2SPINC1 ***
+#define FB_I2SPINC1_SDO3PDD                                 2
+#define FM_I2SPINC1_SDO3PDD                                 0x4
+
+#define FB_I2SPINC1_SDO2PDD                                 1
+#define FM_I2SPINC1_SDO2PDD                                 0x2
+
+#define FB_I2SPINC1_SDO1PDD                                 0
+#define FM_I2SPINC1_SDO1PDD                                 0x1
+
+// *** I2SPINC2 ***
+#define FB_I2SPINC2_LR3PDD                                  5
+#define FM_I2SPINC2_LR3PDD                                  0x20
+
+#define FB_I2SPINC2_BC3PDD                                  4
+#define FM_I2SPINC2_BC3PDD                                  0x10
+
+#define FB_I2SPINC2_LR2PDD                                  3
+#define FM_I2SPINC2_LR2PDD                                  0x8
+
+#define FB_I2SPINC2_BC2PDD                                  2
+#define FM_I2SPINC2_BC2PDD                                  0x4
+
+#define FB_I2SPINC2_LR1PDD                                  1
+#define FM_I2SPINC2_LR1PDD                                  0x2
+
+#define FB_I2SPINC2_BC1PDD                                  0
+#define FM_I2SPINC2_BC1PDD                                  0x1
+
+// *** GPIOCTL0 ***
+#define FB_GPIOCTL0_GPIO3INTP                               7
+#define FM_GPIOCTL0_GPIO3INTP                               0x80
+
+#define FB_GPIOCTL0_GPIO2INTP                               6
+#define FM_GPIOCTL0_GPIO2INTP                               0x40
+
+#define FB_GPIOCTL0_GPIO3CFG                                5
+#define FM_GPIOCTL0_GPIO3CFG                                0x20
+
+#define FB_GPIOCTL0_GPIO2CFG                                4
+#define FM_GPIOCTL0_GPIO2CFG                                0x10
+
+#define FB_GPIOCTL0_GPIO3IO                                 3
+#define FM_GPIOCTL0_GPIO3IO                                 0x8
+
+#define FB_GPIOCTL0_GPIO2IO                                 2
+#define FM_GPIOCTL0_GPIO2IO                                 0x4
+
+#define FB_GPIOCTL0_GPIO1IO                                 1
+#define FM_GPIOCTL0_GPIO1IO                                 0x2
+
+#define FB_GPIOCTL0_GPIO0IO                                 0
+#define FM_GPIOCTL0_GPIO0IO                                 0x1
+
+// *** GPIOCTL1 ***
+#define FB_GPIOCTL1_GPIO3                                   7
+#define FM_GPIOCTL1_GPIO3                                   0x80
+
+#define FB_GPIOCTL1_GPIO2                                   6
+#define FM_GPIOCTL1_GPIO2                                   0x40
+
+#define FB_GPIOCTL1_GPIO1                                   5
+#define FM_GPIOCTL1_GPIO1                                   0x20
+
+#define FB_GPIOCTL1_GPIO0                                   4
+#define FM_GPIOCTL1_GPIO0                                   0x10
+
+#define FB_GPIOCTL1_GPIO3RD                                 3
+#define FM_GPIOCTL1_GPIO3RD                                 0x8
+
+#define FB_GPIOCTL1_GPIO2RD                                 2
+#define FM_GPIOCTL1_GPIO2RD                                 0x4
+
+#define FB_GPIOCTL1_GPIO1RD                                 1
+#define FM_GPIOCTL1_GPIO1RD                                 0x2
+
+#define FB_GPIOCTL1_GPIO0RD                                 0
+#define FM_GPIOCTL1_GPIO0RD                                 0x1
+
+// *** ASRC ***
+#define FB_ASRC_ASRCOBW                                     7
+#define FM_ASRC_ASRCOBW                                     0x80
+
+#define FB_ASRC_ASRCIBW                                     6
+#define FM_ASRC_ASRCIBW                                     0x40
+
+#define FB_ASRC_ASRCOB                                      5
+#define FM_ASRC_ASRCOB                                      0x20
+#define FV_ASRCOB_ACTIVE                                    0x0
+#define FV_ASRCOB_BYPASSED                                  0x20
+
+#define FB_ASRC_ASRCIB                                      4
+#define FM_ASRC_ASRCIB                                      0x10
+#define FV_ASRCIB_ACTIVE                                    0x0
+#define FV_ASRCIB_BYPASSED                                  0x10
+
+#define FB_ASRC_ASRCOL                                      3
+#define FM_ASRC_ASRCOL                                      0x8
+
+#define FB_ASRC_ASRCIL                                      2
+#define FM_ASRC_ASRCIL                                      0x4
+
+// *** TDMCTL0 ***
+#define FB_TDMCTL0_TDMMD                                    2
+#define FM_TDMCTL0_TDMMD                                    0x4
+#define FV_TDMMD_200                                        0x0
+#define FV_TDMMD_256                                        0x4
+
+#define FB_TDMCTL0_SLSYNC                                   1
+#define FM_TDMCTL0_SLSYNC                                   0x2
+#define FV_SLSYNC_SHORT                                     0x0
+#define FV_SLSYNC_LONG                                      0x2
+
+#define FB_TDMCTL0_BDELAY                                   0
+#define FM_TDMCTL0_BDELAY                                   0x1
+#define FV_BDELAY_NO_DELAY                                  0x0
+#define FV_BDELAY_1BCLK_DELAY                               0x1
+
+// *** TDMCTL1 ***
+#define FB_TDMCTL1_TDMSO                                    5
+#define FM_TDMCTL1_TDMSO                                    0x60
+#define FV_TDMSO_2                                          0x0
+#define FV_TDMSO_4                                          0x20
+#define FV_TDMSO_6                                          0x40
+
+#define FB_TDMCTL1_TDMDSS                                   3
+#define FM_TDMCTL1_TDMDSS                                   0x18
+#define FV_TDMDSS_16                                        0x0
+#define FV_TDMDSS_24                                        0x10
+#define FV_TDMDSS_32                                        0x18
+
+#define FB_TDMCTL1_TDMSI                                    0
+#define FM_TDMCTL1_TDMSI                                    0x3
+#define FV_TDMSI_2                                          0x0
+#define FV_TDMSI_4                                          0x1
+#define FV_TDMSI_6                                          0x2
+
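+// Field updates are the usual read-modify-write: clear the FM_ mask,
+// then OR in a pre-shifted FV_ value. An illustrative sketch, assuming
+// val holds the current TDMCTL1 contents, selecting 24-bit slot size:
+//
+//   val = (val & ~FM_TDMCTL1_TDMDSS) | FV_TDMDSS_24;
+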
+// *** PWRM0 ***
+#define FB_PWRM0_INPROC3PU                                  6
+#define FM_PWRM0_INPROC3PU                                  0x40
+
+#define FB_PWRM0_INPROC2PU                                  5
+#define FM_PWRM0_INPROC2PU                                  0x20
+
+#define FB_PWRM0_INPROC1PU                                  4
+#define FM_PWRM0_INPROC1PU                                  0x10
+
+#define FB_PWRM0_INPROC0PU                                  3
+#define FM_PWRM0_INPROC0PU                                  0x8
+
+#define FB_PWRM0_MICB2PU                                    2
+#define FM_PWRM0_MICB2PU                                    0x4
+
+#define FB_PWRM0_MICB1PU                                    1
+#define FM_PWRM0_MICB1PU                                    0x2
+
+#define FB_PWRM0_MCLKPEN                                    0
+#define FM_PWRM0_MCLKPEN                                    0x1
+
+// *** PWRM1 ***
+#define FB_PWRM1_SUBPU                                      7
+#define FM_PWRM1_SUBPU                                      0x80
+
+#define FB_PWRM1_HPLPU                                      6
+#define FM_PWRM1_HPLPU                                      0x40
+
+#define FB_PWRM1_HPRPU                                      5
+#define FM_PWRM1_HPRPU                                      0x20
+
+#define FB_PWRM1_SPKLPU                                     4
+#define FM_PWRM1_SPKLPU                                     0x10
+
+#define FB_PWRM1_SPKRPU                                     3
+#define FM_PWRM1_SPKRPU                                     0x8
+
+#define FB_PWRM1_D2S2PU                                     2
+#define FM_PWRM1_D2S2PU                                     0x4
+
+#define FB_PWRM1_D2S1PU                                     1
+#define FM_PWRM1_D2S1PU                                     0x2
+
+#define FB_PWRM1_VREFPU                                     0
+#define FM_PWRM1_VREFPU                                     0x1
+
+// *** PWRM2 ***
+#define FB_PWRM2_I2S3OPU                                    5
+#define FM_PWRM2_I2S3OPU                                    0x20
+#define FV_I2S3OPU_PWR_DOWN                                 0x0
+#define FV_I2S3OPU_PWR_UP                                   0x20
+
+#define FB_PWRM2_I2S2OPU                                    4
+#define FM_PWRM2_I2S2OPU                                    0x10
+#define FV_I2S2OPU_PWR_DOWN                                 0x0
+#define FV_I2S2OPU_PWR_UP                                   0x10
+
+#define FB_PWRM2_I2S1OPU                                    3
+#define FM_PWRM2_I2S1OPU                                    0x8
+#define FV_I2S1OPU_PWR_DOWN                                 0x0
+#define FV_I2S1OPU_PWR_UP                                   0x8
+
+#define FB_PWRM2_I2S3IPU                                    2
+#define FM_PWRM2_I2S3IPU                                    0x4
+#define FV_I2S3IPU_PWR_DOWN                                 0x0
+#define FV_I2S3IPU_PWR_UP                                   0x4
+
+#define FB_PWRM2_I2S2IPU                                    1
+#define FM_PWRM2_I2S2IPU                                    0x2
+#define FV_I2S2IPU_PWR_DOWN                                 0x0
+#define FV_I2S2IPU_PWR_UP                                   0x2
+
+#define FB_PWRM2_I2S1IPU                                    0
+#define FM_PWRM2_I2S1IPU                                    0x1
+#define FV_I2S1IPU_PWR_DOWN                                 0x0
+#define FV_I2S1IPU_PWR_UP                                   0x1
+
+#define PWRM2_I2SOPU_PWR_DOWN                               0x0
+#define PWRM2_I2SOPU_PWR_UP                                 0x1
+#define PWRM2_I2SIPU_PWR_DOWN                               0x0
+#define PWRM2_I2SIPU_PWR_UP                                 0x1
+
+// *** PWRM3 ***
+#define FB_PWRM3_BGSBUP                                     6
+#define FM_PWRM3_BGSBUP                                     0x40
+#define FV_BGSBUP_ON                                        0x0
+#define FV_BGSBUP_OFF                                       0x40
+
+#define FB_PWRM3_VGBAPU                                     5
+#define FM_PWRM3_VGBAPU                                     0x20
+#define FV_VGBAPU_ON                                        0x0
+#define FV_VGBAPU_OFF                                       0x20
+
+#define FB_PWRM3_LLINEPU                                    4
+#define FM_PWRM3_LLINEPU                                    0x10
+
+#define FB_PWRM3_RLINEPU                                    3
+#define FM_PWRM3_RLINEPU                                    0x8
+
+// *** PWRM4 ***
+#define FB_PWRM4_OPSUBPU                                    4
+#define FM_PWRM4_OPSUBPU                                    0x10
+
+#define FB_PWRM4_OPDACLPU                                   3
+#define FM_PWRM4_OPDACLPU                                   0x8
+
+#define FB_PWRM4_OPDACRPU                                   2
+#define FM_PWRM4_OPDACRPU                                   0x4
+
+#define FB_PWRM4_OPSPKLPU                                   1
+#define FM_PWRM4_OPSPKLPU                                   0x2
+
+#define FB_PWRM4_OPSPKRPU                                   0
+#define FM_PWRM4_OPSPKRPU                                   0x1
+
+// *** I2SIDCTL ***
+#define FB_I2SIDCTL_I2SI3DCTL                               4
+#define FM_I2SIDCTL_I2SI3DCTL                               0x30
+
+#define FB_I2SIDCTL_I2SI2DCTL                               2
+#define FM_I2SIDCTL_I2SI2DCTL                               0xC
+
+#define FB_I2SIDCTL_I2SI1DCTL                               0
+#define FM_I2SIDCTL_I2SI1DCTL                               0x3
+
+// *** I2SODCTL ***
+#define FB_I2SODCTL_I2SO3DCTL                               4
+#define FM_I2SODCTL_I2SO3DCTL                               0x30
+
+#define FB_I2SODCTL_I2SO2DCTL                               2
+#define FM_I2SODCTL_I2SO2DCTL                               0xC
+
+#define FB_I2SODCTL_I2SO1DCTL                               0
+#define FM_I2SODCTL_I2SO1DCTL                               0x3
+
+// *** AUDIOMUX1 ***
+#define FB_AUDIOMUX1_ASRCIMUX                               6
+#define FM_AUDIOMUX1_ASRCIMUX                               0xC0
+#define FV_ASRCIMUX_NONE                                    0x0
+#define FV_ASRCIMUX_I2S1                                    0x40
+#define FV_ASRCIMUX_I2S2                                    0x80
+#define FV_ASRCIMUX_I2S3                                    0xC0
+
+#define FB_AUDIOMUX1_I2S2MUX                                3
+#define FM_AUDIOMUX1_I2S2MUX                                0x38
+#define FV_I2S2MUX_I2S1                                     0x0
+#define FV_I2S2MUX_I2S2                                     0x8
+#define FV_I2S2MUX_I2S3                                     0x10
+#define FV_I2S2MUX_ADC_DMIC                                 0x18
+#define FV_I2S2MUX_DMIC2                                    0x20
+#define FV_I2S2MUX_CLASSD_DSP                               0x28
+#define FV_I2S2MUX_DAC_DSP                                  0x30
+#define FV_I2S2MUX_SUB_DSP                                  0x38
+
+#define FB_AUDIOMUX1_I2S1MUX                                0
+#define FM_AUDIOMUX1_I2S1MUX                                0x7
+#define FV_I2S1MUX_I2S1                                     0x0
+#define FV_I2S1MUX_I2S2                                     0x1
+#define FV_I2S1MUX_I2S3                                     0x2
+#define FV_I2S1MUX_ADC_DMIC                                 0x3
+#define FV_I2S1MUX_DMIC2                                    0x4
+#define FV_I2S1MUX_CLASSD_DSP                               0x5
+#define FV_I2S1MUX_DAC_DSP                                  0x6
+#define FV_I2S1MUX_SUB_DSP                                  0x7
+
+#define AUDIOMUX1_I2SMUX_I2S1                               0x0
+#define AUDIOMUX1_I2SMUX_I2S2                               0x1
+#define AUDIOMUX1_I2SMUX_I2S3                               0x2
+#define AUDIOMUX1_I2SMUX_ADC_DMIC                           0x3
+#define AUDIOMUX1_I2SMUX_DMIC2                              0x4
+#define AUDIOMUX1_I2SMUX_CLASSD_DSP                         0x5
+#define AUDIOMUX1_I2SMUX_DAC_DSP                            0x6
+#define AUDIOMUX1_I2SMUX_SUB_DSP                            0x7
+
+// *** AUDIOMUX2 ***
+#define FB_AUDIOMUX2_ASRCOMUX                               6
+#define FM_AUDIOMUX2_ASRCOMUX                               0xC0
+#define FV_ASRCOMUX_NONE                                    0x0
+#define FV_ASRCOMUX_I2S1                                    0x40
+#define FV_ASRCOMUX_I2S2                                    0x80
+#define FV_ASRCOMUX_I2S3                                    0xC0
+
+#define FB_AUDIOMUX2_DACMUX                                 3
+#define FM_AUDIOMUX2_DACMUX                                 0x38
+#define FV_DACMUX_I2S1                                      0x0
+#define FV_DACMUX_I2S2                                      0x8
+#define FV_DACMUX_I2S3                                      0x10
+#define FV_DACMUX_ADC_DMIC                                  0x18
+#define FV_DACMUX_DMIC2                                     0x20
+#define FV_DACMUX_CLASSD_DSP                                0x28
+#define FV_DACMUX_DAC_DSP                                   0x30
+#define FV_DACMUX_SUB_DSP                                   0x38
+
+#define FB_AUDIOMUX2_I2S3MUX                                0
+#define FM_AUDIOMUX2_I2S3MUX                                0x7
+#define FV_I2S3MUX_I2S1                                     0x0
+#define FV_I2S3MUX_I2S2                                     0x1
+#define FV_I2S3MUX_I2S3                                     0x2
+#define FV_I2S3MUX_ADC_DMIC                                 0x3
+#define FV_I2S3MUX_DMIC2                                    0x4
+#define FV_I2S3MUX_CLASSD_DSP                               0x5
+#define FV_I2S3MUX_DAC_DSP                                  0x6
+#define FV_I2S3MUX_SUB_DSP                                  0x7
+
+// *** AUDIOMUX3 ***
+#define FB_AUDIOMUX3_SUBMUX                                 3
+#define FM_AUDIOMUX3_SUBMUX                                 0xF8
+#define FV_SUBMUX_I2S1_L                                    0x0
+#define FV_SUBMUX_I2S1_R                                    0x8
+#define FV_SUBMUX_I2S1_LR                                   0x10
+#define FV_SUBMUX_I2S2_L                                    0x18
+#define FV_SUBMUX_I2S2_R                                    0x20
+#define FV_SUBMUX_I2S2_LR                                   0x28
+#define FV_SUBMUX_I2S3_L                                    0x30
+#define FV_SUBMUX_I2S3_R                                    0x38
+#define FV_SUBMUX_I2S3_LR                                   0x40
+#define FV_SUBMUX_ADC_DMIC_L                                0x48
+#define FV_SUBMUX_ADC_DMIC_R                                0x50
+#define FV_SUBMUX_ADC_DMIC_LR                               0x58
+#define FV_SUBMUX_DMIC_L                                    0x60
+#define FV_SUBMUX_DMIC_R                                    0x68
+#define FV_SUBMUX_DMIC_LR                                   0x70
+#define FV_SUBMUX_CLASSD_DSP_L                              0x78
+#define FV_SUBMUX_CLASSD_DSP_R                              0x80
+#define FV_SUBMUX_CLASSD_DSP_LR                             0x88
+
+#define FB_AUDIOMUX3_CLSSDMUX                               0
+#define FM_AUDIOMUX3_CLSSDMUX                               0x7
+#define FV_CLSSDMUX_I2S1                                    0x0
+#define FV_CLSSDMUX_I2S2                                    0x1
+#define FV_CLSSDMUX_I2S3                                    0x2
+#define FV_CLSSDMUX_ADC_DMIC                                0x3
+#define FV_CLSSDMUX_DMIC2                                   0x4
+#define FV_CLSSDMUX_CLASSD_DSP                              0x5
+#define FV_CLSSDMUX_DAC_DSP                                 0x6
+#define FV_CLSSDMUX_SUB_DSP                                 0x7
+
+// *** HSDCTL1 ***
+#define FB_HSDCTL1_HPJKTYPE                                 7
+#define FM_HSDCTL1_HPJKTYPE                                 0x80
+
+#define FB_HSDCTL1_CON_DET_PWD                              6
+#define FM_HSDCTL1_CON_DET_PWD                              0x40
+
+#define FB_HSDCTL1_DETCYC                                   4
+#define FM_HSDCTL1_DETCYC                                   0x30
+
+#define FB_HSDCTL1_HPDLYBYP                                 3
+#define FM_HSDCTL1_HPDLYBYP                                 0x8
+
+#define FB_HSDCTL1_HSDETPOL                                 2
+#define FM_HSDCTL1_HSDETPOL                                 0x4
+
+#define FB_HSDCTL1_HPID_EN                                  1
+#define FM_HSDCTL1_HPID_EN                                  0x2
+
+#define FB_HSDCTL1_GBLHS_EN                                 0
+#define FM_HSDCTL1_GBLHS_EN                                 0x1
+
+// *** HSDCTL2 ***
+#define FB_HSDCTL2_FMICBIAS1                                6
+#define FM_HSDCTL2_FMICBIAS1                                0xC0
+
+#define FB_HSDCTL2_MB1MODE                                  5
+#define FM_HSDCTL2_MB1MODE                                  0x20
+#define FV_MB1MODE_AUTO                                     0x0
+#define FV_MB1MODE_MANUAL                                   0x20
+
+#define FB_HSDCTL2_FORCETRG                                 4
+#define FM_HSDCTL2_FORCETRG                                 0x10
+
+#define FB_HSDCTL2_SWMODE                                   3
+#define FM_HSDCTL2_SWMODE                                   0x8
+
+#define FB_HSDCTL2_GHSHIZ                                   2
+#define FM_HSDCTL2_GHSHIZ                                   0x4
+
+#define FB_HSDCTL2_FPLUGTYPE                                0
+#define FM_HSDCTL2_FPLUGTYPE                                0x3
+
+// *** HSDSTAT ***
+#define FB_HSDSTAT_MBIAS1DRV                                5
+#define FM_HSDSTAT_MBIAS1DRV                                0x60
+
+#define FB_HSDSTAT_HSDETSTAT                                3
+#define FM_HSDSTAT_HSDETSTAT                                0x8
+
+#define FB_HSDSTAT_PLUGTYPE                                 1
+#define FM_HSDSTAT_PLUGTYPE                                 0x6
+
+#define FB_HSDSTAT_HSDETDONE                                0
+#define FM_HSDSTAT_HSDETDONE                                0x1
+
+// *** HSDDELAY ***
+#define FB_HSDDELAY_T_STABLE                                0
+#define FM_HSDDELAY_T_STABLE                                0x7
+
+// *** BUTCTL ***
+#define FB_BUTCTL_BPUSHSTAT                                 7
+#define FM_BUTCTL_BPUSHSTAT                                 0x80
+
+#define FB_BUTCTL_BPUSHDET                                  6
+#define FM_BUTCTL_BPUSHDET                                  0x40
+
+#define FB_BUTCTL_BPUSHEN                                   5
+#define FM_BUTCTL_BPUSHEN                                   0x20
+
+#define FB_BUTCTL_BSTABLE_L                                 3
+#define FM_BUTCTL_BSTABLE_L                                 0x18
+
+#define FB_BUTCTL_BSTABLE_S                                 0
+#define FM_BUTCTL_BSTABLE_S                                 0x7
+
+// *** CH0AIC ***
+#define FB_CH0AIC_INSELL                                    6
+#define FM_CH0AIC_INSELL                                    0xC0
+
+#define FB_CH0AIC_MICBST0                                   4
+#define FM_CH0AIC_MICBST0                                   0x30
+
+#define FB_CH0AIC_LADCIN                                    2
+#define FM_CH0AIC_LADCIN                                    0xC
+
+#define FB_CH0AIC_IN_BYPS_L_SEL                             1
+#define FM_CH0AIC_IN_BYPS_L_SEL                             0x2
+
+#define FB_CH0AIC_IPCH0S                                    0
+#define FM_CH0AIC_IPCH0S                                    0x1
+
+// *** CH1AIC ***
+#define FB_CH1AIC_INSELR                                    6
+#define FM_CH1AIC_INSELR                                    0xC0
+
+#define FB_CH1AIC_MICBST1                                   4
+#define FM_CH1AIC_MICBST1                                   0x30
+
+#define FB_CH1AIC_RADCIN                                    2
+#define FM_CH1AIC_RADCIN                                    0xC
+
+#define FB_CH1AIC_IN_BYPS_R_SEL                             1
+#define FM_CH1AIC_IN_BYPS_R_SEL                             0x2
+
+#define FB_CH1AIC_IPCH1S                                    0
+#define FM_CH1AIC_IPCH1S                                    0x1
+
+// *** ICTL0 ***
+#define FB_ICTL0_IN1POL                                     7
+#define FM_ICTL0_IN1POL                                     0x80
+
+#define FB_ICTL0_IN0POL                                     6
+#define FM_ICTL0_IN0POL                                     0x40
+
+#define FB_ICTL0_INPCH10SEL                                 4
+#define FM_ICTL0_INPCH10SEL                                 0x30
+
+#define FB_ICTL0_IN1MUTE                                    3
+#define FM_ICTL0_IN1MUTE                                    0x8
+
+#define FB_ICTL0_IN0MUTE                                    2
+#define FM_ICTL0_IN0MUTE                                    0x4
+
+#define FB_ICTL0_IN1HP                                      1
+#define FM_ICTL0_IN1HP                                      0x2
+
+#define FB_ICTL0_IN0HP                                      0
+#define FM_ICTL0_IN0HP                                      0x1
+
+// *** ICTL1 ***
+#define FB_ICTL1_IN3POL                                     7
+#define FM_ICTL1_IN3POL                                     0x80
+
+#define FB_ICTL1_IN2POL                                     6
+#define FM_ICTL1_IN2POL                                     0x40
+
+#define FB_ICTL1_INPCH32SEL                                 4
+#define FM_ICTL1_INPCH32SEL                                 0x30
+
+#define FB_ICTL1_IN3MUTE                                    3
+#define FM_ICTL1_IN3MUTE                                    0x8
+
+#define FB_ICTL1_IN2MUTE                                    2
+#define FM_ICTL1_IN2MUTE                                    0x4
+
+#define FB_ICTL1_IN3HP                                      1
+#define FM_ICTL1_IN3HP                                      0x2
+
+#define FB_ICTL1_IN2HP                                      0
+#define FM_ICTL1_IN2HP                                      0x1
+
+// *** MICBIAS ***
+#define FB_MICBIAS_MICBOV1                                  6
+#define FM_MICBIAS_MICBOV1                                  0xC0
+
+#define FB_MICBIAS_MICBOV2                                  4
+#define FM_MICBIAS_MICBOV2                                  0x30
+
+#define FB_MICBIAS_SPARE1                                   2
+#define FM_MICBIAS_SPARE1                                   0xC
+
+#define FB_MICBIAS_SPARE2                                   0
+#define FM_MICBIAS_SPARE2                                   0x3
+
+// *** PGAZ ***
+#define FB_PGAZ_INHPOR                                      1
+#define FM_PGAZ_INHPOR                                      0x2
+
+#define FB_PGAZ_TOEN                                        0
+#define FM_PGAZ_TOEN                                        0x1
+
+// *** ASRCILVOL ***
+#define FB_ASRCILVOL_ASRCILVOL                              0
+#define FM_ASRCILVOL_ASRCILVOL                              0xFF
+
+// *** ASRCIRVOL ***
+#define FB_ASRCIRVOL_ASRCIRVOL                              0
+#define FM_ASRCIRVOL_ASRCIRVOL                              0xFF
+
+// *** ASRCOLVOL ***
+#define FB_ASRCOLVOL_ASRCOLVOL                              0
+#define FM_ASRCOLVOL_ASRCOLVOL                              0xFF
+
+// *** ASRCORVOL ***
+#define FB_ASRCORVOL_ASRCORVOL                              0
+#define FM_ASRCORVOL_ASRCORVOL                              0xFF
+
+// *** IVOLCTLU ***
+#define FB_IVOLCTLU_IFADE                                   3
+#define FM_IVOLCTLU_IFADE                                   0x8
+
+#define FB_IVOLCTLU_INPVOLU                                 2
+#define FM_IVOLCTLU_INPVOLU                                 0x4
+
+#define FB_IVOLCTLU_PGAVOLU                                 1
+#define FM_IVOLCTLU_PGAVOLU                                 0x2
+
+#define FB_IVOLCTLU_ASRCVOLU                                0
+#define FM_IVOLCTLU_ASRCVOLU                                0x1
+
+// *** ALCCTL0 ***
+#define FB_ALCCTL0_ALCMODE                                  7
+#define FM_ALCCTL0_ALCMODE                                  0x80
+
+#define FB_ALCCTL0_ALCREF                                   4
+#define FM_ALCCTL0_ALCREF                                   0x70
+
+#define FB_ALCCTL0_ALCEN3                                   3
+#define FM_ALCCTL0_ALCEN3                                   0x8
+
+#define FB_ALCCTL0_ALCEN2                                   2
+#define FM_ALCCTL0_ALCEN2                                   0x4
+
+#define FB_ALCCTL0_ALCEN1                                   1
+#define FM_ALCCTL0_ALCEN1                                   0x2
+
+#define FB_ALCCTL0_ALCEN0                                   0
+#define FM_ALCCTL0_ALCEN0                                   0x1
+
+// *** ALCCTL1 ***
+#define FB_ALCCTL1_MAXGAIN                                  4
+#define FM_ALCCTL1_MAXGAIN                                  0x70
+
+#define FB_ALCCTL1_ALCL                                     0
+#define FM_ALCCTL1_ALCL                                     0xF
+
+// *** ALCCTL2 ***
+#define FB_ALCCTL2_ALCZC                                    7
+#define FM_ALCCTL2_ALCZC                                    0x80
+
+#define FB_ALCCTL2_MINGAIN                                  4
+#define FM_ALCCTL2_MINGAIN                                  0x70
+
+#define FB_ALCCTL2_HLD                                      0
+#define FM_ALCCTL2_HLD                                      0xF
+
+// *** ALCCTL3 ***
+#define FB_ALCCTL3_DCY                                      4
+#define FM_ALCCTL3_DCY                                      0xF0
+
+#define FB_ALCCTL3_ATK                                      0
+#define FM_ALCCTL3_ATK                                      0xF
+
+// *** NGATE ***
+#define FB_NGATE_NGTH                                       3
+#define FM_NGATE_NGTH                                       0xF8
+
+#define FB_NGATE_NGG                                        1
+#define FM_NGATE_NGG                                        0x6
+
+#define FB_NGATE_NGAT                                       0
+#define FM_NGATE_NGAT                                       0x1
+
+// *** DMICCTL ***
+#define FB_DMICCTL_DMIC2EN                                  7
+#define FM_DMICCTL_DMIC2EN                                  0x80
+
+#define FB_DMICCTL_DMIC1EN                                  6
+#define FM_DMICCTL_DMIC1EN                                  0x40
+
+#define FB_DMICCTL_DMONO                                    4
+#define FM_DMICCTL_DMONO                                    0x10
+
+#define FB_DMICCTL_DMDCLK                                   2
+#define FM_DMICCTL_DMDCLK                                   0xC
+
+#define FB_DMICCTL_DMRATE                                   0
+#define FM_DMICCTL_DMRATE                                   0x3
+
+// *** DACCTL ***
+#define FB_DACCTL_DACPOLR                                   7
+#define FM_DACCTL_DACPOLR                                   0x80
+#define FV_DACPOLR_NORMAL                                   0x0
+#define FV_DACPOLR_INVERTED                                 0x80
+
+#define FB_DACCTL_DACPOLL                                   6
+#define FM_DACCTL_DACPOLL                                   0x40
+#define FV_DACPOLL_NORMAL                                   0x0
+#define FV_DACPOLL_INVERTED                                 0x40
+
+#define FB_DACCTL_DACDITH                                   4
+#define FM_DACCTL_DACDITH                                   0x30
+#define FV_DACDITH_DYNAMIC_HALF                             0x0
+#define FV_DACDITH_DYNAMIC_FULL                             0x10
+#define FV_DACDITH_DISABLED                                 0x20
+#define FV_DACDITH_STATIC                                   0x30
+
+#define FB_DACCTL_DACMUTE                                   3
+#define FM_DACCTL_DACMUTE                                   0x8
+#define FV_DACMUTE_ENABLE                                   0x8
+#define FV_DACMUTE_DISABLE                                  0x0
+
+#define FB_DACCTL_DACDEM                                    2
+#define FM_DACCTL_DACDEM                                    0x4
+#define FV_DACDEM_ENABLE                                    0x4
+#define FV_DACDEM_DISABLE                                   0x0
+
+#define FB_DACCTL_ABYPASS                                   0
+#define FM_DACCTL_ABYPASS                                   0x1
+
+// *** SPKCTL ***
+#define FB_SPKCTL_SPKPOLR                                   7
+#define FM_SPKCTL_SPKPOLR                                   0x80
+#define FV_SPKPOLR_NORMAL                                   0x0
+#define FV_SPKPOLR_INVERTED                                 0x80
+
+#define FB_SPKCTL_SPKPOLL                                   6
+#define FM_SPKCTL_SPKPOLL                                   0x40
+#define FV_SPKPOLL_NORMAL                                   0x0
+#define FV_SPKPOLL_INVERTED                                 0x40
+
+#define FB_SPKCTL_SPKMUTE                                   3
+#define FM_SPKCTL_SPKMUTE                                   0x8
+#define FV_SPKMUTE_ENABLE                                   0x8
+#define FV_SPKMUTE_DISABLE                                  0x0
+
+#define FB_SPKCTL_SPKDEM                                    2
+#define FM_SPKCTL_SPKDEM                                    0x4
+#define FV_SPKDEM_ENABLE                                    0x4
+#define FV_SPKDEM_DISABLE                                   0x0
+
+// *** SUBCTL ***
+#define FB_SUBCTL_SUBPOL                                    7
+#define FM_SUBCTL_SUBPOL                                    0x80
+
+#define FB_SUBCTL_SUBMUTE                                   3
+#define FM_SUBCTL_SUBMUTE                                   0x8
+
+#define FB_SUBCTL_SUBDEM                                    2
+#define FM_SUBCTL_SUBDEM                                    0x4
+
+#define FB_SUBCTL_SUBMUX                                    1
+#define FM_SUBCTL_SUBMUX                                    0x2
+
+#define FB_SUBCTL_SUBILMDIS                                 0
+#define FM_SUBCTL_SUBILMDIS                                 0x1
+
+// *** DCCTL ***
+#define FB_DCCTL_SUBDCBYP                                   7
+#define FM_DCCTL_SUBDCBYP                                   0x80
+
+#define FB_DCCTL_DACDCBYP                                   6
+#define FM_DCCTL_DACDCBYP                                   0x40
+
+#define FB_DCCTL_SPKDCBYP                                   5
+#define FM_DCCTL_SPKDCBYP                                   0x20
+
+#define FB_DCCTL_DCCOEFSEL                                  0
+#define FM_DCCTL_DCCOEFSEL                                  0x7
+
+// *** OVOLCTLU ***
+#define FB_OVOLCTLU_OFADE                                   4
+#define FM_OVOLCTLU_OFADE                                   0x10
+
+#define FB_OVOLCTLU_SUBVOLU                                 3
+#define FM_OVOLCTLU_SUBVOLU                                 0x8
+
+#define FB_OVOLCTLU_MVOLU                                   2
+#define FM_OVOLCTLU_MVOLU                                   0x4
+
+#define FB_OVOLCTLU_SPKVOLU                                 1
+#define FM_OVOLCTLU_SPKVOLU                                 0x2
+
+#define FB_OVOLCTLU_HPVOLU                                  0
+#define FM_OVOLCTLU_HPVOLU                                  0x1
+
+// *** MUTEC ***
+#define FB_MUTEC_ZDSTAT                                     7
+#define FM_MUTEC_ZDSTAT                                     0x80
+
+#define FB_MUTEC_ZDLEN                                      4
+#define FM_MUTEC_ZDLEN                                      0x30
+
+#define FB_MUTEC_APWD                                       3
+#define FM_MUTEC_APWD                                       0x8
+
+#define FB_MUTEC_AMUTE                                      2
+#define FM_MUTEC_AMUTE                                      0x4
+
+// *** MVOLL ***
+#define FB_MVOLL_MVOL_L                                     0
+#define FM_MVOLL_MVOL_L                                     0xFF
+
+// *** MVOLR ***
+#define FB_MVOLR_MVOL_R                                     0
+#define FM_MVOLR_MVOL_R                                     0xFF
+
+// *** HPVOLL ***
+#define FB_HPVOLL_HPVOL_L                                   0
+#define FM_HPVOLL_HPVOL_L                                   0x7F
+
+// *** HPVOLR ***
+#define FB_HPVOLR_HPVOL_R                                   0
+#define FM_HPVOLR_HPVOL_R                                   0x7F
+
+// *** SPKVOLL ***
+#define FB_SPKVOLL_SPKVOL_L                                 0
+#define FM_SPKVOLL_SPKVOL_L                                 0x7F
+
+// *** SPKVOLR ***
+#define FB_SPKVOLR_SPKVOL_R                                 0
+#define FM_SPKVOLR_SPKVOL_R                                 0x7F
+
+// *** SUBVOL ***
+#define FB_SUBVOL_SUBVOL                                    0
+#define FM_SUBVOL_SUBVOL                                    0x7F
+
+// *** COP0 ***
+#define FB_COP0_COPATTEN                                    7
+#define FM_COP0_COPATTEN                                    0x80
+
+#define FB_COP0_COPGAIN                                     6
+#define FM_COP0_COPGAIN                                     0x40
+
+#define FB_COP0_HDELTAEN                                    5
+#define FM_COP0_HDELTAEN                                    0x20
+
+#define FB_COP0_COPTARGET                                   0
+#define FM_COP0_COPTARGET                                   0x1F
+
+// *** COP1 ***
+#define FB_COP1_HDCOMPMODE                                  6
+#define FM_COP1_HDCOMPMODE                                  0x40
+
+#define FB_COP1_AVGLENGTH                                   2
+#define FM_COP1_AVGLENGTH                                   0x3C
+
+#define FB_COP1_MONRATE                                     0
+#define FM_COP1_MONRATE                                     0x3
+
+// *** COPSTAT ***
+#define FB_COPSTAT_HDELTADET                                7
+#define FM_COPSTAT_HDELTADET                                0x80
+
+#define FB_COPSTAT_UV                                       6
+#define FM_COPSTAT_UV                                       0x40
+
+#define FB_COPSTAT_COPADJ                                   0
+#define FM_COPSTAT_COPADJ                                   0x3F
+
+// *** PWM0 ***
+#define FB_PWM0_SCTO                                        6
+#define FM_PWM0_SCTO                                        0xC0
+
+#define FB_PWM0_UVLO                                        5
+#define FM_PWM0_UVLO                                        0x20
+
+#define FB_PWM0_BFDIS                                       3
+#define FM_PWM0_BFDIS                                       0x8
+
+#define FB_PWM0_PWMMODE                                     2
+#define FM_PWM0_PWMMODE                                     0x4
+
+#define FB_PWM0_NOOFFSET                                    0
+#define FM_PWM0_NOOFFSET                                    0x1
+
+// *** PWM1 ***
+#define FB_PWM1_DITHPOS                                     4
+#define FM_PWM1_DITHPOS                                     0x70
+
+#define FB_PWM1_DYNDITH                                     1
+#define FM_PWM1_DYNDITH                                     0x2
+
+#define FB_PWM1_DITHDIS                                     0
+#define FM_PWM1_DITHDIS                                     0x1
+
+// *** PWM2 ***
+
+// *** PWM3 ***
+#define FB_PWM3_PWMMUX                                      6
+#define FM_PWM3_PWMMUX                                      0xC0
+
+#define FB_PWM3_CVALUE                                      0
+#define FM_PWM3_CVALUE                                      0x7
+
+// *** HPSW ***
+#define FB_HPSW_HPDETSTATE                                  4
+#define FM_HPSW_HPDETSTATE                                  0x10
+
+#define FB_HPSW_HPSWEN                                      2
+#define FM_HPSW_HPSWEN                                      0xC
+
+#define FB_HPSW_HPSWPOL                                     1
+#define FM_HPSW_HPSWPOL                                     0x2
+
+#define FB_HPSW_TSDEN                                       0
+#define FM_HPSW_TSDEN                                       0x1
+
+// *** THERMTS ***
+#define FB_THERMTS_TRIPHS                                   7
+#define FM_THERMTS_TRIPHS                                   0x80
+
+#define FB_THERMTS_TRIPLS                                   6
+#define FM_THERMTS_TRIPLS                                   0x40
+
+#define FB_THERMTS_TRIPSPLIT                                4
+#define FM_THERMTS_TRIPSPLIT                                0x30
+
+#define FB_THERMTS_TRIPSHIFT                                2
+#define FM_THERMTS_TRIPSHIFT                                0xC
+
+#define FB_THERMTS_TSPOLL                                   0
+#define FM_THERMTS_TSPOLL                                   0x3
+
+// *** THERMSPK1 ***
+#define FB_THERMSPK1_FORCEPWD                               7
+#define FM_THERMSPK1_FORCEPWD                               0x80
+
+#define FB_THERMSPK1_INSTCUTMODE                            6
+#define FM_THERMSPK1_INSTCUTMODE                            0x40
+
+#define FB_THERMSPK1_INCRATIO                               4
+#define FM_THERMSPK1_INCRATIO                               0x30
+
+#define FB_THERMSPK1_INCSTEP                                2
+#define FM_THERMSPK1_INCSTEP                                0xC
+
+#define FB_THERMSPK1_DECSTEP                                0
+#define FM_THERMSPK1_DECSTEP                                0x3
+
+// *** THERMSTAT ***
+#define FB_THERMSTAT_FPWDS                                  7
+#define FM_THERMSTAT_FPWDS                                  0x80
+
+#define FB_THERMSTAT_VOLSTAT                                0
+#define FM_THERMSTAT_VOLSTAT                                0x7F
+
+// *** SCSTAT ***
+#define FB_SCSTAT_ESDF                                      3
+#define FM_SCSTAT_ESDF                                      0x18
+
+#define FB_SCSTAT_CPF                                       2
+#define FM_SCSTAT_CPF                                       0x4
+
+#define FB_SCSTAT_CLSDF                                     0
+#define FM_SCSTAT_CLSDF                                     0x3
+
+// *** SDMON ***
+#define FB_SDMON_SDFORCE                                    7
+#define FM_SDMON_SDFORCE                                    0x80
+
+#define FB_SDMON_SDVALUE                                    0
+#define FM_SDMON_SDVALUE                                    0x1F
+
+// *** SPKEQFILT ***
+#define FB_SPKEQFILT_EQ2EN                                  7
+#define FM_SPKEQFILT_EQ2EN                                  0x80
+#define FV_EQ2EN_ENABLE                                     0x80
+#define FV_EQ2EN_DISABLE                                    0x0
+
+#define FB_SPKEQFILT_EQ2BE                                  4
+#define FM_SPKEQFILT_EQ2BE                                  0x70
+
+#define FB_SPKEQFILT_EQ1EN                                  3
+#define FM_SPKEQFILT_EQ1EN                                  0x8
+#define FV_EQ1EN_ENABLE                                     0x8
+#define FV_EQ1EN_DISABLE                                    0x0
+
+#define FB_SPKEQFILT_EQ1BE                                  0
+#define FM_SPKEQFILT_EQ1BE                                  0x7
+
+#define SPKEQFILT_EQEN_ENABLE                               0x1
+#define SPKEQFILT_EQEN_DISABLE                              0x0
+
+// *** SPKCRWDL ***
+#define FB_SPKCRWDL_WDATA_L                                 0
+#define FM_SPKCRWDL_WDATA_L                                 0xFF
+
+// *** SPKCRWDM ***
+#define FB_SPKCRWDM_WDATA_M                                 0
+#define FM_SPKCRWDM_WDATA_M                                 0xFF
+
+// *** SPKCRWDH ***
+#define FB_SPKCRWDH_WDATA_H                                 0
+#define FM_SPKCRWDH_WDATA_H                                 0xFF
+
+// *** SPKCRRDL ***
+#define FB_SPKCRRDL_RDATA_L                                 0
+#define FM_SPKCRRDL_RDATA_L                                 0xFF
+
+// *** SPKCRRDM ***
+#define FB_SPKCRRDM_RDATA_M                                 0
+#define FM_SPKCRRDM_RDATA_M                                 0xFF
+
+// *** SPKCRRDH ***
+#define FB_SPKCRRDH_RDATA_H                                 0
+#define FM_SPKCRRDH_RDATA_H                                 0xFF
+
+// *** SPKCRADD ***
+#define FB_SPKCRADD_ADDRESS                                 0
+#define FM_SPKCRADD_ADDRESS                                 0xFF
+
+// *** SPKCRS ***
+#define FB_SPKCRS_ACCSTAT                                   7
+#define FM_SPKCRS_ACCSTAT                                   0x80
+
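+// The SPKCRWD*/SPKCRRD*/SPKCRADD/SPKCRS registers read like an indirect
+// access port: 24-bit data split across low/mid/high byte registers, an
+// address register, and an access-status flag. A hedged sketch of how
+// the read bytes would combine (byte significance inferred from the
+// _L/_M/_H suffixes, not stated explicitly here):
+//
+//   data24 = (rdata_h << 16) | (rdata_m << 8) | rdata_l;
+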
+// *** SPKMBCEN ***
+#define FB_SPKMBCEN_MBCEN3                                  2
+#define FM_SPKMBCEN_MBCEN3                                  0x4
+#define FV_MBCEN3_ENABLE                                    0x4
+#define FV_MBCEN3_DISABLE                                   0x0
+
+#define FB_SPKMBCEN_MBCEN2                                  1
+#define FM_SPKMBCEN_MBCEN2                                  0x2
+#define FV_MBCEN2_ENABLE                                    0x2
+#define FV_MBCEN2_DISABLE                                   0x0
+
+#define FB_SPKMBCEN_MBCEN1                                  0
+#define FM_SPKMBCEN_MBCEN1                                  0x1
+#define FV_MBCEN1_ENABLE                                    0x1
+#define FV_MBCEN1_DISABLE                                   0x0
+
+#define SPKMBCEN_MBCEN_ENABLE                               0x1
+#define SPKMBCEN_MBCEN_DISABLE                              0x0
+
+// *** SPKMBCCTL ***
+#define FB_SPKMBCCTL_LVLMODE3                               5
+#define FM_SPKMBCCTL_LVLMODE3                               0x20
+
+#define FB_SPKMBCCTL_WINSEL3                                4
+#define FM_SPKMBCCTL_WINSEL3                                0x10
+
+#define FB_SPKMBCCTL_LVLMODE2                               3
+#define FM_SPKMBCCTL_LVLMODE2                               0x8
+
+#define FB_SPKMBCCTL_WINSEL2                                2
+#define FM_SPKMBCCTL_WINSEL2                                0x4
+
+#define FB_SPKMBCCTL_LVLMODE1                               1
+#define FM_SPKMBCCTL_LVLMODE1                               0x2
+
+#define FB_SPKMBCCTL_WINSEL1                                0
+#define FM_SPKMBCCTL_WINSEL1                                0x1
+
+// *** SPKCLECTL ***
+#define FB_SPKCLECTL_LVLMODE                                4
+#define FM_SPKCLECTL_LVLMODE                                0x10
+
+#define FB_SPKCLECTL_WINSEL                                 3
+#define FM_SPKCLECTL_WINSEL                                 0x8
+
+#define FB_SPKCLECTL_EXPEN                                  2
+#define FM_SPKCLECTL_EXPEN                                  0x4
+#define FV_EXPEN_ENABLE                                     0x4
+#define FV_EXPEN_DISABLE                                    0x0
+
+#define FB_SPKCLECTL_LIMEN                                  1
+#define FM_SPKCLECTL_LIMEN                                  0x2
+#define FV_LIMEN_ENABLE                                     0x2
+#define FV_LIMEN_DISABLE                                    0x0
+
+#define FB_SPKCLECTL_COMPEN                                 0
+#define FM_SPKCLECTL_COMPEN                                 0x1
+#define FV_COMPEN_ENABLE                                    0x1
+#define FV_COMPEN_DISABLE                                   0x0
+
+// *** SPKCLEMUG ***
+#define FB_SPKCLEMUG_MUGAIN                                 0
+#define FM_SPKCLEMUG_MUGAIN                                 0x1F
+
+// *** SPKCOMPTHR ***
+#define FB_SPKCOMPTHR_THRESH                                0
+#define FM_SPKCOMPTHR_THRESH                                0xFF
+
+// *** SPKCOMPRAT ***
+#define FB_SPKCOMPRAT_RATIO                                 0
+#define FM_SPKCOMPRAT_RATIO                                 0x1F
+
+// *** SPKCOMPATKL ***
+#define FB_SPKCOMPATKL_TCATKL                               0
+#define FM_SPKCOMPATKL_TCATKL                               0xFF
+
+// *** SPKCOMPATKH ***
+#define FB_SPKCOMPATKH_TCATKH                               0
+#define FM_SPKCOMPATKH_TCATKH                               0xFF
+
+// *** SPKCOMPRELL ***
+#define FB_SPKCOMPRELL_TCRELL                               0
+#define FM_SPKCOMPRELL_TCRELL                               0xFF
+
+// *** SPKCOMPRELH ***
+#define FB_SPKCOMPRELH_TCRELH                               0
+#define FM_SPKCOMPRELH_TCRELH                               0xFF
+
+// *** SPKLIMTHR ***
+#define FB_SPKLIMTHR_THRESH                                 0
+#define FM_SPKLIMTHR_THRESH                                 0xFF
+
+// *** SPKLIMTGT ***
+#define FB_SPKLIMTGT_TARGET                                 0
+#define FM_SPKLIMTGT_TARGET                                 0xFF
+
+// *** SPKLIMATKL ***
+#define FB_SPKLIMATKL_TCATKL                                0
+#define FM_SPKLIMATKL_TCATKL                                0xFF
+
+// *** SPKLIMATKH ***
+#define FB_SPKLIMATKH_TCATKH                                0
+#define FM_SPKLIMATKH_TCATKH                                0xFF
+
+// *** SPKLIMRELL ***
+#define FB_SPKLIMRELL_TCRELL                                0
+#define FM_SPKLIMRELL_TCRELL                                0xFF
+
+// *** SPKLIMRELH ***
+#define FB_SPKLIMRELH_TCRELH                                0
+#define FM_SPKLIMRELH_TCRELH                                0xFF
+
+// *** SPKEXPTHR ***
+#define FB_SPKEXPTHR_THRESH                                 0
+#define FM_SPKEXPTHR_THRESH                                 0xFF
+
+// *** SPKEXPRAT ***
+#define FB_SPKEXPRAT_RATIO                                  0
+#define FM_SPKEXPRAT_RATIO                                  0x7
+
+// *** SPKEXPATKL ***
+#define FB_SPKEXPATKL_TCATKL                                0
+#define FM_SPKEXPATKL_TCATKL                                0xFF
+
+// *** SPKEXPATKH ***
+#define FB_SPKEXPATKH_TCATKH                                0
+#define FM_SPKEXPATKH_TCATKH                                0xFF
+
+// *** SPKEXPRELL ***
+#define FB_SPKEXPRELL_TCRELL                                0
+#define FM_SPKEXPRELL_TCRELL                                0xFF
+
+// *** SPKEXPRELH ***
+#define FB_SPKEXPRELH_TCRELH                                0
+#define FM_SPKEXPRELH_TCRELH                                0xFF
+
+// *** SPKFXCTL ***
+#define FB_SPKFXCTL_3DEN                                    4
+#define FM_SPKFXCTL_3DEN                                    0x10
+
+#define FB_SPKFXCTL_TEEN                                    3
+#define FM_SPKFXCTL_TEEN                                    0x8
+
+#define FB_SPKFXCTL_TNLFBYP                                 2
+#define FM_SPKFXCTL_TNLFBYP                                 0x4
+
+#define FB_SPKFXCTL_BEEN                                    1
+#define FM_SPKFXCTL_BEEN                                    0x2
+
+#define FB_SPKFXCTL_BNLFBYP                                 0
+#define FM_SPKFXCTL_BNLFBYP                                 0x1
+
+// *** DACEQFILT ***
+#define FB_DACEQFILT_EQ2EN                                  7
+#define FM_DACEQFILT_EQ2EN                                  0x80
+#define FV_EQ2EN_ENABLE                                     0x80
+#define FV_EQ2EN_DISABLE                                    0x0
+
+#define FB_DACEQFILT_EQ2BE                                  4
+#define FM_DACEQFILT_EQ2BE                                  0x70
+
+#define FB_DACEQFILT_EQ1EN                                  3
+#define FM_DACEQFILT_EQ1EN                                  0x8
+#define FV_EQ1EN_ENABLE                                     0x8
+#define FV_EQ1EN_DISABLE                                    0x0
+
+#define FB_DACEQFILT_EQ1BE                                  0
+#define FM_DACEQFILT_EQ1BE                                  0x7
+
+#define DACEQFILT_EQEN_ENABLE                               0x1
+#define DACEQFILT_EQEN_DISABLE                              0x0
+
+// *** DACCRWDL ***
+#define FB_DACCRWDL_WDATA_L                                 0
+#define FM_DACCRWDL_WDATA_L                                 0xFF
+
+// *** DACCRWDM ***
+#define FB_DACCRWDM_WDATA_M                                 0
+#define FM_DACCRWDM_WDATA_M                                 0xFF
+
+// *** DACCRWDH ***
+#define FB_DACCRWDH_WDATA_H                                 0
+#define FM_DACCRWDH_WDATA_H                                 0xFF
+
+// *** DACCRRDL ***
+#define FB_DACCRRDL_RDATA_L                                 0
+#define FM_DACCRRDL_RDATA_L                                 0xFF
+
+// *** DACCRRDM ***
+#define FB_DACCRRDM_RDATA_M                                 0
+#define FM_DACCRRDM_RDATA_M                                 0xFF
+
+// *** DACCRRDH ***
+#define FB_DACCRRDH_RDATA_H                                 0
+#define FM_DACCRRDH_RDATA_H                                 0xFF
+
+// *** DACCRADD ***
+#define FB_DACCRADD_ADDRESS                                 0
+#define FM_DACCRADD_ADDRESS                                 0xFF
+
+// *** DACCRS ***
+#define FB_DACCRS_ACCSTAT                                   7
+#define FM_DACCRS_ACCSTAT                                   0x80
+
+// *** DACMBCEN ***
+#define FB_DACMBCEN_MBCEN3                                  2
+#define FM_DACMBCEN_MBCEN3                                  0x4
+#define FV_MBCEN3_ENABLE                                    0x4
+#define FV_MBCEN3_DISABLE                                   0x0
+
+#define FB_DACMBCEN_MBCEN2                                  1
+#define FM_DACMBCEN_MBCEN2                                  0x2
+#define FV_MBCEN2_ENABLE                                    0x2
+#define FV_MBCEN2_DISABLE                                   0x0
+
+#define FB_DACMBCEN_MBCEN1                                  0
+#define FM_DACMBCEN_MBCEN1                                  0x1
+#define FV_MBCEN1_ENABLE                                    0x1
+#define FV_MBCEN1_DISABLE                                   0x0
+
+#define DACMBCEN_MBCEN_ENABLE                               0x1
+#define DACMBCEN_MBCEN_DISABLE                              0x0
+
+// *** DACMBCCTL ***
+#define FB_DACMBCCTL_LVLMODE3                               5
+#define FM_DACMBCCTL_LVLMODE3                               0x20
+
+#define FB_DACMBCCTL_WINSEL3                                4
+#define FM_DACMBCCTL_WINSEL3                                0x10
+
+#define FB_DACMBCCTL_LVLMODE2                               3
+#define FM_DACMBCCTL_LVLMODE2                               0x8
+
+#define FB_DACMBCCTL_WINSEL2                                2
+#define FM_DACMBCCTL_WINSEL2                                0x4
+
+#define FB_DACMBCCTL_LVLMODE1                               1
+#define FM_DACMBCCTL_LVLMODE1                               0x2
+
+#define FB_DACMBCCTL_WINSEL1                                0
+#define FM_DACMBCCTL_WINSEL1                                0x1
+
+// *** DACCLECTL ***
+#define FB_DACCLECTL_LVLMODE                                4
+#define FM_DACCLECTL_LVLMODE                                0x10
+
+#define FB_DACCLECTL_WINSEL                                 3
+#define FM_DACCLECTL_WINSEL                                 0x8
+
+#define FB_DACCLECTL_EXPEN                                  2
+#define FM_DACCLECTL_EXPEN                                  0x4
+#define FV_EXPEN_ENABLE                                     0x4
+#define FV_EXPEN_DISABLE                                    0x0
+
+#define FB_DACCLECTL_LIMEN                                  1
+#define FM_DACCLECTL_LIMEN                                  0x2
+#define FV_LIMEN_ENABLE                                     0x2
+#define FV_LIMEN_DISABLE                                    0x0
+
+#define FB_DACCLECTL_COMPEN                                 0
+#define FM_DACCLECTL_COMPEN                                 0x1
+#define FV_COMPEN_ENABLE                                    0x1
+#define FV_COMPEN_DISABLE                                   0x0
+
+// *** DACCLEMUG ***
+#define FB_DACCLEMUG_MUGAIN                                 0
+#define FM_DACCLEMUG_MUGAIN                                 0x1F
+
+// *** DACCOMPTHR ***
+#define FB_DACCOMPTHR_THRESH                                0
+#define FM_DACCOMPTHR_THRESH                                0xFF
+
+// *** DACCOMPRAT ***
+#define FB_DACCOMPRAT_RATIO                                 0
+#define FM_DACCOMPRAT_RATIO                                 0x1F
+
+// *** DACCOMPATKL ***
+#define FB_DACCOMPATKL_TCATKL                               0
+#define FM_DACCOMPATKL_TCATKL                               0xFF
+
+// *** DACCOMPATKH ***
+#define FB_DACCOMPATKH_TCATKH                               0
+#define FM_DACCOMPATKH_TCATKH                               0xFF
+
+// *** DACCOMPRELL ***
+#define FB_DACCOMPRELL_TCRELL                               0
+#define FM_DACCOMPRELL_TCRELL                               0xFF
+
+// *** DACCOMPRELH ***
+#define FB_DACCOMPRELH_TCRELH                               0
+#define FM_DACCOMPRELH_TCRELH                               0xFF
+
+// *** DACLIMTHR ***
+#define FB_DACLIMTHR_THRESH                                 0
+#define FM_DACLIMTHR_THRESH                                 0xFF
+
+// *** DACLIMTGT ***
+#define FB_DACLIMTGT_TARGET                                 0
+#define FM_DACLIMTGT_TARGET                                 0xFF
+
+// *** DACLIMATKL ***
+#define FB_DACLIMATKL_TCATKL                                0
+#define FM_DACLIMATKL_TCATKL                                0xFF
+
+// *** DACLIMATKH ***
+#define FB_DACLIMATKH_TCATKH                                0
+#define FM_DACLIMATKH_TCATKH                                0xFF
+
+// *** DACLIMRELL ***
+#define FB_DACLIMRELL_TCRELL                                0
+#define FM_DACLIMRELL_TCRELL                                0xFF
+
+// *** DACLIMRELH ***
+#define FB_DACLIMRELH_TCRELH                                0
+#define FM_DACLIMRELH_TCRELH                                0xFF
+
+// *** DACEXPTHR ***
+#define FB_DACEXPTHR_THRESH                                 0
+#define FM_DACEXPTHR_THRESH                                 0xFF
+
+// *** DACEXPRAT ***
+#define FB_DACEXPRAT_RATIO                                  0
+#define FM_DACEXPRAT_RATIO                                  0x7
+
+// *** DACEXPATKL ***
+#define FB_DACEXPATKL_TCATKL                                0
+#define FM_DACEXPATKL_TCATKL                                0xFF
+
+// *** DACEXPATKH ***
+#define FB_DACEXPATKH_TCATKH                                0
+#define FM_DACEXPATKH_TCATKH                                0xFF
+
+// *** DACEXPRELL ***
+#define FB_DACEXPRELL_TCRELL                                0
+#define FM_DACEXPRELL_TCRELL                                0xFF
+
+// *** DACEXPRELH ***
+#define FB_DACEXPRELH_TCRELH                                0
+#define FM_DACEXPRELH_TCRELH                                0xFF
+
+// *** DACFXCTL ***
+#define FB_DACFXCTL_3DEN                                    4
+#define FM_DACFXCTL_3DEN                                    0x10
+
+#define FB_DACFXCTL_TEEN                                    3
+#define FM_DACFXCTL_TEEN                                    0x8
+
+#define FB_DACFXCTL_TNLFBYP                                 2
+#define FM_DACFXCTL_TNLFBYP                                 0x4
+
+#define FB_DACFXCTL_BEEN                                    1
+#define FM_DACFXCTL_BEEN                                    0x2
+
+#define FB_DACFXCTL_BNLFBYP                                 0
+#define FM_DACFXCTL_BNLFBYP                                 0x1
+
+// *** SUBEQFILT ***
+#define FB_SUBEQFILT_EQ2EN                                  7
+#define FM_SUBEQFILT_EQ2EN                                  0x80
+#define FV_EQ2EN_ENABLE                                     0x80
+#define FV_EQ2EN_DISABLE                                    0x0
+
+#define FB_SUBEQFILT_EQ2BE                                  4
+#define FM_SUBEQFILT_EQ2BE                                  0x70
+
+#define FB_SUBEQFILT_EQ1EN                                  3
+#define FM_SUBEQFILT_EQ1EN                                  0x8
+#define FV_EQ1EN_ENABLE                                     0x8
+#define FV_EQ1EN_DISABLE                                    0x0
+
+#define FB_SUBEQFILT_EQ1BE                                  0
+#define FM_SUBEQFILT_EQ1BE                                  0x7
+
+#define SUBEQFILT_EQEN_ENABLE                               0x1
+#define SUBEQFILT_EQEN_DISABLE                              0x0
+
+// *** SUBCRWDL ***
+#define FB_SUBCRWDL_WDATA_L                                 0
+#define FM_SUBCRWDL_WDATA_L                                 0xFF
+
+// *** SUBCRWDM ***
+#define FB_SUBCRWDM_WDATA_M                                 0
+#define FM_SUBCRWDM_WDATA_M                                 0xFF
+
+// *** SUBCRWDH ***
+#define FB_SUBCRWDH_WDATA_H                                 0
+#define FM_SUBCRWDH_WDATA_H                                 0xFF
+
+// *** SUBCRRDL ***
+#define FB_SUBCRRDL_RDATA_L                                 0
+#define FM_SUBCRRDL_RDATA_L                                 0xFF
+
+// *** SUBCRRDM ***
+#define FB_SUBCRRDM_RDATA_M                                 0
+#define FM_SUBCRRDM_RDATA_M                                 0xFF
+
+// *** SUBCRRDH ***
+#define FB_SUBCRRDH_RDATA_H                                 0
+#define FM_SUBCRRDH_RDATA_H                                 0xFF
+
+// *** SUBCRADD ***
+#define FB_SUBCRADD_ADDRESS                                 0
+#define FM_SUBCRADD_ADDRESS                                 0xFF
+
+// *** SUBCRS ***
+#define FB_SUBCRS_ACCSTAT                                   7
+#define FM_SUBCRS_ACCSTAT                                   0x80
+
+// *** SUBMBCEN ***
+#define FB_SUBMBCEN_MBCEN3                                  2
+#define FM_SUBMBCEN_MBCEN3                                  0x4
+#define FV_MBCEN3_ENABLE                                    0x4
+#define FV_MBCEN3_DISABLE                                   0x0
+
+#define FB_SUBMBCEN_MBCEN2                                  1
+#define FM_SUBMBCEN_MBCEN2                                  0x2
+#define FV_MBCEN2_ENABLE                                    0x2
+#define FV_MBCEN2_DISABLE                                   0x0
+
+#define FB_SUBMBCEN_MBCEN1                                  0
+#define FM_SUBMBCEN_MBCEN1                                  0x1
+#define FV_MBCEN1_ENABLE                                    0x1
+#define FV_MBCEN1_DISABLE                                   0x0
+
+#define SUBMBCEN_MBCEN_ENABLE                               0x1
+#define SUBMBCEN_MBCEN_DISABLE                              0x0
+
+// *** SUBMBCCTL ***
+#define FB_SUBMBCCTL_LVLMODE3                               5
+#define FM_SUBMBCCTL_LVLMODE3                               0x20
+
+#define FB_SUBMBCCTL_WINSEL3                                4
+#define FM_SUBMBCCTL_WINSEL3                                0x10
+
+#define FB_SUBMBCCTL_LVLMODE2                               3
+#define FM_SUBMBCCTL_LVLMODE2                               0x8
+
+#define FB_SUBMBCCTL_WINSEL2                                2
+#define FM_SUBMBCCTL_WINSEL2                                0x4
+
+#define FB_SUBMBCCTL_LVLMODE1                               1
+#define FM_SUBMBCCTL_LVLMODE1                               0x2
+
+#define FB_SUBMBCCTL_WINSEL1                                0
+#define FM_SUBMBCCTL_WINSEL1                                0x1
+
+// *** SUBCLECTL ***
+#define FB_SUBCLECTL_LVLMODE                                4
+#define FM_SUBCLECTL_LVLMODE                                0x10
+
+#define FB_SUBCLECTL_WINSEL                                 3
+#define FM_SUBCLECTL_WINSEL                                 0x8
+
+#define FB_SUBCLECTL_EXPEN                                  2
+#define FM_SUBCLECTL_EXPEN                                  0x4
+#define FV_EXPEN_ENABLE                                     0x4
+#define FV_EXPEN_DISABLE                                    0x0
+
+#define FB_SUBCLECTL_LIMEN                                  1
+#define FM_SUBCLECTL_LIMEN                                  0x2
+#define FV_LIMEN_ENABLE                                     0x2
+#define FV_LIMEN_DISABLE                                    0x0
+
+#define FB_SUBCLECTL_COMPEN                                 0
+#define FM_SUBCLECTL_COMPEN                                 0x1
+#define FV_COMPEN_ENABLE                                    0x1
+#define FV_COMPEN_DISABLE                                   0x0
+
+// *** SUBCLEMUG ***
+#define FB_SUBCLEMUG_MUGAIN                                 0
+#define FM_SUBCLEMUG_MUGAIN                                 0x1F
+
+// *** SUBCOMPTHR ***
+#define FB_SUBCOMPTHR_THRESH                                0
+#define FM_SUBCOMPTHR_THRESH                                0xFF
+
+// *** SUBCOMPRAT ***
+#define FB_SUBCOMPRAT_RATIO                                 0
+#define FM_SUBCOMPRAT_RATIO                                 0x1F
+
+// *** SUBCOMPATKL ***
+#define FB_SUBCOMPATKL_TCATKL                               0
+#define FM_SUBCOMPATKL_TCATKL                               0xFF
+
+// *** SUBCOMPATKH ***
+#define FB_SUBCOMPATKH_TCATKH                               0
+#define FM_SUBCOMPATKH_TCATKH                               0xFF
+
+// *** SUBCOMPRELL ***
+#define FB_SUBCOMPRELL_TCRELL                               0
+#define FM_SUBCOMPRELL_TCRELL                               0xFF
+
+// *** SUBCOMPRELH ***
+#define FB_SUBCOMPRELH_TCRELH                               0
+#define FM_SUBCOMPRELH_TCRELH                               0xFF
+
+// *** SUBLIMTHR ***
+#define FB_SUBLIMTHR_THRESH                                 0
+#define FM_SUBLIMTHR_THRESH                                 0xFF
+
+// *** SUBLIMTGT ***
+#define FB_SUBLIMTGT_TARGET                                 0
+#define FM_SUBLIMTGT_TARGET                                 0xFF
+
+// *** SUBLIMATKL ***
+#define FB_SUBLIMATKL_TCATKL                                0
+#define FM_SUBLIMATKL_TCATKL                                0xFF
+
+// *** SUBLIMATKH ***
+#define FB_SUBLIMATKH_TCATKH                                0
+#define FM_SUBLIMATKH_TCATKH                                0xFF
+
+// *** SUBLIMRELL ***
+#define FB_SUBLIMRELL_TCRELL                                0
+#define FM_SUBLIMRELL_TCRELL                                0xFF
+
+// *** SUBLIMRELH ***
+#define FB_SUBLIMRELH_TCRELH                                0
+#define FM_SUBLIMRELH_TCRELH                                0xFF
+
+// *** SUBEXPTHR ***
+#define FB_SUBEXPTHR_THRESH                                 0
+#define FM_SUBEXPTHR_THRESH                                 0xFF
+
+// *** SUBEXPRAT ***
+#define FB_SUBEXPRAT_RATIO                                  0
+#define FM_SUBEXPRAT_RATIO                                  0x7
+
+// *** SUBEXPATKL ***
+#define FB_SUBEXPATKL_TCATKL                                0
+#define FM_SUBEXPATKL_TCATKL                                0xFF
+
+// *** SUBEXPATKH ***
+#define FB_SUBEXPATKH_TCATKH                                0
+#define FM_SUBEXPATKH_TCATKH                                0xFF
+
+// *** SUBEXPRELL ***
+#define FB_SUBEXPRELL_TCRELL                                0
+#define FM_SUBEXPRELL_TCRELL                                0xFF
+
+// *** SUBEXPRELH ***
+#define FB_SUBEXPRELH_TCRELH                                0
+#define FM_SUBEXPRELH_TCRELH                                0xFF
+
+// *** SUBFXCTL ***
+#define FB_SUBFXCTL_TEEN                                    3
+#define FM_SUBFXCTL_TEEN                                    0x8
+
+#define FB_SUBFXCTL_TNLFBYP                                 2
+#define FM_SUBFXCTL_TNLFBYP                                 0x4
+
+#define FB_SUBFXCTL_BEEN                                    1
+#define FM_SUBFXCTL_BEEN                                    0x2
+
+#define FB_SUBFXCTL_BNLFBYP                                 0
+#define FM_SUBFXCTL_BNLFBYP                                 0x1
+
+#endif /* __REDWOODPUBLIC_H__ */
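
A note on the conventions used throughout the header above: FB_* gives a field's bit offset, FM_* its mask, and FV_* a value already shifted into field position (observe that FV_EQ2EN_ENABLE equals FM_DACEQFILT_EQ2EN, 0x80). A minimal sketch of how a codec driver might consume these triplets; the function names, regmap handle and register argument are illustrative assumptions, not part of the header:

	#include <linux/regmap.h>

	/* Enable the DAC CLE compressor: clear the field via its FM_ mask,
	 * then OR in the pre-shifted FV_ value (no explicit shift needed). */
	static int redwood_dac_comp_enable(struct regmap *map, unsigned int reg)
	{
		return regmap_update_bits(map, reg, FM_DACCLECTL_COMPEN,
					  FV_COMPEN_ENABLE);
	}

	/* Extract a multi-bit field with the FB_/FM_ pair, e.g. EQ2BE. */
	static unsigned int redwood_get_eq2be(unsigned int regval)
	{
		return (regval & FM_DACEQFILT_EQ2BE) >> FB_DACEQFILT_EQ2BE;
	}
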
diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c
index d5f4bbf..3663b9f 100644
--- a/sound/soc/codecs/wm2200.c
+++ b/sound/soc/codecs/wm2200.c
@@ -1155,8 +1155,8 @@
 SND_SOC_BYTES_MASK("EQL Coefficients", WM2200_EQL_1, 20, WM2200_EQL_ENA),
 SND_SOC_BYTES_MASK("EQR Coefficients", WM2200_EQR_1, 20, WM2200_EQR_ENA),
 
-SND_SOC_BYTES("LHPF1 Coefficeints", WM2200_HPLPF1_2, 1),
-SND_SOC_BYTES("LHPF2 Coefficeints", WM2200_HPLPF2_2, 1),
+SND_SOC_BYTES("LHPF1 Coefficients", WM2200_HPLPF1_2, 1),
+SND_SOC_BYTES("LHPF2 Coefficients", WM2200_HPLPF2_2, 1),
 
 SOC_SINGLE("OUT1 High Performance Switch", WM2200_DAC_DIGITAL_VOLUME_1L,
 	   WM2200_OUT1_OSR_SHIFT, 1, 0),
diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c
index 87f9a99..ba89d9d 100644
--- a/sound/soc/codecs/wm5100.c
+++ b/sound/soc/codecs/wm5100.c
@@ -573,10 +573,10 @@
 SND_SOC_BYTES_MASK("DRC Coefficients", WM5100_DRC1_CTRL1, 5,
 		   WM5100_DRCL_ENA | WM5100_DRCR_ENA),
 
-SND_SOC_BYTES("LHPF1 Coefficeints", WM5100_HPLPF1_2, 1),
-SND_SOC_BYTES("LHPF2 Coefficeints", WM5100_HPLPF2_2, 1),
-SND_SOC_BYTES("LHPF3 Coefficeints", WM5100_HPLPF3_2, 1),
-SND_SOC_BYTES("LHPF4 Coefficeints", WM5100_HPLPF4_2, 1),
+SND_SOC_BYTES("LHPF1 Coefficients", WM5100_HPLPF1_2, 1),
+SND_SOC_BYTES("LHPF2 Coefficients", WM5100_HPLPF2_2, 1),
+SND_SOC_BYTES("LHPF3 Coefficients", WM5100_HPLPF3_2, 1),
+SND_SOC_BYTES("LHPF4 Coefficients", WM5100_HPLPF4_2, 1),
 
 SOC_SINGLE("HPOUT1 High Performance Switch", WM5100_OUT_VOLUME_1L,
 	   WM5100_OUT1_OSR_SHIFT, 1, 0),
diff --git a/sound/soc/codecs/wm8782.c b/sound/soc/codecs/wm8782.c
index 7949703..317db9a 100644
--- a/sound/soc/codecs/wm8782.c
+++ b/sound/soc/codecs/wm8782.c
@@ -67,9 +67,18 @@
 			&soc_component_dev_wm8782, &wm8782_dai, 1);
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id wm8782_of_match[] = {
+	{ .compatible = "wlf,wm8782", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, wm8782_of_match);
+#endif
+
 static struct platform_driver wm8782_codec_driver = {
 	.driver = {
 		.name = "wm8782",
+		.of_match_table = of_match_ptr(wm8782_of_match),
 	},
 	.probe = wm8782_probe,
 };
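
For reference, the #ifdef CONFIG_OF guard and the of_match_ptr() wrapper in the wm8782 hunks above pair up: on kernels built without CONFIG_OF the match table is compiled out and of_match_ptr() collapses to NULL, so the driver still builds. The definitions in <linux/of.h> reduce to:

	#ifdef CONFIG_OF
	#define of_match_ptr(_ptr)	(_ptr)
	#else
	#define of_match_ptr(_ptr)	NULL
	#endif
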
diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index 20fdcae..f13ef33 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c
@@ -596,7 +596,7 @@
 SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8904_ADC_DIGITAL_VOLUME_LEFT,
 		 WM8904_ADC_DIGITAL_VOLUME_RIGHT, 1, 119, 0, digital_tlv),
 
-SOC_ENUM("Left Caputure Mode", lin_mode),
+SOC_ENUM("Left Capture Mode", lin_mode),
 SOC_ENUM("Right Capture Mode", rin_mode),
 
 /* No TLV since it depends on mode */
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 82b0927..2175dcc 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -627,22 +627,21 @@
 	if (!root)
 		goto err;
 
-	if (!debugfs_create_bool("booted", S_IRUGO, root, &dsp->booted))
+	if (!debugfs_create_bool("booted", 0444, root, &dsp->booted))
 		goto err;
 
-	if (!debugfs_create_bool("running", S_IRUGO, root, &dsp->running))
+	if (!debugfs_create_bool("running", 0444, root, &dsp->running))
 		goto err;
 
-	if (!debugfs_create_x32("fw_id", S_IRUGO, root, &dsp->fw_id))
+	if (!debugfs_create_x32("fw_id", 0444, root, &dsp->fw_id))
 		goto err;
 
-	if (!debugfs_create_x32("fw_version", S_IRUGO, root,
-				&dsp->fw_id_version))
+	if (!debugfs_create_x32("fw_version", 0444, root, &dsp->fw_id_version))
 		goto err;
 
 	for (i = 0; i < ARRAY_SIZE(wm_adsp_debugfs_fops); ++i) {
 		if (!debugfs_create_file(wm_adsp_debugfs_fops[i].name,
-					 S_IRUGO, root, dsp,
+					 0444, root, dsp,
 					 &wm_adsp_debugfs_fops[i].fops))
 			goto err;
 	}
@@ -2666,9 +2665,9 @@
 	dsp->preloaded = ucontrol->value.integer.value[0];
 
 	if (ucontrol->value.integer.value[0])
-		snd_soc_dapm_force_enable_pin(dapm, preload);
+		snd_soc_component_force_enable_pin(component, preload);
 	else
-		snd_soc_dapm_disable_pin(dapm, preload);
+		snd_soc_component_disable_pin(component, preload);
 
 	snd_soc_dapm_sync(dapm);
 
@@ -2852,11 +2851,11 @@
 
 int wm_adsp2_component_probe(struct wm_adsp *dsp, struct snd_soc_component *component)
 {
-	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 	char preload[32];
 
 	snprintf(preload, ARRAY_SIZE(preload), "DSP%d Preload", dsp->num);
-	snd_soc_dapm_disable_pin(dapm, preload);
+
+	snd_soc_component_disable_pin(component, preload);
 
 	wm_adsp2_init_debugfs(dsp, component);
 
diff --git a/sound/soc/davinci/Kconfig b/sound/soc/davinci/Kconfig
index 6b732d8..778faff 100644
--- a/sound/soc/davinci/Kconfig
+++ b/sound/soc/davinci/Kconfig
@@ -24,7 +24,7 @@
 
 config SND_DAVINCI_SOC_MCASP
 	tristate "Multichannel Audio Serial Port (McASP) support"
-	depends on SND_OMAP_SOC || SND_EDMA_SOC
+	depends on SND_SDMA_SOC || SND_EDMA_SOC
 	help
 	  Say Y or M here if you want to have support for McASP IP found in
 	  various Texas Instruments SoCs like:
diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c
index 03ba218..1f96c9d 100644
--- a/sound/soc/davinci/davinci-mcasp.c
+++ b/sound/soc/davinci/davinci-mcasp.c
@@ -36,9 +36,9 @@
 #include <sound/initval.h>
 #include <sound/soc.h>
 #include <sound/dmaengine_pcm.h>
-#include <sound/omap-pcm.h>
 
 #include "edma-pcm.h"
+#include "../omap/sdma-pcm.h"
 #include "davinci-mcasp.h"
 
 #define MCASP_MAX_AFIFO_DEPTH	64
@@ -789,7 +789,7 @@
 					rx_ser < max_active_serializers) {
 			mcasp_clr_bits(mcasp, DAVINCI_MCASP_PDIR_REG, AXR(i));
 			rx_ser++;
-		} else {
+		} else if (mcasp->serial_dir[i] == INACTIVE_MODE) {
 			mcasp_mod_bits(mcasp, DAVINCI_MCASP_XRSRCTL_REG(i),
 				       SRMOD_INACTIVE, SRMOD_MASK);
 		}
@@ -2048,10 +2048,10 @@
 #endif
 		break;
 	case PCM_SDMA:
-#if IS_BUILTIN(CONFIG_SND_OMAP_SOC) || \
+#if IS_BUILTIN(CONFIG_SND_SDMA_SOC) || \
 	(IS_MODULE(CONFIG_SND_DAVINCI_SOC_MCASP) && \
-	 IS_MODULE(CONFIG_SND_OMAP_SOC))
-		ret = omap_pcm_platform_register(&pdev->dev);
+	 IS_MODULE(CONFIG_SND_SDMA_SOC))
+		ret = sdma_pcm_platform_register(&pdev->dev, NULL, NULL);
 #else
 		dev_err(&pdev->dev, "Missing SND_SDMA_SOC\n");
 		ret = -EINVAL;
diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c
index da8fd98..8f43110 100644
--- a/sound/soc/fsl/fsl_esai.c
+++ b/sound/soc/fsl/fsl_esai.c
@@ -1,12 +1,8 @@
-/*
- * Freescale ESAI ALSA SoC Digital Audio Interface (DAI) driver
- *
- * Copyright (C) 2014 Freescale Semiconductor, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// Freescale ESAI ALSA SoC Digital Audio Interface (DAI) driver
+//
+// Copyright (C) 2014 Freescale Semiconductor, Inc.
 
 #include <linux/clk.h>
 #include <linux/dmaengine.h>
@@ -226,6 +222,12 @@
 	unsigned long clk_rate;
 	int ret;
 
+	if (freq == 0) {
+		dev_err(dai->dev, "%sput freq of HCK%c should not be 0Hz\n",
+			in ? "in" : "out", tx ? 'T' : 'R');
+		return -EINVAL;
+	}
+
 	/* Bypass divider settings if the requirement doesn't change */
 	if (freq == esai_priv->hck_rate[tx] && dir == esai_priv->hck_dir[tx])
 		return 0;
diff --git a/sound/soc/fsl/fsl_esai.h b/sound/soc/fsl/fsl_esai.h
index 5e793bb..f873588 100644
--- a/sound/soc/fsl/fsl_esai.h
+++ b/sound/soc/fsl/fsl_esai.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * fsl_esai.h - ALSA ESAI interface for the Freescale i.MX SoC
  *
  * Copyright (C) 2014 Freescale Semiconductor, Inc.
  *
  * Author: Nicolin Chen <Guangyu.Chen@freescale.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
  */
 
 #ifndef _FSL_ESAI_DAI_H
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 18e5ce8..4163f2c 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -1,14 +1,8 @@
-/*
- * Freescale ALSA SoC Digital Audio Interface (SAI) driver.
- *
- * Copyright 2012-2015 Freescale Semiconductor, Inc.
- *
- * This program is free software, you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation, either version 2 of the License, or(at your
- * option) any later version.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Freescale ALSA SoC Digital Audio Interface (SAI) driver.
+//
+// Copyright 2012-2015 Freescale Semiconductor, Inc.
 
 #include <linux/clk.h>
 #include <linux/delay.h>
diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
index d9ed7be..24cb156 100644
--- a/sound/soc/fsl/fsl_sai.h
+++ b/sound/soc/fsl/fsl_sai.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright 2012-2013 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __FSL_SAI_H
diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c
index 4f7469c..9b59d87 100644
--- a/sound/soc/fsl/fsl_spdif.c
+++ b/sound/soc/fsl/fsl_spdif.c
@@ -1,17 +1,13 @@
-/*
- * Freescale S/PDIF ALSA SoC Digital Audio Interface (DAI) driver
- *
- * Copyright (C) 2013 Freescale Semiconductor, Inc.
- *
- * Based on stmp3xxx_spdif_dai.c
- * Vladimir Barinov <vbarinov@embeddedalley.com>
- * Copyright 2008 SigmaTel, Inc
- * Copyright 2008 Embedded Alley Solutions, Inc
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2.  This program  is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// Freescale S/PDIF ALSA SoC Digital Audio Interface (DAI) driver
+//
+// Copyright (C) 2013 Freescale Semiconductor, Inc.
+//
+// Based on stmp3xxx_spdif_dai.c
+// Vladimir Barinov <vbarinov@embeddedalley.com>
+// Copyright 2008 SigmaTel, Inc
+// Copyright 2008 Embedded Alley Solutions, Inc
 
 #include <linux/bitrev.h>
 #include <linux/clk.h>
diff --git a/sound/soc/fsl/fsl_spdif.h b/sound/soc/fsl/fsl_spdif.h
index 00bd351..7666dab 100644
--- a/sound/soc/fsl/fsl_spdif.h
+++ b/sound/soc/fsl/fsl_spdif.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * fsl_spdif.h - ALSA S/PDIF interface for the Freescale i.MX SoC
  *
@@ -8,10 +9,6 @@
  * Based on fsl_ssi.h
  * Author: Timur Tabi <timur@freescale.com>
  * Copyright 2007-2008 Freescale Semiconductor, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2.  This program  is licensed "as is" without any warranty of any
- * kind, whether express or implied.
  */
 
 #ifndef _FSL_SPDIF_DAI_H
diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
index 89df2d9..0a64822 100644
--- a/sound/soc/fsl/fsl_ssi.c
+++ b/sound/soc/fsl/fsl_ssi.c
@@ -1,34 +1,29 @@
-/*
- * Freescale SSI ALSA SoC Digital Audio Interface (DAI) driver
- *
- * Author: Timur Tabi <timur@freescale.com>
- *
- * Copyright 2007-2010 Freescale Semiconductor, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2.  This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- *
- *
- * Some notes why imx-pcm-fiq is used instead of DMA on some boards:
- *
- * The i.MX SSI core has some nasty limitations in AC97 mode. While most
- * sane processor vendors have a FIFO per AC97 slot, the i.MX has only
- * one FIFO which combines all valid receive slots. We cannot even select
- * which slots we want to receive. The WM9712 with which this driver
- * was developed with always sends GPIO status data in slot 12 which
- * we receive in our (PCM-) data stream. The only chance we have is to
- * manually skip this data in the FIQ handler. With sampling rates different
- * from 48000Hz not every frame has valid receive data, so the ratio
- * between pcm data and GPIO status data changes. Our FIQ handler is not
- * able to handle this, hence this driver only works with 48000Hz sampling
- * rate.
- * Reading and writing AC97 registers is another challenge. The core
- * provides us status bits when the read register is updated with *another*
- * value. When we read the same register two times (and the register still
- * contains the same value) these status bits are not set. We work
- * around this by not polling these bits but only wait a fixed delay.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// Freescale SSI ALSA SoC Digital Audio Interface (DAI) driver
+//
+// Author: Timur Tabi <timur@freescale.com>
+//
+// Copyright 2007-2010 Freescale Semiconductor, Inc.
+//
+// Some notes why imx-pcm-fiq is used instead of DMA on some boards:
+//
+// The i.MX SSI core has some nasty limitations in AC97 mode. While most
+// sane processor vendors have a FIFO per AC97 slot, the i.MX has only
+// one FIFO which combines all valid receive slots. We cannot even select
+// which slots we want to receive. The WM9712 with which this driver
+// was developed with always sends GPIO status data in slot 12 which
+// we receive in our (PCM-) data stream. The only chance we have is to
+// manually skip this data in the FIQ handler. With sampling rates different
+// from 48000Hz not every frame has valid receive data, so the ratio
+// between pcm data and GPIO status data changes. Our FIQ handler is not
+// able to handle this, hence this driver only works with 48000Hz sampling
+// rate.
+// Reading and writing AC97 registers is another challenge. The core
+// provides us status bits when the read register is updated with *another*
+// value. When we read the same register two times (and the register still
+// contains the same value) these status bits are not set. We work
+// around this by not polling these bits but only wait a fixed delay.
 
 #include <linux/init.h>
 #include <linux/io.h>
@@ -385,8 +380,7 @@
 {
 	struct fsl_ssi *ssi = dev_id;
 	struct regmap *regs = ssi->regs;
-	__be32 sisr;
-	__be32 sisr2;
+	u32 sisr, sisr2;
 
 	regmap_read(regs, REG_SSI_SISR, &sisr);
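
The sisr type change in the hunk above is more than cosmetic: regmap_read() stores a CPU-endian value through an unsigned int pointer, so keeping the result in __be32 mislabelled its byte order and trips sparse; plain u32 is correct on either endianness. For reference, the relevant signature:

	/* From <linux/regmap.h>; *val holds a CPU-endian value on return,
	 * regardless of the underlying bus endianness. */
	int regmap_read(struct regmap *map, unsigned int reg, unsigned int *val);
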
 
diff --git a/sound/soc/fsl/fsl_ssi.h b/sound/soc/fsl/fsl_ssi.h
index 18f8dd5..0bdda60 100644
--- a/sound/soc/fsl/fsl_ssi.h
+++ b/sound/soc/fsl/fsl_ssi.h
@@ -1,12 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * fsl_ssi.h - ALSA SSI interface for the Freescale MPC8610 and i.MX SoC
  *
  * Author: Timur Tabi <timur@freescale.com>
  *
- * Copyright 2007-2008 Freescale Semiconductor, Inc.  This file is licensed
- * under the terms of the GNU General Public License version 2.  This
- * program is licensed "as is" without any warranty of any kind, whether
- * express or implied.
+ * Copyright 2007-2008 Freescale Semiconductor, Inc.
  */
 
 #ifndef _MPC8610_I2S_H
diff --git a/sound/soc/fsl/fsl_ssi_dbg.c b/sound/soc/fsl/fsl_ssi_dbg.c
index 7aac63e..1255dfe 100644
--- a/sound/soc/fsl/fsl_ssi_dbg.c
+++ b/sound/soc/fsl/fsl_ssi_dbg.c
@@ -1,14 +1,10 @@
-/*
- * Freescale SSI ALSA SoC Digital Audio Interface (DAI) debugging functions
- *
- * Copyright 2014 Markus Pargmann <mpa@pengutronix.de>, Pengutronix
- *
- * Splitted from fsl_ssi.c
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2.  This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
+// SPDX-License-Identifier: GPL-2.0
+//
+// Freescale SSI ALSA SoC Digital Audio Interface (DAI) debugging functions
+//
+// Copyright 2014 Markus Pargmann <mpa@pengutronix.de>, Pengutronix
+//
+// Split from fsl_ssi.c
 
 #include <linux/debugfs.h>
 #include <linux/device.h>
@@ -146,7 +142,7 @@
 	if (!ssi_dbg->dbg_dir)
 		return -ENOMEM;
 
-	ssi_dbg->dbg_stats = debugfs_create_file("stats", S_IRUGO,
+	ssi_dbg->dbg_stats = debugfs_create_file("stats", 0444,
 						 ssi_dbg->dbg_dir, ssi_dbg,
 						 &fsl_ssi_stats_ops);
 	if (!ssi_dbg->dbg_stats) {
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 6959a74..4a516c4 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -135,6 +135,18 @@
 	asoc_simple_card_clk_disable(&dai_props->codec_dai);
 }
 
+static int asoc_simple_set_clk_rate(struct asoc_simple_dai *simple_dai,
+				    unsigned long rate)
+{
+	if (!simple_dai->clk)
+		return 0;
+
+	if (clk_get_rate(simple_dai->clk) == rate)
+		return 0;
+
+	return clk_set_rate(simple_dai->clk, rate);
+}
+
 static int asoc_simple_card_hw_params(struct snd_pcm_substream *substream,
 				      struct snd_pcm_hw_params *params)
 {
@@ -154,6 +166,15 @@
 
 	if (mclk_fs) {
 		mclk = params_rate(params) * mclk_fs;
+
+		ret = asoc_simple_set_clk_rate(&dai_props->codec_dai, mclk);
+		if (ret < 0)
+			return ret;
+
+		ret = asoc_simple_set_clk_rate(&dai_props->cpu_dai, mclk);
+		if (ret < 0)
+			return ret;
+
 		ret = snd_soc_dai_set_sysclk(codec_dai, 0, mclk,
 					     SND_SOC_CLOCK_IN);
 		if (ret && ret != -ENOTSUPP)
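
Worked numbers for the hw_params path above, assuming a board that sets the common simple-audio-card,mclk-fs factor of 256 (the factor itself is board-specific):

	rate = params_rate(params) = 48000 Hz
	mclk = rate * mclk_fs = 48000 * 256 = 12288000 Hz  (12.288 MHz)

asoc_simple_set_clk_rate() then returns early when the codec or CPU clock already runs at that rate, so repeated stream starts avoid redundant clk_set_rate() calls.
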
diff --git a/sound/soc/hisilicon/hi6210-i2s.c b/sound/soc/hisilicon/hi6210-i2s.c
index 07a5720..53344a3 100644
--- a/sound/soc/hisilicon/hi6210-i2s.c
+++ b/sound/soc/hisilicon/hi6210-i2s.c
@@ -498,7 +498,7 @@
 			hi6210_i2s_txctrl(cpu_dai, 0);
 		break;
 	default:
-		dev_err(cpu_dai->dev, "uknown cmd\n");
+		dev_err(cpu_dai->dev, "unknown cmd\n");
 		return -EINVAL;
 	}
 	return 0;
diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
index addac2a..0caa1f4 100644
--- a/sound/soc/intel/Kconfig
+++ b/sound/soc/intel/Kconfig
@@ -61,7 +61,7 @@
 
 config SND_SOC_INTEL_BAYTRAIL
 	tristate "Baytrail (legacy) Platforms"
-	depends on DMADEVICES && ACPI
+	depends on DMADEVICES && ACPI && SND_SST_ATOM_HIFI2_PLATFORM=n
 	select SND_SOC_INTEL_SST
 	select SND_SOC_INTEL_SST_ACPI
 	select SND_SOC_INTEL_SST_FIRMWARE
diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c
index 668c093..40eb979 100644
--- a/sound/soc/intel/boards/bxt_da7219_max98357a.c
+++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c
@@ -571,7 +571,7 @@
 {
 	struct bxt_card_private *ctx;
 
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c
index c7e9024..b68c289 100644
--- a/sound/soc/intel/boards/bxt_rt298.c
+++ b/sound/soc/intel/boards/bxt_rt298.c
@@ -593,7 +593,7 @@
 		}
 	}
 
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/byt-max98090.c b/sound/soc/intel/boards/byt-max98090.c
index 0f8b820..f128363 100644
--- a/sound/soc/intel/boards/byt-max98090.c
+++ b/sound/soc/intel/boards/byt-max98090.c
@@ -151,7 +151,7 @@
 	struct byt_max98090_private *priv;
 	int ret_val;
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_ATOMIC);
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv) {
 		dev_err(&pdev->dev, "allocation failed\n");
 		return -ENOMEM;
diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c
index 305e7f4..adc26df 100644
--- a/sound/soc/intel/boards/bytcht_es8316.c
+++ b/sound/soc/intel/boards/bytcht_es8316.c
@@ -243,7 +243,7 @@
 	int i;
 	int ret = 0;
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_ATOMIC);
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index a8d8bff..33065ba 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -17,6 +17,7 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+#include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -25,6 +26,7 @@
 #include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/dmi.h>
+#include <linux/input.h>
 #include <linux/slab.h>
 #include <asm/cpu_device_id.h>
 #include <asm/platform_sst_audio.h>
@@ -33,6 +35,7 @@
 #include <sound/soc.h>
 #include <sound/jack.h>
 #include <sound/soc-acpi.h>
+#include <dt-bindings/sound/rt5640.h>
 #include "../../codecs/rt5640.h"
 #include "../atom/sst-atom-controls.h"
 #include "../common/sst-dsp.h"
@@ -44,17 +47,53 @@
 	BYT_RT5640_IN3_MAP,
 };
 
-#define BYT_RT5640_MAP(quirk)	((quirk) &  GENMASK(7, 0))
-#define BYT_RT5640_DMIC_EN	BIT(16)
-#define BYT_RT5640_MONO_SPEAKER BIT(17)
-#define BYT_RT5640_DIFF_MIC     BIT(18) /* defaut is single-ended */
-#define BYT_RT5640_SSP2_AIF2    BIT(19) /* default is using AIF1  */
-#define BYT_RT5640_SSP0_AIF1    BIT(20)
-#define BYT_RT5640_SSP0_AIF2    BIT(21)
-#define BYT_RT5640_MCLK_EN	BIT(22)
-#define BYT_RT5640_MCLK_25MHZ	BIT(23)
+enum {
+	BYT_RT5640_JD_SRC_GPIO1		= (RT5640_JD_SRC_GPIO1 << 4),
+	BYT_RT5640_JD_SRC_JD1_IN4P	= (RT5640_JD_SRC_JD1_IN4P << 4),
+	BYT_RT5640_JD_SRC_JD2_IN4N	= (RT5640_JD_SRC_JD2_IN4N << 4),
+	BYT_RT5640_JD_SRC_GPIO2		= (RT5640_JD_SRC_GPIO2 << 4),
+	BYT_RT5640_JD_SRC_GPIO3		= (RT5640_JD_SRC_GPIO3 << 4),
+	BYT_RT5640_JD_SRC_GPIO4		= (RT5640_JD_SRC_GPIO4 << 4),
+};
+
+enum {
+	BYT_RT5640_OVCD_TH_600UA	= (6 << 8),
+	BYT_RT5640_OVCD_TH_1500UA	= (15 << 8),
+	BYT_RT5640_OVCD_TH_2000UA	= (20 << 8),
+};
+
+enum {
+	BYT_RT5640_OVCD_SF_0P5		= (RT5640_OVCD_SF_0P5 << 13),
+	BYT_RT5640_OVCD_SF_0P75		= (RT5640_OVCD_SF_0P75 << 13),
+	BYT_RT5640_OVCD_SF_1P0		= (RT5640_OVCD_SF_1P0 << 13),
+	BYT_RT5640_OVCD_SF_1P5		= (RT5640_OVCD_SF_1P5 << 13),
+};
+
+#define BYT_RT5640_MAP(quirk)		((quirk) &  GENMASK(3, 0))
+#define BYT_RT5640_JDSRC(quirk)		(((quirk) & GENMASK(7, 4)) >> 4)
+#define BYT_RT5640_OVCD_TH(quirk)	(((quirk) & GENMASK(12, 8)) >> 8)
+#define BYT_RT5640_OVCD_SF(quirk)	(((quirk) & GENMASK(14, 13)) >> 13)
+#define BYT_RT5640_JD_NOT_INV		BIT(16)
+#define BYT_RT5640_MONO_SPEAKER		BIT(17)
+#define BYT_RT5640_DIFF_MIC		BIT(18) /* default is single-ended */
+#define BYT_RT5640_SSP2_AIF2		BIT(19) /* default is using AIF1  */
+#define BYT_RT5640_SSP0_AIF1		BIT(20)
+#define BYT_RT5640_SSP0_AIF2		BIT(21)
+#define BYT_RT5640_MCLK_EN		BIT(22)
+#define BYT_RT5640_MCLK_25MHZ		BIT(23)
+
+#define BYTCR_INPUT_DEFAULTS				\
+	(BYT_RT5640_IN3_MAP |				\
+	 BYT_RT5640_JD_SRC_JD1_IN4P |			\
+	 BYT_RT5640_OVCD_TH_2000UA |			\
+	 BYT_RT5640_OVCD_SF_0P75 |			\
+	 BYT_RT5640_DIFF_MIC)
+
+/* in-diff or dmic-pin + jdsrc + ovcd-th + -sf + jd-inv + terminating entry */
+#define MAX_NO_PROPS 6
 
 struct byt_rt5640_private {
+	struct snd_soc_jack jack;
 	struct clk *mclk;
 };
 static bool is_bytcr;
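
To make the quirk bit-packing above concrete, BYTCR_INPUT_DEFAULTS decodes as follows, assuming the input-map enum assigns BYT_RT5640_IN3_MAP = 3 and dt-bindings/sound/rt5640.h defines RT5640_JD_SRC_JD1_IN4P = 2 and RT5640_OVCD_SF_0P75 = 1:

	BYT_RT5640_IN3_MAP         = 0x00003   /* bits 3:0,  input map  */
	BYT_RT5640_JD_SRC_JD1_IN4P = 0x00020   /* 2 << 4,    bits 7:4   */
	BYT_RT5640_OVCD_TH_2000UA  = 0x01400   /* 20 << 8,   bits 12:8  */
	BYT_RT5640_OVCD_SF_0P75    = 0x02000   /* 1 << 13,   bits 14:13 */
	BYT_RT5640_DIFF_MIC        = 0x40000   /* BIT(18)               */
	BYTCR_INPUT_DEFAULTS       = 0x43423   /* OR of the above       */

The accessor macros then recover the raw values: BYT_RT5640_JDSRC() yields 2 (JD1_IN4P), BYT_RT5640_OVCD_TH() yields 20 (times 100 gives 2000 microamp), and BYT_RT5640_OVCD_SF() yields 1 (scale factor 0.75).
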
@@ -67,7 +106,6 @@
 static void log_quirks(struct device *dev)
 {
 	int map;
-	bool has_dmic = false;
 	bool has_mclk = false;
 	bool has_ssp0 = false;
 	bool has_ssp0_aif1 = false;
@@ -78,11 +116,9 @@
 	switch (map) {
 	case BYT_RT5640_DMIC1_MAP:
 		dev_info(dev, "quirk DMIC1_MAP enabled\n");
-		has_dmic = true;
 		break;
 	case BYT_RT5640_DMIC2_MAP:
 		dev_info(dev, "quirk DMIC2_MAP enabled\n");
-		has_dmic = true;
 		break;
 	case BYT_RT5640_IN1_MAP:
 		dev_info(dev, "quirk IN1_MAP enabled\n");
@@ -94,20 +130,20 @@
 		dev_err(dev, "quirk map 0x%x is not supported, microphone input will not work\n", map);
 		break;
 	}
-	if (byt_rt5640_quirk & BYT_RT5640_DMIC_EN) {
-		if (has_dmic)
-			dev_info(dev, "quirk DMIC enabled\n");
-		else
-			dev_err(dev, "quirk DMIC enabled but no DMIC input set, will be ignored\n");
+	if (BYT_RT5640_JDSRC(byt_rt5640_quirk)) {
+		dev_info(dev, "quirk realtek,jack-detect-source %ld\n",
+			 BYT_RT5640_JDSRC(byt_rt5640_quirk));
+		dev_info(dev, "quirk realtek,over-current-threshold-microamp %ld\n",
+			 BYT_RT5640_OVCD_TH(byt_rt5640_quirk) * 100);
+		dev_info(dev, "quirk realtek,over-current-scale-factor %ld\n",
+			 BYT_RT5640_OVCD_SF(byt_rt5640_quirk));
 	}
+	if (byt_rt5640_quirk & BYT_RT5640_JD_NOT_INV)
+		dev_info(dev, "quirk JD_NOT_INV enabled\n");
 	if (byt_rt5640_quirk & BYT_RT5640_MONO_SPEAKER)
 		dev_info(dev, "quirk MONO_SPEAKER enabled\n");
-	if (byt_rt5640_quirk & BYT_RT5640_DIFF_MIC) {
-		if (!has_dmic)
-			dev_info(dev, "quirk DIFF_MIC enabled\n");
-		else
-			dev_info(dev, "quirk DIFF_MIC enabled but DMIC input selected, will be ignored\n");
-	}
+	if (byt_rt5640_quirk & BYT_RT5640_DIFF_MIC)
+		dev_info(dev, "quirk DIFF_MIC enabled\n");
 	if (byt_rt5640_quirk & BYT_RT5640_SSP0_AIF1) {
 		dev_info(dev, "quirk SSP0_AIF1 enabled\n");
 		has_ssp0 = true;
@@ -141,6 +177,52 @@
 	}
 }
 
+static int byt_rt5640_prepare_and_enable_pll1(struct snd_soc_dai *codec_dai,
+					      int rate)
+{
+	int ret;
+
+	/* Configure the PLL before selecting it */
+	if (!(byt_rt5640_quirk & BYT_RT5640_MCLK_EN)) {
+		/* use bitclock as PLL input */
+		if ((byt_rt5640_quirk & BYT_RT5640_SSP0_AIF1) ||
+		    (byt_rt5640_quirk & BYT_RT5640_SSP0_AIF2)) {
+			/* 2x16 bit slots on SSP0 */
+			ret = snd_soc_dai_set_pll(codec_dai, 0,
+						  RT5640_PLL1_S_BCLK1,
+						  rate * 32, rate * 512);
+		} else {
+			/* 2x25 bit slots on SSP2 */
+			ret = snd_soc_dai_set_pll(codec_dai, 0,
+						  RT5640_PLL1_S_BCLK1,
+						  rate * 50, rate * 512);
+		}
+	} else {
+		if (byt_rt5640_quirk & BYT_RT5640_MCLK_25MHZ) {
+			ret = snd_soc_dai_set_pll(codec_dai, 0,
+						  RT5640_PLL1_S_MCLK,
+						  25000000, rate * 512);
+		} else {
+			ret = snd_soc_dai_set_pll(codec_dai, 0,
+						  RT5640_PLL1_S_MCLK,
+						  19200000, rate * 512);
+		}
+	}
+
+	if (ret < 0) {
+		dev_err(codec_dai->component->dev, "can't set pll: %d\n", ret);
+		return ret;
+	}
+
+	ret = snd_soc_dai_set_sysclk(codec_dai, RT5640_SCLK_S_PLL1,
+				     rate * 512, SND_SOC_CLOCK_IN);
+	if (ret < 0) {
+		dev_err(codec_dai->component->dev, "can't set clock %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
 
 #define BYT_CODEC_DAI1	"rt5640-aif1"
 #define BYT_CODEC_DAI2	"rt5640-aif2"
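
Worked through for the 48 kHz case that byt_rt5640_prepare_and_enable_pll1() handles above:

	fs = 48000 Hz
	SSP0: bclk = 32 * fs = 1536000 Hz     (2 x 16-bit slots)
	SSP2: bclk = 50 * fs = 2400000 Hz     (2 x 25-bit slots)
	PLL1 out   = 512 * fs = 24576000 Hz   (selected as RT5640_SCLK_S_PLL1 sysclk)
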
@@ -173,9 +255,7 @@
 				return ret;
 			}
 		}
-		ret = snd_soc_dai_set_sysclk(codec_dai, RT5640_SCLK_S_PLL1,
-					     48000 * 512,
-					     SND_SOC_CLOCK_IN);
+		ret = byt_rt5640_prepare_and_enable_pll1(codec_dai, 48000);
 	} else {
 		/*
 		 * Set codec clock source to internal clock before
@@ -295,129 +375,41 @@
 	SOC_DAPM_PIN_SWITCH("Speaker"),
 };
 
+static struct snd_soc_jack_pin rt5640_pins[] = {
+	{
+		.pin	= "Headphone",
+		.mask	= SND_JACK_HEADPHONE,
+	},
+	{
+		.pin	= "Headset Mic",
+		.mask	= SND_JACK_MICROPHONE,
+	},
+};
+
 static int byt_rt5640_aif1_hw_params(struct snd_pcm_substream *substream,
 					struct snd_pcm_hw_params *params)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_dai *codec_dai = rtd->codec_dai;
-	int ret;
+	struct snd_soc_dai *dai = rtd->codec_dai;
 
-	ret = snd_soc_dai_set_sysclk(codec_dai, RT5640_SCLK_S_PLL1,
-				     params_rate(params) * 512,
-				     SND_SOC_CLOCK_IN);
-
-	if (ret < 0) {
-		dev_err(rtd->dev, "can't set codec clock %d\n", ret);
-		return ret;
-	}
-
-	if (!(byt_rt5640_quirk & BYT_RT5640_MCLK_EN)) {
-		/* use bitclock as PLL input */
-		if ((byt_rt5640_quirk & BYT_RT5640_SSP0_AIF1) ||
-			(byt_rt5640_quirk & BYT_RT5640_SSP0_AIF2)) {
-
-			/* 2x16 bit slots on SSP0 */
-			ret = snd_soc_dai_set_pll(codec_dai, 0,
-						RT5640_PLL1_S_BCLK1,
-						params_rate(params) * 32,
-						params_rate(params) * 512);
-		} else {
-			/* 2x15 bit slots on SSP2 */
-			ret = snd_soc_dai_set_pll(codec_dai, 0,
-						RT5640_PLL1_S_BCLK1,
-						params_rate(params) * 50,
-						params_rate(params) * 512);
-		}
-	} else {
-		if (byt_rt5640_quirk & BYT_RT5640_MCLK_25MHZ) {
-			ret = snd_soc_dai_set_pll(codec_dai, 0,
-						RT5640_PLL1_S_MCLK,
-						25000000,
-						params_rate(params) * 512);
-		} else {
-			ret = snd_soc_dai_set_pll(codec_dai, 0,
-						RT5640_PLL1_S_MCLK,
-						19200000,
-						params_rate(params) * 512);
-		}
-	}
-
-	if (ret < 0) {
-		dev_err(rtd->dev, "can't set codec pll: %d\n", ret);
-		return ret;
-	}
-
-	return 0;
+	return byt_rt5640_prepare_and_enable_pll1(dai, params_rate(params));
 }
 
-static int byt_rt5640_quirk_cb(const struct dmi_system_id *id)
-{
-	byt_rt5640_quirk = (unsigned long)id->driver_data;
-	return 1;
-}
-
+/* Please keep this list alphabetically sorted */
 static const struct dmi_system_id byt_rt5640_quirk_table[] = {
-	{
-		.callback = byt_rt5640_quirk_cb,
+	{	/* Acer Iconia Tab 8 W1-810 */
 		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
-		},
-		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
-					BYT_RT5640_MCLK_EN),
-	},
-	{
-		.callback = byt_rt5640_quirk_cb,
-		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TAF"),
-		},
-		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
-					BYT_RT5640_MONO_SPEAKER |
-					BYT_RT5640_DIFF_MIC |
-					BYT_RT5640_SSP0_AIF2 |
-					BYT_RT5640_MCLK_EN),
-	},
-	{
-		.callback = byt_rt5640_quirk_cb,
-		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "DellInc."),
-			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Venue 8 Pro 5830"),
-		},
-		.driver_data = (void *)(BYT_RT5640_DMIC2_MAP |
-					BYT_RT5640_DMIC_EN |
-					BYT_RT5640_MCLK_EN),
-	},
-	{
-		.callback = byt_rt5640_quirk_cb,
-		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
-			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP ElitePad 1000 G2"),
-		},
-		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
-					BYT_RT5640_MCLK_EN),
-	},
-	{
-		.callback = byt_rt5640_quirk_cb,
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Circuitco"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "Minnowboard Max B3 PLATFORM"),
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Iconia W1-810"),
 		},
 		.driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
-					BYT_RT5640_DMIC_EN),
+					BYT_RT5640_JD_SRC_JD1_IN4P |
+					BYT_RT5640_OVCD_TH_2000UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
 	},
 	{
-		.callback = byt_rt5640_quirk_cb,
-		.matches = {
-			DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
-			DMI_MATCH(DMI_BOARD_NAME, "tPAD"),
-		},
-		.driver_data = (void *)(BYT_RT5640_IN3_MAP |
-					BYT_RT5640_MCLK_EN |
-					BYT_RT5640_SSP0_AIF1),
-	},
-	{
-		.callback = byt_rt5640_quirk_cb,
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"),
@@ -428,11 +420,207 @@
 
 	},
 	{
-		.callback = byt_rt5640_quirk_cb,
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ARCHOS 80 Cesium"),
+		},
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
+		},
+		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
+					BYT_RT5640_JD_SRC_JD2_IN4N |
+					BYT_RT5640_OVCD_TH_2000UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TAF"),
+		},
+		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_DIFF_MIC |
+					BYT_RT5640_SSP0_AIF2 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* Chuwi Vi8 (CWI506) */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "i86"),
+			/* The above are too generic, also match BIOS info */
+			DMI_MATCH(DMI_BIOS_VERSION, "CHUWI.D86JLBNR"),
+		},
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Circuitco"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Minnowboard Max B3 PLATFORM"),
+		},
+		.driver_data = (void *)(BYT_RT5640_DMIC1_MAP),
+	},
+	{
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Venue 8 Pro 5830"),
+		},
+		.driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
+					BYT_RT5640_JD_SRC_JD2_IN4N |
+					BYT_RT5640_OVCD_TH_2000UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_MCLK_EN),
+	},
+	{
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP ElitePad 1000 G2"),
+		},
+		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* HP Pavilion x2 10-n000nd */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"),
+		},
+		.driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
+					BYT_RT5640_JD_SRC_JD2_IN4N |
+					BYT_RT5640_OVCD_TH_1500UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* HP Stream 7 */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "HP Stream 7 Tablet"),
+		},
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_JD_NOT_INV |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* I.T.Works TW891 */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "TW891"),
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "To be filled by O.E.M."),
+			DMI_EXACT_MATCH(DMI_BOARD_NAME, "TW891"),
+		},
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* Lamina I8270 / T701BR.SE */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Lamina"),
+			DMI_EXACT_MATCH(DMI_BOARD_NAME, "T701BR.SE"),
+		},
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_JD_NOT_INV |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* MSI S100 tablet */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Micro-Star International Co., Ltd."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "S100"),
+		},
+		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
+					BYT_RT5640_JD_SRC_JD2_IN4N |
+					BYT_RT5640_OVCD_TH_2000UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_DIFF_MIC |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* Pipo W4 */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+			DMI_EXACT_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
+			/* The above are too generic, also match BIOS info */
+			DMI_MATCH(DMI_BIOS_VERSION, "V8L_WIN32_CHIPHD"),
+		},
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* Point of View Mobii TAB-P800W (V2.0) */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+			DMI_EXACT_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
+			/* The above are too generic, also match BIOS info */
+			DMI_EXACT_MATCH(DMI_BIOS_VERSION, "3BAIR1014"),
+			DMI_EXACT_MATCH(DMI_BIOS_DATE, "10/24/2014"),
+		},
+		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
+					BYT_RT5640_JD_SRC_JD2_IN4N |
+					BYT_RT5640_OVCD_TH_2000UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_DIFF_MIC |
+					BYT_RT5640_SSP0_AIF2 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* Point of View Mobii TAB-P800W (V2.1) */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+			DMI_EXACT_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
+			/* The above are too generic, also match BIOS info */
+			DMI_EXACT_MATCH(DMI_BIOS_VERSION, "3BAIR1013"),
+			DMI_EXACT_MATCH(DMI_BIOS_DATE, "08/22/2014"),
+		},
+		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
+					BYT_RT5640_JD_SRC_JD2_IN4N |
+					BYT_RT5640_OVCD_TH_2000UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_DIFF_MIC |
+					BYT_RT5640_SSP0_AIF2 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
+			DMI_MATCH(DMI_BOARD_NAME, "tPAD"),
+		},
+		.driver_data = (void *)(BYT_RT5640_IN3_MAP |
+					BYT_RT5640_MCLK_EN |
+					BYT_RT5640_SSP0_AIF1),
+	},
+	{	/* Toshiba Satellite Click Mini L9W-B */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SATELLITE Click Mini L9W-B"),
+		},
+		.driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
+					BYT_RT5640_JD_SRC_JD2_IN4N |
+					BYT_RT5640_OVCD_TH_1500UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
+	{	/* Catch-all for generic Insyde tablets, must be last */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
 		},
-		.driver_data = (void *)(BYT_RT5640_IN3_MAP |
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
 					BYT_RT5640_MCLK_EN |
 					BYT_RT5640_SSP0_AIF1),
 
@@ -440,6 +628,64 @@
 	{}
 };
 
+/*
+ * Note this MUST be called before snd_soc_register_card(), so that the props
+ * are in place before the codec component driver's probe function parses them.
+ */
+static int byt_rt5640_add_codec_device_props(const char *i2c_dev_name)
+{
+	struct property_entry props[MAX_NO_PROPS] = {};
+	struct device *i2c_dev;
+	int ret, cnt = 0;
+
+	i2c_dev = bus_find_device_by_name(&i2c_bus_type, NULL, i2c_dev_name);
+	if (!i2c_dev)
+		return -EPROBE_DEFER;
+
+	switch (BYT_RT5640_MAP(byt_rt5640_quirk)) {
+	case BYT_RT5640_DMIC1_MAP:
+		props[cnt++] = PROPERTY_ENTRY_U32("realtek,dmic1-data-pin",
+						  RT5640_DMIC1_DATA_PIN_IN1P);
+		break;
+	case BYT_RT5640_DMIC2_MAP:
+		props[cnt++] = PROPERTY_ENTRY_U32("realtek,dmic2-data-pin",
+						  RT5640_DMIC2_DATA_PIN_IN1N);
+		break;
+	case BYT_RT5640_IN1_MAP:
+		if (byt_rt5640_quirk & BYT_RT5640_DIFF_MIC)
+			props[cnt++] =
+				PROPERTY_ENTRY_BOOL("realtek,in1-differential");
+		break;
+	case BYT_RT5640_IN3_MAP:
+		if (byt_rt5640_quirk & BYT_RT5640_DIFF_MIC)
+			props[cnt++] =
+				PROPERTY_ENTRY_BOOL("realtek,in3-differential");
+		break;
+	}
+
+	if (BYT_RT5640_JDSRC(byt_rt5640_quirk)) {
+		props[cnt++] = PROPERTY_ENTRY_U32(
+				    "realtek,jack-detect-source",
+				    BYT_RT5640_JDSRC(byt_rt5640_quirk));
+
+		props[cnt++] = PROPERTY_ENTRY_U32(
+				    "realtek,over-current-threshold-microamp",
+				    BYT_RT5640_OVCD_TH(byt_rt5640_quirk) * 100);
+
+		props[cnt++] = PROPERTY_ENTRY_U32(
+				    "realtek,over-current-scale-factor",
+				    BYT_RT5640_OVCD_SF(byt_rt5640_quirk));
+	}
+
+	if (byt_rt5640_quirk & BYT_RT5640_JD_NOT_INV)
+		props[cnt++] = PROPERTY_ENTRY_BOOL("realtek,jack-detect-not-inverted");
+
+	ret = device_add_properties(i2c_dev, props);
+	put_device(i2c_dev);
+
+	return ret;
+}
+
 static int byt_rt5640_init(struct snd_soc_pcm_runtime *runtime)
 {
 	struct snd_soc_card *card = runtime->card;
@@ -451,6 +697,11 @@
 
 	card->dapm.idle_bias_off = true;
 
+	/* Start with RC clk for jack-detect (we disable MCLK below) */
+	if (byt_rt5640_quirk & BYT_RT5640_MCLK_EN)
+		snd_soc_component_update_bits(component, RT5640_GLB_CLK,
+			RT5640_SCLK_SRC_MASK, RT5640_SCLK_SRC_RCCLK);
+
 	rt5640_sel_asrc_clk_src(component,
 				RT5640_DA_STEREO_FILTER |
 				RT5640_DA_MONO_L_FILTER	|
@@ -521,17 +772,6 @@
 	if (ret)
 		return ret;
 
-	if (byt_rt5640_quirk & BYT_RT5640_DIFF_MIC) {
-		snd_soc_component_update_bits(component,  RT5640_IN1_IN2, RT5640_IN_DF1,
-				    RT5640_IN_DF1);
-	}
-
-	if (byt_rt5640_quirk & BYT_RT5640_DMIC_EN) {
-		ret = rt5640_dmic_enable(component, 0, 0);
-		if (ret)
-			return ret;
-	}
-
 	snd_soc_dapm_ignore_suspend(&card->dapm, "Headphone");
 	snd_soc_dapm_ignore_suspend(&card->dapm, "Speaker");
 
@@ -555,11 +795,27 @@
 		else
 			ret = clk_set_rate(priv->mclk, 19200000);
 
-		if (ret)
+		if (ret) {
 			dev_err(card->dev, "unable to set MCLK rate\n");
+			return ret;
+		}
 	}
 
-	return ret;
+	if (BYT_RT5640_JDSRC(byt_rt5640_quirk)) {
+		ret = snd_soc_card_jack_new(card, "Headset",
+					    SND_JACK_HEADSET | SND_JACK_BTN_0,
+					    &priv->jack, rt5640_pins,
+					    ARRAY_SIZE(rt5640_pins));
+		if (ret) {
+			dev_err(card->dev, "Jack creation failed %d\n", ret);
+			return ret;
+		}
+		snd_jack_set_key(priv->jack.jack, SND_JACK_BTN_0,
+				 KEY_PLAYPAUSE);
+		snd_soc_component_set_jack(component, &priv->jack, NULL);
+	}
+
+	return 0;
 }
 
 static const struct snd_soc_pcm_stream byt_rt5640_dai_params = {
@@ -701,6 +957,48 @@
 };
 
 /* SoC card */
+static char byt_rt5640_codec_name[SND_ACPI_I2C_ID_LEN];
+static char byt_rt5640_codec_aif_name[12]; /*  = "rt5640-aif[1|2]" */
+static char byt_rt5640_cpu_dai_name[10]; /*  = "ssp[0|2]-port" */
+static char byt_rt5640_long_name[40]; /* = "bytcr-rt5640-*-spk-*-mic" */
+
+static int byt_rt5640_suspend(struct snd_soc_card *card)
+{
+	struct snd_soc_component *component;
+
+	if (!BYT_RT5640_JDSRC(byt_rt5640_quirk))
+		return 0;
+
+	list_for_each_entry(component, &card->component_dev_list, card_list) {
+		if (!strcmp(component->name, byt_rt5640_codec_name)) {
+			dev_dbg(component->dev, "disabling jack detect before suspend\n");
+			snd_soc_component_set_jack(component, NULL, NULL);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int byt_rt5640_resume(struct snd_soc_card *card)
+{
+	struct byt_rt5640_private *priv = snd_soc_card_get_drvdata(card);
+	struct snd_soc_component *component;
+
+	if (!BYT_RT5640_JDSRC(byt_rt5640_quirk))
+		return 0;
+
+	list_for_each_entry(component, &card->component_dev_list, card_list) {
+		if (!strcmp(component->name, byt_rt5640_codec_name)) {
+			dev_dbg(component->dev, "re-enabling jack detect after resume\n");
+			snd_soc_component_set_jack(component, &priv->jack, NULL);
+			break;
+		}
+	}
+
+	return 0;
+}
+
 static struct snd_soc_card byt_rt5640_card = {
 	.name = "bytcr-rt5640",
 	.owner = THIS_MODULE,
@@ -711,12 +1009,10 @@
 	.dapm_routes = byt_rt5640_audio_map,
 	.num_dapm_routes = ARRAY_SIZE(byt_rt5640_audio_map),
 	.fully_routed = true,
+	.suspend_pre = byt_rt5640_suspend,
+	.resume_post = byt_rt5640_resume,
 };
 
-static char byt_rt5640_codec_name[SND_ACPI_I2C_ID_LEN];
-static char byt_rt5640_codec_aif_name[12]; /*  = "rt5640-aif[1|2]" */
-static char byt_rt5640_cpu_dai_name[10]; /*  = "ssp[0|2]-port" */
-
 static bool is_valleyview(void)
 {
 	static const struct x86_cpu_id cpu_ids[] = {
@@ -736,6 +1032,8 @@
 
 static int snd_byt_rt5640_mc_probe(struct platform_device *pdev)
 {
+	const char * const map_name[] = { "dmic1", "dmic2", "in1", "in3" };
+	const struct dmi_system_id *dmi_id;
 	struct byt_rt5640_private *priv;
 	struct snd_soc_acpi_mach *mach;
 	const char *i2c_name = NULL;
@@ -744,7 +1042,7 @@
 	int i;
 
 	is_bytcr = false;
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_ATOMIC);
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
@@ -829,20 +1127,29 @@
 		}
 
 		/* change defaults for Baytrail-CR capture */
-		byt_rt5640_quirk |= BYT_RT5640_IN1_MAP;
-		byt_rt5640_quirk |= BYT_RT5640_DIFF_MIC;
+		byt_rt5640_quirk |= BYTCR_INPUT_DEFAULTS;
 	} else {
-		byt_rt5640_quirk |= (BYT_RT5640_DMIC1_MAP |
-				BYT_RT5640_DMIC_EN);
+		byt_rt5640_quirk |= BYT_RT5640_DMIC1_MAP |
+				    BYT_RT5640_JD_SRC_JD2_IN4N |
+				    BYT_RT5640_OVCD_TH_2000UA |
+				    BYT_RT5640_OVCD_SF_0P75;
 	}
 
 	/* check quirks before creating card */
-	dmi_check_system(byt_rt5640_quirk_table);
+	dmi_id = dmi_first_match(byt_rt5640_quirk_table);
+	if (dmi_id)
+		byt_rt5640_quirk = (unsigned long)dmi_id->driver_data;
 	if (quirk_override) {
 		dev_info(&pdev->dev, "Overriding quirk 0x%x => 0x%x\n",
 			 (unsigned int)byt_rt5640_quirk, quirk_override);
 		byt_rt5640_quirk = quirk_override;
 	}
+
+	/* Must be called before register_card; also see the declaration comment. */
+	ret_val = byt_rt5640_add_codec_device_props(byt_rt5640_codec_name);
+	if (ret_val)
+		return ret_val;
+
 	log_quirks(&pdev->dev);
 
 	if ((byt_rt5640_quirk & BYT_RT5640_SSP2_AIF2) ||
@@ -889,6 +1196,13 @@
 		}
 	}
 
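+	/* Expose the speaker/mic config in the long name, e.g. for UCM use */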
+	snprintf(byt_rt5640_long_name, sizeof(byt_rt5640_long_name),
+		 "bytcr-rt5640-%s-spk-%s-mic",
+		 (byt_rt5640_quirk & BYT_RT5640_MONO_SPEAKER) ?
+			"mono" : "stereo",
+		 map_name[BYT_RT5640_MAP(byt_rt5640_quirk)]);
+	byt_rt5640_card.long_name = byt_rt5640_long_name;
+
 	ret_val = devm_snd_soc_register_card(&pdev->dev, &byt_rt5640_card);
 
 	if (ret_val) {
diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c
index 1b1997f..987720e 100644
--- a/sound/soc/intel/boards/bytcr_rt5651.c
+++ b/sound/soc/intel/boards/bytcr_rt5651.c
@@ -706,6 +706,7 @@
 static char byt_rt5651_codec_name[SND_ACPI_I2C_ID_LEN];
 static char byt_rt5651_codec_aif_name[12]; /*  = "rt5651-aif[1|2]" */
 static char byt_rt5651_cpu_dai_name[10]; /*  = "ssp[0|2]-port" */
+static char byt_rt5651_long_name[40]; /* = "bytcr-rt5651-*-spk-*-mic" */
 
 static bool is_valleyview(void)
 {
@@ -726,6 +727,10 @@
 
 static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
 {
+	const char * const intmic_name[] =
+		{ "dmic", "in1", "in2", "in12", "in1", "in2" };
+	const char * const hsmic_name[] =
+		{  "in2", "in2", "in1",  "in3", "in3", "in3" };
 	struct byt_rt5651_private *priv;
 	struct snd_soc_acpi_mach *mach;
 	const char *i2c_name = NULL;
@@ -734,7 +739,7 @@
 	int dai_index = 0;
 	int i;
 
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_ATOMIC);
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
 
@@ -856,9 +861,10 @@
 	if (byt_rt5651_quirk & BYT_RT5651_MCLK_EN) {
 		priv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3");
 		if (IS_ERR(priv->mclk)) {
+			ret_val = PTR_ERR(priv->mclk);
 			dev_err(&pdev->dev,
-				"Failed to get MCLK from pmc_plt_clk_3: %ld\n",
-				PTR_ERR(priv->mclk));
+				"Failed to get MCLK from pmc_plt_clk_3: %d\n",
+				ret_val);
 			/*
 			 * Fall back to bit clock usage for -ENOENT (clock not
 			 * available likely due to missing dependencies), bail
@@ -870,6 +876,12 @@
 		}
 	}
 
+	snprintf(byt_rt5651_long_name, sizeof(byt_rt5651_long_name),
+		 "bytcr-rt5651-%s-intmic-%s-hsmic",
+		 intmic_name[BYT_RT5651_MAP(byt_rt5651_quirk)],
+		 hsmic_name[BYT_RT5651_MAP(byt_rt5651_quirk)]);
+	byt_rt5651_card.long_name = byt_rt5651_long_name;
+
 	ret_val = devm_snd_soc_register_card(&pdev->dev, &byt_rt5651_card);
 
 	if (ret_val) {
diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index d3e1c7e..db6976f 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -391,7 +391,7 @@
 	int ret_val = 0;
 	struct cht_mc_private *drv;
 
-	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_ATOMIC);
+	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
 	if (!drv)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/cht_bsw_nau8824.c b/sound/soc/intel/boards/cht_bsw_nau8824.c
index 680f2b3..30c4697 100644
--- a/sound/soc/intel/boards/cht_bsw_nau8824.c
+++ b/sound/soc/intel/boards/cht_bsw_nau8824.c
@@ -120,7 +120,7 @@
 	 * KEY_VOLUMEUP
 	 * KEY_VOLUMEDOWN
 	 */
-	jack_type = SND_JACK_HEADPHONE | SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+	jack_type = SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 |
 		SND_JACK_BTN_2 | SND_JACK_BTN_3;
 	ret = snd_soc_card_jack_new(runtime->card, "Headset", jack_type, jack,
 		cht_bsw_jack_pins, ARRAY_SIZE(cht_bsw_jack_pins));
@@ -248,7 +248,7 @@
 	struct cht_mc_private *drv;
 	int ret_val;
 
-	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_ATOMIC);
+	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
 	if (!drv)
 		return -ENOMEM;
 	snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);
diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c
index 49ba1a9..f5a5ea6 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5645.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5645.c
@@ -539,7 +539,7 @@
 	int ret_val = 0;
 	int i;
 
-	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_ATOMIC);
+	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
 	if (!drv)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c
index e68ec32..e5aa130 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5672.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5672.c
@@ -189,13 +189,6 @@
 	if (devm_acpi_dev_add_driver_gpios(component->dev, cht_rt5672_gpios))
 		dev_warn(runtime->dev, "Unable to add GPIO mapping table\n");
 
-	/* TDM 4 slots 24 bit, set Rx & Tx bitmask to 4 active slots */
-	ret = snd_soc_dai_set_tdm_slot(codec_dai, 0xF, 0xF, 4, 24);
-	if (ret < 0) {
-		dev_err(runtime->dev, "can't set codec TDM slot %d\n", ret);
-		return ret;
-	}
-
 	/* Select codec ASRC clock source to track I2S1 clock, because codec
 	 * is in slave mode and 100fs I2S format (BCLK = 100 * LRCLK) cannot
 	 * be supported by RT5672. Otherwise, ASRC will be disabled and cause
@@ -252,6 +245,7 @@
 			SNDRV_PCM_HW_PARAM_RATE);
 	struct snd_interval *channels = hw_param_interval(params,
 						SNDRV_PCM_HW_PARAM_CHANNELS);
+	int ret;
 
 	/* The DSP will convert the FE rate to 48k, stereo, 24 bits */
 	rate->min = rate->max = 48000;
@@ -259,6 +253,26 @@
 
 	/* set SSP2 to 24-bit */
 	params_set_format(params, SNDRV_PCM_FORMAT_S24_LE);
+
+	/*
+	 * Default mode for SSP configuration is TDM 4 slot
+	 */
+	ret = snd_soc_dai_set_fmt(rtd->codec_dai,
+				  SND_SOC_DAIFMT_DSP_B |
+				  SND_SOC_DAIFMT_IB_NF |
+				  SND_SOC_DAIFMT_CBS_CFS);
+	if (ret < 0) {
+		dev_err(rtd->dev, "can't set format to TDM %d\n", ret);
+		return ret;
+	}
+
+	/* TDM 4 slots 24 bit, set Rx & Tx bitmask to 4 active slots */
+	ret = snd_soc_dai_set_tdm_slot(rtd->codec_dai, 0xF, 0xF, 4, 24);
+	if (ret < 0) {
+		dev_err(rtd->dev, "can't set codec TDM slot %d\n", ret);
+		return ret;
+	}
+
 	return 0;
 }
 
@@ -315,8 +329,6 @@
 		.nonatomic = true,
 		.codec_dai_name = "rt5670-aif1",
 		.codec_name = "i2c-10EC5670:00",
-		.dai_fmt = SND_SOC_DAIFMT_DSP_B | SND_SOC_DAIFMT_IB_NF
-					| SND_SOC_DAIFMT_CBS_CFS,
 		.init = cht_codec_init,
 		.be_hw_params_fixup = cht_codec_fixup,
 		.dpcm_playback = 1,
diff --git a/sound/soc/intel/boards/kbl_da7219_max98357a.c b/sound/soc/intel/boards/kbl_da7219_max98357a.c
index e84baaf..94294c2 100644
--- a/sound/soc/intel/boards/kbl_da7219_max98357a.c
+++ b/sound/soc/intel/boards/kbl_da7219_max98357a.c
@@ -65,14 +65,6 @@
 		return -EIO;
 	}
 
-	/* Configure sysclk for codec */
-	ret = snd_soc_dai_set_sysclk(codec_dai, DA7219_CLKSRC_MCLK, 24576000,
-				     SND_SOC_CLOCK_IN);
-	if (ret) {
-		dev_err(card->dev, "can't set codec sysclk configuration\n");
-		return ret;
-	}
-
 	if (SND_SOC_DAPM_EVENT_OFF(event)) {
 		ret = snd_soc_dai_set_pll(codec_dai, 0,
 				     DA7219_SYSCLK_MCLK, 0, 0);
@@ -169,9 +161,18 @@
 {
 	struct kbl_codec_private *ctx = snd_soc_card_get_drvdata(rtd->card);
 	struct snd_soc_component *component = rtd->codec_dai->component;
+	struct snd_soc_dai *codec_dai = rtd->codec_dai;
 	struct snd_soc_jack *jack;
 	int ret;
 
+	/* Configure sysclk for codec */
+	ret = snd_soc_dai_set_sysclk(codec_dai, DA7219_CLKSRC_MCLK, 24576000,
+						SND_SOC_CLOCK_IN);
+	if (ret) {
+		dev_err(rtd->dev, "can't set codec sysclk configuration\n");
+		return ret;
+	}
+
 	/*
 	 * Headset buttons map to the Google Reference headset.
 	 * These can be configured by userspace.
@@ -572,7 +573,7 @@
 {
 	struct kbl_codec_private *ctx;
 
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/kbl_rt5663_max98927.c b/sound/soc/intel/boards/kbl_rt5663_max98927.c
index 0e6b7e7..3a61252 100644
--- a/sound/soc/intel/boards/kbl_rt5663_max98927.c
+++ b/sound/soc/intel/boards/kbl_rt5663_max98927.c
@@ -299,7 +299,8 @@
 	snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP);
 	snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN);
 
-	rt5663_set_jack_detect(component, &ctx->kabylake_headset);
+	snd_soc_component_set_jack(component, &ctx->kabylake_headset, NULL);
+
 	return ret;
 }
 
@@ -972,7 +973,7 @@
 	struct skl_machine_pdata *pdata;
 	int ret;
 
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
index 69c3d84..92f5fb2 100644
--- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
+++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
@@ -200,7 +200,7 @@
 	snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP);
 	snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN);
 
-	rt5663_set_jack_detect(component, &ctx->kabylake_headset);
+	snd_soc_component_set_jack(component, &ctx->kabylake_headset, NULL);
 
 	ret = snd_soc_dapm_ignore_suspend(&rtd->card->dapm, "DMIC");
 	if (ret)
@@ -651,7 +651,7 @@
 	struct kbl_codec_private *ctx;
 	struct skl_machine_pdata *pdata;
 
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/skl_nau88l25_max98357a.c b/sound/soc/intel/boards/skl_nau88l25_max98357a.c
index 9a7a064..3ff6646 100644
--- a/sound/soc/intel/boards/skl_nau88l25_max98357a.c
+++ b/sound/soc/intel/boards/skl_nau88l25_max98357a.c
@@ -643,7 +643,7 @@
 	struct skl_nau8825_private *ctx;
 	struct skl_machine_pdata *pdata;
 
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/skl_nau88l25_ssm4567.c b/sound/soc/intel/boards/skl_nau88l25_ssm4567.c
index 212ac89..b0610bba 100644
--- a/sound/soc/intel/boards/skl_nau88l25_ssm4567.c
+++ b/sound/soc/intel/boards/skl_nau88l25_ssm4567.c
@@ -696,7 +696,7 @@
 	struct skl_nau88125_private *ctx;
 	struct skl_machine_pdata *pdata;
 
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/boards/skl_rt286.c b/sound/soc/intel/boards/skl_rt286.c
index 737d561..38a1495 100644
--- a/sound/soc/intel/boards/skl_rt286.c
+++ b/sound/soc/intel/boards/skl_rt286.c
@@ -527,7 +527,7 @@
 {
 	struct skl_rt286_private *ctx;
 
-	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_ATOMIC);
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c
index a016455..5d7ac2e 100644
--- a/sound/soc/intel/skylake/skl-debug.c
+++ b/sound/soc/intel/skylake/skl-debug.c
@@ -15,10 +15,10 @@
 
 #include <linux/pci.h>
 #include <linux/debugfs.h>
+#include <uapi/sound/skl-tplg-interface.h>
 #include "skl.h"
 #include "skl-sst-dsp.h"
 #include "skl-sst-ipc.h"
-#include "skl-tplg-interface.h"
 #include "skl-topology.h"
 #include "../common/sst-dsp.h"
 #include "../common/sst-dsp-priv.h"
@@ -142,8 +142,8 @@
 			mconfig->max_out_queue, ret, false);
 
 	ret += snprintf(buf + ret, MOD_BUF - ret,
-			"Other:\n\tDomain %d\n\tHomogenous Input %s\n\t"
-			"Homogenous Output %s\n\tIn Queue Mask %d\n\t"
+			"Other:\n\tDomain %d\n\tHomogeneous Input %s\n\t"
+			"Homogeneous Output %s\n\tIn Queue Mask %d\n\t"
 			"Out Queue Mask %d\n\tDMA ID %d\n\tMem Pages %d\n\t"
 			"Module Type %d\n\tModule State %d\n",
 			mconfig->domain,
diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c
index 57d4a58..d5f9c30 100644
--- a/sound/soc/intel/skylake/skl-messages.c
+++ b/sound/soc/intel/skylake/skl-messages.c
@@ -21,6 +21,7 @@
 #include <linux/pci.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
+#include <uapi/sound/skl-tplg-interface.h>
 #include "skl-sst-dsp.h"
 #include "cnl-sst-dsp.h"
 #include "skl-sst-ipc.h"
@@ -28,7 +29,6 @@
 #include "../common/sst-dsp.h"
 #include "../common/sst-dsp-priv.h"
 #include "skl-topology.h"
-#include "skl-tplg-interface.h"
 
 static int skl_alloc_dma_buf(struct device *dev,
 		struct snd_dma_buffer *dmab, size_t size)
@@ -225,7 +225,7 @@
 		.id = 0x9d71,
 		.num_cores = 2,
 		.loader_ops = skl_get_loader_ops,
-		.init = kbl_sst_dsp_init,
+		.init = skl_sst_dsp_init,
 		.init_fw = skl_sst_init_fw,
 		.cleanup = skl_sst_dsp_cleanup
 	},
diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index 15cb8ac..afa86b9 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -268,15 +268,31 @@
 {
 	struct skl *skl = get_skl_ctx(dai->dev);
 	struct skl_module_cfg *mconfig;
+	int ret;
 
 	dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name);
 
 	mconfig = skl_tplg_fe_get_cpr_module(dai, substream->stream);
 
-	/* In case of XRUN recovery, reset the FW pipe to clean state */
-	if (mconfig && (substream->runtime->status->state ==
-					SNDRV_PCM_STATE_XRUN))
-		skl_reset_pipe(skl->skl_sst, mconfig->pipe);
+	/*
+	 * In case of XRUN recovery, or when the application calls prepare
+	 * again, reset the FW pipe to a clean state
+	 */
+	if (mconfig &&
+		(substream->runtime->status->state == SNDRV_PCM_STATE_XRUN ||
+		 mconfig->pipe->state == SKL_PIPE_CREATED ||
+		 mconfig->pipe->state == SKL_PIPE_PAUSED)) {
+		ret = skl_reset_pipe(skl->skl_sst, mconfig->pipe);
+		if (ret < 0)
+			return ret;
+
+		ret = skl_pcm_host_dma_prepare(dai->dev,
+					mconfig->pipe->p_params);
+		if (ret < 0)
+			return ret;
+	}
 
 	return 0;
 }
@@ -366,9 +382,21 @@
 {
 	struct hdac_ext_bus *ebus = dev_get_drvdata(dai->dev);
 	struct hdac_ext_stream *stream = get_hdac_ext_stream(substream);
+	struct skl *skl = get_skl_ctx(dai->dev);
+	struct skl_module_cfg *mconfig;
+	int ret;
 
 	dev_dbg(dai->dev, "%s: %s\n", __func__, dai->name);
 
+	mconfig = skl_tplg_fe_get_cpr_module(dai, substream->stream);
+
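+	/* Reset the FW pipe so a later prepare starts from a clean state */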
+	if (mconfig) {
+		ret = skl_reset_pipe(skl->skl_sst, mconfig->pipe);
+		if (ret < 0)
+			dev_err(dai->dev, "%s: Reset failed, ret = %d\n",
+				__func__, ret);
+	}
+
 	snd_hdac_stream_cleanup(hdac_stream(stream));
 	hdac_stream(stream)->prepared = 0;
 
diff --git a/sound/soc/intel/skylake/skl-sst-dsp.h b/sound/soc/intel/skylake/skl-sst-dsp.h
index 12fc9a7..e1d6f67 100644
--- a/sound/soc/intel/skylake/skl-sst-dsp.h
+++ b/sound/soc/intel/skylake/skl-sst-dsp.h
@@ -231,9 +231,6 @@
 int skl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
 		const char *fw_name, struct skl_dsp_loader_ops dsp_ops,
 		struct skl_sst **dsp);
-int kbl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
-		const char *fw_name, struct skl_dsp_loader_ops dsp_ops,
-		struct skl_sst **dsp);
 int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
 		const char *fw_name, struct skl_dsp_loader_ops dsp_ops,
 		struct skl_sst **dsp);
diff --git a/sound/soc/intel/skylake/skl-sst.c b/sound/soc/intel/skylake/skl-sst.c
index 5a7e41b6..5951bbd 100644
--- a/sound/soc/intel/skylake/skl-sst.c
+++ b/sound/soc/intel/skylake/skl-sst.c
@@ -390,7 +390,7 @@
 }
 
 static int
-kbl_load_library(struct sst_dsp *ctx, struct skl_lib_info *linfo, int lib_count)
+skl_load_library(struct sst_dsp *ctx, struct skl_lib_info *linfo, int lib_count)
 {
 	struct skl_sst *skl = ctx->thread_context;
 	struct firmware stripped_fw;
@@ -508,16 +508,7 @@
 	.set_state_D3 = skl_set_dsp_D3,
 	.load_fw = skl_load_base_firmware,
 	.get_fw_errcode = skl_get_errorcode,
-	.load_mod = skl_load_module,
-	.unload_mod = skl_unload_module,
-};
-
-static const struct skl_dsp_fw_ops kbl_fw_ops = {
-	.set_state_D0 = skl_set_dsp_D0,
-	.set_state_D3 = skl_set_dsp_D3,
-	.load_fw = skl_load_base_firmware,
-	.get_fw_errcode = skl_get_errorcode,
-	.load_library = kbl_load_library,
+	.load_library = skl_load_library,
 	.load_mod = skl_load_module,
 	.unload_mod = skl_unload_module,
 };
@@ -573,27 +564,6 @@
 }
 EXPORT_SYMBOL_GPL(skl_sst_dsp_init);
 
-int kbl_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq,
-		const char *fw_name, struct skl_dsp_loader_ops dsp_ops,
-		struct skl_sst **dsp)
-{
-	struct sst_dsp *sst;
-	int ret;
-
-	ret = skl_sst_dsp_init(dev, mmio_base, irq, fw_name, dsp_ops, dsp);
-	if (ret < 0) {
-		dev_err(dev, "%s: Init failed %d\n", __func__, ret);
-		return ret;
-	}
-
-	sst = (*dsp)->dsp;
-	sst->fw_ops = kbl_fw_ops;
-
-	return 0;
-
-}
-EXPORT_SYMBOL_GPL(kbl_sst_dsp_init);
-
 int skl_sst_init_fw(struct device *dev, struct skl_sst *ctx)
 {
 	int ret;
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index 3b1dca4..2c51297 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -19,14 +19,15 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/firmware.h>
+#include <linux/uuid.h>
 #include <sound/soc.h>
 #include <sound/soc-topology.h>
 #include <uapi/sound/snd_sst_tokens.h>
+#include <uapi/sound/skl-tplg-interface.h>
 #include "skl-sst-dsp.h"
 #include "skl-sst-ipc.h"
 #include "skl-topology.h"
 #include "skl.h"
-#include "skl-tplg-interface.h"
 #include "../common/sst-dsp.h"
 #include "../common/sst-dsp-priv.h"
 
@@ -2724,6 +2725,167 @@
 	return -EINVAL;
 }
 
+/* Functions to parse private data from configuration file format v4 */
+
+/*
+ * Add pipeline from topology binary into driver pipeline list
+ *
+ * If already added we return that instance
+ * Otherwise we create a new instance and add into driver list
+ */
+static int skl_tplg_add_pipe_v4(struct device *dev,
+				struct skl_module_cfg *mconfig, struct skl *skl,
+				struct skl_dfw_v4_pipe *dfw_pipe)
+{
+	struct skl_pipeline *ppl;
+	struct skl_pipe *pipe;
+	struct skl_pipe_params *params;
+
+	list_for_each_entry(ppl, &skl->ppl_list, node) {
+		if (ppl->pipe->ppl_id == dfw_pipe->pipe_id) {
+			mconfig->pipe = ppl->pipe;
+			return 0;
+		}
+	}
+
+	ppl = devm_kzalloc(dev, sizeof(*ppl), GFP_KERNEL);
+	if (!ppl)
+		return -ENOMEM;
+
+	pipe = devm_kzalloc(dev, sizeof(*pipe), GFP_KERNEL);
+	if (!pipe)
+		return -ENOMEM;
+
+	params = devm_kzalloc(dev, sizeof(*params), GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	pipe->ppl_id = dfw_pipe->pipe_id;
+	pipe->memory_pages = dfw_pipe->memory_pages;
+	pipe->pipe_priority = dfw_pipe->pipe_priority;
+	pipe->conn_type = dfw_pipe->conn_type;
+	pipe->state = SKL_PIPE_INVALID;
+	pipe->p_params = params;
+	INIT_LIST_HEAD(&pipe->w_list);
+
+	ppl->pipe = pipe;
+	list_add(&ppl->node, &skl->ppl_list);
+
+	mconfig->pipe = pipe;
+
+	return 0;
+}
+
+static void skl_fill_module_pin_info_v4(struct skl_dfw_v4_module_pin *dfw_pin,
+					struct skl_module_pin *m_pin,
+					bool is_dynamic, int max_pin)
+{
+	int i;
+
+	for (i = 0; i < max_pin; i++) {
+		m_pin[i].id.module_id = dfw_pin[i].module_id;
+		m_pin[i].id.instance_id = dfw_pin[i].instance_id;
+		m_pin[i].in_use = false;
+		m_pin[i].is_dynamic = is_dynamic;
+		m_pin[i].pin_state = SKL_PIN_UNBIND;
+	}
+}
+
+static void skl_tplg_fill_fmt_v4(struct skl_module_pin_fmt *dst_fmt,
+				 struct skl_dfw_v4_module_fmt *src_fmt,
+				 int pins)
+{
+	int i;
+
+	for (i = 0; i < pins; i++) {
+		dst_fmt[i].fmt.channels  = src_fmt[i].channels;
+		dst_fmt[i].fmt.s_freq = src_fmt[i].freq;
+		dst_fmt[i].fmt.bit_depth = src_fmt[i].bit_depth;
+		dst_fmt[i].fmt.valid_bit_depth = src_fmt[i].valid_bit_depth;
+		dst_fmt[i].fmt.ch_cfg = src_fmt[i].ch_cfg;
+		dst_fmt[i].fmt.ch_map = src_fmt[i].ch_map;
+		dst_fmt[i].fmt.interleaving_style =
+						src_fmt[i].interleaving_style;
+		dst_fmt[i].fmt.sample_type = src_fmt[i].sample_type;
+	}
+}
+
+static int skl_tplg_get_pvt_data_v4(struct snd_soc_tplg_dapm_widget *tplg_w,
+				    struct skl *skl, struct device *dev,
+				    struct skl_module_cfg *mconfig)
+{
+	struct skl_dfw_v4_module *dfw =
+				(struct skl_dfw_v4_module *)tplg_w->priv.data;
+	int ret;
+
+	dev_dbg(dev, "Parsing Skylake v4 widget topology data\n");
+
+	ret = guid_parse(dfw->uuid, (guid_t *)mconfig->guid);
+	if (ret)
+		return ret;
+	mconfig->id.module_id = -1;
+	mconfig->id.instance_id = dfw->instance_id;
+	mconfig->module->resources[0].cps = dfw->max_mcps;
+	mconfig->module->resources[0].ibs = dfw->ibs;
+	mconfig->module->resources[0].obs = dfw->obs;
+	mconfig->core_id = dfw->core_id;
+	mconfig->module->max_input_pins = dfw->max_in_queue;
+	mconfig->module->max_output_pins = dfw->max_out_queue;
+	mconfig->module->loadable = dfw->is_loadable;
+	skl_tplg_fill_fmt_v4(mconfig->module->formats[0].inputs, dfw->in_fmt,
+			     MAX_IN_QUEUE);
+	skl_tplg_fill_fmt_v4(mconfig->module->formats[0].outputs, dfw->out_fmt,
+			     MAX_OUT_QUEUE);
+
+	mconfig->params_fixup = dfw->params_fixup;
+	mconfig->converter = dfw->converter;
+	mconfig->m_type = dfw->module_type;
+	mconfig->vbus_id = dfw->vbus_id;
+	mconfig->module->resources[0].is_pages = dfw->mem_pages;
+
+	ret = skl_tplg_add_pipe_v4(dev, mconfig, skl, &dfw->pipe);
+	if (ret)
+		return ret;
+
+	mconfig->dev_type = dfw->dev_type;
+	mconfig->hw_conn_type = dfw->hw_conn_type;
+	mconfig->time_slot = dfw->time_slot;
+	mconfig->formats_config.caps_size = dfw->caps.caps_size;
+
+	mconfig->m_in_pin = devm_kzalloc(dev,
+				MAX_IN_QUEUE * sizeof(*mconfig->m_in_pin),
+				GFP_KERNEL);
+	if (!mconfig->m_in_pin)
+		return -ENOMEM;
+
+	mconfig->m_out_pin = devm_kzalloc(dev,
+				MAX_OUT_QUEUE * sizeof(*mconfig->m_out_pin),
+				GFP_KERNEL);
+	if (!mconfig->m_out_pin)
+		return -ENOMEM;
+
+	skl_fill_module_pin_info_v4(dfw->in_pin, mconfig->m_in_pin,
+				    dfw->is_dynamic_in_pin,
+				    mconfig->module->max_input_pins);
+	skl_fill_module_pin_info_v4(dfw->out_pin, mconfig->m_out_pin,
+				    dfw->is_dynamic_out_pin,
+				    mconfig->module->max_output_pins);
+
+	if (mconfig->formats_config.caps_size) {
+		mconfig->formats_config.set_params = dfw->caps.set_params;
+		mconfig->formats_config.param_id = dfw->caps.param_id;
+		mconfig->formats_config.caps =
+		devm_kzalloc(dev, mconfig->formats_config.caps_size,
+			     GFP_KERNEL);
+		if (!mconfig->formats_config.caps)
+			return -ENOMEM;
+		memcpy(mconfig->formats_config.caps, dfw->caps.caps,
+		       dfw->caps.caps_size);
+	}
+
+	return 0;
+}
+
 /*
  * Parse the private data for the token and corresponding value.
  * The private data can have multiple data blocks. So, a data block
@@ -2739,6 +2901,13 @@
 	char *data;
 	int ret;
 
+	/*
+	 * v4 configuration files have a valid UUID at the start of
+	 * the widget's private data.
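+	 * uuid_is_valid() checks for the standard 36-character textual
+	 * form, which the newer token-based blobs do not begin with.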
+	 */
+	if (uuid_is_valid((char *)tplg_w->priv.data))
+		return skl_tplg_get_pvt_data_v4(tplg_w, skl, dev, mconfig);
+
 	/* Read the NUM_DATA_BLOCKS descriptor */
 	array = (struct snd_soc_tplg_vendor_array *)tplg_w->priv.data;
 	ret = skl_tplg_get_desc_blocks(dev, array);
diff --git a/sound/soc/intel/skylake/skl-topology.h b/sound/soc/intel/skylake/skl-topology.h
index b1e0667..6d7e056 100644
--- a/sound/soc/intel/skylake/skl-topology.h
+++ b/sound/soc/intel/skylake/skl-topology.h
@@ -25,8 +25,8 @@
 
 #include <sound/hdaudio_ext.h>
 #include <sound/soc.h>
+#include <uapi/sound/skl-tplg-interface.h>
 #include "skl.h"
-#include "skl-tplg-interface.h"
 
 #define BITS_PER_BYTE 8
 #define MAX_TS_GROUPS 8
diff --git a/sound/soc/intel/skylake/skl-tplg-interface.h b/sound/soc/intel/skylake/skl-tplg-interface.h
deleted file mode 100644
index f8d1749..0000000
--- a/sound/soc/intel/skylake/skl-tplg-interface.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * skl-tplg-interface.h - Intel DSP FW private data interface
- *
- * Copyright (C) 2015 Intel Corp
- * Author: Jeeja KP <jeeja.kp@intel.com>
- *	    Nilofer, Samreen <samreen.nilofer@intel.com>
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#ifndef __HDA_TPLG_INTERFACE_H__
-#define __HDA_TPLG_INTERFACE_H__
-
-/*
- * Default types range from 0~12. type can range from 0 to 0xff
- * SST types start at higher to avoid any overlapping in future
- */
-#define SKL_CONTROL_TYPE_BYTE_TLV	0x100
-#define SKL_CONTROL_TYPE_MIC_SELECT	0x102
-
-#define HDA_SST_CFG_MAX	900 /* size of copier cfg*/
-#define MAX_IN_QUEUE 8
-#define MAX_OUT_QUEUE 8
-
-#define SKL_UUID_STR_SZ 40
-/* Event types goes here */
-/* Reserve event type 0 for no event handlers */
-enum skl_event_types {
-	SKL_EVENT_NONE = 0,
-	SKL_MIXER_EVENT,
-	SKL_MUX_EVENT,
-	SKL_VMIXER_EVENT,
-	SKL_PGA_EVENT
-};
-
-/**
- * enum skl_ch_cfg - channel configuration
- *
- * @SKL_CH_CFG_MONO:	One channel only
- * @SKL_CH_CFG_STEREO:	L & R
- * @SKL_CH_CFG_2_1:	L, R & LFE
- * @SKL_CH_CFG_3_0:	L, C & R
- * @SKL_CH_CFG_3_1:	L, C, R & LFE
- * @SKL_CH_CFG_QUATRO:	L, R, Ls & Rs
- * @SKL_CH_CFG_4_0:	L, C, R & Cs
- * @SKL_CH_CFG_5_0:	L, C, R, Ls & Rs
- * @SKL_CH_CFG_5_1:	L, C, R, Ls, Rs & LFE
- * @SKL_CH_CFG_DUAL_MONO: One channel replicated in two
- * @SKL_CH_CFG_I2S_DUAL_STEREO_0: Stereo(L,R) in 4 slots, 1st stream:[ L, R, -, - ]
- * @SKL_CH_CFG_I2S_DUAL_STEREO_1: Stereo(L,R) in 4 slots, 2nd stream:[ -, -, L, R ]
- * @SKL_CH_CFG_INVALID:	Invalid
- */
-enum skl_ch_cfg {
-	SKL_CH_CFG_MONO = 0,
-	SKL_CH_CFG_STEREO = 1,
-	SKL_CH_CFG_2_1 = 2,
-	SKL_CH_CFG_3_0 = 3,
-	SKL_CH_CFG_3_1 = 4,
-	SKL_CH_CFG_QUATRO = 5,
-	SKL_CH_CFG_4_0 = 6,
-	SKL_CH_CFG_5_0 = 7,
-	SKL_CH_CFG_5_1 = 8,
-	SKL_CH_CFG_DUAL_MONO = 9,
-	SKL_CH_CFG_I2S_DUAL_STEREO_0 = 10,
-	SKL_CH_CFG_I2S_DUAL_STEREO_1 = 11,
-	SKL_CH_CFG_4_CHANNEL = 12,
-	SKL_CH_CFG_INVALID
-};
-
-enum skl_module_type {
-	SKL_MODULE_TYPE_MIXER = 0,
-	SKL_MODULE_TYPE_COPIER,
-	SKL_MODULE_TYPE_UPDWMIX,
-	SKL_MODULE_TYPE_SRCINT,
-	SKL_MODULE_TYPE_ALGO,
-	SKL_MODULE_TYPE_BASE_OUTFMT,
-	SKL_MODULE_TYPE_KPB,
-	SKL_MODULE_TYPE_MIC_SELECT,
-};
-
-enum skl_core_affinity {
-	SKL_AFFINITY_CORE_0 = 0,
-	SKL_AFFINITY_CORE_1,
-	SKL_AFFINITY_CORE_MAX
-};
-
-enum skl_pipe_conn_type {
-	SKL_PIPE_CONN_TYPE_NONE = 0,
-	SKL_PIPE_CONN_TYPE_FE,
-	SKL_PIPE_CONN_TYPE_BE
-};
-
-enum skl_hw_conn_type {
-	SKL_CONN_NONE = 0,
-	SKL_CONN_SOURCE = 1,
-	SKL_CONN_SINK = 2
-};
-
-enum skl_dev_type {
-	SKL_DEVICE_BT = 0x0,
-	SKL_DEVICE_DMIC = 0x1,
-	SKL_DEVICE_I2S = 0x2,
-	SKL_DEVICE_SLIMBUS = 0x3,
-	SKL_DEVICE_HDALINK = 0x4,
-	SKL_DEVICE_HDAHOST = 0x5,
-	SKL_DEVICE_NONE
-};
-
-/**
- * enum skl_interleaving - interleaving style
- *
- * @SKL_INTERLEAVING_PER_CHANNEL: [s1_ch1...s1_chN,...,sM_ch1...sM_chN]
- * @SKL_INTERLEAVING_PER_SAMPLE: [s1_ch1...sM_ch1,...,s1_chN...sM_chN]
- */
-enum skl_interleaving {
-	SKL_INTERLEAVING_PER_CHANNEL = 0,
-	SKL_INTERLEAVING_PER_SAMPLE = 1,
-};
-
-enum skl_sample_type {
-	SKL_SAMPLE_TYPE_INT_MSB = 0,
-	SKL_SAMPLE_TYPE_INT_LSB = 1,
-	SKL_SAMPLE_TYPE_INT_SIGNED = 2,
-	SKL_SAMPLE_TYPE_INT_UNSIGNED = 3,
-	SKL_SAMPLE_TYPE_FLOAT = 4
-};
-
-enum module_pin_type {
-	/* All pins of the module takes same PCM inputs or outputs
-	* e.g. mixout
-	*/
-	SKL_PIN_TYPE_HOMOGENEOUS,
-	/* All pins of the module takes different PCM inputs or outputs
-	* e.g mux
-	*/
-	SKL_PIN_TYPE_HETEROGENEOUS,
-};
-
-enum skl_module_param_type {
-	SKL_PARAM_DEFAULT = 0,
-	SKL_PARAM_INIT,
-	SKL_PARAM_SET,
-	SKL_PARAM_BIND
-};
-
-struct skl_dfw_algo_data {
-	u32 set_params:2;
-	u32 rsvd:30;
-	u32 param_id;
-	u32 max;
-	char params[0];
-} __packed;
-
-enum skl_tkn_dir {
-	SKL_DIR_IN,
-	SKL_DIR_OUT
-};
-
-enum skl_tuple_type {
-	SKL_TYPE_TUPLE,
-	SKL_TYPE_DATA
-};
-
-#endif
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index abf3247..f0d9793 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -127,10 +127,17 @@
  */
 static int skl_init_chip(struct hdac_bus *bus, bool full_reset)
 {
+	struct hdac_ext_bus *ebus = hbus_to_ebus(bus);
+	struct hdac_ext_link *hlink;
 	int ret;
 
 	skl_enable_miscbdcge(bus->dev, false);
 	ret = snd_hdac_bus_init_chip(bus, full_reset);
+
+	/* Reset stream-to-link mapping */
+	list_for_each_entry(hlink, &ebus->hlink_list, list)
+		bus->io_ops->reg_writel(0, hlink->ml_addr + AZX_REG_ML_LOSIDV);
+
 	skl_enable_miscbdcge(bus->dev, true);
 
 	return ret;
diff --git a/sound/soc/kirkwood/Kconfig b/sound/soc/kirkwood/Kconfig
index bc3c7b5..132bb83 100644
--- a/sound/soc/kirkwood/Kconfig
+++ b/sound/soc/kirkwood/Kconfig
@@ -1,7 +1,6 @@
 config SND_KIRKWOOD_SOC
 	tristate "SoC Audio for the Marvell Kirkwood and Dove chips"
 	depends on ARCH_DOVE || ARCH_MVEBU || COMPILE_TEST
-	depends on HAS_DMA
 	help
 	  Say Y or M if you want to add support for codecs attached to
 	  the Kirkwood I2S interface. You will also need to select the
diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig
index 5c68797..e731d40 100644
--- a/sound/soc/mediatek/Kconfig
+++ b/sound/soc/mediatek/Kconfig
@@ -32,6 +32,26 @@
 	  Select Y if you have such device.
 	  If unsure select "N".
 
+config SND_SOC_MT6797
+	tristate "ASoC support for Mediatek MT6797 chip"
+	depends on ARCH_MEDIATEK
+	select SND_SOC_MEDIATEK
+	help
+	  This adds ASoC driver for Mediatek MT6797 boards
+	  that can be used with other codecs.
+	  Select Y if you have such device.
+	  If unsure select "N".
+
+config SND_SOC_MT6797_MT6351
+	tristate "ASoc Audio driver for MT6797 with MT6351 codec"
+	depends on SND_SOC_MT6797 && MTK_PMIC_WRAP
+	select SND_SOC_MT6351
+	help
+	  This adds ASoC driver for Mediatek MT6797 boards
+	  with the MT6351 codecs.
+	  Select Y if you have such device.
+	  If unsure select "N".
+
 config SND_SOC_MT8173
 	tristate "ASoC support for Mediatek MT8173 chip"
 	depends on ARCH_MEDIATEK
diff --git a/sound/soc/mediatek/Makefile b/sound/soc/mediatek/Makefile
index 6bcab35..3bb2c47 100644
--- a/sound/soc/mediatek/Makefile
+++ b/sound/soc/mediatek/Makefile
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_SND_SOC_MEDIATEK) += common/
 obj-$(CONFIG_SND_SOC_MT2701) += mt2701/
+obj-$(CONFIG_SND_SOC_MT6797) += mt6797/
 obj-$(CONFIG_SND_SOC_MT8173) += mt8173/
diff --git a/sound/soc/mediatek/common/Makefile b/sound/soc/mediatek/common/Makefile
index a55d33b..cdadabc 100644
--- a/sound/soc/mediatek/common/Makefile
+++ b/sound/soc/mediatek/common/Makefile
@@ -1,16 +1,4 @@
-#
-# Copyright (C) 2015 MediaTek Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-
+# SPDX-License-Identifier: GPL-2.0
 # platform driver
 snd-soc-mtk-common-objs := mtk-afe-platform-driver.o mtk-afe-fe-dai.o
 obj-$(CONFIG_SND_SOC_MEDIATEK) += snd-soc-mtk-common.o
diff --git a/sound/soc/mediatek/common/mtk-afe-fe-dai.c b/sound/soc/mediatek/common/mtk-afe-fe-dai.c
index c91e5f4..cf4978b 100644
--- a/sound/soc/mediatek/common/mtk-afe-fe-dai.c
+++ b/sound/soc/mediatek/common/mtk-afe-fe-dai.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mtk-afe-fe-dais.c  --  Mediatek afe fe dai operator
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
@@ -44,8 +36,7 @@
 		       struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	int memif_num = rtd->cpu_dai->id;
 	struct mtk_base_afe_memif *memif = &afe->memif[memif_num];
@@ -107,8 +98,7 @@
 			 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mtk_base_afe_memif *memif = &afe->memif[rtd->cpu_dai->id];
 	int irq_id;
 
@@ -131,8 +121,7 @@
 			 struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mtk_base_afe_memif *memif = &afe->memif[rtd->cpu_dai->id];
 	int msb_at_bit33 = 0;
 	int ret, fs = 0;
@@ -196,8 +185,7 @@
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_pcm_runtime * const runtime = substream->runtime;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mtk_base_afe_memif *memif = &afe->memif[rtd->cpu_dai->id];
 	struct mtk_base_afe_irq *irqs = &afe->irqs[memif->irq_usage];
 	const struct mtk_base_irq_data *irq_data = irqs->irq_data;
@@ -260,8 +248,7 @@
 		       struct snd_soc_dai *dai)
 {
 	struct snd_soc_pcm_runtime *rtd  = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mtk_base_afe_memif *memif = &afe->memif[rtd->cpu_dai->id];
 	int hd_audio = 0;
 
@@ -333,7 +320,7 @@
 
 int mtk_afe_dai_suspend(struct snd_soc_dai *dai)
 {
-	struct mtk_base_afe *afe = dev_get_drvdata(dai->dev);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct device *dev = afe->dev;
 	struct regmap *regmap = afe->regmap;
 	int i;
@@ -358,7 +345,7 @@
 
 int mtk_afe_dai_resume(struct snd_soc_dai *dai)
 {
-	struct mtk_base_afe *afe = dev_get_drvdata(dai->dev);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct device *dev = afe->dev;
 	struct regmap *regmap = afe->regmap;
 	int i = 0;
@@ -383,4 +370,3 @@
 MODULE_DESCRIPTION("Mediatek simple fe dai operator");
 MODULE_AUTHOR("Garlic Tseng <garlic.tseng@mediatek.com>");
 MODULE_LICENSE("GPL v2");
-
diff --git a/sound/soc/mediatek/common/mtk-afe-fe-dai.h b/sound/soc/mediatek/common/mtk-afe-fe-dai.h
index 28cb178..55074fb 100644
--- a/sound/soc/mediatek/common/mtk-afe-fe-dai.h
+++ b/sound/soc/mediatek/common/mtk-afe-fe-dai.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * mtk-afe-fe-dais.h  --  Mediatek afe fe dai operator definition
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _MTK_AFE_FE_DAI_H_
diff --git a/sound/soc/mediatek/common/mtk-afe-platform-driver.c b/sound/soc/mediatek/common/mtk-afe-platform-driver.c
index 53215b5..51ec4ff 100644
--- a/sound/soc/mediatek/common/mtk-afe-platform-driver.c
+++ b/sound/soc/mediatek/common/mtk-afe-platform-driver.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mtk-afe-platform-driver.c  --  Mediatek afe platform driver
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
@@ -21,6 +13,86 @@
 #include "mtk-afe-platform-driver.h"
 #include "mtk-base-afe.h"
 
+int mtk_afe_combine_sub_dai(struct mtk_base_afe *afe)
+{
+	struct snd_soc_dai_driver *sub_dai_drivers;
+	size_t num_dai_drivers = 0, dai_idx = 0;
+	int i;
+
+	if (!afe->sub_dais) {
+		dev_err(afe->dev, "%s(), sub_dais == NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	/* calculate total dai driver size */
+	for (i = 0; i < afe->num_sub_dais; i++) {
+		if (afe->sub_dais[i].dai_drivers &&
+		    afe->sub_dais[i].num_dai_drivers != 0)
+			num_dai_drivers += afe->sub_dais[i].num_dai_drivers;
+	}
+
+	dev_info(afe->dev, "%s(), num of dais %zu\n", __func__, num_dai_drivers);
+
+	/* combine sub_dais */
+	afe->num_dai_drivers = num_dai_drivers;
+	afe->dai_drivers = devm_kcalloc(afe->dev,
+					num_dai_drivers,
+					sizeof(struct snd_soc_dai_driver),
+					GFP_KERNEL);
+	if (!afe->dai_drivers)
+		return -ENOMEM;
+
+	for (i = 0; i < afe->num_sub_dais; i++) {
+		if (afe->sub_dais[i].dai_drivers &&
+		    afe->sub_dais[i].num_dai_drivers != 0) {
+			sub_dai_drivers = afe->sub_dais[i].dai_drivers;
+			/* dai driver */
+			memcpy(&afe->dai_drivers[dai_idx],
+			       sub_dai_drivers,
+			       afe->sub_dais[i].num_dai_drivers *
+			       sizeof(struct snd_soc_dai_driver));
+			dai_idx += afe->sub_dais[i].num_dai_drivers;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mtk_afe_combine_sub_dai);
+
+int mtk_afe_add_sub_dai_control(struct snd_soc_component *component)
+{
+	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	int i;
+
+	if (!afe->sub_dais) {
+		dev_err(afe->dev, "%s(), sub_dais == NULL\n", __func__);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < afe->num_sub_dais; i++) {
+		if (afe->sub_dais[i].controls)
+			snd_soc_add_component_controls(component,
+				afe->sub_dais[i].controls,
+				afe->sub_dais[i].num_controls);
+
+		if (afe->sub_dais[i].dapm_widgets)
+			snd_soc_dapm_new_controls(&component->dapm,
+				afe->sub_dais[i].dapm_widgets,
+				afe->sub_dais[i].num_dapm_widgets);
+
+		if (afe->sub_dais[i].dapm_routes)
+			snd_soc_dapm_add_routes(&component->dapm,
+				afe->sub_dais[i].dapm_routes,
+				afe->sub_dais[i].num_dapm_routes);
+	}
+
+	snd_soc_dapm_new_widgets(component->dapm.card);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mtk_afe_add_sub_dai_control);
+
 static snd_pcm_uframes_t mtk_afe_pcm_pointer
 			 (struct snd_pcm_substream *substream)
 {
@@ -56,28 +128,31 @@
 	return bytes_to_frames(substream->runtime, pcm_ptr_bytes);
 }
 
-static const struct snd_pcm_ops mtk_afe_pcm_ops = {
+const struct snd_pcm_ops mtk_afe_pcm_ops = {
 	.ioctl = snd_pcm_lib_ioctl,
 	.pointer = mtk_afe_pcm_pointer,
 };
+EXPORT_SYMBOL_GPL(mtk_afe_pcm_ops);
 
-static int mtk_afe_pcm_new(struct snd_soc_pcm_runtime *rtd)
+int mtk_afe_pcm_new(struct snd_soc_pcm_runtime *rtd)
 {
 	size_t size;
-	struct snd_card *card = rtd->card->snd_card;
 	struct snd_pcm *pcm = rtd->pcm;
 	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
 	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
 
 	size = afe->mtk_afe_hardware->buffer_bytes_max;
 	return snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-						     card->dev, size, size);
+						     afe->dev,
+						     size, size);
 }
+EXPORT_SYMBOL_GPL(mtk_afe_pcm_new);
 
-static void mtk_afe_pcm_free(struct snd_pcm *pcm)
+void mtk_afe_pcm_free(struct snd_pcm *pcm)
 {
 	snd_pcm_lib_preallocate_free_for_all(pcm);
 }
+EXPORT_SYMBOL_GPL(mtk_afe_pcm_free);
 
 const struct snd_soc_component_driver mtk_afe_pcm_platform = {
 	.name = AFE_PCM_NAME,
diff --git a/sound/soc/mediatek/common/mtk-afe-platform-driver.h b/sound/soc/mediatek/common/mtk-afe-platform-driver.h
index 8dcdbed..88df679 100644
--- a/sound/soc/mediatek/common/mtk-afe-platform-driver.h
+++ b/sound/soc/mediatek/common/mtk-afe-platform-driver.h
@@ -1,24 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * mtk-afe-platform-driver.h  --  Mediatek afe platform driver definition
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _MTK_AFE_PLATFORM_DRIVER_H_
 #define _MTK_AFE_PLATFORM_DRIVER_H_
 
 #define AFE_PCM_NAME "mtk-afe-pcm"
+extern const struct snd_pcm_ops mtk_afe_pcm_ops;
 extern const struct snd_soc_component_driver mtk_afe_pcm_platform;
 
+struct mtk_base_afe;
+struct snd_pcm;
+struct snd_soc_component;
+struct snd_soc_pcm_runtime;
+
+int mtk_afe_pcm_new(struct snd_soc_pcm_runtime *rtd);
+void mtk_afe_pcm_free(struct snd_pcm *pcm);
+
+int mtk_afe_combine_sub_dai(struct mtk_base_afe *afe);
+int mtk_afe_add_sub_dai_control(struct snd_soc_component *component);
 #endif
 
diff --git a/sound/soc/mediatek/common/mtk-base-afe.h b/sound/soc/mediatek/common/mtk-base-afe.h
index 3a78f6f..bcf562f0 100644
--- a/sound/soc/mediatek/common/mtk-base-afe.h
+++ b/sound/soc/mediatek/common/mtk-base-afe.h
@@ -1,22 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * mtk-base-afe.h  --  Mediatek base afe structure
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _MTK_BASE_AFE_H_
 #define _MTK_BASE_AFE_H_
 
+#define MTK_STREAM_NUM (SNDRV_PCM_STREAM_LAST + 1)
+
 struct mtk_base_memif_data {
 	int id;
 	const char *name;
@@ -54,6 +48,7 @@
 struct device;
 struct mtk_base_afe_memif;
 struct mtk_base_afe_irq;
+struct mtk_base_afe_dai;
 struct regmap;
 struct snd_pcm_substream;
 struct snd_soc_dai;
@@ -77,6 +72,11 @@
 	struct mtk_base_afe_irq *irqs;
 	int irqs_size;
 
+	struct mtk_base_afe_dai *sub_dais;
+	int num_sub_dais;
+	struct snd_soc_dai_driver *dai_drivers;
+	unsigned int num_dai_drivers;
+
 	const struct snd_pcm_hardware *mtk_afe_hardware;
 	int (*memif_fs)(struct snd_pcm_substream *substream,
 			unsigned int rate);
@@ -100,5 +100,17 @@
 	int irq_occupyed;
 };
 
+struct mtk_base_afe_dai {
+	struct snd_soc_dai_driver *dai_drivers;
+	unsigned int num_dai_drivers;
+
+	const struct snd_kcontrol_new *controls;
+	unsigned int num_controls;
+	const struct snd_soc_dapm_widget *dapm_widgets;
+	unsigned int num_dapm_widgets;
+	const struct snd_soc_dapm_route *dapm_routes;
+	unsigned int num_dapm_routes;
+};
+
 #endif
 
diff --git a/sound/soc/mediatek/mt2701/Makefile b/sound/soc/mediatek/mt2701/Makefile
index c91deb6..21d5e69 100644
--- a/sound/soc/mediatek/mt2701/Makefile
+++ b/sound/soc/mediatek/mt2701/Makefile
@@ -1,16 +1,4 @@
-#
-# Copyright (C) 2015 MediaTek Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-
+# SPDX-License-Identifier: GPL-2.0
 # platform driver
 snd-soc-mt2701-afe-objs := mt2701-afe-pcm.o mt2701-afe-clock-ctrl.o
 obj-$(CONFIG_SND_SOC_MT2701) += snd-soc-mt2701-afe.o
diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c
index 949fc3a..ae62089 100644
--- a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c
+++ b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.c
@@ -1,17 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mt2701-afe-clock-ctrl.c  --  Mediatek 2701 afe clock ctrl
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ *	   Ryder Lee <ryder.lee@mediatek.com>
  */
 
 #include "mt2701-afe-common.h"
@@ -43,8 +36,9 @@
 	}
 
 	/* Get I2S related clocks */
-	for (i = 0; i < MT2701_I2S_NUM; i++) {
+	for (i = 0; i < afe_priv->soc->i2s_num; i++) {
 		struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[i];
+		struct clk *i2s_ck;
 		char name[13];
 
 		snprintf(name, sizeof(name), "i2s%d_src_sel", i);
@@ -69,18 +63,20 @@
 		}
 
 		snprintf(name, sizeof(name), "i2so%d_hop_ck", i);
-		i2s_path->hop_ck[I2S_OUT] = devm_clk_get(afe->dev, name);
-		if (IS_ERR(i2s_path->hop_ck[I2S_OUT])) {
+		i2s_ck = devm_clk_get(afe->dev, name);
+		if (IS_ERR(i2s_ck)) {
 			dev_err(afe->dev, "failed to get %s\n", name);
-			return PTR_ERR(i2s_path->hop_ck[I2S_OUT]);
+			return PTR_ERR(i2s_ck);
 		}
+		i2s_path->hop_ck[SNDRV_PCM_STREAM_PLAYBACK] = i2s_ck;
 
 		snprintf(name, sizeof(name), "i2si%d_hop_ck", i);
-		i2s_path->hop_ck[I2S_IN] = devm_clk_get(afe->dev, name);
-		if (IS_ERR(i2s_path->hop_ck[I2S_IN])) {
+		i2s_ck = devm_clk_get(afe->dev, name);
+		if (IS_ERR(i2s_ck)) {
 			dev_err(afe->dev, "failed to get %s\n", name);
-			return PTR_ERR(i2s_path->hop_ck[I2S_IN]);
+			return PTR_ERR(i2s_ck);
 		}
+		i2s_path->hop_ck[SNDRV_PCM_STREAM_CAPTURE] = i2s_ck;
 
 		snprintf(name, sizeof(name), "asrc%d_out_ck", i);
 		i2s_path->asrco_ck = devm_clk_get(afe->dev, name);
@@ -102,10 +98,10 @@
 	return 0;
 }
 
-int mt2701_afe_enable_i2s(struct mtk_base_afe *afe, int id, int dir)
+int mt2701_afe_enable_i2s(struct mtk_base_afe *afe,
+			  struct mt2701_i2s_path *i2s_path,
+			  int dir)
 {
-	struct mt2701_afe_private *afe_priv = afe->platform_priv;
-	struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[id];
 	int ret;
 
 	ret = clk_prepare_enable(i2s_path->asrco_ck);
@@ -128,11 +124,10 @@
 	return ret;
 }
 
-void mt2701_afe_disable_i2s(struct mtk_base_afe *afe, int id, int dir)
+void mt2701_afe_disable_i2s(struct mtk_base_afe *afe,
+			    struct mt2701_i2s_path *i2s_path,
+			    int dir)
 {
-	struct mt2701_afe_private *afe_priv = afe->platform_priv;
-	struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[id];
-
 	clk_disable_unprepare(i2s_path->hop_ck[dir]);
 	clk_disable_unprepare(i2s_path->asrco_ck);
 }
@@ -272,27 +267,32 @@
 	return 0;
 }
 
-void mt2701_mclk_configuration(struct mtk_base_afe *afe, int id, int domain,
-			       int mclk)
+int mt2701_mclk_configuration(struct mtk_base_afe *afe, int id)
 {
 	struct mt2701_afe_private *priv = afe->platform_priv;
 	struct mt2701_i2s_path *i2s_path = &priv->i2s_path[id];
-	int ret;
+	int ret = -EINVAL;
 
 	/* Set mclk source */
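+	/* Choose the PLL domain whose rate is divisible by the target mclk */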
-	if (domain == 0)
+	if (!(MT2701_PLL_DOMAIN_0_RATE % i2s_path->mclk_rate))
 		ret = clk_set_parent(i2s_path->sel_ck,
 				     priv->base_ck[MT2701_TOP_AUD_MCLK_SRC0]);
-	else
+	else if (!(MT2701_PLL_DOMAIN_1_RATE % i2s_path->mclk_rate))
 		ret = clk_set_parent(i2s_path->sel_ck,
 				     priv->base_ck[MT2701_TOP_AUD_MCLK_SRC1]);
 
-	if (ret)
-		dev_err(afe->dev, "failed to set domain%d mclk source %d\n",
-			domain, ret);
+	if (ret) {
+		dev_err(afe->dev, "failed to set mclk source\n");
+		return ret;
+	}
 
 	/* Set mclk divider */
-	ret = clk_set_rate(i2s_path->div_ck, mclk);
-	if (ret)
+	ret = clk_set_rate(i2s_path->div_ck, i2s_path->mclk_rate);
+	if (ret) {
 		dev_err(afe->dev, "failed to set mclk divider %d\n", ret);
+		return ret;
+	}
+
+	return 0;
 }
diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h
index 15417d9..580fead 100644
--- a/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h
+++ b/sound/soc/mediatek/mt2701/mt2701-afe-clock-ctrl.h
@@ -1,37 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * mt2701-afe-clock-ctrl.h  --  Mediatek 2701 afe clock ctrl definition
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ *	   Ryder Lee <ryder.lee@mediatek.com>
  */
 
 #ifndef _MT2701_AFE_CLOCK_CTRL_H_
 #define _MT2701_AFE_CLOCK_CTRL_H_
 
 struct mtk_base_afe;
+struct mt2701_i2s_path;
 
 int mt2701_init_clock(struct mtk_base_afe *afe);
 int mt2701_afe_enable_clock(struct mtk_base_afe *afe);
 int mt2701_afe_disable_clock(struct mtk_base_afe *afe);
 
-int mt2701_afe_enable_i2s(struct mtk_base_afe *afe, int id, int dir);
-void mt2701_afe_disable_i2s(struct mtk_base_afe *afe, int id, int dir);
+int mt2701_afe_enable_i2s(struct mtk_base_afe *afe,
+			  struct mt2701_i2s_path *path,
+			  int dir);
+void mt2701_afe_disable_i2s(struct mtk_base_afe *afe,
+			    struct mt2701_i2s_path *path,
+			    int dir);
 int mt2701_afe_enable_mclk(struct mtk_base_afe *afe, int id);
 void mt2701_afe_disable_mclk(struct mtk_base_afe *afe, int id);
 
 int mt2701_enable_btmrg_clk(struct mtk_base_afe *afe);
 void mt2701_disable_btmrg_clk(struct mtk_base_afe *afe);
 
-void mt2701_mclk_configuration(struct mtk_base_afe *afe, int id, int domain,
-			       int mclk);
+int mt2701_mclk_configuration(struct mtk_base_afe *afe, int id);
 
 #endif
diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-common.h b/sound/soc/mediatek/mt2701/mt2701-afe-common.h
index ae8ddea..d44faba 100644
--- a/sound/soc/mediatek/mt2701/mt2701-afe-common.h
+++ b/sound/soc/mediatek/mt2701/mt2701-afe-common.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * mt2701-afe-common.h  --  Mediatek 2701 audio driver definitions
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _MT_2701_AFE_COMMON_H_
@@ -23,10 +15,8 @@
 #include "mt2701-reg.h"
 #include "../common/mtk-base-afe.h"
 
-#define MT2701_STREAM_DIR_NUM (SNDRV_PCM_STREAM_LAST + 1)
 #define MT2701_PLL_DOMAIN_0_RATE	98304000
 #define MT2701_PLL_DOMAIN_1_RATE	90316800
-#define MT2701_I2S_NUM	4
 
 enum {
 	MT2701_MEMIF_DL1,
@@ -100,30 +90,30 @@
 	int i2s_asrc_fs_mask;
 };
 
-enum mt2701_i2s_dir {
-	I2S_OUT,
-	I2S_IN,
-	I2S_DIR_NUM,
-};
-
 struct mt2701_i2s_path {
-	int dai_id;
 	int mclk_rate;
-	int on[I2S_DIR_NUM];
-	int occupied[I2S_DIR_NUM];
-	const struct mt2701_i2s_data *i2s_data[I2S_DIR_NUM];
-	struct clk *hop_ck[I2S_DIR_NUM];
+	int on[MTK_STREAM_NUM];
+	int occupied[MTK_STREAM_NUM];
+	const struct mt2701_i2s_data *i2s_data[MTK_STREAM_NUM];
+	struct clk *hop_ck[MTK_STREAM_NUM];
 	struct clk *sel_ck;
 	struct clk *div_ck;
 	struct clk *mclk_ck;
 	struct clk *asrco_ck;
 };
 
+struct mt2701_soc_variants {
+	bool has_one_heart_mode;
+	int i2s_num;
+};
+
 struct mt2701_afe_private {
-	struct mt2701_i2s_path i2s_path[MT2701_I2S_NUM];
+	struct mt2701_i2s_path *i2s_path;
 	struct clk *base_ck[MT2701_BASE_CLK_NUM];
 	struct clk *mrgif_ck;
-	bool mrg_enable[MT2701_STREAM_DIR_NUM];
+	bool mrg_enable[MTK_STREAM_NUM];
+
+	const struct mt2701_soc_variants *soc;
 };
 
 #endif
diff --git a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c
index 73cce33..828d11c 100644
--- a/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c
+++ b/sound/soc/mediatek/mt2701/mt2701-afe-pcm.c
@@ -1,18 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Mediatek ALSA SoC AFE platform driver for 2701
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *             Ir Lian <ir.lian@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ *	   Ir Lian <ir.lian@mediatek.com>
+ *	   Ryder Lee <ryder.lee@mediatek.com>
  */
 
 #include <linux/delay.h>
@@ -20,6 +13,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 
 #include "mt2701-afe-common.h"
@@ -36,7 +30,7 @@
 	.period_bytes_max = 1024 * 256,
 	.periods_min = 4,
 	.periods_max = 1024,
-	.buffer_bytes_max = 1024 * 1024 * 16,
+	.buffer_bytes_max = 1024 * 1024,
 	.fifo_size = 0,
 };
 
@@ -68,9 +62,10 @@
 
 static int mt2701_dai_num_to_i2s(struct mtk_base_afe *afe, int num)
 {
+	struct mt2701_afe_private *afe_priv = afe->platform_priv;
 	int val = num - MT2701_IO_I2S;
 
-	if (val < 0 || val >= MT2701_I2S_NUM) {
+	if (val < 0 || val >= afe_priv->soc->i2s_num) {
 		dev_err(afe->dev, "%s, num not available, num %d, val %d\n",
 			__func__, num, val);
 		return -EINVAL;
@@ -92,44 +87,26 @@
 static int mt2701_afe_i2s_startup(struct snd_pcm_substream *substream,
 				  struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
+	struct mt2701_afe_private *afe_priv = afe->platform_priv;
 	int i2s_num = mt2701_dai_num_to_i2s(afe, dai->id);
+	bool mode = afe_priv->soc->has_one_heart_mode;
 
 	if (i2s_num < 0)
 		return i2s_num;
 
-	return mt2701_afe_enable_mclk(afe, i2s_num);
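+	/* one-heart SoCs share a single mclk, provided through I2S port 1 */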
+	return mt2701_afe_enable_mclk(afe, mode ? 1 : i2s_num);
 }
 
-static int mt2701_afe_i2s_path_shutdown(struct snd_pcm_substream *substream,
-					struct snd_soc_dai *dai,
-					int i2s_num,
-					int dir_invert)
+static int mt2701_afe_i2s_path_disable(struct mtk_base_afe *afe,
+				       struct mt2701_i2s_path *i2s_path,
+				       int stream_dir)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
-	struct mt2701_afe_private *afe_priv = afe->platform_priv;
-	struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[i2s_num];
-	const struct mt2701_i2s_data *i2s_data;
-	int stream_dir = substream->stream;
+	const struct mt2701_i2s_data *i2s_data = i2s_path->i2s_data[stream_dir];
 
-	if (dir_invert)	{
-		if (stream_dir == SNDRV_PCM_STREAM_PLAYBACK)
-			stream_dir = SNDRV_PCM_STREAM_CAPTURE;
-		else
-			stream_dir = SNDRV_PCM_STREAM_PLAYBACK;
-	}
-	i2s_data = i2s_path->i2s_data[stream_dir];
-
-	i2s_path->on[stream_dir]--;
-	if (i2s_path->on[stream_dir] < 0) {
-		dev_warn(afe->dev, "i2s_path->on: %d, dir: %d\n",
-			 i2s_path->on[stream_dir], stream_dir);
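+	/* decrement the use count; clamp at zero in case of unbalanced calls */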
+	if (--i2s_path->on[stream_dir] < 0)
 		i2s_path->on[stream_dir] = 0;
-	}
+
 	if (i2s_path->on[stream_dir])
 		return 0;
 
@@ -137,7 +114,7 @@
 	regmap_update_bits(afe->regmap, i2s_data->i2s_ctrl_reg,
 			   ASYS_I2S_CON_I2S_EN, 0);
 
-	mt2701_afe_disable_i2s(afe, i2s_num, stream_dir);
+	mt2701_afe_disable_i2s(afe, i2s_path, stream_dir);
 
 	return 0;
 }
@@ -145,12 +122,11 @@
 static void mt2701_afe_i2s_shutdown(struct snd_pcm_substream *substream,
 				    struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt2701_afe_private *afe_priv = afe->platform_priv;
 	int i2s_num = mt2701_dai_num_to_i2s(afe, dai->id);
 	struct mt2701_i2s_path *i2s_path;
+	bool mode = afe_priv->soc->has_one_heart_mode;
 
 	if (i2s_num < 0)
 		return;
@@ -160,50 +136,33 @@
 	if (i2s_path->occupied[substream->stream])
 		i2s_path->occupied[substream->stream] = 0;
 	else
-		goto I2S_UNSTART;
+		goto exit;
 
-	mt2701_afe_i2s_path_shutdown(substream, dai, i2s_num, 0);
+	mt2701_afe_i2s_path_disable(afe, i2s_path, substream->stream);
 
 	/* need to disable i2s-out path when disable i2s-in */
 	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
-		mt2701_afe_i2s_path_shutdown(substream, dai, i2s_num, 1);
+		mt2701_afe_i2s_path_disable(afe, i2s_path, !substream->stream);
 
-I2S_UNSTART:
+exit:
 	/* disable mclk */
-	mt2701_afe_disable_mclk(afe, i2s_num);
+	mt2701_afe_disable_mclk(afe, mode ? 1 : i2s_num);
 }
 
-static int mt2701_i2s_path_prepare_enable(struct snd_pcm_substream *substream,
-					  struct snd_soc_dai *dai,
-					  int i2s_num,
-					  int dir_invert)
+static int mt2701_i2s_path_enable(struct mtk_base_afe *afe,
+				  struct mt2701_i2s_path *i2s_path,
+				  int stream_dir, int rate)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	const struct mt2701_i2s_data *i2s_data = i2s_path->i2s_data[stream_dir];
 	struct mt2701_afe_private *afe_priv = afe->platform_priv;
-	struct mt2701_i2s_path *i2s_path = &afe_priv->i2s_path[i2s_num];
-	const struct mt2701_i2s_data *i2s_data;
-	struct snd_pcm_runtime * const runtime = substream->runtime;
 	int reg, fs, w_len = 1; /* now we support bck 64bits only */
-	int stream_dir = substream->stream;
-	unsigned int mask = 0, val = 0;
-
-	if (dir_invert) {
-		if (stream_dir == SNDRV_PCM_STREAM_PLAYBACK)
-			stream_dir = SNDRV_PCM_STREAM_CAPTURE;
-		else
-			stream_dir = SNDRV_PCM_STREAM_PLAYBACK;
-	}
-	i2s_data = i2s_path->i2s_data[stream_dir];
+	unsigned int mask, val;
 
 	/* no need to enable if already done */
-	i2s_path->on[stream_dir]++;
-
-	if (i2s_path->on[stream_dir] != 1)
+	if (++i2s_path->on[stream_dir] != 1)
 		return 0;
 
-	fs = mt2701_afe_i2s_fs(runtime->rate);
+	fs = mt2701_afe_i2s_fs(rate);
 
 	mask = ASYS_I2S_CON_FS |
 	       ASYS_I2S_CON_I2S_COUPLE_MODE | /* 0 */
@@ -217,22 +176,24 @@
 	if (stream_dir == SNDRV_PCM_STREAM_CAPTURE) {
 		mask |= ASYS_I2S_IN_PHASE_FIX;
 		val |= ASYS_I2S_IN_PHASE_FIX;
+		reg = ASMI_TIMING_CON1;
+	} else {
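+		/* one-heart mode ties all I2S outputs to one shared timing */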
+		if (afe_priv->soc->has_one_heart_mode) {
+			mask |= ASYS_I2S_CON_ONE_HEART_MODE;
+			val |= ASYS_I2S_CON_ONE_HEART_MODE;
+		}
+		reg = ASMO_TIMING_CON1;
 	}
 
 	regmap_update_bits(afe->regmap, i2s_data->i2s_ctrl_reg, mask, val);
 
-	if (stream_dir == SNDRV_PCM_STREAM_PLAYBACK)
-		reg = ASMO_TIMING_CON1;
-	else
-		reg = ASMI_TIMING_CON1;
-
 	regmap_update_bits(afe->regmap, reg,
 			   i2s_data->i2s_asrc_fs_mask
 			   << i2s_data->i2s_asrc_fs_shift,
 			   fs << i2s_data->i2s_asrc_fs_shift);
 
 	/* enable i2s */
-	mt2701_afe_enable_i2s(afe, i2s_num, stream_dir);
+	mt2701_afe_enable_i2s(afe, i2s_path, stream_dir);
 
 	/* reset i2s hw status before enable */
 	regmap_update_bits(afe->regmap, i2s_data->i2s_ctrl_reg,
@@ -249,45 +210,33 @@
 static int mt2701_afe_i2s_prepare(struct snd_pcm_substream *substream,
 				  struct snd_soc_dai *dai)
 {
-	int clk_domain;
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt2701_afe_private *afe_priv = afe->platform_priv;
-	int i2s_num = mt2701_dai_num_to_i2s(afe, dai->id);
+	int ret, i2s_num = mt2701_dai_num_to_i2s(afe, dai->id);
 	struct mt2701_i2s_path *i2s_path;
-	int mclk_rate;
+	bool mode = afe_priv->soc->has_one_heart_mode;
 
 	if (i2s_num < 0)
 		return i2s_num;
 
 	i2s_path = &afe_priv->i2s_path[i2s_num];
-	mclk_rate = i2s_path->mclk_rate;
 
 	if (i2s_path->occupied[substream->stream])
 		return -EBUSY;
+
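+	/* PLL-domain lookup and divider setup now live in mt2701_mclk_configuration() */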
+	ret = mt2701_mclk_configuration(afe, mode ? 1 : i2s_num);
+	if (ret)
+		return ret;
+
 	i2s_path->occupied[substream->stream] = 1;
 
-	if (MT2701_PLL_DOMAIN_0_RATE % mclk_rate == 0) {
-		clk_domain = 0;
-	} else if (MT2701_PLL_DOMAIN_1_RATE % mclk_rate == 0) {
-		clk_domain = 1;
-	} else {
-		dev_err(dai->dev, "%s() bad mclk rate %d\n",
-			__func__, mclk_rate);
-		return -EINVAL;
-	}
-	mt2701_mclk_configuration(afe, i2s_num, clk_domain, mclk_rate);
+	/* need to enable i2s-out path when enable i2s-in */
+	if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+		mt2701_i2s_path_enable(afe, i2s_path, !substream->stream,
+				       substream->runtime->rate);
 
-	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-		mt2701_i2s_path_prepare_enable(substream, dai, i2s_num, 0);
-	} else {
-		/* need to enable i2s-out path when enable i2s-in */
-		/* prepare for another direction "out" */
-		mt2701_i2s_path_prepare_enable(substream, dai, i2s_num, 1);
-		/* prepare for "in" */
-		mt2701_i2s_path_prepare_enable(substream, dai, i2s_num, 0);
-	}
+	mt2701_i2s_path_enable(afe, i2s_path, substream->stream,
+			       substream->runtime->rate);
 
 	return 0;
 }
@@ -295,30 +244,29 @@
 static int mt2701_afe_i2s_set_sysclk(struct snd_soc_dai *dai, int clk_id,
 				     unsigned int freq, int dir)
 {
-	struct mtk_base_afe *afe = dev_get_drvdata(dai->dev);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt2701_afe_private *afe_priv = afe->platform_priv;
 	int i2s_num = mt2701_dai_num_to_i2s(afe, dai->id);
+	bool mode = afe_priv->soc->has_one_heart_mode;
 
 	if (i2s_num < 0)
 		return i2s_num;
 
 	/* mclk */
 	if (dir == SND_SOC_CLOCK_IN) {
-		dev_warn(dai->dev,
-			 "%s() warning: mt2701 doesn't support mclk input\n",
-			__func__);
+		dev_warn(dai->dev, "The SoCs doesn't support mclk input\n");
 		return -EINVAL;
 	}
-	afe_priv->i2s_path[i2s_num].mclk_rate = freq;
+
+	afe_priv->i2s_path[mode ? 1 : i2s_num].mclk_rate = freq;
+
 	return 0;
 }
 
 static int mt2701_btmrg_startup(struct snd_pcm_substream *substream,
 				struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt2701_afe_private *afe_priv = afe->platform_priv;
 	int ret;
 
@@ -327,6 +275,7 @@
 		return ret;
 
 	afe_priv->mrg_enable[substream->stream] = 1;
+
 	return 0;
 }
 
@@ -334,17 +283,14 @@
 				  struct snd_pcm_hw_params *params,
 				  struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	int stream_fs;
 	u32 val, msk;
 
 	stream_fs = params_rate(params);
 
-	if ((stream_fs != 8000) && (stream_fs != 16000)) {
-		dev_err(afe->dev, "%s() btmgr not support this stream_fs %d\n",
-			__func__, stream_fs);
+	if (stream_fs != 8000 && stream_fs != 16000) {
+		dev_err(afe->dev, "unsupported rate %d\n", stream_fs);
 		return -EINVAL;
 	}
 
@@ -378,9 +324,7 @@
 static void mt2701_btmrg_shutdown(struct snd_pcm_substream *substream,
 				  struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt2701_afe_private *afe_priv = afe->platform_priv;
 
 	/* if the other direction stream is not occupied */
@@ -393,28 +337,26 @@
 				   AFE_MRGIF_CON_MRG_I2S_EN, 0);
 		mt2701_disable_btmrg_clk(afe);
 	}
+
 	afe_priv->mrg_enable[substream->stream] = 0;
 }
 
 static int mt2701_simple_fe_startup(struct snd_pcm_substream *substream,
 				    struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
-	int stream_dir = substream->stream;
-	int memif_num = rtd->cpu_dai->id;
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mtk_base_afe_memif *memif_tmp;
+	int stream_dir = substream->stream;
 
 	/* can't run single DL & DLM at the same time */
 	if (stream_dir == SNDRV_PCM_STREAM_PLAYBACK) {
 		memif_tmp = &afe->memif[MT2701_MEMIF_DLM];
 		if (memif_tmp->substream) {
-			dev_warn(afe->dev, "%s memif is not available, stream_dir %d, memif_num %d\n",
-				 __func__, stream_dir, memif_num);
+			dev_warn(afe->dev, "memif is not available");
 			return -EBUSY;
 		}
 	}
+
 	return mtk_afe_fe_startup(substream, dai);
 }
 
@@ -422,27 +364,23 @@
 				      struct snd_pcm_hw_params *params,
 				      struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	int stream_dir = substream->stream;
 
 	/* single DL use PAIR_INTERLEAVE */
-	if (stream_dir == SNDRV_PCM_STREAM_PLAYBACK) {
+	if (stream_dir == SNDRV_PCM_STREAM_PLAYBACK)
 		regmap_update_bits(afe->regmap,
 				   AFE_MEMIF_PBUF_SIZE,
 				   AFE_MEMIF_PBUF_SIZE_DLM_MASK,
 				   AFE_MEMIF_PBUF_SIZE_PAIR_INTERLEAVE);
-	}
+
 	return mtk_afe_fe_hw_params(substream, params, dai);
 }
 
 static int mt2701_dlm_fe_startup(struct snd_pcm_substream *substream,
 				 struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mtk_base_afe_memif *memif_tmp;
 	const struct mtk_base_memif_data *memif_data;
 	int i;
@@ -468,9 +406,7 @@
 static void mt2701_dlm_fe_shutdown(struct snd_pcm_substream *substream,
 				   struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	const struct mtk_base_memif_data *memif_data;
 	int i;
 
@@ -481,6 +417,7 @@
 				   1 << memif_data->agent_disable_shift,
 				   1 << memif_data->agent_disable_shift);
 	}
+
 	return mtk_afe_fe_shutdown(substream, dai);
 }
 
@@ -488,9 +425,7 @@
 				   struct snd_pcm_hw_params *params,
 				   struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	int channels = params_channels(params);
 
 	regmap_update_bits(afe->regmap,
@@ -512,9 +447,7 @@
 static int mt2701_dlm_fe_trigger(struct snd_pcm_substream *substream,
 				 int cmd, struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mtk_base_afe_memif *memif_tmp = &afe->memif[MT2701_MEMIF_DL1];
 
 	switch (cmd) {
@@ -547,6 +480,7 @@
 		fs = mt2701_afe_i2s_fs(rate);
 	else
 		fs = (rate == 16000 ? 1 : 0);
+
 	return fs;
 }
 
@@ -1024,7 +958,17 @@
 	{ "O31", "I35 Switch", "I35" },
 };
 
+static int mt2701_afe_pcm_probe(struct snd_soc_component *component)
+{
+	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+
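+	/* hand the AFE regmap to the component for its register I/O */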
+	snd_soc_component_init_regmap(component, afe->regmap);
+
+	return 0;
+}
+
 static const struct snd_soc_component_driver mt2701_afe_pcm_dai_component = {
+	.probe = mt2701_afe_pcm_probe,
 	.name = "mt2701-afe-pcm-dai",
 	.dapm_widgets = mt2701_afe_pcm_widgets,
 	.num_dapm_widgets = ARRAY_SIZE(mt2701_afe_pcm_widgets),
@@ -1324,63 +1268,24 @@
 	}
 };
 
-static const struct mt2701_i2s_data mt2701_i2s_data[MT2701_I2S_NUM][2] = {
+static const struct mt2701_i2s_data mt2701_i2s_data[][2] = {
 	{
-		{
-			.i2s_ctrl_reg = ASYS_I2SO1_CON,
-			.i2s_asrc_fs_shift = 0,
-			.i2s_asrc_fs_mask = 0x1f,
-
-		},
-		{
-			.i2s_ctrl_reg = ASYS_I2SIN1_CON,
-			.i2s_asrc_fs_shift = 0,
-			.i2s_asrc_fs_mask = 0x1f,
-
-		},
+		{ ASYS_I2SO1_CON, 0, 0x1f },
+		{ ASYS_I2SIN1_CON, 0, 0x1f },
 	},
 	{
-		{
-			.i2s_ctrl_reg = ASYS_I2SO2_CON,
-			.i2s_asrc_fs_shift = 5,
-			.i2s_asrc_fs_mask = 0x1f,
-
-		},
-		{
-			.i2s_ctrl_reg = ASYS_I2SIN2_CON,
-			.i2s_asrc_fs_shift = 5,
-			.i2s_asrc_fs_mask = 0x1f,
-
-		},
+		{ ASYS_I2SO2_CON, 5, 0x1f },
+		{ ASYS_I2SIN2_CON, 5, 0x1f },
 	},
 	{
-		{
-			.i2s_ctrl_reg = ASYS_I2SO3_CON,
-			.i2s_asrc_fs_shift = 10,
-			.i2s_asrc_fs_mask = 0x1f,
-
-		},
-		{
-			.i2s_ctrl_reg = ASYS_I2SIN3_CON,
-			.i2s_asrc_fs_shift = 10,
-			.i2s_asrc_fs_mask = 0x1f,
-
-		},
+		{ ASYS_I2SO3_CON, 10, 0x1f },
+		{ ASYS_I2SIN3_CON, 10, 0x1f },
 	},
 	{
-		{
-			.i2s_ctrl_reg = ASYS_I2SO4_CON,
-			.i2s_asrc_fs_shift = 15,
-			.i2s_asrc_fs_mask = 0x1f,
-
-		},
-		{
-			.i2s_ctrl_reg = ASYS_I2SIN4_CON,
-			.i2s_asrc_fs_shift = 15,
-			.i2s_asrc_fs_mask = 0x1f,
-
-		},
+		{ ASYS_I2SO4_CON, 15, 0x1f },
+		{ ASYS_I2SIN4_CON, 15, 0x1f },
 	},
+	/* TODO: extend with the control registers of newer SoCs */
 };
 
 static irqreturn_t mt2701_asys_isr(int irq_id, void *dev)
@@ -1398,10 +1303,12 @@
 		memif = &afe->memif[id];
 		if (memif->irq_usage < 0)
 			continue;
+
 		irq = &afe->irqs[memif->irq_usage];
-		if (status & 1 << (irq->irq_data->irq_clr_shift))
+		if (status & 1 << irq->irq_data->irq_clr_shift)
 			snd_pcm_period_elapsed(memif->substream);
 	}
+
 	return IRQ_HANDLED;
 }
 
@@ -1419,22 +1326,6 @@
 	return mt2701_afe_enable_clock(afe);
 }
 
-static int mt2701_afe_add_component(struct mtk_base_afe *afe)
-{
-	struct snd_soc_component *component;
-
-	component = kzalloc(sizeof(*component), GFP_KERNEL);
-	if (!component)
-		return -ENOMEM;
-
-	component->regmap = afe->regmap;
-
-	return snd_soc_add_component(afe->dev, component,
-				     &mt2701_afe_pcm_dai_component,
-				     mt2701_afe_pcm_dais,
-				     ARRAY_SIZE(mt2701_afe_pcm_dais));
-}
-
 static int mt2701_afe_pcm_dev_probe(struct platform_device *pdev)
 {
 	struct mtk_base_afe *afe;
@@ -1452,9 +1343,16 @@
 		return -ENOMEM;
 
 	afe_priv = afe->platform_priv;
+	afe_priv->soc = of_device_get_match_data(&pdev->dev);
 	afe->dev = &pdev->dev;
 	dev = afe->dev;
 
+	afe_priv->i2s_path = devm_kcalloc(dev, afe_priv->soc->i2s_num,
+					  sizeof(*afe_priv->i2s_path),
+					  GFP_KERNEL);
+	if (!afe_priv->i2s_path)
+		return -ENOMEM;
+
 	irq_id = platform_get_irq_byname(pdev, "asys");
 	if (irq_id < 0) {
 		dev_err(dev, "unable to get ASYS IRQ\n");
@@ -1499,11 +1397,11 @@
 		afe->irqs[i].irq_data = &irq_data[i];
 
 	/* I2S initialize */
-	for (i = 0; i < MT2701_I2S_NUM; i++) {
-		afe_priv->i2s_path[i].i2s_data[I2S_OUT]
-			= &mt2701_i2s_data[i][I2S_OUT];
-		afe_priv->i2s_path[i].i2s_data[I2S_IN]
-			= &mt2701_i2s_data[i][I2S_IN];
+	for (i = 0; i < afe_priv->soc->i2s_num; i++) {
+		afe_priv->i2s_path[i].i2s_data[SNDRV_PCM_STREAM_PLAYBACK] =
+			&mt2701_i2s_data[i][SNDRV_PCM_STREAM_PLAYBACK];
+		afe_priv->i2s_path[i].i2s_data[SNDRV_PCM_STREAM_CAPTURE] =
+			&mt2701_i2s_data[i][SNDRV_PCM_STREAM_CAPTURE];
 	}
 
 	afe->mtk_afe_hardware = &mt2701_afe_hardware;
@@ -1538,7 +1436,10 @@
 		goto err_platform;
 	}
 
-	ret = mt2701_afe_add_component(afe);
+	ret = devm_snd_soc_register_component(&pdev->dev,
+					      &mt2701_afe_pcm_dai_component,
+					      mt2701_afe_pcm_dais,
+					      ARRAY_SIZE(mt2701_afe_pcm_dais));
 	if (ret) {
 		dev_warn(dev, "err_dai_component\n");
 		goto err_platform;
@@ -1564,8 +1465,18 @@
 	return 0;
 }
 
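+/* per-SoC capabilities, selected through the OF match table below */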
+static const struct mt2701_soc_variants mt2701_soc_v1 = {
+	.i2s_num = 4,
+};
+
+static const struct mt2701_soc_variants mt2701_soc_v2 = {
+	.has_one_heart_mode = true,
+	.i2s_num = 4,
+};
+
 static const struct of_device_id mt2701_afe_pcm_dt_match[] = {
-	{ .compatible = "mediatek,mt2701-audio", },
+	{ .compatible = "mediatek,mt2701-audio", .data = &mt2701_soc_v1 },
+	{ .compatible = "mediatek,mt7622-audio", .data = &mt2701_soc_v2 },
 	{},
 };
 MODULE_DEVICE_TABLE(of, mt2701_afe_pcm_dt_match);
diff --git a/sound/soc/mediatek/mt2701/mt2701-cs42448.c b/sound/soc/mediatek/mt2701/mt2701-cs42448.c
index 70f61d5..666282b 100644
--- a/sound/soc/mediatek/mt2701/mt2701-cs42448.c
+++ b/sound/soc/mediatek/mt2701/mt2701-cs42448.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mt2701-cs42448.c  --  MT2701 CS42448 ALSA SoC machine driver
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Ir Lian <ir.lian@mediatek.com>
- *              Garlic Tseng <garlic.tseng@mediatek.com>
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ *	   Garlic Tseng <garlic.tseng@mediatek.com>
  */
 
 #include <linux/module.h>
diff --git a/sound/soc/mediatek/mt2701/mt2701-reg.h b/sound/soc/mediatek/mt2701/mt2701-reg.h
index 18e6769..c84d14c 100644
--- a/sound/soc/mediatek/mt2701/mt2701-reg.h
+++ b/sound/soc/mediatek/mt2701/mt2701-reg.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * mt2701-reg.h  --  Mediatek 2701 audio driver reg definition
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Garlic Tseng <garlic.tseng@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _MT2701_REG_H_
@@ -138,6 +130,7 @@
 #define ASYS_I2S_CON_FS_SET(x)		((x) << 8)
 #define ASYS_I2S_CON_RESET		(0x1 << 30)
 #define ASYS_I2S_CON_I2S_EN		(0x1 << 0)
+#define ASYS_I2S_CON_ONE_HEART_MODE	(0x1 << 16)
 #define ASYS_I2S_CON_I2S_COUPLE_MODE	(0x1 << 17)
 /* 0:EIAJ 1:I2S */
 #define ASYS_I2S_CON_I2S_MODE		(0x1 << 3)
diff --git a/sound/soc/mediatek/mt2701/mt2701-wm8960.c b/sound/soc/mediatek/mt2701/mt2701-wm8960.c
index a08ce23..89f34ef 100644
--- a/sound/soc/mediatek/mt2701/mt2701-wm8960.c
+++ b/sound/soc/mediatek/mt2701/mt2701-wm8960.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mt2701-wm8960.c  --  MT2701 WM8960 ALSA SoC machine driver
  *
  * Copyright (c) 2017 MediaTek Inc.
  * Author: Ryder Lee <ryder.lee@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
diff --git a/sound/soc/mediatek/mt6797/Makefile b/sound/soc/mediatek/mt6797/Makefile
new file mode 100644
index 0000000..bf6e179e
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# platform driver
+snd-soc-mt6797-afe-objs := \
+	mt6797-afe-pcm.o \
+	mt6797-afe-clk.o \
+	mt6797-dai-pcm.o \
+	mt6797-dai-hostless.o \
+	mt6797-dai-adda.o
+
+obj-$(CONFIG_SND_SOC_MT6797) += snd-soc-mt6797-afe.o
+
+# machine driver
+obj-$(CONFIG_SND_SOC_MT6797_MT6351) += mt6797-mt6351.o
diff --git a/sound/soc/mediatek/mt6797/mt6797-afe-clk.c b/sound/soc/mediatek/mt6797/mt6797-afe-clk.c
new file mode 100644
index 0000000..6f3e6ac
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-afe-clk.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mt6797-afe-clk.c  --  Mediatek 6797 afe clock ctrl
+//
+// Copyright (c) 2018 MediaTek Inc.
+// Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+
+#include <linux/clk.h>
+
+#include "mt6797-afe-common.h"
+#include "mt6797-afe-clk.h"
+
+enum {
+	CLK_INFRA_SYS_AUD,
+	CLK_INFRA_SYS_AUD_26M,
+	CLK_TOP_MUX_AUD,
+	CLK_TOP_MUX_AUD_BUS,
+	CLK_TOP_SYSPLL3_D4,
+	CLK_TOP_SYSPLL1_D4,
+	CLK_CLK26M,
+	CLK_NUM
+};
+
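+/* clock names, as listed in the "clock-names" property of the DT node */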
+static const char *aud_clks[CLK_NUM] = {
+	[CLK_INFRA_SYS_AUD] = "infra_sys_audio_clk",
+	[CLK_INFRA_SYS_AUD_26M] = "infra_sys_audio_26m",
+	[CLK_TOP_MUX_AUD] = "top_mux_audio",
+	[CLK_TOP_MUX_AUD_BUS] = "top_mux_aud_intbus",
+	[CLK_TOP_SYSPLL3_D4] = "top_sys_pll3_d4",
+	[CLK_TOP_SYSPLL1_D4] = "top_sys_pll1_d4",
+	[CLK_CLK26M] = "top_clk26m_clk",
+};
+
+int mt6797_init_clock(struct mtk_base_afe *afe)
+{
+	struct mt6797_afe_private *afe_priv = afe->platform_priv;
+	int i;
+
+	afe_priv->clk = devm_kcalloc(afe->dev, CLK_NUM, sizeof(*afe_priv->clk),
+				     GFP_KERNEL);
+	if (!afe_priv->clk)
+		return -ENOMEM;
+
+	for (i = 0; i < CLK_NUM; i++) {
+		afe_priv->clk[i] = devm_clk_get(afe->dev, aud_clks[i]);
+		if (IS_ERR(afe_priv->clk[i])) {
+			dev_err(afe->dev, "%s(), devm_clk_get %s fail, ret %ld\n",
+				__func__, aud_clks[i],
+				PTR_ERR(afe_priv->clk[i]));
+			return PTR_ERR(afe_priv->clk[i]);
+		}
+	}
+
+	return 0;
+}
+
+int mt6797_afe_enable_clock(struct mtk_base_afe *afe)
+{
+	struct mt6797_afe_private *afe_priv = afe->platform_priv;
+	int ret;
+
+	ret = clk_prepare_enable(afe_priv->clk[CLK_INFRA_SYS_AUD]);
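+	/* power up the AFE; the reference is dropped again in dev_remove() */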
+	if (ret) {
+		dev_err(afe->dev, "%s(), clk_prepare_enable %s fail %d\n",
+			__func__, aud_clks[CLK_INFRA_SYS_AUD], ret);
+		goto CLK_INFRA_SYS_AUDIO_ERR;
+	}
+
+	ret = clk_prepare_enable(afe_priv->clk[CLK_INFRA_SYS_AUD_26M]);
+	if (ret) {
+		dev_err(afe->dev, "%s(), clk_prepare_enable %s fail %d\n",
+			__func__, aud_clks[CLK_INFRA_SYS_AUD_26M], ret);
+		goto CLK_INFRA_SYS_AUD_26M_ERR;
+	}
+
+	ret = clk_prepare_enable(afe_priv->clk[CLK_TOP_MUX_AUD]);
+	if (ret) {
+		dev_err(afe->dev, "%s(), clk_prepare_enable %s fail %d\n",
+			__func__, aud_clks[CLK_TOP_MUX_AUD], ret);
+		goto CLK_MUX_AUDIO_ERR;
+	}
+
+	ret = clk_set_parent(afe_priv->clk[CLK_TOP_MUX_AUD],
+			     afe_priv->clk[CLK_CLK26M]);
+	if (ret) {
+		dev_err(afe->dev, "%s(), clk_set_parent %s-%s fail %d\n",
+			__func__, aud_clks[CLK_TOP_MUX_AUD],
+			aud_clks[CLK_CLK26M], ret);
+		goto CLK_MUX_AUDIO_ERR;
+	}
+
+	ret = clk_prepare_enable(afe_priv->clk[CLK_TOP_MUX_AUD_BUS]);
+	if (ret) {
+		dev_err(afe->dev, "%s(), clk_prepare_enable %s fail %d\n",
+			__func__, aud_clks[CLK_TOP_MUX_AUD_BUS], ret);
+		goto CLK_MUX_AUDIO_INTBUS_ERR;
+	}
+
+	return 0;
+
+CLK_MUX_AUDIO_INTBUS_ERR:
+	clk_disable_unprepare(afe_priv->clk[CLK_TOP_MUX_AUD_BUS]);
+CLK_MUX_AUDIO_ERR:
+	clk_disable_unprepare(afe_priv->clk[CLK_TOP_MUX_AUD]);
+CLK_INFRA_SYS_AUD_26M_ERR:
+	clk_disable_unprepare(afe_priv->clk[CLK_INFRA_SYS_AUD_26M]);
+CLK_INFRA_SYS_AUDIO_ERR:
+	clk_disable_unprepare(afe_priv->clk[CLK_INFRA_SYS_AUD]);
+
+	return ret;
+}
+
+int mt6797_afe_disable_clock(struct mtk_base_afe *afe)
+{
+	struct mt6797_afe_private *afe_priv = afe->platform_priv;
+
+	clk_disable_unprepare(afe_priv->clk[CLK_TOP_MUX_AUD_BUS]);
+	clk_disable_unprepare(afe_priv->clk[CLK_TOP_MUX_AUD]);
+	clk_disable_unprepare(afe_priv->clk[CLK_INFRA_SYS_AUD_26M]);
+	clk_disable_unprepare(afe_priv->clk[CLK_INFRA_SYS_AUD]);
+
+	return 0;
+}
diff --git a/sound/soc/mediatek/mt6797/mt6797-afe-clk.h b/sound/soc/mediatek/mt6797/mt6797-afe-clk.h
new file mode 100644
index 0000000..a6f0cb5
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-afe-clk.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mt6797-afe-clk.h  --  Mediatek 6797 afe clock ctrl definition
+ *
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+ */
+
+#ifndef _MT6797_AFE_CLK_H_
+#define _MT6797_AFE_CLK_H_
+
+struct mtk_base_afe;
+
+int mt6797_init_clock(struct mtk_base_afe *afe);
+int mt6797_afe_enable_clock(struct mtk_base_afe *afe);
+int mt6797_afe_disable_clock(struct mtk_base_afe *afe);
+#endif
diff --git a/sound/soc/mediatek/mt6797/mt6797-afe-common.h b/sound/soc/mediatek/mt6797/mt6797-afe-common.h
new file mode 100644
index 0000000..22eb7b4
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-afe-common.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mt6797-afe-common.h  --  Mediatek 6797 audio driver definitions
+ *
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+ */
+
+#ifndef _MT_6797_AFE_COMMON_H_
+#define _MT_6797_AFE_COMMON_H_
+
+#include <sound/soc.h>
+#include <linux/regmap.h>
+#include "../common/mtk-base-afe.h"
+
+enum {
+	MT6797_MEMIF_DL1,
+	MT6797_MEMIF_DL2,
+	MT6797_MEMIF_DL3,
+	MT6797_MEMIF_VUL,
+	MT6797_MEMIF_AWB,
+	MT6797_MEMIF_VUL12,
+	MT6797_MEMIF_DAI,
+	MT6797_MEMIF_MOD_DAI,
+	MT6797_MEMIF_NUM,
+	MT6797_DAI_ADDA = MT6797_MEMIF_NUM,
+	MT6797_DAI_PCM_1,
+	MT6797_DAI_PCM_2,
+	MT6797_DAI_HOSTLESS_LPBK,
+	MT6797_DAI_HOSTLESS_SPEECH,
+	MT6797_DAI_NUM,
+};
+
+enum {
+	MT6797_IRQ_1,
+	MT6797_IRQ_2,
+	MT6797_IRQ_3,
+	MT6797_IRQ_4,
+	MT6797_IRQ_7,
+	MT6797_IRQ_NUM,
+};
+
+struct clk;
+
+struct mt6797_afe_private {
+	struct clk **clk;
+};
+
+unsigned int mt6797_general_rate_transform(struct device *dev,
+					   unsigned int rate);
+unsigned int mt6797_rate_transform(struct device *dev,
+				   unsigned int rate, int aud_blk);
+
+/* dai register */
+int mt6797_dai_adda_register(struct mtk_base_afe *afe);
+int mt6797_dai_pcm_register(struct mtk_base_afe *afe);
+int mt6797_dai_hostless_register(struct mtk_base_afe *afe);
+#endif
diff --git a/sound/soc/mediatek/mt6797/mt6797-afe-pcm.c b/sound/soc/mediatek/mt6797/mt6797-afe-pcm.c
new file mode 100644
index 0000000..6c5dd9f
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-afe-pcm.c
@@ -0,0 +1,914 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Mediatek ALSA SoC AFE platform driver for 6797
+//
+// Copyright (c) 2018 MediaTek Inc.
+// Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/pm_runtime.h>
+
+#include "mt6797-afe-common.h"
+#include "mt6797-afe-clk.h"
+#include "mt6797-interconnection.h"
+#include "mt6797-reg.h"
+#include "../common/mtk-afe-platform-driver.h"
+#include "../common/mtk-afe-fe-dai.h"
+
+enum {
+	MTK_AFE_RATE_8K = 0,
+	MTK_AFE_RATE_11K = 1,
+	MTK_AFE_RATE_12K = 2,
+	MTK_AFE_RATE_384K = 3,
+	MTK_AFE_RATE_16K = 4,
+	MTK_AFE_RATE_22K = 5,
+	MTK_AFE_RATE_24K = 6,
+	MTK_AFE_RATE_130K = 7,
+	MTK_AFE_RATE_32K = 8,
+	MTK_AFE_RATE_44K = 9,
+	MTK_AFE_RATE_48K = 10,
+	MTK_AFE_RATE_88K = 11,
+	MTK_AFE_RATE_96K = 12,
+	MTK_AFE_RATE_174K = 13,
+	MTK_AFE_RATE_192K = 14,
+	MTK_AFE_RATE_260K = 15,
+};
+
+enum {
+	MTK_AFE_DAI_MEMIF_RATE_8K = 0,
+	MTK_AFE_DAI_MEMIF_RATE_16K = 1,
+	MTK_AFE_DAI_MEMIF_RATE_32K = 2,
+};
+
+enum {
+	MTK_AFE_PCM_RATE_8K = 0,
+	MTK_AFE_PCM_RATE_16K = 1,
+	MTK_AFE_PCM_RATE_32K = 2,
+	MTK_AFE_PCM_RATE_48K = 3,
+};
+
+unsigned int mt6797_general_rate_transform(struct device *dev,
+					   unsigned int rate)
+{
+	switch (rate) {
+	case 8000:
+		return MTK_AFE_RATE_8K;
+	case 11025:
+		return MTK_AFE_RATE_11K;
+	case 12000:
+		return MTK_AFE_RATE_12K;
+	case 16000:
+		return MTK_AFE_RATE_16K;
+	case 22050:
+		return MTK_AFE_RATE_22K;
+	case 24000:
+		return MTK_AFE_RATE_24K;
+	case 32000:
+		return MTK_AFE_RATE_32K;
+	case 44100:
+		return MTK_AFE_RATE_44K;
+	case 48000:
+		return MTK_AFE_RATE_48K;
+	case 88200:
+		return MTK_AFE_RATE_88K;
+	case 96000:
+		return MTK_AFE_RATE_96K;
+	case 130000:
+		return MTK_AFE_RATE_130K;
+	case 176400:
+		return MTK_AFE_RATE_174K;
+	case 192000:
+		return MTK_AFE_RATE_192K;
+	case 260000:
+		return MTK_AFE_RATE_260K;
+	default:
+		dev_warn(dev, "%s(), rate %u invalid, use %d!!!\n",
+			 __func__, rate, MTK_AFE_RATE_48K);
+		return MTK_AFE_RATE_48K;
+	}
+}
+
+static unsigned int dai_memif_rate_transform(struct device *dev,
+					     unsigned int rate)
+{
+	switch (rate) {
+	case 8000:
+		return MTK_AFE_DAI_MEMIF_RATE_8K;
+	case 16000:
+		return MTK_AFE_DAI_MEMIF_RATE_16K;
+	case 32000:
+		return MTK_AFE_DAI_MEMIF_RATE_32K;
+	default:
+		dev_warn(dev, "%s(), rate %u invalid, use %d!!!\n",
+			 __func__, rate, MTK_AFE_DAI_MEMIF_RATE_16K);
+		return MTK_AFE_DAI_MEMIF_RATE_16K;
+	}
+}
+
+unsigned int mt6797_rate_transform(struct device *dev,
+				   unsigned int rate, int aud_blk)
+{
+	switch (aud_blk) {
+	case MT6797_MEMIF_DAI:
+	case MT6797_MEMIF_MOD_DAI:
+		return dai_memif_rate_transform(dev, rate);
+	default:
+		return mt6797_general_rate_transform(dev, rate);
+	}
+}
+
+static const struct snd_pcm_hardware mt6797_afe_hardware = {
+	.info = SNDRV_PCM_INFO_MMAP |
+		SNDRV_PCM_INFO_INTERLEAVED |
+		SNDRV_PCM_INFO_MMAP_VALID,
+	.formats = SNDRV_PCM_FMTBIT_S16_LE |
+		   SNDRV_PCM_FMTBIT_S24_LE |
+		   SNDRV_PCM_FMTBIT_S32_LE,
+	.period_bytes_min = 256,
+	.period_bytes_max = 4 * 48 * 1024,
+	.periods_min = 2,
+	.periods_max = 256,
+	.buffer_bytes_max = 8 * 48 * 1024,
+	.fifo_size = 0,
+};
+
+static int mt6797_memif_fs(struct snd_pcm_substream *substream,
+			   unsigned int rate)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_component *component =
+		snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
+	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	int id = rtd->cpu_dai->id;
+
+	return mt6797_rate_transform(afe->dev, rate, id);
+}
+
+static int mt6797_irq_fs(struct snd_pcm_substream *substream, unsigned int rate)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_component *component =
+		snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
+	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+
+	return mt6797_general_rate_transform(afe->dev, rate);
+}
+
+#define MTK_PCM_RATES (SNDRV_PCM_RATE_8000_48000 |\
+		       SNDRV_PCM_RATE_88200 |\
+		       SNDRV_PCM_RATE_96000 |\
+		       SNDRV_PCM_RATE_176400 |\
+		       SNDRV_PCM_RATE_192000)
+
+#define MTK_PCM_DAI_RATES (SNDRV_PCM_RATE_8000 |\
+			   SNDRV_PCM_RATE_16000 |\
+			   SNDRV_PCM_RATE_32000)
+
+#define MTK_PCM_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
+			 SNDRV_PCM_FMTBIT_S24_LE |\
+			 SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_driver mt6797_memif_dai_driver[] = {
+	/* FE DAIs: memory interfaces to CPU */
+	{
+		.name = "DL1",
+		.id = MT6797_MEMIF_DL1,
+		.playback = {
+			.stream_name = "DL1",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_afe_fe_ops,
+	},
+	{
+		.name = "DL2",
+		.id = MT6797_MEMIF_DL2,
+		.playback = {
+			.stream_name = "DL2",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_afe_fe_ops,
+	},
+	{
+		.name = "DL3",
+		.id = MT6797_MEMIF_DL3,
+		.playback = {
+			.stream_name = "DL3",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_afe_fe_ops,
+	},
+	{
+		.name = "UL1",
+		.id = MT6797_MEMIF_VUL12,
+		.capture = {
+			.stream_name = "UL1",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_afe_fe_ops,
+	},
+	{
+		.name = "UL2",
+		.id = MT6797_MEMIF_AWB,
+		.capture = {
+			.stream_name = "UL2",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_afe_fe_ops,
+	},
+	{
+		.name = "UL3",
+		.id = MT6797_MEMIF_VUL,
+		.capture = {
+			.stream_name = "UL3",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_afe_fe_ops,
+	},
+	{
+		.name = "UL_MONO_1",
+		.id = MT6797_MEMIF_MOD_DAI,
+		.capture = {
+			.stream_name = "UL_MONO_1",
+			.channels_min = 1,
+			.channels_max = 1,
+			.rates = MTK_PCM_DAI_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_afe_fe_ops,
+	},
+	{
+		.name = "UL_MONO_2",
+		.id = MT6797_MEMIF_DAI,
+		.capture = {
+			.stream_name = "UL_MONO_2",
+			.channels_min = 1,
+			.channels_max = 1,
+			.rates = MTK_PCM_DAI_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_afe_fe_ops,
+	},
+};
+
+/* dma widget & routes */
+static const struct snd_kcontrol_new memif_ul1_ch1_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN21,
+				    I_ADDA_UL_CH1, 1, 0),
+};
+
+static const struct snd_kcontrol_new memif_ul1_ch2_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN22,
+				    I_ADDA_UL_CH2, 1, 0),
+};
+
+static const struct snd_kcontrol_new memif_ul2_ch1_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN5,
+				    I_ADDA_UL_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL1_CH1", AFE_CONN5,
+				    I_DL1_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH1", AFE_CONN5,
+				    I_DL2_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL3_CH1", AFE_CONN5,
+				    I_DL3_CH1, 1, 0),
+};
+
+static const struct snd_kcontrol_new memif_ul2_ch2_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN6,
+				    I_ADDA_UL_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL1_CH2", AFE_CONN6,
+				    I_DL1_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH2", AFE_CONN6,
+				    I_DL2_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL3_CH2", AFE_CONN6,
+				    I_DL3_CH2, 1, 0),
+};
+
+static const struct snd_kcontrol_new memif_ul3_ch1_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN9,
+				    I_ADDA_UL_CH1, 1, 0),
+};
+
+static const struct snd_kcontrol_new memif_ul3_ch2_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN10,
+				    I_ADDA_UL_CH2, 1, 0),
+};
+
+static const struct snd_kcontrol_new memif_ul_mono_1_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN12,
+				    I_ADDA_UL_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN12,
+				    I_ADDA_UL_CH2, 1, 0),
+};
+
+static const struct snd_kcontrol_new memif_ul_mono_2_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN11,
+				    I_ADDA_UL_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN11,
+				    I_ADDA_UL_CH2, 1, 0),
+};
+
+static const struct snd_soc_dapm_widget mt6797_memif_widgets[] = {
+	/* memif */
+	SND_SOC_DAPM_MIXER("UL1_CH1", SND_SOC_NOPM, 0, 0,
+			   memif_ul1_ch1_mix, ARRAY_SIZE(memif_ul1_ch1_mix)),
+	SND_SOC_DAPM_MIXER("UL1_CH2", SND_SOC_NOPM, 0, 0,
+			   memif_ul1_ch2_mix, ARRAY_SIZE(memif_ul1_ch2_mix)),
+
+	SND_SOC_DAPM_MIXER("UL2_CH1", SND_SOC_NOPM, 0, 0,
+			   memif_ul2_ch1_mix, ARRAY_SIZE(memif_ul2_ch1_mix)),
+	SND_SOC_DAPM_MIXER("UL2_CH2", SND_SOC_NOPM, 0, 0,
+			   memif_ul2_ch2_mix, ARRAY_SIZE(memif_ul2_ch2_mix)),
+
+	SND_SOC_DAPM_MIXER("UL3_CH1", SND_SOC_NOPM, 0, 0,
+			   memif_ul3_ch1_mix, ARRAY_SIZE(memif_ul3_ch1_mix)),
+	SND_SOC_DAPM_MIXER("UL3_CH2", SND_SOC_NOPM, 0, 0,
+			   memif_ul3_ch2_mix, ARRAY_SIZE(memif_ul3_ch2_mix)),
+
+	SND_SOC_DAPM_MIXER("UL_MONO_1_CH1", SND_SOC_NOPM, 0, 0,
+			   memif_ul_mono_1_mix,
+			   ARRAY_SIZE(memif_ul_mono_1_mix)),
+
+	SND_SOC_DAPM_MIXER("UL_MONO_2_CH1", SND_SOC_NOPM, 0, 0,
+			   memif_ul_mono_2_mix,
+			   ARRAY_SIZE(memif_ul_mono_2_mix)),
+};
+
+static const struct snd_soc_dapm_route mt6797_memif_routes[] = {
+	/* capture */
+	{"UL1", NULL, "UL1_CH1"},
+	{"UL1", NULL, "UL1_CH2"},
+	{"UL1_CH1", "ADDA_UL_CH1", "ADDA Capture"},
+	{"UL1_CH2", "ADDA_UL_CH2", "ADDA Capture"},
+
+	{"UL2", NULL, "UL2_CH1"},
+	{"UL2", NULL, "UL2_CH2"},
+	{"UL2_CH1", "ADDA_UL_CH1", "ADDA Capture"},
+	{"UL2_CH2", "ADDA_UL_CH2", "ADDA Capture"},
+
+	{"UL3", NULL, "UL3_CH1"},
+	{"UL3", NULL, "UL3_CH2"},
+	{"UL3_CH1", "ADDA_UL_CH1", "ADDA Capture"},
+	{"UL3_CH2", "ADDA_UL_CH2", "ADDA Capture"},
+
+	{"UL_MONO_1", NULL, "UL_MONO_1_CH1"},
+	{"UL_MONO_1_CH1", "ADDA_UL_CH1", "ADDA Capture"},
+	{"UL_MONO_1_CH1", "ADDA_UL_CH2", "ADDA Capture"},
+
+	{"UL_MONO_2", NULL, "UL_MONO_2_CH1"},
+	{"UL_MONO_2_CH1", "ADDA_UL_CH1", "ADDA Capture"},
+	{"UL_MONO_2_CH1", "ADDA_UL_CH2", "ADDA Capture"},
+};
+
+static const struct snd_soc_component_driver mt6797_afe_pcm_dai_component = {
+	.name = "mt6797-afe-pcm-dai",
+};
+
+static const struct mtk_base_memif_data memif_data[MT6797_MEMIF_NUM] = {
+	[MT6797_MEMIF_DL1] = {
+		.name = "DL1",
+		.id = MT6797_MEMIF_DL1,
+		.reg_ofs_base = AFE_DL1_BASE,
+		.reg_ofs_cur = AFE_DL1_CUR,
+		.fs_reg = AFE_DAC_CON1,
+		.fs_shift = DL1_MODE_SFT,
+		.fs_maskbit = DL1_MODE_MASK,
+		.mono_reg = AFE_DAC_CON1,
+		.mono_shift = DL1_DATA_SFT,
+		.enable_reg = AFE_DAC_CON0,
+		.enable_shift = DL1_ON_SFT,
+		.hd_reg = AFE_MEMIF_HD_MODE,
+		.hd_shift = DL1_HD_SFT,
+		.agent_disable_reg = -1,
+		.agent_disable_shift = -1,
+		.msb_reg = -1,
+		.msb_shift = -1,
+	},
+	[MT6797_MEMIF_DL2] = {
+		.name = "DL2",
+		.id = MT6797_MEMIF_DL2,
+		.reg_ofs_base = AFE_DL2_BASE,
+		.reg_ofs_cur = AFE_DL2_CUR,
+		.fs_reg = AFE_DAC_CON1,
+		.fs_shift = DL2_MODE_SFT,
+		.fs_maskbit = DL2_MODE_MASK,
+		.mono_reg = AFE_DAC_CON1,
+		.mono_shift = DL2_DATA_SFT,
+		.enable_reg = AFE_DAC_CON0,
+		.enable_shift = DL2_ON_SFT,
+		.hd_reg = AFE_MEMIF_HD_MODE,
+		.hd_shift = DL2_HD_SFT,
+		.agent_disable_reg = -1,
+		.agent_disable_shift = -1,
+		.msb_reg = -1,
+		.msb_shift = -1,
+	},
+	[MT6797_MEMIF_DL3] = {
+		.name = "DL3",
+		.id = MT6797_MEMIF_DL3,
+		.reg_ofs_base = AFE_DL3_BASE,
+		.reg_ofs_cur = AFE_DL3_CUR,
+		.fs_reg = AFE_DAC_CON0,
+		.fs_shift = DL3_MODE_SFT,
+		.fs_maskbit = DL3_MODE_MASK,
+		.mono_reg = AFE_DAC_CON1,
+		.mono_shift = DL3_DATA_SFT,
+		.enable_reg = AFE_DAC_CON0,
+		.enable_shift = DL3_ON_SFT,
+		.hd_reg = AFE_MEMIF_HD_MODE,
+		.hd_shift = DL3_HD_SFT,
+		.agent_disable_reg = -1,
+		.agent_disable_shift = -1,
+		.msb_reg = -1,
+		.msb_shift = -1,
+	},
+	[MT6797_MEMIF_VUL] = {
+		.name = "VUL",
+		.id = MT6797_MEMIF_VUL,
+		.reg_ofs_base = AFE_VUL_BASE,
+		.reg_ofs_cur = AFE_VUL_CUR,
+		.fs_reg = AFE_DAC_CON1,
+		.fs_shift = VUL_MODE_SFT,
+		.fs_maskbit = VUL_MODE_MASK,
+		.mono_reg = AFE_DAC_CON1,
+		.mono_shift = VUL_DATA_SFT,
+		.enable_reg = AFE_DAC_CON0,
+		.enable_shift = VUL_ON_SFT,
+		.hd_reg = AFE_MEMIF_HD_MODE,
+		.hd_shift = VUL_HD_SFT,
+		.agent_disable_reg = -1,
+		.agent_disable_shift = -1,
+		.msb_reg = -1,
+		.msb_shift = -1,
+	},
+	[MT6797_MEMIF_AWB] = {
+		.name = "AWB",
+		.id = MT6797_MEMIF_AWB,
+		.reg_ofs_base = AFE_AWB_BASE,
+		.reg_ofs_cur = AFE_AWB_CUR,
+		.fs_reg = AFE_DAC_CON1,
+		.fs_shift = AWB_MODE_SFT,
+		.fs_maskbit = AWB_MODE_MASK,
+		.mono_reg = AFE_DAC_CON1,
+		.mono_shift = AWB_DATA_SFT,
+		.enable_reg = AFE_DAC_CON0,
+		.enable_shift = AWB_ON_SFT,
+		.hd_reg = AFE_MEMIF_HD_MODE,
+		.hd_shift = AWB_HD_SFT,
+		.agent_disable_reg = -1,
+		.agent_disable_shift = -1,
+		.msb_reg = -1,
+		.msb_shift = -1,
+	},
+	[MT6797_MEMIF_VUL12] = {
+		.name = "VUL12",
+		.id = MT6797_MEMIF_VUL12,
+		.reg_ofs_base = AFE_VUL_D2_BASE,
+		.reg_ofs_cur = AFE_VUL_D2_CUR,
+		.fs_reg = AFE_DAC_CON0,
+		.fs_shift = VUL_DATA2_MODE_SFT,
+		.fs_maskbit = VUL_DATA2_MODE_MASK,
+		.mono_reg = AFE_DAC_CON0,
+		.mono_shift = VUL_DATA2_DATA_SFT,
+		.enable_reg = AFE_DAC_CON0,
+		.enable_shift = VUL_DATA2_ON_SFT,
+		.hd_reg = AFE_MEMIF_HD_MODE,
+		.hd_shift = VUL_DATA2_HD_SFT,
+		.agent_disable_reg = -1,
+		.agent_disable_shift = -1,
+		.msb_reg = -1,
+		.msb_shift = -1,
+	},
+	[MT6797_MEMIF_DAI] = {
+		.name = "DAI",
+		.id = MT6797_MEMIF_DAI,
+		.reg_ofs_base = AFE_DAI_BASE,
+		.reg_ofs_cur = AFE_DAI_CUR,
+		.fs_reg = AFE_DAC_CON0,
+		.fs_shift = DAI_MODE_SFT,
+		.fs_maskbit = DAI_MODE_MASK,
+		.mono_reg = -1,
+		.mono_shift = 0,
+		.enable_reg = AFE_DAC_CON0,
+		.enable_shift = DAI_ON_SFT,
+		.hd_reg = AFE_MEMIF_HD_MODE,
+		.hd_shift = DAI_HD_SFT,
+		.agent_disable_reg = -1,
+		.agent_disable_shift = -1,
+		.msb_reg = -1,
+		.msb_shift = -1,
+	},
+	[MT6797_MEMIF_MOD_DAI] = {
+		.name = "MOD_DAI",
+		.id = MT6797_MEMIF_MOD_DAI,
+		.reg_ofs_base = AFE_MOD_DAI_BASE,
+		.reg_ofs_cur = AFE_MOD_DAI_CUR,
+		.fs_reg = AFE_DAC_CON1,
+		.fs_shift = MOD_DAI_MODE_SFT,
+		.fs_maskbit = MOD_DAI_MODE_MASK,
+		.mono_reg = -1,
+		.mono_shift = 0,
+		.enable_reg = AFE_DAC_CON0,
+		.enable_shift = MOD_DAI_ON_SFT,
+		.hd_reg = AFE_MEMIF_HD_MODE,
+		.hd_shift = MOD_DAI_HD_SFT,
+		.agent_disable_reg = -1,
+		.agent_disable_shift = -1,
+		.msb_reg = -1,
+		.msb_shift = -1,
+	},
+};
+
+static const struct mtk_base_irq_data irq_data[MT6797_IRQ_NUM] = {
+	[MT6797_IRQ_1] = {
+		.id = MT6797_IRQ_1,
+		.irq_cnt_reg = AFE_IRQ_MCU_CNT1,
+		.irq_cnt_shift = AFE_IRQ_MCU_CNT1_SFT,
+		.irq_cnt_maskbit = AFE_IRQ_MCU_CNT1_MASK,
+		.irq_fs_reg = AFE_IRQ_MCU_CON,
+		.irq_fs_shift = IRQ1_MCU_MODE_SFT,
+		.irq_fs_maskbit = IRQ1_MCU_MODE_MASK,
+		.irq_en_reg = AFE_IRQ_MCU_CON,
+		.irq_en_shift = IRQ1_MCU_ON_SFT,
+		.irq_clr_reg = AFE_IRQ_MCU_CLR,
+		.irq_clr_shift = IRQ1_MCU_CLR_SFT,
+	},
+	[MT6797_IRQ_2] = {
+		.id = MT6797_IRQ_2,
+		.irq_cnt_reg = AFE_IRQ_MCU_CNT2,
+		.irq_cnt_shift = AFE_IRQ_MCU_CNT2_SFT,
+		.irq_cnt_maskbit = AFE_IRQ_MCU_CNT2_MASK,
+		.irq_fs_reg = AFE_IRQ_MCU_CON,
+		.irq_fs_shift = IRQ2_MCU_MODE_SFT,
+		.irq_fs_maskbit = IRQ2_MCU_MODE_MASK,
+		.irq_en_reg = AFE_IRQ_MCU_CON,
+		.irq_en_shift = IRQ2_MCU_ON_SFT,
+		.irq_clr_reg = AFE_IRQ_MCU_CLR,
+		.irq_clr_shift = IRQ2_MCU_CLR_SFT,
+	},
+	[MT6797_IRQ_3] = {
+		.id = MT6797_IRQ_3,
+		.irq_cnt_reg = AFE_IRQ_MCU_CNT3,
+		.irq_cnt_shift = AFE_IRQ_MCU_CNT3_SFT,
+		.irq_cnt_maskbit = AFE_IRQ_MCU_CNT3_MASK,
+		.irq_fs_reg = AFE_IRQ_MCU_CON,
+		.irq_fs_shift = IRQ3_MCU_MODE_SFT,
+		.irq_fs_maskbit = IRQ3_MCU_MODE_MASK,
+		.irq_en_reg = AFE_IRQ_MCU_CON,
+		.irq_en_shift = IRQ3_MCU_ON_SFT,
+		.irq_clr_reg = AFE_IRQ_MCU_CLR,
+		.irq_clr_shift = IRQ3_MCU_CLR_SFT,
+	},
+	[MT6797_IRQ_4] = {
+		.id = MT6797_IRQ_4,
+		.irq_cnt_reg = AFE_IRQ_MCU_CNT4,
+		.irq_cnt_shift = AFE_IRQ_MCU_CNT4_SFT,
+		.irq_cnt_maskbit = AFE_IRQ_MCU_CNT4_MASK,
+		.irq_fs_reg = AFE_IRQ_MCU_CON,
+		.irq_fs_shift = IRQ4_MCU_MODE_SFT,
+		.irq_fs_maskbit = IRQ4_MCU_MODE_MASK,
+		.irq_en_reg = AFE_IRQ_MCU_CON,
+		.irq_en_shift = IRQ4_MCU_ON_SFT,
+		.irq_clr_reg = AFE_IRQ_MCU_CLR,
+		.irq_clr_shift = IRQ4_MCU_CLR_SFT,
+	},
+	[MT6797_IRQ_7] = {
+		.id = MT6797_IRQ_7,
+		.irq_cnt_reg = AFE_IRQ_MCU_CNT7,
+		.irq_cnt_shift = AFE_IRQ_MCU_CNT7_SFT,
+		.irq_cnt_maskbit = AFE_IRQ_MCU_CNT7_MASK,
+		.irq_fs_reg = AFE_IRQ_MCU_CON,
+		.irq_fs_shift = IRQ7_MCU_MODE_SFT,
+		.irq_fs_maskbit = IRQ7_MCU_MODE_MASK,
+		.irq_en_reg = AFE_IRQ_MCU_CON,
+		.irq_en_shift = IRQ7_MCU_ON_SFT,
+		.irq_clr_reg = AFE_IRQ_MCU_CLR,
+		.irq_clr_shift = IRQ7_MCU_CLR_SFT,
+	},
+};
+
+static const struct regmap_config mt6797_afe_regmap_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.max_register = AFE_MAX_REGISTER,
+};
+
+static irqreturn_t mt6797_afe_irq_handler(int irq_id, void *dev)
+{
+	struct mtk_base_afe *afe = dev;
+	struct mtk_base_afe_irq *irq;
+	unsigned int status;
+	unsigned int mcu_en;
+	int ret;
+	int i;
+	irqreturn_t irq_ret = IRQ_HANDLED;
+
+	/* get irq that is sent to MCU */
+	regmap_read(afe->regmap, AFE_IRQ_MCU_EN, &mcu_en);
+
+	ret = regmap_read(afe->regmap, AFE_IRQ_MCU_STATUS, &status);
+	if (ret || (status & mcu_en) == 0) {
+		dev_err(afe->dev, "%s(), irq status err, ret %d, status 0x%x, mcu_en 0x%x\n",
+			__func__, ret, status, mcu_en);
+
+		/* only clear IRQ which is sent to MCU */
+		status = mcu_en & AFE_IRQ_STATUS_BITS;
+
+		irq_ret = IRQ_NONE;
+		goto err_irq;
+	}
+
+	for (i = 0; i < MT6797_MEMIF_NUM; i++) {
+		struct mtk_base_afe_memif *memif = &afe->memif[i];
+
+		if (!memif->substream)
+			continue;
+
+		irq = &afe->irqs[memif->irq_usage];
+
+		if (status & (1 << irq->irq_data->irq_en_shift))
+			snd_pcm_period_elapsed(memif->substream);
+	}
+
+err_irq:
+	/* clear irq */
+	regmap_write(afe->regmap,
+		     AFE_IRQ_MCU_CLR,
+		     status & AFE_IRQ_STATUS_BITS);
+
+	return irq_ret;
+}
+
+static int mt6797_afe_runtime_suspend(struct device *dev)
+{
+	struct mtk_base_afe *afe = dev_get_drvdata(dev);
+	unsigned int afe_on_retm;
+	int retry = 0;
+
+	/* disable AFE */
+	regmap_update_bits(afe->regmap, AFE_DAC_CON0, AFE_ON_MASK_SFT, 0x0);
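+	/* poll (up to ~1s) until the AFE state machine has really stopped */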
+	do {
+		regmap_read(afe->regmap, AFE_DAC_CON0, &afe_on_retm);
+		if ((afe_on_retm & AFE_ON_RETM_MASK_SFT) == 0)
+			break;
+
+		udelay(10);
+	} while (++retry < 100000);
+
+	if (retry)
+		dev_warn(afe->dev, "%s(), retry %d\n", __func__, retry);
+
+	/* make sure all irq status are cleared */
+	regmap_update_bits(afe->regmap, AFE_IRQ_MCU_CLR, 0xffff, 0xffff);
+
+	return mt6797_afe_disable_clock(afe);
+}
+
+static int mt6797_afe_runtime_resume(struct device *dev)
+{
+	struct mtk_base_afe *afe = dev_get_drvdata(dev);
+	int ret;
+
+	ret = mt6797_afe_enable_clock(afe);
+	if (ret)
+		return ret;
+
+	/* irq signal to mcu only */
+	regmap_write(afe->regmap, AFE_IRQ_MCU_EN, AFE_IRQ_MCU_EN_MASK_SFT);
+
+	/* force all memif use normal mode */
+	regmap_update_bits(afe->regmap, AFE_MEMIF_HDALIGN,
+			   0x7ff << 16, 0x7ff << 16);
+	/* force cpu use normal mode when access sram data */
+	regmap_update_bits(afe->regmap, AFE_MEMIF_MSB,
+			   CPU_COMPACT_MODE_MASK_SFT, 0);
+	/* force cpu use 8_24 format when writing 32bit data */
+	regmap_update_bits(afe->regmap, AFE_MEMIF_MSB,
+			   CPU_HD_ALIGN_MASK_SFT, 0);
+
+	/* set all output port to 24bit */
+	regmap_update_bits(afe->regmap, AFE_CONN_24BIT,
+			   0x3fffffff, 0x3fffffff);
+
+	/* enable AFE */
+	regmap_update_bits(afe->regmap, AFE_DAC_CON0,
+			   AFE_ON_MASK_SFT,
+			   0x1 << AFE_ON_SFT);
+
+	return 0;
+}
+
+static int mt6797_afe_component_probe(struct snd_soc_component *component)
+{
+	return mtk_afe_add_sub_dai_control(component);
+}
+
+static const struct snd_soc_component_driver mt6797_afe_component = {
+	.name = AFE_PCM_NAME,
+	.ops = &mtk_afe_pcm_ops,
+	.pcm_new = mtk_afe_pcm_new,
+	.pcm_free = mtk_afe_pcm_free,
+	.probe = mt6797_afe_component_probe,
+};
+
+static int mt6797_afe_pcm_dev_probe(struct platform_device *pdev)
+{
+	struct mtk_base_afe *afe;
+	struct mt6797_afe_private *afe_priv;
+	struct resource *res;
+	struct device *dev;
+	int i, irq_id, ret;
+
+	afe = devm_kzalloc(&pdev->dev, sizeof(*afe), GFP_KERNEL);
+	if (!afe)
+		return -ENOMEM;
+
+	afe->platform_priv = devm_kzalloc(&pdev->dev, sizeof(*afe_priv),
+					  GFP_KERNEL);
+	if (!afe->platform_priv)
+		return -ENOMEM;
+
+	afe_priv = afe->platform_priv;
+	afe->dev = &pdev->dev;
+	dev = afe->dev;
+
+	/* initial audio related clock */
+	ret = mt6797_init_clock(afe);
+	if (ret) {
+		dev_err(dev, "init clock error\n");
+		return ret;
+	}
+
+	/* regmap init */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	afe->base_addr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(afe->base_addr))
+		return PTR_ERR(afe->base_addr);
+
+	afe->regmap = devm_regmap_init_mmio(&pdev->dev, afe->base_addr,
+					    &mt6797_afe_regmap_config);
+	if (IS_ERR(afe->regmap))
+		return PTR_ERR(afe->regmap);
+
+	/* init memif */
+	afe->memif_size = MT6797_MEMIF_NUM;
+	afe->memif = devm_kcalloc(dev, afe->memif_size, sizeof(*afe->memif),
+				  GFP_KERNEL);
+	if (!afe->memif)
+		return -ENOMEM;
+
+	for (i = 0; i < afe->memif_size; i++) {
+		afe->memif[i].data = &memif_data[i];
+		afe->memif[i].irq_usage = -1;
+	}
+
+	mutex_init(&afe->irq_alloc_lock);
+
+	/* irq initialize */
+	afe->irqs_size = MT6797_IRQ_NUM;
+	afe->irqs = devm_kcalloc(dev, afe->irqs_size, sizeof(*afe->irqs),
+				 GFP_KERNEL);
+	if (!afe->irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < afe->irqs_size; i++)
+		afe->irqs[i].irq_data = &irq_data[i];
+
+	/* request irq */
+	irq_id = platform_get_irq(pdev, 0);
+	if (!irq_id) {
+		dev_err(dev, "%s no irq found\n", dev->of_node->name);
+		return -ENXIO;
+	}
+	ret = devm_request_irq(dev, irq_id, mt6797_afe_irq_handler,
+			       IRQF_TRIGGER_NONE, "asys-isr", (void *)afe);
+	if (ret) {
+		dev_err(dev, "could not request_irq for asys-isr\n");
+		return ret;
+	}
+
+	/* init sub_dais */
+	afe->num_sub_dais = MT6797_DAI_NUM;
+	afe->sub_dais = devm_kcalloc(dev, afe->num_sub_dais,
+				     sizeof(*afe->sub_dais),
+				     GFP_KERNEL);
+	if (!afe->sub_dais)
+		return -ENOMEM;
+
+	mt6797_dai_adda_register(afe);
+	mt6797_dai_pcm_register(afe);
+	mt6797_dai_hostless_register(afe);
+
+	afe->sub_dais[MT6797_MEMIF_DL1].dai_drivers = mt6797_memif_dai_driver;
+	afe->sub_dais[MT6797_MEMIF_DL1].num_dai_drivers =
+		ARRAY_SIZE(mt6797_memif_dai_driver);
+	afe->sub_dais[MT6797_MEMIF_DL1].dapm_widgets = mt6797_memif_widgets;
+	afe->sub_dais[MT6797_MEMIF_DL1].num_dapm_widgets =
+		ARRAY_SIZE(mt6797_memif_widgets);
+	afe->sub_dais[MT6797_MEMIF_DL1].dapm_routes = mt6797_memif_routes;
+	afe->sub_dais[MT6797_MEMIF_DL1].num_dapm_routes =
+		ARRAY_SIZE(mt6797_memif_routes);
+
+	/* init dai_driver and component_driver */
+	mtk_afe_combine_sub_dai(afe);
+
+	afe->mtk_afe_hardware = &mt6797_afe_hardware;
+	afe->memif_fs = mt6797_memif_fs;
+	afe->irq_fs = mt6797_irq_fs;
+
+	afe->runtime_resume = mt6797_afe_runtime_resume;
+	afe->runtime_suspend = mt6797_afe_runtime_suspend;
+
+	platform_set_drvdata(pdev, afe);
+
+	pm_runtime_enable(dev);
+	if (!pm_runtime_enabled(dev))
+		goto err_pm_disable;
+	pm_runtime_get_sync(&pdev->dev);
+
+	/* register component */
+	ret = devm_snd_soc_register_component(dev, &mt6797_afe_component,
+					      NULL, 0);
+	if (ret) {
+		dev_warn(dev, "err_platform\n");
+		goto err_pm_disable;
+	}
+
+	ret = devm_snd_soc_register_component(afe->dev,
+					      &mt6797_afe_pcm_dai_component,
+					      afe->dai_drivers,
+					      afe->num_dai_drivers);
+	if (ret) {
+		dev_warn(dev, "err_dai_component\n");
+		goto err_pm_disable;
+	}
+
+	return 0;
+
+err_pm_disable:
+	pm_runtime_disable(dev);
+
+	return ret;
+}
+
+static int mt6797_afe_pcm_dev_remove(struct platform_device *pdev)
+{
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		mt6797_afe_runtime_suspend(&pdev->dev);
+	pm_runtime_put_sync(&pdev->dev);
+
+	return 0;
+}
+
+static const struct of_device_id mt6797_afe_pcm_dt_match[] = {
+	{ .compatible = "mediatek,mt6797-audio", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mt6797_afe_pcm_dt_match);
+
+static const struct dev_pm_ops mt6797_afe_pm_ops = {
+	SET_RUNTIME_PM_OPS(mt6797_afe_runtime_suspend,
+			   mt6797_afe_runtime_resume, NULL)
+};
+
+static struct platform_driver mt6797_afe_pcm_driver = {
+	.driver = {
+		   .name = "mt6797-audio",
+		   .of_match_table = mt6797_afe_pcm_dt_match,
+#ifdef CONFIG_PM
+		   .pm = &mt6797_afe_pm_ops,
+#endif
+	},
+	.probe = mt6797_afe_pcm_dev_probe,
+	.remove = mt6797_afe_pcm_dev_remove,
+};
+
+module_platform_driver(mt6797_afe_pcm_driver);
+
+MODULE_DESCRIPTION("Mediatek ALSA SoC AFE platform driver for 6797");
+MODULE_AUTHOR("KaiChieh Chuang <kaichieh.chuang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/mediatek/mt6797/mt6797-dai-adda.c b/sound/soc/mediatek/mt6797/mt6797-dai-adda.c
new file mode 100644
index 0000000..ad08326
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-dai-adda.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// MediaTek ALSA SoC Audio DAI ADDA Control
+//
+// Copyright (c) 2018 MediaTek Inc.
+// Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+
+#include <linux/regmap.h>
+#include <linux/delay.h>
+#include "mt6797-afe-common.h"
+#include "mt6797-interconnection.h"
+#include "mt6797-reg.h"
+
+enum {
+	MTK_AFE_ADDA_DL_RATE_8K = 0,
+	MTK_AFE_ADDA_DL_RATE_11K = 1,
+	MTK_AFE_ADDA_DL_RATE_12K = 2,
+	MTK_AFE_ADDA_DL_RATE_16K = 3,
+	MTK_AFE_ADDA_DL_RATE_22K = 4,
+	MTK_AFE_ADDA_DL_RATE_24K = 5,
+	MTK_AFE_ADDA_DL_RATE_32K = 6,
+	MTK_AFE_ADDA_DL_RATE_44K = 7,
+	MTK_AFE_ADDA_DL_RATE_48K = 8,
+	MTK_AFE_ADDA_DL_RATE_96K = 9,
+	MTK_AFE_ADDA_DL_RATE_192K = 10,
+};
+
+enum {
+	MTK_AFE_ADDA_UL_RATE_8K = 0,
+	MTK_AFE_ADDA_UL_RATE_16K = 1,
+	MTK_AFE_ADDA_UL_RATE_32K = 2,
+	MTK_AFE_ADDA_UL_RATE_48K = 3,
+	MTK_AFE_ADDA_UL_RATE_96K = 4,
+	MTK_AFE_ADDA_UL_RATE_192K = 5,
+	MTK_AFE_ADDA_UL_RATE_48K_HD = 6,
+};
+
+static unsigned int adda_dl_rate_transform(struct mtk_base_afe *afe,
+					   unsigned int rate)
+{
+	switch (rate) {
+	case 8000:
+		return MTK_AFE_ADDA_DL_RATE_8K;
+	case 11025:
+		return MTK_AFE_ADDA_DL_RATE_11K;
+	case 12000:
+		return MTK_AFE_ADDA_DL_RATE_12K;
+	case 16000:
+		return MTK_AFE_ADDA_DL_RATE_16K;
+	case 22050:
+		return MTK_AFE_ADDA_DL_RATE_22K;
+	case 24000:
+		return MTK_AFE_ADDA_DL_RATE_24K;
+	case 32000:
+		return MTK_AFE_ADDA_DL_RATE_32K;
+	case 44100:
+		return MTK_AFE_ADDA_DL_RATE_44K;
+	case 48000:
+		return MTK_AFE_ADDA_DL_RATE_48K;
+	case 96000:
+		return MTK_AFE_ADDA_DL_RATE_96K;
+	case 192000:
+		return MTK_AFE_ADDA_DL_RATE_192K;
+	default:
+		dev_warn(afe->dev, "%s(), rate %d invalid, use 48kHz!!!\n",
+			 __func__, rate);
+		return MTK_AFE_ADDA_DL_RATE_48K;
+	}
+}
+
+static unsigned int adda_ul_rate_transform(struct mtk_base_afe *afe,
+					   unsigned int rate)
+{
+	switch (rate) {
+	case 8000:
+		return MTK_AFE_ADDA_UL_RATE_8K;
+	case 16000:
+		return MTK_AFE_ADDA_UL_RATE_16K;
+	case 32000:
+		return MTK_AFE_ADDA_UL_RATE_32K;
+	case 48000:
+		return MTK_AFE_ADDA_UL_RATE_48K;
+	case 96000:
+		return MTK_AFE_ADDA_UL_RATE_96K;
+	case 192000:
+		return MTK_AFE_ADDA_UL_RATE_192K;
+	default:
+		dev_warn(afe->dev, "%s(), rate %d invalid, use 48kHz!!!\n",
+			 __func__, rate);
+		return MTK_AFE_ADDA_UL_RATE_48K;
+	}
+}
+
+/* dai component */
+static const struct snd_kcontrol_new mtk_adda_dl_ch1_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("DL1_CH1", AFE_CONN3, I_DL1_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH1", AFE_CONN3, I_DL2_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL3_CH1", AFE_CONN3, I_DL3_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN3,
+				    I_ADDA_UL_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN3,
+				    I_ADDA_UL_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("PCM_1_CAP_CH1", AFE_CONN3,
+				    I_PCM_1_CAP_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("PCM_2_CAP_CH1", AFE_CONN3,
+				    I_PCM_2_CAP_CH1, 1, 0),
+};
+
+static const struct snd_kcontrol_new mtk_adda_dl_ch2_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("DL1_CH1", AFE_CONN4, I_DL1_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL1_CH2", AFE_CONN4, I_DL1_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH1", AFE_CONN4, I_DL2_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH2", AFE_CONN4, I_DL2_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL3_CH1", AFE_CONN4, I_DL3_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL3_CH2", AFE_CONN4, I_DL3_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN4,
+				    I_ADDA_UL_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN4,
+				    I_ADDA_UL_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("PCM_1_CAP_CH1", AFE_CONN4,
+				    I_PCM_1_CAP_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("PCM_2_CAP_CH1", AFE_CONN4,
+				    I_PCM_2_CAP_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("PCM_1_CAP_CH2", AFE_CONN4,
+				    I_PCM_1_CAP_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("PCM_2_CAP_CH2", AFE_CONN4,
+				    I_PCM_2_CAP_CH2, 1, 0),
+};
+
+static int mtk_adda_ul_event(struct snd_soc_dapm_widget *w,
+			     struct snd_kcontrol *kcontrol,
+			     int event)
+{
+	struct snd_soc_component *cmpnt = snd_soc_dapm_to_component(w->dapm);
+	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(cmpnt);
+
+	dev_dbg(afe->dev, "%s(), name %s, event 0x%x\n",
+		__func__, w->name, event);
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMD:
+		/* delay 1/fs (lowest rate is 8kHz) = 125us before turning afe off */
+		usleep_range(125, 135);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
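+/*
+ * Power-up ordering for the supply widgets below; DAPM powers supplies up
+ * in ascending subseq order, so the top-level clock gates come first.
+ */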
+enum {
+	SUPPLY_SEQ_AUD_TOP_PDN,
+	SUPPLY_SEQ_ADDA_AFE_ON,
+	SUPPLY_SEQ_ADDA_DL_ON,
+	SUPPLY_SEQ_ADDA_UL_ON,
+};
+
+static const struct snd_soc_dapm_widget mtk_dai_adda_widgets[] = {
+	/* adda */
+	SND_SOC_DAPM_MIXER("ADDA_DL_CH1", SND_SOC_NOPM, 0, 0,
+			   mtk_adda_dl_ch1_mix,
+			   ARRAY_SIZE(mtk_adda_dl_ch1_mix)),
+	SND_SOC_DAPM_MIXER("ADDA_DL_CH2", SND_SOC_NOPM, 0, 0,
+			   mtk_adda_dl_ch2_mix,
+			   ARRAY_SIZE(mtk_adda_dl_ch2_mix)),
+
+	SND_SOC_DAPM_SUPPLY_S("ADDA Enable", SUPPLY_SEQ_ADDA_AFE_ON,
+			      AFE_ADDA_UL_DL_CON0, ADDA_AFE_ON_SFT, 0,
+			      NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY_S("ADDA Playback Enable", SUPPLY_SEQ_ADDA_DL_ON,
+			      AFE_ADDA_DL_SRC2_CON0,
+			      DL_2_SRC_ON_TMP_CTL_PRE_SFT, 0,
+			      NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY_S("ADDA Capture Enable", SUPPLY_SEQ_ADDA_UL_ON,
+			      AFE_ADDA_UL_SRC_CON0,
+			      UL_SRC_ON_TMP_CTL_SFT, 0,
+			      mtk_adda_ul_event,
+			      SND_SOC_DAPM_POST_PMD),
+
+	SND_SOC_DAPM_SUPPLY_S("aud_dac_clk", SUPPLY_SEQ_AUD_TOP_PDN,
+			      AUDIO_TOP_CON0, PDN_DAC_SFT, 1,
+			      NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("aud_dac_predis_clk", SUPPLY_SEQ_AUD_TOP_PDN,
+			      AUDIO_TOP_CON0, PDN_DAC_PREDIS_SFT, 1,
+			      NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY_S("aud_adc_clk", SUPPLY_SEQ_AUD_TOP_PDN,
+			      AUDIO_TOP_CON0, PDN_ADC_SFT, 1,
+			      NULL, 0),
+
+	SND_SOC_DAPM_CLOCK_SUPPLY("mtkaif_26m_clk"),
+};
+
+static const struct snd_soc_dapm_route mtk_dai_adda_routes[] = {
+	/* playback */
+	{"ADDA_DL_CH1", "DL1_CH1", "DL1"},
+	{"ADDA_DL_CH2", "DL1_CH1", "DL1"},
+	{"ADDA_DL_CH2", "DL1_CH2", "DL1"},
+
+	{"ADDA_DL_CH1", "DL2_CH1", "DL2"},
+	{"ADDA_DL_CH2", "DL2_CH1", "DL2"},
+	{"ADDA_DL_CH2", "DL2_CH2", "DL2"},
+
+	{"ADDA_DL_CH1", "DL3_CH1", "DL3"},
+	{"ADDA_DL_CH2", "DL3_CH1", "DL3"},
+	{"ADDA_DL_CH2", "DL3_CH2", "DL3"},
+
+	{"ADDA Playback", NULL, "ADDA_DL_CH1"},
+	{"ADDA Playback", NULL, "ADDA_DL_CH2"},
+
+	/* adda enable */
+	{"ADDA Playback", NULL, "ADDA Enable"},
+	{"ADDA Playback", NULL, "ADDA Playback Enable"},
+	{"ADDA Capture", NULL, "ADDA Enable"},
+	{"ADDA Capture", NULL, "ADDA Capture Enable"},
+
+	/* clk */
+	{"ADDA Playback", NULL, "mtkaif_26m_clk"},
+	{"ADDA Playback", NULL, "aud_dac_clk"},
+	{"ADDA Playback", NULL, "aud_dac_predis_clk"},
+
+	{"ADDA Capture", NULL, "mtkaif_26m_clk"},
+	{"ADDA Capture", NULL, "aud_adc_clk"},
+};
+
+/* dai ops */
+static int mtk_dai_adda_hw_params(struct snd_pcm_substream *substream,
+				  struct snd_pcm_hw_params *params,
+				  struct snd_soc_dai *dai)
+{
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
+	unsigned int rate = params_rate(params);
+
+	dev_dbg(afe->dev, "%s(), id %d, stream %d, rate %d\n",
+		__func__, dai->id, substream->stream, rate);
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
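+		/* downlink: program the DL SRC2 sample rate converter */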
+		unsigned int dl_src2_con0 = 0;
+		unsigned int dl_src2_con1 = 0;
+
+		/* clean predistortion */
+		regmap_write(afe->regmap, AFE_ADDA_PREDIS_CON0, 0);
+		regmap_write(afe->regmap, AFE_ADDA_PREDIS_CON1, 0);
+
+		/* set input sampling rate */
+		dl_src2_con0 = adda_dl_rate_transform(afe, rate) << 28;
+
+		/* set output mode */
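+		/*
+		 * bits [25:24] (DL_2_OUTPUT_SEL_CTL) pick the up-sampling
+		 * ratio; bit 14 (C_DATA_EN_SEL_CTL_PRE) is also set on the
+		 * 96k/192k paths
+		 */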
+		switch (rate) {
+		case 192000:
+			dl_src2_con0 |= (0x1 << 24); /* UP_SAMPLING_RATE_X2 */
+			dl_src2_con0 |= 1 << 14;
+			break;
+		case 96000:
+			dl_src2_con0 |= (0x2 << 24); /* UP_SAMPLING_RATE_X4 */
+			dl_src2_con0 |= 1 << 14;
+			break;
+		default:
+			dl_src2_con0 |= (0x3 << 24); /* UP_SAMPLING_RATE_X8 */
+			break;
+		}
+
+		/* turn off mute function */
+		dl_src2_con0 |= (0x03 << 11);
+
+		/* set voice input data if input sample rate is 8k or 16k */
+		if (rate == 8000 || rate == 16000)
+			dl_src2_con0 |= 0x01 << 5;
+
+		if (rate < 96000) {
+			/* SA suggests applying -0.3dB to the audio/speech path */
+			dl_src2_con1 = 0xf74f0000;
+		} else {
+			/* SA suggests applying -0.3dB to the audio/speech
+			 * path, with DL gain halved:
+			 * 0xffff = 0dB -> 0x8000 = 0dB at 96k, 192k
+			 */
+			dl_src2_con1 = 0x7ba70000;
+		}
+
+		/* turn on down-link gain */
+		dl_src2_con0 |= 0x01 << 1;
+
+		regmap_write(afe->regmap, AFE_ADDA_DL_SRC2_CON0, dl_src2_con0);
+		regmap_write(afe->regmap, AFE_ADDA_DL_SRC2_CON1, dl_src2_con1);
+	} else {
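+		/* uplink: select the ADC input and program the UL SRC */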
+		unsigned int voice_mode = 0;
+		unsigned int ul_src_con0 = 0;	/* default value */
+
+		/* Using Internal ADC */
+		regmap_update_bits(afe->regmap,
+				   AFE_ADDA_TOP_CON0,
+				   0x1 << 0,
+				   0x0 << 0);
+
+		voice_mode = adda_ul_rate_transform(afe, rate);
+
+		ul_src_con0 |= (voice_mode << 17) & (0x7 << 17);
+
+		/* up8x txif sat on */
+		regmap_write(afe->regmap, AFE_ADDA_NEWIF_CFG0, 0x03F87201);
+
+		if (rate >= 96000) {	/* hires */
+			/* use hires format [1 0 23] */
+			regmap_update_bits(afe->regmap,
+					   AFE_ADDA_NEWIF_CFG0,
+					   0x1 << 5,
+					   0x1 << 5);
+
+			regmap_update_bits(afe->regmap,
+					   AFE_ADDA_NEWIF_CFG2,
+					   0xf << 28,
+					   voice_mode << 28);
+		} else {	/* normal 8~48k */
+			/* use fixed 260k anc path */
+			regmap_update_bits(afe->regmap,
+					   AFE_ADDA_NEWIF_CFG2,
+					   0xf << 28,
+					   8 << 28);
+
+			/* ul_use_cic_out */
+			ul_src_con0 |= 0x1 << 20;
+		}
+
+		regmap_update_bits(afe->regmap,
+				   AFE_ADDA_UL_SRC_CON0,
+				   0xfffffffe,
+				   ul_src_con0);
+	}
+
+	return 0;
+}
+
+static const struct snd_soc_dai_ops mtk_dai_adda_ops = {
+	.hw_params = mtk_dai_adda_hw_params,
+};
+
+/* dai driver */
+#define MTK_ADDA_PLAYBACK_RATES (SNDRV_PCM_RATE_8000_48000 |\
+				 SNDRV_PCM_RATE_96000 |\
+				 SNDRV_PCM_RATE_192000)
+
+#define MTK_ADDA_CAPTURE_RATES (SNDRV_PCM_RATE_8000 |\
+				SNDRV_PCM_RATE_16000 |\
+				SNDRV_PCM_RATE_32000 |\
+				SNDRV_PCM_RATE_48000 |\
+				SNDRV_PCM_RATE_96000 |\
+				SNDRV_PCM_RATE_192000)
+
+#define MTK_ADDA_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
+			  SNDRV_PCM_FMTBIT_S24_LE |\
+			  SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_driver mtk_dai_adda_driver[] = {
+	{
+		.name = "ADDA",
+		.id = MT6797_DAI_ADDA,
+		.playback = {
+			.stream_name = "ADDA Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_ADDA_PLAYBACK_RATES,
+			.formats = MTK_ADDA_FORMATS,
+		},
+		.capture = {
+			.stream_name = "ADDA Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_ADDA_CAPTURE_RATES,
+			.formats = MTK_ADDA_FORMATS,
+		},
+		.ops = &mtk_dai_adda_ops,
+	},
+};
+
+int mt6797_dai_adda_register(struct mtk_base_afe *afe)
+{
+	int id = MT6797_DAI_ADDA;
+
+	afe->sub_dais[id].dai_drivers = mtk_dai_adda_driver;
+	afe->sub_dais[id].num_dai_drivers = ARRAY_SIZE(mtk_dai_adda_driver);
+
+	afe->sub_dais[id].dapm_widgets = mtk_dai_adda_widgets;
+	afe->sub_dais[id].num_dapm_widgets = ARRAY_SIZE(mtk_dai_adda_widgets);
+	afe->sub_dais[id].dapm_routes = mtk_dai_adda_routes;
+	afe->sub_dais[id].num_dapm_routes = ARRAY_SIZE(mtk_dai_adda_routes);
+	return 0;
+}
diff --git a/sound/soc/mediatek/mt6797/mt6797-dai-hostless.c b/sound/soc/mediatek/mt6797/mt6797-dai-hostless.c
new file mode 100644
index 0000000..4cf985b
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-dai-hostless.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// MediaTek ALSA SoC Audio DAI Hostless Control
+//
+// Copyright (c) 2018 MediaTek Inc.
+// Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+
+#include "mt6797-afe-common.h"
+
+/* dai component */
+static const struct snd_soc_dapm_route mtk_dai_hostless_routes[] = {
+	/* Hostless ADDA Loopback */
+	{"ADDA_DL_CH1", "ADDA_UL_CH1", "Hostless LPBK DL"},
+	{"ADDA_DL_CH1", "ADDA_UL_CH2", "Hostless LPBK DL"},
+	{"ADDA_DL_CH2", "ADDA_UL_CH1", "Hostless LPBK DL"},
+	{"ADDA_DL_CH2", "ADDA_UL_CH2", "Hostless LPBK DL"},
+	{"Hostless LPBK UL", NULL, "ADDA Capture"},
+
+	/* Hostless Speech */
+	{"ADDA_DL_CH1", "PCM_1_CAP_CH1", "Hostless Speech DL"},
+	{"ADDA_DL_CH2", "PCM_1_CAP_CH1", "Hostless Speech DL"},
+	{"ADDA_DL_CH2", "PCM_1_CAP_CH2", "Hostless Speech DL"},
+	{"ADDA_DL_CH1", "PCM_2_CAP_CH1", "Hostless Speech DL"},
+	{"ADDA_DL_CH2", "PCM_2_CAP_CH1", "Hostless Speech DL"},
+	{"ADDA_DL_CH2", "PCM_2_CAP_CH2", "Hostless Speech DL"},
+	{"PCM_1_PB_CH1", "ADDA_UL_CH1", "Hostless Speech DL"},
+	{"PCM_1_PB_CH2", "ADDA_UL_CH2", "Hostless Speech DL"},
+	{"PCM_2_PB_CH1", "ADDA_UL_CH1", "Hostless Speech DL"},
+	{"PCM_2_PB_CH2", "ADDA_UL_CH2", "Hostless Speech DL"},
+
+	{"Hostless Speech UL", NULL, "PCM 1 Capture"},
+	{"Hostless Speech UL", NULL, "PCM 2 Capture"},
+	{"Hostless Speech UL", NULL, "ADDA Capture"},
+};
+
+/* dai ops */
+static int mtk_dai_hostless_startup(struct snd_pcm_substream *substream,
+				    struct snd_soc_dai *dai)
+{
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
+
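+	/* constrain the hostless stream to the common AFE hardware caps */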
+	return snd_soc_set_runtime_hwparams(substream, afe->mtk_afe_hardware);
+}
+
+static const struct snd_soc_dai_ops mtk_dai_hostless_ops = {
+	.startup = mtk_dai_hostless_startup,
+};
+
+/* dai driver */
+#define MTK_HOSTLESS_RATES (SNDRV_PCM_RATE_8000_48000 |\
+			   SNDRV_PCM_RATE_88200 |\
+			   SNDRV_PCM_RATE_96000 |\
+			   SNDRV_PCM_RATE_176400 |\
+			   SNDRV_PCM_RATE_192000)
+
+#define MTK_HOSTLESS_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
+			     SNDRV_PCM_FMTBIT_S24_LE |\
+			     SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_driver mtk_dai_hostless_driver[] = {
+	{
+		.name = "Hostless LPBK DAI",
+		.id = MT6797_DAI_HOSTLESS_LPBK,
+		.playback = {
+			.stream_name = "Hostless LPBK DL",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_HOSTLESS_RATES,
+			.formats = MTK_HOSTLESS_FORMATS,
+		},
+		.capture = {
+			.stream_name = "Hostless LPBK UL",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_HOSTLESS_RATES,
+			.formats = MTK_HOSTLESS_FORMATS,
+		},
+		.ops = &mtk_dai_hostless_ops,
+	},
+	{
+		.name = "Hostless Speech DAI",
+		.id = MT6797_DAI_HOSTLESS_SPEECH,
+		.playback = {
+			.stream_name = "Hostless Speech DL",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_HOSTLESS_RATES,
+			.formats = MTK_HOSTLESS_FORMATS,
+		},
+		.capture = {
+			.stream_name = "Hostless Speech UL",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_HOSTLESS_RATES,
+			.formats = MTK_HOSTLESS_FORMATS,
+		},
+		.ops = &mtk_dai_hostless_ops,
+	},
+};
+
+int mt6797_dai_hostless_register(struct mtk_base_afe *afe)
+{
+	int id = MT6797_DAI_HOSTLESS_LPBK;
+
+	afe->sub_dais[id].dai_drivers = mtk_dai_hostless_driver;
+	afe->sub_dais[id].num_dai_drivers = ARRAY_SIZE(mtk_dai_hostless_driver);
+
+	afe->sub_dais[id].dapm_routes = mtk_dai_hostless_routes;
+	afe->sub_dais[id].num_dapm_routes = ARRAY_SIZE(mtk_dai_hostless_routes);
+
+	return 0;
+}
diff --git a/sound/soc/mediatek/mt6797/mt6797-dai-pcm.c b/sound/soc/mediatek/mt6797/mt6797-dai-pcm.c
new file mode 100644
index 0000000..16d5b50
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-dai-pcm.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// MediaTek ALSA SoC Audio DAI PCM Control
+//
+// Copyright (c) 2018 MediaTek Inc.
+// Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+
+#include <linux/regmap.h>
+#include <sound/pcm_params.h>
+#include "mt6797-afe-common.h"
+#include "mt6797-interconnection.h"
+#include "mt6797-reg.h"
+
+enum AUD_TX_LCH_RPT {
+	AUD_TX_LCH_RPT_NO_REPEAT = 0,
+	AUD_TX_LCH_RPT_REPEAT = 1
+};
+
+enum AUD_VBT_16K_MODE {
+	AUD_VBT_16K_MODE_DISABLE = 0,
+	AUD_VBT_16K_MODE_ENABLE = 1
+};
+
+enum AUD_EXT_MODEM {
+	AUD_EXT_MODEM_SELECT_INTERNAL = 0,
+	AUD_EXT_MODEM_SELECT_EXTERNAL = 1
+};
+
+enum AUD_PCM_SYNC_TYPE {
+	/* bck sync length = 1 */
+	AUD_PCM_ONE_BCK_CYCLE_SYNC = 0,
+	/* bck sync length = PCM_INTF_CON1[9:13] */
+	AUD_PCM_EXTENDED_BCK_CYCLE_SYNC = 1
+};
+
+enum AUD_BT_MODE {
+	AUD_BT_MODE_DUAL_MIC_ON_TX = 0,
+	AUD_BT_MODE_SINGLE_MIC_ON_TX = 1
+};
+
+enum AUD_PCM_AFIFO_SRC {
+	/* slave mode & external modem use different crystals */
+	AUD_PCM_AFIFO_ASRC = 0,
+	/* slave mode & external modem use the same crystal */
+	AUD_PCM_AFIFO_AFIFO = 1
+};
+
+enum AUD_PCM_CLOCK_SOURCE {
+	AUD_PCM_CLOCK_MASTER_MODE = 0,
+	AUD_PCM_CLOCK_SLAVE_MODE = 1
+};
+
+enum AUD_PCM_WLEN {
+	AUD_PCM_WLEN_PCM_32_BCK_CYCLES = 0,
+	AUD_PCM_WLEN_PCM_64_BCK_CYCLES = 1
+};
+
+enum AUD_PCM_MODE {
+	AUD_PCM_MODE_PCM_MODE_8K = 0,
+	AUD_PCM_MODE_PCM_MODE_16K = 1,
+	AUD_PCM_MODE_PCM_MODE_32K = 2,
+	AUD_PCM_MODE_PCM_MODE_48K = 3,
+};
+
+enum AUD_PCM_FMT {
+	AUD_PCM_FMT_I2S = 0,
+	AUD_PCM_FMT_EIAJ = 1,
+	AUD_PCM_FMT_PCM_MODE_A = 2,
+	AUD_PCM_FMT_PCM_MODE_B = 3
+};
+
+enum AUD_BCLK_OUT_INV {
+	AUD_BCLK_OUT_INV_NO_INVERSE = 0,
+	AUD_BCLK_OUT_INV_INVERSE = 1
+};
+
+enum AUD_PCM_EN {
+	AUD_PCM_EN_DISABLE = 0,
+	AUD_PCM_EN_ENABLE = 1
+};
+
+/* dai component */
+static const struct snd_kcontrol_new mtk_pcm_1_playback_ch1_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN7,
+				    I_ADDA_UL_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH1", AFE_CONN7,
+				    I_DL2_CH1, 1, 0),
+};
+
+static const struct snd_kcontrol_new mtk_pcm_1_playback_ch2_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN8,
+				    I_ADDA_UL_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH2", AFE_CONN8,
+				    I_DL2_CH2, 1, 0),
+};
+
+static const struct snd_kcontrol_new mtk_pcm_1_playback_ch4_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("DL1_CH1", AFE_CONN27,
+				    I_DL1_CH1, 1, 0),
+};
+
+static const struct snd_kcontrol_new mtk_pcm_2_playback_ch1_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH1", AFE_CONN17,
+				    I_ADDA_UL_CH1, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH1", AFE_CONN17,
+				    I_DL2_CH1, 1, 0),
+};
+
+static const struct snd_kcontrol_new mtk_pcm_2_playback_ch2_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("ADDA_UL_CH2", AFE_CONN18,
+				    I_ADDA_UL_CH2, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("DL2_CH2", AFE_CONN18,
+				    I_DL2_CH2, 1, 0),
+};
+
+static const struct snd_kcontrol_new mtk_pcm_2_playback_ch4_mix[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("DL1_CH1", AFE_CONN24,
+				    I_DL1_CH1, 1, 0),
+};
+
+static const struct snd_soc_dapm_widget mtk_dai_pcm_widgets[] = {
+	/* inter-connections */
+	SND_SOC_DAPM_MIXER("PCM_1_PB_CH1", SND_SOC_NOPM, 0, 0,
+			   mtk_pcm_1_playback_ch1_mix,
+			   ARRAY_SIZE(mtk_pcm_1_playback_ch1_mix)),
+	SND_SOC_DAPM_MIXER("PCM_1_PB_CH2", SND_SOC_NOPM, 0, 0,
+			   mtk_pcm_1_playback_ch2_mix,
+			   ARRAY_SIZE(mtk_pcm_1_playback_ch2_mix)),
+	SND_SOC_DAPM_MIXER("PCM_1_PB_CH4", SND_SOC_NOPM, 0, 0,
+			   mtk_pcm_1_playback_ch4_mix,
+			   ARRAY_SIZE(mtk_pcm_1_playback_ch4_mix)),
+	SND_SOC_DAPM_MIXER("PCM_2_PB_CH1", SND_SOC_NOPM, 0, 0,
+			   mtk_pcm_2_playback_ch1_mix,
+			   ARRAY_SIZE(mtk_pcm_2_playback_ch1_mix)),
+	SND_SOC_DAPM_MIXER("PCM_2_PB_CH2", SND_SOC_NOPM, 0, 0,
+			   mtk_pcm_2_playback_ch2_mix,
+			   ARRAY_SIZE(mtk_pcm_2_playback_ch2_mix)),
+	SND_SOC_DAPM_MIXER("PCM_2_PB_CH4", SND_SOC_NOPM, 0, 0,
+			   mtk_pcm_2_playback_ch4_mix,
+			   ARRAY_SIZE(mtk_pcm_2_playback_ch4_mix)),
+
+	SND_SOC_DAPM_SUPPLY("PCM_1_EN", PCM_INTF_CON1, PCM_EN_SFT, 0,
+			    NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("PCM_2_EN", PCM2_INTF_CON, PCM2_EN_SFT, 0,
+			    NULL, 0),
+
+	SND_SOC_DAPM_INPUT("MD1_TO_AFE"),
+	SND_SOC_DAPM_INPUT("MD2_TO_AFE"),
+	SND_SOC_DAPM_OUTPUT("AFE_TO_MD1"),
+	SND_SOC_DAPM_OUTPUT("AFE_TO_MD2"),
+};
+
+static const struct snd_soc_dapm_route mtk_dai_pcm_routes[] = {
+	{"PCM 1 Playback", NULL, "PCM_1_PB_CH1"},
+	{"PCM 1 Playback", NULL, "PCM_1_PB_CH2"},
+	{"PCM 1 Playback", NULL, "PCM_1_PB_CH4"},
+	{"PCM 2 Playback", NULL, "PCM_2_PB_CH1"},
+	{"PCM 2 Playback", NULL, "PCM_2_PB_CH2"},
+	{"PCM 2 Playback", NULL, "PCM_2_PB_CH4"},
+
+	{"PCM 1 Playback", NULL, "PCM_1_EN"},
+	{"PCM 2 Playback", NULL, "PCM_2_EN"},
+	{"PCM 1 Capture", NULL, "PCM_1_EN"},
+	{"PCM 2 Capture", NULL, "PCM_2_EN"},
+
+	{"AFE_TO_MD1", NULL, "PCM 2 Playback"},
+	{"AFE_TO_MD2", NULL, "PCM 1 Playback"},
+	{"PCM 2 Capture", NULL, "MD1_TO_AFE"},
+	{"PCM 1 Capture", NULL, "MD2_TO_AFE"},
+
+	{"PCM_1_PB_CH1", "DL2_CH1", "DL2"},
+	{"PCM_1_PB_CH2", "DL2_CH2", "DL2"},
+	{"PCM_1_PB_CH4", "DL1_CH1", "DL1"},
+	{"PCM_2_PB_CH1", "DL2_CH1", "DL2"},
+	{"PCM_2_PB_CH2", "DL2_CH2", "DL2"},
+	{"PCM_2_PB_CH4", "DL1_CH1", "DL1"},
+};
+
+/* dai ops */
+static int mtk_dai_pcm_hw_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
+{
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
+	unsigned int rate = params_rate(params);
+	unsigned int rate_reg = mt6797_rate_transform(afe->dev, rate, dai->id);
+	unsigned int pcm_con = 0;
+
+	dev_dbg(afe->dev, "%s(), id %d, stream %d, rate %d, rate_reg %d, widget active p %d, c %d\n",
+		__func__,
+		dai->id,
+		substream->stream,
+		rate,
+		rate_reg,
+		dai->playback_widget->active,
+		dai->capture_widget->active);
+
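+	/* both directions share one interface; don't reprogram it mid-stream */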
+	if (dai->playback_widget->active || dai->capture_widget->active)
+		return 0;
+
+	switch (dai->id) {
+	case MT6797_DAI_PCM_1:
+		pcm_con |= AUD_BCLK_OUT_INV_NO_INVERSE << PCM_BCLK_OUT_INV_SFT;
+		pcm_con |= AUD_TX_LCH_RPT_NO_REPEAT << PCM_TX_LCH_RPT_SFT;
+		pcm_con |= AUD_VBT_16K_MODE_DISABLE << PCM_VBT_16K_MODE_SFT;
+		pcm_con |= AUD_EXT_MODEM_SELECT_INTERNAL << PCM_EXT_MODEM_SFT;
+		pcm_con |= 0 << PCM_SYNC_LENGTH_SFT;
+		pcm_con |= AUD_PCM_ONE_BCK_CYCLE_SYNC << PCM_SYNC_TYPE_SFT;
+		pcm_con |= AUD_BT_MODE_DUAL_MIC_ON_TX << PCM_BT_MODE_SFT;
+		pcm_con |= AUD_PCM_AFIFO_AFIFO << PCM_BYP_ASRC_SFT;
+		pcm_con |= AUD_PCM_CLOCK_SLAVE_MODE << PCM_SLAVE_SFT;
+		pcm_con |= rate_reg << PCM_MODE_SFT;
+		pcm_con |= AUD_PCM_FMT_PCM_MODE_B << PCM_FMT_SFT;
+
+		regmap_update_bits(afe->regmap, PCM_INTF_CON1,
+				   0xfffffffe, pcm_con);
+		break;
+	case MT6797_DAI_PCM_2:
+		pcm_con |= AUD_TX_LCH_RPT_NO_REPEAT << PCM2_TX_LCH_RPT_SFT;
+		pcm_con |= AUD_VBT_16K_MODE_DISABLE << PCM2_VBT_16K_MODE_SFT;
+		pcm_con |= AUD_BT_MODE_DUAL_MIC_ON_TX << PCM2_BT_MODE_SFT;
+		pcm_con |= AUD_PCM_AFIFO_AFIFO << PCM2_AFIFO_SFT;
+		pcm_con |= AUD_PCM_WLEN_PCM_32_BCK_CYCLES << PCM2_WLEN_SFT;
+		pcm_con |= rate_reg << PCM2_MODE_SFT;
+		pcm_con |= AUD_PCM_FMT_PCM_MODE_B << PCM2_FMT_SFT;
+
+		regmap_update_bits(afe->regmap, PCM2_INTF_CON,
+				   0xfffffffe, pcm_con);
+		break;
+	default:
+		dev_warn(afe->dev, "%s(), id %d not support\n",
+			 __func__, dai->id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct snd_soc_dai_ops mtk_dai_pcm_ops = {
+	.hw_params = mtk_dai_pcm_hw_params,
+};
+
+/* dai driver */
+#define MTK_PCM_RATES (SNDRV_PCM_RATE_8000 |\
+		       SNDRV_PCM_RATE_16000 |\
+		       SNDRV_PCM_RATE_32000 |\
+		       SNDRV_PCM_RATE_48000)
+
+#define MTK_PCM_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
+			 SNDRV_PCM_FMTBIT_S24_LE |\
+			 SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_driver mtk_dai_pcm_driver[] = {
+	{
+		.name = "PCM 1",
+		.id = MT6797_DAI_PCM_1,
+		.playback = {
+			.stream_name = "PCM 1 Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.capture = {
+			.stream_name = "PCM 1 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_dai_pcm_ops,
+		.symmetric_rates = 1,
+		.symmetric_samplebits = 1,
+	},
+	{
+		.name = "PCM 2",
+		.id = MT6797_DAI_PCM_2,
+		.playback = {
+			.stream_name = "PCM 2 Playback",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.capture = {
+			.stream_name = "PCM 2 Capture",
+			.channels_min = 1,
+			.channels_max = 2,
+			.rates = MTK_PCM_RATES,
+			.formats = MTK_PCM_FORMATS,
+		},
+		.ops = &mtk_dai_pcm_ops,
+		.symmetric_rates = 1,
+		.symmetric_samplebits = 1,
+	},
+};
+
+int mt6797_dai_pcm_register(struct mtk_base_afe *afe)
+{
+	int id = MT6797_DAI_PCM_1;
+
+	afe->sub_dais[id].dai_drivers = mtk_dai_pcm_driver;
+	afe->sub_dais[id].num_dai_drivers = ARRAY_SIZE(mtk_dai_pcm_driver);
+
+	afe->sub_dais[id].dapm_widgets = mtk_dai_pcm_widgets;
+	afe->sub_dais[id].num_dapm_widgets = ARRAY_SIZE(mtk_dai_pcm_widgets);
+	afe->sub_dais[id].dapm_routes = mtk_dai_pcm_routes;
+	afe->sub_dais[id].num_dapm_routes = ARRAY_SIZE(mtk_dai_pcm_routes);
+
+	return 0;
+}
diff --git a/sound/soc/mediatek/mt6797/mt6797-interconnection.h b/sound/soc/mediatek/mt6797/mt6797-interconnection.h
new file mode 100644
index 0000000..07b759b
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-interconnection.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * MediaTek MT6797 audio driver interconnection definition
+ *
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+ */
+
+#ifndef _MT6797_INTERCONNECTION_H_
+#define _MT6797_INTERCONNECTION_H_
+
+#define I_I2S0_CH1 0
+#define I_I2S0_CH2 1
+#define I_ADDA_UL_CH1 3
+#define I_ADDA_UL_CH2 4
+#define I_DL1_CH1 5
+#define I_DL1_CH2 6
+#define I_DL2_CH1 7
+#define I_DL2_CH2 8
+#define I_PCM_1_CAP_CH1 9
+#define I_GAIN1_OUT_CH1 10
+#define I_GAIN1_OUT_CH2 11
+#define I_GAIN2_OUT_CH1 12
+#define I_GAIN2_OUT_CH2 13
+#define I_PCM_2_CAP_CH1 14
+#define I_PCM_2_CAP_CH2 21
+#define I_PCM_1_CAP_CH2 22
+#define I_DL3_CH1 23
+#define I_DL3_CH2 24
+#define I_I2S2_CH1 25
+#define I_I2S2_CH2 26
+
+#endif
diff --git a/sound/soc/mediatek/mt6797/mt6797-mt6351.c b/sound/soc/mediatek/mt6797/mt6797-mt6351.c
new file mode 100644
index 0000000..b1558c5
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-mt6351.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// mt6797-mt6351.c  --  MT6797 MT6351 ALSA SoC machine driver
+//
+// Copyright (c) 2018 MediaTek Inc.
+// Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+
+#include <linux/module.h>
+#include <sound/soc.h>
+
+#include "mt6797-afe-common.h"
+
+static struct snd_soc_dai_link mt6797_mt6351_dai_links[] = {
+	/* FE */
+	{
+		.name = "Playback_1",
+		.stream_name = "Playback_1",
+		.cpu_dai_name = "DL1",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_playback = 1,
+	},
+	{
+		.name = "Playback_2",
+		.stream_name = "Playback_2",
+		.cpu_dai_name = "DL2",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_playback = 1,
+	},
+	{
+		.name = "Playback_3",
+		.stream_name = "Playback_3",
+		.cpu_dai_name = "DL3",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_playback = 1,
+	},
+	{
+		.name = "Capture_1",
+		.stream_name = "Capture_1",
+		.cpu_dai_name = "UL1",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_capture = 1,
+	},
+	{
+		.name = "Capture_2",
+		.stream_name = "Capture_2",
+		.cpu_dai_name = "UL2",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_capture = 1,
+	},
+	{
+		.name = "Capture_3",
+		.stream_name = "Capture_3",
+		.cpu_dai_name = "UL3",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_capture = 1,
+	},
+	{
+		.name = "Capture_Mono_1",
+		.stream_name = "Capture_Mono_1",
+		.cpu_dai_name = "UL_MONO_1",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_capture = 1,
+	},
+	{
+		.name = "Hostless_LPBK",
+		.stream_name = "Hostless_LPBK",
+		.cpu_dai_name = "Hostless LPBK DAI",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_playback = 1,
+		.dpcm_capture = 1,
+		.ignore_suspend = 1,
+	},
+	{
+		.name = "Hostless_Speech",
+		.stream_name = "Hostless_Speech",
+		.cpu_dai_name = "Hostless Speech DAI",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.trigger = {SND_SOC_DPCM_TRIGGER_PRE,
+			    SND_SOC_DPCM_TRIGGER_PRE},
+		.dynamic = 1,
+		.dpcm_playback = 1,
+		.dpcm_capture = 1,
+		.ignore_suspend = 1,
+	},
+	/* BE */
+	{
+		.name = "Primary Codec",
+		.cpu_dai_name = "ADDA",
+		.codec_dai_name = "mt6351-snd-codec-aif1",
+		.no_pcm = 1,
+		.dpcm_playback = 1,
+		.dpcm_capture = 1,
+		.ignore_suspend = 1,
+	},
+	{
+		.name = "PCM 1",
+		.cpu_dai_name = "PCM 1",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.no_pcm = 1,
+		.dpcm_playback = 1,
+		.dpcm_capture = 1,
+		.ignore_suspend = 1,
+	},
+	{
+		.name = "PCM 2",
+		.cpu_dai_name = "PCM 2",
+		.codec_name = "snd-soc-dummy",
+		.codec_dai_name = "snd-soc-dummy-dai",
+		.no_pcm = 1,
+		.dpcm_playback = 1,
+		.dpcm_capture = 1,
+		.ignore_suspend = 1,
+	},
+};
+
+static struct snd_soc_card mt6797_mt6351_card = {
+	.name = "mt6797-mt6351",
+	.owner = THIS_MODULE,
+	.dai_link = mt6797_mt6351_dai_links,
+	.num_links = ARRAY_SIZE(mt6797_mt6351_dai_links),
+};
+
+static int mt6797_mt6351_dev_probe(struct platform_device *pdev)
+{
+	struct snd_soc_card *card = &mt6797_mt6351_card;
+	struct device_node *platform_node, *codec_node;
+	int ret, i;
+
+	card->dev = &pdev->dev;
+
+	platform_node = of_parse_phandle(pdev->dev.of_node,
+					 "mediatek,platform", 0);
+	if (!platform_node) {
+		dev_err(&pdev->dev, "Property 'platform' missing or invalid\n");
+		return -EINVAL;
+	}
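+	/* point every DAI link without an explicit platform at the AFE node */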
+	for (i = 0; i < card->num_links; i++) {
+		if (mt6797_mt6351_dai_links[i].platform_name)
+			continue;
+		mt6797_mt6351_dai_links[i].platform_of_node = platform_node;
+	}
+
+	codec_node = of_parse_phandle(pdev->dev.of_node,
+				      "mediatek,audio-codec", 0);
+	if (!codec_node) {
+		dev_err(&pdev->dev,
+			"Property 'audio-codec' missing or invalid\n");
+		return -EINVAL;
+	}
+	for (i = 0; i < card->num_links; i++) {
+		if (mt6797_mt6351_dai_links[i].codec_name)
+			continue;
+		mt6797_mt6351_dai_links[i].codec_of_node = codec_node;
+	}
+
+	ret = devm_snd_soc_register_card(&pdev->dev, card);
+	if (ret)
+		dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
+			__func__, ret);
+
+	return ret;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id mt6797_mt6351_dt_match[] = {
+	{.compatible = "mediatek,mt6797-mt6351-sound",},
+	{}
+};
+#endif
+
+static struct platform_driver mt6797_mt6351_driver = {
+	.driver = {
+		.name = "mt6797-mt6351",
+		.owner = THIS_MODULE,
+#ifdef CONFIG_OF
+		.of_match_table = mt6797_mt6351_dt_match,
+#endif
+	},
+	.probe = mt6797_mt6351_dev_probe,
+};
+
+module_platform_driver(mt6797_mt6351_driver);
+
+/* Module information */
+MODULE_DESCRIPTION("MT6797 MT6351 ALSA SoC machine driver");
+MODULE_AUTHOR("KaiChieh Chuang <kaichieh.chuang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("mt6797 mt6351 soc card");
diff --git a/sound/soc/mediatek/mt6797/mt6797-reg.h b/sound/soc/mediatek/mt6797/mt6797-reg.h
new file mode 100644
index 0000000..978f146
--- /dev/null
+++ b/sound/soc/mediatek/mt6797/mt6797-reg.h
@@ -0,0 +1,1015 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mt6797-reg.h  --  MediaTek MT6797 audio driver reg definition
+ *
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
+ */
+
+#ifndef _MT6797_REG_H_
+#define _MT6797_REG_H_
+
+#define AUDIO_TOP_CON0            0x0000
+#define AUDIO_TOP_CON1            0x0004
+#define AUDIO_TOP_CON3            0x000c
+#define AFE_DAC_CON0              0x0010
+#define AFE_DAC_CON1              0x0014
+#define AFE_I2S_CON               0x0018
+#define AFE_DAIBT_CON0            0x001c
+#define AFE_CONN0                 0x0020
+#define AFE_CONN1                 0x0024
+#define AFE_CONN2                 0x0028
+#define AFE_CONN3                 0x002c
+#define AFE_CONN4                 0x0030
+#define AFE_I2S_CON1              0x0034
+#define AFE_I2S_CON2              0x0038
+#define AFE_MRGIF_CON             0x003c
+#define AFE_DL1_BASE              0x0040
+#define AFE_DL1_CUR               0x0044
+#define AFE_DL1_END               0x0048
+#define AFE_I2S_CON3              0x004c
+#define AFE_DL2_BASE              0x0050
+#define AFE_DL2_CUR               0x0054
+#define AFE_DL2_END               0x0058
+#define AFE_CONN5                 0x005c
+#define AFE_CONN_24BIT            0x006c
+#define AFE_AWB_BASE              0x0070
+#define AFE_AWB_END               0x0078
+#define AFE_AWB_CUR               0x007c
+#define AFE_VUL_BASE              0x0080
+#define AFE_VUL_END               0x0088
+#define AFE_VUL_CUR               0x008c
+#define AFE_DAI_BASE              0x0090
+#define AFE_DAI_END               0x0098
+#define AFE_DAI_CUR               0x009c
+#define AFE_CONN6                 0x00bc
+#define AFE_MEMIF_MSB             0x00cc
+#define AFE_MEMIF_MON0            0x00d0
+#define AFE_MEMIF_MON1            0x00d4
+#define AFE_MEMIF_MON2            0x00d8
+#define AFE_MEMIF_MON4            0x00e0
+#define AFE_ADDA_DL_SRC2_CON0     0x0108
+#define AFE_ADDA_DL_SRC2_CON1     0x010c
+#define AFE_ADDA_UL_SRC_CON0      0x0114
+#define AFE_ADDA_UL_SRC_CON1      0x0118
+#define AFE_ADDA_TOP_CON0         0x0120
+#define AFE_ADDA_UL_DL_CON0       0x0124
+#define AFE_ADDA_SRC_DEBUG        0x012c
+#define AFE_ADDA_SRC_DEBUG_MON0   0x0130
+#define AFE_ADDA_SRC_DEBUG_MON1   0x0134
+#define AFE_ADDA_NEWIF_CFG0       0x0138
+#define AFE_ADDA_NEWIF_CFG1       0x013c
+#define AFE_ADDA_NEWIF_CFG2       0x0140
+#define AFE_DMA_CTL               0x0150
+#define AFE_DMA_MON0              0x0154
+#define AFE_DMA_MON1              0x0158
+#define AFE_SIDETONE_DEBUG        0x01d0
+#define AFE_SIDETONE_MON          0x01d4
+#define AFE_SIDETONE_CON0         0x01e0
+#define AFE_SIDETONE_COEFF        0x01e4
+#define AFE_SIDETONE_CON1         0x01e8
+#define AFE_SIDETONE_GAIN         0x01ec
+#define AFE_SGEN_CON0             0x01f0
+#define AFE_SINEGEN_CON_TDM       0x01fc
+#define AFE_TOP_CON0              0x0200
+#define AFE_ADDA_PREDIS_CON0      0x0260
+#define AFE_ADDA_PREDIS_CON1      0x0264
+#define AFE_MRGIF_MON0            0x0270
+#define AFE_MRGIF_MON1            0x0274
+#define AFE_MRGIF_MON2            0x0278
+#define AFE_I2S_MON               0x027c
+#define AFE_MOD_DAI_BASE          0x0330
+#define AFE_MOD_DAI_END           0x0338
+#define AFE_MOD_DAI_CUR           0x033c
+#define AFE_VUL_D2_BASE           0x0350
+#define AFE_VUL_D2_END            0x0358
+#define AFE_VUL_D2_CUR            0x035c
+#define AFE_DL3_BASE              0x0360
+#define AFE_DL3_CUR               0x0364
+#define AFE_DL3_END               0x0368
+#define AFE_HDMI_OUT_CON0         0x0370
+#define AFE_HDMI_BASE             0x0374
+#define AFE_HDMI_CUR              0x0378
+#define AFE_HDMI_END              0x037c
+#define AFE_HDMI_CONN0            0x0390
+#define AFE_IRQ3_MCU_CNT_MON      0x0398
+#define AFE_IRQ4_MCU_CNT_MON      0x039c
+#define AFE_IRQ_MCU_CON           0x03a0
+#define AFE_IRQ_MCU_STATUS        0x03a4
+#define AFE_IRQ_MCU_CLR           0x03a8
+#define AFE_IRQ_MCU_CNT1          0x03ac
+#define AFE_IRQ_MCU_CNT2          0x03b0
+#define AFE_IRQ_MCU_EN            0x03b4
+#define AFE_IRQ_MCU_MON2          0x03b8
+#define AFE_IRQ_MCU_CNT5          0x03bc
+#define AFE_IRQ1_MCU_CNT_MON      0x03c0
+#define AFE_IRQ2_MCU_CNT_MON      0x03c4
+#define AFE_IRQ1_MCU_EN_CNT_MON   0x03c8
+#define AFE_IRQ5_MCU_CNT_MON      0x03cc
+#define AFE_MEMIF_MINLEN          0x03d0
+#define AFE_MEMIF_MAXLEN          0x03d4
+#define AFE_MEMIF_PBUF_SIZE       0x03d8
+#define AFE_IRQ_MCU_CNT7          0x03dc
+#define AFE_IRQ7_MCU_CNT_MON      0x03e0
+#define AFE_IRQ_MCU_CNT3          0x03e4
+#define AFE_IRQ_MCU_CNT4          0x03e8
+#define AFE_APLL1_TUNER_CFG       0x03f0
+#define AFE_APLL2_TUNER_CFG       0x03f4
+#define AFE_MEMIF_HD_MODE         0x03f8
+#define AFE_MEMIF_HDALIGN         0x03fc
+#define AFE_GAIN1_CON0            0x0410
+#define AFE_GAIN1_CON1            0x0414
+#define AFE_GAIN1_CON2            0x0418
+#define AFE_GAIN1_CON3            0x041c
+#define AFE_CONN7                 0x0420
+#define AFE_GAIN1_CUR             0x0424
+#define AFE_GAIN2_CON0            0x0428
+#define AFE_GAIN2_CON1            0x042c
+#define AFE_GAIN2_CON2            0x0430
+#define AFE_GAIN2_CON3            0x0434
+#define AFE_CONN8                 0x0438
+#define AFE_GAIN2_CUR             0x043c
+#define AFE_CONN9                 0x0440
+#define AFE_CONN10                0x0444
+#define AFE_CONN11                0x0448
+#define AFE_CONN12                0x044c
+#define AFE_CONN13                0x0450
+#define AFE_CONN14                0x0454
+#define AFE_CONN15                0x0458
+#define AFE_CONN16                0x045c
+#define AFE_CONN17                0x0460
+#define AFE_CONN18                0x0464
+#define AFE_CONN19                0x0468
+#define AFE_CONN20                0x046c
+#define AFE_CONN21                0x0470
+#define AFE_CONN22                0x0474
+#define AFE_CONN23                0x0478
+#define AFE_CONN24                0x047c
+#define AFE_CONN_RS               0x0494
+#define AFE_CONN_DI               0x0498
+#define AFE_CONN25                0x04b0
+#define AFE_CONN26                0x04b4
+#define AFE_CONN27                0x04b8
+#define AFE_CONN28                0x04bc
+#define AFE_CONN29                0x04c0
+#define AFE_SRAM_DELSEL_CON0      0x04f0
+#define AFE_SRAM_DELSEL_CON1      0x04f4
+#define AFE_ASRC_CON0             0x0500
+#define AFE_ASRC_CON1             0x0504
+#define AFE_ASRC_CON2             0x0508
+#define AFE_ASRC_CON3             0x050c
+#define AFE_ASRC_CON4             0x0510
+#define AFE_ASRC_CON5             0x0514
+#define AFE_ASRC_CON6             0x0518
+#define AFE_ASRC_CON7             0x051c
+#define AFE_ASRC_CON8             0x0520
+#define AFE_ASRC_CON9             0x0524
+#define AFE_ASRC_CON10            0x0528
+#define AFE_ASRC_CON11            0x052c
+#define PCM_INTF_CON1             0x0530
+#define PCM_INTF_CON2             0x0538
+#define PCM2_INTF_CON             0x053c
+#define AFE_TDM_CON1              0x0548
+#define AFE_TDM_CON2              0x054c
+#define AFE_ASRC_CON13            0x0550
+#define AFE_ASRC_CON14            0x0554
+#define AFE_ASRC_CON15            0x0558
+#define AFE_ASRC_CON16            0x055c
+#define AFE_ASRC_CON17            0x0560
+#define AFE_ASRC_CON18            0x0564
+#define AFE_ASRC_CON19            0x0568
+#define AFE_ASRC_CON20            0x056c
+#define AFE_ASRC_CON21            0x0570
+#define CLK_AUDDIV_0              0x05a0
+#define CLK_AUDDIV_1              0x05a4
+#define CLK_AUDDIV_2              0x05a8
+#define CLK_AUDDIV_3              0x05ac
+#define AUDIO_TOP_DBG_CON         0x05c8
+#define AUDIO_TOP_DBG_MON0        0x05cc
+#define AUDIO_TOP_DBG_MON1        0x05d0
+#define AUDIO_TOP_DBG_MON2        0x05d4
+#define AFE_ADDA2_TOP_CON0        0x0600
+#define AFE_ASRC4_CON0            0x06c0
+#define AFE_ASRC4_CON1            0x06c4
+#define AFE_ASRC4_CON2            0x06c8
+#define AFE_ASRC4_CON3            0x06cc
+#define AFE_ASRC4_CON4            0x06d0
+#define AFE_ASRC4_CON5            0x06d4
+#define AFE_ASRC4_CON6            0x06d8
+#define AFE_ASRC4_CON7            0x06dc
+#define AFE_ASRC4_CON8            0x06e0
+#define AFE_ASRC4_CON9            0x06e4
+#define AFE_ASRC4_CON10           0x06e8
+#define AFE_ASRC4_CON11           0x06ec
+#define AFE_ASRC4_CON12           0x06f0
+#define AFE_ASRC4_CON13           0x06f4
+#define AFE_ASRC4_CON14           0x06f8
+#define AFE_ASRC2_CON0            0x0700
+#define AFE_ASRC2_CON1            0x0704
+#define AFE_ASRC2_CON2            0x0708
+#define AFE_ASRC2_CON3            0x070c
+#define AFE_ASRC2_CON4            0x0710
+#define AFE_ASRC2_CON5            0x0714
+#define AFE_ASRC2_CON6            0x0718
+#define AFE_ASRC2_CON7            0x071c
+#define AFE_ASRC2_CON8            0x0720
+#define AFE_ASRC2_CON9            0x0724
+#define AFE_ASRC2_CON10           0x0728
+#define AFE_ASRC2_CON11           0x072c
+#define AFE_ASRC2_CON12           0x0730
+#define AFE_ASRC2_CON13           0x0734
+#define AFE_ASRC2_CON14           0x0738
+#define AFE_ASRC3_CON0            0x0740
+#define AFE_ASRC3_CON1            0x0744
+#define AFE_ASRC3_CON2            0x0748
+#define AFE_ASRC3_CON3            0x074c
+#define AFE_ASRC3_CON4            0x0750
+#define AFE_ASRC3_CON5            0x0754
+#define AFE_ASRC3_CON6            0x0758
+#define AFE_ASRC3_CON7            0x075c
+#define AFE_ASRC3_CON8            0x0760
+#define AFE_ASRC3_CON9            0x0764
+#define AFE_ASRC3_CON10           0x0768
+#define AFE_ASRC3_CON11           0x076c
+#define AFE_ASRC3_CON12           0x0770
+#define AFE_ASRC3_CON13           0x0774
+#define AFE_ASRC3_CON14           0x0778
+#define AFE_GENERAL_REG0          0x0800
+#define AFE_GENERAL_REG1          0x0804
+#define AFE_GENERAL_REG2          0x0808
+#define AFE_GENERAL_REG3          0x080c
+#define AFE_GENERAL_REG4          0x0810
+#define AFE_GENERAL_REG5          0x0814
+#define AFE_GENERAL_REG6          0x0818
+#define AFE_GENERAL_REG7          0x081c
+#define AFE_GENERAL_REG8          0x0820
+#define AFE_GENERAL_REG9          0x0824
+#define AFE_GENERAL_REG10         0x0828
+#define AFE_GENERAL_REG11         0x082c
+#define AFE_GENERAL_REG12         0x0830
+#define AFE_GENERAL_REG13         0x0834
+#define AFE_GENERAL_REG14         0x0838
+#define AFE_GENERAL_REG15         0x083c
+#define AFE_CBIP_CFG0             0x0840
+#define AFE_CBIP_MON0             0x0844
+#define AFE_CBIP_SLV_MUX_MON0     0x0848
+#define AFE_CBIP_SLV_DECODER_MON0 0x084c
+
+#define AFE_MAX_REGISTER AFE_CBIP_SLV_DECODER_MON0
+#define AFE_IRQ_STATUS_BITS 0x5f
+
+/* AUDIO_TOP_CON0 */
+#define AHB_IDLE_EN_INT_SFT                                 30
+#define AHB_IDLE_EN_INT_MASK                                0x1
+#define AHB_IDLE_EN_INT_MASK_SFT                            (0x1 << 30)
+#define AHB_IDLE_EN_EXT_SFT                                 29
+#define AHB_IDLE_EN_EXT_MASK                                0x1
+#define AHB_IDLE_EN_EXT_MASK_SFT                            (0x1 << 29)
+#define PDN_TML_SFT                                         27
+#define PDN_TML_MASK                                        0x1
+#define PDN_TML_MASK_SFT                                    (0x1 << 27)
+#define PDN_DAC_PREDIS_SFT                                  26
+#define PDN_DAC_PREDIS_MASK                                 0x1
+#define PDN_DAC_PREDIS_MASK_SFT                             (0x1 << 26)
+#define PDN_DAC_SFT                                         25
+#define PDN_DAC_MASK                                        0x1
+#define PDN_DAC_MASK_SFT                                    (0x1 << 25)
+#define PDN_ADC_SFT                                         24
+#define PDN_ADC_MASK                                        0x1
+#define PDN_ADC_MASK_SFT                                    (0x1 << 24)
+#define PDN_TDM_CK_SFT                                      20
+#define PDN_TDM_CK_MASK                                     0x1
+#define PDN_TDM_CK_MASK_SFT                                 (0x1 << 20)
+#define PDN_APLL_TUNER_SFT                                  19
+#define PDN_APLL_TUNER_MASK                                 0x1
+#define PDN_APLL_TUNER_MASK_SFT                             (0x1 << 19)
+#define PDN_APLL2_TUNER_SFT                                 18
+#define PDN_APLL2_TUNER_MASK                                0x1
+#define PDN_APLL2_TUNER_MASK_SFT                            (0x1 << 18)
+#define APB3_SEL_SFT                                        14
+#define APB3_SEL_MASK                                       0x1
+#define APB3_SEL_MASK_SFT                                   (0x1 << 14)
+#define APB_R2T_SFT                                         13
+#define APB_R2T_MASK                                        0x1
+#define APB_R2T_MASK_SFT                                    (0x1 << 13)
+#define APB_W2T_SFT                                         12
+#define APB_W2T_MASK                                        0x1
+#define APB_W2T_MASK_SFT                                    (0x1 << 12)
+#define PDN_24M_SFT                                         9
+#define PDN_24M_MASK                                        0x1
+#define PDN_24M_MASK_SFT                                    (0x1 << 9)
+#define PDN_22M_SFT                                         8
+#define PDN_22M_MASK                                        0x1
+#define PDN_22M_MASK_SFT                                    (0x1 << 8)
+#define PDN_ADDA4_ADC_SFT                                   7
+#define PDN_ADDA4_ADC_MASK                                  0x1
+#define PDN_ADDA4_ADC_MASK_SFT                              (0x1 << 7)
+#define PDN_I2S_SFT                                         6
+#define PDN_I2S_MASK                                        0x1
+#define PDN_I2S_MASK_SFT                                    (0x1 << 6)
+#define PDN_AFE_SFT                                         2
+#define PDN_AFE_MASK                                        0x1
+#define PDN_AFE_MASK_SFT                                    (0x1 << 2)
+
+/* AUDIO_TOP_CON1 */
+#define PDN_ADC_HIRES_TML_SFT                               17
+#define PDN_ADC_HIRES_TML_MASK                              0x1
+#define PDN_ADC_HIRES_TML_MASK_SFT                          (0x1 << 17)
+#define PDN_ADC_HIRES_SFT                                   16
+#define PDN_ADC_HIRES_MASK                                  0x1
+#define PDN_ADC_HIRES_MASK_SFT                              (0x1 << 16)
+#define I2S4_BCLK_SW_CG_SFT                                 7
+#define I2S4_BCLK_SW_CG_MASK                                0x1
+#define I2S4_BCLK_SW_CG_MASK_SFT                            (0x1 << 7)
+#define I2S3_BCLK_SW_CG_SFT                                 6
+#define I2S3_BCLK_SW_CG_MASK                                0x1
+#define I2S3_BCLK_SW_CG_MASK_SFT                            (0x1 << 6)
+#define I2S2_BCLK_SW_CG_SFT                                 5
+#define I2S2_BCLK_SW_CG_MASK                                0x1
+#define I2S2_BCLK_SW_CG_MASK_SFT                            (0x1 << 5)
+#define I2S1_BCLK_SW_CG_SFT                                 4
+#define I2S1_BCLK_SW_CG_MASK                                0x1
+#define I2S1_BCLK_SW_CG_MASK_SFT                            (0x1 << 4)
+#define I2S_SOFT_RST2_SFT                                   2
+#define I2S_SOFT_RST2_MASK                                  0x1
+#define I2S_SOFT_RST2_MASK_SFT                              (0x1 << 2)
+#define I2S_SOFT_RST_SFT                                    1
+#define I2S_SOFT_RST_MASK                                   0x1
+#define I2S_SOFT_RST_MASK_SFT                               (0x1 << 1)
+
+/* AFE_DAC_CON0 */
+#define AFE_AWB_RETM_SFT                                    31
+#define AFE_AWB_RETM_MASK                                   0x1
+#define AFE_AWB_RETM_MASK_SFT                               (0x1 << 31)
+#define AFE_DL1_DATA2_RETM_SFT                              30
+#define AFE_DL1_DATA2_RETM_MASK                             0x1
+#define AFE_DL1_DATA2_RETM_MASK_SFT                         (0x1 << 30)
+#define AFE_DL2_RETM_SFT                                    29
+#define AFE_DL2_RETM_MASK                                   0x1
+#define AFE_DL2_RETM_MASK_SFT                               (0x1 << 29)
+#define AFE_DL1_RETM_SFT                                    28
+#define AFE_DL1_RETM_MASK                                   0x1
+#define AFE_DL1_RETM_MASK_SFT                               (0x1 << 28)
+#define AFE_ON_RETM_SFT                                     27
+#define AFE_ON_RETM_MASK                                    0x1
+#define AFE_ON_RETM_MASK_SFT                                (0x1 << 27)
+#define MOD_DAI_DUP_WR_SFT                                  26
+#define MOD_DAI_DUP_WR_MASK                                 0x1
+#define MOD_DAI_DUP_WR_MASK_SFT                             (0x1 << 26)
+#define DAI_MODE_SFT                                        24
+#define DAI_MODE_MASK                                       0x3
+#define DAI_MODE_MASK_SFT                                   (0x3 << 24)
+#define VUL_DATA2_MODE_SFT                                  20
+#define VUL_DATA2_MODE_MASK                                 0xf
+#define VUL_DATA2_MODE_MASK_SFT                             (0xf << 20)
+#define DL1_DATA2_MODE_SFT                                  16
+#define DL1_DATA2_MODE_MASK                                 0xf
+#define DL1_DATA2_MODE_MASK_SFT                             (0xf << 16)
+#define DL3_MODE_SFT                                        12
+#define DL3_MODE_MASK                                       0xf
+#define DL3_MODE_MASK_SFT                                   (0xf << 12)
+#define VUL_DATA2_R_MONO_SFT                                11
+#define VUL_DATA2_R_MONO_MASK                               0x1
+#define VUL_DATA2_R_MONO_MASK_SFT                           (0x1 << 11)
+#define VUL_DATA2_DATA_SFT                                  10
+#define VUL_DATA2_DATA_MASK                                 0x1
+#define VUL_DATA2_DATA_MASK_SFT                             (0x1 << 10)
+#define VUL_DATA2_ON_SFT                                    9
+#define VUL_DATA2_ON_MASK                                   0x1
+#define VUL_DATA2_ON_MASK_SFT                               (0x1 << 9)
+#define DL1_DATA2_ON_SFT                                    8
+#define DL1_DATA2_ON_MASK                                   0x1
+#define DL1_DATA2_ON_MASK_SFT                               (0x1 << 8)
+#define MOD_DAI_ON_SFT                                      7
+#define MOD_DAI_ON_MASK                                     0x1
+#define MOD_DAI_ON_MASK_SFT                                 (0x1 << 7)
+#define AWB_ON_SFT                                          6
+#define AWB_ON_MASK                                         0x1
+#define AWB_ON_MASK_SFT                                     (0x1 << 6)
+#define DL3_ON_SFT                                          5
+#define DL3_ON_MASK                                         0x1
+#define DL3_ON_MASK_SFT                                     (0x1 << 5)
+#define DAI_ON_SFT                                          4
+#define DAI_ON_MASK                                         0x1
+#define DAI_ON_MASK_SFT                                     (0x1 << 4)
+#define VUL_ON_SFT                                          3
+#define VUL_ON_MASK                                         0x1
+#define VUL_ON_MASK_SFT                                     (0x1 << 3)
+#define DL2_ON_SFT                                          2
+#define DL2_ON_MASK                                         0x1
+#define DL2_ON_MASK_SFT                                     (0x1 << 2)
+#define DL1_ON_SFT                                          1
+#define DL1_ON_MASK                                         0x1
+#define DL1_ON_MASK_SFT                                     (0x1 << 1)
+#define AFE_ON_SFT                                          0
+#define AFE_ON_MASK                                         0x1
+#define AFE_ON_MASK_SFT                                     (0x1 << 0)
+
+/* AFE_DAC_CON1 */
+#define MOD_DAI_MODE_SFT                                    30
+#define MOD_DAI_MODE_MASK                                   0x3
+#define MOD_DAI_MODE_MASK_SFT                               (0x3 << 30)
+#define DAI_DUP_WR_SFT                                      29
+#define DAI_DUP_WR_MASK                                     0x1
+#define DAI_DUP_WR_MASK_SFT                                 (0x1 << 29)
+#define VUL_R_MONO_SFT                                      28
+#define VUL_R_MONO_MASK                                     0x1
+#define VUL_R_MONO_MASK_SFT                                 (0x1 << 28)
+#define VUL_DATA_SFT                                        27
+#define VUL_DATA_MASK                                       0x1
+#define VUL_DATA_MASK_SFT                                   (0x1 << 27)
+#define AXI_2X1_CG_DISABLE_SFT                              26
+#define AXI_2X1_CG_DISABLE_MASK                             0x1
+#define AXI_2X1_CG_DISABLE_MASK_SFT                         (0x1 << 26)
+#define AWB_R_MONO_SFT                                      25
+#define AWB_R_MONO_MASK                                     0x1
+#define AWB_R_MONO_MASK_SFT                                 (0x1 << 25)
+#define AWB_DATA_SFT                                        24
+#define AWB_DATA_MASK                                       0x1
+#define AWB_DATA_MASK_SFT                                   (0x1 << 24)
+#define DL3_DATA_SFT                                        23
+#define DL3_DATA_MASK                                       0x1
+#define DL3_DATA_MASK_SFT                                   (0x1 << 23)
+#define DL2_DATA_SFT                                        22
+#define DL2_DATA_MASK                                       0x1
+#define DL2_DATA_MASK_SFT                                   (0x1 << 22)
+#define DL1_DATA_SFT                                        21
+#define DL1_DATA_MASK                                       0x1
+#define DL1_DATA_MASK_SFT                                   (0x1 << 21)
+#define DL1_DATA2_DATA_SFT                                  20
+#define DL1_DATA2_DATA_MASK                                 0x1
+#define DL1_DATA2_DATA_MASK_SFT                             (0x1 << 20)
+#define VUL_MODE_SFT                                        16
+#define VUL_MODE_MASK                                       0xf
+#define VUL_MODE_MASK_SFT                                   (0xf << 16)
+#define AWB_MODE_SFT                                        12
+#define AWB_MODE_MASK                                       0xf
+#define AWB_MODE_MASK_SFT                                   (0xf << 12)
+#define I2S_MODE_SFT                                        8
+#define I2S_MODE_MASK                                       0xf
+#define I2S_MODE_MASK_SFT                                   (0xf << 8)
+#define DL2_MODE_SFT                                        4
+#define DL2_MODE_MASK                                       0xf
+#define DL2_MODE_MASK_SFT                                   (0xf << 4)
+#define DL1_MODE_SFT                                        0
+#define DL1_MODE_MASK                                       0xf
+#define DL1_MODE_MASK_SFT                                   (0xf << 0)
+
+/* AFE_ADDA_DL_SRC2_CON0 */
+#define DL_2_INPUT_MODE_CTL_SFT                             28
+#define DL_2_INPUT_MODE_CTL_MASK                            0xf
+#define DL_2_INPUT_MODE_CTL_MASK_SFT                        (0xf << 28)
+#define DL_2_CH1_SATURATION_EN_CTL_SFT                      27
+#define DL_2_CH1_SATURATION_EN_CTL_MASK                     0x1
+#define DL_2_CH1_SATURATION_EN_CTL_MASK_SFT                 (0x1 << 27)
+#define DL_2_CH2_SATURATION_EN_CTL_SFT                      26
+#define DL_2_CH2_SATURATION_EN_CTL_MASK                     0x1
+#define DL_2_CH2_SATURATION_EN_CTL_MASK_SFT                 (0x1 << 26)
+#define DL_2_OUTPUT_SEL_CTL_SFT                             24
+#define DL_2_OUTPUT_SEL_CTL_MASK                            0x3
+#define DL_2_OUTPUT_SEL_CTL_MASK_SFT                        (0x3 << 24)
+#define DL_2_FADEIN_0START_EN_SFT                           16
+#define DL_2_FADEIN_0START_EN_MASK                          0x3
+#define DL_2_FADEIN_0START_EN_MASK_SFT                      (0x3 << 16)
+#define DL_DISABLE_HW_CG_CTL_SFT                            15
+#define DL_DISABLE_HW_CG_CTL_MASK                           0x1
+#define DL_DISABLE_HW_CG_CTL_MASK_SFT                       (0x1 << 15)
+#define C_DATA_EN_SEL_CTL_PRE_SFT                           14
+#define C_DATA_EN_SEL_CTL_PRE_MASK                          0x1
+#define C_DATA_EN_SEL_CTL_PRE_MASK_SFT                      (0x1 << 14)
+#define DL_2_SIDE_TONE_ON_CTL_PRE_SFT                       13
+#define DL_2_SIDE_TONE_ON_CTL_PRE_MASK                      0x1
+#define DL_2_SIDE_TONE_ON_CTL_PRE_MASK_SFT                  (0x1 << 13)
+#define DL_2_MUTE_CH1_OFF_CTL_PRE_SFT                       12
+#define DL_2_MUTE_CH1_OFF_CTL_PRE_MASK                      0x1
+#define DL_2_MUTE_CH1_OFF_CTL_PRE_MASK_SFT                  (0x1 << 12)
+#define DL_2_MUTE_CH2_OFF_CTL_PRE_SFT                       11
+#define DL_2_MUTE_CH2_OFF_CTL_PRE_MASK                      0x1
+#define DL_2_MUTE_CH2_OFF_CTL_PRE_MASK_SFT                  (0x1 << 11)
+#define DL2_ARAMPSP_CTL_PRE_SFT                             9
+#define DL2_ARAMPSP_CTL_PRE_MASK                            0x3
+#define DL2_ARAMPSP_CTL_PRE_MASK_SFT                        (0x3 << 9)
+#define DL_2_IIRMODE_CTL_PRE_SFT                            6
+#define DL_2_IIRMODE_CTL_PRE_MASK                           0x7
+#define DL_2_IIRMODE_CTL_PRE_MASK_SFT                       (0x7 << 6)
+#define DL_2_VOICE_MODE_CTL_PRE_SFT                         5
+#define DL_2_VOICE_MODE_CTL_PRE_MASK                        0x1
+#define DL_2_VOICE_MODE_CTL_PRE_MASK_SFT                    (0x1 << 5)
+#define D2_2_MUTE_CH1_ON_CTL_PRE_SFT                        4
+#define D2_2_MUTE_CH1_ON_CTL_PRE_MASK                       0x1
+#define D2_2_MUTE_CH1_ON_CTL_PRE_MASK_SFT                   (0x1 << 4)
+#define D2_2_MUTE_CH2_ON_CTL_PRE_SFT                        3
+#define D2_2_MUTE_CH2_ON_CTL_PRE_MASK                       0x1
+#define D2_2_MUTE_CH2_ON_CTL_PRE_MASK_SFT                   (0x1 << 3)
+#define DL_2_IIR_ON_CTL_PRE_SFT                             2
+#define DL_2_IIR_ON_CTL_PRE_MASK                            0x1
+#define DL_2_IIR_ON_CTL_PRE_MASK_SFT                        (0x1 << 2)
+#define DL_2_GAIN_ON_CTL_PRE_SFT                            1
+#define DL_2_GAIN_ON_CTL_PRE_MASK                           0x1
+#define DL_2_GAIN_ON_CTL_PRE_MASK_SFT                       (0x1 << 1)
+#define DL_2_SRC_ON_TMP_CTL_PRE_SFT                         0
+#define DL_2_SRC_ON_TMP_CTL_PRE_MASK                        0x1
+#define DL_2_SRC_ON_TMP_CTL_PRE_MASK_SFT                    (0x1 << 0)
+
+/* AFE_ADDA_DL_SRC2_CON1 */
+#define DL_2_GAIN_CTL_PRE_SFT                               16
+#define DL_2_GAIN_CTL_PRE_MASK                              0xffff
+#define DL_2_GAIN_CTL_PRE_MASK_SFT                          (0xffff << 16)
+#define DL_2_GAIN_MODE_CTL_SFT                              0
+#define DL_2_GAIN_MODE_CTL_MASK                             0x1
+#define DL_2_GAIN_MODE_CTL_MASK_SFT                         (0x1 << 0)
+
+/* AFE_ADDA_UL_SRC_CON0 */
+#define C_COMB_OUT_SIN_GEN_CTL_SFT                          31
+#define C_COMB_OUT_SIN_GEN_CTL_MASK                         0x1
+#define C_COMB_OUT_SIN_GEN_CTL_MASK_SFT                     (0x1 << 31)
+#define C_BASEBAND_SIN_GEN_CTL_SFT                          30
+#define C_BASEBAND_SIN_GEN_CTL_MASK                         0x1
+#define C_BASEBAND_SIN_GEN_CTL_MASK_SFT                     (0x1 << 30)
+#define C_DIGMIC_PHASE_SEL_CH1_CTL_SFT                      27
+#define C_DIGMIC_PHASE_SEL_CH1_CTL_MASK                     0x7
+#define C_DIGMIC_PHASE_SEL_CH1_CTL_MASK_SFT                 (0x7 << 27)
+#define C_DIGMIC_PHASE_SEL_CH2_CTL_SFT                      24
+#define C_DIGMIC_PHASE_SEL_CH2_CTL_MASK                     0x7
+#define C_DIGMIC_PHASE_SEL_CH2_CTL_MASK_SFT                 (0x7 << 24)
+#define C_TWO_DIGITAL_MIC_CTL_SFT                           23
+#define C_TWO_DIGITAL_MIC_CTL_MASK                          0x1
+#define C_TWO_DIGITAL_MIC_CTL_MASK_SFT                      (0x1 << 23)
+#define UL_MODE_3P25M_CH2_CTL_SFT                           22
+#define UL_MODE_3P25M_CH2_CTL_MASK                          0x1
+#define UL_MODE_3P25M_CH2_CTL_MASK_SFT                      (0x1 << 22)
+#define UL_MODE_3P25M_CH1_CTL_SFT                           21
+#define UL_MODE_3P25M_CH1_CTL_MASK                          0x1
+#define UL_MODE_3P25M_CH1_CTL_MASK_SFT                      (0x1 << 21)
+#define UL_SRC_USE_CIC_OUT_CTL_SFT                          20
+#define UL_SRC_USE_CIC_OUT_CTL_MASK                         0x1
+#define UL_SRC_USE_CIC_OUT_CTL_MASK_SFT                     (0x1 << 20)
+#define UL_VOICE_MODE_CH1_CH2_CTL_SFT                       17
+#define UL_VOICE_MODE_CH1_CH2_CTL_MASK                      0x7
+#define UL_VOICE_MODE_CH1_CH2_CTL_MASK_SFT                  (0x7 << 17)
+#define DMIC_LOW_POWER_MODE_CTL_SFT                         14
+#define DMIC_LOW_POWER_MODE_CTL_MASK                        0x3
+#define DMIC_LOW_POWER_MODE_CTL_MASK_SFT                    (0x3 << 14)
+#define DMIC_48K_SEL_CTL_SFT                                13
+#define DMIC_48K_SEL_CTL_MASK                               0x1
+#define DMIC_48K_SEL_CTL_MASK_SFT                           (0x1 << 13)
+#define UL_DISABLE_HW_CG_CTL_SFT                            12
+#define UL_DISABLE_HW_CG_CTL_MASK                           0x1
+#define UL_DISABLE_HW_CG_CTL_MASK_SFT                       (0x1 << 12)
+#define UL_IIR_ON_TMP_CTL_SFT                               10
+#define UL_IIR_ON_TMP_CTL_MASK                              0x1
+#define UL_IIR_ON_TMP_CTL_MASK_SFT                          (0x1 << 10)
+#define UL_IIRMODE_CTL_SFT                                  7
+#define UL_IIRMODE_CTL_MASK                                 0x7
+#define UL_IIRMODE_CTL_MASK_SFT                             (0x7 << 7)
+#define DIGMIC_3P25M_1P625M_SEL_CTL_SFT                     5
+#define DIGMIC_3P25M_1P625M_SEL_CTL_MASK                    0x1
+#define DIGMIC_3P25M_1P625M_SEL_CTL_MASK_SFT                (0x1 << 5)
+#define AGC_260K_SEL_CH2_CTL_SFT                            4
+#define AGC_260K_SEL_CH2_CTL_MASK                           0x1
+#define AGC_260K_SEL_CH2_CTL_MASK_SFT                       (0x1 << 4)
+#define AGC_260K_SEL_CH1_CTL_SFT                            3
+#define AGC_260K_SEL_CH1_CTL_MASK                           0x1
+#define AGC_260K_SEL_CH1_CTL_MASK_SFT                       (0x1 << 3)
+#define UL_LOOP_BACK_MODE_CTL_SFT                           2
+#define UL_LOOP_BACK_MODE_CTL_MASK                          0x1
+#define UL_LOOP_BACK_MODE_CTL_MASK_SFT                      (0x1 << 2)
+#define UL_SDM_3_LEVEL_CTL_SFT                              1
+#define UL_SDM_3_LEVEL_CTL_MASK                             0x1
+#define UL_SDM_3_LEVEL_CTL_MASK_SFT                         (0x1 << 1)
+#define UL_SRC_ON_TMP_CTL_SFT                               0
+#define UL_SRC_ON_TMP_CTL_MASK                              0x1
+#define UL_SRC_ON_TMP_CTL_MASK_SFT                          (0x1 << 0)
+
+/* AFE_ADDA_UL_SRC_CON1 */
+#define C_SDM_RESET_CTL_SFT                                 31
+#define C_SDM_RESET_CTL_MASK                                0x1
+#define C_SDM_RESET_CTL_MASK_SFT                            (0x1 << 31)
+#define ADITHON_CTL_SFT                                     30
+#define ADITHON_CTL_MASK                                    0x1
+#define ADITHON_CTL_MASK_SFT                                (0x1 << 30)
+#define ADITHVAL_CTL_SFT                                    28
+#define ADITHVAL_CTL_MASK                                   0x3
+#define ADITHVAL_CTL_MASK_SFT                               (0x3 << 28)
+#define C_DAC_EN_CTL_SFT                                    27
+#define C_DAC_EN_CTL_MASK                                   0x1
+#define C_DAC_EN_CTL_MASK_SFT                               (0x1 << 27)
+#define C_MUTE_SW_CTL_SFT                                   26
+#define C_MUTE_SW_CTL_MASK                                  0x1
+#define C_MUTE_SW_CTL_MASK_SFT                              (0x1 << 26)
+#define ASDM_SRC_SEL_CTL_SFT                                25
+#define ASDM_SRC_SEL_CTL_MASK                               0x1
+#define ASDM_SRC_SEL_CTL_MASK_SFT                           (0x1 << 25)
+#define C_AMP_DIV_CH2_CTL_SFT                               21
+#define C_AMP_DIV_CH2_CTL_MASK                              0x7
+#define C_AMP_DIV_CH2_CTL_MASK_SFT                          (0x7 << 21)
+#define C_FREQ_DIV_CH2_CTL_SFT                              16
+#define C_FREQ_DIV_CH2_CTL_MASK                             0x1f
+#define C_FREQ_DIV_CH2_CTL_MASK_SFT                         (0x1f << 16)
+#define C_SINE_MODE_CH2_CTL_SFT                             12
+#define C_SINE_MODE_CH2_CTL_MASK                            0xf
+#define C_SINE_MODE_CH2_CTL_MASK_SFT                        (0xf << 12)
+#define C_AMP_DIV_CH1_CTL_SFT                               9
+#define C_AMP_DIV_CH1_CTL_MASK                              0x7
+#define C_AMP_DIV_CH1_CTL_MASK_SFT                          (0x7 << 9)
+#define C_FREQ_DIV_CH1_CTL_SFT                              4
+#define C_FREQ_DIV_CH1_CTL_MASK                             0x1f
+#define C_FREQ_DIV_CH1_CTL_MASK_SFT                         (0x1f << 4)
+#define C_SINE_MODE_CH1_CTL_SFT                             0
+#define C_SINE_MODE_CH1_CTL_MASK                            0xf
+#define C_SINE_MODE_CH1_CTL_MASK_SFT                        (0xf << 0)
+
+/* AFE_ADDA_TOP_CON0 */
+#define C_LOOP_BACK_MODE_CTL_SFT                            12
+#define C_LOOP_BACK_MODE_CTL_MASK                           0xf
+#define C_LOOP_BACK_MODE_CTL_MASK_SFT                       (0xf << 12)
+#define C_EXT_ADC_CTL_SFT                                   0
+#define C_EXT_ADC_CTL_MASK                                  0x1
+#define C_EXT_ADC_CTL_MASK_SFT                              (0x1 << 0)
+
+/* AFE_ADDA_UL_DL_CON0 */
+#define AFE_UL_DL_CON0_RESERVED_SFT                         1
+#define AFE_UL_DL_CON0_RESERVED_MASK                        0x3fff
+#define AFE_UL_DL_CON0_RESERVED_MASK_SFT                    (0x3fff << 1)
+#define ADDA_AFE_ON_SFT                                     0
+#define ADDA_AFE_ON_MASK                                    0x1
+#define ADDA_AFE_ON_MASK_SFT                                (0x1 << 0)
+
+/* AFE_IRQ_MCU_CON */
+#define IRQ7_MCU_MODE_SFT                                   24
+#define IRQ7_MCU_MODE_MASK                                  0xf
+#define IRQ7_MCU_MODE_MASK_SFT                              (0xf << 24)
+#define IRQ4_MCU_MODE_SFT                                   20
+#define IRQ4_MCU_MODE_MASK                                  0xf
+#define IRQ4_MCU_MODE_MASK_SFT                              (0xf << 20)
+#define IRQ3_MCU_MODE_SFT                                   16
+#define IRQ3_MCU_MODE_MASK                                  0xf
+#define IRQ3_MCU_MODE_MASK_SFT                              (0xf << 16)
+#define IRQ7_MCU_ON_SFT                                     14
+#define IRQ7_MCU_ON_MASK                                    0x1
+#define IRQ7_MCU_ON_MASK_SFT                                (0x1 << 14)
+#define IRQ5_MCU_ON_SFT                                     12
+#define IRQ5_MCU_ON_MASK                                    0x1
+#define IRQ5_MCU_ON_MASK_SFT                                (0x1 << 12)
+#define IRQ2_MCU_MODE_SFT                                   8
+#define IRQ2_MCU_MODE_MASK                                  0xf
+#define IRQ2_MCU_MODE_MASK_SFT                              (0xf << 8)
+#define IRQ1_MCU_MODE_SFT                                   4
+#define IRQ1_MCU_MODE_MASK                                  0xf
+#define IRQ1_MCU_MODE_MASK_SFT                              (0xf << 4)
+#define IRQ4_MCU_ON_SFT                                     3
+#define IRQ4_MCU_ON_MASK                                    0x1
+#define IRQ4_MCU_ON_MASK_SFT                                (0x1 << 3)
+#define IRQ3_MCU_ON_SFT                                     2
+#define IRQ3_MCU_ON_MASK                                    0x1
+#define IRQ3_MCU_ON_MASK_SFT                                (0x1 << 2)
+#define IRQ2_MCU_ON_SFT                                     1
+#define IRQ2_MCU_ON_MASK                                    0x1
+#define IRQ2_MCU_ON_MASK_SFT                                (0x1 << 1)
+#define IRQ1_MCU_ON_SFT                                     0
+#define IRQ1_MCU_ON_MASK                                    0x1
+#define IRQ1_MCU_ON_MASK_SFT                                (0x1 << 0)
+
+/* AFE_IRQ_MCU_EN */
+#define AFE_IRQ_CM4_EN_SFT                                  16
+#define AFE_IRQ_CM4_EN_MASK                                 0x7f
+#define AFE_IRQ_CM4_EN_MASK_SFT                             (0x7f << 16)
+#define AFE_IRQ_MD32_EN_SFT                                 8
+#define AFE_IRQ_MD32_EN_MASK                                0x7f
+#define AFE_IRQ_MD32_EN_MASK_SFT                            (0x7f << 8)
+#define AFE_IRQ_MCU_EN_SFT                                  0
+#define AFE_IRQ_MCU_EN_MASK                                 0x7f
+#define AFE_IRQ_MCU_EN_MASK_SFT                             (0x7f << 0)
+
+/* AFE_IRQ_MCU_CLR */
+#define IRQ7_MCU_CLR_SFT                                    6
+#define IRQ7_MCU_CLR_MASK                                   0x1
+#define IRQ7_MCU_CLR_MASK_SFT                               (0x1 << 6)
+#define IRQ5_MCU_CLR_SFT                                    4
+#define IRQ5_MCU_CLR_MASK                                   0x1
+#define IRQ5_MCU_CLR_MASK_SFT                               (0x1 << 4)
+#define IRQ4_MCU_CLR_SFT                                    3
+#define IRQ4_MCU_CLR_MASK                                   0x1
+#define IRQ4_MCU_CLR_MASK_SFT                               (0x1 << 3)
+#define IRQ3_MCU_CLR_SFT                                    2
+#define IRQ3_MCU_CLR_MASK                                   0x1
+#define IRQ3_MCU_CLR_MASK_SFT                               (0x1 << 2)
+#define IRQ2_MCU_CLR_SFT                                    1
+#define IRQ2_MCU_CLR_MASK                                   0x1
+#define IRQ2_MCU_CLR_MASK_SFT                               (0x1 << 1)
+#define IRQ1_MCU_CLR_SFT                                    0
+#define IRQ1_MCU_CLR_MASK                                   0x1
+#define IRQ1_MCU_CLR_MASK_SFT                               (0x1 << 0)
+
+/* AFE_IRQ_MCU_CNT1 */
+#define AFE_IRQ_MCU_CNT1_SFT                                0
+#define AFE_IRQ_MCU_CNT1_MASK                               0x3ffff
+#define AFE_IRQ_MCU_CNT1_MASK_SFT                           (0x3ffff << 0)
+
+/* AFE_IRQ_MCU_CNT2 */
+#define AFE_IRQ_MCU_CNT2_SFT                                0
+#define AFE_IRQ_MCU_CNT2_MASK                               0x3ffff
+#define AFE_IRQ_MCU_CNT2_MASK_SFT                           (0x3ffff << 0)
+
+/* AFE_IRQ_MCU_CNT3 */
+#define AFE_IRQ_MCU_CNT3_SFT                                0
+#define AFE_IRQ_MCU_CNT3_MASK                               0x3ffff
+#define AFE_IRQ_MCU_CNT3_MASK_SFT                           (0x3ffff << 0)
+
+/* AFE_IRQ_MCU_CNT4 */
+#define AFE_IRQ_MCU_CNT4_SFT                                0
+#define AFE_IRQ_MCU_CNT4_MASK                               0x3ffff
+#define AFE_IRQ_MCU_CNT4_MASK_SFT                           (0x3ffff << 0)
+
+/* AFE_IRQ_MCU_CNT5 */
+#define AFE_IRQ_MCU_CNT5_SFT                                0
+#define AFE_IRQ_MCU_CNT5_MASK                               0x3ffff
+#define AFE_IRQ_MCU_CNT5_MASK_SFT                           (0x3ffff << 0)
+
+/* AFE_IRQ_MCU_CNT7 */
+#define AFE_IRQ_MCU_CNT7_SFT                                0
+#define AFE_IRQ_MCU_CNT7_MASK                               0x3ffff
+#define AFE_IRQ_MCU_CNT7_MASK_SFT                           (0x3ffff << 0)
+
+/* AFE_MEMIF_MSB */
+#define CPU_COMPACT_MODE_SFT                                23
+#define CPU_COMPACT_MODE_MASK                               0x1
+#define CPU_COMPACT_MODE_MASK_SFT                           (0x1 << 23)
+#define CPU_HD_ALIGN_SFT                                    22
+#define CPU_HD_ALIGN_MASK                                   0x1
+#define CPU_HD_ALIGN_MASK_SFT                               (0x1 << 22)
+
+/* AFE_MEMIF_HD_MODE */
+#define HDMI_HD_SFT                                         20
+#define HDMI_HD_MASK                                        0x3
+#define HDMI_HD_MASK_SFT                                    (0x3 << 20)
+#define MOD_DAI_HD_SFT                                      18
+#define MOD_DAI_HD_MASK                                     0x3
+#define MOD_DAI_HD_MASK_SFT                                 (0x3 << 18)
+#define DAI_HD_SFT                                          16
+#define DAI_HD_MASK                                         0x3
+#define DAI_HD_MASK_SFT                                     (0x3 << 16)
+#define VUL_DATA2_HD_SFT                                    12
+#define VUL_DATA2_HD_MASK                                   0x3
+#define VUL_DATA2_HD_MASK_SFT                               (0x3 << 12)
+#define VUL_HD_SFT                                          10
+#define VUL_HD_MASK                                         0x3
+#define VUL_HD_MASK_SFT                                     (0x3 << 10)
+#define AWB_HD_SFT                                          8
+#define AWB_HD_MASK                                         0x3
+#define AWB_HD_MASK_SFT                                     (0x3 << 8)
+#define DL3_HD_SFT                                          6
+#define DL3_HD_MASK                                         0x3
+#define DL3_HD_MASK_SFT                                     (0x3 << 6)
+#define DL2_HD_SFT                                          4
+#define DL2_HD_MASK                                         0x3
+#define DL2_HD_MASK_SFT                                     (0x3 << 4)
+#define DL1_DATA2_HD_SFT                                    2
+#define DL1_DATA2_HD_MASK                                   0x3
+#define DL1_DATA2_HD_MASK_SFT                               (0x3 << 2)
+#define DL1_HD_SFT                                          0
+#define DL1_HD_MASK                                         0x3
+#define DL1_HD_MASK_SFT                                     (0x3 << 0)
+
+/* AFE_MEMIF_HDALIGN */
+#define HDMI_NORMAL_MODE_SFT                                26
+#define HDMI_NORMAL_MODE_MASK                               0x1
+#define HDMI_NORMAL_MODE_MASK_SFT                           (0x1 << 26)
+#define MOD_DAI_NORMAL_MODE_SFT                             25
+#define MOD_DAI_NORMAL_MODE_MASK                            0x1
+#define MOD_DAI_NORMAL_MODE_MASK_SFT                        (0x1 << 25)
+#define DAI_NORMAL_MODE_SFT                                 24
+#define DAI_NORMAL_MODE_MASK                                0x1
+#define DAI_NORMAL_MODE_MASK_SFT                            (0x1 << 24)
+#define VUL_DATA2_NORMAL_MODE_SFT                           22
+#define VUL_DATA2_NORMAL_MODE_MASK                          0x1
+#define VUL_DATA2_NORMAL_MODE_MASK_SFT                      (0x1 << 22)
+#define VUL_NORMAL_MODE_SFT                                 21
+#define VUL_NORMAL_MODE_MASK                                0x1
+#define VUL_NORMAL_MODE_MASK_SFT                            (0x1 << 21)
+#define AWB_NORMAL_MODE_SFT                                 20
+#define AWB_NORMAL_MODE_MASK                                0x1
+#define AWB_NORMAL_MODE_MASK_SFT                            (0x1 << 20)
+#define DL3_NORMAL_MODE_SFT                                 19
+#define DL3_NORMAL_MODE_MASK                                0x1
+#define DL3_NORMAL_MODE_MASK_SFT                            (0x1 << 19)
+#define DL2_NORMAL_MODE_SFT                                 18
+#define DL2_NORMAL_MODE_MASK                                0x1
+#define DL2_NORMAL_MODE_MASK_SFT                            (0x1 << 18)
+#define DL1_DATA2_NORMAL_MODE_SFT                           17
+#define DL1_DATA2_NORMAL_MODE_MASK                          0x1
+#define DL1_DATA2_NORMAL_MODE_MASK_SFT                      (0x1 << 17)
+#define DL1_NORMAL_MODE_SFT                                 16
+#define DL1_NORMAL_MODE_MASK                                0x1
+#define DL1_NORMAL_MODE_MASK_SFT                            (0x1 << 16)
+#define HDMI_HD_ALIGN_SFT                                   10
+#define HDMI_HD_ALIGN_MASK                                  0x1
+#define HDMI_HD_ALIGN_MASK_SFT                              (0x1 << 10)
+#define MOD_DAI_HD_ALIGN_SFT                                9
+#define MOD_DAI_HD_ALIGN_MASK                               0x1
+#define MOD_DAI_HD_ALIGN_MASK_SFT                           (0x1 << 9)
+#define DAI_ALIGN_SFT                                       8
+#define DAI_ALIGN_MASK                                      0x1
+#define DAI_ALIGN_MASK_SFT                                  (0x1 << 8)
+#define VUL2_HD_ALIGN_SFT                                   7
+#define VUL2_HD_ALIGN_MASK                                  0x1
+#define VUL2_HD_ALIGN_MASK_SFT                              (0x1 << 7)
+#define VUL_DATA2_HD_ALIGN_SFT                              6
+#define VUL_DATA2_HD_ALIGN_MASK                             0x1
+#define VUL_DATA2_HD_ALIGN_MASK_SFT                         (0x1 << 6)
+#define VUL_HD_ALIGN_SFT                                    5
+#define VUL_HD_ALIGN_MASK                                   0x1
+#define VUL_HD_ALIGN_MASK_SFT                               (0x1 << 5)
+#define AWB_HD_ALIGN_SFT                                    4
+#define AWB_HD_ALIGN_MASK                                   0x1
+#define AWB_HD_ALIGN_MASK_SFT                               (0x1 << 4)
+#define DL3_HD_ALIGN_SFT                                    3
+#define DL3_HD_ALIGN_MASK                                   0x1
+#define DL3_HD_ALIGN_MASK_SFT                               (0x1 << 3)
+#define DL2_HD_ALIGN_SFT                                    2
+#define DL2_HD_ALIGN_MASK                                   0x1
+#define DL2_HD_ALIGN_MASK_SFT                               (0x1 << 2)
+#define DL1_DATA2_HD_ALIGN_SFT                              1
+#define DL1_DATA2_HD_ALIGN_MASK                             0x1
+#define DL1_DATA2_HD_ALIGN_MASK_SFT                         (0x1 << 1)
+#define DL1_HD_ALIGN_SFT                                    0
+#define DL1_HD_ALIGN_MASK                                   0x1
+#define DL1_HD_ALIGN_MASK_SFT                               (0x1 << 0)
+
+/* PCM_INTF_CON1 */
+#define PCM_FIX_VALUE_SEL_SFT                               31
+#define PCM_FIX_VALUE_SEL_MASK                              0x1
+#define PCM_FIX_VALUE_SEL_MASK_SFT                          (0x1 << 31)
+#define PCM_BUFFER_LOOPBACK_SFT                             30
+#define PCM_BUFFER_LOOPBACK_MASK                            0x1
+#define PCM_BUFFER_LOOPBACK_MASK_SFT                        (0x1 << 30)
+#define PCM_PARALLEL_LOOPBACK_SFT                           29
+#define PCM_PARALLEL_LOOPBACK_MASK                          0x1
+#define PCM_PARALLEL_LOOPBACK_MASK_SFT                      (0x1 << 29)
+#define PCM_SERIAL_LOOPBACK_SFT                             28
+#define PCM_SERIAL_LOOPBACK_MASK                            0x1
+#define PCM_SERIAL_LOOPBACK_MASK_SFT                        (0x1 << 28)
+#define PCM_DAI_PCM_LOOPBACK_SFT                            27
+#define PCM_DAI_PCM_LOOPBACK_MASK                           0x1
+#define PCM_DAI_PCM_LOOPBACK_MASK_SFT                       (0x1 << 27)
+#define PCM_I2S_PCM_LOOPBACK_SFT                            26
+#define PCM_I2S_PCM_LOOPBACK_MASK                           0x1
+#define PCM_I2S_PCM_LOOPBACK_MASK_SFT                       (0x1 << 26)
+#define PCM_SYNC_DELSEL_SFT                                 25
+#define PCM_SYNC_DELSEL_MASK                                0x1
+#define PCM_SYNC_DELSEL_MASK_SFT                            (0x1 << 25)
+#define PCM_TX_LR_SWAP_SFT                                  24
+#define PCM_TX_LR_SWAP_MASK                                 0x1
+#define PCM_TX_LR_SWAP_MASK_SFT                             (0x1 << 24)
+#define PCM_SYNC_OUT_INV_SFT                                23
+#define PCM_SYNC_OUT_INV_MASK                               0x1
+#define PCM_SYNC_OUT_INV_MASK_SFT                           (0x1 << 23)
+#define PCM_BCLK_OUT_INV_SFT                                22
+#define PCM_BCLK_OUT_INV_MASK                               0x1
+#define PCM_BCLK_OUT_INV_MASK_SFT                           (0x1 << 22)
+#define PCM_SYNC_IN_INV_SFT                                 21
+#define PCM_SYNC_IN_INV_MASK                                0x1
+#define PCM_SYNC_IN_INV_MASK_SFT                            (0x1 << 21)
+#define PCM_BCLK_IN_INV_SFT                                 20
+#define PCM_BCLK_IN_INV_MASK                                0x1
+#define PCM_BCLK_IN_INV_MASK_SFT                            (0x1 << 20)
+#define PCM_TX_LCH_RPT_SFT                                  19
+#define PCM_TX_LCH_RPT_MASK                                 0x1
+#define PCM_TX_LCH_RPT_MASK_SFT                             (0x1 << 19)
+#define PCM_VBT_16K_MODE_SFT                                18
+#define PCM_VBT_16K_MODE_MASK                               0x1
+#define PCM_VBT_16K_MODE_MASK_SFT                           (0x1 << 18)
+#define PCM_EXT_MODEM_SFT                                   17
+#define PCM_EXT_MODEM_MASK                                  0x1
+#define PCM_EXT_MODEM_MASK_SFT                              (0x1 << 17)
+#define PCM_24BIT_SFT                                       16
+#define PCM_24BIT_MASK                                      0x1
+#define PCM_24BIT_MASK_SFT                                  (0x1 << 16)
+#define PCM_WLEN_SFT                                        14
+#define PCM_WLEN_MASK                                       0x3
+#define PCM_WLEN_MASK_SFT                                   (0x3 << 14)
+#define PCM_SYNC_LENGTH_SFT                                 9
+#define PCM_SYNC_LENGTH_MASK                                0x1f
+#define PCM_SYNC_LENGTH_MASK_SFT                            (0x1f << 9)
+#define PCM_SYNC_TYPE_SFT                                   8
+#define PCM_SYNC_TYPE_MASK                                  0x1
+#define PCM_SYNC_TYPE_MASK_SFT                              (0x1 << 8)
+#define PCM_BT_MODE_SFT                                     7
+#define PCM_BT_MODE_MASK                                    0x1
+#define PCM_BT_MODE_MASK_SFT                                (0x1 << 7)
+#define PCM_BYP_ASRC_SFT                                    6
+#define PCM_BYP_ASRC_MASK                                   0x1
+#define PCM_BYP_ASRC_MASK_SFT                               (0x1 << 6)
+#define PCM_SLAVE_SFT                                       5
+#define PCM_SLAVE_MASK                                      0x1
+#define PCM_SLAVE_MASK_SFT                                  (0x1 << 5)
+#define PCM_MODE_SFT                                        3
+#define PCM_MODE_MASK                                       0x3
+#define PCM_MODE_MASK_SFT                                   (0x3 << 3)
+#define PCM_FMT_SFT                                         1
+#define PCM_FMT_MASK                                        0x3
+#define PCM_FMT_MASK_SFT                                    (0x3 << 1)
+#define PCM_EN_SFT                                          0
+#define PCM_EN_MASK                                         0x1
+#define PCM_EN_MASK_SFT                                     (0x1 << 0)
+
+/* PCM_INTF_CON2 */
+#define PCM1_TX_FIFO_OV_SFT                                 31
+#define PCM1_TX_FIFO_OV_MASK                                0x1
+#define PCM1_TX_FIFO_OV_MASK_SFT                            (0x1 << 31)
+#define PCM1_RX_FIFO_OV_SFT                                 30
+#define PCM1_RX_FIFO_OV_MASK                                0x1
+#define PCM1_RX_FIFO_OV_MASK_SFT                            (0x1 << 30)
+#define PCM2_TX_FIFO_OV_SFT                                 29
+#define PCM2_TX_FIFO_OV_MASK                                0x1
+#define PCM2_TX_FIFO_OV_MASK_SFT                            (0x1 << 29)
+#define PCM2_RX_FIFO_OV_SFT                                 28
+#define PCM2_RX_FIFO_OV_MASK                                0x1
+#define PCM2_RX_FIFO_OV_MASK_SFT                            (0x1 << 28)
+#define PCM1_SYNC_GLITCH_SFT                                27
+#define PCM1_SYNC_GLITCH_MASK                               0x1
+#define PCM1_SYNC_GLITCH_MASK_SFT                           (0x1 << 27)
+#define PCM2_SYNC_GLITCH_SFT                                26
+#define PCM2_SYNC_GLITCH_MASK                               0x1
+#define PCM2_SYNC_GLITCH_MASK_SFT                           (0x1 << 26)
+#define PCM1_PCM2_LOOPBACK_SFT                              15
+#define PCM1_PCM2_LOOPBACK_MASK                             0x1
+#define PCM1_PCM2_LOOPBACK_MASK_SFT                         (0x1 << 15)
+#define DAI_PCM_LOOPBACK_CH_SFT                             13
+#define DAI_PCM_LOOPBACK_CH_MASK                            0x1
+#define DAI_PCM_LOOPBACK_CH_MASK_SFT                        (0x1 << 13)
+#define I2S_PCM_LOOPBACK_CH_SFT                             12
+#define I2S_PCM_LOOPBACK_CH_MASK                            0x1
+#define I2S_PCM_LOOPBACK_CH_MASK_SFT                        (0x1 << 12)
+#define PCM_USE_MD3_SFT                                     8
+#define PCM_USE_MD3_MASK                                    0x1
+#define PCM_USE_MD3_MASK_SFT                                (0x1 << 8)
+#define TX_FIX_VALUE_SFT                                    0
+#define TX_FIX_VALUE_MASK                                   0xff
+#define TX_FIX_VALUE_MASK_SFT                               (0xff << 0)
+
+/* PCM2_INTF_CON */
+#define PCM2_TX_FIX_VALUE_SFT                                24
+#define PCM2_TX_FIX_VALUE_MASK                               0xff
+#define PCM2_TX_FIX_VALUE_MASK_SFT                           (0xff << 24)
+#define PCM2_FIX_VALUE_SEL_SFT                               23
+#define PCM2_FIX_VALUE_SEL_MASK                              0x1
+#define PCM2_FIX_VALUE_SEL_MASK_SFT                          (0x1 << 23)
+#define PCM2_BUFFER_LOOPBACK_SFT                             22
+#define PCM2_BUFFER_LOOPBACK_MASK                            0x1
+#define PCM2_BUFFER_LOOPBACK_MASK_SFT                        (0x1 << 22)
+#define PCM2_PARALLEL_LOOPBACK_SFT                           21
+#define PCM2_PARALLEL_LOOPBACK_MASK                          0x1
+#define PCM2_PARALLEL_LOOPBACK_MASK_SFT                      (0x1 << 21)
+#define PCM2_SERIAL_LOOPBACK_SFT                             20
+#define PCM2_SERIAL_LOOPBACK_MASK                            0x1
+#define PCM2_SERIAL_LOOPBACK_MASK_SFT                        (0x1 << 20)
+#define PCM2_DAI_PCM_LOOPBACK_SFT                            19
+#define PCM2_DAI_PCM_LOOPBACK_MASK                           0x1
+#define PCM2_DAI_PCM_LOOPBACK_MASK_SFT                       (0x1 << 19)
+#define PCM2_I2S_PCM_LOOPBACK_SFT                            18
+#define PCM2_I2S_PCM_LOOPBACK_MASK                           0x1
+#define PCM2_I2S_PCM_LOOPBACK_MASK_SFT                       (0x1 << 18)
+#define PCM2_SYNC_DELSEL_SFT                                 17
+#define PCM2_SYNC_DELSEL_MASK                                0x1
+#define PCM2_SYNC_DELSEL_MASK_SFT                            (0x1 << 17)
+#define PCM2_TX_LR_SWAP_SFT                                  16
+#define PCM2_TX_LR_SWAP_MASK                                 0x1
+#define PCM2_TX_LR_SWAP_MASK_SFT                             (0x1 << 16)
+#define PCM2_SYNC_IN_INV_SFT                                 15
+#define PCM2_SYNC_IN_INV_MASK                                0x1
+#define PCM2_SYNC_IN_INV_MASK_SFT                            (0x1 << 15)
+#define PCM2_BCLK_IN_INV_SFT                                 14
+#define PCM2_BCLK_IN_INV_MASK                                0x1
+#define PCM2_BCLK_IN_INV_MASK_SFT                            (0x1 << 14)
+#define PCM2_TX_LCH_RPT_SFT                                  13
+#define PCM2_TX_LCH_RPT_MASK                                 0x1
+#define PCM2_TX_LCH_RPT_MASK_SFT                             (0x1 << 13)
+#define PCM2_VBT_16K_MODE_SFT                                12
+#define PCM2_VBT_16K_MODE_MASK                               0x1
+#define PCM2_VBT_16K_MODE_MASK_SFT                           (0x1 << 12)
+#define PCM2_LOOPBACK_CH_SEL_SFT                             10
+#define PCM2_LOOPBACK_CH_SEL_MASK                            0x3
+#define PCM2_LOOPBACK_CH_SEL_MASK_SFT                        (0x3 << 10)
+#define PCM2_TX2_BT_MODE_SFT                                 8
+#define PCM2_TX2_BT_MODE_MASK                                0x1
+#define PCM2_TX2_BT_MODE_MASK_SFT                            (0x1 << 8)
+#define PCM2_BT_MODE_SFT                                     7
+#define PCM2_BT_MODE_MASK                                    0x1
+#define PCM2_BT_MODE_MASK_SFT                                (0x1 << 7)
+#define PCM2_AFIFO_SFT                                       6
+#define PCM2_AFIFO_MASK                                      0x1
+#define PCM2_AFIFO_MASK_SFT                                  (0x1 << 6)
+#define PCM2_WLEN_SFT                                        5
+#define PCM2_WLEN_MASK                                       0x1
+#define PCM2_WLEN_MASK_SFT                                   (0x1 << 5)
+#define PCM2_MODE_SFT                                        3
+#define PCM2_MODE_MASK                                       0x3
+#define PCM2_MODE_MASK_SFT                                   (0x3 << 3)
+#define PCM2_FMT_SFT                                         1
+#define PCM2_FMT_MASK                                        0x3
+#define PCM2_FMT_MASK_SFT                                    (0x3 << 1)
+#define PCM2_EN_SFT                                          0
+#define PCM2_EN_MASK                                         0x1
+#define PCM2_EN_MASK_SFT                                     (0x1 << 0)
+#endif
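
Every field in the register header above follows one triple convention: <FIELD>_SFT is the bit offset, <FIELD>_MASK is the unshifted field mask, and <FIELD>_MASK_SFT is the mask pre-shifted into register position. A minimal sketch of how such a triple is typically consumed through regmap; the helper name is hypothetical, and it assumes the AFE_ADDA_DL_SRC2_CON0 register offset is defined earlier in the same header:

	#include <linux/regmap.h>

	static int afe_set_dl2_voice_mode(struct regmap *regmap, unsigned int on)
	{
		/* MASK_SFT selects which bits get updated; the new value is
		 * shifted into position with SFT */
		return regmap_update_bits(regmap, AFE_ADDA_DL_SRC2_CON0,
					  DL_2_VOICE_MODE_CTL_PRE_MASK_SFT,
					  (on & DL_2_VOICE_MODE_CTL_PRE_MASK) <<
					  DL_2_VOICE_MODE_CTL_PRE_SFT);
	}
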
diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-common.h b/sound/soc/mediatek/mt8173/mt8173-afe-common.h
index 9a4837c..396fe23 100644
--- a/sound/soc/mediatek/mt8173/mt8173-afe-common.h
+++ b/sound/soc/mediatek/mt8173/mt8173-afe-common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * mt8173_afe_common.h  --  Mediatek 8173 audio driver common definitions
  *
@@ -6,15 +7,6 @@
  *             Sascha Hauer <s.hauer@pengutronix.de>
  *             Hidalgo Huang <hidalgo.huang@mediatek.com>
  *             Ir Lian <ir.lian@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef _MT8173_AFE_COMMON_H_
diff --git a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
index 65d1433..c0b6697 100644
--- a/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
+++ b/sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Mediatek 8173 ALSA SoC AFE platform driver
  *
@@ -6,15 +7,6 @@
  *             Sascha Hauer <s.hauer@pengutronix.de>
  *             Hidalgo Huang <hidalgo.huang@mediatek.com>
  *             Ir Lian <ir.lian@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/delay.h>
@@ -303,9 +295,7 @@
 static int mt8173_afe_i2s_startup(struct snd_pcm_substream *substream,
 				  struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 
 	if (dai->active)
 		return 0;
@@ -318,9 +308,7 @@
 static void mt8173_afe_i2s_shutdown(struct snd_pcm_substream *substream,
 				    struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 
 	if (dai->active)
 		return;
@@ -334,10 +322,8 @@
 static int mt8173_afe_i2s_prepare(struct snd_pcm_substream *substream,
 				  struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_pcm_runtime * const runtime = substream->runtime;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt8173_afe_private *afe_priv = afe->platform_priv;
 	int ret;
 
@@ -358,9 +344,7 @@
 static int mt8173_afe_hdmi_startup(struct snd_pcm_substream *substream,
 				   struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt8173_afe_private *afe_priv = afe->platform_priv;
 
 	if (dai->active)
@@ -374,9 +358,7 @@
 static void mt8173_afe_hdmi_shutdown(struct snd_pcm_substream *substream,
 				     struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt8173_afe_private *afe_priv = afe->platform_priv;
 
 	if (dai->active)
@@ -389,10 +371,8 @@
 static int mt8173_afe_hdmi_prepare(struct snd_pcm_substream *substream,
 				   struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_pcm_runtime * const runtime = substream->runtime;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 	struct mt8173_afe_private *afe_priv = afe->platform_priv;
 
 	unsigned int val;
@@ -454,9 +434,7 @@
 static int mt8173_afe_hdmi_trigger(struct snd_pcm_substream *substream, int cmd,
 				   struct snd_soc_dai *dai)
 {
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, AFE_PCM_NAME);
-	struct mtk_base_afe *afe = snd_soc_component_get_drvdata(component);
+	struct mtk_base_afe *afe = snd_soc_dai_get_drvdata(dai);
 
 	dev_info(afe->dev, "%s cmd=%d %s\n", __func__, cmd, dai->name);
 
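The hunks above collapse the repeated three-step lookup (substream private data, then the AFE_PCM_NAME component, then its drvdata) into one call. That is safe here because the AFE driver data hangs off the DAI's own device, so the helper amounts to a plain drvdata read:

	/* equivalent expansion of the new one-liner (inline helper in the ASoC headers) */
	struct mtk_base_afe *afe = dev_get_drvdata(dai->dev);
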
diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c
index b49b527..902d111 100644
--- a/sound/soc/mediatek/mt8173/mt8173-max98090.c
+++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mt8173-max98090.c  --  MT8173 MAX98090 ALSA SoC machine driver
  *
  * Copyright (c) 2015 MediaTek Inc.
  * Author: Koro Chen <koro.chen@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
index 904f3ee..582174d 100644
--- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
+++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mt8173-rt5650-rt5514.c  --  MT8173 machine driver with RT5650/5514 codecs
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Koro Chen <koro.chen@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
index 9c61b8c..b3670c8 100644
--- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
+++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mt8173-rt5650-rt5676.c  --  MT8173 machine driver with RT5650/5676 codecs
  *
  * Copyright (c) 2015 MediaTek Inc.
  * Author: Koro Chen <koro.chen@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c
index 84aa09d..7a89b4a 100644
--- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c
+++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * mt8173-rt5650.c  --  MT8173 machine driver with RT5650 codecs
  *
  * Copyright (c) 2016 MediaTek Inc.
  * Author: Koro Chen <koro.chen@mediatek.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/module.h>
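
Note the two SPDX comment styles in the conversions above: the header (mt8173-afe-common.h) carries the identifier in a /* */ block while the .c files use the // form, matching the kernel's license-rules convention:

	/* SPDX-License-Identifier: GPL-2.0 */	for C headers
	// SPDX-License-Identifier: GPL-2.0	for C sources
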
diff --git a/sound/soc/omap/Kconfig b/sound/soc/omap/Kconfig
index f5451c7..6dccea6 100644
--- a/sound/soc/omap/Kconfig
+++ b/sound/soc/omap/Kconfig
@@ -1,7 +1,12 @@
 config SND_OMAP_SOC
-	tristate "SoC Audio for the Texas Instruments OMAP chips"
+	tristate "SoC Audio for Texas Instruments OMAP chips (deprecated)"
 	depends on (ARCH_OMAP && DMA_OMAP) || (ARM && COMPILE_TEST)
-	select SND_DMAENGINE_PCM
+	select SND_SDMA_SOC
+
+config SND_SDMA_SOC
+	tristate "SoC Audio for Texas Instruments chips using sDMA"
+	depends on DMA_OMAP || COMPILE_TEST
+	select SND_SOC_GENERIC_DMAENGINE_PCM
 
 config SND_OMAP_SOC_DMIC
 	tristate
@@ -14,7 +19,7 @@
 
 config SND_OMAP_SOC_HDMI_AUDIO
 	tristate "HDMI audio support for OMAP4+ based SoCs"
-	depends on SND_OMAP_SOC
+	depends on SND_SDMA_SOC
 	help
 	  For HDMI audio to work OMAPDSS HDMI support should be
 	  enabled.
@@ -29,8 +34,7 @@
 
 config SND_OMAP_SOC_N810
 	tristate "SoC Audio support for Nokia N810"
-	depends on SND_OMAP_SOC && MACH_NOKIA_N810 && I2C
-	depends on OMAP_MUX
+	depends on SND_SDMA_SOC && MACH_NOKIA_N810 && I2C
 	select SND_OMAP_SOC_MCBSP
 	select SND_SOC_TLV320AIC3X
 	help
@@ -38,7 +42,7 @@
 
 config SND_OMAP_SOC_RX51
 	tristate "SoC Audio support for Nokia N900 (RX-51)"
-	depends on SND_OMAP_SOC && ARM && I2C
+	depends on SND_SDMA_SOC && ARM && I2C
 	select SND_OMAP_SOC_MCBSP
 	select SND_SOC_TLV320AIC3X
 	select SND_SOC_TPA6130A2
@@ -49,7 +53,7 @@
 
 config SND_OMAP_SOC_AMS_DELTA
 	tristate "SoC Audio support for Amstrad E3 (Delta) videophone"
-	depends on SND_OMAP_SOC && MACH_AMS_DELTA && TTY
+	depends on SND_SDMA_SOC && MACH_AMS_DELTA && TTY
 	select SND_OMAP_SOC_MCBSP
 	select SND_SOC_CX20442
 	help
@@ -68,7 +72,7 @@
 
 config SND_OMAP_SOC_OSK5912
 	tristate "SoC Audio support for omap osk5912"
-	depends on SND_OMAP_SOC && MACH_OMAP_OSK && I2C
+	depends on SND_SDMA_SOC && MACH_OMAP_OSK && I2C
 	select SND_OMAP_SOC_MCBSP
 	select SND_SOC_TLV320AIC23_I2C
 	help
@@ -76,7 +80,7 @@
 
 config SND_OMAP_SOC_AM3517EVM
 	tristate "SoC Audio support for OMAP3517 / AM3517 EVM"
-	depends on SND_OMAP_SOC && MACH_OMAP3517EVM && I2C
+	depends on SND_SDMA_SOC && MACH_OMAP3517EVM && I2C
 	select SND_OMAP_SOC_MCBSP
 	select SND_SOC_TLV320AIC23_I2C
 	help
@@ -85,7 +89,7 @@
 
 config SND_OMAP_SOC_OMAP_TWL4030
 	tristate "SoC Audio support for TI SoC based boards with twl4030 codec"
-	depends on TWL4030_CORE && SND_OMAP_SOC
+	depends on TWL4030_CORE && SND_SDMA_SOC
 	select SND_OMAP_SOC_MCBSP
 	select SND_SOC_TWL4030
 	help
@@ -100,7 +104,7 @@
 
 config SND_OMAP_SOC_OMAP_ABE_TWL6040
 	tristate "SoC Audio support for OMAP boards using ABE and twl6040 codec"
-	depends on TWL6040_CORE && SND_OMAP_SOC && COMMON_CLK
+	depends on TWL6040_CORE && SND_SDMA_SOC && COMMON_CLK
 	depends on ARCH_OMAP4 || (SOC_OMAP5 && MFD_PALMAS) || COMPILE_TEST
 	select SND_OMAP_SOC_DMIC
 	select SND_OMAP_SOC_MCPDM
@@ -118,7 +122,7 @@
 
 config SND_OMAP_SOC_OMAP3_PANDORA
 	tristate "SoC Audio support for OMAP3 Pandora"
-	depends on TWL4030_CORE && SND_OMAP_SOC && MACH_OMAP3_PANDORA
+	depends on TWL4030_CORE && SND_SDMA_SOC && MACH_OMAP3_PANDORA
 	select SND_OMAP_SOC_MCBSP
 	select SND_SOC_TWL4030
 	help
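
The symbol split stays backward compatible: SND_OMAP_SOC remains as a deprecated wrapper that selects the new SND_SDMA_SOC, and each machine driver now depends on SND_SDMA_SOC directly, so the sDMA PCM platform is no longer tied to OMAP-only configurations (hence the broader "Texas Instruments chips using sDMA" prompt).
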
diff --git a/sound/soc/omap/Makefile b/sound/soc/omap/Makefile
index a6785dc..53eba34 100644
--- a/sound/soc/omap/Makefile
+++ b/sound/soc/omap/Makefile
@@ -1,12 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 # OMAP Platform Support
-snd-soc-omap-objs := omap-pcm.o
+snd-soc-sdma-objs := sdma-pcm.o
 snd-soc-omap-dmic-objs := omap-dmic.o
 snd-soc-omap-mcbsp-objs := omap-mcbsp.o mcbsp.o
 snd-soc-omap-mcpdm-objs := omap-mcpdm.o
 snd-soc-omap-hdmi-audio-objs := omap-hdmi-audio.o
 
-obj-$(CONFIG_SND_OMAP_SOC) += snd-soc-omap.o
+obj-$(CONFIG_SND_SDMA_SOC) += snd-soc-sdma.o
 obj-$(CONFIG_SND_OMAP_SOC_DMIC) += snd-soc-omap-dmic.o
 obj-$(CONFIG_SND_OMAP_SOC_MCBSP) += snd-soc-omap-mcbsp.o
 obj-$(CONFIG_SND_OMAP_SOC_MCPDM) += snd-soc-omap-mcpdm.o
diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c
index 71e5f31..9cfefe4 100644
--- a/sound/soc/omap/n810.c
+++ b/sound/soc/omap/n810.c
@@ -80,9 +80,9 @@
 	else
 		snd_soc_dapm_disable_pin_unlocked(dapm, "Headphone Jack");
 	if (line1l)
-		snd_soc_dapm_enable_pin_unlocked(dapm, "LINE1L");
+		snd_soc_dapm_enable_pin_unlocked(dapm, "HS Mic");
 	else
-		snd_soc_dapm_disable_pin_unlocked(dapm, "LINE1L");
+		snd_soc_dapm_disable_pin_unlocked(dapm, "HS Mic");
 
 	if (n810_dmic_func)
 		snd_soc_dapm_enable_pin_unlocked(dapm, "DMic");
@@ -222,6 +222,7 @@
 	SND_SOC_DAPM_SPK("Ext Spk", n810_spk_event),
 	SND_SOC_DAPM_HP("Headphone Jack", n810_jack_event),
 	SND_SOC_DAPM_MIC("DMic", NULL),
+	SND_SOC_DAPM_MIC("HS Mic", NULL),
 };
 
 static const struct snd_soc_dapm_route audio_map[] = {
@@ -231,8 +232,14 @@
 	{"Ext Spk", NULL, "LLOUT"},
 	{"Ext Spk", NULL, "RLOUT"},
 
-	{"DMic Rate 64", NULL, "Mic Bias"},
-	{"Mic Bias", NULL, "DMic"},
+	{"DMic Rate 64", NULL, "DMic"},
+	{"DMic", NULL, "Mic Bias"},
+
+	/*
+	 * Note that the mic bias comes from Retu/Vilma and we have no
+	 * control over it at the moment. The analog HS Mic is not
+	 * working yet (TODO).
+	 */
+	{"LINE1L", NULL, "HS Mic"},
 };
 
 static const char *spk_function[] = {"Off", "On"};
@@ -257,9 +264,9 @@
 static struct snd_soc_dai_link n810_dai = {
 	.name = "TLV320AIC33",
 	.stream_name = "AIC33",
-	.cpu_dai_name = "omap-mcbsp.2",
-	.platform_name = "omap-mcbsp.2",
-	.codec_name = "tlv320aic3x-codec.2-0018",
+	.cpu_dai_name = "48076000.mcbsp",
+	.platform_name = "48076000.mcbsp",
+	.codec_name = "tlv320aic3x-codec.1-0018",
 	.codec_dai_name = "tlv320aic3x-hifi",
 	.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF |
 		   SND_SOC_DAIFMT_CBM_CFM,
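
The N810 link switches from legacy platform-device names ("omap-mcbsp.2", "tlv320aic3x-codec.2-0018") to device-tree style ones: the McBSP is addressed by its MMIO base ("48076000.mcbsp") and the codec by its DT-probed I2C bus, so the machine driver now matches what a DT boot instantiates.
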
diff --git a/sound/soc/omap/omap-dmic.c b/sound/soc/omap/omap-dmic.c
index b2f5d2f..51dd7c6 100644
--- a/sound/soc/omap/omap-dmic.c
+++ b/sound/soc/omap/omap-dmic.c
@@ -40,9 +40,9 @@
 #include <sound/initval.h>
 #include <sound/soc.h>
 #include <sound/dmaengine_pcm.h>
-#include <sound/omap-pcm.h>
 
 #include "omap-dmic.h"
+#include "sdma-pcm.h"
 
 struct omap_dmic {
 	struct device *dev;
@@ -501,7 +501,7 @@
 	if (ret)
 		return ret;
 
-	ret = omap_pcm_platform_register(&pdev->dev);
+	ret = sdma_pcm_platform_register(&pdev->dev, NULL, "up_link");
 	if (ret)
 		return ret;
 
diff --git a/sound/soc/omap/omap-hdmi-audio.c b/sound/soc/omap/omap-hdmi-audio.c
index 8eeac7c..8a99a88 100644
--- a/sound/soc/omap/omap-hdmi-audio.c
+++ b/sound/soc/omap/omap-hdmi-audio.c
@@ -26,9 +26,10 @@
 #include <sound/dmaengine_pcm.h>
 #include <uapi/sound/asound.h>
 #include <sound/asoundef.h>
-#include <sound/omap-pcm.h>
 #include <sound/omap-hdmi-audio.h>
 
+#include "sdma-pcm.h"
+
 #define DRV_NAME "omap-hdmi-audio"
 
 struct hdmi_audio_data {
@@ -352,7 +353,7 @@
 	if (ret)
 		return ret;
 
-	ret = omap_pcm_platform_register(ad->dssdev);
+	ret = sdma_pcm_platform_register(ad->dssdev, "audio_tx", NULL);
 	if (ret)
 		return ret;
 
diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index 6b40bdb..d0ebb6b 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -34,11 +34,11 @@
 #include <sound/initval.h>
 #include <sound/soc.h>
 #include <sound/dmaengine_pcm.h>
-#include <sound/omap-pcm.h>
 
 #include <linux/platform_data/asoc-ti-mcbsp.h>
 #include "mcbsp.h"
 #include "omap-mcbsp.h"
+#include "sdma-pcm.h"
 
 #define OMAP_MCBSP_RATES	(SNDRV_PCM_RATE_8000_96000)
 
@@ -868,7 +868,7 @@
 	if (ret)
 		return ret;
 
-	return omap_pcm_platform_register(&pdev->dev);
+	return sdma_pcm_platform_register(&pdev->dev, NULL, NULL);
 }
 
 static int asoc_mcbsp_remove(struct platform_device *pdev)
diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c
index 64609c7..0e97360 100644
--- a/sound/soc/omap/omap-mcpdm.c
+++ b/sound/soc/omap/omap-mcpdm.c
@@ -40,9 +40,9 @@
 #include <sound/pcm_params.h>
 #include <sound/soc.h>
 #include <sound/dmaengine_pcm.h>
-#include <sound/omap-pcm.h>
 
 #include "omap-mcpdm.h"
+#include "sdma-pcm.h"
 
 struct mcpdm_link_config {
 	u32 link_mask; /* channel mask for the direction */
@@ -548,7 +548,7 @@
 	if (ret)
 		return ret;
 
-	return omap_pcm_platform_register(&pdev->dev);
+	return sdma_pcm_platform_register(&pdev->dev, "dn_link", "up_link");
 }
 
 static const struct of_device_id omap_mcpdm_of_match[] = {
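
The converted CPU DAI drivers all funnel into the new sdma_pcm_platform_register(), which takes per-direction dmaengine channel names; as the sdma-pcm.c addition further down shows, two NULLs fall back to the standard "tx"/"rx" names, and naming only one direction registers a half-duplex PCM. The variants used above, side by side (illustrative; error handling elided):

	sdma_pcm_platform_register(dev, NULL, NULL);            /* McBSP: default "tx"/"rx" */
	sdma_pcm_platform_register(dev, NULL, "up_link");       /* DMIC: capture only */
	sdma_pcm_platform_register(dev, "audio_tx", NULL);      /* HDMI audio: playback only */
	sdma_pcm_platform_register(dev, "dn_link", "up_link");  /* McPDM: both directions named */
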
diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c
deleted file mode 100644
index 778cc8f..0000000
--- a/sound/soc/omap/omap-pcm.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * omap-pcm.c  --  ALSA PCM interface for the OMAP SoC
- *
- * Copyright (C) 2008 Nokia Corporation
- *
- * Contact: Jarkko Nikula <jarkko.nikula@bitmer.com>
- *          Peter Ujfalusi <peter.ujfalusi@ti.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- *
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/omap-dma.h>
-#include <sound/core.h>
-#include <sound/pcm.h>
-#include <sound/pcm_params.h>
-#include <sound/dmaengine_pcm.h>
-#include <sound/soc.h>
-#include <sound/omap-pcm.h>
-
-#ifdef CONFIG_ARCH_OMAP1
-#define pcm_omap1510()	cpu_is_omap1510()
-#else
-#define pcm_omap1510()	0
-#endif
-
-static struct snd_pcm_hardware omap_pcm_hardware = {
-	.info			= SNDRV_PCM_INFO_MMAP |
-				  SNDRV_PCM_INFO_MMAP_VALID |
-				  SNDRV_PCM_INFO_INTERLEAVED |
-				  SNDRV_PCM_INFO_PAUSE |
-				  SNDRV_PCM_INFO_RESUME |
-				  SNDRV_PCM_INFO_NO_PERIOD_WAKEUP,
-	.period_bytes_min	= 32,
-	.period_bytes_max	= 64 * 1024,
-	.periods_min		= 2,
-	.periods_max		= 255,
-	.buffer_bytes_max	= 128 * 1024,
-};
-
-/* sDMA supports only 1, 2, and 4 byte transfer elements. */
-static void omap_pcm_limit_supported_formats(void)
-{
-	int i;
-
-	for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) {
-		switch (snd_pcm_format_physical_width(i)) {
-		case 8:
-		case 16:
-		case 32:
-			omap_pcm_hardware.formats |= (1LL << i);
-			break;
-		default:
-			break;
-		}
-	}
-}
-
-/* this may get called several times by oss emulation */
-static int omap_pcm_hw_params(struct snd_pcm_substream *substream,
-			      struct snd_pcm_hw_params *params)
-{
-	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct omap_pcm_dma_data *dma_data;
-	struct dma_slave_config config;
-	struct dma_chan *chan;
-	int err = 0;
-
-	memset(&config, 0x00, sizeof(config));
-
-	dma_data = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream);
-
-	/* return if this is a bufferless transfer e.g.
-	 * codec <--> BT codec or GSM modem -- lg FIXME */
-	if (!dma_data)
-		return 0;
-
-	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
-	runtime->dma_bytes = params_buffer_bytes(params);
-
-	chan = snd_dmaengine_pcm_get_chan(substream);
-	if (!chan)
-		return -EINVAL;
-
-	/* fills in addr_width and direction */
-	err = snd_hwparams_to_dma_slave_config(substream, params, &config);
-	if (err)
-		return err;
-
-	snd_dmaengine_pcm_set_config_from_dai_data(substream,
-			snd_soc_dai_get_dma_data(rtd->cpu_dai, substream),
-			&config);
-
-	return dmaengine_slave_config(chan, &config);
-}
-
-static int omap_pcm_hw_free(struct snd_pcm_substream *substream)
-{
-	snd_pcm_set_runtime_buffer(substream, NULL);
-	return 0;
-}
-
-static snd_pcm_uframes_t omap_pcm_pointer(struct snd_pcm_substream *substream)
-{
-	snd_pcm_uframes_t offset;
-
-	if (pcm_omap1510())
-		offset = snd_dmaengine_pcm_pointer_no_residue(substream);
-	else
-		offset = snd_dmaengine_pcm_pointer(substream);
-
-	return offset;
-}
-
-static int omap_pcm_open(struct snd_pcm_substream *substream)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_dmaengine_dai_dma_data *dma_data;
-	int ret;
-
-	snd_soc_set_runtime_hwparams(substream, &omap_pcm_hardware);
-
-	dma_data = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream);
-
-	/* DT boot: filter_data is the DMA name */
-	if (rtd->cpu_dai->dev->of_node) {
-		struct dma_chan *chan;
-
-		chan = dma_request_slave_channel(rtd->cpu_dai->dev,
-						 dma_data->filter_data);
-		ret = snd_dmaengine_pcm_open(substream, chan);
-	} else {
-		ret = snd_dmaengine_pcm_open_request_chan(substream,
-							  omap_dma_filter_fn,
-							  dma_data->filter_data);
-	}
-	return ret;
-}
-
-static int omap_pcm_mmap(struct snd_pcm_substream *substream,
-	struct vm_area_struct *vma)
-{
-	struct snd_pcm_runtime *runtime = substream->runtime;
-
-	return dma_mmap_wc(substream->pcm->card->dev, vma, runtime->dma_area,
-			   runtime->dma_addr, runtime->dma_bytes);
-}
-
-static const struct snd_pcm_ops omap_pcm_ops = {
-	.open		= omap_pcm_open,
-	.close		= snd_dmaengine_pcm_close_release_chan,
-	.ioctl		= snd_pcm_lib_ioctl,
-	.hw_params	= omap_pcm_hw_params,
-	.hw_free	= omap_pcm_hw_free,
-	.trigger	= snd_dmaengine_pcm_trigger,
-	.pointer	= omap_pcm_pointer,
-	.mmap		= omap_pcm_mmap,
-};
-
-static int omap_pcm_preallocate_dma_buffer(struct snd_pcm *pcm,
-	int stream)
-{
-	struct snd_pcm_substream *substream = pcm->streams[stream].substream;
-	struct snd_dma_buffer *buf = &substream->dma_buffer;
-	size_t size = omap_pcm_hardware.buffer_bytes_max;
-
-	buf->dev.type = SNDRV_DMA_TYPE_DEV;
-	buf->dev.dev = pcm->card->dev;
-	buf->private_data = NULL;
-	buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
-	if (!buf->area)
-		return -ENOMEM;
-
-	buf->bytes = size;
-	return 0;
-}
-
-static void omap_pcm_free_dma_buffers(struct snd_pcm *pcm)
-{
-	struct snd_pcm_substream *substream;
-	struct snd_dma_buffer *buf;
-	int stream;
-
-	for (stream = 0; stream < 2; stream++) {
-		substream = pcm->streams[stream].substream;
-		if (!substream)
-			continue;
-
-		buf = &substream->dma_buffer;
-		if (!buf->area)
-			continue;
-
-		dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
-		buf->area = NULL;
-	}
-}
-
-static int omap_pcm_new(struct snd_soc_pcm_runtime *rtd)
-{
-	struct snd_card *card = rtd->card->snd_card;
-	struct snd_pcm *pcm = rtd->pcm;
-	int ret;
-
-	ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32));
-	if (ret)
-		return ret;
-
-	if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) {
-		ret = omap_pcm_preallocate_dma_buffer(pcm,
-			SNDRV_PCM_STREAM_PLAYBACK);
-		if (ret)
-			goto out;
-	}
-
-	if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) {
-		ret = omap_pcm_preallocate_dma_buffer(pcm,
-			SNDRV_PCM_STREAM_CAPTURE);
-		if (ret)
-			goto out;
-	}
-
-out:
-	/* free preallocated buffers in case of error */
-	if (ret)
-		omap_pcm_free_dma_buffers(pcm);
-
-	return ret;
-}
-
-static const struct snd_soc_component_driver omap_soc_component = {
-	.ops		= &omap_pcm_ops,
-	.pcm_new	= omap_pcm_new,
-	.pcm_free	= omap_pcm_free_dma_buffers,
-};
-
-int omap_pcm_platform_register(struct device *dev)
-{
-	omap_pcm_limit_supported_formats();
-	return devm_snd_soc_register_component(dev, &omap_soc_component,
-					       NULL, 0);
-}
-EXPORT_SYMBOL_GPL(omap_pcm_platform_register);
-
-MODULE_AUTHOR("Jarkko Nikula <jarkko.nikula@bitmer.com>");
-MODULE_DESCRIPTION("OMAP PCM DMA module");
-MODULE_LICENSE("GPL");
diff --git a/sound/soc/omap/sdma-pcm.c b/sound/soc/omap/sdma-pcm.c
new file mode 100644
index 0000000..21a9c24
--- /dev/null
+++ b/sound/soc/omap/sdma-pcm.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/dmaengine_pcm.h>
+#include <linux/omap-dmaengine.h>
+
+#include "sdma-pcm.h"
+
+static const struct snd_pcm_hardware sdma_pcm_hardware = {
+	.info			= SNDRV_PCM_INFO_MMAP |
+				  SNDRV_PCM_INFO_MMAP_VALID |
+				  SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME |
+				  SNDRV_PCM_INFO_NO_PERIOD_WAKEUP |
+				  SNDRV_PCM_INFO_INTERLEAVED,
+	.period_bytes_min	= 32,
+	.period_bytes_max	= 64 * 1024,
+	.buffer_bytes_max	= 128 * 1024,
+	.periods_min		= 2,
+	.periods_max		= 255,
+};
+
+static const struct snd_dmaengine_pcm_config sdma_dmaengine_pcm_config = {
+	.pcm_hardware = &sdma_pcm_hardware,
+	.prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config,
+	.compat_filter_fn = omap_dma_filter_fn,
+	.prealloc_buffer_size = 128 * 1024,
+};
+
+int sdma_pcm_platform_register(struct device *dev,
+			       char *txdmachan, char *rxdmachan)
+{
+	struct snd_dmaengine_pcm_config *config;
+	unsigned int flags = SND_DMAENGINE_PCM_FLAG_COMPAT;
+
+	/* Standard names for the directions: 'tx' and 'rx' */
+	if (!txdmachan && !rxdmachan)
+		return devm_snd_dmaengine_pcm_register(dev,
+						&sdma_dmaengine_pcm_config,
+						flags);
+
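+	/*
+	 * The channel names differ per device, so build a device-local copy
+	 * of the default config before filling them in.
+	 */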
+	config = devm_kzalloc(dev, sizeof(*config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	*config = sdma_dmaengine_pcm_config;
+
+	if (!txdmachan || !rxdmachan) {
+		/* One direction only PCM */
+		flags |= SND_DMAENGINE_PCM_FLAG_HALF_DUPLEX;
+		if (!txdmachan) {
+			txdmachan = rxdmachan;
+			rxdmachan = NULL;
+		}
+	}
+
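+	/* chan_names[0] maps to the playback (tx) stream, [1] to capture (rx) */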
+	config->chan_names[0] = txdmachan;
+	config->chan_names[1] = rxdmachan;
+
+	return devm_snd_dmaengine_pcm_register(dev, config, flags);
+}
+EXPORT_SYMBOL_GPL(sdma_pcm_platform_register);
+
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>");
+MODULE_DESCRIPTION("sDMA PCM ASoC platform driver");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/omap/sdma-pcm.h b/sound/soc/omap/sdma-pcm.h
new file mode 100644
index 0000000..34a7f90
--- /dev/null
+++ b/sound/soc/omap/sdma-pcm.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (C) 2018 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#ifndef __SDMA_PCM_H__
+#define __SDMA_PCM_H__
+
+#if IS_ENABLED(CONFIG_SND_SDMA_SOC)
+int sdma_pcm_platform_register(struct device *dev,
+			       char *txdmachan, char *rxdmachan);
+#else
+static inline int sdma_pcm_platform_register(struct device *dev,
+					     char *txdmachan, char *rxdmachan)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_SND_SDMA_SOC */
+
+#endif /* __SDMA_PCM_H__ */
diff --git a/sound/soc/pxa/Kconfig b/sound/soc/pxa/Kconfig
index 484ab3c..960744e 100644
--- a/sound/soc/pxa/Kconfig
+++ b/sound/soc/pxa/Kconfig
@@ -1,7 +1,6 @@
 config SND_PXA2XX_SOC
 	tristate "SoC Audio for the Intel PXA2xx chip"
 	depends on ARCH_PXA || COMPILE_TEST
-	depends on HAS_DMA
 	select SND_PXA2XX_LIB
 	help
 	  Say Y or M if you want to add support for codecs attached to
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 0291c7c..6fc9860 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -43,7 +43,8 @@
 struct ssp_priv {
 	struct ssp_device *ssp;
 	unsigned int sysclk;
-	int dai_fmt;
+	unsigned int dai_fmt;
+	unsigned int configured_dai_fmt;
 #ifdef CONFIG_PM
 	uint32_t	cr0;
 	uint32_t	cr1;
@@ -216,10 +217,9 @@
 {
 	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
 	struct ssp_device *ssp = priv->ssp;
-	int val;
 
 	u32 sscr0 = pxa_ssp_read_reg(ssp, SSCR0) &
-		~(SSCR0_ECS |  SSCR0_NCS | SSCR0_MOD | SSCR0_ACS);
+		~(SSCR0_ECS | SSCR0_NCS | SSCR0_MOD | SSCR0_ACS);
 
 	dev_dbg(&ssp->pdev->dev,
 		"pxa_ssp_set_dai_sysclk id: %d, clk_id %d, freq %u\n",
@@ -257,8 +257,7 @@
 	 * on PXA2xx.  On PXA3xx it must be enabled when doing so. */
 	if (ssp->type != PXA3xx_SSP)
 		clk_disable_unprepare(ssp->clk);
-	val = pxa_ssp_read_reg(ssp, SSCR0) | sscr0;
-	pxa_ssp_write_reg(ssp, SSCR0, val);
+	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
 	if (ssp->type != PXA3xx_SSP)
 		clk_prepare_enable(ssp->clk);
 
@@ -433,36 +432,72 @@
 	return 0;
 }
 
+static int pxa_ssp_set_dai_fmt(struct snd_soc_dai *cpu_dai,
+			       unsigned int fmt)
+{
+	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+	case SND_SOC_DAIFMT_CBM_CFS:
+	case SND_SOC_DAIFMT_CBS_CFS:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+	case SND_SOC_DAIFMT_NB_IF:
+	case SND_SOC_DAIFMT_IB_IF:
+	case SND_SOC_DAIFMT_IB_NF:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+	case SND_SOC_DAIFMT_DSP_A:
+	case SND_SOC_DAIFMT_DSP_B:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Settings will be applied in hw_params() */
+	priv->dai_fmt = fmt;
+
+	return 0;
+}
+
 /*
  * Set up the SSP DAI format.
  * The SSP Port must be inactive before calling this function as the
  * physical interface format is changed.
  */
-static int pxa_ssp_set_dai_fmt(struct snd_soc_dai *cpu_dai,
-		unsigned int fmt)
+static int pxa_ssp_configure_dai_fmt(struct ssp_priv *priv)
 {
-	struct ssp_priv *priv = snd_soc_dai_get_drvdata(cpu_dai);
 	struct ssp_device *ssp = priv->ssp;
 	u32 sscr0, sscr1, sspsp, scfr;
 
 	/* check if we need to change anything at all */
-	if (priv->dai_fmt == fmt)
+	if (priv->configured_dai_fmt == priv->dai_fmt)
 		return 0;
 
-	/* we can only change the settings if the port is not in use */
-	if (pxa_ssp_read_reg(ssp, SSCR0) & SSCR0_SSE) {
-		dev_err(&ssp->pdev->dev,
-			"can't change hardware dai format: stream is in use");
-		return -EINVAL;
-	}
-
 	/* reset port settings */
 	sscr0 = pxa_ssp_read_reg(ssp, SSCR0) &
-		~(SSCR0_ECS |  SSCR0_NCS | SSCR0_MOD | SSCR0_ACS);
-	sscr1 = SSCR1_RxTresh(8) | SSCR1_TxTresh(7);
-	sspsp = 0;
+		~(SSCR0_PSP | SSCR0_MOD);
+	sscr1 = pxa_ssp_read_reg(ssp, SSCR1) &
+		~(SSCR1_SCLKDIR | SSCR1_SFRMDIR | SSCR1_SCFR |
+		  SSCR1_RWOT | SSCR1_TRAIL | SSCR1_TFT | SSCR1_RFT);
+	sspsp = pxa_ssp_read_reg(ssp, SSPSP) &
+		~(SSPSP_SFRMP | SSPSP_SCMODE(3));
 
-	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	sscr1 |= SSCR1_RxTresh(8) | SSCR1_TxTresh(7);
+
+	switch (priv->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) {
 	case SND_SOC_DAIFMT_CBM_CFM:
 		sscr1 |= SSCR1_SCLKDIR | SSCR1_SFRMDIR | SSCR1_SCFR;
 		break;
@@ -475,7 +510,7 @@
 		return -EINVAL;
 	}
 
-	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	switch (priv->dai_fmt & SND_SOC_DAIFMT_INV_MASK) {
 	case SND_SOC_DAIFMT_NB_NF:
 		sspsp |= SSPSP_SFRMP;
 		break;
@@ -491,7 +526,7 @@
 		return -EINVAL;
 	}
 
-	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	switch (priv->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
 	case SND_SOC_DAIFMT_I2S:
 		sscr0 |= SSCR0_PSP;
 		sscr1 |= SSCR1_RWOT | SSCR1_TRAIL;
@@ -513,7 +548,7 @@
 	pxa_ssp_write_reg(ssp, SSCR1, sscr1);
 	pxa_ssp_write_reg(ssp, SSPSP, sspsp);
 
-	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	switch (priv->dai_fmt & SND_SOC_DAIFMT_MASTER_MASK) {
 	case SND_SOC_DAIFMT_CBM_CFM:
 	case SND_SOC_DAIFMT_CBM_CFS:
 		scfr = pxa_ssp_read_reg(ssp, SSCR1) | SSCR1_SCFR;
@@ -530,7 +565,7 @@
 	 * we have to defer some things until hw_params() where we
 	 * know parameters like the sample size.
 	 */
-	priv->dai_fmt = fmt;
+	priv->configured_dai_fmt = priv->dai_fmt;
 
 	return 0;
 }
@@ -551,6 +586,7 @@
 	int width = snd_pcm_format_physical_width(params_format(params));
 	int ttsa = pxa_ssp_read_reg(ssp, SSTSA) & 0xf;
 	struct snd_dmaengine_dai_dma_data *dma_data;
+	int ret;
 
 	dma_data = snd_soc_dai_get_dma_data(cpu_dai, substream);
 
@@ -566,6 +602,10 @@
 	if (pxa_ssp_read_reg(ssp, SSCR0) & SSCR0_SSE)
 		return 0;
 
+	ret = pxa_ssp_configure_dai_fmt(priv);
+	if (ret < 0)
+		return ret;
+
 	/* clear selected SSP bits */
 	sscr0 = pxa_ssp_read_reg(ssp, SSCR0) & ~(SSCR0_DSS | SSCR0_EDSS);
 
diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig
index 8ec9a07..87838fa 100644
--- a/sound/soc/qcom/Kconfig
+++ b/sound/soc/qcom/Kconfig
@@ -11,24 +11,21 @@
 
 config SND_SOC_LPASS_PLATFORM
 	tristate
-	depends on HAS_DMA
 	select REGMAP_MMIO
 
 config SND_SOC_LPASS_IPQ806X
 	tristate
-	depends on HAS_DMA
 	select SND_SOC_LPASS_CPU
 	select SND_SOC_LPASS_PLATFORM
 
 config SND_SOC_LPASS_APQ8016
 	tristate
-	depends on HAS_DMA
 	select SND_SOC_LPASS_CPU
 	select SND_SOC_LPASS_PLATFORM
 
 config SND_SOC_STORM
 	tristate "ASoC I2S support for Storm boards"
-	depends on SND_SOC_QCOM && HAS_DMA
+	depends on SND_SOC_QCOM
 	select SND_SOC_LPASS_IPQ806X
 	select SND_SOC_MAX98357A
 	help
@@ -37,9 +34,59 @@
 
 config SND_SOC_APQ8016_SBC
 	tristate "SoC Audio support for APQ8016 SBC platforms"
-	depends on SND_SOC_QCOM && HAS_DMA
+	depends on SND_SOC_QCOM
 	select SND_SOC_LPASS_APQ8016
 	help
           Support for Qualcomm Technologies LPASS audio block in
           APQ8016 SOC-based systems.
           Say Y if you want to use audio devices on MI2S.
+
+config SND_SOC_QDSP6_COMMON
+	tristate
+
+config SND_SOC_QDSP6_CORE
+	tristate
+
+config SND_SOC_QDSP6_AFE
+	tristate
+
+config SND_SOC_QDSP6_AFE_DAI
+	tristate
+
+config SND_SOC_QDSP6_ADM
+	tristate
+
+config SND_SOC_QDSP6_ROUTING
+	tristate
+
+config SND_SOC_QDSP6_ASM
+	tristate
+
+config SND_SOC_QDSP6_ASM_DAI
+	tristate
+
+config SND_SOC_QDSP6
+	tristate "SoC ALSA audio driver for QDSP6"
+	depends on QCOM_APR && HAS_DMA
+	select SND_SOC_QDSP6_COMMON
+	select SND_SOC_QDSP6_CORE
+	select SND_SOC_QDSP6_AFE
+	select SND_SOC_QDSP6_AFE_DAI
+	select SND_SOC_QDSP6_ADM
+	select SND_SOC_QDSP6_ROUTING
+	select SND_SOC_QDSP6_ASM
+	select SND_SOC_QDSP6_ASM_DAI
+	help
+	 Say Y or M to add support for MSM QDSP6 SoC audio.
+	 This enables the SoC platform-specific audio
+	 drivers, including the q6asm, q6adm and q6afe
+	 interfaces to the DSP over APR.
+
+config SND_SOC_MSM8996
+	tristate "SoC Machine driver for MSM8996 and APQ8096 boards"
+	depends on QCOM_APR
+	select SND_SOC_QDSP6
+	help
+          Support for Qualcomm Technologies LPASS audio block in
+          APQ8096 SoC-based systems.
+          Say Y if you want to use audio devices on these SoCs.
diff --git a/sound/soc/qcom/Makefile b/sound/soc/qcom/Makefile
index d528035..206945b 100644
--- a/sound/soc/qcom/Makefile
+++ b/sound/soc/qcom/Makefile
@@ -13,6 +13,11 @@
 # Machine
 snd-soc-storm-objs := storm.o
 snd-soc-apq8016-sbc-objs := apq8016_sbc.o
+snd-soc-apq8096-objs := apq8096.o
 
 obj-$(CONFIG_SND_SOC_STORM) += snd-soc-storm.o
 obj-$(CONFIG_SND_SOC_APQ8016_SBC) += snd-soc-apq8016-sbc.o
+obj-$(CONFIG_SND_SOC_MSM8996) += snd-soc-apq8096.o
+
+#DSP lib
+obj-$(CONFIG_SND_SOC_QDSP6) += qdsp6/
diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c
index 7044287..1dd23bb 100644
--- a/sound/soc/qcom/apq8016_sbc.c
+++ b/sound/soc/qcom/apq8016_sbc.c
@@ -147,7 +147,8 @@
 	num_links = of_get_child_count(node);
 
 	/* Allocate the private data and the DAI link array */
-	data = devm_kzalloc(dev, sizeof(*data) + sizeof(*link) * num_links,
+	data = devm_kzalloc(dev,
+			    struct_size(data, dai_link, num_links),
 			    GFP_KERNEL);
 	if (!data)
 		return ERR_PTR(-ENOMEM);
diff --git a/sound/soc/qcom/apq8096.c b/sound/soc/qcom/apq8096.c
new file mode 100644
index 0000000..561cd42
--- /dev/null
+++ b/sound/soc/qcom/apq8096.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/soc/qcom/apr.h>
+#include <linux/module.h>
+#include <linux/component.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/pcm.h>
+
+static int apq8096_be_hw_params_fixup(struct snd_soc_pcm_runtime *rtd,
+				      struct snd_pcm_hw_params *params)
+{
+	struct snd_interval *rate = hw_param_interval(params,
+					SNDRV_PCM_HW_PARAM_RATE);
+	struct snd_interval *channels = hw_param_interval(params,
+					SNDRV_PCM_HW_PARAM_CHANNELS);
+
+	rate->min = rate->max = 48000;
+	channels->min = channels->max = 2;
+
+	return 0;
+}
+
+static int apq8096_sbc_parse_of(struct snd_soc_card *card)
+{
+	struct device_node *np;
+	struct device_node *codec = NULL;
+	struct device_node *platform = NULL;
+	struct device_node *cpu = NULL;
+	struct device *dev = card->dev;
+	struct snd_soc_dai_link *link;
+	int ret, num_links;
+
+	ret = snd_soc_of_parse_card_name(card, "qcom,model");
+	if (ret) {
+		dev_err(dev, "Error parsing card name: %d\n", ret);
+		return ret;
+	}
+
+	/* DAPM routes */
+	if (of_property_read_bool(dev->of_node, "qcom,audio-routing")) {
+		ret = snd_soc_of_parse_audio_routing(card,
+					"qcom,audio-routing");
+		if (ret)
+			return ret;
+	}
+
+	/* Populate links */
+	num_links = of_get_child_count(dev->of_node);
+
+	/* Allocate the DAI link array */
+	card->dai_link = kcalloc(num_links, sizeof(*link), GFP_KERNEL);
+	if (!card->dai_link)
+		return -ENOMEM;
+
+	card->num_links	= num_links;
+	link = card->dai_link;
+
+	for_each_child_of_node(dev->of_node, np) {
+		cpu = of_get_child_by_name(np, "cpu");
+		if (!cpu) {
+			dev_err(dev, "Can't find cpu DT node\n");
+			ret = -EINVAL;
+			goto err;
+		}
+
+		link->cpu_of_node = of_parse_phandle(cpu, "sound-dai", 0);
+		if (!link->cpu_of_node) {
+			dev_err(card->dev, "error getting cpu phandle\n");
+			ret = -EINVAL;
+			goto err;
+		}
+
+		ret = snd_soc_of_get_dai_name(cpu, &link->cpu_dai_name);
+		if (ret) {
+			dev_err(card->dev, "error getting cpu dai name\n");
+			goto err;
+		}
+
+		platform = of_get_child_by_name(np, "platform");
+		codec = of_get_child_by_name(np, "codec");
+		if (codec && platform) {
+			link->platform_of_node = of_parse_phandle(platform,
+								  "sound-dai",
+								   0);
+			if (!link->platform_of_node) {
+				dev_err(card->dev, "platform dai not found\n");
+				ret = -EINVAL;
+				goto err;
+			}
+
+			ret = snd_soc_of_get_dai_link_codecs(dev, codec, link);
+			if (ret < 0) {
+				dev_err(card->dev, "codec dai not found\n");
+				goto err;
+			}
+			link->no_pcm = 1;
+			link->ignore_pmdown_time = 1;
+			link->be_hw_params_fixup = apq8096_be_hw_params_fixup;
+		} else {
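+			/* No codec/platform given: a DPCM front end using the dummy codec */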
+			link->platform_of_node = link->cpu_of_node;
+			link->codec_dai_name = "snd-soc-dummy-dai";
+			link->codec_name = "snd-soc-dummy";
+			link->dynamic = 1;
+		}
+
+		link->ignore_suspend = 1;
+		ret = of_property_read_string(np, "link-name", &link->name);
+		if (ret) {
+			dev_err(card->dev, "error getting codec dai_link name\n");
+			goto err;
+		}
+
+		link->dpcm_playback = 1;
+		link->dpcm_capture = 1;
+		link->stream_name = link->name;
+		link++;
+	}
+
+	return 0;
+err:
+	of_node_put(cpu);
+	of_node_put(codec);
+	of_node_put(platform);
+	kfree(card->dai_link);
+	return ret;
+}
+
+static int apq8096_bind(struct device *dev)
+{
+	struct snd_soc_card *card;
+	int ret;
+
+	card = kzalloc(sizeof(*card), GFP_KERNEL);
+	if (!card)
+		return -ENOMEM;
+
+	component_bind_all(dev, card);
+	card->dev = dev;
+	ret = apq8096_sbc_parse_of(card);
+	if (ret) {
+		dev_err(dev, "Error parsing OF data\n");
+		goto err;
+	}
+
+	ret = snd_soc_register_card(card);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	component_unbind_all(dev, card);
+	kfree(card);
+	return ret;
+}
+
+static void apq8096_unbind(struct device *dev)
+{
+	struct snd_soc_card *card = dev_get_drvdata(dev);
+
+	snd_soc_unregister_card(card);
+	component_unbind_all(dev, card);
+	kfree(card->dai_link);
+	kfree(card);
+}
+
+static const struct component_master_ops apq8096_ops = {
+	.bind = apq8096_bind,
+	.unbind = apq8096_unbind,
+};
+
+static int apq8096_compare_of(struct device *dev, void *data)
+{
+	return dev->of_node == data;
+}
+
+static void apq8096_release_of(struct device *dev, void *data)
+{
+	of_node_put(data);
+}
+
+static int add_audio_components(struct device *dev,
+				struct component_match **matchptr)
+{
+	struct device_node *np, *platform, *cpu, *node, *dai_node;
+
+	node = dev->of_node;
+
+	for_each_child_of_node(node, np) {
+		cpu = of_get_child_by_name(np, "cpu");
+		if (cpu) {
+			dai_node = of_parse_phandle(cpu, "sound-dai", 0);
+			of_node_get(dai_node);
+			component_match_add_release(dev, matchptr,
+						    apq8096_release_of,
+						    apq8096_compare_of,
+						    dai_node);
+		}
+
+		platform = of_get_child_by_name(np, "platform");
+		if (platform) {
+			dai_node = of_parse_phandle(platform, "sound-dai", 0);
+			component_match_add_release(dev, matchptr,
+						    apq8096_release_of,
+						    apq8096_compare_of,
+						    dai_node);
+		}
+	}
+
+	return 0;
+}
+
+static int apq8096_platform_probe(struct platform_device *pdev)
+{
+	struct component_match *match = NULL;
+	int ret;
+
+	ret = add_audio_components(&pdev->dev, &match);
+	if (ret)
+		return ret;
+
+	return component_master_add_with_match(&pdev->dev, &apq8096_ops, match);
+}
+
+static int apq8096_platform_remove(struct platform_device *pdev)
+{
+	component_master_del(&pdev->dev, &apq8096_ops);
+
+	return 0;
+}
+
+static const struct of_device_id msm_snd_apq8096_dt_match[] = {
+	{.compatible = "qcom,apq8096-sndcard"},
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, msm_snd_apq8096_dt_match);
+
+static struct platform_driver msm_snd_apq8096_driver = {
+	.probe  = apq8096_platform_probe,
+	.remove = apq8096_platform_remove,
+	.driver = {
+		.name = "msm-snd-apq8096",
+		.owner = THIS_MODULE,
+		.of_match_table = msm_snd_apq8096_dt_match,
+	},
+};
+module_platform_driver(msm_snd_apq8096_driver);
+MODULE_AUTHOR("Srinivas Kandagatla <srinivas.kandagatla@linaro.org");
+MODULE_DESCRIPTION("APQ8096 ASoC Machine Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/Makefile b/sound/soc/qcom/qdsp6/Makefile
new file mode 100644
index 0000000..c33b3ca
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/Makefile
@@ -0,0 +1,8 @@
+obj-$(CONFIG_SND_SOC_QDSP6_COMMON) += q6dsp-common.o
+obj-$(CONFIG_SND_SOC_QDSP6_CORE) += q6core.o
+obj-$(CONFIG_SND_SOC_QDSP6_AFE) += q6afe.o
+obj-$(CONFIG_SND_SOC_QDSP6_AFE_DAI) += q6afe-dai.o
+obj-$(CONFIG_SND_SOC_QDSP6_ADM) += q6adm.o
+obj-$(CONFIG_SND_SOC_QDSP6_ROUTING) += q6routing.o
+obj-$(CONFIG_SND_SOC_QDSP6_ASM) += q6asm.o
+obj-$(CONFIG_SND_SOC_QDSP6_ASM_DAI) += q6asm-dai.o
diff --git a/sound/soc/qcom/qdsp6/q6adm.c b/sound/soc/qcom/qdsp6/q6adm.c
new file mode 100644
index 0000000..9983c66
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6adm.c
@@ -0,0 +1,646 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/kref.h>
+#include <linux/wait.h>
+#include <linux/soc/qcom/apr.h>
+#include <linux/platform_device.h>
+#include <sound/asound.h>
+#include "q6adm.h"
+#include "q6afe.h"
+#include "q6core.h"
+#include "q6dsp-errno.h"
+#include "q6dsp-common.h"
+
+#define ADM_CMD_DEVICE_OPEN_V5		0x00010326
+#define ADM_CMDRSP_DEVICE_OPEN_V5	0x00010329
+#define ADM_CMD_DEVICE_CLOSE_V5		0x00010327
+#define ADM_CMD_MATRIX_MAP_ROUTINGS_V5	0x00010325
+
+#define TIMEOUT_MS 1000
+#define RESET_COPP_ID 99
+#define INVALID_COPP_ID 0xFF
+/* Definition for a legacy device session. */
+#define ADM_LEGACY_DEVICE_SESSION	0
+#define ADM_MATRIX_ID_AUDIO_RX		0
+#define ADM_MATRIX_ID_AUDIO_TX		1
+
+struct q6copp {
+	int afe_port;
+	int copp_idx;
+	int id;
+	int topology;
+	int mode;
+	int rate;
+	int bit_width;
+	int channels;
+	int app_type;
+	int acdb_id;
+
+	struct aprv2_ibasic_rsp_result_t result;
+	struct kref refcount;
+	wait_queue_head_t wait;
+	struct list_head node;
+	struct q6adm *adm;
+};
+
+struct q6adm {
+	struct apr_device *apr;
+	struct device *dev;
+	struct q6core_svc_api_info ainfo;
+	unsigned long copp_bitmap[AFE_MAX_PORTS];
+	struct list_head copps_list;
+	spinlock_t copps_list_lock;
+	struct aprv2_ibasic_rsp_result_t result;
+	struct mutex lock;
+	wait_queue_head_t matrix_map_wait;
+	struct platform_device *pdev_routing;
+};
+
+struct q6adm_cmd_device_open_v5 {
+	u16 flags;
+	u16 mode_of_operation;
+	u16 endpoint_id_1;
+	u16 endpoint_id_2;
+	u32 topology_id;
+	u16 dev_num_channel;
+	u16 bit_width;
+	u32 sample_rate;
+	u8 dev_channel_mapping[8];
+} __packed;
+
+struct q6adm_cmd_matrix_map_routings_v5 {
+	u32 matrix_id;
+	u32 num_sessions;
+} __packed;
+
+struct q6adm_session_map_node_v5 {
+	u16 session_id;
+	u16 num_copps;
+} __packed;
+
+static struct q6copp *q6adm_find_copp(struct q6adm *adm, int port_idx,
+				  int copp_idx)
+{
+	struct q6copp *c = NULL;
+	struct q6copp *ret = NULL;
+	unsigned long flags;
+
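+	/*
+	 * A successful lookup takes a reference; the caller releases it
+	 * with kref_put(&copp->refcount, q6adm_free_copp).
+	 */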
+	spin_lock_irqsave(&adm->copps_list_lock, flags);
+	list_for_each_entry(c, &adm->copps_list, node) {
+		if ((port_idx == c->afe_port) && (copp_idx == c->copp_idx)) {
+			ret = c;
+			kref_get(&c->refcount);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&adm->copps_list_lock, flags);
+
+	return ret;
+}
+
+static void q6adm_free_copp(struct kref *ref)
+{
+	struct q6copp *c = container_of(ref, struct q6copp, refcount);
+	struct q6adm *adm = c->adm;
+	unsigned long flags;
+
+	spin_lock_irqsave(&adm->copps_list_lock, flags);
+	clear_bit(c->copp_idx, &adm->copp_bitmap[c->afe_port]);
+	list_del(&c->node);
+	spin_unlock_irqrestore(&adm->copps_list_lock, flags);
+	kfree(c);
+}
+
+static int q6adm_callback(struct apr_device *adev, struct apr_resp_pkt *data)
+{
+	struct aprv2_ibasic_rsp_result_t *result = data->payload;
+	int port_idx, copp_idx;
+	struct apr_hdr *hdr = &data->hdr;
+	struct q6copp *copp;
+	struct q6adm *adm = dev_get_drvdata(&adev->dev);
+
+	if (!data->payload_size)
+		return 0;
+
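+	/* The command path packs the token as (port id << 16 | copp index) */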
+	copp_idx = (hdr->token) & 0xFF;
+	port_idx = ((hdr->token) >> 16) & 0xFF;
+	if (port_idx < 0 || port_idx >= AFE_MAX_PORTS) {
+		dev_err(&adev->dev, "Invalid port idx %d token %d\n",
+		       port_idx, hdr->token);
+		return 0;
+	}
+	if (copp_idx < 0 || copp_idx >= MAX_COPPS_PER_PORT) {
+		dev_err(&adev->dev, "Invalid copp idx %d token %d\n",
+			copp_idx, hdr->token);
+		return 0;
+	}
+
+	switch (hdr->opcode) {
+	case APR_BASIC_RSP_RESULT: {
+		if (result->status != 0) {
+			dev_err(&adev->dev, "cmd = 0x%x return error = 0x%x\n",
+				result->opcode, result->status);
+		}
+		switch (result->opcode) {
+		case ADM_CMD_DEVICE_OPEN_V5:
+		case ADM_CMD_DEVICE_CLOSE_V5:
+			copp = q6adm_find_copp(adm, port_idx, copp_idx);
+			if (!copp)
+				return 0;
+
+			copp->result = *result;
+			wake_up(&copp->wait);
+			kref_put(&copp->refcount, q6adm_free_copp);
+			break;
+		case ADM_CMD_MATRIX_MAP_ROUTINGS_V5:
+			adm->result = *result;
+			wake_up(&adm->matrix_map_wait);
+			break;
+
+		default:
+			dev_err(&adev->dev, "Unknown Cmd: 0x%x\n",
+				result->opcode);
+			break;
+		}
+		return 0;
+	}
+	case ADM_CMDRSP_DEVICE_OPEN_V5: {
+		struct adm_cmd_rsp_device_open_v5 {
+			u32 status;
+			u16 copp_id;
+			u16 reserved;
+		} __packed * open = data->payload;
+
+		copp = q6adm_find_copp(adm, port_idx, copp_idx);
+		if (!copp)
+			return 0;
+
+		if (open->copp_id == INVALID_COPP_ID) {
+			dev_err(&adev->dev, "Invalid coppid rxed %d\n",
+				open->copp_id);
+			copp->result.status = ADSP_EBADPARAM;
+			wake_up(&copp->wait);
+			kref_put(&copp->refcount, q6adm_free_copp);
+			break;
+		}
+		copp->result.opcode = hdr->opcode;
+		copp->id = open->copp_id;
+		wake_up(&copp->wait);
+		kref_put(&copp->refcount, q6adm_free_copp);
+	}
+	break;
+	default:
+		dev_err(&adev->dev, "Unknown cmd:0x%x\n",
+		       hdr->opcode);
+		break;
+	}
+
+	return 0;
+}
+
+static struct q6copp *q6adm_alloc_copp(struct q6adm *adm, int port_idx)
+{
+	struct q6copp *c;
+	int idx;
+
+	idx = find_first_zero_bit(&adm->copp_bitmap[port_idx],
+				  MAX_COPPS_PER_PORT);
+
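+	/* find_first_zero_bit() returns MAX_COPPS_PER_PORT when all are busy */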
+	if (idx >= MAX_COPPS_PER_PORT)
+		return ERR_PTR(-EBUSY);
+
+	c = kzalloc(sizeof(*c), GFP_ATOMIC);
+	if (!c)
+		return ERR_PTR(-ENOMEM);
+
+	set_bit(idx, &adm->copp_bitmap[port_idx]);
+	c->copp_idx = idx;
+	c->afe_port = port_idx;
+	c->adm = adm;
+
+	init_waitqueue_head(&c->wait);
+
+	return c;
+}
+
+static int q6adm_apr_send_copp_pkt(struct q6adm *adm, struct q6copp *copp,
+				   struct apr_pkt *pkt, uint32_t rsp_opcode)
+{
+	struct device *dev = adm->dev;
+	uint32_t opcode = pkt->hdr.opcode;
+	int ret;
+
+	mutex_lock(&adm->lock);
+	copp->result.opcode = 0;
+	copp->result.status = 0;
+	ret = apr_send_pkt(adm->apr, pkt);
+	if (ret < 0) {
+		dev_err(dev, "Failed to send APR packet\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	/* Wait for the callback with copp id */
+	if (rsp_opcode)
+		ret = wait_event_timeout(copp->wait,
+					 (copp->result.opcode == opcode) ||
+					 (copp->result.opcode == rsp_opcode),
+					 msecs_to_jiffies(TIMEOUT_MS));
+	else
+		ret = wait_event_timeout(copp->wait,
+					 (copp->result.opcode == opcode),
+					 msecs_to_jiffies(TIMEOUT_MS));
+
+	if (!ret) {
+		dev_err(dev, "ADM copp cmd timed out\n");
+		ret = -ETIMEDOUT;
+	} else if (copp->result.status > 0) {
+		dev_err(dev, "DSP returned error[%d]\n",
+			copp->result.status);
+		ret = -EINVAL;
+	} else {
+		/* wait_event_timeout() returns remaining jiffies on success */
+		ret = 0;
+	}
+
+err:
+	mutex_unlock(&adm->lock);
+	return ret;
+}
+
+static int q6adm_device_close(struct q6adm *adm, struct q6copp *copp,
+			      int port_id, int copp_idx)
+{
+	struct apr_pkt close;
+
+	close.hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+					APR_HDR_LEN(APR_HDR_SIZE),
+					APR_PKT_VER);
+	close.hdr.pkt_size = sizeof(close);
+	close.hdr.src_port = port_id;
+	close.hdr.dest_port = copp->id;
+	close.hdr.token = port_id << 16 | copp_idx;
+	close.hdr.opcode = ADM_CMD_DEVICE_CLOSE_V5;
+
+	return q6adm_apr_send_copp_pkt(adm, copp, &close, 0);
+}
+
+static struct q6copp *q6adm_find_matching_copp(struct q6adm *adm,
+					       int port_id, int topology,
+					       int mode, int rate,
+					       int channel_mode, int bit_width,
+					       int app_type)
+{
+	struct q6copp *c = NULL;
+	struct q6copp *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&adm->copps_list_lock, flags);
+
+	list_for_each_entry(c, &adm->copps_list, node) {
+		if ((port_id == c->afe_port) && (topology == c->topology) &&
+		    (mode == c->mode) && (rate == c->rate) &&
+		    (bit_width == c->bit_width) && (app_type == c->app_type)) {
+			ret = c;
+			kref_get(&c->refcount);
+		}
+	}
+	spin_unlock_irqrestore(&adm->copps_list_lock, flags);
+
+	return ret;
+}
+
+static int q6adm_device_open(struct q6adm *adm, struct q6copp *copp,
+			     int port_id, int path, int topology,
+			     int channel_mode, int bit_width, int rate)
+{
+	struct q6adm_cmd_device_open_v5 *open;
+	int afe_port = q6afe_get_port_id(port_id);
+	struct apr_pkt *pkt;
+	void *p;
+	int ret, pkt_size;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*open);
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	open = p + APR_HDR_SIZE;
+	pkt->hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+					   APR_HDR_LEN(APR_HDR_SIZE),
+					   APR_PKT_VER);
+	pkt->hdr.pkt_size = pkt_size;
+	pkt->hdr.src_port = afe_port;
+	pkt->hdr.dest_port = afe_port;
+	pkt->hdr.token = port_id << 16 | copp->copp_idx;
+	pkt->hdr.opcode = ADM_CMD_DEVICE_OPEN_V5;
+	open->flags = ADM_LEGACY_DEVICE_SESSION;
+	open->mode_of_operation = path;
+	open->endpoint_id_1 = afe_port;
+	open->topology_id = topology;
+	open->dev_num_channel = channel_mode & 0x00FF;
+	open->bit_width = bit_width;
+	open->sample_rate = rate;
+
+	ret = q6dsp_map_channels(&open->dev_channel_mapping[0],
+				 channel_mode);
+	if (ret)
+		goto err;
+
+	ret = q6adm_apr_send_copp_pkt(adm, copp, pkt,
+				      ADM_CMDRSP_DEVICE_OPEN_V5);
+
+err:
+	kfree(pkt);
+	return ret;
+}
+
+/**
+ * q6adm_open() - open adm and grab a free copp
+ *
+ * @dev: Pointer to adm child device.
+ * @port_id: port id
+ * @path: playback or capture path.
+ * @rate: rate at which copp is required.
+ * @channel_mode: channel mode
+ * @topology: adm topology id
+ * @perf_mode: performance mode.
+ * @bit_width: audio sample bit width
+ * @app_type: Application type.
+ * @acdb_id: ACDB id
+ *
+ * Return: a valid copp pointer on success or an ERR_PTR() on error.
+ */
+struct q6copp *q6adm_open(struct device *dev, int port_id, int path, int rate,
+	       int channel_mode, int topology, int perf_mode,
+	       uint16_t bit_width, int app_type, int acdb_id)
+{
+	struct q6adm *adm = dev_get_drvdata(dev->parent);
+	struct q6copp *copp;
+	unsigned long flags;
+	int ret = 0;
+
+	if (port_id < 0) {
+		dev_err(dev, "Invalid port_id 0x%x\n", port_id);
+		return ERR_PTR(-EINVAL);
+	}
+
+	copp = q6adm_find_matching_copp(adm, port_id, topology, perf_mode,
+				      rate, channel_mode, bit_width, app_type);
+	if (copp) {
+		dev_err(dev, "Found Matching Copp 0x%x\n", copp->copp_idx);
+		return copp;
+	}
+
+	spin_lock_irqsave(&adm->copps_list_lock, flags);
+	copp = q6adm_alloc_copp(adm, port_id);
+	if (IS_ERR_OR_NULL(copp)) {
+		spin_unlock_irqrestore(&adm->copps_list_lock, flags);
+		return ERR_CAST(copp);
+	}
+
+	list_add_tail(&copp->node, &adm->copps_list);
+	spin_unlock_irqrestore(&adm->copps_list_lock, flags);
+
+	kref_init(&copp->refcount);
+	copp->topology = topology;
+	copp->mode = perf_mode;
+	copp->rate = rate;
+	copp->channels = channel_mode;
+	copp->bit_width = bit_width;
+	copp->app_type = app_type;
+
+	ret = q6adm_device_open(adm, copp, port_id, path, topology,
+				channel_mode, bit_width, rate);
+	if (ret < 0) {
+		kref_put(&copp->refcount, q6adm_free_copp);
+		return ERR_PTR(ret);
+	}
+
+	return copp;
+}
+EXPORT_SYMBOL_GPL(q6adm_open);
+
+/**
+ * q6adm_get_copp_id() - get copp index
+ *
+ * @copp: Pointer to valid copp
+ *
+ * Return: a valid copp index on success or a negative error code on failure.
+ */
+int q6adm_get_copp_id(struct q6copp *copp)
+{
+	if (!copp)
+		return -EINVAL;
+
+	return copp->copp_idx;
+}
+EXPORT_SYMBOL_GPL(q6adm_get_copp_id);
+
+/**
+ * q6adm_matrix_map() - Map asm streams and afe ports using payload
+ *
+ * @dev: Pointer to adm child device.
+ * @path: playback or capture path.
+ * @payload_map: map between session id and afe ports.
+ * @perf_mode: Performance mode.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int q6adm_matrix_map(struct device *dev, int path,
+		     struct route_payload payload_map, int perf_mode)
+{
+	struct q6adm *adm = dev_get_drvdata(dev->parent);
+	struct q6adm_cmd_matrix_map_routings_v5 *route;
+	struct q6adm_session_map_node_v5 *node;
+	struct apr_pkt *pkt;
+	uint16_t *copps_list;
+	int pkt_size, ret, i, copp_idx;
+	void *matrix_map = NULL;
+	struct q6copp *copp;
+
+	/* Assumes port_ids have already been validated during adm_open */
+	pkt_size = (APR_HDR_SIZE + sizeof(*route) +  sizeof(*node) +
+		    (sizeof(uint32_t) * payload_map.num_copps));
+
+	matrix_map = kzalloc(pkt_size, GFP_KERNEL);
+	if (!matrix_map)
+		return -ENOMEM;
+
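+	/* Layout: APR header | matrix map cmd | session map node | copp list */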
+	pkt = matrix_map;
+	route = matrix_map + APR_HDR_SIZE;
+	node = matrix_map + APR_HDR_SIZE + sizeof(*route);
+	copps_list = matrix_map + APR_HDR_SIZE + sizeof(*route) + sizeof(*node);
+
+	pkt->hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+					   APR_HDR_LEN(APR_HDR_SIZE),
+					   APR_PKT_VER);
+	pkt->hdr.pkt_size = pkt_size;
+	pkt->hdr.token = 0;
+	pkt->hdr.opcode = ADM_CMD_MATRIX_MAP_ROUTINGS_V5;
+	route->num_sessions = 1;
+
+	switch (path) {
+	case ADM_PATH_PLAYBACK:
+		route->matrix_id = ADM_MATRIX_ID_AUDIO_RX;
+		break;
+	case ADM_PATH_LIVE_REC:
+		route->matrix_id = ADM_MATRIX_ID_AUDIO_TX;
+		break;
+	default:
+		dev_err(dev, "Wrong path set[%d]\n", path);
+		break;
+	}
+
+	node->session_id = payload_map.session_id;
+	node->num_copps = payload_map.num_copps;
+
+	for (i = 0; i < payload_map.num_copps; i++) {
+		int port_idx = payload_map.port_id[i];
+
+		if (port_idx < 0) {
+			dev_err(dev, "Invalid port_id 0x%x\n",
+				payload_map.port_id[i]);
+			kfree(pkt);
+			return -EINVAL;
+		}
+		copp_idx = payload_map.copp_idx[i];
+
+		copp = q6adm_find_copp(adm, port_idx, copp_idx);
+		if (!copp) {
+			kfree(pkt);
+			return -EINVAL;
+		}
+
+		copps_list[i] = copp->id;
+		kref_put(&copp->refcount, q6adm_free_copp);
+	}
+
+	mutex_lock(&adm->lock);
+	adm->result.status = 0;
+	adm->result.opcode = 0;
+
+	ret = apr_send_pkt(adm->apr, pkt);
+	if (ret < 0) {
+		dev_err(dev, "routing for stream %d failed ret %d\n",
+		       payload_map.session_id, ret);
+		goto fail_cmd;
+	}
+	ret = wait_event_timeout(adm->matrix_map_wait,
+				 adm->result.opcode == pkt->hdr.opcode,
+				 msecs_to_jiffies(TIMEOUT_MS));
+	if (!ret) {
+		dev_err(dev, "routing for stream %d failed\n",
+		       payload_map.session_id);
+		ret = -ETIMEDOUT;
+		goto fail_cmd;
+	} else if (adm->result.status > 0) {
+		dev_err(dev, "DSP returned error[%d]\n",
+			adm->result.status);
+		ret = -EINVAL;
+		goto fail_cmd;
+	}
+	ret = 0;
+
+fail_cmd:
+	mutex_unlock(&adm->lock);
+	kfree(pkt);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(q6adm_matrix_map);
+
+/**
+ * q6adm_close() - Close adm copp
+ *
+ * @dev: Pointer to adm child device.
+ * @copp: pointer to previously opened copp
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int q6adm_close(struct device *dev, struct q6copp *copp)
+{
+	struct q6adm *adm = dev_get_drvdata(dev->parent);
+	int ret = 0;
+
+	ret = q6adm_device_close(adm, copp, copp->afe_port, copp->copp_idx);
+	if (ret < 0) {
+		dev_err(adm->dev, "Failed to close copp %d\n", ret);
+		return ret;
+	}
+
+	kref_put(&copp->refcount, q6adm_free_copp);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(q6adm_close);
+
+static int q6adm_probe(struct apr_device *adev)
+{
+	struct device *dev = &adev->dev;
+	struct device_node *dais_np;
+	struct q6adm *adm;
+
+	adm = devm_kzalloc(&adev->dev, sizeof(*adm), GFP_KERNEL);
+	if (!adm)
+		return -ENOMEM;
+
+	adm->apr = adev;
+	dev_set_drvdata(&adev->dev, adm);
+	adm->dev = dev;
+	q6core_get_svc_api_info(adev->svc_id, &adm->ainfo);
+	mutex_init(&adm->lock);
+	init_waitqueue_head(&adm->matrix_map_wait);
+
+	INIT_LIST_HEAD(&adm->copps_list);
+	spin_lock_init(&adm->copps_list_lock);
+
+	dais_np = of_get_child_by_name(dev->of_node, "routing");
+	if (dais_np) {
+		adm->pdev_routing = of_platform_device_create(dais_np,
+							   "q6routing", dev);
+		of_node_put(dais_np);
+	}
+
+	return 0;
+}
+
+static int q6adm_remove(struct apr_device *adev)
+{
+	struct q6adm *adm = dev_get_drvdata(&adev->dev);
+
+	if (adm->pdev_routing)
+		of_platform_device_destroy(&adm->pdev_routing->dev, NULL);
+
+	return 0;
+}
+
+static const struct of_device_id q6adm_device_id[]  = {
+	{ .compatible = "qcom,q6adm" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, q6adm_device_id);
+
+static struct apr_driver qcom_q6adm_driver = {
+	.probe = q6adm_probe,
+	.remove = q6adm_remove,
+	.callback = q6adm_callback,
+	.driver = {
+		.name = "qcom-q6adm",
+		.of_match_table = of_match_ptr(q6adm_device_id),
+	},
+};
+
+module_apr_driver(qcom_q6adm_driver);
+MODULE_DESCRIPTION("Q6 Audio Device Manager");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/q6adm.h b/sound/soc/qcom/qdsp6/q6adm.h
new file mode 100644
index 0000000..4f56999
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6adm.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __Q6_ADM_V2_H__
+#define __Q6_ADM_V2_H__
+
+#define ADM_PATH_PLAYBACK	0x1
+#define ADM_PATH_LIVE_REC	0x2
+#define MAX_COPPS_PER_PORT	8
+#define NULL_COPP_TOPOLOGY	0x00010312
+
+/* multiple copp per stream. */
+struct route_payload {
+	int num_copps;
+	int session_id;
+	int copp_idx[MAX_COPPS_PER_PORT];
+	int port_id[MAX_COPPS_PER_PORT];
+};
+
+struct q6copp;
+struct q6copp *q6adm_open(struct device *dev, int port_id, int path, int rate,
+			   int channel_mode, int topology, int perf_mode,
+			   uint16_t bit_width, int app_type, int acdb_id);
+int q6adm_close(struct device *dev, struct q6copp *copp);
+int q6adm_get_copp_id(struct q6copp *copp);
+int q6adm_matrix_map(struct device *dev, int path,
+		     struct route_payload payload_map, int perf_mode);
+
+#endif /* __Q6_ADM_V2_H__ */
diff --git a/sound/soc/qcom/qdsp6/q6afe-dai.c b/sound/soc/qcom/qdsp6/q6afe-dai.c
new file mode 100644
index 0000000..5002dd0
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6afe-dai.c
@@ -0,0 +1,1303 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/component.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <sound/pcm.h>
+#include <sound/soc.h>
+#include <sound/pcm_params.h>
+#include "q6afe.h"
+
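+/* Boilerplate for describing the per-port TDM playback and capture DAIs */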
+#define Q6AFE_TDM_PB_DAI(pre, num, did) {				\
+		.playback = {						\
+			.stream_name = pre" TDM"#num" Playback",	\
+			.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
+				SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000 |\
+				SNDRV_PCM_RATE_176400,			\
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |		\
+				   SNDRV_PCM_FMTBIT_S24_LE |		\
+				   SNDRV_PCM_FMTBIT_S32_LE,		\
+			.channels_min = 1,				\
+			.channels_max = 8,				\
+			.rate_min = 8000,				\
+			.rate_max = 176400,				\
+		},							\
+		.name = #did,						\
+		.ops = &q6tdm_ops,					\
+		.id = did,						\
+		.probe = msm_dai_q6_dai_probe,				\
+		.remove = msm_dai_q6_dai_remove,			\
+	}
+
+#define Q6AFE_TDM_CAP_DAI(pre, num, did) {				\
+		.capture = {						\
+			.stream_name = pre" TDM"#num" Capture",		\
+			.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
+				SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000 |\
+				SNDRV_PCM_RATE_176400,			\
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |		\
+				   SNDRV_PCM_FMTBIT_S24_LE |		\
+				   SNDRV_PCM_FMTBIT_S32_LE,		\
+			.channels_min = 1,				\
+			.channels_max = 8,				\
+			.rate_min = 8000,				\
+			.rate_max = 176400,				\
+		},							\
+		.name = #did,						\
+		.ops = &q6tdm_ops,					\
+		.id = did,						\
+		.probe = msm_dai_q6_dai_probe,				\
+		.remove = msm_dai_q6_dai_remove,			\
+	}
+
+struct q6afe_dai_priv_data {
+	uint32_t sd_line_mask;
+	uint32_t sync_mode;
+	uint32_t sync_src;
+	uint32_t data_out_enable;
+	uint32_t invert_sync;
+	uint32_t data_delay;
+	uint32_t data_align;
+};
+
+struct q6afe_dai_data {
+	struct q6afe_port *port[AFE_PORT_MAX];
+	struct q6afe_port_config port_config[AFE_PORT_MAX];
+	bool is_port_started[AFE_PORT_MAX];
+	struct q6afe_dai_priv_data priv[AFE_PORT_MAX];
+};
+
+static int q6slim_hw_params(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_slim_cfg *slim = &dai_data->port_config[dai->id].slim;
+
+	slim->num_channels = params_channels(params);
+	slim->sample_rate = params_rate(params);
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+	case SNDRV_PCM_FORMAT_SPECIAL:
+		slim->bit_width = 16;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		slim->bit_width = 24;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		slim->bit_width = 32;
+		break;
+	default:
+		pr_err("%s: format %d\n",
+			__func__, params_format(params));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int q6hdmi_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params,
+				struct snd_soc_dai *dai)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	int channels = params_channels(params);
+	struct q6afe_hdmi_cfg *hdmi = &dai_data->port_config[dai->id].hdmi;
+
+	hdmi->sample_rate = params_rate(params);
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		hdmi->bit_width = 16;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		hdmi->bit_width = 24;
+		break;
+	default:
+		dev_err(dai->dev, "unsupported format %d\n",
+			params_format(params));
+		return -EINVAL;
+	}
+
+	/* HDMI spec CEA-861-E: Table 28 Audio InfoFrame Data Byte 4 */
+	switch (channels) {
+	case 2:
+		hdmi->channel_allocation = 0;
+		break;
+	case 3:
+		hdmi->channel_allocation = 0x02;
+		break;
+	case 4:
+		hdmi->channel_allocation = 0x06;
+		break;
+	case 5:
+		hdmi->channel_allocation = 0x0A;
+		break;
+	case 6:
+		hdmi->channel_allocation = 0x0B;
+		break;
+	case 7:
+		hdmi->channel_allocation = 0x12;
+		break;
+	case 8:
+		hdmi->channel_allocation = 0x13;
+		break;
+	default:
+		dev_err(dai->dev, "invalid Channels = %u\n", channels);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int q6i2s_hw_params(struct snd_pcm_substream *substream,
+			   struct snd_pcm_hw_params *params,
+			   struct snd_soc_dai *dai)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_i2s_cfg *i2s = &dai_data->port_config[dai->id].i2s_cfg;
+
+	i2s->sample_rate = params_rate(params);
+	i2s->bit_width = params_width(params);
+	i2s->num_channels = params_channels(params);
+	i2s->sd_line_mask = dai_data->priv[dai->id].sd_line_mask;
+
+	return 0;
+}
+
+static int q6i2s_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_i2s_cfg *i2s = &dai_data->port_config[dai->id].i2s_cfg;
+
+	i2s->fmt = fmt;
+
+	return 0;
+}
+
+static int q6tdm_set_tdm_slot(struct snd_soc_dai *dai,
+				unsigned int tx_mask,
+				unsigned int rx_mask,
+				int slots, int slot_width)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_tdm_cfg *tdm = &dai_data->port_config[dai->id].tdm;
+	unsigned int cap_mask;
+	int rc = 0;
+
+	/* HW only supports 16 and 32 bit slot width configuration */
+	if ((slot_width != 16) && (slot_width != 32)) {
+		dev_err(dai->dev, "%s: invalid slot_width %d\n",
+			__func__, slot_width);
+		return -EINVAL;
+	}
+
+	/* HW supports 1-32 slots configuration. Typical: 1, 2, 4, 8, 16, 32 */
+	switch (slots) {
+	case 2:
+		cap_mask = 0x03;
+		break;
+	case 4:
+		cap_mask = 0x0F;
+		break;
+	case 8:
+		cap_mask = 0xFF;
+		break;
+	case 16:
+		cap_mask = 0xFFFF;
+		break;
+	default:
+		dev_err(dai->dev, "%s: invalid slots %d\n",
+			__func__, slots);
+		return -EINVAL;
+	}
+
+	switch (dai->id) {
+	case PRIMARY_TDM_RX_0 ... QUINARY_TDM_TX_7:
+		tdm->nslots_per_frame = slots;
+		tdm->slot_width = slot_width;
+		/* TDM RX dais ids are even and tx are odd */
+		tdm->slot_mask = (dai->id & 0x1 ? tx_mask : rx_mask) & cap_mask;
+		break;
+	default:
+		dev_err(dai->dev, "%s: invalid dai id 0x%x\n",
+			__func__, dai->id);
+		return -EINVAL;
+	}
+
+	return rc;
+}
+
+static int q6tdm_set_channel_map(struct snd_soc_dai *dai,
+				unsigned int tx_num, unsigned int *tx_slot,
+				unsigned int rx_num, unsigned int *rx_slot)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_tdm_cfg *tdm = &dai_data->port_config[dai->id].tdm;
+	int rc = 0;
+	int i = 0;
+
+	switch (dai->id) {
+	case PRIMARY_TDM_RX_0 ... QUINARY_TDM_TX_7:
+		if (dai->id & 0x1) {
+			if (!tx_slot) {
+				dev_err(dai->dev, "tx slot not found\n");
+				return -EINVAL;
+			}
+			if (tx_num > AFE_PORT_MAX_AUDIO_CHAN_CNT) {
+				dev_err(dai->dev, "invalid tx num %d\n",
+					tx_num);
+				return -EINVAL;
+			}
+
+			for (i = 0; i < tx_num; i++)
+				tdm->ch_mapping[i] = tx_slot[i];
+
+			for (i = tx_num; i < AFE_PORT_MAX_AUDIO_CHAN_CNT; i++)
+				tdm->ch_mapping[i] = Q6AFE_CMAP_INVALID;
+
+			tdm->num_channels = tx_num;
+		} else {
+			/* rx */
+			if (!rx_slot) {
+				dev_err(dai->dev, "rx slot not found\n");
+				return -EINVAL;
+			}
+			if (rx_num > AFE_PORT_MAX_AUDIO_CHAN_CNT) {
+				dev_err(dai->dev, "invalid rx num %d\n",
+					rx_num);
+				return -EINVAL;
+			}
+
+			for (i = 0; i < rx_num; i++)
+				tdm->ch_mapping[i] = rx_slot[i];
+
+			for (i = rx_num; i < AFE_PORT_MAX_AUDIO_CHAN_CNT; i++)
+				tdm->ch_mapping[i] = Q6AFE_CMAP_INVALID;
+
+			tdm->num_channels = rx_num;
+		}
+
+		break;
+	default:
+		dev_err(dai->dev, "%s: invalid dai id 0x%x\n",
+			__func__, dai->id);
+		return -EINVAL;
+	}
+
+	return rc;
+}
+
+static int q6tdm_hw_params(struct snd_pcm_substream *substream,
+			   struct snd_pcm_hw_params *params,
+			   struct snd_soc_dai *dai)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_tdm_cfg *tdm = &dai_data->port_config[dai->id].tdm;
+
+	tdm->bit_width = params_width(params);
+	tdm->sample_rate = params_rate(params);
+	tdm->num_channels = params_channels(params);
+	tdm->data_align_type = dai_data->priv[dai->id].data_align;
+	tdm->sync_src = dai_data->priv[dai->id].sync_src;
+	tdm->sync_mode = dai_data->priv[dai->id].sync_mode;
+
+	return 0;
+}
+
+static void q6afe_dai_shutdown(struct snd_pcm_substream *substream,
+				struct snd_soc_dai *dai)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	int rc;
+
+	rc = q6afe_port_stop(dai_data->port[dai->id]);
+	if (rc < 0)
+		dev_err(dai->dev, "fail to close AFE port (%d)\n", rc);
+
+	dai_data->is_port_started[dai->id] = false;
+}
+
+static int q6afe_dai_prepare(struct snd_pcm_substream *substream,
+		struct snd_soc_dai *dai)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	int rc;
+
+	if (dai_data->is_port_started[dai->id]) {
+		/* stop the port and restart with new port config */
+		rc = q6afe_port_stop(dai_data->port[dai->id]);
+		if (rc < 0) {
+			dev_err(dai->dev, "fail to close AFE port (%d)\n", rc);
+			return rc;
+		}
+	}
+
+	switch (dai->id) {
+	case HDMI_RX:
+		q6afe_hdmi_port_prepare(dai_data->port[dai->id],
+					&dai_data->port_config[dai->id].hdmi);
+		break;
+	case SLIMBUS_0_RX ... SLIMBUS_6_TX:
+		q6afe_slim_port_prepare(dai_data->port[dai->id],
+					&dai_data->port_config[dai->id].slim);
+		break;
+	case PRIMARY_MI2S_RX ... QUATERNARY_MI2S_TX:
+		rc = q6afe_i2s_port_prepare(dai_data->port[dai->id],
+			       &dai_data->port_config[dai->id].i2s_cfg);
+		if (rc < 0) {
+			dev_err(dai->dev, "fail to prepare AFE port %x\n",
+				dai->id);
+			return rc;
+		}
+		break;
+	case PRIMARY_TDM_RX_0 ... QUINARY_TDM_TX_7:
+		q6afe_tdm_port_prepare(dai_data->port[dai->id],
+					&dai_data->port_config[dai->id].tdm);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rc = q6afe_port_start(dai_data->port[dai->id]);
+	if (rc < 0) {
+		dev_err(dai->dev, "fail to start AFE port %x\n", dai->id);
+		return rc;
+	}
+	dai_data->is_port_started[dai->id] = true;
+
+	return 0;
+}
+
+static int q6slim_set_channel_map(struct snd_soc_dai *dai,
+				unsigned int tx_num, unsigned int *tx_slot,
+				unsigned int rx_num, unsigned int *rx_slot)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_port_config *pcfg = &dai_data->port_config[dai->id];
+	int i;
+
+	if (!rx_slot) {
+		pr_err("%s: rx slot not found\n", __func__);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < rx_num; i++) {
+		pcfg->slim.ch_mapping[i] = rx_slot[i];
+		pr_debug("%s: find number of channels[%d] ch[%d]\n",
+		       __func__, i, rx_slot[i]);
+	}
+
+	pcfg->slim.num_channels = rx_num;
+
+	pr_debug("%s: SLIMBUS_%d_RX cnt[%d] ch[%d %d]\n", __func__,
+		(dai->id - SLIMBUS_0_RX) / 2, rx_num,
+		pcfg->slim.ch_mapping[0],
+		pcfg->slim.ch_mapping[1]);
+
+	return 0;
+}
+
+static int q6afe_mi2s_set_sysclk(struct snd_soc_dai *dai,
+		int clk_id, unsigned int freq, int dir)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_port *port = dai_data->port[dai->id];
+
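+	/* Each clock id range takes its own source/attribute arguments */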
+	switch (clk_id) {
+	case LPAIF_DIG_CLK:
+		return q6afe_port_set_sysclk(port, clk_id, 0, 5, freq, dir);
+	case LPAIF_BIT_CLK:
+	case LPAIF_OSR_CLK:
+		return q6afe_port_set_sysclk(port, clk_id,
+					     Q6AFE_LPASS_CLK_SRC_INTERNAL,
+					     Q6AFE_LPASS_CLK_ROOT_DEFAULT,
+					     freq, dir);
+	case Q6AFE_LPASS_CLK_ID_PRI_MI2S_IBIT ... Q6AFE_LPASS_CLK_ID_QUI_MI2S_OSR:
+	case Q6AFE_LPASS_CLK_ID_MCLK_1 ... Q6AFE_LPASS_CLK_ID_INT_MCLK_1:
+		return q6afe_port_set_sysclk(port, clk_id,
+					     Q6AFE_LPASS_CLK_ATTRIBUTE_COUPLE_NO,
+					     Q6AFE_LPASS_CLK_ROOT_DEFAULT,
+					     freq, dir);
+	case Q6AFE_LPASS_CLK_ID_PRI_TDM_IBIT ... Q6AFE_LPASS_CLK_ID_QUIN_TDM_EBIT:
+		return q6afe_port_set_sysclk(port, clk_id,
+					     Q6AFE_LPASS_CLK_ATTRIBUTE_INVERT_COUPLE_NO,
+					     Q6AFE_LPASS_CLK_ROOT_DEFAULT,
+					     freq, dir);
+	}
+
+	return 0;
+}
+
+static const struct snd_soc_dapm_route q6afe_dapm_routes[] = {
+	{"HDMI Playback", NULL, "HDMI_RX"},
+	{"Slimbus1 Playback", NULL, "SLIMBUS_1_RX"},
+	{"Slimbus2 Playback", NULL, "SLIMBUS_2_RX"},
+	{"Slimbus3 Playback", NULL, "SLIMBUS_3_RX"},
+	{"Slimbus4 Playback", NULL, "SLIMBUS_4_RX"},
+	{"Slimbus5 Playback", NULL, "SLIMBUS_5_RX"},
+	{"Slimbus6 Playback", NULL, "SLIMBUS_6_RX"},
+
+	{"Primary MI2S Playback", NULL, "PRI_MI2S_RX"},
+	{"Secondary MI2S Playback", NULL, "SEC_MI2S_RX"},
+	{"Tertiary MI2S Playback", NULL, "TERT_MI2S_RX"},
+	{"Quaternary MI2S Playback", NULL, "QUAT_MI2S_RX"},
+
+	{"Primary TDM0 Playback", NULL, "PRIMARY_TDM_RX_0"},
+	{"Primary TDM1 Playback", NULL, "PRIMARY_TDM_RX_1"},
+	{"Primary TDM2 Playback", NULL, "PRIMARY_TDM_RX_2"},
+	{"Primary TDM3 Playback", NULL, "PRIMARY_TDM_RX_3"},
+	{"Primary TDM4 Playback", NULL, "PRIMARY_TDM_RX_4"},
+	{"Primary TDM5 Playback", NULL, "PRIMARY_TDM_RX_5"},
+	{"Primary TDM6 Playback", NULL, "PRIMARY_TDM_RX_6"},
+	{"Primary TDM7 Playback", NULL, "PRIMARY_TDM_RX_7"},
+
+	{"Secondary TDM0 Playback", NULL, "SEC_TDM_RX_0"},
+	{"Secondary TDM1 Playback", NULL, "SEC_TDM_RX_1"},
+	{"Secondary TDM2 Playback", NULL, "SEC_TDM_RX_2"},
+	{"Secondary TDM3 Playback", NULL, "SEC_TDM_RX_3"},
+	{"Secondary TDM4 Playback", NULL, "SEC_TDM_RX_4"},
+	{"Secondary TDM5 Playback", NULL, "SEC_TDM_RX_5"},
+	{"Secondary TDM6 Playback", NULL, "SEC_TDM_RX_6"},
+	{"Secondary TDM7 Playback", NULL, "SEC_TDM_RX_7"},
+
+	{"Tertiary TDM0 Playback", NULL, "TERT_TDM_RX_0"},
+	{"Tertiary TDM1 Playback", NULL, "TERT_TDM_RX_1"},
+	{"Tertiary TDM2 Playback", NULL, "TERT_TDM_RX_2"},
+	{"Tertiary TDM3 Playback", NULL, "TERT_TDM_RX_3"},
+	{"Tertiary TDM4 Playback", NULL, "TERT_TDM_RX_4"},
+	{"Tertiary TDM5 Playback", NULL, "TERT_TDM_RX_5"},
+	{"Tertiary TDM6 Playback", NULL, "TERT_TDM_RX_6"},
+	{"Tertiary TDM7 Playback", NULL, "TERT_TDM_RX_7"},
+
+	{"Quaternary TDM0 Playback", NULL, "QUAT_TDM_RX_0"},
+	{"Quaternary TDM1 Playback", NULL, "QUAT_TDM_RX_1"},
+	{"Quaternary TDM2 Playback", NULL, "QUAT_TDM_RX_2"},
+	{"Quaternary TDM3 Playback", NULL, "QUAT_TDM_RX_3"},
+	{"Quaternary TDM4 Playback", NULL, "QUAT_TDM_RX_4"},
+	{"Quaternary TDM5 Playback", NULL, "QUAT_TDM_RX_5"},
+	{"Quaternary TDM6 Playback", NULL, "QUAT_TDM_RX_6"},
+	{"Quaternary TDM7 Playback", NULL, "QUAT_TDM_RX_7"},
+
+	{"Quinary TDM0 Playback", NULL, "QUIN_TDM_RX_0"},
+	{"Quinary TDM1 Playback", NULL, "QUIN_TDM_RX_1"},
+	{"Quinary TDM2 Playback", NULL, "QUIN_TDM_RX_2"},
+	{"Quinary TDM3 Playback", NULL, "QUIN_TDM_RX_3"},
+	{"Quinary TDM4 Playback", NULL, "QUIN_TDM_RX_4"},
+	{"Quinary TDM5 Playback", NULL, "QUIN_TDM_RX_5"},
+	{"Quinary TDM6 Playback", NULL, "QUIN_TDM_RX_6"},
+	{"Quinary TDM7 Playback", NULL, "QUIN_TDM_RX_7"},
+
+	{"PRIMARY_TDM_TX_0", NULL, "Primary TDM0 Capture"},
+	{"PRIMARY_TDM_TX_1", NULL, "Primary TDM1 Capture"},
+	{"PRIMARY_TDM_TX_2", NULL, "Primary TDM2 Capture"},
+	{"PRIMARY_TDM_TX_3", NULL, "Primary TDM3 Capture"},
+	{"PRIMARY_TDM_TX_4", NULL, "Primary TDM4 Capture"},
+	{"PRIMARY_TDM_TX_5", NULL, "Primary TDM5 Capture"},
+	{"PRIMARY_TDM_TX_6", NULL, "Primary TDM6 Capture"},
+	{"PRIMARY_TDM_TX_7", NULL, "Primary TDM7 Capture"},
+
+	{"SEC_TDM_TX_0", NULL, "Secondary TDM0 Capture"},
+	{"SEC_TDM_TX_1", NULL, "Secondary TDM1 Capture"},
+	{"SEC_TDM_TX_2", NULL, "Secondary TDM2 Capture"},
+	{"SEC_TDM_TX_3", NULL, "Secondary TDM3 Capture"},
+	{"SEC_TDM_TX_4", NULL, "Secondary TDM4 Capture"},
+	{"SEC_TDM_TX_5", NULL, "Secondary TDM5 Capture"},
+	{"SEC_TDM_TX_6", NULL, "Secondary TDM6 Capture"},
+	{"SEC_TDM_TX_7", NULL, "Secondary TDM7 Capture"},
+
+	{"TERT_TDM_TX_0", NULL, "Tertiary TDM0 Capture"},
+	{"TERT_TDM_TX_1", NULL, "Tertiary TDM1 Capture"},
+	{"TERT_TDM_TX_2", NULL, "Tertiary TDM2 Capture"},
+	{"TERT_TDM_TX_3", NULL, "Tertiary TDM3 Capture"},
+	{"TERT_TDM_TX_4", NULL, "Tertiary TDM4 Capture"},
+	{"TERT_TDM_TX_5", NULL, "Tertiary TDM5 Capture"},
+	{"TERT_TDM_TX_6", NULL, "Tertiary TDM6 Capture"},
+	{"TERT_TDM_TX_7", NULL, "Tertiary TDM7 Capture"},
+
+	{"QUAT_TDM_TX_0", NULL, "Quaternary TDM0 Capture"},
+	{"QUAT_TDM_TX_1", NULL, "Quaternary TDM1 Capture"},
+	{"QUAT_TDM_TX_2", NULL, "Quaternary TDM2 Capture"},
+	{"QUAT_TDM_TX_3", NULL, "Quaternary TDM3 Capture"},
+	{"QUAT_TDM_TX_4", NULL, "Quaternary TDM4 Capture"},
+	{"QUAT_TDM_TX_5", NULL, "Quaternary TDM5 Capture"},
+	{"QUAT_TDM_TX_6", NULL, "Quaternary TDM6 Capture"},
+	{"QUAT_TDM_TX_7", NULL, "Quaternary TDM7 Capture"},
+
+	{"QUIN_TDM_TX_0", NULL, "Quinary TDM0 Capture"},
+	{"QUIN_TDM_TX_1", NULL, "Quinary TDM1 Capture"},
+	{"QUIN_TDM_TX_2", NULL, "Quinary TDM2 Capture"},
+	{"QUIN_TDM_TX_3", NULL, "Quinary TDM3 Capture"},
+	{"QUIN_TDM_TX_4", NULL, "Quinary TDM4 Capture"},
+	{"QUIN_TDM_TX_5", NULL, "Quinary TDM5 Capture"},
+	{"QUIN_TDM_TX_6", NULL, "Quinary TDM6 Capture"},
+	{"QUIN_TDM_TX_7", NULL, "Quinary TDM7 Capture"},
+
+	{"TERT_MI2S_TX", NULL, "Tertiary MI2S Capture"},
+	{"PRI_MI2S_TX", NULL, "Primary MI2S Capture"},
+	{"SEC_MI2S_TX", NULL, "Secondary MI2S Capture"},
+	{"QUAT_MI2S_TX", NULL, "Quaternary MI2S Capture"},
+};
+
+static const struct snd_soc_dai_ops q6hdmi_ops = {
+	.prepare	= q6afe_dai_prepare,
+	.hw_params	= q6hdmi_hw_params,
+	.shutdown	= q6afe_dai_shutdown,
+};
+
+static const struct snd_soc_dai_ops q6i2s_ops = {
+	.prepare	= q6afe_dai_prepare,
+	.hw_params	= q6i2s_hw_params,
+	.set_fmt	= q6i2s_set_fmt,
+	.shutdown	= q6afe_dai_shutdown,
+	.set_sysclk	= q6afe_mi2s_set_sysclk,
+};
+
+static const struct snd_soc_dai_ops q6slim_ops = {
+	.prepare	= q6afe_dai_prepare,
+	.hw_params	= q6slim_hw_params,
+	.shutdown	= q6afe_dai_shutdown,
+	.set_channel_map = q6slim_set_channel_map,
+};
+
+static const struct snd_soc_dai_ops q6tdm_ops = {
+	.prepare	= q6afe_dai_prepare,
+	.shutdown	= q6afe_dai_shutdown,
+	.set_sysclk	= q6afe_mi2s_set_sysclk,
+	.set_tdm_slot     = q6tdm_set_tdm_slot,
+	.set_channel_map  = q6tdm_set_channel_map,
+	.hw_params        = q6tdm_hw_params,
+};
+
+static int msm_dai_q6_dai_probe(struct snd_soc_dai *dai)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+	struct q6afe_port *port;
+
+	port = q6afe_port_get_from_id(dai->dev, dai->id);
+	if (IS_ERR(port)) {
+		dev_err(dai->dev, "Unable to get afe port\n");
+		return -EINVAL;
+	}
+	dai_data->port[dai->id] = port;
+
+	return 0;
+}
+
+static int msm_dai_q6_dai_remove(struct snd_soc_dai *dai)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev);
+
+	q6afe_port_put(dai_data->port[dai->id]);
+	dai_data->port[dai->id] = NULL;
+
+	return 0;
+}
+
+static struct snd_soc_dai_driver q6afe_dais[] = {
+	{
+		.playback = {
+			.stream_name = "HDMI Playback",
+			.rates = SNDRV_PCM_RATE_48000 |
+				 SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.channels_min = 2,
+			.channels_max = 8,
+		.rate_max = 192000,
+		.rate_min = 48000,
+		},
+		.ops = &q6hdmi_ops,
+		.id = HDMI_RX,
+		.name = "HDMI",
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.name = "SLIMBUS_0_RX",
+		.ops = &q6slim_ops,
+		.id = SLIMBUS_0_RX,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+		.playback = {
+			.stream_name = "Slimbus Playback",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.channels_min = 1,
+			.channels_max = 8,
+			.rate_min = 8000,
+			.rate_max = 192000,
+		},
+	}, {
+		.playback = {
+			.stream_name = "Slimbus1 Playback",
+			.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |
+				 SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.channels_min = 1,
+			.channels_max = 2,
+			.rate_min = 8000,
+			.rate_max = 192000,
+		},
+		.name = "SLIMBUS_1_RX",
+		.ops = &q6slim_ops,
+		.id = SLIMBUS_1_RX,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Slimbus2 Playback",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000 | SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.channels_min = 1,
+			.channels_max = 8,
+			.rate_min = 8000,
+			.rate_max = 192000,
+		},
+		.name = "SLIMBUS_2_RX",
+		.ops = &q6slim_ops,
+		.id = SLIMBUS_2_RX,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Slimbus3 Playback",
+			.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |
+				 SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.channels_min = 1,
+			.channels_max = 2,
+			.rate_min = 8000,
+			.rate_max = 192000,
+		},
+		.name = "SLIMBUS_3_RX",
+		.ops = &q6slim_ops,
+		.id = SLIMBUS_3_RX,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Slimbus4 Playback",
+			.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |
+				 SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.channels_min = 1,
+			.channels_max = 2,
+			.rate_min = 8000,
+			.rate_max = 192000,
+		},
+		.name = "SLIMBUS_4_RX",
+		.ops = &q6slim_ops,
+		.id = SLIMBUS_4_RX,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Slimbus5 Playback",
+			.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |
+				 SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.channels_min = 1,
+			.channels_max = 2,
+			.rate_min = 8000,
+			.rate_max = 192000,
+		},
+		.name = "SLIMBUS_5_RX",
+		.ops = &q6slim_ops,
+		.id = SLIMBUS_5_RX,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Slimbus6 Playback",
+			.rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |
+				 SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_96000 |
+				 SNDRV_PCM_RATE_192000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.channels_min = 1,
+			.channels_max = 2,
+			.rate_min = 8000,
+			.rate_max = 192000,
+		},
+		.ops = &q6slim_ops,
+		.name = "SLIMBUS_6_RX",
+		.id = SLIMBUS_6_RX,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Primary MI2S Playback",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.rate_min =     8000,
+			.rate_max =     48000,
+		},
+		.id = PRIMARY_MI2S_RX,
+		.name = "PRI_MI2S_RX",
+		.ops = &q6i2s_ops,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.capture = {
+			.stream_name = "Primary MI2S Capture",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.rate_min =     8000,
+			.rate_max =     48000,
+		},
+		.id = PRIMARY_MI2S_TX,
+		.name = "PRI_MI2S_TX",
+		.ops = &q6i2s_ops,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Secondary MI2S Playback",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.rate_min =     8000,
+			.rate_max =     48000,
+		},
+		.name = "SEC_MI2S_RX",
+		.id = SECONDARY_MI2S_RX,
+		.ops = &q6i2s_ops,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.capture = {
+			.stream_name = "Secondary MI2S Capture",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.rate_min =     8000,
+			.rate_max =     48000,
+		},
+		.id = SECONDARY_MI2S_TX,
+		.name = "SEC_MI2S_TX",
+		.ops = &q6i2s_ops,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Tertiary MI2S Playback",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.rate_min =     8000,
+			.rate_max =     48000,
+		},
+		.name = "TERT_MI2S_RX",
+		.id = TERTIARY_MI2S_RX,
+		.ops = &q6i2s_ops,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.capture = {
+			.stream_name = "Tertiary MI2S Capture",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.rate_min =     8000,
+			.rate_max =     48000,
+		},
+		.id = TERTIARY_MI2S_TX,
+		.name = "TERT_MI2S_TX",
+		.ops = &q6i2s_ops,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.playback = {
+			.stream_name = "Quaternary MI2S Playback",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE,
+			.rate_min =     8000,
+			.rate_max =     48000,
+		},
+		.name = "QUAT_MI2S_RX",
+		.id = QUATERNARY_MI2S_RX,
+		.ops = &q6i2s_ops,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	}, {
+		.capture = {
+			.stream_name = "Quaternary MI2S Capture",
+			.rates = SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_8000 |
+				 SNDRV_PCM_RATE_16000,
+			.formats = SNDRV_PCM_FMTBIT_S16_LE |
+				   SNDRV_PCM_FMTBIT_S24_LE,
+			.rate_min =     8000,
+			.rate_max =     48000,
+		},
+		.id = QUATERNARY_MI2S_TX,
+		.name = "QUAT_MI2S_TX",
+		.ops = &q6i2s_ops,
+		.probe = msm_dai_q6_dai_probe,
+		.remove = msm_dai_q6_dai_remove,
+	},
+	Q6AFE_TDM_PB_DAI("Primary", 0, PRIMARY_TDM_RX_0),
+	Q6AFE_TDM_PB_DAI("Primary", 1, PRIMARY_TDM_RX_1),
+	Q6AFE_TDM_PB_DAI("Primary", 2, PRIMARY_TDM_RX_2),
+	Q6AFE_TDM_PB_DAI("Primary", 3, PRIMARY_TDM_RX_3),
+	Q6AFE_TDM_PB_DAI("Primary", 4, PRIMARY_TDM_RX_4),
+	Q6AFE_TDM_PB_DAI("Primary", 5, PRIMARY_TDM_RX_5),
+	Q6AFE_TDM_PB_DAI("Primary", 6, PRIMARY_TDM_RX_6),
+	Q6AFE_TDM_PB_DAI("Primary", 7, PRIMARY_TDM_RX_7),
+	Q6AFE_TDM_CAP_DAI("Primary", 0, PRIMARY_TDM_TX_0),
+	Q6AFE_TDM_CAP_DAI("Primary", 1, PRIMARY_TDM_TX_1),
+	Q6AFE_TDM_CAP_DAI("Primary", 2, PRIMARY_TDM_TX_2),
+	Q6AFE_TDM_CAP_DAI("Primary", 3, PRIMARY_TDM_TX_3),
+	Q6AFE_TDM_CAP_DAI("Primary", 4, PRIMARY_TDM_TX_4),
+	Q6AFE_TDM_CAP_DAI("Primary", 5, PRIMARY_TDM_TX_5),
+	Q6AFE_TDM_CAP_DAI("Primary", 6, PRIMARY_TDM_TX_6),
+	Q6AFE_TDM_CAP_DAI("Primary", 7, PRIMARY_TDM_TX_7),
+	Q6AFE_TDM_PB_DAI("Secondary", 0, SECONDARY_TDM_RX_0),
+	Q6AFE_TDM_PB_DAI("Secondary", 1, SECONDARY_TDM_RX_1),
+	Q6AFE_TDM_PB_DAI("Secondary", 2, SECONDARY_TDM_RX_2),
+	Q6AFE_TDM_PB_DAI("Secondary", 3, SECONDARY_TDM_RX_3),
+	Q6AFE_TDM_PB_DAI("Secondary", 4, SECONDARY_TDM_RX_4),
+	Q6AFE_TDM_PB_DAI("Secondary", 5, SECONDARY_TDM_RX_5),
+	Q6AFE_TDM_PB_DAI("Secondary", 6, SECONDARY_TDM_RX_6),
+	Q6AFE_TDM_PB_DAI("Secondary", 7, SECONDARY_TDM_RX_7),
+	Q6AFE_TDM_CAP_DAI("Secondary", 0, SECONDARY_TDM_TX_0),
+	Q6AFE_TDM_CAP_DAI("Secondary", 1, SECONDARY_TDM_TX_1),
+	Q6AFE_TDM_CAP_DAI("Secondary", 2, SECONDARY_TDM_TX_2),
+	Q6AFE_TDM_CAP_DAI("Secondary", 3, SECONDARY_TDM_TX_3),
+	Q6AFE_TDM_CAP_DAI("Secondary", 4, SECONDARY_TDM_TX_4),
+	Q6AFE_TDM_CAP_DAI("Secondary", 5, SECONDARY_TDM_TX_5),
+	Q6AFE_TDM_CAP_DAI("Secondary", 6, SECONDARY_TDM_TX_6),
+	Q6AFE_TDM_CAP_DAI("Secondary", 7, SECONDARY_TDM_TX_7),
+	Q6AFE_TDM_PB_DAI("Tertiary", 0, TERTIARY_TDM_RX_0),
+	Q6AFE_TDM_PB_DAI("Tertiary", 1, TERTIARY_TDM_RX_1),
+	Q6AFE_TDM_PB_DAI("Tertiary", 2, TERTIARY_TDM_RX_2),
+	Q6AFE_TDM_PB_DAI("Tertiary", 3, TERTIARY_TDM_RX_3),
+	Q6AFE_TDM_PB_DAI("Tertiary", 4, TERTIARY_TDM_RX_4),
+	Q6AFE_TDM_PB_DAI("Tertiary", 5, TERTIARY_TDM_RX_5),
+	Q6AFE_TDM_PB_DAI("Tertiary", 6, TERTIARY_TDM_RX_6),
+	Q6AFE_TDM_PB_DAI("Tertiary", 7, TERTIARY_TDM_RX_7),
+	Q6AFE_TDM_CAP_DAI("Tertiary", 0, TERTIARY_TDM_TX_0),
+	Q6AFE_TDM_CAP_DAI("Tertiary", 1, TERTIARY_TDM_TX_1),
+	Q6AFE_TDM_CAP_DAI("Tertiary", 2, TERTIARY_TDM_TX_2),
+	Q6AFE_TDM_CAP_DAI("Tertiary", 3, TERTIARY_TDM_TX_3),
+	Q6AFE_TDM_CAP_DAI("Tertiary", 4, TERTIARY_TDM_TX_4),
+	Q6AFE_TDM_CAP_DAI("Tertiary", 5, TERTIARY_TDM_TX_5),
+	Q6AFE_TDM_CAP_DAI("Tertiary", 6, TERTIARY_TDM_TX_6),
+	Q6AFE_TDM_CAP_DAI("Tertiary", 7, TERTIARY_TDM_TX_7),
+	Q6AFE_TDM_PB_DAI("Quaternary", 0, QUATERNARY_TDM_RX_0),
+	Q6AFE_TDM_PB_DAI("Quaternary", 1, QUATERNARY_TDM_RX_1),
+	Q6AFE_TDM_PB_DAI("Quaternary", 2, QUATERNARY_TDM_RX_2),
+	Q6AFE_TDM_PB_DAI("Quaternary", 3, QUATERNARY_TDM_RX_3),
+	Q6AFE_TDM_PB_DAI("Quaternary", 4, QUATERNARY_TDM_RX_4),
+	Q6AFE_TDM_PB_DAI("Quaternary", 5, QUATERNARY_TDM_RX_5),
+	Q6AFE_TDM_PB_DAI("Quaternary", 6, QUATERNARY_TDM_RX_6),
+	Q6AFE_TDM_PB_DAI("Quaternary", 7, QUATERNARY_TDM_RX_7),
+	Q6AFE_TDM_CAP_DAI("Quaternary", 0, QUATERNARY_TDM_TX_0),
+	Q6AFE_TDM_CAP_DAI("Quaternary", 1, QUATERNARY_TDM_TX_1),
+	Q6AFE_TDM_CAP_DAI("Quaternary", 2, QUATERNARY_TDM_TX_2),
+	Q6AFE_TDM_CAP_DAI("Quaternary", 3, QUATERNARY_TDM_TX_3),
+	Q6AFE_TDM_CAP_DAI("Quaternary", 4, QUATERNARY_TDM_TX_4),
+	Q6AFE_TDM_CAP_DAI("Quaternary", 5, QUATERNARY_TDM_TX_5),
+	Q6AFE_TDM_CAP_DAI("Quaternary", 6, QUATERNARY_TDM_TX_6),
+	Q6AFE_TDM_CAP_DAI("Quaternary", 7, QUATERNARY_TDM_TX_7),
+	Q6AFE_TDM_PB_DAI("Quinary", 0, QUINARY_TDM_RX_0),
+	Q6AFE_TDM_PB_DAI("Quinary", 1, QUINARY_TDM_RX_1),
+	Q6AFE_TDM_PB_DAI("Quinary", 2, QUINARY_TDM_RX_2),
+	Q6AFE_TDM_PB_DAI("Quinary", 3, QUINARY_TDM_RX_3),
+	Q6AFE_TDM_PB_DAI("Quinary", 4, QUINARY_TDM_RX_4),
+	Q6AFE_TDM_PB_DAI("Quinary", 5, QUINARY_TDM_RX_5),
+	Q6AFE_TDM_PB_DAI("Quinary", 6, QUINARY_TDM_RX_6),
+	Q6AFE_TDM_PB_DAI("Quinary", 7, QUINARY_TDM_RX_7),
+	Q6AFE_TDM_CAP_DAI("Quinary", 0, QUINARY_TDM_TX_0),
+	Q6AFE_TDM_CAP_DAI("Quinary", 1, QUINARY_TDM_TX_1),
+	Q6AFE_TDM_CAP_DAI("Quinary", 2, QUINARY_TDM_TX_2),
+	Q6AFE_TDM_CAP_DAI("Quinary", 3, QUINARY_TDM_TX_3),
+	Q6AFE_TDM_CAP_DAI("Quinary", 4, QUINARY_TDM_TX_4),
+	Q6AFE_TDM_CAP_DAI("Quinary", 5, QUINARY_TDM_TX_5),
+	Q6AFE_TDM_CAP_DAI("Quinary", 6, QUINARY_TDM_TX_6),
+	Q6AFE_TDM_CAP_DAI("Quinary", 7, QUINARY_TDM_TX_7),
+};
+
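+/* Map a DT phandle argument (AFE port id) to the matching DAI name. */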
+static int q6afe_of_xlate_dai_name(struct snd_soc_component *component,
+				   struct of_phandle_args *args,
+				   const char **dai_name)
+{
+	int id = args->args[0];
+	int ret = -EINVAL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(q6afe_dais); i++) {
+		if (q6afe_dais[i].id == id) {
+			*dai_name = q6afe_dais[i].name;
+			ret = 0;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static const struct snd_soc_dapm_widget q6afe_dai_widgets[] = {
+	SND_SOC_DAPM_AIF_OUT("HDMI_RX", "HDMI Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_0_RX", "Slimbus Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_1_RX", "Slimbus1 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_2_RX", "Slimbus2 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_3_RX", "Slimbus3 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_4_RX", "Slimbus4 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_5_RX", "Slimbus5 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_6_RX", "Slimbus6 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUAT_MI2S_RX", "Quaternary MI2S Playback",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_MI2S_TX", "Quaternary MI2S Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("TERT_MI2S_RX", "Tertiary MI2S Playback",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_MI2S_TX", "Tertiary MI2S Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_MI2S_RX", "Secondary MI2S Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_MI2S_TX", "Secondary MI2S Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_MI2S_RX_SD1",
+			"Secondary MI2S Playback SD1",
+			0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("PRI_MI2S_RX", "Primary MI2S Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRI_MI2S_TX", "Primary MI2S Capture",
+						0, 0, 0, 0),
+
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_0", "Primary TDM0 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_1", "Primary TDM1 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_2", "Primary TDM2 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_3", "Primary TDM3 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_4", "Primary TDM4 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_5", "Primary TDM5 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_6", "Primary TDM6 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_7", "Primary TDM7 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_0", "Primary TDM0 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_1", "Primary TDM1 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_2", "Primary TDM2 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_3", "Primary TDM3 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_4", "Primary TDM4 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_5", "Primary TDM5 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_6", "Primary TDM6 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_7", "Primary TDM7 Capture",
+						0, 0, 0, 0),
+
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_0", "Secondary TDM0 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_1", "Secondary TDM1 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_2", "Secondary TDM2 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_3", "Secondary TDM3 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_4", "Secondary TDM4 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_5", "Secondary TDM5 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_6", "Secondary TDM6 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_7", "Secondary TDM7 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_0", "Secondary TDM0 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_1", "Secondary TDM1 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_2", "Secondary TDM2 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_3", "Secondary TDM3 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_4", "Secondary TDM4 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_5", "Secondary TDM5 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_6", "Secondary TDM6 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_7", "Secondary TDM7 Capture",
+						0, 0, 0, 0),
+
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_0", "Tertiary TDM0 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_1", "Tertiary TDM1 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_2", "Tertiary TDM2 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_3", "Tertiary TDM3 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_4", "Tertiary TDM4 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_5", "Tertiary TDM5 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_6", "Tertiary TDM6 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_7", "Tertiary TDM7 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_0", "Tertiary TDM0 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_1", "Tertiary TDM1 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_2", "Tertiary TDM2 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_3", "Tertiary TDM3 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_4", "Tertiary TDM4 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_5", "Tertiary TDM5 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_6", "Tertiary TDM6 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_7", "Tertiary TDM7 Capture",
+						0, 0, 0, 0),
+
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_0", "Quaternary TDM0 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_1", "Quaternary TDM1 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_2", "Quaternary TDM2 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_3", "Quaternary TDM3 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_4", "Quaternary TDM4 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_5", "Quaternary TDM5 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_6", "Quaternary TDM6 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_7", "Quaternary TDM7 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_0", "Quaternary TDM0 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_1", "Quaternary TDM1 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_2", "Quaternary TDM2 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_3", "Quaternary TDM3 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_4", "Quaternary TDM4 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_5", "Quaternary TDM5 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_6", "Quaternary TDM6 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_7", "Quaternary TDM7 Capture",
+						0, 0, 0, 0),
+
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_0", "Quinary TDM0 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_1", "Quinary TDM1 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_2", "Quinary TDM2 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_3", "Quinary TDM3 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_4", "Quinary TDM4 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_5", "Quinary TDM5 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_6", "Quinary TDM6 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_7", "Quinary TDM7 Playback",
+			     0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_0", "Quinary TDM0 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_1", "Quinary TDM1 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_2", "Quinary TDM2 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_3", "Quinary TDM3 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_4", "Quinary TDM4 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_5", "Quinary TDM5 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_6", "Quinary TDM6 Capture",
+						0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_7", "Quinary TDM7 Capture",
+						0, 0, 0, 0),
+};
+
+static const struct snd_soc_component_driver q6afe_dai_component = {
+	.name		= "q6afe-dai-component",
+	.dapm_widgets = q6afe_dai_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(q6afe_dai_widgets),
+	.dapm_routes = q6afe_dapm_routes,
+	.num_dapm_routes = ARRAY_SIZE(q6afe_dapm_routes),
+	.of_xlate_dai_name = q6afe_of_xlate_dai_name,
+};
+
+static void of_q6afe_parse_dai_data(struct device *dev,
+				    struct q6afe_dai_data *data)
+{
+	struct device_node *node;
+	int ret;
+
+	for_each_child_of_node(dev->of_node, node) {
+		unsigned int lines[Q6AFE_MAX_MI2S_LINES];
+		struct q6afe_dai_priv_data *priv;
+		int id, i, num_lines;
+
+		ret = of_property_read_u32(node, "reg", &id);
+		if (ret || id < 0 || id >= AFE_PORT_MAX) {
+			dev_err(dev, "valid dai id not found: %d\n", ret);
+			continue;
+		}
+
+		switch (id) {
+		/* MI2S specific properties */
+		case PRIMARY_MI2S_RX ... QUATERNARY_MI2S_TX:
+			priv = &data->priv[id];
+			ret = of_property_read_variable_u32_array(node,
+							"qcom,sd-lines",
+							lines, 0,
+							Q6AFE_MAX_MI2S_LINES);
+			if (ret < 0)
+				num_lines = 0;
+			else
+				num_lines = ret;
+
+			priv->sd_line_mask = 0;
+
+			for (i = 0; i < num_lines; i++)
+				priv->sd_line_mask |= BIT(lines[i]);
+
+			break;
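+		/* TDM specific properties */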
+		case PRIMARY_TDM_RX_0 ... QUINARY_TDM_TX_7:
+			priv = &data->priv[id];
+			ret = of_property_read_u32(node, "qcom,tdm-sync-mode",
+						   &priv->sync_mode);
+			if (ret) {
+				dev_err(dev, "No Sync mode from DT\n");
+				break;
+			}
+			ret = of_property_read_u32(node, "qcom,tdm-sync-src",
+						   &priv->sync_src);
+			if (ret) {
+				dev_err(dev, "No Sync Src from DT\n");
+				break;
+			}
+			ret = of_property_read_u32(node, "qcom,tdm-data-out",
+						   &priv->data_out_enable);
+			if (ret) {
+				dev_err(dev, "No Data out enable from DT\n");
+				break;
+			}
+			ret = of_property_read_u32(node, "qcom,tdm-invert-sync",
+						   &priv->invert_sync);
+			if (ret) {
+				dev_err(dev, "No Invert sync from DT\n");
+				break;
+			}
+			ret = of_property_read_u32(node, "qcom,tdm-data-delay",
+						   &priv->data_delay);
+			if (ret) {
+				dev_err(dev, "No Data Delay from DT\n");
+				break;
+			}
+			ret = of_property_read_u32(node, "qcom,tdm-data-align",
+						   &priv->data_align);
+			if (ret) {
+				dev_err(dev, "No Data align from DT\n");
+				break;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static int q6afe_dai_bind(struct device *dev, struct device *master, void *data)
+{
+	struct q6afe_dai_data *dai_data;
+
+	dai_data = kzalloc(sizeof(*dai_data), GFP_KERNEL);
+	if (!dai_data)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, dai_data);
+
+	of_q6afe_parse_dai_data(dev, dai_data);
+
+	return snd_soc_register_component(dev, &q6afe_dai_component,
+					  q6afe_dais, ARRAY_SIZE(q6afe_dais));
+}
+
+static void q6afe_dai_unbind(struct device *dev, struct device *master,
+			     void *data)
+{
+	struct q6afe_dai_data *dai_data = dev_get_drvdata(dev);
+
+	snd_soc_unregister_component(dev);
+	kfree(dai_data);
+}
+
+static const struct component_ops q6afe_dai_comp_ops = {
+	.bind   = q6afe_dai_bind,
+	.unbind = q6afe_dai_unbind,
+};
+
+static int q6afe_dai_dev_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &q6afe_dai_comp_ops);
+}
+
+static int q6afe_dai_dev_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &q6afe_dai_comp_ops);
+	return 0;
+}
+
+static struct platform_driver q6afe_dai_platform_driver = {
+	.driver = {
+		.name = "q6afe-dai",
+	},
+	.probe = q6afe_dai_dev_probe,
+	.remove = q6afe_dai_dev_remove,
+};
+module_platform_driver(q6afe_dai_platform_driver);
+
+MODULE_DESCRIPTION("Q6 Audio Frontend dai driver");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/q6afe.c b/sound/soc/qcom/qdsp6/q6afe.c
new file mode 100644
index 0000000..01f4321
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6afe.c
@@ -0,0 +1,1495 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <linux/jiffies.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/kref.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/soc/qcom/apr.h>
+#include <sound/soc.h>
+#include <sound/soc-dai.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include "q6dsp-errno.h"
+#include "q6core.h"
+#include "q6afe.h"
+
+/* AFE CMDs */
+#define AFE_PORT_CMD_DEVICE_START	0x000100E5
+#define AFE_PORT_CMD_DEVICE_STOP	0x000100E6
+#define AFE_PORT_CMD_SET_PARAM_V2	0x000100EF
+#define AFE_SVC_CMD_SET_PARAM		0x000100f3
+#define AFE_PORT_CMDRSP_GET_PARAM_V2	0x00010106
+#define AFE_PARAM_ID_HDMI_CONFIG	0x00010210
+#define AFE_MODULE_AUDIO_DEV_INTERFACE	0x0001020C
+#define AFE_MODULE_TDM			0x0001028A
+
+#define AFE_PARAM_ID_CDC_SLIMBUS_SLAVE_CFG 0x00010235
+
+#define AFE_PARAM_ID_LPAIF_CLK_CONFIG	0x00010238
+#define AFE_PARAM_ID_INT_DIGITAL_CDC_CLK_CONFIG	0x00010239
+
+#define AFE_PARAM_ID_SLIMBUS_CONFIG    0x00010212
+#define AFE_PARAM_ID_I2S_CONFIG	0x0001020D
+#define AFE_PARAM_ID_TDM_CONFIG	0x0001029D
+#define AFE_PARAM_ID_PORT_SLOT_MAPPING_CONFIG	0x00010297
+
+/* I2S config specific */
+#define AFE_API_VERSION_I2S_CONFIG	0x1
+#define AFE_PORT_I2S_SD0		0x1
+#define AFE_PORT_I2S_SD1		0x2
+#define AFE_PORT_I2S_SD2		0x3
+#define AFE_PORT_I2S_SD3		0x4
+#define AFE_PORT_I2S_SD0_MASK		BIT(0x1)
+#define AFE_PORT_I2S_SD1_MASK		BIT(0x2)
+#define AFE_PORT_I2S_SD2_MASK		BIT(0x3)
+#define AFE_PORT_I2S_SD3_MASK		BIT(0x4)
+#define AFE_PORT_I2S_SD0_1_MASK		GENMASK(2, 1)
+#define AFE_PORT_I2S_SD2_3_MASK		GENMASK(4, 3)
+#define AFE_PORT_I2S_SD0_1_2_MASK	GENMASK(3, 1)
+#define AFE_PORT_I2S_SD0_1_2_3_MASK	GENMASK(4, 1)
+#define AFE_PORT_I2S_QUAD01		0x5
+#define AFE_PORT_I2S_QUAD23		0x6
+#define AFE_PORT_I2S_6CHS		0x7
+#define AFE_PORT_I2S_8CHS		0x8
+#define AFE_PORT_I2S_MONO		0x0
+#define AFE_PORT_I2S_STEREO		0x1
+#define AFE_PORT_CONFIG_I2S_WS_SRC_EXTERNAL	0x0
+#define AFE_PORT_CONFIG_I2S_WS_SRC_INTERNAL	0x1
+#define AFE_LINEAR_PCM_DATA				0x0
+
+
+/* Port IDs */
+#define AFE_API_VERSION_HDMI_CONFIG	0x1
+#define AFE_PORT_ID_MULTICHAN_HDMI_RX	0x100E
+
+#define AFE_API_VERSION_SLIMBUS_CONFIG 0x1
+/* Clock set API version */
+#define AFE_API_VERSION_CLOCK_SET 1
+#define Q6AFE_LPASS_CLK_CONFIG_API_VERSION	0x1
+#define AFE_MODULE_CLOCK_SET		0x0001028F
+#define AFE_PARAM_ID_CLOCK_SET		0x00010290
+
+/* SLIMbus Rx port on channel 0. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_0_RX      0x4000
+/* SLIMbus Tx port on channel 0. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_0_TX      0x4001
+/* SLIMbus Rx port on channel 1. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_1_RX      0x4002
+/* SLIMbus Tx port on channel 1. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_1_TX      0x4003
+/* SLIMbus Rx port on channel 2. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_2_RX      0x4004
+/* SLIMbus Tx port on channel 2. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_2_TX      0x4005
+/* SLIMbus Rx port on channel 3. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_3_RX      0x4006
+/* SLIMbus Tx port on channel 3. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_3_TX      0x4007
+/* SLIMbus Rx port on channel 4. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_4_RX      0x4008
+/* SLIMbus Tx port on channel 4. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_4_TX      0x4009
+/* SLIMbus Rx port on channel 5. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_5_RX      0x400a
+/* SLIMbus Tx port on channel 5. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_5_TX      0x400b
+/* SLIMbus Rx port on channel 6. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_6_RX      0x400c
+/* SLIMbus Tx port on channel 6. */
+#define AFE_PORT_ID_SLIMBUS_MULTI_CHAN_6_TX      0x400d
+#define AFE_PORT_ID_PRIMARY_MI2S_RX         0x1000
+#define AFE_PORT_ID_PRIMARY_MI2S_TX         0x1001
+#define AFE_PORT_ID_SECONDARY_MI2S_RX       0x1002
+#define AFE_PORT_ID_SECONDARY_MI2S_TX       0x1003
+#define AFE_PORT_ID_TERTIARY_MI2S_RX        0x1004
+#define AFE_PORT_ID_TERTIARY_MI2S_TX        0x1005
+#define AFE_PORT_ID_QUATERNARY_MI2S_RX      0x1006
+#define AFE_PORT_ID_QUATERNARY_MI2S_TX      0x1007
+
+/* Start of the range of port IDs for TDM devices. */
+#define AFE_PORT_ID_TDM_PORT_RANGE_START	0x9000
+
+/* End of the range of port IDs for TDM devices. */
+#define AFE_PORT_ID_TDM_PORT_RANGE_END \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START+0x50-1)
+
+/* Size of the range of port IDs for TDM ports. */
+#define AFE_PORT_ID_TDM_PORT_RANGE_SIZE \
+	(AFE_PORT_ID_TDM_PORT_RANGE_END - \
+	AFE_PORT_ID_TDM_PORT_RANGE_START+1)
+
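+/*
+ * Within the TDM range, RX ports sit at even offsets and TX ports at odd
+ * offsets; each interface (primary, secondary, ...) occupies a 0x10 block.
+ */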
+#define AFE_PORT_ID_PRIMARY_TDM_RX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x00)
+#define AFE_PORT_ID_PRIMARY_TDM_RX_1 \
+	(AFE_PORT_ID_PRIMARY_TDM_RX + 0x02)
+#define AFE_PORT_ID_PRIMARY_TDM_RX_2 \
+	(AFE_PORT_ID_PRIMARY_TDM_RX + 0x04)
+#define AFE_PORT_ID_PRIMARY_TDM_RX_3 \
+	(AFE_PORT_ID_PRIMARY_TDM_RX + 0x06)
+#define AFE_PORT_ID_PRIMARY_TDM_RX_4 \
+	(AFE_PORT_ID_PRIMARY_TDM_RX + 0x08)
+#define AFE_PORT_ID_PRIMARY_TDM_RX_5 \
+	(AFE_PORT_ID_PRIMARY_TDM_RX + 0x0A)
+#define AFE_PORT_ID_PRIMARY_TDM_RX_6 \
+	(AFE_PORT_ID_PRIMARY_TDM_RX + 0x0C)
+#define AFE_PORT_ID_PRIMARY_TDM_RX_7 \
+	(AFE_PORT_ID_PRIMARY_TDM_RX + 0x0E)
+
+#define AFE_PORT_ID_PRIMARY_TDM_TX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x01)
+#define AFE_PORT_ID_PRIMARY_TDM_TX_1 \
+	(AFE_PORT_ID_PRIMARY_TDM_TX + 0x02)
+#define AFE_PORT_ID_PRIMARY_TDM_TX_2 \
+	(AFE_PORT_ID_PRIMARY_TDM_TX + 0x04)
+#define AFE_PORT_ID_PRIMARY_TDM_TX_3 \
+	(AFE_PORT_ID_PRIMARY_TDM_TX + 0x06)
+#define AFE_PORT_ID_PRIMARY_TDM_TX_4 \
+	(AFE_PORT_ID_PRIMARY_TDM_TX + 0x08)
+#define AFE_PORT_ID_PRIMARY_TDM_TX_5 \
+	(AFE_PORT_ID_PRIMARY_TDM_TX + 0x0A)
+#define AFE_PORT_ID_PRIMARY_TDM_TX_6 \
+	(AFE_PORT_ID_PRIMARY_TDM_TX + 0x0C)
+#define AFE_PORT_ID_PRIMARY_TDM_TX_7 \
+	(AFE_PORT_ID_PRIMARY_TDM_TX + 0x0E)
+
+#define AFE_PORT_ID_SECONDARY_TDM_RX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x10)
+#define AFE_PORT_ID_SECONDARY_TDM_RX_1 \
+	(AFE_PORT_ID_SECONDARY_TDM_RX + 0x02)
+#define AFE_PORT_ID_SECONDARY_TDM_RX_2 \
+	(AFE_PORT_ID_SECONDARY_TDM_RX + 0x04)
+#define AFE_PORT_ID_SECONDARY_TDM_RX_3 \
+	(AFE_PORT_ID_SECONDARY_TDM_RX + 0x06)
+#define AFE_PORT_ID_SECONDARY_TDM_RX_4 \
+	(AFE_PORT_ID_SECONDARY_TDM_RX + 0x08)
+#define AFE_PORT_ID_SECONDARY_TDM_RX_5 \
+	(AFE_PORT_ID_SECONDARY_TDM_RX + 0x0A)
+#define AFE_PORT_ID_SECONDARY_TDM_RX_6 \
+	(AFE_PORT_ID_SECONDARY_TDM_RX + 0x0C)
+#define AFE_PORT_ID_SECONDARY_TDM_RX_7 \
+	(AFE_PORT_ID_SECONDARY_TDM_RX + 0x0E)
+
+#define AFE_PORT_ID_SECONDARY_TDM_TX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x11)
+#define AFE_PORT_ID_SECONDARY_TDM_TX_1 \
+	(AFE_PORT_ID_SECONDARY_TDM_TX + 0x02)
+#define AFE_PORT_ID_SECONDARY_TDM_TX_2 \
+	(AFE_PORT_ID_SECONDARY_TDM_TX + 0x04)
+#define AFE_PORT_ID_SECONDARY_TDM_TX_3 \
+	(AFE_PORT_ID_SECONDARY_TDM_TX + 0x06)
+#define AFE_PORT_ID_SECONDARY_TDM_TX_4 \
+	(AFE_PORT_ID_SECONDARY_TDM_TX + 0x08)
+#define AFE_PORT_ID_SECONDARY_TDM_TX_5 \
+	(AFE_PORT_ID_SECONDARY_TDM_TX + 0x0A)
+#define AFE_PORT_ID_SECONDARY_TDM_TX_6 \
+	(AFE_PORT_ID_SECONDARY_TDM_TX + 0x0C)
+#define AFE_PORT_ID_SECONDARY_TDM_TX_7 \
+	(AFE_PORT_ID_SECONDARY_TDM_TX + 0x0E)
+
+#define AFE_PORT_ID_TERTIARY_TDM_RX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x20)
+#define AFE_PORT_ID_TERTIARY_TDM_RX_1 \
+	(AFE_PORT_ID_TERTIARY_TDM_RX + 0x02)
+#define AFE_PORT_ID_TERTIARY_TDM_RX_2 \
+	(AFE_PORT_ID_TERTIARY_TDM_RX + 0x04)
+#define AFE_PORT_ID_TERTIARY_TDM_RX_3 \
+	(AFE_PORT_ID_TERTIARY_TDM_RX + 0x06)
+#define AFE_PORT_ID_TERTIARY_TDM_RX_4 \
+	(AFE_PORT_ID_TERTIARY_TDM_RX + 0x08)
+#define AFE_PORT_ID_TERTIARY_TDM_RX_5 \
+	(AFE_PORT_ID_TERTIARY_TDM_RX + 0x0A)
+#define AFE_PORT_ID_TERTIARY_TDM_RX_6 \
+	(AFE_PORT_ID_TERTIARY_TDM_RX + 0x0C)
+#define AFE_PORT_ID_TERTIARY_TDM_RX_7 \
+	(AFE_PORT_ID_TERTIARY_TDM_RX + 0x0E)
+
+#define AFE_PORT_ID_TERTIARY_TDM_TX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x21)
+#define AFE_PORT_ID_TERTIARY_TDM_TX_1 \
+	(AFE_PORT_ID_TERTIARY_TDM_TX + 0x02)
+#define AFE_PORT_ID_TERTIARY_TDM_TX_2 \
+	(AFE_PORT_ID_TERTIARY_TDM_TX + 0x04)
+#define AFE_PORT_ID_TERTIARY_TDM_TX_3 \
+	(AFE_PORT_ID_TERTIARY_TDM_TX + 0x06)
+#define AFE_PORT_ID_TERTIARY_TDM_TX_4 \
+	(AFE_PORT_ID_TERTIARY_TDM_TX + 0x08)
+#define AFE_PORT_ID_TERTIARY_TDM_TX_5 \
+	(AFE_PORT_ID_TERTIARY_TDM_TX + 0x0A)
+#define AFE_PORT_ID_TERTIARY_TDM_TX_6 \
+	(AFE_PORT_ID_TERTIARY_TDM_TX + 0x0C)
+#define AFE_PORT_ID_TERTIARY_TDM_TX_7 \
+	(AFE_PORT_ID_TERTIARY_TDM_TX + 0x0E)
+
+#define AFE_PORT_ID_QUATERNARY_TDM_RX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x30)
+#define AFE_PORT_ID_QUATERNARY_TDM_RX_1 \
+	(AFE_PORT_ID_QUATERNARY_TDM_RX + 0x02)
+#define AFE_PORT_ID_QUATERNARY_TDM_RX_2 \
+	(AFE_PORT_ID_QUATERNARY_TDM_RX + 0x04)
+#define AFE_PORT_ID_QUATERNARY_TDM_RX_3 \
+	(AFE_PORT_ID_QUATERNARY_TDM_RX + 0x06)
+#define AFE_PORT_ID_QUATERNARY_TDM_RX_4 \
+	(AFE_PORT_ID_QUATERNARY_TDM_RX + 0x08)
+#define AFE_PORT_ID_QUATERNARY_TDM_RX_5 \
+	(AFE_PORT_ID_QUATERNARY_TDM_RX + 0x0A)
+#define AFE_PORT_ID_QUATERNARY_TDM_RX_6 \
+	(AFE_PORT_ID_QUATERNARY_TDM_RX + 0x0C)
+#define AFE_PORT_ID_QUATERNARY_TDM_RX_7 \
+	(AFE_PORT_ID_QUATERNARY_TDM_RX + 0x0E)
+
+#define AFE_PORT_ID_QUATERNARY_TDM_TX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x31)
+#define AFE_PORT_ID_QUATERNARY_TDM_TX_1 \
+	(AFE_PORT_ID_QUATERNARY_TDM_TX + 0x02)
+#define AFE_PORT_ID_QUATERNARY_TDM_TX_2 \
+	(AFE_PORT_ID_QUATERNARY_TDM_TX + 0x04)
+#define AFE_PORT_ID_QUATERNARY_TDM_TX_3 \
+	(AFE_PORT_ID_QUATERNARY_TDM_TX + 0x06)
+#define AFE_PORT_ID_QUATERNARY_TDM_TX_4 \
+	(AFE_PORT_ID_QUATERNARY_TDM_TX + 0x08)
+#define AFE_PORT_ID_QUATERNARY_TDM_TX_5 \
+	(AFE_PORT_ID_QUATERNARY_TDM_TX + 0x0A)
+#define AFE_PORT_ID_QUATERNARY_TDM_TX_6 \
+	(AFE_PORT_ID_QUATERNARY_TDM_TX + 0x0C)
+#define AFE_PORT_ID_QUATERNARY_TDM_TX_7 \
+	(AFE_PORT_ID_QUATERNARY_TDM_TX + 0x0E)
+
+#define AFE_PORT_ID_QUINARY_TDM_RX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x40)
+#define AFE_PORT_ID_QUINARY_TDM_RX_1 \
+	(AFE_PORT_ID_QUINARY_TDM_RX + 0x02)
+#define AFE_PORT_ID_QUINARY_TDM_RX_2 \
+	(AFE_PORT_ID_QUINARY_TDM_RX + 0x04)
+#define AFE_PORT_ID_QUINARY_TDM_RX_3 \
+	(AFE_PORT_ID_QUINARY_TDM_RX + 0x06)
+#define AFE_PORT_ID_QUINARY_TDM_RX_4 \
+	(AFE_PORT_ID_QUINARY_TDM_RX + 0x08)
+#define AFE_PORT_ID_QUINARY_TDM_RX_5 \
+	(AFE_PORT_ID_QUINARY_TDM_RX + 0x0A)
+#define AFE_PORT_ID_QUINARY_TDM_RX_6 \
+	(AFE_PORT_ID_QUINARY_TDM_RX + 0x0C)
+#define AFE_PORT_ID_QUINARY_TDM_RX_7 \
+	(AFE_PORT_ID_QUINARY_TDM_RX + 0x0E)
+
+#define AFE_PORT_ID_QUINARY_TDM_TX \
+	(AFE_PORT_ID_TDM_PORT_RANGE_START + 0x41)
+#define AFE_PORT_ID_QUINARY_TDM_TX_1 \
+	(AFE_PORT_ID_QUINARY_TDM_TX + 0x02)
+#define AFE_PORT_ID_QUINARY_TDM_TX_2 \
+	(AFE_PORT_ID_QUINARY_TDM_TX + 0x04)
+#define AFE_PORT_ID_QUINARY_TDM_TX_3 \
+	(AFE_PORT_ID_QUINARY_TDM_TX + 0x06)
+#define AFE_PORT_ID_QUINARY_TDM_TX_4 \
+	(AFE_PORT_ID_QUINARY_TDM_TX + 0x08)
+#define AFE_PORT_ID_QUINARY_TDM_TX_5 \
+	(AFE_PORT_ID_QUINARY_TDM_TX + 0x0A)
+#define AFE_PORT_ID_QUINARY_TDM_TX_6 \
+	(AFE_PORT_ID_QUINARY_TDM_TX + 0x0C)
+#define AFE_PORT_ID_QUINARY_TDM_TX_7 \
+	(AFE_PORT_ID_QUINARY_TDM_TX + 0x0E)
+
+#define Q6AFE_LPASS_MODE_CLK1_VALID 1
+#define Q6AFE_LPASS_MODE_CLK2_VALID 2
+#define Q6AFE_LPASS_CLK_SRC_INTERNAL 1
+#define Q6AFE_LPASS_CLK_ROOT_DEFAULT 0
+#define AFE_API_VERSION_TDM_CONFIG              1
+#define AFE_API_VERSION_SLOT_MAPPING_CONFIG	1
+
+#define TIMEOUT_MS 1000
+#define AFE_CMD_RESP_AVAIL	0
+#define AFE_CMD_RESP_NONE	1
+
+struct q6afe {
+	struct apr_device *apr;
+	struct device *dev;
+	struct q6core_svc_api_info ainfo;
+	struct mutex lock;
+	struct list_head port_list;
+	spinlock_t port_list_lock;
+	struct platform_device *pdev_dais;
+};
+
+struct afe_port_cmd_device_start {
+	u16 port_id;
+	u16 reserved;
+} __packed;
+
+struct afe_port_cmd_device_stop {
+	u16 port_id;
+	u16 reserved;
+/* Reserved for 32-bit alignment. This field must be set to 0.*/
+} __packed;
+
+struct afe_port_param_data_v2 {
+	u32 module_id;
+	u32 param_id;
+	u16 param_size;
+	u16 reserved;
+} __packed;
+
+struct afe_svc_cmd_set_param {
+	uint32_t payload_size;
+	uint32_t payload_address_lsw;
+	uint32_t payload_address_msw;
+	uint32_t mem_map_handle;
+} __packed;
+
+struct afe_port_cmd_set_param_v2 {
+	u16 port_id;
+	u16 payload_size;
+	u32 payload_address_lsw;
+	u32 payload_address_msw;
+	u32 mem_map_handle;
+} __packed;
+
+struct afe_param_id_hdmi_multi_chan_audio_cfg {
+	u32 hdmi_cfg_minor_version;
+	u16 datatype;
+	u16 channel_allocation;
+	u32 sample_rate;
+	u16 bit_width;
+	u16 reserved;
+} __packed;
+
+struct afe_param_id_slimbus_cfg {
+	u32                  sb_cfg_minor_version;
+/* Minor version used for tracking the version of the SLIMBUS
+ * configuration interface.
+ * Supported values: #AFE_API_VERSION_SLIMBUS_CONFIG
+ */
+
+	u16                  slimbus_dev_id;
+/* SLIMbus hardware device ID, which is required to handle
+ * multiple SLIMbus hardware blocks.
+ * Supported values: - #AFE_SLIMBUS_DEVICE_1 - #AFE_SLIMBUS_DEVICE_2
+ */
+	u16                  bit_width;
+/* Bit width of the sample.
+ * Supported values: 16, 24
+ */
+	u16                  data_format;
+/* Data format supported by the SLIMbus hardware. The default is
+ * 0 (#AFE_SB_DATA_FORMAT_NOT_INDICATED), which indicates the
+ * hardware does not perform any format conversions before the data
+ * transfer.
+ */
+	u16                  num_channels;
+/* Number of channels.
+ * Supported values: 1 to #AFE_PORT_MAX_AUDIO_CHAN_CNT
+ */
+	u8  shared_ch_mapping[AFE_PORT_MAX_AUDIO_CHAN_CNT];
+/* Mapping of shared channel IDs (128 to 255) to which the
+ * master port is to be connected.
+ * Shared_channel_mapping[i] represents the shared channel assigned
+ * for audio channel i in multichannel audio data.
+ */
+	u32              sample_rate;
+/* Sampling rate of the port.
+ * Supported values:
+ * - #AFE_PORT_SAMPLE_RATE_8K
+ * - #AFE_PORT_SAMPLE_RATE_16K
+ * - #AFE_PORT_SAMPLE_RATE_48K
+ * - #AFE_PORT_SAMPLE_RATE_96K
+ * - #AFE_PORT_SAMPLE_RATE_192K
+ */
+} __packed;
+
+struct afe_clk_cfg {
+	u32                  i2s_cfg_minor_version;
+	u32                  clk_val1;
+	u32                  clk_val2;
+	u16                  clk_src;
+	u16                  clk_root;
+	u16                  clk_set_mode;
+	u16                  reserved;
+} __packed;
+
+struct afe_digital_clk_cfg {
+	u32                  i2s_cfg_minor_version;
+	u32                  clk_val;
+	u16                  clk_root;
+	u16                  reserved;
+} __packed;
+
+struct afe_param_id_i2s_cfg {
+	u32	i2s_cfg_minor_version;
+	u16	bit_width;
+	u16	channel_mode;
+	u16	mono_stereo;
+	u16	ws_src;
+	u32	sample_rate;
+	u16	data_format;
+	u16	reserved;
+} __packed;
+
+struct afe_param_id_tdm_cfg {
+	u32	tdm_cfg_minor_version;
+	u32	num_channels;
+	u32	sample_rate;
+	u32	bit_width;
+	u16	data_format;
+	u16	sync_mode;
+	u16	sync_src;
+	u16	nslots_per_frame;
+	u16	ctrl_data_out_enable;
+	u16	ctrl_invert_sync_pulse;
+	u16	ctrl_sync_data_delay;
+	u16	slot_width;
+	u32	slot_mask;
+} __packed;
+
+union afe_port_config {
+	struct afe_param_id_hdmi_multi_chan_audio_cfg hdmi_multi_ch;
+	struct afe_param_id_slimbus_cfg           slim_cfg;
+	struct afe_param_id_i2s_cfg	i2s_cfg;
+	struct afe_param_id_tdm_cfg	tdm_cfg;
+} __packed;
+
+
+struct afe_clk_set {
+	uint32_t clk_set_minor_version;
+	uint32_t clk_id;
+	uint32_t clk_freq_in_hz;
+	uint16_t clk_attri;
+	uint16_t clk_root;
+	uint32_t enable;
+};
+
+struct afe_param_id_slot_mapping_cfg {
+	u32	minor_version;
+	u16	num_channels;
+	u16	bitwidth;
+	u32	data_align_type;
+	u16	ch_mapping[AFE_PORT_MAX_AUDIO_CHAN_CNT];
+} __packed;
+
+struct q6afe_port {
+	wait_queue_head_t wait;
+	union afe_port_config port_cfg;
+	struct afe_param_id_slot_mapping_cfg *scfg;
+	struct aprv2_ibasic_rsp_result_t result;
+	int token;
+	int id;
+	int cfg_type;
+	struct q6afe *afe;
+	struct kref refcount;
+	struct list_head node;
+};
+
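+/* One entry per virtual port: DSP port id, APR token, direction and type. */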
+struct afe_port_map {
+	int port_id;
+	int token;
+	int is_rx;
+	int is_dig_pcm;
+};
+
+/*
+ * Mapping between virtual port IDs and DSP AFE port IDs.
+ * On B-family SoCs, DSP port IDs are consistent across multiple SoCs;
+ * on A-family SoCs, DSP port IDs are the same as the virtual port IDs.
+ */
+
+static struct afe_port_map port_maps[AFE_PORT_MAX] = {
+	[HDMI_RX] = { AFE_PORT_ID_MULTICHAN_HDMI_RX, HDMI_RX, 1, 1},
+	[SLIMBUS_0_RX] = { AFE_PORT_ID_SLIMBUS_MULTI_CHAN_0_RX,
+				SLIMBUS_0_RX, 1, 1},
+	[SLIMBUS_1_RX] = { AFE_PORT_ID_SLIMBUS_MULTI_CHAN_1_RX,
+				SLIMBUS_1_RX, 1, 1},
+	[SLIMBUS_2_RX] = { AFE_PORT_ID_SLIMBUS_MULTI_CHAN_2_RX,
+				SLIMBUS_2_RX, 1, 1},
+	[SLIMBUS_3_RX] = { AFE_PORT_ID_SLIMBUS_MULTI_CHAN_3_RX,
+				SLIMBUS_3_RX, 1, 1},
+	[SLIMBUS_4_RX] = { AFE_PORT_ID_SLIMBUS_MULTI_CHAN_4_RX,
+				SLIMBUS_4_RX, 1, 1},
+	[SLIMBUS_5_RX] = { AFE_PORT_ID_SLIMBUS_MULTI_CHAN_5_RX,
+				SLIMBUS_5_RX, 1, 1},
+	[SLIMBUS_6_RX] = { AFE_PORT_ID_SLIMBUS_MULTI_CHAN_6_RX,
+				SLIMBUS_6_RX, 1, 1},
+	[PRIMARY_MI2S_RX] = { AFE_PORT_ID_PRIMARY_MI2S_RX,
+				PRIMARY_MI2S_RX, 1, 1},
+	[PRIMARY_MI2S_TX] = { AFE_PORT_ID_PRIMARY_MI2S_TX,
+				PRIMARY_MI2S_TX, 0, 1},
+	[SECONDARY_MI2S_RX] = { AFE_PORT_ID_SECONDARY_MI2S_RX,
+				SECONDARY_MI2S_RX, 1, 1},
+	[SECONDARY_MI2S_TX] = { AFE_PORT_ID_SECONDARY_MI2S_TX,
+				SECONDARY_MI2S_TX, 0, 1},
+	[TERTIARY_MI2S_RX] = { AFE_PORT_ID_TERTIARY_MI2S_RX,
+				TERTIARY_MI2S_RX, 1, 1},
+	[TERTIARY_MI2S_TX] = { AFE_PORT_ID_TERTIARY_MI2S_TX,
+				TERTIARY_MI2S_TX, 0, 1},
+	[QUATERNARY_MI2S_RX] = { AFE_PORT_ID_QUATERNARY_MI2S_RX,
+				QUATERNARY_MI2S_RX, 1, 1},
+	[QUATERNARY_MI2S_TX] = { AFE_PORT_ID_QUATERNARY_MI2S_TX,
+				QUATERNARY_MI2S_TX, 0, 1},
+	[PRIMARY_TDM_RX_0] =  { AFE_PORT_ID_PRIMARY_TDM_RX,
+				PRIMARY_TDM_RX_0, 1, 1},
+	[PRIMARY_TDM_TX_0] =  { AFE_PORT_ID_PRIMARY_TDM_TX,
+				PRIMARY_TDM_TX_0, 0, 1},
+	[PRIMARY_TDM_RX_1] =  { AFE_PORT_ID_PRIMARY_TDM_RX_1,
+				PRIMARY_TDM_RX_1, 1, 1},
+	[PRIMARY_TDM_TX_1] =  { AFE_PORT_ID_PRIMARY_TDM_TX_1,
+				PRIMARY_TDM_TX_1, 0, 1},
+	[PRIMARY_TDM_RX_2] =  { AFE_PORT_ID_PRIMARY_TDM_RX_2,
+				PRIMARY_TDM_RX_2, 1, 1},
+	[PRIMARY_TDM_TX_2] =  { AFE_PORT_ID_PRIMARY_TDM_TX_2,
+				PRIMARY_TDM_TX_2, 0, 1},
+	[PRIMARY_TDM_RX_3] =  { AFE_PORT_ID_PRIMARY_TDM_RX_3,
+				PRIMARY_TDM_RX_3, 1, 1},
+	[PRIMARY_TDM_TX_3] =  { AFE_PORT_ID_PRIMARY_TDM_TX_3,
+				PRIMARY_TDM_TX_3, 0, 1},
+	[PRIMARY_TDM_RX_4] =  { AFE_PORT_ID_PRIMARY_TDM_RX_4,
+				PRIMARY_TDM_RX_4, 1, 1},
+	[PRIMARY_TDM_TX_4] =  { AFE_PORT_ID_PRIMARY_TDM_TX_4,
+				PRIMARY_TDM_TX_4, 0, 1},
+	[PRIMARY_TDM_RX_5] =  { AFE_PORT_ID_PRIMARY_TDM_RX_5,
+				PRIMARY_TDM_RX_5, 1, 1},
+	[PRIMARY_TDM_TX_5] =  { AFE_PORT_ID_PRIMARY_TDM_TX_5,
+				PRIMARY_TDM_TX_5, 0, 1},
+	[PRIMARY_TDM_RX_6] =  { AFE_PORT_ID_PRIMARY_TDM_RX_6,
+				PRIMARY_TDM_RX_6, 1, 1},
+	[PRIMARY_TDM_TX_6] =  { AFE_PORT_ID_PRIMARY_TDM_TX_6,
+				PRIMARY_TDM_TX_6, 0, 1},
+	[PRIMARY_TDM_RX_7] =  { AFE_PORT_ID_PRIMARY_TDM_RX_7,
+				PRIMARY_TDM_RX_7, 1, 1},
+	[PRIMARY_TDM_TX_7] =  { AFE_PORT_ID_PRIMARY_TDM_TX_7,
+				PRIMARY_TDM_TX_7, 0, 1},
+	[SECONDARY_TDM_RX_0] =  { AFE_PORT_ID_SECONDARY_TDM_RX,
+				SECONDARY_TDM_RX_0, 1, 1},
+	[SECONDARY_TDM_TX_0] =  { AFE_PORT_ID_SECONDARY_TDM_TX,
+				SECONDARY_TDM_TX_0, 0, 1},
+	[SECONDARY_TDM_RX_1] =  { AFE_PORT_ID_SECONDARY_TDM_RX_1,
+				SECONDARY_TDM_RX_1, 1, 1},
+	[SECONDARY_TDM_TX_1] =  { AFE_PORT_ID_SECONDARY_TDM_TX_1,
+				SECONDARY_TDM_TX_1, 0, 1},
+	[SECONDARY_TDM_RX_2] =  { AFE_PORT_ID_SECONDARY_TDM_RX_2,
+				SECONDARY_TDM_RX_2, 1, 1},
+	[SECONDARY_TDM_TX_2] =  { AFE_PORT_ID_SECONDARY_TDM_TX_2,
+				SECONDARY_TDM_TX_2, 0, 1},
+	[SECONDARY_TDM_RX_3] =  { AFE_PORT_ID_SECONDARY_TDM_RX_3,
+				SECONDARY_TDM_RX_3, 1, 1},
+	[SECONDARY_TDM_TX_3] =  { AFE_PORT_ID_SECONDARY_TDM_TX_3,
+				SECONDARY_TDM_TX_3, 0, 1},
+	[SECONDARY_TDM_RX_4] =  { AFE_PORT_ID_SECONDARY_TDM_RX_4,
+				SECONDARY_TDM_RX_4, 1, 1},
+	[SECONDARY_TDM_TX_4] =  { AFE_PORT_ID_SECONDARY_TDM_TX_4,
+				SECONDARY_TDM_TX_4, 0, 1},
+	[SECONDARY_TDM_RX_5] =  { AFE_PORT_ID_SECONDARY_TDM_RX_5,
+				SECONDARY_TDM_RX_5, 1, 1},
+	[SECONDARY_TDM_TX_5] =  { AFE_PORT_ID_SECONDARY_TDM_TX_5,
+				SECONDARY_TDM_TX_5, 0, 1},
+	[SECONDARY_TDM_RX_6] =  { AFE_PORT_ID_SECONDARY_TDM_RX_6,
+				SECONDARY_TDM_RX_6, 1, 1},
+	[SECONDARY_TDM_TX_6] =  { AFE_PORT_ID_SECONDARY_TDM_TX_6,
+				SECONDARY_TDM_TX_6, 0, 1},
+	[SECONDARY_TDM_RX_7] =  { AFE_PORT_ID_SECONDARY_TDM_RX_7,
+				SECONDARY_TDM_RX_7, 1, 1},
+	[SECONDARY_TDM_TX_7] =  { AFE_PORT_ID_SECONDARY_TDM_TX_7,
+				SECONDARY_TDM_TX_7, 0, 1},
+	[TERTIARY_TDM_RX_0] =  { AFE_PORT_ID_TERTIARY_TDM_RX,
+				TERTIARY_TDM_RX_0, 1, 1},
+	[TERTIARY_TDM_TX_0] =  { AFE_PORT_ID_TERTIARY_TDM_TX,
+				TERTIARY_TDM_TX_0, 0, 1},
+	[TERTIARY_TDM_RX_1] =  { AFE_PORT_ID_TERTIARY_TDM_RX_1,
+				TERTIARY_TDM_RX_1, 1, 1},
+	[TERTIARY_TDM_TX_1] =  { AFE_PORT_ID_TERTIARY_TDM_TX_1,
+				TERTIARY_TDM_TX_1, 0, 1},
+	[TERTIARY_TDM_RX_2] =  { AFE_PORT_ID_TERTIARY_TDM_RX_2,
+				TERTIARY_TDM_RX_2, 1, 1},
+	[TERTIARY_TDM_TX_2] =  { AFE_PORT_ID_TERTIARY_TDM_TX_2,
+				TERTIARY_TDM_TX_2, 0, 1},
+	[TERTIARY_TDM_RX_3] =  { AFE_PORT_ID_TERTIARY_TDM_RX_3,
+				TERTIARY_TDM_RX_3, 1, 1},
+	[TERTIARY_TDM_TX_3] =  { AFE_PORT_ID_TERTIARY_TDM_TX_3,
+				TERTIARY_TDM_TX_3, 0, 1},
+	[TERTIARY_TDM_RX_4] =  { AFE_PORT_ID_TERTIARY_TDM_RX_4,
+				TERTIARY_TDM_RX_4, 1, 1},
+	[TERTIARY_TDM_TX_4] =  { AFE_PORT_ID_TERTIARY_TDM_TX_4,
+				TERTIARY_TDM_TX_4, 0, 1},
+	[TERTIARY_TDM_RX_5] =  { AFE_PORT_ID_TERTIARY_TDM_RX_5,
+				TERTIARY_TDM_RX_5, 1, 1},
+	[TERTIARY_TDM_TX_5] =  { AFE_PORT_ID_TERTIARY_TDM_TX_5,
+				TERTIARY_TDM_TX_5, 0, 1},
+	[TERTIARY_TDM_RX_6] =  { AFE_PORT_ID_TERTIARY_TDM_RX_6,
+				TERTIARY_TDM_RX_6, 1, 1},
+	[TERTIARY_TDM_TX_6] =  { AFE_PORT_ID_TERTIARY_TDM_TX_6,
+				TERTIARY_TDM_TX_6, 0, 1},
+	[TERTIARY_TDM_RX_7] =  { AFE_PORT_ID_TERTIARY_TDM_RX_7,
+				TERTIARY_TDM_RX_7, 1, 1},
+	[TERTIARY_TDM_TX_7] =  { AFE_PORT_ID_TERTIARY_TDM_TX_7,
+				TERTIARY_TDM_TX_7, 0, 1},
+	[QUATERNARY_TDM_RX_0] =  { AFE_PORT_ID_QUATERNARY_TDM_RX,
+				QUATERNARY_TDM_RX_0, 1, 1},
+	[QUATERNARY_TDM_TX_0] =  { AFE_PORT_ID_QUATERNARY_TDM_TX,
+				QUATERNARY_TDM_TX_0, 0, 1},
+	[QUATERNARY_TDM_RX_1] =  { AFE_PORT_ID_QUATERNARY_TDM_RX_1,
+				QUATERNARY_TDM_RX_1, 1, 1},
+	[QUATERNARY_TDM_TX_1] =  { AFE_PORT_ID_QUATERNARY_TDM_TX_1,
+				QUATERNARY_TDM_TX_1, 0, 1},
+	[QUATERNARY_TDM_RX_2] =  { AFE_PORT_ID_QUATERNARY_TDM_RX_2,
+				QUATERNARY_TDM_RX_2, 1, 1},
+	[QUATERNARY_TDM_TX_2] =  { AFE_PORT_ID_QUATERNARY_TDM_TX_2,
+				QUATERNARY_TDM_TX_2, 0, 1},
+	[QUATERNARY_TDM_RX_3] =  { AFE_PORT_ID_QUATERNARY_TDM_RX_3,
+				QUATERNARY_TDM_RX_3, 1, 1},
+	[QUATERNARY_TDM_TX_3] =  { AFE_PORT_ID_QUATERNARY_TDM_TX_3,
+				QUATERNARY_TDM_TX_3, 0, 1},
+	[QUATERNARY_TDM_RX_4] =  { AFE_PORT_ID_QUATERNARY_TDM_RX_4,
+				QUATERNARY_TDM_RX_4, 1, 1},
+	[QUATERNARY_TDM_TX_4] =  { AFE_PORT_ID_QUATERNARY_TDM_TX_4,
+				QUATERNARY_TDM_TX_4, 0, 1},
+	[QUATERNARY_TDM_RX_5] =  { AFE_PORT_ID_QUATERNARY_TDM_RX_5,
+				QUATERNARY_TDM_RX_5, 1, 1},
+	[QUATERNARY_TDM_TX_5] =  { AFE_PORT_ID_QUATERNARY_TDM_TX_5,
+				QUATERNARY_TDM_TX_5, 0, 1},
+	[QUATERNARY_TDM_RX_6] =  { AFE_PORT_ID_QUATERNARY_TDM_RX_6,
+				QUATERNARY_TDM_RX_6, 1, 1},
+	[QUATERNARY_TDM_TX_6] =  { AFE_PORT_ID_QUATERNARY_TDM_TX_6,
+				QUATERNARY_TDM_TX_6, 0, 1},
+	[QUATERNARY_TDM_RX_7] =  { AFE_PORT_ID_QUATERNARY_TDM_RX_7,
+				QUATERNARY_TDM_RX_7, 1, 1},
+	[QUATERNARY_TDM_TX_7] =  { AFE_PORT_ID_QUATERNARY_TDM_TX_7,
+				QUATERNARY_TDM_TX_7, 0, 1},
+	[QUINARY_TDM_RX_0] =  { AFE_PORT_ID_QUINARY_TDM_RX,
+				QUINARY_TDM_RX_0, 1, 1},
+	[QUINARY_TDM_TX_0] =  { AFE_PORT_ID_QUINARY_TDM_TX,
+				QUINARY_TDM_TX_0, 0, 1},
+	[QUINARY_TDM_RX_1] =  { AFE_PORT_ID_QUINARY_TDM_RX_1,
+				QUINARY_TDM_RX_1, 1, 1},
+	[QUINARY_TDM_TX_1] =  { AFE_PORT_ID_QUINARY_TDM_TX_1,
+				QUINARY_TDM_TX_1, 0, 1},
+	[QUINARY_TDM_RX_2] =  { AFE_PORT_ID_QUINARY_TDM_RX_2,
+				QUINARY_TDM_RX_2, 1, 1},
+	[QUINARY_TDM_TX_2] =  { AFE_PORT_ID_QUINARY_TDM_TX_2,
+				QUINARY_TDM_TX_2, 0, 1},
+	[QUINARY_TDM_RX_3] =  { AFE_PORT_ID_QUINARY_TDM_RX_3,
+				QUINARY_TDM_RX_3, 1, 1},
+	[QUINARY_TDM_TX_3] =  { AFE_PORT_ID_QUINARY_TDM_TX_3,
+				QUINARY_TDM_TX_3, 0, 1},
+	[QUINARY_TDM_RX_4] =  { AFE_PORT_ID_QUINARY_TDM_RX_4,
+				QUINARY_TDM_RX_4, 1, 1},
+	[QUINARY_TDM_TX_4] =  { AFE_PORT_ID_QUINARY_TDM_TX_4,
+				QUINARY_TDM_TX_4, 0, 1},
+	[QUINARY_TDM_RX_5] =  { AFE_PORT_ID_QUINARY_TDM_RX_5,
+				QUINARY_TDM_RX_5, 1, 1},
+	[QUINARY_TDM_TX_5] =  { AFE_PORT_ID_QUINARY_TDM_TX_5,
+				QUINARY_TDM_TX_5, 0, 1},
+	[QUINARY_TDM_RX_6] =  { AFE_PORT_ID_QUINARY_TDM_RX_6,
+				QUINARY_TDM_RX_6, 1, 1},
+	[QUINARY_TDM_TX_6] =  { AFE_PORT_ID_QUINARY_TDM_TX_6,
+				QUINARY_TDM_TX_6, 0, 1},
+	[QUINARY_TDM_RX_7] =  { AFE_PORT_ID_QUINARY_TDM_RX_7,
+				QUINARY_TDM_RX_7, 1, 1},
+	[QUINARY_TDM_TX_7] =  { AFE_PORT_ID_QUINARY_TDM_TX_7,
+				QUINARY_TDM_TX_7, 0, 1},
+};
+
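+/* kref release: unlink the port from the AFE port list and free it. */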
+static void q6afe_port_free(struct kref *ref)
+{
+	struct q6afe_port *port;
+	struct q6afe *afe;
+	unsigned long flags;
+
+	port = container_of(ref, struct q6afe_port, refcount);
+	afe = port->afe;
+	spin_lock_irqsave(&afe->port_list_lock, flags);
+	list_del(&port->node);
+	spin_unlock_irqrestore(&afe->port_list_lock, flags);
+	kfree(port->scfg);
+	kfree(port);
+}
+
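+/* Find a port by APR token; on success a reference is taken for the caller. */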
+static struct q6afe_port *q6afe_find_port(struct q6afe *afe, int token)
+{
+	struct q6afe_port *p = NULL;
+	struct q6afe_port *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&afe->port_list_lock, flags);
+	list_for_each_entry(p, &afe->port_list, node)
+		if (p->token == token) {
+			ret = p;
+			kref_get(&p->refcount);
+			break;
+		}
+
+	spin_unlock_irqrestore(&afe->port_list_lock, flags);
+	return ret;
+}
+
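+/*
+ * APR response handler: match the basic response to the waiting port via
+ * its token, record the result and wake up the sender.
+ */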
+static int q6afe_callback(struct apr_device *adev, struct apr_resp_pkt *data)
+{
+	struct q6afe *afe = dev_get_drvdata(&adev->dev);
+	struct aprv2_ibasic_rsp_result_t *res;
+	struct apr_hdr *hdr = &data->hdr;
+	struct q6afe_port *port;
+
+	if (!data->payload_size)
+		return 0;
+
+	res = data->payload;
+	switch (hdr->opcode) {
+	case APR_BASIC_RSP_RESULT: {
+		if (res->status) {
+			dev_err(afe->dev, "cmd = 0x%x returned error = 0x%x\n",
+				res->opcode, res->status);
+		}
+		switch (res->opcode) {
+		case AFE_PORT_CMD_SET_PARAM_V2:
+		case AFE_PORT_CMD_DEVICE_STOP:
+		case AFE_PORT_CMD_DEVICE_START:
+		case AFE_SVC_CMD_SET_PARAM:
+			port = q6afe_find_port(afe, hdr->token);
+			if (port) {
+				port->result = *res;
+				wake_up(&port->wait);
+				kref_put(&port->refcount, q6afe_port_free);
+			}
+			break;
+		default:
+			dev_err(afe->dev, "Unknown cmd 0x%x\n", res->opcode);
+			break;
+		}
+	}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * q6afe_get_port_id() - Get port id from a given port index
+ *
+ * @index: port index
+ *
+ * Return: Will be a negative value on error or a valid port_id on success.
+ */
+int q6afe_get_port_id(int index)
+{
+	if (index < 0 || index >= AFE_PORT_MAX)
+		return -EINVAL;
+
+	return port_maps[index].port_id;
+}
+EXPORT_SYMBOL_GPL(q6afe_get_port_id);
+
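+/*
+ * Send an APR packet and synchronously wait (up to TIMEOUT_MS) for the
+ * DSP's basic response, serialised under the AFE lock.
+ */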
+static int afe_apr_send_pkt(struct q6afe *afe, struct apr_pkt *pkt,
+			    struct q6afe_port *port)
+{
+	wait_queue_head_t *wait = &port->wait;
+	struct apr_hdr *hdr = &pkt->hdr;
+	int ret;
+
+	mutex_lock(&afe->lock);
+	port->result.opcode = 0;
+	port->result.status = 0;
+
+	ret = apr_send_pkt(afe->apr, pkt);
+	if (ret < 0) {
+		dev_err(afe->dev, "packet not transmitted (%d)\n", ret);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = wait_event_timeout(*wait, (port->result.opcode == hdr->opcode),
+				 msecs_to_jiffies(TIMEOUT_MS));
+	if (!ret) {
+		ret = -ETIMEDOUT;
+	} else if (port->result.status > 0) {
+		dev_err(afe->dev, "DSP returned error[%x]\n",
+			port->result.status);
+		ret = -EINVAL;
+	} else {
+		ret = 0;
+	}
+
+err:
+	mutex_unlock(&afe->lock);
+
+	return ret;
+}
+
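+/*
+ * In-band AFE service set-param: the packet carries the APR header, the
+ * set-param command, the param header and the payload back to back.
+ */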
+static int q6afe_port_set_param(struct q6afe_port *port, void *data,
+				int param_id, int module_id, int psize)
+{
+	struct afe_svc_cmd_set_param *param;
+	struct afe_port_param_data_v2 *pdata;
+	struct q6afe *afe = port->afe;
+	struct apr_pkt *pkt;
+	u16 port_id = port->id;
+	int ret, pkt_size;
+	void *p, *pl;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*param) + sizeof(*pdata) + psize;
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	param = p + APR_HDR_SIZE;
+	pdata = p + APR_HDR_SIZE + sizeof(*param);
+	pl = p + APR_HDR_SIZE + sizeof(*param) + sizeof(*pdata);
+	memcpy(pl, data, psize);
+
+	pkt->hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+					   APR_HDR_LEN(APR_HDR_SIZE),
+					   APR_PKT_VER);
+	pkt->hdr.pkt_size = pkt_size;
+	pkt->hdr.src_port = 0;
+	pkt->hdr.dest_port = 0;
+	pkt->hdr.token = port->token;
+	pkt->hdr.opcode = AFE_SVC_CMD_SET_PARAM;
+
+	param->payload_size = sizeof(*pdata) + psize;
+	param->payload_address_lsw = 0x00;
+	param->payload_address_msw = 0x00;
+	param->mem_map_handle = 0x00;
+	pdata->module_id = module_id;
+	pdata->param_id = param_id;
+	pdata->param_size = psize;
+
+	ret = afe_apr_send_pkt(afe, pkt, port);
+	if (ret)
+		dev_err(afe->dev, "AFE enable for port 0x%x failed %d\n",
+		       port_id, ret);
+
+	kfree(pkt);
+	return ret;
+}
+
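+/* Port-level variant: same in-band layout, addressed to a specific port id. */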
+static int q6afe_port_set_param_v2(struct q6afe_port *port, void *data,
+				   int param_id, int module_id, int psize)
+{
+	struct afe_port_cmd_set_param_v2 *param;
+	struct afe_port_param_data_v2 *pdata;
+	struct q6afe *afe = port->afe;
+	struct apr_pkt *pkt;
+	u16 port_id = port->id;
+	int ret, pkt_size;
+	void *p, *pl;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*param) + sizeof(*pdata) + psize;
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	param = p + APR_HDR_SIZE;
+	pdata = p + APR_HDR_SIZE + sizeof(*param);
+	pl = p + APR_HDR_SIZE + sizeof(*param) + sizeof(*pdata);
+	memcpy(pl, data, psize);
+
+	pkt->hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+					   APR_HDR_LEN(APR_HDR_SIZE),
+					   APR_PKT_VER);
+	pkt->hdr.pkt_size = pkt_size;
+	pkt->hdr.src_port = 0;
+	pkt->hdr.dest_port = 0;
+	pkt->hdr.token = port->token;
+	pkt->hdr.opcode = AFE_PORT_CMD_SET_PARAM_V2;
+
+	param->port_id = port_id;
+	param->payload_size = sizeof(*pdata) + psize;
+	param->payload_address_lsw = 0x00;
+	param->payload_address_msw = 0x00;
+	param->mem_map_handle = 0x00;
+	pdata->module_id = module_id;
+	pdata->param_id = param_id;
+	pdata->param_size = psize;
+
+	ret = afe_apr_send_pkt(afe, pkt, port);
+	if (ret)
+		dev_err(afe->dev, "AFE enable for port 0x%x failed %d\n",
+		       port_id, ret);
+
+	kfree(pkt);
+	return ret;
+}
+
+static int q6afe_set_lpass_clock(struct q6afe_port *port,
+				 struct afe_clk_cfg *cfg)
+{
+	return q6afe_port_set_param_v2(port, cfg,
+				       AFE_PARAM_ID_LPAIF_CLK_CONFIG,
+				       AFE_MODULE_AUDIO_DEV_INTERFACE,
+				       sizeof(*cfg));
+}
+
+static int q6afe_set_lpass_clock_v2(struct q6afe_port *port,
+				 struct afe_clk_set *cfg)
+{
+	return q6afe_port_set_param(port, cfg, AFE_PARAM_ID_CLOCK_SET,
+				    AFE_MODULE_CLOCK_SET, sizeof(*cfg));
+}
+
+static int q6afe_set_digital_codec_core_clock(struct q6afe_port *port,
+					      struct afe_digital_clk_cfg *cfg)
+{
+	return q6afe_port_set_param_v2(port, cfg,
+				       AFE_PARAM_ID_INT_DIGITAL_CDC_CLK_CONFIG,
+				       AFE_MODULE_AUDIO_DEV_INTERFACE,
+				       sizeof(*cfg));
+}
+
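+/*
+ * Route a sysclk request to the right clock interface (LPAIF clock, digital
+ * codec clock or the v2 clock-set API) based on the clock id.
+ */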
+int q6afe_port_set_sysclk(struct q6afe_port *port, int clk_id,
+			  int clk_src, int clk_root,
+			  unsigned int freq, int dir)
+{
+	struct afe_clk_cfg ccfg = {0,};
+	struct afe_clk_set cset = {0,};
+	struct afe_digital_clk_cfg dcfg = {0,};
+	int ret;
+
+	switch (clk_id) {
+	case LPAIF_DIG_CLK:
+		dcfg.i2s_cfg_minor_version = AFE_API_VERSION_I2S_CONFIG;
+		dcfg.clk_val = freq;
+		dcfg.clk_root = clk_root;
+		ret = q6afe_set_digital_codec_core_clock(port, &dcfg);
+		break;
+	case LPAIF_BIT_CLK:
+		ccfg.i2s_cfg_minor_version = AFE_API_VERSION_I2S_CONFIG;
+		ccfg.clk_val1 = freq;
+		ccfg.clk_src = clk_src;
+		ccfg.clk_root = clk_root;
+		ccfg.clk_set_mode = Q6AFE_LPASS_MODE_CLK1_VALID;
+		ret = q6afe_set_lpass_clock(port, &ccfg);
+		break;
+
+	case LPAIF_OSR_CLK:
+		ccfg.i2s_cfg_minor_version = AFE_API_VERSION_I2S_CONFIG;
+		ccfg.clk_val2 = freq;
+		ccfg.clk_src = clk_src;
+		ccfg.clk_root = clk_root;
+		ccfg.clk_set_mode = Q6AFE_LPASS_MODE_CLK2_VALID;
+		ret = q6afe_set_lpass_clock(port, &ccfg);
+		break;
+	case Q6AFE_LPASS_CLK_ID_PRI_MI2S_IBIT ... Q6AFE_LPASS_CLK_ID_QUI_MI2S_OSR:
+	case Q6AFE_LPASS_CLK_ID_MCLK_1 ... Q6AFE_LPASS_CLK_ID_INT_MCLK_1:
+	case Q6AFE_LPASS_CLK_ID_PRI_TDM_IBIT ... Q6AFE_LPASS_CLK_ID_QUIN_TDM_EBIT:
+		cset.clk_set_minor_version = AFE_API_VERSION_CLOCK_SET;
+		cset.clk_id = clk_id;
+		cset.clk_freq_in_hz = freq;
+		cset.clk_attri = clk_src;
+		cset.clk_root = clk_root;
+		cset.enable = !!freq;
+		ret = q6afe_set_lpass_clock_v2(port, &cset);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(q6afe_port_set_sysclk);
+
+/**
+ * q6afe_port_stop() - Stop an AFE port
+ *
+ * @port: Instance of the port to stop
+ *
+ * Return: Will be a negative value on error or zero on success.
+ */
+int q6afe_port_stop(struct q6afe_port *port)
+{
+	struct afe_port_cmd_device_stop *stop;
+	struct q6afe *afe = port->afe;
+	struct apr_pkt *pkt;
+	int port_id = port->id;
+	int ret = 0;
+	int index, pkt_size;
+	void *p;
+
+	index = port->token;
+	if (index < 0 || index > AFE_PORT_MAX) {
+		dev_err(afe->dev, "AFE port index[%d] invalid!\n", index);
+		return -EINVAL;
+	}
+
+	pkt_size = APR_HDR_SIZE + sizeof(*stop);
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	stop = p + APR_HDR_SIZE;
+
+	pkt->hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+					   APR_HDR_LEN(APR_HDR_SIZE),
+					   APR_PKT_VER);
+	pkt->hdr.pkt_size = pkt_size;
+	pkt->hdr.src_port = 0;
+	pkt->hdr.dest_port = 0;
+	pkt->hdr.token = index;
+	pkt->hdr.opcode = AFE_PORT_CMD_DEVICE_STOP;
+	stop->port_id = port_id;
+	stop->reserved = 0;
+
+	ret = afe_apr_send_pkt(afe, pkt, port);
+	if (ret)
+		dev_err(afe->dev, "AFE close failed %d\n", ret);
+
+	kfree(pkt);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(q6afe_port_stop);
+
+/**
+ * q6afe_slim_port_prepare() - Prepare a SLIMbus AFE port.
+ *
+ * @port: Instance of afe port
+ * @cfg: SLIM configuration for the afe port
+ */
+void q6afe_slim_port_prepare(struct q6afe_port *port,
+			     struct q6afe_slim_cfg *cfg)
+{
+	union afe_port_config *pcfg = &port->port_cfg;
+
+	pcfg->slim_cfg.sb_cfg_minor_version = AFE_API_VERSION_SLIMBUS_CONFIG;
+	pcfg->slim_cfg.sample_rate = cfg->sample_rate;
+	pcfg->slim_cfg.bit_width = cfg->bit_width;
+	pcfg->slim_cfg.num_channels = cfg->num_channels;
+	pcfg->slim_cfg.data_format = cfg->data_format;
+	pcfg->slim_cfg.shared_ch_mapping[0] = cfg->ch_mapping[0];
+	pcfg->slim_cfg.shared_ch_mapping[1] = cfg->ch_mapping[1];
+	pcfg->slim_cfg.shared_ch_mapping[2] = cfg->ch_mapping[2];
+	pcfg->slim_cfg.shared_ch_mapping[3] = cfg->ch_mapping[3];
+}
+EXPORT_SYMBOL_GPL(q6afe_slim_port_prepare);
+
+/**
+ * q6afe_tdm_port_prepare() - Prepare a TDM AFE port.
+ *
+ * @port: Instance of afe port
+ * @cfg: TDM configuration for the afe port
+ */
+void q6afe_tdm_port_prepare(struct q6afe_port *port,
+			     struct q6afe_tdm_cfg *cfg)
+{
+	union afe_port_config *pcfg = &port->port_cfg;
+
+	pcfg->tdm_cfg.tdm_cfg_minor_version = AFE_API_VERSION_TDM_CONFIG;
+	pcfg->tdm_cfg.num_channels = cfg->num_channels;
+	pcfg->tdm_cfg.sample_rate = cfg->sample_rate;
+	pcfg->tdm_cfg.bit_width = cfg->bit_width;
+	pcfg->tdm_cfg.data_format = cfg->data_format;
+	pcfg->tdm_cfg.sync_mode = cfg->sync_mode;
+	pcfg->tdm_cfg.sync_src = cfg->sync_src;
+	pcfg->tdm_cfg.nslots_per_frame = cfg->nslots_per_frame;
+
+	pcfg->tdm_cfg.slot_width = cfg->slot_width;
+	pcfg->tdm_cfg.slot_mask = cfg->slot_mask;
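+
+	/*
+	 * The slot mapping is sent separately at port start; if this
+	 * allocation fails, q6afe_port_start() simply skips the
+	 * slot-mapping configuration.
+	 */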
+	port->scfg = kzalloc(sizeof(*port->scfg), GFP_KERNEL);
+	if (!port->scfg)
+		return;
+
+	port->scfg->minor_version = AFE_API_VERSION_SLOT_MAPPING_CONFIG;
+	port->scfg->num_channels = cfg->num_channels;
+	port->scfg->bitwidth = cfg->bit_width;
+	port->scfg->data_align_type = cfg->data_align_type;
+	memcpy(port->scfg->ch_mapping, cfg->ch_mapping,
+			sizeof(u16) * AFE_PORT_MAX_AUDIO_CHAN_CNT);
+}
+EXPORT_SYMBOL_GPL(q6afe_tdm_port_prepare);
+
+/**
+ * q6afe_hdmi_port_prepare() - Prepare an HDMI AFE port.
+ *
+ * @port: Instance of afe port
+ * @cfg: HDMI configuration for the afe port
+ */
+void q6afe_hdmi_port_prepare(struct q6afe_port *port,
+			     struct q6afe_hdmi_cfg *cfg)
+{
+	union afe_port_config *pcfg = &port->port_cfg;
+
+	pcfg->hdmi_multi_ch.hdmi_cfg_minor_version =
+					AFE_API_VERSION_HDMI_CONFIG;
+	pcfg->hdmi_multi_ch.datatype = cfg->datatype;
+	pcfg->hdmi_multi_ch.channel_allocation = cfg->channel_allocation;
+	pcfg->hdmi_multi_ch.sample_rate = cfg->sample_rate;
+	pcfg->hdmi_multi_ch.bit_width = cfg->bit_width;
+}
+EXPORT_SYMBOL_GPL(q6afe_hdmi_port_prepare);
+
+/**
+ * q6afe_i2s_port_prepare() - Prepare an I2S AFE port.
+ *
+ * @port: Instance of afe port
+ * @cfg: I2S configuration for the afe port
+ * Return: Will be a negative value on error and zero on success.
+ */
+int q6afe_i2s_port_prepare(struct q6afe_port *port, struct q6afe_i2s_cfg *cfg)
+{
+	union afe_port_config *pcfg = &port->port_cfg;
+	struct device *dev = port->afe->dev;
+	int num_sd_lines;
+
+	pcfg->i2s_cfg.i2s_cfg_minor_version = AFE_API_VERSION_I2S_CONFIG;
+	pcfg->i2s_cfg.sample_rate = cfg->sample_rate;
+	pcfg->i2s_cfg.bit_width = cfg->bit_width;
+	pcfg->i2s_cfg.data_format = AFE_LINEAR_PCM_DATA;
+
+	switch (cfg->fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBS_CFS:
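+		/* CPU is master */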
+		pcfg->i2s_cfg.ws_src = AFE_PORT_CONFIG_I2S_WS_SRC_INTERNAL;
+		break;
+	case SND_SOC_DAIFMT_CBM_CFM:
+		/* CPU is slave */
+		pcfg->i2s_cfg.ws_src = AFE_PORT_CONFIG_I2S_WS_SRC_EXTERNAL;
+		break;
+	default:
+		break;
+	}
+
+	num_sd_lines = hweight_long(cfg->sd_line_mask);
+
+	switch (num_sd_lines) {
+	case 0:
+		dev_err(dev, "no line is assigned\n");
+		return -EINVAL;
+	case 1:
+		switch (cfg->sd_line_mask) {
+		case AFE_PORT_I2S_SD0_MASK:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_SD0;
+			break;
+		case AFE_PORT_I2S_SD1_MASK:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_SD1;
+			break;
+		case AFE_PORT_I2S_SD2_MASK:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_SD2;
+			break;
+		case AFE_PORT_I2S_SD3_MASK:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_SD3;
+			break;
+		default:
+			dev_err(dev, "Invalid SD lines\n");
+			return -EINVAL;
+		}
+		break;
+	case 2:
+		switch (cfg->sd_line_mask) {
+		case AFE_PORT_I2S_SD0_1_MASK:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_QUAD01;
+			break;
+		case AFE_PORT_I2S_SD2_3_MASK:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_QUAD23;
+			break;
+		default:
+			dev_err(dev, "Invalid SD lines\n");
+			return -EINVAL;
+		}
+		break;
+	case 3:
+		switch (cfg->sd_line_mask) {
+		case AFE_PORT_I2S_SD0_1_2_MASK:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_6CHS;
+			break;
+		default:
+			dev_err(dev, "Invalid SD lines\n");
+			return -EINVAL;
+		}
+		break;
+	case 4:
+		switch (cfg->sd_line_mask) {
+		case AFE_PORT_I2S_SD0_1_2_3_MASK:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_8CHS;
+			break;
+		default:
+			dev_err(dev, "Invalid SD lines\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		dev_err(dev, "Invalid SD lines\n");
+		return -EINVAL;
+	}
+
+	switch (cfg->num_channels) {
+	case 1:
+	case 2:
+		switch (pcfg->i2s_cfg.channel_mode) {
+		case AFE_PORT_I2S_QUAD01:
+		case AFE_PORT_I2S_6CHS:
+		case AFE_PORT_I2S_8CHS:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_SD0;
+			break;
+		case AFE_PORT_I2S_QUAD23:
+			pcfg->i2s_cfg.channel_mode = AFE_PORT_I2S_SD2;
+			break;
+		}
+
+		if (cfg->num_channels == 2)
+			pcfg->i2s_cfg.mono_stereo = AFE_PORT_I2S_STEREO;
+		else
+			pcfg->i2s_cfg.mono_stereo = AFE_PORT_I2S_MONO;
+
+		break;
+	case 3:
+	case 4:
+		if (pcfg->i2s_cfg.channel_mode < AFE_PORT_I2S_QUAD01) {
+			dev_err(dev, "Invalid Channel mode\n");
+			return -EINVAL;
+		}
+		break;
+	case 5:
+	case 6:
+		if (pcfg->i2s_cfg.channel_mode < AFE_PORT_I2S_6CHS) {
+			dev_err(dev, "Invalid Channel mode\n");
+			return -EINVAL;
+		}
+		break;
+	case 7:
+	case 8:
+		if (pcfg->i2s_cfg.channel_mode < AFE_PORT_I2S_8CHS) {
+			dev_err(dev, "Invalid Channel mode\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(q6afe_i2s_port_prepare);
+
+/**
+ * q6afe_port_start() - Start an AFE port
+ *
+ * @port: Instance of port to start
+ *
+ * Return: Will be a negative value on failure or zero on success.
+ */
+int q6afe_port_start(struct q6afe_port *port)
+{
+	struct afe_port_cmd_device_start *start;
+	struct q6afe *afe = port->afe;
+	int port_id = port->id;
+	int ret, param_id = port->cfg_type;
+	struct apr_pkt *pkt;
+	int pkt_size;
+	void *p;
+
+	ret  = q6afe_port_set_param_v2(port, &port->port_cfg, param_id,
+				       AFE_MODULE_AUDIO_DEV_INTERFACE,
+				       sizeof(port->port_cfg));
+	if (ret) {
+		dev_err(afe->dev, "AFE enable for port 0x%x failed %d\n",
+			port_id, ret);
+		return ret;
+	}
+
+	if (port->scfg) {
+		ret  = q6afe_port_set_param_v2(port, port->scfg,
+					AFE_PARAM_ID_PORT_SLOT_MAPPING_CONFIG,
+					AFE_MODULE_TDM, sizeof(*port->scfg));
+		if (ret) {
+			dev_err(afe->dev, "AFE enable for port 0x%x failed %d\n",
+				port_id, ret);
+			return ret;
+		}
+	}
+
+	pkt_size = APR_HDR_SIZE + sizeof(*start);
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	start = p + APR_HDR_SIZE;
+
+	pkt->hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+					   APR_HDR_LEN(APR_HDR_SIZE),
+					   APR_PKT_VER);
+	pkt->hdr.pkt_size = pkt_size;
+	pkt->hdr.src_port = 0;
+	pkt->hdr.dest_port = 0;
+	pkt->hdr.token = port->token;
+	pkt->hdr.opcode = AFE_PORT_CMD_DEVICE_START;
+
+	start->port_id = port_id;
+
+	ret = afe_apr_send_pkt(afe, pkt, port);
+	if (ret)
+		dev_err(afe->dev, "AFE enable for port 0x%x failed %d\n",
+			port_id, ret);
+
+	kfree(pkt);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(q6afe_port_start);
+
+/**
+ * q6afe_port_get_from_id() - Get port instance from a port id
+ *
+ * @dev: Pointer to afe child device.
+ * @id: port id
+ *
+ * Return: Will be an error pointer on error or a valid afe port
+ * on success.
+ */
+struct q6afe_port *q6afe_port_get_from_id(struct device *dev, int id)
+{
+	int port_id;
+	struct q6afe *afe = dev_get_drvdata(dev->parent);
+	struct q6afe_port *port;
+	unsigned long flags;
+	int cfg_type;
+
+	if (id < 0 || id > AFE_PORT_MAX) {
+		dev_err(dev, "AFE port token[%d] invalid!\n", id);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/*
+	 * The port may be bound/unbound several times before a callback
+	 * finishes; reuse it if it is already open.
+	 */
+	port = q6afe_find_port(afe, id);
+	if (port) {
+		dev_err(dev, "AFE Port already open\n");
+		return port;
+	}
+
+	port_id = port_maps[id].port_id;
+
+	switch (port_id) {
+	case AFE_PORT_ID_MULTICHAN_HDMI_RX:
+		cfg_type = AFE_PARAM_ID_HDMI_CONFIG;
+		break;
+	case AFE_PORT_ID_SLIMBUS_MULTI_CHAN_0_RX:
+	case AFE_PORT_ID_SLIMBUS_MULTI_CHAN_1_RX:
+	case AFE_PORT_ID_SLIMBUS_MULTI_CHAN_2_RX:
+	case AFE_PORT_ID_SLIMBUS_MULTI_CHAN_3_RX:
+	case AFE_PORT_ID_SLIMBUS_MULTI_CHAN_4_RX:
+	case AFE_PORT_ID_SLIMBUS_MULTI_CHAN_5_RX:
+	case AFE_PORT_ID_SLIMBUS_MULTI_CHAN_6_RX:
+		cfg_type = AFE_PARAM_ID_SLIMBUS_CONFIG;
+		break;
+
+	case AFE_PORT_ID_PRIMARY_MI2S_RX:
+	case AFE_PORT_ID_PRIMARY_MI2S_TX:
+	case AFE_PORT_ID_SECONDARY_MI2S_RX:
+	case AFE_PORT_ID_SECONDARY_MI2S_TX:
+	case AFE_PORT_ID_TERTIARY_MI2S_RX:
+	case AFE_PORT_ID_TERTIARY_MI2S_TX:
+	case AFE_PORT_ID_QUATERNARY_MI2S_RX:
+	case AFE_PORT_ID_QUATERNARY_MI2S_TX:
+		cfg_type = AFE_PARAM_ID_I2S_CONFIG;
+		break;
+	case AFE_PORT_ID_PRIMARY_TDM_RX ... AFE_PORT_ID_QUINARY_TDM_TX_7:
+		cfg_type = AFE_PARAM_ID_TDM_CONFIG;
+		break;
+
+	default:
+		dev_err(dev, "Invalid port id 0x%x\n", port_id);
+		return ERR_PTR(-EINVAL);
+	}
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return ERR_PTR(-ENOMEM);
+
+	init_waitqueue_head(&port->wait);
+
+	port->token = id;
+	port->id = port_id;
+	port->afe = afe;
+	port->cfg_type = cfg_type;
+	kref_init(&port->refcount);
+
+	spin_lock_irqsave(&afe->port_list_lock, flags);
+	list_add_tail(&port->node, &afe->port_list);
+	spin_unlock_irqrestore(&afe->port_list_lock, flags);
+
+	return port;
+}
+EXPORT_SYMBOL_GPL(q6afe_port_get_from_id);
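+
+/*
+ * A minimal lifecycle sketch for an AFE port as a DAI driver might use
+ * it (the id and cfg values are illustrative):
+ *
+ *	port = q6afe_port_get_from_id(dev, id);
+ *	if (IS_ERR(port))
+ *		return PTR_ERR(port);
+ *	q6afe_hdmi_port_prepare(port, &cfg);
+ *	ret = q6afe_port_start(port);
+ *	...
+ *	q6afe_port_stop(port);
+ *	q6afe_port_put(port);
+ */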
+
+/**
+ * q6afe_port_put() - Release port reference
+ *
+ * @port: Instance of port to put
+ */
+void q6afe_port_put(struct q6afe_port *port)
+{
+	kref_put(&port->refcount, q6afe_port_free);
+}
+EXPORT_SYMBOL_GPL(q6afe_port_put);
+
+static int q6afe_probe(struct apr_device *adev)
+{
+	struct q6afe *afe;
+	struct device *dev = &adev->dev;
+	struct device_node *dais_np;
+
+	afe = devm_kzalloc(dev, sizeof(*afe), GFP_KERNEL);
+	if (!afe)
+		return -ENOMEM;
+
+	q6core_get_svc_api_info(adev->svc_id, &afe->ainfo);
+	afe->apr = adev;
+	mutex_init(&afe->lock);
+	afe->dev = dev;
+	INIT_LIST_HEAD(&afe->port_list);
+	spin_lock_init(&afe->port_list_lock);
+
+	dev_set_drvdata(dev, afe);
+
+	dais_np = of_get_child_by_name(dev->of_node, "dais");
+	if (dais_np) {
+		afe->pdev_dais = of_platform_device_create(dais_np,
+							   "q6afe-dai", dev);
+		of_node_put(dais_np);
+	}
+
+	return 0;
+}
+
+static int q6afe_remove(struct apr_device *adev)
+{
+	struct q6afe *afe = dev_get_drvdata(&adev->dev);
+
+	if (afe->pdev_dais)
+		of_platform_device_destroy(&afe->pdev_dais->dev, NULL);
+
+	return 0;
+}
+
+static const struct of_device_id q6afe_device_id[]  = {
+	{ .compatible = "qcom,q6afe" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, q6afe_device_id);
+
+static struct apr_driver qcom_q6afe_driver = {
+	.probe = q6afe_probe,
+	.remove = q6afe_remove,
+	.callback = q6afe_callback,
+	.driver = {
+		.name = "qcom-q6afe",
+		.of_match_table = of_match_ptr(q6afe_device_id),
+	},
+};
+
+module_apr_driver(qcom_q6afe_driver);
+MODULE_DESCRIPTION("Q6 Audio Front End");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/q6afe.h b/sound/soc/qcom/qdsp6/q6afe.h
new file mode 100644
index 0000000..c7ed542
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6afe.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __Q6AFE_H__
+#define __Q6AFE_H__
+
+#include <dt-bindings/sound/qcom,q6afe.h>
+
+#define AFE_PORT_MAX		105
+
+#define MSM_AFE_PORT_TYPE_RX 0
+#define MSM_AFE_PORT_TYPE_TX 1
+#define AFE_MAX_PORTS AFE_PORT_MAX
+
+#define Q6AFE_MAX_MI2S_LINES	4
+
+#define AFE_MAX_CHAN_COUNT	8
+#define AFE_PORT_MAX_AUDIO_CHAN_CNT	0x8
+
+#define Q6AFE_LPASS_CLK_SRC_INTERNAL 1
+#define Q6AFE_LPASS_CLK_ROOT_DEFAULT 0
+
+#define LPAIF_DIG_CLK	1
+#define LPAIF_BIT_CLK	2
+#define LPAIF_OSR_CLK	3
+
+/* Clock ID for Primary I2S IBIT */
+#define Q6AFE_LPASS_CLK_ID_PRI_MI2S_IBIT                          0x100
+/* Clock ID for Primary I2S EBIT */
+#define Q6AFE_LPASS_CLK_ID_PRI_MI2S_EBIT                          0x101
+/* Clock ID for Secondary I2S IBIT */
+#define Q6AFE_LPASS_CLK_ID_SEC_MI2S_IBIT                          0x102
+/* Clock ID for Secondary I2S EBIT */
+#define Q6AFE_LPASS_CLK_ID_SEC_MI2S_EBIT                          0x103
+/* Clock ID for Tertiary I2S IBIT */
+#define Q6AFE_LPASS_CLK_ID_TER_MI2S_IBIT                          0x104
+/* Clock ID for Tertiary I2S EBIT */
+#define Q6AFE_LPASS_CLK_ID_TER_MI2S_EBIT                          0x105
+/* Clock ID for Quaternary I2S IBIT */
+#define Q6AFE_LPASS_CLK_ID_QUAD_MI2S_IBIT                         0x106
+/* Clock ID for Quaternary I2S EBIT */
+#define Q6AFE_LPASS_CLK_ID_QUAD_MI2S_EBIT                         0x107
+/* Clock ID for Speaker I2S IBIT */
+#define Q6AFE_LPASS_CLK_ID_SPEAKER_I2S_IBIT                       0x108
+/* Clock ID for Speaker I2S EBIT */
+#define Q6AFE_LPASS_CLK_ID_SPEAKER_I2S_EBIT                       0x109
+/* Clock ID for Speaker I2S OSR */
+#define Q6AFE_LPASS_CLK_ID_SPEAKER_I2S_OSR                        0x10A
+
+/* Clock ID for QUINARY I2S IBIT */
+#define Q6AFE_LPASS_CLK_ID_QUI_MI2S_IBIT			0x10B
+/* Clock ID for QUINARY I2S EBIT */
+#define Q6AFE_LPASS_CLK_ID_QUI_MI2S_EBIT			0x10C
+/* Clock ID for SENARY I2S IBIT */
+#define Q6AFE_LPASS_CLK_ID_SEN_MI2S_IBIT			0x10D
+/* Clock ID for SENARY I2S EBIT */
+#define Q6AFE_LPASS_CLK_ID_SEN_MI2S_EBIT			0x10E
+/* Clock ID for INT0 I2S IBIT  */
+#define Q6AFE_LPASS_CLK_ID_INT0_MI2S_IBIT                       0x10F
+/* Clock ID for INT1 I2S IBIT  */
+#define Q6AFE_LPASS_CLK_ID_INT1_MI2S_IBIT                       0x110
+/* Clock ID for INT2 I2S IBIT  */
+#define Q6AFE_LPASS_CLK_ID_INT2_MI2S_IBIT                       0x111
+/* Clock ID for INT3 I2S IBIT  */
+#define Q6AFE_LPASS_CLK_ID_INT3_MI2S_IBIT                       0x112
+/* Clock ID for INT4 I2S IBIT  */
+#define Q6AFE_LPASS_CLK_ID_INT4_MI2S_IBIT                       0x113
+/* Clock ID for INT5 I2S IBIT  */
+#define Q6AFE_LPASS_CLK_ID_INT5_MI2S_IBIT                       0x114
+/* Clock ID for INT6 I2S IBIT  */
+#define Q6AFE_LPASS_CLK_ID_INT6_MI2S_IBIT                       0x115
+
+/* Clock ID for QUINARY MI2S OSR CLK  */
+#define Q6AFE_LPASS_CLK_ID_QUI_MI2S_OSR                         0x116
+
+/* Clock ID for Primary PCM IBIT */
+#define Q6AFE_LPASS_CLK_ID_PRI_PCM_IBIT                           0x200
+/* Clock ID for Primary PCM EBIT */
+#define Q6AFE_LPASS_CLK_ID_PRI_PCM_EBIT                           0x201
+/* Clock ID for Secondary PCM IBIT */
+#define Q6AFE_LPASS_CLK_ID_SEC_PCM_IBIT                           0x202
+/* Clock ID for Secondary PCM EBIT */
+#define Q6AFE_LPASS_CLK_ID_SEC_PCM_EBIT                           0x203
+/* Clock ID for Tertiary PCM IBIT */
+#define Q6AFE_LPASS_CLK_ID_TER_PCM_IBIT                           0x204
+/* Clock ID for Tertiary PCM EBIT */
+#define Q6AFE_LPASS_CLK_ID_TER_PCM_EBIT                           0x205
+/* Clock ID for Quaternary PCM IBIT */
+#define Q6AFE_LPASS_CLK_ID_QUAD_PCM_IBIT                          0x206
+/* Clock ID for Quaternary PCM EBIT */
+#define Q6AFE_LPASS_CLK_ID_QUAD_PCM_EBIT                          0x207
+/* Clock ID for Quinary PCM IBIT */
+#define Q6AFE_LPASS_CLK_ID_QUIN_PCM_IBIT                          0x208
+/* Clock ID for Quinary PCM EBIT */
+#define Q6AFE_LPASS_CLK_ID_QUIN_PCM_EBIT                          0x209
+/* Clock ID for QUINARY PCM OSR  */
+#define Q6AFE_LPASS_CLK_ID_QUI_PCM_OSR                            0x20A
+
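+/*
+ * Note: the TDM clock IDs below share their values with the PCM clock
+ * IDs above (0x200..0x20A); both apparently address the same LPAIF
+ * interface clocks.
+ */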
+/* Clock ID for Primary TDM IBIT */
+#define Q6AFE_LPASS_CLK_ID_PRI_TDM_IBIT                           0x200
+/* Clock ID for Primary TDM EBIT */
+#define Q6AFE_LPASS_CLK_ID_PRI_TDM_EBIT                           0x201
+/* Clock ID for Secondary TDM IBIT */
+#define Q6AFE_LPASS_CLK_ID_SEC_TDM_IBIT                           0x202
+/* Clock ID for Secondary TDM EBIT */
+#define Q6AFE_LPASS_CLK_ID_SEC_TDM_EBIT                           0x203
+/* Clock ID for Tertiary TDM IBIT */
+#define Q6AFE_LPASS_CLK_ID_TER_TDM_IBIT                           0x204
+/* Clock ID for Tertiary TDM EBIT */
+#define Q6AFE_LPASS_CLK_ID_TER_TDM_EBIT                           0x205
+/* Clock ID for Quaternary TDM IBIT */
+#define Q6AFE_LPASS_CLK_ID_QUAD_TDM_IBIT                          0x206
+/* Clock ID for Quaternary TDM EBIT */
+#define Q6AFE_LPASS_CLK_ID_QUAD_TDM_EBIT                          0x207
+/* Clock ID for Quinary TDM IBIT */
+#define Q6AFE_LPASS_CLK_ID_QUIN_TDM_IBIT                          0x208
+/* Clock ID for Quinary TDM EBIT */
+#define Q6AFE_LPASS_CLK_ID_QUIN_TDM_EBIT                          0x209
+/* Clock ID for Quinary TDM OSR */
+#define Q6AFE_LPASS_CLK_ID_QUIN_TDM_OSR                           0x20A
+
+/* Clock ID for MCLK1 */
+#define Q6AFE_LPASS_CLK_ID_MCLK_1                                 0x300
+/* Clock ID for MCLK2 */
+#define Q6AFE_LPASS_CLK_ID_MCLK_2                                 0x301
+/* Clock ID for MCLK3 */
+#define Q6AFE_LPASS_CLK_ID_MCLK_3                                 0x302
+/* Clock ID for MCLK4 */
+#define Q6AFE_LPASS_CLK_ID_MCLK_4                                 0x304
+/* Clock ID for Internal Digital Codec Core */
+#define Q6AFE_LPASS_CLK_ID_INTERNAL_DIGITAL_CODEC_CORE            0x303
+/* Clock ID for INT MCLK0 */
+#define Q6AFE_LPASS_CLK_ID_INT_MCLK_0                             0x305
+/* Clock ID for INT MCLK1 */
+#define Q6AFE_LPASS_CLK_ID_INT_MCLK_1                             0x306
+
+/* Clock attribute for invalid use (reserved for internal usage) */
+#define Q6AFE_LPASS_CLK_ATTRIBUTE_INVALID		0x0
+/* Clock attribute for no couple case */
+#define Q6AFE_LPASS_CLK_ATTRIBUTE_COUPLE_NO		0x1
+/* Clock attribute for dividend couple case */
+#define Q6AFE_LPASS_CLK_ATTRIBUTE_COUPLE_DIVIDEND	0x2
+/* Clock attribute for divisor couple case */
+#define Q6AFE_LPASS_CLK_ATTRIBUTE_COUPLE_DIVISOR	0x3
+/* Clock attribute for invert and no couple case */
+#define Q6AFE_LPASS_CLK_ATTRIBUTE_INVERT_COUPLE_NO	0x4
+
+#define Q6AFE_CMAP_INVALID		0xFFFF
+
+struct q6afe_hdmi_cfg {
+	u16                  datatype;
+	u16                  channel_allocation;
+	u32                  sample_rate;
+	u16                  bit_width;
+};
+
+struct q6afe_slim_cfg {
+	u32	sample_rate;
+	u16	bit_width;
+	u16	data_format;
+	u16	num_channels;
+	u8	ch_mapping[AFE_MAX_CHAN_COUNT];
+};
+
+struct q6afe_i2s_cfg {
+	u32	sample_rate;
+	u16	bit_width;
+	u16	data_format;
+	u16	num_channels;
+	u32	sd_line_mask;
+	int fmt;
+};
+
+struct q6afe_tdm_cfg {
+	u16	num_channels;
+	u32	sample_rate;
+	u16	bit_width;
+	u16	data_format;
+	u16	sync_mode;
+	u16	sync_src;
+	u16	nslots_per_frame;
+	u16	slot_width;
+	u16	slot_mask;
+	u32	data_align_type;
+	u16	ch_mapping[AFE_MAX_CHAN_COUNT];
+};
+
+struct q6afe_port_config {
+	struct q6afe_hdmi_cfg hdmi;
+	struct q6afe_slim_cfg slim;
+	struct q6afe_i2s_cfg i2s_cfg;
+	struct q6afe_tdm_cfg tdm;
+};
+
+struct q6afe_port;
+
+struct q6afe_port *q6afe_port_get_from_id(struct device *dev, int id);
+int q6afe_port_start(struct q6afe_port *port);
+int q6afe_port_stop(struct q6afe_port *port);
+void q6afe_port_put(struct q6afe_port *port);
+int q6afe_get_port_id(int index);
+void q6afe_hdmi_port_prepare(struct q6afe_port *port,
+			    struct q6afe_hdmi_cfg *cfg);
+void q6afe_slim_port_prepare(struct q6afe_port *port,
+			  struct q6afe_slim_cfg *cfg);
+int q6afe_i2s_port_prepare(struct q6afe_port *port, struct q6afe_i2s_cfg *cfg);
+void q6afe_tdm_port_prepare(struct q6afe_port *port, struct q6afe_tdm_cfg *cfg);
+
+int q6afe_port_set_sysclk(struct q6afe_port *port, int clk_id,
+			  int clk_src, int clk_root,
+			  unsigned int freq, int dir);
+#endif /* __Q6AFE_H__ */
diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
new file mode 100644
index 0000000..349c6a8
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
@@ -0,0 +1,624 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/component.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/pcm.h>
+#include <asm/dma.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_device.h>
+#include <sound/pcm_params.h>
+#include "q6asm.h"
+#include "q6routing.h"
+#include "q6dsp-errno.h"
+
+#define DRV_NAME	"q6asm-fe-dai"
+
+#define PLAYBACK_MIN_NUM_PERIODS    2
+#define PLAYBACK_MAX_NUM_PERIODS    8
+#define PLAYBACK_MAX_PERIOD_SIZE    65536
+#define PLAYBACK_MIN_PERIOD_SIZE    128
+#define CAPTURE_MIN_NUM_PERIODS     2
+#define CAPTURE_MAX_NUM_PERIODS     8
+#define CAPTURE_MAX_PERIOD_SIZE     4096
+#define CAPTURE_MIN_PERIOD_SIZE     320
+#define SID_MASK_DEFAULT	0xF
+
+enum stream_state {
+	Q6ASM_STREAM_IDLE = 0,
+	Q6ASM_STREAM_STOPPED,
+	Q6ASM_STREAM_RUNNING,
+};
+
+struct q6asm_dai_rtd {
+	struct snd_pcm_substream *substream;
+	phys_addr_t phys;
+	unsigned int pcm_size;
+	unsigned int pcm_count;
+	unsigned int pcm_irq_pos;       /* IRQ position */
+	unsigned int periods;
+	uint16_t bits_per_sample;
+	uint16_t source; /* Encoding source bit mask */
+	struct audio_client *audio_client;
+	uint16_t session_id;
+	enum stream_state state;
+};
+
+struct q6asm_dai_data {
+	long long int sid;
+};
+
+static struct snd_pcm_hardware q6asm_dai_hardware_capture = {
+	.info =                 (SNDRV_PCM_INFO_MMAP |
+				SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				SNDRV_PCM_INFO_MMAP_VALID |
+				SNDRV_PCM_INFO_INTERLEAVED |
+				SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME),
+	.formats =              (SNDRV_PCM_FMTBIT_S16_LE |
+				SNDRV_PCM_FMTBIT_S24_LE),
+	.rates =                SNDRV_PCM_RATE_8000_48000,
+	.rate_min =             8000,
+	.rate_max =             48000,
+	.channels_min =         1,
+	.channels_max =         4,
+	.buffer_bytes_max =     CAPTURE_MAX_NUM_PERIODS *
+				CAPTURE_MAX_PERIOD_SIZE,
+	.period_bytes_min =	CAPTURE_MIN_PERIOD_SIZE,
+	.period_bytes_max =     CAPTURE_MAX_PERIOD_SIZE,
+	.periods_min =          CAPTURE_MIN_NUM_PERIODS,
+	.periods_max =          CAPTURE_MAX_NUM_PERIODS,
+	.fifo_size =            0,
+};
+
+static struct snd_pcm_hardware q6asm_dai_hardware_playback = {
+	.info =                 (SNDRV_PCM_INFO_MMAP |
+				SNDRV_PCM_INFO_BLOCK_TRANSFER |
+				SNDRV_PCM_INFO_MMAP_VALID |
+				SNDRV_PCM_INFO_INTERLEAVED |
+				SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_RESUME),
+	.formats =              (SNDRV_PCM_FMTBIT_S16_LE |
+				SNDRV_PCM_FMTBIT_S24_LE),
+	.rates =                SNDRV_PCM_RATE_8000_192000,
+	.rate_min =             8000,
+	.rate_max =             192000,
+	.channels_min =         1,
+	.channels_max =         8,
+	.buffer_bytes_max =     (PLAYBACK_MAX_NUM_PERIODS *
+				PLAYBACK_MAX_PERIOD_SIZE),
+	.period_bytes_min =	PLAYBACK_MIN_PERIOD_SIZE,
+	.period_bytes_max =     PLAYBACK_MAX_PERIOD_SIZE,
+	.periods_min =          PLAYBACK_MIN_NUM_PERIODS,
+	.periods_max =          PLAYBACK_MAX_NUM_PERIODS,
+	.fifo_size =            0,
+};
+
+#define Q6ASM_FEDAI_DRIVER(num) { \
+		.playback = {						\
+			.stream_name = "MultiMedia"#num" Playback",	\
+			.rates = (SNDRV_PCM_RATE_8000_192000|		\
+					SNDRV_PCM_RATE_KNOT),		\
+			.formats = (SNDRV_PCM_FMTBIT_S16_LE |		\
+					SNDRV_PCM_FMTBIT_S24_LE),	\
+			.channels_min = 1,				\
+			.channels_max = 8,				\
+			.rate_min =     8000,				\
+			.rate_max =	192000,				\
+		},							\
+		.capture = {						\
+			.stream_name = "MultiMedia"#num" Capture",	\
+			.rates = (SNDRV_PCM_RATE_8000_48000|		\
+					SNDRV_PCM_RATE_KNOT),		\
+			.formats = (SNDRV_PCM_FMTBIT_S16_LE |		\
+				    SNDRV_PCM_FMTBIT_S24_LE),		\
+			.channels_min = 1,				\
+			.channels_max = 4,				\
+			.rate_min =     8000,				\
+			.rate_max =	48000,				\
+		},							\
+		.name = "MultiMedia"#num,				\
+		.probe = fe_dai_probe,					\
+		.id = MSM_FRONTEND_DAI_MULTIMEDIA##num,			\
+	}
+
+/* Conventional and unconventional sample rates supported */
+static unsigned int supported_sample_rates[] = {
+	8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000,
+	88200, 96000, 176400, 192000
+};
+
+static struct snd_pcm_hw_constraint_list constraints_sample_rates = {
+	.count = ARRAY_SIZE(supported_sample_rates),
+	.list = supported_sample_rates,
+	.mask = 0,
+};
+
+static void event_handler(uint32_t opcode, uint32_t token,
+			  uint32_t *payload, void *priv)
+{
+	struct q6asm_dai_rtd *prtd = priv;
+	struct snd_pcm_substream *substream = prtd->substream;
+
+	switch (opcode) {
+	case ASM_CLIENT_EVENT_CMD_RUN_DONE:
+		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+			q6asm_write_async(prtd->audio_client,
+				   prtd->pcm_count, 0, 0, NO_TIMESTAMP);
+		break;
+	case ASM_CLIENT_EVENT_CMD_EOS_DONE:
+		prtd->state = Q6ASM_STREAM_STOPPED;
+		break;
+	case ASM_CLIENT_EVENT_DATA_WRITE_DONE: {
+		prtd->pcm_irq_pos += prtd->pcm_count;
+		snd_pcm_period_elapsed(substream);
+		if (prtd->state == Q6ASM_STREAM_RUNNING)
+			q6asm_write_async(prtd->audio_client,
+					   prtd->pcm_count, 0, 0, NO_TIMESTAMP);
+
+		break;
+	}
+	case ASM_CLIENT_EVENT_DATA_READ_DONE:
+		prtd->pcm_irq_pos += prtd->pcm_count;
+		snd_pcm_period_elapsed(substream);
+		if (prtd->state == Q6ASM_STREAM_RUNNING)
+			q6asm_read(prtd->audio_client);
+
+		break;
+	default:
+		break;
+	}
+}
+
+static int q6asm_dai_prepare(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_soc_pcm_runtime *soc_prtd = substream->private_data;
+	struct q6asm_dai_rtd *prtd = runtime->private_data;
+	struct snd_soc_component *c = snd_soc_rtdcom_lookup(soc_prtd, DRV_NAME);
+	struct q6asm_dai_data *pdata;
+	int ret, i;
+
+	pdata = snd_soc_component_get_drvdata(c);
+	if (!pdata)
+		return -EINVAL;
+
+	if (!prtd || !prtd->audio_client) {
+		pr_err("%s: private data null or audio client freed\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	prtd->pcm_count = snd_pcm_lib_period_bytes(substream);
+	prtd->pcm_irq_pos = 0;
+	/* rate and channels are sent to audio driver */
+	if (prtd->state) {
+		/* clear the previous setup if any  */
+		q6asm_cmd(prtd->audio_client, CMD_CLOSE);
+		q6asm_unmap_memory_regions(substream->stream,
+					   prtd->audio_client);
+		q6routing_stream_close(soc_prtd->dai_link->id,
+					 substream->stream);
+	}
+
+	ret = q6asm_map_memory_regions(substream->stream, prtd->audio_client,
+				       prtd->phys,
+				       (prtd->pcm_size / prtd->periods),
+				       prtd->periods);
+
+	if (ret < 0) {
+		pr_err("Audio Start: Buffer Allocation failed rc = %d\n",
+							ret);
+		return -ENOMEM;
+	}
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		ret = q6asm_open_write(prtd->audio_client, FORMAT_LINEAR_PCM,
+				       prtd->bits_per_sample);
+	} else if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
+		ret = q6asm_open_read(prtd->audio_client, FORMAT_LINEAR_PCM,
+				       prtd->bits_per_sample);
+	}
+
+	if (ret < 0) {
+		pr_err("%s: q6asm_open_write failed\n", __func__);
+		q6asm_audio_client_free(prtd->audio_client);
+		prtd->audio_client = NULL;
+		return -ENOMEM;
+	}
+
+	prtd->session_id = q6asm_get_session_id(prtd->audio_client);
+	ret = q6routing_stream_open(soc_prtd->dai_link->id, LEGACY_PCM_MODE,
+			      prtd->session_id, substream->stream);
+	if (ret) {
+		pr_err("%s: stream reg failed ret:%d\n", __func__, ret);
+		return ret;
+	}
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		ret = q6asm_media_format_block_multi_ch_pcm(
+				prtd->audio_client, runtime->rate,
+				runtime->channels, NULL,
+				prtd->bits_per_sample);
+	} else if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) {
+		ret = q6asm_enc_cfg_blk_pcm_format_support(prtd->audio_client,
+					runtime->rate, runtime->channels,
+					prtd->bits_per_sample);
+
+		/* Queue the buffers */
+		for (i = 0; i < runtime->periods; i++)
+			q6asm_read(prtd->audio_client);
+
+	}
+	if (ret < 0)
+		pr_info("%s: CMD Format block failed\n", __func__);
+
+	prtd->state = Q6ASM_STREAM_RUNNING;
+
+	return 0;
+}
+
+static int q6asm_dai_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	int ret = 0;
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct q6asm_dai_rtd *prtd = runtime->private_data;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+	case SNDRV_PCM_TRIGGER_RESUME:
+	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		ret = q6asm_run_nowait(prtd->audio_client, 0, 0, 0);
+		break;
+	case SNDRV_PCM_TRIGGER_STOP:
+		prtd->state = Q6ASM_STREAM_STOPPED;
+		ret = q6asm_cmd_nowait(prtd->audio_client, CMD_EOS);
+		break;
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		ret = q6asm_cmd_nowait(prtd->audio_client, CMD_PAUSE);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int q6asm_dai_open(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_soc_pcm_runtime *soc_prtd = substream->private_data;
+	struct snd_soc_dai *cpu_dai = soc_prtd->cpu_dai;
+	struct snd_soc_component *c = snd_soc_rtdcom_lookup(soc_prtd, DRV_NAME);
+	struct q6asm_dai_rtd *prtd;
+	struct q6asm_dai_data *pdata;
+	struct device *dev = c->dev;
+	int ret = 0;
+	int stream_id;
+
+	stream_id = cpu_dai->driver->id;
+
+	pdata = snd_soc_component_get_drvdata(c);
+	if (!pdata) {
+		pr_err("Drv data not found ..\n");
+		return -EINVAL;
+	}
+
+	prtd = kzalloc(sizeof(struct q6asm_dai_rtd), GFP_KERNEL);
+	if (prtd == NULL)
+		return -ENOMEM;
+
+	prtd->substream = substream;
+	prtd->audio_client = q6asm_audio_client_alloc(dev,
+				(q6asm_cb)event_handler, prtd, stream_id,
+				LEGACY_PCM_MODE);
+	if (IS_ERR(prtd->audio_client)) {
+		pr_info("%s: Could not allocate memory\n", __func__);
+		ret = PTR_ERR(prtd->audio_client);
+		kfree(prtd);
+		return ret;
+	}
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		runtime->hw = q6asm_dai_hardware_playback;
+	else if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+		runtime->hw = q6asm_dai_hardware_capture;
+
+	ret = snd_pcm_hw_constraint_list(runtime, 0,
+				SNDRV_PCM_HW_PARAM_RATE,
+				&constraints_sample_rates);
+	if (ret < 0)
+		pr_info("snd_pcm_hw_constraint_list failed\n");
+	/* Ensure that buffer size is a multiple of period size */
+	ret = snd_pcm_hw_constraint_integer(runtime,
+					    SNDRV_PCM_HW_PARAM_PERIODS);
+	if (ret < 0)
+		pr_info("snd_pcm_hw_constraint_integer failed\n");
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		ret = snd_pcm_hw_constraint_minmax(runtime,
+			SNDRV_PCM_HW_PARAM_BUFFER_BYTES,
+			PLAYBACK_MIN_NUM_PERIODS * PLAYBACK_MIN_PERIOD_SIZE,
+			PLAYBACK_MAX_NUM_PERIODS * PLAYBACK_MAX_PERIOD_SIZE);
+		if (ret < 0) {
+			pr_err("constraint for buffer bytes min max ret = %d\n",
+									ret);
+		}
+	}
+
+	ret = snd_pcm_hw_constraint_step(runtime, 0,
+		SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 32);
+	if (ret < 0) {
+		pr_err("constraint for period bytes step ret = %d\n",
+								ret);
+	}
+	ret = snd_pcm_hw_constraint_step(runtime, 0,
+		SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 32);
+	if (ret < 0) {
+		pr_err("constraint for buffer bytes step ret = %d\n",
+								ret);
+	}
+
+	runtime->private_data = prtd;
+
+	snd_soc_set_runtime_hwparams(substream, &q6asm_dai_hardware_playback);
+
+	runtime->dma_bytes = q6asm_dai_hardware_playback.buffer_bytes_max;
+
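+	/*
+	 * Fold the SMMU stream ID (parsed from the "iommus" property in
+	 * q6asm_dai_bind()) into bits above the 32-bit buffer address
+	 * before handing it to the DSP.
+	 */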
+	if (pdata->sid < 0)
+		prtd->phys = substream->dma_buffer.addr;
+	else
+		prtd->phys = substream->dma_buffer.addr | (pdata->sid << 32);
+
+	snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer);
+
+	return 0;
+}
+
+static int q6asm_dai_close(struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_soc_pcm_runtime *soc_prtd = substream->private_data;
+	struct q6asm_dai_rtd *prtd = runtime->private_data;
+
+	if (prtd->audio_client) {
+		q6asm_cmd(prtd->audio_client, CMD_CLOSE);
+		q6asm_unmap_memory_regions(substream->stream,
+					   prtd->audio_client);
+		q6asm_audio_client_free(prtd->audio_client);
+		prtd->audio_client = NULL;
+	}
+	q6routing_stream_close(soc_prtd->dai_link->id,
+						substream->stream);
+	kfree(prtd);
+	return 0;
+}
+
+static snd_pcm_uframes_t q6asm_dai_pointer(struct snd_pcm_substream *substream)
+{
+
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct q6asm_dai_rtd *prtd = runtime->private_data;
+
+	if (prtd->pcm_irq_pos >= prtd->pcm_size)
+		prtd->pcm_irq_pos = 0;
+
+	return bytes_to_frames(runtime, (prtd->pcm_irq_pos));
+}
+
+static int q6asm_dai_mmap(struct snd_pcm_substream *substream,
+				struct vm_area_struct *vma)
+{
+
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct snd_soc_pcm_runtime *soc_prtd = substream->private_data;
+	struct snd_soc_component *c = snd_soc_rtdcom_lookup(soc_prtd, DRV_NAME);
+	struct device *dev = c->dev;
+
+	return dma_mmap_coherent(dev, vma,
+			runtime->dma_area, runtime->dma_addr,
+			runtime->dma_bytes);
+}
+
+static int q6asm_dai_hw_params(struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct q6asm_dai_rtd *prtd = runtime->private_data;
+
+	prtd->pcm_size = params_buffer_bytes(params);
+	prtd->periods = params_periods(params);
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		prtd->bits_per_sample = 16;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		prtd->bits_per_sample = 24;
+		break;
+	}
+
+	return 0;
+}
+
+static struct snd_pcm_ops q6asm_dai_ops = {
+	.open           = q6asm_dai_open,
+	.hw_params	= q6asm_dai_hw_params,
+	.close          = q6asm_dai_close,
+	.ioctl          = snd_pcm_lib_ioctl,
+	.prepare        = q6asm_dai_prepare,
+	.trigger        = q6asm_dai_trigger,
+	.pointer        = q6asm_dai_pointer,
+	.mmap		= q6asm_dai_mmap,
+};
+
+static int q6asm_dai_pcm_new(struct snd_soc_pcm_runtime *rtd)
+{
+	struct snd_pcm_substream *psubstream, *csubstream;
+	struct snd_soc_component *c = snd_soc_rtdcom_lookup(rtd, DRV_NAME);
+	struct snd_pcm *pcm = rtd->pcm;
+	struct device *dev;
+	int size, ret = 0;
+
+	dev = c->dev;
+	size = q6asm_dai_hardware_playback.buffer_bytes_max;
+	psubstream = pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream;
+	if (psubstream) {
+		ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, dev, size,
+					  &psubstream->dma_buffer);
+		if (ret) {
+			dev_err(dev, "Cannot allocate buffer(s)\n");
+			return ret;
+		}
+	}
+
+	csubstream = pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream;
+	if (csubstream) {
+		ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, dev, size,
+					  &csubstream->dma_buffer);
+		if (ret) {
+			dev_err(dev, "Cannot allocate buffer(s)\n");
+			if (psubstream)
+				snd_dma_free_pages(&psubstream->dma_buffer);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static void q6asm_dai_pcm_free(struct snd_pcm *pcm)
+{
+	struct snd_pcm_substream *substream;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(pcm->streams); i++) {
+		substream = pcm->streams[i].substream;
+		if (substream) {
+			snd_dma_free_pages(&substream->dma_buffer);
+			substream->dma_buffer.area = NULL;
+			substream->dma_buffer.addr = 0;
+		}
+	}
+}
+
+static const struct snd_soc_dapm_route afe_pcm_routes[] = {
+	{"MM_DL1",  NULL, "MultiMedia1 Playback" },
+	{"MM_DL2",  NULL, "MultiMedia2 Playback" },
+	{"MM_DL3",  NULL, "MultiMedia3 Playback" },
+	{"MM_DL4",  NULL, "MultiMedia4 Playback" },
+	{"MM_DL5",  NULL, "MultiMedia5 Playback" },
+	{"MM_DL6",  NULL, "MultiMedia6 Playback" },
+	{"MM_DL7",  NULL, "MultiMedia7 Playback" },
+	{"MM_DL7",  NULL, "MultiMedia8 Playback" },
+	{"MultiMedia1 Capture", NULL, "MM_UL1"},
+	{"MultiMedia2 Capture", NULL, "MM_UL2"},
+	{"MultiMedia3 Capture", NULL, "MM_UL3"},
+	{"MultiMedia4 Capture", NULL, "MM_UL4"},
+	{"MultiMedia5 Capture", NULL, "MM_UL5"},
+	{"MultiMedia6 Capture", NULL, "MM_UL6"},
+	{"MultiMedia7 Capture", NULL, "MM_UL7"},
+	{"MultiMedia8 Capture", NULL, "MM_UL8"},
+};
+
+static int fe_dai_probe(struct snd_soc_dai *dai)
+{
+	struct snd_soc_dapm_context *dapm;
+
+	dapm = snd_soc_component_get_dapm(dai->component);
+	snd_soc_dapm_add_routes(dapm, afe_pcm_routes,
+				ARRAY_SIZE(afe_pcm_routes));
+
+	return 0;
+}
+
+static const struct snd_soc_component_driver q6asm_fe_dai_component = {
+	.name		= DRV_NAME,
+	.ops		= &q6asm_dai_ops,
+	.pcm_new	= q6asm_dai_pcm_new,
+	.pcm_free	= q6asm_dai_pcm_free,
+};
+
+static struct snd_soc_dai_driver q6asm_fe_dais[] = {
+	Q6ASM_FEDAI_DRIVER(1),
+	Q6ASM_FEDAI_DRIVER(2),
+	Q6ASM_FEDAI_DRIVER(3),
+	Q6ASM_FEDAI_DRIVER(4),
+	Q6ASM_FEDAI_DRIVER(5),
+	Q6ASM_FEDAI_DRIVER(6),
+	Q6ASM_FEDAI_DRIVER(7),
+	Q6ASM_FEDAI_DRIVER(8),
+};
+
+static int q6asm_dai_bind(struct device *dev, struct device *master, void *data)
+{
+	struct device_node *node = dev->of_node;
+	struct of_phandle_args args;
+	struct q6asm_dai_data *pdata;
+	int rc;
+
+	pdata = kzalloc(sizeof(struct q6asm_dai_data), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	rc = of_parse_phandle_with_fixed_args(node, "iommus", 1, 0, &args);
+	if (rc < 0)
+		pdata->sid = -1;
+	else
+		pdata->sid = args.args[0] & SID_MASK_DEFAULT;
+
+	dev_set_drvdata(dev, pdata);
+
+	return snd_soc_register_component(dev, &q6asm_fe_dai_component,
+					q6asm_fe_dais,
+					ARRAY_SIZE(q6asm_fe_dais));
+}
+
+static void q6asm_dai_unbind(struct device *dev, struct device *master,
+			     void *data)
+{
+	struct q6asm_dai_data *pdata = dev_get_drvdata(dev);
+
+	snd_soc_unregister_component(dev);
+
+	kfree(pdata);
+}
+
+static const struct component_ops q6asm_dai_comp_ops = {
+	.bind   = q6asm_dai_bind,
+	.unbind = q6asm_dai_unbind,
+};
+
+static int q6asm_dai_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &q6asm_dai_comp_ops);
+}
+
+static int q6asm_dai_dev_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &q6asm_dai_comp_ops);
+	return 0;
+}
+
+static struct platform_driver q6asm_dai_platform_driver = {
+	.driver = {
+		.name = "q6asm-dai",
+	},
+	.probe = q6asm_dai_probe,
+	.remove = q6asm_dai_dev_remove,
+};
+module_platform_driver(q6asm_dai_platform_driver);
+
+MODULE_DESCRIPTION("Q6ASM dai driver");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c
new file mode 100644
index 0000000..5308523
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6asm.c
@@ -0,0 +1,1399 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/module.h>
+#include <linux/soc/qcom/apr.h>
+#include <linux/device.h>
+#include <linux/of_platform.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <linux/of.h>
+#include <uapi/sound/asound.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include "q6asm.h"
+#include "q6core.h"
+#include "q6dsp-errno.h"
+#include "q6dsp-common.h"
+
+#define ASM_STREAM_CMD_CLOSE			0x00010BCD
+#define ASM_STREAM_CMD_FLUSH			0x00010BCE
+#define ASM_SESSION_CMD_PAUSE			0x00010BD3
+#define ASM_DATA_CMD_EOS			0x00010BDB
+#define ASM_NULL_POPP_TOPOLOGY			0x00010C68
+#define ASM_STREAM_CMD_FLUSH_READBUFS		0x00010C09
+#define ASM_STREAM_CMD_SET_ENCDEC_PARAM		0x00010C10
+#define ASM_STREAM_POSTPROC_TOPO_ID_NONE	0x00010C68
+#define ASM_CMD_SHARED_MEM_MAP_REGIONS		0x00010D92
+#define ASM_CMDRSP_SHARED_MEM_MAP_REGIONS	0x00010D93
+#define ASM_CMD_SHARED_MEM_UNMAP_REGIONS	0x00010D94
+#define ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2	0x00010D98
+#define ASM_DATA_EVENT_WRITE_DONE_V2		0x00010D99
+#define ASM_PARAM_ID_ENCDEC_ENC_CFG_BLK_V2	0x00010DA3
+#define ASM_SESSION_CMD_RUN_V2			0x00010DAA
+#define ASM_MEDIA_FMT_MULTI_CHANNEL_PCM_V2	0x00010DA5
+#define ASM_DATA_CMD_WRITE_V2			0x00010DAB
+#define ASM_DATA_CMD_READ_V2			0x00010DAC
+#define ASM_SESSION_CMD_SUSPEND			0x00010DEC
+#define ASM_STREAM_CMD_OPEN_WRITE_V3		0x00010DB3
+#define ASM_STREAM_CMD_OPEN_READ_V3		0x00010DB4
+#define ASM_DATA_EVENT_READ_DONE_V2		0x00010D9A
+#define ASM_STREAM_CMD_OPEN_READWRITE_V2	0x00010D8D
+
+#define ASM_LEGACY_STREAM_SESSION	0
+/* Bit shift for the stream_perf_mode subfield. */
+#define ASM_SHIFT_STREAM_PERF_MODE_FLAG_IN_OPEN_READ              29
+#define ASM_END_POINT_DEVICE_MATRIX	0
+#define ASM_DEFAULT_APP_TYPE		0
+#define ASM_SYNC_IO_MODE		0x0001
+#define ASM_ASYNC_IO_MODE		0x0002
+#define ASM_TUN_READ_IO_MODE		0x0004	/* tunnel read mode */
+#define ASM_TUN_WRITE_IO_MODE		0x0008	/* tunnel write mode */
+#define ASM_SHIFT_GAPLESS_MODE_FLAG	31
+#define ADSP_MEMORY_MAP_SHMEM8_4K_POOL	3
+
+struct avs_cmd_shared_mem_map_regions {
+	u16 mem_pool_id;
+	u16 num_regions;
+	u32 property_flag;
+} __packed;
+
+struct avs_shared_map_region_payload {
+	u32 shm_addr_lsw;
+	u32 shm_addr_msw;
+	u32 mem_size_bytes;
+} __packed;
+
+struct avs_cmd_shared_mem_unmap_regions {
+	u32 mem_map_handle;
+} __packed;
+
+struct asm_data_cmd_media_fmt_update_v2 {
+	u32 fmt_blk_size;
+} __packed;
+
+struct asm_multi_channel_pcm_fmt_blk_v2 {
+	struct asm_data_cmd_media_fmt_update_v2 fmt_blk;
+	u16 num_channels;
+	u16 bits_per_sample;
+	u32 sample_rate;
+	u16 is_signed;
+	u16 reserved;
+	u8 channel_mapping[PCM_MAX_NUM_CHANNEL];
+} __packed;
+
+struct asm_stream_cmd_set_encdec_param {
+	u32                  param_id;
+	u32                  param_size;
+} __packed;
+
+struct asm_enc_cfg_blk_param_v2 {
+	u32                  frames_per_buf;
+	u32                  enc_cfg_blk_size;
+} __packed;
+
+struct asm_multi_channel_pcm_enc_cfg_v2 {
+	struct asm_stream_cmd_set_encdec_param  encdec;
+	struct asm_enc_cfg_blk_param_v2	encblk;
+	uint16_t  num_channels;
+	uint16_t  bits_per_sample;
+	uint32_t  sample_rate;
+	uint16_t  is_signed;
+	uint16_t  reserved;
+	uint8_t   channel_mapping[8];
+} __packed;
+
+struct asm_data_cmd_read_v2 {
+	u32                  buf_addr_lsw;
+	u32                  buf_addr_msw;
+	u32                  mem_map_handle;
+	u32                  buf_size;
+	u32                  seq_id;
+} __packed;
+
+struct asm_data_cmd_read_v2_done {
+	u32	status;
+	u32	buf_addr_lsw;
+	u32	buf_addr_msw;
+} __packed;
+
+struct asm_stream_cmd_open_read_v3 {
+	u32                    mode_flags;
+	u32                    src_endpointype;
+	u32                    preprocopo_id;
+	u32                    enc_cfg_id;
+	u16                    bits_per_sample;
+	u16                    reserved;
+} __packed;
+
+struct asm_data_cmd_write_v2 {
+	u32 buf_addr_lsw;
+	u32 buf_addr_msw;
+	u32 mem_map_handle;
+	u32 buf_size;
+	u32 seq_id;
+	u32 timestamp_lsw;
+	u32 timestamp_msw;
+	u32 flags;
+} __packed;
+
+struct asm_stream_cmd_open_write_v3 {
+	uint32_t mode_flags;
+	uint16_t sink_endpointype;
+	uint16_t bits_per_sample;
+	uint32_t postprocopo_id;
+	uint32_t dec_fmt_id;
+} __packed;
+
+struct asm_session_cmd_run_v2 {
+	u32 flags;
+	u32 time_lsw;
+	u32 time_msw;
+} __packed;
+
+struct audio_buffer {
+	phys_addr_t phys;
+	uint32_t size;		/* size of buffer */
+};
+
+struct audio_port_data {
+	struct audio_buffer *buf;
+	uint32_t num_periods;
+	uint32_t dsp_buf;
+	uint32_t mem_map_handle;
+};
+
+struct q6asm {
+	struct apr_device *adev;
+	struct device *dev;
+	struct q6core_svc_api_info ainfo;
+	wait_queue_head_t mem_wait;
+	struct platform_device *pcmdev;
+	spinlock_t slock;
+	struct audio_client *session[MAX_SESSIONS + 1];
+	struct platform_device *pdev_dais;
+};
+
+struct audio_client {
+	int session;
+	q6asm_cb cb;
+	void *priv;
+	uint32_t io_mode;
+	struct apr_device *adev;
+	struct mutex cmd_lock;
+	spinlock_t lock;
+	struct kref refcount;
+	/* Audio ports: idx 0 is playback, idx 1 is capture */
+	struct audio_port_data port[2];
+	wait_queue_head_t cmd_wait;
+	struct aprv2_ibasic_rsp_result_t result;
+	int perf_mode;
+	int stream_id;
+	struct q6asm *q6asm;
+	struct device *dev;
+};
+
+static inline void q6asm_add_hdr(struct audio_client *ac, struct apr_hdr *hdr,
+				 uint32_t pkt_size, bool cmd_flg,
+				 uint32_t stream_id)
+{
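+	/*
+	 * Session in the upper byte, stream id in the lower byte of both
+	 * ports; q6asm_srvc_callback() demuxes responses on dest_port.
+	 */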
+	hdr->hdr_field = APR_SEQ_CMD_HDR_FIELD;
+	hdr->src_port = ((ac->session << 8) & 0xFF00) | (stream_id);
+	hdr->dest_port = ((ac->session << 8) & 0xFF00) | (stream_id);
+	hdr->pkt_size = pkt_size;
+	if (cmd_flg)
+		hdr->token = ac->session;
+}
+
+static int q6asm_apr_send_session_pkt(struct q6asm *a, struct audio_client *ac,
+				      struct apr_pkt *pkt, uint32_t rsp_opcode)
+{
+	struct apr_hdr *hdr = &pkt->hdr;
+	int rc;
+
+	mutex_lock(&ac->cmd_lock);
+	ac->result.opcode = 0;
+	ac->result.status = 0;
+	rc = apr_send_pkt(a->adev, pkt);
+	if (rc < 0)
+		goto err;
+
+	if (rsp_opcode)
+		rc = wait_event_timeout(a->mem_wait,
+					(ac->result.opcode == hdr->opcode) ||
+					(ac->result.opcode == rsp_opcode),
+					5 * HZ);
+	else
+		rc = wait_event_timeout(a->mem_wait,
+					(ac->result.opcode == hdr->opcode),
+					5 * HZ);
+
+	if (!rc) {
+		dev_err(a->dev, "CMD timeout\n");
+		rc = -ETIMEDOUT;
+	} else if (ac->result.status > 0) {
+		dev_err(a->dev, "DSP returned error[%x]\n",
+			ac->result.status);
+		rc = -EINVAL;
+	}
+
+err:
+	mutex_unlock(&ac->cmd_lock);
+	return rc;
+}
+
+static int __q6asm_memory_unmap(struct audio_client *ac,
+				phys_addr_t buf_add, int dir)
+{
+	struct avs_cmd_shared_mem_unmap_regions *mem_unmap;
+	struct q6asm *a = dev_get_drvdata(ac->dev->parent);
+	struct apr_pkt *pkt;
+	int rc, pkt_size;
+	void *p;
+
+	if (ac->port[dir].mem_map_handle == 0) {
+		dev_err(ac->dev, "invalid mem handle\n");
+		return -EINVAL;
+	}
+
+	pkt_size = APR_HDR_SIZE + sizeof(*mem_unmap);
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	mem_unmap = p + APR_HDR_SIZE;
+
+	pkt->hdr.hdr_field = APR_SEQ_CMD_HDR_FIELD;
+	pkt->hdr.src_port = 0;
+	pkt->hdr.dest_port = 0;
+	pkt->hdr.pkt_size = pkt_size;
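+	/* Token: session in the upper byte, direction in the low nibble */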
+	pkt->hdr.token = ((ac->session << 8) | dir);
+
+	pkt->hdr.opcode = ASM_CMD_SHARED_MEM_UNMAP_REGIONS;
+	mem_unmap->mem_map_handle = ac->port[dir].mem_map_handle;
+
+	rc = q6asm_apr_send_session_pkt(a, ac, pkt, 0);
+	if (rc < 0) {
+		kfree(pkt);
+		return rc;
+	}
+
+	ac->port[dir].mem_map_handle = 0;
+
+	kfree(pkt);
+	return 0;
+}
+
+
+static void q6asm_audio_client_free_buf(struct audio_client *ac,
+					struct audio_port_data *port)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ac->lock, flags);
+	port->num_periods = 0;
+	kfree(port->buf);
+	port->buf = NULL;
+	spin_unlock_irqrestore(&ac->lock, flags);
+}
+
+/**
+ * q6asm_unmap_memory_regions() - unmap memory regions in the dsp.
+ *
+ * @dir: direction of audio stream
+ * @ac: audio client instance
+ *
+ * Return: Will be a negative value on failure or zero on success
+ */
+int q6asm_unmap_memory_regions(unsigned int dir, struct audio_client *ac)
+{
+	struct audio_port_data *port;
+	int cnt = 0;
+	int rc = 0;
+
+	port = &ac->port[dir];
+	if (!port->buf) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	cnt = port->num_periods - 1;
+	if (cnt >= 0) {
+		rc = __q6asm_memory_unmap(ac, port->buf[dir].phys, dir);
+		if (rc < 0) {
+			dev_err(ac->dev, "%s: Memory_unmap_regions failed %d\n",
+				__func__, rc);
+			goto err;
+		}
+	}
+
+	q6asm_audio_client_free_buf(ac, port);
+
+err:
+	return rc;
+}
+EXPORT_SYMBOL_GPL(q6asm_unmap_memory_regions);
+
+static int __q6asm_memory_map_regions(struct audio_client *ac, int dir,
+				      size_t period_sz, unsigned int periods,
+				      bool is_contiguous)
+{
+	struct avs_cmd_shared_mem_map_regions *cmd = NULL;
+	struct avs_shared_map_region_payload *mregions = NULL;
+	struct q6asm *a = dev_get_drvdata(ac->dev->parent);
+	struct audio_port_data *port = NULL;
+	struct audio_buffer *ab = NULL;
+	struct apr_pkt *pkt;
+	void *p;
+	unsigned long flags;
+	uint32_t num_regions, buf_sz;
+	int rc, i, pkt_size;
+
+	if (is_contiguous) {
+		num_regions = 1;
+		buf_sz = period_sz * periods;
+	} else {
+		buf_sz = period_sz;
+		num_regions = periods;
+	}
+
+	/* DSP expects size should be aligned to 4K */
+	buf_sz = ALIGN(buf_sz, 4096);
+
+	pkt_size = APR_HDR_SIZE + sizeof(*cmd) +
+		   (sizeof(*mregions) * num_regions);
+
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	cmd = p + APR_HDR_SIZE;
+	mregions = p + APR_HDR_SIZE +  sizeof(*cmd);
+
+	pkt->hdr.hdr_field = APR_SEQ_CMD_HDR_FIELD;
+	pkt->hdr.src_port = 0;
+	pkt->hdr.dest_port = 0;
+	pkt->hdr.pkt_size = pkt_size;
+	pkt->hdr.token = ((ac->session << 8) | dir);
+	pkt->hdr.opcode = ASM_CMD_SHARED_MEM_MAP_REGIONS;
+
+	cmd->mem_pool_id = ADSP_MEMORY_MAP_SHMEM8_4K_POOL;
+	cmd->num_regions = num_regions;
+	cmd->property_flag = 0x00;
+
+	spin_lock_irqsave(&ac->lock, flags);
+	port = &ac->port[dir];
+
+	for (i = 0; i < num_regions; i++) {
+		ab = &port->buf[i];
+		mregions->shm_addr_lsw = lower_32_bits(ab->phys);
+		mregions->shm_addr_msw = upper_32_bits(ab->phys);
+		mregions->mem_size_bytes = buf_sz;
+		++mregions;
+	}
+	spin_unlock_irqrestore(&ac->lock, flags);
+
+	rc = q6asm_apr_send_session_pkt(a, ac, pkt,
+					ASM_CMDRSP_SHARED_MEM_MAP_REGIONS);
+
+	kfree(pkt);
+
+	return rc;
+}
+
+/**
+ * q6asm_map_memory_regions() - map memory regions in the dsp.
+ *
+ * @dir: direction of audio stream
+ * @ac: audio client instance
+ * @phys: physical address that needs mapping.
+ * @period_sz: audio period size
+ * @periods: number of periods
+ *
+ * Return: Will be an negative value on failure or zero on success
+ */
+int q6asm_map_memory_regions(unsigned int dir, struct audio_client *ac,
+			     phys_addr_t phys,
+			     size_t period_sz, unsigned int periods)
+{
+	struct audio_buffer *buf;
+	unsigned long flags;
+	int cnt;
+	int rc;
+
+	spin_lock_irqsave(&ac->lock, flags);
+	if (ac->port[dir].buf) {
+		dev_err(ac->dev, "Buffer already allocated\n");
+		spin_unlock_irqrestore(&ac->lock, flags);
+		return 0;
+	}
+
+	buf = kcalloc(periods, sizeof(*buf), GFP_ATOMIC);
+	if (!buf) {
+		spin_unlock_irqrestore(&ac->lock, flags);
+		return -ENOMEM;
+	}
+
+	ac->port[dir].buf = buf;
+
+	buf[0].phys = phys;
+	buf[0].size = period_sz;
+
+	for (cnt = 1; cnt < periods; cnt++) {
+		if (period_sz > 0) {
+			buf[cnt].phys = buf[0].phys + (cnt * period_sz);
+			buf[cnt].size = period_sz;
+		}
+	}
+	ac->port[dir].num_periods = periods;
+
+	spin_unlock_irqrestore(&ac->lock, flags);
+
+	rc = __q6asm_memory_map_regions(ac, dir, period_sz, periods, true);
+	if (rc < 0) {
+		dev_err(ac->dev, "Memory_map_regions failed\n");
+		q6asm_audio_client_free_buf(ac, &ac->port[dir]);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(q6asm_map_memory_regions);
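+
+/*
+ * A minimal sketch of the map/unmap pairing as used from a PCM prepare
+ * path (mirrors q6asm_dai_prepare() in q6asm-dai.c):
+ *
+ *	rc = q6asm_map_memory_regions(substream->stream, ac, prtd->phys,
+ *				      prtd->pcm_size / prtd->periods,
+ *				      prtd->periods);
+ *	...
+ *	q6asm_unmap_memory_regions(substream->stream, ac);
+ *
+ * The region is mapped as one contiguous block; the DSP-side handle is
+ * kept in ac->port[dir].mem_map_handle until unmap.
+ */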
+
+static void q6asm_audio_client_release(struct kref *ref)
+{
+	struct audio_client *ac;
+	struct q6asm *a;
+	unsigned long flags;
+
+	ac = container_of(ref, struct audio_client, refcount);
+	a = ac->q6asm;
+
+	spin_lock_irqsave(&a->slock, flags);
+	a->session[ac->session] = NULL;
+	spin_unlock_irqrestore(&a->slock, flags);
+
+	kfree(ac);
+}
+
+/**
+ * q6asm_audio_client_free() - Free an allocated audio client
+ *
+ * @ac: audio client to free
+ */
+void q6asm_audio_client_free(struct audio_client *ac)
+{
+	kref_put(&ac->refcount, q6asm_audio_client_release);
+}
+EXPORT_SYMBOL_GPL(q6asm_audio_client_free);
+
+static struct audio_client *q6asm_get_audio_client(struct q6asm *a,
+						   int session_id)
+{
+	struct audio_client *ac = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&a->slock, flags);
+	if ((session_id <= 0) || (session_id > MAX_SESSIONS)) {
+		dev_err(a->dev, "invalid session: %d\n", session_id);
+		goto err;
+	}
+
+	/* check for valid session */
+	if (!a->session[session_id])
+		goto err;
+	else if (a->session[session_id]->session != session_id)
+		goto err;
+
+	ac = a->session[session_id];
+	kref_get(&ac->refcount);
+err:
+	spin_unlock_irqrestore(&a->slock, flags);
+	return ac;
+}
+
+static int32_t q6asm_stream_callback(struct apr_device *adev,
+				     struct apr_resp_pkt *data,
+				     int session_id)
+{
+	struct q6asm *q6asm = dev_get_drvdata(&adev->dev);
+	struct aprv2_ibasic_rsp_result_t *result;
+	struct apr_hdr *hdr = &data->hdr;
+	struct audio_port_data *port;
+	struct audio_client *ac;
+	uint32_t client_event = 0;
+	int ret = 0;
+
+	ac = q6asm_get_audio_client(q6asm, session_id);
+	/* The audio client might already be freed by now */
+	if (!ac)
+		return 0;
+
+	result = data->payload;
+
+	switch (hdr->opcode) {
+	case APR_BASIC_RSP_RESULT:
+		switch (result->opcode) {
+		case ASM_SESSION_CMD_PAUSE:
+			client_event = ASM_CLIENT_EVENT_CMD_PAUSE_DONE;
+			break;
+		case ASM_SESSION_CMD_SUSPEND:
+			client_event = ASM_CLIENT_EVENT_CMD_SUSPEND_DONE;
+			break;
+		case ASM_DATA_CMD_EOS:
+			client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE;
+			break;
+		case ASM_STREAM_CMD_FLUSH:
+			client_event = ASM_CLIENT_EVENT_CMD_FLUSH_DONE;
+			break;
+		case ASM_SESSION_CMD_RUN_V2:
+			client_event = ASM_CLIENT_EVENT_CMD_RUN_DONE;
+			break;
+		case ASM_STREAM_CMD_CLOSE:
+			client_event = ASM_CLIENT_EVENT_CMD_CLOSE_DONE;
+			break;
+		case ASM_STREAM_CMD_FLUSH_READBUFS:
+			client_event = ASM_CLIENT_EVENT_CMD_OUT_FLUSH_DONE;
+			break;
+		case ASM_STREAM_CMD_OPEN_WRITE_V3:
+		case ASM_STREAM_CMD_OPEN_READ_V3:
+		case ASM_STREAM_CMD_OPEN_READWRITE_V2:
+		case ASM_STREAM_CMD_SET_ENCDEC_PARAM:
+		case ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2:
+			if (result->status != 0) {
+				dev_err(ac->dev,
+					"cmd = 0x%x returned error = 0x%x\n",
+					result->opcode, result->status);
+				ac->result = *result;
+				wake_up(&ac->cmd_wait);
+				ret = 0;
+				goto done;
+			}
+			break;
+		default:
+			dev_err(ac->dev, "command[0x%x] not expecting rsp\n",
+				result->opcode);
+			break;
+		}
+
+		ac->result = *result;
+		wake_up(&ac->cmd_wait);
+
+		if (ac->cb)
+			ac->cb(client_event, hdr->token,
+			       data->payload, ac->priv);
+
+		ret = 0;
+		goto done;
+
+	case ASM_DATA_EVENT_WRITE_DONE_V2:
+		client_event = ASM_CLIENT_EVENT_DATA_WRITE_DONE;
+		if (ac->io_mode & ASM_SYNC_IO_MODE) {
+			phys_addr_t phys;
+			unsigned long flags;
+
+			spin_lock_irqsave(&ac->lock, flags);
+
+			port =  &ac->port[SNDRV_PCM_STREAM_PLAYBACK];
+
+			if (!port->buf) {
+				spin_unlock_irqrestore(&ac->lock, flags);
+				ret = 0;
+				goto done;
+			}
+
+			phys = port->buf[hdr->token].phys;
+
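+			/*
+			 * The write-done payload carries buf_addr_lsw/msw
+			 * in its first two words; result is only an
+			 * overlay of that layout here.
+			 */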
+			if (lower_32_bits(phys) != result->opcode ||
+			    upper_32_bits(phys) != result->status) {
+				dev_err(ac->dev, "Expected addr %pa\n",
+					&port->buf[hdr->token].phys);
+				spin_unlock_irqrestore(&ac->lock, flags);
+				ret = -EINVAL;
+				goto done;
+			}
+			spin_unlock_irqrestore(&ac->lock, flags);
+		}
+		break;
+	case ASM_DATA_EVENT_READ_DONE_V2:
+		client_event = ASM_CLIENT_EVENT_DATA_READ_DONE;
+		if (ac->io_mode & ASM_SYNC_IO_MODE) {
+			struct asm_data_cmd_read_v2_done *done = data->payload;
+			unsigned long flags;
+			phys_addr_t phys;
+
+			spin_lock_irqsave(&ac->lock, flags);
+			port =  &ac->port[SNDRV_PCM_STREAM_CAPTURE];
+			if (!port->buf) {
+				spin_unlock_irqrestore(&ac->lock, flags);
+				ret = 0;
+				goto done;
+			}
+
+			phys = port->buf[hdr->token].phys;
+
+			if (upper_32_bits(phys) != done->buf_addr_msw ||
+			    lower_32_bits(phys) != done->buf_addr_lsw) {
+				dev_err(ac->dev, "Expected addr %pa %08x-%08x\n",
+					&port->buf[hdr->token].phys,
+					done->buf_addr_lsw,
+					done->buf_addr_msw);
+				spin_unlock_irqrestore(&ac->lock, flags);
+				ret = -EINVAL;
+				goto done;
+			}
+			spin_unlock_irqrestore(&ac->lock, flags);
+		}
+
+		break;
+	}
+
+	if (ac->cb)
+		ac->cb(client_event, hdr->token, data->payload, ac->priv);
+
+done:
+	kref_put(&ac->refcount, q6asm_audio_client_release);
+	return ret;
+}
+
+static int q6asm_srvc_callback(struct apr_device *adev,
+			       struct apr_resp_pkt *data)
+{
+	struct q6asm *q6asm = dev_get_drvdata(&adev->dev);
+	struct aprv2_ibasic_rsp_result_t *result;
+	struct audio_port_data *port;
+	struct audio_client *ac = NULL;
+	struct apr_hdr *hdr = &data->hdr;
+	struct q6asm *a;
+	uint32_t sid = 0;
+	uint32_t dir = 0;
+	int session_id;
+
+	session_id = (hdr->dest_port >> 8) & 0xFF;
+	if (session_id)
+		return q6asm_stream_callback(adev, data, session_id);
+
+	sid = (hdr->token >> 8) & 0x0F;
+	ac = q6asm_get_audio_client(q6asm, sid);
+	if (!ac) {
+		dev_err(&adev->dev, "Audio Client not active\n");
+		return 0;
+	}
+
+	a = dev_get_drvdata(ac->dev->parent);
+	dir = (hdr->token & 0x0F);
+	port = &ac->port[dir];
+	result = data->payload;
+
+	switch (hdr->opcode) {
+	case APR_BASIC_RSP_RESULT:
+		switch (result->opcode) {
+		case ASM_CMD_SHARED_MEM_MAP_REGIONS:
+		case ASM_CMD_SHARED_MEM_UNMAP_REGIONS:
+			ac->result = *result;
+			wake_up(&a->mem_wait);
+			break;
+		default:
+			dev_err(&adev->dev, "command[0x%x] not expecting rsp\n",
+				result->opcode);
+			break;
+		}
+		goto done;
+	case ASM_CMDRSP_SHARED_MEM_MAP_REGIONS:
+		ac->result.status = 0;
+		ac->result.opcode = hdr->opcode;
+		port->mem_map_handle = result->opcode;
+		wake_up(&a->mem_wait);
+		break;
+	case ASM_CMD_SHARED_MEM_UNMAP_REGIONS:
+		ac->result.opcode = hdr->opcode;
+		ac->result.status = 0;
+		port->mem_map_handle = 0;
+		wake_up(&a->mem_wait);
+		break;
+	default:
+		dev_dbg(&adev->dev, "command[0x%x] success [0x%x]\n",
+			result->opcode, result->status);
+		break;
+	}
+
+	if (ac->cb)
+		ac->cb(hdr->opcode, hdr->token, data->payload, ac->priv);
+
+done:
+	kref_put(&ac->refcount, q6asm_audio_client_release);
+
+	return 0;
+}
+
+/**
+ * q6asm_get_session_id() - get session id for audio client
+ *
+ * @c: audio client pointer
+ *
+ * Return: session id of the audio client.
+ */
+int q6asm_get_session_id(struct audio_client *c)
+{
+	return c->session;
+}
+EXPORT_SYMBOL_GPL(q6asm_get_session_id);
+
+/**
+ * q6asm_audio_client_alloc() - Allocate a new audio client
+ *
+ * @dev: Pointer to asm child device.
+ * @cb: event callback.
+ * @priv: private data associated with this client.
+ * @stream_id: stream id
+ * @perf_mode: performance mode for this client
+ *
+ * Return: Will be an error pointer on error or a valid audio client
+ * on success.
+ */
+struct audio_client *q6asm_audio_client_alloc(struct device *dev, q6asm_cb cb,
+					      void *priv, int stream_id,
+					      int perf_mode)
+{
+	struct q6asm *a = dev_get_drvdata(dev->parent);
+	struct audio_client *ac;
+	unsigned long flags;
+
+	ac = q6asm_get_audio_client(a, stream_id + 1);
+	if (ac) {
+		dev_err(dev, "Audio Client already active\n");
+		return ac;
+	}
+
+	ac = kzalloc(sizeof(*ac), GFP_KERNEL);
+	if (!ac)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_irqsave(&a->slock, flags);
+	a->session[stream_id + 1] = ac;
+	spin_unlock_irqrestore(&a->slock, flags);
+	ac->session = stream_id + 1;
+	ac->cb = cb;
+	ac->dev = dev;
+	ac->q6asm = a;
+	ac->priv = priv;
+	ac->io_mode = ASM_SYNC_IO_MODE;
+	ac->perf_mode = perf_mode;
+	/* DSP expects stream id from 1 */
+	ac->stream_id = 1;
+	ac->adev = a->adev;
+	kref_init(&ac->refcount);
+
+	init_waitqueue_head(&ac->cmd_wait);
+	mutex_init(&ac->cmd_lock);
+	spin_lock_init(&ac->lock);
+
+	return ac;
+}
+EXPORT_SYMBOL_GPL(q6asm_audio_client_alloc);
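+
+/*
+ * Usage sketch (illustrative only, not part of the driver): a front-end
+ * PCM driver would typically allocate a client from its open() path and
+ * register an event callback. The callback and private-data names below
+ * are hypothetical.
+ *
+ *	static void example_q6asm_cb(uint32_t opcode, uint32_t token,
+ *				     void *payload, void *priv)
+ *	{
+ *		// e.g. advance the ring buffer on
+ *		// ASM_CLIENT_EVENT_DATA_WRITE_DONE
+ *	}
+ *
+ *	ac = q6asm_audio_client_alloc(dev, example_q6asm_cb, prtd,
+ *				      stream_id, LEGACY_PCM_MODE);
+ *	if (IS_ERR(ac))
+ *		return PTR_ERR(ac);
+ */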
+
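+/*
+ * q6asm_ac_send_cmd_sync() pairs with the callbacks above: the callback
+ * copies the DSP response into ac->result and wakes ac->cmd_wait, so the
+ * wait here completes once result.opcode matches the command that was
+ * sent. A 5 second timeout guards against an unresponsive DSP.
+ */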
+static int q6asm_ac_send_cmd_sync(struct audio_client *ac, struct apr_pkt *pkt)
+{
+	struct apr_hdr *hdr = &pkt->hdr;
+	int rc;
+
+	mutex_lock(&ac->cmd_lock);
+	ac->result.opcode = 0;
+	ac->result.status = 0;
+
+	rc = apr_send_pkt(ac->adev, pkt);
+	if (rc < 0)
+		goto err;
+
+	rc = wait_event_timeout(ac->cmd_wait,
+				(ac->result.opcode == hdr->opcode), 5 * HZ);
+	if (!rc) {
+		dev_err(ac->dev, "CMD timeout\n");
+		rc = -ETIMEDOUT;
+		goto err;
+	}
+
+	if (ac->result.status > 0) {
+		dev_err(ac->dev, "DSP returned error[%x]\n",
+			ac->result.status);
+		rc = -EINVAL;
+	} else {
+		rc = 0;
+	}
+
+err:
+	mutex_unlock(&ac->cmd_lock);
+	return rc;
+}
+
+/**
+ * q6asm_open_write() - Open audio client for writing
+ *
+ * @ac: audio client pointer
+ * @format: audio sample format
+ * @bits_per_sample: bits per sample
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_open_write(struct audio_client *ac, uint32_t format,
+		     uint16_t bits_per_sample)
+{
+	struct asm_stream_cmd_open_write_v3 *open;
+	struct apr_pkt *pkt;
+	void *p;
+	int rc, pkt_size;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*open);
+
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	open = p + APR_HDR_SIZE;
+	q6asm_add_hdr(ac, &pkt->hdr, pkt_size, true, ac->stream_id);
+
+	pkt->hdr.opcode = ASM_STREAM_CMD_OPEN_WRITE_V3;
+	open->mode_flags = 0x00;
+	open->mode_flags |= ASM_LEGACY_STREAM_SESSION;
+
+	/* sink endpoint : matrix */
+	open->sink_endpointype = ASM_END_POINT_DEVICE_MATRIX;
+	open->bits_per_sample = bits_per_sample;
+	open->postprocopo_id = ASM_NULL_POPP_TOPOLOGY;
+
+	switch (format) {
+	case FORMAT_LINEAR_PCM:
+		open->dec_fmt_id = ASM_MEDIA_FMT_MULTI_CHANNEL_PCM_V2;
+		break;
+	default:
+		dev_err(ac->dev, "Invalid format 0x%x\n", format);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	rc = q6asm_ac_send_cmd_sync(ac, pkt);
+	if (rc < 0)
+		goto err;
+
+	ac->io_mode |= ASM_TUN_WRITE_IO_MODE;
+
+err:
+	kfree(pkt);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(q6asm_open_write);
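+
+/*
+ * Usage sketch (illustrative): a typical playback prepare sequence opens
+ * the stream, configures the media format and then starts the session:
+ *
+ *	ret = q6asm_open_write(ac, FORMAT_LINEAR_PCM, 16);
+ *	if (!ret)
+ *		ret = q6asm_media_format_block_multi_ch_pcm(ac, 48000, 2,
+ *							    NULL, 16);
+ *	if (!ret)
+ *		ret = q6asm_run(ac, 0, 0, 0);
+ */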
+
+static int __q6asm_run(struct audio_client *ac, uint32_t flags,
+	      uint32_t msw_ts, uint32_t lsw_ts, bool wait)
+{
+	struct asm_session_cmd_run_v2 *run;
+	struct apr_pkt *pkt;
+	int pkt_size, rc;
+	void *p;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*run);
+	p = kzalloc(pkt_size, GFP_ATOMIC);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	run = p + APR_HDR_SIZE;
+
+	q6asm_add_hdr(ac, &pkt->hdr, pkt_size, true, ac->stream_id);
+
+	pkt->hdr.opcode = ASM_SESSION_CMD_RUN_V2;
+	run->flags = flags;
+	run->time_lsw = lsw_ts;
+	run->time_msw = msw_ts;
+	if (wait) {
+		rc = q6asm_ac_send_cmd_sync(ac, pkt);
+	} else {
+		rc = apr_send_pkt(ac->adev, pkt);
+		if (rc == pkt_size)
+			rc = 0;
+	}
+
+	kfree(pkt);
+	return rc;
+}
+
+/**
+ * q6asm_run() - start the audio client
+ *
+ * @ac: audio client pointer
+ * @flags: flags associated with write
+ * @msw_ts: timestamp msw
+ * @lsw_ts: timestamp lsw
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_run(struct audio_client *ac, uint32_t flags,
+	      uint32_t msw_ts, uint32_t lsw_ts)
+{
+	return __q6asm_run(ac, flags, msw_ts, lsw_ts, true);
+}
+EXPORT_SYMBOL_GPL(q6asm_run);
+
+/**
+ * q6asm_run_nowait() - start the audio client without blocking
+ *
+ * @ac: audio client pointer
+ * @flags: flags associated with write
+ * @msw_ts: timestamp msw
+ * @lsw_ts: timestamp lsw
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_run_nowait(struct audio_client *ac, uint32_t flags,
+	      uint32_t msw_ts, uint32_t lsw_ts)
+{
+	return __q6asm_run(ac, flags, msw_ts, lsw_ts, false);
+}
+EXPORT_SYMBOL_GPL(q6asm_run_nowait);
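+
+/*
+ * Note: q6asm_run_nowait() is meant for atomic contexts such as PCM
+ * trigger callbacks, where the sleeping wait in q6asm_ac_send_cmd_sync()
+ * is not allowed; __q6asm_run() allocates with GFP_ATOMIC for the same
+ * reason.
+ */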
+
+/**
+ * q6asm_media_format_block_multi_ch_pcm() - setup pcm configuration
+ *
+ * @ac: audio client pointer
+ * @rate: audio sample rate
+ * @channels: number of audio channels.
+ * @channel_map: channel map pointer
+ * @bits_per_sample: bits per sample
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_media_format_block_multi_ch_pcm(struct audio_client *ac,
+					  uint32_t rate, uint32_t channels,
+					  u8 channel_map[PCM_MAX_NUM_CHANNEL],
+					  uint16_t bits_per_sample)
+{
+	struct asm_multi_channel_pcm_fmt_blk_v2 *fmt;
+	struct apr_pkt *pkt;
+	u8 *channel_mapping;
+	void *p;
+	int rc, pkt_size;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*fmt);
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	fmt = p + APR_HDR_SIZE;
+
+	q6asm_add_hdr(ac, &pkt->hdr, pkt_size, true, ac->stream_id);
+
+	pkt->hdr.opcode = ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2;
+	fmt->fmt_blk.fmt_blk_size = sizeof(*fmt) - sizeof(fmt->fmt_blk);
+	fmt->num_channels = channels;
+	fmt->bits_per_sample = bits_per_sample;
+	fmt->sample_rate = rate;
+	fmt->is_signed = 1;
+
+	channel_mapping = fmt->channel_mapping;
+
+	if (channel_map) {
+		memcpy(channel_mapping, channel_map, PCM_MAX_NUM_CHANNEL);
+	} else {
+		if (q6dsp_map_channels(channel_mapping, channels)) {
+			dev_err(ac->dev, " map channels failed %d\n", channels);
+			rc = -EINVAL;
+			goto err;
+		}
+	}
+
+	rc = q6asm_ac_send_cmd_sync(ac, pkt);
+
+err:
+	kfree(pkt);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(q6asm_media_format_block_multi_ch_pcm);
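+
+/*
+ * When @channel_map is NULL the default q6dsp_map_channels() layout is
+ * used; callers with a non-standard speaker layout can pass an explicit
+ * map instead (values below are illustrative):
+ *
+ *	u8 map[PCM_MAX_NUM_CHANNEL] = { PCM_CHANNEL_FR, PCM_CHANNEL_FL };
+ *
+ *	ret = q6asm_media_format_block_multi_ch_pcm(ac, 48000, 2, map, 16);
+ */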
+
+/**
+ * q6asm_enc_cfg_blk_pcm_format_support() - setup pcm configuration for capture
+ *
+ * @ac: audio client pointer
+ * @rate: audio sample rate
+ * @channels: number of audio channels.
+ * @bits_per_sample: bits per sample
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_enc_cfg_blk_pcm_format_support(struct audio_client *ac,
+		uint32_t rate, uint32_t channels, uint16_t bits_per_sample)
+{
+	struct asm_multi_channel_pcm_enc_cfg_v2 *enc_cfg;
+	struct apr_pkt *pkt;
+	u8 *channel_mapping;
+	u32 frames_per_buf = 0;
+	int pkt_size, rc;
+	void *p;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*enc_cfg);
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	enc_cfg = p + APR_HDR_SIZE;
+	q6asm_add_hdr(ac, &pkt->hdr, pkt_size, true, ac->stream_id);
+
+	pkt->hdr.opcode = ASM_STREAM_CMD_SET_ENCDEC_PARAM;
+	enc_cfg->encdec.param_id = ASM_PARAM_ID_ENCDEC_ENC_CFG_BLK_V2;
+	enc_cfg->encdec.param_size = sizeof(*enc_cfg) - sizeof(enc_cfg->encdec);
+	enc_cfg->encblk.frames_per_buf = frames_per_buf;
+	enc_cfg->encblk.enc_cfg_blk_size = enc_cfg->encdec.param_size -
+					sizeof(struct asm_enc_cfg_blk_param_v2);
+
+	enc_cfg->num_channels = channels;
+	enc_cfg->bits_per_sample = bits_per_sample;
+	enc_cfg->sample_rate = rate;
+	enc_cfg->is_signed = 1;
+	channel_mapping = enc_cfg->channel_mapping;
+
+	if (q6dsp_map_channels(channel_mapping, channels)) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	rc = q6asm_ac_send_cmd_sync(ac, pkt);
+err:
+	kfree(pkt);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(q6asm_enc_cfg_blk_pcm_format_support);
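+
+/*
+ * Usage sketch (illustrative): capture mirrors the playback sequence,
+ * opening the stream for reading and configuring the encoder block
+ * before queueing buffers with q6asm_read():
+ *
+ *	ret = q6asm_open_read(ac, FORMAT_LINEAR_PCM, 16);
+ *	if (!ret)
+ *		ret = q6asm_enc_cfg_blk_pcm_format_support(ac, 48000, 2, 16);
+ *	if (!ret)
+ *		ret = q6asm_run(ac, 0, 0, 0);
+ */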
+
+/**
+ * q6asm_read() - read data of period size from audio client
+ *
+ * @ac: audio client pointer
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_read(struct audio_client *ac)
+{
+	struct asm_data_cmd_read_v2 *read;
+	struct audio_port_data *port;
+	struct audio_buffer *ab;
+	struct apr_pkt *pkt;
+	unsigned long flags;
+	int pkt_size;
+	int rc = 0;
+	void *p;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*read);
+	p = kzalloc(pkt_size, GFP_ATOMIC);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	read = p + APR_HDR_SIZE;
+
+	spin_lock_irqsave(&ac->lock, flags);
+	port = &ac->port[SNDRV_PCM_STREAM_CAPTURE];
+	q6asm_add_hdr(ac, &pkt->hdr, pkt_size, false, ac->stream_id);
+	ab = &port->buf[port->dsp_buf];
+	pkt->hdr.opcode = ASM_DATA_CMD_READ_V2;
+	read->buf_addr_lsw = lower_32_bits(ab->phys);
+	read->buf_addr_msw = upper_32_bits(ab->phys);
+	read->mem_map_handle = port->mem_map_handle;
+
+	read->buf_size = ab->size;
+	read->seq_id = port->dsp_buf;
+	pkt->hdr.token = port->dsp_buf;
+
+	port->dsp_buf++;
+
+	if (port->dsp_buf >= port->num_periods)
+		port->dsp_buf = 0;
+
+	spin_unlock_irqrestore(&ac->lock, flags);
+	rc = apr_send_pkt(ac->adev, pkt);
+	if (rc == pkt_size)
+		rc = 0;
+	else
+		pr_err("read op[0x%x]rc[%d]\n", pkt->hdr.opcode, rc);
+
+	kfree(pkt);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(q6asm_read);
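+
+/*
+ * Note: each q6asm_read() call queues the next capture period to the
+ * DSP; hdr.token carries the buffer index so the READ_DONE handler in
+ * q6asm_stream_callback() can validate the completed buffer address.
+ */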
+
+static int __q6asm_open_read(struct audio_client *ac,
+		uint32_t format, uint16_t bits_per_sample)
+{
+	struct asm_stream_cmd_open_read_v3 *open;
+	struct apr_pkt *pkt;
+	int pkt_size, rc;
+	void *p;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*open);
+	p = kzalloc(pkt_size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	open = p + APR_HDR_SIZE;
+
+	q6asm_add_hdr(ac, &pkt->hdr, pkt_size, true, ac->stream_id);
+	pkt->hdr.opcode = ASM_STREAM_CMD_OPEN_READ_V3;
+	/* Stream prio : High, provide meta info with encoded frames */
+	open->src_endpointype = ASM_END_POINT_DEVICE_MATRIX;
+
+	open->preprocopo_id = ASM_STREAM_POSTPROC_TOPO_ID_NONE;
+	open->bits_per_sample = bits_per_sample;
+	open->mode_flags = 0x0;
+
+	open->mode_flags |= ASM_LEGACY_STREAM_SESSION <<
+				ASM_SHIFT_STREAM_PERF_MODE_FLAG_IN_OPEN_READ;
+
+	switch (format) {
+	case FORMAT_LINEAR_PCM:
+		open->mode_flags |= 0x00;
+		open->enc_cfg_id = ASM_MEDIA_FMT_MULTI_CHANNEL_PCM_V2;
+		break;
+	default:
+		dev_err(ac->dev, "Invalid format 0x%x\n", format);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	rc = q6asm_ac_send_cmd_sync(ac, pkt);
+
+err:
+	kfree(pkt);
+	return rc;
+}
+
+/**
+ * q6asm_open_read() - Open audio client for reading
+ *
+ * @ac: audio client pointer
+ * @format: audio sample format
+ * @bits_per_sample: bits per sample
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_open_read(struct audio_client *ac, uint32_t format,
+			uint16_t bits_per_sample)
+{
+	return __q6asm_open_read(ac, format, bits_per_sample);
+}
+EXPORT_SYMBOL_GPL(q6asm_open_read);
+
+/**
+ * q6asm_write_async() - non blocking write
+ *
+ * @ac: audio client pointer
+ * @len: length in bytes
+ * @msw_ts: timestamp msw
+ * @lsw_ts: timestamp lsw
+ * @wflags: flags associated with write
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_write_async(struct audio_client *ac, uint32_t len, uint32_t msw_ts,
+		       uint32_t lsw_ts, uint32_t wflags)
+{
+	struct asm_data_cmd_write_v2 *write;
+	struct audio_port_data *port;
+	struct audio_buffer *ab;
+	unsigned long flags;
+	struct apr_pkt *pkt;
+	int pkt_size;
+	int rc = 0;
+	void *p;
+
+	pkt_size = APR_HDR_SIZE + sizeof(*write);
+	p = kzalloc(pkt_size, GFP_ATOMIC);
+	if (!p)
+		return -ENOMEM;
+
+	pkt = p;
+	write = p + APR_HDR_SIZE;
+
+	spin_lock_irqsave(&ac->lock, flags);
+	port = &ac->port[SNDRV_PCM_STREAM_PLAYBACK];
+	q6asm_add_hdr(ac, &pkt->hdr, pkt_size, false, ac->stream_id);
+
+	ab = &port->buf[port->dsp_buf];
+	pkt->hdr.token = port->dsp_buf;
+	pkt->hdr.opcode = ASM_DATA_CMD_WRITE_V2;
+	write->buf_addr_lsw = lower_32_bits(ab->phys);
+	write->buf_addr_msw = upper_32_bits(ab->phys);
+	write->buf_size = len;
+	write->seq_id = port->dsp_buf;
+	write->timestamp_lsw = lsw_ts;
+	write->timestamp_msw = msw_ts;
+	write->mem_map_handle =
+	    ac->port[SNDRV_PCM_STREAM_PLAYBACK].mem_map_handle;
+
+	if (wflags == NO_TIMESTAMP)
+		write->flags = (wflags & 0x800000FF);
+	else
+		write->flags = (0x80000000 | wflags);
+
+	port->dsp_buf++;
+
+	if (port->dsp_buf >= port->num_periods)
+		port->dsp_buf = 0;
+
+	spin_unlock_irqrestore(&ac->lock, flags);
+	rc = apr_send_pkt(ac->adev, pkt);
+	if (rc == pkt_size)
+		rc = 0;
+
+	kfree(pkt);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(q6asm_write_async);
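+
+/*
+ * Usage sketch (illustrative): queueing one period of playback data
+ * without a timestamp. The code above masks out bit 31 for NO_TIMESTAMP
+ * writes and sets it otherwise, presumably marking the timestamp words
+ * as valid:
+ *
+ *	ret = q6asm_write_async(ac, period_bytes, 0, 0, NO_TIMESTAMP);
+ */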
+
+static void q6asm_reset_buf_state(struct audio_client *ac)
+{
+	struct audio_port_data *port = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ac->lock, flags);
+	port = &ac->port[SNDRV_PCM_STREAM_PLAYBACK];
+	port->dsp_buf = 0;
+	port = &ac->port[SNDRV_PCM_STREAM_CAPTURE];
+	port->dsp_buf = 0;
+	spin_unlock_irqrestore(&ac->lock, flags);
+}
+
+static int __q6asm_cmd(struct audio_client *ac, int cmd, bool wait)
+{
+	int stream_id = ac->stream_id;
+	struct apr_pkt pkt;
+	int rc;
+
+	q6asm_add_hdr(ac, &pkt.hdr, APR_HDR_SIZE, true, stream_id);
+
+	switch (cmd) {
+	case CMD_PAUSE:
+		pkt.hdr.opcode = ASM_SESSION_CMD_PAUSE;
+		break;
+	case CMD_SUSPEND:
+		pkt.hdr.opcode = ASM_SESSION_CMD_SUSPEND;
+		break;
+	case CMD_FLUSH:
+		pkt.hdr.opcode = ASM_STREAM_CMD_FLUSH;
+		break;
+	case CMD_OUT_FLUSH:
+		pkt.hdr.opcode = ASM_STREAM_CMD_FLUSH_READBUFS;
+		break;
+	case CMD_EOS:
+		pkt.hdr.opcode = ASM_DATA_CMD_EOS;
+		break;
+	case CMD_CLOSE:
+		pkt.hdr.opcode = ASM_STREAM_CMD_CLOSE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (wait)
+		rc = q6asm_ac_send_cmd_sync(ac, &pkt);
+	else
+		return apr_send_pkt(ac->adev, &pkt);
+
+	if (rc < 0)
+		return rc;
+
+	if (cmd == CMD_FLUSH)
+		q6asm_reset_buf_state(ac);
+
+	return 0;
+}
+
+/**
+ * q6asm_cmd() - run cmd on audio client
+ *
+ * @ac: audio client pointer
+ * @cmd: command to run on audio client.
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_cmd(struct audio_client *ac, int cmd)
+{
+	return __q6asm_cmd(ac, cmd, true);
+}
+EXPORT_SYMBOL_GPL(q6asm_cmd);
+
+/**
+ * q6asm_cmd_nowait() - non blocking, run cmd on audio client
+ *
+ * @ac: audio client pointer
+ * @cmd: command to run on audio client.
+ *
+ * Return: Will be a negative value on error or zero on success
+ */
+int q6asm_cmd_nowait(struct audio_client *ac, int cmd)
+{
+	return __q6asm_cmd(ac, cmd, false);
+}
+EXPORT_SYMBOL_GPL(q6asm_cmd_nowait);
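+
+/*
+ * Usage sketch (illustrative): draining and tearing down a stream. EOS
+ * is typically sent synchronously so that ASM_CLIENT_EVENT_CMD_EOS_DONE
+ * has arrived before the stream is closed, while atomic trigger paths
+ * use the nowait variant:
+ *
+ *	q6asm_cmd(ac, CMD_EOS);
+ *	q6asm_cmd(ac, CMD_CLOSE);
+ *
+ *	q6asm_cmd_nowait(ac, CMD_PAUSE);	// from a trigger callback
+ */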
+
+static int q6asm_probe(struct apr_device *adev)
+{
+	struct device *dev = &adev->dev;
+	struct device_node *dais_np;
+	struct q6asm *q6asm;
+
+	q6asm = devm_kzalloc(dev, sizeof(*q6asm), GFP_KERNEL);
+	if (!q6asm)
+		return -ENOMEM;
+
+	q6core_get_svc_api_info(adev->svc_id, &q6asm->ainfo);
+
+	q6asm->dev = dev;
+	q6asm->adev = adev;
+	init_waitqueue_head(&q6asm->mem_wait);
+	spin_lock_init(&q6asm->slock);
+	dev_set_drvdata(dev, q6asm);
+
+	dais_np = of_get_child_by_name(dev->of_node, "dais");
+	if (dais_np) {
+		q6asm->pdev_dais = of_platform_device_create(dais_np,
+							   "q6asm-dai", dev);
+		of_node_put(dais_np);
+	}
+
+	return 0;
+}
+
+static int q6asm_remove(struct apr_device *adev)
+{
+	struct q6asm *q6asm = dev_get_drvdata(&adev->dev);
+
+	if (q6asm->pdev_dais)
+		of_platform_device_destroy(&q6asm->pdev_dais->dev, NULL);
+
+	return 0;
+}
+
+static const struct of_device_id q6asm_device_id[] = {
+	{ .compatible = "qcom,q6asm" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, q6asm_device_id);
+
+static struct apr_driver qcom_q6asm_driver = {
+	.probe = q6asm_probe,
+	.remove = q6asm_remove,
+	.callback = q6asm_srvc_callback,
+	.driver = {
+		.name = "qcom-q6asm",
+		.of_match_table = of_match_ptr(q6asm_device_id),
+	},
+};
+
+module_apr_driver(qcom_q6asm_driver);
+MODULE_DESCRIPTION("Q6 Audio Stream Manager driver");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/q6asm.h b/sound/soc/qcom/qdsp6/q6asm.h
new file mode 100644
index 0000000..9f5fb57
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6asm.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __Q6_ASM_H__
+#define __Q6_ASM_H__
+#include "q6dsp-common.h"
+#include <dt-bindings/sound/qcom,q6asm.h>
+
+/* ASM client commands and their completion callback events */
+#define CMD_PAUSE				0x0001
+#define ASM_CLIENT_EVENT_CMD_PAUSE_DONE		0x1001
+#define CMD_FLUSH				0x0002
+#define ASM_CLIENT_EVENT_CMD_FLUSH_DONE		0x1002
+#define CMD_EOS					0x0003
+#define ASM_CLIENT_EVENT_CMD_EOS_DONE		0x1003
+#define CMD_CLOSE				0x0004
+#define ASM_CLIENT_EVENT_CMD_CLOSE_DONE		0x1004
+#define CMD_OUT_FLUSH				0x0005
+#define ASM_CLIENT_EVENT_CMD_OUT_FLUSH_DONE	0x1005
+#define CMD_SUSPEND				0x0006
+#define ASM_CLIENT_EVENT_CMD_SUSPEND_DONE	0x1006
+#define ASM_CLIENT_EVENT_CMD_RUN_DONE		0x1008
+#define ASM_CLIENT_EVENT_DATA_WRITE_DONE	0x1009
+#define ASM_CLIENT_EVENT_DATA_READ_DONE		0x100a
+
+enum {
+	LEGACY_PCM_MODE = 0,
+	LOW_LATENCY_PCM_MODE,
+	ULTRA_LOW_LATENCY_PCM_MODE,
+	ULL_POST_PROCESSING_PCM_MODE,
+};
+
+#define MAX_SESSIONS	8
+#define NO_TIMESTAMP    0xFF00
+#define FORMAT_LINEAR_PCM   0x0000
+
+typedef void (*q6asm_cb) (uint32_t opcode, uint32_t token,
+			  void *payload, void *priv);
+struct audio_client;
+struct audio_client *q6asm_audio_client_alloc(struct device *dev,
+					      q6asm_cb cb, void *priv,
+					      int session_id, int perf_mode);
+void q6asm_audio_client_free(struct audio_client *ac);
+int q6asm_write_async(struct audio_client *ac, uint32_t len, uint32_t msw_ts,
+		       uint32_t lsw_ts, uint32_t flags);
+int q6asm_open_write(struct audio_client *ac, uint32_t format,
+		     uint16_t bits_per_sample);
+
+int q6asm_open_read(struct audio_client *ac, uint32_t format,
+		     uint16_t bits_per_sample);
+int q6asm_enc_cfg_blk_pcm_format_support(struct audio_client *ac,
+		uint32_t rate, uint32_t channels, uint16_t bits_per_sample);
+int q6asm_read(struct audio_client *ac);
+
+int q6asm_media_format_block_multi_ch_pcm(struct audio_client *ac,
+					  uint32_t rate, uint32_t channels,
+					  u8 channel_map[PCM_MAX_NUM_CHANNEL],
+					  uint16_t bits_per_sample);
+int q6asm_run(struct audio_client *ac, uint32_t flags, uint32_t msw_ts,
+	      uint32_t lsw_ts);
+int q6asm_run_nowait(struct audio_client *ac, uint32_t flags, uint32_t msw_ts,
+		     uint32_t lsw_ts);
+int q6asm_cmd(struct audio_client *ac, int cmd);
+int q6asm_cmd_nowait(struct audio_client *ac, int cmd);
+int q6asm_get_session_id(struct audio_client *ac);
+int q6asm_map_memory_regions(unsigned int dir,
+			     struct audio_client *ac,
+			     phys_addr_t phys,
+			     size_t bufsz, unsigned int bufcnt);
+int q6asm_unmap_memory_regions(unsigned int dir, struct audio_client *ac);
+#endif /* __Q6_ASM_H__ */
diff --git a/sound/soc/qcom/qdsp6/q6core.c b/sound/soc/qcom/qdsp6/q6core.c
new file mode 100644
index 0000000..06f03a5
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6core.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/jiffies.h>
+#include <linux/soc/qcom/apr.h>
+#include "q6core.h"
+#include "q6dsp-errno.h"
+
+#define ADSP_STATE_READY_TIMEOUT_MS    3000
+#define Q6_READY_TIMEOUT_MS 100
+#define AVCS_CMD_ADSP_EVENT_GET_STATE		0x0001290C
+#define AVCS_CMDRSP_ADSP_EVENT_GET_STATE	0x0001290D
+#define AVCS_GET_VERSIONS       0x00012905
+#define AVCS_GET_VERSIONS_RSP   0x00012906
+#define AVCS_CMD_GET_FWK_VERSION	0x001292c
+#define AVCS_CMDRSP_GET_FWK_VERSION	0x001292d
+
+struct avcs_svc_info {
+	uint32_t service_id;
+	uint32_t version;
+} __packed;
+
+struct avcs_cmdrsp_get_version {
+	uint32_t build_id;
+	uint32_t num_services;
+	struct avcs_svc_info svc_api_info[];
+} __packed;
+
+/* for ADSP2.8 and above */
+struct avcs_svc_api_info {
+	uint32_t service_id;
+	uint32_t api_version;
+	uint32_t api_branch_version;
+} __packed;
+
+struct avcs_cmdrsp_get_fwk_version {
+	uint32_t build_major_version;
+	uint32_t build_minor_version;
+	uint32_t build_branch_version;
+	uint32_t build_subbranch_version;
+	uint32_t num_services;
+	struct avcs_svc_api_info svc_api_info[];
+} __packed;
+
+struct q6core {
+	struct apr_device *adev;
+	wait_queue_head_t wait;
+	uint32_t avcs_state;
+	struct mutex lock;
+	bool resp_received;
+	uint32_t num_services;
+	struct avcs_cmdrsp_get_fwk_version *fwk_version;
+	struct avcs_cmdrsp_get_version *svc_version;
+	bool fwk_version_supported;
+	bool get_state_supported;
+	bool get_version_supported;
+	bool is_version_requested;
+};
+
+static struct q6core *g_core;
+
+static int q6core_callback(struct apr_device *adev, struct apr_resp_pkt *data)
+{
+	struct q6core *core = dev_get_drvdata(&adev->dev);
+	struct aprv2_ibasic_rsp_result_t *result;
+	struct apr_hdr *hdr = &data->hdr;
+
+	result = data->payload;
+	switch (hdr->opcode) {
+	case APR_BASIC_RSP_RESULT: {
+		switch (result->opcode) {
+		case AVCS_GET_VERSIONS:
+			if (result->status == ADSP_EUNSUPPORTED)
+				core->get_version_supported = false;
+			core->resp_received = true;
+			break;
+		case AVCS_CMD_GET_FWK_VERSION:
+			if (result->status == ADSP_EUNSUPPORTED)
+				core->fwk_version_supported = false;
+			core->resp_received = true;
+			break;
+		case AVCS_CMD_ADSP_EVENT_GET_STATE:
+			if (result->status == ADSP_EUNSUPPORTED)
+				core->get_state_supported = false;
+			core->resp_received = true;
+			break;
+		}
+		break;
+	}
+	case AVCS_CMDRSP_GET_FWK_VERSION: {
+		struct avcs_cmdrsp_get_fwk_version *fwk;
+		int bytes;
+
+		fwk = data->payload;
+		bytes = sizeof(*fwk) + fwk->num_services *
+				sizeof(fwk->svc_api_info[0]);
+
+		core->fwk_version = kzalloc(bytes, GFP_ATOMIC);
+		if (!core->fwk_version)
+			return -ENOMEM;
+
+		memcpy(core->fwk_version, data->payload, bytes);
+
+		core->fwk_version_supported = true;
+		core->resp_received = true;
+
+		break;
+	}
+	case AVCS_GET_VERSIONS_RSP: {
+		struct avcs_cmdrsp_get_version *v;
+		int len;
+
+		v = data->payload;
+
+		len = sizeof(*v) + v->num_services * sizeof(v->svc_api_info[0]);
+
+		core->svc_version = kzalloc(len, GFP_ATOMIC);
+		if (!core->svc_version)
+			return -ENOMEM;
+
+		memcpy(core->svc_version, data->payload, len);
+
+		core->get_version_supported = true;
+		core->resp_received = true;
+
+		break;
+	}
+	case AVCS_CMDRSP_ADSP_EVENT_GET_STATE:
+		core->get_state_supported = true;
+		core->avcs_state = result->opcode;
+
+		core->resp_received = true;
+		break;
+	default:
+		dev_err(&adev->dev, "Message id from adsp core svc: 0x%x\n",
+			hdr->opcode);
+		break;
+	}
+
+	if (core->resp_received)
+		wake_up(&core->wait);
+
+	return 0;
+}
+
+static int q6core_get_fwk_versions(struct q6core *core)
+{
+	struct apr_device *adev = core->adev;
+	struct apr_pkt pkt;
+	int rc;
+
+	pkt.hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+				      APR_HDR_LEN(APR_HDR_SIZE), APR_PKT_VER);
+	pkt.hdr.pkt_size = APR_HDR_SIZE;
+	pkt.hdr.opcode = AVCS_CMD_GET_FWK_VERSION;
+
+	rc = apr_send_pkt(adev, &pkt);
+	if (rc < 0)
+		return rc;
+
+	rc = wait_event_timeout(core->wait, (core->resp_received),
+				msecs_to_jiffies(Q6_READY_TIMEOUT_MS));
+	if (rc > 0 && core->resp_received) {
+		core->resp_received = false;
+
+		if (!core->fwk_version_supported)
+			return -ENOTSUPP;
+		else
+			return 0;
+	}
+
+	return rc;
+}
+
+static int q6core_get_svc_versions(struct q6core *core)
+{
+	struct apr_device *adev = core->adev;
+	struct apr_pkt pkt;
+	int rc;
+
+	pkt.hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+				      APR_HDR_LEN(APR_HDR_SIZE), APR_PKT_VER);
+	pkt.hdr.pkt_size = APR_HDR_SIZE;
+	pkt.hdr.opcode = AVCS_GET_VERSIONS;
+
+	rc = apr_send_pkt(adev, &pkt);
+	if (rc < 0)
+		return rc;
+
+	rc = wait_event_timeout(core->wait, (core->resp_received),
+				msecs_to_jiffies(Q6_READY_TIMEOUT_MS));
+	if (rc > 0 && core->resp_received) {
+		core->resp_received = false;
+		return 0;
+	}
+
+	return rc;
+}
+
+static bool __q6core_is_adsp_ready(struct q6core *core)
+{
+	struct apr_device *adev = core->adev;
+	struct apr_pkt pkt;
+	int rc;
+
+	core->get_state_supported = false;
+
+	pkt.hdr.hdr_field = APR_HDR_FIELD(APR_MSG_TYPE_SEQ_CMD,
+				      APR_HDR_LEN(APR_HDR_SIZE), APR_PKT_VER);
+	pkt.hdr.pkt_size = APR_HDR_SIZE;
+	pkt.hdr.opcode = AVCS_CMD_ADSP_EVENT_GET_STATE;
+
+	rc = apr_send_pkt(adev, &pkt);
+	if (rc < 0)
+		return false;
+
+	rc = wait_event_timeout(core->wait, (core->resp_received),
+				msecs_to_jiffies(Q6_READY_TIMEOUT_MS));
+	if (rc > 0 && core->resp_received) {
+		core->resp_received = false;
+
+		if (core->avcs_state)
+			return true;
+	}
+
+	/* assume that the adsp is up if this command is not supported */
+	if (!core->get_state_supported)
+		return true;
+
+	return false;
+}
+
+/**
+ * q6core_get_svc_api_info() - Get version number of a service.
+ *
+ * @svc_id: service id of the service.
+ * @ainfo: Valid struct pointer to fill svc api information.
+ *
+ * Return: zero on success and error code on failure or unsupported
+ */
+int q6core_get_svc_api_info(int svc_id, struct q6core_svc_api_info *ainfo)
+{
+	int i;
+	int ret = -ENOTSUPP;
+
+	if (!g_core || !ainfo)
+		return 0;
+
+	mutex_lock(&g_core->lock);
+	if (!g_core->is_version_requested) {
+		if (q6core_get_fwk_versions(g_core) == -ENOTSUPP)
+			q6core_get_svc_versions(g_core);
+		g_core->is_version_requested = true;
+	}
+
+	if (g_core->fwk_version_supported) {
+		for (i = 0; i < g_core->fwk_version->num_services; i++) {
+			struct avcs_svc_api_info *info;
+
+			info = &g_core->fwk_version->svc_api_info[i];
+			if (svc_id != info->service_id)
+				continue;
+
+			ainfo->api_version = info->api_version;
+			ainfo->api_branch_version = info->api_branch_version;
+			ret = 0;
+			break;
+		}
+	} else if (g_core->get_version_supported) {
+		for (i = 0; i < g_core->svc_version->num_services; i++) {
+			struct avcs_svc_info *info;
+
+			info = &g_core->svc_version->svc_api_info[i];
+			if (svc_id != info->service_id)
+				continue;
+
+			ainfo->api_version = info->version;
+			ainfo->api_branch_version = 0;
+			ret = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&g_core->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(q6core_get_svc_api_info);
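+
+/*
+ * Usage sketch (illustrative): service drivers query their API version
+ * at probe time, as q6asm_probe() does:
+ *
+ *	struct q6core_svc_api_info ainfo;
+ *
+ *	q6core_get_svc_api_info(adev->svc_id, &ainfo);
+ */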
+
+/**
+ * q6core_is_adsp_ready() - Get status of adsp
+ *
+ * Return: Will be true if adsp is ready and false if not.
+ */
+bool q6core_is_adsp_ready(void)
+{
+	unsigned long timeout;
+	bool ret = false;
+
+	if (!g_core)
+		return false;
+
+	mutex_lock(&g_core->lock);
+	timeout = jiffies + msecs_to_jiffies(ADSP_STATE_READY_TIMEOUT_MS);
+	for (;;) {
+		if (__q6core_is_adsp_ready(g_core)) {
+			ret = true;
+			break;
+		}
+
+		if (!time_after(timeout, jiffies)) {
+			ret = false;
+			break;
+		}
+	}
+
+	mutex_unlock(&g_core->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(q6core_is_adsp_ready);
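+
+/*
+ * Usage sketch (illustrative): machine or service drivers can gate their
+ * probe on DSP readiness:
+ *
+ *	if (!q6core_is_adsp_ready())
+ *		return -EPROBE_DEFER;
+ */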
+
+static int q6core_probe(struct apr_device *adev)
+{
+	g_core = kzalloc(sizeof(*g_core), GFP_KERNEL);
+	if (!g_core)
+		return -ENOMEM;
+
+	dev_set_drvdata(&adev->dev, g_core);
+
+	mutex_init(&g_core->lock);
+	g_core->adev = adev;
+	init_waitqueue_head(&g_core->wait);
+	return 0;
+}
+
+static int q6core_exit(struct apr_device *adev)
+{
+	struct q6core *core = dev_get_drvdata(&adev->dev);
+
+	if (core->fwk_version_supported)
+		kfree(core->fwk_version);
+	if (core->get_version_supported)
+		kfree(core->svc_version);
+
+	g_core = NULL;
+	kfree(core);
+
+	return 0;
+}
+
+static const struct of_device_id q6core_device_id[] = {
+	{ .compatible = "qcom,q6core" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, q6core_device_id);
+
+static struct apr_driver qcom_q6core_driver = {
+	.probe = q6core_probe,
+	.remove = q6core_exit,
+	.callback = q6core_callback,
+	.driver = {
+		.name = "qcom-q6core",
+		.of_match_table = of_match_ptr(q6core_device_id),
+	},
+};
+
+module_apr_driver(qcom_q6core_driver);
+MODULE_DESCRIPTION("q6 core");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/q6core.h b/sound/soc/qcom/qdsp6/q6core.h
new file mode 100644
index 0000000..4105b1d
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6core.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __Q6CORE_H__
+#define __Q6CORE_H__
+
+struct q6core_svc_api_info {
+	uint32_t service_id;
+	uint32_t api_version;
+	uint32_t api_branch_version;
+};
+
+bool q6core_is_adsp_ready(void);
+int q6core_get_svc_api_info(int svc_id, struct q6core_svc_api_info *ainfo);
+
+#endif /* __Q6CORE_H__ */
diff --git a/sound/soc/qcom/qdsp6/q6dsp-common.c b/sound/soc/qcom/qdsp6/q6dsp-common.c
new file mode 100644
index 0000000..d393003
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6dsp-common.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include "q6dsp-common.h"
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+
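+/*
+ * Fill @ch_map with a default channel layout for @ch channels. Example
+ * (illustrative): for stereo the result is
+ *
+ *	u8 map[PCM_MAX_NUM_CHANNEL];
+ *
+ *	q6dsp_map_channels(map, 2);
+ *	// map[0] == PCM_CHANNEL_FL, map[1] == PCM_CHANNEL_FR, the
+ *	// remaining entries stay PCM_CHANNEL_NULL (zero)
+ */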
+int q6dsp_map_channels(u8 ch_map[PCM_MAX_NUM_CHANNEL], int ch)
+{
+	memset(ch_map, 0, PCM_MAX_NUM_CHANNEL);
+
+	switch (ch) {
+	case 1:
+		ch_map[0] = PCM_CHANNEL_FC;
+		break;
+	case 2:
+		ch_map[0] = PCM_CHANNEL_FL;
+		ch_map[1] = PCM_CHANNEL_FR;
+		break;
+	case 3:
+		ch_map[0] = PCM_CHANNEL_FL;
+		ch_map[1] = PCM_CHANNEL_FR;
+		ch_map[2] = PCM_CHANNEL_FC;
+		break;
+	case 4:
+		ch_map[0] = PCM_CHANNEL_FL;
+		ch_map[1] = PCM_CHANNEL_FR;
+		ch_map[2] = PCM_CHANNEL_LS;
+		ch_map[3] = PCM_CHANNEL_RS;
+		break;
+	case 5:
+		ch_map[0] = PCM_CHANNEL_FL;
+		ch_map[1] = PCM_CHANNEL_FR;
+		ch_map[2] = PCM_CHANNEL_FC;
+		ch_map[3] = PCM_CHANNEL_LS;
+		ch_map[4] = PCM_CHANNEL_RS;
+		break;
+	case 6:
+		ch_map[0] = PCM_CHANNEL_FL;
+		ch_map[1] = PCM_CHANNEL_FR;
+		ch_map[2] = PCM_CHANNEL_LFE;
+		ch_map[3] = PCM_CHANNEL_FC;
+		ch_map[4] = PCM_CHANNEL_LS;
+		ch_map[5] = PCM_CHANNEL_RS;
+		break;
+	case 8:
+		ch_map[0] = PCM_CHANNEL_FL;
+		ch_map[1] = PCM_CHANNEL_FR;
+		ch_map[2] = PCM_CHANNEL_LFE;
+		ch_map[3] = PCM_CHANNEL_FC;
+		ch_map[4] = PCM_CHANNEL_LS;
+		ch_map[5] = PCM_CHANNEL_RS;
+		ch_map[6] = PCM_CHANNEL_LB;
+		ch_map[7] = PCM_CHANNEL_RB;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(q6dsp_map_channels);
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/q6dsp-common.h b/sound/soc/qcom/qdsp6/q6dsp-common.h
new file mode 100644
index 0000000..01094d1
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6dsp-common.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __Q6DSP_COMMON_H__
+#define __Q6DSP_COMMON_H__
+
+#include <linux/kernel.h>
+
+#define PCM_MAX_NUM_CHANNEL  8
+#define PCM_CHANNEL_NULL 0
+
+#define PCM_CHANNEL_FL    1	/* Front left channel. */
+#define PCM_CHANNEL_FR    2	/* Front right channel. */
+#define PCM_CHANNEL_FC    3	/* Front center channel. */
+#define PCM_CHANNEL_LS   4	/* Left surround channel. */
+#define PCM_CHANNEL_RS   5	/* Right surround channel. */
+#define PCM_CHANNEL_LFE  6	/* Low frequency effect channel. */
+#define PCM_CHANNEL_CS   7	/* Center surround channel; Rear center channel. */
+#define PCM_CHANNEL_LB   8	/* Left back channel; Rear left channel. */
+#define PCM_CHANNEL_RB   9	/* Right back channel; Rear right channel. */
+#define PCM_CHANNELS   10	/* Top surround channel. */
+
+int q6dsp_map_channels(u8 ch_map[PCM_MAX_NUM_CHANNEL], int ch);
+
+#endif /* __Q6DSP_COMMON_H__ */
diff --git a/sound/soc/qcom/qdsp6/q6dsp-errno.h b/sound/soc/qcom/qdsp6/q6dsp-errno.h
new file mode 100644
index 0000000..1ec00ff
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6dsp-errno.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __Q6DSP_ERR_NO_H__
+#define __Q6DSP_ERR_NO_H__
+#include <linux/kernel.h>
+
+/* Success. The operation completed with no errors. */
+#define ADSP_EOK          0x00000000
+/* General failure. */
+#define ADSP_EFAILED      0x00000001
+/* Bad operation parameter. */
+#define ADSP_EBADPARAM    0x00000002
+/* Unsupported routine or operation. */
+#define ADSP_EUNSUPPORTED 0x00000003
+/* Unsupported version. */
+#define ADSP_EVERSION     0x00000004
+/* Unexpected problem encountered. */
+#define ADSP_EUNEXPECTED  0x00000005
+/* Unhandled problem occurred. */
+#define ADSP_EPANIC       0x00000006
+/* Unable to allocate resource. */
+#define ADSP_ENORESOURCE  0x00000007
+/* Invalid handle. */
+#define ADSP_EHANDLE      0x00000008
+/* Operation is already processed. */
+#define ADSP_EALREADY     0x00000009
+/* Operation is not ready to be processed. */
+#define ADSP_ENOTREADY    0x0000000A
+/* Operation is pending completion. */
+#define ADSP_EPENDING     0x0000000B
+/* Operation could not be accepted or processed. */
+#define ADSP_EBUSY        0x0000000C
+/* Operation aborted due to an error. */
+#define ADSP_EABORTED     0x0000000D
+/* Operation preempted by a higher priority. */
+#define ADSP_EPREEMPTED   0x0000000E
+/* Operation requests intervention to complete. */
+#define ADSP_ECONTINUE    0x0000000F
+/* Operation requests immediate intervention to complete. */
+#define ADSP_EIMMEDIATE   0x00000010
+/* Operation is not implemented. */
+#define ADSP_ENOTIMPL     0x00000011
+/* Operation needs more data or resources. */
+#define ADSP_ENEEDMORE    0x00000012
+/* Operation does not have memory. */
+#define ADSP_ENOMEMORY    0x00000014
+/* Item does not exist. */
+#define ADSP_ENOTEXIST    0x00000015
+/* Max count for adsp error code sent to HLOS */
+
+#endif /*__Q6DSP_ERR_NO_H__ */
diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c
new file mode 100644
index 0000000..593f66b
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6routing.c
@@ -0,0 +1,1006 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+// Copyright (c) 2018, Linaro Limited
+
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/bitops.h>
+#include <linux/component.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <sound/core.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <sound/pcm.h>
+#include <sound/control.h>
+#include <sound/asound.h>
+#include <sound/pcm_params.h>
+#include "q6afe.h"
+#include "q6asm.h"
+#include "q6adm.h"
+#include "q6routing.h"
+
+#define DRV_NAME "q6routing-component"
+
+#define Q6ROUTING_RX_MIXERS(id)						\
+	SOC_SINGLE_EXT("MultiMedia1", id,				\
+	MSM_FRONTEND_DAI_MULTIMEDIA1, 1, 0, msm_routing_get_audio_mixer,\
+	msm_routing_put_audio_mixer),					\
+	SOC_SINGLE_EXT("MultiMedia2", id,				\
+	MSM_FRONTEND_DAI_MULTIMEDIA2, 1, 0, msm_routing_get_audio_mixer,\
+	msm_routing_put_audio_mixer),					\
+	SOC_SINGLE_EXT("MultiMedia3", id,				\
+	MSM_FRONTEND_DAI_MULTIMEDIA3, 1, 0, msm_routing_get_audio_mixer,\
+	msm_routing_put_audio_mixer),					\
+	SOC_SINGLE_EXT("MultiMedia4", id,				\
+	MSM_FRONTEND_DAI_MULTIMEDIA4, 1, 0, msm_routing_get_audio_mixer,\
+	msm_routing_put_audio_mixer),					\
+	SOC_SINGLE_EXT("MultiMedia5", id,				\
+	MSM_FRONTEND_DAI_MULTIMEDIA5, 1, 0, msm_routing_get_audio_mixer,\
+	msm_routing_put_audio_mixer),					\
+	SOC_SINGLE_EXT("MultiMedia6", id,				\
+	MSM_FRONTEND_DAI_MULTIMEDIA6, 1, 0, msm_routing_get_audio_mixer,\
+	msm_routing_put_audio_mixer),					\
+	SOC_SINGLE_EXT("MultiMedia7", id,				\
+	MSM_FRONTEND_DAI_MULTIMEDIA7, 1, 0, msm_routing_get_audio_mixer,\
+	msm_routing_put_audio_mixer),					\
+	SOC_SINGLE_EXT("MultiMedia8", id,				\
+	MSM_FRONTEND_DAI_MULTIMEDIA8, 1, 0, msm_routing_get_audio_mixer,\
+	msm_routing_put_audio_mixer),
+
+#define Q6ROUTING_RX_DAPM_ROUTE(mix_name, s)	\
+	{ mix_name, "MultiMedia1", "MM_DL1" },	\
+	{ mix_name, "MultiMedia2", "MM_DL2" },	\
+	{ mix_name, "MultiMedia3", "MM_DL3" },	\
+	{ mix_name, "MultiMedia4", "MM_DL4" },	\
+	{ mix_name, "MultiMedia5", "MM_DL5" },	\
+	{ mix_name, "MultiMedia6", "MM_DL6" },	\
+	{ mix_name, "MultiMedia7", "MM_DL7" },	\
+	{ mix_name, "MultiMedia8", "MM_DL8" },	\
+	{ s, NULL, mix_name }
+
+#define Q6ROUTING_TX_DAPM_ROUTE(mix_name)		\
+	{ mix_name, "PRI_MI2S_TX", "PRI_MI2S_TX" },	\
+	{ mix_name, "SEC_MI2S_TX", "SEC_MI2S_TX" },	\
+	{ mix_name, "QUAT_MI2S_TX", "QUAT_MI2S_TX" },	\
+	{ mix_name, "TERT_MI2S_TX", "TERT_MI2S_TX" },		\
+	{ mix_name, "PRIMARY_TDM_TX_0", "PRIMARY_TDM_TX_0"},	\
+	{ mix_name, "PRIMARY_TDM_TX_1", "PRIMARY_TDM_TX_1"},	\
+	{ mix_name, "PRIMARY_TDM_TX_2", "PRIMARY_TDM_TX_2"},	\
+	{ mix_name, "PRIMARY_TDM_TX_3", "PRIMARY_TDM_TX_3"},	\
+	{ mix_name, "PRIMARY_TDM_TX_4", "PRIMARY_TDM_TX_4"},	\
+	{ mix_name, "PRIMARY_TDM_TX_5", "PRIMARY_TDM_TX_5"},	\
+	{ mix_name, "PRIMARY_TDM_TX_6", "PRIMARY_TDM_TX_6"},	\
+	{ mix_name, "PRIMARY_TDM_TX_7", "PRIMARY_TDM_TX_7"},	\
+	{ mix_name, "SEC_TDM_TX_0", "SEC_TDM_TX_0"},		\
+	{ mix_name, "SEC_TDM_TX_1", "SEC_TDM_TX_1"},		\
+	{ mix_name, "SEC_TDM_TX_2", "SEC_TDM_TX_2"},		\
+	{ mix_name, "SEC_TDM_TX_3", "SEC_TDM_TX_3"},		\
+	{ mix_name, "SEC_TDM_TX_4", "SEC_TDM_TX_4"},		\
+	{ mix_name, "SEC_TDM_TX_5", "SEC_TDM_TX_5"},		\
+	{ mix_name, "SEC_TDM_TX_6", "SEC_TDM_TX_6"},		\
+	{ mix_name, "SEC_TDM_TX_7", "SEC_TDM_TX_7"},		\
+	{ mix_name, "TERT_TDM_TX_0", "TERT_TDM_TX_0"},		\
+	{ mix_name, "TERT_TDM_TX_1", "TERT_TDM_TX_1"},		\
+	{ mix_name, "TERT_TDM_TX_2", "TERT_TDM_TX_2"},		\
+	{ mix_name, "TERT_TDM_TX_3", "TERT_TDM_TX_3"},		\
+	{ mix_name, "TERT_TDM_TX_4", "TERT_TDM_TX_4"},		\
+	{ mix_name, "TERT_TDM_TX_5", "TERT_TDM_TX_5"},		\
+	{ mix_name, "TERT_TDM_TX_6", "TERT_TDM_TX_6"},		\
+	{ mix_name, "TERT_TDM_TX_7", "TERT_TDM_TX_7"},		\
+	{ mix_name, "QUAT_TDM_TX_0", "QUAT_TDM_TX_0"},		\
+	{ mix_name, "QUAT_TDM_TX_1", "QUAT_TDM_TX_1"},		\
+	{ mix_name, "QUAT_TDM_TX_2", "QUAT_TDM_TX_2"},		\
+	{ mix_name, "QUAT_TDM_TX_3", "QUAT_TDM_TX_3"},		\
+	{ mix_name, "QUAT_TDM_TX_4", "QUAT_TDM_TX_4"},		\
+	{ mix_name, "QUAT_TDM_TX_5", "QUAT_TDM_TX_5"},		\
+	{ mix_name, "QUAT_TDM_TX_6", "QUAT_TDM_TX_6"},		\
+	{ mix_name, "QUAT_TDM_TX_7", "QUAT_TDM_TX_7"},		\
+	{ mix_name, "QUIN_TDM_TX_0", "QUIN_TDM_TX_0"},		\
+	{ mix_name, "QUIN_TDM_TX_1", "QUIN_TDM_TX_1"},		\
+	{ mix_name, "QUIN_TDM_TX_2", "QUIN_TDM_TX_2"},		\
+	{ mix_name, "QUIN_TDM_TX_3", "QUIN_TDM_TX_3"},		\
+	{ mix_name, "QUIN_TDM_TX_4", "QUIN_TDM_TX_4"},		\
+	{ mix_name, "QUIN_TDM_TX_5", "QUIN_TDM_TX_5"},		\
+	{ mix_name, "QUIN_TDM_TX_6", "QUIN_TDM_TX_6"},		\
+	{ mix_name, "QUIN_TDM_TX_7", "QUIN_TDM_TX_7"}
+
+#define Q6ROUTING_TX_MIXERS(id)						\
+	SOC_SINGLE_EXT("PRI_MI2S_TX", PRIMARY_MI2S_TX,			\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_MI2S_TX", SECONDARY_MI2S_TX,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_MI2S_TX", TERTIARY_MI2S_TX,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_MI2S_TX", QUATERNARY_MI2S_TX,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("PRIMARY_TDM_TX_0", PRIMARY_TDM_TX_0,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("PRIMARY_TDM_TX_1", PRIMARY_TDM_TX_1,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("PRIMARY_TDM_TX_2", PRIMARY_TDM_TX_2,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("PRIMARY_TDM_TX_3", PRIMARY_TDM_TX_3,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("PRIMARY_TDM_TX_4", PRIMARY_TDM_TX_4,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("PRIMARY_TDM_TX_5", PRIMARY_TDM_TX_5,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("PRIMARY_TDM_TX_6", PRIMARY_TDM_TX_6,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("PRIMARY_TDM_TX_7", PRIMARY_TDM_TX_7,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_TDM_TX_0", SECONDARY_TDM_TX_0,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_TDM_TX_1", SECONDARY_TDM_TX_1,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_TDM_TX_2", SECONDARY_TDM_TX_2,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_TDM_TX_3", SECONDARY_TDM_TX_3,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_TDM_TX_4", SECONDARY_TDM_TX_4,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_TDM_TX_5", SECONDARY_TDM_TX_5,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_TDM_TX_6", SECONDARY_TDM_TX_6,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("SEC_TDM_TX_7", SECONDARY_TDM_TX_7,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_TDM_TX_0", TERTIARY_TDM_TX_0,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_TDM_TX_1", TERTIARY_TDM_TX_1,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_TDM_TX_2", TERTIARY_TDM_TX_2,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_TDM_TX_3", TERTIARY_TDM_TX_3,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_TDM_TX_4", TERTIARY_TDM_TX_4,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_TDM_TX_5", TERTIARY_TDM_TX_5,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_TDM_TX_6", TERTIARY_TDM_TX_6,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("TERT_TDM_TX_7", TERTIARY_TDM_TX_7,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_TDM_TX_0", QUATERNARY_TDM_TX_0,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_TDM_TX_1", QUATERNARY_TDM_TX_1,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_TDM_TX_2", QUATERNARY_TDM_TX_2,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_TDM_TX_3", QUATERNARY_TDM_TX_3,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_TDM_TX_4", QUATERNARY_TDM_TX_4,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_TDM_TX_5", QUATERNARY_TDM_TX_5,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_TDM_TX_6", QUATERNARY_TDM_TX_6,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUAT_TDM_TX_7", QUATERNARY_TDM_TX_7,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUIN_TDM_TX_0", QUINARY_TDM_TX_0,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUIN_TDM_TX_1", QUINARY_TDM_TX_1,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUIN_TDM_TX_2", QUINARY_TDM_TX_2,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUIN_TDM_TX_3", QUINARY_TDM_TX_3,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUIN_TDM_TX_4", QUINARY_TDM_TX_4,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUIN_TDM_TX_5", QUINARY_TDM_TX_5,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUIN_TDM_TX_6", QUINARY_TDM_TX_6,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),				\
+	SOC_SINGLE_EXT("QUIN_TDM_TX_7", QUINARY_TDM_TX_7,		\
+		id, 1, 0, msm_routing_get_audio_mixer,			\
+		msm_routing_put_audio_mixer),
+
+struct session_data {
+	int state;
+	int port_id;
+	int path_type;
+	int app_type;
+	int acdb_id;
+	int sample_rate;
+	int bits_per_sample;
+	int channels;
+	int perf_mode;
+	int numcopps;
+	int fedai_id;
+	unsigned long copp_map;
+	struct q6copp *copps[MAX_COPPS_PER_PORT];
+};
+
+struct msm_routing_data {
+	struct session_data sessions[MAX_SESSIONS];
+	struct session_data port_data[AFE_MAX_PORTS];
+	struct device *dev;
+	struct mutex lock;
+};
+
+static struct msm_routing_data *routing_data;
+
+/**
+ * q6routing_stream_open() - Register a new stream for route setup
+ *
+ * @fedai_id: Frontend dai id.
+ * @perf_mode: Performance mode.
+ * @stream_id: ASM stream id to map.
+ * @stream_type: Direction of stream
+ *
+ * Return: Will be a negative value on error or zero on success.
+ */
+int q6routing_stream_open(int fedai_id, int perf_mode,
+			   int stream_id, int stream_type)
+{
+	int j, topology, num_copps = 0;
+	struct route_payload payload;
+	struct q6copp *copp;
+	int copp_idx;
+	struct session_data *session, *pdata;
+
+	if (!routing_data) {
+		pr_err("Routing driver not yet ready\n");
+		return -EINVAL;
+	}
+
+	session = &routing_data->sessions[stream_id - 1];
+	pdata = &routing_data->port_data[session->port_id];
+
+	mutex_lock(&routing_data->lock);
+	session->fedai_id = fedai_id;
+
+	session->path_type = pdata->path_type;
+	session->sample_rate = pdata->sample_rate;
+	session->channels = pdata->channels;
+	session->bits_per_sample = pdata->bits_per_sample;
+
+	payload.num_copps = 0; /* only RX needs to use payload */
+	topology = NULL_COPP_TOPOLOGY;
+	copp = q6adm_open(routing_data->dev, session->port_id,
+			      session->path_type, session->sample_rate,
+			      session->channels, topology, perf_mode,
+			      session->bits_per_sample, 0, 0);
+
+	if (!copp) {
+		mutex_unlock(&routing_data->lock);
+		return -EINVAL;
+	}
+
+	copp_idx = q6adm_get_copp_id(copp);
+	set_bit(copp_idx, &session->copp_map);
+	session->copps[copp_idx] = copp;
+
+	for_each_set_bit(j, &session->copp_map, MAX_COPPS_PER_PORT) {
+		payload.port_id[num_copps] = session->port_id;
+		payload.copp_idx[num_copps] = j;
+		num_copps++;
+	}
+
+	if (num_copps) {
+		payload.num_copps = num_copps;
+		payload.session_id = stream_id;
+		q6adm_matrix_map(routing_data->dev, session->path_type,
+				 payload, perf_mode);
+	}
+	mutex_unlock(&routing_data->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(q6routing_stream_open);
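+
+/*
+ * Usage sketch (illustrative): an ASM platform driver would call this
+ * from its prepare path once the stream id and performance mode are
+ * known:
+ *
+ *	ret = q6routing_stream_open(fedai_id, LEGACY_PCM_MODE,
+ *				    q6asm_get_session_id(ac),
+ *				    SNDRV_PCM_STREAM_PLAYBACK);
+ */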
+
+static struct session_data *get_session_from_id(struct msm_routing_data *data,
+						int fedai_id)
+{
+	int i;
+
+	for (i = 0; i < MAX_SESSIONS; i++) {
+		if (fedai_id == data->sessions[i].fedai_id)
+			return &data->sessions[i];
+	}
+
+	return NULL;
+}
+
+/**
+ * q6routing_stream_close() - Deregister a stream
+ *
+ * @fedai_id: Frontend dai id.
+ * @stream_type: Direction of stream
+ */
+void q6routing_stream_close(int fedai_id, int stream_type)
+{
+	struct session_data *session;
+	int idx;
+
+	session = get_session_from_id(routing_data, fedai_id);
+	if (!session)
+		return;
+
+	for_each_set_bit(idx, &session->copp_map, MAX_COPPS_PER_PORT) {
+		if (session->copps[idx]) {
+			q6adm_close(routing_data->dev, session->copps[idx]);
+			session->copps[idx] = NULL;
+		}
+	}
+
+	session->fedai_id = -1;
+	session->copp_map = 0;
+}
+EXPORT_SYMBOL_GPL(q6routing_stream_close);
+
+static int msm_routing_get_audio_mixer(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_dapm_context *dapm =
+	    snd_soc_dapm_kcontrol_dapm(kcontrol);
+	struct soc_mixer_control *mc =
+	    (struct soc_mixer_control *)kcontrol->private_value;
+	int session_id = mc->shift;
+	struct snd_soc_component *c = snd_soc_dapm_to_component(dapm);
+	struct msm_routing_data *priv = dev_get_drvdata(c->dev);
+	struct session_data *session = &priv->sessions[session_id];
+
+	if (session->port_id == mc->reg)
+		ucontrol->value.integer.value[0] = 1;
+	else
+		ucontrol->value.integer.value[0] = 0;
+
+	return 0;
+}
+
+static int msm_routing_put_audio_mixer(struct snd_kcontrol *kcontrol,
+				       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_dapm_context *dapm =
+				    snd_soc_dapm_kcontrol_dapm(kcontrol);
+	struct snd_soc_component *c = snd_soc_dapm_to_component(dapm);
+	struct msm_routing_data *data = dev_get_drvdata(c->dev);
+	struct soc_mixer_control *mc =
+		    (struct soc_mixer_control *)kcontrol->private_value;
+	struct snd_soc_dapm_update *update = NULL;
+	int be_id = mc->reg;
+	int session_id = mc->shift;
+	struct session_data *session = &data->sessions[session_id];
+
+	if (ucontrol->value.integer.value[0]) {
+		session->port_id = be_id;
+		snd_soc_dapm_mixer_update_power(dapm, kcontrol, 1, update);
+	} else {
+		session->port_id = -1;
+		snd_soc_dapm_mixer_update_power(dapm, kcontrol, 0, update);
+	}
+
+	return 1;
+}
+
+static const struct snd_kcontrol_new hdmi_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(HDMI_RX) };
+
+static const struct snd_kcontrol_new primary_mi2s_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_MI2S_RX) };
+
+static const struct snd_kcontrol_new secondary_mi2s_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_MI2S_RX) };
+
+static const struct snd_kcontrol_new quaternary_mi2s_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_MI2S_RX) };
+
+static const struct snd_kcontrol_new tertiary_mi2s_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_MI2S_RX) };
+
+static const struct snd_kcontrol_new slimbus_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SLIMBUS_0_RX) };
+
+static const struct snd_kcontrol_new slimbus_1_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SLIMBUS_1_RX) };
+
+static const struct snd_kcontrol_new slimbus_2_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SLIMBUS_2_RX) };
+
+static const struct snd_kcontrol_new slimbus_3_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SLIMBUS_3_RX) };
+
+static const struct snd_kcontrol_new slimbus_4_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SLIMBUS_4_RX) };
+
+static const struct snd_kcontrol_new slimbus_5_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SLIMBUS_5_RX) };
+
+static const struct snd_kcontrol_new slimbus_6_rx_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SLIMBUS_6_RX) };
+
+static const struct snd_kcontrol_new pri_tdm_rx_0_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_TDM_RX_0) };
+
+static const struct snd_kcontrol_new pri_tdm_rx_1_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_TDM_RX_1) };
+
+static const struct snd_kcontrol_new pri_tdm_rx_2_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_TDM_RX_2) };
+
+static const struct snd_kcontrol_new pri_tdm_rx_3_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_TDM_RX_3) };
+
+static const struct snd_kcontrol_new pri_tdm_rx_4_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_TDM_RX_4) };
+
+static const struct snd_kcontrol_new pri_tdm_rx_5_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_TDM_RX_5) };
+
+static const struct snd_kcontrol_new pri_tdm_rx_6_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_TDM_RX_6) };
+
+static const struct snd_kcontrol_new pri_tdm_rx_7_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(PRIMARY_TDM_RX_7) };
+
+static const struct snd_kcontrol_new sec_tdm_rx_0_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_TDM_RX_0) };
+
+static const struct snd_kcontrol_new sec_tdm_rx_1_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_TDM_RX_1) };
+
+static const struct snd_kcontrol_new sec_tdm_rx_2_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_TDM_RX_2) };
+
+static const struct snd_kcontrol_new sec_tdm_rx_3_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_TDM_RX_3) };
+
+static const struct snd_kcontrol_new sec_tdm_rx_4_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_TDM_RX_4) };
+
+static const struct snd_kcontrol_new sec_tdm_rx_5_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_TDM_RX_5) };
+
+static const struct snd_kcontrol_new sec_tdm_rx_6_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_TDM_RX_6) };
+
+static const struct snd_kcontrol_new sec_tdm_rx_7_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(SECONDARY_TDM_RX_7) };
+
+static const struct snd_kcontrol_new tert_tdm_rx_0_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_TDM_RX_0) };
+
+static const struct snd_kcontrol_new tert_tdm_rx_1_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_TDM_RX_1) };
+
+static const struct snd_kcontrol_new tert_tdm_rx_2_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_TDM_RX_2) };
+
+static const struct snd_kcontrol_new tert_tdm_rx_3_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_TDM_RX_3) };
+
+static const struct snd_kcontrol_new tert_tdm_rx_4_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_TDM_RX_4) };
+
+static const struct snd_kcontrol_new tert_tdm_rx_5_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_TDM_RX_5) };
+
+static const struct snd_kcontrol_new tert_tdm_rx_6_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_TDM_RX_6) };
+
+static const struct snd_kcontrol_new tert_tdm_rx_7_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(TERTIARY_TDM_RX_7) };
+
+static const struct snd_kcontrol_new quat_tdm_rx_0_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_TDM_RX_0) };
+
+static const struct snd_kcontrol_new quat_tdm_rx_1_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_TDM_RX_1) };
+
+static const struct snd_kcontrol_new quat_tdm_rx_2_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_TDM_RX_2) };
+
+static const struct snd_kcontrol_new quat_tdm_rx_3_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_TDM_RX_3) };
+
+static const struct snd_kcontrol_new quat_tdm_rx_4_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_TDM_RX_4) };
+
+static const struct snd_kcontrol_new quat_tdm_rx_5_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_TDM_RX_5) };
+
+static const struct snd_kcontrol_new quat_tdm_rx_6_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_TDM_RX_6) };
+
+static const struct snd_kcontrol_new quat_tdm_rx_7_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUATERNARY_TDM_RX_7) };
+
+static const struct snd_kcontrol_new quin_tdm_rx_0_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUINARY_TDM_RX_0) };
+
+static const struct snd_kcontrol_new quin_tdm_rx_1_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUINARY_TDM_RX_1) };
+
+static const struct snd_kcontrol_new quin_tdm_rx_2_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUINARY_TDM_RX_2) };
+
+static const struct snd_kcontrol_new quin_tdm_rx_3_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUINARY_TDM_RX_3) };
+
+static const struct snd_kcontrol_new quin_tdm_rx_4_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUINARY_TDM_RX_4) };
+
+static const struct snd_kcontrol_new quin_tdm_rx_5_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUINARY_TDM_RX_5) };
+
+static const struct snd_kcontrol_new quin_tdm_rx_6_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUINARY_TDM_RX_6) };
+
+static const struct snd_kcontrol_new quin_tdm_rx_7_mixer_controls[] = {
+	Q6ROUTING_RX_MIXERS(QUINARY_TDM_RX_7) };
+
+static const struct snd_kcontrol_new mmul1_mixer_controls[] = {
+	Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA1) };
+
+static const struct snd_kcontrol_new mmul2_mixer_controls[] = {
+	Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA2) };
+
+static const struct snd_kcontrol_new mmul3_mixer_controls[] = {
+	Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA3) };
+
+static const struct snd_kcontrol_new mmul4_mixer_controls[] = {
+	Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA4) };
+
+static const struct snd_kcontrol_new mmul5_mixer_controls[] = {
+	Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA5) };
+
+static const struct snd_kcontrol_new mmul6_mixer_controls[] = {
+	Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA6) };
+
+static const struct snd_kcontrol_new mmul7_mixer_controls[] = {
+	Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA7) };
+
+static const struct snd_kcontrol_new mmul8_mixer_controls[] = {
+	Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA8) };
+
+static const struct snd_soc_dapm_widget msm_qdsp6_widgets[] = {
+	/* Frontend AIF */
+	SND_SOC_DAPM_AIF_IN("MM_DL1", "MultiMedia1 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("MM_DL2", "MultiMedia2 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("MM_DL3", "MultiMedia3 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("MM_DL4", "MultiMedia4 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("MM_DL5", "MultiMedia5 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("MM_DL6", "MultiMedia6 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("MM_DL7", "MultiMedia7 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("MM_DL8", "MultiMedia8 Playback", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("MM_UL1", "MultiMedia1 Capture", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("MM_UL2", "MultiMedia2 Capture", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("MM_UL3", "MultiMedia3 Capture", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("MM_UL4", "MultiMedia4 Capture", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("MM_UL5", "MultiMedia5 Capture", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("MM_UL6", "MultiMedia6 Capture", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("MM_UL7", "MultiMedia7 Capture", 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("MM_UL8", "MultiMedia8 Capture", 0, 0, 0, 0),
+
+	/* Mixer definitions */
+	SND_SOC_DAPM_MIXER("HDMI Mixer", SND_SOC_NOPM, 0, 0,
+			   hdmi_mixer_controls,
+			   ARRAY_SIZE(hdmi_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("SLIMBUS_0_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   slimbus_rx_mixer_controls,
+			   ARRAY_SIZE(slimbus_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SLIMBUS_1_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   slimbus_1_rx_mixer_controls,
+			   ARRAY_SIZE(slimbus_1_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SLIMBUS_2_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   slimbus_2_rx_mixer_controls,
+			   ARRAY_SIZE(slimbus_2_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SLIMBUS_3_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   slimbus_3_rx_mixer_controls,
+			   ARRAY_SIZE(slimbus_3_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SLIMBUS_4_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   slimbus_4_rx_mixer_controls,
+			   ARRAY_SIZE(slimbus_4_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SLIMBUS_5_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   slimbus_5_rx_mixer_controls,
+			   ARRAY_SIZE(slimbus_5_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SLIMBUS_6_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   slimbus_6_rx_mixer_controls,
+			   ARRAY_SIZE(slimbus_6_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRI_MI2S_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   primary_mi2s_rx_mixer_controls,
+			   ARRAY_SIZE(primary_mi2s_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SEC_MI2S_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   secondary_mi2s_rx_mixer_controls,
+			   ARRAY_SIZE(secondary_mi2s_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUAT_MI2S_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   quaternary_mi2s_rx_mixer_controls,
+			   ARRAY_SIZE(quaternary_mi2s_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("TERT_MI2S_RX Audio Mixer", SND_SOC_NOPM, 0, 0,
+			   tertiary_mi2s_rx_mixer_controls,
+			   ARRAY_SIZE(tertiary_mi2s_rx_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRIMARY_TDM_RX_0 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				pri_tdm_rx_0_mixer_controls,
+				ARRAY_SIZE(pri_tdm_rx_0_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRIMARY_TDM_RX_1 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				pri_tdm_rx_1_mixer_controls,
+				ARRAY_SIZE(pri_tdm_rx_1_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRIMARY_TDM_RX_2 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				pri_tdm_rx_2_mixer_controls,
+				ARRAY_SIZE(pri_tdm_rx_2_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRIMARY_TDM_RX_3 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				pri_tdm_rx_3_mixer_controls,
+				ARRAY_SIZE(pri_tdm_rx_3_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRIMARY_TDM_RX_4 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				pri_tdm_rx_4_mixer_controls,
+				ARRAY_SIZE(pri_tdm_rx_4_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRIMARY_TDM_RX_5 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				pri_tdm_rx_5_mixer_controls,
+				ARRAY_SIZE(pri_tdm_rx_5_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRIMARY_TDM_RX_6 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				pri_tdm_rx_6_mixer_controls,
+				ARRAY_SIZE(pri_tdm_rx_6_mixer_controls)),
+	SND_SOC_DAPM_MIXER("PRIMARY_TDM_RX_7 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				pri_tdm_rx_7_mixer_controls,
+				ARRAY_SIZE(pri_tdm_rx_7_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("SEC_TDM_RX_0 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				sec_tdm_rx_0_mixer_controls,
+				ARRAY_SIZE(sec_tdm_rx_0_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SEC_TDM_RX_1 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				sec_tdm_rx_1_mixer_controls,
+				ARRAY_SIZE(sec_tdm_rx_1_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SEC_TDM_RX_2 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				sec_tdm_rx_2_mixer_controls,
+				ARRAY_SIZE(sec_tdm_rx_2_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SEC_TDM_RX_3 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				sec_tdm_rx_3_mixer_controls,
+				ARRAY_SIZE(sec_tdm_rx_3_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SEC_TDM_RX_4 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				sec_tdm_rx_4_mixer_controls,
+				ARRAY_SIZE(sec_tdm_rx_4_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SEC_TDM_RX_5 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				sec_tdm_rx_5_mixer_controls,
+				ARRAY_SIZE(sec_tdm_rx_5_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SEC_TDM_RX_6 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				sec_tdm_rx_6_mixer_controls,
+				ARRAY_SIZE(sec_tdm_rx_6_mixer_controls)),
+	SND_SOC_DAPM_MIXER("SEC_TDM_RX_7 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				sec_tdm_rx_7_mixer_controls,
+				ARRAY_SIZE(sec_tdm_rx_7_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("TERT_TDM_RX_0 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				tert_tdm_rx_0_mixer_controls,
+				ARRAY_SIZE(tert_tdm_rx_0_mixer_controls)),
+	SND_SOC_DAPM_MIXER("TERT_TDM_RX_1 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				tert_tdm_rx_1_mixer_controls,
+				ARRAY_SIZE(tert_tdm_rx_1_mixer_controls)),
+	SND_SOC_DAPM_MIXER("TERT_TDM_RX_2 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				tert_tdm_rx_2_mixer_controls,
+				ARRAY_SIZE(tert_tdm_rx_2_mixer_controls)),
+	SND_SOC_DAPM_MIXER("TERT_TDM_RX_3 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				tert_tdm_rx_3_mixer_controls,
+				ARRAY_SIZE(tert_tdm_rx_3_mixer_controls)),
+	SND_SOC_DAPM_MIXER("TERT_TDM_RX_4 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				tert_tdm_rx_4_mixer_controls,
+				ARRAY_SIZE(tert_tdm_rx_4_mixer_controls)),
+	SND_SOC_DAPM_MIXER("TERT_TDM_RX_5 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				tert_tdm_rx_5_mixer_controls,
+				ARRAY_SIZE(tert_tdm_rx_5_mixer_controls)),
+	SND_SOC_DAPM_MIXER("TERT_TDM_RX_6 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				tert_tdm_rx_6_mixer_controls,
+				ARRAY_SIZE(tert_tdm_rx_6_mixer_controls)),
+	SND_SOC_DAPM_MIXER("TERT_TDM_RX_7 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				tert_tdm_rx_7_mixer_controls,
+				ARRAY_SIZE(tert_tdm_rx_7_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("QUAT_TDM_RX_0 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quat_tdm_rx_0_mixer_controls,
+				ARRAY_SIZE(quat_tdm_rx_0_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUAT_TDM_RX_1 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quat_tdm_rx_1_mixer_controls,
+				ARRAY_SIZE(quat_tdm_rx_1_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUAT_TDM_RX_2 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quat_tdm_rx_2_mixer_controls,
+				ARRAY_SIZE(quat_tdm_rx_2_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUAT_TDM_RX_3 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quat_tdm_rx_3_mixer_controls,
+				ARRAY_SIZE(quat_tdm_rx_3_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUAT_TDM_RX_4 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quat_tdm_rx_4_mixer_controls,
+				ARRAY_SIZE(quat_tdm_rx_4_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUAT_TDM_RX_5 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quat_tdm_rx_5_mixer_controls,
+				ARRAY_SIZE(quat_tdm_rx_5_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUAT_TDM_RX_6 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quat_tdm_rx_6_mixer_controls,
+				ARRAY_SIZE(quat_tdm_rx_6_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUAT_TDM_RX_7 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quat_tdm_rx_7_mixer_controls,
+				ARRAY_SIZE(quat_tdm_rx_7_mixer_controls)),
+
+	SND_SOC_DAPM_MIXER("QUIN_TDM_RX_0 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quin_tdm_rx_0_mixer_controls,
+				ARRAY_SIZE(quin_tdm_rx_0_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUIN_TDM_RX_1 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quin_tdm_rx_1_mixer_controls,
+				ARRAY_SIZE(quin_tdm_rx_1_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUIN_TDM_RX_2 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quin_tdm_rx_2_mixer_controls,
+				ARRAY_SIZE(quin_tdm_rx_2_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUIN_TDM_RX_3 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quin_tdm_rx_3_mixer_controls,
+				ARRAY_SIZE(quin_tdm_rx_3_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUIN_TDM_RX_4 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quin_tdm_rx_4_mixer_controls,
+				ARRAY_SIZE(quin_tdm_rx_4_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUIN_TDM_RX_5 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quin_tdm_rx_5_mixer_controls,
+				ARRAY_SIZE(quin_tdm_rx_5_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUIN_TDM_RX_6 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quin_tdm_rx_6_mixer_controls,
+				ARRAY_SIZE(quin_tdm_rx_6_mixer_controls)),
+	SND_SOC_DAPM_MIXER("QUIN_TDM_RX_7 Audio Mixer", SND_SOC_NOPM, 0, 0,
+				quin_tdm_rx_7_mixer_controls,
+				ARRAY_SIZE(quin_tdm_rx_7_mixer_controls)),
+	SND_SOC_DAPM_MIXER("MultiMedia1 Mixer", SND_SOC_NOPM, 0, 0,
+		mmul1_mixer_controls, ARRAY_SIZE(mmul1_mixer_controls)),
+	SND_SOC_DAPM_MIXER("MultiMedia2 Mixer", SND_SOC_NOPM, 0, 0,
+		mmul2_mixer_controls, ARRAY_SIZE(mmul2_mixer_controls)),
+	SND_SOC_DAPM_MIXER("MultiMedia3 Mixer", SND_SOC_NOPM, 0, 0,
+		mmul3_mixer_controls, ARRAY_SIZE(mmul3_mixer_controls)),
+	SND_SOC_DAPM_MIXER("MultiMedia4 Mixer", SND_SOC_NOPM, 0, 0,
+		mmul4_mixer_controls, ARRAY_SIZE(mmul4_mixer_controls)),
+	SND_SOC_DAPM_MIXER("MultiMedia5 Mixer", SND_SOC_NOPM, 0, 0,
+		mmul5_mixer_controls, ARRAY_SIZE(mmul5_mixer_controls)),
+	SND_SOC_DAPM_MIXER("MultiMedia6 Mixer", SND_SOC_NOPM, 0, 0,
+		mmul6_mixer_controls, ARRAY_SIZE(mmul6_mixer_controls)),
+	SND_SOC_DAPM_MIXER("MultiMedia7 Mixer", SND_SOC_NOPM, 0, 0,
+		mmul7_mixer_controls, ARRAY_SIZE(mmul7_mixer_controls)),
+	SND_SOC_DAPM_MIXER("MultiMedia8 Mixer", SND_SOC_NOPM, 0, 0,
+		mmul8_mixer_controls, ARRAY_SIZE(mmul8_mixer_controls)),
+};
+
+static const struct snd_soc_dapm_route intercon[] = {
+	Q6ROUTING_RX_DAPM_ROUTE("HDMI Mixer", "HDMI_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("SLIMBUS_0_RX Audio Mixer", "SLIMBUS_0_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("SLIMBUS_1_RX Audio Mixer", "SLIMBUS_1_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("SLIMBUS_2_RX Audio Mixer", "SLIMBUS_2_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("SLIMBUS_3_RX Audio Mixer", "SLIMBUS_3_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("SLIMBUS_4_RX Audio Mixer", "SLIMBUS_4_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("SLIMBUS_5_RX Audio Mixer", "SLIMBUS_5_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("SLIMBUS_6_RX Audio Mixer", "SLIMBUS_6_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_MI2S_RX Audio Mixer", "QUAT_MI2S_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_MI2S_RX Audio Mixer", "TERT_MI2S_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_MI2S_RX Audio Mixer", "SEC_MI2S_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRI_MI2S_RX Audio Mixer", "PRI_MI2S_RX"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRIMARY_TDM_RX_0 Audio Mixer",
+				"PRIMARY_TDM_RX_0"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRIMARY_TDM_RX_1 Audio Mixer",
+				"PRIMARY_TDM_RX_1"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRIMARY_TDM_RX_2 Audio Mixer",
+				"PRIMARY_TDM_RX_2"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRIMARY_TDM_RX_3 Audio Mixer",
+				"PRIMARY_TDM_RX_3"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRIMARY_TDM_RX_4 Audio Mixer",
+				"PRIMARY_TDM_RX_4"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRIMARY_TDM_RX_5 Audio Mixer",
+				"PRIMARY_TDM_RX_5"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRIMARY_TDM_RX_6 Audio Mixer",
+				"PRIMARY_TDM_RX_6"),
+	Q6ROUTING_RX_DAPM_ROUTE("PRIMARY_TDM_RX_7 Audio Mixer",
+				"PRIMARY_TDM_RX_7"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_TDM_RX_0 Audio Mixer", "SEC_TDM_RX_0"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_TDM_RX_1 Audio Mixer", "SEC_TDM_RX_1"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_TDM_RX_2 Audio Mixer", "SEC_TDM_RX_2"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_TDM_RX_3 Audio Mixer", "SEC_TDM_RX_3"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_TDM_RX_4 Audio Mixer", "SEC_TDM_RX_4"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_TDM_RX_5 Audio Mixer", "SEC_TDM_RX_5"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_TDM_RX_6 Audio Mixer", "SEC_TDM_RX_6"),
+	Q6ROUTING_RX_DAPM_ROUTE("SEC_TDM_RX_7 Audio Mixer", "SEC_TDM_RX_7"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_TDM_RX_0 Audio Mixer", "TERT_TDM_RX_0"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_TDM_RX_1 Audio Mixer", "TERT_TDM_RX_1"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_TDM_RX_2 Audio Mixer", "TERT_TDM_RX_2"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_TDM_RX_3 Audio Mixer", "TERT_TDM_RX_3"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_TDM_RX_4 Audio Mixer", "TERT_TDM_RX_4"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_TDM_RX_5 Audio Mixer", "TERT_TDM_RX_5"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_TDM_RX_6 Audio Mixer", "TERT_TDM_RX_6"),
+	Q6ROUTING_RX_DAPM_ROUTE("TERT_TDM_RX_7 Audio Mixer", "TERT_TDM_RX_7"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_TDM_RX_0 Audio Mixer", "QUAT_TDM_RX_0"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_TDM_RX_1 Audio Mixer", "QUAT_TDM_RX_1"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_TDM_RX_2 Audio Mixer", "QUAT_TDM_RX_2"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_TDM_RX_3 Audio Mixer", "QUAT_TDM_RX_3"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_TDM_RX_4 Audio Mixer", "QUAT_TDM_RX_4"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_TDM_RX_5 Audio Mixer", "QUAT_TDM_RX_5"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_TDM_RX_6 Audio Mixer", "QUAT_TDM_RX_6"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUAT_TDM_RX_7 Audio Mixer", "QUAT_TDM_RX_7"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUIN_TDM_RX_0 Audio Mixer", "QUIN_TDM_RX_0"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUIN_TDM_RX_1 Audio Mixer", "QUIN_TDM_RX_1"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUIN_TDM_RX_2 Audio Mixer", "QUIN_TDM_RX_2"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUIN_TDM_RX_3 Audio Mixer", "QUIN_TDM_RX_3"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUIN_TDM_RX_4 Audio Mixer", "QUIN_TDM_RX_4"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUIN_TDM_RX_5 Audio Mixer", "QUIN_TDM_RX_5"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUIN_TDM_RX_6 Audio Mixer", "QUIN_TDM_RX_6"),
+	Q6ROUTING_RX_DAPM_ROUTE("QUIN_TDM_RX_7 Audio Mixer", "QUIN_TDM_RX_7"),
+	Q6ROUTING_TX_DAPM_ROUTE("MultiMedia1 Mixer"),
+	Q6ROUTING_TX_DAPM_ROUTE("MultiMedia2 Mixer"),
+	Q6ROUTING_TX_DAPM_ROUTE("MultiMedia3 Mixer"),
+	Q6ROUTING_TX_DAPM_ROUTE("MultiMedia4 Mixer"),
+	Q6ROUTING_TX_DAPM_ROUTE("MultiMedia5 Mixer"),
+	Q6ROUTING_TX_DAPM_ROUTE("MultiMedia6 Mixer"),
+	Q6ROUTING_TX_DAPM_ROUTE("MultiMedia7 Mixer"),
+	Q6ROUTING_TX_DAPM_ROUTE("MultiMedia8 Mixer"),
+
+	{"MM_UL1", NULL, "MultiMedia1 Mixer"},
+	{"MM_UL2", NULL, "MultiMedia2 Mixer"},
+	{"MM_UL3", NULL, "MultiMedia3 Mixer"},
+	{"MM_UL4", NULL, "MultiMedia4 Mixer"},
+	{"MM_UL5", NULL, "MultiMedia5 Mixer"},
+	{"MM_UL6", NULL, "MultiMedia6 Mixer"},
+	{"MM_UL7", NULL, "MultiMedia7 Mixer"},
+	{"MM_UL8", NULL, "MultiMedia8 Mixer"},
+};
+
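+/*
+ * Cache the back end DAI stream parameters per port; the routing code
+ * uses them later when the ADM path for this port is set up.
+ */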
+static int routing_hw_params(struct snd_pcm_substream *substream,
+				     struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_component *c = snd_soc_rtdcom_lookup(rtd, DRV_NAME);
+	struct msm_routing_data *data = dev_get_drvdata(c->dev);
+	unsigned int be_id = rtd->cpu_dai->id;
+	struct session_data *session;
+	int path_type;
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		path_type = ADM_PATH_PLAYBACK;
+	else
+		path_type = ADM_PATH_LIVE_REC;
+
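+	/* be_id indexes port_data[], which holds AFE_MAX_PORTS entries */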
+	if (be_id >= AFE_MAX_PORTS)
+		return -EINVAL;
+
+	session = &data->port_data[be_id];
+
+	mutex_lock(&data->lock);
+
+	session->path_type = path_type;
+	session->sample_rate = params_rate(params);
+	session->channels = params_channels(params);
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		session->bits_per_sample = 16;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		session->bits_per_sample = 24;
+		break;
+	default:
+		break;
+	}
+
+	mutex_unlock(&data->lock);
+	return 0;
+}
+
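+/*
+ * Only hw_params is needed here: the routing component just records
+ * stream parameters and never moves any data itself.
+ */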
+static const struct snd_pcm_ops q6pcm_routing_ops = {
+	.hw_params = routing_hw_params,
+};
+
+static int msm_routing_probe(struct snd_soc_component *c)
+{
+	int i;
+
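+	/* port_id == -1 marks a session as not yet routed to a back end */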
+	for (i = 0; i < MAX_SESSIONS; i++)
+		routing_data->sessions[i].port_id = -1;
+
+	return 0;
+}
+
+static const struct snd_soc_component_driver msm_soc_routing_component = {
+	.ops = &q6pcm_routing_ops,
+	.probe = msm_routing_probe,
+	.name = DRV_NAME,
+	.dapm_widgets = msm_qdsp6_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(msm_qdsp6_widgets),
+	.dapm_routes = intercon,
+	.num_dapm_routes = ARRAY_SIZE(intercon),
+};
+
+static int q6routing_dai_bind(struct device *dev, struct device *master,
+			      void *data)
+{
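+	/* routing_data is the file-scope state also used by msm_routing_probe() */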
+	routing_data = kzalloc(sizeof(*routing_data), GFP_KERNEL);
+	if (!routing_data)
+		return -ENOMEM;
+
+	routing_data->dev = dev;
+
+	mutex_init(&routing_data->lock);
+	dev_set_drvdata(dev, routing_data);
+
+	return snd_soc_register_component(dev, &msm_soc_routing_component,
+					  NULL, 0);
+}
+
+static void q6routing_dai_unbind(struct device *dev, struct device *master,
+				 void *d)
+{
+	struct msm_routing_data *data = dev_get_drvdata(dev);
+
+	snd_soc_unregister_component(dev);
+
+	kfree(data);
+
+	routing_data = NULL;
+}
+
+static const struct component_ops q6routing_dai_comp_ops = {
+	.bind   = q6routing_dai_bind,
+	.unbind = q6routing_dai_unbind,
+};
+
+static int q6pcm_routing_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &q6routing_dai_comp_ops);
+}
+
+static int q6pcm_routing_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &q6routing_dai_comp_ops);
+	return 0;
+}
+
+static struct platform_driver q6pcm_routing_platform_driver = {
+	.driver = {
+		.name = "q6routing",
+	},
+	.probe = q6pcm_routing_probe,
+	.remove = q6pcm_routing_remove,
+};
+module_platform_driver(q6pcm_routing_platform_driver);
+
+MODULE_DESCRIPTION("Q6 Routing platform");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/qcom/qdsp6/q6routing.h b/sound/soc/qcom/qdsp6/q6routing.h
new file mode 100644
index 0000000..35514e6
--- /dev/null
+++ b/sound/soc/qcom/qdsp6/q6routing.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _Q6_PCM_ROUTING_H
+#define _Q6_PCM_ROUTING_H
+
+int q6routing_stream_open(int fedai_id, int perf_mode,
+			   int stream_id, int stream_type);
+void q6routing_stream_close(int fedai_id, int stream_type);
+
+#endif /* _Q6_PCM_ROUTING_H */
diff --git a/sound/soc/rockchip/rk3399_gru_sound.c b/sound/soc/rockchip/rk3399_gru_sound.c
index 9a10181..f184168 100644
--- a/sound/soc/rockchip/rk3399_gru_sound.c
+++ b/sound/soc/rockchip/rk3399_gru_sound.c
@@ -220,45 +220,6 @@
 	return 0;
 }
 
-static int rockchip_sound_cdndp_hw_params(struct snd_pcm_substream *substream,
-					  struct snd_pcm_hw_params *params)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
-	struct snd_soc_dai *codec_dai = rtd->codec_dai;
-	int mclk, ret;
-
-	/* in bypass mode, the mclk has to be one of the frequencies below */
-	switch (params_rate(params)) {
-	case 8000:
-	case 16000:
-	case 24000:
-	case 32000:
-	case 48000:
-	case 64000:
-	case 96000:
-		mclk = 12288000;
-		break;
-	case 11025:
-	case 22050:
-	case 44100:
-	case 88200:
-		mclk = 11289600;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	ret = snd_soc_dai_set_sysclk(cpu_dai, 0, mclk,
-				     SND_SOC_CLOCK_OUT);
-	if (ret < 0) {
-		dev_err(codec_dai->dev, "Can't set cpu clock out %d\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
 static int rockchip_sound_dmic_hw_params(struct snd_pcm_substream *substream,
 			     struct snd_pcm_hw_params *params)
 {
@@ -293,10 +254,6 @@
 	.hw_params = rockchip_sound_da7219_hw_params,
 };
 
-static const struct snd_soc_ops rockchip_sound_cdndp_ops = {
-	.hw_params = rockchip_sound_cdndp_hw_params,
-};
-
 static const struct snd_soc_ops rockchip_sound_dmic_ops = {
 	.hw_params = rockchip_sound_dmic_hw_params,
 };
@@ -323,8 +280,7 @@
 	[DAILINK_CDNDP] = {
 		.name = "DP",
 		.stream_name = "DP PCM",
-		.codec_dai_name = "i2s-hifi",
-		.ops = &rockchip_sound_cdndp_ops,
+		.codec_dai_name = "spdif-hifi",
 		.dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF |
 			SND_SOC_DAIFMT_CBS_CFS,
 	},
diff --git a/sound/soc/sh/Kconfig b/sound/soc/sh/Kconfig
index 1aa5cd7..0ae0800 100644
--- a/sound/soc/sh/Kconfig
+++ b/sound/soc/sh/Kconfig
@@ -1,5 +1,5 @@
-menu "SoC Audio support for SuperH"
-	depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
+menu "SoC Audio support for Renesas SoCs"
+	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
 
 config SND_SOC_PCM_SH7760
 	tristate "SoC Audio support for Renesas SH7760"
@@ -28,7 +28,7 @@
 
 config SND_SOC_SH4_SIU
 	tristate
-	depends on (SUPERH || ARCH_SHMOBILE) && HAVE_CLK
+	depends on ARCH_SHMOBILE && HAVE_CLK
 	select DMA_ENGINE
 	select DMADEVICES
 	select SH_DMAE
diff --git a/sound/soc/sh/rcar/cmd.c b/sound/soc/sh/rcar/cmd.c
index f1d4fb5..4221937 100644
--- a/sound/soc/sh/rcar/cmd.c
+++ b/sound/soc/sh/rcar/cmd.c
@@ -125,6 +125,13 @@
 	.stop	= rsnd_cmd_stop,
 };
 
+static struct rsnd_mod *rsnd_cmd_mod_get(struct rsnd_priv *priv, int id)
+{
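+	/* an out-of-range id is a caller bug; warn and fall back to id 0 */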
+	if (WARN_ON(id < 0 || id >= rsnd_cmd_nr(priv)))
+		id = 0;
+
+	return rsnd_mod_get((struct rsnd_cmd *)(priv->cmd) + id);
+}
 int rsnd_cmd_attach(struct rsnd_dai_stream *io, int id)
 {
 	struct rsnd_priv *priv = rsnd_io_to_priv(io);
@@ -133,14 +140,6 @@
 	return rsnd_dai_connect(mod, io, mod->type);
 }
 
-struct rsnd_mod *rsnd_cmd_mod_get(struct rsnd_priv *priv, int id)
-{
-	if (WARN_ON(id < 0 || id >= rsnd_cmd_nr(priv)))
-		id = 0;
-
-	return rsnd_mod_get((struct rsnd_cmd *)(priv->cmd) + id);
-}
-
 int rsnd_cmd_probe(struct rsnd_priv *priv)
 {
 	struct device *dev = rsnd_priv_to_dev(priv);
diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index 94f081b..af04d41 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -111,7 +111,7 @@
 static const struct of_device_id rsnd_of_match[] = {
 	{ .compatible = "renesas,rcar_sound-gen1", .data = (void *)RSND_GEN1 },
 	{ .compatible = "renesas,rcar_sound-gen2", .data = (void *)RSND_GEN2 },
-	{ .compatible = "renesas,rcar_sound-gen3", .data = (void *)RSND_GEN2 }, /* gen2 compatible */
+	{ .compatible = "renesas,rcar_sound-gen3", .data = (void *)RSND_GEN3 },
 	{},
 };
 MODULE_DEVICE_TABLE(of, rsnd_of_match);
@@ -1352,6 +1352,37 @@
 #define PREALLOC_BUFFER		(32 * 1024)
 #define PREALLOC_BUFFER_MAX	(32 * 1024)
 
+static int rsnd_preallocate_pages(struct snd_soc_pcm_runtime *rtd,
+				  struct rsnd_dai_stream *io,
+				  int stream)
+{
+	struct rsnd_priv *priv = rsnd_io_to_priv(io);
+	struct device *dev = rsnd_priv_to_dev(priv);
+	struct snd_pcm_substream *substream;
+	int err;
+
+	/*
+	 * Use the Audio-DMAC device if the IPMMU can be used;
+	 * see rsnd_dmaen_attach().
+	 */
+	if (io->dmac_dev)
+		dev = io->dmac_dev;
+
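+	/* preallocate a DMA buffer for every substream in this direction */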
+	for (substream = rtd->pcm->streams[stream].substream;
+	     substream;
+	     substream = substream->next) {
+		err = snd_pcm_lib_preallocate_pages(substream,
+					SNDRV_DMA_TYPE_DEV,
+					dev,
+					PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd)
 {
 	struct snd_soc_dai *dai = rtd->cpu_dai;
@@ -1366,11 +1397,17 @@
 	if (ret)
 		return ret;
 
-	return snd_pcm_lib_preallocate_pages_for_all(
-		rtd->pcm,
-		SNDRV_DMA_TYPE_DEV,
-		rtd->card->snd_card->dev,
-		PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
+	ret = rsnd_preallocate_pages(rtd, &rdai->playback,
+				     SNDRV_PCM_STREAM_PLAYBACK);
+	if (ret)
+		return ret;
+
+	ret = rsnd_preallocate_pages(rtd, &rdai->capture,
+				     SNDRV_PCM_STREAM_CAPTURE);
+	if (ret)
+		return ret;
+
+	return 0;
 }
 
 static const struct snd_soc_component_driver rsnd_soc_component = {
diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c
index 41de234..ef82b94 100644
--- a/sound/soc/sh/rcar/dma.c
+++ b/sound/soc/sh/rcar/dma.c
@@ -253,6 +253,13 @@
 		return -EAGAIN;
 	}
 
+	/*
+	 * Remember the DMAC device so the IPMMU can be used if needed;
+	 * see rsnd_preallocate_pages().
+	 */
+	io->dmac_dev = chan->device->dev;
+
 	dma_release_channel(chan);
 
 	dmac->dmaen_num++;
@@ -695,7 +702,7 @@
 
 	rsnd_dma_of_path(mod, io, is_play, &mod_from, &mod_to);
 
-	/* for Gen2 */
+	/* for Gen2 or later */
 	if (mod_from && mod_to) {
 		ops	= &rsnd_dmapp_ops;
 		attach	= rsnd_dmapp_attach;
@@ -773,7 +780,7 @@
 		return 0;
 
 	/*
-	 * for Gen2
+	 * for Gen2 or later
 	 */
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "audmapp");
 	dmac = devm_kzalloc(dev, sizeof(*dmac), GFP_KERNEL);
diff --git a/sound/soc/sh/rcar/gen.c b/sound/soc/sh/rcar/gen.c
index f04c410..25642e9 100644
--- a/sound/soc/sh/rcar/gen.c
+++ b/sound/soc/sh/rcar/gen.c
@@ -414,7 +414,8 @@
 	ret = -ENODEV;
 	if (rsnd_is_gen1(priv))
 		ret = rsnd_gen1_probe(priv);
-	else if (rsnd_is_gen2(priv))
+	else if (rsnd_is_gen2(priv) ||
+		 rsnd_is_gen3(priv))
 		ret = rsnd_gen2_probe(priv);
 
 	if (ret < 0)
diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h
index 172c8d6..6d7280d 100644
--- a/sound/soc/sh/rcar/rsnd.h
+++ b/sound/soc/sh/rcar/rsnd.h
@@ -435,6 +435,7 @@
 	struct snd_pcm_substream *substream;
 	struct rsnd_mod *mod[RSND_MOD_MAX];
 	struct rsnd_dai *rdai;
+	struct device *dmac_dev; /* for IPMMU */
 	u32 parent_ssi_status;
 };
 #define rsnd_io_to_mod(io, i)	((i) < RSND_MOD_MAX ? (io)->mod[(i)] : NULL)
@@ -538,6 +539,7 @@
 #define RSND_GEN_MASK	(0xF << 0)
 #define RSND_GEN1	(1 << 0)
 #define RSND_GEN2	(2 << 0)
+#define RSND_GEN3	(3 << 0)
 
 	/*
 	 * below value will be filled on rsnd_gen_probe()
@@ -609,6 +611,7 @@
 
 #define rsnd_is_gen1(priv)	(((priv)->flags & RSND_GEN_MASK) == RSND_GEN1)
 #define rsnd_is_gen2(priv)	(((priv)->flags & RSND_GEN_MASK) == RSND_GEN2)
+#define rsnd_is_gen3(priv)	(((priv)->flags & RSND_GEN_MASK) == RSND_GEN3)
 
 #define rsnd_flags_has(p, f) ((p)->flags & (f))
 #define rsnd_flags_set(p, f) ((p)->flags |= (f))
@@ -775,7 +778,6 @@
 int rsnd_cmd_probe(struct rsnd_priv *priv);
 void rsnd_cmd_remove(struct rsnd_priv *priv);
 int rsnd_cmd_attach(struct rsnd_dai_stream *io, int id);
-struct rsnd_mod *rsnd_cmd_mod_get(struct rsnd_priv *priv, int id);
 
 void rsnd_mod_make_sure(struct rsnd_mod *mod, enum rsnd_mod_type type);
 #ifdef DEBUG
diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index 333b802..9538f76 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -171,7 +171,7 @@
 		if (status & bit)
 			return;
 
-		udelay(50);
+		udelay(5);
 	}
 
 	dev_warn(dev, "%s[%d] status check failed\n",
@@ -1004,19 +1004,26 @@
 	struct device *dev = rsnd_priv_to_dev(priv);
 	struct rsnd_mod *mod = rsnd_io_to_mod_ssi(io);
 	struct rsnd_ssi *ssi;
+	struct device_node *remote_node = of_graph_get_port_parent(remote_ep);
+
+	/* support Gen3 only */
+	if (!rsnd_is_gen3(priv))
+		return;
 
 	if (!mod)
 		return;
 
 	ssi  = rsnd_mod_to_ssi(mod);
 
-	if (strstr(remote_ep->full_name, "hdmi0")) {
+	/* HDMI0 */
+	if (strstr(remote_node->full_name, "hdmi@fead0000")) {
 		rsnd_flags_set(ssi, RSND_SSI_HDMI0);
 		dev_dbg(dev, "%s[%d] connected to HDMI0\n",
 			 rsnd_mod_name(mod), rsnd_mod_id(mod));
 	}
 
-	if (strstr(remote_ep->full_name, "hdmi1")) {
+	/* HDMI1 */
+	if (strstr(remote_node->full_name, "hdmi@feae0000")) {
 		rsnd_flags_set(ssi, RSND_SSI_HDMI1);
 		dev_dbg(dev, "%s[%d] connected to HDMI1\n",
 			rsnd_mod_name(mod), rsnd_mod_id(mod));
diff --git a/sound/soc/soc-cache.c b/sound/soc/soc-cache.c
deleted file mode 100644
index 07f4335..0000000
--- a/sound/soc/soc-cache.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * soc-cache.c  --  ASoC register cache helpers
- *
- * Copyright 2009 Wolfson Microelectronics PLC.
- *
- * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- */
-
-#include <sound/soc.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-
-int snd_soc_cache_init(struct snd_soc_codec *codec)
-{
-	const struct snd_soc_codec_driver *codec_drv = codec->driver;
-	size_t reg_size;
-
-	reg_size = codec_drv->reg_cache_size * codec_drv->reg_word_size;
-
-	if (!reg_size)
-		return 0;
-
-	dev_dbg(codec->dev, "ASoC: Initializing cache for %s codec\n",
-				codec->component.name);
-
-	if (codec_drv->reg_cache_default)
-		codec->reg_cache = kmemdup(codec_drv->reg_cache_default,
-					   reg_size, GFP_KERNEL);
-	else
-		codec->reg_cache = kzalloc(reg_size, GFP_KERNEL);
-	if (!codec->reg_cache)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/*
- * NOTE: keep in mind that this function might be called
- * multiple times.
- */
-int snd_soc_cache_exit(struct snd_soc_codec *codec)
-{
-	dev_dbg(codec->dev, "ASoC: Destroying cache for %s codec\n",
-			codec->component.name);
-	kfree(codec->reg_cache);
-	codec->reg_cache = NULL;
-	return 0;
-}
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index 8240268..e095115 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -26,14 +26,65 @@
 #include <sound/initval.h>
 #include <sound/soc-dpcm.h>
 
+static int soc_compr_components_open(struct snd_compr_stream *cstream,
+				     struct snd_soc_component **last)
+{
+	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
+	struct snd_soc_component *component;
+	struct snd_soc_rtdcom_list *rtdcom;
+	int ret;
+
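+	/*
+	 * Open the stream on every component; on failure, record the
+	 * failing component in *last so the caller unwinds only the
+	 * ones opened before it.
+	 */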
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		if (!component->driver->compr_ops ||
+		    !component->driver->compr_ops->open)
+			continue;
+
+		ret = component->driver->compr_ops->open(cstream);
+		if (ret < 0) {
+			dev_err(component->dev,
+				"Compress ASoC: can't open platform %s: %d\n",
+				component->name, ret);
+
+			*last = component;
+			return ret;
+		}
+	}
+
+	*last = NULL;
+	return 0;
+}
+
+static int soc_compr_components_free(struct snd_compr_stream *cstream,
+				     struct snd_soc_component *last)
+{
+	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
+	struct snd_soc_component *component;
+	struct snd_soc_rtdcom_list *rtdcom;
+
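+	/* free in open order, stopping at @last; a NULL @last frees them all */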
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		if (component == last)
+			break;
+
+		if (!component->driver->compr_ops ||
+		    !component->driver->compr_ops->free)
+			continue;
+
+		component->driver->compr_ops->free(cstream);
+	}
+
+	return 0;
+}
+
 static int soc_compr_open(struct snd_compr_stream *cstream)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
-	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
-	int ret = 0, __ret;
+	int ret;
 
 	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
 
@@ -47,35 +98,7 @@
 		}
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->open) {
-		ret = platform->driver->compr_ops->open(cstream);
-		if (ret < 0) {
-			dev_err(platform->dev,
-				"Compress ASoC: can't open platform %s: %d\n",
-				platform->component.name, ret);
-			goto plat_err;
-		}
-	}
-
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
-
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
-		if (!component->driver->compr_ops ||
-		    !component->driver->compr_ops->open)
-			continue;
-
-		__ret = component->driver->compr_ops->open(cstream);
-		if (__ret < 0) {
-			dev_err(component->dev,
-				"Compress ASoC: can't open platform %s: %d\n",
-				component->name, __ret);
-			ret = __ret;
-		}
-	}
+	ret = soc_compr_components_open(cstream, &component);
 	if (ret < 0)
 		goto machine_err;
 
@@ -96,23 +119,8 @@
 	return 0;
 
 machine_err:
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
+	soc_compr_components_free(cstream, component);
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
-		if (!component->driver->compr_ops ||
-		    !component->driver->compr_ops->free)
-			continue;
-
-		component->driver->compr_ops->free(cstream);
-	}
-
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->free)
-		platform->driver->compr_ops->free(cstream);
-plat_err:
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->shutdown)
 		cpu_dai->driver->cops->shutdown(cstream, cpu_dai);
 out:
@@ -125,14 +133,12 @@
 	struct snd_soc_pcm_runtime *fe = cstream->private_data;
 	struct snd_pcm_substream *fe_substream =
 		 fe->pcm->streams[cstream->direction].substream;
-	struct snd_soc_platform *platform = fe->platform;
 	struct snd_soc_component *component;
-	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = fe->cpu_dai;
 	struct snd_soc_dpcm *dpcm;
 	struct snd_soc_dapm_widget_list *list;
 	int stream;
-	int ret = 0, __ret;
+	int ret;
 
 	if (cstream->direction == SND_COMPRESS_PLAYBACK)
 		stream = SNDRV_PCM_STREAM_PLAYBACK;
@@ -151,35 +157,7 @@
 		}
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->open) {
-		ret = platform->driver->compr_ops->open(cstream);
-		if (ret < 0) {
-			dev_err(platform->dev,
-				"Compress ASoC: can't open platform %s: %d\n",
-				platform->component.name, ret);
-			goto plat_err;
-		}
-	}
-
-	for_each_rtdcom(fe, rtdcom) {
-		component = rtdcom->component;
-
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
-		if (!component->driver->compr_ops ||
-		    !component->driver->compr_ops->open)
-			continue;
-
-		__ret = component->driver->compr_ops->open(cstream);
-		if (__ret < 0) {
-			dev_err(component->dev,
-				"Compress ASoC: can't open platform %s: %d\n",
-				component->name, __ret);
-			ret = __ret;
-		}
-	}
+	ret = soc_compr_components_open(cstream, &component);
 	if (ret < 0)
 		goto machine_err;
 
@@ -235,23 +213,8 @@
 	if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
 		fe->dai_link->compr_ops->shutdown(cstream);
 machine_err:
-	for_each_rtdcom(fe, rtdcom) {
-		component = rtdcom->component;
+	soc_compr_components_free(cstream, component);
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
-		if (!component->driver->compr_ops ||
-		    !component->driver->compr_ops->free)
-			continue;
-
-		component->driver->compr_ops->free(cstream);
-	}
-
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->free)
-		platform->driver->compr_ops->free(cstream);
-plat_err:
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->shutdown)
 		cpu_dai->driver->cops->shutdown(cstream, cpu_dai);
 out:
@@ -292,9 +255,6 @@
 static int soc_compr_free(struct snd_compr_stream *cstream)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
-	struct snd_soc_component *component;
-	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	struct snd_soc_dai *codec_dai = rtd->codec_dai;
 	int stream;
@@ -319,22 +279,7 @@
 	if (rtd->dai_link->compr_ops && rtd->dai_link->compr_ops->shutdown)
 		rtd->dai_link->compr_ops->shutdown(cstream);
 
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
-
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
-		if (!component->driver->compr_ops ||
-		    !component->driver->compr_ops->free)
-			continue;
-
-		component->driver->compr_ops->free(cstream);
-	}
-
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->free)
-		platform->driver->compr_ops->free(cstream);
+	soc_compr_components_free(cstream, NULL);
 
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->shutdown)
 		cpu_dai->driver->cops->shutdown(cstream, cpu_dai);
@@ -342,8 +287,8 @@
 	if (cstream->direction == SND_COMPRESS_PLAYBACK) {
 		if (snd_soc_runtime_ignore_pmdown_time(rtd)) {
 			snd_soc_dapm_stream_event(rtd,
-					SNDRV_PCM_STREAM_PLAYBACK,
-					SND_SOC_DAPM_STREAM_STOP);
+						  SNDRV_PCM_STREAM_PLAYBACK,
+						  SND_SOC_DAPM_STREAM_STOP);
 		} else {
 			rtd->pop_wait = 1;
 			queue_delayed_work(system_power_efficient_wq,
@@ -353,8 +298,8 @@
 	} else {
 		/* capture streams can be powered down now */
 		snd_soc_dapm_stream_event(rtd,
-			SNDRV_PCM_STREAM_CAPTURE,
-			SND_SOC_DAPM_STREAM_STOP);
+					  SNDRV_PCM_STREAM_CAPTURE,
+					  SND_SOC_DAPM_STREAM_STOP);
 	}
 
 	mutex_unlock(&rtd->pcm_mutex);
@@ -364,9 +309,6 @@
 static int soc_compr_free_fe(struct snd_compr_stream *cstream)
 {
 	struct snd_soc_pcm_runtime *fe = cstream->private_data;
-	struct snd_soc_platform *platform = fe->platform;
-	struct snd_soc_component *component;
-	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = fe->cpu_dai;
 	struct snd_soc_dpcm *dpcm;
 	int stream, ret;
@@ -404,22 +346,7 @@
 	if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
 		fe->dai_link->compr_ops->shutdown(cstream);
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->free)
-		platform->driver->compr_ops->free(cstream);
-
-	for_each_rtdcom(fe, rtdcom) {
-		component = rtdcom->component;
-
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
-		if (!component->driver->compr_ops ||
-		    !component->driver->compr_ops->free)
-			continue;
-
-		component->driver->compr_ops->free(cstream);
-	}
+	soc_compr_components_free(cstream, NULL);
 
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->shutdown)
 		cpu_dai->driver->cops->shutdown(cstream, cpu_dai);
@@ -432,7 +359,6 @@
 {
 
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *codec_dai = rtd->codec_dai;
@@ -441,19 +367,9 @@
 
 	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->trigger) {
-		ret = platform->driver->compr_ops->trigger(cstream, cmd);
-		if (ret < 0)
-			goto out;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->trigger)
 			continue;
@@ -485,7 +401,6 @@
 static int soc_compr_trigger_fe(struct snd_compr_stream *cstream, int cmd)
 {
 	struct snd_soc_pcm_runtime *fe = cstream->private_data;
-	struct snd_soc_platform *platform = fe->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = fe->cpu_dai;
@@ -494,19 +409,9 @@
 	if (cmd == SND_COMPR_TRIGGER_PARTIAL_DRAIN ||
 		cmd == SND_COMPR_TRIGGER_DRAIN) {
 
-		if (platform &&
-		    platform->driver->compr_ops &&
-		    platform->driver->compr_ops->trigger)
-			return platform->driver->compr_ops->trigger(cstream,
-								    cmd);
-
 		for_each_rtdcom(fe, rtdcom) {
 			component = rtdcom->component;
 
-			/* ignore duplication for now */
-			if (platform && (component == &platform->component))
-				continue;
-
 			if (!component->driver->compr_ops ||
 			    !component->driver->compr_ops->trigger)
 				continue;
@@ -523,7 +428,6 @@
 	else
 		stream = SNDRV_PCM_STREAM_CAPTURE;
 
-
 	mutex_lock_nested(&fe->card->mutex, SND_SOC_CARD_CLASS_RUNTIME);
 
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->trigger) {
@@ -532,19 +436,9 @@
 			goto out;
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->trigger) {
-		ret = platform->driver->compr_ops->trigger(cstream, cmd);
-		if (ret < 0)
-			goto out;
-	}
-
 	for_each_rtdcom(fe, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->trigger)
 			continue;
@@ -585,7 +479,6 @@
 					struct snd_compr_params *params)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -593,11 +486,12 @@
 
 	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
 
-	/* first we call set_params for the platform driver
-	 * this should configure the soc side
-	 * if the machine has compressed ops then we call that as well
-	 * expectation is that platform and machine will configure everything
-	 * for this compress path, like configuring pcm port for codec
+	/*
+	 * First we call set_params for the CPU DAI, then the component
+	 * drivers; these should configure the SoC side. If the machine has
+	 * compressed ops then we call that as well. The expectation is
+	 * that these callbacks will configure everything for this compress
+	 * path, like configuring a PCM port for a CODEC.
 	 */
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->set_params) {
 		ret = cpu_dai->driver->cops->set_params(cstream, params, cpu_dai);
@@ -605,19 +499,9 @@
 			goto err;
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->set_params) {
-		ret = platform->driver->compr_ops->set_params(cstream, params);
-		if (ret < 0)
-			goto err;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->set_params)
 			continue;
@@ -637,10 +521,10 @@
 
 	if (cstream->direction == SND_COMPRESS_PLAYBACK)
 		snd_soc_dapm_stream_event(rtd, SNDRV_PCM_STREAM_PLAYBACK,
-					SND_SOC_DAPM_STREAM_START);
+					  SND_SOC_DAPM_STREAM_START);
 	else
 		snd_soc_dapm_stream_event(rtd, SNDRV_PCM_STREAM_CAPTURE,
-					SND_SOC_DAPM_STREAM_START);
+					  SND_SOC_DAPM_STREAM_START);
 
 	/* cancel any delayed stream shutdown that is pending */
 	rtd->pop_wait = 0;
@@ -656,12 +540,11 @@
 }
 
 static int soc_compr_set_params_fe(struct snd_compr_stream *cstream,
-					struct snd_compr_params *params)
+				   struct snd_compr_params *params)
 {
 	struct snd_soc_pcm_runtime *fe = cstream->private_data;
 	struct snd_pcm_substream *fe_substream =
 		 fe->pcm->streams[cstream->direction].substream;
-	struct snd_soc_platform *platform = fe->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = fe->cpu_dai;
@@ -680,19 +563,9 @@
 			goto out;
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->set_params) {
-		ret = platform->driver->compr_ops->set_params(cstream, params);
-		if (ret < 0)
-			goto out;
-	}
-
 	for_each_rtdcom(fe, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->set_params)
 			continue;
@@ -738,10 +611,9 @@
 }
 
 static int soc_compr_get_params(struct snd_compr_stream *cstream,
-					struct snd_codec *params)
+				struct snd_codec *params)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -755,19 +627,9 @@
 			goto err;
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->get_params) {
-		ret = platform->driver->compr_ops->get_params(cstream, params);
-		if (ret < 0)
-			goto err;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->get_params)
 			continue;
@@ -783,29 +645,18 @@
 }
 
 static int soc_compr_get_caps(struct snd_compr_stream *cstream,
-				struct snd_compr_caps *caps)
+			      struct snd_compr_caps *caps)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	int ret = 0, __ret;
 
 	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->get_caps) {
-		ret = platform->driver->compr_ops->get_caps(cstream, caps);
-		if (ret < 0)
-			goto err;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->get_caps)
 			continue;
@@ -815,35 +666,23 @@
 			ret = __ret;
 	}
 
-err:
 	mutex_unlock(&rtd->pcm_mutex);
 	return ret;
 }
 
 static int soc_compr_get_codec_caps(struct snd_compr_stream *cstream,
-				struct snd_compr_codec_caps *codec)
+				    struct snd_compr_codec_caps *codec)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	int ret = 0, __ret;
 
 	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->get_codec_caps) {
-		ret = platform->driver->compr_ops->get_codec_caps(cstream, codec);
-		if (ret < 0)
-			goto err;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->get_codec_caps)
 			continue;
@@ -853,7 +692,6 @@
 			ret = __ret;
 	}
 
-err:
 	mutex_unlock(&rtd->pcm_mutex);
 	return ret;
 }
@@ -861,7 +699,6 @@
 static int soc_compr_ack(struct snd_compr_stream *cstream, size_t bytes)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -875,19 +712,9 @@
 			goto err;
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->ack) {
-		ret = platform->driver->compr_ops->ack(cstream, bytes);
-		if (ret < 0)
-			goto err;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->ack)
 			continue;
@@ -903,10 +730,9 @@
 }
 
 static int soc_compr_pointer(struct snd_compr_stream *cstream,
-			struct snd_compr_tstamp *tstamp)
+			     struct snd_compr_tstamp *tstamp)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	int ret = 0, __ret;
@@ -917,19 +743,9 @@
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->pointer)
 		cpu_dai->driver->cops->pointer(cstream, tstamp, cpu_dai);
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->pointer) {
-		ret = platform->driver->compr_ops->pointer(cstream, tstamp);
-		if (ret < 0)
-			goto err;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->pointer)
 			continue;
@@ -939,7 +755,6 @@
 			ret = __ret;
 	}
 
-err:
 	mutex_unlock(&rtd->pcm_mutex);
 	return ret;
 }
@@ -948,26 +763,15 @@
 			  char __user *buf, size_t count)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	int ret = 0;
 
 	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->copy) {
-		ret = platform->driver->compr_ops->copy(cstream, buf, count);
-		if (ret < 0)
-			goto err;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->copy)
 			continue;
@@ -976,16 +780,14 @@
 		break;
 	}
 
-err:
 	mutex_unlock(&rtd->pcm_mutex);
 	return ret;
 }
 
 static int soc_compr_set_metadata(struct snd_compr_stream *cstream,
-				struct snd_compr_metadata *metadata)
+				  struct snd_compr_metadata *metadata)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -997,19 +799,9 @@
 			return ret;
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->set_metadata) {
-		ret = platform->driver->compr_ops->set_metadata(cstream, metadata);
-		if (ret < 0)
-			return ret;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->set_metadata)
 			continue;
@@ -1023,10 +815,9 @@
 }
 
 static int soc_compr_get_metadata(struct snd_compr_stream *cstream,
-				struct snd_compr_metadata *metadata)
+				  struct snd_compr_metadata *metadata)
 {
 	struct snd_soc_pcm_runtime *rtd = cstream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -1038,19 +829,9 @@
 			return ret;
 	}
 
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->get_metadata) {
-		ret = platform->driver->compr_ops->get_metadata(cstream, metadata);
-		if (ret < 0)
-			return ret;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->get_metadata)
 			continue;
@@ -1103,7 +884,6 @@
  */
 int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
 {
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *codec_dai = rtd->codec_dai;
@@ -1184,26 +964,17 @@
 		memcpy(compr->ops, &soc_compr_ops, sizeof(soc_compr_ops));
 	}
 
-
-	/* Add copy callback for not memory mapped DSPs */
-	if (platform && platform->driver->compr_ops && platform->driver->compr_ops->copy)
-		compr->ops->copy = soc_compr_copy;
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->compr_ops ||
 		    !component->driver->compr_ops->copy)
 			continue;
 
 		compr->ops->copy = soc_compr_copy;
+		break;
 	}
 
-
 	mutex_init(&compr->lock);
 	ret = snd_compress_new(rtd->card->snd_card, num, direction,
 				new_name, compr);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index bf7ca32a..3d56f1f 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -56,8 +56,6 @@
 #endif
 
 static DEFINE_MUTEX(client_mutex);
-static LIST_HEAD(platform_list);
-static LIST_HEAD(codec_list);
 static LIST_HEAD(component_list);
 
 /*
@@ -83,98 +81,6 @@
 	NULL,	/* terminator */
 };
 
-/* returns the minimum number of bytes needed to represent
- * a particular given value */
-static int min_bytes_needed(unsigned long val)
-{
-	int c = 0;
-	int i;
-
-	for (i = (sizeof val * 8) - 1; i >= 0; --i, ++c)
-		if (val & (1UL << i))
-			break;
-	c = (sizeof val * 8) - c;
-	if (!c || (c % 8))
-		c = (c + 8) / 8;
-	else
-		c /= 8;
-	return c;
-}
-
-/* fill buf which is 'len' bytes with a formatted
- * string of the form 'reg: value\n' */
-static int format_register_str(struct snd_soc_codec *codec,
-			       unsigned int reg, char *buf, size_t len)
-{
-	int wordsize = min_bytes_needed(codec->driver->reg_cache_size) * 2;
-	int regsize = codec->driver->reg_word_size * 2;
-	int ret;
-
-	/* +2 for ': ' and + 1 for '\n' */
-	if (wordsize + regsize + 2 + 1 != len)
-		return -EINVAL;
-
-	sprintf(buf, "%.*x: ", wordsize, reg);
-	buf += wordsize + 2;
-
-	ret = snd_soc_read(codec, reg);
-	if (ret < 0)
-		memset(buf, 'X', regsize);
-	else
-		sprintf(buf, "%.*x", regsize, ret);
-	buf[regsize] = '\n';
-	/* no NUL-termination needed */
-	return 0;
-}
-
-/* codec register dump */
-static ssize_t soc_codec_reg_show(struct snd_soc_codec *codec, char *buf,
-				  size_t count, loff_t pos)
-{
-	int i, step = 1;
-	int wordsize, regsize;
-	int len;
-	size_t total = 0;
-	loff_t p = 0;
-
-	wordsize = min_bytes_needed(codec->driver->reg_cache_size) * 2;
-	regsize = codec->driver->reg_word_size * 2;
-
-	len = wordsize + regsize + 2 + 1;
-
-	if (!codec->driver->reg_cache_size)
-		return 0;
-
-	if (codec->driver->reg_cache_step)
-		step = codec->driver->reg_cache_step;
-
-	for (i = 0; i < codec->driver->reg_cache_size; i += step) {
-		/* only support larger than PAGE_SIZE bytes debugfs
-		 * entries for the default case */
-		if (p >= pos) {
-			if (total + len >= count - 1)
-				break;
-			format_register_str(codec, i, buf + total, len);
-			total += len;
-		}
-		p += len;
-	}
-
-	total = min(total, count - 1);
-
-	return total;
-}
-
-static ssize_t codec_reg_show(struct device *dev,
-	struct device_attribute *attr, char *buf)
-{
-	struct snd_soc_pcm_runtime *rtd = dev_get_drvdata(dev);
-
-	return soc_codec_reg_show(rtd->codec, buf, PAGE_SIZE, 0);
-}
-
-static DEVICE_ATTR_RO(codec_reg);
-
 static ssize_t pmdown_time_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -200,7 +106,6 @@
 static DEVICE_ATTR(pmdown_time, 0644, pmdown_time_show, pmdown_time_set);
 
 static struct attribute *soc_dev_attrs[] = {
-	&dev_attr_codec_reg.attr,
 	&dev_attr_pmdown_time.attr,
 	NULL
 };
@@ -233,71 +138,6 @@
 };
 
 #ifdef CONFIG_DEBUG_FS
-static ssize_t codec_reg_read_file(struct file *file, char __user *user_buf,
-				   size_t count, loff_t *ppos)
-{
-	ssize_t ret;
-	struct snd_soc_codec *codec = file->private_data;
-	char *buf;
-
-	if (*ppos < 0 || !count)
-		return -EINVAL;
-
-	buf = kmalloc(count, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	ret = soc_codec_reg_show(codec, buf, count, *ppos);
-	if (ret >= 0) {
-		if (copy_to_user(user_buf, buf, ret)) {
-			kfree(buf);
-			return -EFAULT;
-		}
-		*ppos += ret;
-	}
-
-	kfree(buf);
-	return ret;
-}
-
-static ssize_t codec_reg_write_file(struct file *file,
-		const char __user *user_buf, size_t count, loff_t *ppos)
-{
-	char buf[32];
-	size_t buf_size;
-	char *start = buf;
-	unsigned long reg, value;
-	struct snd_soc_codec *codec = file->private_data;
-	int ret;
-
-	buf_size = min(count, (sizeof(buf)-1));
-	if (copy_from_user(buf, user_buf, buf_size))
-		return -EFAULT;
-	buf[buf_size] = 0;
-
-	while (*start == ' ')
-		start++;
-	reg = simple_strtoul(start, &start, 16);
-	while (*start == ' ')
-		start++;
-	ret = kstrtoul(start, 16, &value);
-	if (ret)
-		return ret;
-
-	/* Userspace has been fiddling around behind the kernel's back */
-	add_taint(TAINT_USER, LOCKDEP_NOW_UNRELIABLE);
-
-	snd_soc_write(codec, reg, value);
-	return buf_size;
-}
-
-static const struct file_operations codec_reg_fops = {
-	.open = simple_open,
-	.read = codec_reg_read_file,
-	.write = codec_reg_write_file,
-	.llseek = default_llseek,
-};
-
 static void soc_init_component_debugfs(struct snd_soc_component *component)
 {
 	if (!component->card->debugfs_card_root)
@@ -326,9 +166,6 @@
 
 	snd_soc_dapm_debugfs_init(snd_soc_component_get_dapm(component),
 		component->debugfs_root);
-
-	if (component->init_debugfs)
-		component->init_debugfs(component);
 }
 
 static void soc_cleanup_component_debugfs(struct snd_soc_component *component)
@@ -336,34 +173,6 @@
 	debugfs_remove_recursive(component->debugfs_root);
 }
 
-static void soc_init_codec_debugfs(struct snd_soc_component *component)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-	struct dentry *debugfs_reg;
-
-	debugfs_reg = debugfs_create_file("codec_reg", 0644,
-					  codec->component.debugfs_root,
-					  codec, &codec_reg_fops);
-	if (!debugfs_reg)
-		dev_warn(codec->dev,
-			"ASoC: Failed to create codec register debugfs file\n");
-}
-
-static int codec_list_show(struct seq_file *m, void *v)
-{
-	struct snd_soc_codec *codec;
-
-	mutex_lock(&client_mutex);
-
-	list_for_each_entry(codec, &codec_list, list)
-		seq_printf(m, "%s\n", codec->component.name);
-
-	mutex_unlock(&client_mutex);
-
-	return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(codec_list);
-
 static int dai_list_show(struct seq_file *m, void *v)
 {
 	struct snd_soc_component *component;
@@ -381,20 +190,20 @@
 }
 DEFINE_SHOW_ATTRIBUTE(dai_list);
 
-static int platform_list_show(struct seq_file *m, void *v)
+static int component_list_show(struct seq_file *m, void *v)
 {
-	struct snd_soc_platform *platform;
+	struct snd_soc_component *component;
 
 	mutex_lock(&client_mutex);
 
-	list_for_each_entry(platform, &platform_list, list)
-		seq_printf(m, "%s\n", platform->component.name);
+	list_for_each_entry(component, &component_list, list)
+		seq_printf(m, "%s\n", component->name);
 
 	mutex_unlock(&client_mutex);
 
 	return 0;
 }
-DEFINE_SHOW_ATTRIBUTE(platform_list);
+DEFINE_SHOW_ATTRIBUTE(component_list);
 
 static void soc_init_card_debugfs(struct snd_soc_card *card)
 {
@@ -431,17 +240,13 @@
 		return;
 	}
 
-	if (!debugfs_create_file("codecs", 0444, snd_soc_debugfs_root, NULL,
-				 &codec_list_fops))
-		pr_warn("ASoC: Failed to create CODEC list debugfs file\n");
-
 	if (!debugfs_create_file("dais", 0444, snd_soc_debugfs_root, NULL,
 				 &dai_list_fops))
 		pr_warn("ASoC: Failed to create DAI list debugfs file\n");
 
-	if (!debugfs_create_file("platforms", 0444, snd_soc_debugfs_root, NULL,
-				 &platform_list_fops))
-		pr_warn("ASoC: Failed to create platform list debugfs file\n");
+	if (!debugfs_create_file("components", 0444, snd_soc_debugfs_root, NULL,
+				 &component_list_fops))
+		pr_warn("ASoC: Failed to create component list debugfs file\n");
 }
 
 static void snd_soc_debugfs_exit(void)
@@ -451,8 +256,6 @@
 
 #else
 
-#define soc_init_codec_debugfs NULL
-
 static inline void soc_init_component_debugfs(
 	struct snd_soc_component *component)
 {
@@ -732,8 +535,8 @@
 				}
 
 			case SND_SOC_BIAS_OFF:
-				if (component->suspend)
-					component->suspend(component);
+				if (component->driver->suspend)
+					component->driver->suspend(component);
 				component->suspended = 1;
 				if (component->regmap)
 					regcache_mark_dirty(component->regmap);
@@ -804,8 +607,8 @@
 
 	list_for_each_entry(component, &card->component_dev_list, card_list) {
 		if (component->suspended) {
-			if (component->resume)
-				component->resume(component);
+			if (component->driver->resume)
+				component->driver->resume(component);
 			component->suspended = 0;
 		}
 	}
@@ -1045,7 +848,6 @@
 	struct snd_soc_dai_link_component cpu_dai_component;
 	struct snd_soc_component *component;
 	struct snd_soc_dai **codec_dais;
-	struct snd_soc_platform *platform;
 	struct device_node *platform_of_node;
 	const char *platform_name;
 	int i;
@@ -1089,7 +891,6 @@
 
 	/* Single codec links expect codec and codec_dai in runtime data */
 	rtd->codec_dai = codec_dais[0];
-	rtd->codec = rtd->codec_dai->codec;
 
 	/* if there's no platform we match on the empty platform */
 	platform_name = dai_link->platform_name;
@@ -1113,23 +914,6 @@
 		snd_soc_rtdcom_add(rtd, component);
 	}
 
-	/* find one from the set of registered platforms */
-	list_for_each_entry(platform, &platform_list, list) {
-		platform_of_node = platform->dev->of_node;
-		if (!platform_of_node && platform->dev->parent->of_node)
-			platform_of_node = platform->dev->parent->of_node;
-
-		if (dai_link->platform_of_node) {
-			if (platform_of_node != dai_link->platform_of_node)
-				continue;
-		} else {
-			if (strcmp(platform->component.name, platform_name))
-				continue;
-		}
-
-		rtd->platform = platform;
-	}
-
 	soc_add_pcm_runtime(card, rtd);
 	return 0;
 
@@ -1145,8 +929,8 @@
 
 	list_del(&component->card_list);
 
-	if (component->remove)
-		component->remove(component);
+	if (component->driver->remove)
+		component->driver->remove(component);
 
 	snd_soc_dapm_free(snd_soc_component_get_dapm(component));
 
@@ -1421,7 +1205,12 @@
 
 	for (i = 0; i < card->num_configs; i++) {
 		struct snd_soc_codec_conf *map = &card->codec_conf[i];
-		if (map->of_node && component->dev->of_node != map->of_node)
+		struct device_node *component_of_node = component->dev->of_node;
+
+		if (!component_of_node && component->dev->parent)
+			component_of_node = component->dev->parent->of_node;
+
+		if (map->of_node && component_of_node != map->of_node)
 			continue;
 		if (map->dev_name && strcmp(component->name, map->dev_name))
 			continue;
@@ -1480,8 +1269,8 @@
 		}
 	}
 
-	if (component->probe) {
-		ret = component->probe(component);
+	if (component->driver->probe) {
+		ret = component->driver->probe(component);
 		if (ret < 0) {
 			dev_err(component->dev,
 				"ASoC: failed to probe component %d\n", ret);
@@ -1838,24 +1627,6 @@
 	}
 }
 
-static int snd_soc_init_codec_cache(struct snd_soc_codec *codec)
-{
-	int ret;
-
-	if (codec->cache_init)
-		return 0;
-
-	ret = snd_soc_cache_init(codec);
-	if (ret < 0) {
-		dev_err(codec->dev,
-			"ASoC: Failed to set cache compression type: %d\n",
-			ret);
-		return ret;
-	}
-	codec->cache_init = 1;
-	return 0;
-}
-
 /**
  * snd_soc_runtime_set_dai_fmt() - Change DAI link format for a ASoC runtime
  * @rtd: The runtime for which the DAI link format should be changed
@@ -1890,8 +1661,7 @@
 
 	/* Flip the polarity for the "CPU" end of a CODEC<->CODEC link */
 	/* the component which has non_legacy_dai_naming is Codec */
-	if (cpu_dai->codec ||
-	    cpu_dai->component->driver->non_legacy_dai_naming) {
+	if (cpu_dai->component->driver->non_legacy_dai_naming) {
 		unsigned int inv_dai_fmt;
 
 		inv_dai_fmt = dai_fmt & ~SND_SOC_DAIFMT_MASTER_MASK;
@@ -2078,7 +1848,6 @@
 
 static int snd_soc_instantiate_card(struct snd_soc_card *card)
 {
-	struct snd_soc_codec *codec;
 	struct snd_soc_pcm_runtime *rtd;
 	struct snd_soc_dai_link *dai_link;
 	int ret, i, order;
@@ -2104,15 +1873,6 @@
 	for (i = 0; i < card->num_links; i++)
 		snd_soc_add_dai_link(card, card->dai_link+i);
 
-	/* initialize the register cache for each available codec */
-	list_for_each_entry(codec, &codec_list, list) {
-		if (codec->cache_init)
-			continue;
-		ret = snd_soc_init_codec_cache(codec);
-		if (ret < 0)
-			goto base_error;
-	}
-
 	/* card bind complete so register a sound card */
 	ret = snd_card_new(card->dev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1,
 			card->owner, 0, &card->snd_card);
@@ -2494,43 +2254,6 @@
 EXPORT_SYMBOL_GPL(snd_soc_add_component_controls);
 
 /**
- * snd_soc_add_codec_controls - add an array of controls to a codec.
- * Convenience function to add a list of controls. Many codecs were
- * duplicating this code.
- *
- * @codec: codec to add controls to
- * @controls: array of controls to add
- * @num_controls: number of elements in the array
- *
- * Return 0 for success, else error.
- */
-int snd_soc_add_codec_controls(struct snd_soc_codec *codec,
-	const struct snd_kcontrol_new *controls, unsigned int num_controls)
-{
-	return snd_soc_add_component_controls(&codec->component, controls,
-		num_controls);
-}
-EXPORT_SYMBOL_GPL(snd_soc_add_codec_controls);
-
-/**
- * snd_soc_add_platform_controls - add an array of controls to a platform.
- * Convenience function to add a list of controls.
- *
- * @platform: platform to add controls to
- * @controls: array of controls to add
- * @num_controls: number of elements in the array
- *
- * Return 0 for success, else error.
- */
-int snd_soc_add_platform_controls(struct snd_soc_platform *platform,
-	const struct snd_kcontrol_new *controls, unsigned int num_controls)
-{
-	return snd_soc_add_component_controls(&platform->component, controls,
-		num_controls);
-}
-EXPORT_SYMBOL_GPL(snd_soc_add_platform_controls);
-
-/**
  * snd_soc_add_card_controls - add an array of controls to a SoC card.
  * Convenience function to add a list of controls.
  *
@@ -2591,27 +2314,6 @@
 EXPORT_SYMBOL_GPL(snd_soc_dai_set_sysclk);
 
 /**
- * snd_soc_codec_set_sysclk - configure CODEC system or master clock.
- * @codec: CODEC
- * @clk_id: DAI specific clock ID
- * @source: Source for the clock
- * @freq: new clock frequency in Hz
- * @dir: new clock direction - input/output.
- *
- * Configures the CODEC master (MCLK) or system (SYSCLK) clocking.
- */
-int snd_soc_codec_set_sysclk(struct snd_soc_codec *codec, int clk_id,
-			     int source, unsigned int freq, int dir)
-{
-	if (codec->driver->set_sysclk)
-		return codec->driver->set_sysclk(codec, clk_id, source,
-						 freq, dir);
-	else
-		return -ENOTSUPP;
-}
-EXPORT_SYMBOL_GPL(snd_soc_codec_set_sysclk);
-
-/**
  * snd_soc_component_set_sysclk - configure COMPONENT system or master clock.
  * @component: COMPONENT
  * @clk_id: DAI specific clock ID
@@ -2624,11 +2326,6 @@
 int snd_soc_component_set_sysclk(struct snd_soc_component *component, int clk_id,
 			     int source, unsigned int freq, int dir)
 {
-	/* will be removed */
-	if (component->set_sysclk)
-		return component->set_sysclk(component, clk_id, source,
-					     freq, dir);
-
 	if (component->driver->set_sysclk)
 		return component->driver->set_sysclk(component, clk_id, source,
 						 freq, dir);
@@ -2680,27 +2377,6 @@
 EXPORT_SYMBOL_GPL(snd_soc_dai_set_pll);
 
 /*
- * snd_soc_codec_set_pll - configure codec PLL.
- * @codec: CODEC
- * @pll_id: DAI specific PLL ID
- * @source: DAI specific source for the PLL
- * @freq_in: PLL input clock frequency in Hz
- * @freq_out: requested PLL output clock frequency in Hz
- *
- * Configures and enables PLL to generate output clock based on input clock.
- */
-int snd_soc_codec_set_pll(struct snd_soc_codec *codec, int pll_id, int source,
-			  unsigned int freq_in, unsigned int freq_out)
-{
-	if (codec->driver->set_pll)
-		return codec->driver->set_pll(codec, pll_id, source,
-					      freq_in, freq_out);
-	else
-		return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(snd_soc_codec_set_pll);
-
-/*
  * snd_soc_component_set_pll - configure component PLL.
  * @component: COMPONENT
  * @pll_id: DAI specific PLL ID
@@ -2714,11 +2390,6 @@
 			      int source, unsigned int freq_in,
 			      unsigned int freq_out)
 {
-	/* will be removed */
-	if (component->set_pll)
-		return component->set_pll(component, pll_id, source,
-					      freq_in, freq_out);
-
 	if (component->driver->set_pll)
 		return component->driver->set_pll(component, pll_id, source,
 					      freq_in, freq_out);
@@ -3112,8 +2783,7 @@
  *                     parent's name.
  */
 static int snd_soc_register_dais(struct snd_soc_component *component,
-	struct snd_soc_dai_driver *dai_drv, size_t count,
-	bool legacy_dai_naming)
+				 struct snd_soc_dai_driver *dai_drv, size_t count)
 {
 	struct device *dev = component->dev;
 	struct snd_soc_dai *dai;
@@ -3125,7 +2795,7 @@
 	for (i = 0; i < count; i++) {
 
 		dai = soc_add_dai(component, dai_drv + i,
-				count == 1 && legacy_dai_naming);
+				  count == 1 && !component->driver->non_legacy_dai_naming);
 		if (dai == NULL) {
 			ret = -ENOMEM;
 			goto err;
@@ -3198,22 +2868,6 @@
 	return component->driver->stream_event(component, event);
 }
 
-static int snd_soc_component_drv_pcm_new(struct snd_soc_component *component,
-					struct snd_soc_pcm_runtime *rtd)
-{
-	if (component->driver->pcm_new)
-		return component->driver->pcm_new(rtd);
-
-	return 0;
-}
-
-static void snd_soc_component_drv_pcm_free(struct snd_soc_component *component,
-					  struct snd_pcm *pcm)
-{
-	if (component->driver->pcm_free)
-		component->driver->pcm_free(pcm);
-}
-
 static int snd_soc_component_set_bias_level(struct snd_soc_dapm_context *dapm,
 					enum snd_soc_bias_level level)
 {
@@ -3235,15 +2889,6 @@
 
 	component->dev = dev;
 	component->driver = driver;
-	component->probe = component->driver->probe;
-	component->remove = component->driver->remove;
-	component->suspend = component->driver->suspend;
-	component->resume = component->driver->resume;
-	component->set_sysclk = component->driver->set_sysclk;
-	component->set_pll = component->driver->set_pll;
-	component->set_jack = component->driver->set_jack;
-	component->pcm_new = snd_soc_component_drv_pcm_new;
-	component->pcm_free = snd_soc_component_drv_pcm_free;
 
 	dapm = snd_soc_component_get_dapm(component);
 	dapm->dev = dev;
@@ -3312,9 +2957,11 @@
 
 #endif
 
-static void snd_soc_component_add_unlocked(struct snd_soc_component *component)
+static void snd_soc_component_add(struct snd_soc_component *component)
 {
-	if (!component->write && !component->read) {
+	mutex_lock(&client_mutex);
+
+	if (!component->driver->write && !component->driver->read) {
 		if (!component->regmap)
 			component->regmap = dev_get_regmap(component->dev, NULL);
 		if (component->regmap)
@@ -3323,12 +2970,7 @@
 
 	list_add(&component->list, &component_list);
 	INIT_LIST_HEAD(&component->dobj_list);
-}
 
-static void snd_soc_component_add(struct snd_soc_component *component)
-{
-	mutex_lock(&client_mutex);
-	snd_soc_component_add_unlocked(component);
 	mutex_unlock(&client_mutex);
 }
 
@@ -3396,9 +3038,6 @@
 	if (ret)
 		goto err_free;
 
-	component->ignore_pmdown_time = true;
-	component->registered_as_component = true;
-
 	if (component_driver->endianness) {
 		for (i = 0; i < num_dai; i++) {
 			convert_endianness_formats(&dai_drv[i].playback);
@@ -3406,8 +3045,7 @@
 		}
 	}
 
-	ret = snd_soc_register_dais(component, dai_drv, num_dai,
-				    !component_driver->non_legacy_dai_naming);
+	ret = snd_soc_register_dais(component, dai_drv, num_dai);
 	if (ret < 0) {
 		dev_err(dev, "ASoC: Failed to register DAIs: %d\n", ret);
 		goto err_cleanup;
@@ -3453,8 +3091,7 @@
 
 	mutex_lock(&client_mutex);
 	list_for_each_entry(component, &component_list, list) {
-		if (dev != component->dev ||
-		    !component->registered_as_component)
+		if (dev != component->dev)
 			continue;
 
 		snd_soc_tplg_component_remove(component, SND_SOC_TPLG_INDEX_ALL);
@@ -3503,375 +3140,6 @@
 }
 EXPORT_SYMBOL_GPL(snd_soc_lookup_component);
 
-static int snd_soc_platform_drv_probe(struct snd_soc_component *component)
-{
-	struct snd_soc_platform *platform = snd_soc_component_to_platform(component);
-
-	return platform->driver->probe(platform);
-}
-
-static void snd_soc_platform_drv_remove(struct snd_soc_component *component)
-{
-	struct snd_soc_platform *platform = snd_soc_component_to_platform(component);
-
-	platform->driver->remove(platform);
-}
-
-static int snd_soc_platform_drv_pcm_new(struct snd_soc_component *component,
-					struct snd_soc_pcm_runtime *rtd)
-{
-	struct snd_soc_platform *platform = snd_soc_component_to_platform(component);
-
-	if (platform->driver->pcm_new)
-		return platform->driver->pcm_new(rtd);
-
-	return 0;
-}
-
-static void snd_soc_platform_drv_pcm_free(struct snd_soc_component *component,
-					  struct snd_pcm *pcm)
-{
-	struct snd_soc_platform *platform = snd_soc_component_to_platform(component);
-
-	if (platform->driver->pcm_free)
-		platform->driver->pcm_free(pcm);
-}
-
-/**
- * snd_soc_add_platform - Add a platform to the ASoC core
- * @dev: The parent device for the platform
- * @platform: The platform to add
- * @platform_drv: The driver for the platform
- */
-int snd_soc_add_platform(struct device *dev, struct snd_soc_platform *platform,
-		const struct snd_soc_platform_driver *platform_drv)
-{
-	int ret;
-
-	ret = snd_soc_component_initialize(&platform->component,
-			&platform_drv->component_driver, dev);
-	if (ret)
-		return ret;
-
-	platform->dev = dev;
-	platform->driver = platform_drv;
-
-	if (platform_drv->probe)
-		platform->component.probe = snd_soc_platform_drv_probe;
-	if (platform_drv->remove)
-		platform->component.remove = snd_soc_platform_drv_remove;
-	if (platform_drv->pcm_new)
-		platform->component.pcm_new = snd_soc_platform_drv_pcm_new;
-	if (platform_drv->pcm_free)
-		platform->component.pcm_free = snd_soc_platform_drv_pcm_free;
-
-#ifdef CONFIG_DEBUG_FS
-	platform->component.debugfs_prefix = "platform";
-#endif
-
-	mutex_lock(&client_mutex);
-	snd_soc_component_add_unlocked(&platform->component);
-	list_add(&platform->list, &platform_list);
-	mutex_unlock(&client_mutex);
-
-	dev_dbg(dev, "ASoC: Registered platform '%s'\n",
-		platform->component.name);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(snd_soc_add_platform);
-
-/**
- * snd_soc_register_platform - Register a platform with the ASoC core
- *
- * @dev: The device for the platform
- * @platform_drv: The driver for the platform
- */
-int snd_soc_register_platform(struct device *dev,
-		const struct snd_soc_platform_driver *platform_drv)
-{
-	struct snd_soc_platform *platform;
-	int ret;
-
-	dev_dbg(dev, "ASoC: platform register %s\n", dev_name(dev));
-
-	platform = kzalloc(sizeof(struct snd_soc_platform), GFP_KERNEL);
-	if (platform == NULL)
-		return -ENOMEM;
-
-	ret = snd_soc_add_platform(dev, platform, platform_drv);
-	if (ret)
-		kfree(platform);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_register_platform);
-
-/**
- * snd_soc_remove_platform - Remove a platform from the ASoC core
- * @platform: the platform to remove
- */
-void snd_soc_remove_platform(struct snd_soc_platform *platform)
-{
-
-	mutex_lock(&client_mutex);
-	list_del(&platform->list);
-	snd_soc_component_del_unlocked(&platform->component);
-	mutex_unlock(&client_mutex);
-
-	dev_dbg(platform->dev, "ASoC: Unregistered platform '%s'\n",
-		platform->component.name);
-
-	snd_soc_component_cleanup(&platform->component);
-}
-EXPORT_SYMBOL_GPL(snd_soc_remove_platform);
-
-struct snd_soc_platform *snd_soc_lookup_platform(struct device *dev)
-{
-	struct snd_soc_platform *platform;
-
-	mutex_lock(&client_mutex);
-	list_for_each_entry(platform, &platform_list, list) {
-		if (dev == platform->dev) {
-			mutex_unlock(&client_mutex);
-			return platform;
-		}
-	}
-	mutex_unlock(&client_mutex);
-
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(snd_soc_lookup_platform);
-
-/**
- * snd_soc_unregister_platform - Unregister a platform from the ASoC core
- *
- * @dev: platform to unregister
- */
-void snd_soc_unregister_platform(struct device *dev)
-{
-	struct snd_soc_platform *platform;
-
-	platform = snd_soc_lookup_platform(dev);
-	if (!platform)
-		return;
-
-	snd_soc_remove_platform(platform);
-	kfree(platform);
-}
-EXPORT_SYMBOL_GPL(snd_soc_unregister_platform);
-
-static int snd_soc_codec_drv_probe(struct snd_soc_component *component)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	return codec->driver->probe(codec);
-}
-
-static void snd_soc_codec_drv_remove(struct snd_soc_component *component)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	codec->driver->remove(codec);
-}
-
-static int snd_soc_codec_drv_suspend(struct snd_soc_component *component)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	return codec->driver->suspend(codec);
-}
-
-static int snd_soc_codec_drv_resume(struct snd_soc_component *component)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	return codec->driver->resume(codec);
-}
-
-static int snd_soc_codec_drv_write(struct snd_soc_component *component,
-	unsigned int reg, unsigned int val)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	return codec->driver->write(codec, reg, val);
-}
-
-static int snd_soc_codec_drv_read(struct snd_soc_component *component,
-	unsigned int reg, unsigned int *val)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	*val = codec->driver->read(codec, reg);
-
-	return 0;
-}
-
-static int snd_soc_codec_set_sysclk_(struct snd_soc_component *component,
-			  int clk_id, int source, unsigned int freq, int dir)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	return snd_soc_codec_set_sysclk(codec, clk_id, source, freq, dir);
-}
-
-static int snd_soc_codec_set_pll_(struct snd_soc_component *component,
-				  int pll_id, int source, unsigned int freq_in,
-				  unsigned int freq_out)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	return snd_soc_codec_set_pll(codec, pll_id, source, freq_in, freq_out);
-}
-
-static int snd_soc_codec_set_jack_(struct snd_soc_component *component,
-			       struct snd_soc_jack *jack, void *data)
-{
-	struct snd_soc_codec *codec = snd_soc_component_to_codec(component);
-
-	return snd_soc_codec_set_jack(codec, jack, data);
-}
-
-static int snd_soc_codec_set_bias_level(struct snd_soc_dapm_context *dapm,
-	enum snd_soc_bias_level level)
-{
-	struct snd_soc_codec *codec = snd_soc_dapm_to_codec(dapm);
-
-	return codec->driver->set_bias_level(codec, level);
-}
-
-/**
- * snd_soc_register_codec - Register a codec with the ASoC core
- *
- * @dev: The parent device for this codec
- * @codec_drv: Codec driver
- * @dai_drv: The associated DAI driver
- * @num_dai: Number of DAIs
- */
-int snd_soc_register_codec(struct device *dev,
-			   const struct snd_soc_codec_driver *codec_drv,
-			   struct snd_soc_dai_driver *dai_drv,
-			   int num_dai)
-{
-	struct snd_soc_dapm_context *dapm;
-	struct snd_soc_codec *codec;
-	struct snd_soc_dai *dai;
-	int ret, i;
-
-	dev_dbg(dev, "codec register %s\n", dev_name(dev));
-
-	codec = kzalloc(sizeof(struct snd_soc_codec), GFP_KERNEL);
-	if (codec == NULL)
-		return -ENOMEM;
-
-	codec->component.codec = codec;
-
-	ret = snd_soc_component_initialize(&codec->component,
-			&codec_drv->component_driver, dev);
-	if (ret)
-		goto err_free;
-
-	if (codec_drv->probe)
-		codec->component.probe = snd_soc_codec_drv_probe;
-	if (codec_drv->remove)
-		codec->component.remove = snd_soc_codec_drv_remove;
-	if (codec_drv->suspend)
-		codec->component.suspend = snd_soc_codec_drv_suspend;
-	if (codec_drv->resume)
-		codec->component.resume = snd_soc_codec_drv_resume;
-	if (codec_drv->write)
-		codec->component.write = snd_soc_codec_drv_write;
-	if (codec_drv->read)
-		codec->component.read = snd_soc_codec_drv_read;
-	if (codec_drv->set_sysclk)
-		codec->component.set_sysclk = snd_soc_codec_set_sysclk_;
-	if (codec_drv->set_pll)
-		codec->component.set_pll = snd_soc_codec_set_pll_;
-	if (codec_drv->set_jack)
-		codec->component.set_jack = snd_soc_codec_set_jack_;
-	codec->component.ignore_pmdown_time = codec_drv->ignore_pmdown_time;
-
-	dapm = snd_soc_codec_get_dapm(codec);
-	dapm->idle_bias_off = codec_drv->idle_bias_off;
-	dapm->suspend_bias_off = codec_drv->suspend_bias_off;
-	if (codec_drv->seq_notifier)
-		dapm->seq_notifier = codec_drv->seq_notifier;
-	if (codec_drv->set_bias_level)
-		dapm->set_bias_level = snd_soc_codec_set_bias_level;
-	codec->dev = dev;
-	codec->driver = codec_drv;
-	codec->component.val_bytes = codec_drv->reg_word_size;
-
-#ifdef CONFIG_DEBUG_FS
-	codec->component.init_debugfs = soc_init_codec_debugfs;
-	codec->component.debugfs_prefix = "codec";
-#endif
-
-	if (codec_drv->get_regmap)
-		codec->component.regmap = codec_drv->get_regmap(dev);
-
-	for (i = 0; i < num_dai; i++) {
-		convert_endianness_formats(&dai_drv[i].playback);
-		convert_endianness_formats(&dai_drv[i].capture);
-	}
-
-	ret = snd_soc_register_dais(&codec->component, dai_drv, num_dai, false);
-	if (ret < 0) {
-		dev_err(dev, "ASoC: Failed to register DAIs: %d\n", ret);
-		goto err_cleanup;
-	}
-
-	list_for_each_entry(dai, &codec->component.dai_list, list)
-		dai->codec = codec;
-
-	mutex_lock(&client_mutex);
-	snd_soc_component_add_unlocked(&codec->component);
-	list_add(&codec->list, &codec_list);
-	mutex_unlock(&client_mutex);
-
-	dev_dbg(codec->dev, "ASoC: Registered codec '%s'\n",
-		codec->component.name);
-	return 0;
-
-err_cleanup:
-	snd_soc_component_cleanup(&codec->component);
-err_free:
-	kfree(codec);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_register_codec);
-
-/**
- * snd_soc_unregister_codec - Unregister a codec from the ASoC core
- *
- * @dev: codec to unregister
- */
-void snd_soc_unregister_codec(struct device *dev)
-{
-	struct snd_soc_codec *codec;
-
-	mutex_lock(&client_mutex);
-	list_for_each_entry(codec, &codec_list, list) {
-		if (dev == codec->dev)
-			goto found;
-	}
-	mutex_unlock(&client_mutex);
-	return;
-
-found:
-	list_del(&codec->list);
-	snd_soc_component_del_unlocked(&codec->component);
-	mutex_unlock(&client_mutex);
-
-	dev_dbg(codec->dev, "ASoC: Unregistered codec '%s'\n",
-			codec->component.name);
-
-	snd_soc_component_cleanup(&codec->component);
-	snd_soc_cache_exit(codec);
-	kfree(codec);
-}
-EXPORT_SYMBOL_GPL(snd_soc_unregister_codec);
-
 /* Retrieve a card's name from device tree */
 int snd_soc_of_parse_card_name(struct snd_soc_card *card,
 			       const char *propname)
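
To make the shape of this refactor concrete: soc-core.c stops caching driver
callbacks in each component and dispatches through component->driver at every
call site instead, treating each callback as optional. A minimal,
self-contained userspace sketch of that pattern follows; every name in it is
an illustrative stand-in, not kernel API.

#include <stdio.h>

struct comp_driver {
	const char *name;
	int (*probe)(void);			/* optional */
};

struct component {
	const struct comp_driver *driver;	/* single source of truth */
};

static int comp_probe(struct component *c)
{
	/* one indirection at the call site, no cached copy to keep in sync */
	if (c->driver->probe)
		return c->driver->probe();
	return 0;				/* a missing callback is fine */
}

static int demo_probe(void)
{
	puts("probed");
	return 0;
}

int main(void)
{
	const struct comp_driver drv = { .name = "demo", .probe = demo_probe };
	struct component c = { .driver = &drv };

	return comp_probe(&c);			/* prints "probed" */
}
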
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 2d97091..255cad4 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -433,6 +433,8 @@
 static void dapm_kcontrol_free(struct snd_kcontrol *kctl)
 {
 	struct dapm_kcontrol_data *data = snd_kcontrol_chip(kctl);
+
+	list_del(&data->paths);
 	kfree(data->wlist);
 	kfree(data);
 }
@@ -724,18 +726,14 @@
 		item = 0;
 	}
 
-	for (i = 0; i < e->items; i++) {
-		if (!(strcmp(control_name, e->texts[i]))) {
-			path->name = e->texts[i];
-			if (i == item)
-				path->connect = 1;
-			else
-				path->connect = 0;
-			return 0;
-		}
-	}
+	i = match_string(e->texts, e->items, control_name);
+	if (i < 0)
+		return -ENODEV;
 
-	return -ENODEV;
+	path->name = e->texts[i];
+	path->connect = (i == item);
+	return 0;
+
 }
 
 /* set up initial codec paths */
@@ -1088,7 +1086,7 @@
 	list_for_each(it, widgets)
 		size++;
 
-	*list = kzalloc(sizeof(**list) + size * sizeof(*w), GFP_KERNEL);
+	*list = kzalloc(struct_size(*list, widgets, size), GFP_KERNEL);
 	if (*list == NULL)
 		return -ENOMEM;
 
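
Two helpers carry this soc-dapm.c hunk: match_string(), which replaces the
open-coded text lookup, and struct_size(), which computes
sizeof(**list) + size * sizeof((*list)->widgets[0]) with overflow checking.
A simplified userspace re-implementation of the match_string() semantics,
for reference only (the kernel returns -EINVAL rather than -1 on a miss):

#include <stdio.h>
#include <string.h>

static int match_string(const char * const *array, size_t n,
			const char *string)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (array[i] && !strcmp(array[i], string))
			return (int)i;
	}
	return -1;	/* kernel: -EINVAL */
}

int main(void)
{
	static const char * const texts[] = { "Mic", "Line", "Off" };
	int i = match_string(texts, 3, "Line");

	printf("matched index %d\n", i);	/* matched index 1 */
	return 0;
}
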
diff --git a/sound/soc/soc-devres.c b/sound/soc/soc-devres.c
index a57921e..7ac745d 100644
--- a/sound/soc/soc-devres.c
+++ b/sound/soc/soc-devres.c
@@ -52,41 +52,6 @@
 }
 EXPORT_SYMBOL_GPL(devm_snd_soc_register_component);
 
-static void devm_platform_release(struct device *dev, void *res)
-{
-	snd_soc_unregister_platform(*(struct device **)res);
-}
-
-/**
- * devm_snd_soc_register_platform - resource managed platform registration
- * @dev: Device used to manage platform
- * @platform_drv: platform to register
- *
- * Register a platform driver with automatic unregistration when the device is
- * unregistered.
- */
-int devm_snd_soc_register_platform(struct device *dev,
-			const struct snd_soc_platform_driver *platform_drv)
-{
-	struct device **ptr;
-	int ret;
-
-	ptr = devres_alloc(devm_platform_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return -ENOMEM;
-
-	ret = snd_soc_register_platform(dev, platform_drv);
-	if (ret == 0) {
-		*ptr = dev;
-		devres_add(dev, ptr);
-	} else {
-		devres_free(ptr);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(devm_snd_soc_register_platform);
-
 static void devm_card_release(struct device *dev, void *res)
 {
 	snd_soc_unregister_card(*(struct snd_soc_card **)res);
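
The removed devm_snd_soc_register_platform() was a textbook devres wrapper:
allocate a release record, attempt the raw registration, keep the record on
success (devres_add) and drop it on failure (devres_free). A toy,
self-contained sketch of that pattern -- the miniature devres core below is
invented purely for illustration:

#include <stdio.h>
#include <stdlib.h>

struct devres {
	void (*release)(void *payload);
	void *payload;
	struct devres *next;
};

struct device {
	struct devres *res;		/* toy per-device resource list */
};

static void devres_add(struct device *dev, struct devres *dr)
{
	dr->next = dev->res;
	dev->res = dr;
}

static void device_teardown(struct device *dev)
{
	struct devres *dr;

	while ((dr = dev->res)) {	/* release in reverse order */
		dev->res = dr->next;
		dr->release(dr->payload);
		free(dr);
	}
}

/* the raw, non-managed register/unregister pair being wrapped */
static int register_thing(struct device *dev)
{
	(void)dev;
	puts("registered");
	return 0;
}

static void unregister_thing(void *payload)
{
	(void)payload;
	puts("unregistered");
}

static int devm_register_thing(struct device *dev)
{
	struct devres *dr = malloc(sizeof(*dr));
	int ret;

	if (!dr)
		return -1;

	ret = register_thing(dev);
	if (ret) {
		free(dr);		/* devres_free() in the kernel */
		return ret;
	}

	dr->release = unregister_thing;
	dr->payload = dev;
	devres_add(dev, dr);		/* unregisters at device teardown */
	return 0;
}

int main(void)
{
	struct device dev = { 0 };

	devm_register_thing(&dev);
	device_teardown(&dev);	/* "registered" then "unregistered" */
	return 0;
}
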
diff --git a/sound/soc/soc-io.c b/sound/soc/soc-io.c
index d36a192..026cd53 100644
--- a/sound/soc/soc-io.c
+++ b/sound/soc/soc-io.c
@@ -32,8 +32,6 @@
 
 	if (component->regmap)
 		ret = regmap_read(component->regmap, reg, val);
-	else if (component->read)
-		ret = component->read(component, reg, val);
 	else if (component->driver->read) {
 		*val = component->driver->read(component, reg);
 		ret = 0;
@@ -72,8 +70,6 @@
 {
 	if (component->regmap)
 		return regmap_write(component->regmap, reg, val);
-	else if (component->write)
-		return component->write(component, reg, val);
 	else if (component->driver->write)
 		return component->driver->write(component, reg, val);
 	else
@@ -209,82 +205,3 @@
 	return old != new;
 }
 EXPORT_SYMBOL_GPL(snd_soc_component_test_bits);
-
-unsigned int snd_soc_read(struct snd_soc_codec *codec, unsigned int reg)
-{
-	unsigned int val;
-	int ret;
-
-	ret = snd_soc_component_read(&codec->component, reg, &val);
-	if (ret < 0)
-		return -1;
-
-	return val;
-}
-EXPORT_SYMBOL_GPL(snd_soc_read);
-
-int snd_soc_write(struct snd_soc_codec *codec, unsigned int reg,
-	unsigned int val)
-{
-	return snd_soc_component_write(&codec->component, reg, val);
-}
-EXPORT_SYMBOL_GPL(snd_soc_write);
-
-/**
- * snd_soc_update_bits - update codec register bits
- * @codec: audio codec
- * @reg: codec register
- * @mask: register mask
- * @value: new value
- *
- * Writes new register value.
- *
- * Returns 1 for change, 0 for no change, or negative error code.
- */
-int snd_soc_update_bits(struct snd_soc_codec *codec, unsigned int reg,
-				unsigned int mask, unsigned int value)
-{
-	return snd_soc_component_update_bits(&codec->component, reg, mask,
-		value);
-}
-EXPORT_SYMBOL_GPL(snd_soc_update_bits);
-
-/**
- * snd_soc_test_bits - test register for change
- * @codec: audio codec
- * @reg: codec register
- * @mask: register mask
- * @value: new value
- *
- * Tests a register with a new value and checks if the new value is
- * different from the old value.
- *
- * Returns 1 for change else 0.
- */
-int snd_soc_test_bits(struct snd_soc_codec *codec, unsigned int reg,
-				unsigned int mask, unsigned int value)
-{
-	return snd_soc_component_test_bits(&codec->component, reg, mask, value);
-}
-EXPORT_SYMBOL_GPL(snd_soc_test_bits);
-
-int snd_soc_platform_read(struct snd_soc_platform *platform,
-					unsigned int reg)
-{
-	unsigned int val;
-	int ret;
-
-	ret = snd_soc_component_read(&platform->component, reg, &val);
-	if (ret < 0)
-		return -1;
-
-	return val;
-}
-EXPORT_SYMBOL_GPL(snd_soc_platform_read);
-
-int snd_soc_platform_write(struct snd_soc_platform *platform,
-					 unsigned int reg, unsigned int val)
-{
-	return snd_soc_component_write(&platform->component, reg, val);
-}
-EXPORT_SYMBOL_GPL(snd_soc_platform_write);
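
What remains of the component I/O path after this cleanup is a plain
fallback chain: regmap first, then the driver callback, else an error. A
standalone sketch with stand-in types (the stubbed regmap read that always
returns 0 is an assumption of the sketch, not kernel behaviour):

#include <stdio.h>

struct comp_driver {
	unsigned int (*read)(unsigned int reg);	/* optional */
};

struct component {
	void *regmap;				/* stand-in for struct regmap */
	const struct comp_driver *driver;
};

static int component_read(struct component *c, unsigned int reg,
			  unsigned int *val)
{
	if (c->regmap) {
		*val = 0;	/* regmap_read(c->regmap, reg, val) in-kernel */
		return 0;
	}
	if (c->driver->read) {
		*val = c->driver->read(reg);
		return 0;
	}
	return -1;		/* -EIO in the kernel */
}

static unsigned int demo_read(unsigned int reg)
{
	return reg * 2;
}

int main(void)
{
	const struct comp_driver drv = { .read = demo_read };
	struct component c = { .regmap = NULL, .driver = &drv };
	unsigned int val;

	if (!component_read(&c, 21, &val))
		printf("reg 21 = %u\n", val);	/* reg 21 = 42 */
	return 0;
}
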
diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c
index 99902ae1..b2b1604 100644
--- a/sound/soc/soc-jack.c
+++ b/sound/soc/soc-jack.c
@@ -29,24 +29,6 @@
 };
 
 /**
- * snd_soc_codec_set_jack - configure codec jack.
- * @codec: CODEC
- * @jack: structure to use for the jack
- * @data: can be used if codec driver need extra data for configuring jack
- *
- * Configures and enables jack detection function.
- */
-int snd_soc_codec_set_jack(struct snd_soc_codec *codec,
-	struct snd_soc_jack *jack, void *data)
-{
-	if (codec->driver->set_jack)
-		return codec->driver->set_jack(codec, jack, data);
-	else
-		return -ENOTSUPP;
-}
-EXPORT_SYMBOL_GPL(snd_soc_codec_set_jack);
-
-/**
  * snd_soc_component_set_jack - configure component jack.
  * @component: COMPONENTs
  * @jack: structure to use for the jack
@@ -57,10 +39,6 @@
 int snd_soc_component_set_jack(struct snd_soc_component *component,
 			       struct snd_soc_jack *jack, void *data)
 {
-	/* will be removed */
-	if (component->set_jack)
-		return component->set_jack(component, jack, data);
-
 	if (component->driver->set_jack)
 		return component->driver->set_jack(component, jack, data);
 
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 68d9dc9..5e7ae47 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -135,7 +135,6 @@
 {
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_component *component;
-	int i;
 	bool ignore = true;
 
 	if (!rtd->pmdown_time || rtd->dai_link->ignore_pmdown_time)
@@ -147,10 +146,6 @@
 		ignore &= !component->driver->use_pmdown_time;
 	}
 
-	/* this will be removed */
-	for (i = 0; i < rtd->num_codecs; i++)
-		ignore &= rtd->codec_dais[i]->component->ignore_pmdown_time;
-
 	return ignore;
 }
 
@@ -456,13 +451,12 @@
 /*
  * Called by ALSA when a PCM substream is opened, the runtime->hw record is
  * then initialized and any private data can be allocated. This also calls
- * startup for the cpu DAI, platform, machine and codec DAI.
+ * startup for the cpu DAI, component, machine and codec DAI.
  */
 static int soc_pcm_open(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_pcm_runtime *runtime = substream->runtime;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -492,23 +486,10 @@
 		}
 	}
 
-	if (platform && platform->driver->ops && platform->driver->ops->open) {
-		ret = platform->driver->ops->open(substream);
-		if (ret < 0) {
-			dev_err(platform->dev, "ASoC: can't open platform"
-				" %s: %d\n", platform->component.name, ret);
-			goto platform_err;
-		}
-	}
-
 	ret = 0;
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->open)
 			continue;
@@ -517,7 +498,7 @@
 		if (__ret < 0) {
 			dev_err(component->dev,
 				"ASoC: can't open component %s: %d\n",
-				component->name, ret);
+				component->name, __ret);
 			ret = __ret;
 		}
 	}
@@ -634,10 +615,6 @@
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->close)
 			continue;
@@ -645,10 +622,6 @@
 		component->driver->ops->close(substream);
 	}
 
-	if (platform && platform->driver->ops && platform->driver->ops->close)
-		platform->driver->ops->close(substream);
-
-platform_err:
 	if (cpu_dai->driver->ops->shutdown)
 		cpu_dai->driver->ops->shutdown(substream, cpu_dai);
 out:
@@ -701,13 +674,12 @@
 
 /*
  * Called by ALSA when a PCM substream is closed. Private data can be
- * freed here. The cpu DAI, codec DAI, machine and platform are also
+ * freed here. The cpu DAI, codec DAI, machine and components are also
  * shutdown.
  */
 static int soc_pcm_close(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -742,16 +714,9 @@
 	if (rtd->dai_link->ops->shutdown)
 		rtd->dai_link->ops->shutdown(substream);
 
-	if (platform && platform->driver->ops && platform->driver->ops->close)
-		platform->driver->ops->close(substream);
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->close)
 			continue;
@@ -805,7 +770,6 @@
 static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -823,22 +787,9 @@
 		}
 	}
 
-	if (platform && platform->driver->ops && platform->driver->ops->prepare) {
-		ret = platform->driver->ops->prepare(substream);
-		if (ret < 0) {
-			dev_err(platform->dev, "ASoC: platform prepare error:"
-				" %d\n", ret);
-			goto out;
-		}
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->prepare)
 			continue;
@@ -932,7 +883,6 @@
 				struct snd_pcm_hw_params *params)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -994,23 +944,10 @@
 	if (ret < 0)
 		goto interface_err;
 
-	if (platform && platform->driver->ops && platform->driver->ops->hw_params) {
-		ret = platform->driver->ops->hw_params(substream, params);
-		if (ret < 0) {
-			dev_err(platform->dev, "ASoC: %s hw params failed: %d\n",
-			       platform->component.name, ret);
-			goto platform_err;
-		}
-	}
-
 	ret = 0;
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->hw_params)
 			continue;
@@ -1019,7 +956,7 @@
 		if (__ret < 0) {
 			dev_err(component->dev,
 				"ASoC: %s hw params failed: %d\n",
-				component->name, ret);
+				component->name, __ret);
 			ret = __ret;
 		}
 	}
@@ -1043,10 +980,6 @@
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->hw_free)
 			continue;
@@ -1054,10 +987,6 @@
 		component->driver->ops->hw_free(substream);
 	}
 
-	if (platform && platform->driver->ops && platform->driver->ops->hw_free)
-		platform->driver->ops->hw_free(substream);
-
-platform_err:
 	if (cpu_dai->driver->ops->hw_free)
 		cpu_dai->driver->ops->hw_free(substream, cpu_dai);
 
@@ -1085,7 +1014,6 @@
 static int soc_pcm_hw_free(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -1123,18 +1051,10 @@
 	if (rtd->dai_link->ops->hw_free)
 		rtd->dai_link->ops->hw_free(substream);
 
-	/* free any DMA resources */
-	if (platform && platform->driver->ops && platform->driver->ops->hw_free)
-		platform->driver->ops->hw_free(substream);
-
 	/* free any component resources */
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->hw_free)
 			continue;
@@ -1159,7 +1079,6 @@
 static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -1176,19 +1095,9 @@
 		}
 	}
 
-	if (platform && platform->driver->ops && platform->driver->ops->trigger) {
-		ret = platform->driver->ops->trigger(substream, cmd);
-		if (ret < 0)
-			return ret;
-	}
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->trigger)
 			continue;
@@ -1240,13 +1149,12 @@
 }
 /*
  * soc level wrapper for pointer callback
- * If cpu_dai, codec_dai, platform driver has the delay callback, than
+ * If the cpu_dai, codec_dai or component driver has a delay callback, then
  * the runtime->delay will be updated accordingly.
  */
 static snd_pcm_uframes_t soc_pcm_pointer(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
@@ -1257,16 +1165,9 @@
 	snd_pcm_sframes_t codec_delay = 0;
 	int i;
 
-	if (platform && platform->driver->ops && platform->driver->ops->pointer)
-		offset = platform->driver->ops->pointer(substream);
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->pointer)
 			continue;
@@ -1878,14 +1779,15 @@
 
 		/* Symmetry only applies if we've got an active stream. */
 		if (rtd->cpu_dai->active) {
-			err = soc_pcm_apply_symmetry(be_substream, rtd->cpu_dai);
+			err = soc_pcm_apply_symmetry(fe_substream,
+						     rtd->cpu_dai);
 			if (err < 0)
 				return err;
 		}
 
 		for (i = 0; i < rtd->num_codecs; i++) {
 			if (rtd->codec_dais[i]->active) {
-				err = soc_pcm_apply_symmetry(be_substream,
+				err = soc_pcm_apply_symmetry(fe_substream,
 							     rtd->codec_dais[i]);
 				if (err < 0)
 					return err;
@@ -1965,8 +1867,10 @@
 			continue;
 
 		if ((be->dpcm[stream].state != SND_SOC_DPCM_STATE_HW_FREE) &&
-		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN))
-			continue;
+		    (be->dpcm[stream].state != SND_SOC_DPCM_STATE_OPEN)) {
+			soc_pcm_hw_free(be_substream);
+			be->dpcm[stream].state = SND_SOC_DPCM_STATE_HW_FREE;
+		}
 
 		dev_dbg(be->dev, "ASoC: close BE %s\n",
 			be->dai_link->name);
@@ -2470,20 +2374,12 @@
 		     unsigned int cmd, void *arg)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 
-	if (platform && platform->driver->ops && platform->driver->ops->ioctl)
-		return platform->driver->ops->ioctl(substream, cmd, arg);
-
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		/* ignore duplication for now */
-		if (platform && (component == &platform->component))
-			continue;
-
 		if (!component->driver->ops ||
 		    !component->driver->ops->ioctl)
 			continue;
@@ -2847,8 +2743,8 @@
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		if (component->pcm_free)
-			component->pcm_free(component, pcm);
+		if (component->driver->pcm_free)
+			component->driver->pcm_free(pcm);
 	}
 }
 
@@ -2987,7 +2883,6 @@
 /* create a new pcm */
 int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 {
-	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_dai *codec_dai;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	struct snd_soc_component *component;
@@ -3106,16 +3001,6 @@
 			rtd->ops.mmap		= soc_rtdcom_mmap;
 	}
 
-	/* overwrite */
-	if (platform && platform->driver->ops) {
-		rtd->ops.ack		= platform->driver->ops->ack;
-		rtd->ops.copy_user	= platform->driver->ops->copy_user;
-		rtd->ops.copy_kernel	= platform->driver->ops->copy_kernel;
-		rtd->ops.fill_silence	= platform->driver->ops->fill_silence;
-		rtd->ops.page		= platform->driver->ops->page;
-		rtd->ops.mmap		= platform->driver->ops->mmap;
-	}
-
 	if (playback)
 		snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &rtd->ops);
 
@@ -3125,10 +3010,10 @@
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		if (!component->pcm_new)
+		if (!component->driver->pcm_new)
 			continue;
 
-		ret = component->pcm_new(component, rtd);
+		ret = component->driver->pcm_new(rtd);
 		if (ret < 0) {
 			dev_err(component->dev,
 				"ASoC: pcm constructor failed: %d\n",
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 986b8b2..3fd5d9c 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -54,62 +54,6 @@
 #define SOC_TPLG_PASS_START	SOC_TPLG_PASS_MANIFEST
 #define SOC_TPLG_PASS_END	SOC_TPLG_PASS_LINK
 
-/*
- * Old version of ABI structs, supported for backward compatibility.
- */
-
-/* Manifest v4 */
-struct snd_soc_tplg_manifest_v4 {
-	__le32 size;		/* in bytes of this structure */
-	__le32 control_elems;	/* number of control elements */
-	__le32 widget_elems;	/* number of widget elements */
-	__le32 graph_elems;	/* number of graph elements */
-	__le32 pcm_elems;	/* number of PCM elements */
-	__le32 dai_link_elems;	/* number of DAI link elements */
-	struct snd_soc_tplg_private priv;
-} __packed;
-
-/* Stream Capabilities v4 */
-struct snd_soc_tplg_stream_caps_v4 {
-	__le32 size;		/* in bytes of this structure */
-	char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
-	__le64 formats;	/* supported formats SNDRV_PCM_FMTBIT_* */
-	__le32 rates;		/* supported rates SNDRV_PCM_RATE_* */
-	__le32 rate_min;	/* min rate */
-	__le32 rate_max;	/* max rate */
-	__le32 channels_min;	/* min channels */
-	__le32 channels_max;	/* max channels */
-	__le32 periods_min;	/* min number of periods */
-	__le32 periods_max;	/* max number of periods */
-	__le32 period_size_min;	/* min period size bytes */
-	__le32 period_size_max;	/* max period size bytes */
-	__le32 buffer_size_min;	/* min buffer size bytes */
-	__le32 buffer_size_max;	/* max buffer size bytes */
-} __packed;
-
-/* PCM v4 */
-struct snd_soc_tplg_pcm_v4 {
-	__le32 size;		/* in bytes of this structure */
-	char pcm_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
-	char dai_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
-	__le32 pcm_id;		/* unique ID - used to match with DAI link */
-	__le32 dai_id;		/* unique ID - used to match */
-	__le32 playback;	/* supports playback mode */
-	__le32 capture;		/* supports capture mode */
-	__le32 compress;	/* 1 = compressed; 0 = PCM */
-	struct snd_soc_tplg_stream stream[SND_SOC_TPLG_STREAM_CONFIG_MAX]; /* for DAI link */
-	__le32 num_streams;	/* number of streams */
-	struct snd_soc_tplg_stream_caps_v4 caps[2]; /* playback and capture for DAI */
-} __packed;
-
-/* Physical link config v4 */
-struct snd_soc_tplg_link_config_v4 {
-	__le32 size;            /* in bytes of this structure */
-	__le32 id;              /* unique ID - used to match */
-	struct snd_soc_tplg_stream stream[SND_SOC_TPLG_STREAM_CONFIG_MAX]; /* supported configs playback and captrure */
-	__le32 num_streams;     /* number of streams */
-} __packed;
-
 /* topology context */
 struct soc_tplg {
 	const struct firmware *fw;
@@ -1754,6 +1698,9 @@
 		set_stream_info(stream, caps);
 	}
 
+	if (pcm->compress)
+		dai_drv->compress_new = snd_soc_new_compress;
+
 	/* pass control to component driver for optional further init */
 	ret = soc_tplg_dai_load(tplg, dai_drv);
 	if (ret < 0) {
@@ -2006,6 +1953,21 @@
 
 		link->dai_fmt = hw_config->fmt & SND_SOC_DAIFMT_FORMAT_MASK;
 
+		/* clock gating */
+		switch (hw_config->clock_gated) {
+		case SND_SOC_TPLG_DAI_CLK_GATE_GATED:
+			link->dai_fmt |= SND_SOC_DAIFMT_GATED;
+			break;
+
+		case SND_SOC_TPLG_DAI_CLK_GATE_CONT:
+			link->dai_fmt |= SND_SOC_DAIFMT_CONT;
+			break;
+
+		default:
+			/* ignore the value */
+			break;
+		}
+
 		/* clock signal polarity */
 		invert_bclk = hw_config->invert_bclk;
 		invert_fsync = hw_config->invert_fsync;
@@ -2019,13 +1981,15 @@
 			link->dai_fmt |= SND_SOC_DAIFMT_IB_IF;
 
 		/* clock masters */
-		bclk_master = hw_config->bclk_master;
-		fsync_master = hw_config->fsync_master;
-		if (!bclk_master && !fsync_master)
+		bclk_master = (hw_config->bclk_master ==
+			       SND_SOC_TPLG_BCLK_CM);
+		fsync_master = (hw_config->fsync_master ==
+				SND_SOC_TPLG_FSYNC_CM);
+		if (bclk_master && fsync_master)
 			link->dai_fmt |= SND_SOC_DAIFMT_CBM_CFM;
-		else if (bclk_master && !fsync_master)
-			link->dai_fmt |= SND_SOC_DAIFMT_CBS_CFM;
 		else if (!bclk_master && fsync_master)
+			link->dai_fmt |= SND_SOC_DAIFMT_CBS_CFM;
+		else if (bclk_master && !fsync_master)
 			link->dai_fmt |= SND_SOC_DAIFMT_CBM_CFS;
 		else
 			link->dai_fmt |= SND_SOC_DAIFMT_CBS_CFS;
@@ -2293,8 +2257,11 @@
 	*manifest = NULL;
 
 	if (src->size != sizeof(*src_v4)) {
-		dev_err(tplg->dev, "ASoC: invalid manifest size\n");
-		return -EINVAL;
+		dev_warn(tplg->dev, "ASoC: invalid manifest size %d\n",
+			 src->size);
+		if (src->size)
+			return -EINVAL;
+		src->size = sizeof(*src_v4);
 	}
 
 	dev_warn(tplg->dev, "ASoC: old version of manifest\n");
diff --git a/sound/soc/uniphier/aio-compress.c b/sound/soc/uniphier/aio-compress.c
index 4c1027a..17f773a 100644
--- a/sound/soc/uniphier/aio-compress.c
+++ b/sound/soc/uniphier/aio-compress.c
@@ -3,19 +3,6 @@
 // Socionext UniPhier AIO Compress Audio driver.
 //
 // Copyright (c) 2017-2018 Socionext Inc.
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; version 2
-// of the License.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 #include <linux/bitfield.h>
 #include <linux/circ_buf.h>
diff --git a/sound/soc/uniphier/aio-core.c b/sound/soc/uniphier/aio-core.c
index 6d50042..638cb3f 100644
--- a/sound/soc/uniphier/aio-core.c
+++ b/sound/soc/uniphier/aio-core.c
@@ -3,19 +3,6 @@
 // Socionext UniPhier AIO ALSA common driver.
 //
 // Copyright (c) 2016-2018 Socionext Inc.
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; version 2
-// of the License.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 #include <linux/bitfield.h>
 #include <linux/errno.h>
@@ -673,6 +660,64 @@
 }
 
 /**
+ * aio_port_get_volume - get volume of AIO port block
+ * @sub: the AIO substream pointer
+ *
+ * Return: the current volume, in the range 0x0000 - 0xffff
+ */
+int aio_port_get_volume(struct uniphier_aio_sub *sub)
+{
+	struct regmap *r = sub->aio->chip->regmap;
+	u32 v;
+
+	regmap_read(r, OPORTMXTYVOLGAINSTATUS(sub->swm->oport.map, 0), &v);
+
+	return FIELD_GET(OPORTMXTYVOLGAINSTATUS_CUR_MASK, v);
+}
+
+/**
+ * aio_port_set_volume - set volume of AIO port block
+ * @sub: the AIO substream pointer
+ * @vol: target volume, range is 0x0000 - 0xffff.
+ *
+ * Change the digital volume and perform a fade-out/fade-in effect for the
+ * specified output slot of the port. The gained PCM value is calculated as:
+ *   Gained = Original * vol / 0x4000
+ */
+void aio_port_set_volume(struct uniphier_aio_sub *sub, int vol)
+{
+	struct regmap *r = sub->aio->chip->regmap;
+	int oport_map = sub->swm->oport.map;
+	int cur, diff, slope = 0, fs;
+
+	if (sub->swm->dir == PORT_DIR_INPUT)
+		return;
+
+	cur = aio_port_get_volume(sub);
+	diff = abs(vol - cur);
+	fs = params_rate(&sub->params);
+	if (fs)
+		slope = diff / AUD_VOL_FADE_TIME * 1000 / fs;
+	slope = max(1, slope);
+
+	regmap_update_bits(r, OPORTMXTYVOLPARA1(oport_map, 0),
+			   OPORTMXTYVOLPARA1_SLOPEU_MASK, slope << 16);
+	regmap_update_bits(r, OPORTMXTYVOLPARA2(oport_map, 0),
+			   OPORTMXTYVOLPARA2_TARGET_MASK, vol);
+
+	if (cur < vol)
+		regmap_update_bits(r, OPORTMXTYVOLPARA2(oport_map, 0),
+				   OPORTMXTYVOLPARA2_FADE_MASK,
+				   OPORTMXTYVOLPARA2_FADE_FADEIN);
+	else
+		regmap_update_bits(r, OPORTMXTYVOLPARA2(oport_map, 0),
+				   OPORTMXTYVOLPARA2_FADE_MASK,
+				   OPORTMXTYVOLPARA2_FADE_FADEOUT);
+
+	regmap_write(r, AOUTFADECTR0, BIT(oport_map));
+}
+
+/**
  * aio_if_set_param - set parameters of AIO DMA I/F block
  * @sub: the AIO substream pointer
  * @pass_through: Zero if sound data is LPCM, otherwise if data is not LPCM.
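
The fade logic in aio_port_set_volume() spreads the volume delta over
AUD_VOL_FADE_TIME worth of samples and clamps the per-sample step to at
least 1. A worked example of that arithmetic, assuming AUD_VOL_FADE_TIME is
a duration in milliseconds (the value 20 below is invented for
illustration):

#include <stdio.h>

#define AUD_VOL_FADE_TIME 20	/* ms; assumed value for this sketch */

static int fade_slope(int cur, int vol, int fs)
{
	int diff = (vol > cur) ? vol - cur : cur - vol;
	int slope = 0;

	if (fs)	/* integer ops evaluate left to right, as in the driver */
		slope = diff / AUD_VOL_FADE_TIME * 1000 / fs;
	return (slope < 1) ? 1 : slope;		/* max(1, slope) */
}

int main(void)
{
	/* full swing 0x0000 -> 0xffff at 48 kHz: 65535/20*1000/48000 = 68 */
	printf("slope = %d per sample\n", fade_slope(0, 0xffff, 48000));
	return 0;
}
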
diff --git a/sound/soc/uniphier/aio-cpu.c b/sound/soc/uniphier/aio-cpu.c
index 1e5eb8e..80daec1 100644
--- a/sound/soc/uniphier/aio-cpu.c
+++ b/sound/soc/uniphier/aio-cpu.c
@@ -3,19 +3,6 @@
 // Socionext UniPhier AIO ALSA CPU DAI driver.
 //
 // Copyright (c) 2016-2018 Socionext Inc.
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; version 2
-// of the License.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 #include <linux/clk.h>
 #include <linux/errno.h>
@@ -45,6 +32,35 @@
 	return chip->plls[pll_id].enable;
 }
 
+/**
+ * find_volume - find the volume-capable AIO substream by HW port number
+ * @chip: the AIO chip pointer
+ * @oport_hw: HW port number, one of AUD_HW_XXXX
+ *
+ * Find the AIO substream in the device list by HW port number. The volume
+ * feature is available only on output and PCM ports; this limitation comes
+ * from the HW specifications.
+ *
+ * Return: The pointer to the AIO substream on success, otherwise NULL.
+ */
+static struct uniphier_aio_sub *find_volume(struct uniphier_aio_chip *chip,
+					    int oport_hw)
+{
+	int i;
+
+	for (i = 0; i < chip->num_aios; i++) {
+		struct uniphier_aio_sub *sub = &chip->aios[i].sub[0];
+
+		if (!sub->swm)
+			continue;
+
+		if (sub->swm->oport.hw == oport_hw)
+			return sub;
+	}
+
+	return NULL;
+}
+
 static bool match_spec(const struct uniphier_aio_spec *spec,
 		       const char *name, int dir)
 {
@@ -300,6 +316,7 @@
 	sub->setting = 1;
 
 	aio_port_reset(sub);
+	aio_port_set_volume(sub, sub->vol);
 	aio_src_reset(sub);
 
 	return 0;
@@ -386,6 +403,8 @@
 
 		sub->swm = &spec->swm;
 		sub->spec = spec;
+
+		sub->vol = AUD_VOL_INIT;
 	}
 
 	aio_iecout_set_enable(aio->chip, true);
@@ -462,8 +481,116 @@
 }
 EXPORT_SYMBOL_GPL(uniphier_aio_dai_resume);
 
+static int uniphier_aio_vol_info(struct snd_kcontrol *kcontrol,
+				 struct snd_ctl_elem_info *uinfo)
+{
+	uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER;
+	uinfo->count = 1;
+	uinfo->value.integer.min = 0;
+	uinfo->value.integer.max = AUD_VOL_MAX;
+
+	return 0;
+}
+
+static int uniphier_aio_vol_get(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct uniphier_aio_chip *chip = snd_soc_component_get_drvdata(comp);
+	struct uniphier_aio_sub *sub;
+	int oport_hw = kcontrol->private_value;
+
+	sub = find_volume(chip, oport_hw);
+	if (!sub)
+		return 0;
+
+	ucontrol->value.integer.value[0] = sub->vol;
+
+	return 0;
+}
+
+static int uniphier_aio_vol_put(struct snd_kcontrol *kcontrol,
+				struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+	struct uniphier_aio_chip *chip = snd_soc_component_get_drvdata(comp);
+	struct uniphier_aio_sub *sub;
+	int oport_hw = kcontrol->private_value;
+
+	sub = find_volume(chip, oport_hw);
+	if (!sub)
+		return 0;
+
+	if (sub->vol == ucontrol->value.integer.value[0])
+		return 0;
+	sub->vol = ucontrol->value.integer.value[0];
+
+	aio_port_set_volume(sub, sub->vol);
+
+	return 0;
+}
+
+static const struct snd_kcontrol_new uniphier_aio_controls[] = {
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+		.name = "HPCMOUT1 Volume",
+		.info = uniphier_aio_vol_info,
+		.get = uniphier_aio_vol_get,
+		.put = uniphier_aio_vol_put,
+		.private_value = AUD_HW_HPCMOUT1,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+		.name = "PCMOUT1 Volume",
+		.info = uniphier_aio_vol_info,
+		.get = uniphier_aio_vol_get,
+		.put = uniphier_aio_vol_put,
+		.private_value = AUD_HW_PCMOUT1,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+		.name = "PCMOUT2 Volume",
+		.info = uniphier_aio_vol_info,
+		.get = uniphier_aio_vol_get,
+		.put = uniphier_aio_vol_put,
+		.private_value = AUD_HW_PCMOUT2,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+		.name = "PCMOUT3 Volume",
+		.info = uniphier_aio_vol_info,
+		.get = uniphier_aio_vol_get,
+		.put = uniphier_aio_vol_put,
+		.private_value = AUD_HW_PCMOUT3,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+		.name = "HIECOUT1 Volume",
+		.info = uniphier_aio_vol_info,
+		.get = uniphier_aio_vol_get,
+		.put = uniphier_aio_vol_put,
+		.private_value = AUD_HW_HIECOUT1,
+	},
+	{
+		.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+		.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+		.name = "IECOUT1 Volume",
+		.info = uniphier_aio_vol_info,
+		.get = uniphier_aio_vol_get,
+		.put = uniphier_aio_vol_put,
+		.private_value = AUD_HW_IECOUT1,
+	},
+};
+
 static const struct snd_soc_component_driver uniphier_aio_component = {
 	.name = "uniphier-aio",
+	.controls = uniphier_aio_controls,
+	.num_controls = ARRAY_SIZE(uniphier_aio_controls),
 };
 
 int uniphier_aio_probe(struct platform_device *pdev)
diff --git a/sound/soc/uniphier/aio-dma.c b/sound/soc/uniphier/aio-dma.c
index ef7bafa..4ec6b65 100644
--- a/sound/soc/uniphier/aio-dma.c
+++ b/sound/soc/uniphier/aio-dma.c
@@ -3,19 +3,6 @@
 // Socionext UniPhier AIO DMA driver.
 //
 // Copyright (c) 2016-2018 Socionext Inc.
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; version 2
-// of the License.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
diff --git a/sound/soc/uniphier/aio-ld11.c b/sound/soc/uniphier/aio-ld11.c
index 4c4dd3d..ab04d33 100644
--- a/sound/soc/uniphier/aio-ld11.c
+++ b/sound/soc/uniphier/aio-ld11.c
@@ -3,19 +3,6 @@
 // Socionext UniPhier AIO ALSA driver for LD11/LD20.
 //
 // Copyright (c) 2016-2018 Socionext Inc.
-//
-// This program is free software; you can redistribute it and/or
-// modify it under the terms of the GNU General Public License
-// as published by the Free Software Foundation; version 2
-// of the License.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program; if not, see <http://www.gnu.org/licenses/>.
 
 #include <linux/module.h>
 
diff --git a/sound/soc/uniphier/aio-reg.h b/sound/soc/uniphier/aio-reg.h
index 136d356..45fdc6a 100644
--- a/sound/soc/uniphier/aio-reg.h
+++ b/sound/soc/uniphier/aio-reg.h
@@ -3,19 +3,6 @@
  * Socionext UniPhier AIO ALSA driver.
  *
  * Copyright (c) 2016-2018 Socionext Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef SND_UNIPHIER_AIO_REG_H__
@@ -182,6 +169,7 @@
 #define PBINMXPAUSECTR1(n)               (0x20208 + 0x40 * (n))
 
 /* AOUT */
+#define AOUTFADECTR0                     0x40020
 #define AOUTENCTR0                       0x40040
 #define AOUTENCTR1                       0x40044
 #define AOUTENCTR2                       0x40048
@@ -192,6 +180,9 @@
 #define AOUTSRCRSTCTR1                   0x400c4
 #define AOUTSRCRSTCTR2                   0x400c8
 
+/* AOUT PCMOUT has 5 slots, slot0-3: D0-3, slot4: DMIX */
+#define OPORT_SLOT_MAX                     5
+
 /* AOUT(PCMOUTN) */
 #define OPORTMXCTR1(n)                   (0x42000 + 0x400 * (n))
 #define   OPORTMXCTR1_I2SLRSEL_MASK        (0x11 << 10)
@@ -372,11 +363,30 @@
 #define   OPORTMXMASK_XCKMSK_ON            (0x0 << 0)
 #define   OPORTMXMASK_XCKMSK_OFF           (0x7 << 0)
 #define OPORTMXDEBUG(n)                  (0x420fc + 0x400 * (n))
-#define OPORTMXT0RSTCTR(n)               (0x4211c + 0x400 * (n))
-#define OPORTMXT1RSTCTR(n)               (0x4213c + 0x400 * (n))
-#define OPORTMXT2RSTCTR(n)               (0x4215c + 0x400 * (n))
-#define OPORTMXT3RSTCTR(n)               (0x4217c + 0x400 * (n))
-#define OPORTMXT4RSTCTR(n)               (0x4219c + 0x400 * (n))
+#define OPORTMXTYVOLPARA1(n, m)          (0x42100 + 0x400 * (n) + 0x20 * (m))
+#define   OPORTMXTYVOLPARA1_SLOPEU_MASK    GENMASK(31, 16)
+#define OPORTMXTYVOLPARA2(n, m)          (0x42104 + 0x400 * (n) + 0x20 * (m))
+#define   OPORTMXTYVOLPARA2_FADE_MASK      GENMASK(17, 16)
+#define   OPORTMXTYVOLPARA2_FADE_NOOP      (0x0 << 16)
+#define   OPORTMXTYVOLPARA2_FADE_FADEOUT   (0x1 << 16)
+#define   OPORTMXTYVOLPARA2_FADE_FADEIN    (0x2 << 16)
+#define   OPORTMXTYVOLPARA2_TARGET_MASK    GENMASK(15, 0)
+#define OPORTMXTYVOLGAINSTATUS(n, m)     (0x42108 + 0x400 * (n) + 0x20 * (m))
+#define   OPORTMXTYVOLGAINSTATUS_CUR_MASK  GENMASK(15, 0)
+#define OPORTMXTYSLOTCTR(n, m)           (0x42114 + 0x400 * (n) + 0x20 * (m))
+#define   OPORTMXTYSLOTCTR_SLOTSEL_MASK    GENMASK(11, 8)
+#define   OPORTMXTYSLOTCTR_SLOTSEL_SLOT0   (0x8 << 8)
+#define   OPORTMXTYSLOTCTR_SLOTSEL_SLOT1   (0x9 << 8)
+#define   OPORTMXTYSLOTCTR_SLOTSEL_SLOT2   (0xa << 8)
+#define   OPORTMXTYSLOTCTR_SLOTSEL_SLOT3   (0xb << 8)
+#define   OPORTMXTYSLOTCTR_SLOTSEL_SLOT4   (0xc << 8)
+#define   OPORTMXT0SLOTCTR_MUTEOFF_MASK    BIT(1)
+#define   OPORTMXT0SLOTCTR_MUTEOFF_MUTE    (0x0 << 1)
+#define   OPORTMXT0SLOTCTR_MUTEOFF_UNMUTE  (0x1 << 1)
+#define OPORTMXTYRSTCTR(n, m)            (0x4211c + 0x400 * (n) + 0x20 * (m))
+#define   OPORTMXT0RSTCTR_RST_MASK         BIT(1)
+#define   OPORTMXT0RSTCTR_RST_OFF          (0x0 << 1)
+#define   OPORTMXT0RSTCTR_RST_ON           (0x1 << 1)
 
 #define SBF_(frame, shift)    (((frame) * 2 - 1) << shift)
 
diff --git a/sound/soc/uniphier/aio.h b/sound/soc/uniphier/aio.h
index 8cab4a5..aa89c2f 100644
--- a/sound/soc/uniphier/aio.h
+++ b/sound/soc/uniphier/aio.h
@@ -3,19 +3,6 @@
  * Socionext UniPhier AIO ALSA driver.
  *
  * Copyright (c) 2016-2018 Socionext Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef SND_UNIPHIER_AIO_H__
@@ -143,6 +130,10 @@
 #define AUD_PLLDIV_1_1    2
 #define AUD_PLLDIV_2_3    3
 
+#define AUD_VOL_INIT         0x4000 /* +0dB */
+#define AUD_VOL_MAX          0xffff /* +6dB */
+#define AUD_VOL_FADE_TIME    20 /* 20ms */
+
 #define AUD_RING_SIZE            (128 * 1024)
 
 #define AUD_MIN_FRAGMENT         4
@@ -244,6 +235,7 @@
 	/* For PCM audio */
 	struct snd_pcm_substream *substream;
 	struct snd_pcm_hw_params params;
+	int vol;
 
 	/* For compress audio */
 	struct snd_compr_stream *cstream;
@@ -336,6 +328,8 @@
 int aio_port_set_param(struct uniphier_aio_sub *sub, int pass_through,
 		       const struct snd_pcm_hw_params *params);
 void aio_port_set_enable(struct uniphier_aio_sub *sub, int enable);
+int aio_port_get_volume(struct uniphier_aio_sub *sub);
+void aio_port_set_volume(struct uniphier_aio_sub *sub, int vol);
 int aio_if_set_param(struct uniphier_aio_sub *sub, int pass_through);
 int aio_oport_set_stream_type(struct uniphier_aio_sub *sub,
 			      enum IEC61937_PC pc);
diff --git a/sound/soc/uniphier/evea.c b/sound/soc/uniphier/evea.c
index 73fd673..f9c1016 100644
--- a/sound/soc/uniphier/evea.c
+++ b/sound/soc/uniphier/evea.c
@@ -54,8 +54,21 @@
 	int switch_hp;
 };
 
+static const char * const linsw1_sel1_text[] = {
+	"LIN1", "LIN2", "LIN3"
+};
+
+static SOC_ENUM_SINGLE_DECL(linsw1_sel1_enum,
+	ALINSW1, ALINSW1_SEL1_SHIFT,
+	linsw1_sel1_text);
+
+static const struct snd_kcontrol_new linesw1_mux[] = {
+	SOC_DAPM_ENUM("Line In 1 Source", linsw1_sel1_enum),
+};
+
 static const struct snd_soc_dapm_widget evea_widgets[] = {
-	SND_SOC_DAPM_ADC("ADC", "Capture", SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_ADC("ADC", NULL, SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_MUX("Line In 1 Mux", SND_SOC_NOPM, 0, 0, linesw1_mux),
 	SND_SOC_DAPM_INPUT("LIN1_LP"),
 	SND_SOC_DAPM_INPUT("LIN1_RP"),
 	SND_SOC_DAPM_INPUT("LIN2_LP"),
@@ -63,7 +76,9 @@
 	SND_SOC_DAPM_INPUT("LIN3_LP"),
 	SND_SOC_DAPM_INPUT("LIN3_RP"),
 
-	SND_SOC_DAPM_DAC("DAC", "Playback", SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_DAC("DAC HP", NULL, SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_DAC("DAC LO1", NULL, SND_SOC_NOPM, 0, 0),
+	SND_SOC_DAPM_DAC("DAC LO2", NULL, SND_SOC_NOPM, 0, 0),
 	SND_SOC_DAPM_OUTPUT("HP1_L"),
 	SND_SOC_DAPM_OUTPUT("HP1_R"),
 	SND_SOC_DAPM_OUTPUT("LO2_L"),
@@ -71,17 +86,22 @@
 };
 
 static const struct snd_soc_dapm_route evea_routes[] = {
-	{ "ADC", NULL, "LIN1_LP" },
-	{ "ADC", NULL, "LIN1_RP" },
-	{ "ADC", NULL, "LIN2_LP" },
-	{ "ADC", NULL, "LIN2_RP" },
-	{ "ADC", NULL, "LIN3_LP" },
-	{ "ADC", NULL, "LIN3_RP" },
+	{ "Line In 1", NULL, "ADC" },
+	{ "ADC", NULL, "Line In 1 Mux" },
+	{ "Line In 1 Mux", "LIN1", "LIN1_LP" },
+	{ "Line In 1 Mux", "LIN1", "LIN1_RP" },
+	{ "Line In 1 Mux", "LIN2", "LIN2_LP" },
+	{ "Line In 1 Mux", "LIN2", "LIN2_RP" },
+	{ "Line In 1 Mux", "LIN3", "LIN3_LP" },
+	{ "Line In 1 Mux", "LIN3", "LIN3_RP" },
 
-	{ "HP1_L", NULL, "DAC" },
-	{ "HP1_R", NULL, "DAC" },
-	{ "LO2_L", NULL, "DAC" },
-	{ "LO2_R", NULL, "DAC" },
+	{ "DAC HP", NULL, "Headphone 1" },
+	{ "DAC LO1", NULL, "Line Out 1" },
+	{ "DAC LO2", NULL, "Line Out 2" },
+	{ "HP1_L", NULL, "DAC HP" },
+	{ "HP1_R", NULL, "DAC HP" },
+	{ "LO2_L", NULL, "DAC LO2" },
+	{ "LO2_R", NULL, "DAC LO2" },
 };
 
 static void evea_set_power_state_on(struct evea_priv *evea)
@@ -280,16 +300,7 @@
 	return evea_update_switch_hp(evea);
 }
 
-static const char * const linsw1_sel1_text[] = {
-	"LIN1", "LIN2", "LIN3"
-};
-
-static SOC_ENUM_SINGLE_DECL(linsw1_sel1_enum,
-	ALINSW1, ALINSW1_SEL1_SHIFT,
-	linsw1_sel1_text);
-
 static const struct snd_kcontrol_new evea_controls[] = {
-	SOC_ENUM("Line Capture Source", linsw1_sel1_enum),
 	SOC_SINGLE_BOOL_EXT("Line Capture Switch", 0,
 			    evea_get_switch_lin, evea_set_switch_lin),
 	SOC_SINGLE_BOOL_EXT("Line Playback Switch", 0,
diff --git a/sound/soc/zte/zx-i2s.c b/sound/soc/zte/zx-i2s.c
index 9a05659..0d36630 100644
--- a/sound/soc/zte/zx-i2s.c
+++ b/sound/soc/zte/zx-i2s.c
@@ -20,9 +20,6 @@
 #include <sound/core.h>
 #include <sound/dmaengine_pcm.h>
 #include <sound/initval.h>
-#include <sound/pcm.h>
-#include <sound/pcm_params.h>
-#include <sound/soc.h>
 
 #define ZX_I2S_PROCESS_CTRL	0x04
 #define ZX_I2S_TIMING_CTRL	0x08
@@ -197,7 +194,7 @@
 		val |= (ZX_I2S_TIMING_I2S | ZX_I2S_TIMING_LSB_JUSTIF);
 		break;
 	default:
-		dev_err(cpu_dai->dev, "Unknown i2s timeing\n");
+		dev_err(cpu_dai->dev, "Unknown i2s timing\n");
 		return -EINVAL;
 	}
 
diff --git a/sound/sound_core.c b/sound/sound_core.c
index b4efb22..40ad000 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -413,7 +413,7 @@
 		break;
 	}
 	return sound_insert_unit(&chains[chain], fops, -1, unit, max_unit,
-				 name, S_IRUSR | S_IWUSR, dev);
+				 name, 0600, dev);
 }
  
 EXPORT_SYMBOL(register_sound_special_device);
@@ -440,7 +440,7 @@
 int register_sound_mixer(const struct file_operations *fops, int dev)
 {
 	return sound_insert_unit(&chains[0], fops, dev, 0, 128,
-				 "mixer", S_IRUSR | S_IWUSR, NULL);
+				 "mixer", 0600, NULL);
 }
 
 EXPORT_SYMBOL(register_sound_mixer);
@@ -468,7 +468,7 @@
 int register_sound_dsp(const struct file_operations *fops, int dev)
 {
 	return sound_insert_unit(&chains[3], fops, dev, 3, 131,
-				 "dsp", S_IWUSR | S_IRUSR, NULL);
+				 "dsp", 0600, NULL);
 }
 
 EXPORT_SYMBOL(register_sound_dsp);
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index abc7bd5..7609ece 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -2518,7 +2518,7 @@
 #ifdef DBRI_DEBUG
 	if (!snd_card_proc_new(card, "debug", &entry)) {
 		snd_info_set_text_ops(entry, dbri, dbri_debug_read);
-		entry->mode = S_IFREG | S_IRUGO;	/* Readable only. */
+		entry->mode = S_IFREG | 0444;	/* Readable only. */
 	}
 #endif
 }
@@ -2542,7 +2542,7 @@
 	dbri->irq = irq;
 
 	dbri->dma = dma_zalloc_coherent(&op->dev, sizeof(struct dbri_dma),
-					&dbri->dma_dvma, GFP_ATOMIC);
+					&dbri->dma_dvma, GFP_KERNEL);
 	if (!dbri->dma)
 		return -ENOMEM;
 
diff --git a/sound/usb/card.c b/sound/usb/card.c
index 4a1c6bb..a1ed798 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -86,6 +86,8 @@
 static bool autoclock = true;
 static char *quirk_alias[SNDRV_CARDS];
 
+bool snd_usb_use_vmalloc = true;
+
 module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for the USB audio adapter.");
 module_param_array(id, charp, NULL, 0444);
@@ -105,6 +107,8 @@
 MODULE_PARM_DESC(autoclock, "Enable auto-clock selection for UAC2 devices (default: yes).");
 module_param_array(quirk_alias, charp, NULL, 0444);
 MODULE_PARM_DESC(quirk_alias, "Quirk aliases, e.g. 0123abcd:5678beef.");
+module_param_named(use_vmalloc, snd_usb_use_vmalloc, bool, 0444);
+MODULE_PARM_DESC(use_vmalloc, "Use vmalloc for PCM intermediate buffers (default: yes).");
 
 /*
  * we keep the snd_usb_audio_t instances by ourselves for merging
@@ -221,32 +225,13 @@
 	struct usb_device *dev = chip->dev;
 	struct usb_host_interface *host_iface;
 	struct usb_interface_descriptor *altsd;
-	void *control_header;
 	int i, protocol;
-	int rest_bytes;
 
 	/* find audiocontrol interface */
 	host_iface = &usb_ifnum_to_if(dev, ctrlif)->altsetting[0];
-	control_header = snd_usb_find_csint_desc(host_iface->extra,
-						 host_iface->extralen,
-						 NULL, UAC_HEADER);
 	altsd = get_iface_desc(host_iface);
 	protocol = altsd->bInterfaceProtocol;
 
-	if (!control_header) {
-		dev_err(&dev->dev, "cannot find UAC_HEADER\n");
-		return -EINVAL;
-	}
-
-	rest_bytes = (void *)(host_iface->extra + host_iface->extralen) -
-		control_header;
-
-	/* just to be sure -- this shouldn't hit at all */
-	if (rest_bytes <= 0) {
-		dev_err(&dev->dev, "invalid control header\n");
-		return -EINVAL;
-	}
-
 	switch (protocol) {
 	default:
 		dev_warn(&dev->dev,
@@ -255,7 +240,25 @@
 		/* fall through */
 
 	case UAC_VERSION_1: {
-		struct uac1_ac_header_descriptor *h1 = control_header;
+		struct uac1_ac_header_descriptor *h1;
+		int rest_bytes;
+
+		h1 = snd_usb_find_csint_desc(host_iface->extra,
+					     host_iface->extralen,
+					     NULL, UAC_HEADER);
+		if (!h1) {
+			dev_err(&dev->dev, "cannot find UAC_HEADER\n");
+			return -EINVAL;
+		}
+
+		rest_bytes = (void *)(host_iface->extra +
+				host_iface->extralen) - (void *)h1;
+
+		/* just to be sure -- this shouldn't hit at all */
+		if (rest_bytes <= 0) {
+			dev_err(&dev->dev, "invalid control header\n");
+			return -EINVAL;
+		}
 
 		if (rest_bytes < sizeof(*h1)) {
 			dev_err(&dev->dev, "too short v1 buffer descriptor\n");
@@ -308,6 +311,20 @@
 			return -EINVAL;
 		}
 
+		if (protocol == UAC_VERSION_3) {
+			int badd = assoc->bFunctionSubClass;
+
+			if (badd != UAC3_FUNCTION_SUBCLASS_FULL_ADC_3_0 &&
+			    (badd < UAC3_FUNCTION_SUBCLASS_GENERIC_IO ||
+			     badd > UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE)) {
+				dev_err(&dev->dev,
+					"Unsupported UAC3 BADD profile\n");
+				return -EINVAL;
+			}
+
+			chip->badd_profile = badd;
+		}
+
 		for (i = 0; i < assoc->bInterfaceCount; i++) {
 			int intf = assoc->bFirstInterface + i;
 
@@ -329,8 +346,9 @@
  *
  */
 
-static int snd_usb_audio_free(struct snd_usb_audio *chip)
+static void snd_usb_audio_free(struct snd_card *card)
 {
+	struct snd_usb_audio *chip = card->private_data;
 	struct snd_usb_endpoint *ep, *n;
 
 	list_for_each_entry_safe(ep, n, &chip->ep_list, list)
@@ -339,108 +357,51 @@
 	mutex_destroy(&chip->mutex);
 	if (!atomic_read(&chip->shutdown))
 		dev_set_drvdata(&chip->dev->dev, NULL);
-	kfree(chip);
-	return 0;
 }
 
-static int snd_usb_audio_dev_free(struct snd_device *device)
+static void usb_audio_make_shortname(struct usb_device *dev,
+				     struct snd_usb_audio *chip,
+				     const struct snd_usb_audio_quirk *quirk)
 {
-	struct snd_usb_audio *chip = device->device_data;
-	return snd_usb_audio_free(chip);
-}
+	struct snd_card *card = chip->card;
 
-/*
- * create a chip instance and set its names.
- */
-static int snd_usb_audio_create(struct usb_interface *intf,
-				struct usb_device *dev, int idx,
-				const struct snd_usb_audio_quirk *quirk,
-				unsigned int usb_id,
-				struct snd_usb_audio **rchip)
-{
-	struct snd_card *card;
-	struct snd_usb_audio *chip;
-	int err, len;
-	char component[14];
-	static struct snd_device_ops ops = {
-		.dev_free =	snd_usb_audio_dev_free,
-	};
-
-	*rchip = NULL;
-
-	switch (snd_usb_get_speed(dev)) {
-	case USB_SPEED_LOW:
-	case USB_SPEED_FULL:
-	case USB_SPEED_HIGH:
-	case USB_SPEED_WIRELESS:
-	case USB_SPEED_SUPER:
-	case USB_SPEED_SUPER_PLUS:
-		break;
-	default:
-		dev_err(&dev->dev, "unknown device speed %d\n", snd_usb_get_speed(dev));
-		return -ENXIO;
+	if (quirk && quirk->product_name && *quirk->product_name) {
+		strlcpy(card->shortname, quirk->product_name,
+			sizeof(card->shortname));
+		return;
 	}
 
-	err = snd_card_new(&intf->dev, index[idx], id[idx], THIS_MODULE,
-			   0, &card);
-	if (err < 0) {
-		dev_err(&dev->dev, "cannot create card instance %d\n", idx);
-		return err;
-	}
-
-	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
-	if (! chip) {
-		snd_card_free(card);
-		return -ENOMEM;
-	}
-
-	mutex_init(&chip->mutex);
-	init_waitqueue_head(&chip->shutdown_wait);
-	chip->index = idx;
-	chip->dev = dev;
-	chip->card = card;
-	chip->setup = device_setup[idx];
-	chip->autoclock = autoclock;
-	atomic_set(&chip->active, 1); /* avoid autopm during probing */
-	atomic_set(&chip->usage_count, 0);
-	atomic_set(&chip->shutdown, 0);
-
-	chip->usb_id = usb_id;
-	INIT_LIST_HEAD(&chip->pcm_list);
-	INIT_LIST_HEAD(&chip->ep_list);
-	INIT_LIST_HEAD(&chip->midi_list);
-	INIT_LIST_HEAD(&chip->mixer_list);
-
-	if ((err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops)) < 0) {
-		snd_usb_audio_free(chip);
-		snd_card_free(card);
-		return err;
-	}
-
-	strcpy(card->driver, "USB-Audio");
-	sprintf(component, "USB%04x:%04x",
-		USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id));
-	snd_component_add(card, component);
-
 	/* retrieve the device string as shortname */
-	if (quirk && quirk->product_name && *quirk->product_name) {
-		strlcpy(card->shortname, quirk->product_name, sizeof(card->shortname));
-	} else {
-		if (!dev->descriptor.iProduct ||
-		    usb_string(dev, dev->descriptor.iProduct,
-		    card->shortname, sizeof(card->shortname)) <= 0) {
-			/* no name available from anywhere, so use ID */
-			sprintf(card->shortname, "USB Device %#04x:%#04x",
-				USB_ID_VENDOR(chip->usb_id),
-				USB_ID_PRODUCT(chip->usb_id));
-		}
+	if (!dev->descriptor.iProduct ||
+	    usb_string(dev, dev->descriptor.iProduct,
+		       card->shortname, sizeof(card->shortname)) <= 0) {
+		/* no name available from anywhere, so use ID */
+		sprintf(card->shortname, "USB Device %#04x:%#04x",
+			USB_ID_VENDOR(chip->usb_id),
+			USB_ID_PRODUCT(chip->usb_id));
 	}
-	strim(card->shortname);
 
-	/* retrieve the vendor and device strings as longname */
+	strim(card->shortname);
+}
+
+static void usb_audio_make_longname(struct usb_device *dev,
+				    struct snd_usb_audio *chip,
+				    const struct snd_usb_audio_quirk *quirk)
+{
+	struct snd_card *card = chip->card;
+	int len;
+
+	/* shortcut - if any pre-defined string is given, use it */
+	if (quirk && quirk->profile_name && *quirk->profile_name) {
+		strlcpy(card->longname, quirk->profile_name,
+			sizeof(card->longname));
+		return;
+	}
+
 	if (quirk && quirk->vendor_name && *quirk->vendor_name) {
 		len = strlcpy(card->longname, quirk->vendor_name, sizeof(card->longname));
 	} else {
+		/* retrieve the vendor and device strings as longname */
 		if (dev->descriptor.iManufacturer)
 			len = usb_string(dev, dev->descriptor.iManufacturer,
 					 card->longname, sizeof(card->longname));
@@ -480,6 +441,71 @@
 	default:
 		break;
 	}
+}
+
+/*
+ * create a chip instance and set its names.
+ */
+static int snd_usb_audio_create(struct usb_interface *intf,
+				struct usb_device *dev, int idx,
+				const struct snd_usb_audio_quirk *quirk,
+				unsigned int usb_id,
+				struct snd_usb_audio **rchip)
+{
+	struct snd_card *card;
+	struct snd_usb_audio *chip;
+	int err;
+	char component[14];
+
+	*rchip = NULL;
+
+	switch (snd_usb_get_speed(dev)) {
+	case USB_SPEED_LOW:
+	case USB_SPEED_FULL:
+	case USB_SPEED_HIGH:
+	case USB_SPEED_WIRELESS:
+	case USB_SPEED_SUPER:
+	case USB_SPEED_SUPER_PLUS:
+		break;
+	default:
+		dev_err(&dev->dev, "unknown device speed %d\n", snd_usb_get_speed(dev));
+		return -ENXIO;
+	}
+
+	err = snd_card_new(&intf->dev, index[idx], id[idx], THIS_MODULE,
+			   sizeof(*chip), &card);
+	if (err < 0) {
+		dev_err(&dev->dev, "cannot create card instance %d\n", idx);
+		return err;
+	}
+
+	chip = card->private_data;
+	mutex_init(&chip->mutex);
+	init_waitqueue_head(&chip->shutdown_wait);
+	chip->index = idx;
+	chip->dev = dev;
+	chip->card = card;
+	chip->setup = device_setup[idx];
+	chip->autoclock = autoclock;
+	atomic_set(&chip->active, 1); /* avoid autopm during probing */
+	atomic_set(&chip->usage_count, 0);
+	atomic_set(&chip->shutdown, 0);
+
+	chip->usb_id = usb_id;
+	INIT_LIST_HEAD(&chip->pcm_list);
+	INIT_LIST_HEAD(&chip->ep_list);
+	INIT_LIST_HEAD(&chip->midi_list);
+	INIT_LIST_HEAD(&chip->mixer_list);
+
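+	/* the card core now frees the chip data together with the card */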
+	card->private_free = snd_usb_audio_free;
+
+	strcpy(card->driver, "USB-Audio");
+	sprintf(component, "USB%04x:%04x",
+		USB_ID_VENDOR(chip->usb_id), USB_ID_PRODUCT(chip->usb_id));
+	snd_component_add(card, component);
+
+	usb_audio_make_shortname(dev, chip, quirk);
+	usb_audio_make_longname(dev, chip, quirk);
 
 	snd_usb_audio_create_proc(chip);
 
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 0b030d8..c797496 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -443,10 +443,11 @@
 	data[0] = rate;
 	data[1] = rate >> 8;
 	data[2] = rate >> 16;
-	if ((err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR,
-				   USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_OUT,
-				   UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep,
-				   data, sizeof(data))) < 0) {
+	err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR,
+			      USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_OUT,
+			      UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep,
+			      data, sizeof(data));
+	if (err < 0) {
 		dev_err(&dev->dev, "%d:%d: cannot set freq %d to ep %#x\n",
 			iface, fmt->altsetting, rate, ep);
 		return err;
@@ -460,10 +461,11 @@
 	if (chip->sample_rate_read_error > 2)
 		return 0;
 
-	if ((err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC_GET_CUR,
-				   USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_IN,
-				   UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep,
-				   data, sizeof(data))) < 0) {
+	err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC_GET_CUR,
+			      USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_IN,
+			      UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep,
+			      data, sizeof(data));
+	if (err < 0) {
 		dev_err(&dev->dev, "%d:%d: cannot get freq at ep %#x\n",
 			iface, fmt->altsetting, ep);
 		chip->sample_rate_read_error++;
@@ -587,8 +589,15 @@
 	default:
 		return set_sample_rate_v1(chip, iface, alts, fmt, rate);
 
-	case UAC_VERSION_2:
 	case UAC_VERSION_3:
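+		/* BADD profiles support only a single, fixed sampling rate */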
+		if (chip->badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) {
+			if (rate != UAC3_BADD_SAMPLING_RATE)
+				return -ENXIO;
+			else
+				return 0;
+		}
+	/* fall through */
+	case UAC_VERSION_2:
 		return set_sample_rate_v2v3(chip, iface, alts, fmt, rate);
 	}
 }
diff --git a/sound/usb/helper.h b/sound/usb/helper.h
index 4463e6d..d338bd0 100644
--- a/sound/usb/helper.h
+++ b/sound/usb/helper.h
@@ -18,16 +18,12 @@
  * retrieve usb_interface descriptor from the host interface
  * (conditional for compatibility with the older API)
  */
-#ifndef get_iface_desc
 #define get_iface_desc(iface)	(&(iface)->desc)
 #define get_endpoint(alt,ep)	(&(alt)->endpoint[ep].desc)
 #define get_ep_desc(ep)		(&(ep)->desc)
 #define get_cfg_desc(cfg)	(&(cfg)->desc)
-#endif
 
-#ifndef snd_usb_get_speed
 #define snd_usb_get_speed(dev) ((dev)->speed)
-#endif
 
 static inline int snd_usb_ctrl_intf(struct snd_usb_audio *chip)
 {
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index bb5ab7a..898afd3 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -112,14 +112,12 @@
 #include "mixer_maps.c"
 
 static const struct usbmix_name_map *
-find_map(struct mixer_build *state, int unitid, int control)
+find_map(const struct usbmix_name_map *p, int unitid, int control)
 {
-	const struct usbmix_name_map *p = state->map;
-
 	if (!p)
 		return NULL;
 
-	for (p = state->map; p->id; p++) {
+	for (; p->id; p++) {
 		if (p->id == unitid &&
 		    (!control || !p->control || control == p->control))
 			return p;
@@ -201,10 +199,10 @@
 /*
  * copy a string with the given id
  */
-static int snd_usb_copy_string_desc(struct mixer_build *state,
+static int snd_usb_copy_string_desc(struct snd_usb_audio *chip,
 				    int index, char *buf, int maxlen)
 {
-	int len = usb_string(state->chip->dev, index, buf, maxlen - 1);
+	int len = usb_string(chip->dev, index, buf, maxlen - 1);
 
 	if (len < 0)
 		return 0;
@@ -600,7 +598,8 @@
 
 	while (snd_ctl_find_id(mixer->chip->card, &kctl->id))
 		kctl->id.index++;
-	if ((err = snd_ctl_add(mixer->chip->card, kctl)) < 0) {
+	err = snd_ctl_add(mixer->chip->card, kctl);
+	if (err < 0) {
 		usb_audio_dbg(mixer->chip, "cannot add control (err = %d)\n",
 			      err);
 		return err;
@@ -658,14 +657,14 @@
 	{ 0 },
 };
 
-static int get_term_name(struct mixer_build *state, struct usb_audio_term *iterm,
+static int get_term_name(struct snd_usb_audio *chip, struct usb_audio_term *iterm,
 			 unsigned char *name, int maxlen, int term_only)
 {
 	struct iterm_name_combo *names;
 	int len;
 
 	if (iterm->name) {
-		len = snd_usb_copy_string_desc(state, iterm->name,
+		len = snd_usb_copy_string_desc(chip, iterm->name,
 						name, maxlen);
 		if (len)
 			return len;
@@ -719,6 +718,66 @@
 }
 
 /*
+ * Get logical cluster information for UAC3 devices.
+ */
+static int get_cluster_channels_v3(struct mixer_build *state, unsigned int cluster_id)
+{
+	struct uac3_cluster_header_descriptor c_header;
+	int err;
+
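+	/* read the cluster header with a High Capability descriptor request */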
+	err = snd_usb_ctl_msg(state->chip->dev,
+			usb_rcvctrlpipe(state->chip->dev, 0),
+			UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR,
+			USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+			cluster_id,
+			snd_usb_ctrl_intf(state->chip),
+			&c_header, sizeof(c_header));
+	if (err < 0)
+		goto error;
+	if (err != sizeof(c_header)) {
+		err = -EIO;
+		goto error;
+	}
+
+	return c_header.bNrChannels;
+
+error:
+	usb_audio_err(state->chip, "cannot request logical cluster ID: %d (err: %d)\n", cluster_id, err);
+	return err;
+}
+
+/*
+ * Get number of channels for a Mixer Unit.
+ */
+static int uac_mixer_unit_get_channels(struct mixer_build *state,
+				       struct uac_mixer_unit_descriptor *desc)
+{
+	int mu_channels;
+
+	if (desc->bLength < 11)
+		return -EINVAL;
+	if (!desc->bNrInPins)
+		return -EINVAL;
+
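+	/* UAC3 keeps the channel count in a separate cluster descriptor */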
+	switch (state->mixer->protocol) {
+	case UAC_VERSION_1:
+	case UAC_VERSION_2:
+	default:
+		mu_channels = uac_mixer_unit_bNrChannels(desc);
+		break;
+	case UAC_VERSION_3:
+		mu_channels = get_cluster_channels_v3(state,
+				uac3_mixer_unit_wClusterDescrID(desc));
+		break;
+	}
+
+	if (!mu_channels)
+		return -EINVAL;
+
+	return mu_channels;
+}
+
+/*
  * parse the source unit recursively until it reaches to a terminal
  * or a branched unit.
  */
@@ -844,8 +903,12 @@
 				term->id = id;
 				term->type = le16_to_cpu(d->wTerminalType);
 
-				/* REVISIT: UAC3 IT doesn't have channels/cfg */
-				term->channels = 0;
+				err = get_cluster_channels_v3(state, le16_to_cpu(d->wClusterDescrID));
+				if (err < 0)
+					return err;
+				term->channels = err;
+
+				/* REVISIT: UAC3 IT doesn't have channels cfg */
 				term->chconfig = 0;
 
 				term->name = le16_to_cpu(d->wTerminalDescrStr);
@@ -865,6 +928,18 @@
 				term->name = le16_to_cpu(d->wClockSourceStr);
 				return 0;
 			}
+			case UAC3_MIXER_UNIT: {
+				struct uac_mixer_unit_descriptor *d = p1;
+
+				err = uac_mixer_unit_get_channels(state, d);
+				if (err < 0)
+					return err;
+
+				term->channels = err;
+				term->type = d->bDescriptorSubtype << 16; /* virtual type */
+
+				return 0;
+			}
 			default:
 				return -ENODEV;
 			}
@@ -1258,6 +1333,51 @@
 	return 0;
 }
 
+/* get the connectors status and report it as boolean type */
+static int mixer_ctl_connector_get(struct snd_kcontrol *kcontrol,
+				   struct snd_ctl_elem_value *ucontrol)
+{
+	struct usb_mixer_elem_info *cval = kcontrol->private_data;
+	struct snd_usb_audio *chip = cval->head.mixer->chip;
+	int idx = 0, validx, ret, val;
+
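+	/* wValue: control selector in the high byte, channel (0) in the low byte */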
+	validx = cval->control << 8 | 0;
+
+	ret = snd_usb_lock_shutdown(chip) ? -EIO : 0;
+	if (ret)
+		goto error;
+
+	idx = snd_usb_ctrl_intf(chip) | (cval->head.id << 8);
+	if (cval->head.mixer->protocol == UAC_VERSION_2) {
+		struct uac2_connectors_ctl_blk uac2_conn;
+
+		ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), UAC2_CS_CUR,
+				      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+				      validx, idx, &uac2_conn, sizeof(uac2_conn));
+		val = !!uac2_conn.bNrChannels;
+	} else { /* UAC_VERSION_3 */
+		struct uac3_insertion_ctl_blk uac3_conn;
+
+		ret = snd_usb_ctl_msg(chip->dev, usb_rcvctrlpipe(chip->dev, 0), UAC2_CS_CUR,
+				      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+				      validx, idx, &uac3_conn, sizeof(uac3_conn));
+		val = !!uac3_conn.bmConInserted;
+	}
+
+	snd_usb_unlock_shutdown(chip);
+
+	if (ret < 0) {
+error:
+		usb_audio_err(chip,
+			"cannot get connectors status: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n",
+			UAC_GET_CUR, validx, idx, cval->val_type);
+		return ret;
+	}
+
+	ucontrol->value.integer.value[0] = val;
+	return 0;
+}
+
 static struct snd_kcontrol_new usb_feature_unit_ctl = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "", /* will be filled later manually */
@@ -1288,6 +1408,15 @@
 	.put = NULL,
 };
 
+static const struct snd_kcontrol_new usb_connector_ctl_ro = {
+	.iface = SNDRV_CTL_ELEM_IFACE_CARD,
+	.name = "", /* will be filled later manually */
+	.access = SNDRV_CTL_ELEM_ACCESS_READ,
+	.info = snd_ctl_boolean_mono_info,
+	.get = mixer_ctl_connector_get,
+	.put = NULL,
+};
+
 /*
  * This symbol is exported in order to allow the mixer quirks to
  * hook up to the standard feature unit control mechanism
@@ -1341,16 +1470,16 @@
 	return NULL;
 }
 
-static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
-			      unsigned int ctl_mask, int control,
-			      struct usb_audio_term *iterm, int unitid,
-			      int readonly_mask)
+static void __build_feature_ctl(struct usb_mixer_interface *mixer,
+				const struct usbmix_name_map *imap,
+				unsigned int ctl_mask, int control,
+				struct usb_audio_term *iterm,
+				struct usb_audio_term *oterm,
+				int unitid, int nameid, int readonly_mask)
 {
-	struct uac_feature_unit_descriptor *desc = raw_desc;
 	struct usb_feature_control_info *ctl_info;
 	unsigned int len = 0;
 	int mapped_name = 0;
-	int nameid = uac_feature_unit_iFeature(desc);
 	struct snd_kcontrol *kctl;
 	struct usb_mixer_elem_info *cval;
 	const struct usbmix_name_map *map;
@@ -1361,14 +1490,14 @@
 		return;
 	}
 
-	map = find_map(state, unitid, control);
+	map = find_map(imap, unitid, control);
 	if (check_ignored_ctl(map))
 		return;
 
 	cval = kzalloc(sizeof(*cval), GFP_KERNEL);
 	if (!cval)
 		return;
-	snd_usb_mixer_elem_init_std(&cval->head, state->mixer, unitid);
+	snd_usb_mixer_elem_init_std(&cval->head, mixer, unitid);
 	cval->control = control;
 	cval->cmask = ctl_mask;
 
@@ -1377,7 +1506,7 @@
 		kfree(cval);
 		return;
 	}
-	if (state->mixer->protocol == UAC_VERSION_1)
+	if (mixer->protocol == UAC_VERSION_1)
 		cval->val_type = ctl_info->type;
 	else /* UAC_VERSION_2 */
 		cval->val_type = ctl_info->type_uac2 >= 0 ?
@@ -1406,7 +1535,7 @@
 		kctl = snd_ctl_new1(&usb_feature_unit_ctl, cval);
 
 	if (!kctl) {
-		usb_audio_err(state->chip, "cannot malloc kcontrol\n");
+		usb_audio_err(mixer->chip, "cannot malloc kcontrol\n");
 		kfree(cval);
 		return;
 	}
@@ -1415,7 +1544,7 @@
 	len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
 	mapped_name = len != 0;
 	if (!len && nameid)
-		len = snd_usb_copy_string_desc(state, nameid,
+		len = snd_usb_copy_string_desc(mixer->chip, nameid,
 				kctl->id.name, sizeof(kctl->id.name));
 
 	switch (control) {
@@ -1430,10 +1559,12 @@
 		 * - otherwise, anonymous name.
 		 */
 		if (!len) {
-			len = get_term_name(state, iterm, kctl->id.name,
-					    sizeof(kctl->id.name), 1);
-			if (!len)
-				len = get_term_name(state, &state->oterm,
+			if (iterm)
+				len = get_term_name(mixer->chip, iterm,
+						    kctl->id.name,
+						    sizeof(kctl->id.name), 1);
+			if (!len && oterm)
+				len = get_term_name(mixer->chip, oterm,
 						    kctl->id.name,
 						    sizeof(kctl->id.name), 1);
 			if (!len)
@@ -1442,15 +1573,15 @@
 		}
 
 		if (!mapped_name)
-			check_no_speaker_on_headset(kctl, state->mixer->chip->card);
+			check_no_speaker_on_headset(kctl, mixer->chip->card);
 
 		/*
 		 * determine the stream direction:
 		 * if the connected output is USB stream, then it's likely a
 		 * capture stream.  otherwise it should be playback (hopefully :)
 		 */
-		if (!mapped_name && !(state->oterm.type >> 16)) {
-			if ((state->oterm.type & 0xff00) == 0x0100)
+		if (!mapped_name && oterm && !(oterm->type >> 16)) {
+			if ((oterm->type & 0xff00) == 0x0100)
 				append_ctl_name(kctl, " Capture");
 			else
 				append_ctl_name(kctl, " Playback");
@@ -1478,7 +1609,7 @@
 		}
 	}
 
-	snd_usb_mixer_fu_apply_quirk(state->mixer, cval, unitid, kctl);
+	snd_usb_mixer_fu_apply_quirk(mixer, cval, unitid, kctl);
 
 	range = (cval->max - cval->min) / cval->res;
 	/*
@@ -1487,26 +1618,46 @@
 	 * devices. It will definitively catch all buggy Logitech devices.
 	 */
 	if (range > 384) {
-		usb_audio_warn(state->chip,
+		usb_audio_warn(mixer->chip,
 			       "Warning! Unlikely big volume range (=%u), cval->res is probably wrong.",
 			       range);
-		usb_audio_warn(state->chip,
+		usb_audio_warn(mixer->chip,
 			       "[%d] FU [%s] ch = %d, val = %d/%d/%d",
 			       cval->head.id, kctl->id.name, cval->channels,
 			       cval->min, cval->max, cval->res);
 	}
 
-	usb_audio_dbg(state->chip, "[%d] FU [%s] ch = %d, val = %d/%d/%d\n",
+	usb_audio_dbg(mixer->chip, "[%d] FU [%s] ch = %d, val = %d/%d/%d\n",
 		      cval->head.id, kctl->id.name, cval->channels,
 		      cval->min, cval->max, cval->res);
 	snd_usb_mixer_add_control(&cval->head, kctl);
 }
 
+static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
+			      unsigned int ctl_mask, int control,
+			      struct usb_audio_term *iterm, int unitid,
+			      int readonly_mask)
+{
+	struct uac_feature_unit_descriptor *desc = raw_desc;
+	int nameid = uac_feature_unit_iFeature(desc);
+
+	__build_feature_ctl(state->mixer, state->map, ctl_mask, control,
+			iterm, &state->oterm, unitid, nameid, readonly_mask);
+}
+
+static void build_feature_ctl_badd(struct usb_mixer_interface *mixer,
+			      unsigned int ctl_mask, int control, int unitid,
+			      const struct usbmix_name_map *badd_map)
+{
+	__build_feature_ctl(mixer, badd_map, ctl_mask, control,
+			NULL, NULL, unitid, 0, 0);
+}
+
 static void get_connector_control_name(struct mixer_build *state,
 				       struct usb_audio_term *term,
 				       bool is_input, char *name, int name_size)
 {
-	int name_len = get_term_name(state, term, name, name_size, 0);
+	int name_len = get_term_name(state->chip, term, name, name_size, 0);
 
 	if (name_len == 0)
 		strlcpy(name, "Unknown", name_size);
@@ -1534,17 +1685,25 @@
 		return;
 	snd_usb_mixer_elem_init_std(&cval->head, state->mixer, term->id);
 	/*
-	 * The first byte from reading the UAC2_TE_CONNECTOR control returns the
-	 * number of channels connected.  This boolean ctl will simply report
-	 * if any channels are connected or not.
-	 * (Audio20_final.pdf Table 5-10: Connector Control CUR Parameter Block)
+	 * UAC2: The first byte from reading the UAC2_TE_CONNECTOR control returns the
+	 * number of channels connected.
+	 *
+	 * UAC3: The first byte specifies the size of the bitmap for the insertion
+	 * controls; the following byte(s) specify which connectors are inserted.
+	 *
+	 * This boolean ctl will simply report if any channels are connected
+	 * or not.
 	 */
-	cval->control = UAC2_TE_CONNECTOR;
+	if (state->mixer->protocol == UAC_VERSION_2)
+		cval->control = UAC2_TE_CONNECTOR;
+	else /* UAC_VERSION_3 */
+		cval->control = UAC3_TE_INSERTION;
+
 	cval->val_type = USB_MIXER_BOOLEAN;
 	cval->channels = 1; /* report true if any channel is connected */
 	cval->min = 0;
 	cval->max = 1;
-	kctl = snd_ctl_new1(&usb_bool_master_control_ctl_ro, cval);
+	kctl = snd_ctl_new1(&usb_connector_ctl_ro, cval);
 	if (!kctl) {
 		usb_audio_err(state->chip, "cannot malloc kcontrol\n");
 		kfree(cval);
@@ -1605,7 +1764,7 @@
 	}
 
 	kctl->private_free = snd_usb_mixer_elem_free;
-	ret = snd_usb_copy_string_desc(state, hdr->iClockSource,
+	ret = snd_usb_copy_string_desc(state->chip, hdr->iClockSource,
 				       name, sizeof(name));
 	if (ret > 0)
 		snprintf(kctl->id.name, sizeof(kctl->id.name),
@@ -1692,7 +1851,8 @@
 	}
 
 	/* parse the source unit */
-	if ((err = parse_audio_unit(state, hdr->bSourceID)) < 0)
+	err = parse_audio_unit(state, hdr->bSourceID);
+	if (err < 0)
 		return err;
 
 	/* determine the input source type and name */
@@ -1806,16 +1966,15 @@
  */
 static void build_mixer_unit_ctl(struct mixer_build *state,
 				 struct uac_mixer_unit_descriptor *desc,
-				 int in_pin, int in_ch, int unitid,
-				 struct usb_audio_term *iterm)
+				 int in_pin, int in_ch, int num_outs,
+				 int unitid, struct usb_audio_term *iterm)
 {
 	struct usb_mixer_elem_info *cval;
-	unsigned int num_outs = uac_mixer_unit_bNrChannels(desc);
 	unsigned int i, len;
 	struct snd_kcontrol *kctl;
 	const struct usbmix_name_map *map;
 
-	map = find_map(state, unitid, 0);
+	map = find_map(state->map, unitid, 0);
 	if (check_ignored_ctl(map))
 		return;
 
@@ -1848,7 +2007,7 @@
 
 	len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
 	if (!len)
-		len = get_term_name(state, iterm, kctl->id.name,
+		len = get_term_name(state->chip, iterm, kctl->id.name,
 				    sizeof(kctl->id.name), 0);
 	if (!len)
 		len = sprintf(kctl->id.name, "Mixer Source %d", in_ch + 1);
@@ -1863,16 +2022,28 @@
 				      void *raw_desc)
 {
 	struct usb_audio_term iterm;
-	struct uac2_input_terminal_descriptor *d = raw_desc;
+	unsigned int control, bmctls, term_id;
 
-	check_input_term(state, d->bTerminalID, &iterm);
 	if (state->mixer->protocol == UAC_VERSION_2) {
-		/* Check for jack detection. */
-		if (uac_v2v3_control_is_readable(le16_to_cpu(d->bmControls),
-						 UAC2_TE_CONNECTOR)) {
-			build_connector_control(state, &iterm, true);
-		}
+		struct uac2_input_terminal_descriptor *d_v2 = raw_desc;
+		control = UAC2_TE_CONNECTOR;
+		term_id = d_v2->bTerminalID;
+		bmctls = le16_to_cpu(d_v2->bmControls);
+	} else if (state->mixer->protocol == UAC_VERSION_3) {
+		struct uac3_input_terminal_descriptor *d_v3 = raw_desc;
+		control = UAC3_TE_INSERTION;
+		term_id = d_v3->bTerminalID;
+		bmctls = le32_to_cpu(d_v3->bmControls);
+	} else {
+		return 0; /* UAC1. No Insertion control */
 	}
+
+	check_input_term(state, term_id, &iterm);
+
+	/* Check for jack detection. */
+	if (uac_v2v3_control_is_readable(bmctls, control))
+		build_connector_control(state, &iterm, true);
+
 	return 0;
 }
 
@@ -1887,14 +2058,17 @@
 	int input_pins, num_ins, num_outs;
 	int pin, ich, err;
 
-	if (desc->bLength < 11 || !(input_pins = desc->bNrInPins) ||
-	    !(num_outs = uac_mixer_unit_bNrChannels(desc))) {
+	err = uac_mixer_unit_get_channels(state, desc);
+	if (err < 0) {
 		usb_audio_err(state->chip,
 			      "invalid MIXER UNIT descriptor %d\n",
 			      unitid);
-		return -EINVAL;
+		return err;
 	}
 
+	num_outs = err;
+	input_pins = desc->bNrInPins;
+
 	num_ins = 0;
 	ich = 0;
 	for (pin = 0; pin < input_pins; pin++) {
@@ -1921,7 +2095,7 @@
 				}
 			}
 			if (ich_has_controls)
-				build_mixer_unit_ctl(state, desc, pin, ich,
+				build_mixer_unit_ctl(state, desc, pin, ich, num_outs,
 						     unitid, &iterm);
 		}
 	}
@@ -2098,7 +2272,8 @@
 	}
 
 	for (i = 0; i < num_ins; i++) {
-		if ((err = parse_audio_unit(state, desc->baSourceID[i])) < 0)
+		err = parse_audio_unit(state, desc->baSourceID[i]);
+		if (err < 0)
 			return err;
 	}
 
@@ -2114,7 +2289,7 @@
 
 		if (!(controls[valinfo->control / 8] & (1 << ((valinfo->control % 8) - 1))))
 			continue;
-		map = find_map(state, unitid, valinfo->control);
+		map = find_map(state->map, unitid, valinfo->control);
 		if (check_ignored_ctl(map))
 			continue;
 		cval = kzalloc(sizeof(*cval), GFP_KERNEL);
@@ -2162,7 +2337,8 @@
 			nameid = uac_processing_unit_iProcessing(desc, state->mixer->protocol);
 			len = 0;
 			if (nameid)
-				len = snd_usb_copy_string_desc(state, nameid,
+				len = snd_usb_copy_string_desc(state->chip,
+							       nameid,
 							       kctl->id.name,
 							       sizeof(kctl->id.name));
 			if (!len)
@@ -2310,14 +2486,15 @@
 	}
 
 	for (i = 0; i < desc->bNrInPins; i++) {
-		if ((err = parse_audio_unit(state, desc->baSourceID[i])) < 0)
+		err = parse_audio_unit(state, desc->baSourceID[i]);
+		if (err < 0)
 			return err;
 	}
 
 	if (desc->bNrInPins == 1) /* only one ? nonsense! */
 		return 0;
 
-	map = find_map(state, unitid, 0);
+	map = find_map(state->map, unitid, 0);
 	if (check_ignored_ctl(map))
 		return 0;
 
@@ -2358,7 +2535,8 @@
 		len = check_mapped_selector_name(state, unitid, i, namelist[i],
 						 MAX_ITEM_NAME_LEN);
 		if (! len && check_input_term(state, desc->baSourceID[i], &iterm) >= 0)
-			len = get_term_name(state, &iterm, namelist[i], MAX_ITEM_NAME_LEN, 0);
+			len = get_term_name(state->chip, &iterm, namelist[i],
+					    MAX_ITEM_NAME_LEN, 0);
 		if (! len)
 			sprintf(namelist[i], "Input %u", i);
 	}
@@ -2380,12 +2558,12 @@
 		/* if iSelector is given, use it */
 		nameid = uac_selector_unit_iSelector(desc);
 		if (nameid)
-			len = snd_usb_copy_string_desc(state, nameid,
+			len = snd_usb_copy_string_desc(state->chip, nameid,
 						       kctl->id.name,
 						       sizeof(kctl->id.name));
 		/* ... or pick up the terminal name at next */
 		if (!len)
-			len = get_term_name(state, &state->oterm,
+			len = get_term_name(state->chip, &state->oterm,
 				    kctl->id.name, sizeof(kctl->id.name), 0);
 		/* ... or use the fixed string "USB" as the last resort */
 		if (!len)
@@ -2458,7 +2636,7 @@
 	} else { /* UAC_VERSION_3 */
 		switch (p1[2]) {
 		case UAC_INPUT_TERMINAL:
-			return 0; /* NOP */
+			return parse_audio_input_terminal(state, unitid, p1);
 		case UAC3_MIXER_UNIT:
 			return parse_audio_mixer_unit(state, unitid, p1);
 		case UAC3_CLOCK_SOURCE:
@@ -2503,6 +2681,246 @@
 	return 0;
 }
 
+/* UAC3 predefined channels configuration */
+struct uac3_badd_profile {
+	int subclass;
+	const char *name;
+	int c_chmask;	/* capture channels mask */
+	int p_chmask;	/* playback channels mask */
+	int st_chmask;	/* side tone mixing channel mask */
+};
+
+static struct uac3_badd_profile uac3_badd_profiles[] = {
+	{
+		/*
+		 * BAIF, BAOF or combination of both
+		 * IN: Mono or Stereo cfg, Mono alt possible
+		 * OUT: Mono or Stereo cfg, Mono alt possible
+		 */
+		.subclass = UAC3_FUNCTION_SUBCLASS_GENERIC_IO,
+		.name = "GENERIC IO",
+		.c_chmask = -1,		/* dynamic channels */
+		.p_chmask = -1,		/* dynamic channels */
+	},
+	{
+		/* BAOF; Stereo only cfg, Mono alt possible */
+		.subclass = UAC3_FUNCTION_SUBCLASS_HEADPHONE,
+		.name = "HEADPHONE",
+		.p_chmask = 3,
+	},
+	{
+		/* BAOF; Mono or Stereo cfg, Mono alt possible */
+		.subclass = UAC3_FUNCTION_SUBCLASS_SPEAKER,
+		.name = "SPEAKER",
+		.p_chmask = -1,		/* dynamic channels */
+	},
+	{
+		/* BAIF; Mono or Stereo cfg, Mono alt possible */
+		.subclass = UAC3_FUNCTION_SUBCLASS_MICROPHONE,
+		.name = "MICROPHONE",
+		.c_chmask = -1,		/* dynamic channels */
+	},
+	{
+		/*
+		 * BAIOF topology
+		 * IN: Mono only
+		 * OUT: Mono or Stereo cfg, Mono alt possible
+		 */
+		.subclass = UAC3_FUNCTION_SUBCLASS_HEADSET,
+		.name = "HEADSET",
+		.c_chmask = 1,
+		.p_chmask = -1,		/* dynamic channels */
+		.st_chmask = 1,
+	},
+	{
+		/* BAIOF; IN: Mono only; OUT: Stereo only, Mono alt possible */
+		.subclass = UAC3_FUNCTION_SUBCLASS_HEADSET_ADAPTER,
+		.name = "HEADSET ADAPTER",
+		.c_chmask = 1,
+		.p_chmask = 3,
+		.st_chmask = 1,
+	},
+	{
+		/* BAIF + BAOF; IN: Mono only; OUT: Mono only */
+		.subclass = UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE,
+		.name = "SPEAKERPHONE",
+		.c_chmask = 1,
+		.p_chmask = 1,
+	},
+	{ 0 } /* terminator */
+};
+
+static bool uac3_badd_func_has_valid_channels(struct usb_mixer_interface *mixer,
+					      struct uac3_badd_profile *f,
+					      int c_chmask, int p_chmask)
+{
+	/*
+	 * If both playback/capture channels are dynamic, make sure
+	 * at least one channel is present
+	 */
+	if (f->c_chmask < 0 && f->p_chmask < 0) {
+		if (!c_chmask && !p_chmask) {
+			usb_audio_warn(mixer->chip, "BADD %s: no channels?",
+				       f->name);
+			return false;
+		}
+		return true;
+	}
+
+	if ((f->c_chmask < 0 && !c_chmask) ||
+	    (f->c_chmask >= 0 && f->c_chmask != c_chmask)) {
+		usb_audio_warn(mixer->chip, "BADD %s c_chmask mismatch",
+			       f->name);
+		return false;
+	}
+	if ((f->p_chmask < 0 && !p_chmask) ||
+	    (f->p_chmask >= 0 && f->p_chmask != p_chmask)) {
+		usb_audio_warn(mixer->chip, "BADD %s p_chmask mismatch",
+			       f->name);
+		return false;
+	}
+	return true;
+}
+
+/*
+ * create mixer controls for UAC3 BADD profiles
+ *
+ * A UAC3 BADD device doesn't carry class-specific descriptors, so everything
+ * has to be inferred from the BADD profile.
+ *
+ * A BADD device may contain a Mixer Unit, which has no controls; skip it.
+ */
+static int snd_usb_mixer_controls_badd(struct usb_mixer_interface *mixer,
+				       int ctrlif)
+{
+	struct usb_device *dev = mixer->chip->dev;
+	struct usb_interface_assoc_descriptor *assoc;
+	int badd_profile = mixer->chip->badd_profile;
+	struct uac3_badd_profile *f;
+	const struct usbmix_ctl_map *map;
+	int p_chmask = 0, c_chmask = 0, st_chmask = 0;
+	int i;
+
+	assoc = usb_ifnum_to_if(dev, ctrlif)->intf_assoc;
+
+	/* Detect BADD capture/playback channels from AS EP descriptors */
+	for (i = 0; i < assoc->bInterfaceCount; i++) {
+		int intf = assoc->bFirstInterface + i;
+
+		struct usb_interface *iface;
+		struct usb_host_interface *alts;
+		struct usb_interface_descriptor *altsd;
+		unsigned int maxpacksize;
+		char dir_in;
+		int chmask, num;
+
+		if (intf == ctrlif)
+			continue;
+
+		iface = usb_ifnum_to_if(dev, intf);
+		num = iface->num_altsetting;
+
+		if (num < 2)
+			return -EINVAL;
+
+		/*
+		 * The number of Channels in an AudioStreaming interface
+		 * and the audio sample bit resolution (16 bits or 24
+		 * bits) can be derived from the wMaxPacketSize field in
+		 * the Standard AS Audio Data Endpoint descriptor in
+		 * Alternate Setting 1
+		 */
+		alts = &iface->altsetting[1];
+		altsd = get_iface_desc(alts);
+
+		if (altsd->bNumEndpoints < 1)
+			return -EINVAL;
+
+		/* check direction */
+		dir_in = (get_endpoint(alts, 0)->bEndpointAddress & USB_DIR_IN);
+		maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize);
+
+		switch (maxpacksize) {
+		default:
+			usb_audio_err(mixer->chip,
+				"incorrect wMaxPacketSize 0x%x for BADD profile\n",
+				maxpacksize);
+			return -EINVAL;
+		case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_16:
+		case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_16:
+		case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_24:
+		case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_24:
+			chmask = 1;
+			break;
+		case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_16:
+		case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_16:
+		case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_24:
+		case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_24:
+			chmask = 3;
+			break;
+		}
+
+		if (dir_in)
+			c_chmask = chmask;
+		else
+			p_chmask = chmask;
+	}
+
+	usb_audio_dbg(mixer->chip,
+		"UAC3 BADD profile 0x%x: detected c_chmask=%d p_chmask=%d\n",
+		badd_profile, c_chmask, p_chmask);
+
+	/* check the mapping table */
+	for (map = uac3_badd_usbmix_ctl_maps; map->id; map++) {
+		if (map->id == badd_profile)
+			break;
+	}
+
+	if (!map->id)
+		return -EINVAL;
+
+	for (f = uac3_badd_profiles; f->name; f++) {
+		if (badd_profile == f->subclass)
+			break;
+	}
+	if (!f->name)
+		return -EINVAL;
+	if (!uac3_badd_func_has_valid_channels(mixer, f, c_chmask, p_chmask))
+		return -EINVAL;
+	st_chmask = f->st_chmask;
+
+	/* Playback */
+	if (p_chmask) {
+		/* Master channel, always writable */
+		build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE,
+				       UAC3_BADD_FU_ID2, map->map);
+		/* Mono/Stereo volume channels, always writable */
+		build_feature_ctl_badd(mixer, p_chmask, UAC_FU_VOLUME,
+				       UAC3_BADD_FU_ID2, map->map);
+	}
+
+	/* Capture */
+	if (c_chmask) {
+		/* Master channel, always writable */
+		build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE,
+				       UAC3_BADD_FU_ID5, map->map);
+		/* Mono/Stereo volume channels, always writable */
+		build_feature_ctl_badd(mixer, c_chmask, UAC_FU_VOLUME,
+				       UAC3_BADD_FU_ID5, map->map);
+	}
+
+	/* Side tone-mixing */
+	if (st_chmask) {
+		/* Master channel, always writable */
+		build_feature_ctl_badd(mixer, 0, UAC_FU_MUTE,
+				       UAC3_BADD_FU_ID7, map->map);
+		/* Mono volume channel, always writable */
+		build_feature_ctl_badd(mixer, 1, UAC_FU_VOLUME,
+				       UAC3_BADD_FU_ID7, map->map);
+	}
+
+	return 0;
+}
+
 /*
  * create mixer controls
  *
@@ -2596,6 +3014,12 @@
 			err = parse_audio_unit(&state, desc->bCSourceID);
 			if (err < 0 && err != -EINVAL)
 				return err;
+
+			if (uac_v2v3_control_is_readable(le32_to_cpu(desc->bmControls),
+							 UAC3_TE_INSERTION)) {
+				build_connector_control(&state, &state.oterm,
+							false);
+			}
 		}
 	}
 
@@ -2606,9 +3030,9 @@
 {
 	struct usb_mixer_elem_list *list;
 
-	for (list = mixer->id_elems[unitid]; list; list = list->next_id_elem) {
+	for_each_mixer_elem(list, mixer, unitid) {
 		struct usb_mixer_elem_info *info =
-			(struct usb_mixer_elem_info *)list;
+			mixer_elem_list_to_info(list);
 		/* invalidate cache, so the value is read from the device */
 		info->cached = 0;
 		snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
@@ -2619,7 +3043,7 @@
 static void snd_usb_mixer_dump_cval(struct snd_info_buffer *buffer,
 				    struct usb_mixer_elem_list *list)
 {
-	struct usb_mixer_elem_info *cval = (struct usb_mixer_elem_info *)list;
+	struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);
 	static char *val_types[] = {"BOOLEAN", "INV_BOOLEAN",
 				    "S8", "U8", "S16", "U16"};
 	snd_iprintf(buffer, "    Info: id=%i, control=%i, cmask=0x%x, "
@@ -2645,8 +3069,7 @@
 				mixer->ignore_ctl_error);
 		snd_iprintf(buffer, "Card: %s\n", chip->card->longname);
 		for (unitid = 0; unitid < MAX_ID_ELEMS; unitid++) {
-			for (list = mixer->id_elems[unitid]; list;
-			     list = list->next_id_elem) {
+			for_each_mixer_elem(list, mixer, unitid) {
 				snd_iprintf(buffer, "  Unit: %i\n", list->id);
 				if (list->kctl)
 					snd_iprintf(buffer,
@@ -2676,19 +3099,19 @@
 		return;
 	}
 
-	for (list = mixer->id_elems[unitid]; list; list = list->next_id_elem)
+	for_each_mixer_elem(list, mixer, unitid)
 		count++;
 
 	if (count == 0)
 		return;
 
-	for (list = mixer->id_elems[unitid]; list; list = list->next_id_elem) {
+	for_each_mixer_elem(list, mixer, unitid) {
 		struct usb_mixer_elem_info *info;
 
 		if (!list->kctl)
 			continue;
 
-		info = (struct usb_mixer_elem_info *)list;
+		info = mixer_elem_list_to_info(list);
 		if (count > 1 && info->control != control)
 			continue;
 
@@ -2809,6 +3232,48 @@
 	return 0;
 }
 
+static int keep_iface_ctl_get(struct snd_kcontrol *kcontrol,
+			      struct snd_ctl_elem_value *ucontrol)
+{
+	struct usb_mixer_interface *mixer = snd_kcontrol_chip(kcontrol);
+
+	ucontrol->value.integer.value[0] = mixer->chip->keep_iface;
+	return 0;
+}
+
+static int keep_iface_ctl_put(struct snd_kcontrol *kcontrol,
+			      struct snd_ctl_elem_value *ucontrol)
+{
+	struct usb_mixer_interface *mixer = snd_kcontrol_chip(kcontrol);
+	bool keep_iface = !!ucontrol->value.integer.value[0];
+
+	if (mixer->chip->keep_iface == keep_iface)
+		return 0;
+	mixer->chip->keep_iface = keep_iface;
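+	/* return 1 so the ctl core signals a value change */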
+	return 1;
+}
+
+static const struct snd_kcontrol_new keep_iface_ctl = {
+	.iface = SNDRV_CTL_ELEM_IFACE_CARD,
+	.name = "Keep Interface",
+	.info = snd_ctl_boolean_mono_info,
+	.get = keep_iface_ctl_get,
+	.put = keep_iface_ctl_put,
+};
+
+static int create_keep_iface_ctl(struct usb_mixer_interface *mixer)
+{
+	struct snd_kcontrol *kctl = snd_ctl_new1(&keep_iface_ctl, mixer);
+
+	/* need only one control per card */
+	if (snd_ctl_find_id(mixer->chip->card, &kctl->id)) {
+		snd_ctl_free_one(kctl);
+		return 0;
+	}
+
+	return snd_ctl_add(mixer->chip->card, kctl);
+}
+
 int snd_usb_create_mixer(struct snd_usb_audio *chip, int ctrlif,
 			 int ignore_error)
 {
@@ -2847,8 +3312,21 @@
 		break;
 	}
 
-	if ((err = snd_usb_mixer_controls(mixer)) < 0 ||
-	    (err = snd_usb_mixer_status_create(mixer)) < 0)
+	if (mixer->protocol == UAC_VERSION_3 &&
+			chip->badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) {
+		err = snd_usb_mixer_controls_badd(mixer, ctrlif);
+		if (err < 0)
+			goto _error;
+	} else {
+		err = snd_usb_mixer_controls(mixer);
+		if (err < 0)
+			goto _error;
+		err = snd_usb_mixer_status_create(mixer);
+		if (err < 0)
+			goto _error;
+	}
+	err = create_keep_iface_ctl(mixer);
+	if (err < 0)
 		goto _error;
 
 	snd_usb_mixer_apply_create_quirk(mixer);
@@ -2909,7 +3387,7 @@
 
 static int restore_mixer_value(struct usb_mixer_elem_list *list)
 {
-	struct usb_mixer_elem_info *cval = (struct usb_mixer_elem_info *)list;
+	struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);
 	int c, err, idx;
 
 	if (cval->cmask) {
@@ -2945,8 +3423,7 @@
 	if (reset_resume) {
 		/* restore cached mixer values */
 		for (id = 0; id < MAX_ID_ELEMS; id++) {
-			for (list = mixer->id_elems[id]; list;
-			     list = list->next_id_elem) {
+			for_each_mixer_elem(list, mixer, id) {
 				if (list->resume) {
 					err = list->resume(list);
 					if (err < 0)
@@ -2956,6 +3433,8 @@
 		}
 	}
 
+	snd_usb_mixer_resume_quirk(mixer);
+
 	return snd_usb_mixer_activate(mixer);
 }
 #endif
diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h
index ba27f7a..e026534 100644
--- a/sound/usb/mixer.h
+++ b/sound/usb/mixer.h
@@ -53,6 +53,12 @@
 	usb_mixer_elem_resume_func_t resume;
 };
 
+/* iterate over mixer element list of the given unit id */
+#define for_each_mixer_elem(list, mixer, id)	\
+	for ((list) = (mixer)->id_elems[id]; (list); (list) = (list)->next_id_elem)
+#define mixer_elem_list_to_info(list) \
+	container_of(list, struct usb_mixer_elem_info, head)
+
 struct usb_mixer_elem_info {
 	struct usb_mixer_elem_list head;
 	unsigned int control;	/* CS or ICN (high byte) */
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index eaa03ac..71069e1 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -485,3 +485,68 @@
 	{ 0 } /* terminator */
 };
 
+/*
+ * Control map entries for UAC3 BADD profiles
+ */
+
+static struct usbmix_name_map uac3_badd_generic_io_map[] = {
+	{ UAC3_BADD_FU_ID2, "Generic Out Playback" },
+	{ UAC3_BADD_FU_ID5, "Generic In Capture" },
+	{ 0 }					/* terminator */
+};
+static struct usbmix_name_map uac3_badd_headphone_map[] = {
+	{ UAC3_BADD_FU_ID2, "Headphone Playback" },
+	{ 0 }					/* terminator */
+};
+static struct usbmix_name_map uac3_badd_speaker_map[] = {
+	{ UAC3_BADD_FU_ID2, "Speaker Playback" },
+	{ 0 }					/* terminator */
+};
+static struct usbmix_name_map uac3_badd_microphone_map[] = {
+	{ UAC3_BADD_FU_ID5, "Mic Capture" },
+	{ 0 }					/* terminator */
+};
+/* Also covers the 'headset adapter' profile */
+static struct usbmix_name_map uac3_badd_headset_map[] = {
+	{ UAC3_BADD_FU_ID2, "Headset Playback" },
+	{ UAC3_BADD_FU_ID5, "Headset Capture" },
+	{ UAC3_BADD_FU_ID7, "Sidetone Mixing" },
+	{ 0 }					/* terminator */
+};
+static struct usbmix_name_map uac3_badd_speakerphone_map[] = {
+	{ UAC3_BADD_FU_ID2, "Speaker Playback" },
+	{ UAC3_BADD_FU_ID5, "Mic Capture" },
+	{ 0 }					/* terminator */
+};
+
+static struct usbmix_ctl_map uac3_badd_usbmix_ctl_maps[] = {
+	{
+		.id = UAC3_FUNCTION_SUBCLASS_GENERIC_IO,
+		.map = uac3_badd_generic_io_map,
+	},
+	{
+		.id = UAC3_FUNCTION_SUBCLASS_HEADPHONE,
+		.map = uac3_badd_headphone_map,
+	},
+	{
+		.id = UAC3_FUNCTION_SUBCLASS_SPEAKER,
+		.map = uac3_badd_speaker_map,
+	},
+	{
+		.id = UAC3_FUNCTION_SUBCLASS_MICROPHONE,
+		.map = uac3_badd_microphone_map,
+	},
+	{
+		.id = UAC3_FUNCTION_SUBCLASS_HEADSET,
+		.map = uac3_badd_headset_map,
+	},
+	{
+		.id = UAC3_FUNCTION_SUBCLASS_HEADSET_ADAPTER,
+		.map = uac3_badd_headset_map,
+	},
+	{
+		.id = UAC3_FUNCTION_SUBCLASS_SPEAKERPHONE,
+		.map = uac3_badd_speakerphone_map,
+	},
+	{ 0 } /* terminator */
+};
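As with the other map tables in this file, both the name maps and the ctl-map table end in a zero-filled sentinel, so lookups scan until .id is 0. A sketch of resolving the map for a given BADD function subclass; the helper name is hypothetical, the real lookup lives wherever the other map tables are consulted:

	static const struct usbmix_name_map *
	find_uac3_badd_map(int subclass)
	{
		const struct usbmix_ctl_map *map;

		for (map = uac3_badd_usbmix_ctl_maps; map->id; map++)
			if (map->id == subclass)
				return map->map;
		return NULL;	/* no names to override */
	}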
diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index 56537a1..e82a72f 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -1172,7 +1172,7 @@
 	int unitid = 12; /* SampleRate ExtensionUnit ID */
 
 	list_for_each_entry(mixer, &chip->mixer_list, list) {
-		cval = (struct usb_mixer_elem_info *)mixer->id_elems[unitid];
+		cval = mixer_elem_list_to_info(mixer->id_elems[unitid]);
 		if (cval) {
 			snd_usb_mixer_set_ctl_value(cval, UAC_SET_CUR,
 						    cval->control << 8,
@@ -1799,12 +1799,33 @@
 					  NULL);
 }
 
+static void dell_dock_init_vol(struct snd_usb_audio *chip, int ch, int id)
+{
+	u16 buf = 0;
+
+	snd_usb_ctl_msg(chip->dev, usb_sndctrlpipe(chip->dev, 0), UAC_SET_CUR,
+			USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_OUT,
+			ch, snd_usb_ctrl_intf(chip) | (id << 8),
+			&buf, 2);
+}
+
+static int dell_dock_mixer_init(struct usb_mixer_interface *mixer)
+{
+	/* fix to 0dB playback volumes */
+	dell_dock_init_vol(mixer->chip, 1, 16);
+	dell_dock_init_vol(mixer->chip, 2, 16);
+	dell_dock_init_vol(mixer->chip, 1, 19);
+	dell_dock_init_vol(mixer->chip, 2, 19);
+	return 0;
+}
+
 int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer)
 {
 	int err = 0;
 	struct snd_info_entry *entry;
 
-	if ((err = snd_usb_soundblaster_remote_init(mixer)) < 0)
+	err = snd_usb_soundblaster_remote_init(mixer);
+	if (err < 0)
 		return err;
 
 	switch (mixer->chip->usb_id) {
@@ -1828,8 +1849,6 @@
 	/* EMU0204 */
 	case USB_ID(0x041e, 0x3f19):
 		err = snd_emu0204_controls_create(mixer);
-		if (err < 0)
-			break;
 		break;
 
 	case USB_ID(0x0763, 0x2030): /* M-Audio Fast Track C400 */
@@ -1884,11 +1903,25 @@
 	case USB_ID(0x041e, 0x323b): /* Creative Sound Blaster E1 */
 		err = snd_soundblaster_e1_switch_create(mixer);
 		break;
+	case USB_ID(0x0bda, 0x4014): /* Dell WD15 dock */
+		err = dell_dock_mixer_init(mixer);
+		break;
 	}
 
 	return err;
 }
 
+#ifdef CONFIG_PM
+void snd_usb_mixer_resume_quirk(struct usb_mixer_interface *mixer)
+{
+	switch (mixer->chip->usb_id) {
+	case USB_ID(0x0bda, 0x4014): /* Dell WD15 dock */
+		dell_dock_mixer_init(mixer);
+		break;
+	}
+}
+#endif
+
 void snd_usb_mixer_rc_memory_change(struct usb_mixer_interface *mixer,
 				    int unitid)
 {
diff --git a/sound/usb/mixer_quirks.h b/sound/usb/mixer_quirks.h
index b5abd32..52be26d 100644
--- a/sound/usb/mixer_quirks.h
+++ b/sound/usb/mixer_quirks.h
@@ -14,5 +14,9 @@
 				  struct usb_mixer_elem_info *cval, int unitid,
 				  struct snd_kcontrol *kctl);
 
+#ifdef CONFIG_PM
+void snd_usb_mixer_resume_quirk(struct usb_mixer_interface *mixer);
+#endif
+
 #endif /* SND_USB_MIXER_QUIRKS_H */
 
diff --git a/sound/usb/mixer_scarlett.c b/sound/usb/mixer_scarlett.c
index c33e237..4aeb948 100644
--- a/sound/usb/mixer_scarlett.c
+++ b/sound/usb/mixer_scarlett.c
@@ -287,8 +287,7 @@
 
 static int scarlett_ctl_resume(struct usb_mixer_elem_list *list)
 {
-	struct usb_mixer_elem_info *elem =
-		container_of(list, struct usb_mixer_elem_info, head);
+	struct usb_mixer_elem_info *elem = mixer_elem_list_to_info(list);
 	int i;
 
 	for (i = 0; i < elem->channels; i++)
@@ -447,8 +446,7 @@
 
 static int scarlett_ctl_enum_resume(struct usb_mixer_elem_list *list)
 {
-	struct usb_mixer_elem_info *elem =
-		container_of(list, struct usb_mixer_elem_info, head);
+	struct usb_mixer_elem_info *elem = mixer_elem_list_to_info(list);
 
 	if (elem->cached)
 		snd_usb_set_cur_mix_value(elem, 0, 0, *elem->cache_val);
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 3cbfae6..78d1cad 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -76,10 +76,9 @@
  */
 static snd_pcm_uframes_t snd_usb_pcm_pointer(struct snd_pcm_substream *substream)
 {
-	struct snd_usb_substream *subs;
+	struct snd_usb_substream *subs = substream->runtime->private_data;
 	unsigned int hwptr_done;
 
-	subs = (struct snd_usb_substream *)substream->runtime->private_data;
 	if (atomic_read(&subs->stream->chip->shutdown))
 		return SNDRV_PCM_POS_XRUN;
 	spin_lock(&subs->lock);
@@ -164,10 +163,11 @@
 	ep = get_endpoint(alts, 0)->bEndpointAddress;
 
 	data[0] = 1;
-	if ((err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR,
-				   USB_TYPE_CLASS|USB_RECIP_ENDPOINT|USB_DIR_OUT,
-				   UAC_EP_CS_ATTR_PITCH_CONTROL << 8, ep,
-				   data, sizeof(data))) < 0) {
+	err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR,
+			      USB_TYPE_CLASS|USB_RECIP_ENDPOINT|USB_DIR_OUT,
+			      UAC_EP_CS_ATTR_PITCH_CONTROL << 8, ep,
+			      data, sizeof(data));
+	if (err < 0) {
 		usb_audio_err(chip, "%d:%d: cannot set enable PITCH\n",
 			      iface, ep);
 		return err;
@@ -185,10 +185,11 @@
 	int err;
 
 	data[0] = 1;
-	if ((err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC2_CS_CUR,
-				   USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_OUT,
-				   UAC2_EP_CS_PITCH << 8, 0,
-				   data, sizeof(data))) < 0) {
+	err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC2_CS_CUR,
+			      USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_OUT,
+			      UAC2_EP_CS_PITCH << 8, 0,
+			      data, sizeof(data));
+	if (err < 0) {
 		usb_audio_err(chip, "%d:%d: cannot set enable PITCH (v2)\n",
 			      iface, fmt->altsetting);
 		return err;
@@ -321,6 +322,7 @@
 	struct usb_host_interface *alts;
 	struct usb_interface *iface;
 	unsigned int ep;
+	unsigned int ifnum;
 
 	/* Consumers of implicit feedback sync EPs are always playback EPs */
 	if (subs->direction != SNDRV_PCM_STREAM_PLAYBACK)
@@ -330,44 +332,27 @@
 	case USB_ID(0x0763, 0x2030): /* M-Audio Fast Track C400 */
 	case USB_ID(0x0763, 0x2031): /* M-Audio Fast Track C600 */
 		ep = 0x81;
-		iface = usb_ifnum_to_if(dev, 3);
-
-		if (!iface || iface->num_altsetting == 0)
-			return -EINVAL;
-
-		alts = &iface->altsetting[1];
-		goto add_sync_ep;
-		break;
+		ifnum = 3;
+		goto add_sync_ep_from_ifnum;
 	case USB_ID(0x0763, 0x2080): /* M-Audio FastTrack Ultra */
 	case USB_ID(0x0763, 0x2081):
 		ep = 0x81;
-		iface = usb_ifnum_to_if(dev, 2);
-
-		if (!iface || iface->num_altsetting == 0)
-			return -EINVAL;
-
-		alts = &iface->altsetting[1];
-		goto add_sync_ep;
-	case USB_ID(0x2466, 0x8003):
+		ifnum = 2;
+		goto add_sync_ep_from_ifnum;
+	case USB_ID(0x2466, 0x8003): /* Fractal Audio Axe-Fx II */
 		ep = 0x86;
-		iface = usb_ifnum_to_if(dev, 2);
-
-		if (!iface || iface->num_altsetting == 0)
-			return -EINVAL;
-
-		alts = &iface->altsetting[1];
-		goto add_sync_ep;
-	case USB_ID(0x1397, 0x0002):
+		ifnum = 2;
+		goto add_sync_ep_from_ifnum;
+	case USB_ID(0x2466, 0x8010): /* Fractal Audio Axe-Fx III */
 		ep = 0x81;
-		iface = usb_ifnum_to_if(dev, 1);
-
-		if (!iface || iface->num_altsetting == 0)
-			return -EINVAL;
-
-		alts = &iface->altsetting[1];
-		goto add_sync_ep;
-
+		ifnum = 2;
+		goto add_sync_ep_from_ifnum;
+	case USB_ID(0x1397, 0x0002): /* Behringer UFX1204 */
+		ep = 0x81;
+		ifnum = 1;
+		goto add_sync_ep_from_ifnum;
 	}
+
 	if (attr == USB_ENDPOINT_SYNC_ASYNC &&
 	    altsd->bInterfaceClass == USB_CLASS_VENDOR_SPEC &&
 	    altsd->bInterfaceProtocol == 2 &&
@@ -382,6 +367,14 @@
 	/* No quirk */
 	return 0;
 
+add_sync_ep_from_ifnum:
+	iface = usb_ifnum_to_if(dev, ifnum);
+
+	if (!iface || iface->num_altsetting == 0)
+		return -EINVAL;
+
+	alts = &iface->altsetting[1];
+
 add_sync_ep:
 	subs->sync_endpoint = snd_usb_add_endpoint(subs->stream->chip,
 						   alts, ep, !subs->direction,
@@ -507,7 +500,7 @@
 	iface = usb_ifnum_to_if(dev, fmt->iface);
 	if (WARN_ON(!iface))
 		return -EINVAL;
-	alts = &iface->altsetting[fmt->altset_idx];
+	alts = usb_altnum_to_altsetting(iface, fmt->altsetting);
 	altsd = get_iface_desc(alts);
 	if (WARN_ON(altsd->bAlternateSetting != fmt->altsetting))
 		return -EINVAL;
@@ -517,21 +510,21 @@
 
 	/* close the old interface */
 	if (subs->interface >= 0 && subs->interface != fmt->iface) {
-		err = usb_set_interface(subs->dev, subs->interface, 0);
-		if (err < 0) {
-			dev_err(&dev->dev,
-				"%d:%d: return to setting 0 failed (%d)\n",
-				fmt->iface, fmt->altsetting, err);
-			return -EIO;
+		if (!subs->stream->chip->keep_iface) {
+			err = usb_set_interface(subs->dev, subs->interface, 0);
+			if (err < 0) {
+				dev_err(&dev->dev,
+					"%d:%d: return to setting 0 failed (%d)\n",
+					fmt->iface, fmt->altsetting, err);
+				return -EIO;
+			}
 		}
 		subs->interface = -1;
 		subs->altset_idx = 0;
 	}
 
 	/* set interface */
-	if (subs->interface != fmt->iface ||
-	    subs->altset_idx != fmt->altset_idx) {
-
+	if (iface->cur_altsetting != alts) {
 		err = snd_usb_select_mode_quirk(subs, fmt);
 		if (err < 0)
 			return -EIO;
@@ -545,12 +538,11 @@
 		}
 		dev_dbg(&dev->dev, "setting usb interface %d:%d\n",
 			fmt->iface, fmt->altsetting);
-		subs->interface = fmt->iface;
-		subs->altset_idx = fmt->altset_idx;
-
 		snd_usb_set_interface_quirk(dev);
 	}
 
+	subs->interface = fmt->iface;
+	subs->altset_idx = fmt->altset_idx;
 	subs->data_endpoint = snd_usb_add_endpoint(subs->stream->chip,
 						   alts, fmt->endpoint, subs->direction,
 						   SND_USB_ENDPOINT_TYPE_DATA);
@@ -736,7 +728,11 @@
 	struct audioformat *fmt;
 	int ret;
 
-	ret = snd_pcm_lib_alloc_vmalloc_buffer(substream,
+	if (snd_usb_use_vmalloc)
+		ret = snd_pcm_lib_alloc_vmalloc_buffer(substream,
+						       params_buffer_bytes(hw_params));
+	else
+		ret = snd_pcm_lib_malloc_pages(substream,
 					       params_buffer_bytes(hw_params));
 	if (ret < 0)
 		return ret;
@@ -789,7 +785,11 @@
 		snd_usb_endpoint_deactivate(subs->data_endpoint);
 		snd_usb_unlock_shutdown(subs->stream->chip);
 	}
-	return snd_pcm_lib_free_vmalloc_buffer(substream);
+
+	if (snd_usb_use_vmalloc)
+		return snd_pcm_lib_free_vmalloc_buffer(substream);
+	else
+		return snd_pcm_lib_free_pages(substream);
 }
 
 /*
@@ -1181,9 +1181,6 @@
 		pt = 125 * (1 << fp->datainterval);
 		ptmin = min(ptmin, pt);
 	}
-	err = snd_usb_autoresume(subs->stream->chip);
-	if (err < 0)
-		return err;
 
 	param_period_time_if_needed = SNDRV_PCM_HW_PARAM_PERIOD_TIME;
 	if (subs->speed == USB_SPEED_FULL)
@@ -1192,30 +1189,37 @@
 	if (ptmin == 1000)
 		/* if period time doesn't go below 1 ms, no rules needed */
 		param_period_time_if_needed = -1;
-	snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_TIME,
-				     ptmin, UINT_MAX);
 
-	if ((err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
-				       hw_rule_rate, subs,
-				       SNDRV_PCM_HW_PARAM_FORMAT,
-				       SNDRV_PCM_HW_PARAM_CHANNELS,
-				       param_period_time_if_needed,
-				       -1)) < 0)
-		goto rep_err;
-	if ((err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS,
-				       hw_rule_channels, subs,
-				       SNDRV_PCM_HW_PARAM_FORMAT,
-				       SNDRV_PCM_HW_PARAM_RATE,
-				       param_period_time_if_needed,
-				       -1)) < 0)
-		goto rep_err;
-	if ((err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT,
-				       hw_rule_format, subs,
-				       SNDRV_PCM_HW_PARAM_RATE,
-				       SNDRV_PCM_HW_PARAM_CHANNELS,
-				       param_period_time_if_needed,
-				       -1)) < 0)
-		goto rep_err;
+	err = snd_pcm_hw_constraint_minmax(runtime,
+					   SNDRV_PCM_HW_PARAM_PERIOD_TIME,
+					   ptmin, UINT_MAX);
+	if (err < 0)
+		return err;
+
+	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
+				  hw_rule_rate, subs,
+				  SNDRV_PCM_HW_PARAM_FORMAT,
+				  SNDRV_PCM_HW_PARAM_CHANNELS,
+				  param_period_time_if_needed,
+				  -1);
+	if (err < 0)
+		return err;
+	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS,
+				  hw_rule_channels, subs,
+				  SNDRV_PCM_HW_PARAM_FORMAT,
+				  SNDRV_PCM_HW_PARAM_RATE,
+				  param_period_time_if_needed,
+				  -1);
+	if (err < 0)
+		return err;
+	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT,
+				  hw_rule_format, subs,
+				  SNDRV_PCM_HW_PARAM_RATE,
+				  SNDRV_PCM_HW_PARAM_CHANNELS,
+				  param_period_time_if_needed,
+				  -1);
+	if (err < 0)
+		return err;
 	if (param_period_time_if_needed >= 0) {
 		err = snd_pcm_hw_rule_add(runtime, 0,
 					  SNDRV_PCM_HW_PARAM_PERIOD_TIME,
@@ -1225,19 +1229,18 @@
 					  SNDRV_PCM_HW_PARAM_RATE,
 					  -1);
 		if (err < 0)
-			goto rep_err;
+			return err;
 	}
-	if ((err = snd_usb_pcm_check_knot(runtime, subs)) < 0)
-		goto rep_err;
-	return 0;
+	err = snd_usb_pcm_check_knot(runtime, subs);
+	if (err < 0)
+		return err;
 
-rep_err:
-	snd_usb_autosuspend(subs->stream->chip);
-	return err;
+	return snd_usb_autoresume(subs->stream->chip);
 }
 
-static int snd_usb_pcm_open(struct snd_pcm_substream *substream, int direction)
+static int snd_usb_pcm_open(struct snd_pcm_substream *substream)
 {
+	int direction = substream->stream;
 	struct snd_usb_stream *as = snd_pcm_substream_chip(substream);
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	struct snd_usb_substream *subs = &as->substream[direction];
@@ -1257,14 +1260,16 @@
 	return setup_hw_info(runtime, subs);
 }
 
-static int snd_usb_pcm_close(struct snd_pcm_substream *substream, int direction)
+static int snd_usb_pcm_close(struct snd_pcm_substream *substream)
 {
+	int direction = substream->stream;
 	struct snd_usb_stream *as = snd_pcm_substream_chip(substream);
 	struct snd_usb_substream *subs = &as->substream[direction];
 
 	stop_endpoints(subs, true);
 
-	if (subs->interface >= 0 &&
+	if (!as->chip->keep_iface &&
+	    subs->interface >= 0 &&
 	    !snd_usb_lock_shutdown(subs->stream->chip)) {
 		usb_set_interface(subs->dev, subs->interface, 0);
 		subs->interface = -1;
@@ -1311,7 +1316,7 @@
 		if (bytes % (runtime->sample_bits >> 3) != 0) {
 			int oldbytes = bytes;
 			bytes = frames * stride;
-			dev_warn(&subs->dev->dev,
+			dev_warn_ratelimited(&subs->dev->dev,
 				 "Corrected urb data len. %d->%d\n",
 							oldbytes, bytes);
 		}
@@ -1619,26 +1624,6 @@
 	spin_unlock_irqrestore(&subs->lock, flags);
 }
 
-static int snd_usb_playback_open(struct snd_pcm_substream *substream)
-{
-	return snd_usb_pcm_open(substream, SNDRV_PCM_STREAM_PLAYBACK);
-}
-
-static int snd_usb_playback_close(struct snd_pcm_substream *substream)
-{
-	return snd_usb_pcm_close(substream, SNDRV_PCM_STREAM_PLAYBACK);
-}
-
-static int snd_usb_capture_open(struct snd_pcm_substream *substream)
-{
-	return snd_usb_pcm_open(substream, SNDRV_PCM_STREAM_CAPTURE);
-}
-
-static int snd_usb_capture_close(struct snd_pcm_substream *substream)
-{
-	return snd_usb_pcm_close(substream, SNDRV_PCM_STREAM_CAPTURE);
-}
-
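The four per-direction wrappers removed above became redundant once the shared callbacks read the direction straight from the substream: substream->stream already holds SNDRV_PCM_STREAM_PLAYBACK or SNDRV_PCM_STREAM_CAPTURE, so the unified open/close can index the per-direction array directly:

	/* Inside the shared callbacks: */
	struct snd_usb_substream *subs = &as->substream[substream->stream];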
 static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substream,
 					      int cmd)
 {
@@ -1700,8 +1685,8 @@
 }
 
 static const struct snd_pcm_ops snd_usb_playback_ops = {
-	.open =		snd_usb_playback_open,
-	.close =	snd_usb_playback_close,
+	.open =		snd_usb_pcm_open,
+	.close =	snd_usb_pcm_close,
 	.ioctl =	snd_pcm_lib_ioctl,
 	.hw_params =	snd_usb_hw_params,
 	.hw_free =	snd_usb_hw_free,
@@ -1713,8 +1698,8 @@
 };
 
 static const struct snd_pcm_ops snd_usb_capture_ops = {
-	.open =		snd_usb_capture_open,
-	.close =	snd_usb_capture_close,
+	.open =		snd_usb_pcm_open,
+	.close =	snd_usb_pcm_close,
 	.ioctl =	snd_pcm_lib_ioctl,
 	.hw_params =	snd_usb_hw_params,
 	.hw_free =	snd_usb_hw_free,
@@ -1725,9 +1710,50 @@
 	.mmap =		snd_pcm_lib_mmap_vmalloc,
 };
 
+static const struct snd_pcm_ops snd_usb_playback_dev_ops = {
+	.open =		snd_usb_pcm_open,
+	.close =	snd_usb_pcm_close,
+	.ioctl =	snd_pcm_lib_ioctl,
+	.hw_params =	snd_usb_hw_params,
+	.hw_free =	snd_usb_hw_free,
+	.prepare =	snd_usb_pcm_prepare,
+	.trigger =	snd_usb_substream_playback_trigger,
+	.pointer =	snd_usb_pcm_pointer,
+	.page =		snd_pcm_sgbuf_ops_page,
+};
+
+static const struct snd_pcm_ops snd_usb_capture_dev_ops = {
+	.open =		snd_usb_pcm_open,
+	.close =	snd_usb_pcm_close,
+	.ioctl =	snd_pcm_lib_ioctl,
+	.hw_params =	snd_usb_hw_params,
+	.hw_free =	snd_usb_hw_free,
+	.prepare =	snd_usb_pcm_prepare,
+	.trigger =	snd_usb_substream_capture_trigger,
+	.pointer =	snd_usb_pcm_pointer,
+	.page =		snd_pcm_sgbuf_ops_page,
+};
+
 void snd_usb_set_pcm_ops(struct snd_pcm *pcm, int stream)
 {
-	snd_pcm_set_ops(pcm, stream,
-			stream == SNDRV_PCM_STREAM_PLAYBACK ?
-			&snd_usb_playback_ops : &snd_usb_capture_ops);
+	const struct snd_pcm_ops *ops;
+
+	if (snd_usb_use_vmalloc)
+		ops = stream == SNDRV_PCM_STREAM_PLAYBACK ?
+			&snd_usb_playback_ops : &snd_usb_capture_ops;
+	else
+		ops = stream == SNDRV_PCM_STREAM_PLAYBACK ?
+			&snd_usb_playback_dev_ops : &snd_usb_capture_dev_ops;
+	snd_pcm_set_ops(pcm, stream, ops);
+}
+
+void snd_usb_preallocate_buffer(struct snd_usb_substream *subs)
+{
+	struct snd_pcm *pcm = subs->stream->pcm;
+	struct snd_pcm_substream *s = pcm->streams[subs->direction].substream;
+	struct device *dev = subs->dev->bus->controller;
+
+	if (!snd_usb_use_vmalloc)
+		snd_pcm_lib_preallocate_pages(s, SNDRV_DMA_TYPE_DEV_SG,
+					      dev, 64*1024, 512*1024);
 }
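With snd_usb_use_vmalloc cleared, substreams get a scatter-gather DMA buffer preallocated at 64 KiB (growable up to 512 KiB) instead of the historical vmalloc buffer, and the *_dev_ops tables with snd_pcm_sgbuf_ops_page as the .page callback are selected. The flag is presumably wired up as a module parameter in card.c; a sketch of the assumed wiring, where the parameter name is an assumption based on the exported symbol:

	bool snd_usb_use_vmalloc = true;	/* assumed default */
	module_param_named(use_vmalloc, snd_usb_use_vmalloc, bool, 0444);
	MODULE_PARM_DESC(use_vmalloc,
			 "Use vmalloc for PCM intermediate buffers (default: yes).");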
diff --git a/sound/usb/pcm.h b/sound/usb/pcm.h
index 35740d5..f77ec58 100644
--- a/sound/usb/pcm.h
+++ b/sound/usb/pcm.h
@@ -10,6 +10,7 @@
 int snd_usb_init_pitch(struct snd_usb_audio *chip, int iface,
 		       struct usb_host_interface *alts,
 		       struct audioformat *fmt);
+void snd_usb_preallocate_buffer(struct snd_usb_substream *subs);
 
 
 #endif /* __USBAUDIO_PCM_H */
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 754e632..0e37e35 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -3371,5 +3371,15 @@
 		}
 	}
 },
+/* Dell WD15 Dock */
+{
+	USB_DEVICE(0x0bda, 0x4014),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		.vendor_name = "Dell",
+		.product_name = "WD15 Dock",
+		.profile_name = "Dell-WD15-Dock",
+		.ifnum = QUIRK_NO_INTERFACE
+	}
+},
 
 #undef USB_DEVICE_VENDOR_SPEC
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index acbeb52..f4b6917 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -851,6 +851,36 @@
 	return 0; /* Successful boot */
 }
 
+static int snd_usb_axefx3_boot_quirk(struct usb_device *dev)
+{
+	int err;
+
+	dev_dbg(&dev->dev, "Waiting for Axe-Fx III to boot up...\n");
+
+	/* If the Axe-Fx III has not fully booted, it will time out when
+	 * trying to enable the audio streaming interface. A more generous
+	 * timeout is used here to detect when the Axe-Fx III has finished
+	 * booting, as the set-interface request is only acked once it has.
+	 */
+	err = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
+				USB_REQ_SET_INTERFACE, USB_RECIP_INTERFACE,
+				1, 1, NULL, 0, 120000);
+	if (err < 0) {
+		dev_err(&dev->dev,
+			"failed waiting for Axe-Fx III to boot: %d\n", err);
+		return err;
+	}
+
+	dev_dbg(&dev->dev, "Axe-Fx III is now ready\n");
+
+	err = usb_set_interface(dev, 1, 0);
+	if (err < 0)
+		dev_dbg(&dev->dev,
+			"error stopping Axe-Fx III interface: %d\n", err);
+
+	return 0;
+}
+
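The boot quirk issues SET_INTERFACE by hand rather than through usb_set_interface() because the helper uses the default control timeout (USB_CTRL_SET_TIMEOUT, 5 seconds), far too short for a booting Axe-Fx III; the explicit 120000 ms argument allows up to two minutes. For contrast, the stock call that would give up early:

	/* Would return -ETIMEDOUT while the unit is still booting: */
	int err = usb_set_interface(dev, 1, 1);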
 /*
  * Setup quirks
  */
@@ -1026,6 +1056,8 @@
 		return snd_usb_fasttrackpro_boot_quirk(dev);
 	case USB_ID(0x047f, 0xc010): /* Plantronics Gamecom 780 */
 		return snd_usb_gamecon780_boot_quirk(dev);
+	case USB_ID(0x2466, 0x8010): /* Fractal Audio Axe-Fx III */
+		return snd_usb_axefx3_boot_quirk(dev);
 	}
 
 	return 0;
@@ -1327,20 +1359,47 @@
 
 	/* XMOS based USB DACs */
 	switch (chip->usb_id) {
+	case USB_ID(0x1511, 0x0037): /* AURALiC VEGA */
+	case USB_ID(0x20b1, 0x0002): /* Wyred 4 Sound DAC-2 DSD */
+	case USB_ID(0x20b1, 0x2004): /* Matrix Audio X-SPDIF 2 */
 	case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */
 	case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */
 	case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */
 	case USB_ID(0x22d9, 0x0416): /* OPPO HA-1 */
+	case USB_ID(0x22d9, 0x0436): /* OPPO Sonica */
+	case USB_ID(0x22d9, 0x0461): /* OPPO UDP-205 */
+	case USB_ID(0x2522, 0x0012): /* LH Labs VI DAC Infinity */
+	case USB_ID(0x25ce, 0x001f): /* Mytek Brooklyn DAC */
+	case USB_ID(0x25ce, 0x0021): /* Mytek Manhattan DAC */
+	case USB_ID(0x25ce, 0x8025): /* Mytek Brooklyn DAC+ */
 	case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */
 		if (fp->altsetting == 2)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;
 
+	case USB_ID(0x0d8c, 0x0316): /* Hegel HD12 DSD */
+	case USB_ID(0x16b0, 0x06b2): /* NuPrime DAC-10 */
+	case USB_ID(0x16d0, 0x0733): /* Furutech ADL Stratos */
+	case USB_ID(0x16d0, 0x09db): /* NuPrime Audio DAC-9 */
+	case USB_ID(0x1db5, 0x0003): /* Bryston BDA3 */
 	case USB_ID(0x20b1, 0x000a): /* Gustard DAC-X20U */
+	case USB_ID(0x20b1, 0x2005): /* Denafrips Ares DAC */
 	case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
 	case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */
+	case USB_ID(0x20b1, 0x3021): /* Eastern El. MiniMax Tube DAC Supreme */
 	case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */
+	case USB_ID(0x20b1, 0x302d): /* Unison Research Unico CD Due */
+	case USB_ID(0x20b1, 0x3036): /* Holo Springs Level 3 R2R DAC */
+	case USB_ID(0x20b1, 0x307b): /* CH Precision C1 DAC */
+	case USB_ID(0x20b1, 0x3086): /* Singxer F-1 converter board */
+	case USB_ID(0x22d9, 0x0426): /* OPPO HA-2 */
+	case USB_ID(0x22e1, 0xca01): /* HDTA Serenade DSD */
+	case USB_ID(0x249c, 0x9326): /* M2Tech Young MkIII */
 	case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */
+	case USB_ID(0x2622, 0x0041): /* Audiolab M-DAC+ */
+	case USB_ID(0x27f7, 0x3002): /* W4S DAC-2v2SE */
+	case USB_ID(0x29a2, 0x0086): /* Mutec MC3+ USB */
+	case USB_ID(0x6b42, 0x0042): /* MSB Technology */
 		if (fp->altsetting == 3)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;
diff --git a/sound/usb/stream.c b/sound/usb/stream.c
index 5ed3345..729afd8 100644
--- a/sound/usb/stream.c
+++ b/sound/usb/stream.c
@@ -106,6 +106,8 @@
 	subs->ep_num = fp->endpoint;
 	if (fp->channels > subs->channels_max)
 		subs->channels_max = fp->channels;
+
+	snd_usb_preallocate_buffer(subs);
 }
 
 /* kctl callbacks for usb-audio channel maps */
@@ -633,6 +635,395 @@
 	return NULL;
 }
 
+static struct audioformat *
+audio_format_alloc_init(struct snd_usb_audio *chip,
+		       struct usb_host_interface *alts,
+		       int protocol, int iface_no, int altset_idx,
+		       int altno, int num_channels, int clock)
+{
+	struct audioformat *fp;
+
+	fp = kzalloc(sizeof(*fp), GFP_KERNEL);
+	if (!fp)
+		return NULL;
+
+	fp->iface = iface_no;
+	fp->altsetting = altno;
+	fp->altset_idx = altset_idx;
+	fp->endpoint = get_endpoint(alts, 0)->bEndpointAddress;
+	fp->ep_attr = get_endpoint(alts, 0)->bmAttributes;
+	fp->datainterval = snd_usb_parse_datainterval(chip, alts);
+	fp->protocol = protocol;
+	fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize);
+	fp->channels = num_channels;
+	if (snd_usb_get_speed(chip->dev) == USB_SPEED_HIGH)
+		fp->maxpacksize = (((fp->maxpacksize >> 11) & 3) + 1)
+				* (fp->maxpacksize & 0x7ff);
+	fp->clock = clock;
+	INIT_LIST_HEAD(&fp->list);
+
+	return fp;
+}
+
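On high-speed devices, bits 10:0 of wMaxPacketSize give the packet size and bits 12:11 encode additional transaction opportunities per microframe, which is what the decode above unpacks. A worked example with a hypothetical descriptor value:

	u16 mps = 0x1400;	/* hypothetical wMaxPacketSize */
	/* bits 12:11 = 2, so 3 transactions; bits 10:0 = 0x400 = 1024 bytes */
	unsigned int bytes = (((mps >> 11) & 3) + 1) * (mps & 0x7ff);
	/* bytes == 3 * 1024 == 3072 bytes per microframe */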
+static struct audioformat *
+snd_usb_get_audioformat_uac12(struct snd_usb_audio *chip,
+			      struct usb_host_interface *alts,
+			      int protocol, int iface_no, int altset_idx,
+			      int altno, int stream, int bm_quirk)
+{
+	struct usb_device *dev = chip->dev;
+	struct uac_format_type_i_continuous_descriptor *fmt;
+	unsigned int num_channels = 0, chconfig = 0;
+	struct audioformat *fp;
+	int clock = 0;
+	u64 format;
+
+	/* get audio formats */
+	if (protocol == UAC_VERSION_1) {
+		struct uac1_as_header_descriptor *as =
+			snd_usb_find_csint_desc(alts->extra, alts->extralen,
+						NULL, UAC_AS_GENERAL);
+		struct uac_input_terminal_descriptor *iterm;
+
+		if (!as) {
+			dev_err(&dev->dev,
+				"%u:%d : UAC_AS_GENERAL descriptor not found\n",
+				iface_no, altno);
+			return NULL;
+		}
+
+		if (as->bLength < sizeof(*as)) {
+			dev_err(&dev->dev,
+				"%u:%d : invalid UAC_AS_GENERAL desc\n",
+				iface_no, altno);
+			return NULL;
+		}
+
+		format = le16_to_cpu(as->wFormatTag); /* remember the format value */
+
+		iterm = snd_usb_find_input_terminal_descriptor(chip->ctrl_intf,
+							     as->bTerminalLink);
+		if (iterm) {
+			num_channels = iterm->bNrChannels;
+			chconfig = le16_to_cpu(iterm->wChannelConfig);
+		}
+	} else { /* UAC_VERSION_2 */
+		struct uac2_input_terminal_descriptor *input_term;
+		struct uac2_output_terminal_descriptor *output_term;
+		struct uac2_as_header_descriptor *as =
+			snd_usb_find_csint_desc(alts->extra, alts->extralen,
+						NULL, UAC_AS_GENERAL);
+
+		if (!as) {
+			dev_err(&dev->dev,
+				"%u:%d : UAC_AS_GENERAL descriptor not found\n",
+				iface_no, altno);
+			return NULL;
+		}
+
+		if (as->bLength < sizeof(*as)) {
+			dev_err(&dev->dev,
+				"%u:%d : invalid UAC_AS_GENERAL desc\n",
+				iface_no, altno);
+			return NULL;
+		}
+
+		num_channels = as->bNrChannels;
+		format = le32_to_cpu(as->bmFormats);
+		chconfig = le32_to_cpu(as->bmChannelConfig);
+
+		/*
+		 * lookup the terminal associated to this interface
+		 * to extract the clock
+		 */
+		input_term = snd_usb_find_input_terminal_descriptor(chip->ctrl_intf,
+								    as->bTerminalLink);
+		if (input_term) {
+			clock = input_term->bCSourceID;
+			if (!chconfig && (num_channels == input_term->bNrChannels))
+				chconfig = le32_to_cpu(input_term->bmChannelConfig);
+			goto found_clock;
+		}
+
+		output_term = snd_usb_find_output_terminal_descriptor(chip->ctrl_intf,
+								      as->bTerminalLink);
+		if (output_term) {
+			clock = output_term->bCSourceID;
+			goto found_clock;
+		}
+
+		dev_err(&dev->dev,
+			"%u:%d : bogus bTerminalLink %d\n",
+			iface_no, altno, as->bTerminalLink);
+		return NULL;
+	}
+
+found_clock:
+	/* get format type */
+	fmt = snd_usb_find_csint_desc(alts->extra, alts->extralen,
+				      NULL, UAC_FORMAT_TYPE);
+	if (!fmt) {
+		dev_err(&dev->dev,
+			"%u:%d : no UAC_FORMAT_TYPE desc\n",
+			iface_no, altno);
+		return NULL;
+	}
+	if (((protocol == UAC_VERSION_1) && (fmt->bLength < 8))
+			|| ((protocol == UAC_VERSION_2) &&
+					(fmt->bLength < 6))) {
+		dev_err(&dev->dev,
+			"%u:%d : invalid UAC_FORMAT_TYPE desc\n",
+			iface_no, altno);
+		return NULL;
+	}
+
+	/*
+	 * Blue Microphones workaround: The last altsetting is
+	 * identical with the previous one, except for a larger
+	 * packet size, but is actually a mislabeled two-channel
+	 * setting; ignore it.
+	 *
+	 * Part 2: analyze quirk flag and format
+	 */
+	if (bm_quirk && fmt->bNrChannels == 1 && fmt->bSubframeSize == 2)
+		return NULL;
+
+	fp = audio_format_alloc_init(chip, alts, protocol, iface_no,
+				     altset_idx, altno, num_channels, clock);
+	if (!fp)
+		return ERR_PTR(-ENOMEM);
+
+	fp->attributes = parse_uac_endpoint_attributes(chip, alts, protocol,
+						       iface_no);
+
+	/* some quirks for attributes here */
+	snd_usb_audioformat_attributes_quirk(chip, fp, stream);
+
+	/* ok, let's parse further... */
+	if (snd_usb_parse_audio_format(chip, fp, format,
+					fmt, stream) < 0) {
+		kfree(fp->rate_table);
+		kfree(fp);
+		return NULL;
+	}
+
+	/* Create chmap */
+	if (fp->channels != num_channels)
+		chconfig = 0;
+
+	fp->chmap = convert_chmap(fp->channels, chconfig, protocol);
+
+	return fp;
+}
+
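Note the return convention the new parser helpers share: NULL means "this altsetting is malformed, skip it and keep parsing", while an ERR_PTR() encodes a fatal error (allocation or transport failure) that must abort the probe. The caller unpacks it like so, where parse_one() is a stand-in name:

	struct audioformat *fp = parse_one(alts);

	if (!fp)
		continue;		/* bogus descriptor: skip altsetting */
	if (IS_ERR(fp))
		return PTR_ERR(fp);	/* -ENOMEM, -EIO, ...: abort */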
+static struct audioformat *
+snd_usb_get_audioformat_uac3(struct snd_usb_audio *chip,
+			     struct usb_host_interface *alts,
+			     int iface_no, int altset_idx,
+			     int altno, int stream)
+{
+	struct usb_device *dev = chip->dev;
+	struct uac3_input_terminal_descriptor *input_term;
+	struct uac3_output_terminal_descriptor *output_term;
+	struct uac3_cluster_header_descriptor *cluster;
+	struct uac3_as_header_descriptor *as = NULL;
+	struct uac3_hc_descriptor_header hc_header;
+	struct snd_pcm_chmap_elem *chmap;
+	unsigned char badd_profile;
+	u64 badd_formats = 0;
+	unsigned int num_channels;
+	struct audioformat *fp;
+	u16 cluster_id, wLength;
+	int clock = 0;
+	int err;
+
+	badd_profile = chip->badd_profile;
+
+	if (badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) {
+		unsigned int maxpacksize =
+			le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize);
+
+		switch (maxpacksize) {
+		default:
+			dev_err(&dev->dev,
+				"%u:%d : incorrect wMaxPacketSize for BADD profile\n",
+				iface_no, altno);
+			return NULL;
+		case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_16:
+		case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_16:
+			badd_formats = SNDRV_PCM_FMTBIT_S16_LE;
+			num_channels = 1;
+			break;
+		case UAC3_BADD_EP_MAXPSIZE_SYNC_MONO_24:
+		case UAC3_BADD_EP_MAXPSIZE_ASYNC_MONO_24:
+			badd_formats = SNDRV_PCM_FMTBIT_S24_3LE;
+			num_channels = 1;
+			break;
+		case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_16:
+		case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_16:
+			badd_formats = SNDRV_PCM_FMTBIT_S16_LE;
+			num_channels = 2;
+			break;
+		case UAC3_BADD_EP_MAXPSIZE_SYNC_STEREO_24:
+		case UAC3_BADD_EP_MAXPSIZE_ASYNC_STEREO_24:
+			badd_formats = SNDRV_PCM_FMTBIT_S24_3LE;
+			num_channels = 2;
+			break;
+		}
+
+		chmap = kzalloc(sizeof(*chmap), GFP_KERNEL);
+		if (!chmap)
+			return ERR_PTR(-ENOMEM);
+
+		if (num_channels == 1) {
+			chmap->map[0] = SNDRV_CHMAP_MONO;
+		} else {
+			chmap->map[0] = SNDRV_CHMAP_FL;
+			chmap->map[1] = SNDRV_CHMAP_FR;
+		}
+
+		chmap->channels = num_channels;
+		clock = UAC3_BADD_CS_ID9;
+		goto found_clock;
+	}
+
+	as = snd_usb_find_csint_desc(alts->extra, alts->extralen,
+				     NULL, UAC_AS_GENERAL);
+	if (!as) {
+		dev_err(&dev->dev,
+			"%u:%d : UAC_AS_GENERAL descriptor not found\n",
+			iface_no, altno);
+		return NULL;
+	}
+
+	if (as->bLength < sizeof(*as)) {
+		dev_err(&dev->dev,
+			"%u:%d : invalid UAC_AS_GENERAL desc\n",
+			iface_no, altno);
+		return NULL;
+	}
+
+	cluster_id = le16_to_cpu(as->wClusterDescrID);
+	if (!cluster_id) {
+		dev_err(&dev->dev,
+			"%u:%d : no cluster descriptor\n",
+			iface_no, altno);
+		return NULL;
+	}
+
+	/*
+	 * Get number of channels and channel map through
+	 * High Capability Cluster Descriptor
+	 *
+	 * First step: get High Capability header and
+	 * read size of Cluster Descriptor
+	 */
+	err = snd_usb_ctl_msg(chip->dev,
+			usb_rcvctrlpipe(chip->dev, 0),
+			UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR,
+			USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+			cluster_id,
+			snd_usb_ctrl_intf(chip),
+			&hc_header, sizeof(hc_header));
+	if (err < 0)
+		return ERR_PTR(err);
+	else if (err != sizeof(hc_header)) {
+		dev_err(&dev->dev,
+			"%u:%d : can't get High Capability descriptor\n",
+			iface_no, altno);
+		return ERR_PTR(-EIO);
+	}
+
+	/*
+	 * Second step: allocate needed amount of memory
+	 * and request Cluster Descriptor
+	 */
+	wLength = le16_to_cpu(hc_header.wLength);
+	cluster = kzalloc(wLength, GFP_KERNEL);
+	if (!cluster)
+		return ERR_PTR(-ENOMEM);
+	err = snd_usb_ctl_msg(chip->dev,
+			usb_rcvctrlpipe(chip->dev, 0),
+			UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR,
+			USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+			cluster_id,
+			snd_usb_ctrl_intf(chip),
+			cluster, wLength);
+	if (err < 0) {
+		kfree(cluster);
+		return ERR_PTR(err);
+	} else if (err != wLength) {
+		dev_err(&dev->dev,
+			"%u:%d : can't get Cluster Descriptor\n",
+			iface_no, altno);
+		kfree(cluster);
+		return ERR_PTR(-EIO);
+	}
+
+	num_channels = cluster->bNrChannels;
+	chmap = convert_chmap_v3(cluster);
+	kfree(cluster);
+
+	/*
+	 * lookup the terminal associated to this interface
+	 * to extract the clock
+	 */
+	input_term = snd_usb_find_input_terminal_descriptor(chip->ctrl_intf,
+							    as->bTerminalLink);
+	if (input_term) {
+		clock = input_term->bCSourceID;
+		goto found_clock;
+	}
+
+	output_term = snd_usb_find_output_terminal_descriptor(chip->ctrl_intf,
+							     as->bTerminalLink);
+	if (output_term) {
+		clock = output_term->bCSourceID;
+		goto found_clock;
+	}
+
+	dev_err(&dev->dev, "%u:%d : bogus bTerminalLink %d\n",
+			iface_no, altno, as->bTerminalLink);
+	kfree(chmap);
+	return NULL;
+
+found_clock:
+	fp = audio_format_alloc_init(chip, alts, UAC_VERSION_3, iface_no,
+				     altset_idx, altno, num_channels, clock);
+	if (!fp) {
+		kfree(chmap);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fp->chmap = chmap;
+
+	if (badd_profile >= UAC3_FUNCTION_SUBCLASS_GENERIC_IO) {
+		fp->attributes = 0; /* No attributes */
+
+		fp->fmt_type = UAC_FORMAT_TYPE_I;
+		fp->formats = badd_formats;
+
+		fp->nr_rates = 0;	/* SNDRV_PCM_RATE_CONTINUOUS */
+		fp->rate_min = UAC3_BADD_SAMPLING_RATE;
+		fp->rate_max = UAC3_BADD_SAMPLING_RATE;
+		fp->rates = SNDRV_PCM_RATE_CONTINUOUS;
+
+	} else {
+		fp->attributes = parse_uac_endpoint_attributes(chip, alts,
+							       UAC_VERSION_3,
+							       iface_no);
+		/* ok, let's parse further... */
+		if (snd_usb_parse_audio_format_v3(chip, fp, as, stream) < 0) {
+			kfree(fp->chmap);
+			kfree(fp->rate_table);
+			kfree(fp);
+			return NULL;
+		}
+	}
+
+	return fp;
+}
+
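In BADD mode there are no class-specific descriptors to parse, so format and channel count are inferred purely from the endpoint's wMaxPacketSize. The constants are consistent with the fixed 48 kHz BADD rate; the arithmetic below is a sanity check under that assumption, not part of this change:

	/* sync  stereo 24-bit: 48 frames/ms * 2 ch * 3 B = 288 = 0x120
	 * async stereo 24-bit: 288 + one extra frame (6 B) = 294 = 0x126
	 * sync  mono   16-bit: 48 frames/ms * 1 ch * 2 B =  96 = 0x060
	 */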
 int snd_usb_parse_audio_interface(struct snd_usb_audio *chip, int iface_no)
 {
 	struct usb_device *dev;
@@ -640,13 +1031,8 @@
 	struct usb_host_interface *alts;
 	struct usb_interface_descriptor *altsd;
 	int i, altno, err, stream;
-	u64 format = 0;
-	unsigned int num_channels = 0;
 	struct audioformat *fp = NULL;
-	int num, protocol, clock = 0;
-	struct uac_format_type_i_continuous_descriptor *fmt = NULL;
-	struct snd_pcm_chmap_elem *chmap_v3 = NULL;
-	unsigned int chconfig;
+	int num, protocol;
 
 	dev = chip->dev;
 
@@ -695,303 +1081,48 @@
 		    protocol <= 2)
 			protocol = UAC_VERSION_1;
 
-		chconfig = 0;
-		/* get audio formats */
 		switch (protocol) {
 		default:
 			dev_dbg(&dev->dev, "%u:%d: unknown interface protocol %#02x, assuming v1\n",
 				iface_no, altno, protocol);
 			protocol = UAC_VERSION_1;
 			/* fall through */
-
-		case UAC_VERSION_1: {
-			struct uac1_as_header_descriptor *as =
-				snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
-			struct uac_input_terminal_descriptor *iterm;
-
-			if (!as) {
-				dev_err(&dev->dev,
-					"%u:%d : UAC_AS_GENERAL descriptor not found\n",
-					iface_no, altno);
-				continue;
-			}
-
-			if (as->bLength < sizeof(*as)) {
-				dev_err(&dev->dev,
-					"%u:%d : invalid UAC_AS_GENERAL desc\n",
-					iface_no, altno);
-				continue;
-			}
-
-			format = le16_to_cpu(as->wFormatTag); /* remember the format value */
-
-			iterm = snd_usb_find_input_terminal_descriptor(chip->ctrl_intf,
-								       as->bTerminalLink);
-			if (iterm) {
-				num_channels = iterm->bNrChannels;
-				chconfig = le16_to_cpu(iterm->wChannelConfig);
-			}
-
-			break;
-		}
-
+		case UAC_VERSION_1:
+			/* fall through */
 		case UAC_VERSION_2: {
-			struct uac2_input_terminal_descriptor *input_term;
-			struct uac2_output_terminal_descriptor *output_term;
-			struct uac2_as_header_descriptor *as =
-				snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL);
-
-			if (!as) {
-				dev_err(&dev->dev,
-					"%u:%d : UAC_AS_GENERAL descriptor not found\n",
-					iface_no, altno);
-				continue;
-			}
-
-			if (as->bLength < sizeof(*as)) {
-				dev_err(&dev->dev,
-					"%u:%d : invalid UAC_AS_GENERAL desc\n",
-					iface_no, altno);
-				continue;
-			}
-
-			num_channels = as->bNrChannels;
-			format = le32_to_cpu(as->bmFormats);
-			chconfig = le32_to_cpu(as->bmChannelConfig);
-
-			/* lookup the terminal associated to this interface
-			 * to extract the clock */
-			input_term = snd_usb_find_input_terminal_descriptor(chip->ctrl_intf,
-									    as->bTerminalLink);
-			if (input_term) {
-				clock = input_term->bCSourceID;
-				if (!chconfig && (num_channels == input_term->bNrChannels))
-					chconfig = le32_to_cpu(input_term->bmChannelConfig);
-				break;
-			}
-
-			output_term = snd_usb_find_output_terminal_descriptor(chip->ctrl_intf,
-									      as->bTerminalLink);
-			if (output_term) {
-				clock = output_term->bCSourceID;
-				break;
-			}
-
-			dev_err(&dev->dev,
-				"%u:%d : bogus bTerminalLink %d\n",
-				iface_no, altno, as->bTerminalLink);
-			continue;
-		}
-
-		case UAC_VERSION_3: {
-			struct uac3_input_terminal_descriptor *input_term;
-			struct uac3_output_terminal_descriptor *output_term;
-			struct uac3_as_header_descriptor *as;
-			struct uac3_cluster_header_descriptor *cluster;
-			struct uac3_hc_descriptor_header hc_header;
-			u16 cluster_id, wLength;
-
-			as = snd_usb_find_csint_desc(alts->extra,
-							alts->extralen,
-							NULL, UAC_AS_GENERAL);
-
-			if (!as) {
-				dev_err(&dev->dev,
-					"%u:%d : UAC_AS_GENERAL descriptor not found\n",
-					iface_no, altno);
-				continue;
-			}
-
-			if (as->bLength < sizeof(*as)) {
-				dev_err(&dev->dev,
-					"%u:%d : invalid UAC_AS_GENERAL desc\n",
-					iface_no, altno);
-				continue;
-			}
-
-			cluster_id = le16_to_cpu(as->wClusterDescrID);
-			if (!cluster_id) {
-				dev_err(&dev->dev,
-					"%u:%d : no cluster descriptor\n",
-					iface_no, altno);
-				continue;
-			}
-
-			/*
-			 * Get number of channels and channel map through
-			 * High Capability Cluster Descriptor
-			 *
-			 * First step: get High Capability header and
-			 * read size of Cluster Descriptor
-			 */
-			err = snd_usb_ctl_msg(chip->dev,
-					usb_rcvctrlpipe(chip->dev, 0),
-					UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR,
-					USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
-					cluster_id,
-					snd_usb_ctrl_intf(chip),
-					&hc_header, sizeof(hc_header));
-			if (err < 0)
-				return err;
-			else if (err != sizeof(hc_header)) {
-				dev_err(&dev->dev,
-					"%u:%d : can't get High Capability descriptor\n",
-					iface_no, altno);
-				return -EIO;
-			}
-
-			/*
-			 * Second step: allocate needed amount of memory
-			 * and request Cluster Descriptor
-			 */
-			wLength = le16_to_cpu(hc_header.wLength);
-			cluster = kzalloc(wLength, GFP_KERNEL);
-			if (!cluster)
-				return -ENOMEM;
-			err = snd_usb_ctl_msg(chip->dev,
-					usb_rcvctrlpipe(chip->dev, 0),
-					UAC3_CS_REQ_HIGH_CAPABILITY_DESCRIPTOR,
-					USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
-					cluster_id,
-					snd_usb_ctrl_intf(chip),
-					cluster, wLength);
-			if (err < 0) {
-				kfree(cluster);
-				return err;
-			} else if (err != wLength) {
-				dev_err(&dev->dev,
-					"%u:%d : can't get Cluster Descriptor\n",
-					iface_no, altno);
-				kfree(cluster);
-				return -EIO;
-			}
-
-			num_channels = cluster->bNrChannels;
-			chmap_v3 = convert_chmap_v3(cluster);
-
-			kfree(cluster);
-
-			format = le64_to_cpu(as->bmFormats);
-
-			/* lookup the terminal associated to this interface
-			 * to extract the clock */
-			input_term = snd_usb_find_input_terminal_descriptor(
-							chip->ctrl_intf,
-							as->bTerminalLink);
-
-			if (input_term) {
-				clock = input_term->bCSourceID;
-				break;
-			}
-
-			output_term = snd_usb_find_output_terminal_descriptor(chip->ctrl_intf,
-									      as->bTerminalLink);
-			if (output_term) {
-				clock = output_term->bCSourceID;
-				break;
-			}
-
-			dev_err(&dev->dev,
-				"%u:%d : bogus bTerminalLink %d\n",
-				iface_no, altno, as->bTerminalLink);
-			continue;
-		}
-		}
-
-		if (protocol == UAC_VERSION_1 || protocol == UAC_VERSION_2) {
-			/* get format type */
-			fmt = snd_usb_find_csint_desc(alts->extra,
-						      alts->extralen,
-						      NULL, UAC_FORMAT_TYPE);
-			if (!fmt) {
-				dev_err(&dev->dev,
-					"%u:%d : no UAC_FORMAT_TYPE desc\n",
-					iface_no, altno);
-				continue;
-			}
-			if (((protocol == UAC_VERSION_1) && (fmt->bLength < 8))
-					|| ((protocol == UAC_VERSION_2) &&
-							(fmt->bLength < 6))) {
-				dev_err(&dev->dev,
-					"%u:%d : invalid UAC_FORMAT_TYPE desc\n",
-					iface_no, altno);
-				continue;
-			}
+			int bm_quirk = 0;
 
 			/*
 			 * Blue Microphones workaround: The last altsetting is
 			 * identical with the previous one, except for a larger
 			 * packet size, but is actually a mislabeled two-channel
 			 * setting; ignore it.
+			 *
+			 * Part 1: prepare quirk flag
 			 */
-			if (fmt->bNrChannels == 1 &&
-			    fmt->bSubframeSize == 2 &&
-			    altno == 2 && num == 3 &&
+			if (altno == 2 && num == 3 &&
 			    fp && fp->altsetting == 1 && fp->channels == 1 &&
 			    fp->formats == SNDRV_PCM_FMTBIT_S16_LE &&
 			    protocol == UAC_VERSION_1 &&
 			    le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize) ==
 							fp->maxpacksize * 2)
-				continue;
+				bm_quirk = 1;
+
+			fp = snd_usb_get_audioformat_uac12(chip, alts, protocol,
+							   iface_no, i, altno,
+							   stream, bm_quirk);
+			break;
+		}
+		case UAC_VERSION_3:
+			fp = snd_usb_get_audioformat_uac3(chip, alts,
+						iface_no, i, altno, stream);
+			break;
 		}
 
-		fp = kzalloc(sizeof(*fp), GFP_KERNEL);
 		if (!fp)
-			return -ENOMEM;
-
-		fp->iface = iface_no;
-		fp->altsetting = altno;
-		fp->altset_idx = i;
-		fp->endpoint = get_endpoint(alts, 0)->bEndpointAddress;
-		fp->ep_attr = get_endpoint(alts, 0)->bmAttributes;
-		fp->datainterval = snd_usb_parse_datainterval(chip, alts);
-		fp->protocol = protocol;
-		fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize);
-		fp->channels = num_channels;
-		if (snd_usb_get_speed(dev) == USB_SPEED_HIGH)
-			fp->maxpacksize = (((fp->maxpacksize >> 11) & 3) + 1)
-					* (fp->maxpacksize & 0x7ff);
-		fp->attributes = parse_uac_endpoint_attributes(chip, alts, protocol, iface_no);
-		fp->clock = clock;
-		INIT_LIST_HEAD(&fp->list);
-
-		/* some quirks for attributes here */
-		snd_usb_audioformat_attributes_quirk(chip, fp, stream);
-
-		/* ok, let's parse further... */
-		if (protocol == UAC_VERSION_1 || protocol == UAC_VERSION_2) {
-			if (snd_usb_parse_audio_format(chip, fp, format,
-							fmt, stream) < 0) {
-				kfree(fp->rate_table);
-				kfree(fp);
-				fp = NULL;
-				continue;
-			}
-		} else {
-			struct uac3_as_header_descriptor *as;
-
-			as = snd_usb_find_csint_desc(alts->extra,
-						     alts->extralen,
-						     NULL, UAC_AS_GENERAL);
-
-			if (snd_usb_parse_audio_format_v3(chip, fp, as,
-								stream) < 0) {
-				kfree(fp->rate_table);
-				kfree(fp);
-				fp = NULL;
-				continue;
-			}
-		}
-
-		/* Create chmap */
-		if (fp->channels != num_channels)
-			chconfig = 0;
-
-		if (protocol == UAC_VERSION_3)
-			fp->chmap = chmap_v3;
-		else
-			fp->chmap = convert_chmap(fp->channels, chconfig,
-						  protocol);
+			continue;
+		else if (IS_ERR(fp))
+			return PTR_ERR(fp);
 
 		dev_dbg(&dev->dev, "%u:%d: add audio endpoint %#x\n", iface_no, altno, fp->endpoint);
 		err = snd_usb_add_audio_stream(chip, stream, fp);
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 4d5c89a..b9faeca 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -49,6 +49,8 @@
 	int num_suspended_intf;
 	int sample_rate_read_error;
 
+	int badd_profile;		/* UAC3 BADD profile */
+
 	struct list_head pcm_list;	/* list of pcm streams */
 	struct list_head ep_list;	/* list of audio-related endpoints */
 	int pcm_devs;
@@ -59,6 +61,9 @@
 
 	int setup;			/* from the 'device_setup' module param */
 	bool autoclock;			/* from the 'autoclock' module param */
+	bool keep_iface;		/* keep interface/altset after closing
+					 * or parameter change
+					 */
 
 	struct usb_host_interface *ctrl_intf;	/* the audio control interface */
 };
@@ -109,6 +114,7 @@
 struct snd_usb_audio_quirk {
 	const char *vendor_name;
 	const char *product_name;
+	const char *profile_name;	/* override the card->longname */
 	int16_t ifnum;
 	uint16_t type;
 	const void *data;
@@ -121,4 +127,6 @@
 int snd_usb_lock_shutdown(struct snd_usb_audio *chip);
 void snd_usb_unlock_shutdown(struct snd_usb_audio *chip);
 
+extern bool snd_usb_use_vmalloc;
+
 #endif /* __USBAUDIO_H */
diff --git a/sound/xen/Kconfig b/sound/xen/Kconfig
new file mode 100644
index 0000000..4f1fcee
--- /dev/null
+++ b/sound/xen/Kconfig
@@ -0,0 +1,10 @@
+# ALSA Xen drivers
+
+config SND_XEN_FRONTEND
+	tristate "Xen para-virtualized sound frontend driver"
+	depends on XEN
+	select SND_PCM
+	select XEN_XENBUS_FRONTEND
+	help
+	  Choose this option if you want to enable a para-virtualized
+	  frontend sound driver for Xen guest OSes.
diff --git a/sound/xen/Makefile b/sound/xen/Makefile
new file mode 100644
index 0000000..1e6470e
--- /dev/null
+++ b/sound/xen/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0 OR MIT
+
+snd_xen_front-objs := xen_snd_front.o \
+		      xen_snd_front_cfg.o \
+		      xen_snd_front_evtchnl.o \
+		      xen_snd_front_shbuf.o \
+		      xen_snd_front_alsa.o
+
+obj-$(CONFIG_SND_XEN_FRONTEND) += snd_xen_front.o
diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c
new file mode 100644
index 0000000..b089b13
--- /dev/null
+++ b/sound/xen/xen_snd_front.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+
+#include <xen/page.h>
+#include <xen/platform_pci.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+
+#include <xen/interface/io/sndif.h>
+
+#include "xen_snd_front.h"
+#include "xen_snd_front_alsa.h"
+#include "xen_snd_front_evtchnl.h"
+#include "xen_snd_front_shbuf.h"
+
+static struct xensnd_req *
+be_stream_prepare_req(struct xen_snd_front_evtchnl *evtchnl, u8 operation)
+{
+	struct xensnd_req *req;
+
+	req = RING_GET_REQUEST(&evtchnl->u.req.ring,
+			       evtchnl->u.req.ring.req_prod_pvt);
+	req->operation = operation;
+	req->id = evtchnl->evt_next_id++;
+	evtchnl->evt_id = req->id;
+	return req;
+}
+
+static int be_stream_do_io(struct xen_snd_front_evtchnl *evtchnl)
+{
+	if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED))
+		return -EIO;
+
+	reinit_completion(&evtchnl->u.req.completion);
+	xen_snd_front_evtchnl_flush(evtchnl);
+	return 0;
+}
+
+static int be_stream_wait_io(struct xen_snd_front_evtchnl *evtchnl)
+{
+	if (wait_for_completion_timeout(&evtchnl->u.req.completion,
+			msecs_to_jiffies(VSND_WAIT_BACK_MS)) <= 0)
+		return -ETIMEDOUT;
+
+	return evtchnl->u.req.resp_status;
+}
+
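Every front-to-back operation below follows the same shape: build the request under ring_io_lock, kick the ring, then block on a completion that the event-channel handler signals, all serialized per stream by req_io_lock. A condensed, hypothetical version of the pattern for payload-less requests:

	static int be_stream_simple_op(struct xen_snd_front_evtchnl *evtchnl,
				       u8 operation)
	{
		int ret;

		mutex_lock(&evtchnl->u.req.req_io_lock);

		mutex_lock(&evtchnl->ring_io_lock);
		be_stream_prepare_req(evtchnl, operation);
		mutex_unlock(&evtchnl->ring_io_lock);

		ret = be_stream_do_io(evtchnl);
		if (ret == 0)
			ret = be_stream_wait_io(evtchnl);

		mutex_unlock(&evtchnl->u.req.req_io_lock);
		return ret;
	}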
+int xen_snd_front_stream_query_hw_param(struct xen_snd_front_evtchnl *evtchnl,
+					struct xensnd_query_hw_param *hw_param_req,
+					struct xensnd_query_hw_param *hw_param_resp)
+{
+	struct xensnd_req *req;
+	int ret;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	mutex_lock(&evtchnl->ring_io_lock);
+	req = be_stream_prepare_req(evtchnl, XENSND_OP_HW_PARAM_QUERY);
+	req->op.hw_param = *hw_param_req;
+	mutex_unlock(&evtchnl->ring_io_lock);
+
+	ret = be_stream_do_io(evtchnl);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	if (ret == 0)
+		*hw_param_resp = evtchnl->u.req.resp.hw_param;
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_snd_front_stream_prepare(struct xen_snd_front_evtchnl *evtchnl,
+				 struct xen_snd_front_shbuf *sh_buf,
+				 u8 format, unsigned int channels,
+				 unsigned int rate, u32 buffer_sz,
+				 u32 period_sz)
+{
+	struct xensnd_req *req;
+	int ret;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	mutex_lock(&evtchnl->ring_io_lock);
+	req = be_stream_prepare_req(evtchnl, XENSND_OP_OPEN);
+	req->op.open.pcm_format = format;
+	req->op.open.pcm_channels = channels;
+	req->op.open.pcm_rate = rate;
+	req->op.open.buffer_sz = buffer_sz;
+	req->op.open.period_sz = period_sz;
+	req->op.open.gref_directory = xen_snd_front_shbuf_get_dir_start(sh_buf);
+	mutex_unlock(&evtchnl->ring_io_lock);
+
+	ret = be_stream_do_io(evtchnl);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_snd_front_stream_close(struct xen_snd_front_evtchnl *evtchnl)
+{
+	struct xensnd_req *req;
+	int ret;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	mutex_lock(&evtchnl->ring_io_lock);
+	req = be_stream_prepare_req(evtchnl, XENSND_OP_CLOSE);
+	mutex_unlock(&evtchnl->ring_io_lock);
+
+	ret = be_stream_do_io(evtchnl);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_snd_front_stream_write(struct xen_snd_front_evtchnl *evtchnl,
+			       unsigned long pos, unsigned long count)
+{
+	struct xensnd_req *req;
+	int ret;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	mutex_lock(&evtchnl->ring_io_lock);
+	req = be_stream_prepare_req(evtchnl, XENSND_OP_WRITE);
+	req->op.rw.length = count;
+	req->op.rw.offset = pos;
+	mutex_unlock(&evtchnl->ring_io_lock);
+
+	ret = be_stream_do_io(evtchnl);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_snd_front_stream_read(struct xen_snd_front_evtchnl *evtchnl,
+			      unsigned long pos, unsigned long count)
+{
+	struct xensnd_req *req;
+	int ret;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	mutex_lock(&evtchnl->ring_io_lock);
+	req = be_stream_prepare_req(evtchnl, XENSND_OP_READ);
+	req->op.rw.length = count;
+	req->op.rw.offset = pos;
+	mutex_unlock(&evtchnl->ring_io_lock);
+
+	ret = be_stream_do_io(evtchnl);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+int xen_snd_front_stream_trigger(struct xen_snd_front_evtchnl *evtchnl,
+				 int type)
+{
+	struct xensnd_req *req;
+	int ret;
+
+	mutex_lock(&evtchnl->u.req.req_io_lock);
+
+	mutex_lock(&evtchnl->ring_io_lock);
+	req = be_stream_prepare_req(evtchnl, XENSND_OP_TRIGGER);
+	req->op.trigger.type = type;
+	mutex_unlock(&evtchnl->ring_io_lock);
+
+	ret = be_stream_do_io(evtchnl);
+
+	if (ret == 0)
+		ret = be_stream_wait_io(evtchnl);
+
+	mutex_unlock(&evtchnl->u.req.req_io_lock);
+	return ret;
+}
+
+static void xen_snd_drv_fini(struct xen_snd_front_info *front_info)
+{
+	xen_snd_front_alsa_fini(front_info);
+	xen_snd_front_evtchnl_free_all(front_info);
+}
+
+static int sndback_initwait(struct xen_snd_front_info *front_info)
+{
+	int num_streams;
+	int ret;
+
+	ret = xen_snd_front_cfg_card(front_info, &num_streams);
+	if (ret < 0)
+		return ret;
+
+	/* create event channels for all streams and publish */
+	ret = xen_snd_front_evtchnl_create_all(front_info, num_streams);
+	if (ret < 0)
+		return ret;
+
+	return xen_snd_front_evtchnl_publish_all(front_info);
+}
+
+static int sndback_connect(struct xen_snd_front_info *front_info)
+{
+	return xen_snd_front_alsa_init(front_info);
+}
+
+static void sndback_disconnect(struct xen_snd_front_info *front_info)
+{
+	xen_snd_drv_fini(front_info);
+	xenbus_switch_state(front_info->xb_dev, XenbusStateInitialising);
+}
+
+static void sndback_changed(struct xenbus_device *xb_dev,
+			    enum xenbus_state backend_state)
+{
+	struct xen_snd_front_info *front_info = dev_get_drvdata(&xb_dev->dev);
+	int ret;
+
+	dev_dbg(&xb_dev->dev, "Backend state is %s, front is %s\n",
+		xenbus_strstate(backend_state),
+		xenbus_strstate(xb_dev->state));
+
+	switch (backend_state) {
+	case XenbusStateReconfiguring:
+		/* fall through */
+	case XenbusStateReconfigured:
+		/* fall through */
+	case XenbusStateInitialised:
+		break;
+
+	case XenbusStateInitialising:
+		/* Recovering after backend unexpected closure. */
+		sndback_disconnect(front_info);
+		break;
+
+	case XenbusStateInitWait:
+		/* Recovering after backend unexpected closure. */
+		sndback_disconnect(front_info);
+
+		ret = sndback_initwait(front_info);
+		if (ret < 0)
+			xenbus_dev_fatal(xb_dev, ret, "initializing frontend");
+		else
+			xenbus_switch_state(xb_dev, XenbusStateInitialised);
+		break;
+
+	case XenbusStateConnected:
+		if (xb_dev->state != XenbusStateInitialised)
+			break;
+
+		ret = sndback_connect(front_info);
+		if (ret < 0)
+			xenbus_dev_fatal(xb_dev, ret, "initializing frontend");
+		else
+			xenbus_switch_state(xb_dev, XenbusStateConnected);
+		break;
+
+	case XenbusStateClosing:
+		/*
+		 * In this state the backend starts freeing resources, so
+		 * let it reach the closed state first; then we can free
+		 * ours as well.
+		 */
+		break;
+
+	case XenbusStateUnknown:
+		/* fall through */
+	case XenbusStateClosed:
+		if (xb_dev->state == XenbusStateClosed)
+			break;
+
+		sndback_disconnect(front_info);
+		break;
+	}
+}
+
+static int xen_drv_probe(struct xenbus_device *xb_dev,
+			 const struct xenbus_device_id *id)
+{
+	struct xen_snd_front_info *front_info;
+
+	front_info = devm_kzalloc(&xb_dev->dev,
+				  sizeof(*front_info), GFP_KERNEL);
+	if (!front_info)
+		return -ENOMEM;
+
+	front_info->xb_dev = xb_dev;
+	dev_set_drvdata(&xb_dev->dev, front_info);
+
+	return xenbus_switch_state(xb_dev, XenbusStateInitialising);
+}
+
+static int xen_drv_remove(struct xenbus_device *dev)
+{
+	struct xen_snd_front_info *front_info = dev_get_drvdata(&dev->dev);
+	int to = 100;
+
+	xenbus_switch_state(dev, XenbusStateClosing);
+
+	/*
+	 * On driver removal the frontend is disconnected from XenBus, so
+	 * no backend state-change events arrive via the .otherend_changed
+	 * callback. This prevents us from exiting gracefully, e.g.
+	 * signaling the backend to free event channels, waiting for its
+	 * state to change to XenbusStateClosed and cleaning up at our end.
+	 * Normally, once the front driver is removed, the backend will
+	 * eventually go into the XenbusStateInitWait state.
+	 *
+	 * Workaround: read backend's state manually and wait with time-out.
+	 */
+	while ((xenbus_read_unsigned(front_info->xb_dev->otherend, "state",
+				     XenbusStateUnknown) != XenbusStateInitWait) &&
+	       --to)
+		msleep(10);
+
+	if (!to) {
+		unsigned int state;
+
+		state = xenbus_read_unsigned(front_info->xb_dev->otherend,
+					     "state", XenbusStateUnknown);
+		pr_err("Backend state is %s while removing driver\n",
+		       xenbus_strstate(state));
+	}
+
+	xen_snd_drv_fini(front_info);
+	xenbus_frontend_closed(dev);
+	return 0;
+}
+
+static const struct xenbus_device_id xen_drv_ids[] = {
+	{ XENSND_DRIVER_NAME },
+	{ "" }
+};
+
+static struct xenbus_driver xen_driver = {
+	.ids = xen_drv_ids,
+	.probe = xen_drv_probe,
+	.remove = xen_drv_remove,
+	.otherend_changed = sndback_changed,
+};
+
+static int __init xen_drv_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	if (!xen_has_pv_devices())
+		return -ENODEV;
+
+	/* At the moment we only support the case where XEN_PAGE_SIZE == PAGE_SIZE */
+	if (XEN_PAGE_SIZE != PAGE_SIZE) {
+		pr_err(XENSND_DRIVER_NAME ": different kernel and Xen page sizes are not supported: XEN_PAGE_SIZE (%lu) != PAGE_SIZE (%lu)\n",
+		       XEN_PAGE_SIZE, PAGE_SIZE);
+		return -ENODEV;
+	}
+
+	pr_info("Initialising Xen " XENSND_DRIVER_NAME " frontend driver\n");
+	return xenbus_register_frontend(&xen_driver);
+}
+
+static void __exit xen_drv_fini(void)
+{
+	pr_info("Unregistering Xen " XENSND_DRIVER_NAME " frontend driver\n");
+	xenbus_unregister_driver(&xen_driver);
+}
+
+module_init(xen_drv_init);
+module_exit(xen_drv_fini);
+
+MODULE_DESCRIPTION("Xen virtual sound device frontend");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("xen:" XENSND_DRIVER_NAME);
+MODULE_SUPPORTED_DEVICE("{{ALSA,Virtual soundcard}}");
diff --git a/sound/xen/xen_snd_front.h b/sound/xen/xen_snd_front.h
new file mode 100644
index 0000000..a2ea246
--- /dev/null
+++ b/sound/xen/xen_snd_front.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_SND_FRONT_H
+#define __XEN_SND_FRONT_H
+
+#include "xen_snd_front_cfg.h"
+
+struct xen_snd_front_card_info;
+struct xen_snd_front_evtchnl;
+struct xen_snd_front_evtchnl_pair;
+struct xen_snd_front_shbuf;
+struct xensnd_query_hw_param;
+
+struct xen_snd_front_info {
+	struct xenbus_device *xb_dev;
+
+	struct xen_snd_front_card_info *card_info;
+
+	int num_evt_pairs;
+	struct xen_snd_front_evtchnl_pair *evt_pairs;
+
+	struct xen_front_cfg_card cfg;
+};
+
+int xen_snd_front_stream_query_hw_param(struct xen_snd_front_evtchnl *evtchnl,
+					struct xensnd_query_hw_param *hw_param_req,
+					struct xensnd_query_hw_param *hw_param_resp);
+
+int xen_snd_front_stream_prepare(struct xen_snd_front_evtchnl *evtchnl,
+				 struct xen_snd_front_shbuf *sh_buf,
+				 u8 format, unsigned int channels,
+				 unsigned int rate, u32 buffer_sz,
+				 u32 period_sz);
+
+int xen_snd_front_stream_close(struct xen_snd_front_evtchnl *evtchnl);
+
+int xen_snd_front_stream_write(struct xen_snd_front_evtchnl *evtchnl,
+			       unsigned long pos, unsigned long count);
+
+int xen_snd_front_stream_read(struct xen_snd_front_evtchnl *evtchnl,
+			      unsigned long pos, unsigned long count);
+
+int xen_snd_front_stream_trigger(struct xen_snd_front_evtchnl *evtchnl,
+				 int type);
+
+#endif /* __XEN_SND_FRONT_H */
diff --git a/sound/xen/xen_snd_front_alsa.c b/sound/xen/xen_snd_front_alsa.c
new file mode 100644
index 0000000..5a2bd70
--- /dev/null
+++ b/sound/xen/xen_snd_front_alsa.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <linux/platform_device.h>
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+
+#include <xen/xenbus.h>
+
+#include "xen_snd_front.h"
+#include "xen_snd_front_alsa.h"
+#include "xen_snd_front_cfg.h"
+#include "xen_snd_front_evtchnl.h"
+#include "xen_snd_front_shbuf.h"
+
+struct xen_snd_front_pcm_stream_info {
+	struct xen_snd_front_info *front_info;
+	struct xen_snd_front_evtchnl_pair *evt_pair;
+	struct xen_snd_front_shbuf sh_buf;
+	int index;
+
+	bool is_open;
+	struct snd_pcm_hardware pcm_hw;
+
+	/* Number of processed frames as reported by the backend. */
+	snd_pcm_uframes_t be_cur_frame;
+	/* Current HW pointer to be reported via the .pointer callback. */
+	atomic_t hw_ptr;
+	/* Frames processed since the last period elapsed - for period detection. */
+	u32 out_frames;
+};
+
+struct xen_snd_front_pcm_instance_info {
+	struct xen_snd_front_card_info *card_info;
+	struct snd_pcm *pcm;
+	struct snd_pcm_hardware pcm_hw;
+	int num_pcm_streams_pb;
+	struct xen_snd_front_pcm_stream_info *streams_pb;
+	int num_pcm_streams_cap;
+	struct xen_snd_front_pcm_stream_info *streams_cap;
+};
+
+struct xen_snd_front_card_info {
+	struct xen_snd_front_info *front_info;
+	struct snd_card *card;
+	struct snd_pcm_hardware pcm_hw;
+	int num_pcm_instances;
+	struct xen_snd_front_pcm_instance_info *pcm_instances;
+};
+
+struct alsa_sndif_sample_format {
+	u8 sndif;
+	snd_pcm_format_t alsa;
+};
+
+struct alsa_sndif_hw_param {
+	u8 sndif;
+	snd_pcm_hw_param_t alsa;
+};
+
+static const struct alsa_sndif_sample_format ALSA_SNDIF_FORMATS[] = {
+	{
+		.sndif = XENSND_PCM_FORMAT_U8,
+		.alsa = SNDRV_PCM_FORMAT_U8
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_S8,
+		.alsa = SNDRV_PCM_FORMAT_S8
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_U16_LE,
+		.alsa = SNDRV_PCM_FORMAT_U16_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_U16_BE,
+		.alsa = SNDRV_PCM_FORMAT_U16_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_S16_LE,
+		.alsa = SNDRV_PCM_FORMAT_S16_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_S16_BE,
+		.alsa = SNDRV_PCM_FORMAT_S16_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_U24_LE,
+		.alsa = SNDRV_PCM_FORMAT_U24_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_U24_BE,
+		.alsa = SNDRV_PCM_FORMAT_U24_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_S24_LE,
+		.alsa = SNDRV_PCM_FORMAT_S24_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_S24_BE,
+		.alsa = SNDRV_PCM_FORMAT_S24_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_U32_LE,
+		.alsa = SNDRV_PCM_FORMAT_U32_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_U32_BE,
+		.alsa = SNDRV_PCM_FORMAT_U32_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_S32_LE,
+		.alsa = SNDRV_PCM_FORMAT_S32_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_S32_BE,
+		.alsa = SNDRV_PCM_FORMAT_S32_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_A_LAW,
+		.alsa = SNDRV_PCM_FORMAT_A_LAW
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_MU_LAW,
+		.alsa = SNDRV_PCM_FORMAT_MU_LAW
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_F32_LE,
+		.alsa = SNDRV_PCM_FORMAT_FLOAT_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_F32_BE,
+		.alsa = SNDRV_PCM_FORMAT_FLOAT_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_F64_LE,
+		.alsa = SNDRV_PCM_FORMAT_FLOAT64_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_F64_BE,
+		.alsa = SNDRV_PCM_FORMAT_FLOAT64_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_IEC958_SUBFRAME_LE,
+		.alsa = SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_IEC958_SUBFRAME_BE,
+		.alsa = SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_IMA_ADPCM,
+		.alsa = SNDRV_PCM_FORMAT_IMA_ADPCM
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_MPEG,
+		.alsa = SNDRV_PCM_FORMAT_MPEG
+	},
+	{
+		.sndif = XENSND_PCM_FORMAT_GSM,
+		.alsa = SNDRV_PCM_FORMAT_GSM
+	},
+};
+
+static int to_sndif_format(snd_pcm_format_t format)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ALSA_SNDIF_FORMATS); i++)
+		if (ALSA_SNDIF_FORMATS[i].alsa == format)
+			return ALSA_SNDIF_FORMATS[i].sndif;
+
+	return -EINVAL;
+}
+
+static u64 to_sndif_formats_mask(u64 alsa_formats)
+{
+	u64 mask;
+	int i;
+
+	mask = 0;
+	for (i = 0; i < ARRAY_SIZE(ALSA_SNDIF_FORMATS); i++)
+		if (1ULL << ALSA_SNDIF_FORMATS[i].alsa & alsa_formats)
+			mask |= 1ULL << ALSA_SNDIF_FORMATS[i].sndif;
+
+	return mask;
+}
+
+static u64 to_alsa_formats_mask(u64 sndif_formats)
+{
+	u64 mask;
+	int i;
+
+	mask = 0;
+	for (i = 0; i < ARRAY_SIZE(ALSA_SNDIF_FORMATS); i++)
+		if (1ULL << ALSA_SNDIF_FORMATS[i].sndif & sndif_formats)
+			mask |= 1ULL << ALSA_SNDIF_FORMATS[i].alsa;
+
+	return mask;
+}
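+
+/*
+ * A worked example of the mask conversion, for illustration only: if the
+ * backend reports XENSND_PCM_FORMAT_S16_LE as the only supported format,
+ * then to_alsa_formats_mask() sets the bit at position
+ * SNDRV_PCM_FORMAT_S16_LE, i.e. SNDRV_PCM_FMTBIT_S16_LE. The mapping is
+ * not an identity: sndif and ALSA number their formats independently,
+ * hence the lookup table instead of a plain shift.
+ */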
+
+static void stream_clear(struct xen_snd_front_pcm_stream_info *stream)
+{
+	stream->is_open = false;
+	stream->be_cur_frame = 0;
+	stream->out_frames = 0;
+	atomic_set(&stream->hw_ptr, 0);
+	xen_snd_front_evtchnl_pair_clear(stream->evt_pair);
+	xen_snd_front_shbuf_clear(&stream->sh_buf);
+}
+
+static void stream_free(struct xen_snd_front_pcm_stream_info *stream)
+{
+	xen_snd_front_shbuf_free(&stream->sh_buf);
+	stream_clear(stream);
+}
+
+static struct xen_snd_front_pcm_stream_info *
+stream_get(struct snd_pcm_substream *substream)
+{
+	struct xen_snd_front_pcm_instance_info *pcm_instance =
+			snd_pcm_substream_chip(substream);
+	struct xen_snd_front_pcm_stream_info *stream;
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		stream = &pcm_instance->streams_pb[substream->number];
+	else
+		stream = &pcm_instance->streams_cap[substream->number];
+
+	return stream;
+}
+
+static int alsa_hw_rule(struct snd_pcm_hw_params *params,
+			struct snd_pcm_hw_rule *rule)
+{
+	struct xen_snd_front_pcm_stream_info *stream = rule->private;
+	struct device *dev = &stream->front_info->xb_dev->dev;
+	struct snd_mask *formats =
+			hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+	struct snd_interval *rates =
+			hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
+	struct snd_interval *channels =
+			hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
+	struct snd_interval *period =
+			hw_param_interval(params,
+					  SNDRV_PCM_HW_PARAM_PERIOD_SIZE);
+	struct snd_interval *buffer =
+			hw_param_interval(params,
+					  SNDRV_PCM_HW_PARAM_BUFFER_SIZE);
+	struct xensnd_query_hw_param req;
+	struct xensnd_query_hw_param resp;
+	struct snd_interval interval;
+	struct snd_mask mask;
+	u64 alsa_formats;
+	int changed, ret;
+
+	/* Collect all the values we need for the query. */
+
+	req.formats = to_sndif_formats_mask((u64)formats->bits[0] |
+					    (u64)(formats->bits[1]) << 32);
+
+	req.rates.min = rates->min;
+	req.rates.max = rates->max;
+
+	req.channels.min = channels->min;
+	req.channels.max = channels->max;
+
+	req.buffer.min = buffer->min;
+	req.buffer.max = buffer->max;
+
+	req.period.min = period->min;
+	req.period.max = period->max;
+
+	ret = xen_snd_front_stream_query_hw_param(&stream->evt_pair->req,
+						  &req, &resp);
+	if (ret < 0) {
+		/* Check if this is due to backend communication error. */
+		if (ret == -EIO || ret == -ETIMEDOUT)
+			dev_err(dev, "Failed to query ALSA HW parameters\n");
+		return ret;
+	}
+
+	/* Refine HW parameters after the query. */
+	changed  = 0;
+
+	alsa_formats = to_alsa_formats_mask(resp.formats);
+	snd_mask_none(&mask);
+	mask.bits[0] = (u32)alsa_formats;
+	mask.bits[1] = (u32)(alsa_formats >> 32);
+	ret = snd_mask_refine(formats, &mask);
+	if (ret < 0)
+		return ret;
+	changed |= ret;
+
+	interval.openmin = 0;
+	interval.openmax = 0;
+	interval.integer = 1;
+
+	interval.min = resp.rates.min;
+	interval.max = resp.rates.max;
+	ret = snd_interval_refine(rates, &interval);
+	if (ret < 0)
+		return ret;
+	changed |= ret;
+
+	interval.min = resp.channels.min;
+	interval.max = resp.channels.max;
+	ret = snd_interval_refine(channels, &interval);
+	if (ret < 0)
+		return ret;
+	changed |= ret;
+
+	interval.min = resp.buffer.min;
+	interval.max = resp.buffer.max;
+	ret = snd_interval_refine(buffer, &interval);
+	if (ret < 0)
+		return ret;
+	changed |= ret;
+
+	interval.min = resp.period.min;
+	interval.max = resp.period.max;
+	ret = snd_interval_refine(period, &interval);
+	if (ret < 0)
+		return ret;
+	changed |= ret;
+
+	return changed;
+}
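+
+/*
+ * An illustrative negotiation round (values are made up): if user-space
+ * requests rates within [8000, 48000] and the backend responds with
+ * [44100, 48000], then snd_interval_refine() above narrows the rate
+ * interval to [44100, 48000] and the positive return value tells the
+ * ALSA core that the configuration changed, so dependent rules are
+ * re-evaluated until everything converges.
+ */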
+
+static int alsa_open(struct snd_pcm_substream *substream)
+{
+	struct xen_snd_front_pcm_instance_info *pcm_instance =
+			snd_pcm_substream_chip(substream);
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	struct xen_snd_front_info *front_info =
+			pcm_instance->card_info->front_info;
+	struct device *dev = &front_info->xb_dev->dev;
+	int ret;
+
+	/*
+	 * Return our HW properties: override defaults with those configured
+	 * via XenStore.
+	 */
+	runtime->hw = stream->pcm_hw;
+	runtime->hw.info &= ~(SNDRV_PCM_INFO_MMAP |
+			      SNDRV_PCM_INFO_MMAP_VALID |
+			      SNDRV_PCM_INFO_DOUBLE |
+			      SNDRV_PCM_INFO_BATCH |
+			      SNDRV_PCM_INFO_NONINTERLEAVED |
+			      SNDRV_PCM_INFO_RESUME |
+			      SNDRV_PCM_INFO_PAUSE);
+	runtime->hw.info |= SNDRV_PCM_INFO_INTERLEAVED;
+
+	stream->evt_pair = &front_info->evt_pairs[stream->index];
+
+	stream->front_info = front_info;
+
+	stream->evt_pair->evt.u.evt.substream = substream;
+
+	stream_clear(stream);
+
+	xen_snd_front_evtchnl_pair_set_connected(stream->evt_pair, true);
+
+	ret = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT,
+				  alsa_hw_rule, stream,
+				  SNDRV_PCM_HW_PARAM_FORMAT, -1);
+	if (ret) {
+		dev_err(dev, "Failed to add HW rule for SNDRV_PCM_HW_PARAM_FORMAT\n");
+		return ret;
+	}
+
+	ret = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
+				  alsa_hw_rule, stream,
+				  SNDRV_PCM_HW_PARAM_RATE, -1);
+	if (ret) {
+		dev_err(dev, "Failed to add HW rule for SNDRV_PCM_HW_PARAM_RATE\n");
+		return ret;
+	}
+
+	ret = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS,
+				  alsa_hw_rule, stream,
+				  SNDRV_PCM_HW_PARAM_CHANNELS, -1);
+	if (ret) {
+		dev_err(dev, "Failed to add HW rule for SNDRV_PCM_HW_PARAM_CHANNELS\n");
+		return ret;
+	}
+
+	ret = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
+				  alsa_hw_rule, stream,
+				  SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1);
+	if (ret) {
+		dev_err(dev, "Failed to add HW rule for SNDRV_PCM_HW_PARAM_PERIOD_SIZE\n");
+		return ret;
+	}
+
+	ret = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
+				  alsa_hw_rule, stream,
+				  SNDRV_PCM_HW_PARAM_BUFFER_SIZE, -1);
+	if (ret) {
+		dev_err(dev, "Failed to add HW rule for SNDRV_PCM_HW_PARAM_BUFFER_SIZE\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int alsa_close(struct snd_pcm_substream *substream)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+
+	xen_snd_front_evtchnl_pair_set_connected(stream->evt_pair, false);
+	return 0;
+}
+
+static int alsa_hw_params(struct snd_pcm_substream *substream,
+			  struct snd_pcm_hw_params *params)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+	int ret;
+
+	/*
+	 * This callback may be called multiple times,
+	 * so free the previously allocated shared buffer if any.
+	 */
+	stream_free(stream);
+
+	ret = xen_snd_front_shbuf_alloc(stream->front_info->xb_dev,
+					&stream->sh_buf,
+					params_buffer_bytes(params));
+	if (ret < 0) {
+		stream_free(stream);
+		dev_err(&stream->front_info->xb_dev->dev,
+			"Failed to allocate buffers for stream with index %d\n",
+			stream->index);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int alsa_hw_free(struct snd_pcm_substream *substream)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+	int ret;
+
+	ret = xen_snd_front_stream_close(&stream->evt_pair->req);
+	stream_free(stream);
+	return ret;
+}
+
+static int alsa_prepare(struct snd_pcm_substream *substream)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+
+	if (!stream->is_open) {
+		struct snd_pcm_runtime *runtime = substream->runtime;
+		u8 sndif_format;
+		int ret;
+
+		ret = to_sndif_format(runtime->format);
+		if (ret < 0) {
+			dev_err(&stream->front_info->xb_dev->dev,
+				"Unsupported sample format: %d\n",
+				runtime->format);
+			return ret;
+		}
+		sndif_format = ret;
+
+		ret = xen_snd_front_stream_prepare(&stream->evt_pair->req,
+						   &stream->sh_buf,
+						   sndif_format,
+						   runtime->channels,
+						   runtime->rate,
+						   snd_pcm_lib_buffer_bytes(substream),
+						   snd_pcm_lib_period_bytes(substream));
+		if (ret < 0)
+			return ret;
+
+		stream->is_open = true;
+	}
+
+	return 0;
+}
+
+static int alsa_trigger(struct snd_pcm_substream *substream, int cmd)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+	int type;
+
+	switch (cmd) {
+	case SNDRV_PCM_TRIGGER_START:
+		type = XENSND_OP_TRIGGER_START;
+		break;
+
+	case SNDRV_PCM_TRIGGER_RESUME:
+		type = XENSND_OP_TRIGGER_RESUME;
+		break;
+
+	case SNDRV_PCM_TRIGGER_STOP:
+		type = XENSND_OP_TRIGGER_STOP;
+		break;
+
+	case SNDRV_PCM_TRIGGER_SUSPEND:
+		type = XENSND_OP_TRIGGER_PAUSE;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return xen_snd_front_stream_trigger(&stream->evt_pair->req, type);
+}
+
+void xen_snd_front_alsa_handle_cur_pos(struct xen_snd_front_evtchnl *evtchnl,
+				       u64 pos_bytes)
+{
+	struct snd_pcm_substream *substream = evtchnl->u.evt.substream;
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+	snd_pcm_uframes_t delta, new_hw_ptr, cur_frame;
+
+	cur_frame = bytes_to_frames(substream->runtime, pos_bytes);
+
+	delta = cur_frame - stream->be_cur_frame;
+	stream->be_cur_frame = cur_frame;
+
+	new_hw_ptr = (snd_pcm_uframes_t)atomic_read(&stream->hw_ptr);
+	new_hw_ptr = (new_hw_ptr + delta) % substream->runtime->buffer_size;
+	atomic_set(&stream->hw_ptr, (int)new_hw_ptr);
+
+	stream->out_frames += delta;
+	if (stream->out_frames > substream->runtime->period_size) {
+		stream->out_frames %= substream->runtime->period_size;
+		snd_pcm_period_elapsed(substream);
+	}
+}
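+
+/*
+ * Example of the position arithmetic above (values are illustrative):
+ * with buffer_size = 1024 and period_size = 256 frames, a backend report
+ * advancing the position by 300 frames moves hw_ptr forward by 300
+ * modulo 1024; since out_frames (300) exceeds the period size, one
+ * elapsed period is signaled to the ALSA core and the remainder (44)
+ * is kept for the next period.
+ */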
+
+static snd_pcm_uframes_t alsa_pointer(struct snd_pcm_substream *substream)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+
+	return (snd_pcm_uframes_t)atomic_read(&stream->hw_ptr);
+}
+
+static int alsa_pb_copy_user(struct snd_pcm_substream *substream,
+			     int channel, unsigned long pos, void __user *src,
+			     unsigned long count)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+
+	if (unlikely(pos + count > stream->sh_buf.buffer_sz))
+		return -EINVAL;
+
+	if (copy_from_user(stream->sh_buf.buffer + pos, src, count))
+		return -EFAULT;
+
+	return xen_snd_front_stream_write(&stream->evt_pair->req, pos, count);
+}
+
+static int alsa_pb_copy_kernel(struct snd_pcm_substream *substream,
+			       int channel, unsigned long pos, void *src,
+			       unsigned long count)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+
+	if (unlikely(pos + count > stream->sh_buf.buffer_sz))
+		return -EINVAL;
+
+	memcpy(stream->sh_buf.buffer + pos, src, count);
+
+	return xen_snd_front_stream_write(&stream->evt_pair->req, pos, count);
+}
+
+static int alsa_cap_copy_user(struct snd_pcm_substream *substream,
+			      int channel, unsigned long pos, void __user *dst,
+			      unsigned long count)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+	int ret;
+
+	if (unlikely(pos + count > stream->sh_buf.buffer_sz))
+		return -EINVAL;
+
+	ret = xen_snd_front_stream_read(&stream->evt_pair->req, pos, count);
+	if (ret < 0)
+		return ret;
+
+	return copy_to_user(dst, stream->sh_buf.buffer + pos, count) ?
+		-EFAULT : 0;
+}
+
+static int alsa_cap_copy_kernel(struct snd_pcm_substream *substream,
+				int channel, unsigned long pos, void *dst,
+				unsigned long count)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+	int ret;
+
+	if (unlikely(pos + count > stream->sh_buf.buffer_sz))
+		return -EINVAL;
+
+	ret = xen_snd_front_stream_read(&stream->evt_pair->req, pos, count);
+	if (ret < 0)
+		return ret;
+
+	memcpy(dst, stream->sh_buf.buffer + pos, count);
+
+	return 0;
+}
+
+static int alsa_pb_fill_silence(struct snd_pcm_substream *substream,
+				int channel, unsigned long pos,
+				unsigned long count)
+{
+	struct xen_snd_front_pcm_stream_info *stream = stream_get(substream);
+
+	if (unlikely(pos + count > stream->sh_buf.buffer_sz))
+		return -EINVAL;
+
+	memset(stream->sh_buf.buffer + pos, 0, count);
+
+	return xen_snd_front_stream_write(&stream->evt_pair->req, pos, count);
+}
+
+/*
+ * FIXME: The mmapped data transfer is asynchronous and there is no
+ * ack signal from user-space when it is done. This is the reason the
+ * mmap operation is not implemented in the PV driver: we do need to
+ * know when the buffer can be transferred to the backend.
+ */
+
+static struct snd_pcm_ops snd_drv_alsa_playback_ops = {
+	.open = alsa_open,
+	.close = alsa_close,
+	.ioctl = snd_pcm_lib_ioctl,
+	.hw_params = alsa_hw_params,
+	.hw_free = alsa_hw_free,
+	.prepare = alsa_prepare,
+	.trigger = alsa_trigger,
+	.pointer = alsa_pointer,
+	.copy_user = alsa_pb_copy_user,
+	.copy_kernel = alsa_pb_copy_kernel,
+	.fill_silence = alsa_pb_fill_silence,
+};
+
+static struct snd_pcm_ops snd_drv_alsa_capture_ops = {
+	.open = alsa_open,
+	.close = alsa_close,
+	.ioctl = snd_pcm_lib_ioctl,
+	.hw_params = alsa_hw_params,
+	.hw_free = alsa_hw_free,
+	.prepare = alsa_prepare,
+	.trigger = alsa_trigger,
+	.pointer = alsa_pointer,
+	.copy_user = alsa_cap_copy_user,
+	.copy_kernel = alsa_cap_copy_kernel,
+};
+
+static int new_pcm_instance(struct xen_snd_front_card_info *card_info,
+			    struct xen_front_cfg_pcm_instance *instance_cfg,
+			    struct xen_snd_front_pcm_instance_info *pcm_instance_info)
+{
+	struct snd_pcm *pcm;
+	int ret, i;
+
+	dev_dbg(&card_info->front_info->xb_dev->dev,
+		"New PCM device \"%s\" with id %d playback %d capture %d",
+		instance_cfg->name,
+		instance_cfg->device_id,
+		instance_cfg->num_streams_pb,
+		instance_cfg->num_streams_cap);
+
+	pcm_instance_info->card_info = card_info;
+
+	pcm_instance_info->pcm_hw = instance_cfg->pcm_hw;
+
+	if (instance_cfg->num_streams_pb) {
+		pcm_instance_info->streams_pb =
+				devm_kcalloc(&card_info->card->card_dev,
+					     instance_cfg->num_streams_pb,
+					     sizeof(struct xen_snd_front_pcm_stream_info),
+					     GFP_KERNEL);
+		if (!pcm_instance_info->streams_pb)
+			return -ENOMEM;
+	}
+
+	if (instance_cfg->num_streams_cap) {
+		pcm_instance_info->streams_cap =
+				devm_kcalloc(&card_info->card->card_dev,
+					     instance_cfg->num_streams_cap,
+					     sizeof(struct xen_snd_front_pcm_stream_info),
+					     GFP_KERNEL);
+		if (!pcm_instance_info->streams_cap)
+			return -ENOMEM;
+	}
+
+	pcm_instance_info->num_pcm_streams_pb =
+			instance_cfg->num_streams_pb;
+	pcm_instance_info->num_pcm_streams_cap =
+			instance_cfg->num_streams_cap;
+
+	for (i = 0; i < pcm_instance_info->num_pcm_streams_pb; i++) {
+		pcm_instance_info->streams_pb[i].pcm_hw =
+			instance_cfg->streams_pb[i].pcm_hw;
+		pcm_instance_info->streams_pb[i].index =
+			instance_cfg->streams_pb[i].index;
+	}
+
+	for (i = 0; i < pcm_instance_info->num_pcm_streams_cap; i++) {
+		pcm_instance_info->streams_cap[i].pcm_hw =
+			instance_cfg->streams_cap[i].pcm_hw;
+		pcm_instance_info->streams_cap[i].index =
+			instance_cfg->streams_cap[i].index;
+	}
+
+	ret = snd_pcm_new(card_info->card, instance_cfg->name,
+			  instance_cfg->device_id,
+			  instance_cfg->num_streams_pb,
+			  instance_cfg->num_streams_cap,
+			  &pcm);
+	if (ret < 0)
+		return ret;
+
+	pcm->private_data = pcm_instance_info;
+	pcm->info_flags = 0;
+	/* We want to handle all PCM operations in a non-atomic context. */
+	pcm->nonatomic = true;
+	strncpy(pcm->name, "Virtual card PCM", sizeof(pcm->name));
+
+	if (instance_cfg->num_streams_pb)
+		snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK,
+				&snd_drv_alsa_playback_ops);
+
+	if (instance_cfg->num_streams_cap)
+		snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE,
+				&snd_drv_alsa_capture_ops);
+
+	pcm_instance_info->pcm = pcm;
+	return 0;
+}
+
+int xen_snd_front_alsa_init(struct xen_snd_front_info *front_info)
+{
+	struct device *dev = &front_info->xb_dev->dev;
+	struct xen_front_cfg_card *cfg = &front_info->cfg;
+	struct xen_snd_front_card_info *card_info;
+	struct snd_card *card;
+	int ret, i;
+
+	dev_dbg(dev, "Creating virtual sound card\n");
+
+	ret = snd_card_new(dev, 0, XENSND_DRIVER_NAME, THIS_MODULE,
+			   sizeof(struct xen_snd_front_card_info), &card);
+	if (ret < 0)
+		return ret;
+
+	card_info = card->private_data;
+	card_info->front_info = front_info;
+	front_info->card_info = card_info;
+	card_info->card = card;
+	card_info->pcm_instances =
+			devm_kcalloc(dev, cfg->num_pcm_instances,
+				     sizeof(struct xen_snd_front_pcm_instance_info),
+				     GFP_KERNEL);
+	if (!card_info->pcm_instances) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	card_info->num_pcm_instances = cfg->num_pcm_instances;
+	card_info->pcm_hw = cfg->pcm_hw;
+
+	for (i = 0; i < cfg->num_pcm_instances; i++) {
+		ret = new_pcm_instance(card_info, &cfg->pcm_instances[i],
+				       &card_info->pcm_instances[i]);
+		if (ret < 0)
+			goto fail;
+	}
+
+	strncpy(card->driver, XENSND_DRIVER_NAME, sizeof(card->driver));
+	strncpy(card->shortname, cfg->name_short, sizeof(card->shortname));
+	strncpy(card->longname, cfg->name_long, sizeof(card->longname));
+
+	ret = snd_card_register(card);
+	if (ret < 0)
+		goto fail;
+
+	return 0;
+
+fail:
+	snd_card_free(card);
+	return ret;
+}
+
+void xen_snd_front_alsa_fini(struct xen_snd_front_info *front_info)
+{
+	struct xen_snd_front_card_info *card_info;
+	struct snd_card *card;
+
+	card_info = front_info->card_info;
+	if (!card_info)
+		return;
+
+	card = card_info->card;
+	if (!card)
+		return;
+
+	dev_dbg(&front_info->xb_dev->dev, "Removing virtual sound card %d\n",
+		card->number);
+	snd_card_free(card);
+
+	/* card_info will be freed when front_info->xb_dev->dev is destroyed. */
+	card_info->card = NULL;
+}
diff --git a/sound/xen/xen_snd_front_alsa.h b/sound/xen/xen_snd_front_alsa.h
new file mode 100644
index 0000000..18abd9e
--- /dev/null
+++ b/sound/xen/xen_snd_front_alsa.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_SND_FRONT_ALSA_H
+#define __XEN_SND_FRONT_ALSA_H
+
+struct xen_snd_front_info;
+
+int xen_snd_front_alsa_init(struct xen_snd_front_info *front_info);
+
+void xen_snd_front_alsa_fini(struct xen_snd_front_info *front_info);
+
+void xen_snd_front_alsa_handle_cur_pos(struct xen_snd_front_evtchnl *evtchnl,
+				       u64 pos_bytes);
+
+#endif /* __XEN_SND_FRONT_ALSA_H */
diff --git a/sound/xen/xen_snd_front_cfg.c b/sound/xen/xen_snd_front_cfg.c
new file mode 100644
index 0000000..eda077c
--- /dev/null
+++ b/sound/xen/xen_snd_front_cfg.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <xen/xenbus.h>
+
+#include <xen/interface/io/sndif.h>
+
+#include "xen_snd_front.h"
+#include "xen_snd_front_cfg.h"
+
+/* Maximum number of supported streams. */
+#define VSND_MAX_STREAM		8
+
+struct cfg_hw_sample_rate {
+	const char *name;
+	unsigned int mask;
+	unsigned int value;
+};
+
+static const struct cfg_hw_sample_rate CFG_HW_SUPPORTED_RATES[] = {
+	{ .name = "5512",   .mask = SNDRV_PCM_RATE_5512,   .value = 5512 },
+	{ .name = "8000",   .mask = SNDRV_PCM_RATE_8000,   .value = 8000 },
+	{ .name = "11025",  .mask = SNDRV_PCM_RATE_11025,  .value = 11025 },
+	{ .name = "16000",  .mask = SNDRV_PCM_RATE_16000,  .value = 16000 },
+	{ .name = "22050",  .mask = SNDRV_PCM_RATE_22050,  .value = 22050 },
+	{ .name = "32000",  .mask = SNDRV_PCM_RATE_32000,  .value = 32000 },
+	{ .name = "44100",  .mask = SNDRV_PCM_RATE_44100,  .value = 44100 },
+	{ .name = "48000",  .mask = SNDRV_PCM_RATE_48000,  .value = 48000 },
+	{ .name = "64000",  .mask = SNDRV_PCM_RATE_64000,  .value = 64000 },
+	{ .name = "96000",  .mask = SNDRV_PCM_RATE_96000,  .value = 96000 },
+	{ .name = "176400", .mask = SNDRV_PCM_RATE_176400, .value = 176400 },
+	{ .name = "192000", .mask = SNDRV_PCM_RATE_192000, .value = 192000 },
+};
+
+struct cfg_hw_sample_format {
+	const char *name;
+	u64 mask;
+};
+
+static const struct cfg_hw_sample_format CFG_HW_SUPPORTED_FORMATS[] = {
+	{
+		.name = XENSND_PCM_FORMAT_U8_STR,
+		.mask = SNDRV_PCM_FMTBIT_U8
+	},
+	{
+		.name = XENSND_PCM_FORMAT_S8_STR,
+		.mask = SNDRV_PCM_FMTBIT_S8
+	},
+	{
+		.name = XENSND_PCM_FORMAT_U16_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_U16_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_U16_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_U16_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_S16_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_S16_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_S16_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_S16_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_U24_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_U24_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_U24_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_U24_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_S24_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_S24_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_S24_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_S24_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_U32_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_U32_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_U32_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_U32_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_S32_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_S32_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_S32_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_S32_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_A_LAW_STR,
+		.mask = SNDRV_PCM_FMTBIT_A_LAW
+	},
+	{
+		.name = XENSND_PCM_FORMAT_MU_LAW_STR,
+		.mask = SNDRV_PCM_FMTBIT_MU_LAW
+	},
+	{
+		.name = XENSND_PCM_FORMAT_F32_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_FLOAT_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_F32_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_FLOAT_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_F64_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_FLOAT64_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_F64_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_FLOAT64_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_IEC958_SUBFRAME_LE_STR,
+		.mask = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_IEC958_SUBFRAME_BE_STR,
+		.mask = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE
+	},
+	{
+		.name = XENSND_PCM_FORMAT_IMA_ADPCM_STR,
+		.mask = SNDRV_PCM_FMTBIT_IMA_ADPCM
+	},
+	{
+		.name = XENSND_PCM_FORMAT_MPEG_STR,
+		.mask = SNDRV_PCM_FMTBIT_MPEG
+	},
+	{
+		.name = XENSND_PCM_FORMAT_GSM_STR,
+		.mask = SNDRV_PCM_FMTBIT_GSM
+	},
+};
+
+static void cfg_hw_rates(char *list, unsigned int len,
+			 const char *path, struct snd_pcm_hardware *pcm_hw)
+{
+	char *cur_rate;
+	unsigned int cur_mask;
+	unsigned int cur_value;
+	unsigned int rates;
+	unsigned int rate_min;
+	unsigned int rate_max;
+	int i;
+
+	rates = 0;
+	rate_min = -1; /* i.e. UINT_MAX */
+	rate_max = 0;
+	while ((cur_rate = strsep(&list, XENSND_LIST_SEPARATOR))) {
+		for (i = 0; i < ARRAY_SIZE(CFG_HW_SUPPORTED_RATES); i++)
+			if (!strncasecmp(cur_rate,
+					 CFG_HW_SUPPORTED_RATES[i].name,
+					 XENSND_SAMPLE_RATE_MAX_LEN)) {
+				cur_mask = CFG_HW_SUPPORTED_RATES[i].mask;
+				cur_value = CFG_HW_SUPPORTED_RATES[i].value;
+				rates |= cur_mask;
+				if (rate_min > cur_value)
+					rate_min = cur_value;
+				if (rate_max < cur_value)
+					rate_max = cur_value;
+			}
+	}
+
+	if (rates) {
+		pcm_hw->rates = rates;
+		pcm_hw->rate_min = rate_min;
+		pcm_hw->rate_max = rate_max;
+	}
+}
+
+static void cfg_formats(char *list, unsigned int len,
+			const char *path, struct snd_pcm_hardware *pcm_hw)
+{
+	u64 formats;
+	char *cur_format;
+	int i;
+
+	formats = 0;
+	while ((cur_format = strsep(&list, XENSND_LIST_SEPARATOR))) {
+		for (i = 0; i < ARRAY_SIZE(CFG_HW_SUPPORTED_FORMATS); i++)
+			if (!strncasecmp(cur_format,
+					 CFG_HW_SUPPORTED_FORMATS[i].name,
+					 XENSND_SAMPLE_FORMAT_MAX_LEN))
+				formats |= CFG_HW_SUPPORTED_FORMATS[i].mask;
+	}
+
+	if (formats)
+		pcm_hw->formats = formats;
+}
+
+#define MAX_BUFFER_SIZE		(64 * 1024)
+#define MIN_PERIOD_SIZE		64
+#define MAX_PERIOD_SIZE		MAX_BUFFER_SIZE
+#define USE_FORMATS		(SNDRV_PCM_FMTBIT_U8 | \
+				 SNDRV_PCM_FMTBIT_S16_LE)
+#define USE_RATE		(SNDRV_PCM_RATE_CONTINUOUS | \
+				 SNDRV_PCM_RATE_8000_48000)
+#define USE_RATE_MIN		5512
+#define USE_RATE_MAX		48000
+#define USE_CHANNELS_MIN	1
+#define USE_CHANNELS_MAX	2
+#define USE_PERIODS_MIN		2
+#define USE_PERIODS_MAX		(MAX_BUFFER_SIZE / MIN_PERIOD_SIZE)
+
+static const struct snd_pcm_hardware SND_DRV_PCM_HW_DEFAULT = {
+	.info = (SNDRV_PCM_INFO_MMAP |
+		 SNDRV_PCM_INFO_INTERLEAVED |
+		 SNDRV_PCM_INFO_RESUME |
+		 SNDRV_PCM_INFO_MMAP_VALID),
+	.formats = USE_FORMATS,
+	.rates = USE_RATE,
+	.rate_min = USE_RATE_MIN,
+	.rate_max = USE_RATE_MAX,
+	.channels_min = USE_CHANNELS_MIN,
+	.channels_max = USE_CHANNELS_MAX,
+	.buffer_bytes_max = MAX_BUFFER_SIZE,
+	.period_bytes_min = MIN_PERIOD_SIZE,
+	.period_bytes_max = MAX_PERIOD_SIZE,
+	.periods_min = USE_PERIODS_MIN,
+	.periods_max = USE_PERIODS_MAX,
+	.fifo_size = 0,
+};
+
+static void cfg_read_pcm_hw(const char *path,
+			    struct snd_pcm_hardware *parent_pcm_hw,
+			    struct snd_pcm_hardware *pcm_hw)
+{
+	char *list;
+	int val;
+	size_t buf_sz;
+	unsigned int len;
+
+	/* Inherit parent's PCM HW and read overrides from XenStore. */
+	if (parent_pcm_hw)
+		*pcm_hw = *parent_pcm_hw;
+	else
+		*pcm_hw = SND_DRV_PCM_HW_DEFAULT;
+
+	val = xenbus_read_unsigned(path, XENSND_FIELD_CHANNELS_MIN, 0);
+	if (val)
+		pcm_hw->channels_min = val;
+
+	val = xenbus_read_unsigned(path, XENSND_FIELD_CHANNELS_MAX, 0);
+	if (val)
+		pcm_hw->channels_max = val;
+
+	list = xenbus_read(XBT_NIL, path, XENSND_FIELD_SAMPLE_RATES, &len);
+	if (!IS_ERR(list)) {
+		cfg_hw_rates(list, len, path, pcm_hw);
+		kfree(list);
+	}
+
+	list = xenbus_read(XBT_NIL, path, XENSND_FIELD_SAMPLE_FORMATS, &len);
+	if (!IS_ERR(list)) {
+		cfg_formats(list, len, path, pcm_hw);
+		kfree(list);
+	}
+
+	buf_sz = xenbus_read_unsigned(path, XENSND_FIELD_BUFFER_SIZE, 0);
+	if (buf_sz)
+		pcm_hw->buffer_bytes_max = buf_sz;
+
+	/* Update configuration to match new values. */
+	if (pcm_hw->channels_min > pcm_hw->channels_max)
+		pcm_hw->channels_min = pcm_hw->channels_max;
+
+	if (pcm_hw->rate_min > pcm_hw->rate_max)
+		pcm_hw->rate_min = pcm_hw->rate_max;
+
+	pcm_hw->period_bytes_max = pcm_hw->buffer_bytes_max;
+
+	pcm_hw->periods_max = pcm_hw->period_bytes_max /
+		pcm_hw->period_bytes_min;
+}
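+
+/*
+ * An illustrative set of XenStore overrides read by the function above
+ * (node names as defined by the sndif protocol, values made up; list
+ * entries are separated by XENSND_LIST_SEPARATOR):
+ *
+ *   channels-min   = "1"
+ *   channels-max   = "2"
+ *   sample-rates   = "44100,48000"
+ *   sample-formats = "s16_le,u8"
+ *   buffer-size    = "65536"
+ *
+ * Any node not present keeps the value inherited from the parent
+ * (card or device) PCM HW description.
+ */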
+
+static int cfg_get_stream_type(const char *path, int index,
+			       int *num_pb, int *num_cap)
+{
+	char *str = NULL;
+	char *stream_path;
+	int ret;
+
+	*num_pb = 0;
+	*num_cap = 0;
+	stream_path = kasprintf(GFP_KERNEL, "%s/%d", path, index);
+	if (!stream_path) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	str = xenbus_read(XBT_NIL, stream_path, XENSND_FIELD_TYPE, NULL);
+	if (IS_ERR(str)) {
+		ret = PTR_ERR(str);
+		str = NULL;
+		goto fail;
+	}
+
+	if (!strncasecmp(str, XENSND_STREAM_TYPE_PLAYBACK,
+			 sizeof(XENSND_STREAM_TYPE_PLAYBACK))) {
+		(*num_pb)++;
+	} else if (!strncasecmp(str, XENSND_STREAM_TYPE_CAPTURE,
+			      sizeof(XENSND_STREAM_TYPE_CAPTURE))) {
+		(*num_cap)++;
+	} else {
+		ret = -EINVAL;
+		goto fail;
+	}
+	ret = 0;
+
+fail:
+	kfree(stream_path);
+	kfree(str);
+	return ret;
+}
+
+static int cfg_stream(struct xen_snd_front_info *front_info,
+		      struct xen_front_cfg_pcm_instance *pcm_instance,
+		      const char *path, int index, int *cur_pb, int *cur_cap,
+		      int *stream_cnt)
+{
+	char *str = NULL;
+	char *stream_path;
+	struct xen_front_cfg_stream *stream;
+	int ret;
+
+	stream_path = devm_kasprintf(&front_info->xb_dev->dev,
+				     GFP_KERNEL, "%s/%d", path, index);
+	if (!stream_path) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	str = xenbus_read(XBT_NIL, stream_path, XENSND_FIELD_TYPE, NULL);
+	if (IS_ERR(str)) {
+		ret = PTR_ERR(str);
+		str = NULL;
+		goto fail;
+	}
+
+	if (!strncasecmp(str, XENSND_STREAM_TYPE_PLAYBACK,
+			 sizeof(XENSND_STREAM_TYPE_PLAYBACK))) {
+		stream = &pcm_instance->streams_pb[(*cur_pb)++];
+	} else if (!strncasecmp(str, XENSND_STREAM_TYPE_CAPTURE,
+			      sizeof(XENSND_STREAM_TYPE_CAPTURE))) {
+		stream = &pcm_instance->streams_cap[(*cur_cap)++];
+	} else {
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	/* Get next stream index. */
+	stream->index = (*stream_cnt)++;
+	stream->xenstore_path = stream_path;
+	/*
+	 * Check in XenStore whether a PCM HW configuration exists for this
+	 * stream and update accordingly if so: we inherit all values from
+	 * the device's PCM HW, but can still override some of them for the
+	 * stream.
+	 */
+	cfg_read_pcm_hw(stream->xenstore_path,
+			&pcm_instance->pcm_hw, &stream->pcm_hw);
+	ret = 0;
+
+fail:
+	kfree(str);
+	return ret;
+}
+
+static int cfg_device(struct xen_snd_front_info *front_info,
+		      struct xen_front_cfg_pcm_instance *pcm_instance,
+		      struct snd_pcm_hardware *parent_pcm_hw,
+		      const char *path, int node_index, int *stream_cnt)
+{
+	char *str;
+	char *device_path;
+	int ret, i, num_streams;
+	int num_pb, num_cap;
+	int cur_pb, cur_cap;
+	char node[3];
+
+	device_path = kasprintf(GFP_KERNEL, "%s/%d", path, node_index);
+	if (!device_path)
+		return -ENOMEM;
+
+	str = xenbus_read(XBT_NIL, device_path, XENSND_FIELD_DEVICE_NAME, NULL);
+	if (!IS_ERR(str)) {
+		strlcpy(pcm_instance->name, str, sizeof(pcm_instance->name));
+		kfree(str);
+	}
+
+	pcm_instance->device_id = node_index;
+
+	/*
+	 * Check in XenStore whether a PCM HW configuration exists for this
+	 * device and update accordingly if so: we inherit all values from
+	 * the card's PCM HW, but can still override some of them for the
+	 * device.
+	 */
+	cfg_read_pcm_hw(device_path, parent_pcm_hw, &pcm_instance->pcm_hw);
+
+	/* Find out how many streams were configured in XenStore. */
+	num_streams = 0;
+	do {
+		snprintf(node, sizeof(node), "%d", num_streams);
+		if (!xenbus_exists(XBT_NIL, device_path, node))
+			break;
+
+		num_streams++;
+	} while (num_streams < VSND_MAX_STREAM);
+
+	pcm_instance->num_streams_pb = 0;
+	pcm_instance->num_streams_cap = 0;
+	/* Get number of playback and capture streams. */
+	for (i = 0; i < num_streams; i++) {
+		ret = cfg_get_stream_type(device_path, i, &num_pb, &num_cap);
+		if (ret < 0)
+			goto fail;
+
+		pcm_instance->num_streams_pb += num_pb;
+		pcm_instance->num_streams_cap += num_cap;
+	}
+
+	if (pcm_instance->num_streams_pb) {
+		pcm_instance->streams_pb =
+				devm_kcalloc(&front_info->xb_dev->dev,
+					     pcm_instance->num_streams_pb,
+					     sizeof(struct xen_front_cfg_stream),
+					     GFP_KERNEL);
+		if (!pcm_instance->streams_pb) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+	}
+
+	if (pcm_instance->num_streams_cap) {
+		pcm_instance->streams_cap =
+				devm_kcalloc(&front_info->xb_dev->dev,
+					     pcm_instance->num_streams_cap,
+					     sizeof(struct xen_front_cfg_stream),
+					     GFP_KERNEL);
+		if (!pcm_instance->streams_cap) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+	}
+
+	cur_pb = 0;
+	cur_cap = 0;
+	for (i = 0; i < num_streams; i++) {
+		ret = cfg_stream(front_info, pcm_instance, device_path, i,
+				 &cur_pb, &cur_cap, stream_cnt);
+		if (ret < 0)
+			goto fail;
+	}
+	ret = 0;
+
+fail:
+	kfree(device_path);
+	return ret;
+}
+
+int xen_snd_front_cfg_card(struct xen_snd_front_info *front_info,
+			   int *stream_cnt)
+{
+	struct xenbus_device *xb_dev = front_info->xb_dev;
+	struct xen_front_cfg_card *cfg = &front_info->cfg;
+	int ret, num_devices, i;
+	char node[3];
+
+	*stream_cnt = 0;
+	num_devices = 0;
+	do {
+		snprintf(node, sizeof(node), "%d", num_devices);
+		if (!xenbus_exists(XBT_NIL, xb_dev->nodename, node))
+			break;
+
+		num_devices++;
+	} while (num_devices < SNDRV_PCM_DEVICES);
+
+	if (!num_devices) {
+		dev_warn(&xb_dev->dev,
+			 "No devices configured for sound card at %s\n",
+			 xb_dev->nodename);
+		return -ENODEV;
+	}
+
+	/* Start from default PCM HW configuration for the card. */
+	cfg_read_pcm_hw(xb_dev->nodename, NULL, &cfg->pcm_hw);
+
+	cfg->pcm_instances =
+			devm_kcalloc(&front_info->xb_dev->dev, num_devices,
+				     sizeof(struct xen_front_cfg_pcm_instance),
+				     GFP_KERNEL);
+	if (!cfg->pcm_instances)
+		return -ENOMEM;
+
+	for (i = 0; i < num_devices; i++) {
+		ret = cfg_device(front_info, &cfg->pcm_instances[i],
+				 &cfg->pcm_hw, xb_dev->nodename, i, stream_cnt);
+		if (ret < 0)
+			return ret;
+	}
+	cfg->num_pcm_instances = num_devices;
+	return 0;
+}
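+
+/*
+ * For illustration, the frontend's XenStore subtree enumerated above may
+ * look like the following (indices and stream types are made up, node
+ * names are those defined by the sndif protocol):
+ *
+ *   <nodename>/0/            <- PCM device (instance) 0
+ *   <nodename>/0/0/type      <- stream 0, e.g. "p" (playback)
+ *   <nodename>/0/1/type      <- stream 1, e.g. "c" (capture)
+ *   <nodename>/1/            <- PCM device (instance) 1
+ *
+ * Devices and streams are probed by index, starting from 0, until the
+ * first missing node terminates the enumeration.
+ */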
+
diff --git a/sound/xen/xen_snd_front_cfg.h b/sound/xen/xen_snd_front_cfg.h
new file mode 100644
index 0000000..2353fcc
--- /dev/null
+++ b/sound/xen/xen_snd_front_cfg.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_SND_FRONT_CFG_H
+#define __XEN_SND_FRONT_CFG_H
+
+#include <sound/core.h>
+#include <sound/pcm.h>
+
+struct xen_snd_front_info;
+
+struct xen_front_cfg_stream {
+	int index;
+	char *xenstore_path;
+	struct snd_pcm_hardware pcm_hw;
+};
+
+struct xen_front_cfg_pcm_instance {
+	char name[80];
+	int device_id;
+	struct snd_pcm_hardware pcm_hw;
+	int  num_streams_pb;
+	struct xen_front_cfg_stream *streams_pb;
+	int  num_streams_cap;
+	struct xen_front_cfg_stream *streams_cap;
+};
+
+struct xen_front_cfg_card {
+	char name_short[32];
+	char name_long[80];
+	struct snd_pcm_hardware pcm_hw;
+	int num_pcm_instances;
+	struct xen_front_cfg_pcm_instance *pcm_instances;
+};
+
+int xen_snd_front_cfg_card(struct xen_snd_front_info *front_info,
+			   int *stream_cnt);
+
+#endif /* __XEN_SND_FRONT_CFG_H */
diff --git a/sound/xen/xen_snd_front_evtchnl.c b/sound/xen/xen_snd_front_evtchnl.c
new file mode 100644
index 0000000..102d6e0
--- /dev/null
+++ b/sound/xen/xen_snd_front_evtchnl.c
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+
+#include "xen_snd_front.h"
+#include "xen_snd_front_alsa.h"
+#include "xen_snd_front_cfg.h"
+#include "xen_snd_front_evtchnl.h"
+
+static irqreturn_t evtchnl_interrupt_req(int irq, void *dev_id)
+{
+	struct xen_snd_front_evtchnl *channel = dev_id;
+	struct xen_snd_front_info *front_info = channel->front_info;
+	struct xensnd_resp *resp;
+	RING_IDX i, rp;
+
+	if (unlikely(channel->state != EVTCHNL_STATE_CONNECTED))
+		return IRQ_HANDLED;
+
+	mutex_lock(&channel->ring_io_lock);
+
+again:
+	rp = channel->u.req.ring.sring->rsp_prod;
+	/* Ensure we see queued responses up to rp. */
+	rmb();
+
+	/*
+	 * Assume that the backend is trusted to always write sane values
+	 * to the ring counters, so no overflow checks on frontend side
+	 * are required.
+	 */
+	for (i = channel->u.req.ring.rsp_cons; i != rp; i++) {
+		resp = RING_GET_RESPONSE(&channel->u.req.ring, i);
+		if (resp->id != channel->evt_id)
+			continue;
+		switch (resp->operation) {
+		case XENSND_OP_OPEN:
+			/* fall through */
+		case XENSND_OP_CLOSE:
+			/* fall through */
+		case XENSND_OP_READ:
+			/* fall through */
+		case XENSND_OP_WRITE:
+			/* fall through */
+		case XENSND_OP_TRIGGER:
+			channel->u.req.resp_status = resp->status;
+			complete(&channel->u.req.completion);
+			break;
+		case XENSND_OP_HW_PARAM_QUERY:
+			channel->u.req.resp_status = resp->status;
+			channel->u.req.resp.hw_param =
+					resp->resp.hw_param;
+			complete(&channel->u.req.completion);
+			break;
+
+		default:
+			dev_err(&front_info->xb_dev->dev,
+				"Operation %d is not supported\n",
+				resp->operation);
+			break;
+		}
+	}
+
+	channel->u.req.ring.rsp_cons = i;
+	if (i != channel->u.req.ring.req_prod_pvt) {
+		int more_to_do;
+
+		RING_FINAL_CHECK_FOR_RESPONSES(&channel->u.req.ring,
+					       more_to_do);
+		if (more_to_do)
+			goto again;
+	} else {
+		channel->u.req.ring.sring->rsp_event = i + 1;
+	}
+
+	mutex_unlock(&channel->ring_io_lock);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t evtchnl_interrupt_evt(int irq, void *dev_id)
+{
+	struct xen_snd_front_evtchnl *channel = dev_id;
+	struct xensnd_event_page *page = channel->u.evt.page;
+	u32 cons, prod;
+
+	if (unlikely(channel->state != EVTCHNL_STATE_CONNECTED))
+		return IRQ_HANDLED;
+
+	mutex_lock(&channel->ring_io_lock);
+
+	prod = page->in_prod;
+	/* Ensure we see ring contents up to prod. */
+	virt_rmb();
+	if (prod == page->in_cons)
+		goto out;
+
+	/*
+	 * Assume that the backend is trusted to always write sane values
+	 * to the ring counters, so no overflow checks on frontend side
+	 * are required.
+	 */
+	for (cons = page->in_cons; cons != prod; cons++) {
+		struct xensnd_evt *event;
+
+		event = &XENSND_IN_RING_REF(page, cons);
+		if (unlikely(event->id != channel->evt_id++))
+			continue;
+
+		switch (event->type) {
+		case XENSND_EVT_CUR_POS:
+			xen_snd_front_alsa_handle_cur_pos(channel,
+							  event->op.cur_pos.position);
+			break;
+		}
+	}
+
+	page->in_cons = cons;
+	/* Ensure ring contents. */
+	virt_wmb();
+
+out:
+	mutex_unlock(&channel->ring_io_lock);
+	return IRQ_HANDLED;
+}
+
+void xen_snd_front_evtchnl_flush(struct xen_snd_front_evtchnl *channel)
+{
+	int notify;
+
+	channel->u.req.ring.req_prod_pvt++;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&channel->u.req.ring, notify);
+	if (notify)
+		notify_remote_via_irq(channel->irq);
+}
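+
+/*
+ * A sketch of how this helper is expected to be used (the actual
+ * callers live in xen_snd_front.c): the caller serializes on
+ * u.req.req_io_lock, puts a request on the ring under ring_io_lock,
+ * re-initializes u.req.completion, calls xen_snd_front_evtchnl_flush()
+ * and then waits for the completion with a VSND_WAIT_BACK_MS timeout;
+ * evtchnl_interrupt_req() completes it once the response arrives.
+ */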
+
+static void evtchnl_free(struct xen_snd_front_info *front_info,
+			 struct xen_snd_front_evtchnl *channel)
+{
+	unsigned long page = 0;
+
+	if (channel->type == EVTCHNL_TYPE_REQ)
+		page = (unsigned long)channel->u.req.ring.sring;
+	else if (channel->type == EVTCHNL_TYPE_EVT)
+		page = (unsigned long)channel->u.evt.page;
+
+	if (!page)
+		return;
+
+	channel->state = EVTCHNL_STATE_DISCONNECTED;
+	if (channel->type == EVTCHNL_TYPE_REQ) {
+		/* Release everyone who still waits for a response, if any. */
+		channel->u.req.resp_status = -EIO;
+		complete_all(&channel->u.req.completion);
+	}
+
+	if (channel->irq)
+		unbind_from_irqhandler(channel->irq, channel);
+
+	if (channel->port)
+		xenbus_free_evtchn(front_info->xb_dev, channel->port);
+
+	/* End access and free the page. */
+	if (channel->gref != GRANT_INVALID_REF)
+		gnttab_end_foreign_access(channel->gref, 0, page);
+	else
+		free_page(page);
+
+	memset(channel, 0, sizeof(*channel));
+}
+
+void xen_snd_front_evtchnl_free_all(struct xen_snd_front_info *front_info)
+{
+	int i;
+
+	if (!front_info->evt_pairs)
+		return;
+
+	for (i = 0; i < front_info->num_evt_pairs; i++) {
+		evtchnl_free(front_info, &front_info->evt_pairs[i].req);
+		evtchnl_free(front_info, &front_info->evt_pairs[i].evt);
+	}
+
+	kfree(front_info->evt_pairs);
+	front_info->evt_pairs = NULL;
+}
+
+static int evtchnl_alloc(struct xen_snd_front_info *front_info, int index,
+			 struct xen_snd_front_evtchnl *channel,
+			 enum xen_snd_front_evtchnl_type type)
+{
+	struct xenbus_device *xb_dev = front_info->xb_dev;
+	unsigned long page;
+	grant_ref_t gref;
+	irq_handler_t handler;
+	char *handler_name = NULL;
+	int ret;
+
+	memset(channel, 0, sizeof(*channel));
+	channel->type = type;
+	channel->index = index;
+	channel->front_info = front_info;
+	channel->state = EVTCHNL_STATE_DISCONNECTED;
+	channel->gref = GRANT_INVALID_REF;
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	handler_name = kasprintf(GFP_KERNEL, "%s-%s", XENSND_DRIVER_NAME,
+				 type == EVTCHNL_TYPE_REQ ?
+				 XENSND_FIELD_RING_REF :
+				 XENSND_FIELD_EVT_RING_REF);
+	if (!handler_name) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	mutex_init(&channel->ring_io_lock);
+
+	if (type == EVTCHNL_TYPE_REQ) {
+		struct xen_sndif_sring *sring = (struct xen_sndif_sring *)page;
+
+		init_completion(&channel->u.req.completion);
+		mutex_init(&channel->u.req.req_io_lock);
+		SHARED_RING_INIT(sring);
+		FRONT_RING_INIT(&channel->u.req.ring, sring, XEN_PAGE_SIZE);
+
+		ret = xenbus_grant_ring(xb_dev, sring, 1, &gref);
+		if (ret < 0) {
+			channel->u.req.ring.sring = NULL;
+			goto fail;
+		}
+
+		handler = evtchnl_interrupt_req;
+	} else {
+		ret = gnttab_grant_foreign_access(xb_dev->otherend_id,
+						  virt_to_gfn((void *)page), 0);
+		if (ret < 0)
+			goto fail;
+
+		channel->u.evt.page = (struct xensnd_event_page *)page;
+		gref = ret;
+		handler = evtchnl_interrupt_evt;
+	}
+
+	channel->gref = gref;
+
+	ret = xenbus_alloc_evtchn(xb_dev, &channel->port);
+	if (ret < 0)
+		goto fail;
+
+	ret = bind_evtchn_to_irq(channel->port);
+	if (ret < 0) {
+		dev_err(&xb_dev->dev,
+			"Failed to bind IRQ for domid %d port %d: %d\n",
+			front_info->xb_dev->otherend_id, channel->port, ret);
+		goto fail;
+	}
+
+	channel->irq = ret;
+
+	ret = request_threaded_irq(channel->irq, NULL, handler,
+				   IRQF_ONESHOT, handler_name, channel);
+	if (ret < 0) {
+		dev_err(&xb_dev->dev, "Failed to request IRQ %d: %d\n",
+			channel->irq, ret);
+		goto fail;
+	}
+
+	kfree(handler_name);
+	return 0;
+
+fail:
+	if (page)
+		free_page(page);
+	kfree(handler_name);
+	dev_err(&xb_dev->dev, "Failed to allocate ring: %d\n", ret);
+	return ret;
+}
+
+int xen_snd_front_evtchnl_create_all(struct xen_snd_front_info *front_info,
+				     int num_streams)
+{
+	struct xen_front_cfg_card *cfg = &front_info->cfg;
+	struct device *dev = &front_info->xb_dev->dev;
+	int d, ret = 0;
+
+	front_info->evt_pairs =
+			kcalloc(num_streams,
+				sizeof(struct xen_snd_front_evtchnl_pair),
+				GFP_KERNEL);
+	if (!front_info->evt_pairs)
+		return -ENOMEM;
+
+	/* Iterate over devices and their streams and create event channels. */
+	for (d = 0; d < cfg->num_pcm_instances; d++) {
+		struct xen_front_cfg_pcm_instance *pcm_instance;
+		int s, index;
+
+		pcm_instance = &cfg->pcm_instances[d];
+
+		for (s = 0; s < pcm_instance->num_streams_pb; s++) {
+			index = pcm_instance->streams_pb[s].index;
+
+			ret = evtchnl_alloc(front_info, index,
+					    &front_info->evt_pairs[index].req,
+					    EVTCHNL_TYPE_REQ);
+			if (ret < 0) {
+				dev_err(dev, "Error allocating control channel\n");
+				goto fail;
+			}
+
+			ret = evtchnl_alloc(front_info, index,
+					    &front_info->evt_pairs[index].evt,
+					    EVTCHNL_TYPE_EVT);
+			if (ret < 0) {
+				dev_err(dev, "Error allocating in-event channel\n");
+				goto fail;
+			}
+		}
+
+		for (s = 0; s < pcm_instance->num_streams_cap; s++) {
+			index = pcm_instance->streams_cap[s].index;
+
+			ret = evtchnl_alloc(front_info, index,
+					    &front_info->evt_pairs[index].req,
+					    EVTCHNL_TYPE_REQ);
+			if (ret < 0) {
+				dev_err(dev, "Error allocating control channel\n");
+				goto fail;
+			}
+
+			ret = evtchnl_alloc(front_info, index,
+					    &front_info->evt_pairs[index].evt,
+					    EVTCHNL_TYPE_EVT);
+			if (ret < 0) {
+				dev_err(dev, "Error allocating in-event channel\n");
+				goto fail;
+			}
+		}
+	}
+
+	front_info->num_evt_pairs = num_streams;
+	return 0;
+
+fail:
+	xen_snd_front_evtchnl_free_all(front_info);
+	return ret;
+}
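+
+/*
+ * Note on indexing (follows from the code above): stream indices are
+ * global across all PCM instances of the card, so evt_pairs[] is a flat
+ * array with exactly one request/event channel pair per stream. For
+ * example, a card with one playback and one capture stream uses
+ * evt_pairs[0] and evt_pairs[1], no matter how the streams are spread
+ * across PCM devices.
+ */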
+
+static int evtchnl_publish(struct xenbus_transaction xbt,
+			   struct xen_snd_front_evtchnl *channel,
+			   const char *path, const char *node_ring,
+			   const char *node_chnl)
+{
+	struct xenbus_device *xb_dev = channel->front_info->xb_dev;
+	int ret;
+
+	/* Write control channel ring reference. */
+	ret = xenbus_printf(xbt, path, node_ring, "%u", channel->gref);
+	if (ret < 0) {
+		dev_err(&xb_dev->dev, "Error writing ring-ref: %d\n", ret);
+		return ret;
+	}
+
+	/* Write event channel ring reference. */
+	ret = xenbus_printf(xbt, path, node_chnl, "%u", channel->port);
+	if (ret < 0) {
+		dev_err(&xb_dev->dev, "Error writing event channel: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int xen_snd_front_evtchnl_publish_all(struct xen_snd_front_info *front_info)
+{
+	struct xen_front_cfg_card *cfg = &front_info->cfg;
+	struct xenbus_transaction xbt;
+	int ret, d;
+
+again:
+	ret = xenbus_transaction_start(&xbt);
+	if (ret < 0) {
+		xenbus_dev_fatal(front_info->xb_dev, ret,
+				 "starting transaction");
+		return ret;
+	}
+
+	for (d = 0; d < cfg->num_pcm_instances; d++) {
+		struct xen_front_cfg_pcm_instance *pcm_instance;
+		int s, index;
+
+		pcm_instance = &cfg->pcm_instances[d];
+
+		for (s = 0; s < pcm_instance->num_streams_pb; s++) {
+			index = pcm_instance->streams_pb[s].index;
+
+			ret = evtchnl_publish(xbt,
+					      &front_info->evt_pairs[index].req,
+					      pcm_instance->streams_pb[s].xenstore_path,
+					      XENSND_FIELD_RING_REF,
+					      XENSND_FIELD_EVT_CHNL);
+			if (ret < 0)
+				goto fail;
+
+			ret = evtchnl_publish(xbt,
+					      &front_info->evt_pairs[index].evt,
+					      pcm_instance->streams_pb[s].xenstore_path,
+					      XENSND_FIELD_EVT_RING_REF,
+					      XENSND_FIELD_EVT_EVT_CHNL);
+			if (ret < 0)
+				goto fail;
+		}
+
+		for (s = 0; s < pcm_instance->num_streams_cap; s++) {
+			index = pcm_instance->streams_cap[s].index;
+
+			ret = evtchnl_publish(xbt,
+					      &front_info->evt_pairs[index].req,
+					      pcm_instance->streams_cap[s].xenstore_path,
+					      XENSND_FIELD_RING_REF,
+					      XENSND_FIELD_EVT_CHNL);
+			if (ret < 0)
+				goto fail;
+
+			ret = evtchnl_publish(xbt,
+					      &front_info->evt_pairs[index].evt,
+					      pcm_instance->streams_cap[s].xenstore_path,
+					      XENSND_FIELD_EVT_RING_REF,
+					      XENSND_FIELD_EVT_EVT_CHNL);
+			if (ret < 0)
+				goto fail;
+		}
+	}
+	ret = xenbus_transaction_end(xbt, 0);
+	if (ret < 0) {
+		if (ret == -EAGAIN)
+			goto again;
+
+		xenbus_dev_fatal(front_info->xb_dev, ret,
+				 "completing transaction");
+		goto fail_to_end;
+	}
+	return 0;
+fail:
+	xenbus_transaction_end(xbt, 1);
+fail_to_end:
+	xenbus_dev_fatal(front_info->xb_dev, ret, "writing XenStore");
+	return ret;
+}
+
+void xen_snd_front_evtchnl_pair_set_connected(struct xen_snd_front_evtchnl_pair *evt_pair,
+					      bool is_connected)
+{
+	enum xen_snd_front_evtchnl_state state;
+
+	if (is_connected)
+		state = EVTCHNL_STATE_CONNECTED;
+	else
+		state = EVTCHNL_STATE_DISCONNECTED;
+
+	mutex_lock(&evt_pair->req.ring_io_lock);
+	evt_pair->req.state = state;
+	mutex_unlock(&evt_pair->req.ring_io_lock);
+
+	mutex_lock(&evt_pair->evt.ring_io_lock);
+	evt_pair->evt.state = state;
+	mutex_unlock(&evt_pair->evt.ring_io_lock);
+}
+
+void xen_snd_front_evtchnl_pair_clear(struct xen_snd_front_evtchnl_pair *evt_pair)
+{
+	mutex_lock(&evt_pair->req.ring_io_lock);
+	evt_pair->req.evt_next_id = 0;
+	mutex_unlock(&evt_pair->req.ring_io_lock);
+
+	mutex_lock(&evt_pair->evt.ring_io_lock);
+	evt_pair->evt.evt_next_id = 0;
+	mutex_unlock(&evt_pair->evt.ring_io_lock);
+}
+
diff --git a/sound/xen/xen_snd_front_evtchnl.h b/sound/xen/xen_snd_front_evtchnl.h
new file mode 100644
index 0000000..cbe51fd
--- /dev/null
+++ b/sound/xen/xen_snd_front_evtchnl.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_SND_FRONT_EVTCHNL_H
+#define __XEN_SND_FRONT_EVTCHNL_H
+
+#include <xen/interface/io/sndif.h>
+
+struct xen_snd_front_info;
+
+#ifndef GRANT_INVALID_REF
+/*
+ * FIXME: usage of grant reference 0 as an invalid grant reference:
+ * grant reference 0 is valid, but never exposed to a PV driver,
+ * because it is already in use/reserved by the PV console.
+ */
+#define GRANT_INVALID_REF	0
+#endif
+
+/* Timeout in ms to wait for backend to respond. */
+#define VSND_WAIT_BACK_MS	3000
+
+enum xen_snd_front_evtchnl_state {
+	EVTCHNL_STATE_DISCONNECTED,
+	EVTCHNL_STATE_CONNECTED,
+};
+
+enum xen_snd_front_evtchnl_type {
+	EVTCHNL_TYPE_REQ,
+	EVTCHNL_TYPE_EVT,
+};
+
+struct xen_snd_front_evtchnl {
+	struct xen_snd_front_info *front_info;
+	int gref;
+	int port;
+	int irq;
+	int index;
+	/* State of the event channel. */
+	enum xen_snd_front_evtchnl_state state;
+	enum xen_snd_front_evtchnl_type type;
+	/* Either response id or incoming event id. */
+	u16 evt_id;
+	/* Next request id or next expected event id. */
+	u16 evt_next_id;
+	/* Shared ring access lock. */
+	struct mutex ring_io_lock;
+	union {
+		struct {
+			struct xen_sndif_front_ring ring;
+			struct completion completion;
+			/* Serializer for backend IO: request/response. */
+			struct mutex req_io_lock;
+
+			/* Latest response status. */
+			int resp_status;
+			union {
+				struct xensnd_query_hw_param hw_param;
+			} resp;
+		} req;
+		struct {
+			struct xensnd_event_page *page;
+			/* This is needed to handle XENSND_EVT_CUR_POS event. */
+			struct snd_pcm_substream *substream;
+		} evt;
+	} u;
+};
+
+struct xen_snd_front_evtchnl_pair {
+	struct xen_snd_front_evtchnl req;
+	struct xen_snd_front_evtchnl evt;
+};
+
+int xen_snd_front_evtchnl_create_all(struct xen_snd_front_info *front_info,
+				     int num_streams);
+
+void xen_snd_front_evtchnl_free_all(struct xen_snd_front_info *front_info);
+
+int xen_snd_front_evtchnl_publish_all(struct xen_snd_front_info *front_info);
+
+void xen_snd_front_evtchnl_flush(struct xen_snd_front_evtchnl *evtchnl);
+
+void xen_snd_front_evtchnl_pair_set_connected(struct xen_snd_front_evtchnl_pair *evt_pair,
+					      bool is_connected);
+
+void xen_snd_front_evtchnl_pair_clear(struct xen_snd_front_evtchnl_pair *evt_pair);
+
+#endif /* __XEN_SND_FRONT_EVTCHNL_H */
diff --git a/sound/xen/xen_snd_front_shbuf.c b/sound/xen/xen_snd_front_shbuf.c
new file mode 100644
index 0000000..07ac176
--- /dev/null
+++ b/sound/xen/xen_snd_front_shbuf.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#include <linux/kernel.h>
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+
+#include "xen_snd_front_shbuf.h"
+
+grant_ref_t xen_snd_front_shbuf_get_dir_start(struct xen_snd_front_shbuf *buf)
+{
+	if (!buf->grefs)
+		return GRANT_INVALID_REF;
+
+	return buf->grefs[0];
+}
+
+void xen_snd_front_shbuf_clear(struct xen_snd_front_shbuf *buf)
+{
+	memset(buf, 0, sizeof(*buf));
+}
+
+void xen_snd_front_shbuf_free(struct xen_snd_front_shbuf *buf)
+{
+	int i;
+
+	if (buf->grefs) {
+		for (i = 0; i < buf->num_grefs; i++)
+			if (buf->grefs[i] != GRANT_INVALID_REF)
+				gnttab_end_foreign_access(buf->grefs[i],
+							  0, 0UL);
+		kfree(buf->grefs);
+	}
+	kfree(buf->directory);
+	free_pages_exact(buf->buffer, buf->buffer_sz);
+	xen_snd_front_shbuf_clear(buf);
+}
+
+/*
+ * Number of grant references a single page can hold, taking the
+ * xensnd_page_directory header into account.
+ */
+#define XENSND_NUM_GREFS_PER_PAGE ((XEN_PAGE_SIZE - \
+		offsetof(struct xensnd_page_directory, gref)) / \
+		sizeof(grant_ref_t))
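+
+/*
+ * A quick sanity check of the math above (assuming XEN_PAGE_SIZE == 4096
+ * and 4-byte grant references): a directory page holds
+ * (4096 - 4) / 4 = 1023 references, as its first 4 bytes are taken by
+ * the gref_dir_next_page link. The default 64 KiB stream buffer
+ * (16 pages) thus fits into a single directory page.
+ */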
+
+static void fill_page_dir(struct xen_snd_front_shbuf *buf,
+			  int num_pages_dir)
+{
+	struct xensnd_page_directory *page_dir;
+	unsigned char *ptr;
+	int i, cur_gref, grefs_left, to_copy;
+
+	ptr = buf->directory;
+	grefs_left = buf->num_grefs - num_pages_dir;
+	/*
+	 * Skip the grant references at the beginning: they are for the
+	 * pages granted for the page directory itself.
+	 */
+	cur_gref = num_pages_dir;
+	for (i = 0; i < num_pages_dir; i++) {
+		page_dir = (struct xensnd_page_directory *)ptr;
+		if (grefs_left <= XENSND_NUM_GREFS_PER_PAGE) {
+			to_copy = grefs_left;
+			page_dir->gref_dir_next_page = GRANT_INVALID_REF;
+		} else {
+			to_copy = XENSND_NUM_GREFS_PER_PAGE;
+			page_dir->gref_dir_next_page = buf->grefs[i + 1];
+		}
+
+		memcpy(&page_dir->gref, &buf->grefs[cur_gref],
+		       to_copy * sizeof(grant_ref_t));
+
+		ptr += XEN_PAGE_SIZE;
+		grefs_left -= to_copy;
+		cur_gref += to_copy;
+	}
+}
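+
+/*
+ * Directory chaining, illustrated (page sizes assumed as above): a
+ * buffer needing 2000 grant references takes two directory pages:
+ * page 0 holds the first 1023 references and links to page 1 via
+ * gref_dir_next_page, page 1 holds the remaining 977 and terminates
+ * the chain with GRANT_INVALID_REF.
+ */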
+
+static int grant_references(struct xenbus_device *xb_dev,
+			    struct xen_snd_front_shbuf *buf,
+			    int num_pages_dir, int num_pages_buffer,
+			    int num_grefs)
+{
+	grant_ref_t priv_gref_head;
+	unsigned long frame;
+	int ret, i, j, cur_ref;
+	int otherend_id;
+
+	ret = gnttab_alloc_grant_references(num_grefs, &priv_gref_head);
+	if (ret)
+		return ret;
+
+	buf->num_grefs = num_grefs;
+	otherend_id = xb_dev->otherend_id;
+	j = 0;
+
+	for (i = 0; i < num_pages_dir; i++) {
+		cur_ref = gnttab_claim_grant_reference(&priv_gref_head);
+		if (cur_ref < 0) {
+			ret = cur_ref;
+			goto fail;
+		}
+
+		frame = xen_page_to_gfn(virt_to_page(buf->directory +
+						     XEN_PAGE_SIZE * i));
+		gnttab_grant_foreign_access_ref(cur_ref, otherend_id, frame, 0);
+		buf->grefs[j++] = cur_ref;
+	}
+
+	for (i = 0; i < num_pages_buffer; i++) {
+		cur_ref = gnttab_claim_grant_reference(&priv_gref_head);
+		if (cur_ref < 0) {
+			ret = cur_ref;
+			goto fail;
+		}
+
+		frame = xen_page_to_gfn(virt_to_page(buf->buffer +
+						     XEN_PAGE_SIZE * i));
+		gnttab_grant_foreign_access_ref(cur_ref, otherend_id, frame, 0);
+		buf->grefs[j++] = cur_ref;
+	}
+
+	gnttab_free_grant_references(priv_gref_head);
+	return 0;
+
+fail:
+	gnttab_free_grant_references(priv_gref_head);
+	return ret;
+}
+
+static int alloc_int_buffers(struct xen_snd_front_shbuf *buf,
+			     int num_pages_dir, int num_pages_buffer,
+			     int num_grefs)
+{
+	buf->grefs = kcalloc(num_grefs, sizeof(*buf->grefs), GFP_KERNEL);
+	if (!buf->grefs)
+		return -ENOMEM;
+
+	buf->directory = kcalloc(num_pages_dir, XEN_PAGE_SIZE, GFP_KERNEL);
+	if (!buf->directory)
+		goto fail;
+
+	buf->buffer_sz = num_pages_buffer * XEN_PAGE_SIZE;
+	buf->buffer = alloc_pages_exact(buf->buffer_sz, GFP_KERNEL);
+	if (!buf->buffer)
+		goto fail;
+
+	return 0;
+
+fail:
+	kfree(buf->grefs);
+	buf->grefs = NULL;
+	kfree(buf->directory);
+	buf->directory = NULL;
+	return -ENOMEM;
+}
+
+int xen_snd_front_shbuf_alloc(struct xenbus_device *xb_dev,
+			      struct xen_snd_front_shbuf *buf,
+			      unsigned int buffer_sz)
+{
+	int num_pages_buffer, num_pages_dir, num_grefs;
+	int ret;
+
+	xen_snd_front_shbuf_clear(buf);
+
+	num_pages_buffer = DIV_ROUND_UP(buffer_sz, XEN_PAGE_SIZE);
+	/* number of pages the page directory consumes itself */
+	num_pages_dir = DIV_ROUND_UP(num_pages_buffer,
+				     XENSND_NUM_GREFS_PER_PAGE);
+	num_grefs = num_pages_buffer + num_pages_dir;
+
+	ret = alloc_int_buffers(buf, num_pages_dir,
+				num_pages_buffer, num_grefs);
+	if (ret < 0)
+		return ret;
+
+	ret = grant_references(xb_dev, buf, num_pages_dir, num_pages_buffer,
+			       num_grefs);
+	if (ret < 0)
+		return ret;
+
+	fill_page_dir(buf, num_pages_dir);
+	return 0;
+}
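(Worked sizing example, illustrative only. It assumes 4 KiB Xen pages and a
sndif page directory whose header is a single grant_ref_t link, giving
XENSND_NUM_GREFS_PER_PAGE = (4096 - 4) / 4 = 1023.)

static void shbuf_sizing_example(void)
{
	/* A 64 KiB PCM buffer: */
	int num_pages_buffer = DIV_ROUND_UP(64 * 1024, 4096);	   /* 16 */
	int num_pages_dir = DIV_ROUND_UP(num_pages_buffer, 1023); /* 1 */
	int num_grefs = num_pages_buffer + num_pages_dir;	   /* 17 */

	/*
	 * The single directory page is granted first (buf->grefs[0]), the
	 * 16 buffer grefs follow in buf->grefs[1..16], and the directory's
	 * gref_dir_next_page is GRANT_INVALID_REF since one page suffices.
	 */
}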
diff --git a/sound/xen/xen_snd_front_shbuf.h b/sound/xen/xen_snd_front_shbuf.h
new file mode 100644
index 0000000..d28e97c
--- /dev/null
+++ b/sound/xen/xen_snd_front_shbuf.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+/*
+ * Xen para-virtual sound device
+ *
+ * Copyright (C) 2016-2018 EPAM Systems Inc.
+ *
+ * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
+ */
+
+#ifndef __XEN_SND_FRONT_SHBUF_H
+#define __XEN_SND_FRONT_SHBUF_H
+
+#include <xen/grant_table.h>
+
+#include "xen_snd_front_evtchnl.h"
+
+struct xen_snd_front_shbuf {
+	int num_grefs;
+	grant_ref_t *grefs;
+	u8 *directory;
+	u8 *buffer;
+	size_t buffer_sz;
+};
+
+grant_ref_t xen_snd_front_shbuf_get_dir_start(struct xen_snd_front_shbuf *buf);
+
+int xen_snd_front_shbuf_alloc(struct xenbus_device *xb_dev,
+			      struct xen_snd_front_shbuf *buf,
+			      unsigned int buffer_sz);
+
+void xen_snd_front_shbuf_clear(struct xen_snd_front_shbuf *buf);
+
+void xen_snd_front_shbuf_free(struct xen_snd_front_shbuf *buf);
+
+#endif /* __XEN_SND_FRONT_SHBUF_H */
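(Illustrative lifecycle, not part of this diff: allocate a shared buffer and
hand its directory gref to the backend. The "buffer-ref" XenStore node is
made up for brevity; the real sndif protocol carries the directory gref in
its stream-open request instead.)

static int shbuf_example(struct xenbus_device *xb_dev,
			 struct xen_snd_front_shbuf *buf)
{
	int ret = xen_snd_front_shbuf_alloc(xb_dev, buf, 64 * 1024);

	if (ret < 0)
		return ret;

	ret = xenbus_printf(XBT_NIL, xb_dev->nodename, "buffer-ref", "%u",
			    xen_snd_front_shbuf_get_dir_start(buf));
	if (ret < 0)
		xen_snd_front_shbuf_free(buf);
	return ret;
}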
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 578793e..fb00a2f 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -198,7 +198,6 @@
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
 #define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
-
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
@@ -207,13 +206,19 @@
 #define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_CDP_L2		( 7*32+15) /* Code and Data Prioritization L2 */
-
+#define X86_FEATURE_MSR_SPEC_CTRL	( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD		( 7*32+17) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
 #define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
-
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 #define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD		( 7*32+24)  /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS		( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB		( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP		( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN			( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW		( 8*32+ 0) /* Intel TPR Shadow */
@@ -274,9 +279,10 @@
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
 #define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */
 #define X86_FEATURE_XSAVEERPTR		(13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_IBPB		(13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB		(13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS		(13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP		(13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD		(13*32+25) /* Virtualized Speculative Store Bypass Disable */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
@@ -334,6 +340,7 @@
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 
 /*
  * BUG word(s)
@@ -363,5 +370,6 @@
 #define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
 #define X86_BUG_SPECTRE_V1		X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
 #define X86_BUG_SPECTRE_V2		X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS	X86_BUG(17) /* CPU is affected by speculative store bypass attack */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index a3a4427..70fe612 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -21,6 +21,9 @@
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 
+#ifndef __pure
+#define  __pure		__attribute__((pure))
+#endif
 #define  noinline	__attribute__((noinline))
 #ifndef __packed
 #define __packed	__attribute__((packed))
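(A minimal illustration of what the __pure fallback above permits; my_strlen()
is a made-up example. A pure function's result depends only on its arguments
and the memory it reads, with no side effects, so the compiler may merge
repeated calls with unchanged inputs.)

#include <stddef.h>

static __pure size_t my_strlen(const char *s)
{
	size_t n = 0;

	while (s[n])
		n++;
	return n;	/* no side effects: duplicate calls can be folded */
}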
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index af5f8c2..db9f15f 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -207,4 +207,16 @@
 # define PR_SVE_VL_LEN_MASK		0xffff
 # define PR_SVE_VL_INHERIT		(1 << 17) /* inherit across exec */
 
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL		52
+#define PR_SET_SPECULATION_CTRL		53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS		0
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED		0
+# define PR_SPEC_PRCTL			(1UL << 0)
+# define PR_SPEC_ENABLE			(1UL << 1)
+# define PR_SPEC_DISABLE		(1UL << 2)
+# define PR_SPEC_FORCE_DISABLE		(1UL << 3)
+
 #endif /* _LINUX_PRCTL_H */
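(Example use of the new speculation-control prctl() from userspace, assuming
libc headers that already carry these constants; see
Documentation/userspace-api/spec_ctrl.rst in the kernel tree.)

#include <sys/prctl.h>

int ssb_disable_self(void)
{
	long state = prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
			   0, 0, 0);

	if (state < 0 || !(state & PR_SPEC_PRCTL))
		return -1;	/* no per-task control available */

	return prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS,
		     PR_SPEC_DISABLE, 0, 0);
}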
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 7b7fd0b..1200374 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -13,11 +13,9 @@
 
 #include "tracing_path.h"
 
-
-char tracing_mnt[PATH_MAX]         = "/sys/kernel/debug";
-char tracing_path[PATH_MAX]        = "/sys/kernel/debug/tracing";
-char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
-
+static char tracing_mnt[PATH_MAX]         = "/sys/kernel/debug";
+static char tracing_path[PATH_MAX]        = "/sys/kernel/debug/tracing";
+static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
 
 static void __tracing_path_set(const char *tracing, const char *mountpoint)
 {
@@ -76,7 +74,7 @@
 {
 	char *file;
 
-	if (asprintf(&file, "%s/%s", tracing_path, name) < 0)
+	if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0)
 		return NULL;
 
 	return file;
@@ -87,6 +85,34 @@
 	free(file);
 }
 
+char *get_events_file(const char *name)
+{
+	char *file;
+
+	if (asprintf(&file, "%s/events/%s", tracing_path_mount(), name) < 0)
+		return NULL;
+
+	return file;
+}
+
+void put_events_file(char *file)
+{
+	free(file);
+}
+
+DIR *tracing_events__opendir(void)
+{
+	DIR *dir = NULL;
+	char *path = get_tracing_file("events");
+
+	if (path) {
+		dir = opendir(path);
+		put_events_file(path);
+	}
+
+	return dir;
+}
+
 int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
 				   const char *sys, const char *name)
 {
@@ -129,7 +155,7 @@
 		snprintf(buf, size,
 			 "Error:\tNo permissions to read %s/%s\n"
 			 "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
-			 tracing_events_path, filename, tracing_mnt);
+			 tracing_events_path, filename, tracing_path_mount());
 	}
 		break;
 	default:
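(Illustrative use of the helpers added above; sched/sched_switch is just an
example event.)

#include <dirent.h>
#include <stdio.h>
#include "tracing_path.h"

static void events_example(void)
{
	DIR *dir = tracing_events__opendir();
	char *path = get_events_file("sched/sched_switch/format");

	if (path) {
		printf("format: %s\n", path); /* .../tracing/events/sched/... */
		put_events_file(path);
	}
	if (dir)
		closedir(dir);
}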
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
index 0066f06..a19136b 100644
--- a/tools/lib/api/fs/tracing_path.h
+++ b/tools/lib/api/fs/tracing_path.h
@@ -3,9 +3,9 @@
 #define __API_FS_TRACING_PATH_H
 
 #include <linux/types.h>
+#include <dirent.h>
 
-extern char tracing_path[];
-extern char tracing_events_path[];
+DIR *tracing_events__opendir(void);
 
 void tracing_path_set(const char *mountpoint);
 const char *tracing_path_mount(void);
@@ -13,5 +13,10 @@
 char *get_tracing_file(const char *name);
 void put_tracing_file(char *file);
 
+char *get_events_file(const char *name);
+void put_events_file(char *file);
+
+#define zput_events_file(ptr) ({ free(*ptr); *ptr = NULL; })
+
 int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name);
 #endif /* __API_FS_TRACING_PATH_H */
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 689b6a1..96d8305 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -10,6 +10,12 @@
 	return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT;
 }
 
+bool kallsyms__is_function(char symbol_type)
+{
+	symbol_type = toupper(symbol_type);
+	return symbol_type == 'T' || symbol_type == 'W';
+}
+
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start))
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
index bc40101..72ab987 100644
--- a/tools/lib/symbol/kallsyms.h
+++ b/tools/lib/symbol/kallsyms.h
@@ -20,6 +20,8 @@
 
 u8 kallsyms2elf_type(char type);
 
+bool kallsyms__is_function(char symbol_type);
+
 int kallsyms__parse(const char *filename, void *arg,
 		    int (*process_symbol)(void *arg, const char *name,
 					  char type, u64 start));
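(A sketch of the new helper in use with kallsyms__parse(); the counting
callback is made up for illustration.)

#include "kallsyms.h"

static int count_function(void *arg, const char *name, char type, u64 start)
{
	if (kallsyms__is_function(type))
		(*(int *)arg)++;
	return 0;
}

/* Usage: int nr = 0; kallsyms__parse("/proc/kallsyms", &nr, count_function); */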
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 956b1ae4..33ba98d 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -1,6 +1,6 @@
                                   Prior Operation     Subsequent Operation
                                   ---------------  ---------------------------
-                               C  Self  R  W  RWM  Self  R  W  DR  DW  RMW  SV
+                               C  Self  R  W  RMW  Self  R  W  DR  DW  RMW  SV
                               --  ----  -  -  ---  ----  -  -  --  --  ---  --
 
 Store, e.g., WRITE_ONCE()            Y                                       Y
@@ -14,7 +14,7 @@
 smp_mb() & synchronize_rcu()  CP        Y  Y    Y        Y  Y   Y   Y    Y
 Successful full non-void RMW  CP     Y  Y  Y    Y     Y  Y  Y   Y   Y    Y   Y
 smp_mb__before_atomic()       CP        Y  Y    Y        a  a   a   a    Y
-smp_mb__after_atomic()        CP        a  a    Y        Y  Y   Y   Y
+smp_mb__after_atomic()        CP        a  a    Y        Y  Y   Y   Y    Y
 
 
 Key:	C:	Ordering is cumulative
@@ -26,4 +26,5 @@
 	DR:	Dependent read (address dependency)
 	DW:	Dependent write (address, data, or control dependency)
 	RMW:	Atomic read-modify-write operation
-	SV	Same-variable access
+	SELF:	Orders self, as opposed to accesses before and/or after
+	SV:	Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index a727c82..1b09f31 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -27,7 +27,7 @@
   19. AND THEN THERE WAS ALPHA
   20. THE HAPPENS-BEFORE RELATION: hb
   21. THE PROPAGATES-BEFORE RELATION: pb
-  22. RCU RELATIONS: link, gp-link, rscs-link, and rcu-path
+  22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
   23. ODDS AND ENDS
 
 
@@ -1451,8 +1451,8 @@
 the content of the LKMM's "propagation" axiom.
 
 
-RCU RELATIONS: link, gp-link, rscs-link, and rcu-path
------------------------------------------------------
+RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
+----------------------------------------------------
 
 RCU (Read-Copy-Update) is a powerful synchronization mechanism.  It
 rests on two concepts: grace periods and read-side critical sections.
@@ -1509,8 +1509,8 @@
 propagate to P1 before the end of the grace period, violating the
 Guarantee.
 
-In the kernel's implementations of RCU, the business about stores
-propagating to every CPU is realized by placing strong fences at
+In the kernel's implementations of RCU, the requirements for stores
+to propagate to every CPU are fulfilled by placing strong fences at
 suitable places in the RCU-related code.  Thus, if a critical section
 starts before a grace period does then the critical section's CPU will
 execute an smp_mb() fence after the end of the critical section and
@@ -1523,72 +1523,124 @@
 What exactly do we mean by saying that a critical section "starts
 before" or "ends after" a grace period?  Some aspects of the meaning
 are pretty obvious, as in the example above, but the details aren't
-entirely clear.  The LKMM formalizes this notion by means of a
-relation with the unfortunately generic name "link".  It is a very
-general relation; among other things, X ->link Z includes cases where
-X happens-before or is equal to some event Y which is equal to or
-comes before Z in the coherence order.  Taking Y = Z, this says that
-X ->rfe Z implies X ->link Z, and taking Y = X, it says that X ->fr Z
-and X ->co Z each imply X ->link Z.
+entirely clear.  The LKMM formalizes this notion by means of the
+rcu-link relation.  rcu-link encompasses a very general notion of
+"before": Among other things, X ->rcu-link Z includes cases where X
+happens-before or is equal to some event Y which is equal to or comes
+before Z in the coherence order.  When Y = Z this says that X ->rfe Z
+implies X ->rcu-link Z.  In addition, when Y = X it says that X ->fr Z
+and X ->co Z each imply X ->rcu-link Z.
 
-The formal definition of the link relation is more than a little
+The formal definition of the rcu-link relation is more than a little
 obscure, and we won't give it here.  It is closely related to the pb
 relation, and the details don't matter unless you want to comb through
 a somewhat lengthy formal proof.  Pretty much all you need to know
-about link is the information in the preceding paragraph.
+about rcu-link is the information in the preceding paragraph.
 
-The LKMM goes on to define the gp-link and rscs-link relations.  They
-bring grace periods and read-side critical sections into the picture,
-in the following way:
+The LKMM also defines the gp and rscs relations.  They bring grace
+periods and read-side critical sections into the picture, in the
+following way:
 
-	E ->gp-link F means there is a synchronize_rcu() fence event S
-	and an event X such that E ->po S, either S ->po X or S = X,
-	and X ->link F.  In other words, E and F are connected by a
-	grace period followed by an instance of link.
+	E ->gp F means there is a synchronize_rcu() fence event S such
+	that E ->po S and either S ->po F or S = F.  In simple terms,
+	there is a grace period po-between E and F.
 
-	E ->rscs-link F means there is a critical section delimited by
-	an rcu_read_lock() fence L and an rcu_read_unlock() fence U,
-	and an event X such that E ->po U, either L ->po X or L = X,
-	and X ->link F.  Roughly speaking, this says that some event
-	in the same critical section as E is connected by link to F.
+	E ->rscs F means there is a critical section delimited by an
+	rcu_read_lock() fence L and an rcu_read_unlock() fence U, such
+	that E ->po U and either L ->po F or L = F.  You can think of
+	this as saying that E and F are in the same critical section
+	(in fact, it also allows E to be po-before the start of the
+	critical section and F to be po-after the end).
 
-If we think of the link relation as standing for an extended "before",
-then E ->gp-link F says that E executes before a grace period which
-ends before F executes.  (In fact it says more than this, because it
-includes cases where E executes before a grace period and some store
-propagates to F's CPU before F executes and doesn't propagate to some
-other CPU until after the grace period ends.)  Similarly,
-E ->rscs-link F says that E is part of (or before the start of) a
-critical section which starts before F executes.
+If we think of the rcu-link relation as standing for an extended
+"before", then X ->gp Y ->rcu-link Z says that X executes before a
+grace period which ends before Z executes.  (In fact it covers more
+than this, because it also includes cases where X executes before a
+grace period and some store propagates to Z's CPU before Z executes
+but doesn't propagate to some other CPU until after the grace period
+ends.)  Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or
+before the start of) a critical section which starts before Z
+executes.
+
+The LKMM goes on to define the rcu-fence relation as a sequence of gp
+and rscs links separated by rcu-link links, in which the number of gp
+links is >= the number of rscs links.  For example:
+
+	X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+
+would imply that X ->rcu-fence V, because this sequence contains two
+gp links and only one rscs link.  (It also implies that X ->rcu-fence T
+and Z ->rcu-fence V.)  On the other hand:
+
+	X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+
+does not imply X ->rcu-fence V, because the sequence contains only
+one gp link but two rscs links.
+
+The rcu-fence relation is important because the Grace Period Guarantee
+means that rcu-fence acts kind of like a strong fence.  In particular,
+if W is a write and we have W ->rcu-fence Z, the Guarantee says that W
+will propagate to every CPU before Z executes.
+
+To prove this in full generality requires some intellectual effort.
+We'll consider just a very simple case:
+
+	W ->gp X ->rcu-link Y ->rscs Z.
+
+This formula means that there is a grace period G and a critical
+section C such that:
+
+	1. W is po-before G;
+
+	2. X is equal to or po-after G;
+
+	3. X comes "before" Y in some sense;
+
+	4. Y is po-before the end of C;
+
+	5. Z is equal to or po-after the start of C.
+
+From 2 - 4 we deduce that the grace period G ends before the critical
+section C.  Then the second part of the Grace Period Guarantee says
+not only that G starts before C does, but also that W (which executes
+on G's CPU before G starts) must propagate to every CPU before C
+starts.  In particular, W propagates to every CPU before Z executes
+(or finishes executing, in the case where Z is equal to the
+rcu_read_lock() fence event which starts C.)  This sort of reasoning
+can be expanded to handle all the situations covered by rcu-fence.
+
+Finally, the LKMM defines the RCU-before (rb) relation in terms of
+rcu-fence.  This is done in essentially the same way as the pb
+relation was defined in terms of strong-fence.  We will omit the
+details; the end result is that E ->rb F implies E must execute before
+F, just as E ->pb F does (and for much the same reasons).
 
 Putting this all together, the LKMM expresses the Grace Period
-Guarantee by requiring that there are no cycles consisting of gp-link
-and rscs-link connections in which the number of gp-link instances is
->= the number of rscs-link instances.  It does this by defining the
-rcu-path relation to link events E and F whenever it is possible to
-pass from E to F by a sequence of gp-link and rscs-link connections
-with at least as many of the former as the latter.  The LKMM's "rcu"
-axiom then says that there are no events E such that E ->rcu-path E.
+Guarantee by requiring that the rb relation does not contain a cycle.
+Equivalently, this "rcu" axiom requires that there are no events E and
+F with E ->rcu-link F ->rcu-fence E.  Or to put it a third way, the
+axiom requires that there are no cycles consisting of gp and rscs
+alternating with rcu-link, where the number of gp links is >= the
+number of rscs links.
 
-Justifying this axiom takes some intellectual effort, but it is in
-fact a valid formalization of the Grace Period Guarantee.  We won't
-attempt to go through the detailed argument, but the following
-analysis gives a taste of what is involved.  Suppose we have a
-violation of the first part of the Guarantee: A critical section
-starts before a grace period, and some store propagates to the
-critical section's CPU before the end of the critical section but
-doesn't propagate to some other CPU until after the end of the grace
-period.
+Justifying the axiom isn't easy, but it is in fact a valid
+formalization of the Grace Period Guarantee.  We won't attempt to go
+through the detailed argument, but the following analysis gives a
+taste of what is involved.  Suppose we have a violation of the first
+part of the Guarantee: A critical section starts before a grace
+period, and some store propagates to the critical section's CPU before
+the end of the critical section but doesn't propagate to some other
+CPU until after the end of the grace period.
 
 Putting symbols to these ideas, let L and U be the rcu_read_lock() and
 rcu_read_unlock() fence events delimiting the critical section in
 question, and let S be the synchronize_rcu() fence event for the grace
 period.  Saying that the critical section starts before S means there
 are events E and F where E is po-after L (which marks the start of the
-critical section), E is "before" F in the sense of the link relation,
-and F is po-before the grace period S:
+critical section), E is "before" F in the sense of the rcu-link
+relation, and F is po-before the grace period S:
 
-	L ->po E ->link F ->po S.
+	L ->po E ->rcu-link F ->po S.
 
 Let W be the store mentioned above, let Z come before the end of the
 critical section and witness that W propagates to the critical
@@ -1600,16 +1652,19 @@
 
 The fr link from Y to W indicates that W has not propagated to Y's CPU
 at the time that Y executes.  From this, it can be shown (see the
-discussion of the link relation earlier) that X and Z are connected by
-link, yielding:
+discussion of the rcu-link relation earlier) that X and Z are related
+by rcu-link, yielding:
 
-	S ->po X ->link Z ->po U.
+	S ->po X ->rcu-link Z ->po U.
 
-These formulas say that S is po-between F and X, hence F ->gp-link Z
-via X.  They also say that Z comes before the end of the critical
-section and E comes after its start, hence Z ->rscs-link F via E.  But
-now we have a forbidden cycle: F ->gp-link Z ->rscs-link F.  Thus the
-"rcu" axiom rules out this violation of the Grace Period Guarantee.
+The formulas say that S is po-between F and X, hence F ->gp X.  They
+also say that Z comes before the end of the critical section and E
+comes after its start, hence Z ->rscs E.  From all this we obtain:
+
+	F ->gp X ->rcu-link Z ->rscs E ->rcu-link F,
+
+a forbidden cycle.  Thus the "rcu" axiom rules out this violation of
+the Grace Period Guarantee.
 
 For something a little more down-to-earth, let's see how the axiom
 works out in practice.  Consider the RCU code example from above, this
@@ -1635,18 +1690,18 @@
 	}
 
 
-If r2 = 0 at the end then P0's store at X overwrites the value
-that P1's load at Z reads from, so we have Z ->fre X and thus
-Z ->link X.  In addition, there is a synchronize_rcu() between Y and
-Z, so therefore we have Y ->gp-link X.
+If r2 = 0 at the end then P0's store at X overwrites the value that
+P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X.
+In addition, there is a synchronize_rcu() between Y and Z, so therefore
+we have Y ->gp Z.
 
 If r1 = 1 at the end then P1's load at Y reads from P0's store at W,
-so we have W ->link Y.  In addition, W and X are in the same critical
-section, so therefore we have X ->rscs-link Y.
+so we have W ->rcu-link Y.  In addition, W and X are in the same critical
+section, so therefore we have X ->rscs W.
 
-This gives us a cycle, Y ->gp-link X ->rscs-link Y, with one gp-link
-and one rscs-link, violating the "rcu" axiom.  Hence the outcome is
-not allowed by the LKMM, as we would expect.
+Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle,
+violating the "rcu" axiom.  Hence the outcome is not allowed by the
+LKMM, as we would expect.
 
 For contrast, let's see what can happen in a more complicated example:
 
@@ -1682,15 +1737,11 @@
 	}
 
 If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows
-that W ->rscs-link Y via X, Y ->gp-link U via Z, and U ->rscs-link W
-via V.  And just as before, this gives a cycle:
-
-	W ->rscs-link Y ->gp-link U ->rscs-link W.
-
-However, this cycle has fewer gp-link instances than rscs-link
-instances, and consequently the outcome is not forbidden by the LKMM.
-The following instruction timing diagram shows how it might actually
-occur:
+that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W.
+However this cycle is not forbidden, because the sequence of relations
+contains fewer instances of gp (one) than of rscs (two).  Consequently
+the outcome is allowed by the LKMM.  The following instruction timing
+diagram shows how it might actually occur:
 
 P0			P1			P2
 --------------------	--------------------	--------------------
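(A plain C restatement of the earlier two-process example whose outcome the
"rcu" axiom forbids; illustrative only, with labels mapping statements to the
relations discussed above.)

int x, y;

void P0(void)
{
	rcu_read_lock();	/* critical section: supplies the rscs link */
	WRITE_ONCE(y, 1);	/* W */
	WRITE_ONCE(x, 1);	/* X */
	rcu_read_unlock();
}

void P1(void)
{
	int r1, r2;

	r1 = READ_ONCE(y);	/* Y */
	synchronize_rcu();	/* supplies the gp link */
	r2 = READ_ONCE(x);	/* Z */
}

/* r1 == 1 && r2 == 0 would close the forbidden cycle
 * X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X. */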
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index ba2e34c..b177f3e 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -63,15 +63,22 @@
 	Principles of Programming Languages (POPL 2017). ACM, New York,
 	NY, USA, 429–442.
 
+o	Christopher Pulte, Shaked Flur, Will Deacon, Jon French,
+	Susmit Sarkar, and Peter Sewell. 2018. "Simplifying ARM concurrency:
+	multicopy-atomic axiomatic and operational models for ARMv8". In
+	Proceedings of the ACM on Programming Languages, Volume 2, Issue
+	POPL, Article No. 19. ACM, New York, NY, USA.
+
 
 Linux-kernel memory model
 =========================
 
-o	Andrea Parri, Alan Stern, Luc Maranget, Paul E. McKenney,
-	and Jade Alglave.  2017. "A formal model of
-	Linux-kernel memory ordering - companion webpage".
-	http://moscova.inria.fr/∼maranget/cats7/linux/. (2017). [Online;
-	accessed 30-January-2017].
+o	Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
+	Alan Stern.  2018. "Frightening small children and disconcerting
+	grown-ups: Concurrency in the Linux kernel". In Proceedings of
+	the 23rd International Conference on Architectural Support for
+	Programming Languages and Operating Systems (ASPLOS 2018). ACM,
+	New York, NY, USA, 405-418.  Webpage: http://diy.inria.fr/linux/.
 
 o	Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
 	Alan Stern.  2017.  "A formal kernel memory-ordering model (part 1)"
diff --git a/tools/memory-model/README b/tools/memory-model/README
index 0b3a5f3..734f7fe 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -20,7 +20,7 @@
 REQUIREMENTS
 ============
 
-Version 7.48 of the "herd7" and "klitmus7" tools must be downloaded
+Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded
 separately:
 
   https://github.com/herd/herdtools7
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index 432c7cf..64f5740 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -5,10 +5,10 @@
  * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
  *                    Andrea Parri <parri.andrea@gmail.com>
  *
- * An earlier version of this file appears in the companion webpage for
+ * An earlier version of this file appeared in the companion webpage for
  * "Frightening small children and disconcerting grown-ups: Concurrency
  * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
- * which is to appear in ASPLOS 2018.
+ * which appeared in ASPLOS 2018.
  *)
 
 "Linux-kernel memory consistency model"
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index df97db0..59b5cbe 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -5,10 +5,10 @@
  * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
  *                    Andrea Parri <parri.andrea@gmail.com>
  *
- * An earlier version of this file appears in the companion webpage for
+ * An earlier version of this file appeared in the companion webpage for
  * "Frightening small children and disconcerting grown-ups: Concurrency
  * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
- * which is to appear in ASPLOS 2018.
+ * which appeared in ASPLOS 2018.
  *)
 
 "Linux-kernel memory consistency model"
@@ -100,22 +100,29 @@
  * one but two non-rf relations, but only in conjunction with an RCU
  * read-side critical section.
  *)
-let link = hb* ; pb* ; prop
-
-(* Chains that affect the RCU grace-period guarantee *)
-let gp-link = gp ; link
-let rscs-link = rscs ; link
+let rcu-link = hb* ; pb* ; prop
 
 (*
- * A cycle containing at least as many grace periods as RCU read-side
- * critical sections is forbidden.
+ * Any sequence containing at least as many grace periods as RCU read-side
+ * critical sections (joined by rcu-link) acts as a generalized strong fence.
  *)
-let rec rcu-path =
-	gp-link |
-	(gp-link ; rscs-link) |
-	(rscs-link ; gp-link) |
-	(rcu-path ; rcu-path) |
-	(gp-link ; rcu-path ; rscs-link) |
-	(rscs-link ; rcu-path ; gp-link)
+let rec rcu-fence = gp |
+	(gp ; rcu-link ; rscs) |
+	(rscs ; rcu-link ; gp) |
+	(gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) |
+	(rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) |
+	(rcu-fence ; rcu-link ; rcu-fence)
 
-irreflexive rcu-path as rcu
+(* rb orders instructions just as pb does *)
+let rb = prop ; rcu-fence ; hb* ; pb*
+
+irreflexive rb as rcu
+
+(*
+ * The happens-before, propagation, and rcu constraints are all
+ * expressions of temporal ordering.  They could be replaced by
+ * a single constraint on an "executes-before" relation, xb:
+ *
+ * let xb = hb | pb | rb
+ * acyclic xb as executes-before
+ *)
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 397e4e6..6fa3eb2 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0+
 //
-// An earlier version of this file appears in the companion webpage for
+// An earlier version of this file appeared in the companion webpage for
 // "Frightening small children and disconcerting grown-ups: Concurrency
 // in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
-// which is to appear in ASPLOS 2018.
+// which appeared in ASPLOS 2018.
 
 // ONCE
 READ_ONCE(X) __load{once}(X)
@@ -14,14 +14,15 @@
 smp_load_acquire(X) __load{acquire}(*X)
 rcu_assign_pointer(X,V) { __store{release}(X,V); }
 rcu_dereference(X) __load{once}(X)
+smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; }
 
 // Fences
-smp_mb() { __fence{mb} ; }
-smp_rmb() { __fence{rmb} ; }
-smp_wmb() { __fence{wmb} ; }
-smp_mb__before_atomic() { __fence{before-atomic} ; }
-smp_mb__after_atomic() { __fence{after-atomic} ; }
-smp_mb__after_spinlock() { __fence{after-spinlock} ; }
+smp_mb() { __fence{mb}; }
+smp_rmb() { __fence{rmb}; }
+smp_wmb() { __fence{wmb}; }
+smp_mb__before_atomic() { __fence{before-atomic}; }
+smp_mb__after_atomic() { __fence{after-atomic}; }
+smp_mb__after_spinlock() { __fence{after-spinlock}; }
 
 // Exchange
 xchg(X,V)  __xchg{mb}(X,V)
@@ -34,26 +35,27 @@
 cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
 
 // Spinlocks
-spin_lock(X) { __lock(X) ; }
-spin_unlock(X) { __unlock(X) ; }
+spin_lock(X) { __lock(X); }
+spin_unlock(X) { __unlock(X); }
 spin_trylock(X) __trylock(X)
+spin_is_locked(X) __islocked(X)
 
 // RCU
 rcu_read_lock() { __fence{rcu-lock}; }
-rcu_read_unlock() { __fence{rcu-unlock};}
+rcu_read_unlock() { __fence{rcu-unlock}; }
 synchronize_rcu() { __fence{sync-rcu}; }
 synchronize_rcu_expedited() { __fence{sync-rcu}; }
 
 // Atomic
 atomic_read(X) READ_ONCE(*X)
-atomic_set(X,V) { WRITE_ONCE(*X,V) ; }
+atomic_set(X,V) { WRITE_ONCE(*X,V); }
 atomic_read_acquire(X) smp_load_acquire(X)
 atomic_set_release(X,V) { smp_store_release(X,V); }
 
-atomic_add(V,X) { __atomic_op(X,+,V) ; }
-atomic_sub(V,X) { __atomic_op(X,-,V) ; }
-atomic_inc(X)   { __atomic_op(X,+,1) ; }
-atomic_dec(X)   { __atomic_op(X,-,1) ; }
+atomic_add(V,X) { __atomic_op(X,+,V); }
+atomic_sub(V,X) { __atomic_op(X,-,V); }
+atomic_inc(X)   { __atomic_op(X,+,1); }
+atomic_dec(X)   { __atomic_op(X,-,1); }
 
 atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
 atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
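(Per the smp_store_mb() definition added above, the model treats it as a
once-store followed by a full fence, so under the LKMM these two forms are
equivalent; illustrative only.)

void store_mb_form(int *x)
{
	smp_store_mb(*x, 1);	/* __store{once} ; __fence{mb} */
}

void open_coded_form(int *x)
{
	WRITE_ONCE(*x, 1);
	smp_mb();
}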
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
new file mode 100644
index 0000000..6e2ddc54
--- /dev/null
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -0,0 +1 @@
+*.litmus.out
diff --git a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
index 50d5db9..98a3716 100644
--- a/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
+++ b/tools/memory-model/litmus-tests/IRIW+mbonceonces+OnceOnce.litmus
@@ -7,7 +7,7 @@
 * between each pair of reads.  In other words, is smp_mb() sufficient to
  * cause two different reading processes to agree on the order of a pair
  * of writes, where each write is to a different variable by a different
- * process?
+ * process?  This litmus test exercises LKMM's "propagation" rule.
  *)
 
 {}
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
new file mode 100644
index 0000000..50f4d62
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -0,0 +1,35 @@
+C MP+polockmbonce+poacquiresilsil
+
+(*
+ * Result: Never
+ *
+ * Do spinlocks combined with smp_mb__after_spinlock() provide order
+ * to outside observers using spin_is_locked() to sense the lock-held
+ * state, ordered by acquire?  Note that when the first spin_is_locked()
+ * returns false and the second true, we know that the smp_load_acquire()
+ * executed before the lock was acquired (loosely speaking).
+ *)
+
+{
+}
+
+P0(spinlock_t *lo, int *x)
+{
+	spin_lock(lo);
+	smp_mb__after_spinlock();
+	WRITE_ONCE(*x, 1);
+	spin_unlock(lo);
+}
+
+P1(spinlock_t *lo, int *x)
+{
+	int r1;
+	int r2;
+	int r3;
+
+	r1 = smp_load_acquire(x);
+	r2 = spin_is_locked(lo);
+	r3 = spin_is_locked(lo);
+}
+
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
new file mode 100644
index 0000000..abf81e7
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -0,0 +1,34 @@
+C MP+polockonce+poacquiresilsil
+
+(*
+ * Result: Sometimes
+ *
+ * Do spinlocks provide order to outside observers using spin_is_locked()
+ * to sense the lock-held state, ordered by acquire?  Note that when the
+ * first spin_is_locked() returns false and the second true, we know that
+ * the smp_load_acquire() executed before the lock was acquired (loosely
+ * speaking).
+ *)
+
+{
+}
+
+P0(spinlock_t *lo, int *x)
+{
+	spin_lock(lo);
+	WRITE_ONCE(*x, 1);
+	spin_unlock(lo);
+}
+
+P1(spinlock_t *lo, int *x)
+{
+	int r1;
+	int r2;
+	int r3;
+
+	r1 = smp_load_acquire(x);
+	r2 = spin_is_locked(lo);
+	r3 = spin_is_locked(lo);
+}
+
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
index 04096fb..17eb9a8 100644
--- a/tools/memory-model/litmus-tests/README
+++ b/tools/memory-model/litmus-tests/README
@@ -23,7 +23,8 @@
 	between each pair of reads.  In other words, is smp_mb()
 	sufficient to cause two different reading processes to agree on
 	the order of a pair of writes, where each write is to a different
-	variable by a different process?
+	variable by a different process?  This litmus test is forbidden
+	by LKMM's propagation rule.
 
 IRIW+poonceonces+OnceOnce.litmus
 	Test of independent reads from independent writes with nothing
@@ -63,6 +64,16 @@
 MP+onceassign+derefonce.litmus
 	As below, but with rcu_assign_pointer() and an rcu_dereference().
 
+MP+polockmbonce+poacquiresilsil.litmus
+	Protect the access with a lock and an smp_mb__after_spinlock()
+	in one process, and use an acquire load followed by a pair of
+	spin_is_locked() calls in the other process.
+
+MP+polockonce+poacquiresilsil.litmus
+	Protect the access with a lock in one process, and use an
+	acquire load followed by a pair of spin_is_locked() calls
+	in the other process.
+
 MP+polocks.litmus
 	As below, but with the second access of the writer process
 	and the first access of reader process protected by a lock.
@@ -109,8 +120,10 @@
 
 WRC+poonceonces+Once.litmus
 WRC+pooncerelease+rmbonceonce+Once.litmus
-	These two are members of an extension of the MP litmus-test class
-	in which the first write is moved to a separate process.
+	These two are members of an extension of the MP litmus-test
+	class in which the first write is moved to a separate process.
+	The second is forbidden because smp_store_release() is
+	A-cumulative in LKMM.
 
 Z6.0+pooncelock+pooncelock+pombonce.litmus
 	Is the ordering provided by a spin_unlock() and a subsequent
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
index 97fcbff..ad3448b 100644
--- a/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
+++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+rmbonceonce+Once.litmus
@@ -5,7 +5,9 @@
  *
  * This litmus test is an extension of the message-passing pattern, where
  * the first write is moved to a separate process.  Because it features
- * a release and a read memory barrier, it should be forbidden.
+ * a release and a read memory barrier, it should be forbidden.  More
+ * specifically, this litmus test is forbidden because smp_store_release()
+ * is A-cumulative in LKMM.
  *)
 
 {}
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
index ba4a4ec..305ded1 100644
--- a/tools/memory-model/lock.cat
+++ b/tools/memory-model/lock.cat
@@ -4,46 +4,72 @@
  * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>
  *)
 
-(* Generate coherence orders and handle lock operations *)
+(*
+ * Generate coherence orders and handle lock operations
+ *
+ * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48.
+ * spin_is_locked() is functional from herd7 version 7.49.
+ *)
 
 include "cross.cat"
 
-(* From lock reads to their partner lock writes *)
+(*
+ * The lock-related events generated by herd are as follows:
+ *
+ * LKR		Lock-Read: the read part of a spin_lock() or successful
+ *			spin_trylock() read-modify-write event pair
+ * LKW		Lock-Write: the write part of a spin_lock() or successful
+ *			spin_trylock() RMW event pair
+ * UL		Unlock: a spin_unlock() event
+ * LF		Lock-Fail: a failed spin_trylock() event
+ * RL		Read-Locked: a spin_is_locked() event which returns True
+ * RU		Read-Unlocked: a spin_is_locked() event which returns False
+ *
+ * LKR and LKW events always come paired, like all RMW event sequences.
+ *
+ * LKR, LF, RL, and RU are read events; LKR has Acquire ordering.
+ * LKW and UL are write events; UL has Release ordering.
+ * LKW, LF, RL, and RU have no ordering properties.
+ *)
+
+(* Backward compatibility *)
+let RL = try RL with emptyset
+let RU = try RU with emptyset
+
+(* Treat RL as a kind of LF: a read with no ordering properties *)
+let LF = LF | RL
+
+(* There should be no ordinary R or W accesses to spinlocks *)
+let ALL-LOCKS = LKR | LKW | UL | LF | RU
+flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses
+
+(* Link Lock-Reads to their RMW-partner Lock-Writes *)
 let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
 let rmw = rmw | lk-rmw
 
-(*
- * A paired LKR must always see an unlocked value; spin_lock() calls nested
- * inside a critical section (for the same lock) always deadlock.
- *)
-empty ([LKW] ; po-loc ; [domain(lk-rmw)]) \ (po-loc ; [UL] ; po-loc)
-	as lock-nest
-
-(* The litmus test is invalid if an LKW event is not part of an RMW pair *)
+(* The litmus test is invalid if an LKR/LKW event is not part of an RMW pair *)
 flag ~empty LKW \ range(lk-rmw) as unpaired-LKW
-
-(* This will be allowed if we implement spin_is_locked() *)
 flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR
 
-(* There should be no R or W accesses to spinlocks *)
-let ALL-LOCKS = LKR | LKW | UL | LF
-flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses
+(*
+ * An LKR must always see an unlocked value; spin_lock() calls nested
+ * inside a critical section (for the same lock) always deadlock.
+ *)
+empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest
 
 (* The final value of a spinlock should not be tested *)
 flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final
 
-
 (*
  * Put lock operations in their appropriate classes, but leave UL out of W
  * until after the co relation has been generated.
  *)
-let R = R | LKR | LF
+let R = R | LKR | LF | RU
 let W = W | LKW
 
 let Release = Release | UL
 let Acquire = Acquire | LKR
 
-
 (* Match LKW events to their corresponding UL events *)
 let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc)
 
@@ -53,27 +79,48 @@
 let UNMATCHED-LKW = LKW \ domain(critical)
 empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks
 
-
 (* rfi for LF events: link each LKW to the LF events in its critical section *)
 let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc)
 
 (* rfe for LF events *)
 let all-possible-rfe-lf =
-  (*
-   * Given an LF event r, compute the possible rfe edges for that event
-   * (all those starting from LKW events in other threads),
-   * and then convert that relation to a set of single-edge relations.
-   *)
-  let possible-rfe-lf r =
-    let pair-to-relation p = p ++ 0
-    in map pair-to-relation ((LKW * {r}) & loc & ext)
-  (* Do this for each LF event r that isn't in rfi-lf *)
-  in map possible-rfe-lf (LF \ range(rfi-lf))
+	(*
+	 * Given an LF event r, compute the possible rfe edges for that event
+	 * (all those starting from LKW events in other threads),
+	 * and then convert that relation to a set of single-edge relations.
+	 *)
+	let possible-rfe-lf r =
+		let pair-to-relation p = p ++ 0
+		in map pair-to-relation ((LKW * {r}) & loc & ext)
+	(* Do this for each LF event r that isn't in rfi-lf *)
+	in map possible-rfe-lf (LF \ range(rfi-lf))
 
 (* Generate all rf relations for LF events *)
 with rfe-lf from cross(all-possible-rfe-lf)
-let rf = rf | rfi-lf | rfe-lf
+let rf-lf = rfe-lf | rfi-lf
 
+(*
+ * RU, i.e., spin_is_locked() returning False, is slightly different.
+ * We rely on the memory model to rule out cases where spin_is_locked()
+ * within one of the lock's critical sections returns False.
+ *)
+
+(* rfi for RU events: an RU may read from the last po-previous UL *)
+let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc)
+
+(* rfe for RU events: an RU may read from an external UL or the initial write *)
+let all-possible-rfe-ru =
+	let possible-rfe-ru r =
+		let pair-to-relation p = p ++ 0
+		in map pair-to-relation (((UL | IW) * {r}) & loc & ext)
+	in map possible-rfe-ru RU
+
+(* Generate all rf relations for RU events *)
+with rfe-ru from cross(all-possible-rfe-ru)
+let rf-ru = rfe-ru | rfi-ru
+
+(* Final rf relation *)
+let rf = rf | rf-lf | rf-ru
 
 (* Generate all co relations, including LKW events but not UL *)
 let co0 = co0 | ([IW] ; loc ; [LKW]) |
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
new file mode 100644
index 0000000..af0aa15
--- /dev/null
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+#
+# Run herd tests on all .litmus files in the specified directory (which
+# defaults to litmus-tests) and check each file's result against a "Result:"
+# comment within that litmus test.  If the verification result does not
+# match that specified in the litmus test, this script prints an error
+# message prefixed with "^^^".  It also outputs verification results to
+# a file whose name is that of the specified litmus test, but with ".out"
+# appended.
+#
+# Usage:
+#	sh checkalllitmus.sh [ directory ]
+#
+# The LINUX_HERD_OPTIONS environment variable may be used to specify
+# arguments to herd, whose default is defined by the checklitmus.sh script.
+# Thus, one would normally run this in the directory containing the memory
+# model.
+#
+# This script makes no attempt to run the litmus tests concurrently.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+litmusdir=${1-litmus-tests}
+if test -d "$litmusdir" -a -r "$litmusdir" -a -x "$litmusdir"
+then
+	:
+else
+	echo ' --- ' error: $litmusdir is not an accessible directory
+	exit 255
+fi
+
+# Find the checklitmus script.  If it is not where we expect it, then
+# assume that the caller has the PATH environment variable set
+# appropriately.
+if test -x scripts/checklitmus.sh
+then
+	clscript=scripts/checklitmus.sh
+else
+	clscript=checklitmus.sh
+fi
+
+# Run the script on all the litmus tests in the specified directory
+ret=0
+for i in $litmusdir/*.litmus
+do
+	if ! $clscript $i
+	then
+		ret=1
+	fi
+done
+if test "$ret" -ne 0
+then
+	echo " ^^^ VERIFICATION MISMATCHES"
+else
+	echo All litmus tests verified as expected.
+fi
+exit $ret
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
new file mode 100644
index 0000000..e2e4774
--- /dev/null
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+#
+# Run a herd test and check the result against a "Result:" comment within
+# the litmus test.  If the verification result does not match that specified
+# in the litmus test, this script prints an error message prefixed with
+# "^^^" and exits with a non-zero status.  It also outputs verification
+# results to a file whose name is that of the specified litmus test, but
+# with ".out" appended.
+#
+# Usage:
+#	sh checklitmus.sh file.litmus
+#
+# The LINUX_HERD_OPTIONS environment variable may be used to specify
+# arguments to herd, which default to "-conf linux-kernel.cfg".  Thus,
+# one would normally run this in the directory containing the memory model,
+# specifying the pathname of the litmus test to check.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+litmus=$1
+herdoptions=${LINUX_HERD_OPTIONS--conf linux-kernel.cfg}
+
+if test -f "$litmus" -a -r "$litmus"
+then
+	:
+else
+	echo ' --- ' error: \"$litmus\" is not a readable file
+	exit 255
+fi
+if grep -q '^ \* Result: ' $litmus
+then
+	outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
+else
+	outcome=specified
+fi
+
+echo Herd options: $herdoptions > $litmus.out
+/usr/bin/time herd7 -o ~/tmp $herdoptions $litmus >> $litmus.out 2>&1
+grep "Herd options:" $litmus.out
+grep '^Observation' $litmus.out
+if grep -q '^Observation' $litmus.out
+then
+	:
+else
+	cat $litmus.out
+	echo ' ^^^ Verification error'
+	echo ' ^^^ Verification error' >> $litmus.out 2>&1
+	exit 255
+fi
+if test "$outcome" = DEADLOCK
+then
+	if grep '^Observation' $litmus.out | grep -q 'Never 0 0$'
+	then
+		ret=0
+	else
+		echo " ^^^ Unexpected non-$outcome verification"
+		echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
+		ret=1
+	fi
+elif grep '^Observation' $litmus.out | grep -q $outcome || test "$outcome" = Maybe
+then
+	ret=0
+else
+	echo " ^^^ Unexpected non-$outcome verification"
+	echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
+	ret=1
+fi
+tail -2 $litmus.out | head -1
+exit $ret
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index db11478..42261a9 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -47,7 +47,8 @@
 man7dir=$(mandir)/man7
 
 ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe
+ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_HTML = xhtml11
 MANPAGE_XSL = manpage-normal.xsl
 XMLTO_EXTRA =
 INSTALL?=install
@@ -55,6 +56,14 @@
 DOC_REF = origin/man
 HTML_REF = origin/html
 
+ifdef USE_ASCIIDOCTOR
+ASCIIDOC = asciidoctor
+ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
+ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
+ASCIIDOC_HTML = xhtml5
+endif
+
 infodir?=$(prefix)/share/info
 MAKEINFO=makeinfo
 INSTALL_INFO=install-info
@@ -73,10 +82,12 @@
 	missing_tools = $(ASCIIDOC)
 endif
 
+ifndef USE_ASCIIDOCTOR
 _tmp_tool_path := $(call get-executable,$(XMLTO))
 ifeq ($(_tmp_tool_path),)
 	missing_tools += $(XMLTO)
 endif
+endif
 
 #
 # For asciidoc ...
@@ -264,17 +275,25 @@
 
 $(MAN_HTML): $(OUTPUT)%.html : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
+	$(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
 		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
 	mv $@+ $@
 
+ifdef USE_ASCIIDOCTOR
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.txt
+	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+	$(ASCIIDOC) -b manpage -d manpage \
+		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+	mv $@+ $@
+endif
+
 $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
 	$(QUIET_XMLTO)$(RM) $@ && \
 	$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
 
 $(OUTPUT)%.xml : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+	$(ASCIIDOC) -b docbook -d manpage \
 		$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
 	mv $@+ $@
 
@@ -321,13 +340,13 @@
 	mv $@+ $@
 
 $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
-	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
+	$(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) $*.txt
 
 WEBDOC_DEST = /pub/software/tools/perf/docs
 
 $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
 	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
+	sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b $(ASCIIDOC_HTML) - >$@+ && \
 	mv $@+ $@
 
 # UNIMPLEMENTED
diff --git a/tools/perf/Documentation/asciidoctor-extensions.rb b/tools/perf/Documentation/asciidoctor-extensions.rb
new file mode 100644
index 0000000..d148fe9
--- /dev/null
+++ b/tools/perf/Documentation/asciidoctor-extensions.rb
@@ -0,0 +1,29 @@
+require 'asciidoctor'
+require 'asciidoctor/extensions'
+
+module Perf
+  module Documentation
+    class LinkPerfProcessor < Asciidoctor::Extensions::InlineMacroProcessor
+      use_dsl
+
+      named :chrome
+
+      def process(parent, target, attrs)
+        if parent.document.basebackend? 'html'
+          %(<a href="#{target}.html">#{target}(#{attrs[1]})</a>\n)
+        elsif parent.document.basebackend? 'manpage'
+          "#{target}(#{attrs[1]})"
+        elsif parent.document.basebackend? 'docbook'
+          "<citerefentry>\n" \
+            "<refentrytitle>#{target}</refentrytitle>" \
+            "<manvolnum>#{attrs[1]}</manvolnum>\n" \
+          "</citerefentry>\n"
+        end
+      end
+    end
+  end
+end
+
+Asciidoctor::Extensions.register do
+  inline_macro Perf::Documentation::LinkPerfProcessor, :linkperf
+end
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index 73c2650..f6de095 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -48,6 +48,9 @@
 --purge=::
         Purge all cached binaries including older caches which have specified
 	path from the cache.
+-P::
+--purge-all::
+	Purge all cached binaries. This will flush out the entire cache.
 -M::
 --missing=::
 	List missing build ids in the cache for the specified file.
@@ -59,7 +62,9 @@
 	exactly same build-id, that is replaced by new one. It can be used
 	to update kallsyms and kernel dso to vmlinux in order to support
 	annotation.
-
+-l::
+--list::
+	List all valid binaries from cache.
 -v::
 --verbose::
 	Be more verbose.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index e6c3b4e..3a822f3 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -116,6 +116,22 @@
 print counts using a CSV-style output to make it easy to import directly into
 spreadsheets. Columns are separated by the string specified in SEP.
 
+--table:: Display time for each run (-r option), in a table format, e.g.:
+
+  $ perf stat --null -r 5 --table perf bench sched pipe
+
+   Performance counter stats for 'perf bench sched pipe' (5 runs):
+
+             # Table of individual measurements:
+             5.189 (-0.293) #
+             5.189 (-0.294) #
+             5.186 (-0.296) #
+             5.663 (+0.181) ##
+             6.186 (+0.703) ####
+
+             # Final result:
+             5.483 +- 0.198 seconds time elapsed  ( +-  3.62% )
+
 -G name::
 --cgroup name::
 monitor only in the container (cgroup) called "name". This option is available only
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index d00f0d5..dfb218f 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -111,8 +111,8 @@
 A structure defining the number of CPUs.
 
 struct nr_cpus {
-       uint32_t nr_cpus_online;
        uint32_t nr_cpus_available; /* CPUs not yet onlined */
+       uint32_t nr_cpus_online;
 };
 
 	HEADER_CPUDESC = 8,
@@ -153,10 +153,18 @@
 	HEADER_CPU_TOPOLOGY = 13,
 
 String lists defining the core and CPU threads topology.
+The string lists are followed by a variable-length array
+which contains the core_id and socket_id of each CPU.
+The number of entries can be determined from the size of the
+section minus the sizes of both string lists.
 
 struct {
        struct perf_header_string_list cores; /* Variable length */
        struct perf_header_string_list threads; /* Variable length */
+       struct {
+	      uint32_t core_id;
+	      uint32_t socket_id;
+       } cpus[nr]; /* Variable length records */
 };
 
 Example:
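
The sizing rule described above can be checked with a small sketch. The helper
below is illustrative only (it is not part of perf) and assumes the on-disk
sizes of the two string lists are already known:

    #include <stddef.h>
    #include <stdint.h>

    /* Number of cpus[] records in HEADER_CPU_TOPOLOGY: whatever is left
     * of the section after both string lists, divided by the size of one
     * { core_id, socket_id } pair.
     */
    static size_t nr_cpu_topology_records(size_t section_size,
                                          size_t cores_list_size,
                                          size_t threads_list_size)
    {
        size_t remaining = section_size - cores_list_size - threads_list_size;

        return remaining / (2 * sizeof(uint32_t)); /* core_id + socket_id */
    }
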
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index ae7dc46..b5ac356 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -885,6 +885,8 @@
 
 # Among the variables below, these:
 #   perfexecdir
+#   perf_include_dir
+#   perf_examples_dir
 #   template_dir
 #   mandir
 #   infodir
@@ -904,6 +906,8 @@
 mandir = share/man
 infodir = share/info
 perfexecdir = libexec/perf-core
+perf_include_dir = lib/include/perf
+perf_examples_dir = lib/examples/perf
 sharedir = $(prefix)/share
 template_dir = share/perf-core/templates
 STRACE_GROUPS_DIR = share/perf-core/strace/groups
@@ -934,6 +938,8 @@
 mandir_SQ = $(subst ','\'',$(mandir))
 infodir_SQ = $(subst ','\'',$(infodir))
 perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
+perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir))
+perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir))
 template_dir_SQ = $(subst ','\'',$(template_dir))
 htmldir_SQ = $(subst ','\'',$(htmldir))
 tipdir_SQ = $(subst ','\'',$(tipdir))
@@ -944,14 +950,20 @@
 
 ifneq ($(filter /%,$(firstword $(perfexecdir))),)
 perfexec_instdir = $(perfexecdir)
+perf_include_instdir = $(perf_include_dir)
+perf_examples_instdir = $(perf_examples_dir)
 STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
 tip_instdir = $(tipdir)
 else
 perfexec_instdir = $(prefix)/$(perfexecdir)
+perf_include_instdir = $(prefix)/$(perf_include_dir)
+perf_examples_instdir = $(prefix)/$(perf_examples_dir)
 STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
 tip_instdir = $(prefix)/$(tipdir)
 endif
 perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+perf_include_instdir_SQ = $(subst ','\'',$(perf_include_instdir))
+perf_examples_instdir_SQ = $(subst ','\'',$(perf_examples_instdir))
 STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
 tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
 
@@ -999,6 +1011,8 @@
 $(call detected_var,STRACE_GROUPS_DIR_SQ)
 $(call detected_var,prefix_SQ)
 $(call detected_var,perfexecdir_SQ)
+$(call detected_var,perf_include_dir_SQ)
+$(call detected_var,perf_examples_dir_SQ)
 $(call detected_var,tipdir_SQ)
 $(call detected_var,srcdir_SQ)
 $(call detected_var,LIBDIR)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 83e453d..ecc9fc9 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -767,6 +767,16 @@
 endif
 	$(call QUIET_INSTALL, libexec) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBBPF
+	$(call QUIET_INSTALL, lib) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
+	$(call QUIET_INSTALL, include/bpf) \
+		$(INSTALL) include/bpf/*.h '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
+	$(call QUIET_INSTALL, lib) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+	$(call QUIET_INSTALL, examples/bpf) \
+		$(INSTALL) examples/bpf/*.c '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+endif
 	$(call QUIET_INSTALL, perf-archive) \
 		$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(call QUIET_INSTALL, perf-with-kcore) \
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
index 8cb3477..9a0242e 100644
--- a/tools/perf/arch/arm/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -25,7 +25,7 @@
 
 	sp = (unsigned long) regs[PERF_REG_ARM_SP];
 
-	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	map = map_groups__find(thread->mg, (u64)sp);
 	if (!map) {
 		pr_debug("failed to get stack map\n");
 		free(buf);
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
index e907f0f..5522ce3 100644
--- a/tools/perf/arch/arm64/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -25,7 +25,7 @@
 
 	sp = (unsigned long) regs[PERF_REG_ARM64_SP];
 
-	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	map = map_groups__find(thread->mg, (u64)sp);
 	if (!map) {
 		pr_debug("failed to get stack map\n");
 		free(buf);
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
index 30cbbd6..5f39efe 100644
--- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@
 
 	sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
 
-	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	map = map_groups__find(thread->mg, (u64)sp);
 	if (!map) {
 		pr_debug("failed to get stack map\n");
 		free(buf);
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index 0c370f8..3598b8b 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -248,8 +248,7 @@
 
 	ip = chain->ips[2];
 
-	thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
-			MAP__FUNCTION, ip, &al);
+	thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
 
 	if (al.map)
 		dso = al.map->dso;
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 95036c7..7879df3 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -26,7 +26,7 @@
 
 	sp = (unsigned long) regs[PERF_REG_X86_SP];
 
-	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	map = map_groups__find(thread->mg, (u64)sp);
 	if (!map) {
 		pr_debug("failed to get stack map\n");
 		free(buf);
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index f95e6f4..844b8f3 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -4,6 +4,8 @@
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
 libperf-y += group.o
+libperf-y += machine.o
+libperf-y += event.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
new file mode 100644
index 0000000..675a021
--- /dev/null
+++ b/tools/perf/arch/x86/util/event.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include "../../util/machine.h"
+#include "../../util/tool.h"
+#include "../../util/map.h"
+#include "../../util/util.h"
+#include "../../util/debug.h"
+
+#if defined(__x86_64__)
+
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	int rc = 0;
+	struct map *pos;
+	struct map_groups *kmaps = &machine->kmaps;
+	struct maps *maps = &kmaps->maps;
+	union perf_event *event = zalloc(sizeof(event->mmap) +
+					 machine->id_hdr_size);
+
+	if (!event) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for extra kernel maps\n");
+		return -1;
+	}
+
+	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+		struct kmap *kmap;
+		size_t size;
+
+		if (!__map__is_extra_kernel_map(pos))
+			continue;
+
+		kmap = map__kmap(pos);
+
+		size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
+		       PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
+		       machine->id_hdr_size;
+
+		memset(event, 0, size);
+
+		event->mmap.header.type = PERF_RECORD_MMAP;
+
+		/*
+		 * kernel uses 0 for user space maps, see kernel/perf_event.c
+		 * __perf_event_mmap
+		 */
+		if (machine__is_host(machine))
+			event->header.misc = PERF_RECORD_MISC_KERNEL;
+		else
+			event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+		event->mmap.header.size = size;
+
+		event->mmap.start = pos->start;
+		event->mmap.len   = pos->end - pos->start;
+		event->mmap.pgoff = pos->pgoff;
+		event->mmap.pid   = machine->pid;
+
+		strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
+
+		if (perf_tool__process_synth_event(tool, event, machine,
+						   process) != 0) {
+			rc = -1;
+			break;
+		}
+	}
+
+	free(event);
+	return rc;
+}
+
+#endif
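
The per-map record size computed in the loop above trims the fixed
PATH_MAX-sized filename field down to the 8-byte-aligned length of the actual
map name. A standalone sketch of the same arithmetic (the helper and macro
names are hypothetical, not from the perf sources):

    #include <stddef.h>
    #include <string.h>

    /* Stands in for PERF_ALIGN(x, 8): round x up to the next multiple of 8. */
    #define ALIGN8(x) (((x) + 7) & ~(size_t)7)

    static size_t synth_mmap_size(size_t mmap_event_size,
                                  size_t filename_field_size,
                                  const char *name, size_t id_hdr_size)
    {
        return mmap_event_size - filename_field_size +
               ALIGN8(strlen(name) + 1) + id_hdr_size;
    }
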
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
new file mode 100644
index 0000000..4520ac5
--- /dev/null
+++ b/tools/perf/arch/x86/util/machine.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include "../../util/machine.h"
+#include "../../util/map.h"
+#include "../../util/symbol.h"
+#include "../../util/sane_ctype.h"
+
+#include <symbol/kallsyms.h>
+
+#if defined(__x86_64__)
+
+struct extra_kernel_map_info {
+	int cnt;
+	int max_cnt;
+	struct extra_kernel_map *maps;
+	bool get_entry_trampolines;
+	u64 entry_trampoline;
+};
+
+static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start,
+				u64 end, u64 pgoff, const char *name)
+{
+	if (mi->cnt >= mi->max_cnt) {
+		void *buf;
+		size_t sz;
+
+		mi->max_cnt = mi->max_cnt ? mi->max_cnt * 2 : 32;
+		sz = sizeof(struct extra_kernel_map) * mi->max_cnt;
+		buf = realloc(mi->maps, sz);
+		if (!buf)
+			return -1;
+		mi->maps = buf;
+	}
+
+	mi->maps[mi->cnt].start = start;
+	mi->maps[mi->cnt].end   = end;
+	mi->maps[mi->cnt].pgoff = pgoff;
+	strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN);
+
+	mi->cnt += 1;
+
+	return 0;
+}
+
+static int find_extra_kernel_maps(void *arg, const char *name, char type,
+				  u64 start)
+{
+	struct extra_kernel_map_info *mi = arg;
+
+	if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL &&
+	    !strcmp(name, "_entry_trampoline")) {
+		mi->entry_trampoline = start;
+		return 0;
+	}
+
+	if (is_entry_trampoline(name)) {
+		u64 end = start + page_size;
+
+		return add_extra_kernel_map(mi, start, end, 0, name);
+	}
+
+	return 0;
+}
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+				      struct dso *kernel)
+{
+	struct extra_kernel_map_info mi = { .cnt = 0, };
+	char filename[PATH_MAX];
+	int ret;
+	int i;
+
+	machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return 0;
+
+	ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps);
+	if (ret)
+		goto out_free;
+
+	if (!mi.entry_trampoline)
+		goto out_free;
+
+	for (i = 0; i < mi.cnt; i++) {
+		struct extra_kernel_map *xm = &mi.maps[i];
+
+		xm->pgoff = mi.entry_trampoline;
+		ret = machine__create_extra_kernel_map(machine, kernel, xm);
+		if (ret)
+			goto out_free;
+	}
+
+	machine->trampolines_mapped = mi.cnt;
+out_free:
+	free(mi.maps);
+	return ret;
+}
+
+#endif
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 51709a9..da57042 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -45,6 +45,7 @@
 	bool	   print_line;
 	bool	   skip_missing;
 	bool	   has_br_stack;
+	bool	   group_set;
 	const char *sym_hist_filter;
 	const char *cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -228,7 +229,7 @@
 		 */
 		if (al->sym != NULL) {
 			rb_erase(&al->sym->rb_node,
-				 &al->map->dso->symbols[al->map->type]);
+				 &al->map->dso->symbols);
 			symbol__delete(al->sym);
 			dso__reset_find_symbol_cache(al->map->dso);
 		}
@@ -508,6 +509,9 @@
 		    "Don't shorten the displayed pathnames"),
 	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
 		    "Skip symbols that cannot be annotated"),
+	OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
+			&annotate.group_set,
+			"Show event group information together"),
 	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
 	OPT_CALLBACK(0, "symfs", NULL, "directory",
 		     "Look for files with symbols relative to this directory",
@@ -570,6 +574,9 @@
 	annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
 						      HEADER_BRANCH_STACK);
 
+	if (annotate.group_set)
+		perf_evlist__force_leader(annotate.session->evlist);
+
 	ret = symbol__annotation_init();
 	if (ret < 0)
 		goto out_delete;
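
The symbols[] indexing dropped in this hunk reflects a wider theme of the
merge: the per-map-type rb-trees are collapsed into a single symbol tree per
dso, roughly the following shape change (a sketch of the effect, not the
verbatim util/dso.h hunk):

    struct dso {
        ...
        /* was: struct rb_root symbols[MAP__NR_TYPES]; */
        struct rb_root symbols;
        ...
    };

This is why map_groups__find(), dso__first_symbol(), machine__load_kallsyms()
and friends lose their enum map_type argument throughout the hunks below.
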
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 41db2cb..115110a 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -25,6 +25,7 @@
 #include "util/session.h"
 #include "util/symbol.h"
 #include "util/time-utils.h"
+#include "util/probe-file.h"
 
 static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
 {
@@ -239,6 +240,34 @@
 	return err;
 }
 
+static int build_id_cache__purge_all(void)
+{
+	struct strlist *list;
+	struct str_node *pos;
+	int err = 0;
+	char *buf;
+
+	list = build_id_cache__list_all(false);
+	if (!list) {
+		pr_debug("Failed to get buildids: -%d\n", errno);
+		return -EINVAL;
+	}
+
+	strlist__for_each_entry(pos, list) {
+		buf = build_id_cache__origname(pos->s);
+		err = build_id_cache__remove_s(pos->s);
+		pr_debug("Removing %s (%s): %s\n", buf, pos->s,
+			 err ? "FAIL" : "Ok");
+		free(buf);
+		if (err)
+			break;
+	}
+	strlist__delete(list);
+
+	pr_debug("Purged all: %s\n", err ? "FAIL" : "Ok");
+	return err;
+}
+
 static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
 {
 	char filename[PATH_MAX];
@@ -297,6 +326,26 @@
 	return err;
 }
 
+static int build_id_cache__show_all(void)
+{
+	struct strlist *bidlist;
+	struct str_node *nd;
+	char *buf;
+
+	bidlist = build_id_cache__list_all(true);
+	if (!bidlist) {
+		pr_debug("Failed to get buildids: -%d\n", errno);
+		return -1;
+	}
+	strlist__for_each_entry(nd, bidlist) {
+		buf = build_id_cache__origname(nd->s);
+		fprintf(stdout, "%s %s\n", nd->s, buf);
+		free(buf);
+	}
+	strlist__delete(bidlist);
+	return 0;
+}
+
 int cmd_buildid_cache(int argc, const char **argv)
 {
 	struct strlist *list;
@@ -304,6 +353,9 @@
 	int ret = 0;
 	int ns_id = -1;
 	bool force = false;
+	bool list_files = false;
+	bool opts_flag = false;
+	bool purge_all = false;
 	char const *add_name_list_str = NULL,
 		   *remove_name_list_str = NULL,
 		   *purge_name_list_str = NULL,
@@ -327,6 +379,8 @@
 		    "file(s) to remove"),
 	OPT_STRING('p', "purge", &purge_name_list_str, "file list",
 		    "file(s) to remove (remove old caches too)"),
+	OPT_BOOLEAN('P', "purge-all", &purge_all, "purge all cached files"),
+	OPT_BOOLEAN('l', "list", &list_files, "list all cached files"),
 	OPT_STRING('M', "missing", &missing_filename, "file",
 		   "to find missing build ids in the cache"),
 	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -344,11 +398,20 @@
 	argc = parse_options(argc, argv, buildid_cache_options,
 			     buildid_cache_usage, 0);
 
-	if (argc || (!add_name_list_str && !kcore_filename &&
-		     !remove_name_list_str && !purge_name_list_str &&
-		     !missing_filename && !update_name_list_str))
+	opts_flag = add_name_list_str || kcore_filename ||
+		remove_name_list_str || purge_name_list_str ||
+		missing_filename || update_name_list_str ||
+		purge_all;
+
+	if (argc || !(list_files || opts_flag))
 		usage_with_options(buildid_cache_usage, buildid_cache_options);
 
+	/* -l is exclusive. It cannot be used with other options. */
+	if (list_files && opts_flag) {
+		usage_with_options_msg(buildid_cache_usage,
+			buildid_cache_options, "-l is exclusive.\n");
+	}
+
 	if (ns_id > 0)
 		nsi = nsinfo__new(ns_id);
 
@@ -366,6 +429,11 @@
 
 	setup_pager();
 
+	if (list_files) {
+		ret = build_id_cache__show_all();
+		goto out;
+	}
+
 	if (add_name_list_str) {
 		list = strlist__new(add_name_list_str, NULL);
 		if (list) {
@@ -420,6 +488,13 @@
 		}
 	}
 
+	if (purge_all) {
+		if (build_id_cache__purge_all()) {
+			pr_warning("Couldn't remove some caches. Error: %s.\n",
+				str_error_r(errno, sbuf, sizeof(sbuf)));
+		}
+	}
+
 	if (missing_filename)
 		ret = build_id_cache__fprintf_missing(session, stdout);
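
Both new helpers iterate a strlist obtained from build_id_cache__list_all();
judging from the two call sites, false in the purge path and true in the
listing path, the flag presumably restricts the result to entries that still
validate. The prototype below is inferred from those call sites, not quoted
from util/build-id.h:

    #include <stdbool.h>

    struct strlist;

    /* Inferred prototype (assumption): list every cached build-id,
     * optionally only those whose backing files still validate.
     */
    struct strlist *build_id_cache__list_all(bool validonly);
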
 
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40fe919..a3b3463 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -440,9 +440,7 @@
 		goto repipe;
 	}
 
-	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
-
-	if (al.map != NULL) {
+	if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
 		if (!al.map->dso->hit) {
 			al.map->dso->hit = 1;
 			if (map__load(al.map) >= 0) {
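
This is the first of many conversions from thread__find_addr_map() and
thread__find_addr_location() to the new helpers. Judging by the call sites in
this merge, which test the return value and then use al->map or al->sym
directly, the new API looks roughly like the following. These prototypes are
inferred and hedged, not quoted from util/thread.h:

    #include <linux/types.h>   /* u8, u64, as used by perf's internal APIs */

    struct thread;
    struct map;
    struct symbol;
    struct addr_location;

    /* Fill *al for addr; return al->map, or NULL if no map covers addr. */
    struct map *thread__find_map(struct thread *thread, u8 cpumode,
                                 u64 addr, struct addr_location *al);

    /* Same, but also resolve al->sym; return it (or NULL). */
    struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
                                       u64 addr, struct addr_location *al);
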
diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c
index bcfb363..90d1a23 100644
--- a/tools/perf/builtin-kallsyms.c
+++ b/tools/perf/builtin-kallsyms.c
@@ -27,7 +27,7 @@
 
 	for (i = 0; i < argc; ++i) {
 		struct map *map;
-		struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map);
+		struct symbol *symbol = machine__find_kernel_symbol_by_name(machine, argv[i], &map);
 
 		if (symbol == NULL) {
 			printf("%s: not found\n", argv[i]);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index ae11e4c..54d3f21 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1004,7 +1004,7 @@
 		if (is_caller) {
 			addr = data->call_site;
 			if (!raw_ip)
-				sym = machine__find_kernel_function(machine, addr, &map);
+				sym = machine__find_kernel_symbol(machine, addr, &map);
 		} else
 			addr = data->ptr;
 
@@ -1068,7 +1068,7 @@
 		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
-		sym = machine__find_kernel_function(machine, data->callsite, &map);
+		sym = machine__find_kernel_symbol(machine, data->callsite, &map);
 		if (sym)
 			caller = sym->name;
 		else
@@ -1110,7 +1110,7 @@
 		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
-		sym = machine__find_kernel_function(machine, data->callsite, &map);
+		sym = machine__find_kernel_symbol(machine, data->callsite, &map);
 		if (sym)
 			caller = sym->name;
 		else
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0f198f6..ad978e3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -194,20 +194,11 @@
 	return err;
 }
 
-/*
- * Events in data file are not collect in groups, but we still want
- * the group display. Set the artificial group and set the leader's
- * forced_leader flag to notify the display code.
- */
 static void setup_forced_leader(struct report *report,
 				struct perf_evlist *evlist)
 {
-	if (report->group_set && !evlist->nr_groups) {
-		struct perf_evsel *leader = perf_evlist__first(evlist);
-
-		perf_evlist__set_leader(evlist);
-		leader->forced_leader = true;
-	}
+	if (report->group_set)
+		perf_evlist__force_leader(evlist);
 }
 
 static int process_feature_event(struct perf_tool *tool,
@@ -523,12 +514,9 @@
 		    "As no suitable kallsyms nor vmlinux was found, kernel samples\n"
 		    "can't be resolved.";
 
-		if (kernel_map) {
-			const struct dso *kdso = kernel_map->dso;
-			if (!RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION])) {
-				desc = "If some relocation was applied (e.g. "
-				       "kexec) symbols may be misresolved.";
-			}
+		if (kernel_map && map__has_symbols(kernel_map)) {
+			desc = "If some relocation was applied (e.g. "
+			       "kexec) symbols may be misresolved.";
 		}
 
 		ui__warning(
@@ -718,10 +706,7 @@
 
 static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp)
 {
-	int printed = 0, i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		printed += maps__fprintf_task(&mg->maps[i], indent, fp);
-	return printed;
+	return maps__fprintf_task(&mg->maps, indent, fp);
 }
 
 static void task__print_level(struct task *task, FILE *fp, int level)
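
The open-coded block removed above tells us what the new
perf_evlist__force_leader() helper must do; a sketch reconstructed from those
removed lines (the actual util/evlist.c body may differ):

    void perf_evlist__force_leader(struct perf_evlist *evlist)
    {
        if (!evlist->nr_groups) {
            struct perf_evsel *leader = perf_evlist__first(evlist);

            /* Build an artificial group and mark its leader so the
             * display code knows the grouping was forced.
             */
            perf_evlist__set_leader(evlist);
            leader->forced_leader = true;
        }
    }
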
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e0a9845..cefc881 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -153,8 +153,8 @@
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -165,8 +165,9 @@
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
+			      PERF_OUTPUT_BPF_OUTPUT,
 
 		.invalid_fields = PERF_OUTPUT_TRACE,
 	},
@@ -185,10 +186,10 @@
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
-			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
-			      PERF_OUTPUT_PHYS_ADDR,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
+			      PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
+			      PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -199,8 +200,8 @@
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_PERIOD,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -211,8 +212,8 @@
 		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
 			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
-			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
-			      PERF_OUTPUT_SYNTH,
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_SYNTH,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -544,6 +545,7 @@
 			if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) {
 				output[j].fields |= PERF_OUTPUT_IP;
 				output[j].fields |= PERF_OUTPUT_SYM;
+				output[j].fields |= PERF_OUTPUT_SYMOFFSET;
 				output[j].fields |= PERF_OUTPUT_DSO;
 				set_print_ip_opts(attr);
 				goto out;
@@ -717,8 +719,8 @@
 		if (PRINT_FIELD(DSO)) {
 			memset(&alf, 0, sizeof(alf));
 			memset(&alt, 0, sizeof(alt));
-			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
-			thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
+			thread__find_map(thread, sample->cpumode, from, &alf);
+			thread__find_map(thread, sample->cpumode, to, &alt);
 		}
 
 		printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -764,13 +766,8 @@
 		from = br->entries[i].from;
 		to   = br->entries[i].to;
 
-		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
-		if (alf.map)
-			alf.sym = map__find_symbol(alf.map, alf.addr);
-
-		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
-		if (alt.map)
-			alt.sym = map__find_symbol(alt.map, alt.addr);
+		thread__find_symbol(thread, sample->cpumode, from, &alf);
+		thread__find_symbol(thread, sample->cpumode, to, &alt);
 
 		printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp);
 		if (PRINT_FIELD(DSO)) {
@@ -814,12 +811,12 @@
 		from = br->entries[i].from;
 		to   = br->entries[i].to;
 
-		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
-		if (alf.map && !alf.map->dso->adjust_symbols)
+		if (thread__find_map(thread, sample->cpumode, from, &alf) &&
+		    !alf.map->dso->adjust_symbols)
 			from = map__map_ip(alf.map, from);
 
-		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
-		if (alt.map && !alt.map->dso->adjust_symbols)
+		if (thread__find_map(thread, sample->cpumode, to, &alt) &&
+		    !alt.map->dso->adjust_symbols)
 			to = map__map_ip(alt.map, to);
 
 		printed += fprintf(fp, " 0x%"PRIx64, from);
@@ -882,8 +879,7 @@
 		return 0;
 	}
 
-	thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al);
-	if (!al.map || !al.map->dso) {
+	if (!thread__find_map(thread, *cpumode, start, &al) || !al.map->dso) {
 		pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
 		return 0;
 	}
@@ -933,10 +929,8 @@
 
 	memset(&al, 0, sizeof(al));
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
-	if (!al.map)
-		thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
-				      addr, &al);
+	thread__find_map(thread, cpumode, addr, &al);
+
 	if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
 		return 0;
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f17dc60..a4f662a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -164,6 +164,7 @@
 static bool			metric_only			= false;
 static bool			force_metric_only		= false;
 static bool			no_merge			= false;
+static bool			walltime_run_table		= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
 static aggr_get_id_t		aggr_get_id;
@@ -173,6 +174,7 @@
 static int			output_fd;
 static int			print_free_counters_hint;
 static int			print_mixed_hw_group_error;
+static u64			*walltime_run;
 
 struct perf_stat {
 	bool			 record;
@@ -569,7 +571,7 @@
 	return leader;
 }
 
-static int __run_perf_stat(int argc, const char **argv)
+static int __run_perf_stat(int argc, const char **argv, int run_idx)
 {
 	int interval = stat_config.interval;
 	int times = stat_config.times;
@@ -752,6 +754,9 @@
 
 	t1 = rdclock();
 
+	if (walltime_run_table)
+		walltime_run[run_idx] = t1 - t0;
+
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
 	/*
@@ -766,7 +771,7 @@
 	return WEXITSTATUS(status);
 }
 
-static int run_perf_stat(int argc, const char **argv)
+static int run_perf_stat(int argc, const char **argv, int run_idx)
 {
 	int ret;
 
@@ -779,7 +784,7 @@
 	if (sync_run)
 		sync();
 
-	ret = __run_perf_stat(argc, argv);
+	ret = __run_perf_stat(argc, argv, run_idx);
 	if (ret)
 		return ret;
 
@@ -1764,19 +1769,67 @@
 	}
 }
 
+static int get_precision(double num)
+{
+	if (num > 1)
+		return 0;
+
+	return lround(ceil(-log10(num)));
+}
+
+static void print_table(FILE *output, int precision, double avg)
+{
+	char tmp[64];
+	int idx, indent = 0;
+
+	scnprintf(tmp, 64, " %17.*f", precision, avg);
+	while (tmp[indent] == ' ')
+		indent++;
+
+	fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
+
+	for (idx = 0; idx < run_count; idx++) {
+		double run = (double) walltime_run[idx] / NSEC_PER_SEC;
+		int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
+
+		fprintf(output, " %17.*f (%+.*f) ",
+			precision, run, precision, run - avg);
+
+		for (h = 0; h < n; h++)
+			fprintf(output, "#");
+
+		fprintf(output, "\n");
+	}
+
+	fprintf(output, "\n%*s# Final result:\n", indent, "");
+}
+
 static void print_footer(void)
 {
+	double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
 	FILE *output = stat_config.output;
 	int n;
 
 	if (!null_run)
 		fprintf(output, "\n");
-	fprintf(output, " %17.9f seconds time elapsed",
-			avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
-	if (run_count > 1) {
-		fprintf(output, "                                        ");
-		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
-				avg_stats(&walltime_nsecs_stats));
+
+	if (run_count == 1) {
+		fprintf(output, " %17.9f seconds time elapsed", avg);
+	} else {
+		double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
+		/*
+		 * Display at most 2 more significant
+		 * digits than the stddev inaccuracy.
+		 */
+		int precision = get_precision(sd) + 2;
+
+		if (walltime_run_table)
+			print_table(output, precision, avg);
+
+		fprintf(output, " %17.*f +- %.*f seconds time elapsed",
+			precision, avg, precision, sd);
+
+		print_noise_pct(sd, avg);
 	}
 	fprintf(output, "\n\n");
 
@@ -1952,6 +2005,8 @@
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_INTEGER('r', "repeat", &run_count,
 		    "repeat command and print average + stddev (max: 100, forever: 0)"),
+	OPT_BOOLEAN(0, "table", &walltime_run_table,
+		    "display details about each run (only with -r option)"),
 	OPT_BOOLEAN('n', "null", &null_run,
 		    "null run - dont start any counters"),
 	OPT_INCR('d', "detailed", &detailed_run,
@@ -2843,6 +2898,13 @@
 		goto out;
 	}
 
+	if (walltime_run_table && run_count <= 1) {
+		fprintf(stderr, "--table is only supported with -r\n");
+		parse_options_usage(stat_usage, stat_options, "r", 1);
+		parse_options_usage(NULL, stat_options, "table", 0);
+		goto out;
+	}
+
 	if (output_fd < 0) {
 		fprintf(stderr, "argument to --log-fd must be a > 0\n");
 		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
@@ -2897,6 +2959,14 @@
 		run_count = 1;
 	}
 
+	if (walltime_run_table) {
+		walltime_run = zalloc(run_count * sizeof(walltime_run[0]));
+		if (!walltime_run) {
+			pr_err("failed to setup -r option");
+			goto out;
+		}
+	}
+
 	if ((stat_config.aggr_mode == AGGR_THREAD) &&
 		!target__has_task(&target)) {
 		if (!target.system_wide || target.cpu_list) {
@@ -3012,7 +3082,7 @@
 			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
 				run_idx + 1);
 
-		status = run_perf_stat(argc, argv);
+		status = run_perf_stat(argc, argv, run_idx);
 		if (forever && status != -1) {
 			print_counters(NULL, argc, argv);
 			perf_stat__reset_stats();
@@ -3060,6 +3130,8 @@
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	free(walltime_run);
+
 	if (smi_cost && smi_reset)
 		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
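
get_precision() chooses just enough decimal places to resolve the standard
deviation, and print_footer() adds two more significant digits on top. A quick
worked check against the --table example in the documentation hunk earlier
(standalone; compile with -lm):

    #include <math.h>
    #include <stdio.h>

    static int get_precision(double num)
    {
        if (num > 1)
            return 0;
        return lround(ceil(-log10(num)));
    }

    int main(void)
    {
        double sd = 0.198;  /* stddev from the example output */

        /* -log10(0.198) ~= 0.70, ceil -> 1, +2 -> 3 decimals,
         * matching "5.483 +- 0.198 seconds time elapsed".
         */
        printf("%d\n", get_precision(sd) + 2);
        return 0;
    }
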
 
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 813698a..a827919 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -533,12 +533,8 @@
 		}
 
 		tal.filtered = 0;
-		thread__find_addr_location(al.thread, cpumode,
-					   MAP__FUNCTION, ip, &tal);
-
-		if (tal.sym)
-			fprintf(f, "..... %016" PRIx64 " %s\n", ip,
-				tal.sym->name);
+		if (thread__find_symbol(al.thread, cpumode, ip, &tal))
+			fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name);
 		else
 			fprintf(f, "..... %016" PRIx64 "\n", ip);
 	}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f39bd60..7a349fc 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -742,7 +742,7 @@
 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
 "Check /proc/sys/kernel/kptr_restrict.\n\n"
 "Kernel%s samples will not be resolved.\n",
-			  al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
+			  al.map && map__has_symbols(al.map) ?
 			  " modules" : "");
 			if (use_browser <= 0)
 				sleep(5);
@@ -750,7 +750,7 @@
 		machine->kptr_restrict_warned = true;
 	}
 
-	if (al.sym == NULL) {
+	if (al.sym == NULL && al.map != NULL) {
 		const char *msg = "Kernel samples will not be resolved.\n";
 		/*
 		 * As we do lazy loading of symtabs we only will know if the
@@ -764,8 +764,7 @@
 		 * invalid --vmlinux ;-)
 		 */
 		if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
-		    al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
-		    RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
+		    __map__is_kernel(al.map) && map__has_symbols(al.map)) {
 			if (symbol_conf.vmlinux_name) {
 				char serr[256];
 				dso__strerror_load(al.map->dso, serr, sizeof(serr));
@@ -1265,7 +1264,7 @@
 			.proc_map_timeout    = 500,
 			.overwrite	= 1,
 		},
-		.max_stack	     = sysctl_perf_event_max_stack,
+		.max_stack	     = sysctl__max_stack(),
 		.sym_pcnt_filter     = 5,
 		.nr_threads_synthesize = UINT_MAX,
 	};
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 3ad17ee..560aed7 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2024,8 +2024,7 @@
 	if (trace->summary_only)
 		goto out;
 
-	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
-			      sample->ip, &al);
+	thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
 
 	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
 
@@ -2037,12 +2036,10 @@
 
 	fprintf(trace->output, "] => ");
 
-	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
-				   sample->addr, &al);
+	thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
 
 	if (!al.map) {
-		thread__find_addr_location(thread, sample->cpumode,
-					   MAP__FUNCTION, sample->addr, &al);
+		thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
 
 		if (al.map)
 			map_type = 'x';
@@ -3165,7 +3162,7 @@
 		mmap_pages_user_set = false;
 
 	if (trace.max_stack == UINT_MAX) {
-		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
+		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
 		max_stack_user_set = false;
 	}
 
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 9aff89b..10f333e 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -55,23 +55,27 @@
 include/uapi/asm-generic/mman-common.h
 '
 
+check_2 () {
+  file1=$1
+  file2=$2
+
+  shift
+  shift
+
+  cmd="diff $* $file1 $file2 > /dev/null"
+
+  test -f $file2 &&
+  eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2
+}
+
 check () {
   file=$1
 
   shift
-  opts=
-  while [ -n "$*" ]; do
-    opts="$opts \"$1\""
-    shift
-  done
 
-  cmd="diff $opts ../$file ../../$file > /dev/null"
-
-  test -f ../../$file &&
-  eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2
+  check_2 ../$file ../../$file $*
 }
 
-
 # Check if we have the kernel headers (tools/perf/../../include), else
 # we're probably on a detached tarball, so no point in trying to check
 # differences.
@@ -83,7 +87,7 @@
 done
 
 # diff with extra ignore lines
-check arch/x86/lib/memcpy_64.S        -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
-check arch/x86/lib/memset_64.S        -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"
-check include/uapi/asm-generic/mman.h -I "^#include <\(uapi/\)*asm-generic/mman-common.h>"
-check include/uapi/linux/mman.h       -I "^#include <\(uapi/\)*asm/mman.h>"
+check arch/x86/lib/memcpy_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check arch/x86/lib/memset_64.S        '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"'
+check include/uapi/linux/mman.h       '-I "^#include <\(uapi/\)*asm/mman.h>"'
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
new file mode 100644
index 0000000..b9c20321
--- /dev/null
+++ b/tools/perf/examples/bpf/5sec.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+    Description:
+
+    . Disable strace-like syscall tracing (--no-syscalls), or try tracing
+      just some (-e *sleep).
+
+    . Attach a filter function to a kernel function that returns whether the
+      event should be considered, i.e. appear in the output.
+
+    . Run it system wide, so that any sleep of >= 5 seconds and < 6
+      seconds gets caught.
+
+    . Ask for callgraphs using DWARF info, so that userspace can be unwound
+
+    . While this is running, run something like "sleep 5s".
+
+    . If we decide to add tv_nsec as well, then it becomes:
+
+      int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec)
+
+      I.e. add where it comes from (rqtp->tv_nsec) and where it will be
+      accessible in the function body (nsec)
+
+    # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/
+         0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5
+                                           hrtimer_nanosleep ([kernel.kallsyms])
+                                           __x64_sys_nanosleep ([kernel.kallsyms])
+                                           do_syscall_64 ([kernel.kallsyms])
+                                           entry_SYSCALL_64 ([kernel.kallsyms])
+                                           __GI___nanosleep (/usr/lib64/libc-2.26.so)
+                                           rpl_nanosleep (/usr/bin/sleep)
+                                           xnanosleep (/usr/bin/sleep)
+                                           main (/usr/bin/sleep)
+                                           __libc_start_main (/usr/lib64/libc-2.26.so)
+                                           _start (/usr/bin/sleep)
+    ^C#
+
+   Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
+*/
+
+#include <bpf.h>
+
+int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
+{
+	return sec == 5;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
new file mode 100644
index 0000000..3776d26
--- /dev/null
+++ b/tools/perf/examples/bpf/empty.c
@@ -0,0 +1,3 @@
+#include <bpf.h>
+
+license(GPL);
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
new file mode 100644
index 0000000..dd764ad
--- /dev/null
+++ b/tools/perf/include/bpf/bpf.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _PERF_BPF_H
+#define _PERF_BPF_H
+#define SEC(NAME) __attribute__((section(NAME),  used))
+
+#define probe(function, vars) \
+	SEC(#function "=" #function " " #vars) function
+
+#define license(name) \
+char _license[] SEC("license") = #name; \
+int _version SEC("version") = LINUX_VERSION_CODE;
+
+#endif /* _PERF_BPF_H */
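
Pre-processing the 5sec.c probe through these macros makes the mechanism
visible: the whole probe definition is smuggled to perf's BPF loader via the
ELF section name. The expansion is mechanical (adjacent string literals
concatenate), so:

    /* int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
     * expands to:
     */
    int __attribute__((section("hrtimer_nanosleep=hrtimer_nanosleep rqtp->tv_sec"), used))
    hrtimer_nanosleep(void *ctx, int err, long sec);
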
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 20a08cb..51c8150 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -238,7 +238,7 @@
 			(*argc)--;
 		} else if (strstarts(cmd, CMD_DEBUGFS_DIR)) {
 			tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR));
-			fprintf(stderr, "dir: %s\n", tracing_path);
+			fprintf(stderr, "dir: %s\n", tracing_path_mount());
 			if (envchanged)
 				*envchanged = 1;
 		} else if (!strcmp(cmd, "--list-cmds")) {
@@ -421,22 +421,11 @@
 	pthread_sigmask(SIG_UNBLOCK, &set, NULL);
 }
 
-#ifdef _SC_LEVEL1_DCACHE_LINESIZE
-#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
-#else
-static void cache_line_size(int *cacheline_sizep)
-{
-	if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
-		pr_debug("cannot determine cache line size");
-}
-#endif
-
 int main(int argc, const char **argv)
 {
 	int err;
 	const char *cmd;
 	char sbuf[STRERR_BUFSIZE];
-	int value;
 
 	/* libsubcmd init */
 	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
@@ -444,13 +433,6 @@
 
 	/* The page_size is placed in util object. */
 	page_size = sysconf(_SC_PAGE_SIZE);
-	cache_line_size(&cacheline_size);
-
-	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
-		sysctl_perf_event_max_stack = value;
-
-	if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
-		sysctl_perf_event_max_contexts_per_stack = value;
 
 	cmd = extract_argv0_path(argv[0]);
 	if (!cmd)
@@ -458,15 +440,11 @@
 
 	srandom(time(NULL));
 
-	perf_config__init();
 	err = perf_config(perf_default_config, NULL);
 	if (err)
 		return err;
 	set_buildid_dir(NULL);
 
-	/* get debugfs/tracefs mount point from /proc/mounts */
-	tracing_path_mount();
-
 	/*
 	 * "perf-xxxx" is the same as "perf xxxx", but we obviously:
 	 *
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index cac8f88..2bde505 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -654,6 +654,15 @@
 			continue;
 
 		pr_info("%2d: %s\n", i, t->desc);
+
+		if (t->subtest.get_nr) {
+			int subn = t->subtest.get_nr();
+			int subi;
+
+			for (subi = 0; subi < subn; subi++)
+				pr_info("%2d:%1d: %s\n", i, subi + 1,
+					t->subtest.get_desc(subi));
+		}
 	}
 
 	perf_test__list_shell(argc, argv, i);
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 9993635..afa4ce2 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -236,14 +236,13 @@
 
 	pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
-	if (!al.map || !al.map->dso) {
+	if (!thread__find_map(thread, cpumode, addr, &al) || !al.map->dso) {
 		if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
 			pr_debug("Hypervisor address can not be resolved - skipping\n");
 			return 0;
 		}
 
-		pr_debug("thread__find_addr_map failed\n");
+		pr_debug("thread__find_map failed\n");
 		return -1;
 	}
 
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index f7c5b61..b889a28f 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -131,20 +131,20 @@
 			goto out;
 
 		/* emulate dso__load() */
-		dso__set_loaded(dso, MAP__FUNCTION);
+		dso__set_loaded(dso);
 
 		for (k = 0; k < fake_symbols[i].nr_syms; k++) {
 			struct symbol *sym;
 			struct fake_sym *fsym = &fake_symbols[i].syms[k];
 
 			sym = symbol__new(fsym->start, fsym->length,
-					  STB_GLOBAL, fsym->name);
+					  STB_GLOBAL, STT_FUNC, fsym->name);
 			if (sym == NULL) {
 				dso__put(dso);
 				goto out;
 			}
 
-			symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
+			symbols__insert(&dso->symbols, sym);
 		}
 
 		dso__put(dso);
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 868d82b5..b1af249 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -188,9 +188,8 @@
 
 		pr_debug("looking for map %p\n", td->map);
 
-		thread__find_addr_map(thread,
-				      PERF_RECORD_MISC_USER, MAP__FUNCTION,
-				      (unsigned long) (td->map + 1), &al);
+		thread__find_map(thread, PERF_RECORD_MISC_USER,
+				 (unsigned long) (td->map + 1), &al);
 
 		thread__put(thread);
 
@@ -218,7 +217,7 @@
  *   perf_event__synthesize_threads    (global)
  *
  * We test we can find all memory maps via:
- *   thread__find_addr_map
+ *   thread__find_map
  *
  * by using all thread objects.
  */
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 18b0644..b9ebe15 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1309,18 +1309,26 @@
 	return 0;
 }
 
+static int test__intel_pt(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0);
+	return 0;
+}
+
 static int count_tracepoints(void)
 {
 	struct dirent *events_ent;
 	DIR *events_dir;
 	int cnt = 0;
 
-	events_dir = opendir(tracing_events_path);
+	events_dir = tracing_events__opendir();
 
 	TEST_ASSERT_VAL("Can't open events dir", events_dir);
 
 	while ((events_ent = readdir(events_dir))) {
-		char sys_path[PATH_MAX];
+		char *sys_path;
 		struct dirent *sys_ent;
 		DIR *sys_dir;
 
@@ -1331,8 +1339,8 @@
 		    || !strcmp(events_ent->d_name, "header_page"))
 			continue;
 
-		scnprintf(sys_path, PATH_MAX, "%s/%s",
-			  tracing_events_path, events_ent->d_name);
+		sys_path = get_events_file(events_ent->d_name);
+		TEST_ASSERT_VAL("Can't get sys path", sys_path);
 
 		sys_dir = opendir(sys_path);
 		TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
@@ -1348,6 +1356,7 @@
 		}
 
 		closedir(sys_dir);
+		put_events_file(sys_path);
 	}
 
 	closedir(events_dir);
@@ -1637,6 +1646,11 @@
 		.check = test__checkevent_config_cache,
 		.id    = 51,
 	},
+	{
+		.name  = "intel_pt//u",
+		.check = test__intel_pt,
+		.id    = 52,
+	},
 };
 
 static struct evlist_test test__events_pmu[] = {
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index ee86473..650b208 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -16,18 +16,18 @@
 trace_libc_inet_pton_backtrace() {
 	idx=0
 	expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
-	expected[1]=".*inet_pton[[:space:]]\($libc|inlined\)$"
+	expected[1]=".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
 	case "$(uname -m)" in
 	s390x)
 		eventattr='call-graph=dwarf,max-stack=4'
-		expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
-		expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$"
-		expected[4]="main[[:space:]]\(.*/bin/ping.*\)$"
+		expected[2]="gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
+		expected[3]="(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
+		expected[4]="main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
 		;;
 	*)
 		eventattr='max-stack=3'
-		expected[2]="getaddrinfo[[:space:]]\($libc\)$"
-		expected[3]=".*\(.*/bin/ping.*\)$"
+		expected[2]="getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$"
+		expected[3]=".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
 		;;
 	esac
 
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 17cb1bb..40e30a2 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -70,6 +70,27 @@
 	session = perf_session__new(&data, false, NULL);
 	TEST_ASSERT_VAL("can't get session", session);
 
+	/* On platforms with large numbers of CPUs, process_cpu_topology()
+	 * might issue an error while reading the perf.data file section
+	 * HEADER_CPU_TOPOLOGY, and the cpu_topology_map pointed to by member
+	 * cpu is a NULL pointer.
+	 * Example: On s390
+	 *   CPU 0 is on core_id 0 and physical_package_id 6
+	 *   CPU 1 is on core_id 1 and physical_package_id 3
+	 *
+	 *   Core_id and physical_package_id are platform and architecture
+	 *   dependent and might have higher numbers than the CPU id.
+	 *   This actually depends on the configuration.
+	 *
+	 *  In this case process_cpu_topology() prints the error message:
+	 *  "socket_id number is too big. You may need to upgrade the
+	 *  perf tool."
+	 *
+	 *  This is the reason why this test might be skipped.
+	 */
+	if (!session->header.env.cpu)
+		return TEST_SKIP;
+
 	for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
 		if (!cpu_map__has(map, i))
 			continue;
@@ -95,7 +116,7 @@
 {
 	char path[PATH_MAX];
 	struct cpu_map *map;
-	int ret = -1;
+	int ret = TEST_FAIL;
 
 	TEST_ASSERT_VAL("can't get templ file", !get_temp(path));
 
@@ -110,12 +131,9 @@
 		goto free_path;
 	}
 
-	if (check_cpu_topology(path, map))
-		goto free_map;
-	ret = 0;
-
-free_map:
+	ret = check_cpu_topology(path, map);
 	cpu_map__put(map);
+
 free_path:
 	unlink(path);
 	return ret;
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 1e5adb6..7691980 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -19,8 +19,7 @@
 	struct symbol *sym;
 	struct map *kallsyms_map, *vmlinux_map, *map;
 	struct machine kallsyms, vmlinux;
-	enum map_type type = MAP__FUNCTION;
-	struct maps *maps = &vmlinux.kmaps.maps[type];
+	struct maps *maps = machine__kernel_maps(&vmlinux);
 	u64 mem_start, mem_end;
 	bool header_printed;
 
@@ -56,7 +55,7 @@
 	 * be compacted against the list of modules found in the "vmlinux"
 	 * code and with the one got from /proc/modules from the "kallsyms" code.
 	 */
-	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) {
+	if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) {
 		pr_debug("dso__load_kallsyms ");
 		goto out;
 	}
@@ -94,7 +93,7 @@
 	 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
 	 * to fixup the symbols.
 	 */
-	if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
+	if (machine__load_vmlinux_path(&vmlinux) <= 0) {
 		pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
 		err = TEST_SKIP;
 		goto out;
@@ -108,7 +107,7 @@
 	 * in the kallsyms dso. For the ones that are in both, check its names and
 	 * end addresses too.
 	 */
-	for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) {
+	map__for_each_symbol(vmlinux_map, sym, nd) {
 		struct symbol *pair, *first_pair;
 
 		sym  = rb_entry(nd, struct symbol, rb_node);
@@ -119,8 +118,7 @@
 		mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start);
 		mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
 
-		first_pair = machine__find_kernel_symbol(&kallsyms, type,
-							 mem_start, NULL);
+		first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL);
 		pair = first_pair;
 
 		if (pair && UM(pair->start) == mem_start) {
@@ -149,7 +147,7 @@
 				 */
 				continue;
 			} else {
-				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
+				pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL);
 				if (pair) {
 					if (UM(pair->start) == mem_start)
 						goto next_pair;
@@ -183,7 +181,7 @@
 		 * so use the short name, less descriptive but the same ("[kernel]" in
 		 * both cases.
 		 */
-		pair = map_groups__find_by_name(&kallsyms.kmaps, type,
+		pair = map_groups__find_by_name(&kallsyms.kmaps,
 						(map->dso->kernel ?
 							map->dso->short_name :
 							map->dso->name));
@@ -206,7 +204,7 @@
 		mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
 		mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
 
-		pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
+		pair = map_groups__find(&kallsyms.kmaps, mem_start);
 		if (pair == NULL || pair->priv)
 			continue;
 
@@ -228,7 +226,7 @@
 
 	header_printed = false;
 
-	maps = &kallsyms.kmaps.maps[type];
+	maps = machine__kernel_maps(&kallsyms);
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		if (!map->priv) {
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 0be4138..f247221 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-header_dir=$1
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
 printf "static const char *prctl_options[] = {\n"
 regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 3781d74..8be40fa 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -695,6 +695,7 @@
 		"O             Bump offset level (jump targets -> +call -> all -> cycle thru)\n"
 		"s             Toggle source code view\n"
 		"t             Circulate percent, total period, samples view\n"
+		"c             Show min/max cycle\n"
 		"/             Search string\n"
 		"k             Toggle line numbers\n"
 		"P             Print to [symbol_name].annotation file.\n"
@@ -791,6 +792,13 @@
 				notes->options->show_total_period = true;
 			annotation__update_column_widths(notes);
 			continue;
+		case 'c':
+			notes->options->show_minmax_cycle =
+				!notes->options->show_minmax_cycle;
+			annotation__update_column_widths(notes);
+			continue;
 		case K_LEFT:
 		case K_ESC:
 		case 'q':
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index e03fa75..5b8b8c6 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -104,7 +104,7 @@
 {
 	struct map_browser mb = {
 		.b = {
-			.entries = &map->dso->symbols[map->type],
+			.entries = &map->dso->symbols,
 			.refresh = ui_browser__rb_tree_refresh,
 			.seek	 = ui_browser__rb_tree_seek,
 			.write	 = map_browser__write,
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 6832fcb..c1eb476 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -819,8 +819,7 @@
 		}
 
 		if (h->ms.map == NULL && verbose > 1) {
-			__map_groups__fprintf_maps(h->thread->mg,
-						   MAP__FUNCTION, fp);
+			map_groups__fprintf(h->thread->mg, fp);
 			fprintf(fp, "%.10s end\n", graph_dotted_line);
 		}
 	}
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8052373..5d4c45b 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -152,6 +152,8 @@
 libperf-$(CONFIG_CXX) += c++/
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
+
 # avoid compiler warnings in 32-bit mode
 CFLAGS_genelf_debug.o  += -Wno-packed
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 5d74a30..7189768 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -760,6 +760,15 @@
 	ch[offset].num_aggr++;
 	ch[offset].cycles_aggr += cycles;
 
+	if (cycles > ch[offset].cycles_max)
+		ch[offset].cycles_max = cycles;
+
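+	/* cycles_min == 0 doubles as "unset"; only non-zero samples
+	 * can establish or lower the minimum.
+	 */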
+	if (ch[offset].cycles_min) {
+		if (cycles && cycles < ch[offset].cycles_min)
+			ch[offset].cycles_min = cycles;
+	} else
+		ch[offset].cycles_min = cycles;
+
 	if (!have_start && ch[offset].have_start)
 		return 0;
 	if (ch[offset].num) {
@@ -953,8 +962,11 @@
 			if (ch->have_start)
 				annotation__count_and_fill(notes, ch->start, offset, ch);
 			al = notes->offsets[offset];
-			if (al && ch->num_aggr)
+			if (al && ch->num_aggr) {
 				al->cycles = ch->cycles_aggr / ch->num_aggr;
+				al->cycles_max = ch->cycles_max;
+				al->cycles_min = ch->cycles_min;
+			}
 			notes->have_cycles = true;
 		}
 	}
@@ -1953,6 +1965,7 @@
 	u64 len;
 	int width = symbol_conf.show_total_period ? 12 : 8;
 	int graph_dotted_len;
+	char buf[512];
 
 	filename = strdup(dso->long_name);
 	if (!filename)
@@ -1965,8 +1978,11 @@
 
 	len = symbol__size(sym);
 
-	if (perf_evsel__is_group_event(evsel))
+	if (perf_evsel__is_group_event(evsel)) {
 		width *= evsel->nr_members;
+		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+		evsel_name = buf;
+	}
 
 	graph_dotted_len = printf(" %-*.*s|	Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n",
 				  width, width, symbol_conf.show_total_period ? "Period" :
@@ -2486,13 +2502,38 @@
 		else
 			obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
 
-		if (al->cycles)
-			obj__printf(obj, "%*" PRIu64 " ",
+		if (!notes->options->show_minmax_cycle) {
+			if (al->cycles)
+				obj__printf(obj, "%*" PRIu64 " ",
 					   ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
-		else if (!show_title)
-			obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " ");
-		else
-			obj__printf(obj, "%*s ", ANNOTATION__CYCLES_WIDTH - 1, "Cycle");
+			else if (!show_title)
+				obj__printf(obj, "%*s",
+					    ANNOTATION__CYCLES_WIDTH, " ");
+			else
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__CYCLES_WIDTH - 1,
+					    "Cycle");
+		} else {
+			if (al->cycles) {
+				char str[32];
+
+				scnprintf(str, sizeof(str),
+					"%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")",
+					al->cycles, al->cycles_min,
+					al->cycles_max);
+
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+					    str);
+			} else if (!show_title)
+				obj__printf(obj, "%*s",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH,
+					    " ");
+			else
+				obj__printf(obj, "%*s ",
+					    ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+					    "Cycle(min/max)");
+		}
 	}
 
 	obj__printf(obj, " ");
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index f28a9e4..5080b6d 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -61,6 +61,7 @@
 
 #define ANNOTATION__IPC_WIDTH 6
 #define ANNOTATION__CYCLES_WIDTH 6
+#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
 
 struct annotation_options {
 	bool hide_src_code,
@@ -69,7 +70,8 @@
 	     show_linenr,
 	     show_nr_jumps,
 	     show_nr_samples,
-	     show_total_period;
+	     show_total_period,
+	     show_minmax_cycle;
 	u8   offset_level;
 };
 
@@ -105,6 +107,8 @@
 	int			 jump_sources;
 	float			 ipc;
 	u64			 cycles;
+	u64			 cycles_max;
+	u64			 cycles_min;
 	size_t			 privsize;
 	char			*path;
 	u32			 idx;
@@ -186,6 +190,8 @@
 	u64	start;
 	u64	cycles;
 	u64	cycles_aggr;
+	u64	cycles_max;
+	u64	cycles_min;
 	u32	num;
 	u32	num_aggr;
 	u8	have_start;
@@ -239,6 +245,9 @@
 
 static inline int annotation__cycles_width(struct annotation *notes)
 {
+	if (notes->have_cycles && notes->options->show_minmax_cycle)
+		return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH;
+
 	return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
 }
 
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 857de69..d056447 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1679,7 +1679,7 @@
 static bool kern_sym_match(struct sym_args *args, const char *name, char type)
 {
 	/* A function with the same name, and global or the n'th found or any */
-	return symbol_type__is_a(type, MAP__FUNCTION) &&
+	return kallsyms__is_function(type) &&
 	       !strcmp(name, args->name) &&
 	       ((args->global && isupper(type)) ||
 		(args->selected && ++(args->cnt) == args->idx) ||
@@ -1784,7 +1784,7 @@
 {
 	struct sym_args *args = arg;
 
-	if (!symbol_type__is_a(type, MAP__FUNCTION))
+	if (!kallsyms__is_function(type))
 		return 0;
 
 	if (!args->started) {
@@ -1915,7 +1915,7 @@
 
 	pr_err("Multiple symbols with name '%s'\n", sym_name);
 
-	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	sym = dso__first_symbol(dso);
 	while (sym) {
 		if (dso_sym_match(sym, sym_name, &cnt, -1)) {
 			pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
@@ -1945,7 +1945,7 @@
 	*start = 0;
 	*size = 0;
 
-	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	sym = dso__first_symbol(dso);
 	while (sym) {
 		if (*start) {
 			if (!*size)
@@ -1972,8 +1972,8 @@
 
 static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
 {
-	struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION);
-	struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION);
+	struct symbol *first_sym = dso__first_symbol(dso);
+	struct symbol *last_sym = dso__last_symbol(dso);
 
 	if (!first_sym || !last_sym) {
 		pr_err("Failed to determine filter for %s\nNo symbols found.\n",
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index af7ad81..cee6587 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -66,7 +66,7 @@
 	}
 
 	obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
-	if (IS_ERR(obj)) {
+	if (IS_ERR_OR_NULL(obj)) {
 		pr_debug("bpf: failed to load buffer\n");
 		return ERR_PTR(-EINVAL);
 	}
@@ -102,14 +102,14 @@
 			pr_debug("bpf: successfull builtin compilation\n");
 		obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
 
-		if (!IS_ERR(obj) && llvm_param.dump_obj)
+		if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
 			llvm__dump_obj(filename, obj_buf, obj_buf_sz);
 
 		free(obj_buf);
 	} else
 		obj = bpf_object__open(filename);
 
-	if (IS_ERR(obj)) {
+	if (IS_ERR_OR_NULL(obj)) {
 		pr_debug("bpf: failed to load %s\n", filename);
 		return obj;
 	}
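
The IS_ERR() to IS_ERR_OR_NULL() switch matters because bpf_object__open_buffer() can return NULL as well as an encoded errno, and IS_ERR() alone would let a NULL object through. A sketch of the convention, modeled on include/linux/err.h:

    #include <stdbool.h>

    #define MAX_ERRNO 4095

    /* Error pointers encode -errno in the top 4095 values of the
     * address space. */
    static inline bool is_err(const void *ptr)
    {
    	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    static inline bool is_err_or_null(const void *ptr)
    {
    	return !ptr || is_err(ptr);
    }
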
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 537eadd..04b1d53 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -47,9 +47,7 @@
 		return -1;
 	}
 
-	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
-
-	if (al.map != NULL)
+	if (thread__find_map(thread, sample->cpumode, sample->ip, &al))
 		al.map->dso->hit = 1;
 
 	thread__put(thread);
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 84eb939..5ac1570 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -707,6 +707,14 @@
 	return set;
 }
 
+static int perf_config__init(void)
+{
+	if (config_set == NULL)
+		config_set = perf_config_set__new();
+
+	return config_set == NULL;
+}
+
 int perf_config(config_fn_t fn, void *data)
 {
 	int ret = 0;
@@ -714,7 +722,7 @@
 	struct perf_config_section *section;
 	struct perf_config_item *item;
 
-	if (config_set == NULL)
+	if (config_set == NULL && perf_config__init())
 		return -1;
 
 	perf_config_set__for_each_entry(config_set, section, item) {
@@ -735,12 +743,6 @@
 	return ret;
 }
 
-void perf_config__init(void)
-{
-	if (config_set == NULL)
-		config_set = perf_config_set__new();
-}
-
 void perf_config__exit(void)
 {
 	perf_config_set__delete(config_set);
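
With perf_config__init() made static, the config set is created lazily on first use and callers can no longer forget the setup call; the helper returns nonzero when allocation fails, which perf_config() maps to -1. The shape of the pattern, reduced to a self-contained sketch:

    #include <stdlib.h>

    /* Sketch of the lazy-init pattern used above: the first caller pays
     * the allocation cost, later callers reuse the cached set. */
    static void *config_cache;

    static int config_cache_init(void)
    {
    	if (config_cache == NULL)
    		config_cache = calloc(1, 64);	/* stands in for perf_config_set__new() */

    	return config_cache == NULL;		/* nonzero on failure */
    }

    int config_cache_use(void)
    {
    	if (config_cache == NULL && config_cache_init())
    		return -1;
    	/* ... walk the cached configuration ... */
    	return 0;
    }
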
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index baf82bf..bd0a589 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -38,7 +38,6 @@
 void perf_config_set__delete(struct perf_config_set *set);
 int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
 			     const char *var, const char *value);
-void perf_config__init(void);
 void perf_config__exit(void);
 void perf_config__refresh(void);
 
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index c8b98fa..4d5fc37 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -96,11 +96,19 @@
 	/* Nothing to do, might as well just return */
 	if (decoder->packet_count == 0)
 		return 0;
+	/*
+	 * The queueing process in function cs_etm_decoder__buffer_packet()
+	 * increments the tail *before* using it.  This is somewhat
+	 * counterintuitive, but it has the advantage of centralizing tail
+	 * management at a single location.  Because of that we need to follow
+	 * the same heuristic with the head, i.e. we increment it before using
+	 * its value.  Otherwise the first element of the packet queue is not
+	 * used.
+	 */
+	decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
 
 	*packet = decoder->packet_buffer[decoder->head];
 
-	decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
-
 	decoder->packet_count--;
 
 	return 1;
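
Because cs_etm_decoder__buffer_packet() advances the tail before storing into it, the consumer has to advance the head before reading too, otherwise slot 0 is silently skipped, which is exactly the bug fixed above. The invariant in isolation (a minimal sketch, assuming a power-of-two ring like MAX_BUFFER):

    #define RING_SIZE 64	/* power of two, so "& (RING_SIZE - 1)" wraps */

    struct ring {
    	int buf[RING_SIZE];
    	unsigned int head, tail;	/* both advanced *before* use */
    	unsigned int count;
    };

    static void ring_put(struct ring *r, int v)
    {
    	r->tail = (r->tail + 1) & (RING_SIZE - 1);	/* increment first... */
    	r->buf[r->tail] = v;				/* ...then store */
    	r->count++;
    }

    static int ring_get(struct ring *r, int *v)
    {
    	if (!r->count)
    		return 0;
    	r->head = (r->head + 1) & (RING_SIZE - 1);	/* mirror the producer */
    	*v = r->buf[r->head];
    	r->count--;
    	return 1;
    }
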
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index bf16dc9..822ba91 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -270,9 +270,7 @@
 		thread = etmq->etm->unknown_thread;
 	}
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al);
-
-	if (!al.map || !al.map->dso)
+	if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
 		return 0;
 
 	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index b0c2b5c..7123746 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -247,9 +247,9 @@
 		*dso_db_id = dso->db_id;
 
 		if (!al->sym) {
-			al->sym = symbol__new(al->addr, 0, 0, "unknown");
+			al->sym = symbol__new(al->addr, 0, 0, 0, "unknown");
 			if (al->sym)
-				dso__insert_symbol(dso, al->map->type, al->sym);
+				dso__insert_symbol(dso, al->sym);
 		}
 
 		if (al->sym) {
@@ -315,8 +315,7 @@
 		al.addr = node->ip;
 
 		if (al.map && !al.sym)
-			al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION,
-						  al.addr);
+			al.sym = dso__find_symbol(al.map->dso, al.addr);
 
 		db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
 
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 36ef45b..cdfc2e5 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1014,7 +1014,7 @@
 	struct dso *dso = dso__new(name);
 
 	if (dso)
-		map = map__new2(0, dso, MAP__FUNCTION);
+		map = map__new2(0, dso);
 
 	return map;
 }
@@ -1176,19 +1176,19 @@
 	return dso->short_name_len;
 }
 
-bool dso__loaded(const struct dso *dso, enum map_type type)
+bool dso__loaded(const struct dso *dso)
 {
-	return dso->loaded & (1 << type);
+	return dso->loaded;
 }
 
-bool dso__sorted_by_name(const struct dso *dso, enum map_type type)
+bool dso__sorted_by_name(const struct dso *dso)
 {
-	return dso->sorted_by_name & (1 << type);
+	return dso->sorted_by_name;
 }
 
-void dso__set_sorted_by_name(struct dso *dso, enum map_type type)
+void dso__set_sorted_by_name(struct dso *dso)
 {
-	dso->sorted_by_name |= (1 << type);
+	dso->sorted_by_name = true;
 }
 
 struct dso *dso__new(const char *name)
@@ -1196,12 +1196,10 @@
 	struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
 
 	if (dso != NULL) {
-		int i;
 		strcpy(dso->name, name);
 		dso__set_long_name(dso, dso->name, false);
 		dso__set_short_name(dso, dso->name, false);
-		for (i = 0; i < MAP__NR_TYPES; ++i)
-			dso->symbols[i] = dso->symbol_names[i] = RB_ROOT;
+		dso->symbols = dso->symbol_names = RB_ROOT;
 		dso->data.cache = RB_ROOT;
 		dso->inlined_nodes = RB_ROOT;
 		dso->srclines = RB_ROOT;
@@ -1231,8 +1229,6 @@
 
 void dso__delete(struct dso *dso)
 {
-	int i;
-
 	if (!RB_EMPTY_NODE(&dso->rb_node))
 		pr_err("DSO %s is still in rbtree when being deleted!\n",
 		       dso->long_name);
@@ -1240,8 +1236,7 @@
 	/* free inlines first, as they reference symbols */
 	inlines__tree_delete(&dso->inlined_nodes);
 	srcline__tree_delete(&dso->srclines);
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		symbols__delete(&dso->symbols[i]);
+	symbols__delete(&dso->symbols);
 
 	if (dso->short_name_allocated) {
 		zfree((char **)&dso->short_name);
@@ -1451,9 +1446,7 @@
 	size_t ret = 0;
 
 	list_for_each_entry(pos, head, node) {
-		int i;
-		for (i = 0; i < MAP__NR_TYPES; ++i)
-			ret += dso__fprintf(pos, i, fp);
+		ret += dso__fprintf(pos, fp);
 	}
 
 	return ret;
@@ -1467,18 +1460,17 @@
 	return fprintf(fp, "%s", sbuild_id);
 }
 
-size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
+size_t dso__fprintf(struct dso *dso, FILE *fp)
 {
 	struct rb_node *nd;
 	size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
 
 	if (dso->short_name != dso->long_name)
 		ret += fprintf(fp, "%s, ", dso->long_name);
-	ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type],
-		       dso__loaded(dso, type) ? "" : "NOT ");
+	ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
 	ret += dso__fprintf_buildid(dso, fp);
 	ret += fprintf(fp, ")\n");
-	for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) {
 		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
 		ret += symbol__fprintf(pos, fp);
 	}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c229dbe..ef69de2 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -140,14 +140,14 @@
 	struct list_head node;
 	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
 	struct rb_root	 *root;		/* root of rbtree that rb_node is in */
-	struct rb_root	 symbols[MAP__NR_TYPES];
-	struct rb_root	 symbol_names[MAP__NR_TYPES];
+	struct rb_root	 symbols;
+	struct rb_root	 symbol_names;
 	struct rb_root	 inlined_nodes;
 	struct rb_root	 srclines;
 	struct {
 		u64		addr;
 		struct symbol	*symbol;
-	} last_find_result[MAP__NR_TYPES];
+	} last_find_result;
 	void		 *a2l;
 	char		 *symsrc_filename;
 	unsigned int	 a2l_fails;
@@ -164,8 +164,8 @@
 	u8		 short_name_allocated:1;
 	u8		 long_name_allocated:1;
 	u8		 is_64_bit:1;
-	u8		 sorted_by_name;
-	u8		 loaded;
+	bool		 sorted_by_name;
+	bool		 loaded;
 	u8		 rel;
 	u8		 build_id[BUILD_ID_SIZE];
 	u64		 text_offset;
@@ -202,14 +202,13 @@
  * @dso: the 'struct dso *' in which symbols are iterated
  * @pos: the 'struct symbol *' to use as a loop cursor
  * @n: the 'struct rb_node *' to use as a temporary storage
- * @type: the 'enum map_type' type of symbols
  */
-#define dso__for_each_symbol(dso, pos, n, type)	\
-	symbols__for_each_entry(&(dso)->symbols[(type)], pos, n)
+#define dso__for_each_symbol(dso, pos, n)	\
+	symbols__for_each_entry(&(dso)->symbols, pos, n)
 
-static inline void dso__set_loaded(struct dso *dso, enum map_type type)
+static inline void dso__set_loaded(struct dso *dso)
 {
-	dso->loaded |= (1 << type);
+	dso->loaded = true;
 }
 
 struct dso *dso__new(const char *name);
@@ -231,11 +230,16 @@
 
 #define dso__zput(dso) __dso__zput(&dso)
 
-bool dso__loaded(const struct dso *dso, enum map_type type);
+bool dso__loaded(const struct dso *dso);
 
-bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
-void dso__set_sorted_by_name(struct dso *dso, enum map_type type);
-void dso__sort_by_name(struct dso *dso, enum map_type type);
+static inline bool dso__has_symbols(const struct dso *dso)
+{
+	return !RB_EMPTY_ROOT(&dso->symbols);
+}
+
+bool dso__sorted_by_name(const struct dso *dso);
+void dso__set_sorted_by_name(struct dso *dso);
+void dso__sort_by_name(struct dso *dso);
 
 void dso__set_build_id(struct dso *dso, void *build_id);
 bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
@@ -349,9 +353,8 @@
 size_t __dsos__fprintf(struct list_head *head, FILE *fp);
 
 size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
-size_t dso__fprintf_symbols_by_name(struct dso *dso,
-				    enum map_type type, FILE *fp);
-size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp);
+size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
+size_t dso__fprintf(struct dso *dso, FILE *fp);
 
 static inline bool dso__is_vmlinux(struct dso *dso)
 {
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 4c84276..59f38c7 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -93,6 +93,37 @@
 	return 0;
 }
 
+static int perf_env__read_arch(struct perf_env *env)
+{
+	struct utsname uts;
+
+	if (env->arch)
+		return 0;
+
+	if (!uname(&uts))
+		env->arch = strdup(uts.machine);
+
+	return env->arch ? 0 : -ENOMEM;
+}
+
+static int perf_env__read_nr_cpus_avail(struct perf_env *env)
+{
+	if (env->nr_cpus_avail == 0)
+		env->nr_cpus_avail = cpu__max_present_cpu();
+
+	return env->nr_cpus_avail ? 0 : -ENOENT;
+}
+
+const char *perf_env__raw_arch(struct perf_env *env)
+{
+	return env && !perf_env__read_arch(env) ? env->arch : "unknown";
+}
+
+int perf_env__nr_cpus_avail(struct perf_env *env)
+{
+	return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0;
+}
+
 void cpu_cache_level__free(struct cpu_cache_level *cache)
 {
 	free(cache->type);
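
Both new helpers degrade gracefully: perf_env__raw_arch() falls back to uname(2) and finally the string "unknown", and perf_env__nr_cpus_avail() falls back to the present-CPU count and finally 0. A hypothetical caller therefore needs no error handling:

    #include <stdio.h>
    #include "env.h"	/* tools/perf/util/env.h */

    /* Illustrative only: print whatever the environment can tell us. */
    static void report_env(struct perf_env *env)
    {
    	printf("arch: %s, CPUs available: %d\n",
    	       perf_env__raw_arch(env),		/* "unknown" on failure */
    	       perf_env__nr_cpus_avail(env));	/* 0 if unreadable */
    }
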
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index c4ef2e5..1f3ccc36 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -76,4 +76,7 @@
 void cpu_cache_level__free(struct cpu_cache_level *cache);
 
 const char *perf_env__arch(struct perf_env *env);
+const char *perf_env__raw_arch(struct perf_env *env);
+int perf_env__nr_cpus_avail(struct perf_env *env);
+
 #endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 98ff3a6..0c8ecf0 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -88,10 +88,10 @@
 	return perf_ns__names[id];
 }
 
-static int perf_tool__process_synth_event(struct perf_tool *tool,
-					  union perf_event *event,
-					  struct machine *machine,
-					  perf_event__handler_t process)
+int perf_tool__process_synth_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct machine *machine,
+				   perf_event__handler_t process)
 {
 	struct perf_sample synth_sample = {
 	.pid	   = -1,
@@ -464,8 +464,7 @@
 {
 	int rc = 0;
 	struct map *pos;
-	struct map_groups *kmaps = &machine->kmaps;
-	struct maps *maps = &kmaps->maps[MAP__FUNCTION];
+	struct maps *maps = machine__kernel_maps(machine);
 	union perf_event *event = zalloc((sizeof(event->mmap) +
 					  machine->id_hdr_size));
 	if (event == NULL) {
@@ -488,7 +487,7 @@
 	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
 		size_t size;
 
-		if (__map__is_kernel(pos))
+		if (!__map__is_kmodule(pos))
 			continue;
 
 		size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
@@ -869,7 +868,7 @@
 	 * Must be a function or at least an alias, as in PARISC64, where "_text" is
 	 * an 'A' to the same address as "_stext".
 	 */
-	if (!(symbol_type__is_a(type, MAP__FUNCTION) ||
+	if (!(kallsyms__is_function(type) ||
 	      type == 'A') || strcmp(name, args->name))
 		return 0;
 
@@ -889,9 +888,16 @@
 	return 0;
 }
 
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-				       perf_event__handler_t process,
-				       struct machine *machine)
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+					      perf_event__handler_t process __maybe_unused,
+					      struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+						perf_event__handler_t process,
+						struct machine *machine)
 {
 	size_t size;
 	struct map *map = machine__kernel_map(machine);
@@ -944,6 +950,19 @@
 	return err;
 }
 
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	int err;
+
+	err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+	if (err < 0)
+		return err;
+
+	return perf_event__synthesize_extra_kmaps(tool, process, machine);
+}
+
 int perf_event__synthesize_thread_map2(struct perf_tool *tool,
 				      struct thread_map *threads,
 				      perf_event__handler_t process,
@@ -1489,9 +1508,8 @@
 	return machine__process_event(machine, event, sample);
 }
 
-void thread__find_addr_map(struct thread *thread, u8 cpumode,
-			   enum map_type type, u64 addr,
-			   struct addr_location *al)
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+			     struct addr_location *al)
 {
 	struct map_groups *mg = thread->mg;
 	struct machine *machine = mg->machine;
@@ -1505,7 +1523,7 @@
 
 	if (machine == NULL) {
 		al->map = NULL;
-		return;
+		return NULL;
 	}
 
 	if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
@@ -1533,10 +1551,10 @@
 			!perf_host)
 			al->filtered |= (1 << HIST_FILTER__HOST);
 
-		return;
+		return NULL;
 	}
 try_again:
-	al->map = map_groups__find(mg, type, al->addr);
+	al->map = map_groups__find(mg, al->addr);
 	if (al->map == NULL) {
 		/*
 		 * If this is outside of all known maps, and is a negative
@@ -1563,17 +1581,17 @@
 			map__load(al->map);
 		al->addr = al->map->map_ip(al->map, al->addr);
 	}
+
+	return al->map;
 }
 
-void thread__find_addr_location(struct thread *thread,
-				u8 cpumode, enum map_type type, u64 addr,
-				struct addr_location *al)
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+				   u64 addr, struct addr_location *al)
 {
-	thread__find_addr_map(thread, cpumode, type, addr, al);
-	if (al->map != NULL)
+	al->sym = NULL;
+	if (thread__find_map(thread, cpumode, addr, al))
 		al->sym = map__find_symbol(al->map, al->addr);
-	else
-		al->sym = NULL;
+	return al->sym;
 }
 
 /*
@@ -1590,7 +1608,7 @@
 		return -1;
 
 	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
-	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
+	thread__find_map(thread, sample->cpumode, sample->ip, al);
 	dump_printf(" ...... dso: %s\n",
 		    al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -1669,10 +1687,7 @@
 void thread__resolve(struct thread *thread, struct addr_location *al,
 		     struct perf_sample *sample)
 {
-	thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al);
-	if (!al->map)
-		thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE,
-				      sample->addr, al);
+	thread__find_map(thread, sample->cpumode, sample->addr, al);
 
 	al->cpu = sample->cpu;
 	al->sym = NULL;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 0f79474..bfa60bc 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -750,6 +750,10 @@
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct machine *machine);
+int perf_tool__process_synth_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct machine *machine,
+				   perf_event__handler_t process);
 int perf_event__process(struct perf_tool *tool,
 			union perf_event *event,
 			struct perf_sample *sample,
@@ -796,6 +800,10 @@
 				       bool mmap_data,
 				       unsigned int proc_map_timeout);
 
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine);
+
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index a59281d..e7a4b31 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1795,3 +1795,18 @@
 
 	return true;
 }
+
+/*
+ * Events in the data file are not collected in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+void perf_evlist__force_leader(struct perf_evlist *evlist)
+{
+	if (!evlist->nr_groups) {
+		struct perf_evsel *leader = perf_evlist__first(evlist);
+
+		perf_evlist__set_leader(evlist);
+		leader->forced_leader = true;
+	}
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 6c41b2f..dc66436 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -309,4 +309,7 @@
 					    union perf_event *event);
 
 bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
+
+void perf_evlist__force_leader(struct perf_evlist *evlist);
+
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4cd2cf9..150db5ed 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2862,7 +2862,7 @@
 			return scnprintf(msg, size,
 					 "Not enough memory to setup event with callchain.\n"
 					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
-					 "Hint: Current value: %d", sysctl_perf_event_max_stack);
+					 "Hint: Current value: %d", sysctl__max_stack());
 		break;
 	case ENODEV:
 		if (target->cpu_list)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 92ec009..b13f5f2 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -127,6 +127,7 @@
 	bool			precise_max;
 	bool			ignore_missing_thread;
 	bool			forced_leader;
+	bool			use_uncore_alias;
 	/* parse modifier helper */
 	int			exclude_GH;
 	int			nr_members;
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index c540d47..aafbe54 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -114,7 +114,7 @@
 
 	fd = open("/dev/urandom", O_RDONLY);
 	if (fd == -1)
-		err(1, "cannot access /dev/urandom for builid");
+		err(1, "cannot access /dev/urandom for buildid");
 
 	sret = read(fd, note->build_id, sz);
 
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 72db274..7f0c83b 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -335,8 +335,7 @@
 	if (!thread)
 		return -1;
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
-	if (!al.map || !al.map->dso)
+	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
 		goto out_put;
 
 	len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf,
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
index e23578c..2669c9f 100644
--- a/tools/perf/util/intel-pt-decoder/insn.h
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -208,4 +208,22 @@
 	return insn_offset_displacement(insn) + insn->displacement.nbytes;
 }
 
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+	return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+		(insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+		 X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
 #endif /* _ASM_X86_INSN_H */
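
insn_masking_exception() expects an already-decoded instruction, so a caller would run the decoder declared in this header first. A sketch (the buffer and length are illustrative):

    #include "insn.h"

    /* Sketch: does the instruction at 'kaddr' suppress the following
     * single-step trap (MOV SS / POP SS)?  x86_64 decoding assumed. */
    static int would_mask_single_step(const void *kaddr, int buf_len)
    {
    	struct insn insn;

    	insn_init(&insn, kaddr, buf_len, /* x86_64 */ 1);
    	insn_get_modrm(&insn);	/* decodes prefixes, opcode and ModRM */

    	return insn_masking_exception(&insn);
    }
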
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 0effaff..492986a 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -442,8 +442,7 @@
 	}
 
 	while (1) {
-		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
-		if (!al.map || !al.map->dso)
+		if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
 			return -EINVAL;
 
 		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
@@ -596,8 +595,7 @@
 	if (!thread)
 		return -EINVAL;
 
-	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
-	if (!al.map || !al.map->dso)
+	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
 		return -EINVAL;
 
 	offset = al.map->map_ip(al.map, ip);
@@ -1565,7 +1563,7 @@
 	if (map__load(map))
 		return 0;
 
-	start = dso__first_symbol(map->dso, MAP__FUNCTION);
+	start = dso__first_symbol(map->dso);
 
 	for (sym = start; sym; sym = dso__next_symbol(sym)) {
 		if (sym->binding == STB_GLOBAL &&
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 1cca0a2..976e658 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -14,11 +14,12 @@
 #include "config.h"
 #include "util.h"
 #include <sys/wait.h>
+#include <subcmd/exec-cmd.h>
 
 #define CLANG_BPF_CMD_DEFAULT_TEMPLATE				\
 		"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
 		"-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE "	\
-		"$CLANG_OPTIONS $KERNEL_INC_OPTIONS "		\
+		"$CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS " \
 		"-Wno-unused-value -Wno-pointer-sign "		\
 		"-working-directory $WORKING_DIR "		\
 		"-c \"$CLANG_SOURCE\" -target bpf -O2 -o -"
@@ -212,7 +213,7 @@
 "     \t\thttp://llvm.org/apt\n\n"
 "     \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
 "     \toption in [llvm] section of ~/.perfconfig to:\n\n"
-"     \t  \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n"
+"     \t  \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
 "     \t     -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
 "     \t     -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
 "     \t(Replace /path/to/llc with path to your llc)\n\n"
@@ -431,9 +432,11 @@
 	const char *clang_opt = llvm_param.clang_opt;
 	char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
 	char serr[STRERR_BUFSIZE];
-	char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
+	char *kbuild_dir = NULL, *kbuild_include_opts = NULL,
+	     *perf_bpf_include_opts = NULL;
 	const char *template = llvm_param.clang_bpf_cmd_template;
-	char *command_echo, *command_out;
+	char *command_echo = NULL, *command_out;
+	char *perf_include_dir = system_path(PERF_INCLUDE_DIR);
 
 	if (path[0] != '-' && realpath(path, abspath) == NULL) {
 		err = errno;
@@ -471,12 +474,14 @@
 
 	snprintf(linux_version_code_str, sizeof(linux_version_code_str),
 		 "0x%x", kernel_version);
-
+	if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0)
+		goto errout;
 	force_set_env("NR_CPUS", nr_cpus_avail_str);
 	force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
 	force_set_env("CLANG_EXEC", clang_path);
 	force_set_env("CLANG_OPTIONS", clang_opt);
 	force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
+	force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts);
 	force_set_env("WORKING_DIR", kbuild_dir ? : ".");
 
 	/*
@@ -512,6 +517,8 @@
 	free(command_out);
 	free(kbuild_dir);
 	free(kbuild_include_opts);
+	free(perf_bpf_include_opts);
+	free(perf_include_dir);
 
 	if (!p_obj_buf)
 		free(obj_buf);
@@ -526,6 +533,8 @@
 	free(kbuild_dir);
 	free(kbuild_include_opts);
 	free(obj_buf);
+	free(perf_bpf_include_opts);
+	free(perf_include_dir);
 	if (p_obj_buf)
 		*p_obj_buf = NULL;
 	if (p_obj_buf_sz)
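
The new $PERF_BPF_INC_OPTIONS variable is simply "-I<perf include dir>/bpf", built from the compile-time PERF_INCLUDE_DIR (wired up in util/Build above) and exported so the clang command template can expand it. The mechanics, reduced to a sketch with an illustrative path:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
    	const char *include_dir = "/usr/lib/perf/include";	/* illustrative */
    	char *opts;

    	if (asprintf(&opts, "-I%s/bpf", include_dir) < 0)
    		return 1;
    	setenv("PERF_BPF_INC_OPTIONS", opts, 1);	/* seen by the template */
    	printf("%s\n", getenv("PERF_BPF_INC_OPTIONS"));
    	free(opts);
    	return 0;
    }
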
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 32d5049..e7b4a8b 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -24,6 +24,7 @@
 
 #include "sane_ctype.h"
 #include <symbol/kallsyms.h>
+#include <linux/mman.h>
 
 static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
 
@@ -81,8 +82,7 @@
 	machine->kptr_restrict_warned = false;
 	machine->comm_exec = false;
 	machine->kernel_start = 0;
-
-	memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps));
+	machine->vmlinux_map = NULL;
 
 	machine->root_dir = strdup(root_dir);
 	if (machine->root_dir == NULL)
@@ -137,13 +137,11 @@
 	struct machine *machine = machine__new_host();
 	/*
 	 * FIXME:
-	 * 1) MAP__FUNCTION will go away when we stop loading separate maps for
-	 *    functions and data objects.
-	 * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely
+	 * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
 	 *    ask for not using the kcore parsing code, once this one is fixed
 	 *    to create a map per module.
 	 */
-	if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) {
+	if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) {
 		machine__delete(machine);
 		machine = NULL;
 	}
@@ -673,8 +671,7 @@
 	if (kmod_path__parse_name(&m, filename))
 		return NULL;
 
-	map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION,
-				       m.name);
+	map = map_groups__find_by_name(&machine->kmaps, m.name);
 	if (map) {
 		/*
 		 * If the map's dso is an offline module, give dso__load()
@@ -689,7 +686,7 @@
 	if (dso == NULL)
 		goto out;
 
-	map = map__new2(start, dso, MAP__FUNCTION);
+	map = map__new2(start, dso);
 	if (map == NULL)
 		goto out;
 
@@ -810,8 +807,8 @@
 	u64 start;
 };
 
-static void machine__get_kallsyms_filename(struct machine *machine, char *buf,
-					   size_t bufsz)
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+				    size_t bufsz)
 {
 	if (machine__is_default_guest(machine))
 		scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
@@ -854,65 +851,171 @@
 	return 0;
 }
 
+int machine__create_extra_kernel_map(struct machine *machine,
+				     struct dso *kernel,
+				     struct extra_kernel_map *xm)
+{
+	struct kmap *kmap;
+	struct map *map;
+
+	map = map__new2(xm->start, kernel);
+	if (!map)
+		return -1;
+
+	map->end   = xm->end;
+	map->pgoff = xm->pgoff;
+
+	kmap = map__kmap(map);
+
+	kmap->kmaps = &machine->kmaps;
+	strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
+
+	map_groups__insert(&machine->kmaps, map);
+
+	pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
+		  kmap->name, map->start, map->end);
+
+	map__put(map);
+
+	return 0;
+}
+
+static u64 find_entry_trampoline(struct dso *dso)
+{
+	/* Duplicates are removed so lookup all aliases */
+	const char *syms[] = {
+		"_entry_trampoline",
+		"__entry_trampoline_start",
+		"entry_SYSCALL_64_trampoline",
+	};
+	struct symbol *sym = dso__first_symbol(dso);
+	unsigned int i;
+
+	for (; sym; sym = dso__next_symbol(sym)) {
+		if (sym->binding != STB_GLOBAL)
+			continue;
+		for (i = 0; i < ARRAY_SIZE(syms); i++) {
+			if (!strcmp(sym->name, syms[i]))
+				return sym->start;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * These values can be used for kernels that do not have symbols for the entry
+ * trampolines in kallsyms.
+ */
+#define X86_64_CPU_ENTRY_AREA_PER_CPU	0xfffffe0000000000ULL
+#define X86_64_CPU_ENTRY_AREA_SIZE	0x2c000
+#define X86_64_ENTRY_TRAMPOLINE		0x6000
+
+/* Map x86_64 PTI entry trampolines */
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+					  struct dso *kernel)
+{
+	struct map_groups *kmaps = &machine->kmaps;
+	struct maps *maps = &kmaps->maps;
+	int nr_cpus_avail, cpu;
+	bool found = false;
+	struct map *map;
+	u64 pgoff;
+
+	/*
+	 * In the vmlinux case, pgoff is a virtual address which must now be
+	 * mapped to a vmlinux offset.
+	 */
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		struct kmap *kmap = __map__kmap(map);
+		struct map *dest_map;
+
+		if (!kmap || !is_entry_trampoline(kmap->name))
+			continue;
+
+		dest_map = map_groups__find(kmaps, map->pgoff);
+		if (dest_map != map)
+			map->pgoff = dest_map->map_ip(dest_map, map->pgoff);
+		found = true;
+	}
+	if (found || machine->trampolines_mapped)
+		return 0;
+
+	pgoff = find_entry_trampoline(kernel);
+	if (!pgoff)
+		return 0;
+
+	nr_cpus_avail = machine__nr_cpus_avail(machine);
+
+	/* Add a 1 page map for each CPU's entry trampoline */
+	for (cpu = 0; cpu < nr_cpus_avail; cpu++) {
+		u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU +
+			 cpu * X86_64_CPU_ENTRY_AREA_SIZE +
+			 X86_64_ENTRY_TRAMPOLINE;
+		struct extra_kernel_map xm = {
+			.start = va,
+			.end   = va + page_size,
+			.pgoff = pgoff,
+		};
+
+		strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN);
+
+		if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0)
+			return -1;
+	}
+
+	machine->trampolines_mapped = nr_cpus_avail;
+
+	return 0;
+}
+
+int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused,
+					     struct dso *kernel __maybe_unused)
+{
+	return 0;
+}
+
 static int
 __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
 {
-	int type;
+	struct kmap *kmap;
+	struct map *map;
 
 	/* In case of renewal the kernel map, destroy previous one */
 	machine__destroy_kernel_maps(machine);
 
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct kmap *kmap;
-		struct map *map;
+	machine->vmlinux_map = map__new2(0, kernel);
+	if (machine->vmlinux_map == NULL)
+		return -1;
 
-		machine->vmlinux_maps[type] = map__new2(0, kernel, type);
-		if (machine->vmlinux_maps[type] == NULL)
-			return -1;
+	machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
+	map = machine__kernel_map(machine);
+	kmap = map__kmap(map);
+	if (!kmap)
+		return -1;
 
-		machine->vmlinux_maps[type]->map_ip =
-			machine->vmlinux_maps[type]->unmap_ip =
-				identity__map_ip;
-		map = __machine__kernel_map(machine, type);
-		kmap = map__kmap(map);
-		if (!kmap)
-			return -1;
-
-		kmap->kmaps = &machine->kmaps;
-		map_groups__insert(&machine->kmaps, map);
-	}
+	kmap->kmaps = &machine->kmaps;
+	map_groups__insert(&machine->kmaps, map);
 
 	return 0;
 }
 
 void machine__destroy_kernel_maps(struct machine *machine)
 {
-	int type;
+	struct kmap *kmap;
+	struct map *map = machine__kernel_map(machine);
 
-	for (type = 0; type < MAP__NR_TYPES; ++type) {
-		struct kmap *kmap;
-		struct map *map = __machine__kernel_map(machine, type);
+	if (map == NULL)
+		return;
 
-		if (map == NULL)
-			continue;
-
-		kmap = map__kmap(map);
-		map_groups__remove(&machine->kmaps, map);
-		if (kmap && kmap->ref_reloc_sym) {
-			/*
-			 * ref_reloc_sym is shared among all maps, so free just
-			 * on one of them.
-			 */
-			if (type == MAP__FUNCTION) {
-				zfree((char **)&kmap->ref_reloc_sym->name);
-				zfree(&kmap->ref_reloc_sym);
-			} else
-				kmap->ref_reloc_sym = NULL;
-		}
-
-		map__put(machine->vmlinux_maps[type]);
-		machine->vmlinux_maps[type] = NULL;
+	kmap = map__kmap(map);
+	map_groups__remove(&machine->kmaps, map);
+	if (kmap && kmap->ref_reloc_sym) {
+		zfree((char **)&kmap->ref_reloc_sym->name);
+		zfree(&kmap->ref_reloc_sym);
 	}
+
+	map__zput(machine->vmlinux_map);
 }
 
 int machines__create_guest_kernel_maps(struct machines *machines)
@@ -989,32 +1092,31 @@
 	return machine__create_kernel_maps(machine);
 }
 
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-			     enum map_type type)
+int machine__load_kallsyms(struct machine *machine, const char *filename)
 {
 	struct map *map = machine__kernel_map(machine);
 	int ret = __dso__load_kallsyms(map->dso, filename, map, true);
 
 	if (ret > 0) {
-		dso__set_loaded(map->dso, type);
+		dso__set_loaded(map->dso);
 		/*
 		 * Since /proc/kallsyms will have multiple sessions for the
 		 * kernel, with modules between them, fixup the end of all
 		 * sections.
 		 */
-		__map_groups__fixup_end(&machine->kmaps, type);
+		map_groups__fixup_end(&machine->kmaps);
 	}
 
 	return ret;
 }
 
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
+int machine__load_vmlinux_path(struct machine *machine)
 {
 	struct map *map = machine__kernel_map(machine);
 	int ret = dso__load_vmlinux_path(map->dso, map);
 
 	if (ret > 0)
-		dso__set_loaded(map->dso, type);
+		dso__set_loaded(map->dso);
 
 	return ret;
 }
@@ -1055,10 +1157,9 @@
 static int map_groups__set_module_path(struct map_groups *mg, const char *path,
 				       struct kmod_path *m)
 {
-	struct map *map;
 	char *long_name;
+	struct map *map = map_groups__find_by_name(mg, m->name);
 
-	map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name);
 	if (map == NULL)
 		return 0;
 
@@ -1207,19 +1308,14 @@
 static void machine__set_kernel_mmap(struct machine *machine,
 				     u64 start, u64 end)
 {
-	int i;
-
-	for (i = 0; i < MAP__NR_TYPES; i++) {
-		machine->vmlinux_maps[i]->start = start;
-		machine->vmlinux_maps[i]->end   = end;
-
-		/*
-		 * Be a bit paranoid here, some perf.data file came with
-		 * a zero sized synthesized MMAP event for the kernel.
-		 */
-		if (start == 0 && end == 0)
-			machine->vmlinux_maps[i]->end = ~0ULL;
-	}
+	machine->vmlinux_map->start = start;
+	machine->vmlinux_map->end   = end;
+	/*
+	 * Be a bit paranoid here, some perf.data files came with
+	 * a zero-sized synthesized MMAP event for the kernel.
+	 */
+	if (start == 0 && end == 0)
+		machine->vmlinux_map->end = ~0ULL;
 }
 
 int machine__create_kernel_maps(struct machine *machine)
@@ -1234,9 +1330,8 @@
 		return -1;
 
 	ret = __machine__create_kernel_maps(machine, kernel);
-	dso__put(kernel);
 	if (ret < 0)
-		return -1;
+		goto out_put;
 
 	if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
 		if (machine__is_host(machine))
@@ -1249,9 +1344,10 @@
 
 	if (!machine__get_running_kernel_start(machine, &name, &addr)) {
 		if (name &&
-		    maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
+		    map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) {
 			machine__destroy_kernel_maps(machine);
-			return -1;
+			ret = -1;
+			goto out_put;
 		}
 
 		/* we have a real start address now, so re-order the kmaps */
@@ -1267,12 +1363,16 @@
 		map__put(map);
 	}
 
+	if (machine__create_extra_kernel_maps(machine, kernel))
+		pr_debug("Problems creating extra kernel maps, continuing anyway...\n");
+
 	/* update end address of the kernel map using adjacent module address */
 	map = map__next(machine__kernel_map(machine));
 	if (map)
 		machine__set_kernel_mmap(machine, addr, map->start);
-
-	return 0;
+out_put:
+	dso__put(kernel);
+	return ret;
 }
 
 static bool machine__uses_kcore(struct machine *machine)
@@ -1287,6 +1387,32 @@
 	return false;
 }
 
+static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
+					     union perf_event *event)
+{
+	return machine__is(machine, "x86_64") &&
+	       is_entry_trampoline(event->mmap.filename);
+}
+
+static int machine__process_extra_kernel_map(struct machine *machine,
+					     union perf_event *event)
+{
+	struct map *kernel_map = machine__kernel_map(machine);
+	struct dso *kernel = kernel_map ? kernel_map->dso : NULL;
+	struct extra_kernel_map xm = {
+		.start = event->mmap.start,
+		.end   = event->mmap.start + event->mmap.len,
+		.pgoff = event->mmap.pgoff,
+	};
+
+	if (kernel == NULL)
+		return -1;
+
+	strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
+
+	return machine__create_extra_kernel_map(machine, kernel, &xm);
+}
+
 static int machine__process_kernel_mmap_event(struct machine *machine,
 					      union perf_event *event)
 {
@@ -1379,9 +1505,9 @@
 		 * time /proc/sys/kernel/kptr_restrict was non zero.
 		 */
 		if (event->mmap.pgoff != 0) {
-			maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
-							 symbol_name,
-							 event->mmap.pgoff);
+			map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
+							symbol_name,
+							event->mmap.pgoff);
 		}
 
 		if (machine__is_default_guest(machine)) {
@@ -1390,6 +1516,8 @@
 			 */
 			dso__load(kernel, machine__kernel_map(machine));
 		}
+	} else if (perf_event__is_extra_kernel_mmap(machine, event)) {
+		return machine__process_extra_kernel_map(machine, event);
 	}
 	return 0;
 out_problem:
@@ -1402,7 +1530,6 @@
 {
 	struct thread *thread;
 	struct map *map;
-	enum map_type type;
 	int ret = 0;
 
 	if (dump_trace)
@@ -1421,11 +1548,6 @@
 	if (thread == NULL)
 		goto out_problem;
 
-	if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
-		type = MAP__VARIABLE;
-	else
-		type = MAP__FUNCTION;
-
 	map = map__new(machine, event->mmap2.start,
 			event->mmap2.len, event->mmap2.pgoff,
 			event->mmap2.maj,
@@ -1433,7 +1555,7 @@
 			event->mmap2.ino_generation,
 			event->mmap2.prot,
 			event->mmap2.flags,
-			event->mmap2.filename, type, thread);
+			event->mmap2.filename, thread);
 
 	if (map == NULL)
 		goto out_problem_map;
@@ -1460,7 +1582,7 @@
 {
 	struct thread *thread;
 	struct map *map;
-	enum map_type type;
+	u32 prot = 0;
 	int ret = 0;
 
 	if (dump_trace)
@@ -1479,16 +1601,14 @@
 	if (thread == NULL)
 		goto out_problem;
 
-	if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
-		type = MAP__VARIABLE;
-	else
-		type = MAP__FUNCTION;
+	if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
+		prot = PROT_EXEC;
 
 	map = map__new(machine, event->mmap.start,
 			event->mmap.len, event->mmap.pgoff,
-			0, 0, 0, 0, 0, 0,
+			0, 0, 0, 0, prot, 0,
 			event->mmap.filename,
-			type, thread);
+			thread);
 
 	if (map == NULL)
 		goto out_problem_map;
@@ -1664,7 +1784,7 @@
 	 * Thus, we have to try consecutively until we find a match
 	 * or else, the symbol is unknown
 	 */
-	thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al);
+	thread__find_cpumode_addr_location(thread, ip, &al);
 
 	ams->addr = ip;
 	ams->al_addr = al.addr;
@@ -1681,15 +1801,7 @@
 
 	memset(&al, 0, sizeof(al));
 
-	thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al);
-	if (al.map == NULL) {
-		/*
-		 * some shared data regions have execute bit set which puts
-		 * their mapping in the MAP__FUNCTION type array.
-		 * Check there as a fallback option before dropping the sample.
-		 */
-		thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al);
-	}
+	thread__find_symbol(thread, m, addr, &al);
 
 	ams->addr = addr;
 	ams->al_addr = al.addr;
@@ -1758,8 +1870,7 @@
 	al.filtered = 0;
 	al.sym = NULL;
 	if (!cpumode) {
-		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
-						   ip, &al);
+		thread__find_cpumode_addr_location(thread, ip, &al);
 	} else {
 		if (ip >= PERF_CONTEXT_MAX) {
 			switch (ip) {
@@ -1784,8 +1895,7 @@
 			}
 			return 0;
 		}
-		thread__find_addr_location(thread, *cpumode, MAP__FUNCTION,
-					   ip, &al);
+		thread__find_symbol(thread, *cpumode, ip, &al);
 	}
 
 	if (al.sym != NULL) {
@@ -1810,7 +1920,7 @@
 	}
 
 	srcline = callchain_srcline(al.map, al.sym, al.addr);
-	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
+	return callchain_cursor_append(cursor, ip, al.map, al.sym,
 				       branch, flags, nr_loop_iter,
 				       iter_cycles, branch_from, srcline);
 }
@@ -2342,6 +2452,20 @@
 	return 0;
 }
 
+/*
+ * Compares the raw arch string. N.B. see perf_env__arch() instead if a
+ * normalized arch is needed.
+ */
+bool machine__is(struct machine *machine, const char *arch)
+{
+	return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
+}
+
+int machine__nr_cpus_avail(struct machine *machine)
+{
+	return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
+}
+
 int machine__get_kernel_start(struct machine *machine)
 {
 	struct map *map = machine__kernel_map(machine);
@@ -2358,7 +2482,12 @@
 	machine->kernel_start = 1ULL << 63;
 	if (map) {
 		err = map__load(map);
-		if (!err)
+		/*
+		 * On x86_64, PTI entry trampolines are less than the
+		 * start of kernel text, but still above 2^63. So leave
+		 * kernel_start = 1ULL << 63 for x86_64.
+		 */
+		if (!err && !machine__is(machine, "x86_64"))
 			machine->kernel_start = map->start;
 	}
 	return err;
@@ -2373,7 +2502,7 @@
 {
 	struct machine *machine = vmachine;
 	struct map *map;
-	struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map);
+	struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map);
 
 	if (sym == NULL)
 		return NULL;
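
The per-CPU trampoline addresses used by machine__map_x86_64_entry_trampolines() are pure arithmetic over the three constants above; for example (assuming 4 KiB pages):

    #include <stdio.h>

    #define X86_64_CPU_ENTRY_AREA_PER_CPU	0xfffffe0000000000ULL
    #define X86_64_CPU_ENTRY_AREA_SIZE	0x2c000
    #define X86_64_ENTRY_TRAMPOLINE		0x6000

    int main(void)
    {
    	int cpu;

    	/* cpu 0 -> 0xfffffe0000006000, cpu 1 -> 0xfffffe0000032000, ... */
    	for (cpu = 0; cpu < 4; cpu++)
    		printf("cpu %d: trampoline at %#llx\n", cpu,
    		       X86_64_CPU_ENTRY_AREA_PER_CPU +
    		       cpu * X86_64_CPU_ENTRY_AREA_SIZE +
    		       X86_64_ENTRY_TRAMPOLINE);
    	return 0;
    }
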
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 66cc200..1de7660 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -49,13 +49,14 @@
 	struct perf_env   *env;
 	struct dsos	  dsos;
 	struct map_groups kmaps;
-	struct map	  *vmlinux_maps[MAP__NR_TYPES];
+	struct map	  *vmlinux_map;
 	u64		  kernel_start;
 	pid_t		  *current_tid;
 	union { /* Tool specific area */
 		void	  *priv;
 		u64	  db_id;
 	};
+	bool		  trampolines_mapped;
 };
 
 static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
@@ -64,16 +65,22 @@
 	return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
 }
 
-static inline
-struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
-{
-	return machine->vmlinux_maps[type];
-}
-
+/*
+ * The main kernel (vmlinux) map
+ */
 static inline
 struct map *machine__kernel_map(struct machine *machine)
 {
-	return __machine__kernel_map(machine, MAP__FUNCTION);
+	return machine->vmlinux_map;
+}
+
+/*
+ * kernel (the one returned by machine__kernel_map()) plus kernel module maps
+ */
+static inline
+struct maps *machine__kernel_maps(struct machine *machine)
+{
+	return &machine->kmaps.maps;
 }
 
 int machine__get_kernel_start(struct machine *machine);
@@ -182,6 +189,9 @@
 	return machine ? machine->pid == HOST_KERNEL_ID : false;
 }
 
+bool machine__is(struct machine *machine, const char *arch);
+int machine__nr_cpus_avail(struct machine *machine);
+
 struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 
@@ -190,44 +200,27 @@
 size_t machine__fprintf(struct machine *machine, FILE *fp);
 
 static inline
-struct symbol *machine__find_kernel_symbol(struct machine *machine,
-					   enum map_type type, u64 addr,
+struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
 					   struct map **mapp)
 {
-	return map_groups__find_symbol(&machine->kmaps, type, addr, mapp);
+	return map_groups__find_symbol(&machine->kmaps, addr, mapp);
 }
 
 static inline
 struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
-						   enum map_type type, const char *name,
+						   const char *name,
 						   struct map **mapp)
 {
-	return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp);
-}
-
-static inline
-struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
-					     struct map **mapp)
-{
-	return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr,
-					   mapp);
-}
-
-static inline
-struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
-						     const char *name,
-						     struct map **mapp)
-{
-	return map_groups__find_function_by_name(&machine->kmaps, name, mapp);
+	return map_groups__find_symbol_by_name(&machine->kmaps, name, mapp);
 }
 
 struct map *machine__findnew_module_map(struct machine *machine, u64 start,
 					const char *filename);
 int arch__fix_module_text_start(u64 *start, const char *name);
 
-int machine__load_kallsyms(struct machine *machine, const char *filename,
-			   enum map_type type);
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
+int machine__load_kallsyms(struct machine *machine, const char *filename);
+
+int machine__load_vmlinux_path(struct machine *machine);
 
 size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
 				     bool (skip)(struct dso *dso, int parm), int parm);
@@ -276,4 +269,25 @@
  */
 char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
 
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+				    size_t bufsz);
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+				      struct dso *kernel);
+
+/* Kernel-space maps for symbols that are outside the main kernel map and module maps */
+struct extra_kernel_map {
+	u64 start;
+	u64 end;
+	u64 pgoff;
+	char name[KMAP_NAME_LEN];
+};
+
+int machine__create_extra_kernel_map(struct machine *machine,
+				     struct dso *kernel,
+				     struct extra_kernel_map *xm);
+
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+					  struct dso *kernel);
+
 #endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 8fe5703..6ae97ed 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -22,11 +22,6 @@
 
 static void __maps__insert(struct maps *maps, struct map *map);
 
-const char *map_type__name[MAP__NR_TYPES] = {
-	[MAP__FUNCTION] = "Functions",
-	[MAP__VARIABLE] = "Variables",
-};
-
 static inline int is_anon_memory(const char *filename, u32 flags)
 {
 	return flags & MAP_HUGETLB ||
@@ -129,10 +124,8 @@
 	return false;
 }
 
-void map__init(struct map *map, enum map_type type,
-	       u64 start, u64 end, u64 pgoff, struct dso *dso)
+void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
 {
-	map->type     = type;
 	map->start    = start;
 	map->end      = end;
 	map->pgoff    = pgoff;
@@ -149,7 +142,7 @@
 struct map *map__new(struct machine *machine, u64 start, u64 len,
 		     u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
 		     u64 ino_gen, u32 prot, u32 flags, char *filename,
-		     enum map_type type, struct thread *thread)
+		     struct thread *thread)
 {
 	struct map *map = malloc(sizeof(*map));
 	struct nsinfo *nsi = NULL;
@@ -173,7 +166,7 @@
 		map->flags = flags;
 		nsi = nsinfo__get(thread->nsinfo);
 
-		if ((anon || no_dso) && nsi && type == MAP__FUNCTION) {
+		if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
 			snprintf(newfilename, sizeof(newfilename),
 				 "/tmp/perf-%d.map", nsi->pid);
 			filename = newfilename;
@@ -203,7 +196,7 @@
 		if (dso == NULL)
 			goto out_delete;
 
-		map__init(map, type, start, start + len, pgoff, dso);
+		map__init(map, start, start + len, pgoff, dso);
 
 		if (anon || no_dso) {
 			map->map_ip = map->unmap_ip = identity__map_ip;
@@ -213,8 +206,8 @@
 			 * functions still return NULL, and we avoid the
 			 * unnecessary map__load warning.
 			 */
-			if (type != MAP__FUNCTION)
-				dso__set_loaded(dso, map->type);
+			if (!(prot & PROT_EXEC))
+				dso__set_loaded(dso);
 		}
 		dso->nsinfo = nsi;
 		dso__put(dso);
@@ -231,7 +224,7 @@
  * they are loaded) and for vmlinux, where only after we load all the
  * symbols we'll know where it starts and ends.
  */
-struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
+struct map *map__new2(u64 start, struct dso *dso)
 {
 	struct map *map = calloc(1, (sizeof(*map) +
 				     (dso->kernel ? sizeof(struct kmap) : 0)));
@@ -239,7 +232,7 @@
 		/*
 		 * ->end will be filled after we load all the symbols
 		 */
-		map__init(map, type, start, 0, 0, dso);
+		map__init(map, start, 0, 0, dso);
 	}
 
 	return map;
@@ -256,7 +249,19 @@
  */
 bool __map__is_kernel(const struct map *map)
 {
-	return __machine__kernel_map(map->groups->machine, map->type) == map;
+	return machine__kernel_map(map->groups->machine) == map;
+}
+
+bool __map__is_extra_kernel_map(const struct map *map)
+{
+	struct kmap *kmap = __map__kmap((struct map *)map);
+
+	return kmap && kmap->name[0];
+}
+
+bool map__has_symbols(const struct map *map)
+{
+	return dso__has_symbols(map->dso);
 }
 
 static void map__exit(struct map *map)
@@ -279,7 +284,7 @@
 
 void map__fixup_start(struct map *map)
 {
-	struct rb_root *symbols = &map->dso->symbols[map->type];
+	struct rb_root *symbols = &map->dso->symbols;
 	struct rb_node *nd = rb_first(symbols);
 	if (nd != NULL) {
 		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
@@ -289,7 +294,7 @@
 
 void map__fixup_end(struct map *map)
 {
-	struct rb_root *symbols = &map->dso->symbols[map->type];
+	struct rb_root *symbols = &map->dso->symbols;
 	struct rb_node *nd = rb_last(symbols);
 	if (nd != NULL) {
 		struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
@@ -304,7 +309,7 @@
 	const char *name = map->dso->long_name;
 	int nr;
 
-	if (dso__loaded(map->dso, map->type))
+	if (dso__loaded(map->dso))
 		return 0;
 
 	nr = dso__load(map->dso, map);
@@ -348,7 +353,7 @@
 	if (map__load(map) < 0)
 		return NULL;
 
-	return dso__find_symbol(map->dso, map->type, addr);
+	return dso__find_symbol(map->dso, addr);
 }
 
 struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
@@ -356,10 +361,10 @@
 	if (map__load(map) < 0)
 		return NULL;
 
-	if (!dso__sorted_by_name(map->dso, map->type))
-		dso__sort_by_name(map->dso, map->type);
+	if (!dso__sorted_by_name(map->dso))
+		dso__sort_by_name(map->dso);
 
-	return dso__find_symbol_by_name(map->dso, map->type, name);
+	return dso__find_symbol_by_name(map->dso, name);
 }
 
 struct map *map__clone(struct map *from)
@@ -494,10 +499,7 @@
 
 void map_groups__init(struct map_groups *mg, struct machine *machine)
 {
-	int i;
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		maps__init(&mg->maps[i]);
-	}
+	maps__init(&mg->maps);
 	mg->machine = machine;
 	refcount_set(&mg->refcnt, 1);
 }
@@ -525,22 +527,12 @@
 
 void map_groups__exit(struct map_groups *mg)
 {
-	int i;
-
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		maps__exit(&mg->maps[i]);
+	maps__exit(&mg->maps);
 }
 
 bool map_groups__empty(struct map_groups *mg)
 {
-	int i;
-
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		if (maps__first(&mg->maps[i]))
-			return false;
-	}
-
-	return true;
+	return !maps__first(&mg->maps);
 }
 
 struct map_groups *map_groups__new(struct machine *machine)
@@ -566,10 +558,9 @@
 }
 
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
-				       enum map_type type, u64 addr,
-				       struct map **mapp)
+				       u64 addr, struct map **mapp)
 {
-	struct map *map = map_groups__find(mg, type, addr);
+	struct map *map = map_groups__find(mg, addr);
 
 	/* Ensure map is loaded before using map->map_ip */
 	if (map != NULL && map__load(map) >= 0) {
@@ -608,13 +599,10 @@
 }
 
 struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
-					       enum map_type type,
 					       const char *name,
 					       struct map **mapp)
 {
-	struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp);
-
-	return sym;
+	return maps__find_symbol_by_name(&mg->maps, name, mapp);
 }
 
 int map_groups__find_ams(struct addr_map_symbol *ams)
@@ -622,8 +610,7 @@
 	if (ams->addr < ams->map->start || ams->addr >= ams->map->end) {
 		if (ams->map->groups == NULL)
 			return -1;
-		ams->map = map_groups__find(ams->map->groups, ams->map->type,
-					    ams->addr);
+		ams->map = map_groups__find(ams->map->groups, ams->addr);
 		if (ams->map == NULL)
 			return -1;
 	}
@@ -646,7 +633,7 @@
 		printed += fprintf(fp, "Map:");
 		printed += map__fprintf(pos, fp);
 		if (verbose > 2) {
-			printed += dso__fprintf(pos->dso, pos->type, fp);
+			printed += dso__fprintf(pos->dso, fp);
 			printed += fprintf(fp, "--\n");
 		}
 	}
@@ -656,24 +643,14 @@
 	return printed;
 }
 
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
-				  FILE *fp)
-{
-	size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
-	return printed += maps__fprintf(&mg->maps[type], fp);
-}
-
 size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
 {
-	size_t printed = 0, i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		printed += __map_groups__fprintf_maps(mg, i, fp);
-	return printed;
+	return maps__fprintf(&mg->maps, fp);
 }
 
 static void __map_groups__insert(struct map_groups *mg, struct map *map)
 {
-	__maps__insert(&mg->maps[map->type], map);
+	__maps__insert(&mg->maps, map);
 	map->groups = mg;
 }
 
@@ -758,19 +735,18 @@
 int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 				   FILE *fp)
 {
-	return maps__fixup_overlappings(&mg->maps[map->type], map, fp);
+	return maps__fixup_overlappings(&mg->maps, map, fp);
 }
 
 /*
  * XXX This should not really _copy_ the maps, but refcount them.
  */
-int map_groups__clone(struct thread *thread,
-		      struct map_groups *parent, enum map_type type)
+int map_groups__clone(struct thread *thread, struct map_groups *parent)
 {
 	struct map_groups *mg = thread->mg;
 	int err = -ENOMEM;
 	struct map *map;
-	struct maps *maps = &parent->maps[type];
+	struct maps *maps = &parent->maps;
 
 	down_read(&maps->lock);
 
@@ -877,13 +853,20 @@
 	return NULL;
 }
 
+struct kmap *__map__kmap(struct map *map)
+{
+	if (!map->dso || !map->dso->kernel)
+		return NULL;
+	return (struct kmap *)(map + 1);
+}
+
 struct kmap *map__kmap(struct map *map)
 {
-	if (!map->dso || !map->dso->kernel) {
+	struct kmap *kmap = __map__kmap(map);
+
+	if (!kmap)
 		pr_err("Internal error: map__kmap with a non-kernel map\n");
-		return NULL;
-	}
-	return (struct kmap *)(map + 1);
+	return kmap;
 }
 
 struct map_groups *map__kmaps(struct map *map)
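__map__kmap() above works because kernel maps are over-allocated: the struct kmap is placed directly after the struct map in a single allocation, so (map + 1) points at it. A sketch of that pattern with stub types (hypothetical names, not the perf allocator itself):

#include <stdlib.h>
#include <stdio.h>

struct map_stub  { int kernel; };	/* stands in for struct map */
struct kmap_stub { int ref; };		/* stands in for struct kmap */

static struct map_stub *map_stub__new_kernel(void)
{
	/* one allocation, with room for the kmap tail */
	return calloc(1, sizeof(struct map_stub) + sizeof(struct kmap_stub));
}

static struct kmap_stub *map_stub__kmap(struct map_stub *m)
{
	return (struct kmap_stub *)(m + 1);	/* same trick as __map__kmap() */
}

int main(void)
{
	struct map_stub *m = map_stub__new_kernel();

	if (m) {
		map_stub__kmap(m)->ref = 1;
		printf("kmap ref: %d\n", map_stub__kmap(m)->ref);
		free(m);
	}
	return 0;
}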
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 0e9bbe0..97e2a06 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -8,19 +8,11 @@
 #include <linux/rbtree.h>
 #include <pthread.h>
 #include <stdio.h>
+#include <string.h>
 #include <stdbool.h>
 #include <linux/types.h>
 #include "rwsem.h"
 
-enum map_type {
-	MAP__FUNCTION = 0,
-	MAP__VARIABLE,
-};
-
-#define MAP__NR_TYPES (MAP__VARIABLE + 1)
-
-extern const char *map_type__name[MAP__NR_TYPES];
-
 struct dso;
 struct ip_callchain;
 struct ref_reloc_sym;
@@ -35,7 +27,6 @@
 	};
 	u64			start;
 	u64			end;
-	u8 /* enum map_type */	type;
 	bool			erange_warned;
 	u32			priv;
 	u32			prot;
@@ -56,9 +47,12 @@
 	refcount_t		refcnt;
 };
 
+#define KMAP_NAME_LEN 256
+
 struct kmap {
 	struct ref_reloc_sym	*ref_reloc_sym;
 	struct map_groups	*kmaps;
+	char			name[KMAP_NAME_LEN];
 };
 
 struct maps {
@@ -67,7 +61,7 @@
 };
 
 struct map_groups {
-	struct maps	 maps[MAP__NR_TYPES];
+	struct maps	 maps;
 	struct machine	 *machine;
 	refcount_t	 refcnt;
 };
@@ -85,6 +79,7 @@
 
 void map_groups__put(struct map_groups *mg);
 
+struct kmap *__map__kmap(struct map *map);
 struct kmap *map__kmap(struct map *map);
 struct map_groups *map__kmaps(struct map *map);
 
@@ -125,7 +120,7 @@
  * Note: caller must ensure map->dso is not NULL (map is loaded).
  */
 #define map__for_each_symbol(map, pos, n)	\
-	dso__for_each_symbol(map->dso, pos, n, map->type)
+	dso__for_each_symbol(map->dso, pos, n)
 
 /* map__for_each_symbol_with_name - iterate over the symbols in the given map
  *                                  that have the given name
@@ -144,13 +139,13 @@
 #define map__for_each_symbol_by_name(map, sym_name, pos)		\
 	__map__for_each_symbol_by_name(map, sym_name, (pos))
 
-void map__init(struct map *map, enum map_type type,
+void map__init(struct map *map,
 	       u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct machine *machine, u64 start, u64 len,
 		     u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
 		     u64 ino_gen, u32 prot, u32 flags,
-		     char *filename, enum map_type type, struct thread *thread);
-struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
+		     char *filename, struct thread *thread);
+struct map *map__new2(u64 start, struct dso *dso);
 void map__delete(struct map *map);
 struct map *map__clone(struct map *map);
 
@@ -185,8 +180,6 @@
 
 void map__reloc_vmlinux(struct map *map);
 
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
-				  FILE *fp);
 void maps__insert(struct maps *maps, struct map *map);
 void maps__remove(struct maps *maps, struct map *map);
 struct map *maps__find(struct maps *maps, u64 addr);
@@ -197,34 +190,29 @@
 void map_groups__init(struct map_groups *mg, struct machine *machine);
 void map_groups__exit(struct map_groups *mg);
 int map_groups__clone(struct thread *thread,
-		      struct map_groups *parent, enum map_type type);
+		      struct map_groups *parent);
 size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
 
-int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name,
-				     u64 addr);
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
+				    u64 addr);
 
 static inline void map_groups__insert(struct map_groups *mg, struct map *map)
 {
-	maps__insert(&mg->maps[map->type], map);
+	maps__insert(&mg->maps, map);
 	map->groups = mg;
 }
 
 static inline void map_groups__remove(struct map_groups *mg, struct map *map)
 {
-	maps__remove(&mg->maps[map->type], map);
+	maps__remove(&mg->maps, map);
 }
 
-static inline struct map *map_groups__find(struct map_groups *mg,
-					   enum map_type type, u64 addr)
+static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
 {
-	return maps__find(&mg->maps[type], addr);
+	return maps__find(&mg->maps, addr);
 }
 
-static inline struct map *map_groups__first(struct map_groups *mg,
-					    enum map_type type)
-{
-	return maps__first(&mg->maps[type]);
-}
+struct map *map_groups__first(struct map_groups *mg);
 
 static inline struct map *map_groups__next(struct map *map)
 {
@@ -232,11 +220,9 @@
 }
 
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
-				       enum map_type type, u64 addr,
-				       struct map **mapp);
+				       u64 addr, struct map **mapp);
 
 struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
-					       enum map_type type,
 					       const char *name,
 					       struct map **mapp);
 
@@ -244,24 +230,26 @@
 
 int map_groups__find_ams(struct addr_map_symbol *ams);
 
-static inline
-struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
-						 const char *name, struct map **mapp)
-{
-	return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp);
-}
-
 int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 				   FILE *fp);
 
-struct map *map_groups__find_by_name(struct map_groups *mg,
-				     enum map_type type, const char *name);
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
 
 bool __map__is_kernel(const struct map *map);
+bool __map__is_extra_kernel_map(const struct map *map);
 
 static inline bool __map__is_kmodule(const struct map *map)
 {
-	return !__map__is_kernel(map);
+	return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+}
+
+bool map__has_symbols(const struct map *map);
+
+#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
+
+static inline bool is_entry_trampoline(const char *name)
+{
+	return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
 }
 
 #endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index b8b8a95..15eec49 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -156,13 +156,12 @@
 		    (strcmp(sys_dirent->d_name, ".")) &&	\
 		    (strcmp(sys_dirent->d_name, "..")))
 
-static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
+static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
 {
 	char evt_path[MAXPATHLEN];
 	int fd;
 
-	snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path,
-			sys_dir->d_name, evt_dir->d_name);
+	snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name);
 	fd = open(evt_path, O_RDONLY);
 	if (fd < 0)
 		return -EINVAL;
@@ -171,12 +170,12 @@
 	return 0;
 }
 
-#define for_each_event(sys_dirent, evt_dir, evt_dirent)		\
+#define for_each_event(dir_path, evt_dir, evt_dirent)		\
 	while ((evt_dirent = readdir(evt_dir)) != NULL)		\
 		if (evt_dirent->d_type == DT_DIR &&		\
 		    (strcmp(evt_dirent->d_name, ".")) &&	\
 		    (strcmp(evt_dirent->d_name, "..")) &&	\
-		    (!tp_event_has_id(sys_dirent, evt_dirent)))
+		    (!tp_event_has_id(dir_path, evt_dirent)))
 
 #define MAX_EVENT_LENGTH 512
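The reworked tp_event_has_id() above takes a pre-resolved events directory instead of rebuilding the path from tracing_events_path on each call. A hedged sketch of the same check — it substitutes access() for the patch's open()/close() pair, and the mount path is illustrative:

#include <stdio.h>
#include <errno.h>
#include <unistd.h>
#include <limits.h>

static int tp_event_has_id(const char *dir_path, const char *evt_name)
{
	char evt_path[PATH_MAX];

	/* e.g. /sys/kernel/debug/tracing/events/sched/sched_switch/id */
	snprintf(evt_path, sizeof(evt_path), "%s/%s/id", dir_path, evt_name);
	return access(evt_path, R_OK) == 0 ? 0 : -EINVAL;
}

int main(void)
{
	const char *dir = "/sys/kernel/debug/tracing/events/sched"; /* assumed mount */

	printf("sched_switch has id: %s\n",
	       tp_event_has_id(dir, "sched_switch") == 0 ? "yes" : "no");
	return 0;
}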
 
@@ -190,21 +189,21 @@
 	int fd;
 	u64 id;
 	char evt_path[MAXPATHLEN];
-	char dir_path[MAXPATHLEN];
+	char *dir_path;
 
-	sys_dir = opendir(tracing_events_path);
+	sys_dir = tracing_events__opendir();
 	if (!sys_dir)
 		return NULL;
 
 	for_each_subsystem(sys_dir, sys_dirent) {
-
-		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-			 sys_dirent->d_name);
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
 		evt_dir = opendir(dir_path);
 		if (!evt_dir)
-			continue;
+			goto next;
 
-		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+		for_each_event(dir_path, evt_dir, evt_dirent) {
 
 			scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
 				  evt_dirent->d_name);
@@ -218,6 +217,7 @@
 			close(fd);
 			id = atoll(id_buf);
 			if (id == config) {
+				put_events_file(dir_path);
 				closedir(evt_dir);
 				closedir(sys_dir);
 				path = zalloc(sizeof(*path));
@@ -242,6 +242,8 @@
 			}
 		}
 		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
 	}
 
 	closedir(sys_dir);
@@ -512,14 +514,19 @@
 				      struct parse_events_error *err,
 				      struct list_head *head_config)
 {
-	char evt_path[MAXPATHLEN];
+	char *evt_path;
 	struct dirent *evt_ent;
 	DIR *evt_dir;
 	int ret = 0, found = 0;
 
-	snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name);
+	evt_path = get_events_file(sys_name);
+	if (!evt_path) {
+		tracepoint_error(err, errno, sys_name, evt_name);
+		return -1;
+	}
 	evt_dir = opendir(evt_path);
 	if (!evt_dir) {
+		put_events_file(evt_path);
 		tracepoint_error(err, errno, sys_name, evt_name);
 		return -1;
 	}
@@ -545,6 +552,7 @@
 		ret = -1;
 	}
 
+	put_events_file(evt_path);
 	closedir(evt_dir);
 	return ret;
 }
@@ -570,7 +578,7 @@
 	DIR *events_dir;
 	int ret = 0;
 
-	events_dir = opendir(tracing_events_path);
+	events_dir = tracing_events__opendir();
 	if (!events_dir) {
 		tracepoint_error(err, errno, sys_name, evt_name);
 		return -1;
@@ -1219,13 +1227,16 @@
 
 int parse_events_add_pmu(struct parse_events_state *parse_state,
 			 struct list_head *list, char *name,
-			 struct list_head *head_config, bool auto_merge_stats)
+			 struct list_head *head_config,
+			 bool auto_merge_stats,
+			 bool use_alias)
 {
 	struct perf_event_attr attr;
 	struct perf_pmu_info info;
 	struct perf_pmu *pmu;
 	struct perf_evsel *evsel;
 	struct parse_events_error *err = parse_state->error;
+	bool use_uncore_alias;
 	LIST_HEAD(config_terms);
 
 	pmu = perf_pmu__find(name);
@@ -1244,11 +1255,14 @@
 		memset(&attr, 0, sizeof(attr));
 	}
 
+	use_uncore_alias = (pmu->is_uncore && use_alias);
+
 	if (!head_config) {
 		attr.type = pmu->type;
 		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
 		if (evsel) {
 			evsel->pmu_name = name;
+			evsel->use_uncore_alias = use_uncore_alias;
 			return 0;
 		} else {
 			return -ENOMEM;
@@ -1282,6 +1296,7 @@
 		evsel->metric_expr = info.metric_expr;
 		evsel->metric_name = info.metric_name;
 		evsel->pmu_name = name;
+		evsel->use_uncore_alias = use_uncore_alias;
 	}
 
 	return evsel ? 0 : -ENOMEM;
@@ -1317,7 +1332,8 @@
 				list_add_tail(&term->list, head);
 
 				if (!parse_events_add_pmu(parse_state, list,
-							  pmu->name, head, true)) {
+							  pmu->name, head,
+							  true, true)) {
 					pr_debug("%s -> %s/%s/\n", str,
 						 pmu->name, alias->str);
 					ok++;
@@ -1339,7 +1355,120 @@
 	return parse_events__modifier_event(list, event_mod, true);
 }
 
-void parse_events__set_leader(char *name, struct list_head *list)
+/*
+ * Check if the two uncore PMUs are from the same uncore block
+ * The format of the uncore PMU name is uncore_#blockname_#pmuidx
+ */
+static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b)
+{
+	char *end_a, *end_b;
+
+	end_a = strrchr(pmu_name_a, '_');
+	end_b = strrchr(pmu_name_b, '_');
+
+	if (!end_a || !end_b)
+		return false;
+
+	if ((end_a - pmu_name_a) != (end_b - pmu_name_b))
+		return false;
+
+	return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0);
+}
+
+static int
+parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
+					   struct parse_events_state *parse_state)
+{
+	struct perf_evsel *evsel, *leader;
+	uintptr_t *leaders;
+	bool is_leader = true;
+	int i, nr_pmu = 0, total_members, ret = 0;
+
+	leader = list_first_entry(list, struct perf_evsel, node);
+	evsel = list_last_entry(list, struct perf_evsel, node);
+	total_members = evsel->idx - leader->idx + 1;
+
+	leaders = calloc(total_members, sizeof(uintptr_t));
+	if (WARN_ON(!leaders))
+		return 0;
+
+	/*
+	 * Go through the whole group and do a sanity check.
+	 * All members must use an alias and be from the same uncore block.
+	 * Also, store the leader events in an array.
+	 */
+	__evlist__for_each_entry(list, evsel) {
+
+		/* Only split an uncore group whose members use an alias */
+		if (!evsel->use_uncore_alias)
+			goto out;
+
+		/* The events must be from the same uncore block */
+		if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name))
+			goto out;
+
+		if (!is_leader)
+			continue;
+		/*
+		 * If the event's PMU name starts to repeat, it must be a new
+		 * event. That can be used to distinguish the leader from
+		 * other members, even if they have the same event name.
+		 */
+		if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
+			is_leader = false;
+			continue;
+		}
+		/* The name is always the alias name */
+		WARN_ON(strcmp(leader->name, evsel->name));
+
+		/* Store the leader event for each PMU */
+		leaders[nr_pmu++] = (uintptr_t) evsel;
+	}
+
+	/* only one event alias */
+	if (nr_pmu == total_members) {
+		parse_state->nr_groups--;
+		goto handled;
+	}
+
+	/*
+	 * An uncore event alias is a joint name, which means the same event
+	 * runs on all PMUs of a block.
+	 * Perf doesn't support mixed events from different PMUs in the same
+	 * group. The big group has to be split into multiple small groups
+	 * which only include the events from the same PMU.
+	 *
+	 * Here the uncore event aliases must be from the same uncore block.
+	 * The number of PMUs must be the same for each alias. The number of
+	 * new small groups equals the number of PMUs.
+	 * Set the leader event for the corresponding members in each group.
+	 */
+	i = 0;
+	__evlist__for_each_entry(list, evsel) {
+		if (i >= nr_pmu)
+			i = 0;
+		evsel->leader = (struct perf_evsel *) leaders[i++];
+	}
+
+	/* The number of members and the group name are the same for each group */
+	for (i = 0; i < nr_pmu; i++) {
+		evsel = (struct perf_evsel *) leaders[i];
+		evsel->nr_members = total_members / nr_pmu;
+		evsel->group_name = name ? strdup(name) : NULL;
+	}
+
+	/* Take the new small groups into account */
+	parse_state->nr_groups += nr_pmu - 1;
+
+handled:
+	ret = 1;
+out:
+	free(leaders);
+	return ret;
+}
+
+void parse_events__set_leader(char *name, struct list_head *list,
+			      struct parse_events_state *parse_state)
 {
 	struct perf_evsel *leader;
 
@@ -1348,6 +1477,9 @@
 		return;
 	}
 
+	if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
+		return;
+
 	__perf_evlist__set_leader(list);
 	leader = list_entry(list->next, struct perf_evsel, node);
 	leader->group_name = name ? strdup(name) : NULL;
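To make the splitting above concrete: suppose one alias expands to two PMUs of the same block and the group contains two aliases, so total_members is 4 and events 0 and 1 are the per-PMU leaders. The round-robin below — the index wrapping at nr_pmu, exactly what the loop above does with its "if (i >= nr_pmu) i = 0;" — hands each event the leader for its PMU. A stand-alone sketch with plain ints in place of perf_evsel:

#include <stdio.h>

int main(void)
{
	int total_members = 4, nr_pmu = 2;
	int leaders[2] = { 0, 1 };	/* first event seen on each PMU */
	int i, j = 0;

	for (i = 0; i < total_members; i++) {
		if (j >= nr_pmu)
			j = 0;
		printf("event %d -> leader %d (group of %d)\n",
		       i, leaders[j++], total_members / nr_pmu);
	}
	return 0;
}

Running it prints events 0 and 2 led by 0, and events 1 and 3 led by 1: two small groups, one per PMU.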
@@ -1968,13 +2100,13 @@
 	DIR *sys_dir, *evt_dir;
 	struct dirent *sys_dirent, *evt_dirent;
 	char evt_path[MAXPATHLEN];
-	char dir_path[MAXPATHLEN];
+	char *dir_path;
 	char **evt_list = NULL;
 	unsigned int evt_i = 0, evt_num = 0;
 	bool evt_num_known = false;
 
 restart:
-	sys_dir = opendir(tracing_events_path);
+	sys_dir = tracing_events__opendir();
 	if (!sys_dir)
 		return;
 
@@ -1989,13 +2121,14 @@
 		    !strglobmatch(sys_dirent->d_name, subsys_glob))
 			continue;
 
-		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-			 sys_dirent->d_name);
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
 		evt_dir = opendir(dir_path);
 		if (!evt_dir)
-			continue;
+			goto next;
 
-		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+		for_each_event(dir_path, evt_dir, evt_dirent) {
 			if (event_glob != NULL &&
 			    !strglobmatch(evt_dirent->d_name, event_glob))
 				continue;
@@ -2009,11 +2142,15 @@
 				 sys_dirent->d_name, evt_dirent->d_name);
 
 			evt_list[evt_i] = strdup(evt_path);
-			if (evt_list[evt_i] == NULL)
+			if (evt_list[evt_i] == NULL) {
+				put_events_file(dir_path);
 				goto out_close_evt_dir;
+			}
 			evt_i++;
 		}
 		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
 	}
 	closedir(sys_dir);
 
@@ -2061,21 +2198,21 @@
 	DIR *sys_dir, *evt_dir;
 	struct dirent *sys_dirent, *evt_dirent;
 	char evt_path[MAXPATHLEN];
-	char dir_path[MAXPATHLEN];
+	char *dir_path;
 
-	sys_dir = opendir(tracing_events_path);
+	sys_dir = tracing_events__opendir();
 	if (!sys_dir)
 		return 0;
 
 	for_each_subsystem(sys_dir, sys_dirent) {
-
-		snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path,
-			 sys_dirent->d_name);
+		dir_path = get_events_file(sys_dirent->d_name);
+		if (!dir_path)
+			continue;
 		evt_dir = opendir(dir_path);
 		if (!evt_dir)
-			continue;
+			goto next;
 
-		for_each_event(sys_dirent, evt_dir, evt_dirent) {
+		for_each_event(dir_path, evt_dir, evt_dirent) {
 			snprintf(evt_path, MAXPATHLEN, "%s:%s",
 				 sys_dirent->d_name, evt_dirent->d_name);
 			if (!strcmp(evt_path, event_string)) {
@@ -2085,6 +2222,8 @@
 			}
 		}
 		closedir(evt_dir);
+next:
+		put_events_file(dir_path);
 	}
 	closedir(sys_dir);
 	return 0;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 5015cfd..4473dac 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -167,7 +167,9 @@
 				void *ptr, char *type, u64 len);
 int parse_events_add_pmu(struct parse_events_state *parse_state,
 			 struct list_head *list, char *name,
-			 struct list_head *head_config, bool auto_merge_stats);
+			 struct list_head *head_config,
+			 bool auto_merge_stats,
+			 bool use_alias);
 
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 			       char *str,
@@ -178,7 +180,8 @@
 
 enum perf_pmu_event_symbol_type
 perf_pmu__parse_check(const char *name);
-void parse_events__set_leader(char *name, struct list_head *list);
+void parse_events__set_leader(char *name, struct list_head *list,
+			      struct parse_events_state *parse_state);
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all);
 void parse_events_evlist_error(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 7afeb80..e37608a 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -161,7 +161,7 @@
 	struct list_head *list = $3;
 
 	inc_group_count(list, _parse_state);
-	parse_events__set_leader($1, list);
+	parse_events__set_leader($1, list, _parse_state);
 	$$ = list;
 }
 |
@@ -170,7 +170,7 @@
 	struct list_head *list = $2;
 
 	inc_group_count(list, _parse_state);
-	parse_events__set_leader(NULL, list);
+	parse_events__set_leader(NULL, list, _parse_state);
 	$$ = list;
 }
 
@@ -232,7 +232,7 @@
 		YYABORT;
 
 	ALLOC_LIST(list);
-	if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) {
+	if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
 		struct perf_pmu *pmu = NULL;
 		int ok = 0;
 		char *pattern;
@@ -251,7 +251,7 @@
 					free(pattern);
 					YYABORT;
 				}
-				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true))
+				if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
 					ok++;
 				parse_events_terms__delete(terms);
 			}
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e1dbc98..3094f11 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -111,17 +111,6 @@
 	symbol__exit();
 }
 
-static struct symbol *__find_kernel_function_by_name(const char *name,
-						     struct map **mapp)
-{
-	return machine__find_kernel_function_by_name(host_machine, name, mapp);
-}
-
-static struct symbol *__find_kernel_function(u64 addr, struct map **mapp)
-{
-	return machine__find_kernel_function(host_machine, addr, mapp);
-}
-
 static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
 {
 	/* kmap->ref_reloc_sym should be set if host_machine is initialized */
@@ -149,7 +138,7 @@
 	if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
 		*addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
 	else {
-		sym = __find_kernel_function_by_name(name, &map);
+		sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
 		if (!sym)
 			return -ENOENT;
 		*addr = map->unmap_ip(map, sym->start) -
@@ -161,8 +150,7 @@
 
 static struct map *kernel_get_module_map(const char *module)
 {
-	struct map_groups *grp = &host_machine->kmaps;
-	struct maps *maps = &grp->maps[MAP__FUNCTION];
+	struct maps *maps = machine__kernel_maps(host_machine);
 	struct map *pos;
 
 	/* A file path -- this is an offline module */
@@ -341,7 +329,7 @@
 		char module_name[128];
 
 		snprintf(module_name, sizeof(module_name), "[%s]", module);
-		map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name);
+		map = map_groups__find_by_name(&host_machine->kmaps, module_name);
 		if (map) {
 			dso = map->dso;
 			goto found;
@@ -2098,7 +2086,7 @@
 		}
 		if (addr) {
 			addr += tp->offset;
-			sym = __find_kernel_function(addr, &map);
+			sym = machine__find_kernel_symbol(host_machine, addr, &map);
 		}
 	}
 
@@ -3504,19 +3492,18 @@
 			       (target) ? : "kernel");
 		goto end;
 	}
-	if (!dso__sorted_by_name(map->dso, map->type))
-		dso__sort_by_name(map->dso, map->type);
+	if (!dso__sorted_by_name(map->dso))
+		dso__sort_by_name(map->dso);
 
 	/* Show all (filtered) symbols */
 	setup_pager();
 
-        for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) {
 		struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
 
 		if (strfilter__compare(_filter, pos->sym.name))
 			printf("%s\n", pos->sym.name);
-        }
-
+	}
 end:
 	map__put(map);
 	exit_probe_symbol_maps();
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 4ae1123..b76088f 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -84,8 +84,7 @@
 	char buf[PATH_MAX];
 	int ret;
 
-	ret = e_snprintf(buf, PATH_MAX, "%s/%s",
-			 tracing_path, trace_file);
+	ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file);
 	if (ret >= 0) {
 		pr_debug("Opening %s write=%d\n", buf, readwrite);
 		if (readwrite && !probe_event_dry_run)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 10dd5fc..7f8afac 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -531,6 +531,8 @@
 			PyLong_FromUnsignedLongLong(sample->period));
 	pydict_set_item_string_decref(dict_sample, "phys_addr",
 			PyLong_FromUnsignedLongLong(sample->phys_addr));
+	pydict_set_item_string_decref(dict_sample, "addr",
+			PyLong_FromUnsignedLongLong(sample->addr));
 	set_sample_read_in_dict(dict_sample, sample, evsel);
 	pydict_set_item_string_decref(dict, "sample", dict_sample);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f4a7a43..b998bb4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1973,12 +1973,11 @@
 	return false;
 }
 
-int maps__set_kallsyms_ref_reloc_sym(struct map **maps,
-				     const char *symbol_name, u64 addr)
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
 {
 	char *bracket;
-	int i;
 	struct ref_reloc_sym *ref;
+	struct kmap *kmap;
 
 	ref = zalloc(sizeof(struct ref_reloc_sym));
 	if (ref == NULL)
@@ -1996,13 +1995,9 @@
 
 	ref->addr = addr;
 
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		struct kmap *kmap = map__kmap(maps[i]);
-
-		if (!kmap)
-			continue;
+	kmap = map__kmap(map);
+	if (kmap)
 		kmap->ref_reloc_sym = ref;
-	}
 
 	return 0;
 }
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 26a68df..4058ade 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2,7 +2,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <regex.h>
-#include <sys/mman.h>
+#include <linux/mman.h>
 #include "sort.h"
 #include "hist.h"
 #include "comm.h"
@@ -282,7 +282,7 @@
 
 	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
 	if (sym && map) {
-		if (map->type == MAP__VARIABLE) {
+		if (sym->type == STT_OBJECT) {
 			ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
 			ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
 					ip - map->unmap_ip(map, sym->start));
@@ -1211,7 +1211,7 @@
 
 		/* print [s] for shared data mmaps */
 		if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
-		     map && (map->type == MAP__VARIABLE) &&
+		     map && !(map->prot & PROT_EXEC) &&
 		    (map->flags & MAP_SHARED) &&
 		    (map->maj || map->min || map->ino ||
 		     map->ino_generation))
@@ -2582,7 +2582,7 @@
 		if (sort__mode != SORT_MODE__MEMORY)
 			return -EINVAL;
 
-		if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0)
+		if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
 			return -EINVAL;
 
 		if (sd->entry == &sort_mem_daddr_sym)
@@ -2628,7 +2628,7 @@
 		if (*tok) {
 			ret = sort_dimension__add(list, tok, evlist, level);
 			if (ret == -EINVAL) {
-				if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok)))
+				if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
 					pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
 				else
 					pr_err("Invalid --sort key: `%s'", tok);
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 035b62e..9e68962 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -186,13 +186,13 @@
 static inline u64 cl_address(u64 address)
 {
 	/* return the cacheline of the address */
-	return (address & ~(cacheline_size - 1));
+	return (address & ~(cacheline_size() - 1));
 }
 
 static inline u64 cl_offset(u64 address)
 {
 	/* return the cacheline of the address */
-	return (address & (cacheline_size - 1));
+	return (address & (cacheline_size() - 1));
 }
 
 enum sort_mode {
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 3c21fd0..09d6746 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -103,6 +103,7 @@
 		inline_sym = symbol__new(base_sym ? base_sym->start : 0,
 					 base_sym ? base_sym->end : 0,
 					 base_sym ? base_sym->binding : 0,
+					 base_sym ? base_sym->type : 0,
 					 funcname);
 		if (inline_sym)
 			inline_sym->inlined = 1;
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 8f56ba4..36efb98 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -7,8 +7,7 @@
 #include "xyarray.h"
 #include "rblist.h"
 
-struct stats
-{
+struct stats {
 	double n, mean, M2;
 	u64 max, min;
 };
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 2de7705..29770ea 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -114,16 +114,9 @@
 		sym->st_shndx != SHN_ABS;
 }
 
-static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type)
+static bool elf_sym__filter(GElf_Sym *sym)
 {
-	switch (type) {
-	case MAP__FUNCTION:
-		return elf_sym__is_function(sym);
-	case MAP__VARIABLE:
-		return elf_sym__is_object(sym);
-	default:
-		return false;
-	}
+	return elf_sym__is_function(sym) || elf_sym__is_object(sym);
 }
 
 static inline const char *elf_sym__name(const GElf_Sym *sym,
@@ -150,17 +143,10 @@
 	return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
 }
 
-static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs,
-			  enum map_type type)
+static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs)
 {
-	switch (type) {
-	case MAP__FUNCTION:
-		return elf_sec__is_text(shdr, secstrs);
-	case MAP__VARIABLE:
-		return elf_sec__is_data(shdr, secstrs);
-	default:
-		return false;
-	}
+	return elf_sec__is_text(shdr, secstrs) ||
+	       elf_sec__is_data(shdr, secstrs);
 }
 
 static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
@@ -256,7 +242,7 @@
  * And always look at the original dso, not at debuginfo packages, that
  * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
  */
-int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map)
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
 {
 	uint32_t nr_rel_entries, idx;
 	GElf_Sym sym;
@@ -364,12 +350,12 @@
 			free(demangled);
 
 			f = symbol__new(plt_offset, plt_entry_size,
-					STB_GLOBAL, sympltname);
+					STB_GLOBAL, STT_FUNC, sympltname);
 			if (!f)
 				goto out_elf_end;
 
 			plt_offset += plt_entry_size;
-			symbols__insert(&dso->symbols[map->type], f);
+			symbols__insert(&dso->symbols, f);
 			++nr;
 		}
 	} else if (shdr_rel_plt.sh_type == SHT_REL) {
@@ -390,12 +376,12 @@
 			free(demangled);
 
 			f = symbol__new(plt_offset, plt_entry_size,
-					STB_GLOBAL, sympltname);
+					STB_GLOBAL, STT_FUNC, sympltname);
 			if (!f)
 				goto out_elf_end;
 
 			plt_offset += plt_entry_size;
-			symbols__insert(&dso->symbols[map->type], f);
+			symbols__insert(&dso->symbols, f);
 			++nr;
 		}
 	}
@@ -811,6 +797,110 @@
 void __weak arch__sym_update(struct symbol *s __maybe_unused,
 		GElf_Sym *sym __maybe_unused) { }
 
+static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
+				      GElf_Sym *sym, GElf_Shdr *shdr,
+				      struct map_groups *kmaps, struct kmap *kmap,
+				      struct dso **curr_dsop, struct map **curr_mapp,
+				      const char *section_name,
+				      bool adjust_kernel_syms, bool kmodule, bool *remap_kernel)
+{
+	struct dso *curr_dso = *curr_dsop;
+	struct map *curr_map;
+	char dso_name[PATH_MAX];
+
+	/* Adjust symbol to map to file offset */
+	if (adjust_kernel_syms)
+		sym->st_value -= shdr->sh_addr - shdr->sh_offset;
+
+	if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0)
+		return 0;
+
+	if (strcmp(section_name, ".text") == 0) {
+		/*
+		 * The initial kernel mapping is based on
+		 * kallsyms and identity maps.  Overwrite it to
+		 * map to the kernel dso.
+		 */
+		if (*remap_kernel && dso->kernel) {
+			*remap_kernel = false;
+			map->start = shdr->sh_addr + ref_reloc(kmap);
+			map->end = map->start + shdr->sh_size;
+			map->pgoff = shdr->sh_offset;
+			map->map_ip = map__map_ip;
+			map->unmap_ip = map__unmap_ip;
+			/* Ensure maps are correctly ordered */
+			if (kmaps) {
+				map__get(map);
+				map_groups__remove(kmaps, map);
+				map_groups__insert(kmaps, map);
+				map__put(map);
+			}
+		}
+
+		/*
+		 * The initial module mapping is based on
+		 * /proc/modules mapped to offset zero.
+		 * Overwrite it to map to the module dso.
+		 */
+		if (*remap_kernel && kmodule) {
+			*remap_kernel = false;
+			map->pgoff = shdr->sh_offset;
+		}
+
+		*curr_mapp = map;
+		*curr_dsop = dso;
+		return 0;
+	}
+
+	if (!kmap)
+		return 0;
+
+	snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name);
+
+	curr_map = map_groups__find_by_name(kmaps, dso_name);
+	if (curr_map == NULL) {
+		u64 start = sym->st_value;
+
+		if (kmodule)
+			start += map->start + shdr->sh_offset;
+
+		curr_dso = dso__new(dso_name);
+		if (curr_dso == NULL)
+			return -1;
+		curr_dso->kernel = dso->kernel;
+		curr_dso->long_name = dso->long_name;
+		curr_dso->long_name_len = dso->long_name_len;
+		curr_map = map__new2(start, curr_dso);
+		dso__put(curr_dso);
+		if (curr_map == NULL)
+			return -1;
+
+		if (adjust_kernel_syms) {
+			curr_map->start  = shdr->sh_addr + ref_reloc(kmap);
+			curr_map->end	 = curr_map->start + shdr->sh_size;
+			curr_map->pgoff	 = shdr->sh_offset;
+		} else {
+			curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+		}
+		curr_dso->symtab_type = dso->symtab_type;
+		map_groups__insert(kmaps, curr_map);
+		/*
+		 * Add it before we drop the reference to curr_map, i.e. while
+		 * we still are sure to have a reference to this DSO via
+		 * *curr_map->dso.
+		 */
+		dsos__add(&map->groups->machine->dsos, curr_dso);
+		/* kmaps already got it */
+		map__put(curr_map);
+		dso__set_loaded(curr_dso);
+		*curr_mapp = curr_map;
+		*curr_dsop = curr_dso;
+	} else
+		*curr_dsop = curr_map->dso;
+
+	return 0;
+}
+
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, int kmodule)
 {
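The extracted dso__process_kernel_symbol() above starts by converting a kernel symbol from its virtual address to a file offset with st_value -= sh_addr - sh_offset. With made-up section numbers, the arithmetic looks like this:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical .text section: loaded at sh_addr, stored at sh_offset */
	uint64_t sh_addr   = 0xffffffff81000000ULL;
	uint64_t sh_offset = 0x200000ULL;
	uint64_t st_value  = 0xffffffff81001234ULL;	/* a symbol in .text */

	st_value -= sh_addr - sh_offset;	/* same fixup as in the helper */
	printf("file offset: %#llx\n", (unsigned long long)st_value); /* 0x201234 */
	return 0;
}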
@@ -844,7 +934,7 @@
 	 * have the wrong values for the dso maps, so remove them.
 	 */
 	if (kmodule && syms_ss->symtab)
-		symbols__delete(&dso->symbols[map->type]);
+		symbols__delete(&dso->symbols);
 
 	if (!syms_ss->symtab) {
 		/*
@@ -921,10 +1011,10 @@
 
 	dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
 	/*
-	 * Initial kernel and module mappings do not map to the dso.  For
-	 * function mappings, flag the fixups.
+	 * Initial kernel and module mappings do not map to the dso.
+	 * Flag the fixups.
 	 */
-	if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) {
+	if (dso->kernel || kmodule) {
 		remap_kernel = true;
 		adjust_kernel_syms = dso->adjust_symbols;
 	}
@@ -936,7 +1026,7 @@
 		const char *section_name;
 		bool used_opd = false;
 
-		if (!is_label && !elf_sym__is_a(&sym, map->type))
+		if (!is_label && !elf_sym__filter(&sym))
 			continue;
 
 		/* Reject ARM ELF "mapping symbols": these aren't unique and
@@ -974,7 +1064,7 @@
 
 		gelf_getshdr(sec, &shdr);
 
-		if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
+		if (is_label && !elf_sec__filter(&shdr, secstrs))
 			continue;
 
 		section_name = elf_sec__name(&shdr, secstrs);
@@ -982,134 +1072,37 @@
 		/* On ARM, symbols for thumb functions have 1 added to
 		 * the symbol address as a flag - remove it */
 		if ((ehdr.e_machine == EM_ARM) &&
-		    (map->type == MAP__FUNCTION) &&
+		    (GELF_ST_TYPE(sym.st_info) == STT_FUNC) &&
 		    (sym.st_value & 1))
 			--sym.st_value;
 
 		if (dso->kernel || kmodule) {
-			char dso_name[PATH_MAX];
-
-			/* Adjust symbol to map to file offset */
-			if (adjust_kernel_syms)
-				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
-
-			if (strcmp(section_name,
-				   (curr_dso->short_name +
-				    dso->short_name_len)) == 0)
-				goto new_symbol;
-
-			if (strcmp(section_name, ".text") == 0) {
-				/*
-				 * The initial kernel mapping is based on
-				 * kallsyms and identity maps.  Overwrite it to
-				 * map to the kernel dso.
-				 */
-				if (remap_kernel && dso->kernel) {
-					remap_kernel = false;
-					map->start = shdr.sh_addr +
-						     ref_reloc(kmap);
-					map->end = map->start + shdr.sh_size;
-					map->pgoff = shdr.sh_offset;
-					map->map_ip = map__map_ip;
-					map->unmap_ip = map__unmap_ip;
-					/* Ensure maps are correctly ordered */
-					if (kmaps) {
-						map__get(map);
-						map_groups__remove(kmaps, map);
-						map_groups__insert(kmaps, map);
-						map__put(map);
-					}
-				}
-
-				/*
-				 * The initial module mapping is based on
-				 * /proc/modules mapped to offset zero.
-				 * Overwrite it to map to the module dso.
-				 */
-				if (remap_kernel && kmodule) {
-					remap_kernel = false;
-					map->pgoff = shdr.sh_offset;
-				}
-
-				curr_map = map;
-				curr_dso = dso;
-				goto new_symbol;
-			}
-
-			if (!kmap)
-				goto new_symbol;
-
-			snprintf(dso_name, sizeof(dso_name),
-				 "%s%s", dso->short_name, section_name);
-
-			curr_map = map_groups__find_by_name(kmaps, map->type, dso_name);
-			if (curr_map == NULL) {
-				u64 start = sym.st_value;
-
-				if (kmodule)
-					start += map->start + shdr.sh_offset;
-
-				curr_dso = dso__new(dso_name);
-				if (curr_dso == NULL)
-					goto out_elf_end;
-				curr_dso->kernel = dso->kernel;
-				curr_dso->long_name = dso->long_name;
-				curr_dso->long_name_len = dso->long_name_len;
-				curr_map = map__new2(start, curr_dso,
-						     map->type);
-				dso__put(curr_dso);
-				if (curr_map == NULL) {
-					goto out_elf_end;
-				}
-				if (adjust_kernel_syms) {
-					curr_map->start = shdr.sh_addr +
-							  ref_reloc(kmap);
-					curr_map->end = curr_map->start +
-							shdr.sh_size;
-					curr_map->pgoff = shdr.sh_offset;
-				} else {
-					curr_map->map_ip = identity__map_ip;
-					curr_map->unmap_ip = identity__map_ip;
-				}
-				curr_dso->symtab_type = dso->symtab_type;
-				map_groups__insert(kmaps, curr_map);
-				/*
-				 * Add it before we drop the referece to curr_map,
-				 * i.e. while we still are sure to have a reference
-				 * to this DSO via curr_map->dso.
-				 */
-				dsos__add(&map->groups->machine->dsos, curr_dso);
-				/* kmaps already got it */
-				map__put(curr_map);
-				dso__set_loaded(curr_dso, map->type);
-			} else
-				curr_dso = curr_map->dso;
-
-			goto new_symbol;
-		}
-
-		if ((used_opd && runtime_ss->adjust_symbols)
-				|| (!used_opd && syms_ss->adjust_symbols)) {
+			if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
+						       section_name, adjust_kernel_syms, kmodule, &remap_kernel))
+				goto out_elf_end;
+		} else if ((used_opd && runtime_ss->adjust_symbols) ||
+			   (!used_opd && syms_ss->adjust_symbols)) {
 			pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
 				  "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
 				  (u64)sym.st_value, (u64)shdr.sh_addr,
 				  (u64)shdr.sh_offset);
 			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 		}
-new_symbol:
+
 		demangled = demangle_sym(dso, kmodule, elf_name);
 		if (demangled != NULL)
 			elf_name = demangled;
 
 		f = symbol__new(sym.st_value, sym.st_size,
-				GELF_ST_BIND(sym.st_info), elf_name);
+				GELF_ST_BIND(sym.st_info),
+				GELF_ST_TYPE(sym.st_info), elf_name);
 		free(demangled);
 		if (!f)
 			goto out_elf_end;
 
 		arch__sym_update(f, &sym);
 
-		__symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel);
+		__symbols__insert(&curr_dso->symbols, f, dso->kernel);
 		nr++;
 	}
 
@@ -1117,14 +1110,14 @@
 	 * For misannotated, zeroed, ASM function sizes.
 	 */
 	if (nr > 0) {
-		symbols__fixup_end(&dso->symbols[map->type]);
-		symbols__fixup_duplicate(&dso->symbols[map->type]);
+		symbols__fixup_end(&dso->symbols);
+		symbols__fixup_duplicate(&dso->symbols);
 		if (kmap) {
 			/*
 			 * We need to fixup this here too because we create new
 			 * maps here, for things like vsyscall sections.
 			 */
-			__map_groups__fixup_end(kmaps, map->type);
+			map_groups__fixup_end(kmaps);
 		}
 	}
 	err = nr;
@@ -1393,8 +1386,16 @@
 
 struct phdr_data {
 	off_t offset;
+	off_t rel;
 	u64 addr;
 	u64 len;
+	struct list_head node;
+	struct phdr_data *remaps;
+};
+
+struct sym_data {
+	u64 addr;
+	struct list_head node;
 };
 
 struct kcore_copy_info {
@@ -1404,16 +1405,78 @@
 	u64 last_symbol;
 	u64 first_module;
 	u64 last_module_symbol;
-	struct phdr_data kernel_map;
-	struct phdr_data modules_map;
+	size_t phnum;
+	struct list_head phdrs;
+	struct list_head syms;
 };
 
+#define kcore_copy__for_each_phdr(k, p) \
+	list_for_each_entry((p), &(k)->phdrs, node)
+
+static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset)
+{
+	struct phdr_data *p = zalloc(sizeof(*p));
+
+	if (p) {
+		p->addr   = addr;
+		p->len    = len;
+		p->offset = offset;
+	}
+
+	return p;
+}
+
+static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci,
+						 u64 addr, u64 len,
+						 off_t offset)
+{
+	struct phdr_data *p = phdr_data__new(addr, len, offset);
+
+	if (p)
+		list_add_tail(&p->node, &kci->phdrs);
+
+	return p;
+}
+
+static void kcore_copy__free_phdrs(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, &kci->phdrs, node) {
+		list_del(&p->node);
+		free(p);
+	}
+}
+
+static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci,
+					    u64 addr)
+{
+	struct sym_data *s = zalloc(sizeof(*s));
+
+	if (s) {
+		s->addr = addr;
+		list_add_tail(&s->node, &kci->syms);
+	}
+
+	return s;
+}
+
+static void kcore_copy__free_syms(struct kcore_copy_info *kci)
+{
+	struct sym_data *s, *tmp;
+
+	list_for_each_entry_safe(s, tmp, &kci->syms, node) {
+		list_del(&s->node);
+		free(s);
+	}
+}
+
 static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
 					u64 start)
 {
 	struct kcore_copy_info *kci = arg;
 
-	if (!symbol_type__is_a(type, MAP__FUNCTION))
+	if (!kallsyms__is_function(type))
 		return 0;
 
 	if (strchr(name, '[')) {
@@ -1438,6 +1501,9 @@
 		return 0;
 	}
 
+	if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start))
+		return -1;
+
 	return 0;
 }
 
@@ -1487,27 +1553,39 @@
 	return 0;
 }
 
-static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff,
-			    u64 s, u64 e)
+static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end,
+			   u64 pgoff, u64 s, u64 e)
 {
-	if (p->addr || s < start || s >= end)
-		return;
+	u64 len, offset;
 
-	p->addr = s;
-	p->offset = (s - start) + pgoff;
-	p->len = e < end ? e - s : end - s;
+	if (s < start || s >= end)
+		return 0;
+
+	offset = (s - start) + pgoff;
+	len = e < end ? e - s : end - s;
+
+	return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1;
 }
 
 static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
 {
 	struct kcore_copy_info *kci = data;
 	u64 end = start + len;
+	struct sym_data *sdat;
 
-	kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext,
-			kci->etext);
+	if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext))
+		return -1;
 
-	kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module,
-			kci->last_module_symbol);
+	if (kcore_copy__map(kci, start, end, pgoff, kci->first_module,
+			    kci->last_module_symbol))
+		return -1;
+
+	list_for_each_entry(sdat, &kci->syms, node) {
+		u64 s = round_down(sdat->addr, page_size);
+
+		if (kcore_copy__map(kci, start, end, pgoff, s, s + len))
+			return -1;
+	}
 
 	return 0;
 }
@@ -1520,6 +1598,64 @@
 	return 0;
 }
 
+static void kcore_copy__find_remaps(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p, *k = NULL;
+	u64 kend;
+
+	if (!kci->stext)
+		return;
+
+	/* Find phdr that corresponds to the kernel map (contains stext) */
+	kcore_copy__for_each_phdr(kci, p) {
+		u64 pend = p->addr + p->len - 1;
+
+		if (p->addr <= kci->stext && pend >= kci->stext) {
+			k = p;
+			break;
+		}
+	}
+
+	if (!k)
+		return;
+
+	kend = k->offset + k->len;
+
+	/* Find phdrs that remap the kernel */
+	kcore_copy__for_each_phdr(kci, p) {
+		u64 pend = p->offset + p->len;
+
+		if (p == k)
+			continue;
+
+		if (p->offset >= k->offset && pend <= kend)
+			p->remaps = k;
+	}
+}
+
+static void kcore_copy__layout(struct kcore_copy_info *kci)
+{
+	struct phdr_data *p;
+	off_t rel = 0;
+
+	kcore_copy__find_remaps(kci);
+
+	kcore_copy__for_each_phdr(kci, p) {
+		if (!p->remaps) {
+			p->rel = rel;
+			rel += p->len;
+		}
+		kci->phnum += 1;
+	}
+
+	kcore_copy__for_each_phdr(kci, p) {
+		struct phdr_data *k = p->remaps;
+
+		if (k)
+			p->rel = p->offset - k->offset + k->rel;
+	}
+}
+
 static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
 				 Elf *elf)
 {
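kcore_copy__layout() above assigns every program header a relative file offset in two passes: non-remapped segments are packed back to back, then each remap reuses the packing of the segment it aliases via p->offset - k->offset + k->rel. A sketch of that arithmetic over three hypothetical segments:

#include <stdio.h>

struct seg { long offset, len, rel; int remaps; /* target index, -1 if none */ };

int main(void)
{
	struct seg s[3] = {
		{ .offset = 0x1000, .len = 0x4000, .remaps = -1 }, /* kernel text */
		{ .offset = 0x9000, .len = 0x2000, .remaps = -1 }, /* modules */
		{ .offset = 0x2000, .len = 0x1000, .remaps = 0  }, /* remap into [0] */
	};
	long rel = 0;
	int i;

	for (i = 0; i < 3; i++)		/* first pass: pack non-remaps */
		if (s[i].remaps < 0) {
			s[i].rel = rel;
			rel += s[i].len;
		}
	for (i = 0; i < 3; i++)		/* second pass: resolve remaps */
		if (s[i].remaps >= 0) {
			struct seg *k = &s[s[i].remaps];
			s[i].rel = s[i].offset - k->offset + k->rel;
		}
	for (i = 0; i < 3; i++)		/* prints rel 0x0, 0x4000, 0x1000 */
		printf("seg %d rel %#lx\n", i, s[i].rel);
	return 0;
}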
@@ -1555,7 +1691,12 @@
 	if (kci->first_module && !kci->last_module_symbol)
 		return -1;
 
-	return kcore_copy__read_maps(kci, elf);
+	if (kcore_copy__read_maps(kci, elf))
+		return -1;
+
+	kcore_copy__layout(kci);
+
+	return 0;
 }
 
 static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
@@ -1678,12 +1819,15 @@
 {
 	struct kcore kcore;
 	struct kcore extract;
-	size_t count = 2;
 	int idx = 0, err = -1;
-	off_t offset = page_size, sz, modules_offset = 0;
+	off_t offset, sz;
 	struct kcore_copy_info kci = { .stext = 0, };
 	char kcore_filename[PATH_MAX];
 	char extract_filename[PATH_MAX];
+	struct phdr_data *p;
+
+	INIT_LIST_HEAD(&kci.phdrs);
+	INIT_LIST_HEAD(&kci.syms);
 
 	if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
 		return -1;
@@ -1703,20 +1847,17 @@
 	if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
 		goto out_kcore_close;
 
-	if (!kci.modules_map.addr)
-		count -= 1;
-
-	if (kcore__copy_hdr(&kcore, &extract, count))
+	if (kcore__copy_hdr(&kcore, &extract, kci.phnum))
 		goto out_extract_close;
 
-	if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr,
-			    kci.kernel_map.len))
-		goto out_extract_close;
+	offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) +
+		 gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT);
+	offset = round_up(offset, page_size);
 
-	if (kci.modules_map.addr) {
-		modules_offset = offset + kci.kernel_map.len;
-		if (kcore__add_phdr(&extract, idx, modules_offset,
-				    kci.modules_map.addr, kci.modules_map.len))
+	kcore_copy__for_each_phdr(&kci, p) {
+		off_t offs = p->rel + offset;
+
+		if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len))
 			goto out_extract_close;
 	}
 
@@ -1724,14 +1865,14 @@
 	if (sz < 0 || sz > offset)
 		goto out_extract_close;
 
-	if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset,
-		       kci.kernel_map.len))
-		goto out_extract_close;
+	kcore_copy__for_each_phdr(&kci, p) {
+		off_t offs = p->rel + offset;
 
-	if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset,
-					 extract.fd, modules_offset,
-					 kci.modules_map.len))
-		goto out_extract_close;
+		if (p->remaps)
+			continue;
+		if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len))
+			goto out_extract_close;
+	}
 
 	if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
 		goto out_extract_close;
@@ -1754,6 +1895,9 @@
 	if (err)
 		kcore_copy__unlink(to_dir, "kallsyms");
 
+	kcore_copy__free_phdrs(&kci);
+	kcore_copy__free_syms(&kci);
+
 	return err;
 }
 
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index ff48d0d..7119df7 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -288,8 +288,7 @@
 }
 
 int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
-				struct symsrc *ss __maybe_unused,
-				struct map *map __maybe_unused)
+				struct symsrc *ss __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 1466814..8c84437 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -5,6 +5,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <linux/kernel.h>
+#include <linux/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/param.h>
@@ -70,18 +71,10 @@
 
 #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
 
-bool symbol_type__is_a(char symbol_type, enum map_type map_type)
+static bool symbol_type__filter(char symbol_type)
 {
 	symbol_type = toupper(symbol_type);
-
-	switch (map_type) {
-	case MAP__FUNCTION:
-		return symbol_type == 'T' || symbol_type == 'W';
-	case MAP__VARIABLE:
-		return symbol_type == 'D';
-	default:
-		return false;
-	}
+	return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D';
 }
 
 static int prefix_underscores_count(const char *str)
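With a single symbol tree, symbol_type__filter() above keeps the kallsyms type letters for both text ('T'/'W') and data ('D') in one pass. A self-contained filter over made-up kallsyms-style lines:

#include <ctype.h>
#include <stdio.h>

static int symbol_type__filter(char t)
{
	t = toupper(t);
	return t == 'T' || t == 'W' || t == 'D';
}

int main(void)
{
	/* addr type name, as in /proc/kallsyms; sample lines are invented */
	const char *lines[] = {
		"ffffffff81000000 T _stext",
		"ffffffff82000000 D jiffies",
		"ffffffff83000000 A irrelevant",
	};

	for (unsigned int i = 0; i < 3; i++)
		if (symbol_type__filter(lines[i][17]))	/* type char follows the 16-digit addr */
			printf("keep: %s\n", lines[i]);
	return 0;
}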
@@ -228,9 +221,9 @@
 		curr->end = roundup(curr->start, 4096) + 4096;
 }
 
-void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
+void map_groups__fixup_end(struct map_groups *mg)
 {
-	struct maps *maps = &mg->maps[type];
+	struct maps *maps = &mg->maps;
 	struct map *next, *curr;
 
 	down_write(&maps->lock);
@@ -256,7 +249,7 @@
 	up_write(&maps->lock);
 }
 
-struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name)
 {
 	size_t namelen = strlen(name) + 1;
 	struct symbol *sym = calloc(1, (symbol_conf.priv_size +
@@ -274,6 +267,7 @@
 
 	sym->start   = start;
 	sym->end     = len ? start + len : start;
+	sym->type    = type;
 	sym->binding = binding;
 	sym->namelen = namelen - 1;
 
@@ -484,45 +478,40 @@
 
 void dso__reset_find_symbol_cache(struct dso *dso)
 {
-	enum map_type type;
-
-	for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) {
-		dso->last_find_result[type].addr   = 0;
-		dso->last_find_result[type].symbol = NULL;
-	}
+	dso->last_find_result.addr   = 0;
+	dso->last_find_result.symbol = NULL;
 }
 
-void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
+void dso__insert_symbol(struct dso *dso, struct symbol *sym)
 {
-	__symbols__insert(&dso->symbols[type], sym, dso->kernel);
+	__symbols__insert(&dso->symbols, sym, dso->kernel);
 
 	/* update the symbol cache if necessary */
-	if (dso->last_find_result[type].addr >= sym->start &&
-	    (dso->last_find_result[type].addr < sym->end ||
+	if (dso->last_find_result.addr >= sym->start &&
+	    (dso->last_find_result.addr < sym->end ||
 	    sym->start == sym->end)) {
-		dso->last_find_result[type].symbol = sym;
+		dso->last_find_result.symbol = sym;
 	}
 }
 
-struct symbol *dso__find_symbol(struct dso *dso,
-				enum map_type type, u64 addr)
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
 {
-	if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) {
-		dso->last_find_result[type].addr   = addr;
-		dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr);
+	if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
+		dso->last_find_result.addr   = addr;
+		dso->last_find_result.symbol = symbols__find(&dso->symbols, addr);
 	}
 
-	return dso->last_find_result[type].symbol;
+	return dso->last_find_result.symbol;
 }
 
-struct symbol *dso__first_symbol(struct dso *dso, enum map_type type)
+struct symbol *dso__first_symbol(struct dso *dso)
 {
-	return symbols__first(&dso->symbols[type]);
+	return symbols__first(&dso->symbols);
 }
 
-struct symbol *dso__last_symbol(struct dso *dso, enum map_type type)
+struct symbol *dso__last_symbol(struct dso *dso)
 {
-	return symbols__last(&dso->symbols[type]);
+	return symbols__last(&dso->symbols);
 }
 
 struct symbol *dso__next_symbol(struct symbol *sym)
@@ -539,24 +528,22 @@
 }
 
  /*
-  * Teturns first symbol that matched with @name.
+  * Returns first symbol that matched with @name.
   */
-struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
-					const char *name)
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name)
 {
-	struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name,
+	struct symbol *s = symbols__find_by_name(&dso->symbol_names, name,
 						 SYMBOL_TAG_INCLUDE__NONE);
 	if (!s)
-		s = symbols__find_by_name(&dso->symbol_names[type], name,
+		s = symbols__find_by_name(&dso->symbol_names, name,
 					  SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
 	return s;
 }
 
-void dso__sort_by_name(struct dso *dso, enum map_type type)
+void dso__sort_by_name(struct dso *dso)
 {
-	dso__set_sorted_by_name(dso, type);
-	return symbols__sort_by_name(&dso->symbol_names[type],
-				     &dso->symbols[type]);
+	dso__set_sorted_by_name(dso);
+	return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
 }
 
 int modules__parse(const char *filename, void *arg,
@@ -621,11 +608,6 @@
 	return err;
 }
 
-struct process_kallsyms_args {
-	struct map *map;
-	struct dso *dso;
-};
-
 /*
  * These are symbols in the kernel image, so make sure that
  * sym is from a kernel DSO.
@@ -661,10 +643,10 @@
 				       char type, u64 start)
 {
 	struct symbol *sym;
-	struct process_kallsyms_args *a = arg;
-	struct rb_root *root = &a->dso->symbols[a->map->type];
+	struct dso *dso = arg;
+	struct rb_root *root = &dso->symbols;
 
-	if (!symbol_type__is_a(type, a->map->type))
+	if (!symbol_type__filter(type))
 		return 0;
 
 	/*
@@ -672,7 +654,7 @@
 	 * symbols, setting length to 0, and rely on
 	 * symbols__fixup_end() to fix it up.
 	 */
-	sym = symbol__new(start, 0, kallsyms2elf_binding(type), name);
+	sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name);
 	if (sym == NULL)
 		return -ENOMEM;
 	/*
@@ -689,21 +671,18 @@
  * so that we can in the next step set the symbol ->end address and then
  * call kernel_maps__split_kallsyms.
  */
-static int dso__load_all_kallsyms(struct dso *dso, const char *filename,
-				  struct map *map)
+static int dso__load_all_kallsyms(struct dso *dso, const char *filename)
 {
-	struct process_kallsyms_args args = { .map = map, .dso = dso, };
-	return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
+	return kallsyms__parse(filename, dso, map__process_kallsym_symbol);
 }
 
-static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
+static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct dso *dso)
 {
-	struct map_groups *kmaps = map__kmaps(map);
 	struct map *curr_map;
 	struct symbol *pos;
 	int count = 0;
-	struct rb_root old_root = dso->symbols[map->type];
-	struct rb_root *root = &dso->symbols[map->type];
+	struct rb_root old_root = dso->symbols;
+	struct rb_root *root = &dso->symbols;
 	struct rb_node *next = rb_first(root);
 
 	if (!kmaps)
@@ -723,7 +702,7 @@
 		if (module)
 			*module = '\0';
 
-		curr_map = map_groups__find(kmaps, map->type, pos->start);
+		curr_map = map_groups__find(kmaps, pos->start);
 
 		if (!curr_map) {
 			symbol__delete(pos);
@@ -733,7 +712,7 @@
 		pos->start -= curr_map->start - curr_map->pgoff;
 		if (pos->end)
 			pos->end -= curr_map->start - curr_map->pgoff;
-		symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+		symbols__insert(&curr_map->dso->symbols, pos);
 		++count;
 	}
 
@@ -748,22 +727,25 @@
  * kernel range is broken in several maps, named [kernel].N, as we don't have
  * the original ELF section names vmlinux have.
  */
-static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
+static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, u64 delta,
+				      struct map *initial_map)
 {
-	struct map_groups *kmaps = map__kmaps(map);
 	struct machine *machine;
-	struct map *curr_map = map;
+	struct map *curr_map = initial_map;
 	struct symbol *pos;
 	int count = 0, moved = 0;
-	struct rb_root *root = &dso->symbols[map->type];
+	struct rb_root *root = &dso->symbols;
 	struct rb_node *next = rb_first(root);
 	int kernel_range = 0;
+	bool x86_64;
 
 	if (!kmaps)
 		return -1;
 
 	machine = kmaps->machine;
 
+	x86_64 = machine__is(machine, "x86_64");
+
 	while (next) {
 		char *module;
 
@@ -778,7 +760,7 @@
 			*module++ = '\0';
 
 			if (strcmp(curr_map->dso->short_name, module)) {
-				if (curr_map != map &&
+				if (curr_map != initial_map &&
 				    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
 				    machine__is_default_guest(machine)) {
 					/*
@@ -788,18 +770,16 @@
 					 * symbols are in its kmap. Mark it as
 					 * loaded.
 					 */
-					dso__set_loaded(curr_map->dso,
-							curr_map->type);
+					dso__set_loaded(curr_map->dso);
 				}
 
-				curr_map = map_groups__find_by_name(kmaps,
-							map->type, module);
+				curr_map = map_groups__find_by_name(kmaps, module);
 				if (curr_map == NULL) {
 					pr_debug("%s/proc/{kallsyms,modules} "
 					         "inconsistency while looking "
 						 "for \"%s\" module!\n",
 						 machine->root_dir, module);
-					curr_map = map;
+					curr_map = initial_map;
 					goto discard_symbol;
 				}
 
@@ -809,11 +789,21 @@
 			}
 			/*
 			 * So that we look just like we get from .ko files,
-			 * i.e. not prelinked, relative to map->start.
+			 * i.e. not prelinked, relative to initial_map->start.
 			 */
 			pos->start = curr_map->map_ip(curr_map, pos->start);
 			pos->end   = curr_map->map_ip(curr_map, pos->end);
-		} else if (curr_map != map) {
+		} else if (x86_64 && is_entry_trampoline(pos->name)) {
+			/*
+			 * These symbols are not needed anymore since the
+			 * trampoline maps refer to the text section and it's
+			 * symbols instead. Avoid having to deal with
+			 * relocations, and the assumption that the first symbol
+			 * is the start of kernel text, by simply removing the
+			 * symbols at this point.
+			 */
+			goto discard_symbol;
+		} else if (curr_map != initial_map) {
 			char dso_name[PATH_MAX];
 			struct dso *ndso;
 
@@ -824,7 +814,7 @@
 			}
 
 			if (count == 0) {
-				curr_map = map;
+				curr_map = initial_map;
 				goto add_symbol;
 			}
 
@@ -843,7 +833,7 @@
 
 			ndso->kernel = dso->kernel;
 
-			curr_map = map__new2(pos->start, ndso, map->type);
+			curr_map = map__new2(pos->start, ndso);
 			if (curr_map == NULL) {
 				dso__put(ndso);
 				return -1;
@@ -858,9 +848,9 @@
 			pos->end -= delta;
 		}
 add_symbol:
-		if (curr_map != map) {
+		if (curr_map != initial_map) {
 			rb_erase(&pos->rb_node, root);
-			symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+			symbols__insert(&curr_map->dso->symbols, pos);
 			++moved;
 		} else
 			++count;
@@ -871,10 +861,10 @@
 		symbol__delete(pos);
 	}
 
-	if (curr_map != map &&
+	if (curr_map != initial_map &&
 	    dso->kernel == DSO_TYPE_GUEST_KERNEL &&
 	    machine__is_default_guest(kmaps->machine)) {
-		dso__set_loaded(curr_map->dso, curr_map->type);
+		dso__set_loaded(curr_map->dso);
 	}
 
 	return count + moved;
@@ -1035,7 +1025,12 @@
 	return ret;
 }
 
-static int do_validate_kcore_modules(const char *filename, struct map *map,
+struct map *map_groups__first(struct map_groups *mg)
+{
+	return maps__first(&mg->maps);
+}
+
+static int do_validate_kcore_modules(const char *filename,
 				  struct map_groups *kmaps)
 {
 	struct rb_root modules = RB_ROOT;
@@ -1046,13 +1041,12 @@
 	if (err)
 		return err;
 
-	old_map = map_groups__first(kmaps, map->type);
+	old_map = map_groups__first(kmaps);
 	while (old_map) {
 		struct map *next = map_groups__next(old_map);
 		struct module_info *mi;
 
-		if (old_map == map || old_map->start == map->start) {
-			/* The kernel map */
+		if (!__map__is_kmodule(old_map)) {
 			old_map = next;
 			continue;
 		}
@@ -1109,7 +1103,7 @@
 					     kallsyms_filename))
 		return -EINVAL;
 
-	if (do_validate_kcore_modules(modules_filename, map, kmaps))
+	if (do_validate_kcore_modules(modules_filename, kmaps))
 		return -EINVAL;
 
 	return 0;
@@ -1138,7 +1132,6 @@
 
 struct kcore_mapfn_data {
 	struct dso *dso;
-	enum map_type type;
 	struct list_head maps;
 };
 
@@ -1147,7 +1140,7 @@
 	struct kcore_mapfn_data *md = data;
 	struct map *map;
 
-	map = map__new2(start, md->dso, md->type);
+	map = map__new2(start, md->dso);
 	if (map == NULL)
 		return -ENOMEM;
 
@@ -1163,13 +1156,13 @@
 			   const char *kallsyms_filename)
 {
 	struct map_groups *kmaps = map__kmaps(map);
-	struct machine *machine;
 	struct kcore_mapfn_data md;
 	struct map *old_map, *new_map, *replacement_map = NULL;
+	struct machine *machine;
 	bool is_64_bit;
 	int err, fd;
 	char kcore_filename[PATH_MAX];
-	struct symbol *sym;
+	u64 stext;
 
 	if (!kmaps)
 		return -EINVAL;
@@ -1177,7 +1170,7 @@
 	machine = kmaps->machine;
 
 	/* This function requires that the map is the kernel map */
-	if (map != machine->vmlinux_maps[map->type])
+	if (!__map__is_kernel(map))
 		return -EINVAL;
 
 	if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
@@ -1189,7 +1182,6 @@
 		return -EINVAL;
 
 	md.dso = dso;
-	md.type = map->type;
 	INIT_LIST_HEAD(&md.maps);
 
 	fd = open(kcore_filename, O_RDONLY);
@@ -1200,7 +1192,7 @@
 	}
 
 	/* Read new maps into temporary lists */
-	err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md,
+	err = file__read_maps(fd, map->prot & PROT_EXEC, kcore_mapfn, &md,
 			      &is_64_bit);
 	if (err)
 		goto out_err;
@@ -1212,7 +1204,7 @@
 	}
 
 	/* Remove old maps */
-	old_map = map_groups__first(kmaps, map->type);
+	old_map = map_groups__first(kmaps);
 	while (old_map) {
 		struct map *next = map_groups__next(old_map);
 
@@ -1220,14 +1212,15 @@
 			map_groups__remove(kmaps, old_map);
 		old_map = next;
 	}
+	machine->trampolines_mapped = false;
 
-	/* Find the kernel map using the first symbol */
-	sym = dso__first_symbol(dso, map->type);
-	list_for_each_entry(new_map, &md.maps, node) {
-		if (sym && sym->start >= new_map->start &&
-		    sym->start < new_map->end) {
-			replacement_map = new_map;
-			break;
+	/* Find the kernel map using the '_stext' symbol */
+	if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
+		list_for_each_entry(new_map, &md.maps, node) {
+			if (stext >= new_map->start && stext < new_map->end) {
+				replacement_map = new_map;
+				break;
+			}
 		}
 	}
 
@@ -1256,6 +1249,19 @@
 		map__put(new_map);
 	}
 
+	if (machine__is(machine, "x86_64")) {
+		u64 addr;
+
+		/*
+		 * If one of the corresponding symbols is there, assume the
+		 * entry trampoline maps are too.
+		 */
+		if (!kallsyms__get_function_start(kallsyms_filename,
+						  ENTRY_TRAMPOLINE_NAME,
+						  &addr))
+			machine->trampolines_mapped = true;
+	}
+
 	/*
 	 * Set the data type and long name so that kcore can be read via
 	 * dso__data_read_addr().
@@ -1268,7 +1274,7 @@
 
 	close(fd);
 
-	if (map->type == MAP__FUNCTION)
+	if (map->prot & PROT_EXEC)
 		pr_debug("Using %s for kernel object code\n", kcore_filename);
 	else
 		pr_debug("Using %s for kernel data\n", kcore_filename);
@@ -1289,14 +1295,10 @@
  * If the kernel is relocated at boot time, kallsyms won't match.  Compute the
  * delta based on the relocation reference symbol.
  */
-static int kallsyms__delta(struct map *map, const char *filename, u64 *delta)
+static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta)
 {
-	struct kmap *kmap = map__kmap(map);
 	u64 addr;
 
-	if (!kmap)
-		return -1;
-
 	if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
 		return 0;
 
@@ -1310,19 +1312,23 @@
 int __dso__load_kallsyms(struct dso *dso, const char *filename,
 			 struct map *map, bool no_kcore)
 {
+	struct kmap *kmap = map__kmap(map);
 	u64 delta = 0;
 
 	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
 		return -1;
 
-	if (dso__load_all_kallsyms(dso, filename, map) < 0)
+	if (!kmap || !kmap->kmaps)
 		return -1;
 
-	if (kallsyms__delta(map, filename, &delta))
+	if (dso__load_all_kallsyms(dso, filename) < 0)
 		return -1;
 
-	symbols__fixup_end(&dso->symbols[map->type]);
-	symbols__fixup_duplicate(&dso->symbols[map->type]);
+	if (kallsyms__delta(kmap, filename, &delta))
+		return -1;
+
+	symbols__fixup_end(&dso->symbols);
+	symbols__fixup_duplicate(&dso->symbols);
 
 	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
 		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
@@ -1330,9 +1336,9 @@
 		dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
 
 	if (!no_kcore && !dso__load_kcore(dso, map, filename))
-		return dso__split_kallsyms_for_kcore(dso, map);
+		return map_groups__split_kallsyms_for_kcore(kmap->kmaps, dso);
 	else
-		return dso__split_kallsyms(dso, map, delta);
+		return map_groups__split_kallsyms(kmap->kmaps, dso, delta, map);
 }
 
 int dso__load_kallsyms(struct dso *dso, const char *filename,
@@ -1341,8 +1347,7 @@
 	return __dso__load_kallsyms(dso, filename, map, false);
 }
 
-static int dso__load_perf_map(const char *map_path, struct dso *dso,
-			      struct map *map)
+static int dso__load_perf_map(const char *map_path, struct dso *dso)
 {
 	char *line = NULL;
 	size_t n;
@@ -1379,12 +1384,12 @@
 		if (len + 2 >= line_len)
 			continue;
 
-		sym = symbol__new(start, size, STB_GLOBAL, line + len);
+		sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len);
 
 		if (sym == NULL)
 			goto out_delete_line;
 
-		symbols__insert(&dso->symbols[map->type], sym);
+		symbols__insert(&dso->symbols, sym);
 		nr_syms++;
 	}
 
@@ -1509,25 +1514,27 @@
 	pthread_mutex_lock(&dso->lock);
 
 	/* check again under the dso->lock */
-	if (dso__loaded(dso, map->type)) {
+	if (dso__loaded(dso)) {
 		ret = 1;
 		goto out;
 	}
 
-	if (dso->kernel) {
-		if (dso->kernel == DSO_TYPE_KERNEL)
-			ret = dso__load_kernel_sym(dso, map);
-		else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
-			ret = dso__load_guest_kernel_sym(dso, map);
-
-		goto out;
-	}
-
 	if (map->groups && map->groups->machine)
 		machine = map->groups->machine;
 	else
 		machine = NULL;
 
+	if (dso->kernel) {
+		if (dso->kernel == DSO_TYPE_KERNEL)
+			ret = dso__load_kernel_sym(dso, map);
+		else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+			ret = dso__load_guest_kernel_sym(dso, map);
+
+		if (machine__is(machine, "x86_64"))
+			machine__map_x86_64_entry_trampolines(machine, dso);
+		goto out;
+	}
+
 	dso->adjust_symbols = 0;
 
 	if (perfmap) {
@@ -1542,7 +1549,7 @@
 			goto out;
 		}
 
-		ret = dso__load_perf_map(map_path, dso, map);
+		ret = dso__load_perf_map(map_path, dso);
 		dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
 					     DSO_BINARY_TYPE__NOT_FOUND;
 		goto out;
@@ -1651,7 +1658,7 @@
 	if (ret > 0) {
 		int nr_plt;
 
-		nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map);
+		nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss);
 		if (nr_plt > 0)
 			ret += nr_plt;
 	}
@@ -1663,17 +1670,16 @@
 	if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
 		ret = 0;
 out:
-	dso__set_loaded(dso, map->type);
+	dso__set_loaded(dso);
 	pthread_mutex_unlock(&dso->lock);
 	nsinfo__mountns_exit(&nsc);
 
 	return ret;
 }
 
-struct map *map_groups__find_by_name(struct map_groups *mg,
-				     enum map_type type, const char *name)
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name)
 {
-	struct maps *maps = &mg->maps[type];
+	struct maps *maps = &mg->maps;
 	struct map *map;
 
 	down_read(&maps->lock);
@@ -1720,7 +1726,7 @@
 		else
 			dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
 		dso__set_long_name(dso, vmlinux, vmlinux_allocated);
-		dso__set_loaded(dso, map->type);
+		dso__set_loaded(dso);
 		pr_debug("Using %s for symbols\n", symfs_vmlinux);
 	}
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 70c16741..1a16438 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -57,7 +57,8 @@
 	u64		start;
 	u64		end;
 	u16		namelen;
-	u8		binding;
+	u8		type:4;
+	u8		binding:4;
 	u8		idle:1;
 	u8		ignore:1;
 	u8		inlined:1;
@@ -259,17 +260,16 @@
 			 bool no_kcore);
 int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
 
-void dso__insert_symbol(struct dso *dso, enum map_type type,
+void dso__insert_symbol(struct dso *dso,
 			struct symbol *sym);
 
-struct symbol *dso__find_symbol(struct dso *dso, enum map_type type,
-				u64 addr);
-struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type,
-					const char *name);
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
+
 struct symbol *symbol__next_by_name(struct symbol *sym);
 
-struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
-struct symbol *dso__last_symbol(struct dso *dso, enum map_type type);
+struct symbol *dso__first_symbol(struct dso *dso);
+struct symbol *dso__last_symbol(struct dso *dso);
 struct symbol *dso__next_symbol(struct symbol *sym);
 
 enum dso_type dso__type_fd(int fd);
@@ -288,7 +288,7 @@
 void symbol__elf_init(void);
 int symbol__annotation_init(void);
 
-struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name);
 size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
 				      const struct addr_location *al,
 				      bool unknown_as_addr,
@@ -300,7 +300,6 @@
 				 bool unknown_as_addr, FILE *fp);
 size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
 size_t symbol__fprintf(struct symbol *sym, FILE *fp);
-bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 bool symbol__restricted_filename(const char *filename,
 				 const char *restricted_filename);
 int symbol__config_symfs(const struct option *opt __maybe_unused,
@@ -308,8 +307,7 @@
 
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, int kmodule);
-int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss,
-				struct map *map);
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
 
 char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
 
@@ -317,7 +315,7 @@
 void symbols__insert(struct rb_root *symbols, struct symbol *sym);
 void symbols__fixup_duplicate(struct rb_root *symbols);
 void symbols__fixup_end(struct rb_root *symbols);
-void __map_groups__fixup_end(struct map_groups *mg, enum map_type type);
+void map_groups__fixup_end(struct map_groups *mg);
 
 typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
 int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
index 6dd2cb8..ed0205c 100644
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -58,13 +58,13 @@
 }
 
 size_t dso__fprintf_symbols_by_name(struct dso *dso,
-				    enum map_type type, FILE *fp)
+				    FILE *fp)
 {
 	size_t ret = 0;
 	struct rb_node *nd;
 	struct symbol_name_rb_node *pos;
 
-	for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) {
 		pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
 		fprintf(fp, "%s\n", pos->sym.name);
 	}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 68b65b1..2048d39 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -302,23 +302,20 @@
 static int __thread__prepare_access(struct thread *thread)
 {
 	bool initialized = false;
-	int i, err = 0;
+	int err = 0;
+	struct maps *maps = &thread->mg->maps;
+	struct map *map;
 
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		struct maps *maps = &thread->mg->maps[i];
-		struct map *map;
+	down_read(&maps->lock);
 
-		down_read(&maps->lock);
-
-		for (map = maps__first(maps); map; map = map__next(map)) {
-			err = unwind__prepare_access(thread, map, &initialized);
-			if (err || initialized)
-				break;
-		}
-
-		up_read(&maps->lock);
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		err = unwind__prepare_access(thread, map, &initialized);
+		if (err || initialized)
+			break;
 	}
 
+	up_read(&maps->lock);
+
 	return err;
 }
 
@@ -335,8 +332,6 @@
 static int thread__clone_map_groups(struct thread *thread,
 				    struct thread *parent)
 {
-	int i;
-
 	/* This is new thread, we share map groups for process. */
 	if (thread->pid_ == parent->pid_)
 		return thread__prepare_access(thread);
@@ -348,9 +343,8 @@
 	}
 
 	/* But this one is new process, copy maps. */
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		if (map_groups__clone(thread, parent->mg, i) < 0)
-			return -ENOMEM;
+	if (map_groups__clone(thread, parent->mg) < 0)
+		return -ENOMEM;
 
 	return 0;
 }
@@ -371,8 +365,7 @@
 	return thread__clone_map_groups(thread, parent);
 }
 
-void thread__find_cpumode_addr_location(struct thread *thread,
-					enum map_type type, u64 addr,
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
 					struct addr_location *al)
 {
 	size_t i;
@@ -384,7 +377,7 @@
 	};
 
 	for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
-		thread__find_addr_location(thread, cpumodes[i], type, addr, al);
+		thread__find_symbol(thread, cpumodes[i], addr, al);
 		if (al->map)
 			break;
 	}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 14d44c3..07606aa 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -92,16 +92,13 @@
 
 struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
 
-void thread__find_addr_map(struct thread *thread,
-			   u8 cpumode, enum map_type type, u64 addr,
-			   struct addr_location *al);
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+			     struct addr_location *al);
 
-void thread__find_addr_location(struct thread *thread,
-				u8 cpumode, enum map_type type, u64 addr,
-				struct addr_location *al);
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+				   u64 addr, struct addr_location *al);
 
-void thread__find_cpumode_addr_location(struct thread *thread,
-					enum map_type type, u64 addr,
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
 					struct addr_location *al);
 
 static inline void *thread__priv(struct thread *thread)
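
The new declarations return their result instead of requiring callers
to probe the addr_location afterwards. A usage sketch under that
assumption, with the types and helpers as declared above:

	static int resolve(struct thread *thread, u64 addr)
	{
		struct addr_location al;

		/* Map-only lookup; NULL means no map covers addr. */
		if (!thread__find_map(thread, PERF_RECORD_MISC_USER, addr, &al))
			return -1;

		/* The costlier variant also fills al.sym. */
		if (thread__find_symbol(thread, PERF_RECORD_MISC_USER, addr, &al))
			pr_debug("resolved to %s\n", al.sym->name);
		return 0;
	}
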
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index d7f2113..c85d0d1 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -103,11 +103,10 @@
 
 static int record_header_files(void)
 {
-	char *path;
+	char *path = get_events_file("header_page");
 	struct stat st;
 	int err = -EIO;
 
-	path = get_tracing_file("events/header_page");
 	if (!path) {
 		pr_debug("can't get tracing/events/header_page");
 		return -ENOMEM;
@@ -128,9 +127,9 @@
 		goto out;
 	}
 
-	put_tracing_file(path);
+	put_events_file(path);
 
-	path = get_tracing_file("events/header_event");
+	path = get_events_file("header_event");
 	if (!path) {
 		pr_debug("can't get tracing/events/header_event");
 		err = -ENOMEM;
@@ -154,7 +153,7 @@
 
 	err = 0;
 out:
-	put_tracing_file(path);
+	put_events_file(path);
 	return err;
 }
 
@@ -243,7 +242,7 @@
 	char *path;
 	int ret;
 
-	path = get_tracing_file("events/ftrace");
+	path = get_events_file("ftrace");
 	if (!path) {
 		pr_debug("can't get tracing/events/ftrace");
 		return -ENOMEM;
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 16a7763..1aa3686 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -75,6 +75,7 @@
 static struct event_format*
 tp_format(const char *sys, const char *name)
 {
+	char *tp_dir = get_events_file(sys);
 	struct pevent *pevent = tevent.pevent;
 	struct event_format *event = NULL;
 	char path[PATH_MAX];
@@ -82,8 +83,11 @@
 	char *data;
 	int err;
 
-	scnprintf(path, PATH_MAX, "%s/%s/%s/format",
-		  tracing_events_path, sys, name);
+	if (!tp_dir)
+		return ERR_PTR(-errno);
+
+	scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name);
+	put_events_file(tp_dir);
 
 	err = filename__read_str(path, &data, &size);
 	if (err)
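
tp_format() above shows the new pattern for tracefs event paths: the
directory string comes from get_events_file(), which allocates, and is
released with put_events_file(). A hedged sketch of the pairing, with
helper signatures assumed from the tools tree:

	static int event_format_path(char *path, size_t sz)
	{
		char *dir = get_events_file("sched");	/* allocates */

		if (!dir)
			return -errno;

		scnprintf(path, sz, "%s/%s/format", dir, "sched_switch");
		put_events_file(dir);			/* releases dir */
		return 0;
	}
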
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 7bdd239..538db4e 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -28,10 +28,11 @@
 {
 	Dwfl_Module *mod;
 	struct dso *dso = NULL;
-
-	thread__find_addr_location(ui->thread,
-				   PERF_RECORD_MISC_USER,
-				   MAP__FUNCTION, ip, al);
+	/*
+	 * Some callers will use al->sym, so we can't just use the
+	 * cheaper thread__find_map() here.
+	 */
+	thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al);
 
 	if (al->map)
 		dso = al->map->dso;
@@ -103,19 +104,7 @@
 	struct addr_location al;
 	ssize_t size;
 
-	thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, addr, &al);
-	if (!al.map) {
-		/*
-		 * We've seen cases (softice) where DWARF unwinder went
-		 * through non executable mmaps, which we need to lookup
-		 * in MAP__VARIABLE tree.
-		 */
-		thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-				      MAP__VARIABLE, addr, &al);
-	}
-
-	if (!al.map) {
+	if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) {
 		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
 		return -1;
 	}
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index af87304..6a11bc7 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -366,19 +366,7 @@
 static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
 {
 	struct addr_location al;
-
-	thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-			      MAP__FUNCTION, ip, &al);
-	if (!al.map) {
-		/*
-		 * We've seen cases (softice) where DWARF unwinder went
-		 * through non executable mmaps, which we need to lookup
-		 * in MAP__VARIABLE tree.
-		 */
-		thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER,
-				      MAP__VARIABLE, ip, &al);
-	}
-	return al.map;
+	return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
 }
 
 static int
@@ -586,12 +574,9 @@
 	struct unwind_entry e;
 	struct addr_location al;
 
-	thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
-				   MAP__FUNCTION, ip, &al);
-
+	e.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
 	e.ip = al.addr;
 	e.map = al.map;
-	e.sym = al.sym;
 
 	pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
 		 al.sym ? al.sym->name : "''",
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 1019bbc..eac5b85 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -38,11 +38,43 @@
 }
 
 unsigned int page_size;
-int cacheline_size;
+
+#ifdef _SC_LEVEL1_DCACHE_LINESIZE
+#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
+#else
+static void cache_line_size(int *cacheline_sizep)
+{
+	if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
+		pr_debug("cannot determine cache line size\n");
+}
+#endif
+
+int cacheline_size(void)
+{
+	static int size;
+
+	if (!size)
+		cache_line_size(&size);
+
+	return size;
+}
 
 int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
 int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
 
+int sysctl__max_stack(void)
+{
+	int value;
+
+	if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+		sysctl_perf_event_max_stack = value;
+
+	if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
+		sysctl_perf_event_max_contexts_per_stack = value;
+
+	return sysctl_perf_event_max_stack;
+}
+
 bool test_attr__enabled;
 
 bool perf_host  = true;
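
Both values are now behind accessors that compute once and cache the
result, so call sites simply invoke the function. A usage sketch, with
the declarations as they appear in util.h just below:

	#include <stdio.h>

	int cacheline_size(void);
	int sysctl__max_stack(void);

	static void report_limits(void)
	{
		printf("L1d line: %d bytes, max stack: %d frames\n",
		       cacheline_size(), sysctl__max_stack());
	}
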
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c9626c2..dc58254 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -43,7 +43,9 @@
 int hex2u64(const char *ptr, u64 *val);
 
 extern unsigned int page_size;
-extern int cacheline_size;
+int __pure cacheline_size(void);
+
+int sysctl__max_stack(void);
 
 int fetch_kernel_version(unsigned int *puint,
 			 char *str, size_t str_sz);
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 0acb1ec..741af20 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -139,12 +139,10 @@
 					      struct thread *thread)
 {
 	enum dso_type dso_type = DSO__TYPE_UNKNOWN;
-	struct map *map;
-	struct dso *dso;
+	struct map *map = map_groups__first(thread->mg);
 
-	map = map_groups__first(thread->mg, MAP__FUNCTION);
 	for (; map ; map = map_groups__next(map)) {
-		dso = map->dso;
+		struct dso *dso = map->dso;
 		if (!dso || dso->long_name[0] != '/')
 			continue;
 		dso_type = dso__type(dso, machine);
diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
index abb4c38..8ee626c 100755
--- a/tools/power/pm-graph/bootgraph.py
+++ b/tools/power/pm-graph/bootgraph.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python2
 #
 # Tool for analyzing boot timing
 # Copyright (c) 2013, Intel Corporation.
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
index 18baaf6..070be2c 100644
--- a/tools/power/pm-graph/sleepgraph.8
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -168,6 +168,7 @@
 in the current folder. The output page is a table of tests with
 suspend and resume values sorted by suspend mode, host, and kernel.
 Includes test averages by mode and links to the test html files.
+Use -genhtml to include tests with missing html.
 .TP
 \fB-modes\fR
 List available suspend modes.
@@ -179,6 +180,9 @@
 \fB-fpdt\fR
 Print out the contents of the ACPI Firmware Performance Data Table.
 .TP
+\fB-battery\fR
+Print out battery status and current charge.
+.TP
 \fB-sysinfo\fR
 Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
 .TP
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 266409f..0c76047 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python2
 #
 # Tool for analyzing suspend/resume timing
 # Copyright (c) 2013, Intel Corporation.
@@ -69,7 +69,7 @@
 #	 store system values and test parameters
 class SystemValues:
 	title = 'SleepGraph'
-	version = '5.0'
+	version = '5.1'
 	ansi = False
 	rs = 0
 	display = 0
@@ -240,7 +240,7 @@
 	kprobes = dict()
 	timeformat = '%.3f'
 	cmdline = '%s %s' % \
-			(os.path.basename(sys.argv[0]), string.join(sys.argv[1:], ' '))
+			(os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:]))
 	def __init__(self):
 		self.archargs = 'args_'+platform.machine()
 		self.hostname = platform.node()
@@ -917,12 +917,18 @@
 			self.devicegroups.append([phase])
 		self.errorinfo = {'suspend':[],'resume':[]}
 	def extractErrorInfo(self):
+		elist = {
+			'HWERROR' : '.*\[ *Hardware Error *\].*',
+			'FWBUG'   : '.*\[ *Firmware Bug *\].*',
+			'BUG'     : '.*BUG.*',
+			'ERROR'   : '.*ERROR.*',
+			'WARNING' : '.*WARNING.*',
+			'IRQ'     : '.*genirq: .*',
+			'TASKFAIL': '.*Freezing of tasks failed.*',
+		}
 		lf = sysvals.openlog(sysvals.dmesgfile, 'r')
 		i = 0
 		list = []
-		# sl = start line, et = error time, el = error line
-		type = 'ERROR'
-		sl = et = el = -1
 		for line in lf:
 			i += 1
 			m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
@@ -931,43 +937,13 @@
 			t = float(m.group('ktime'))
 			if t < self.start or t > self.end:
 				continue
-			if t < self.tSuspended:
-				dir = 'suspend'
-			else:
-				dir = 'resume'
+			dir = 'suspend' if t < self.tSuspended else 'resume'
 			msg = m.group('msg')
-			if re.match('-*\[ *cut here *\]-*', msg):
-				type = 'WARNING'
-				sl = i
-			elif re.match('genirq: .*', msg):
-				type = 'IRQ'
-				sl = i
-			elif re.match('BUG: .*', msg) or re.match('kernel BUG .*', msg):
-				type = 'BUG'
-				sl = i
-			elif re.match('-*\[ *end trace .*\]-*', msg) or \
-				re.match('R13: .*', msg):
-				if et >= 0 and sl >= 0:
-					list.append((type, dir, et, sl, i))
+			for err in elist:
+				if re.match(elist[err], msg):
+					list.append((err, dir, t, i, i))
 					self.kerror = True
-					sl = et = el = -1
-					type = 'ERROR'
-			elif 'Call Trace:' in msg:
-				if el >= 0 and et >= 0:
-					list.append((type, dir, et, el, el))
-					self.kerror = True
-				et, el = t, i
-				if sl < 0 or type == 'BUG':
-					slval = i
-					if sl >= 0:
-						slval = sl
-					list.append((type, dir, et, slval, i))
-					self.kerror = True
-					sl = et = el = -1
-					type = 'ERROR'
-		if el >= 0 and et >= 0:
-			list.append((type, dir, et, el, el))
-			self.kerror = True
+					break
 		for e in list:
 			type, dir, t, idx1, idx2 = e
 			sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t))
@@ -2331,12 +2307,14 @@
 		sv.suspendmode = data.stamp['mode']
 		if sv.suspendmode == 'command' and sv.ftracefile != '':
 			modes = ['on', 'freeze', 'standby', 'mem', 'disk']
-			out = Popen(['grep', 'machine_suspend', sv.ftracefile],
-				stderr=PIPE, stdout=PIPE).stdout.read()
-			m = re.match('.* machine_suspend\[(?P<mode>.*)\]', out)
-			if m and m.group('mode') in ['1', '2', '3', '4']:
-				sv.suspendmode = modes[int(m.group('mode'))]
-				data.stamp['mode'] = sv.suspendmode
+			fp = sysvals.openlog(sv.ftracefile, 'r')
+			for line in fp:
+				m = re.match('.* machine_suspend\[(?P<mode>.*)\]', line)
+				if m and m.group('mode') in ['1', '2', '3', '4']:
+					sv.suspendmode = modes[int(m.group('mode'))]
+					data.stamp['mode'] = sv.suspendmode
+					break
+			fp.close()
 		m = re.match(self.cmdlinefmt, self.cmdline)
 		if m:
 			sv.cmdline = m.group('cmd')
@@ -2413,7 +2391,7 @@
 #	 markers, and/or kprobes required for primary parsing.
 def doesTraceLogHaveTraceEvents():
 	kpcheck = ['_cal: (', '_cpu_down()']
-	techeck = sysvals.traceevents[:]
+	techeck = ['suspend_resume']
 	tmcheck = ['SUSPEND START', 'RESUME COMPLETE']
 	sysvals.usekprobes = False
 	fp = sysvals.openlog(sysvals.ftracefile, 'r')
@@ -2808,7 +2786,7 @@
 				# -- phase changes --
 				# start of kernel suspend
 				if(re.match('suspend_enter\[.*', t.name)):
-					if(isbegin):
+					if(isbegin and data.start == data.tKernSus):
 						data.dmesg[phase]['start'] = t.time
 						data.tKernSus = t.time
 					continue
@@ -3072,13 +3050,20 @@
 					sysvals.vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name))
 					cg.newActionFromFunction(data)
 	if sysvals.suspendmode == 'command':
-		return testdata
+		return (testdata, '')
 
 	# fill in any missing phases
+	error = []
 	for data in testdata:
+		tn = '' if len(testdata) == 1 else ('%d' % (data.testnumber + 1))
+		terr = ''
 		lp = data.phases[0]
 		for p in data.phases:
 			if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
+				if not terr:
+					print 'TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp)
+					terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp)
+					error.append(terr)
 				sysvals.vprint('WARNING: phase "%s" is missing!' % p)
 			if(data.dmesg[p]['start'] < 0):
 				data.dmesg[p]['start'] = data.dmesg[lp]['end']
@@ -3106,7 +3091,7 @@
 			for j in range(i + 1, tc):
 				testdata[j].mergeOverlapDevices(devlist)
 		testdata[0].stitchTouchingThreads(testdata[1:])
-	return testdata
+	return (testdata, ', '.join(error))
 
 # Function: loadKernelLog
 # Description:
@@ -3173,7 +3158,7 @@
 	if data:
 		testruns.append(data)
 	if len(testruns) < 1:
-		doError(' dmesg log has no suspend/resume data: %s' \
+		print('ERROR: dmesg log has no suspend/resume data: %s' \
 			% sysvals.dmesgfile)
 
 	# fix lines with same timestamp/function with the call and return swapped
@@ -3521,68 +3506,144 @@
 		.summary {border:1px solid;}\n\
 		th {border: 1px solid black;background:#222;color:white;}\n\
 		td {font: 16px "Times New Roman";text-align: center;}\n\
-		tr.alt td {background:#ddd;}\n\
-		tr.avg td {background:#aaa;}\n\
+		tr.head td {border: 1px solid black;background:#aaa;}\n\
+		tr.alt {background-color:#ddd;}\n\
+		tr.notice {color:red;}\n\
+		.minval {background-color:#BBFFBB;}\n\
+		.medval {background-color:#BBBBFF;}\n\
+		.maxval {background-color:#FFBBBB;}\n\
+		.head a {color:#000;text-decoration: none;}\n\
 	</style>\n</head>\n<body>\n'
 
+	# extract the test data into list
+	list = dict()
+	tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []]
+	iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
+	num = 0
+	lastmode = ''
+	cnt = {'pass':0, 'fail':0, 'hang':0}
+	for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
+		mode = data['mode']
+		if mode not in list:
+			list[mode] = {'data': [], 'avg': [0,0], 'min': [0,0], 'max': [0,0], 'med': [0,0]}
+		if lastmode and lastmode != mode and num > 0:
+			for i in range(2):
+				s = sorted(tMed[i])
+				list[lastmode]['med'][i] = s[int(len(s)/2)]
+				iMed[i] = tMed[i].index(list[lastmode]['med'][i])
+			list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num]
+			list[lastmode]['min'] = tMin
+			list[lastmode]['max'] = tMax
+			list[lastmode]['idx'] = (iMin, iMed, iMax)
+			tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []]
+			iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
+			num = 0
+		tVal = [float(data['suspend']), float(data['resume'])]
+		list[mode]['data'].append([data['host'], data['kernel'],
+			data['time'], tVal[0], tVal[1], data['url'], data['result'],
+			data['issues']])
+		idx = len(list[mode]['data']) - 1
+		if data['result'] == 'pass':
+			cnt['pass'] += 1
+			for i in range(2):
+				tMed[i].append(tVal[i])
+				tAvg[i] += tVal[i]
+				if tMin[i] == 0 or tVal[i] < tMin[i]:
+					iMin[i] = idx
+					tMin[i] = tVal[i]
+				if tMax[i] == 0 or tVal[i] > tMax[i]:
+					iMax[i] = idx
+					tMax[i] = tVal[i]
+			num += 1
+		elif data['result'] == 'hang':
+			cnt['hang'] += 1
+		elif data['result'] == 'fail':
+			cnt['fail'] += 1
+		lastmode = mode
+	if lastmode and num > 0:
+		for i in range(2):
+			s = sorted(tMed[i])
+			list[lastmode]['med'][i] = s[int(len(s)/2)]
+			iMed[i] = tMed[i].index(list[lastmode]['med'][i])
+		list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num]
+		list[lastmode]['min'] = tMin
+		list[lastmode]['max'] = tMax
+		list[lastmode]['idx'] = (iMin, iMed, iMax)
+
 	# group test header
-	html += '<div class="stamp">%s (%d tests)</div>\n' % (folder, len(testruns))
+	desc = []
+	for ilk in sorted(cnt, reverse=True):
+		if cnt[ilk] > 0:
+			desc.append('%d %s' % (cnt[ilk], ilk))
+	html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (folder, len(testruns), ', '.join(desc))
 	th = '\t<th>{0}</th>\n'
 	td = '\t<td>{0}</td>\n'
+	tdh = '\t<td{1}>{0}</td>\n'
 	tdlink = '\t<td><a href="{0}">html</a></td>\n'
 
 	# table header
 	html += '<table class="summary">\n<tr>\n' + th.format('#') +\
 		th.format('Mode') + th.format('Host') + th.format('Kernel') +\
-		th.format('Test Time') + th.format('Suspend') + th.format('Resume') +\
-		th.format('Detail') + '</tr>\n'
+		th.format('Test Time') + th.format('Result') + th.format('Issues') +\
+		th.format('Suspend') + th.format('Resume') + th.format('Detail') + '</tr>\n'
 
-	# test data, 1 row per test
-	avg = '<tr class="avg"><td></td><td></td><td></td><td></td>'+\
-		'<td>Average of {0} {1} tests</td><td>{2}</td><td>{3}</td><td></td></tr>\n'
-	sTimeAvg = rTimeAvg = 0.0
-	mode = ''
-	num = 0
-	for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
-		if mode != data['mode']:
-			# test average line
-			if(num > 0):
-				sTimeAvg /= (num - 1)
-				rTimeAvg /= (num - 1)
-				html += avg.format('%d' % (num - 1), mode,
-					'%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg)
-			sTimeAvg = rTimeAvg = 0.0
-			mode = data['mode']
-			num = 1
-		# alternate row color
-		if num % 2 == 1:
-			html += '<tr class="alt">\n'
+	# export list into html
+	head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\
+		'<td colspan=8 class="sus">Suspend Avg={2} '+\
+		'<span class=minval><a href="#s{10}min">Min={3}</a></span> '+\
+		'<span class=medval><a href="#s{10}med">Med={4}</a></span> '+\
+		'<span class=maxval><a href="#s{10}max">Max={5}</a></span> '+\
+		'Resume Avg={6} '+\
+		'<span class=minval><a href="#r{10}min">Min={7}</a></span> '+\
+		'<span class=medval><a href="#r{10}med">Med={8}</a></span> '+\
+		'<span class=maxval><a href="#r{10}max">Max={9}</a></span></td>'+\
+		'</tr>\n'
+	headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=8></td></tr>\n'
+	for mode in list:
+		# header line for each suspend mode
+		num = 0
+		tAvg, tMin, tMax, tMed = list[mode]['avg'], list[mode]['min'],\
+			list[mode]['max'], list[mode]['med']
+		count = len(list[mode]['data'])
+		if 'idx' in list[mode]:
+			iMin, iMed, iMax = list[mode]['idx']
+			html += head.format('%d' % count, mode.upper(),
+				'%.3f' % tAvg[0], '%.3f' % tMin[0], '%.3f' % tMed[0], '%.3f' % tMax[0],
+				'%.3f' % tAvg[1], '%.3f' % tMin[1], '%.3f' % tMed[1], '%.3f' % tMax[1],
+				mode.lower()
+			)
 		else:
-			html += '<tr>\n'
-		html += td.format("%d" % num)
-		num += 1
-		# basic info
-		for item in ['mode', 'host', 'kernel', 'time']:
-			val = "unknown"
-			if(item in data):
-				val = data[item]
-			html += td.format(val)
-		# suspend time
-		sTime = float(data['suspend'])
-		sTimeAvg += sTime
-		html += td.format('%.3f ms' % sTime)
-		# resume time
-		rTime = float(data['resume'])
-		rTimeAvg += rTime
-		html += td.format('%.3f ms' % rTime)
-		# link to the output html
-		html += tdlink.format(data['url']) + '</tr>\n'
-	# last test average line
-	if(num > 0):
-		sTimeAvg /= (num - 1)
-		rTimeAvg /= (num - 1)
-		html += avg.format('%d' % (num - 1), mode,
-			'%3.3f ms' % sTimeAvg, '%3.3f ms' % rTimeAvg)
+			iMin = iMed = iMax = [-1, -1, -1]
+			html += headnone.format('%d' % count, mode.upper())
+		for d in list[mode]['data']:
+			# row classes - alternate row color
+			rcls = ['alt'] if num % 2 == 1 else []
+			if d[6] != 'pass':
+				rcls.append('notice')
+			html += '<tr class="'+(' '.join(rcls))+'">\n' if len(rcls) > 0 else '<tr>\n'
+			# figure out if the line has sus or res highlighted
+			idx = list[mode]['data'].index(d)
+			tHigh = ['', '']
+			for i in range(2):
+				tag = 's%s' % mode if i == 0 else 'r%s' % mode
+				if idx == iMin[i]:
+					tHigh[i] = ' id="%smin" class=minval title="Minimum"' % tag
+				elif idx == iMax[i]:
+					tHigh[i] = ' id="%smax" class=maxval title="Maximum"' % tag
+				elif idx == iMed[i]:
+					tHigh[i] = ' id="%smed" class=medval title="Median"' % tag
+			html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row
+			html += td.format(mode)										# mode
+			html += td.format(d[0])										# host
+			html += td.format(d[1])										# kernel
+			html += td.format(d[2])										# time
+			html += td.format(d[6])										# result
+			html += td.format(d[7])										# issues
+			html += tdh.format('%.3f ms' % d[3], tHigh[0]) if d[3] else td.format('')	# suspend
+			html += tdh.format('%.3f ms' % d[4], tHigh[1]) if d[4] else td.format('')	# resume
+			html += tdlink.format(d[5]) if d[5] else td.format('')		# url
+			html += '</tr>\n'
+			num += 1
 
 	# flush the data to file
 	hf = open(htmlfile, 'w')
@@ -3607,7 +3668,7 @@
 #	 testruns: array of Data objects from parseKernelLog or parseTraceLog
 # Output:
 #	 True if the html file was created, false if it failed
-def createHTML(testruns):
+def createHTML(testruns, testfail):
 	if len(testruns) < 1:
 		print('ERROR: Not enough test data to build a timeline')
 		return
@@ -3641,6 +3702,7 @@
 		'<td class="purple">{4}Firmware Resume: {2} ms</td>'\
 		'<td class="yellow" title="time from firmware mode to return from kernel enter_state({5}) [kernel time only]">{4}Kernel Resume: {3} ms</td>'\
 		'</tr>\n</table>\n'
+	html_fail = '<table class="testfail"><tr><td>{0}</td></tr></table>\n'
 
 	# html format variables
 	scaleH = 20
@@ -3708,6 +3770,9 @@
 					resume_time, testdesc, stitle, rtitle)
 			devtl.html += thtml
 
+	if testfail:
+		devtl.html += html_fail.format(testfail)
+
 	# time scale for potentially multiple datasets
 	t0 = testruns[0].start
 	tMax = testruns[-1].end
@@ -4006,6 +4071,7 @@
 		.blue {background:rgba(169,208,245,0.4);}\n\
 		.time1 {font:22px Arial;border:1px solid;}\n\
 		.time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
+		.testfail {font:bold 22px Arial;color:red;border:1px dashed;}\n\
 		td {text-align:center;}\n\
 		r {color:#500000;font:15px Tahoma;}\n\
 		n {color:#505050;font:15px Tahoma;}\n\
@@ -4927,6 +4993,25 @@
 		count += 1
 	return out
 
+def getBattery():
+	p = '/sys/class/power_supply'
+	bat = dict()
+	for d in os.listdir(p):
+		type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower()
+		if type != 'battery':
+			continue
+		for v in ['status', 'energy_now', 'capacity_now']:
+			bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower()
+		break
+	ac = True
+	if 'status' in bat and 'discharging' in bat['status']:
+		ac = False
+	charge = 0
+	for v in ['energy_now', 'capacity_now']:
+		if v in bat and bat[v]:
+			charge = int(bat[v])
+	return (ac, charge)
+
 # Function: getFPDT
 # Description:
 #	 Read the acpi bios tables and pull out FPDT, the firmware data
@@ -5202,8 +5287,9 @@
 
 def processData(live=False):
 	print('PROCESSING DATA')
+	error = ''
 	if(sysvals.usetraceevents):
-		testruns = parseTraceLog(live)
+		testruns, error = parseTraceLog(live)
 		if sysvals.dmesgfile:
 			for data in testruns:
 				data.extractErrorInfo()
@@ -5220,15 +5306,18 @@
 		for data in testruns:
 			data.debugPrint()
 		sys.exit()
-
+	if len(testruns) < 1:
+		return (testruns, {'error': 'timeline generation failed'})
 	sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
-	createHTML(testruns)
+	createHTML(testruns, error)
 	print('DONE')
 	data = testruns[0]
 	stamp = data.stamp
 	stamp['suspend'], stamp['resume'] = data.getTimeValues()
 	if data.fwValid:
 		stamp['fwsuspend'], stamp['fwresume'] = data.fwSuspend, data.fwResume
+	if error:
+		stamp['error'] = error
 	return (testruns, stamp)
 
 # Function: rerunTest
@@ -5268,58 +5357,88 @@
 	sysvals.sudouser(sysvals.testdir)
 	sysvals.outputResult(stamp, n)
 
-def find_in_html(html, strs, div=False):
-	for str in strs:
-		l = len(str)
-		i = html.find(str)
-		if i >= 0:
+def find_in_html(html, start, end, firstonly=True):
+	n, out = 0, []
+	while n < len(html):
+		m = re.search(start, html[n:])
+		if not m:
 			break
-	if i < 0:
+		i = m.end()
+		m = re.search(end, html[n+i:])
+		if not m:
+			break
+		j = m.start()
+		str = html[n+i:n+i+j]
+		if end == 'ms':
+			num = re.search(r'[-+]?\d*\.\d+|\d+', str)
+			str = num.group() if num else 'NaN'
+		if firstonly:
+			return str
+		out.append(str)
+		n += i+j
+	if firstonly:
 		return ''
-	if not div:
-		return re.search(r'[-+]?\d*\.\d+|\d+', html[i+l:i+l+50]).group()
-	n = html[i+l:].find('</div>')
-	if n < 0:
-		return ''
-	return html[i+l:i+l+n]
+	return out
 
 # Function: runSummary
 # Description:
 #	 create a summary of tests in a sub-directory
-def runSummary(subdir, local=True):
+def runSummary(subdir, local=True, genhtml=False):
 	inpath = os.path.abspath(subdir)
 	outpath = inpath
 	if local:
 		outpath = os.path.abspath('.')
 	print('Generating a summary of folder "%s"' % inpath)
+	if genhtml:
+		for dirname, dirnames, filenames in os.walk(subdir):
+			sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = ''
+			for filename in filenames:
+				if(re.match('.*_dmesg.txt', filename)):
+					sysvals.dmesgfile = os.path.join(dirname, filename)
+				elif(re.match('.*_ftrace.txt', filename)):
+					sysvals.ftracefile = os.path.join(dirname, filename)
+			sysvals.setOutputFile()
+			if sysvals.ftracefile and sysvals.htmlfile and \
+				not os.path.exists(sysvals.htmlfile):
+				print('FTRACE: %s' % sysvals.ftracefile)
+				if sysvals.dmesgfile:
+					print('DMESG : %s' % sysvals.dmesgfile)
+				rerunTest()
 	testruns = []
 	for dirname, dirnames, filenames in os.walk(subdir):
 		for filename in filenames:
 			if(not re.match('.*.html', filename)):
 				continue
 			file = os.path.join(dirname, filename)
-			html = open(file, 'r').read(10000)
-			suspend = find_in_html(html,
-				['Kernel Suspend: ', 'Kernel Suspend Time: '])
-			resume = find_in_html(html,
-				['Kernel Resume: ', 'Kernel Resume Time: '])
-			line = find_in_html(html, ['<div class="stamp">'], True)
+			html = open(file, 'r').read()
+			suspend = find_in_html(html, 'Kernel Suspend', 'ms')
+			resume = find_in_html(html, 'Kernel Resume', 'ms')
+			line = find_in_html(html, '<div class="stamp">', '</div>')
 			stmp = line.split()
-			if not suspend or not resume or len(stmp) < 4:
+			if not suspend or not resume or len(stmp) != 8:
 				continue
+			try:
+				dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p')
+			except:
+				continue
+			tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
+			error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
+			result = 'fail' if error else 'pass'
+			ilist = []
+			e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '&rarr;</div>', False)
+			for i in list(set(e)):
+				ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i)
 			data = {
+				'mode': stmp[2],
 				'host': stmp[0],
 				'kernel': stmp[1],
-				'mode': stmp[2],
-				'time': string.join(stmp[3:], ' '),
+				'time': tstr,
+				'result': result,
+				'issues': ','.join(ilist),
 				'suspend': suspend,
 				'resume': resume,
 				'url': os.path.relpath(file, outpath),
 			}
-			if len(stmp) == 7:
-				data['kernel'] = 'unknown'
-				data['mode'] = stmp[1]
-				data['time'] = string.join(stmp[2:], ' ')
 			testruns.append(data)
 	outfile = os.path.join(outpath, 'summary.html')
 	print('Summary file: %s' % outfile)
@@ -5609,11 +5728,12 @@
 	print('   -modes       List available suspend modes')
 	print('   -status      Test to see if the system is enabled to run this tool')
 	print('   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table')
+	print('   -battery     Print out battery info (if available)')
 	print('   -sysinfo     Print out system info extracted from BIOS')
 	print('   -devinfo     Print out the pm settings of all devices which support runtime suspend')
 	print('   -flist       Print the list of functions currently being captured in ftrace')
 	print('   -flistall    Print all functions capable of being captured in ftrace')
-	print('   -summary directory  Create a summary of all test in this dir')
+	print('   -summary dir Create a summary of tests in this dir [-genhtml builds missing html]')
 	print('  [redo]')
 	print('   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)')
 	print('   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)')
@@ -5623,8 +5743,9 @@
 # ----------------- MAIN --------------------
 # exec start (skipped if script is loaded as library)
 if __name__ == '__main__':
+	genhtml = False
 	cmd = ''
-	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status']
+	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status', '-battery']
 	if '-f' in sys.argv:
 		sysvals.cgskip = sysvals.configFile('cgskip.txt')
 	# loop through the command line arguments
@@ -5660,6 +5781,8 @@
 			sysvals.skiphtml = True
 		elif(arg == '-cgdump'):
 			sysvals.cgdump = True
+		elif(arg == '-genhtml'):
+			genhtml = True
 		elif(arg == '-addlogs'):
 			sysvals.dmesglog = sysvals.ftracelog = True
 		elif(arg == '-verbose'):
@@ -5856,6 +5979,8 @@
 			statusCheck(True)
 		elif(cmd == 'fpdt'):
 			getFPDT(True)
+		elif(cmd == 'battery'):
+			print 'AC Connect: %s\nCharge: %d' % getBattery()
 		elif(cmd == 'sysinfo'):
 			sysvals.printSystemInfo(True)
 		elif(cmd == 'devinfo'):
@@ -5867,7 +5992,7 @@
 		elif(cmd == 'flistall'):
 			sysvals.getFtraceFilterFunctions(False)
 		elif(cmd == 'summary'):
-			runSummary(sysvals.outdir, True)
+			runSummary(sysvals.outdir, True, genhtml)
 		sys.exit()
 
 	# if instructed, re-analyze existing data files
@@ -5920,7 +6045,7 @@
 			print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
 			sysvals.logmsg = ''
 		if not sysvals.skiphtml:
-			runSummary(sysvals.outdir, False)
+			runSummary(sysvals.outdir, False, False)
 		sysvals.sudouser(sysvals.outdir)
 	else:
 		if sysvals.outdir:
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index 29f50d4..84e2b64 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -28,6 +28,7 @@
 import os
 import time
 import re
+import signal
 import sys
 import getopt
 import Gnuplot
@@ -78,11 +79,12 @@
     print('    Or')
     print('      ./intel_pstate_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>')
     print('    To generate trace file, parse and plot, use (sudo required):')
-    print('      sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name>')
+    print('      sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
     print('    Or')
-    print('      sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name>')
+    print('      sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
     print('    Optional argument:')
-    print('      cpus:  comma separated list of CPUs')
+    print('      cpus:   comma separated list of CPUs')
+    print('      kbytes: Kilobytes of memory per CPU to allocate to the trace buffer. Default: 10240')
     print('  Output:')
     print('    If not already present, creates a "results/test_name" folder in the current working directory with:')
     print('      cpu.csv - comma separated values file with trace contents and some additional calculations.')
@@ -379,7 +381,7 @@
         f_handle.close()
     except:
         print('IO error clearing trace file ')
-        quit()
+        sys.exit(2)
 
 def enable_trace():
     """ Enable trace """
@@ -389,7 +391,7 @@
                  , 'w').write("1")
     except:
         print('IO error enabling trace ')
-        quit()
+        sys.exit(2)
 
 def disable_trace():
     """ Disable trace """
@@ -399,17 +401,17 @@
                  , 'w').write("0")
     except:
         print('IO error disabling trace ')
-        quit()
+        sys.exit(2)
 
 def set_trace_buffer_size():
     """ Set trace buffer size """
 
     try:
-       open('/sys/kernel/debug/tracing/buffer_size_kb'
-                 , 'w').write("10240")
+       with open('/sys/kernel/debug/tracing/buffer_size_kb', 'w') as fp:
+          fp.write(memory)
     except:
-        print('IO error setting trace buffer size ')
-        quit()
+       print('IO error setting trace buffer size ')
+       sys.exit(2)
 
 def free_trace_buffer():
     """ Free the trace buffer memory """
@@ -418,8 +420,8 @@
        open('/sys/kernel/debug/tracing/buffer_size_kb'
                  , 'w').write("1")
     except:
-        print('IO error setting trace buffer size ')
-        quit()
+        print('IO error freeing trace buffer ')
+        sys.exit(2)
 
 def read_trace_data(filename):
     """ Read and parse trace data """
@@ -431,7 +433,7 @@
         data = open(filename, 'r').read()
     except:
         print('Error opening ', filename)
-        quit()
+        sys.exit(2)
 
     for line in data.splitlines():
         search_obj = \
@@ -489,10 +491,22 @@
 # Now separate the main overall csv file into per CPU csv files.
     split_csv()
 
+def signal_handler(signal, frame):
+    print(' SIGINT: Forcing cleanup before exit.')
+    if interval:
+        disable_trace()
+        clear_trace_file()
+        # Free the memory
+        free_trace_buffer()
+        sys.exit(0)
+
+signal.signal(signal.SIGINT, signal_handler)
+
 interval = ""
 filename = ""
 cpu_list = ""
 testname = ""
+memory = "10240"
 graph_data_present = False;
 
 valid1 = False
@@ -501,7 +515,7 @@
 cpu_mask = zeros((MAX_CPUS,), dtype=int)
 
 try:
-    opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:",["help","trace_file=","interval=","cpu=","name="])
+    opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
 except getopt.GetoptError:
     print_help()
     sys.exit(2)
@@ -521,6 +535,8 @@
     elif opt in ("-n", "--name"):
         valid2 = True
         testname = arg
+    elif opt in ("-m", "--memory"):
+        memory = arg
 
 if not (valid1 and valid2):
     print_help()
@@ -569,6 +585,11 @@
 
 read_trace_data(filename)
 
+clear_trace_file()
+# Free the memory
+if interval:
+    free_trace_buffer()
+
 if graph_data_present == False:
     print('No valid data to plot')
     sys.exit(2)
@@ -593,9 +614,4 @@
     for f in files:
         fix_ownership(f)
 
-clear_trace_file()
-# Free the memory
-if interval:
-    free_trace_buffer()
-
 os.chdir('../../')
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index a9bc914..2ab25aa 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -25,4 +25,4 @@
 	install -d  $(DESTDIR)$(PREFIX)/bin
 	install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat
 	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
-	install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8
+	install -m 644 turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index ccf2a69..ca9ef70 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -54,9 +54,12 @@
 .PP
 \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set.  If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed.  Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed.  Otherwise, the system summary plus the specified set of CPUs are printed.  The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44
 .PP
-\fB--hide column\fP do not show the specified columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--hide sysfs" to hide the sysfs statistics columns as a group.
+\fB--hide column\fP do not show the specified built-in columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--hide sysfs" to hide the sysfs statistics columns as a group.
 .PP
-\fB--show column\fP show only the specified columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--show sysfs" to show the sysfs statistics columns as a group.
+\fB--enable column\fP show the specified built-in columns, which are otherwise disabled by default.  Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds".
+The column name "all" can be used to enable all disabled-by-default built-in counters.
+.PP
+\fB--show column\fP show only the specified built-in columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--show sysfs" to show the sysfs statistics columns as a group.
 .PP
 \fB--Dump\fP displays the raw counter values.
 .PP
@@ -64,6 +67,8 @@
 .PP
 \fB--interval seconds\fP overrides the default 5.0 second measurement interval.
 .PP
+\fB--num_iterations num\fP number of measurement iterations.
+.PP
 \fB--out output_file\fP turbostat output is written to the specified output_file.
 The file is truncated if it already exists, and it is created if it does not exist.
 .PP
@@ -86,6 +91,8 @@
 The system configuration dump (if --quiet is not used) is followed by statistics.  The first row of the statistics labels the content of each column (below).  The second row of statistics is the system summary line.  The system summary line has a '-' in the columns for the Package, Core, and CPU.  The contents of the system summary line depends on the type of column.  Columns that count items (eg. IRQ) show the sum across all CPUs in the system.  Columns that show a percentage show the average across all CPUs in the system.  Columns that dump raw MSR values simply show 0 in the summary.  After the system summary row, each row describes a specific Package/Core/CPU.  Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
 .SH COLUMN DESCRIPTIONS
 .nf
+\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any.  This counter is disabled by default, and is enabled with "--enable usec" or "--debug".  On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected.  This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug".  On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
 \fBCore\fP processor core number.  Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
 \fBCPU\fP Linux CPU (logical processor) number.  Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
 \fBPackage\fP processor package number -- not present on systems with a single processor package.
@@ -262,6 +269,21 @@
 
 .fi
 
+.SH INPUT
+
+For interval-mode, turbostat will immediately end the current interval
+when it sees a newline on standard input.
+turbostat will then start the next interval.
+Control-C will send a SIGINT to turbostat,
+which will immediately abort the program with no further processing.
+.SH SIGNALS
+
+SIGINT will interrupt interval-mode.
+The end-of-interval data will be collected and displayed before turbostat exits.
+
+SIGUSR1 will end the current interval;
+end-of-interval data will be collected and displayed before turbostat
+starts a new interval.
 .SH NOTES
 
 .B "turbostat "
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index bd9c6b3..d6cff30 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -29,6 +29,7 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
+#include <sys/select.h>
 #include <sys/resource.h>
 #include <fcntl.h>
 #include <signal.h>
@@ -47,9 +48,13 @@
 char *proc_stat = "/proc/stat";
 FILE *outf;
 int *fd_percpu;
+struct timeval interval_tv = {5, 0};
 struct timespec interval_ts = {5, 0};
+struct timespec one_msec = {0, 1000000};
+unsigned int num_iterations;
 unsigned int debug;
 unsigned int quiet;
+unsigned int shown;
 unsigned int sums_need_wide_columns;
 unsigned int rapl_joules;
 unsigned int summary_only;
@@ -58,6 +63,7 @@
 unsigned int do_snb_cstates;
 unsigned int do_knl_cstates;
 unsigned int do_slm_cstates;
+unsigned int do_cnl_cstates;
 unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
 unsigned int has_epb;
@@ -80,6 +86,8 @@
 unsigned int do_dts;
 unsigned int do_ptm;
 unsigned long long  gfx_cur_rc6_ms;
+unsigned long long cpuidle_cur_cpu_lpi_us;
+unsigned long long cpuidle_cur_sys_lpi_us;
 unsigned int gfx_cur_mhz;
 unsigned int tcc_activation_temp;
 unsigned int tcc_activation_temp_override;
@@ -87,6 +95,7 @@
 double rapl_dram_energy_units, rapl_energy_units;
 double rapl_joule_counter_range;
 unsigned int do_core_perf_limit_reasons;
+unsigned int has_automatic_cstate_conversion;
 unsigned int do_gfx_perf_limit_reasons;
 unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
@@ -147,7 +156,9 @@
 #define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
 cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
 size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
-#define MAX_ADDED_COUNTERS 16
+#define MAX_ADDED_COUNTERS 8
+#define MAX_ADDED_THREAD_COUNTERS 24
+#define BITMASK_SIZE 32
 
 struct thread_data {
 	struct timeval tv_begin;
@@ -162,7 +173,7 @@
 	unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE	0x2
 #define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
-	unsigned long long counter[MAX_ADDED_COUNTERS];
+	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
 } *thread_even, *thread_odd;
 
 struct core_data {
@@ -183,6 +194,8 @@
 	unsigned long long pc8;
 	unsigned long long pc9;
 	unsigned long long pc10;
+	unsigned long long cpu_lpi;
+	unsigned long long sys_lpi;
 	unsigned long long pkg_wtd_core_c0;
 	unsigned long long pkg_any_core_c0;
 	unsigned long long pkg_any_gfxe_c0;
@@ -203,12 +216,21 @@
 #define ODD_COUNTERS thread_odd, core_odd, package_odd
 #define EVEN_COUNTERS thread_even, core_even, package_even
 
-#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \
-	(thread_base + (pkg_no) * topo.num_cores_per_pkg * \
-		topo.num_threads_per_core + \
-		(core_no) * topo.num_threads_per_core + (thread_no))
-#define GET_CORE(core_base, core_no, pkg_no) \
-	(core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
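+/*
+ * The per-thread/core/pkg counters live in flat arrays ordered
+ * pkg > node > core > thread, so these macros just compute a flat
+ * index.  E.g. with 2 nodes/pkg, 12 cores/node and 2 threads/core,
+ * (pkg 1, node 0, core 3, thread 1) is 1*2*12*2 + 0*12*2 + 3*2 + 1 = 55.
+ */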
+#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
+	((thread_base) +						      \
+	 ((pkg_no) *							      \
+	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
+	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
+	 ((core_no) * topo.threads_per_core) +				      \
+	 (thread_no))
+
+#define GET_CORE(core_base, core_no, node_no, pkg_no)			\
+	((core_base) +							\
+	 ((pkg_no) *  topo.nodes_per_pkg * topo.cores_per_node) +	\
+	 ((node_no) * topo.cores_per_node) +				\
+	 (core_no))
+
+
 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
 
 enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
@@ -244,14 +266,25 @@
 	struct pkg_data packages;
 } average;
 
+struct cpu_topology {
+	int physical_package_id;
+	int logical_cpu_id;
+	int physical_node_id;
+	int logical_node_id;	/* 0-based count within the package */
+	int physical_core_id;
+	int thread_id;
+	cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
+} *cpus;
 
 struct topo_params {
 	int num_packages;
 	int num_cpus;
 	int num_cores;
 	int max_cpu_num;
-	int num_cores_per_pkg;
-	int num_threads_per_core;
+	int max_node_num;
+	int nodes_per_pkg;
+	int cores_per_node;
+	int threads_per_core;
 } topo;
 
 struct timeval tv_even, tv_odd, tv_delta;
@@ -273,27 +306,33 @@
 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
 	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
 {
-	int retval, pkg_no, core_no, thread_no;
+	int retval, pkg_no, core_no, thread_no, node_no;
 
 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
-		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
-			for (thread_no = 0; thread_no <
-				topo.num_threads_per_core; ++thread_no) {
-				struct thread_data *t;
-				struct core_data *c;
-				struct pkg_data *p;
+		for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
+			for (node_no = 0; node_no < topo.nodes_per_pkg;
+			     node_no++) {
+				for (thread_no = 0; thread_no <
+					topo.threads_per_core; ++thread_no) {
+					struct thread_data *t;
+					struct core_data *c;
+					struct pkg_data *p;
 
-				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
+					t = GET_THREAD(thread_base, thread_no,
+						       core_no, node_no,
+						       pkg_no);
 
-				if (cpu_is_not_present(t->cpu_id))
-					continue;
+					if (cpu_is_not_present(t->cpu_id))
+						continue;
 
-				c = GET_CORE(core_base, core_no, pkg_no);
-				p = GET_PKG(pkg_base, pkg_no);
+					c = GET_CORE(core_base, core_no,
+						     node_no, pkg_no);
+					p = GET_PKG(pkg_base, pkg_no);
 
-				retval = func(t, c, p);
-				if (retval)
-					return retval;
+					retval = func(t, c, p);
+					if (retval)
+						return retval;
+				}
 			}
 		}
 	}
@@ -346,6 +385,8 @@
  * Thus, strings that are proper sub-sets must follow their more specific peers.
  */
 struct msr_counter bic[] = {
+	{ 0x0, "usec" },
+	{ 0x0, "Time_Of_Day_Seconds" },
 	{ 0x0, "Package" },
 	{ 0x0, "Avg_MHz" },
 	{ 0x0, "Bzy_MHz" },
@@ -369,7 +410,9 @@
 	{ 0x0, "Pkg%pc7" },
 	{ 0x0, "Pkg%pc8" },
 	{ 0x0, "Pkg%pc9" },
-	{ 0x0, "Pkg%pc10" },
+	{ 0x0, "Pk%pc10" },
+	{ 0x0, "CPU%LPI" },
+	{ 0x0, "SYS%LPI" },
 	{ 0x0, "PkgWatt" },
 	{ 0x0, "CorWatt" },
 	{ 0x0, "GFXWatt" },
@@ -389,62 +432,72 @@
 	{ 0x0, "Any%C0" },
 	{ 0x0, "GFX%C0" },
 	{ 0x0, "CPUGFX%" },
+	{ 0x0, "Node%" },
 };
 
 
 
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
-#define	BIC_Package	(1ULL << 0)
-#define	BIC_Avg_MHz	(1ULL << 1)
-#define	BIC_Bzy_MHz	(1ULL << 2)
-#define	BIC_TSC_MHz	(1ULL << 3)
-#define	BIC_IRQ		(1ULL << 4)
-#define	BIC_SMI		(1ULL << 5)
-#define	BIC_Busy	(1ULL << 6)
-#define	BIC_CPU_c1	(1ULL << 7)
-#define	BIC_CPU_c3	(1ULL << 8)
-#define	BIC_CPU_c6	(1ULL << 9)
-#define	BIC_CPU_c7	(1ULL << 10)
-#define	BIC_ThreadC	(1ULL << 11)
-#define	BIC_CoreTmp	(1ULL << 12)
-#define	BIC_CoreCnt	(1ULL << 13)
-#define	BIC_PkgTmp	(1ULL << 14)
-#define	BIC_GFX_rc6	(1ULL << 15)
-#define	BIC_GFXMHz	(1ULL << 16)
-#define	BIC_Pkgpc2	(1ULL << 17)
-#define	BIC_Pkgpc3	(1ULL << 18)
-#define	BIC_Pkgpc6	(1ULL << 19)
-#define	BIC_Pkgpc7	(1ULL << 20)
-#define	BIC_Pkgpc8	(1ULL << 21)
-#define	BIC_Pkgpc9	(1ULL << 22)
-#define	BIC_Pkgpc10	(1ULL << 23)
-#define	BIC_PkgWatt	(1ULL << 24)
-#define	BIC_CorWatt	(1ULL << 25)
-#define	BIC_GFXWatt	(1ULL << 26)
-#define	BIC_PkgCnt	(1ULL << 27)
-#define	BIC_RAMWatt	(1ULL << 28)
-#define	BIC_PKG__	(1ULL << 29)
-#define	BIC_RAM__	(1ULL << 30)
-#define	BIC_Pkg_J	(1ULL << 31)
-#define	BIC_Cor_J	(1ULL << 32)
-#define	BIC_GFX_J	(1ULL << 33)
-#define	BIC_RAM_J	(1ULL << 34)
-#define	BIC_Core	(1ULL << 35)
-#define	BIC_CPU		(1ULL << 36)
-#define	BIC_Mod_c6	(1ULL << 37)
-#define	BIC_sysfs	(1ULL << 38)
-#define	BIC_Totl_c0	(1ULL << 39)
-#define	BIC_Any_c0	(1ULL << 40)
-#define	BIC_GFX_c0	(1ULL << 41)
-#define	BIC_CPUGFX	(1ULL << 42)
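+/* each BIC_xxx bit below must match that column's index in bic[] above */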
+#define	BIC_USEC	(1ULL << 0)
+#define	BIC_TOD		(1ULL << 1)
+#define	BIC_Package	(1ULL << 2)
+#define	BIC_Avg_MHz	(1ULL << 3)
+#define	BIC_Bzy_MHz	(1ULL << 4)
+#define	BIC_TSC_MHz	(1ULL << 5)
+#define	BIC_IRQ		(1ULL << 6)
+#define	BIC_SMI		(1ULL << 7)
+#define	BIC_Busy	(1ULL << 8)
+#define	BIC_CPU_c1	(1ULL << 9)
+#define	BIC_CPU_c3	(1ULL << 10)
+#define	BIC_CPU_c6	(1ULL << 11)
+#define	BIC_CPU_c7	(1ULL << 12)
+#define	BIC_ThreadC	(1ULL << 13)
+#define	BIC_CoreTmp	(1ULL << 14)
+#define	BIC_CoreCnt	(1ULL << 15)
+#define	BIC_PkgTmp	(1ULL << 16)
+#define	BIC_GFX_rc6	(1ULL << 17)
+#define	BIC_GFXMHz	(1ULL << 18)
+#define	BIC_Pkgpc2	(1ULL << 19)
+#define	BIC_Pkgpc3	(1ULL << 20)
+#define	BIC_Pkgpc6	(1ULL << 21)
+#define	BIC_Pkgpc7	(1ULL << 22)
+#define	BIC_Pkgpc8	(1ULL << 23)
+#define	BIC_Pkgpc9	(1ULL << 24)
+#define	BIC_Pkgpc10	(1ULL << 25)
+#define BIC_CPU_LPI	(1ULL << 26)
+#define BIC_SYS_LPI	(1ULL << 27)
+#define	BIC_PkgWatt	(1ULL << 28)
+#define	BIC_CorWatt	(1ULL << 29)
+#define	BIC_GFXWatt	(1ULL << 30)
+#define	BIC_PkgCnt	(1ULL << 31)
+#define	BIC_RAMWatt	(1ULL << 32)
+#define	BIC_PKG__	(1ULL << 33)
+#define	BIC_RAM__	(1ULL << 34)
+#define	BIC_Pkg_J	(1ULL << 35)
+#define	BIC_Cor_J	(1ULL << 36)
+#define	BIC_GFX_J	(1ULL << 37)
+#define	BIC_RAM_J	(1ULL << 38)
+#define	BIC_Core	(1ULL << 39)
+#define	BIC_CPU		(1ULL << 40)
+#define	BIC_Mod_c6	(1ULL << 41)
+#define	BIC_sysfs	(1ULL << 42)
+#define	BIC_Totl_c0	(1ULL << 43)
+#define	BIC_Any_c0	(1ULL << 44)
+#define	BIC_GFX_c0	(1ULL << 45)
+#define	BIC_CPUGFX	(1ULL << 46)
+#define	BIC_Node	(1ULL << 47)
 
-unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL;
-unsigned long long bic_present = BIC_sysfs;
+#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD)
+
+unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs;
 
 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
+#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
 #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
 
+
 #define MAX_DEFERRED 16
 char *deferred_skip_names[MAX_DEFERRED];
 int deferred_skip_index;
@@ -469,9 +522,10 @@
 	"--cpu	cpu-set	limit output to summary plus cpu-set:\n"
 	"		{core | package | j,k,l..m,n-p }\n"
 	"--quiet	skip decoding system configuration header\n"
-	"--interval sec	Override default 5-second measurement interval\n"
+	"--interval sec.subsec	Override default 5-second measurement interval\n"
 	"--help		print this help message\n"
 	"--list		list column headers only\n"
+	"--num_iterations num   number of measurement iterations\n"
 	"--out file	create or truncate \"file\" for all output\n"
 	"--version	print version information\n"
 	"\n"
@@ -496,6 +550,9 @@
 		if (comma)
 			*comma = '\0';
 
+		if (!strcmp(name_list, "all"))
+			return ~0;
+
 		for (i = 0; i < MAX_BIC; ++i) {
 			if (!strcmp(name_list, bic[i].name)) {
 				retval |= (1ULL << i);
@@ -532,10 +589,14 @@
 	struct msr_counter *mp;
 	int printed = 0;
 
-	if (debug)
-		outp += sprintf(outp, "usec %s", delim);
+	if (DO_BIC(BIC_USEC))
+		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_TOD))
+		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Package))
 		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_Node))
+		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Core))
 		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_CPU))
@@ -576,7 +637,7 @@
 
 	if (DO_BIC(BIC_CPU_c1))
 		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
-	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
+	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
 		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_CPU_c6))
 		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
@@ -635,6 +696,10 @@
 		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Pkgpc10))
 		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_CPU_LPI))
+		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_SYS_LPI))
+		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
 
 	if (do_rapl && !rapl_joules) {
 		if (DO_BIC(BIC_PkgWatt))
@@ -739,6 +804,9 @@
 		outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
 		outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
 		outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
+		outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
+		outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
 		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
 		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
 		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
@@ -786,7 +854,7 @@
 		(cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
 		return 0;
 
-	if (debug) {
+	if (DO_BIC(BIC_USEC)) {
 		/* on each row, print how many usec each timestamp took to gather */
 		struct timeval tv;
 
@@ -794,6 +862,10 @@
 		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
 	}
 
+	/* Time_Of_Day_Seconds: on each row, print the sec.usec of the last timestamp taken */
+	if (DO_BIC(BIC_TOD))
+		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
+
 	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
 
 	tsc = t->tsc * tsc_tweak;
@@ -802,6 +874,8 @@
 	if (t == &average.threads) {
 		if (DO_BIC(BIC_Package))
 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+		if (DO_BIC(BIC_Node))
+			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 		if (DO_BIC(BIC_Core))
 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 		if (DO_BIC(BIC_CPU))
@@ -813,6 +887,15 @@
 			else
 				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 		}
+		if (DO_BIC(BIC_Node)) {
+			if (t)
+				outp += sprintf(outp, "%s%d",
+						(printed++ ? delim : ""),
+					      cpus[t->cpu_id].physical_node_id);
+			else
+				outp += sprintf(outp, "%s-",
+						(printed++ ? delim : ""));
+		}
 		if (DO_BIC(BIC_Core)) {
 			if (c)
 				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
@@ -882,7 +965,7 @@
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 		goto done;
 
-	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates)
+	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
 	if (DO_BIC(BIC_CPU_c6))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
@@ -959,6 +1042,11 @@
 	if (DO_BIC(BIC_Pkgpc10))
 		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
 
+	if (DO_BIC(BIC_CPU_LPI))
+		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+	if (DO_BIC(BIC_SYS_LPI))
+		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+
 	/*
  	 * If measurement interval exceeds minimum RAPL Joule Counter range,
  	 * indicate that results are suspect by printing "**" in fraction place.
@@ -1006,7 +1094,8 @@
 	}
 
 done:
-	outp += sprintf(outp, "\n");
+	if (*(outp - 1) != '\n')
+		outp += sprintf(outp, "\n");
 
 	return 0;
 }
@@ -1083,6 +1172,8 @@
 	old->pc8 = new->pc8 - old->pc8;
 	old->pc9 = new->pc9 - old->pc9;
 	old->pc10 = new->pc10 - old->pc10;
+	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
+	old->sys_lpi = new->sys_lpi - old->sys_lpi;
 	old->pkg_temp_c = new->pkg_temp_c;
 
 	/* flag an error when rc6 counter resets/wraps */
@@ -1140,6 +1231,15 @@
 	int i;
 	struct msr_counter *mp;
 
+	/*
+	 * the timestamps from the start of the measurement interval are in "old",
+	 * the timestamps from the end of the measurement interval are in "new";
+	 * overwrite old with new so we can print end-of-interval values
+	 */
+
+	old->tv_begin = new->tv_begin;
+	old->tv_end = new->tv_end;
+
 	old->tsc = new->tsc - old->tsc;
 
 	/* check for TSC < 1 Mcycles over interval */
@@ -1228,6 +1328,11 @@
 	int i;
 	struct msr_counter  *mp;
 
+	t->tv_begin.tv_sec = 0;
+	t->tv_begin.tv_usec = 0;
+	t->tv_end.tv_sec = 0;
+	t->tv_end.tv_usec = 0;
+
 	t->tsc = 0;
 	t->aperf = 0;
 	t->mperf = 0;
@@ -1260,6 +1365,8 @@
 	p->pc8 = 0;
 	p->pc9 = 0;
 	p->pc10 = 0;
+	p->cpu_lpi = 0;
+	p->sys_lpi = 0;
 
 	p->energy_pkg = 0;
 	p->energy_dram = 0;
@@ -1286,6 +1393,13 @@
 	int i;
 	struct msr_counter *mp;
 
+	/* remember first tv_begin */
+	if (average.threads.tv_begin.tv_sec == 0)
+		average.threads.tv_begin = t->tv_begin;
+
+	/* remember last tv_end */
+	average.threads.tv_end = t->tv_end;
+
 	average.threads.tsc += t->tsc;
 	average.threads.aperf += t->aperf;
 	average.threads.mperf += t->mperf;
@@ -1341,6 +1455,9 @@
 	average.packages.pc9 += p->pc9;
 	average.packages.pc10 += p->pc10;
 
+	average.packages.cpu_lpi = p->cpu_lpi;
+	average.packages.sys_lpi = p->sys_lpi;
+
 	average.packages.energy_pkg += p->energy_pkg;
 	average.packages.energy_dram += p->energy_dram;
 	average.packages.energy_cores += p->energy_cores;
@@ -1487,7 +1604,7 @@
 		if (get_msr(cpu, mp->msr_num, counterp))
 			return -1;
 	} else {
-		char path[128];
+		char path[128 + PATH_BYTES];
 
 		if (mp->flags & SYSFS_PERCPU) {
 			sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
@@ -1603,7 +1720,7 @@
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 		goto done;
 
-	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates) {
+	if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
 			return -6;
 	}
@@ -1684,6 +1801,11 @@
 		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
 			return -13;
 
+	if (DO_BIC(BIC_CPU_LPI))
+		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
+	if (DO_BIC(BIC_SYS_LPI))
+		p->sys_lpi = cpuidle_cur_sys_lpi_us;
+
 	if (do_rapl & RAPL_PKG) {
 		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
 			return -13;
@@ -1769,7 +1891,7 @@
 int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
 
 
 static void
@@ -2071,12 +2193,9 @@
 
 	get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
 
-#define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
-#define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
-
 	fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
 
-	fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
+	fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
 		(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
 		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
 		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
@@ -2084,6 +2203,15 @@
 		(msr & (1 << 15)) ? "" : "UN",
 		(unsigned int)msr & 0xF,
 		pkg_cstate_limit_strings[pkg_cstate_limit]);
+
+#define AUTOMATIC_CSTATE_CONVERSION		(1UL << 16)
+	if (has_automatic_cstate_conversion) {
+		fprintf(outf, ", automatic c-state conversion=%s",
+			(msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
+	}
+
+	fprintf(outf, ")\n");
+
 	return;
 }
 
@@ -2184,6 +2312,8 @@
 
 void free_all_buffers(void)
 {
+	int i;
+
 	CPU_FREE(cpu_present_set);
 	cpu_present_set = NULL;
 	cpu_present_setsize = 0;
@@ -2216,6 +2346,12 @@
 
 	free(irq_column_2_cpu);
 	free(irqs_per_cpu);
+
+	for (i = 0; i <= topo.max_cpu_num; ++i) {
+		if (cpus[i].put_ids)
+			CPU_FREE(cpus[i].put_ids);
+	}
+	free(cpus);
 }
 
 
@@ -2240,44 +2376,6 @@
 }
 
 /*
- * get_cpu_position_in_core(cpu)
- * return the position of the CPU among its HT siblings in the core
- * return -1 if the sibling is not in list
- */
-int get_cpu_position_in_core(int cpu)
-{
-	char path[64];
-	FILE *filep;
-	int this_cpu;
-	char character;
-	int i;
-
-	sprintf(path,
-		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
-		cpu);
-	filep = fopen(path, "r");
-	if (filep == NULL) {
-		perror(path);
-		exit(1);
-	}
-
-	for (i = 0; i < topo.num_threads_per_core; i++) {
-		fscanf(filep, "%d", &this_cpu);
-		if (this_cpu == cpu) {
-			fclose(filep);
-			return i;
-		}
-
-		/* Account for no separator after last thread*/
-		if (i != (topo.num_threads_per_core - 1))
-			fscanf(filep, "%c", &character);
-	}
-
-	fclose(filep);
-	return -1;
-}
-
-/*
  * cpu_is_first_core_in_package(cpu)
  * return 1 if given CPU is 1st core in package
  */
@@ -2296,35 +2394,115 @@
 	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
 }
 
-int get_num_ht_siblings(int cpu)
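+/*
+ * set_node_data() - count NUMA nodes per package and give each cpu a
+ * 0-based logical node id within its package, keyed off the first cpu
+ * listed in each /sys/bus/node/devices/nodeN/cpulist.
+ */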
+void set_node_data(void)
 {
 	char path[80];
 	FILE *filep;
-	int sib1;
-	int matches = 0;
-	char character;
-	char str[100];
-	char *ch;
+	int pkg, node, cpu;
 
-	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
-	filep = fopen_or_die(path, "r");
+	struct pkg_node_info {
+		int count;
+		int min;
+	} *pni;
 
-	/*
-	 * file format:
-	 * A ',' separated or '-' separated set of numbers
-	 * (eg 1-2 or 1,3,4,5)
-	 */
-	fscanf(filep, "%d%c\n", &sib1, &character);
-	fseek(filep, 0, SEEK_SET);
-	fgets(str, 100, filep);
-	ch = strchr(str, character);
-	while (ch != NULL) {
-		matches++;
-		ch = strchr(ch+1, character);
+	pni = calloc(topo.num_packages, sizeof(struct pkg_node_info));
+	if (!pni)
+		err(1, "calloc pkg_node_count");
+
+	for (pkg = 0; pkg < topo.num_packages; pkg++)
+		pni[pkg].min = topo.num_cpus;
+
+	for (node = 0; node <= topo.max_node_num; node++) {
+		/* find the "first" cpu in the node */
+		sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node);
+		filep = fopen(path, "r");
+		if (!filep)
+			continue;
+		fscanf(filep, "%d", &cpu);
+		fclose(filep);
+
+		pkg = cpus[cpu].physical_package_id;
+		pni[pkg].count++;
+
+		if (node < pni[pkg].min)
+			pni[pkg].min = node;
 	}
 
+	for (pkg = 0; pkg < topo.num_packages; pkg++)
+		if (pni[pkg].count > topo.nodes_per_pkg)
+			topo.nodes_per_pkg = pni[pkg].count;
+
+	for (cpu = 0; cpu < topo.num_cpus; cpu++) {
+		pkg = cpus[cpu].physical_package_id;
+		node = cpus[cpu].physical_node_id;
+		cpus[cpu].logical_node_id = node - pni[pkg].min;
+	}
+	free(pni);
+
+}
+
+int get_physical_node_id(struct cpu_topology *thiscpu)
+{
+	char path[80];
+	FILE *filep;
+	int i;
+	int cpu = thiscpu->logical_cpu_id;
+
+	for (i = 0; i <= topo.max_cpu_num; i++) {
+		sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
+			cpu, i);
+		filep = fopen(path, "r");
+		if (!filep)
+			continue;
+		fclose(filep);
+		return i;
+	}
+	return -1;
+}
+
+int get_thread_siblings(struct cpu_topology *thiscpu)
+{
+	char path[80], character;
+	FILE *filep;
+	unsigned long map;
+	int so, shift, sib_core;
+	int cpu = thiscpu->logical_cpu_id;
+	int offset = topo.max_cpu_num + 1;
+	size_t size;
+	int thread_id = 0;
+
+	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
+	if (thiscpu->thread_id < 0)
+		thiscpu->thread_id = thread_id++;
+	if (!thiscpu->put_ids)
+		return -1;
+
+	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+	CPU_ZERO_S(size, thiscpu->put_ids);
+
+	sprintf(path,
+		"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
+	filep = fopen_or_die(path, "r");
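+	/*
+	 * thread_siblings is a comma-separated list of 32-bit hex words,
+	 * most-significant word first, so start at the top of the cpu
+	 * range and work down one BITMASK_SIZE word at a time.
+	 */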
+	do {
+		offset -= BITMASK_SIZE;
+		fscanf(filep, "%lx%c", &map, &character);
+		for (shift = 0; shift < BITMASK_SIZE; shift++) {
+			if ((map >> shift) & 0x1) {
+				so = shift + offset;
+				sib_core = get_core_id(so);
+				if (sib_core == thiscpu->physical_core_id) {
+					CPU_SET_S(so, size, thiscpu->put_ids);
+					if ((so != cpu) &&
+					    (cpus[so].thread_id < 0))
+						cpus[so].thread_id =
+								    thread_id++;
+				}
+			}
+		}
+	} while (!strncmp(&character, ",", 1));
 	fclose(filep);
-	return matches+1;
+
+	return CPU_COUNT_S(size, thiscpu->put_ids);
 }
 
 /*
@@ -2339,32 +2517,42 @@
 	struct thread_data *thread_base2, struct core_data *core_base2,
 	struct pkg_data *pkg_base2)
 {
-	int retval, pkg_no, core_no, thread_no;
+	int retval, pkg_no, node_no, core_no, thread_no;
 
 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
-		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) {
-			for (thread_no = 0; thread_no <
-				topo.num_threads_per_core; ++thread_no) {
-				struct thread_data *t, *t2;
-				struct core_data *c, *c2;
-				struct pkg_data *p, *p2;
+		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
+			for (core_no = 0; core_no < topo.cores_per_node;
+			     ++core_no) {
+				for (thread_no = 0; thread_no <
+					topo.threads_per_core; ++thread_no) {
+					struct thread_data *t, *t2;
+					struct core_data *c, *c2;
+					struct pkg_data *p, *p2;
 
-				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no);
+					t = GET_THREAD(thread_base, thread_no,
+						       core_no, node_no,
+						       pkg_no);
 
-				if (cpu_is_not_present(t->cpu_id))
-					continue;
+					if (cpu_is_not_present(t->cpu_id))
+						continue;
 
-				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no);
+					t2 = GET_THREAD(thread_base2, thread_no,
+							core_no, node_no,
+							pkg_no);
 
-				c = GET_CORE(core_base, core_no, pkg_no);
-				c2 = GET_CORE(core_base2, core_no, pkg_no);
+					c = GET_CORE(core_base, core_no,
+						     node_no, pkg_no);
+					c2 = GET_CORE(core_base2, core_no,
+						      node_no,
+						      pkg_no);
 
-				p = GET_PKG(pkg_base, pkg_no);
-				p2 = GET_PKG(pkg_base2, pkg_no);
+					p = GET_PKG(pkg_base, pkg_no);
+					p2 = GET_PKG(pkg_base2, pkg_no);
 
-				retval = func(t, c, p, t2, c2, p2);
-				if (retval)
-					return retval;
+					retval = func(t, c, p, t2, c2, p2);
+					if (retval)
+						return retval;
+				}
 			}
 		}
 	}
@@ -2409,6 +2597,20 @@
 	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
 }
 
+void set_max_cpu_num(void)
+{
+	FILE *filep;
+	unsigned long dummy;
+
+	topo.max_cpu_num = 0;
+	filep = fopen_or_die(
+			"/sys/devices/system/cpu/cpu0/topology/thread_siblings",
+			"r");
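+	/* each comma-separated hex word in the sibling mask covers 32 cpus */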
+	while (fscanf(filep, "%lx,", &dummy) == 1)
+		topo.max_cpu_num += BITMASK_SIZE;
+	fclose(filep);
+	topo.max_cpu_num--; /* 0 based */
+}
 
 /*
  * count_cpus()
@@ -2416,10 +2618,7 @@
  */
 int count_cpus(int cpu)
 {
-	if (topo.max_cpu_num < cpu)
-		topo.max_cpu_num = cpu;
-
-	topo.num_cpus += 1;
+	topo.num_cpus++;
 	return 0;
 }
 int mark_cpu_present(int cpu)
@@ -2428,6 +2627,12 @@
 	return 0;
 }
 
+int init_thread_id(int cpu)
+{
+	cpus[cpu].thread_id = -1;
+	return 0;
+}
+
 /*
  * snapshot_proc_interrupts()
  *
@@ -2542,6 +2747,52 @@
 }
 
 /*
+ * snapshot_cpu_lpi_us()
+ *
+ * record snapshot of
+ * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_cpu_lpi_us(void)
+{
+	FILE *fp;
+	int retval;
+
+	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
+
+	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
+	if (retval != 1)
+		err(1, "CPU LPI");
+
+	fclose(fp);
+
+	return 0;
+}
+/*
+ * snapshot_sys_lpi_us()
+ *
+ * record snapshot of
+ * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_sys_lpi_us(void)
+{
+	FILE *fp;
+	int retval;
+
+	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
+
+	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
+	if (retval != 1)
+		err(1, "SYS LPI");
+
+	fclose(fp);
+
+	return 0;
+}
+/*
  * snapshot /proc and /sys files
  *
  * return 1 if configuration restart needed, else return 0
@@ -2558,13 +2809,83 @@
 	if (DO_BIC(BIC_GFXMHz))
 		snapshot_gfx_mhz();
 
+	if (DO_BIC(BIC_CPU_LPI))
+		snapshot_cpu_lpi_us();
+
+	if (DO_BIC(BIC_SYS_LPI))
+		snapshot_sys_lpi_us();
+
 	return 0;
 }
 
+int exit_requested;
+
+static void signal_handler (int signal)
+{
+	switch (signal) {
+	case SIGINT:
+		exit_requested = 1;
+		if (debug)
+			fprintf(stderr, " SIGINT\n");
+		break;
+	case SIGUSR1:
+		if (debug > 1)
+			fprintf(stderr, "SIGUSR1\n");
+		break;
+	}
+	/* make sure this manually-invoked interval is at least 1ms long */
+	nanosleep(&one_msec, NULL);
+}
+
+void setup_signal_handler(void)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+
+	sa.sa_handler = &signal_handler;
+
+	if (sigaction(SIGINT, &sa, NULL) < 0)
+		err(1, "sigaction SIGINT");
+	if (sigaction(SIGUSR1, &sa, NULL) < 0)
+		err(1, "sigaction SIGUSR1");
+}
+
+void do_sleep(void)
+{
+	struct timeval select_timeout;
+	fd_set readfds;
+	int retval;
+
+	FD_ZERO(&readfds);
+	FD_SET(0, &readfds);
+
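+	/* stdin is not a terminal (e.g. piped input): don't poll it, just sleep */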
+	if (!isatty(fileno(stdin))) {
+		nanosleep(&interval_ts, NULL);
+		return;
+	}
+
+	select_timeout = interval_tv;
+	retval = select(1, &readfds, NULL, NULL, &select_timeout);
+
+	if (retval == 1) {
+		switch (getc(stdin)) {
+		case 'q':
+			exit_requested = 1;
+			break;
+		}
+		/* make sure this manually-invoked interval is at least 1ms long */
+		nanosleep(&one_msec, NULL);
+	}
+}
+
 void turbostat_loop()
 {
 	int retval;
 	int restarted = 0;
+	int done_iters = 0;
+
+	setup_signal_handler();
 
 restart:
 	restarted++;
@@ -2581,6 +2902,7 @@
 		goto restart;
 	}
 	restarted = 0;
+	done_iters = 0;
 	gettimeofday(&tv_even, (struct timezone *)NULL);
 
 	while (1) {
@@ -2588,7 +2910,7 @@
 			re_initialize();
 			goto restart;
 		}
-		nanosleep(&interval_ts, NULL);
+		do_sleep();
 		if (snapshot_proc_sysfs_files())
 			goto restart;
 		retval = for_all_cpus(get_counters, ODD_COUNTERS);
@@ -2607,7 +2929,11 @@
 		compute_average(EVEN_COUNTERS);
 		format_all_counters(EVEN_COUNTERS);
 		flush_output_stdout();
-		nanosleep(&interval_ts, NULL);
+		if (exit_requested)
+			break;
+		if (num_iterations && ++done_iters >= num_iterations)
+			break;
+		do_sleep();
 		if (snapshot_proc_sysfs_files())
 			goto restart;
 		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
@@ -2626,6 +2952,10 @@
 		compute_average(ODD_COUNTERS);
 		format_all_counters(ODD_COUNTERS);
 		flush_output_stdout();
+		if (exit_requested)
+			break;
+		if (num_iterations && ++done_iters >= num_iterations)
+			break;
 	}
 }
 
@@ -2740,6 +3070,7 @@
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		pkg_cstate_limits = hsw_pkg_cstate_limits;
 		has_misc_feature_control = 1;
 		break;
@@ -2945,6 +3276,7 @@
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 
 	case INTEL_FAM6_XEON_PHI_KNL:	/* Knights Landing */
@@ -3399,6 +3731,7 @@
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
 		BIC_PRESENT(BIC_PKG__);
 		BIC_PRESENT(BIC_RAM__);
@@ -3523,6 +3856,12 @@
 	}
 }
 
+void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
+{
+	if (is_skx(family, model) || is_bdx(family, model))
+		has_automatic_cstate_conversion = 1;
+}
+
 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
 	unsigned long long msr;
@@ -3728,6 +4067,7 @@
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
@@ -3761,6 +4101,7 @@
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 	case INTEL_FAM6_ATOM_GOLDMONT:	/* BXT */
 	case INTEL_FAM6_ATOM_GEMINI_LAKE:
 		return 1;
@@ -3786,6 +4127,7 @@
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
+	case INTEL_FAM6_CANNONLAKE_MOBILE:	/* CNL */
 		return 1;
 	}
 	return 0;
@@ -3815,6 +4157,19 @@
 	return 0;
 }
 
+int is_cnl(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel)
+		return 0;
+
+	switch (model) {
+	case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+		return 1;
+	}
+
+	return 0;
+}
+
 unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
 {
 	if (is_knl(family, model))
@@ -3947,7 +4302,7 @@
 			base_cpu, msr,
 			msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
 			msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
-			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "No-" : "",
+			msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
 			msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
 			msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
 }
@@ -4152,7 +4507,6 @@
 				case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
 					crystal_hz = 24000000;	/* 24.0 MHz */
 					break;
-				case INTEL_FAM6_SKYLAKE_X:	/* SKX */
 				case INTEL_FAM6_ATOM_DENVERTON:	/* DNV */
 					crystal_hz = 25000000;	/* 25.0 MHz */
 					break;
@@ -4253,6 +4607,7 @@
 	}
 	do_slm_cstates = is_slm(family, model);
 	do_knl_cstates  = is_knl(family, model);
+	do_cnl_cstates = is_cnl(family, model);
 
 	if (!quiet)
 		decode_misc_pwr_mgmt_msr();
@@ -4262,6 +4617,7 @@
 
 	rapl_probe(family, model);
 	perf_limit_reasons_probe(family, model);
+	automatic_cstate_conversion_probe(family, model);
 
 	if (!quiet)
 		dump_cstate_pstate_config_info(family, model);
@@ -4280,6 +4636,16 @@
 	if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
 		BIC_PRESENT(BIC_GFXMHz);
 
+	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
+		BIC_PRESENT(BIC_CPU_LPI);
+	else
+		BIC_NOT_PRESENT(BIC_CPU_LPI);
+
+	if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
+		BIC_PRESENT(BIC_SYS_LPI);
+	else
+		BIC_NOT_PRESENT(BIC_SYS_LPI);
+
 	if (!quiet)
 		decode_misc_feature_control();
 
@@ -4310,14 +4676,10 @@
 	int max_core_id = 0;
 	int max_package_id = 0;
 	int max_siblings = 0;
-	struct cpu_topology {
-		int core_id;
-		int physical_package_id;
-	} *cpus;
 
 	/* Initialize num_cpus, max_cpu_num */
+	set_max_cpu_num();
 	topo.num_cpus = 0;
-	topo.max_cpu_num = 0;
 	for_all_proc_cpus(count_cpus);
 	if (!summary_only && topo.num_cpus > 1)
 		BIC_PRESENT(BIC_CPU);
@@ -4357,6 +4719,7 @@
 	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
 	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
 
+	for_all_proc_cpus(init_thread_id);
 
 	/*
 	 * For online cpus
@@ -4370,26 +4733,45 @@
 				fprintf(outf, "cpu%d NOT PRESENT\n", i);
 			continue;
 		}
-		cpus[i].core_id = get_core_id(i);
-		if (cpus[i].core_id > max_core_id)
-			max_core_id = cpus[i].core_id;
 
+		cpus[i].logical_cpu_id = i;
+
+		/* get package information */
 		cpus[i].physical_package_id = get_physical_package_id(i);
 		if (cpus[i].physical_package_id > max_package_id)
 			max_package_id = cpus[i].physical_package_id;
 
-		siblings = get_num_ht_siblings(i);
+		/* get numa node information */
+		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
+		if (cpus[i].physical_node_id > topo.max_node_num)
+			topo.max_node_num = cpus[i].physical_node_id;
+
+		/* get core information */
+		cpus[i].physical_core_id = get_core_id(i);
+		if (cpus[i].physical_core_id > max_core_id)
+			max_core_id = cpus[i].physical_core_id;
+
+		/* get thread information */
+		siblings = get_thread_siblings(&cpus[i]);
 		if (siblings > max_siblings)
 			max_siblings = siblings;
+		if (cpus[i].thread_id != -1)
+			topo.num_cores++;
+
 		if (debug > 1)
-			fprintf(outf, "cpu %d pkg %d core %d\n",
-				i, cpus[i].physical_package_id, cpus[i].core_id);
+			fprintf(outf,
+				"cpu %d pkg %d node %d core %d thread %d\n",
+				i, cpus[i].physical_package_id,
+				cpus[i].physical_node_id,
+				cpus[i].physical_core_id,
+				cpus[i].thread_id);
 	}
-	topo.num_cores_per_pkg = max_core_id + 1;
+
+	topo.cores_per_node = max_core_id + 1;
 	if (debug > 1)
 		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
-			max_core_id, topo.num_cores_per_pkg);
-	if (!summary_only && topo.num_cores_per_pkg > 1)
+			max_core_id, topo.cores_per_node);
+	if (!summary_only && topo.cores_per_node > 1)
 		BIC_PRESENT(BIC_Core);
 
 	topo.num_packages = max_package_id + 1;
@@ -4399,33 +4781,38 @@
 	if (!summary_only && topo.num_packages > 1)
 		BIC_PRESENT(BIC_Package);
 
-	topo.num_threads_per_core = max_siblings;
+	set_node_data();
+	if (debug > 1)
+		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
+	if (!summary_only && topo.nodes_per_pkg > 1)
+		BIC_PRESENT(BIC_Node);
+
+	topo.threads_per_core = max_siblings;
 	if (debug > 1)
 		fprintf(outf, "max_siblings %d\n", max_siblings);
-
-	free(cpus);
 }
 
 void
-allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
+allocate_counters(struct thread_data **t, struct core_data **c,
+		  struct pkg_data **p)
 {
 	int i;
+	int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
+			topo.num_packages;
+	int num_threads = topo.threads_per_core * num_cores;
 
-	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
-		topo.num_packages, sizeof(struct thread_data));
+	*t = calloc(num_threads, sizeof(struct thread_data));
 	if (*t == NULL)
 		goto error;
 
-	for (i = 0; i < topo.num_threads_per_core *
-		topo.num_cores_per_pkg * topo.num_packages; i++)
+	for (i = 0; i < num_threads; i++)
 		(*t)[i].cpu_id = -1;
 
-	*c = calloc(topo.num_cores_per_pkg * topo.num_packages,
-		sizeof(struct core_data));
+	*c = calloc(num_cores, sizeof(struct core_data));
 	if (*c == NULL)
 		goto error;
 
-	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
+	for (i = 0; i < num_cores; i++)
 		(*c)[i].core_id = -1;
 
 	*p = calloc(topo.num_packages, sizeof(struct pkg_data));
@@ -4442,47 +4829,39 @@
 /*
  * init_counter()
  *
- * set cpu_id, core_num, pkg_num
  * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
- *
- * increment topo.num_cores when 1st core in pkg seen
  */
 void init_counter(struct thread_data *thread_base, struct core_data *core_base,
-	struct pkg_data *pkg_base, int thread_num, int core_num,
-	int pkg_num, int cpu_id)
+	struct pkg_data *pkg_base, int cpu_id)
 {
+	int pkg_id = cpus[cpu_id].physical_package_id;
+	int node_id = cpus[cpu_id].logical_node_id;
+	int core_id = cpus[cpu_id].physical_core_id;
+	int thread_id = cpus[cpu_id].thread_id;
 	struct thread_data *t;
 	struct core_data *c;
 	struct pkg_data *p;
 
-	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num);
-	c = GET_CORE(core_base, core_num, pkg_num);
-	p = GET_PKG(pkg_base, pkg_num);
+	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
+	c = GET_CORE(core_base, core_id, node_id, pkg_id);
+	p = GET_PKG(pkg_base, pkg_id);
 
 	t->cpu_id = cpu_id;
-	if (thread_num == 0) {
+	if (thread_id == 0) {
 		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
 		if (cpu_is_first_core_in_package(cpu_id))
 			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
 	}
 
-	c->core_id = core_num;
-	p->package_id = pkg_num;
+	c->core_id = core_id;
+	p->package_id = pkg_id;
 }
 
 
 int initialize_counters(int cpu_id)
 {
-	int my_thread_id, my_core_id, my_package_id;
-
-	my_package_id = get_physical_package_id(cpu_id);
-	my_core_id = get_core_id(cpu_id);
-	my_thread_id = get_cpu_position_in_core(cpu_id);
-	if (!my_thread_id)
-		topo.num_cores++;
-
-	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
-	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
+	init_counter(EVEN_COUNTERS, cpu_id);
+	init_counter(ODD_COUNTERS, cpu_id);
 	return 0;
 }
 
@@ -4630,7 +5009,7 @@
 }
 
 void print_version() {
-	fprintf(outf, "turbostat version 17.06.23"
+	fprintf(outf, "turbostat version 18.06.01"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -4661,7 +5040,7 @@
 		msrp->next = sys.tp;
 		sys.tp = msrp;
 		sys.added_thread_counters++;
-		if (sys.added_thread_counters > MAX_ADDED_COUNTERS) {
+		if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
 			fprintf(stderr, "exceeded max %d added thread counters\n",
-				MAX_ADDED_COUNTERS);
+				MAX_ADDED_THREAD_COUNTERS);
 			exit(-1);
@@ -4820,7 +5199,7 @@
 	if (!DO_BIC(BIC_sysfs))
 		return;
 
-	for (state = 10; state > 0; --state) {
+	for (state = 10; state >= 0; --state) {
 
 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
 			base_cpu, state);
@@ -4847,7 +5226,7 @@
 				FORMAT_PERCENT, SYSFS_PERCPU);
 	}
 
-	for (state = 10; state > 0; --state) {
+	for (state = 10; state >= 0; --state) {
 
 		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
 			base_cpu, state);
@@ -4960,34 +5339,6 @@
 	exit(-1);
 }
 
-int shown;
-/*
- * parse_show_hide() - process cmdline to set default counter action
- */
-void parse_show_hide(char *optarg, enum show_hide_mode new_mode)
-{
-	/*
-	 * --show: show only those specified
-	 *  The 1st invocation will clear and replace the enabled mask
-	 *  subsequent invocations can add to it.
-	 */
-	if (new_mode == SHOW_LIST) {
-		if (shown == 0)
-			bic_enabled = bic_lookup(optarg, new_mode);
-		else
-			bic_enabled |= bic_lookup(optarg, new_mode);
-		shown = 1;
-
-		return;
-	}
-
-	/*
-	 * --hide: do not show those specified
-	 *  multiple invocations simply clear more bits in enabled mask
-	 */
-	bic_enabled &= ~bic_lookup(optarg, new_mode);
-
-}
 
 void cmdline(int argc, char **argv)
 {
@@ -4998,7 +5349,9 @@
 		{"cpu",		required_argument,	0, 'c'},
 		{"Dump",	no_argument,		0, 'D'},
 		{"debug",	no_argument,		0, 'd'},	/* internal, not documented */
+		{"enable",	required_argument,	0, 'e'},
 		{"interval",	required_argument,	0, 'i'},
+		{"num_iterations",	required_argument,	0, 'n'},
 		{"help",	no_argument,		0, 'h'},
 		{"hide",	required_argument,	0, 'H'},	// meh, -h taken by --help
 		{"Joules",	no_argument,		0, 'J'},
@@ -5014,7 +5367,7 @@
 
 	progname = argv[0];
 
-	while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:qST:v",
+	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
 				long_options, &option_index)) != -1) {
 		switch (opt) {
 		case 'a':
@@ -5026,11 +5379,20 @@
 		case 'D':
 			dump_only++;
 			break;
+		case 'e':
+			/* --enable specified counter */
+			bic_enabled |= bic_lookup(optarg, SHOW_LIST);
+			break;
 		case 'd':
 			debug++;
+			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
 			break;
 		case 'H':
-			parse_show_hide(optarg, HIDE_LIST);
+			/*
+			 * --hide: do not show those specified
+			 *  multiple invocations simply clear more bits in enabled mask
+			 */
+			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
 			break;
 		case 'h':
 		default:
@@ -5046,7 +5408,8 @@
 					exit(2);
 				}
 
-				interval_ts.tv_sec = interval;
+				interval_tv.tv_sec = interval_ts.tv_sec = interval;
+				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
 				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
 			}
 			break;
@@ -5054,6 +5417,7 @@
 			rapl_joules++;
 			break;
 		case 'l':
+			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
 			list_header_only++;
 			quiet++;
 			break;
@@ -5063,8 +5427,26 @@
 		case 'q':
 			quiet = 1;
 			break;
+		case 'n':
+			num_iterations = strtod(optarg, NULL);
+
+			if (num_iterations <= 0) {
+				fprintf(outf, "iterations %d must be a positive number\n",
+					num_iterations);
+				exit(2);
+			}
+			break;
 		case 's':
-			parse_show_hide(optarg, SHOW_LIST);
+			/*
+			 * --show: show only those specified
+			 *  The 1st invocation will clear and replace the enabled mask
+			 *  subsequent invocations can add to it.
+			 */
+			if (shown == 0)
+				bic_enabled = bic_lookup(optarg, SHOW_LIST);
+			else
+				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
+			shown = 1;
 			break;
 		case 'S':
 			summary_only++;
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index 2447b1b..f4534fb 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -24,5 +24,5 @@
 	install -d  $(DESTDIR)$(PREFIX)/bin
 	install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy
 	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
-	install x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
+	install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
 
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 32aafa9..305130d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -3,6 +3,7 @@
 TARGETS += bpf
 TARGETS += breakpoints
 TARGETS += capabilities
+TARGETS += cgroup
 TARGETS += cpufreq
 TARGETS += cpu-hotplug
 TARGETS += efivarfs
@@ -28,6 +29,7 @@
 TARGETS += proc
 TARGETS += pstore
 TARGETS += ptrace
+TARGETS += rtc
 TARGETS += seccomp
 TARGETS += sigaltstack
 TARGETS += size
@@ -134,7 +136,8 @@
 	echo "else" >> $(ALL_SCRIPT)
 	echo "  OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
 	echo "fi" >> $(ALL_SCRIPT)
-	echo "export KSFT_TAP_LEVEL=`echo 1`" >> $(ALL_SCRIPT)
+	echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT)
+	echo "export skip=4" >> $(ALL_SCRIPT)
 
 	for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index f6304d2..72c25a3 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -18,10 +18,6 @@
 		fi \
 	done
 
-override define RUN_TESTS
-	@cd $(OUTPUT); ./run.sh
-endef
-
 override define INSTALL_RULE
 	mkdir -p $(INSTALL_PATH)
 	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -33,10 +29,6 @@
 	done;
 endef
 
-override define EMIT_TESTS
-	echo "./run.sh"
-endef
-
 override define CLEAN
 	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
index a1aff50..69e676c 100755
--- a/tools/testing/selftests/android/ion/ion_test.sh
+++ b/tools/testing/selftests/android/ion/ion_test.sh
@@ -4,6 +4,9 @@
 TCID="ion_test.sh"
 errcode=0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 run_test()
 {
 	heaptype=$1
@@ -25,7 +28,7 @@
 	uid=$(id -u)
 	if [ $uid -ne 0 ]; then
 		echo $TCID: must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
@@ -35,7 +38,7 @@
 	if [ ! -e $DEVICE ]; then
 		echo $TCID: No $DEVICE device found >&2
 		echo $TCID: May be CONFIG_ION is not set >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 3fece06..f82dcc1 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -143,10 +143,14 @@
 	int err;
 	struct itimerspec spec = {};
 
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
+
 	power_state_fd = open("/sys/power/state", O_RDWR);
 	if (power_state_fd < 0)
 		ksft_exit_fail_msg(
-			"open(\"/sys/power/state\") failed (is this test running as root?)\n");
+			"open(\"/sys/power/state\") failed (%s)\n",
+			strerror(errno));
 
 	timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
 	if (timerfd < 0)
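A pattern worth noting across the selftest hunks above and below: tests that cannot run (wrong permissions, missing device) now exit with the framework's dedicated skip code 4 instead of 0, so they report SKIP rather than a false PASS. A minimal sketch of the C-side convention, reusing the ksft_exit_skip()/ksft_exit_pass() helpers from kselftest.h that step_after_suspend_test.c uses above:

#include <unistd.h>

#include "../kselftest.h"

int main(void)
{
	/* skip (exit code 4) instead of faking a pass when run unprivileged */
	if (getuid() != 0)
		ksft_exit_skip("must be run as root\n");

	/* ... actual test body ... */

	return ksft_exit_pass();
}

Shell tests follow the same convention by hand, as ion_test.sh does above with ksft_skip=4.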
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
new file mode 100644
index 0000000..f7a3139
--- /dev/null
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall
+
+all:
+
+TEST_GEN_PROGS = test_memcontrol
+
+include ../lib.mk
+
+$(OUTPUT)/test_memcontrol: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
new file mode 100644
index 0000000..b69bdeb
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "cgroup_util.h"
+
+static ssize_t read_text(const char *path, char *buf, size_t max_len)
+{
+	ssize_t len;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	len = read(fd, buf, max_len - 1);
+	if (len < 0)
+		goto out;
+
+	buf[len] = 0;
+out:
+	close(fd);
+	return len;
+}
+
+static ssize_t write_text(const char *path, char *buf, size_t len)
+{
+	int fd;
+
+	fd = open(path, O_WRONLY | O_APPEND);
+	if (fd < 0)
+		return fd;
+
+	len = write(fd, buf, len);
+	if (len < 0) {
+		close(fd);
+		return len;
+	}
+
+	close(fd);
+
+	return len;
+}
+
+char *cg_name(const char *root, const char *name)
+{
+	size_t len = strlen(root) + strlen(name) + 2;
+	char *ret = malloc(len);
+
+	snprintf(ret, len, "%s/%s", root, name);
+
+	return ret;
+}
+
+char *cg_name_indexed(const char *root, const char *name, int index)
+{
+	size_t len = strlen(root) + strlen(name) + 10;
+	char *ret = malloc(len);
+
+	snprintf(ret, len, "%s/%s_%d", root, name, index);
+
+	return ret;
+}
+
+int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+	if (read_text(path, buf, len) >= 0)
+		return 0;
+
+	return -1;
+}
+
+int cg_read_strcmp(const char *cgroup, const char *control,
+		   const char *expected)
+{
+	size_t size = strlen(expected) + 1;
+	char *buf;
+
+	buf = malloc(size);
+	if (!buf)
+		return -1;
+
+	if (cg_read(cgroup, control, buf, size))
+		return -1;
+
+	return strcmp(expected, buf);
+}
+
+int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
+{
+	char buf[PAGE_SIZE];
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	return strstr(buf, needle) ? 0 : -1;
+}
+
+long cg_read_long(const char *cgroup, const char *control)
+{
+	char buf[128];
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	return atol(buf);
+}
+
+long cg_read_key_long(const char *cgroup, const char *control, const char *key)
+{
+	char buf[PAGE_SIZE];
+	char *ptr;
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	ptr = strstr(buf, key);
+	if (!ptr)
+		return -1;
+
+	return atol(ptr + strlen(key));
+}
+
+int cg_write(const char *cgroup, const char *control, char *buf)
+{
+	char path[PATH_MAX];
+	size_t len = strlen(buf);
+
+	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+	if (write_text(path, buf, len) == len)
+		return 0;
+
+	return -1;
+}
+
+int cg_find_unified_root(char *root, size_t len)
+{
+	char buf[10 * PAGE_SIZE];
+	char *fs, *mount, *type;
+	const char delim[] = "\n\t ";
+
+	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
+		return -1;
+
+	/*
+	 * Example:
+	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
+	 */
+	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
+		mount = strtok(NULL, delim);
+		type = strtok(NULL, delim);
+		strtok(NULL, delim);
+		strtok(NULL, delim);
+		strtok(NULL, delim);
+
+		if (strcmp(fs, "cgroup") == 0 &&
+		    strcmp(type, "cgroup2") == 0) {
+			strncpy(root, mount, len);
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+int cg_create(const char *cgroup)
+{
+	return mkdir(cgroup, 0644);
+}
+
+static int cg_killall(const char *cgroup)
+{
+	char buf[PAGE_SIZE];
+	char *ptr = buf;
+
+	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
+		return -1;
+
+	while (ptr < buf + sizeof(buf)) {
+		int pid = strtol(ptr, &ptr, 10);
+
+		if (pid == 0)
+			break;
+		if (*ptr)
+			ptr++;
+		else
+			break;
+		if (kill(pid, SIGKILL))
+			return -1;
+	}
+
+	return 0;
+}
+
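+/* remove a cgroup; if it is still busy, SIGKILL its members and retry */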
+int cg_destroy(const char *cgroup)
+{
+	int ret;
+
+retry:
+	ret = rmdir(cgroup);
+	if (ret && errno == EBUSY) {
+		ret = cg_killall(cgroup);
+		if (ret)
+			return ret;
+		usleep(100);
+		goto retry;
+	}
+
+	if (ret && errno == ENOENT)
+		ret = 0;
+
+	return ret;
+}
+
+int cg_run(const char *cgroup,
+	   int (*fn)(const char *cgroup, void *arg),
+	   void *arg)
+{
+	int pid, retcode;
+
+	pid = fork();
+	if (pid < 0) {
+		return pid;
+	} else if (pid == 0) {
+		char buf[64];
+
+		snprintf(buf, sizeof(buf), "%d", getpid());
+		if (cg_write(cgroup, "cgroup.procs", buf))
+			exit(EXIT_FAILURE);
+		exit(fn(cgroup, arg));
+	} else {
+		waitpid(pid, &retcode, 0);
+		if (WIFEXITED(retcode))
+			return WEXITSTATUS(retcode);
+		else
+			return -1;
+	}
+}
+
+int cg_run_nowait(const char *cgroup,
+		  int (*fn)(const char *cgroup, void *arg),
+		  void *arg)
+{
+	int pid;
+
+	pid = fork();
+	if (pid == 0) {
+		char buf[64];
+
+		snprintf(buf, sizeof(buf), "%d", getpid());
+		if (cg_write(cgroup, "cgroup.procs", buf))
+			exit(EXIT_FAILURE);
+		exit(fn(cgroup, arg));
+	}
+
+	return pid;
+}
+
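+/* unlinked O_TMPFILE in cwd; pagecache read through it charges the caller */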
+int get_temp_fd(void)
+{
+	return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+	char buf[PAGE_SIZE];
+	struct stat st;
+	int i;
+
+	if (fstat(fd, &st))
+		goto cleanup;
+
+	size += st.st_size;
+
+	if (ftruncate(fd, size))
+		goto cleanup;
+
+	for (i = 0; i < size; i += sizeof(buf))
+		read(fd, buf, sizeof(buf));
+
+	return 0;
+
+cleanup:
+	return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+	size_t size = (unsigned long)arg;
+	char *buf, *ptr;
+
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	free(buf);
+	return 0;
+}
+
+int is_swap_enabled(void)
+{
+	char buf[PAGE_SIZE];
+	const char delim[] = "\n";
+	int cnt = 0;
+	char *line;
+
+	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+		return -1;
+
+	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+		cnt++;
+
+	return cnt > 1;
+}
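cgroup_util.c above is a small helper library that test_memcontrol.c below builds on. A hypothetical caller -- not part of the patch -- showing how the helpers compose; it assumes a writable cgroup2 mount and, for the memory.current read, that the memory controller is enabled there:

#define _GNU_SOURCE
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "cgroup_util.h"

/* runs in the child that cg_run() forks into the new cgroup */
static int hello(const char *cgroup, void *arg)
{
	printf("pid %d running in %s\n", getpid(), cgroup);
	return 0;
}

int main(void)
{
	char root[PATH_MAX];
	char *cg;
	int ret = 1;

	/* find the cgroup2 mount point via /proc/self/mounts */
	if (cg_find_unified_root(root, sizeof(root)))
		return 1;

	cg = cg_name(root, "cg_util_demo");	/* "<root>/cg_util_demo" */
	if (cg_create(cg))
		goto out;

	/* fork, move the child into the cgroup, run hello(), reap it */
	if (cg_run(cg, hello, NULL))
		goto out;

	printf("memory.current = %ld\n", cg_read_long(cg, "memory.current"));
	ret = 0;
out:
	cg_destroy(cg);		/* kills stragglers and retries on EBUSY */
	free(cg);
	return ret;
}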
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
new file mode 100644
index 0000000..fe82a29
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+
+#define PAGE_SIZE 4096
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define MB(x) (x << 20)
+
+/*
+ * Checks if two given values differ by less than err% of their sum.
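+ * E.g. values_close(100, 103, 3): |100 - 103| = 3 <= (203 / 100) * 3 = 6.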
+ */
+static inline int values_close(long a, long b, int err)
+{
+	return abs(a - b) <= (a + b) / 100 * err;
+}
+
+extern int cg_find_unified_root(char *root, size_t len);
+extern char *cg_name(const char *root, const char *name);
+extern char *cg_name_indexed(const char *root, const char *name, int index);
+extern int cg_create(const char *cgroup);
+extern int cg_destroy(const char *cgroup);
+extern int cg_read(const char *cgroup, const char *control,
+		   char *buf, size_t len);
+extern int cg_read_strcmp(const char *cgroup, const char *control,
+			  const char *expected);
+extern int cg_read_strstr(const char *cgroup, const char *control,
+			  const char *needle);
+extern long cg_read_long(const char *cgroup, const char *control);
+long cg_read_key_long(const char *cgroup, const char *control, const char *key);
+extern int cg_write(const char *cgroup, const char *control, char *buf);
+extern int cg_run(const char *cgroup,
+		  int (*fn)(const char *cgroup, void *arg),
+		  void *arg);
+extern int cg_run_nowait(const char *cgroup,
+			 int (*fn)(const char *cgroup, void *arg),
+			 void *arg);
+extern int get_temp_fd(void);
+extern int alloc_pagecache(int fd, size_t size);
+extern int alloc_anon(const char *cgroup, void *arg);
+extern int is_swap_enabled(void);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
new file mode 100644
index 0000000..cf0bddc
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -0,0 +1,1015 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the memory controller.
+ */
+static int test_memcg_subtree_control(const char *root)
+{
+	char *parent, *child, *parent2, *child2;
+	int ret = KSFT_FAIL;
+	char buf[PAGE_SIZE];
+
+	/* Create two nested cgroups with the memory controller enabled */
+	parent = cg_name(root, "memcg_test_0");
+	child = cg_name(root, "memcg_test_0/memcg_test_1");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
+		goto cleanup;
+
+	/* Create two nested cgroups without enabling memory controller */
+	parent2 = cg_name(root, "memcg_test_1");
+	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
+	if (!parent2 || !child2)
+		goto cleanup;
+
+	if (cg_create(parent2))
+		goto cleanup;
+
+	if (cg_create(child2))
+		goto cleanup;
+
+	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
+		goto cleanup;
+
+	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(child);
+	cg_destroy(parent);
+	free(parent);
+	free(child);
+
+	cg_destroy(child2);
+	cg_destroy(parent2);
+	free(parent2);
+	free(child2);
+
+	return ret;
+}
+
+static int alloc_anon_50M_check(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	char *buf, *ptr;
+	long anon, current;
+	int ret = -1;
+
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current < size)
+		goto cleanup;
+
+	if (!values_close(size, current, 3))
+		goto cleanup;
+
+	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
+	if (anon < 0)
+		goto cleanup;
+
+	if (!values_close(anon, current, 3))
+		goto cleanup;
+
+	ret = 0;
+cleanup:
+	free(buf);
+	return ret;
+}
+
+static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	int ret = -1;
+	long current, file;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		return -1;
+
+	if (alloc_pagecache(fd, size))
+		goto cleanup;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current < size)
+		goto cleanup;
+
+	file = cg_read_key_long(cgroup, "memory.stat", "file ");
+	if (file < 0)
+		goto cleanup;
+
+	if (!values_close(file, current, 10))
+		goto cleanup;
+
+	ret = 0;
+
+cleanup:
+	close(fd);
+	return ret;
+}
+
+/*
+ * This test creates a memory cgroup, allocates
+ * some anonymous memory and some pagecache,
+ * and checks memory.current and some memory.stat values.
+ */
+static int test_memcg_current(const char *root)
+{
+	int ret = KSFT_FAIL;
+	long current;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	current = cg_read_long(memcg, "memory.current");
+	if (current != 0)
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon_50M_check, NULL))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+static int alloc_pagecache_50M(const char *cgroup, void *arg)
+{
+	int fd = (long)arg;
+
+	return alloc_pagecache(fd, MB(50));
+}
+
+static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
+{
+	int fd = (long)arg;
+	int ppid = getppid();
+
+	if (alloc_pagecache(fd, MB(50)))
+		return -1;
+
+	while (getppid() == ppid)
+		sleep(1);
+
+	return 0;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A       memory.min = 50M,  memory.max = 200M
+ * A/B     memory.min = 50M,  memory.current = 50M
+ * A/B/C   memory.min = 75M,  memory.current = 50M
+ * A/B/D   memory.min = 25M,  memory.current = 50M
+ * A/B/E   memory.min = 500M, memory.current = 0
+ * A/B/F   memory.min = 0,    memory.current = 50M
+ *
+ * Usages are pagecache, but the test keeps a running
+ * process in every leaf cgroup.
+ * Then it creates A/G and creates significant
+ * memory pressure in it.
+ *
+ * Then it checks actual memory usages and expects that:
+ * A/B    memory.current ~= 50M
+ * A/B/C  memory.current ~= 33M
+ * A/B/D  memory.current ~= 17M
+ * A/B/E  memory.current ~= 0
+ *
+ * After that it tries to allocate more than the amount of
+ * unprotected memory available in A and checks that
+ * memory.min protects pagecache even in this case.
+ */
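+/*
+ * The expected ~33M/~17M split follows from how protection is
+ * distributed: B's 50M of protection is divided among its children in
+ * proportion to min(memory.min, usage), i.e. 50M for C, 25M for D and
+ * 0 for E and F, so C keeps roughly 50M * 50/75 and D 50M * 25/75.
+ */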
+static int test_memcg_min(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent[3] = {NULL};
+	char *children[4] = {NULL};
+	long c[4];
+	int i, attempts;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		goto cleanup;
+
+	parent[0] = cg_name(root, "memcg_test_0");
+	if (!parent[0])
+		goto cleanup;
+
+	parent[1] = cg_name(parent[0], "memcg_test_1");
+	if (!parent[1])
+		goto cleanup;
+
+	parent[2] = cg_name(parent[0], "memcg_test_2");
+	if (!parent[2])
+		goto cleanup;
+
+	if (cg_create(parent[0]))
+		goto cleanup;
+
+	if (cg_read_long(parent[0], "memory.min")) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.max", "200M"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_create(parent[1]))
+		goto cleanup;
+
+	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_create(parent[2]))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+		if (!children[i])
+			goto cleanup;
+
+		if (cg_create(children[i]))
+			goto cleanup;
+
+		if (i == 2)
+			continue;
+
+		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
+			      (void *)(long)fd);
+	}
+
+	if (cg_write(parent[0], "memory.min", "50M"))
+		goto cleanup;
+	if (cg_write(parent[1], "memory.min", "50M"))
+		goto cleanup;
+	if (cg_write(children[0], "memory.min", "75M"))
+		goto cleanup;
+	if (cg_write(children[1], "memory.min", "25M"))
+		goto cleanup;
+	if (cg_write(children[2], "memory.min", "500M"))
+		goto cleanup;
+	if (cg_write(children[3], "memory.min", "0"))
+		goto cleanup;
+
+	attempts = 0;
+	while (!values_close(cg_read_long(parent[1], "memory.current"),
+			     MB(150), 3)) {
+		if (attempts++ > 5)
+			break;
+		sleep(1);
+	}
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++)
+		c[i] = cg_read_long(children[i], "memory.current");
+
+	if (!values_close(c[0], MB(33), 10))
+		goto cleanup;
+
+	if (!values_close(c[1], MB(17), 10))
+		goto cleanup;
+
+	if (!values_close(c[2], 0, 1))
+		goto cleanup;
+
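+	/*
+	 * With ~50M of B's pagecache protected by memory.min, swap disabled
+	 * and memory.max = 200M on A, a further 170M anonymous allocation
+	 * cannot fit and is expected to fail (OOM).
+	 */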
+	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+		if (!children[i])
+			continue;
+
+		cg_destroy(children[i]);
+		free(children[i]);
+	}
+
+	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+		if (!parent[i])
+			continue;
+
+		cg_destroy(parent[i]);
+		free(parent[i]);
+	}
+	close(fd);
+	return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A       memory.low = 50M,  memory.max = 200M
+ * A/B     memory.low = 50M,  memory.current = 50M
+ * A/B/C   memory.low = 75M,  memory.current = 50M
+ * A/B/D   memory.low = 25M,  memory.current = 50M
+ * A/B/E   memory.low = 500M, memory.current = 0
+ * A/B/F   memory.low = 0,    memory.current = 50M
+ *
+ * Usages are pagecache.
+ * Then it creates A/G and creates significant
+ * memory pressure in it.
+ *
+ * Then it checks actual memory usages and expects that:
+ * A/B    memory.current ~= 50M
+ * A/B/C  memory.current ~= 33M
+ * A/B/D  memory.current ~= 17M
+ * A/B/E  memory.current ~= 0
+ *
+ * After that it tries to allocate more than the amount of
+ * unprotected memory available in A,
+ * and checks low and oom events in memory.events.
+ */
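+/*
+ * Unlike memory.min, memory.low is a best-effort protection: memory
+ * below the low boundary can still be reclaimed under heavy enough
+ * pressure, and every such breach is counted in the "low" entry of
+ * memory.events, which is what this test asserts on at the end.
+ */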
+static int test_memcg_low(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent[3] = {NULL};
+	char *children[4] = {NULL};
+	long low, oom;
+	long c[4];
+	int i;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		goto cleanup;
+
+	parent[0] = cg_name(root, "memcg_test_0");
+	if (!parent[0])
+		goto cleanup;
+
+	parent[1] = cg_name(parent[0], "memcg_test_1");
+	if (!parent[1])
+		goto cleanup;
+
+	parent[2] = cg_name(parent[0], "memcg_test_2");
+	if (!parent[2])
+		goto cleanup;
+
+	if (cg_create(parent[0]))
+		goto cleanup;
+
+	if (cg_read_long(parent[0], "memory.low"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.max", "200M"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_create(parent[1]))
+		goto cleanup;
+
+	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_create(parent[2]))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+		if (!children[i])
+			goto cleanup;
+
+		if (cg_create(children[i]))
+			goto cleanup;
+
+		if (i == 2)
+			continue;
+
+		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
+			goto cleanup;
+	}
+
+	if (cg_write(parent[0], "memory.low", "50M"))
+		goto cleanup;
+	if (cg_write(parent[1], "memory.low", "50M"))
+		goto cleanup;
+	if (cg_write(children[0], "memory.low", "75M"))
+		goto cleanup;
+	if (cg_write(children[1], "memory.low", "25M"))
+		goto cleanup;
+	if (cg_write(children[2], "memory.low", "500M"))
+		goto cleanup;
+	if (cg_write(children[3], "memory.low", "0"))
+		goto cleanup;
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++)
+		c[i] = cg_read_long(children[i], "memory.current");
+
+	if (!values_close(c[0], MB(33), 10))
+		goto cleanup;
+
+	if (!values_close(c[1], MB(17), 10))
+		goto cleanup;
+
+	if (!values_close(c[2], 0, 1))
+		goto cleanup;
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
+		fprintf(stderr,
+			"memory.low prevents allocation of anon memory\n");
+		goto cleanup;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		oom = cg_read_key_long(children[i], "memory.events", "oom ");
+		low = cg_read_key_long(children[i], "memory.events", "low ");
+
+		if (oom)
+			goto cleanup;
+		if (i < 2 && low <= 0)
+			goto cleanup;
+		if (i >= 2 && low)
+			goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+
+cleanup:
+	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+		if (!children[i])
+			continue;
+
+		cg_destroy(children[i]);
+		free(children[i]);
+	}
+
+	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+		if (!parent[i])
+			continue;
+
+		cg_destroy(parent[i]);
+		free(parent[i]);
+	}
+	close(fd);
+	return ret;
+}
+
+static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	int ret = -1;
+	long current;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		return -1;
+
+	if (alloc_pagecache(fd, size))
+		goto cleanup;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current <= MB(29) || current > MB(30))
+		goto cleanup;
+
+	ret = 0;
+
+cleanup:
+	close(fd);
+	return ret;
+}
+
+/*
+ * This test checks that memory.high limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
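+/*
+ * Note that memory.high is a throttling boundary rather than a hard
+ * limit: exceeding it triggers reclaim (and "high" events) instead of
+ * the OOM killer, which is why the 100M anonymous allocation below is
+ * expected to succeed while usage is kept close to 30M.
+ */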
+static int test_memcg_high(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long high;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.high", "30M"))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+		goto cleanup;
+
+	high = cg_read_key_long(memcg, "memory.events", "high ");
+	if (high <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * This test checks that memory.max limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
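+/*
+ * In contrast to memory.high, memory.max is a hard limit: once reclaim
+ * can no longer keep usage below it, the OOM killer is invoked, so the
+ * 100M anonymous allocation below is expected to fail.
+ */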
+static int test_memcg_max(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long current, max;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	/* Should be killed by OOM killer */
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+		goto cleanup;
+
+	current = cg_read_long(memcg, "memory.current");
+	if (current > MB(30) || !current)
+		goto cleanup;
+
+	max = cg_read_key_long(memcg, "memory.events", "max ");
+	if (max <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
+{
+	long mem_max = (long)arg;
+	size_t size = MB(50);
+	char *buf, *ptr;
+	long mem_current, swap_current;
+	int ret = -1;
+
+	buf = malloc(size);
+	if (buf == NULL)
+		return -1;
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	mem_current = cg_read_long(cgroup, "memory.current");
+	if (!mem_current || !values_close(mem_current, mem_max, 3))
+		goto cleanup;
+
+	swap_current = cg_read_long(cgroup, "memory.swap.current");
+	if (!swap_current ||
+	    !values_close(mem_current + swap_current, size, 3))
+		goto cleanup;
+
+	ret = 0;
+cleanup:
+	free(buf);
+	return ret;
+}
+
+/*
+ * This test checks that memory.swap.max limits the amount of
+ * anonymous memory which can be swapped out.
+ */
+static int test_memcg_swap_max(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long max;
+
+	if (!is_swap_enabled())
+		return KSFT_SKIP;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_long(memcg, "memory.swap.current")) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "30M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	/* Should be killed by OOM killer */
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
+		goto cleanup;
+
+	max = cg_read_key_long(memcg, "memory.events", "max ");
+	if (max <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM. Then it checks for oom and oom_kill events in
+ * memory.events.
+ */
+static int test_memcg_oom_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+struct tcp_server_args {
+	unsigned short port;
+	int ctl[2];
+};
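+
+/*
+ * The ctl pipe reports the server's bind status back to the test:
+ * the server writes 0 once it is listening, or errno if bind() fails,
+ * so the caller can retry with another port on EADDRINUSE.
+ */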
+
+static int tcp_server(const char *cgroup, void *arg)
+{
+	struct tcp_server_args *srv_args = arg;
+	struct sockaddr_in6 saddr = { 0 };
+	socklen_t slen = sizeof(saddr);
+	int sk, client_sk, ctl_fd, yes = 1, ret = -1;
+
+	close(srv_args->ctl[0]);
+	ctl_fd = srv_args->ctl[1];
+
+	saddr.sin6_family = AF_INET6;
+	saddr.sin6_addr = in6addr_any;
+	saddr.sin6_port = htons(srv_args->port);
+
+	sk = socket(AF_INET6, SOCK_STREAM, 0);
+	if (sk < 0)
+		return ret;
+
+	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
+		goto cleanup;
+
+	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
+		write(ctl_fd, &errno, sizeof(errno));
+		goto cleanup;
+	}
+
+	if (listen(sk, 1))
+		goto cleanup;
+
+	ret = 0;
+	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	client_sk = accept(sk, NULL, NULL);
+	if (client_sk < 0)
+		goto cleanup;
+
+	ret = -1;
+	for (;;) {
+		uint8_t buf[0x100000];
+
+		if (write(client_sk, buf, sizeof(buf)) <= 0) {
+			if (errno == ECONNRESET)
+				ret = 0;
+			break;
+		}
+	}
+
+	close(client_sk);
+
+cleanup:
+	close(sk);
+	return ret;
+}
+
+static int tcp_client(const char *cgroup, unsigned short port)
+{
+	const char server[] = "localhost";
+	struct addrinfo *ai;
+	char servport[6];
+	int retries = 0x10; /* nice round number */
+	int sk, ret;
+
+	snprintf(servport, sizeof(servport), "%hu", port);
+	ret = getaddrinfo(server, servport, NULL, &ai);
+	if (ret)
+		return ret;
+
+	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+	if (sk < 0)
+		goto free_ainfo;
+
+	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
+	if (ret < 0)
+		goto close_sk;
+
+	ret = KSFT_FAIL;
+	while (retries--) {
+		uint8_t buf[0x100000];
+		long current, sock;
+
+		if (read(sk, buf, sizeof(buf)) <= 0)
+			goto close_sk;
+
+		current = cg_read_long(cgroup, "memory.current");
+		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
+
+		if (current < 0 || sock < 0)
+			goto close_sk;
+
+		if (current < sock)
+			goto close_sk;
+
+		if (values_close(current, sock, 10)) {
+			ret = KSFT_PASS;
+			break;
+		}
+	}
+
+close_sk:
+	close(sk);
+free_ainfo:
+	freeaddrinfo(ai);
+	return ret;
+}
+
+/*
+ * This test checks socket memory accounting.
+ * The test forks a TCP server that listens on a random port between
+ * 1000 and 61000. Once it gets a client connection, it starts writing
+ * to its socket.
+ * The TCP client interleaves reads from the socket with checks that
+ * memory.current and the "sock" entry of memory.stat stay similar.
+ */
+static int test_memcg_sock(const char *root)
+{
+	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
+	unsigned short port;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
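+	/* retry with a fresh random port while the server reports EADDRINUSE */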
+	while (bind_retries--) {
+		struct tcp_server_args args;
+
+		if (pipe(args.ctl))
+			goto cleanup;
+
+		port = args.port = 1000 + rand() % 60000;
+
+		pid = cg_run_nowait(memcg, tcp_server, &args);
+		if (pid < 0)
+			goto cleanup;
+
+		close(args.ctl[1]);
+		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
+			goto cleanup;
+		close(args.ctl[0]);
+
+		if (!err)
+			break;
+		if (err != EADDRINUSE)
+			goto cleanup;
+
+		waitpid(pid, NULL, 0);
+	}
+
+	if (err == EADDRINUSE) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (tcp_client(memcg, port) != KSFT_PASS)
+		goto cleanup;
+
+	waitpid(pid, &err, 0);
+	if (WEXITSTATUS(err))
+		goto cleanup;
+
+	if (cg_read_long(memcg, "memory.current") < 0)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.stat", "sock "))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
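+/* T() pairs each test function with its stringified name for reporting */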
+#define T(x) { x, #x }
+struct memcg_test {
+	int (*fn)(const char *root);
+	const char *name;
+} tests[] = {
+	T(test_memcg_subtree_control),
+	T(test_memcg_current),
+	T(test_memcg_min),
+	T(test_memcg_low),
+	T(test_memcg_high),
+	T(test_memcg_max),
+	T(test_memcg_oom_events),
+	T(test_memcg_swap_max),
+	T(test_memcg_sock),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+	char root[PATH_MAX];
+	int i, ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	/*
+	 * Check that memory controller is available:
+	 * memory is listed in cgroup.controllers
+	 */
+	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+		ksft_exit_skip("memory controller isn't available\n");
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		switch (tests[i].fn(root)) {
+		case KSFT_PASS:
+			ksft_test_result_pass("%s\n", tests[i].name);
+			break;
+		case KSFT_SKIP:
+			ksft_test_result_skip("%s\n", tests[i].name);
+			break;
+		default:
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[i].name);
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index f3a8933..bab13dd 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 SYSFS=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
 
 prerequisite()
 {
@@ -9,7 +11,7 @@
 
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	taskset -p 01 $$
@@ -18,12 +20,12 @@
 
 	if [ ! -d "$SYSFS" ]; then
 		echo $msg sysfs is not mounted >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
 		echo $msg cpu hotplug is not supported >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	echo "CPU online/offline summary:"
@@ -32,7 +34,7 @@
 
 	if [[ "$online_cpus" = "$online_max" ]]; then
 		echo "$msg: since there is only one cpu: $online_cpus"
-		exit 0
+		exit $ksft_skip
 	fi
 
 	echo -e "\t Cpus in online state: $online_cpus"
@@ -237,12 +239,12 @@
 
 	if [ ! -d "$DEBUGFS" ]; then
 		echo $msg debugfs is not mounted >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
 		echo $msg cpu-notifier-error-inject module is not available >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index d83922d..31f8c9a 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -13,6 +13,9 @@
 CPUROOT=
 CPUFREQROOT=
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 helpme()
 {
 	printf "Usage: $0 [-h] [-todg args]
@@ -38,7 +41,7 @@
 
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 2
+		exit $ksft_skip
 	fi
 
 	taskset -p 01 $$
diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
new file mode 100755
index 0000000..1893d0f
--- /dev/null
+++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; }
+
+while getopts "hb:p:" arg; do
+    case "${arg}" in
+	h)
+	    usage
+	    ;;
+	b)
+	    busid=${OPTARG}
+	    ;;
+	p)
+	    tools_path=${OPTARG}
+	    ;;
+	*)
+	    usage
+	    ;;
+    esac
+done
+shift $((OPTIND-1))
+
+if [ -z "${busid}" ]; then
+	usage
+fi
+
+echo "Running USB over IP Testing on $busid";
+
+test_end_msg="End of USB over IP Testing on $busid"
+
+if [ $UID != 0 ]; then
+	echo "Please run usbip_test as root [SKIP]"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+echo "Load usbip_host module"
+if ! /sbin/modprobe -q -n usbip_host; then
+	echo "usbip_test: module usbip_host is not found [SKIP]"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+if /sbin/modprobe -q usbip_host; then
+	echo "usbip_test: module usbip_host is loaded [OK]"
+else
+	echo "usbip_test: module usbip_host failed to load [FAIL]"
+	echo $test_end_msg
+	exit 1
+fi
+
+echo "Load vhci_hcd module"
+if /sbin/modprobe -q vhci_hcd; then
+	echo "usbip_test: module vhci_hcd is loaded [OK]"
+else
+	echo "usbip_test: module vhci_hcd failed to load [FAIL]"
+	echo $test_end_msg
+	exit 1
+fi
+echo "=============================================================="
+
+cd $tools_path;
+
+if [ ! -f src/usbip ]; then
+	echo "Please build usbip tools"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Run lsusb to see all usb devices"
+lsusb -t;
+echo "=============================================================="
+
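+# Start the usbip daemon in the background; the list/bind/attach steps
+# below talk to it over localhost.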
+src/usbipd -D;
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be under usbip_host control"
+lsusb -t;
+echo "=============================================================="
+
+echo "bind devices - expect already bound messages"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "unbind devices";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "unbind devices - expect no devices bound message";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should fail with no devices"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should work"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "List imported devices - expect to see imported devices";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - expect already imported messages"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "Un-import devices";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Un-import devices - expect no devices to detach messages";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "Detach invalid port tests - expect invalid port error message";
+src/usbip detach -p 100;
+echo "=============================================================="
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Remove usbip_host module";
+rmmod usbip_host;
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "Run bind without usbip_host - expect fail"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - devices that failed to bind aren't bound to any driver"
+lsusb -t;
+echo "=============================================================="
+
+echo "modprobe usbip_host - does it work?"
+/sbin/modprobe usbip_host
+echo "Expect to see \"<busid> is not in match_busid table... skip!\" in dmesg"
+echo "=============================================================="
+dmesg | grep "is not in match_busid table"
+echo "=============================================================="
+
+echo $test_end_msg
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index c6d5790..a47029a 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -4,18 +4,21 @@
 efivarfs_mount=/sys/firmware/efi/efivars
 test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 check_prereqs()
 {
 	local msg="skip all tests:"
 
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
 		echo $msg efivarfs is not mounted on $efivarfs_mount >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 67cd459..47cbf54 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -20,6 +20,8 @@
 #include <string.h>
 #include <unistd.h>
 
+#include "../kselftest.h"
+
 static char longpath[2 * PATH_MAX] = "";
 static char *envp[] = { "IN_TEST=yes", NULL, NULL };
 static char *argv[] = { "execveat", "99", NULL };
@@ -249,8 +251,8 @@
 	errno = 0;
 	execveat_(-1, NULL, NULL, NULL, 0);
 	if (errno == ENOSYS) {
-		printf("[FAIL] ENOSYS calling execveat - no kernel support?\n");
-		return 1;
+		ksft_exit_skip(
+			"ENOSYS calling execveat - no kernel support?\n");
 	}
 
 	/* Change file position to confirm it doesn't affect anything */
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 5c7d700..129880f 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
+CFLAGS += -I../../../../usr/include/
 TEST_GEN_PROGS := devpts_pts
 TEST_GEN_PROGS_EXTENDED := dnotify_test
 
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
index b9055e9..b1fc9b9 100644
--- a/tools/testing/selftests/filesystems/devpts_pts.c
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -8,9 +8,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <sys/ioctl.h>
+#include <asm/ioctls.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
+#include "../kselftest.h"
 
 static bool terminal_dup2(int duplicate, int original)
 {
@@ -125,10 +126,12 @@
 		if (errno == EINVAL) {
 			fprintf(stderr, "TIOCGPTPEER is not supported. "
 					"Skipping test.\n");
-			fret = EXIT_SUCCESS;
+			fret = KSFT_SKIP;
+		} else {
+			fprintf(stderr,
+				"Failed to perform TIOCGPTPEER ioctl\n");
+			fret = EXIT_FAILURE;
 		}
-
-		fprintf(stderr, "Failed to perform TIOCGPTPEER ioctl\n");
 		goto do_cleanup;
 	}
 
@@ -279,9 +282,9 @@
 	int ret;
 
 	if (!isatty(STDIN_FILENO)) {
-		fprintf(stderr, "Standard input file desciptor is not attached "
+		fprintf(stderr, "Standard input file descriptor is not attached "
 				"to a terminal. Skipping test\n");
-		exit(EXIT_FAILURE);
+		exit(KSFT_SKIP);
 	}
 
 	ret = unshare(CLONE_NEWNS);
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index 8e2e34a..70d18be 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -74,7 +74,7 @@
 {
 	if [ ! -e "$DIR"/trigger_custom_fallback ]; then
 		echo "$0: custom fallback trigger not present, ignoring test" >&2
-		return 1
+		exit $ksft_skip
 	fi
 
 	local name="$1"
@@ -107,7 +107,7 @@
 {
 	if [ ! -e "$DIR"/trigger_custom_fallback ]; then
 		echo "$0: canceling custom fallback trigger not present, ignoring test" >&2
-		return 1
+		exit $ksft_skip
 	fi
 
 	local name="$1"
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 6452d21..a4320c4 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -30,6 +30,7 @@
 
 if [ ! -e "$DIR"/trigger_async_request ]; then
 	echo "$0: empty filename: async trigger not present, ignoring test" >&2
+	exit $ksft_skip
 else
 	if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then
 		echo "$0: empty filename should not succeed (async)" >&2
@@ -69,6 +70,7 @@
 # Try the asynchronous version too
 if [ ! -e "$DIR"/trigger_async_request ]; then
 	echo "$0: firmware loading: async trigger not present, ignoring test" >&2
+	exit $ksft_skip
 else
 	if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then
 		echo "$0: could not trigger async request" >&2
@@ -89,7 +91,7 @@
 {
 	if [ ! -f $DIR/reset ]; then
 		echo "Configuration triggers not present, ignoring test"
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 962d7f4..6c5f1b2 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -9,11 +9,14 @@
 PROC_CONFIG="/proc/config.gz"
 TEST_DIR=$(dirname $0)
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 print_reqs_exit()
 {
 	echo "You must have the following enabled in your kernel:" >&2
 	cat $TEST_DIR/config >&2
-	exit 1
+	exit $ksft_skip
 }
 
 test_modprobe()
@@ -88,7 +91,7 @@
 	if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then
 		if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
 			echo "usermode helper disabled so ignoring test"
-			exit 0
+			exit $ksft_skip
 		fi
 	fi
 }
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 2a4f16f..e4645d5 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -15,12 +15,27 @@
     echo nop > current_tracer
 }
 
-reset_trigger() { # reset all current setting triggers
-    grep -v ^# events/*/*/trigger |
+reset_trigger_file() {
+    # remove action triggers first
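+    # (action triggers may reference other events, e.g. synthetic ones,
+    # so they have to be removed before the remaining triggers)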
+    grep -H ':on[^:]*(' $@ |
     while read line; do
-        cmd=`echo $line | cut -f2- -d: | cut -f1 -d" "`
-	echo "!$cmd" > `echo $line | cut -f1 -d:`
+        cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+	file=`echo $line | cut -f1 -d:`
+	echo "!$cmd" >> $file
     done
+    grep -Hv ^# $@ |
+    while read line; do
+        cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+	file=`echo $line | cut -f1 -d:`
+	echo "!$cmd" > $file
+    done
+}
+
+reset_trigger() { # reset all current setting triggers
+    if [ -d events/synthetic ]; then
+        reset_trigger_file events/synthetic/*/trigger
+    fi
+    reset_trigger_file events/*/*/trigger
 }
 
 reset_events_filter() { # reset all current setting filters
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
new file mode 100644
index 0000000..2acbfe2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
@@ -0,0 +1,49 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram trigger
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram trace_marker trigger"
+
+echo 'hist:keys=common_pid' > events/ftrace/print/trigger
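+# Each of the ten trace_marker writes fires the ftrace/print event, so
+# the bucket for this pid is expected to reach "hitcount: 10".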
+for i in `seq 1 10` ; do echo "hello" > trace_marker; done
+grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \
+    fail "hist trigger did not trigger correct times on trace_marker"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
new file mode 100644
index 0000000..6748e8c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test snapshot trigger
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    echo 0 > snapshot
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f snapshot ]; then
+    echo "snapshot is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+test_trace() {
+    file=$1
+    x=$2
+
+    cat $file | while read line; do
+	comment=`echo $line | sed -e 's/^#//'`
+	if [ "$line" != "$comment" ]; then
+	    continue
+	fi
+	echo "testing $line for >$x<"
+	match=`echo $line | sed -e "s/>$x<//"`
+	if [ "$line" == "$match" ]; then
+	    fail "$line does not have >$x< in it"
+	fi
+	let x=$x+2
+    done
+}
+
+do_reset
+
+echo "Test snapshot trace_marker trigger"
+
+echo 'snapshot' > events/ftrace/print/trigger
+
+# make sure the snapshot is allocated
+
+grep -q 'Snapshot is allocated' snapshot
+
+for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done
+
+test_trace trace 1
+test_trace snapshot 2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
new file mode 100644
index 0000000..0a69c5d
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
@@ -0,0 +1,68 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event against kernel event
+# flags:
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    echo > synthetic_events
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic events not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/sched/sched_waking ]; then
+    echo "event sched_waking is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram kernel event to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).latency($lat)' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
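+# The chain: sched_waking stamps ts0 keyed by the woken pid; when that
+# pid writes to trace_marker, the second trigger computes the
+# wakeup-to-marker latency and fires the synthetic event, which the
+# last hist then aggregates.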
+sleep 1
+echo "hello" > trace_marker
+
+grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \
+    fail "hist trigger did not trigger correct times on trace_marker"
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+    fail "hist trigger did not trigger"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
new file mode 100644
index 0000000..3666dd6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event
+# flags:
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    echo > synthetic_events
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic events not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram trace_marker to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=common_pid:ts0=common_timestamp.usecs if buf == "start"' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(ftrace.print).latency($lat) if buf == "end"' >> events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
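+# The "start" write records ts0, and the "end" write computes the
+# start-to-end latency and fires the synthetic event via onmatch().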
+echo -n "start" > trace_marker
+echo -n "end" > trace_marker
+
+cnt=`grep 'hitcount: *1$' events/ftrace/print/hist | wc -l`
+
+if [ $cnt -ne 2 ]; then
+    fail "hist trace_marker trigger did not trigger correctly"
+fi
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+    fail "hist trigger did not trigger"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 8497a37..12631f0 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -17,14 +17,6 @@
 		fi \
 	done
 
-override define RUN_TESTS
-	@export KSFT_TAP_LEVEL=`echo 1`;
-	@echo "TAP version 13";
-	@echo "selftests: futex";
-	@echo "========================================";
-	@cd $(OUTPUT); ./run.sh
-endef
-
 override define INSTALL_RULE
 	mkdir -p $(INSTALL_PATH)
 	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -36,10 +28,6 @@
 	done;
 endef
 
-override define EMIT_TESTS
-	echo "./run.sh"
-endef
-
 override define CLEAN
 	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
index 183fb93..7f35b98 100755
--- a/tools/testing/selftests/gpio/gpio-mockup.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -2,10 +2,11 @@
 # SPDX-License-Identifier: GPL-2.0
 
 #exit status
-#1: run as non-root user
+#1: Internal error
 #2: sysfs/debugfs not mount
 #3: insert module fail when gpio-mockup is a module.
-#4: other reason.
+#4: Skip test including run as non-root user.
+#5: other reason.
 
 SYSFS=
 GPIO_SYSFS=
@@ -15,6 +16,9 @@
 dev_type=
 module=
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 usage()
 {
 	echo "Usage:"
@@ -34,7 +38,7 @@
 	msg="skip all tests:"
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 1
+		exit $ksft_skip
 	fi
 	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
 	if [ ! -d "$SYSFS" ]; then
@@ -73,7 +77,7 @@
 die()
 {
 	remove_module
-	exit 4
+	exit 5
 }
 
 test_chips()
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index d21edea..f6cd03a 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -9,6 +9,8 @@
 #include <sys/timeb.h>
 #include <sched.h>
 #include <errno.h>
+#include <string.h>
+#include "../kselftest.h"
 
 void usage(char *name) {
 	printf ("Usage: %s cpunum\n", name);
@@ -41,8 +43,8 @@
 	fd = open(msr_file_name, O_RDONLY);
 
 	if (fd == -1) {
-		perror("Failed to open");
-		return 1;
+		printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno));
+		return KSFT_SKIP;
 	}
 
 	CPU_ZERO(&cpuset);
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index c670359..e7008f6 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -30,9 +30,18 @@
 
 EVALUATE_ONLY=0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
 	echo "$0 # Skipped: Test can only run on x86 architectures."
-	exit 0
+	exit $ksft_skip
+fi
+
+msg="skip all tests:"
+if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then
+    echo $msg please run this as root >&2
+    exit $ksft_skip
 fi
 
 max_cpus=$(($(nproc)-1))
@@ -48,11 +57,12 @@
 
 	echo "sleeping for 5 seconds"
 	sleep 5
-	num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l)
-	if [ $num_freqs -le 2 ]; then
-		cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1
+	grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs
+	num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ')
+	if [ $num_freqs -ge 2 ]; then
+		tail -n 1 /tmp/result.freqs > /tmp/result.$1
 	else
-		cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1
+		cp /tmp/result.freqs /tmp/result.$1
 	fi
 	./msr 0 >> /tmp/result.$1
 
@@ -82,32 +92,37 @@
 max_freq=$(($_max_freq / 1000))
 
 
-for freq in `seq $max_freq -100 $min_freq`
+[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
 do
 	echo "Setting maximum frequency to $freq"
 	cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null
-	[ $EVALUATE_ONLY -eq 0 ] && run_test $freq
+	run_test $freq
 done
 
-echo "=============================================================================="
+[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
 
+echo "========================================================================"
 echo "The marketing frequency of the cpu is $mkt_freq MHz"
 echo "The maximum frequency of the cpu is $max_freq MHz"
 echo "The minimum frequency of the cpu is $min_freq MHz"
 
-cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
-
 # make a pretty table
-echo "Target      Actual      Difference     MSR(0x199)     max_perf_pct"
+echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab
 for freq in `seq $max_freq -100 $min_freq`
 do
 	result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ')
 	msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ')
 	max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' )
-	if [ $result_freq -eq $freq ]; then
-		echo " $freq        $result_freq             0          $msr         $(($max_perf_pct*3300))"
-	else
-		echo " $freq        $result_freq          $(($result_freq-$freq))          $msr          $(($max_perf_pct*$max_freq))"
-	fi
+	cat >> /tmp/result.tab << EOF
+$freq
+$result_freq
+$((result_freq - freq))
+$msr
+$((max_perf_pct * max_freq))
+EOF
 done
+
+# print the table
+pr -aTt -5 < /tmp/result.tab
+
 exit 0
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index ee9382b..dac927e 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -196,10 +196,9 @@
 	int msg, pid, err;
 	struct msgque_data msgque;
 
-	if (getuid() != 0) {
-		printf("Please run the test as root - Exiting.\n");
-		return ksft_exit_fail();
-	}
+	if (getuid() != 0)
+		return ksft_exit_skip(
+				"Please run the test as root - Exiting.\n");
 
 	msgque.key = ftok(argv[0], 822155650);
 	if (msgque.key == -1) {
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 7956ea3..0a76314 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -62,13 +62,16 @@
 ALL_TESTS="$ALL_TESTS 0008:150:1"
 ALL_TESTS="$ALL_TESTS 0009:150:1"
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 test_modprobe()
 {
        if [ ! -d $DIR ]; then
                echo "$0: $DIR not present" >&2
                echo "You must have the following enabled in your kernel:" >&2
                cat $TEST_DIR/config >&2
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -105,12 +108,12 @@
 {
 	if ! which modprobe 2> /dev/null > /dev/null; then
 		echo "$0: You need modprobe installed" >&2
-		exit 1
+		exit $ksft_skip
 	fi
 
 	if ! which kmod 2> /dev/null > /dev/null; then
 		echo "$0: You need kmod installed" >&2
-		exit 1
+		exit $ksft_skip
 	fi
 
 	# kmod 19 has a bad bug where it returns 0 when modprobe
@@ -124,13 +127,13 @@
 		echo "$0: You need at least kmod 20" >&2
 		echo "kmod <= 19 is buggy, for details see:" >&2
 		echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
-		exit 1
+		exit $ksft_skip
 	fi
 
 	uid=$(id -u)
 	if [ $uid -ne 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 1b9d8ec..15e6b75 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -20,7 +20,7 @@
 #define KSFT_XFAIL 2
 #define KSFT_XPASS 3
 /* Treat skip as pass */
-#define KSFT_SKIP  KSFT_PASS
+#define KSFT_SKIP  4
 
 /* counters */
 struct ksft_count {
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
new file mode 100644
index 0000000..63fc1ab
--- /dev/null
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -0,0 +1,3 @@
+set_sregs_test
+sync_regs_test
+vmx_tsc_adjust_test
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index c9f5b7d..cd01144 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -13,6 +13,8 @@
 #include <execinfo.h>
 #include <sys/syscall.h>
 
+#include "../../kselftest.h"
+
 /* Dumps the current stack trace to stderr. */
 static void __attribute__((noinline)) test_dump_stack(void);
 static void test_dump_stack(void)
@@ -70,8 +72,9 @@
 
 		fprintf(stderr, "==== Test Assertion Failure ====\n"
 			"  %s:%u: %s\n"
-			"  pid=%d tid=%d\n",
-			file, line, exp_str, getpid(), gettid());
+			"  pid=%d tid=%d - %s\n",
+			file, line, exp_str, getpid(), gettid(),
+			strerror(errno));
 		test_dump_stack();
 		if (fmt) {
 			fputs("  ", stderr);
@@ -80,6 +83,8 @@
 		}
 		va_end(ap);
 
+		if (errno == EACCES)
+			ksft_exit_skip("Access denied - Exiting.\n");
 		exit(254);
 	}
 
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
index aaa6332..d7cb794 100644
--- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -28,6 +28,8 @@
 #include <string.h>
 #include <sys/ioctl.h>
 
+#include "../kselftest.h"
+
 #ifndef MSR_IA32_TSC_ADJUST
 #define MSR_IA32_TSC_ADJUST 0x3b
 #endif
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index c1b1a4d..6466294 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -19,25 +19,43 @@
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
 .ONESHELL:
+define RUN_TEST_PRINT_RESULT
+	TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST";	\
+	echo $$TEST_HDR_MSG;					\
+	echo "========================================";	\
+	if [ ! -x $$TEST ]; then	\
+		echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\
+		echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+	else					\
+		cd `dirname $$TEST` > /dev/null; \
+		if [ "X$(summary)" != "X" ]; then	\
+			(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \
+			echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+			(if [ $$? -eq $$skip ]; then	\
+				echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]";				\
+			else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]";					\
+			fi;)			\
+		else				\
+			(./$$BASENAME_TEST &&	\
+			echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") ||						\
+			(if [ $$? -eq $$skip ]; then \
+				echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+			else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]";				\
+			fi;)		\
+		fi;				\
+		cd - > /dev/null;		\
+	fi;
+endef
+
 define RUN_TESTS
 	@export KSFT_TAP_LEVEL=`echo 1`;		\
 	test_num=`echo 0`;				\
+	skip=`echo 4`;					\
 	echo "TAP version 13";				\
 	for TEST in $(1); do				\
 		BASENAME_TEST=`basename $$TEST`;	\
 		test_num=`echo $$test_num+1 | bc`;	\
-		echo "selftests: $$BASENAME_TEST";	\
-		echo "========================================";	\
-		if [ ! -x $$TEST ]; then	\
-			echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
-			echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
-		else					\
-		if [ "X$(summary)" != "X" ]; then		\
-				cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
-			else				\
-				cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
-			fi;				\
-		fi;					\
+		$(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip))						\
 	done;
 endef
 
@@ -76,9 +94,18 @@
 endif
 
 define EMIT_TESTS
-	@for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
+	@test_num=`echo 0`;				\
+	for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
-		echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+		test_num=`echo $$test_num+1 | bc`;	\
+		TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST";	\
+		echo "echo $$TEST_HDR_MSG";	\
+		if [ ! -x $$TEST ]; then	\
+			echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\"";		\
+			echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \
+		else \
+			echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \
+		fi;		\
 	done;
 endef
 
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index 0836006..70d5711 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -3,6 +3,6 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-TEST_PROGS := printf.sh bitmap.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
index 4dee4d2..5a90006 100755
--- a/tools/testing/selftests/lib/bitmap.sh
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -1,9 +1,13 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # Runs bitmap infrastructure tests using test_bitmap kernel module
 if ! /sbin/modprobe -q -n test_bitmap; then
-	echo "bitmap: [SKIP]"
-	exit 77
+	echo "bitmap: module test_bitmap is not found [SKIP]"
+	exit $ksft_skip
 fi
 
 if /sbin/modprobe -q test_bitmap; then
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
index b363994..78e7483 100755
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -2,9 +2,12 @@
 # SPDX-License-Identifier: GPL-2.0
 # Checks fast/slow prime_number generation for inconsistencies
 
-if ! /sbin/modprobe -q -r prime_numbers; then
-	echo "prime_numbers: [SKIP]"
-	exit 77
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n prime_numbers; then
+	echo "prime_numbers: module prime_numbers is not found [SKIP]"
+	exit $ksft_skip
 fi
 
 if /sbin/modprobe -q prime_numbers selftest=65536; then
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
index 0c37377..45a23e2 100755
--- a/tools/testing/selftests/lib/printf.sh
+++ b/tools/testing/selftests/lib/printf.sh
@@ -1,9 +1,13 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 # Runs printf infrastructure using test_printf kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 if ! /sbin/modprobe -q -n test_printf; then
-	echo "printf: [SKIP]"
-	exit 77
+	echo "printf: module test_printf is not found [SKIP]"
+	exit $ksft_skip
 fi
 
 if /sbin/modprobe -q test_printf; then
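All three lib/ scripts now share the same convention: probe with a modprobe
dry run, exit with the framework SKIP code if the module is absent, and only
then load it for real. The skeleton, in generic form (a sketch; "test_example"
is a hypothetical module name):

    #!/bin/sh
    ksft_skip=4
    mod=test_example            # hypothetical module name
    # -n: dry run; succeeds only if the module can be found
    if ! /sbin/modprobe -q -n "$mod"; then
        echo "$mod: module $mod is not found [SKIP]"
        exit $ksft_skip
    fi
    # Load the module (its init function runs the tests), then unload it
    if /sbin/modprobe -q "$mod"; then
        /sbin/modprobe -q -r "$mod"
        echo "$mod: ok"
    else
        echo "$mod: [FAIL]"
        exit 1
    fi
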
diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile
new file mode 100644
index 0000000..6e7761a
--- /dev/null
+++ b/tools/testing/selftests/locking/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for locking/ww_mutex selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := ww_mutex.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
old mode 100644
new mode 100755
index 2c3d6b1..91e4ac7
--- a/tools/testing/selftests/locking/ww_mutex.sh
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -1,6 +1,14 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # Runs API tests for struct ww_mutex (Wait/Wound mutexes)
+if ! /sbin/modprobe -q -n test-ww_mutex; then
+	echo "ww_mutex: module test-ww_mutex is not found [SKIP]"
+	exit $ksft_skip
+fi
 
 if /sbin/modprobe -q test-ww_mutex; then
        /sbin/modprobe -q -r test-ww_mutex
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
index c82cec2..60826d7 100644
--- a/tools/testing/selftests/media_tests/Makefile
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+#
+CFLAGS += -I../ -I../../../../usr/include/
 TEST_GEN_PROGS := media_device_test media_device_open video_device_test
-all: $(TEST_GEN_PROGS)
 
 include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
index a5ce543..93183a3 100644
--- a/tools/testing/selftests/media_tests/media_device_open.c
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -34,6 +34,8 @@
 #include <sys/stat.h>
 #include <linux/media.h>
 
+#include "../kselftest.h"
+
 int main(int argc, char **argv)
 {
 	int opt;
@@ -61,10 +63,8 @@
 		}
 	}
 
-	if (getuid() != 0) {
-		printf("Please run the test as root - Exiting.\n");
-		exit(-1);
-	}
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
 
 	/* Open Media device and keep it open */
 	fd = open(media_device, O_RDWR);
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
index 421a367..4b99533 100644
--- a/tools/testing/selftests/media_tests/media_device_test.c
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -39,6 +39,8 @@
 #include <time.h>
 #include <linux/media.h>
 
+#include "../kselftest.h"
+
 int main(int argc, char **argv)
 {
 	int opt;
@@ -66,10 +68,8 @@
 		}
 	}
 
-	if (getuid() != 0) {
-		printf("Please run the test as root - Exiting.\n");
-		exit(-1);
-	}
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
 
 	/* Generate random number of interations */
 	srand((unsigned int) time(NULL));
@@ -88,7 +88,7 @@
 	       "other Oops in the dmesg. Enable KaSan kernel\n"
 	       "config option for use-after-free error detection.\n\n");
 
-	printf("Running test for %d iternations\n", count);
+	printf("Running test for %d iterations\n", count);
 
 	while (count > 0) {
 		ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
index 22bffd5..6793f8e 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -293,10 +293,9 @@
 		}
 		ksft_exit_fail_msg("sys_membarrier() failed\n");
 	}
-	if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
-		ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n");
-		ksft_exit_fail_msg("sys_membarrier is not supported.\n");
-	}
+	if (!(ret & MEMBARRIER_CMD_GLOBAL))
+		ksft_exit_skip(
+			"sys_membarrier unsupported: CMD_GLOBAL not found.\n");
 
 	ksft_test_result_pass("sys_membarrier available\n");
 	return 0;
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 0862e6f..53a8481 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -4,9 +4,9 @@
 CFLAGS += -I../../../../include/
 CFLAGS += -I../../../../usr/include/
 
-TEST_PROGS := run_tests.sh
-TEST_FILES := run_fuse_test.sh
-TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
+TEST_GEN_PROGS := memfd_test
+TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
+TEST_GEN_FILES := fuse_mnt fuse_test
 
 fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
 
diff --git a/tools/testing/selftests/memfd/run_hugetlbfs_test.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
new file mode 100755
index 0000000..fb633eeb
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+#
+# To test memfd_create with hugetlbfs, there needs to be hpages_test
+# huge pages free.  Attempt to allocate enough pages to test.
+#
+hpages_test=8
+
+#
+# Get count of free huge pages from /proc/meminfo
+#
+while read name size unit; do
+        if [ "$name" = "HugePages_Free:" ]; then
+                freepgs=$size
+        fi
+done < /proc/meminfo
+
+#
+# If not enough free huge pages for test, attempt to increase
+#
+if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
+	nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+	hpages_needed=`expr $hpages_test - $freepgs`
+
+	if [ $UID != 0 ]; then
+		echo "Please run memfd with hugetlbfs test as root"
+		exit $ksft_skip
+	fi
+
+	echo 3 > /proc/sys/vm/drop_caches
+	echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
+	while read name size unit; do
+		if [ "$name" = "HugePages_Free:" ]; then
+			freepgs=$size
+		fi
+	done < /proc/meminfo
+fi
+
+#
+# If still not enough huge pages available, exit.  But, give back any huge
+# pages potentially allocated above.
+#
+if [ $freepgs -lt $hpages_test ]; then
+	# nr_hugepgs non-zero only if we attempted to increase
+	if [ -n "$nr_hugepgs" ]; then
+		echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+	fi
+	printf "Not enough huge pages available (%d < %d)\n" \
+		$freepgs $hpages_test
+	exit $ksft_skip
+fi
+
+#
+# Run the hugetlbfs test
+#
+./memfd_test hugetlbfs
+./run_fuse_test.sh hugetlbfs
+
+#
+# Give back any huge pages allocated for the test
+#
+if [ -n "$nr_hugepgs" ]; then
+	echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+fi
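As an aside, the while-read loops that parse HugePages_Free above could each
be collapsed into a single awk invocation; a sketch of the equivalent form:

    freepgs=$(awk '/^HugePages_Free:/ {print $2}' /proc/meminfo)
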
diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh
deleted file mode 100755
index c2d41ed..0000000
--- a/tools/testing/selftests/memfd/run_tests.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/bash
-# please run as root
-
-#
-# Normal tests requiring no special resources
-#
-./run_fuse_test.sh
-./memfd_test
-
-#
-# To test memfd_create with hugetlbfs, there needs to be hpages_test
-# huge pages free.  Attempt to allocate enough pages to test.
-#
-hpages_test=8
-
-#
-# Get count of free huge pages from /proc/meminfo
-#
-while read name size unit; do
-        if [ "$name" = "HugePages_Free:" ]; then
-                freepgs=$size
-        fi
-done < /proc/meminfo
-
-#
-# If not enough free huge pages for test, attempt to increase
-#
-if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
-	nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
-	hpages_needed=`expr $hpages_test - $freepgs`
-
-	echo 3 > /proc/sys/vm/drop_caches
-	echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
-	if [ $? -ne 0 ]; then
-		echo "Please run this test as root"
-		exit 1
-	fi
-	while read name size unit; do
-		if [ "$name" = "HugePages_Free:" ]; then
-			freepgs=$size
-		fi
-	done < /proc/meminfo
-fi
-
-#
-# If still not enough huge pages available, exit.  But, give back any huge
-# pages potentially allocated above.
-#
-if [ $freepgs -lt $hpages_test ]; then
-	# nr_hugepgs non-zero only if we attempted to increase
-	if [ -n "$nr_hugepgs" ]; then
-		echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
-	fi
-	printf "Not enough huge pages available (%d < %d)\n" \
-		$freepgs $needpgs
-	exit 1
-fi
-
-#
-# Run the hugetlbfs test
-#
-./memfd_test hugetlbfs
-./run_fuse_test.sh hugetlbfs
-
-#
-# Give back any huge pages allocated for the test
-#
-if [ -n "$nr_hugepgs" ]; then
-	echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
-fi
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 686da51..e0a625e 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -4,11 +4,8 @@
 include ../lib.mk
 
 TEST_PROGS := mem-on-off-test.sh
-override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
-
-override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))"
 
 run_full_test:
-	@/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
+	@/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
index ae2c790..b37585e 100755
--- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -3,30 +3,33 @@
 
 SYSFS=
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 prerequisite()
 {
 	msg="skip all tests:"
 
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
 
 	if [ ! -d "$SYSFS" ]; then
 		echo $msg sysfs is not mounted >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
 		echo $msg memory hotplug is not supported >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then
 		echo $msg no hot-pluggable memory >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 
@@ -133,7 +136,8 @@
 
 error=-12
 priority=0
-ratio=10
+# Run with default of ratio=2 for Kselftest run
+ratio=2
 retval=0
 
 while getopts e:hp:r: opt; do
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index e094f71..0268907 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -3,15 +3,7 @@
 CFLAGS = -Wall \
          -O2
 
-TEST_GEN_PROGS := unprivileged-remount-test
+TEST_PROGS := run_tests.sh
+TEST_GEN_FILES := unprivileged-remount-test
 
 include ../lib.mk
-
-override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
-		      then	\
-				./unprivileged-remount-test ; \
-		      else	\
-				echo "WARN: No /proc/self/uid_map exist, test skipped." ; \
-		      fi
-override EMIT_TESTS := echo "$(RUN_TESTS)"
-
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh
new file mode 100755
index 0000000..4ab8f50
--- /dev/null
+++ b/tools/testing/selftests/mount/run_tests.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Run mount selftests
+if [ -f /proc/self/uid_map ] ; then
+	./unprivileged-remount-test
+else
+	echo "WARN: No /proc/self/uid_map exists, test skipped."
+	exit $ksft_skip
+fi
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 743d3f9..8a58055 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -1,17 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -O2
 LDLIBS = -lrt -lpthread -lpopt
+
 TEST_GEN_PROGS := mq_open_tests mq_perf_tests
 
 include ../lib.mk
-
-override define RUN_TESTS
-	@$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
-	@$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
-endef
-
-override define EMIT_TESTS
-	echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\""
-	echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
-endef
-
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
index e0a74bd..9403ac0 100644
--- a/tools/testing/selftests/mqueue/mq_open_tests.c
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -33,6 +33,8 @@
 #include <mqueue.h>
 #include <error.h>
 
+#include "../kselftest.h"
+
 static char *usage =
 "Usage:\n"
 "  %s path\n"
@@ -53,6 +55,7 @@
 int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
 FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
 char *queue_path;
+char *default_queue_path = "/test1";
 mqd_t queue = -1;
 
 static inline void __set(FILE *stream, int value, char *err_msg);
@@ -238,35 +241,33 @@
 	struct mq_attr attr, result;
 
 	if (argc != 2) {
-		fprintf(stderr, "Must pass a valid queue name\n\n");
-		fprintf(stderr, usage, argv[0]);
-		exit(1);
-	}
+		printf("Using Default queue path - %s\n", default_queue_path);
+		queue_path = default_queue_path;
+	} else {
 
 	/*
 	 * Although we can create a msg queue with a non-absolute path name,
 	 * unlink will fail.  So, if the name doesn't start with a /, add one
 	 * when we save it.
 	 */
-	if (*argv[1] == '/')
-		queue_path = strdup(argv[1]);
-	else {
-		queue_path = malloc(strlen(argv[1]) + 2);
-		if (!queue_path) {
-			perror("malloc()");
-			exit(1);
+		if (*argv[1] == '/')
+			queue_path = strdup(argv[1]);
+		else {
+			queue_path = malloc(strlen(argv[1]) + 2);
+			if (!queue_path) {
+				perror("malloc()");
+				exit(1);
+			}
+			queue_path[0] = '/';
+			queue_path[1] = 0;
+			strcat(queue_path, argv[1]);
 		}
-		queue_path[0] = '/';
-		queue_path[1] = 0;
-		strcat(queue_path, argv[1]);
 	}
 
-	if (getuid() != 0) {
-		fprintf(stderr, "Not running as root, but almost all tests "
+	if (getuid() != 0)
+		ksft_exit_skip("Not running as root, but almost all tests "
 			"require root in order to modify\nsystem settings.  "
 			"Exiting.\n");
-		exit(1);
-	}
 
 	/* Find out what files there are for us to make tweaks in */
 	def_msgs = fopen(DEF_MSGS, "r+");
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index 8188f72..b019e0b 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -39,6 +39,8 @@
 #include <popt.h>
 #include <error.h>
 
+#include "../kselftest.h"
+
 static char *usage =
 "Usage:\n"
 "  %s [-c #[,#..] -f] path\n"
@@ -626,12 +628,10 @@
 		cpus_to_pin[0] = cpus_online - 1;
 	}
 
-	if (getuid() != 0) {
-		fprintf(stderr, "Not running as root, but almost all tests "
+	if (getuid() != 0)
+		ksft_exit_skip("Not running as root, but almost all tests "
 			"require root in order to modify\nsystem settings.  "
 			"Exiting.\n");
-		exit(1);
-	}
 
 	max_msgs = fopen(MAX_MSGS, "r+");
 	max_msgsize = fopen(MAX_MSGSIZE, "r+");
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 9780c5a..78245d6 100644
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -5,6 +5,8 @@
 # different events.
 
 ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
 TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
@@ -1396,18 +1398,18 @@
 
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 if [ ! -x "$(command -v ip)" ]; then
 	echo "SKIP: Could not run test without ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 ip route help 2>&1 | grep -q fibmatch
 if [ $? -ne 0 ]; then
 	echo "SKIP: iproute2 too old, missing fibmatch"
-	exit 0
+	exit $ksft_skip
 fi
 
 # start clean
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index 903679e..e3afcb42 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -8,6 +8,9 @@
 # if not they probably have failed earlier in the boot process and their logged error will be catched by another test
 #
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # this function will try to up the interface
 # if already up, nothing done
 # arg1: network interface name
@@ -18,7 +21,7 @@
 	ip link show "$netdev" |grep -q UP
 	if [ $? -eq 0 ];then
 		echo "SKIP: $netdev: interface already up"
-		return 0
+		return $ksft_skip
 	fi
 
 	ip link set "$netdev" up
@@ -61,12 +64,12 @@
 	ip address show "$netdev" |grep '^[[:space:]]*inet'
 	if [ $? -eq 0 ];then
 		echo "SKIP: $netdev: already have an IP"
-		return 0
+		return $ksft_skip
 	fi
 
 	# TODO what ipaddr to set ? DHCP ?
 	echo "SKIP: $netdev: set IP address"
-	return 0
+	return $ksft_skip
 }
 
 # test an ethtool command
@@ -84,6 +87,7 @@
 	if [ $ret -ne 0 ];then
 		if [ $ret -eq "$1" ];then
 			echo "SKIP: $netdev: ethtool $2 not supported"
+			return $ksft_skip
 		else
 			echo "FAIL: $netdev: ethtool $2"
 			return 1
@@ -104,7 +108,7 @@
 	ethtool --version 2>/dev/null >/dev/null
 	if [ $? -ne 0 ];then
 		echo "SKIP: ethtool not present"
-		return 1
+		return $ksft_skip
 	fi
 
 	TMP_ETHTOOL_FEATURES="$(mktemp)"
@@ -176,13 +180,13 @@
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 ip link show 2>/dev/null >/dev/null
 if [ $? -ne 0 ];then
 	echo "SKIP: Could not run test without the ip tool"
-	exit 0
+	exit $ksft_skip
 fi
 
 TMP_LIST_NETDEV="$(mktemp)"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 7651fd4..f8cc38a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -43,6 +43,9 @@
 #	that MTU is properly calculated instead when MTU is not configured from
 #	userspace
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 tests="
 	pmtu_vti6_exception		vti6: PMTU exceptions
 	pmtu_vti4_exception		vti4: PMTU exceptions
@@ -162,7 +165,7 @@
 }
 
 setup() {
-	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return 1
+	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
 	cleanup_done=0
 	for arg do
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 7f6cd9f..7ec4fa4 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -60,6 +60,8 @@
 
 #include "psock_lib.h"
 
+#include "../kselftest.h"
+
 #ifndef bug_on
 # define bug_on(cond)		assert(!(cond))
 #endif
@@ -825,7 +827,7 @@
 		fprintf(stderr, "test: skip %s %s since user and kernel "
 			"space have different bit width\n",
 			tpacket_str[version], type_str[type]);
-		return 0;
+		return KSFT_SKIP;
 	}
 
 	sock = pfsocket(version);
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 760faef..0d7a44f 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -7,6 +7,9 @@
 devdummy="test-dummy0"
 ret=0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 # set global exit status, but never reset nonzero one.
 check_err()
 {
@@ -333,7 +336,7 @@
 	ip link show type vrf 2>/dev/null
 	if [ $? -ne 0 ]; then
 		echo "SKIP: vrf: iproute2 too old"
-		return 0
+		return $ksft_skip
 	fi
 
 	ip link add "$vrfname" type vrf table 10
@@ -409,7 +412,7 @@
 	ip fou help 2>&1 |grep -q 'Usage: ip fou'
 	if [ $? -ne 0 ];then
 		echo "SKIP: fou: iproute2 too old"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null
@@ -444,7 +447,7 @@
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP encap tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip netns exec "$testns" ip link set lo up
@@ -469,7 +472,7 @@
 	ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
 	if [ $? -ne 0 ]; then
 		echo "SKIP: macsec: iproute2 too old"
-		return 0
+		return $ksft_skip
 	fi
 
 	ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
@@ -613,14 +616,14 @@
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP gretap tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip link help gretap 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: gretap: iproute2 too old"
 		ip netns del "$testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	# test native tunnel
@@ -663,14 +666,14 @@
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP ip6gretap tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip link help ip6gretap 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: ip6gretap: iproute2 too old"
 		ip netns del "$testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	# test native tunnel
@@ -713,13 +716,13 @@
 	ip link help erspan 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: erspan: iproute2 too old"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP erspan tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	# test native tunnel erspan v1
@@ -778,13 +781,13 @@
 	ip link help ip6erspan 2>&1 | grep -q "^Usage:"
 	if [ $? -ne 0 ];then
 		echo "SKIP: ip6erspan: iproute2 too old"
-		return 1
+		return $ksft_skip
 	fi
 
 	ip netns add "$testns"
 	if [ $? -ne 0 ]; then
 		echo "SKIP ip6erspan tests: cannot add net namespace $testns"
-		return 1
+		return $ksft_skip
 	fi
 
 	# test native tunnel ip6erspan v1
@@ -865,14 +868,14 @@
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
-	exit 0
+	exit $ksft_skip
 fi
 
 for x in ip tc;do
 	$x -Version 2>/dev/null >/dev/null
 	if [ $? -ne 0 ];then
 		echo "SKIP: Could not run test without the $x tool"
-		exit 0
+		exit $ksft_skip
 	fi
 done
 
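The loop above probes each tool by running "$x -Version"; fib_tests.sh takes
the slightly more defensive route of asking the shell directly. A generic
guard built on the POSIX command builtin (a sketch):

    ksft_skip=4
    for tool in ip tc; do
        if ! command -v "$tool" > /dev/null 2>&1; then
            echo "SKIP: Could not run test without the $tool tool"
            exit $ksft_skip
        fi
    done
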
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 0000000..98f650c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Invoke a text editor on all console.log files for all runs with diagnostics,
+# that is, on all such files having a console.log.diags counterpart.
+# Note that both console.log.diags and console.log are passed to the
+# editor (currently defaulting to "vi"), allowing the user to get an
+# idea of what to search for in the console.log file.
+#
+# Usage: kvm-find-errors.sh directory
+#
+# The "directory" above should end with the date/time directory, for example,
+# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+
+rundir="${1}"
+if test -z "$rundir" -o ! -d "$rundir"
+then
+	echo Usage: $0 directory; exit 1
+fi
+editor=${EDITOR-vi}
+
+# Find builds with errors
+files=
+for i in ${rundir}/*/Make.out
+do
+	if egrep -q "error:|warning:" < $i
+	then
+		egrep "error:|warning:" < $i > $i.diags
+		files="$files $i.diags $i"
+	fi
+done
+if test -n "$files"
+then
+	$editor $files
+else
+	echo No build errors.
+fi
+if grep -q -e "--buildonly" < ${rundir}/log
+then
+	echo Build-only run, no console logs to check.
+fi
+
+# Find console logs with errors
+files=
+for i in ${rundir}/*/console.log
+do
+	if test -r $i.diags
+	then
+		files="$files $i.diags $i"
+	fi
+done
+if test -n "$files"
+then
+	$editor $files
+else
+	echo No errors in console logs.
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index c2e1bb6..477ecb1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -34,11 +34,15 @@
 
 configfile=`echo $i | sed -e 's/^.*\///'`
 ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
+stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
+	    tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
+	    awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
+	    tr -d '\012\015'`"
 if test -z "$ngps"
 then
-	echo "$configfile -------"
+	echo "$configfile ------- " $stopstate
 else
-	title="$configfile ------- $ngps grace periods"
+	title="$configfile ------- $ngps GPs"
 	dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
 	if test -z "$dur"
 	then
@@ -46,9 +50,9 @@
 	else
 		ngpsps=`awk -v ngps=$ngps -v dur=$dur '
 			BEGIN { print ngps / dur }' < /dev/null`
-		title="$title ($ngpsps per second)"
+		title="$title ($ngpsps/s)"
 	fi
-	echo $title
+	echo $title $stopstate
 	nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
 	if test -z "$nclosecalls"
 	then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index f7e988f..c27e978 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,10 +48,6 @@
 				cat $i/Make.oldconfig.err
 			fi
 			parse-build.sh $i/Make.out $configfile
-			if test "$TORTURE_SUITE" != rcuperf
-			then
-				parse-torture.sh $i/console.log $configfile
-			fi
 			parse-console.sh $i/console.log $configfile
 			if test -r $i/Warnings
 			then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5f8fbb0..c5b0f94 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -267,5 +267,4 @@
 	echo Unknown PID, cannot kill qemu command
 fi
 
-parse-torture.sh $resdir/console.log $title
 parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 08aa7d5..1729343 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,57 +24,146 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 
+T=${TMPDIR-/tmp}/parse-console.sh.$$
 file="$1"
 title="$2"
 
+trap 'rm -f $T.seq $T.diags' 0
+
 . functions.sh
 
+# Check for presence and readability of console output file
+if test -f "$file" -a -r "$file"
+then
+	:
+else
+	echo $title unreadable console output file: $file
+	exit 1
+fi
 if grep -Pq '\x00' < $file
 then
 	print_warning Console output contains nul bytes, old qemu still running?
 fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
-if test -s $1.diags
+cat /dev/null > $file.diags
+
+# Check for proper termination, except that rcuperf runs don't indicate this.
+if test "$TORTURE_SUITE" != rcuperf
 then
-	print_warning Assertion failure in $file $title
-	# cat $1.diags
+	# check for abject failure
+
+	if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
+	then
+		nerrs=`grep --binary-files=text '!!!' $file |
+		tail -1 |
+		awk '
+		{
+			for (i=NF-8;i<=NF;i++)
+				sum+=$i;
+		}
+		END { print sum }'`
+		print_bug $title FAILURE, $nerrs instances
+		exit
+	fi
+
+	grep --binary-files=text 'torture:.*ver:' $file |
+	egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
+	sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+	awk '
+	BEGIN	{
+		ver = 0;
+		badseq = 0;
+		}
+
+		{
+		if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+			badseqno1 = ver;
+			badseqno2 = $5;
+			badseqnr = NR;
+			badseq = 1;
+		}
+		ver = $5
+		}
+
+	END	{
+		if (badseq) {
+			if (badseqno1 == badseqno2 && badseqno2 == ver)
+				print "GP HANG at " ver " torture stat " badseqnr;
+			else
+				print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
+		}
+		}' > $T.seq
+
+	if grep -q SUCCESS $file
+	then
+		if test -s $T.seq
+		then
+			print_warning $title `cat $T.seq`
+			echo "   " $file
+			exit 2
+		fi
+	else
+		if grep -q "_HOTPLUG:" $file
+		then
+			print_warning HOTPLUG FAILURES $title `cat $T.seq`
+			echo "   " $file
+			exit 3
+		fi
+		echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
+		if test -s $T.seq
+		then
+			print_warning $title `cat $T.seq`
+		fi
+		exit 2
+	fi
+fi | tee -a $file.diags
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
+grep -v 'ODEBUG: ' |
+grep -v 'Warning: unable to open an initial console' > $T.diags
+if test -s $T.diags
+then
+	print_warning "Assertion failure in $file $title"
+	# cat $T.diags
 	summary=""
-	n_badness=`grep -c Badness $1`
+	n_badness=`grep -c Badness $file`
 	if test "$n_badness" -ne 0
 	then
 		summary="$summary  Badness: $n_badness"
 	fi
-	n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'`
+	n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
 	if test "$n_warn" -ne 0
 	then
 		summary="$summary  Warnings: $n_warn"
 	fi
-	n_bugs=`egrep -c 'BUG|Oops:' $1`
+	n_bugs=`egrep -c 'BUG|Oops:' $file`
 	if test "$n_bugs" -ne 0
 	then
 		summary="$summary  Bugs: $n_bugs"
 	fi
-	n_calltrace=`grep -c 'Call Trace:' $1`
+	n_calltrace=`grep -c 'Call Trace:' $file`
 	if test "$n_calltrace" -ne 0
 	then
 		summary="$summary  Call Traces: $n_calltrace"
 	fi
-	n_lockdep=`grep -c =========== $1`
+	n_lockdep=`grep -c =========== $file`
 	if test "$n_badness" -ne 0
 	then
 		summary="$summary  lockdep: $n_badness"
 	fi
-	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1`
+	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
 	if test "$n_stalls" -ne 0
 	then
 		summary="$summary  Stalls: $n_stalls"
 	fi
-	n_starves=`grep -c 'rcu_.*kthread starved for' $1`
+	n_starves=`grep -c 'rcu_.*kthread starved for' $file`
 	if test "$n_starves" -ne 0
 	then
 		summary="$summary  Starves: $n_starves"
 	fi
 	print_warning Summary: $summary
-else
-	rm $1.diags
+	cat $T.diags >> $file.diags
+fi
+if ! test -s $file.diags
+then
+	rm -f $file.diags
 fi
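One shell construct in the rewritten parse-console.sh deserves a note: the
whole `if test "$TORTURE_SUITE" != rcuperf ... fi` compound command is piped
into `tee -a $file.diags`, so anything echoed anywhere inside it reaches both
the console and the per-run diagnostics file. In miniature (a sketch with a
placeholder file name):

    # A compound command can feed a pipeline as a single unit.
    file=/tmp/console.log       # placeholder
    if test -r "$file"; then
        echo "checking $file"
        grep -c 'WARNING' "$file"
    fi | tee -a "$file.diags"
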
diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
deleted file mode 100755
index 5987e50..0000000
--- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-#
-# Check the console output from a torture run for goodness.
-# The "file" is a pathname on the local system, and "title" is
-# a text string for error-message purposes.
-#
-# The file must contain torture output, but can be interspersed
-# with other dmesg text, as in console-log output.
-#
-# Usage: parse-torture.sh file title
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-# Copyright (C) IBM Corporation, 2011
-#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-
-T=${TMPDIR-/tmp}/parse-torture.sh.$$
-file="$1"
-title="$2"
-
-trap 'rm -f $T.seq' 0
-
-. functions.sh
-
-# check for presence of torture output file.
-
-if test -f "$file" -a -r "$file"
-then
-	:
-else
-	echo $title unreadable torture output file: $file
-	exit 1
-fi
-
-# check for abject failure
-
-if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
-then
-	nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
-	print_bug $title FAILURE, $nerrs instances
-	echo "   " $url
-	exit
-fi
-
-grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
-awk '
-BEGIN	{
-	ver = 0;
-	badseq = 0;
-	}
-
-	{
-	if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
-		badseqno1 = ver;
-		badseqno2 = $5;
-		badseqnr = NR;
-		badseq = 1;
-	}
-	ver = $5
-	}
-
-END	{
-	if (badseq) {
-		if (badseqno1 == badseqno2 && badseqno2 == ver)
-			print "GP HANG at " ver " torture stat " badseqnr;
-		else
-			print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
-	}
-	}' > $T.seq
-
-if grep -q SUCCESS $file
-then
-	if test -s $T.seq
-	then
-		print_warning $title $title `cat $T.seq`
-		echo "   " $file
-		exit 2
-	fi
-else
-	if grep -q "_HOTPLUG:" $file
-	then
-		print_warning HOTPLUG FAILURES $title `cat $T.seq`
-		echo "   " $file
-		exit 3
-	fi
-	echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
-	if test -s $T.seq
-	then
-		print_warning $title `cat $T.seq`
-	fi
-	exit 2
-fi
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
new file mode 100644
index 0000000..d0ad44f
--- /dev/null
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -0,0 +1,2 @@
+rtctest
+setdate
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
new file mode 100644
index 0000000..de9c856
--- /dev/null
+++ b/tools/testing/selftests/rtc/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDFLAGS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = rtctest
+
+TEST_GEN_PROGS_EXTENDED = setdate
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
new file mode 100644
index 0000000..e20b017
--- /dev/null
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Driver Test Program
+ *
+ * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_UIE 3
+#define ALARM_DELTA 3
+
+static char *rtc_file = "/dev/rtc0";
+
+FIXTURE(rtc) {
+	int fd;
+};
+
+FIXTURE_SETUP(rtc) {
+	self->fd = open(rtc_file, O_RDONLY);
+	ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(rtc) {
+	close(self->fd);
+}
+
+TEST_F(rtc, date_read) {
+	int rc;
+	struct rtc_time rtc_tm;
+
+	/* Read the RTC time/date */
+	rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.",
+	       rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+	       rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+}
+
+TEST_F(rtc, uie_read) {
+	int i, rc, irq = 0;
+	unsigned long data;
+
+	/* Turn on update interrupts */
+	rc = ioctl(self->fd, RTC_UIE_ON, 0);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip update IRQs not supported.");
+		return;
+	}
+
+	for (i = 0; i < NUM_UIE; i++) {
+		/* This read will block */
+		rc = read(self->fd, &data, sizeof(data));
+		ASSERT_NE(-1, rc);
+		irq++;
+	}
+
+	EXPECT_EQ(NUM_UIE, irq);
+
+	rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, uie_select) {
+	int i, rc, irq = 0;
+	unsigned long data;
+
+	/* Turn on update interrupts */
+	rc = ioctl(self->fd, RTC_UIE_ON, 0);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip update IRQs not supported.");
+		return;
+	}
+
+	for (i = 0; i < NUM_UIE; i++) {
+		struct timeval tv = { .tv_sec = 2 };
+		fd_set readfds;
+
+		FD_ZERO(&readfds);
+		FD_SET(self->fd, &readfds);
+		/* The select will wait until an RTC interrupt happens. */
+		rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+		ASSERT_NE(-1, rc);
+		ASSERT_NE(0, rc);
+
+		/* This read won't block */
+		rc = read(self->fd, &data, sizeof(unsigned long));
+		ASSERT_NE(-1, rc);
+		irq++;
+	}
+
+	EXPECT_EQ(NUM_UIE, irq);
+
+	rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, alarm_alm_set) {
+	struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+	unsigned long data;
+	struct rtc_time tm;
+	fd_set readfds;
+	time_t secs, new;
+	int rc;
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	secs = timegm((struct tm *)&tm) + ALARM_DELTA;
+	gmtime_r(&secs, (struct tm *)&tm);
+
+	rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip alarms are not supported.");
+		return;
+	}
+
+	rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+	       tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+	/* Enable alarm interrupts */
+	rc = ioctl(self->fd, RTC_AIE_ON, 0);
+	ASSERT_NE(-1, rc);
+
+	FD_ZERO(&readfds);
+	FD_SET(self->fd, &readfds);
+
+	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+	ASSERT_NE(-1, rc);
+	EXPECT_NE(0, rc);
+
+	if (rc == 0)	/* select(2) timed out: nothing to read back */
+		return;
+
+	/* Disable alarm interrupts */
+	rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+
+	rc = read(self->fd, &data, sizeof(unsigned long));
+	ASSERT_NE(-1, rc);
+	TH_LOG("data: %lx", data);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	new = timegm((struct tm *)&tm);
+	ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set) {
+	struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+	struct rtc_wkalrm alarm = { 0 };
+	struct rtc_time tm;
+	unsigned long data;
+	fd_set readfds;
+	time_t secs, new;
+	int rc;
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+	ASSERT_NE(-1, rc);
+
+	secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA;
+	gmtime_r(&secs, (struct tm *)&alarm.time);
+
+	alarm.enabled = 1;
+
+	rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip alarms are not supported.");
+		return;
+	}
+
+	rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+	       alarm.time.tm_mday, alarm.time.tm_mon + 1,
+	       alarm.time.tm_year + 1900, alarm.time.tm_hour,
+	       alarm.time.tm_min, alarm.time.tm_sec);
+
+	FD_ZERO(&readfds);
+	FD_SET(self->fd, &readfds);
+
+	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+	ASSERT_NE(-1, rc);
+	ASSERT_NE(0, rc);	/* abort on timeout: the read below would block */
+
+	rc = read(self->fd, &data, sizeof(unsigned long));
+	ASSERT_NE(-1, rc);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	new = timegm((struct tm *)&tm);
+	ASSERT_EQ(new, secs);
+}
+
+static void __attribute__((constructor))
+__constructor_order_last(void)
+{
+	if (!__constructor_order)
+		__constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
+int main(int argc, char **argv)
+{
+	switch (argc) {
+	case 2:
+		rtc_file = argv[1];
+		/* FALLTHROUGH */
+	case 1:
+		break;
+	default:
+		fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]);
+		return 1;
+	}
+
+	return test_harness_run(argc, argv);
+}
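With the new rtc directory wired into the kselftest build, the harness-based
test builds and runs like any other target; a typical invocation might look
like this (a sketch; /dev/rtc1 stands in for a hypothetical second device):

    # From the top of the kernel tree
    make -C tools/testing/selftests TARGETS=rtc run_tests
    # Or run the binary directly against a specific device
    ./tools/testing/selftests/rtc/rtctest            # defaults to /dev/rtc0
    ./tools/testing/selftests/rtc/rtctest /dev/rtc1  # hypothetical second RTC
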
diff --git a/tools/testing/selftests/timers/rtctest_setdate.c b/tools/testing/selftests/rtc/setdate.c
similarity index 100%
rename from tools/testing/selftests/timers/rtctest_setdate.c
rename to tools/testing/selftests/rtc/setdate.c
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
index 2c8ac84..32a9ead 100644
--- a/tools/testing/selftests/timers/.gitignore
+++ b/tools/testing/selftests/timers/.gitignore
@@ -9,7 +9,7 @@
 nsleep-lat
 posix_timers
 raw_skew
-rtctest
+rtcpie
 set-2038
 set-tai
 set-timer-lat
@@ -19,4 +19,3 @@
 adjtick
 set-tz
 freq-step
-rtctest_setdate
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 3496680..c02683c 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -5,13 +5,13 @@
 # these are all "safe" tests that don't modify
 # system time or require escalated privileges
 TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
-	     inconsistency-check raw_skew threadtest rtctest
+	     inconsistency-check raw_skew threadtest rtcpie
 
 DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 		      skew_consistency clocksource-switch freq-step leap-a-day \
 		      leapcrash set-tai set-2038 set-tz
 
-TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate
+TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
 
 
 include ../lib.mk
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
new file mode 100644
index 0000000..47b5bad
--- /dev/null
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Periodic Interrupt test program
+ *
+ * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for
+ * events"), PIE are completely handled using hrtimers, without actually using
+ * any underlying hardware RTC.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+int main(int argc, char **argv)
+{
+	int i, fd, retval, irqcount = 0;
+	unsigned long tmp, data, old_pie_rate;
+	const char *rtc = default_rtc;
+	struct timeval start, end, diff;
+
+	switch (argc) {
+	case 2:
+		rtc = argv[1];
+		/* FALLTHROUGH */
+	case 1:
+		break;
+	default:
+		fprintf(stderr, "usage:  rtctest [rtcdev] [d]\n");
+		return 1;
+	}
+
+	fd = open(rtc, O_RDONLY);
+
+	if (fd ==  -1) {
+		perror(rtc);
+		exit(errno);
+	}
+
+	/* Read periodic IRQ rate */
+	retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate);
+	if (retval == -1) {
+		/* not all RTCs support periodic IRQs */
+		if (errno == EINVAL) {
+			fprintf(stderr, "\nNo periodic IRQ support\n");
+			goto done;
+		}
+		perror("RTC_IRQP_READ ioctl");
+		exit(errno);
+	}
+	fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate);
+
+	fprintf(stderr, "Counting 20 interrupts at:");
+	fflush(stderr);
+
+	/* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+	for (tmp=2; tmp<=64; tmp*=2) {
+
+		retval = ioctl(fd, RTC_IRQP_SET, tmp);
+		if (retval == -1) {
+			/* not all RTCs can change their periodic IRQ rate */
+			if (errno == EINVAL) {
+				fprintf(stderr,
+					"\n...Periodic IRQ rate is fixed\n");
+				goto done;
+			}
+			perror("RTC_IRQP_SET ioctl");
+			exit(errno);
+		}
+
+		fprintf(stderr, "\n%ldHz:\t", tmp);
+		fflush(stderr);
+
+		/* Enable periodic interrupts */
+		retval = ioctl(fd, RTC_PIE_ON, 0);
+		if (retval == -1) {
+			perror("RTC_PIE_ON ioctl");
+			exit(errno);
+		}
+
+		for (i=1; i<21; i++) {
+			gettimeofday(&start, NULL);
+			/* This blocks */
+			retval = read(fd, &data, sizeof(unsigned long));
+			if (retval == -1) {
+				perror("read");
+				exit(errno);
+			}
+			gettimeofday(&end, NULL);
+			timersub(&end, &start, &diff);
+			if (diff.tv_sec > 0 ||
+			    diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+				fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+				       diff.tv_sec, diff.tv_usec,
+				       (1000000L / tmp));
+				fflush(stdout);
+				exit(-1);
+			}
+
+			fprintf(stderr, " %d",i);
+			fflush(stderr);
+			irqcount++;
+		}
+
+		/* Disable periodic interrupts */
+		retval = ioctl(fd, RTC_PIE_OFF, 0);
+		if (retval == -1) {
+			perror("RTC_PIE_OFF ioctl");
+			exit(errno);
+		}
+	}
+
+done:
+	ioctl(fd, RTC_IRQP_SET, old_pie_rate);
+
+	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
deleted file mode 100644
index 411eff6..0000000
--- a/tools/testing/selftests/timers/rtctest.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- *      Real Time Clock Driver Test/Example Program
- *
- *      Compile with:
- *		     gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
- *
- *      Copyright (C) 1996, Paul Gortmaker.
- *
- *      Released under the GNU General Public License, version 2,
- *      included herein by reference.
- *
- */
-
-#include <stdio.h>
-#include <linux/rtc.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <errno.h>
-
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
-/*
- * This expects the new RTC class driver framework, working with
- * clocks that will often not be clones of what the PC-AT had.
- * Use the command line to specify another RTC if you need one.
- */
-static const char default_rtc[] = "/dev/rtc0";
-
-static struct rtc_time cutoff_dates[] = {
-	{
-		.tm_year = 70, /* 1970 -1900 */
-		.tm_mday = 1,
-	},
-	/* signed time_t 19/01/2038 3:14:08 */
-	{
-		.tm_year = 138,
-		.tm_mday = 19,
-	},
-	{
-		.tm_year = 138,
-		.tm_mday = 20,
-	},
-	{
-		.tm_year = 199, /* 2099 -1900 */
-		.tm_mday = 1,
-	},
-	{
-		.tm_year = 200, /* 2100 -1900 */
-		.tm_mday = 1,
-	},
-	/* unsigned time_t 07/02/2106 7:28:15*/
-	{
-		.tm_year = 205,
-		.tm_mon = 1,
-		.tm_mday = 7,
-	},
-	{
-		.tm_year = 206,
-		.tm_mon = 1,
-		.tm_mday = 8,
-	},
-	/* signed time on 64bit in nanoseconds 12/04/2262 01:47:16*/
-	{
-		.tm_year = 362,
-		.tm_mon = 3,
-		.tm_mday = 12,
-	},
-	{
-		.tm_year = 362, /* 2262 -1900 */
-		.tm_mon = 3,
-		.tm_mday = 13,
-	},
-};
-
-static int compare_dates(struct rtc_time *a, struct rtc_time *b)
-{
-	if (a->tm_year != b->tm_year ||
-	    a->tm_mon != b->tm_mon ||
-	    a->tm_mday != b->tm_mday ||
-	    a->tm_hour != b->tm_hour ||
-	    a->tm_min != b->tm_min ||
-	    ((b->tm_sec - a->tm_sec) > 1))
-		return 1;
-
-	return 0;
-}
-
-int main(int argc, char **argv)
-{
-	int i, fd, retval, irqcount = 0, dangerous = 0;
-	unsigned long tmp, data;
-	struct rtc_time rtc_tm;
-	const char *rtc = default_rtc;
-	struct timeval start, end, diff;
-
-	switch (argc) {
-	case 3:
-		if (*argv[2] == 'd')
-			dangerous = 1;
-	case 2:
-		rtc = argv[1];
-		/* FALLTHROUGH */
-	case 1:
-		break;
-	default:
-		fprintf(stderr, "usage:  rtctest [rtcdev] [d]\n");
-		return 1;
-	}
-
-	fd = open(rtc, O_RDONLY);
-
-	if (fd ==  -1) {
-		perror(rtc);
-		exit(errno);
-	}
-
-	fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
-
-	/* Turn on update interrupts (one per second) */
-	retval = ioctl(fd, RTC_UIE_ON, 0);
-	if (retval == -1) {
-		if (errno == EINVAL) {
-			fprintf(stderr,
-				"\n...Update IRQs not supported.\n");
-			goto test_READ;
-		}
-		perror("RTC_UIE_ON ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
-			rtc);
-	fflush(stderr);
-	for (i=1; i<6; i++) {
-		/* This read will block */
-		retval = read(fd, &data, sizeof(unsigned long));
-		if (retval == -1) {
-			perror("read");
-			exit(errno);
-		}
-		fprintf(stderr, " %d",i);
-		fflush(stderr);
-		irqcount++;
-	}
-
-	fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
-	fflush(stderr);
-	for (i=1; i<6; i++) {
-		struct timeval tv = {5, 0};     /* 5 second timeout on select */
-		fd_set readfds;
-
-		FD_ZERO(&readfds);
-		FD_SET(fd, &readfds);
-		/* The select will wait until an RTC interrupt happens. */
-		retval = select(fd+1, &readfds, NULL, NULL, &tv);
-		if (retval == -1) {
-		        perror("select");
-		        exit(errno);
-		}
-		/* This read won't block unlike the select-less case above. */
-		retval = read(fd, &data, sizeof(unsigned long));
-		if (retval == -1) {
-		        perror("read");
-		        exit(errno);
-		}
-		fprintf(stderr, " %d",i);
-		fflush(stderr);
-		irqcount++;
-	}
-
-	/* Turn off update interrupts */
-	retval = ioctl(fd, RTC_UIE_OFF, 0);
-	if (retval == -1) {
-		perror("RTC_UIE_OFF ioctl");
-		exit(errno);
-	}
-
-test_READ:
-	/* Read the RTC time/date */
-	retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
-	if (retval == -1) {
-		perror("RTC_RD_TIME ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
-		rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
-		rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-	/* Set the alarm to 5 sec in the future, and check for rollover */
-	rtc_tm.tm_sec += 5;
-	if (rtc_tm.tm_sec >= 60) {
-		rtc_tm.tm_sec %= 60;
-		rtc_tm.tm_min++;
-	}
-	if (rtc_tm.tm_min == 60) {
-		rtc_tm.tm_min = 0;
-		rtc_tm.tm_hour++;
-	}
-	if (rtc_tm.tm_hour == 24)
-		rtc_tm.tm_hour = 0;
-
-	retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
-	if (retval == -1) {
-		if (errno == EINVAL) {
-			fprintf(stderr,
-				"\n...Alarm IRQs not supported.\n");
-			goto test_PIE;
-		}
-
-		perror("RTC_ALM_SET ioctl");
-		exit(errno);
-	}
-
-	/* Read the current alarm settings */
-	retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
-	if (retval == -1) {
-		if (errno == EINVAL) {
-			fprintf(stderr,
-					"\n...EINVAL reading current alarm setting.\n");
-			goto test_PIE;
-		}
-		perror("RTC_ALM_READ ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
-		rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-	/* Enable alarm interrupts */
-	retval = ioctl(fd, RTC_AIE_ON, 0);
-	if (retval == -1) {
-		if (errno == EINVAL || errno == EIO) {
-			fprintf(stderr,
-				"\n...Alarm IRQs not supported.\n");
-			goto test_PIE;
-		}
-
-		perror("RTC_AIE_ON ioctl");
-		exit(errno);
-	}
-
-	fprintf(stderr, "Waiting 5 seconds for alarm...");
-	fflush(stderr);
-	/* This blocks until the alarm ring causes an interrupt */
-	retval = read(fd, &data, sizeof(unsigned long));
-	if (retval == -1) {
-		perror("read");
-		exit(errno);
-	}
-	irqcount++;
-	fprintf(stderr, " okay. Alarm rang.\n");
-
-	/* Disable alarm interrupts */
-	retval = ioctl(fd, RTC_AIE_OFF, 0);
-	if (retval == -1) {
-		perror("RTC_AIE_OFF ioctl");
-		exit(errno);
-	}
-
-test_PIE:
-	/* Read periodic IRQ rate */
-	retval = ioctl(fd, RTC_IRQP_READ, &tmp);
-	if (retval == -1) {
-		/* not all RTCs support periodic IRQs */
-		if (errno == EINVAL) {
-			fprintf(stderr, "\nNo periodic IRQ support\n");
-			goto test_DATE;
-		}
-		perror("RTC_IRQP_READ ioctl");
-		exit(errno);
-	}
-	fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
-
-	fprintf(stderr, "Counting 20 interrupts at:");
-	fflush(stderr);
-
-	/* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
-	for (tmp=2; tmp<=64; tmp*=2) {
-
-		retval = ioctl(fd, RTC_IRQP_SET, tmp);
-		if (retval == -1) {
-			/* not all RTCs can change their periodic IRQ rate */
-			if (errno == EINVAL) {
-				fprintf(stderr,
-					"\n...Periodic IRQ rate is fixed\n");
-				goto test_DATE;
-			}
-			perror("RTC_IRQP_SET ioctl");
-			exit(errno);
-		}
-
-		fprintf(stderr, "\n%ldHz:\t", tmp);
-		fflush(stderr);
-
-		/* Enable periodic interrupts */
-		retval = ioctl(fd, RTC_PIE_ON, 0);
-		if (retval == -1) {
-			perror("RTC_PIE_ON ioctl");
-			exit(errno);
-		}
-
-		for (i=1; i<21; i++) {
-			gettimeofday(&start, NULL);
-			/* This blocks */
-			retval = read(fd, &data, sizeof(unsigned long));
-			if (retval == -1) {
-				perror("read");
-				exit(errno);
-			}
-			gettimeofday(&end, NULL);
-			timersub(&end, &start, &diff);
-			if (diff.tv_sec > 0 ||
-			    diff.tv_usec > ((1000000L / tmp) * 1.10)) {
-				fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
-				       diff.tv_sec, diff.tv_usec,
-				       (1000000L / tmp));
-				fflush(stdout);
-				exit(-1);
-			}
-
-			fprintf(stderr, " %d",i);
-			fflush(stderr);
-			irqcount++;
-		}
-
-		/* Disable periodic interrupts */
-		retval = ioctl(fd, RTC_PIE_OFF, 0);
-		if (retval == -1) {
-			perror("RTC_PIE_OFF ioctl");
-			exit(errno);
-		}
-	}
-
-test_DATE:
-	if (!dangerous)
-		goto done;
-
-	fprintf(stderr, "\nTesting problematic dates\n");
-
-	for (i = 0; i < ARRAY_SIZE(cutoff_dates); i++) {
-		struct rtc_time current;
-
-		/* Write the new date in RTC */
-		retval = ioctl(fd, RTC_SET_TIME, &cutoff_dates[i]);
-		if (retval == -1) {
-			perror("RTC_SET_TIME ioctl");
-			close(fd);
-			exit(errno);
-		}
-
-		/* Read back */
-		retval = ioctl(fd, RTC_RD_TIME, &current);
-		if (retval == -1) {
-			perror("RTC_RD_TIME ioctl");
-			exit(errno);
-		}
-
-		if(compare_dates(&cutoff_dates[i], &current)) {
-			fprintf(stderr,"Setting date %d failed\n",
-			        cutoff_dates[i].tm_year + 1900);
-			goto done;
-		}
-
-		cutoff_dates[i].tm_sec += 5;
-
-		/* Write the new alarm in RTC */
-		retval = ioctl(fd, RTC_ALM_SET, &cutoff_dates[i]);
-		if (retval == -1) {
-			perror("RTC_ALM_SET ioctl");
-			close(fd);
-			exit(errno);
-		}
-
-		/* Read back */
-		retval = ioctl(fd, RTC_ALM_READ, &current);
-		if (retval == -1) {
-			perror("RTC_ALM_READ ioctl");
-			exit(errno);
-		}
-
-		if(compare_dates(&cutoff_dates[i], &current)) {
-			fprintf(stderr,"Setting alarm %d failed\n",
-			        cutoff_dates[i].tm_year + 1900);
-			goto done;
-		}
-
-		fprintf(stderr, "Setting year %d is OK \n",
-			cutoff_dates[i].tm_year + 1900);
-	}
-done:
-	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
-
-	close(fd);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 39f66bc..1865201 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -8,6 +8,7 @@
 UNAME_M := $(shell uname -m)
 CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
 CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
@@ -31,7 +32,12 @@
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 
-CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+# call32_from_64 in thunks.S uses absolute addresses.
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
 
 define gen-target-rule-32
 $(1) $(1)_32: $(OUTPUT)/$(1)_32
diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c
new file mode 100644
index 0000000..46a4471
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_program.c
@@ -0,0 +1,10 @@
+/* Trivial program to check that compilation with certain flags is working. */
+
+#include <stdio.h>
+
+int
+main(void)
+{
+	puts("");
+	return 0;
+}
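check_cc.sh, already used above for the -m32 and 64-bit probes, reports
whether the compiler accepts a given source file and flags. Conceptually the
-no-pie probe reduces to a test compile like this (a sketch, not the in-tree
check_cc.sh):

    #!/bin/sh
    # Print 1 if $CC accepts -no-pie, 0 otherwise.
    CC=${CC:-gcc}
    if "$CC" -no-pie -o /dev/null trivial_program.c > /dev/null 2>&1; then
        echo 1
    else
        echo 0
    fi
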
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
index c9c8161..4204359 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.c
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -135,11 +135,11 @@
 	return 0;
 }
 
-static int get_nports(void)
+static int get_nports(struct udev_device *hc_device)
 {
 	const char *attr_nports;
 
-	attr_nports = udev_device_get_sysattr_value(vhci_driver->hc_device, "nports");
+	attr_nports = udev_device_get_sysattr_value(hc_device, "nports");
 	if (!attr_nports) {
 		err("udev_device_get_sysattr_value nports failed");
 		return -1;
@@ -242,35 +242,41 @@
 
 int usbip_vhci_driver_open(void)
 {
+	int nports;
+	struct udev_device *hc_device;
+
 	udev_context = udev_new();
 	if (!udev_context) {
 		err("udev_new failed");
 		return -1;
 	}
 
-	vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver));
-
 	/* will be freed in usbip_driver_close() */
-	vhci_driver->hc_device =
+	hc_device =
 		udev_device_new_from_subsystem_sysname(udev_context,
 						       USBIP_VHCI_BUS_TYPE,
 						       USBIP_VHCI_DEVICE_NAME);
-	if (!vhci_driver->hc_device) {
+	if (!hc_device) {
 		err("udev_device_new_from_subsystem_sysname failed");
 		goto err;
 	}
 
-	vhci_driver->nports = get_nports();
-	dbg("available ports: %d", vhci_driver->nports);
-
-	if (vhci_driver->nports <= 0) {
+	nports = get_nports(hc_device);
+	if (nports <= 0) {
 		err("no available ports");
 		goto err;
-	} else if (vhci_driver->nports > MAXNPORT) {
-		err("port number exceeds %d", MAXNPORT);
+	}
+	dbg("available ports: %d", nports);
+
+	vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver) +
+			nports * sizeof(struct usbip_imported_device));
+	if (!vhci_driver) {
+		err("vhci_driver allocation failed");
 		goto err;
 	}
 
+	vhci_driver->nports = nports;
+	vhci_driver->hc_device = hc_device;
 	vhci_driver->ncontrollers = get_ncontrollers();
 	dbg("available controllers: %d", vhci_driver->ncontrollers);
 
@@ -285,7 +291,7 @@
 	return 0;
 
 err:
-	udev_device_unref(vhci_driver->hc_device);
+	udev_device_unref(hc_device);
 
 	if (vhci_driver)
 		free(vhci_driver);
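
The reworked open path resolves the host-controller device and reads
its port count before any allocation, using the usual libudev lookup
pattern. A minimal standalone sketch of that pattern (link with
-ludev; illustrative, not part of the patch):

	#include <stdio.h>
	#include <libudev.h>

	int main(void)
	{
		struct udev *udev = udev_new();
		struct udev_device *dev;
		const char *nports;

		if (!udev)
			return 1;
		dev = udev_device_new_from_subsystem_sysname(udev, "platform",
							     "vhci_hcd.0");
		if (dev) {
			nports = udev_device_get_sysattr_value(dev, "nports");
			printf("nports: %s\n", nports ? nports : "(unavailable)");
			udev_device_unref(dev);
		}
		udev_unref(udev);
		return 0;
	}
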
diff --git a/tools/usb/usbip/libsrc/vhci_driver.h b/tools/usb/usbip/libsrc/vhci_driver.h
index 418b404..6c9aca2 100644
--- a/tools/usb/usbip/libsrc/vhci_driver.h
+++ b/tools/usb/usbip/libsrc/vhci_driver.h
@@ -13,7 +13,6 @@
 
 #define USBIP_VHCI_BUS_TYPE "platform"
 #define USBIP_VHCI_DEVICE_NAME "vhci_hcd.0"
-#define MAXNPORT 128
 
 enum hub_speed {
 	HUB_SPEED_HIGH = 0,
@@ -41,7 +40,7 @@
 
 	int ncontrollers;
 	int nports;
-	struct usbip_imported_device idev[MAXNPORT];
+	struct usbip_imported_device idev[];
 };
 
 
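Dropping MAXNPORT in favor of a C99 flexible array member lets
usbip_vhci_driver_open() size the idev array to the port count the
hardware actually reports, in a single allocation. The general shape
of the pattern, with illustrative names (not from the patch):

	#include <stdio.h>
	#include <stdlib.h>

	struct item { int id; };

	struct container {
		int nitems;
		struct item items[];	/* flexible array member */
	};

	int main(void)
	{
		int n = 4;
		/* one allocation covers the header plus n elements */
		struct container *c = calloc(1, sizeof(*c) +
					     n * sizeof(struct item));

		if (!c)
			return 1;
		c->nitems = n;
		for (int i = 0; i < n; i++)
			c->items[i].id = i;
		printf("allocated %d items\n", c->nitems);
		free(c);
		return 0;
	}
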
diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c
index 9db9d21..777f728 100644
--- a/tools/usb/usbip/src/usbip_detach.c
+++ b/tools/usb/usbip/src/usbip_detach.c
@@ -43,9 +43,12 @@
 
 static int detach_port(char *port)
 {
-	int ret;
+	int ret = 0;
 	uint8_t portnum;
 	char path[PATH_MAX+1];
+	int i;
+	struct usbip_imported_device *idev;
+	int found = 0;
 
 	unsigned int port_len = strlen(port);
 
@@ -55,27 +58,48 @@
 			return -1;
 		}
 
-	/* check max port */
-
 	portnum = atoi(port);
 
-	/* remove the port state file */
-
-	snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", portnum);
-
-	remove(path);
-	rmdir(VHCI_STATE_PATH);
-
 	ret = usbip_vhci_driver_open();
 	if (ret < 0) {
 		err("open vhci_driver");
 		return -1;
 	}
 
-	ret = usbip_vhci_detach_device(portnum);
-	if (ret < 0)
-		return -1;
+	/* check for invalid port */
+	for (i = 0; i < vhci_driver->nports; i++) {
+		idev = &vhci_driver->idev[i];
 
+		if (idev->port == portnum) {
+			found = 1;
+			if (idev->status != VDEV_ST_NULL)
+				break;
+			info("Port %d is already detached!\n", idev->port);
+			goto call_driver_close;
+		}
+	}
+
+	if (!found) {
+		err("Invalid port %s > maxports %d",
+			port, vhci_driver->nports);
+		goto call_driver_close;
+	}
+
+	/* remove the port state file */
+	snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", portnum);
+
+	remove(path);
+	rmdir(VHCI_STATE_PATH);
+
+	ret = usbip_vhci_detach_device(portnum);
+	if (ret < 0) {
+		ret = -1;
+		err("Port %d detach request failed!\n", portnum);
+		goto call_driver_close;
+	}
+	info("Port %d is now detached!\n", portnum);
+
+call_driver_close:
 	usbip_vhci_driver_close();
 
 	return ret;
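
detach_port() now validates the requested port against the imported
device list before removing the state file or issuing the detach, and
routes every outcome through the single call_driver_close label so the
driver handle is released exactly once. A sketch of that shape with
illustrative names (not from the patch):

	#include <stdio.h>

	struct port { int num; int in_use; };

	static int do_detach(struct port *ports, int nports, int want)
	{
		int idx = -1;

		for (int i = 0; i < nports; i++) {
			if (ports[i].num == want) {
				idx = i;
				break;
			}
		}
		if (idx < 0) {
			fprintf(stderr, "invalid port %d\n", want);
			goto out;
		}
		if (!ports[idx].in_use) {
			printf("port %d is already detached\n", want);
			goto out;
		}
		ports[idx].in_use = 0;	/* real code sends the detach request */
		printf("port %d detached\n", want);
	out:
		/* single exit: cleanup (driver close) happens exactly once */
		return 0;
	}

	int main(void)
	{
		struct port ports[] = { { 0, 1 }, { 1, 0 } };

		return do_detach(ports, 2, 0);
	}
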
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
index 1571e24..f91aeb5f 100644
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -6,8 +6,6 @@
 # error Virtio userspace code does not support CONFIG_HAS_DMA
 #endif
 
-#define PCI_DMA_BUS_IS_PHYS 1
-
 enum dma_data_direction {
 	DMA_BIDIRECTIONAL = 0,
 	DMA_TO_DEVICE = 1,
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 7f6a944..8d90de2 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1401,6 +1401,7 @@
 {
 	siginfo_t info;
 
+	clear_siginfo(&info);
 	info.si_signo   = SIGBUS;
 	info.si_errno   = 0;
 	info.si_code    = BUS_MCEERR_AR;
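
clear_siginfo() zeroes the entire siginfo_t before the individual
fields are assigned, so padding and union members this path never
writes cannot leak stale kernel stack data to userspace. A userspace
analogue of the same pattern (clear_siginfo() itself is kernel-only):

	#include <signal.h>
	#include <string.h>

	int main(void)
	{
		siginfo_t info;

		/* zero everything first, as clear_siginfo() does in-kernel */
		memset(&info, 0, sizeof(info));
		info.si_signo = SIGBUS;
		info.si_errno = 0;
		info.si_code  = BUS_ADRERR;	/* stand-in for BUS_MCEERR_AR */

		return info.si_signo == SIGBUS ? 0 : 1;
	}
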
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 6e865e8..90d30fb 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -397,7 +397,7 @@
 	 * Check if there was an event already pending on the eventfd
 	 * before we registered, and trigger it as if we didn't miss it.
 	 */
-	events = f.file->f_op->poll(f.file, &irqfd->pt);
+	events = vfs_poll(f.file, &irqfd->pt);
 
 	if (events & EPOLLIN)
 		schedule_work(&irqfd->inject);
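
vfs_poll() is the VFS-level wrapper that replaces the direct
f_op->poll() call here. The "event already pending" situation this
code handles can be reproduced from userspace: a value written to an
eventfd before polling makes poll() report POLLIN immediately (a
standalone sketch, not part of the patch):

	#include <stdio.h>
	#include <stdint.h>
	#include <unistd.h>
	#include <poll.h>
	#include <sys/eventfd.h>

	int main(void)
	{
		uint64_t val = 1;
		int efd = eventfd(0, 0);
		struct pollfd pfd = { .fd = efd, .events = POLLIN };

		if (efd == -1)
			return 1;
		/* the event arrives before anyone polls */
		if (write(efd, &val, sizeof(val)) != sizeof(val))
			return 1;
		if (poll(&pfd, 1, 0) == 1 && (pfd.revents & POLLIN))
			printf("eventfd was already readable\n");
		close(efd);
		return 0;
	}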